From 769785c497aaa60c629e0299e3ebfff53a8e393e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 8 May 2015 22:32:37 -0700 Subject: Add vulkan driver for BDW --- src/Makefile.am | 4 + src/glsl/ast.h | 9 + src/glsl/ast_to_hir.cpp | 19 +- src/glsl/ast_type.cpp | 5 + src/glsl/glsl_parser.yy | 5 + src/glsl/ir.h | 14 +- src/glsl/link_uniform_block_active_visitor.cpp | 5 + src/glsl/link_uniform_block_active_visitor.h | 2 + src/glsl/link_uniform_blocks.cpp | 6 + src/mesa/drivers/dri/common/dri_test.c | 9 +- src/mesa/drivers/dri/i965/Makefile.am | 11 +- src/mesa/drivers/dri/i965/brw_context.c | 4 + src/mesa/drivers/dri/i965/brw_context.h | 9 + src/mesa/drivers/dri/i965/brw_defines.h | 2 + src/mesa/drivers/dri/i965/brw_device_info.c | 12 + src/mesa/drivers/dri/i965/brw_device_info.h | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 33 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 19 +- src/mesa/drivers/dri/i965/brw_gs.c | 45 +- src/mesa/drivers/dri/i965/brw_gs.h | 16 + src/mesa/drivers/dri/i965/brw_state_cache.c | 3 + src/mesa/drivers/dri/i965/brw_wm.c | 2 +- src/mesa/drivers/dri/i965/brw_wm.h | 6 + src/mesa/drivers/dri/i965/intel_debug.c | 2 +- src/mesa/drivers/dri/i965/intel_screen.c | 72 + src/mesa/main/mtypes.h | 6 + src/vulkan/Makefile.am | 67 + src/vulkan/allocator.c | 499 ++ src/vulkan/aub.c | 292 + src/vulkan/aub.h | 153 + src/vulkan/compiler.cpp | 931 +++ src/vulkan/device.c | 2634 +++++++ src/vulkan/gem.c | 283 + src/vulkan/gen8_pack.h | 8702 ++++++++++++++++++++++++ src/vulkan/image.c | 404 ++ src/vulkan/intel.c | 93 + src/vulkan/meta.c | 140 + src/vulkan/pipeline.c | 565 ++ src/vulkan/private.h | 594 ++ src/vulkan/util.c | 99 + src/vulkan/vk.c | 723 ++ 41 files changed, 16473 insertions(+), 27 deletions(-) create mode 100644 src/vulkan/Makefile.am create mode 100644 src/vulkan/allocator.c create mode 100644 src/vulkan/aub.c create mode 100644 src/vulkan/aub.h create mode 100644 src/vulkan/compiler.cpp create mode 100644 src/vulkan/device.c create mode 
100644 src/vulkan/gem.c create mode 100644 src/vulkan/gen8_pack.h create mode 100644 src/vulkan/image.c create mode 100644 src/vulkan/intel.c create mode 100644 src/vulkan/meta.c create mode 100644 src/vulkan/pipeline.c create mode 100644 src/vulkan/private.h create mode 100644 src/vulkan/util.c create mode 100644 src/vulkan/vk.c (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index 18cb4ce76d7..bf76e35f144 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -61,6 +61,10 @@ EXTRA_DIST = \ AM_CFLAGS = $(VISIBILITY_CFLAGS) AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS) +if HAVE_VULKAN +SUBDIRS += vulkan +endif + AM_CPPFLAGS = \ -I$(top_srcdir)/include/ \ -I$(top_srcdir)/src/mapi/ \ diff --git a/src/glsl/ast.h b/src/glsl/ast.h index ef74e5137b2..c52e518334d 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -514,6 +514,10 @@ struct ast_type_qualifier { unsigned stream:1; /**< Has stream value assigned */ unsigned explicit_stream:1; /**< stream value assigned explicitly by shader code */ /** \} */ + + /** \name Vulkan qualifiers */ + unsigned vk_set:1; + } /** \brief Set of flags, accessed by name. */ q; @@ -595,6 +599,11 @@ struct ast_type_qualifier { */ glsl_base_type image_base_type; + /** + * Vulkan descriptor set + */ + int set; + /** * Return true if and only if an interpolation qualifier is present. 
*/ diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 14e63090557..f1daee38d7a 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -2645,7 +2645,16 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; } - if (qual->flags.q.explicit_location) { + if (qual->flags.q.vk_set) { + if (!qual->flags.q.explicit_index) + _mesa_glsl_error(loc, state, + "Vulkan descriptor set layout requires both group and index " + "qualifiers"); + + var->data.vk_set = true; + var->data.set = qual->set; + var->data.index = qual->index; + } else if (qual->flags.q.explicit_location) { validate_explicit_location(qual, var, state, loc); } else if (qual->flags.q.explicit_index) { _mesa_glsl_error(loc, state, "explicit index requires explicit location"); @@ -5782,6 +5791,10 @@ ast_interface_block::hir(exec_list *instructions, var->data.explicit_binding = this->layout.flags.q.explicit_binding; var->data.binding = this->layout.binding; + var->data.vk_set = this->layout.flags.q.vk_set; + var->data.set = this->layout.set; + var->data.index = this->layout.index; + state->symbols->add_variable(var); instructions->push_tail(var); } @@ -5854,6 +5867,10 @@ ast_interface_block::hir(exec_list *instructions, var->data.explicit_binding = this->layout.flags.q.explicit_binding; var->data.binding = this->layout.binding; + var->data.vk_set = this->layout.flags.q.vk_set; + var->data.set = this->layout.set; + var->data.index = this->layout.index; + state->symbols->add_variable(var); instructions->push_tail(var); } diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp index 1bcf6a2e81f..a988fd58792 100644 --- a/src/glsl/ast_type.cpp +++ b/src/glsl/ast_type.cpp @@ -252,6 +252,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, this->image_base_type = q.image_base_type; } + if (q.flags.q.vk_set) { + this->set = q.set; + this->index = q.index; + } + return true; } diff --git 
a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 3ce9e103f20..41106060e5e 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -1431,6 +1431,11 @@ layout_qualifier_id: $$.binding = $3; } + if (match_layout_qualifier("set", $1, state) == 0) { + $$.flags.q.vk_set = 1; + $$.set = $3; + } + if (state->has_atomic_counters() && match_layout_qualifier("offset", $1, state) == 0) { $$.flags.q.explicit_offset = 1; diff --git a/src/glsl/ir.h b/src/glsl/ir.h index fab1cd2d291..fdb595106c2 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -682,6 +682,11 @@ public: unsigned explicit_location:1; unsigned explicit_index:1; + /** + * Do we have a Vulkan (group, index) qualifier for this variable? + */ + unsigned vk_set:1; + /** * Was an initial binding explicitly set in the shader? * @@ -751,8 +756,10 @@ public: * \note * The GLSL spec only allows the values 0 or 1 for the index in \b dual * source blending. + * + * This is now also used for the Vulkan descriptor set index. */ - unsigned index:1; + int16_t index; /** * \brief Layout qualifier for gl_FragDepth. @@ -800,6 +807,11 @@ public: */ int16_t binding; + /** + * Vulkan descriptor set for the resource. 
+ */ + int16_t set; + /** * Storage location of the base of this variable * diff --git a/src/glsl/link_uniform_block_active_visitor.cpp b/src/glsl/link_uniform_block_active_visitor.cpp index 292cde343f9..701ca979b7f 100644 --- a/src/glsl/link_uniform_block_active_visitor.cpp +++ b/src/glsl/link_uniform_block_active_visitor.cpp @@ -53,6 +53,11 @@ process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var) b->binding = 0; } + if (var->data.vk_set) { + b->set = var->data.set; + b->index = var->data.index; + } + _mesa_hash_table_insert(ht, var->get_interface_type()->name, (void *) b); return b; } else { diff --git a/src/glsl/link_uniform_block_active_visitor.h b/src/glsl/link_uniform_block_active_visitor.h index e5ea501553c..148a3915abd 100644 --- a/src/glsl/link_uniform_block_active_visitor.h +++ b/src/glsl/link_uniform_block_active_visitor.h @@ -35,6 +35,8 @@ struct link_uniform_block_active { unsigned num_array_elements; unsigned binding; + unsigned set; + unsigned index; bool has_instance_name; bool has_binding; diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp index 898544bea82..ca87bf0df71 100644 --- a/src/glsl/link_uniform_blocks.cpp +++ b/src/glsl/link_uniform_blocks.cpp @@ -293,6 +293,9 @@ link_uniform_blocks(void *mem_ctx, blocks[i].NumUniforms = (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); + blocks[i].Set = b->set; + blocks[i].Index = b->index; + i++; } } else { @@ -311,6 +314,9 @@ link_uniform_blocks(void *mem_ctx, blocks[i].NumUniforms = (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); + blocks[i].Set = b->set; + blocks[i].Index = b->index; + i++; } } diff --git a/src/mesa/drivers/dri/common/dri_test.c b/src/mesa/drivers/dri/common/dri_test.c index 57bfa5b9394..310e7617e2f 100644 --- a/src/mesa/drivers/dri/common/dri_test.c +++ b/src/mesa/drivers/dri/common/dri_test.c @@ -1,3 +1,4 @@ +#include #include "main/glheader.h" #include "main/compiler.h" #include "glapi/glapi.h" 
@@ -33,12 +34,14 @@ _glapi_check_multithread(void) PUBLIC void _glapi_set_context(void *context) -{} +{ + _glapi_Context = context; +} PUBLIC void * _glapi_get_context(void) { - return 0; + return _glapi_Context; } PUBLIC void @@ -84,7 +87,7 @@ _glapi_set_nop_handler(_glapi_nop_handler_proc func) PUBLIC struct _glapi_table * _glapi_new_nop_table(unsigned num_entries) { - return NULL; + return malloc(16); } #ifndef NO_MAIN diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 235bcfeae02..cf2424e34b4 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -41,7 +41,7 @@ AM_CFLAGS = \ AM_CXXFLAGS = $(AM_CFLAGS) -noinst_LTLIBRARIES = libi965_dri.la +noinst_LTLIBRARIES = libi965_dri.la libi965_compiler.la libi965_dri_la_SOURCES = $(i965_FILES) libi965_dri_la_LIBADD = $(INTEL_LIBS) @@ -54,6 +54,15 @@ TEST_LIBS = \ $(CLOCK_LIB) \ ../common/libdri_test_stubs.la +libi965_compiler_la_SOURCES = $(i965_FILES) +libi965_compiler_la_LIBADD = $(INTEL_LIBS) \ + ../common/libdricommon.la \ + ../common/libmegadriver_stub.la \ + ../../../libmesa.la \ + $(DRI_LIB_DEPS) \ + $(CLOCK_LIB) \ + ../common/libdri_test_stubs.la -lm + TESTS = \ test_fs_cmod_propagation \ test_fs_saturate_propagation \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index fd7420a6c6f..18a30a5925e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -830,6 +830,7 @@ brwCreateContext(gl_api api, intel_batchbuffer_init(brw); +#if 0 if (brw->gen >= 6) { /* Create a new hardware context. 
Using a hardware context means that * our GPU state will be saved/restored on context switch, allowing us @@ -848,6 +849,7 @@ brwCreateContext(gl_api api, } brw_init_state(brw); +#endif intelInitExtensions(ctx); @@ -909,8 +911,10 @@ brwCreateContext(gl_api api, _mesa_compute_version(ctx); +#if 0 _mesa_initialize_dispatch_tables(ctx); _mesa_initialize_vbo_vtxfmt(ctx); +#endif if (ctx->Extensions.AMD_performance_monitor) { brw_init_performance_monitors(brw); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 834aaa45737..2d4a7eab20b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -359,6 +359,9 @@ struct brw_stage_prog_data { /** @} */ } binding_table; + uint32_t *map_entries; + uint32_t *bind_map[4]; + GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; @@ -1976,6 +1979,12 @@ gen6_upload_push_constants(struct brw_context *brw, struct brw_stage_state *stage_state, enum aub_state_struct_type type); +struct intel_screen *intel_screen_create(int fd); +void intel_screen_destroy(struct intel_screen *screen); + +struct brw_context *intel_context_create(struct intel_screen *screen); +void intel_context_destroy(struct brw_context *brw); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 83d7a3535e4..3c704ee9d08 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -55,6 +55,7 @@ # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) +#ifndef _3DPRIM_POINTLIST /* FIXME: Avoid clashing with defines from bdw_pack.h */ #define _3DPRIM_POINTLIST 0x01 #define _3DPRIM_LINELIST 0x02 #define _3DPRIM_LINESTRIP 0x03 @@ -76,6 +77,7 @@ #define _3DPRIM_LINESTRIP_BF 0x13 #define _3DPRIM_LINESTRIP_CONT_BF 0x14 #define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 +#endif /* We use this 
offset to be able to pass native primitive types in struct * _mesa_prim::mode. Native primitive types are BRW_PRIM_OFFSET + diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index 97243a47293..a07b86e60e2 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -353,3 +353,15 @@ brw_get_device_info(int devid, int revision) return devinfo; } + +const char * +brw_get_device_name(int devid) +{ + switch (devid) { +#undef CHIPSET +#define CHIPSET(id, family, name) case id: return name; +#include "pci_ids/i965_pci_ids.h" + default: + return NULL; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h index 65c024ceeed..9192235fb0e 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.h +++ b/src/mesa/drivers/dri/i965/brw_device_info.h @@ -85,3 +85,4 @@ struct brw_device_info }; const struct brw_device_info *brw_get_device_info(int devid, int revision); +const char *brw_get_device_name(int devid); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b2701b89689..8a9bbdbae52 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1910,6 +1910,10 @@ fs_visitor::assign_vs_urb_setup() unsigned vue_entries = MAX2(count, vs_prog_data->base.vue_map.num_slots); + /* URB entry size is counted in units of 64 bytes (for the 3DSTATE_URB_VS + * command). Each attribute is 16 bytes (4 floats/dwords), so each unit + * fits four attributes. 
+ */ vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4; vs_prog_data->base.urb_read_length = (count + 1) / 2; @@ -3033,9 +3037,22 @@ fs_visitor::emit_repclear_shader() brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; int base_mrf = 1; int color_mrf = base_mrf + 2; + fs_inst *mov; + + if (uniforms == 1) { + mov = emit(MOV(vec4(brw_message_reg(color_mrf)), + fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F))); + } else { + struct brw_reg reg = + brw_reg(BRW_GENERAL_REGISTER_FILE, + 2, 3, 0, 0, BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + + mov = emit(MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg))); + } - fs_inst *mov = emit(MOV(vec4(brw_message_reg(color_mrf)), - fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F))); mov->force_writemask_all = true; fs_inst *write; @@ -3065,8 +3082,10 @@ fs_visitor::emit_repclear_shader() assign_curb_setup(); /* Now that we have the uniform assigned, go ahead and force it to a vec4. 
*/ - assert(mov->src[0].file == HW_REG); - mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0); + if (uniforms == 1) { + assert(mov->src[0].file == HW_REG); + mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0); + } } /** @@ -4081,7 +4100,8 @@ fs_visitor::run_vs() { assert(stage == MESA_SHADER_VERTEX); - assign_common_binding_table_offsets(0); + if (prog_data->map_entries == NULL) + assign_common_binding_table_offsets(0); setup_vs_payload(); if (INTEL_DEBUG & DEBUG_SHADER_TIME) @@ -4129,7 +4149,8 @@ fs_visitor::run_fs() sanity_param_count = prog->Parameters->NumParameters; - assign_binding_table_offsets(); + if (prog_data->map_entries == NULL) + assign_binding_table_offsets(); if (devinfo->gen >= 6) setup_payload_gen6(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 80ca1b750f8..975f5f6b2c9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1198,14 +1198,20 @@ fs_visitor::visit(ir_expression *ir) ir_constant *const_uniform_block = ir->operands[0]->as_constant(); ir_constant *const_offset = ir->operands[1]->as_constant(); fs_reg surf_index; + uint32_t binding, set, index, set_index; if (const_uniform_block) { /* The block index is a constant, so just emit the binding table entry * as an immediate. */ - surf_index = fs_reg(stage_prog_data->binding_table.ubo_start + - const_uniform_block->value.u[0]); + index = const_uniform_block->value.u[0]; + set = shader->base.UniformBlocks[index].Set; + set_index = shader->base.UniformBlocks[index].Index; + binding = stage_prog_data->bind_map[set][set_index]; + surf_index = fs_reg(binding); } else { + assert(0 && "need more info from the ir for this."); + /* The block index is not a constant. Evaluate the index expression * per-channel and add the base UBO index; we have to select a value * from any live channel. 
@@ -2289,8 +2295,13 @@ fs_visitor::emit_texture(ir_texture_opcode op, void fs_visitor::visit(ir_texture *ir) { - uint32_t sampler = - _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog); + uint32_t sampler; + + ir_dereference_variable *deref_var = ir->sampler->as_dereference_variable(); + assert(deref_var); + ir_variable *var = deref_var->var; + + sampler = stage_prog_data->bind_map[var->data.set][var->data.index]; ir_rvalue *nonconst_sampler_index = _mesa_get_sampler_array_nonconst_index(ir->sampler); diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 52c73031a3c..45c132b4a9e 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -33,19 +33,23 @@ #include "brw_state.h" #include "brw_ff_gs.h" - bool -brw_codegen_gs_prog(struct brw_context *brw, +brw_compile_gs_prog(struct brw_context *brw, struct gl_shader_program *prog, struct brw_geometry_program *gp, - struct brw_gs_prog_key *key) + struct brw_gs_prog_key *key, + struct brw_gs_compile_output *output) { - struct brw_stage_state *stage_state = &brw->gs.base; struct brw_gs_compile c; memset(&c, 0, sizeof(c)); c.key = *key; c.gp = gp; + /* We get the bind map as input in the output struct...*/ + c.prog_data.base.base.map_entries = output->prog_data.base.base.map_entries; + memcpy(c.prog_data.base.base.bind_map, output->prog_data.base.base.bind_map, + sizeof(c.prog_data.base.base.bind_map)); + c.prog_data.include_primitive_id = (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0; @@ -274,18 +278,41 @@ brw_codegen_gs_prog(struct brw_context *brw, c.prog_data.base.base.total_scratch = brw_get_scratch_size(c.base.last_scratch*REG_SIZE); + } + + output->mem_ctx = mem_ctx; + output->program = program; + output->program_size = program_size; + memcpy(&output->prog_data, &c.prog_data, + sizeof(output->prog_data)); + + return true; +} + +bool +brw_codegen_gs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + 
struct brw_geometry_program *gp, + struct brw_gs_prog_key *key) +{ + struct brw_gs_compile_output output; + struct brw_stage_state *stage_state = &brw->gs.base; + + if (brw_compile_gs_prog(brw, prog, gp, key, &output)) + return false; + if (output.prog_data.base.base.total_scratch) { brw_get_scratch_bo(brw, &stage_state->scratch_bo, - c.prog_data.base.base.total_scratch * + output.prog_data.base.base.total_scratch * brw->max_gs_threads); } brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, sizeof(c.prog_data), + key, sizeof(*key), + output.program, output.program_size, + &output.prog_data, sizeof(output.prog_data), &stage_state->prog_offset, &brw->gs.prog_data); - ralloc_free(mem_ctx); + ralloc_free(output.mem_ctx); return true; } diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index b327c40f140..573bbdb16f8 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -37,6 +37,22 @@ struct gl_context; struct gl_shader_program; struct gl_program; +struct brw_gs_compile_output { + void *mem_ctx; + const void *program; + uint32_t program_size; + struct brw_gs_prog_data prog_data; +}; + +struct brw_gs_prog_key; + +bool +brw_compile_gs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key, + struct brw_gs_compile_output *output); + bool brw_gs_prog_data_compare(const void *a, const void *b); void diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 157b33d4f4c..24778d25379 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -427,6 +427,9 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) DBG("%s\n", __func__); + if (cache->bo == NULL) + return; + if (brw->has_llc) drm_intel_bo_unmap(cache->bo); drm_intel_bo_unreference(cache->bo); 
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 45a03bba857..5496225a6c7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -46,7 +46,7 @@ * Return a bitfield where bit n is set if barycentric interpolation mode n * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader. */ -static unsigned +unsigned brw_compute_barycentric_interp_modes(struct brw_context *brw, bool shade_model_flat, bool persample_shading, diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 0a8a97b2f5e..73a741f89e4 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -90,6 +90,12 @@ bool brw_wm_prog_data_compare(const void *a, const void *b); void brw_upload_wm_prog(struct brw_context *brw); +unsigned +brw_compute_barycentric_interp_modes(struct brw_context *brw, + bool shade_model_flat, + bool persample_shading, + const struct gl_fragment_program *fprog); + #ifdef __cplusplus } // extern "C" #endif diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index 53f575ab78f..33a0348486d 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -60,7 +60,7 @@ static const struct dri_debug_control debug_control[] = { { "urb", DEBUG_URB }, { "vs", DEBUG_VS }, { "clip", DEBUG_CLIP }, - { "aub", DEBUG_AUB }, + { "foob", DEBUG_AUB }, /* disable aub dumbing in the dri driver */ { "shader_time", DEBUG_SHADER_TIME }, { "no16", DEBUG_NO16 }, { "blorp", DEBUG_BLORP }, diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index dda16389f8a..4860a160ee9 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1416,6 +1416,78 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) return (const __DRIconfig**) intel_screen_make_configs(psp); } +struct intel_screen * 
+intel_screen_create(int fd) +{ + __DRIscreen *psp; + __DRIconfig **configs; + int i; + + psp = malloc(sizeof(*psp)); + if (psp == NULL) + return NULL; + + psp->image.loader = (void *) 1; /* Don't complain about this being NULL */ + psp->fd = fd; + psp->dri2.useInvalidate = (void *) 1; + + configs = (__DRIconfig **) intelInitScreen2(psp); + for (i = 0; configs[i]; i++) + free(configs[i]); + free(configs); + + return psp->driverPrivate; +} + +void +intel_screen_destroy(struct intel_screen *screen) +{ + __DRIscreen *psp; + + psp = screen->driScrnPriv; + intelDestroyScreen(screen->driScrnPriv); + free(psp); +} + + +struct brw_context * +intel_context_create(struct intel_screen *screen) +{ + __DRIcontext *driContextPriv; + struct brw_context *brw; + unsigned error; + + driContextPriv = malloc(sizeof(*driContextPriv)); + if (driContextPriv == NULL) + return NULL; + + driContextPriv->driScreenPriv = screen->driScrnPriv; + + brwCreateContext(API_OPENGL_CORE, + NULL, /* visual */ + driContextPriv, + 3, 0, + 0, /* flags */ + false, /* notify_reset */ + &error, + NULL); + + brw = driContextPriv->driverPrivate; + brw->ctx.FirstTimeCurrent = false; + + return driContextPriv->driverPrivate; +} + +void +intel_context_destroy(struct brw_context *brw) +{ + __DRIcontext *driContextPriv; + + driContextPriv = brw->driContext; + intelDestroyContext(driContextPriv); + free(driContextPriv); +} + struct intel_buffer { __DRIbuffer base; drm_intel_bo *bo; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 737f0be6d62..895103470ee 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2570,6 +2570,12 @@ struct gl_uniform_block */ GLuint Binding; + /** + * Vulkan descriptor set and index qualifiers for this block. + */ + GLuint Set; + GLuint Index; + /** * Minimum size (in bytes) of a buffer object to back this uniform buffer * (GL_UNIFORM_BLOCK_DATA_SIZE). 
diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am new file mode 100644 index 00000000000..b131ac13897 --- /dev/null +++ b/src/vulkan/Makefile.am @@ -0,0 +1,67 @@ +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +lib_LTLIBRARIES = libvulkan.la + +# The gallium includes are for the util/u_math.h include from main/macros.h + +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include + +libvulkan_la_CFLAGS = \ + -Wall -Wextra -Wno-unused-parameter -fvisibility=hidden -O0 -g \ + -Wstrict-prototypes -Wmissing-prototypes -Wno-override-init + +libvulkan_la_CXXFLAGS = \ + -Wall -Wextra -Wno-unused-parameter -fvisibility=hidden -O0 -g + +libvulkan_la_SOURCES = \ + private.h \ + gem.c \ + device.c \ + aub.c \ + allocator.c \ + util.c \ + pipeline.c \ + image.c \ + meta.c \ + intel.c \ + compiler.cpp + +bin_PROGRAMS = vk + +vk_SOURCES = vk.c +vk_LDADD = libvulkan.la -lpng16 + +libvulkan_la_LIBADD = -lxcb -lxcb-dri3 \ + $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la + +include $(top_srcdir)/install-lib-links.mk diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c new file mode 100644 index 00000000000..67abaa45464 --- /dev/null +++ b/src/vulkan/allocator.c @@ -0,0 +1,499 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define _DEFAULT_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "private.h" + +/* Design goals: + * + * - Lock free (except when resizing underlying bos) + * + * - Constant time allocation with typically only one atomic + * + * - Multiple allocation sizes without fragmentation + * + * - Can grow while keeping addresses and offset of contents stable + * + * - All allocations within one bo so we can point one of the + * STATE_BASE_ADDRESS pointers at it. + * + * The overall design is a two-level allocator: top level is a fixed size, big + * block (8k) allocator, which operates out of a bo. Allocation is done by + * either pulling a block from the free list or growing the used range of the + * bo. Growing the range may run out of space in the bo which we then need to + * grow. Growing the bo is tricky in a multi-threaded, lockless environment: + * we need to keep all pointers and contents in the old map valid. GEM bos in + * general can't grow, but we use a trick: we create a memfd and use ftruncate + * to grow it as necessary. We mmap the new size and then create a gem bo for + * it using the new gem userptr ioctl. Without heavy-handed locking around + * our allocation fast-path, there isn't really a way to munmap the old mmap, + * so we just keep it around until garbage collection time. 
While the block + * allocator is lockless for normal operations, we block other threads trying + * to allocate while we're growing the map. It shouldn't happen often, and + * growing is fast anyway. + * + * At the next level we can use various sub-allocators. The state pool is a + * pool of smaller, fixed size objects, which operates much like the block + * pool. It uses a free list for freeing objects, but when it runs out of + * space it just allocates a new block from the block pool. This allocator is + * intended for longer lived state objects such as SURFACE_STATE and most + * other persistent state objects in the API. We may need to track more info + * with these objects and a pointer back to the CPU object (eg VkImage). In + * those cases we just allocate a slightly bigger object and put the extra + * state after the GPU state object. + * + * The state stream allocator works similar to how the i965 DRI driver streams + * all its state. Even with Vulkan, we need to emit transient state (whether + * surface state base or dynamic state base), and for that we can just get a + * block and fill it up. These cases are local to a command buffer and the + * sub-allocator need not be thread safe. The streaming allocator gets a new + * block when it runs out of space and chains them together so they can be + * easily freed. 
*/ +#define EMPTY 1 + +struct anv_mmap_cleanup { + void *map; + size_t size; + uint32_t gem_handle; +}; + +#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0}) + +static inline long +sys_futex(void *addr1, int op, int val1, + struct timespec *timeout, void *addr2, int val3) +{ + return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); +} + +static inline int +futex_wake(uint32_t *addr, int count) +{ + return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0); +} + +static inline int +futex_wait(uint32_t *addr, int32_t value) +{ + return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); +} + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +static inline uint32_t +ilog2_round_up(uint32_t value) +{ + assert(value != 0); + return 32 - __builtin_clz(value - 1); +} + +static inline uint32_t +round_to_power_of_two(uint32_t value) +{ + return 1 << ilog2_round_up(value); +} + +static bool +anv_free_list_pop(union anv_free_list *list, void **map, uint32_t *offset) +{ + union anv_free_list current, next, old; + + current = *list; + while (current.offset != EMPTY) { + /* We have to add a memory barrier here so that the list head (and + * offset) gets read before we read the map pointer. This way we + * know that the map pointer is valid for the given offset at the + * point where we read it. 
+ */ + __sync_synchronize(); + + next.offset = *(uint32_t *)(*map + current.offset); + next.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, next.u64); + if (old.u64 == current.u64) { + *offset = current.offset; + return true; + } + current = old; + } + + return false; +} + +static void +anv_free_list_push(union anv_free_list *list, void *map, uint32_t offset) +{ + union anv_free_list current, old, new; + uint32_t *next_ptr = map + offset; + + old = *list; + do { + current = old; + *next_ptr = current.offset; + new.offset = offset; + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + } while (old.u64 != current.u64); +} + +static int +anv_block_pool_grow(struct anv_block_pool *pool); + +void +anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size) +{ + assert(is_power_of_two(block_size)); + + pool->device = device; + pool->bo.gem_handle = 0; + pool->bo.offset = 0; + pool->size = 0; + pool->block_size = block_size; + pool->next_block = 0; + pool->free_list = ANV_FREE_LIST_EMPTY; + anv_vector_init(&pool->mmap_cleanups, + round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); + + /* Immediately grow the pool so we'll have a backing bo. */ + anv_block_pool_grow(pool); +} + +/* The memfd path lets us create a map for an fd and lets us grow and remap + * without copying. It breaks valgrind however, so we have a MAP_ANONYMOUS + * path we can take for valgrind debugging. 
*/ + +#define USE_MEMFD 0 + +void +anv_block_pool_finish(struct anv_block_pool *pool) +{ + struct anv_mmap_cleanup *cleanup; + + anv_vector_foreach(cleanup, &pool->mmap_cleanups) { + if (cleanup->map) + munmap(cleanup->map, cleanup->size); + if (cleanup->gem_handle) + anv_gem_close(pool->device, cleanup->gem_handle); + } + + anv_vector_finish(&pool->mmap_cleanups); + +#if USE_MEMFD + close(pool->fd); +#endif +} + +static int +anv_block_pool_grow(struct anv_block_pool *pool) +{ + size_t size; + void *map; + int gem_handle; + struct anv_mmap_cleanup *cleanup; + + if (pool->size == 0) { + size = 32 * pool->block_size; + } else { + size = pool->size * 2; + } + + cleanup = anv_vector_add(&pool->mmap_cleanups); + if (!cleanup) + return -1; + *cleanup = ANV_MMAP_CLEANUP_INIT; + +#if USE_MEMFD + if (pool->size == 0) + pool->fd = memfd_create("block pool", MFD_CLOEXEC); + + if (pool->fd == -1) + return -1; + + if (ftruncate(pool->fd, size) == -1) + return -1; + + /* First try to see if mremap can grow the map in place. */ + map = MAP_FAILED; + if (pool->size > 0) + map = mremap(pool->map, pool->size, size, 0); + if (map == MAP_FAILED) { + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps adds up to less than the size of the + * current map. MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. + */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, pool->fd, 0); + cleanup->map = map; + cleanup->size = size; + } + if (map == MAP_FAILED) + return -1; +#else + /* The MAP_ANONYMOUS fallback can't grow without races, so just bail here + * if we're trying to grow the pool. 
*/ + assert(pool->size == 0); + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0); + if (map == MAP_FAILED) + return -1; + cleanup->map = map; + cleanup->size = size; +#endif + + gem_handle = anv_gem_userptr(pool->device, map, size); + if (gem_handle == 0) + return -1; + cleanup->gem_handle = gem_handle; + + /* Now that we successfull allocated everything, we can write the new + * values back into pool. */ + pool->map = map; + pool->bo.gem_handle = gem_handle; + pool->bo.size = size; + pool->bo.map = map; + pool->bo.index = 0; + + /* Write size last and after the memory barrier here. We need the memory + * barrier to make sure map and gem_handle are written before other threads + * see the new size. A thread could allocate a block and then go try using + * the old pool->map and access out of bounds. */ + + __sync_synchronize(); + pool->size = size; + + return 0; +} + +uint32_t +anv_block_pool_alloc(struct anv_block_pool *pool) +{ + uint32_t offset, block, size; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) + return offset; + + restart: + size = pool->size; + block = __sync_fetch_and_add(&pool->next_block, pool->block_size); + if (block < size) { + return block; + } else if (block == size) { + /* We allocated the first block outside the pool, we have to grow it. + * pool->next_block acts a mutex: threads who try to allocate now will + * get block indexes above the current limit and hit futex_wait + * below. 
*/ + anv_block_pool_grow(pool); + futex_wake(&pool->size, INT_MAX); + } else { + futex_wait(&pool->size, size); + __sync_fetch_and_add(&pool->next_block, -pool->block_size); + goto restart; + } + + return block; +} + +void +anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset) +{ + anv_free_list_push(&pool->free_list, pool->map, offset); +} + +static void +anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool, + size_t state_size) +{ + /* At least a cache line and must divide the block size. */ + assert(state_size >= 64 && is_power_of_two(state_size)); + + pool->state_size = state_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + pool->block.next = 0; + pool->block.end = 0; +} + +static uint32_t +anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool) +{ + uint32_t offset; + struct anv_block_state block, old, new; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) + return offset; + + /* If free list was empty (or somebody raced us and took the items) we + * allocate a new item from the end of the block */ + restart: + block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size); + + if (block.next < block.end) { + return block.next; + } else if (block.next == block.end) { + new.next = anv_block_pool_alloc(block_pool); + new.end = new.next + block_pool->block_size; + old.u64 = __sync_fetch_and_add(&pool->block.u64, new.u64 - block.u64); + if (old.next != block.next) + futex_wake(&pool->block.end, INT_MAX); + return new.next; + } else { + futex_wait(&pool->block.end, block.end); + __sync_fetch_and_add(&pool->block.u64, -pool->state_size); + goto restart; + } +} + +static void +anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool, + uint32_t offset) +{ + anv_free_list_push(&pool->free_list, block_pool->map, offset); +} + +void +anv_state_pool_init(struct anv_state_pool *pool, + 
struct anv_block_pool *block_pool) +{ + pool->block_pool = block_pool; + for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) { + size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i); + anv_fixed_size_state_pool_init(&pool->buckets[i], size); + } +} + +struct anv_state +anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) +{ + unsigned size_log2 = ilog2_round_up(size < align ? align : size); + assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + if (size_log2 < ANV_MIN_STATE_SIZE_LOG2) + size_log2 = ANV_MIN_STATE_SIZE_LOG2; + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + struct anv_state state; + state.alloc_size = 1 << size_log2; + state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], + pool->block_pool); + state.map = pool->block_pool->map + state.offset; + return state; +} + +void +anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) +{ + assert(is_power_of_two(state.alloc_size)); + unsigned size_log2 = ilog2_round_up(state.alloc_size); + assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 && + size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + anv_fixed_size_state_pool_free(&pool->buckets[bucket], + pool->block_pool, state.offset); +} + +#define NULL_BLOCK 1 +struct stream_block { + uint32_t next; +}; + +/* The state stream allocator is a one-shot, single threaded allocator for + * variable sized blocks. We use it for allocating dynamic state. 
+ */ +void +anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool) +{ + stream->block_pool = block_pool; + stream->next = 0; + stream->end = 0; + stream->current_block = NULL_BLOCK; +} + +void +anv_state_stream_finish(struct anv_state_stream *stream) +{ + struct stream_block *sb; + uint32_t block, next_block; + + block = stream->current_block; + while (block != 1) { + sb = stream->block_pool->map + block; + next_block = sb->next; + anv_block_pool_free(stream->block_pool, block); + block = next_block; + } +} + +struct anv_state +anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment) +{ + struct stream_block *sb; + struct anv_state state; + uint32_t block; + + state.offset = ALIGN_U32(stream->next, alignment); + if (state.offset + size > stream->end) { + block = anv_block_pool_alloc(stream->block_pool); + sb = stream->block_pool->map + block; + sb->next = stream->current_block; + stream->current_block = block; + stream->next = block + sizeof(*sb); + stream->end = block + stream->block_pool->block_size; + state.offset = ALIGN_U32(stream->next, alignment); + assert(state.offset + size <= stream->end); + } + + stream->next = state.offset + size; + + state.alloc_size = size; + state.map = stream->block_pool->map + state.offset; + + return state; +} diff --git a/src/vulkan/aub.c b/src/vulkan/aub.c new file mode 100644 index 00000000000..5e66aa839e3 --- /dev/null +++ b/src/vulkan/aub.c @@ -0,0 +1,292 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + 
* + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "private.h" +#include "aub.h" + +struct anv_aub_writer { + FILE *file; + uint32_t offset; + int gen; +}; + +static void +aub_out(struct anv_aub_writer *writer, uint32_t data) +{ + fwrite(&data, 1, 4, writer->file); +} + +static void +aub_out_data(struct anv_aub_writer *writer, const void *data, size_t size) +{ + fwrite(data, 1, size, writer->file); +} + +static struct anv_aub_writer * +get_anv_aub_writer(struct anv_device *device) +{ + struct anv_aub_writer *writer = device->aub_writer; + int entry = 0x200003; + int i; + int gtt_size = 0x10000; + const char *filename; + + if (geteuid() != getuid()) + return NULL; + + if (writer) + return writer; + + writer = malloc(sizeof(*writer)); + if (writer == NULL) + return NULL; + + filename = "intel.aub"; + writer->gen = device->info.gen; + writer->file = fopen(filename, "w+"); + if (!writer->file) { + free(writer); + return NULL; + } + + /* Start allocating objects from just after the GTT. */ + writer->offset = gtt_size; + + /* Start with a (required) version packet. 
*/ + aub_out(writer, CMD_AUB_HEADER | (13 - 2)); + aub_out(writer, + (4 << AUB_HEADER_MAJOR_SHIFT) | + (0 << AUB_HEADER_MINOR_SHIFT)); + for (i = 0; i < 8; i++) { + aub_out(writer, 0); /* app name */ + } + aub_out(writer, 0); /* timestamp */ + aub_out(writer, 0); /* timestamp */ + aub_out(writer, 0); /* comment len */ + + /* Set up the GTT. The max we can handle is 256M */ + aub_out(writer, CMD_AUB_TRACE_HEADER_BLOCK | ((writer->gen >= 8 ? 6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT_ENTRY | + AUB_TRACE_TYPE_NOTYPE | AUB_TRACE_OP_DATA_WRITE); + aub_out(writer, 0); /* subtype */ + aub_out(writer, 0); /* offset */ + aub_out(writer, gtt_size); /* size */ + if (writer->gen >= 8) + aub_out(writer, 0); + for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { + aub_out(writer, entry); + } + + return device->aub_writer = writer; +} + +void +anv_aub_writer_destroy(struct anv_aub_writer *writer) +{ + fclose(writer->file); + free(writer); +} + + +/** + * Break up large objects into multiple writes. Otherwise a 128kb VBO + * would overflow the 16 bits of size field in the packet header and + * everything goes badly after that. + */ +static void +aub_write_trace_block(struct anv_aub_writer *writer, uint32_t type, + void *virtual, uint32_t size, uint32_t gtt_offset) +{ + uint32_t block_size; + uint32_t offset; + uint32_t subtype = 0; + static const char null_block[8 * 4096]; + + for (offset = 0; offset < size; offset += block_size) { + block_size = size - offset; + + if (block_size > 8 * 4096) + block_size = 8 * 4096; + + aub_out(writer, + CMD_AUB_TRACE_HEADER_BLOCK | + ((writer->gen >= 8 ? 
6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT | + type | AUB_TRACE_OP_DATA_WRITE); + aub_out(writer, subtype); + aub_out(writer, gtt_offset + offset); + aub_out(writer, ALIGN_U32(block_size, 4)); + if (writer->gen >= 8) + aub_out(writer, 0); + + if (virtual) + aub_out_data(writer, (char *) virtual + offset, block_size); + else + aub_out_data(writer, null_block, block_size); + + /* Pad to a multiple of 4 bytes. */ + aub_out_data(writer, null_block, -block_size & 3); + } +} + +/* + * Make a ringbuffer on fly and dump it + */ +static void +aub_build_dump_ringbuffer(struct anv_aub_writer *writer, + uint32_t batch_offset, uint32_t offset, + int ring_flag) +{ + uint32_t ringbuffer[4096]; + int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ + int ring_count = 0; + + if (ring_flag == I915_EXEC_BSD) + ring = AUB_TRACE_TYPE_RING_PRB1; + else if (ring_flag == I915_EXEC_BLT) + ring = AUB_TRACE_TYPE_RING_PRB2; + + /* Make a ring buffer to execute our batchbuffer. */ + memset(ringbuffer, 0, sizeof(ringbuffer)); + if (writer->gen >= 8) { + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); + ringbuffer[ring_count++] = batch_offset; + ringbuffer[ring_count++] = 0; + } else { + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; + ringbuffer[ring_count++] = batch_offset; + } + + /* Write out the ring. This appears to trigger execution of + * the ring in the simulator. + */ + aub_out(writer, + CMD_AUB_TRACE_HEADER_BLOCK | + ((writer->gen >= 8 ? 6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); + aub_out(writer, 0); /* general/surface subtype */ + aub_out(writer, offset); + aub_out(writer, ring_count * 4); + if (writer->gen >= 8) + aub_out(writer, 0); + + /* FIXME: Need some flush operations here? 
*/ + aub_out_data(writer, ringbuffer, ring_count * 4); +} + +struct aub_bo { + uint32_t offset; + void *map; + void *relocated; +}; + +static void +relocate_bo(struct anv_bo *bo, struct anv_reloc_list *list, struct aub_bo *bos) +{ + struct aub_bo *aub_bo = &bos[bo->index]; + struct drm_i915_gem_relocation_entry *reloc; + uint32_t *dw; + + aub_bo->relocated = malloc(bo->size); + memcpy(aub_bo->relocated, aub_bo->map, bo->size); + for (size_t i = 0; i < list->num_relocs; i++) { + reloc = &list->relocs[i]; + assert(reloc->offset < bo->size); + dw = aub_bo->relocated + reloc->offset; + *dw = bos[reloc->target_handle].offset + reloc->delta; + } +} + +void +anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_aub_writer *writer; + struct anv_bo *bo; + uint32_t ring_flag = 0; + uint32_t offset, length; + struct aub_bo *aub_bos; + + writer = get_anv_aub_writer(device); + if (writer == NULL) + return; + + aub_bos = malloc(cmd_buffer->bo_count * sizeof(aub_bos[0])); + offset = writer->offset; + for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) { + bo = cmd_buffer->exec2_bos[i]; + if (bo->map) + aub_bos[i].map = bo->map; + else + aub_bos[i].map = anv_gem_mmap(device, bo->gem_handle, 0, bo->size); + aub_bos[i].relocated = aub_bos[i].map; + aub_bos[i].offset = offset; + offset = ALIGN_U32(offset + bo->size + 4095, 4096); + } + + relocate_bo(&batch->bo, &batch->cmd_relocs, aub_bos); + relocate_bo(&device->surface_state_block_pool.bo, + &batch->surf_relocs, aub_bos); + + for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) { + bo = cmd_buffer->exec2_bos[i]; + if (i == cmd_buffer->bo_count - 1) { + length = batch->next - batch->bo.map; + aub_write_trace_block(writer, AUB_TRACE_TYPE_BATCH, + aub_bos[i].relocated, + length, aub_bos[i].offset); + } else { + aub_write_trace_block(writer, AUB_TRACE_TYPE_NOTYPE, + aub_bos[i].relocated, + bo->size, aub_bos[i].offset); + } 
+ if (aub_bos[i].relocated != aub_bos[i].map) + free(aub_bos[i].relocated); + if (aub_bos[i].map != bo->map) + anv_gem_munmap(aub_bos[i].map, bo->size); + } + + /* Dump ring buffer */ + aub_build_dump_ringbuffer(writer, aub_bos[batch->bo.index].offset, + offset, ring_flag); + + free(aub_bos); + + fflush(writer->file); +} diff --git a/src/vulkan/aub.h b/src/vulkan/aub.h new file mode 100644 index 00000000000..7a67712ff9c --- /dev/null +++ b/src/vulkan/aub.h @@ -0,0 +1,153 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +/** @file intel_aub.h + * + * The AUB file is a file format used by Intel's internal simulation + * and other validation tools. It can be used at various levels by a + * driver to input state to the simulated hardware or a replaying + * debugger. 
+ * + * We choose to dump AUB files using the trace block format for ease + * of implementation -- dump out the blocks of memory as plain blobs + * and insert ring commands to execute the batchbuffer blob. + */ + +#ifndef _INTEL_AUB_H +#define _INTEL_AUB_H + +#define AUB_MI_NOOP (0) +#define AUB_MI_BATCH_BUFFER_START (0x31 << 23) +#define AUB_PIPE_CONTROL (0x7a000002) + +/* DW0: instruction type. */ + +#define CMD_AUB (7 << 29) + +#define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16)) +/* DW1 */ +# define AUB_HEADER_MAJOR_SHIFT 24 +# define AUB_HEADER_MINOR_SHIFT 16 + +#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16)) +#define CMD_AUB_DUMP_BMP (CMD_AUB | (1 << 23) | (0x9e << 16)) + +/* DW1 */ +#define AUB_TRACE_OPERATION_MASK 0x000000ff +#define AUB_TRACE_OP_COMMENT 0x00000000 +#define AUB_TRACE_OP_DATA_WRITE 0x00000001 +#define AUB_TRACE_OP_COMMAND_WRITE 0x00000002 +#define AUB_TRACE_OP_MMIO_WRITE 0x00000003 +// operation = TRACE_DATA_WRITE, Type +#define AUB_TRACE_TYPE_MASK 0x0000ff00 +#define AUB_TRACE_TYPE_NOTYPE (0 << 8) +#define AUB_TRACE_TYPE_BATCH (1 << 8) +#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) +#define AUB_TRACE_TYPE_2D_MAP (6 << 8) +#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8) +#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) +#define AUB_TRACE_TYPE_1D_MAP (10 << 8) +#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) +#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8) +#define AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8) +#define AUB_TRACE_TYPE_GENERAL (14 << 8) +#define AUB_TRACE_TYPE_SURFACE (15 << 8) + + +// operation = TRACE_COMMAND_WRITE, Type = +#define AUB_TRACE_TYPE_RING_HWB (1 << 8) +#define AUB_TRACE_TYPE_RING_PRB0 (2 << 8) +#define AUB_TRACE_TYPE_RING_PRB1 (3 << 8) +#define AUB_TRACE_TYPE_RING_PRB2 (4 << 8) + +// Address space +#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff0000 +#define AUB_TRACE_MEMTYPE_GTT (0 << 16) +#define AUB_TRACE_MEMTYPE_LOCAL (1 << 16) +#define AUB_TRACE_MEMTYPE_NONLOCAL (2 << 16) +#define AUB_TRACE_MEMTYPE_PCI 
(3 << 16) +#define AUB_TRACE_MEMTYPE_GTT_ENTRY (4 << 16) + +/* DW2 */ + +/** + * aub_state_struct_type enum values are encoded with the top 16 bits + * representing the type to be delivered to the .aub file, and the bottom 16 + * bits representing the subtype. This macro performs the encoding. + */ +#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype)) + +enum aub_state_struct_type { + AUB_TRACE_VS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1), + AUB_TRACE_GS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2), + AUB_TRACE_CLIP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 3), + AUB_TRACE_SF_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 4), + AUB_TRACE_WM_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 5), + AUB_TRACE_CC_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 6), + AUB_TRACE_CLIP_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 7), + AUB_TRACE_SF_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 8), + AUB_TRACE_CC_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x9), + AUB_TRACE_SAMPLER_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xa), + AUB_TRACE_KERNEL_INSTRUCTIONS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xb), + AUB_TRACE_SCRATCH_SPACE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xc), + AUB_TRACE_SAMPLER_DEFAULT_COLOR = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xd), + + AUB_TRACE_SCISSOR_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x15), + AUB_TRACE_BLEND_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x16), + AUB_TRACE_DEPTH_STENCIL_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x17), + + AUB_TRACE_VERTEX_BUFFER = ENCODE_SS_TYPE(AUB_TRACE_TYPE_VERTEX_BUFFER, 0), + AUB_TRACE_BINDING_TABLE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x100), + AUB_TRACE_SURFACE_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x200), + AUB_TRACE_VS_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 0), + AUB_TRACE_WM_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 1), +}; + +#undef ENCODE_SS_TYPE + +/** + * Decode a aub_state_struct_type 
value to determine the type that should be + * stored in the .aub file. + */ +static inline uint32_t AUB_TRACE_TYPE(enum aub_state_struct_type ss_type) +{ + return (ss_type & 0xFFFF0000) >> 16; +} + +/** + * Decode a state_struct_type value to determine the subtype that should be + * stored in the .aub file. + */ +static inline uint32_t AUB_TRACE_SUBTYPE(enum aub_state_struct_type ss_type) +{ + return ss_type & 0xFFFF; +} + +/* DW3: address */ +/* DW4: len */ + +#endif /* _INTEL_AUB_H */ diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp new file mode 100644 index 00000000000..d7428d8a877 --- /dev/null +++ b/src/vulkan/compiler.cpp @@ -0,0 +1,931 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include + +#include +#include /* brw_new_shader_program is here */ + +#include +#include + +#include +#include +#include +#include + +#include "private.h" + +static void +fail_if(int cond, const char *format, ...) +{ + va_list args; + + if (!cond) + return; + + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + + exit(1); +} + +static VkResult +set_binding_table_layout(struct brw_stage_prog_data *prog_data, + struct anv_pipeline *pipeline, uint32_t stage) +{ + uint32_t count, bias, set, *map; + + struct anv_pipeline_layout_entry *entries; + + if (stage == VK_SHADER_STAGE_FRAGMENT) + bias = MAX_RTS; + else + bias = 0; + + count = pipeline->layout->stage[stage].count; + entries = pipeline->layout->stage[stage].entries; + + prog_data->map_entries = + (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0])); + if (prog_data->map_entries == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + set = 0; + map = prog_data->map_entries; + for (uint32_t i = 0; i < count; i++) { + if (entries[i].set == set) { + prog_data->bind_map[set] = map; + set++; + } + *map++ = bias + i; + } + + return VK_SUCCESS; +} + +static void +brw_vs_populate_key(struct brw_context *brw, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_VERTEX_PROGRAM */ + struct gl_program *prog = (struct gl_program *) vp; + + memset(key, 0, sizeof(*key)); + + /* Just upload the program verbatim for now. Always send it all + * the inputs it asks for, whether they are varying or not. 
+ */ + key->base.program_string_id = vp->id; + brw_setup_vue_key_clip_info(brw, &key->base, + vp->program.Base.UsesClipDistanceOut); + + /* _NEW_POLYGON */ + if (brw->gen < 6) { + key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL); + } + + if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 | + VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) { + /* _NEW_LIGHT | _NEW_BUFFERS */ + key->clamp_vertex_color = ctx->Light._ClampVertexColor; + } + + /* _NEW_POINT */ + if (brw->gen < 6 && ctx->Point.PointSprite) { + for (int i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key->point_coord_replace |= (1 << i); + } + } + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count, + &key->base.tex); +} + +static bool +really_do_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key, struct anv_pipeline *pipeline) +{ + GLuint program_size; + const GLuint *program; + struct brw_vs_compile c; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + struct brw_stage_prog_data *stage_prog_data = &prog_data->base.base; + void *mem_ctx; + struct gl_shader *vs = NULL; + + if (prog) + vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; + + memset(&c, 0, sizeof(c)); + memcpy(&c.key, key, sizeof(*key)); + memset(prog_data, 0, sizeof(*prog_data)); + + mem_ctx = ralloc_context(NULL); + + c.vp = vp; + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + int param_count; + if (vs) { + /* We add padding around uniform values below vec4 size, with the worst + * case being a float value that gets blown up to a vec4, so be + * conservative here. 
+ */ + param_count = vs->num_uniform_components * 4; + + } else { + param_count = vp->program.Base.Parameters->NumParameters * 4; + } + /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip + * planes as uniforms. + */ + param_count += c.key.base.nr_userclip_plane_consts * 4; + + /* Setting nr_params here NOT to the size of the param and pull_param + * arrays, but to the number of uniform components vec4_visitor + * needs. vec4_visitor::setup_uniforms() will set it back to a proper value. + */ + stage_prog_data->nr_params = ALIGN(param_count, 4) / 4; + if (vs) { + stage_prog_data->nr_params += vs->num_samplers; + } + + GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; + prog_data->inputs_read = vp->program.Base.InputsRead; + + if (c.key.copy_edgeflag) { + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); + prog_data->inputs_read |= VERT_BIT_EDGEFLAG; + } + + if (brw->gen < 6) { + /* Put dummy slots into the VUE for the SF to put the replaced + * point sprite coords in. We shouldn't need these dummy slots, + * which take up precious URB space, but it would mean that the SF + * doesn't get nice aligned pairs of input coords into output + * coords, which would be a pain to handle. + */ + for (int i = 0; i < 8; i++) { + if (c.key.point_coord_replace & (1 << i)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); + } + + /* if back colors are written, allocate slots for front colors too */ + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); + } + + /* In order for legacy clipping to work, we need to populate the clip + * distance varying slots whenever clipping is enabled, even if the vertex + * shader doesn't write to gl_ClipDistance. 
+ */ + if (c.key.base.userclip_active) { + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); + } + + brw_compute_vue_map(brw->intelScreen->devinfo, + &prog_data->base.vue_map, outputs_written); +\ + set_binding_table_layout(&prog_data->base.base, pipeline, + VK_SHADER_STAGE_VERTEX); + + /* Emit GEN4 code. + */ + program = brw_vs_emit(brw, prog, &c, prog_data, mem_ctx, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + pipeline->vs_simd8 = pipeline->program_next; + memcpy((char *) pipeline->device->instruction_block_pool.map + + pipeline->vs_simd8, program, program_size); + + pipeline->program_next = align(pipeline->program_next + program_size, 64); + + ralloc_free(mem_ctx); + + if (stage_prog_data->total_scratch > 0) + if (!anv_bo_init_new(&pipeline->vs_scratch_bo, + pipeline->device, + stage_prog_data->total_scratch)) + return false; + + + return true; +} + +void brw_wm_populate_key(struct brw_context *brw, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + struct gl_program *prog = (struct gl_program *) brw->fragment_program; + GLuint lookup = 0; + GLuint line_aa; + bool program_uses_dfdy = fp->program.UsesDFdy; + struct gl_framebuffer draw_buffer; + bool multisample_fbo; + + memset(key, 0, sizeof(*key)); + + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. */ + key->tex.swizzles[i] = SWIZZLE_XYZW; + } + + /* A non-zero framebuffer name indicates that the framebuffer was created by + * the user rather than the window system. 
*/ + draw_buffer.Name = 1; + draw_buffer.Visual.samples = 1; + draw_buffer._NumColorDrawBuffers = 1; + draw_buffer._NumColorDrawBuffers = 1; + draw_buffer.Width = 400; + draw_buffer.Height = 400; + ctx->DrawBuffer = &draw_buffer; + + multisample_fbo = ctx->DrawBuffer->Visual.samples > 1; + + /* Build the index for table lookup + */ + if (brw->gen < 6) { + /* _NEW_COLOR */ + if (fp->program.UsesKill || ctx->Color.AlphaEnabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* _NEW_DEPTH */ + if (ctx->Depth.Test) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + /* _NEW_STENCIL | _NEW_BUFFERS */ + if (ctx->Stencil._Enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (ctx->Stencil.WriteMask[0] || + ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + key->iz_lookup = lookup; + } + + line_aa = AA_NEVER; + + /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ + if (ctx->Line.SmoothFlag) { + if (brw->reduced_primitive == GL_LINES) { + line_aa = AA_ALWAYS; + } + else if (brw->reduced_primitive == GL_TRIANGLES) { + if (ctx->Polygon.FrontMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if (ctx->Polygon.BackMode == GL_LINE || + (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_BACK)) + line_aa = AA_ALWAYS; + } + else if (ctx->Polygon.BackMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if ((ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT)) + line_aa = AA_ALWAYS; + } + } + } + + key->line_aa = line_aa; + + /* _NEW_HINT */ + key->high_quality_derivatives = + ctx->Hint.FragmentShaderDerivative == GL_NICEST; + + if (brw->gen < 6) + key->stats_wm = brw->stats_wm; + + /* _NEW_LIGHT */ + key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + + /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ + key->clamp_fragment_color = 
ctx->Color._ClampFragmentColor; + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count, + &key->tex); + + /* _NEW_BUFFERS */ + /* + * Include the draw buffer origin and height so that we can calculate + * fragment position values relative to the bottom left of the drawable, + * from the incoming screen origin relative position we get as part of our + * payload. + * + * This is only needed for the WM_WPOSXY opcode when the fragment program + * uses the gl_FragCoord input. + * + * We could avoid recompiling by including this as a constant referenced by + * our program, but if we were to do that it would also be nice to handle + * getting that constant updated at batchbuffer submit time (when we + * hold the lock and know where the buffer really is) rather than at emit + * time when we don't hold the lock and are just guessing. We could also + * just avoid using this as key data if the program doesn't use + * fragment.position. + * + * For DRI2 the origin_x/y will always be (0,0) but we still need the + * drawable height in order to invert the Y axis. + */ + if (fp->program.Base.InputsRead & VARYING_BIT_POS) { + key->drawable_height = ctx->DrawBuffer->Height; + } + + if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { + key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); + } + + /* _NEW_BUFFERS */ + key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; + + /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */ + key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 && + (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled); + + /* _NEW_BUFFERS _NEW_MULTISAMPLE */ + /* Ignore sample qualifier while computing this flag. 
*/ + key->persample_shading = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1; + if (key->persample_shading) + key->persample_2x = ctx->DrawBuffer->Visual.samples == 2; + + key->compute_pos_offset = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 && + fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS; + + key->compute_sample_id = + multisample_fbo && + ctx->Multisample.Enabled && + (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID); + + /* BRW_NEW_VUE_MAP_GEOM_OUT */ + if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) + key->input_slots_valid = brw->vue_map_geom_out.slots_valid; + + + /* _NEW_COLOR | _NEW_BUFFERS */ + /* Pre-gen6, the hardware alpha test always used each render + * target's alpha to do alpha test, as opposed to render target 0's alpha + * like GL requires. Fix that by building the alpha test into the + * shader, and we'll skip enabling the fixed function alpha test. 
+ */ + if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) { + key->alpha_test_func = ctx->Color.AlphaFunc; + key->alpha_test_ref = ctx->Color.AlphaRef; + } + + /* The unique fragment program ID */ + key->program_string_id = fp->id; + + ctx->DrawBuffer = NULL; +} + +static uint8_t +computed_depth_mode(struct gl_fragment_program *fp) +{ + if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + switch (fp->FragDepthLayout) { + case FRAG_DEPTH_LAYOUT_NONE: + case FRAG_DEPTH_LAYOUT_ANY: + return BRW_PSCDEPTH_ON; + case FRAG_DEPTH_LAYOUT_GREATER: + return BRW_PSCDEPTH_ON_GE; + case FRAG_DEPTH_LAYOUT_LESS: + return BRW_PSCDEPTH_ON_LE; + case FRAG_DEPTH_LAYOUT_UNCHANGED: + return BRW_PSCDEPTH_OFF; + } + } + return BRW_PSCDEPTH_OFF; +} + +static bool +really_do_wm_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key, struct anv_pipeline *pipeline) +{ + struct gl_context *ctx = &brw->ctx; + void *mem_ctx = ralloc_context(NULL); + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct gl_shader *fs = NULL; + unsigned int program_size; + const uint32_t *program; + uint32_t offset; + + if (prog) + fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + + memset(prog_data, 0, sizeof(*prog_data)); + + /* key->alpha_test_func means simulating alpha testing via discards, + * so the shader definitely kills pixels. + */ + prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func; + + prog_data->computed_depth_mode = computed_depth_mode(&fp->program); + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + int param_count; + if (fs) { + param_count = fs->num_uniform_components; + } else { + param_count = fp->program.Base.Parameters->NumParameters * 4; + } + /* The backend also sometimes adds params for texture size. 
*/ + param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; + prog_data->base.param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data->base.pull_param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data->base.nr_params = param_count; + + prog_data->barycentric_interp_modes = + brw_compute_barycentric_interp_modes(brw, key->flat_shade, + key->persample_shading, + &fp->program); + + set_binding_table_layout(&prog_data->base, pipeline, + VK_SHADER_STAGE_FRAGMENT); + /* This needs to come after shader time and pull constant entries, but we + * don't have those set up now, so just put it after the layout entries. + */ + prog_data->binding_table.render_target_start = 0; + + program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data, + &fp->program, prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + offset = pipeline->program_next; + pipeline->program_next = align(pipeline->program_next + program_size, 64); + + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = offset; + + if (prog_data->no_8 || prog_data->prog_offset_16) + pipeline->ps_simd16 = offset + prog_data->prog_offset_16; + else + pipeline->ps_simd16 = NO_KERNEL; + + memcpy((char *) pipeline->device->instruction_block_pool.map + + offset, program, program_size); + + ralloc_free(mem_ctx); + + if (prog_data->base.total_scratch > 0) + if (!anv_bo_init_new(&pipeline->ps_scratch_bo, + pipeline->device, + prog_data->base.total_scratch)) + return false; + + return true; +} + +static void +brw_gs_populate_key(struct brw_context *brw, + struct anv_pipeline *pipeline, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + struct brw_stage_state *stage_state = &brw->gs.base; + struct gl_program *prog = &gp->program.Base; + + memset(key, 0, sizeof(*key)); + + key->base.program_string_id = gp->id; + 
brw_setup_vue_key_clip_info(brw, &key->base, + gp->program.Base.UsesClipDistanceOut); + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count, + &key->base.tex); + + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + + /* BRW_NEW_VUE_MAP_VS */ + key->input_varyings = prog_data->base.vue_map.slots_valid; +} + +static bool +really_do_gs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key, struct anv_pipeline *pipeline) +{ + struct brw_gs_compile_output output; + uint32_t offset; + + /* FIXME: We pass the bind map to the compile in the output struct. Need + * something better. */ + set_binding_table_layout(&output.prog_data.base.base, + pipeline, VK_SHADER_STAGE_GEOMETRY); + + brw_compile_gs_prog(brw, prog, gp, key, &output); + + offset = pipeline->program_next; + pipeline->program_next = align(pipeline->program_next + output.program_size, 64); + + pipeline->gs_vec4 = offset; + pipeline->gs_vertex_count = gp->program.VerticesIn; + + memcpy((char *) pipeline->device->instruction_block_pool.map + + offset, output.program, output.program_size); + + ralloc_free(output.mem_ctx); + + if (output.prog_data.base.base.total_scratch) { + if (!anv_bo_init_new(&pipeline->gs_scratch_bo, + pipeline->device, + output.prog_data.base.base.total_scratch)) + return false; + } + + memcpy(&pipeline->gs_prog_data, &output.prog_data, sizeof pipeline->gs_prog_data); + + return true; +} + +static void +fail_on_compile_error(int status, const char *msg) +{ + int source, line, column; + char error[256]; + + if (status) + return; + + if (sscanf(msg, "%d:%d(%d): error: %255[^\n]", &source, &line, &column, error) == 4) + fail_if(!status, "%d:%s\n", line, error); + else + fail_if(!status, "%s\n", msg); +} + +struct anv_compiler { + struct intel_screen *screen; + struct brw_context *brw; +}; + + +extern "C" { + +struct anv_compiler * +anv_compiler_create(int fd) +{ + 
struct anv_compiler *compiler; + + compiler = (struct anv_compiler *) malloc(sizeof *compiler); + if (compiler == NULL) + return NULL; + + compiler->screen = intel_screen_create(fd); + if (compiler->screen == NULL) { + free(compiler); + return NULL; + } + + compiler->brw = intel_context_create(compiler->screen); + if (compiler->brw == NULL) { + free(compiler); + return NULL; + } + + compiler->brw->precompile = false; + + return compiler; +} + +void +anv_compiler_destroy(struct anv_compiler *compiler) +{ + intel_context_destroy(compiler->brw); + intel_screen_destroy(compiler->screen); + free(compiler); +} + +/* From gen7_urb.c */ + +/* FIXME: Add to struct intel_device_info */ + +static const int gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + unsigned vs_size = pipeline->vs_prog_data.base.urb_entry_size; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->gs_vec4 != NO_KERNEL; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. 
*/ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). + */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". 
*/ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. + */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. 
+ */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static const struct { + uint32_t token; + const char *name; +} stage_info[] = { + { GL_VERTEX_SHADER, "vertex" }, + { GL_TESS_CONTROL_SHADER, "tess control" }, + { GL_TESS_EVALUATION_SHADER, "tess evaluation" }, + { GL_GEOMETRY_SHADER, "geometry" }, + { GL_FRAGMENT_SHADER, "fragment" }, + { GL_COMPUTE_SHADER, "compute" }, +}; + +static void +anv_compile_shader(struct anv_compiler *compiler, + struct gl_shader_program *program, + struct anv_pipeline *pipeline, uint32_t stage) +{ + struct brw_context *brw = compiler->brw; + struct gl_shader *shader; + int name = 0; + + shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); + fail_if(shader == NULL, "failed to create %s shader\n", stage_info[stage].name); + shader->Source = strdup(pipeline->shaders[stage]->data); + _mesa_glsl_compile_shader(&brw->ctx, shader, false, false); + fail_on_compile_error(shader->CompileStatus, shader->InfoLog); + + program->Shaders[program->NumShaders] = shader; + program->NumShaders++; +} + +int +anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) +{ + struct gl_shader_program *program; + int name = 0; + struct brw_context *brw = compiler->brw; + struct anv_device *device = pipeline->device; + + brw->use_rep_send = pipeline->use_repclear; + brw->no_simd8 = pipeline->use_repclear; + + program = brw->ctx.Driver.NewShaderProgram(name); + program->Shaders = (struct gl_shader **) + calloc(VK_NUM_SHADER_STAGE, sizeof(struct gl_shader *)); + fail_if(program == NULL || program->Shaders 
== NULL, + "failed to create program\n"); + + /* FIXME: Only supports vs and fs combo at the moment */ + assert(pipeline->shaders[VK_SHADER_STAGE_VERTEX]); + assert(pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]); + + anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_VERTEX); + anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_FRAGMENT); + if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) + anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_GEOMETRY); + + _mesa_glsl_link_shader(&brw->ctx, program); + fail_on_compile_error(program->LinkStatus, + program->InfoLog); + + pipeline->program_block = + anv_block_pool_alloc(&device->instruction_block_pool); + pipeline->program_next = pipeline->program_block; + + + bool success; + struct brw_wm_prog_key wm_key; + struct gl_fragment_program *fp = (struct gl_fragment_program *) + program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; + struct brw_fragment_program *bfp = brw_fragment_program(fp); + + brw_wm_populate_key(brw, bfp, &wm_key); + + success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + pipeline->prog_data[VK_SHADER_STAGE_FRAGMENT] = &pipeline->wm_prog_data.base; + + + struct brw_vs_prog_key vs_key; + struct gl_vertex_program *vp = (struct gl_vertex_program *) + program->_LinkedShaders[MESA_SHADER_VERTEX]->Program; + struct brw_vertex_program *bvp = brw_vertex_program(vp); + + brw_vs_populate_key(brw, bvp, &vs_key); + + success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + pipeline->prog_data[VK_SHADER_STAGE_VERTEX] = &pipeline->vs_prog_data.base.base; + + if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) { + struct brw_gs_prog_key gs_key; + struct gl_geometry_program *gp = (struct gl_geometry_program *) + program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program; + struct brw_geometry_program *bgp = brw_geometry_program(gp); + + brw_gs_populate_key(brw, pipeline, bgp, 
&gs_key); + + success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline); + fail_if(!success, "do_gs_prog failed\n"); + pipeline->active_stages = VK_SHADER_STAGE_VERTEX_BIT | + VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + pipeline->prog_data[VK_SHADER_STAGE_GEOMETRY] = &pipeline->gs_prog_data.base.base; + + } else { + pipeline->gs_vec4 = NO_KERNEL; + pipeline->active_stages = + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + } + + + /* FIXME: Allocate more blocks if we fill up this one and worst case, + * allocate multiple continuous blocks from end of pool to hold really big + * programs. */ + assert(pipeline->program_next - pipeline->program_block < 8192); + + brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); + + gen7_compute_urb_partition(pipeline); + + return 0; +} + +/* This badly named function frees the struct anv_pipeline data that the compiler + * allocates. Currently just the prog_data structs. + */ +void +anv_compiler_free(struct anv_pipeline *pipeline) +{ + struct anv_device *device = pipeline->device; + + for (uint32_t stage = 0; stage < VK_NUM_SHADER_STAGE; stage++) + if (pipeline->prog_data[stage]) + free(pipeline->prog_data[stage]->map_entries); + + anv_block_pool_free(&device->instruction_block_pool, + pipeline->program_block); +} + +} diff --git a/src/vulkan/device.c b/src/vulkan/device.c new file mode 100644 index 00000000000..09b21e50c7c --- /dev/null +++ b/src/vulkan/device.c @@ -0,0 +1,2634 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The 
above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "private.h" + +static int +anv_env_get_int(const char *name) +{ + const char *val = getenv(name); + + if (!val) + return 0; + + return strtol(val, NULL, 0); +} + +static VkResult +fill_physical_device(struct anv_physical_device *device, + struct anv_instance *instance, + const char *path) +{ + int fd; + + fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC); + if (fd < 0) + return vk_error(VK_ERROR_UNAVAILABLE); + + device->instance = instance; + device->path = path; + + device->chipset_id = anv_env_get_int("INTEL_DEVID_OVERRIDE"); + device->no_hw = false; + if (device->chipset_id) { + /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. 
*/ + device->no_hw = true; + } else { + device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); + } + if (!device->chipset_id) + goto fail; + + device->name = brw_get_device_name(device->chipset_id); + device->info = brw_get_device_info(device->chipset_id, -1); + if (!device->info) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC)) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS)) + goto fail; + + close(fd); + + return VK_SUCCESS; + + fail: + close(fd); + + return vk_error(VK_ERROR_UNAVAILABLE); +} + +static void *default_alloc( + void* pUserData, + size_t size, + size_t alignment, + VkSystemAllocType allocType) +{ + return malloc(size); +} + +static void default_free( + void* pUserData, + void* pMem) +{ + free(pMem); +} + +static const VkAllocCallbacks default_alloc_callbacks = { + .pUserData = NULL, + .pfnAlloc = default_alloc, + .pfnFree = default_free +}; + +VkResult VKAPI vkCreateInstance( + const VkInstanceCreateInfo* pCreateInfo, + VkInstance* pInstance) +{ + struct anv_instance *instance; + const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks; + void *user_data = NULL; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); + + if (pCreateInfo->pAllocCb) { + alloc_callbacks = pCreateInfo->pAllocCb; + user_data = pCreateInfo->pAllocCb->pUserData; + } + instance = alloc_callbacks->pfnAlloc(user_data, sizeof(*instance), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!instance) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + instance->pAllocUserData = alloc_callbacks->pUserData; + instance->pfnAlloc = alloc_callbacks->pfnAlloc; + instance->pfnFree = alloc_callbacks->pfnFree; + instance->apiVersion = pCreateInfo->pAppInfo->apiVersion; + + instance->physicalDeviceCount = 0; + result = 
fill_physical_device(&instance->physicalDevice, + instance, "/dev/dri/renderD128"); + if (result == VK_SUCCESS) + instance->physicalDeviceCount++; + + *pInstance = (VkInstance) instance; + + return VK_SUCCESS; +} + +VkResult VKAPI vkDestroyInstance( + VkInstance _instance) +{ + struct anv_instance *instance = (struct anv_instance *) _instance; + + instance->pfnFree(instance->pAllocUserData, instance); + + return VK_SUCCESS; +} + +VkResult VKAPI vkEnumeratePhysicalDevices( + VkInstance _instance, + uint32_t* pPhysicalDeviceCount, + VkPhysicalDevice* pPhysicalDevices) +{ + struct anv_instance *instance = (struct anv_instance *) _instance; + + if (*pPhysicalDeviceCount >= 1) + pPhysicalDevices[0] = (VkPhysicalDevice) &instance->physicalDevice; + *pPhysicalDeviceCount = instance->physicalDeviceCount; + + return VK_SUCCESS; +} + +VkResult VKAPI vkGetPhysicalDeviceInfo( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceInfoType infoType, + size_t* pDataSize, + void* pData) +{ + struct anv_physical_device *device = (struct anv_physical_device *) physicalDevice; + VkPhysicalDeviceProperties *properties; + VkPhysicalDevicePerformance *performance; + VkPhysicalDeviceQueueProperties *queue_properties; + VkPhysicalDeviceMemoryProperties *memory_properties; + uint64_t ns_per_tick = 80; + + switch (infoType) { + case VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES: + properties = pData; + assert(*pDataSize >= sizeof(*properties)); + *pDataSize = sizeof(*properties); /* Assuming we have to return the size of our struct. 
*/ + + properties->apiVersion = 1; + properties->driverVersion = 1; + properties->vendorId = 0x8086; + properties->deviceId = device->chipset_id; + properties->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; + strcpy(properties->deviceName, device->name); + properties->maxInlineMemoryUpdateSize = 0; + properties->maxBoundDescriptorSets = 0; + properties->maxThreadGroupSize = 0; + properties->timestampFrequency = 1000 * 1000 * 1000 / ns_per_tick; + properties->multiColorAttachmentClears = 0; + properties->maxDescriptorSets = 2; + properties->maxViewports = 16; + properties->maxColorAttachments = 8; + return VK_SUCCESS; + + case VK_PHYSICAL_DEVICE_INFO_TYPE_PERFORMANCE: + performance = pData; + assert(*pDataSize >= sizeof(*performance)); + *pDataSize = sizeof(*performance); /* Assuming we have to return the size of our struct. */ + + performance->maxDeviceClock = 1.0; + performance->aluPerClock = 1.0; + performance->texPerClock = 1.0; + performance->primsPerClock = 1.0; + performance->pixelsPerClock = 1.0; + return VK_SUCCESS; + + case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PROPERTIES: + queue_properties = pData; + assert(*pDataSize >= sizeof(*queue_properties)); + *pDataSize = sizeof(*queue_properties); + + queue_properties->queueFlags = 0; + queue_properties->queueCount = 1; + queue_properties->maxAtomicCounters = 0; + queue_properties->supportsTimestamps = 0; + queue_properties->maxMemReferences = 0; + return VK_SUCCESS; + + case VK_PHYSICAL_DEVICE_INFO_TYPE_MEMORY_PROPERTIES: + memory_properties = pData; + assert(*pDataSize >= sizeof(*memory_properties)); + *pDataSize = sizeof(*memory_properties); + + memory_properties->supportsMigration = false; + memory_properties->supportsPinning = false; + return VK_SUCCESS; + + default: + return VK_UNSUPPORTED; + } + +} + +void * vkGetProcAddr( + VkPhysicalDevice physicalDevice, + const char* pName) +{ + return NULL; +} + +static void +parse_debug_flags(struct anv_device *device) +{ + const char *debug, *p, *end; + + debug 
= getenv("INTEL_DEBUG"); + device->dump_aub = false; + if (debug) { + for (p = debug; *p; p = end + 1) { + end = strchrnul(p, ','); + if (end - p == 3 && memcmp(p, "aub", 3) == 0) + device->dump_aub = true; + if (end - p == 5 && memcmp(p, "no_hw", 5) == 0) + device->no_hw = true; + if (*end == '\0') + break; + } + } +} + +VkResult VKAPI vkCreateDevice( + VkPhysicalDevice _physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + VkDevice* pDevice) +{ + struct anv_physical_device *physicalDevice = + (struct anv_physical_device *) _physicalDevice; + struct anv_instance *instance = physicalDevice->instance; + struct anv_device *device; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); + + device = instance->pfnAlloc(instance->pAllocUserData, + sizeof(*device), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!device) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + device->no_hw = physicalDevice->no_hw; + parse_debug_flags(device); + + device->instance = physicalDevice->instance; + device->fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC); + if (device->fd == -1) + goto fail_device; + + device->context_id = anv_gem_create_context(device); + if (device->context_id == -1) + goto fail_fd; + + anv_block_pool_init(&device->dyn_state_block_pool, device, 2048); + + anv_state_pool_init(&device->dyn_state_pool, + &device->dyn_state_block_pool); + + anv_block_pool_init(&device->instruction_block_pool, device, 2048); + anv_block_pool_init(&device->surface_state_block_pool, device, 2048); + + anv_state_pool_init(&device->surface_state_pool, + &device->surface_state_block_pool); + + device->compiler = anv_compiler_create(device->fd); + device->aub_writer = NULL; + + device->info = *physicalDevice->info; + + pthread_mutex_init(&device->mutex, NULL); + + *pDevice = (VkDevice) device; + + return VK_SUCCESS; + + fail_fd: + close(device->fd); + fail_device: + anv_device_free(device, device); + + return vk_error(VK_ERROR_UNAVAILABLE); +} + +VkResult VKAPI 
vkDestroyDevice( + VkDevice _device) +{ + struct anv_device *device = (struct anv_device *) _device; + + anv_compiler_destroy(device->compiler); + + anv_block_pool_finish(&device->dyn_state_block_pool); + anv_block_pool_finish(&device->instruction_block_pool); + anv_block_pool_finish(&device->surface_state_block_pool); + + close(device->fd); + + if (device->aub_writer) + anv_aub_writer_destroy(device->aub_writer); + + anv_device_free(device, device); + + return VK_SUCCESS; +} + +VkResult VKAPI vkGetGlobalExtensionInfo( + VkExtensionInfoType infoType, + uint32_t extensionIndex, + size_t* pDataSize, + void* pData) +{ + uint32_t *count; + + switch (infoType) { + case VK_EXTENSION_INFO_TYPE_COUNT: + count = pData; + assert(*pDataSize == 4); + *count = 0; + return VK_SUCCESS; + + case VK_EXTENSION_INFO_TYPE_PROPERTIES: + return vk_error(VK_ERROR_INVALID_EXTENSION); + + default: + return VK_UNSUPPORTED; + } +} + +VkResult VKAPI vkGetPhysicalDeviceExtensionInfo( + VkPhysicalDevice physicalDevice, + VkExtensionInfoType infoType, + uint32_t extensionIndex, + size_t* pDataSize, + void* pData) +{ + uint32_t *count; + + switch (infoType) { + case VK_EXTENSION_INFO_TYPE_COUNT: + count = pData; + assert(*pDataSize == 4); + *count = 0; + return VK_SUCCESS; + + case VK_EXTENSION_INFO_TYPE_PROPERTIES: + return vk_error(VK_ERROR_INVALID_EXTENSION); + + default: + return VK_UNSUPPORTED; + } +} + +VkResult VKAPI vkEnumerateLayers( + VkPhysicalDevice physicalDevice, + size_t maxStringSize, + size_t* pLayerCount, + char* const* pOutLayers, + void* pReserved) +{ + *pLayerCount = 0; + + return VK_SUCCESS; +} + +VkResult VKAPI vkGetDeviceQueue( + VkDevice _device, + uint32_t queueNodeIndex, + uint32_t queueIndex, + VkQueue* pQueue) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_queue *queue; + + /* FIXME: Should allocate these at device create time. 
*/ + + queue = anv_device_alloc(device, sizeof(*queue), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (queue == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + queue->device = device; + queue->pool = &device->surface_state_pool; + + queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4); + *(uint32_t *)queue->completed_serial.map = 0; + queue->next_serial = 1; + + *pQueue = (VkQueue) queue; + + return VK_SUCCESS; +} + +static const uint32_t BATCH_SIZE = 8192; + +VkResult +anv_batch_init(struct anv_batch *batch, struct anv_device *device) +{ + VkResult result; + + result = anv_bo_init_new(&batch->bo, device, BATCH_SIZE); + if (result != VK_SUCCESS) + return result; + + batch->bo.map = + anv_gem_mmap(device, batch->bo.gem_handle, 0, BATCH_SIZE); + if (batch->bo.map == NULL) { + anv_gem_close(device, batch->bo.gem_handle); + return vk_error(VK_ERROR_MEMORY_MAP_FAILED); + } + + batch->cmd_relocs.num_relocs = 0; + batch->surf_relocs.num_relocs = 0; + batch->next = batch->bo.map; + + return VK_SUCCESS; +} + +void +anv_batch_finish(struct anv_batch *batch, struct anv_device *device) +{ + anv_gem_munmap(batch->bo.map, BATCH_SIZE); + anv_gem_close(device, batch->bo.gem_handle); +} + +void +anv_batch_reset(struct anv_batch *batch) +{ + batch->next = batch->bo.map; + batch->cmd_relocs.num_relocs = 0; + batch->surf_relocs.num_relocs = 0; +} + +void * +anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) +{ + void *p = batch->next; + + batch->next += num_dwords * 4; + + return p; +} + +static void +anv_reloc_list_append(struct anv_reloc_list *list, + struct anv_reloc_list *other, uint32_t offset) +{ + uint32_t i, count; + + count = list->num_relocs; + memcpy(&list->relocs[count], &other->relocs[0], + other->num_relocs * sizeof(other->relocs[0])); + memcpy(&list->reloc_bos[count], &other->reloc_bos[0], + other->num_relocs * sizeof(other->reloc_bos[0])); + for (i = 0; i < other->num_relocs; i++) + list->relocs[i + count].offset += offset; + + count += 
other->num_relocs; +} + +static uint64_t +anv_reloc_list_add(struct anv_reloc_list *list, + uint32_t offset, + struct anv_bo *target_bo, uint32_t delta) +{ + struct drm_i915_gem_relocation_entry *entry; + int index; + + assert(list->num_relocs < ANV_BATCH_MAX_RELOCS); + + /* XXX: Can we use I915_EXEC_HANDLE_LUT? */ + index = list->num_relocs++; + list->reloc_bos[index] = target_bo; + entry = &list->relocs[index]; + entry->target_handle = target_bo->gem_handle; + entry->delta = delta; + entry->offset = offset; + entry->presumed_offset = target_bo->offset; + entry->read_domains = 0; + entry->write_domain = 0; + + return target_bo->offset + delta; +} + +void +anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +{ + uint32_t size, offset; + + size = other->next - other->bo.map; + memcpy(batch->next, other->bo.map, size); + + offset = batch->next - batch->bo.map; + anv_reloc_list_append(&batch->cmd_relocs, &other->cmd_relocs, offset); + anv_reloc_list_append(&batch->surf_relocs, &other->surf_relocs, offset); + + batch->next += size; +} + +uint64_t +anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t delta) +{ + return anv_reloc_list_add(&batch->cmd_relocs, + location - batch->bo.map, bo, delta); +} + +VkResult VKAPI vkQueueSubmit( + VkQueue _queue, + uint32_t cmdBufferCount, + const VkCmdBuffer* pCmdBuffers, + VkFence fence) +{ + struct anv_queue *queue = (struct anv_queue *) _queue; + struct anv_device *device = queue->device; + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) pCmdBuffers[0]; + int ret; + + assert(cmdBufferCount == 1); + + if (device->dump_aub) + anv_cmd_buffer_dump(cmd_buffer); + + if (!device->no_hw) { + ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf); + if (ret != 0) + goto fail; + + for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) + cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset; + } else { + *(uint32_t *)queue->completed_serial.map = 
cmd_buffer->serial; + } + + return VK_SUCCESS; + + fail: + pthread_mutex_unlock(&device->mutex); + + return vk_error(VK_ERROR_UNKNOWN); +} + +VkResult VKAPI vkQueueAddMemReferences( + VkQueue queue, + uint32_t count, + const VkDeviceMemory* pMems) +{ + return VK_SUCCESS; +} + +VkResult vkQueueRemoveMemReferences( + VkQueue queue, + uint32_t count, + const VkDeviceMemory* pMems) +{ + return VK_SUCCESS; +} + +VkResult VKAPI vkQueueWaitIdle( + VkQueue _queue) +{ + struct anv_queue *queue = (struct anv_queue *) _queue; + + return vkDeviceWaitIdle((VkDevice) queue->device); +} + +VkResult VKAPI vkDeviceWaitIdle( + VkDevice _device) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_state state; + struct anv_batch batch; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + struct anv_bo *bo = NULL; + VkResult result; + int64_t timeout; + int ret; + + state = anv_state_pool_alloc(&device->dyn_state_pool, 32, 32); + bo = &device->dyn_state_pool.block_pool->bo; + batch.next = state.map; + anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN8_MI_NOOP); + + exec2_objects[0].handle = bo->gem_handle; + exec2_objects[0].relocation_count = 0; + exec2_objects[0].relocs_ptr = 0; + exec2_objects[0].alignment = 0; + exec2_objects[0].offset = bo->offset; + exec2_objects[0].flags = 0; + exec2_objects[0].rsvd1 = 0; + exec2_objects[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t) exec2_objects; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = state.offset; + execbuf.batch_len = batch.next - state.map; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + + execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + execbuf.rsvd1 = device->context_id; + execbuf.rsvd2 = 0; + + if (!device->no_hw) { + ret = anv_gem_execbuffer(device, &execbuf); + if (ret != 0) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } 
+ + timeout = INT64_MAX; + ret = anv_gem_wait(device, bo->gem_handle, &timeout); + if (ret != 0) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + } + + anv_state_pool_free(&device->dyn_state_pool, state); + + return VK_SUCCESS; + + fail: + anv_state_pool_free(&device->dyn_state_pool, state); + + return result; +} + +void * +anv_device_alloc(struct anv_device * device, + size_t size, + size_t alignment, + VkSystemAllocType allocType) +{ + return device->instance->pfnAlloc(device->instance->pAllocUserData, + size, + alignment, + allocType); +} + +void +anv_device_free(struct anv_device * device, + void * mem) +{ + return device->instance->pfnFree(device->instance->pAllocUserData, + mem); +} + +VkResult +anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) +{ + bo->gem_handle = anv_gem_create(device, size); + if (!bo->gem_handle) + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + + bo->map = NULL; + bo->index = 0; + bo->offset = 0; + bo->size = size; + + return VK_SUCCESS; +} + +VkResult VKAPI vkAllocMemory( + VkDevice _device, + const VkMemoryAllocInfo* pAllocInfo, + VkDeviceMemory* pMem) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_device_memory *mem; + VkResult result; + + assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO); + + mem = anv_device_alloc(device, sizeof(*mem), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_init_new(&mem->bo, device, pAllocInfo->allocationSize); + if (result != VK_SUCCESS) + goto fail; + + *pMem = (VkDeviceMemory) mem; + + return VK_SUCCESS; + + fail: + anv_device_free(device, mem); + + return result; +} + +VkResult VKAPI vkFreeMemory( + VkDevice _device, + VkDeviceMemory _mem) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_device_memory *mem = (struct anv_device_memory *) _mem; + + if (mem->bo.map) + anv_gem_munmap(mem->bo.map, mem->bo.size); + + if 
(mem->bo.gem_handle != 0) + anv_gem_close(device, mem->bo.gem_handle); + + anv_device_free(device, mem); + + return VK_SUCCESS; +} + +VkResult VKAPI vkSetMemoryPriority( + VkDevice device, + VkDeviceMemory mem, + VkMemoryPriority priority) +{ + return VK_SUCCESS; +} + +VkResult VKAPI vkMapMemory( + VkDevice _device, + VkDeviceMemory _mem, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void** ppData) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_device_memory *mem = (struct anv_device_memory *) _mem; + + /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only + * takes a VkDeviceMemory pointer, it seems like only one map of the memory + * at a time is valid. We could just mmap up front and return an offset + * pointer here, but that may exhaust virtual memory on 32 bit + * userspace. */ + + mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size); + mem->map_size = size; + + *ppData = mem->map; + + return VK_SUCCESS; +} + +VkResult VKAPI vkUnmapMemory( + VkDevice _device, + VkDeviceMemory _mem) +{ + struct anv_device_memory *mem = (struct anv_device_memory *) _mem; + + anv_gem_munmap(mem->map, mem->map_size); + + return VK_SUCCESS; +} + +VkResult VKAPI vkFlushMappedMemory( + VkDevice device, + VkDeviceMemory mem, + VkDeviceSize offset, + VkDeviceSize size) +{ + /* clflush here for !llc platforms */ + + return VK_SUCCESS; +} + +VkResult VKAPI vkPinSystemMemory( + VkDevice device, + const void* pSysMem, + size_t memSize, + VkDeviceMemory* pMem) +{ + return VK_SUCCESS; +} + +VkResult VKAPI vkGetMultiDeviceCompatibility( + VkPhysicalDevice physicalDevice0, + VkPhysicalDevice physicalDevice1, + VkPhysicalDeviceCompatibilityInfo* pInfo) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkOpenSharedMemory( + VkDevice device, + const VkMemoryOpenInfo* pOpenInfo, + VkDeviceMemory* pMem) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkOpenSharedSemaphore( + VkDevice device, + const 
VkSemaphoreOpenInfo* pOpenInfo, + VkSemaphore* pSemaphore) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkOpenPeerMemory( + VkDevice device, + const VkPeerMemoryOpenInfo* pOpenInfo, + VkDeviceMemory* pMem) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkOpenPeerImage( + VkDevice device, + const VkPeerImageOpenInfo* pOpenInfo, + VkImage* pImage, + VkDeviceMemory* pMem) +{ + return VK_UNSUPPORTED; +} + +static VkResult +anv_instance_destructor(struct anv_device * device, + VkObject object) +{ + return vkDestroyInstance(object); +} + +static VkResult +anv_noop_destructor(struct anv_device * device, + VkObject object) +{ + return VK_SUCCESS; +} + +static VkResult +anv_device_destructor(struct anv_device * device, + VkObject object) +{ + return vkDestroyDevice(object); +} + +static VkResult +anv_cmd_buffer_destructor(struct anv_device * device, + VkObject object) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object; + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_batch_finish(&cmd_buffer->batch, device); + anv_device_free(device, cmd_buffer->exec2_objects); + anv_device_free(device, cmd_buffer->exec2_bos); + anv_device_free(device, cmd_buffer); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_destructor(struct anv_device * device, + VkObject object) +{ + struct anv_pipeline *pipeline = (struct anv_pipeline *) object; + + return anv_pipeline_destroy(pipeline); +} + +static VkResult +anv_free_destructor(struct anv_device * device, + VkObject object) +{ + anv_device_free(device, (void *) object); + + return VK_SUCCESS; +} + +static VkResult (*anv_object_destructors[])(struct anv_device *device, + VkObject object) = { + [VK_OBJECT_TYPE_INSTANCE] = anv_instance_destructor, + [VK_OBJECT_TYPE_PHYSICAL_DEVICE] = anv_noop_destructor, + [VK_OBJECT_TYPE_DEVICE] = anv_device_destructor, + [VK_OBJECT_TYPE_QUEUE] = anv_noop_destructor, + [VK_OBJECT_TYPE_COMMAND_BUFFER] = anv_cmd_buffer_destructor, + [VK_OBJECT_TYPE_PIPELINE] 
= anv_pipeline_destructor, + [VK_OBJECT_TYPE_SHADER] = anv_free_destructor, + [VK_OBJECT_TYPE_BUFFER] = anv_free_destructor, + [VK_OBJECT_TYPE_IMAGE] = anv_free_destructor, + [VK_OBJECT_TYPE_RENDER_PASS] = anv_free_destructor +}; + +VkResult VKAPI vkDestroyObject( + VkDevice _device, + VkObjectType objType, + VkObject object) +{ + struct anv_device *device = (struct anv_device *) _device; + + assert(objType < ARRAY_SIZE(anv_object_destructors) && + anv_object_destructors[objType] != NULL); + + return anv_object_destructors[objType](device, object); +} + +static void +fill_memory_requirements( + VkObjectType objType, + VkObject object, + VkMemoryRequirements * memory_requirements) +{ + struct anv_buffer *buffer; + struct anv_image *image; + + memory_requirements->memPropsAllowed = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT | + /* VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT | */ + VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT | + VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL | + VK_MEMORY_PROPERTY_SHAREABLE_BIT; + + memory_requirements->memPropsRequired = 0; + + switch (objType) { + case VK_OBJECT_TYPE_BUFFER: + buffer = (struct anv_buffer *) object; + memory_requirements->size = buffer->size; + memory_requirements->alignment = 16; + break; + case VK_OBJECT_TYPE_IMAGE: + image = (struct anv_image *) object; + memory_requirements->size = image->size; + memory_requirements->alignment = image->alignment; + break; + default: + memory_requirements->size = 0; + break; + } +} + +VkResult VKAPI vkGetObjectInfo( + VkDevice _device, + VkObjectType objType, + VkObject object, + VkObjectInfoType infoType, + size_t* pDataSize, + void* pData) +{ + VkMemoryRequirements memory_requirements; + + switch (infoType) { + case VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS: + fill_memory_requirements(objType, object, &memory_requirements); + memcpy(pData, &memory_requirements, + MIN2(*pDataSize, sizeof(memory_requirements))); + *pDataSize = sizeof(memory_requirements); + 
return VK_SUCCESS; + + case VK_OBJECT_INFO_TYPE_MEMORY_ALLOCATION_COUNT: + default: + return VK_UNSUPPORTED; + } + +} + +VkResult VKAPI vkQueueBindObjectMemory( + VkQueue queue, + VkObjectType objType, + VkObject object, + uint32_t allocationIdx, + VkDeviceMemory _mem, + VkDeviceSize memOffset) +{ + struct anv_buffer *buffer; + struct anv_image *image; + struct anv_device_memory *mem = (struct anv_device_memory *) _mem; + + switch (objType) { + case VK_OBJECT_TYPE_BUFFER: + buffer = (struct anv_buffer *) object; + buffer->mem = mem; + buffer->offset = memOffset; + break; + case VK_OBJECT_TYPE_IMAGE: + image = (struct anv_image *) object; + image->mem = mem; + image->offset = memOffset; + break; + default: + break; + } + + return VK_SUCCESS; +} + +VkResult VKAPI vkQueueBindObjectMemoryRange( + VkQueue queue, + VkObjectType objType, + VkObject object, + uint32_t allocationIdx, + VkDeviceSize rangeOffset, + VkDeviceSize rangeSize, + VkDeviceMemory mem, + VkDeviceSize memOffset) +{ + return VK_UNSUPPORTED; +} + +VkResult vkQueueBindImageMemoryRange( + VkQueue queue, + VkImage image, + uint32_t allocationIdx, + const VkImageMemoryBindInfo* pBindInfo, + VkDeviceMemory mem, + VkDeviceSize memOffset) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkCreateFence( + VkDevice device, + const VkFenceCreateInfo* pCreateInfo, + VkFence* pFence) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkResetFences( + VkDevice device, + uint32_t fenceCount, + VkFence* pFences) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkGetFenceStatus( + VkDevice device, + VkFence fence) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkWaitForFences( + VkDevice device, + uint32_t fenceCount, + const VkFence* pFences, + bool32_t waitAll, + uint64_t timeout) +{ + return VK_UNSUPPORTED; +} + +// Queue semaphore functions + +VkResult VKAPI vkCreateSemaphore( + VkDevice device, + const VkSemaphoreCreateInfo* pCreateInfo, + VkSemaphore* pSemaphore) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI 
vkQueueSignalSemaphore( + VkQueue queue, + VkSemaphore semaphore) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkQueueWaitSemaphore( + VkQueue queue, + VkSemaphore semaphore) +{ + return VK_UNSUPPORTED; +} + +// Event functions + +VkResult VKAPI vkCreateEvent( + VkDevice device, + const VkEventCreateInfo* pCreateInfo, + VkEvent* pEvent) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkGetEventStatus( + VkDevice device, + VkEvent event) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkSetEvent( + VkDevice device, + VkEvent event) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkResetEvent( + VkDevice device, + VkEvent event) +{ + return VK_UNSUPPORTED; +} + +// Query functions + +struct anv_query_pool { + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + +VkResult VKAPI vkCreateQueryPool( + VkDevice _device, + const VkQueryPoolCreateInfo* pCreateInfo, + VkQueryPool* pQueryPool) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_query_pool *pool; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); + + pool = anv_device_alloc(device, sizeof(*pool), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pool->type = pCreateInfo->queryType; + result = anv_bo_init_new(&pool->bo, device, pCreateInfo->slots * 16); + if (result != VK_SUCCESS) + goto fail; + + *pQueryPool = (VkQueryPool) pool; + + return VK_SUCCESS; + + fail: + anv_device_free(device, pool); + + return result; +} + +VkResult VKAPI vkGetQueryPoolResults( + VkDevice device, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + size_t* pDataSize, + void* pData, + VkQueryResultFlags flags) +{ + return VK_UNSUPPORTED; +} + +// Format capabilities + +VkResult VKAPI vkGetFormatInfo( + VkDevice device, + VkFormat format, + VkFormatInfoType infoType, + size_t* pDataSize, + void* pData) +{ + return VK_UNSUPPORTED; +} + +// Buffer functions + +VkResult 
VKAPI vkCreateBuffer( + VkDevice _device, + const VkBufferCreateInfo* pCreateInfo, + VkBuffer* pBuffer) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_buffer *buffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); + + buffer = anv_device_alloc(device, sizeof(*buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + buffer->size = pCreateInfo->size; + buffer->mem = NULL; + buffer->offset = 0; + + *pBuffer = (VkBuffer) buffer; + + return VK_SUCCESS; +} + +// Buffer view functions + +VkResult VKAPI vkCreateBufferView( + VkDevice _device, + const VkBufferViewCreateInfo* pCreateInfo, + VkBufferView* pView) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_buffer_view *view; + const struct anv_format *format; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); + + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + view->buffer = (struct anv_buffer *) pCreateInfo->buffer; + view->offset = pCreateInfo->offset; + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + format = anv_format_for_vk_format(pCreateInfo->format); + /* This assumes RGBA float format. 
*/ + uint32_t stride = 4; + uint32_t num_elements = pCreateInfo->range / stride; + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceArray = false, + .SurfaceFormat = format->format, + .SurfaceVerticalAlignment = VALIGN4, + .SurfaceHorizontalAlignment = HALIGN4, + .TileMode = LINEAR, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = 0, /* FIXME: MOCS */ + .BaseMipLevel = 0, + .SurfaceQPitch = 0, + .Height = (num_elements >> 7) & 0x3fff, + .Width = num_elements & 0x7f, + .Depth = (num_elements >> 21) & 0x3f, + .SurfacePitch = stride - 1, + .MinimumArrayElement = 0, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + .SurfaceMinLOD = 0, + .MIPCountLOD = 0, + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + .ResourceMinLOD = 0, + /* FIXME: We assume that the image must be bound at this time. 
*/ + .SurfaceBaseAddress = { NULL, view->buffer->offset + view->offset }, + }; + + GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); + + *pView = (VkImageView) view; + + return VK_SUCCESS; +} + +// Sampler functions + +struct anv_sampler { + uint32_t state[4]; +}; + +VkResult VKAPI vkCreateSampler( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + VkSampler* pSampler) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + sampler = anv_device_alloc(device, sizeof(*sampler), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_SAMPLER_STATE sampler_state = { + .SamplerDisable = 0, + .TextureBorderColorMode = 0, + .LODPreClampMode = 0, + .BaseMipLevel = 0, + .MipModeFilter = 0, + .MagModeFilter = 0, + .MinModeFilter = 0, + .TextureLODBias = 0, + .AnisotropicAlgorithm = 0, + .MinLOD = 0, + .MaxLOD = 0, + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = 0, + .CubeSurfaceControlMode = 0, + .IndirectStatePointer = 0, + .LODClampMagnificationMode = 0, + .MaximumAnisotropy = 0, + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = 0, + .TCXAddressControlMode = 0, + .TCYAddressControlMode = 0, + .TCZAddressControlMode = 0, + }; + + GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + + *pSampler = (VkSampler) sampler; + + return VK_SUCCESS; +} + +// Descriptor set functions + +VkResult VKAPI vkCreateDescriptorSetLayout( + VkDevice _device, + const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + VkDescriptorSetLayout* pSetLayout) +{ + struct 
anv_device *device = (struct anv_device *) _device; + struct anv_descriptor_set_layout *set_layout; + uint32_t count, k; + size_t size, total; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + count = 0; + for (uint32_t i = 0; i < pCreateInfo->count; i++) + count += pCreateInfo->pBinding[i].count; + + size = sizeof(*set_layout) + + count * sizeof(set_layout->bindings[0]); + set_layout = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!set_layout) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + k = 0; + total = 0; + for (uint32_t i = 0; i < pCreateInfo->count; i++) { + for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) { + set_layout->bindings[k].mask = pCreateInfo->pBinding[i].stageFlags; + set_layout->bindings[k].type = pCreateInfo->pBinding[i].descriptorType; + k++; + } + + total += pCreateInfo->pBinding[i].count * + __builtin_popcount(pCreateInfo->pBinding[i].stageFlags); + } + + set_layout->total = total; + set_layout->count = count; + + *pSetLayout = (VkDescriptorSetLayout) set_layout; + + return VK_SUCCESS; +} + +VkResult VKAPI vkBeginDescriptorPoolUpdate( + VkDevice device, + VkDescriptorUpdateMode updateMode) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkEndDescriptorPoolUpdate( + VkDevice device, + VkCmdBuffer cmd) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkCreateDescriptorPool( + VkDevice device, + VkDescriptorPoolUsage poolUsage, + uint32_t maxSets, + const VkDescriptorPoolCreateInfo* pCreateInfo, + VkDescriptorPool* pDescriptorPool) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkResetDescriptorPool( + VkDevice device, + VkDescriptorPool descriptorPool) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkAllocDescriptorSets( + VkDevice _device, + VkDescriptorPool descriptorPool, + VkDescriptorSetUsage setUsage, + uint32_t count, + const VkDescriptorSetLayout* pSetLayouts, + VkDescriptorSet* pDescriptorSets, + uint32_t* pCount) +{ + struct anv_device 
*device = (struct anv_device *) _device; + const struct anv_descriptor_set_layout *layout; + struct anv_descriptor_set *set; + size_t size; + + for (uint32_t i = 0; i < count; i++) { + layout = (struct anv_descriptor_set_layout *) pSetLayouts[i]; + size = sizeof(*set) + layout->total * sizeof(set->descriptors[0]); + set = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!set) { + *pCount = i; + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + pDescriptorSets[i] = (VkDescriptorSet) set; + } + + *pCount = count; + + return VK_UNSUPPORTED; +} + +void VKAPI vkClearDescriptorSets( + VkDevice device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet* pDescriptorSets) +{ +} + +void VKAPI vkUpdateDescriptors( + VkDevice _device, + VkDescriptorSet descriptorSet, + uint32_t updateCount, + const void** ppUpdateArray) +{ + struct anv_descriptor_set *set = (struct anv_descriptor_set *) descriptorSet; + VkUpdateSamplers *update_samplers; + VkUpdateSamplerTextures *update_sampler_textures; + VkUpdateImages *update_images; + VkUpdateBuffers *update_buffers; + VkUpdateAsCopy *update_as_copy; + + for (uint32_t i = 0; i < updateCount; i++) { + const struct anv_common *common = ppUpdateArray[i]; + + switch (common->sType) { + case VK_STRUCTURE_TYPE_UPDATE_SAMPLERS: + update_samplers = (VkUpdateSamplers *) common; + + for (uint32_t j = 0; j < update_samplers->count; j++) { + set->descriptors[update_samplers->binding + j] = + (void *) update_samplers->pSamplers[j]; + } + break; + + case VK_STRUCTURE_TYPE_UPDATE_SAMPLER_TEXTURES: + /* FIXME: Shouldn't this be *_UPDATE_SAMPLER_IMAGES? 
*/ + update_sampler_textures = (VkUpdateSamplerTextures *) common; + + for (uint32_t j = 0; j < update_sampler_textures->count; j++) { + set->descriptors[update_sampler_textures->binding + j] = + (void *) update_sampler_textures->pSamplerImageViews[j].pImageView->view; + } + break; + + case VK_STRUCTURE_TYPE_UPDATE_IMAGES: + update_images = (VkUpdateImages *) common; + + for (uint32_t j = 0; j < update_images->count; j++) { + set->descriptors[update_images->binding + j] = + (void *) update_images->pImageViews[j].view; + } + break; + + case VK_STRUCTURE_TYPE_UPDATE_BUFFERS: + update_buffers = (VkUpdateBuffers *) common; + + for (uint32_t j = 0; j < update_buffers->count; j++) { + set->descriptors[update_buffers->binding + j] = + (void *) update_buffers->pBufferViews[j].view; + } + /* FIXME: descriptor arrays? */ + break; + + case VK_STRUCTURE_TYPE_UPDATE_AS_COPY: + update_as_copy = (VkUpdateAsCopy *) common; + (void) update_as_copy; + break; + + default: + break; + } + } +} + +// State object functions + +static inline int64_t +clamp_int64(int64_t x, int64_t min, int64_t max) +{ + if (x < min) + return min; + else if (x < max) + return x; + else + return max; +} + +VkResult VKAPI vkCreateDynamicViewportState( + VkDevice _device, + const VkDynamicVpStateCreateInfo* pCreateInfo, + VkDynamicVpState* pState) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_dynamic_vp_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + unsigned count = pCreateInfo->viewportAndScissorCount; + state->sf_clip_vp = anv_state_pool_alloc(&device->dyn_state_pool, + count * 64, 64); + state->cc_vp = anv_state_pool_alloc(&device->dyn_state_pool, + count * 8, 32); + state->scissor = anv_state_pool_alloc(&device->dyn_state_pool, + count * 32, 32); + + for (uint32_t 
i = 0; i < pCreateInfo->viewportAndScissorCount; i++) { + const VkViewport *vp = &pCreateInfo->pViewports[i]; + const VkRect *s = &pCreateInfo->pScissors[i]; + + struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, + .ViewportMatrixElementm30 = vp->originX + vp->width / 2, + .ViewportMatrixElementm31 = vp->originY + vp->height / 2, + .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->originX, + .XMaxViewPort = vp->originX + vp->width - 1, + .YMinViewPort = vp->originY, + .YMaxViewPort = vp->originY + vp->height - 1, + }; + + struct GEN8_CC_VIEWPORT cc_viewport = { + .MinimumDepth = vp->minDepth, + .MaximumDepth = vp->maxDepth + }; + + /* Since xmax and ymax are inclusive, we have to have xmax < xmin or + * ymax < ymin for empty clips. In case clip x, y, width height are all + * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't + * what we want. Just special case empty clips and produce a canonical + * empty clip. */ + static const struct GEN8_SCISSOR_RECT empty_scissor = { + .ScissorRectangleYMin = 1, + .ScissorRectangleXMin = 1, + .ScissorRectangleYMax = 0, + .ScissorRectangleXMax = 0 + }; + + const int max = 0xffff; + struct GEN8_SCISSOR_RECT scissor = { + /* Do this math using int64_t so overflow gets clamped correctly. 
*/ + .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), + .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), + .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), + .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) + }; + + GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport); + GEN8_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 32, &cc_viewport); + + if (s->extent.width <= 0 || s->extent.height <= 0) { + GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor); + } else { + GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor); + } + } + + *pState = (VkDynamicVpState) state; + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateDynamicRasterState( + VkDevice _device, + const VkDynamicRsStateCreateInfo* pCreateInfo, + VkDynamicRsState* pState) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_dynamic_rs_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Missing these: + * float depthBias; + * float depthBiasClamp; + * float slopeScaledDepthBias; + * float pointFadeThreshold; + * // optional (GL45) - Size of point fade threshold + */ + + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .LineWidth = pCreateInfo->lineWidth, + .PointWidth = pCreateInfo->pointSize, + }; + + GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); + + *pState = (VkDynamicRsState) state; + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateDynamicColorBlendState( + VkDevice _device, + const VkDynamicCbStateCreateInfo* pCreateInfo, + VkDynamicCbState* pState) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_dynamic_cb_state *state; + + assert(pCreateInfo->sType == 
VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *pState = (VkDynamicCbState) state; + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateDynamicDepthStencilState( + VkDevice device, + const VkDynamicDsStateCreateInfo* pCreateInfo, + VkDynamicDsState* pState) +{ + return VK_UNSUPPORTED; +} + +// Command buffer functions + +VkResult VKAPI vkCreateCommandBuffer( + VkDevice _device, + const VkCmdBufferCreateInfo* pCreateInfo, + VkCmdBuffer* pCmdBuffer) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_cmd_buffer *cmd_buffer; + VkResult result; + + cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cmd_buffer->device = device; + + result = anv_batch_init(&cmd_buffer->batch, device); + if (result != VK_SUCCESS) + goto fail; + + cmd_buffer->exec2_objects = + anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_objects[0]), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer->exec2_objects == NULL) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_batch; + } + + cmd_buffer->exec2_bos = + anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_bos[0]), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer->exec2_bos == NULL) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_exec2_objects; + } + + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + + cmd_buffer->dirty = 0; + cmd_buffer->vb_dirty = 0; + + *pCmdBuffer = (VkCmdBuffer) cmd_buffer; + + return VK_SUCCESS; + + fail_exec2_objects: + anv_device_free(device, cmd_buffer->exec2_objects); + fail_batch: + anv_batch_finish(&cmd_buffer->batch, device); + fail: + anv_device_free(device, cmd_buffer); + + return result; +} + 
+VkResult VKAPI vkBeginCommandBuffer( + VkCmdBuffer cmdBuffer, + const VkCmdBufferBeginInfo* pBeginInfo) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_device *device = cmd_buffer->device; + + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = _3D); + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_SIP); + + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, + .GeneralStateBaseAddress = { NULL, 0 }, + .GeneralStateBaseAddressModifyEnable = true, + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + + .SurfaceStateBaseAddress = { &device->surface_state_block_pool.bo, 0 }, + .SurfaceStateMemoryObjectControlState = 0, /* FIXME: MOCS */ + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dyn_state_block_pool.bo, 0 }, + .DynamicStateBaseAddressModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectBaseAddressModifyEnable = true, + .IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionBaseAddressModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VF_STATISTICS, + .StatisticsEnable = true); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HS, .Enable = false); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_TE, .TEEnable = false); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DS, .FunctionEnable = false); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS, + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&cmd_buffer->batch, 
GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS, + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS, + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLIP, + .ClipEnable = true, + .ViewportXYClipTestEnable = true); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_WM_CHROMAKEY, + .ChromaKeyKillEnable = false); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SBE_SWIZ); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); + + /* Hardcoded state: */ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .Width = 1, + .Height = 1, + .SurfaceFormat = D16_UNORM, + .SurfaceBaseAddress = { NULL, 0 }, + .HierarchicalDepthBufferEnable = 0); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_WM_DEPTH_STENCIL, + .DepthTestEnable = false, + .DepthBufferWriteEnable = false); + + return VK_SUCCESS; +} + +static void +anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, struct anv_reloc_list *list) +{ + struct drm_i915_gem_exec_object2 *obj; + + bo->index = cmd_buffer->bo_count; + obj = &cmd_buffer->exec2_objects[bo->index]; + cmd_buffer->exec2_bos[bo->index] = bo; + cmd_buffer->bo_count++; + + obj->handle = bo->gem_handle; + obj->relocation_count = 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = 0; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + + if (list) { + obj->relocation_count = list->num_relocs; + obj->relocs_ptr = (uintptr_t) list->relocs; + } +} + +static void +anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer, + struct anv_reloc_list *list) +{ + struct anv_bo *bo, *batch_bo; + + batch_bo = &cmd_buffer->batch.bo; + for (size_t i = 0; i < list->num_relocs; i++) { + bo = list->reloc_bos[i]; + /* Skip any relocations targeting the batch bo. 
We need to make sure + * it's the last in the list so we'll add it manually later. + */ + if (bo == batch_bo) + continue; + if (bo->index < cmd_buffer->bo_count && cmd_buffer->exec2_bos[bo->index] == bo) + continue; + + anv_cmd_buffer_add_bo(cmd_buffer, bo, NULL); + } +} + +static void +anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, + struct anv_reloc_list *list) +{ + struct anv_bo *bo; + + /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in + * struct drm_i915_gem_exec_object2 against the bos current offset and if + * all bos haven't moved it will skip relocation processing alltogether. + * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming + * value of offset so we can set it either way. For that to work we need + * to make sure all relocs use the same presumed offset. + */ + + for (size_t i = 0; i < list->num_relocs; i++) { + bo = list->reloc_bos[i]; + if (bo->offset != list->relocs[i].presumed_offset) + cmd_buffer->need_reloc = true; + + list->relocs[i].target_handle = bo->index; + } +} + +VkResult VKAPI vkEndCommandBuffer( + VkCmdBuffer cmdBuffer) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_device *device = cmd_buffer->device; + struct anv_batch *batch = &cmd_buffer->batch; + + anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END); + + /* Round batch up to an even number of dwords. */ + if ((batch->next - batch->bo.map) & 4) + anv_batch_emit(batch, GEN8_MI_NOOP); + + cmd_buffer->bo_count = 0; + cmd_buffer->need_reloc = false; + + /* Lock for access to bo->index. */ + pthread_mutex_lock(&device->mutex); + + /* Add block pool bos first so we can add them with their relocs. 
*/ + anv_cmd_buffer_add_bo(cmd_buffer, &device->surface_state_block_pool.bo, + &batch->surf_relocs); + + anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->surf_relocs); + anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->cmd_relocs); + anv_cmd_buffer_add_bo(cmd_buffer, &batch->bo, &batch->cmd_relocs); + anv_cmd_buffer_process_relocs(cmd_buffer, &batch->surf_relocs); + anv_cmd_buffer_process_relocs(cmd_buffer, &batch->cmd_relocs); + + cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects; + cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count; + cmd_buffer->execbuf.batch_start_offset = 0; + cmd_buffer->execbuf.batch_len = batch->next - batch->bo.map; + cmd_buffer->execbuf.cliprects_ptr = 0; + cmd_buffer->execbuf.num_cliprects = 0; + cmd_buffer->execbuf.DR1 = 0; + cmd_buffer->execbuf.DR4 = 0; + + cmd_buffer->execbuf.flags = I915_EXEC_HANDLE_LUT; + if (!cmd_buffer->need_reloc) + cmd_buffer->execbuf.flags |= I915_EXEC_NO_RELOC; + cmd_buffer->execbuf.flags |= I915_EXEC_RENDER; + cmd_buffer->execbuf.rsvd1 = device->context_id; + cmd_buffer->execbuf.rsvd2 = 0; + + pthread_mutex_unlock(&device->mutex); + + return VK_SUCCESS; +} + +VkResult VKAPI vkResetCommandBuffer( + VkCmdBuffer cmdBuffer) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + + anv_batch_reset(&cmd_buffer->batch); + + return VK_SUCCESS; +} + +// Command buffer building functions + +void VKAPI vkCmdBindPipeline( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + + cmd_buffer->pipeline = (struct anv_pipeline *) _pipeline; + cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; +} + +void VKAPI vkCmdBindDynamicStateObject( + VkCmdBuffer cmdBuffer, + VkStateBindPoint stateBindPoint, + VkDynamicStateObject dynamicState) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_dynamic_vp_state *vp_state; + + 
switch (stateBindPoint) { + case VK_STATE_BIND_POINT_VIEWPORT: + vp_state = (struct anv_dynamic_vp_state *) dynamicState; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = vp_state->scissor.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = vp_state->cc_vp.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = vp_state->sf_clip_vp.offset); + break; + case VK_STATE_BIND_POINT_RASTER: + cmd_buffer->rs_state = (struct anv_dynamic_rs_state *) dynamicState; + cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY; + break; + case VK_STATE_BIND_POINT_COLOR_BLEND: + case VK_STATE_BIND_POINT_DEPTH_STENCIL: + break; + default: + break; + }; +} + +void VKAPI vkCmdBindDescriptorSets( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + uint32_t firstSet, + uint32_t setCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t* pDynamicOffsets) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + + /* What are the semantics for setting descriptor sets? Assuming that + * setting preserves lower sets and invalidate higher sets. This means that + * we can set the number of active sets to firstSet + setCount. 
+ */ + + for (uint32_t i = 0; i < setCount; i++) + cmd_buffer->descriptor_sets[firstSet + i] = + (struct anv_descriptor_set *) pDescriptorSets[i]; + + cmd_buffer->num_descriptor_sets = firstSet + setCount; + cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; +} + +void VKAPI vkCmdBindIndexBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_buffer *buffer = (struct anv_buffer *) _buffer; + + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT8] = INDEX_BYTE, + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = 0, + .BufferStartingAddress = { &buffer->mem->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); +} + +void VKAPI vkCmdBindVertexBuffers( + VkCmdBuffer cmdBuffer, + uint32_t startBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. 
*/ + + for (uint32_t i = 0; i < bindingCount; i++) { + cmd_buffer->vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i]; + cmd_buffer->vb[startBinding + i].offset = pOffsets[i]; + cmd_buffer->vb_dirty |= 1 << (startBinding + i); + } +} + +static void +flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; + struct anv_framebuffer *framebuffer = cmd_buffer->framebuffer; + + for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + + uint32_t bias = s == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0; + uint32_t count, *table; + struct anv_state table_state; + + if (layout) + count = layout->stage[s].count + bias; + else if (s == VK_SHADER_STAGE_FRAGMENT) + count = framebuffer->color_attachment_count; + else + count = 0; + + if (count == 0) + continue; + + table_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, + count * 4, 32); + table = table_state.map; + + if (s == VK_SHADER_STAGE_FRAGMENT) { + for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) { + struct anv_color_attachment_view *view = framebuffer->color_attachments[i]; + table[i] = view->surface_state.offset; + + /* Don't write the reloc back to the surface state. We do that at + * submit time. Surface address is dwords 8-9. 
*/ + anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, + view->surface_state.offset + 8 * sizeof(int32_t), + &view->image->mem->bo, view->image->offset); + } + } + + if (layout) { + for (uint32_t i = 0; i < layout->stage[s].count; i++) { + struct anv_pipeline_layout_entry *e = &layout->stage[s].entries[i]; + struct anv_image_view *image_view; + struct anv_buffer_view *buffer_view; + void *d = cmd_buffer->descriptor_sets[e->set]->descriptors[e->index]; + + switch (e->type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + image_view = d; + table[bias + i] = image_view->surface_state.offset; + anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, + image_view->surface_state.offset + 8 * sizeof(int32_t), + &image_view->image->mem->bo, + image_view->image->offset); + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + /* FIXME: What are these? TBOs? */ + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + buffer_view = d; + table[bias + i] = buffer_view->surface_state.offset; + anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, + buffer_view->surface_state.offset + 8 * sizeof(int32_t), + &buffer_view->buffer->mem->bo, + buffer_view->buffer->offset + buffer_view->offset); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + break; + default: + break; + } + } + } + + /* FIXME: Samplers */ + + /* The binding table pointer commands all have the same structure, only + * the opcode differs. 
+ */ + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = opcodes[s], + .PointertoVSBindingTable = table_state.offset); + } +} + +static void +anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->pipeline; + const uint32_t num_buffers = __builtin_popcount(cmd_buffer->vb_dirty); + const uint32_t num_dwords = 1 + num_buffers * 4; + uint32_t *p; + + if (cmd_buffer->vb_dirty) { + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN8_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, cmd_buffer->vb_dirty) { + struct anv_buffer *buffer = cmd_buffer->vb[vb].buffer; + uint32_t offset = cmd_buffer->vb[vb].offset; + + struct GEN8_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = 0, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { &buffer->mem->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if (cmd_buffer->dirty & ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY) + flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { + /* maybe: anv_batch_merge(batch, GEN8_3DSTATE_SF, a, b) */ + uint32_t *dw; + + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_3DSTATE_SF_length); + for (uint32_t i = 0; i < GEN8_3DSTATE_SF_length; i++) + dw[i] = cmd_buffer->rs_state->state_sf[i] | pipeline->state_sf[i]; + } + + cmd_buffer->vb_dirty = 0; + cmd_buffer->dirty = 0; +} + +void VKAPI vkCmdDraw( + VkCmdBuffer cmdBuffer, + uint32_t firstVertex, + uint32_t vertexCount, + uint32_t firstInstance, + uint32_t instanceCount) +{ + struct anv_cmd_buffer *cmd_buffer = (struct 
anv_cmd_buffer *) cmdBuffer; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = SEQUENTIAL, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void VKAPI vkCmdDrawIndexed( + VkCmdBuffer cmdBuffer, + uint32_t firstIndex, + uint32_t indexCount, + int32_t vertexOffset, + uint32_t firstInstance, + uint32_t instanceCount) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = RANDOM, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +static void +anv_batch_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, + .RegisterOffset = reg, + .DataDWord = imm); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void VKAPI vkCmdDrawIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_buffer *buffer = (struct anv_buffer *) _buffer; + struct anv_bo *bo = &buffer->mem->bo; + uint32_t bo_offset = buffer->offset + offset; + + 
anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL); +} + +void VKAPI vkCmdDrawIndexedIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_buffer *buffer = (struct anv_buffer *) _buffer; + struct anv_bo *bo = &buffer->mem->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM); +} + +void VKAPI vkCmdDispatch( + VkCmdBuffer cmdBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ +} + +void VKAPI vkCmdDispatchIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer buffer, + VkDeviceSize offset) +{ +} + +void VKAPI vkCmdSetEvent( + VkCmdBuffer cmdBuffer, + VkEvent event, + VkPipeEvent pipeEvent) +{ +} + +void VKAPI vkCmdResetEvent( + VkCmdBuffer cmdBuffer, + VkEvent event, + VkPipeEvent pipeEvent) +{ +} + +void VKAPI vkCmdWaitEvents( + VkCmdBuffer cmdBuffer, + 
VkWaitEvent waitEvent, + uint32_t eventCount, + const VkEvent* pEvents, + uint32_t memBarrierCount, + const void** ppMemBarriers) +{ +} + +void VKAPI vkCmdPipelineBarrier( + VkCmdBuffer cmdBuffer, + VkWaitEvent waitEvent, + uint32_t pipeEventCount, + const VkPipeEvent* pPipeEvents, + uint32_t memBarrierCount, + const void** ppMemBarriers) +{ +} + +static void +anv_batch_emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ +} + +void VKAPI vkCmdBeginQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot, + VkQueryControlFlags flags) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, slot * 16); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + break; + + default: + break; + } +} + +void VKAPI vkCmdEndQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, slot * 16 + 8); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + break; + + default: + break; + } +} + +void VKAPI vkCmdResetQueryPool( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount) +{ +} + +#define TIMESTAMP 0x44070 + +void VKAPI vkCmdWriteTimestamp( + VkCmdBuffer cmdBuffer, + VkTimestampType timestampType, + VkBuffer destBuffer, + VkDeviceSize destOffset) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer 
*) cmdBuffer; + struct anv_buffer *buffer = (struct anv_buffer *) destBuffer; + struct anv_bo *bo = &buffer->mem->bo; + + switch (timestampType) { + case VK_TIMESTAMP_TYPE_TOP: + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { bo, buffer->offset + destOffset }); + break; + + case VK_TIMESTAMP_TYPE_BOTTOM: + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = /* FIXME: This is only lower 32 bits */ + { bo, buffer->offset + destOffset }); + break; + + default: + break; + } +} + +void VKAPI vkCmdCopyQueryPoolResults( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ +} + +void VKAPI vkCmdInitAtomicCounters( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + uint32_t startCounter, + uint32_t counterCount, + const uint32_t* pData) +{ +} + +void VKAPI vkCmdLoadAtomicCounters( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + uint32_t startCounter, + uint32_t counterCount, + VkBuffer srcBuffer, + VkDeviceSize srcOffset) +{ +} + +void VKAPI vkCmdSaveAtomicCounters( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + uint32_t startCounter, + uint32_t counterCount, + VkBuffer destBuffer, + VkDeviceSize destOffset) +{ +} + +VkResult VKAPI vkCreateFramebuffer( + VkDevice _device, + const VkFramebufferCreateInfo* pCreateInfo, + VkFramebuffer* pFramebuffer) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_framebuffer *framebuffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); + + framebuffer = anv_device_alloc(device, sizeof(*framebuffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (framebuffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + 
framebuffer->color_attachment_count = pCreateInfo->colorAttachmentCount; + for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) { + framebuffer->color_attachments[i] = + (struct anv_color_attachment_view *) pCreateInfo->pColorAttachments[i].view; + } + + if (pCreateInfo->pDepthStencilAttachment) { + framebuffer->depth_stencil = + (struct anv_depth_stencil_view *) pCreateInfo->pDepthStencilAttachment->view; + } + + framebuffer->sample_count = pCreateInfo->sampleCount; + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + + *pFramebuffer = (VkFramebuffer) framebuffer; + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateRenderPass( + VkDevice _device, + const VkRenderPassCreateInfo* pCreateInfo, + VkRenderPass* pRenderPass) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_render_pass *pass; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + + pass = anv_device_alloc(device, sizeof(*pass), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pass == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pass->render_area = pCreateInfo->renderArea; + + *pRenderPass = (VkRenderPass) pass; + + return VK_SUCCESS; +} + +void VKAPI vkCmdBeginRenderPass( + VkCmdBuffer cmdBuffer, + const VkRenderPassBegin* pRenderPassBegin) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_render_pass *pass = (struct anv_render_pass *) pRenderPassBegin->renderPass; + + cmd_buffer->framebuffer = (struct anv_framebuffer *) pRenderPassBegin->framebuffer; + cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, + .ClippedDrawingRectangleYMin = pass->render_area.offset.y, + .ClippedDrawingRectangleXMin = pass->render_area.offset.x, + .ClippedDrawingRectangleYMax = + pass->render_area.offset.y + pass->render_area.extent.height - 1, + 
.ClippedDrawingRectangleXMax = + pass->render_area.offset.x + pass->render_area.extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); +} + +void VKAPI vkCmdEndRenderPass( + VkCmdBuffer cmdBuffer, + VkRenderPass renderPass) +{ +} diff --git a/src/vulkan/gem.c b/src/vulkan/gem.c new file mode 100644 index 00000000000..5cc5e5d8e84 --- /dev/null +++ b/src/vulkan/gem.c @@ -0,0 +1,283 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define _DEFAULT_SOURCE + +#include +#include +#include +#include +#include +#include + +#include "private.h" + +#ifdef HAVE_VALGRIND +#include +#include +#define VG(x) x +#else +#define VG(x) +#endif + +#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) + +static int +anv_ioctl(int fd, unsigned long request, void *arg) +{ + int ret; + + do { + ret = ioctl(fd, request, arg); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_CREATE. + * + * Return gem handle, or 0 on failure. Gem handles are never 0. + */ +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + struct drm_i915_gem_create gem_create; + int ret; + + VG_CLEAR(gem_create); + gem_create.size = size; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); + if (ret != 0) { + /* FIXME: What do we do if this fails? */ + return 0; + } + + return gem_create.handle; +} + +void +anv_gem_close(struct anv_device *device, int gem_handle) +{ + struct drm_gem_close close; + + VG_CLEAR(close); + close.handle = gem_handle; + anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_MMAP. + */ +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size) +{ + struct drm_i915_gem_mmap gem_mmap; + int ret; + + VG_CLEAR(gem_mmap); + gem_mmap.handle = gem_handle; + gem_mmap.offset = offset; + gem_mmap.size = size; + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); + if (ret != 0) { + /* FIXME: Is NULL the right error return? Cf MAP_INVALID */ + return NULL; + } + + VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1)); + return (void *)(uintptr_t) gem_mmap.addr_ptr; +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). 
+ */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + munmap(p, size); + VG(VALGRIND_FREELIKE_BLOCK(p, 0)); +} + +int +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + struct drm_i915_gem_userptr userptr; + int ret; + + VG_CLEAR(userptr); + userptr.user_ptr = (__u64)((unsigned long) mem); + userptr.user_size = size; + userptr.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); + if (ret == -1) + return 0; + + return userptr.handle; +} + +/** + * On error, \a timeout_ns holds the remaining time. + */ +int +anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns) +{ + struct drm_i915_gem_wait wait; + int ret; + + VG_CLEAR(wait); + wait.bo_handle = gem_handle; + wait.timeout_ns = *timeout_ns; + wait.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + *timeout_ns = wait.timeout_ns; + if (ret == -1) + return -errno; + + return ret; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); +} + +int +anv_gem_set_tiling(struct anv_device *device, + int gem_handle, uint32_t stride, uint32_t tiling) +{ + struct drm_i915_gem_set_tiling set_tiling; + int ret; + + /* set_tiling overwrites the input on the error path, so we have to open + * code anv_ioctl. 
+ */ + + do { + VG_CLEAR(set_tiling); + set_tiling.handle = gem_handle; + set_tiling.tiling_mode = I915_TILING_X; + set_tiling.stride = stride; + + ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + drm_i915_getparam_t gp; + int ret, tmp; + + VG_CLEAR(gp); + gp.param = param; + gp.value = &tmp; + ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0) + return tmp; + + return 0; +} + +int +anv_gem_create_context(struct anv_device *device) +{ + struct drm_i915_gem_context_create create; + int ret; + + VG_CLEAR(create); + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret == -1) + return -1; + + return create.ctx_id; +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + struct drm_i915_gem_context_destroy destroy; + + VG_CLEAR(destroy); + destroy.ctx_id = context; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy); +} + +int +anv_gem_get_aperture(struct anv_device *device, uint64_t *size) +{ + struct drm_i915_gem_get_aperture aperture; + int ret; + + VG_CLEAR(aperture); + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (ret == -1) + return -1; + + *size = aperture.aper_available_size; + + return 0; +} + +int +anv_gem_handle_to_fd(struct anv_device *device, int gem_handle) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.handle = gem_handle; + args.flags = DRM_CLOEXEC; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + if (ret == -1) + return -1; + + return args.fd; +} + +int +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.fd = fd; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); + if (ret == -1) + return 0; + + return args.handle; +} diff --git 
a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h new file mode 100644 index 00000000000..c15afe9b266 --- /dev/null +++ b/src/vulkan/gen8_pack.h @@ -0,0 +1,8702 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +/* Instructions, enums and structures for BDW. + * + * This file has been generated, do not hand edit. 
 */ + +#pragma once + +#include <stdint.h> +#include <assert.h> + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_field(uint64_t v, uint32_t start, uint32_t end) +{ +#if DEBUG + if (end - start + 1 < 64) + assert(v < 1ul << (end - start + 1)); +#endif + + return v << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ +#if DEBUG + uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline uint32_t +__gen_float(float v) +{ + return ((union __gen_value) { .f = (v) }).dw; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#define GEN8_3DSTATE_URB_VS_length 0x00000002 +#define GEN8_3DSTATE_URB_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 48, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_URB_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VSURBStartingAddress; + uint32_t VSURBEntryAllocationSize; + uint32_t VSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VSURBStartingAddress, 25, 31) | + __gen_field(values->VSURBEntryAllocationSize, 16, 24) | + __gen_field(values->VSNumberofURBEntries, 0, 15) | + 0; + +} + +#define 
GEN8_3DSTATE_VS_length 0x00000009 +#define GEN8_3DSTATE_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 16, \ + .DwordLength = 7 + +struct GEN8_3DSTATE_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleVertexDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t AccessesUAV; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + uint32_t StatisticsEnable; + uint32_t SIMD8DispatchEnable; + uint32_t VertexCacheDisable; + uint32_t FunctionEnable; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN8_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 
__gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[3] = + __gen_field(values->SingleVertexDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->AccessesUAV, 12, 12) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 23, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->SIMD8DispatchEnable, 2, 2) | + __gen_field(values->VertexCacheDisable, 1, 1) | + __gen_field(values->FunctionEnable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN8_GPGPU_CSR_BASE_ADDRESS_length 0x00000003 +#define GEN8_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 +#define GEN8_GPGPU_CSR_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +struct GEN8_GPGPU_CSR_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GPGPUCSRBaseAddressHigh; +}; + +static inline void 
+GEN8_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_GPGPU_CSR_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddressHigh, dw1); + +} + +#define GEN8_MI_ATOMIC_length 0x00000003 +#define GEN8_MI_ATOMIC_length_bias 0x00000002 +#define GEN8_MI_ATOMIC_header \ + .CommandType = 0, \ + .MICommandOpcode = 47 + +struct GEN8_MI_ATOMIC { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t MemoryType; + uint32_t PostSyncOperation; +#define DWORD 0 +#define QWORD 1 +#define OCTWORD 2 +#define RESERVED 3 + uint32_t DataSize; + uint32_t InlineData; + uint32_t CSSTALL; + uint32_t ReturnDataControl; + uint32_t ATOMICOPCODE; + uint32_t DwordLength; + __gen_address_type MemoryAddress; + uint32_t MemoryAddressHigh; + uint32_t Operand1DataDword0; + uint32_t Operand2DataDword0; + uint32_t Operand1DataDword1; + uint32_t Operand2DataDword1; + uint32_t Operand1DataDword2; + uint32_t Operand2DataDword2; + uint32_t Operand1DataDword3; + uint32_t Operand2DataDword3; +}; + +static inline void +GEN8_MI_ATOMIC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_ATOMIC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->MemoryType, 22, 22) | + __gen_field(values->PostSyncOperation, 21, 21) | + __gen_field(values->DataSize, 19, 20) | + __gen_field(values->InlineData, 18, 18) | + __gen_field(values->CSSTALL, 17, 17) | + 
__gen_field(values->ReturnDataControl, 16, 16) | + __gen_field(values->ATOMICOPCODE, 8, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->MemoryAddress, dw1); + + dw[2] = + __gen_field(values->MemoryAddressHigh, 0, 15) | + 0; + + dw[3] = + __gen_field(values->Operand1DataDword0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->Operand2DataDword0, 0, 31) | + 0; + + dw[5] = + __gen_field(values->Operand1DataDword1, 0, 31) | + 0; + + dw[6] = + __gen_field(values->Operand2DataDword1, 0, 31) | + 0; + + dw[7] = + __gen_field(values->Operand1DataDword2, 0, 31) | + 0; + + dw[8] = + __gen_field(values->Operand2DataDword2, 0, 31) | + 0; + + dw[9] = + __gen_field(values->Operand1DataDword3, 0, 31) | + 0; + + dw[10] = + __gen_field(values->Operand2DataDword3, 0, 31) | + 0; + +} + +#define GEN8_MI_LOAD_REGISTER_REG_length 0x00000003 +#define GEN8_MI_LOAD_REGISTER_REG_length_bias 0x00000002 +#define GEN8_MI_LOAD_REGISTER_REG_header \ + .CommandType = 0, \ + .MICommandOpcode = 42, \ + .DwordLength = 1 + +struct GEN8_MI_LOAD_REGISTER_REG { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t SourceRegisterAddress; + uint32_t DestinationRegisterAddress; +}; + +static inline void +GEN8_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_REGISTER_REG * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SourceRegisterAddress, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DestinationRegisterAddress, 2, 22) | + 0; + +} + +#define GEN8_MI_SEMAPHORE_SIGNAL_length 0x00000002 +#define GEN8_MI_SEMAPHORE_SIGNAL_length_bias 0x00000002 +#define GEN8_MI_SEMAPHORE_SIGNAL_header \ + .CommandType = 0, \ + .MICommandOpcode = 27, \ + 
.DwordLength = 0 + +struct GEN8_MI_SEMAPHORE_SIGNAL { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t PostSyncOperation; +#define RCS 0 +#define VCS0 1 +#define BCS 2 +#define VECS 3 +#define VCS1 4 + uint32_t TargetEngineSelect; + uint32_t DwordLength; + uint32_t TargetContextID; +}; + +static inline void +GEN8_MI_SEMAPHORE_SIGNAL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SEMAPHORE_SIGNAL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PostSyncOperation, 21, 21) | + __gen_field(values->TargetEngineSelect, 15, 17) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->TargetContextID, 0, 31) | + 0; + +} + +#define GEN8_MI_SEMAPHORE_WAIT_length 0x00000004 +#define GEN8_MI_SEMAPHORE_WAIT_length_bias 0x00000002 +#define GEN8_MI_SEMAPHORE_WAIT_header \ + .CommandType = 0, \ + .MICommandOpcode = 28, \ + .DwordLength = 2 + +struct GEN8_MI_SEMAPHORE_WAIT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t MemoryType; +#define PollingMode 1 +#define SignalMode 0 + uint32_t WaitMode; +#define SAD_GREATER_THAN_SDD 0 +#define SAD_GREATER_THAN_OR_EQUAL_SDD 1 +#define SAD_LESS_THAN_SDD 2 +#define SAD_LESS_THAN_OR_EQUAL_SDD 3 +#define SAD_EQUAL_SDD 4 +#define SAD_NOT_EQUAL_SDD 5 + uint32_t CompareOperation; + uint32_t DwordLength; + uint32_t SemaphoreDataDword; + __gen_address_type SemaphoreAddress; +}; + +static inline void +GEN8_MI_SEMAPHORE_WAIT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SEMAPHORE_WAIT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->MemoryType, 22, 22) | + __gen_field(values->WaitMode, 15, 
15) | + __gen_field(values->CompareOperation, 12, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SemaphoreDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SemaphoreAddress, dw2); + +} + +#define GEN8_MI_STORE_REGISTER_MEM_length 0x00000004 +#define GEN8_MI_STORE_REGISTER_MEM_length_bias 0x00000002 +#define GEN8_MI_STORE_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 36, \ + .DwordLength = 2 + +struct GEN8_MI_STORE_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->PredicateEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN8_PIPELINE_SELECT_length 0x00000001 +#define GEN8_PIPELINE_SELECT_length_bias 0x00000001 +#define GEN8_PIPELINE_SELECT_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4 + +struct GEN8_PIPELINE_SELECT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define _3D 0 +#define Media 1 +#define GPGPU 2 + uint32_t PipelineSelection; +}; + +static inline void +GEN8_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PIPELINE_SELECT * restrict values) +{ + 
uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->PipelineSelection, 0, 1) | + 0; + +} + +#define GEN8_STATE_BASE_ADDRESS_length 0x00000010 +#define GEN8_STATE_BASE_ADDRESS_length_bias 0x00000002 +#define GEN8_STATE_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 1, \ + .DwordLength = 14 + +struct GEN8_STATE_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GeneralStateBaseAddress; + uint32_t GeneralStateMemoryObjectControlState; + uint32_t GeneralStateBaseAddressModifyEnable; + uint32_t StatelessDataPortAccessMemoryObjectControlState; + __gen_address_type SurfaceStateBaseAddress; + uint32_t SurfaceStateMemoryObjectControlState; + uint32_t SurfaceStateBaseAddressModifyEnable; + __gen_address_type DynamicStateBaseAddress; + uint32_t DynamicStateMemoryObjectControlState; + uint32_t DynamicStateBaseAddressModifyEnable; + __gen_address_type IndirectObjectBaseAddress; + uint32_t IndirectObjectMemoryObjectControlState; + uint32_t IndirectObjectBaseAddressModifyEnable; + __gen_address_type InstructionBaseAddress; + uint32_t InstructionMemoryObjectControlState; + uint32_t InstructionBaseAddressModifyEnable; + uint32_t GeneralStateBufferSize; + uint32_t GeneralStateBufferSizeModifyEnable; + uint32_t DynamicStateBufferSize; + uint32_t DynamicStateBufferSizeModifyEnable; + uint32_t IndirectObjectBufferSize; + uint32_t IndirectObjectBufferSizeModifyEnable; + uint32_t InstructionBufferSize; + uint32_t InstructionBuffersizeModifyEnable; +}; + +static inline void +GEN8_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN8_STATE_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + /* Struct GeneralStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + + dw[3] = + /* Struct StatelessDataPortAccessMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + uint32_t dw4 = + /* Struct SurfaceStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->SurfaceStateBaseAddress, dw4); + + uint32_t dw6 = + /* Struct DynamicStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->DynamicStateBaseAddress, dw6); + + uint32_t dw8 = + /* Struct IndirectObjectMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | + 0; + + dw[8] = + __gen_combine_address(data, &dw[8], values->IndirectObjectBaseAddress, dw8); + + uint32_t dw10 = + /* Struct InstructionMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | + 0; + + dw[10] = + __gen_combine_address(data, &dw[10], values->InstructionBaseAddress, dw10); + + dw[12] = + __gen_field(values->GeneralStateBufferSize, 12, 31) | + __gen_field(values->GeneralStateBufferSizeModifyEnable, 0, 0) | + 0; + + dw[13] = + 
__gen_field(values->DynamicStateBufferSize, 12, 31) | + __gen_field(values->DynamicStateBufferSizeModifyEnable, 0, 0) | + 0; + + dw[14] = + __gen_field(values->IndirectObjectBufferSize, 12, 31) | + __gen_field(values->IndirectObjectBufferSizeModifyEnable, 0, 0) | + 0; + + dw[15] = + __gen_field(values->InstructionBufferSize, 12, 31) | + __gen_field(values->InstructionBuffersizeModifyEnable, 0, 0) | + 0; + +} + +#define GEN8_STATE_PREFETCH_length 0x00000002 +#define GEN8_STATE_PREFETCH_length_bias 0x00000002 +#define GEN8_STATE_PREFETCH_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +struct GEN8_STATE_PREFETCH { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type PrefetchPointer; + uint32_t PrefetchCount; +}; + +static inline void +GEN8_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_STATE_PREFETCH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->PrefetchCount, 0, 2) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); + +} + +#define GEN8_STATE_SIP_length 0x00000003 +#define GEN8_STATE_SIP_length_bias 0x00000002 +#define GEN8_STATE_SIP_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2, \ + .DwordLength = 1 + +struct GEN8_STATE_SIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SystemInstructionPointer; +}; + +static inline void 
+GEN8_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_STATE_SIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SystemInstructionPointer, 4, 63) | + 0; + +} + +#define GEN8_SWTESS_BASE_ADDRESS_length 0x00000002 +#define GEN8_SWTESS_BASE_ADDRESS_length_bias 0x00000002 +#define GEN8_SWTESS_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +struct GEN8_SWTESS_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type SWTessellationBaseAddress; + uint32_t SWTessellationMemoryObjectControlState; + __gen_address_type SWTessellationBaseAddressHigh; +}; + +static inline void +GEN8_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SWTESS_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + /* Struct SWTessellationMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SWTessellationBaseAddressHigh, dw2); + +} + +#define GEN8_3DPRIMITIVE_length 0x00000007 +#define GEN8_3DPRIMITIVE_length_bias 0x00000002 +#define 
GEN8_3DPRIMITIVE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 3, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 5 + +struct GEN8_3DPRIMITIVE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t IndirectParameterEnable; + uint32_t UAVCoherencyRequired; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t EndOffsetEnable; +#define SEQUENTIAL 0 +#define RANDOM 1 + uint32_t VertexAccessType; + uint32_t PrimitiveTopologyType; + uint32_t VertexCountPerInstance; + uint32_t StartVertexLocation; + uint32_t InstanceCount; + uint32_t StartInstanceLocation; + uint32_t BaseVertexLocation; +}; + +static inline void +GEN8_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DPRIMITIVE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->UAVCoherencyRequired, 9, 9) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->EndOffsetEnable, 9, 9) | + __gen_field(values->VertexAccessType, 8, 8) | + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + + dw[2] = + __gen_field(values->VertexCountPerInstance, 0, 31) | + 0; + + dw[3] = + __gen_field(values->StartVertexLocation, 0, 31) | + 0; + + dw[4] = + __gen_field(values->InstanceCount, 0, 31) | + 0; + + dw[5] = + __gen_field(values->StartInstanceLocation, 0, 31) | + 0; + + dw[6] = + __gen_field(values->BaseVertexLocation, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_header \ + 
.CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +struct GEN8_3DSTATE_AA_LINE_PARAMETERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t AAPointCoverageBias; + uint32_t AACoverageBias; + uint32_t AAPointCoverageSlope; + uint32_t AACoverageSlope; + uint32_t AAPointCoverageEndCapBias; + uint32_t AACoverageEndCapBias; + uint32_t AAPointCoverageEndCapSlope; + uint32_t AACoverageEndCapSlope; +}; + +static inline void +GEN8_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_AA_LINE_PARAMETERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AAPointCoverageBias, 24, 31) | + __gen_field(values->AACoverageBias, 16, 23) | + __gen_field(values->AAPointCoverageSlope, 8, 15) | + __gen_field(values->AACoverageSlope, 0, 7) | + 0; + + dw[2] = + __gen_field(values->AAPointCoverageEndCapBias, 24, 31) | + __gen_field(values->AACoverageEndCapBias, 16, 23) | + __gen_field(values->AAPointCoverageEndCapSlope, 8, 15) | + __gen_field(values->AACoverageEndCapSlope, 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 70 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define 
Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 68 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define 
GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 69 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 71 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) 
dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 67 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 40, \ + .DwordLength = 0 + +struct 
GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 41, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 +#define 
GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 39, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 42, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + 
__gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 38, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000004 +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type BindingTablePoolBaseAddress; + uint32_t BindingTablePoolEnable; + uint32_t SurfaceObjectControlState; +#define NoValidData 0 + uint32_t BindingTablePoolBufferSize; +}; + +static inline 
void +GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->BindingTablePoolEnable, 11, 11) | + /* Struct SurfaceObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); + + dw[3] = + __gen_field(values->BindingTablePoolBufferSize, 12, 31) | + 0; + +} + +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 36, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_BLEND_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BlendStatePointer; + uint32_t BlendStatePointerValid; +}; + +static inline void +GEN8_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BLEND_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->BlendStatePointer, 6, 31) | + __gen_field(values->BlendStatePointerValid, 0, 0) | + 0; + +} + +#define 
GEN8_3DSTATE_CC_STATE_POINTERS_length 0x00000002 +#define GEN8_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 +#define GEN8_3DSTATE_CC_STATE_POINTERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_CC_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ColorCalcStatePointer; + uint32_t ColorCalcStatePointerValid; +}; + +static inline void +GEN8_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CC_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ColorCalcStatePointer, 6, 31) | + __gen_field(values->ColorCalcStatePointerValid, 0, 0) | + 0; + +} + +#define GEN8_3DSTATE_CHROMA_KEY_length 0x00000004 +#define GEN8_3DSTATE_CHROMA_KEY_length_bias 0x00000002 +#define GEN8_3DSTATE_CHROMA_KEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_CHROMA_KEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyTableIndex; + uint32_t ChromaKeyLowValue; + uint32_t ChromaKeyHighValue; +}; + +static inline void +GEN8_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CHROMA_KEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyTableIndex, 30, 31) | + 0; + + dw[2] = + __gen_field(values->ChromaKeyLowValue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyHighValue, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_CLEAR_PARAMS_length 0x00000003 +#define GEN8_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 +#define GEN8_3DSTATE_CLEAR_PARAMS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +struct GEN8_3DSTATE_CLEAR_PARAMS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float DepthClearValue; + uint32_t DepthClearValueValid; +}; + +static inline void +GEN8_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CLEAR_PARAMS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_float(values->DepthClearValue) | + 0; + + dw[2] = + __gen_field(values->DepthClearValueValid, 0, 0) | + 0; + +} + +#define GEN8_3DSTATE_CLIP_length 0x00000004 +#define GEN8_3DSTATE_CLIP_length_bias 0x00000002 +#define GEN8_3DSTATE_CLIP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define Normal 0 +#define Force 1 + uint32_t ForceUserClipDistanceCullTestEnableBitmask; +#define _8Bit 0 
+#define _4Bit 1 + uint32_t VertexSubPixelPrecisionSelect; + uint32_t EarlyCullEnable; +#define Normal 0 +#define Force 1 + uint32_t ForceUserClipDistanceClipTestEnableBitmask; +#define Normal 0 +#define Force 1 + uint32_t ForceClipMode; + uint32_t ClipperStatisticsEnable; + uint32_t UserClipDistanceCullTestEnableBitmask; + uint32_t ClipEnable; +#define API_OGL 0 + uint32_t APIMode; + uint32_t ViewportXYClipTestEnable; + uint32_t GuardbandClipTestEnable; + uint32_t UserClipDistanceClipTestEnableBitmask; +#define NORMAL 0 +#define REJECT_ALL 3 +#define ACCEPT_ALL 4 + uint32_t ClipMode; + uint32_t PerspectiveDivideDisable; + uint32_t NonPerspectiveBarycentricEnable; + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; + uint32_t TriangleFanProvokingVertexSelect; + uint32_t MinimumPointWidth; + uint32_t MaximumPointWidth; + uint32_t ForceZeroRTAIndexEnable; + uint32_t MaximumVPIndex; +}; + +static inline void +GEN8_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ForceUserClipDistanceCullTestEnableBitmask, 20, 20) | + __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | + __gen_field(values->EarlyCullEnable, 18, 18) | + __gen_field(values->ForceUserClipDistanceClipTestEnableBitmask, 17, 17) | + __gen_field(values->ForceClipMode, 16, 16) | + __gen_field(values->ClipperStatisticsEnable, 10, 10) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->ClipEnable, 31, 31) | + __gen_field(values->APIMode, 30, 30) | + __gen_field(values->ViewportXYClipTestEnable, 28, 28) | + 
__gen_field(values->GuardbandClipTestEnable, 26, 26) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | + __gen_field(values->ClipMode, 13, 15) | + __gen_field(values->PerspectiveDivideDisable, 9, 9) | + __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | + __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | + __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | + 0; + + dw[3] = + __gen_field(values->MinimumPointWidth, 17, 27) | + __gen_field(values->MaximumPointWidth, 6, 16) | + __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | + __gen_field(values->MaximumVPIndex, 0, 3) | + 0; + +} + +#define GEN8_3DSTATE_CONSTANT_DS_length 0x0000000b +#define GEN8_3DSTATE_CONSTANT_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 9 + +struct GEN8_3DSTATE_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t ConstantBufferObjectControlState; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN8_3DSTATE_CONSTANT_GS_length 0x0000000b +#define GEN8_3DSTATE_CONSTANT_GS_length_bias 0x00000002 +#define 
GEN8_3DSTATE_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 9 + +struct GEN8_3DSTATE_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t ConstantBufferObjectControlState; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN8_3DSTATE_CONSTANT_HS_length 0x0000000b +#define GEN8_3DSTATE_CONSTANT_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 9 + +struct GEN8_3DSTATE_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t ConstantBufferObjectControlState; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + /* Struct 
ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN8_3DSTATE_CONSTANT_PS_length 0x0000000b +#define GEN8_3DSTATE_CONSTANT_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 23, \ + .DwordLength = 9 + +struct GEN8_3DSTATE_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t ConstantBufferObjectControlState; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN8_3DSTATE_CONSTANT_VS_length 0x0000000b +#define GEN8_3DSTATE_CONSTANT_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 9 + +struct GEN8_3DSTATE_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t ConstantBufferObjectControlState; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN8_3DSTATE_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN8_3DSTATE_DEPTH_BUFFER_length 0x00000008 +#define GEN8_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 5, \ + .DwordLength = 6 + +struct GEN8_3DSTATE_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + uint32_t DepthWriteEnable; + uint32_t StencilWriteEnable; + uint32_t HierarchicalDepthBufferEnable; +#define D32_FLOAT 1 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t SurfaceFormat; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t LOD; + uint32_t Depth; + uint32_t MinimumArrayElement; + uint32_t DepthBufferObjectControlState; + uint32_t RenderTargetViewExtent; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN8_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + 
__gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->DepthWriteEnable, 28, 28) | + __gen_field(values->StencilWriteEnable, 27, 27) | + __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | + __gen_field(values->SurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[4] = + __gen_field(values->Height, 18, 31) | + __gen_field(values->Width, 4, 17) | + __gen_field(values->LOD, 0, 3) | + 0; + + dw[5] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->MinimumArrayElement, 10, 20) | + /* Struct DepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[6] = + 0; + + dw[7] = + __gen_field(values->RenderTargetViewExtent, 21, 31) | + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN8_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 +#define GEN8_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 +#define GEN8_3DSTATE_DRAWING_RECTANGLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_DRAWING_RECTANGLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define Legacy 0 +#define Core0Enabled 1 +#define Core1Enabled 2 + uint32_t CoreModeSelect; + uint32_t DwordLength; + uint32_t ClippedDrawingRectangleYMin; + uint32_t ClippedDrawingRectangleXMin; + uint32_t ClippedDrawingRectangleYMax; + uint32_t ClippedDrawingRectangleXMax; + uint32_t DrawingRectangleOriginY; + uint32_t DrawingRectangleOriginX; +}; + +static inline void +GEN8_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_DRAWING_RECTANGLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 
29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->CoreModeSelect, 14, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | + 0; + + dw[2] = + __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | + 0; + + dw[3] = + __gen_field(values->DrawingRectangleOriginY, 16, 31) | + __gen_field(values->DrawingRectangleOriginX, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_DS_length 0x00000009 +#define GEN8_3DSTATE_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 7 + +struct GEN8_3DSTATE_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleDomainPointDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t AccessesUAV; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t PatchURBEntryReadLength; + uint32_t PatchURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + uint32_t StatisticsEnable; + uint32_t SIMD8DispatchEnable; + uint32_t ComputeWCoordinateEnable; + 
uint32_t CacheDisable; + uint32_t FunctionEnable; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN8_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[3] = + __gen_field(values->SingleDomainPointDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->AccessesUAV, 14, 14) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->PatchURBEntryReadLength, 11, 17) | + __gen_field(values->PatchURBEntryReadOffset, 4, 9) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 21, 29) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->SIMD8DispatchEnable, 3, 3) | + __gen_field(values->ComputeWCoordinateEnable, 2, 2) | + __gen_field(values->CacheDisable, 1, 1) | + __gen_field(values->FunctionEnable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + 
__gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 55 + +struct GEN8_3DSTATE_GATHER_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + uint32_t ConstantBufferDx9GenerateStall; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 53 + +struct GEN8_3DSTATE_GATHER_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t 
ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + uint32_t ConstantBufferDx9GenerateStall; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 54 + +struct GEN8_3DSTATE_GATHER_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + uint32_t ConstantBufferDx9GenerateStall; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + 
__gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 56 + +struct GEN8_3DSTATE_GATHER_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + uint32_t ConstantBufferDx9GenerateStall; + uint32_t ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + 
._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 52 + +struct GEN8_3DSTATE_GATHER_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + uint32_t ConstantBufferDx9GenerateStall; + uint32_t ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_POOL_ALLOC_length 0x00000004 +#define GEN8_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_POOL_ALLOC_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_GATHER_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GatherPoolBaseAddress; + uint32_t GatherPoolEnable; + uint32_t MemoryObjectControlState; + uint32_t GatherPoolBufferSize; +}; + +static inline void +GEN8_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data 
*data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->GatherPoolEnable, 11, 11) | + /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); + + dw[3] = + __gen_field(values->GatherPoolBufferSize, 12, 31) | + 0; + +} + +#define GEN8_3DSTATE_GS_length 0x0000000a +#define GEN8_3DSTATE_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 8 + +struct GEN8_3DSTATE_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t AccessesUAV; + uint32_t MaskStackExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t ExpectedVertexCount; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t OutputVertexSize; + uint32_t OutputTopology; + uint32_t VertexURBEntryReadLength; + uint32_t IncludeVertexHandles; + uint32_t VertexURBEntryReadOffset; + uint32_t 
DispatchGRFStartRegisterForURBData; + uint32_t MaximumNumberofThreads; + uint32_t ControlDataHeaderSize; + uint32_t InstanceControl; + uint32_t DefaultStreamId; +#define DispatchModeSingle 0 +#define DispatchModeDualInstance 1 +#define DispatchModeDualObject 2 +#define DispatchModeSIMD8 3 + uint32_t DispatchMode; + uint32_t StatisticsEnable; + uint32_t InvocationsIncrementValue; + uint32_t IncludePrimitiveID; + uint32_t Hint; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + uint32_t DiscardAdjacency; + uint32_t Enable; +#define CUT 0 +#define SID 1 + uint32_t ControlDataFormat; + uint32_t StaticOutput; + uint32_t StaticOutputVertexCount; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN8_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[3] = + __gen_field(values->SingleProgramFlow, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->AccessesUAV, 12, 12) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + __gen_field(values->ExpectedVertexCount, 0, 5) | + 0; + + dw[4] = + 
__gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[6] = + __gen_field(values->OutputVertexSize, 23, 28) | + __gen_field(values->OutputTopology, 17, 22) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->IncludeVertexHandles, 10, 10) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 0, 3) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 24, 31) | + __gen_field(values->ControlDataHeaderSize, 20, 23) | + __gen_field(values->InstanceControl, 15, 19) | + __gen_field(values->DefaultStreamId, 13, 14) | + __gen_field(values->DispatchMode, 11, 12) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->InvocationsIncrementValue, 5, 9) | + __gen_field(values->IncludePrimitiveID, 4, 4) | + __gen_field(values->Hint, 3, 3) | + __gen_field(values->ReorderMode, 2, 2) | + __gen_field(values->DiscardAdjacency, 1, 1) | + __gen_field(values->Enable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->ControlDataFormat, 31, 31) | + __gen_field(values->StaticOutput, 30, 30) | + __gen_field(values->StaticOutputVertexCount, 16, 26) | + 0; + + dw[9] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000005 +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 3 + +struct GEN8_3DSTATE_HIER_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t 
HierarchicalDepthBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN8_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_HIER_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct HierarchicalDepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[4] = + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN8_3DSTATE_HS_length 0x00000009 +#define GEN8_3DSTATE_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 27, \ + .DwordLength = 7 + +struct GEN8_3DSTATE_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t Enable; + uint32_t StatisticsEnable; + uint32_t MaximumNumberofThreads; + uint32_t InstanceCount; + uint32_t KernelStartPointer; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t SingleProgramFlow; +#define 
Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; + uint32_t AccessesUAV; + uint32_t IncludeVertexHandles; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; +}; + +static inline void +GEN8_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 12, 12) | + 0; + + dw[2] = + __gen_field(values->Enable, 31, 31) | + __gen_field(values->StatisticsEnable, 29, 29) | + __gen_field(values->MaximumNumberofThreads, 8, 16) | + __gen_field(values->InstanceCount, 0, 3) | + 0; + + dw[3] = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[5] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[7] = + __gen_field(values->SingleProgramFlow, 27, 27) | + __gen_field(values->VectorMaskEnable, 26, 26) | + __gen_field(values->AccessesUAV, 25, 25) | + __gen_field(values->IncludeVertexHandles, 24, 24) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[8] = + 0; + +} + +#define GEN8_3DSTATE_INDEX_BUFFER_length 0x00000005 +#define GEN8_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 +#define 
GEN8_3DSTATE_INDEX_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 3 + +struct GEN8_3DSTATE_INDEX_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 2 + uint32_t IndexFormat; + uint32_t MemoryObjectControlState; + __gen_address_type BufferStartingAddress; + uint32_t BufferSize; +}; + +static inline void +GEN8_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_INDEX_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->IndexFormat, 8, 9) | + /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BufferStartingAddress, dw2); + + dw[4] = + __gen_field(values->BufferSize, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_LINE_STIPPLE_length 0x00000003 +#define GEN8_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 +#define GEN8_3DSTATE_LINE_STIPPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 8, \ + .DwordLength = 1 + +struct GEN8_3DSTATE_LINE_STIPPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + uint32_t CurrentRepeatCounter; + uint32_t CurrentStippleIndex; + uint32_t LineStipplePattern; + uint32_t LineStippleInverseRepeatCount; + uint32_t LineStippleRepeatCount; +}; 
+ +static inline void +GEN8_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_LINE_STIPPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | + __gen_field(values->CurrentRepeatCounter, 21, 29) | + __gen_field(values->CurrentStippleIndex, 16, 19) | + __gen_field(values->LineStipplePattern, 0, 15) | + 0; + + dw[2] = + __gen_field(values->LineStippleInverseRepeatCount, 15, 31) | + __gen_field(values->LineStippleRepeatCount, 0, 8) | + 0; + +} + +#define GEN8_3DSTATE_MONOFILTER_SIZE_length 0x00000002 +#define GEN8_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 +#define GEN8_3DSTATE_MONOFILTER_SIZE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_MONOFILTER_SIZE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t MonochromeFilterWidth; + uint32_t MonochromeFilterHeight; +}; + +static inline void +GEN8_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_MONOFILTER_SIZE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MonochromeFilterWidth, 3, 5) | + __gen_field(values->MonochromeFilterHeight, 0, 2) | + 0; + +} + 
+#define GEN8_3DSTATE_MULTISAMPLE_length 0x00000002 +#define GEN8_3DSTATE_MULTISAMPLE_length_bias 0x00000002 +#define GEN8_3DSTATE_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 13, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PixelPositionOffsetEnable; +#define CENTER 0 +#define UL_CORNER 1 + uint32_t PixelLocation; + uint32_t NumberofMultisamples; +}; + +static inline void +GEN8_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelPositionOffsetEnable, 5, 5) | + __gen_field(values->PixelLocation, 4, 4) | + __gen_field(values->NumberofMultisamples, 1, 3) | + 0; + +} + +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PolygonStippleXOffset; + uint32_t PolygonStippleYOffset; +}; + +static inline void +GEN8_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + 
+ dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PolygonStippleXOffset, 8, 12) | + __gen_field(values->PolygonStippleYOffset, 0, 4) | + 0; + +} + +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 31 + +struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PatternRow; +}; + +static inline void +GEN8_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PatternRow, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_PS_length 0x0000000c +#define GEN8_3DSTATE_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 32, \ + .DwordLength = 10 + +struct GEN8_3DSTATE_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer0; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t 
VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; +#define FlushedtoZero 0 +#define Retained 1 + uint32_t SinglePrecisionDenormalMode; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t MaskStackExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreadsPerPSD; + uint32_t PushConstantEnable; + uint32_t RenderTargetFastClearEnable; + uint32_t RenderTargetResolveEnable; +#define POSOFFSET_NONE 0 +#define POSOFFSET_CENTROID 2 +#define POSOFFSET_SAMPLE 3 + uint32_t PositionXYOffsetSelect; + uint32_t _32PixelDispatchEnable; + uint32_t _16PixelDispatchEnable; + uint32_t _8PixelDispatchEnable; + uint32_t DispatchGRFStartRegisterForConstantSetupData0; + uint32_t DispatchGRFStartRegisterForConstantSetupData1; + uint32_t DispatchGRFStartRegisterForConstantSetupData2; + uint32_t KernelStartPointer1; + uint32_t KernelStartPointer2; +}; + +static inline void +GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer0, 6, 63) | + 0; + + dw[3] = + __gen_field(values->SingleProgramFlow, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + 
__gen_field(values->SinglePrecisionDenormalMode, 26, 26) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->RoundingMode, 14, 15) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[6] = + __gen_field(values->MaximumNumberofThreadsPerPSD, 23, 31) | + __gen_field(values->PushConstantEnable, 11, 11) | + __gen_field(values->RenderTargetFastClearEnable, 8, 8) | + __gen_field(values->RenderTargetResolveEnable, 6, 6) | + __gen_field(values->PositionXYOffsetSelect, 3, 4) | + __gen_field(values->_32PixelDispatchEnable, 2, 2) | + __gen_field(values->_16PixelDispatchEnable, 1, 1) | + __gen_field(values->_8PixelDispatchEnable, 0, 0) | + 0; + + dw[7] = + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData0, 16, 22) | + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData1, 8, 14) | + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData2, 0, 6) | + 0; + + dw[8] = + __gen_offset(values->KernelStartPointer1, 6, 63) | + 0; + + dw[10] = + __gen_offset(values->KernelStartPointer2, 6, 63) | + 0; + +} + +#define GEN8_3DSTATE_PS_BLEND_length 0x00000002 +#define GEN8_3DSTATE_PS_BLEND_length_bias 0x00000002 +#define GEN8_3DSTATE_PS_BLEND_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 77, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PS_BLEND { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t AlphaToCoverageEnable; + uint32_t HasWriteableRT; + uint32_t ColorBufferBlendEnable; + uint32_t 
SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + uint32_t AlphaTestEnable; + uint32_t IndependentAlphaBlendEnable; +}; + +static inline void +GEN8_3DSTATE_PS_BLEND_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PS_BLEND * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->HasWriteableRT, 30, 30) | + __gen_field(values->ColorBufferBlendEnable, 29, 29) | + __gen_field(values->SourceAlphaBlendFactor, 24, 28) | + __gen_field(values->DestinationAlphaBlendFactor, 19, 23) | + __gen_field(values->SourceBlendFactor, 14, 18) | + __gen_field(values->DestinationBlendFactor, 9, 13) | + __gen_field(values->AlphaTestEnable, 8, 8) | + __gen_field(values->IndependentAlphaBlendEnable, 7, 7) | + 0; + +} + +#define GEN8_3DSTATE_PS_EXTRA_length 0x00000002 +#define GEN8_3DSTATE_PS_EXTRA_length_bias 0x00000002 +#define GEN8_3DSTATE_PS_EXTRA_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 79, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PS_EXTRA { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PixelShaderValid; + uint32_t PixelShaderDoesnotwritetoRT; + uint32_t oMaskPresenttoRenderTarget; + uint32_t PixelShaderKillsPixel; +#define PSCDEPTH_OFF 0 +#define PSCDEPTH_ON 1 +#define PSCDEPTH_ON_GE 2 +#define PSCDEPTH_ON_LE 3 + uint32_t PixelShaderComputedDepthMode; + uint32_t ForceComputedDepth; + uint32_t PixelShaderUsesSourceDepth; + uint32_t PixelShaderUsesSourceW; + 
uint32_t Removed; + uint32_t AttributeEnable; + uint32_t PixelShaderDisablesAlphaToCoverage; + uint32_t PixelShaderIsPerSample; + uint32_t PixelShaderHasUAV; + uint32_t PixelShaderUsesInputCoverageMask; +}; + +static inline void +GEN8_3DSTATE_PS_EXTRA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PS_EXTRA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelShaderValid, 31, 31) | + __gen_field(values->PixelShaderDoesnotwritetoRT, 30, 30) | + __gen_field(values->oMaskPresenttoRenderTarget, 29, 29) | + __gen_field(values->PixelShaderKillsPixel, 28, 28) | + __gen_field(values->PixelShaderComputedDepthMode, 26, 27) | + __gen_field(values->ForceComputedDepth, 25, 25) | + __gen_field(values->PixelShaderUsesSourceDepth, 24, 24) | + __gen_field(values->PixelShaderUsesSourceW, 23, 23) | + __gen_field(values->Removed, 17, 17) | + __gen_field(values->AttributeEnable, 8, 8) | + __gen_field(values->PixelShaderDisablesAlphaToCoverage, 7, 7) | + __gen_field(values->PixelShaderIsPerSample, 6, 6) | + __gen_field(values->PixelShaderHasUAV, 2, 2) | + __gen_field(values->PixelShaderUsesInputCoverageMask, 1, 1) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t 
ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ + 
.CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + 
__gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_RASTER_length 0x00000005 +#define GEN8_3DSTATE_RASTER_length_bias 0x00000002 +#define GEN8_3DSTATE_RASTER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 80, \ + .DwordLength = 3 + +struct GEN8_3DSTATE_RASTER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DX9OGL 0 +#define DX100 1 +#define DX101 2 + uint32_t APIMode; +#define Clockwise 0 +#define CounterClockwise 1 + uint32_t FrontWinding; +#define NUMRASTSAMPLES_0 0 +#define NUMRASTSAMPLES_1 1 +#define NUMRASTSAMPLES_2 
2 +#define NUMRASTSAMPLES_4 3 +#define NUMRASTSAMPLES_8 4 +#define NUMRASTSAMPLES_16 5 + uint32_t ForcedSampleCount; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; +#define Normal 0 +#define Force 1 + uint32_t ForceMultisampling; + uint32_t SmoothPointEnable; + uint32_t DXMultisampleRasterizationEnable; +#define MSRASTMODE_OFF_PIXEL 0 +#define MSRASTMODE_OFF_PATTERN 1 +#define MSRASTMODE_ON_PIXEL 2 +#define MSRASTMODE_ON_PATTERN 3 + uint32_t DXMultisampleRasterizationMode; + uint32_t GlobalDepthOffsetEnableSolid; + uint32_t GlobalDepthOffsetEnableWireframe; + uint32_t GlobalDepthOffsetEnablePoint; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t FrontFaceFillMode; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t BackFaceFillMode; + uint32_t AntialiasingEnable; + uint32_t ScissorRectangleEnable; + uint32_t ViewportZClipTestEnable; + float GlobalDepthOffsetConstant; + float GlobalDepthOffsetScale; + float GlobalDepthOffsetClamp; +}; + +static inline void +GEN8_3DSTATE_RASTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_RASTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->APIMode, 22, 23) | + __gen_field(values->FrontWinding, 21, 21) | + __gen_field(values->ForcedSampleCount, 18, 20) | + __gen_field(values->CullMode, 16, 17) | + __gen_field(values->ForceMultisampling, 14, 14) | + __gen_field(values->SmoothPointEnable, 13, 13) | + __gen_field(values->DXMultisampleRasterizationEnable, 12, 12) | + __gen_field(values->DXMultisampleRasterizationMode, 10, 11) | + 
__gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | + __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | + __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | + __gen_field(values->FrontFaceFillMode, 5, 6) | + __gen_field(values->BackFaceFillMode, 3, 4) | + __gen_field(values->AntialiasingEnable, 2, 2) | + __gen_field(values->ScissorRectangleEnable, 1, 1) | + __gen_field(values->ViewportZClipTestEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->GlobalDepthOffsetConstant) | + 0; + + dw[3] = + __gen_float(values->GlobalDepthOffsetScale) | + 0; + + dw[4] = + __gen_float(values->GlobalDepthOffsetClamp) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2 + +struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 12 + +struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + 
uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 45, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 46, \ + 
.DwordLength = 0 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 44, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 +#define 
GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 47, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 43, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + 
__gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLE_MASK_length 0x00000002 +#define GEN8_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLE_MASK_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SAMPLE_MASK { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SampleMask; +}; + +static inline void +GEN8_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLE_MASK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SampleMask, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLE_PATTERN_length 0x00000009 +#define GEN8_3DSTATE_SAMPLE_PATTERN_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLE_PATTERN_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 7 + +struct GEN8_3DSTATE_SAMPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t _8xSample7XOffset; + uint32_t _8xSample7YOffset; + uint32_t _8xSample6XOffset; + uint32_t _8xSample6YOffset; + uint32_t _8xSample5XOffset; + uint32_t _8xSample5YOffset; + uint32_t _8xSample4XOffset; + uint32_t _8xSample4YOffset; + uint32_t _8xSample3XOffset; + uint32_t _8xSample3YOffset; + uint32_t _8xSample2XOffset; + uint32_t 
_8xSample2YOffset; + uint32_t _8xSample1XOffset; + uint32_t _8xSample1YOffset; + uint32_t _8xSample0XOffset; + uint32_t _8xSample0YOffset; + uint32_t _4xSample3XOffset; + uint32_t _4xSample3YOffset; + uint32_t _4xSample2XOffset; + uint32_t _4xSample2YOffset; + uint32_t _4xSample1XOffset; + uint32_t _4xSample1YOffset; + uint32_t _4xSample0XOffset; + uint32_t _4xSample0YOffset; + uint32_t _1xSample0XOffset; + uint32_t _1xSample0YOffset; + uint32_t _2xSample1XOffset; + uint32_t _2xSample1YOffset; + uint32_t _2xSample0XOffset; + uint32_t _2xSample0YOffset; +}; + +static inline void +GEN8_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 0; + + dw[5] = + __gen_field(values->_8xSample7XOffset, 28, 31) | + __gen_field(values->_8xSample7YOffset, 24, 27) | + __gen_field(values->_8xSample6XOffset, 20, 23) | + __gen_field(values->_8xSample6YOffset, 16, 19) | + __gen_field(values->_8xSample5XOffset, 12, 15) | + __gen_field(values->_8xSample5YOffset, 8, 11) | + __gen_field(values->_8xSample4XOffset, 4, 7) | + __gen_field(values->_8xSample4YOffset, 0, 3) | + 0; + + dw[6] = + __gen_field(values->_8xSample3XOffset, 28, 31) | + __gen_field(values->_8xSample3YOffset, 24, 27) | + __gen_field(values->_8xSample2XOffset, 20, 23) | + __gen_field(values->_8xSample2YOffset, 16, 19) | + __gen_field(values->_8xSample1XOffset, 12, 15) | + __gen_field(values->_8xSample1YOffset, 8, 11) | + __gen_field(values->_8xSample0XOffset, 4, 7) | + __gen_field(values->_8xSample0YOffset, 0, 3) | + 0; + + dw[7] = + __gen_field(values->_4xSample3XOffset, 28, 31) | + __gen_field(values->_4xSample3YOffset, 24, 27) | 
+ __gen_field(values->_4xSample2XOffset, 20, 23) | + __gen_field(values->_4xSample2YOffset, 16, 19) | + __gen_field(values->_4xSample1XOffset, 12, 15) | + __gen_field(values->_4xSample1YOffset, 8, 11) | + __gen_field(values->_4xSample0XOffset, 4, 7) | + __gen_field(values->_4xSample0YOffset, 0, 3) | + 0; + + dw[8] = + __gen_field(values->_1xSample0XOffset, 20, 23) | + __gen_field(values->_1xSample0YOffset, 16, 19) | + __gen_field(values->_2xSample1XOffset, 12, 15) | + __gen_field(values->_2xSample1YOffset, 8, 11) | + __gen_field(values->_2xSample0XOffset, 4, 7) | + __gen_field(values->_2xSample0YOffset, 0, 3) | + 0; + +} + +#define GEN8_3DSTATE_SBE_length 0x00000004 +#define GEN8_3DSTATE_SBE_length_bias 0x00000002 +#define GEN8_3DSTATE_SBE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 31, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_SBE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ForceVertexURBEntryReadLength; + uint32_t ForceVertexURBEntryReadOffset; + uint32_t NumberofSFOutputAttributes; + uint32_t AttributeSwizzleEnable; +#define UPPERLEFT 0 +#define LOWERLEFT 1 + uint32_t PointSpriteTextureCoordinateOrigin; + uint32_t PrimitiveIDOverrideComponentW; + uint32_t PrimitiveIDOverrideComponentZ; + uint32_t PrimitiveIDOverrideComponentY; + uint32_t PrimitiveIDOverrideComponentX; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t PrimitiveIDOverrideAttributeSelect; + uint32_t PointSpriteTextureCoordinateEnable; + uint32_t ConstantInterpolationEnable; +}; + +static inline void +GEN8_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SBE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ForceVertexURBEntryReadLength, 29, 29) | + __gen_field(values->ForceVertexURBEntryReadOffset, 28, 28) | + __gen_field(values->NumberofSFOutputAttributes, 22, 27) | + __gen_field(values->AttributeSwizzleEnable, 21, 21) | + __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | + __gen_field(values->PrimitiveIDOverrideComponentW, 19, 19) | + __gen_field(values->PrimitiveIDOverrideComponentZ, 18, 18) | + __gen_field(values->PrimitiveIDOverrideComponentY, 17, 17) | + __gen_field(values->PrimitiveIDOverrideComponentX, 16, 16) | + __gen_field(values->VertexURBEntryReadLength, 11, 15) | + __gen_field(values->VertexURBEntryReadOffset, 5, 10) | + __gen_field(values->PrimitiveIDOverrideAttributeSelect, 0, 4) | + 0; + + dw[2] = + __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ConstantInterpolationEnable, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_SBE_SWIZ_length 0x0000000b +#define GEN8_3DSTATE_SBE_SWIZ_length_bias 0x00000002 +#define GEN8_3DSTATE_SBE_SWIZ_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 81, \ + .DwordLength = 9 + +struct GEN8_3DSTATE_SBE_SWIZ { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t Attribute; + uint32_t Attribute15WrapShortestEnables; + uint32_t Attribute14WrapShortestEnables; + uint32_t Attribute13WrapShortestEnables; + uint32_t Attribute12WrapShortestEnables; + uint32_t Attribute11WrapShortestEnables; + uint32_t Attribute10WrapShortestEnables; + uint32_t Attribute09WrapShortestEnables; + uint32_t Attribute08WrapShortestEnables; + uint32_t Attribute07WrapShortestEnables; + uint32_t Attribute06WrapShortestEnables; + uint32_t 
Attribute05WrapShortestEnables; + uint32_t Attribute04WrapShortestEnables; + uint32_t Attribute03WrapShortestEnables; + uint32_t Attribute02WrapShortestEnables; + uint32_t Attribute01WrapShortestEnables; + uint32_t Attribute00WrapShortestEnables; +}; + +static inline void +GEN8_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SBE_SWIZ * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct Attribute: found SF_OUTPUT_ATTRIBUTE_DETAIL */ + 0; + + dw[9] = + __gen_field(values->Attribute15WrapShortestEnables, 60, 63) | + __gen_field(values->Attribute14WrapShortestEnables, 56, 59) | + __gen_field(values->Attribute13WrapShortestEnables, 52, 55) | + __gen_field(values->Attribute12WrapShortestEnables, 48, 51) | + __gen_field(values->Attribute11WrapShortestEnables, 44, 47) | + __gen_field(values->Attribute10WrapShortestEnables, 40, 43) | + __gen_field(values->Attribute09WrapShortestEnables, 36, 39) | + __gen_field(values->Attribute08WrapShortestEnables, 32, 35) | + __gen_field(values->Attribute07WrapShortestEnables, 28, 31) | + __gen_field(values->Attribute06WrapShortestEnables, 24, 27) | + __gen_field(values->Attribute05WrapShortestEnables, 20, 23) | + __gen_field(values->Attribute04WrapShortestEnables, 16, 19) | + __gen_field(values->Attribute03WrapShortestEnables, 12, 15) | + __gen_field(values->Attribute02WrapShortestEnables, 8, 11) | + __gen_field(values->Attribute01WrapShortestEnables, 4, 7) | + __gen_field(values->Attribute00WrapShortestEnables, 0, 3) | + 0; + +} + +#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 +#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 +#define 
GEN8_3DSTATE_SCISSOR_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 15, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ScissorRectPointer; +}; + +static inline void +GEN8_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ScissorRectPointer, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SF_length 0x00000004 +#define GEN8_3DSTATE_SF_length_bias 0x00000002 +#define GEN8_3DSTATE_SF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_SF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t LegacyGlobalDepthBiasEnable; + uint32_t StatisticsEnable; + uint32_t ViewportTransformEnable; + uint32_t LineWidth; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineEndCapAntialiasingRegionWidth; + uint32_t LastPixelEnable; + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; + uint32_t TriangleFanProvokingVertexSelect; +#define AALINEDISTANCE_TRUE 1 + uint32_t AALineDistanceMode; + uint32_t SmoothPointEnable; + uint32_t VertexSubPixelPrecisionSelect; +#define Vertex 0 +#define State 1 + uint32_t PointWidthSource; + uint32_t 
PointWidth; +}; + +static inline void +GEN8_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->ViewportTransformEnable, 1, 1) | + 0; + + dw[2] = + __gen_field(values->LineWidth, 18, 27) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | + 0; + + dw[3] = + __gen_field(values->LastPixelEnable, 31, 31) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | + __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | + __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | + __gen_field(values->AALineDistanceMode, 14, 14) | + __gen_field(values->SmoothPointEnable, 13, 13) | + __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | + __gen_field(values->PointWidthSource, 11, 11) | + __gen_field(values->PointWidth, 0, 10) | + 0; + +} + +#define GEN8_3DSTATE_SO_BUFFER_length 0x00000008 +#define GEN8_3DSTATE_SO_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_SO_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 6 + +struct GEN8_3DSTATE_SO_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOBufferEnable; + uint32_t SOBufferIndex; + uint32_t SOBufferObjectControlState; + uint32_t StreamOffsetWriteEnable; + uint32_t StreamOutputBufferOffsetAddressEnable; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceSize; + 
__gen_address_type StreamOutputBufferOffsetAddress; + uint32_t StreamOffset; +}; + +static inline void +GEN8_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SO_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOBufferEnable, 31, 31) | + __gen_field(values->SOBufferIndex, 29, 30) | + /* Struct SOBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->StreamOffsetWriteEnable, 21, 21) | + __gen_field(values->StreamOutputBufferOffsetAddressEnable, 20, 20) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[4] = + __gen_field(values->SurfaceSize, 0, 29) | + 0; + + uint32_t dw5 = + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->StreamOutputBufferOffsetAddress, dw5); + + dw[7] = + __gen_field(values->StreamOffset, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 +#define GEN8_3DSTATE_SO_DECL_LIST_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 23 + +struct GEN8_3DSTATE_SO_DECL_LIST { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StreamtoBufferSelects3; + uint32_t StreamtoBufferSelects2; + uint32_t StreamtoBufferSelects1; + uint32_t StreamtoBufferSelects0; + uint32_t NumEntries3; + uint32_t NumEntries2; + uint32_t NumEntries1; + uint32_t NumEntries0; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN8_3DSTATE_SO_DECL_LIST * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->StreamtoBufferSelects3, 12, 15) | + __gen_field(values->StreamtoBufferSelects2, 8, 11) | + __gen_field(values->StreamtoBufferSelects1, 4, 7) | + __gen_field(values->StreamtoBufferSelects0, 0, 3) | + 0; + + dw[2] = + __gen_field(values->NumEntries3, 24, 31) | + __gen_field(values->NumEntries2, 16, 23) | + __gen_field(values->NumEntries1, 8, 15) | + __gen_field(values->NumEntries0, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_STENCIL_BUFFER_length 0x00000005 +#define GEN8_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_STENCIL_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 3 + +struct GEN8_3DSTATE_STENCIL_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilBufferEnable; + uint32_t StencilBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN8_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_STENCIL_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StencilBufferEnable, 31, 31) | + /* Struct 
StencilBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[4] = + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN8_3DSTATE_STREAMOUT_length 0x00000005 +#define GEN8_3DSTATE_STREAMOUT_length_bias 0x00000002 +#define GEN8_3DSTATE_STREAMOUT_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 30, \ + .DwordLength = 3 + +struct GEN8_3DSTATE_STREAMOUT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOFunctionEnable; + uint32_t APIRenderingDisable; + uint32_t RenderStreamSelect; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + uint32_t SOStatisticsEnable; +#define Normal 0 +#define Resreved 1 +#define Force_Off 2 +#define Force_on 3 + uint32_t ForceRendering; + uint32_t Stream3VertexReadOffset; + uint32_t Stream3VertexReadLength; + uint32_t Stream2VertexReadOffset; + uint32_t Stream2VertexReadLength; + uint32_t Stream1VertexReadOffset; + uint32_t Stream1VertexReadLength; + uint32_t Stream0VertexReadOffset; + uint32_t Stream0VertexReadLength; + uint32_t Buffer1SurfacePitch; + uint32_t Buffer0SurfacePitch; + uint32_t Buffer3SurfacePitch; + uint32_t Buffer2SurfacePitch; +}; + +static inline void +GEN8_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_STREAMOUT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOFunctionEnable, 31, 31) | + 
__gen_field(values->APIRenderingDisable, 30, 30) | + __gen_field(values->RenderStreamSelect, 27, 28) | + __gen_field(values->ReorderMode, 26, 26) | + __gen_field(values->SOStatisticsEnable, 25, 25) | + __gen_field(values->ForceRendering, 23, 24) | + 0; + + dw[2] = + __gen_field(values->Stream3VertexReadOffset, 29, 29) | + __gen_field(values->Stream3VertexReadLength, 24, 28) | + __gen_field(values->Stream2VertexReadOffset, 21, 21) | + __gen_field(values->Stream2VertexReadLength, 16, 20) | + __gen_field(values->Stream1VertexReadOffset, 13, 13) | + __gen_field(values->Stream1VertexReadLength, 8, 12) | + __gen_field(values->Stream0VertexReadOffset, 5, 5) | + __gen_field(values->Stream0VertexReadLength, 0, 4) | + 0; + + dw[3] = + __gen_field(values->Buffer1SurfacePitch, 16, 27) | + __gen_field(values->Buffer0SurfacePitch, 0, 11) | + 0; + + dw[4] = + __gen_field(values->Buffer3SurfacePitch, 16, 27) | + __gen_field(values->Buffer2SurfacePitch, 0, 11) | + 0; + +} + +#define GEN8_3DSTATE_TE_length 0x00000004 +#define GEN8_3DSTATE_TE_length_bias 0x00000002 +#define GEN8_3DSTATE_TE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_TE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INTEGER 0 +#define ODD_FRACTIONAL 1 +#define EVEN_FRACTIONAL 2 + uint32_t Partitioning; +#define POINT 0 +#define LINE 1 +#define TRI_CW 2 +#define TRI_CCW 3 + uint32_t OutputTopology; +#define QUAD 0 +#define TRI 1 +#define ISOLINE 2 + uint32_t TEDomain; +#define HW_TESS 0 +#define SW_TESS 1 + uint32_t TEMode; + uint32_t TEEnable; + float MaximumTessellationFactorOdd; + float MaximumTessellationFactorNotOdd; +}; + +static inline void +GEN8_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_TE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + 
dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Partitioning, 12, 13) | + __gen_field(values->OutputTopology, 8, 9) | + __gen_field(values->TEDomain, 4, 5) | + __gen_field(values->TEMode, 1, 2) | + __gen_field(values->TEEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->MaximumTessellationFactorOdd) | + 0; + + dw[3] = + __gen_float(values->MaximumTessellationFactorNotOdd) | + 0; + +} + +#define GEN8_3DSTATE_URB_DS_length 0x00000002 +#define GEN8_3DSTATE_URB_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 50, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_URB_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DSURBStartingAddress; + uint32_t DSURBEntryAllocationSize; + uint32_t DSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DSURBStartingAddress, 25, 31) | + __gen_field(values->DSURBEntryAllocationSize, 16, 24) | + __gen_field(values->DSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_URB_GS_length 0x00000002 +#define GEN8_3DSTATE_URB_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + 
._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 51, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_URB_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t GSURBStartingAddress; + uint32_t GSURBEntryAllocationSize; + uint32_t GSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->GSURBStartingAddress, 25, 31) | + __gen_field(values->GSURBEntryAllocationSize, 16, 24) | + __gen_field(values->GSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_URB_HS_length 0x00000002 +#define GEN8_3DSTATE_URB_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 49, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_URB_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HSURBStartingAddress; + uint32_t HSURBEntryAllocationSize; + uint32_t HSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 
__gen_field(values->HSURBStartingAddress, 25, 31) | + __gen_field(values->HSURBEntryAllocationSize, 16, 24) | + __gen_field(values->HSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 +#define GEN8_3DSTATE_VERTEX_BUFFERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 8 + +struct GEN8_3DSTATE_VERTEX_BUFFERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VERTEX_BUFFERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 +#define GEN8_3DSTATE_VERTEX_ELEMENTS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 9 + +struct GEN8_3DSTATE_VERTEX_ELEMENTS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VERTEX_ELEMENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | 
+ 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_VF_length 0x00000002 +#define GEN8_3DSTATE_VF_length_bias 0x00000002 +#define GEN8_3DSTATE_VF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 12, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_VF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t IndexedDrawCutIndexEnable; + uint32_t DwordLength; + uint32_t CutIndex; +}; + +static inline void +GEN8_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndexedDrawCutIndexEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CutIndex, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_VF_INSTANCING_length 0x00000003 +#define GEN8_3DSTATE_VF_INSTANCING_length_bias 0x00000002 +#define GEN8_3DSTATE_VF_INSTANCING_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 73, \ + .DwordLength = 1 + +struct GEN8_3DSTATE_VF_INSTANCING { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t InstancingEnable; + uint32_t VertexElementIndex; + uint32_t InstanceDataStepRate; +}; + +static inline void +GEN8_3DSTATE_VF_INSTANCING_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF_INSTANCING * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 
26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InstancingEnable, 8, 8) | + __gen_field(values->VertexElementIndex, 0, 5) | + 0; + + dw[2] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_VF_SGVS_length 0x00000002 +#define GEN8_3DSTATE_VF_SGVS_length_bias 0x00000002 +#define GEN8_3DSTATE_VF_SGVS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 74, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_VF_SGVS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t InstanceIDEnable; +#define COMP_0 0 +#define COMP_1 1 +#define COMP_2 2 +#define COMP_3 3 + uint32_t InstanceIDComponentNumber; + uint32_t InstanceIDElementOffset; + uint32_t VertexIDEnable; +#define COMP_0 0 +#define COMP_1 1 +#define COMP_2 2 +#define COMP_3 3 + uint32_t VertexIDComponentNumber; + uint32_t VertexIDElementOffset; +}; + +static inline void +GEN8_3DSTATE_VF_SGVS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF_SGVS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InstanceIDEnable, 31, 31) | + __gen_field(values->InstanceIDComponentNumber, 29, 30) | + __gen_field(values->InstanceIDElementOffset, 16, 21) | + __gen_field(values->VertexIDEnable, 15, 15) | + __gen_field(values->VertexIDComponentNumber, 13, 14) | + __gen_field(values->VertexIDElementOffset, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_VF_STATISTICS_length 0x00000001 +#define GEN8_3DSTATE_VF_STATISTICS_length_bias 0x00000001 
+#define GEN8_3DSTATE_VF_STATISTICS_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 11 + +struct GEN8_3DSTATE_VF_STATISTICS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t StatisticsEnable; +}; + +static inline void +GEN8_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF_STATISTICS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->StatisticsEnable, 0, 0) | + 0; + +} + +#define GEN8_3DSTATE_VF_TOPOLOGY_length 0x00000002 +#define GEN8_3DSTATE_VF_TOPOLOGY_length_bias 0x00000002 +#define GEN8_3DSTATE_VF_TOPOLOGY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 75, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_VF_TOPOLOGY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PrimitiveTopologyType; +}; + +static inline void +GEN8_3DSTATE_VF_TOPOLOGY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF_TOPOLOGY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 +#define 
GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 35, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t CCViewportPointer; +}; + +static inline void +GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->CCViewportPointer, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 33, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SFClipViewportPointer; +}; + +static inline void +GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + 
__gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SFClipViewportPointer, 6, 31) | + 0; + +} + +#define GEN8_3DSTATE_WM_length 0x00000002 +#define GEN8_3DSTATE_WM_length_bias 0x00000002 +#define GEN8_3DSTATE_WM_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_WM { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StatisticsEnable; + uint32_t LegacyDepthBufferClearEnable; + uint32_t LegacyDepthBufferResolveEnable; + uint32_t LegacyHierarchicalDepthBufferResolveEnable; + uint32_t LegacyDiamondLineRasterization; +#define NORMAL 0 +#define PSEXEC 1 +#define PREPS 2 + uint32_t EarlyDepthStencilControl; +#define Normal 0 +#define ForceOff 1 +#define ForceON 2 + uint32_t ForceThreadDispatchEnable; +#define INTERP_PIXEL 0 +#define INTERP_CENTROID 2 +#define INTERP_SAMPLE 3 + uint32_t PositionZWInterpolationMode; + uint32_t BarycentricInterpolationMode; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineEndCapAntialiasingRegionWidth; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineAntialiasingRegionWidth; + uint32_t PolygonStippleEnable; + uint32_t LineStippleEnable; +#define RASTRULE_UPPER_LEFT 0 +#define RASTRULE_UPPER_RIGHT 1 + uint32_t PointRasterizationRule; +#define Normal 0 +#define ForceOff 1 +#define ForceON 2 + uint32_t ForceKillPixelEnable; +}; + +static inline void +GEN8_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_WM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + 
__gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StatisticsEnable, 31, 31) | + __gen_field(values->LegacyDepthBufferClearEnable, 30, 30) | + __gen_field(values->LegacyDepthBufferResolveEnable, 28, 28) | + __gen_field(values->LegacyHierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | + __gen_field(values->EarlyDepthStencilControl, 21, 22) | + __gen_field(values->ForceThreadDispatchEnable, 19, 20) | + __gen_field(values->PositionZWInterpolationMode, 17, 18) | + __gen_field(values->BarycentricInterpolationMode, 11, 16) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | + __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | + __gen_field(values->PolygonStippleEnable, 4, 4) | + __gen_field(values->LineStippleEnable, 3, 3) | + __gen_field(values->PointRasterizationRule, 2, 2) | + __gen_field(values->ForceKillPixelEnable, 0, 1) | + 0; + +} + +#define GEN8_3DSTATE_WM_CHROMAKEY_length 0x00000002 +#define GEN8_3DSTATE_WM_CHROMAKEY_length_bias 0x00000002 +#define GEN8_3DSTATE_WM_CHROMAKEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 76, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_WM_CHROMAKEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyKillEnable; +}; + +static inline void +GEN8_3DSTATE_WM_CHROMAKEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_WM_CHROMAKEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyKillEnable, 31, 31) | + 0; + +} + +#define 
GEN8_3DSTATE_WM_DEPTH_STENCIL_length 0x00000003 +#define GEN8_3DSTATE_WM_DEPTH_STENCIL_length_bias 0x00000002 +#define GEN8_3DSTATE_WM_DEPTH_STENCIL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 78, \ + .DwordLength = 1 + +struct GEN8_3DSTATE_WM_DEPTH_STENCIL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilFailOp; + uint32_t StencilPassDepthFailOp; + uint32_t StencilPassDepthPassOp; + uint32_t BackfaceStencilTestFunction; + uint32_t BackfaceStencilFailOp; + uint32_t BackfaceStencilPassDepthFailOp; + uint32_t BackfaceStencilPassDepthPassOp; + uint32_t StencilTestFunction; + uint32_t DepthTestFunction; +#define False 0 +#define True 1 + uint32_t DoubleSidedStencilEnable; + uint32_t StencilTestEnable; + uint32_t StencilBufferWriteEnable; + uint32_t DepthTestEnable; + uint32_t DepthBufferWriteEnable; + uint32_t StencilTestMask; + uint32_t StencilWriteMask; + uint32_t BackfaceStencilTestMask; + uint32_t BackfaceStencilWriteMask; +}; + +static inline void +GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_WM_DEPTH_STENCIL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StencilFailOp, 29, 31) | + __gen_field(values->StencilPassDepthFailOp, 26, 28) | + __gen_field(values->StencilPassDepthPassOp, 23, 25) | + __gen_field(values->BackfaceStencilTestFunction, 20, 22) | + __gen_field(values->BackfaceStencilFailOp, 17, 19) | + __gen_field(values->BackfaceStencilPassDepthFailOp, 14, 16) | + __gen_field(values->BackfaceStencilPassDepthPassOp, 11, 13) | + 
__gen_field(values->StencilTestFunction, 8, 10) | + __gen_field(values->DepthTestFunction, 5, 7) | + __gen_field(values->DoubleSidedStencilEnable, 4, 4) | + __gen_field(values->StencilTestEnable, 3, 3) | + __gen_field(values->StencilBufferWriteEnable, 2, 2) | + __gen_field(values->DepthTestEnable, 1, 1) | + __gen_field(values->DepthBufferWriteEnable, 0, 0) | + 0; + + dw[2] = + __gen_field(values->StencilTestMask, 24, 31) | + __gen_field(values->StencilWriteMask, 16, 23) | + __gen_field(values->BackfaceStencilTestMask, 8, 15) | + __gen_field(values->BackfaceStencilWriteMask, 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_WM_HZ_OP_length 0x00000005 +#define GEN8_3DSTATE_WM_HZ_OP_length_bias 0x00000002 +#define GEN8_3DSTATE_WM_HZ_OP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 82, \ + .DwordLength = 3 + +struct GEN8_3DSTATE_WM_HZ_OP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilBufferClearEnable; + uint32_t DepthBufferClearEnable; + uint32_t ScissorRectangleEnable; + uint32_t DepthBufferResolveEnable; + uint32_t HierarchicalDepthBufferResolveEnable; + uint32_t PixelPositionOffsetEnable; + uint32_t FullSurfaceDepthClear; + uint32_t StencilClearValue; + uint32_t NumberofMultisamples; + uint32_t ClearRectangleYMin; + uint32_t ClearRectangleXMin; + uint32_t ClearRectangleYMax; + uint32_t ClearRectangleXMax; + uint32_t SampleMask; +}; + +static inline void +GEN8_3DSTATE_WM_HZ_OP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_WM_HZ_OP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 
__gen_field(values->StencilBufferClearEnable, 31, 31) | + __gen_field(values->DepthBufferClearEnable, 30, 30) | + __gen_field(values->ScissorRectangleEnable, 29, 29) | + __gen_field(values->DepthBufferResolveEnable, 28, 28) | + __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->PixelPositionOffsetEnable, 26, 26) | + __gen_field(values->FullSurfaceDepthClear, 25, 25) | + __gen_field(values->StencilClearValue, 16, 23) | + __gen_field(values->NumberofMultisamples, 13, 15) | + 0; + + dw[2] = + __gen_field(values->ClearRectangleYMin, 16, 31) | + __gen_field(values->ClearRectangleXMin, 0, 15) | + 0; + + dw[3] = + __gen_field(values->ClearRectangleYMax, 16, 31) | + __gen_field(values->ClearRectangleXMax, 0, 15) | + 0; + + dw[4] = + __gen_field(values->SampleMask, 0, 15) | + 0; + +} + +#define GEN8_GPGPU_WALKER_length 0x0000000f +#define GEN8_GPGPU_WALKER_length_bias 0x00000002 +#define GEN8_GPGPU_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 5, \ + .DwordLength = 13 + +struct GEN8_GPGPU_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t IndirectParameterEnable; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; +#define SIMD8 0 +#define SIMD16 1 +#define SIMD32 2 + uint32_t SIMDSize; + uint32_t ThreadDepthCounterMaximum; + uint32_t ThreadHeightCounterMaximum; + uint32_t ThreadWidthCounterMaximum; + uint32_t ThreadGroupIDStartingX; + uint32_t ThreadGroupIDXDimension; + uint32_t ThreadGroupIDStartingY; + uint32_t ThreadGroupIDYDimension; + uint32_t ThreadGroupIDStartingResumeZ; + uint32_t ThreadGroupIDZDimension; + uint32_t RightExecutionMask; + uint32_t BottomExecutionMask; +}; + +static inline void +GEN8_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_GPGPU_WALKER * restrict 
values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 6, 31) | + 0; + + dw[4] = + __gen_field(values->SIMDSize, 30, 31) | + __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | + __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | + __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDStartingX, 0, 31) | + 0; + + dw[6] = + 0; + + dw[7] = + __gen_field(values->ThreadGroupIDXDimension, 0, 31) | + 0; + + dw[8] = + __gen_field(values->ThreadGroupIDStartingY, 0, 31) | + 0; + + dw[9] = + 0; + + dw[10] = + __gen_field(values->ThreadGroupIDYDimension, 0, 31) | + 0; + + dw[11] = + __gen_field(values->ThreadGroupIDStartingResumeZ, 0, 31) | + 0; + + dw[12] = + __gen_field(values->ThreadGroupIDZDimension, 0, 31) | + 0; + + dw[13] = + __gen_field(values->RightExecutionMask, 0, 31) | + 0; + + dw[14] = + __gen_field(values->BottomExecutionMask, 0, 31) | + 0; + +} + +#define GEN8_MEDIA_CURBE_LOAD_length 0x00000004 +#define GEN8_MEDIA_CURBE_LOAD_length_bias 0x00000002 +#define GEN8_MEDIA_CURBE_LOAD_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 1, \ + .DwordLength = 2 + +struct GEN8_MEDIA_CURBE_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t CURBETotalDataLength; + uint32_t CURBEDataStartAddress; +}; + +static inline void 
+GEN8_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_CURBE_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->CURBETotalDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->CURBEDataStartAddress, 0, 31) | + 0; + +} + +#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 +#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 +#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 2, \ + .DwordLength = 2 + +struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorTotalLength; + uint32_t InterfaceDescriptorDataStartAddress; +}; + +static inline void +GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | + 0; + +} + +#define GEN8_MEDIA_OBJECT_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 
0 + +struct GEN8_MEDIA_OBJECT { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; + uint32_t ForceDestination; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define Slice2 2 + uint32_t SliceDestinationSelect; +#define SubSlice2 2 +#define SubSlice1 1 +#define SubSlice0 0 + uint32_t SubSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + uint32_t ScoreboardMask; + /* variable length fields follow */ +}; + +static inline void +GEN8_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->ForceDestination, 22, 22) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 20) | + __gen_field(values->SubSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoredboardY, 16, 24) | + 
__gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MEDIA_OBJECT_GRPID_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_GRPID_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 6 + +struct GEN8_MEDIA_OBJECT_GRPID { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t EndofThreadGroup; + uint32_t ForceDestination; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define Slice2 2 + uint32_t SliceDestinationSelect; +#define SubSlice2 2 +#define SubSlice1 1 +#define SubSlice0 0 + uint32_t SubSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + uint32_t ScoreboardMask; + uint32_t GroupID; + /* variable length fields follow */ +}; + +static inline void +GEN8_MEDIA_OBJECT_GRPID_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT_GRPID * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->EndofThreadGroup, 23, 23) | + __gen_field(values->ForceDestination, 22, 22) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 20) | + 
__gen_field(values->SubSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->GroupID, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MEDIA_OBJECT_PRT_length 0x00000010 +#define GEN8_MEDIA_OBJECT_PRT_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_PRT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 2, \ + .DwordLength = 14 + +struct GEN8_MEDIA_OBJECT_PRT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t ChildrenPresent; + uint32_t PRT_FenceNeeded; +#define Rootthreadqueue 0 +#define VFEstateflush 1 + uint32_t PRT_FenceType; + uint32_t InlineData; +}; + +static inline void +GEN8_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT_PRT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->PRT_FenceNeeded, 23, 23) | + __gen_field(values->PRT_FenceType, 22, 22) | + 0; + + dw[3] = + 0; + + dw[4] = + __gen_field(values->InlineData, 0, 31) | + 0; + +} + +#define GEN8_MEDIA_OBJECT_WALKER_length_bias 0x00000002 +#define 
GEN8_MEDIA_OBJECT_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 3 + +struct GEN8_MEDIA_OBJECT_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t GroupIDLoopSelect; + uint32_t ScoreboardMask; + uint32_t ColorCountMinusOne; + uint32_t MiddleLoopExtraSteps; + uint32_t LocalMidLoopUnitY; + uint32_t MidLoopUnitX; + uint32_t GlobalLoopExecCount; + uint32_t LocalLoopExecCount; + uint32_t BlockResolutionY; + uint32_t BlockResolutionX; + uint32_t LocalStartY; + uint32_t LocalStartX; + uint32_t LocalOuterLoopStrideY; + uint32_t LocalOuterLoopStrideX; + uint32_t LocalInnerLoopUnitY; + uint32_t LocalInnerLoopUnitX; + uint32_t GlobalResolutionY; + uint32_t GlobalResolutionX; + uint32_t GlobalStartY; + uint32_t GlobalStartX; + uint32_t GlobalOuterLoopStrideY; + uint32_t GlobalOuterLoopStrideX; + uint32_t GlobalInnerLoopUnitY; + uint32_t GlobalInnerLoopUnitX; + /* variable length fields follow */ +}; + +static inline void +GEN8_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + 
__gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + 0; + + dw[5] = + __gen_field(values->GroupIDLoopSelect, 8, 31) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->ColorCountMinusOne, 24, 27) | + __gen_field(values->MiddleLoopExtraSteps, 16, 20) | + __gen_field(values->LocalMidLoopUnitY, 12, 13) | + __gen_field(values->MidLoopUnitX, 8, 9) | + 0; + + dw[7] = + __gen_field(values->GlobalLoopExecCount, 16, 25) | + __gen_field(values->LocalLoopExecCount, 0, 9) | + 0; + + dw[8] = + __gen_field(values->BlockResolutionY, 16, 24) | + __gen_field(values->BlockResolutionX, 0, 8) | + 0; + + dw[9] = + __gen_field(values->LocalStartY, 16, 24) | + __gen_field(values->LocalStartX, 0, 8) | + 0; + + dw[10] = + 0; + + dw[11] = + __gen_field(values->LocalOuterLoopStrideY, 16, 25) | + __gen_field(values->LocalOuterLoopStrideX, 0, 9) | + 0; + + dw[12] = + __gen_field(values->LocalInnerLoopUnitY, 16, 25) | + __gen_field(values->LocalInnerLoopUnitX, 0, 9) | + 0; + + dw[13] = + __gen_field(values->GlobalResolutionY, 16, 24) | + __gen_field(values->GlobalResolutionX, 0, 8) | + 0; + + dw[14] = + __gen_field(values->GlobalStartY, 16, 25) | + __gen_field(values->GlobalStartX, 0, 9) | + 0; + + dw[15] = + __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | + __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | + 0; + + dw[16] = + __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | + __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MEDIA_STATE_FLUSH_length 0x00000002 +#define GEN8_MEDIA_STATE_FLUSH_length_bias 0x00000002 +#define GEN8_MEDIA_STATE_FLUSH_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 4, \ + .DwordLength = 0 + +struct GEN8_MEDIA_STATE_FLUSH { + 
uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t FlushtoGO; + uint32_t WatermarkRequired; + uint32_t InterfaceDescriptorOffset; +}; + +static inline void +GEN8_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_STATE_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->FlushtoGO, 7, 7) | + __gen_field(values->WatermarkRequired, 6, 6) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + +} + +#define GEN8_MEDIA_VFE_STATE_length 0x00000009 +#define GEN8_MEDIA_VFE_STATE_length_bias 0x00000002 +#define GEN8_MEDIA_VFE_STATE_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 0, \ + .DwordLength = 7 + +struct GEN8_MEDIA_VFE_STATE { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t ScratchSpaceBasePointer; + uint32_t StackSize; + uint32_t PerThreadScratchSpace; + uint32_t ScratchSpaceBasePointerHigh; + uint32_t MaximumNumberofThreads; + uint32_t NumberofURBEntries; +#define Maintainingtheexistingtimestampstate 0 +#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 + uint32_t ResetGatewayTimer; +#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 +#define BypassingOpenGatewayCloseGatewayprotocol 1 + uint32_t BypassGatewayControl; + uint32_t SliceDisable; + uint32_t URBEntryAllocationSize; + uint32_t CURBEAllocationSize; +#define Scoreboarddisabled 0 +#define Scoreboardenabled 1 + uint32_t ScoreboardEnable; +#define StallingScoreboard 0 +#define NonStallingScoreboard 1 + uint32_t ScoreboardType; 
+ uint32_t ScoreboardMask; + uint32_t Scoreboard3DeltaY; + uint32_t Scoreboard3DeltaX; + uint32_t Scoreboard2DeltaY; + uint32_t Scoreboard2DeltaX; + uint32_t Scoreboard1DeltaY; + uint32_t Scoreboard1DeltaX; + uint32_t Scoreboard0DeltaY; + uint32_t Scoreboard0DeltaX; + uint32_t Scoreboard7DeltaY; + uint32_t Scoreboard7DeltaX; + uint32_t Scoreboard6DeltaY; + uint32_t Scoreboard6DeltaX; + uint32_t Scoreboard5DeltaY; + uint32_t Scoreboard5DeltaX; + uint32_t Scoreboard4DeltaY; + uint32_t Scoreboard4DeltaX; +}; + +static inline void +GEN8_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_VFE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->StackSize, 4, 7) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[2] = + __gen_offset(values->ScratchSpaceBasePointerHigh, 0, 15) | + 0; + + dw[3] = + __gen_field(values->MaximumNumberofThreads, 16, 31) | + __gen_field(values->NumberofURBEntries, 8, 15) | + __gen_field(values->ResetGatewayTimer, 7, 7) | + __gen_field(values->BypassGatewayControl, 6, 6) | + 0; + + dw[4] = + __gen_field(values->SliceDisable, 0, 1) | + 0; + + dw[5] = + __gen_field(values->URBEntryAllocationSize, 16, 31) | + __gen_field(values->CURBEAllocationSize, 0, 15) | + 0; + + dw[6] = + __gen_field(values->ScoreboardEnable, 31, 31) | + __gen_field(values->ScoreboardType, 30, 30) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[7] = + __gen_field(values->Scoreboard3DeltaY, 28, 31) | + __gen_field(values->Scoreboard3DeltaX, 24, 27) | + __gen_field(values->Scoreboard2DeltaY, 20, 23) | + __gen_field(values->Scoreboard2DeltaX, 16, 19) | + 
__gen_field(values->Scoreboard1DeltaY, 12, 15) | + __gen_field(values->Scoreboard1DeltaX, 8, 11) | + __gen_field(values->Scoreboard0DeltaY, 4, 7) | + __gen_field(values->Scoreboard0DeltaX, 0, 3) | + 0; + + dw[8] = + __gen_field(values->Scoreboard7DeltaY, 28, 31) | + __gen_field(values->Scoreboard7DeltaX, 24, 27) | + __gen_field(values->Scoreboard6DeltaY, 20, 23) | + __gen_field(values->Scoreboard6DeltaX, 16, 19) | + __gen_field(values->Scoreboard5DeltaY, 12, 15) | + __gen_field(values->Scoreboard5DeltaX, 8, 11) | + __gen_field(values->Scoreboard4DeltaY, 4, 7) | + __gen_field(values->Scoreboard4DeltaX, 0, 3) | + 0; + +} + +#define GEN8_MI_ARB_CHECK_length 0x00000001 +#define GEN8_MI_ARB_CHECK_length_bias 0x00000001 +#define GEN8_MI_ARB_CHECK_header \ + .CommandType = 0, \ + .MICommandOpcode = 5 + +struct GEN8_MI_ARB_CHECK { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_ARB_CHECK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_BATCH_BUFFER_END_length 0x00000001 +#define GEN8_MI_BATCH_BUFFER_END_length_bias 0x00000001 +#define GEN8_MI_BATCH_BUFFER_END_header \ + .CommandType = 0, \ + .MICommandOpcode = 10 + +struct GEN8_MI_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_BATCH_BUFFER_START_length 0x00000003 +#define GEN8_MI_BATCH_BUFFER_START_length_bias 0x00000002 +#define GEN8_MI_BATCH_BUFFER_START_header \ + .CommandType = 0, \ + 
.MICommandOpcode = 49, \ + .DwordLength = 1 + +struct GEN8_MI_BATCH_BUFFER_START { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define _1stlevelbatch 0 +#define _2ndlevelbatch 1 + uint32_t _2ndLevelBatchBuffer; + uint32_t AddOffsetEnable; + uint32_t PredicationEnable; + uint32_t ResourceStreamerEnable; +#define ASI_GGTT 0 +#define ASI_PPGTT 1 + uint32_t AddressSpaceIndicator; + uint32_t DwordLength; + __gen_address_type BatchBufferStartAddress; + __gen_address_type BatchBufferStartAddressHigh; +}; + +static inline void +GEN8_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_BATCH_BUFFER_START * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->_2ndLevelBatchBuffer, 22, 22) | + __gen_field(values->AddOffsetEnable, 16, 16) | + __gen_field(values->PredicationEnable, 15, 15) | + __gen_field(values->ResourceStreamerEnable, 10, 10) | + __gen_field(values->AddressSpaceIndicator, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BatchBufferStartAddressHigh, dw2); + +} + +#define GEN8_MI_CLFLUSH_length_bias 0x00000002 +#define GEN8_MI_CLFLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 39 + +struct GEN8_MI_CLFLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type PageBaseAddress; + uint32_t StartingCachelineOffset; + __gen_address_type PageBaseAddressHigh; + /* variable length fields follow */ +}; + +static inline void +GEN8_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_CLFLUSH * restrict values) +{ 
+ uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->StartingCachelineOffset, 6, 11) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PageBaseAddressHigh, dw2); + + /* variable length fields follow */ +} + +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000003 +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_header\ + .CommandType = 0, \ + .MICommandOpcode = 54, \ + .UseGlobalGTT = 0, \ + .CompareSemaphore = 0, \ + .DwordLength = 1 + +struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t CompareSemaphore; + uint32_t DwordLength; + uint32_t CompareDataDword; + __gen_address_type CompareAddress; + __gen_address_type CompareAddressHigh; +}; + +static inline void +GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->CompareSemaphore, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CompareDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->CompareAddressHigh, dw3); + +} + +#define GEN8_MI_COPY_MEM_MEM_length 0x00000005 +#define 
GEN8_MI_COPY_MEM_MEM_length_bias 0x00000002 +#define GEN8_MI_COPY_MEM_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 46, \ + .DwordLength = 3 + +struct GEN8_MI_COPY_MEM_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTTSource; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTTDestination; + uint32_t DwordLength; + __gen_address_type DestinationMemoryAddress; + __gen_address_type SourceMemoryAddress; +}; + +static inline void +GEN8_MI_COPY_MEM_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_COPY_MEM_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTTSource, 22, 22) | + __gen_field(values->UseGlobalGTTDestination, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->DestinationMemoryAddress, dw1); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->SourceMemoryAddress, dw3); + +} + +#define GEN8_MI_LOAD_REGISTER_IMM_length 0x00000003 +#define GEN8_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 +#define GEN8_MI_LOAD_REGISTER_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 34, \ + .DwordLength = 1 + +struct GEN8_MI_LOAD_REGISTER_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ByteWriteDisables; + uint32_t DwordLength; + uint32_t RegisterOffset; + uint32_t DataDWord; +}; + +static inline void +GEN8_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_REGISTER_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 
__gen_field(values->ByteWriteDisables, 8, 11) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->RegisterOffset, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DataDWord, 0, 31) | + 0; + +} + +#define GEN8_MI_LOAD_REGISTER_MEM_length 0x00000004 +#define GEN8_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 +#define GEN8_MI_LOAD_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 41, \ + .DwordLength = 2 + +struct GEN8_MI_LOAD_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t AsyncModeEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->AsyncModeEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 19, \ + .DwordLength = 0 + +struct GEN8_MI_LOAD_SCAN_LINES_EXCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN8_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_SCAN_LINES_EXCL * restrict values) 
+{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN8_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 +#define GEN8_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 +#define GEN8_MI_LOAD_SCAN_LINES_INCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 18, \ + .DwordLength = 0 + +struct GEN8_MI_LOAD_SCAN_LINES_INCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; +#define NeverForward 0 +#define AlwaysForward 1 +#define ConditionallyForward 2 + uint32_t ScanLineEventDoneForward; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN8_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_SCAN_LINES_INCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->ScanLineEventDoneForward, 17, 18) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN8_MI_LOAD_URB_MEM_length 0x00000004 +#define GEN8_MI_LOAD_URB_MEM_length_bias 0x00000002 +#define GEN8_MI_LOAD_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 44, \ + .DwordLength = 2 + +struct GEN8_MI_LOAD_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; 
+}; + +static inline void +GEN8_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN8_MI_MATH_length_bias 0x00000002 +#define GEN8_MI_MATH_header \ + .CommandType = 0, \ + .MICommandOpcode = 26 + +struct GEN8_MI_MATH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t ALUINSTRUCTION1; + uint32_t ALUINSTRUCTION2; + /* variable length fields follow */ +}; + +static inline void +GEN8_MI_MATH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_MATH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->ALUINSTRUCTION1, 0, 31) | + 0; + + dw[2] = + __gen_field(values->ALUINSTRUCTION2, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MI_NOOP_length 0x00000001 +#define GEN8_MI_NOOP_length_bias 0x00000001 +#define GEN8_MI_NOOP_header \ + .CommandType = 0, \ + .MICommandOpcode = 0 + +struct GEN8_MI_NOOP { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t IdentificationNumberRegisterWriteEnable; + uint32_t IdentificationNumber; +}; + +static inline void +GEN8_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_NOOP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 
__gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | + __gen_field(values->IdentificationNumber, 0, 21) | + 0; + +} + +#define GEN8_MI_PREDICATE_length 0x00000001 +#define GEN8_MI_PREDICATE_length_bias 0x00000001 +#define GEN8_MI_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 12 + +struct GEN8_MI_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define KEEP 0 +#define LOAD 2 +#define LOADINV 3 + uint32_t LoadOperation; +#define COMBINE_SET 0 +#define COMBINE_AND 1 +#define COMBINE_OR 2 +#define COMBINE_XOR 3 + uint32_t CombineOperation; +#define COMPARE_SRCS_EQUAL 2 +#define COMPARE_DELTAS_EQUAL 3 + uint32_t CompareOperation; +}; + +static inline void +GEN8_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->LoadOperation, 6, 7) | + __gen_field(values->CombineOperation, 3, 4) | + __gen_field(values->CompareOperation, 0, 1) | + 0; + +} + +#define GEN8_MI_REPORT_HEAD_length 0x00000001 +#define GEN8_MI_REPORT_HEAD_length_bias 0x00000001 +#define GEN8_MI_REPORT_HEAD_header \ + .CommandType = 0, \ + .MICommandOpcode = 7 + +struct GEN8_MI_REPORT_HEAD { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_REPORT_HEAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_RS_CONTEXT_length 0x00000001 +#define GEN8_MI_RS_CONTEXT_length_bias 0x00000001 +#define GEN8_MI_RS_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 15 + +struct GEN8_MI_RS_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define Restore 0 
+#define Save 1 + uint32_t ResourceStreamerSave; +}; + +static inline void +GEN8_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_RS_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerSave, 0, 0) | + 0; + +} + +#define GEN8_MI_RS_CONTROL_length 0x00000001 +#define GEN8_MI_RS_CONTROL_length_bias 0x00000001 +#define GEN8_MI_RS_CONTROL_header \ + .CommandType = 0, \ + .MICommandOpcode = 6 + +struct GEN8_MI_RS_CONTROL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define Stop 0 +#define Start 1 + uint32_t ResourceStreamerControl; +}; + +static inline void +GEN8_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_RS_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerControl, 0, 0) | + 0; + +} + +#define GEN8_MI_RS_STORE_DATA_IMM_length 0x00000004 +#define GEN8_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN8_MI_RS_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 43, \ + .DwordLength = 2 + +struct GEN8_MI_RS_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type DestinationAddress; + uint32_t CoreModeEnable; + uint32_t DataDWord0; +}; + +static inline void +GEN8_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_RS_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[1] = + 
__gen_combine_address(data, &dw[1], values->DestinationAddress, dw1); + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + +} + +#define GEN8_MI_SET_CONTEXT_length 0x00000002 +#define GEN8_MI_SET_CONTEXT_length_bias 0x00000002 +#define GEN8_MI_SET_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 24, \ + .DwordLength = 0 + +struct GEN8_MI_SET_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type LogicalContextAddress; + uint32_t ReservedMustbe1; + uint32_t CoreModeEnable; + uint32_t ResourceStreamerStateSaveEnable; + uint32_t ResourceStreamerStateRestoreEnable; + uint32_t ForceRestore; + uint32_t RestoreInhibit; +}; + +static inline void +GEN8_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SET_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->ReservedMustbe1, 8, 8) | + __gen_field(values->CoreModeEnable, 4, 4) | + __gen_field(values->ResourceStreamerStateSaveEnable, 3, 3) | + __gen_field(values->ResourceStreamerStateRestoreEnable, 2, 2) | + __gen_field(values->ForceRestore, 1, 1) | + __gen_field(values->RestoreInhibit, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); + +} + +#define GEN8_MI_SET_PREDICATE_length 0x00000001 +#define GEN8_MI_SET_PREDICATE_length_bias 0x00000001 +#define GEN8_MI_SET_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 1 + +struct GEN8_MI_SET_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define NOOPNever 0 +#define NOOPonResult2clear 1 +#define NOOPonResult2set 2 +#define NOOPonResultclear 3 +#define NOOPonResultset 4 +#define Executewhenonesliceenabled 5 +#define Executewhentwoslicesareenabled 6 +#define 
Executewhenthreeslicesareenabled 7 +#define NOOPAlways 15 + uint32_t PREDICATEENABLE; +}; + +static inline void +GEN8_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SET_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PREDICATEENABLE, 0, 3) | + 0; + +} + +#define GEN8_MI_STORE_DATA_IMM_length 0x00000004 +#define GEN8_MI_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN8_MI_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 32, \ + .DwordLength = 2 + +struct GEN8_MI_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t StoreQword; + uint32_t DwordLength; + __gen_address_type Address; + uint32_t CoreModeEnable; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN8_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->StoreQword, 21, 21) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->Address, dw1); + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN8_MI_STORE_DATA_INDEX_length 0x00000003 +#define GEN8_MI_STORE_DATA_INDEX_length_bias 0x00000002 +#define GEN8_MI_STORE_DATA_INDEX_header \ + .CommandType = 0, \ + .MICommandOpcode = 33, \ + .DwordLength = 1 + +struct GEN8_MI_STORE_DATA_INDEX { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UsePerProcessHardwareStatusPage; + 
uint32_t DwordLength; + uint32_t Offset; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN8_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_DATA_INDEX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UsePerProcessHardwareStatusPage, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Offset, 2, 11) | + 0; + + dw[2] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[3] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN8_MI_STORE_URB_MEM_length 0x00000004 +#define GEN8_MI_STORE_URB_MEM_length_bias 0x00000002 +#define GEN8_MI_STORE_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 45, \ + .DwordLength = 2 + +struct GEN8_MI_STORE_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN8_MI_SUSPEND_FLUSH_length 0x00000001 +#define GEN8_MI_SUSPEND_FLUSH_length_bias 0x00000001 +#define GEN8_MI_SUSPEND_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 11 + +struct GEN8_MI_SUSPEND_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t SuspendFlush; +}; + +static inline void +GEN8_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, 
+ const struct GEN8_MI_SUSPEND_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SuspendFlush, 0, 0) | + 0; + +} + +#define GEN8_MI_TOPOLOGY_FILTER_length 0x00000001 +#define GEN8_MI_TOPOLOGY_FILTER_length_bias 0x00000001 +#define GEN8_MI_TOPOLOGY_FILTER_header \ + .CommandType = 0, \ + .MICommandOpcode = 13 + +struct GEN8_MI_TOPOLOGY_FILTER { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t TopologyFilterValue; +}; + +static inline void +GEN8_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_TOPOLOGY_FILTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->TopologyFilterValue, 0, 5) | + 0; + +} + +#define GEN8_MI_UPDATE_GTT_length_bias 0x00000002 +#define GEN8_MI_UPDATE_GTT_header \ + .CommandType = 0, \ + .MICommandOpcode = 35 + +struct GEN8_MI_UPDATE_GTT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type EntryAddress; + /* variable length fields follow */ +}; + +static inline void +GEN8_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_UPDATE_GTT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); + + /* variable length fields follow */ +} + +#define GEN8_MI_URB_ATOMIC_ALLOC_length 0x00000001 +#define GEN8_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 +#define GEN8_MI_URB_ATOMIC_ALLOC_header \ + .CommandType = 0, \ + .MICommandOpcode = 9 + +struct GEN8_MI_URB_ATOMIC_ALLOC { + 
uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t URBAtomicStorageOffset; + uint32_t URBAtomicStorageSize; +}; + +static inline void +GEN8_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_URB_ATOMIC_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->URBAtomicStorageOffset, 12, 19) | + __gen_field(values->URBAtomicStorageSize, 0, 8) | + 0; + +} + +#define GEN8_MI_URB_CLEAR_length 0x00000002 +#define GEN8_MI_URB_CLEAR_length_bias 0x00000002 +#define GEN8_MI_URB_CLEAR_header \ + .CommandType = 0, \ + .MICommandOpcode = 25, \ + .DwordLength = 0 + +struct GEN8_MI_URB_CLEAR { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBClearLength; + uint32_t URBAddress; +}; + +static inline void +GEN8_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_URB_CLEAR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBClearLength, 16, 29) | + __gen_field(values->URBAddress, 0, 14) | + 0; + +} + +#define GEN8_MI_USER_INTERRUPT_length 0x00000001 +#define GEN8_MI_USER_INTERRUPT_length_bias 0x00000001 +#define GEN8_MI_USER_INTERRUPT_header \ + .CommandType = 0, \ + .MICommandOpcode = 2 + +struct GEN8_MI_USER_INTERRUPT { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_USER_INTERRUPT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define 
GEN8_MI_WAIT_FOR_EVENT_length 0x00000001 +#define GEN8_MI_WAIT_FOR_EVENT_length_bias 0x00000001 +#define GEN8_MI_WAIT_FOR_EVENT_header \ + .CommandType = 0, \ + .MICommandOpcode = 3 + +struct GEN8_MI_WAIT_FOR_EVENT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DisplayPipeCVerticalBlankWaitEnable; + uint32_t DisplaySpriteCFlipPendingWaitEnable; + uint32_t DisplayPlaneCFlipPendingWaitEnable; + uint32_t DisplayPipeCScanLineWaitEnable; + uint32_t DisplayPipeBVerticalBlankWaitEnable; + uint32_t DisplaySpriteBFlipPendingWaitEnable; + uint32_t DisplayPlaneBFlipPendingWaitEnable; + uint32_t DisplayPipeBScanLineWaitEnable; + uint32_t DisplayPipeAVerticalBlankWaitEnable; + uint32_t DisplaySpriteAFlipPendingWaitEnable; + uint32_t DisplayPlaneAFlipPendingWaitEnable; + uint32_t DisplayPipeAScanLineWaitEnable; +}; + +static inline void +GEN8_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_WAIT_FOR_EVENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 21) | + __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | + __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | + __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | + __gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | + __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | + __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | + __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | + __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | + __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | + __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | + __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | + 0; + +} + +#define GEN8_PIPE_CONTROL_length 0x00000006 +#define 
GEN8_PIPE_CONTROL_length_bias 0x00000002 +#define GEN8_PIPE_CONTROL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 2, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 4 + +struct GEN8_PIPE_CONTROL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DAT_PPGTT 0 +#define DAT_GGTT 1 + uint32_t DestinationAddressType; +#define NoLRIOperation 0 +#define MMIOWriteImmediateData 1 + uint32_t LRIPostSyncOperation; + uint32_t StoreDataIndex; + uint32_t CommandStreamerStallEnable; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; + uint32_t TLBInvalidate; + uint32_t GenericMediaStateClear; +#define NoWrite 0 +#define WriteImmediateData 1 +#define WritePSDepthCount 2 +#define WriteTimestamp 3 + uint32_t PostSyncOperation; + uint32_t DepthStallEnable; +#define DisableFlush 0 +#define EnableFlush 1 + uint32_t RenderTargetCacheFlushEnable; + uint32_t InstructionCacheInvalidateEnable; + uint32_t TextureCacheInvalidationEnable; + uint32_t IndirectStatePointersDisable; + uint32_t NotifyEnable; + uint32_t PipeControlFlushEnable; + uint32_t DCFlushEnable; + uint32_t VFCacheInvalidationEnable; + uint32_t ConstantCacheInvalidationEnable; + uint32_t StateCacheInvalidationEnable; + uint32_t StallAtPixelScoreboard; +#define FlushDisabled 0 +#define FlushEnabled 1 + uint32_t DepthCacheFlushEnable; + __gen_address_type Address; + __gen_address_type AddressHigh; + uint32_t ImmediateData; +}; + +static inline void +GEN8_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PIPE_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 
__gen_field(values->DestinationAddressType, 24, 24) | + __gen_field(values->LRIPostSyncOperation, 23, 23) | + __gen_field(values->StoreDataIndex, 21, 21) | + __gen_field(values->CommandStreamerStallEnable, 20, 20) | + __gen_field(values->GlobalSnapshotCountReset, 19, 19) | + __gen_field(values->TLBInvalidate, 18, 18) | + __gen_field(values->GenericMediaStateClear, 16, 16) | + __gen_field(values->PostSyncOperation, 14, 15) | + __gen_field(values->DepthStallEnable, 13, 13) | + __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | + __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | + __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | + __gen_field(values->IndirectStatePointersDisable, 9, 9) | + __gen_field(values->NotifyEnable, 8, 8) | + __gen_field(values->PipeControlFlushEnable, 7, 7) | + __gen_field(values->DCFlushEnable, 5, 5) | + __gen_field(values->VFCacheInvalidationEnable, 4, 4) | + __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | + __gen_field(values->StateCacheInvalidationEnable, 2, 2) | + __gen_field(values->StallAtPixelScoreboard, 1, 1) | + __gen_field(values->DepthCacheFlushEnable, 0, 0) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->Address, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->AddressHigh, dw3); + + dw[4] = + __gen_field(values->ImmediateData, 0, 63) | + 0; + +} + +struct GEN8_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = 
(uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + uint32_t dw4 = + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer1, dw4); + + uint32_t dw6 = + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->PointerToConstantBuffer2, dw6); + + uint32_t dw8 = + 0; + + dw[8] = + __gen_combine_address(data, &dw[8], values->PointerToConstantBuffer3, dw8); + +} + +struct GEN8_BINDING_TABLE_EDIT_ENTRY { + uint32_t BindingTableIndex; + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN8_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BINDING_TABLE_EDIT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BindingTableIndex, 16, 23) | + __gen_offset(values->SurfaceStatePointer, 0, 15) | + 0; + +} + +struct GEN8_GATHER_CONSTANT_ENTRY { + uint32_t ConstantBufferOffset; + uint32_t ChannelMask; + uint32_t BindingTableIndexOffset; +}; + +static inline void +GEN8_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_GATHER_CONSTANT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->ConstantBufferOffset, 8, 15) | + __gen_field(values->ChannelMask, 4, 7) | + __gen_field(values->BindingTableIndexOffset, 0, 3) | + 0; + +} + +struct GEN8_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; + uint32_t MemoryObjectControlState; + uint32_t AddressModifyEnable; + uint32_t NullVertexBuffer; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + uint32_t BufferSize; +}; + +static inline 
void +GEN8_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + dw[3] = + __gen_field(values->BufferSize, 0, 31) | + 0; + +} + +struct GEN8_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + uint32_t Valid; + uint32_t SourceElementFormat; + uint32_t EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline void +GEN8_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + +struct GEN8_SO_DECL_ENTRY { + uint32_t Stream3Decl; + uint32_t Stream2Decl; + uint32_t Stream1Decl; + uint32_t Stream0Decl; +}; + +static inline void +GEN8_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + /* Struct 
Stream3Decl: found SO_DECL */ + /* Struct Stream2Decl: found SO_DECL */ + /* Struct Stream1Decl: found SO_DECL */ + /* Struct Stream0Decl: found SO_DECL */ + 0; + +} + +struct GEN8_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN8_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL { + uint32_t ComponentOverrideW; + uint32_t ComponentOverrideZ; + uint32_t ComponentOverrideY; + uint32_t ComponentOverrideX; + uint32_t SwizzleControlMode; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t ConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t SwizzleSelect; + uint32_t SourceAttribute; +}; + +static inline void +GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ComponentOverrideW, 15, 15) | + __gen_field(values->ComponentOverrideZ, 14, 14) | + __gen_field(values->ComponentOverrideY, 13, 13) | + __gen_field(values->ComponentOverrideX, 12, 12) | + __gen_field(values->SwizzleControlMode, 11, 11) | + __gen_field(values->ConstantSource, 9, 10) | + __gen_field(values->SwizzleSelect, 6, 7) | + __gen_field(values->SourceAttribute, 0, 4) | + 0; + +} + +struct GEN8_SCISSOR_RECT { + uint32_t ScissorRectangleYMin; + uint32_t ScissorRectangleXMin; + uint32_t ScissorRectangleYMax; + uint32_t ScissorRectangleXMax; +}; + +static inline void 
+GEN8_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SCISSOR_RECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ScissorRectangleYMin, 16, 31) | + __gen_field(values->ScissorRectangleXMin, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ScissorRectangleYMax, 16, 31) | + __gen_field(values->ScissorRectangleXMax, 0, 15) | + 0; + +} + +struct GEN8_SF_CLIP_VIEWPORT { + float ViewportMatrixElementm00; + float ViewportMatrixElementm11; + float ViewportMatrixElementm22; + float ViewportMatrixElementm30; + float ViewportMatrixElementm31; + float ViewportMatrixElementm32; + float XMinClipGuardband; + float XMaxClipGuardband; + float YMinClipGuardband; + float YMaxClipGuardband; + float XMinViewPort; + float XMaxViewPort; + float YMinViewPort; + float YMaxViewPort; +}; + +static inline void +GEN8_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SF_CLIP_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->ViewportMatrixElementm00) | + 0; + + dw[1] = + __gen_float(values->ViewportMatrixElementm11) | + 0; + + dw[2] = + __gen_float(values->ViewportMatrixElementm22) | + 0; + + dw[3] = + __gen_float(values->ViewportMatrixElementm30) | + 0; + + dw[4] = + __gen_float(values->ViewportMatrixElementm31) | + 0; + + dw[5] = + __gen_float(values->ViewportMatrixElementm32) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + + dw[8] = + __gen_float(values->XMinClipGuardband) | + 0; + + dw[9] = + __gen_float(values->XMaxClipGuardband) | + 0; + + dw[10] = + __gen_float(values->YMinClipGuardband) | + 0; + + dw[11] = + __gen_float(values->YMaxClipGuardband) | + 0; + + dw[12] = + __gen_float(values->XMinViewPort) | + 0; + + dw[13] = + __gen_float(values->XMaxViewPort) | + 0; + + dw[14] = + __gen_float(values->YMinViewPort) | + 0; + + dw[15] = + __gen_float(values->YMaxViewPort) | + 0; + +} + +struct GEN8_BLEND_STATE { + 
uint32_t AlphaToCoverageEnable; + uint32_t IndependentAlphaBlendEnable; + uint32_t AlphaToOneEnable; + uint32_t AlphaToCoverageDitherEnable; + uint32_t AlphaTestEnable; + uint32_t AlphaTestFunction; + uint32_t ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + uint32_t Entry; +}; + +static inline void +GEN8_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaToOneEnable, 29, 29) | + __gen_field(values->AlphaToCoverageDitherEnable, 28, 28) | + __gen_field(values->AlphaTestEnable, 27, 27) | + __gen_field(values->AlphaTestFunction, 24, 26) | + __gen_field(values->ColorDitherEnable, 23, 23) | + __gen_field(values->XDitherOffset, 21, 22) | + __gen_field(values->YDitherOffset, 19, 20) | + 0; + + dw[1] = + /* Struct Entry: found BLEND_STATE_ENTRY */ + 0; + +} + +struct GEN8_BLEND_STATE_ENTRY { + uint32_t LogicOpEnable; + uint32_t LogicOpFunction; + uint32_t PreBlendSourceOnlyClampEnable; +#define COLORCLAMP_UNORM 0 +#define COLORCLAMP_SNORM 1 +#define COLORCLAMP_RTFORMAT 2 + uint32_t ColorClampRange; + uint32_t PreBlendColorClampEnable; + uint32_t PostBlendColorClampEnable; + uint32_t ColorBufferBlendEnable; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + uint32_t ColorBlendFunction; + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; + uint32_t AlphaBlendFunction; + uint32_t WriteDisableAlpha; + uint32_t WriteDisableRed; + uint32_t WriteDisableGreen; + uint32_t WriteDisableBlue; +}; + +static inline void +GEN8_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BLEND_STATE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->LogicOpEnable, 63, 63) | + 
__gen_field(values->LogicOpFunction, 59, 62) | + __gen_field(values->PreBlendSourceOnlyClampEnable, 36, 36) | + __gen_field(values->ColorClampRange, 34, 35) | + __gen_field(values->PreBlendColorClampEnable, 33, 33) | + __gen_field(values->PostBlendColorClampEnable, 32, 32) | + __gen_field(values->ColorBufferBlendEnable, 31, 31) | + __gen_field(values->SourceBlendFactor, 26, 30) | + __gen_field(values->DestinationBlendFactor, 21, 25) | + __gen_field(values->ColorBlendFunction, 18, 20) | + __gen_field(values->SourceAlphaBlendFactor, 13, 17) | + __gen_field(values->DestinationAlphaBlendFactor, 8, 12) | + __gen_field(values->AlphaBlendFunction, 5, 7) | + __gen_field(values->WriteDisableAlpha, 3, 3) | + __gen_field(values->WriteDisableRed, 2, 2) | + __gen_field(values->WriteDisableGreen, 1, 1) | + __gen_field(values->WriteDisableBlue, 0, 0) | + 0; + +} + +struct GEN8_CC_VIEWPORT { + float MinimumDepth; + float MaximumDepth; +}; + +static inline void +GEN8_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_CC_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->MinimumDepth) | + 0; + + dw[1] = + __gen_float(values->MaximumDepth) | + 0; + +} + +struct GEN8_COLOR_CALC_STATE { + uint32_t StencilReferenceValue; + uint32_t BackFaceStencilReferenceValue; +#define Cancelled 0 +#define NotCancelled 1 + uint32_t RoundDisableFunctionDisable; +#define ALPHATEST_UNORM8 0 +#define ALPHATEST_FLOAT32 1 + uint32_t AlphaTestFormat; + uint32_t AlphaReferenceValueAsUNORM8; + float AlphaReferenceValueAsFLOAT32; + float BlendConstantColorRed; + float BlendConstantColorGreen; + float BlendConstantColorBlue; + float BlendConstantColorAlpha; +}; + +static inline void +GEN8_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_COLOR_CALC_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilReferenceValue, 24, 31) | + 
__gen_field(values->BackFaceStencilReferenceValue, 16, 23) | + __gen_field(values->RoundDisableFunctionDisable, 15, 15) | + __gen_field(values->AlphaTestFormat, 0, 0) | + 0; + + dw[1] = + __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | + __gen_float(values->AlphaReferenceValueAsFLOAT32) | + 0; + + dw[2] = + __gen_float(values->BlendConstantColorRed) | + 0; + + dw[3] = + __gen_float(values->BlendConstantColorGreen) | + 0; + + dw[4] = + __gen_float(values->BlendConstantColorBlue) | + 0; + + dw[5] = + __gen_float(values->BlendConstantColorAlpha) | + 0; + +} + +struct GEN8_MEMORY_OBJECT_CONTROL_STATE { +}; + +static inline void +GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 0; + +} + +struct GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS { +#define UseCacheabilityControlsfrompagetableUCwithFenceifcoherentcycle 0 +#define UncacheableUCnoncacheable 1 +#define WritethroughWT 2 +#define WritebackWB 3 + uint32_t MemoryTypeLLCeLLCCacheabilityControlLeLLCCC; +#define eLLCOnly 0 +#define LLCOnly 1 +#define LLCeLLCAllowed 2 +#define L3LLCeLLCAllowed 3 + uint32_t TargetCacheTC; + uint32_t EncryptedData; +#define PoorChance 3 +#define NormalChance 2 +#define BetterChance 1 +#define BestChance 0 + uint32_t AgeforQUADLRUAGE; +}; + +static inline void +GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->MemoryTypeLLCeLLCCacheabilityControlLeLLCCC, 5, 6) | + __gen_field(values->TargetCacheTC, 3, 4) | + __gen_field(values->EncryptedData, 2, 2) | + __gen_field(values->AgeforQUADLRUAGE, 0, 1) | + 0; + +} + +struct GEN8_INTERFACE_DESCRIPTOR_DATA { + uint32_t KernelStartPointer; + uint32_t KernelStartPointerHigh; 
+#define Ftz 0 +#define SetByKernel 1 + uint32_t DenormMode; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t MaskStackExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t SamplerStatePointer; +#define Nosamplersused 0 +#define Between1and4samplersused 1 +#define Between5and8samplersused 2 +#define Between9and12samplersused 3 +#define Between13and16samplersused 4 + uint32_t SamplerCount; + uint32_t BindingTablePointer; + uint32_t BindingTableEntryCount; + uint32_t ConstantIndirectURBEntryReadLength; + uint32_t ConstantURBEntryReadOffset; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + uint32_t BarrierEnable; +#define Encodes0k 0 +#define Encodes4k 1 +#define Encodes8k 2 +#define Encodes16k 4 +#define Encodes32k 8 +#define Encodes64k 16 + uint32_t SharedLocalMemorySize; + uint32_t NumberofThreadsinGPGPUThreadGroup; + uint32_t CrossThreadConstantDataReadLength; +}; + +static inline void +GEN8_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_INTERFACE_DESCRIPTOR_DATA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointerHigh, 0, 15) | + 0; + + dw[2] = + __gen_field(values->DenormMode, 19, 19) | + __gen_field(values->SingleProgramFlow, 18, 18) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->SamplerStatePointer, 5, 31) | + __gen_field(values->SamplerCount, 2, 4) | + 0; 
+ + dw[4] = + __gen_offset(values->BindingTablePointer, 5, 15) | + __gen_field(values->BindingTableEntryCount, 0, 4) | + 0; + + dw[5] = + __gen_field(values->ConstantIndirectURBEntryReadLength, 16, 31) | + __gen_field(values->ConstantURBEntryReadOffset, 0, 15) | + 0; + + dw[6] = + __gen_field(values->RoundingMode, 22, 23) | + __gen_field(values->BarrierEnable, 21, 21) | + __gen_field(values->SharedLocalMemorySize, 16, 20) | + __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 9) | + 0; + + dw[7] = + __gen_field(values->CrossThreadConstantDataReadLength, 0, 7) | + 0; + +} + +struct GEN8_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN8_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + +struct GEN8_RENDER_SURFACE_STATE { +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_BUFFER 4 +#define SURFTYPE_STRBUF 5 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + uint32_t SurfaceArray; + uint32_t SurfaceFormat; +#define VALIGN4 1 +#define VALIGN8 2 +#define VALIGN16 3 + uint32_t SurfaceVerticalAlignment; +#define HALIGN4 1 +#define HALIGN8 2 +#define HALIGN16 3 + uint32_t SurfaceHorizontalAlignment; +#define LINEAR 0 +#define WMAJOR 1 +#define XMAJOR 2 +#define YMAJOR 3 + uint32_t TileMode; + uint32_t VerticalLineStride; + uint32_t VerticalLineStrideOffset; + uint32_t SamplerL2BypassModeDisable; +#define WriteOnlyCache 0 +#define ReadWriteCache 1 + uint32_t RenderCacheReadWriteMode; +#define NORMAL_MODE 0 +#define PROGRESSIVE_FRAME 2 +#define INTERLACED_FRAME 3 + uint32_t MediaBoundaryPixelMode; + uint32_t CubeFaceEnablePositiveZ; + uint32_t 
CubeFaceEnableNegativeZ; + uint32_t CubeFaceEnablePositiveY; + uint32_t CubeFaceEnableNegativeY; + uint32_t CubeFaceEnablePositiveX; + uint32_t CubeFaceEnableNegativeX; + uint32_t MemoryObjectControlState; + uint32_t BaseMipLevel; + uint32_t SurfaceQPitch; + uint32_t Height; + uint32_t Width; + uint32_t Depth; + uint32_t SurfacePitch; +#define _0DEG 0 +#define _90DEG 1 +#define _270DEG 3 + uint32_t RenderTargetAndSampleUnormRotation; + uint32_t MinimumArrayElement; + uint32_t RenderTargetViewExtent; +#define MSS 0 +#define DEPTH_STENCIL 1 + uint32_t MultisampledSurfaceStorageFormat; +#define MULTISAMPLECOUNT_1 0 +#define MULTISAMPLECOUNT_2 1 +#define MULTISAMPLECOUNT_4 2 +#define MULTISAMPLECOUNT_8 3 + uint32_t NumberofMultisamples; + uint32_t MultisamplePositionPaletteIndex; + uint32_t XOffset; + uint32_t YOffset; + uint32_t EWADisableForCube; +#define GPUcoherent 0 +#define IAcoherent 1 + uint32_t CoherencyType; + uint32_t SurfaceMinLOD; + uint32_t MIPCountLOD; + uint32_t AuxiliarySurfaceQPitch; + uint32_t AuxiliarySurfacePitch; +#define AUX_NONE 0 +#define AUX_MCS 1 +#define AUX_APPEND 2 +#define AUX_HIZ 3 + uint32_t AuxiliarySurfaceMode; + uint32_t SeparateUVPlaneEnable; + uint32_t XOffsetforUorUVPlane; + uint32_t YOffsetforUorUVPlane; + uint32_t RedClearColor; + uint32_t GreenClearColor; + uint32_t BlueClearColor; + uint32_t AlphaClearColor; + uint32_t ShaderChannelSelectRed; + uint32_t ShaderChannelSelectGreen; + uint32_t ShaderChannelSelectBlue; + uint32_t ShaderChannelSelectAlpha; + uint32_t ResourceMinLOD; + __gen_address_type SurfaceBaseAddress; + uint32_t XOffsetforVPlane; + uint32_t YOffsetforVPlane; + uint32_t AuxiliaryTableIndexforMediaCompressedSurface; + __gen_address_type AuxiliarySurfaceBaseAddress; +}; + +static inline void +GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_RENDER_SURFACE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->SurfaceArray, 28, 28) | + __gen_field(values->SurfaceFormat, 18, 26) | + __gen_field(values->SurfaceVerticalAlignment, 16, 17) | + __gen_field(values->SurfaceHorizontalAlignment, 14, 15) | + __gen_field(values->TileMode, 12, 13) | + __gen_field(values->VerticalLineStride, 11, 11) | + __gen_field(values->VerticalLineStrideOffset, 10, 10) | + __gen_field(values->SamplerL2BypassModeDisable, 9, 9) | + __gen_field(values->RenderCacheReadWriteMode, 8, 8) | + __gen_field(values->MediaBoundaryPixelMode, 6, 7) | + __gen_field(values->CubeFaceEnablePositiveZ, 0, 0) | + __gen_field(values->CubeFaceEnableNegativeZ, 1, 1) | + __gen_field(values->CubeFaceEnablePositiveY, 2, 2) | + __gen_field(values->CubeFaceEnableNegativeY, 3, 3) | + __gen_field(values->CubeFaceEnablePositiveX, 4, 4) | + __gen_field(values->CubeFaceEnableNegativeX, 5, 5) | + 0; + + dw[1] = + /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->BaseMipLevel, 19, 23) | + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + + dw[2] = + __gen_field(values->Height, 16, 29) | + __gen_field(values->Width, 0, 13) | + 0; + + dw[3] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + dw[4] = + __gen_field(values->RenderTargetAndSampleUnormRotation, 29, 30) | + __gen_field(values->MinimumArrayElement, 18, 28) | + __gen_field(values->RenderTargetViewExtent, 7, 17) | + __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | + __gen_field(values->NumberofMultisamples, 3, 5) | + __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | + 0; + + dw[5] = + __gen_offset(values->XOffset, 25, 31) | + __gen_offset(values->YOffset, 21, 23) | + __gen_field(values->EWADisableForCube, 20, 20) | + __gen_field(values->CoherencyType, 14, 14) | + __gen_field(values->SurfaceMinLOD, 4, 7) | + __gen_field(values->MIPCountLOD, 0, 3) | + 0; + + dw[6] = + 
__gen_field(values->AuxiliarySurfaceQPitch, 16, 30) | + __gen_field(values->AuxiliarySurfacePitch, 3, 11) | + __gen_field(values->AuxiliarySurfaceMode, 0, 2) | + __gen_field(values->SeparateUVPlaneEnable, 31, 31) | + __gen_field(values->XOffsetforUorUVPlane, 16, 29) | + __gen_field(values->YOffsetforUorUVPlane, 0, 13) | + 0; + + dw[7] = + __gen_field(values->RedClearColor, 31, 31) | + __gen_field(values->GreenClearColor, 30, 30) | + __gen_field(values->BlueClearColor, 29, 29) | + __gen_field(values->AlphaClearColor, 28, 28) | + __gen_field(values->ShaderChannelSelectRed, 25, 27) | + __gen_field(values->ShaderChannelSelectGreen, 22, 24) | + __gen_field(values->ShaderChannelSelectBlue, 19, 21) | + __gen_field(values->ShaderChannelSelectAlpha, 16, 18) | + __gen_field(values->ResourceMinLOD, 0, 11) | + 0; + + uint32_t dw8 = + 0; + + dw[8] = + __gen_combine_address(data, &dw[8], values->SurfaceBaseAddress, dw8); + + uint32_t dw10 = + __gen_field(values->XOffsetforVPlane, 48, 61) | + __gen_field(values->YOffsetforVPlane, 32, 45) | + __gen_field(values->AuxiliaryTableIndexforMediaCompressedSurface, 21, 31) | + 0; + + dw[10] = + __gen_combine_address(data, &dw[10], values->AuxiliarySurfaceBaseAddress, dw10); + + dw[12] = + 0; + + dw[13] = + 0; + + dw[14] = + 0; + + dw[15] = + 0; + +} + +struct GEN8_SAMPLER_STATE { + uint32_t SamplerDisable; +#define DX10OGL 0 +#define DX9 1 + uint32_t TextureBorderColorMode; +#define CLAMP_NONE 0 +#define CLAMP_OGL 2 + uint32_t LODPreClampMode; + uint32_t BaseMipLevel; +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + uint32_t MipModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MagModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MinModeFilter; + uint32_t TextureLODBias; +#define LEGACY 0 +#define EWAApproximation 1 + uint32_t 
AnisotropicAlgorithm; + uint32_t MinLOD; + uint32_t MaxLOD; + uint32_t ChromaKeyEnable; + uint32_t ChromaKeyIndex; +#define KEYFILTER_KILL_ON_ANY_MATCH 0 +#define KEYFILTER_REPLACE_BLACK 1 + uint32_t ChromaKeyMode; +#define PREFILTEROPALWAYS 0 +#define PREFILTEROPNEVER 1 +#define PREFILTEROPLESS 2 +#define PREFILTEROPEQUAL 3 +#define PREFILTEROPLEQUAL 4 +#define PREFILTEROPGREATER 5 +#define PREFILTEROPNOTEQUAL 6 +#define PREFILTEROPGEQUAL 7 + uint32_t ShadowFunction; +#define PROGRAMMED 0 +#define OVERRIDE 1 + uint32_t CubeSurfaceControlMode; + uint32_t IndirectStatePointer; +#define MIPNONE 0 +#define MIPFILTER 1 + uint32_t LODClampMagnificationMode; +#define RATIO21 0 +#define RATIO41 1 +#define RATIO61 2 +#define RATIO81 3 +#define RATIO101 4 +#define RATIO121 5 +#define RATIO141 6 +#define RATIO161 7 + uint32_t MaximumAnisotropy; + uint32_t RAddressMinFilterRoundingEnable; + uint32_t RAddressMagFilterRoundingEnable; + uint32_t VAddressMinFilterRoundingEnable; + uint32_t VAddressMagFilterRoundingEnable; + uint32_t UAddressMinFilterRoundingEnable; + uint32_t UAddressMagFilterRoundingEnable; +#define FULL 0 +#define HIGH 1 +#define MED 2 +#define LOW 3 + uint32_t TrilinearFilterQuality; + uint32_t NonnormalizedCoordinateEnable; + uint32_t TCXAddressControlMode; + uint32_t TCYAddressControlMode; + uint32_t TCZAddressControlMode; +}; + +static inline void +GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SAMPLER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SamplerDisable, 31, 31) | + __gen_field(values->TextureBorderColorMode, 29, 29) | + __gen_field(values->LODPreClampMode, 27, 28) | + __gen_field(values->BaseMipLevel, 22, 26) | + __gen_field(values->MipModeFilter, 20, 21) | + __gen_field(values->MagModeFilter, 17, 19) | + __gen_field(values->MinModeFilter, 14, 16) | + __gen_field(values->TextureLODBias, 1, 13) | + __gen_field(values->AnisotropicAlgorithm, 0, 0) | + 
0; + + dw[1] = + __gen_field(values->MinLOD, 20, 31) | + __gen_field(values->MaxLOD, 8, 19) | + __gen_field(values->ChromaKeyEnable, 7, 7) | + __gen_field(values->ChromaKeyIndex, 5, 6) | + __gen_field(values->ChromaKeyMode, 4, 4) | + __gen_field(values->ShadowFunction, 1, 3) | + __gen_field(values->CubeSurfaceControlMode, 0, 0) | + 0; + + dw[2] = + __gen_field(values->IndirectStatePointer, 6, 23) | + __gen_field(values->LODClampMagnificationMode, 0, 0) | + 0; + + dw[3] = + __gen_field(values->MaximumAnisotropy, 19, 21) | + __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | + __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | + __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | + __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | + __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | + __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | + __gen_field(values->TrilinearFilterQuality, 11, 12) | + __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | + __gen_field(values->TCXAddressControlMode, 6, 8) | + __gen_field(values->TCYAddressControlMode, 3, 5) | + __gen_field(values->TCZAddressControlMode, 0, 2) | + 0; + +} + +/* Enum 3D_Prim_Topo_Type */ +#ifndef _3DPRIM_POINTLIST +#define _3DPRIM_POINTLIST 1 +#define _3DPRIM_LINELIST 2 +#define _3DPRIM_LINESTRIP 3 +#define _3DPRIM_TRILIST 4 +#define _3DPRIM_TRISTRIP 5 +#define _3DPRIM_TRIFAN 6 +#define _3DPRIM_QUADLIST 7 +#define _3DPRIM_QUADSTRIP 8 +#define _3DPRIM_LINELIST_ADJ 9 +#define _3DPRIM_LISTSTRIP_ADJ 10 +#define _3DPRIM_TRILIST_ADJ 11 +#define _3DPRIM_TRISTRIP_ADJ 12 +#define _3DPRIM_TRISTRIP_REVERSE 13 +#define _3DPRIM_POLYGON 14 +#define _3DPRIM_RECTLIST 15 +#define _3DPRIM_LINELOOP 16 +#define _3DPRIM_POINTLIST_BF 17 +#define _3DPRIM_LINESTRIP_CONT 18 +#define _3DPRIM_LINESTRIP_BF 19 +#define _3DPRIM_LINESTRIP_CONT_BF 20 +#define _3DPRIM_TRIFAN_NOSTIPPLE 22 +#define _3DPRIM_PATCHLIST_1 32 +#define _3DPRIM_PATCHLIST_2 33 +#define 
_3DPRIM_PATCHLIST_3 34 +#define _3DPRIM_PATCHLIST_4 35 +#define _3DPRIM_PATCHLIST_5 36 +#define _3DPRIM_PATCHLIST_6 37 +#define _3DPRIM_PATCHLIST_7 38 +#define _3DPRIM_PATCHLIST_8 39 +#define _3DPRIM_PATCHLIST_9 40 +#define _3DPRIM_PATCHLIST_10 41 +#define _3DPRIM_PATCHLIST_11 42 +#define _3DPRIM_PATCHLIST_12 43 +#define _3DPRIM_PATCHLIST_13 44 +#define _3DPRIM_PATCHLIST_14 45 +#define _3DPRIM_PATCHLIST_15 46 +#define _3DPRIM_PATCHLIST_16 47 +#define _3DPRIM_PATCHLIST_17 48 +#define _3DPRIM_PATCHLIST_18 49 +#define _3DPRIM_PATCHLIST_19 50 +#define _3DPRIM_PATCHLIST_20 51 +#define _3DPRIM_PATCHLIST_21 52 +#define _3DPRIM_PATCHLIST_22 53 +#define _3DPRIM_PATCHLIST_23 54 +#define _3DPRIM_PATCHLIST_24 55 +#define _3DPRIM_PATCHLIST_25 56 +#define _3DPRIM_PATCHLIST_26 57 +#define _3DPRIM_PATCHLIST_27 58 +#define _3DPRIM_PATCHLIST_28 59 +#define _3DPRIM_PATCHLIST_29 60 +#define _3DPRIM_PATCHLIST_30 61 +#define _3DPRIM_PATCHLIST_31 62 +#define _3DPRIM_PATCHLIST_32 63 +#endif + +/* Enum 3D_Vertex_Component_Control */ +#define VFCOMP_NOSTORE 0 +#define VFCOMP_STORE_SRC 1 +#define VFCOMP_STORE_0 2 +#define VFCOMP_STORE_1_FP 3 +#define VFCOMP_STORE_1_INT 4 +#define VFCOMP_STORE_PID 7 + +/* Enum WRAP_SHORTEST_ENABLE */ +#define X 1 +#define Y 2 +#define XY 3 +#define Z 4 +#define XZ 5 +#define YZ 6 +#define XYZ 7 +#define W 8 +#define XW 9 +#define YW 10 +#define XYW 11 +#define ZW 12 +#define XZW 13 +#define YZW 14 +#define XYZW 15 + +/* Enum 3D_Stencil_Operation */ +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + +/* Enum 3D_Color_Buffer_Blend_Factor */ +#define BLENDFACTOR_ONE 1 +#define BLENDFACTOR_SRC_COLOR 2 +#define BLENDFACTOR_SRC_ALPHA 3 +#define BLENDFACTOR_DST_ALPHA 4 +#define BLENDFACTOR_DST_COLOR 5 +#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 +#define BLENDFACTOR_CONST_COLOR 7 +#define 
BLENDFACTOR_CONST_ALPHA 8 +#define BLENDFACTOR_SRC1_COLOR 9 +#define BLENDFACTOR_SRC1_ALPHA 10 +#define BLENDFACTOR_ZERO 17 +#define BLENDFACTOR_INV_SRC_COLOR 18 +#define BLENDFACTOR_INV_SRC_ALPHA 19 +#define BLENDFACTOR_INV_DST_ALPHA 20 +#define BLENDFACTOR_INV_DST_COLOR 21 +#define BLENDFACTOR_INV_CONST_COLOR 23 +#define BLENDFACTOR_INV_CONST_ALPHA 24 +#define BLENDFACTOR_INV_SRC1_COLOR 25 +#define BLENDFACTOR_INV_SRC1_ALPHA 26 + +/* Enum 3D_Color_Buffer_Blend_Function */ +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + +/* Enum 3D_Compare_Function */ +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + +/* Enum 3D_Logic_Op_Function */ +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 1 +#define LOGICOP_AND_INVERTED 2 +#define LOGICOP_COPY_INVERTED 3 +#define LOGICOP_AND_REVERSE 4 +#define LOGICOP_INVERT 5 +#define LOGICOP_XOR 6 +#define LOGICOP_NAND 7 +#define LOGICOP_AND 8 +#define LOGICOP_EQUIV 9 +#define LOGICOP_NOOP 10 +#define LOGICOP_OR_INVERTED 11 +#define LOGICOP_COPY 12 +#define LOGICOP_OR_REVERSE 13 +#define LOGICOP_OR 14 +#define LOGICOP_SET 15 + +/* Enum SURFACE_FORMAT */ +#define R32G32B32A32_FLOAT 0 +#define R32G32B32A32_SINT 1 +#define R32G32B32A32_UINT 2 +#define R32G32B32A32_UNORM 3 +#define R32G32B32A32_SNORM 4 +#define R64G64_FLOAT 5 +#define R32G32B32X32_FLOAT 6 +#define R32G32B32A32_SSCALED 7 +#define R32G32B32A32_USCALED 8 +#define R32G32B32A32_SFIXED 32 +#define R64G64_PASSTHRU 33 +#define R32G32B32_FLOAT 64 +#define R32G32B32_SINT 65 +#define R32G32B32_UINT 66 +#define R32G32B32_UNORM 67 +#define R32G32B32_SNORM 68 +#define R32G32B32_SSCALED 69 +#define R32G32B32_USCALED 70 +#define R32G32B32_SFIXED 80 +#define 
R16G16B16A16_UNORM 128 +#define R16G16B16A16_SNORM 129 +#define R16G16B16A16_SINT 130 +#define R16G16B16A16_UINT 131 +#define R16G16B16A16_FLOAT 132 +#define R32G32_FLOAT 133 +#define R32G32_SINT 134 +#define R32G32_UINT 135 +#define R32_FLOAT_X8X24_TYPELESS 136 +#define X32_TYPELESS_G8X24_UINT 137 +#define L32A32_FLOAT 138 +#define R32G32_UNORM 139 +#define R32G32_SNORM 140 +#define R64_FLOAT 141 +#define R16G16B16X16_UNORM 142 +#define R16G16B16X16_FLOAT 143 +#define A32X32_FLOAT 144 +#define L32X32_FLOAT 145 +#define I32X32_FLOAT 146 +#define R16G16B16A16_SSCALED 147 +#define R16G16B16A16_USCALED 148 +#define R32G32_SSCALED 149 +#define R32G32_USCALED 150 +#define R32G32_SFIXED 160 +#define R64_PASSTHRU 161 +#define B8G8R8A8_UNORM 192 +#define B8G8R8A8_UNORM_SRGB 193 +#define R10G10B10A2_UNORM 194 +#define R10G10B10A2_UNORM_SRGB 195 +#define R10G10B10A2_UINT 196 +#define R10G10B10_SNORM_A2_UNORM 197 +#define R8G8B8A8_UNORM 199 +#define R8G8B8A8_UNORM_SRGB 200 +#define R8G8B8A8_SNORM 201 +#define R8G8B8A8_SINT 202 +#define R8G8B8A8_UINT 203 +#define R16G16_UNORM 204 +#define R16G16_SNORM 205 +#define R16G16_SINT 206 +#define R16G16_UINT 207 +#define R16G16_FLOAT 208 +#define B10G10R10A2_UNORM 209 +#define B10G10R10A2_UNORM_SRGB 210 +#define R11G11B10_FLOAT 211 +#define R32_SINT 214 +#define R32_UINT 215 +#define R32_FLOAT 216 +#define R24_UNORM_X8_TYPELESS 217 +#define X24_TYPELESS_G8_UINT 218 +#define L32_UNORM 221 +#define A32_UNORM 222 +#define L16A16_UNORM 223 +#define I24X8_UNORM 224 +#define L24X8_UNORM 225 +#define A24X8_UNORM 226 +#define I32_FLOAT 227 +#define L32_FLOAT 228 +#define A32_FLOAT 229 +#define X8B8_UNORM_G8R8_SNORM 230 +#define A8X8_UNORM_G8R8_SNORM 231 +#define B8X8_UNORM_G8R8_SNORM 232 +#define B8G8R8X8_UNORM 233 +#define B8G8R8X8_UNORM_SRGB 234 +#define R8G8B8X8_UNORM 235 +#define R8G8B8X8_UNORM_SRGB 236 +#define R9G9B9E5_SHAREDEXP 237 +#define B10G10R10X2_UNORM 238 +#define L16A16_FLOAT 240 +#define R32_UNORM 241 +#define R32_SNORM 242 
+#define R10G10B10X2_USCALED 243 +#define R8G8B8A8_SSCALED 244 +#define R8G8B8A8_USCALED 245 +#define R16G16_SSCALED 246 +#define R16G16_USCALED 247 +#define R32_SSCALED 248 +#define R32_USCALED 249 +#define B5G6R5_UNORM 256 +#define B5G6R5_UNORM_SRGB 257 +#define B5G5R5A1_UNORM 258 +#define B5G5R5A1_UNORM_SRGB 259 +#define B4G4R4A4_UNORM 260 +#define B4G4R4A4_UNORM_SRGB 261 +#define R8G8_UNORM 262 +#define R8G8_SNORM 263 +#define R8G8_SINT 264 +#define R8G8_UINT 265 +#define R16_UNORM 266 +#define R16_SNORM 267 +#define R16_SINT 268 +#define R16_UINT 269 +#define R16_FLOAT 270 +#define A8P8_UNORM_PALETTE0 271 +#define A8P8_UNORM_PALETTE1 272 +#define I16_UNORM 273 +#define L16_UNORM 274 +#define A16_UNORM 275 +#define L8A8_UNORM 276 +#define I16_FLOAT 277 +#define L16_FLOAT 278 +#define A16_FLOAT 279 +#define L8A8_UNORM_SRGB 280 +#define R5G5_SNORM_B6_UNORM 281 +#define B5G5R5X1_UNORM 282 +#define B5G5R5X1_UNORM_SRGB 283 +#define R8G8_SSCALED 284 +#define R8G8_USCALED 285 +#define R16_SSCALED 286 +#define R16_USCALED 287 +#define P8A8_UNORM_PALETTE0 290 +#define P8A8_UNORM_PALETTE1 291 +#define A1B5G5R5_UNORM 292 +#define A4B4G4R4_UNORM 293 +#define L8A8_UINT 294 +#define L8A8_SINT 295 +#define R8_UNORM 320 +#define R8_SNORM 321 +#define R8_SINT 322 +#define R8_UINT 323 +#define A8_UNORM 324 +#define I8_UNORM 325 +#define L8_UNORM 326 +#define P4A4_UNORM_PALETTE0 327 +#define A4P4_UNORM_PALETTE0 328 +#define R8_SSCALED 329 +#define R8_USCALED 330 +#define P8_UNORM_PALETTE0 331 +#define L8_UNORM_SRGB 332 +#define P8_UNORM_PALETTE1 333 +#define P4A4_UNORM_PALETTE1 334 +#define A4P4_UNORM_PALETTE1 335 +#define Y8_UNORM 336 +#define L8_UINT 338 +#define L8_SINT 339 +#define I8_UINT 340 +#define I8_SINT 341 +#define DXT1_RGB_SRGB 384 +#define R1_UNORM 385 +#define YCRCB_NORMAL 386 +#define YCRCB_SWAPUVY 387 +#define P2_UNORM_PALETTE0 388 +#define P2_UNORM_PALETTE1 389 +#define BC1_UNORM 390 +#define BC2_UNORM 391 +#define BC3_UNORM 392 +#define BC4_UNORM 393 +#define 
BC5_UNORM 394 +#define BC1_UNORM_SRGB 395 +#define BC2_UNORM_SRGB 396 +#define BC3_UNORM_SRGB 397 +#define MONO8 398 +#define YCRCB_SWAPUV 399 +#define YCRCB_SWAPY 400 +#define DXT1_RGB 401 +#define FXT1 402 +#define R8G8B8_UNORM 403 +#define R8G8B8_SNORM 404 +#define R8G8B8_SSCALED 405 +#define R8G8B8_USCALED 406 +#define R64G64B64A64_FLOAT 407 +#define R64G64B64_FLOAT 408 +#define BC4_SNORM 409 +#define BC5_SNORM 410 +#define R16G16B16_FLOAT 411 +#define R16G16B16_UNORM 412 +#define R16G16B16_SNORM 413 +#define R16G16B16_SSCALED 414 +#define R16G16B16_USCALED 415 +#define BC6H_SF16 417 +#define BC7_UNORM 418 +#define BC7_UNORM_SRGB 419 +#define BC6H_UF16 420 +#define PLANAR_420_8 421 +#define R8G8B8_UNORM_SRGB 424 +#define ETC1_RGB8 425 +#define ETC2_RGB8 426 +#define EAC_R11 427 +#define EAC_RG11 428 +#define EAC_SIGNED_R11 429 +#define EAC_SIGNED_RG11 430 +#define ETC2_SRGB8 431 +#define R16G16B16_UINT 432 +#define R16G16B16_SINT 433 +#define R32_SFIXED 434 +#define R10G10B10A2_SNORM 435 +#define R10G10B10A2_USCALED 436 +#define R10G10B10A2_SSCALED 437 +#define R10G10B10A2_SINT 438 +#define B10G10R10A2_SNORM 439 +#define B10G10R10A2_USCALED 440 +#define B10G10R10A2_SSCALED 441 +#define B10G10R10A2_UINT 442 +#define B10G10R10A2_SINT 443 +#define R64G64B64A64_PASSTHRU 444 +#define R64G64B64_PASSTHRU 445 +#define ETC2_RGB8_PTA 448 +#define ETC2_SRGB8_PTA 449 +#define ETC2_EAC_RGBA8 450 +#define ETC2_EAC_SRGB8_A8 451 +#define R8G8B8_UINT 456 +#define R8G8B8_SINT 457 +#define RAW 511 + +/* Enum Shader Channel Select */ +#define SCS_ZERO 0 +#define SCS_ONE 1 +#define SCS_RED 4 +#define SCS_GREEN 5 +#define SCS_BLUE 6 +#define SCS_ALPHA 7 + +/* Enum Clear Color */ +#define CC_ZERO 0 +#define CC_ONE 1 + +/* Enum Texture Coordinate Mode */ +#define TCM_WRAP 0 +#define TCM_MIRROR 1 +#define TCM_CLAMP 2 +#define TCM_CUBE 3 +#define TCM_CLAMP_BORDER 4 +#define TCM_MIRROR_ONCE 5 +#define TCM_HALF_BORDER 6 + diff --git a/src/vulkan/image.c b/src/vulkan/image.c new file mode 
100644 index 00000000000..a5357198225 --- /dev/null +++ b/src/vulkan/image.c @@ -0,0 +1,404 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "private.h" + +// Image functions + +static const struct anv_format anv_formats[] = { + [VK_FORMAT_UNDEFINED] = { .format = RAW }, + // [VK_FORMAT_R4G4_UNORM] = { .format = R4G4_UNORM }, + // [VK_FORMAT_R4G4_USCALED] = { .format = R4G4_USCALED }, + // [VK_FORMAT_R4G4B4A4_UNORM] = { .format = R4G4B4A4_UNORM }, + // [VK_FORMAT_R4G4B4A4_USCALED] = { .format = R4G4B4A4_USCALED }, + // [VK_FORMAT_R5G6B5_UNORM] = { .format = R5G6B5_UNORM }, + // [VK_FORMAT_R5G6B5_USCALED] = { .format = R5G6B5_USCALED }, + // [VK_FORMAT_R5G5B5A1_UNORM] = { .format = R5G5B5A1_UNORM }, + // [VK_FORMAT_R5G5B5A1_USCALED] = { .format = R5G5B5A1_USCALED }, + [VK_FORMAT_R8_UNORM] = { .format = R8_UNORM, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_SNORM] = { .format = R8_SNORM, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_USCALED] = { .format = R8_USCALED, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_SSCALED] = { .format = R8_SSCALED, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_UINT] = { .format = R8_UINT, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_SINT] = { .format = R8_SINT, .cpp = 1, .channels = 1 }, + // [VK_FORMAT_R8_SRGB] = { .format = R8_SRGB, .cpp = 1 }, + [VK_FORMAT_R8G8_UNORM] = { .format = R8G8_UNORM, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_SNORM] = { .format = R8G8_SNORM, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_USCALED] = { .format = R8G8_USCALED, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_SSCALED] = { .format = R8G8_SSCALED, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_UINT] = { .format = R8G8_UINT, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_SINT] = { .format = R8G8_SINT, .cpp = 2, .channels = 2 }, + // [VK_FORMAT_R8G8_SRGB] = { .format = R8G8_SRGB }, + [VK_FORMAT_R8G8B8_UNORM] = { .format = R8G8B8X8_UNORM, .cpp = 3, .channels = 3 }, + // [VK_FORMAT_R8G8B8_SNORM] = { .format = R8G8B8X8_SNORM, .cpp = 4 }, + [VK_FORMAT_R8G8B8_USCALED] = { .format = R8G8B8_USCALED, .cpp = 3, .channels = 3 }, + 
[VK_FORMAT_R8G8B8_SSCALED] = { .format = R8G8B8_SSCALED, .cpp = 3, .channels = 3 }, + [VK_FORMAT_R8G8B8_UINT] = { .format = R8G8B8_UINT, .cpp = 3, .channels = 3 }, + [VK_FORMAT_R8G8B8_SINT] = { .format = R8G8B8_SINT, .cpp = 3, .channels = 3 }, + // [VK_FORMAT_R8G8B8_SRGB] = { .format = R8G8B8_SRGB }, + [VK_FORMAT_R8G8B8A8_UNORM] = { .format = R8G8B8A8_UNORM, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_SNORM] = { .format = R8G8B8A8_SNORM, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_USCALED] = { .format = R8G8B8A8_USCALED, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_SSCALED] = { .format = R8G8B8A8_SSCALED, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_UINT] = { .format = R8G8B8A8_UINT, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_SINT] = { .format = R8G8B8A8_SINT, .cpp = 4, .channels = 4 }, + // [VK_FORMAT_R8G8B8A8_SRGB] = { .format = R8G8B8A8_SRGB }, + // [VK_FORMAT_R10G10B10A2_UNORM] = { .format = R10G10B10A2_UNORM }, + // [VK_FORMAT_R10G10B10A2_SNORM] = { .format = R10G10B10A2_SNORM }, + // [VK_FORMAT_R10G10B10A2_USCALED] = { .format = R10G10B10A2_USCALED }, + // [VK_FORMAT_R10G10B10A2_SSCALED] = { .format = R10G10B10A2_SSCALED }, + // [VK_FORMAT_R10G10B10A2_UINT] = { .format = R10G10B10A2_UINT }, + // [VK_FORMAT_R10G10B10A2_SINT] = { .format = R10G10B10A2_SINT }, + // [VK_FORMAT_R16_UNORM] = { .format = R16_UNORM }, + // [VK_FORMAT_R16_SNORM] = { .format = R16_SNORM }, + // [VK_FORMAT_R16_USCALED] = { .format = R16_USCALED }, + // [VK_FORMAT_R16_SSCALED] = { .format = R16_SSCALED }, + // [VK_FORMAT_R16_UINT] = { .format = R16_UINT }, + // [VK_FORMAT_R16_SINT] = { .format = R16_SINT }, + [VK_FORMAT_R16_SFLOAT] = { .format = R16_FLOAT, .cpp = 2, .channels = 1 }, + // [VK_FORMAT_R16G16_UNORM] = { .format = R16G16_UNORM }, + // [VK_FORMAT_R16G16_SNORM] = { .format = R16G16_SNORM }, + // [VK_FORMAT_R16G16_USCALED] = { .format = R16G16_USCALED }, + // [VK_FORMAT_R16G16_SSCALED] = { .format = R16G16_SSCALED }, + // [VK_FORMAT_R16G16_UINT] = { 
.format = R16G16_UINT }, + // [VK_FORMAT_R16G16_SINT] = { .format = R16G16_SINT }, + [VK_FORMAT_R16G16_SFLOAT] = { .format = R16G16_FLOAT, .cpp = 4, .channels = 2 }, + // [VK_FORMAT_R16G16B16_UNORM] = { .format = R16G16B16_UNORM }, + // [VK_FORMAT_R16G16B16_SNORM] = { .format = R16G16B16_SNORM }, + // [VK_FORMAT_R16G16B16_USCALED] = { .format = R16G16B16_USCALED }, + // [VK_FORMAT_R16G16B16_SSCALED] = { .format = R16G16B16_SSCALED }, + // [VK_FORMAT_R16G16B16_UINT] = { .format = R16G16B16_UINT }, + // [VK_FORMAT_R16G16B16_SINT] = { .format = R16G16B16_SINT }, + [VK_FORMAT_R16G16B16_SFLOAT] = { .format = R16G16B16_FLOAT, .cpp = 6, .channels = 3 }, + // [VK_FORMAT_R16G16B16A16_UNORM] = { .format = R16G16B16A16_UNORM }, + // [VK_FORMAT_R16G16B16A16_SNORM] = { .format = R16G16B16A16_SNORM }, + // [VK_FORMAT_R16G16B16A16_USCALED] = { .format = R16G16B16A16_USCALED }, + // [VK_FORMAT_R16G16B16A16_SSCALED] = { .format = R16G16B16A16_SSCALED }, + // [VK_FORMAT_R16G16B16A16_UINT] = { .format = R16G16B16A16_UINT }, + // [VK_FORMAT_R16G16B16A16_SINT] = { .format = R16G16B16A16_SINT }, + [VK_FORMAT_R16G16B16A16_SFLOAT] = { .format = R16G16B16A16_FLOAT, .cpp = 8, .channels = 4 }, + // [VK_FORMAT_R32_UINT] = { .format = R32_UINT }, + // [VK_FORMAT_R32_SINT] = { .format = R32_SINT }, + [VK_FORMAT_R32_SFLOAT] = { .format = R32_FLOAT, .cpp = 4, .channels = 1 }, + // [VK_FORMAT_R32G32_UINT] = { .format = R32G32_UINT }, + // [VK_FORMAT_R32G32_SINT] = { .format = R32G32_SINT }, + [VK_FORMAT_R32G32_SFLOAT] = { .format = R32G32_FLOAT, .cpp = 8, .channels = 2 }, + // [VK_FORMAT_R32G32B32_UINT] = { .format = R32G32B32_UINT }, + // [VK_FORMAT_R32G32B32_SINT] = { .format = R32G32B32_SINT }, + [VK_FORMAT_R32G32B32_SFLOAT] = { .format = R32G32B32_FLOAT, .cpp = 12, .channels = 3 }, + // [VK_FORMAT_R32G32B32A32_UINT] = { .format = R32G32B32A32_UINT }, + // [VK_FORMAT_R32G32B32A32_SINT] = { .format = R32G32B32A32_SINT }, + [VK_FORMAT_R32G32B32A32_SFLOAT] = { .format = R32G32B32A32_FLOAT, .cpp = 
16, .channels = 4 }, + [VK_FORMAT_R64_SFLOAT] = { .format = R64_FLOAT, .cpp = 8, .channels = 1 }, + [VK_FORMAT_R64G64_SFLOAT] = { .format = R64G64_FLOAT, .cpp = 16, .channels = 2 }, + [VK_FORMAT_R64G64B64_SFLOAT] = { .format = R64G64B64_FLOAT, .cpp = 24, .channels = 3 }, + [VK_FORMAT_R64G64B64A64_SFLOAT] = { .format = R64G64B64A64_FLOAT, .cpp = 32, .channels = 4 }, + // [VK_FORMAT_R11G11B10_UFLOAT] = { .format = R11G11B10_UFLOAT }, + // [VK_FORMAT_R9G9B9E5_UFLOAT] = { .format = R9G9B9E5_UFLOAT }, + // [VK_FORMAT_D16_UNORM] = { .format = D16_UNORM }, + // [VK_FORMAT_D24_UNORM] = { .format = D24_UNORM }, + // [VK_FORMAT_D32_SFLOAT] = { .format = D32_SFLOAT }, + // [VK_FORMAT_S8_UINT] = { .format = S8_UINT }, + // [VK_FORMAT_D16_UNORM_S8_UINT] = { .format = D16_UNORM }, + // [VK_FORMAT_D24_UNORM_S8_UINT] = { .format = D24_UNORM }, + // [VK_FORMAT_D32_SFLOAT_S8_UINT] = { .format = D32_SFLOAT }, + // [VK_FORMAT_BC1_RGB_UNORM] = { .format = BC1_RGB }, + // [VK_FORMAT_BC1_RGB_SRGB] = { .format = BC1_RGB }, + // [VK_FORMAT_BC1_RGBA_UNORM] = { .format = BC1_RGBA }, + // [VK_FORMAT_BC1_RGBA_SRGB] = { .format = BC1_RGBA }, + // [VK_FORMAT_BC2_UNORM] = { .format = BC2_UNORM }, + // [VK_FORMAT_BC2_SRGB] = { .format = BC2_SRGB }, + // [VK_FORMAT_BC3_UNORM] = { .format = BC3_UNORM }, + // [VK_FORMAT_BC3_SRGB] = { .format = BC3_SRGB }, + // [VK_FORMAT_BC4_UNORM] = { .format = BC4_UNORM }, + // [VK_FORMAT_BC4_SNORM] = { .format = BC4_SNORM }, + // [VK_FORMAT_BC5_UNORM] = { .format = BC5_UNORM }, + // [VK_FORMAT_BC5_SNORM] = { .format = BC5_SNORM }, + // [VK_FORMAT_BC6H_UFLOAT] = { .format = BC6H_UFLOAT }, + // [VK_FORMAT_BC6H_SFLOAT] = { .format = BC6H_SFLOAT }, + // [VK_FORMAT_BC7_UNORM] = { .format = BC7_UNORM }, + // [VK_FORMAT_BC7_SRGB] = { .format = BC7_SRGB }, + // [VK_FORMAT_ETC2_R8G8B8_UNORM] = { .format = ETC2_R8G8B8 }, + // [VK_FORMAT_ETC2_R8G8B8_SRGB] = { .format = ETC2_R8G8B8 }, + // [VK_FORMAT_ETC2_R8G8B8A1_UNORM] = { .format = ETC2_R8G8B8A1 }, + // 
[VK_FORMAT_ETC2_R8G8B8A1_SRGB] = { .format = ETC2_R8G8B8A1 }, + // [VK_FORMAT_ETC2_R8G8B8A8_UNORM] = { .format = ETC2_R8G8B8A8 }, + // [VK_FORMAT_ETC2_R8G8B8A8_SRGB] = { .format = ETC2_R8G8B8A8 }, + // [VK_FORMAT_EAC_R11_UNORM] = { .format = EAC_R11 }, + // [VK_FORMAT_EAC_R11_SNORM] = { .format = EAC_R11 }, + // [VK_FORMAT_EAC_R11G11_UNORM] = { .format = EAC_R11G11 }, + // [VK_FORMAT_EAC_R11G11_SNORM] = { .format = EAC_R11G11 }, + // [VK_FORMAT_ASTC_4x4_UNORM] = { .format = ASTC_4x4 }, + // [VK_FORMAT_ASTC_4x4_SRGB] = { .format = ASTC_4x4 }, + // [VK_FORMAT_ASTC_5x4_UNORM] = { .format = ASTC_5x4 }, + // [VK_FORMAT_ASTC_5x4_SRGB] = { .format = ASTC_5x4 }, + // [VK_FORMAT_ASTC_5x5_UNORM] = { .format = ASTC_5x5 }, + // [VK_FORMAT_ASTC_5x5_SRGB] = { .format = ASTC_5x5 }, + // [VK_FORMAT_ASTC_6x5_UNORM] = { .format = ASTC_6x5 }, + // [VK_FORMAT_ASTC_6x5_SRGB] = { .format = ASTC_6x5 }, + // [VK_FORMAT_ASTC_6x6_UNORM] = { .format = ASTC_6x6 }, + // [VK_FORMAT_ASTC_6x6_SRGB] = { .format = ASTC_6x6 }, + // [VK_FORMAT_ASTC_8x5_UNORM] = { .format = ASTC_8x5 }, + // [VK_FORMAT_ASTC_8x5_SRGB] = { .format = ASTC_8x5 }, + // [VK_FORMAT_ASTC_8x6_UNORM] = { .format = ASTC_8x6 }, + // [VK_FORMAT_ASTC_8x6_SRGB] = { .format = ASTC_8x6 }, + // [VK_FORMAT_ASTC_8x8_UNORM] = { .format = ASTC_8x8 }, + // [VK_FORMAT_ASTC_8x8_SRGB] = { .format = ASTC_8x8 }, + // [VK_FORMAT_ASTC_10x5_UNORM] = { .format = ASTC_10x5 }, + // [VK_FORMAT_ASTC_10x5_SRGB] = { .format = ASTC_10x5 }, + // [VK_FORMAT_ASTC_10x6_UNORM] = { .format = ASTC_10x6 }, + // [VK_FORMAT_ASTC_10x6_SRGB] = { .format = ASTC_10x6 }, + // [VK_FORMAT_ASTC_10x8_UNORM] = { .format = ASTC_10x8 }, + // [VK_FORMAT_ASTC_10x8_SRGB] = { .format = ASTC_10x8 }, + // [VK_FORMAT_ASTC_10x10_UNORM] = { .format = ASTC_10x10 }, + // [VK_FORMAT_ASTC_10x10_SRGB] = { .format = ASTC_10x10 }, + // [VK_FORMAT_ASTC_12x10_UNORM] = { .format = ASTC_12x10 }, + // [VK_FORMAT_ASTC_12x10_SRGB] = { .format = ASTC_12x10 }, + // [VK_FORMAT_ASTC_12x12_UNORM] = { 
.format = ASTC_12x12 }, + // [VK_FORMAT_ASTC_12x12_SRGB] = { .format = ASTC_12x12 }, + // [VK_FORMAT_B4G4R4A4_UNORM] = { .format = B4G4R4A4_UNORM }, + // [VK_FORMAT_B5G5R5A1_UNORM] = { .format = B5G5R5A1_UNORM }, + // [VK_FORMAT_B5G6R5_UNORM] = { .format = B5G6R5_UNORM }, + // [VK_FORMAT_B5G6R5_USCALED] = { .format = B5G6R5_USCALED }, + // [VK_FORMAT_B8G8R8_UNORM] = { .format = B8G8R8_UNORM }, + // [VK_FORMAT_B8G8R8_SNORM] = { .format = B8G8R8_SNORM }, + // [VK_FORMAT_B8G8R8_USCALED] = { .format = B8G8R8_USCALED }, + // [VK_FORMAT_B8G8R8_SSCALED] = { .format = B8G8R8_SSCALED }, + // [VK_FORMAT_B8G8R8_UINT] = { .format = B8G8R8_UINT }, + // [VK_FORMAT_B8G8R8_SINT] = { .format = B8G8R8_SINT }, + // [VK_FORMAT_B8G8R8_SRGB] = { .format = B8G8R8_SRGB }, + [VK_FORMAT_B8G8R8A8_UNORM] = { .format = B8G8R8A8_UNORM, .cpp = 4, .channels = 4 }, + // [VK_FORMAT_B8G8R8A8_SNORM] = { .format = B8G8R8A8_SNORM }, + // [VK_FORMAT_B8G8R8A8_USCALED] = { .format = B8G8R8A8_USCALED }, + // [VK_FORMAT_B8G8R8A8_SSCALED] = { .format = B8G8R8A8_SSCALED }, + // [VK_FORMAT_B8G8R8A8_UINT] = { .format = B8G8R8A8_UINT }, + // [VK_FORMAT_B8G8R8A8_SINT] = { .format = B8G8R8A8_SINT }, + // [VK_FORMAT_B8G8R8A8_SRGB] = { .format = B8G8R8A8_SRGB }, + // [VK_FORMAT_B10G10R10A2_UNORM] = { .format = B10G10R10A2_UNORM }, + // [VK_FORMAT_B10G10R10A2_SNORM] = { .format = B10G10R10A2_SNORM }, + // [VK_FORMAT_B10G10R10A2_USCALED] = { .format = B10G10R10A2_USCALED }, + // [VK_FORMAT_B10G10R10A2_SSCALED] = { .format = B10G10R10A2_SSCALED }, + // [VK_FORMAT_B10G10R10A2_UINT] = { .format = B10G10R10A2_UINT }, + // [VK_FORMAT_B10G10R10A2_SINT] = { .format = B10G10R10A2_SINT } +}; + +const struct anv_format * +anv_format_for_vk_format(VkFormat format) +{ + return &anv_formats[format]; +} + +static const struct anv_tile_mode_info { + int32_t tile_width; + int32_t tile_height; +} tile_mode_info[] = { + [LINEAR] = { 1, 1 }, + [XMAJOR] = { 512, 8 }, + [YMAJOR] = { 128, 32 }, + [WMAJOR] = { 128, 32 } +}; + +VkResult 
VKAPI vkCreateImage( + VkDevice _device, + const VkImageCreateInfo* pCreateInfo, + VkImage* pImage) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_image *image; + const struct anv_format *format; + int32_t aligned_height; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); + + image = anv_device_alloc(device, sizeof(*image), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (image == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + image->mem = NULL; + image->offset = 0; + image->type = pCreateInfo->imageType; + image->extent = pCreateInfo->extent; + + assert(image->extent.width > 0); + assert(image->extent.height > 0); + assert(image->extent.depth > 0); + + switch (pCreateInfo->tiling) { + case VK_IMAGE_TILING_LINEAR: + image->tile_mode = LINEAR; + /* Linear depth buffers must be 64 byte aligned, which is the strictest + * requirement for all kinds of linear surfaces. + */ + image->alignment = 64; + break; + case VK_IMAGE_TILING_OPTIMAL: + image->tile_mode = YMAJOR; + image->alignment = 4096; + break; + default: + break; + } + + format = anv_format_for_vk_format(pCreateInfo->format); + image->stride = ALIGN_I32(image->extent.width * format->cpp, + tile_mode_info[image->tile_mode].tile_width); + aligned_height = ALIGN_I32(image->extent.height, + tile_mode_info[image->tile_mode].tile_height); + image->size = image->stride * aligned_height; + + *pImage = (VkImage) image; + + return VK_SUCCESS; +} + +VkResult VKAPI vkGetImageSubresourceInfo( + VkDevice device, + VkImage image, + const VkImageSubresource* pSubresource, + VkSubresourceInfoType infoType, + size_t* pDataSize, + void* pData) +{ + return VK_UNSUPPORTED; +} + +// Image view functions + +static struct anv_state +create_surface_state(struct anv_device *device, + struct anv_image *image, const struct anv_format *format) +{ + struct anv_state state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + struct GEN8_RENDER_SURFACE_STATE surface_state 
= { + .SurfaceType = SURFTYPE_2D, + .SurfaceArray = false, + .SurfaceFormat = format->format, + .SurfaceVerticalAlignment = VALIGN4, + .SurfaceHorizontalAlignment = HALIGN4, + .TileMode = image->tile_mode, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = 0, /* FIXME: MOCS */ + .BaseMipLevel = 0, + .SurfaceQPitch = 0, + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = image->extent.depth - 1, + .SurfacePitch = image->stride - 1, + .MinimumArrayElement = 0, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + .SurfaceMinLOD = 0, + .MIPCountLOD = 0, + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + .ResourceMinLOD = 0, + /* FIXME: We assume that the image must be bound at this time. 
*/ + .SurfaceBaseAddress = { NULL, image->offset }, + }; + + GEN8_RENDER_SURFACE_STATE_pack(NULL, state.map, &surface_state); + + return state; +} + +VkResult VKAPI vkCreateImageView( + VkDevice _device, + const VkImageViewCreateInfo* pCreateInfo, + VkImageView* pView) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_image_view *view; + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); + + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + view->image = (struct anv_image *) pCreateInfo->image; + + view->surface_state = create_surface_state(device, view->image, format); + + *pView = (VkImageView) view; + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateColorAttachmentView( + VkDevice _device, + const VkColorAttachmentViewCreateInfo* pCreateInfo, + VkColorAttachmentView* pView) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_color_attachment_view *view; + struct anv_image *image; + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO); + + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + view->image = (struct anv_image *) pCreateInfo->image; + image = view->image; + + view->surface_state = create_surface_state(device, image, format); + + *pView = (VkColorAttachmentView) view; + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateDepthStencilView( + VkDevice device, + const VkDepthStencilViewCreateInfo* pCreateInfo, + VkDepthStencilView* pView) +{ + return VK_UNSUPPORTED; +} diff --git a/src/vulkan/intel.c b/src/vulkan/intel.c new file mode 100644 index 00000000000..81bd722d3e1 --- 
/dev/null +++ b/src/vulkan/intel.c @@ -0,0 +1,93 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "private.h" + +#include + +VkResult VKAPI vkCreateDmaBufImageINTEL( + VkDevice _device, + const VkDmaBufImageCreateInfo* pCreateInfo, + VkDeviceMemory* pMem, + VkImage* pImage) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_device_memory *mem; + struct anv_image *image; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); + + mem = anv_device_alloc(device, sizeof(*mem), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); + if (!mem->bo.gem_handle) { + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } + + mem->bo.map = NULL; + mem->bo.index = 0; + mem->bo.offset = 0; + mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; + + image = anv_device_alloc(device, sizeof(*image), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (image == NULL) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_mem; + } + + image->mem = mem; + image->offset = 0; + image->type = VK_IMAGE_TYPE_2D; + image->extent = pCreateInfo->extent; + image->tile_mode = XMAJOR; + image->stride = pCreateInfo->strideInBytes; + image->size = mem->bo.size; + + assert(image->extent.width > 0); + assert(image->extent.height > 0); + assert(image->extent.depth == 1); + + *pMem = (VkDeviceMemory) mem; + *pImage = (VkImage) image; + + return VK_SUCCESS; + + fail_mem: + anv_gem_close(device, mem->bo.gem_handle); + fail: + anv_device_free(device, mem); + + return result; +} diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c new file mode 100644 index 00000000000..5ff5bb9bd68 --- /dev/null +++ b/src/vulkan/meta.c @@ -0,0 +1,140 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation 
files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "private.h" + +void VKAPI vkCmdCopyBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ +} + +void VKAPI vkCmdCopyImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ +} + +void VKAPI vkCmdBlitImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit* pRegions) +{ +} + +void VKAPI vkCmdCopyBufferToImage( + VkCmdBuffer cmdBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ +} + +void VKAPI vkCmdCopyImageToBuffer( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout 
srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ +} + +void VKAPI vkCmdCloneImageData( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout) +{ +} + +void VKAPI vkCmdUpdateBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ +} + +void VKAPI vkCmdFillBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize fillSize, + uint32_t data) +{ +} + +void VKAPI vkCmdClearColorImage( + VkCmdBuffer cmdBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColor* color, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ +} + +void VKAPI vkCmdClearDepthStencil( + VkCmdBuffer cmdBuffer, + VkImage image, + VkImageLayout imageLayout, + float depth, + uint32_t stencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ +} + +void VKAPI vkCmdResolveImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageResolve* pRegions) +{ +} diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c new file mode 100644 index 00000000000..33b4f64f489 --- /dev/null +++ b/src/vulkan/pipeline.c @@ -0,0 +1,565 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the 
next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "private.h" + +// Shader functions + +VkResult VKAPI vkCreateShader( + VkDevice _device, + const VkShaderCreateInfo* pCreateInfo, + VkShader* pShader) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_shader *shader; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_CREATE_INFO); + + shader = anv_device_alloc(device, sizeof(*shader) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (shader == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + shader->size = pCreateInfo->codeSize; + memcpy(shader->data, pCreateInfo->pCode, shader->size); + + *pShader = (VkShader) shader; + + return VK_SUCCESS; +} + +// Pipeline functions + +static void +emit_vertex_input(struct anv_pipeline *pipeline, VkPipelineVertexInputCreateInfo *info) +{ + const uint32_t num_dwords = 1 + info->attributeCount * 2; + uint32_t *p; + bool instancing_enable[32]; + + for (uint32_t i = 0; i < info->bindingCount; i++) { + const VkVertexInputBindingDescription *desc = + &info->pVertexBindingDescriptions[i]; + + pipeline->binding_stride[desc->binding] = desc->strideInBytes; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. 
*/ + switch (desc->stepRate) { + default: + case VK_VERTEX_INPUT_STEP_RATE_VERTEX: + instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: + instancing_enable[desc->binding] = true; + break; + } + } + + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GEN8_3DSTATE_VERTEX_ELEMENTS); + + for (uint32_t i = 0; i < info->attributeCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + const struct anv_format *format = anv_format_for_vk_format(desc->format); + + struct GEN8_VERTEX_ELEMENT_STATE element = { + .VertexBufferIndex = desc->location, + .Valid = true, + .SourceElementFormat = format->format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offsetInBytes, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = format->channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component2Control = format->channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component3Control = format->channels >= 4 ? VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP + }; + GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_INSTANCING, + .InstancingEnable = instancing_enable[desc->binding], + .VertexElementIndex = i, + /* Vulkan so far doesn't have an instance divisor, so + * this is always 1 (ignored if not instancing). 
*/ + .InstanceDataStepRate = 1); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_SGVS, + .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDComponentNumber = 2, + .VertexIDElementOffset = info->bindingCount, + .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDComponentNumber = 3, + .InstanceIDElementOffset = info->bindingCount); +} + +static void +emit_ia_state(struct anv_pipeline *pipeline, VkPipelineIaStateCreateInfo *info) +{ + static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LISTSTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 + }; + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF, + .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, + .CutIndex = info->primitiveRestartIndex); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY, + .PrimitiveTopologyType = vk_to_gen_primitive_type[info->topology]); +} + +static void +emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info) +{ + static const uint32_t vk_to_gen_cullmode[] = { + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH + }; + + static const uint32_t vk_to_gen_fillmode[] = { + [VK_FILL_MODE_POINTS] = RASTER_POINT, + [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, + [VK_FILL_MODE_SOLID] = RASTER_SOLID 
+ }; + + static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_CCW] = CounterClockwise, + [VK_FRONT_FACE_CW] = Clockwise + }; + + static const uint32_t vk_to_gen_coordinate_origin[] = { + [VK_COORDINATE_ORIGIN_UPPER_LEFT] = UPPERLEFT, + [VK_COORDINATE_ORIGIN_LOWER_LEFT] = LOWERLEFT + }; + + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .ViewportTransformEnable = true, + .TriangleStripListProvokingVertexSelect = + info->provokingVertex == VK_PROVOKING_VERTEX_FIRST ? 0 : 2, + .LineStripListProvokingVertexSelect = + info->provokingVertex == VK_PROVOKING_VERTEX_FIRST ? 0 : 1, + .TriangleFanProvokingVertexSelect = + info->provokingVertex == VK_PROVOKING_VERTEX_FIRST ? 0 : 2, + .PointWidthSource = info->programPointSize ? Vertex : State, + }; + + /* bool32_t rasterizerDiscardEnable; */ + + + GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_RASTER, + .FrontWinding = vk_to_gen_front_face[info->frontFace], + .CullMode = vk_to_gen_cullmode[info->cullMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .ScissorRectangleEnable = true, + .ViewportZClipTestEnable = info->depthClipEnable); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .PointSpriteTextureCoordinateOrigin = + vk_to_gen_coordinate_origin[info->pointOrigin], + .NumberofSFOutputAttributes = + pipeline->wm_prog_data.num_varying_inputs); + +} + +VkResult VKAPI vkCreateGraphicsPipeline( + VkDevice _device, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + VkPipeline* pPipeline) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_pipeline *pipeline; + const struct anv_common *common; + VkPipelineShaderStageCreateInfo *shader_create_info; + VkPipelineIaStateCreateInfo *ia_info; + VkPipelineRsStateCreateInfo *rs_info; + VkPipelineVertexInputCreateInfo 
*vi_info; + VkResult result; + uint32_t offset, length; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = (struct anv_pipeline_layout *) pCreateInfo->layout; + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + result = anv_batch_init(&pipeline->batch, device); + if (result != VK_SUCCESS) + goto fail; + + for (common = pCreateInfo->pNext; common; common = common->pNext) { + switch (common->sType) { + case VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO: + vi_info = (VkPipelineVertexInputCreateInfo *) common; + break; + case VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO: + ia_info = (VkPipelineIaStateCreateInfo *) common; + break; + case VK_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO: + case VK_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO: + break; + case VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO: + rs_info = (VkPipelineRsStateCreateInfo *) common; + break; + case VK_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO: + case VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO: + case VK_STRUCTURE_TYPE_PIPELINE_DS_STATE_CREATE_INFO: + case VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO: + shader_create_info = (VkPipelineShaderStageCreateInfo *) common; + pipeline->shaders[shader_create_info->shader.stage] = + (struct anv_shader *) shader_create_info->shader.shader; + break; + default: + break; + } + } + + pipeline->use_repclear = false; + + anv_compiler_run(device->compiler, pipeline); + + emit_vertex_input(pipeline, vi_info); + emit_ia_state(pipeline, ia_info); + emit_rs_state(pipeline, rs_info); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM, + .StatisticsEnable = true, + .LineEndCapAntialiasingRegionWidth = _05pixels, + .LineAntialiasingRegionWidth = _10pixels, + .EarlyDepthStencilControl = 
NORMAL, + .ForceThreadDispatchEnable = NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .BarycentricInterpolationMode = + pipeline->wm_prog_data.barycentric_interp_modes); + + uint32_t samples = 1; + uint32_t log2_samples = __builtin_ffs(samples) - 1; + bool enable_sampling = samples > 1 ? true : false; + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_MULTISAMPLE, + .PixelPositionOffsetEnable = enable_sampling, + .PixelLocation = CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_VS, + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_GS, + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_HS, + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_DS, + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->gs_vec4 == NO_KERNEL) + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, .Enable = false); + else + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, + .SingleProgramFlow = false, + .KernelStartPointer = pipeline->gs_vec4, + .VectorMaskEnable = Vmask, + .SamplerCount = 0, + .BindingTableEntryCount = 0, + .ExpectedVertexCount = pipeline->gs_vertex_count, + + .PerThreadScratchSpace = 0, + .ScratchSpaceBasePointer = 0, + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, 
+ .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .DispatchGRFStartRegisterForURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + //pipeline->gs_prog_data.dispatch_mode | + .StatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, + .ReorderMode = TRAILING, + .Enable = true, + + .ControlDataFormat = gs_prog_data->control_data_format, + + /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: + * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) + * UserClipDistanceCullTestEnableBitmask(v) + */ + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + + //trp_generate_blend_hw_cmds(batch, pipeline); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* Skip the VUE header and position slots */ + offset = 1; + length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + .KernelStartPointer = pipeline->vs_simd8, + .SingleVertexDispatch = Multiple, + .VectorMaskEnable = Dmask, + .SamplerCount = 0, + .BindingTableEntryCount = + vue_prog_data->base.binding_table.size_bytes / 4, + .ThreadDispatchPriority = Normal, + .FloatingPointMode = IEEE754, + .IllegalOpcodeExceptionEnable = false, + .AccessesUAV = false, + .SoftwareExceptionEnable = false, + + /* FIXME: pointer needs to be assigned outside as it aliases + * PerThreadScratchSpace. 
+ */ + .ScratchSpaceBasePointer = 0, + .PerThreadScratchSpace = 0, + + .DispatchGRFStartRegisterForURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = false, + .SIMD8DispatchEnable = true, + .VertexCacheDisable = ia_info->disableVertexReuse, + .FunctionEnable = true, + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length, + .UserClipDistanceClipTestEnableBitmask = 0, + .UserClipDistanceCullTestEnableBitmask = 0); + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + uint32_t ksp0, ksp2, grf_start0, grf_start2; + + ksp2 = 0; + grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + ksp0 = pipeline->ps_simd8; + grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + ksp2 = pipeline->ps_simd16; + grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + ksp0 = pipeline->ps_simd16; + grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; + } else { + unreachable("no ps shader"); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, + .KernelStartPointer0 = ksp0, + + .SingleProgramFlow = false, + .VectorMaskEnable = true, + .SamplerCount = 0, + + .ScratchSpaceBasePointer = 0, + .PerThreadScratchSpace = 0, + + .MaximumNumberofThreadsPerPSD = 64 - 2, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
+ POSOFFSET_SAMPLE: POSOFFSET_NONE, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._32PixelDispatchEnable = false, + + .DispatchGRFStartRegisterForConstantSetupData0 = grf_start0, + .DispatchGRFStartRegisterForConstantSetupData1 = 0, + .DispatchGRFStartRegisterForConstantSetupData2 = grf_start2, + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = ksp2); + + bool per_sample_ps = false; + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA, + .PixelShaderValid = true, + .PixelShaderKillsPixel = wm_prog_data->uses_kill, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .PixelShaderIsPerSample = per_sample_ps); + + *pPipeline = (VkPipeline) pipeline; + + return VK_SUCCESS; + + fail: + anv_device_free(device, pipeline); + + return result; +} + +VkResult +anv_pipeline_destroy(struct anv_pipeline *pipeline) +{ + anv_compiler_free(pipeline); + anv_batch_finish(&pipeline->batch, pipeline->device); + anv_device_free(pipeline->device, pipeline); + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateGraphicsPipelineDerivative( + VkDevice device, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + VkPipeline basePipeline, + VkPipeline* pPipeline) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkCreateComputePipeline( + VkDevice device, + const VkComputePipelineCreateInfo* pCreateInfo, + VkPipeline* pPipeline) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkStorePipeline( + VkDevice device, + VkPipeline pipeline, + size_t* pDataSize, + void* pData) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkLoadPipeline( + VkDevice device, + size_t dataSize, + const void* pData, + VkPipeline* pPipeline) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkLoadPipelineDerivative( + VkDevice device, + size_t 
dataSize, + const void* pData, + VkPipeline basePipeline, + VkPipeline* pPipeline) +{ + return VK_UNSUPPORTED; +} + +// Pipeline layout functions + +VkResult VKAPI vkCreatePipelineLayout( + VkDevice _device, + const VkPipelineLayoutCreateInfo* pCreateInfo, + VkPipelineLayout* pPipelineLayout) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_pipeline_layout *layout; + struct anv_pipeline_layout_entry *entry; + uint32_t total; + size_t size; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); + + total = 0; + for (uint32_t i = 0; i < pCreateInfo->descriptorSetCount; i++) { + struct anv_descriptor_set_layout *set_layout = + (struct anv_descriptor_set_layout *) pCreateInfo->pSetLayouts[i]; + for (uint32_t j = 0; j < set_layout->count; j++) + total += set_layout->total; + } + + size = sizeof(*layout) + total * sizeof(layout->entries[0]); + layout = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (layout == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + entry = layout->entries; + for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + layout->stage[s].entries = entry; + + for (uint32_t i = 0; i < pCreateInfo->descriptorSetCount; i++) { + struct anv_descriptor_set_layout *set_layout = + (struct anv_descriptor_set_layout *) pCreateInfo->pSetLayouts[i]; + for (uint32_t j = 0; j < set_layout->count; j++) + if (set_layout->bindings[j].mask & (1 << s)) { + entry->type = set_layout->bindings[j].type; + entry->set = i; + entry->index = j; + entry++; + } + } + + layout->stage[s].count = entry - layout->stage[s].entries; + } + + *pPipelineLayout = (VkPipelineLayout) layout; + + return VK_SUCCESS; +} diff --git a/src/vulkan/private.h b/src/vulkan/private.h new file mode 100644 index 00000000000..31d4ab242fb --- /dev/null +++ b/src/vulkan/private.h @@ -0,0 +1,594 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * 
copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "brw_device_info.h" +#include "util/macros.h" + +#define VK_PROTOTYPES +#include + +#undef VKAPI +#define VKAPI __attribute__ ((visibility ("default"))) + +#include "brw_context.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline uint32_t +ALIGN_U32(uint32_t v, uint32_t a) +{ + return (v + a - 1) & ~(a - 1); +} + +static inline int32_t +ALIGN_I32(int32_t v, int32_t a) +{ + return (v + a - 1) & ~(a - 1); +} + +#define for_each_bit(b, dword) \ + for (uint32_t __dword = (dword); \ + (b) = __builtin_ffs(__dword) - 1, __dword; \ + __dword &= ~(1 << (b))) + +/* Define no kernel as 1, since that's an illegal offset for a kernel */ +#define NO_KERNEL 1 + +struct anv_common { + VkStructureType sType; + const void* pNext; +}; + +/* Whenever we generate an error, pass it through this function. Useful for + * debugging, where we can break on it. 
Only call at error site, not when + * propagating errors. Might be useful to plug in a stack trace here. + */ + +static inline VkResult +vk_error(VkResult error) +{ +#ifdef DEBUG + fprintf(stderr, "vk_error: %x\n", error); +#endif + + return error; +} + +/** + * A dynamically growable, circular buffer. Elements are added at head and + * removed from tail. head and tail are free-running uint32_t indices and we + * only compute the modulo with size when accessing the array. This way, + * number of bytes in the queue is always head - tail, even in case of + * wraparound. + */ + +struct anv_vector { + uint32_t head; + uint32_t tail; + uint32_t element_size; + uint32_t size; + void *data; +}; + +int anv_vector_init(struct anv_vector *queue, uint32_t element_size, uint32_t size); +void *anv_vector_add(struct anv_vector *queue); +void *anv_vector_remove(struct anv_vector *queue); + +static inline int +anv_vector_length(struct anv_vector *queue) +{ + return (queue->head - queue->tail) / queue->element_size; +} + +static inline void +anv_vector_finish(struct anv_vector *queue) +{ + free(queue->data); +} + +#define anv_vector_foreach(elem, queue) \ + static_assert(__builtin_types_compatible_p(__typeof__(queue), struct anv_vector *), ""); \ + for (uint32_t __anv_vector_offset = (queue)->tail; \ + elem = (queue)->data + (__anv_vector_offset & ((queue)->size - 1)), __anv_vector_offset < (queue)->head; \ + __anv_vector_offset += (queue)->element_size) + +struct anv_bo { + int gem_handle; + uint32_t index; + uint64_t offset; + uint64_t size; + + /* This field is here for the benefit of the aub dumper. It can (and for + * userptr bos it must) be set to the cpu map of the buffer. Destroying + * the bo won't clean up the mmap, it's still the responsibility of the bo + * user to do that. */ + void *map; +}; + +/* Represents a lock-free linked list of "free" things. This is used by + * both the block pool and the state pools. 
Unfortunately, in order to + * solve the ABA problem, we can't use a single uint32_t head. + */ +union anv_free_list { + struct { + uint32_t offset; + + /* A simple count that is incremented every time the head changes. */ + uint32_t count; + }; + uint64_t u64; +}; + +#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) + +struct anv_block_pool { + struct anv_device *device; + + struct anv_bo bo; + void *map; + int fd; + uint32_t size; + + /** + * Array of mmaps and gem handles owned by the block pool, reclaimed when + * the block pool is destroyed. + */ + struct anv_vector mmap_cleanups; + + uint32_t block_size; + + uint32_t next_block; + union anv_free_list free_list; +}; + +struct anv_block_state { + union { + struct { + uint32_t next; + uint32_t end; + }; + uint64_t u64; + }; +}; + +struct anv_state { + uint32_t offset; + uint32_t alloc_size; + void *map; +}; + +struct anv_fixed_size_state_pool { + size_t state_size; + union anv_free_list free_list; + struct anv_block_state block; +}; + +#define ANV_MIN_STATE_SIZE_LOG2 6 +#define ANV_MAX_STATE_SIZE_LOG2 10 + +#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2) + +struct anv_state_pool { + struct anv_block_pool *block_pool; + struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; +}; + +struct anv_state_stream { + struct anv_block_pool *block_pool; + uint32_t next; + uint32_t current_block; + uint32_t end; +}; + +void anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_block_pool_finish(struct anv_block_pool *pool); +uint32_t anv_block_pool_alloc(struct anv_block_pool *pool); +void anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset); +void anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool); +struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, + size_t state_size, size_t alignment); +void anv_state_pool_free(struct anv_state_pool *pool, struct 
anv_state state); +void anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool); +void anv_state_stream_finish(struct anv_state_stream *stream); +struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment); + +struct anv_physical_device { + struct anv_instance * instance; + uint32_t chipset_id; + bool no_hw; + const char * path; + const char * name; + const struct brw_device_info * info; +}; + +struct anv_instance { + void * pAllocUserData; + PFN_vkAllocFunction pfnAlloc; + PFN_vkFreeFunction pfnFree; + uint32_t apiVersion; + uint32_t physicalDeviceCount; + struct anv_physical_device physicalDevice; +}; + +struct anv_device { + struct anv_instance * instance; + uint32_t chipset_id; + struct brw_device_info info; + int context_id; + int fd; + bool no_hw; + bool dump_aub; + + struct anv_block_pool dyn_state_block_pool; + struct anv_state_pool dyn_state_pool; + + struct anv_block_pool instruction_block_pool; + struct anv_block_pool surface_state_block_pool; + struct anv_state_pool surface_state_pool; + + struct anv_compiler * compiler; + struct anv_aub_writer * aub_writer; + pthread_mutex_t mutex; +}; + +struct anv_queue { + struct anv_device * device; + + struct anv_state_pool * pool; + + /** + * Serial number of the most recently completed batch executed on the + * engine. + */ + struct anv_state completed_serial; + + /** + * The next batch submitted to the engine will be assigned this serial + * number. 
+ */ + uint32_t next_serial; + + uint32_t last_collected_serial; +}; + +void * +anv_device_alloc(struct anv_device * device, + size_t size, + size_t alignment, + VkSystemAllocType allocType); + +void +anv_device_free(struct anv_device * device, + void * mem); + +void* anv_gem_mmap(struct anv_device *device, + uint32_t gem_handle, uint64_t offset, uint64_t size); +void anv_gem_munmap(void *p, uint64_t size); +uint32_t anv_gem_create(struct anv_device *device, size_t size); +void anv_gem_close(struct anv_device *device, int gem_handle); +int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); +int anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns); +int anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf); +int anv_gem_set_tiling(struct anv_device *device, int gem_handle, + uint32_t stride, uint32_t tiling); +int anv_gem_create_context(struct anv_device *device); +int anv_gem_destroy_context(struct anv_device *device, int context); +int anv_gem_get_param(int fd, uint32_t param); +int anv_gem_get_aperture(struct anv_device *device, uint64_t *size); +int anv_gem_handle_to_fd(struct anv_device *device, int gem_handle); +int anv_gem_fd_to_handle(struct anv_device *device, int fd); +int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); + +VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); + +/* TODO: Remove hardcoded reloc limit. 
*/ +#define ANV_BATCH_MAX_RELOCS 256 + +struct anv_reloc_list { + size_t num_relocs; + struct drm_i915_gem_relocation_entry relocs[ANV_BATCH_MAX_RELOCS]; + struct anv_bo * reloc_bos[ANV_BATCH_MAX_RELOCS]; +}; + +struct anv_batch { + struct anv_bo bo; + void * next; + struct anv_reloc_list cmd_relocs; + struct anv_reloc_list surf_relocs; +}; + +VkResult anv_batch_init(struct anv_batch *batch, struct anv_device *device); +void anv_batch_finish(struct anv_batch *batch, struct anv_device *device); +void anv_batch_reset(struct anv_batch *batch); +void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); +void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); +uint64_t anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t offset); + +struct anv_address { + struct anv_bo *bo; + uint32_t offset; +}; + +#define __gen_address_type struct anv_address +#define __gen_user_data struct anv_batch + +static inline uint64_t +__gen_combine_address(struct anv_batch *batch, void *location, + const struct anv_address address, uint32_t delta) +{ + if (address.bo == NULL) { + return delta; + } else { + assert(batch->bo.map <= location && + (char *) location < (char *) batch->bo.map + batch->bo.size); + + return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta); + } +} + +#undef GEN8_3DSTATE_MULTISAMPLE +#include "gen8_pack.h" + +#define anv_batch_emit(batch, cmd, ...) do { \ + struct cmd __template = { \ + cmd ## _header, \ + __VA_ARGS__ \ + }; \ + void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \ + cmd ## _pack(batch, __dst, &__template); \ + } while (0) + +#define anv_batch_emitn(batch, n, cmd, ...) 
({ \ + struct cmd __template = { \ + cmd ## _header, \ + .DwordLength = n - cmd ## _length_bias, \ + __VA_ARGS__ \ + }; \ + void *__dst = anv_batch_emit_dwords(batch, n); \ + cmd ## _pack(batch, __dst, &__template); \ + __dst; \ + }) + +struct anv_device_memory { + struct anv_bo bo; + VkDeviceSize map_size; + void *map; +}; + +struct anv_dynamic_vp_state { + struct anv_state sf_clip_vp; + struct anv_state cc_vp; + struct anv_state scissor; +}; + +struct anv_dynamic_rs_state { + uint32_t state_sf[GEN8_3DSTATE_SF_length]; +}; + +struct anv_dynamic_cb_state { + uint32_t blend_offset; +}; + +struct anv_descriptor_set_layout { + uint32_t total; /* total number of entries in all stages */ + uint32_t count; + struct { + VkDescriptorType type; + uint32_t mask; + } bindings[0]; +}; + +struct anv_descriptor_set { + void *descriptors[0]; +}; + +struct anv_pipeline_layout_entry { + VkDescriptorType type; + uint32_t set; + uint32_t index; +}; + +struct anv_pipeline_layout { + struct { + uint32_t count; + struct anv_pipeline_layout_entry *entries; + } stage[VK_NUM_SHADER_STAGE]; + + struct anv_pipeline_layout_entry entries[0]; +}; + +struct anv_buffer { + struct anv_device * device; + VkDeviceSize size; + + /* Set when bound */ + struct anv_device_memory * mem; + VkDeviceSize offset; +}; + +#define MAX_VBS 32 +#define MAX_SETS 8 +#define MAX_RTS 8 + +#define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 0) +#define ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY (1 << 1) +#define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) + +struct anv_cmd_buffer { + struct anv_device * device; + + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 * exec2_objects; + struct anv_bo ** exec2_bos; + bool need_reloc; + uint32_t serial; + + uint32_t bo_count; + struct anv_batch batch; + struct anv_state_stream surface_state_stream; + + /* State required while building cmd buffer */ + struct { + struct anv_buffer *buffer; + VkDeviceSize offset; + } vb[MAX_VBS]; + uint32_t vb_dirty; + uint32_t 
num_descriptor_sets; + struct anv_descriptor_set * descriptor_sets[MAX_SETS]; + uint32_t dirty; + struct anv_pipeline * pipeline; + struct anv_framebuffer * framebuffer; + struct anv_dynamic_rs_state * rs_state; +}; + +void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); +void anv_aub_writer_destroy(struct anv_aub_writer *writer); + +struct anv_shader { + uint32_t size; + char data[0]; +}; + +struct anv_pipeline { + struct anv_device * device; + struct anv_batch batch; + struct anv_shader * shaders[VK_NUM_SHADER_STAGE]; + struct anv_pipeline_layout * layout; + bool use_repclear; + + struct brw_vs_prog_data vs_prog_data; + struct brw_wm_prog_data wm_prog_data; + struct brw_gs_prog_data gs_prog_data; + struct brw_stage_prog_data * prog_data[VK_NUM_SHADER_STAGE]; + struct { + uint32_t vs_start; + uint32_t vs_size; + uint32_t nr_vs_entries; + uint32_t gs_start; + uint32_t gs_size; + uint32_t nr_gs_entries; + } urb; + + struct anv_bo vs_scratch_bo; + struct anv_bo ps_scratch_bo; + struct anv_bo gs_scratch_bo; + + uint32_t active_stages; + uint32_t program_block; + uint32_t program_next; + uint32_t vs_simd8; + uint32_t ps_simd8; + uint32_t ps_simd16; + uint32_t gs_vec4; + uint32_t gs_vertex_count; + + uint32_t binding_stride[MAX_VBS]; + + uint32_t state_sf[GEN8_3DSTATE_SF_length]; + uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; +}; + +VkResult anv_pipeline_destroy(struct anv_pipeline *pipeline); + +struct anv_compiler *anv_compiler_create(int fd); +void anv_compiler_destroy(struct anv_compiler *compiler); +int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline); +void anv_compiler_free(struct anv_pipeline *pipeline); + +struct anv_format { + uint32_t format; + int32_t cpp; + int32_t channels; +}; + +const struct anv_format * +anv_format_for_vk_format(VkFormat format); + +struct anv_image { + VkImageType type; + VkExtent3D extent; + uint32_t tile_mode; + VkDeviceSize size; + uint32_t alignment; + int32_t stride; + + /* Set when 
bound */ + struct anv_device_memory * mem; + VkDeviceSize offset; +}; + +struct anv_buffer_view { + struct anv_buffer * buffer; + struct anv_state surface_state; + uint32_t offset; +}; + +struct anv_color_attachment_view { + struct anv_image * image; + struct anv_state surface_state; +}; + +struct anv_image_view { + struct anv_image * image; + struct anv_state surface_state; +}; + +struct anv_depth_stencil_view { +}; + +struct anv_framebuffer { + uint32_t color_attachment_count; + struct anv_color_attachment_view * color_attachments[MAX_RTS]; + struct anv_depth_stencil_view * depth_stencil; + + uint32_t sample_count; + uint32_t width; + uint32_t height; + uint32_t layers; +}; + +struct anv_render_pass { + VkRect render_area; +}; + + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/util.c b/src/vulkan/util.c new file mode 100644 index 00000000000..847d13b2f55 --- /dev/null +++ b/src/vulkan/util.c @@ -0,0 +1,99 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "private.h" + +int +anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) +{ + assert(is_power_of_two(size)); + assert(element_size < size && is_power_of_two(element_size)); + + vector->head = 0; + vector->tail = 0; + vector->element_size = element_size; + vector->size = size; + vector->data = malloc(size); + + return vector->data != NULL; +} + +void * +anv_vector_add(struct anv_vector *vector) +{ + uint32_t offset, size, split, tail; + void *data; + + if (vector->head - vector->tail == vector->size) { + size = vector->size * 2; + data = malloc(size); + if (data == NULL) + return NULL; + split = ALIGN_U32(vector->tail, vector->size); + tail = vector->tail & (vector->size - 1); + if (vector->head - split < vector->size) { + memcpy(data + tail, + vector->data + tail, + split - vector->tail); + memcpy(data + vector->size, + vector->data, vector->head - split); + } else { + memcpy(data + tail, + vector->data + tail, + vector->head - vector->tail); + } + free(vector->data); + vector->data = data; + vector->size = size; + } + + assert(vector->head - vector->tail < vector->size); + + offset = vector->head & (vector->size - 1); + vector->head += vector->element_size; + + return vector->data + offset; +} + +void * +anv_vector_remove(struct anv_vector *vector) +{ + uint32_t offset; + + if (vector->head == vector->tail) + return NULL; + + assert(vector->head - vector->tail <= vector->size); + + offset = vector->tail & (vector->size - 1); + vector->tail += vector->element_size; + + return vector->data + offset; +} diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c new file mode 100644 index 
00000000000..4bcb54d5e4f --- /dev/null +++ b/src/vulkan/vk.c @@ -0,0 +1,723 @@ +#include +#include +#include +#include + +#define VK_PROTOTYPES +#include + +#include +#include +#include +#include +#include +#include + +static void +fail_if(int cond, const char *format, ...) +{ + va_list args; + + if (!cond) + return; + + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + + exit(1); +} + +static void +write_png(char *path, int32_t width, int32_t height, int32_t stride, void *pixels) +{ + FILE *f = NULL; + png_structp png_writer = NULL; + png_infop png_info = NULL; + + uint8_t *rows[height]; + + for (int32_t y = 0; y < height; y++) + rows[y] = pixels + y * stride; + + f = fopen(path, "wb"); + fail_if(!f, "failed to open file for writing: %s", path); + + png_writer = png_create_write_struct(PNG_LIBPNG_VER_STRING, + NULL, NULL, NULL); + fail_if (!png_writer, "failed to create png writer"); + + png_info = png_create_info_struct(png_writer); + fail_if(!png_info, "failed to create png writer info"); + + png_init_io(png_writer, f); + png_set_IHDR(png_writer, png_info, + width, height, + 8, PNG_COLOR_TYPE_RGBA, + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, + PNG_FILTER_TYPE_DEFAULT); + png_write_info(png_writer, png_info); + png_set_rows(png_writer, png_info, rows); + png_write_png(png_writer, png_info, PNG_TRANSFORM_IDENTITY, NULL); + + png_destroy_write_struct(&png_writer, &png_info); + + fclose(f); +} + +static void * +test_alloc(void* pUserData, + size_t size, + size_t alignment, + VkSystemAllocType allocType) +{ + return malloc(size); +} + +static void +test_free(void* pUserData, + void* pMem) +{ + free(pMem); +} + +#define GLSL(src) "#version 330\n" #src + +static void +create_pipeline(VkDevice device, VkPipeline *pipeline, + VkPipelineLayout pipeline_layout) +{ + VkPipelineIaStateCreateInfo ia_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + 
.disableVertexReuse = false, + .primitiveRestartEnable = false, + .primitiveRestartIndex = 0 + }; + + static const char vs_source[] = GLSL( + layout(location = 0) in vec4 a_position; + layout(location = 1) in vec4 a_color; + layout(set = 0, index = 0) uniform block1 { + vec4 color; + } u1; + layout(set = 0, index = 1) uniform block2 { + vec4 color; + } u2; + layout(set = 1, index = 0) uniform block3 { + vec4 color; + } u3; + out vec4 v_color; + void main() + { + gl_Position = a_position; + v_color = a_color + u1.color + u2.color + u3.color; + }); + + static const char fs_source[] = GLSL( + out vec4 f_color; + in vec4 v_color; + layout(set = 0, index = 0) uniform sampler2D tex; + void main() + { + f_color = v_color + texture2D(tex, vec2(0.1, 0.1)); + }); + + VkShader vs; + vkCreateShader(device, + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .codeSize = sizeof(vs_source), + .pCode = vs_source, + .flags = 0 + }, + &vs); + + VkShader fs; + vkCreateShader(device, + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .codeSize = sizeof(fs_source), + .pCode = fs_source, + .flags = 0 + }, + &fs); + + VkPipelineShaderStageCreateInfo vs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &ia_create_info, + .shader = { + .stage = VK_SHADER_STAGE_VERTEX, + .shader = vs, + .linkConstBufferCount = 0, + .pLinkConstBufferInfo = NULL, + .pSpecializationInfo = NULL + } + }; + + VkPipelineShaderStageCreateInfo fs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &vs_create_info, + .shader = { + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .linkConstBufferCount = 0, + .pLinkConstBufferInfo = NULL, + .pSpecializationInfo = NULL + } + }; + + VkPipelineVertexInputCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO, + .pNext = &fs_create_info, + .bindingCount = 2, + .pVertexBindingDescriptions = 
(VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = 16, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + { + .binding = 1, + .strideInBytes = 0, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + } + }, + .attributeCount = 2, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offsetInBytes = 0 + }, + { + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offsetInBytes = 0 + } + } + }; + + VkPipelineRsStateCreateInfo rs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, + .pNext = &vi_create_info, + + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }; + + vkCreateGraphicsPipeline(device, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = &rs_create_info, + .flags = 0, + .layout = pipeline_layout + }, + pipeline); + + + vkDestroyObject(device, VK_OBJECT_TYPE_SHADER, fs); + vkDestroyObject(device, VK_OBJECT_TYPE_SHADER, vs); +} + +int main(int argc, char *argv[]) +{ + VkInstance instance; + vkCreateInstance(&(VkInstanceCreateInfo) { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pAllocCb = &(VkAllocCallbacks) { + .pUserData = NULL, + .pfnAlloc = test_alloc, + .pfnFree = test_free + }, + .pAppInfo = &(VkApplicationInfo) { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pAppName = "vk", + .apiVersion = 1 + } + }, + &instance); + + uint32_t count = 1; + VkPhysicalDevice physicalDevices[1]; + vkEnumeratePhysicalDevices(instance, &count, physicalDevices); + printf("%d physical devices\n", count); + + VkPhysicalDeviceProperties properties; + size_t size = sizeof(properties); + vkGetPhysicalDeviceInfo(physicalDevices[0], + VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES, + &size, &properties); + printf("vendor id %04x, 
device name %s\n", + properties.vendorId, properties.deviceName); + + VkDevice device; + vkCreateDevice(physicalDevices[0], + &(VkDeviceCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .queueRecordCount = 1, + .pRequestedQueues = &(VkDeviceQueueCreateInfo) { + .queueNodeIndex = 0, + .queueCount = 1 + } + }, + &device); + + VkQueue queue; + vkGetDeviceQueue(device, 0, 0, &queue); + + VkCmdBuffer cmdBuffer; + vkCreateCommandBuffer(device, + &(VkCmdBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO, + .queueNodeIndex = 0, + .flags = 0 + }, + &cmdBuffer); + + + VkDescriptorSetLayout set_layout[2]; + vkCreateDescriptorSetLayout(device, + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .count = 2, + .pBinding = (VkDescriptorSetLayoutBinding[]) { + { + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .count = 2, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .pImmutableSamplers = NULL + }, + { + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .count = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + } + } + }, + &set_layout[0]); + + vkCreateDescriptorSetLayout(device, + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .count = 1, + .pBinding = (VkDescriptorSetLayoutBinding[]) { + { + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .count = 1, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .pImmutableSamplers = NULL + } + } + }, + &set_layout[1]); + + VkPipelineLayout pipeline_layout; + vkCreatePipelineLayout(device, + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .descriptorSetCount = 2, + .pSetLayouts = set_layout, + }, + &pipeline_layout); + + VkPipeline pipeline; + create_pipeline(device, &pipeline, pipeline_layout); + + VkDescriptorSet set[2]; + vkAllocDescriptorSets(device, 0 /* pool */, + VK_DESCRIPTOR_SET_USAGE_STATIC, + 2, 
set_layout, set, &count); + + VkBuffer buffer; + vkCreateBuffer(device, + &(VkBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = 1024, + .usage = VK_BUFFER_USAGE_GENERAL, + .flags = 0 + }, + &buffer); + + VkMemoryRequirements buffer_requirements; + size = sizeof(buffer_requirements); + vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, buffer, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &buffer_requirements); + + int32_t width = 256, height = 256; + + VkImage rt; + vkCreateImage(device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .extent = { .width = width, .height = height, .depth = 1 }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }, + &rt); + + VkMemoryRequirements rt_requirements; + size = sizeof(rt_requirements); + vkGetObjectInfo(device, VK_OBJECT_TYPE_IMAGE, rt, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &rt_requirements); + + VkBuffer vertex_buffer; + vkCreateBuffer(device, + &(VkBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = 1024, + .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .flags = 0 + }, + &vertex_buffer); + + VkMemoryRequirements vb_requirements; + size = sizeof(vb_requirements); + vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, vertex_buffer, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &vb_requirements); + + printf("buffer size: %lu, buffer alignment: %lu\n", + buffer_requirements.size, buffer_requirements.alignment); + printf("rt size: %lu, rt alignment: %lu\n", + rt_requirements.size, rt_requirements.alignment); + printf("vb size: %lu vb alignment: %lu\n", + vb_requirements.size, vb_requirements.alignment); + + size_t mem_size = rt_requirements.size + 2048 + 16 * 16 * 4; + VkDeviceMemory mem; + vkAllocMemory(device, + &(VkMemoryAllocInfo) { + .sType = 
VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + .allocationSize = mem_size, + .memProps = VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT, + .memPriority = VK_MEMORY_PRIORITY_NORMAL + }, + &mem); + + void *map; + vkMapMemory(device, mem, 0, mem_size, 0, &map); + memset(map, 192, mem_size); + + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, + buffer, + 0, /* allocation index; for objects which need to bind to multiple mems */ + mem, 128); + + float color[12] = { + 0.0, 0.2, 0.0, 0.0, + 0.0, 0.0, 0.5, 0.0, + 0.0, 0.0, 0.5, 0.5 + }; + memcpy(map + 128 + 16, color, sizeof(color)); + VkBufferView buffer_view[3]; + vkCreateBufferView(device, + &(VkBufferViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .buffer = buffer, + .viewType = VK_BUFFER_VIEW_TYPE_RAW, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offset = 16, + .range = 64 + }, + &buffer_view[0]); + + vkCreateBufferView(device, + &(VkBufferViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .buffer = buffer, + .viewType = VK_BUFFER_VIEW_TYPE_RAW, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offset = 32, + .range = 64 + }, + &buffer_view[1]); + + vkCreateBufferView(device, + &(VkBufferViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .buffer = buffer, + .viewType = VK_BUFFER_VIEW_TYPE_RAW, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offset = 48, + .range = 64 + }, + &buffer_view[2]); + + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, + vertex_buffer, + 0, /* allocation index; for objects which need to bind to multiple mems */ + mem, 1024); + static const float vertex_data[] = { + /* Triangle coordinates */ + -0.5, -0.5, 0.0, 1.0, + 0.5, -0.5, 0.0, 1.0, + 0.0, 0.5, 0.0, 1.0, + /* Color */ + 1.0, 0.0, 0.0, 0.2, + }; + memcpy(map + 1024, vertex_data, sizeof(vertex_data)); + + VkDynamicVpState vp_state; + vkCreateDynamicViewportState(device, + &(VkDynamicVpStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, + 
.viewportAndScissorCount = 2, + .pViewports = (VkViewport[]) { + { + .originX = 0, + .originY = 0, + .width = width, + .height = height, + .minDepth = 0, + .maxDepth = 1 + }, + { + .originX = -10, + .originY = -10, + .width = 20, + .height = 20, + .minDepth = -1, + .maxDepth = 1 + }, + }, + .pScissors = (VkRect[]) { + { { 0, 0 }, { width, height } }, + { { 10, 10 }, { 236, 236 } } + } + }, + &vp_state); + + VkDynamicRsState rs_state; + vkCreateDynamicRasterState(device, + &(VkDynamicRsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, + }, + &rs_state); + + /* FIXME: Need to query memory info before binding to memory */ + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_IMAGE, + rt, + 0, /* allocation index; for objects which need to bind to multiple mems */ + mem, 2048); + + const uint32_t texture_width = 16, texture_height = 16; + VkImage texture; + vkCreateImage(device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .extent = { .width = texture_width, .height = texture_height, .depth = 1 }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }, + &texture); + + VkImageView image_view; + vkCreateImageView(device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = texture, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = VK_IMAGE_ASPECT_COLOR, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArraySlice = 0, + .arraySize = 1 + }, + .minLod = 0 + }, + &image_view); + + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_IMAGE, + texture, + 0, /* allocation index; for objects which need to bind to multiple mems */ + mem, 2048 + 256 * 256 * 4); + + 
vkUpdateDescriptors(device, set[0], 2, + (const void * []) { + &(VkUpdateBuffers) { + .sType = VK_STRUCTURE_TYPE_UPDATE_BUFFERS, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .arrayIndex = 0, + .binding = 0, + .count = 2, + .pBufferViews = (VkBufferViewAttachInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_ATTACH_INFO, + .view = buffer_view[0] + }, + { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_ATTACH_INFO, + .view = buffer_view[1] + } + } + }, + &(VkUpdateImages) { + .sType = VK_STRUCTURE_TYPE_UPDATE_IMAGES, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .binding = 2, + .count = 1, + .pImageViews = (VkImageViewAttachInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_ATTACH_INFO, + .view = image_view, + .layout = VK_IMAGE_LAYOUT_GENERAL, + } + } + } + }); + + vkUpdateDescriptors(device, set[1], 1, + (const void * []) { + &(VkUpdateBuffers) { + .sType = VK_STRUCTURE_TYPE_UPDATE_BUFFERS, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .arrayIndex = 0, + .count = 1, + .pBufferViews = (VkBufferViewAttachInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_ATTACH_INFO, + .view = buffer_view[2] + } + } + } + }); + + VkColorAttachmentView view; + vkCreateColorAttachmentView(device, + &(VkColorAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = rt, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .mipLevel = 0, + .baseArraySlice = 0, + .arraySize = 1, + .msaaResolveImage = 0, + .msaaResolveSubResource = { 0, } + }, + &view); + + VkFramebuffer framebuffer; + vkCreateFramebuffer(device, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .colorAttachmentCount = 1, + .pColorAttachments = (VkColorAttachmentBindInfo[]) { + { + .view = view, + .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + } + }, + .pDepthStencilAttachment = NULL, + .sampleCount = 1, + .width = width, + .height = height, + .layers = 1 + }, + &framebuffer); + + VkRenderPass pass; + 
vkCreateRenderPass(device, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .renderArea = { { 0, 0 }, { width, height } }, + .colorAttachmentCount = 1, + .extent = { }, + .sampleCount = 1, + .layers = 1, + .pColorFormats = (VkFormat[]) { VK_FORMAT_R8G8B8A8_UNORM }, + .pColorLayouts = (VkImageLayout[]) { VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL }, + .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_CLEAR }, + .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, + .pColorLoadClearValues = (VkClearColor[]) { + { .color = { .floatColor = { 1.0, 0.0, 0.0, 1.0 } }, .useRawValue = false } + }, + .depthStencilFormat = VK_FORMAT_UNDEFINED, + }, + &pass); + + vkBeginCommandBuffer(cmdBuffer, + &(VkCmdBufferBeginInfo) { + .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO, + .flags = 0 + }); + + vkCmdBeginRenderPass(cmdBuffer, + &(VkRenderPassBegin) { + .renderPass = pass, + .framebuffer = framebuffer + }); + + vkCmdBindVertexBuffers(cmdBuffer, 0, 2, + (VkBuffer[]) { vertex_buffer, vertex_buffer }, + (VkDeviceSize[]) { 0, 3 * 4 * sizeof(float) }); + + vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + + vkCmdBindDescriptorSets(cmdBuffer, + VK_PIPELINE_BIND_POINT_GRAPHICS, 0, 1, + &set[0], 0, NULL); + vkCmdBindDescriptorSets(cmdBuffer, + VK_PIPELINE_BIND_POINT_GRAPHICS, 1, 1, + &set[1], 0, NULL); + + vkCmdBindDynamicStateObject(cmdBuffer, + VK_STATE_BIND_POINT_VIEWPORT, vp_state); + vkCmdBindDynamicStateObject(cmdBuffer, + VK_STATE_BIND_POINT_RASTER, rs_state); + + vkCmdWriteTimestamp(cmdBuffer, VK_TIMESTAMP_TYPE_TOP, buffer, 0); + vkCmdWriteTimestamp(cmdBuffer, VK_TIMESTAMP_TYPE_BOTTOM, buffer, 8); + + vkCmdDraw(cmdBuffer, 0, 3, 0, 1); + + vkCmdEndRenderPass(cmdBuffer, pass); + + vkEndCommandBuffer(cmdBuffer); + + vkQueueSubmit(queue, 1, &cmdBuffer, 0); + + vkQueueWaitIdle(queue); + + write_png("vk.png", width, height, 1024, map + 2048); + + vkDestroyObject(device, 
VK_OBJECT_TYPE_IMAGE, texture); + vkDestroyObject(device, VK_OBJECT_TYPE_IMAGE, rt); + vkDestroyObject(device, VK_OBJECT_TYPE_BUFFER, buffer); + vkDestroyObject(device, VK_OBJECT_TYPE_COMMAND_BUFFER, cmdBuffer); + vkDestroyObject(device, VK_OBJECT_TYPE_PIPELINE, pipeline); + + vkDestroyDevice(device); + + vkDestroyInstance(instance); + + return 0; +} -- cgit v1.2.3 From 20ad071190b02c9fd904a0c969ce08318f015af9 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 11 May 2015 22:12:56 -0700 Subject: vk: Allow NULL as a valid pipeline layout Vertex buffers and render targets aren't part of the layout so having an empty layout is pretty common. --- src/vulkan/compiler.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index d7428d8a877..39858426cf4 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -61,6 +61,10 @@ set_binding_table_layout(struct brw_stage_prog_data *prog_data, struct anv_pipeline_layout_entry *entries; + /* No layout is valid for shaders that don't bind any resources. */ + if (pipeline->layout == NULL) + return VK_SUCCESS; + if (stage == VK_SHADER_STAGE_FRAGMENT) bias = MAX_RTS; else -- cgit v1.2.3 From 4f25f5d86cf38c94eb27b98695e0bdf7523af779 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 11 May 2015 22:17:04 -0700 Subject: vk: Support not having a vertex shader This lets us bypass the vertex shader and pass data straight into the rasterizer part of the pipeline. 
--- src/vulkan/compiler.cpp | 39 ++++++++++++------------ src/vulkan/pipeline.c | 79 ++++++++++++++++++++++++++++++------------------- 2 files changed, 68 insertions(+), 50 deletions(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 39858426cf4..3f2435402ad 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -676,7 +676,8 @@ static void gen7_compute_urb_partition(struct anv_pipeline *pipeline) { const struct brw_device_info *devinfo = &pipeline->device->info; - unsigned vs_size = pipeline->vs_prog_data.base.urb_entry_size; + bool vs_present = pipeline->vs_simd8 != NO_KERNEL; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; unsigned vs_entry_size_bytes = vs_size * 64; bool gs_present = pipeline->gs_vec4 != NO_KERNEL; unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; @@ -841,11 +842,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) fail_if(program == NULL || program->Shaders == NULL, "failed to create program\n"); - /* FIXME: Only supports vs and fs combo at the moment */ - assert(pipeline->shaders[VK_SHADER_STAGE_VERTEX]); - assert(pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]); - - anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_VERTEX); + if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) + anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_VERTEX); anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_FRAGMENT); if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_GEOMETRY); @@ -870,18 +868,25 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); fail_if(!success, "do_wm_prog failed\n"); pipeline->prog_data[VK_SHADER_STAGE_FRAGMENT] = &pipeline->wm_prog_data.base; + pipeline->active_stages = VK_SHADER_STAGE_FRAGMENT_BIT; - 
struct brw_vs_prog_key vs_key; - struct gl_vertex_program *vp = (struct gl_vertex_program *) - program->_LinkedShaders[MESA_SHADER_VERTEX]->Program; - struct brw_vertex_program *bvp = brw_vertex_program(vp); + if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) { + struct brw_vs_prog_key vs_key; + struct gl_vertex_program *vp = (struct gl_vertex_program *) + program->_LinkedShaders[MESA_SHADER_VERTEX]->Program; + struct brw_vertex_program *bvp = brw_vertex_program(vp); - brw_vs_populate_key(brw, bvp, &vs_key); + brw_vs_populate_key(brw, bvp, &vs_key); + + success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + pipeline->prog_data[VK_SHADER_STAGE_VERTEX] = &pipeline->vs_prog_data.base.base; + pipeline->active_stages |= VK_SHADER_STAGE_VERTEX_BIT;; + } else { + pipeline->vs_simd8 = NO_KERNEL; + } - success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline); - fail_if(!success, "do_wm_prog failed\n"); - pipeline->prog_data[VK_SHADER_STAGE_VERTEX] = &pipeline->vs_prog_data.base.base; if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) { struct brw_gs_prog_key gs_key; @@ -893,14 +898,10 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline); fail_if(!success, "do_gs_prog failed\n"); - pipeline->active_stages = VK_SHADER_STAGE_VERTEX_BIT | - VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + pipeline->active_stages |= VK_SHADER_STAGE_GEOMETRY_BIT; pipeline->prog_data[VK_SHADER_STAGE_GEOMETRY] = &pipeline->gs_prog_data.base.base; - } else { pipeline->gs_vec4 = NO_KERNEL; - pipeline->active_stages = - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; } diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 33b4f64f489..445ffde94ff 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -266,6 +266,13 @@ VkResult VKAPI vkCreateGraphicsPipeline( anv_compiler_run(device->compiler, 
pipeline); + /* FIXME: The compiler dead-codes FS inputs when we don't have a VS, so we + * hard code this to num_attributes - 2. This is because the attributes + * include VUE header and position, which aren't counted as varying + * inputs. */ + if (pipeline->vs_simd8 == NO_KERNEL) + pipeline->wm_prog_data.num_varying_inputs = vi_info->attributeCount - 2; + emit_vertex_input(pipeline, vi_info); emit_ia_state(pipeline, ia_info); emit_rs_state(pipeline, rs_info); @@ -358,40 +365,50 @@ VkResult VKAPI vkCreateGraphicsPipeline( offset = 1; length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, - .KernelStartPointer = pipeline->vs_simd8, - .SingleVertexDispatch = Multiple, - .VectorMaskEnable = Dmask, - .SamplerCount = 0, - .BindingTableEntryCount = + if (pipeline->vs_simd8 == NO_KERNEL) + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + .FunctionEnable = false, + .VertexURBEntryOutputReadOffset = 1, + /* Even if VS is disabled, SBE still gets the amount of + * vertex data to read from this field. We use attribute + * count - 1, as we don't count the VUE header here. */ + .VertexURBEntryOutputLength = + DIV_ROUND_UP(vi_info->attributeCount - 1, 2)); + else + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + .KernelStartPointer = pipeline->vs_simd8, + .SingleVertexDispatch = Multiple, + .VectorMaskEnable = Dmask, + .SamplerCount = 0, + .BindingTableEntryCount = vue_prog_data->base.binding_table.size_bytes / 4, - .ThreadDispatchPriority = Normal, - .FloatingPointMode = IEEE754, - .IllegalOpcodeExceptionEnable = false, - .AccessesUAV = false, - .SoftwareExceptionEnable = false, - - /* FIXME: pointer needs to be assigned outside as it aliases - * PerThreadScratchSpace. 
- */ - .ScratchSpaceBasePointer = 0, - .PerThreadScratchSpace = 0, + .ThreadDispatchPriority = Normal, + .FloatingPointMode = IEEE754, + .IllegalOpcodeExceptionEnable = false, + .AccessesUAV = false, + .SoftwareExceptionEnable = false, + + /* FIXME: pointer needs to be assigned outside as it aliases + * PerThreadScratchSpace. + */ + .ScratchSpaceBasePointer = 0, + .PerThreadScratchSpace = 0, - .DispatchGRFStartRegisterForURBData = + .DispatchGRFStartRegisterForURBData = vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, - .VertexURBEntryReadOffset = 0, - - .MaximumNumberofThreads = device->info.max_vs_threads - 1, - .StatisticsEnable = false, - .SIMD8DispatchEnable = true, - .VertexCacheDisable = ia_info->disableVertexReuse, - .FunctionEnable = true, - - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length, - .UserClipDistanceClipTestEnableBitmask = 0, - .UserClipDistanceCullTestEnableBitmask = 0); + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = false, + .SIMD8DispatchEnable = true, + .VertexCacheDisable = ia_info->disableVertexReuse, + .FunctionEnable = true, + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length, + .UserClipDistanceClipTestEnableBitmask = 0, + .UserClipDistanceCullTestEnableBitmask = 0); const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; uint32_t ksp0, ksp2, grf_start0, grf_start2; -- cgit v1.2.3 From 099faa1a2b339baf1dee2ee9c099e7457df2fce1 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 11 May 2015 22:19:58 -0700 Subject: vk: Store bo pointer in anv_image and anv_buffer We don't need to point back to the memory object the bo came from. Pointing directly to a bo lets us bind images and buffers to other bos - like our allocator bos. 
--- src/vulkan/device.c | 22 +++++++++++----------- src/vulkan/image.c | 2 +- src/vulkan/intel.c | 2 +- src/vulkan/private.h | 4 ++-- 4 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 09b21e50c7c..86fa3b8c59e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1028,12 +1028,12 @@ VkResult VKAPI vkQueueBindObjectMemory( switch (objType) { case VK_OBJECT_TYPE_BUFFER: buffer = (struct anv_buffer *) object; - buffer->mem = mem; + buffer->bo = &mem->bo; buffer->offset = memOffset; break; case VK_OBJECT_TYPE_IMAGE: image = (struct anv_image *) object; - image->mem = mem; + image->bo = &mem->bo; image->offset = memOffset; break; default: @@ -1236,7 +1236,7 @@ VkResult VKAPI vkCreateBuffer( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); buffer->size = pCreateInfo->size; - buffer->mem = NULL; + buffer->bo = NULL; buffer->offset = 0; *pBuffer = (VkBuffer) buffer; @@ -2073,7 +2073,7 @@ void VKAPI vkCmdBindIndexBuffer( anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, .IndexFormat = vk_to_gen_index_type[indexType], .MemoryObjectControlState = 0, - .BufferStartingAddress = { &buffer->mem->bo, buffer->offset + offset }, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, .BufferSize = buffer->size - offset); } @@ -2140,7 +2140,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) * submit time. Surface address is dwords 8-9. 
*/ anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, view->surface_state.offset + 8 * sizeof(int32_t), - &view->image->mem->bo, view->image->offset); + view->image->bo, view->image->offset); } } @@ -2161,7 +2161,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) table[bias + i] = image_view->surface_state.offset; anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, image_view->surface_state.offset + 8 * sizeof(int32_t), - &image_view->image->mem->bo, + image_view->image->bo, image_view->image->offset); break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: @@ -2175,7 +2175,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) table[bias + i] = buffer_view->surface_state.offset; anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, buffer_view->surface_state.offset + 8 * sizeof(int32_t), - &buffer_view->buffer->mem->bo, + buffer_view->buffer->bo, buffer_view->buffer->offset + buffer_view->offset); break; @@ -2221,7 +2221,7 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) .MemoryObjectControlState = 0, .AddressModifyEnable = true, .BufferPitch = pipeline->binding_stride[vb], - .BufferStartingAddress = { &buffer->mem->bo, buffer->offset + offset }, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, .BufferSize = buffer->size - offset }; @@ -2324,7 +2324,7 @@ void VKAPI vkCmdDrawIndirect( { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; struct anv_buffer *buffer = (struct anv_buffer *) _buffer; - struct anv_bo *bo = &buffer->mem->bo; + struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; anv_cmd_buffer_flush_state(cmd_buffer); @@ -2349,7 +2349,7 @@ void VKAPI vkCmdDrawIndexedIndirect( { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; struct anv_buffer *buffer = (struct anv_buffer *) _buffer; - struct anv_bo *bo = &buffer->mem->bo; + struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; anv_cmd_buffer_flush_state(cmd_buffer); 
@@ -2485,7 +2485,7 @@ void VKAPI vkCmdWriteTimestamp( { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; struct anv_buffer *buffer = (struct anv_buffer *) destBuffer; - struct anv_bo *bo = &buffer->mem->bo; + struct anv_bo *bo = buffer->bo; switch (timestampType) { case VK_TIMESTAMP_TYPE_TOP: diff --git a/src/vulkan/image.c b/src/vulkan/image.c index a5357198225..b983b887e95 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -241,7 +241,7 @@ VkResult VKAPI vkCreateImage( if (image == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - image->mem = NULL; + image->bo = NULL; image->offset = 0; image->type = pCreateInfo->imageType; image->extent = pCreateInfo->extent; diff --git a/src/vulkan/intel.c b/src/vulkan/intel.c index 81bd722d3e1..52fadb1ab06 100644 --- a/src/vulkan/intel.c +++ b/src/vulkan/intel.c @@ -67,7 +67,7 @@ VkResult VKAPI vkCreateDmaBufImageINTEL( goto fail_mem; } - image->mem = mem; + image->bo = &mem->bo; image->offset = 0; image->type = VK_IMAGE_TYPE_2D; image->extent = pCreateInfo->extent; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 31d4ab242fb..cfcaf1c70a6 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -439,7 +439,7 @@ struct anv_buffer { VkDeviceSize size; /* Set when bound */ - struct anv_device_memory * mem; + struct anv_bo * bo; VkDeviceSize offset; }; @@ -550,7 +550,7 @@ struct anv_image { int32_t stride; /* Set when bound */ - struct anv_device_memory * mem; + struct anv_bo * bo; VkDeviceSize offset; }; -- cgit v1.2.3 From 55b9b703ea833d7e9321a1c0953fa7bc6aa68863 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 11 May 2015 22:23:38 -0700 Subject: vk: Add anv_batch_emit_merge() helper macro This lets us emit a state packet by merging to half-backed versions, typically one from the pipeline object and one from a dynamic state objects. 
--- src/vulkan/device.c | 11 +++-------- src/vulkan/private.h | 10 ++++++++++ 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 86fa3b8c59e..737db8618a6 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2236,14 +2236,9 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->dirty & ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY) flush_descriptor_sets(cmd_buffer); - if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { - /* maybe: anv_batch_merge(batch, GEN8_3DSTATE_SF, a, b) */ - uint32_t *dw; - - dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_3DSTATE_SF_length); - for (uint32_t i = 0; i < GEN8_3DSTATE_SF_length; i++) - dw[i] = cmd_buffer->rs_state->state_sf[i] | pipeline->state_sf[i]; - } + if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->rs_state->state_sf, pipeline->state_sf); cmd_buffer->vb_dirty = 0; cmd_buffer->dirty = 0; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index cfcaf1c70a6..545aeb36cf7 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -386,6 +386,16 @@ __gen_combine_address(struct anv_batch *batch, void *location, __dst; \ }) +#define anv_batch_emit_merge(batch, dwords0, dwords1) \ + do { \ + uint32_t *dw; \ + \ + assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \ + dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \ + for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \ + dw[i] = (dwords0)[i] | (dwords1)[i]; \ + } while (0) + struct anv_device_memory { struct anv_bo bo; VkDeviceSize map_size; -- cgit v1.2.3 From 6a895c6681e38ecce95aa49cadc2baf2f74cfb9a Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 11 May 2015 22:24:57 -0700 Subject: vk: Add 32 bpc signed and unsigned integer formats --- src/vulkan/image.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 
deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index b983b887e95..d416bdd1b63 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -103,17 +103,17 @@ static const struct anv_format anv_formats[] = { // [VK_FORMAT_R16G16B16A16_UINT] = { .format = R16G16B16A16_UINT }, // [VK_FORMAT_R16G16B16A16_SINT] = { .format = R16G16B16A16_SINT }, [VK_FORMAT_R16G16B16A16_SFLOAT] = { .format = R16G16B16A16_FLOAT, .cpp = 8, .channels = 4 }, - // [VK_FORMAT_R32_UINT] = { .format = R32_UINT }, - // [VK_FORMAT_R32_SINT] = { .format = R32_SINT }, + [VK_FORMAT_R32_UINT] = { .format = R32_UINT, .cpp = 4, .channels = 1 }, + [VK_FORMAT_R32_SINT] = { .format = R32_SINT, .cpp = 4, .channels = 1 }, [VK_FORMAT_R32_SFLOAT] = { .format = R32_FLOAT, .cpp = 4, .channels = 1 }, - // [VK_FORMAT_R32G32_UINT] = { .format = R32G32_UINT }, - // [VK_FORMAT_R32G32_SINT] = { .format = R32G32_SINT }, + [VK_FORMAT_R32G32_UINT] = { .format = R32G32_UINT, .cpp = 8, .channels = 2 }, + [VK_FORMAT_R32G32_SINT] = { .format = R32G32_SINT, .cpp = 8, .channels = 2 }, [VK_FORMAT_R32G32_SFLOAT] = { .format = R32G32_FLOAT, .cpp = 8, .channels = 2 }, - // [VK_FORMAT_R32G32B32_UINT] = { .format = R32G32B32_UINT }, - // [VK_FORMAT_R32G32B32_SINT] = { .format = R32G32B32_SINT }, + [VK_FORMAT_R32G32B32_UINT] = { .format = R32G32B32_UINT, .cpp = 12, .channels = 3 }, + [VK_FORMAT_R32G32B32_SINT] = { .format = R32G32B32_SINT, .cpp = 12, .channels = 3 }, [VK_FORMAT_R32G32B32_SFLOAT] = { .format = R32G32B32_FLOAT, .cpp = 12, .channels = 3 }, - // [VK_FORMAT_R32G32B32A32_UINT] = { .format = R32G32B32A32_UINT }, - // [VK_FORMAT_R32G32B32A32_SINT] = { .format = R32G32B32A32_SINT }, + [VK_FORMAT_R32G32B32A32_UINT] = { .format = R32G32B32A32_UINT, .cpp = 16, .channels = 4 }, + [VK_FORMAT_R32G32B32A32_SINT] = { .format = R32G32B32A32_SINT, .cpp = 16, .channels = 4 }, [VK_FORMAT_R32G32B32A32_SFLOAT] = { .format = R32G32B32A32_FLOAT, .cpp = 16, .channels = 4 }, [VK_FORMAT_R64_SFLOAT] = { 
.format = R64_FLOAT, .cpp = 8, .channels = 1 }, [VK_FORMAT_R64G64_SFLOAT] = { .format = R64G64_FLOAT, .cpp = 16, .channels = 2 }, -- cgit v1.2.3 From ad132bbe488a0755cbcc77474414eabbe266c9be Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 11 May 2015 22:25:52 -0700 Subject: vk: Fix 3DSTATE_VERTEX_BUFFER emission Set VertexBufferIndex to the attribute binding, not the location. --- src/vulkan/pipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 445ffde94ff..a7775cf7765 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -92,7 +92,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, VkPipelineVertexInputCreateInfo const struct anv_format *format = anv_format_for_vk_format(desc->format); struct GEN8_VERTEX_ELEMENT_STATE element = { - .VertexBufferIndex = desc->location, + .VertexBufferIndex = desc->binding, .Valid = true, .SourceElementFormat = format->format, .EdgeFlagEnable = false, -- cgit v1.2.3 From b734e0bcc54c5e8dc36c95923ce28e2f338e486f Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 11 May 2015 23:20:01 -0700 Subject: vk: Add support for driver-internal custom pipelines This lets us disable the viewport, use rect lists and repclear. 
--- src/vulkan/device.c | 3 --- src/vulkan/pipeline.c | 37 ++++++++++++++++++++++++++++++------- src/vulkan/private.h | 12 ++++++++++++ 3 files changed, 42 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 737db8618a6..56c8cb7240b 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1833,9 +1833,6 @@ VkResult VKAPI vkBeginCommandBuffer( .ConstantBufferOffset = 8, .ConstantBufferSize = 4); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLIP, - .ClipEnable = true, - .ViewportXYClipTestEnable = true); anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_WM_CHROMAKEY, .ChromaKeyKillEnable = false); anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SBE_SWIZ); diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index a7775cf7765..6b0a1366a3c 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -122,7 +122,9 @@ emit_vertex_input(struct anv_pipeline *pipeline, VkPipelineVertexInputCreateInfo } static void -emit_ia_state(struct anv_pipeline *pipeline, VkPipelineIaStateCreateInfo *info) +emit_ia_state(struct anv_pipeline *pipeline, + VkPipelineIaStateCreateInfo *info, + const struct anv_pipeline_create_info *extra) { static const uint32_t vk_to_gen_primitive_type[] = { [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, @@ -137,16 +139,22 @@ emit_ia_state(struct anv_pipeline *pipeline, VkPipelineIaStateCreateInfo *info) [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 }; + uint32_t topology = vk_to_gen_primitive_type[info->topology]; + + if (extra && extra->use_rectlist) + topology = _3DPRIM_RECTLIST; anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF, .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, .CutIndex = info->primitiveRestartIndex); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY, - .PrimitiveTopologyType = vk_to_gen_primitive_type[info->topology]); + .PrimitiveTopologyType = topology); } 
static void -emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info) +emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, + const struct anv_pipeline_create_info *extra) + { static const uint32_t vk_to_gen_cullmode[] = { [VK_CULL_MODE_NONE] = CULLMODE_NONE, @@ -173,7 +181,7 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info) struct GEN8_3DSTATE_SF sf = { GEN8_3DSTATE_SF_header, - .ViewportTransformEnable = true, + .ViewportTransformEnable = !(extra && extra->disable_viewport), .TriangleStripListProvokingVertexSelect = info->provokingVertex == VK_PROVOKING_VERTEX_FIRST ? 0 : 2, .LineStripListProvokingVertexSelect = @@ -207,8 +215,19 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info) } VkResult VKAPI vkCreateGraphicsPipeline( + VkDevice device, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + VkPipeline* pPipeline) +{ + return anv_pipeline_create(device, pCreateInfo, NULL, pPipeline); +} + + +VkResult +anv_pipeline_create( VkDevice _device, const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_pipeline_create_info * extra, VkPipeline* pPipeline) { struct anv_device *device = (struct anv_device *) _device; @@ -262,7 +281,7 @@ VkResult VKAPI vkCreateGraphicsPipeline( } } - pipeline->use_repclear = false; + pipeline->use_repclear = extra && extra->use_repclear; anv_compiler_run(device->compiler, pipeline); @@ -274,8 +293,12 @@ VkResult VKAPI vkCreateGraphicsPipeline( pipeline->wm_prog_data.num_varying_inputs = vi_info->attributeCount - 2; emit_vertex_input(pipeline, vi_info); - emit_ia_state(pipeline, ia_info); - emit_rs_state(pipeline, rs_info); + emit_ia_state(pipeline, ia_info, extra); + emit_rs_state(pipeline, rs_info, extra); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, + .ClipEnable = true, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport)); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM, 
.StatisticsEnable = true, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 545aeb36cf7..f6298c19adb 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -535,6 +535,18 @@ struct anv_pipeline { uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; }; +struct anv_pipeline_create_info { + bool use_repclear; + bool disable_viewport; + bool use_rectlist; +}; + +VkResult +anv_pipeline_create(VkDevice device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_pipeline_create_info *extra, + VkPipeline *pPipeline); + VkResult anv_pipeline_destroy(struct anv_pipeline *pipeline); struct anv_compiler *anv_compiler_create(int fd); -- cgit v1.2.3 From d77c34d1d2ea8d0eaafc3058cc70238471f6858b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 11 May 2015 23:25:06 -0700 Subject: vk: Add clear load-op for render passes --- src/vulkan/device.c | 48 +++++++++- src/vulkan/meta.c | 247 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/private.h | 26 ++++++ 3 files changed, 318 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 56c8cb7240b..49384c7940c 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -323,6 +323,8 @@ VkResult VKAPI vkCreateDevice( pthread_mutex_init(&device->mutex, NULL); + anv_device_init_meta(device); + *pDevice = (VkDevice) device; return VK_SUCCESS; @@ -1739,7 +1741,9 @@ VkResult VKAPI vkCreateCommandBuffer( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); cmd_buffer->device = device; - + cmd_buffer->rs_state = NULL; + cmd_buffer->vp_state = NULL; + result = anv_batch_init(&cmd_buffer->batch, device); if (result != VK_SUCCESS) goto fail; @@ -2008,7 +2012,9 @@ void VKAPI vkCmdBindDynamicStateObject( switch (stateBindPoint) { case VK_STATE_BIND_POINT_VIEWPORT: vp_state = (struct anv_dynamic_vp_state *) dynamicState; - + /* We emit state immediately, but set cmd_buffer->vp_state to indicate + * that vp state has been set in this command 
buffer. */ + cmd_buffer->vp_state = vp_state; anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, .ScissorRectPointer = vp_state->scissor.offset); anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, @@ -2571,6 +2577,27 @@ VkResult VKAPI vkCreateFramebuffer( framebuffer->height = pCreateInfo->height; framebuffer->layers = pCreateInfo->layers; + vkCreateDynamicViewportState((VkDevice) device, + &(VkDynamicVpStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, + .viewportAndScissorCount = 2, + .pViewports = (VkViewport[]) { + { + .originX = 0, + .originY = 0, + .width = pCreateInfo->width, + .height = pCreateInfo->height, + .minDepth = 0, + .maxDepth = 1 + }, + }, + .pScissors = (VkRect[]) { + { { 0, 0 }, + { pCreateInfo->width, pCreateInfo->height } }, + } + }, + &framebuffer->vp_state); + *pFramebuffer = (VkFramebuffer) framebuffer; return VK_SUCCESS; @@ -2583,16 +2610,29 @@ VkResult VKAPI vkCreateRenderPass( { struct anv_device *device = (struct anv_device *) _device; struct anv_render_pass *pass; + size_t size; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); - pass = anv_device_alloc(device, sizeof(*pass), 8, + size = sizeof(*pass) + + pCreateInfo->layers * sizeof(struct anv_render_pass_layer); + pass = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (pass == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); pass->render_area = pCreateInfo->renderArea; + pass->num_layers = pCreateInfo->layers; + + pass->num_clear_layers = 0; + for (uint32_t i = 0; i < pCreateInfo->layers; i++) { + pass->layers[i].color_load_op = pCreateInfo->pColorLoadOps[i]; + pass->layers[i].clear_color = pCreateInfo->pColorLoadClearValues[i]; + if (pass->layers[i].color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + pass->num_clear_layers++; + } + *pRenderPass = (VkRenderPass) pass; return VK_SUCCESS; @@ -2617,6 +2657,8 @@ void VKAPI vkCmdBeginRenderPass( pass->render_area.offset.x 
+ pass->render_area.extent.width - 1, .DrawingRectangleOriginY = 0, .DrawingRectangleOriginX = 0); + + anv_cmd_buffer_clear(cmd_buffer, pass); } void VKAPI vkCmdEndRenderPass( diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 5ff5bb9bd68..48fe5d32db0 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -29,6 +29,253 @@ #include "private.h" +#define GLSL(src) "#version 330\n" #src + +void +anv_device_init_meta(struct anv_device *device) +{ + VkPipelineIaStateCreateInfo ia_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .disableVertexReuse = false, + .primitiveRestartEnable = false, + .primitiveRestartIndex = 0 + }; + + /* We don't use a vertex shader for clearing, but instead build and pass + * the VUEs directly to the rasterization backend. + */ + static const char fs_source[] = GLSL( + out vec4 f_color; + flat in vec4 v_color; + void main() + { + f_color = v_color; + }); + + VkShader fs; + vkCreateShader((VkDevice) device, + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .codeSize = sizeof(fs_source), + .pCode = fs_source, + .flags = 0 + }, + &fs); + + VkPipelineShaderStageCreateInfo fs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &ia_create_info, + .shader = { + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .linkConstBufferCount = 0, + .pLinkConstBufferInfo = NULL, + .pSpecializationInfo = NULL + } + }; + + /* We use instanced rendering to clear multiple render targets. We have two + * vertex buffers: the first vertex buffer holds per-vertex data and + * provides the vertices for the clear rectangle. The second one holds + * per-instance data, which consists of the VUE header (which selects the + * layer) and the color (Vulkan supports per-RT clear colors). 
+ */ + VkPipelineVertexInputCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO, + .pNext = &fs_create_info, + .bindingCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = 8, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + { + .binding = 1, + .strideInBytes = 32, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_INSTANCE + }, + }, + .attributeCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 1, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offsetInBytes = 0 + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offsetInBytes = 0 + }, + { + /* Color */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offsetInBytes = 16 + } + } + }; + + VkPipelineRsStateCreateInfo rs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, + .pNext = &vi_create_info, + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }; + + anv_pipeline_create((VkDevice) device, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = &rs_create_info, + .flags = 0, + .layout = 0 + }, + &(struct anv_pipeline_create_info) { + .use_repclear = true, + .disable_viewport = true, + .use_rectlist = true + }, + &device->clear_state.pipeline); + + vkDestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, fs); + + vkCreateDynamicRasterState((VkDevice) device, + &(VkDynamicRsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, + }, + &device->clear_state.rs_state); +} + +struct anv_saved_state { + struct { + struct anv_buffer *buffer; + VkDeviceSize offset; + } vb[2]; + struct anv_pipeline *pipeline; +}; + +static void +anv_cmd_buffer_save(struct 
anv_cmd_buffer *cmd_buffer, struct anv_saved_state *state) +{ + memcpy(state->vb, cmd_buffer->vb, sizeof(state->vb)); + state->pipeline = cmd_buffer->pipeline; +} + +static void +anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, struct anv_saved_state *state) +{ + memcpy(cmd_buffer->vb, state->vb, sizeof(state->vb)); + cmd_buffer->pipeline = state->pipeline; + + cmd_buffer->vb_dirty |= (1 << ARRAY_SIZE(state->vb)) - 1; + cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; +} + +void +anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_framebuffer *fb = cmd_buffer->framebuffer; + struct anv_saved_state saved_state; + struct anv_state state; + uint32_t size; + + struct instance_data { + struct { + uint32_t Reserved; + uint32_t RTAIndex; + uint32_t ViewportIndex; + float PointWidth; + } vue_header; + float color[4]; + } *instance_data; + + const float vertex_data[] = { + /* Rect-list coordinates */ + 0.0, 0.0, + fb->width, 0.0, + fb->width, fb->height, + + /* Align to 16 bytes */ + 0.0, 0.0, + }; + + size = sizeof(vertex_data) + pass->num_clear_layers * sizeof(instance_data[0]); + state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, size, 16); + + memcpy(state.map, vertex_data, sizeof(vertex_data)); + instance_data = state.map + sizeof(vertex_data); + + for (uint32_t i = 0; i < pass->num_layers; i++) { + if (pass->layers[i].color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + *instance_data++ = (struct instance_data) { + .vue_header = { + .RTAIndex = i, + .ViewportIndex = 0, + .PointWidth = 0.0 + }, + .color = { + pass->layers[i].clear_color.color.floatColor[0], + pass->layers[i].clear_color.color.floatColor[1], + pass->layers[i].clear_color.color.floatColor[2], + pass->layers[i].clear_color.color.floatColor[3], + } + }; + } + } + + struct anv_buffer vertex_buffer = { + .device = cmd_buffer->device, + .size = size, + .bo = 
&device->surface_state_block_pool.bo, + .offset = state.offset + }; + + anv_cmd_buffer_save(cmd_buffer, &saved_state); + + vkCmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, + (VkBuffer[]) { + (VkBuffer) &vertex_buffer, + (VkBuffer) &vertex_buffer + }, + (VkDeviceSize[]) { + 0, + sizeof(vertex_data) + }); + + if ((VkPipeline) cmd_buffer->pipeline != device->clear_state.pipeline) + vkCmdBindPipeline((VkCmdBuffer) cmd_buffer, + VK_PIPELINE_BIND_POINT_GRAPHICS, device->clear_state.pipeline); + + /* We don't need anything here, only set if not already set. */ + if (cmd_buffer->rs_state == NULL) + vkCmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_RASTER, + device->clear_state.rs_state); + + if (cmd_buffer->vp_state == NULL) + vkCmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_VIEWPORT, + cmd_buffer->framebuffer->vp_state); + + vkCmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, pass->num_clear_layers); + + /* Restore API state */ + anv_cmd_buffer_restore(cmd_buffer, &saved_state); + +} + void VKAPI vkCmdCopyBuffer( VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index f6298c19adb..4b47c1f55bb 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -245,6 +245,11 @@ struct anv_instance { struct anv_physical_device physicalDevice; }; +struct anv_clear_state { + VkPipeline pipeline; + VkDynamicRsState rs_state; +}; + struct anv_device { struct anv_instance * instance; uint32_t chipset_id; @@ -261,6 +266,8 @@ struct anv_device { struct anv_block_pool surface_state_block_pool; struct anv_state_pool surface_state_pool; + struct anv_clear_state clear_state; + struct anv_compiler * compiler; struct anv_aub_writer * aub_writer; pthread_mutex_t mutex; @@ -486,6 +493,7 @@ struct anv_cmd_buffer { struct anv_pipeline * pipeline; struct anv_framebuffer * framebuffer; struct anv_dynamic_rs_state * rs_state; + struct anv_dynamic_vp_state * vp_state; }; void 
anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); @@ -604,12 +612,30 @@ struct anv_framebuffer { uint32_t width; uint32_t height; uint32_t layers; + + /* Viewport for clears */ + VkDynamicVpState vp_state; +}; + +struct anv_render_pass_layer { + VkAttachmentLoadOp color_load_op; + VkClearColor clear_color; }; struct anv_render_pass { VkRect render_area; + + uint32_t num_clear_layers; + uint32_t num_layers; + struct anv_render_pass_layer layers[0]; }; +void anv_device_init_meta(struct anv_device *device); + +void +anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass); + #ifdef __cplusplus } -- cgit v1.2.3 From 4336a1bc00eb7ecf0b4c923ebc536df548232621 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 May 2015 12:52:53 -0700 Subject: vk/pipeline: Add support for disabling the scissor in "extra" --- src/vulkan/pipeline.c | 2 +- src/vulkan/private.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 6b0a1366a3c..a82466ce3da 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -201,7 +201,7 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, .CullMode = vk_to_gen_cullmode[info->cullMode], .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], - .ScissorRectangleEnable = true, + .ScissorRectangleEnable = !(extra && extra->disable_scissor), .ViewportZClipTestEnable = info->depthClipEnable); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 4b47c1f55bb..778e2b0704c 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -546,6 +546,7 @@ struct anv_pipeline { struct anv_pipeline_create_info { bool use_repclear; bool disable_viewport; + bool disable_scissor; bool use_rectlist; }; -- cgit v1.2.3 From 7727720585b94be2257060264b7e4b68fdcdf834 Mon Sep 17 00:00:00 2001 From: 
Jason Ekstrand Date: Tue, 12 May 2015 13:03:50 -0700 Subject: vk/meta: Break setting up meta clear state into it's own functin --- src/vulkan/meta.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 48fe5d32db0..18e928053ff 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -31,8 +31,8 @@ #define GLSL(src) "#version 330\n" #src -void -anv_device_init_meta(struct anv_device *device) +static void +anv_device_init_meta_clear_state(struct anv_device *device) { VkPipelineIaStateCreateInfo ia_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, @@ -385,3 +385,9 @@ void VKAPI vkCmdResolveImage( const VkImageResolve* pRegions) { } + +void +anv_device_init_meta(struct anv_device *device) +{ + anv_device_init_meta_clear_state(device); +} -- cgit v1.2.3 From d3b374ce59c9366ef6fdf4b1e581a1a795bc3401 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 May 2015 13:43:36 -0700 Subject: vk/util: Add a anv_finishme function/macro --- src/vulkan/private.h | 8 ++++++++ src/vulkan/util.c | 12 ++++++++++++ 2 files changed, 20 insertions(+) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 778e2b0704c..0ee0dabc256 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -85,6 +85,14 @@ vk_error(VkResult error) return error; } +void __anv_finishme(const char *file, int line, const char *format, ...); + +/** + * Print a FINISHME message, including its source location. + */ +#define anv_finishme(format, ...) \ + __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); + /** * A dynamically growable, circular buffer. Elements are added at head and * removed from tail. 
head and tail are free-running uint32_t indices and we diff --git a/src/vulkan/util.c b/src/vulkan/util.c index 847d13b2f55..92f9e407684 100644 --- a/src/vulkan/util.c +++ b/src/vulkan/util.c @@ -30,6 +30,18 @@ #include "private.h" +void +__anv_finishme(const char *file, int line, const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + fprintf(stderr, "%s:%d: FINISHME: ", file, line); + vfprintf(stderr, format, ap); + fprintf(stderr, "\n"); + va_end(ap); +} + int anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) { -- cgit v1.2.3 From ffe9f60358c3eddd043873b5f4bbc1752d98547c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 May 2015 13:44:43 -0700 Subject: vk: Add stub() and stub_return() macros and mark piles of functions as stubs --- src/vulkan/device.c | 53 ++++++++++++++++++++++++++++++++------------------- src/vulkan/image.c | 4 ++-- src/vulkan/meta.c | 11 +++++++++++ src/vulkan/pipeline.c | 10 +++++----- src/vulkan/private.h | 12 ++++++++++++ 5 files changed, 63 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 49384c7940c..98e1570372b 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1055,7 +1055,7 @@ VkResult VKAPI vkQueueBindObjectMemoryRange( VkDeviceMemory mem, VkDeviceSize memOffset) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult vkQueueBindImageMemoryRange( @@ -1066,7 +1066,7 @@ VkResult vkQueueBindImageMemoryRange( VkDeviceMemory mem, VkDeviceSize memOffset) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkCreateFence( @@ -1074,7 +1074,7 @@ VkResult VKAPI vkCreateFence( const VkFenceCreateInfo* pCreateInfo, VkFence* pFence) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkResetFences( @@ -1082,14 +1082,14 @@ VkResult VKAPI vkResetFences( uint32_t fenceCount, VkFence* pFences) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI 
vkGetFenceStatus( VkDevice device, VkFence fence) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkWaitForFences( @@ -1099,7 +1099,7 @@ VkResult VKAPI vkWaitForFences( bool32_t waitAll, uint64_t timeout) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } // Queue semaphore functions @@ -1109,21 +1109,21 @@ VkResult VKAPI vkCreateSemaphore( const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkQueueSignalSemaphore( VkQueue queue, VkSemaphore semaphore) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkQueueWaitSemaphore( VkQueue queue, VkSemaphore semaphore) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } // Event functions @@ -1133,28 +1133,28 @@ VkResult VKAPI vkCreateEvent( const VkEventCreateInfo* pCreateInfo, VkEvent* pEvent) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkGetEventStatus( VkDevice device, VkEvent event) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkSetEvent( VkDevice device, VkEvent event) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkResetEvent( VkDevice device, VkEvent event) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } // Query functions @@ -1205,7 +1205,7 @@ VkResult VKAPI vkGetQueryPoolResults( void* pData, VkQueryResultFlags flags) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } // Format capabilities @@ -1217,7 +1217,7 @@ VkResult VKAPI vkGetFormatInfo( size_t* pDataSize, void* pData) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } // Buffer functions @@ -1429,14 +1429,14 @@ VkResult VKAPI vkBeginDescriptorPoolUpdate( VkDevice device, VkDescriptorUpdateMode updateMode) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkEndDescriptorPoolUpdate( VkDevice device, VkCmdBuffer cmd) { - return VK_UNSUPPORTED; + 
stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkCreateDescriptorPool( @@ -1446,14 +1446,14 @@ VkResult VKAPI vkCreateDescriptorPool( const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkResetDescriptorPool( VkDevice device, VkDescriptorPool descriptorPool) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkAllocDescriptorSets( @@ -1494,6 +1494,7 @@ void VKAPI vkClearDescriptorSets( uint32_t count, const VkDescriptorSet* pDescriptorSets) { + stub(); } void VKAPI vkUpdateDescriptors( @@ -1721,7 +1722,7 @@ VkResult VKAPI vkCreateDynamicDepthStencilState( const VkDynamicDsStateCreateInfo* pCreateInfo, VkDynamicDsState* pState) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } // Command buffer functions @@ -2369,6 +2370,7 @@ void VKAPI vkCmdDispatch( uint32_t y, uint32_t z) { + stub(); } void VKAPI vkCmdDispatchIndirect( @@ -2376,6 +2378,7 @@ void VKAPI vkCmdDispatchIndirect( VkBuffer buffer, VkDeviceSize offset) { + stub(); } void VKAPI vkCmdSetEvent( @@ -2383,6 +2386,7 @@ void VKAPI vkCmdSetEvent( VkEvent event, VkPipeEvent pipeEvent) { + stub(); } void VKAPI vkCmdResetEvent( @@ -2390,6 +2394,7 @@ void VKAPI vkCmdResetEvent( VkEvent event, VkPipeEvent pipeEvent) { + stub(); } void VKAPI vkCmdWaitEvents( @@ -2400,6 +2405,7 @@ void VKAPI vkCmdWaitEvents( uint32_t memBarrierCount, const void** ppMemBarriers) { + stub(); } void VKAPI vkCmdPipelineBarrier( @@ -2410,6 +2416,7 @@ void VKAPI vkCmdPipelineBarrier( uint32_t memBarrierCount, const void** ppMemBarriers) { + stub(); } static void @@ -2471,6 +2478,7 @@ void VKAPI vkCmdResetQueryPool( uint32_t startQuery, uint32_t queryCount) { + stub(); } #define TIMESTAMP 0x44070 @@ -2515,6 +2523,7 @@ void VKAPI vkCmdCopyQueryPoolResults( VkDeviceSize destStride, VkQueryResultFlags flags) { + stub(); } void VKAPI vkCmdInitAtomicCounters( @@ -2524,6 +2533,7 @@ void VKAPI 
vkCmdInitAtomicCounters( uint32_t counterCount, const uint32_t* pData) { + stub(); } void VKAPI vkCmdLoadAtomicCounters( @@ -2534,6 +2544,7 @@ void VKAPI vkCmdLoadAtomicCounters( VkBuffer srcBuffer, VkDeviceSize srcOffset) { + stub(); } void VKAPI vkCmdSaveAtomicCounters( @@ -2544,6 +2555,7 @@ void VKAPI vkCmdSaveAtomicCounters( VkBuffer destBuffer, VkDeviceSize destOffset) { + stub(); } VkResult VKAPI vkCreateFramebuffer( @@ -2665,4 +2677,5 @@ void VKAPI vkCmdEndRenderPass( VkCmdBuffer cmdBuffer, VkRenderPass renderPass) { + stub(); } diff --git a/src/vulkan/image.c b/src/vulkan/image.c index d416bdd1b63..4dd96ceac3b 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -286,7 +286,7 @@ VkResult VKAPI vkGetImageSubresourceInfo( size_t* pDataSize, void* pData) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } // Image view functions @@ -400,5 +400,5 @@ VkResult VKAPI vkCreateDepthStencilView( const VkDepthStencilViewCreateInfo* pCreateInfo, VkDepthStencilView* pView) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 18e928053ff..c346c3ef871 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -283,6 +283,7 @@ void VKAPI vkCmdCopyBuffer( uint32_t regionCount, const VkBufferCopy* pRegions) { + stub(); } void VKAPI vkCmdCopyImage( @@ -294,6 +295,7 @@ void VKAPI vkCmdCopyImage( uint32_t regionCount, const VkImageCopy* pRegions) { + stub(); } void VKAPI vkCmdBlitImage( @@ -305,6 +307,7 @@ void VKAPI vkCmdBlitImage( uint32_t regionCount, const VkImageBlit* pRegions) { + stub(); } void VKAPI vkCmdCopyBufferToImage( @@ -315,6 +318,7 @@ void VKAPI vkCmdCopyBufferToImage( uint32_t regionCount, const VkBufferImageCopy* pRegions) { + stub(); } void VKAPI vkCmdCopyImageToBuffer( @@ -325,6 +329,7 @@ void VKAPI vkCmdCopyImageToBuffer( uint32_t regionCount, const VkBufferImageCopy* pRegions) { + stub(); } void VKAPI vkCmdCloneImageData( @@ -334,6 +339,7 @@ void VKAPI vkCmdCloneImageData( 
VkImage destImage, VkImageLayout destImageLayout) { + stub(); } void VKAPI vkCmdUpdateBuffer( @@ -343,6 +349,7 @@ void VKAPI vkCmdUpdateBuffer( VkDeviceSize dataSize, const uint32_t* pData) { + stub(); } void VKAPI vkCmdFillBuffer( @@ -352,6 +359,7 @@ void VKAPI vkCmdFillBuffer( VkDeviceSize fillSize, uint32_t data) { + stub(); } void VKAPI vkCmdClearColorImage( @@ -362,6 +370,7 @@ void VKAPI vkCmdClearColorImage( uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { + stub(); } void VKAPI vkCmdClearDepthStencil( @@ -373,6 +382,7 @@ void VKAPI vkCmdClearDepthStencil( uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { + stub(); } void VKAPI vkCmdResolveImage( @@ -384,6 +394,7 @@ void VKAPI vkCmdResolveImage( uint32_t regionCount, const VkImageResolve* pRegions) { + stub(); } void diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index a82466ce3da..ce1fa303d7c 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -512,7 +512,7 @@ VkResult VKAPI vkCreateGraphicsPipelineDerivative( VkPipeline basePipeline, VkPipeline* pPipeline) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkCreateComputePipeline( @@ -520,7 +520,7 @@ VkResult VKAPI vkCreateComputePipeline( const VkComputePipelineCreateInfo* pCreateInfo, VkPipeline* pPipeline) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkStorePipeline( @@ -529,7 +529,7 @@ VkResult VKAPI vkStorePipeline( size_t* pDataSize, void* pData) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkLoadPipeline( @@ -538,7 +538,7 @@ VkResult VKAPI vkLoadPipeline( const void* pData, VkPipeline* pPipeline) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } VkResult VKAPI vkLoadPipelineDerivative( @@ -548,7 +548,7 @@ VkResult VKAPI vkLoadPipelineDerivative( VkPipeline basePipeline, VkPipeline* pPipeline) { - return VK_UNSUPPORTED; + stub_return(VK_UNSUPPORTED); } // Pipeline layout functions diff --git 
a/src/vulkan/private.h b/src/vulkan/private.h index 0ee0dabc256..00aceb7ded1 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -93,6 +93,18 @@ void __anv_finishme(const char *file, int line, const char *format, ...); #define anv_finishme(format, ...) \ __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); +#define stub_return(v) \ + do { \ + anv_finishme("stub %s", __func__); \ + return (v); \ + } while (0) + +#define stub(v) \ + do { \ + anv_finishme("stub %s", __func__); \ + return; \ + } while (0) + /** * A dynamically growable, circular buffer. Elements are added at head and * removed from tail. head and tail are free-running uint32_t indices and we -- cgit v1.2.3 From 9905481552c4d507952af520c22d054a68cdb61c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 12 May 2015 14:25:47 -0700 Subject: vk: Add generated header for HSW and IVB (GEN75 and GEN7) --- src/vulkan/compiler.cpp | 4 +- src/vulkan/gen75_pack.h | 8037 +++++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/gen7_pack.h | 6607 ++++++++++++++++++++++++++++++++++++++ src/vulkan/gen8_pack.h | 19 +- src/vulkan/private.h | 2 + 5 files changed, 14659 insertions(+), 10 deletions(-) create mode 100644 src/vulkan/gen75_pack.h create mode 100644 src/vulkan/gen7_pack.h (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 3f2435402ad..f8be0f070a8 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -25,6 +25,8 @@ #include #include +#include "private.h" + #include #include /* brw_new_shader_program is here */ @@ -36,8 +38,6 @@ #include #include -#include "private.h" - static void fail_if(int cond, const char *format, ...) 
{ diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h new file mode 100644 index 00000000000..f01fdb07156 --- /dev/null +++ b/src/vulkan/gen75_pack.h @@ -0,0 +1,8037 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +/* Instructions, enums and structures for HSW. + * + * This file has been generated, do not hand edit. 
+ */ + +#pragma once + +#include +#include + +#ifndef __gen_field_functions +#define __gen_field_functions + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_field(uint64_t v, uint32_t start, uint32_t end) +{ +#if DEBUG + if (end - start + 1 < 64) + assert(v < 1ul << (end - start + 1)); +#endif + + return v << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ +#if DEBUG + uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline uint32_t +__gen_float(float v) +{ + return ((union __gen_value) { .f = (v) }).dw; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#endif + +#define GEN75_3DSTATE_URB_VS_length 0x00000002 +#define GEN75_3DSTATE_URB_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_URB_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 48, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_URB_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VSURBStartingAddress; + uint32_t VSURBEntryAllocationSize; + uint32_t VSNumberofURBEntries; +}; + +static inline void +GEN75_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_URB_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VSURBStartingAddress, 25, 30) | + __gen_field(values->VSURBEntryAllocationSize, 16, 24) | 
+ __gen_field(values->VSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN75_GPGPU_CSR_BASE_ADDRESS_length 0x00000002 +#define GEN75_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 +#define GEN75_GPGPU_CSR_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 0 + +struct GEN75_GPGPU_CSR_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GPGPUCSRBaseAddress; +}; + +static inline void +GEN75_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_GPGPU_CSR_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddress, dw1); + +} + +#define GEN75_MI_STORE_REGISTER_MEM_length 0x00000003 +#define GEN75_MI_STORE_REGISTER_MEM_length_bias 0x00000002 +#define GEN75_MI_STORE_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 36, \ + .DwordLength = 1 + +struct GEN75_MI_STORE_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN75_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_STORE_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + 
__gen_field(values->PredicateEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN75_PIPELINE_SELECT_length 0x00000001 +#define GEN75_PIPELINE_SELECT_length_bias 0x00000001 +#define GEN75_PIPELINE_SELECT_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4 + +struct GEN75_PIPELINE_SELECT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define _3D 0 +#define Media 1 +#define GPGPU 2 + uint32_t PipelineSelection; +}; + +static inline void +GEN75_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_PIPELINE_SELECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->PipelineSelection, 0, 1) | + 0; + +} + +#define GEN75_STATE_BASE_ADDRESS_length 0x0000000a +#define GEN75_STATE_BASE_ADDRESS_length_bias 0x00000002 +#define GEN75_STATE_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 1, \ + .DwordLength = 8 + +struct GEN75_STATE_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GeneralStateBaseAddress; + uint32_t GeneralStateMemoryObjectControlState; + uint32_t StatelessDataPortAccessMemoryObjectControlState; + uint32_t GeneralStateBaseAddressModifyEnable; + __gen_address_type SurfaceStateBaseAddress; + uint32_t SurfaceStateMemoryObjectControlState; + uint32_t 
SurfaceStateBaseAddressModifyEnable; + __gen_address_type DynamicStateBaseAddress; + uint32_t DynamicStateMemoryObjectControlState; + uint32_t DynamicStateBaseAddressModifyEnable; + __gen_address_type IndirectObjectBaseAddress; + uint32_t IndirectObjectMemoryObjectControlState; + uint32_t IndirectObjectBaseAddressModifyEnable; + __gen_address_type InstructionBaseAddress; + uint32_t InstructionMemoryObjectControlState; + uint32_t InstructionBaseAddressModifyEnable; + __gen_address_type GeneralStateAccessUpperBound; + uint32_t GeneralStateAccessUpperBoundModifyEnable; + __gen_address_type DynamicStateAccessUpperBound; + uint32_t DynamicStateAccessUpperBoundModifyEnable; + __gen_address_type IndirectObjectAccessUpperBound; + uint32_t IndirectObjectAccessUpperBoundModifyEnable; + __gen_address_type InstructionAccessUpperBound; + uint32_t InstructionAccessUpperBoundModifyEnable; +}; + +static inline void +GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_STATE_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + /* Struct GeneralStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct StatelessDataPortAccessMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + + uint32_t dw2 = + /* Struct SurfaceStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceStateBaseAddress, dw2); + + uint32_t dw3 
= + /* Struct DynamicStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->DynamicStateBaseAddress, dw3); + + uint32_t dw4 = + /* Struct IndirectObjectMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->IndirectObjectBaseAddress, dw4); + + uint32_t dw5 = + /* Struct InstructionMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->InstructionBaseAddress, dw5); + + uint32_t dw6 = + __gen_field(values->GeneralStateAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->GeneralStateAccessUpperBound, dw6); + + uint32_t dw7 = + __gen_field(values->DynamicStateAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[7] = + __gen_combine_address(data, &dw[7], values->DynamicStateAccessUpperBound, dw7); + + uint32_t dw8 = + __gen_field(values->IndirectObjectAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[8] = + __gen_combine_address(data, &dw[8], values->IndirectObjectAccessUpperBound, dw8); + + uint32_t dw9 = + __gen_field(values->InstructionAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[9] = + __gen_combine_address(data, &dw[9], values->InstructionAccessUpperBound, dw9); + +} + +#define GEN75_STATE_PREFETCH_length 0x00000002 +#define GEN75_STATE_PREFETCH_length_bias 0x00000002 +#define GEN75_STATE_PREFETCH_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +struct GEN75_STATE_PREFETCH { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + 
__gen_address_type PrefetchPointer; + uint32_t PrefetchCount; +}; + +static inline void +GEN75_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_STATE_PREFETCH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->PrefetchCount, 0, 2) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); + +} + +#define GEN75_STATE_SIP_length 0x00000002 +#define GEN75_STATE_SIP_length_bias 0x00000002 +#define GEN75_STATE_SIP_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2, \ + .DwordLength = 0 + +struct GEN75_STATE_SIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SystemInstructionPointer; +}; + +static inline void +GEN75_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_STATE_SIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SystemInstructionPointer, 4, 31) | + 0; + +} + +#define GEN75_SWTESS_BASE_ADDRESS_length 0x00000002 +#define GEN75_SWTESS_BASE_ADDRESS_length_bias 0x00000002 +#define GEN75_SWTESS_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +struct GEN75_SWTESS_BASE_ADDRESS { + uint32_t CommandType; + 
uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type SWTessellationBaseAddress; + uint32_t SWTessellationMemoryObjectControlState; +}; + +static inline void +GEN75_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SWTESS_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + /* Struct SWTessellationMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); + +} + +#define GEN75_3DPRIMITIVE_length 0x00000007 +#define GEN75_3DPRIMITIVE_length_bias 0x00000002 +#define GEN75_3DPRIMITIVE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 3, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 5 + +struct GEN75_3DPRIMITIVE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t IndirectParameterEnable; + uint32_t UAVCoherencyRequired; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t EndOffsetEnable; +#define SEQUENTIAL 0 +#define RANDOM 1 + uint32_t VertexAccessType; + uint32_t PrimitiveTopologyType; + uint32_t VertexCountPerInstance; + uint32_t StartVertexLocation; + uint32_t InstanceCount; + uint32_t StartInstanceLocation; + uint32_t BaseVertexLocation; +}; + +static inline void +GEN75_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DPRIMITIVE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->UAVCoherencyRequired, 9, 9) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->EndOffsetEnable, 9, 9) | + __gen_field(values->VertexAccessType, 8, 8) | + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + + dw[2] = + __gen_field(values->VertexCountPerInstance, 0, 31) | + 0; + + dw[3] = + __gen_field(values->StartVertexLocation, 0, 31) | + 0; + + dw[4] = + __gen_field(values->InstanceCount, 0, 31) | + 0; + + dw[5] = + __gen_field(values->StartInstanceLocation, 0, 31) | + 0; + + dw[6] = + __gen_field(values->BaseVertexLocation, 0, 31) | + 0; + +} + +#define GEN75_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 +#define GEN75_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 +#define GEN75_3DSTATE_AA_LINE_PARAMETERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +struct GEN75_3DSTATE_AA_LINE_PARAMETERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t AACoverageBias; + uint32_t AACoverageSlope; + uint32_t AACoverageEndCapBias; + uint32_t AACoverageEndCapSlope; +}; + +static inline void +GEN75_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_AA_LINE_PARAMETERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AACoverageBias, 16, 23) | + 
__gen_field(values->AACoverageSlope, 0, 7) | + 0; + + dw[2] = + __gen_field(values->AACoverageEndCapBias, 16, 23) | + __gen_field(values->AACoverageEndCapSlope, 0, 7) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 70 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 68 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline 
void +GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 69 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_header\ + .CommandType = 
3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 71 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 67 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 40, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 41, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSBindingTable; +}; + +static 
inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 39, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 42, \ + .DwordLength = 0 + 
+struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 38, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000003 +#define 
GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 1 + +struct GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type BindingTablePoolBaseAddress; + uint32_t BindingTablePoolEnable; + uint32_t SurfaceObjectControlState; + __gen_address_type BindingTablePoolUpperBound; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->BindingTablePoolEnable, 11, 11) | + /* Struct SurfaceObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BindingTablePoolUpperBound, dw2); + +} + +#define GEN75_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 +#define GEN75_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 +#define GEN75_3DSTATE_BLEND_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 36, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_BLEND_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BlendStatePointer; +}; + +static 
inline void +GEN75_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BLEND_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->BlendStatePointer, 6, 31) | + 0; + +} + +#define GEN75_3DSTATE_CC_STATE_POINTERS_length 0x00000002 +#define GEN75_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 +#define GEN75_3DSTATE_CC_STATE_POINTERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_CC_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ColorCalcStatePointer; +}; + +static inline void +GEN75_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CC_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ColorCalcStatePointer, 6, 31) | + 0; + +} + +#define GEN75_3DSTATE_CHROMA_KEY_length 0x00000004 +#define GEN75_3DSTATE_CHROMA_KEY_length_bias 0x00000002 +#define GEN75_3DSTATE_CHROMA_KEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 2 + +struct GEN75_3DSTATE_CHROMA_KEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t 
_3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyTableIndex; + uint32_t ChromaKeyLowValue; + uint32_t ChromaKeyHighValue; +}; + +static inline void +GEN75_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CHROMA_KEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyTableIndex, 30, 31) | + 0; + + dw[2] = + __gen_field(values->ChromaKeyLowValue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyHighValue, 0, 31) | + 0; + +} + +#define GEN75_3DSTATE_CLEAR_PARAMS_length 0x00000003 +#define GEN75_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 +#define GEN75_3DSTATE_CLEAR_PARAMS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +struct GEN75_3DSTATE_CLEAR_PARAMS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DepthClearValue; + uint32_t DepthClearValueValid; +}; + +static inline void +GEN75_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CLEAR_PARAMS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DepthClearValue, 0, 31) | + 0; + + dw[2] = + __gen_field(values->DepthClearValueValid, 0, 0) | + 0; + +} + +#define 
GEN75_3DSTATE_CLIP_length 0x00000004 +#define GEN75_3DSTATE_CLIP_length_bias 0x00000002 +#define GEN75_3DSTATE_CLIP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 2 + +struct GEN75_3DSTATE_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t FrontWinding; + uint32_t VertexSubPixelPrecisionSelect; + uint32_t EarlyCullEnable; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; + uint32_t ClipperStatisticsEnable; + uint32_t UserClipDistanceCullTestEnableBitmask; + uint32_t ClipEnable; +#define APIMODE_OGL 0 + uint32_t APIMode; + uint32_t ViewportXYClipTestEnable; + uint32_t ViewportZClipTestEnable; + uint32_t GuardbandClipTestEnable; + uint32_t UserClipDistanceClipTestEnableBitmask; +#define CLIPMODE_NORMAL 0 +#define CLIPMODE_REJECT_ALL 3 +#define CLIPMODE_ACCEPT_ALL 4 + uint32_t ClipMode; + uint32_t PerspectiveDivideDisable; + uint32_t NonPerspectiveBarycentricEnable; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 + uint32_t LineStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleFanProvokingVertexSelect; + uint32_t MinimumPointWidth; + uint32_t MaximumPointWidth; + uint32_t ForceZeroRTAIndexEnable; + uint32_t MaximumVPIndex; +}; + +static inline void +GEN75_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | 
+ 0; + + dw[1] = + __gen_field(values->FrontWinding, 20, 20) | + __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | + __gen_field(values->EarlyCullEnable, 18, 18) | + __gen_field(values->CullMode, 16, 17) | + __gen_field(values->ClipperStatisticsEnable, 10, 10) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->ClipEnable, 31, 31) | + __gen_field(values->APIMode, 30, 30) | + __gen_field(values->ViewportXYClipTestEnable, 28, 28) | + __gen_field(values->ViewportZClipTestEnable, 27, 27) | + __gen_field(values->GuardbandClipTestEnable, 26, 26) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | + __gen_field(values->ClipMode, 13, 15) | + __gen_field(values->PerspectiveDivideDisable, 9, 9) | + __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | + __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | + __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | + 0; + + dw[3] = + __gen_field(values->MinimumPointWidth, 17, 27) | + __gen_field(values->MaximumPointWidth, 6, 16) | + __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | + __gen_field(values->MaximumVPIndex, 0, 3) | + 0; + +} + +#define GEN75_3DSTATE_CONSTANT_DS_length 0x00000007 +#define GEN75_3DSTATE_CONSTANT_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 5 + +struct GEN75_3DSTATE_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN75_3DSTATE_CONSTANT_GS_length 0x00000007 +#define GEN75_3DSTATE_CONSTANT_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 5 + +struct GEN75_3DSTATE_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN75_3DSTATE_CONSTANT_HS_length 0x00000007 +#define GEN75_3DSTATE_CONSTANT_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 5 + +struct GEN75_3DSTATE_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_HS * restrict values) +{ + 
uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN75_3DSTATE_CONSTANT_PS_length 0x00000007 +#define GEN75_3DSTATE_CONSTANT_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 23, \ + .DwordLength = 5 + +struct GEN75_3DSTATE_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN75_3DSTATE_CONSTANT_VS_length 0x00000007 +#define GEN75_3DSTATE_CONSTANT_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 5 + +struct GEN75_3DSTATE_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN75_3DSTATE_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN75_3DSTATE_DEPTH_BUFFER_length 0x00000007 +#define GEN75_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 5, \ + .DwordLength = 5 + +struct GEN75_3DSTATE_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + uint32_t DepthWriteEnable; + uint32_t StencilWriteEnable; + uint32_t HierarchicalDepthBufferEnable; +#define D32_FLOAT 1 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t SurfaceFormat; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t LOD; +#define SURFTYPE_CUBEmustbezero 0 + uint32_t Depth; + uint32_t MinimumArrayElement; + uint32_t DepthBufferObjectControlState; + uint32_t DepthCoordinateOffsetY; + uint32_t DepthCoordinateOffsetX; + uint32_t RenderTargetViewExtent; +}; + +static inline void +GEN75_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + 
__gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->DepthWriteEnable, 28, 28) | + __gen_field(values->StencilWriteEnable, 27, 27) | + __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | + __gen_field(values->SurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[3] = + __gen_field(values->Height, 18, 31) | + __gen_field(values->Width, 4, 17) | + __gen_field(values->LOD, 0, 3) | + 0; + + dw[4] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->MinimumArrayElement, 10, 20) | + /* Struct DepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[5] = + __gen_field(values->DepthCoordinateOffsetY, 16, 31) | + __gen_field(values->DepthCoordinateOffsetX, 0, 15) | + 0; + + dw[6] = + __gen_field(values->RenderTargetViewExtent, 21, 31) | + 0; + +} + +#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length 0x00000002 +#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length_bias 0x00000002 +#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 37, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDEPTH_STENCIL_STATE; +}; + +static inline void +GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + 
__gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDEPTH_STENCIL_STATE, 6, 31) | + 0; + +} + +#define GEN75_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 +#define GEN75_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 +#define GEN75_3DSTATE_DRAWING_RECTANGLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 2 + +struct GEN75_3DSTATE_DRAWING_RECTANGLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define Legacy 0 +#define Core0Enabled 1 +#define Core1Enabled 2 + uint32_t CoreModeSelect; + uint32_t DwordLength; + uint32_t ClippedDrawingRectangleYMin; + uint32_t ClippedDrawingRectangleXMin; + uint32_t ClippedDrawingRectangleYMax; + uint32_t ClippedDrawingRectangleXMax; + uint32_t DrawingRectangleOriginY; + uint32_t DrawingRectangleOriginX; +}; + +static inline void +GEN75_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_DRAWING_RECTANGLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->CoreModeSelect, 14, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | + 0; + + dw[2] = + __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | + 0; + + dw[3] = + __gen_field(values->DrawingRectangleOriginY, 16, 31) | + __gen_field(values->DrawingRectangleOriginX, 0, 15) | + 0; + +} + +#define GEN75_3DSTATE_DS_length 0x00000006 +#define 
GEN75_3DSTATE_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 4 + +struct GEN75_3DSTATE_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleDomainPointDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t AccessesUAV; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t PatchURBEntryReadLength; + uint32_t PatchURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + uint32_t StatisticsEnable; + uint32_t ComputeWCoordinateEnable; + uint32_t DSCacheDisable; + uint32_t DSFunctionEnable; +}; + +static inline void +GEN75_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleDomainPointDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + 
__gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->AccessesUAV, 14, 14) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->PatchURBEntryReadLength, 11, 17) | + __gen_field(values->PatchURBEntryReadOffset, 4, 9) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 21, 29) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->ComputeWCoordinateEnable, 2, 2) | + __gen_field(values->DSCacheDisable, 1, 1) | + __gen_field(values->DSFunctionEnable, 0, 0) | + 0; + +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 55 + +struct GEN75_3DSTATE_GATHER_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 
__gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 53 + +struct GEN75_3DSTATE_GATHER_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 54 + +struct GEN75_3DSTATE_GATHER_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t 
GatherBufferOffset; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 56 + +struct GEN75_3DSTATE_GATHER_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + uint32_t ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + 
__gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 52 + +struct GEN75_3DSTATE_GATHER_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + uint32_t ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_POOL_ALLOC_length 0x00000003 +#define GEN75_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_POOL_ALLOC_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 1 + +struct GEN75_3DSTATE_GATHER_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + 
__gen_address_type GatherPoolBaseAddress; + uint32_t GatherPoolEnable; + uint32_t MemoryObjectControlState; + __gen_address_type GatherPoolUpperBound; +}; + +static inline void +GEN75_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->GatherPoolEnable, 11, 11) | + /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->GatherPoolUpperBound, dw2); + +} + +#define GEN75_3DSTATE_GS_length 0x00000007 +#define GEN75_3DSTATE_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 5 + +struct GEN75_3DSTATE_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; + uint32_t SingleProgramFlowSPF; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t GSaccessesUAV; + uint32_t MaskStackExceptionEnable; + uint32_t SoftwareExceptionEnable; + 
uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t OutputVertexSize; + uint32_t OutputTopology; + uint32_t VertexURBEntryReadLength; + uint32_t IncludeVertexHandles; + uint32_t VertexURBEntryReadOffset; + uint32_t DispatchGRFStartRegisterforURBData; + uint32_t MaximumNumberofThreads; + uint32_t ControlDataHeaderSize; + uint32_t InstanceControl; + uint32_t DefaultStreamID; +#define SINGLE 0 +#define DUAL_INSTANCE 1 +#define DUAL_OBJECT 2 + uint32_t DispatchMode; + uint32_t GSStatisticsEnable; + uint32_t GSInvocationsIncrementValue; + uint32_t IncludePrimitiveID; + uint32_t Hint; +#define REORDER_LEADING 0 +#define REORDER_TRAILING 1 + uint32_t ReorderMode; + uint32_t DiscardAdjacency; + uint32_t GSEnable; +#define GSCTL_CUT 0 +#define GSCTL_SID 1 + uint32_t ControlDataFormat; + uint32_t SemaphoreHandle; +}; + +static inline void +GEN75_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleProgramFlowSPF, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->GSaccessesUAV, 12, 12) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + 
__gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->OutputVertexSize, 23, 28) | + __gen_field(values->OutputTopology, 17, 22) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->IncludeVertexHandles, 10, 10) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->DispatchGRFStartRegisterforURBData, 0, 3) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 24, 31) | + __gen_field(values->ControlDataHeaderSize, 20, 23) | + __gen_field(values->InstanceControl, 15, 19) | + __gen_field(values->DefaultStreamID, 13, 14) | + __gen_field(values->DispatchMode, 11, 12) | + __gen_field(values->GSStatisticsEnable, 10, 10) | + __gen_field(values->GSInvocationsIncrementValue, 5, 9) | + __gen_field(values->IncludePrimitiveID, 4, 4) | + __gen_field(values->Hint, 3, 3) | + __gen_field(values->ReorderMode, 2, 2) | + __gen_field(values->DiscardAdjacency, 1, 1) | + __gen_field(values->GSEnable, 0, 0) | + 0; + + dw[6] = + __gen_field(values->ControlDataFormat, 31, 31) | + __gen_offset(values->SemaphoreHandle, 0, 12) | + 0; + +} + +#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000003 +#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 1 + +struct GEN75_3DSTATE_HIER_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HierarchicalDepthBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; +}; + +static inline void +GEN75_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_HIER_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 
31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct HierarchicalDepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + +} + +#define GEN75_3DSTATE_HS_length 0x00000007 +#define GEN75_3DSTATE_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 27, \ + .DwordLength = 5 + +struct GEN75_3DSTATE_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t MaximumNumberofThreads; + uint32_t Enable; + uint32_t StatisticsEnable; + uint32_t InstanceCount; + uint32_t KernelStartPointer; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; + uint32_t HSaccessesUAV; + uint32_t IncludeVertexHandles; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t SemaphoreHandle; +}; + +static inline void +GEN75_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 12, 12) | + __gen_field(values->MaximumNumberofThreads, 0, 7) | + 0; + + dw[2] = + __gen_field(values->Enable, 31, 31) | + __gen_field(values->StatisticsEnable, 29, 29) | + __gen_field(values->InstanceCount, 0, 3) | + 0; + + dw[3] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[5] = + __gen_field(values->SingleProgramFlow, 27, 27) | + __gen_field(values->VectorMaskEnable, 26, 26) | + __gen_field(values->HSaccessesUAV, 25, 25) | + __gen_field(values->IncludeVertexHandles, 24, 24) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[6] = + __gen_offset(values->SemaphoreHandle, 0, 12) | + 0; + +} + +#define GEN75_3DSTATE_INDEX_BUFFER_length 0x00000003 +#define GEN75_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_INDEX_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +struct GEN75_3DSTATE_INDEX_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t MemoryObjectControlState; +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 
2 + uint32_t IndexFormat; + uint32_t DwordLength; + __gen_address_type BufferStartingAddress; + __gen_address_type BufferEndingAddress; +}; + +static inline void +GEN75_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_INDEX_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->IndexFormat, 8, 9) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BufferEndingAddress, dw2); + +} + +#define GEN75_3DSTATE_LINE_STIPPLE_length 0x00000003 +#define GEN75_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 +#define GEN75_3DSTATE_LINE_STIPPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 8, \ + .DwordLength = 1 + +struct GEN75_3DSTATE_LINE_STIPPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + uint32_t CurrentRepeatCounter; + uint32_t CurrentStippleIndex; + uint32_t LineStipplePattern; + uint32_t LineStippleInverseRepeatCount; + uint32_t LineStippleRepeatCount; +}; + +static inline void +GEN75_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_LINE_STIPPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 
24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | + __gen_field(values->CurrentRepeatCounter, 21, 29) | + __gen_field(values->CurrentStippleIndex, 16, 19) | + __gen_field(values->LineStipplePattern, 0, 15) | + 0; + + dw[2] = + __gen_field(values->LineStippleInverseRepeatCount, 15, 31) | + __gen_field(values->LineStippleRepeatCount, 0, 8) | + 0; + +} + +#define GEN75_3DSTATE_MONOFILTER_SIZE_length 0x00000002 +#define GEN75_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 +#define GEN75_3DSTATE_MONOFILTER_SIZE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_MONOFILTER_SIZE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t MonochromeFilterWidth; + uint32_t MonochromeFilterHeight; +}; + +static inline void +GEN75_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_MONOFILTER_SIZE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MonochromeFilterWidth, 3, 5) | + __gen_field(values->MonochromeFilterHeight, 0, 2) | + 0; + +} + +#define GEN75_3DSTATE_MULTISAMPLE_length 0x00000004 +#define GEN75_3DSTATE_MULTISAMPLE_length_bias 0x00000002 +#define GEN75_3DSTATE_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 13, \ + .DwordLength = 2 + +struct GEN75_3DSTATE_MULTISAMPLE { + uint32_t CommandType; + 
uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t MultiSampleEnable; +#define PIXLOC_CENTER 0 +#define PIXLOC_UL_CORNER 1 + uint32_t PixelLocation; +#define NUMSAMPLES_1 0 +#define NUMSAMPLES_4 2 +#define NUMSAMPLES_8 3 + uint32_t NumberofMultisamples; + uint32_t Sample3XOffset; + uint32_t Sample3YOffset; + uint32_t Sample2XOffset; + uint32_t Sample2YOffset; + uint32_t Sample1XOffset; + uint32_t Sample1YOffset; + uint32_t Sample0XOffset; + uint32_t Sample0YOffset; + uint32_t Sample7XOffset; + uint32_t Sample7YOffset; + uint32_t Sample6XOffset; + uint32_t Sample6YOffset; + uint32_t Sample5XOffset; + uint32_t Sample5YOffset; + uint32_t Sample4XOffset; + uint32_t Sample4YOffset; +}; + +static inline void +GEN75_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MultiSampleEnable, 5, 5) | + __gen_field(values->PixelLocation, 4, 4) | + __gen_field(values->NumberofMultisamples, 1, 3) | + 0; + + dw[2] = + __gen_field(values->Sample3XOffset, 28, 31) | + __gen_field(values->Sample3YOffset, 24, 27) | + __gen_field(values->Sample2XOffset, 20, 23) | + __gen_field(values->Sample2YOffset, 16, 19) | + __gen_field(values->Sample1XOffset, 12, 15) | + __gen_field(values->Sample1YOffset, 8, 11) | + __gen_field(values->Sample0XOffset, 4, 7) | + __gen_field(values->Sample0YOffset, 0, 3) | + 0; + + dw[3] = + __gen_field(values->Sample7XOffset, 28, 31) | + __gen_field(values->Sample7YOffset, 24, 27) | + __gen_field(values->Sample6XOffset, 20, 23) | + __gen_field(values->Sample6YOffset, 16, 19) | + 
__gen_field(values->Sample5XOffset, 12, 15) | + __gen_field(values->Sample5YOffset, 8, 11) | + __gen_field(values->Sample4XOffset, 4, 7) | + __gen_field(values->Sample4YOffset, 0, 3) | + 0; + +} + +#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 +#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 +#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_POLY_STIPPLE_OFFSET { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PolygonStippleXOffset; + uint32_t PolygonStippleYOffset; +}; + +static inline void +GEN75_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PolygonStippleXOffset, 8, 12) | + __gen_field(values->PolygonStippleYOffset, 0, 4) | + 0; + +} + +#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 +#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 +#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 31 + +struct GEN75_3DSTATE_POLY_STIPPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PatternRow; +}; + +static inline void +GEN75_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN75_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PatternRow, 0, 31) | + 0; + +} + +#define GEN75_3DSTATE_PS_length 0x00000008 +#define GEN75_3DSTATE_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 32, \ + .DwordLength = 6 + +struct GEN75_3DSTATE_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer0; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlowSPF; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; + uint32_t SamplerCount; +#define FTZ 0 +#define RET 1 + uint32_t DenormalMode; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadPriority; +#define IEEE745 0 +#define Alt 1 + uint32_t FloatingPointMode; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t MaskStackExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreads; + uint32_t SampleMask; + uint32_t PushConstantEnable; + uint32_t AttributeEnable; + uint32_t oMaskPresenttoRenderTarget; + uint32_t RenderTargetFastClearEnable; + uint32_t DualSourceBlendEnable; + uint32_t RenderTargetResolveEnable; + uint32_t PSAccessesUAV; +#define POSOFFSET_NONE 0 +#define POSOFFSET_CENTROID 2 +#define POSOFFSET_SAMPLE 3 + uint32_t PositionXYOffsetSelect; + uint32_t _32PixelDispatchEnable; + uint32_t 
_16PixelDispatchEnable; + uint32_t _8PixelDispatchEnable; + uint32_t DispatchGRFStartRegisterforConstantSetupData0; + uint32_t DispatchGRFStartRegisterforConstantSetupData1; + uint32_t DispatchGRFStartRegisterforConstantSetupData2; + uint32_t KernelStartPointer1; + uint32_t KernelStartPointer2; +}; + +static inline void +GEN75_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer0, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleProgramFlowSPF, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->DenormalMode, 26, 26) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->RoundingMode, 14, 15) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->MaximumNumberofThreads, 23, 31) | + __gen_field(values->SampleMask, 12, 19) | + __gen_field(values->PushConstantEnable, 11, 11) | + __gen_field(values->AttributeEnable, 10, 10) | + __gen_field(values->oMaskPresenttoRenderTarget, 9, 9) | + __gen_field(values->RenderTargetFastClearEnable, 8, 8) | + __gen_field(values->DualSourceBlendEnable, 7, 7) | + __gen_field(values->RenderTargetResolveEnable, 6, 6) | + 
__gen_field(values->PSAccessesUAV, 5, 5) | + __gen_field(values->PositionXYOffsetSelect, 3, 4) | + __gen_field(values->_32PixelDispatchEnable, 2, 2) | + __gen_field(values->_16PixelDispatchEnable, 1, 1) | + __gen_field(values->_8PixelDispatchEnable, 0, 0) | + 0; + + dw[5] = + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData0, 16, 22) | + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData1, 8, 14) | + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData2, 0, 6) | + 0; + + dw[6] = + __gen_offset(values->KernelStartPointer1, 6, 31) | + 0; + + dw[7] = + __gen_offset(values->KernelStartPointer2, 6, 31) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 +#define 
GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + 
__gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void 
+GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_RAST_MULTISAMPLE_length 0x00000006 +#define GEN75_3DSTATE_RAST_MULTISAMPLE_length_bias 0x00000002 +#define GEN75_3DSTATE_RAST_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 4 + +struct GEN75_3DSTATE_RAST_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NRM_NUMRASTSAMPLES_1 0 +#define NRM_NUMRASTSAMPLES_2 1 +#define NRM_NUMRASTSAMPLES_4 2 +#define NRM_NUMRASTSAMPLES_8 3 +#define NRM_NUMRASTSAMPLES_16 4 + uint32_t NumberofRasterizationMultisamples; + uint32_t Sample3XOffset; + uint32_t Sample3YOffset; + uint32_t Sample2XOffset; + uint32_t Sample2YOffset; + uint32_t Sample1XOffset; + uint32_t Sample1YOffset; + uint32_t Sample0XOffset; + uint32_t Sample0YOffset; + uint32_t Sample7XOffset; + uint32_t Sample7YOffset; + uint32_t Sample6XOffset; + uint32_t Sample6YOffset; + uint32_t Sample5XOffset; + uint32_t Sample5YOffset; + uint32_t Sample4XOffset; + uint32_t Sample4YOffset; + uint32_t Sample11XOffset; + uint32_t Sample11YOffset; + uint32_t Sample10XOffset; + uint32_t Sample10YOffset; + uint32_t Sample9XOffset; + uint32_t Sample9YOffset; + uint32_t Sample8XOffset; + uint32_t Sample8YOffset; + uint32_t Sample15XOffset; + uint32_t Sample15YOffset; + uint32_t 
Sample14XOffset; + uint32_t Sample14YOffset; + uint32_t Sample13XOffset; + uint32_t Sample13YOffset; + uint32_t Sample12XOffset; + uint32_t Sample12YOffset; +}; + +static inline void +GEN75_3DSTATE_RAST_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_RAST_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->NumberofRasterizationMultisamples, 1, 3) | + 0; + + dw[2] = + __gen_field(values->Sample3XOffset, 28, 31) | + __gen_field(values->Sample3YOffset, 24, 27) | + __gen_field(values->Sample2XOffset, 20, 23) | + __gen_field(values->Sample2YOffset, 16, 19) | + __gen_field(values->Sample1XOffset, 12, 15) | + __gen_field(values->Sample1YOffset, 8, 11) | + __gen_field(values->Sample0XOffset, 4, 7) | + __gen_field(values->Sample0YOffset, 0, 3) | + 0; + + dw[3] = + __gen_field(values->Sample7XOffset, 28, 31) | + __gen_field(values->Sample7YOffset, 24, 27) | + __gen_field(values->Sample6XOffset, 20, 23) | + __gen_field(values->Sample6YOffset, 16, 19) | + __gen_field(values->Sample5XOffset, 12, 15) | + __gen_field(values->Sample5YOffset, 8, 11) | + __gen_field(values->Sample4XOffset, 4, 7) | + __gen_field(values->Sample4YOffset, 0, 3) | + 0; + + dw[4] = + __gen_field(values->Sample11XOffset, 28, 31) | + __gen_field(values->Sample11YOffset, 24, 27) | + __gen_field(values->Sample10XOffset, 20, 23) | + __gen_field(values->Sample10YOffset, 16, 19) | + __gen_field(values->Sample9XOffset, 12, 15) | + __gen_field(values->Sample9YOffset, 8, 11) | + __gen_field(values->Sample8XOffset, 4, 7) | + __gen_field(values->Sample8YOffset, 0, 3) | + 0; + + dw[5] = + __gen_field(values->Sample15XOffset, 28, 31) | + 
__gen_field(values->Sample15YOffset, 24, 27) | + __gen_field(values->Sample14XOffset, 20, 23) | + __gen_field(values->Sample14YOffset, 16, 19) | + __gen_field(values->Sample13XOffset, 12, 15) | + __gen_field(values->Sample13YOffset, 8, 11) | + __gen_field(values->Sample12XOffset, 4, 7) | + __gen_field(values->Sample12YOffset, 0, 3) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2 + +struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 12 + +struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + 
dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 45, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 46, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSSamplerState; +}; + +static inline void 
+GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 44, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 47, \ + .DwordLength = 0 + +struct 
GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 43, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLE_MASK_length 0x00000002 +#define 
GEN75_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLE_MASK_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_SAMPLE_MASK { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SampleMask; +}; + +static inline void +GEN75_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLE_MASK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SampleMask, 0, 7) | + 0; + +} + +#define GEN75_3DSTATE_SBE_length 0x0000000e +#define GEN75_3DSTATE_SBE_length_bias 0x00000002 +#define GEN75_3DSTATE_SBE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 31, \ + .DwordLength = 12 + +struct GEN75_3DSTATE_SBE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t AttributeSwizzleControlMode; + uint32_t NumberofSFOutputAttributes; + uint32_t AttributeSwizzleEnable; +#define UPPERLEFT 0 +#define LOWERLEFT 1 + uint32_t PointSpriteTextureCoordinateOrigin; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t Attribute2n1ComponentOverrideW; + uint32_t Attribute2n1ComponentOverrideZ; + uint32_t Attribute2n1ComponentOverrideY; + uint32_t Attribute2n1ComponentOverrideX; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t Attribute2n1ConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 
1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t Attribute2n1SwizzleSelect; + uint32_t Attribute2n1SourceAttribute; + uint32_t Attribute2nComponentOverrideW; + uint32_t Attribute2nComponentOverrideZ; + uint32_t Attribute2nComponentOverrideY; + uint32_t Attribute2nComponentOverrideX; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t Attribute2nConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t Attribute2nSwizzleSelect; + uint32_t Attribute2nSourceAttribute; + uint32_t PointSpriteTextureCoordinateEnable; + uint32_t ConstantInterpolationEnable310; + uint32_t Attribute7WrapShortestEnables; + uint32_t Attribute6WrapShortestEnables; + uint32_t Attribute5WrapShortestEnables; + uint32_t Attribute4WrapShortestEnables; + uint32_t Attribute3WrapShortestEnables; + uint32_t Attribute2WrapShortestEnables; + uint32_t Attribute1WrapShortestEnables; + uint32_t Attribute0WrapShortestEnables; + uint32_t Attribute15WrapShortestEnables; + uint32_t Attribute14WrapShortestEnables; + uint32_t Attribute13WrapShortestEnables; + uint32_t Attribute12WrapShortestEnables; + uint32_t Attribute11WrapShortestEnables; + uint32_t Attribute10WrapShortestEnables; + uint32_t Attribute9WrapShortestEnables; + uint32_t Attribute8WrapShortestEnables; +}; + +static inline void +GEN75_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SBE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AttributeSwizzleControlMode, 28, 28) | + __gen_field(values->NumberofSFOutputAttributes, 22, 27) | + 
__gen_field(values->AttributeSwizzleEnable, 21, 21) | + __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | + __gen_field(values->VertexURBEntryReadLength, 11, 15) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[2] = + __gen_field(values->Attribute2n1ComponentOverrideW, 31, 31) | + __gen_field(values->Attribute2n1ComponentOverrideZ, 30, 30) | + __gen_field(values->Attribute2n1ComponentOverrideY, 29, 29) | + __gen_field(values->Attribute2n1ComponentOverrideX, 28, 28) | + __gen_field(values->Attribute2n1ConstantSource, 25, 26) | + __gen_field(values->Attribute2n1SwizzleSelect, 22, 23) | + __gen_field(values->Attribute2n1SourceAttribute, 16, 20) | + __gen_field(values->Attribute2nComponentOverrideW, 15, 15) | + __gen_field(values->Attribute2nComponentOverrideZ, 14, 14) | + __gen_field(values->Attribute2nComponentOverrideY, 13, 13) | + __gen_field(values->Attribute2nComponentOverrideX, 12, 12) | + __gen_field(values->Attribute2nConstantSource, 9, 10) | + __gen_field(values->Attribute2nSwizzleSelect, 6, 7) | + __gen_field(values->Attribute2nSourceAttribute, 0, 4) | + 0; + + dw[10] = + __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | + 0; + + dw[11] = + __gen_field(values->ConstantInterpolationEnable310, 0, 31) | + 0; + + dw[12] = + __gen_field(values->Attribute7WrapShortestEnables, 28, 31) | + __gen_field(values->Attribute6WrapShortestEnables, 24, 27) | + __gen_field(values->Attribute5WrapShortestEnables, 20, 23) | + __gen_field(values->Attribute4WrapShortestEnables, 16, 19) | + __gen_field(values->Attribute3WrapShortestEnables, 12, 15) | + __gen_field(values->Attribute2WrapShortestEnables, 8, 11) | + __gen_field(values->Attribute1WrapShortestEnables, 4, 7) | + __gen_field(values->Attribute0WrapShortestEnables, 0, 3) | + 0; + + dw[13] = + __gen_field(values->Attribute15WrapShortestEnables, 28, 31) | + __gen_field(values->Attribute14WrapShortestEnables, 24, 27) | + __gen_field(values->Attribute13WrapShortestEnables, 20, 
23) | + __gen_field(values->Attribute12WrapShortestEnables, 16, 19) | + __gen_field(values->Attribute11WrapShortestEnables, 12, 15) | + __gen_field(values->Attribute10WrapShortestEnables, 8, 11) | + __gen_field(values->Attribute9WrapShortestEnables, 4, 7) | + __gen_field(values->Attribute8WrapShortestEnables, 0, 3) | + 0; + +} + +#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 +#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 +#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 15, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_SCISSOR_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ScissorRectPointer; +}; + +static inline void +GEN75_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ScissorRectPointer, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SF_length 0x00000007 +#define GEN75_3DSTATE_SF_length_bias 0x00000002 +#define GEN75_3DSTATE_SF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 5 + +struct GEN75_3DSTATE_SF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define D32_FLOAT_S8X24_UINT 0 +#define D32_FLOAT 1 +#define D24_UNORM_S8_UINT 2 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t 
DepthBufferSurfaceFormat; + uint32_t LegacyGlobalDepthBiasEnable; + uint32_t StatisticsEnable; + uint32_t GlobalDepthOffsetEnableSolid; + uint32_t GlobalDepthOffsetEnableWireframe; + uint32_t GlobalDepthOffsetEnablePoint; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t FrontFaceFillMode; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t BackFaceFillMode; + uint32_t ViewTransformEnable; + uint32_t FrontWinding; + uint32_t AntiAliasingEnable; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; + uint32_t LineWidth; + uint32_t LineEndCapAntialiasingRegionWidth; + uint32_t LineStippleEnable; + uint32_t ScissorRectangleEnable; + uint32_t RTIndependentRasterizationEnable; + uint32_t MultisampleRasterizationMode; + uint32_t LastPixelEnable; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleFanProvokingVertexSelect; +#define AALINEDISTANCE_TRUE 1 + uint32_t AALineDistanceMode; + uint32_t VertexSubPixelPrecisionSelect; + uint32_t UsePointWidthState; + uint32_t PointWidth; + uint32_t GlobalDepthOffsetConstant; + uint32_t GlobalDepthOffsetScale; + uint32_t GlobalDepthOffsetClamp; +}; + +static inline void +GEN75_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DepthBufferSurfaceFormat, 12, 14) | + __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | + 
__gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | + __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | + __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | + __gen_field(values->FrontFaceFillMode, 5, 6) | + __gen_field(values->BackFaceFillMode, 3, 4) | + __gen_field(values->ViewTransformEnable, 1, 1) | + __gen_field(values->FrontWinding, 0, 0) | + 0; + + dw[2] = + __gen_field(values->AntiAliasingEnable, 31, 31) | + __gen_field(values->CullMode, 29, 30) | + __gen_field(values->LineWidth, 18, 27) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | + __gen_field(values->LineStippleEnable, 14, 14) | + __gen_field(values->ScissorRectangleEnable, 11, 11) | + __gen_field(values->RTIndependentRasterizationEnable, 10, 10) | + __gen_field(values->MultisampleRasterizationMode, 8, 9) | + 0; + + dw[3] = + __gen_field(values->LastPixelEnable, 31, 31) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | + __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | + __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | + __gen_field(values->AALineDistanceMode, 14, 14) | + __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | + __gen_field(values->UsePointWidthState, 11, 11) | + __gen_field(values->PointWidth, 0, 10) | + 0; + + dw[4] = + __gen_field(values->GlobalDepthOffsetConstant, 0, 31) | + 0; + + dw[5] = + __gen_field(values->GlobalDepthOffsetScale, 0, 31) | + 0; + + dw[6] = + __gen_field(values->GlobalDepthOffsetClamp, 0, 31) | + 0; + +} + +#define GEN75_3DSTATE_SO_BUFFER_length 0x00000004 +#define GEN75_3DSTATE_SO_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_SO_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 2 + +struct GEN75_3DSTATE_SO_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t 
_3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOBufferIndex; + uint32_t SOBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + __gen_address_type SurfaceEndAddress; +}; + +static inline void +GEN75_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SO_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOBufferIndex, 29, 30) | + /* Struct SOBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfacePitch, 0, 11) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->SurfaceEndAddress, dw3); + +} + +#define GEN75_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 +#define GEN75_3DSTATE_SO_DECL_LIST_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 23 + +struct GEN75_3DSTATE_SO_DECL_LIST { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StreamtoBufferSelects3; + uint32_t StreamtoBufferSelects2; + uint32_t StreamtoBufferSelects1; + uint32_t StreamtoBufferSelects0; + uint32_t NumEntries3; + uint32_t NumEntries2; + uint32_t NumEntries1; + uint32_t NumEntries0; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SO_DECL_LIST * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 
29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->StreamtoBufferSelects3, 12, 15) | + __gen_field(values->StreamtoBufferSelects2, 8, 11) | + __gen_field(values->StreamtoBufferSelects1, 4, 7) | + __gen_field(values->StreamtoBufferSelects0, 0, 3) | + 0; + + dw[2] = + __gen_field(values->NumEntries3, 24, 31) | + __gen_field(values->NumEntries2, 16, 23) | + __gen_field(values->NumEntries1, 8, 15) | + __gen_field(values->NumEntries0, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_STENCIL_BUFFER_length 0x00000003 +#define GEN75_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_STENCIL_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 1 + +struct GEN75_3DSTATE_STENCIL_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilBufferEnable; + uint32_t StencilBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; +}; + +static inline void +GEN75_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_STENCIL_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StencilBufferEnable, 31, 31) | + /* Struct StencilBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + 
__gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + +} + +#define GEN75_3DSTATE_STREAMOUT_length 0x00000003 +#define GEN75_3DSTATE_STREAMOUT_length_bias 0x00000002 +#define GEN75_3DSTATE_STREAMOUT_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 30, \ + .DwordLength = 1 + +struct GEN75_3DSTATE_STREAMOUT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOFunctionEnable; + uint32_t RenderingDisable; + uint32_t RenderStreamSelect; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + uint32_t SOStatisticsEnable; + uint32_t SOBufferEnable3; + uint32_t SOBufferEnable2; + uint32_t SOBufferEnable1; + uint32_t SOBufferEnable0; + uint32_t Stream3VertexReadOffset; + uint32_t Stream3VertexReadLength; + uint32_t Stream2VertexReadOffset; + uint32_t Stream2VertexReadLength; + uint32_t Stream1VertexReadOffset; + uint32_t Stream1VertexReadLength; + uint32_t Stream0VertexReadOffset; + uint32_t Stream0VertexReadLength; +}; + +static inline void +GEN75_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_STREAMOUT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOFunctionEnable, 31, 31) | + __gen_field(values->RenderingDisable, 30, 30) | + __gen_field(values->RenderStreamSelect, 27, 28) | + __gen_field(values->ReorderMode, 26, 26) | + __gen_field(values->SOStatisticsEnable, 25, 25) | + __gen_field(values->SOBufferEnable3, 11, 11) | + __gen_field(values->SOBufferEnable2, 10, 10) | + __gen_field(values->SOBufferEnable1, 9, 9) | + 
__gen_field(values->SOBufferEnable0, 8, 8) | + 0; + + dw[2] = + __gen_field(values->Stream3VertexReadOffset, 29, 29) | + __gen_field(values->Stream3VertexReadLength, 24, 28) | + __gen_field(values->Stream2VertexReadOffset, 21, 21) | + __gen_field(values->Stream2VertexReadLength, 16, 20) | + __gen_field(values->Stream1VertexReadOffset, 13, 13) | + __gen_field(values->Stream1VertexReadLength, 8, 12) | + __gen_field(values->Stream0VertexReadOffset, 5, 5) | + __gen_field(values->Stream0VertexReadLength, 0, 4) | + 0; + +} + +#define GEN75_3DSTATE_TE_length 0x00000004 +#define GEN75_3DSTATE_TE_length_bias 0x00000002 +#define GEN75_3DSTATE_TE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 2 + +struct GEN75_3DSTATE_TE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INTEGER 0 +#define ODD_FRACTIONAL 1 +#define EVEN_FRACTIONAL 2 + uint32_t Partitioning; +#define POINT 0 +#define LINE 1 +#define TRI_CW 2 +#define TRI_CCW 3 + uint32_t OutputTopology; +#define QUAD 0 +#define TRI 1 +#define ISOLINE 2 + uint32_t TEDomain; +#define HW_TESS 0 +#define SW_TESS 1 + uint32_t TEMode; + uint32_t TEEnable; + float MaximumTessellationFactorOdd; + float MaximumTessellationFactorNotOdd; +}; + +static inline void +GEN75_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_TE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Partitioning, 12, 13) | + __gen_field(values->OutputTopology, 8, 9) | + __gen_field(values->TEDomain, 4, 5) | + __gen_field(values->TEMode, 1, 2) | + 
__gen_field(values->TEEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->MaximumTessellationFactorOdd) | + 0; + + dw[3] = + __gen_float(values->MaximumTessellationFactorNotOdd) | + 0; + +} + +#define GEN75_3DSTATE_URB_DS_length 0x00000002 +#define GEN75_3DSTATE_URB_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_URB_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 50, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_URB_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DSURBStartingAddress; + uint32_t DSURBEntryAllocationSize; + uint32_t DSNumberofURBEntries; +}; + +static inline void +GEN75_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_URB_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DSURBStartingAddress, 25, 30) | + __gen_field(values->DSURBEntryAllocationSize, 16, 24) | + __gen_field(values->DSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN75_3DSTATE_URB_GS_length 0x00000002 +#define GEN75_3DSTATE_URB_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_URB_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 51, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_URB_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t GSURBStartingAddress; + uint32_t GSURBEntryAllocationSize; + uint32_t GSNumberofURBEntries; +}; + +static inline void +GEN75_3DSTATE_URB_GS_pack(__gen_user_data *data, void * 
restrict dst, + const struct GEN75_3DSTATE_URB_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->GSURBStartingAddress, 25, 30) | + __gen_field(values->GSURBEntryAllocationSize, 16, 24) | + __gen_field(values->GSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN75_3DSTATE_URB_HS_length 0x00000002 +#define GEN75_3DSTATE_URB_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_URB_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 49, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_URB_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HSURBStartingAddress; + uint32_t HSURBEntryAllocationSize; + uint32_t HSNumberofURBEntries; +}; + +static inline void +GEN75_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_URB_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->HSURBStartingAddress, 25, 30) | + __gen_field(values->HSURBEntryAllocationSize, 16, 24) | + __gen_field(values->HSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN75_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 +#define GEN75_3DSTATE_VERTEX_BUFFERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 8 + +struct GEN75_3DSTATE_VERTEX_BUFFERS { + 
uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VERTEX_BUFFERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 +#define GEN75_3DSTATE_VERTEX_ELEMENTS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 9 + +struct GEN75_3DSTATE_VERTEX_ELEMENTS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VERTEX_ELEMENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_VF_length 0x00000002 +#define GEN75_3DSTATE_VF_length_bias 0x00000002 +#define GEN75_3DSTATE_VF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 12, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_VF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + 
uint32_t _3DCommandSubOpcode; + uint32_t IndexedDrawCutIndexEnable; + uint32_t DwordLength; + uint32_t CutIndex; +}; + +static inline void +GEN75_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndexedDrawCutIndexEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CutIndex, 0, 31) | + 0; + +} + +#define GEN75_3DSTATE_VF_STATISTICS_length 0x00000001 +#define GEN75_3DSTATE_VF_STATISTICS_length_bias 0x00000001 +#define GEN75_3DSTATE_VF_STATISTICS_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 11 + +struct GEN75_3DSTATE_VF_STATISTICS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t StatisticsEnable; +}; + +static inline void +GEN75_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VF_STATISTICS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->StatisticsEnable, 0, 0) | + 0; + +} + +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 35, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC { + uint32_t 
CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t CCViewportPointer; +}; + +static inline void +GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->CCViewportPointer, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 33, \ + .DwordLength = 0 + +struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SFClipViewportPointer; +}; + +static inline void +GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SFClipViewportPointer, 6, 31) | + 0; + +} + +#define GEN75_3DSTATE_VS_length 0x00000006 +#define GEN75_3DSTATE_VS_length_bias 0x00000002 +#define 
GEN75_3DSTATE_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 16, \ + .DwordLength = 4 + +struct GEN75_3DSTATE_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleVertexDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t VSaccessesUAV; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBaseOffset; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterforURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + uint32_t StatisticsEnable; + uint32_t VertexCacheDisable; + uint32_t VSFunctionEnable; +}; + +static inline void +GEN75_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleVertexDispatch, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadPriority, 
17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->VSaccessesUAV, 12, 12) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBaseOffset, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->DispatchGRFStartRegisterforURBData, 20, 24) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 23, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->VertexCacheDisable, 1, 1) | + __gen_field(values->VSFunctionEnable, 0, 0) | + 0; + +} + +#define GEN75_3DSTATE_WM_length 0x00000003 +#define GEN75_3DSTATE_WM_length_bias 0x00000002 +#define GEN75_3DSTATE_WM_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 1 + +struct GEN75_3DSTATE_WM { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StatisticsEnable; + uint32_t DepthBufferClear; + uint32_t ThreadDispatchEnable; + uint32_t DepthBufferResolveEnable; + uint32_t HierarchicalDepthBufferResolveEnable; + uint32_t LegacyDiamondLineRasterization; + uint32_t PixelShaderKillPixel; +#define PSCDEPTH_OFF 0 +#define PSCDEPTH_ON 1 +#define PSCDEPTH_ON_GE 2 +#define PSCDEPTH_ON_LE 3 + uint32_t PixelShaderComputedDepthMode; +#define EDSC_NORMAL 0 +#define EDSC_PSEXEC 1 +#define EDSC_PREPS 2 + uint32_t EarlyDepthStencilControl; + uint32_t PixelShaderUsesSourceDepth; + uint32_t PixelShaderUsesSourceW; +#define INTERP_PIXEL 0 +#define INTERP_CENTROID 2 +#define INTERP_SAMPLE 3 + uint32_t PositionZWInterpolationMode; + uint32_t BarycentricInterpolationMode; + uint32_t PixelShaderUsesInputCoverageMask; + uint32_t 
LineEndCapAntialiasingRegionWidth; + uint32_t LineAntialiasingRegionWidth; + uint32_t RTIndependentRasterizationEnable; + uint32_t PolygonStippleEnable; + uint32_t LineStippleEnable; +#define RASTRULE_UPPER_LEFT 0 +#define RASTRULE_UPPER_RIGHT 1 + uint32_t PointRasterizationRule; +#define MSRASTMODE_OFF_PIXEL 0 +#define MSRASTMODE_OFF_PATTERN 1 +#define MSRASTMODE_ON_PIXEL 2 +#define MSRASTMODE_ON_PATTERN 3 + uint32_t MultisampleRasterizationMode; +#define MSDISPMODE_PERSAMPLE 0 +#define MSDISPMODE_PERPIXEL 1 + uint32_t MultisampleDispatchMode; +#define OFF 0 +#define ON 1 + uint32_t PSUAVonly; +}; + +static inline void +GEN75_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_WM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StatisticsEnable, 31, 31) | + __gen_field(values->DepthBufferClear, 30, 30) | + __gen_field(values->ThreadDispatchEnable, 29, 29) | + __gen_field(values->DepthBufferResolveEnable, 28, 28) | + __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | + __gen_field(values->PixelShaderKillPixel, 25, 25) | + __gen_field(values->PixelShaderComputedDepthMode, 23, 24) | + __gen_field(values->EarlyDepthStencilControl, 21, 22) | + __gen_field(values->PixelShaderUsesSourceDepth, 20, 20) | + __gen_field(values->PixelShaderUsesSourceW, 19, 19) | + __gen_field(values->PositionZWInterpolationMode, 17, 18) | + __gen_field(values->BarycentricInterpolationMode, 11, 16) | + __gen_field(values->PixelShaderUsesInputCoverageMask, 10, 10) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | + 
__gen_field(values->LineAntialiasingRegionWidth, 6, 7) | + __gen_field(values->RTIndependentRasterizationEnable, 5, 5) | + __gen_field(values->PolygonStippleEnable, 4, 4) | + __gen_field(values->LineStippleEnable, 3, 3) | + __gen_field(values->PointRasterizationRule, 2, 2) | + __gen_field(values->MultisampleRasterizationMode, 0, 1) | + 0; + + dw[2] = + __gen_field(values->MultisampleDispatchMode, 31, 31) | + __gen_field(values->PSUAVonly, 30, 30) | + 0; + +} + +#define GEN75_GPGPU_OBJECT_length 0x00000008 +#define GEN75_GPGPU_OBJECT_length_bias 0x00000002 +#define GEN75_GPGPU_OBJECT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 4, \ + .DwordLength = 6 + +struct GEN75_GPGPU_OBJECT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t SharedLocalMemoryFixedOffset; + uint32_t InterfaceDescriptorOffset; + uint32_t SharedLocalMemoryOffset; + uint32_t EndofThreadGroup; +#define Slice0 0 +#define Slice1 1 + uint32_t SliceDestinationSelect; +#define HalfSlice1 2 +#define HalfSlice0 1 +#define EitherHalfSlice 0 + uint32_t HalfSliceDestinationSelect; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t ThreadGroupIDX; + uint32_t ThreadGroupIDY; + uint32_t ThreadGroupIDZ; + uint32_t ExecutionMask; +}; + +static inline void +GEN75_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_GPGPU_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SharedLocalMemoryFixedOffset, 7, 7) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) 
| + 0; + + dw[2] = + __gen_field(values->SharedLocalMemoryOffset, 28, 31) | + __gen_field(values->EndofThreadGroup, 24, 24) | + __gen_field(values->SliceDestinationSelect, 19, 19) | + __gen_field(values->HalfSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ThreadGroupIDX, 0, 31) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDY, 0, 31) | + 0; + + dw[6] = + __gen_field(values->ThreadGroupIDZ, 0, 31) | + 0; + + dw[7] = + __gen_field(values->ExecutionMask, 0, 31) | + 0; + +} + +#define GEN75_GPGPU_WALKER_length 0x0000000b +#define GEN75_GPGPU_WALKER_length_bias 0x00000002 +#define GEN75_GPGPU_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcodeA = 5, \ + .DwordLength = 9 + +struct GEN75_GPGPU_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcodeA; + uint32_t IndirectParameterEnable; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; +#define SIMD8 0 +#define SIMD16 1 +#define SIMD32 2 + uint32_t SIMDSize; + uint32_t ThreadDepthCounterMaximum; + uint32_t ThreadHeightCounterMaximum; + uint32_t ThreadWidthCounterMaximum; + uint32_t ThreadGroupIDStartingX; + uint32_t ThreadGroupIDXDimension; + uint32_t ThreadGroupIDStartingY; + uint32_t ThreadGroupIDYDimension; + uint32_t ThreadGroupIDStartingZ; + uint32_t ThreadGroupIDZDimension; + uint32_t RightExecutionMask; + uint32_t BottomExecutionMask; +}; + +static inline void +GEN75_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_GPGPU_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcodeA, 16, 23) | + 
__gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->SIMDSize, 30, 31) | + __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | + __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | + __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | + 0; + + dw[3] = + __gen_field(values->ThreadGroupIDStartingX, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ThreadGroupIDXDimension, 0, 31) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDStartingY, 0, 31) | + 0; + + dw[6] = + __gen_field(values->ThreadGroupIDYDimension, 0, 31) | + 0; + + dw[7] = + __gen_field(values->ThreadGroupIDStartingZ, 0, 31) | + 0; + + dw[8] = + __gen_field(values->ThreadGroupIDZDimension, 0, 31) | + 0; + + dw[9] = + __gen_field(values->RightExecutionMask, 0, 31) | + 0; + + dw[10] = + __gen_field(values->BottomExecutionMask, 0, 31) | + 0; + +} + +#define GEN75_MEDIA_CURBE_LOAD_length 0x00000004 +#define GEN75_MEDIA_CURBE_LOAD_length_bias 0x00000002 +#define GEN75_MEDIA_CURBE_LOAD_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 1, \ + .DwordLength = 2 + +struct GEN75_MEDIA_CURBE_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t CURBETotalDataLength; + uint32_t CURBEDataStartAddress; +}; + +static inline void +GEN75_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_CURBE_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + 
__gen_field(values->CURBETotalDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->CURBEDataStartAddress, 0, 31) | + 0; + +} + +#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 +#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 +#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 2, \ + .DwordLength = 2 + +struct GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorTotalLength; + uint32_t InterfaceDescriptorDataStartAddress; +}; + +static inline void +GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | + 0; + +} + +#define GEN75_MEDIA_OBJECT_length_bias 0x00000002 +#define GEN75_MEDIA_OBJECT_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 0 + +struct GEN75_MEDIA_OBJECT { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + 
uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define EitherSlice 0 + uint32_t SliceDestinationSelect; +#define HalfSlice1 2 +#define HalfSlice0 1 +#define Eitherhalfslice 0 + uint32_t HalfSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + uint32_t ScoreboardMask; + /* variable length fields follow */ +}; + +static inline void +GEN75_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 19) | + __gen_field(values->HalfSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_MEDIA_OBJECT_PRT_length 0x00000010 +#define GEN75_MEDIA_OBJECT_PRT_length_bias 0x00000002 +#define GEN75_MEDIA_OBJECT_PRT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 2, \ + .DwordLength = 14 + +struct GEN75_MEDIA_OBJECT_PRT { + uint32_t 
CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t ChildrenPresent; + uint32_t PRT_FenceNeeded; +#define Rootthreadqueue 0 +#define VFEstateflush 1 + uint32_t PRT_FenceType; + uint32_t InlineData; +}; + +static inline void +GEN75_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_OBJECT_PRT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->PRT_FenceNeeded, 23, 23) | + __gen_field(values->PRT_FenceType, 22, 22) | + 0; + + dw[3] = + 0; + + dw[4] = + __gen_field(values->InlineData, 0, 31) | + 0; + +} + +#define GEN75_MEDIA_OBJECT_WALKER_length_bias 0x00000002 +#define GEN75_MEDIA_OBJECT_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 3 + +struct GEN75_MEDIA_OBJECT_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t ScoreboardMask; + uint32_t DualMode; + uint32_t Repel; + uint32_t QuadMode; + uint32_t ColorCountMinusOne; + uint32_t MiddleLoopExtraSteps; + uint32_t LocalMidLoopUnitY; + uint32_t MidLoopUnitX; + uint32_t 
GlobalLoopExecCount; + uint32_t LocalLoopExecCount; + uint32_t BlockResolutionY; + uint32_t BlockResolutionX; + uint32_t LocalStartY; + uint32_t LocalStartX; + uint32_t LocalOuterLoopStrideY; + uint32_t LocalOuterLoopStrideX; + uint32_t LocalInnerLoopUnitY; + uint32_t LocalInnerLoopUnitX; + uint32_t GlobalResolutionY; + uint32_t GlobalResolutionX; + uint32_t GlobalStartY; + uint32_t GlobalStartX; + uint32_t GlobalOuterLoopStrideY; + uint32_t GlobalOuterLoopStrideX; + uint32_t GlobalInnerLoopUnitY; + uint32_t GlobalInnerLoopUnitX; + /* variable length fields follow */ +}; + +static inline void +GEN75_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_OBJECT_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + 0; + + dw[5] = + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->DualMode, 31, 31) | + __gen_field(values->Repel, 30, 30) | + __gen_field(values->QuadMode, 29, 29) | + __gen_field(values->ColorCountMinusOne, 24, 27) | + __gen_field(values->MiddleLoopExtraSteps, 16, 20) | + __gen_field(values->LocalMidLoopUnitY, 12, 13) | + __gen_field(values->MidLoopUnitX, 8, 9) | + 0; + + dw[7] = + __gen_field(values->GlobalLoopExecCount, 16, 25) | + __gen_field(values->LocalLoopExecCount, 0, 9) | + 0; + + dw[8] = + __gen_field(values->BlockResolutionY, 16, 24) | + 
__gen_field(values->BlockResolutionX, 0, 8) | + 0; + + dw[9] = + __gen_field(values->LocalStartY, 16, 24) | + __gen_field(values->LocalStartX, 0, 8) | + 0; + + dw[10] = + 0; + + dw[11] = + __gen_field(values->LocalOuterLoopStrideY, 16, 25) | + __gen_field(values->LocalOuterLoopStrideX, 0, 9) | + 0; + + dw[12] = + __gen_field(values->LocalInnerLoopUnitY, 16, 25) | + __gen_field(values->LocalInnerLoopUnitX, 0, 9) | + 0; + + dw[13] = + __gen_field(values->GlobalResolutionY, 16, 24) | + __gen_field(values->GlobalResolutionX, 0, 8) | + 0; + + dw[14] = + __gen_field(values->GlobalStartY, 16, 25) | + __gen_field(values->GlobalStartX, 0, 9) | + 0; + + dw[15] = + __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | + __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | + 0; + + dw[16] = + __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | + __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_MEDIA_STATE_FLUSH_length 0x00000002 +#define GEN75_MEDIA_STATE_FLUSH_length_bias 0x00000002 +#define GEN75_MEDIA_STATE_FLUSH_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 4, \ + .DwordLength = 0 + +struct GEN75_MEDIA_STATE_FLUSH { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t DisablePreemption; + uint32_t FlushtoGO; + uint32_t WatermarkRequired; + uint32_t InterfaceDescriptorOffset; +}; + +static inline void +GEN75_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_STATE_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->DisablePreemption, 8, 8) | + 
__gen_field(values->FlushtoGO, 7, 7) | + __gen_field(values->WatermarkRequired, 6, 6) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + +} + +#define GEN75_MEDIA_VFE_STATE_length 0x00000008 +#define GEN75_MEDIA_VFE_STATE_length_bias 0x00000002 +#define GEN75_MEDIA_VFE_STATE_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 0, \ + .DwordLength = 6 + +struct GEN75_MEDIA_VFE_STATE { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t ScratchSpaceBasePointer; + uint32_t StackSize; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreads; + uint32_t NumberofURBEntries; +#define Maintainingtheexistingtimestampstate 0 +#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 + uint32_t ResetGatewayTimer; +#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 +#define BypassingOpenGatewayCloseGatewayprotocol 1 + uint32_t BypassGatewayControl; + uint32_t GPGPUMode; + uint32_t HalfSliceDisable; + uint32_t URBEntryAllocationSize; + uint32_t CURBEAllocationSize; +#define Scoreboarddisabled 0 +#define Scoreboardenabled 1 + uint32_t ScoreboardEnable; +#define StallingScoreboard 0 +#define NonStallingScoreboard 1 + uint32_t ScoreboardType; + uint32_t ScoreboardMask; + uint32_t Scoreboard3DeltaY; + uint32_t Scoreboard3DeltaX; + uint32_t Scoreboard2DeltaY; + uint32_t Scoreboard2DeltaX; + uint32_t Scoreboard1DeltaY; + uint32_t Scoreboard1DeltaX; + uint32_t Scoreboard0DeltaY; + uint32_t Scoreboard0DeltaX; + uint32_t Scoreboard7DeltaY; + uint32_t Scoreboard7DeltaX; + uint32_t Scoreboard6DeltaY; + uint32_t Scoreboard6DeltaX; + uint32_t Scoreboard5DeltaY; + uint32_t Scoreboard5DeltaX; + uint32_t Scoreboard4DeltaY; + uint32_t Scoreboard4DeltaX; +}; + +static inline void +GEN75_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_VFE_STATE * restrict values) +{ + uint32_t *dw = 
(uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->StackSize, 4, 7) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[2] = + __gen_field(values->MaximumNumberofThreads, 16, 31) | + __gen_field(values->NumberofURBEntries, 8, 15) | + __gen_field(values->ResetGatewayTimer, 7, 7) | + __gen_field(values->BypassGatewayControl, 6, 6) | + __gen_field(values->GPGPUMode, 2, 2) | + 0; + + dw[3] = + __gen_field(values->HalfSliceDisable, 0, 1) | + 0; + + dw[4] = + __gen_field(values->URBEntryAllocationSize, 16, 31) | + __gen_field(values->CURBEAllocationSize, 0, 15) | + 0; + + dw[5] = + __gen_field(values->ScoreboardEnable, 31, 31) | + __gen_field(values->ScoreboardType, 30, 30) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->Scoreboard3DeltaY, 28, 31) | + __gen_field(values->Scoreboard3DeltaX, 24, 27) | + __gen_field(values->Scoreboard2DeltaY, 20, 23) | + __gen_field(values->Scoreboard2DeltaX, 16, 19) | + __gen_field(values->Scoreboard1DeltaY, 12, 15) | + __gen_field(values->Scoreboard1DeltaX, 8, 11) | + __gen_field(values->Scoreboard0DeltaY, 4, 7) | + __gen_field(values->Scoreboard0DeltaX, 0, 3) | + 0; + + dw[7] = + __gen_field(values->Scoreboard7DeltaY, 28, 31) | + __gen_field(values->Scoreboard7DeltaX, 24, 27) | + __gen_field(values->Scoreboard6DeltaY, 20, 23) | + __gen_field(values->Scoreboard6DeltaX, 16, 19) | + __gen_field(values->Scoreboard5DeltaY, 12, 15) | + __gen_field(values->Scoreboard5DeltaX, 8, 11) | + __gen_field(values->Scoreboard4DeltaY, 4, 7) | + __gen_field(values->Scoreboard4DeltaX, 0, 3) | + 0; + +} + +#define GEN75_MI_ARB_CHECK_length 0x00000001 +#define GEN75_MI_ARB_CHECK_length_bias 0x00000001 
+#define GEN75_MI_ARB_CHECK_header \ + .CommandType = 0, \ + .MICommandOpcode = 5 + +struct GEN75_MI_ARB_CHECK { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN75_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_ARB_CHECK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN75_MI_ARB_ON_OFF_length 0x00000001 +#define GEN75_MI_ARB_ON_OFF_length_bias 0x00000001 +#define GEN75_MI_ARB_ON_OFF_header \ + .CommandType = 0, \ + .MICommandOpcode = 8 + +struct GEN75_MI_ARB_ON_OFF { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ArbitrationEnable; +}; + +static inline void +GEN75_MI_ARB_ON_OFF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_ARB_ON_OFF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ArbitrationEnable, 0, 0) | + 0; + +} + +#define GEN75_MI_BATCH_BUFFER_END_length 0x00000001 +#define GEN75_MI_BATCH_BUFFER_END_length_bias 0x00000001 +#define GEN75_MI_BATCH_BUFFER_END_header \ + .CommandType = 0, \ + .MICommandOpcode = 10 + +struct GEN75_MI_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN75_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN75_MI_BATCH_BUFFER_START_length 0x00000002 +#define GEN75_MI_BATCH_BUFFER_START_length_bias 0x00000002 +#define GEN75_MI_BATCH_BUFFER_START_header \ + .CommandType = 0, \ + .MICommandOpcode = 49, \ + .DwordLength = 0 + +struct 
GEN75_MI_BATCH_BUFFER_START { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define _1stlevelbatch 0 +#define _2ndlevelbatch 1 + uint32_t _2ndLevelBatchBuffer; + uint32_t AddOffsetEnable; + uint32_t PredicationEnable; + uint32_t NonPrivileged; + uint32_t ClearCommandBufferEnable; + uint32_t ResourceStreamerEnable; +#define ASI_GGTT 0 +#define ASI_PPGTT 1 + uint32_t AddressSpaceIndicator; + uint32_t DwordLength; + __gen_address_type BatchBufferStartAddress; +}; + +static inline void +GEN75_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_BATCH_BUFFER_START * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->_2ndLevelBatchBuffer, 22, 22) | + __gen_field(values->AddOffsetEnable, 16, 16) | + __gen_field(values->PredicationEnable, 15, 15) | + __gen_field(values->NonPrivileged, 13, 13) | + __gen_field(values->ClearCommandBufferEnable, 11, 11) | + __gen_field(values->ResourceStreamerEnable, 10, 10) | + __gen_field(values->AddressSpaceIndicator, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); + +} + +#define GEN75_MI_CLFLUSH_length_bias 0x00000002 +#define GEN75_MI_CLFLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 39 + +struct GEN75_MI_CLFLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type PageBaseAddress; + uint32_t StartingCachelineOffset; + __gen_address_type PageBaseAddressHigh; + /* variable length fields follow */ +}; + +static inline void +GEN75_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_CLFLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + 
+ dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->StartingCachelineOffset, 6, 11) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PageBaseAddressHigh, dw2); + + /* variable length fields follow */ +} + +#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000002 +#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 +#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_header\ + .CommandType = 0, \ + .MICommandOpcode = 54, \ + .UseGlobalGTT = 0, \ + .CompareSemaphore = 0, \ + .DwordLength = 0 + +struct GEN75_MI_CONDITIONAL_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t CompareSemaphore; + uint32_t DwordLength; + uint32_t CompareDataDword; + __gen_address_type CompareAddress; +}; + +static inline void +GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->CompareSemaphore, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CompareDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); + +} + +#define GEN75_MI_FLUSH_length 0x00000001 +#define GEN75_MI_FLUSH_length_bias 0x00000001 +#define GEN75_MI_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 4 + +struct GEN75_MI_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t 
IndirectStatePointersDisable; + uint32_t GenericMediaStateClear; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; +#define Flush 0 +#define DontFlush 1 + uint32_t RenderCacheFlushInhibit; +#define DontInvalidate 0 +#define Invalidate 1 + uint32_t StateInstructionCacheInvalidate; +}; + +static inline void +GEN75_MI_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IndirectStatePointersDisable, 5, 5) | + __gen_field(values->GenericMediaStateClear, 4, 4) | + __gen_field(values->GlobalSnapshotCountReset, 3, 3) | + __gen_field(values->RenderCacheFlushInhibit, 2, 2) | + __gen_field(values->StateInstructionCacheInvalidate, 1, 1) | + 0; + +} + +#define GEN75_MI_LOAD_REGISTER_IMM_length 0x00000003 +#define GEN75_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 +#define GEN75_MI_LOAD_REGISTER_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 34, \ + .DwordLength = 1 + +struct GEN75_MI_LOAD_REGISTER_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ByteWriteDisables; + uint32_t DwordLength; + uint32_t RegisterOffset; + uint32_t DataDWord; +}; + +static inline void +GEN75_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_REGISTER_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ByteWriteDisables, 8, 11) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->RegisterOffset, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DataDWord, 0, 31) | + 0; + +} + +#define GEN75_MI_LOAD_REGISTER_MEM_length 0x00000003 +#define GEN75_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 +#define 
GEN75_MI_LOAD_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 41, \ + .DwordLength = 1 + +struct GEN75_MI_LOAD_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t AsyncModeEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN75_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->AsyncModeEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN75_MI_LOAD_REGISTER_REG_length 0x00000003 +#define GEN75_MI_LOAD_REGISTER_REG_length_bias 0x00000002 +#define GEN75_MI_LOAD_REGISTER_REG_header \ + .CommandType = 0, \ + .MICommandOpcode = 42, \ + .DwordLength = 1 + +struct GEN75_MI_LOAD_REGISTER_REG { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t SourceRegisterAddress; + uint32_t DestinationRegisterAddress; +}; + +static inline void +GEN75_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_REGISTER_REG * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SourceRegisterAddress, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DestinationRegisterAddress, 2, 22) | + 0; + +} + +#define GEN75_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 +#define 
GEN75_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 +#define GEN75_MI_LOAD_SCAN_LINES_EXCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 19, \ + .DwordLength = 0 + +struct GEN75_MI_LOAD_SCAN_LINES_EXCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN75_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_SCAN_LINES_EXCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN75_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 +#define GEN75_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 +#define GEN75_MI_LOAD_SCAN_LINES_INCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 18, \ + .DwordLength = 0 + +struct GEN75_MI_LOAD_SCAN_LINES_INCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN75_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_SCAN_LINES_INCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + 
__gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN75_MI_LOAD_URB_MEM_length 0x00000003 +#define GEN75_MI_LOAD_URB_MEM_length_bias 0x00000002 +#define GEN75_MI_LOAD_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 44, \ + .DwordLength = 1 + +struct GEN75_MI_LOAD_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN75_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN75_MI_MATH_length_bias 0x00000002 +#define GEN75_MI_MATH_header \ + .CommandType = 0, \ + .MICommandOpcode = 26 + +struct GEN75_MI_MATH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t ALUINSTRUCTION1; + uint32_t ALUINSTRUCTION2; + /* variable length fields follow */ +}; + +static inline void +GEN75_MI_MATH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_MATH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->ALUINSTRUCTION1, 0, 31) | + 0; + + dw[2] = + __gen_field(values->ALUINSTRUCTION2, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_MI_NOOP_length 0x00000001 +#define GEN75_MI_NOOP_length_bias 0x00000001 +#define GEN75_MI_NOOP_header \ + .CommandType = 0, \ + 
.MICommandOpcode = 0 + +struct GEN75_MI_NOOP { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t IdentificationNumberRegisterWriteEnable; + uint32_t IdentificationNumber; +}; + +static inline void +GEN75_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_NOOP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | + __gen_field(values->IdentificationNumber, 0, 21) | + 0; + +} + +#define GEN75_MI_PREDICATE_length 0x00000001 +#define GEN75_MI_PREDICATE_length_bias 0x00000001 +#define GEN75_MI_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 12 + +struct GEN75_MI_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define KEEP 0 +#define LOAD 2 +#define LOADINV 3 + uint32_t LoadOperation; +#define COMBINE_SET 0 +#define COMBINE_AND 1 +#define COMBINE_OR 2 +#define COMBINE_XOR 3 + uint32_t CombineOperation; +#define COMPARE_SRCS_EQUAL 2 +#define COMPARE_DELTAS_EQUAL 3 + uint32_t CompareOperation; +}; + +static inline void +GEN75_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->LoadOperation, 6, 7) | + __gen_field(values->CombineOperation, 3, 4) | + __gen_field(values->CompareOperation, 0, 1) | + 0; + +} + +#define GEN75_MI_REPORT_HEAD_length 0x00000001 +#define GEN75_MI_REPORT_HEAD_length_bias 0x00000001 +#define GEN75_MI_REPORT_HEAD_header \ + .CommandType = 0, \ + .MICommandOpcode = 7 + +struct GEN75_MI_REPORT_HEAD { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN75_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN75_MI_REPORT_HEAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN75_MI_RS_CONTEXT_length 0x00000001 +#define GEN75_MI_RS_CONTEXT_length_bias 0x00000001 +#define GEN75_MI_RS_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 15 + +struct GEN75_MI_RS_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define Restore 0 +#define Save 1 + uint32_t ResourceStreamerSave; +}; + +static inline void +GEN75_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_RS_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerSave, 0, 0) | + 0; + +} + +#define GEN75_MI_RS_CONTROL_length 0x00000001 +#define GEN75_MI_RS_CONTROL_length_bias 0x00000001 +#define GEN75_MI_RS_CONTROL_header \ + .CommandType = 0, \ + .MICommandOpcode = 6 + +struct GEN75_MI_RS_CONTROL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define Stop 0 +#define Start 1 + uint32_t ResourceStreamerControl; +}; + +static inline void +GEN75_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_RS_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerControl, 0, 0) | + 0; + +} + +#define GEN75_MI_RS_STORE_DATA_IMM_length 0x00000004 +#define GEN75_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN75_MI_RS_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 43, \ + .DwordLength = 2 + +struct GEN75_MI_RS_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type DestinationAddress; + 
uint32_t CoreModeEnable; + uint32_t DataDWord0; +}; + +static inline void +GEN75_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_RS_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 0; + + uint32_t dw2 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->DestinationAddress, dw2); + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + +} + +#define GEN75_MI_SEMAPHORE_MBOX_length 0x00000003 +#define GEN75_MI_SEMAPHORE_MBOX_length_bias 0x00000002 +#define GEN75_MI_SEMAPHORE_MBOX_header \ + .CommandType = 0, \ + .MICommandOpcode = 22, \ + .DwordLength = 1 + +struct GEN75_MI_SEMAPHORE_MBOX { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RVSYNC 0 +#define RVESYNC 1 +#define RBSYNC 2 +#define UseGeneralRegisterSelect 3 + uint32_t RegisterSelect; + uint32_t GeneralRegisterSelect; + uint32_t DwordLength; + uint32_t SemaphoreDataDword; +}; + +static inline void +GEN75_MI_SEMAPHORE_MBOX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_SEMAPHORE_MBOX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->RegisterSelect, 16, 17) | + __gen_field(values->GeneralRegisterSelect, 8, 13) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SemaphoreDataDword, 0, 31) | + 0; + + dw[2] = + 0; + +} + +#define GEN75_MI_SET_CONTEXT_length 0x00000002 +#define GEN75_MI_SET_CONTEXT_length_bias 0x00000002 +#define GEN75_MI_SET_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 24, \ + .DwordLength = 0 + +struct GEN75_MI_SET_CONTEXT { + uint32_t CommandType; + uint32_t 
MICommandOpcode; + uint32_t DwordLength; + __gen_address_type LogicalContextAddress; + uint32_t ReservedMustbe1; + uint32_t CoreModeEnable; + uint32_t ResourceStreamerStateSaveEnable; + uint32_t ResourceStreamerStateRestoreEnable; + uint32_t ForceRestore; + uint32_t RestoreInhibit; +}; + +static inline void +GEN75_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_SET_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->ReservedMustbe1, 8, 8) | + __gen_field(values->CoreModeEnable, 4, 4) | + __gen_field(values->ResourceStreamerStateSaveEnable, 3, 3) | + __gen_field(values->ResourceStreamerStateRestoreEnable, 2, 2) | + __gen_field(values->ForceRestore, 1, 1) | + __gen_field(values->RestoreInhibit, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); + +} + +#define GEN75_MI_SET_PREDICATE_length 0x00000001 +#define GEN75_MI_SET_PREDICATE_length_bias 0x00000001 +#define GEN75_MI_SET_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 1, \ + .PREDICATEENABLE = 6 + +struct GEN75_MI_SET_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PredicateAlways 0 +#define PredicateonClear 1 +#define PredicateonSet 2 +#define PredicateDisable 3 + uint32_t PREDICATEENABLE; +}; + +static inline void +GEN75_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_SET_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PREDICATEENABLE, 0, 1) | + 0; + +} + +#define GEN75_MI_STORE_DATA_IMM_length 0x00000004 +#define GEN75_MI_STORE_DATA_IMM_length_bias 0x00000002 +#define 
GEN75_MI_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 32, \ + .DwordLength = 2 + +struct GEN75_MI_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t DwordLength; + uint32_t Address; + uint32_t CoreModeEnable; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN75_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->Address, 2, 31) | + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->DataDWord1, 0, 31) | + 0; /* NOTE(review): dw[4] is a 5th dword but GEN75_MI_STORE_DATA_IMM_length is 0x00000004 — a batch buffer sized from _length is overrun when DataDWord1 is emitted; confirm dword vs. qword store encoding against the HSW PRM and fix in the generator */ + +} + +#define GEN75_MI_STORE_DATA_INDEX_length 0x00000003 +#define GEN75_MI_STORE_DATA_INDEX_length_bias 0x00000002 +#define GEN75_MI_STORE_DATA_INDEX_header \ + .CommandType = 0, \ + .MICommandOpcode = 33, \ + .DwordLength = 1 + +struct GEN75_MI_STORE_DATA_INDEX { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t Offset; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN75_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_STORE_DATA_INDEX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Offset, 2, 11) | + 0; + + dw[2] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[3] = + __gen_field(values->DataDWord1, 0, 31) | + 0; /* NOTE(review): same pattern — dw[3] is a 4th dword but GEN75_MI_STORE_DATA_INDEX_length is 0x00000003; verify buffer sizing for the qword variant */ + +} + +#define GEN75_MI_STORE_URB_MEM_length 0x00000003 +#define
GEN75_MI_STORE_URB_MEM_length_bias 0x00000002 +#define GEN75_MI_STORE_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 45, \ + .DwordLength = 1 + +struct GEN75_MI_STORE_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN75_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_STORE_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN75_MI_SUSPEND_FLUSH_length 0x00000001 +#define GEN75_MI_SUSPEND_FLUSH_length_bias 0x00000001 +#define GEN75_MI_SUSPEND_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 11 + +struct GEN75_MI_SUSPEND_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t SuspendFlush; +}; + +static inline void +GEN75_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_SUSPEND_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SuspendFlush, 0, 0) | + 0; + +} + +#define GEN75_MI_TOPOLOGY_FILTER_length 0x00000001 +#define GEN75_MI_TOPOLOGY_FILTER_length_bias 0x00000001 +#define GEN75_MI_TOPOLOGY_FILTER_header \ + .CommandType = 0, \ + .MICommandOpcode = 13 + +struct GEN75_MI_TOPOLOGY_FILTER { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t TopologyFilterValue; +}; + +static inline void +GEN75_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_TOPOLOGY_FILTER * restrict values) 
+{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->TopologyFilterValue, 0, 5) | + 0; + +} + +#define GEN75_MI_UPDATE_GTT_length_bias 0x00000002 +#define GEN75_MI_UPDATE_GTT_header \ + .CommandType = 0, \ + .MICommandOpcode = 35 + +struct GEN75_MI_UPDATE_GTT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type EntryAddress; + /* variable length fields follow */ +}; + +static inline void +GEN75_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_UPDATE_GTT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); + + /* variable length fields follow */ +} + +#define GEN75_MI_URB_ATOMIC_ALLOC_length 0x00000001 +#define GEN75_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 +#define GEN75_MI_URB_ATOMIC_ALLOC_header \ + .CommandType = 0, \ + .MICommandOpcode = 9 + +struct GEN75_MI_URB_ATOMIC_ALLOC { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t URBAtomicStorageOffset; + uint32_t URBAtomicStorageSize; +}; + +static inline void +GEN75_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_URB_ATOMIC_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->URBAtomicStorageOffset, 12, 19) | + __gen_field(values->URBAtomicStorageSize, 0, 8) | + 0; + +} + +#define GEN75_MI_URB_CLEAR_length 
0x00000002 +#define GEN75_MI_URB_CLEAR_length_bias 0x00000002 +#define GEN75_MI_URB_CLEAR_header \ + .CommandType = 0, \ + .MICommandOpcode = 25, \ + .DwordLength = 0 + +struct GEN75_MI_URB_CLEAR { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBClearLength; + uint32_t URBAddress; +}; + +static inline void +GEN75_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_URB_CLEAR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBClearLength, 16, 29) | + __gen_field(values->URBAddress, 0, 14) | + 0; + +} + +#define GEN75_MI_USER_INTERRUPT_length 0x00000001 +#define GEN75_MI_USER_INTERRUPT_length_bias 0x00000001 +#define GEN75_MI_USER_INTERRUPT_header \ + .CommandType = 0, \ + .MICommandOpcode = 2 + +struct GEN75_MI_USER_INTERRUPT { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN75_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_USER_INTERRUPT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN75_MI_WAIT_FOR_EVENT_length 0x00000001 +#define GEN75_MI_WAIT_FOR_EVENT_length_bias 0x00000001 +#define GEN75_MI_WAIT_FOR_EVENT_header \ + .CommandType = 0, \ + .MICommandOpcode = 3 + +struct GEN75_MI_WAIT_FOR_EVENT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DisplayPipeCHorizontalBlankWaitEnable; + uint32_t DisplayPipeCVerticalBlankWaitEnable; + uint32_t DisplaySpriteCFlipPendingWaitEnable; +#define Notenabled 0 + uint32_t ConditionCodeWaitSelect; + uint32_t DisplayPlaneCFlipPendingWaitEnable; + uint32_t DisplayPipeCScanLineWaitEnable; + uint32_t 
DisplayPipeBHorizontalBlankWaitEnable; + uint32_t DisplayPipeBVerticalBlankWaitEnable; + uint32_t DisplaySpriteBFlipPendingWaitEnable; + uint32_t DisplayPlaneBFlipPendingWaitEnable; + uint32_t DisplayPipeBScanLineWaitEnable; + uint32_t DisplayPipeAHorizontalBlankWaitEnable; + uint32_t DisplayPipeAVerticalBlankWaitEnable; + uint32_t DisplaySpriteAFlipPendingWaitEnable; + uint32_t DisplayPlaneAFlipPendingWaitEnable; + uint32_t DisplayPipeAScanLineWaitEnable; +}; + +static inline void +GEN75_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_WAIT_FOR_EVENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPipeCHorizontalBlankWaitEnable, 22, 22) | + __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 21) | + __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | + __gen_field(values->ConditionCodeWaitSelect, 16, 19) | + __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | + __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | + __gen_field(values->DisplayPipeBHorizontalBlankWaitEnable, 13, 13) | + __gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | + __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | + __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | + __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | + __gen_field(values->DisplayPipeAHorizontalBlankWaitEnable, 5, 5) | + __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | + __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | + __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | + __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | + 0; + +} + +#define GEN75_PIPE_CONTROL_length 0x00000005 +#define GEN75_PIPE_CONTROL_length_bias 0x00000002 +#define GEN75_PIPE_CONTROL_header \ + .CommandType 
= 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 2, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 3 + +struct GEN75_PIPE_CONTROL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DAT_PPGTT 0 +#define DAT_GGTT 1 + uint32_t DestinationAddressType; +#define NoLRIOperation 0 +#define MMIOWriteImmediateData 1 + uint32_t LRIPostSyncOperation; + uint32_t StoreDataIndex; + uint32_t CommandStreamerStallEnable; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; + uint32_t TLBInvalidate; + uint32_t GenericMediaStateClear; +#define NoWrite 0 +#define WriteImmediateData 1 +#define WritePSDepthCount 2 +#define WriteTimestamp 3 + uint32_t PostSyncOperation; + uint32_t DepthStallEnable; +#define DisableFlush 0 +#define EnableFlush 1 + uint32_t RenderTargetCacheFlushEnable; + uint32_t InstructionCacheInvalidateEnable; + uint32_t TextureCacheInvalidationEnable; + uint32_t IndirectStatePointersDisable; + uint32_t NotifyEnable; + uint32_t PipeControlFlushEnable; + uint32_t DCFlushEnable; + uint32_t VFCacheInvalidationEnable; + uint32_t ConstantCacheInvalidationEnable; + uint32_t StateCacheInvalidationEnable; + uint32_t StallAtPixelScoreboard; +#define FlushDisabled 0 +#define FlushEnabled 1 + uint32_t DepthCacheFlushEnable; + __gen_address_type Address; + uint32_t ImmediateData; + uint32_t ImmediateData0; +}; + +static inline void +GEN75_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_PIPE_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DestinationAddressType, 24, 24) | + __gen_field(values->LRIPostSyncOperation, 
23, 23) | + __gen_field(values->StoreDataIndex, 21, 21) | + __gen_field(values->CommandStreamerStallEnable, 20, 20) | + __gen_field(values->GlobalSnapshotCountReset, 19, 19) | + __gen_field(values->TLBInvalidate, 18, 18) | + __gen_field(values->GenericMediaStateClear, 16, 16) | + __gen_field(values->PostSyncOperation, 14, 15) | + __gen_field(values->DepthStallEnable, 13, 13) | + __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | + __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | + __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | + __gen_field(values->IndirectStatePointersDisable, 9, 9) | + __gen_field(values->NotifyEnable, 8, 8) | + __gen_field(values->PipeControlFlushEnable, 7, 7) | + __gen_field(values->DCFlushEnable, 5, 5) | + __gen_field(values->VFCacheInvalidationEnable, 4, 4) | + __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | + __gen_field(values->StateCacheInvalidationEnable, 2, 2) | + __gen_field(values->StallAtPixelScoreboard, 1, 1) | + __gen_field(values->DepthCacheFlushEnable, 0, 0) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->Address, dw2); + + dw[3] = + __gen_field(values->ImmediateData, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ImmediateData, 0, 31) | + 0; + +} + +struct GEN75_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + uint32_t ConstantBufferObjectControlState; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 
31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw2 = + /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->PointerToConstantBuffer1, dw3); + + uint32_t dw4 = + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer2, dw4); + + uint32_t dw5 = + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->PointerToConstantBuffer3, dw5); + +} + +struct GEN75_BINDING_TABLE_EDIT_ENTRY { + uint32_t BindingTableIndex; + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN75_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BINDING_TABLE_EDIT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BindingTableIndex, 16, 23) | + __gen_offset(values->SurfaceStatePointer, 0, 15) | + 0; + +} + +struct GEN75_GATHER_CONSTANT_ENTRY { + uint32_t ConstantBufferOffset; + uint32_t ChannelMask; + uint32_t BindingTableIndexOffset; +}; + +static inline void +GEN75_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_GATHER_CONSTANT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->ConstantBufferOffset, 8, 15) | + __gen_field(values->ChannelMask, 4, 7) | + __gen_field(values->BindingTableIndexOffset, 0, 3) | + 0; + +} + +struct GEN75_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; +#define VERTEXDATA 0 +#define INSTANCEDATA 1 + uint32_t BufferAccessType; + uint32_t VertexBufferMemoryObjectControlState; + uint32_t AddressModifyEnable; + uint32_t NullVertexBuffer; + uint32_t 
VertexFetchInvalidate; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + __gen_address_type EndAddress; + uint32_t InstanceDataStepRate; +}; + +static inline void +GEN75_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->BufferAccessType, 20, 20) | + /* Struct VertexBufferMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->VertexFetchInvalidate, 12, 12) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->EndAddress, dw2); + + dw[3] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + +struct GEN75_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + uint32_t Valid; + uint32_t SourceElementFormat; + uint32_t EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline void +GEN75_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + 
__gen_field(values->Component3Control, 16, 18) | + 0; + +} + +struct GEN75_SO_DECL_ENTRY { + uint32_t Stream3Decl; + uint32_t Stream2Decl; + uint32_t Stream1Decl; + uint32_t Stream0Decl; +}; + +static inline void +GEN75_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + /* Struct Stream3Decl: found SO_DECL */ + /* Struct Stream2Decl: found SO_DECL */ + /* Struct Stream1Decl: found SO_DECL */ + /* Struct Stream0Decl: found SO_DECL */ + 0; + +} + +struct GEN75_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN75_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN75_SCISSOR_RECT { + uint32_t ScissorRectangleYMin; + uint32_t ScissorRectangleXMin; + uint32_t ScissorRectangleYMax; + uint32_t ScissorRectangleXMax; +}; + +static inline void +GEN75_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SCISSOR_RECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ScissorRectangleYMin, 16, 31) | + __gen_field(values->ScissorRectangleXMin, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ScissorRectangleYMax, 16, 31) | + __gen_field(values->ScissorRectangleXMax, 0, 15) | + 0; + +} + +struct GEN75_SF_CLIP_VIEWPORT { + float ViewportMatrixElementm00; + float ViewportMatrixElementm11; + float ViewportMatrixElementm22; + float ViewportMatrixElementm30; + float ViewportMatrixElementm31; + float ViewportMatrixElementm32; + float XMinClipGuardband; + float XMaxClipGuardband; + 
float YMinClipGuardband; + float YMaxClipGuardband; +}; + +static inline void +GEN75_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SF_CLIP_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->ViewportMatrixElementm00) | + 0; + + dw[1] = + __gen_float(values->ViewportMatrixElementm11) | + 0; + + dw[2] = + __gen_float(values->ViewportMatrixElementm22) | + 0; + + dw[3] = + __gen_float(values->ViewportMatrixElementm30) | + 0; + + dw[4] = + __gen_float(values->ViewportMatrixElementm31) | + 0; + + dw[5] = + __gen_float(values->ViewportMatrixElementm32) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + + dw[8] = + __gen_float(values->XMinClipGuardband) | + 0; + + dw[9] = + __gen_float(values->XMaxClipGuardband) | + 0; + + dw[10] = + __gen_float(values->YMinClipGuardband) | + 0; + + dw[11] = + __gen_float(values->YMaxClipGuardband) | + 0; + + dw[12] = + 0; + +} + +struct GEN75_BLEND_STATE { + uint32_t ColorBufferBlendEnable; + uint32_t IndependentAlphaBlendEnable; +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + uint32_t AlphaBlendFunction; +#define BLENDFACTOR_ONE 1 +#define BLENDFACTOR_SRC_COLOR 2 +#define BLENDFACTOR_SRC_ALPHA 3 +#define BLENDFACTOR_DST_ALPHA 4 +#define BLENDFACTOR_DST_COLOR 5 +#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 +#define BLENDFACTOR_CONST_COLOR 7 +#define BLENDFACTOR_CONST_ALPHA 8 +#define BLENDFACTOR_SRC1_COLOR 9 +#define BLENDFACTOR_SRC1_ALPHA 10 +#define BLENDFACTOR_ZERO 17 +#define BLENDFACTOR_INV_SRC_COLOR 18 +#define BLENDFACTOR_INV_SRC_ALPHA 19 +#define BLENDFACTOR_INV_DST_ALPHA 20 +#define BLENDFACTOR_INV_DST_COLOR 21 +#define BLENDFACTOR_INV_CONST_COLOR 23 +#define BLENDFACTOR_INV_CONST_ALPHA 24 +#define BLENDFACTOR_INV_SRC1_COLOR 25 +#define BLENDFACTOR_INV_SRC1_ALPHA 26 + uint32_t SourceAlphaBlendFactor; + uint32_t 
DestinationAlphaBlendFactor; +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + uint32_t ColorBlendFunction; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + uint32_t AlphaToCoverageEnable; + uint32_t AlphaToOneEnable; + uint32_t AlphaToCoverageDitherEnable; + uint32_t WriteDisableAlpha; + uint32_t WriteDisableRed; + uint32_t WriteDisableGreen; + uint32_t WriteDisableBlue; + uint32_t LogicOpEnable; +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 1 +#define LOGICOP_AND_INVERTED 2 +#define LOGICOP_COPY_INVERTED 3 +#define LOGICOP_AND_REVERSE 4 +#define LOGICOP_INVERT 5 +#define LOGICOP_XOR 6 +#define LOGICOP_NAND 7 +#define LOGICOP_AND 8 +#define LOGICOP_EQUIV 9 +#define LOGICOP_NOOP 10 +#define LOGICOP_OR_INVERTED 11 +#define LOGICOP_COPY 12 +#define LOGICOP_OR_REVERSE 13 +#define LOGICOP_OR 14 +#define LOGICOP_SET 15 + uint32_t LogicOpFunction; + uint32_t AlphaTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t AlphaTestFunction; + uint32_t ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; +#define COLORCLAMP_UNORM 0 +#define COLORCLAMP_SNORM 1 +#define COLORCLAMP_RTFORMAT 2 + uint32_t ColorClampRange; + uint32_t PreBlendColorClampEnable; + uint32_t PostBlendColorClampEnable; +}; + +static inline void +GEN75_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ColorBufferBlendEnable, 31, 31) | + __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaBlendFunction, 26, 28) | + 
__gen_field(values->SourceAlphaBlendFactor, 20, 24) | + __gen_field(values->DestinationAlphaBlendFactor, 15, 19) | + __gen_field(values->ColorBlendFunction, 11, 13) | + __gen_field(values->SourceBlendFactor, 5, 9) | + __gen_field(values->DestinationBlendFactor, 0, 4) | + 0; + + dw[1] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->AlphaToOneEnable, 30, 30) | + __gen_field(values->AlphaToCoverageDitherEnable, 29, 29) | + __gen_field(values->WriteDisableAlpha, 27, 27) | + __gen_field(values->WriteDisableRed, 26, 26) | + __gen_field(values->WriteDisableGreen, 25, 25) | + __gen_field(values->WriteDisableBlue, 24, 24) | + __gen_field(values->LogicOpEnable, 22, 22) | + __gen_field(values->LogicOpFunction, 18, 21) | + __gen_field(values->AlphaTestEnable, 16, 16) | + __gen_field(values->AlphaTestFunction, 13, 15) | + __gen_field(values->ColorDitherEnable, 12, 12) | + __gen_field(values->XDitherOffset, 10, 11) | + __gen_field(values->YDitherOffset, 8, 9) | + __gen_field(values->ColorClampRange, 2, 3) | + __gen_field(values->PreBlendColorClampEnable, 1, 1) | + __gen_field(values->PostBlendColorClampEnable, 0, 0) | + 0; + +} + +struct GEN75_CC_VIEWPORT { + float MinimumDepth; + float MaximumDepth; +}; + +static inline void +GEN75_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_CC_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->MinimumDepth) | + 0; + + dw[1] = + __gen_float(values->MaximumDepth) | + 0; + +} + +struct GEN75_COLOR_CALC_STATE { + uint32_t StencilReferenceValue; + uint32_t BackFaceStencilReferenceValue; +#define Cancelled 0 +#define NotCancelled 1 + uint32_t RoundDisableFunctionDisable; +#define ALPHATEST_UNORM8 0 +#define ALPHATEST_FLOAT32 1 + uint32_t AlphaTestFormat; + uint32_t AlphaReferenceValueAsUNORM8; + float AlphaReferenceValueAsFLOAT32; + float BlendConstantColorRed; + float BlendConstantColorGreen; + float BlendConstantColorBlue; 
+ float BlendConstantColorAlpha; +}; + +static inline void +GEN75_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_COLOR_CALC_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilReferenceValue, 24, 31) | + __gen_field(values->BackFaceStencilReferenceValue, 16, 23) | + __gen_field(values->RoundDisableFunctionDisable, 15, 15) | + __gen_field(values->AlphaTestFormat, 0, 0) | + 0; + + dw[1] = + __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | + __gen_float(values->AlphaReferenceValueAsFLOAT32) | + 0; + + dw[2] = + __gen_float(values->BlendConstantColorRed) | + 0; + + dw[3] = + __gen_float(values->BlendConstantColorGreen) | + 0; + + dw[4] = + __gen_float(values->BlendConstantColorBlue) | + 0; + + dw[5] = + __gen_float(values->BlendConstantColorAlpha) | + 0; + +} + +struct GEN75_DEPTH_STENCIL_STATE { + uint32_t StencilTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t StencilTestFunction; +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + uint32_t StencilFailOp; + uint32_t StencilPassDepthFailOp; + uint32_t StencilPassDepthPassOp; + uint32_t StencilBufferWriteEnable; + uint32_t DoubleSidedStencilEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t BackFaceStencilTestFunction; +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 
+#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + uint32_t BackfaceStencilFailOp; + uint32_t BackfaceStencilPassDepthFailOp; + uint32_t BackfaceStencilPassDepthPassOp; + uint32_t StencilTestMask; + uint32_t StencilWriteMask; + uint32_t BackfaceStencilTestMask; + uint32_t BackfaceStencilWriteMask; + uint32_t DepthTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t DepthTestFunction; + uint32_t DepthBufferWriteEnable; +}; + +static inline void +GEN75_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_DEPTH_STENCIL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilTestEnable, 31, 31) | + __gen_field(values->StencilTestFunction, 28, 30) | + __gen_field(values->StencilFailOp, 25, 27) | + __gen_field(values->StencilPassDepthFailOp, 22, 24) | + __gen_field(values->StencilPassDepthPassOp, 19, 21) | + __gen_field(values->StencilBufferWriteEnable, 18, 18) | + __gen_field(values->DoubleSidedStencilEnable, 15, 15) | + __gen_field(values->BackFaceStencilTestFunction, 12, 14) | + __gen_field(values->BackfaceStencilFailOp, 9, 11) | + __gen_field(values->BackfaceStencilPassDepthFailOp, 6, 8) | + __gen_field(values->BackfaceStencilPassDepthPassOp, 3, 5) | + 0; + + dw[1] = + __gen_field(values->StencilTestMask, 24, 31) | + __gen_field(values->StencilWriteMask, 16, 23) | + __gen_field(values->BackfaceStencilTestMask, 8, 15) | + __gen_field(values->BackfaceStencilWriteMask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->DepthTestEnable, 31, 31) | + __gen_field(values->DepthTestFunction, 27, 29) | + 
__gen_field(values->DepthBufferWriteEnable, 26, 26) | + 0; + +} + +struct GEN75_MEMORY_OBJECT_CONTROL_STATE { + uint32_t L3CacheabilityControlL3CC; +}; + +static inline void +GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | + 0; + +} + +struct GEN75_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS { +#define Highestpriority 0 +#define Secondhighestpriority 1 +#define Thirdhighestpriority 2 +#define Lowestpriority 3 + uint32_t ArbitrationPriorityControl; +#define PTE 0 +#define UC 1 +#define LLCeLLCWBcacheable 2 +#define eLLCWBcacheable 3 + uint32_t LLCeLLCCacheabilityControlLLCCC; + uint32_t L3CacheabilityControlL3CC; +}; + +static inline void +GEN75_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ArbitrationPriorityControl, 4, 5) | + __gen_field(values->LLCeLLCCacheabilityControlLLCCC, 1, 2) | + __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | + 0; + +} + +struct GEN75_INTERFACE_DESCRIPTOR_DATA { + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t MaskStackExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t SamplerStatePointer; +#define Nosamplersused 0 +#define Between1and4samplersused 1 +#define Between5and8samplersused 2 +#define Between9and12samplersused 3 +#define Between13and16samplersused 4 + uint32_t SamplerCount; + uint32_t BindingTablePointer; + uint32_t BindingTableEntryCount; + uint32_t 
ConstantURBEntryReadLength; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + uint32_t BarrierEnable; + uint32_t SharedLocalMemorySize; + uint32_t NumberofThreadsinGPGPUThreadGroup; + uint32_t CrossThreadConstantDataReadLength; +}; + +static inline void +GEN75_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_INTERFACE_DESCRIPTOR_DATA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[1] = + __gen_field(values->SingleProgramFlow, 18, 18) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[2] = + __gen_offset(values->SamplerStatePointer, 5, 31) | + __gen_field(values->SamplerCount, 2, 4) | + 0; + + dw[3] = + __gen_offset(values->BindingTablePointer, 5, 15) | + __gen_field(values->BindingTableEntryCount, 0, 4) | + 0; + + dw[4] = + __gen_field(values->ConstantURBEntryReadLength, 16, 31) | + 0; + + dw[5] = + __gen_field(values->RoundingMode, 22, 23) | + __gen_field(values->BarrierEnable, 21, 21) | + __gen_field(values->SharedLocalMemorySize, 16, 20) | + __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 7) | + 0; + + dw[6] = + __gen_field(values->CrossThreadConstantDataReadLength, 0, 7) | + 0; + + dw[7] = + 0; + +} + +struct GEN75_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN75_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 
7) | + 0; + +} + +struct GEN75_RENDER_SURFACE_STATE { +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_BUFFER 4 +#define SURFTYPE_STRBUF 5 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + uint32_t SurfaceArray; + uint32_t SurfaceFormat; + uint32_t SurfaceVerticalAlignment; +#define HALIGN_4 0 +#define HALIGN_8 1 + uint32_t SurfaceHorizontalAlignment; + uint32_t TiledSurface; +#define TILEWALK_XMAJOR 0 +#define TILEWALK_YMAJOR 1 + uint32_t TileWalk; + uint32_t VerticalLineStride; + uint32_t VerticalLineStrideOffset; +#define ARYSPC_FULL 0 +#define ARYSPC_LOD0 1 + uint32_t SurfaceArraySpacing; + uint32_t RenderCacheReadWriteMode; +#define NORMAL_MODE 0 +#define PROGRESSIVE_FRAME 2 +#define INTERLACED_FRAME 3 + uint32_t MediaBoundaryPixelMode; + uint32_t CubeFaceEnables; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t Depth; + uint32_t IntegerSurfaceFormat; + uint32_t SurfacePitch; +#define RTROTATE_0DEG 0 +#define RTROTATE_90DEG 1 +#define RTROTATE_270DEG 3 + uint32_t RenderTargetRotation; + uint32_t MinimumArrayElement; + uint32_t RenderTargetViewExtent; +#define MSFMT_MSS 0 +#define MSFMT_DEPTH_STENCIL 1 + uint32_t MultisampledSurfaceStorageFormat; +#define MULTISAMPLECOUNT_1 0 +#define MULTISAMPLECOUNT_4 2 +#define MULTISAMPLECOUNT_8 3 + uint32_t NumberofMultisamples; + uint32_t MultisamplePositionPaletteIndex; + uint32_t MinimumArrayElement0; + uint32_t XOffset; + uint32_t YOffset; + uint32_t SurfaceObjectControlState; + uint32_t SurfaceMinLOD; + uint32_t MIPCountLOD; + __gen_address_type MCSBaseAddress; + uint32_t MCSSurfacePitch; + __gen_address_type AppendCounterAddress; + uint32_t AppendCounterEnable; + uint32_t MCSEnable; + uint32_t ReservedMBZ; + uint32_t XOffsetforUVPlane; + uint32_t YOffsetforUVPlane; +#define SCS_ZERO 0 +#define SCS_ONE 1 +#define SCS_RED 4 +#define SCS_GREEN 5 +#define SCS_BLUE 6 +#define SCS_ALPHA 7 + uint32_t ShaderChannelSelectR; 
+ uint32_t ShaderChannelSelectG; + uint32_t ShaderChannelSelectB; + uint32_t ShaderChannelSelectA; + uint32_t ResourceMinLOD; +}; + +static inline void +GEN75_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_RENDER_SURFACE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->SurfaceArray, 28, 28) | + __gen_field(values->SurfaceFormat, 18, 26) | + __gen_field(values->SurfaceVerticalAlignment, 16, 17) | + __gen_field(values->SurfaceHorizontalAlignment, 15, 15) | + __gen_field(values->TiledSurface, 14, 14) | + __gen_field(values->TileWalk, 13, 13) | + __gen_field(values->VerticalLineStride, 12, 12) | + __gen_field(values->VerticalLineStrideOffset, 11, 11) | + __gen_field(values->SurfaceArraySpacing, 10, 10) | + __gen_field(values->RenderCacheReadWriteMode, 8, 8) | + __gen_field(values->MediaBoundaryPixelMode, 6, 7) | + __gen_field(values->CubeFaceEnables, 0, 5) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->SurfaceBaseAddress, dw1); + + dw[2] = + __gen_field(values->Height, 16, 29) | + __gen_field(values->Width, 0, 13) | + 0; + + dw[3] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->IntegerSurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + dw[4] = + __gen_field(values->RenderTargetRotation, 29, 30) | + __gen_field(values->MinimumArrayElement, 18, 28) | + __gen_field(values->RenderTargetViewExtent, 7, 17) | + __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | + __gen_field(values->NumberofMultisamples, 3, 5) | + __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | + __gen_field(values->MinimumArrayElement, 0, 26) | + 0; + + dw[5] = + __gen_offset(values->XOffset, 25, 31) | + __gen_offset(values->YOffset, 20, 23) | + /* Struct SurfaceObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfaceMinLOD, 4, 
7) | + __gen_field(values->MIPCountLOD, 0, 3) | + 0; + + uint32_t dw6 = + __gen_field(values->MCSSurfacePitch, 3, 11) | + __gen_field(values->AppendCounterEnable, 1, 1) | + __gen_field(values->MCSEnable, 0, 0) | + __gen_field(values->ReservedMBZ, 30, 31) | + __gen_field(values->XOffsetforUVPlane, 16, 29) | + __gen_field(values->YOffsetforUVPlane, 0, 13) | + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->AppendCounterAddress, dw6); + + dw[7] = + __gen_field(values->ShaderChannelSelectR, 25, 27) | + __gen_field(values->ShaderChannelSelectG, 22, 24) | + __gen_field(values->ShaderChannelSelectB, 19, 21) | + __gen_field(values->ShaderChannelSelectA, 16, 18) | + __gen_field(values->ResourceMinLOD, 0, 11) | + 0; + +} + +struct GEN75_SAMPLER_BORDER_COLOR_STATE { + uint32_t BorderColorRedDX100GL; + uint32_t BorderColorAlpha; + uint32_t BorderColorBlue; + uint32_t BorderColorGreen; + uint32_t BorderColorRedDX9; + uint32_t BorderColorGreen0; + uint32_t BorderColorBlue0; + uint32_t BorderColorAlpha0; + uint32_t BorderColor; + uint32_t BorderColor0; + uint32_t BorderColor1; +}; + +static inline void +GEN75_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SAMPLER_BORDER_COLOR_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BorderColorRedDX100GL, 0, 31) | + __gen_field(values->BorderColorAlpha, 24, 31) | + __gen_field(values->BorderColorBlue, 16, 23) | + __gen_field(values->BorderColorGreen, 8, 15) | + __gen_field(values->BorderColorRedDX9, 0, 7) | + 0; + + dw[1] = + __gen_field(values->BorderColorGreen, 0, 31) | + 0; + + dw[2] = + __gen_field(values->BorderColorBlue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->BorderColorAlpha, 0, 31) | + 0; + + dw[4] = + 0; + + dw[16] = + __gen_field(values->BorderColor, 0, 127) | + __gen_field(values->BorderColor, 0, 127) | + __gen_field(values->BorderColor, 0, 127) | + 0; + +} + +struct GEN75_SAMPLER_STATE { + uint32_t 
SamplerDisable; +#define DX10OGL 0 +#define DX9 1 + uint32_t TextureBorderColorMode; +#define OGL 1 + uint32_t LODPreClampEnable; + uint32_t BaseMipLevel; +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + uint32_t MipModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MagModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MinModeFilter; + uint32_t TextureLODBias; +#define LEGACY 0 +#define EWAApproximation 1 + uint32_t AnisotropicAlgorithm; + uint32_t MinLOD; + uint32_t MaxLOD; +#define PREFILTEROPALWAYS 0 +#define PREFILTEROPNEVER 1 +#define PREFILTEROPLESS 2 +#define PREFILTEROPEQUAL 3 +#define PREFILTEROPLEQUAL 4 +#define PREFILTEROPGREATER 5 +#define PREFILTEROPNOTEQUAL 6 +#define PREFILTEROPGEQUAL 7 + uint32_t ShadowFunction; +#define PROGRAMMED 0 +#define OVERRIDE 1 + uint32_t CubeSurfaceControlMode; + uint32_t BorderColorPointer; + uint32_t ChromaKeyEnable; + uint32_t ChromaKeyIndex; +#define KEYFILTER_KILL_ON_ANY_MATCH 0 +#define KEYFILTER_REPLACE_BLACK 1 + uint32_t ChromaKeyMode; +#define RATIO21 0 +#define RATIO41 1 +#define RATIO61 2 +#define RATIO81 3 +#define RATIO101 4 +#define RATIO121 5 +#define RATIO141 6 +#define RATIO161 7 + uint32_t MaximumAnisotropy; + uint32_t RAddressMinFilterRoundingEnable; + uint32_t RAddressMagFilterRoundingEnable; + uint32_t VAddressMinFilterRoundingEnable; + uint32_t VAddressMagFilterRoundingEnable; + uint32_t UAddressMinFilterRoundingEnable; + uint32_t UAddressMagFilterRoundingEnable; +#define FULL 0 +#define TRIQUAL_HIGHMAG_CLAMP_MIPFILTER 1 +#define MED 2 +#define LOW 3 + uint32_t TrilinearFilterQuality; + uint32_t NonnormalizedCoordinateEnable; + uint32_t TCXAddressControlMode; + uint32_t TCYAddressControlMode; + uint32_t TCZAddressControlMode; +}; + +static inline void +GEN75_SAMPLER_STATE_pack(__gen_user_data 
*data, void * restrict dst, + const struct GEN75_SAMPLER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SamplerDisable, 31, 31) | + __gen_field(values->TextureBorderColorMode, 29, 29) | + __gen_field(values->LODPreClampEnable, 28, 28) | + __gen_field(values->BaseMipLevel, 22, 26) | + __gen_field(values->MipModeFilter, 20, 21) | + __gen_field(values->MagModeFilter, 17, 19) | + __gen_field(values->MinModeFilter, 14, 16) | + __gen_field(values->TextureLODBias, 1, 13) | + __gen_field(values->AnisotropicAlgorithm, 0, 0) | + 0; + + dw[1] = + __gen_field(values->MinLOD, 20, 31) | + __gen_field(values->MaxLOD, 8, 19) | + __gen_field(values->ShadowFunction, 1, 3) | + __gen_field(values->CubeSurfaceControlMode, 0, 0) | + 0; + + dw[2] = + __gen_offset(values->BorderColorPointer, 5, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyEnable, 25, 25) | + __gen_field(values->ChromaKeyIndex, 23, 24) | + __gen_field(values->ChromaKeyMode, 22, 22) | + __gen_field(values->MaximumAnisotropy, 19, 21) | + __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | + __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | + __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | + __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | + __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | + __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | + __gen_field(values->TrilinearFilterQuality, 11, 12) | + __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | + __gen_field(values->TCXAddressControlMode, 6, 8) | + __gen_field(values->TCYAddressControlMode, 3, 5) | + __gen_field(values->TCZAddressControlMode, 0, 2) | + 0; + +} + +/* Enum 3D_Prim_Topo_Type */ +#define _3DPRIM_POINTLIST 1 +#define _3DPRIM_LINELIST 2 +#define _3DPRIM_LINESTRIP 3 +#define _3DPRIM_TRILIST 4 +#define _3DPRIM_TRISTRIP 5 +#define _3DPRIM_TRIFAN 6 +#define _3DPRIM_QUADLIST 7 +#define _3DPRIM_QUADSTRIP 8 
+#define _3DPRIM_LINELIST_ADJ 9 +#define _3DPRIM_LISTSTRIP_ADJ 10 +#define _3DPRIM_TRILIST_ADJ 11 +#define _3DPRIM_TRISTRIP_ADJ 12 +#define _3DPRIM_TRISTRIP_REVERSE 13 +#define _3DPRIM_POLYGON 14 +#define _3DPRIM_RECTLIST 15 +#define _3DPRIM_LINELOOP 16 +#define _3DPRIM_POINTLIST_BF 17 +#define _3DPRIM_LINESTRIP_CONT 18 +#define _3DPRIM_LINESTRIP_BF 19 +#define _3DPRIM_LINESTRIP_CONT_BF 20 +#define _3DPRIM_TRIFAN_NOSTIPPLE 22 +#define _3DPRIM_PATCHLIST_1 32 +#define _3DPRIM_PATCHLIST_2 33 +#define _3DPRIM_PATCHLIST_3 34 +#define _3DPRIM_PATCHLIST_4 35 +#define _3DPRIM_PATCHLIST_5 36 +#define _3DPRIM_PATCHLIST_6 37 +#define _3DPRIM_PATCHLIST_7 38 +#define _3DPRIM_PATCHLIST_8 39 +#define _3DPRIM_PATCHLIST_9 40 +#define _3DPRIM_PATCHLIST_10 41 +#define _3DPRIM_PATCHLIST_11 42 +#define _3DPRIM_PATCHLIST_12 43 +#define _3DPRIM_PATCHLIST_13 44 +#define _3DPRIM_PATCHLIST_14 45 +#define _3DPRIM_PATCHLIST_15 46 +#define _3DPRIM_PATCHLIST_16 47 +#define _3DPRIM_PATCHLIST_17 48 +#define _3DPRIM_PATCHLIST_18 49 +#define _3DPRIM_PATCHLIST_19 50 +#define _3DPRIM_PATCHLIST_20 51 +#define _3DPRIM_PATCHLIST_21 52 +#define _3DPRIM_PATCHLIST_22 53 +#define _3DPRIM_PATCHLIST_23 54 +#define _3DPRIM_PATCHLIST_24 55 +#define _3DPRIM_PATCHLIST_25 56 +#define _3DPRIM_PATCHLIST_26 57 +#define _3DPRIM_PATCHLIST_27 58 +#define _3DPRIM_PATCHLIST_28 59 +#define _3DPRIM_PATCHLIST_29 60 +#define _3DPRIM_PATCHLIST_30 61 +#define _3DPRIM_PATCHLIST_31 62 +#define _3DPRIM_PATCHLIST_32 63 + +/* Enum 3D_Vertex_Component_Control */ +#define VFCOMP_NOSTORE 0 +#define VFCOMP_STORE_SRC 1 +#define VFCOMP_STORE_0 2 +#define VFCOMP_STORE_1_FP 3 +#define VFCOMP_STORE_1_INT 4 +#define VFCOMP_STORE_VID 5 +#define VFCOMP_STORE_IID 6 +#define VFCOMP_STORE_PID 7 + +/* Enum 3D_Compare_Function */ +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define 
/* Remainder of Enum 3D_Compare_Function (0-5 defined above). */
#define COMPAREFUNCTION_NOTEQUAL 6
#define COMPAREFUNCTION_GEQUAL 7

/* Enum SURFACE_FORMAT — surface format encodings for SURFACE_STATE and
 * VERTEX_ELEMENT_STATE.  Values are hardware encodings; gaps in the
 * numbering are reserved encodings on this generation. */
#define R32G32B32A32_FLOAT 0
#define R32G32B32A32_SINT 1
#define R32G32B32A32_UINT 2
#define R32G32B32A32_UNORM 3
#define R32G32B32A32_SNORM 4
#define R64G64_FLOAT 5
#define R32G32B32X32_FLOAT 6
#define R32G32B32A32_SSCALED 7
#define R32G32B32A32_USCALED 8
#define R32G32B32A32_SFIXED 32
#define R64G64_PASSTHRU 33
#define R32G32B32_FLOAT 64
#define R32G32B32_SINT 65
#define R32G32B32_UINT 66
#define R32G32B32_UNORM 67
#define R32G32B32_SNORM 68
#define R32G32B32_SSCALED 69
#define R32G32B32_USCALED 70
#define R32G32B32_SFIXED 80
#define R16G16B16A16_UNORM 128
#define R16G16B16A16_SNORM 129
#define R16G16B16A16_SINT 130
#define R16G16B16A16_UINT 131
#define R16G16B16A16_FLOAT 132
#define R32G32_FLOAT 133
#define R32G32_SINT 134
#define R32G32_UINT 135
#define R32_FLOAT_X8X24_TYPELESS 136
#define X32_TYPELESS_G8X24_UINT 137
#define L32A32_FLOAT 138
#define R32G32_UNORM 139
#define R32G32_SNORM 140
#define R64_FLOAT 141
#define R16G16B16X16_UNORM 142
#define R16G16B16X16_FLOAT 143
#define A32X32_FLOAT 144
#define L32X32_FLOAT 145
#define I32X32_FLOAT 146
#define R16G16B16A16_SSCALED 147
#define R16G16B16A16_USCALED 148
#define R32G32_SSCALED 149
#define R32G32_USCALED 150
#define R32G32_SFIXED 160
#define R64_PASSTHRU 161
/* 32-bit-per-pixel formats. */
#define B8G8R8A8_UNORM 192
#define B8G8R8A8_UNORM_SRGB 193
#define R10G10B10A2_UNORM 194
#define R10G10B10A2_UNORM_SRGB 195
#define R10G10B10A2_UINT 196
#define R10G10B10_SNORM_A2_UNORM 197
#define R8G8B8A8_UNORM 199
#define R8G8B8A8_UNORM_SRGB 200
#define R8G8B8A8_SNORM 201
#define R8G8B8A8_SINT 202
#define R8G8B8A8_UINT 203
#define R16G16_UNORM 204
#define R16G16_SNORM 205
#define R16G16_SINT 206
#define R16G16_UINT 207
#define R16G16_FLOAT 208
#define B10G10R10A2_UNORM 209
#define B10G10R10A2_UNORM_SRGB 210
#define R11G11B10_FLOAT 211
#define R32_SINT 214
#define R32_UINT 215
#define R32_FLOAT 216
#define R24_UNORM_X8_TYPELESS 217
#define X24_TYPELESS_G8_UINT 218
#define L32_UNORM 221
#define A32_UNORM 222
#define L16A16_UNORM 223
#define I24X8_UNORM 224
#define L24X8_UNORM 225
#define A24X8_UNORM 226
#define I32_FLOAT 227
#define L32_FLOAT 228
#define A32_FLOAT 229
#define X8B8_UNORM_G8R8_SNORM 230
#define A8X8_UNORM_G8R8_SNORM 231
#define B8X8_UNORM_G8R8_SNORM 232
#define B8G8R8X8_UNORM 233
#define B8G8R8X8_UNORM_SRGB 234
#define R8G8B8X8_UNORM 235
#define R8G8B8X8_UNORM_SRGB 236
#define R9G9B9E5_SHAREDEXP 237
#define B10G10R10X2_UNORM 238
#define L16A16_FLOAT 240
#define R32_UNORM 241
#define R32_SNORM 242
#define R10G10B10X2_USCALED 243
#define R8G8B8A8_SSCALED 244
#define R8G8B8A8_USCALED 245
#define R16G16_SSCALED 246
#define R16G16_USCALED 247
#define R32_SSCALED 248
#define R32_USCALED 249
/* 16-bit-per-pixel formats. */
#define B5G6R5_UNORM 256
#define B5G6R5_UNORM_SRGB 257
#define B5G5R5A1_UNORM 258
#define B5G5R5A1_UNORM_SRGB 259
#define B4G4R4A4_UNORM 260
#define B4G4R4A4_UNORM_SRGB 261
#define R8G8_UNORM 262
#define R8G8_SNORM 263
#define R8G8_SINT 264
#define R8G8_UINT 265
#define R16_UNORM 266
#define R16_SNORM 267
#define R16_SINT 268
#define R16_UINT 269
#define R16_FLOAT 270
#define A8P8_UNORM_PALETTE0 271
#define A8P8_UNORM_PALETTE1 272
#define I16_UNORM 273
#define L16_UNORM 274
#define A16_UNORM 275
#define L8A8_UNORM 276
#define I16_FLOAT 277
#define L16_FLOAT 278
#define A16_FLOAT 279
#define L8A8_UNORM_SRGB 280
#define R5G5_SNORM_B6_UNORM 281
#define B5G5R5X1_UNORM 282
#define B5G5R5X1_UNORM_SRGB 283
#define R8G8_SSCALED 284
#define R8G8_USCALED 285
#define R16_SSCALED 286
#define R16_USCALED 287
#define P8A8_UNORM_PALETTE0 290
#define P8A8_UNORM_PALETTE1 291
#define A1B5G5R5_UNORM 292
#define A4B4G4R4_UNORM 293
#define L8A8_UINT 294
#define L8A8_SINT 295
/* 8-bit-per-pixel formats. */
#define R8_UNORM 320
#define R8_SNORM 321
#define R8_SINT 322
#define R8_UINT 323
#define A8_UNORM 324
#define I8_UNORM 325
#define L8_UNORM 326
#define P4A4_UNORM_PALETTE0 327
#define A4P4_UNORM_PALETTE0 328
#define R8_SSCALED 329
#define R8_USCALED 330
#define P8_UNORM_PALETTE0 331
#define L8_UNORM_SRGB 332
#define P8_UNORM_PALETTE1 333
#define P4A4_UNORM_PALETTE1 334
#define A4P4_UNORM_PALETTE1 335
#define Y8_UNORM 336
#define L8_UINT 338
#define L8_SINT 339
#define I8_UINT 340
#define I8_SINT 341
/* Compressed / video / packed formats. */
#define DXT1_RGB_SRGB 384
#define R1_UNORM 385
#define YCRCB_NORMAL 386
#define YCRCB_SWAPUVY 387
#define P2_UNORM_PALETTE0 388
#define P2_UNORM_PALETTE1 389
#define BC1_UNORM 390
#define BC2_UNORM 391
#define BC3_UNORM 392
#define BC4_UNORM 393
#define BC5_UNORM 394
#define BC1_UNORM_SRGB 395
#define BC2_UNORM_SRGB 396
#define BC3_UNORM_SRGB 397
#define MONO8 398
#define YCRCB_SWAPUV 399
#define YCRCB_SWAPY 400
#define DXT1_RGB 401
#define FXT1 402
#define R8G8B8_UNORM 403
#define R8G8B8_SNORM 404
#define R8G8B8_SSCALED 405
#define R8G8B8_USCALED 406
#define R64G64B64A64_FLOAT 407
#define R64G64B64_FLOAT 408
#define BC4_SNORM 409
#define BC5_SNORM 410
#define R16G16B16_FLOAT 411
#define R16G16B16_UNORM 412
#define R16G16B16_SNORM 413
#define R16G16B16_SSCALED 414
#define R16G16B16_USCALED 415
#define BC6H_SF16 417
#define BC7_UNORM 418
#define BC7_UNORM_SRGB 419
#define BC6H_UF16 420
#define PLANAR_420_8 421
#define R8G8B8_UNORM_SRGB 424
#define ETC1_RGB8 425
#define ETC2_RGB8 426
#define EAC_R11 427
#define EAC_RG11 428
#define EAC_SIGNED_R11 429
#define EAC_SIGNED_RG11 430
#define ETC2_SRGB8 431
#define R16G16B16_UINT 432
#define R16G16B16_SINT 433
#define R32_SFIXED 434
#define R10G10B10A2_SNORM 435
#define R10G10B10A2_USCALED 436
#define R10G10B10A2_SSCALED 437
#define R10G10B10A2_SINT 438
#define B10G10R10A2_SNORM 439
#define B10G10R10A2_USCALED 440
#define B10G10R10A2_SSCALED 441
#define B10G10R10A2_UINT 442
#define B10G10R10A2_SINT 443
#define R64G64B64A64_PASSTHRU 444
#define R64G64B64_PASSTHRU 445
#define ETC2_RGB8_PTA 448
#define ETC2_SRGB8_PTA 449
#define ETC2_EAC_RGBA8 450
#define ETC2_EAC_SRGB8_A8 451
#define R8G8B8_UINT 456
#define R8G8B8_SINT 457
#define RAW 511

/* Enum Texture Coordinate Mode — SAMPLER_STATE TCX/TCY/TCZ address modes. */
#define TCM_WRAP 0
#define TCM_MIRROR 1
#define TCM_CLAMP 2
#define TCM_CUBE 3
#define TCM_CLAMP_BORDER 4
#define TCM_MIRROR_ONCE 5
+ */ + +#pragma once + +#include +#include + +#ifndef __gen_field_functions +#define __gen_field_functions + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_field(uint64_t v, uint32_t start, uint32_t end) +{ +#if DEBUG + if (end - start + 1 < 64) + assert(v < 1ul << (end - start + 1)); +#endif + + return v << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ +#if DEBUG + uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline uint32_t +__gen_float(float v) +{ + return ((union __gen_value) { .f = (v) }).dw; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#endif + +#define GEN7_3DSTATE_URB_VS_length 0x00000002 +#define GEN7_3DSTATE_URB_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_URB_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 48, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_URB_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VSURBStartingAddress; + uint32_t VSURBEntryAllocationSize; + uint32_t VSNumberofURBEntries; +}; + +static inline void +GEN7_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_URB_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VSURBStartingAddress, 25, 29) | + __gen_field(values->VSURBEntryAllocationSize, 16, 24) | + 
__gen_field(values->VSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN7_MI_STORE_REGISTER_MEM_length 0x00000003 +#define GEN7_MI_STORE_REGISTER_MEM_length_bias 0x00000002 +#define GEN7_MI_STORE_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 36, \ + .DwordLength = 1 + +struct GEN7_MI_STORE_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN7_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_STORE_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN7_PIPELINE_SELECT_length 0x00000001 +#define GEN7_PIPELINE_SELECT_length_bias 0x00000001 +#define GEN7_PIPELINE_SELECT_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4 + +struct GEN7_PIPELINE_SELECT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define _3D 0 +#define Media 1 +#define GPGPU 2 + uint32_t PipelineSelection; +}; + +static inline void +GEN7_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_PIPELINE_SELECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->PipelineSelection, 0, 1) | 
/* STATE_BASE_ADDRESS — programs the five base addresses (general, surface
 * state, dynamic state, indirect object, instruction) and their upper
 * bounds.  Each address dword is combined with its enable/MOCS bits via
 * __gen_combine_address so the consumer can emit relocations.  MOCS fields
 * are declared but not packed here (generator note: "found
 * MEMORY_OBJECT_CONTROL_STATE" — the sub-struct packing was not emitted). */
#define GEN7_STATE_BASE_ADDRESS_length 0x0000000a
#define GEN7_STATE_BASE_ADDRESS_length_bias 0x00000002
#define GEN7_STATE_BASE_ADDRESS_header          \
   .CommandType          =  3,                  \
   .CommandSubType       =  0,                  \
   ._3DCommandOpcode     =  1,                  \
   ._3DCommandSubOpcode  =  1,                  \
   .DwordLength          =  8

struct GEN7_STATE_BASE_ADDRESS {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   __gen_address_type GeneralStateBaseAddress;
   uint32_t GeneralStateMemoryObjectControlState;
   uint32_t StatelessDataPortAccessMemoryObjectControlState;
   uint32_t StatelessDataPortAccessForceWriteThru;
   uint32_t GeneralStateBaseAddressModifyEnable;
   __gen_address_type SurfaceStateBaseAddress;
   uint32_t SurfaceStateMemoryObjectControlState;
   uint32_t SurfaceStateBaseAddressModifyEnable;
   __gen_address_type DynamicStateBaseAddress;
   uint32_t DynamicStateMemoryObjectControlState;
   uint32_t DynamicStateBaseAddressModifyEnable;
   __gen_address_type IndirectObjectBaseAddress;
   uint32_t IndirectObjectMemoryObjectControlState;
   uint32_t IndirectObjectBaseAddressModifyEnable;
   __gen_address_type InstructionBaseAddress;
   uint32_t InstructionMemoryObjectControlState;
   uint32_t InstructionBaseAddressModifyEnable;
   __gen_address_type GeneralStateAccessUpperBound;
   uint32_t GeneralStateAccessUpperBoundModifyEnable;
   __gen_address_type DynamicStateAccessUpperBound;
   uint32_t DynamicStateAccessUpperBoundModifyEnable;
   __gen_address_type IndirectObjectAccessUpperBound;
   uint32_t IndirectObjectAccessUpperBoundModifyEnable;
   __gen_address_type InstructionAccessUpperBound;
   uint32_t InstructionAccessUpperBoundModifyEnable;
};

static inline void
GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst,
                             const struct GEN7_STATE_BASE_ADDRESS * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   /* dw1-dw5: base addresses; low bits carry the ModifyEnable flags. */
   uint32_t dw1 =
      /* Struct GeneralStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */
      /* Struct StatelessDataPortAccessMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */
      __gen_field(values->StatelessDataPortAccessForceWriteThru, 3, 3) |
      __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) |
      0;

   dw[1] =
      __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1);

   uint32_t dw2 =
      /* Struct SurfaceStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */
      __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) |
      0;

   dw[2] =
      __gen_combine_address(data, &dw[2], values->SurfaceStateBaseAddress, dw2);

   uint32_t dw3 =
      /* Struct DynamicStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */
      __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) |
      0;

   dw[3] =
      __gen_combine_address(data, &dw[3], values->DynamicStateBaseAddress, dw3);

   uint32_t dw4 =
      /* Struct IndirectObjectMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */
      __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) |
      0;

   dw[4] =
      __gen_combine_address(data, &dw[4], values->IndirectObjectBaseAddress, dw4);

   uint32_t dw5 =
      /* Struct InstructionMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */
      __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) |
      0;

   dw[5] =
      __gen_combine_address(data, &dw[5], values->InstructionBaseAddress, dw5);

   /* dw6-dw9: access upper bounds, each with its own ModifyEnable bit. */
   uint32_t dw6 =
      __gen_field(values->GeneralStateAccessUpperBoundModifyEnable, 0, 0) |
      0;

   dw[6] =
      __gen_combine_address(data, &dw[6], values->GeneralStateAccessUpperBound, dw6);

   uint32_t dw7 =
      __gen_field(values->DynamicStateAccessUpperBoundModifyEnable, 0, 0) |
      0;

   dw[7] =
      __gen_combine_address(data, &dw[7], values->DynamicStateAccessUpperBound, dw7);

   uint32_t dw8 =
      __gen_field(values->IndirectObjectAccessUpperBoundModifyEnable, 0, 0) |
      0;

   dw[8] =
      __gen_combine_address(data, &dw[8], values->IndirectObjectAccessUpperBound, dw8);

   uint32_t dw9 =
      __gen_field(values->InstructionAccessUpperBoundModifyEnable, 0, 0) |
      0;

   dw[9] =
      __gen_combine_address(data, &dw[9], values->InstructionAccessUpperBound, dw9);

}

/* STATE_PREFETCH — hint to prefetch PrefetchCount cachelines at the given
 * address into the state cache. */
#define GEN7_STATE_PREFETCH_length 0x00000002
#define GEN7_STATE_PREFETCH_length_bias 0x00000002
#define GEN7_STATE_PREFETCH_header              \
   .CommandType          =  3,                  \
   .CommandSubType       =  0,                  \
   ._3DCommandOpcode     =  0,                  \
   ._3DCommandSubOpcode  =  3,                  \
   .DwordLength          =  0

struct GEN7_STATE_PREFETCH {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   __gen_address_type PrefetchPointer;
   uint32_t PrefetchCount;
};

static inline void
GEN7_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst,
                         const struct GEN7_STATE_PREFETCH * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   uint32_t dw1 =
      __gen_field(values->PrefetchCount, 0, 2) |
      0;

   dw[1] =
      __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1);

}

/* STATE_SIP — System Instruction Pointer for exception/breakpoint handling;
 * the pointer is 16-byte aligned (bits 4..31). */
#define GEN7_STATE_SIP_length 0x00000002
#define GEN7_STATE_SIP_length_bias 0x00000002
#define GEN7_STATE_SIP_header                   \
   .CommandType          =  3,                  \
   .CommandSubType       =  0,                  \
   ._3DCommandOpcode     =  1,                  \
   ._3DCommandSubOpcode  =  2,                  \
   .DwordLength          =  0

struct GEN7_STATE_SIP {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   uint32_t SystemInstructionPointer;
};

static inline void
GEN7_STATE_SIP_pack(__gen_user_data *data, void * restrict dst,
                    const struct GEN7_STATE_SIP * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   dw[1] =
      __gen_offset(values->SystemInstructionPointer, 4, 31) |
      0;

}

/* SWTESS_BASE_ADDRESS — base address for software tessellation data;
 * address goes through the relocation hook, MOCS not packed (see above). */
#define GEN7_SWTESS_BASE_ADDRESS_length 0x00000002
#define GEN7_SWTESS_BASE_ADDRESS_length_bias 0x00000002
#define GEN7_SWTESS_BASE_ADDRESS_header         \
   .CommandType          =  3,                  \
   .CommandSubType       =  0,                  \
   ._3DCommandOpcode     =  1,                  \
   ._3DCommandSubOpcode  =  3,                  \
   .DwordLength          =  0

struct GEN7_SWTESS_BASE_ADDRESS {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   __gen_address_type SWTessellationBaseAddress;
   uint32_t SWTessellationMemoryObjectControlState;
};

static inline void
GEN7_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst,
                              const struct GEN7_SWTESS_BASE_ADDRESS * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   uint32_t dw1 =
      /* Struct SWTessellationMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */
      0;

   dw[1] =
      __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1);

}
/* 3DPRIMITIVE — the draw command: topology, vertex/instance counts and
 * offsets.  VertexAccessType selects SEQUENTIAL (non-indexed) vs RANDOM
 * (indexed) vertex fetch. */
#define GEN7_3DPRIMITIVE_length 0x00000007
#define GEN7_3DPRIMITIVE_length_bias 0x00000002
#define GEN7_3DPRIMITIVE_header                 \
   .CommandType          =  3,                  \
   .CommandSubType       =  3,                  \
   ._3DCommandOpcode     =  3,                  \
   ._3DCommandSubOpcode  =  0,                  \
   .DwordLength          =  5

struct GEN7_3DPRIMITIVE {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t IndirectParameterEnable;
   uint32_t PredicateEnable;
   uint32_t DwordLength;
   uint32_t EndOffsetEnable;
#define SEQUENTIAL 0
#define RANDOM 1
   uint32_t VertexAccessType;
   uint32_t PrimitiveTopologyType;
   uint32_t VertexCountPerInstance;
   uint32_t StartVertexLocation;
   uint32_t InstanceCount;
   uint32_t StartInstanceLocation;
   uint32_t BaseVertexLocation;
};

static inline void
GEN7_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst,
                      const struct GEN7_3DPRIMITIVE * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->IndirectParameterEnable, 10, 10) |
      __gen_field(values->PredicateEnable, 8, 8) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   dw[1] =
      __gen_field(values->EndOffsetEnable, 9, 9) |
      __gen_field(values->VertexAccessType, 8, 8) |
      __gen_field(values->PrimitiveTopologyType, 0, 5) |
      0;

   dw[2] =
      __gen_field(values->VertexCountPerInstance, 0, 31) |
      0;

   dw[3] =
      __gen_field(values->StartVertexLocation, 0, 31) |
      0;

   dw[4] =
      __gen_field(values->InstanceCount, 0, 31) |
      0;

   dw[5] =
      __gen_field(values->StartInstanceLocation, 0, 31) |
      0;

   dw[6] =
      __gen_field(values->BaseVertexLocation, 0, 31) |
      0;

}

/* 3DSTATE_AA_LINE_PARAMETERS — antialiased-line coverage bias/slope for the
 * line body and end caps (8-bit fixed-point fields). */
#define GEN7_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003
#define GEN7_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002
#define GEN7_3DSTATE_AA_LINE_PARAMETERS_header  \
   .CommandType          =  3,                  \
   .CommandSubType       =  3,                  \
   ._3DCommandOpcode     =  1,                  \
   ._3DCommandSubOpcode  = 10,                  \
   .DwordLength          =  1

struct GEN7_3DSTATE_AA_LINE_PARAMETERS {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   uint32_t AACoverageBias;
   uint32_t AACoverageSlope;
   uint32_t AACoverageEndCapBias;
   uint32_t AACoverageEndCapSlope;
};

static inline void
GEN7_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst,
                                     const struct GEN7_3DSTATE_AA_LINE_PARAMETERS * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   dw[1] =
      __gen_field(values->AACoverageBias, 16, 23) |
      __gen_field(values->AACoverageSlope, 0, 7) |
      0;

   dw[2] =
      __gen_field(values->AACoverageEndCapBias, 16, 23) |
      __gen_field(values->AACoverageEndCapSlope, 0, 7) |
      0;

}

/* 3DSTATE_BINDING_TABLE_POINTERS_DS — 32-byte-aligned offset (bits 5..15)
 * of the DS stage binding table within the surface state base. */
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_header\
   .CommandType          =  3,                  \
   .CommandSubType       =  3,                  \
   ._3DCommandOpcode     =  0,                  \
   ._3DCommandSubOpcode  = 40,                  \
   .DwordLength          =  0

struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   uint32_t PointertoDSBindingTable;
};

static inline void
GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst,
                                            const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   dw[1] =
      __gen_offset(values->PointertoDSBindingTable, 5, 15) |
      0;

}
/* 3DSTATE_BINDING_TABLE_POINTERS_{GS,HS,PS,VS} — identical layout to the DS
 * variant above; only the sub-opcode and stage differ.  dw1 bits 5..15 hold
 * the 32-byte-aligned binding-table offset. */
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_header\
   .CommandType          =  3,                  \
   .CommandSubType       =  3,                  \
   ._3DCommandOpcode     =  0,                  \
   ._3DCommandSubOpcode  = 41,                  \
   .DwordLength          =  0

struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   uint32_t PointertoGSBindingTable;
};

static inline void
GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst,
                                            const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   dw[1] =
      __gen_offset(values->PointertoGSBindingTable, 5, 15) |
      0;

}

#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_header\
   .CommandType          =  3,                  \
   .CommandSubType       =  3,                  \
   ._3DCommandOpcode     =  0,                  \
   ._3DCommandSubOpcode  = 39,                  \
   .DwordLength          =  0

struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   uint32_t PointertoHSBindingTable;
};

static inline void
GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst,
                                            const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   dw[1] =
      __gen_offset(values->PointertoHSBindingTable, 5, 15) |
      0;

}

#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_header\
   .CommandType          =  3,                  \
   .CommandSubType       =  3,                  \
   ._3DCommandOpcode     =  0,                  \
   ._3DCommandSubOpcode  = 42,                  \
   .DwordLength          =  0

struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   uint32_t PointertoPSBindingTable;
};

static inline void
GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst,
                                            const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   dw[1] =
      __gen_offset(values->PointertoPSBindingTable, 5, 15) |
      0;

}

#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_header\
   .CommandType          =  3,                  \
   .CommandSubType       =  3,                  \
   ._3DCommandOpcode     =  0,                  \
   ._3DCommandSubOpcode  = 38,                  \
   .DwordLength          =  0

struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   uint32_t PointertoVSBindingTable;
};

static inline void
GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst,
                                            const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   dw[1] =
      __gen_offset(values->PointertoVSBindingTable, 5, 15) |
      0;

}

/* 3DSTATE_BLEND_STATE_POINTERS — 64-byte-aligned pointer (bits 6..31) to
 * the BLEND_STATE array in dynamic state. */
#define GEN7_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002
#define GEN7_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002
#define GEN7_3DSTATE_BLEND_STATE_POINTERS_header\
   .CommandType          =  3,                  \
   .CommandSubType       =  3,                  \
   ._3DCommandOpcode     =  0,                  \
   ._3DCommandSubOpcode  = 36,                  \
   .DwordLength          =  0

struct GEN7_3DSTATE_BLEND_STATE_POINTERS {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   uint32_t BlendStatePointer;
};

static inline void
GEN7_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst,
                                       const struct GEN7_3DSTATE_BLEND_STATE_POINTERS * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   dw[1] =
      __gen_offset(values->BlendStatePointer, 6, 31) |
      0;

}

/* 3DSTATE_CC_STATE_POINTERS — 64-byte-aligned pointer (bits 6..31) to the
 * COLOR_CALC_STATE in dynamic state. */
#define GEN7_3DSTATE_CC_STATE_POINTERS_length 0x00000002
#define GEN7_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002
#define GEN7_3DSTATE_CC_STATE_POINTERS_header   \
   .CommandType          =  3,                  \
   .CommandSubType       =  3,                  \
   ._3DCommandOpcode     =  0,                  \
   ._3DCommandSubOpcode  = 14,                  \
   .DwordLength          =  0

struct GEN7_3DSTATE_CC_STATE_POINTERS {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   uint32_t ColorCalcStatePointer;
};

static inline void
GEN7_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst,
                                    const struct GEN7_3DSTATE_CC_STATE_POINTERS * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   dw[1] =
      __gen_offset(values->ColorCalcStatePointer, 6, 31) |
      0;

}

/* 3DSTATE_CHROMA_KEY — low/high chroma-key range for one of four key table
 * entries (index in dw1 bits 30..31). */
#define GEN7_3DSTATE_CHROMA_KEY_length 0x00000004
#define GEN7_3DSTATE_CHROMA_KEY_length_bias 0x00000002
#define GEN7_3DSTATE_CHROMA_KEY_header          \
   .CommandType          =  3,                  \
   .CommandSubType       =  3,                  \
   ._3DCommandOpcode     =  1,                  \
   ._3DCommandSubOpcode  =  4,                  \
   .DwordLength          =  2

struct GEN7_3DSTATE_CHROMA_KEY {
   uint32_t CommandType;
   uint32_t CommandSubType;
   uint32_t _3DCommandOpcode;
   uint32_t _3DCommandSubOpcode;
   uint32_t DwordLength;
   uint32_t ChromaKeyTableIndex;
   uint32_t ChromaKeyLowValue;
   uint32_t ChromaKeyHighValue;
};

static inline void
GEN7_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst,
                             const struct GEN7_3DSTATE_CHROMA_KEY * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->CommandType, 29, 31) |
      __gen_field(values->CommandSubType, 27, 28) |
      __gen_field(values->_3DCommandOpcode, 24, 26) |
      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
      __gen_field(values->DwordLength, 0, 7) |
      0;

   dw[1] =
      __gen_field(values->ChromaKeyTableIndex, 30, 31) |
      0;

   dw[2] =
      __gen_field(values->ChromaKeyLowValue, 0, 31) |
      0;

   dw[3] =
      __gen_field(values->ChromaKeyHighValue, 0, 31) |
      0;

}
0x00000002 +#define GEN7_3DSTATE_CLEAR_PARAMS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +struct GEN7_3DSTATE_CLEAR_PARAMS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DepthClearValue; + uint32_t DepthClearValueValid; +}; + +static inline void +GEN7_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CLEAR_PARAMS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DepthClearValue, 0, 31) | + 0; + + dw[2] = + __gen_field(values->DepthClearValueValid, 0, 0) | + 0; + +} + +#define GEN7_3DSTATE_CLIP_length 0x00000004 +#define GEN7_3DSTATE_CLIP_length_bias 0x00000002 +#define GEN7_3DSTATE_CLIP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 2 + +struct GEN7_3DSTATE_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t FrontWinding; + uint32_t VertexSubPixelPrecisionSelect; + uint32_t EarlyCullEnable; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; + uint32_t ClipperStatisticsEnable; + uint32_t UserClipDistanceCullTestEnableBitmask; + uint32_t ClipEnable; +#define APIMODE_OGL 0 + uint32_t APIMode; + uint32_t ViewportXYClipTestEnable; + uint32_t ViewportZClipTestEnable; + uint32_t GuardbandClipTestEnable; + uint32_t UserClipDistanceClipTestEnableBitmask; +#define CLIPMODE_NORMAL 0 
+#define CLIPMODE_REJECT_ALL 3 +#define CLIPMODE_ACCEPT_ALL 4 + uint32_t ClipMode; + uint32_t PerspectiveDivideDisable; + uint32_t NonPerspectiveBarycentricEnable; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 + uint32_t LineStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleFanProvokingVertexSelect; + uint32_t MinimumPointWidth; + uint32_t MaximumPointWidth; + uint32_t ForceZeroRTAIndexEnable; + uint32_t MaximumVPIndex; +}; + +static inline void +GEN7_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->FrontWinding, 20, 20) | + __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | + __gen_field(values->EarlyCullEnable, 18, 18) | + __gen_field(values->CullMode, 16, 17) | + __gen_field(values->ClipperStatisticsEnable, 10, 10) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->ClipEnable, 31, 31) | + __gen_field(values->APIMode, 30, 30) | + __gen_field(values->ViewportXYClipTestEnable, 28, 28) | + __gen_field(values->ViewportZClipTestEnable, 27, 27) | + __gen_field(values->GuardbandClipTestEnable, 26, 26) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | + __gen_field(values->ClipMode, 13, 15) | + __gen_field(values->PerspectiveDivideDisable, 9, 9) | + __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | + 
__gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | + __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | + 0; + + dw[3] = + __gen_field(values->MinimumPointWidth, 17, 27) | + __gen_field(values->MaximumPointWidth, 6, 16) | + __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | + __gen_field(values->MaximumVPIndex, 0, 3) | + 0; + +} + +#define GEN7_3DSTATE_CONSTANT_DS_length 0x00000007 +#define GEN7_3DSTATE_CONSTANT_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 5 + +struct GEN7_3DSTATE_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN7_3DSTATE_CONSTANT_GS_length 0x00000007 +#define GEN7_3DSTATE_CONSTANT_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 5 + +struct GEN7_3DSTATE_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_GS * 
restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN7_3DSTATE_CONSTANT_HS_length 0x00000007 +#define GEN7_3DSTATE_CONSTANT_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 5 + +struct GEN7_3DSTATE_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN7_3DSTATE_CONSTANT_PS_length 0x00000007 +#define GEN7_3DSTATE_CONSTANT_PS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 23, \ + .DwordLength = 5 + +struct GEN7_3DSTATE_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + 
const struct GEN7_3DSTATE_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN7_3DSTATE_CONSTANT_VS_length 0x00000007 +#define GEN7_3DSTATE_CONSTANT_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 5 + +struct GEN7_3DSTATE_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN7_3DSTATE_DEPTH_BUFFER_length 0x00000007 +#define GEN7_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN7_3DSTATE_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 5, \ + .DwordLength = 5 + +struct GEN7_3DSTATE_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define 
SURFTYPE_CUBE 3 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + uint32_t DepthWriteEnable; + uint32_t StencilWriteEnable; + uint32_t HierarchicalDepthBufferEnable; +#define D32_FLOAT 1 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t SurfaceFormat; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t LOD; +#define SURFTYPE_CUBEmustbezero 0 + uint32_t Depth; + uint32_t MinimumArrayElement; + uint32_t DepthBufferObjectControlState; + uint32_t DepthCoordinateOffsetY; + uint32_t DepthCoordinateOffsetX; + uint32_t RenderTargetViewExtent; +}; + +static inline void +GEN7_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->DepthWriteEnable, 28, 28) | + __gen_field(values->StencilWriteEnable, 27, 27) | + __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | + __gen_field(values->SurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[3] = + __gen_field(values->Height, 18, 31) | + __gen_field(values->Width, 4, 17) | + __gen_field(values->LOD, 0, 3) | + 0; + + dw[4] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->MinimumArrayElement, 10, 20) | + /* Struct DepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[5] = + __gen_field(values->DepthCoordinateOffsetY, 16, 31) | + __gen_field(values->DepthCoordinateOffsetX, 0, 15) | + 0; + + dw[6] = + 
__gen_field(values->RenderTargetViewExtent, 21, 31) | + 0; + +} + +#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length 0x00000002 +#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length_bias 0x00000002 +#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 37, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDEPTH_STENCIL_STATE; +}; + +static inline void +GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDEPTH_STENCIL_STATE, 6, 31) | + 0; + +} + +#define GEN7_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 +#define GEN7_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 +#define GEN7_3DSTATE_DRAWING_RECTANGLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 2 + +struct GEN7_3DSTATE_DRAWING_RECTANGLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ClippedDrawingRectangleYMin; + uint32_t ClippedDrawingRectangleXMin; + uint32_t ClippedDrawingRectangleYMax; + uint32_t ClippedDrawingRectangleXMax; + uint32_t DrawingRectangleOriginY; + uint32_t DrawingRectangleOriginX; +}; + +static inline void +GEN7_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, 
void * restrict dst, + const struct GEN7_3DSTATE_DRAWING_RECTANGLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | + 0; + + dw[2] = + __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | + 0; + + dw[3] = + __gen_field(values->DrawingRectangleOriginY, 16, 31) | + __gen_field(values->DrawingRectangleOriginX, 0, 15) | + 0; + +} + +#define GEN7_3DSTATE_DS_length 0x00000006 +#define GEN7_3DSTATE_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 4 + +struct GEN7_3DSTATE_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleDomainPointDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t PatchURBEntryReadLength; + uint32_t PatchURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + uint32_t StatisticsEnable; + uint32_t ComputeWCoordinateEnable; 
+ uint32_t DSCacheDisable; + uint32_t DSFunctionEnable; +}; + +static inline void +GEN7_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleDomainPointDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->PatchURBEntryReadLength, 11, 17) | + __gen_field(values->PatchURBEntryReadOffset, 4, 9) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 25, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->ComputeWCoordinateEnable, 2, 2) | + __gen_field(values->DSCacheDisable, 1, 1) | + __gen_field(values->DSFunctionEnable, 0, 0) | + 0; + +} + +#define GEN7_3DSTATE_GS_length 0x00000007 +#define GEN7_3DSTATE_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 5 + +struct GEN7_3DSTATE_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t 
KernelStartPointer; + uint32_t SingleProgramFlowSPF; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t MaskStackExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t OutputVertexSize; + uint32_t OutputTopology; + uint32_t VertexURBEntryReadLength; + uint32_t IncludeVertexHandles; + uint32_t VertexURBEntryReadOffset; + uint32_t DispatchGRFStartRegisterforURBData; + uint32_t MaximumNumberofThreads; +#define GSCTL_CUT 0 +#define GSCTL_SID 1 + uint32_t ControlDataFormat; + uint32_t ControlDataHeaderSize; + uint32_t InstanceControl; + uint32_t DefaultStreamID; +#define SINGLE 0 +#define DUAL_INSTANCE 1 +#define DUAL_OBJECT 2 + uint32_t DispatchMode; + uint32_t GSStatisticsEnable; + uint32_t GSInvocationsIncrementValue; + uint32_t IncludePrimitiveID; + uint32_t Hint; + uint32_t ReorderEnable; + uint32_t DiscardAdjacency; + uint32_t GSEnable; + uint32_t SemaphoreHandle; +}; + +static inline void +GEN7_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleProgramFlowSPF, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + 
__gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->OutputVertexSize, 23, 28) | + __gen_field(values->OutputTopology, 17, 22) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->IncludeVertexHandles, 10, 10) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->DispatchGRFStartRegisterforURBData, 0, 3) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 25, 31) | + __gen_field(values->ControlDataFormat, 24, 24) | + __gen_field(values->ControlDataHeaderSize, 20, 23) | + __gen_field(values->InstanceControl, 15, 19) | + __gen_field(values->DefaultStreamID, 13, 14) | + __gen_field(values->DispatchMode, 11, 12) | + __gen_field(values->GSStatisticsEnable, 10, 10) | + __gen_field(values->GSInvocationsIncrementValue, 5, 9) | + __gen_field(values->IncludePrimitiveID, 4, 4) | + __gen_field(values->Hint, 3, 3) | + __gen_field(values->ReorderEnable, 2, 2) | + __gen_field(values->DiscardAdjacency, 1, 1) | + __gen_field(values->GSEnable, 0, 0) | + 0; + + dw[6] = + __gen_offset(values->SemaphoreHandle, 0, 11) | + 0; + +} + +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000003 +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 1 + +struct GEN7_3DSTATE_HIER_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + 
uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HierarchicalDepthBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; +}; + +static inline void +GEN7_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_HIER_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct HierarchicalDepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + +} + +#define GEN7_3DSTATE_HS_length 0x00000007 +#define GEN7_3DSTATE_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 27, \ + .DwordLength = 5 + +struct GEN7_3DSTATE_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t MaximumNumberofThreads; + uint32_t Enable; + uint32_t StatisticsEnable; + uint32_t InstanceCount; + uint32_t KernelStartPointer; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; + uint32_t IncludeVertexHandles; + 
uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t SemaphoreHandle; +}; + +static inline void +GEN7_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + __gen_field(values->MaximumNumberofThreads, 0, 6) | + 0; + + dw[2] = + __gen_field(values->Enable, 31, 31) | + __gen_field(values->StatisticsEnable, 29, 29) | + __gen_field(values->InstanceCount, 0, 3) | + 0; + + dw[3] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[5] = + __gen_field(values->SingleProgramFlow, 27, 27) | + __gen_field(values->VectorMaskEnable, 26, 26) | + __gen_field(values->IncludeVertexHandles, 24, 24) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[6] = + __gen_offset(values->SemaphoreHandle, 0, 11) | + 0; + +} + +#define GEN7_3DSTATE_INDEX_BUFFER_length 0x00000003 +#define GEN7_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 +#define GEN7_3DSTATE_INDEX_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +struct 
GEN7_3DSTATE_INDEX_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t MemoryObjectControlState; + uint32_t CutIndexEnable; +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 2 + uint32_t IndexFormat; + uint32_t DwordLength; + __gen_address_type BufferStartingAddress; + __gen_address_type BufferEndingAddress; +}; + +static inline void +GEN7_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_INDEX_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->CutIndexEnable, 10, 10) | + __gen_field(values->IndexFormat, 8, 9) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BufferEndingAddress, dw2); + +} + +#define GEN7_3DSTATE_LINE_STIPPLE_length 0x00000003 +#define GEN7_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 +#define GEN7_3DSTATE_LINE_STIPPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 8, \ + .DwordLength = 1 + +struct GEN7_3DSTATE_LINE_STIPPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + uint32_t CurrentRepeatCounter; + uint32_t CurrentStippleIndex; + uint32_t LineStipplePattern; + uint32_t LineStippleInverseRepeatCount; + uint32_t LineStippleRepeatCount; +}; + +static inline void 
+GEN7_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_LINE_STIPPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | + __gen_field(values->CurrentRepeatCounter, 21, 29) | + __gen_field(values->CurrentStippleIndex, 16, 19) | + __gen_field(values->LineStipplePattern, 0, 15) | + 0; + + dw[2] = + __gen_field(values->LineStippleInverseRepeatCount, 15, 31) | + __gen_field(values->LineStippleRepeatCount, 0, 8) | + 0; + +} + +#define GEN7_3DSTATE_MONOFILTER_SIZE_length 0x00000002 +#define GEN7_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 +#define GEN7_3DSTATE_MONOFILTER_SIZE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_MONOFILTER_SIZE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t MonochromeFilterWidth; + uint32_t MonochromeFilterHeight; +}; + +static inline void +GEN7_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_MONOFILTER_SIZE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MonochromeFilterWidth, 3, 5) | + __gen_field(values->MonochromeFilterHeight, 0, 2) | + 0; + +} + +#define 
GEN7_3DSTATE_MULTISAMPLE_length 0x00000004 +#define GEN7_3DSTATE_MULTISAMPLE_length_bias 0x00000002 +#define GEN7_3DSTATE_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 13, \ + .DwordLength = 2 + +struct GEN7_3DSTATE_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define PIXLOC_CENTER 0 +#define PIXLOC_UL_CORNER 1 + uint32_t PixelLocation; +#define NUMSAMPLES_1 0 +#define NUMSAMPLES_4 2 +#define NUMSAMPLES_8 3 + uint32_t NumberofMultisamples; + uint32_t Sample3XOffset; + uint32_t Sample3YOffset; + uint32_t Sample2XOffset; + uint32_t Sample2YOffset; + uint32_t Sample1XOffset; + uint32_t Sample1YOffset; + uint32_t Sample0XOffset; + uint32_t Sample0YOffset; + uint32_t Sample7XOffset; + uint32_t Sample7YOffset; + uint32_t Sample6XOffset; + uint32_t Sample6YOffset; + uint32_t Sample5XOffset; + uint32_t Sample5YOffset; + uint32_t Sample4XOffset; + uint32_t Sample4YOffset; +}; + +static inline void +GEN7_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelLocation, 4, 4) | + __gen_field(values->NumberofMultisamples, 1, 3) | + 0; + + dw[2] = + __gen_field(values->Sample3XOffset, 28, 31) | + __gen_field(values->Sample3YOffset, 24, 27) | + __gen_field(values->Sample2XOffset, 20, 23) | + __gen_field(values->Sample2YOffset, 16, 19) | + __gen_field(values->Sample1XOffset, 12, 15) | + __gen_field(values->Sample1YOffset, 8, 11) | + __gen_field(values->Sample0XOffset, 4, 7) | + 
__gen_field(values->Sample0YOffset, 0, 3) | + 0; + + dw[3] = + __gen_field(values->Sample7XOffset, 28, 31) | + __gen_field(values->Sample7YOffset, 24, 27) | + __gen_field(values->Sample6XOffset, 20, 23) | + __gen_field(values->Sample6YOffset, 16, 19) | + __gen_field(values->Sample5XOffset, 12, 15) | + __gen_field(values->Sample5YOffset, 8, 11) | + __gen_field(values->Sample4XOffset, 4, 7) | + __gen_field(values->Sample4YOffset, 0, 3) | + 0; + +} + +#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 +#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 +#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_POLY_STIPPLE_OFFSET { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PolygonStippleXOffset; + uint32_t PolygonStippleYOffset; +}; + +static inline void +GEN7_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PolygonStippleXOffset, 8, 12) | + __gen_field(values->PolygonStippleYOffset, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 +#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 +#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 31 + +struct GEN7_3DSTATE_POLY_STIPPLE_PATTERN { + uint32_t CommandType; + uint32_t 
CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PatternRow; +}; + +static inline void +GEN7_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PatternRow, 0, 31) | + 0; + +} + +#define GEN7_3DSTATE_PS_length 0x00000008 +#define GEN7_3DSTATE_PS_length_bias 0x00000002 +#define GEN7_3DSTATE_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 32, \ + .DwordLength = 6 + +struct GEN7_3DSTATE_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer0; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlowSPF; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; + uint32_t SamplerCount; +#define FTZ 0 +#define RET 1 + uint32_t DenormalMode; + uint32_t BindingTableEntryCount; +#define IEEE745 0 +#define Alt 1 + uint32_t FloatingPointMode; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t MaskStackExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreads; + uint32_t PushConstantEnable; + uint32_t AttributeEnable; + uint32_t oMaskPresenttoRenderTarget; + uint32_t RenderTargetFastClearEnable; + uint32_t DualSourceBlendEnable; + uint32_t RenderTargetResolveEnable; +#define POSOFFSET_NONE 0 +#define POSOFFSET_CENTROID 
2 +#define POSOFFSET_SAMPLE 3 + uint32_t PositionXYOffsetSelect; + uint32_t _32PixelDispatchEnable; + uint32_t _16PixelDispatchEnable; + uint32_t _8PixelDispatchEnable; + uint32_t DispatchGRFStartRegisterforConstantSetupData0; + uint32_t DispatchGRFStartRegisterforConstantSetupData1; + uint32_t DispatchGRFStartRegisterforConstantSetupData2; + uint32_t KernelStartPointer1; + uint32_t KernelStartPointer2; +}; + +static inline void +GEN7_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer0, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleProgramFlowSPF, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->DenormalMode, 26, 26) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->RoundingMode, 14, 15) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->MaximumNumberofThreads, 24, 31) | + __gen_field(values->PushConstantEnable, 11, 11) | + __gen_field(values->AttributeEnable, 10, 10) | + __gen_field(values->oMaskPresenttoRenderTarget, 9, 9) | + __gen_field(values->RenderTargetFastClearEnable, 8, 8) | + __gen_field(values->DualSourceBlendEnable, 7, 7) | + __gen_field(values->RenderTargetResolveEnable, 6, 6) | + 
__gen_field(values->PositionXYOffsetSelect, 3, 4) | + __gen_field(values->_32PixelDispatchEnable, 2, 2) | + __gen_field(values->_16PixelDispatchEnable, 1, 1) | + __gen_field(values->_8PixelDispatchEnable, 0, 0) | + 0; + + dw[5] = + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData0, 16, 22) | + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData1, 8, 14) | + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData2, 0, 6) | + 0; + + dw[6] = + __gen_offset(values->KernelStartPointer1, 6, 31) | + 0; + + dw[7] = + __gen_offset(values->KernelStartPointer2, 6, 31) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define _0KB 0 + uint32_t ConstantBufferOffset; +#define _0KB 0 + uint32_t ConstantBufferSize; +}; + +static inline void +GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 19) | + __gen_field(values->ConstantBufferSize, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 +#define 
GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define _0KB 0 + uint32_t ConstantBufferOffset; +#define _0KB 0 + uint32_t ConstantBufferSize; +}; + +static inline void +GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 19) | + __gen_field(values->ConstantBufferSize, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define _0KB 0 + uint32_t ConstantBufferOffset; +#define _0KB 0 + uint32_t ConstantBufferSize; +}; + +static inline void +GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 19) | + __gen_field(values->ConstantBufferSize, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define _0KB 0 + uint32_t ConstantBufferOffset; +#define _0KB 0 + uint32_t ConstantBufferSize; +}; + +static inline void +GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 19) | + __gen_field(values->ConstantBufferSize, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define _0KB 0 + uint32_t 
ConstantBufferOffset; +#define _0KB 0 + uint32_t ConstantBufferSize; +}; + +static inline void +GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 19) | + __gen_field(values->ConstantBufferSize, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2 + +struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 12 + +struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; 
+ uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 45, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSSamplerState; +}; + +static inline void +GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSSamplerState, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, 
\ + ._3DCommandSubOpcode = 46, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSSamplerState; +}; + +static inline void +GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSSamplerState, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 44, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSSamplerState; +}; + +static inline void +GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSSamplerState, 5, 31) | + 0; + +} + +#define 
GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 47, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSSamplerState; +}; + +static inline void +GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSSamplerState, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 43, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSSamplerState; +}; + +static inline void +GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSSamplerState, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_SAMPLE_MASK_length 0x00000002 +#define GEN7_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLE_MASK_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_SAMPLE_MASK { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SampleMask; +}; + +static inline void +GEN7_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLE_MASK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SampleMask, 0, 7) | + 0; + +} + +#define GEN7_3DSTATE_SBE_length 0x0000000e +#define GEN7_3DSTATE_SBE_length_bias 0x00000002 +#define GEN7_3DSTATE_SBE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 31, \ + .DwordLength = 12 + +struct GEN7_3DSTATE_SBE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SWIZ_0_15 0 +#define SWIZ_16_31 1 + uint32_t AttributeSwizzleControlMode; + uint32_t NumberofSFOutputAttributes; + uint32_t AttributeSwizzleEnable; +#define UPPERLEFT 0 +#define LOWERLEFT 1 + uint32_t PointSpriteTextureCoordinateOrigin; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t 
Attribute2n1ComponentOverrideW; + uint32_t Attribute2n1ComponentOverrideZ; + uint32_t Attribute2n1ComponentOverrideY; + uint32_t Attribute2n1ComponentOverrideX; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t Attribute2n1ConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t Attribute2n1SwizzleSelect; + uint32_t Attribute2n1SourceAttribute; + uint32_t Attribute2nComponentOverrideW; + uint32_t Attribute2nComponentOverrideZ; + uint32_t Attribute2nComponentOverrideY; + uint32_t Attribute2nComponentOverrideX; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t Attribute2nConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t Attribute2nSwizzleSelect; + uint32_t Attribute2nSourceAttribute; + uint32_t PointSpriteTextureCoordinateEnable; + uint32_t ConstantInterpolationEnable310; + uint32_t Attribute7WrapShortestEnables; + uint32_t Attribute6WrapShortestEnables; + uint32_t Attribute5WrapShortestEnables; + uint32_t Attribute4WrapShortestEnables; + uint32_t Attribute3WrapShortestEnables; + uint32_t Attribute2WrapShortestEnables; + uint32_t Attribute1WrapShortestEnables; + uint32_t Attribute0WrapShortestEnables; + uint32_t Attribute15WrapShortestEnables; + uint32_t Attribute14WrapShortestEnables; + uint32_t Attribute13WrapShortestEnables; + uint32_t Attribute12WrapShortestEnables; + uint32_t Attribute11WrapShortestEnables; + uint32_t Attribute10WrapShortestEnables; + uint32_t Attribute9WrapShortestEnables; + uint32_t Attribute8WrapShortestEnables; +}; + +static inline void +GEN7_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SBE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + 
__gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AttributeSwizzleControlMode, 28, 28) | + __gen_field(values->NumberofSFOutputAttributes, 22, 27) | + __gen_field(values->AttributeSwizzleEnable, 21, 21) | + __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | + __gen_field(values->VertexURBEntryReadLength, 11, 15) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[2] = + __gen_field(values->Attribute2n1ComponentOverrideW, 31, 31) | + __gen_field(values->Attribute2n1ComponentOverrideZ, 30, 30) | + __gen_field(values->Attribute2n1ComponentOverrideY, 29, 29) | + __gen_field(values->Attribute2n1ComponentOverrideX, 28, 28) | + __gen_field(values->Attribute2n1ConstantSource, 25, 26) | + __gen_field(values->Attribute2n1SwizzleSelect, 22, 23) | + __gen_field(values->Attribute2n1SourceAttribute, 16, 20) | + __gen_field(values->Attribute2nComponentOverrideW, 15, 15) | + __gen_field(values->Attribute2nComponentOverrideZ, 14, 14) | + __gen_field(values->Attribute2nComponentOverrideY, 13, 13) | + __gen_field(values->Attribute2nComponentOverrideX, 12, 12) | + __gen_field(values->Attribute2nConstantSource, 9, 10) | + __gen_field(values->Attribute2nSwizzleSelect, 6, 7) | + __gen_field(values->Attribute2nSourceAttribute, 0, 4) | + 0; + + dw[10] = + __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | + 0; + + dw[11] = + __gen_field(values->ConstantInterpolationEnable310, 0, 31) | + 0; + + dw[12] = + __gen_field(values->Attribute7WrapShortestEnables, 28, 31) | + __gen_field(values->Attribute6WrapShortestEnables, 24, 27) | + __gen_field(values->Attribute5WrapShortestEnables, 20, 23) | + __gen_field(values->Attribute4WrapShortestEnables, 16, 19) | + __gen_field(values->Attribute3WrapShortestEnables, 12, 15) | + __gen_field(values->Attribute2WrapShortestEnables, 8, 
11) | + __gen_field(values->Attribute1WrapShortestEnables, 4, 7) | + __gen_field(values->Attribute0WrapShortestEnables, 0, 3) | + 0; + + dw[13] = + __gen_field(values->Attribute15WrapShortestEnables, 28, 31) | + __gen_field(values->Attribute14WrapShortestEnables, 24, 27) | + __gen_field(values->Attribute13WrapShortestEnables, 20, 23) | + __gen_field(values->Attribute12WrapShortestEnables, 16, 19) | + __gen_field(values->Attribute11WrapShortestEnables, 12, 15) | + __gen_field(values->Attribute10WrapShortestEnables, 8, 11) | + __gen_field(values->Attribute9WrapShortestEnables, 4, 7) | + __gen_field(values->Attribute8WrapShortestEnables, 0, 3) | + 0; + +} + +#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 +#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 +#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 15, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_SCISSOR_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ScissorRectPointer; +}; + +static inline void +GEN7_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ScissorRectPointer, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_SF_length 0x00000007 +#define GEN7_3DSTATE_SF_length_bias 0x00000002 +#define GEN7_3DSTATE_SF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 5 + 
+struct GEN7_3DSTATE_SF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define D32_FLOAT_S8X24_UINT 0 +#define D32_FLOAT 1 +#define D24_UNORM_S8_UINT 2 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t DepthBufferSurfaceFormat; + uint32_t LegacyGlobalDepthBiasEnable; + uint32_t StatisticsEnable; + uint32_t GlobalDepthOffsetEnableSolid; + uint32_t GlobalDepthOffsetEnableWireframe; + uint32_t GlobalDepthOffsetEnablePoint; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t FrontFaceFillMode; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t BackFaceFillMode; + uint32_t ViewTransformEnable; + uint32_t FrontWinding; + uint32_t AntiAliasingEnable; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; + uint32_t LineWidth; + uint32_t LineEndCapAntialiasingRegionWidth; + uint32_t ScissorRectangleEnable; + uint32_t MultisampleRasterizationMode; + uint32_t LastPixelEnable; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleFanProvokingVertexSelect; +#define AALINEDISTANCE_TRUE 1 + uint32_t AALineDistanceMode; + uint32_t VertexSubPixelPrecisionSelect; + uint32_t UsePointWidthState; + uint32_t PointWidth; + uint32_t GlobalDepthOffsetConstant; + uint32_t GlobalDepthOffsetScale; + uint32_t GlobalDepthOffsetClamp; +}; + +static inline void +GEN7_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + 
__gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DepthBufferSurfaceFormat, 12, 14) | + __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | + __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | + __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | + __gen_field(values->FrontFaceFillMode, 5, 6) | + __gen_field(values->BackFaceFillMode, 3, 4) | + __gen_field(values->ViewTransformEnable, 1, 1) | + __gen_field(values->FrontWinding, 0, 0) | + 0; + + dw[2] = + __gen_field(values->AntiAliasingEnable, 31, 31) | + __gen_field(values->CullMode, 29, 30) | + __gen_field(values->LineWidth, 18, 27) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | + __gen_field(values->ScissorRectangleEnable, 11, 11) | + __gen_field(values->MultisampleRasterizationMode, 8, 9) | + 0; + + dw[3] = + __gen_field(values->LastPixelEnable, 31, 31) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | + __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | + __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | + __gen_field(values->AALineDistanceMode, 14, 14) | + __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | + __gen_field(values->UsePointWidthState, 11, 11) | + __gen_field(values->PointWidth, 0, 10) | + 0; + + dw[4] = + __gen_field(values->GlobalDepthOffsetConstant, 0, 31) | + 0; + + dw[5] = + __gen_field(values->GlobalDepthOffsetScale, 0, 31) | + 0; + + dw[6] = + __gen_field(values->GlobalDepthOffsetClamp, 0, 31) | + 0; + +} + +#define GEN7_3DSTATE_SO_BUFFER_length 0x00000004 +#define GEN7_3DSTATE_SO_BUFFER_length_bias 0x00000002 +#define GEN7_3DSTATE_SO_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 2 + +struct 
GEN7_3DSTATE_SO_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOBufferIndex; + uint32_t SOBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + __gen_address_type SurfaceEndAddress; +}; + +static inline void +GEN7_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SO_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOBufferIndex, 29, 30) | + /* Struct SOBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfacePitch, 0, 11) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->SurfaceEndAddress, dw3); + +} + +#define GEN7_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 +#define GEN7_3DSTATE_SO_DECL_LIST_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 23 + +struct GEN7_3DSTATE_SO_DECL_LIST { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StreamtoBufferSelects3; + uint32_t StreamtoBufferSelects2; + uint32_t StreamtoBufferSelects1; + uint32_t StreamtoBufferSelects0; + uint32_t NumEntries3; + uint32_t NumEntries2; + uint32_t NumEntries1; + uint32_t NumEntries0; + /* variable length fields follow */ +}; + +static inline void +GEN7_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SO_DECL_LIST * 
restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->StreamtoBufferSelects3, 12, 15) | + __gen_field(values->StreamtoBufferSelects2, 8, 11) | + __gen_field(values->StreamtoBufferSelects1, 4, 7) | + __gen_field(values->StreamtoBufferSelects0, 0, 3) | + 0; + + dw[2] = + __gen_field(values->NumEntries3, 24, 31) | + __gen_field(values->NumEntries2, 16, 23) | + __gen_field(values->NumEntries1, 8, 15) | + __gen_field(values->NumEntries0, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_3DSTATE_STENCIL_BUFFER_length 0x00000003 +#define GEN7_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 +#define GEN7_3DSTATE_STENCIL_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 1 + +struct GEN7_3DSTATE_STENCIL_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; +}; + +static inline void +GEN7_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_STENCIL_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct StencilBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 
0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + +} + +#define GEN7_3DSTATE_STREAMOUT_length 0x00000003 +#define GEN7_3DSTATE_STREAMOUT_length_bias 0x00000002 +#define GEN7_3DSTATE_STREAMOUT_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 30, \ + .DwordLength = 1 + +struct GEN7_3DSTATE_STREAMOUT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOFunctionEnable; + uint32_t RenderingDisable; + uint32_t RenderStreamSelect; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + uint32_t SOStatisticsEnable; + uint32_t SOBufferEnable3; + uint32_t SOBufferEnable2; + uint32_t SOBufferEnable1; + uint32_t SOBufferEnable0; + uint32_t Stream3VertexReadOffset; + uint32_t Stream3VertexReadLength; + uint32_t Stream2VertexReadOffset; + uint32_t Stream2VertexReadLength; + uint32_t Stream1VertexReadOffset; + uint32_t Stream1VertexReadLength; + uint32_t Stream0VertexReadOffset; + uint32_t Stream0VertexReadLength; +}; + +static inline void +GEN7_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_STREAMOUT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOFunctionEnable, 31, 31) | + __gen_field(values->RenderingDisable, 30, 30) | + __gen_field(values->RenderStreamSelect, 27, 28) | + __gen_field(values->ReorderMode, 26, 26) | + __gen_field(values->SOStatisticsEnable, 25, 25) | + __gen_field(values->SOBufferEnable3, 11, 11) | + __gen_field(values->SOBufferEnable2, 10, 10) | + __gen_field(values->SOBufferEnable1, 9, 9) | + 
__gen_field(values->SOBufferEnable0, 8, 8) | + 0; + + dw[2] = + __gen_field(values->Stream3VertexReadOffset, 29, 29) | + __gen_field(values->Stream3VertexReadLength, 24, 28) | + __gen_field(values->Stream2VertexReadOffset, 21, 21) | + __gen_field(values->Stream2VertexReadLength, 16, 20) | + __gen_field(values->Stream1VertexReadOffset, 13, 13) | + __gen_field(values->Stream1VertexReadLength, 8, 12) | + __gen_field(values->Stream0VertexReadOffset, 5, 5) | + __gen_field(values->Stream0VertexReadLength, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_TE_length 0x00000004 +#define GEN7_3DSTATE_TE_length_bias 0x00000002 +#define GEN7_3DSTATE_TE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 2 + +struct GEN7_3DSTATE_TE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INTEGER 0 +#define ODD_FRACTIONAL 1 +#define EVEN_FRACTIONAL 2 + uint32_t Partitioning; +#define POINT 0 +#define LINE 1 +#define TRI_CW 2 +#define TRI_CCW 3 + uint32_t OutputTopology; +#define QUAD 0 +#define TRI 1 +#define ISOLINE 2 + uint32_t TEDomain; +#define HW_TESS 0 +#define SW_TESS 1 + uint32_t TEMode; + uint32_t TEEnable; + float MaximumTessellationFactorOdd; + float MaximumTessellationFactorNotOdd; +}; + +static inline void +GEN7_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_TE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Partitioning, 12, 13) | + __gen_field(values->OutputTopology, 8, 9) | + __gen_field(values->TEDomain, 4, 5) | + __gen_field(values->TEMode, 1, 2) | + 
__gen_field(values->TEEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->MaximumTessellationFactorOdd) | + 0; + + dw[3] = + __gen_float(values->MaximumTessellationFactorNotOdd) | + 0; + +} + +#define GEN7_3DSTATE_URB_DS_length 0x00000002 +#define GEN7_3DSTATE_URB_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_URB_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 50, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_URB_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DSURBStartingAddress; + uint32_t DSURBEntryAllocationSize; + uint32_t DSNumberofURBEntries; +}; + +static inline void +GEN7_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_URB_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DSURBStartingAddress, 25, 29) | + __gen_field(values->DSURBEntryAllocationSize, 16, 24) | + __gen_field(values->DSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN7_3DSTATE_URB_GS_length 0x00000002 +#define GEN7_3DSTATE_URB_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_URB_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 51, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_URB_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t GSURBStartingAddress; + uint32_t GSURBEntryAllocationSize; + uint32_t GSNumberofURBEntries; +}; + +static inline void +GEN7_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, + 
const struct GEN7_3DSTATE_URB_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->GSURBStartingAddress, 25, 29) | + __gen_field(values->GSURBEntryAllocationSize, 16, 24) | + __gen_field(values->GSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN7_3DSTATE_URB_HS_length 0x00000002 +#define GEN7_3DSTATE_URB_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_URB_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 49, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_URB_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HSURBStartingAddress; + uint32_t HSURBEntryAllocationSize; + uint32_t HSNumberofURBEntries; +}; + +static inline void +GEN7_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_URB_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->HSURBStartingAddress, 25, 29) | + __gen_field(values->HSURBEntryAllocationSize, 16, 24) | + __gen_field(values->HSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN7_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 +#define GEN7_3DSTATE_VERTEX_BUFFERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 8 + +struct GEN7_3DSTATE_VERTEX_BUFFERS { + uint32_t CommandType; + uint32_t 
CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN7_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VERTEX_BUFFERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 +#define GEN7_3DSTATE_VERTEX_ELEMENTS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 9 + +struct GEN7_3DSTATE_VERTEX_ELEMENTS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN7_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VERTEX_ELEMENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_3DSTATE_VF_STATISTICS_length 0x00000001 +#define GEN7_3DSTATE_VF_STATISTICS_length_bias 0x00000001 +#define GEN7_3DSTATE_VF_STATISTICS_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 11 + +struct GEN7_3DSTATE_VF_STATISTICS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t 
_3DCommandSubOpcode; + uint32_t StatisticsEnable; +}; + +static inline void +GEN7_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VF_STATISTICS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->StatisticsEnable, 0, 0) | + 0; + +} + +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 35, \ + .DwordLength = 0 + +struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t CCViewportPointer; +}; + +static inline void +GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->CCViewportPointer, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 33, \ + .DwordLength = 0 + +struct 
GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SFClipViewportPointer; +}; + +static inline void +GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SFClipViewportPointer, 6, 31) | + 0; + +} + +#define GEN7_3DSTATE_VS_length 0x00000006 +#define GEN7_3DSTATE_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 16, \ + .DwordLength = 4 + +struct GEN7_3DSTATE_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleVertexDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBaseOffset; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterforURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + uint32_t StatisticsEnable; + uint32_t VertexCacheDisable; + uint32_t 
VSFunctionEnable; +}; + +static inline void +GEN7_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleVertexDispatch, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBaseOffset, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->DispatchGRFStartRegisterforURBData, 20, 24) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 25, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->VertexCacheDisable, 1, 1) | + __gen_field(values->VSFunctionEnable, 0, 0) | + 0; + +} + +#define GEN7_3DSTATE_WM_length 0x00000003 +#define GEN7_3DSTATE_WM_length_bias 0x00000002 +#define GEN7_3DSTATE_WM_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 1 + +struct GEN7_3DSTATE_WM { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StatisticsEnable; + uint32_t DepthBufferClear; + uint32_t ThreadDispatchEnable; + uint32_t 
DepthBufferResolveEnable; + uint32_t HierarchicalDepthBufferResolveEnable; + uint32_t LegacyDiamondLineRasterization; + uint32_t PixelShaderKillPixel; +#define PSCDEPTH_OFF 0 +#define PSCDEPTH_ON 1 +#define PSCDEPTH_ON_GE 2 +#define PSCDEPTH_ON_LE 3 + uint32_t PixelShaderComputedDepthMode; +#define EDSC_NORMAL 0 +#define EDSC_PSEXEC 1 +#define EDSC_PREPS 2 + uint32_t EarlyDepthStencilControl; + uint32_t PixelShaderUsesSourceDepth; + uint32_t PixelShaderUsesSourceW; +#define INTERP_PIXEL 0 +#define INTERP_CENTROID 2 +#define INTERP_SAMPLE 3 + uint32_t PositionZWInterpolationMode; + uint32_t BarycentricInterpolationMode; + uint32_t PixelShaderUsesInputCoverageMask; + uint32_t LineEndCapAntialiasingRegionWidth; + uint32_t LineAntialiasingRegionWidth; + uint32_t PolygonStippleEnable; + uint32_t LineStippleEnable; +#define RASTRULE_UPPER_LEFT 0 +#define RASTRULE_UPPER_RIGHT 1 + uint32_t PointRasterizationRule; +#define MSRASTMODE_OFF_PIXEL 0 +#define MSRASTMODE_OFF_PATTERN 1 +#define MSRASTMODE_ON_PIXEL 2 +#define MSRASTMODE_ON_PATTERN 3 + uint32_t MultisampleRasterizationMode; +#define MSDISPMODE_PERSAMPLE 0 +#define MSDISPMODE_PERPIXEL 1 + uint32_t MultisampleDispatchMode; +}; + +static inline void +GEN7_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_WM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StatisticsEnable, 31, 31) | + __gen_field(values->DepthBufferClear, 30, 30) | + __gen_field(values->ThreadDispatchEnable, 29, 29) | + __gen_field(values->DepthBufferResolveEnable, 28, 28) | + __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | + 
__gen_field(values->PixelShaderKillPixel, 25, 25) | + __gen_field(values->PixelShaderComputedDepthMode, 23, 24) | + __gen_field(values->EarlyDepthStencilControl, 21, 22) | + __gen_field(values->PixelShaderUsesSourceDepth, 20, 20) | + __gen_field(values->PixelShaderUsesSourceW, 19, 19) | + __gen_field(values->PositionZWInterpolationMode, 17, 18) | + __gen_field(values->BarycentricInterpolationMode, 11, 16) | + __gen_field(values->PixelShaderUsesInputCoverageMask, 10, 10) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | + __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | + __gen_field(values->PolygonStippleEnable, 4, 4) | + __gen_field(values->LineStippleEnable, 3, 3) | + __gen_field(values->PointRasterizationRule, 2, 2) | + __gen_field(values->MultisampleRasterizationMode, 0, 1) | + 0; + + dw[2] = + __gen_field(values->MultisampleDispatchMode, 31, 31) | + 0; + +} + +#define GEN7_GPGPU_OBJECT_length 0x00000008 +#define GEN7_GPGPU_OBJECT_length_bias 0x00000002 +#define GEN7_GPGPU_OBJECT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 4, \ + .DwordLength = 6 + +struct GEN7_GPGPU_OBJECT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t SharedLocalMemoryFixedOffset; + uint32_t InterfaceDescriptorOffset; + uint32_t SharedLocalMemoryOffset; + uint32_t EndofThreadGroup; +#define HalfSlice1 2 +#define HalfSlice0 1 +#define EitherHalfSlice 0 + uint32_t HalfSliceDestinationSelect; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t ThreadGroupIDX; + uint32_t ThreadGroupIDY; + uint32_t ThreadGroupIDZ; + uint32_t ExecutionMask; +}; + +static inline void +GEN7_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_GPGPU_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) 
| + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SharedLocalMemoryFixedOffset, 7, 7) | + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->SharedLocalMemoryOffset, 28, 31) | + __gen_field(values->EndofThreadGroup, 24, 24) | + __gen_field(values->HalfSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ThreadGroupIDX, 0, 31) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDY, 0, 31) | + 0; + + dw[6] = + __gen_field(values->ThreadGroupIDZ, 0, 31) | + 0; + + dw[7] = + __gen_field(values->ExecutionMask, 0, 31) | + 0; + +} + +#define GEN7_GPGPU_WALKER_length 0x0000000b +#define GEN7_GPGPU_WALKER_length_bias 0x00000002 +#define GEN7_GPGPU_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcodeA = 5, \ + .DwordLength = 9 + +struct GEN7_GPGPU_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcodeA; + uint32_t IndirectParameterEnable; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; +#define SIMD8 0 +#define SIMD16 1 +#define SIMD32 2 + uint32_t SIMDSize; + uint32_t ThreadDepthCounterMaximum; + uint32_t ThreadHeightCounterMaximum; + uint32_t ThreadWidthCounterMaximum; + uint32_t ThreadGroupIDStartingX; + uint32_t ThreadGroupIDXDimension; + uint32_t ThreadGroupIDStartingY; + uint32_t ThreadGroupIDYDimension; + uint32_t ThreadGroupIDStartingZ; + uint32_t ThreadGroupIDZDimension; + uint32_t RightExecutionMask; + uint32_t BottomExecutionMask; +}; + +static inline void +GEN7_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, + 
const struct GEN7_GPGPU_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcodeA, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->SIMDSize, 30, 31) | + __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | + __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | + __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | + 0; + + dw[3] = + __gen_field(values->ThreadGroupIDStartingX, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ThreadGroupIDXDimension, 0, 31) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDStartingY, 0, 31) | + 0; + + dw[6] = + __gen_field(values->ThreadGroupIDYDimension, 0, 31) | + 0; + + dw[7] = + __gen_field(values->ThreadGroupIDStartingZ, 0, 31) | + 0; + + dw[8] = + __gen_field(values->ThreadGroupIDZDimension, 0, 31) | + 0; + + dw[9] = + __gen_field(values->RightExecutionMask, 0, 31) | + 0; + + dw[10] = + __gen_field(values->BottomExecutionMask, 0, 31) | + 0; + +} + +#define GEN7_MEDIA_CURBE_LOAD_length 0x00000004 +#define GEN7_MEDIA_CURBE_LOAD_length_bias 0x00000002 +#define GEN7_MEDIA_CURBE_LOAD_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 1, \ + .DwordLength = 2 + +struct GEN7_MEDIA_CURBE_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t CURBETotalDataLength; + uint32_t CURBEDataStartAddress; +}; + +static inline void +GEN7_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_CURBE_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * 
restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->CURBETotalDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->CURBEDataStartAddress, 0, 31) | + 0; + +} + +#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 +#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 +#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 2, \ + .DwordLength = 2 + +struct GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorTotalLength; + uint32_t InterfaceDescriptorDataStartAddress; +}; + +static inline void +GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | + 0; + +} + +#define GEN7_MEDIA_OBJECT_length_bias 0x00000002 +#define GEN7_MEDIA_OBJECT_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 0 + +struct GEN7_MEDIA_OBJECT { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + 
uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define HalfSlice1 2 +#define HalfSlice0 1 +#define Eitherhalfslice 0 + uint32_t HalfSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + uint32_t ScoreboardMask; + /* variable length fields follow */ +}; + +static inline void +GEN7_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->HalfSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_MEDIA_OBJECT_PRT_length 0x00000010 +#define GEN7_MEDIA_OBJECT_PRT_length_bias 0x00000002 +#define GEN7_MEDIA_OBJECT_PRT_header \ + .CommandType = 3, \ + 
.Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 2, \ + .DwordLength = 14 + +struct GEN7_MEDIA_OBJECT_PRT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t ChildrenPresent; + uint32_t PRT_FenceNeeded; +#define Rootthreadqueue 0 +#define VFEstateflush 1 + uint32_t PRT_FenceType; + uint32_t InlineData; +}; + +static inline void +GEN7_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_OBJECT_PRT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->PRT_FenceNeeded, 23, 23) | + __gen_field(values->PRT_FenceType, 22, 22) | + 0; + + dw[3] = + 0; + + dw[4] = + __gen_field(values->InlineData, 0, 31) | + 0; + +} + +#define GEN7_MEDIA_OBJECT_WALKER_length_bias 0x00000002 +#define GEN7_MEDIA_OBJECT_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 3 + +struct GEN7_MEDIA_OBJECT_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t ScoreboardMask; + uint32_t DualMode; + uint32_t Repel; + uint32_t ColorCountMinusOne; + 
uint32_t MiddleLoopExtraSteps; + uint32_t LocalMidLoopUnitY; + uint32_t MidLoopUnitX; + uint32_t GlobalLoopExecCount; + uint32_t LocalLoopExecCount; + uint32_t BlockResolutionY; + uint32_t BlockResolutionX; + uint32_t LocalStartY; + uint32_t LocalStartX; + uint32_t LocalEndY; + uint32_t LocalEndX; + uint32_t LocalOuterLoopStrideY; + uint32_t LocalOuterLoopStrideX; + uint32_t LocalInnerLoopUnitY; + uint32_t LocalInnerLoopUnitX; + uint32_t GlobalResolutionY; + uint32_t GlobalResolutionX; + uint32_t GlobalStartY; + uint32_t GlobalStartX; + uint32_t GlobalOuterLoopStrideY; + uint32_t GlobalOuterLoopStrideX; + uint32_t GlobalInnerLoopUnitY; + uint32_t GlobalInnerLoopUnitX; + /* variable length fields follow */ +}; + +static inline void +GEN7_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_OBJECT_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + 0; + + dw[5] = + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->DualMode, 31, 31) | + __gen_field(values->Repel, 30, 30) | + __gen_field(values->ColorCountMinusOne, 24, 27) | + __gen_field(values->MiddleLoopExtraSteps, 16, 20) | + __gen_field(values->LocalMidLoopUnitY, 12, 13) | + __gen_field(values->MidLoopUnitX, 8, 9) | + 0; + + dw[7] = + __gen_field(values->GlobalLoopExecCount, 16, 25) | + 
__gen_field(values->LocalLoopExecCount, 0, 9) | + 0; + + dw[8] = + __gen_field(values->BlockResolutionY, 16, 24) | + __gen_field(values->BlockResolutionX, 0, 8) | + 0; + + dw[9] = + __gen_field(values->LocalStartY, 16, 24) | + __gen_field(values->LocalStartX, 0, 8) | + 0; + + dw[10] = + __gen_field(values->LocalEndY, 16, 24) | + __gen_field(values->LocalEndX, 0, 8) | + 0; + + dw[11] = + __gen_field(values->LocalOuterLoopStrideY, 16, 25) | + __gen_field(values->LocalOuterLoopStrideX, 0, 9) | + 0; + + dw[12] = + __gen_field(values->LocalInnerLoopUnitY, 16, 25) | + __gen_field(values->LocalInnerLoopUnitX, 0, 9) | + 0; + + dw[13] = + __gen_field(values->GlobalResolutionY, 16, 24) | + __gen_field(values->GlobalResolutionX, 0, 8) | + 0; + + dw[14] = + __gen_field(values->GlobalStartY, 16, 25) | + __gen_field(values->GlobalStartX, 0, 9) | + 0; + + dw[15] = + __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | + __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | + 0; + + dw[16] = + __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | + __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_MEDIA_STATE_FLUSH_length 0x00000002 +#define GEN7_MEDIA_STATE_FLUSH_length_bias 0x00000002 +#define GEN7_MEDIA_STATE_FLUSH_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 4, \ + .DwordLength = 0 + +struct GEN7_MEDIA_STATE_FLUSH { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t WatermarkRequired; + uint32_t InterfaceDescriptorOffset; +}; + +static inline void +GEN7_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_STATE_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + 
__gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->WatermarkRequired, 6, 6) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + +} + +#define GEN7_MEDIA_VFE_STATE_length 0x00000008 +#define GEN7_MEDIA_VFE_STATE_length_bias 0x00000002 +#define GEN7_MEDIA_VFE_STATE_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 0, \ + .DwordLength = 6 + +struct GEN7_MEDIA_VFE_STATE { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreads; + uint32_t NumberofURBEntries; +#define Maintainingtheexistingtimestampstate 0 +#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 + uint32_t ResetGatewayTimer; +#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 +#define BypassingOpenGatewayCloseGatewayprotocol 1 + uint32_t BypassGatewayControl; +#define NoMMIOreadwriteallowed 0 +#define MMIOreadwritetoanyaddress 2 + uint32_t GatewayMMIOAccessControl; + uint32_t GPGPUMode; + uint32_t URBEntryAllocationSize; + uint32_t CURBEAllocationSize; +#define Scoreboarddisabled 0 +#define Scoreboardenabled 1 + uint32_t ScoreboardEnable; +#define StallingScoreboard 0 +#define NonStallingScoreboard 1 + uint32_t ScoreboardType; + uint32_t ScoreboardMask; + uint32_t Scoreboard3DeltaY; + uint32_t Scoreboard3DeltaX; + uint32_t Scoreboard2DeltaY; + uint32_t Scoreboard2DeltaX; + uint32_t Scoreboard1DeltaY; + uint32_t Scoreboard1DeltaX; + uint32_t Scoreboard0DeltaY; + uint32_t Scoreboard0DeltaX; + uint32_t Scoreboard7DeltaY; + uint32_t Scoreboard7DeltaX; + uint32_t Scoreboard6DeltaY; + uint32_t Scoreboard6DeltaX; + uint32_t Scoreboard5DeltaY; + uint32_t Scoreboard5DeltaX; + uint32_t Scoreboard4DeltaY; + uint32_t Scoreboard4DeltaX; +}; + +static inline void 
+GEN7_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_VFE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[2] = + __gen_field(values->MaximumNumberofThreads, 16, 31) | + __gen_field(values->NumberofURBEntries, 8, 15) | + __gen_field(values->ResetGatewayTimer, 7, 7) | + __gen_field(values->BypassGatewayControl, 6, 6) | + __gen_field(values->GatewayMMIOAccessControl, 3, 4) | + __gen_field(values->GPGPUMode, 2, 2) | + 0; + + dw[3] = + 0; + + dw[4] = + __gen_field(values->URBEntryAllocationSize, 16, 31) | + __gen_field(values->CURBEAllocationSize, 0, 15) | + 0; + + dw[5] = + __gen_field(values->ScoreboardEnable, 31, 31) | + __gen_field(values->ScoreboardType, 30, 30) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->Scoreboard3DeltaY, 28, 31) | + __gen_field(values->Scoreboard3DeltaX, 24, 27) | + __gen_field(values->Scoreboard2DeltaY, 20, 23) | + __gen_field(values->Scoreboard2DeltaX, 16, 19) | + __gen_field(values->Scoreboard1DeltaY, 12, 15) | + __gen_field(values->Scoreboard1DeltaX, 8, 11) | + __gen_field(values->Scoreboard0DeltaY, 4, 7) | + __gen_field(values->Scoreboard0DeltaX, 0, 3) | + 0; + + dw[7] = + __gen_field(values->Scoreboard7DeltaY, 28, 31) | + __gen_field(values->Scoreboard7DeltaX, 24, 27) | + __gen_field(values->Scoreboard6DeltaY, 20, 23) | + __gen_field(values->Scoreboard6DeltaX, 16, 19) | + __gen_field(values->Scoreboard5DeltaY, 12, 15) | + __gen_field(values->Scoreboard5DeltaX, 8, 11) | + __gen_field(values->Scoreboard4DeltaY, 4, 7) | + __gen_field(values->Scoreboard4DeltaX, 0, 
3) | + 0; + +} + +#define GEN7_MI_ARB_CHECK_length 0x00000001 +#define GEN7_MI_ARB_CHECK_length_bias 0x00000001 +#define GEN7_MI_ARB_CHECK_header \ + .CommandType = 0, \ + .MICommandOpcode = 5 + +struct GEN7_MI_ARB_CHECK { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN7_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_ARB_CHECK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN7_MI_ARB_ON_OFF_length 0x00000001 +#define GEN7_MI_ARB_ON_OFF_length_bias 0x00000001 +#define GEN7_MI_ARB_ON_OFF_header \ + .CommandType = 0, \ + .MICommandOpcode = 8 + +struct GEN7_MI_ARB_ON_OFF { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ArbitrationEnable; +}; + +static inline void +GEN7_MI_ARB_ON_OFF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_ARB_ON_OFF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ArbitrationEnable, 0, 0) | + 0; + +} + +#define GEN7_MI_BATCH_BUFFER_END_length 0x00000001 +#define GEN7_MI_BATCH_BUFFER_END_length_bias 0x00000001 +#define GEN7_MI_BATCH_BUFFER_END_header \ + .CommandType = 0, \ + .MICommandOpcode = 10 + +struct GEN7_MI_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN7_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN7_MI_BATCH_BUFFER_START_length 0x00000002 +#define GEN7_MI_BATCH_BUFFER_START_length_bias 0x00000002 +#define 
GEN7_MI_BATCH_BUFFER_START_header \ + .CommandType = 0, \ + .MICommandOpcode = 49, \ + .DwordLength = 0 + +struct GEN7_MI_BATCH_BUFFER_START { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ClearCommandBufferEnable; +#define ASI_GGTT 0 +#define ASI_PPGTT 1 + uint32_t AddressSpaceIndicator; + uint32_t DwordLength; + __gen_address_type BatchBufferStartAddress; +}; + +static inline void +GEN7_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_BATCH_BUFFER_START * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ClearCommandBufferEnable, 11, 11) | + __gen_field(values->AddressSpaceIndicator, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); + +} + +#define GEN7_MI_CLFLUSH_length_bias 0x00000002 +#define GEN7_MI_CLFLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 39 + +struct GEN7_MI_CLFLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type PageBaseAddress; + uint32_t StartingCachelineOffset; + __gen_address_type PageBaseAddressHigh; + /* variable length fields follow */ +}; + +static inline void +GEN7_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_CLFLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->StartingCachelineOffset, 6, 11) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PageBaseAddress, 
dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PageBaseAddressHigh, dw2); + + /* variable length fields follow */ +} + +#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000002 +#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 +#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_header\ + .CommandType = 0, \ + .MICommandOpcode = 54, \ + .UseGlobalGTT = 0, \ + .CompareSemaphore = 0, \ + .DwordLength = 0 + +struct GEN7_MI_CONDITIONAL_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t CompareSemaphore; + uint32_t DwordLength; + uint32_t CompareDataDword; + __gen_address_type CompareAddress; +}; + +static inline void +GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->CompareSemaphore, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CompareDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); + +} + +#define GEN7_MI_FLUSH_length 0x00000001 +#define GEN7_MI_FLUSH_length_bias 0x00000001 +#define GEN7_MI_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 4 + +struct GEN7_MI_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t IndirectStatePointersDisable; + uint32_t GenericMediaStateClear; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; +#define Flush 0 +#define DontFlush 1 + uint32_t RenderCacheFlushInhibit; +#define DontInvalidate 0 +#define Invalidate 1 + uint32_t StateInstructionCacheInvalidate; +}; + +static inline void +GEN7_MI_FLUSH_pack(__gen_user_data *data, 
void * restrict dst, + const struct GEN7_MI_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IndirectStatePointersDisable, 5, 5) | + __gen_field(values->GenericMediaStateClear, 4, 4) | + __gen_field(values->GlobalSnapshotCountReset, 3, 3) | + __gen_field(values->RenderCacheFlushInhibit, 2, 2) | + __gen_field(values->StateInstructionCacheInvalidate, 1, 1) | + 0; + +} + +#define GEN7_MI_LOAD_REGISTER_IMM_length 0x00000003 +#define GEN7_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 +#define GEN7_MI_LOAD_REGISTER_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 34, \ + .DwordLength = 1 + +struct GEN7_MI_LOAD_REGISTER_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ByteWriteDisables; + uint32_t DwordLength; + uint32_t RegisterOffset; + uint32_t DataDWord; +}; + +static inline void +GEN7_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_LOAD_REGISTER_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ByteWriteDisables, 8, 11) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->RegisterOffset, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DataDWord, 0, 31) | + 0; + +} + +#define GEN7_MI_LOAD_REGISTER_MEM_length 0x00000003 +#define GEN7_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 +#define GEN7_MI_LOAD_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 41, \ + .DwordLength = 1 + +struct GEN7_MI_LOAD_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t AsyncModeEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void 
+GEN7_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_LOAD_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->AsyncModeEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN7_MI_NOOP_length 0x00000001 +#define GEN7_MI_NOOP_length_bias 0x00000001 +#define GEN7_MI_NOOP_header \ + .CommandType = 0, \ + .MICommandOpcode = 0 + +struct GEN7_MI_NOOP { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t IdentificationNumberRegisterWriteEnable; + uint32_t IdentificationNumber; +}; + +static inline void +GEN7_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_NOOP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | + __gen_field(values->IdentificationNumber, 0, 21) | + 0; + +} + +#define GEN7_MI_PREDICATE_length 0x00000001 +#define GEN7_MI_PREDICATE_length_bias 0x00000001 +#define GEN7_MI_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 12 + +struct GEN7_MI_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define KEEP 0 +#define LOAD 2 +#define LOADINV 3 + uint32_t LoadOperation; +#define COMBINE_SET 0 +#define COMBINE_AND 1 +#define COMBINE_OR 2 +#define COMBINE_XOR 3 + uint32_t CombineOperation; +#define COMPARE_SRCS_EQUAL 2 +#define COMPARE_DELTAS_EQUAL 3 + uint32_t CompareOperation; +}; + +static inline void +GEN7_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + 
const struct GEN7_MI_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->LoadOperation, 6, 7) | + __gen_field(values->CombineOperation, 3, 4) | + __gen_field(values->CompareOperation, 0, 1) | + 0; + +} + +#define GEN7_MI_REPORT_HEAD_length 0x00000001 +#define GEN7_MI_REPORT_HEAD_length_bias 0x00000001 +#define GEN7_MI_REPORT_HEAD_header \ + .CommandType = 0, \ + .MICommandOpcode = 7 + +struct GEN7_MI_REPORT_HEAD { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN7_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_REPORT_HEAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN7_MI_SEMAPHORE_MBOX_length 0x00000003 +#define GEN7_MI_SEMAPHORE_MBOX_length_bias 0x00000002 +#define GEN7_MI_SEMAPHORE_MBOX_header \ + .CommandType = 0, \ + .MICommandOpcode = 22, \ + .DwordLength = 1 + +struct GEN7_MI_SEMAPHORE_MBOX { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RVSYNC 0 +#define RBSYNC 2 +#define UseGeneralRegisterSelect 3 + uint32_t RegisterSelect; + uint32_t DwordLength; + uint32_t SemaphoreDataDword; +}; + +static inline void +GEN7_MI_SEMAPHORE_MBOX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_SEMAPHORE_MBOX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->RegisterSelect, 16, 17) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SemaphoreDataDword, 0, 31) | + 0; + + dw[2] = + 0; + +} + +#define GEN7_MI_SET_CONTEXT_length 0x00000002 +#define GEN7_MI_SET_CONTEXT_length_bias 0x00000002 +#define 
GEN7_MI_SET_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 24, \ + .DwordLength = 0 + +struct GEN7_MI_SET_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type LogicalContextAddress; + uint32_t ReservedMustbe1; + uint32_t ExtendedStateSaveEnable; + uint32_t ExtendedStateRestoreEnable; + uint32_t ForceRestore; + uint32_t RestoreInhibit; +}; + +static inline void +GEN7_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_SET_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->ReservedMustbe1, 8, 8) | + __gen_field(values->ExtendedStateSaveEnable, 3, 3) | + __gen_field(values->ExtendedStateRestoreEnable, 2, 2) | + __gen_field(values->ForceRestore, 1, 1) | + __gen_field(values->RestoreInhibit, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); + +} + +#define GEN7_MI_STORE_DATA_IMM_length 0x00000004 +#define GEN7_MI_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN7_MI_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 32, \ + .DwordLength = 2 + +struct GEN7_MI_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t DwordLength; + uint32_t Address; + uint32_t CoreModeEnable; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN7_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + 0; + + dw[2] = + 
__gen_field(values->Address, 2, 31) | + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN7_MI_STORE_DATA_INDEX_length 0x00000003 +#define GEN7_MI_STORE_DATA_INDEX_length_bias 0x00000002 +#define GEN7_MI_STORE_DATA_INDEX_header \ + .CommandType = 0, \ + .MICommandOpcode = 33, \ + .DwordLength = 1 + +struct GEN7_MI_STORE_DATA_INDEX { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t Offset; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN7_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_STORE_DATA_INDEX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Offset, 2, 11) | + 0; + + dw[2] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[3] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN7_MI_SUSPEND_FLUSH_length 0x00000001 +#define GEN7_MI_SUSPEND_FLUSH_length_bias 0x00000001 +#define GEN7_MI_SUSPEND_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 11 + +struct GEN7_MI_SUSPEND_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t SuspendFlush; +}; + +static inline void +GEN7_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_SUSPEND_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SuspendFlush, 0, 0) | + 0; + +} + +#define GEN7_MI_TOPOLOGY_FILTER_length 0x00000001 +#define GEN7_MI_TOPOLOGY_FILTER_length_bias 0x00000001 +#define GEN7_MI_TOPOLOGY_FILTER_header \ + .CommandType = 0, \ + 
.MICommandOpcode = 13 + +struct GEN7_MI_TOPOLOGY_FILTER { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t TopologyFilterValue; +}; + +static inline void +GEN7_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_TOPOLOGY_FILTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->TopologyFilterValue, 0, 5) | + 0; + +} + +#define GEN7_MI_UPDATE_GTT_length_bias 0x00000002 +#define GEN7_MI_UPDATE_GTT_header \ + .CommandType = 0, \ + .MICommandOpcode = 35 + +struct GEN7_MI_UPDATE_GTT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type EntryAddress; + /* variable length fields follow */ +}; + +static inline void +GEN7_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_UPDATE_GTT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); + + /* variable length fields follow */ +} + +#define GEN7_MI_URB_CLEAR_length 0x00000002 +#define GEN7_MI_URB_CLEAR_length_bias 0x00000002 +#define GEN7_MI_URB_CLEAR_header \ + .CommandType = 0, \ + .MICommandOpcode = 25, \ + .DwordLength = 0 + +struct GEN7_MI_URB_CLEAR { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBClearLength; + uint32_t URBAddress; +}; + +static inline void +GEN7_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_URB_CLEAR * restrict values) +{ + uint32_t *dw = (uint32_t * 
restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBClearLength, 16, 28) | + __gen_field(values->URBAddress, 0, 13) | + 0; + +} + +#define GEN7_MI_USER_INTERRUPT_length 0x00000001 +#define GEN7_MI_USER_INTERRUPT_length_bias 0x00000001 +#define GEN7_MI_USER_INTERRUPT_header \ + .CommandType = 0, \ + .MICommandOpcode = 2 + +struct GEN7_MI_USER_INTERRUPT { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN7_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_USER_INTERRUPT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN7_MI_WAIT_FOR_EVENT_length 0x00000001 +#define GEN7_MI_WAIT_FOR_EVENT_length_bias 0x00000001 +#define GEN7_MI_WAIT_FOR_EVENT_header \ + .CommandType = 0, \ + .MICommandOpcode = 3 + +struct GEN7_MI_WAIT_FOR_EVENT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DisplayPipeCHorizontalBlankWaitEnable; + uint32_t DisplayPipeCVerticalBlankWaitEnable; + uint32_t DisplaySpriteCFlipPendingWaitEnable; +#define Notenabled 0 + uint32_t ConditionCodeWaitSelect; + uint32_t DisplayPlaneCFlipPendingWaitEnable; + uint32_t DisplayPipeCScanLineWaitEnable; + uint32_t DisplayPipeBHorizontalBlankWaitEnable; + uint32_t DisplayPipeBVerticalBlankWaitEnable; + uint32_t DisplaySpriteBFlipPendingWaitEnable; + uint32_t DisplayPlaneBFlipPendingWaitEnable; + uint32_t DisplayPipeBScanLineWaitEnable; + uint32_t DisplayPipeAHorizontalBlankWaitEnable; + uint32_t DisplayPipeAVerticalBlankWaitEnable; + uint32_t DisplaySpriteAFlipPendingWaitEnable; + uint32_t DisplayPlaneAFlipPendingWaitEnable; + uint32_t DisplayPipeAScanLineWaitEnable; +}; + +static inline void +GEN7_MI_WAIT_FOR_EVENT_pack(__gen_user_data 
*data, void * restrict dst, + const struct GEN7_MI_WAIT_FOR_EVENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPipeCHorizontalBlankWaitEnable, 22, 22) | + __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 21) | + __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | + __gen_field(values->ConditionCodeWaitSelect, 16, 19) | + __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | + __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | + __gen_field(values->DisplayPipeBHorizontalBlankWaitEnable, 13, 13) | + __gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | + __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | + __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | + __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | + __gen_field(values->DisplayPipeAHorizontalBlankWaitEnable, 5, 5) | + __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | + __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | + __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | + __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | + 0; + +} + +#define GEN7_PIPE_CONTROL_length 0x00000005 +#define GEN7_PIPE_CONTROL_length_bias 0x00000002 +#define GEN7_PIPE_CONTROL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 2, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 3 + +struct GEN7_PIPE_CONTROL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DAT_PPGTT 0 +#define DAT_GGTT 1 + uint32_t DestinationAddressType; +#define NoLRIOperation 0 +#define MMIOWriteImmediateData 1 + uint32_t LRIPostSyncOperation; + uint32_t StoreDataIndex; + uint32_t CommandStreamerStallEnable; +#define DontReset 0 
+#define Reset 1 + uint32_t GlobalSnapshotCountReset; + uint32_t TLBInvalidate; + uint32_t GenericMediaStateClear; +#define NoWrite 0 +#define WriteImmediateData 1 +#define WritePSDepthCount 2 +#define WriteTimestamp 3 + uint32_t PostSyncOperation; + uint32_t DepthStallEnable; +#define DisableFlush 0 +#define EnableFlush 1 + uint32_t RenderTargetCacheFlushEnable; + uint32_t InstructionCacheInvalidateEnable; + uint32_t TextureCacheInvalidationEnable; + uint32_t IndirectStatePointersDisable; + uint32_t NotifyEnable; + uint32_t PipeControlFlushEnable; + uint32_t DCFlushEnable; + uint32_t VFCacheInvalidationEnable; + uint32_t ConstantCacheInvalidationEnable; + uint32_t StateCacheInvalidationEnable; + uint32_t StallAtPixelScoreboard; +#define FlushDisabled 0 +#define FlushEnabled 1 + uint32_t DepthCacheFlushEnable; + __gen_address_type Address; + uint32_t ImmediateData; + uint32_t ImmediateData0; +}; + +static inline void +GEN7_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_PIPE_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DestinationAddressType, 24, 24) | + __gen_field(values->LRIPostSyncOperation, 23, 23) | + __gen_field(values->StoreDataIndex, 21, 21) | + __gen_field(values->CommandStreamerStallEnable, 20, 20) | + __gen_field(values->GlobalSnapshotCountReset, 19, 19) | + __gen_field(values->TLBInvalidate, 18, 18) | + __gen_field(values->GenericMediaStateClear, 16, 16) | + __gen_field(values->PostSyncOperation, 14, 15) | + __gen_field(values->DepthStallEnable, 13, 13) | + __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | + __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | + 
__gen_field(values->TextureCacheInvalidationEnable, 10, 10) | + __gen_field(values->IndirectStatePointersDisable, 9, 9) | + __gen_field(values->NotifyEnable, 8, 8) | + __gen_field(values->PipeControlFlushEnable, 7, 7) | + __gen_field(values->DCFlushEnable, 5, 5) | + __gen_field(values->VFCacheInvalidationEnable, 4, 4) | + __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | + __gen_field(values->StateCacheInvalidationEnable, 2, 2) | + __gen_field(values->StallAtPixelScoreboard, 1, 1) | + __gen_field(values->DepthCacheFlushEnable, 0, 0) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->Address, dw2); + + dw[3] = + __gen_field(values->ImmediateData, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ImmediateData, 0, 31) | + 0; + +} + +struct GEN7_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + uint32_t ConstantBufferObjectControlState; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw2 = + /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->PointerToConstantBuffer1, dw3); 
+ + uint32_t dw4 = + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer2, dw4); + + uint32_t dw5 = + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->PointerToConstantBuffer3, dw5); + +} + +struct GEN7_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; +#define VERTEXDATA 0 +#define INSTANCEDATA 1 + uint32_t BufferAccessType; + uint32_t VertexBufferMemoryObjectControlState; + uint32_t AddressModifyEnable; + uint32_t NullVertexBuffer; + uint32_t VertexFetchInvalidate; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + __gen_address_type EndAddress; + uint32_t InstanceDataStepRate; +}; + +static inline void +GEN7_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->BufferAccessType, 20, 20) | + /* Struct VertexBufferMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->VertexFetchInvalidate, 12, 12) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->EndAddress, dw2); + + dw[3] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + +struct GEN7_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + uint32_t Valid; + uint32_t SourceElementFormat; + uint32_t EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline void +GEN7_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_VERTEX_ELEMENT_STATE * 
restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + +struct GEN7_SO_DECL_ENTRY { + uint32_t Stream3Decl; + uint32_t Stream2Decl; + uint32_t Stream1Decl; + uint32_t Stream0Decl; +}; + +static inline void +GEN7_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + /* Struct Stream3Decl: found SO_DECL */ + /* Struct Stream2Decl: found SO_DECL */ + /* Struct Stream1Decl: found SO_DECL */ + /* Struct Stream0Decl: found SO_DECL */ + 0; + +} + +struct GEN7_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN7_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN7_SCISSOR_RECT { + uint32_t ScissorRectangleYMin; + uint32_t ScissorRectangleXMin; + uint32_t ScissorRectangleYMax; + uint32_t ScissorRectangleXMax; +}; + +static inline void +GEN7_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SCISSOR_RECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ScissorRectangleYMin, 16, 31) | + 
__gen_field(values->ScissorRectangleXMin, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ScissorRectangleYMax, 16, 31) | + __gen_field(values->ScissorRectangleXMax, 0, 15) | + 0; + +} + +struct GEN7_SF_CLIP_VIEWPORT { + float ViewportMatrixElementm00; + float ViewportMatrixElementm11; + float ViewportMatrixElementm22; + float ViewportMatrixElementm30; + float ViewportMatrixElementm31; + float ViewportMatrixElementm32; + float XMinClipGuardband; + float XMaxClipGuardband; + float YMinClipGuardband; + float YMaxClipGuardband; +}; + +static inline void +GEN7_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SF_CLIP_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->ViewportMatrixElementm00) | + 0; + + dw[1] = + __gen_float(values->ViewportMatrixElementm11) | + 0; + + dw[2] = + __gen_float(values->ViewportMatrixElementm22) | + 0; + + dw[3] = + __gen_float(values->ViewportMatrixElementm30) | + 0; + + dw[4] = + __gen_float(values->ViewportMatrixElementm31) | + 0; + + dw[5] = + __gen_float(values->ViewportMatrixElementm32) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + + dw[8] = + __gen_float(values->XMinClipGuardband) | + 0; + + dw[9] = + __gen_float(values->XMaxClipGuardband) | + 0; + + dw[10] = + __gen_float(values->YMinClipGuardband) | + 0; + + dw[11] = + __gen_float(values->YMaxClipGuardband) | + 0; + + dw[12] = + 0; + +} + +struct GEN7_BLEND_STATE { + uint32_t ColorBufferBlendEnable; + uint32_t IndependentAlphaBlendEnable; +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + uint32_t AlphaBlendFunction; +#define BLENDFACTOR_ONE 1 +#define BLENDFACTOR_SRC_COLOR 2 +#define BLENDFACTOR_SRC_ALPHA 3 +#define BLENDFACTOR_DST_ALPHA 4 +#define BLENDFACTOR_DST_COLOR 5 +#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 +#define BLENDFACTOR_CONST_COLOR 7 +#define BLENDFACTOR_CONST_ALPHA 
8 +#define BLENDFACTOR_SRC1_COLOR 9 +#define BLENDFACTOR_SRC1_ALPHA 10 +#define BLENDFACTOR_ZERO 17 +#define BLENDFACTOR_INV_SRC_COLOR 18 +#define BLENDFACTOR_INV_SRC_ALPHA 19 +#define BLENDFACTOR_INV_DST_ALPHA 20 +#define BLENDFACTOR_INV_DST_COLOR 21 +#define BLENDFACTOR_INV_CONST_COLOR 23 +#define BLENDFACTOR_INV_CONST_ALPHA 24 +#define BLENDFACTOR_INV_SRC1_COLOR 25 +#define BLENDFACTOR_INV_SRC1_ALPHA 26 + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + uint32_t ColorBlendFunction; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + uint32_t AlphaToCoverageEnable; + uint32_t AlphaToOneEnable; + uint32_t AlphaToCoverageDitherEnable; + uint32_t WriteDisableAlpha; + uint32_t WriteDisableRed; + uint32_t WriteDisableGreen; + uint32_t WriteDisableBlue; + uint32_t LogicOpEnable; +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 1 +#define LOGICOP_AND_INVERTED 2 +#define LOGICOP_COPY_INVERTED 3 +#define LOGICOP_AND_REVERSE 4 +#define LOGICOP_INVERT 5 +#define LOGICOP_XOR 6 +#define LOGICOP_NAND 7 +#define LOGICOP_AND 8 +#define LOGICOP_EQUIV 9 +#define LOGICOP_NOOP 10 +#define LOGICOP_OR_INVERTED 11 +#define LOGICOP_COPY 12 +#define LOGICOP_OR_REVERSE 13 +#define LOGICOP_OR 14 +#define LOGICOP_SET 15 + uint32_t LogicOpFunction; + uint32_t AlphaTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t AlphaTestFunction; + uint32_t ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; +#define COLORCLAMP_UNORM 0 +#define COLORCLAMP_SNORM 1 +#define COLORCLAMP_RTFORMAT 2 + uint32_t ColorClampRange; + uint32_t 
PreBlendColorClampEnable; + uint32_t PostBlendColorClampEnable; +}; + +static inline void +GEN7_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ColorBufferBlendEnable, 31, 31) | + __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaBlendFunction, 26, 28) | + __gen_field(values->SourceAlphaBlendFactor, 20, 24) | + __gen_field(values->DestinationAlphaBlendFactor, 15, 19) | + __gen_field(values->ColorBlendFunction, 11, 13) | + __gen_field(values->SourceBlendFactor, 5, 9) | + __gen_field(values->DestinationBlendFactor, 0, 4) | + 0; + + dw[1] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->AlphaToOneEnable, 30, 30) | + __gen_field(values->AlphaToCoverageDitherEnable, 29, 29) | + __gen_field(values->WriteDisableAlpha, 27, 27) | + __gen_field(values->WriteDisableRed, 26, 26) | + __gen_field(values->WriteDisableGreen, 25, 25) | + __gen_field(values->WriteDisableBlue, 24, 24) | + __gen_field(values->LogicOpEnable, 22, 22) | + __gen_field(values->LogicOpFunction, 18, 21) | + __gen_field(values->AlphaTestEnable, 16, 16) | + __gen_field(values->AlphaTestFunction, 13, 15) | + __gen_field(values->ColorDitherEnable, 12, 12) | + __gen_field(values->XDitherOffset, 10, 11) | + __gen_field(values->YDitherOffset, 8, 9) | + __gen_field(values->ColorClampRange, 2, 3) | + __gen_field(values->PreBlendColorClampEnable, 1, 1) | + __gen_field(values->PostBlendColorClampEnable, 0, 0) | + 0; + +} + +struct GEN7_CC_VIEWPORT { + float MinimumDepth; + float MaximumDepth; +}; + +static inline void +GEN7_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_CC_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->MinimumDepth) | + 0; + + dw[1] = + __gen_float(values->MaximumDepth) | + 0; + +} + +struct 
GEN7_COLOR_CALC_STATE { + uint32_t StencilReferenceValue; + uint32_t BackFaceStencilReferenceValue; +#define Cancelled 0 +#define NotCancelled 1 + uint32_t RoundDisableFunctionDisable; +#define ALPHATEST_UNORM8 0 +#define ALPHATEST_FLOAT32 1 + uint32_t AlphaTestFormat; + uint32_t AlphaReferenceValueAsUNORM8; + float AlphaReferenceValueAsFLOAT32; + float BlendConstantColorRed; + float BlendConstantColorGreen; + float BlendConstantColorBlue; + float BlendConstantColorAlpha; +}; + +static inline void +GEN7_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_COLOR_CALC_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilReferenceValue, 24, 31) | + __gen_field(values->BackFaceStencilReferenceValue, 16, 23) | + __gen_field(values->RoundDisableFunctionDisable, 15, 15) | + __gen_field(values->AlphaTestFormat, 0, 0) | + 0; + + dw[1] = + __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | + __gen_float(values->AlphaReferenceValueAsFLOAT32) | + 0; + + dw[2] = + __gen_float(values->BlendConstantColorRed) | + 0; + + dw[3] = + __gen_float(values->BlendConstantColorGreen) | + 0; + + dw[4] = + __gen_float(values->BlendConstantColorBlue) | + 0; + + dw[5] = + __gen_float(values->BlendConstantColorAlpha) | + 0; + +} + +struct GEN7_DEPTH_STENCIL_STATE { + uint32_t StencilTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t StencilTestFunction; +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + uint32_t StencilFailOp; + uint32_t StencilPassDepthFailOp; + uint32_t 
StencilPassDepthPassOp; + uint32_t StencilBufferWriteEnable; + uint32_t DoubleSidedStencilEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t BackFaceStencilTestFunction; +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + uint32_t BackfaceStencilFailOp; + uint32_t BackfaceStencilPassDepthFailOp; + uint32_t BackfaceStencilPassDepthPassOp; + uint32_t StencilTestMask; + uint32_t StencilWriteMask; + uint32_t BackfaceStencilTestMask; + uint32_t BackfaceStencilWriteMask; + uint32_t DepthTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t DepthTestFunction; + uint32_t DepthBufferWriteEnable; +}; + +static inline void +GEN7_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_DEPTH_STENCIL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilTestEnable, 31, 31) | + __gen_field(values->StencilTestFunction, 28, 30) | + __gen_field(values->StencilFailOp, 25, 27) | + __gen_field(values->StencilPassDepthFailOp, 22, 24) | + __gen_field(values->StencilPassDepthPassOp, 19, 21) | + __gen_field(values->StencilBufferWriteEnable, 18, 18) | + __gen_field(values->DoubleSidedStencilEnable, 15, 15) | + __gen_field(values->BackFaceStencilTestFunction, 12, 14) | + __gen_field(values->BackfaceStencilFailOp, 9, 11) | + 
__gen_field(values->BackfaceStencilPassDepthFailOp, 6, 8) | + __gen_field(values->BackfaceStencilPassDepthPassOp, 3, 5) | + 0; + + dw[1] = + __gen_field(values->StencilTestMask, 24, 31) | + __gen_field(values->StencilWriteMask, 16, 23) | + __gen_field(values->BackfaceStencilTestMask, 8, 15) | + __gen_field(values->BackfaceStencilWriteMask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->DepthTestEnable, 31, 31) | + __gen_field(values->DepthTestFunction, 27, 29) | + __gen_field(values->DepthBufferWriteEnable, 26, 26) | + 0; + +} + +struct GEN7_MEMORY_OBJECT_CONTROL_STATE { + uint32_t GraphicsDataTypeGFDT; + uint32_t LLCCacheabilityControlLLCCC; + uint32_t L3CacheabilityControlL3CC; +}; + +static inline void +GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->GraphicsDataTypeGFDT, 2, 2) | + __gen_field(values->LLCCacheabilityControlLLCCC, 1, 1) | + __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | + 0; + +} + +struct GEN7_INTERFACE_DESCRIPTOR_DATA { + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t MaskStackExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t SamplerStatePointer; +#define Nosamplersused 0 +#define Between1and4samplersused 1 +#define Between5and8samplersused 2 +#define Between9and12samplersused 3 +#define Between13and16samplersused 4 + uint32_t SamplerCount; + uint32_t BindingTablePointer; + uint32_t BindingTableEntryCount; + uint32_t ConstantURBEntryReadLength; + uint32_t ConstantURBEntryReadOffset; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + uint32_t BarrierEnable; + 
uint32_t SharedLocalMemorySize; + uint32_t NumberofThreadsinGPGPUThreadGroup; +}; + +static inline void +GEN7_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_INTERFACE_DESCRIPTOR_DATA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[1] = + __gen_field(values->SingleProgramFlow, 18, 18) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[2] = + __gen_offset(values->SamplerStatePointer, 5, 31) | + __gen_field(values->SamplerCount, 2, 4) | + 0; + + dw[3] = + __gen_offset(values->BindingTablePointer, 5, 15) | + __gen_field(values->BindingTableEntryCount, 0, 4) | + 0; + + dw[4] = + __gen_field(values->ConstantURBEntryReadLength, 16, 31) | + __gen_field(values->ConstantURBEntryReadOffset, 0, 15) | + 0; + + dw[5] = + __gen_field(values->RoundingMode, 22, 23) | + __gen_field(values->BarrierEnable, 21, 21) | + __gen_field(values->SharedLocalMemorySize, 16, 20) | + __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 7) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + +} + +struct GEN7_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN7_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + +struct GEN7_SAMPLER_BORDER_COLOR_STATE { + uint32_t BorderColorRedDX100GL; + uint32_t BorderColorAlpha; + uint32_t BorderColorBlue; + uint32_t BorderColorGreen; + uint32_t 
BorderColorRedDX9; + uint32_t BorderColorGreen0; + uint32_t BorderColorBlue0; + uint32_t BorderColorAlpha0; +}; + +static inline void +GEN7_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SAMPLER_BORDER_COLOR_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BorderColorRedDX100GL, 0, 31) | + __gen_field(values->BorderColorAlpha, 24, 31) | + __gen_field(values->BorderColorBlue, 16, 23) | + __gen_field(values->BorderColorGreen, 8, 15) | + __gen_field(values->BorderColorRedDX9, 0, 7) | + 0; + + dw[1] = + __gen_field(values->BorderColorGreen, 0, 31) | + 0; + + dw[2] = + __gen_field(values->BorderColorBlue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->BorderColorAlpha, 0, 31) | + 0; + +} + +struct GEN7_SAMPLER_STATE { + uint32_t SamplerDisable; +#define DX10OGL 0 +#define DX9 1 + uint32_t TextureBorderColorMode; +#define OGL 1 + uint32_t LODPreClampEnable; + uint32_t BaseMipLevel; +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + uint32_t MipModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MagModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MinModeFilter; + uint32_t TextureLODBias; +#define LEGACY 0 +#define EWAApproximation 1 + uint32_t AnisotropicAlgorithm; + uint32_t MinLOD; + uint32_t MaxLOD; +#define PREFILTEROPALWAYS 0 +#define PREFILTEROPNEVER 1 +#define PREFILTEROPLESS 2 +#define PREFILTEROPEQUAL 3 +#define PREFILTEROPLEQUAL 4 +#define PREFILTEROPGREATER 5 +#define PREFILTEROPNOTEQUAL 6 +#define PREFILTEROPGEQUAL 7 + uint32_t ShadowFunction; +#define PROGRAMMED 0 +#define OVERRIDE 1 + uint32_t CubeSurfaceControlMode; + uint32_t BorderColorPointer; + uint32_t ChromaKeyEnable; + uint32_t ChromaKeyIndex; +#define KEYFILTER_KILL_ON_ANY_MATCH 0 
+#define KEYFILTER_REPLACE_BLACK 1 + uint32_t ChromaKeyMode; +#define RATIO21 0 +#define RATIO41 1 +#define RATIO61 2 +#define RATIO81 3 +#define RATIO101 4 +#define RATIO121 5 +#define RATIO141 6 +#define RATIO161 7 + uint32_t MaximumAnisotropy; + uint32_t RAddressMinFilterRoundingEnable; + uint32_t RAddressMagFilterRoundingEnable; + uint32_t VAddressMinFilterRoundingEnable; + uint32_t VAddressMagFilterRoundingEnable; + uint32_t UAddressMinFilterRoundingEnable; + uint32_t UAddressMagFilterRoundingEnable; +#define FULL 0 +#define MED 2 +#define LOW 3 + uint32_t TrilinearFilterQuality; + uint32_t NonnormalizedCoordinateEnable; + uint32_t TCXAddressControlMode; + uint32_t TCYAddressControlMode; + uint32_t TCZAddressControlMode; +}; + +static inline void +GEN7_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SAMPLER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SamplerDisable, 31, 31) | + __gen_field(values->TextureBorderColorMode, 29, 29) | + __gen_field(values->LODPreClampEnable, 28, 28) | + __gen_field(values->BaseMipLevel, 22, 26) | + __gen_field(values->MipModeFilter, 20, 21) | + __gen_field(values->MagModeFilter, 17, 19) | + __gen_field(values->MinModeFilter, 14, 16) | + __gen_field(values->TextureLODBias, 1, 13) | + __gen_field(values->AnisotropicAlgorithm, 0, 0) | + 0; + + dw[1] = + __gen_field(values->MinLOD, 20, 31) | + __gen_field(values->MaxLOD, 8, 19) | + __gen_field(values->ShadowFunction, 1, 3) | + __gen_field(values->CubeSurfaceControlMode, 0, 0) | + 0; + + dw[2] = + __gen_offset(values->BorderColorPointer, 5, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyEnable, 25, 25) | + __gen_field(values->ChromaKeyIndex, 23, 24) | + __gen_field(values->ChromaKeyMode, 22, 22) | + __gen_field(values->MaximumAnisotropy, 19, 21) | + __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | + __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | + 
__gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | + __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | + __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | + __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | + __gen_field(values->TrilinearFilterQuality, 11, 12) | + __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | + __gen_field(values->TCXAddressControlMode, 6, 8) | + __gen_field(values->TCYAddressControlMode, 3, 5) | + __gen_field(values->TCZAddressControlMode, 0, 2) | + 0; + +} + +/* Enum 3D_Prim_Topo_Type */ +#define _3DPRIM_POINTLIST 1 +#define _3DPRIM_LINELIST 2 +#define _3DPRIM_LINESTRIP 3 +#define _3DPRIM_TRILIST 4 +#define _3DPRIM_TRISTRIP 5 +#define _3DPRIM_TRIFAN 6 +#define _3DPRIM_QUADLIST 7 +#define _3DPRIM_QUADSTRIP 8 +#define _3DPRIM_LINELIST_ADJ 9 +#define _3DPRIM_LISTSTRIP_ADJ 10 +#define _3DPRIM_TRILIST_ADJ 11 +#define _3DPRIM_TRISTRIP_ADJ 12 +#define _3DPRIM_TRISTRIP_REVERSE 13 +#define _3DPRIM_POLYGON 14 +#define _3DPRIM_RECTLIST 15 +#define _3DPRIM_LINELOOP 16 +#define _3DPRIM_POINTLIST_BF 17 +#define _3DPRIM_LINESTRIP_CONT 18 +#define _3DPRIM_LINESTRIP_BF 19 +#define _3DPRIM_LINESTRIP_CONT_BF 20 +#define _3DPRIM_TRIFAN_NOSTIPPLE 22 +#define _3DPRIM_PATCHLIST_1 32 +#define _3DPRIM_PATCHLIST_2 33 +#define _3DPRIM_PATCHLIST_3 34 +#define _3DPRIM_PATCHLIST_4 35 +#define _3DPRIM_PATCHLIST_5 36 +#define _3DPRIM_PATCHLIST_6 37 +#define _3DPRIM_PATCHLIST_7 38 +#define _3DPRIM_PATCHLIST_8 39 +#define _3DPRIM_PATCHLIST_9 40 +#define _3DPRIM_PATCHLIST_10 41 +#define _3DPRIM_PATCHLIST_11 42 +#define _3DPRIM_PATCHLIST_12 43 +#define _3DPRIM_PATCHLIST_13 44 +#define _3DPRIM_PATCHLIST_14 45 +#define _3DPRIM_PATCHLIST_15 46 +#define _3DPRIM_PATCHLIST_16 47 +#define _3DPRIM_PATCHLIST_17 48 +#define _3DPRIM_PATCHLIST_18 49 +#define _3DPRIM_PATCHLIST_19 50 +#define _3DPRIM_PATCHLIST_20 51 +#define _3DPRIM_PATCHLIST_21 52 +#define _3DPRIM_PATCHLIST_22 53 +#define _3DPRIM_PATCHLIST_23 54 +#define 
_3DPRIM_PATCHLIST_24 55 +#define _3DPRIM_PATCHLIST_25 56 +#define _3DPRIM_PATCHLIST_26 57 +#define _3DPRIM_PATCHLIST_27 58 +#define _3DPRIM_PATCHLIST_28 59 +#define _3DPRIM_PATCHLIST_29 60 +#define _3DPRIM_PATCHLIST_30 61 +#define _3DPRIM_PATCHLIST_31 62 +#define _3DPRIM_PATCHLIST_32 63 + +/* Enum 3D_Vertex_Component_Control */ +#define VFCOMP_NOSTORE 0 +#define VFCOMP_STORE_SRC 1 +#define VFCOMP_STORE_0 2 +#define VFCOMP_STORE_1_FP 3 +#define VFCOMP_STORE_1_INT 4 +#define VFCOMP_STORE_VID 5 +#define VFCOMP_STORE_IID 6 +#define VFCOMP_STORE_PID 7 + +/* Enum 3D_Compare_Function */ +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + +/* Enum SURFACE_FORMAT */ +#define R32G32B32A32_FLOAT 0 +#define R32G32B32A32_SINT 1 +#define R32G32B32A32_UINT 2 +#define R32G32B32A32_UNORM 3 +#define R32G32B32A32_SNORM 4 +#define R64G64_FLOAT 5 +#define R32G32B32X32_FLOAT 6 +#define R32G32B32A32_SSCALED 7 +#define R32G32B32A32_USCALED 8 +#define R32G32B32A32_SFIXED 32 +#define R64G64_PASSTHRU 33 +#define R32G32B32_FLOAT 64 +#define R32G32B32_SINT 65 +#define R32G32B32_UINT 66 +#define R32G32B32_UNORM 67 +#define R32G32B32_SNORM 68 +#define R32G32B32_SSCALED 69 +#define R32G32B32_USCALED 70 +#define R32G32B32_SFIXED 80 +#define R16G16B16A16_UNORM 128 +#define R16G16B16A16_SNORM 129 +#define R16G16B16A16_SINT 130 +#define R16G16B16A16_UINT 131 +#define R16G16B16A16_FLOAT 132 +#define R32G32_FLOAT 133 +#define R32G32_SINT 134 +#define R32G32_UINT 135 +#define R32_FLOAT_X8X24_TYPELESS 136 +#define X32_TYPELESS_G8X24_UINT 137 +#define L32A32_FLOAT 138 +#define R32G32_UNORM 139 +#define R32G32_SNORM 140 +#define R64_FLOAT 141 +#define R16G16B16X16_UNORM 142 +#define R16G16B16X16_FLOAT 143 +#define A32X32_FLOAT 144 +#define L32X32_FLOAT 145 +#define I32X32_FLOAT 146 
+#define R16G16B16A16_SSCALED 147 +#define R16G16B16A16_USCALED 148 +#define R32G32_SSCALED 149 +#define R32G32_USCALED 150 +#define R32G32_SFIXED 160 +#define R64_PASSTHRU 161 +#define B8G8R8A8_UNORM 192 +#define B8G8R8A8_UNORM_SRGB 193 +#define R10G10B10A2_UNORM 194 +#define R10G10B10A2_UNORM_SRGB 195 +#define R10G10B10A2_UINT 196 +#define R10G10B10_SNORM_A2_UNORM 197 +#define R8G8B8A8_UNORM 199 +#define R8G8B8A8_UNORM_SRGB 200 +#define R8G8B8A8_SNORM 201 +#define R8G8B8A8_SINT 202 +#define R8G8B8A8_UINT 203 +#define R16G16_UNORM 204 +#define R16G16_SNORM 205 +#define R16G16_SINT 206 +#define R16G16_UINT 207 +#define R16G16_FLOAT 208 +#define B10G10R10A2_UNORM 209 +#define B10G10R10A2_UNORM_SRGB 210 +#define R11G11B10_FLOAT 211 +#define R32_SINT 214 +#define R32_UINT 215 +#define R32_FLOAT 216 +#define R24_UNORM_X8_TYPELESS 217 +#define X24_TYPELESS_G8_UINT 218 +#define L32_UNORM 221 +#define A32_UNORM 222 +#define L16A16_UNORM 223 +#define I24X8_UNORM 224 +#define L24X8_UNORM 225 +#define A24X8_UNORM 226 +#define I32_FLOAT 227 +#define L32_FLOAT 228 +#define A32_FLOAT 229 +#define X8B8_UNORM_G8R8_SNORM 230 +#define A8X8_UNORM_G8R8_SNORM 231 +#define B8X8_UNORM_G8R8_SNORM 232 +#define B8G8R8X8_UNORM 233 +#define B8G8R8X8_UNORM_SRGB 234 +#define R8G8B8X8_UNORM 235 +#define R8G8B8X8_UNORM_SRGB 236 +#define R9G9B9E5_SHAREDEXP 237 +#define B10G10R10X2_UNORM 238 +#define L16A16_FLOAT 240 +#define R32_UNORM 241 +#define R32_SNORM 242 +#define R10G10B10X2_USCALED 243 +#define R8G8B8A8_SSCALED 244 +#define R8G8B8A8_USCALED 245 +#define R16G16_SSCALED 246 +#define R16G16_USCALED 247 +#define R32_SSCALED 248 +#define R32_USCALED 249 +#define B5G6R5_UNORM 256 +#define B5G6R5_UNORM_SRGB 257 +#define B5G5R5A1_UNORM 258 +#define B5G5R5A1_UNORM_SRGB 259 +#define B4G4R4A4_UNORM 260 +#define B4G4R4A4_UNORM_SRGB 261 +#define R8G8_UNORM 262 +#define R8G8_SNORM 263 +#define R8G8_SINT 264 +#define R8G8_UINT 265 +#define R16_UNORM 266 +#define R16_SNORM 267 +#define R16_SINT 268 
+#define R16_UINT 269 +#define R16_FLOAT 270 +#define A8P8_UNORM_PALETTE0 271 +#define A8P8_UNORM_PALETTE1 272 +#define I16_UNORM 273 +#define L16_UNORM 274 +#define A16_UNORM 275 +#define L8A8_UNORM 276 +#define I16_FLOAT 277 +#define L16_FLOAT 278 +#define A16_FLOAT 279 +#define L8A8_UNORM_SRGB 280 +#define R5G5_SNORM_B6_UNORM 281 +#define B5G5R5X1_UNORM 282 +#define B5G5R5X1_UNORM_SRGB 283 +#define R8G8_SSCALED 284 +#define R8G8_USCALED 285 +#define R16_SSCALED 286 +#define R16_USCALED 287 +#define P8A8_UNORM_PALETTE0 290 +#define P8A8_UNORM_PALETTE1 291 +#define A1B5G5R5_UNORM 292 +#define A4B4G4R4_UNORM 293 +#define L8A8_UINT 294 +#define L8A8_SINT 295 +#define R8_UNORM 320 +#define R8_SNORM 321 +#define R8_SINT 322 +#define R8_UINT 323 +#define A8_UNORM 324 +#define I8_UNORM 325 +#define L8_UNORM 326 +#define P4A4_UNORM_PALETTE0 327 +#define A4P4_UNORM_PALETTE0 328 +#define R8_SSCALED 329 +#define R8_USCALED 330 +#define P8_UNORM_PALETTE0 331 +#define L8_UNORM_SRGB 332 +#define P8_UNORM_PALETTE1 333 +#define P4A4_UNORM_PALETTE1 334 +#define A4P4_UNORM_PALETTE1 335 +#define Y8_UNORM 336 +#define L8_UINT 338 +#define L8_SINT 339 +#define I8_UINT 340 +#define I8_SINT 341 +#define DXT1_RGB_SRGB 384 +#define R1_UNORM 385 +#define YCRCB_NORMAL 386 +#define YCRCB_SWAPUVY 387 +#define P2_UNORM_PALETTE0 388 +#define P2_UNORM_PALETTE1 389 +#define BC1_UNORM 390 +#define BC2_UNORM 391 +#define BC3_UNORM 392 +#define BC4_UNORM 393 +#define BC5_UNORM 394 +#define BC1_UNORM_SRGB 395 +#define BC2_UNORM_SRGB 396 +#define BC3_UNORM_SRGB 397 +#define MONO8 398 +#define YCRCB_SWAPUV 399 +#define YCRCB_SWAPY 400 +#define DXT1_RGB 401 +#define FXT1 402 +#define R8G8B8_UNORM 403 +#define R8G8B8_SNORM 404 +#define R8G8B8_SSCALED 405 +#define R8G8B8_USCALED 406 +#define R64G64B64A64_FLOAT 407 +#define R64G64B64_FLOAT 408 +#define BC4_SNORM 409 +#define BC5_SNORM 410 +#define R16G16B16_FLOAT 411 +#define R16G16B16_UNORM 412 +#define R16G16B16_SNORM 413 +#define R16G16B16_SSCALED 414 
+#define R16G16B16_USCALED 415 +#define BC6H_SF16 417 +#define BC7_UNORM 418 +#define BC7_UNORM_SRGB 419 +#define BC6H_UF16 420 +#define PLANAR_420_8 421 +#define R8G8B8_UNORM_SRGB 424 +#define ETC1_RGB8 425 +#define ETC2_RGB8 426 +#define EAC_R11 427 +#define EAC_RG11 428 +#define EAC_SIGNED_R11 429 +#define EAC_SIGNED_RG11 430 +#define ETC2_SRGB8 431 +#define R16G16B16_UINT 432 +#define R16G16B16_SINT 433 +#define R32_SFIXED 434 +#define R10G10B10A2_SNORM 435 +#define R10G10B10A2_USCALED 436 +#define R10G10B10A2_SSCALED 437 +#define R10G10B10A2_SINT 438 +#define B10G10R10A2_SNORM 439 +#define B10G10R10A2_USCALED 440 +#define B10G10R10A2_SSCALED 441 +#define B10G10R10A2_UINT 442 +#define B10G10R10A2_SINT 443 +#define R64G64B64A64_PASSTHRU 444 +#define R64G64B64_PASSTHRU 445 +#define ETC2_RGB8_PTA 448 +#define ETC2_SRGB8_PTA 449 +#define ETC2_EAC_RGBA8 450 +#define ETC2_EAC_SRGB8_A8 451 +#define R8G8B8_UINT 456 +#define R8G8B8_SINT 457 +#define RAW 511 + +/* Enum Texture Coordinate Mode */ +#define TCM_WRAP 0 +#define TCM_MIRROR 1 +#define TCM_CLAMP 2 +#define TCM_CUBE 3 +#define TCM_CLAMP_BORDER 4 +#define TCM_MIRROR_ONCE 5 + diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index c15afe9b266..bc221e52089 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -32,6 +32,9 @@ #include #include +#ifndef __gen_field_functions +#define __gen_field_functions + union __gen_value { float f; uint32_t dw; @@ -74,6 +77,8 @@ __gen_float(float v) #error #define __gen_combine_address before including this file #endif +#endif + #define GEN8_3DSTATE_URB_VS_length 0x00000002 #define GEN8_3DSTATE_URB_VS_length_bias 0x00000002 #define GEN8_3DSTATE_URB_VS_header \ @@ -3480,12 +3485,12 @@ struct GEN8_3DSTATE_RASTER { #define Clockwise 0 #define CounterClockwise 1 uint32_t FrontWinding; -#define NUMRASTSAMPLES_0 0 -#define NUMRASTSAMPLES_1 1 -#define NUMRASTSAMPLES_2 2 -#define NUMRASTSAMPLES_4 3 -#define NUMRASTSAMPLES_8 4 -#define NUMRASTSAMPLES_16 5 +#define 
FSC_NUMRASTSAMPLES_0 0 +#define FSC_NUMRASTSAMPLES_1 1 +#define FSC_NUMRASTSAMPLES_2 2 +#define FSC_NUMRASTSAMPLES_4 3 +#define FSC_NUMRASTSAMPLES_8 4 +#define FSC_NUMRASTSAMPLES_16 5 uint32_t ForcedSampleCount; #define CULLMODE_BOTH 0 #define CULLMODE_NONE 1 @@ -8309,7 +8314,6 @@ GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, } /* Enum 3D_Prim_Topo_Type */ -#ifndef _3DPRIM_POINTLIST #define _3DPRIM_POINTLIST 1 #define _3DPRIM_LINELIST 2 #define _3DPRIM_LINESTRIP 3 @@ -8363,7 +8367,6 @@ GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, #define _3DPRIM_PATCHLIST_30 61 #define _3DPRIM_PATCHLIST_31 62 #define _3DPRIM_PATCHLIST_32 63 -#endif /* Enum 3D_Vertex_Component_Control */ #define VFCOMP_NOSTORE 0 diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 00aceb7ded1..aaf1b00ad04 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -390,6 +390,8 @@ __gen_combine_address(struct anv_batch *batch, void *location, } } +#include "gen7_pack.h" +#include "gen75_pack.h" #undef GEN8_3DSTATE_MULTISAMPLE #include "gen8_pack.h" -- cgit v1.2.3 From cb986ef59710ad7a09de02b8612665992ed867e3 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 12 May 2015 14:38:12 -0700 Subject: vk: Submit all cmd buffers passed to vkQueueSubmit --- src/vulkan/device.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 98e1570372b..dcd0c380f6c 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -567,23 +567,25 @@ VkResult VKAPI vkQueueSubmit( { struct anv_queue *queue = (struct anv_queue *) _queue; struct anv_device *device = queue->device; - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) pCmdBuffers[0]; int ret; - assert(cmdBufferCount == 1); + for (uint32_t i = 0; i < cmdBufferCount; i++) { + struct anv_cmd_buffer *cmd_buffer = + (struct anv_cmd_buffer *) pCmdBuffers[i]; - if (device->dump_aub) - 
anv_cmd_buffer_dump(cmd_buffer); + if (device->dump_aub) + anv_cmd_buffer_dump(cmd_buffer); - if (!device->no_hw) { - ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf); - if (ret != 0) - goto fail; + if (!device->no_hw) { + ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf); + if (ret != 0) + goto fail; - for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) - cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset; - } else { - *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial; + for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) + cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset; + } else { + *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial; + } } return VK_SUCCESS; -- cgit v1.2.3 From 2b7a060178f887c378e47bce08bd48d783febbd2 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 12 May 2015 14:38:58 -0700 Subject: vk: Fix stale error handling in vkQueueSubmit --- src/vulkan/device.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index dcd0c380f6c..45a26151604 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -579,7 +579,7 @@ VkResult VKAPI vkQueueSubmit( if (!device->no_hw) { ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf); if (ret != 0) - goto fail; + return vk_error(VK_ERROR_UNKNOWN); for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset; @@ -589,11 +589,6 @@ VkResult VKAPI vkQueueSubmit( } return VK_SUCCESS; - - fail: - pthread_mutex_unlock(&device->mutex); - - return vk_error(VK_ERROR_UNKNOWN); } VkResult VKAPI vkQueueAddMemReferences( -- cgit v1.2.3 From 828817b88f9f38cdf83ec55f55afe7e9ba3041db Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 May 2015 12:05:38 -0700 Subject: vk: Ignore vk executable --- src/vulkan/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/vulkan/.gitignore 
(limited to 'src') diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore new file mode 100644 index 00000000000..441005687f8 --- /dev/null +++ b/src/vulkan/.gitignore @@ -0,0 +1 @@ +/vk -- cgit v1.2.3 From a3fd1365094cbf04648fb6d67582122101d4c212 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 12 May 2015 21:44:59 -0700 Subject: vk: Fill out sampler state from API values --- src/vulkan/device.c | 61 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 45a26151604..2abc6cc0be9 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1336,25 +1336,58 @@ VkResult VKAPI vkCreateSampler( if (!sampler) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + static const uint32_t vk_to_gen_tex_filter[] = { + [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST, + [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR + }; + + static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, + [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR + }; + + static const uint32_t vk_to_gen_tex_address[] = { + [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, + [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, + [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, + [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, + [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, + }; + + static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, + }; + + if (pCreateInfo->maxAnisotropy > 0) + anv_finishme("missing support for anisotropic filtering"); + struct GEN8_SAMPLER_STATE 
sampler_state = { - .SamplerDisable = 0, - .TextureBorderColorMode = 0, + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, .LODPreClampMode = 0, .BaseMipLevel = 0, - .MipModeFilter = 0, - .MagModeFilter = 0, - .MinModeFilter = 0, - .TextureLODBias = 0, - .AnisotropicAlgorithm = 0, - .MinLOD = 0, - .MaxLOD = 0, + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], + .MagModeFilter = vk_to_gen_tex_filter[pCreateInfo->magFilter], + .MinModeFilter = vk_to_gen_tex_filter[pCreateInfo->minFilter], + .TextureLODBias = pCreateInfo->mipLodBias * 256, + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = pCreateInfo->minLod * 256, + .MaxLOD = pCreateInfo->maxLod * 256, .ChromaKeyEnable = 0, .ChromaKeyIndex = 0, .ChromaKeyMode = 0, - .ShadowFunction = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], .CubeSurfaceControlMode = 0, .IndirectStatePointer = 0, - .LODClampMagnificationMode = 0, + .LODClampMagnificationMode = MIPNONE, .MaximumAnisotropy = 0, .RAddressMinFilterRoundingEnable = 0, .RAddressMagFilterRoundingEnable = 0, @@ -1364,9 +1397,9 @@ VkResult VKAPI vkCreateSampler( .UAddressMagFilterRoundingEnable = 0, .TrilinearFilterQuality = 0, .NonnormalizedCoordinateEnable = 0, - .TCXAddressControlMode = 0, - .TCYAddressControlMode = 0, - .TCZAddressControlMode = 0, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW], }; GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); -- cgit v1.2.3 From a77229c979d0edee165da8f44c5af70bb0c25507 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 13 May 2015 11:49:30 -0700 Subject: vk: Allocate layout->count number of descriptors layout->count is the number of descriptors the application requested. layout->total is the number of entries we need across all stages. 
--- src/vulkan/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 2abc6cc0be9..b547585c7b9 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1502,7 +1502,7 @@ VkResult VKAPI vkAllocDescriptorSets( for (uint32_t i = 0; i < count; i++) { layout = (struct anv_descriptor_set_layout *) pSetLayouts[i]; - size = sizeof(*set) + layout->total * sizeof(set->descriptors[0]); + size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]); set = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (!set) { -- cgit v1.2.3 From 3f52c016fa4ce9a3a669977f3a72b44d0e42adca Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 13 May 2015 13:38:55 -0700 Subject: vk: Move struct anv_sampler to private.h --- src/vulkan/device.c | 4 ---- src/vulkan/private.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index b547585c7b9..8394d56ae93 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1317,10 +1317,6 @@ VkResult VKAPI vkCreateBufferView( // Sampler functions -struct anv_sampler { - uint32_t state[4]; -}; - VkResult VKAPI vkCreateSampler( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index aaf1b00ad04..cfbbb7129bf 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -623,6 +623,10 @@ struct anv_image_view { struct anv_state surface_state; }; +struct anv_sampler { + uint32_t state[4]; +}; + struct anv_depth_stencil_view { }; -- cgit v1.2.3 From a1ec789b0bdd4e5878c08c8fc32bb7d29bc4fecf Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 13 May 2015 13:51:08 -0700 Subject: vk: Add a dynamic state stream to anv_cmd_buffer We'll need this for sampler state. 
--- src/vulkan/device.c | 3 +++ src/vulkan/private.h | 1 + 2 files changed, 4 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 8394d56ae93..64433982573 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -898,6 +898,7 @@ anv_cmd_buffer_destructor(struct anv_device * device, struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object; anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); anv_batch_finish(&cmd_buffer->batch, device); anv_device_free(device, cmd_buffer->exec2_objects); anv_device_free(device, cmd_buffer->exec2_bos); @@ -1793,6 +1794,8 @@ VkResult VKAPI vkCreateCommandBuffer( anv_state_stream_init(&cmd_buffer->surface_state_stream, &device->surface_state_block_pool); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &device->dyn_state_block_pool); cmd_buffer->dirty = 0; cmd_buffer->vb_dirty = 0; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index cfbbb7129bf..90fd0176b47 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -502,6 +502,7 @@ struct anv_cmd_buffer { uint32_t bo_count; struct anv_batch batch; struct anv_state_stream surface_state_stream; + struct anv_state_stream dynamic_state_stream; /* State required while building cmd buffer */ struct { -- cgit v1.2.3 From 18acfa7301b566baa6bd168302d0c923f75c7736 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 13 May 2015 13:53:01 -0700 Subject: vk: Fix copy-n-paste sType in vkCreateSampler --- src/vulkan/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 64433982573..be274c857bb 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1326,7 +1326,7 @@ VkResult VKAPI vkCreateSampler( struct anv_device *device = (struct anv_device *) _device; struct anv_sampler *sampler; - assert(pCreateInfo->sType == 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); sampler = anv_device_alloc(device, sizeof(*sampler), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); -- cgit v1.2.3 From 5c9d77600bf713ae7bd7493e1461c088695e725d Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 13 May 2015 13:55:43 -0700 Subject: vk: Create and bind a sampler in vk.c --- src/vulkan/vk.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c index 4bcb54d5e4f..f1b3629e145 100644 --- a/src/vulkan/vk.c +++ b/src/vulkan/vk.c @@ -285,7 +285,7 @@ int main(int argc, char *argv[]) vkCreateDescriptorSetLayout(device, &(VkDescriptorSetLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .count = 2, + .count = 3, .pBinding = (VkDescriptorSetLayoutBinding[]) { { .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, @@ -298,6 +298,12 @@ int main(int argc, char *argv[]) .count = 1, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, .pImmutableSamplers = NULL + }, + { + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .count = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL } } }, @@ -561,7 +567,26 @@ int main(int argc, char *argv[]) 0, /* allocation index; for objects which need to bind to multiple mems */ mem, 2048 + 256 * 256 * 4); - vkUpdateDescriptors(device, set[0], 2, + VkSampler sampler; + vkCreateSampler(device, + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_TEX_FILTER_LINEAR, + .minFilter = VK_TEX_FILTER_LINEAR, + .mipMode = VK_TEX_MIPMAP_MODE_NEAREST, + .addressU = VK_TEX_ADDRESS_CLAMP, + .addressV = VK_TEX_ADDRESS_CLAMP, + .addressW = VK_TEX_ADDRESS_CLAMP, + .mipLodBias = 0, + .maxAnisotropy = 0, + .compareOp = VK_COMPARE_OP_GREATER, + .minLod = 0, + .maxLod = 0, + .borderColor = VK_BORDER_COLOR_TRANSPARENT_BLACK + }, + &sampler); 
+ + vkUpdateDescriptors(device, set[0], 3, (const void * []) { &(VkUpdateBuffers) { .sType = VK_STRUCTURE_TYPE_UPDATE_BUFFERS, @@ -592,6 +617,12 @@ int main(int argc, char *argv[]) .layout = VK_IMAGE_LAYOUT_GENERAL, } } + }, + &(const VkUpdateSamplers) { + .sType = VK_STRUCTURE_TYPE_UPDATE_SAMPLERS, + .binding = 3, + .count = 1, + .pSamplers = (const VkSampler[]) { sampler } } }); -- cgit v1.2.3 From 4f9eaf77a57976f2bc0138072b21cb63fc3a6814 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 13 May 2015 14:02:35 -0700 Subject: vk: Use a typesafe anv_descriptor struct --- src/vulkan/device.c | 30 ++++++++++++++++++------------ src/vulkan/private.h | 12 +++++++++++- 2 files changed, 29 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index be274c857bb..7da2322137f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1545,8 +1545,8 @@ void VKAPI vkUpdateDescriptors( update_samplers = (VkUpdateSamplers *) common; for (uint32_t j = 0; j < update_samplers->count; j++) { - set->descriptors[update_samplers->binding + j] = - (void *) update_samplers->pSamplers[j]; + set->descriptors[update_samplers->binding + j].sampler = + (struct anv_sampler *) update_samplers->pSamplers[j]; } break; @@ -1555,8 +1555,12 @@ void VKAPI vkUpdateDescriptors( update_sampler_textures = (VkUpdateSamplerTextures *) common; for (uint32_t j = 0; j < update_sampler_textures->count; j++) { - set->descriptors[update_sampler_textures->binding + j] = - (void *) update_sampler_textures->pSamplerImageViews[j].pImageView->view; + set->descriptors[update_sampler_textures->binding + j].image_view = + (struct anv_image_view *) + update_sampler_textures->pSamplerImageViews[j].pImageView->view; + set->descriptors[update_sampler_textures->binding + j].sampler = + (struct anv_sampler *) + update_sampler_textures->pSamplerImageViews[j].sampler; } break; @@ -1564,8 +1568,8 @@ void VKAPI vkUpdateDescriptors( update_images = (VkUpdateImages *) 
common; for (uint32_t j = 0; j < update_images->count; j++) { - set->descriptors[update_images->binding + j] = - (void *) update_images->pImageViews[j].view; + set->descriptors[update_images->binding + j].image_view = + (struct anv_image_view *) update_images->pImageViews[j].view; } break; @@ -1573,8 +1577,8 @@ void VKAPI vkUpdateDescriptors( update_buffers = (VkUpdateBuffers *) common; for (uint32_t j = 0; j < update_buffers->count; j++) { - set->descriptors[update_buffers->binding + j] = - (void *) update_buffers->pBufferViews[j].view; + set->descriptors[update_buffers->binding + j].buffer_view = + (struct anv_buffer_view *) update_buffers->pBufferViews[j].view; } /* FIXME: descriptor arrays? */ break; @@ -2180,17 +2184,19 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) if (layout) { for (uint32_t i = 0; i < layout->stage[s].count; i++) { struct anv_pipeline_layout_entry *e = &layout->stage[s].entries[i]; + struct anv_descriptor *d = + &cmd_buffer->descriptor_sets[e->set]->descriptors[e->index]; struct anv_image_view *image_view; struct anv_buffer_view *buffer_view; - void *d = cmd_buffer->descriptor_sets[e->set]->descriptors[e->index]; switch (e->type) { case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + unreachable("sampler-only descriptor in the surface entries"); break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - image_view = d; + image_view = d->image_view; table[bias + i] = image_view->surface_state.offset; anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, image_view->surface_state.offset + 8 * sizeof(int32_t), @@ -2204,7 +2210,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - buffer_view = d; + buffer_view = d->buffer_view; table[bias + i] = buffer_view->surface_state.offset; anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, 
buffer_view->surface_state.offset + 8 * sizeof(int32_t), diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 90fd0176b47..dbb29acbf20 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -454,8 +454,18 @@ struct anv_descriptor_set_layout { } bindings[0]; }; +struct anv_descriptor { + union { + struct { + struct anv_sampler *sampler; + struct anv_image_view *image_view; + }; + struct anv_buffer_view *buffer_view; + }; +}; + struct anv_descriptor_set { - void *descriptors[0]; + struct anv_descriptor descriptors[0]; }; struct anv_pipeline_layout_entry { -- cgit v1.2.3 From 83c7e1f1db77c57802c72e0dff114f7a0daca5b5 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 13 May 2015 14:43:08 -0700 Subject: vk: Add support for sampler descriptors --- src/vulkan/compiler.cpp | 4 +- src/vulkan/device.c | 124 ++++++++++++++++++++++++++++++++++++------------ src/vulkan/pipeline.c | 66 ++++++++++++++++++++------ src/vulkan/private.h | 9 ++-- 4 files changed, 153 insertions(+), 50 deletions(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index f8be0f070a8..f81008b8f72 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -70,8 +70,8 @@ set_binding_table_layout(struct brw_stage_prog_data *prog_data, else bias = 0; - count = pipeline->layout->stage[stage].count; - entries = pipeline->layout->stage[stage].entries; + count = pipeline->layout->stage[stage].surface_count; + entries = pipeline->layout->stage[stage].surface_entries; prog_data->map_entries = (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0])); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 7da2322137f..d5eab4866d4 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1415,8 +1415,8 @@ VkResult VKAPI vkCreateDescriptorSetLayout( { struct anv_device *device = (struct anv_device *) _device; struct anv_descriptor_set_layout *set_layout; - uint32_t count, k; - size_t size, total; + uint32_t count, k, num_entries; 
+ size_t size, sampler_total, surface_total; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); @@ -1432,7 +1432,8 @@ VkResult VKAPI vkCreateDescriptorSetLayout( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); k = 0; - total = 0; + sampler_total = 0; + surface_total = 0; for (uint32_t i = 0; i < pCreateInfo->count; i++) { for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) { set_layout->bindings[k].mask = pCreateInfo->pBinding[i].stageFlags; @@ -1440,11 +1441,36 @@ VkResult VKAPI vkCreateDescriptorSetLayout( k++; } - total += pCreateInfo->pBinding[i].count * + num_entries = pCreateInfo->pBinding[i].count * __builtin_popcount(pCreateInfo->pBinding[i].stageFlags); + + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + sampler_total += num_entries; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + sampler_total += num_entries; + surface_total += num_entries; + break; + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + surface_total += num_entries; + break; + + default: + unreachable("invalid descriptor type"); + } } - set_layout->total = total; + set_layout->sampler_total = sampler_total; + set_layout->surface_total = surface_total; set_layout->count = count; *pSetLayout = (VkDescriptorSetLayout) set_layout; @@ -2136,36 +2162,25 @@ void VKAPI vkCmdBindVertexBuffers( static void flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { - static const uint32_t opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 38, - [VK_SHADER_STAGE_TESS_CONTROL] = 39, - [VK_SHADER_STAGE_TESS_EVALUATION] = 40, - [VK_SHADER_STAGE_GEOMETRY] = 41, - [VK_SHADER_STAGE_FRAGMENT] = 42, - 
[VK_SHADER_STAGE_COMPUTE] = 0, - }; - struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; struct anv_framebuffer *framebuffer = cmd_buffer->framebuffer; for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { uint32_t bias = s == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0; - uint32_t count, *table; + uint32_t binding_table_length, *table; struct anv_state table_state; if (layout) - count = layout->stage[s].count + bias; + binding_table_length = layout->stage[s].surface_count + bias; else if (s == VK_SHADER_STAGE_FRAGMENT) - count = framebuffer->color_attachment_count; + binding_table_length = framebuffer->color_attachment_count; else - count = 0; + binding_table_length = 0; - if (count == 0) - continue; - - table_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, - count * 4, 32); + if (binding_table_length > 0) + table_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, + binding_table_length * 4, 32); table = table_state.map; if (s == VK_SHADER_STAGE_FRAGMENT) { @@ -2182,8 +2197,8 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) } if (layout) { - for (uint32_t i = 0; i < layout->stage[s].count; i++) { - struct anv_pipeline_layout_entry *e = &layout->stage[s].entries[i]; + for (uint32_t i = 0; i < layout->stage[s].surface_count; i++) { + struct anv_pipeline_layout_entry *e = &layout->stage[s].surface_entries[i]; struct anv_descriptor *d = &cmd_buffer->descriptor_sets[e->set]->descriptors[e->index]; struct anv_image_view *image_view; @@ -2227,15 +2242,62 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) } } - /* FIXME: Samplers */ - /* The binding table pointer commands all have the same structure, only * the opcode differs. 
*/ - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, - ._3DCommandSubOpcode = opcodes[s], - .PointertoVSBindingTable = table_state.offset); + static const uint32_t binding_table_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + if (binding_table_length > 0) + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[s], + .PointertoVSBindingTable = table_state.offset); + + + if (layout && layout->stage[s].sampler_count > 0) { + struct anv_state sampler_state; + + sampler_state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + layout->stage[s].sampler_count * 16, 32); + for (uint32_t i = 0; i < layout->stage[s].sampler_count; i++) { + struct anv_pipeline_layout_entry *e = &layout->stage[s].sampler_entries[i]; + struct anv_sampler *sampler; + + switch (e->type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + sampler = + cmd_buffer->descriptor_sets[e->set]->descriptors[e->index].sampler; + break; + default: + unreachable("non-sampler descriptor in sampler entries"); + break; + } + + memcpy(sampler_state.map + i * 16, sampler->state, sizeof(sampler->state)); + } + + static const uint32_t sampler_state_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 43, + [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 46, + [VK_SHADER_STAGE_FRAGMENT] = 47, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[s], + .PointertoVSSamplerState = sampler_state.offset); + } } } diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index ce1fa303d7c..a260c3cd24a 100644 --- 
a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -560,43 +560,81 @@ VkResult VKAPI vkCreatePipelineLayout( { struct anv_device *device = (struct anv_device *) _device; struct anv_pipeline_layout *layout; - struct anv_pipeline_layout_entry *entry; - uint32_t total; + struct anv_pipeline_layout_entry *sampler_entry, *surface_entry; + uint32_t sampler_total, surface_total; size_t size; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); - total = 0; + sampler_total = 0; + surface_total = 0; for (uint32_t i = 0; i < pCreateInfo->descriptorSetCount; i++) { struct anv_descriptor_set_layout *set_layout = (struct anv_descriptor_set_layout *) pCreateInfo->pSetLayouts[i]; - for (uint32_t j = 0; j < set_layout->count; j++) - total += set_layout->total; + for (uint32_t j = 0; j < set_layout->count; j++) { + sampler_total += set_layout->sampler_total; + surface_total += set_layout->surface_total; + } } - size = sizeof(*layout) + total * sizeof(layout->entries[0]); + size = sizeof(*layout) + + (sampler_total + surface_total) * sizeof(layout->entries[0]); layout = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (layout == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - entry = layout->entries; + sampler_entry = layout->entries; + surface_entry = layout->entries + sampler_total; for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { - layout->stage[s].entries = entry; + layout->stage[s].sampler_entries = sampler_entry; + layout->stage[s].surface_entries = surface_entry; for (uint32_t i = 0; i < pCreateInfo->descriptorSetCount; i++) { struct anv_descriptor_set_layout *set_layout = (struct anv_descriptor_set_layout *) pCreateInfo->pSetLayouts[i]; - for (uint32_t j = 0; j < set_layout->count; j++) + for (uint32_t j = 0; j < set_layout->count; j++) { if (set_layout->bindings[j].mask & (1 << s)) { - entry->type = set_layout->bindings[j].type; - entry->set = i; - entry->index = j; - entry++; + switch 
(set_layout->bindings[j].type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + sampler_entry->type = set_layout->bindings[j].type; + sampler_entry->set = i; + sampler_entry->index = j; + sampler_entry++; + break; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + sampler_entry->type = set_layout->bindings[j].type; + sampler_entry->set = i; + sampler_entry->index = j; + sampler_entry++; + /* fall through */ + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + surface_entry->type = set_layout->bindings[j].type; + surface_entry->set = i; + surface_entry->index = j; + surface_entry++; + break; + + default: + break; + } } + } } - layout->stage[s].count = entry - layout->stage[s].entries; + layout->stage[s].sampler_count = + sampler_entry - layout->stage[s].sampler_entries; + layout->stage[s].surface_count = + surface_entry - layout->stage[s].surface_entries; } *pPipelineLayout = (VkPipelineLayout) layout; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index dbb29acbf20..09535a73b64 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -446,7 +446,8 @@ struct anv_dynamic_cb_state { }; struct anv_descriptor_set_layout { - uint32_t total; /* total number of entries in all stages */ + uint32_t sampler_total; /* total number of samplers in all stages */ + uint32_t surface_total; /* total number of surfaces in all stages */ uint32_t count; struct { VkDescriptorType type; @@ -476,8 +477,10 @@ struct anv_pipeline_layout_entry { struct anv_pipeline_layout { struct { - uint32_t count; - struct anv_pipeline_layout_entry *entries; + uint32_t sampler_count; + struct anv_pipeline_layout_entry *sampler_entries; + uint32_t surface_count; + struct 
anv_pipeline_layout_entry *surface_entries; } stage[VK_NUM_SHADER_STAGE]; struct anv_pipeline_layout_entry entries[0]; -- cgit v1.2.3 From 50806e8dec69fd33d91776272b4f06afdd7dc5b8 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 May 2015 17:49:26 -0700 Subject: vk: Install headers I need this for building a testsuite. --- src/vulkan/Makefile.am | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index b131ac13897..af2cde5dbbe 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -19,6 +19,14 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. + +vulkan_includedir = $(includedir)/vulkan + +vulkan_include_HEADERS = \ + $(top_srcdir)/include/vulkan/vk_platform.h \ + $(top_srcdir)/include/vulkan/vulkan.h \ + $(top_srcdir)/include/vulkan/vulkan_intel.h + lib_LTLIBRARIES = libvulkan.la # The gallium includes are for the util/u_math.h include from main/macros.h -- cgit v1.2.3 From 43126388cd64a72f2566bbd8eda8fabc1c411447 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 May 2015 17:34:37 -0700 Subject: vk/meta: Save/restore more stuff in cmd_buffer_restore --- src/vulkan/meta.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index c346c3ef871..a10c21e84c5 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -161,24 +161,30 @@ struct anv_saved_state { struct anv_buffer *buffer; VkDeviceSize offset; } vb[2]; + struct anv_descriptor_set *dsets[1]; struct anv_pipeline *pipeline; }; static void -anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, struct anv_saved_state *state) +anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, + struct anv_saved_state *state) { memcpy(state->vb, cmd_buffer->vb, sizeof(state->vb)); + memcpy(state->dsets, cmd_buffer->descriptor_sets, sizeof(state->dsets)); state->pipeline = 
cmd_buffer->pipeline; } static void -anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, struct anv_saved_state *state) +anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, + const struct anv_saved_state *state) { memcpy(cmd_buffer->vb, state->vb, sizeof(state->vb)); + memcpy(cmd_buffer->descriptor_sets, state->dsets, sizeof(state->dsets)); cmd_buffer->pipeline = state->pipeline; cmd_buffer->vb_dirty |= (1 << ARRAY_SIZE(state->vb)) - 1; - cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; } void -- cgit v1.2.3 From 1f7dcf9d75cb64195b5fdfb654249c002226c649 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 May 2015 17:37:12 -0700 Subject: vk/image: Stash more information in images and views --- src/vulkan/image.c | 11 ++++++++--- src/vulkan/private.h | 5 +++++ 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 4dd96ceac3b..c1504211cad 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -244,6 +244,7 @@ VkResult VKAPI vkCreateImage( image->bo = NULL; image->offset = 0; image->type = pCreateInfo->imageType; + image->format = pCreateInfo->format; image->extent = pCreateInfo->extent; assert(image->extent.width > 0); @@ -362,6 +363,9 @@ VkResult VKAPI vkCreateImageView( view->surface_state = create_surface_state(device, view->image, format); + /* TODO: Miplevels */ + view->extent = view->image->extent; + *pView = (VkImageView) view; return VK_SUCCESS; @@ -374,7 +378,6 @@ VkResult VKAPI vkCreateColorAttachmentView( { struct anv_device *device = (struct anv_device *) _device; struct anv_color_attachment_view *view; - struct anv_image *image; const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); @@ -386,9 +389,11 @@ VkResult VKAPI vkCreateColorAttachmentView( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); view->image = (struct anv_image *) 
pCreateInfo->image; - image = view->image; - view->surface_state = create_surface_state(device, image, format); + view->surface_state = create_surface_state(device, view->image, format); + + /* TODO: Miplevels */ + view->extent = view->image->extent; *pView = (VkColorAttachmentView) view; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 09535a73b64..fec29306064 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -610,6 +610,7 @@ anv_format_for_vk_format(VkFormat format); struct anv_image { VkImageType type; + VkFormat format; VkExtent3D extent; uint32_t tile_mode; VkDeviceSize size; @@ -630,11 +631,15 @@ struct anv_buffer_view { struct anv_color_attachment_view { struct anv_image * image; struct anv_state surface_state; + + VkExtent3D extent; }; struct anv_image_view { struct anv_image * image; struct anv_state surface_state; + + VkExtent3D extent; }; struct anv_sampler { -- cgit v1.2.3 From cd197181f28d683b0032828f67fbeab768920789 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 May 2015 22:11:20 -0700 Subject: vk/compiler: Zero the prog data We use prog_data[stage] != NULL to determine whether or not we need to clean up that stage. Make sure it default to NULL. --- src/vulkan/compiler.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index f81008b8f72..bf3262fecd4 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -833,6 +833,11 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) struct brw_context *brw = compiler->brw; struct anv_device *device = pipeline->device; + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. 
+ */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + brw->use_rep_send = pipeline->use_repclear; brw->no_simd8 = pipeline->use_repclear; -- cgit v1.2.3 From 07943656a7e0cadc6252621e877ed38937f67b91 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 May 2015 22:12:01 -0700 Subject: vk/compiler: Set the binding table texture_start This is by no means a complete solution to the binding table problems. However, it does make texturing actually work. Before, we were texturing from the render target since they were both starting at 0. --- src/vulkan/compiler.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index bf3262fecd4..d843d035508 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -70,6 +70,8 @@ set_binding_table_layout(struct brw_stage_prog_data *prog_data, else bias = 0; + prog_data->binding_table.texture_start = bias; + count = pipeline->layout->stage[stage].surface_count; entries = pipeline->layout->stage[stage].surface_entries; -- cgit v1.2.3 From a1309c5255a6474a46a2d3f1c828c3f3071c760d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 May 2015 22:13:05 -0700 Subject: vk/pass: Emit a flushing pipe control at the end of the pass This is rather crude but it at least makes sure that all the render targets get flushed at the end of the pass. We probably actually want to do somthing based on image layout traansitions, but this will work for now. --- src/vulkan/device.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index d5eab4866d4..cf855a87387 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2774,5 +2774,20 @@ void VKAPI vkCmdEndRenderPass( VkCmdBuffer cmdBuffer, VkRenderPass renderPass) { + /* Emit a flushing pipe control at the end of a pass. This is kind of a + * hack but it ensures that render targets always actually get written. 
+ * Eventually, we should do flushing based on image format transitions + * or something of that nature. + */ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .PostSyncOperation = NoWrite, + .RenderTargetCacheFlushEnable = true, + .InstructionCacheInvalidateEnable = true, + .DepthCacheFlushEnable = true, + .VFCacheInvalidationEnable = true, + .TextureCacheInvalidationEnable = true, + .CommandStreamerStallEnable = true); + stub(); } -- cgit v1.2.3 From d3d4776202e34e0c6651aa7d701492fb9c6f4254 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 May 2015 22:16:04 -0700 Subject: vk/pipeline: Add an extra flag for force-disabling the vertex shader This way we can pass in a vertex shader and yet have the pipeline emit an empty 3DSTATE_VS packet. We need this for meta because we need to trick the compiler into not deleting our inputs but at the same time disable the VS so that we can use a rectlist. This should go away once we actually get SPIR-V. 
--- src/vulkan/pipeline.c | 2 +- src/vulkan/private.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index a260c3cd24a..1c4eec5a776 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -388,7 +388,7 @@ anv_pipeline_create( offset = 1; length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; - if (pipeline->vs_simd8 == NO_KERNEL) + if (pipeline->vs_simd8 == NO_KERNEL || (extra && extra->disable_vs)) anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, .FunctionEnable = false, .VertexURBEntryOutputReadOffset = 1, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index fec29306064..b6aac741cdd 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -583,6 +583,7 @@ struct anv_pipeline_create_info { bool use_repclear; bool disable_viewport; bool disable_scissor; + bool disable_vs; bool use_rectlist; }; -- cgit v1.2.3 From 94b8c0b810827c994c5ba1dff926fb2a2565258a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 May 2015 22:17:38 -0700 Subject: vk/pipeline: Default to a SamplerCount of 1 for PS --- src/vulkan/pipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 1c4eec5a776..30185dd0b79 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -457,7 +457,7 @@ anv_pipeline_create( .SingleProgramFlow = false, .VectorMaskEnable = true, - .SamplerCount = 0, + .SamplerCount = 1, .ScratchSpaceBasePointer = 0, .PerThreadScratchSpace = 0, -- cgit v1.2.3 From bd5b76d6d0793e12be300d65bd68d430f3b255ec Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 May 2015 17:37:47 -0700 Subject: vk/meta: Add the start of a blit implementation Currently, we only implement CopyImageToBuffer --- src/vulkan/meta.c | 466 ++++++++++++++++++++++++++++++++++++++++++++++++++- src/vulkan/private.h | 7 + 2 files changed, 466 insertions(+), 7 deletions(-) (limited to 
'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index a10c21e84c5..5973013a5b8 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -187,6 +187,13 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; } +struct vue_header { + uint32_t Reserved; + uint32_t RTAIndex; + uint32_t ViewportIndex; + float PointWidth; +}; + void anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, struct anv_render_pass *pass) @@ -198,12 +205,7 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, uint32_t size; struct instance_data { - struct { - uint32_t Reserved; - uint32_t RTAIndex; - uint32_t ViewportIndex; - float PointWidth; - } vue_header; + struct vue_header vue_header; float color[4]; } *instance_data; @@ -282,6 +284,375 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, } +static void +anv_device_init_meta_blit_state(struct anv_device *device) +{ + VkPipelineIaStateCreateInfo ia_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .disableVertexReuse = false, + .primitiveRestartEnable = false, + .primitiveRestartIndex = 0 + }; + + /* We don't use a vertex shader for clearing, but instead build and pass + * the VUEs directly to the rasterization backend. 
+ */ + static const char vs_source[] = GLSL( + in vec2 a_pos; + in vec2 a_tex_coord; + out vec4 v_tex_coord; + void main() + { + v_tex_coord = vec4(a_tex_coord, 0, 1); + gl_Position = vec4(a_pos, 0, 1); + } + ); + + static const char fs_source[] = GLSL( + out vec4 f_color; + in vec4 v_tex_coord; + layout(set = 0, index = 0) uniform sampler2D u_tex; + void main() + { + f_color = texture2D(u_tex, v_tex_coord.xy); + } + ); + + VkShader vs; + vkCreateShader((VkDevice) device, + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .codeSize = sizeof(vs_source), + .pCode = vs_source, + .flags = 0 + }, + &vs); + + VkShader fs; + vkCreateShader((VkDevice) device, + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .codeSize = sizeof(fs_source), + .pCode = fs_source, + .flags = 0 + }, + &fs); + + VkPipelineShaderStageCreateInfo vs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &ia_create_info, + .shader = { + .stage = VK_SHADER_STAGE_VERTEX, + .shader = vs, + .linkConstBufferCount = 0, + .pLinkConstBufferInfo = NULL, + .pSpecializationInfo = NULL + } + }; + + VkPipelineShaderStageCreateInfo fs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &vs_create_info, + .shader = { + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .linkConstBufferCount = 0, + .pLinkConstBufferInfo = NULL, + .pSpecializationInfo = NULL + } + }; + + VkPipelineVertexInputCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO, + .pNext = &fs_create_info, + .bindingCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = 0, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + { + .binding = 1, + .strideInBytes = 16, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + }, + .attributeCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + 
{ + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offsetInBytes = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offsetInBytes = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offsetInBytes = 8 + } + } + }; + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .count = 1, + .pBinding = (VkDescriptorSetLayoutBinding[]) { + { + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .count = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }; + vkCreateDescriptorSetLayout((VkDevice) device, &ds_layout_info, + &device->blit_state.ds_layout); + + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .descriptorSetCount = 1, + .pSetLayouts = &device->blit_state.ds_layout, + }; + + VkPipelineLayout pipeline_layout; + vkCreatePipelineLayout((VkDevice) device, &pipeline_layout_info, + &pipeline_layout); + + VkPipelineRsStateCreateInfo rs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, + .pNext = &vi_create_info, + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }; + + VkGraphicsPipelineCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = &rs_create_info, + .flags = 0, + .layout = pipeline_layout, + }; + + anv_pipeline_create((VkDevice) device, &pipeline_info, + &(struct anv_pipeline_create_info) { + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }, + &device->blit_state.pipeline); + + vkDestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, vs); + vkDestroyObject((VkDevice) device, 
VK_OBJECT_TYPE_SHADER, fs); + + vkCreateDynamicRasterState((VkDevice) device, + &(VkDynamicRsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, + }, + &device->blit_state.rs_state); +} + +static void +meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_saved_state *saved_state) +{ + struct anv_device *device = cmd_buffer->device; + + anv_cmd_buffer_save(cmd_buffer, saved_state); + + if ((VkPipeline) cmd_buffer->pipeline != device->blit_state.pipeline) + vkCmdBindPipeline((VkCmdBuffer) cmd_buffer, + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->blit_state.pipeline); + + /* We don't need anything here, only set if not already set. */ + if (cmd_buffer->rs_state == NULL) + vkCmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_RASTER, + device->blit_state.rs_state); +} + +struct blit_region { + VkOffset3D src_offset; + VkExtent3D src_extent; + VkOffset3D dest_offset; + VkExtent3D dest_extent; +}; + +static void +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image_view *src, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_color_attachment_view *dest, + VkOffset3D dest_offset, + VkExtent3D dest_extent) +{ + struct anv_device *device = cmd_buffer->device; + + struct blit_vb_data { + float pos[2]; + float tex_coord[2]; + } *vb_data; + + unsigned vb_size = sizeof(struct vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct vue_header)); + vb_data = vb_state.map + sizeof(struct vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + dest_offset.x + dest_extent.width, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)(src_offset.x + src_extent.width) / (float)src->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src->extent.height, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + 
dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)src_offset.x / (float)src->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src->extent.height, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y, + }, + .tex_coord = { + (float)src_offset.x / (float)src->extent.width, + (float)src_offset.y / (float)src->extent.height, + }, + }; + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->surface_state_block_pool.bo, + .offset = vb_state.offset, + }; + + vkCmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, + (VkBuffer[]) { + (VkBuffer) &vertex_buffer, + (VkBuffer) &vertex_buffer + }, + (VkDeviceSize[]) { + 0, + sizeof(struct vue_header), + }); + + uint32_t count; + VkDescriptorSet set; + vkAllocDescriptorSets((VkDevice) device, 0 /* pool */, + VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, + 1, &device->blit_state.ds_layout, &set, &count); + vkUpdateDescriptors((VkDevice) device, set, 1, + (const void * []) { + &(VkUpdateImages) { + .sType = VK_STRUCTURE_TYPE_UPDATE_IMAGES, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .binding = 0, + .count = 1, + .pImageViews = (VkImageViewAttachInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_ATTACH_INFO, + .view = (VkImageView) src, + .layout = VK_IMAGE_LAYOUT_GENERAL, + } + } + } + }); + + VkFramebufferCreateInfo fb_info = { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .colorAttachmentCount = 1, + .pColorAttachments = (VkColorAttachmentBindInfo[]) { + { + .view = (VkColorAttachmentView) dest, + .layout = VK_IMAGE_LAYOUT_GENERAL + } + }, + .pDepthStencilAttachment = NULL, + .sampleCount = 1, + .width = dest->extent.width, + .height = dest->extent.height, + .layers = 1 + }; + + struct anv_framebuffer *fb; + vkCreateFramebuffer((VkDevice) device, &fb_info, (VkFramebuffer *)&fb); + + VkRenderPassCreateInfo pass_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .renderArea = { { 0, 0 }, { 
dest->extent.width, dest->extent.height } }, + .colorAttachmentCount = 1, + .extent = { }, + .sampleCount = 1, + .layers = 1, + .pColorFormats = (VkFormat[]) { dest->image->format }, + .pColorLayouts = (VkImageLayout[]) { VK_IMAGE_LAYOUT_GENERAL }, + .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_LOAD }, + .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, + .pColorLoadClearValues = (VkClearColor[]) { + { .color = { .floatColor = { 1.0, 0.0, 0.0, 1.0 } }, .useRawValue = false } + }, + .depthStencilFormat = VK_FORMAT_UNDEFINED, + }; + + VkRenderPass pass; + vkCreateRenderPass((VkDevice )device, &pass_info, &pass); + + vkCmdBeginRenderPass((VkCmdBuffer) cmd_buffer, + &(VkRenderPassBegin) { + .renderPass = pass, + .framebuffer = (VkFramebuffer) fb, + }); + + vkCmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_VIEWPORT, fb->vp_state); + + vkCmdBindDescriptorSets((VkCmdBuffer) cmd_buffer, + VK_PIPELINE_BIND_POINT_GRAPHICS, 0, 1, + &set, 0, NULL); + + vkCmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, 1); + + vkCmdEndRenderPass((VkCmdBuffer) cmd_buffer, pass); +} + +static void +meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, + const struct anv_saved_state *saved_state) +{ + anv_cmd_buffer_restore(cmd_buffer, saved_state); +} + void VKAPI vkCmdCopyBuffer( VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, @@ -335,7 +706,87 @@ void VKAPI vkCmdCopyImageToBuffer( uint32_t regionCount, const VkBufferImageCopy* pRegions) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; + VkDevice vk_device = (VkDevice) cmd_buffer->device; + struct anv_image *src_image = (struct anv_image *)srcImage; + struct anv_buffer *dest_buffer = (struct anv_buffer *)destBuffer; + struct anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageViewCreateInfo src_view_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image 
= srcImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].imageSubresource.aspect, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .mipLevels = 1, + .baseArraySlice = pRegions[r].imageSubresource.arraySlice, + .arraySize = 1 + }, + .minLod = 0 + }; + + VkImageView src_view; + vkCreateImageView(vk_device, &src_view_info, &src_view); + + VkImageCreateInfo dest_image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = src_image->format, + .extent = { + .width = pRegions[r].imageExtent.width, + .height = pRegions[r].imageExtent.height, + .depth = 1, + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }; + + struct anv_image *dest_image; + vkCreateImage(vk_device, &dest_image_info, (VkImage *)&dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + dest_image->bo = dest_buffer->bo; + dest_image->offset = dest_buffer->offset + pRegions[r].bufferOffset; + + VkColorAttachmentViewCreateInfo dest_view_info = { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = (VkImage)dest_image, + .format = src_image->format, + .mipLevel = 0, + .baseArraySlice = 0, + .arraySize = 1, + }; + + VkColorAttachmentView dest_view; + vkCreateColorAttachmentView(vk_device, &dest_view_info, &dest_view); + + meta_emit_blit(cmd_buffer, + (struct anv_image_view *)src_view, + pRegions[r].imageOffset, + pRegions[r].imageExtent, + (struct anv_color_attachment_view *)dest_view, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent); + } + + meta_finish_blit(cmd_buffer, &saved_state); } void VKAPI vkCmdCloneImageData( @@ -407,4 +858,5 @@ void anv_device_init_meta(struct anv_device *device) { anv_device_init_meta_clear_state(device); + anv_device_init_meta_blit_state(device); } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index b6aac741cdd..34b4ce8d14a 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -270,6 +270,12 @@ struct anv_clear_state { VkDynamicRsState rs_state; }; +struct anv_blit_state { + VkPipeline pipeline; + VkDynamicRsState rs_state; + VkDescriptorSetLayout ds_layout; +}; + struct anv_device { struct anv_instance * instance; uint32_t chipset_id; @@ -287,6 +293,7 @@ struct anv_device { struct anv_state_pool surface_state_pool; struct anv_clear_state clear_state; + struct anv_blit_state blit_state; struct anv_compiler * compiler; struct anv_aub_writer * aub_writer; -- cgit v1.2.3 From 4fb8bddc58099a87aa58230294416ea932db88d9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 May 2015 22:18:37 -0700 Subject: vk/test: Do a copy of the RT into a linear buffer and write that to a PNG --- src/vulkan/vk.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c index 
f1b3629e145..9efa31d0d20 100644 --- a/src/vulkan/vk.c +++ b/src/vulkan/vk.c @@ -397,14 +397,33 @@ int main(int argc, char *argv[]) VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, &size, &vb_requirements); + VkBuffer image_buffer; + vkCreateBuffer(device, + &(VkBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = width * height * 4, + .usage = VK_BUFFER_USAGE_TRANSFER_DESTINATION_BIT, + .flags = 0 + }, + &image_buffer); + + VkMemoryRequirements ib_requirements; + size = sizeof(ib_requirements); + vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, image_buffer, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &ib_requirements); + printf("buffer size: %lu, buffer alignment: %lu\n", buffer_requirements.size, buffer_requirements.alignment); printf("rt size: %lu, rt alignment: %lu\n", rt_requirements.size, rt_requirements.alignment); printf("vb size: %lu vb alignment: %lu\n", vb_requirements.size, vb_requirements.alignment); + printf("ib size: %lu ib alignment: %lu\n", + ib_requirements.size, ib_requirements.alignment); - size_t mem_size = rt_requirements.size + 2048 + 16 * 16 * 4; + size_t mem_size = rt_requirements.size + ib_requirements.size + + 2048 + 16 * 16 * 4; VkDeviceMemory mem; vkAllocMemory(device, &(VkMemoryAllocInfo) { @@ -521,6 +540,11 @@ int main(int argc, char *argv[]) 0, /* allocation index; for objects which need to bind to multiple mems */ mem, 2048); + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, + image_buffer, + 0, /* allocation index; for objects which need to bind to multiple mems */ + mem, 2048 + rt_requirements.size); + const uint32_t texture_width = 16, texture_height = 16; VkImage texture; vkCreateImage(device, @@ -732,13 +756,29 @@ int main(int argc, char *argv[]) vkCmdEndRenderPass(cmdBuffer, pass); + VkBufferImageCopy copy = { + .bufferOffset = 0, + .imageSubresource = { + .aspect = VK_IMAGE_ASPECT_COLOR, + .mipLevel = 0, + .arraySlice = 0, + }, + .imageOffset = { .x = 0, .y = 0, .z = 0 }, + .imageExtent = { 
.width = width, .height = height, .depth = 1 }, + }; + + vkCmdCopyImageToBuffer(cmdBuffer, rt, VK_IMAGE_LAYOUT_GENERAL, + image_buffer, 1, ©); + vkEndCommandBuffer(cmdBuffer); vkQueueSubmit(queue, 1, &cmdBuffer, 0); vkQueueWaitIdle(queue); - write_png("vk.png", width, height, 1024, map + 2048); + write_png("vk-map.png", width, height, 1024, map + 2048); + write_png("vk-copy.png", width, height, 1024, + map + 2048 + rt_requirements.size); vkDestroyObject(device, VK_OBJECT_TYPE_IMAGE, texture); vkDestroyObject(device, VK_OBJECT_TYPE_IMAGE, rt); -- cgit v1.2.3 From 8c92701a69c351775989c7e3a18898af242a00dc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 May 2015 22:27:38 -0700 Subject: vk/test: Use VK_IMAGE_TILING_OPTIMAL for the render target --- src/vulkan/vk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c index 9efa31d0d20..484f61601c3 100644 --- a/src/vulkan/vk.c +++ b/src/vulkan/vk.c @@ -369,7 +369,7 @@ int main(int argc, char *argv[]) .mipLevels = 1, .arraySize = 1, .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, + .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, .flags = 0, }, -- cgit v1.2.3 From 018a0c17410c27b9a1971876bb1449f3c36df62f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 May 2015 11:39:32 -0700 Subject: vk/meta: Add a better comment about the VS for blits --- src/vulkan/meta.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 5973013a5b8..e225da75b24 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -296,7 +296,9 @@ anv_device_init_meta_blit_state(struct anv_device *device) }; /* We don't use a vertex shader for clearing, but instead build and pass - * the VUEs directly to the rasterization backend. + * the VUEs directly to the rasterization backend. 
However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. */ static const char vs_source[] = GLSL( in vec2 a_pos; -- cgit v1.2.3 From 79ace6def603f235b40f5df16f5d88dd6fb4c4d5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 May 2015 11:58:58 -0700 Subject: vk/meta: Add a magic GLSL shader source macro --- src/vulkan/glsl_helpers.h | 37 +++++++++++++++++++++++++++++++++++++ src/vulkan/meta.c | 42 ++++++------------------------------------ 2 files changed, 43 insertions(+), 36 deletions(-) create mode 100644 src/vulkan/glsl_helpers.h (limited to 'src') diff --git a/src/vulkan/glsl_helpers.h b/src/vulkan/glsl_helpers.h new file mode 100644 index 00000000000..5288be56a20 --- /dev/null +++ b/src/vulkan/glsl_helpers.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#define GLSL_VK_SHADER(device, stage, ...) ({ \ + VkShader __shader; \ + const char __src[] = "#version 330\n" STRINGIFY((__ARGV__)); \ + VkShaderCreateInfo __shader_create_info = { \ + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, \ + .codeSize = sizeof(__src), \ + .pCode = __src, \ + .flags = (1 << 31) /* GLSL back-door hack */ \ + }; \ + vkCreateShader((VkDevice) device, &__shader_create_info, &__shader); \ + __shader; \ +}) diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index e225da75b24..d39cba4ea42 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -28,8 +28,7 @@ #include #include "private.h" - -#define GLSL(src) "#version 330\n" #src +#include "glsl_helpers.h" static void anv_device_init_meta_clear_state(struct anv_device *device) @@ -45,23 +44,14 @@ anv_device_init_meta_clear_state(struct anv_device *device) /* We don't use a vertex shader for clearing, but instead build and pass * the VUEs directly to the rasterization backend. */ - static const char fs_source[] = GLSL( + VkShader fs = GLSL_VK_SHADER(device, FRAGMENT, out vec4 f_color; flat in vec4 v_color; void main() { f_color = v_color; - }); - - VkShader fs; - vkCreateShader((VkDevice) device, - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .codeSize = sizeof(fs_source), - .pCode = fs_source, - .flags = 0 - }, - &fs); + } + ); VkPipelineShaderStageCreateInfo fs_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -300,7 +290,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) * to provide GLSL source for the vertex shader so that the compiler * does not dead-code our inputs. 
*/ - static const char vs_source[] = GLSL( + VkShader vs = GLSL_VK_SHADER(device, VERTEX, in vec2 a_pos; in vec2 a_tex_coord; out vec4 v_tex_coord; @@ -311,7 +301,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) } ); - static const char fs_source[] = GLSL( + VkShader fs = GLSL_VK_SHADER(device, FRAGMENT, out vec4 f_color; in vec4 v_tex_coord; layout(set = 0, index = 0) uniform sampler2D u_tex; @@ -321,26 +311,6 @@ anv_device_init_meta_blit_state(struct anv_device *device) } ); - VkShader vs; - vkCreateShader((VkDevice) device, - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .codeSize = sizeof(vs_source), - .pCode = vs_source, - .flags = 0 - }, - &vs); - - VkShader fs; - vkCreateShader((VkDevice) device, - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .codeSize = sizeof(fs_source), - .pCode = fs_source, - .flags = 0 - }, - &fs); - VkPipelineShaderStageCreateInfo vs_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = &ia_create_info, -- cgit v1.2.3 From 41db8db0f2310f2620ef63a3c24ab5842fe88118 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 May 2015 19:07:50 -0700 Subject: vk: Add a GLSL scraper utility This new utility, glsl_scraper.py scrapes C files for instances of the GLSL_VK_SHADER macro, pulls out the shader source, and compiles it to SPIR-V. The compilation is done using glslValidator. The result is then placed into another C file as arrays of dwords that can be easiliy handed to a Vulkan driver. --- src/vulkan/glsl_scraper.py | 236 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 src/vulkan/glsl_scraper.py (limited to 'src') diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py new file mode 100644 index 00000000000..4b99ae0a3e2 --- /dev/null +++ b/src/vulkan/glsl_scraper.py @@ -0,0 +1,236 @@ +#! 
/usr/bin/env python + +def print_usage(err): + print """\ +glsl_scraper.py [options] file + +This program scrapes a C file for any instance of the GLSL_VK_SHADER macro, +grabs the GLSL source code, compiles it to SPIR-V. The resulting SPIR-V +code is written to another C file as an array of 32-bit words. + +If '-' is passed as the input file or output file, stdin or stdout will be +used instead of a file on disc. + +Options: + -o outfile Output to the given file (default: stdout) + --with-glslang=PATH Full path to the glslangValidator program""" + exit(err) + +import os, sys, re, cStringIO, tempfile, subprocess, struct, shutil + +class Shader: + def __init__(self, stage, line): + self.stream = cStringIO.StringIO() + self.stage = stage + self.line = line + + if self.stage == 'VERTEX': + self.ext = 'vert' + elif self.stage == 'TESS_CONTROL': + self.ext = 'tesc' + elif self.stage == 'TESS_EVALUATION': + self.ext = 'tese' + elif self.stage == 'GEOMETRY': + self.ext = 'geom' + elif self.stage == 'FRAGMENT': + self.ext = 'frag' + elif self.stage == 'COMPUTE': + self.ext = 'comp' + else: + assert False + + def add_text(self, s): + self.stream.write(s) + + def glsl_source(self): + return self.stream.getvalue() + + def compile(self): + # We can assume if we got here that we have a temp directory and that + # we're currently living in it. 
+ glsl_fname = 'shader{0}.{1}'.format(self.line, self.ext) + spirv_fname = self.ext + '.spv' + + glsl_file = open(glsl_fname, 'w') + glsl_file.write('#version 330\n') + glsl_file.write(self.glsl_source()) + glsl_file.close() + + out = open('glslang.out', 'wb') + err = subprocess.call([glslang, '-V', glsl_fname], stdout=out) + if err != 0: + out = open('glslang.out', 'r') + sys.stderr.write(out.read()) + out.close() + exit(1) + + def dwords(f): + while True: + dword_str = f.read(4) + if not dword_str: + return + assert len(dword_str) == 4 + yield struct.unpack('I', dword_str)[0] + + spirv_file = open(spirv_fname, 'rb') + self.dwords = list(dwords(spirv_file)) + spirv_file.close() + + os.remove(glsl_fname) + os.remove(spirv_fname) + + def dump_c_code(self, f): + f.write('\n\n') + var_prefix = '_glsl_helpers_shader{0}'.format(self.line) + + # First dump the GLSL source as strings + f.write('static const char {0}_glsl_src[] ='.format(var_prefix)) + f.write('\n"#version 330\\n"') + for line in self.glsl_source().splitlines(): + if not line.strip(): + continue + f.write('\n"{0}\\n"'.format(line)) + f.write(';\n\n') + + # Now dump the SPIR-V source + f.write('static const uint32_t {0}_spir_v_src[] = {{'.format(var_prefix)) + line_start = 0 + while line_start < len(self.dwords): + f.write('\n ') + for i in range(line_start, min(line_start + 6, len(self.dwords))): + f.write(' 0x{:08x},'.format(self.dwords[i])) + line_start += 6 + f.write('\n};\n') + +token_exp = re.compile(r'(GLSL_VK_SHADER|\(|\)|,)') + +class Parser: + def __init__(self, f): + self.infile = f + self.paren_depth = 0 + self.shader = None + self.line_number = 1 + self.shaders = [] + + def tokenize(f): + leftover = '' + for line in f: + pos = 0 + while True: + m = token_exp.search(line, pos) + if m: + if m.start() > pos: + leftover += line[pos:m.start()] + pos = m.end() + + if leftover: + yield leftover + leftover = '' + + yield m.group(0) + + else: + leftover += line[pos:] + break + + self.line_number += 1 + 
+ if leftover: + yield leftover + + self.token_iter = tokenize(self.infile) + + def handle_shader_src(self): + paren_depth = 1 + for t in self.token_iter: + if t == '(': + paren_depth += 1 + elif t == ')': + paren_depth -= 1 + if paren_depth == 0: + return + + self.current_shader.add_text(t) + + def handle_macro(self): + line_number = self.line_number + + t = self.token_iter.next() + assert t == '(' + t = self.token_iter.next() + t = self.token_iter.next() + assert t == ',' + + stage = self.token_iter.next().strip() + + t = self.token_iter.next() + assert t == ',' + + self.current_shader = Shader(stage, line_number) + self.handle_shader_src() + self.shaders.append(self.current_shader) + + def run(self): + for t in self.token_iter: + if t == 'GLSL_VK_SHADER': + self.handle_macro() + +def open_file(name, mode): + if name == '-': + if mode == 'w': + return sys.stdout + elif mode == 'r': + return sys.stdin + else: + assert False + else: + return open(name, mode) + +infile = None +outfile = sys.stdout +glslang = 'glslangValidator' + +arg_idx = 1 +while arg_idx < len(sys.argv): + if sys.argv[arg_idx] == '-h': + print_usage(0) + elif sys.argv[arg_idx] == '-o': + arg_idx += 1 + outfile = open_file(sys.argv[arg_idx], 'w') + elif sys.argv[arg_idx].startswith('--with-glslang='): + glslang = sys.argv[arg_idx][len('--with-glslang='):] + else: + infile = open_file(sys.argv[arg_idx], 'r') + break + arg_idx += 1 + +if arg_idx < len(sys.argv) - 1 or not infile or not outfile: + print_usage(1) + +parser = Parser(infile) +parser.run() + +# glslangValidator has an absolutely *insane* interface. We pretty much +# have to run in a temporary directory. Sad day. +current_dir = os.getcwd() +tmpdir = tempfile.mkdtemp('glsl_scraper') + +try: + os.chdir(tmpdir) + + for shader in parser.shaders: + shader.compile() + + os.chdir(current_dir) +finally: + shutil.rmtree(tmpdir) + +outfile.write("""\ +/* =========================== DO NOT EDIT! 
=========================== + * This file is autogenerated by glsl_scraper.py. + */ + +#include """) + +for shader in parser.shaders: + shader.dump_c_code(outfile) -- cgit v1.2.3 From f5b0f1351f7155752d9caeb4dd8b46fdd051c629 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 13 May 2015 15:31:26 -0700 Subject: vk: Consolidate image, buffer and color attachment views These are all just surface state, offset and a bo. --- src/vulkan/device.c | 52 ++++++++++++++++++++-------------------------------- src/vulkan/image.c | 26 ++++++++++++++------------ src/vulkan/meta.c | 10 +++++----- src/vulkan/private.h | 29 ++++++----------------------- src/vulkan/vk.c | 10 +++++----- 5 files changed, 50 insertions(+), 77 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index cf855a87387..e1aac64ffb0 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1252,7 +1252,8 @@ VkResult VKAPI vkCreateBufferView( VkBufferView* pView) { struct anv_device *device = (struct anv_device *) _device; - struct anv_buffer_view *view; + struct anv_buffer *buffer = (struct anv_buffer *) pCreateInfo->buffer; + struct anv_surface_view *view; const struct anv_format *format; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); @@ -1262,10 +1263,11 @@ VkResult VKAPI vkCreateBufferView( if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - view->buffer = (struct anv_buffer *) pCreateInfo->buffer; - view->offset = pCreateInfo->offset; + view->bo = buffer->bo; + view->offset = buffer->offset + pCreateInfo->offset; view->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + view->format = pCreateInfo->format; format = anv_format_for_vk_format(pCreateInfo->format); /* This assumes RGBA float format. */ @@ -1306,7 +1308,7 @@ VkResult VKAPI vkCreateBufferView( .ShaderChannelSelectAlpha = SCS_ALPHA, .ResourceMinLOD = 0, /* FIXME: We assume that the image must be bound at this time. 
*/ - .SurfaceBaseAddress = { NULL, view->buffer->offset + view->offset }, + .SurfaceBaseAddress = { NULL, view->offset }, }; GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); @@ -1581,8 +1583,8 @@ void VKAPI vkUpdateDescriptors( update_sampler_textures = (VkUpdateSamplerTextures *) common; for (uint32_t j = 0; j < update_sampler_textures->count; j++) { - set->descriptors[update_sampler_textures->binding + j].image_view = - (struct anv_image_view *) + set->descriptors[update_sampler_textures->binding + j].view = + (struct anv_surface_view *) update_sampler_textures->pSamplerImageViews[j].pImageView->view; set->descriptors[update_sampler_textures->binding + j].sampler = (struct anv_sampler *) @@ -1594,8 +1596,8 @@ void VKAPI vkUpdateDescriptors( update_images = (VkUpdateImages *) common; for (uint32_t j = 0; j < update_images->count; j++) { - set->descriptors[update_images->binding + j].image_view = - (struct anv_image_view *) update_images->pImageViews[j].view; + set->descriptors[update_images->binding + j].view = + (struct anv_surface_view *) update_images->pImageViews[j].view; } break; @@ -1603,8 +1605,8 @@ void VKAPI vkUpdateDescriptors( update_buffers = (VkUpdateBuffers *) common; for (uint32_t j = 0; j < update_buffers->count; j++) { - set->descriptors[update_buffers->binding + j].buffer_view = - (struct anv_buffer_view *) update_buffers->pBufferViews[j].view; + set->descriptors[update_buffers->binding + j].view = + (struct anv_surface_view *) update_buffers->pBufferViews[j].view; } /* FIXME: descriptor arrays? 
*/ break; @@ -2185,24 +2187,21 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) if (s == VK_SHADER_STAGE_FRAGMENT) { for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) { - struct anv_color_attachment_view *view = framebuffer->color_attachments[i]; + struct anv_surface_view *view = framebuffer->color_attachments[i]; table[i] = view->surface_state.offset; /* Don't write the reloc back to the surface state. We do that at * submit time. Surface address is dwords 8-9. */ anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, view->surface_state.offset + 8 * sizeof(int32_t), - view->image->bo, view->image->offset); + view->bo, view->offset); } } if (layout) { for (uint32_t i = 0; i < layout->stage[s].surface_count; i++) { struct anv_pipeline_layout_entry *e = &layout->stage[s].surface_entries[i]; - struct anv_descriptor *d = - &cmd_buffer->descriptor_sets[e->set]->descriptors[e->index]; - struct anv_image_view *image_view; - struct anv_buffer_view *buffer_view; + struct anv_surface_view *view; switch (e->type) { case VK_DESCRIPTOR_TYPE_SAMPLER: @@ -2211,26 +2210,15 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - image_view = d->image_view; - table[bias + i] = image_view->surface_state.offset; - anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, - image_view->surface_state.offset + 8 * sizeof(int32_t), - image_view->image->bo, - image_view->image->offset); - break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - /* FIXME: What are these? TBOs? 
*/ - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - buffer_view = d->buffer_view; - table[bias + i] = buffer_view->surface_state.offset; + view = cmd_buffer->descriptor_sets[e->set]->descriptors[e->index].view; + table[bias + i] = view->surface_state.offset; anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, - buffer_view->surface_state.offset + 8 * sizeof(int32_t), - buffer_view->buffer->bo, - buffer_view->buffer->offset + buffer_view->offset); + view->surface_state.offset + 8 * sizeof(int32_t), + view->bo, view->offset); break; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: @@ -2673,7 +2661,7 @@ VkResult VKAPI vkCreateFramebuffer( framebuffer->color_attachment_count = pCreateInfo->colorAttachmentCount; for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) { framebuffer->color_attachments[i] = - (struct anv_color_attachment_view *) pCreateInfo->pColorAttachments[i].view; + (struct anv_surface_view *) pCreateInfo->pColorAttachments[i].view; } if (pCreateInfo->pDepthStencilAttachment) { diff --git a/src/vulkan/image.c b/src/vulkan/image.c index c1504211cad..aae9fbf0e8c 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -348,7 +348,8 @@ VkResult VKAPI vkCreateImageView( VkImageView* pView) { struct anv_device *device = (struct anv_device *) _device; - struct anv_image_view *view; + struct anv_surface_view *view; + struct anv_image *image = (struct anv_image *) pCreateInfo->image; const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); @@ -359,12 +360,13 @@ VkResult VKAPI vkCreateImageView( if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - view->image = (struct anv_image *) pCreateInfo->image; - - view->surface_state = create_surface_state(device, view->image, format); + view->bo = image->bo; + view->offset = image->offset; + view->surface_state = create_surface_state(device, image, format); + view->format = pCreateInfo->format; /* TODO: Miplevels */ - 
view->extent = view->image->extent; + view->extent = image->extent; *pView = (VkImageView) view; @@ -377,7 +379,8 @@ VkResult VKAPI vkCreateColorAttachmentView( VkColorAttachmentView* pView) { struct anv_device *device = (struct anv_device *) _device; - struct anv_color_attachment_view *view; + struct anv_surface_view *view; + struct anv_image *image = (struct anv_image *) pCreateInfo->image; const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); @@ -388,12 +391,11 @@ VkResult VKAPI vkCreateColorAttachmentView( if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - view->image = (struct anv_image *) pCreateInfo->image; - - view->surface_state = create_surface_state(device, view->image, format); - - /* TODO: Miplevels */ - view->extent = view->image->extent; + view->bo = image->bo; + view->offset = image->offset; + view->surface_state = create_surface_state(device, image, format); + view->extent = image->extent; + view->format = pCreateInfo->format; *pView = (VkColorAttachmentView) view; diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index d39cba4ea42..4f210e3f235 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -468,10 +468,10 @@ struct blit_region { static void meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_image_view *src, + struct anv_surface_view *src, VkOffset3D src_offset, VkExtent3D src_extent, - struct anv_color_attachment_view *dest, + struct anv_surface_view *dest, VkOffset3D dest_offset, VkExtent3D dest_extent) { @@ -587,7 +587,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .extent = { }, .sampleCount = 1, .layers = 1, - .pColorFormats = (VkFormat[]) { dest->image->format }, + .pColorFormats = (VkFormat[]) { dest->format }, .pColorLayouts = (VkImageLayout[]) { VK_IMAGE_LAYOUT_GENERAL }, .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_LOAD }, .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, @@ -750,10 +750,10 @@ void VKAPI 
vkCmdCopyImageToBuffer( vkCreateColorAttachmentView(vk_device, &dest_view_info, &dest_view); meta_emit_blit(cmd_buffer, - (struct anv_image_view *)src_view, + (struct anv_surface_view *)src_view, pRegions[r].imageOffset, pRegions[r].imageExtent, - (struct anv_color_attachment_view *)dest_view, + (struct anv_surface_view *)dest_view, (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent); } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 34b4ce8d14a..62f77cd96d9 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -463,13 +463,8 @@ struct anv_descriptor_set_layout { }; struct anv_descriptor { - union { - struct { - struct anv_sampler *sampler; - struct anv_image_view *image_view; - }; - struct anv_buffer_view *buffer_view; - }; + struct anv_sampler *sampler; + struct anv_surface_view *view; }; struct anv_descriptor_set { @@ -630,24 +625,12 @@ struct anv_image { VkDeviceSize offset; }; -struct anv_buffer_view { - struct anv_buffer * buffer; +struct anv_surface_view { struct anv_state surface_state; + struct anv_bo * bo; uint32_t offset; -}; - -struct anv_color_attachment_view { - struct anv_image * image; - struct anv_state surface_state; - - VkExtent3D extent; -}; - -struct anv_image_view { - struct anv_image * image; - struct anv_state surface_state; - VkExtent3D extent; + VkFormat format; }; struct anv_sampler { @@ -659,7 +642,7 @@ struct anv_depth_stencil_view { struct anv_framebuffer { uint32_t color_attachment_count; - struct anv_color_attachment_view * color_attachments[MAX_RTS]; + struct anv_surface_view * color_attachments[MAX_RTS]; struct anv_depth_stencil_view * depth_stencil; uint32_t sample_count; diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c index 484f61601c3..399a1c2db1d 100644 --- a/src/vulkan/vk.c +++ b/src/vulkan/vk.c @@ -562,6 +562,11 @@ int main(int argc, char *argv[]) }, &texture); + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_IMAGE, + texture, + 0, /* allocation index; for objects which need to bind to multiple mems */ + 
mem, 2048 + 256 * 256 * 4); + VkImageView image_view; vkCreateImageView(device, &(VkImageViewCreateInfo) { @@ -586,11 +591,6 @@ int main(int argc, char *argv[]) }, &image_view); - vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_IMAGE, - texture, - 0, /* allocation index; for objects which need to bind to multiple mems */ - mem, 2048 + 256 * 256 * 4); - VkSampler sampler; vkCreateSampler(device, &(VkSamplerCreateInfo) { -- cgit v1.2.3 From 0a775e1eab2d6a2a6f62e59d5373e6a77a95ac2a Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 13 May 2015 15:34:34 -0700 Subject: vk: Rename dyn_state_pool to dynamic_state_pool Given that we already tolerate surface_state_pool and the even longer instruction_state_pool, there's no reason to arbitrarily abbreviate dynamic. --- src/vulkan/device.c | 26 +++++++++++++------------- src/vulkan/private.h | 4 ++-- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index e1aac64ffb0..82ac248cb87 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -305,10 +305,10 @@ VkResult VKAPI vkCreateDevice( if (device->context_id == -1) goto fail_fd; - anv_block_pool_init(&device->dyn_state_block_pool, device, 2048); + anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048); - anv_state_pool_init(&device->dyn_state_pool, - &device->dyn_state_block_pool); + anv_state_pool_init(&device->dynamic_state_pool, + &device->dynamic_state_block_pool); anv_block_pool_init(&device->instruction_block_pool, device, 2048); anv_block_pool_init(&device->surface_state_block_pool, device, 2048); @@ -344,7 +344,7 @@ VkResult VKAPI vkDestroyDevice( anv_compiler_destroy(device->compiler); - anv_block_pool_finish(&device->dyn_state_block_pool); + anv_block_pool_finish(&device->dynamic_state_block_pool); anv_block_pool_finish(&device->instruction_block_pool); anv_block_pool_finish(&device->surface_state_block_pool); @@ -628,8 +628,8 @@ VkResult VKAPI vkDeviceWaitIdle( 
int64_t timeout; int ret; - state = anv_state_pool_alloc(&device->dyn_state_pool, 32, 32); - bo = &device->dyn_state_pool.block_pool->bo; + state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32); + bo = &device->dynamic_state_pool.block_pool->bo; batch.next = state.map; anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); anv_batch_emit(&batch, GEN8_MI_NOOP); @@ -672,12 +672,12 @@ VkResult VKAPI vkDeviceWaitIdle( } } - anv_state_pool_free(&device->dyn_state_pool, state); + anv_state_pool_free(&device->dynamic_state_pool, state); return VK_SUCCESS; fail: - anv_state_pool_free(&device->dyn_state_pool, state); + anv_state_pool_free(&device->dynamic_state_pool, state); return result; } @@ -1651,11 +1651,11 @@ VkResult VKAPI vkCreateDynamicViewportState( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); unsigned count = pCreateInfo->viewportAndScissorCount; - state->sf_clip_vp = anv_state_pool_alloc(&device->dyn_state_pool, + state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool, count * 64, 64); - state->cc_vp = anv_state_pool_alloc(&device->dyn_state_pool, + state->cc_vp = anv_state_pool_alloc(&device->dynamic_state_pool, count * 8, 32); - state->scissor = anv_state_pool_alloc(&device->dyn_state_pool, + state->scissor = anv_state_pool_alloc(&device->dynamic_state_pool, count * 32, 32); for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) { @@ -1827,7 +1827,7 @@ VkResult VKAPI vkCreateCommandBuffer( anv_state_stream_init(&cmd_buffer->surface_state_stream, &device->surface_state_block_pool); anv_state_stream_init(&cmd_buffer->dynamic_state_stream, - &device->dyn_state_block_pool); + &device->dynamic_state_block_pool); cmd_buffer->dirty = 0; cmd_buffer->vb_dirty = 0; @@ -1867,7 +1867,7 @@ VkResult VKAPI vkBeginCommandBuffer( .SurfaceStateMemoryObjectControlState = 0, /* FIXME: MOCS */ .SurfaceStateBaseAddressModifyEnable = true, - .DynamicStateBaseAddress = { &device->dyn_state_block_pool.bo, 0 }, + .DynamicStateBaseAddress = { 
&device->dynamic_state_block_pool.bo, 0 }, .DynamicStateBaseAddressModifyEnable = true, .DynamicStateBufferSize = 0xfffff, .DynamicStateBufferSizeModifyEnable = true, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 62f77cd96d9..3120b8486e1 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -285,8 +285,8 @@ struct anv_device { bool no_hw; bool dump_aub; - struct anv_block_pool dyn_state_block_pool; - struct anv_state_pool dyn_state_pool; + struct anv_block_pool dynamic_state_block_pool; + struct anv_state_pool dynamic_state_pool; struct anv_block_pool instruction_block_pool; struct anv_block_pool surface_state_block_pool; -- cgit v1.2.3 From b806e80e667a153c68bb0963f74581fa0ed97bb1 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Thu, 14 May 2015 09:08:45 -0700 Subject: vk: Flip back to using memfd for the allocators --- src/vulkan/allocator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 67abaa45464..4c293f12b1a 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -203,7 +203,7 @@ anv_block_pool_init(struct anv_block_pool *pool, * without copying. It breaks valgrind however, so we have a MAP_ANONYMOUS * path we can take for valgrind debugging. 
*/ -#define USE_MEMFD 0 +#define USE_MEMFD 1 void anv_block_pool_finish(struct anv_block_pool *pool) -- cgit v1.2.3 From 1f6c220b45fe92f48c2e78795b7cb30c0215a299 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 15 May 2015 14:59:59 -0700 Subject: vk: Update the bind map length to reflect MAX_SETS --- src/mesa/drivers/dri/i965/brw_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 2d4a7eab20b..b6cdc82444c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -360,7 +360,7 @@ struct brw_stage_prog_data { } binding_table; uint32_t *map_entries; - uint32_t *bind_map[4]; + uint32_t *bind_map[8]; /* MAX_SETS from vulkan/private.h */ GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; -- cgit v1.2.3 From bf096c9ec3c3f6ce53967386d0c0eb29cb192505 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 15 May 2015 15:03:21 -0700 Subject: vk: Build binding tables at bind descriptor time This changes the way descriptor sets and layouts work so that we fill out binding table contents at the time we bind descriptor sets. We manipulate the binding table contents and sampler state in a shadow-copy in anv_cmd_buffer. At draw time, we allocate the actual binding table and sampler state and flush the anv_cmd_buffer copies. 
--- src/vulkan/compiler.cpp | 21 ++-- src/vulkan/device.c | 302 ++++++++++++++++++++++++++++-------------------- src/vulkan/meta.c | 6 +- src/vulkan/pipeline.c | 89 ++++---------- src/vulkan/private.h | 55 +++++---- 5 files changed, 241 insertions(+), 232 deletions(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index d843d035508..6f15b77426c 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -57,9 +57,8 @@ static VkResult set_binding_table_layout(struct brw_stage_prog_data *prog_data, struct anv_pipeline *pipeline, uint32_t stage) { - uint32_t count, bias, set, *map; - - struct anv_pipeline_layout_entry *entries; + uint32_t bias, count, k, *map; + struct anv_pipeline_layout *layout = pipeline->layout; /* No layout is valid for shaders that don't bind any resources. */ if (pipeline->layout == NULL) @@ -72,22 +71,18 @@ set_binding_table_layout(struct brw_stage_prog_data *prog_data, prog_data->binding_table.texture_start = bias; - count = pipeline->layout->stage[stage].surface_count; - entries = pipeline->layout->stage[stage].surface_entries; - + count = layout->stage[stage].surface_count; prog_data->map_entries = (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0])); if (prog_data->map_entries == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - set = 0; + k = bias; map = prog_data->map_entries; - for (uint32_t i = 0; i < count; i++) { - if (entries[i].set == set) { - prog_data->bind_map[set] = map; - set++; - } - *map++ = bias + i; + for (uint32_t i = 0; i < layout->num_sets; i++) { + prog_data->bind_map[i] = map; + for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++) + *map++ = k++; } return VK_SUCCESS; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 82ac248cb87..8b0c2279f92 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1417,43 +1417,101 @@ VkResult VKAPI vkCreateDescriptorSetLayout( { struct anv_device *device = (struct anv_device *) 
_device; struct anv_descriptor_set_layout *set_layout; - uint32_t count, k, num_entries; - size_t size, sampler_total, surface_total; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); - count = 0; - for (uint32_t i = 0; i < pCreateInfo->count; i++) + uint32_t sampler_count[VK_NUM_SHADER_STAGE] = { 0, }; + uint32_t surface_count[VK_NUM_SHADER_STAGE] = { 0, }; + uint32_t num_dynamic_buffers = 0; + uint32_t count = 0; + uint32_t s; + + for (uint32_t i = 0; i < pCreateInfo->count; i++) { + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + sampler_count[s] += pCreateInfo->pBinding[i].count; + break; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + sampler_count[s] += pCreateInfo->pBinding[i].count; + + /* fall through */ + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + surface_count[s] += pCreateInfo->pBinding[i].count; + break; + default: + break; + } + count += pCreateInfo->pBinding[i].count; + } - size = sizeof(*set_layout) + - count * sizeof(set_layout->bindings[0]); + for (uint32_t i = 0; i < pCreateInfo->count; i++) { + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + num_dynamic_buffers++; + break; + default: + break; + } + } + + uint32_t sampler_total = 0; + uint32_t surface_total = 0; + for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + sampler_total += sampler_count[s]; + surface_total += surface_count[s]; 
+ } + + size_t size = sizeof(*set_layout) + + (sampler_total + surface_total) * sizeof(uint32_t); set_layout = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (!set_layout) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - k = 0; - sampler_total = 0; - surface_total = 0; - for (uint32_t i = 0; i < pCreateInfo->count; i++) { - for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) { - set_layout->bindings[k].mask = pCreateInfo->pBinding[i].stageFlags; - set_layout->bindings[k].type = pCreateInfo->pBinding[i].descriptorType; - k++; - } + set_layout->num_dynamic_buffers = num_dynamic_buffers; + set_layout->count = count; - num_entries = pCreateInfo->pBinding[i].count * - __builtin_popcount(pCreateInfo->pBinding[i].stageFlags); + uint32_t *p = set_layout->entries; + uint32_t *sampler[VK_NUM_SHADER_STAGE]; + uint32_t *surface[VK_NUM_SHADER_STAGE]; + for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + set_layout->stage[s].surface_count = surface_count[s]; + set_layout->stage[s].surface_start = surface[s] = p; + p += surface_count[s]; + set_layout->stage[s].sampler_count = sampler_count[s]; + set_layout->stage[s].sampler_start = sampler[s] = p; + p += sampler_count[s]; + } + uint32_t descriptor = 0; + for (uint32_t i = 0; i < pCreateInfo->count; i++) { switch (pCreateInfo->pBinding[i].descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: - sampler_total += num_entries; + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) + *(sampler[s])++ = descriptor + j; break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - sampler_total += num_entries; - surface_total += num_entries; - break; + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) + *(sampler[s])++ = descriptor + j; + + /* fallthrough */ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: @@ -1463,18 +1521,17 @@ VkResult VKAPI 
vkCreateDescriptorSetLayout( case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - surface_total += num_entries; + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) { + *(surface[s])++ = descriptor + j; + } break; - default: - unreachable("invalid descriptor type"); + unreachable(""); } + descriptor += pCreateInfo->pBinding[i].count; } - set_layout->sampler_total = sampler_total; - set_layout->surface_total = surface_total; - set_layout->count = count; - *pSetLayout = (VkDescriptorSetLayout) set_layout; return VK_SUCCESS; @@ -1803,6 +1860,7 @@ VkResult VKAPI vkCreateCommandBuffer( cmd_buffer->device = device; cmd_buffer->rs_state = NULL; cmd_buffer->vp_state = NULL; + memset(&cmd_buffer->bindings, 0, sizeof(cmd_buffer->bindings)); result = anv_batch_init(&cmd_buffer->batch, device); if (result != VK_SUCCESS) @@ -2106,17 +2164,43 @@ void VKAPI vkCmdBindDescriptorSets( const uint32_t* pDynamicOffsets) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; - /* What are the semantics for setting descriptor sets? Assuming that - * setting preserves lower sets and invalidate higher sets. This means that - * we can set the number of active sets to firstSet + setCount. - */ - - for (uint32_t i = 0; i < setCount; i++) - cmd_buffer->descriptor_sets[firstSet + i] = + uint32_t offset = 0; + for (uint32_t i = 0; i < setCount; i++) { + struct anv_descriptor_set *set = (struct anv_descriptor_set *) pDescriptorSets[i]; + struct anv_descriptor_set_layout *set_layout = layout->set[firstSet + i].layout; + + for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + uint32_t *surface_to_desc = set_layout->stage[s].surface_start; + uint32_t *sampler_to_desc = set_layout->stage[s].sampler_start; + uint32_t bias = s == VK_SHADER_STAGE_FRAGMENT ? 
MAX_RTS : 0; + uint32_t start; + + start = bias + layout->set[firstSet + i].surface_start[s]; + for (uint32_t b = 0; b < set_layout->stage[s].surface_count; b++) { + struct anv_surface_view *view = set->descriptors[surface_to_desc[b]].view; + struct anv_bindings *bindings = &cmd_buffer->bindings; + + bindings->descriptors[s].surfaces[start + b] = + view->surface_state.offset; + bindings->descriptors[s].relocs[start + b].bo = view->bo; + bindings->descriptors[s].relocs[start + b].offset = view->offset; + } + + start = layout->set[firstSet + i].sampler_start[s]; + for (uint32_t b = 0; b < set_layout->stage[s].sampler_count; b++) { + struct anv_sampler *sampler = set->descriptors[sampler_to_desc[b]].sampler; + + memcpy(&cmd_buffer->bindings.descriptors[s].samplers[start + b], + sampler->state, sizeof(sampler->state)); + } + } + + offset += layout->set[firstSet + i].layout->num_dynamic_buffers; + } - cmd_buffer->num_descriptor_sets = firstSet + setCount; cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; } @@ -2165,112 +2249,61 @@ static void flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; - struct anv_framebuffer *framebuffer = cmd_buffer->framebuffer; for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { - uint32_t bias = s == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0; - uint32_t binding_table_length, *table; - struct anv_state table_state; + uint32_t binding_table_length; + /* This is a little awkward: layout can be NULL but we still have to + * allocate and set a binding table for the PS stage for render + * targets. 
*/ if (layout) binding_table_length = layout->stage[s].surface_count + bias; - else if (s == VK_SHADER_STAGE_FRAGMENT) - binding_table_length = framebuffer->color_attachment_count; else - binding_table_length = 0; - - if (binding_table_length > 0) - table_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, - binding_table_length * 4, 32); - table = table_state.map; - - if (s == VK_SHADER_STAGE_FRAGMENT) { - for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) { - struct anv_surface_view *view = framebuffer->color_attachments[i]; - table[i] = view->surface_state.offset; - - /* Don't write the reloc back to the surface state. We do that at - * submit time. Surface address is dwords 8-9. */ - anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, - view->surface_state.offset + 8 * sizeof(int32_t), - view->bo, view->offset); - } - } + binding_table_length = bias; - if (layout) { - for (uint32_t i = 0; i < layout->stage[s].surface_count; i++) { - struct anv_pipeline_layout_entry *e = &layout->stage[s].surface_entries[i]; - struct anv_surface_view *view; - - switch (e->type) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - unreachable("sampler-only descriptor in the surface entries"); - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - view = cmd_buffer->descriptor_sets[e->set]->descriptors[e->index].view; - table[bias + i] = view->surface_state.offset; - anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, - view->surface_state.offset + 8 * sizeof(int32_t), - view->bo, view->offset); - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - break; - default: - break; - } + if (binding_table_length > 0) { + struct anv_state state; + 
uint32_t size; + + size = binding_table_length * sizeof(uint32_t); + state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, size, 32); + memcpy(state.map, cmd_buffer->bindings.descriptors[s].surfaces, size); + + for (uint32_t i = 0; i < binding_table_length; i++) { + uint32_t offset = cmd_buffer->bindings.descriptors[s].surfaces[i]; + if (offset == 0) + continue; + + anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, + offset + 8 * sizeof(int32_t), + cmd_buffer->bindings.descriptors[s].relocs[i].bo, + cmd_buffer->bindings.descriptors[s].relocs[i].offset); } - } - /* The binding table pointer commands all have the same structure, only - * the opcode differs. - */ - static const uint32_t binding_table_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 38, - [VK_SHADER_STAGE_TESS_CONTROL] = 39, - [VK_SHADER_STAGE_TESS_EVALUATION] = 40, - [VK_SHADER_STAGE_GEOMETRY] = 41, - [VK_SHADER_STAGE_FRAGMENT] = 42, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; + static const uint32_t binding_table_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; - if (binding_table_length > 0) anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, ._3DCommandSubOpcode = binding_table_opcodes[s], - .PointertoVSBindingTable = table_state.offset); - + .PointertoVSBindingTable = state.offset); + } if (layout && layout->stage[s].sampler_count > 0) { - struct anv_state sampler_state; - - sampler_state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, - layout->stage[s].sampler_count * 16, 32); - for (uint32_t i = 0; i < layout->stage[s].sampler_count; i++) { - struct anv_pipeline_layout_entry *e = &layout->stage[s].sampler_entries[i]; - struct anv_sampler *sampler; - - switch (e->type) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - sampler = - 
cmd_buffer->descriptor_sets[e->set]->descriptors[e->index].sampler; - break; - default: - unreachable("non-sampler descriptor in sampler entries"); - break; - } + struct anv_state state; + size_t size; - memcpy(sampler_state.map + i * 16, sampler->state, sizeof(sampler->state)); - } + size = layout->stage[s].sampler_count * 16; + state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); + memcpy(state.map, cmd_buffer->bindings.descriptors[s].samplers, size); static const uint32_t sampler_state_opcodes[] = { [VK_SHADER_STAGE_VERTEX] = 43, @@ -2284,7 +2317,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, ._3DCommandSubOpcode = sampler_state_opcodes[s], - .PointertoVSSamplerState = sampler_state.offset); + .PointertoVSSamplerState = state.offset); } } } @@ -2741,9 +2774,10 @@ void VKAPI vkCmdBeginRenderPass( { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; struct anv_render_pass *pass = (struct anv_render_pass *) pRenderPassBegin->renderPass; + struct anv_framebuffer *framebuffer = + (struct anv_framebuffer *) pRenderPassBegin->framebuffer; - cmd_buffer->framebuffer = (struct anv_framebuffer *) pRenderPassBegin->framebuffer; - cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; + cmd_buffer->framebuffer = framebuffer; anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, .ClippedDrawingRectangleYMin = pass->render_area.offset.y, @@ -2755,6 +2789,18 @@ void VKAPI vkCmdBeginRenderPass( .DrawingRectangleOriginY = 0, .DrawingRectangleOriginX = 0); + for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) { + struct anv_surface_view *view = framebuffer->color_attachments[i]; + + cmd_buffer->bindings.descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = + view->surface_state.offset; + cmd_buffer->bindings.descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo = + view->bo; + 
cmd_buffer->bindings.descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset = + view->offset; + } + cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; + anv_cmd_buffer_clear(cmd_buffer, pass); } diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 4f210e3f235..8fbec09e626 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -151,7 +151,7 @@ struct anv_saved_state { struct anv_buffer *buffer; VkDeviceSize offset; } vb[2]; - struct anv_descriptor_set *dsets[1]; + struct anv_bindings bindings; struct anv_pipeline *pipeline; }; @@ -160,7 +160,7 @@ anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, struct anv_saved_state *state) { memcpy(state->vb, cmd_buffer->vb, sizeof(state->vb)); - memcpy(state->dsets, cmd_buffer->descriptor_sets, sizeof(state->dsets)); + memcpy(&state->bindings, &cmd_buffer->bindings, sizeof(state->bindings)); state->pipeline = cmd_buffer->pipeline; } @@ -169,7 +169,7 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, const struct anv_saved_state *state) { memcpy(cmd_buffer->vb, state->vb, sizeof(state->vb)); - memcpy(cmd_buffer->descriptor_sets, state->dsets, sizeof(state->dsets)); + memcpy(&cmd_buffer->bindings, &state->bindings, sizeof(state->bindings)); cmd_buffer->pipeline = state->pipeline; cmd_buffer->vb_dirty |= (1 << ARRAY_SIZE(state->vb)) - 1; diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 30185dd0b79..60389e4bbba 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -560,81 +560,38 @@ VkResult VKAPI vkCreatePipelineLayout( { struct anv_device *device = (struct anv_device *) _device; struct anv_pipeline_layout *layout; - struct anv_pipeline_layout_entry *sampler_entry, *surface_entry; - uint32_t sampler_total, surface_total; - size_t size; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); - - sampler_total = 0; - surface_total = 0; - for (uint32_t i = 0; i < pCreateInfo->descriptorSetCount; i++) { - struct anv_descriptor_set_layout *set_layout = - 
(struct anv_descriptor_set_layout *) pCreateInfo->pSetLayouts[i]; - for (uint32_t j = 0; j < set_layout->count; j++) { - sampler_total += set_layout->sampler_total; - surface_total += set_layout->surface_total; - } - } - size = sizeof(*layout) + - (sampler_total + surface_total) * sizeof(layout->entries[0]); - layout = anv_device_alloc(device, size, 8, + layout = anv_device_alloc(device, sizeof(*layout), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (layout == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - sampler_entry = layout->entries; - surface_entry = layout->entries + sampler_total; + layout->num_sets = pCreateInfo->descriptorSetCount; + + uint32_t surface_start[VK_NUM_SHADER_STAGE] = { 0, }; + uint32_t sampler_start[VK_NUM_SHADER_STAGE] = { 0, }; + for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { - layout->stage[s].sampler_entries = sampler_entry; - layout->stage[s].surface_entries = surface_entry; - - for (uint32_t i = 0; i < pCreateInfo->descriptorSetCount; i++) { - struct anv_descriptor_set_layout *set_layout = - (struct anv_descriptor_set_layout *) pCreateInfo->pSetLayouts[i]; - for (uint32_t j = 0; j < set_layout->count; j++) { - if (set_layout->bindings[j].mask & (1 << s)) { - switch (set_layout->bindings[j].type) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - sampler_entry->type = set_layout->bindings[j].type; - sampler_entry->set = i; - sampler_entry->index = j; - sampler_entry++; - break; - - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - sampler_entry->type = set_layout->bindings[j].type; - sampler_entry->set = i; - sampler_entry->index = j; - sampler_entry++; - /* fall through */ - - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - 
surface_entry->type = set_layout->bindings[j].type; - surface_entry->set = i; - surface_entry->index = j; - surface_entry++; - break; - - default: - break; - } - } - } - } + layout->stage[s].surface_count = 0; + layout->stage[s].sampler_count = 0; + } + + for (uint32_t i = 0; i < pCreateInfo->descriptorSetCount; i++) { + struct anv_descriptor_set_layout *set_layout = + (struct anv_descriptor_set_layout *) pCreateInfo->pSetLayouts[i]; - layout->stage[s].sampler_count = - sampler_entry - layout->stage[s].sampler_entries; - layout->stage[s].surface_count = - surface_entry - layout->stage[s].surface_entries; + layout->set[i].layout = set_layout; + for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + layout->set[i].surface_start[s] = surface_start[s]; + surface_start[s] += set_layout->stage[s].surface_count; + layout->set[i].sampler_start[s] = sampler_start[s]; + sampler_start[s] += set_layout->stage[s].sampler_count; + + layout->stage[s].surface_count += set_layout->stage[s].surface_count; + layout->stage[s].sampler_count += set_layout->stage[s].sampler_count; + } } *pPipelineLayout = (VkPipelineLayout) layout; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 3120b8486e1..2e8ae9fb6bd 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -453,13 +453,16 @@ struct anv_dynamic_cb_state { }; struct anv_descriptor_set_layout { - uint32_t sampler_total; /* total number of samplers in all stages */ - uint32_t surface_total; /* total number of surfaces in all stages */ - uint32_t count; struct { - VkDescriptorType type; - uint32_t mask; - } bindings[0]; + uint32_t surface_count; + uint32_t *surface_start; + uint32_t sampler_count; + uint32_t *sampler_start; + } stage[VK_NUM_SHADER_STAGE]; + + uint32_t count; + uint32_t num_dynamic_buffers; + uint32_t entries[0]; }; struct anv_descriptor { @@ -471,21 +474,23 @@ struct anv_descriptor_set { struct anv_descriptor descriptors[0]; }; -struct anv_pipeline_layout_entry { - VkDescriptorType type; - uint32_t 
set; - uint32_t index; -}; +#define MAX_VBS 32 +#define MAX_SETS 8 +#define MAX_RTS 8 struct anv_pipeline_layout { struct { - uint32_t sampler_count; - struct anv_pipeline_layout_entry *sampler_entries; + struct anv_descriptor_set_layout *layout; + uint32_t surface_start[VK_NUM_SHADER_STAGE]; + uint32_t sampler_start[VK_NUM_SHADER_STAGE]; + } set[MAX_SETS]; + + uint32_t num_sets; + + struct { uint32_t surface_count; - struct anv_pipeline_layout_entry *surface_entries; + uint32_t sampler_count; } stage[VK_NUM_SHADER_STAGE]; - - struct anv_pipeline_layout_entry entries[0]; }; struct anv_buffer { @@ -497,14 +502,21 @@ struct anv_buffer { VkDeviceSize offset; }; -#define MAX_VBS 32 -#define MAX_SETS 8 -#define MAX_RTS 8 - #define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 0) #define ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY (1 << 1) #define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) +struct anv_bindings { + struct { + uint32_t surfaces[256]; + struct { + struct anv_bo *bo; + uint32_t offset; + } relocs[256]; + struct { uint32_t dwords[4]; } samplers[16]; + } descriptors[VK_NUM_SHADER_STAGE]; +}; + struct anv_cmd_buffer { struct anv_device * device; @@ -525,13 +537,12 @@ struct anv_cmd_buffer { VkDeviceSize offset; } vb[MAX_VBS]; uint32_t vb_dirty; - uint32_t num_descriptor_sets; - struct anv_descriptor_set * descriptor_sets[MAX_SETS]; uint32_t dirty; struct anv_pipeline * pipeline; struct anv_framebuffer * framebuffer; struct anv_dynamic_rs_state * rs_state; struct anv_dynamic_vp_state * vp_state; + struct anv_bindings bindings; }; void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); -- cgit v1.2.3 From af45f4a558a0020ec77464a12e0cb33ea5531206 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 15 May 2015 16:07:17 -0700 Subject: vk: Fix warning from missing initializer Struct initializers need to be { 0, } to zero out the variable they're initializing. 
--- src/vulkan/meta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 8fbec09e626..5091977d2ba 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -584,7 +584,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .renderArea = { { 0, 0 }, { dest->extent.width, dest->extent.height } }, .colorAttachmentCount = 1, - .extent = { }, + .extent = { 0, }, .sampleCount = 1, .layers = 1, .pColorFormats = (VkFormat[]) { dest->format }, -- cgit v1.2.3 From 0cfc49377589081b495d4b3c112e341c9371637b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 15 May 2015 16:15:04 -0700 Subject: vk: Fix GLSL_VK_SHADER macro Stringify doesn't work with __ARGV__. The last macro argument swallows up excess arguments and as such we can just stringify that. --- src/vulkan/glsl_helpers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/glsl_helpers.h b/src/vulkan/glsl_helpers.h index 5288be56a20..76b0cbebcf4 100644 --- a/src/vulkan/glsl_helpers.h +++ b/src/vulkan/glsl_helpers.h @@ -23,9 +23,9 @@ #pragma once -#define GLSL_VK_SHADER(device, stage, ...) 
({ \ +#define GLSL_VK_SHADER(device, stage, source) ({ \ VkShader __shader; \ - const char __src[] = "#version 330\n" STRINGIFY((__ARGV__)); \ + const char __src[] = "#version 330\n" #source; \ VkShaderCreateInfo __shader_create_info = { \ .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, \ .codeSize = sizeof(__src), \ -- cgit v1.2.3 From 9540130c412ec668ff1b9e50a420041a59ea1f15 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 15 May 2015 16:34:31 -0700 Subject: vk: Move vertex buffers into struct anv_bindings --- src/vulkan/device.c | 8 ++++---- src/vulkan/meta.c | 9 ++------- src/vulkan/private.h | 9 +++++---- 3 files changed, 11 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 8b0c2279f92..303f2854760 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2239,8 +2239,8 @@ void VKAPI vkCmdBindVertexBuffers( * stride from the pipeline. */ for (uint32_t i = 0; i < bindingCount; i++) { - cmd_buffer->vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i]; - cmd_buffer->vb[startBinding + i].offset = pOffsets[i]; + cmd_buffer->bindings.vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i]; + cmd_buffer->bindings.vb[startBinding + i].offset = pOffsets[i]; cmd_buffer->vb_dirty |= 1 << (startBinding + i); } } @@ -2335,8 +2335,8 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) GEN8_3DSTATE_VERTEX_BUFFERS); uint32_t vb, i = 0; for_each_bit(vb, cmd_buffer->vb_dirty) { - struct anv_buffer *buffer = cmd_buffer->vb[vb].buffer; - uint32_t offset = cmd_buffer->vb[vb].offset; + struct anv_buffer *buffer = cmd_buffer->bindings.vb[vb].buffer; + uint32_t offset = cmd_buffer->bindings.vb[vb].offset; struct GEN8_VERTEX_BUFFER_STATE state = { .VertexBufferIndex = vb, diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 5091977d2ba..6d44e3336ec 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -146,11 +146,8 @@ anv_device_init_meta_clear_state(struct anv_device 
*device) &device->clear_state.rs_state); } +#define NUM_VB_USED 2 struct anv_saved_state { - struct { - struct anv_buffer *buffer; - VkDeviceSize offset; - } vb[2]; struct anv_bindings bindings; struct anv_pipeline *pipeline; }; @@ -159,7 +156,6 @@ static void anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, struct anv_saved_state *state) { - memcpy(state->vb, cmd_buffer->vb, sizeof(state->vb)); memcpy(&state->bindings, &cmd_buffer->bindings, sizeof(state->bindings)); state->pipeline = cmd_buffer->pipeline; } @@ -168,11 +164,10 @@ static void anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, const struct anv_saved_state *state) { - memcpy(cmd_buffer->vb, state->vb, sizeof(state->vb)); memcpy(&cmd_buffer->bindings, &state->bindings, sizeof(state->bindings)); cmd_buffer->pipeline = state->pipeline; - cmd_buffer->vb_dirty |= (1 << ARRAY_SIZE(state->vb)) - 1; + cmd_buffer->vb_dirty |= (1 << NUM_VB_USED) - 1; cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 2e8ae9fb6bd..48900dc8022 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -507,6 +507,11 @@ struct anv_buffer { #define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) struct anv_bindings { + struct { + struct anv_buffer *buffer; + VkDeviceSize offset; + } vb[MAX_VBS]; + struct { uint32_t surfaces[256]; struct { @@ -532,10 +537,6 @@ struct anv_cmd_buffer { struct anv_state_stream dynamic_state_stream; /* State required while building cmd buffer */ - struct { - struct anv_buffer *buffer; - VkDeviceSize offset; - } vb[MAX_VBS]; uint32_t vb_dirty; uint32_t dirty; struct anv_pipeline * pipeline; -- cgit v1.2.3 From 3b9f32e893f8bd817d1b7c26a4adfabb3b8f6145 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 15 May 2015 17:03:18 -0700 Subject: vk: Make cmd_buffer->bindings a pointer This lets us save and restore efficiently by just moving the pointer to a temporary bindings struct for meta. 
--- src/vulkan/device.c | 91 +++++++++++++++++++++++++++++++--------------------- src/vulkan/meta.c | 7 ++-- src/vulkan/private.h | 5 ++- 3 files changed, 64 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 303f2854760..18e3c0559d8 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1860,7 +1860,8 @@ VkResult VKAPI vkCreateCommandBuffer( cmd_buffer->device = device; cmd_buffer->rs_state = NULL; cmd_buffer->vp_state = NULL; - memset(&cmd_buffer->bindings, 0, sizeof(cmd_buffer->bindings)); + memset(&cmd_buffer->default_bindings, 0, sizeof(cmd_buffer->default_bindings)); + cmd_buffer->bindings = &cmd_buffer->default_bindings; result = anv_batch_init(&cmd_buffer->batch, device); if (result != VK_SUCCESS) @@ -2165,6 +2166,7 @@ void VKAPI vkCmdBindDescriptorSets( { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; + struct anv_bindings *bindings = cmd_buffer->bindings; uint32_t offset = 0; for (uint32_t i = 0; i < setCount; i++) { @@ -2181,7 +2183,6 @@ void VKAPI vkCmdBindDescriptorSets( start = bias + layout->set[firstSet + i].surface_start[s]; for (uint32_t b = 0; b < set_layout->stage[s].surface_count; b++) { struct anv_surface_view *view = set->descriptors[surface_to_desc[b]].view; - struct anv_bindings *bindings = &cmd_buffer->bindings; bindings->descriptors[s].surfaces[start + b] = view->surface_state.offset; @@ -2193,7 +2194,7 @@ void VKAPI vkCmdBindDescriptorSets( for (uint32_t b = 0; b < set_layout->stage[s].sampler_count; b++) { struct anv_sampler *sampler = set->descriptors[sampler_to_desc[b]].sampler; - memcpy(&cmd_buffer->bindings.descriptors[s].samplers[start + b], + memcpy(&bindings->descriptors[s].samplers[start + b], sampler->state, sizeof(sampler->state)); } } @@ -2234,13 +2235,14 @@ void VKAPI vkCmdBindVertexBuffers( const VkDeviceSize* pOffsets) { struct anv_cmd_buffer *cmd_buffer = (struct 
anv_cmd_buffer *) cmdBuffer; + struct anv_bindings *bindings = cmd_buffer->bindings; /* We have to defer setting up vertex buffer since we need the buffer * stride from the pipeline. */ for (uint32_t i = 0; i < bindingCount; i++) { - cmd_buffer->bindings.vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i]; - cmd_buffer->bindings.vb[startBinding + i].offset = pOffsets[i]; + bindings->vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i]; + bindings->vb[startBinding + i].offset = pOffsets[i]; cmd_buffer->vb_dirty |= 1 << (startBinding + i); } } @@ -2249,38 +2251,48 @@ static void flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; + struct anv_bindings *bindings = cmd_buffer->bindings; + uint32_t layers = cmd_buffer->framebuffer->layers; + uint32_t surface_count; for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { - uint32_t bias = s == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0; - uint32_t binding_table_length; + uint32_t bias; + + if (s == VK_SHADER_STAGE_FRAGMENT) { + bias = MAX_RTS; + layers = cmd_buffer->framebuffer->layers; + } else { + bias = 0; + layers = 0; + } /* This is a little awkward: layout can be NULL but we still have to * allocate and set a binding table for the PS stage for render * targets. 
*/ if (layout) - binding_table_length = layout->stage[s].surface_count + bias; + surface_count = layout->stage[s].surface_count; else - binding_table_length = bias; + surface_count = 0; - if (binding_table_length > 0) { + if (layers + surface_count > 0) { struct anv_state state; uint32_t size; - size = binding_table_length * sizeof(uint32_t); - state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, size, 32); - memcpy(state.map, cmd_buffer->bindings.descriptors[s].surfaces, size); + size = (layers + surface_count) * sizeof(uint32_t); + state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, size, 32); + memcpy(state.map, bindings->descriptors[s].surfaces, size); - for (uint32_t i = 0; i < binding_table_length; i++) { - uint32_t offset = cmd_buffer->bindings.descriptors[s].surfaces[i]; - if (offset == 0) - continue; + for (uint32_t i = 0; i < layers; i++) + anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, + bindings->descriptors[s].surfaces[i] + 8 * sizeof(int32_t), + bindings->descriptors[s].relocs[i].bo, + bindings->descriptors[s].relocs[i].offset); + for (uint32_t i = 0; i < surface_count; i++) anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, - offset + 8 * sizeof(int32_t), - cmd_buffer->bindings.descriptors[s].relocs[i].bo, - cmd_buffer->bindings.descriptors[s].relocs[i].offset); - } + bindings->descriptors[s].surfaces[bias + i] + 8 * sizeof(int32_t), + bindings->descriptors[s].relocs[bias + i].bo, + bindings->descriptors[s].relocs[bias + i].offset); static const uint32_t binding_table_opcodes[] = { [VK_SHADER_STAGE_VERTEX] = 38, @@ -2303,7 +2315,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) size = layout->stage[s].sampler_count * 16; state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); - memcpy(state.map, cmd_buffer->bindings.descriptors[s].samplers, size); + memcpy(state.map, bindings->descriptors[s].samplers, size); static const uint32_t sampler_state_opcodes[] = { [VK_SHADER_STAGE_VERTEX] = 
43, @@ -2326,6 +2338,7 @@ static void anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->pipeline; + struct anv_bindings *bindings = cmd_buffer->bindings; const uint32_t num_buffers = __builtin_popcount(cmd_buffer->vb_dirty); const uint32_t num_dwords = 1 + num_buffers * 4; uint32_t *p; @@ -2335,8 +2348,8 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) GEN8_3DSTATE_VERTEX_BUFFERS); uint32_t vb, i = 0; for_each_bit(vb, cmd_buffer->vb_dirty) { - struct anv_buffer *buffer = cmd_buffer->bindings.vb[vb].buffer; - uint32_t offset = cmd_buffer->bindings.vb[vb].offset; + struct anv_buffer *buffer = bindings->vb[vb].buffer; + uint32_t offset = bindings->vb[vb].offset; struct GEN8_VERTEX_BUFFER_STATE state = { .VertexBufferIndex = vb, @@ -2768,6 +2781,22 @@ VkResult VKAPI vkCreateRenderPass( return VK_SUCCESS; } +void +anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_framebuffer *framebuffer = cmd_buffer->framebuffer; + struct anv_bindings *bindings = cmd_buffer->bindings; + + for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) { + struct anv_surface_view *view = framebuffer->color_attachments[i]; + + bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = view->surface_state.offset; + bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo = view->bo; + bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset = view->offset; + } + cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; +} + void VKAPI vkCmdBeginRenderPass( VkCmdBuffer cmdBuffer, const VkRenderPassBegin* pRenderPassBegin) @@ -2789,17 +2818,7 @@ void VKAPI vkCmdBeginRenderPass( .DrawingRectangleOriginY = 0, .DrawingRectangleOriginX = 0); - for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) { - struct anv_surface_view *view = framebuffer->color_attachments[i]; - - cmd_buffer->bindings.descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = - 
view->surface_state.offset; - cmd_buffer->bindings.descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo = - view->bo; - cmd_buffer->bindings.descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset = - view->offset; - } - cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; + anv_cmd_buffer_fill_render_targets(cmd_buffer); anv_cmd_buffer_clear(cmd_buffer, pass); } diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 6d44e3336ec..75d148b73c0 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -156,15 +156,18 @@ static void anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, struct anv_saved_state *state) { - memcpy(&state->bindings, &cmd_buffer->bindings, sizeof(state->bindings)); + cmd_buffer->bindings = &state->bindings; state->pipeline = cmd_buffer->pipeline; + + /* Initialize render targets for the meta bindings. */ + anv_cmd_buffer_fill_render_targets(cmd_buffer); } static void anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, const struct anv_saved_state *state) { - memcpy(&cmd_buffer->bindings, &state->bindings, sizeof(state->bindings)); + cmd_buffer->bindings = &cmd_buffer->default_bindings; cmd_buffer->pipeline = state->pipeline; cmd_buffer->vb_dirty |= (1 << NUM_VB_USED) - 1; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 48900dc8022..554bd005a34 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -543,7 +543,8 @@ struct anv_cmd_buffer { struct anv_framebuffer * framebuffer; struct anv_dynamic_rs_state * rs_state; struct anv_dynamic_vp_state * vp_state; - struct anv_bindings bindings; + struct anv_bindings * bindings; + struct anv_bindings default_bindings; }; void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); @@ -685,6 +686,8 @@ void anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, struct anv_render_pass *pass); +void +anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer); #ifdef __cplusplus } -- cgit v1.2.3 From eb1952592ebd72c0be70b0ddf781021bbadb3692 Mon Sep 17 00:00:00 
2001 From: Jason Ekstrand Date: Fri, 15 May 2015 22:17:05 -0700 Subject: vk/glsl_helpers: Fix GLSL_VK_SHADER with respect to commas Previously, the GLSL_VK_SHADER macro didn't work if the shader contained commas outside of parentheses due to the way the C preprocessor works. This commit fixes this by making it variadic again and doing it correctly this time. --- src/vulkan/glsl_helpers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/glsl_helpers.h b/src/vulkan/glsl_helpers.h index 76b0cbebcf4..3c72523586f 100644 --- a/src/vulkan/glsl_helpers.h +++ b/src/vulkan/glsl_helpers.h @@ -23,9 +23,9 @@ #pragma once -#define GLSL_VK_SHADER(device, stage, source) ({ \ +#define GLSL_VK_SHADER(device, stage, ...) ({ \ VkShader __shader; \ - const char __src[] = "#version 330\n" #source; \ + const char __src[] = "#version 330\n" #__VA_ARGS__; \ VkShaderCreateInfo __shader_create_info = { \ .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, \ .codeSize = sizeof(__src), \ -- cgit v1.2.3 From 4223de769ea9ced73771f4b0233fe320f4c2212d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 May 2015 10:23:09 -0700 Subject: vk/device: Simplify surface_count calculation --- src/vulkan/device.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 18e3c0559d8..8e60398373b 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2253,7 +2253,6 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; struct anv_bindings *bindings = cmd_buffer->bindings; uint32_t layers = cmd_buffer->framebuffer->layers; - uint32_t surface_count; for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { uint32_t bias; @@ -2269,10 +2268,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) /* This is a little awkward: layout can be NULL but we still have to * allocate and set a binding table for 
the PS stage for render * targets. */ - if (layout) - surface_count = layout->stage[s].surface_count; - else - surface_count = 0; + uint32_t surface_count = layout ? layout->stage[s].surface_count : 0; if (layers + surface_count > 0) { struct anv_state state; -- cgit v1.2.3 From 120394ac923f186b18679448ea5d9f088728aa2d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 May 2015 10:28:04 -0700 Subject: vk/meta: Save and restore the old bindings pointer If we don't do this then recursive meta is completely broken. What happens is that the outer meta call may change the bindings pointer and the inner meta call will change it again and, when it exits set it back to the default. However, the outer meta call may be relying on it being left alone so it uses the non-meta descriptor sets instead of its own. --- src/vulkan/meta.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 75d148b73c0..1255761c4d6 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -149,15 +149,17 @@ anv_device_init_meta_clear_state(struct anv_device *device) #define NUM_VB_USED 2 struct anv_saved_state { struct anv_bindings bindings; - struct anv_pipeline *pipeline; + struct anv_bindings *old_bindings; + struct anv_pipeline *old_pipeline; }; static void anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, struct anv_saved_state *state) { + state->old_bindings = cmd_buffer->bindings; cmd_buffer->bindings = &state->bindings; - state->pipeline = cmd_buffer->pipeline; + state->old_pipeline = cmd_buffer->pipeline; /* Initialize render targets for the meta bindings. 
*/ anv_cmd_buffer_fill_render_targets(cmd_buffer); @@ -167,8 +169,8 @@ static void anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, const struct anv_saved_state *state) { - cmd_buffer->bindings = &cmd_buffer->default_bindings; - cmd_buffer->pipeline = state->pipeline; + cmd_buffer->bindings = state->old_bindings; + cmd_buffer->pipeline = state->old_pipeline; cmd_buffer->vb_dirty |= (1 << NUM_VB_USED) - 1; cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY | -- cgit v1.2.3 From 22e61c9da4cf81d2a3ce753cb6cfd2d74aac8e47 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 May 2015 10:23:43 -0700 Subject: vk/meta: Make clear a no-op if no layers need clearing Among other things, this prevents recursive meta. --- src/vulkan/meta.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 1255761c4d6..aa7b16aee34 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -199,6 +199,9 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, float color[4]; } *instance_data; + if (pass->num_clear_layers == 0) + return; + const float vertex_data[] = { /* Rect-list coordinates */ 0.0, 0.0, -- cgit v1.2.3 From 057bef8a84aaa783bec279b1c08b7a716dcb410c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 May 2015 10:42:51 -0700 Subject: vk/device: Use bias rather than layers for computing binding table size Because we statically use the first 8 binding table entries for render targets, we need to create a table of size 8 + surfaces. 
--- src/vulkan/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 8e60398373b..e2871fa43ba 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2274,7 +2274,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) struct anv_state state; uint32_t size; - size = (layers + surface_count) * sizeof(uint32_t); + size = (bias + surface_count) * sizeof(uint32_t); state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, size, 32); memcpy(state.map, bindings->descriptors[s].surfaces, size); -- cgit v1.2.3 From 573ca4a4a79e19dc4e5160664843677010fe2c86 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 27 Apr 2015 21:13:44 -0700 Subject: nir: Import the revision 30 SPIR-V header from Khronos --- src/glsl/nir/spirv.h | 1304 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1304 insertions(+) create mode 100644 src/glsl/nir/spirv.h (limited to 'src') diff --git a/src/glsl/nir/spirv.h b/src/glsl/nir/spirv.h new file mode 100644 index 00000000000..93135c09596 --- /dev/null +++ b/src/glsl/nir/spirv.h @@ -0,0 +1,1304 @@ +/* +** Copyright (c) 2015 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. +*/ + +/* +** Specification revision 30. +** Enumeration tokens for SPIR-V, in three styles: C, C++, generic. +** - C++ will have the tokens in the "spv" name space, with no prefix. +** - C will have tokens with as "Spv" prefix. +** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. 
+*/ + +#ifndef spirv_H +#define spirv_H + +#ifdef __cplusplus + +namespace spv { + +const int MagicNumber = 0x07230203; +const int Version = 99; + +typedef unsigned int Id; + +const unsigned int OpCodeMask = 0xFFFF; +const unsigned int WordCountShift = 16; + +enum SourceLanguage { + SourceLanguageUnknown = 0, + SourceLanguageESSL = 1, + SourceLanguageGLSL = 2, + SourceLanguageOpenCL = 3, +}; + +enum ExecutionModel { + ExecutionModelVertex = 0, + ExecutionModelTessellationControl = 1, + ExecutionModelTessellationEvaluation = 2, + ExecutionModelGeometry = 3, + ExecutionModelFragment = 4, + ExecutionModelGLCompute = 5, + ExecutionModelKernel = 6, +}; + +enum AddressingModel { + AddressingModelLogical = 0, + AddressingModelPhysical32 = 1, + AddressingModelPhysical64 = 2, +}; + +enum MemoryModel { + MemoryModelSimple = 0, + MemoryModelGLSL450 = 1, + MemoryModelOpenCL12 = 2, + MemoryModelOpenCL20 = 3, + MemoryModelOpenCL21 = 4, +}; + +enum ExecutionMode { + ExecutionModeInvocations = 0, + ExecutionModeSpacingEqual = 1, + ExecutionModeSpacingFractionalEven = 2, + ExecutionModeSpacingFractionalOdd = 3, + ExecutionModeVertexOrderCw = 4, + ExecutionModeVertexOrderCcw = 5, + ExecutionModePixelCenterInteger = 6, + ExecutionModeOriginUpperLeft = 7, + ExecutionModeEarlyFragmentTests = 8, + ExecutionModePointMode = 9, + ExecutionModeXfb = 10, + ExecutionModeDepthReplacing = 11, + ExecutionModeDepthAny = 12, + ExecutionModeDepthGreater = 13, + ExecutionModeDepthLess = 14, + ExecutionModeDepthUnchanged = 15, + ExecutionModeLocalSize = 16, + ExecutionModeLocalSizeHint = 17, + ExecutionModeInputPoints = 18, + ExecutionModeInputLines = 19, + ExecutionModeInputLinesAdjacency = 20, + ExecutionModeInputTriangles = 21, + ExecutionModeInputTrianglesAdjacency = 22, + ExecutionModeInputQuads = 23, + ExecutionModeInputIsolines = 24, + ExecutionModeOutputVertices = 25, + ExecutionModeOutputPoints = 26, + ExecutionModeOutputLineStrip = 27, + ExecutionModeOutputTriangleStrip = 28, + 
ExecutionModeVecTypeHint = 29, + ExecutionModeContractionOff = 30, +}; + +enum StorageClass { + StorageClassUniformConstant = 0, + StorageClassInput = 1, + StorageClassUniform = 2, + StorageClassOutput = 3, + StorageClassWorkgroupLocal = 4, + StorageClassWorkgroupGlobal = 5, + StorageClassPrivateGlobal = 6, + StorageClassFunction = 7, + StorageClassGeneric = 8, + StorageClassPrivate = 9, + StorageClassAtomicCounter = 10, +}; + +enum Dim { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + DimCube = 3, + DimRect = 4, + DimBuffer = 5, +}; + +enum SamplerAddressingMode { + SamplerAddressingModeNone = 0, + SamplerAddressingModeClampToEdge = 1, + SamplerAddressingModeClamp = 2, + SamplerAddressingModeRepeat = 3, + SamplerAddressingModeRepeatMirrored = 4, +}; + +enum SamplerFilterMode { + SamplerFilterModeNearest = 0, + SamplerFilterModeLinear = 1, +}; + +enum FPFastMathModeShift { + FPFastMathModeNotNaNShift = 0, + FPFastMathModeNotInfShift = 1, + FPFastMathModeNSZShift = 2, + FPFastMathModeAllowRecipShift = 3, + FPFastMathModeFastShift = 4, +}; + +enum FPFastMathModeMask { + FPFastMathModeMaskNone = 0, + FPFastMathModeNotNaNMask = 0x00000001, + FPFastMathModeNotInfMask = 0x00000002, + FPFastMathModeNSZMask = 0x00000004, + FPFastMathModeAllowRecipMask = 0x00000008, + FPFastMathModeFastMask = 0x00000010, +}; + +enum FPRoundingMode { + FPRoundingModeRTE = 0, + FPRoundingModeRTZ = 1, + FPRoundingModeRTP = 2, + FPRoundingModeRTN = 3, +}; + +enum LinkageType { + LinkageTypeExport = 0, + LinkageTypeImport = 1, +}; + +enum AccessQualifier { + AccessQualifierReadOnly = 0, + AccessQualifierWriteOnly = 1, + AccessQualifierReadWrite = 2, +}; + +enum FunctionParameterAttribute { + FunctionParameterAttributeZext = 0, + FunctionParameterAttributeSext = 1, + FunctionParameterAttributeByVal = 2, + FunctionParameterAttributeSret = 3, + FunctionParameterAttributeNoAlias = 4, + FunctionParameterAttributeNoCapture = 5, + FunctionParameterAttributeSVM = 6, + FunctionParameterAttributeNoWrite = 7, + 
FunctionParameterAttributeNoReadWrite = 8, +}; + +enum Decoration { + DecorationPrecisionLow = 0, + DecorationPrecisionMedium = 1, + DecorationPrecisionHigh = 2, + DecorationBlock = 3, + DecorationBufferBlock = 4, + DecorationRowMajor = 5, + DecorationColMajor = 6, + DecorationGLSLShared = 7, + DecorationGLSLStd140 = 8, + DecorationGLSLStd430 = 9, + DecorationGLSLPacked = 10, + DecorationSmooth = 11, + DecorationNoperspective = 12, + DecorationFlat = 13, + DecorationPatch = 14, + DecorationCentroid = 15, + DecorationSample = 16, + DecorationInvariant = 17, + DecorationRestrict = 18, + DecorationAliased = 19, + DecorationVolatile = 20, + DecorationConstant = 21, + DecorationCoherent = 22, + DecorationNonwritable = 23, + DecorationNonreadable = 24, + DecorationUniform = 25, + DecorationNoStaticUse = 26, + DecorationCPacked = 27, + DecorationSaturatedConversion = 28, + DecorationStream = 29, + DecorationLocation = 30, + DecorationComponent = 31, + DecorationIndex = 32, + DecorationBinding = 33, + DecorationDescriptorSet = 34, + DecorationOffset = 35, + DecorationAlignment = 36, + DecorationXfbBuffer = 37, + DecorationStride = 38, + DecorationBuiltIn = 39, + DecorationFuncParamAttr = 40, + DecorationFPRoundingMode = 41, + DecorationFPFastMathMode = 42, + DecorationLinkageAttributes = 43, + DecorationSpecId = 44, +}; + +enum BuiltIn { + BuiltInPosition = 0, + BuiltInPointSize = 1, + BuiltInClipVertex = 2, + BuiltInClipDistance = 3, + BuiltInCullDistance = 4, + BuiltInVertexId = 5, + BuiltInInstanceId = 6, + BuiltInPrimitiveId = 7, + BuiltInInvocationId = 8, + BuiltInLayer = 9, + BuiltInViewportIndex = 10, + BuiltInTessLevelOuter = 11, + BuiltInTessLevelInner = 12, + BuiltInTessCoord = 13, + BuiltInPatchVertices = 14, + BuiltInFragCoord = 15, + BuiltInPointCoord = 16, + BuiltInFrontFacing = 17, + BuiltInSampleId = 18, + BuiltInSamplePosition = 19, + BuiltInSampleMask = 20, + BuiltInFragColor = 21, + BuiltInFragDepth = 22, + BuiltInHelperInvocation = 23, + 
BuiltInNumWorkgroups = 24, + BuiltInWorkgroupSize = 25, + BuiltInWorkgroupId = 26, + BuiltInLocalInvocationId = 27, + BuiltInGlobalInvocationId = 28, + BuiltInLocalInvocationIndex = 29, + BuiltInWorkDim = 30, + BuiltInGlobalSize = 31, + BuiltInEnqueuedWorkgroupSize = 32, + BuiltInGlobalOffset = 33, + BuiltInGlobalLinearId = 34, + BuiltInWorkgroupLinearId = 35, + BuiltInSubgroupSize = 36, + BuiltInSubgroupMaxSize = 37, + BuiltInNumSubgroups = 38, + BuiltInNumEnqueuedSubgroups = 39, + BuiltInSubgroupId = 40, + BuiltInSubgroupLocalInvocationId = 41, +}; + +enum SelectionControlShift { + SelectionControlFlattenShift = 0, + SelectionControlDontFlattenShift = 1, +}; + +enum SelectionControlMask { + SelectionControlMaskNone = 0, + SelectionControlFlattenMask = 0x00000001, + SelectionControlDontFlattenMask = 0x00000002, +}; + +enum LoopControlShift { + LoopControlUnrollShift = 0, + LoopControlDontUnrollShift = 1, +}; + +enum LoopControlMask { + LoopControlMaskNone = 0, + LoopControlUnrollMask = 0x00000001, + LoopControlDontUnrollMask = 0x00000002, +}; + +enum FunctionControlShift { + FunctionControlInlineShift = 0, + FunctionControlDontInlineShift = 1, + FunctionControlPureShift = 2, + FunctionControlConstShift = 3, +}; + +enum FunctionControlMask { + FunctionControlMaskNone = 0, + FunctionControlInlineMask = 0x00000001, + FunctionControlDontInlineMask = 0x00000002, + FunctionControlPureMask = 0x00000004, + FunctionControlConstMask = 0x00000008, +}; + +enum MemorySemanticsShift { + MemorySemanticsRelaxedShift = 0, + MemorySemanticsSequentiallyConsistentShift = 1, + MemorySemanticsAcquireShift = 2, + MemorySemanticsReleaseShift = 3, + MemorySemanticsUniformMemoryShift = 4, + MemorySemanticsSubgroupMemoryShift = 5, + MemorySemanticsWorkgroupLocalMemoryShift = 6, + MemorySemanticsWorkgroupGlobalMemoryShift = 7, + MemorySemanticsAtomicCounterMemoryShift = 8, + MemorySemanticsImageMemoryShift = 9, +}; + +enum MemorySemanticsMask { + MemorySemanticsMaskNone = 0, + 
MemorySemanticsRelaxedMask = 0x00000001, + MemorySemanticsSequentiallyConsistentMask = 0x00000002, + MemorySemanticsAcquireMask = 0x00000004, + MemorySemanticsReleaseMask = 0x00000008, + MemorySemanticsUniformMemoryMask = 0x00000010, + MemorySemanticsSubgroupMemoryMask = 0x00000020, + MemorySemanticsWorkgroupLocalMemoryMask = 0x00000040, + MemorySemanticsWorkgroupGlobalMemoryMask = 0x00000080, + MemorySemanticsAtomicCounterMemoryMask = 0x00000100, + MemorySemanticsImageMemoryMask = 0x00000200, +}; + +enum MemoryAccessShift { + MemoryAccessVolatileShift = 0, + MemoryAccessAlignedShift = 1, +}; + +enum MemoryAccessMask { + MemoryAccessMaskNone = 0, + MemoryAccessVolatileMask = 0x00000001, + MemoryAccessAlignedMask = 0x00000002, +}; + +enum ExecutionScope { + ExecutionScopeCrossDevice = 0, + ExecutionScopeDevice = 1, + ExecutionScopeWorkgroup = 2, + ExecutionScopeSubgroup = 3, +}; + +enum GroupOperation { + GroupOperationReduce = 0, + GroupOperationInclusiveScan = 1, + GroupOperationExclusiveScan = 2, +}; + +enum KernelEnqueueFlags { + KernelEnqueueFlagsNoWait = 0, + KernelEnqueueFlagsWaitKernel = 1, + KernelEnqueueFlagsWaitWorkGroup = 2, +}; + +enum KernelProfilingInfoShift { + KernelProfilingInfoCmdExecTimeShift = 0, +}; + +enum KernelProfilingInfoMask { + KernelProfilingInfoMaskNone = 0, + KernelProfilingInfoCmdExecTimeMask = 0x00000001, +}; + +enum Op { + OpNop = 0, + OpSource = 1, + OpSourceExtension = 2, + OpExtension = 3, + OpExtInstImport = 4, + OpMemoryModel = 5, + OpEntryPoint = 6, + OpExecutionMode = 7, + OpTypeVoid = 8, + OpTypeBool = 9, + OpTypeInt = 10, + OpTypeFloat = 11, + OpTypeVector = 12, + OpTypeMatrix = 13, + OpTypeSampler = 14, + OpTypeFilter = 15, + OpTypeArray = 16, + OpTypeRuntimeArray = 17, + OpTypeStruct = 18, + OpTypeOpaque = 19, + OpTypePointer = 20, + OpTypeFunction = 21, + OpTypeEvent = 22, + OpTypeDeviceEvent = 23, + OpTypeReserveId = 24, + OpTypeQueue = 25, + OpTypePipe = 26, + OpConstantTrue = 27, + OpConstantFalse = 28, + OpConstant 
= 29, + OpConstantComposite = 30, + OpConstantSampler = 31, + OpConstantNullPointer = 32, + OpConstantNullObject = 33, + OpSpecConstantTrue = 34, + OpSpecConstantFalse = 35, + OpSpecConstant = 36, + OpSpecConstantComposite = 37, + OpVariable = 38, + OpVariableArray = 39, + OpFunction = 40, + OpFunctionParameter = 41, + OpFunctionEnd = 42, + OpFunctionCall = 43, + OpExtInst = 44, + OpUndef = 45, + OpLoad = 46, + OpStore = 47, + OpPhi = 48, + OpDecorationGroup = 49, + OpDecorate = 50, + OpMemberDecorate = 51, + OpGroupDecorate = 52, + OpGroupMemberDecorate = 53, + OpName = 54, + OpMemberName = 55, + OpString = 56, + OpLine = 57, + OpVectorExtractDynamic = 58, + OpVectorInsertDynamic = 59, + OpVectorShuffle = 60, + OpCompositeConstruct = 61, + OpCompositeExtract = 62, + OpCompositeInsert = 63, + OpCopyObject = 64, + OpCopyMemory = 65, + OpCopyMemorySized = 66, + OpSampler = 67, + OpTextureSample = 68, + OpTextureSampleDref = 69, + OpTextureSampleLod = 70, + OpTextureSampleProj = 71, + OpTextureSampleGrad = 72, + OpTextureSampleOffset = 73, + OpTextureSampleProjLod = 74, + OpTextureSampleProjGrad = 75, + OpTextureSampleLodOffset = 76, + OpTextureSampleProjOffset = 77, + OpTextureSampleGradOffset = 78, + OpTextureSampleProjLodOffset = 79, + OpTextureSampleProjGradOffset = 80, + OpTextureFetchTexelLod = 81, + OpTextureFetchTexelOffset = 82, + OpTextureFetchSample = 83, + OpTextureFetchTexel = 84, + OpTextureGather = 85, + OpTextureGatherOffset = 86, + OpTextureGatherOffsets = 87, + OpTextureQuerySizeLod = 88, + OpTextureQuerySize = 89, + OpTextureQueryLod = 90, + OpTextureQueryLevels = 91, + OpTextureQuerySamples = 92, + OpAccessChain = 93, + OpInBoundsAccessChain = 94, + OpSNegate = 95, + OpFNegate = 96, + OpNot = 97, + OpAny = 98, + OpAll = 99, + OpConvertFToU = 100, + OpConvertFToS = 101, + OpConvertSToF = 102, + OpConvertUToF = 103, + OpUConvert = 104, + OpSConvert = 105, + OpFConvert = 106, + OpConvertPtrToU = 107, + OpConvertUToPtr = 108, + OpPtrCastToGeneric = 
109, + OpGenericCastToPtr = 110, + OpBitcast = 111, + OpTranspose = 112, + OpIsNan = 113, + OpIsInf = 114, + OpIsFinite = 115, + OpIsNormal = 116, + OpSignBitSet = 117, + OpLessOrGreater = 118, + OpOrdered = 119, + OpUnordered = 120, + OpArrayLength = 121, + OpIAdd = 122, + OpFAdd = 123, + OpISub = 124, + OpFSub = 125, + OpIMul = 126, + OpFMul = 127, + OpUDiv = 128, + OpSDiv = 129, + OpFDiv = 130, + OpUMod = 131, + OpSRem = 132, + OpSMod = 133, + OpFRem = 134, + OpFMod = 135, + OpVectorTimesScalar = 136, + OpMatrixTimesScalar = 137, + OpVectorTimesMatrix = 138, + OpMatrixTimesVector = 139, + OpMatrixTimesMatrix = 140, + OpOuterProduct = 141, + OpDot = 142, + OpShiftRightLogical = 143, + OpShiftRightArithmetic = 144, + OpShiftLeftLogical = 145, + OpLogicalOr = 146, + OpLogicalXor = 147, + OpLogicalAnd = 148, + OpBitwiseOr = 149, + OpBitwiseXor = 150, + OpBitwiseAnd = 151, + OpSelect = 152, + OpIEqual = 153, + OpFOrdEqual = 154, + OpFUnordEqual = 155, + OpINotEqual = 156, + OpFOrdNotEqual = 157, + OpFUnordNotEqual = 158, + OpULessThan = 159, + OpSLessThan = 160, + OpFOrdLessThan = 161, + OpFUnordLessThan = 162, + OpUGreaterThan = 163, + OpSGreaterThan = 164, + OpFOrdGreaterThan = 165, + OpFUnordGreaterThan = 166, + OpULessThanEqual = 167, + OpSLessThanEqual = 168, + OpFOrdLessThanEqual = 169, + OpFUnordLessThanEqual = 170, + OpUGreaterThanEqual = 171, + OpSGreaterThanEqual = 172, + OpFOrdGreaterThanEqual = 173, + OpFUnordGreaterThanEqual = 174, + OpDPdx = 175, + OpDPdy = 176, + OpFwidth = 177, + OpDPdxFine = 178, + OpDPdyFine = 179, + OpFwidthFine = 180, + OpDPdxCoarse = 181, + OpDPdyCoarse = 182, + OpFwidthCoarse = 183, + OpEmitVertex = 184, + OpEndPrimitive = 185, + OpEmitStreamVertex = 186, + OpEndStreamPrimitive = 187, + OpControlBarrier = 188, + OpMemoryBarrier = 189, + OpImagePointer = 190, + OpAtomicInit = 191, + OpAtomicLoad = 192, + OpAtomicStore = 193, + OpAtomicExchange = 194, + OpAtomicCompareExchange = 195, + OpAtomicCompareExchangeWeak = 196, + 
OpAtomicIIncrement = 197, + OpAtomicIDecrement = 198, + OpAtomicIAdd = 199, + OpAtomicISub = 200, + OpAtomicUMin = 201, + OpAtomicUMax = 202, + OpAtomicAnd = 203, + OpAtomicOr = 204, + OpAtomicXor = 205, + OpLoopMerge = 206, + OpSelectionMerge = 207, + OpLabel = 208, + OpBranch = 209, + OpBranchConditional = 210, + OpSwitch = 211, + OpKill = 212, + OpReturn = 213, + OpReturnValue = 214, + OpUnreachable = 215, + OpLifetimeStart = 216, + OpLifetimeStop = 217, + OpCompileFlag = 218, + OpAsyncGroupCopy = 219, + OpWaitGroupEvents = 220, + OpGroupAll = 221, + OpGroupAny = 222, + OpGroupBroadcast = 223, + OpGroupIAdd = 224, + OpGroupFAdd = 225, + OpGroupFMin = 226, + OpGroupUMin = 227, + OpGroupSMin = 228, + OpGroupFMax = 229, + OpGroupUMax = 230, + OpGroupSMax = 231, + OpGenericCastToPtrExplicit = 232, + OpGenericPtrMemSemantics = 233, + OpReadPipe = 234, + OpWritePipe = 235, + OpReservedReadPipe = 236, + OpReservedWritePipe = 237, + OpReserveReadPipePackets = 238, + OpReserveWritePipePackets = 239, + OpCommitReadPipe = 240, + OpCommitWritePipe = 241, + OpIsValidReserveId = 242, + OpGetNumPipePackets = 243, + OpGetMaxPipePackets = 244, + OpGroupReserveReadPipePackets = 245, + OpGroupReserveWritePipePackets = 246, + OpGroupCommitReadPipe = 247, + OpGroupCommitWritePipe = 248, + OpEnqueueMarker = 249, + OpEnqueueKernel = 250, + OpGetKernelNDrangeSubGroupCount = 251, + OpGetKernelNDrangeMaxSubGroupSize = 252, + OpGetKernelWorkGroupSize = 253, + OpGetKernelPreferredWorkGroupSizeMultiple = 254, + OpRetainEvent = 255, + OpReleaseEvent = 256, + OpCreateUserEvent = 257, + OpIsValidEvent = 258, + OpSetUserEventStatus = 259, + OpCaptureEventProfilingInfo = 260, + OpGetDefaultQueue = 261, + OpBuildNDRange = 262, + OpSatConvertSToU = 263, + OpSatConvertUToS = 264, + OpAtomicIMin = 265, + OpAtomicIMax = 266, +}; + +}; // end namespace spv + +#endif // #ifdef __cplusplus + + +#ifndef __cplusplus + +const int SpvMagicNumber = 0x07230203; +const int SpvVersion = 99; + +typedef unsigned 
int SpvId; + +const unsigned int SpvOpCodeMask = 0xFFFF; +const unsigned int SpvWordCountShift = 16; + +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL = 3, +} SpvSourceLanguage; + +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, +} SpvExecutionModel; + +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, +} SpvAddressingModel; + +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + SpvMemoryModelOpenCL12 = 2, + SpvMemoryModelOpenCL20 = 3, + SpvMemoryModelOpenCL21 = 4, +} SpvMemoryModel; + +typedef enum SpvExecutionMode_ { + SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeEarlyFragmentTests = 8, + SpvExecutionModePointMode = 9, + SpvExecutionModeXfb = 10, + SpvExecutionModeDepthReplacing = 11, + SpvExecutionModeDepthAny = 12, + SpvExecutionModeDepthGreater = 13, + SpvExecutionModeDepthLess = 14, + SpvExecutionModeDepthUnchanged = 15, + SpvExecutionModeLocalSize = 16, + SpvExecutionModeLocalSizeHint = 17, + SpvExecutionModeInputPoints = 18, + SpvExecutionModeInputLines = 19, + SpvExecutionModeInputLinesAdjacency = 20, + SpvExecutionModeInputTriangles = 21, + SpvExecutionModeInputTrianglesAdjacency = 22, + SpvExecutionModeInputQuads = 23, + SpvExecutionModeInputIsolines = 24, + SpvExecutionModeOutputVertices = 25, + 
SpvExecutionModeOutputPoints = 26, + SpvExecutionModeOutputLineStrip = 27, + SpvExecutionModeOutputTriangleStrip = 28, + SpvExecutionModeVecTypeHint = 29, + SpvExecutionModeContractionOff = 30, +} SpvExecutionMode; + +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroupLocal = 4, + SpvStorageClassWorkgroupGlobal = 5, + SpvStorageClassPrivateGlobal = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPrivate = 9, + SpvStorageClassAtomicCounter = 10, +} SpvStorageClass; + +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, +} SpvDim; + +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, + SpvSamplerAddressingModeRepeatMirrored = 4, +} SpvSamplerAddressingMode; + +typedef enum SpvSamplerFilterMode_ { + SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, +} SpvSamplerFilterMode; + +typedef enum SpvFPFastMathModeShift_ { + SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, +} SpvFPFastMathModeShift; + +typedef enum SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, +} SpvFPFastMathModeMask; + +typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, +} SpvFPRoundingMode; + +typedef enum SpvLinkageType_ { + SpvLinkageTypeExport = 0, + 
SpvLinkageTypeImport = 1, +} SpvLinkageType; + +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, +} SpvAccessQualifier; + +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeSVM = 6, + SpvFunctionParameterAttributeNoWrite = 7, + SpvFunctionParameterAttributeNoReadWrite = 8, +} SpvFunctionParameterAttribute; + +typedef enum SpvDecoration_ { + SpvDecorationPrecisionLow = 0, + SpvDecorationPrecisionMedium = 1, + SpvDecorationPrecisionHigh = 2, + SpvDecorationBlock = 3, + SpvDecorationBufferBlock = 4, + SpvDecorationRowMajor = 5, + SpvDecorationColMajor = 6, + SpvDecorationGLSLShared = 7, + SpvDecorationGLSLStd140 = 8, + SpvDecorationGLSLStd430 = 9, + SpvDecorationGLSLPacked = 10, + SpvDecorationSmooth = 11, + SpvDecorationNoperspective = 12, + SpvDecorationFlat = 13, + SpvDecorationPatch = 14, + SpvDecorationCentroid = 15, + SpvDecorationSample = 16, + SpvDecorationInvariant = 17, + SpvDecorationRestrict = 18, + SpvDecorationAliased = 19, + SpvDecorationVolatile = 20, + SpvDecorationConstant = 21, + SpvDecorationCoherent = 22, + SpvDecorationNonwritable = 23, + SpvDecorationNonreadable = 24, + SpvDecorationUniform = 25, + SpvDecorationNoStaticUse = 26, + SpvDecorationCPacked = 27, + SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + SpvDecorationAlignment = 36, + SpvDecorationXfbBuffer = 37, + SpvDecorationStride = 38, + SpvDecorationBuiltIn = 39, + SpvDecorationFuncParamAttr = 40, + 
SpvDecorationFPRoundingMode = 41, + SpvDecorationFPFastMathMode = 42, + SpvDecorationLinkageAttributes = 43, + SpvDecorationSpecId = 44, +} SpvDecoration; + +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipVertex = 2, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragColor = 21, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInWorkgroupLinearId = 35, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, +} SpvBuiltIn; + +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + SpvSelectionControlDontFlattenShift = 1, +} SpvSelectionControlShift; + +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; + +typedef enum SpvLoopControlShift_ { + SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, +} 
SpvLoopControlShift; + +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, +} SpvLoopControlMask; + +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, +} SpvFunctionControlShift; + +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, +} SpvFunctionControlMask; + +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsRelaxedShift = 0, + SpvMemorySemanticsSequentiallyConsistentShift = 1, + SpvMemorySemanticsAcquireShift = 2, + SpvMemorySemanticsReleaseShift = 3, + SpvMemorySemanticsUniformMemoryShift = 4, + SpvMemorySemanticsSubgroupMemoryShift = 5, + SpvMemorySemanticsWorkgroupLocalMemoryShift = 6, + SpvMemorySemanticsWorkgroupGlobalMemoryShift = 7, + SpvMemorySemanticsAtomicCounterMemoryShift = 8, + SpvMemorySemanticsImageMemoryShift = 9, +} SpvMemorySemanticsShift; + +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsRelaxedMask = 0x00000001, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000002, + SpvMemorySemanticsAcquireMask = 0x00000004, + SpvMemorySemanticsReleaseMask = 0x00000008, + SpvMemorySemanticsUniformMemoryMask = 0x00000010, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000020, + SpvMemorySemanticsWorkgroupLocalMemoryMask = 0x00000040, + SpvMemorySemanticsWorkgroupGlobalMemoryMask = 0x00000080, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000100, + SpvMemorySemanticsImageMemoryMask = 0x00000200, +} SpvMemorySemanticsMask; + +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift = 0, + SpvMemoryAccessAlignedShift = 1, +} SpvMemoryAccessShift; + 
+typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, +} SpvMemoryAccessMask; + +typedef enum SpvExecutionScope_ { + SpvExecutionScopeCrossDevice = 0, + SpvExecutionScopeDevice = 1, + SpvExecutionScopeWorkgroup = 2, + SpvExecutionScopeSubgroup = 3, +} SpvExecutionScope; + +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, +} SpvGroupOperation; + +typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, +} SpvKernelEnqueueFlags; + +typedef enum SpvKernelProfilingInfoShift_ { + SpvKernelProfilingInfoCmdExecTimeShift = 0, +} SpvKernelProfilingInfoShift; + +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; + +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpSource = 1, + SpvOpSourceExtension = 2, + SpvOpExtension = 3, + SpvOpExtInstImport = 4, + SpvOpMemoryModel = 5, + SpvOpEntryPoint = 6, + SpvOpExecutionMode = 7, + SpvOpTypeVoid = 8, + SpvOpTypeBool = 9, + SpvOpTypeInt = 10, + SpvOpTypeFloat = 11, + SpvOpTypeVector = 12, + SpvOpTypeMatrix = 13, + SpvOpTypeSampler = 14, + SpvOpTypeFilter = 15, + SpvOpTypeArray = 16, + SpvOpTypeRuntimeArray = 17, + SpvOpTypeStruct = 18, + SpvOpTypeOpaque = 19, + SpvOpTypePointer = 20, + SpvOpTypeFunction = 21, + SpvOpTypeEvent = 22, + SpvOpTypeDeviceEvent = 23, + SpvOpTypeReserveId = 24, + SpvOpTypeQueue = 25, + SpvOpTypePipe = 26, + SpvOpConstantTrue = 27, + SpvOpConstantFalse = 28, + SpvOpConstant = 29, + SpvOpConstantComposite = 30, + SpvOpConstantSampler = 31, + SpvOpConstantNullPointer = 32, + SpvOpConstantNullObject = 33, + SpvOpSpecConstantTrue = 34, + SpvOpSpecConstantFalse = 35, + SpvOpSpecConstant = 36, + SpvOpSpecConstantComposite 
= 37, + SpvOpVariable = 38, + SpvOpVariableArray = 39, + SpvOpFunction = 40, + SpvOpFunctionParameter = 41, + SpvOpFunctionEnd = 42, + SpvOpFunctionCall = 43, + SpvOpExtInst = 44, + SpvOpUndef = 45, + SpvOpLoad = 46, + SpvOpStore = 47, + SpvOpPhi = 48, + SpvOpDecorationGroup = 49, + SpvOpDecorate = 50, + SpvOpMemberDecorate = 51, + SpvOpGroupDecorate = 52, + SpvOpGroupMemberDecorate = 53, + SpvOpName = 54, + SpvOpMemberName = 55, + SpvOpString = 56, + SpvOpLine = 57, + SpvOpVectorExtractDynamic = 58, + SpvOpVectorInsertDynamic = 59, + SpvOpVectorShuffle = 60, + SpvOpCompositeConstruct = 61, + SpvOpCompositeExtract = 62, + SpvOpCompositeInsert = 63, + SpvOpCopyObject = 64, + SpvOpCopyMemory = 65, + SpvOpCopyMemorySized = 66, + SpvOpSampler = 67, + SpvOpTextureSample = 68, + SpvOpTextureSampleDref = 69, + SpvOpTextureSampleLod = 70, + SpvOpTextureSampleProj = 71, + SpvOpTextureSampleGrad = 72, + SpvOpTextureSampleOffset = 73, + SpvOpTextureSampleProjLod = 74, + SpvOpTextureSampleProjGrad = 75, + SpvOpTextureSampleLodOffset = 76, + SpvOpTextureSampleProjOffset = 77, + SpvOpTextureSampleGradOffset = 78, + SpvOpTextureSampleProjLodOffset = 79, + SpvOpTextureSampleProjGradOffset = 80, + SpvOpTextureFetchTexelLod = 81, + SpvOpTextureFetchTexelOffset = 82, + SpvOpTextureFetchSample = 83, + SpvOpTextureFetchTexel = 84, + SpvOpTextureGather = 85, + SpvOpTextureGatherOffset = 86, + SpvOpTextureGatherOffsets = 87, + SpvOpTextureQuerySizeLod = 88, + SpvOpTextureQuerySize = 89, + SpvOpTextureQueryLod = 90, + SpvOpTextureQueryLevels = 91, + SpvOpTextureQuerySamples = 92, + SpvOpAccessChain = 93, + SpvOpInBoundsAccessChain = 94, + SpvOpSNegate = 95, + SpvOpFNegate = 96, + SpvOpNot = 97, + SpvOpAny = 98, + SpvOpAll = 99, + SpvOpConvertFToU = 100, + SpvOpConvertFToS = 101, + SpvOpConvertSToF = 102, + SpvOpConvertUToF = 103, + SpvOpUConvert = 104, + SpvOpSConvert = 105, + SpvOpFConvert = 106, + SpvOpConvertPtrToU = 107, + SpvOpConvertUToPtr = 108, + SpvOpPtrCastToGeneric = 109, + 
SpvOpGenericCastToPtr = 110, + SpvOpBitcast = 111, + SpvOpTranspose = 112, + SpvOpIsNan = 113, + SpvOpIsInf = 114, + SpvOpIsFinite = 115, + SpvOpIsNormal = 116, + SpvOpSignBitSet = 117, + SpvOpLessOrGreater = 118, + SpvOpOrdered = 119, + SpvOpUnordered = 120, + SpvOpArrayLength = 121, + SpvOpIAdd = 122, + SpvOpFAdd = 123, + SpvOpISub = 124, + SpvOpFSub = 125, + SpvOpIMul = 126, + SpvOpFMul = 127, + SpvOpUDiv = 128, + SpvOpSDiv = 129, + SpvOpFDiv = 130, + SpvOpUMod = 131, + SpvOpSRem = 132, + SpvOpSMod = 133, + SpvOpFRem = 134, + SpvOpFMod = 135, + SpvOpVectorTimesScalar = 136, + SpvOpMatrixTimesScalar = 137, + SpvOpVectorTimesMatrix = 138, + SpvOpMatrixTimesVector = 139, + SpvOpMatrixTimesMatrix = 140, + SpvOpOuterProduct = 141, + SpvOpDot = 142, + SpvOpShiftRightLogical = 143, + SpvOpShiftRightArithmetic = 144, + SpvOpShiftLeftLogical = 145, + SpvOpLogicalOr = 146, + SpvOpLogicalXor = 147, + SpvOpLogicalAnd = 148, + SpvOpBitwiseOr = 149, + SpvOpBitwiseXor = 150, + SpvOpBitwiseAnd = 151, + SpvOpSelect = 152, + SpvOpIEqual = 153, + SpvOpFOrdEqual = 154, + SpvOpFUnordEqual = 155, + SpvOpINotEqual = 156, + SpvOpFOrdNotEqual = 157, + SpvOpFUnordNotEqual = 158, + SpvOpULessThan = 159, + SpvOpSLessThan = 160, + SpvOpFOrdLessThan = 161, + SpvOpFUnordLessThan = 162, + SpvOpUGreaterThan = 163, + SpvOpSGreaterThan = 164, + SpvOpFOrdGreaterThan = 165, + SpvOpFUnordGreaterThan = 166, + SpvOpULessThanEqual = 167, + SpvOpSLessThanEqual = 168, + SpvOpFOrdLessThanEqual = 169, + SpvOpFUnordLessThanEqual = 170, + SpvOpUGreaterThanEqual = 171, + SpvOpSGreaterThanEqual = 172, + SpvOpFOrdGreaterThanEqual = 173, + SpvOpFUnordGreaterThanEqual = 174, + SpvOpDPdx = 175, + SpvOpDPdy = 176, + SpvOpFwidth = 177, + SpvOpDPdxFine = 178, + SpvOpDPdyFine = 179, + SpvOpFwidthFine = 180, + SpvOpDPdxCoarse = 181, + SpvOpDPdyCoarse = 182, + SpvOpFwidthCoarse = 183, + SpvOpEmitVertex = 184, + SpvOpEndPrimitive = 185, + SpvOpEmitStreamVertex = 186, + SpvOpEndStreamPrimitive = 187, + SpvOpControlBarrier 
= 188, + SpvOpMemoryBarrier = 189, + SpvOpImagePointer = 190, + SpvOpAtomicInit = 191, + SpvOpAtomicLoad = 192, + SpvOpAtomicStore = 193, + SpvOpAtomicExchange = 194, + SpvOpAtomicCompareExchange = 195, + SpvOpAtomicCompareExchangeWeak = 196, + SpvOpAtomicIIncrement = 197, + SpvOpAtomicIDecrement = 198, + SpvOpAtomicIAdd = 199, + SpvOpAtomicISub = 200, + SpvOpAtomicUMin = 201, + SpvOpAtomicUMax = 202, + SpvOpAtomicAnd = 203, + SpvOpAtomicOr = 204, + SpvOpAtomicXor = 205, + SpvOpLoopMerge = 206, + SpvOpSelectionMerge = 207, + SpvOpLabel = 208, + SpvOpBranch = 209, + SpvOpBranchConditional = 210, + SpvOpSwitch = 211, + SpvOpKill = 212, + SpvOpReturn = 213, + SpvOpReturnValue = 214, + SpvOpUnreachable = 215, + SpvOpLifetimeStart = 216, + SpvOpLifetimeStop = 217, + SpvOpCompileFlag = 218, + SpvOpAsyncGroupCopy = 219, + SpvOpWaitGroupEvents = 220, + SpvOpGroupAll = 221, + SpvOpGroupAny = 222, + SpvOpGroupBroadcast = 223, + SpvOpGroupIAdd = 224, + SpvOpGroupFAdd = 225, + SpvOpGroupFMin = 226, + SpvOpGroupUMin = 227, + SpvOpGroupSMin = 228, + SpvOpGroupFMax = 229, + SpvOpGroupUMax = 230, + SpvOpGroupSMax = 231, + SpvOpGenericCastToPtrExplicit = 232, + SpvOpGenericPtrMemSemantics = 233, + SpvOpReadPipe = 234, + SpvOpWritePipe = 235, + SpvOpReservedReadPipe = 236, + SpvOpReservedWritePipe = 237, + SpvOpReserveReadPipePackets = 238, + SpvOpReserveWritePipePackets = 239, + SpvOpCommitReadPipe = 240, + SpvOpCommitWritePipe = 241, + SpvOpIsValidReserveId = 242, + SpvOpGetNumPipePackets = 243, + SpvOpGetMaxPipePackets = 244, + SpvOpGroupReserveReadPipePackets = 245, + SpvOpGroupReserveWritePipePackets = 246, + SpvOpGroupCommitReadPipe = 247, + SpvOpGroupCommitWritePipe = 248, + SpvOpEnqueueMarker = 249, + SpvOpEnqueueKernel = 250, + SpvOpGetKernelNDrangeSubGroupCount = 251, + SpvOpGetKernelNDrangeMaxSubGroupSize = 252, + SpvOpGetKernelWorkGroupSize = 253, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 254, + SpvOpRetainEvent = 255, + SpvOpReleaseEvent = 256, + 
SpvOpCreateUserEvent = 257, + SpvOpIsValidEvent = 258, + SpvOpSetUserEventStatus = 259, + SpvOpCaptureEventProfilingInfo = 260, + SpvOpGetDefaultQueue = 261, + SpvOpBuildNDRange = 262, + SpvOpSatConvertSToU = 263, + SpvOpSatConvertUToS = 264, + SpvOpAtomicIMin = 265, + SpvOpAtomicIMax = 266, +} SpvOp; + +#endif // #ifndef __cplusplus + +#endif // #ifndef spirv_H -- cgit v1.2.3 From 98452cd8ae22d2c7448b87c9090b3f1be09d9bc5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 28 Apr 2015 17:43:16 -0700 Subject: nir: Add the start of a SPIR-V to NIR translator At the moment, it can handle the very basics of strings and can ignore debug instructions. It also has basic support for decorations. --- src/glsl/Makefile.sources | 2 + src/glsl/nir/nir_spirv.h | 38 ++++ src/glsl/nir/spirv_to_nir.c | 453 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 493 insertions(+) create mode 100644 src/glsl/nir/nir_spirv.h create mode 100644 src/glsl/nir/spirv_to_nir.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index d784a810723..be6e4ecf839 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -59,6 +59,7 @@ NIR_FILES = \ nir/nir_remove_dead_variables.c \ nir/nir_search.c \ nir/nir_search.h \ + nir/nir_spirv.h \ nir/nir_split_var_copies.c \ nir/nir_sweep.c \ nir/nir_to_ssa.c \ @@ -68,6 +69,7 @@ NIR_FILES = \ nir/nir_worklist.c \ nir/nir_worklist.h \ nir/nir_types.cpp \ + nir/spirv_to_nir.c \ $(NIR_GENERATED_FILES) # libglsl diff --git a/src/glsl/nir/nir_spirv.h b/src/glsl/nir/nir_spirv.h new file mode 100644 index 00000000000..789d30cd672 --- /dev/null +++ b/src/glsl/nir/nir_spirv.h @@ -0,0 +1,38 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, 
modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#pragma once + +#ifndef _NIR_SPIRV_H_ +#define _NIR_SPIRV_H_ + +#include "nir.h" + +nir_shader *spirv_to_nir(const uint32_t *words, size_t word_count, + const nir_shader_compiler_options *options); + +#endif /* _NIR_SPIRV_H_ */ diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c new file mode 100644 index 00000000000..02b99db17ae --- /dev/null +++ b/src/glsl/nir/spirv_to_nir.c @@ -0,0 +1,453 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial 
portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir_spirv.h" +#include "spirv.h" + +struct vtn_decoration; + +enum vtn_value_type { + vtn_value_type_invalid = 0, + vtn_value_type_undef, + vtn_value_type_string, + vtn_value_type_decoration_group, + vtn_value_type_ssa, + vtn_value_type_deref, +}; + +struct vtn_value { + enum vtn_value_type value_type; + const char *name; + struct vtn_decoration *decoration; + union { + void *ptr; + char *str; + nir_ssa_def *ssa; + nir_deref_var *deref; + }; +}; + +struct vtn_decoration { + struct vtn_decoration *next; + const uint32_t *literals; + struct vtn_value *group; + SpvDecoration decoration; +}; + +struct vtn_builder { + nir_shader *shader; + nir_function_impl *impl; + + unsigned value_id_bound; + struct vtn_value *values; +}; + +static void +vtn_push_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type, void *ptr) +{ + assert(value_id < b->value_id_bound); + assert(b->values[value_id].value_type == vtn_value_type_invalid); + + b->values[value_id].value_type = value_type; + b->values[value_id].ptr = ptr; +} + +static void +vtn_push_token(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + vtn_push_value(b, value_id, value_type, NULL); +} + +static char * +vtn_string_literal(struct vtn_builder *b, const uint32_t *words, + unsigned word_count) +{ + return ralloc_strndup(b, (char *)words, (word_count - 2) * sizeof(*words)); +} 
+ +typedef void (*decoration_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + const struct vtn_decoration *, + void *); + +static void +_foreach_decoration_helper(struct vtn_builder *b, + struct vtn_value *base_value, + struct vtn_value *value, + decoration_foreach_cb cb, void *data) +{ + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + if (dec->group) { + assert(dec->group->value_type == vtn_value_type_decoration_group); + _foreach_decoration_helper(b, base_value, dec->group, cb, data); + } else { + cb(b, base_value, dec, data); + } + } +} + +/** Iterates (recursively if needed) over all of the decorations on a value + * + * This function iterates over all of the decorations applied to a given + * value. If it encounters a decoration group, it recurses into the group + * and iterates over all of those decorations as well. + */ +static void +vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + decoration_foreach_cb cb, void *data) +{ + _foreach_decoration_helper(b, value, value, cb, data); +} + +static void +vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpDecorationGroup: + vtn_push_token(b, w[1], vtn_value_type_undef); + break; + + case SpvOpDecorate: { + struct vtn_value *val = &b->values[w[1]]; + + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->decoration = w[2]; + dec->literals = &w[3]; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + break; + } + + case SpvOpGroupDecorate: { + struct vtn_value *group = &b->values[w[1]]; + assert(group->value_type == vtn_value_type_decoration_group); + + for (unsigned i = 2; i < count; i++) { + struct vtn_value *val = &b->values[w[i]]; + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->group = group; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + } + break; + } + + 
case SpvOpGroupMemberDecorate: + assert(!"Bad instruction. Khronos Bug #13513"); + break; + + default: + unreachable("Unhandled opcode"); + } +} + +static void +vtn_handle_type(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + +static void +vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + +static void +vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + +static void +vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + +static void +vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + +static void +vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceExtension: + case SpvOpMemberName: + case SpvOpLine: + /* Unhandled, but these are for debug so that's ok. 
*/ + break; + + case SpvOpName: + b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2); + break; + + case SpvOpString: + vtn_push_value(b, w[1], vtn_value_type_string, + vtn_string_literal(b, &w[2], count - 2)); + break; + + case SpvOpUndef: + vtn_push_token(b, w[2], vtn_value_type_undef); + break; + + case SpvOpTypeVoid: + case SpvOpTypeBool: + case SpvOpTypeInt: + case SpvOpTypeFloat: + case SpvOpTypeVector: + case SpvOpTypeMatrix: + case SpvOpTypeSampler: + case SpvOpTypeArray: + case SpvOpTypeRuntimeArray: + case SpvOpTypeStruct: + case SpvOpTypeOpaque: + case SpvOpTypePointer: + case SpvOpTypeFunction: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + vtn_handle_type(b, opcode, w, count); + break; + + case SpvOpConstantTrue: + case SpvOpConstantFalse: + case SpvOpConstant: + case SpvOpConstantComposite: + case SpvOpConstantSampler: + case SpvOpConstantNullPointer: + case SpvOpConstantNullObject: + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: + case SpvOpSpecConstant: + case SpvOpSpecConstantComposite: + vtn_handle_constant(b, opcode, w, count); + break; + + case SpvOpVariable: + case SpvOpVariableArray: + case SpvOpLoad: + case SpvOpStore: + case SpvOpCopyMemory: + case SpvOpCopyMemorySized: + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: + case SpvOpArrayLength: + case SpvOpImagePointer: + vtn_handle_variables(b, opcode, w, count); + break; + + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + vtn_handle_decoration(b, opcode, w, count); + break; + + case SpvOpTextureSample: + case SpvOpTextureSampleDref: + case SpvOpTextureSampleLod: + case SpvOpTextureSampleProj: + case SpvOpTextureSampleGrad: + case SpvOpTextureSampleOffset: + case SpvOpTextureSampleProjLod: + case SpvOpTextureSampleProjGrad: + case SpvOpTextureSampleLodOffset: + case 
SpvOpTextureSampleProjOffset: + case SpvOpTextureSampleGradOffset: + case SpvOpTextureSampleProjLodOffset: + case SpvOpTextureSampleProjGradOffset: + case SpvOpTextureFetchTexelLod: + case SpvOpTextureFetchTexelOffset: + case SpvOpTextureFetchSample: + case SpvOpTextureFetchTexel: + case SpvOpTextureGather: + case SpvOpTextureGatherOffset: + case SpvOpTextureGatherOffsets: + case SpvOpTextureQuerySizeLod: + case SpvOpTextureQuerySize: + case SpvOpTextureQueryLod: + case SpvOpTextureQueryLevels: + case SpvOpTextureQuerySamples: + vtn_handle_texture(b, opcode, w, count); + break; + + case SpvOpSNegate: + case SpvOpFNegate: + case SpvOpNot: + case SpvOpAny: + case SpvOpAll: + case SpvOpConvertFToU: + case SpvOpConvertFToS: + case SpvOpConvertSToF: + case SpvOpConvertUToF: + case SpvOpUConvert: + case SpvOpSConvert: + case SpvOpFConvert: + case SpvOpConvertPtrToU: + case SpvOpConvertUToPtr: + case SpvOpPtrCastToGeneric: + case SpvOpGenericCastToPtr: + case SpvOpBitcast: + case SpvOpTranspose: + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + case SpvOpIAdd: + case SpvOpFAdd: + case SpvOpISub: + case SpvOpFSub: + case SpvOpIMul: + case SpvOpFMul: + case SpvOpUDiv: + case SpvOpSDiv: + case SpvOpFDiv: + case SpvOpUMod: + case SpvOpSRem: + case SpvOpSMod: + case SpvOpFRem: + case SpvOpFMod: + case SpvOpVectorTimesScalar: + case SpvOpMatrixTimesScalar: + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + case SpvOpOuterProduct: + case SpvOpDot: + case SpvOpShiftRightLogical: + case SpvOpShiftRightArithmetic: + case SpvOpShiftLeftLogical: + case SpvOpLogicalOr: + case SpvOpLogicalXor: + case SpvOpLogicalAnd: + case SpvOpBitwiseOr: + case SpvOpBitwiseXor: + case SpvOpBitwiseAnd: + case SpvOpSelect: + case SpvOpIEqual: + case SpvOpFOrdEqual: + case SpvOpFUnordEqual: + case SpvOpINotEqual: + case 
SpvOpFOrdNotEqual: + case SpvOpFUnordNotEqual: + case SpvOpULessThan: + case SpvOpSLessThan: + case SpvOpFOrdLessThan: + case SpvOpFUnordLessThan: + case SpvOpUGreaterThan: + case SpvOpSGreaterThan: + case SpvOpFOrdGreaterThan: + case SpvOpFUnordGreaterThan: + case SpvOpULessThanEqual: + case SpvOpSLessThanEqual: + case SpvOpFOrdLessThanEqual: + case SpvOpFUnordLessThanEqual: + case SpvOpUGreaterThanEqual: + case SpvOpSGreaterThanEqual: + case SpvOpFOrdGreaterThanEqual: + case SpvOpFUnordGreaterThanEqual: + case SpvOpDPdx: + case SpvOpDPdy: + case SpvOpFwidth: + case SpvOpDPdxFine: + case SpvOpDPdyFine: + case SpvOpFwidthFine: + case SpvOpDPdxCoarse: + case SpvOpDPdyCoarse: + case SpvOpFwidthCoarse: + vtn_handle_alu(b, opcode, w, count); + break; + + default: + unreachable("Unhandled opcode"); + } +} + +nir_shader * +spirv_to_nir(const uint32_t *words, size_t word_count, + const nir_shader_compiler_options *options) +{ + /* Handle the SPIR-V header (first 4 dwords) */ + assert(word_count > 5); + + assert(words[0] == SpvMagicNumber); + assert(words[1] == 99); + /* words[2] == generator magic */ + unsigned value_id_bound = words[3]; + assert(words[4] == 0); + + words+= 5; + + nir_shader *shader = nir_shader_create(NULL, options); + + /* Initialize the stn_builder object */ + struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); + b->shader = shader; + b->value_id_bound = value_id_bound; + b->values = ralloc_array(b, struct vtn_value, value_id_bound); + + /* Start handling instructions */ + const uint32_t *word_end = words + word_count; + while (words < word_end) { + SpvOp opcode = words[0] & SpvOpCodeMask; + unsigned count = words[0] >> SpvWordCountShift; + assert(words + count <= word_end); + + vtn_handle_instruction(b, opcode, words, count); + + words += count; + } + + ralloc_free(b); + + return shader; +} -- cgit v1.2.3 From cae8db6b7e149e111fb9d3de69a45c0b3e036b76 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:30:22 -0700 Subject: 
glsl/compiler: Move the error_no_memory stub to standalone_scaffolding.cpp --- src/glsl/main.cpp | 6 ------ src/glsl/standalone_scaffolding.cpp | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp index ccac8399646..fc54ddd7eb1 100644 --- a/src/glsl/main.cpp +++ b/src/glsl/main.cpp @@ -41,12 +41,6 @@ static int glsl_version = 330; -extern "C" void -_mesa_error_no_memory(const char *caller) -{ - fprintf(stderr, "Mesa error: out of memory in %s", caller); -} - static void initialize_context(struct gl_context *ctx, gl_api api) { diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp index a109c4e92d2..6e1ecec3235 100644 --- a/src/glsl/standalone_scaffolding.cpp +++ b/src/glsl/standalone_scaffolding.cpp @@ -34,6 +34,12 @@ #include #include "util/ralloc.h" +extern "C" void +_mesa_error_no_memory(const char *caller) +{ + fprintf(stderr, "Mesa error: out of memory in %s", caller); +} + void _mesa_warning(struct gl_context *ctx, const char *fmt, ...) 
{ -- cgit v1.2.3 From 4763a13b075105a6ba33bbbb6ae1fbb1c3956cb1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:29:38 -0700 Subject: REVERT: Add a simple helper program for testing SPIR-V -> NIR translation --- src/glsl/Makefile.am | 12 ++++++++++- src/glsl/nir/spirv2nir.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 src/glsl/nir/spirv2nir.c (limited to 'src') diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am index 23c6fe8bb6c..7af9a709d5a 100644 --- a/src/glsl/Makefile.am +++ b/src/glsl/Makefile.am @@ -77,7 +77,7 @@ check_PROGRAMS = \ tests/sampler-types-test \ tests/uniform-initializer-test -noinst_PROGRAMS = glsl_compiler +noinst_PROGRAMS = glsl_compiler spirv2nir tests_blob_test_SOURCES = \ tests/blob_test.c @@ -162,6 +162,16 @@ glsl_compiler_LDADD = \ $(top_builddir)/src/libglsl_util.la \ $(PTHREAD_LIBS) +spirv2nir_SOURCES = \ + standalone_scaffolding.cpp \ + standalone_scaffolding.h \ + nir/spirv2nir.c + +spirv2nir_LDADD = \ + libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + glsl_test_SOURCES = \ standalone_scaffolding.cpp \ tests/common.c \ diff --git a/src/glsl/nir/spirv2nir.c b/src/glsl/nir/spirv2nir.c new file mode 100644 index 00000000000..0eed23fbc3f --- /dev/null +++ b/src/glsl/nir/spirv2nir.c @@ -0,0 +1,54 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all 
copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +/* + * A simple executable that opens a SPIR-V shader, converts it to NIR, and + * dumps out the result. This should be useful for testing the + * spirv_to_nir code. + */ + +#include "nir_spirv.h" + +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + int fd = open(argv[1], O_RDONLY); + off_t len = lseek(fd, 0, SEEK_END); + + assert(len % 4 == 0); + size_t word_count = len / 4; + + const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); + assert(map != NULL); + + nir_shader *shader = spirv_to_nir(map, word_count, NULL); + nir_print_shader(shader, stderr); +} -- cgit v1.2.3 From f9a31ba044d3abf07359e66a833eaf1292668c3d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:32:55 -0700 Subject: nir/spirv: Add stub support for extension instructions --- src/glsl/nir/spirv_to_nir.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 02b99db17ae..76ddce62c1c 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -64,6 +64,9 @@ struct vtn_builder { unsigned value_id_bound; struct vtn_value *values; + + SpvExecutionModel execution_model; + struct vtn_value *entry_point; }; static void @@ -91,6 +94,21 @@ vtn_string_literal(struct vtn_builder *b, const uint32_t *words, return 
ralloc_strndup(b, (char *)words, (word_count - 2) * sizeof(*words)); } +static void +vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpExtInstImport: + /* Do nothing for the moment */ + break; + + case SpvOpExtInst: + default: + unreachable("Unhandled opcode"); + } +} + typedef void (*decoration_foreach_cb)(struct vtn_builder *, struct vtn_value *, const struct vtn_decoration *, @@ -216,6 +234,7 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpSourceExtension: case SpvOpMemberName: case SpvOpLine: + case SpvOpExtension: /* Unhandled, but these are for debug so that's ok. */ break; @@ -232,6 +251,22 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, vtn_push_token(b, w[2], vtn_value_type_undef); break; + case SpvOpMemoryModel: + assert(w[1] == SpvAddressingModelLogical); + assert(w[2] == SpvMemoryModelGLSL450); + break; + + case SpvOpEntryPoint: + assert(b->entry_point == NULL); + b->entry_point = &b->values[w[2]]; + b->execution_model = w[1]; + break; + + case SpvOpExtInstImport: + case SpvOpExtInst: + vtn_handle_extension(b, opcode, w, count); + break; + case SpvOpTypeVoid: case SpvOpTypeBool: case SpvOpTypeInt: -- cgit v1.2.3 From 2b570a49a92458f4771252c7faf1bac5a3c9dca5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:34:06 -0700 Subject: nir/spirv: Rework the way values are added Instead of having functions to add values and set various things, we just have a function that does a few asserts and then returns the value. The caller is then responsible for setting the various fields. 
--- src/glsl/nir/spirv_to_nir.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 76ddce62c1c..e4bddbb6bdc 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -69,22 +69,16 @@ struct vtn_builder { struct vtn_value *entry_point; }; -static void +static struct vtn_value * vtn_push_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type, void *ptr) + enum vtn_value_type value_type) { assert(value_id < b->value_id_bound); assert(b->values[value_id].value_type == vtn_value_type_invalid); b->values[value_id].value_type = value_type; - b->values[value_id].ptr = ptr; -} -static void -vtn_push_token(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) -{ - vtn_push_value(b, value_id, value_type, NULL); + return &b->values[value_id]; } static char * @@ -149,7 +143,7 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, { switch (opcode) { case SpvOpDecorationGroup: - vtn_push_token(b, w[1], vtn_value_type_undef); + vtn_push_value(b, w[1], vtn_value_type_undef); break; case SpvOpDecorate: { @@ -243,12 +237,12 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpString: - vtn_push_value(b, w[1], vtn_value_type_string, - vtn_string_literal(b, &w[2], count - 2)); + vtn_push_value(b, w[1], vtn_value_type_string)->str = + vtn_string_literal(b, &w[2], count - 2); break; case SpvOpUndef: - vtn_push_token(b, w[2], vtn_value_type_undef); + vtn_push_value(b, w[2], vtn_value_type_undef); break; case SpvOpMemoryModel: -- cgit v1.2.3 From 7b63b3de93b747dc5bf64891d2559d0db52d0f4e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Feb 2015 16:27:32 -0800 Subject: glsl: Add GLSL_TYPE_FUNCTION to the base types enums --- src/glsl/ast_to_hir.cpp | 1 + src/glsl/glsl_types.cpp | 2 ++ src/glsl/glsl_types.h | 1 + src/glsl/ir_clone.cpp | 1 + 
src/glsl/link_uniform_initializers.cpp | 1 + src/glsl/nir/nir_lower_io.c | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 1 + src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 1 + src/mesa/drivers/dri/i965/brw_shader.cpp | 1 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 1 + src/mesa/program/ir_to_mesa.cpp | 2 ++ 11 files changed, 13 insertions(+) (limited to 'src') diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 14e63090557..bf68ec39229 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -970,6 +970,7 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_ATOMIC_UINT: /* I assume a comparison of a struct containing a sampler just * ignores the sampler present in the type. diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 9c9b7efcbc7..3ee5c00b22d 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -955,6 +955,7 @@ glsl_type::component_slots() const case GLSL_TYPE_IMAGE: return 1; + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_VOID: @@ -1326,6 +1327,7 @@ glsl_type::count_attribute_slots() const case GLSL_TYPE_ARRAY: return this->length * this->fields.array->count_attribute_slots(); + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 5645dcd5011..c77e337bf63 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -56,6 +56,7 @@ enum glsl_base_type { GLSL_TYPE_IMAGE, GLSL_TYPE_ATOMIC_UINT, GLSL_TYPE_STRUCT, + GLSL_TYPE_FUNCTION, GLSL_TYPE_INTERFACE, GLSL_TYPE_ARRAY, GLSL_TYPE_VOID, diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp index 914e0e4d540..636c143ddc2 100644 --- a/src/glsl/ir_clone.cpp +++ b/src/glsl/ir_clone.cpp @@ -357,6 +357,7 @@ ir_constant::clone(void *mem_ctx, struct 
hash_table *ht) const return c; } + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: diff --git a/src/glsl/link_uniform_initializers.cpp b/src/glsl/link_uniform_initializers.cpp index 69073841ea4..60bfc9c15c9 100644 --- a/src/glsl/link_uniform_initializers.cpp +++ b/src/glsl/link_uniform_initializers.cpp @@ -88,6 +88,7 @@ copy_constant_to_storage(union gl_constant_value *storage, case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: /* All other types should have already been filtered by other diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 03eed04e1e9..561bebd3a9c 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -67,6 +67,7 @@ type_size(const struct glsl_type *type) return 0; case GLSL_TYPE_IMAGE: return 0; + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_DOUBLE: diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b2701b89689..2fa5a664d30 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -671,6 +671,7 @@ fs_visitor::type_size(const struct glsl_type *type) case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 80ca1b750f8..c911a551038 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1348,6 +1348,7 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 
c1fd859fef5..ebfb49acf8d 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -351,6 +351,7 @@ brw_type_for_base_type(const struct glsl_type *type) case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 5a60fe43bf8..e51c140c0f2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -615,6 +615,7 @@ type_size(const struct glsl_type *type) case GLSL_TYPE_DOUBLE: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 3dcb53702a5..fceed712bdb 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -541,6 +541,7 @@ type_size(const struct glsl_type *type) case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: assert(!"Invalid type in type_size"); break; } @@ -2448,6 +2449,7 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, case GLSL_TYPE_STRUCT: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: assert(!"Should not get here."); break; } -- cgit v1.2.3 From 053778c49362d49db93335d46cdafaa760038ac4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Feb 2015 16:29:33 -0800 Subject: glsl/types: Add support for function types --- src/glsl/glsl_types.cpp | 100 ++++++++++++++++++++++++++++++++++++++++++++++++ src/glsl/glsl_types.h | 23 ++++++++++- 2 files changed, 122 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 3ee5c00b22d..0d83ee68e42 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -32,6 +32,7 @@ mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP; 
hash_table *glsl_type::array_types = NULL; hash_table *glsl_type::record_types = NULL; hash_table *glsl_type::interface_types = NULL; +hash_table *glsl_type::function_types = NULL; void *glsl_type::mem_ctx = NULL; void @@ -159,6 +160,39 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, mtx_unlock(&glsl_type::mutex); } +glsl_type::glsl_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params) : + gl_type(0), + base_type(GLSL_TYPE_FUNCTION), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(0), matrix_columns(0), + length(num_params) +{ + unsigned int i; + + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + + this->fields.parameters = rzalloc_array(this->mem_ctx, + glsl_function_param, num_params + 1); + + /* We store the return type as the first parameter */ + this->fields.parameters[0].type = return_type; + this->fields.parameters[0].in = false; + this->fields.parameters[0].out = true; + + /* We store the i'th parameter in slot i+1 */ + for (i = 0; i < length; i++) { + this->fields.parameters[i + 1].type = params[i].type; + this->fields.parameters[i + 1].in = params[i].in; + this->fields.parameters[i + 1].out = params[i].out; + } + + mtx_unlock(&glsl_type::mutex); +} + bool glsl_type::contains_sampler() const @@ -827,6 +861,72 @@ glsl_type::get_interface_instance(const glsl_struct_field *fields, } +static int +function_key_compare(const void *a, const void *b) +{ + const glsl_type *const key1 = (glsl_type *) a; + const glsl_type *const key2 = (glsl_type *) b; + + if (key1->length != key2->length) + return 1; + + return memcmp(key1->fields.parameters, key2->fields.parameters, + (key1->length + 1) * sizeof(*key1->fields.parameters)); +} + + +static unsigned +function_key_hash(const void *a) +{ + const glsl_type *const key = (glsl_type *) a; + char hash_key[128]; + unsigned size = 0; + + size = snprintf(hash_key, 
sizeof(hash_key), "%08x", key->length); + + for (unsigned i = 0; i < key->length; i++) { + if (size >= sizeof(hash_key)) + break; + + size += snprintf(& hash_key[size], sizeof(hash_key) - size, + "%p", (void *) key->fields.structure[i].type); + } + + return hash_table_string_hash(& hash_key); +} + +const glsl_type * +glsl_type::get_function_instance(const glsl_type *return_type, + const glsl_function_param *params, + unsigned num_params) +{ + const glsl_type key(return_type, params, num_params); + + mtx_lock(&glsl_type::mutex); + + if (function_types == NULL) { + function_types = hash_table_ctor(64, function_key_hash, + function_key_compare); + } + + const glsl_type *t = (glsl_type *) hash_table_find(function_types, &key); + if (t == NULL) { + mtx_unlock(&glsl_type::mutex); + t = new glsl_type(return_type, params, num_params); + mtx_lock(&glsl_type::mutex); + + hash_table_insert(function_types, (void *) t, t); + } + + assert(t->base_type == GLSL_TYPE_FUNCTION); + assert(t->length == num_params); + + mtx_unlock(&glsl_type::mutex); + + return t; +} + + const glsl_type * glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b) { diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index c77e337bf63..4d726c6bcf9 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -179,7 +179,7 @@ struct glsl_type { */ union { const struct glsl_type *array; /**< Type of array elements. */ - const struct glsl_type *parameters; /**< Parameters to function. */ + struct glsl_function_param *parameters; /**< Parameters to function. */ struct glsl_struct_field *structure; /**< List of struct fields. 
*/ } fields; @@ -276,6 +276,13 @@ struct glsl_type { enum glsl_interface_packing packing, const char *block_name); + /** + * Get the instance of a function type + */ + static const glsl_type *get_function_instance(const struct glsl_type *return_type, + const glsl_function_param *parameters, + unsigned num_params); + /** * Get the type resulting from a multiplication of \p type_a * \p type_b */ @@ -689,6 +696,10 @@ private: glsl_type(const glsl_struct_field *fields, unsigned num_fields, enum glsl_interface_packing packing, const char *name); + /** Constructor for interface types */ + glsl_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params); + /** Constructor for array types */ glsl_type(const glsl_type *array, unsigned length); @@ -701,6 +712,9 @@ private: /** Hash table containing the known interface types. */ static struct hash_table *interface_types; + /** Hash table containing the known function types. */ + static struct hash_table *function_types; + static int record_key_compare(const void *a, const void *b); static unsigned record_key_hash(const void *key); @@ -771,6 +785,13 @@ struct glsl_struct_field { int stream; }; +struct glsl_function_param { + const struct glsl_type *type; + + bool in; + bool out; +}; + static inline unsigned int glsl_align(unsigned int a, unsigned int align) { -- cgit v1.2.3 From fe550f0738cf2052d5f0bc7d23de46a79f8ae04b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:48:12 -0700 Subject: glsl/types: Expose the function_param and struct_field structs to C Previously, they were hidden behind a #ifdef __cplusplus so C wouldn't find them. This commit simpliy moves the ifdef. 
--- src/glsl/glsl_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 4d726c6bcf9..2d4718572af 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -742,6 +742,10 @@ private: /*@}*/ }; +#undef DECL_TYPE +#undef STRUCT_TYPE +#endif /* __cplusplus */ + struct glsl_struct_field { const struct glsl_type *type; const char *name; @@ -798,8 +802,4 @@ glsl_align(unsigned int a, unsigned int align) return (a + align - 1) / align * align; } -#undef DECL_TYPE -#undef STRUCT_TYPE -#endif /* __cplusplus */ - #endif /* GLSL_TYPES_H */ -- cgit v1.2.3 From e9d3b1e6942fc7c3d8dcfb9797a78ee16f0b20bc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:28:37 -0700 Subject: nir/types: Add more helpers for creating types --- src/glsl/nir/nir_types.cpp | 46 ++++++++++++++++++++++++++++++++++++++++++++++ src/glsl/nir/nir_types.h | 13 +++++++++++++ 2 files changed, 59 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 62176f508a1..937a842d98e 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -148,14 +148,60 @@ glsl_float_type(void) return glsl_type::float_type; } +const glsl_type * +glsl_int_type(void) +{ + return glsl_type::int_type; +} + +const glsl_type * +glsl_uint_type(void) +{ + return glsl_type::uint_type; +} + +const glsl_type * +glsl_bool_type(void) +{ + return glsl_type::bool_type; +} + const glsl_type * glsl_vec4_type(void) { return glsl_type::vec4_type; } +const glsl_type * +glsl_vector_type(enum glsl_base_type base_type, unsigned components) +{ + assert(components > 1 && components <= 4); + return glsl_type::get_instance(base_type, components, 1); +} + +const glsl_type * +glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns) +{ + assert(rows > 1 && rows <= 4 && columns > 1 && columns <= 4); + return glsl_type::get_instance(base_type, 
rows, columns); +} + const glsl_type * glsl_array_type(const glsl_type *base, unsigned elements) { return glsl_type::get_array_instance(base, elements); } + +const glsl_type * +glsl_struct_type(const glsl_struct_field *fields, + unsigned num_fields, const char *name) +{ + return glsl_type::get_record_instance(fields, num_fields, name); +} + +const glsl_type * +glsl_function_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params) +{ + return glsl_type::get_function_instance(return_type, params, num_params); +} diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index 276d4ad6234..aad43f7a8c0 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -70,9 +70,22 @@ bool glsl_type_is_matrix(const struct glsl_type *type); const struct glsl_type *glsl_void_type(void); const struct glsl_type *glsl_float_type(void); +const struct glsl_type *glsl_int_type(void); +const struct glsl_type *glsl_uint_type(void); +const struct glsl_type *glsl_bool_type(void); + const struct glsl_type *glsl_vec4_type(void); +const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type, + unsigned components); +const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type, + unsigned rows, unsigned columns); const struct glsl_type *glsl_array_type(const struct glsl_type *base, unsigned elements); +const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields, + unsigned num_fields, const char *name); +const struct glsl_type * glsl_function_type(const struct glsl_type *return_type, + const struct glsl_function_param *params, + unsigned num_params); #ifdef __cplusplus } -- cgit v1.2.3 From 3f83579664b5ff9ec292fb94da6b3bb8b949868b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:36:01 -0700 Subject: nir/spirv: Add basic support for types --- src/glsl/nir/spirv_to_nir.c | 89 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 4 deletions(-) (limited 
to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index e4bddbb6bdc..da85abebe15 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -26,6 +26,7 @@ */ #include "nir_spirv.h" +#include "nir_vla.h" #include "spirv.h" struct vtn_decoration; @@ -35,6 +36,7 @@ enum vtn_value_type { vtn_value_type_undef, vtn_value_type_string, vtn_value_type_decoration_group, + vtn_value_type_type, vtn_value_type_ssa, vtn_value_type_deref, }; @@ -46,6 +48,7 @@ struct vtn_value { union { void *ptr; char *str; + const struct glsl_type *type; nir_ssa_def *ssa; nir_deref_var *deref; }; @@ -184,11 +187,88 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, } } -static void +static const struct glsl_type * vtn_handle_type(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) + const uint32_t *args, unsigned count) { - unreachable("Unhandled opcode"); + switch (opcode) { + case SpvOpTypeVoid: + return glsl_void_type(); + case SpvOpTypeBool: + return glsl_bool_type(); + case SpvOpTypeInt: + return glsl_int_type(); + case SpvOpTypeFloat: + return glsl_float_type(); + + case SpvOpTypeVector: { + const struct glsl_type *base = b->values[args[0]].type; + unsigned elems = args[1]; + + assert(glsl_type_is_scalar(base)); + return glsl_vector_type(glsl_get_base_type(base), elems); + } + + case SpvOpTypeMatrix: { + const struct glsl_type *base = b->values[args[0]].type; + unsigned columns = args[1]; + + assert(glsl_type_is_vector(base)); + return glsl_matrix_type(glsl_get_base_type(base), + glsl_get_vector_elements(base), + columns); + } + + case SpvOpTypeArray: + return glsl_array_type(b->values[args[0]].type, args[1]); + + case SpvOpTypeStruct: { + NIR_VLA(struct glsl_struct_field, fields, count); + for (unsigned i = 0; i < count; i++) { + /* TODO: Handle decorators */ + fields[i].type = b->values[args[i]].type; + fields[i].name = ralloc_asprintf(b, "field%d", i); + fields[i].location = -1; + fields[i].interpolation 
= 0; + fields[i].centroid = 0; + fields[i].sample = 0; + fields[i].matrix_layout = 2; + fields[i].stream = -1; + } + return glsl_struct_type(fields, count, "struct"); + } + + case SpvOpTypeFunction: { + const struct glsl_type *return_type = b->values[args[0]].type; + NIR_VLA(struct glsl_function_param, params, count - 1); + for (unsigned i = 1; i < count; i++) { + params[i - 1].type = b->values[args[i]].type; + + /* FIXME: */ + params[i - 1].in = true; + params[i - 1].out = true; + } + return glsl_function_type(return_type, params, count - 1); + } + + case SpvOpTypePointer: + /* FIXME: For now, we'll just do the really lame thing and return + * the same type. The validator should ensure that the proper number + * of dereferences happen + */ + return b->values[args[0]].type; + + case SpvOpTypeSampler: + case SpvOpTypeRuntimeArray: + case SpvOpTypeOpaque: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + default: + unreachable("Unhandled opcode"); + } } static void @@ -279,7 +359,8 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpTypeReserveId: case SpvOpTypeQueue: case SpvOpTypePipe: - vtn_handle_type(b, opcode, w, count); + vtn_push_value(b, w[1], vtn_value_type_type)->type = + vtn_handle_type(b, opcode, &w[2], count - 2); break; case SpvOpConstantTrue: -- cgit v1.2.3 From b2db85d8e4593f7b13e4550159f1d940d9d87a80 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 18:14:11 -0700 Subject: nir/spirv: Add support for constants --- src/glsl/nir/spirv_to_nir.c | 68 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index da85abebe15..1e4c4439883 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -37,6 +37,7 @@ enum vtn_value_type { vtn_value_type_string, vtn_value_type_decoration_group, 
vtn_value_type_type, + vtn_value_type_constant, vtn_value_type_ssa, vtn_value_type_deref, }; @@ -49,6 +50,7 @@ struct vtn_value { void *ptr; char *str; const struct glsl_type *type; + nir_constant *constant; nir_ssa_def *ssa; nir_deref_var *deref; }; @@ -84,6 +86,15 @@ vtn_push_value(struct vtn_builder *b, uint32_t value_id, return &b->values[value_id]; } +static struct vtn_value * +vtn_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + assert(value_id < b->value_id_bound); + assert(b->values[value_id].value_type == value_type); + return &b->values[value_id]; +} + static char * vtn_string_literal(struct vtn_builder *b, const uint32_t *words, unsigned word_count) @@ -275,7 +286,62 @@ static void vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Unhandled opcode"); + const struct glsl_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_constant *constant = ralloc(b, nir_constant); + switch (opcode) { + case SpvOpConstantTrue: + assert(type == glsl_bool_type()); + constant->value.u[0] = NIR_TRUE; + break; + case SpvOpConstantFalse: + assert(type == glsl_bool_type()); + constant->value.u[0] = NIR_FALSE; + break; + case SpvOpConstant: + assert(glsl_type_is_scalar(type)); + constant->value.u[0] = w[3]; + break; + case SpvOpConstantComposite: { + unsigned elem_count = count - 3; + nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); + for (unsigned i = 0; i < elem_count; i++) + elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(type)) { + unsigned rows = glsl_get_vector_elements(type); + assert(glsl_get_matrix_columns(type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + for (unsigned j = 0; j < rows; j++) + constant->value.u[rows * i + j] = elems[i]->value.u[j]; 
+ } else { + assert(glsl_type_is_vector(type)); + assert(glsl_get_vector_elements(type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + constant->value.u[i] = elems[i]->value.u[0]; + } + ralloc_free(elems); + break; + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + constant->elements = elems; + break; + + default: + unreachable("Unsupported type for constants"); + } + break; + } + + default: + unreachable("Unhandled opcode"); + } + vtn_push_value(b, w[2], vtn_value_type_constant)->constant = constant; } static void -- cgit v1.2.3 From 707b706d183cd0733df0eaf0f79c7ef42e58a8e4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 19:37:41 -0700 Subject: nir/spirv: Add support for declaring variables Deref chains and variable load/store operations are still missing. --- src/glsl/nir/spirv_to_nir.c | 152 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 1e4c4439883..131eecc1d4b 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -38,6 +38,7 @@ enum vtn_value_type { vtn_value_type_decoration_group, vtn_value_type_type, vtn_value_type_constant, + vtn_value_type_variable, vtn_value_type_ssa, vtn_value_type_deref, }; @@ -51,6 +52,7 @@ struct vtn_value { char *str; const struct glsl_type *type; nir_constant *constant; + nir_variable *var; nir_ssa_def *ssa; nir_deref_var *deref; }; @@ -344,11 +346,159 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, vtn_push_value(b, w[2], vtn_value_type_constant)->constant = constant; } +static void +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, + const struct vtn_decoration *dec, void *unused) +{ + assert(val->value_type == vtn_value_type_variable); + nir_variable *var = val->var; + switch (dec->decoration) { + case SpvDecorationPrecisionLow: + case SpvDecorationPrecisionMedium: + case SpvDecorationPrecisionHigh: + 
break; /* FIXME: Do nothing with these for now. */ + case SpvDecorationSmooth: + var->data.interpolation = INTERP_QUALIFIER_SMOOTH; + break; + case SpvDecorationNoperspective: + var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + var->data.interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + var->data.centroid = true; + break; + case SpvDecorationSample: + var->data.sample = true; + break; + case SpvDecorationInvariant: + var->data.invariant = true; + break; + case SpvDecorationConstant: + assert(var->constant_initializer != NULL); + var->data.read_only = true; + break; + case SpvDecorationNonwritable: + var->data.read_only = true; + break; + case SpvDecorationLocation: + var->data.explicit_location = true; + var->data.location = dec->literals[0]; + break; + case SpvDecorationComponent: + var->data.location_frac = dec->literals[0]; + break; + case SpvDecorationIndex: + var->data.explicit_index = true; + var->data.index = dec->literals[0]; + break; + case SpvDecorationBinding: + var->data.explicit_binding = true; + var->data.binding = dec->literals[0]; + break; + case SpvDecorationBlock: + case SpvDecorationBufferBlock: + case SpvDecorationRowMajor: + case SpvDecorationColMajor: + case SpvDecorationGLSLShared: + case SpvDecorationGLSLStd140: + case SpvDecorationGLSLStd430: + case SpvDecorationGLSLPacked: + case SpvDecorationPatch: + case SpvDecorationRestrict: + case SpvDecorationAliased: + case SpvDecorationVolatile: + case SpvDecorationCoherent: + case SpvDecorationNonreadable: + case SpvDecorationUniform: + /* This is really nice but we have no use for it right now. 
*/ + case SpvDecorationNoStaticUse: + case SpvDecorationCPacked: + case SpvDecorationSaturatedConversion: + case SpvDecorationStream: + case SpvDecorationDescriptorSet: + case SpvDecorationOffset: + case SpvDecorationAlignment: + case SpvDecorationXfbBuffer: + case SpvDecorationStride: + case SpvDecorationBuiltIn: + case SpvDecorationFuncParamAttr: + case SpvDecorationFPRoundingMode: + case SpvDecorationFPFastMathMode: + case SpvDecorationLinkageAttributes: + case SpvDecorationSpecId: + default: + unreachable("Unhandled variable decoration"); + } +} + static void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Unhandled opcode"); + switch (opcode) { + case SpvOpVariable: { + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_variable); + + nir_variable *var = ralloc(b->shader, nir_variable); + val->var = var; + + var->type = type; + var->name = ralloc_strdup(var, val->name); + + switch ((SpvStorageClass)w[3]) { + case SpvStorageClassUniformConstant: + var->data.mode = nir_var_uniform; + var->data.read_only = true; + break; + case SpvStorageClassInput: + var->data.mode = nir_var_shader_in; + var->data.read_only = true; + break; + case SpvStorageClassOutput: + var->data.mode = nir_var_shader_out; + break; + case SpvStorageClassPrivateGlobal: + var->data.mode = nir_var_global; + break; + case SpvStorageClassFunction: + var->data.mode = nir_var_local; + break; + case SpvStorageClassUniform: + case SpvStorageClassWorkgroupLocal: + case SpvStorageClassWorkgroupGlobal: + case SpvStorageClassGeneric: + case SpvStorageClassPrivate: + case SpvStorageClassAtomicCounter: + default: + unreachable("Unhandled variable storage class"); + } + + if (count > 4) { + assert(count == 5); + var->constant_initializer = + vtn_value(b, w[4], vtn_value_type_constant)->constant; + } + + vtn_foreach_decoration(b, val, var_decoration_cb, 
NULL); + break; + } + + case SpvOpVariableArray: + case SpvOpLoad: + case SpvOpStore: + case SpvOpCopyMemory: + case SpvOpCopyMemorySized: + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: + case SpvOpArrayLength: + case SpvOpImagePointer: + default: + unreachable("Unhandled opcode"); + } } static void -- cgit v1.2.3 From 8ee23dab041fe76f20d6297a1aa34f7ff1be6dc8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 20:09:36 -0700 Subject: nir/types: Add accessors for function parameter/return types --- src/glsl/nir/nir_types.cpp | 12 ++++++++++++ src/glsl/nir/nir_types.h | 6 ++++++ 2 files changed, 18 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 937a842d98e..f2894d40c78 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -70,6 +70,18 @@ glsl_get_struct_field(const glsl_type *type, unsigned index) return type->fields.structure[index].type; } +const glsl_type * +glsl_get_function_return_type(const glsl_type *type) +{ + return type->fields.parameters[0].type; +} + +const glsl_function_param * +glsl_get_function_param(const glsl_type *type, unsigned index) +{ + return &type->fields.parameters[index + 1]; +} + const struct glsl_type * glsl_get_column_type(const struct glsl_type *type) { diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index aad43f7a8c0..dd535770c9f 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -49,6 +49,12 @@ const struct glsl_type *glsl_get_array_element(const struct glsl_type *type); const struct glsl_type *glsl_get_column_type(const struct glsl_type *type); +const struct glsl_type * +glsl_get_function_return_type(const struct glsl_type *type); + +const struct glsl_function_param * +glsl_get_function_param(const struct glsl_type *type, unsigned index); + enum glsl_base_type glsl_get_base_type(const struct glsl_type *type); unsigned glsl_get_vector_elements(const struct glsl_type *type); -- cgit v1.2.3 
From a6cb9d92222079d0afcd651941cc6c3d091944a5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 20:10:20 -0700 Subject: nir/spirv: Add support for declaring functions --- src/glsl/nir/spirv_to_nir.c | 67 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 131eecc1d4b..821927b9d84 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -39,6 +39,7 @@ enum vtn_value_type { vtn_value_type_type, vtn_value_type_constant, vtn_value_type_variable, + vtn_value_type_function, vtn_value_type_ssa, vtn_value_type_deref, }; @@ -53,6 +54,7 @@ struct vtn_value { const struct glsl_type *type; nir_constant *constant; nir_variable *var; + nir_function_impl *impl; nir_ssa_def *ssa; nir_deref_var *deref; }; @@ -68,6 +70,7 @@ struct vtn_decoration { struct vtn_builder { nir_shader *shader; nir_function_impl *impl; + struct exec_list *cf_list; unsigned value_id_bound; struct vtn_value *values; @@ -501,6 +504,63 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } } +static void +vtn_handle_functions(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpFunction: { + assert(b->impl == NULL); + + const struct glsl_type *result_type = + vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + const struct glsl_type *func_type = + vtn_value(b, w[4], vtn_value_type_type)->type; + + assert(glsl_get_function_return_type(func_type) == result_type); + + nir_function *func = + nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); + + nir_function_overload *overload = nir_function_overload_create(func); + overload->num_params = glsl_get_length(func_type); + overload->params = ralloc_array(overload, nir_parameter, + overload->num_params); + for (unsigned i = 0; i < overload->num_params; i++) 
{ + const struct glsl_function_param *param = + glsl_get_function_param(func_type, i); + overload->params[i].type = param->type; + if (param->in) { + if (param->out) { + overload->params[i].param_type = nir_parameter_inout; + } else { + overload->params[i].param_type = nir_parameter_in; + } + } else { + if (param->out) { + overload->params[i].param_type = nir_parameter_out; + } else { + assert(!"Parameter is neither in nor out"); + } + } + } + + val->impl = b->impl = nir_function_impl_create(overload); + b->cf_list = &b->impl->body; + + break; + } + case SpvOpFunctionEnd: + b->impl = NULL; + break; + case SpvOpFunctionParameter: + case SpvOpFunctionCall: + default: + unreachable("Unhandled opcode"); + } +} + static void vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -614,6 +674,13 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_decoration(b, opcode, w, count); break; + case SpvOpFunction: + case SpvOpFunctionEnd: + case SpvOpFunctionParameter: + case SpvOpFunctionCall: + vtn_handle_functions(b, opcode, w, count); + break; + case SpvOpTextureSample: case SpvOpTextureSampleDref: case SpvOpTextureSampleLod: -- cgit v1.2.3 From eccd798cc29737f7375150488abefadf551d0f81 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 20:19:34 -0700 Subject: nir/spirv: Add support for OpLabel --- src/glsl/nir/spirv_to_nir.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 821927b9d84..e8acb33f481 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -40,6 +40,7 @@ enum vtn_value_type { vtn_value_type_constant, vtn_value_type_variable, vtn_value_type_function, + vtn_value_type_block, vtn_value_type_ssa, vtn_value_type_deref, }; @@ -55,6 +56,7 @@ struct vtn_value { nir_constant *constant; nir_variable *var; nir_function_impl *impl; + nir_block *block; nir_ssa_def *ssa; 
nir_deref_var *deref; }; @@ -612,6 +614,17 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, b->execution_model = w[1]; break; + case SpvOpLabel: { + struct exec_node *list_tail = exec_list_get_tail(b->cf_list); + nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); + assert(tail_node->type == nir_cf_node_block); + nir_block *block = nir_cf_node_as_block(tail_node); + + assert(exec_list_is_empty(&block->instr_list)); + vtn_push_value(b, w[1], vtn_value_type_block)->block = block; + break; + } + case SpvOpExtInstImport: case SpvOpExtInst: vtn_handle_extension(b, opcode, w, count); -- cgit v1.2.3 From 7182597e50253f07b66cb0edb806b11a56242b5c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 20:56:17 -0700 Subject: nir/types: Add a scalar type constructor --- src/glsl/nir/nir_types.cpp | 6 ++++++ src/glsl/nir/nir_types.h | 1 + 2 files changed, 7 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index f2894d40c78..f93a52b5fa5 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -184,6 +184,12 @@ glsl_vec4_type(void) return glsl_type::vec4_type; } +const glsl_type * +glsl_scalar_type(enum glsl_base_type base_type) +{ + return glsl_type::get_instance(base_type, 1, 1); +} + const glsl_type * glsl_vector_type(enum glsl_base_type base_type, unsigned components) { diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index dd535770c9f..40a80ec7130 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -81,6 +81,7 @@ const struct glsl_type *glsl_uint_type(void); const struct glsl_type *glsl_bool_type(void); const struct glsl_type *glsl_vec4_type(void); +const struct glsl_type *glsl_scalar_type(enum glsl_base_type base_type); const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type, unsigned components); const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type, -- cgit v1.2.3 From 
5acd472271d72866a36a5de374f3e1a846b61dc8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 20:56:36 -0700 Subject: nir/spirv: Add support for deref chains --- src/glsl/nir/spirv_to_nir.c | 86 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 74 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index e8acb33f481..ba536e8c81d 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -38,11 +38,10 @@ enum vtn_value_type { vtn_value_type_decoration_group, vtn_value_type_type, vtn_value_type_constant, - vtn_value_type_variable, + vtn_value_type_deref, vtn_value_type_function, vtn_value_type_block, vtn_value_type_ssa, - vtn_value_type_deref, }; struct vtn_value { @@ -54,11 +53,10 @@ struct vtn_value { char *str; const struct glsl_type *type; nir_constant *constant; - nir_variable *var; + nir_deref_var *deref; nir_function_impl *impl; nir_block *block; nir_ssa_def *ssa; - nir_deref_var *deref; }; }; @@ -353,10 +351,13 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, static void var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, - const struct vtn_decoration *dec, void *unused) + const struct vtn_decoration *dec, void *void_var) { - assert(val->value_type == vtn_value_type_variable); - nir_variable *var = val->var; + assert(val->value_type == vtn_value_type_deref); + assert(val->deref->deref.child == NULL); + assert(val->deref->var == void_var); + + nir_variable *var = void_var; switch (dec->decoration) { case SpvDecorationPrecisionLow: case SpvDecorationPrecisionMedium: @@ -446,10 +447,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvOpVariable: { const struct glsl_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_variable); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); nir_variable *var = ralloc(b->shader, 
nir_variable); - val->var = var; var->type = type; var->name = ralloc_strdup(var, val->name); @@ -488,7 +488,71 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, vtn_value(b, w[4], vtn_value_type_constant)->constant; } - vtn_foreach_decoration(b, val, var_decoration_cb, NULL); + val->deref = nir_deref_var_create(b->shader, var); + + vtn_foreach_decoration(b, val, var_decoration_cb, var); + break; + } + + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + nir_deref_var *base = vtn_value(b, w[3], vtn_value_type_deref)->deref; + val->deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); + + nir_deref *tail = &val->deref->deref; + while (tail->child) + tail = tail->child; + + for (unsigned i = 0; i < count - 3; i++) { + assert(w[i + 3] < b->value_id_bound); + struct vtn_value *idx_val = &b->values[w[i + 3]]; + + enum glsl_base_type base_type = glsl_get_base_type(tail->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_ARRAY: { + nir_deref_array *deref_arr = nir_deref_array_create(b); + if (base_type == GLSL_TYPE_ARRAY) { + deref_arr->deref.type = glsl_get_array_element(tail->type); + } else if (glsl_type_is_matrix(tail->type)) { + deref_arr->deref.type = glsl_get_column_type(tail->type); + } else { + assert(glsl_type_is_vector(tail->type)); + deref_arr->deref.type = glsl_scalar_type(base_type); + } + + if (idx_val->value_type == vtn_value_type_constant) { + unsigned idx = idx_val->constant->value.u[0]; + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = idx; + } else { + assert(idx_val->value_type == vtn_value_type_ssa); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + /* TODO */ + unreachable("Indirect array accesses not implemented"); + } + tail->child = &deref_arr->deref; + break; + } + + case 
GLSL_TYPE_STRUCT: { + assert(idx_val->value_type == vtn_value_type_constant); + unsigned idx = idx_val->constant->value.u[0]; + nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); + deref_struct->deref.type = glsl_get_struct_field(tail->type, idx); + tail->child = &deref_struct->deref; + break; + } + default: + unreachable("Invalid type for deref"); + } + tail = tail->child; + } break; } @@ -497,8 +561,6 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvOpStore: case SpvOpCopyMemory: case SpvOpCopyMemorySized: - case SpvOpAccessChain: - case SpvOpInBoundsAccessChain: case SpvOpArrayLength: case SpvOpImagePointer: default: -- cgit v1.2.3 From 6ff0830d64840d88631287db32ae464c7a436b17 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 May 2015 11:26:40 -0700 Subject: nir/types: Add an is_vector_or_scalar helper --- src/glsl/nir/nir_types.cpp | 6 ++++++ src/glsl/nir/nir_types.h | 1 + 2 files changed, 7 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index f93a52b5fa5..a6d35fe6179 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -142,6 +142,12 @@ glsl_type_is_scalar(const struct glsl_type *type) return type->is_scalar(); } +bool +glsl_type_is_vector_or_scalar(const struct glsl_type *type) +{ + return type->is_vector() || type->is_scalar(); +} + bool glsl_type_is_matrix(const struct glsl_type *type) { diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index 40a80ec7130..f19f0e5db5d 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -72,6 +72,7 @@ const char *glsl_get_struct_elem_name(const struct glsl_type *type, bool glsl_type_is_void(const struct glsl_type *type); bool glsl_type_is_vector(const struct glsl_type *type); bool glsl_type_is_scalar(const struct glsl_type *type); +bool glsl_type_is_vector_or_scalar(const struct glsl_type *type); bool glsl_type_is_matrix(const struct glsl_type *type); const struct 
glsl_type *glsl_void_type(void); -- cgit v1.2.3 From 01f3aa9c5191707e6b527f6858391c44db8c40db Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 May 2015 11:27:21 -0700 Subject: nir/spirv: Use vtn_value in the types code and fix a off-by-one error --- src/glsl/nir/spirv_to_nir.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index ba536e8c81d..8e043100c37 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -218,7 +218,8 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, return glsl_float_type(); case SpvOpTypeVector: { - const struct glsl_type *base = b->values[args[0]].type; + const struct glsl_type *base = + vtn_value(b, args[0], vtn_value_type_type)->type; unsigned elems = args[1]; assert(glsl_type_is_scalar(base)); @@ -226,7 +227,8 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, } case SpvOpTypeMatrix: { - const struct glsl_type *base = b->values[args[0]].type; + const struct glsl_type *base = + vtn_value(b, args[0], vtn_value_type_type)->type; unsigned columns = args[1]; assert(glsl_type_is_vector(base)); @@ -242,7 +244,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, NIR_VLA(struct glsl_struct_field, fields, count); for (unsigned i = 0; i < count; i++) { /* TODO: Handle decorators */ - fields[i].type = b->values[args[i]].type; + fields[i].type = vtn_value(b, args[i], vtn_value_type_type)->type; fields[i].name = ralloc_asprintf(b, "field%d", i); fields[i].location = -1; fields[i].interpolation = 0; @@ -258,7 +260,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, const struct glsl_type *return_type = b->values[args[0]].type; NIR_VLA(struct glsl_function_param, params, count - 1); for (unsigned i = 1; i < count; i++) { - params[i - 1].type = b->values[args[i]].type; + params[i - 1].type = vtn_value(b, args[i], vtn_value_type_type)->type; /* FIXME: */ params[i - 1].in = true; @@ -272,7 +274,7 
@@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, * the same type. The validator should ensure that the proper number * of dereferences happen */ - return b->values[args[0]].type; + return vtn_value(b, args[1], vtn_value_type_type)->type; case SpvOpTypeSampler: case SpvOpTypeRuntimeArray: -- cgit v1.2.3 From 5045efa4aa2bd335b2b0110b6af792fbc4fcba7e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 May 2015 11:27:44 -0700 Subject: nir/spirv: Add a vtn_untyped_value helper --- src/glsl/nir/spirv_to_nir.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 8e043100c37..7f74c9708d9 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -92,14 +92,21 @@ vtn_push_value(struct vtn_builder *b, uint32_t value_id, } static struct vtn_value * -vtn_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) +vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) { assert(value_id < b->value_id_bound); - assert(b->values[value_id].value_type == value_type); return &b->values[value_id]; } +static struct vtn_value * +vtn_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + assert(val->value_type == value_type); + return val; +} + static char * vtn_string_literal(struct vtn_builder *b, const uint32_t *words, unsigned word_count) -- cgit v1.2.3 From 06acd174f3a6bb9097ca31a3d3bc5f10797b2f4d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 May 2015 11:28:01 -0700 Subject: nir/spirv: Actaully add variables to the funciton or shader --- src/glsl/nir/spirv_to_nir.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 7f74c9708d9..de3ad50e25c 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -497,6 
+497,12 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, vtn_value(b, w[4], vtn_value_type_constant)->constant; } + if (var->data.mode == nir_var_local) { + exec_list_push_tail(&b->impl->locals, &var->node); + } else { + exec_list_push_tail(&b->shader->globals, &var->node); + } + val->deref = nir_deref_var_create(b->shader, var); vtn_foreach_decoration(b, val, var_decoration_cb, var); -- cgit v1.2.3 From 88f6fbc897b9eba82764465c5d1b3ef94dbfc990 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 10:19:24 -0700 Subject: nir: Add a helper for getting the tail of a deref chain --- src/glsl/nir/nir.h | 9 +++++++++ src/glsl/nir/nir_lower_var_copies.c | 15 ++------------- src/glsl/nir/nir_split_var_copies.c | 12 ++---------- 3 files changed, 13 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 697d37e95ac..61306e9b7e0 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -782,6 +782,15 @@ NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref) NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref) NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref) +/** Returns the tail of a deref chain */ +static inline nir_deref * +nir_deref_tail(nir_deref *deref) +{ + while (deref->child) + deref = deref->child; + return deref; +} + typedef struct { nir_instr instr; diff --git a/src/glsl/nir/nir_lower_var_copies.c b/src/glsl/nir/nir_lower_var_copies.c index 21672901f04..98c107aa50e 100644 --- a/src/glsl/nir/nir_lower_var_copies.c +++ b/src/glsl/nir/nir_lower_var_copies.c @@ -53,17 +53,6 @@ deref_next_wildcard_parent(nir_deref *deref) return NULL; } -/* Returns the last deref in the chain. 
- */ -static nir_deref * -get_deref_tail(nir_deref *deref) -{ - while (deref->child) - deref = deref->child; - - return deref; -} - /* This function recursively walks the given deref chain and replaces the * given copy instruction with an equivalent sequence load/store * operations. @@ -121,8 +110,8 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, } else { /* In this case, we have no wildcards anymore, so all we have to do * is just emit the load and store operations. */ - src_tail = get_deref_tail(src_tail); - dest_tail = get_deref_tail(dest_tail); + src_tail = nir_deref_tail(src_tail); + dest_tail = nir_deref_tail(dest_tail); assert(src_tail->type == dest_tail->type); diff --git a/src/glsl/nir/nir_split_var_copies.c b/src/glsl/nir/nir_split_var_copies.c index fc72c078c77..5c163b59819 100644 --- a/src/glsl/nir/nir_split_var_copies.c +++ b/src/glsl/nir/nir_split_var_copies.c @@ -66,14 +66,6 @@ struct split_var_copies_state { void *dead_ctx; }; -static nir_deref * -get_deref_tail(nir_deref *deref) -{ - while (deref->child != NULL) - deref = deref->child; - return deref; -} - /* Recursively constructs deref chains to split a copy instruction into * multiple (if needed) copy instructions with full-length deref chains. 
* External callers of this function should pass the tail and head of the @@ -225,8 +217,8 @@ split_var_copies_block(nir_block *block, void *void_state) nir_deref *dest_head = &intrinsic->variables[0]->deref; nir_deref *src_head = &intrinsic->variables[1]->deref; - nir_deref *dest_tail = get_deref_tail(dest_head); - nir_deref *src_tail = get_deref_tail(src_head); + nir_deref *dest_tail = nir_deref_tail(dest_head); + nir_deref *src_tail = nir_deref_tail(src_head); switch (glsl_get_base_type(src_tail->type)) { case GLSL_TYPE_ARRAY: -- cgit v1.2.3 From ae6d32c635707a5391c10ce12af37b79b190b8e8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 May 2015 11:28:18 -0700 Subject: nir/spirv: Implement load/store instructiosn --- src/glsl/nir/spirv_to_nir.c | 72 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index de3ad50e25c..85f07e06647 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -519,9 +519,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, while (tail->child) tail = tail->child; - for (unsigned i = 0; i < count - 3; i++) { - assert(w[i + 3] < b->value_id_bound); - struct vtn_value *idx_val = &b->values[w[i + 3]]; + for (unsigned i = 0; i < count - 4; i++) { + assert(w[i + 4] < b->value_id_bound); + struct vtn_value *idx_val = &b->values[w[i + 4]]; enum glsl_base_type base_type = glsl_get_base_type(tail->type); switch (base_type) { @@ -571,10 +571,70 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, break; } + case SpvOpCopyMemory: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + copy->variables[1] = 
nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + + nir_instr_insert_after_cf_list(b->cf_list, ©->instr); + break; + } + + case SpvOpLoad: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + nir_deref_var *src = vtn_value(b, w[3], vtn_value_type_deref)->deref; + const struct glsl_type *src_type = nir_deref_tail(&src->deref)->type; + assert(glsl_type_is_vector_or_scalar(src_type)); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src->deref)); + load->num_components = glsl_get_vector_elements(src_type); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, + val->name); + + nir_instr_insert_after_cf_list(b->cf_list, &load->instr); + val->ssa = &load->dest.ssa; + break; + } + + case SpvOpStore: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + const struct glsl_type *dest_type = nir_deref_tail(&dest->deref)->type; + struct vtn_value *src_val = vtn_untyped_value(b, w[2]); + if (src_val->value_type == vtn_value_type_ssa) { + assert(glsl_type_is_vector_or_scalar(dest_type)); + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->src[0] = nir_src_for_ssa(src_val->ssa); + store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest->deref)); + store->num_components = glsl_get_vector_elements(dest_type); + + nir_instr_insert_after_cf_list(b->cf_list, &store->instr); + } else { + assert(src_val->value_type == vtn_value_type_constant); + + nir_variable *const_tmp = rzalloc(b->shader, nir_variable); + const_tmp->type = dest_type; + const_tmp->data.mode = nir_var_local; + const_tmp->data.read_only = true; + exec_list_push_tail(&b->impl->locals, &const_tmp->node); + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + 
copy->variables[1] = nir_deref_var_create(copy, const_tmp); + + nir_instr_insert_after_cf_list(b->cf_list, ©->instr); + } + break; + } + case SpvOpVariableArray: - case SpvOpLoad: - case SpvOpStore: - case SpvOpCopyMemory: case SpvOpCopyMemorySized: case SpvOpArrayLength: case SpvOpImagePointer: -- cgit v1.2.3 From f23afc549baa8b2b6608c009dbad8606d7a9a07f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 May 2015 14:00:57 -0700 Subject: nir/spirv: Split instruction handling into preamble and body sections --- src/glsl/nir/spirv_to_nir.c | 138 +++++++++++++++++++++++++++++--------------- 1 file changed, 93 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 85f07e06647..898bb6a8e64 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -114,6 +114,28 @@ vtn_string_literal(struct vtn_builder *b, const uint32_t *words, return ralloc_strndup(b, (char *)words, (word_count - 2) * sizeof(*words)); } +typedef bool (*vtn_instruction_handler)(struct vtn_builder *, SpvOp, + const uint32_t *, unsigned); + +static const uint32_t * +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, + const uint32_t *end, vtn_instruction_handler handler) +{ + const uint32_t *w = start; + while (w < end) { + SpvOp opcode = w[0] & SpvOpCodeMask; + unsigned count = w[0] >> SpvWordCountShift; + assert(count >= 1 && w + count <= end); + + if (!handler(b, opcode, w, count)) + return w; + + w += count; + } + assert(w == end); + return w; +} + static void vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -714,32 +736,19 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, unreachable("Unhandled opcode"); } -static void -vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) +static bool +vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) 
{ switch (opcode) { case SpvOpSource: case SpvOpSourceExtension: - case SpvOpMemberName: - case SpvOpLine: + case SpvOpCompileFlag: case SpvOpExtension: + case SpvOpExtInstImport: /* Unhandled, but these are for debug so that's ok. */ break; - case SpvOpName: - b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2); - break; - - case SpvOpString: - vtn_push_value(b, w[1], vtn_value_type_string)->str = - vtn_string_literal(b, &w[2], count - 2); - break; - - case SpvOpUndef: - vtn_push_value(b, w[2], vtn_value_type_undef); - break; - case SpvOpMemoryModel: assert(w[1] == SpvAddressingModelLogical); assert(w[2] == SpvMemoryModelGLSL450); @@ -751,20 +760,32 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, b->execution_model = w[1]; break; - case SpvOpLabel: { - struct exec_node *list_tail = exec_list_get_tail(b->cf_list); - nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); - assert(tail_node->type == nir_cf_node_block); - nir_block *block = nir_cf_node_as_block(tail_node); + case SpvOpExecutionMode: + unreachable("Execution modes not yet implemented"); + break; - assert(exec_list_is_empty(&block->instr_list)); - vtn_push_value(b, w[1], vtn_value_type_block)->block = block; + case SpvOpString: + vtn_push_value(b, w[1], vtn_value_type_string)->str = + vtn_string_literal(b, &w[2], count - 2); break; - } - case SpvOpExtInstImport: - case SpvOpExtInst: - vtn_handle_extension(b, opcode, w, count); + case SpvOpName: + b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2); + break; + + case SpvOpMemberName: + /* TODO */ + break; + + case SpvOpLine: + break; /* Ignored for now */ + + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + vtn_handle_decoration(b, opcode, w, count); break; case SpvOpTypeVoid: @@ -803,6 +824,41 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_constant(b, opcode, w, count); break; + case 
SpvOpVariable: + vtn_handle_variables(b, opcode, w, count); + break; + + default: + return false; /* End of preamble */ + } + + return true; +} + +static bool +vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpLabel: { + struct exec_node *list_tail = exec_list_get_tail(b->cf_list); + nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); + assert(tail_node->type == nir_cf_node_block); + nir_block *block = nir_cf_node_as_block(tail_node); + + assert(exec_list_is_empty(&block->instr_list)); + vtn_push_value(b, w[1], vtn_value_type_block)->block = block; + break; + } + + case SpvOpUndef: + vtn_push_value(b, w[2], vtn_value_type_undef); + break; + + case SpvOpExtInst: + vtn_handle_extension(b, opcode, w, count); + break; + case SpvOpVariable: case SpvOpVariableArray: case SpvOpLoad: @@ -816,14 +872,6 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_variables(b, opcode, w, count); break; - case SpvOpDecorationGroup: - case SpvOpDecorate: - case SpvOpMemberDecorate: - case SpvOpGroupDecorate: - case SpvOpGroupMemberDecorate: - vtn_handle_decoration(b, opcode, w, count); - break; - case SpvOpFunction: case SpvOpFunctionEnd: case SpvOpFunctionParameter: @@ -953,6 +1001,8 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, default: unreachable("Unhandled opcode"); } + + return true; } nir_shader * @@ -978,17 +1028,15 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->value_id_bound = value_id_bound; b->values = ralloc_array(b, struct vtn_value, value_id_bound); - /* Start handling instructions */ const uint32_t *word_end = words + word_count; - while (words < word_end) { - SpvOp opcode = words[0] & SpvOpCodeMask; - unsigned count = words[0] >> SpvWordCountShift; - assert(words + count <= word_end); - vtn_handle_instruction(b, opcode, words, count); + /* Handle all the preamble instructions */ + words = 
vtn_foreach_instruction(b, words, word_end, + vtn_handle_preamble_instruction); - words += count; - } + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_body_instruction); + assert(words == word_end); ralloc_free(b); -- cgit v1.2.3 From ebc152e4c98eafcbe52aabcf4463664876fb9112 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 10:22:52 -0700 Subject: nir/spirv: Add a helper for getting a value as an SSA value --- src/glsl/nir/spirv_to_nir.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 898bb6a8e64..eac21c499eb 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -107,6 +107,12 @@ vtn_value(struct vtn_builder *b, uint32_t value_id, return val; } +static nir_ssa_def * +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +{ + return vtn_value(b, value_id, vtn_value_type_ssa)->ssa; +} + static char * vtn_string_literal(struct vtn_builder *b, const uint32_t *words, unsigned word_count) -- cgit v1.2.3 From c5650148a9e4156847f852cd466c882fa668fac0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 10:23:09 -0700 Subject: nir/spirv: Handle OpBranchConditional We do control-flow handling as a two-step process. The first step is to walk the instructions list and record various information about blocks and functions. This is where the acutal nir_function_overload objects get created. We also record the start/stop instruction for each block. Then a second pass walks over each of the functions and over the blocks in each function in a way that's NIR-friendly and actually parses the instructions. 
--- src/glsl/nir/spirv_to_nir.c | 257 ++++++++++++++++++++++++++++++++------------ 1 file changed, 189 insertions(+), 68 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index eac21c499eb..c0d77d5453d 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -44,6 +44,19 @@ enum vtn_value_type { vtn_value_type_ssa, }; +struct vtn_block { + const uint32_t *label; + const uint32_t *branch; + nir_block *block; +}; + +struct vtn_function { + struct exec_node node; + + nir_function_overload *overload; + struct vtn_block *start_block; +}; + struct vtn_value { enum vtn_value_type value_type; const char *name; @@ -54,8 +67,8 @@ struct vtn_value { const struct glsl_type *type; nir_constant *constant; nir_deref_var *deref; - nir_function_impl *impl; - nir_block *block; + struct vtn_function *func; + struct vtn_block *block; nir_ssa_def *ssa; }; }; @@ -71,12 +84,17 @@ struct vtn_builder { nir_shader *shader; nir_function_impl *impl; struct exec_list *cf_list; + struct vtn_block *block; + struct vtn_block *merge_block; unsigned value_id_bound; struct vtn_value *values; SpvExecutionModel execution_model; struct vtn_value *entry_point; + + struct vtn_function *func; + struct exec_list functions; }; static struct vtn_value * @@ -672,60 +690,10 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } static void -vtn_handle_functions(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) +vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) { - switch (opcode) { - case SpvOpFunction: { - assert(b->impl == NULL); - - const struct glsl_type *result_type = - vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); - const struct glsl_type *func_type = - vtn_value(b, w[4], vtn_value_type_type)->type; - - assert(glsl_get_function_return_type(func_type) == result_type); - - 
nir_function *func = - nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); - - nir_function_overload *overload = nir_function_overload_create(func); - overload->num_params = glsl_get_length(func_type); - overload->params = ralloc_array(overload, nir_parameter, - overload->num_params); - for (unsigned i = 0; i < overload->num_params; i++) { - const struct glsl_function_param *param = - glsl_get_function_param(func_type, i); - overload->params[i].type = param->type; - if (param->in) { - if (param->out) { - overload->params[i].param_type = nir_parameter_inout; - } else { - overload->params[i].param_type = nir_parameter_in; - } - } else { - if (param->out) { - overload->params[i].param_type = nir_parameter_out; - } else { - assert(!"Parameter is neither in nor out"); - } - } - } - - val->impl = b->impl = nir_function_impl_create(overload); - b->cf_list = &b->impl->body; - - break; - } - case SpvOpFunctionEnd: - b->impl = NULL; - break; - case SpvOpFunctionParameter: - case SpvOpFunctionCall: - default: - unreachable("Unhandled opcode"); - } + unreachable("Unhandled opcode"); } static void @@ -841,22 +809,118 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, return true; } +static bool +vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpFunction: { + assert(b->func == NULL); + b->func = rzalloc(b, struct vtn_function); + + const struct glsl_type *result_type = + vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + const struct glsl_type *func_type = + vtn_value(b, w[4], vtn_value_type_type)->type; + + assert(glsl_get_function_return_type(func_type) == result_type); + + nir_function *func = + nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); + + nir_function_overload *overload = nir_function_overload_create(func); + overload->num_params = 
glsl_get_length(func_type); + overload->params = ralloc_array(overload, nir_parameter, + overload->num_params); + for (unsigned i = 0; i < overload->num_params; i++) { + const struct glsl_function_param *param = + glsl_get_function_param(func_type, i); + overload->params[i].type = param->type; + if (param->in) { + if (param->out) { + overload->params[i].param_type = nir_parameter_inout; + } else { + overload->params[i].param_type = nir_parameter_in; + } + } else { + if (param->out) { + overload->params[i].param_type = nir_parameter_out; + } else { + assert(!"Parameter is neither in nor out"); + } + } + } + b->func->overload = overload; + break; + } + + case SpvOpFunctionEnd: + b->func = NULL; + break; + + case SpvOpFunctionParameter: + break; /* Does nothing */ + + case SpvOpLabel: { + assert(b->block == NULL); + b->block = rzalloc(b, struct vtn_block); + b->block->label = w; + vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; + + if (b->func->start_block == NULL) { + /* This is the first block encountered for this function. In this + * case, we set the start block and add it to the list of + * implemented functions that we'll walk later. 
+ */ + b->func->start_block = b->block; + exec_list_push_tail(&b->functions, &b->func->node); + } + break; + } + + case SpvOpBranch: + case SpvOpBranchConditional: + case SpvOpSwitch: + case SpvOpKill: + case SpvOpReturn: + case SpvOpReturnValue: + case SpvOpUnreachable: + assert(b->block); + b->block->branch = w; + b->block = NULL; + break; + + default: + /* Continue on as per normal */ + return true; + } + + return true; +} + static bool vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { switch (opcode) { case SpvOpLabel: { + struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; struct exec_node *list_tail = exec_list_get_tail(b->cf_list); nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); assert(tail_node->type == nir_cf_node_block); - nir_block *block = nir_cf_node_as_block(tail_node); - - assert(exec_list_is_empty(&block->instr_list)); - vtn_push_value(b, w[1], vtn_value_type_block)->block = block; + block->block = nir_cf_node_as_block(tail_node); + assert(exec_list_is_empty(&block->block->instr_list)); break; } + case SpvOpLoopMerge: + case SpvOpSelectionMerge: + assert(b->merge_block == NULL); + /* TODO: Selection Control */ + b->merge_block = vtn_value(b, w[1], vtn_value_type_block)->block; + break; + case SpvOpUndef: vtn_push_value(b, w[2], vtn_value_type_undef); break; @@ -878,11 +942,8 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_variables(b, opcode, w, count); break; - case SpvOpFunction: - case SpvOpFunctionEnd: - case SpvOpFunctionParameter: case SpvOpFunctionCall: - vtn_handle_functions(b, opcode, w, count); + vtn_handle_function_call(b, opcode, w, count); break; case SpvOpTextureSample: @@ -1011,10 +1072,65 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, return true; } +static void +vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, + struct vtn_block *end) +{ + struct vtn_block *block = 
start; + while (block != end) { + vtn_foreach_instruction(b, block->label, block->branch, + vtn_handle_body_instruction); + + const uint32_t *w = block->branch; + SpvOp branch_op = w[0] & SpvOpCodeMask; + switch (branch_op) { + case SpvOpBranch: { + assert(vtn_value(b, w[1], vtn_value_type_block)->block == end); + return; + } + + case SpvOpBranchConditional: { + /* Gather up the branch blocks */ + struct vtn_block *then_block = + vtn_value(b, w[2], vtn_value_type_block)->block; + struct vtn_block *else_block = + vtn_value(b, w[3], vtn_value_type_block)->block; + struct vtn_block *merge_block = b->merge_block; + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])); + nir_cf_node_insert_end(b->cf_list, &if_stmt->cf_node); + + struct exec_list *old_list = b->cf_list; + + b->cf_list = &if_stmt->then_list; + vtn_walk_blocks(b, then_block, merge_block); + + b->cf_list = &if_stmt->else_list; + vtn_walk_blocks(b, else_block, merge_block); + + b->cf_list = old_list; + block = merge_block; + continue; + } + + case SpvOpSwitch: + case SpvOpKill: + case SpvOpReturn: + case SpvOpReturnValue: + case SpvOpUnreachable: + default: + unreachable("Unhandled opcode"); + } + } +} + nir_shader * spirv_to_nir(const uint32_t *words, size_t word_count, const nir_shader_compiler_options *options) { + const uint32_t *word_end = words + word_count; + /* Handle the SPIR-V header (first 4 dwords) */ assert(word_count > 5); @@ -1033,16 +1149,21 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->shader = shader; b->value_id_bound = value_id_bound; b->values = ralloc_array(b, struct vtn_value, value_id_bound); - - const uint32_t *word_end = words + word_count; + exec_list_make_empty(&b->functions); /* Handle all the preamble instructions */ words = vtn_foreach_instruction(b, words, word_end, vtn_handle_preamble_instruction); - words = vtn_foreach_instruction(b, words, word_end, - vtn_handle_body_instruction); - assert(words == 
word_end); + /* Do a very quick CFG analysis pass */ + vtn_foreach_instruction(b, words, word_end, + vtn_handle_first_cfg_pass_instruction); + + foreach_list_typed(struct vtn_function, func, node, &b->functions) { + b->impl = nir_function_impl_create(func->overload); + b->cf_list = &b->impl->body; + vtn_walk_blocks(b, func->start_block, NULL); + } ralloc_free(b); -- cgit v1.2.3 From 683c99908aa3560722614bee6b61969f08cf0616 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 12:02:24 -0700 Subject: nir/spirv: Explicitly type constants and SSA values --- src/glsl/nir/spirv_to_nir.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index c0d77d5453d..4d425c9a846 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -61,10 +61,10 @@ struct vtn_value { enum vtn_value_type value_type; const char *name; struct vtn_decoration *decoration; + const struct glsl_type *type; union { void *ptr; char *str; - const struct glsl_type *type; nir_constant *constant; nir_deref_var *deref; struct vtn_function *func; @@ -346,20 +346,21 @@ static void vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - const struct glsl_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - nir_constant *constant = ralloc(b, nir_constant); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->constant = ralloc(b, nir_constant); switch (opcode) { case SpvOpConstantTrue: - assert(type == glsl_bool_type()); - constant->value.u[0] = NIR_TRUE; + assert(val->type == glsl_bool_type()); + val->constant->value.u[0] = NIR_TRUE; break; case SpvOpConstantFalse: - assert(type == glsl_bool_type()); - constant->value.u[0] = NIR_FALSE; + assert(val->type == glsl_bool_type()); + val->constant->value.u[0] = 
NIR_FALSE; break; case SpvOpConstant: - assert(glsl_type_is_scalar(type)); - constant->value.u[0] = w[3]; + assert(glsl_type_is_scalar(val->type)); + val->constant->value.u[0] = w[3]; break; case SpvOpConstantComposite: { unsigned elem_count = count - 3; @@ -367,29 +368,30 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, for (unsigned i = 0; i < elem_count; i++) elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; - switch (glsl_get_base_type(type)) { + switch (glsl_get_base_type(val->type)) { case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_FLOAT: case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(type)) { - unsigned rows = glsl_get_vector_elements(type); - assert(glsl_get_matrix_columns(type) == elem_count); + if (glsl_type_is_matrix(val->type)) { + unsigned rows = glsl_get_vector_elements(val->type); + assert(glsl_get_matrix_columns(val->type) == elem_count); for (unsigned i = 0; i < elem_count; i++) for (unsigned j = 0; j < rows; j++) - constant->value.u[rows * i + j] = elems[i]->value.u[j]; + val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; } else { - assert(glsl_type_is_vector(type)); - assert(glsl_get_vector_elements(type) == elem_count); + assert(glsl_type_is_vector(val->type)); + assert(glsl_get_vector_elements(val->type) == elem_count); for (unsigned i = 0; i < elem_count; i++) - constant->value.u[i] = elems[i]->value.u[0]; + val->constant->value.u[i] = elems[i]->value.u[0]; } ralloc_free(elems); break; case GLSL_TYPE_STRUCT: case GLSL_TYPE_ARRAY: - constant->elements = elems; + ralloc_steal(val->constant, elems); + val->constant->elements = elems; break; default: @@ -401,7 +403,6 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, default: unreachable("Unhandled opcode"); } - vtn_push_value(b, w[2], vtn_value_type_constant)->constant = constant; } static void @@ -644,6 +645,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, val->name); nir_instr_insert_after_cf_list(b->cf_list, &load->instr); 
+ val->type = src_type; val->ssa = &load->dest.ssa; break; } -- cgit v1.2.3 From d2a7972557209cfe47fd1d7325ccbca8b3b844a5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 12:02:57 -0700 Subject: nir/spirv: Add support for indirect array accesses --- src/glsl/nir/spirv_to_nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 4d425c9a846..3f8ce2af10e 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -595,8 +595,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } else { assert(idx_val->value_type == vtn_value_type_ssa); deref_arr->deref_array_type = nir_deref_array_type_indirect; - /* TODO */ - unreachable("Indirect array accesses not implemented"); + deref_arr->base_offset = 0; + deref_arr->indirect = nir_src_for_ssa(vtn_ssa_value(b, w[1])); } tail->child = &deref_arr->deref; break; -- cgit v1.2.3 From ff828749eab5d100ba61988f1b6c17712e751559 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 12:04:02 -0700 Subject: nir/spirv: Add support for a bunch of ALU operations --- src/glsl/nir/spirv_to_nir.c | 202 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 195 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 3f8ce2af10e..734ffeeed54 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -27,6 +27,7 @@ #include "nir_spirv.h" #include "nir_vla.h" +#include "nir_builder.h" #include "spirv.h" struct vtn_decoration; @@ -81,6 +82,8 @@ struct vtn_decoration { }; struct vtn_builder { + nir_builder nb; + nir_shader *shader; nir_function_impl *impl; struct exec_list *cf_list; @@ -705,11 +708,192 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, unreachable("Unhandled opcode"); } +static void +vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned 
count) +{ + unreachable("Matrix math not handled"); +} + static void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Unhandled opcode"); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + + /* Collect the various SSA sources */ + unsigned num_inputs = count - 3; + nir_ssa_def *src[4]; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 3]); + + /* We use the builder for some of the instructions. Go ahead and + * initialize it with the current cf_list. + */ + nir_builder_insert_after_cf_list(&b->nb, b->cf_list); + + /* Indicates that the first two arguments should be swapped. This is + * used for implementing greater-than and less-than-or-equal. + */ + bool swap = false; + + nir_op op; + switch (opcode) { + /* Basic ALU operations */ + case SpvOpSNegate: op = nir_op_ineg; break; + case SpvOpFNegate: op = nir_op_fneg; break; + case SpvOpNot: op = nir_op_inot; break; + + case SpvOpAny: + switch (src[0]->num_components) { + case 1: op = nir_op_imov; break; + case 2: op = nir_op_bany2; break; + case 3: op = nir_op_bany3; break; + case 4: op = nir_op_bany4; break; + } + break; + + case SpvOpAll: + switch (src[0]->num_components) { + case 1: op = nir_op_imov; break; + case 2: op = nir_op_ball2; break; + case 3: op = nir_op_ball3; break; + case 4: op = nir_op_ball4; break; + } + break; + + case SpvOpIAdd: op = nir_op_iadd; break; + case SpvOpFAdd: op = nir_op_fadd; break; + case SpvOpISub: op = nir_op_isub; break; + case SpvOpFSub: op = nir_op_fsub; break; + case SpvOpIMul: op = nir_op_imul; break; + case SpvOpFMul: op = nir_op_fmul; break; + case SpvOpUDiv: op = nir_op_udiv; break; + case SpvOpSDiv: op = nir_op_idiv; break; + case SpvOpFDiv: op = nir_op_fdiv; break; + case SpvOpUMod: op = nir_op_umod; break; + case SpvOpSMod: op = nir_op_umod; break; /* FIXME? 
*/ + case SpvOpFMod: op = nir_op_fmod; break; + + case SpvOpDot: + assert(src[0]->num_components == src[1]->num_components); + switch (src[0]->num_components) { + case 1: op = nir_op_fmul; break; + case 2: op = nir_op_fdot2; break; + case 3: op = nir_op_fdot3; break; + case 4: op = nir_op_fdot4; break; + } + break; + + case SpvOpShiftRightLogical: op = nir_op_ushr; break; + case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; + case SpvOpShiftLeftLogical: op = nir_op_ishl; break; + case SpvOpLogicalOr: op = nir_op_ior; break; + case SpvOpLogicalXor: op = nir_op_ixor; break; + case SpvOpLogicalAnd: op = nir_op_iand; break; + case SpvOpBitwiseOr: op = nir_op_ior; break; + case SpvOpBitwiseXor: op = nir_op_ixor; break; + case SpvOpBitwiseAnd: op = nir_op_iand; break; + case SpvOpSelect: op = nir_op_bcsel; break; + case SpvOpIEqual: op = nir_op_ieq; break; + + /* Comparisons: (TODO: How do we want to handled ordered/unordered?) */ + case SpvOpFOrdEqual: op = nir_op_feq; break; + case SpvOpFUnordEqual: op = nir_op_feq; break; + case SpvOpINotEqual: op = nir_op_ine; break; + case SpvOpFOrdNotEqual: op = nir_op_fne; break; + case SpvOpFUnordNotEqual: op = nir_op_fne; break; + case SpvOpULessThan: op = nir_op_ult; break; + case SpvOpSLessThan: op = nir_op_ilt; break; + case SpvOpFOrdLessThan: op = nir_op_flt; break; + case SpvOpFUnordLessThan: op = nir_op_flt; break; + case SpvOpUGreaterThan: op = nir_op_ult; swap = true; break; + case SpvOpSGreaterThan: op = nir_op_ilt; swap = true; break; + case SpvOpFOrdGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpFUnordGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpULessThanEqual: op = nir_op_uge; swap = true; break; + case SpvOpSLessThanEqual: op = nir_op_ige; swap = true; break; + case SpvOpFOrdLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpFUnordLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpUGreaterThanEqual: op = nir_op_uge; break; + case SpvOpSGreaterThanEqual: 
op = nir_op_ige; break; + case SpvOpFOrdGreaterThanEqual: op = nir_op_fge; break; + case SpvOpFUnordGreaterThanEqual:op = nir_op_fge; break; + + /* Conversions: */ + case SpvOpConvertFToU: op = nir_op_f2u; break; + case SpvOpConvertFToS: op = nir_op_f2i; break; + case SpvOpConvertSToF: op = nir_op_i2f; break; + case SpvOpConvertUToF: op = nir_op_u2f; break; + case SpvOpBitcast: op = nir_op_imov; break; + case SpvOpUConvert: + case SpvOpSConvert: + op = nir_op_imov; /* TODO: NIR is 32-bit only; these are no-ops. */ + break; + case SpvOpFConvert: + op = nir_op_fmov; + break; + + /* Derivatives: */ + case SpvOpDPdx: op = nir_op_fddx; break; + case SpvOpDPdy: op = nir_op_fddy; break; + case SpvOpDPdxFine: op = nir_op_fddx_fine; break; + case SpvOpDPdyFine: op = nir_op_fddy_fine; break; + case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; + case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; + case SpvOpFwidth: + val->ssa = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); + return; + case SpvOpFwidthFine: + val->ssa = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); + return; + case SpvOpFwidthCoarse: + val->ssa = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); + return; + + case SpvOpVectorTimesScalar: + /* The builder will take care of splatting for us. 
*/ + val->ssa = nir_fmul(&b->nb, src[0], src[1]); + return; + + case SpvOpSRem: + case SpvOpFRem: + unreachable("No NIR equivalent"); + + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + default: + unreachable("Unhandled opcode"); + } + + if (swap) { + nir_ssa_def *tmp = src[0]; + src[0] = src[1]; + src[1] = tmp; + } + + nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, + glsl_get_vector_elements(val->type), val->name); + val->ssa = &instr->dest.dest.ssa; + + for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) + instr->src[i].src = nir_src_for_ssa(src[i]); + + nir_instr_insert_after_cf_list(b->cf_list, &instr->instr); } static bool @@ -993,7 +1177,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpPtrCastToGeneric: case SpvOpGenericCastToPtr: case SpvOpBitcast: - case SpvOpTranspose: case SpvOpIsNan: case SpvOpIsInf: case SpvOpIsFinite: @@ -1017,11 +1200,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpFRem: case SpvOpFMod: case SpvOpVectorTimesScalar: - case SpvOpMatrixTimesScalar: - case SpvOpVectorTimesMatrix: - case SpvOpMatrixTimesVector: - case SpvOpMatrixTimesMatrix: - case SpvOpOuterProduct: case SpvOpDot: case SpvOpShiftRightLogical: case SpvOpShiftRightArithmetic: @@ -1067,6 +1245,15 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_alu(b, opcode, w, count); break; + case SpvOpTranspose: + case SpvOpOuterProduct: + case SpvOpMatrixTimesScalar: + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + vtn_handle_matrix_alu(b, opcode, w, count); + break; + default: unreachable("Unhandled opcode"); } @@ -1163,6 +1350,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, foreach_list_typed(struct vtn_function, func, node, 
&b->functions) { b->impl = nir_function_impl_create(func->overload); + nir_builder_init(&b->nb, b->impl); b->cf_list = &b->impl->body; vtn_walk_blocks(b, func->start_block, NULL); } -- cgit v1.2.3 From 98d78856f6f8965448f8ae5db74ab2f0609cb45e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 12:12:23 -0700 Subject: nir/spirv: Use the builder for all instructions We don't actually use it to create all the instructions but we do use it for insertion always. This should make things far more consistent for implementing extended instructions. --- src/glsl/nir/spirv_to_nir.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 734ffeeed54..3af84aecaa4 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -86,7 +86,6 @@ struct vtn_builder { nir_shader *shader; nir_function_impl *impl; - struct exec_list *cf_list; struct vtn_block *block; struct vtn_block *merge_block; @@ -630,7 +629,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); - nir_instr_insert_after_cf_list(b->cf_list, ©->instr); + nir_builder_instr_insert(&b->nb, ©->instr); break; } @@ -647,7 +646,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, val->name); - nir_instr_insert_after_cf_list(b->cf_list, &load->instr); + nir_builder_instr_insert(&b->nb, &load->instr); val->type = src_type; val->ssa = &load->dest.ssa; break; @@ -665,7 +664,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest->deref)); store->num_components = glsl_get_vector_elements(dest_type); - nir_instr_insert_after_cf_list(b->cf_list, &store->instr); + 
nir_builder_instr_insert(&b->nb, &store->instr); } else { assert(src_val->value_type == vtn_value_type_constant); @@ -680,7 +679,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); copy->variables[1] = nir_deref_var_create(copy, const_tmp); - nir_instr_insert_after_cf_list(b->cf_list, ©->instr); + nir_builder_instr_insert(&b->nb, ©->instr); } break; } @@ -728,11 +727,6 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, for (unsigned i = 0; i < num_inputs; i++) src[i] = vtn_ssa_value(b, w[i + 3]); - /* We use the builder for some of the instructions. Go ahead and - * initialize it with the current cf_list. - */ - nir_builder_insert_after_cf_list(&b->nb, b->cf_list); - /* Indicates that the first two arguments should be swapped. This is * used for implementing greater-than and less-than-or-equal. */ @@ -893,7 +887,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) instr->src[i].src = nir_src_for_ssa(src[i]); - nir_instr_insert_after_cf_list(b->cf_list, &instr->instr); + nir_builder_instr_insert(&b->nb, &instr->instr); } static bool @@ -1092,7 +1086,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, switch (opcode) { case SpvOpLabel: { struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; - struct exec_node *list_tail = exec_list_get_tail(b->cf_list); + struct exec_node *list_tail = exec_list_get_tail(b->nb.cf_node_list); nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); assert(tail_node->type == nir_cf_node_block); block->block = nir_cf_node_as_block(tail_node); @@ -1288,17 +1282,17 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, nir_if *if_stmt = nir_if_create(b->shader); if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])); - nir_cf_node_insert_end(b->cf_list, &if_stmt->cf_node); + nir_cf_node_insert_end(b->nb.cf_node_list, 
&if_stmt->cf_node); - struct exec_list *old_list = b->cf_list; + struct exec_list *old_list = b->nb.cf_node_list; - b->cf_list = &if_stmt->then_list; + nir_builder_insert_after_cf_list(&b->nb, &if_stmt->then_list); vtn_walk_blocks(b, then_block, merge_block); - b->cf_list = &if_stmt->else_list; + nir_builder_insert_after_cf_list(&b->nb, &if_stmt->else_list); vtn_walk_blocks(b, else_block, merge_block); - b->cf_list = old_list; + nir_builder_insert_after_cf_list(&b->nb, old_list); block = merge_block; continue; } @@ -1351,7 +1345,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, foreach_list_typed(struct vtn_function, func, node, &b->functions) { b->impl = nir_function_impl_create(func->overload); nir_builder_init(&b->nb, b->impl); - b->cf_list = &b->impl->body; + nir_builder_insert_after_cf_list(&b->nb, &b->impl->body); vtn_walk_blocks(b, func->start_block, NULL); } -- cgit v1.2.3 From 1da9876486b61aa2a6f8c10da75e1b852058ef7d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 12:25:09 -0700 Subject: nir/spirv: Split the core datastructures into a header file --- src/glsl/nir/spirv_to_nir.c | 117 ++---------------------------- src/glsl/nir/spirv_to_nir_private.h | 141 ++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 112 deletions(-) create mode 100644 src/glsl/nir/spirv_to_nir_private.h (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 3af84aecaa4..20b7f6e1dd1 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -25,109 +25,10 @@ * */ -#include "nir_spirv.h" +#include "spirv_to_nir_private.h" #include "nir_vla.h" -#include "nir_builder.h" -#include "spirv.h" - -struct vtn_decoration; - -enum vtn_value_type { - vtn_value_type_invalid = 0, - vtn_value_type_undef, - vtn_value_type_string, - vtn_value_type_decoration_group, - vtn_value_type_type, - vtn_value_type_constant, - vtn_value_type_deref, - vtn_value_type_function, - vtn_value_type_block, - 
vtn_value_type_ssa, -}; - -struct vtn_block { - const uint32_t *label; - const uint32_t *branch; - nir_block *block; -}; - -struct vtn_function { - struct exec_node node; - - nir_function_overload *overload; - struct vtn_block *start_block; -}; - -struct vtn_value { - enum vtn_value_type value_type; - const char *name; - struct vtn_decoration *decoration; - const struct glsl_type *type; - union { - void *ptr; - char *str; - nir_constant *constant; - nir_deref_var *deref; - struct vtn_function *func; - struct vtn_block *block; - nir_ssa_def *ssa; - }; -}; - -struct vtn_decoration { - struct vtn_decoration *next; - const uint32_t *literals; - struct vtn_value *group; - SpvDecoration decoration; -}; - -struct vtn_builder { - nir_builder nb; - - nir_shader *shader; - nir_function_impl *impl; - struct vtn_block *block; - struct vtn_block *merge_block; - - unsigned value_id_bound; - struct vtn_value *values; - - SpvExecutionModel execution_model; - struct vtn_value *entry_point; - - struct vtn_function *func; - struct exec_list functions; -}; - -static struct vtn_value * -vtn_push_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) -{ - assert(value_id < b->value_id_bound); - assert(b->values[value_id].value_type == vtn_value_type_invalid); - - b->values[value_id].value_type = value_type; - - return &b->values[value_id]; -} - -static struct vtn_value * -vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) -{ - assert(value_id < b->value_id_bound); - return &b->values[value_id]; -} -static struct vtn_value * -vtn_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) -{ - struct vtn_value *val = vtn_untyped_value(b, value_id); - assert(val->value_type == value_type); - return val; -} - -static nir_ssa_def * +nir_ssa_def * vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) { return vtn_value(b, value_id, vtn_value_type_ssa)->ssa; @@ -140,9 +41,6 @@ vtn_string_literal(struct vtn_builder *b, const uint32_t 
*words, return ralloc_strndup(b, (char *)words, (word_count - 2) * sizeof(*words)); } -typedef bool (*vtn_instruction_handler)(struct vtn_builder *, SpvOp, - const uint32_t *, unsigned); - static const uint32_t * vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, const uint32_t *end, vtn_instruction_handler handler) @@ -177,16 +75,11 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, } } -typedef void (*decoration_foreach_cb)(struct vtn_builder *, - struct vtn_value *, - const struct vtn_decoration *, - void *); - static void _foreach_decoration_helper(struct vtn_builder *b, struct vtn_value *base_value, struct vtn_value *value, - decoration_foreach_cb cb, void *data) + vtn_decoration_foreach_cb cb, void *data) { for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { if (dec->group) { @@ -204,9 +97,9 @@ _foreach_decoration_helper(struct vtn_builder *b, * value. If it encounters a decoration group, it recurses into the group * and iterates over all of those decorations as well. 
*/ -static void +void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, - decoration_foreach_cb cb, void *data) + vtn_decoration_foreach_cb cb, void *data) { _foreach_decoration_helper(b, value, value, cb, data); } diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h new file mode 100644 index 00000000000..0a07b377e72 --- /dev/null +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -0,0 +1,141 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir_spirv.h" +#include "nir_builder.h" +#include "spirv.h" + +struct vtn_builder; +struct vtn_decoration; + +enum vtn_value_type { + vtn_value_type_invalid = 0, + vtn_value_type_undef, + vtn_value_type_string, + vtn_value_type_decoration_group, + vtn_value_type_type, + vtn_value_type_constant, + vtn_value_type_deref, + vtn_value_type_function, + vtn_value_type_block, + vtn_value_type_ssa, +}; + +struct vtn_block { + const uint32_t *label; + const uint32_t *branch; + nir_block *block; +}; + +struct vtn_function { + struct exec_node node; + + nir_function_overload *overload; + struct vtn_block *start_block; +}; + +typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, + const uint32_t *, unsigned); + +struct vtn_value { + enum vtn_value_type value_type; + const char *name; + struct vtn_decoration *decoration; + const struct glsl_type *type; + union { + void *ptr; + char *str; + nir_constant *constant; + nir_deref_var *deref; + struct vtn_function *func; + struct vtn_block *block; + nir_ssa_def *ssa; + }; +}; + +struct vtn_decoration { + struct vtn_decoration *next; + const uint32_t *literals; + struct vtn_value *group; + SpvDecoration decoration; +}; + +struct vtn_builder { + nir_builder nb; + + nir_shader *shader; + nir_function_impl *impl; + struct vtn_block *block; + struct vtn_block *merge_block; + + unsigned value_id_bound; + struct vtn_value *values; + + SpvExecutionModel execution_model; + struct vtn_value *entry_point; + + struct vtn_function *func; + struct exec_list functions; +}; + +static inline struct vtn_value * +vtn_push_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + assert(value_id < b->value_id_bound); + assert(b->values[value_id].value_type == vtn_value_type_invalid); + + b->values[value_id].value_type = value_type; + + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_untyped_value(struct 
vtn_builder *b, uint32_t value_id) +{ + assert(value_id < b->value_id_bound); + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + assert(val->value_type == value_type); + return val; +} + +nir_ssa_def *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); + +typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + const struct vtn_decoration *, + void *); + +void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data); -- cgit v1.2.3 From b0d1854efc3863b4e362496d77820b865d3e8357 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 15:17:56 -0700 Subject: nir/spirv: Add initial support for GLSL 4.50 builtins --- src/glsl/Makefile.sources | 1 + src/glsl/nir/spirv_glsl450_to_nir.c | 284 ++++++++++++++++++++++++++++++++++++ src/glsl/nir/spirv_to_nir.c | 24 ++- src/glsl/nir/spirv_to_nir_private.h | 5 + 4 files changed, 310 insertions(+), 4 deletions(-) create mode 100644 src/glsl/nir/spirv_glsl450_to_nir.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index be6e4ecf839..a234ac6f8e2 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -70,6 +70,7 @@ NIR_FILES = \ nir/nir_worklist.h \ nir/nir_types.cpp \ nir/spirv_to_nir.c \ + nir/spirv_glsl450_to_nir.c \ $(NIR_GENERATED_FILES) # libglsl diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c new file mode 100644 index 00000000000..240ff012fe1 --- /dev/null +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -0,0 +1,284 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "spirv_to_nir_private.h" + +enum GLSL450Entrypoint { + Round = 0, + RoundEven = 1, + Trunc = 2, + Abs = 3, + Sign = 4, + Floor = 5, + Ceil = 6, + Fract = 7, + + Radians = 8, + Degrees = 9, + Sin = 10, + Cos = 11, + Tan = 12, + Asin = 13, + Acos = 14, + Atan = 15, + Sinh = 16, + Cosh = 17, + Tanh = 18, + Asinh = 19, + Acosh = 20, + Atanh = 21, + Atan2 = 22, + + Pow = 23, + Exp = 24, + Log = 25, + Exp2 = 26, + Log2 = 27, + Sqrt = 28, + InverseSqrt = 29, + + Determinant = 30, + MatrixInverse = 31, + + Modf = 32, // second argument needs the OpVariable = , not an OpLoad + Min = 33, + Max = 34, + Clamp = 35, + Mix = 36, + Step = 37, + SmoothStep = 38, + + FloatBitsToInt = 39, + FloatBitsToUint = 40, + IntBitsToFloat = 41, + UintBitsToFloat = 42, + + Fma = 43, + Frexp = 44, + Ldexp = 45, + + PackSnorm4x8 = 46, + PackUnorm4x8 = 47, + PackSnorm2x16 = 48, + PackUnorm2x16 = 49, + PackHalf2x16 = 50, + PackDouble2x32 = 51, + UnpackSnorm2x16 = 52, + UnpackUnorm2x16 = 53, + 
UnpackHalf2x16 = 54, + UnpackSnorm4x8 = 55, + UnpackUnorm4x8 = 56, + UnpackDouble2x32 = 57, + + Length = 58, + Distance = 59, + Cross = 60, + Normalize = 61, + Ftransform = 62, + FaceForward = 63, + Reflect = 64, + Refract = 65, + + UaddCarry = 66, + UsubBorrow = 67, + UmulExtended = 68, + ImulExtended = 69, + BitfieldExtract = 70, + BitfieldInsert = 71, + BitfieldReverse = 72, + BitCount = 73, + FindLSB = 74, + FindMSB = 75, + + InterpolateAtCentroid = 76, + InterpolateAtSample = 77, + InterpolateAtOffset = 78, + + Count +}; + +static nir_ssa_def* +build_length(nir_builder *b, nir_ssa_def *vec) +{ + switch (vec->num_components) { + case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec)); + case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec)); + case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec)); + case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec)); + default: + unreachable("Invalid number of components"); + } +} + +static void +handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + + /* Collect the various SSA sources */ + unsigned num_inputs = count - 5; + nir_ssa_def *src[3]; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 5]); + + nir_op op; + switch (entrypoint) { + case Round: op = nir_op_fround_even; break; /* TODO */ + case RoundEven: op = nir_op_fround_even; break; + case Trunc: op = nir_op_ftrunc; break; + case Abs: op = nir_op_fabs; break; + case Sign: op = nir_op_fsign; break; + case Floor: op = nir_op_ffloor; break; + case Ceil: op = nir_op_fceil; break; + case Fract: op = nir_op_ffract; break; + case Radians: + val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); + return; + case Degrees: + val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); + return; + case Sin: op = nir_op_fsin; break; + 
case Cos: op = nir_op_fcos; break; + case Tan: + val->ssa = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), + nir_fcos(&b->nb, src[0])); + return; + case Pow: op = nir_op_fpow; break; + case Exp: op = nir_op_fexp; break; + case Log: op = nir_op_flog; break; + case Exp2: op = nir_op_fexp2; break; + case Log2: op = nir_op_flog2; break; + case Sqrt: op = nir_op_fsqrt; break; + case InverseSqrt: op = nir_op_frsq; break; + + case Modf: op = nir_op_fmod; break; + case Min: op = nir_op_fmin; break; + case Max: op = nir_op_fmax; break; + case Mix: op = nir_op_flrp; break; + case Step: + val->ssa = nir_sge(&b->nb, src[1], src[0]); + return; + + case FloatBitsToInt: + case FloatBitsToUint: + case IntBitsToFloat: + case UintBitsToFloat: + /* Probably going to be removed from the final version of the spec. */ + val->ssa = src[0]; + return; + + case Fma: op = nir_op_ffma; break; + case Ldexp: op = nir_op_ldexp; break; + + /* Packing/Unpacking functions */ + case PackSnorm4x8: op = nir_op_pack_snorm_4x8; break; + case PackUnorm4x8: op = nir_op_pack_unorm_4x8; break; + case PackSnorm2x16: op = nir_op_pack_snorm_2x16; break; + case PackUnorm2x16: op = nir_op_pack_unorm_2x16; break; + case PackHalf2x16: op = nir_op_pack_half_2x16; break; + case UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break; + case UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break; + case UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break; + case UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break; + case UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; + + case Length: + val->ssa = build_length(&b->nb, src[0]); + return; + case Distance: + val->ssa = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); + return; + case Normalize: + val->ssa = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); + return; + + case UaddCarry: op = nir_op_uadd_carry; break; + case UsubBorrow: op = nir_op_usub_borrow; break; + case BitfieldExtract: op = nir_op_ubitfield_extract; break; /* TODO */ + case 
BitfieldInsert: op = nir_op_bitfield_insert; break; + case BitfieldReverse: op = nir_op_bitfield_reverse; break; + case BitCount: op = nir_op_bit_count; break; + case FindLSB: op = nir_op_find_lsb; break; + case FindMSB: op = nir_op_ufind_msb; break; /* TODO */ + + case Clamp: + case Asin: + case Acos: + case Atan: + case Atan2: + case Sinh: + case Cosh: + case Tanh: + case Asinh: + case Acosh: + case Atanh: + case SmoothStep: + case Frexp: + case PackDouble2x32: + case UnpackDouble2x32: + case Cross: + case Ftransform: + case FaceForward: + case Reflect: + case Refract: + case UmulExtended: + case ImulExtended: + default: + unreachable("Unhandled opcode"); + } + + nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, + glsl_get_vector_elements(val->type), val->name); + val->ssa = &instr->dest.dest.ssa; + + for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) + instr->src[i].src = nir_src_for_ssa(src[i]); + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +bool +vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *words, unsigned count) +{ + switch ((enum GLSL450Entrypoint)ext_opcode) { + case Determinant: + case MatrixInverse: + case InterpolateAtCentroid: + case InterpolateAtSample: + case InterpolateAtOffset: + unreachable("Unhandled opcode"); + + default: + handle_glsl450_alu(b, (enum GLSL450Entrypoint)ext_opcode, words, count); + } + + return true; +} diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 20b7f6e1dd1..f1c63ebff13 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -65,11 +65,24 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { switch (opcode) { - case SpvOpExtInstImport: - /* Do nothing for the moment */ + case SpvOpExtInstImport: { + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); + if (strcmp((const char 
*)&w[2], "GLSL.std.450") == 0) { + val->ext_handler = vtn_handle_glsl450_instruction; + } else { + assert(!"Unsupported extension"); + } break; + } + + case SpvOpExtInst: { + struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); + bool handled = val->ext_handler(b, w[4], w, count); + (void)handled; + assert(handled); + break; + } - case SpvOpExtInst: default: unreachable("Unhandled opcode"); } @@ -792,10 +805,13 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpSourceExtension: case SpvOpCompileFlag: case SpvOpExtension: - case SpvOpExtInstImport: /* Unhandled, but these are for debug so that's ok. */ break; + case SpvOpExtInstImport: + vtn_handle_extension(b, opcode, w, count); + break; + case SpvOpMemoryModel: assert(w[1] == SpvAddressingModelLogical); assert(w[2] == SpvMemoryModelGLSL450); diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index 0a07b377e72..fd80dd4e161 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -43,6 +43,7 @@ enum vtn_value_type { vtn_value_type_function, vtn_value_type_block, vtn_value_type_ssa, + vtn_value_type_extension, }; struct vtn_block { @@ -74,6 +75,7 @@ struct vtn_value { struct vtn_function *func; struct vtn_block *block; nir_ssa_def *ssa; + vtn_instruction_handler ext_handler; }; }; @@ -139,3 +141,6 @@ typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, vtn_decoration_foreach_cb cb, void *data); + +bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *words, unsigned count); -- cgit v1.2.3 From 7b9c29e440d2ef3d2bd476ebd9ff06586f396da5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 12:35:30 -0700 Subject: nir/spirv: Make vtn_ssa_value handle constants as well as ssa values --- src/glsl/nir/spirv_to_nir.c | 21 ++++++++++++++++++++- 1 file changed, 20 
insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index f1c63ebff13..62d377ed243 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -31,7 +31,26 @@ nir_ssa_def * vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) { - return vtn_value(b, value_id, vtn_value_type_ssa)->ssa; + struct vtn_value *val = vtn_untyped_value(b, value_id); + switch (val->value_type) { + case vtn_value_type_constant: { + assert(glsl_type_is_vector_or_scalar(val->type)); + unsigned num_components = glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, num_components); + + for (unsigned i = 0; i < num_components; i++) + load->value.u[0] = val->constant->value.u[0]; + + nir_builder_instr_insert(&b->nb, &load->instr); + return &load->def; + } + + case vtn_value_type_ssa: + return val->ssa; + default: + unreachable("Invalid type for an SSA value"); + } } static char * -- cgit v1.2.3 From a28f8ad9f1cce99e38c93fa5bf2892056861414c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 12:36:09 -0700 Subject: nir/spirv: Use the correct length for copying string literals --- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 62d377ed243..d4bad887dc1 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -57,7 +57,7 @@ static char * vtn_string_literal(struct vtn_builder *b, const uint32_t *words, unsigned word_count) { - return ralloc_strndup(b, (char *)words, (word_count - 2) * sizeof(*words)); + return ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); } static const uint32_t * -- cgit v1.2.3 From 3a2db9207d5135f9cc2610e78aecaa9204d58641 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 12:36:31 -0700 Subject: nir/spirv: Set a name on temporary variables --- 
src/glsl/nir/spirv_to_nir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index d4bad887dc1..3bbf91453fd 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -595,6 +595,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_variable *const_tmp = rzalloc(b->shader, nir_variable); const_tmp->type = dest_type; + const_tmp->name = "const_temp"; const_tmp->data.mode = nir_var_local; const_tmp->data.read_only = true; exec_list_push_tail(&b->impl->locals, &const_tmp->node); -- cgit v1.2.3 From 64bc58a88ee3c0131a7d540b2ff61a0c707563e4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 12:37:10 -0700 Subject: nir/spirv: Handle control-flow with loops --- src/glsl/nir/spirv_to_nir.c | 168 +++++++++++++++++++++++++++++++----- src/glsl/nir/spirv_to_nir_private.h | 4 +- 2 files changed, 151 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 3bbf91453fd..a4f13603dac 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1000,6 +1000,13 @@ vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, b->block = NULL; break; + case SpvOpSelectionMerge: + case SpvOpLoopMerge: + assert(b->block && b->block->merge_op == SpvOpNop); + b->block->merge_op = opcode; + b->block->merge_block_id = w[1]; + break; + default: /* Continue on as per normal */ return true; @@ -1015,19 +1022,20 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, switch (opcode) { case SpvOpLabel: { struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; + assert(block->block == NULL); + struct exec_node *list_tail = exec_list_get_tail(b->nb.cf_node_list); nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); assert(tail_node->type == nir_cf_node_block); block->block = nir_cf_node_as_block(tail_node); + 
assert(exec_list_is_empty(&block->block->instr_list)); break; } case SpvOpLoopMerge: case SpvOpSelectionMerge: - assert(b->merge_block == NULL); - /* TODO: Selection Control */ - b->merge_block = vtn_value(b, w[1], vtn_value_type_block)->block; + /* This is handled by cfg pre-pass and walk_blocks */ break; case SpvOpUndef: @@ -1186,19 +1194,68 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, static void vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, - struct vtn_block *end) + struct vtn_block *break_block, struct vtn_block *cont_block, + struct vtn_block *end_block) { struct vtn_block *block = start; - while (block != end) { + while (block != end_block) { + const uint32_t *w = block->branch; + SpvOp branch_op = w[0] & SpvOpCodeMask; + + if (block->block != NULL) { + /* We've already visited this block once before so this is a + * back-edge. Back-edges are only allowed to point to a loop + * merge. + */ + assert(block == cont_block); + return; + } + + b->block = block; vtn_foreach_instruction(b, block->label, block->branch, vtn_handle_body_instruction); - const uint32_t *w = block->branch; - SpvOp branch_op = w[0] & SpvOpCodeMask; switch (branch_op) { case SpvOpBranch: { - assert(vtn_value(b, w[1], vtn_value_type_block)->block == end); - return; + struct vtn_block *branch_block = + vtn_value(b, w[1], vtn_value_type_block)->block; + + if (branch_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (branch_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (branch_block == end_block) { + return; + } else if (branch_block->merge_op == SpvOpLoopMerge) { + /* This is the jump into a loop. 
*/ + cont_block = branch_block; + break_block = vtn_value(b, branch_block->merge_block_id, + vtn_value_type_block)->block; + + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node); + + struct exec_list *old_list = b->nb.cf_node_list; + + nir_builder_insert_after_cf_list(&b->nb, &loop->body); + vtn_walk_blocks(b, branch_block, break_block, cont_block, NULL); + + nir_builder_insert_after_cf_list(&b->nb, old_list); + block = break_block; + continue; + } else { + /* TODO: Can this ever happen? */ + block = branch_block; + continue; + } } case SpvOpBranchConditional: { @@ -1207,28 +1264,99 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_value(b, w[2], vtn_value_type_block)->block; struct vtn_block *else_block = vtn_value(b, w[3], vtn_value_type_block)->block; - struct vtn_block *merge_block = b->merge_block; nir_if *if_stmt = nir_if_create(b->shader); if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])); nir_cf_node_insert_end(b->nb.cf_node_list, &if_stmt->cf_node); - struct exec_list *old_list = b->nb.cf_node_list; + if (then_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_instr_insert_after_cf_list(&if_stmt->then_list, + &jump->instr); + block = else_block; + } else if (else_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_instr_insert_after_cf_list(&if_stmt->else_list, + &jump->instr); + block = then_block; + } else if (then_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_instr_insert_after_cf_list(&if_stmt->then_list, + &jump->instr); + block = else_block; + } else if (else_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_instr_insert_after_cf_list(&if_stmt->else_list, + &jump->instr); + block = then_block; + } else { + /* Conventional if statement 
*/ + assert(block->merge_op == SpvOpSelectionMerge); + struct vtn_block *merge_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; + + struct exec_list *old_list = b->nb.cf_node_list; - nir_builder_insert_after_cf_list(&b->nb, &if_stmt->then_list); - vtn_walk_blocks(b, then_block, merge_block); + nir_builder_insert_after_cf_list(&b->nb, &if_stmt->then_list); + vtn_walk_blocks(b, then_block, break_block, cont_block, merge_block); - nir_builder_insert_after_cf_list(&b->nb, &if_stmt->else_list); - vtn_walk_blocks(b, else_block, merge_block); + nir_builder_insert_after_cf_list(&b->nb, &if_stmt->else_list); + vtn_walk_blocks(b, else_block, break_block, cont_block, merge_block); + + nir_builder_insert_after_cf_list(&b->nb, old_list); + block = merge_block; + continue; + } - nir_builder_insert_after_cf_list(&b->nb, old_list); - block = merge_block; + /* If we got here then we inserted a predicated break or continue + * above and we need to handle the other case. We already set + * `block` above to indicate what block to visit after the + * predicated break. + */ + + /* It's possible that the other branch is also a break/continue. + * If it is, we handle that here. + */ + if (block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } + + /* If we got here then there was a predicated break/continue but + * the other half of the if has stuff in it. `block` was already + * set above so there is nothing left for us to do. 
+ */ continue; } + case SpvOpReturn: { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_return); + nir_builder_instr_insert(&b->nb, &jump->instr); + return; + } + + case SpvOpKill: { + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard); + nir_builder_instr_insert(&b->nb, &discard->instr); + return; + } + case SpvOpSwitch: - case SpvOpKill: - case SpvOpReturn: case SpvOpReturnValue: case SpvOpUnreachable: default: @@ -1275,7 +1403,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->impl = nir_function_impl_create(func->overload); nir_builder_init(&b->nb, b->impl); nir_builder_insert_after_cf_list(&b->nb, &b->impl->body); - vtn_walk_blocks(b, func->start_block, NULL); + vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); } ralloc_free(b); diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index fd80dd4e161..d2b364bdfeb 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -47,6 +47,9 @@ enum vtn_value_type { }; struct vtn_block { + /* Merge opcode if this block contains a merge; SpvOpNop otherwise. 
*/ + SpvOp merge_op; + uint32_t merge_block_id; const uint32_t *label; const uint32_t *branch; nir_block *block; @@ -92,7 +95,6 @@ struct vtn_builder { nir_shader *shader; nir_function_impl *impl; struct vtn_block *block; - struct vtn_block *merge_block; unsigned value_id_bound; struct vtn_value *values; -- cgit v1.2.3 From 56f533b3a05141df907a608c504bb1cc3dcf8641 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 12:54:02 -0700 Subject: nir/spirv: Handle boolean uniforms correctly --- src/glsl/nir/spirv_to_nir.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a4f13603dac..099bbaf42e1 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -573,7 +573,16 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_builder_instr_insert(&b->nb, &load->instr); val->type = src_type; - val->ssa = &load->dest.ssa; + + if (src->var->data.mode == nir_var_uniform && + glsl_get_base_type(src_type) == GLSL_TYPE_BOOL) { + /* Uniform boolean loads need to be fixed up since they're defined + * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. 
+ */ + val->ssa = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->ssa = &load->dest.ssa; + } break; } -- cgit v1.2.3 From 036a4b185560a562f2b2a7ae6deb0ab23878090b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 15:33:21 -0700 Subject: nir/spirv: Handle jump-to-loop in a more general way --- src/glsl/nir/spirv_to_nir.c | 46 ++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 099bbaf42e1..88b0e1bc980 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1208,9 +1208,6 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, { struct vtn_block *block = start; while (block != end_block) { - const uint32_t *w = block->branch; - SpvOp branch_op = w[0] & SpvOpCodeMask; - if (block->block != NULL) { /* We've already visited this block once before so this is a * back-edge. Back-edges are only allowed to point to a loop @@ -1220,6 +1217,31 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, return; } + if (block->merge_op == SpvOpLoopMerge) { + /* This is the jump into a loop. 
*/ + cont_block = block; + break_block = vtn_value(b, block->merge_block_id, + vtn_value_type_block)->block; + + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node); + + struct exec_list *old_list = b->nb.cf_node_list; + + /* Reset the merge_op to prevent infinite recursion */ + block->merge_op = SpvOpNop; + + nir_builder_insert_after_cf_list(&b->nb, &loop->body); + vtn_walk_blocks(b, block, break_block, cont_block, NULL); + + nir_builder_insert_after_cf_list(&b->nb, old_list); + block = break_block; + continue; + } + + const uint32_t *w = block->branch; + SpvOp branch_op = w[0] & SpvOpCodeMask; + b->block = block; vtn_foreach_instruction(b, block->label, block->branch, vtn_handle_body_instruction); @@ -1243,25 +1265,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, return; } else if (branch_block == end_block) { return; - } else if (branch_block->merge_op == SpvOpLoopMerge) { - /* This is the jump into a loop. */ - cont_block = branch_block; - break_block = vtn_value(b, branch_block->merge_block_id, - vtn_value_type_block)->block; - - nir_loop *loop = nir_loop_create(b->shader); - nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node); - - struct exec_list *old_list = b->nb.cf_node_list; - - nir_builder_insert_after_cf_list(&b->nb, &loop->body); - vtn_walk_blocks(b, branch_block, break_block, cont_block, NULL); - - nir_builder_insert_after_cf_list(&b->nb, old_list); - block = break_block; - continue; - } else { - /* TODO: Can this ever happen? */ - block = branch_block; - continue; - } -- cgit v1.2.3 From 0fa9211d7ff4f8d3e058d2689b5ba05372012539 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 15:36:54 -0700 Subject: nir/spirv: Make the global constants in spirv.h static I've been promised in a bug that this will be fixed in a future version of the header. However, in the interest of my branch building, I'm adding these changes in myself for the moment.
--- src/glsl/nir/spirv.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv.h b/src/glsl/nir/spirv.h index 93135c09596..da717ecd342 100644 --- a/src/glsl/nir/spirv.h +++ b/src/glsl/nir/spirv.h @@ -48,13 +48,13 @@ namespace spv { -const int MagicNumber = 0x07230203; -const int Version = 99; +static const int MagicNumber = 0x07230203; +static const int Version = 99; typedef unsigned int Id; -const unsigned int OpCodeMask = 0xFFFF; -const unsigned int WordCountShift = 16; +static const unsigned int OpCodeMask = 0xFFFF; +static const unsigned int WordCountShift = 16; enum SourceLanguage { SourceLanguageUnknown = 0, @@ -677,13 +677,13 @@ enum Op { #ifndef __cplusplus -const int SpvMagicNumber = 0x07230203; -const int SpvVersion = 99; +static const int SpvMagicNumber = 0x07230203; +static const int SpvVersion = 99; typedef unsigned int SpvId; -const unsigned int SpvOpCodeMask = 0xFFFF; -const unsigned int SpvWordCountShift = 16; +static const unsigned int SpvOpCodeMask = 0xFFFF; +static const unsigned int SpvWordCountShift = 16; typedef enum SpvSourceLanguage_ { SpvSourceLanguageUnknown = 0, -- cgit v1.2.3 From a53e7955245334b1b8e47bc94f35e6c68859b10c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 May 2015 12:32:58 -0700 Subject: nir/types: Add support for sampler types --- src/glsl/nir/nir_types.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ src/glsl/nir/nir_types.h | 8 ++++++++ 2 files changed, 49 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index a6d35fe6179..35421506545 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -124,6 +124,20 @@ glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index) return type->fields.structure[index].name; } +glsl_sampler_dim +glsl_get_sampler_dim(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return 
(glsl_sampler_dim)type->sampler_dimensionality; +} + +glsl_base_type +glsl_get_sampler_result_type(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return (glsl_base_type)type->sampler_type; +} + bool glsl_type_is_void(const glsl_type *type) { @@ -154,6 +168,26 @@ glsl_type_is_matrix(const struct glsl_type *type) return type->is_matrix(); } +bool +glsl_type_is_sampler(const struct glsl_type *type) +{ + return type->is_sampler(); +} + +bool +glsl_sampler_type_is_shadow(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return type->sampler_shadow; +} + +bool +glsl_sampler_type_is_array(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return type->sampler_array; +} + const glsl_type * glsl_void_type(void) { @@ -223,6 +257,13 @@ glsl_struct_type(const glsl_struct_field *fields, return glsl_type::get_record_instance(fields, num_fields, name); } +const struct glsl_type * +glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array, + enum glsl_base_type base_type) +{ + return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type); +} + const glsl_type * glsl_function_type(const glsl_type *return_type, const glsl_function_param *params, unsigned num_params) diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index f19f0e5db5d..ceb131c9f47 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -68,12 +68,17 @@ unsigned glsl_get_length(const struct glsl_type *type); const char *glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index); +enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type); +enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type); bool glsl_type_is_void(const struct glsl_type *type); bool glsl_type_is_vector(const struct glsl_type *type); bool glsl_type_is_scalar(const struct glsl_type *type); bool glsl_type_is_vector_or_scalar(const struct glsl_type *type); bool 
glsl_type_is_matrix(const struct glsl_type *type); +bool glsl_type_is_sampler(const struct glsl_type *type); +bool glsl_sampler_type_is_shadow(const struct glsl_type *type); +bool glsl_sampler_type_is_array(const struct glsl_type *type); const struct glsl_type *glsl_void_type(void); const struct glsl_type *glsl_float_type(void); @@ -91,6 +96,9 @@ const struct glsl_type *glsl_array_type(const struct glsl_type *base, unsigned elements); const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields, unsigned num_fields, const char *name); +const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim, + bool is_shadow, bool is_array, + enum glsl_base_type base_type); const struct glsl_type * glsl_function_type(const struct glsl_type *return_type, const struct glsl_function_param *params, unsigned num_params); -- cgit v1.2.3 From d6f52dfb3e9e40a085ba012e69f8786462a1c3c9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 May 2015 12:33:29 -0700 Subject: nir/spirv: Move Exp and Log to the list of currently unhandled ALU ops NIR doesn't have the native opcodes for them anymore --- src/glsl/nir/spirv_glsl450_to_nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c index 240ff012fe1..3b9d0940aad 100644 --- a/src/glsl/nir/spirv_glsl450_to_nir.c +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -170,8 +170,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, nir_fcos(&b->nb, src[0])); return; case Pow: op = nir_op_fpow; break; - case Exp: op = nir_op_fexp; break; - case Log: op = nir_op_flog; break; case Exp2: op = nir_op_fexp2; break; case Log2: op = nir_op_flog2; break; case Sqrt: op = nir_op_fsqrt; break; @@ -227,6 +225,8 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case FindLSB: op = nir_op_find_lsb; break; case FindMSB: op = nir_op_ufind_msb; break; /* TODO */ + case 
Exp: + case Log: case Clamp: case Asin: case Acos: -- cgit v1.2.3 From 4e44dcc3122d9f5d79cedeff1b506cbfff36b968 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 May 2015 12:34:15 -0700 Subject: nir/spirv: Add initial support for samplers --- src/glsl/nir/spirv_to_nir.c | 155 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 152 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 88b0e1bc980..1a789ee4786 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -256,7 +256,36 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, */ return vtn_value(b, args[1], vtn_value_type_type)->type; - case SpvOpTypeSampler: + case SpvOpTypeSampler: { + const struct glsl_type *sampled_type = + vtn_value(b, args[0], vtn_value_type_type)->type; + + assert(glsl_type_is_vector_or_scalar(sampled_type)); + + enum glsl_sampler_dim dim; + switch ((SpvDim)args[1]) { + case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; + case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; + case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; + case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; + case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; + case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; + default: + unreachable("Invalid SPIR-V Sampler dimension"); + } + + /* TODO: Handle the various texture image/filter options */ + (void)args[2]; + + bool is_array = args[3]; + bool is_shadow = args[4]; + + assert(args[5] == 0 && "FIXME: Handl multi-sampled textures"); + + return glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); + } + case SpvOpTypeRuntimeArray: case SpvOpTypeOpaque: case SpvOpTypeEvent: @@ -559,10 +588,16 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } case SpvOpLoad: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); nir_deref_var *src = vtn_value(b, w[3], vtn_value_type_deref)->deref; const struct glsl_type 
*src_type = nir_deref_tail(&src->deref)->type; + + if (glsl_get_base_type(src_type) == GLSL_TYPE_SAMPLER) { + vtn_push_value(b, w[2], vtn_value_type_deref)->deref = src; + return; + } + assert(glsl_type_is_vector_or_scalar(src_type)); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); @@ -635,11 +670,125 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, unreachable("Unhandled opcode"); } +static nir_tex_src +vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) +{ + nir_tex_src src; + src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa); + src.src_type = type; + return src; +} + static void vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Unhandled opcode"); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + nir_deref_var *sampler = vtn_value(b, w[3], vtn_value_type_deref)->deref; + + nir_tex_src srcs[8]; /* 8 should be enough */ + nir_tex_src *p = srcs; + + unsigned coord_components = 0; + switch (opcode) { + case SpvOpTextureSample: + case SpvOpTextureSampleDref: + case SpvOpTextureSampleLod: + case SpvOpTextureSampleProj: + case SpvOpTextureSampleGrad: + case SpvOpTextureSampleOffset: + case SpvOpTextureSampleProjLod: + case SpvOpTextureSampleProjGrad: + case SpvOpTextureSampleLodOffset: + case SpvOpTextureSampleProjOffset: + case SpvOpTextureSampleGradOffset: + case SpvOpTextureSampleProjLodOffset: + case SpvOpTextureSampleProjGradOffset: + case SpvOpTextureFetchTexelLod: + case SpvOpTextureFetchTexelOffset: + case SpvOpTextureFetchSample: + case SpvOpTextureFetchTexel: + case SpvOpTextureGather: + case SpvOpTextureGatherOffset: + case SpvOpTextureGatherOffsets: + case SpvOpTextureQueryLod: { + /* All these types have the coordinate as their first real argument */ + struct vtn_value *coord = vtn_value(b, w[4], 
vtn_value_type_ssa); + coord_components = glsl_get_vector_elements(coord->type); + p->src = nir_src_for_ssa(coord->ssa); + p->src_type = nir_tex_src_coord; + p++; + break; + } + default: + break; + } + + nir_texop texop; + switch (opcode) { + case SpvOpTextureSample: + texop = nir_texop_tex; + + if (count == 6) { + texop = nir_texop_txb; + *p++ = vtn_tex_src(b, w[5], nir_tex_src_bias); + } + break; + + case SpvOpTextureSampleDref: + case SpvOpTextureSampleLod: + case SpvOpTextureSampleProj: + case SpvOpTextureSampleGrad: + case SpvOpTextureSampleOffset: + case SpvOpTextureSampleProjLod: + case SpvOpTextureSampleProjGrad: + case SpvOpTextureSampleLodOffset: + case SpvOpTextureSampleProjOffset: + case SpvOpTextureSampleGradOffset: + case SpvOpTextureSampleProjLodOffset: + case SpvOpTextureSampleProjGradOffset: + case SpvOpTextureFetchTexelLod: + case SpvOpTextureFetchTexelOffset: + case SpvOpTextureFetchSample: + case SpvOpTextureFetchTexel: + case SpvOpTextureGather: + case SpvOpTextureGatherOffset: + case SpvOpTextureGatherOffsets: + case SpvOpTextureQuerySizeLod: + case SpvOpTextureQuerySize: + case SpvOpTextureQueryLod: + case SpvOpTextureQueryLevels: + case SpvOpTextureQuerySamples: + default: + unreachable("Unhandled opcode"); + } + + nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); + + const struct glsl_type *sampler_type = nir_deref_tail(&sampler->deref)->type; + instr->sampler_dim = glsl_get_sampler_dim(sampler_type); + + switch (glsl_get_sampler_result_type(sampler_type)) { + case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; + case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; + case GLSL_TYPE_UINT: instr->dest_type = nir_type_unsigned; break; + case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; + default: + unreachable("Invalid base type for sampler result"); + } + + instr->op = texop; + memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); + instr->coord_components = coord_components; + 
instr->is_array = glsl_sampler_type_is_array(sampler_type); + instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); + + instr->sampler = sampler; + + nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); + val->ssa = &instr->dest.ssa; + + nir_builder_instr_insert(&b->nb, &instr->instr); } static void -- cgit v1.2.3 From a63952510d64c09720614b971e461d7f8ed17c7a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 May 2015 12:34:32 -0700 Subject: nir/spirv: Don't assert that the current block is empty It's possible that someone will give us SPIR-V code in which someone needlessly branches to new blocks. We should handle that ok now. --- src/glsl/nir/spirv_to_nir.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 1a789ee4786..1fc1b8bc5dc 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1186,8 +1186,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); assert(tail_node->type == nir_cf_node_block); block->block = nir_cf_node_as_block(tail_node); - - assert(exec_list_is_empty(&block->block->instr_list)); break; } -- cgit v1.2.3 From 75cb85c56aea72f8a044a25e9a8ffd5621a4129f Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 15 May 2015 22:05:09 -0700 Subject: vk: Add missing VKAPI for vkQueueRemoveMemReferences --- src/vulkan/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index e2871fa43ba..0a80a201d03 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -599,7 +599,7 @@ VkResult VKAPI vkQueueAddMemReferences( return VK_SUCCESS; } -VkResult vkQueueRemoveMemReferences( +VkResult VKAPI vkQueueRemoveMemReferences( VkQueue queue, uint32_t count, const VkDeviceMemory* pMems) -- cgit v1.2.3 From 454345da1e8d22f3612aa3fec6b1cf3027845d48 Mon Sep 17 00:00:00 
2001 From: Kristian Høgsberg Date: Sun, 17 May 2015 16:33:48 -0700 Subject: vk: Add script for generating ifunc entry points This lets us generate a hash table for vkGetProcAddress and lets us call public functions internally without the public entrypoint overhead. --- .gitignore | 2 + src/vulkan/Makefile.am | 12 +++ src/vulkan/device.c | 186 +++++++++++++++++----------------- src/vulkan/image.c | 10 +- src/vulkan/intel.c | 4 +- src/vulkan/meta.c | 22 ++-- src/vulkan/pipeline.c | 16 +-- src/vulkan/private.h | 7 +- src/vulkan/vk_gen.py | 266 +++++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 403 insertions(+), 122 deletions(-) create mode 100644 src/vulkan/vk_gen.py (limited to 'src') diff --git a/.gitignore b/.gitignore index 21aa35cd36d..e395fd18803 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,5 @@ manifest.txt Makefile Makefile.in .install-mesa-links +entrypoints.c +entrypoints.h diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index af2cde5dbbe..8b53a089615 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -62,8 +62,20 @@ libvulkan_la_SOURCES = \ image.c \ meta.c \ intel.c \ + entrypoints.c \ + entrypoints.h \ compiler.cpp +BUILT_SOURCES = entrypoints.h entrypoints.c + +entrypoints.h : $(vulkan_include_HEADERS) vk_gen.py + $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) vk_gen.py header > $@ + +entrypoints.c : $(vulkan_include_HEADERS) vk_gen.py + $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) vk_gen.py code > $@ + +CLEANFILES = entrypoints.h entrypoints.c + bin_PROGRAMS = vk vk_SOURCES = vk.c diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 0a80a201d03..d9398e05a24 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -114,7 +114,7 @@ static const VkAllocCallbacks default_alloc_callbacks = { .pfnFree = default_free }; -VkResult VKAPI vkCreateInstance( +VkResult anv_CreateInstance( const VkInstanceCreateInfo* pCreateInfo, VkInstance* pInstance) { @@ -150,7 +150,7 @@ VkResult 
VKAPI vkCreateInstance( return VK_SUCCESS; } -VkResult VKAPI vkDestroyInstance( +VkResult anv_DestroyInstance( VkInstance _instance) { struct anv_instance *instance = (struct anv_instance *) _instance; @@ -160,7 +160,7 @@ VkResult VKAPI vkDestroyInstance( return VK_SUCCESS; } -VkResult VKAPI vkEnumeratePhysicalDevices( +VkResult anv_EnumeratePhysicalDevices( VkInstance _instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices) @@ -174,7 +174,7 @@ VkResult VKAPI vkEnumeratePhysicalDevices( return VK_SUCCESS; } -VkResult VKAPI vkGetPhysicalDeviceInfo( +VkResult anv_GetPhysicalDeviceInfo( VkPhysicalDevice physicalDevice, VkPhysicalDeviceInfoType infoType, size_t* pDataSize, @@ -252,7 +252,7 @@ void * vkGetProcAddr( VkPhysicalDevice physicalDevice, const char* pName) { - return NULL; + return anv_lookup_entrypoint(pName); } static void @@ -275,7 +275,7 @@ parse_debug_flags(struct anv_device *device) } } -VkResult VKAPI vkCreateDevice( +VkResult anv_CreateDevice( VkPhysicalDevice _physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice) @@ -337,7 +337,7 @@ VkResult VKAPI vkCreateDevice( return vk_error(VK_ERROR_UNAVAILABLE); } -VkResult VKAPI vkDestroyDevice( +VkResult anv_DestroyDevice( VkDevice _device) { struct anv_device *device = (struct anv_device *) _device; @@ -358,7 +358,7 @@ VkResult VKAPI vkDestroyDevice( return VK_SUCCESS; } -VkResult VKAPI vkGetGlobalExtensionInfo( +VkResult anv_GetGlobalExtensionInfo( VkExtensionInfoType infoType, uint32_t extensionIndex, size_t* pDataSize, @@ -381,7 +381,7 @@ VkResult VKAPI vkGetGlobalExtensionInfo( } } -VkResult VKAPI vkGetPhysicalDeviceExtensionInfo( +VkResult anv_GetPhysicalDeviceExtensionInfo( VkPhysicalDevice physicalDevice, VkExtensionInfoType infoType, uint32_t extensionIndex, @@ -405,7 +405,7 @@ VkResult VKAPI vkGetPhysicalDeviceExtensionInfo( } } -VkResult VKAPI vkEnumerateLayers( +VkResult anv_EnumerateLayers( VkPhysicalDevice physicalDevice, size_t maxStringSize, size_t* 
pLayerCount, @@ -417,7 +417,7 @@ VkResult VKAPI vkEnumerateLayers( return VK_SUCCESS; } -VkResult VKAPI vkGetDeviceQueue( +VkResult anv_GetDeviceQueue( VkDevice _device, uint32_t queueNodeIndex, uint32_t queueIndex, @@ -559,7 +559,7 @@ anv_batch_emit_reloc(struct anv_batch *batch, location - batch->bo.map, bo, delta); } -VkResult VKAPI vkQueueSubmit( +VkResult anv_QueueSubmit( VkQueue _queue, uint32_t cmdBufferCount, const VkCmdBuffer* pCmdBuffers, @@ -591,7 +591,7 @@ VkResult VKAPI vkQueueSubmit( return VK_SUCCESS; } -VkResult VKAPI vkQueueAddMemReferences( +VkResult anv_QueueAddMemReferences( VkQueue queue, uint32_t count, const VkDeviceMemory* pMems) @@ -599,7 +599,7 @@ VkResult VKAPI vkQueueAddMemReferences( return VK_SUCCESS; } -VkResult VKAPI vkQueueRemoveMemReferences( +VkResult anv_QueueRemoveMemReferences( VkQueue queue, uint32_t count, const VkDeviceMemory* pMems) @@ -607,7 +607,7 @@ VkResult VKAPI vkQueueRemoveMemReferences( return VK_SUCCESS; } -VkResult VKAPI vkQueueWaitIdle( +VkResult anv_QueueWaitIdle( VkQueue _queue) { struct anv_queue *queue = (struct anv_queue *) _queue; @@ -615,7 +615,7 @@ VkResult VKAPI vkQueueWaitIdle( return vkDeviceWaitIdle((VkDevice) queue->device); } -VkResult VKAPI vkDeviceWaitIdle( +VkResult anv_DeviceWaitIdle( VkDevice _device) { struct anv_device *device = (struct anv_device *) _device; @@ -717,7 +717,7 @@ anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) return VK_SUCCESS; } -VkResult VKAPI vkAllocMemory( +VkResult anv_AllocMemory( VkDevice _device, const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem) @@ -747,7 +747,7 @@ VkResult VKAPI vkAllocMemory( return result; } -VkResult VKAPI vkFreeMemory( +VkResult anv_FreeMemory( VkDevice _device, VkDeviceMemory _mem) { @@ -765,7 +765,7 @@ VkResult VKAPI vkFreeMemory( return VK_SUCCESS; } -VkResult VKAPI vkSetMemoryPriority( +VkResult anv_SetMemoryPriority( VkDevice device, VkDeviceMemory mem, VkMemoryPriority priority) @@ -773,7 +773,7 @@ 
VkResult VKAPI vkSetMemoryPriority( return VK_SUCCESS; } -VkResult VKAPI vkMapMemory( +VkResult anv_MapMemory( VkDevice _device, VkDeviceMemory _mem, VkDeviceSize offset, @@ -798,7 +798,7 @@ VkResult VKAPI vkMapMemory( return VK_SUCCESS; } -VkResult VKAPI vkUnmapMemory( +VkResult anv_UnmapMemory( VkDevice _device, VkDeviceMemory _mem) { @@ -809,7 +809,7 @@ VkResult VKAPI vkUnmapMemory( return VK_SUCCESS; } -VkResult VKAPI vkFlushMappedMemory( +VkResult anv_FlushMappedMemory( VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, @@ -820,7 +820,7 @@ VkResult VKAPI vkFlushMappedMemory( return VK_SUCCESS; } -VkResult VKAPI vkPinSystemMemory( +VkResult anv_PinSystemMemory( VkDevice device, const void* pSysMem, size_t memSize, @@ -829,7 +829,7 @@ VkResult VKAPI vkPinSystemMemory( return VK_SUCCESS; } -VkResult VKAPI vkGetMultiDeviceCompatibility( +VkResult anv_GetMultiDeviceCompatibility( VkPhysicalDevice physicalDevice0, VkPhysicalDevice physicalDevice1, VkPhysicalDeviceCompatibilityInfo* pInfo) @@ -837,7 +837,7 @@ VkResult VKAPI vkGetMultiDeviceCompatibility( return VK_UNSUPPORTED; } -VkResult VKAPI vkOpenSharedMemory( +VkResult anv_OpenSharedMemory( VkDevice device, const VkMemoryOpenInfo* pOpenInfo, VkDeviceMemory* pMem) @@ -845,7 +845,7 @@ VkResult VKAPI vkOpenSharedMemory( return VK_UNSUPPORTED; } -VkResult VKAPI vkOpenSharedSemaphore( +VkResult anv_OpenSharedSemaphore( VkDevice device, const VkSemaphoreOpenInfo* pOpenInfo, VkSemaphore* pSemaphore) @@ -853,7 +853,7 @@ VkResult VKAPI vkOpenSharedSemaphore( return VK_UNSUPPORTED; } -VkResult VKAPI vkOpenPeerMemory( +VkResult anv_OpenPeerMemory( VkDevice device, const VkPeerMemoryOpenInfo* pOpenInfo, VkDeviceMemory* pMem) @@ -861,7 +861,7 @@ VkResult VKAPI vkOpenPeerMemory( return VK_UNSUPPORTED; } -VkResult VKAPI vkOpenPeerImage( +VkResult anv_OpenPeerImage( VkDevice device, const VkPeerImageOpenInfo* pOpenInfo, VkImage* pImage, @@ -939,7 +939,7 @@ static VkResult (*anv_object_destructors[])(struct anv_device 
*device, [VK_OBJECT_TYPE_RENDER_PASS] = anv_free_destructor }; -VkResult VKAPI vkDestroyObject( +VkResult anv_DestroyObject( VkDevice _device, VkObjectType objType, VkObject object) @@ -988,7 +988,7 @@ fill_memory_requirements( } } -VkResult VKAPI vkGetObjectInfo( +VkResult anv_GetObjectInfo( VkDevice _device, VkObjectType objType, VkObject object, @@ -1013,7 +1013,7 @@ VkResult VKAPI vkGetObjectInfo( } -VkResult VKAPI vkQueueBindObjectMemory( +VkResult anv_QueueBindObjectMemory( VkQueue queue, VkObjectType objType, VkObject object, @@ -1043,7 +1043,7 @@ VkResult VKAPI vkQueueBindObjectMemory( return VK_SUCCESS; } -VkResult VKAPI vkQueueBindObjectMemoryRange( +VkResult anv_QueueBindObjectMemoryRange( VkQueue queue, VkObjectType objType, VkObject object, @@ -1056,7 +1056,7 @@ VkResult VKAPI vkQueueBindObjectMemoryRange( stub_return(VK_UNSUPPORTED); } -VkResult vkQueueBindImageMemoryRange( +VkResult anv_QueueBindImageMemoryRange( VkQueue queue, VkImage image, uint32_t allocationIdx, @@ -1067,7 +1067,7 @@ VkResult vkQueueBindImageMemoryRange( stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkCreateFence( +VkResult anv_CreateFence( VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence) @@ -1075,7 +1075,7 @@ VkResult VKAPI vkCreateFence( stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkResetFences( +VkResult anv_ResetFences( VkDevice device, uint32_t fenceCount, VkFence* pFences) @@ -1083,14 +1083,14 @@ VkResult VKAPI vkResetFences( stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkGetFenceStatus( +VkResult anv_GetFenceStatus( VkDevice device, VkFence fence) { stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkWaitForFences( +VkResult anv_WaitForFences( VkDevice device, uint32_t fenceCount, const VkFence* pFences, @@ -1102,7 +1102,7 @@ VkResult VKAPI vkWaitForFences( // Queue semaphore functions -VkResult VKAPI vkCreateSemaphore( +VkResult anv_CreateSemaphore( VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore) @@ -1110,14 
+1110,14 @@ VkResult VKAPI vkCreateSemaphore( stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkQueueSignalSemaphore( +VkResult anv_QueueSignalSemaphore( VkQueue queue, VkSemaphore semaphore) { stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkQueueWaitSemaphore( +VkResult anv_QueueWaitSemaphore( VkQueue queue, VkSemaphore semaphore) { @@ -1126,7 +1126,7 @@ VkResult VKAPI vkQueueWaitSemaphore( // Event functions -VkResult VKAPI vkCreateEvent( +VkResult anv_CreateEvent( VkDevice device, const VkEventCreateInfo* pCreateInfo, VkEvent* pEvent) @@ -1134,21 +1134,21 @@ VkResult VKAPI vkCreateEvent( stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkGetEventStatus( +VkResult anv_GetEventStatus( VkDevice device, VkEvent event) { stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkSetEvent( +VkResult anv_SetEvent( VkDevice device, VkEvent event) { stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkResetEvent( +VkResult anv_ResetEvent( VkDevice device, VkEvent event) { @@ -1163,7 +1163,7 @@ struct anv_query_pool { struct anv_bo bo; }; -VkResult VKAPI vkCreateQueryPool( +VkResult anv_CreateQueryPool( VkDevice _device, const VkQueryPoolCreateInfo* pCreateInfo, VkQueryPool* pQueryPool) @@ -1194,7 +1194,7 @@ VkResult VKAPI vkCreateQueryPool( return result; } -VkResult VKAPI vkGetQueryPoolResults( +VkResult anv_GetQueryPoolResults( VkDevice device, VkQueryPool queryPool, uint32_t startQuery, @@ -1208,7 +1208,7 @@ VkResult VKAPI vkGetQueryPoolResults( // Format capabilities -VkResult VKAPI vkGetFormatInfo( +VkResult anv_GetFormatInfo( VkDevice device, VkFormat format, VkFormatInfoType infoType, @@ -1220,7 +1220,7 @@ VkResult VKAPI vkGetFormatInfo( // Buffer functions -VkResult VKAPI vkCreateBuffer( +VkResult anv_CreateBuffer( VkDevice _device, const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer) @@ -1246,7 +1246,7 @@ VkResult VKAPI vkCreateBuffer( // Buffer view functions -VkResult VKAPI vkCreateBufferView( +VkResult anv_CreateBufferView( VkDevice _device, const 
VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView) @@ -1320,7 +1320,7 @@ VkResult VKAPI vkCreateBufferView( // Sampler functions -VkResult VKAPI vkCreateSampler( +VkResult anv_CreateSampler( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler) @@ -1410,7 +1410,7 @@ VkResult VKAPI vkCreateSampler( // Descriptor set functions -VkResult VKAPI vkCreateDescriptorSetLayout( +VkResult anv_CreateDescriptorSetLayout( VkDevice _device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout) @@ -1537,21 +1537,21 @@ VkResult VKAPI vkCreateDescriptorSetLayout( return VK_SUCCESS; } -VkResult VKAPI vkBeginDescriptorPoolUpdate( +VkResult anv_BeginDescriptorPoolUpdate( VkDevice device, VkDescriptorUpdateMode updateMode) { stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkEndDescriptorPoolUpdate( +VkResult anv_EndDescriptorPoolUpdate( VkDevice device, VkCmdBuffer cmd) { stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkCreateDescriptorPool( +VkResult anv_CreateDescriptorPool( VkDevice device, VkDescriptorPoolUsage poolUsage, uint32_t maxSets, @@ -1561,14 +1561,14 @@ VkResult VKAPI vkCreateDescriptorPool( stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkResetDescriptorPool( +VkResult anv_ResetDescriptorPool( VkDevice device, VkDescriptorPool descriptorPool) { stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkAllocDescriptorSets( +VkResult anv_AllocDescriptorSets( VkDevice _device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, @@ -1600,7 +1600,7 @@ VkResult VKAPI vkAllocDescriptorSets( return VK_UNSUPPORTED; } -void VKAPI vkClearDescriptorSets( +void anv_ClearDescriptorSets( VkDevice device, VkDescriptorPool descriptorPool, uint32_t count, @@ -1609,7 +1609,7 @@ void VKAPI vkClearDescriptorSets( stub(); } -void VKAPI vkUpdateDescriptors( +void anv_UpdateDescriptors( VkDevice _device, VkDescriptorSet descriptorSet, uint32_t updateCount, @@ -1692,7 +1692,7 @@ clamp_int64(int64_t x, int64_t min, int64_t max) 
return max; } -VkResult VKAPI vkCreateDynamicViewportState( +VkResult anv_CreateDynamicViewportState( VkDevice _device, const VkDynamicVpStateCreateInfo* pCreateInfo, VkDynamicVpState* pState) @@ -1777,7 +1777,7 @@ VkResult VKAPI vkCreateDynamicViewportState( return VK_SUCCESS; } -VkResult VKAPI vkCreateDynamicRasterState( +VkResult anv_CreateDynamicRasterState( VkDevice _device, const VkDynamicRsStateCreateInfo* pCreateInfo, VkDynamicRsState* pState) @@ -1813,7 +1813,7 @@ VkResult VKAPI vkCreateDynamicRasterState( return VK_SUCCESS; } -VkResult VKAPI vkCreateDynamicColorBlendState( +VkResult anv_CreateDynamicColorBlendState( VkDevice _device, const VkDynamicCbStateCreateInfo* pCreateInfo, VkDynamicCbState* pState) @@ -1833,7 +1833,7 @@ VkResult VKAPI vkCreateDynamicColorBlendState( return VK_SUCCESS; } -VkResult VKAPI vkCreateDynamicDepthStencilState( +VkResult anv_CreateDynamicDepthStencilState( VkDevice device, const VkDynamicDsStateCreateInfo* pCreateInfo, VkDynamicDsState* pState) @@ -1843,7 +1843,7 @@ VkResult VKAPI vkCreateDynamicDepthStencilState( // Command buffer functions -VkResult VKAPI vkCreateCommandBuffer( +VkResult anv_CreateCommandBuffer( VkDevice _device, const VkCmdBufferCreateInfo* pCreateInfo, VkCmdBuffer* pCmdBuffer) @@ -1905,7 +1905,7 @@ VkResult VKAPI vkCreateCommandBuffer( return result; } -VkResult VKAPI vkBeginCommandBuffer( +VkResult anv_BeginCommandBuffer( VkCmdBuffer cmdBuffer, const VkCmdBufferBeginInfo* pBeginInfo) { @@ -2049,7 +2049,7 @@ anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, } } -VkResult VKAPI vkEndCommandBuffer( +VkResult anv_EndCommandBuffer( VkCmdBuffer cmdBuffer) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; @@ -2099,7 +2099,7 @@ VkResult VKAPI vkEndCommandBuffer( return VK_SUCCESS; } -VkResult VKAPI vkResetCommandBuffer( +VkResult anv_ResetCommandBuffer( VkCmdBuffer cmdBuffer) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; @@ -2111,7 +2111,7 @@ 
VkResult VKAPI vkResetCommandBuffer( // Command buffer building functions -void VKAPI vkCmdBindPipeline( +void anv_CmdBindPipeline( VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline) @@ -2122,7 +2122,7 @@ void VKAPI vkCmdBindPipeline( cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; } -void VKAPI vkCmdBindDynamicStateObject( +void anv_CmdBindDynamicStateObject( VkCmdBuffer cmdBuffer, VkStateBindPoint stateBindPoint, VkDynamicStateObject dynamicState) @@ -2155,7 +2155,7 @@ void VKAPI vkCmdBindDynamicStateObject( }; } -void VKAPI vkCmdBindDescriptorSets( +void anv_CmdBindDescriptorSets( VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, uint32_t firstSet, @@ -2205,7 +2205,7 @@ void VKAPI vkCmdBindDescriptorSets( cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; } -void VKAPI vkCmdBindIndexBuffer( +void anv_CmdBindIndexBuffer( VkCmdBuffer cmdBuffer, VkBuffer _buffer, VkDeviceSize offset, @@ -2227,7 +2227,7 @@ void VKAPI vkCmdBindIndexBuffer( .BufferSize = buffer->size - offset); } -void VKAPI vkCmdBindVertexBuffers( +void anv_CmdBindVertexBuffers( VkCmdBuffer cmdBuffer, uint32_t startBinding, uint32_t bindingCount, @@ -2375,7 +2375,7 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->dirty = 0; } -void VKAPI vkCmdDraw( +void anv_CmdDraw( VkCmdBuffer cmdBuffer, uint32_t firstVertex, uint32_t vertexCount, @@ -2395,7 +2395,7 @@ void VKAPI vkCmdDraw( .BaseVertexLocation = 0); } -void VKAPI vkCmdDrawIndexed( +void anv_CmdDrawIndexed( VkCmdBuffer cmdBuffer, uint32_t firstIndex, uint32_t indexCount, @@ -2441,7 +2441,7 @@ anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) #define GEN7_3DPRIM_START_INSTANCE 0x243C #define GEN7_3DPRIM_BASE_VERTEX 0x2440 -void VKAPI vkCmdDrawIndirect( +void anv_CmdDrawIndirect( VkCmdBuffer cmdBuffer, VkBuffer _buffer, VkDeviceSize offset, @@ -2466,7 +2466,7 @@ void VKAPI vkCmdDrawIndirect( .VertexAccessType = SEQUENTIAL); } -void VKAPI 
vkCmdDrawIndexedIndirect( +void anv_CmdDrawIndexedIndirect( VkCmdBuffer cmdBuffer, VkBuffer _buffer, VkDeviceSize offset, @@ -2491,7 +2491,7 @@ void VKAPI vkCmdDrawIndexedIndirect( .VertexAccessType = RANDOM); } -void VKAPI vkCmdDispatch( +void anv_CmdDispatch( VkCmdBuffer cmdBuffer, uint32_t x, uint32_t y, @@ -2500,7 +2500,7 @@ void VKAPI vkCmdDispatch( stub(); } -void VKAPI vkCmdDispatchIndirect( +void anv_CmdDispatchIndirect( VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset) @@ -2508,7 +2508,7 @@ void VKAPI vkCmdDispatchIndirect( stub(); } -void VKAPI vkCmdSetEvent( +void anv_CmdSetEvent( VkCmdBuffer cmdBuffer, VkEvent event, VkPipeEvent pipeEvent) @@ -2516,7 +2516,7 @@ void VKAPI vkCmdSetEvent( stub(); } -void VKAPI vkCmdResetEvent( +void anv_CmdResetEvent( VkCmdBuffer cmdBuffer, VkEvent event, VkPipeEvent pipeEvent) @@ -2524,7 +2524,7 @@ void VKAPI vkCmdResetEvent( stub(); } -void VKAPI vkCmdWaitEvents( +void anv_CmdWaitEvents( VkCmdBuffer cmdBuffer, VkWaitEvent waitEvent, uint32_t eventCount, @@ -2535,7 +2535,7 @@ void VKAPI vkCmdWaitEvents( stub(); } -void VKAPI vkCmdPipelineBarrier( +void anv_CmdPipelineBarrier( VkCmdBuffer cmdBuffer, VkWaitEvent waitEvent, uint32_t pipeEventCount, @@ -2556,7 +2556,7 @@ anv_batch_emit_ps_depth_count(struct anv_batch *batch, .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ } -void VKAPI vkCmdBeginQuery( +void anv_CmdBeginQuery( VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot, @@ -2578,7 +2578,7 @@ void VKAPI vkCmdBeginQuery( } } -void VKAPI vkCmdEndQuery( +void anv_CmdEndQuery( VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot) @@ -2599,7 +2599,7 @@ void VKAPI vkCmdEndQuery( } } -void VKAPI vkCmdResetQueryPool( +void anv_CmdResetQueryPool( VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, @@ -2610,7 +2610,7 @@ void VKAPI vkCmdResetQueryPool( #define TIMESTAMP 0x44070 -void VKAPI vkCmdWriteTimestamp( +void anv_CmdWriteTimestamp( VkCmdBuffer cmdBuffer, 
VkTimestampType timestampType, VkBuffer destBuffer, @@ -2640,7 +2640,7 @@ void VKAPI vkCmdWriteTimestamp( } } -void VKAPI vkCmdCopyQueryPoolResults( +void anv_CmdCopyQueryPoolResults( VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, @@ -2653,7 +2653,7 @@ void VKAPI vkCmdCopyQueryPoolResults( stub(); } -void VKAPI vkCmdInitAtomicCounters( +void anv_CmdInitAtomicCounters( VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, uint32_t startCounter, @@ -2663,7 +2663,7 @@ void VKAPI vkCmdInitAtomicCounters( stub(); } -void VKAPI vkCmdLoadAtomicCounters( +void anv_CmdLoadAtomicCounters( VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, uint32_t startCounter, @@ -2674,7 +2674,7 @@ void VKAPI vkCmdLoadAtomicCounters( stub(); } -void VKAPI vkCmdSaveAtomicCounters( +void anv_CmdSaveAtomicCounters( VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, uint32_t startCounter, @@ -2685,7 +2685,7 @@ void VKAPI vkCmdSaveAtomicCounters( stub(); } -VkResult VKAPI vkCreateFramebuffer( +VkResult anv_CreateFramebuffer( VkDevice _device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer) @@ -2742,7 +2742,7 @@ VkResult VKAPI vkCreateFramebuffer( return VK_SUCCESS; } -VkResult VKAPI vkCreateRenderPass( +VkResult anv_CreateRenderPass( VkDevice _device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass) @@ -2793,7 +2793,7 @@ anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; } -void VKAPI vkCmdBeginRenderPass( +void anv_CmdBeginRenderPass( VkCmdBuffer cmdBuffer, const VkRenderPassBegin* pRenderPassBegin) { @@ -2819,7 +2819,7 @@ void VKAPI vkCmdBeginRenderPass( anv_cmd_buffer_clear(cmd_buffer, pass); } -void VKAPI vkCmdEndRenderPass( +void anv_CmdEndRenderPass( VkCmdBuffer cmdBuffer, VkRenderPass renderPass) { diff --git a/src/vulkan/image.c b/src/vulkan/image.c index aae9fbf0e8c..812fba88b9f 100644 --- a/src/vulkan/image.c +++ 
b/src/vulkan/image.c @@ -224,7 +224,7 @@ static const struct anv_tile_mode_info { [WMAJOR] = { 128, 32 } }; -VkResult VKAPI vkCreateImage( +VkResult anv_CreateImage( VkDevice _device, const VkImageCreateInfo* pCreateInfo, VkImage* pImage) @@ -279,7 +279,7 @@ VkResult VKAPI vkCreateImage( return VK_SUCCESS; } -VkResult VKAPI vkGetImageSubresourceInfo( +VkResult anv_GetImageSubresourceInfo( VkDevice device, VkImage image, const VkImageSubresource* pSubresource, @@ -342,7 +342,7 @@ create_surface_state(struct anv_device *device, return state; } -VkResult VKAPI vkCreateImageView( +VkResult anv_CreateImageView( VkDevice _device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView) @@ -373,7 +373,7 @@ VkResult VKAPI vkCreateImageView( return VK_SUCCESS; } -VkResult VKAPI vkCreateColorAttachmentView( +VkResult anv_CreateColorAttachmentView( VkDevice _device, const VkColorAttachmentViewCreateInfo* pCreateInfo, VkColorAttachmentView* pView) @@ -402,7 +402,7 @@ VkResult VKAPI vkCreateColorAttachmentView( return VK_SUCCESS; } -VkResult VKAPI vkCreateDepthStencilView( +VkResult anv_CreateDepthStencilView( VkDevice device, const VkDepthStencilViewCreateInfo* pCreateInfo, VkDepthStencilView* pView) diff --git a/src/vulkan/intel.c b/src/vulkan/intel.c index 52fadb1ab06..fbfed596397 100644 --- a/src/vulkan/intel.c +++ b/src/vulkan/intel.c @@ -29,9 +29,7 @@ #include "private.h" -#include - -VkResult VKAPI vkCreateDmaBufImageINTEL( +VkResult anv_CreateDmaBufImageINTEL( VkDevice _device, const VkDmaBufImageCreateInfo* pCreateInfo, VkDeviceMemory* pMem, diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index aa7b16aee34..c3679464151 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -628,7 +628,7 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_restore(cmd_buffer, saved_state); } -void VKAPI vkCmdCopyBuffer( +void anv_CmdCopyBuffer( VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, VkBuffer destBuffer, @@ -638,7 +638,7 @@ void VKAPI vkCmdCopyBuffer( 
stub(); } -void VKAPI vkCmdCopyImage( +void anv_CmdCopyImage( VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, @@ -650,7 +650,7 @@ void VKAPI vkCmdCopyImage( stub(); } -void VKAPI vkCmdBlitImage( +void anv_CmdBlitImage( VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, @@ -662,7 +662,7 @@ void VKAPI vkCmdBlitImage( stub(); } -void VKAPI vkCmdCopyBufferToImage( +void anv_CmdCopyBufferToImage( VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, VkImage destImage, @@ -673,7 +673,7 @@ void VKAPI vkCmdCopyBufferToImage( stub(); } -void VKAPI vkCmdCopyImageToBuffer( +void anv_CmdCopyImageToBuffer( VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, @@ -764,7 +764,7 @@ void VKAPI vkCmdCopyImageToBuffer( meta_finish_blit(cmd_buffer, &saved_state); } -void VKAPI vkCmdCloneImageData( +void anv_CmdCloneImageData( VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, @@ -774,7 +774,7 @@ void VKAPI vkCmdCloneImageData( stub(); } -void VKAPI vkCmdUpdateBuffer( +void anv_CmdUpdateBuffer( VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, @@ -784,7 +784,7 @@ void VKAPI vkCmdUpdateBuffer( stub(); } -void VKAPI vkCmdFillBuffer( +void anv_CmdFillBuffer( VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, @@ -794,7 +794,7 @@ void VKAPI vkCmdFillBuffer( stub(); } -void VKAPI vkCmdClearColorImage( +void anv_CmdClearColorImage( VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, @@ -805,7 +805,7 @@ void VKAPI vkCmdClearColorImage( stub(); } -void VKAPI vkCmdClearDepthStencil( +void anv_CmdClearDepthStencil( VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, @@ -817,7 +817,7 @@ void VKAPI vkCmdClearDepthStencil( stub(); } -void VKAPI vkCmdResolveImage( +void anv_CmdResolveImage( VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 60389e4bbba..6711d5e349f 100644 --- 
a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -31,7 +31,7 @@ // Shader functions -VkResult VKAPI vkCreateShader( +VkResult anv_CreateShader( VkDevice _device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader) @@ -214,7 +214,7 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, } -VkResult VKAPI vkCreateGraphicsPipeline( +VkResult anv_CreateGraphicsPipeline( VkDevice device, const VkGraphicsPipelineCreateInfo* pCreateInfo, VkPipeline* pPipeline) @@ -506,7 +506,7 @@ anv_pipeline_destroy(struct anv_pipeline *pipeline) return VK_SUCCESS; } -VkResult VKAPI vkCreateGraphicsPipelineDerivative( +VkResult anv_CreateGraphicsPipelineDerivative( VkDevice device, const VkGraphicsPipelineCreateInfo* pCreateInfo, VkPipeline basePipeline, @@ -515,7 +515,7 @@ VkResult VKAPI vkCreateGraphicsPipelineDerivative( stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkCreateComputePipeline( +VkResult anv_CreateComputePipeline( VkDevice device, const VkComputePipelineCreateInfo* pCreateInfo, VkPipeline* pPipeline) @@ -523,7 +523,7 @@ VkResult VKAPI vkCreateComputePipeline( stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkStorePipeline( +VkResult anv_StorePipeline( VkDevice device, VkPipeline pipeline, size_t* pDataSize, @@ -532,7 +532,7 @@ VkResult VKAPI vkStorePipeline( stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkLoadPipeline( +VkResult anv_LoadPipeline( VkDevice device, size_t dataSize, const void* pData, @@ -541,7 +541,7 @@ VkResult VKAPI vkLoadPipeline( stub_return(VK_UNSUPPORTED); } -VkResult VKAPI vkLoadPipelineDerivative( +VkResult anv_LoadPipelineDerivative( VkDevice device, size_t dataSize, const void* pData, @@ -553,7 +553,7 @@ VkResult VKAPI vkLoadPipelineDerivative( // Pipeline layout functions -VkResult VKAPI vkCreatePipelineLayout( +VkResult anv_CreatePipelineLayout( VkDevice _device, const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout) diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 
554bd005a34..cf5b88f6a68 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -35,9 +35,9 @@ #define VK_PROTOTYPES #include +#include -#undef VKAPI -#define VKAPI __attribute__ ((visibility ("default"))) +#include "entrypoints.h" #include "brw_context.h" @@ -689,6 +689,9 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, void anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer); +void * +anv_lookup_entrypoint(const char *name); + #ifdef __cplusplus } #endif diff --git a/src/vulkan/vk_gen.py b/src/vulkan/vk_gen.py new file mode 100644 index 00000000000..b82c90f73aa --- /dev/null +++ b/src/vulkan/vk_gen.py @@ -0,0 +1,266 @@ +# coding=utf-8 +# +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# + +import fileinput, re, sys + +# Each function typedef in the vulkan.h header is all on one line and matches +# this regepx. We hope that won't change. 
+ +p = re.compile('typedef ([^ ]*) *\(VKAPI \*PFN_vk([^(]*)\)(.*);') + +entrypoints = [] + +# We generate a static hash table for entry point lookup +# (vkGetProcAddress). We use a linear congruential generator for our hash +# function and a power-of-two size table. The prime numbers are determined +# experimentally. + +none = 0xffff +hash_size = 256 +u32_mask = 2**32 - 1 +hash_mask = hash_size - 1 + +prime_factor = 5024183 +prime_step = 19 + +def hash(name): + h = 0; + for c in name: + h = (h * prime_factor + ord(c)) & u32_mask + + return h + +opt_header = False +opt_code = False + +if (sys.argv[1] == "header"): + opt_header = True + sys.argv.pop() +elif (sys.argv[1] == "code"): + opt_code = True + sys.argv.pop() + +# Parse the entry points in the header + +i = 0 +for line in fileinput.input(): + m = p.match(line) + if (m): + fullname = "vk" + m.group(2) + h = hash(fullname) + entrypoints.append((m.group(1), m.group(2), m.group(3), i, h)) + i = i + 1 + +# For outputting entrypoints.h we generate a anv_EntryPoint() prototype +# per entry point. + +if opt_header: + for type, name, args, num, h in entrypoints: + print "%s anv_%s%s;" % (type, name, args) + print "%s anv_validate_%s%s;" % (type, name, args) + exit() + + + +print """/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* This file generated from vk_gen.py, don't edit directly. */ + +#include "private.h" + +struct anv_entrypoint { + uint32_t name; + uint32_t hash; + void *function; + void *validate; +}; + +/* We use a big string constant to avoid lots of reloctions from the entry + * point table to lots of little strings. The entries in the entry point table + * store the index into this big string. + */ + +static const char strings[] =""" + +offsets = [] +i = 0; +for type, name, args, num, h in entrypoints: + print " \"vk%s\\0\"" % name + offsets.append(i) + i += 2 + len(name) + 1 +print """ ; + +/* Weak aliases for all potential validate functions. These will resolve to + * NULL if they're not defined, which lets the resolve_entrypoint() function + * either pick a validate wrapper if available or just plug in the actual + * entry point. + */ +""" + +for type, name, args, num, h in entrypoints: + print "%s anv_validate_%s%s __attribute__ ((weak));" % (type, name, args) + +# Now generate the table of all entry points and their validation functions + +print "\nstatic const struct anv_entrypoint entrypoints[] = {" +for type, name, args, num, h in entrypoints: + print " { %5d, 0x%08x, anv_%s, anv_validate_%s }," % (offsets[num], h, name, name) +print "};\n" + +print """ +#ifdef DEBUG +static bool enable_validate = true; +#else +static bool enable_validate = false; +#endif + +/* We can't use symbols that need resolving (like, oh, getenv) in the resolve + * function. 
This means that we have to determine whether or not to use the + * validation layer sometime before that. The constructor function attribute asks + * the dynamic linker to invoke determine_validate() at dlopen() time which + * works. + */ +static void __attribute__ ((constructor)) +determine_validate(void) +{ + const char *s = getenv("ANV_VALIDATE"); + + if (s) + enable_validate = atoi(s); +} + +static void * __attribute__ ((noinline)) +resolve_entrypoint(uint32_t index) +{ + if (enable_validate && entrypoints[index].validate) + return entrypoints[index].validate; + + return entrypoints[index].function; +} +""" + +# Now output ifuncs and their resolve helpers for all entry points. The +# resolve helper calls resolve_entrypoint() with the entry point index, which +# lets the resolver look it up in the table. + +for type, name, args, num, h in entrypoints: + print "static void *resolve_%s(void) { return resolve_entrypoint(%d); }" % (name, num) + print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name) + +# Now generate the hash table used for entry point look up. This is a +# uint16_t table of entry point indices. We use 0xffff to indicate an entry +# in the hash table is empty. 
+ +map = [none for f in xrange(hash_size)] +collisions = [0 for f in xrange(10)] +for type, name, args, num, h in entrypoints: + level = 0 + while map[h & hash_mask] != none: + h = h + prime_step + level = level + 1 + if level > 9: + collisions[9] += 1 + else: + collisions[level] += 1 + map[h & hash_mask] = num + +print "/* Hash table stats:" +print " * size %d entries" % hash_size +print " * collisions entries" +for i in xrange(10): + if (i == 9): + plus = "+" + else: + plus = " " + + print " * %2d%s %4d" % (i, plus, collisions[i]) +print " */\n" + +print "#define none 0x%04x\n" % none + +print "static const uint16_t map[] = {" +for i in xrange(0, hash_size, 8): + print " ", + for j in xrange(i, i + 8): + if map[j] & 0xffff == 0xffff: + print " none,", + else: + print "0x%04x," % (map[j] & 0xffff), + print + +print "};" + +# Finally we generate the hash table lookup function. The hash function and +# linear probing algorithm matches the hash table generated above. + +print """ +void * +anv_lookup_entrypoint(const char *name) +{ + static const uint32_t prime_factor = %d; + static const uint32_t prime_step = %d; + const struct anv_entrypoint *e; + uint32_t hash, h, i; + const char *p; + + hash = 0; + for (p = name; *p; p++) + hash = hash * prime_factor + *p; + + h = hash; + do { + i = map[h & %d]; + if (i == none) + return NULL; + e = &entrypoints[i]; + h += prime_step; + } while (e->hash != hash); + + if (strcmp(name, strings + e->name) != 0) + return NULL; + + return resolve_entrypoint(i); +} +""" % (prime_factor, prime_step, hash_mask) -- cgit v1.2.3 From 60ebcbed544f022325469373f1b4b7114bfa5ea7 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Sun, 17 May 2015 21:35:34 -0700 Subject: vk: Start Implementing vkGetFormatInfo() We move the format table and vkGetFormatInfo to their own file in the process. 
--- src/vulkan/Makefile.am | 1 + src/vulkan/device.c | 12 -- src/vulkan/formats.c | 427 +++++++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/image.c | 183 --------------------- src/vulkan/private.h | 2 + 5 files changed, 430 insertions(+), 195 deletions(-) create mode 100644 src/vulkan/formats.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 8b53a089615..8c05c5139fc 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -64,6 +64,7 @@ libvulkan_la_SOURCES = \ intel.c \ entrypoints.c \ entrypoints.h \ + formats.c \ compiler.cpp BUILT_SOURCES = entrypoints.h entrypoints.c diff --git a/src/vulkan/device.c b/src/vulkan/device.c index d9398e05a24..da5573d2f72 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1206,18 +1206,6 @@ VkResult anv_GetQueryPoolResults( stub_return(VK_UNSUPPORTED); } -// Format capabilities - -VkResult anv_GetFormatInfo( - VkDevice device, - VkFormat format, - VkFormatInfoType infoType, - size_t* pDataSize, - void* pData) -{ - stub_return(VK_UNSUPPORTED); -} - // Buffer functions VkResult anv_CreateBuffer( diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c new file mode 100644 index 00000000000..8f8dd266465 --- /dev/null +++ b/src/vulkan/formats.c @@ -0,0 +1,427 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "private.h" + +#define UNSUPPORTED ~0 +static const struct anv_format anv_formats[] = { + [VK_FORMAT_UNDEFINED] = { .format = RAW }, + [VK_FORMAT_R4G4_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_R4G4_USCALED] = { .format = UNSUPPORTED }, + [VK_FORMAT_R4G4B4A4_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_R4G4B4A4_USCALED] = { .format = UNSUPPORTED }, + [VK_FORMAT_R5G6B5_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_R5G6B5_USCALED] = { .format = UNSUPPORTED }, + [VK_FORMAT_R5G5B5A1_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_R5G5B5A1_USCALED] = { .format = UNSUPPORTED }, + [VK_FORMAT_R8_UNORM] = { + .format = R8_UNORM, .cpp = 1, .channels = 1, + .linear_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT, + .tiled_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT + }, + [VK_FORMAT_R8_SNORM] = { + .format = R8_SNORM, .cpp = 1, .channels = 1, + }, + [VK_FORMAT_R8_USCALED] = { + .format = R8_USCALED, .cpp = 1, .channels = 1 + }, + [VK_FORMAT_R8_SSCALED] = { + .format = R8_SSCALED, .cpp = 1, .channels = 1 + }, + [VK_FORMAT_R8_UINT] = { + .format = R8_UINT, .cpp = 1, .channels = 1 + }, + [VK_FORMAT_R8_SINT] = { + .format = R8_SINT, .cpp = 1, .channels = 1 + }, + [VK_FORMAT_R8_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_R8G8_UNORM] = { + .format = R8G8_UNORM, .cpp = 2, .channels = 2 + }, + [VK_FORMAT_R8G8_SNORM] = { + .format = R8G8_SNORM, .cpp = 2, .channels = 2 + }, + [VK_FORMAT_R8G8_USCALED] = { + .format = R8G8_USCALED, .cpp = 2, .channels = 2 + }, + 
[VK_FORMAT_R8G8_SSCALED] = { + .format = R8G8_SSCALED, .cpp = 2, .channels = 2 + }, + [VK_FORMAT_R8G8_UINT] = { + .format = R8G8_UINT, .cpp = 2, .channels = 2 + }, + [VK_FORMAT_R8G8_SINT] = { + .format = R8G8_SINT, .cpp = 2, .channels = 2 + }, + [VK_FORMAT_R8G8_SRGB] = { .format = UNSUPPORTED }, /* L8A8_UNORM_SRGB */ + [VK_FORMAT_R8G8B8_UNORM] = { + .format = R8G8B8X8_UNORM, .cpp = 3, .channels = 3 + }, + [VK_FORMAT_R8G8B8_SNORM] = { .format = R8G8B8_SNORM, .cpp = 4 }, + [VK_FORMAT_R8G8B8_USCALED] = { + .format = R8G8B8_USCALED, .cpp = 3, .channels = 3 + }, + [VK_FORMAT_R8G8B8_SSCALED] = { + .format = R8G8B8_SSCALED, .cpp = 3, .channels = 3 + }, + [VK_FORMAT_R8G8B8_UINT] = { + .format = R8G8B8_UINT, .cpp = 3, .channels = 3 + }, + [VK_FORMAT_R8G8B8_SINT] = { + .format = R8G8B8_SINT, .cpp = 3, .channels = 3 + }, + [VK_FORMAT_R8G8B8_SRGB] = { .format = UNSUPPORTED }, /* B8G8R8A8_UNORM_SRGB */ + [VK_FORMAT_R8G8B8A8_UNORM] = { + .format = R8G8B8A8_UNORM, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_R8G8B8A8_SNORM] = { + .format = R8G8B8A8_SNORM, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_R8G8B8A8_USCALED] = { + .format = R8G8B8A8_USCALED, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_R8G8B8A8_SSCALED] = { + .format = R8G8B8A8_SSCALED, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_R8G8B8A8_UINT] = { + .format = R8G8B8A8_UINT, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_R8G8B8A8_SINT] = { + .format = R8G8B8A8_SINT, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_R8G8B8A8_SRGB] = { + .format = R8G8B8A8_UNORM_SRGB, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_R10G10B10A2_UNORM] = { + .format = R10G10B10A2_UNORM, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_R10G10B10A2_SNORM] = { + .format = R10G10B10A2_SNORM, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_R10G10B10A2_USCALED] = { + .format = R10G10B10A2_USCALED, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_R10G10B10A2_SSCALED] = { + .format = R10G10B10A2_SSCALED, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_R10G10B10A2_UINT] = { + .format = R10G10B10A2_UINT, 
.cpp = 4, .channels = 4 + }, + [VK_FORMAT_R10G10B10A2_SINT] = { + .format = R10G10B10A2_SINT, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_R16_UNORM] = { + .format = R16_UNORM, .cpp = 2, .channels = 1 + }, + [VK_FORMAT_R16_SNORM] = { + .format = R16_SNORM, .cpp = 2, .channels = 1 + }, + [VK_FORMAT_R16_USCALED] = { + .format = R16_USCALED, .cpp = 2, .channels = 1 + }, + [VK_FORMAT_R16_SSCALED] = { + .format = R16_SSCALED, .cpp = 2, .channels = 1 + }, + [VK_FORMAT_R16_UINT] = { + .format = R16_UINT, .cpp = 2, .channels = 1 + }, + [VK_FORMAT_R16_SINT] = { + .format = R16_SINT, .cpp = 2, .channels = 1 + }, + [VK_FORMAT_R16_SFLOAT] = { + .format = R16_FLOAT, .cpp = 2, .channels = 1 + }, + [VK_FORMAT_R16G16_UNORM] = { + .format = R16G16_UNORM, .cpp = 4, .channels = 2 + }, + [VK_FORMAT_R16G16_SNORM] = { + .format = R16G16_SNORM, .cpp = 4, .channels = 2 + }, + [VK_FORMAT_R16G16_USCALED] = { + .format = R16G16_USCALED, .cpp = 4, .channels = 2 + }, + [VK_FORMAT_R16G16_SSCALED] = { + .format = R16G16_SSCALED, .cpp = 4, .channels = 2 + }, + [VK_FORMAT_R16G16_UINT] = { + .format = R16G16_UINT, .cpp = 4, .channels = 2 + }, + [VK_FORMAT_R16G16_SINT] = { + .format = R16G16_SINT, .cpp = 4, .channels = 2 + }, + [VK_FORMAT_R16G16_SFLOAT] = { + .format = R16G16_FLOAT, .cpp = 4, .channels = 2 + }, + [VK_FORMAT_R16G16B16_UNORM] = { + .format = R16G16B16_UNORM, .cpp = 6, .channels = 3 + }, + [VK_FORMAT_R16G16B16_SNORM] = { + .format = R16G16B16_SNORM, .cpp = 6, .channels = 3 + }, + [VK_FORMAT_R16G16B16_USCALED] = { + .format = R16G16B16_USCALED, .cpp = 6, .channels = 3 + }, + [VK_FORMAT_R16G16B16_SSCALED] = { + .format = R16G16B16_SSCALED, .cpp = 6, .channels = 3 + }, + [VK_FORMAT_R16G16B16_UINT] = { + .format = R16G16B16_UINT, .cpp = 6, .channels = 3 + }, + [VK_FORMAT_R16G16B16_SINT] = { + .format = R16G16B16_SINT, .cpp = 6, .channels = 3 + }, + [VK_FORMAT_R16G16B16_SFLOAT] = { + .format = R16G16B16_FLOAT, .cpp = 6, .channels = 3 + }, + [VK_FORMAT_R16G16B16A16_UNORM] = { + .format = 
R16G16B16A16_UNORM, .cpp = 8, .channels = 4 + }, + [VK_FORMAT_R16G16B16A16_SNORM] = { + .format = R16G16B16A16_SNORM, .cpp = 8, .channels = 4 + }, + [VK_FORMAT_R16G16B16A16_USCALED] = { + .format = R16G16B16A16_USCALED, .cpp = 8, .channels = 4 + }, + [VK_FORMAT_R16G16B16A16_SSCALED] = { + .format = R16G16B16A16_SSCALED, .cpp = 8, .channels = 4 + }, + [VK_FORMAT_R16G16B16A16_UINT] = { + .format = R16G16B16A16_UINT, .cpp = 8, .channels = 4 + }, + [VK_FORMAT_R16G16B16A16_SINT] = { + .format = R16G16B16A16_SINT, .cpp = 8, .channels = 4 + }, + [VK_FORMAT_R16G16B16A16_SFLOAT] = { + .format = R16G16B16A16_FLOAT, .cpp = 8, .channels = 4 + }, + [VK_FORMAT_R32_UINT] = { + .format = R32_UINT, .cpp = 4, .channels = 1, + .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT + }, + [VK_FORMAT_R32_SINT] = { + .format = R32_SINT, .cpp = 4, .channels = 1, + .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT + }, + [VK_FORMAT_R32_SFLOAT] = { + .format = R32_FLOAT, .cpp = 4, .channels = 1, + .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT + }, + [VK_FORMAT_R32G32_UINT] = { + .format = R32G32_UINT, .cpp = 8, .channels = 2, + .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT + }, + [VK_FORMAT_R32G32_SINT] = { + .format = R32G32_SINT, .cpp = 8, .channels = 2, + .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT + }, + [VK_FORMAT_R32G32_SFLOAT] = { + .format = R32G32_FLOAT, .cpp = 8, .channels = 2, + .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT + }, + [VK_FORMAT_R32G32B32_UINT] = { + .format = R32G32B32_UINT, .cpp = 12, .channels = 3, + .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT + }, + [VK_FORMAT_R32G32B32_SINT] = { + .format = R32G32B32_SINT, .cpp = 12, .channels = 3, + .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT + }, + [VK_FORMAT_R32G32B32_SFLOAT] = { + .format = R32G32B32_FLOAT, .cpp = 12, .channels = 3, + .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT + }, + [VK_FORMAT_R32G32B32A32_UINT] = { + .format = R32G32B32A32_UINT, .cpp = 16, .channels = 4, + 
.linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT + }, + [VK_FORMAT_R32G32B32A32_SINT] = { + .format = R32G32B32A32_SINT, .cpp = 16, .channels = 4, + .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT + }, + [VK_FORMAT_R32G32B32A32_SFLOAT] = { + .format = R32G32B32A32_FLOAT, .cpp = 16, .channels = 4, + .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT + }, + [VK_FORMAT_R64_SFLOAT] = { + .format = R64_FLOAT, .cpp = 8, .channels = 1 + }, + [VK_FORMAT_R64G64_SFLOAT] = { + .format = R64G64_FLOAT, .cpp = 16, .channels = 2 + }, + [VK_FORMAT_R64G64B64_SFLOAT] = { + .format = R64G64B64_FLOAT, .cpp = 24, .channels = 3 + }, + [VK_FORMAT_R64G64B64A64_SFLOAT] = { + .format = R64G64B64A64_FLOAT, .cpp = 32, .channels = 4 + }, + [VK_FORMAT_R11G11B10_UFLOAT] = { + .format = R11G11B10_FLOAT, .cpp = 4, .channels = 3 + }, + [VK_FORMAT_R9G9B9E5_UFLOAT] = { + .format = R9G9B9E5_SHAREDEXP, .cpp = 4, .channels = 3 + }, + // [VK_FORMAT_D16_UNORM] = { .format = D16_UNORM }, + // [VK_FORMAT_D24_UNORM] = { .format = D24_UNORM }, + // [VK_FORMAT_D32_SFLOAT] = { .format = D32_SFLOAT }, + // [VK_FORMAT_S8_UINT] = { .format = S8_UINT }, + // [VK_FORMAT_D16_UNORM_S8_UINT] = { .format = D16_UNORM }, + // [VK_FORMAT_D24_UNORM_S8_UINT] = { .format = D24_UNORM }, + // [VK_FORMAT_D32_SFLOAT_S8_UINT] = { .format = D32_SFLOAT }, + // [VK_FORMAT_BC1_RGB_UNORM] = { .format = BC1_RGB }, + // [VK_FORMAT_BC1_RGB_SRGB] = { .format = BC1_RGB }, + // [VK_FORMAT_BC1_RGBA_UNORM] = { .format = BC1_RGBA }, + // [VK_FORMAT_BC1_RGBA_SRGB] = { .format = BC1_RGBA }, + // [VK_FORMAT_BC2_UNORM] = { .format = BC2_UNORM }, + // [VK_FORMAT_BC2_SRGB] = { .format = BC2_SRGB }, + // [VK_FORMAT_BC3_UNORM] = { .format = BC3_UNORM }, + // [VK_FORMAT_BC3_SRGB] = { .format = BC3_SRGB }, + // [VK_FORMAT_BC4_UNORM] = { .format = BC4_UNORM }, + // [VK_FORMAT_BC4_SNORM] = { .format = BC4_SNORM }, + // [VK_FORMAT_BC5_UNORM] = { .format = BC5_UNORM }, + // [VK_FORMAT_BC5_SNORM] = { .format = BC5_SNORM }, + // [VK_FORMAT_BC6H_UFLOAT] 
= { .format = BC6H_UFLOAT }, + // [VK_FORMAT_BC6H_SFLOAT] = { .format = BC6H_SFLOAT }, + // [VK_FORMAT_BC7_UNORM] = { .format = BC7_UNORM }, + // [VK_FORMAT_BC7_SRGB] = { .format = BC7_SRGB }, + // [VK_FORMAT_ETC2_R8G8B8_UNORM] = { .format = ETC2_R8G8B8 }, + // [VK_FORMAT_ETC2_R8G8B8_SRGB] = { .format = ETC2_R8G8B8 }, + // [VK_FORMAT_ETC2_R8G8B8A1_UNORM] = { .format = ETC2_R8G8B8A1 }, + // [VK_FORMAT_ETC2_R8G8B8A1_SRGB] = { .format = ETC2_R8G8B8A1 }, + // [VK_FORMAT_ETC2_R8G8B8A8_UNORM] = { .format = ETC2_R8G8B8A8 }, + // [VK_FORMAT_ETC2_R8G8B8A8_SRGB] = { .format = ETC2_R8G8B8A8 }, + // [VK_FORMAT_EAC_R11_UNORM] = { .format = EAC_R11 }, + // [VK_FORMAT_EAC_R11_SNORM] = { .format = EAC_R11 }, + // [VK_FORMAT_EAC_R11G11_UNORM] = { .format = EAC_R11G11 }, + // [VK_FORMAT_EAC_R11G11_SNORM] = { .format = EAC_R11G11 }, + // [VK_FORMAT_ASTC_4x4_UNORM] = { .format = ASTC_4x4 }, + // [VK_FORMAT_ASTC_4x4_SRGB] = { .format = ASTC_4x4 }, + // [VK_FORMAT_ASTC_5x4_UNORM] = { .format = ASTC_5x4 }, + // [VK_FORMAT_ASTC_5x4_SRGB] = { .format = ASTC_5x4 }, + // [VK_FORMAT_ASTC_5x5_UNORM] = { .format = ASTC_5x5 }, + // [VK_FORMAT_ASTC_5x5_SRGB] = { .format = ASTC_5x5 }, + // [VK_FORMAT_ASTC_6x5_UNORM] = { .format = ASTC_6x5 }, + // [VK_FORMAT_ASTC_6x5_SRGB] = { .format = ASTC_6x5 }, + // [VK_FORMAT_ASTC_6x6_UNORM] = { .format = ASTC_6x6 }, + // [VK_FORMAT_ASTC_6x6_SRGB] = { .format = ASTC_6x6 }, + // [VK_FORMAT_ASTC_8x5_UNORM] = { .format = ASTC_8x5 }, + // [VK_FORMAT_ASTC_8x5_SRGB] = { .format = ASTC_8x5 }, + // [VK_FORMAT_ASTC_8x6_UNORM] = { .format = ASTC_8x6 }, + // [VK_FORMAT_ASTC_8x6_SRGB] = { .format = ASTC_8x6 }, + // [VK_FORMAT_ASTC_8x8_UNORM] = { .format = ASTC_8x8 }, + // [VK_FORMAT_ASTC_8x8_SRGB] = { .format = ASTC_8x8 }, + // [VK_FORMAT_ASTC_10x5_UNORM] = { .format = ASTC_10x5 }, + // [VK_FORMAT_ASTC_10x5_SRGB] = { .format = ASTC_10x5 }, + // [VK_FORMAT_ASTC_10x6_UNORM] = { .format = ASTC_10x6 }, + // [VK_FORMAT_ASTC_10x6_SRGB] = { .format = ASTC_10x6 }, + // 
[VK_FORMAT_ASTC_10x8_UNORM] = { .format = ASTC_10x8 }, + // [VK_FORMAT_ASTC_10x8_SRGB] = { .format = ASTC_10x8 }, + // [VK_FORMAT_ASTC_10x10_UNORM] = { .format = ASTC_10x10 }, + // [VK_FORMAT_ASTC_10x10_SRGB] = { .format = ASTC_10x10 }, + // [VK_FORMAT_ASTC_12x10_UNORM] = { .format = ASTC_12x10 }, + // [VK_FORMAT_ASTC_12x10_SRGB] = { .format = ASTC_12x10 }, + // [VK_FORMAT_ASTC_12x12_UNORM] = { .format = ASTC_12x12 }, + // [VK_FORMAT_ASTC_12x12_SRGB] = { .format = ASTC_12x12 }, + [VK_FORMAT_B4G4R4A4_UNORM] = { + .format = B4G4R4A4_UNORM, .cpp = 2, .channels = 4 + }, + [VK_FORMAT_B5G5R5A1_UNORM] = { + .format = B5G5R5A1_UNORM, .cpp = 2, .channels = 4 + }, + [VK_FORMAT_B5G6R5_UNORM] = { + .format = B5G6R5_UNORM, .cpp = 2, .channels = 3 + }, + // [VK_FORMAT_B5G6R5_USCALED] = { .format = B5G6R5_USCALED }, + // [VK_FORMAT_B8G8R8_UNORM] = { .format = B8G8R8_UNORM }, + // [VK_FORMAT_B8G8R8_SNORM] = { .format = B8G8R8_SNORM }, + // [VK_FORMAT_B8G8R8_USCALED] = { .format = B8G8R8_USCALED }, + // [VK_FORMAT_B8G8R8_SSCALED] = { .format = B8G8R8_SSCALED }, + // [VK_FORMAT_B8G8R8_UINT] = { .format = B8G8R8_UINT }, + // [VK_FORMAT_B8G8R8_SINT] = { .format = B8G8R8_SINT }, + // [VK_FORMAT_B8G8R8_SRGB] = { .format = B8G8R8_SRGB }, + [VK_FORMAT_B8G8R8A8_UNORM] = { + .format = B8G8R8A8_UNORM, .cpp = 4, .channels = 4 + }, + // [VK_FORMAT_B8G8R8A8_SNORM] = { .format = B8G8R8A8_SNORM }, + // [VK_FORMAT_B8G8R8A8_USCALED] = { .format = B8G8R8A8_USCALED }, + // [VK_FORMAT_B8G8R8A8_SSCALED] = { .format = B8G8R8A8_SSCALED }, + // [VK_FORMAT_B8G8R8A8_UINT] = { .format = B8G8R8A8_UINT }, + // [VK_FORMAT_B8G8R8A8_SINT] = { .format = B8G8R8A8_SINT }, + // [VK_FORMAT_B8G8R8A8_SRGB] = { .format = B8G8R8A8_SRGB }, + [VK_FORMAT_B10G10R10A2_UNORM] = { + .format = B10G10R10A2_UNORM, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_B10G10R10A2_SNORM] = { + .format = B10G10R10A2_SNORM, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_B10G10R10A2_USCALED] = { + .format = B10G10R10A2_USCALED, .cpp = 4, .channels = 4 + 
}, + [VK_FORMAT_B10G10R10A2_SSCALED] = { + .format = B10G10R10A2_SSCALED, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_B10G10R10A2_UINT] = { + .format = B10G10R10A2_UINT, .cpp = 4, .channels = 4 + }, + [VK_FORMAT_B10G10R10A2_SINT] = { + .format = B10G10R10A2_SINT, .cpp = 4, .channels = 4 + } +}; + +const struct anv_format * +anv_format_for_vk_format(VkFormat format) +{ + return &anv_formats[format]; +} + +// Format capabilities + +VkResult anv_GetFormatInfo( + VkDevice _device, + VkFormat _format, + VkFormatInfoType infoType, + size_t* pDataSize, + void* pData) +{ + VkFormatProperties *properties; + const struct anv_format *format; + + format = anv_format_for_vk_format(_format); + if (format == 0) + return VK_ERROR_INVALID_VALUE; + + switch (infoType) { + case VK_FORMAT_INFO_TYPE_PROPERTIES: + properties = pData; + + *pDataSize = sizeof(*properties); + if (pData == NULL) + return VK_SUCCESS; + + properties->linearTilingFeatures = format->linear_flags; + properties->optimalTilingFeatures = format->tiled_flags; + return VK_SUCCESS; + + default: + return VK_ERROR_INVALID_VALUE; + } +} diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 812fba88b9f..d5e74dc6594 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -31,189 +31,6 @@ // Image functions -static const struct anv_format anv_formats[] = { - [VK_FORMAT_UNDEFINED] = { .format = RAW }, - // [VK_FORMAT_R4G4_UNORM] = { .format = R4G4_UNORM }, - // [VK_FORMAT_R4G4_USCALED] = { .format = R4G4_USCALED }, - // [VK_FORMAT_R4G4B4A4_UNORM] = { .format = R4G4B4A4_UNORM }, - // [VK_FORMAT_R4G4B4A4_USCALED] = { .format = R4G4B4A4_USCALED }, - // [VK_FORMAT_R5G6B5_UNORM] = { .format = R5G6B5_UNORM }, - // [VK_FORMAT_R5G6B5_USCALED] = { .format = R5G6B5_USCALED }, - // [VK_FORMAT_R5G5B5A1_UNORM] = { .format = R5G5B5A1_UNORM }, - // [VK_FORMAT_R5G5B5A1_USCALED] = { .format = R5G5B5A1_USCALED }, - [VK_FORMAT_R8_UNORM] = { .format = R8_UNORM, .cpp = 1, .channels = 1 }, - [VK_FORMAT_R8_SNORM] = { .format = R8_SNORM, 
.cpp = 1, .channels = 1 }, - [VK_FORMAT_R8_USCALED] = { .format = R8_USCALED, .cpp = 1, .channels = 1 }, - [VK_FORMAT_R8_SSCALED] = { .format = R8_SSCALED, .cpp = 1, .channels = 1 }, - [VK_FORMAT_R8_UINT] = { .format = R8_UINT, .cpp = 1, .channels = 1 }, - [VK_FORMAT_R8_SINT] = { .format = R8_SINT, .cpp = 1, .channels = 1 }, - // [VK_FORMAT_R8_SRGB] = { .format = R8_SRGB, .cpp = 1 }, - [VK_FORMAT_R8G8_UNORM] = { .format = R8G8_UNORM, .cpp = 2, .channels = 2 }, - [VK_FORMAT_R8G8_SNORM] = { .format = R8G8_SNORM, .cpp = 2, .channels = 2 }, - [VK_FORMAT_R8G8_USCALED] = { .format = R8G8_USCALED, .cpp = 2, .channels = 2 }, - [VK_FORMAT_R8G8_SSCALED] = { .format = R8G8_SSCALED, .cpp = 2, .channels = 2 }, - [VK_FORMAT_R8G8_UINT] = { .format = R8G8_UINT, .cpp = 2, .channels = 2 }, - [VK_FORMAT_R8G8_SINT] = { .format = R8G8_SINT, .cpp = 2, .channels = 2 }, - // [VK_FORMAT_R8G8_SRGB] = { .format = R8G8_SRGB }, - [VK_FORMAT_R8G8B8_UNORM] = { .format = R8G8B8X8_UNORM, .cpp = 3, .channels = 3 }, - // [VK_FORMAT_R8G8B8_SNORM] = { .format = R8G8B8X8_SNORM, .cpp = 4 }, - [VK_FORMAT_R8G8B8_USCALED] = { .format = R8G8B8_USCALED, .cpp = 3, .channels = 3 }, - [VK_FORMAT_R8G8B8_SSCALED] = { .format = R8G8B8_SSCALED, .cpp = 3, .channels = 3 }, - [VK_FORMAT_R8G8B8_UINT] = { .format = R8G8B8_UINT, .cpp = 3, .channels = 3 }, - [VK_FORMAT_R8G8B8_SINT] = { .format = R8G8B8_SINT, .cpp = 3, .channels = 3 }, - // [VK_FORMAT_R8G8B8_SRGB] = { .format = R8G8B8_SRGB }, - [VK_FORMAT_R8G8B8A8_UNORM] = { .format = R8G8B8A8_UNORM, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R8G8B8A8_SNORM] = { .format = R8G8B8A8_SNORM, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R8G8B8A8_USCALED] = { .format = R8G8B8A8_USCALED, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R8G8B8A8_SSCALED] = { .format = R8G8B8A8_SSCALED, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R8G8B8A8_UINT] = { .format = R8G8B8A8_UINT, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R8G8B8A8_SINT] = { .format = R8G8B8A8_SINT, .cpp = 4, .channels = 4 }, - // 
[VK_FORMAT_R8G8B8A8_SRGB] = { .format = R8G8B8A8_SRGB }, - // [VK_FORMAT_R10G10B10A2_UNORM] = { .format = R10G10B10A2_UNORM }, - // [VK_FORMAT_R10G10B10A2_SNORM] = { .format = R10G10B10A2_SNORM }, - // [VK_FORMAT_R10G10B10A2_USCALED] = { .format = R10G10B10A2_USCALED }, - // [VK_FORMAT_R10G10B10A2_SSCALED] = { .format = R10G10B10A2_SSCALED }, - // [VK_FORMAT_R10G10B10A2_UINT] = { .format = R10G10B10A2_UINT }, - // [VK_FORMAT_R10G10B10A2_SINT] = { .format = R10G10B10A2_SINT }, - // [VK_FORMAT_R16_UNORM] = { .format = R16_UNORM }, - // [VK_FORMAT_R16_SNORM] = { .format = R16_SNORM }, - // [VK_FORMAT_R16_USCALED] = { .format = R16_USCALED }, - // [VK_FORMAT_R16_SSCALED] = { .format = R16_SSCALED }, - // [VK_FORMAT_R16_UINT] = { .format = R16_UINT }, - // [VK_FORMAT_R16_SINT] = { .format = R16_SINT }, - [VK_FORMAT_R16_SFLOAT] = { .format = R16_FLOAT, .cpp = 2, .channels = 1 }, - // [VK_FORMAT_R16G16_UNORM] = { .format = R16G16_UNORM }, - // [VK_FORMAT_R16G16_SNORM] = { .format = R16G16_SNORM }, - // [VK_FORMAT_R16G16_USCALED] = { .format = R16G16_USCALED }, - // [VK_FORMAT_R16G16_SSCALED] = { .format = R16G16_SSCALED }, - // [VK_FORMAT_R16G16_UINT] = { .format = R16G16_UINT }, - // [VK_FORMAT_R16G16_SINT] = { .format = R16G16_SINT }, - [VK_FORMAT_R16G16_SFLOAT] = { .format = R16G16_FLOAT, .cpp = 4, .channels = 2 }, - // [VK_FORMAT_R16G16B16_UNORM] = { .format = R16G16B16_UNORM }, - // [VK_FORMAT_R16G16B16_SNORM] = { .format = R16G16B16_SNORM }, - // [VK_FORMAT_R16G16B16_USCALED] = { .format = R16G16B16_USCALED }, - // [VK_FORMAT_R16G16B16_SSCALED] = { .format = R16G16B16_SSCALED }, - // [VK_FORMAT_R16G16B16_UINT] = { .format = R16G16B16_UINT }, - // [VK_FORMAT_R16G16B16_SINT] = { .format = R16G16B16_SINT }, - [VK_FORMAT_R16G16B16_SFLOAT] = { .format = R16G16B16_FLOAT, .cpp = 6, .channels = 3 }, - // [VK_FORMAT_R16G16B16A16_UNORM] = { .format = R16G16B16A16_UNORM }, - // [VK_FORMAT_R16G16B16A16_SNORM] = { .format = R16G16B16A16_SNORM }, - // 
[VK_FORMAT_R16G16B16A16_USCALED] = { .format = R16G16B16A16_USCALED }, - // [VK_FORMAT_R16G16B16A16_SSCALED] = { .format = R16G16B16A16_SSCALED }, - // [VK_FORMAT_R16G16B16A16_UINT] = { .format = R16G16B16A16_UINT }, - // [VK_FORMAT_R16G16B16A16_SINT] = { .format = R16G16B16A16_SINT }, - [VK_FORMAT_R16G16B16A16_SFLOAT] = { .format = R16G16B16A16_FLOAT, .cpp = 8, .channels = 4 }, - [VK_FORMAT_R32_UINT] = { .format = R32_UINT, .cpp = 4, .channels = 1 }, - [VK_FORMAT_R32_SINT] = { .format = R32_SINT, .cpp = 4, .channels = 1 }, - [VK_FORMAT_R32_SFLOAT] = { .format = R32_FLOAT, .cpp = 4, .channels = 1 }, - [VK_FORMAT_R32G32_UINT] = { .format = R32G32_UINT, .cpp = 8, .channels = 2 }, - [VK_FORMAT_R32G32_SINT] = { .format = R32G32_SINT, .cpp = 8, .channels = 2 }, - [VK_FORMAT_R32G32_SFLOAT] = { .format = R32G32_FLOAT, .cpp = 8, .channels = 2 }, - [VK_FORMAT_R32G32B32_UINT] = { .format = R32G32B32_UINT, .cpp = 12, .channels = 3 }, - [VK_FORMAT_R32G32B32_SINT] = { .format = R32G32B32_SINT, .cpp = 12, .channels = 3 }, - [VK_FORMAT_R32G32B32_SFLOAT] = { .format = R32G32B32_FLOAT, .cpp = 12, .channels = 3 }, - [VK_FORMAT_R32G32B32A32_UINT] = { .format = R32G32B32A32_UINT, .cpp = 16, .channels = 4 }, - [VK_FORMAT_R32G32B32A32_SINT] = { .format = R32G32B32A32_SINT, .cpp = 16, .channels = 4 }, - [VK_FORMAT_R32G32B32A32_SFLOAT] = { .format = R32G32B32A32_FLOAT, .cpp = 16, .channels = 4 }, - [VK_FORMAT_R64_SFLOAT] = { .format = R64_FLOAT, .cpp = 8, .channels = 1 }, - [VK_FORMAT_R64G64_SFLOAT] = { .format = R64G64_FLOAT, .cpp = 16, .channels = 2 }, - [VK_FORMAT_R64G64B64_SFLOAT] = { .format = R64G64B64_FLOAT, .cpp = 24, .channels = 3 }, - [VK_FORMAT_R64G64B64A64_SFLOAT] = { .format = R64G64B64A64_FLOAT, .cpp = 32, .channels = 4 }, - // [VK_FORMAT_R11G11B10_UFLOAT] = { .format = R11G11B10_UFLOAT }, - // [VK_FORMAT_R9G9B9E5_UFLOAT] = { .format = R9G9B9E5_UFLOAT }, - // [VK_FORMAT_D16_UNORM] = { .format = D16_UNORM }, - // [VK_FORMAT_D24_UNORM] = { .format = D24_UNORM }, - // 
[VK_FORMAT_D32_SFLOAT] = { .format = D32_SFLOAT }, - // [VK_FORMAT_S8_UINT] = { .format = S8_UINT }, - // [VK_FORMAT_D16_UNORM_S8_UINT] = { .format = D16_UNORM }, - // [VK_FORMAT_D24_UNORM_S8_UINT] = { .format = D24_UNORM }, - // [VK_FORMAT_D32_SFLOAT_S8_UINT] = { .format = D32_SFLOAT }, - // [VK_FORMAT_BC1_RGB_UNORM] = { .format = BC1_RGB }, - // [VK_FORMAT_BC1_RGB_SRGB] = { .format = BC1_RGB }, - // [VK_FORMAT_BC1_RGBA_UNORM] = { .format = BC1_RGBA }, - // [VK_FORMAT_BC1_RGBA_SRGB] = { .format = BC1_RGBA }, - // [VK_FORMAT_BC2_UNORM] = { .format = BC2_UNORM }, - // [VK_FORMAT_BC2_SRGB] = { .format = BC2_SRGB }, - // [VK_FORMAT_BC3_UNORM] = { .format = BC3_UNORM }, - // [VK_FORMAT_BC3_SRGB] = { .format = BC3_SRGB }, - // [VK_FORMAT_BC4_UNORM] = { .format = BC4_UNORM }, - // [VK_FORMAT_BC4_SNORM] = { .format = BC4_SNORM }, - // [VK_FORMAT_BC5_UNORM] = { .format = BC5_UNORM }, - // [VK_FORMAT_BC5_SNORM] = { .format = BC5_SNORM }, - // [VK_FORMAT_BC6H_UFLOAT] = { .format = BC6H_UFLOAT }, - // [VK_FORMAT_BC6H_SFLOAT] = { .format = BC6H_SFLOAT }, - // [VK_FORMAT_BC7_UNORM] = { .format = BC7_UNORM }, - // [VK_FORMAT_BC7_SRGB] = { .format = BC7_SRGB }, - // [VK_FORMAT_ETC2_R8G8B8_UNORM] = { .format = ETC2_R8G8B8 }, - // [VK_FORMAT_ETC2_R8G8B8_SRGB] = { .format = ETC2_R8G8B8 }, - // [VK_FORMAT_ETC2_R8G8B8A1_UNORM] = { .format = ETC2_R8G8B8A1 }, - // [VK_FORMAT_ETC2_R8G8B8A1_SRGB] = { .format = ETC2_R8G8B8A1 }, - // [VK_FORMAT_ETC2_R8G8B8A8_UNORM] = { .format = ETC2_R8G8B8A8 }, - // [VK_FORMAT_ETC2_R8G8B8A8_SRGB] = { .format = ETC2_R8G8B8A8 }, - // [VK_FORMAT_EAC_R11_UNORM] = { .format = EAC_R11 }, - // [VK_FORMAT_EAC_R11_SNORM] = { .format = EAC_R11 }, - // [VK_FORMAT_EAC_R11G11_UNORM] = { .format = EAC_R11G11 }, - // [VK_FORMAT_EAC_R11G11_SNORM] = { .format = EAC_R11G11 }, - // [VK_FORMAT_ASTC_4x4_UNORM] = { .format = ASTC_4x4 }, - // [VK_FORMAT_ASTC_4x4_SRGB] = { .format = ASTC_4x4 }, - // [VK_FORMAT_ASTC_5x4_UNORM] = { .format = ASTC_5x4 }, - // 
[VK_FORMAT_ASTC_5x4_SRGB] = { .format = ASTC_5x4 }, - // [VK_FORMAT_ASTC_5x5_UNORM] = { .format = ASTC_5x5 }, - // [VK_FORMAT_ASTC_5x5_SRGB] = { .format = ASTC_5x5 }, - // [VK_FORMAT_ASTC_6x5_UNORM] = { .format = ASTC_6x5 }, - // [VK_FORMAT_ASTC_6x5_SRGB] = { .format = ASTC_6x5 }, - // [VK_FORMAT_ASTC_6x6_UNORM] = { .format = ASTC_6x6 }, - // [VK_FORMAT_ASTC_6x6_SRGB] = { .format = ASTC_6x6 }, - // [VK_FORMAT_ASTC_8x5_UNORM] = { .format = ASTC_8x5 }, - // [VK_FORMAT_ASTC_8x5_SRGB] = { .format = ASTC_8x5 }, - // [VK_FORMAT_ASTC_8x6_UNORM] = { .format = ASTC_8x6 }, - // [VK_FORMAT_ASTC_8x6_SRGB] = { .format = ASTC_8x6 }, - // [VK_FORMAT_ASTC_8x8_UNORM] = { .format = ASTC_8x8 }, - // [VK_FORMAT_ASTC_8x8_SRGB] = { .format = ASTC_8x8 }, - // [VK_FORMAT_ASTC_10x5_UNORM] = { .format = ASTC_10x5 }, - // [VK_FORMAT_ASTC_10x5_SRGB] = { .format = ASTC_10x5 }, - // [VK_FORMAT_ASTC_10x6_UNORM] = { .format = ASTC_10x6 }, - // [VK_FORMAT_ASTC_10x6_SRGB] = { .format = ASTC_10x6 }, - // [VK_FORMAT_ASTC_10x8_UNORM] = { .format = ASTC_10x8 }, - // [VK_FORMAT_ASTC_10x8_SRGB] = { .format = ASTC_10x8 }, - // [VK_FORMAT_ASTC_10x10_UNORM] = { .format = ASTC_10x10 }, - // [VK_FORMAT_ASTC_10x10_SRGB] = { .format = ASTC_10x10 }, - // [VK_FORMAT_ASTC_12x10_UNORM] = { .format = ASTC_12x10 }, - // [VK_FORMAT_ASTC_12x10_SRGB] = { .format = ASTC_12x10 }, - // [VK_FORMAT_ASTC_12x12_UNORM] = { .format = ASTC_12x12 }, - // [VK_FORMAT_ASTC_12x12_SRGB] = { .format = ASTC_12x12 }, - // [VK_FORMAT_B4G4R4A4_UNORM] = { .format = B4G4R4A4_UNORM }, - // [VK_FORMAT_B5G5R5A1_UNORM] = { .format = B5G5R5A1_UNORM }, - // [VK_FORMAT_B5G6R5_UNORM] = { .format = B5G6R5_UNORM }, - // [VK_FORMAT_B5G6R5_USCALED] = { .format = B5G6R5_USCALED }, - // [VK_FORMAT_B8G8R8_UNORM] = { .format = B8G8R8_UNORM }, - // [VK_FORMAT_B8G8R8_SNORM] = { .format = B8G8R8_SNORM }, - // [VK_FORMAT_B8G8R8_USCALED] = { .format = B8G8R8_USCALED }, - // [VK_FORMAT_B8G8R8_SSCALED] = { .format = B8G8R8_SSCALED }, - // [VK_FORMAT_B8G8R8_UINT] = 
{ .format = B8G8R8_UINT }, - // [VK_FORMAT_B8G8R8_SINT] = { .format = B8G8R8_SINT }, - // [VK_FORMAT_B8G8R8_SRGB] = { .format = B8G8R8_SRGB }, - [VK_FORMAT_B8G8R8A8_UNORM] = { .format = B8G8R8A8_UNORM, .cpp = 4, .channels = 4 }, - // [VK_FORMAT_B8G8R8A8_SNORM] = { .format = B8G8R8A8_SNORM }, - // [VK_FORMAT_B8G8R8A8_USCALED] = { .format = B8G8R8A8_USCALED }, - // [VK_FORMAT_B8G8R8A8_SSCALED] = { .format = B8G8R8A8_SSCALED }, - // [VK_FORMAT_B8G8R8A8_UINT] = { .format = B8G8R8A8_UINT }, - // [VK_FORMAT_B8G8R8A8_SINT] = { .format = B8G8R8A8_SINT }, - // [VK_FORMAT_B8G8R8A8_SRGB] = { .format = B8G8R8A8_SRGB }, - // [VK_FORMAT_B10G10R10A2_UNORM] = { .format = B10G10R10A2_UNORM }, - // [VK_FORMAT_B10G10R10A2_SNORM] = { .format = B10G10R10A2_SNORM }, - // [VK_FORMAT_B10G10R10A2_USCALED] = { .format = B10G10R10A2_USCALED }, - // [VK_FORMAT_B10G10R10A2_SSCALED] = { .format = B10G10R10A2_SSCALED }, - // [VK_FORMAT_B10G10R10A2_UINT] = { .format = B10G10R10A2_UINT }, - // [VK_FORMAT_B10G10R10A2_SINT] = { .format = B10G10R10A2_SINT } -}; - -const struct anv_format * -anv_format_for_vk_format(VkFormat format) -{ - return &anv_formats[format]; -} - static const struct anv_tile_mode_info { int32_t tile_width; int32_t tile_height; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index cf5b88f6a68..f7a9460bc0f 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -619,6 +619,8 @@ struct anv_format { uint32_t format; int32_t cpp; int32_t channels; + uint32_t linear_flags; + uint32_t tiled_flags; }; const struct anv_format * -- cgit v1.2.3 From a9f21154860e5dba7ad98eeab074c74a091852bd Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Sun, 17 May 2015 18:38:34 -0700 Subject: vk: Return VK_SUCCESS for all descriptor pool entry points --- src/vulkan/device.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index da5573d2f72..ae6d0bf9377 100644 --- a/src/vulkan/device.c +++ 
b/src/vulkan/device.c @@ -1529,14 +1529,14 @@ VkResult anv_BeginDescriptorPoolUpdate( VkDevice device, VkDescriptorUpdateMode updateMode) { - stub_return(VK_UNSUPPORTED); + return VK_SUCCESS; } VkResult anv_EndDescriptorPoolUpdate( VkDevice device, VkCmdBuffer cmd) { - stub_return(VK_UNSUPPORTED); + return VK_SUCCESS; } VkResult anv_CreateDescriptorPool( @@ -1546,14 +1546,16 @@ VkResult anv_CreateDescriptorPool( const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool) { - stub_return(VK_UNSUPPORTED); + *pDescriptorPool = 1; + + return VK_SUCCESS; } VkResult anv_ResetDescriptorPool( VkDevice device, VkDescriptorPool descriptorPool) { - stub_return(VK_UNSUPPORTED); + return VK_SUCCESS; } VkResult anv_AllocDescriptorSets( @@ -1594,7 +1596,6 @@ void anv_ClearDescriptorSets( uint32_t count, const VkDescriptorSet* pDescriptorSets) { - stub(); } void anv_UpdateDescriptors( -- cgit v1.2.3 From b4b3bd1c51847b56d22731ba764cb4bc585f752a Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Sun, 17 May 2015 18:39:12 -0700 Subject: vk: Return VK_SUCCESS from vkAllocDescriptorSets This should've been returning VK_SUCCESS all along. --- src/vulkan/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index ae6d0bf9377..1191682ffe3 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1587,7 +1587,7 @@ VkResult anv_AllocDescriptorSets( *pCount = count; - return VK_UNSUPPORTED; + return VK_SUCCESS; } void anv_ClearDescriptorSets( -- cgit v1.2.3 From 783e6217fcd5f7dc4125c16d47a985f63a2ab486 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Sun, 17 May 2015 19:22:52 -0700 Subject: vk: Change pData/pDataSize semantics We now always copy the entire struct unless pData is NULL and unconditionally write back the struct size. It's not clear this is useful if the structs may grow over time, but it seems to be the expected behaviour for now. 
--- src/vulkan/device.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 1191682ffe3..c76a8f73c4e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -190,8 +190,10 @@ VkResult anv_GetPhysicalDeviceInfo( switch (infoType) { case VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES: properties = pData; - assert(*pDataSize >= sizeof(*properties)); - *pDataSize = sizeof(*properties); /* Assuming we have to return the size of our struct. */ + + *pDataSize = sizeof(*properties); + if (pData == NULL) + return VK_SUCCESS; properties->apiVersion = 1; properties->driverVersion = 1; @@ -211,8 +213,10 @@ VkResult anv_GetPhysicalDeviceInfo( case VK_PHYSICAL_DEVICE_INFO_TYPE_PERFORMANCE: performance = pData; - assert(*pDataSize >= sizeof(*performance)); - *pDataSize = sizeof(*performance); /* Assuming we have to return the size of our struct. */ + + *pDataSize = sizeof(*performance); + if (pData == NULL) + return VK_SUCCESS; performance->maxDeviceClock = 1.0; performance->aluPerClock = 1.0; @@ -223,8 +227,10 @@ VkResult anv_GetPhysicalDeviceInfo( case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PROPERTIES: queue_properties = pData; - assert(*pDataSize >= sizeof(*queue_properties)); + *pDataSize = sizeof(*queue_properties); + if (pData == NULL) + return VK_SUCCESS; queue_properties->queueFlags = 0; queue_properties->queueCount = 1; @@ -235,8 +241,10 @@ VkResult anv_GetPhysicalDeviceInfo( case VK_PHYSICAL_DEVICE_INFO_TYPE_MEMORY_PROPERTIES: memory_properties = pData; - assert(*pDataSize >= sizeof(*memory_properties)); + *pDataSize = sizeof(*memory_properties); + if (pData == NULL) + return VK_SUCCESS; memory_properties->supportsMigration = false; memory_properties->supportsPinning = false; @@ -392,8 +400,11 @@ VkResult anv_GetPhysicalDeviceExtensionInfo( switch (infoType) { case VK_EXTENSION_INFO_TYPE_COUNT: + *pDataSize = 4; + if (pData == NULL) + return VK_SUCCESS; + count = 
pData; - assert(*pDataSize == 4); *count = 0; return VK_SUCCESS; @@ -1000,6 +1011,10 @@ VkResult anv_GetObjectInfo( switch (infoType) { case VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS: + *pDataSize = sizeof(memory_requirements); + if (pData == NULL) + return VK_SUCCESS; + fill_memory_requirements(objType, object, &memory_requirements); memcpy(pData, &memory_requirements, MIN2(*pDataSize, sizeof(memory_requirements))); -- cgit v1.2.3 From b7fac7a7d1f07ee36029084cc29b83b5a52f726c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Sun, 17 May 2015 19:25:28 -0700 Subject: vk: Implement allocation count query --- src/vulkan/device.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index c76a8f73c4e..6faa0b04eb4 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -999,6 +999,18 @@ fill_memory_requirements( } } +static uint32_t +get_allocation_count(VkObjectType objType) +{ + switch (objType) { + case VK_OBJECT_TYPE_BUFFER: + case VK_OBJECT_TYPE_IMAGE: + return 1; + default: + return 0; + } +} + VkResult anv_GetObjectInfo( VkDevice _device, VkObjectType objType, @@ -1008,6 +1020,7 @@ VkResult anv_GetObjectInfo( void* pData) { VkMemoryRequirements memory_requirements; + uint32_t count; switch (infoType) { case VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS: @@ -1018,10 +1031,16 @@ VkResult anv_GetObjectInfo( fill_memory_requirements(objType, object, &memory_requirements); memcpy(pData, &memory_requirements, MIN2(*pDataSize, sizeof(memory_requirements))); - *pDataSize = sizeof(memory_requirements); return VK_SUCCESS; case VK_OBJECT_INFO_TYPE_MEMORY_ALLOCATION_COUNT: + *pDataSize = sizeof(count); + if (pData == NULL) + return VK_SUCCESS; + + count = get_allocation_count(objType); + return VK_SUCCESS; + default: return VK_UNSUPPORTED; } -- cgit v1.2.3 From e26a7ffbd9c2a08d901f47f4adbe091f1b971347 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Sun, 17 May 2015 
19:30:27 -0700 Subject: vk/meta: Use anv_* internal entrypoints --- src/vulkan/glsl_helpers.h | 2 +- src/vulkan/meta.c | 172 +++++++++++++++++++++++----------------------- 2 files changed, 87 insertions(+), 87 deletions(-) (limited to 'src') diff --git a/src/vulkan/glsl_helpers.h b/src/vulkan/glsl_helpers.h index 3c72523586f..e6cd93298ac 100644 --- a/src/vulkan/glsl_helpers.h +++ b/src/vulkan/glsl_helpers.h @@ -32,6 +32,6 @@ .pCode = __src, \ .flags = (1 << 31) /* GLSL back-door hack */ \ }; \ - vkCreateShader((VkDevice) device, &__shader_create_info, &__shader); \ + anv_CreateShader((VkDevice) device, &__shader_create_info, &__shader); \ __shader; \ }) diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index c3679464151..f2e0a161027 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -137,13 +137,13 @@ anv_device_init_meta_clear_state(struct anv_device *device) }, &device->clear_state.pipeline); - vkDestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, fs); + anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, fs); - vkCreateDynamicRasterState((VkDevice) device, - &(VkDynamicRsStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, - }, - &device->clear_state.rs_state); + anv_CreateDynamicRasterState((VkDevice) device, + &(VkDynamicRsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, + }, + &device->clear_state.rs_state); } #define NUM_VB_USED 2 @@ -245,32 +245,32 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_save(cmd_buffer, &saved_state); - vkCmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, - (VkBuffer[]) { - (VkBuffer) &vertex_buffer, - (VkBuffer) &vertex_buffer - }, - (VkDeviceSize[]) { - 0, - sizeof(vertex_data) - }); + anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, + (VkBuffer[]) { + (VkBuffer) &vertex_buffer, + (VkBuffer) &vertex_buffer + }, + (VkDeviceSize[]) { + 0, + sizeof(vertex_data) + }); if ((VkPipeline) cmd_buffer->pipeline != 
device->clear_state.pipeline) - vkCmdBindPipeline((VkCmdBuffer) cmd_buffer, - VK_PIPELINE_BIND_POINT_GRAPHICS, device->clear_state.pipeline); + anv_CmdBindPipeline((VkCmdBuffer) cmd_buffer, + VK_PIPELINE_BIND_POINT_GRAPHICS, device->clear_state.pipeline); /* We don't need anything here, only set if not already set. */ if (cmd_buffer->rs_state == NULL) - vkCmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, - VK_STATE_BIND_POINT_RASTER, - device->clear_state.rs_state); + anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_RASTER, + device->clear_state.rs_state); if (cmd_buffer->vp_state == NULL) - vkCmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, - VK_STATE_BIND_POINT_VIEWPORT, - cmd_buffer->framebuffer->vp_state); + anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_VIEWPORT, + cmd_buffer->framebuffer->vp_state); - vkCmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, pass->num_clear_layers); + anv_CmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, pass->num_clear_layers); /* Restore API state */ anv_cmd_buffer_restore(cmd_buffer, &saved_state); @@ -392,8 +392,8 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, } }; - vkCreateDescriptorSetLayout((VkDevice) device, &ds_layout_info, - &device->blit_state.ds_layout); + anv_CreateDescriptorSetLayout((VkDevice) device, &ds_layout_info, + &device->blit_state.ds_layout); VkPipelineLayoutCreateInfo pipeline_layout_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, @@ -402,8 +402,8 @@ anv_device_init_meta_blit_state(struct anv_device *device) }; VkPipelineLayout pipeline_layout; - vkCreatePipelineLayout((VkDevice) device, &pipeline_layout_info, - &pipeline_layout); + anv_CreatePipelineLayout((VkDevice) device, &pipeline_layout_info, + &pipeline_layout); VkPipelineRsStateCreateInfo rs_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, @@ -432,14 +432,14 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, 
&device->blit_state.pipeline); - vkDestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, vs); - vkDestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, fs); + anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, vs); + anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, fs); - vkCreateDynamicRasterState((VkDevice) device, - &(VkDynamicRsStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, - }, - &device->blit_state.rs_state); + anv_CreateDynamicRasterState((VkDevice) device, + &(VkDynamicRsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, + }, + &device->blit_state.rs_state); } static void @@ -451,15 +451,15 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_save(cmd_buffer, saved_state); if ((VkPipeline) cmd_buffer->pipeline != device->blit_state.pipeline) - vkCmdBindPipeline((VkCmdBuffer) cmd_buffer, - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->blit_state.pipeline); + anv_CmdBindPipeline((VkCmdBuffer) cmd_buffer, + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->blit_state.pipeline); /* We don't need anything here, only set if not already set. 
*/ if (cmd_buffer->rs_state == NULL) - vkCmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, - VK_STATE_BIND_POINT_RASTER, - device->blit_state.rs_state); + anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_RASTER, + device->blit_state.rs_state); } struct blit_region { @@ -532,37 +532,37 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .offset = vb_state.offset, }; - vkCmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, - (VkBuffer[]) { - (VkBuffer) &vertex_buffer, - (VkBuffer) &vertex_buffer - }, - (VkDeviceSize[]) { - 0, - sizeof(struct vue_header), - }); + anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, + (VkBuffer[]) { + (VkBuffer) &vertex_buffer, + (VkBuffer) &vertex_buffer + }, + (VkDeviceSize[]) { + 0, + sizeof(struct vue_header), + }); uint32_t count; VkDescriptorSet set; - vkAllocDescriptorSets((VkDevice) device, 0 /* pool */, - VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, - 1, &device->blit_state.ds_layout, &set, &count); - vkUpdateDescriptors((VkDevice) device, set, 1, - (const void * []) { - &(VkUpdateImages) { - .sType = VK_STRUCTURE_TYPE_UPDATE_IMAGES, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .binding = 0, - .count = 1, - .pImageViews = (VkImageViewAttachInfo[]) { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_ATTACH_INFO, - .view = (VkImageView) src, - .layout = VK_IMAGE_LAYOUT_GENERAL, - } - } - } - }); + anv_AllocDescriptorSets((VkDevice) device, 0 /* pool */, + VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, + 1, &device->blit_state.ds_layout, &set, &count); + anv_UpdateDescriptors((VkDevice) device, set, 1, + (const void * []) { + &(VkUpdateImages) { + .sType = VK_STRUCTURE_TYPE_UPDATE_IMAGES, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .binding = 0, + .count = 1, + .pImageViews = (VkImageViewAttachInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_ATTACH_INFO, + .view = (VkImageView) src, + .layout = VK_IMAGE_LAYOUT_GENERAL, + } + } + } + }); VkFramebufferCreateInfo fb_info = { .sType = 
VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, @@ -581,7 +581,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, }; struct anv_framebuffer *fb; - vkCreateFramebuffer((VkDevice) device, &fb_info, (VkFramebuffer *)&fb); + anv_CreateFramebuffer((VkDevice) device, &fb_info, (VkFramebuffer *)&fb); VkRenderPassCreateInfo pass_info = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, @@ -601,24 +601,24 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, }; VkRenderPass pass; - vkCreateRenderPass((VkDevice )device, &pass_info, &pass); + anv_CreateRenderPass((VkDevice )device, &pass_info, &pass); - vkCmdBeginRenderPass((VkCmdBuffer) cmd_buffer, - &(VkRenderPassBegin) { - .renderPass = pass, - .framebuffer = (VkFramebuffer) fb, - }); + anv_CmdBeginRenderPass((VkCmdBuffer) cmd_buffer, + &(VkRenderPassBegin) { + .renderPass = pass, + .framebuffer = (VkFramebuffer) fb, + }); - vkCmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, - VK_STATE_BIND_POINT_VIEWPORT, fb->vp_state); + anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_VIEWPORT, fb->vp_state); - vkCmdBindDescriptorSets((VkCmdBuffer) cmd_buffer, - VK_PIPELINE_BIND_POINT_GRAPHICS, 0, 1, - &set, 0, NULL); + anv_CmdBindDescriptorSets((VkCmdBuffer) cmd_buffer, + VK_PIPELINE_BIND_POINT_GRAPHICS, 0, 1, + &set, 0, NULL); - vkCmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, 1); + anv_CmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, 1); - vkCmdEndRenderPass((VkCmdBuffer) cmd_buffer, pass); + anv_CmdEndRenderPass((VkCmdBuffer) cmd_buffer, pass); } static void @@ -712,7 +712,7 @@ void anv_CmdCopyImageToBuffer( }; VkImageView src_view; - vkCreateImageView(vk_device, &src_view_info, &src_view); + anv_CreateImageView(vk_device, &src_view_info, &src_view); VkImageCreateInfo dest_image_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, @@ -732,7 +732,7 @@ void anv_CmdCopyImageToBuffer( }; struct anv_image *dest_image; - vkCreateImage(vk_device, &dest_image_info, (VkImage *)&dest_image); + 
anv_CreateImage(vk_device, &dest_image_info, (VkImage *)&dest_image); /* We could use a vk call to bind memory, but that would require * creating a dummy memory object etc. so there's really no point. @@ -750,7 +750,7 @@ void anv_CmdCopyImageToBuffer( }; VkColorAttachmentView dest_view; - vkCreateColorAttachmentView(vk_device, &dest_view_info, &dest_view); + anv_CreateColorAttachmentView(vk_device, &dest_view_info, &dest_view); meta_emit_blit(cmd_buffer, (struct anv_surface_view *)src_view, -- cgit v1.2.3 From 6afb26452b95c4e35860ba69fdf31a055d6b62e8 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 08:49:15 -0700 Subject: vk: Implement fences This basic implementation uses a throw-away bo for synchronization. --- src/vulkan/device.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++----- src/vulkan/private.h | 7 +++ 2 files changed, 127 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 6faa0b04eb4..d87227cd772 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -574,10 +574,11 @@ VkResult anv_QueueSubmit( VkQueue _queue, uint32_t cmdBufferCount, const VkCmdBuffer* pCmdBuffers, - VkFence fence) + VkFence _fence) { struct anv_queue *queue = (struct anv_queue *) _queue; struct anv_device *device = queue->device; + struct anv_fence *fence = (struct anv_fence *) _fence; int ret; for (uint32_t i = 0; i < cmdBufferCount; i++) { @@ -592,6 +593,12 @@ VkResult anv_QueueSubmit( if (ret != 0) return vk_error(VK_ERROR_UNKNOWN); + if (fence) { + ret = anv_gem_execbuffer(device, &fence->execbuf); + if (ret != 0) + return vk_error(VK_ERROR_UNKNOWN); + } + for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset; } else { @@ -936,6 +943,19 @@ anv_free_destructor(struct anv_device * device, return VK_SUCCESS; } +static VkResult +anv_fence_destructor(struct anv_device * device, + VkObject object) +{ + struct anv_fence *fence 
= (struct anv_fence *) object; + + anv_gem_munmap(fence->bo.map, fence->bo.size); + anv_gem_close(device, fence->bo.gem_handle); + anv_device_free(device, fence); + + return VK_SUCCESS; +} + static VkResult (*anv_object_destructors[])(struct anv_device *device, VkObject object) = { [VK_OBJECT_TYPE_INSTANCE] = anv_instance_destructor, @@ -947,7 +967,8 @@ static VkResult (*anv_object_destructors[])(struct anv_device *device, [VK_OBJECT_TYPE_SHADER] = anv_free_destructor, [VK_OBJECT_TYPE_BUFFER] = anv_free_destructor, [VK_OBJECT_TYPE_IMAGE] = anv_free_destructor, - [VK_OBJECT_TYPE_RENDER_PASS] = anv_free_destructor + [VK_OBJECT_TYPE_RENDER_PASS] = anv_free_destructor, + [VK_OBJECT_TYPE_FENCE] = anv_fence_destructor }; VkResult anv_DestroyObject( @@ -1102,36 +1123,124 @@ VkResult anv_QueueBindImageMemoryRange( } VkResult anv_CreateFence( - VkDevice device, + VkDevice _device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence) { - stub_return(VK_UNSUPPORTED); + struct anv_device *device = (struct anv_device *) _device; + struct anv_fence *fence; + struct anv_batch batch; + VkResult result; + + const uint32_t fence_size = 128; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); + + fence = anv_device_alloc(device, sizeof(*fence), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (fence == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_init_new(&fence->bo, device, fence_size); + if (result != VK_SUCCESS) + goto fail; + + fence->bo.map = + anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size); + batch.next = fence->bo.map; + anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN8_MI_NOOP); + + fence->exec2_objects[0].handle = fence->bo.gem_handle; + fence->exec2_objects[0].relocation_count = 0; + fence->exec2_objects[0].relocs_ptr = 0; + fence->exec2_objects[0].alignment = 0; + fence->exec2_objects[0].offset = fence->bo.offset; + fence->exec2_objects[0].flags = 0; + fence->exec2_objects[0].rsvd1 = 
0; + fence->exec2_objects[0].rsvd2 = 0; + + fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; + fence->execbuf.buffer_count = 1; + fence->execbuf.batch_start_offset = 0; + fence->execbuf.batch_len = batch.next - fence->bo.map; + fence->execbuf.cliprects_ptr = 0; + fence->execbuf.num_cliprects = 0; + fence->execbuf.DR1 = 0; + fence->execbuf.DR4 = 0; + + fence->execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + fence->execbuf.rsvd1 = device->context_id; + fence->execbuf.rsvd2 = 0; + + *pFence = (VkQueryPool) fence; + + return VK_SUCCESS; + + fail: + anv_device_free(device, fence); + + return result; } VkResult anv_ResetFences( - VkDevice device, + VkDevice _device, uint32_t fenceCount, VkFence* pFences) { - stub_return(VK_UNSUPPORTED); + struct anv_fence **fences = (struct anv_fence **) pFences; + + for (uint32_t i; i < fenceCount; i++) + fences[i]->ready = false; + + return VK_SUCCESS; } VkResult anv_GetFenceStatus( - VkDevice device, - VkFence fence) + VkDevice _device, + VkFence _fence) { - stub_return(VK_UNSUPPORTED); + struct anv_device *device = (struct anv_device *) _device; + struct anv_fence *fence = (struct anv_fence *) _fence; + int64_t t = 0; + int ret; + + if (fence->ready) + return VK_SUCCESS; + + ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == 0) { + fence->ready = true; + return VK_SUCCESS; + } + + return VK_NOT_READY; } VkResult anv_WaitForFences( - VkDevice device, + VkDevice _device, uint32_t fenceCount, const VkFence* pFences, bool32_t waitAll, uint64_t timeout) { - stub_return(VK_UNSUPPORTED); + struct anv_device *device = (struct anv_device *) _device; + struct anv_fence **fences = (struct anv_fence **) pFences; + int64_t t = timeout; + int ret; + + /* FIXME: handle !waitAll */ + + for (uint32_t i = 0; i < fenceCount; i++) { + ret = anv_gem_wait(device, fences[i]->bo.gem_handle, &t); + if (ret == -1 && errno == ETIME) + return VK_TIMEOUT; + else if (ret == -1) + return 
vk_error(VK_ERROR_UNKNOWN); + } + + return VK_SUCCESS; } // Queue semaphore functions diff --git a/src/vulkan/private.h b/src/vulkan/private.h index f7a9460bc0f..1083d1fa142 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -550,6 +550,13 @@ struct anv_cmd_buffer { void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); void anv_aub_writer_destroy(struct anv_aub_writer *writer); +struct anv_fence { + struct anv_bo bo; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + bool ready; +}; + struct anv_shader { uint32_t size; char data[0]; -- cgit v1.2.3 From 05754549e86bb548f806d62f58ae1492c4968ef5 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 08:50:04 -0700 Subject: vk: Fix vkGetOjectInfo return values We weren't properly returning the allocation count. --- src/vulkan/device.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index d87227cd772..49b268a3739 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1041,7 +1041,7 @@ VkResult anv_GetObjectInfo( void* pData) { VkMemoryRequirements memory_requirements; - uint32_t count; + uint32_t *count; switch (infoType) { case VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS: @@ -1049,9 +1049,7 @@ VkResult anv_GetObjectInfo( if (pData == NULL) return VK_SUCCESS; - fill_memory_requirements(objType, object, &memory_requirements); - memcpy(pData, &memory_requirements, - MIN2(*pDataSize, sizeof(memory_requirements))); + fill_memory_requirements(objType, object, pData); return VK_SUCCESS; case VK_OBJECT_INFO_TYPE_MEMORY_ALLOCATION_COUNT: @@ -1059,7 +1057,8 @@ VkResult anv_GetObjectInfo( if (pData == NULL) return VK_SUCCESS; - count = get_allocation_count(objType); + count = pData; + *count = get_allocation_count(objType); return VK_SUCCESS; default: -- cgit v1.2.3 From eb92745b2e056bc4db36c3eba6a1fe59c943c478 Mon Sep 17 00:00:00 2001 From: Kristian 
Høgsberg Date: Mon, 18 May 2015 08:50:56 -0700 Subject: vk/gem: Just return -1 from anv_gem_wait() on error We were returning -errno, unlike all the other gem functions. --- src/vulkan/gem.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gem.c b/src/vulkan/gem.c index 5cc5e5d8e84..99cc0be71a8 100644 --- a/src/vulkan/gem.c +++ b/src/vulkan/gem.c @@ -155,8 +155,6 @@ anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns) ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); *timeout_ns = wait.timeout_ns; - if (ret == -1) - return -errno; return ret; } -- cgit v1.2.3 From ca7e62d4211c712810959a22b52437b18abd5b25 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 08:52:22 -0700 Subject: vk: Add a logger wrapper for the generated entrypoint --- src/vulkan/vk_gen.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src') diff --git a/src/vulkan/vk_gen.py b/src/vulkan/vk_gen.py index b82c90f73aa..b97b50d4d00 100644 --- a/src/vulkan/vk_gen.py +++ b/src/vulkan/vk_gen.py @@ -189,6 +189,20 @@ for type, name, args, num, h in entrypoints: print "static void *resolve_%s(void) { return resolve_entrypoint(%d); }" % (name, num) print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name) + +logger = """%s __attribute__ ((weak)) anv_validate_%s%s +{ + fprintf(stderr, "%%s\\n", strings + %d); + void *args = __builtin_apply_args(); + void *result = __builtin_apply((void *) anv_%s, args, 100); + __builtin_return(result); +} +""" + +for type, name, args, num, h in entrypoints: + print logger % (type, name, args, offsets[num], name) + + # Now generate the hash table used for entry point look up. This is a # uint16_t table of entry point indices. We use 0xffff to indicate an entry # in the hash table is empty. 
-- cgit v1.2.3 From f7b0f922bec7e0d14d72ff14e0cab794c53addc5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 09:57:12 -0700 Subject: vk/gem: Only VK_CLEAR the addr_ptr in gen_mmap --- src/vulkan/gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gem.c b/src/vulkan/gem.c index 99cc0be71a8..2833e0c2901 100644 --- a/src/vulkan/gem.c +++ b/src/vulkan/gem.c @@ -97,10 +97,10 @@ anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, struct drm_i915_gem_mmap gem_mmap; int ret; - VG_CLEAR(gem_mmap); gem_mmap.handle = gem_handle; gem_mmap.offset = offset; gem_mmap.size = size; + VG_CLEAR(gem_mmap.addr_ptr); ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); if (ret != 0) { /* FIXME: Is NULL the right error return? Cf MAP_INVALID */ -- cgit v1.2.3 From c15f3834e3a4c6aa348aed9296d1757d315a891e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 09:59:08 -0700 Subject: vk/gem: Set the gem_mmap.flags parameter to 0 if it exists --- src/vulkan/gem.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/vulkan/gem.c b/src/vulkan/gem.c index 2833e0c2901..dd70f7c1cf7 100644 --- a/src/vulkan/gem.c +++ b/src/vulkan/gem.c @@ -101,6 +101,11 @@ anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, gem_mmap.offset = offset; gem_mmap.size = size; VG_CLEAR(gem_mmap.addr_ptr); + +#ifdef I915_MMAP_WC + gem_mmap.flags = 0; +#endif + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); if (ret != 0) { /* FIXME: Is NULL the right error return? 
Cf MAP_INVALID */ -- cgit v1.2.3 From fb27d80781ef4a81166a9296813a5a9e43fc309f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 09:03:27 -0700 Subject: vk/meta: Add an initial implementation of vkCmdCopyImage Compile-tested only --- src/vulkan/meta.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index f2e0a161027..c734b157f21 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -647,7 +647,60 @@ void anv_CmdCopyImage( uint32_t regionCount, const VkImageCopy* pRegions) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; + VkDevice vk_device = (VkDevice) cmd_buffer->device; + struct anv_image *src_image = (struct anv_image *)srcImage; + struct anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageViewCreateInfo src_view_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].srcSubresource.aspect, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .mipLevels = 1, + .baseArraySlice = pRegions[r].srcSubresource.arraySlice, + .arraySize = 1 + }, + .minLod = 0 + }; + + VkImageView src_view; + vkCreateImageView(vk_device, &src_view_info, &src_view); + + VkColorAttachmentViewCreateInfo dest_view_info = { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = destImage, + .format = src_image->format, + .mipLevel = pRegions[r].destSubresource.mipLevel, + .baseArraySlice = pRegions[r].destSubresource.arraySlice, + .arraySize = 1, + }; + + VkColorAttachmentView dest_view; + vkCreateColorAttachmentView(vk_device, &dest_view_info, 
&dest_view); + + meta_emit_blit(cmd_buffer, + (struct anv_surface_view *)src_view, + pRegions[r].srcOffset, + pRegions[r].extent, + (struct anv_surface_view *)dest_view, + pRegions[r].destOffset, + pRegions[r].extent); + } + + meta_finish_blit(cmd_buffer, &saved_state); } void anv_CmdBlitImage( -- cgit v1.2.3 From 08bd554cdab3e5b90633eb0876ef8ab26528194b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 09:06:06 -0700 Subject: vk/meta: Add an initial implementation of vkCmdBlitImage Compile-tested only --- src/vulkan/meta.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index c734b157f21..c57b222fb71 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -712,7 +712,61 @@ void anv_CmdBlitImage( uint32_t regionCount, const VkImageBlit* pRegions) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; + VkDevice vk_device = (VkDevice) cmd_buffer->device; + struct anv_image *src_image = (struct anv_image *)srcImage; + struct anv_image *dest_image = (struct anv_image *)destImage; + struct anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageViewCreateInfo src_view_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].srcSubresource.aspect, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .mipLevels = 1, + .baseArraySlice = pRegions[r].srcSubresource.arraySlice, + .arraySize = 1 + }, + .minLod = 0 + }; + + VkImageView src_view; + vkCreateImageView(vk_device, &src_view_info, &src_view); + + VkColorAttachmentViewCreateInfo dest_view_info = { + 
.sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = destImage, + .format = dest_image->format, + .mipLevel = pRegions[r].destSubresource.mipLevel, + .baseArraySlice = pRegions[r].destSubresource.arraySlice, + .arraySize = 1, + }; + + VkColorAttachmentView dest_view; + vkCreateColorAttachmentView(vk_device, &dest_view_info, &dest_view); + + meta_emit_blit(cmd_buffer, + (struct anv_surface_view *)src_view, + pRegions[r].srcOffset, + pRegions[r].srcExtent, + (struct anv_surface_view *)dest_view, + pRegions[r].destOffset, + pRegions[r].destExtent); + } + + meta_finish_blit(cmd_buffer, &saved_state); } void anv_CmdCopyBufferToImage( -- cgit v1.2.3 From c25ce55fd384528f66f57b8bb80f5320cc03bf82 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 09:16:04 -0700 Subject: vk/meta: Add an initial implementation of vkCmdCopyBufferToImage Compile-tested only --- src/vulkan/meta.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index c57b222fb71..d94b679041b 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -777,7 +777,87 @@ void anv_CmdCopyBufferToImage( uint32_t regionCount, const VkBufferImageCopy* pRegions) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; + VkDevice vk_device = (VkDevice) cmd_buffer->device; + struct anv_buffer *src_buffer = (struct anv_buffer *)srcBuffer; + struct anv_image *dest_image = (struct anv_image *)destImage; + struct anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageCreateInfo src_image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = dest_image->format, + .extent = { + .width = pRegions[r].imageExtent.width, + .height = pRegions[r].imageExtent.height, + .depth = 1, + }, + .mipLevels = 1, + .arraySize 
= 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }; + + struct anv_image *src_image; + vkCreateImage(vk_device, &src_image_info, (VkImage *)&src_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + src_image->bo = src_buffer->bo; + src_image->offset = src_buffer->offset + pRegions[r].bufferOffset; + + VkImageViewCreateInfo src_view_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = (VkImage)src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].imageSubresource.aspect, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArraySlice = 0, + .arraySize = 1 + }, + .minLod = 0 + }; + + VkImageView src_view; + vkCreateImageView(vk_device, &src_view_info, &src_view); + + VkColorAttachmentViewCreateInfo dest_view_info = { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = (VkImage)dest_image, + .format = dest_image->format, + .mipLevel = pRegions[r].imageSubresource.mipLevel, + .baseArraySlice = pRegions[r].imageSubresource.arraySlice, + .arraySize = 1, + }; + + VkColorAttachmentView dest_view; + vkCreateColorAttachmentView(vk_device, &dest_view_info, &dest_view); + + meta_emit_blit(cmd_buffer, + (struct anv_surface_view *)src_view, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent, + (struct anv_surface_view *)dest_view, + pRegions[r].imageOffset, + pRegions[r].imageExtent); + } + + meta_finish_blit(cmd_buffer, &saved_state); } void anv_CmdCopyImageToBuffer( -- cgit v1.2.3 From cd7ab6ba4e5112c93fe24c02f4cf93e60627c616 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 09:42:09 -0700 Subject: vk/meta: Add an initial implementation of vkCmdCopyBuffer Compile-tested only 
--- src/vulkan/meta.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index d94b679041b..fe79fbe5807 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -635,7 +635,90 @@ void anv_CmdCopyBuffer( uint32_t regionCount, const VkBufferCopy* pRegions) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; + VkDevice vk_device = (VkDevice) cmd_buffer->device; + struct anv_buffer *src_buffer = (struct anv_buffer *)srcBuffer; + struct anv_buffer *dest_buffer = (struct anv_buffer *)destBuffer; + struct anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R8_UNORM, + .extent = { + .width = pRegions[r].copySize, + .height = 1, + .depth = 1, + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }; + + struct anv_image *src_image, *dest_image; + vkCreateImage(vk_device, &image_info, (VkImage *)&src_image); + vkCreateImage(vk_device, &image_info, (VkImage *)&dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + src_image->bo = src_buffer->bo; + src_image->offset = src_buffer->offset + pRegions[r].srcOffset; + dest_image->bo = dest_buffer->bo; + dest_image->offset = dest_buffer->offset + pRegions[r].destOffset; + + VkImageViewCreateInfo src_view_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = (VkImage)src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_R8_UNORM, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = VK_IMAGE_ASPECT_COLOR, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArraySlice = 0, + .arraySize = 1 + }, + .minLod = 0 + }; + + VkImageView src_view; + vkCreateImageView(vk_device, &src_view_info, &src_view); + + VkColorAttachmentViewCreateInfo dest_view_info = { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = (VkImage)dest_image, + .format = VK_FORMAT_R8_UNORM, + .mipLevel = 0, + .baseArraySlice = 0, + .arraySize = 1, + }; + + VkColorAttachmentView dest_view; + vkCreateColorAttachmentView(vk_device, &dest_view_info, &dest_view); + + meta_emit_blit(cmd_buffer, + (struct anv_surface_view *)src_view, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { pRegions[r].copySize, 0, 0 }, + (struct anv_surface_view *)dest_view, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { pRegions[r].copySize, 0, 0 }); + } + + meta_finish_blit(cmd_buffer, &saved_state); } void anv_CmdCopyImage( -- cgit v1.2.3 From 69fd473321c4a81ee08605cd16eb7692dd9fe25b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 09:05:11 -0700 Subject: vk: Use a temporary buffer for formatting in finishme This is more likely to avoid breaking up the message when racing with other threads. 
--- src/vulkan/util.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/util.c b/src/vulkan/util.c index 92f9e407684..082499d6093 100644 --- a/src/vulkan/util.c +++ b/src/vulkan/util.c @@ -34,12 +34,13 @@ void __anv_finishme(const char *file, int line, const char *format, ...) { va_list ap; + char buffer[256]; va_start(ap, format); - fprintf(stderr, "%s:%d: FINISHME: ", file, line); - vfprintf(stderr, format, ap); - fprintf(stderr, "\n"); + vsnprintf(buffer, sizeof(buffer), format, ap); va_end(ap); + + fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); } int -- cgit v1.2.3 From 5286ef7849d5699636a84169da3cfe6ba3b86a29 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 10:17:53 -0700 Subject: vk: Provide more realistic values for device info --- src/vulkan/device.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 49b268a3739..b3fa69015c1 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -202,11 +202,11 @@ VkResult anv_GetPhysicalDeviceInfo( properties->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; strcpy(properties->deviceName, device->name); properties->maxInlineMemoryUpdateSize = 0; - properties->maxBoundDescriptorSets = 0; - properties->maxThreadGroupSize = 0; + properties->maxBoundDescriptorSets = MAX_SETS; + properties->maxThreadGroupSize = 512; properties->timestampFrequency = 1000 * 1000 * 1000 / ns_per_tick; - properties->multiColorAttachmentClears = 0; - properties->maxDescriptorSets = 2; + properties->multiColorAttachmentClears = true; + properties->maxDescriptorSets = 8; properties->maxViewports = 16; properties->maxColorAttachments = 8; return VK_SUCCESS; @@ -235,8 +235,8 @@ VkResult anv_GetPhysicalDeviceInfo( queue_properties->queueFlags = 0; queue_properties->queueCount = 1; queue_properties->maxAtomicCounters = 0; - queue_properties->supportsTimestamps = 
0; - queue_properties->maxMemReferences = 0; + queue_properties->supportsTimestamps = true; + queue_properties->maxMemReferences = 256; return VK_SUCCESS; case VK_PHYSICAL_DEVICE_INFO_TYPE_MEMORY_PROPERTIES: -- cgit v1.2.3 From 8440b13f5518b00788b70fe7214227c22afacb59 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 10:41:09 -0700 Subject: vk/meta: Rework the indentation style No functional change. --- src/vulkan/meta.c | 557 ++++++++++++++++++++++++++---------------------------- 1 file changed, 272 insertions(+), 285 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index fe79fbe5807..ebdf1a2b08d 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -124,26 +124,26 @@ anv_device_init_meta_clear_state(struct anv_device *device) }; anv_pipeline_create((VkDevice) device, - &(VkGraphicsPipelineCreateInfo) { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = &rs_create_info, - .flags = 0, - .layout = 0 - }, - &(struct anv_pipeline_create_info) { - .use_repclear = true, - .disable_viewport = true, - .use_rectlist = true - }, - &device->clear_state.pipeline); + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = &rs_create_info, + .flags = 0, + .layout = 0 + }, + &(struct anv_pipeline_create_info) { + .use_repclear = true, + .disable_viewport = true, + .use_rectlist = true + }, + &device->clear_state.pipeline); anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, fs); anv_CreateDynamicRasterState((VkDevice) device, - &(VkDynamicRsStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, - }, - &device->clear_state.rs_state); + &(VkDynamicRsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, + }, + &device->clear_state.rs_state); } #define NUM_VB_USED 2 @@ -246,14 +246,14 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_save(cmd_buffer, &saved_state); 
anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, - (VkBuffer[]) { - (VkBuffer) &vertex_buffer, - (VkBuffer) &vertex_buffer - }, - (VkDeviceSize[]) { - 0, - sizeof(vertex_data) - }); + (VkBuffer[]) { + (VkBuffer) &vertex_buffer, + (VkBuffer) &vertex_buffer + }, + (VkDeviceSize[]) { + 0, + sizeof(vertex_data) + }); if ((VkPipeline) cmd_buffer->pipeline != device->clear_state.pipeline) anv_CmdBindPipeline((VkCmdBuffer) cmd_buffer, @@ -436,10 +436,10 @@ anv_device_init_meta_blit_state(struct anv_device *device) anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, fs); anv_CreateDynamicRasterState((VkDevice) device, - &(VkDynamicRsStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, - }, - &device->blit_state.rs_state); + &(VkDynamicRsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, + }, + &device->blit_state.rs_state); } static void @@ -533,14 +533,14 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, }; anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, - (VkBuffer[]) { - (VkBuffer) &vertex_buffer, - (VkBuffer) &vertex_buffer - }, - (VkDeviceSize[]) { - 0, - sizeof(struct vue_header), - }); + (VkBuffer[]) { + (VkBuffer) &vertex_buffer, + (VkBuffer) &vertex_buffer + }, + (VkDeviceSize[]) { + 0, + sizeof(struct vue_header), + }); uint32_t count; VkDescriptorSet set; @@ -548,66 +548,65 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, 1, &device->blit_state.ds_layout, &set, &count); anv_UpdateDescriptors((VkDevice) device, set, 1, - (const void * []) { - &(VkUpdateImages) { - .sType = VK_STRUCTURE_TYPE_UPDATE_IMAGES, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .binding = 0, - .count = 1, - .pImageViews = (VkImageViewAttachInfo[]) { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_ATTACH_INFO, - .view = (VkImageView) src, - .layout = VK_IMAGE_LAYOUT_GENERAL, - } - } - } - }); - - VkFramebufferCreateInfo fb_info = { - .sType = 
VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .colorAttachmentCount = 1, - .pColorAttachments = (VkColorAttachmentBindInfo[]) { - { - .view = (VkColorAttachmentView) dest, - .layout = VK_IMAGE_LAYOUT_GENERAL + (const void * []) { + &(VkUpdateImages) { + .sType = VK_STRUCTURE_TYPE_UPDATE_IMAGES, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .binding = 0, + .count = 1, + .pImageViews = (VkImageViewAttachInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_ATTACH_INFO, + .view = (VkImageView) src, + .layout = VK_IMAGE_LAYOUT_GENERAL, + } + } } - }, - .pDepthStencilAttachment = NULL, - .sampleCount = 1, - .width = dest->extent.width, - .height = dest->extent.height, - .layers = 1 - }; + }); struct anv_framebuffer *fb; - anv_CreateFramebuffer((VkDevice) device, &fb_info, (VkFramebuffer *)&fb); - - VkRenderPassCreateInfo pass_info = { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .renderArea = { { 0, 0 }, { dest->extent.width, dest->extent.height } }, - .colorAttachmentCount = 1, - .extent = { 0, }, - .sampleCount = 1, - .layers = 1, - .pColorFormats = (VkFormat[]) { dest->format }, - .pColorLayouts = (VkImageLayout[]) { VK_IMAGE_LAYOUT_GENERAL }, - .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_LOAD }, - .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, - .pColorLoadClearValues = (VkClearColor[]) { - { .color = { .floatColor = { 1.0, 0.0, 0.0, 1.0 } }, .useRawValue = false } - }, - .depthStencilFormat = VK_FORMAT_UNDEFINED, - }; + anv_CreateFramebuffer((VkDevice) device, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .colorAttachmentCount = 1, + .pColorAttachments = (VkColorAttachmentBindInfo[]) { + { + .view = (VkColorAttachmentView) dest, + .layout = VK_IMAGE_LAYOUT_GENERAL + } + }, + .pDepthStencilAttachment = NULL, + .sampleCount = 1, + .width = dest->extent.width, + .height = dest->extent.height, + .layers = 1 + }, (VkFramebuffer *)&fb); + VkRenderPass pass; - 
anv_CreateRenderPass((VkDevice )device, &pass_info, &pass); + anv_CreateRenderPass((VkDevice )device, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .renderArea = { { 0, 0 }, { dest->extent.width, dest->extent.height } }, + .colorAttachmentCount = 1, + .extent = { 0, }, + .sampleCount = 1, + .layers = 1, + .pColorFormats = (VkFormat[]) { dest->format }, + .pColorLayouts = (VkImageLayout[]) { VK_IMAGE_LAYOUT_GENERAL }, + .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_LOAD }, + .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, + .pColorLoadClearValues = (VkClearColor[]) { + { .color = { .floatColor = { 1.0, 0.0, 0.0, 1.0 } }, .useRawValue = false } + }, + .depthStencilFormat = VK_FORMAT_UNDEFINED, + }, &pass); anv_CmdBeginRenderPass((VkCmdBuffer) cmd_buffer, - &(VkRenderPassBegin) { - .renderPass = pass, - .framebuffer = (VkFramebuffer) fb, - }); + &(VkRenderPassBegin) { + .renderPass = pass, + .framebuffer = (VkFramebuffer) fb, + }); anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, VK_STATE_BIND_POINT_VIEWPORT, fb->vp_state); @@ -673,41 +672,39 @@ void anv_CmdCopyBuffer( dest_image->bo = dest_buffer->bo; dest_image->offset = dest_buffer->offset + pRegions[r].destOffset; - VkImageViewCreateInfo src_view_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = (VkImage)src_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = VK_FORMAT_R8_UNORM, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspect = VK_IMAGE_ASPECT_COLOR, - .baseMipLevel = 0, - .mipLevels = 1, - .baseArraySlice = 0, - .arraySize = 1 - }, - .minLod = 0 - }; - VkImageView src_view; - vkCreateImageView(vk_device, &src_view_info, &src_view); - - VkColorAttachmentViewCreateInfo dest_view_info = { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, - .image = (VkImage)dest_image, - .format = 
VK_FORMAT_R8_UNORM, - .mipLevel = 0, - .baseArraySlice = 0, - .arraySize = 1, - }; + anv_CreateImageView(vk_device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = (VkImage)src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_R8_UNORM, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = VK_IMAGE_ASPECT_COLOR, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArraySlice = 0, + .arraySize = 1 + }, + .minLod = 0 + }, &src_view); VkColorAttachmentView dest_view; - vkCreateColorAttachmentView(vk_device, &dest_view_info, &dest_view); + anv_CreateColorAttachmentView(vk_device, + &(VkColorAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = (VkImage)dest_image, + .format = VK_FORMAT_R8_UNORM, + .mipLevel = 0, + .baseArraySlice = 0, + .arraySize = 1, + }, &dest_view); meta_emit_blit(cmd_buffer, (struct anv_surface_view *)src_view, @@ -738,41 +735,39 @@ void anv_CmdCopyImage( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - VkImageViewCreateInfo src_view_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspect = pRegions[r].srcSubresource.aspect, - .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .mipLevels = 1, - .baseArraySlice = pRegions[r].srcSubresource.arraySlice, - .arraySize = 1 - }, - .minLod = 0 - }; - VkImageView src_view; - vkCreateImageView(vk_device, &src_view_info, &src_view); - - VkColorAttachmentViewCreateInfo dest_view_info = { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, - .image = destImage, - .format = src_image->format, - .mipLevel = 
pRegions[r].destSubresource.mipLevel, - .baseArraySlice = pRegions[r].destSubresource.arraySlice, - .arraySize = 1, - }; + anv_CreateImageView(vk_device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].srcSubresource.aspect, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .mipLevels = 1, + .baseArraySlice = pRegions[r].srcSubresource.arraySlice, + .arraySize = 1 + }, + .minLod = 0 + }, &src_view); VkColorAttachmentView dest_view; - vkCreateColorAttachmentView(vk_device, &dest_view_info, &dest_view); + anv_CreateColorAttachmentView(vk_device, + &(VkColorAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = destImage, + .format = src_image->format, + .mipLevel = pRegions[r].destSubresource.mipLevel, + .baseArraySlice = pRegions[r].destSubresource.arraySlice, + .arraySize = 1, + }, &dest_view); meta_emit_blit(cmd_buffer, (struct anv_surface_view *)src_view, @@ -804,41 +799,39 @@ void anv_CmdBlitImage( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - VkImageViewCreateInfo src_view_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspect = pRegions[r].srcSubresource.aspect, - .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .mipLevels = 1, - .baseArraySlice = pRegions[r].srcSubresource.arraySlice, - .arraySize = 1 - }, - .minLod = 0 - }; - VkImageView src_view; - vkCreateImageView(vk_device, &src_view_info, &src_view); - - 
VkColorAttachmentViewCreateInfo dest_view_info = { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, - .image = destImage, - .format = dest_image->format, - .mipLevel = pRegions[r].destSubresource.mipLevel, - .baseArraySlice = pRegions[r].destSubresource.arraySlice, - .arraySize = 1, - }; + anv_CreateImageView(vk_device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].srcSubresource.aspect, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .mipLevels = 1, + .baseArraySlice = pRegions[r].srcSubresource.arraySlice, + .arraySize = 1 + }, + .minLod = 0 + }, &src_view); VkColorAttachmentView dest_view; - vkCreateColorAttachmentView(vk_device, &dest_view_info, &dest_view); + anv_CreateColorAttachmentView(vk_device, + &(VkColorAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = destImage, + .format = dest_image->format, + .mipLevel = pRegions[r].destSubresource.mipLevel, + .baseArraySlice = pRegions[r].destSubresource.arraySlice, + .arraySize = 1, + }, &dest_view); meta_emit_blit(cmd_buffer, (struct anv_surface_view *)src_view, @@ -869,25 +862,24 @@ void anv_CmdCopyBufferToImage( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - VkImageCreateInfo src_image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = dest_image->format, - .extent = { - .width = pRegions[r].imageExtent.width, - .height = pRegions[r].imageExtent.height, - .depth = 1, - }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT, - .flags = 0, - }; - struct anv_image *src_image; - 
vkCreateImage(vk_device, &src_image_info, (VkImage *)&src_image); + anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = dest_image->format, + .extent = { + .width = pRegions[r].imageExtent.width, + .height = pRegions[r].imageExtent.height, + .depth = 1, + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }, (VkImage *)&src_image); /* We could use a vk call to bind memory, but that would require * creating a dummy memory object etc. so there's really no point. @@ -895,41 +887,39 @@ void anv_CmdCopyBufferToImage( src_image->bo = src_buffer->bo; src_image->offset = src_buffer->offset + pRegions[r].bufferOffset; - VkImageViewCreateInfo src_view_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = (VkImage)src_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = dest_image->format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspect = pRegions[r].imageSubresource.aspect, - .baseMipLevel = 0, - .mipLevels = 1, - .baseArraySlice = 0, - .arraySize = 1 - }, - .minLod = 0 - }; - VkImageView src_view; - vkCreateImageView(vk_device, &src_view_info, &src_view); - - VkColorAttachmentViewCreateInfo dest_view_info = { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, - .image = (VkImage)dest_image, - .format = dest_image->format, - .mipLevel = pRegions[r].imageSubresource.mipLevel, - .baseArraySlice = pRegions[r].imageSubresource.arraySlice, - .arraySize = 1, - }; + anv_CreateImageView(vk_device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = (VkImage)src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + 
VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].imageSubresource.aspect, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArraySlice = 0, + .arraySize = 1 + }, + .minLod = 0 + }, &src_view); VkColorAttachmentView dest_view; - vkCreateColorAttachmentView(vk_device, &dest_view_info, &dest_view); + anv_CreateColorAttachmentView(vk_device, + &(VkColorAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = (VkImage)dest_image, + .format = dest_image->format, + .mipLevel = pRegions[r].imageSubresource.mipLevel, + .baseArraySlice = pRegions[r].imageSubresource.arraySlice, + .arraySize = 1, + }, &dest_view); meta_emit_blit(cmd_buffer, (struct anv_surface_view *)src_view, @@ -960,49 +950,47 @@ void anv_CmdCopyImageToBuffer( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - VkImageViewCreateInfo src_view_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspect = pRegions[r].imageSubresource.aspect, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .mipLevels = 1, - .baseArraySlice = pRegions[r].imageSubresource.arraySlice, - .arraySize = 1 - }, - .minLod = 0 - }; - VkImageView src_view; - anv_CreateImageView(vk_device, &src_view_info, &src_view); - - VkImageCreateInfo dest_image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = src_image->format, - .extent = { - .width = pRegions[r].imageExtent.width, - .height = pRegions[r].imageExtent.height, - .depth = 1, - }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT, - .flags = 0, - }; + anv_CreateImageView(vk_device, + &(VkImageViewCreateInfo) 
{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].imageSubresource.aspect, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .mipLevels = 1, + .baseArraySlice = pRegions[r].imageSubresource.arraySlice, + .arraySize = 1 + }, + .minLod = 0 + }, &src_view); struct anv_image *dest_image; - anv_CreateImage(vk_device, &dest_image_info, (VkImage *)&dest_image); + anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = src_image->format, + .extent = { + .width = pRegions[r].imageExtent.width, + .height = pRegions[r].imageExtent.height, + .depth = 1, + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }, (VkImage *)&dest_image); /* We could use a vk call to bind memory, but that would require * creating a dummy memory object etc. so there's really no point. 
@@ -1010,17 +998,16 @@ void anv_CmdCopyImageToBuffer( dest_image->bo = dest_buffer->bo; dest_image->offset = dest_buffer->offset + pRegions[r].bufferOffset; - VkColorAttachmentViewCreateInfo dest_view_info = { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, - .image = (VkImage)dest_image, - .format = src_image->format, - .mipLevel = 0, - .baseArraySlice = 0, - .arraySize = 1, - }; - VkColorAttachmentView dest_view; - anv_CreateColorAttachmentView(vk_device, &dest_view_info, &dest_view); + anv_CreateColorAttachmentView(vk_device, + &(VkColorAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = (VkImage)dest_image, + .format = src_image->format, + .mipLevel = 0, + .baseArraySlice = 0, + .arraySize = 1, + }, &dest_view); meta_emit_blit(cmd_buffer, (struct anv_surface_view *)src_view, -- cgit v1.2.3 From 28804fb9e4fe8b0437569505cfa8df8218395482 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 12:05:17 -0700 Subject: vk/gem: VG_CLEAR the padding for the gem_mmap struct --- src/vulkan/gem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/gem.c b/src/vulkan/gem.c index dd70f7c1cf7..8598deedc71 100644 --- a/src/vulkan/gem.c +++ b/src/vulkan/gem.c @@ -98,6 +98,7 @@ anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, int ret; gem_mmap.handle = gem_handle; + VG_CLEAR(gem_mmap.pad); gem_mmap.offset = offset; gem_mmap.size = size; VG_CLEAR(gem_mmap.addr_ptr); -- cgit v1.2.3 From 682d11a6e8110e817a25c2810fecf994d38cda8f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 15:46:42 -0700 Subject: vk/allocator: Assert that block_pool_grow succeeds --- src/vulkan/allocator.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 4c293f12b1a..94b7d548bfc 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -313,20 +313,25 @@ 
anv_block_pool_alloc(struct anv_block_pool *pool) uint32_t offset, block, size; /* Try free list first. */ - if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) + if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { + assert(pool->map); return offset; + } restart: size = pool->size; block = __sync_fetch_and_add(&pool->next_block, pool->block_size); if (block < size) { + assert(pool->map); return block; } else if (block == size) { /* We allocated the first block outside the pool, we have to grow it. * pool->next_block acts a mutex: threads who try to allocate now will * get block indexes above the current limit and hit futex_wait * below. */ - anv_block_pool_grow(pool); + int err = anv_block_pool_grow(pool); + assert(err == 0); + (void) err; futex_wake(&pool->size, INT_MAX); } else { futex_wait(&pool->size, size); -- cgit v1.2.3 From b6ab076d6b7dd91ce0dda8741ba974b2402fa726 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 15:58:02 -0700 Subject: vk/allocator: Don't use memfd when valgrind is detected --- src/vulkan/allocator.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 94b7d548bfc..5cabc466841 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -203,7 +203,11 @@ anv_block_pool_init(struct anv_block_pool *pool, * without copying. It breaks valgrind however, so we have a MAP_ANONYMOUS * path we can take for valgrind debugging. 
*/ -#define USE_MEMFD 1 +#ifdef HAVE_VALGRIND +# define USE_MEMFD 0 +#else +# define USE_MEMFD 1 +#endif void anv_block_pool_finish(struct anv_block_pool *pool) -- cgit v1.2.3 From 4063b7deb8f1d0c4f675d7e503be384fb60ee2d1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 12:06:02 -0700 Subject: vk/allocator: Add support for valgrind tracking of state pools and streams We leave the block pool untracked so that reads/writes to freed blocks will get caught and do the tracking at the state pool/stream level. We have to do a few extra gymnastics for streams because valgrind works in terms of poitners and we work in terms of separate map and offset. Fortunately, the users of the state pool and stream should always be using the map pointer provided in the anv_state structure. We just have to track, per block, the map that was used when we initially got the block. Then we can make sure we always use that map and valgrind should stay happy. --- src/vulkan/allocator.c | 69 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 5cabc466841..aeffe761407 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -36,6 +36,27 @@ #include "private.h" +#ifdef HAVE_VALGRIND +#include +#include +#define VG(x) x +#define VG_NOACCESS_READ(__ptr) ({ \ + VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \ + __typeof(*(__ptr)) __val = *(__ptr); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\ + __val; \ +}) +#define VG_NOACCESS_WRITE(__ptr, __val) ({ \ + VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \ + *(__ptr) = (__val); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \ +}) +#else +#define VG(x) +#define VG_NOACCESS_READ(__ptr) (*(__ptr)) +#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val)) +#endif + /* Design goals: * * - Lock free (except when resizing underlying bos) @@ -147,7 
+168,8 @@ anv_free_list_pop(union anv_free_list *list, void **map, uint32_t *offset) */ __sync_synchronize(); - next.offset = *(uint32_t *)(*map + current.offset); + uint32_t *next_ptr = *map + current.offset; + next.offset = VG_NOACCESS_READ(next_ptr); next.count = current.count + 1; old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, next.u64); if (old.u64 == current.u64) { @@ -169,7 +191,7 @@ anv_free_list_push(union anv_free_list *list, void *map, uint32_t offset) old = *list; do { current = old; - *next_ptr = current.offset; + VG_NOACCESS_WRITE(next_ptr, current.offset); new.offset = offset; new.count = current.count + 1; old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); @@ -430,6 +452,7 @@ anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], pool->block_pool); state.map = pool->block_pool->map + state.offset; + VG(VALGRIND_MALLOCLIKE_BLOCK(state.map, size, 0, false)); return state; } @@ -442,6 +465,7 @@ anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) size_log2 <= ANV_MAX_STATE_SIZE_LOG2); unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + VG(VALGRIND_FREELIKE_BLOCK(state.map, 0)); anv_fixed_size_state_pool_free(&pool->buckets[bucket], pool->block_pool, state.offset); } @@ -449,6 +473,13 @@ anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) #define NULL_BLOCK 1 struct stream_block { uint32_t next; + + /* The map for the BO at the time the block was givne to us */ + void *current_map; + +#ifdef HAVE_VALGRIND + void *_vg_ptr; +#endif }; /* The state stream allocator is a one-shot, single threaded allocator for @@ -473,7 +504,8 @@ anv_state_stream_finish(struct anv_state_stream *stream) block = stream->current_block; while (block != 1) { sb = stream->block_pool->map + block; - next_block = sb->next; + next_block = VG_NOACCESS_READ(&sb->next); + 
VG(VALGRIND_FREELIKE_BLOCK(VG_NOACCESS_READ(&sb->_vg_ptr), 0)); anv_block_pool_free(stream->block_pool, block); block = next_block; } @@ -490,8 +522,11 @@ anv_state_stream_alloc(struct anv_state_stream *stream, state.offset = ALIGN_U32(stream->next, alignment); if (state.offset + size > stream->end) { block = anv_block_pool_alloc(stream->block_pool); - sb = stream->block_pool->map + block; - sb->next = stream->current_block; + void *current_map = stream->block_pool->map; + sb = current_map + block; + VG_NOACCESS_WRITE(&sb->current_map, current_map); + VG_NOACCESS_WRITE(&sb->next, stream->current_block); + VG(VG_NOACCESS_WRITE(&sb->_vg_ptr, 0)); stream->current_block = block; stream->next = block + sizeof(*sb); stream->end = block + stream->block_pool->block_size; @@ -499,10 +534,30 @@ anv_state_stream_alloc(struct anv_state_stream *stream, assert(state.offset + size <= stream->end); } - stream->next = state.offset + size; + sb = stream->block_pool->map + stream->current_block; + void *current_map = VG_NOACCESS_READ(&sb->current_map); + state.map = current_map + state.offset; state.alloc_size = size; - state.map = stream->block_pool->map + state.offset; + +#ifdef HAVE_VALGRIND + void *vg_ptr = VG_NOACCESS_READ(&sb->_vg_ptr); + if (vg_ptr == NULL) { + vg_ptr = state.map; + VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); + VG(VALGRIND_MALLOCLIKE_BLOCK(vg_ptr, size, 0, false)); + } else { + ptrdiff_t vg_offset = vg_ptr - current_map; + assert(vg_offset >= stream->current_block && + vg_offset < stream->end); + VALGRIND_RESIZEINPLACE_BLOCK(vg_ptr, + stream->next - vg_offset, + (state.offset + size) - vg_offset, + 0); + } +#endif + + stream->next = state.offset + size; return state; } -- cgit v1.2.3 From b6c7d8c9110db79b0046539a639ea692f1ba5861 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 15:47:28 -0700 Subject: vk/pipeline: Use a state_stream for storing programs Previously, we were effectively using a state_stream, it was just hand-rolled based on a 
block pool. Now we actually use the data structure. --- src/vulkan/compiler.cpp | 56 +++++++++++++++++-------------------------------- src/vulkan/pipeline.c | 3 +++ src/vulkan/private.h | 3 +-- 3 files changed, 23 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 6f15b77426c..7743fba2c9d 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -235,11 +235,11 @@ really_do_vs_prog(struct brw_context *brw, return false; } - pipeline->vs_simd8 = pipeline->program_next; - memcpy((char *) pipeline->device->instruction_block_pool.map + - pipeline->vs_simd8, program, program_size); + struct anv_state vs_state = anv_state_stream_alloc(&pipeline->program_stream, + program_size, 64); + memcpy(vs_state.map, program, program_size); - pipeline->program_next = align(pipeline->program_next + program_size, 64); + pipeline->vs_simd8 = vs_state.offset; ralloc_free(mem_ctx); @@ -463,7 +463,6 @@ really_do_wm_prog(struct brw_context *brw, struct gl_shader *fs = NULL; unsigned int program_size; const uint32_t *program; - uint32_t offset; if (prog) fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; @@ -514,21 +513,20 @@ really_do_wm_prog(struct brw_context *brw, return false; } - offset = pipeline->program_next; - pipeline->program_next = align(pipeline->program_next + program_size, 64); + struct anv_state ps_state = anv_state_stream_alloc(&pipeline->program_stream, + program_size, 64); + memcpy(ps_state.map, program, program_size); if (prog_data->no_8) pipeline->ps_simd8 = NO_KERNEL; else - pipeline->ps_simd8 = offset; + pipeline->ps_simd8 = ps_state.offset; - if (prog_data->no_8 || prog_data->prog_offset_16) - pipeline->ps_simd16 = offset + prog_data->prog_offset_16; - else + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = ps_state.offset + prog_data->prog_offset_16; + } else { pipeline->ps_simd16 = NO_KERNEL; - - memcpy((char *) pipeline->device->instruction_block_pool.map + - 
offset, program, program_size); + } ralloc_free(mem_ctx); @@ -574,7 +572,6 @@ really_do_gs_prog(struct brw_context *brw, struct brw_gs_prog_key *key, struct anv_pipeline *pipeline) { struct brw_gs_compile_output output; - uint32_t offset; /* FIXME: We pass the bind map to the compile in the output struct. Need * something better. */ @@ -583,15 +580,13 @@ really_do_gs_prog(struct brw_context *brw, brw_compile_gs_prog(brw, prog, gp, key, &output); - offset = pipeline->program_next; - pipeline->program_next = align(pipeline->program_next + output.program_size, 64); + struct anv_state gs_state = anv_state_stream_alloc(&pipeline->program_stream, + output.program_size, 64); + memcpy(gs_state.map, output.program, output.program_size); - pipeline->gs_vec4 = offset; + pipeline->gs_vec4 = gs_state.offset; pipeline->gs_vertex_count = gp->program.VerticesIn; - memcpy((char *) pipeline->device->instruction_block_pool.map + - offset, output.program, output.program_size); - ralloc_free(output.mem_ctx); if (output.prog_data.base.base.total_scratch) { @@ -601,8 +596,6 @@ really_do_gs_prog(struct brw_context *brw, return false; } - memcpy(&pipeline->gs_prog_data, &output.prog_data, sizeof pipeline->gs_prog_data); - return true; } @@ -854,10 +847,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) fail_on_compile_error(program->LinkStatus, program->InfoLog); - pipeline->program_block = - anv_block_pool_alloc(&device->instruction_block_pool); - pipeline->program_next = pipeline->program_block; - + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); bool success; struct brw_wm_prog_key wm_key; @@ -906,12 +897,6 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) pipeline->gs_vec4 = NO_KERNEL; } - - /* FIXME: Allocate more blocks if we fill up this one and worst case, - * allocate multiple continuous blocks from end of pool to hold really big - * programs. 
*/ - assert(pipeline->program_next - pipeline->program_block < 8192); - brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); gen7_compute_urb_partition(pipeline); @@ -925,14 +910,11 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) void anv_compiler_free(struct anv_pipeline *pipeline) { - struct anv_device *device = pipeline->device; - for (uint32_t stage = 0; stage < VK_NUM_SHADER_STAGE; stage++) if (pipeline->prog_data[stage]) free(pipeline->prog_data[stage]->map_entries); - anv_block_pool_free(&device->instruction_block_pool, - pipeline->program_block); + anv_state_stream_finish(&pipeline->program_stream); } } diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 6711d5e349f..23b94130334 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -254,6 +254,9 @@ anv_pipeline_create( if (result != VK_SUCCESS) goto fail; + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + for (common = pCreateInfo->pNext; common; common = common->pNext) { switch (common->sType) { case VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO: diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 1083d1fa142..554ec66d748 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -587,8 +587,7 @@ struct anv_pipeline { struct anv_bo gs_scratch_bo; uint32_t active_stages; - uint32_t program_block; - uint32_t program_next; + struct anv_state_stream program_stream; uint32_t vs_simd8; uint32_t ps_simd8; uint32_t ps_simd16; -- cgit v1.2.3 From f330bad5453dfcdf56f5d7d1adecbd5a2de1392f Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 10:39:14 -0700 Subject: vk: Only fill render targets for meta clear Clear inherits the render targets from the current render pass. This means we need to fill out the binding table after switching to meta bindings. However, meta copies etc happen outside a render pass and break when we try to fill in the render targets. 
This change fills the render targets only for meta clear. --- src/vulkan/meta.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index ebdf1a2b08d..661584b0fdf 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -160,9 +160,6 @@ anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, state->old_bindings = cmd_buffer->bindings; cmd_buffer->bindings = &state->bindings; state->old_pipeline = cmd_buffer->pipeline; - - /* Initialize render targets for the meta bindings. */ - anv_cmd_buffer_fill_render_targets(cmd_buffer); } static void @@ -245,6 +242,9 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_save(cmd_buffer, &saved_state); + /* Initialize render targets for the meta bindings. */ + anv_cmd_buffer_fill_render_targets(cmd_buffer); + anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, (VkBuffer[]) { (VkBuffer) &vertex_buffer, -- cgit v1.2.3 From 1d40e6ade8042a851e9ac758ab023ad3aa9d7817 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 17:00:52 -0700 Subject: vk: Update generated header files This fixes a problem where register addresses were incorrectly shifted.
--- src/vulkan/gen75_pack.h | 12 ++++++------ src/vulkan/gen7_pack.h | 8 ++++---- src/vulkan/gen8_pack.h | 12 ++++++------ 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index f01fdb07156..ed4ab2e52c6 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -193,7 +193,7 @@ GEN75_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->RegisterAddress, 2, 22) | + __gen_offset(values->RegisterAddress, 2, 22) | 0; uint32_t dw2 = @@ -5616,7 +5616,7 @@ GEN75_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->RegisterOffset, 2, 22) | + __gen_offset(values->RegisterOffset, 2, 22) | 0; dw[2] = @@ -5657,7 +5657,7 @@ GEN75_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->RegisterAddress, 2, 22) | + __gen_offset(values->RegisterAddress, 2, 22) | 0; uint32_t dw2 = @@ -5696,11 +5696,11 @@ GEN75_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->SourceRegisterAddress, 2, 22) | + __gen_offset(values->SourceRegisterAddress, 2, 22) | 0; dw[2] = - __gen_field(values->DestinationRegisterAddress, 2, 22) | + __gen_offset(values->DestinationRegisterAddress, 2, 22) | 0; } @@ -6447,7 +6447,7 @@ GEN75_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, dw[1] = __gen_field(values->URBClearLength, 16, 29) | - __gen_field(values->URBAddress, 0, 14) | + __gen_offset(values->URBAddress, 0, 14) | 0; } diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 5350e3a18a4..ef56e97539d 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -151,7 +151,7 @@ GEN7_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->RegisterAddress, 2, 22) | + __gen_offset(values->RegisterAddress, 2, 22) | 0; uint32_t dw2 = @@ -4811,7 
+4811,7 @@ GEN7_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->RegisterOffset, 2, 22) | + __gen_offset(values->RegisterOffset, 2, 22) | 0; dw[2] = @@ -4852,7 +4852,7 @@ GEN7_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->RegisterAddress, 2, 22) | + __gen_offset(values->RegisterAddress, 2, 22) | 0; uint32_t dw2 = @@ -5248,7 +5248,7 @@ GEN7_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, dw[1] = __gen_field(values->URBClearLength, 16, 28) | - __gen_field(values->URBAddress, 0, 13) | + __gen_offset(values->URBAddress, 0, 13) | 0; } diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index bc221e52089..68dcf34c493 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -399,11 +399,11 @@ GEN8_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->SourceRegisterAddress, 2, 22) | + __gen_offset(values->SourceRegisterAddress, 2, 22) | 0; dw[2] = - __gen_field(values->DestinationRegisterAddress, 2, 22) | + __gen_offset(values->DestinationRegisterAddress, 2, 22) | 0; } @@ -536,7 +536,7 @@ GEN8_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->RegisterAddress, 2, 22) | + __gen_offset(values->RegisterAddress, 2, 22) | 0; uint32_t dw2 = @@ -6351,7 +6351,7 @@ GEN8_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->RegisterOffset, 2, 22) | + __gen_offset(values->RegisterOffset, 2, 22) | 0; dw[2] = @@ -6392,7 +6392,7 @@ GEN8_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->RegisterAddress, 2, 22) | + __gen_offset(values->RegisterAddress, 2, 22) | 0; uint32_t dw2 = @@ -7107,7 +7107,7 @@ GEN8_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, dw[1] = __gen_field(values->URBClearLength, 16, 29) | - 
__gen_field(values->URBAddress, 0, 14) | + __gen_offset(values->URBAddress, 0, 14) | 0; } -- cgit v1.2.3 From 82ddab4b180ae5a0e2f033bf323f649f82109426 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 17:03:58 -0700 Subject: vk: Make occlusion query work, both copy and get functions --- src/vulkan/device.c | 189 +++++++++++++++++++++++++++++++++++++++++++++------ src/vulkan/private.h | 12 ++++ 2 files changed, 182 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index b3fa69015c1..5cefc0ade6c 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -956,6 +956,19 @@ anv_fence_destructor(struct anv_device * device, return VK_SUCCESS; } +static VkResult +anv_query_pool_destructor(struct anv_device * device, + VkObject object) +{ + struct anv_query_pool *pool = (struct anv_query_pool *) object; + + anv_gem_munmap(pool->bo.map, pool->bo.size); + anv_gem_close(device, pool->bo.gem_handle); + anv_device_free(device, pool); + + return VK_SUCCESS; +} + static VkResult (*anv_object_destructors[])(struct anv_device *device, VkObject object) = { [VK_OBJECT_TYPE_INSTANCE] = anv_instance_destructor, @@ -968,7 +981,8 @@ static VkResult (*anv_object_destructors[])(struct anv_device *device, [VK_OBJECT_TYPE_BUFFER] = anv_free_destructor, [VK_OBJECT_TYPE_IMAGE] = anv_free_destructor, [VK_OBJECT_TYPE_RENDER_PASS] = anv_free_destructor, - [VK_OBJECT_TYPE_FENCE] = anv_fence_destructor + [VK_OBJECT_TYPE_FENCE] = anv_fence_destructor, + [VK_OBJECT_TYPE_QUERY_POOL] = anv_query_pool_destructor }; VkResult anv_DestroyObject( @@ -1299,12 +1313,6 @@ VkResult anv_ResetEvent( // Query functions -struct anv_query_pool { - VkQueryType type; - uint32_t slots; - struct anv_bo bo; -}; - VkResult anv_CreateQueryPool( VkDevice _device, const VkQueryPoolCreateInfo* pCreateInfo, @@ -1313,19 +1321,32 @@ VkResult anv_CreateQueryPool( struct anv_device *device = (struct anv_device *) _device; struct anv_query_pool *pool; 
VkResult result; + size_t size; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); + switch (pCreateInfo->queryType) { + case VK_QUERY_TYPE_OCCLUSION: + break; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + return VK_UNSUPPORTED; + default: + unreachable(""); + } + pool = anv_device_alloc(device, sizeof(*pool), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (pool == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); pool->type = pCreateInfo->queryType; - result = anv_bo_init_new(&pool->bo, device, pCreateInfo->slots * 16); + size = pCreateInfo->slots * sizeof(struct anv_query_pool_slot); + result = anv_bo_init_new(&pool->bo, device, size); if (result != VK_SUCCESS) goto fail; + pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size); + *pQueryPool = (VkQueryPool) pool; return VK_SUCCESS; @@ -1337,7 +1358,7 @@ VkResult anv_CreateQueryPool( } VkResult anv_GetQueryPoolResults( - VkDevice device, + VkDevice _device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, @@ -1345,7 +1366,49 @@ VkResult anv_GetQueryPoolResults( void* pData, VkQueryResultFlags flags) { - stub_return(VK_UNSUPPORTED); + struct anv_device *device = (struct anv_device *) _device; + struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + struct anv_query_pool_slot *slot = pool->bo.map; + int64_t timeout = INT64_MAX; + uint32_t *dst32 = pData; + uint64_t *dst64 = pData; + uint64_t result; + int ret; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availabilty info supposed to go? 
*/ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return VK_UNSUPPORTED; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + if (flags & VK_QUERY_RESULT_64_BIT) + *pDataSize = queryCount * sizeof(uint64_t); + else + *pDataSize = queryCount * sizeof(uint32_t); + + if (pData == NULL) + return VK_SUCCESS; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); + if (ret == -1) + return vk_error(VK_ERROR_UNKNOWN); + } + + for (uint32_t i = 0; i < queryCount; i++) { + result = slot[startQuery + i].end - slot[startQuery + i].begin; + if (flags & VK_QUERY_RESULT_64_BIT) { + *dst64++ = result; + } else { + if (result > UINT32_MAX) + result = UINT32_MAX; + *dst32++ = result; + } + } + + return VK_SUCCESS; } // Buffer functions @@ -2698,14 +2761,13 @@ void anv_CmdBeginQuery( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: - anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, slot * 16); + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot)); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: - break; - default: - break; + unreachable(""); } } @@ -2719,14 +2781,13 @@ void anv_CmdEndQuery( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: - anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, slot * 16 + 8); + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot) + 8); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: - break; - default: - break; + unreachable(""); } } @@ -2771,6 +2832,50 @@ void anv_CmdWriteTimestamp( } } +#define alu_opcode(v) __gen_field((v), 20, 31) +#define alu_operand1(v) __gen_field((v), 10, 19) +#define alu_operand2(v) __gen_field((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 
+#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + void anv_CmdCopyQueryPoolResults( VkCmdBuffer cmdBuffer, VkQueryPool queryPool, @@ -2781,7 +2886,53 @@ void anv_CmdCopyQueryPoolResults( VkDeviceSize destStride, VkQueryResultFlags flags) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + struct anv_buffer *buffer = (struct anv_buffer *) destBuffer; + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availabilty info supposed to go? */ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + /* FIXME: If we're not waiting, should we just do this on the CPU? 
*/ + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .CommandStreamerStallEnable = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); + + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2), + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2) + 4, + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset + 4 }); + + dst_offset += destStride; + } } void anv_CmdInitAtomicCounters( diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 554ec66d748..b79dea3ddc4 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -452,6 +452,18 @@ struct anv_dynamic_cb_state { uint32_t blend_offset; }; +struct anv_query_pool_slot { + uint64_t begin; + uint64_t end; + uint64_t available; +}; + +struct anv_query_pool { + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + struct anv_descriptor_set_layout { struct { uint32_t surface_count; -- cgit v1.2.3 From ae9ac47c748f88fe3ac77d16346389a6c8afdc00 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 17:04:32 -0700 Subject: vk: Make timestamp command work correctly This was using the wrong timestamp 
register and needs to write a 64 bit value. --- src/vulkan/device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 5cefc0ade6c..09dddb766c2 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2800,7 +2800,7 @@ void anv_CmdResetQueryPool( stub(); } -#define TIMESTAMP 0x44070 +#define TIMESTAMP 0x2358 void anv_CmdWriteTimestamp( VkCmdBuffer cmdBuffer, @@ -2817,6 +2817,9 @@ void anv_CmdWriteTimestamp( anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, .RegisterAddress = TIMESTAMP, .MemoryAddress = { bo, buffer->offset + destOffset }); + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); break; case VK_TIMESTAMP_TYPE_BOTTOM: -- cgit v1.2.3 From 241b59cba02005b477f0a6a949765c203b46b1fd Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 17:05:36 -0700 Subject: vk/test: Test timestamps and occlusion queries --- src/vulkan/vk.c | 198 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 155 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c index 399a1c2db1d..5468b195330 100644 --- a/src/vulkan/vk.c +++ b/src/vulkan/vk.c @@ -225,51 +225,83 @@ create_pipeline(VkDevice device, VkPipeline *pipeline, vkDestroyObject(device, VK_OBJECT_TYPE_SHADER, vs); } -int main(int argc, char *argv[]) +static void +test_timestamp(VkDevice device, VkQueue queue) { - VkInstance instance; - vkCreateInstance(&(VkInstanceCreateInfo) { - .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, - .pAllocCb = &(VkAllocCallbacks) { - .pUserData = NULL, - .pfnAlloc = test_alloc, - .pfnFree = test_free - }, - .pAppInfo = &(VkApplicationInfo) { - .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, - .pAppName = "vk", - .apiVersion = 1 - } - }, - &instance); + VkBuffer buffer; + vkCreateBuffer(device, + 
&(VkBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = 1024, + .usage = VK_BUFFER_USAGE_GENERAL, + .flags = 0 + }, + &buffer); - uint32_t count = 1; - VkPhysicalDevice physicalDevices[1]; - vkEnumeratePhysicalDevices(instance, &count, physicalDevices); - printf("%d physical devices\n", count); + VkMemoryRequirements buffer_requirements; + size_t size = sizeof(buffer_requirements); + vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, buffer, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &buffer_requirements); - VkPhysicalDeviceProperties properties; - size_t size = sizeof(properties); - vkGetPhysicalDeviceInfo(physicalDevices[0], - VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES, - &size, &properties); - printf("vendor id %04x, device name %s\n", - properties.vendorId, properties.deviceName); + VkDeviceMemory mem; + vkAllocMemory(device, + &(VkMemoryAllocInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + .allocationSize = buffer_requirements.size, + .memProps = VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT, + .memPriority = VK_MEMORY_PRIORITY_NORMAL + }, + &mem); - VkDevice device; - vkCreateDevice(physicalDevices[0], - &(VkDeviceCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - .queueRecordCount = 1, - .pRequestedQueues = &(VkDeviceQueueCreateInfo) { - .queueNodeIndex = 0, - .queueCount = 1 - } - }, - &device); + void *map; + vkMapMemory(device, mem, 0, buffer_requirements.size, 0, &map); + memset(map, 0x11, buffer_requirements.size); - VkQueue queue; - vkGetDeviceQueue(device, 0, 0, &queue); + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, + buffer, + 0, /* allocation index; for objects which need to bind to multiple mems */ + mem, 0); + + VkCmdBuffer cmdBuffer; + vkCreateCommandBuffer(device, + &(VkCmdBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO, + .queueNodeIndex = 0, + .flags = 0 + }, + &cmdBuffer); + + vkBeginCommandBuffer(cmdBuffer, + &(VkCmdBufferBeginInfo) { + .sType = 
VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO, + .flags = 0 + }); + + vkCmdWriteTimestamp(cmdBuffer, VK_TIMESTAMP_TYPE_TOP, buffer, 0); + vkCmdWriteTimestamp(cmdBuffer, VK_TIMESTAMP_TYPE_BOTTOM, buffer, 8); + + vkEndCommandBuffer(cmdBuffer); + + vkQueueSubmit(queue, 1, &cmdBuffer, 0); + + vkQueueWaitIdle(queue); + + vkDestroyObject(device, VK_OBJECT_TYPE_BUFFER, buffer); + vkDestroyObject(device, VK_OBJECT_TYPE_COMMAND_BUFFER, cmdBuffer); + + uint64_t *results = map; + printf("top timestamp: %20ld (%016lx)\n", results[0], results[0]); + printf("bottom timestamp: %20ld (%016lx)\n", results[1], results[1]); + + vkUnmapMemory(device, mem); + vkFreeMemory(device, mem); +} + +static void +test_triangle(VkDevice device, VkQueue queue) +{ + uint32_t count; VkCmdBuffer cmdBuffer; vkCreateCommandBuffer(device, @@ -352,7 +384,7 @@ int main(int argc, char *argv[]) &buffer); VkMemoryRequirements buffer_requirements; - size = sizeof(buffer_requirements); + size_t size = sizeof(buffer_requirements); vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, buffer, VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, &size, &buffer_requirements); @@ -719,6 +751,16 @@ int main(int argc, char *argv[]) }, &pass); + VkQueryPool query_pool; + vkCreateQueryPool(device, + &(VkQueryPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .queryType = VK_QUERY_TYPE_OCCLUSION, + .slots = 4, + .pipelineStatistics = 0 + }, + &query_pool); + vkBeginCommandBuffer(cmdBuffer, &(VkCmdBufferBeginInfo) { .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO, @@ -749,13 +791,17 @@ int main(int argc, char *argv[]) vkCmdBindDynamicStateObject(cmdBuffer, VK_STATE_BIND_POINT_RASTER, rs_state); - vkCmdWriteTimestamp(cmdBuffer, VK_TIMESTAMP_TYPE_TOP, buffer, 0); - vkCmdWriteTimestamp(cmdBuffer, VK_TIMESTAMP_TYPE_BOTTOM, buffer, 8); + vkCmdBeginQuery(cmdBuffer, query_pool, 0 /*slot*/, 0 /* flags */); vkCmdDraw(cmdBuffer, 0, 3, 0, 1); + vkCmdEndQuery(cmdBuffer, query_pool, 0); + vkCmdEndRenderPass(cmdBuffer, pass); + 
vkCmdCopyQueryPoolResults(cmdBuffer, query_pool, 0, 1, buffer, 16, 8, + VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT); + VkBufferImageCopy copy = { .bufferOffset = 0, .imageSubresource = { @@ -776,6 +822,18 @@ int main(int argc, char *argv[]) vkQueueWaitIdle(queue); + /* Result gets written to buffer at offset 0. The buffer is bound to the + * memory object at offset 128 */ + uint64_t *results = map + 128; + + uint64_t get_result; + size = sizeof(get_result); + vkGetQueryPoolResults(device, query_pool, 0, 1, &size, &get_result, + VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + + printf("oc query (copy): %20ld (%016lx)\n", results[2], results[2]); + printf("oc query (get): %20ld (%016lx)\n", get_result, get_result); + write_png("vk-map.png", width, height, 1024, map + 2048); write_png("vk-copy.png", width, height, 1024, map + 2048 + rt_requirements.size); @@ -785,6 +843,60 @@ int main(int argc, char *argv[]) vkDestroyObject(device, VK_OBJECT_TYPE_BUFFER, buffer); vkDestroyObject(device, VK_OBJECT_TYPE_COMMAND_BUFFER, cmdBuffer); vkDestroyObject(device, VK_OBJECT_TYPE_PIPELINE, pipeline); + vkDestroyObject(device, VK_OBJECT_TYPE_QUERY_POOL, query_pool); +} + +int main(int argc, char *argv[]) +{ + VkInstance instance; + vkCreateInstance(&(VkInstanceCreateInfo) { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pAllocCb = &(VkAllocCallbacks) { + .pUserData = NULL, + .pfnAlloc = test_alloc, + .pfnFree = test_free + }, + .pAppInfo = &(VkApplicationInfo) { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pAppName = "vk", + .apiVersion = 1 + } + }, + &instance); + + uint32_t count = 1; + VkPhysicalDevice physicalDevices[1]; + vkEnumeratePhysicalDevices(instance, &count, physicalDevices); + printf("%d physical devices\n", count); + + VkPhysicalDeviceProperties properties; + size_t size = sizeof(properties); + vkGetPhysicalDeviceInfo(physicalDevices[0], + VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES, + &size, &properties); + printf("vendor id %04x, device name 
%s\n", + properties.vendorId, properties.deviceName); + + VkDevice device; + vkCreateDevice(physicalDevices[0], + &(VkDeviceCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .queueRecordCount = 1, + .pRequestedQueues = &(VkDeviceQueueCreateInfo) { + .queueNodeIndex = 0, + .queueCount = 1 + } + }, + &device); + + VkQueue queue; + vkGetDeviceQueue(device, 0, 0, &queue); + + if (argc > 1 && strcmp(argv[1], "timestamp") == 0) { + test_timestamp(device, queue); + } else { + test_triangle(device, queue); + } vkDestroyDevice(device); -- cgit v1.2.3 From 997596e4c4106e98c5a63d0846dcae0cbd6bf662 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 20:44:37 -0700 Subject: vk/test: Add test that prints format features --- src/vulkan/vk.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'src') diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c index 5468b195330..59c4bd35669 100644 --- a/src/vulkan/vk.c +++ b/src/vulkan/vk.c @@ -13,6 +13,11 @@ #include #include +#define for_each_bit(b, dword) \ + for (uint32_t __dword = (dword); \ + (b) = __builtin_ffs(__dword) - 1, __dword; \ + __dword &= ~(1 << (b))) + static void fail_if(int cond, const char *format, ...) 
{ @@ -298,6 +303,42 @@ test_timestamp(VkDevice device, VkQueue queue) vkFreeMemory(device, mem); } +static void +test_formats(VkDevice device, VkQueue queue) +{ + VkFormatProperties properties; + size_t size = sizeof(properties); + uint32_t f; + + static const char *features[] = { + "sampled_image", + "storage_image", + "storage_image_atomic", + "uniform_texel_buffer", + "storage_texel_buffer", + "storage_texel_buffer_atomic", + "vertex_buffer", + "color_attachment", + "color_attachment_blend", + "depth_stencil_attachment", + "conversion" + }; + + vkGetFormatInfo(device, + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_INFO_TYPE_PROPERTIES, + &size, &properties); + + printf("linear tiling features:"); + for_each_bit(f, properties.linearTilingFeatures) + printf(" %s", features[f]); + + printf("\noptimal tiling features:"); + for_each_bit(f, properties.optimalTilingFeatures) + printf(" %s", features[f]); + printf("\n"); +} + static void test_triangle(VkDevice device, VkQueue queue) { @@ -894,6 +935,8 @@ int main(int argc, char *argv[]) if (argc > 1 && strcmp(argv[1], "timestamp") == 0) { test_timestamp(device, queue); + } else if (argc > 1 && strcmp(argv[1], "formats") == 0) { + test_formats(device, queue); } else { test_triangle(device, queue); } -- cgit v1.2.3 From d24f8245db3418d8d146f373e085780d2217335c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 19:55:34 -0700 Subject: vk/allocator: Add a concept of a slave block pool We probably need a better name but this will do for now. 
--- src/vulkan/allocator.c | 39 +++++++++++++++++++++++++++++++++++++++ src/vulkan/private.h | 3 +++ 2 files changed, 42 insertions(+) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index aeffe761407..11cdf398198 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -221,6 +221,39 @@ anv_block_pool_init(struct anv_block_pool *pool, anv_block_pool_grow(pool); } +/** Initializes a block pool that is a slave of another + * + * The newly initialized pool is not a block pool on its own but it rather + * takes a fixed number of blocks from the master pool and hands them out. + * In some sense, it's nothing more than a glorified free list. However, + * since it is a block pool, it can be used to back a pool or stream. + */ +void +anv_block_pool_init_slave(struct anv_block_pool *pool, + struct anv_block_pool *master_pool, + uint32_t num_blocks) +{ + pool->device = NULL; + + /* We don't have backing storage */ + pool->bo.gem_handle = 0; + pool->bo.offset = 0; + pool->size = 0; + pool->next_block = 0; + + pool->block_size = master_pool->block_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + anv_vector_init(&pool->mmap_cleanups, + round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); + + /* Pull N blocks off the master pool and put them on this pool */ + for (uint32_t i = 0; i < num_blocks; i++) { + uint32_t block = anv_block_pool_alloc(master_pool); + pool->map = master_pool->map; + anv_block_pool_free(pool, block); + } +} + /* The memfd path lets us create a map for an fd and lets us grow and remap * without copying. It breaks valgrind however, so we have a MAP_ANONYMOUS * path we can take for valgrind debugging. */ @@ -258,6 +291,12 @@ anv_block_pool_grow(struct anv_block_pool *pool) int gem_handle; struct anv_mmap_cleanup *cleanup; + /* If we don't have a device then we can't resize the pool. This can be + * the case if the pool is a slave pool. 
+ */ + if (pool->device == NULL) + return -1; + if (pool->size == 0) { size = 32 * pool->block_size; } else { diff --git a/src/vulkan/private.h b/src/vulkan/private.h index b79dea3ddc4..fb28b61aff1 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -233,6 +233,9 @@ struct anv_state_stream { void anv_block_pool_init(struct anv_block_pool *pool, struct anv_device *device, uint32_t block_size); +void anv_block_pool_init_slave(struct anv_block_pool *pool, + struct anv_block_pool *master_pool, + uint32_t num_blocks); void anv_block_pool_finish(struct anv_block_pool *pool); uint32_t anv_block_pool_alloc(struct anv_block_pool *pool); void anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset); -- cgit v1.2.3 From 923691c70dc647ea6a10766f58d4f6d56b547c43 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 19:56:32 -0700 Subject: vk: Use a separate block pool and state stream for binding tables The binding table pointers packet only allows for a 16-bit binding table address so all binding tables have to be in the first 64 KB of the surface state BO. We solve this by adding a slave block pool that pulls off the first 64 KB worth of blocks and reserves them for binding tables. --- src/vulkan/device.c | 14 +++++++++++++- src/vulkan/private.h | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 09dddb766c2..a26986b658a 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -321,6 +321,15 @@ VkResult anv_CreateDevice( anv_block_pool_init(&device->instruction_block_pool, device, 2048); anv_block_pool_init(&device->surface_state_block_pool, device, 2048); + + /* Binding table pointers are only 16 bits so we have to make sure that + * they get allocated at the beginning of the surface state BO. To + * handle this, we create a separate block pool that works out of the + * first 64 KB of the surface state BO. 
+ */ + anv_block_pool_init_slave(&device->binding_table_block_pool, + &device->surface_state_block_pool, 32); + anv_state_pool_init(&device->surface_state_pool, &device->surface_state_block_pool); @@ -2077,6 +2086,8 @@ VkResult anv_CreateCommandBuffer( goto fail_exec2_objects; } + anv_state_stream_init(&cmd_buffer->binding_table_state_stream, + &device->binding_table_block_pool); anv_state_stream_init(&cmd_buffer->surface_state_stream, &device->surface_state_block_pool); anv_state_stream_init(&cmd_buffer->dynamic_state_stream, @@ -2469,7 +2480,8 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) uint32_t size; size = (bias + surface_count) * sizeof(uint32_t); - state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, size, 32); + state = anv_state_stream_alloc(&cmd_buffer->binding_table_state_stream, + size, 32); memcpy(state.map, bindings->descriptors[s].surfaces, size); for (uint32_t i = 0; i < layers; i++) diff --git a/src/vulkan/private.h b/src/vulkan/private.h index fb28b61aff1..13be1b94791 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -293,6 +293,7 @@ struct anv_device { struct anv_block_pool instruction_block_pool; struct anv_block_pool surface_state_block_pool; + struct anv_block_pool binding_table_block_pool; struct anv_state_pool surface_state_pool; struct anv_clear_state clear_state; @@ -548,6 +549,7 @@ struct anv_cmd_buffer { uint32_t bo_count; struct anv_batch batch; + struct anv_state_stream binding_table_state_stream; struct anv_state_stream surface_state_stream; struct anv_state_stream dynamic_state_stream; -- cgit v1.2.3 From 7c9f20942770b562037804b07bfe596885f1f28a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 20:22:51 -0700 Subject: Revert "vk/allocator: Don't use memfd when valgrind is detected" This reverts commit b6ab076d6b7dd91ce0dda8741ba974b2402fa726. It turns out setting USE_MEMFD to 0 is really bad because it means we can't resize the pool. 
Besides, valgrind SVN handles memfd so we really don't need this fallback for valgrind anymore. --- src/vulkan/allocator.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 11cdf398198..17d44423342 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -258,11 +258,7 @@ anv_block_pool_init_slave(struct anv_block_pool *pool, * without copying. It breaks valgrind however, so we have a MAP_ANONYMOUS * path we can take for valgrind debugging. */ -#ifdef HAVE_VALGRIND -# define USE_MEMFD 0 -#else -# define USE_MEMFD 1 -#endif +#define USE_MEMFD 1 void anv_block_pool_finish(struct anv_block_pool *pool) -- cgit v1.2.3 From 4668bbb16140816138366410ec28357ab5a43a39 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 20:42:03 -0700 Subject: vk/image: Factor view creation out into separate *_init functions The *_init functions work basically the same as the Vulkan entrypoints except that they act on an already-created view and take an optional command buffer option. If a command buffer is given, the surface state is allocated out of the command buffer's state stream. 
--- src/vulkan/image.c | 68 ++++++++++++++++++++++++++++++++++++---------------- src/vulkan/private.h | 10 ++++++++ 2 files changed, 57 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index d5e74dc6594..cf658e969da 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -111,10 +111,14 @@ VkResult anv_GetImageSubresourceInfo( static struct anv_state create_surface_state(struct anv_device *device, - struct anv_image *image, const struct anv_format *format) + struct anv_image *image, const struct anv_format *format, + struct anv_cmd_buffer *cmd_buffer) { - struct anv_state state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + struct anv_state state; + if (cmd_buffer) + state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + else + state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = SURFTYPE_2D, @@ -159,6 +163,26 @@ create_surface_state(struct anv_device *device, return state; } +void +anv_image_view_init(struct anv_surface_view *view, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_image *image = (struct anv_image *) pCreateInfo->image; + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + view->bo = image->bo; + view->offset = image->offset; + view->surface_state = create_surface_state(device, image, format, + cmd_buffer); + view->format = pCreateInfo->format; + + /* TODO: Miplevels */ + view->extent = image->extent; +} + VkResult anv_CreateImageView( VkDevice _device, const VkImageViewCreateInfo* pCreateInfo, @@ -166,9 +190,6 @@ VkResult anv_CreateImageView( { struct anv_device *device = (struct anv_device *) _device; struct anv_surface_view *view; - struct anv_image *image = (struct anv_image *) pCreateInfo->image; - const struct anv_format *format = - 
anv_format_for_vk_format(pCreateInfo->format); assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); @@ -177,19 +198,31 @@ VkResult anv_CreateImageView( if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - view->bo = image->bo; - view->offset = image->offset; - view->surface_state = create_surface_state(device, image, format); - view->format = pCreateInfo->format; - - /* TODO: Miplevels */ - view->extent = image->extent; + anv_image_view_init(view, device, pCreateInfo, NULL); *pView = (VkImageView) view; return VK_SUCCESS; } +void +anv_color_attachment_view_init(struct anv_surface_view *view, + struct anv_device *device, + const VkColorAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_image *image = (struct anv_image *) pCreateInfo->image; + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + view->bo = image->bo; + view->offset = image->offset; + view->surface_state = create_surface_state(device, image, format, + cmd_buffer); + view->extent = image->extent; + view->format = pCreateInfo->format; +} + VkResult anv_CreateColorAttachmentView( VkDevice _device, const VkColorAttachmentViewCreateInfo* pCreateInfo, @@ -197,9 +230,6 @@ VkResult anv_CreateColorAttachmentView( { struct anv_device *device = (struct anv_device *) _device; struct anv_surface_view *view; - struct anv_image *image = (struct anv_image *) pCreateInfo->image; - const struct anv_format *format = - anv_format_for_vk_format(pCreateInfo->format); assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO); @@ -208,11 +238,7 @@ VkResult anv_CreateColorAttachmentView( if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - view->bo = image->bo; - view->offset = image->offset; - view->surface_state = create_surface_state(device, image, format); - view->extent = image->extent; - view->format = pCreateInfo->format; + anv_color_attachment_view_init(view, device, 
pCreateInfo, NULL); *pView = (VkColorAttachmentView) view; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 13be1b94791..7e935962742 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -671,6 +671,16 @@ struct anv_surface_view { VkFormat format; }; +void anv_image_view_init(struct anv_surface_view *view, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + +void anv_color_attachment_view_init(struct anv_surface_view *view, + struct anv_device *device, + const VkColorAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + struct anv_sampler { uint32_t state[4]; }; -- cgit v1.2.3 From 851495d344acf9a01053bcfb9a619b4d2f68b99a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 May 2015 20:56:58 -0700 Subject: vk/meta: Use the new *view_init functions and stack-allocated views This should save us a good deal of the leakage that meta currently has. --- src/vulkan/meta.c | 92 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 661584b0fdf..1aad8d91df9 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -672,8 +672,8 @@ void anv_CmdCopyBuffer( dest_image->bo = dest_buffer->bo; dest_image->offset = dest_buffer->offset + pRegions[r].destOffset; - VkImageView src_view; - anv_CreateImageView(vk_device, + struct anv_surface_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = (VkImage)src_image, @@ -693,10 +693,11 @@ void anv_CmdCopyBuffer( .arraySize = 1 }, .minLod = 0 - }, &src_view); + }, + cmd_buffer); - VkColorAttachmentView dest_view; - anv_CreateColorAttachmentView(vk_device, + struct anv_surface_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, &(VkColorAttachmentViewCreateInfo) { .sType = 
VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, .image = (VkImage)dest_image, @@ -704,13 +705,14 @@ void anv_CmdCopyBuffer( .mipLevel = 0, .baseArraySlice = 0, .arraySize = 1, - }, &dest_view); + }, + cmd_buffer); meta_emit_blit(cmd_buffer, - (struct anv_surface_view *)src_view, + &src_view, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { pRegions[r].copySize, 0, 0 }, - (struct anv_surface_view *)dest_view, + &dest_view, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { pRegions[r].copySize, 0, 0 }); } @@ -728,15 +730,14 @@ void anv_CmdCopyImage( const VkImageCopy* pRegions) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; - VkDevice vk_device = (VkDevice) cmd_buffer->device; struct anv_image *src_image = (struct anv_image *)srcImage; struct anv_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - VkImageView src_view; - anv_CreateImageView(vk_device, + struct anv_surface_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, @@ -756,10 +757,11 @@ void anv_CmdCopyImage( .arraySize = 1 }, .minLod = 0 - }, &src_view); + }, + cmd_buffer); - VkColorAttachmentView dest_view; - anv_CreateColorAttachmentView(vk_device, + struct anv_surface_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, &(VkColorAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, @@ -767,13 +769,14 @@ void anv_CmdCopyImage( .mipLevel = pRegions[r].destSubresource.mipLevel, .baseArraySlice = pRegions[r].destSubresource.arraySlice, .arraySize = 1, - }, &dest_view); + }, + cmd_buffer); meta_emit_blit(cmd_buffer, - (struct anv_surface_view *)src_view, + &src_view, pRegions[r].srcOffset, pRegions[r].extent, - (struct anv_surface_view *)dest_view, + &dest_view, pRegions[r].destOffset, pRegions[r].extent); } @@ -791,7 +794,6 @@ void 
anv_CmdBlitImage( const VkImageBlit* pRegions) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; - VkDevice vk_device = (VkDevice) cmd_buffer->device; struct anv_image *src_image = (struct anv_image *)srcImage; struct anv_image *dest_image = (struct anv_image *)destImage; struct anv_saved_state saved_state; @@ -799,8 +801,8 @@ void anv_CmdBlitImage( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - VkImageView src_view; - anv_CreateImageView(vk_device, + struct anv_surface_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, @@ -820,10 +822,11 @@ void anv_CmdBlitImage( .arraySize = 1 }, .minLod = 0 - }, &src_view); + }, + cmd_buffer); - VkColorAttachmentView dest_view; - anv_CreateColorAttachmentView(vk_device, + struct anv_surface_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, &(VkColorAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, @@ -831,13 +834,14 @@ void anv_CmdBlitImage( .mipLevel = pRegions[r].destSubresource.mipLevel, .baseArraySlice = pRegions[r].destSubresource.arraySlice, .arraySize = 1, - }, &dest_view); + }, + cmd_buffer); meta_emit_blit(cmd_buffer, - (struct anv_surface_view *)src_view, + &src_view, pRegions[r].srcOffset, pRegions[r].srcExtent, - (struct anv_surface_view *)dest_view, + &dest_view, pRegions[r].destOffset, pRegions[r].destExtent); } @@ -887,8 +891,8 @@ void anv_CmdCopyBufferToImage( src_image->bo = src_buffer->bo; src_image->offset = src_buffer->offset + pRegions[r].bufferOffset; - VkImageView src_view; - anv_CreateImageView(vk_device, + struct anv_surface_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = (VkImage)src_image, @@ -908,10 +912,11 @@ void 
anv_CmdCopyBufferToImage( .arraySize = 1 }, .minLod = 0 - }, &src_view); + }, + cmd_buffer); - VkColorAttachmentView dest_view; - anv_CreateColorAttachmentView(vk_device, + struct anv_surface_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, &(VkColorAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, .image = (VkImage)dest_image, @@ -919,13 +924,14 @@ void anv_CmdCopyBufferToImage( .mipLevel = pRegions[r].imageSubresource.mipLevel, .baseArraySlice = pRegions[r].imageSubresource.arraySlice, .arraySize = 1, - }, &dest_view); + }, + cmd_buffer); meta_emit_blit(cmd_buffer, - (struct anv_surface_view *)src_view, + &src_view, (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent, - (struct anv_surface_view *)dest_view, + &dest_view, pRegions[r].imageOffset, pRegions[r].imageExtent); } @@ -950,8 +956,8 @@ void anv_CmdCopyImageToBuffer( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - VkImageView src_view; - anv_CreateImageView(vk_device, + struct anv_surface_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, @@ -971,7 +977,8 @@ void anv_CmdCopyImageToBuffer( .arraySize = 1 }, .minLod = 0 - }, &src_view); + }, + cmd_buffer); struct anv_image *dest_image; anv_CreateImage(vk_device, @@ -998,8 +1005,8 @@ void anv_CmdCopyImageToBuffer( dest_image->bo = dest_buffer->bo; dest_image->offset = dest_buffer->offset + pRegions[r].bufferOffset; - VkColorAttachmentView dest_view; - anv_CreateColorAttachmentView(vk_device, + struct anv_surface_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, &(VkColorAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, .image = (VkImage)dest_image, @@ -1007,13 +1014,14 @@ void anv_CmdCopyImageToBuffer( .mipLevel = 0, .baseArraySlice = 0, .arraySize = 1, - }, 
&dest_view); + }, + cmd_buffer); meta_emit_blit(cmd_buffer, - (struct anv_surface_view *)src_view, + &src_view, pRegions[r].imageOffset, pRegions[r].imageExtent, - (struct anv_surface_view *)dest_view, + &dest_view, (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent); } -- cgit v1.2.3 From e4c11f50b56a45c436a94b6ed768867ae587b343 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 21:11:40 -0700 Subject: vk: Call finish for binding table state stream --- src/vulkan/device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index a26986b658a..f6578d01341 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -926,6 +926,7 @@ anv_cmd_buffer_destructor(struct anv_device * device, anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_state_stream_finish(&cmd_buffer->binding_table_state_stream); anv_batch_finish(&cmd_buffer->batch, device); anv_device_free(device, cmd_buffer->exec2_objects); anv_device_free(device, cmd_buffer->exec2_bos); -- cgit v1.2.3 From 15739131948f44a5c7226bb83e80138056affa02 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 May 2015 10:44:59 -0700 Subject: vk/glsl_scraper: Don't open files until needed This prevents us from writing an empty file when the compile failed. 
--- src/vulkan/glsl_scraper.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index 4b99ae0a3e2..fa62daf5a2b 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -186,8 +186,8 @@ def open_file(name, mode): else: return open(name, mode) -infile = None -outfile = sys.stdout +infname = None +outfname = '-' glslang = 'glslangValidator' arg_idx = 1 @@ -196,19 +196,20 @@ while arg_idx < len(sys.argv): print_usage(0) elif sys.argv[arg_idx] == '-o': arg_idx += 1 - outfile = open_file(sys.argv[arg_idx], 'w') + outfname = sys.argv[arg_idx] elif sys.argv[arg_idx].startswith('--with-glslang='): glslang = sys.argv[arg_idx][len('--with-glslang='):] else: - infile = open_file(sys.argv[arg_idx], 'r') + infname = sys.argv[arg_idx] break arg_idx += 1 -if arg_idx < len(sys.argv) - 1 or not infile or not outfile: +if arg_idx < len(sys.argv) - 1 or not infname or not outfname: print_usage(1) -parser = Parser(infile) -parser.run() +with open_file(infname, 'r') as infile: + parser = Parser(infile) + parser.run() # glslangValidator has an absolutely *insane* interface. We pretty much # have to run in a temporary directory. Sad day. @@ -225,12 +226,13 @@ try: finally: shutil.rmtree(tmpdir) -outfile.write("""\ +with open_file(outfname, 'w') as outfile: + outfile.write("""\ /* =========================== DO NOT EDIT! =========================== * This file is autogenerated by glsl_scraper.py. 
*/ #include """) -for shader in parser.shaders: - shader.dump_c_code(outfile) + for shader in parser.shaders: + shader.dump_c_code(outfile) -- cgit v1.2.3 From 4bcf58a19205067c18869eaf1912090a7a8e984a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 May 2015 11:26:35 -0700 Subject: vk/glsl_scraper: Use the line number from the end of the macro We used to use the line number from the start of the macro but this doesn't seem to match the c preprocessor --- src/vulkan/glsl_scraper.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index fa62daf5a2b..55885b4bdbf 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -19,10 +19,9 @@ Options: import os, sys, re, cStringIO, tempfile, subprocess, struct, shutil class Shader: - def __init__(self, stage, line): + def __init__(self, stage): self.stream = cStringIO.StringIO() self.stage = stage - self.line = line if self.stage == 'VERTEX': self.ext = 'vert' @@ -42,6 +41,9 @@ class Shader: def add_text(self, s): self.stream.write(s) + def finish_text(self, line): + self.line = line + def glsl_source(self): return self.stream.getvalue() @@ -153,8 +155,6 @@ class Parser: self.current_shader.add_text(t) def handle_macro(self): - line_number = self.line_number - t = self.token_iter.next() assert t == '(' t = self.token_iter.next() @@ -166,9 +166,12 @@ class Parser: t = self.token_iter.next() assert t == ',' - self.current_shader = Shader(stage, line_number) + self.current_shader = Shader(stage) self.handle_shader_src() + self.current_shader.finish_text(self.line_number) + self.shaders.append(self.current_shader) + self.current_shader = None def run(self): for t in self.token_iter: -- cgit v1.2.3 From e37a89136ff0534ec3694e1bf1246a17d70194be Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 May 2015 11:27:20 -0700 Subject: vk/glsl_scraper: Add a --glsl-only option --- 
src/vulkan/glsl_scraper.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index 55885b4bdbf..c35337bace5 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -81,7 +81,7 @@ class Shader: os.remove(glsl_fname) os.remove(spirv_fname) - def dump_c_code(self, f): + def dump_c_code(self, f, glsl_only = False): f.write('\n\n') var_prefix = '_glsl_helpers_shader{0}'.format(self.line) @@ -94,6 +94,9 @@ class Shader: f.write('\n"{0}\\n"'.format(line)) f.write(';\n\n') + if glsl_only: + return + # Now dump the SPIR-V source f.write('static const uint32_t {0}_spir_v_src[] = {{'.format(var_prefix)) line_start = 0 @@ -192,6 +195,7 @@ def open_file(name, mode): infname = None outfname = '-' glslang = 'glslangValidator' +glsl_only = False arg_idx = 1 while arg_idx < len(sys.argv): @@ -202,6 +206,8 @@ while arg_idx < len(sys.argv): outfname = sys.argv[arg_idx] elif sys.argv[arg_idx].startswith('--with-glslang='): glslang = sys.argv[arg_idx][len('--with-glslang='):] + elif sys.argv[arg_idx] == '--glsl-only': + glsl_only = True; else: infname = sys.argv[arg_idx] break @@ -214,20 +220,21 @@ with open_file(infname, 'r') as infile: parser = Parser(infile) parser.run() -# glslangValidator has an absolutely *insane* interface. We pretty much -# have to run in a temporary directory. Sad day. -current_dir = os.getcwd() -tmpdir = tempfile.mkdtemp('glsl_scraper') +if not glsl_only: + # glslangValidator has an absolutely *insane* interface. We pretty much + # have to run in a temporary directory. Sad day. 
+ current_dir = os.getcwd() + tmpdir = tempfile.mkdtemp('glsl_scraper') -try: - os.chdir(tmpdir) + try: + os.chdir(tmpdir) - for shader in parser.shaders: - shader.compile() + for shader in parser.shaders: + shader.compile() - os.chdir(current_dir) -finally: - shutil.rmtree(tmpdir) + os.chdir(current_dir) + finally: + shutil.rmtree(tmpdir) with open_file(outfname, 'w') as outfile: outfile.write("""\ @@ -238,4 +245,4 @@ with open_file(outfname, 'w') as outfile: #include """) for shader in parser.shaders: - shader.dump_c_code(outfile) + shader.dump_c_code(outfile, glsl_only) -- cgit v1.2.3 From edff0761881fd833297a6295cb7c9050e1a26eba Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 May 2015 12:42:21 -0700 Subject: vk: Use binding instead of index in uniform layout qualifiers This more closely matches what the Vulkan docs say to do. --- src/glsl/ast_to_hir.cpp | 12 ++++++------ src/glsl/ast_type.cpp | 2 +- src/glsl/glsl_parser.yy | 4 +--- src/vulkan/meta.c | 2 +- src/vulkan/vk.c | 8 ++++---- 5 files changed, 13 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 8713cd85f0a..cd6a068e97d 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -2647,14 +2647,14 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, } if (qual->flags.q.vk_set) { - if (!qual->flags.q.explicit_index) + if (!qual->flags.q.explicit_binding) _mesa_glsl_error(loc, state, - "Vulkan descriptor set layout requires both group and index " - "qualifiers"); + "Vulkan descriptor set layout requires both set " + "and binding qualifiers"); var->data.vk_set = true; var->data.set = qual->set; - var->data.index = qual->index; + var->data.binding = qual->binding; } else if (qual->flags.q.explicit_location) { validate_explicit_location(qual, var, state, loc); } else if (qual->flags.q.explicit_index) { @@ -5794,7 +5794,7 @@ ast_interface_block::hir(exec_list *instructions, var->data.vk_set = 
this->layout.flags.q.vk_set; var->data.set = this->layout.set; - var->data.index = this->layout.index; + var->data.binding = this->layout.binding; state->symbols->add_variable(var); instructions->push_tail(var); @@ -5870,7 +5870,7 @@ ast_interface_block::hir(exec_list *instructions, var->data.vk_set = this->layout.flags.q.vk_set; var->data.set = this->layout.set; - var->data.index = this->layout.index; + var->data.binding = this->layout.binding; state->symbols->add_variable(var); instructions->push_tail(var); diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp index a988fd58792..5eb2913d6b7 100644 --- a/src/glsl/ast_type.cpp +++ b/src/glsl/ast_type.cpp @@ -254,7 +254,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, if (q.flags.q.vk_set) { this->set = q.set; - this->index = q.index; + this->binding = q.binding; } return true; diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 41106060e5e..05fa4ea9ac5 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -1424,9 +1424,7 @@ layout_qualifier_id: } } - if ((state->ARB_shading_language_420pack_enable || - state->has_atomic_counters()) && - match_layout_qualifier("binding", $1, state) == 0) { + if (match_layout_qualifier("binding", $1, state) == 0) { $$.flags.q.explicit_binding = 1; $$.binding = $3; } diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 1aad8d91df9..7be8a511655 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -307,7 +307,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) VkShader fs = GLSL_VK_SHADER(device, FRAGMENT, out vec4 f_color; in vec4 v_tex_coord; - layout(set = 0, index = 0) uniform sampler2D u_tex; + layout(set = 0, binding = 0) uniform sampler2D u_tex; void main() { f_color = texture2D(u_tex, v_tex_coord.xy); diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c index 59c4bd35669..bc78b558d56 100644 --- a/src/vulkan/vk.c +++ b/src/vulkan/vk.c @@ -103,13 +103,13 @@ create_pipeline(VkDevice device, VkPipeline *pipeline, static 
const char vs_source[] = GLSL( layout(location = 0) in vec4 a_position; layout(location = 1) in vec4 a_color; - layout(set = 0, index = 0) uniform block1 { + layout(set = 0, binding = 0) uniform block1 { vec4 color; } u1; - layout(set = 0, index = 1) uniform block2 { + layout(set = 0, binding = 1) uniform block2 { vec4 color; } u2; - layout(set = 1, index = 0) uniform block3 { + layout(set = 1, binding = 0) uniform block3 { vec4 color; } u3; out vec4 v_color; @@ -122,7 +122,7 @@ create_pipeline(VkDevice device, VkPipeline *pipeline, static const char fs_source[] = GLSL( out vec4 f_color; in vec4 v_color; - layout(set = 0, index = 0) uniform sampler2D tex; + layout(set = 0, binding = 0) uniform sampler2D tex; void main() { f_color = v_color + texture2D(tex, vec2(0.1, 0.1)); -- cgit v1.2.3 From d7044a19b1e607defc5c85389d2ed26f5c226e8c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 May 2015 12:43:13 -0700 Subject: vk/meta: Use texture() instead of texture2D() --- src/vulkan/meta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 7be8a511655..979655cb482 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -310,7 +310,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) layout(set = 0, binding = 0) uniform sampler2D u_tex; void main() { - f_color = texture2D(u_tex, v_tex_coord.xy); + f_color = texture(u_tex, v_tex_coord.xy); } ); -- cgit v1.2.3 From 13719e92254d055b560bdb1ec8c5e36ae856db99 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 May 2015 15:11:21 -0700 Subject: vk/meta: Fix buffer copy extents --- src/vulkan/meta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 979655cb482..f1524f9db28 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -711,10 +711,10 @@ void anv_CmdCopyBuffer( meta_emit_blit(cmd_buffer, &src_view, (VkOffset3D) { 0, 0, 0 }, - 
(VkExtent3D) { pRegions[r].copySize, 0, 0 }, + (VkExtent3D) { pRegions[r].copySize, 1, 1 }, &dest_view, (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { pRegions[r].copySize, 0, 0 }); + (VkExtent3D) { pRegions[r].copySize, 1, 1 }); } meta_finish_blit(cmd_buffer, &saved_state); -- cgit v1.2.3 From 9557b85e3dc6b14a44ee724c3c618fbec8779ccd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 May 2015 15:38:11 -0700 Subject: vk/meta: Use the biggest format possible for buffer copies This should substantially improve throughput of buffer copies. --- src/vulkan/meta.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index f1524f9db28..b1612c2dc2d 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -627,6 +627,23 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_restore(cmd_buffer, saved_state); } +static VkFormat +vk_format_for_cpp(int cpp) +{ + switch (cpp) { + case 1: return VK_FORMAT_R8_UINT; + case 2: return VK_FORMAT_R8G8_UINT; + case 3: return VK_FORMAT_R8G8B8_UINT; + case 4: return VK_FORMAT_R8G8B8A8_UINT; + case 6: return VK_FORMAT_R16G16B16_UINT; + case 8: return VK_FORMAT_R16G16B16A16_UINT; + case 12: return VK_FORMAT_R32G32B32_UINT; + case 16: return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Invalid format cpp"); + } +} + void anv_CmdCopyBuffer( VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, @@ -643,12 +660,37 @@ void anv_CmdCopyBuffer( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { + size_t src_offset = src_buffer->offset + pRegions[r].srcOffset; + size_t dest_offset = dest_buffer->offset + pRegions[r].destOffset; + + /* First, we compute the biggest format that can be used with the + * given offsets and size. 
+ */ + int cpp = 16; + + int fs = ffs(src_offset) - 1; + if (fs != -1) + cpp = MIN2(cpp, 1 << fs); + assert(src_offset % cpp == 0); + + fs = ffs(dest_offset) - 1; + if (fs != -1) + cpp = MIN2(cpp, 1 << fs); + assert(dest_offset % cpp == 0); + + fs = ffs(pRegions[r].copySize) - 1; + if (fs != -1) + cpp = MIN2(cpp, 1 << fs); + assert(pRegions[r].copySize % cpp == 0); + + VkFormat copy_format = vk_format_for_cpp(cpp); + VkImageCreateInfo image_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_R8_UNORM, + .format = copy_format, .extent = { - .width = pRegions[r].copySize, + .width = pRegions[r].copySize / cpp, .height = 1, .depth = 1, }, @@ -668,9 +710,9 @@ void anv_CmdCopyBuffer( * creating a dummy memory object etc. so there's really no point. */ src_image->bo = src_buffer->bo; - src_image->offset = src_buffer->offset + pRegions[r].srcOffset; + src_image->offset = src_offset; dest_image->bo = dest_buffer->bo; - dest_image->offset = dest_buffer->offset + pRegions[r].destOffset; + dest_image->offset = dest_offset; struct anv_surface_view src_view; anv_image_view_init(&src_view, cmd_buffer->device, @@ -678,7 +720,7 @@ void anv_CmdCopyBuffer( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = (VkImage)src_image, .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = VK_FORMAT_R8_UNORM, + .format = copy_format, .channels = { VK_CHANNEL_SWIZZLE_R, VK_CHANNEL_SWIZZLE_G, @@ -701,7 +743,7 @@ void anv_CmdCopyBuffer( &(VkColorAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, .image = (VkImage)dest_image, - .format = VK_FORMAT_R8_UNORM, + .format = copy_format, .mipLevel = 0, .baseArraySlice = 0, .arraySize = 1, @@ -711,10 +753,10 @@ void anv_CmdCopyBuffer( meta_emit_blit(cmd_buffer, &src_view, (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { pRegions[r].copySize, 1, 1 }, + (VkExtent3D) { pRegions[r].copySize / cpp, 1, 1 }, &dest_view, (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { 
pRegions[r].copySize, 1, 1 }); + (VkExtent3D) { pRegions[r].copySize / cpp, 1, 1 }); } meta_finish_blit(cmd_buffer, &saved_state); -- cgit v1.2.3 From bea66ac5ad8e12e452123dd481c1a894b93e01cd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 May 2015 16:17:59 -0700 Subject: vk/meta: Add support for copying arbitrary size buffers --- src/vulkan/meta.c | 187 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 114 insertions(+), 73 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index b1612c2dc2d..b088e777dd2 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -644,6 +644,88 @@ vk_format_for_cpp(int cpp) } } +static void +do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *src, uint64_t src_offset, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat copy_format) +{ + VkDevice vk_device = (VkDevice)cmd_buffer->device; + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = copy_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }; + + struct anv_image *src_image, *dest_image; + anv_CreateImage(vk_device, &image_info, (VkImage *)&src_image); + anv_CreateImage(vk_device, &image_info, (VkImage *)&dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + src_image->bo = src; + src_image->offset = src_offset; + dest_image->bo = dest; + dest_image->offset = dest_offset; + + struct anv_surface_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = (VkImage)src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = VK_IMAGE_ASPECT_COLOR, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArraySlice = 0, + .arraySize = 1 + }, + .minLod = 0 + }, + cmd_buffer); + + struct anv_surface_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + &(VkColorAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = (VkImage)dest_image, + .format = copy_format, + .mipLevel = 0, + .baseArraySlice = 0, + .arraySize = 1, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + &src_view, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + &dest_view, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }); +} + void anv_CmdCopyBuffer( VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, @@ -652,7 +734,6 @@ void anv_CmdCopyBuffer( const VkBufferCopy* pRegions) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; - VkDevice vk_device = (VkDevice) cmd_buffer->device; struct anv_buffer *src_buffer = (struct anv_buffer *)srcBuffer; struct anv_buffer *dest_buffer = (struct anv_buffer *)destBuffer; struct anv_saved_state saved_state; @@ -660,8 +741,9 @@ void anv_CmdCopyBuffer( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - size_t src_offset = src_buffer->offset + pRegions[r].srcOffset; - size_t dest_offset = dest_buffer->offset + pRegions[r].destOffset; + uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; + 
uint64_t dest_offset = dest_buffer->offset + pRegions[r].destOffset; + uint64_t copy_size = pRegions[r].copySize; /* First, we compute the biggest format that can be used with the * given offsets and size. @@ -685,78 +767,37 @@ void anv_CmdCopyBuffer( VkFormat copy_format = vk_format_for_cpp(cpp); - VkImageCreateInfo image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = copy_format, - .extent = { - .width = pRegions[r].copySize / cpp, - .height = 1, - .depth = 1, - }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT, - .flags = 0, - }; - - struct anv_image *src_image, *dest_image; - vkCreateImage(vk_device, &image_info, (VkImage *)&src_image); - vkCreateImage(vk_device, &image_info, (VkImage *)&dest_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. - */ - src_image->bo = src_buffer->bo; - src_image->offset = src_offset; - dest_image->bo = dest_buffer->bo; - dest_image->offset = dest_offset; - - struct anv_surface_view src_view; - anv_image_view_init(&src_view, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = (VkImage)src_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspect = VK_IMAGE_ASPECT_COLOR, - .baseMipLevel = 0, - .mipLevels = 1, - .baseArraySlice = 0, - .arraySize = 1 - }, - .minLod = 0 - }, - cmd_buffer); + /* This is maximum possible width/height our HW can handle */ + uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = max_surface_dim * max_surface_dim * cpp; + while (copy_size > max_copy_size) { + do_buffer_copy(cmd_buffer, src_buffer->bo, 
src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, max_surface_dim, copy_format); + copy_size -= max_copy_size; + src_offset += max_copy_size; + dest_offset += max_copy_size; + } - struct anv_surface_view dest_view; - anv_color_attachment_view_init(&dest_view, cmd_buffer->device, - &(VkColorAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, - .image = (VkImage)dest_image, - .format = copy_format, - .mipLevel = 0, - .baseArraySlice = 0, - .arraySize = 1, - }, - cmd_buffer); + uint64_t height = copy_size / (max_surface_dim * cpp); + assert(height < max_surface_dim); + if (height != 0) { + uint64_t rect_copy_size = height * max_surface_dim * cpp; + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, height, copy_format); + copy_size -= rect_copy_size; + src_offset += rect_copy_size; + dest_offset += rect_copy_size; + } - meta_emit_blit(cmd_buffer, - &src_view, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { pRegions[r].copySize / cpp, 1, 1 }, - &dest_view, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { pRegions[r].copySize / cpp, 1, 1 }); + if (copy_size != 0) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + copy_size / cpp, 1, copy_format); + } } meta_finish_blit(cmd_buffer, &saved_state); -- cgit v1.2.3 From 47c1cf5ce68d0f76e749883627983f330c2c8d76 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 May 2015 15:11:08 -0700 Subject: vk/test: Add a test for testing buffer copies --- src/vulkan/vk.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) (limited to 'src') diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c index bc78b558d56..d1c51bbe980 100644 --- a/src/vulkan/vk.c +++ b/src/vulkan/vk.c @@ -303,6 +303,104 @@ test_timestamp(VkDevice device, VkQueue queue) vkFreeMemory(device, mem); } +static void +test_buffer_copy(VkDevice device, VkQueue queue) +{ + /* We'll test 
copying 1000k buffers */ + const int buffer_size = 1024000; + + VkBufferCreateInfo buffer_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = buffer_size, + .usage = VK_BUFFER_USAGE_GENERAL, + .flags = 0 + }; + + VkBuffer buffer1, buffer2; + vkCreateBuffer(device, &buffer_info, &buffer1); + vkCreateBuffer(device, &buffer_info, &buffer2); + + VkMemoryRequirements buffer_requirements; + size_t size = sizeof(buffer_requirements); + vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, buffer1, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &buffer_requirements); + + const int memory_size = buffer_requirements.size * 2; + + VkDeviceMemory mem; + vkAllocMemory(device, + &(VkMemoryAllocInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + .allocationSize = memory_size, + .memProps = VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT, + .memPriority = VK_MEMORY_PRIORITY_NORMAL + }, &mem); + + void *map; + vkMapMemory(device, mem, 0, buffer_requirements.size * 2, 0, &map); + + /* Fill the first buffer_size of the memory with a pattern */ + uint32_t *map32 = map; + for (unsigned i = 0; i < buffer_size / sizeof(*map32); i++) + map32[i] = i; + + /* Fill the rest with 0 */ + memset((char *)map + buffer_size, 0, memory_size - buffer_size); + + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, + buffer1, + 0, /* allocation index; for objects which need to bind to multiple mems */ + mem, 0); + + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, + buffer2, + 0, /* allocation index; for objects which need to bind to multiple mems */ + mem, buffer_requirements.size); + + VkCmdBuffer cmdBuffer; + vkCreateCommandBuffer(device, + &(VkCmdBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO, + .queueNodeIndex = 0, + .flags = 0 + }, &cmdBuffer); + + vkBeginCommandBuffer(cmdBuffer, + &(VkCmdBufferBeginInfo) { + .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO, + .flags = 0 + }); + + vkCmdCopyBuffer(cmdBuffer, buffer1, buffer2, 1, + &(VkBufferCopy) { + 
.srcOffset = 0, + .destOffset = 0, + .copySize = buffer_size, + }); + + vkEndCommandBuffer(cmdBuffer); + + vkQueueSubmit(queue, 1, &cmdBuffer, 0); + + vkQueueWaitIdle(queue); + + vkDestroyObject(device, VK_OBJECT_TYPE_BUFFER, buffer1); + vkDestroyObject(device, VK_OBJECT_TYPE_BUFFER, buffer2); + vkDestroyObject(device, VK_OBJECT_TYPE_COMMAND_BUFFER, cmdBuffer); + + uint32_t *map32_2 = map + buffer_requirements.size; + for (unsigned i = 0; i < buffer_size / sizeof(*map32); i++) { + if (map32[i] != map32_2[i]) { + printf("buffer mismatch at dword %d: found 0x%x, expected 0x%x\n", + i, map32_2[i], map32[i]); + } + } + + vkUnmapMemory(device, mem); + vkFreeMemory(device, mem); +} + static void test_formats(VkDevice device, VkQueue queue) { @@ -937,6 +1035,8 @@ int main(int argc, char *argv[]) test_timestamp(device, queue); } else if (argc > 1 && strcmp(argv[1], "formats") == 0) { test_formats(device, queue); + } else if (argc > 1 && strcmp(argv[1], "buffer-copy") == 0) { + test_buffer_copy(device, queue); } else { test_triangle(device, queue); } -- cgit v1.2.3 From 14929046ba3a2d328d274522e34bcde6a19a47e5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 May 2015 17:05:38 -0700 Subject: vk/compiler: Add shader language detection This commit adds support for the LunarG GLSL back-door as well as detecting regular GLSL and SPIR-V. The SPIR-V path doesn't exist yet, so that will cause an assert-fail. --- src/vulkan/compiler.cpp | 86 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 7743fba2c9d..1907bd52cc8 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -38,6 +38,8 @@ #include #include +#define SPIR_V_MAGIC_NUMBER 0x07230203 + static void fail_if(int cond, const char *format, ...) 
{ @@ -796,8 +798,35 @@ static const struct { { GL_COMPUTE_SHADER, "compute" }, }; +struct spirv_header{ + uint32_t magic; + uint32_t version; + uint32_t gen_magic; +}; + +static const char * +src_as_glsl(const char *data) +{ + const struct spirv_header *as_spirv = (const struct spirv_header *)data; + + /* Check alignment */ + if ((intptr_t)data & 0x3) { + return data; + } + + if (as_spirv->magic == SPIR_V_MAGIC_NUMBER) { + /* LunarG back-door */ + if (as_spirv->version == 0) + return data + 12; + else + return NULL; + } else { + return data; + } +} + static void -anv_compile_shader(struct anv_compiler *compiler, +anv_compile_shader_glsl(struct anv_compiler *compiler, struct gl_shader_program *program, struct anv_pipeline *pipeline, uint32_t stage) { @@ -807,7 +836,8 @@ anv_compile_shader(struct anv_compiler *compiler, shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); fail_if(shader == NULL, "failed to create %s shader\n", stage_info[stage].name); - shader->Source = strdup(pipeline->shaders[stage]->data); + + shader->Source = strdup(src_as_glsl(pipeline->shaders[stage]->data)); _mesa_glsl_compile_shader(&brw->ctx, shader, false, false); fail_on_compile_error(shader->CompileStatus, shader->InfoLog); @@ -815,13 +845,20 @@ anv_compile_shader(struct anv_compiler *compiler, program->NumShaders++; } +static void +anv_compile_shader_spirv(struct anv_compiler *compiler, + struct gl_shader_program *program, + struct anv_pipeline *pipeline, uint32_t stage) +{ + unreachable("SPIR-V is not supported yet!"); +} + int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) { struct gl_shader_program *program; int name = 0; struct brw_context *brw = compiler->brw; - struct anv_device *device = pipeline->device; /* When we free the pipeline, we detect stages based on the NULL status * of various prog_data pointers. Make them NULL by default. 
@@ -837,18 +874,41 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) fail_if(program == NULL || program->Shaders == NULL, "failed to create program\n"); - if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) - anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_VERTEX); - anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_FRAGMENT); - if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) - anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_GEOMETRY); + bool all_spirv = true; + for (unsigned i = 0; i < VK_NUM_SHADER_STAGE; i++) { + if (pipeline->shaders[i] == NULL) + continue; + + /* You need at least this much for "void main() { }" anyway */ + assert(pipeline->shaders[i]->size >= 12); + + if (src_as_glsl(pipeline->shaders[i]->data)) { + all_spirv = false; + break; + } + + assert(pipeline->shaders[i]->size % 4 == 0); + } + + assert(pipeline->shaders[VK_SHADER_STAGE_FRAGMENT] != NULL); - _mesa_glsl_link_shader(&brw->ctx, program); - fail_on_compile_error(program->LinkStatus, - program->InfoLog); + if (all_spirv) { + for (unsigned i = 0; i < VK_NUM_SHADER_STAGE; i++) { + if (pipeline->shaders[i]) + anv_compile_shader_spirv(compiler, program, pipeline, i); + } - anv_state_stream_init(&pipeline->program_stream, - &device->instruction_block_pool); + /* TODO: nir_link_shader? */ + } else { + for (unsigned i = 0; i < VK_NUM_SHADER_STAGE; i++) { + if (pipeline->shaders[i]) + anv_compile_shader_glsl(compiler, program, pipeline, i); + } + + _mesa_glsl_link_shader(&brw->ctx, program); + fail_on_compile_error(program->LinkStatus, + program->InfoLog); + } bool success; struct brw_wm_prog_key wm_key; -- cgit v1.2.3 From a61f3079967e774b40d27e378c463fb52a89558e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 20 May 2015 19:51:10 -0700 Subject: vk: Fix result of vkCreateInstance When fill_physical_device() fails, don't return VK_SUCCESS. 
--- src/vulkan/device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index f6578d01341..3c245cd036b 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -142,9 +142,11 @@ VkResult anv_CreateInstance( instance->physicalDeviceCount = 0; result = fill_physical_device(&instance->physicalDevice, instance, "/dev/dri/renderD128"); - if (result == VK_SUCCESS) - instance->physicalDeviceCount++; + if (result != VK_SUCCESS) + return result; + + instance->physicalDeviceCount++; *pInstance = (VkInstance) instance; return VK_SUCCESS; -- cgit v1.2.3 From 01504057f5a456bd9d2fc8ed969eda05421d3ebc Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 18 May 2015 22:08:55 -0700 Subject: vk: Use surface_format_info from dri driver for vkGetFormatInfo --- src/vulkan/formats.c | 411 +++++++++++++++++---------------------------------- src/vulkan/private.h | 6 +- src/vulkan/vk.c | 10 +- 3 files changed, 143 insertions(+), 284 deletions(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index 8f8dd266465..dfcbe9ab906 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -23,7 +23,8 @@ #include "private.h" -#define UNSUPPORTED ~0 +#define UNSUPPORTED ~0U + static const struct anv_format anv_formats[] = { [VK_FORMAT_UNDEFINED] = { .format = RAW }, [VK_FORMAT_R4G4_UNORM] = { .format = UNSUPPORTED }, @@ -34,252 +35,86 @@ static const struct anv_format anv_formats[] = { [VK_FORMAT_R5G6B5_USCALED] = { .format = UNSUPPORTED }, [VK_FORMAT_R5G5B5A1_UNORM] = { .format = UNSUPPORTED }, [VK_FORMAT_R5G5B5A1_USCALED] = { .format = UNSUPPORTED }, - [VK_FORMAT_R8_UNORM] = { - .format = R8_UNORM, .cpp = 1, .channels = 1, - .linear_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT, - .tiled_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT - }, - [VK_FORMAT_R8_SNORM] = { - .format = R8_SNORM, .cpp = 1, .channels = 1, - }, - [VK_FORMAT_R8_USCALED] = { - .format = R8_USCALED, .cpp = 
1, .channels = 1 - }, - [VK_FORMAT_R8_SSCALED] = { - .format = R8_SSCALED, .cpp = 1, .channels = 1 - }, - [VK_FORMAT_R8_UINT] = { - .format = R8_UINT, .cpp = 1, .channels = 1 - }, - [VK_FORMAT_R8_SINT] = { - .format = R8_SINT, .cpp = 1, .channels = 1 - }, + [VK_FORMAT_R8_UNORM] = { .format = R8_UNORM, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_SNORM] = { .format = R8_SNORM, .cpp = 1, .channels = 1, }, + [VK_FORMAT_R8_USCALED] = { .format = R8_USCALED, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_SSCALED] = { .format = R8_SSCALED, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_UINT] = { .format = R8_UINT, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_SINT] = { .format = R8_SINT, .cpp = 1, .channels = 1 }, [VK_FORMAT_R8_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_R8G8_UNORM] = { - .format = R8G8_UNORM, .cpp = 2, .channels = 2 - }, - [VK_FORMAT_R8G8_SNORM] = { - .format = R8G8_SNORM, .cpp = 2, .channels = 2 - }, - [VK_FORMAT_R8G8_USCALED] = { - .format = R8G8_USCALED, .cpp = 2, .channels = 2 - }, - [VK_FORMAT_R8G8_SSCALED] = { - .format = R8G8_SSCALED, .cpp = 2, .channels = 2 - }, - [VK_FORMAT_R8G8_UINT] = { - .format = R8G8_UINT, .cpp = 2, .channels = 2 - }, - [VK_FORMAT_R8G8_SINT] = { - .format = R8G8_SINT, .cpp = 2, .channels = 2 - }, + [VK_FORMAT_R8G8_UNORM] = { .format = R8G8_UNORM, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_SNORM] = { .format = R8G8_SNORM, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_USCALED] = { .format = R8G8_USCALED, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_SSCALED] = { .format = R8G8_SSCALED, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_UINT] = { .format = R8G8_UINT, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_SINT] = { .format = R8G8_SINT, .cpp = 2, .channels = 2 }, [VK_FORMAT_R8G8_SRGB] = { .format = UNSUPPORTED }, /* L8A8_UNORM_SRGB */ - [VK_FORMAT_R8G8B8_UNORM] = { - .format = R8G8B8X8_UNORM, .cpp = 3, .channels = 3 - }, + [VK_FORMAT_R8G8B8_UNORM] = { .format = R8G8B8X8_UNORM, .cpp = 3, .channels = 3 }, [VK_FORMAT_R8G8B8_SNORM] = { 
.format = R8G8B8_SNORM, .cpp = 4 }, - [VK_FORMAT_R8G8B8_USCALED] = { - .format = R8G8B8_USCALED, .cpp = 3, .channels = 3 - }, - [VK_FORMAT_R8G8B8_SSCALED] = { - .format = R8G8B8_SSCALED, .cpp = 3, .channels = 3 - }, - [VK_FORMAT_R8G8B8_UINT] = { - .format = R8G8B8_UINT, .cpp = 3, .channels = 3 - }, - [VK_FORMAT_R8G8B8_SINT] = { - .format = R8G8B8_SINT, .cpp = 3, .channels = 3 - }, + [VK_FORMAT_R8G8B8_USCALED] = { .format = R8G8B8_USCALED, .cpp = 3, .channels = 3 }, + [VK_FORMAT_R8G8B8_SSCALED] = { .format = R8G8B8_SSCALED, .cpp = 3, .channels = 3 }, + [VK_FORMAT_R8G8B8_UINT] = { .format = R8G8B8_UINT, .cpp = 3, .channels = 3 }, + [VK_FORMAT_R8G8B8_SINT] = { .format = R8G8B8_SINT, .cpp = 3, .channels = 3 }, [VK_FORMAT_R8G8B8_SRGB] = { .format = UNSUPPORTED }, /* B8G8R8A8_UNORM_SRGB */ - [VK_FORMAT_R8G8B8A8_UNORM] = { - .format = R8G8B8A8_UNORM, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_R8G8B8A8_SNORM] = { - .format = R8G8B8A8_SNORM, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_R8G8B8A8_USCALED] = { - .format = R8G8B8A8_USCALED, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_R8G8B8A8_SSCALED] = { - .format = R8G8B8A8_SSCALED, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_R8G8B8A8_UINT] = { - .format = R8G8B8A8_UINT, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_R8G8B8A8_SINT] = { - .format = R8G8B8A8_SINT, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_R8G8B8A8_SRGB] = { - .format = R8G8B8A8_UNORM_SRGB, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_R10G10B10A2_UNORM] = { - .format = R10G10B10A2_UNORM, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_R10G10B10A2_SNORM] = { - .format = R10G10B10A2_SNORM, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_R10G10B10A2_USCALED] = { - .format = R10G10B10A2_USCALED, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_R10G10B10A2_SSCALED] = { - .format = R10G10B10A2_SSCALED, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_R10G10B10A2_UINT] = { - .format = R10G10B10A2_UINT, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_R10G10B10A2_SINT] = { - .format = R10G10B10A2_SINT, .cpp = 4, 
.channels = 4 - }, - [VK_FORMAT_R16_UNORM] = { - .format = R16_UNORM, .cpp = 2, .channels = 1 - }, - [VK_FORMAT_R16_SNORM] = { - .format = R16_SNORM, .cpp = 2, .channels = 1 - }, - [VK_FORMAT_R16_USCALED] = { - .format = R16_USCALED, .cpp = 2, .channels = 1 - }, - [VK_FORMAT_R16_SSCALED] = { - .format = R16_SSCALED, .cpp = 2, .channels = 1 - }, - [VK_FORMAT_R16_UINT] = { - .format = R16_UINT, .cpp = 2, .channels = 1 - }, - [VK_FORMAT_R16_SINT] = { - .format = R16_SINT, .cpp = 2, .channels = 1 - }, - [VK_FORMAT_R16_SFLOAT] = { - .format = R16_FLOAT, .cpp = 2, .channels = 1 - }, - [VK_FORMAT_R16G16_UNORM] = { - .format = R16G16_UNORM, .cpp = 4, .channels = 2 - }, - [VK_FORMAT_R16G16_SNORM] = { - .format = R16G16_SNORM, .cpp = 4, .channels = 2 - }, - [VK_FORMAT_R16G16_USCALED] = { - .format = R16G16_USCALED, .cpp = 4, .channels = 2 - }, - [VK_FORMAT_R16G16_SSCALED] = { - .format = R16G16_SSCALED, .cpp = 4, .channels = 2 - }, - [VK_FORMAT_R16G16_UINT] = { - .format = R16G16_UINT, .cpp = 4, .channels = 2 - }, - [VK_FORMAT_R16G16_SINT] = { - .format = R16G16_SINT, .cpp = 4, .channels = 2 - }, - [VK_FORMAT_R16G16_SFLOAT] = { - .format = R16G16_FLOAT, .cpp = 4, .channels = 2 - }, - [VK_FORMAT_R16G16B16_UNORM] = { - .format = R16G16B16_UNORM, .cpp = 6, .channels = 3 - }, - [VK_FORMAT_R16G16B16_SNORM] = { - .format = R16G16B16_SNORM, .cpp = 6, .channels = 3 - }, - [VK_FORMAT_R16G16B16_USCALED] = { - .format = R16G16B16_USCALED, .cpp = 6, .channels = 3 - }, - [VK_FORMAT_R16G16B16_SSCALED] = { - .format = R16G16B16_SSCALED, .cpp = 6, .channels = 3 - }, - [VK_FORMAT_R16G16B16_UINT] = { - .format = R16G16B16_UINT, .cpp = 6, .channels = 3 - }, - [VK_FORMAT_R16G16B16_SINT] = { - .format = R16G16B16_SINT, .cpp = 6, .channels = 3 - }, - [VK_FORMAT_R16G16B16_SFLOAT] = { - .format = R16G16B16_FLOAT, .cpp = 6, .channels = 3 - }, - [VK_FORMAT_R16G16B16A16_UNORM] = { - .format = R16G16B16A16_UNORM, .cpp = 8, .channels = 4 - }, - [VK_FORMAT_R16G16B16A16_SNORM] = { - .format = 
R16G16B16A16_SNORM, .cpp = 8, .channels = 4 - }, - [VK_FORMAT_R16G16B16A16_USCALED] = { - .format = R16G16B16A16_USCALED, .cpp = 8, .channels = 4 - }, - [VK_FORMAT_R16G16B16A16_SSCALED] = { - .format = R16G16B16A16_SSCALED, .cpp = 8, .channels = 4 - }, - [VK_FORMAT_R16G16B16A16_UINT] = { - .format = R16G16B16A16_UINT, .cpp = 8, .channels = 4 - }, - [VK_FORMAT_R16G16B16A16_SINT] = { - .format = R16G16B16A16_SINT, .cpp = 8, .channels = 4 - }, - [VK_FORMAT_R16G16B16A16_SFLOAT] = { - .format = R16G16B16A16_FLOAT, .cpp = 8, .channels = 4 - }, - [VK_FORMAT_R32_UINT] = { - .format = R32_UINT, .cpp = 4, .channels = 1, - .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT - }, - [VK_FORMAT_R32_SINT] = { - .format = R32_SINT, .cpp = 4, .channels = 1, - .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT - }, - [VK_FORMAT_R32_SFLOAT] = { - .format = R32_FLOAT, .cpp = 4, .channels = 1, - .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT - }, - [VK_FORMAT_R32G32_UINT] = { - .format = R32G32_UINT, .cpp = 8, .channels = 2, - .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT - }, - [VK_FORMAT_R32G32_SINT] = { - .format = R32G32_SINT, .cpp = 8, .channels = 2, - .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT - }, - [VK_FORMAT_R32G32_SFLOAT] = { - .format = R32G32_FLOAT, .cpp = 8, .channels = 2, - .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT - }, - [VK_FORMAT_R32G32B32_UINT] = { - .format = R32G32B32_UINT, .cpp = 12, .channels = 3, - .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT - }, - [VK_FORMAT_R32G32B32_SINT] = { - .format = R32G32B32_SINT, .cpp = 12, .channels = 3, - .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT - }, - [VK_FORMAT_R32G32B32_SFLOAT] = { - .format = R32G32B32_FLOAT, .cpp = 12, .channels = 3, - .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT - }, - [VK_FORMAT_R32G32B32A32_UINT] = { - .format = R32G32B32A32_UINT, .cpp = 16, .channels = 4, - .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT - }, - [VK_FORMAT_R32G32B32A32_SINT] = { - 
.format = R32G32B32A32_SINT, .cpp = 16, .channels = 4, - .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT - }, - [VK_FORMAT_R32G32B32A32_SFLOAT] = { - .format = R32G32B32A32_FLOAT, .cpp = 16, .channels = 4, - .linear_flags = VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT - }, - [VK_FORMAT_R64_SFLOAT] = { - .format = R64_FLOAT, .cpp = 8, .channels = 1 - }, - [VK_FORMAT_R64G64_SFLOAT] = { - .format = R64G64_FLOAT, .cpp = 16, .channels = 2 - }, - [VK_FORMAT_R64G64B64_SFLOAT] = { - .format = R64G64B64_FLOAT, .cpp = 24, .channels = 3 - }, - [VK_FORMAT_R64G64B64A64_SFLOAT] = { - .format = R64G64B64A64_FLOAT, .cpp = 32, .channels = 4 - }, - [VK_FORMAT_R11G11B10_UFLOAT] = { - .format = R11G11B10_FLOAT, .cpp = 4, .channels = 3 - }, - [VK_FORMAT_R9G9B9E5_UFLOAT] = { - .format = R9G9B9E5_SHAREDEXP, .cpp = 4, .channels = 3 - }, + [VK_FORMAT_R8G8B8A8_UNORM] = { .format = R8G8B8A8_UNORM, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_SNORM] = { .format = R8G8B8A8_SNORM, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_USCALED] = { .format = R8G8B8A8_USCALED, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_SSCALED] = { .format = R8G8B8A8_SSCALED, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_UINT] = { .format = R8G8B8A8_UINT, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_SINT] = { .format = R8G8B8A8_SINT, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_SRGB] = { .format = R8G8B8A8_UNORM_SRGB, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R10G10B10A2_UNORM] = { .format = R10G10B10A2_UNORM, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R10G10B10A2_SNORM] = { .format = R10G10B10A2_SNORM, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R10G10B10A2_USCALED] = { .format = R10G10B10A2_USCALED, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R10G10B10A2_SSCALED] = { .format = R10G10B10A2_SSCALED, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R10G10B10A2_UINT] = { .format = R10G10B10A2_UINT, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R10G10B10A2_SINT] = { .format = R10G10B10A2_SINT, .cpp = 4, .channels = 4 }, + 
[VK_FORMAT_R16_UNORM] = { .format = R16_UNORM, .cpp = 2, .channels = 1 }, + [VK_FORMAT_R16_SNORM] = { .format = R16_SNORM, .cpp = 2, .channels = 1 }, + [VK_FORMAT_R16_USCALED] = { .format = R16_USCALED, .cpp = 2, .channels = 1 }, + [VK_FORMAT_R16_SSCALED] = { .format = R16_SSCALED, .cpp = 2, .channels = 1 }, + [VK_FORMAT_R16_UINT] = { .format = R16_UINT, .cpp = 2, .channels = 1 }, + [VK_FORMAT_R16_SINT] = { .format = R16_SINT, .cpp = 2, .channels = 1 }, + [VK_FORMAT_R16_SFLOAT] = { .format = R16_FLOAT, .cpp = 2, .channels = 1 }, + [VK_FORMAT_R16G16_UNORM] = { .format = R16G16_UNORM, .cpp = 4, .channels = 2 }, + [VK_FORMAT_R16G16_SNORM] = { .format = R16G16_SNORM, .cpp = 4, .channels = 2 }, + [VK_FORMAT_R16G16_USCALED] = { .format = R16G16_USCALED, .cpp = 4, .channels = 2 }, + [VK_FORMAT_R16G16_SSCALED] = { .format = R16G16_SSCALED, .cpp = 4, .channels = 2 }, + [VK_FORMAT_R16G16_UINT] = { .format = R16G16_UINT, .cpp = 4, .channels = 2 }, + [VK_FORMAT_R16G16_SINT] = { .format = R16G16_SINT, .cpp = 4, .channels = 2 }, + [VK_FORMAT_R16G16_SFLOAT] = { .format = R16G16_FLOAT, .cpp = 4, .channels = 2 }, + [VK_FORMAT_R16G16B16_UNORM] = { .format = R16G16B16_UNORM, .cpp = 6, .channels = 3 }, + [VK_FORMAT_R16G16B16_SNORM] = { .format = R16G16B16_SNORM, .cpp = 6, .channels = 3 }, + [VK_FORMAT_R16G16B16_USCALED] = { .format = R16G16B16_USCALED, .cpp = 6, .channels = 3 }, + [VK_FORMAT_R16G16B16_SSCALED] = { .format = R16G16B16_SSCALED, .cpp = 6, .channels = 3 }, + [VK_FORMAT_R16G16B16_UINT] = { .format = R16G16B16_UINT, .cpp = 6, .channels = 3 }, + [VK_FORMAT_R16G16B16_SINT] = { .format = R16G16B16_SINT, .cpp = 6, .channels = 3 }, + [VK_FORMAT_R16G16B16_SFLOAT] = { .format = R16G16B16_FLOAT, .cpp = 6, .channels = 3 }, + [VK_FORMAT_R16G16B16A16_UNORM] = { .format = R16G16B16A16_UNORM, .cpp = 8, .channels = 4 }, + [VK_FORMAT_R16G16B16A16_SNORM] = { .format = R16G16B16A16_SNORM, .cpp = 8, .channels = 4 }, + [VK_FORMAT_R16G16B16A16_USCALED] = { .format = R16G16B16A16_USCALED, .cpp 
= 8, .channels = 4 }, + [VK_FORMAT_R16G16B16A16_SSCALED] = { .format = R16G16B16A16_SSCALED, .cpp = 8, .channels = 4 }, + [VK_FORMAT_R16G16B16A16_UINT] = { .format = R16G16B16A16_UINT, .cpp = 8, .channels = 4 }, + [VK_FORMAT_R16G16B16A16_SINT] = { .format = R16G16B16A16_SINT, .cpp = 8, .channels = 4 }, + [VK_FORMAT_R16G16B16A16_SFLOAT] = { .format = R16G16B16A16_FLOAT, .cpp = 8, .channels = 4 }, + [VK_FORMAT_R32_UINT] = { .format = R32_UINT, .cpp = 4, .channels = 1, }, + [VK_FORMAT_R32_SINT] = { .format = R32_SINT, .cpp = 4, .channels = 1, }, + [VK_FORMAT_R32_SFLOAT] = { .format = R32_FLOAT, .cpp = 4, .channels = 1, }, + [VK_FORMAT_R32G32_UINT] = { .format = R32G32_UINT, .cpp = 8, .channels = 2, }, + [VK_FORMAT_R32G32_SINT] = { .format = R32G32_SINT, .cpp = 8, .channels = 2, }, + [VK_FORMAT_R32G32_SFLOAT] = { .format = R32G32_FLOAT, .cpp = 8, .channels = 2, }, + [VK_FORMAT_R32G32B32_UINT] = { .format = R32G32B32_UINT, .cpp = 12, .channels = 3, }, + [VK_FORMAT_R32G32B32_SINT] = { .format = R32G32B32_SINT, .cpp = 12, .channels = 3, }, + [VK_FORMAT_R32G32B32_SFLOAT] = { .format = R32G32B32_FLOAT, .cpp = 12, .channels = 3, }, + [VK_FORMAT_R32G32B32A32_UINT] = { .format = R32G32B32A32_UINT, .cpp = 16, .channels = 4, }, + [VK_FORMAT_R32G32B32A32_SINT] = { .format = R32G32B32A32_SINT, .cpp = 16, .channels = 4, }, + [VK_FORMAT_R32G32B32A32_SFLOAT] = { .format = R32G32B32A32_FLOAT, .cpp = 16, .channels = 4, }, + [VK_FORMAT_R64_SFLOAT] = { .format = R64_FLOAT, .cpp = 8, .channels = 1 }, + [VK_FORMAT_R64G64_SFLOAT] = { .format = R64G64_FLOAT, .cpp = 16, .channels = 2 }, + [VK_FORMAT_R64G64B64_SFLOAT] = { .format = R64G64B64_FLOAT, .cpp = 24, .channels = 3 }, + [VK_FORMAT_R64G64B64A64_SFLOAT] = { .format = R64G64B64A64_FLOAT, .cpp = 32, .channels = 4 }, + [VK_FORMAT_R11G11B10_UFLOAT] = { .format = R11G11B10_FLOAT, .cpp = 4, .channels = 3 }, + [VK_FORMAT_R9G9B9E5_UFLOAT] = { .format = R9G9B9E5_SHAREDEXP, .cpp = 4, .channels = 3 }, // [VK_FORMAT_D16_UNORM] = { .format = 
D16_UNORM }, // [VK_FORMAT_D24_UNORM] = { .format = D24_UNORM }, // [VK_FORMAT_D32_SFLOAT] = { .format = D32_SFLOAT }, @@ -341,15 +176,9 @@ static const struct anv_format anv_formats[] = { // [VK_FORMAT_ASTC_12x10_SRGB] = { .format = ASTC_12x10 }, // [VK_FORMAT_ASTC_12x12_UNORM] = { .format = ASTC_12x12 }, // [VK_FORMAT_ASTC_12x12_SRGB] = { .format = ASTC_12x12 }, - [VK_FORMAT_B4G4R4A4_UNORM] = { - .format = B4G4R4A4_UNORM, .cpp = 2, .channels = 4 - }, - [VK_FORMAT_B5G5R5A1_UNORM] = { - .format = B5G5R5A1_UNORM, .cpp = 2, .channels = 4 - }, - [VK_FORMAT_B5G6R5_UNORM] = { - .format = B5G6R5_UNORM, .cpp = 2, .channels = 3 - }, + [VK_FORMAT_B4G4R4A4_UNORM] = { .format = B4G4R4A4_UNORM, .cpp = 2, .channels = 4 }, + [VK_FORMAT_B5G5R5A1_UNORM] = { .format = B5G5R5A1_UNORM, .cpp = 2, .channels = 4 }, + [VK_FORMAT_B5G6R5_UNORM] = { .format = B5G6R5_UNORM, .cpp = 2, .channels = 3 }, // [VK_FORMAT_B5G6R5_USCALED] = { .format = B5G6R5_USCALED }, // [VK_FORMAT_B8G8R8_UNORM] = { .format = B8G8R8_UNORM }, // [VK_FORMAT_B8G8R8_SNORM] = { .format = B8G8R8_SNORM }, @@ -358,33 +187,19 @@ static const struct anv_format anv_formats[] = { // [VK_FORMAT_B8G8R8_UINT] = { .format = B8G8R8_UINT }, // [VK_FORMAT_B8G8R8_SINT] = { .format = B8G8R8_SINT }, // [VK_FORMAT_B8G8R8_SRGB] = { .format = B8G8R8_SRGB }, - [VK_FORMAT_B8G8R8A8_UNORM] = { - .format = B8G8R8A8_UNORM, .cpp = 4, .channels = 4 - }, + [VK_FORMAT_B8G8R8A8_UNORM] = { .format = B8G8R8A8_UNORM, .cpp = 4, .channels = 4 }, // [VK_FORMAT_B8G8R8A8_SNORM] = { .format = B8G8R8A8_SNORM }, // [VK_FORMAT_B8G8R8A8_USCALED] = { .format = B8G8R8A8_USCALED }, // [VK_FORMAT_B8G8R8A8_SSCALED] = { .format = B8G8R8A8_SSCALED }, // [VK_FORMAT_B8G8R8A8_UINT] = { .format = B8G8R8A8_UINT }, // [VK_FORMAT_B8G8R8A8_SINT] = { .format = B8G8R8A8_SINT }, // [VK_FORMAT_B8G8R8A8_SRGB] = { .format = B8G8R8A8_SRGB }, - [VK_FORMAT_B10G10R10A2_UNORM] = { - .format = B10G10R10A2_UNORM, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_B10G10R10A2_SNORM] = { - .format = 
B10G10R10A2_SNORM, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_B10G10R10A2_USCALED] = { - .format = B10G10R10A2_USCALED, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_B10G10R10A2_SSCALED] = { - .format = B10G10R10A2_SSCALED, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_B10G10R10A2_UINT] = { - .format = B10G10R10A2_UINT, .cpp = 4, .channels = 4 - }, - [VK_FORMAT_B10G10R10A2_SINT] = { - .format = B10G10R10A2_SINT, .cpp = 4, .channels = 4 - } + [VK_FORMAT_B10G10R10A2_UNORM] = { .format = B10G10R10A2_UNORM, .cpp = 4, .channels = 4 }, + [VK_FORMAT_B10G10R10A2_SNORM] = { .format = B10G10R10A2_SNORM, .cpp = 4, .channels = 4 }, + [VK_FORMAT_B10G10R10A2_USCALED] = { .format = B10G10R10A2_USCALED, .cpp = 4, .channels = 4 }, + [VK_FORMAT_B10G10R10A2_SSCALED] = { .format = B10G10R10A2_SSCALED, .cpp = 4, .channels = 4 }, + [VK_FORMAT_B10G10R10A2_UINT] = { .format = B10G10R10A2_UINT, .cpp = 4, .channels = 4 }, + [VK_FORMAT_B10G10R10A2_SINT] = { .format = B10G10R10A2_SINT, .cpp = 4, .channels = 4 } }; const struct anv_format * @@ -395,6 +210,21 @@ anv_format_for_vk_format(VkFormat format) // Format capabilities +struct surface_format_info { + bool exists; + int sampling; + int filtering; + int shadow_compare; + int chroma_key; + int render_target; + int alpha_blend; + int input_vb; + int streamed_output_vb; + int color_processing; +}; + +extern const struct surface_format_info surface_formats[]; + VkResult anv_GetFormatInfo( VkDevice _device, VkFormat _format, @@ -402,12 +232,41 @@ VkResult anv_GetFormatInfo( size_t* pDataSize, void* pData) { + struct anv_device *device = (struct anv_device *) _device; VkFormatProperties *properties; const struct anv_format *format; + const struct surface_format_info *info; + int gen; + + gen = device->info.gen * 10; + if (device->info.is_haswell) + gen += 5; format = anv_format_for_vk_format(_format); if (format == 0) - return VK_ERROR_INVALID_VALUE; + return vk_error(VK_ERROR_INVALID_VALUE); + if (format->format == UNSUPPORTED) + return VK_UNSUPPORTED; 
+ info = &surface_formats[format->format]; + if (!info->exists) + return VK_UNSUPPORTED; + + uint32_t linear = 0, tiled = 0; + if (info->sampling <= gen) { + linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + } + if (info->render_target <= gen) { + linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + } + if (info->alpha_blend <= gen) { + linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + } + if (info->input_vb <= gen) { + linear |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + } switch (infoType) { case VK_FORMAT_INFO_TYPE_PROPERTIES: @@ -417,8 +276,8 @@ VkResult anv_GetFormatInfo( if (pData == NULL) return VK_SUCCESS; - properties->linearTilingFeatures = format->linear_flags; - properties->optimalTilingFeatures = format->tiled_flags; + properties->linearTilingFeatures = linear; + properties->optimalTilingFeatures = tiled; return VK_SUCCESS; default: diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 7e935962742..e47d5556a9e 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -640,10 +640,8 @@ void anv_compiler_free(struct anv_pipeline *pipeline); struct anv_format { uint32_t format; - int32_t cpp; - int32_t channels; - uint32_t linear_flags; - uint32_t tiled_flags; + uint32_t cpp; + uint32_t channels; }; const struct anv_format * diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c index d1c51bbe980..8d112bba54a 100644 --- a/src/vulkan/vk.c +++ b/src/vulkan/vk.c @@ -422,16 +422,18 @@ test_formats(VkDevice device, VkQueue queue) "conversion" }; - vkGetFormatInfo(device, - VK_FORMAT_R32G32B32A32_SFLOAT, + VkFormat format = VK_FORMAT_R32G32B32A32_SFLOAT; + vkGetFormatInfo(device, format, VK_FORMAT_INFO_TYPE_PROPERTIES, &size, &properties); - printf("linear tiling features:"); + printf("format 0x%x:\n", format); + + printf(" linear tiling features (0x%x):", properties.linearTilingFeatures); 
for_each_bit(f, properties.linearTilingFeatures) printf(" %s", features[f]); - printf("\noptimal tiling features:"); + printf("\n optimal tiling features (0x%x):", properties.optimalTilingFeatures); for_each_bit(f, properties.optimalTilingFeatures) printf(" %s", features[f]); printf("\n"); -- cgit v1.2.3 From a1bd426393fb24a53d2cb180b21db4473ed71330 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 19 May 2015 14:14:24 -0700 Subject: vk: Stream surface state instead of using the surface pool Since the binding table pointer is only 16 bits, we can only have 64kb of binding table state allocated at any given time. With a block size of 1kb, that amounts to just 64 command buffers, which is not enough. --- src/vulkan/aub.c | 4 +- src/vulkan/device.c | 112 ++++++++++++++++++++++++++++++++++++++------------- src/vulkan/meta.c | 25 ++++++++++-- src/vulkan/private.h | 4 +- 4 files changed, 112 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/vulkan/aub.c b/src/vulkan/aub.c index 5e66aa839e3..bb4772a39ea 100644 --- a/src/vulkan/aub.c +++ b/src/vulkan/aub.c @@ -261,8 +261,8 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) } relocate_bo(&batch->bo, &batch->cmd_relocs, aub_bos); - relocate_bo(&device->surface_state_block_pool.bo, - &batch->surf_relocs, aub_bos); + relocate_bo(&cmd_buffer->surface_bo, + &cmd_buffer->surface_relocs, aub_bos); for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) { bo = cmd_buffer->exec2_bos[i]; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 3c245cd036b..64a2e492e29 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -481,15 +481,20 @@ anv_batch_init(struct anv_batch *batch, struct anv_device *device) batch->bo.map = anv_gem_mmap(device, batch->bo.gem_handle, 0, BATCH_SIZE); if (batch->bo.map == NULL) { - anv_gem_close(device, batch->bo.gem_handle); - return vk_error(VK_ERROR_MEMORY_MAP_FAILED); + result = vk_error(VK_ERROR_MEMORY_MAP_FAILED); + goto fail_bo; } 
batch->cmd_relocs.num_relocs = 0; - batch->surf_relocs.num_relocs = 0; batch->next = batch->bo.map; return VK_SUCCESS; + + fail_bo: + anv_gem_close(device, batch->bo.gem_handle); + + return result; + } void @@ -504,7 +509,6 @@ anv_batch_reset(struct anv_batch *batch) { batch->next = batch->bo.map; batch->cmd_relocs.num_relocs = 0; - batch->surf_relocs.num_relocs = 0; } void * @@ -568,7 +572,6 @@ anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) offset = batch->next - batch->bo.map; anv_reloc_list_append(&batch->cmd_relocs, &other->cmd_relocs, offset); - anv_reloc_list_append(&batch->surf_relocs, &other->surf_relocs, offset); batch->next += size; } @@ -926,6 +929,8 @@ anv_cmd_buffer_destructor(struct anv_device * device, { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object; + anv_gem_munmap(cmd_buffer->surface_bo.map, BATCH_SIZE); + anv_gem_close(device, cmd_buffer->surface_bo.gem_handle); anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); anv_state_stream_finish(&cmd_buffer->binding_table_state_stream); @@ -2073,12 +2078,27 @@ VkResult anv_CreateCommandBuffer( if (result != VK_SUCCESS) goto fail; + result = anv_bo_init_new(&cmd_buffer->surface_bo, device, BATCH_SIZE); + if (result != VK_SUCCESS) + goto fail_batch; + + cmd_buffer->surface_bo.map = + anv_gem_mmap(device, cmd_buffer->surface_bo.gem_handle, 0, BATCH_SIZE); + if (cmd_buffer->surface_bo.map == NULL) { + result = vk_error(VK_ERROR_MEMORY_MAP_FAILED); + goto fail_surface_bo; + } + + /* Start surface_next at 1 so surface offset 0 is invalid. 
*/ + cmd_buffer->surface_next = 1; + cmd_buffer->surface_relocs.num_relocs = 0; + cmd_buffer->exec2_objects = anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_objects[0]), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (cmd_buffer->exec2_objects == NULL) { result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_batch; + goto fail_surface_map; } cmd_buffer->exec2_bos = @@ -2105,6 +2125,10 @@ VkResult anv_CreateCommandBuffer( fail_exec2_objects: anv_device_free(device, cmd_buffer->exec2_objects); + fail_surface_map: + anv_gem_munmap(cmd_buffer->surface_bo.map, BATCH_SIZE); + fail_surface_bo: + anv_gem_close(device, cmd_buffer->surface_bo.gem_handle); fail_batch: anv_batch_finish(&cmd_buffer->batch, device); fail: @@ -2130,7 +2154,7 @@ VkResult anv_BeginCommandBuffer( .GeneralStateBufferSize = 0xfffff, .GeneralStateBufferSizeModifyEnable = true, - .SurfaceStateBaseAddress = { &device->surface_state_block_pool.bo, 0 }, + .SurfaceStateBaseAddress = { &cmd_buffer->surface_bo, 0 }, .SurfaceStateMemoryObjectControlState = 0, /* FIXME: MOCS */ .SurfaceStateBaseAddressModifyEnable = true, @@ -2277,13 +2301,13 @@ VkResult anv_EndCommandBuffer( pthread_mutex_lock(&device->mutex); /* Add block pool bos first so we can add them with their relocs. 
*/ - anv_cmd_buffer_add_bo(cmd_buffer, &device->surface_state_block_pool.bo, - &batch->surf_relocs); + anv_cmd_buffer_add_bo(cmd_buffer, &cmd_buffer->surface_bo, + &cmd_buffer->surface_relocs); - anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->surf_relocs); + anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs); anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->cmd_relocs); anv_cmd_buffer_add_bo(cmd_buffer, &batch->bo, &batch->cmd_relocs); - anv_cmd_buffer_process_relocs(cmd_buffer, &batch->surf_relocs); + anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); anv_cmd_buffer_process_relocs(cmd_buffer, &batch->cmd_relocs); cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects; @@ -2313,6 +2337,8 @@ VkResult anv_ResetCommandBuffer( struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; anv_batch_reset(&cmd_buffer->batch); + cmd_buffer->surface_next = 0; + cmd_buffer->surface_relocs.num_relocs = 0; return VK_SUCCESS; } @@ -2363,6 +2389,22 @@ void anv_CmdBindDynamicStateObject( }; } +static struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment) +{ + struct anv_state state; + + state.offset = ALIGN_U32(cmd_buffer->surface_next, alignment); + state.map = cmd_buffer->surface_bo.map + state.offset; + state.alloc_size = size; + cmd_buffer->surface_next = state.offset + size; + + assert(state.offset + size < cmd_buffer->surface_bo.size); + + return state; +} + void anv_CmdBindDescriptorSets( VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, @@ -2392,8 +2434,11 @@ void anv_CmdBindDescriptorSets( for (uint32_t b = 0; b < set_layout->stage[s].surface_count; b++) { struct anv_surface_view *view = set->descriptors[surface_to_desc[b]].view; - bindings->descriptors[s].surfaces[start + b] = - view->surface_state.offset; + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); + memcpy(state.map, 
view->surface_state.map, 64); + + bindings->descriptors[s].surfaces[start + b] = state.offset; bindings->descriptors[s].relocs[start + b].bo = view->bo; bindings->descriptors[s].relocs[start + b].offset = view->offset; } @@ -2480,24 +2525,33 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) if (layers + surface_count > 0) { struct anv_state state; + uint32_t offset; + uint32_t *address; uint32_t size; size = (bias + surface_count) * sizeof(uint32_t); - state = anv_state_stream_alloc(&cmd_buffer->binding_table_state_stream, - size, 32); + state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); memcpy(state.map, bindings->descriptors[s].surfaces, size); - for (uint32_t i = 0; i < layers; i++) - anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, - bindings->descriptors[s].surfaces[i] + 8 * sizeof(int32_t), - bindings->descriptors[s].relocs[i].bo, - bindings->descriptors[s].relocs[i].offset); + for (uint32_t i = 0; i < layers; i++) { + offset = bindings->descriptors[s].surfaces[i] + 8 * sizeof(int32_t); + address = cmd_buffer->surface_bo.map + offset; + + *address = + anv_reloc_list_add(&cmd_buffer->surface_relocs, offset, + bindings->descriptors[s].relocs[i].bo, + bindings->descriptors[s].relocs[i].offset); + } + + for (uint32_t i = 0; i < surface_count; i++) { + offset = bindings->descriptors[s].surfaces[i] + 8 * sizeof(int32_t); + address = cmd_buffer->surface_bo.map + offset; - for (uint32_t i = 0; i < surface_count; i++) - anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, - bindings->descriptors[s].surfaces[bias + i] + 8 * sizeof(int32_t), - bindings->descriptors[s].relocs[bias + i].bo, - bindings->descriptors[s].relocs[bias + i].offset); + *address = + anv_reloc_list_add(&cmd_buffer->surface_relocs, offset, + bindings->descriptors[s].relocs[bias + i].bo, + bindings->descriptors[s].relocs[bias + i].offset); + } static const uint32_t binding_table_opcodes[] = { [VK_SHADER_STAGE_VERTEX] = 38, @@ -2519,7 +2573,7 @@ flush_descriptor_sets(struct 
anv_cmd_buffer *cmd_buffer) size_t size; size = layout->stage[s].sampler_count * 16; - state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); + state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); memcpy(state.map, bindings->descriptors[s].samplers, size); static const uint32_t sampler_state_opcodes[] = { @@ -3086,7 +3140,11 @@ anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer) for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) { struct anv_surface_view *view = framebuffer->color_attachments[i]; - bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = view->surface_state.offset; + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); + memcpy(state.map, view->surface_state.map, 64); + + bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = state.offset; bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo = view->bo; bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset = view->offset; } diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index b088e777dd2..976a0ee3b6a 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -174,6 +174,27 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; } +static void +anv_cmd_buffer_copy_render_targets(struct anv_cmd_buffer *cmd_buffer, + struct anv_saved_state *state) +{ + struct anv_framebuffer *fb = cmd_buffer->framebuffer; + struct anv_bindings *old_bindings = state->old_bindings; + struct anv_bindings *bindings = cmd_buffer->bindings; + + for (uint32_t i = 0; i < fb->color_attachment_count; i++) { + bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = + old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i]; + + bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo = + old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo; + bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset = + 
old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset; + } + + cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; +} + struct vue_header { uint32_t Reserved; uint32_t RTAIndex; @@ -241,9 +262,7 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, }; anv_cmd_buffer_save(cmd_buffer, &saved_state); - - /* Initialize render targets for the meta bindings. */ - anv_cmd_buffer_fill_render_targets(cmd_buffer); + anv_cmd_buffer_copy_render_targets(cmd_buffer, &saved_state); anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, (VkBuffer[]) { diff --git a/src/vulkan/private.h b/src/vulkan/private.h index e47d5556a9e..e4534f26e8c 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -368,7 +368,6 @@ struct anv_batch { struct anv_bo bo; void * next; struct anv_reloc_list cmd_relocs; - struct anv_reloc_list surf_relocs; }; VkResult anv_batch_init(struct anv_batch *batch, struct anv_device *device); @@ -549,6 +548,9 @@ struct anv_cmd_buffer { uint32_t bo_count; struct anv_batch batch; + struct anv_bo surface_bo; + uint32_t surface_next; + struct anv_reloc_list surface_relocs; struct anv_state_stream binding_table_state_stream; struct anv_state_stream surface_state_stream; struct anv_state_stream dynamic_state_stream; -- cgit v1.2.3 From 387a1bb58f650526746a959850c4b10a7e650106 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 20 May 2015 14:36:11 -0700 Subject: vk: Mark VK_FORMAT_UNDEFINED as 1 cpp, 1 channel --- src/vulkan/formats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index dfcbe9ab906..c1cdc977ac4 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -26,7 +26,7 @@ #define UNSUPPORTED ~0U static const struct anv_format anv_formats[] = { - [VK_FORMAT_UNDEFINED] = { .format = RAW }, + [VK_FORMAT_UNDEFINED] = { .format = RAW, .cpp = 1, .channels = 1 }, [VK_FORMAT_R4G4_UNORM] = { .format = UNSUPPORTED }, [VK_FORMAT_R4G4_USCALED] 
= { .format = UNSUPPORTED }, [VK_FORMAT_R4G4B4A4_UNORM] = { .format = UNSUPPORTED }, -- cgit v1.2.3 From 63da97452964027012ccc67e22e9e8d3cb27247d Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 20 May 2015 14:38:11 -0700 Subject: vk: Mark remaining unsupported formats as such --- src/vulkan/formats.c | 152 ++++++++++++++++++++++++++------------------------- 1 file changed, 77 insertions(+), 75 deletions(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index c1cdc977ac4..13ae1ed7538 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -115,85 +115,87 @@ static const struct anv_format anv_formats[] = { [VK_FORMAT_R64G64B64A64_SFLOAT] = { .format = R64G64B64A64_FLOAT, .cpp = 32, .channels = 4 }, [VK_FORMAT_R11G11B10_UFLOAT] = { .format = R11G11B10_FLOAT, .cpp = 4, .channels = 3 }, [VK_FORMAT_R9G9B9E5_UFLOAT] = { .format = R9G9B9E5_SHAREDEXP, .cpp = 4, .channels = 3 }, - // [VK_FORMAT_D16_UNORM] = { .format = D16_UNORM }, - // [VK_FORMAT_D24_UNORM] = { .format = D24_UNORM }, - // [VK_FORMAT_D32_SFLOAT] = { .format = D32_SFLOAT }, - // [VK_FORMAT_S8_UINT] = { .format = S8_UINT }, - // [VK_FORMAT_D16_UNORM_S8_UINT] = { .format = D16_UNORM }, - // [VK_FORMAT_D24_UNORM_S8_UINT] = { .format = D24_UNORM }, - // [VK_FORMAT_D32_SFLOAT_S8_UINT] = { .format = D32_SFLOAT }, - // [VK_FORMAT_BC1_RGB_UNORM] = { .format = BC1_RGB }, - // [VK_FORMAT_BC1_RGB_SRGB] = { .format = BC1_RGB }, - // [VK_FORMAT_BC1_RGBA_UNORM] = { .format = BC1_RGBA }, - // [VK_FORMAT_BC1_RGBA_SRGB] = { .format = BC1_RGBA }, - // [VK_FORMAT_BC2_UNORM] = { .format = BC2_UNORM }, - // [VK_FORMAT_BC2_SRGB] = { .format = BC2_SRGB }, - // [VK_FORMAT_BC3_UNORM] = { .format = BC3_UNORM }, - // [VK_FORMAT_BC3_SRGB] = { .format = BC3_SRGB }, - // [VK_FORMAT_BC4_UNORM] = { .format = BC4_UNORM }, - // [VK_FORMAT_BC4_SNORM] = { .format = BC4_SNORM }, - // [VK_FORMAT_BC5_UNORM] = { .format = BC5_UNORM }, - // [VK_FORMAT_BC5_SNORM] = { .format = BC5_SNORM }, - // 
[VK_FORMAT_BC6H_UFLOAT] = { .format = BC6H_UFLOAT }, - // [VK_FORMAT_BC6H_SFLOAT] = { .format = BC6H_SFLOAT }, - // [VK_FORMAT_BC7_UNORM] = { .format = BC7_UNORM }, - // [VK_FORMAT_BC7_SRGB] = { .format = BC7_SRGB }, - // [VK_FORMAT_ETC2_R8G8B8_UNORM] = { .format = ETC2_R8G8B8 }, - // [VK_FORMAT_ETC2_R8G8B8_SRGB] = { .format = ETC2_R8G8B8 }, - // [VK_FORMAT_ETC2_R8G8B8A1_UNORM] = { .format = ETC2_R8G8B8A1 }, - // [VK_FORMAT_ETC2_R8G8B8A1_SRGB] = { .format = ETC2_R8G8B8A1 }, - // [VK_FORMAT_ETC2_R8G8B8A8_UNORM] = { .format = ETC2_R8G8B8A8 }, - // [VK_FORMAT_ETC2_R8G8B8A8_SRGB] = { .format = ETC2_R8G8B8A8 }, - // [VK_FORMAT_EAC_R11_UNORM] = { .format = EAC_R11 }, - // [VK_FORMAT_EAC_R11_SNORM] = { .format = EAC_R11 }, - // [VK_FORMAT_EAC_R11G11_UNORM] = { .format = EAC_R11G11 }, - // [VK_FORMAT_EAC_R11G11_SNORM] = { .format = EAC_R11G11 }, - // [VK_FORMAT_ASTC_4x4_UNORM] = { .format = ASTC_4x4 }, - // [VK_FORMAT_ASTC_4x4_SRGB] = { .format = ASTC_4x4 }, - // [VK_FORMAT_ASTC_5x4_UNORM] = { .format = ASTC_5x4 }, - // [VK_FORMAT_ASTC_5x4_SRGB] = { .format = ASTC_5x4 }, - // [VK_FORMAT_ASTC_5x5_UNORM] = { .format = ASTC_5x5 }, - // [VK_FORMAT_ASTC_5x5_SRGB] = { .format = ASTC_5x5 }, - // [VK_FORMAT_ASTC_6x5_UNORM] = { .format = ASTC_6x5 }, - // [VK_FORMAT_ASTC_6x5_SRGB] = { .format = ASTC_6x5 }, - // [VK_FORMAT_ASTC_6x6_UNORM] = { .format = ASTC_6x6 }, - // [VK_FORMAT_ASTC_6x6_SRGB] = { .format = ASTC_6x6 }, - // [VK_FORMAT_ASTC_8x5_UNORM] = { .format = ASTC_8x5 }, - // [VK_FORMAT_ASTC_8x5_SRGB] = { .format = ASTC_8x5 }, - // [VK_FORMAT_ASTC_8x6_UNORM] = { .format = ASTC_8x6 }, - // [VK_FORMAT_ASTC_8x6_SRGB] = { .format = ASTC_8x6 }, - // [VK_FORMAT_ASTC_8x8_UNORM] = { .format = ASTC_8x8 }, - // [VK_FORMAT_ASTC_8x8_SRGB] = { .format = ASTC_8x8 }, - // [VK_FORMAT_ASTC_10x5_UNORM] = { .format = ASTC_10x5 }, - // [VK_FORMAT_ASTC_10x5_SRGB] = { .format = ASTC_10x5 }, - // [VK_FORMAT_ASTC_10x6_UNORM] = { .format = ASTC_10x6 }, - // [VK_FORMAT_ASTC_10x6_SRGB] = { .format = 
ASTC_10x6 }, - // [VK_FORMAT_ASTC_10x8_UNORM] = { .format = ASTC_10x8 }, - // [VK_FORMAT_ASTC_10x8_SRGB] = { .format = ASTC_10x8 }, - // [VK_FORMAT_ASTC_10x10_UNORM] = { .format = ASTC_10x10 }, - // [VK_FORMAT_ASTC_10x10_SRGB] = { .format = ASTC_10x10 }, - // [VK_FORMAT_ASTC_12x10_UNORM] = { .format = ASTC_12x10 }, - // [VK_FORMAT_ASTC_12x10_SRGB] = { .format = ASTC_12x10 }, - // [VK_FORMAT_ASTC_12x12_UNORM] = { .format = ASTC_12x12 }, - // [VK_FORMAT_ASTC_12x12_SRGB] = { .format = ASTC_12x12 }, + + [VK_FORMAT_D16_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_D24_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_D32_SFLOAT] = { .format = UNSUPPORTED }, + [VK_FORMAT_S8_UINT] = { .format = UNSUPPORTED }, + [VK_FORMAT_D16_UNORM_S8_UINT] = { .format = UNSUPPORTED }, + [VK_FORMAT_D24_UNORM_S8_UINT] = { .format = UNSUPPORTED }, + [VK_FORMAT_D32_SFLOAT_S8_UINT] = { .format = UNSUPPORTED }, + + [VK_FORMAT_BC1_RGB_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC1_RGB_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC1_RGBA_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC1_RGBA_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC2_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC2_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC3_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC3_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC4_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC4_SNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC5_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC5_SNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC6H_UFLOAT] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC6H_SFLOAT] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC7_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_BC7_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ETC2_R8G8B8_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ETC2_R8G8B8_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ETC2_R8G8B8A1_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ETC2_R8G8B8A1_SRGB] = { .format = 
UNSUPPORTED }, + [VK_FORMAT_ETC2_R8G8B8A8_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ETC2_R8G8B8A8_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_EAC_R11_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_EAC_R11_SNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_EAC_R11G11_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_EAC_R11G11_SNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_4x4_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_4x4_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_5x4_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_5x4_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_5x5_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_5x5_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_6x5_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_6x5_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_6x6_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_6x6_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_8x5_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_8x5_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_8x6_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_8x6_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_8x8_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_8x8_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_10x5_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_10x5_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_10x6_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_10x6_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_10x8_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_10x8_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_10x10_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_10x10_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_12x10_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_12x10_SRGB] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_12x12_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_ASTC_12x12_SRGB] = { .format = UNSUPPORTED 
}, [VK_FORMAT_B4G4R4A4_UNORM] = { .format = B4G4R4A4_UNORM, .cpp = 2, .channels = 4 }, [VK_FORMAT_B5G5R5A1_UNORM] = { .format = B5G5R5A1_UNORM, .cpp = 2, .channels = 4 }, [VK_FORMAT_B5G6R5_UNORM] = { .format = B5G6R5_UNORM, .cpp = 2, .channels = 3 }, - // [VK_FORMAT_B5G6R5_USCALED] = { .format = B5G6R5_USCALED }, - // [VK_FORMAT_B8G8R8_UNORM] = { .format = B8G8R8_UNORM }, - // [VK_FORMAT_B8G8R8_SNORM] = { .format = B8G8R8_SNORM }, - // [VK_FORMAT_B8G8R8_USCALED] = { .format = B8G8R8_USCALED }, - // [VK_FORMAT_B8G8R8_SSCALED] = { .format = B8G8R8_SSCALED }, - // [VK_FORMAT_B8G8R8_UINT] = { .format = B8G8R8_UINT }, - // [VK_FORMAT_B8G8R8_SINT] = { .format = B8G8R8_SINT }, - // [VK_FORMAT_B8G8R8_SRGB] = { .format = B8G8R8_SRGB }, + [VK_FORMAT_B5G6R5_USCALED] = { .format = UNSUPPORTED }, + [VK_FORMAT_B8G8R8_UNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_B8G8R8_SNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_B8G8R8_USCALED] = { .format = UNSUPPORTED }, + [VK_FORMAT_B8G8R8_SSCALED] = { .format = UNSUPPORTED }, + [VK_FORMAT_B8G8R8_UINT] = { .format = UNSUPPORTED }, + [VK_FORMAT_B8G8R8_SINT] = { .format = UNSUPPORTED }, + [VK_FORMAT_B8G8R8_SRGB] = { .format = UNSUPPORTED }, [VK_FORMAT_B8G8R8A8_UNORM] = { .format = B8G8R8A8_UNORM, .cpp = 4, .channels = 4 }, - // [VK_FORMAT_B8G8R8A8_SNORM] = { .format = B8G8R8A8_SNORM }, - // [VK_FORMAT_B8G8R8A8_USCALED] = { .format = B8G8R8A8_USCALED }, - // [VK_FORMAT_B8G8R8A8_SSCALED] = { .format = B8G8R8A8_SSCALED }, - // [VK_FORMAT_B8G8R8A8_UINT] = { .format = B8G8R8A8_UINT }, - // [VK_FORMAT_B8G8R8A8_SINT] = { .format = B8G8R8A8_SINT }, - // [VK_FORMAT_B8G8R8A8_SRGB] = { .format = B8G8R8A8_SRGB }, + [VK_FORMAT_B8G8R8A8_SNORM] = { .format = UNSUPPORTED }, + [VK_FORMAT_B8G8R8A8_USCALED] = { .format = UNSUPPORTED }, + [VK_FORMAT_B8G8R8A8_SSCALED] = { .format = UNSUPPORTED }, + [VK_FORMAT_B8G8R8A8_UINT] = { .format = UNSUPPORTED }, + [VK_FORMAT_B8G8R8A8_SINT] = { .format = UNSUPPORTED }, + [VK_FORMAT_B8G8R8A8_SRGB] = { .format = 
B8G8R8A8_UNORM_SRGB, .cpp = 4, .channels = 4 }, [VK_FORMAT_B10G10R10A2_UNORM] = { .format = B10G10R10A2_UNORM, .cpp = 4, .channels = 4 }, [VK_FORMAT_B10G10R10A2_SNORM] = { .format = B10G10R10A2_SNORM, .cpp = 4, .channels = 4 }, [VK_FORMAT_B10G10R10A2_USCALED] = { .format = B10G10R10A2_USCALED, .cpp = 4, .channels = 4 }, -- cgit v1.2.3 From f886647b75f5509095d51c9ce0ac74db595281d3 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 15 May 2015 22:04:15 -0700 Subject: vk: Add debug stubs --- src/vulkan/device.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 64a2e492e29..cad85bf5ec3 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3198,3 +3198,40 @@ void anv_CmdEndRenderPass( stub(); } + +void vkCmdDbgMarkerBegin( + VkCmdBuffer cmdBuffer, + const char* pMarker) + __attribute__ ((visibility ("default"))); + +void vkCmdDbgMarkerEnd( + VkCmdBuffer cmdBuffer) + __attribute__ ((visibility ("default"))); + +VkResult vkDbgSetObjectTag( + VkDevice device, + VkObject object, + size_t tagSize, + const void* pTag) + __attribute__ ((visibility ("default"))); + + +void vkCmdDbgMarkerBegin( + VkCmdBuffer cmdBuffer, + const char* pMarker) +{ +} + +void vkCmdDbgMarkerEnd( + VkCmdBuffer cmdBuffer) +{ +} + +VkResult vkDbgSetObjectTag( + VkDevice device, + VkObject object, + size_t tagSize, + const void* pTag) +{ + return VK_SUCCESS; +} -- cgit v1.2.3 From a29df71dd2e73ce4b9aa664b2a5aee64d19b3ea9 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 15 May 2015 22:04:52 -0700 Subject: vk: Add WSI implementation --- include/vulkan/vk_wsi_lunarg.h | 197 ++++++++++++++++++++++++++++ src/vulkan/Makefile.am | 10 +- src/vulkan/device.c | 44 +++++-- src/vulkan/image.c | 30 ++++- src/vulkan/private.h | 12 ++ src/vulkan/x11.c | 282 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 557 insertions(+), 18 deletions(-) create mode 100644 
include/vulkan/vk_wsi_lunarg.h create mode 100644 src/vulkan/x11.c (limited to 'src') diff --git a/include/vulkan/vk_wsi_lunarg.h b/include/vulkan/vk_wsi_lunarg.h new file mode 100644 index 00000000000..84de8d2c6d1 --- /dev/null +++ b/include/vulkan/vk_wsi_lunarg.h @@ -0,0 +1,197 @@ +// +// File: vk_wsi_display.h +// +/* +** Copyright (c) 2014 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+*/ + +#ifndef __VK_WSI_LUNARG_H__ +#define __VK_WSI_LUNARG_H__ + +#include "vulkan.h" + +#define VK_WSI_LUNARG_REVISION 3 +#define VK_WSI_LUNARG_EXTENSION_NUMBER 1 + +#ifdef __cplusplus +extern "C" +{ +#endif // __cplusplus + +// ------------------------------------------------------------------------------------------------ +// Objects + +VK_DEFINE_DISP_SUBCLASS_HANDLE(VkDisplayWSI, VkObject) +VK_DEFINE_DISP_SUBCLASS_HANDLE(VkSwapChainWSI, VkObject) + +// ------------------------------------------------------------------------------------------------ +// Enumeration constants + +#define VK_WSI_LUNARG_ENUM(type,id) ((type)(VK_WSI_LUNARG_EXTENSION_NUMBER * -1000 + (id))) + +// Extend VkPhysicalDeviceInfoType enum with extension specific constants +#define VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI VK_WSI_LUNARG_ENUM(VkPhysicalDeviceInfoType, 0) +#define VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI VK_WSI_LUNARG_ENUM(VkPhysicalDeviceInfoType, 1) + +// Extend VkStructureType enum with extension specific constants +#define VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI VK_WSI_LUNARG_ENUM(VkStructureType, 0) +#define VK_STRUCTURE_TYPE_PRESENT_INFO_WSI VK_WSI_LUNARG_ENUM(VkStructureType, 1) + +// Extend VkImageLayout enum with extension specific constants +#define VK_IMAGE_LAYOUT_PRESENT_SOURCE_WSI VK_WSI_LUNARG_ENUM(VkImageLayout, 0) + +// Extend VkObjectType enum for new objects +#define VK_OBJECT_TYPE_DISPLAY_WSI VK_WSI_LUNARG_ENUM(VkObjectType, 0) +#define VK_OBJECT_TYPE_SWAP_CHAIN_WSI VK_WSI_LUNARG_ENUM(VkObjectType, 1) + +// ------------------------------------------------------------------------------------------------ +// Enumerations + +typedef enum VkDisplayInfoTypeWSI_ +{ + // Info type for vkGetDisplayInfo() + VK_DISPLAY_INFO_TYPE_FORMAT_PROPERTIES_WSI = 0x00000003, // Return the VkFormat(s) supported for swap chains with the display + + VK_ENUM_RANGE(DISPLAY_INFO_TYPE, FORMAT_PROPERTIES_WSI, FORMAT_PROPERTIES_WSI) +} 
VkDisplayInfoTypeWSI; + +typedef enum VkSwapChainInfoTypeWSI_ +{ + // Info type for vkGetSwapChainInfo() + VK_SWAP_CHAIN_INFO_TYPE_PERSISTENT_IMAGES_WSI = 0x00000000, // Return information about the persistent images of the swapchain + + VK_ENUM_RANGE(SWAP_CHAIN_INFO_TYPE, PERSISTENT_IMAGES_WSI, PERSISTENT_IMAGES_WSI) +} VkSwapChainInfoTypeWSI; + +// ------------------------------------------------------------------------------------------------ +// Flags + +typedef VkFlags VkSwapModeFlagsWSI; +typedef enum VkSwapModeFlagBitsWSI_ +{ + VK_SWAP_MODE_FLIP_BIT_WSI = VK_BIT(0), + VK_SWAP_MODE_BLIT_BIT_WSI = VK_BIT(1), +} VkSwapModeFlagBitsWSI; + +// ------------------------------------------------------------------------------------------------ +// Structures + +typedef struct VkDisplayPropertiesWSI_ +{ + VkDisplayWSI display; // Handle of the display object + VkExtent2D physicalResolution; // Max resolution for CRT? +} VkDisplayPropertiesWSI; + +typedef struct VkDisplayFormatPropertiesWSI_ +{ + VkFormat swapChainFormat; // Format of the images of the swap chain +} VkDisplayFormatPropertiesWSI; + +typedef struct VkSwapChainCreateInfoWSI_ +{ + VkStructureType sType; // Must be VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI + const void* pNext; // Pointer to next structure + + // TBD: It is not yet clear what the use will be for the following two + // values. It seems to be needed for more-global window-system handles + // (e.g. X11 display). If not needed for the SDK, we will drop it from + // this extension, and from a future version of this header. 
+ const void* pNativeWindowSystemHandle; // Pointer to native window system handle + const void* pNativeWindowHandle; // Pointer to native window handle + + uint32_t displayCount; // Number of displays the swap chain is created for + const VkDisplayWSI* pDisplays; // displayCount number of display objects the swap chain is created for + + uint32_t imageCount; // Number of images in the swap chain + + VkFormat imageFormat; // Format of the images of the swap chain + VkExtent2D imageExtent; // Width and height of the images of the swap chain + uint32_t imageArraySize; // Number of layers of the images of the swap chain (needed for multi-view rendering) + VkFlags imageUsageFlags; // Usage flags for the images of the swap chain (see VkImageUsageFlags) + + VkFlags swapModeFlags; // Allowed swap modes (see VkSwapModeFlagsWSI) +} VkSwapChainCreateInfoWSI; + +typedef struct VkSwapChainImageInfoWSI_ +{ + VkImage image; // Persistent swap chain image handle + VkDeviceMemory memory; // Persistent swap chain image's memory handle +} VkSwapChainImageInfoWSI; + +typedef struct VkPhysicalDeviceQueuePresentPropertiesWSI_ +{ + bool32_t supportsPresent; // Tells whether the queue supports presenting +} VkPhysicalDeviceQueuePresentPropertiesWSI; + +typedef struct VkPresentInfoWSI_ +{ + VkStructureType sType; // Must be VK_STRUCTURE_TYPE_PRESENT_INFO_WSI + const void* pNext; // Pointer to next structure + VkImage image; // Image to present + uint32_t flipInterval; // Flip interval +} VkPresentInfoWSI; + +// ------------------------------------------------------------------------------------------------ +// Function types + +typedef VkResult (VKAPI *PFN_vkGetDisplayInfoWSI)(VkDisplayWSI display, VkDisplayInfoTypeWSI infoType, size_t* pDataSize, void* pData); +typedef VkResult (VKAPI *PFN_vkCreateSwapChainWSI)(VkDevice device, const VkSwapChainCreateInfoWSI* pCreateInfo, VkSwapChainWSI* pSwapChain); +typedef VkResult (VKAPI *PFN_vkDestroySwapChainWSI)(VkSwapChainWSI swapChain); +typedef 
VkResult (VKAPI *PFN_vkGetSwapChainInfoWSI)(VkSwapChainWSI swapChain, VkSwapChainInfoTypeWSI infoType, size_t* pDataSize, void* pData); +typedef VkResult (VKAPI *PFN_vkQueuePresentWSI)(VkQueue queue, const VkPresentInfoWSI* pPresentInfo); + +// ------------------------------------------------------------------------------------------------ +// Function prototypes + +#ifdef VK_PROTOTYPES + +VkResult VKAPI vkGetDisplayInfoWSI( + VkDisplayWSI display, + VkDisplayInfoTypeWSI infoType, + size_t* pDataSize, + void* pData); + +VkResult VKAPI vkCreateSwapChainWSI( + VkDevice device, + const VkSwapChainCreateInfoWSI* pCreateInfo, + VkSwapChainWSI* pSwapChain); + +VkResult VKAPI vkDestroySwapChainWSI( + VkSwapChainWSI swapChain); + +VkResult VKAPI vkGetSwapChainInfoWSI( + VkSwapChainWSI swapChain, + VkSwapChainInfoTypeWSI infoType, + size_t* pDataSize, + void* pData); + +VkResult VKAPI vkQueuePresentWSI( + VkQueue queue, + const VkPresentInfoWSI* pPresentInfo); + +#endif // VK_PROTOTYPES + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // __VK_WSI_LUNARG_H__ diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 8c05c5139fc..404af61bcb1 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -22,10 +22,11 @@ vulkan_includedir = $(includedir)/vulkan -vulkan_include_HEADERS = \ - $(top_srcdir)/include/vulkan/vk_platform.h \ - $(top_srcdir)/include/vulkan/vulkan.h \ - $(top_srcdir)/include/vulkan/vulkan_intel.h +vulkan_include_HEADERS = \ + $(top_srcdir)/include/vulkan/vk_platform.h \ + $(top_srcdir)/include/vulkan/vulkan.h \ + $(top_srcdir)/include/vulkan/vulkan_intel.h \ + $(top_srcdir)/include/vulkan/vk_wsi_lunarg.h lib_LTLIBRARIES = libvulkan.la @@ -64,6 +65,7 @@ libvulkan_la_SOURCES = \ intel.c \ entrypoints.c \ entrypoints.h \ + x11.c \ formats.c \ compiler.cpp diff --git a/src/vulkan/device.c b/src/vulkan/device.c index cad85bf5ec3..7cce0a1a394 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -187,9 +187,10 @@ 
VkResult anv_GetPhysicalDeviceInfo( VkPhysicalDevicePerformance *performance; VkPhysicalDeviceQueueProperties *queue_properties; VkPhysicalDeviceMemoryProperties *memory_properties; + VkDisplayPropertiesWSI *display_properties; uint64_t ns_per_tick = 80; - switch (infoType) { + switch ((uint32_t) infoType) { case VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES: properties = pData; @@ -252,6 +253,23 @@ VkResult anv_GetPhysicalDeviceInfo( memory_properties->supportsPinning = false; return VK_SUCCESS; + case VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI: + anv_finishme("VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI"); + + *pDataSize = sizeof(*display_properties); + if (pData == NULL) + return VK_SUCCESS; + + display_properties = pData; + display_properties->display = 0; + display_properties->physicalResolution = (VkExtent2D) { 0, 0 }; + return VK_SUCCESS; + + case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI: + anv_finishme("VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI"); + return VK_SUCCESS; + + default: return VK_UNSUPPORTED; } @@ -383,18 +401,28 @@ VkResult anv_GetGlobalExtensionInfo( size_t* pDataSize, void* pData) { - uint32_t *count; + static const VkExtensionProperties extensions[] = { + { + .extName = "VK_WSI_LunarG", + .version = 3 + } + }; + uint32_t count = ARRAY_SIZE(extensions); switch (infoType) { case VK_EXTENSION_INFO_TYPE_COUNT: - count = pData; - assert(*pDataSize == 4); - *count = 0; + memcpy(pData, &count, sizeof(count)); + *pDataSize = sizeof(count); return VK_SUCCESS; - + case VK_EXTENSION_INFO_TYPE_PROPERTIES: - return vk_error(VK_ERROR_INVALID_EXTENSION); - + if (extensionIndex >= count) + return vk_error(VK_ERROR_INVALID_EXTENSION); + + memcpy(pData, &extensions[extensionIndex], sizeof(extensions[0])); + *pDataSize = sizeof(extensions[0]); + return VK_SUCCESS; + default: return VK_UNSUPPORTED; } diff --git a/src/vulkan/image.c b/src/vulkan/image.c index cf658e969da..a7dc243b2a2 100644 --- a/src/vulkan/image.c +++ 
b/src/vulkan/image.c @@ -41,9 +41,10 @@ static const struct anv_tile_mode_info { [WMAJOR] = { 128, 32 } }; -VkResult anv_CreateImage( +VkResult anv_image_create( VkDevice _device, const VkImageCreateInfo* pCreateInfo, + const struct anv_image_create_info * extra, VkImage* pImage) { struct anv_device *device = (struct anv_device *) _device; @@ -63,6 +64,7 @@ VkResult anv_CreateImage( image->type = pCreateInfo->imageType; image->format = pCreateInfo->format; image->extent = pCreateInfo->extent; + image->swap_chain = NULL; assert(image->extent.width > 0); assert(image->extent.height > 0); @@ -71,20 +73,28 @@ VkResult anv_CreateImage( switch (pCreateInfo->tiling) { case VK_IMAGE_TILING_LINEAR: image->tile_mode = LINEAR; - /* Linear depth buffers must be 64 byte aligned, which is the strictest - * requirement for all kinds of linear surfaces. - */ - image->alignment = 64; break; case VK_IMAGE_TILING_OPTIMAL: image->tile_mode = YMAJOR; - image->alignment = 4096; break; default: break; } + if (extra) + image->tile_mode = extra->tile_mode; + + if (image->tile_mode == LINEAR) { + /* Linear depth buffers must be 64 byte aligned, which is the strictest + * requirement for all kinds of linear surfaces. 
+ */ + image->alignment = 64; + } else { + image->alignment = 4096; + } + format = anv_format_for_vk_format(pCreateInfo->format); + assert(format->cpp > 0); image->stride = ALIGN_I32(image->extent.width * format->cpp, tile_mode_info[image->tile_mode].tile_width); aligned_height = ALIGN_I32(image->extent.height, @@ -96,6 +106,14 @@ VkResult anv_CreateImage( return VK_SUCCESS; } +VkResult anv_CreateImage( + VkDevice device, + const VkImageCreateInfo* pCreateInfo, + VkImage* pImage) +{ + return anv_image_create(device, pCreateInfo, NULL, pImage); +} + VkResult anv_GetImageSubresourceInfo( VkDevice device, VkImage image, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index e4534f26e8c..f15856387d8 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -36,6 +36,7 @@ #define VK_PROTOTYPES #include #include +#include #include "entrypoints.h" @@ -661,6 +662,8 @@ struct anv_image { /* Set when bound */ struct anv_bo * bo; VkDeviceSize offset; + + struct anv_swap_chain * swap_chain; }; struct anv_surface_view { @@ -671,6 +674,15 @@ struct anv_surface_view { VkFormat format; }; +struct anv_image_create_info { + uint32_t tile_mode; +}; + +VkResult anv_image_create(VkDevice _device, + const VkImageCreateInfo *pCreateInfo, + const struct anv_image_create_info *extra, + VkImage *pImage); + void anv_image_view_init(struct anv_surface_view *view, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, diff --git a/src/vulkan/x11.c b/src/vulkan/x11.c new file mode 100644 index 00000000000..898aba056c7 --- /dev/null +++ b/src/vulkan/x11.c @@ -0,0 +1,282 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and 
to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "private.h" + +#include +#include +#include + +static const VkFormat formats[] = { + VK_FORMAT_B5G6R5_UNORM, + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SRGB, +}; + +VkResult anv_GetDisplayInfoWSI( + VkDisplayWSI display, + VkDisplayInfoTypeWSI infoType, + size_t* pDataSize, + void* pData) +{ + VkDisplayFormatPropertiesWSI *properties = pData; + size_t size; + + if (pDataSize == NULL) + return VK_ERROR_INVALID_POINTER; + + switch (infoType) { + case VK_DISPLAY_INFO_TYPE_FORMAT_PROPERTIES_WSI: + size = sizeof(properties[0]) * ARRAY_SIZE(formats); + if (pData && *pDataSize < size) + return vk_error(VK_ERROR_INVALID_VALUE); + + *pDataSize = size; + for (uint32_t i = 0; i < ARRAY_SIZE(formats); i++) + properties[i].swapChainFormat = formats[i]; + + return VK_SUCCESS; + + default: + return VK_UNSUPPORTED; + } +} + +struct anv_swap_chain { + struct anv_device * device; + xcb_connection_t * conn; + xcb_window_t window; + xcb_gc_t gc; + VkExtent2D extent; + uint32_t count; + struct { + struct anv_image * image; + struct anv_device_memory * memory; + xcb_pixmap_t pixmap; + } images[0]; +}; + +VkResult anv_CreateSwapChainWSI( + VkDevice _device, + const VkSwapChainCreateInfoWSI* pCreateInfo, + 
VkSwapChainWSI* pSwapChain) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_swap_chain *chain; + xcb_void_cookie_t cookie; + VkResult result; + size_t size; + int ret; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI); + + size = sizeof(*chain) + pCreateInfo->imageCount * sizeof(chain->images[0]); + chain = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->device = device; + chain->conn = (xcb_connection_t *) pCreateInfo->pNativeWindowSystemHandle; + chain->window = (xcb_window_t) (uintptr_t) pCreateInfo->pNativeWindowHandle; + chain->count = pCreateInfo->imageCount; + chain->extent = pCreateInfo->imageExtent; + + for (uint32_t i = 0; i < chain->count; i++) { + struct anv_image *image; + struct anv_device_memory *memory; + + anv_image_create((VkDevice) device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->imageFormat, + .extent = { + .width = pCreateInfo->imageExtent.width, + .height = pCreateInfo->imageExtent.height, + .depth = 1 + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }, + &(struct anv_image_create_info) { + .tile_mode = XMAJOR + }, + (VkImage *) &image); + + anv_AllocMemory((VkDevice) device, + &(VkMemoryAllocInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + .allocationSize = image->size, + }, + (VkDeviceMemory *) &memory); + + anv_QueueBindObjectMemory(VK_NULL_HANDLE, + VK_OBJECT_TYPE_IMAGE, + (VkImage) image, 0, + (VkDeviceMemory) memory, 0); + + ret = anv_gem_set_tiling(device, memory->bo.gem_handle, + image->stride, I915_TILING_X); + if (ret) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + int fd = 
anv_gem_handle_to_fd(device, memory->bo.gem_handle); + if (fd == -1) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + uint32_t bpp = 32; + uint32_t depth = 24; + xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); + + cookie = + xcb_dri3_pixmap_from_buffer_checked(chain->conn, + pixmap, + chain->window, + image->size, + pCreateInfo->imageExtent.width, + pCreateInfo->imageExtent.height, + image->stride, + depth, bpp, fd); + + chain->images[i].image = image; + chain->images[i].memory = memory; + chain->images[i].pixmap = pixmap; + image->swap_chain = chain; + + xcb_discard_reply(chain->conn, cookie.sequence); + } + + chain->gc = xcb_generate_id(chain->conn); + if (!chain->gc) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + cookie = xcb_create_gc(chain->conn, + chain->gc, + chain->window, + XCB_GC_GRAPHICS_EXPOSURES, + (uint32_t []) { 0 }); + xcb_discard_reply(chain->conn, cookie.sequence); + + *pSwapChain = (VkSwapChainWSI) chain; + + return VK_SUCCESS; + + fail: + return result; +} + +VkResult anv_DestroySwapChainWSI( + VkSwapChainWSI swapChain) +{ + struct anv_swap_chain *chain = (struct anv_swap_chain *) swapChain; + struct anv_device *device = chain->device; + + anv_device_free(device, chain); + + return VK_SUCCESS; +} + +VkResult anv_GetSwapChainInfoWSI( + VkSwapChainWSI swapChain, + VkSwapChainInfoTypeWSI infoType, + size_t* pDataSize, + void* pData) +{ + struct anv_swap_chain *chain = (struct anv_swap_chain *) swapChain; + VkSwapChainImageInfoWSI *images; + size_t size; + + switch (infoType) { + case VK_SWAP_CHAIN_INFO_TYPE_PERSISTENT_IMAGES_WSI: + size = sizeof(*images) * chain->count; + if (pData && *pDataSize < size) + return VK_ERROR_INVALID_VALUE; + + *pDataSize = size; + if (!pData) + return VK_SUCCESS; + + images = pData; + for (uint32_t i = 0; i < chain->count; i++) { + images[i].image = (VkImage) chain->images[i].image; + images[i].memory = (VkDeviceMemory) chain->images[i].memory; + } + + return VK_SUCCESS; + + default: + 
return VK_UNSUPPORTED; + } +} + +VkResult anv_QueuePresentWSI( + VkQueue queue_, + const VkPresentInfoWSI* pPresentInfo) +{ + struct anv_image *image = (struct anv_image *) pPresentInfo->image; + struct anv_swap_chain *chain = image->swap_chain; + xcb_void_cookie_t cookie; + xcb_pixmap_t pixmap; + + assert(pPresentInfo->sType == VK_STRUCTURE_TYPE_PRESENT_INFO_WSI); + + if (chain == NULL) + return vk_error(VK_ERROR_INVALID_VALUE); + + pixmap = XCB_NONE; + for (uint32_t i = 0; i < chain->count; i++) { + if ((VkImage) chain->images[i].image == pPresentInfo->image) { + pixmap = chain->images[i].pixmap; + break; + } + } + + if (pixmap == XCB_NONE) + return vk_error(VK_ERROR_INVALID_VALUE); + + cookie = xcb_copy_area(chain->conn, + pixmap, + chain->window, + chain->gc, + 0, 0, + 0, 0, + chain->extent.width, + chain->extent.height); + xcb_discard_reply(chain->conn, cookie.sequence); + + xcb_flush(chain->conn); + + return VK_SUCCESS; +} -- cgit v1.2.3 From f9e66ea62111ffaba4a2b0307b8a5e29c2b8368a Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 20 May 2015 16:42:11 -0700 Subject: vk: Remove render pass stub call This isn't really a stub. 
--- src/vulkan/device.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 7cce0a1a394..745e5dd7090 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3223,8 +3223,6 @@ void anv_CmdEndRenderPass( .VFCacheInvalidationEnable = true, .TextureCacheInvalidationEnable = true, .CommandStreamerStallEnable = true); - - stub(); } void vkCmdDbgMarkerBegin( -- cgit v1.2.3 From f294154e42571c996a595df02f1aaa617cd3b410 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 May 2015 10:23:18 -0700 Subject: vk: Fix for out-of-tree builds --- src/vulkan/Makefile.am | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 404af61bcb1..7543cfcc159 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -71,11 +71,11 @@ libvulkan_la_SOURCES = \ BUILT_SOURCES = entrypoints.h entrypoints.c -entrypoints.h : $(vulkan_include_HEADERS) vk_gen.py - $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) vk_gen.py header > $@ +entrypoints.h : vk_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< header > $@ -entrypoints.c : $(vulkan_include_HEADERS) vk_gen.py - $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) vk_gen.py code > $@ +entrypoints.c : vk_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< code > $@ CLEANFILES = entrypoints.h entrypoints.c -- cgit v1.2.3 From b13c0f469b079c7e2f33c0ce7905ae0d311885e1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 May 2015 11:28:40 -0700 Subject: vk: More out-of-tree build fixes --- src/vulkan/Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 7543cfcc159..1010863846a 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -43,7 +43,8 @@ AM_CPPFLAGS = \ 
-I$(top_srcdir)/src/mesa/drivers/dri/common \ -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ -I$(top_srcdir)/src/gallium/auxiliary \ - -I$(top_srcdir)/src/gallium/include + -I$(top_srcdir)/src/gallium/include \ + -I$(top_builddir)/src/vulkan libvulkan_la_CFLAGS = \ -Wall -Wextra -Wno-unused-parameter -fvisibility=hidden -O0 -g \ -- cgit v1.2.3 From f17e835c260e1f49d59d78089c30ab9de87c5278 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 May 2015 11:28:08 -0700 Subject: vk/meta: Use glsl_scraper for our GLSL source We are not yet using SPIR-V for meta but this is a first step. --- src/vulkan/Makefile.am | 8 +++++++- src/vulkan/glsl_helpers.h | 8 +++++--- src/vulkan/meta.c | 1 + 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 1010863846a..291171d836a 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -70,7 +70,10 @@ libvulkan_la_SOURCES = \ formats.c \ compiler.cpp -BUILT_SOURCES = entrypoints.h entrypoints.c +BUILT_SOURCES = \ + entrypoints.h \ + entrypoints.c \ + meta-spirv.h entrypoints.h : vk_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< header > $@ @@ -78,6 +81,9 @@ entrypoints.h : vk_gen.py $(vulkan_include_HEADERS) entrypoints.c : vk_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< code > $@ +%-spirv.h: %.c glsl_scraper.py + $(AM_V_GEN) $(PYTHON2) $(srcdir)/glsl_scraper.py --glsl-only -o $@ $< + CLEANFILES = entrypoints.h entrypoints.c bin_PROGRAMS = vk diff --git a/src/vulkan/glsl_helpers.h b/src/vulkan/glsl_helpers.h index e6cd93298ac..2259f890ce1 100644 --- a/src/vulkan/glsl_helpers.h +++ b/src/vulkan/glsl_helpers.h @@ -23,13 +23,15 @@ #pragma once +#define _GLSL_SRC_VAR2(_line) _glsl_helpers_shader ## _line ## _glsl_src +#define _GLSL_SRC_VAR(_line) _GLSL_SRC_VAR2(_line) + #define GLSL_VK_SHADER(device, stage, ...) 
({ \ VkShader __shader; \ - const char __src[] = "#version 330\n" #__VA_ARGS__; \ VkShaderCreateInfo __shader_create_info = { \ .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, \ - .codeSize = sizeof(__src), \ - .pCode = __src, \ + .codeSize = sizeof(_GLSL_SRC_VAR(__LINE__)), \ + .pCode = _GLSL_SRC_VAR(__LINE__), \ .flags = (1 << 31) /* GLSL back-door hack */ \ }; \ anv_CreateShader((VkDevice) device, &__shader_create_info, &__shader); \ diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 976a0ee3b6a..77393e24694 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -29,6 +29,7 @@ #include "private.h" #include "glsl_helpers.h" +#include "meta-spirv.h" static void anv_device_init_meta_clear_state(struct anv_device *device) -- cgit v1.2.3 From 0e441cde71da46ae357ab1444baa86d1439ac162 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 May 2015 11:31:43 -0700 Subject: vk: Bake the GLSL_VK_SHADER macro into the scraper output file --- src/vulkan/glsl_helpers.h | 39 --------------------------------------- src/vulkan/glsl_scraper.py | 17 ++++++++++++++++- src/vulkan/meta.c | 1 - 3 files changed, 16 insertions(+), 41 deletions(-) delete mode 100644 src/vulkan/glsl_helpers.h (limited to 'src') diff --git a/src/vulkan/glsl_helpers.h b/src/vulkan/glsl_helpers.h deleted file mode 100644 index 2259f890ce1..00000000000 --- a/src/vulkan/glsl_helpers.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * 
paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -#define _GLSL_SRC_VAR2(_line) _glsl_helpers_shader ## _line ## _glsl_src -#define _GLSL_SRC_VAR(_line) _GLSL_SRC_VAR2(_line) - -#define GLSL_VK_SHADER(device, stage, ...) ({ \ - VkShader __shader; \ - VkShaderCreateInfo __shader_create_info = { \ - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, \ - .codeSize = sizeof(_GLSL_SRC_VAR(__LINE__)), \ - .pCode = _GLSL_SRC_VAR(__LINE__), \ - .flags = (1 << 31) /* GLSL back-door hack */ \ - }; \ - anv_CreateShader((VkDevice) device, &__shader_create_info, &__shader); \ - __shader; \ -}) diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index c35337bace5..23e9b0bde1c 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -242,7 +242,22 @@ with open_file(outfname, 'w') as outfile: * This file is autogenerated by glsl_scraper.py. */ -#include """) +#include + +#define _GLSL_SRC_VAR2(_line) _glsl_helpers_shader ## _line ## _glsl_src +#define _GLSL_SRC_VAR(_line) _GLSL_SRC_VAR2(_line) + +#define GLSL_VK_SHADER(device, stage, ...) 
({ \\ + VkShader __shader; \\ + VkShaderCreateInfo __shader_create_info = { \\ + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, \\ + .codeSize = sizeof(_GLSL_SRC_VAR(__LINE__)), \\ + .pCode = _GLSL_SRC_VAR(__LINE__), \\ + }; \\ + vkCreateShader((VkDevice) device, &__shader_create_info, &__shader); \\ + __shader; \\ +}) +""") for shader in parser.shaders: shader.dump_c_code(outfile, glsl_only) diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 77393e24694..cd4fbaf87f3 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -28,7 +28,6 @@ #include #include "private.h" -#include "glsl_helpers.h" #include "meta-spirv.h" static void -- cgit v1.2.3 From cb56372eeb3d2db7fe983d95fa8934add08af182 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 May 2015 11:40:24 -0700 Subject: vk/glsl_scraper: Use a fake GLSL version that glslang will accept --- src/vulkan/glsl_scraper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index 23e9b0bde1c..2bb70ba72b7 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -54,7 +54,7 @@ class Shader: spirv_fname = self.ext + '.spv' glsl_file = open(glsl_fname, 'w') - glsl_file.write('#version 330\n') + glsl_file.write('#version 420 core\n') glsl_file.write(self.glsl_source()) glsl_file.close() -- cgit v1.2.3 From f3d70e4165d34cb4306a1205fcee41e1ed17635c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 May 2015 12:19:24 -0700 Subject: vk/glsl_scraper: Use the LunarG back-door for GLSL source --- src/vulkan/glsl_scraper.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index 2bb70ba72b7..ca5bad4a8a1 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -87,6 +87,7 @@ class Shader: # First dump the GLSL source as strings f.write('static const char {0}_glsl_src[] 
='.format(var_prefix)) + f.write('\n_ANV_SPIRV_' + self.stage) f.write('\n"#version 330\\n"') for line in self.glsl_source().splitlines(): if not line.strip(): @@ -244,15 +245,24 @@ with open_file(outfname, 'w') as outfile: #include -#define _GLSL_SRC_VAR2(_line) _glsl_helpers_shader ## _line ## _glsl_src -#define _GLSL_SRC_VAR(_line) _GLSL_SRC_VAR2(_line) +#define _ANV_SPIRV_MAGIC "\\x03\\x02\\x23\\x07\\0\\0\\0\\0" + +#define _ANV_SPIRV_VERTEX _ANV_SPIRV_MAGIC "\\0\\0\\0\\0" +#define _ANV_SPIRV_TESS_CONTROL _ANV_SPIRV_MAGIC "\\1\\0\\0\\0" +#define _ANV_SPIRV_TESS_EVALUATION _ANV_SPIRV_MAGIC "\\2\\0\\0\\0" +#define _ANV_SPIRV_GEOMETRY _ANV_SPIRV_MAGIC "\\3\\0\\0\\0" +#define _ANV_SPIRV_FRAGMENT _ANV_SPIRV_MAGIC "\\4\\0\\0\\0" +#define _ANV_SPIRV_COMPUTE _ANV_SPIRV_MAGIC "\\5\\0\\0\\0" + +#define _ANV_GLSL_SRC_VAR2(_line) _glsl_helpers_shader ## _line ## _glsl_src +#define _ANV_GLSL_SRC_VAR(_line) _ANV_GLSL_SRC_VAR2(_line) #define GLSL_VK_SHADER(device, stage, ...) ({ \\ VkShader __shader; \\ VkShaderCreateInfo __shader_create_info = { \\ .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, \\ - .codeSize = sizeof(_GLSL_SRC_VAR(__LINE__)), \\ - .pCode = _GLSL_SRC_VAR(__LINE__), \\ + .codeSize = sizeof(_ANV_GLSL_SRC_VAR(__LINE__)), \\ + .pCode = _ANV_GLSL_SRC_VAR(__LINE__), \\ }; \\ vkCreateShader((VkDevice) device, &__shader_create_info, &__shader); \\ __shader; \\ -- cgit v1.2.3 From ccf2bf9b99573e0091b956fd0e3a23991f11e86c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 May 2015 11:59:29 -0700 Subject: vk/test: Use the glsl_scraper for building shaders --- src/vulkan/Makefile.am | 3 ++- src/vulkan/vk.c | 34 +++++++++------------------------- 2 files changed, 11 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 291171d836a..ec670c00f6a 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -73,7 +73,8 @@ libvulkan_la_SOURCES = \ BUILT_SOURCES = \ entrypoints.h \ entrypoints.c \ - 
meta-spirv.h + meta-spirv.h \ + vk-spirv.h entrypoints.h : vk_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< header > $@ diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c index 8d112bba54a..ea21ece78a8 100644 --- a/src/vulkan/vk.c +++ b/src/vulkan/vk.c @@ -13,6 +13,8 @@ #include #include +#include "vk-spirv.h" + #define for_each_bit(b, dword) \ for (uint32_t __dword = (dword); \ (b) = __builtin_ffs(__dword) - 1, __dword; \ @@ -100,7 +102,7 @@ create_pipeline(VkDevice device, VkPipeline *pipeline, .primitiveRestartIndex = 0 }; - static const char vs_source[] = GLSL( + VkShader vs = GLSL_VK_SHADER(device, VERTEX, layout(location = 0) in vec4 a_position; layout(location = 1) in vec4 a_color; layout(set = 0, binding = 0) uniform block1 { @@ -117,36 +119,18 @@ create_pipeline(VkDevice device, VkPipeline *pipeline, { gl_Position = a_position; v_color = a_color + u1.color + u2.color + u3.color; - }); + } + ); - static const char fs_source[] = GLSL( + VkShader fs = GLSL_VK_SHADER(device, FRAGMENT, out vec4 f_color; in vec4 v_color; layout(set = 0, binding = 0) uniform sampler2D tex; void main() { - f_color = v_color + texture2D(tex, vec2(0.1, 0.1)); - }); - - VkShader vs; - vkCreateShader(device, - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .codeSize = sizeof(vs_source), - .pCode = vs_source, - .flags = 0 - }, - &vs); - - VkShader fs; - vkCreateShader(device, - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .codeSize = sizeof(fs_source), - .pCode = fs_source, - .flags = 0 - }, - &fs); + f_color = v_color + texture(tex, vec2(0.1, 0.1)); + } + ); VkPipelineShaderStageCreateInfo vs_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, -- cgit v1.2.3 From 519fe765e21e43b650ebf2acc9e28725e528a0de Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 May 2015 15:55:27 -0700 Subject: vk: Do relocations in surface states when they are created 
Previously, we waited until later and did a pass through the used surfaces and did the relocations then. This lead to doing double-relocations which was causing us to get bogus surface offsets. --- src/vulkan/device.c | 38 ++++++++++++-------------------------- src/vulkan/meta.c | 5 ----- src/vulkan/private.h | 4 ---- 3 files changed, 12 insertions(+), 35 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 745e5dd7090..3381729680a 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2466,9 +2466,13 @@ void anv_CmdBindDescriptorSets( anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); memcpy(state.map, view->surface_state.map, 64); + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(&cmd_buffer->surface_relocs, + state.offset + 8 * 4, + view->bo, view->offset); + bindings->descriptors[s].surfaces[start + b] = state.offset; - bindings->descriptors[s].relocs[start + b].bo = view->bo; - bindings->descriptors[s].relocs[start + b].offset = view->offset; } start = layout->set[firstSet + i].sampler_start[s]; @@ -2553,34 +2557,12 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) if (layers + surface_count > 0) { struct anv_state state; - uint32_t offset; - uint32_t *address; uint32_t size; size = (bias + surface_count) * sizeof(uint32_t); state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); memcpy(state.map, bindings->descriptors[s].surfaces, size); - for (uint32_t i = 0; i < layers; i++) { - offset = bindings->descriptors[s].surfaces[i] + 8 * sizeof(int32_t); - address = cmd_buffer->surface_bo.map + offset; - - *address = - anv_reloc_list_add(&cmd_buffer->surface_relocs, offset, - bindings->descriptors[s].relocs[i].bo, - bindings->descriptors[s].relocs[i].offset); - } - - for (uint32_t i = 0; i < surface_count; i++) { - offset = bindings->descriptors[s].surfaces[i] + 8 * sizeof(int32_t); - address = cmd_buffer->surface_bo.map + 
offset; - - *address = - anv_reloc_list_add(&cmd_buffer->surface_relocs, offset, - bindings->descriptors[s].relocs[bias + i].bo, - bindings->descriptors[s].relocs[bias + i].offset); - } - static const uint32_t binding_table_opcodes[] = { [VK_SHADER_STAGE_VERTEX] = 38, [VK_SHADER_STAGE_TESS_CONTROL] = 39, @@ -3172,9 +3154,13 @@ anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); memcpy(state.map, view->surface_state.map, 64); + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(&cmd_buffer->surface_relocs, + state.offset + 8 * 4, + view->bo, view->offset); + bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = state.offset; - bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo = view->bo; - bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset = view->offset; } cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; } diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index cd4fbaf87f3..c4d245614ce 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -185,11 +185,6 @@ anv_cmd_buffer_copy_render_targets(struct anv_cmd_buffer *cmd_buffer, for (uint32_t i = 0; i < fb->color_attachment_count; i++) { bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i]; - - bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo = - old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo; - bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset = - old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset; } cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index f15856387d8..e1d306f780e 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -530,10 +530,6 @@ struct anv_bindings { struct { uint32_t surfaces[256]; - struct { - struct 
anv_bo *bo; - uint32_t offset; - } relocs[256]; struct { uint32_t dwords[4]; } samplers[16]; } descriptors[VK_NUM_SHADER_STAGE]; }; -- cgit v1.2.3 From 0a54751910f1e8f7e65fcc6dbb0b32eee7a7120e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 May 2015 16:33:04 -0700 Subject: vk/device: Memset descriptor sets to 0 and handle descriptor set holes --- src/vulkan/device.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 3381729680a..d464d66982e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1832,6 +1832,11 @@ VkResult anv_AllocDescriptorSets( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + /* Descriptor sets may not be 100% filled out so we need to memset to + * ensure that we can properly detect and handle holes. + */ + memset(set, 0, size); + pDescriptorSets[i] = (VkDescriptorSet) set; } @@ -2461,6 +2466,8 @@ void anv_CmdBindDescriptorSets( start = bias + layout->set[firstSet + i].surface_start[s]; for (uint32_t b = 0; b < set_layout->stage[s].surface_count; b++) { struct anv_surface_view *view = set->descriptors[surface_to_desc[b]].view; + if (!view) + continue; struct anv_state state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); @@ -2478,6 +2485,8 @@ void anv_CmdBindDescriptorSets( start = layout->set[firstSet + i].sampler_start[s]; for (uint32_t b = 0; b < set_layout->stage[s].sampler_count; b++) { struct anv_sampler *sampler = set->descriptors[sampler_to_desc[b]].sampler; + if (!sampler) + continue; memcpy(&bindings->descriptors[s].samplers[start + b], sampler->state, sizeof(sampler->state)); -- cgit v1.2.3 From 0f0b5aecb87a30ddb16804f2f5cfc51eba386e8e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 May 2015 16:49:55 -0700 Subject: vk/pipeline: Track VB's that are actually used by the pipeline Previously, we just blasted out whatever VB's we had marked as "dirty" regardless of which ones were used by the pipeline. 
Given that the stride of the VB is embedded in the pipeline this can cause problems. One problem is if the pipeline doesn't use the given VB binding we emit a bogus stride. Another problem is that we weren't properly resetting the dirty bits when the pipeline changed. --- src/vulkan/device.c | 18 +++++++++++------- src/vulkan/pipeline.c | 6 ++++-- src/vulkan/private.h | 1 + 3 files changed, 16 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index d464d66982e..a1c8846f214 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2384,8 +2384,10 @@ void anv_CmdBindPipeline( VkPipeline _pipeline) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_pipeline *pipeline = (struct anv_pipeline *) _pipeline; - cmd_buffer->pipeline = (struct anv_pipeline *) _pipeline; + cmd_buffer->pipeline = pipeline; + cmd_buffer->vb_dirty |= pipeline->vb_used; cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; } @@ -2617,18 +2619,20 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->pipeline; struct anv_bindings *bindings = cmd_buffer->bindings; - const uint32_t num_buffers = __builtin_popcount(cmd_buffer->vb_dirty); - const uint32_t num_dwords = 1 + num_buffers * 4; uint32_t *p; - if (cmd_buffer->vb_dirty) { + uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used; + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + if (vb_emit) { p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, GEN8_3DSTATE_VERTEX_BUFFERS); uint32_t vb, i = 0; - for_each_bit(vb, cmd_buffer->vb_dirty) { + for_each_bit(vb, vb_emit) { struct anv_buffer *buffer = bindings->vb[vb].buffer; uint32_t offset = bindings->vb[vb].offset; - + struct GEN8_VERTEX_BUFFER_STATE state = { .VertexBufferIndex = vb, .MemoryObjectControlState = 0, @@ -2653,7 +2657,7 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer 
*cmd_buffer) anv_batch_emit_merge(&cmd_buffer->batch, cmd_buffer->rs_state->state_sf, pipeline->state_sf); - cmd_buffer->vb_dirty = 0; + cmd_buffer->vb_dirty &= ~vb_emit; cmd_buffer->dirty = 0; } diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 23b94130334..52c96a248cc 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -62,11 +62,13 @@ emit_vertex_input(struct anv_pipeline *pipeline, VkPipelineVertexInputCreateInfo const uint32_t num_dwords = 1 + info->attributeCount * 2; uint32_t *p; bool instancing_enable[32]; - + + pipeline->vb_used = 0; for (uint32_t i = 0; i < info->bindingCount; i++) { const VkVertexInputBindingDescription *desc = &info->pVertexBindingDescriptions[i]; - + + pipeline->vb_used |= 1 << desc->binding; pipeline->binding_stride[desc->binding] = desc->strideInBytes; /* Step rate is programmed per vertex element (attribute), not diff --git a/src/vulkan/private.h b/src/vulkan/private.h index e1d306f780e..da568e87017 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -610,6 +610,7 @@ struct anv_pipeline { uint32_t gs_vec4; uint32_t gs_vertex_count; + uint32_t vb_used; uint32_t binding_stride[MAX_VBS]; uint32_t state_sf[GEN8_3DSTATE_SF_length]; -- cgit v1.2.3 From 57153da2d5fe95e0eb731d827ba940028a80cf1c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 May 2015 15:15:08 -0700 Subject: vk: Actually implement some sort of destructor for all object types --- src/vulkan/device.c | 245 +++++++++++++++++++++++++++++--------------------- src/vulkan/pipeline.c | 24 ++--- src/vulkan/private.h | 25 ++++-- 3 files changed, 179 insertions(+), 115 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index a1c8846f214..db90093eb85 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -930,117 +930,72 @@ VkResult anv_OpenPeerImage( return VK_UNSUPPORTED; } -static VkResult -anv_instance_destructor(struct anv_device * device, - VkObject object) -{ - return 
vkDestroyInstance(object); -} - -static VkResult -anv_noop_destructor(struct anv_device * device, - VkObject object) -{ - return VK_SUCCESS; -} - -static VkResult -anv_device_destructor(struct anv_device * device, - VkObject object) -{ - return vkDestroyDevice(object); -} - -static VkResult -anv_cmd_buffer_destructor(struct anv_device * device, - VkObject object) -{ - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object; - - anv_gem_munmap(cmd_buffer->surface_bo.map, BATCH_SIZE); - anv_gem_close(device, cmd_buffer->surface_bo.gem_handle); - anv_state_stream_finish(&cmd_buffer->surface_state_stream); - anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - anv_state_stream_finish(&cmd_buffer->binding_table_state_stream); - anv_batch_finish(&cmd_buffer->batch, device); - anv_device_free(device, cmd_buffer->exec2_objects); - anv_device_free(device, cmd_buffer->exec2_bos); - anv_device_free(device, cmd_buffer); - - return VK_SUCCESS; -} - -static VkResult -anv_pipeline_destructor(struct anv_device * device, - VkObject object) -{ - struct anv_pipeline *pipeline = (struct anv_pipeline *) object; - - return anv_pipeline_destroy(pipeline); -} - -static VkResult -anv_free_destructor(struct anv_device * device, - VkObject object) +VkResult anv_DestroyObject( + VkDevice _device, + VkObjectType objType, + VkObject _object) { - anv_device_free(device, (void *) object); + struct anv_device *device = (struct anv_device *) _device; + struct anv_object *object = (struct anv_object *) _object; - return VK_SUCCESS; -} + switch (objType) { + case VK_OBJECT_TYPE_INSTANCE: + return anv_DestroyInstance((VkInstance) _object); -static VkResult -anv_fence_destructor(struct anv_device * device, - VkObject object) -{ - struct anv_fence *fence = (struct anv_fence *) object; + case VK_OBJECT_TYPE_PHYSICAL_DEVICE: + /* We don't want to actually destroy physical devices */ + return VK_SUCCESS; - anv_gem_munmap(fence->bo.map, fence->bo.size); - anv_gem_close(device, 
fence->bo.gem_handle); - anv_device_free(device, fence); + case VK_OBJECT_TYPE_DEVICE: + assert(_device == (VkDevice) _object); + return anv_DestroyDevice((VkDevice) _object); - return VK_SUCCESS; -} + case VK_OBJECT_TYPE_QUEUE: + /* TODO */ + return VK_SUCCESS; -static VkResult -anv_query_pool_destructor(struct anv_device * device, - VkObject object) -{ - struct anv_query_pool *pool = (struct anv_query_pool *) object; + case VK_OBJECT_TYPE_DEVICE_MEMORY: + return anv_FreeMemory(_device, (VkDeviceMemory) _object); - anv_gem_munmap(pool->bo.map, pool->bo.size); - anv_gem_close(device, pool->bo.gem_handle); - anv_device_free(device, pool); + case VK_OBJECT_TYPE_DESCRIPTOR_POOL: + /* These are just dummys anyway, so we don't need to destroy them */ + return VK_SUCCESS; - return VK_SUCCESS; -} + case VK_OBJECT_TYPE_BUFFER: + case VK_OBJECT_TYPE_BUFFER_VIEW: + case VK_OBJECT_TYPE_IMAGE: + case VK_OBJECT_TYPE_IMAGE_VIEW: + case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW: + case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW: + case VK_OBJECT_TYPE_SHADER: + case VK_OBJECT_TYPE_PIPELINE_LAYOUT: + case VK_OBJECT_TYPE_SAMPLER: + case VK_OBJECT_TYPE_DESCRIPTOR_SET: + case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT: + case VK_OBJECT_TYPE_DYNAMIC_RS_STATE: + case VK_OBJECT_TYPE_DYNAMIC_CB_STATE: + case VK_OBJECT_TYPE_DYNAMIC_DS_STATE: + case VK_OBJECT_TYPE_RENDER_PASS: + /* These are trivially destroyable */ + anv_device_free(device, (void *) _object); + return VK_SUCCESS; -static VkResult (*anv_object_destructors[])(struct anv_device *device, - VkObject object) = { - [VK_OBJECT_TYPE_INSTANCE] = anv_instance_destructor, - [VK_OBJECT_TYPE_PHYSICAL_DEVICE] = anv_noop_destructor, - [VK_OBJECT_TYPE_DEVICE] = anv_device_destructor, - [VK_OBJECT_TYPE_QUEUE] = anv_noop_destructor, - [VK_OBJECT_TYPE_COMMAND_BUFFER] = anv_cmd_buffer_destructor, - [VK_OBJECT_TYPE_PIPELINE] = anv_pipeline_destructor, - [VK_OBJECT_TYPE_SHADER] = anv_free_destructor, - [VK_OBJECT_TYPE_BUFFER] = anv_free_destructor, - 
[VK_OBJECT_TYPE_IMAGE] = anv_free_destructor, - [VK_OBJECT_TYPE_RENDER_PASS] = anv_free_destructor, - [VK_OBJECT_TYPE_FENCE] = anv_fence_destructor, - [VK_OBJECT_TYPE_QUERY_POOL] = anv_query_pool_destructor -}; + case VK_OBJECT_TYPE_COMMAND_BUFFER: + case VK_OBJECT_TYPE_PIPELINE: + case VK_OBJECT_TYPE_DYNAMIC_VP_STATE: + case VK_OBJECT_TYPE_FENCE: + case VK_OBJECT_TYPE_QUERY_POOL: + case VK_OBJECT_TYPE_FRAMEBUFFER: + (object->destructor)(device, object, objType); + return VK_SUCCESS; -VkResult anv_DestroyObject( - VkDevice _device, - VkObjectType objType, - VkObject object) -{ - struct anv_device *device = (struct anv_device *) _device; + case VK_OBJECT_TYPE_SEMAPHORE: + case VK_OBJECT_TYPE_EVENT: + stub_return(VK_UNSUPPORTED); - assert(objType < ARRAY_SIZE(anv_object_destructors) && - anv_object_destructors[objType] != NULL); - - return anv_object_destructors[objType](device, object); + default: + unreachable("Invalid object type"); + } } static void @@ -1180,6 +1135,20 @@ VkResult anv_QueueBindImageMemoryRange( stub_return(VK_UNSUPPORTED); } +static void +anv_fence_destroy(struct anv_device *device, + struct anv_object *object, + VkObjectType obj_type) +{ + struct anv_fence *fence = (struct anv_fence *) object; + + assert(obj_type == VK_OBJECT_TYPE_FENCE); + + anv_gem_munmap(fence->bo.map, fence->bo.size); + anv_gem_close(device, fence->bo.gem_handle); + anv_device_free(device, fence); +} + VkResult anv_CreateFence( VkDevice _device, const VkFenceCreateInfo* pCreateInfo, @@ -1203,6 +1172,8 @@ VkResult anv_CreateFence( if (result != VK_SUCCESS) goto fail; + fence->base.destructor = anv_fence_destroy; + fence->bo.map = anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size); batch.next = fence->bo.map; @@ -1358,6 +1329,20 @@ VkResult anv_ResetEvent( // Query functions +static void +anv_query_pool_destroy(struct anv_device *device, + struct anv_object *object, + VkObjectType obj_type) +{ + struct anv_query_pool *pool = (struct anv_query_pool *) object; + + 
assert(obj_type == VK_OBJECT_TYPE_QUERY_POOL); + + anv_gem_munmap(pool->bo.map, pool->bo.size); + anv_gem_close(device, pool->bo.gem_handle); + anv_device_free(device, pool); +} + VkResult anv_CreateQueryPool( VkDevice _device, const VkQueryPoolCreateInfo* pCreateInfo, @@ -1384,6 +1369,8 @@ VkResult anv_CreateQueryPool( if (pool == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + pool->base.destructor = anv_query_pool_destroy; + pool->type = pCreateInfo->queryType; size = pCreateInfo->slots * sizeof(struct anv_query_pool_slot); result = anv_bo_init_new(&pool->bo, device, size); @@ -1936,6 +1923,22 @@ clamp_int64(int64_t x, int64_t min, int64_t max) return max; } +static void +anv_dynamic_vp_state_destroy(struct anv_device *device, + struct anv_object *object, + VkObjectType obj_type) +{ + struct anv_dynamic_vp_state *state = (void *)object; + + assert(obj_type == VK_OBJECT_TYPE_DYNAMIC_VP_STATE); + + anv_state_pool_free(&device->dynamic_state_pool, state->sf_clip_vp); + anv_state_pool_free(&device->dynamic_state_pool, state->cc_vp); + anv_state_pool_free(&device->dynamic_state_pool, state->scissor); + + anv_device_free(device, state); +} + VkResult anv_CreateDynamicViewportState( VkDevice _device, const VkDynamicVpStateCreateInfo* pCreateInfo, @@ -1951,6 +1954,8 @@ VkResult anv_CreateDynamicViewportState( if (state == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + state->base.destructor = anv_dynamic_vp_state_destroy; + unsigned count = pCreateInfo->viewportAndScissorCount; state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool, count * 64, 64); @@ -2087,6 +2092,26 @@ VkResult anv_CreateDynamicDepthStencilState( // Command buffer functions +static void +anv_cmd_buffer_destroy(struct anv_device *device, + struct anv_object *object, + VkObjectType obj_type) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object; + + assert(obj_type == VK_OBJECT_TYPE_COMMAND_BUFFER); + + anv_gem_munmap(cmd_buffer->surface_bo.map, 
BATCH_SIZE); + anv_gem_close(device, cmd_buffer->surface_bo.gem_handle); + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_state_stream_finish(&cmd_buffer->binding_table_state_stream); + anv_batch_finish(&cmd_buffer->batch, device); + anv_device_free(device, cmd_buffer->exec2_objects); + anv_device_free(device, cmd_buffer->exec2_bos); + anv_device_free(device, cmd_buffer); +} + VkResult anv_CreateCommandBuffer( VkDevice _device, const VkCmdBufferCreateInfo* pCreateInfo, @@ -2101,6 +2126,8 @@ VkResult anv_CreateCommandBuffer( if (cmd_buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + cmd_buffer->base.destructor = anv_cmd_buffer_destroy; + cmd_buffer->device = device; cmd_buffer->rs_state = NULL; cmd_buffer->vp_state = NULL; @@ -3062,6 +3089,22 @@ void anv_CmdSaveAtomicCounters( stub(); } +static void +anv_framebuffer_destroy(struct anv_device *device, + struct anv_object *object, + VkObjectType obj_type) +{ + struct anv_framebuffer *fb = (struct anv_framebuffer *)object; + + assert(obj_type == VK_OBJECT_TYPE_FRAMEBUFFER); + + anv_DestroyObject((VkDevice) device, + VK_OBJECT_TYPE_DYNAMIC_VP_STATE, + fb->vp_state); + + anv_device_free(device, fb); +} + VkResult anv_CreateFramebuffer( VkDevice _device, const VkFramebufferCreateInfo* pCreateInfo, @@ -3077,6 +3120,8 @@ VkResult anv_CreateFramebuffer( if (framebuffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + framebuffer->base.destructor = anv_framebuffer_destroy; + framebuffer->color_attachment_count = pCreateInfo->colorAttachmentCount; for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) { framebuffer->color_attachments[i] = diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 52c96a248cc..a9594a71c95 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -224,6 +224,19 @@ VkResult anv_CreateGraphicsPipeline( return anv_pipeline_create(device, pCreateInfo, NULL, pPipeline); } 
+static void +anv_pipeline_destroy(struct anv_device *device, + struct anv_object *object, + VkObjectType obj_type) +{ + struct anv_pipeline *pipeline = (struct anv_pipeline*) object; + + assert(obj_type == VK_OBJECT_TYPE_PIPELINE); + + anv_compiler_free(pipeline); + anv_batch_finish(&pipeline->batch, pipeline->device); + anv_device_free(pipeline->device, pipeline); +} VkResult anv_pipeline_create( @@ -249,6 +262,7 @@ anv_pipeline_create( if (pipeline == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + pipeline->base.destructor = anv_pipeline_destroy; pipeline->device = device; pipeline->layout = (struct anv_pipeline_layout *) pCreateInfo->layout; memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); @@ -501,16 +515,6 @@ anv_pipeline_create( return result; } -VkResult -anv_pipeline_destroy(struct anv_pipeline *pipeline) -{ - anv_compiler_free(pipeline); - anv_batch_finish(&pipeline->batch, pipeline->device); - anv_device_free(pipeline->device, pipeline); - - return VK_SUCCESS; -} - VkResult anv_CreateGraphicsPipelineDerivative( VkDevice device, const VkGraphicsPipelineCreateInfo* pCreateInfo, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index da568e87017..b957151cdbc 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -251,6 +251,17 @@ void anv_state_stream_finish(struct anv_state_stream *stream); struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, uint32_t size, uint32_t alignment); +struct anv_object; +struct anv_device; + +typedef void (*anv_object_destructor_cb)(struct anv_device *, + struct anv_object *, + VkObjectType); + +struct anv_object { + anv_object_destructor_cb destructor; +}; + struct anv_physical_device { struct anv_instance * instance; uint32_t chipset_id; @@ -437,12 +448,13 @@ __gen_combine_address(struct anv_batch *batch, void *location, } while (0) struct anv_device_memory { - struct anv_bo bo; - VkDeviceSize map_size; - void *map; + struct anv_bo bo; + VkDeviceSize map_size; + void * map; }; 
struct anv_dynamic_vp_state { + struct anv_object base; struct anv_state sf_clip_vp; struct anv_state cc_vp; struct anv_state scissor; @@ -463,6 +475,7 @@ struct anv_query_pool_slot { }; struct anv_query_pool { + struct anv_object base; VkQueryType type; uint32_t slots; struct anv_bo bo; @@ -535,6 +548,7 @@ struct anv_bindings { }; struct anv_cmd_buffer { + struct anv_object base; struct anv_device * device; struct drm_i915_gem_execbuffer2 execbuf; @@ -567,6 +581,7 @@ void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); void anv_aub_writer_destroy(struct anv_aub_writer *writer); struct anv_fence { + struct anv_object base; struct anv_bo bo; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec2_objects[1]; @@ -579,6 +594,7 @@ struct anv_shader { }; struct anv_pipeline { + struct anv_object base; struct anv_device * device; struct anv_batch batch; struct anv_shader * shaders[VK_NUM_SHADER_STAGE]; @@ -631,8 +647,6 @@ anv_pipeline_create(VkDevice device, const struct anv_pipeline_create_info *extra, VkPipeline *pPipeline); -VkResult anv_pipeline_destroy(struct anv_pipeline *pipeline); - struct anv_compiler *anv_compiler_create(int fd); void anv_compiler_destroy(struct anv_compiler *compiler); int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline); @@ -698,6 +712,7 @@ struct anv_depth_stencil_view { }; struct anv_framebuffer { + struct anv_object base; uint32_t color_attachment_count; struct anv_surface_view * color_attachments[MAX_RTS]; struct anv_depth_stencil_view * depth_stencil; -- cgit v1.2.3 From 903bd4b0569dd976b127aafadcc3de8748f4a985 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 May 2015 12:42:59 -0700 Subject: vk/compiler: Fix up the binding hack and make it work in NIR --- src/glsl/link_uniform_blocks.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 8 ++++++-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +- src/mesa/main/mtypes.h | 3 +-- 4 files changed, 10 insertions(+), 
7 deletions(-) (limited to 'src') diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp index ca87bf0df71..c0d73076aa8 100644 --- a/src/glsl/link_uniform_blocks.cpp +++ b/src/glsl/link_uniform_blocks.cpp @@ -294,7 +294,7 @@ link_uniform_blocks(void *mem_ctx, (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); blocks[i].Set = b->set; - blocks[i].Index = b->index; + blocks[i].Binding = b->binding; i++; } @@ -315,7 +315,7 @@ link_uniform_blocks(void *mem_ctx, (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); blocks[i].Set = b->set; - blocks[i].Index = b->index; + blocks[i].Binding = b->binding; i++; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 9cfd0e792a2..82dbca3cf43 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1399,9 +1399,13 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg surf_index; if (const_index) { - surf_index = fs_reg(stage_prog_data->binding_table.ubo_start + - const_index->u[0]); + uint32_t index = const_index->u[0]; + uint32_t set = shader->base.UniformBlocks[index].Set; + uint32_t binding = shader->base.UniformBlocks[index].Binding; + + surf_index = fs_reg(stage_prog_data->bind_map[set][binding]); } else { + assert(0 && "need more info from the ir for this."); /* The block index is not a constant. Evaluate the index expression * per-channel and add the base UBO index; we have to select a value * from any live channel. 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index d6bb1178f7c..07853c0d0d6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1206,7 +1206,7 @@ fs_visitor::visit(ir_expression *ir) */ index = const_uniform_block->value.u[0]; set = shader->base.UniformBlocks[index].Set; - set_index = shader->base.UniformBlocks[index].Index; + set_index = shader->base.UniformBlocks[index].Binding; binding = stage_prog_data->bind_map[set][set_index]; surf_index = fs_reg(binding); } else { diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 895103470ee..bd84113ea91 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2571,10 +2571,9 @@ struct gl_uniform_block GLuint Binding; /** - * Vulkan descriptor set and index qualifiers for this block. + * Vulkan descriptor set qualifier for this block. */ GLuint Set; - GLuint Index; /** * Minimum size (in bytes) of a buffer object to back this uniform buffer -- cgit v1.2.3 From 1b211feb6c4dad7149cc4fabddf9b4cf15289e83 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 May 2015 16:59:24 -0700 Subject: vk/compiler: Zero out the vs_prog_data struct when VS is disabled Prevents uninitialized value errors --- src/vulkan/compiler.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 1907bd52cc8..2bd197e352b 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -937,6 +937,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) pipeline->prog_data[VK_SHADER_STAGE_VERTEX] = &pipeline->vs_prog_data.base.base; pipeline->active_stages |= VK_SHADER_STAGE_VERTEX_BIT;; } else { + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); pipeline->vs_simd8 = NO_KERNEL; } -- cgit v1.2.3 From 912944e59d60659d48569b5bccdf01d84593feac Mon Sep 17 00:00:00 2001 From: Jason 
Ekstrand Date: Mon, 25 May 2015 17:03:57 -0700 Subject: vk/device: Use the correct number of viewports when creating default VP state Fixes valgrind uninitialized value errors --- src/vulkan/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index db90093eb85..066d63598a6 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3141,7 +3141,7 @@ VkResult anv_CreateFramebuffer( vkCreateDynamicViewportState((VkDevice) device, &(VkDynamicVpStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, - .viewportAndScissorCount = 2, + .viewportAndScissorCount = 1, .pViewports = (VkViewport[]) { { .originX = 0, -- cgit v1.2.3 From ae8c93e023941c1b7622893b4f609c47585ca776 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 May 2015 17:08:11 -0700 Subject: vk/cmd_buffer: Initialize the pipeline pointer to NULL If a meta operation is called before the pipeline is set, this can cause uses of undefined values. They *should* be harmless, but we might as well shut up valgrind on this one too. --- src/vulkan/device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 066d63598a6..5d37ce8b3be 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2178,6 +2178,7 @@ VkResult anv_CreateCommandBuffer( cmd_buffer->dirty = 0; cmd_buffer->vb_dirty = 0; + cmd_buffer->pipeline = NULL; *pCmdBuffer = (VkCmdBuffer) cmd_buffer; -- cgit v1.2.3 From c03314bdd3ee1eebc0b571d9da39dc46582243d6 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Thu, 21 May 2015 14:34:55 -0700 Subject: vk: Update to header files with nested struct support This will let us do MOCS settings right. 
--- src/vulkan/gen75_pack.h | 679 ++++++++++++++++++++------------------ src/vulkan/gen7_pack.h | 588 +++++++++++++++++---------------- src/vulkan/gen8_pack.h | 852 ++++++++++++++++++++++++++---------------------- 3 files changed, 1143 insertions(+), 976 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index ed4ab2e52c6..4978d5977b7 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -248,6 +248,24 @@ GEN75_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 1, \ .DwordLength = 8 +struct GEN75_MEMORY_OBJECT_CONTROL_STATE { + uint32_t LLCeLLCCacheabilityControlLLCCC; + uint32_t L3CacheabilityControlL3CC; +}; + +static inline void +GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->LLCeLLCCacheabilityControlLLCCC, 1, 2) | + __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | + 0; + +} + struct GEN75_STATE_BASE_ADDRESS { uint32_t CommandType; uint32_t CommandSubType; @@ -255,20 +273,20 @@ struct GEN75_STATE_BASE_ADDRESS { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; __gen_address_type GeneralStateBaseAddress; - uint32_t GeneralStateMemoryObjectControlState; - uint32_t StatelessDataPortAccessMemoryObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; uint32_t GeneralStateBaseAddressModifyEnable; __gen_address_type SurfaceStateBaseAddress; - uint32_t SurfaceStateMemoryObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; uint32_t SurfaceStateBaseAddressModifyEnable; __gen_address_type DynamicStateBaseAddress; - uint32_t DynamicStateMemoryObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE 
DynamicStateMemoryObjectControlState; uint32_t DynamicStateBaseAddressModifyEnable; __gen_address_type IndirectObjectBaseAddress; - uint32_t IndirectObjectMemoryObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; uint32_t IndirectObjectBaseAddressModifyEnable; __gen_address_type InstructionBaseAddress; - uint32_t InstructionMemoryObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; uint32_t InstructionBaseAddressModifyEnable; __gen_address_type GeneralStateAccessUpperBound; uint32_t GeneralStateAccessUpperBoundModifyEnable; @@ -294,41 +312,59 @@ GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_GeneralStateMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); + uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); uint32_t dw1 = - /* Struct GeneralStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ - /* Struct StatelessDataPortAccessMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct GeneralStateMemoryObjectControlState (8..11): */ + __gen_field(dw_GeneralStateMemoryObjectControlState, 8, 11) | + /* Struct StatelessDataPortAccessMemoryObjectControlState (4..7): */ + __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 4, 7) | __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | 0; dw[1] = __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + uint32_t dw_SurfaceStateMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); uint32_t 
dw2 = - /* Struct SurfaceStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct SurfaceStateMemoryObjectControlState (8..11): */ + __gen_field(dw_SurfaceStateMemoryObjectControlState, 8, 11) | __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | 0; dw[2] = __gen_combine_address(data, &dw[2], values->SurfaceStateBaseAddress, dw2); + uint32_t dw_DynamicStateMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); uint32_t dw3 = - /* Struct DynamicStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct DynamicStateMemoryObjectControlState (8..11): */ + __gen_field(dw_DynamicStateMemoryObjectControlState, 8, 11) | __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | 0; dw[3] = __gen_combine_address(data, &dw[3], values->DynamicStateBaseAddress, dw3); + uint32_t dw_IndirectObjectMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); uint32_t dw4 = - /* Struct IndirectObjectMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct IndirectObjectMemoryObjectControlState (8..11): */ + __gen_field(dw_IndirectObjectMemoryObjectControlState, 8, 11) | __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | 0; dw[4] = __gen_combine_address(data, &dw[4], values->IndirectObjectBaseAddress, dw4); + uint32_t dw_InstructionMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); uint32_t dw5 = - /* Struct InstructionMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct InstructionMemoryObjectControlState (8..11): */ + __gen_field(dw_InstructionMemoryObjectControlState, 8, 11) | __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | 0; @@ 
-461,7 +497,7 @@ struct GEN75_SWTESS_BASE_ADDRESS { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; __gen_address_type SWTessellationBaseAddress; - uint32_t SWTessellationMemoryObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; }; static inline void @@ -478,8 +514,11 @@ GEN75_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_SWTessellationMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); uint32_t dw1 = - /* Struct SWTessellationMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct SWTessellationMemoryObjectControlState (8..11): */ + __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | 0; dw[1] = @@ -616,6 +655,24 @@ GEN75_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 70 +struct GEN75_BINDING_TABLE_EDIT_ENTRY { + uint32_t BindingTableIndex; + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN75_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BINDING_TABLE_EDIT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BindingTableIndex, 16, 23) | + __gen_offset(values->SurfaceStatePointer, 0, 15) | + 0; + +} + struct GEN75_3DSTATE_BINDING_TABLE_EDIT_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -1031,7 +1088,7 @@ struct GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC { uint32_t DwordLength; __gen_address_type BindingTablePoolBaseAddress; uint32_t BindingTablePoolEnable; - uint32_t SurfaceObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; __gen_address_type BindingTablePoolUpperBound; }; @@ -1049,9 +1106,12 @@ GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data 
*data, void * restri __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_SurfaceObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); uint32_t dw1 = __gen_field(values->BindingTablePoolEnable, 11, 11) | - /* Struct SurfaceObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct SurfaceObjectControlState (7..10): */ + __gen_field(dw_SurfaceObjectControlState, 7, 10) | 0; dw[1] = @@ -1343,13 +1403,71 @@ GEN75_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 26, \ .DwordLength = 5 +struct GEN75_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw_ConstantBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + uint32_t dw2 = + /* Struct ConstantBufferObjectControlState (0..4): */ + __gen_field(dw_ConstantBufferObjectControlState, 0, 4) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + 
__gen_combine_address(data, &dw[3], values->PointerToConstantBuffer1, dw3); + + uint32_t dw4 = + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer2, dw4); + + uint32_t dw5 = + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->PointerToConstantBuffer3, dw5); + +} + struct GEN75_3DSTATE_CONSTANT_DS { uint32_t CommandType; uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1366,10 +1484,7 @@ GEN75_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN75_3DSTATE_CONSTANT_GS_length 0x00000007 @@ -1387,7 +1502,7 @@ struct GEN75_3DSTATE_CONSTANT_GS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1404,10 +1519,7 @@ GEN75_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN75_3DSTATE_CONSTANT_HS_length 0x00000007 @@ -1425,7 +1537,7 @@ struct GEN75_3DSTATE_CONSTANT_HS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1442,10 +1554,7 @@ GEN75_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], 
&values->ConstantBody); } #define GEN75_3DSTATE_CONSTANT_PS_length 0x00000007 @@ -1463,7 +1572,7 @@ struct GEN75_3DSTATE_CONSTANT_PS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1480,10 +1589,7 @@ GEN75_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN75_3DSTATE_CONSTANT_VS_length 0x00000007 @@ -1501,7 +1607,7 @@ struct GEN75_3DSTATE_CONSTANT_VS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1518,10 +1624,7 @@ GEN75_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN75_3DSTATE_DEPTH_BUFFER_length 0x00000007 @@ -1560,7 +1663,7 @@ struct GEN75_3DSTATE_DEPTH_BUFFER { #define SURFTYPE_CUBEmustbezero 0 uint32_t Depth; uint32_t MinimumArrayElement; - uint32_t DepthBufferObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; uint32_t DepthCoordinateOffsetY; uint32_t DepthCoordinateOffsetX; uint32_t RenderTargetViewExtent; @@ -1601,10 +1704,13 @@ GEN75_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->LOD, 0, 3) | 0; + uint32_t dw_DepthBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); dw[4] = __gen_field(values->Depth, 21, 31) | __gen_field(values->MinimumArrayElement, 10, 20) | - /* Struct 
DepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct DepthBufferObjectControlState (0..3): */ + __gen_field(dw_DepthBufferObjectControlState, 0, 3) | 0; dw[5] = @@ -1823,6 +1929,26 @@ GEN75_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 55 +struct GEN75_GATHER_CONSTANT_ENTRY { + uint32_t ConstantBufferOffset; + uint32_t ChannelMask; + uint32_t BindingTableIndexOffset; +}; + +static inline void +GEN75_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_GATHER_CONSTANT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->ConstantBufferOffset, 8, 15) | + __gen_field(values->ChannelMask, 4, 7) | + __gen_field(values->BindingTableIndexOffset, 0, 3) | + 0; + +} + struct GEN75_3DSTATE_GATHER_CONSTANT_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -2062,7 +2188,7 @@ struct GEN75_3DSTATE_GATHER_POOL_ALLOC { uint32_t DwordLength; __gen_address_type GatherPoolBaseAddress; uint32_t GatherPoolEnable; - uint32_t MemoryObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; __gen_address_type GatherPoolUpperBound; }; @@ -2080,9 +2206,12 @@ GEN75_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_MemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); uint32_t dw1 = __gen_field(values->GatherPoolEnable, 11, 11) | - /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct MemoryObjectControlState (0..3): */ + __gen_field(dw_MemoryObjectControlState, 0, 3) | 0; dw[1] = @@ -2246,7 +2375,7 @@ struct GEN75_3DSTATE_HIER_DEPTH_BUFFER { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t HierarchicalDepthBufferObjectControlState; + struct 
GEN75_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; uint32_t SurfacePitch; __gen_address_type SurfaceBaseAddress; }; @@ -2265,8 +2394,11 @@ GEN75_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_HierarchicalDepthBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); dw[1] = - /* Struct HierarchicalDepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct HierarchicalDepthBufferObjectControlState (25..28): */ + __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 28) | __gen_field(values->SurfacePitch, 0, 16) | 0; @@ -2396,7 +2528,7 @@ struct GEN75_3DSTATE_INDEX_BUFFER { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t MemoryObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; #define INDEX_BYTE 0 #define INDEX_WORD 1 #define INDEX_DWORD 2 @@ -2412,12 +2544,15 @@ GEN75_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, { uint32_t *dw = (uint32_t * restrict) dst; + uint32_t dw_MemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); dw[0] = __gen_field(values->CommandType, 29, 31) | __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct MemoryObjectControlState (12..15): */ + __gen_field(dw_MemoryObjectControlState, 12, 15) | __gen_field(values->IndexFormat, 8, 9) | __gen_field(values->DwordLength, 0, 7) | 0; @@ -3151,6 +3286,28 @@ GEN75_3DSTATE_RAST_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 2 
+struct GEN75_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN75_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0 { uint32_t CommandType; uint32_t CommandSubType; @@ -3768,7 +3925,7 @@ struct GEN75_3DSTATE_SO_BUFFER { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; uint32_t SOBufferIndex; - uint32_t SOBufferObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; uint32_t SurfacePitch; __gen_address_type SurfaceBaseAddress; __gen_address_type SurfaceEndAddress; @@ -3788,9 +3945,12 @@ GEN75_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_SOBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); dw[1] = __gen_field(values->SOBufferIndex, 29, 30) | - /* Struct SOBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct SOBufferObjectControlState (25..28): */ + __gen_field(dw_SOBufferObjectControlState, 25, 28) | __gen_field(values->SurfacePitch, 0, 11) | 0; @@ -3815,6 +3975,63 @@ GEN75_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 23 +struct GEN75_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN75_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 
13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN75_SO_DECL_ENTRY { + struct GEN75_SO_DECL Stream3Decl; + struct GEN75_SO_DECL Stream2Decl; + struct GEN75_SO_DECL Stream1Decl; + struct GEN75_SO_DECL Stream0Decl; +}; + +static inline void +GEN75_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_Stream3Decl; + GEN75_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); + uint32_t dw_Stream2Decl; + GEN75_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); + uint32_t dw_Stream1Decl; + GEN75_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); + uint32_t dw_Stream0Decl; + GEN75_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); + dw[0] = + /* Struct Stream3Decl (48..63): */ + __gen_field(dw_Stream3Decl, 48, 63) | + /* Struct Stream2Decl (32..47): */ + __gen_field(dw_Stream2Decl, 32, 47) | + /* Struct Stream1Decl (16..31): */ + __gen_field(dw_Stream1Decl, 16, 31) | + /* Struct Stream0Decl (0..15): */ + __gen_field(dw_Stream0Decl, 0, 15) | + 0; + + GEN75_SO_DECL_pack(data, &dw[0], &values->Stream0Decl); +} + struct GEN75_3DSTATE_SO_DECL_LIST { uint32_t CommandType; uint32_t CommandSubType; @@ -3879,7 +4096,7 @@ struct GEN75_3DSTATE_STENCIL_BUFFER { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; uint32_t StencilBufferEnable; - uint32_t StencilBufferObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; uint32_t SurfacePitch; __gen_address_type SurfaceBaseAddress; }; @@ -3898,9 +4115,12 @@ GEN75_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_StencilBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, 
&values->StencilBufferObjectControlState); dw[1] = __gen_field(values->StencilBufferEnable, 31, 31) | - /* Struct StencilBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct StencilBufferObjectControlState (25..28): */ + __gen_field(dw_StencilBufferObjectControlState, 25, 28) | __gen_field(values->SurfacePitch, 0, 16) | 0; @@ -4188,6 +4408,58 @@ GEN75_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 8 +struct GEN75_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; +#define VERTEXDATA 0 +#define INSTANCEDATA 1 + uint32_t BufferAccessType; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE VertexBufferMemoryObjectControlState; + uint32_t AddressModifyEnable; + uint32_t NullVertexBuffer; + uint32_t VertexFetchInvalidate; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + __gen_address_type EndAddress; + uint32_t InstanceDataStepRate; +}; + +static inline void +GEN75_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_VertexBufferMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_VertexBufferMemoryObjectControlState, &values->VertexBufferMemoryObjectControlState); + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->BufferAccessType, 20, 20) | + /* Struct VertexBufferMemoryObjectControlState (16..19): */ + __gen_field(dw_VertexBufferMemoryObjectControlState, 16, 19) | + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->VertexFetchInvalidate, 12, 12) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->EndAddress, dw2); + 
+ dw[3] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + struct GEN75_3DSTATE_VERTEX_BUFFERS { uint32_t CommandType; uint32_t CommandSubType; @@ -4221,6 +4493,41 @@ GEN75_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 9 +struct GEN75_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + uint32_t Valid; + uint32_t SourceElementFormat; + uint32_t EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline void +GEN75_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + struct GEN75_3DSTATE_VERTEX_ELEMENTS { uint32_t CommandType; uint32_t CommandSubType; @@ -6642,227 +6949,6 @@ GEN75_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, } -struct GEN75_3DSTATE_CONSTANT_BODY { - uint32_t ConstantBuffer1ReadLength; - uint32_t ConstantBuffer0ReadLength; - uint32_t ConstantBuffer3ReadLength; - uint32_t ConstantBuffer2ReadLength; - __gen_address_type PointerToConstantBuffer0; - uint32_t ConstantBufferObjectControlState; - __gen_address_type PointerToConstantBuffer1; - __gen_address_type PointerToConstantBuffer2; - __gen_address_type PointerToConstantBuffer3; -}; - -static inline void -GEN75_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * 
restrict dst, - const struct GEN75_3DSTATE_CONSTANT_BODY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | - 0; - - uint32_t dw2 = - /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->PointerToConstantBuffer1, dw3); - - uint32_t dw4 = - 0; - - dw[4] = - __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer2, dw4); - - uint32_t dw5 = - 0; - - dw[5] = - __gen_combine_address(data, &dw[5], values->PointerToConstantBuffer3, dw5); - -} - -struct GEN75_BINDING_TABLE_EDIT_ENTRY { - uint32_t BindingTableIndex; - uint32_t SurfaceStatePointer; -}; - -static inline void -GEN75_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_BINDING_TABLE_EDIT_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->BindingTableIndex, 16, 23) | - __gen_offset(values->SurfaceStatePointer, 0, 15) | - 0; - -} - -struct GEN75_GATHER_CONSTANT_ENTRY { - uint32_t ConstantBufferOffset; - uint32_t ChannelMask; - uint32_t BindingTableIndexOffset; -}; - -static inline void -GEN75_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_GATHER_CONSTANT_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->ConstantBufferOffset, 8, 15) | - __gen_field(values->ChannelMask, 4, 7) | - __gen_field(values->BindingTableIndexOffset, 0, 3) | - 0; - -} - -struct GEN75_VERTEX_BUFFER_STATE { - uint32_t VertexBufferIndex; -#define VERTEXDATA 0 
-#define INSTANCEDATA 1 - uint32_t BufferAccessType; - uint32_t VertexBufferMemoryObjectControlState; - uint32_t AddressModifyEnable; - uint32_t NullVertexBuffer; - uint32_t VertexFetchInvalidate; - uint32_t BufferPitch; - __gen_address_type BufferStartingAddress; - __gen_address_type EndAddress; - uint32_t InstanceDataStepRate; -}; - -static inline void -GEN75_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_VERTEX_BUFFER_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - __gen_field(values->BufferAccessType, 20, 20) | - /* Struct VertexBufferMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ - __gen_field(values->AddressModifyEnable, 14, 14) | - __gen_field(values->NullVertexBuffer, 13, 13) | - __gen_field(values->VertexFetchInvalidate, 12, 12) | - __gen_field(values->BufferPitch, 0, 11) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->EndAddress, dw2); - - dw[3] = - __gen_field(values->InstanceDataStepRate, 0, 31) | - 0; - -} - -struct GEN75_VERTEX_ELEMENT_STATE { - uint32_t VertexBufferIndex; - uint32_t Valid; - uint32_t SourceElementFormat; - uint32_t EdgeFlagEnable; - uint32_t SourceElementOffset; - uint32_t Component0Control; - uint32_t Component1Control; - uint32_t Component2Control; - uint32_t Component3Control; -}; - -static inline void -GEN75_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_VERTEX_ELEMENT_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - __gen_field(values->Valid, 25, 25) | - __gen_field(values->SourceElementFormat, 16, 24) | - __gen_field(values->EdgeFlagEnable, 15, 15) | - __gen_field(values->SourceElementOffset, 0, 11) | - 
0; - - dw[1] = - __gen_field(values->Component0Control, 28, 30) | - __gen_field(values->Component1Control, 24, 26) | - __gen_field(values->Component2Control, 20, 22) | - __gen_field(values->Component3Control, 16, 18) | - 0; - -} - -struct GEN75_SO_DECL_ENTRY { - uint32_t Stream3Decl; - uint32_t Stream2Decl; - uint32_t Stream1Decl; - uint32_t Stream0Decl; -}; - -static inline void -GEN75_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SO_DECL_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - /* Struct Stream3Decl: found SO_DECL */ - /* Struct Stream2Decl: found SO_DECL */ - /* Struct Stream1Decl: found SO_DECL */ - /* Struct Stream0Decl: found SO_DECL */ - 0; - -} - -struct GEN75_SO_DECL { - uint32_t OutputBufferSlot; - uint32_t HoleFlag; - uint32_t RegisterIndex; - uint32_t ComponentMask; -}; - -static inline void -GEN75_SO_DECL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SO_DECL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->OutputBufferSlot, 12, 13) | - __gen_field(values->HoleFlag, 11, 11) | - __gen_field(values->RegisterIndex, 4, 9) | - __gen_field(values->ComponentMask, 0, 3) | - 0; - -} - struct GEN75_SCISSOR_RECT { uint32_t ScissorRectangleYMin; uint32_t ScissorRectangleXMin; @@ -7251,22 +7337,6 @@ GEN75_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, } -struct GEN75_MEMORY_OBJECT_CONTROL_STATE { - uint32_t L3CacheabilityControlL3CC; -}; - -static inline void -GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEMORY_OBJECT_CONTROL_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | - 0; - -} - struct GEN75_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS { #define Highestpriority 0 #define Secondhighestpriority 1 @@ -7379,28 +7449,6 @@ 
GEN75_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, } -struct GEN75_PALETTE_ENTRY { - uint32_t Alpha; - uint32_t Red; - uint32_t Green; - uint32_t Blue; -}; - -static inline void -GEN75_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_PALETTE_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->Alpha, 24, 31) | - __gen_field(values->Red, 16, 23) | - __gen_field(values->Green, 8, 15) | - __gen_field(values->Blue, 0, 7) | - 0; - -} - struct GEN75_RENDER_SURFACE_STATE { #define SURFTYPE_1D 0 #define SURFTYPE_2D 1 @@ -7454,7 +7502,7 @@ struct GEN75_RENDER_SURFACE_STATE { uint32_t MinimumArrayElement0; uint32_t XOffset; uint32_t YOffset; - uint32_t SurfaceObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; uint32_t SurfaceMinLOD; uint32_t MIPCountLOD; __gen_address_type MCSBaseAddress; @@ -7527,10 +7575,13 @@ GEN75_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->MinimumArrayElement, 0, 26) | 0; + uint32_t dw_SurfaceObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); dw[5] = __gen_offset(values->XOffset, 25, 31) | __gen_offset(values->YOffset, 20, 23) | - /* Struct SurfaceObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct SurfaceObjectControlState (16..19): */ + __gen_field(dw_SurfaceObjectControlState, 16, 19) | __gen_field(values->SurfaceMinLOD, 4, 7) | __gen_field(values->MIPCountLOD, 0, 3) | 0; diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index ef56e97539d..d13d92e3998 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -206,6 +206,26 @@ GEN7_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 1, \ .DwordLength = 8 +struct GEN7_MEMORY_OBJECT_CONTROL_STATE { + uint32_t GraphicsDataTypeGFDT; + uint32_t 
LLCCacheabilityControlLLCCC; + uint32_t L3CacheabilityControlL3CC; +}; + +static inline void +GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->GraphicsDataTypeGFDT, 2, 2) | + __gen_field(values->LLCCacheabilityControlLLCCC, 1, 1) | + __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | + 0; + +} + struct GEN7_STATE_BASE_ADDRESS { uint32_t CommandType; uint32_t CommandSubType; @@ -213,21 +233,21 @@ struct GEN7_STATE_BASE_ADDRESS { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; __gen_address_type GeneralStateBaseAddress; - uint32_t GeneralStateMemoryObjectControlState; - uint32_t StatelessDataPortAccessMemoryObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; uint32_t StatelessDataPortAccessForceWriteThru; uint32_t GeneralStateBaseAddressModifyEnable; __gen_address_type SurfaceStateBaseAddress; - uint32_t SurfaceStateMemoryObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; uint32_t SurfaceStateBaseAddressModifyEnable; __gen_address_type DynamicStateBaseAddress; - uint32_t DynamicStateMemoryObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; uint32_t DynamicStateBaseAddressModifyEnable; __gen_address_type IndirectObjectBaseAddress; - uint32_t IndirectObjectMemoryObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; uint32_t IndirectObjectBaseAddressModifyEnable; __gen_address_type InstructionBaseAddress; - uint32_t InstructionMemoryObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; uint32_t InstructionBaseAddressModifyEnable; __gen_address_type 
GeneralStateAccessUpperBound; uint32_t GeneralStateAccessUpperBoundModifyEnable; @@ -253,9 +273,15 @@ GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_GeneralStateMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); + uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); uint32_t dw1 = - /* Struct GeneralStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ - /* Struct StatelessDataPortAccessMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct GeneralStateMemoryObjectControlState (8..11): */ + __gen_field(dw_GeneralStateMemoryObjectControlState, 8, 11) | + /* Struct StatelessDataPortAccessMemoryObjectControlState (4..7): */ + __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 4, 7) | __gen_field(values->StatelessDataPortAccessForceWriteThru, 3, 3) | __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | 0; @@ -263,32 +289,44 @@ GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, dw[1] = __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + uint32_t dw_SurfaceStateMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); uint32_t dw2 = - /* Struct SurfaceStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct SurfaceStateMemoryObjectControlState (8..11): */ + __gen_field(dw_SurfaceStateMemoryObjectControlState, 8, 11) | __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | 0; dw[2] = __gen_combine_address(data, &dw[2], values->SurfaceStateBaseAddress, dw2); + uint32_t 
dw_DynamicStateMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); uint32_t dw3 = - /* Struct DynamicStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct DynamicStateMemoryObjectControlState (8..11): */ + __gen_field(dw_DynamicStateMemoryObjectControlState, 8, 11) | __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | 0; dw[3] = __gen_combine_address(data, &dw[3], values->DynamicStateBaseAddress, dw3); + uint32_t dw_IndirectObjectMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); uint32_t dw4 = - /* Struct IndirectObjectMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct IndirectObjectMemoryObjectControlState (8..11): */ + __gen_field(dw_IndirectObjectMemoryObjectControlState, 8, 11) | __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | 0; dw[4] = __gen_combine_address(data, &dw[4], values->IndirectObjectBaseAddress, dw4); + uint32_t dw_InstructionMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); uint32_t dw5 = - /* Struct InstructionMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct InstructionMemoryObjectControlState (8..11): */ + __gen_field(dw_InstructionMemoryObjectControlState, 8, 11) | __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | 0; @@ -421,7 +459,7 @@ struct GEN7_SWTESS_BASE_ADDRESS { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; __gen_address_type SWTessellationBaseAddress; - uint32_t SWTessellationMemoryObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; }; static inline void @@ -438,8 +476,11 @@ GEN7_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void 
* restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_SWTessellationMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); uint32_t dw1 = - /* Struct SWTessellationMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct SWTessellationMemoryObjectControlState (8..11): */ + __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | 0; dw[1] = @@ -1035,13 +1076,71 @@ GEN7_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 26, \ .DwordLength = 5 +struct GEN7_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw_ConstantBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + uint32_t dw2 = + /* Struct ConstantBufferObjectControlState (0..4): */ + __gen_field(dw_ConstantBufferObjectControlState, 0, 4) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + uint32_t dw3 = + 0; + + 
dw[3] = + __gen_combine_address(data, &dw[3], values->PointerToConstantBuffer1, dw3); + + uint32_t dw4 = + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer2, dw4); + + uint32_t dw5 = + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->PointerToConstantBuffer3, dw5); + +} + struct GEN7_3DSTATE_CONSTANT_DS { uint32_t CommandType; uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1058,10 +1157,7 @@ GEN7_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN7_3DSTATE_CONSTANT_GS_length 0x00000007 @@ -1079,7 +1175,7 @@ struct GEN7_3DSTATE_CONSTANT_GS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1096,10 +1192,7 @@ GEN7_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN7_3DSTATE_CONSTANT_HS_length 0x00000007 @@ -1117,7 +1210,7 @@ struct GEN7_3DSTATE_CONSTANT_HS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1134,10 +1227,7 @@ GEN7_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], 
&values->ConstantBody); } #define GEN7_3DSTATE_CONSTANT_PS_length 0x00000007 @@ -1155,7 +1245,7 @@ struct GEN7_3DSTATE_CONSTANT_PS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1172,10 +1262,7 @@ GEN7_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN7_3DSTATE_CONSTANT_VS_length 0x00000007 @@ -1193,7 +1280,7 @@ struct GEN7_3DSTATE_CONSTANT_VS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1210,10 +1297,7 @@ GEN7_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN7_3DSTATE_DEPTH_BUFFER_length 0x00000007 @@ -1252,7 +1336,7 @@ struct GEN7_3DSTATE_DEPTH_BUFFER { #define SURFTYPE_CUBEmustbezero 0 uint32_t Depth; uint32_t MinimumArrayElement; - uint32_t DepthBufferObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; uint32_t DepthCoordinateOffsetY; uint32_t DepthCoordinateOffsetX; uint32_t RenderTargetViewExtent; @@ -1293,10 +1377,13 @@ GEN7_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->LOD, 0, 3) | 0; + uint32_t dw_DepthBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); dw[4] = __gen_field(values->Depth, 21, 31) | __gen_field(values->MinimumArrayElement, 10, 20) | - /* Struct 
DepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct DepthBufferObjectControlState (0..3): */ + __gen_field(dw_DepthBufferObjectControlState, 0, 3) | 0; dw[5] = @@ -1643,7 +1730,7 @@ struct GEN7_3DSTATE_HIER_DEPTH_BUFFER { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t HierarchicalDepthBufferObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; uint32_t SurfacePitch; __gen_address_type SurfaceBaseAddress; }; @@ -1662,8 +1749,11 @@ GEN7_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_HierarchicalDepthBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); dw[1] = - /* Struct HierarchicalDepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct HierarchicalDepthBufferObjectControlState (25..28): */ + __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 28) | __gen_field(values->SurfacePitch, 0, 16) | 0; @@ -1787,7 +1877,7 @@ struct GEN7_3DSTATE_INDEX_BUFFER { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t MemoryObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; uint32_t CutIndexEnable; #define INDEX_BYTE 0 #define INDEX_WORD 1 @@ -1804,12 +1894,15 @@ GEN7_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, { uint32_t *dw = (uint32_t * restrict) dst; + uint32_t dw_MemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); dw[0] = __gen_field(values->CommandType, 29, 31) | __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct 
MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct MemoryObjectControlState (12..15): */ + __gen_field(dw_MemoryObjectControlState, 12, 15) | __gen_field(values->CutIndexEnable, 10, 10) | __gen_field(values->IndexFormat, 8, 9) | __gen_field(values->DwordLength, 0, 7) | @@ -2425,6 +2518,28 @@ GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 2 +struct GEN7_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN7_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0 { uint32_t CommandType; uint32_t CommandSubType; @@ -3040,7 +3155,7 @@ struct GEN7_3DSTATE_SO_BUFFER { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; uint32_t SOBufferIndex; - uint32_t SOBufferObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; uint32_t SurfacePitch; __gen_address_type SurfaceBaseAddress; __gen_address_type SurfaceEndAddress; @@ -3060,9 +3175,12 @@ GEN7_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_SOBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); dw[1] = __gen_field(values->SOBufferIndex, 29, 30) | - /* Struct SOBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct SOBufferObjectControlState (25..28): */ + __gen_field(dw_SOBufferObjectControlState, 25, 28) | __gen_field(values->SurfacePitch, 0, 11) | 0; @@ -3087,6 +3205,63 @@ GEN7_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, 
void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 23 +struct GEN7_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN7_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN7_SO_DECL_ENTRY { + struct GEN7_SO_DECL Stream3Decl; + struct GEN7_SO_DECL Stream2Decl; + struct GEN7_SO_DECL Stream1Decl; + struct GEN7_SO_DECL Stream0Decl; +}; + +static inline void +GEN7_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_Stream3Decl; + GEN7_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); + uint32_t dw_Stream2Decl; + GEN7_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); + uint32_t dw_Stream1Decl; + GEN7_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); + uint32_t dw_Stream0Decl; + GEN7_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); + dw[0] = + /* Struct Stream3Decl (48..63): */ + __gen_field(dw_Stream3Decl, 48, 63) | + /* Struct Stream2Decl (32..47): */ + __gen_field(dw_Stream2Decl, 32, 47) | + /* Struct Stream1Decl (16..31): */ + __gen_field(dw_Stream1Decl, 16, 31) | + /* Struct Stream0Decl (0..15): */ + __gen_field(dw_Stream0Decl, 0, 15) | + 0; + + GEN7_SO_DECL_pack(data, &dw[0], &values->Stream0Decl); +} + struct GEN7_3DSTATE_SO_DECL_LIST { uint32_t CommandType; uint32_t CommandSubType; @@ -3150,7 +3325,7 @@ struct GEN7_3DSTATE_STENCIL_BUFFER { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t StencilBufferObjectControlState; + struct 
GEN7_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; uint32_t SurfacePitch; __gen_address_type SurfaceBaseAddress; }; @@ -3169,8 +3344,11 @@ GEN7_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_StencilBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); dw[1] = - /* Struct StencilBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct StencilBufferObjectControlState (25..28): */ + __gen_field(dw_StencilBufferObjectControlState, 25, 28) | __gen_field(values->SurfacePitch, 0, 16) | 0; @@ -3458,6 +3636,58 @@ GEN7_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 8 +struct GEN7_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; +#define VERTEXDATA 0 +#define INSTANCEDATA 1 + uint32_t BufferAccessType; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE VertexBufferMemoryObjectControlState; + uint32_t AddressModifyEnable; + uint32_t NullVertexBuffer; + uint32_t VertexFetchInvalidate; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + __gen_address_type EndAddress; + uint32_t InstanceDataStepRate; +}; + +static inline void +GEN7_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_VertexBufferMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_VertexBufferMemoryObjectControlState, &values->VertexBufferMemoryObjectControlState); + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->BufferAccessType, 20, 20) | + /* Struct VertexBufferMemoryObjectControlState (16..19): */ + __gen_field(dw_VertexBufferMemoryObjectControlState, 16, 19) | + __gen_field(values->AddressModifyEnable, 14, 14) | + 
__gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->VertexFetchInvalidate, 12, 12) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->EndAddress, dw2); + + dw[3] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + struct GEN7_3DSTATE_VERTEX_BUFFERS { uint32_t CommandType; uint32_t CommandSubType; @@ -3491,6 +3721,41 @@ GEN7_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 9 +struct GEN7_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + uint32_t Valid; + uint32_t SourceElementFormat; + uint32_t EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline void +GEN7_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + struct GEN7_3DSTATE_VERTEX_ELEMENTS { uint32_t CommandType; uint32_t CommandSubType; @@ -5443,189 +5708,6 @@ GEN7_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, } -struct GEN7_3DSTATE_CONSTANT_BODY { - uint32_t ConstantBuffer1ReadLength; - uint32_t ConstantBuffer0ReadLength; - uint32_t ConstantBuffer3ReadLength; - 
uint32_t ConstantBuffer2ReadLength; - __gen_address_type PointerToConstantBuffer0; - uint32_t ConstantBufferObjectControlState; - __gen_address_type PointerToConstantBuffer1; - __gen_address_type PointerToConstantBuffer2; - __gen_address_type PointerToConstantBuffer3; -}; - -static inline void -GEN7_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_BODY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | - 0; - - uint32_t dw2 = - /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->PointerToConstantBuffer1, dw3); - - uint32_t dw4 = - 0; - - dw[4] = - __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer2, dw4); - - uint32_t dw5 = - 0; - - dw[5] = - __gen_combine_address(data, &dw[5], values->PointerToConstantBuffer3, dw5); - -} - -struct GEN7_VERTEX_BUFFER_STATE { - uint32_t VertexBufferIndex; -#define VERTEXDATA 0 -#define INSTANCEDATA 1 - uint32_t BufferAccessType; - uint32_t VertexBufferMemoryObjectControlState; - uint32_t AddressModifyEnable; - uint32_t NullVertexBuffer; - uint32_t VertexFetchInvalidate; - uint32_t BufferPitch; - __gen_address_type BufferStartingAddress; - __gen_address_type EndAddress; - uint32_t InstanceDataStepRate; -}; - -static inline void -GEN7_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_VERTEX_BUFFER_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - 
__gen_field(values->BufferAccessType, 20, 20) | - /* Struct VertexBufferMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ - __gen_field(values->AddressModifyEnable, 14, 14) | - __gen_field(values->NullVertexBuffer, 13, 13) | - __gen_field(values->VertexFetchInvalidate, 12, 12) | - __gen_field(values->BufferPitch, 0, 11) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->EndAddress, dw2); - - dw[3] = - __gen_field(values->InstanceDataStepRate, 0, 31) | - 0; - -} - -struct GEN7_VERTEX_ELEMENT_STATE { - uint32_t VertexBufferIndex; - uint32_t Valid; - uint32_t SourceElementFormat; - uint32_t EdgeFlagEnable; - uint32_t SourceElementOffset; - uint32_t Component0Control; - uint32_t Component1Control; - uint32_t Component2Control; - uint32_t Component3Control; -}; - -static inline void -GEN7_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_VERTEX_ELEMENT_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - __gen_field(values->Valid, 25, 25) | - __gen_field(values->SourceElementFormat, 16, 24) | - __gen_field(values->EdgeFlagEnable, 15, 15) | - __gen_field(values->SourceElementOffset, 0, 11) | - 0; - - dw[1] = - __gen_field(values->Component0Control, 28, 30) | - __gen_field(values->Component1Control, 24, 26) | - __gen_field(values->Component2Control, 20, 22) | - __gen_field(values->Component3Control, 16, 18) | - 0; - -} - -struct GEN7_SO_DECL_ENTRY { - uint32_t Stream3Decl; - uint32_t Stream2Decl; - uint32_t Stream1Decl; - uint32_t Stream0Decl; -}; - -static inline void -GEN7_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SO_DECL_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - /* Struct Stream3Decl: found SO_DECL */ - 
/* Struct Stream2Decl: found SO_DECL */ - /* Struct Stream1Decl: found SO_DECL */ - /* Struct Stream0Decl: found SO_DECL */ - 0; - -} - -struct GEN7_SO_DECL { - uint32_t OutputBufferSlot; - uint32_t HoleFlag; - uint32_t RegisterIndex; - uint32_t ComponentMask; -}; - -static inline void -GEN7_SO_DECL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SO_DECL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->OutputBufferSlot, 12, 13) | - __gen_field(values->HoleFlag, 11, 11) | - __gen_field(values->RegisterIndex, 4, 9) | - __gen_field(values->ComponentMask, 0, 3) | - 0; - -} - struct GEN7_SCISSOR_RECT { uint32_t ScissorRectangleYMin; uint32_t ScissorRectangleXMin; @@ -6014,26 +6096,6 @@ GEN7_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, } -struct GEN7_MEMORY_OBJECT_CONTROL_STATE { - uint32_t GraphicsDataTypeGFDT; - uint32_t LLCCacheabilityControlLLCCC; - uint32_t L3CacheabilityControlL3CC; -}; - -static inline void -GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEMORY_OBJECT_CONTROL_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->GraphicsDataTypeGFDT, 2, 2) | - __gen_field(values->LLCCacheabilityControlLLCCC, 1, 1) | - __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | - 0; - -} - struct GEN7_INTERFACE_DESCRIPTOR_DATA { uint32_t KernelStartPointer; #define Multiple 0 @@ -6118,28 +6180,6 @@ GEN7_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, } -struct GEN7_PALETTE_ENTRY { - uint32_t Alpha; - uint32_t Red; - uint32_t Green; - uint32_t Blue; -}; - -static inline void -GEN7_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_PALETTE_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->Alpha, 24, 31) | - __gen_field(values->Red, 16, 23) | - 
__gen_field(values->Green, 8, 15) | - __gen_field(values->Blue, 0, 7) | - 0; - -} - struct GEN7_SAMPLER_BORDER_COLOR_STATE { uint32_t BorderColorRedDX100GL; uint32_t BorderColorAlpha; diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index 68dcf34c493..844b0ce6aed 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -591,6 +591,34 @@ GEN8_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 1, \ .DwordLength = 14 +struct GEN8_MEMORY_OBJECT_CONTROL_STATE { +#define UCwithFenceifcoherentcycle 0 +#define UCUncacheable 1 +#define WT 2 +#define WB 3 + uint32_t MemoryTypeLLCeLLCCacheabilityControl; +#define eLLCOnlywheneDRAMispresentelsegetsallocatedinLLC 0 +#define LLCOnly 1 +#define LLCeLLCAllowed 2 +#define L3DefertoPATforLLCeLLCselection 3 + uint32_t TargetCache; + uint32_t AgeforQUADLRU; +}; + +static inline void +GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->MemoryTypeLLCeLLCCacheabilityControl, 5, 6) | + __gen_field(values->TargetCache, 3, 4) | + __gen_field(values->AgeforQUADLRU, 0, 1) | + 0; + +} + struct GEN8_STATE_BASE_ADDRESS { uint32_t CommandType; uint32_t CommandSubType; @@ -598,20 +626,20 @@ struct GEN8_STATE_BASE_ADDRESS { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; __gen_address_type GeneralStateBaseAddress; - uint32_t GeneralStateMemoryObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; uint32_t GeneralStateBaseAddressModifyEnable; - uint32_t StatelessDataPortAccessMemoryObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; __gen_address_type SurfaceStateBaseAddress; - uint32_t SurfaceStateMemoryObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; 
uint32_t SurfaceStateBaseAddressModifyEnable; __gen_address_type DynamicStateBaseAddress; - uint32_t DynamicStateMemoryObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; uint32_t DynamicStateBaseAddressModifyEnable; __gen_address_type IndirectObjectBaseAddress; - uint32_t IndirectObjectMemoryObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; uint32_t IndirectObjectBaseAddressModifyEnable; __gen_address_type InstructionBaseAddress; - uint32_t InstructionMemoryObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; uint32_t InstructionBaseAddressModifyEnable; uint32_t GeneralStateBufferSize; uint32_t GeneralStateBufferSizeModifyEnable; @@ -637,44 +665,62 @@ GEN8_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_GeneralStateMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); uint32_t dw1 = - /* Struct GeneralStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct GeneralStateMemoryObjectControlState (4..10): */ + __gen_field(dw_GeneralStateMemoryObjectControlState, 4, 10) | __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | 0; dw[1] = __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); dw[3] = - /* Struct StatelessDataPortAccessMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct StatelessDataPortAccessMemoryObjectControlState (16..22): */ + __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 16, 22) | 0; + uint32_t 
dw_SurfaceStateMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); uint32_t dw4 = - /* Struct SurfaceStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct SurfaceStateMemoryObjectControlState (4..10): */ + __gen_field(dw_SurfaceStateMemoryObjectControlState, 4, 10) | __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | 0; dw[4] = __gen_combine_address(data, &dw[4], values->SurfaceStateBaseAddress, dw4); + uint32_t dw_DynamicStateMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); uint32_t dw6 = - /* Struct DynamicStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct DynamicStateMemoryObjectControlState (4..10): */ + __gen_field(dw_DynamicStateMemoryObjectControlState, 4, 10) | __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | 0; dw[6] = __gen_combine_address(data, &dw[6], values->DynamicStateBaseAddress, dw6); + uint32_t dw_IndirectObjectMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); uint32_t dw8 = - /* Struct IndirectObjectMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct IndirectObjectMemoryObjectControlState (4..10): */ + __gen_field(dw_IndirectObjectMemoryObjectControlState, 4, 10) | __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | 0; dw[8] = __gen_combine_address(data, &dw[8], values->IndirectObjectBaseAddress, dw8); + uint32_t dw_InstructionMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); uint32_t dw10 = - /* Struct InstructionMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* 
Struct InstructionMemoryObjectControlState (4..10): */ + __gen_field(dw_InstructionMemoryObjectControlState, 4, 10) | __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | 0; @@ -799,7 +845,7 @@ struct GEN8_SWTESS_BASE_ADDRESS { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; __gen_address_type SWTessellationBaseAddress; - uint32_t SWTessellationMemoryObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; __gen_address_type SWTessellationBaseAddressHigh; }; @@ -817,8 +863,11 @@ GEN8_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_SWTessellationMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); uint32_t dw1 = - /* Struct SWTessellationMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct SWTessellationMemoryObjectControlState (8..11): */ + __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | 0; dw[1] = @@ -969,6 +1018,24 @@ GEN8_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 70 +struct GEN8_BINDING_TABLE_EDIT_ENTRY { + uint32_t BindingTableIndex; + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN8_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BINDING_TABLE_EDIT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BindingTableIndex, 16, 23) | + __gen_offset(values->SurfaceStatePointer, 0, 15) | + 0; + +} + struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -1384,7 +1451,7 @@ struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC { uint32_t DwordLength; __gen_address_type BindingTablePoolBaseAddress; uint32_t BindingTablePoolEnable; - uint32_t 
SurfaceObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; #define NoValidData 0 uint32_t BindingTablePoolBufferSize; }; @@ -1403,9 +1470,12 @@ GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restric __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_SurfaceObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); uint32_t dw1 = __gen_field(values->BindingTablePoolEnable, 11, 11) | - /* Struct SurfaceObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct SurfaceObjectControlState (0..6): */ + __gen_field(dw_SurfaceObjectControlState, 0, 6) | 0; dw[1] = @@ -1695,14 +1765,67 @@ GEN8_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 26, \ .DwordLength = 9 +struct GEN8_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + uint32_t dw4 = + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer1, dw4); + + uint32_t dw6 = + 0; + + dw[6] = + 
__gen_combine_address(data, &dw[6], values->PointerToConstantBuffer2, dw6); + + uint32_t dw8 = + 0; + + dw[8] = + __gen_combine_address(data, &dw[8], values->PointerToConstantBuffer3, dw8); + +} + struct GEN8_3DSTATE_CONSTANT_DS { uint32_t CommandType; uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t ConstantBufferObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1711,19 +1834,19 @@ GEN8_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, { uint32_t *dw = (uint32_t * restrict) dst; + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); dw[0] = __gen_field(values->CommandType, 29, 31) | __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct ConstantBufferObjectControlState (8..14): */ + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN8_3DSTATE_CONSTANT_GS_length 0x0000000b @@ -1740,9 +1863,9 @@ struct GEN8_3DSTATE_CONSTANT_GS { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t ConstantBufferObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1751,19 +1874,19 @@ GEN8_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * 
restrict dst, { uint32_t *dw = (uint32_t * restrict) dst; + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); dw[0] = __gen_field(values->CommandType, 29, 31) | __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct ConstantBufferObjectControlState (8..14): */ + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN8_3DSTATE_CONSTANT_HS_length 0x0000000b @@ -1780,9 +1903,9 @@ struct GEN8_3DSTATE_CONSTANT_HS { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t ConstantBufferObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1791,19 +1914,19 @@ GEN8_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, { uint32_t *dw = (uint32_t * restrict) dst; + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); dw[0] = __gen_field(values->CommandType, 29, 31) | __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct ConstantBufferObjectControlState (8..14): */ + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | __gen_field(values->DwordLength, 
0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN8_3DSTATE_CONSTANT_PS_length 0x0000000b @@ -1820,9 +1943,9 @@ struct GEN8_3DSTATE_CONSTANT_PS { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t ConstantBufferObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1831,19 +1954,19 @@ GEN8_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, { uint32_t *dw = (uint32_t * restrict) dst; + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); dw[0] = __gen_field(values->CommandType, 29, 31) | __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct ConstantBufferObjectControlState (8..14): */ + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN8_3DSTATE_CONSTANT_VS_length 0x0000000b @@ -1860,9 +1983,9 @@ struct GEN8_3DSTATE_CONSTANT_VS { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t ConstantBufferObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; uint32_t DwordLength; - uint32_t ConstantBody; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; }; static inline void @@ -1871,19 +1994,19 @@ GEN8_3DSTATE_CONSTANT_VS_pack(__gen_user_data 
*data, void * restrict dst, { uint32_t *dw = (uint32_t * restrict) dst; + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); dw[0] = __gen_field(values->CommandType, 29, 31) | __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct ConstantBufferObjectControlState (8..14): */ + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ - 0; - + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } #define GEN8_3DSTATE_DEPTH_BUFFER_length 0x00000008 @@ -1921,7 +2044,7 @@ struct GEN8_3DSTATE_DEPTH_BUFFER { uint32_t LOD; uint32_t Depth; uint32_t MinimumArrayElement; - uint32_t DepthBufferObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; uint32_t RenderTargetViewExtent; uint32_t SurfaceQPitch; }; @@ -1961,10 +2084,13 @@ GEN8_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->LOD, 0, 3) | 0; + uint32_t dw_DepthBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); dw[5] = __gen_field(values->Depth, 21, 31) | __gen_field(values->MinimumArrayElement, 10, 20) | - /* Struct DepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct DepthBufferObjectControlState (0..6): */ + __gen_field(dw_DepthBufferObjectControlState, 0, 6) | 0; dw[6] = @@ -2157,6 +2283,26 @@ GEN8_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 55 +struct GEN8_GATHER_CONSTANT_ENTRY { + uint32_t ConstantBufferOffset; + uint32_t 
ChannelMask; + uint32_t BindingTableIndexOffset; +}; + +static inline void +GEN8_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_GATHER_CONSTANT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->ConstantBufferOffset, 8, 15) | + __gen_field(values->ChannelMask, 4, 7) | + __gen_field(values->BindingTableIndexOffset, 0, 3) | + 0; + +} + struct GEN8_3DSTATE_GATHER_CONSTANT_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -2406,7 +2552,7 @@ struct GEN8_3DSTATE_GATHER_POOL_ALLOC { uint32_t DwordLength; __gen_address_type GatherPoolBaseAddress; uint32_t GatherPoolEnable; - uint32_t MemoryObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; uint32_t GatherPoolBufferSize; }; @@ -2424,9 +2570,12 @@ GEN8_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_MemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); uint32_t dw1 = __gen_field(values->GatherPoolEnable, 11, 11) | - /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct MemoryObjectControlState (0..6): */ + __gen_field(dw_MemoryObjectControlState, 0, 6) | 0; dw[1] = @@ -2604,7 +2753,7 @@ struct GEN8_3DSTATE_HIER_DEPTH_BUFFER { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t HierarchicalDepthBufferObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; uint32_t SurfacePitch; __gen_address_type SurfaceBaseAddress; uint32_t SurfaceQPitch; @@ -2624,8 +2773,11 @@ GEN8_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_HierarchicalDepthBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, 
&dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); dw[1] = - /* Struct HierarchicalDepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct HierarchicalDepthBufferObjectControlState (25..31): */ + __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 31) | __gen_field(values->SurfacePitch, 0, 16) | 0; @@ -2762,7 +2914,7 @@ struct GEN8_3DSTATE_INDEX_BUFFER { #define INDEX_WORD 1 #define INDEX_DWORD 2 uint32_t IndexFormat; - uint32_t MemoryObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; __gen_address_type BufferStartingAddress; uint32_t BufferSize; }; @@ -2781,9 +2933,12 @@ GEN8_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_MemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); dw[1] = __gen_field(values->IndexFormat, 8, 9) | - /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct MemoryObjectControlState (0..6): */ + __gen_field(dw_MemoryObjectControlState, 0, 6) | 0; uint32_t dw2 = @@ -3580,6 +3735,28 @@ GEN8_3DSTATE_RASTER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 2 +struct GEN8_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN8_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 { uint32_t CommandType; uint32_t CommandSubType; @@ -4056,13 +4233,51 @@ GEN8_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict 
dst, ._3DCommandSubOpcode = 81, \ .DwordLength = 9 +struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL { + uint32_t ComponentOverrideW; + uint32_t ComponentOverrideZ; + uint32_t ComponentOverrideY; + uint32_t ComponentOverrideX; + uint32_t SwizzleControlMode; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t ConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t SwizzleSelect; + uint32_t SourceAttribute; +}; + +static inline void +GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ComponentOverrideW, 15, 15) | + __gen_field(values->ComponentOverrideZ, 14, 14) | + __gen_field(values->ComponentOverrideY, 13, 13) | + __gen_field(values->ComponentOverrideX, 12, 12) | + __gen_field(values->SwizzleControlMode, 11, 11) | + __gen_field(values->ConstantSource, 9, 10) | + __gen_field(values->SwizzleSelect, 6, 7) | + __gen_field(values->SourceAttribute, 0, 4) | + 0; + +} + struct GEN8_3DSTATE_SBE_SWIZ { uint32_t CommandType; uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t Attribute; + struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL Attribute; uint32_t Attribute15WrapShortestEnables; uint32_t Attribute14WrapShortestEnables; uint32_t Attribute13WrapShortestEnables; @@ -4095,10 +4310,14 @@ GEN8_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_Attribute; + GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute, &values->Attribute); dw[1] = - /* Struct Attribute: found SF_OUTPUT_ATTRIBUTE_DETAIL */ + /* Struct Attribute (0..15): */ + __gen_field(dw_Attribute, 0, 15) | 0; + GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw[1], &values->Attribute); dw[9] = 
__gen_field(values->Attribute15WrapShortestEnables, 60, 63) | __gen_field(values->Attribute14WrapShortestEnables, 56, 59) | @@ -4252,7 +4471,7 @@ struct GEN8_3DSTATE_SO_BUFFER { uint32_t DwordLength; uint32_t SOBufferEnable; uint32_t SOBufferIndex; - uint32_t SOBufferObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; uint32_t StreamOffsetWriteEnable; uint32_t StreamOutputBufferOffsetAddressEnable; __gen_address_type SurfaceBaseAddress; @@ -4275,10 +4494,13 @@ GEN8_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_SOBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); dw[1] = __gen_field(values->SOBufferEnable, 31, 31) | __gen_field(values->SOBufferIndex, 29, 30) | - /* Struct SOBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct SOBufferObjectControlState (22..28): */ + __gen_field(dw_SOBufferObjectControlState, 22, 28) | __gen_field(values->StreamOffsetWriteEnable, 21, 21) | __gen_field(values->StreamOutputBufferOffsetAddressEnable, 20, 20) | 0; @@ -4312,6 +4534,63 @@ GEN8_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 23 +struct GEN8_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN8_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN8_SO_DECL_ENTRY { + struct GEN8_SO_DECL Stream3Decl; + struct GEN8_SO_DECL Stream2Decl; + struct GEN8_SO_DECL Stream1Decl; + struct 
GEN8_SO_DECL Stream0Decl; +}; + +static inline void +GEN8_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_Stream3Decl; + GEN8_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); + uint32_t dw_Stream2Decl; + GEN8_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); + uint32_t dw_Stream1Decl; + GEN8_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); + uint32_t dw_Stream0Decl; + GEN8_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); + dw[0] = + /* Struct Stream3Decl (48..63): */ + __gen_field(dw_Stream3Decl, 48, 63) | + /* Struct Stream2Decl (32..47): */ + __gen_field(dw_Stream2Decl, 32, 47) | + /* Struct Stream1Decl (16..31): */ + __gen_field(dw_Stream1Decl, 16, 31) | + /* Struct Stream0Decl (0..15): */ + __gen_field(dw_Stream0Decl, 0, 15) | + 0; + + GEN8_SO_DECL_pack(data, &dw[0], &values->Stream0Decl); +} + struct GEN8_3DSTATE_SO_DECL_LIST { uint32_t CommandType; uint32_t CommandSubType; @@ -4376,7 +4655,7 @@ struct GEN8_3DSTATE_STENCIL_BUFFER { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; uint32_t StencilBufferEnable; - uint32_t StencilBufferObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; uint32_t SurfacePitch; __gen_address_type SurfaceBaseAddress; uint32_t SurfaceQPitch; @@ -4396,9 +4675,12 @@ GEN8_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + uint32_t dw_StencilBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); dw[1] = __gen_field(values->StencilBufferEnable, 31, 31) | - /* Struct StencilBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct StencilBufferObjectControlState (22..28): */ + __gen_field(dw_StencilBufferObjectControlState, 22, 28) | 
__gen_field(values->SurfacePitch, 0, 16) | 0; @@ -4702,6 +4984,45 @@ GEN8_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 8 +struct GEN8_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + uint32_t AddressModifyEnable; + uint32_t NullVertexBuffer; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + uint32_t BufferSize; +}; + +static inline void +GEN8_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_MemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + /* Struct MemoryObjectControlState (16..22): */ + __gen_field(dw_MemoryObjectControlState, 16, 22) | + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + dw[3] = + __gen_field(values->BufferSize, 0, 31) | + 0; + +} + struct GEN8_3DSTATE_VERTEX_BUFFERS { uint32_t CommandType; uint32_t CommandSubType; @@ -4735,10 +5056,45 @@ GEN8_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 9 -struct GEN8_3DSTATE_VERTEX_ELEMENTS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; +struct GEN8_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + uint32_t Valid; + uint32_t SourceElementFormat; + uint32_t EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline 
void +GEN8_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + +struct GEN8_3DSTATE_VERTEX_ELEMENTS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; /* variable length fields follow */ @@ -7295,250 +7651,6 @@ GEN8_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, } -struct GEN8_3DSTATE_CONSTANT_BODY { - uint32_t ConstantBuffer1ReadLength; - uint32_t ConstantBuffer0ReadLength; - uint32_t ConstantBuffer3ReadLength; - uint32_t ConstantBuffer2ReadLength; - __gen_address_type PointerToConstantBuffer0; - __gen_address_type PointerToConstantBuffer1; - __gen_address_type PointerToConstantBuffer2; - __gen_address_type PointerToConstantBuffer3; -}; - -static inline void -GEN8_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_BODY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); - - uint32_t dw4 = - 0; - - dw[4] = - 
__gen_combine_address(data, &dw[4], values->PointerToConstantBuffer1, dw4); - - uint32_t dw6 = - 0; - - dw[6] = - __gen_combine_address(data, &dw[6], values->PointerToConstantBuffer2, dw6); - - uint32_t dw8 = - 0; - - dw[8] = - __gen_combine_address(data, &dw[8], values->PointerToConstantBuffer3, dw8); - -} - -struct GEN8_BINDING_TABLE_EDIT_ENTRY { - uint32_t BindingTableIndex; - uint32_t SurfaceStatePointer; -}; - -static inline void -GEN8_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_BINDING_TABLE_EDIT_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->BindingTableIndex, 16, 23) | - __gen_offset(values->SurfaceStatePointer, 0, 15) | - 0; - -} - -struct GEN8_GATHER_CONSTANT_ENTRY { - uint32_t ConstantBufferOffset; - uint32_t ChannelMask; - uint32_t BindingTableIndexOffset; -}; - -static inline void -GEN8_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_GATHER_CONSTANT_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->ConstantBufferOffset, 8, 15) | - __gen_field(values->ChannelMask, 4, 7) | - __gen_field(values->BindingTableIndexOffset, 0, 3) | - 0; - -} - -struct GEN8_VERTEX_BUFFER_STATE { - uint32_t VertexBufferIndex; - uint32_t MemoryObjectControlState; - uint32_t AddressModifyEnable; - uint32_t NullVertexBuffer; - uint32_t BufferPitch; - __gen_address_type BufferStartingAddress; - uint32_t BufferSize; -}; - -static inline void -GEN8_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_VERTEX_BUFFER_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ - __gen_field(values->AddressModifyEnable, 14, 14) | - __gen_field(values->NullVertexBuffer, 13, 13) | - 
__gen_field(values->BufferPitch, 0, 11) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); - - dw[3] = - __gen_field(values->BufferSize, 0, 31) | - 0; - -} - -struct GEN8_VERTEX_ELEMENT_STATE { - uint32_t VertexBufferIndex; - uint32_t Valid; - uint32_t SourceElementFormat; - uint32_t EdgeFlagEnable; - uint32_t SourceElementOffset; - uint32_t Component0Control; - uint32_t Component1Control; - uint32_t Component2Control; - uint32_t Component3Control; -}; - -static inline void -GEN8_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_VERTEX_ELEMENT_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - __gen_field(values->Valid, 25, 25) | - __gen_field(values->SourceElementFormat, 16, 24) | - __gen_field(values->EdgeFlagEnable, 15, 15) | - __gen_field(values->SourceElementOffset, 0, 11) | - 0; - - dw[1] = - __gen_field(values->Component0Control, 28, 30) | - __gen_field(values->Component1Control, 24, 26) | - __gen_field(values->Component2Control, 20, 22) | - __gen_field(values->Component3Control, 16, 18) | - 0; - -} - -struct GEN8_SO_DECL_ENTRY { - uint32_t Stream3Decl; - uint32_t Stream2Decl; - uint32_t Stream1Decl; - uint32_t Stream0Decl; -}; - -static inline void -GEN8_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SO_DECL_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - /* Struct Stream3Decl: found SO_DECL */ - /* Struct Stream2Decl: found SO_DECL */ - /* Struct Stream1Decl: found SO_DECL */ - /* Struct Stream0Decl: found SO_DECL */ - 0; - -} - -struct GEN8_SO_DECL { - uint32_t OutputBufferSlot; - uint32_t HoleFlag; - uint32_t RegisterIndex; - uint32_t ComponentMask; -}; - -static inline void -GEN8_SO_DECL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SO_DECL * restrict values) -{ - 
uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->OutputBufferSlot, 12, 13) | - __gen_field(values->HoleFlag, 11, 11) | - __gen_field(values->RegisterIndex, 4, 9) | - __gen_field(values->ComponentMask, 0, 3) | - 0; - -} - -struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL { - uint32_t ComponentOverrideW; - uint32_t ComponentOverrideZ; - uint32_t ComponentOverrideY; - uint32_t ComponentOverrideX; - uint32_t SwizzleControlMode; -#define CONST_0000 0 -#define CONST_0001_FLOAT 1 -#define CONST_1111_FLOAT 2 -#define PRIM_ID 3 - uint32_t ConstantSource; -#define INPUTATTR 0 -#define INPUTATTR_FACING 1 -#define INPUTATTR_W 2 -#define INPUTATTR_FACING_W 3 - uint32_t SwizzleSelect; - uint32_t SourceAttribute; -}; - -static inline void -GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ComponentOverrideW, 15, 15) | - __gen_field(values->ComponentOverrideZ, 14, 14) | - __gen_field(values->ComponentOverrideY, 13, 13) | - __gen_field(values->ComponentOverrideX, 12, 12) | - __gen_field(values->SwizzleControlMode, 11, 11) | - __gen_field(values->ConstantSource, 9, 10) | - __gen_field(values->SwizzleSelect, 6, 7) | - __gen_field(values->SourceAttribute, 0, 4) | - 0; - -} - struct GEN8_SCISSOR_RECT { uint32_t ScissorRectangleYMin; uint32_t ScissorRectangleXMin; @@ -7651,43 +7763,6 @@ GEN8_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, } -struct GEN8_BLEND_STATE { - uint32_t AlphaToCoverageEnable; - uint32_t IndependentAlphaBlendEnable; - uint32_t AlphaToOneEnable; - uint32_t AlphaToCoverageDitherEnable; - uint32_t AlphaTestEnable; - uint32_t AlphaTestFunction; - uint32_t ColorDitherEnable; - uint32_t XDitherOffset; - uint32_t YDitherOffset; - uint32_t Entry; -}; - -static inline void -GEN8_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN8_BLEND_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->AlphaToCoverageEnable, 31, 31) | - __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | - __gen_field(values->AlphaToOneEnable, 29, 29) | - __gen_field(values->AlphaToCoverageDitherEnable, 28, 28) | - __gen_field(values->AlphaTestEnable, 27, 27) | - __gen_field(values->AlphaTestFunction, 24, 26) | - __gen_field(values->ColorDitherEnable, 23, 23) | - __gen_field(values->XDitherOffset, 21, 22) | - __gen_field(values->YDitherOffset, 19, 20) | - 0; - - dw[1] = - /* Struct Entry: found BLEND_STATE_ENTRY */ - 0; - -} - struct GEN8_BLEND_STATE_ENTRY { uint32_t LogicOpEnable; uint32_t LogicOpFunction; @@ -7739,6 +7814,40 @@ GEN8_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, } +struct GEN8_BLEND_STATE { + uint32_t AlphaToCoverageEnable; + uint32_t IndependentAlphaBlendEnable; + uint32_t AlphaToOneEnable; + uint32_t AlphaToCoverageDitherEnable; + uint32_t AlphaTestEnable; + uint32_t AlphaTestFunction; + uint32_t ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + struct GEN8_BLEND_STATE_ENTRY Entry; +}; + +static inline void +GEN8_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaToOneEnable, 29, 29) | + __gen_field(values->AlphaToCoverageDitherEnable, 28, 28) | + __gen_field(values->AlphaTestEnable, 27, 27) | + __gen_field(values->AlphaTestFunction, 24, 26) | + __gen_field(values->ColorDitherEnable, 23, 23) | + __gen_field(values->XDitherOffset, 21, 22) | + __gen_field(values->YDitherOffset, 19, 20) | + 0; + + GEN8_BLEND_STATE_ENTRY_pack(data, &dw[1], &values->Entry); +} + struct GEN8_CC_VIEWPORT { float MinimumDepth; float MaximumDepth; @@ -7813,20 
+7922,6 @@ GEN8_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, } -struct GEN8_MEMORY_OBJECT_CONTROL_STATE { -}; - -static inline void -GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEMORY_OBJECT_CONTROL_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - 0; - -} - struct GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS { #define UseCacheabilityControlsfrompagetableUCwithFenceifcoherentcycle 0 #define UncacheableUCnoncacheable 1 @@ -7959,28 +8054,6 @@ GEN8_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, } -struct GEN8_PALETTE_ENTRY { - uint32_t Alpha; - uint32_t Red; - uint32_t Green; - uint32_t Blue; -}; - -static inline void -GEN8_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_PALETTE_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->Alpha, 24, 31) | - __gen_field(values->Red, 16, 23) | - __gen_field(values->Green, 8, 15) | - __gen_field(values->Blue, 0, 7) | - 0; - -} - struct GEN8_RENDER_SURFACE_STATE { #define SURFTYPE_1D 0 #define SURFTYPE_2D 1 @@ -8021,7 +8094,7 @@ struct GEN8_RENDER_SURFACE_STATE { uint32_t CubeFaceEnableNegativeY; uint32_t CubeFaceEnablePositiveX; uint32_t CubeFaceEnableNegativeX; - uint32_t MemoryObjectControlState; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; uint32_t BaseMipLevel; uint32_t SurfaceQPitch; uint32_t Height; @@ -8103,8 +8176,11 @@ GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->CubeFaceEnableNegativeX, 5, 5) | 0; + uint32_t dw_MemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); dw[1] = - /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + /* Struct MemoryObjectControlState (24..30): */ + __gen_field(dw_MemoryObjectControlState, 
24, 30) | __gen_field(values->BaseMipLevel, 19, 23) | __gen_field(values->SurfaceQPitch, 0, 14) | 0; -- cgit v1.2.3 From 0997a7b2e3c0c1cef9c59feb706243c0bfec4e65 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Thu, 21 May 2015 14:35:34 -0700 Subject: vk: Add basic MOCS settings This matches what we do for GL. --- src/vulkan/device.c | 12 ++++++++---- src/vulkan/image.c | 2 +- src/vulkan/private.h | 6 ++++++ 3 files changed, 15 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 5d37ce8b3be..4301739022d 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1509,7 +1509,7 @@ VkResult anv_CreateBufferView( .VerticalLineStrideOffset = 0, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = 0, /* FIXME: MOCS */ + .MemoryObjectControlState = GEN8_MOCS, .BaseMipLevel = 0, .SurfaceQPitch = 0, .Height = (num_elements >> 7) & 0x3fff, @@ -2211,25 +2211,29 @@ VkResult anv_BeginCommandBuffer( anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, .GeneralStateBaseAddress = { NULL, 0 }, + .GeneralStateMemoryObjectControlState = GEN8_MOCS, .GeneralStateBaseAddressModifyEnable = true, .GeneralStateBufferSize = 0xfffff, .GeneralStateBufferSizeModifyEnable = true, .SurfaceStateBaseAddress = { &cmd_buffer->surface_bo, 0 }, - .SurfaceStateMemoryObjectControlState = 0, /* FIXME: MOCS */ + .SurfaceStateMemoryObjectControlState = GEN8_MOCS, .SurfaceStateBaseAddressModifyEnable = true, .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GEN8_MOCS, .DynamicStateBaseAddressModifyEnable = true, .DynamicStateBufferSize = 0xfffff, .DynamicStateBufferSizeModifyEnable = true, .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GEN8_MOCS, .IndirectObjectBaseAddressModifyEnable = true, .IndirectObjectBufferSize = 0xfffff, .IndirectObjectBufferSizeModifyEnable = true, 
.InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GEN8_MOCS, .InstructionBaseAddressModifyEnable = true, .InstructionBufferSize = 0xfffff, .InstructionBuffersizeModifyEnable = true); @@ -2546,7 +2550,7 @@ void anv_CmdBindIndexBuffer( anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, .IndexFormat = vk_to_gen_index_type[indexType], - .MemoryObjectControlState = 0, + .MemoryObjectControlState = GEN8_MOCS, .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, .BufferSize = buffer->size - offset); } @@ -2663,7 +2667,7 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) struct GEN8_VERTEX_BUFFER_STATE state = { .VertexBufferIndex = vb, - .MemoryObjectControlState = 0, + .MemoryObjectControlState = GEN8_MOCS, .AddressModifyEnable = true, .BufferPitch = pipeline->binding_stride[vb], .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, diff --git a/src/vulkan/image.c b/src/vulkan/image.c index a7dc243b2a2..109a248b9a0 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -149,7 +149,7 @@ create_surface_state(struct anv_device *device, .VerticalLineStrideOffset = 0, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = 0, /* FIXME: MOCS */ + .MemoryObjectControlState = GEN8_MOCS, .BaseMipLevel = 0, .SurfaceQPitch = 0, .Height = image->extent.height - 1, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index b957151cdbc..e3e5bd52a64 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -447,6 +447,12 @@ __gen_combine_address(struct anv_batch *batch, void *location, dw[i] = (dwords0)[i] | (dwords1)[i]; \ } while (0) +#define GEN8_MOCS { \ + .MemoryTypeLLCeLLCCacheabilityControl = WB, \ + .TargetCache = L3DefertoPATforLLCeLLCselection, \ + .AgeforQUADLRU = 0 \ + } + struct anv_device_memory { struct anv_bo bo; VkDeviceSize map_size; -- cgit v1.2.3 From 
7c0d0021ebe3a3d751491a8843bfe0dcfebef7de Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 22 May 2015 15:18:37 -0700 Subject: vk/test: Add new depth-stencil test Not yet a depth stencil test, but will become one. --- src/vulkan/vk.c | 504 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 504 insertions(+) (limited to 'src') diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c index ea21ece78a8..ff5e184865d 100644 --- a/src/vulkan/vk.c +++ b/src/vulkan/vk.c @@ -20,6 +20,12 @@ (b) = __builtin_ffs(__dword) - 1, __dword; \ __dword &= ~(1 << (b))) +static inline uint32_t +align_u32(uint32_t value, uint32_t alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + static void fail_if(int cond, const char *format, ...) { @@ -423,6 +429,502 @@ test_formats(VkDevice device, VkQueue queue) printf("\n"); } +#define TEST_DEPTH_FLAG 0x01 + +struct test_context { + uint32_t width, height; + VkDevice device; + VkQueue queue; + VkCmdBuffer cmdBuffer; + VkPipeline pipeline; + VkImage rt; + VkImage ds; + VkBuffer vertex_buffer; + VkBuffer image_buffer; + VkDeviceMemory mem; + void *map; + void *rt_map; + void *vertex_map; + void *image_map; + VkDynamicVpState vp_state; + VkDynamicRsState rs_state; + VkDynamicDsState ds_state; + VkColorAttachmentView rt_view; + VkDepthStencilView ds_view; + uint32_t rt_size; + VkFramebuffer framebuffer; + VkRenderPass pass; +}; + +static void +test_prepare(struct test_context *ctx, VkDevice device, VkQueue queue, uint32_t flags) +{ + ctx->device = device; + ctx->queue = queue; + + vkCreateCommandBuffer(ctx->device, + &(VkCmdBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO, + .queueNodeIndex = 0, + .flags = 0 + }, + &ctx->cmdBuffer); + + vkCreateBuffer(ctx->device, + &(VkBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = 4096, + .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .flags = 0 + }, + &ctx->vertex_buffer); + + VkMemoryRequirements vb_requirements; + 
size_t size = sizeof(vb_requirements); + vkGetObjectInfo(ctx->device, VK_OBJECT_TYPE_BUFFER, ctx->vertex_buffer, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &vb_requirements); + + vkCreateImage(ctx->device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .extent = { .width = ctx->width, .height = ctx->height, .depth = 1 }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }, + &ctx->rt); + + VkMemoryRequirements rt_requirements; + size = sizeof(rt_requirements); + vkGetObjectInfo(ctx->device, VK_OBJECT_TYPE_IMAGE, ctx->rt, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &rt_requirements); + ctx->rt_size = rt_requirements.size; + + VkDepthStencilBindInfo *ds_attachment; + VkDepthStencilBindInfo ds_bind_info; + VkMemoryRequirements ds_requirements; + + if (flags & TEST_DEPTH_FLAG) { + vkCreateImage(ctx->device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_D24_UNORM, + .extent = { .width = ctx->width, .height = ctx->height, .depth = 1 }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_BIT, + .flags = 0, + }, + &ctx->ds); + + size = sizeof(ds_requirements); + vkGetObjectInfo(ctx->device, VK_OBJECT_TYPE_IMAGE, ctx->ds, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &ds_requirements); + } else { + ds_requirements.size = 0; + } + + vkCreateBuffer(ctx->device, + &(VkBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = ctx->width * ctx->height * 4, + .usage = VK_BUFFER_USAGE_TRANSFER_DESTINATION_BIT, + .flags = 0 + }, + &ctx->image_buffer); + + VkMemoryRequirements ib_requirements; + size = sizeof(ib_requirements); + vkGetObjectInfo(ctx->device, VK_OBJECT_TYPE_BUFFER, 
ctx->image_buffer, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &ib_requirements); + + size_t mem_size = + align_u32(vb_requirements.size, 4096) + + align_u32(rt_requirements.size, 4096) + + align_u32(ds_requirements.size, 4096) + + align_u32(ib_requirements.size, 4096); + + printf("mem size %ld\n", mem_size); + + vkAllocMemory(ctx->device, + &(VkMemoryAllocInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + .allocationSize = mem_size, + .memProps = VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT, + .memPriority = VK_MEMORY_PRIORITY_NORMAL + }, + &ctx->mem); + + vkMapMemory(ctx->device, ctx->mem, 0, mem_size, 0, &ctx->map); + memset(ctx->map, 0, mem_size); + + uint32_t offset = 0; + printf("vb: %ldb at %d\n", vb_requirements.size, offset); + vkQueueBindObjectMemory(ctx->queue, VK_OBJECT_TYPE_BUFFER, + ctx->vertex_buffer, 0, ctx->mem, offset); + ctx->vertex_map = ctx->map + offset; + offset = align_u32(offset + vb_requirements.size, 4096); + + printf("rt: %ldb at %d\n", rt_requirements.size, offset); + vkQueueBindObjectMemory(ctx->queue, VK_OBJECT_TYPE_IMAGE, + ctx->rt, 0, ctx->mem, offset); + ctx->rt_map = ctx->map + offset; + offset = align_u32(offset + rt_requirements.size, 4096); + + if (flags & TEST_DEPTH_FLAG) { + printf("ds: %ldb at %d\n", ds_requirements.size, offset); + vkQueueBindObjectMemory(ctx->queue, VK_OBJECT_TYPE_IMAGE, + ctx->ds, 0, ctx->mem, offset); + offset = align_u32(offset + ds_requirements.size, 4096); + } + + printf("ib: %ldb at %d\n", ib_requirements.size, offset); + vkQueueBindObjectMemory(ctx->queue, VK_OBJECT_TYPE_BUFFER, + ctx->image_buffer, 0, ctx->mem, offset); + ctx->image_map = ctx->map + offset; + offset = align_u32(offset + ib_requirements.size, 4096); + + vkCreateDynamicViewportState(ctx->device, + &(VkDynamicVpStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, + .viewportAndScissorCount = 1, + .pViewports = (VkViewport[]) { + { + .originX = 0, + .originY = 0, + .width = ctx->width, + .height = 
ctx->height, + .minDepth = -1, + .maxDepth = 1 + }, + }, + .pScissors = (VkRect[]) { + { { 0, 0 }, { ctx->width, ctx->height } }, + } + }, + &ctx->vp_state); + + vkCreateDynamicRasterState(ctx->device, + &(VkDynamicRsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, + }, + &ctx->rs_state); + + vkCreateDynamicDepthStencilState(ctx->device, + &(VkDynamicDsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO, + }, + &ctx->ds_state); + + vkCreateColorAttachmentView(ctx->device, + &(VkColorAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = ctx->rt, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .mipLevel = 0, + .baseArraySlice = 0, + .arraySize = 1, + .msaaResolveImage = 0, + .msaaResolveSubResource = { 0, } + }, + &ctx->rt_view); + + if (flags & TEST_DEPTH_FLAG) { + vkCreateDepthStencilView(ctx->device, + &(VkDepthStencilViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DEPTH_STENCIL_VIEW_CREATE_INFO, + .image = ctx->ds, + .mipLevel = 0, + .baseArraySlice = 0, + .arraySize = 1, + .msaaResolveImage = 0, + .msaaResolveSubResource = { 0, } + }, + &ctx->ds_view); + ds_bind_info.view = ctx->ds_view; + ds_bind_info.layout = 0; + ds_attachment = &ds_bind_info; + } else { + ds_attachment = NULL; + } + + vkCreateFramebuffer(ctx->device, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .colorAttachmentCount = 1, + .pColorAttachments = (VkColorAttachmentBindInfo[]) { + { + .view = ctx->rt_view, + .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + } + }, + .pDepthStencilAttachment = ds_attachment, + .sampleCount = 1, + .width = ctx->width, + .height = ctx->height, + .layers = 1 + }, + &ctx->framebuffer); + + vkCreateRenderPass(ctx->device, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .renderArea = { { 0, 0 }, { ctx->width, ctx->height } }, + .colorAttachmentCount = 1, + .extent = { }, + .sampleCount = 1, + 
.layers = 1, + .pColorFormats = (VkFormat[]) { VK_FORMAT_R8G8B8A8_UNORM }, + .pColorLayouts = (VkImageLayout[]) { VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL }, + .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_CLEAR }, + .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, + .pColorLoadClearValues = (VkClearColor[]) { + { .color = { .floatColor = { 0.2, 0.2, 0.2, 1.0 } }, .useRawValue = false } + }, + .depthStencilFormat = VK_FORMAT_D24_UNORM, + .depthStencilLayout = 0, + .depthLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, + .depthLoadClearValue = 0.5, + .depthStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + }, + &ctx->pass); + + vkBeginCommandBuffer(ctx->cmdBuffer, + &(VkCmdBufferBeginInfo) { + .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO, + .flags = 0 + }); + + vkCmdBeginRenderPass(ctx->cmdBuffer, + &(VkRenderPassBegin) { + .renderPass = ctx->pass, + .framebuffer = ctx->framebuffer + }); +} + +static void +test_finish(struct test_context *ctx) +{ + vkCmdEndRenderPass(ctx->cmdBuffer, ctx->pass); + + VkBufferImageCopy copy = { + .bufferOffset = 0, + .imageSubresource = { + .aspect = VK_IMAGE_ASPECT_COLOR, + .mipLevel = 0, + .arraySlice = 0, + }, + .imageOffset = { .x = 0, .y = 0, .z = 0 }, + .imageExtent = { .width = ctx->width, .height = ctx->height, .depth = 1 }, + }; + + vkCmdCopyImageToBuffer(ctx->cmdBuffer, ctx->rt, VK_IMAGE_LAYOUT_GENERAL, + ctx->image_buffer, 1, ©); + + + vkEndCommandBuffer(ctx->cmdBuffer); + + vkQueueSubmit(ctx->queue, 1, &ctx->cmdBuffer, 0); + + vkQueueWaitIdle(ctx->queue); + + write_png("vk-map.png", ctx->width, ctx->height, 1024, ctx->rt_map); + write_png("vk-copy.png", ctx->width, ctx->height, 1024, ctx->image_map); + + vkDestroyObject(ctx->device, VK_OBJECT_TYPE_COMMAND_BUFFER, ctx->cmdBuffer); + vkDestroyObject(ctx->device, VK_OBJECT_TYPE_PIPELINE, ctx->pipeline); + vkDestroyObject(ctx->device, VK_OBJECT_TYPE_IMAGE, ctx->rt); + vkDestroyObject(ctx->device, VK_OBJECT_TYPE_BUFFER, ctx->vertex_buffer); + 
vkDestroyObject(ctx->device, VK_OBJECT_TYPE_BUFFER, ctx->image_buffer); + vkUnmapMemory(ctx->device, ctx->mem); + vkFreeMemory(ctx->device, ctx->mem); + vkDestroyObject(ctx->device, VK_OBJECT_TYPE_DYNAMIC_VP_STATE, ctx->vp_state); + vkDestroyObject(ctx->device, VK_OBJECT_TYPE_DYNAMIC_RS_STATE, ctx->rs_state); + vkDestroyObject(ctx->device, VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW, ctx->rt_view); + vkDestroyObject(ctx->device, VK_OBJECT_TYPE_FRAMEBUFFER, ctx->framebuffer); + vkDestroyObject(ctx->device, VK_OBJECT_TYPE_RENDER_PASS, ctx->pass); +} + +static void +test_create_solid_color_pipeline(struct test_context *ctx) +{ + VkPipelineIaStateCreateInfo ia_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .disableVertexReuse = false, + .primitiveRestartEnable = false, + .primitiveRestartIndex = 0 + }; + + VkShader vs = GLSL_VK_SHADER(ctx->device, VERTEX, + layout(location = 0) in vec4 a_position; + layout(location = 1) in vec4 a_color; + out vec4 v_color; + void main() + { + gl_Position = a_position; + v_color = a_color; + } + ); + + VkShader fs = GLSL_VK_SHADER(ctx->device, FRAGMENT, + out vec4 f_color; + in vec4 v_color; + void main() + { + f_color = v_color; + } + ); + + VkPipelineShaderStageCreateInfo vs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &ia_create_info, + .shader = { + .stage = VK_SHADER_STAGE_VERTEX, + .shader = vs, + .linkConstBufferCount = 0, + .pLinkConstBufferInfo = NULL, + .pSpecializationInfo = NULL + } + }; + + VkPipelineShaderStageCreateInfo fs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &vs_create_info, + .shader = { + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .linkConstBufferCount = 0, + .pLinkConstBufferInfo = NULL, + .pSpecializationInfo = NULL + } + }; + + VkPipelineVertexInputCreateInfo vi_create_info = { + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO, + .pNext = &fs_create_info, + .bindingCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = 16, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + { + .binding = 1, + .strideInBytes = 16, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_INSTANCE + } + }, + .attributeCount = 2, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offsetInBytes = 0 + }, + { + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offsetInBytes = 0 + } + } + }; + + VkPipelineRsStateCreateInfo rs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, + .pNext = &vi_create_info, + + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }; + + VkPipelineDsStateCreateInfo ds_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DS_STATE_CREATE_INFO, + .pNext = &rs_create_info, + .format = VK_FORMAT_D24_UNORM, + .depthTestEnable = true, + .depthWriteEnable = true, + .depthCompareOp = VK_COMPARE_OP_GREATER + }; + + vkCreateGraphicsPipeline(ctx->device, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = &ds_create_info, + .flags = 0, + .layout = VK_NULL_HANDLE + }, + &ctx->pipeline); + + vkDestroyObject(ctx->device, VK_OBJECT_TYPE_SHADER, fs); + vkDestroyObject(ctx->device, VK_OBJECT_TYPE_SHADER, vs); +} + +static void +test_depth_stencil(VkDevice device, VkQueue queue) +{ + struct test_context ctx; + + ctx.width = 256; + ctx.height = 256; + + test_prepare(&ctx, device, queue, TEST_DEPTH_FLAG); + test_create_solid_color_pipeline(&ctx); + + static const float vertex_data[] = { + /* Triangle coordinates */ + -0.5, -0.5, 0.5, 1.0, + 0.5, -0.5, 0.5, 1.0, + 0.0, 0.5, 0.5, 1.0, + 
+ /* Triangle coordinates */ + -0.3, -0.3, 0.0, 1.0, + 0.7, -0.3, 0.0, 1.0, + 0.2, 0.7, 0.8, 1.0, + + /* Color */ + 1.0, 1.0, 0.2, 1.0, + 0.2, 0.2, 1.0, 1.0, + }; + memcpy(ctx.vertex_map, vertex_data, sizeof(vertex_data)); + + vkCmdBindVertexBuffers(ctx.cmdBuffer, 0, 2, + (VkBuffer[]) { ctx.vertex_buffer, ctx.vertex_buffer }, + (VkDeviceSize[]) { 0, 6 * 4 * sizeof(float) }); + + vkCmdBindPipeline(ctx.cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ctx.pipeline); + + vkCmdBindDynamicStateObject(ctx.cmdBuffer, + VK_STATE_BIND_POINT_VIEWPORT, ctx.vp_state); + vkCmdBindDynamicStateObject(ctx.cmdBuffer, + VK_STATE_BIND_POINT_RASTER, ctx.rs_state); + vkCmdBindDynamicStateObject(ctx.cmdBuffer, + VK_STATE_BIND_POINT_DEPTH_STENCIL, ctx.ds_state); + + vkCmdDraw(ctx.cmdBuffer, 0, 3, 0, 1); + vkCmdDraw(ctx.cmdBuffer, 3, 3, 1, 1); + + test_finish(&ctx); +} + static void test_triangle(VkDevice device, VkQueue queue) { @@ -1023,6 +1525,8 @@ int main(int argc, char *argv[]) test_formats(device, queue); } else if (argc > 1 && strcmp(argv[1], "buffer-copy") == 0) { test_buffer_copy(device, queue); + } else if (argc > 1 && strcmp(argv[1], "depth-stencil") == 0) { + test_depth_stencil(device, queue); } else { test_triangle(device, queue); } -- cgit v1.2.3 From 37743f90bcaf3aa34c86ad3a382308164e4f1eaa Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 22 May 2015 22:59:12 -0700 Subject: vk: Set up depth and stencil buffers --- src/vulkan/device.c | 58 ++++++++++++++++++++++----- src/vulkan/formats.c | 20 ++++++---- src/vulkan/image.c | 109 +++++++++++++++++++++++++++++++++++++++------------ src/vulkan/private.h | 26 ++++++++---- 4 files changed, 163 insertions(+), 50 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 4301739022d..0c35d503ab0 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2260,15 +2260,6 @@ VkResult anv_BeginCommandBuffer( anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SBE_SWIZ); 
anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); - /* Hardcoded state: */ - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, - .SurfaceType = SURFTYPE_2D, - .Width = 1, - .Height = 1, - .SurfaceFormat = D16_UNORM, - .SurfaceBaseAddress = { NULL, 0 }, - .HierarchicalDepthBufferEnable = 0); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_WM_DEPTH_STENCIL, .DepthTestEnable = false, .DepthBufferWriteEnable = false); @@ -3118,6 +3109,9 @@ VkResult anv_CreateFramebuffer( struct anv_device *device = (struct anv_device *) _device; struct anv_framebuffer *framebuffer; + static const struct anv_depth_stencil_view null_view = + { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); framebuffer = anv_device_alloc(device, sizeof(*framebuffer), 8, @@ -3136,6 +3130,8 @@ VkResult anv_CreateFramebuffer( if (pCreateInfo->pDepthStencilAttachment) { framebuffer->depth_stencil = (struct anv_depth_stencil_view *) pCreateInfo->pDepthStencilAttachment->view; + } else { + framebuffer->depth_stencil = &null_view; } framebuffer->sample_count = pCreateInfo->sampleCount; @@ -3211,7 +3207,7 @@ anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer) struct anv_bindings *bindings = cmd_buffer->bindings; for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) { - struct anv_surface_view *view = framebuffer->color_attachments[i]; + const struct anv_surface_view *view = framebuffer->color_attachments[i]; struct anv_state state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); @@ -3228,6 +3224,46 @@ anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; } +static void +anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass) +{ + const struct anv_depth_stencil_view *view = + cmd_buffer->framebuffer->depth_stencil; + + /* FIXME: 
Implement the PMA stall W/A */ + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = view->depth_stride > 0, + .StencilWriteEnable = view->stencil_stride > 0, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = view->depth_format, + .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->depth_offset }, + .Height = pass->render_area.extent.height - 1, + .Width = pass->render_area.extent.width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GEN8_MOCS, + .RenderTargetViewExtent = 1 - 1, + .SurfaceQPitch = 0); + + /* Disable hierarchial depth buffers. */ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, + .StencilBufferEnable = view->stencil_stride > 0, + .StencilBufferObjectControlState = GEN8_MOCS, + .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->stencil_offset }, + .SurfaceQPitch = 0); + + /* Clear the clear params. 
*/ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS); +} + void anv_CmdBeginRenderPass( VkCmdBuffer cmdBuffer, const VkRenderPassBegin* pRenderPassBegin) @@ -3251,6 +3287,8 @@ void anv_CmdBeginRenderPass( anv_cmd_buffer_fill_render_targets(cmd_buffer); + anv_cmd_buffer_emit_depth_stencil(cmd_buffer, pass); + anv_cmd_buffer_clear(cmd_buffer, pass); } diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index 13ae1ed7538..66208f23ea9 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -23,7 +23,7 @@ #include "private.h" -#define UNSUPPORTED ~0U +#define UNSUPPORTED 0xffff static const struct anv_format anv_formats[] = { [VK_FORMAT_UNDEFINED] = { .format = RAW, .cpp = 1, .channels = 1 }, @@ -116,13 +116,17 @@ static const struct anv_format anv_formats[] = { [VK_FORMAT_R11G11B10_UFLOAT] = { .format = R11G11B10_FLOAT, .cpp = 4, .channels = 3 }, [VK_FORMAT_R9G9B9E5_UFLOAT] = { .format = R9G9B9E5_SHAREDEXP, .cpp = 4, .channels = 3 }, - [VK_FORMAT_D16_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_D24_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_D32_SFLOAT] = { .format = UNSUPPORTED }, - [VK_FORMAT_S8_UINT] = { .format = UNSUPPORTED }, - [VK_FORMAT_D16_UNORM_S8_UINT] = { .format = UNSUPPORTED }, - [VK_FORMAT_D24_UNORM_S8_UINT] = { .format = UNSUPPORTED }, - [VK_FORMAT_D32_SFLOAT_S8_UINT] = { .format = UNSUPPORTED }, + /* For depth/stencil formats, the .format and .cpp fields describe the + * depth format. The field .has_stencil indicates whether or not there's a + * stencil buffer. 
+ */ + [VK_FORMAT_D16_UNORM] = { .format = D16_UNORM, .cpp = 2, .channels = 1 }, + [VK_FORMAT_D24_UNORM] = { .format = D24_UNORM_X8_UINT, .cpp = 4, .channels = 1 }, + [VK_FORMAT_D32_SFLOAT] = { .format = D32_FLOAT, .cpp = 4, .channels = 1 }, + [VK_FORMAT_S8_UINT] = { .format = UNSUPPORTED, .cpp = 0, .channels = 1, .has_stencil = true }, + [VK_FORMAT_D16_UNORM_S8_UINT] = { .format = D16_UNORM, .cpp = 2, .channels = 2, .has_stencil = true }, + [VK_FORMAT_D24_UNORM_S8_UINT] = { .format = D24_UNORM_X8_UINT, .cpp = 4, .channels = 2, .has_stencil = true }, + [VK_FORMAT_D32_SFLOAT_S8_UINT] = { .format = D32_FLOAT, .cpp = 4, .channels = 2, .has_stencil = true }, [VK_FORMAT_BC1_RGB_UNORM] = { .format = UNSUPPORTED }, [VK_FORMAT_BC1_RGB_SRGB] = { .format = UNSUPPORTED }, diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 109a248b9a0..99542f1b538 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -49,8 +49,9 @@ VkResult anv_image_create( { struct anv_device *device = (struct anv_device *) _device; struct anv_image *image; - const struct anv_format *format; + const struct anv_format *info; int32_t aligned_height; + uint32_t stencil_size; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); @@ -93,13 +94,32 @@ VkResult anv_image_create( image->alignment = 4096; } - format = anv_format_for_vk_format(pCreateInfo->format); - assert(format->cpp > 0); - image->stride = ALIGN_I32(image->extent.width * format->cpp, - tile_mode_info[image->tile_mode].tile_width); - aligned_height = ALIGN_I32(image->extent.height, - tile_mode_info[image->tile_mode].tile_height); - image->size = image->stride * aligned_height; + info = anv_format_for_vk_format(pCreateInfo->format); + assert(info->cpp > 0 || info->has_stencil); + + if (info->cpp > 0) { + image->stride = ALIGN_I32(image->extent.width * info->cpp, + tile_mode_info[image->tile_mode].tile_width); + aligned_height = ALIGN_I32(image->extent.height, + tile_mode_info[image->tile_mode].tile_height); + image->size = 
image->stride * aligned_height; + } else { + image->size = 0; + image->stride = 0; + } + + if (info->has_stencil) { + image->stencil_offset = ALIGN_U32(image->size, 4096); + image->stencil_stride = ALIGN_I32(image->extent.width, + tile_mode_info[WMAJOR].tile_width); + aligned_height = ALIGN_I32(image->extent.height, + tile_mode_info[WMAJOR].tile_height); + stencil_size = image->stencil_stride * aligned_height; + image->size = image->stencil_offset + stencil_size; + } else { + image->stencil_offset = 0; + image->stencil_stride = 0; + } *pImage = (VkImage) image; @@ -125,14 +145,13 @@ VkResult anv_GetImageSubresourceInfo( stub_return(VK_UNSUPPORTED); } -// Image view functions - static struct anv_state create_surface_state(struct anv_device *device, - struct anv_image *image, const struct anv_format *format, - struct anv_cmd_buffer *cmd_buffer) + struct anv_image *image, uint32_t format, uint32_t tile_mode, + uint32_t offset, struct anv_cmd_buffer *cmd_buffer) { struct anv_state state; + if (cmd_buffer) state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); else @@ -141,10 +160,10 @@ create_surface_state(struct anv_device *device, struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = SURFTYPE_2D, .SurfaceArray = false, - .SurfaceFormat = format->format, + .SurfaceFormat = format, .SurfaceVerticalAlignment = VALIGN4, .SurfaceHorizontalAlignment = HALIGN4, - .TileMode = image->tile_mode, + .TileMode = tile_mode, .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, .SamplerL2BypassModeDisable = true, @@ -172,8 +191,7 @@ create_surface_state(struct anv_device *device, .ShaderChannelSelectBlue = SCS_BLUE, .ShaderChannelSelectAlpha = SCS_ALPHA, .ResourceMinLOD = 0, - /* FIXME: We assume that the image must be bound at this time. 
*/ - .SurfaceBaseAddress = { NULL, image->offset }, + .SurfaceBaseAddress = { NULL, offset }, }; GEN8_RENDER_SURFACE_STATE_pack(NULL, state.map, &surface_state); @@ -188,17 +206,33 @@ anv_image_view_init(struct anv_surface_view *view, struct anv_cmd_buffer *cmd_buffer) { struct anv_image *image = (struct anv_image *) pCreateInfo->image; - const struct anv_format *format = + const struct anv_format *info = anv_format_for_vk_format(pCreateInfo->format); + uint32_t tile_mode, format; view->bo = image->bo; - view->offset = image->offset; - view->surface_state = create_surface_state(device, image, format, - cmd_buffer); - view->format = pCreateInfo->format; + switch (pCreateInfo->subresourceRange.aspect) { + case VK_IMAGE_ASPECT_STENCIL: + /* FIXME: How is stencil texturing formed? */ + view->offset = image->offset + image->stencil_offset; + tile_mode = WMAJOR; + format = R8_UINT; + break; + case VK_IMAGE_ASPECT_DEPTH: + case VK_IMAGE_ASPECT_COLOR: + view->offset = image->offset; + tile_mode = image->tile_mode; + format = info->format; + break; + default: + assert(0); + break; + } /* TODO: Miplevels */ view->extent = image->extent; + view->surface_state = + create_surface_state(device, image, format, tile_mode, view->offset, cmd_buffer); } VkResult anv_CreateImageView( @@ -235,10 +269,11 @@ anv_color_attachment_view_init(struct anv_surface_view *view, view->bo = image->bo; view->offset = image->offset; - view->surface_state = create_surface_state(device, image, format, - cmd_buffer); view->extent = image->extent; view->format = pCreateInfo->format; + view->surface_state = + create_surface_state(device, image, + format->format, image->tile_mode, view->offset, cmd_buffer); } VkResult anv_CreateColorAttachmentView( @@ -264,9 +299,33 @@ VkResult anv_CreateColorAttachmentView( } VkResult anv_CreateDepthStencilView( - VkDevice device, + VkDevice _device, const VkDepthStencilViewCreateInfo* pCreateInfo, VkDepthStencilView* pView) { - stub_return(VK_UNSUPPORTED); + struct 
anv_device *device = (struct anv_device *) _device; + struct anv_depth_stencil_view *view; + struct anv_image *image = (struct anv_image *) pCreateInfo->image; + const struct anv_format *format = + anv_format_for_vk_format(image->format); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEPTH_STENCIL_VIEW_CREATE_INFO); + + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + view->bo = image->bo; + + view->depth_stride = image->stride; + view->depth_offset = image->offset; + view->depth_format = format->format; + + view->stencil_stride = image->stencil_stride; + view->stencil_offset = image->offset + image->stencil_offset; + + *pView = (VkDepthStencilView) view; + + return VK_SUCCESS; } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index e3e5bd52a64..f75e03c8598 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -659,9 +659,10 @@ int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipelin void anv_compiler_free(struct anv_pipeline *pipeline); struct anv_format { - uint32_t format; - uint32_t cpp; - uint32_t channels; + uint16_t format; + uint8_t cpp; + uint8_t channels; + bool has_stencil; }; const struct anv_format * @@ -669,12 +670,15 @@ anv_format_for_vk_format(VkFormat format); struct anv_image { VkImageType type; - VkFormat format; VkExtent3D extent; + VkFormat format; uint32_t tile_mode; VkDeviceSize size; uint32_t alignment; - int32_t stride; + uint32_t stride; + + uint32_t stencil_offset; + uint32_t stencil_stride; /* Set when bound */ struct anv_bo * bo; @@ -715,13 +719,21 @@ struct anv_sampler { }; struct anv_depth_stencil_view { + struct anv_bo * bo; + + uint32_t depth_offset; + uint32_t depth_stride; + uint32_t depth_format; + + uint32_t stencil_offset; + uint32_t stencil_stride; }; struct anv_framebuffer { struct anv_object base; uint32_t color_attachment_count; - struct anv_surface_view * 
color_attachments[MAX_RTS]; - struct anv_depth_stencil_view * depth_stencil; + const struct anv_surface_view * color_attachments[MAX_RTS]; + const struct anv_depth_stencil_view * depth_stencil; uint32_t sample_count; uint32_t width; -- cgit v1.2.3 From cbe7ed416ec0ae7d6527916ef5dc50e483bc2685 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Sun, 24 May 2015 21:19:26 -0700 Subject: vk: Implement dynamic and pipeline ds state --- src/vulkan/device.c | 50 ++++++++++++++++++++++++++++++++----- src/vulkan/pipeline.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++--- src/vulkan/private.h | 7 ++++++ 3 files changed, 117 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 0c35d503ab0..a64772ae1bc 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2083,11 +2083,44 @@ VkResult anv_CreateDynamicColorBlendState( } VkResult anv_CreateDynamicDepthStencilState( - VkDevice device, + VkDevice _device, const VkDynamicDsStateCreateInfo* pCreateInfo, VkDynamicDsState* pState) { - stub_return(VK_UNSUPPORTED); + struct anv_device *device = (struct anv_device *) _device; + struct anv_dynamic_ds_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + + /* pCreateInfo->stencilFrontRef, + * pCreateInfo->stencilBackRef, + * go in cc state + */ + + /* Is this what we need to do? 
*/ + .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0, + + .StencilTestMask = pCreateInfo->stencilReadMask, + .StencilWriteMask = pCreateInfo->stencilWriteMask, + + .BackfaceStencilTestMask = pCreateInfo->stencilReadMask, + .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask, + }; + + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil, + &wm_depth_stencil); + + *pState = (VkDynamicDsState) state; + + return VK_SUCCESS; } // Command buffer functions @@ -2260,10 +2293,6 @@ VkResult anv_BeginCommandBuffer( anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SBE_SWIZ); anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_WM_DEPTH_STENCIL, - .DepthTestEnable = false, - .DepthBufferWriteEnable = false); - return VK_SUCCESS; } @@ -2440,7 +2469,10 @@ void anv_CmdBindDynamicStateObject( cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY; break; case VK_STATE_BIND_POINT_COLOR_BLEND: + break; case VK_STATE_BIND_POINT_DEPTH_STENCIL: + cmd_buffer->ds_state = (struct anv_dynamic_ds_state *) dynamicState; + cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY; break; default: break; @@ -2680,6 +2712,12 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit_merge(&cmd_buffer->batch, cmd_buffer->rs_state->state_sf, pipeline->state_sf); + if (cmd_buffer->ds_state && + (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY))) + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->ds_state->state_wm_depth_stencil, + pipeline->state_wm_depth_stencil); + cmd_buffer->vb_dirty &= ~vb_emit; cmd_buffer->dirty = 0; } diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index a9594a71c95..3e0dc15c1ae 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -156,7 +156,6 @@ emit_ia_state(struct anv_pipeline *pipeline, static void emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, const struct 
anv_pipeline_create_info *extra) - { static const uint32_t vk_to_gen_cullmode[] = { [VK_CULL_MODE_NONE] = CULLMODE_NONE, @@ -216,6 +215,53 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, } +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER, + [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS, + [VK_COMPARE_OP_EQUAL] = COMPAREFUNCTION_EQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = COMPAREFUNCTION_LEQUAL, + [VK_COMPARE_OP_GREATER] = COMPAREFUNCTION_GREATER, + [VK_COMPARE_OP_NOT_EQUAL] = COMPAREFUNCTION_NOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = COMPAREFUNCTION_GEQUAL, + [VK_COMPARE_OP_ALWAYS] = COMPAREFUNCTION_ALWAYS, +}; + +static const uint32_t vk_to_gen_stencil_op[] = { + [VK_STENCIL_OP_KEEP] = 0, + [VK_STENCIL_OP_ZERO] = 0, + [VK_STENCIL_OP_REPLACE] = 0, + [VK_STENCIL_OP_INC_CLAMP] = 0, + [VK_STENCIL_OP_DEC_CLAMP] = 0, + [VK_STENCIL_OP_INVERT] = 0, + [VK_STENCIL_OP_INC_WRAP] = 0, + [VK_STENCIL_OP_DEC_WRAP] = 0 +}; + +static void +emit_ds_state(struct anv_pipeline *pipeline, VkPipelineDsStateCreateInfo *info) +{ + /* bool32_t depthBoundsEnable; // optional (depth_bounds_test) */ + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], + .BackfaceStencilPassDepthFailOp 
=vk_to_gen_stencil_op[info->back.stencilDepthFailOp], + .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], + }; + + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->state_wm_depth_stencil, &wm_depth_stencil); +} + VkResult anv_CreateGraphicsPipeline( VkDevice device, const VkGraphicsPipelineCreateInfo* pCreateInfo, @@ -249,8 +295,9 @@ anv_pipeline_create( struct anv_pipeline *pipeline; const struct anv_common *common; VkPipelineShaderStageCreateInfo *shader_create_info; - VkPipelineIaStateCreateInfo *ia_info; - VkPipelineRsStateCreateInfo *rs_info; + VkPipelineIaStateCreateInfo *ia_info = NULL; + VkPipelineRsStateCreateInfo *rs_info = NULL; + VkPipelineDsStateCreateInfo *ds_info = NULL; VkPipelineVertexInputCreateInfo *vi_info; VkResult result; uint32_t offset, length; @@ -282,14 +329,23 @@ anv_pipeline_create( ia_info = (VkPipelineIaStateCreateInfo *) common; break; case VK_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO: + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO"); + break; case VK_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO: + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO"); break; case VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO: rs_info = (VkPipelineRsStateCreateInfo *) common; break; case VK_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO: + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO"); + break; case VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO: + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO"); + break; case VK_STRUCTURE_TYPE_PIPELINE_DS_STATE_CREATE_INFO: + ds_info = (VkPipelineDsStateCreateInfo *) common; + break; case VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO: shader_create_info = (VkPipelineShaderStageCreateInfo *) common; pipeline->shaders[shader_create_info->shader.stage] = @@ -311,9 +367,16 @@ anv_pipeline_create( if (pipeline->vs_simd8 == NO_KERNEL) pipeline->wm_prog_data.num_varying_inputs = vi_info->attributeCount - 2; + 
assert(vi_info); emit_vertex_input(pipeline, vi_info); + assert(ia_info); emit_ia_state(pipeline, ia_info, extra); + assert(rs_info); emit_rs_state(pipeline, rs_info, extra); + /* ds_info is optional if we're not using depth or stencil buffers, ps is + * optional for depth-only rendering. */ + if (ds_info) + emit_ds_state(pipeline, ds_info); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, .ClipEnable = true, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index f75e03c8598..ec7ffb49dc7 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -470,6 +470,10 @@ struct anv_dynamic_rs_state { uint32_t state_sf[GEN8_3DSTATE_SF_length]; }; +struct anv_dynamic_ds_state { + uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; +}; + struct anv_dynamic_cb_state { uint32_t blend_offset; }; @@ -540,6 +544,7 @@ struct anv_buffer { #define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 0) #define ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY (1 << 1) #define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) +#define ANV_CMD_BUFFER_DS_DIRTY (1 << 3) struct anv_bindings { struct { @@ -578,6 +583,7 @@ struct anv_cmd_buffer { struct anv_pipeline * pipeline; struct anv_framebuffer * framebuffer; struct anv_dynamic_rs_state * rs_state; + struct anv_dynamic_ds_state * ds_state; struct anv_dynamic_vp_state * vp_state; struct anv_bindings * bindings; struct anv_bindings default_bindings; @@ -637,6 +643,7 @@ struct anv_pipeline { uint32_t state_sf[GEN8_3DSTATE_SF_length]; uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; + uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; }; struct anv_pipeline_create_info { -- cgit v1.2.3 From 0dbed616aff26edf007e8b55f6a5d85aea0ef0d1 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 25 May 2015 22:12:24 -0700 Subject: vk: Add support for texture component swizzle This also drops the share create_surface_state helper and moves filling out SURFACE_STATE directly into anv_image_view_init() and 
anv_color_attachment_view_init(). --- src/vulkan/image.c | 152 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 97 insertions(+), 55 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 99542f1b538..5404dd29ea7 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -145,17 +145,47 @@ VkResult anv_GetImageSubresourceInfo( stub_return(VK_UNSUPPORTED); } -static struct anv_state -create_surface_state(struct anv_device *device, - struct anv_image *image, uint32_t format, uint32_t tile_mode, - uint32_t offset, struct anv_cmd_buffer *cmd_buffer) +void +anv_image_view_init(struct anv_surface_view *view, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) { - struct anv_state state; + struct anv_image *image = (struct anv_image *) pCreateInfo->image; + const struct anv_format *info = + anv_format_for_vk_format(pCreateInfo->format); + uint32_t tile_mode, format; - if (cmd_buffer) - state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); - else - state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + view->bo = image->bo; + switch (pCreateInfo->subresourceRange.aspect) { + case VK_IMAGE_ASPECT_STENCIL: + /* FIXME: How is stencil texturing formed? 
*/ + view->offset = image->offset + image->stencil_offset; + tile_mode = WMAJOR; + format = R8_UINT; + break; + case VK_IMAGE_ASPECT_DEPTH: + case VK_IMAGE_ASPECT_COLOR: + view->offset = image->offset; + tile_mode = image->tile_mode; + format = info->format; + break; + default: + assert(0); + break; + } + + /* TODO: Miplevels */ + view->extent = image->extent; + + static const uint32_t vk_to_gen_swizzle[] = { + [VK_CHANNEL_SWIZZLE_ZERO] = SCS_ZERO, + [VK_CHANNEL_SWIZZLE_ONE] = SCS_ONE, + [VK_CHANNEL_SWIZZLE_R] = SCS_RED, + [VK_CHANNEL_SWIZZLE_G] = SCS_GREEN, + [VK_CHANNEL_SWIZZLE_B] = SCS_BLUE, + [VK_CHANNEL_SWIZZLE_A] = SCS_ALPHA + }; struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = SURFTYPE_2D, @@ -186,53 +216,22 @@ create_surface_state(struct anv_device *device, .GreenClearColor = 0, .BlueClearColor = 0, .AlphaClearColor = 0, - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, + .ShaderChannelSelectRed = vk_to_gen_swizzle[pCreateInfo->channels.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[pCreateInfo->channels.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[pCreateInfo->channels.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[pCreateInfo->channels.a], .ResourceMinLOD = 0, - .SurfaceBaseAddress = { NULL, offset }, + .SurfaceBaseAddress = { NULL, view->offset }, }; - GEN8_RENDER_SURFACE_STATE_pack(NULL, state.map, &surface_state); - - return state; -} - -void -anv_image_view_init(struct anv_surface_view *view, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_image *image = (struct anv_image *) pCreateInfo->image; - const struct anv_format *info = - anv_format_for_vk_format(pCreateInfo->format); - uint32_t tile_mode, format; - - view->bo = image->bo; - switch (pCreateInfo->subresourceRange.aspect) { - case VK_IMAGE_ASPECT_STENCIL: - /* FIXME: How is 
stencil texturing formed? */ - view->offset = image->offset + image->stencil_offset; - tile_mode = WMAJOR; - format = R8_UINT; - break; - case VK_IMAGE_ASPECT_DEPTH: - case VK_IMAGE_ASPECT_COLOR: - view->offset = image->offset; - tile_mode = image->tile_mode; - format = info->format; - break; - default: - assert(0); - break; - } + if (cmd_buffer) + view->surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + else + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - /* TODO: Miplevels */ - view->extent = image->extent; - view->surface_state = - create_surface_state(device, image, format, tile_mode, view->offset, cmd_buffer); + GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); } VkResult anv_CreateImageView( @@ -271,9 +270,52 @@ anv_color_attachment_view_init(struct anv_surface_view *view, view->offset = image->offset; view->extent = image->extent; view->format = pCreateInfo->format; - view->surface_state = - create_surface_state(device, image, - format->format, image->tile_mode, view->offset, cmd_buffer); + + if (cmd_buffer) + view->surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + else + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_2D, + .SurfaceArray = false, + .SurfaceFormat = format->format, + .SurfaceVerticalAlignment = VALIGN4, + .SurfaceHorizontalAlignment = HALIGN4, + .TileMode = image->tile_mode, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GEN8_MOCS, + .BaseMipLevel = 0, + .SurfaceQPitch = 0, + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = image->extent.depth - 1, + .SurfacePitch = image->stride - 1, + .MinimumArrayElement = 0, + 
.NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + .SurfaceMinLOD = 0, + .MIPCountLOD = 0, + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + .ResourceMinLOD = 0, + .SurfaceBaseAddress = { NULL, view->offset }, + }; + + GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); } VkResult anv_CreateColorAttachmentView( -- cgit v1.2.3 From 998837764ffd19795a78dc6fe1e864e6b17ed712 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 26 May 2015 09:40:10 -0700 Subject: vk: Program depth bias This makes 3DSTATE_RASTER a split state command. --- src/vulkan/device.c | 21 +++++++++++++++++---- src/vulkan/pipeline.c | 17 ++++++++++------- src/vulkan/private.h | 1 + 3 files changed, 28 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index a64772ae1bc..de68fa551a5 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2042,9 +2042,6 @@ VkResult anv_CreateDynamicRasterState( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); /* Missing these: - * float depthBias; - * float depthBiasClamp; - * float slopeScaledDepthBias; * float pointFadeThreshold; * // optional (GL45) - Size of point fade threshold */ @@ -2057,6 +2054,19 @@ VkResult anv_CreateDynamicRasterState( GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); + bool enable_bias = pCreateInfo->depthBias != 0.0f || + pCreateInfo->slopeScaledDepthBias != 0.0f; + struct GEN8_3DSTATE_RASTER raster = { + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = pCreateInfo->depthBias, + .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias, + 
.GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp + }; + + GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster); + *pState = (VkDynamicRsState) state; return VK_SUCCESS; @@ -2708,9 +2718,12 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->dirty & ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY) flush_descriptor_sets(cmd_buffer); - if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) + if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { anv_batch_emit_merge(&cmd_buffer->batch, cmd_buffer->rs_state->state_sf, pipeline->state_sf); + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->rs_state->state_raster, pipeline->state_raster); + } if (cmd_buffer->ds_state && (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY))) diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 3e0dc15c1ae..ca11ca8b343 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -197,13 +197,16 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_RASTER, - .FrontWinding = vk_to_gen_front_face[info->frontFace], - .CullMode = vk_to_gen_cullmode[info->cullMode], - .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], - .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], - .ScissorRectangleEnable = !(extra && extra->disable_scissor), - .ViewportZClipTestEnable = info->depthClipEnable); + struct GEN8_3DSTATE_RASTER raster = { + .FrontWinding = vk_to_gen_front_face[info->frontFace], + .CullMode = vk_to_gen_cullmode[info->cullMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .ScissorRectangleEnable = !(extra && extra->disable_scissor), + .ViewportZClipTestEnable = info->depthClipEnable + }; + + GEN8_3DSTATE_RASTER_pack(NULL, pipeline->state_raster, 
&raster); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, .ForceVertexURBEntryReadLength = false, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index ec7ffb49dc7..4e18a434828 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -468,6 +468,7 @@ struct anv_dynamic_vp_state { struct anv_dynamic_rs_state { uint32_t state_sf[GEN8_3DSTATE_SF_length]; + uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; }; struct anv_dynamic_ds_state { -- cgit v1.2.3 From 5e637c5d5aedff139537d1a27766954a2c403302 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 26 May 2015 11:02:28 -0700 Subject: vk/pack: Generate length macros for structs --- src/vulkan/gen75_pack.h | 40 ++++++++++++++++++++++++++++++++++++++++ src/vulkan/gen7_pack.h | 32 ++++++++++++++++++++++++++++++++ src/vulkan/gen8_pack.h | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index 4978d5977b7..1f041ab1a72 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -6949,6 +6949,22 @@ GEN75_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, } +#define GEN75_3DSTATE_CONSTANT_BODY_length 0x00000006 + +#define GEN75_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 + +#define GEN75_GATHER_CONSTANT_ENTRY_length 0x00000001 + +#define GEN75_VERTEX_BUFFER_STATE_length 0x00000004 + +#define GEN75_VERTEX_ELEMENT_STATE_length 0x00000002 + +#define GEN75_SO_DECL_ENTRY_length 0x00000002 + +#define GEN75_SO_DECL_length 0x00000001 + +#define GEN75_SCISSOR_RECT_length 0x00000002 + struct GEN75_SCISSOR_RECT { uint32_t ScissorRectangleYMin; uint32_t ScissorRectangleXMin; @@ -6974,6 +6990,8 @@ GEN75_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, } +#define GEN75_SF_CLIP_VIEWPORT_length 0x00000010 + struct GEN75_SF_CLIP_VIEWPORT { float ViewportMatrixElementm00; float ViewportMatrixElementm11; @@ -7044,6 +7062,8 @@ GEN75_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, 
void * restrict dst, } +#define GEN75_BLEND_STATE_length 0x00000002 + struct GEN75_BLEND_STATE { uint32_t ColorBufferBlendEnable; uint32_t IndependentAlphaBlendEnable; @@ -7167,6 +7187,8 @@ GEN75_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, } +#define GEN75_CC_VIEWPORT_length 0x00000002 + struct GEN75_CC_VIEWPORT { float MinimumDepth; float MaximumDepth; @@ -7188,6 +7210,8 @@ GEN75_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, } +#define GEN75_COLOR_CALC_STATE_length 0x00000006 + struct GEN75_COLOR_CALC_STATE { uint32_t StencilReferenceValue; uint32_t BackFaceStencilReferenceValue; @@ -7241,6 +7265,8 @@ GEN75_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, } +#define GEN75_DEPTH_STENCIL_STATE_length 0x00000003 + struct GEN75_DEPTH_STENCIL_STATE { uint32_t StencilTestEnable; #define COMPAREFUNCTION_ALWAYS 0 @@ -7337,6 +7363,10 @@ GEN75_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, } +#define GEN75_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + +#define GEN75_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS_length 0x00000001 + struct GEN75_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS { #define Highestpriority 0 #define Secondhighestpriority 1 @@ -7365,6 +7395,8 @@ GEN75_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS_pack(__gen_user_data *data, void } +#define GEN75_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 + struct GEN75_INTERFACE_DESCRIPTOR_DATA { uint32_t KernelStartPointer; #define Multiple 0 @@ -7449,6 +7481,10 @@ GEN75_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, } +#define GEN75_PALETTE_ENTRY_length 0x00000001 + +#define GEN75_RENDER_SURFACE_STATE_length 0x00000008 + struct GEN75_RENDER_SURFACE_STATE { #define SURFTYPE_1D 0 #define SURFTYPE_2D 1 @@ -7608,6 +7644,8 @@ GEN75_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, } +#define GEN75_SAMPLER_BORDER_COLOR_STATE_length 0x00000014 + struct GEN75_SAMPLER_BORDER_COLOR_STATE { uint32_t 
BorderColorRedDX100GL; uint32_t BorderColorAlpha; @@ -7659,6 +7697,8 @@ GEN75_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst } +#define GEN75_SAMPLER_STATE_length 0x00000004 + struct GEN75_SAMPLER_STATE { uint32_t SamplerDisable; #define DX10OGL 0 diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index d13d92e3998..34203d1d820 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -5708,6 +5708,18 @@ GEN7_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, } +#define GEN7_3DSTATE_CONSTANT_BODY_length 0x00000006 + +#define GEN7_VERTEX_BUFFER_STATE_length 0x00000004 + +#define GEN7_VERTEX_ELEMENT_STATE_length 0x00000002 + +#define GEN7_SO_DECL_ENTRY_length 0x00000002 + +#define GEN7_SO_DECL_length 0x00000001 + +#define GEN7_SCISSOR_RECT_length 0x00000002 + struct GEN7_SCISSOR_RECT { uint32_t ScissorRectangleYMin; uint32_t ScissorRectangleXMin; @@ -5733,6 +5745,8 @@ GEN7_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, } +#define GEN7_SF_CLIP_VIEWPORT_length 0x00000010 + struct GEN7_SF_CLIP_VIEWPORT { float ViewportMatrixElementm00; float ViewportMatrixElementm11; @@ -5803,6 +5817,8 @@ GEN7_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, } +#define GEN7_BLEND_STATE_length 0x00000002 + struct GEN7_BLEND_STATE { uint32_t ColorBufferBlendEnable; uint32_t IndependentAlphaBlendEnable; @@ -5926,6 +5942,8 @@ GEN7_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, } +#define GEN7_CC_VIEWPORT_length 0x00000002 + struct GEN7_CC_VIEWPORT { float MinimumDepth; float MaximumDepth; @@ -5947,6 +5965,8 @@ GEN7_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, } +#define GEN7_COLOR_CALC_STATE_length 0x00000006 + struct GEN7_COLOR_CALC_STATE { uint32_t StencilReferenceValue; uint32_t BackFaceStencilReferenceValue; @@ -6000,6 +6020,8 @@ GEN7_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, } +#define GEN7_DEPTH_STENCIL_STATE_length 0x00000003 + struct 
GEN7_DEPTH_STENCIL_STATE { uint32_t StencilTestEnable; #define COMPAREFUNCTION_ALWAYS 0 @@ -6096,6 +6118,10 @@ GEN7_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, } +#define GEN7_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + +#define GEN7_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 + struct GEN7_INTERFACE_DESCRIPTOR_DATA { uint32_t KernelStartPointer; #define Multiple 0 @@ -6180,6 +6206,10 @@ GEN7_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, } +#define GEN7_PALETTE_ENTRY_length 0x00000001 + +#define GEN7_SAMPLER_BORDER_COLOR_STATE_length 0x00000004 + struct GEN7_SAMPLER_BORDER_COLOR_STATE { uint32_t BorderColorRedDX100GL; uint32_t BorderColorAlpha; @@ -6219,6 +6249,8 @@ GEN7_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, } +#define GEN7_SAMPLER_STATE_length 0x00000004 + struct GEN7_SAMPLER_STATE { uint32_t SamplerDisable; #define DX10OGL 0 diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index 844b0ce6aed..59dbce8d4f6 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -7651,6 +7651,24 @@ GEN8_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, } +#define GEN8_3DSTATE_CONSTANT_BODY_length 0x0000000a + +#define GEN8_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 + +#define GEN8_GATHER_CONSTANT_ENTRY_length 0x00000001 + +#define GEN8_VERTEX_BUFFER_STATE_length 0x00000004 + +#define GEN8_VERTEX_ELEMENT_STATE_length 0x00000002 + +#define GEN8_SO_DECL_ENTRY_length 0x00000002 + +#define GEN8_SO_DECL_length 0x00000001 + +#define GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_length 0x00000001 + +#define GEN8_SCISSOR_RECT_length 0x00000002 + struct GEN8_SCISSOR_RECT { uint32_t ScissorRectangleYMin; uint32_t ScissorRectangleXMin; @@ -7676,6 +7694,8 @@ GEN8_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, } +#define GEN8_SF_CLIP_VIEWPORT_length 0x00000010 + struct GEN8_SF_CLIP_VIEWPORT { float ViewportMatrixElementm00; float ViewportMatrixElementm11; @@ -7763,6 
+7783,8 @@ GEN8_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, } +#define GEN8_BLEND_STATE_length 0x00000011 + struct GEN8_BLEND_STATE_ENTRY { uint32_t LogicOpEnable; uint32_t LogicOpFunction; @@ -7848,6 +7870,10 @@ GEN8_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, GEN8_BLEND_STATE_ENTRY_pack(data, &dw[1], &values->Entry); } +#define GEN8_BLEND_STATE_ENTRY_length 0x00000001 + +#define GEN8_CC_VIEWPORT_length 0x00000002 + struct GEN8_CC_VIEWPORT { float MinimumDepth; float MaximumDepth; @@ -7869,6 +7895,8 @@ GEN8_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, } +#define GEN8_COLOR_CALC_STATE_length 0x00000006 + struct GEN8_COLOR_CALC_STATE { uint32_t StencilReferenceValue; uint32_t BackFaceStencilReferenceValue; @@ -7922,6 +7950,10 @@ GEN8_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, } +#define GEN8_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + +#define GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS_length 0x00000001 + struct GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS { #define UseCacheabilityControlsfrompagetableUCwithFenceifcoherentcycle 0 #define UncacheableUCnoncacheable 1 @@ -7956,6 +7988,8 @@ GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS_pack(__gen_user_data *data, void * } +#define GEN8_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 + struct GEN8_INTERFACE_DESCRIPTOR_DATA { uint32_t KernelStartPointer; uint32_t KernelStartPointerHigh; @@ -8054,6 +8088,10 @@ GEN8_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, } +#define GEN8_PALETTE_ENTRY_length 0x00000001 + +#define GEN8_RENDER_SURFACE_STATE_length 0x00000010 + struct GEN8_RENDER_SURFACE_STATE { #define SURFTYPE_1D 0 #define SURFTYPE_2D 1 @@ -8263,6 +8301,8 @@ GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, } +#define GEN8_SAMPLER_STATE_length 0x00000004 + struct GEN8_SAMPLER_STATE { uint32_t SamplerDisable; #define DX10OGL 0 -- cgit v1.2.3 From b29f44218d611dc7179a55a22ca71ff3a1624d71 
Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 26 May 2015 11:22:12 -0700 Subject: vk: Emit color calc state This involves pulling stencil ref values out of DS dynamic state and the blend constant out of CB dynamic state. --- src/vulkan/device.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++---- src/vulkan/private.h | 6 ++++- 2 files changed, 68 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index de68fa551a5..4cdf3e2b8a6 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2087,6 +2087,15 @@ VkResult anv_CreateDynamicColorBlendState( if (state == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + struct GEN8_COLOR_CALC_STATE color_calc_state = { + .BlendConstantColorRed = pCreateInfo->blendConst[0], + .BlendConstantColorGreen = pCreateInfo->blendConst[1], + .BlendConstantColorBlue = pCreateInfo->blendConst[2], + .BlendConstantColorAlpha = pCreateInfo->blendConst[3] + }; + + GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); + *pState = (VkDynamicCbState) state; return VK_SUCCESS; @@ -2110,11 +2119,6 @@ VkResult anv_CreateDynamicDepthStencilState( struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { GEN8_3DSTATE_WM_DEPTH_STENCIL_header, - /* pCreateInfo->stencilFrontRef, - * pCreateInfo->stencilBackRef, - * go in cc state - */ - /* Is this what we need to do? 
*/ .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0, @@ -2128,6 +2132,13 @@ VkResult anv_CreateDynamicDepthStencilState( GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil, &wm_depth_stencil); + struct GEN8_COLOR_CALC_STATE color_calc_state = { + .StencilReferenceValue = pCreateInfo->stencilFrontRef, + .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef + }; + + GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); + *pState = (VkDynamicDsState) state; return VK_SUCCESS; @@ -2679,6 +2690,35 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) } } +static struct anv_state +anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t dwords, uint32_t alignment) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_state state; + + state = anv_state_pool_alloc(&device->dynamic_state_pool, dwords * 4, alignment); + memcpy(state.map, a, dwords * 4); + + return state; +} + +static struct anv_state +anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, uint32_t dwords, uint32_t alignment) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_state state; + uint32_t *p; + + state = anv_state_pool_alloc(&device->dynamic_state_pool, dwords * 4, alignment); + p = state.map; + for (uint32_t i = 0; i < dwords; i++) + p[i] = a[i] | b[i]; + + return state; +} + static void anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { @@ -2731,6 +2771,24 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->ds_state->state_wm_depth_stencil, pipeline->state_wm_depth_stencil); + if (cmd_buffer->dirty & (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) { + struct anv_state state; + if (cmd_buffer->ds_state) + state = anv_cmd_buffer_merge_dynamic(cmd_buffer, + cmd_buffer->ds_state->state_color_calc, + cmd_buffer->cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 32); + else + state = 
anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 32); + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = state.offset, + .ColorCalcStatePointerValid = true); + } + cmd_buffer->vb_dirty &= ~vb_emit; cmd_buffer->dirty = 0; } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 4e18a434828..d29b20ad6d8 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -473,10 +473,12 @@ struct anv_dynamic_rs_state { struct anv_dynamic_ds_state { uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + uint32_t state_color_calc[GEN8_COLOR_CALC_STATE_length]; }; struct anv_dynamic_cb_state { - uint32_t blend_offset; + uint32_t state_color_calc[GEN8_COLOR_CALC_STATE_length]; + }; struct anv_query_pool_slot { @@ -546,6 +548,7 @@ struct anv_buffer { #define ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY (1 << 1) #define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) #define ANV_CMD_BUFFER_DS_DIRTY (1 << 3) +#define ANV_CMD_BUFFER_CB_DIRTY (1 << 4) struct anv_bindings { struct { @@ -586,6 +589,7 @@ struct anv_cmd_buffer { struct anv_dynamic_rs_state * rs_state; struct anv_dynamic_ds_state * ds_state; struct anv_dynamic_vp_state * vp_state; + struct anv_dynamic_cb_state * cb_state; struct anv_bindings * bindings; struct anv_bindings default_bindings; }; -- cgit v1.2.3 From 610e6291da93f46dab29697606000bec9b425ee8 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 26 May 2015 11:50:34 -0700 Subject: vk: Allocate samplers from dynamic stream --- src/vulkan/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 4cdf3e2b8a6..fb190025c78 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2670,7 +2670,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) size_t size; size = layout->stage[s].sampler_count * 16; - state = 
anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); + state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); memcpy(state.map, bindings->descriptors[s].samplers, size); static const uint32_t sampler_state_opcodes[] = { -- cgit v1.2.3 From 1cd8437b9da8c91b17161c25e3786153c3c8d779 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 26 May 2015 14:47:06 -0700 Subject: vk/meta: Allocate and set color/blend state For color blend, we have to set our own state to avoid inheriting bogus blend state. --- src/vulkan/meta.c | 40 ++++++++++++++++++++++++++++++++++++++-- src/vulkan/private.h | 2 ++ 2 files changed, 40 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index c4d245614ce..f8b66b0b2fe 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -123,10 +123,20 @@ anv_device_init_meta_clear_state(struct anv_device *device) .frontFace = VK_FRONT_FACE_CCW }; + VkPipelineCbStateCreateInfo cb_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO, + .pNext = &rs_create_info, + .attachmentCount = 1, + .pAttachments = (VkPipelineCbAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }; + anv_pipeline_create((VkDevice) device, &(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = &rs_create_info, + .pNext = &cb_create_info, .flags = 0, .layout = 0 }, @@ -144,6 +154,13 @@ anv_device_init_meta_clear_state(struct anv_device *device) .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, }, &device->clear_state.rs_state); + + anv_CreateDynamicColorBlendState((VkDevice) device, + &(VkDynamicCbStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO + }, + &device->clear_state.cb_state); + } #define NUM_VB_USED 2 @@ -151,6 +168,7 @@ struct anv_saved_state { struct anv_bindings bindings; struct anv_bindings *old_bindings; struct 
anv_pipeline *old_pipeline; + VkDynamicCbState cb_state; }; static void @@ -429,9 +447,19 @@ anv_device_init_meta_blit_state(struct anv_device *device) .frontFace = VK_FRONT_FACE_CCW }; + VkPipelineCbStateCreateInfo cb_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO, + .pNext = &rs_create_info, + .attachmentCount = 1, + .pAttachments = (VkPipelineCbAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }; + VkGraphicsPipelineCreateInfo pipeline_info = { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = &rs_create_info, + .pNext = &cb_create_info, .flags = 0, .layout = pipeline_layout, }; @@ -474,6 +502,11 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, VK_STATE_BIND_POINT_RASTER, device->blit_state.rs_state); + + saved_state->cb_state = (VkDynamicCbState) cmd_buffer->cb_state; + anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_COLOR_BLEND, + device->blit_state.cb_state); } struct blit_region { @@ -639,6 +672,9 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, const struct anv_saved_state *saved_state) { anv_cmd_buffer_restore(cmd_buffer, saved_state); + anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_COLOR_BLEND, + saved_state->cb_state); } static VkFormat diff --git a/src/vulkan/private.h b/src/vulkan/private.h index d29b20ad6d8..cad3423984b 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -283,12 +283,14 @@ struct anv_instance { struct anv_clear_state { VkPipeline pipeline; VkDynamicRsState rs_state; + VkDynamicCbState cb_state; }; struct anv_blit_state { VkPipeline pipeline; VkDynamicRsState rs_state; VkDescriptorSetLayout ds_layout; + VkDynamicCbState cb_state; }; struct anv_device { -- cgit v1.2.3 From 2514ac5547337cfe43ce8a117c6d1bb40b78758d Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: 
Tue, 26 May 2015 16:48:21 -0700 Subject: vk/test: Create and use color/blend dynamic and pipeline state --- src/vulkan/vk.c | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c index ff5e184865d..e7d21cca9b0 100644 --- a/src/vulkan/vk.c +++ b/src/vulkan/vk.c @@ -206,10 +206,20 @@ create_pipeline(VkDevice device, VkPipeline *pipeline, .frontFace = VK_FRONT_FACE_CCW }; + VkPipelineCbStateCreateInfo cb_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO, + .pNext = &rs_create_info, + .attachmentCount = 1, + .pAttachments = (VkPipelineCbAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }; + vkCreateGraphicsPipeline(device, &(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = &rs_create_info, + .pNext = &cb_create_info, .flags = 0, .layout = pipeline_layout }, @@ -449,6 +459,7 @@ struct test_context { VkDynamicVpState vp_state; VkDynamicRsState rs_state; VkDynamicDsState ds_state; + VkDynamicCbState cb_state; VkColorAttachmentView rt_view; VkDepthStencilView ds_view; uint32_t rt_size; @@ -628,6 +639,12 @@ test_prepare(struct test_context *ctx, VkDevice device, VkQueue queue, uint32_t }, &ctx->ds_state); + vkCreateDynamicColorBlendState(ctx->device, + &(VkDynamicCbStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO + }, + &ctx->cb_state); + vkCreateColorAttachmentView(ctx->device, &(VkColorAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, @@ -752,6 +769,7 @@ test_finish(struct test_context *ctx) vkFreeMemory(ctx->device, ctx->mem); vkDestroyObject(ctx->device, VK_OBJECT_TYPE_DYNAMIC_VP_STATE, ctx->vp_state); vkDestroyObject(ctx->device, VK_OBJECT_TYPE_DYNAMIC_RS_STATE, ctx->rs_state); + vkDestroyObject(ctx->device, VK_OBJECT_TYPE_DYNAMIC_CB_STATE, 
ctx->cb_state); vkDestroyObject(ctx->device, VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW, ctx->rt_view); vkDestroyObject(ctx->device, VK_OBJECT_TYPE_FRAMEBUFFER, ctx->framebuffer); vkDestroyObject(ctx->device, VK_OBJECT_TYPE_RENDER_PASS, ctx->pass); @@ -865,10 +883,20 @@ test_create_solid_color_pipeline(struct test_context *ctx) .depthCompareOp = VK_COMPARE_OP_GREATER }; + VkPipelineCbStateCreateInfo cb_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO, + .pNext = &ds_create_info, + .attachmentCount = 1, + .pAttachments = (VkPipelineCbAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }; + vkCreateGraphicsPipeline(ctx->device, &(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = &ds_create_info, + .pNext = &cb_create_info, .flags = 0, .layout = VK_NULL_HANDLE }, @@ -918,6 +946,8 @@ test_depth_stencil(VkDevice device, VkQueue queue) VK_STATE_BIND_POINT_RASTER, ctx.rs_state); vkCmdBindDynamicStateObject(ctx.cmdBuffer, VK_STATE_BIND_POINT_DEPTH_STENCIL, ctx.ds_state); + vkCmdBindDynamicStateObject(ctx.cmdBuffer, + VK_STATE_BIND_POINT_COLOR_BLEND, ctx.cb_state); vkCmdDraw(ctx.cmdBuffer, 0, 3, 0, 1); vkCmdDraw(ctx.cmdBuffer, 3, 3, 1, 1); @@ -1193,6 +1223,13 @@ test_triangle(VkDevice device, VkQueue queue) }, &rs_state); + VkDynamicCbState cb_state; + vkCreateDynamicColorBlendState(device, + &(VkDynamicCbStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO + }, + &cb_state); + /* FIXME: Need to query memory info before binding to memory */ vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_IMAGE, rt, @@ -1417,6 +1454,8 @@ test_triangle(VkDevice device, VkQueue queue) VK_STATE_BIND_POINT_VIEWPORT, vp_state); vkCmdBindDynamicStateObject(cmdBuffer, VK_STATE_BIND_POINT_RASTER, rs_state); + vkCmdBindDynamicStateObject(cmdBuffer, + VK_STATE_BIND_POINT_COLOR_BLEND, cb_state); vkCmdBeginQuery(cmdBuffer, query_pool, 
0 /*slot*/, 0 /* flags */); -- cgit v1.2.3 From a1d30f867dc4cfc8c779ce0b3f53c6e6d9360e24 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 26 May 2015 17:12:18 -0700 Subject: vk: Add support for dynamic and pipeline color blend state --- src/vulkan/device.c | 16 +++++--- src/vulkan/pipeline.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/vulkan/private.h | 1 + 3 files changed, 113 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index fb190025c78..3729a3c544c 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2490,6 +2490,8 @@ void anv_CmdBindDynamicStateObject( cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY; break; case VK_STATE_BIND_POINT_COLOR_BLEND: + cmd_buffer->cb_state = (struct anv_dynamic_cb_state *) dynamicState; + cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY; break; case VK_STATE_BIND_POINT_DEPTH_STENCIL: cmd_buffer->ds_state = (struct anv_dynamic_ds_state *) dynamicState; @@ -2773,15 +2775,19 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->dirty & (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) { struct anv_state state; - if (cmd_buffer->ds_state) + if (cmd_buffer->ds_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 32); + else if (cmd_buffer->cb_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->ds_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 32); + else state = anv_cmd_buffer_merge_dynamic(cmd_buffer, cmd_buffer->ds_state->state_color_calc, cmd_buffer->cb_state->state_color_calc, GEN8_COLOR_CALC_STATE_length, 32); - else - state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->cb_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 32); anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CC_STATE_POINTERS, diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 
ca11ca8b343..e33b7e6c376 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -192,12 +192,12 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, .PointWidthSource = info->programPointSize ? Vertex : State, }; - /* bool32_t rasterizerDiscardEnable; */ - + /* FINISHME: bool32_t rasterizerDiscardEnable; */ GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); struct GEN8_3DSTATE_RASTER raster = { + GEN8_3DSTATE_RASTER_header, .FrontWinding = vk_to_gen_front_face[info->frontFace], .CullMode = vk_to_gen_cullmode[info->cullMode], .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], @@ -218,6 +218,101 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, } +static void +emit_cb_state(struct anv_pipeline *pipeline, VkPipelineCbStateCreateInfo *info) +{ + struct anv_device *device = pipeline->device; + + static const uint32_t vk_to_gen_logic_op[] = { + [VK_LOGIC_OP_COPY] = LOGICOP_COPY, + [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, + [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = LOGICOP_XOR, + [VK_LOGIC_OP_OR] = LOGICOP_OR, + [VK_LOGIC_OP_NOR] = LOGICOP_NOR, + [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = LOGICOP_NAND, + [VK_LOGIC_OP_SET] = LOGICOP_SET, + }; + + static const uint32_t vk_to_gen_blend[] = { + [VK_BLEND_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + [VK_BLEND_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + 
[VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, + }; + + static const uint32_t vk_to_gen_blend_op[] = { + [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, + [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, + [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, + [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, + [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, + }; + + uint32_t num_dwords = 1 + info->attachmentCount * 2; + pipeline->blend_state = + anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); + + struct GEN8_BLEND_STATE blend_state = { + .AlphaToCoverageEnable = info->alphaToCoverageEnable, + }; + + uint32_t *state = pipeline->blend_state.map; + GEN8_BLEND_STATE_pack(NULL, state, &blend_state); + + for (uint32_t i = 0; i < info->attachmentCount; i++) { + const VkPipelineCbAttachmentState *a = &info->pAttachments[i]; + + struct GEN8_BLEND_STATE_ENTRY entry = { + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .ColorBufferBlendEnable = a->blendEnable, + .PreBlendSourceOnlyClampEnable = false, + .PreBlendColorClampEnable = false, + .PostBlendColorClampEnable = false, + .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], + .DestinationBlendFactor = vk_to_gen_blend[a->destBlendColor], + .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], + 
.SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], + .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], + .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), + .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), + .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), + .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), + }; + + GEN8_BLEND_STATE_ENTRY_pack(NULL, state + i * 2 + 1, &entry); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_BLEND_STATE_POINTERS, + .BlendStatePointer = pipeline->blend_state.offset, + .BlendStatePointerValid = true); +} + static const uint32_t vk_to_gen_compare_op[] = { [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER, [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS, @@ -301,6 +396,7 @@ anv_pipeline_create( VkPipelineIaStateCreateInfo *ia_info = NULL; VkPipelineRsStateCreateInfo *rs_info = NULL; VkPipelineDsStateCreateInfo *ds_info = NULL; + VkPipelineCbStateCreateInfo *cb_info = NULL; VkPipelineVertexInputCreateInfo *vi_info; VkResult result; uint32_t offset, length; @@ -344,7 +440,7 @@ anv_pipeline_create( anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO"); break; case VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO: - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO"); + cb_info = (VkPipelineCbStateCreateInfo *) common; break; case VK_STRUCTURE_TYPE_PIPELINE_DS_STATE_CREATE_INFO: ds_info = (VkPipelineDsStateCreateInfo *) common; @@ -381,6 +477,8 @@ anv_pipeline_create( if (ds_info) emit_ds_state(pipeline, ds_info); + emit_cb_state(pipeline, cb_info); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, .ClipEnable = true, .ViewportXYClipTestEnable = !(extra && extra->disable_viewport)); diff --git a/src/vulkan/private.h b/src/vulkan/private.h index cad3423984b..7dbf288022e 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -639,6 +639,7 @@ struct anv_pipeline { 
uint32_t active_stages; struct anv_state_stream program_stream; + struct anv_state blend_state; uint32_t vs_simd8; uint32_t ps_simd8; uint32_t ps_simd16; -- cgit v1.2.3 From 59328bac108ff6786b939bc56cfb124da87202bc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 May 2015 15:29:10 -0700 Subject: vk/allocator: Add a free list that acts on pointers instead of offsets --- src/vulkan/allocator.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 17d44423342..3fc6e8e39be 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -198,6 +198,52 @@ anv_free_list_push(union anv_free_list *list, void *map, uint32_t offset) } while (old.u64 != current.u64); } +/* All pointers in the ptr_free_list are assumed to be page-aligned. This + * means that the bottom 12 bits should all be zero. + */ +#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) +#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) +#define PFL_PACK(ptr, count) ({ \ + assert(((uintptr_t)(ptr) & 0xfff) == 0); \ + (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ +}) + +static bool +anv_ptr_free_list_pop(void **list, void **elem) +{ + void *current = *list; + while (PFL_PTR(current) != NULL) { + void **next_ptr = PFL_PTR(current); + void *new_ptr = VG_NOACCESS_READ(next_ptr); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(new_ptr, new_count); + void *old = __sync_val_compare_and_swap(list, current, new); + if (old == current) { + *elem = PFL_PTR(current); + return true; + } + current = old; + } + + return false; +} + +static void +anv_ptr_free_list_push(void **list, void *elem) +{ + void *old, *current; + void **next_ptr = elem; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current)); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(elem, new_count); + old = __sync_val_compare_and_swap(list, 
current, new); + } while (old != current); +} + static int anv_block_pool_grow(struct anv_block_pool *pool); -- cgit v1.2.3 From 6f3e3c715a784613bfaebedb049e3a9dac215e04 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 May 2015 15:29:27 -0700 Subject: vk/allocator: Add a BO pool --- src/vulkan/allocator.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/private.h | 18 +++++++++++++ 2 files changed, 91 insertions(+) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 3fc6e8e39be..aeb08ce1cec 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -642,3 +642,76 @@ anv_state_stream_alloc(struct anv_state_stream *stream, return state; } + +struct bo_pool_bo_link { + struct bo_pool_bo_link *next; + struct anv_bo bo; +}; + +void +anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t bo_size) +{ + pool->device = device; + pool->bo_size = bo_size; + pool->free_list = NULL; +} + +void +anv_bo_pool_finish(struct anv_bo_pool *pool) +{ + struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); + while (link != NULL) { + struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); + anv_gem_munmap(link_copy.bo.map, pool->bo_size); + anv_gem_close(pool->device, link_copy.bo.gem_handle); + link = link_copy.next; + } +} + +VkResult +anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) +{ + VkResult result; + + void *next_free_void; + if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { + struct bo_pool_bo_link *next_free = next_free_void; + *bo = VG_NOACCESS_READ(&next_free->bo); + assert(bo->map == next_free); + assert(bo->size == pool->bo_size); + + VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, pool->bo_size, 0, false)); + + return VK_SUCCESS; + } + + struct anv_bo new_bo; + + result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size); + if (result != VK_SUCCESS) + return result; + + assert(new_bo.size == pool->bo_size); + + new_bo.map = 
anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size); + if (new_bo.map == NULL) { + anv_gem_close(pool->device, new_bo.gem_handle); + return vk_error(VK_ERROR_MEMORY_MAP_FAILED); + } + + VG(VALGRIND_MALLOCLIKE_BLOCK(new_bo.map, pool->bo_size, 0, false)); + + *bo = new_bo; + return VK_SUCCESS; +} + +void +anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) +{ + struct bo_pool_bo_link *link = bo->map; + link->bo = *bo; + + VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0)); + anv_ptr_free_list_push(&pool->free_list, link); +} diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 7dbf288022e..5b7f936cdc3 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -251,6 +251,24 @@ void anv_state_stream_finish(struct anv_state_stream *stream); struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, uint32_t size, uint32_t alignment); +/** + * Implements a pool of re-usable BOs. The interface is identical to that + * of block_pool except that each block is its own BO. 
+ */ +struct anv_bo_pool { + struct anv_device *device; + + uint32_t bo_size; + + void *free_list; +}; + +void anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_bo_pool_finish(struct anv_bo_pool *pool); +VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); +void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); + struct anv_object; struct anv_device; -- cgit v1.2.3 From 5ef81f0a05ba383188aa9e3f9afd72e2f76cf032 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 May 2015 15:46:48 -0700 Subject: vk/device: Use a bo pool for batch buffers --- src/vulkan/device.c | 26 ++++++++------------------ src/vulkan/private.h | 4 +++- 2 files changed, 11 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 3729a3c544c..5c2e276210d 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -303,6 +303,8 @@ parse_debug_flags(struct anv_device *device) } } +static const uint32_t BATCH_SIZE = 1 << 15; + VkResult anv_CreateDevice( VkPhysicalDevice _physicalDevice, const VkDeviceCreateInfo* pCreateInfo, @@ -333,6 +335,8 @@ VkResult anv_CreateDevice( if (device->context_id == -1) goto fail_fd; + anv_bo_pool_init(&device->batch_bo_pool, device, BATCH_SIZE); + anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048); anv_state_pool_init(&device->dynamic_state_pool, @@ -381,6 +385,8 @@ VkResult anv_DestroyDevice( anv_compiler_destroy(device->compiler); + + anv_bo_pool_finish(&device->batch_bo_pool); anv_block_pool_finish(&device->dynamic_state_block_pool); anv_block_pool_finish(&device->instruction_block_pool); anv_block_pool_finish(&device->surface_state_block_pool); @@ -495,41 +501,25 @@ VkResult anv_GetDeviceQueue( return VK_SUCCESS; } -static const uint32_t BATCH_SIZE = 8192; - VkResult anv_batch_init(struct anv_batch *batch, struct anv_device *device) { VkResult result; - result = anv_bo_init_new(&batch->bo, 
device, BATCH_SIZE); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &batch->bo); if (result != VK_SUCCESS) return result; - batch->bo.map = - anv_gem_mmap(device, batch->bo.gem_handle, 0, BATCH_SIZE); - if (batch->bo.map == NULL) { - result = vk_error(VK_ERROR_MEMORY_MAP_FAILED); - goto fail_bo; - } - batch->cmd_relocs.num_relocs = 0; batch->next = batch->bo.map; return VK_SUCCESS; - - fail_bo: - anv_gem_close(device, batch->bo.gem_handle); - - return result; - } void anv_batch_finish(struct anv_batch *batch, struct anv_device *device) { - anv_gem_munmap(batch->bo.map, BATCH_SIZE); - anv_gem_close(device, batch->bo.gem_handle); + anv_bo_pool_free(&device->batch_bo_pool, &batch->bo); } void diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 5b7f936cdc3..6e43258c903 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -320,6 +320,8 @@ struct anv_device { bool no_hw; bool dump_aub; + struct anv_bo_pool batch_bo_pool; + struct anv_block_pool dynamic_state_block_pool; struct anv_state_pool dynamic_state_pool; @@ -421,7 +423,7 @@ struct anv_address { static inline uint64_t __gen_combine_address(struct anv_batch *batch, void *location, const struct anv_address address, uint32_t delta) -{ +{ if (address.bo == NULL) { return delta; } else { -- cgit v1.2.3 From 403266be05bbe36e9376f0a747dd454bf0da0958 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 May 2015 17:38:15 -0700 Subject: vk/device: Make reloc lists growable --- src/vulkan/device.c | 121 +++++++++++++++++++++++++++++++++++++++++--------- src/vulkan/pipeline.c | 2 +- src/vulkan/private.h | 13 +++--- 3 files changed, 109 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 5c2e276210d..4cbf409b216 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -501,6 +501,75 @@ VkResult anv_GetDeviceQueue( return VK_SUCCESS; } +static VkResult +anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) +{ 
+ list->num_relocs = 0; + list->array_length = 256; + list->relocs = + anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + + if (list->relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + list->reloc_bos = + anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + + if (list->relocs == NULL) { + anv_device_free(device, list->relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + return VK_SUCCESS; +} + +static void +anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device) +{ + anv_device_free(device, list->relocs); + anv_device_free(device, list->reloc_bos); +} + +static VkResult +anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, + size_t num_additional_relocs) +{ + if (list->num_relocs + num_additional_relocs <= list->array_length) + return VK_SUCCESS; + + size_t new_length = list->array_length * 2; + while (new_length < list->num_relocs + num_additional_relocs) + new_length *= 2; + + struct drm_i915_gem_relocation_entry *new_relocs = + anv_device_alloc(device, new_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_reloc_bos = + anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_relocs == NULL) { + anv_device_free(device, new_relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); + memcpy(new_reloc_bos, list->reloc_bos, + list->num_relocs * sizeof(*list->reloc_bos)); + + anv_device_free(device, list->relocs); + anv_device_free(device, list->reloc_bos); + + list->relocs = new_relocs; + list->reloc_bos = new_reloc_bos; + + return VK_SUCCESS; +} + VkResult anv_batch_init(struct anv_batch *batch, struct anv_device *device) { @@ 
-510,16 +579,23 @@ anv_batch_init(struct anv_batch *batch, struct anv_device *device) if (result != VK_SUCCESS) return result; - batch->cmd_relocs.num_relocs = 0; + result = anv_reloc_list_init(&batch->cmd_relocs, device); + if (result != VK_SUCCESS) { + anv_bo_pool_free(&device->batch_bo_pool, &batch->bo); + return result; + } + + batch->device = device; batch->next = batch->bo.map; return VK_SUCCESS; } void -anv_batch_finish(struct anv_batch *batch, struct anv_device *device) +anv_batch_finish(struct anv_batch *batch) { - anv_bo_pool_free(&device->batch_bo_pool, &batch->bo); + anv_bo_pool_free(&batch->device->batch_bo_pool, &batch->bo); + anv_reloc_list_finish(&batch->cmd_relocs, batch->device); } void @@ -540,31 +616,32 @@ anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) } static void -anv_reloc_list_append(struct anv_reloc_list *list, +anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device, struct anv_reloc_list *other, uint32_t offset) { - uint32_t i, count; + anv_reloc_list_grow(list, device, other->num_relocs); + /* TODO: Handle failure */ - count = list->num_relocs; - memcpy(&list->relocs[count], &other->relocs[0], + memcpy(&list->relocs[list->num_relocs], &other->relocs[0], other->num_relocs * sizeof(other->relocs[0])); - memcpy(&list->reloc_bos[count], &other->reloc_bos[0], + memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], other->num_relocs * sizeof(other->reloc_bos[0])); - for (i = 0; i < other->num_relocs; i++) - list->relocs[i + count].offset += offset; - count += other->num_relocs; + for (uint32_t i = 0; i < other->num_relocs; i++) + list->relocs[i + list->num_relocs].offset += offset; + + list->num_relocs += other->num_relocs; } static uint64_t -anv_reloc_list_add(struct anv_reloc_list *list, - uint32_t offset, - struct anv_bo *target_bo, uint32_t delta) +anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device, + uint32_t offset, struct anv_bo *target_bo, uint32_t delta) { struct 
drm_i915_gem_relocation_entry *entry; int index; - assert(list->num_relocs < ANV_BATCH_MAX_RELOCS); + anv_reloc_list_grow(list, device, 1); + /* TODO: Handle failure */ /* XXX: Can we use I915_EXEC_HANDLE_LUT? */ index = list->num_relocs++; @@ -589,7 +666,8 @@ anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) memcpy(batch->next, other->bo.map, size); offset = batch->next - batch->bo.map; - anv_reloc_list_append(&batch->cmd_relocs, &other->cmd_relocs, offset); + anv_reloc_list_append(&batch->cmd_relocs, batch->device, + &other->cmd_relocs, offset); batch->next += size; } @@ -598,7 +676,7 @@ uint64_t anv_batch_emit_reloc(struct anv_batch *batch, void *location, struct anv_bo *bo, uint32_t delta) { - return anv_reloc_list_add(&batch->cmd_relocs, + return anv_reloc_list_add(&batch->cmd_relocs, batch->device, location - batch->bo.map, bo, delta); } @@ -2147,10 +2225,11 @@ anv_cmd_buffer_destroy(struct anv_device *device, anv_gem_munmap(cmd_buffer->surface_bo.map, BATCH_SIZE); anv_gem_close(device, cmd_buffer->surface_bo.gem_handle); + anv_reloc_list_finish(&cmd_buffer->surface_relocs, device); anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); anv_state_stream_finish(&cmd_buffer->binding_table_state_stream); - anv_batch_finish(&cmd_buffer->batch, device); + anv_batch_finish(&cmd_buffer->batch); anv_device_free(device, cmd_buffer->exec2_objects); anv_device_free(device, cmd_buffer->exec2_bos); anv_device_free(device, cmd_buffer); @@ -2195,7 +2274,7 @@ VkResult anv_CreateCommandBuffer( /* Start surface_next at 1 so surface offset 0 is invalid. 
*/ cmd_buffer->surface_next = 1; - cmd_buffer->surface_relocs.num_relocs = 0; + anv_reloc_list_init(&cmd_buffer->surface_relocs, device); cmd_buffer->exec2_objects = anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_objects[0]), 8, @@ -2235,7 +2314,7 @@ VkResult anv_CreateCommandBuffer( fail_surface_bo: anv_gem_close(device, cmd_buffer->surface_bo.gem_handle); fail_batch: - anv_batch_finish(&cmd_buffer->batch, device); + anv_batch_finish(&cmd_buffer->batch); fail: anv_device_free(device, cmd_buffer); @@ -2546,6 +2625,7 @@ void anv_CmdBindDescriptorSets( /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ *(uint64_t *)(state.map + 8 * 4) = anv_reloc_list_add(&cmd_buffer->surface_relocs, + cmd_buffer->device, state.offset + 8 * 4, view->bo, view->offset); @@ -3321,6 +3401,7 @@ anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer) /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ *(uint64_t *)(state.map + 8 * 4) = anv_reloc_list_add(&cmd_buffer->surface_relocs, + cmd_buffer->device, state.offset + 8 * 4, view->bo, view->offset); diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index e33b7e6c376..c0606dabba7 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -378,7 +378,7 @@ anv_pipeline_destroy(struct anv_device *device, assert(obj_type == VK_OBJECT_TYPE_PIPELINE); anv_compiler_free(pipeline); - anv_batch_finish(&pipeline->batch, pipeline->device); + anv_batch_finish(&pipeline->batch); anv_device_free(pipeline->device, pipeline); } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 6e43258c903..a77fb28449b 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -389,23 +389,24 @@ int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); -/* TODO: Remove hardcoded reloc limit. 
*/ -#define ANV_BATCH_MAX_RELOCS 256 - struct anv_reloc_list { size_t num_relocs; - struct drm_i915_gem_relocation_entry relocs[ANV_BATCH_MAX_RELOCS]; - struct anv_bo * reloc_bos[ANV_BATCH_MAX_RELOCS]; + size_t array_length; + struct drm_i915_gem_relocation_entry * relocs; + struct anv_bo ** reloc_bos; }; struct anv_batch { + struct anv_device * device; + struct anv_bo bo; void * next; + struct anv_reloc_list cmd_relocs; }; VkResult anv_batch_init(struct anv_batch *batch, struct anv_device *device); -void anv_batch_finish(struct anv_batch *batch, struct anv_device *device); +void anv_batch_finish(struct anv_batch *batch); void anv_batch_reset(struct anv_batch *batch); void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); -- cgit v1.2.3 From 1c63575de8870e80462dc4053938720d9ff58f26 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 May 2015 18:27:43 -0700 Subject: vk/cmd_buffer: Allocate the surface_bo from device->batch_bo_pool --- src/vulkan/device.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 4cbf409b216..90fe8246216 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2223,8 +2223,7 @@ anv_cmd_buffer_destroy(struct anv_device *device, assert(obj_type == VK_OBJECT_TYPE_COMMAND_BUFFER); - anv_gem_munmap(cmd_buffer->surface_bo.map, BATCH_SIZE); - anv_gem_close(device, cmd_buffer->surface_bo.gem_handle); + anv_bo_pool_free(&device->batch_bo_pool, &cmd_buffer->surface_bo); anv_reloc_list_finish(&cmd_buffer->surface_relocs, device); anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); @@ -2261,17 +2260,10 @@ VkResult anv_CreateCommandBuffer( if (result != VK_SUCCESS) goto fail; - result = anv_bo_init_new(&cmd_buffer->surface_bo, device, BATCH_SIZE); + result = 
anv_bo_pool_alloc(&device->batch_bo_pool, &cmd_buffer->surface_bo); if (result != VK_SUCCESS) goto fail_batch; - cmd_buffer->surface_bo.map = - anv_gem_mmap(device, cmd_buffer->surface_bo.gem_handle, 0, BATCH_SIZE); - if (cmd_buffer->surface_bo.map == NULL) { - result = vk_error(VK_ERROR_MEMORY_MAP_FAILED); - goto fail_surface_bo; - } - /* Start surface_next at 1 so surface offset 0 is invalid. */ cmd_buffer->surface_next = 1; anv_reloc_list_init(&cmd_buffer->surface_relocs, device); @@ -2281,7 +2273,7 @@ VkResult anv_CreateCommandBuffer( VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (cmd_buffer->exec2_objects == NULL) { result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_surface_map; + goto fail_surface_bo; } cmd_buffer->exec2_bos = @@ -2309,10 +2301,8 @@ VkResult anv_CreateCommandBuffer( fail_exec2_objects: anv_device_free(device, cmd_buffer->exec2_objects); - fail_surface_map: - anv_gem_munmap(cmd_buffer->surface_bo.map, BATCH_SIZE); fail_surface_bo: - anv_gem_close(device, cmd_buffer->surface_bo.gem_handle); + anv_bo_pool_free(&device->batch_bo_pool, &cmd_buffer->surface_bo); fail_batch: anv_batch_finish(&cmd_buffer->batch); fail: -- cgit v1.2.3 From 59def43fc8bb8c980748004333a9b3d6a00f734b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 May 2015 11:41:28 -0700 Subject: Fixup for growable reloc lists --- src/vulkan/device.c | 4 ++-- src/vulkan/private.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 90fe8246216..8c4bb67653d 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -501,7 +501,7 @@ VkResult anv_GetDeviceQueue( return VK_SUCCESS; } -static VkResult +VkResult anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) { list->num_relocs = 0; @@ -525,7 +525,7 @@ anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) return VK_SUCCESS; } -static void +void anv_reloc_list_finish(struct anv_reloc_list *list, 
struct anv_device *device) { anv_device_free(device, list->relocs); diff --git a/src/vulkan/private.h b/src/vulkan/private.h index a77fb28449b..8289772cfe1 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -396,6 +396,11 @@ struct anv_reloc_list { struct anv_bo ** reloc_bos; }; +VkResult anv_reloc_list_init(struct anv_reloc_list *list, + struct anv_device *device); +void anv_reloc_list_finish(struct anv_reloc_list *list, + struct anv_device *device); + struct anv_batch { struct anv_device * device; -- cgit v1.2.3 From da8f1482036d35e56250173d479f74b6514d27c7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 May 2015 11:42:55 -0700 Subject: vk: Rework anv_batch and use chaining batch buffers This mega-commit primarily does two things. First, is to turn anv_batch into a better abstraction of a batch. Instead of actually having a BO, it now has a few pointers to some piece of memory that are used to add data to the "batch". If it gets to the end, there is a function pointer that it can call to attempt to grow the batch. The second change is to start using chained batch buffers. When the end of the current batch BO is reached, it automatically creates a new one and ineserts an MI_BATCH_BUFFER_START command to chain to it. In this way, our batch buffers are effectively infinite in length. 
--- src/vulkan/aub.c | 31 +++++-- src/vulkan/device.c | 247 +++++++++++++++++++++++++++++++++++--------------- src/vulkan/pipeline.c | 18 ++-- src/vulkan/private.h | 35 +++++-- 4 files changed, 231 insertions(+), 100 deletions(-) (limited to 'src') diff --git a/src/vulkan/aub.c b/src/vulkan/aub.c index bb4772a39ea..dfe52213a40 100644 --- a/src/vulkan/aub.c +++ b/src/vulkan/aub.c @@ -216,7 +216,8 @@ struct aub_bo { }; static void -relocate_bo(struct anv_bo *bo, struct anv_reloc_list *list, struct aub_bo *bos) +relocate_bo(struct anv_bo *bo, struct drm_i915_gem_relocation_entry *relocs, + size_t num_relocs, struct aub_bo *bos) { struct aub_bo *aub_bo = &bos[bo->index]; struct drm_i915_gem_relocation_entry *reloc; @@ -224,8 +225,8 @@ relocate_bo(struct anv_bo *bo, struct anv_reloc_list *list, struct aub_bo *bos) aub_bo->relocated = malloc(bo->size); memcpy(aub_bo->relocated, aub_bo->map, bo->size); - for (size_t i = 0; i < list->num_relocs; i++) { - reloc = &list->relocs[i]; + for (size_t i = 0; i < num_relocs; i++) { + reloc = &relocs[i]; assert(reloc->offset < bo->size); dw = aub_bo->relocated + reloc->offset; *dw = bos[reloc->target_handle].offset + reloc->delta; @@ -240,7 +241,7 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) struct anv_aub_writer *writer; struct anv_bo *bo; uint32_t ring_flag = 0; - uint32_t offset, length; + uint32_t offset; struct aub_bo *aub_bos; writer = get_anv_aub_writer(device); @@ -260,17 +261,29 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) offset = ALIGN_U32(offset + bo->size + 4095, 4096); } - relocate_bo(&batch->bo, &batch->cmd_relocs, aub_bos); + struct anv_batch_bo *first_bbo; + for (struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; + bbo != NULL; bbo = bbo->prev_batch_bo) { + /* Keep stashing the current BO until we get to the beginning */ + first_bbo = bbo; + + /* Handle relocations for this batch BO */ + relocate_bo(&bbo->bo, &batch->relocs.relocs[bbo->first_reloc], + bbo->num_relocs, aub_bos); + } + 
assert(first_bbo->prev_batch_bo == NULL); + relocate_bo(&cmd_buffer->surface_bo, - &cmd_buffer->surface_relocs, aub_bos); + cmd_buffer->surface_relocs.relocs, + cmd_buffer->surface_relocs.num_relocs, aub_bos); for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) { bo = cmd_buffer->exec2_bos[i]; if (i == cmd_buffer->bo_count - 1) { - length = batch->next - batch->bo.map; + assert(bo == &first_bbo->bo); aub_write_trace_block(writer, AUB_TRACE_TYPE_BATCH, aub_bos[i].relocated, - length, aub_bos[i].offset); + first_bbo->length, aub_bos[i].offset); } else { aub_write_trace_block(writer, AUB_TRACE_TYPE_NOTYPE, aub_bos[i].relocated, @@ -283,7 +296,7 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) } /* Dump ring buffer */ - aub_build_dump_ringbuffer(writer, aub_bos[batch->bo.index].offset, + aub_build_dump_ringbuffer(writer, aub_bos[first_bbo->bo.index].offset, offset, ring_flag); free(aub_bos); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 8c4bb67653d..3c9c6d3e043 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -570,47 +570,64 @@ anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, return VK_SUCCESS; } -VkResult -anv_batch_init(struct anv_batch *batch, struct anv_device *device) +static VkResult +anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) { VkResult result; - result = anv_bo_pool_alloc(&device->batch_bo_pool, &batch->bo); - if (result != VK_SUCCESS) - return result; + struct anv_batch_bo *bbo = + anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_reloc_list_init(&batch->cmd_relocs, device); + bbo->num_relocs = 0; + bbo->prev_batch_bo = NULL; + + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); if (result != VK_SUCCESS) { - anv_bo_pool_free(&device->batch_bo_pool, &batch->bo); + anv_device_free(device, bbo); return result; } - batch->device = device; - 
batch->next = batch->bo.map; + *bbo_out = bbo; return VK_SUCCESS; } -void -anv_batch_finish(struct anv_batch *batch) +static void +anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) { - anv_bo_pool_free(&batch->device->batch_bo_pool, &batch->bo); - anv_reloc_list_finish(&batch->cmd_relocs, batch->device); + batch->next = batch->start = bbo->bo.map; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + bbo->first_reloc = batch->relocs.num_relocs; } -void -anv_batch_reset(struct anv_batch *batch) +static void +anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) +{ + assert(batch->start == bbo->bo.map); + bbo->length = batch->next - batch->start; + bbo->num_relocs = batch->relocs.num_relocs - bbo->first_reloc; +} + +static void +anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) { - batch->next = batch->bo.map; - batch->cmd_relocs.num_relocs = 0; + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + anv_device_free(device, bbo); } void * anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) { + if (batch->next + num_dwords * 4 > batch->end) + batch->extend_cb(batch, batch->user_data); + void *p = batch->next; batch->next += num_dwords * 4; + assert(batch->next <= batch->end); return p; } @@ -662,12 +679,19 @@ anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) { uint32_t size, offset; - size = other->next - other->bo.map; - memcpy(batch->next, other->bo.map, size); + size = other->next - other->start; + assert(size % 4 == 0); + + if (batch->next + size > batch->end) + batch->extend_cb(batch, batch->user_data); + + assert(batch->next + size <= batch->end); + + memcpy(batch->next, other->start, size); - offset = batch->next - batch->bo.map; - anv_reloc_list_append(&batch->cmd_relocs, batch->device, - &other->cmd_relocs, offset); + offset = batch->next - batch->start; + anv_reloc_list_append(&batch->relocs, batch->device, + &other->relocs, offset); 
batch->next += size; } @@ -676,8 +700,8 @@ uint64_t anv_batch_emit_reloc(struct anv_batch *batch, void *location, struct anv_bo *bo, uint32_t delta) { - return anv_reloc_list_add(&batch->cmd_relocs, batch->device, - location - batch->bo.map, bo, delta); + return anv_reloc_list_add(&batch->relocs, batch->device, + location - batch->start, bo, delta); } VkResult anv_QueueSubmit( @@ -758,7 +782,8 @@ VkResult anv_DeviceWaitIdle( state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32); bo = &device->dynamic_state_pool.block_pool->bo; - batch.next = state.map; + batch.start = batch.next = state.map; + batch.end = state.map + 32; anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); anv_batch_emit(&batch, GEN8_MI_NOOP); @@ -1244,7 +1269,8 @@ VkResult anv_CreateFence( fence->bo.map = anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size); - batch.next = fence->bo.map; + batch.next = batch.start = fence->bo.map; + batch.end = fence->bo.map + fence->bo.size; anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); anv_batch_emit(&batch, GEN8_MI_NOOP); @@ -2228,12 +2254,46 @@ anv_cmd_buffer_destroy(struct anv_device *device, anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); anv_state_stream_finish(&cmd_buffer->binding_table_state_stream); - anv_batch_finish(&cmd_buffer->batch); + anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); anv_device_free(device, cmd_buffer->exec2_objects); anv_device_free(device, cmd_buffer->exec2_bos); anv_device_free(device, cmd_buffer); } +static VkResult +anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) +{ + struct anv_cmd_buffer *cmd_buffer = _data; + struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->last_batch_bo; + + VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + if (result != VK_SUCCESS) + return result; + + struct GEN8_MI_BATCH_BUFFER_START cmd = { + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = 
_1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &new_bbo->bo, 0 }, + }; + GEN8_MI_BATCH_BUFFER_START_pack(batch, batch->next, &cmd); + + batch->next += GEN8_MI_BATCH_BUFFER_START_length * 4; + + /* Pad out to a 2-dword aligned boundary with zeros */ + if ((uintptr_t)batch->next % 8 != 0) + *(uint32_t *)batch->next = 0; + + anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch); + + new_bbo->prev_batch_bo = old_bbo; + cmd_buffer->last_batch_bo = new_bbo; + + anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); + + return VK_SUCCESS; +} + VkResult anv_CreateCommandBuffer( VkDevice _device, const VkCmdBufferCreateInfo* pCreateInfo, @@ -2256,33 +2316,32 @@ VkResult anv_CreateCommandBuffer( memset(&cmd_buffer->default_bindings, 0, sizeof(cmd_buffer->default_bindings)); cmd_buffer->bindings = &cmd_buffer->default_bindings; - result = anv_batch_init(&cmd_buffer->batch, device); + result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo); if (result != VK_SUCCESS) goto fail; + result = anv_reloc_list_init(&cmd_buffer->batch.relocs, device); + if (result != VK_SUCCESS) + goto fail_batch_bo; + + cmd_buffer->batch.device = device; + cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; + cmd_buffer->batch.user_data = cmd_buffer; + + anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &cmd_buffer->surface_bo); if (result != VK_SUCCESS) - goto fail_batch; + goto fail_batch_relocs; /* Start surface_next at 1 so surface offset 0 is invalid. 
*/ cmd_buffer->surface_next = 1; anv_reloc_list_init(&cmd_buffer->surface_relocs, device); - cmd_buffer->exec2_objects = - anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_objects[0]), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (cmd_buffer->exec2_objects == NULL) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_surface_bo; - } - - cmd_buffer->exec2_bos = - anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_bos[0]), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (cmd_buffer->exec2_bos == NULL) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_exec2_objects; - } + cmd_buffer->exec2_objects = NULL; + cmd_buffer->exec2_bos = NULL; + cmd_buffer->exec2_array_length = 0; anv_state_stream_init(&cmd_buffer->binding_table_state_stream, &device->binding_table_block_pool); @@ -2299,12 +2358,10 @@ VkResult anv_CreateCommandBuffer( return VK_SUCCESS; - fail_exec2_objects: - anv_device_free(device, cmd_buffer->exec2_objects); - fail_surface_bo: - anv_bo_pool_free(&device->batch_bo_pool, &cmd_buffer->surface_bo); - fail_batch: - anv_batch_finish(&cmd_buffer->batch); + fail_batch_relocs: + anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); + fail_batch_bo: + anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device); fail: anv_device_free(device, cmd_buffer); @@ -2376,16 +2433,51 @@ VkResult anv_BeginCommandBuffer( return VK_SUCCESS; } -static void +static VkResult anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *bo, struct anv_reloc_list *list) { struct drm_i915_gem_exec_object2 *obj; - bo->index = cmd_buffer->bo_count; + if (bo->index < cmd_buffer->bo_count && + cmd_buffer->exec2_bos[bo->index] == bo) + return VK_SUCCESS; + + if (cmd_buffer->bo_count >= cmd_buffer->exec2_array_length) { + uint32_t new_len = cmd_buffer->exec2_objects ? 
+ cmd_buffer->exec2_array_length * 2 : 64; + + struct drm_i915_gem_exec_object2 *new_objects = + anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_objects == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_bos = + anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_objects == NULL) { + anv_device_free(cmd_buffer->device, new_objects); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (cmd_buffer->exec2_objects) { + memcpy(new_objects, cmd_buffer->exec2_objects, + cmd_buffer->bo_count * sizeof(*new_objects)); + memcpy(new_bos, cmd_buffer->exec2_bos, + cmd_buffer->bo_count * sizeof(*new_bos)); + } + + cmd_buffer->exec2_objects = new_objects; + cmd_buffer->exec2_bos = new_bos; + cmd_buffer->exec2_array_length = new_len; + } + + assert(cmd_buffer->bo_count < cmd_buffer->exec2_array_length); + + bo->index = cmd_buffer->bo_count++; obj = &cmd_buffer->exec2_objects[bo->index]; cmd_buffer->exec2_bos[bo->index] = bo; - cmd_buffer->bo_count++; obj->handle = bo->gem_handle; obj->relocation_count = 0; @@ -2400,27 +2492,16 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, obj->relocation_count = list->num_relocs; obj->relocs_ptr = (uintptr_t) list->relocs; } + + return VK_SUCCESS; } static void anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer, struct anv_reloc_list *list) { - struct anv_bo *bo, *batch_bo; - - batch_bo = &cmd_buffer->batch.bo; - for (size_t i = 0; i < list->num_relocs; i++) { - bo = list->reloc_bos[i]; - /* Skip any relocations targeting the batch bo. We need to make sure - * it's the last in the list so we'll add it manually later. 
- */ - if (bo == batch_bo) - continue; - if (bo->index < cmd_buffer->bo_count && cmd_buffer->exec2_bos[bo->index] == bo) - continue; - - anv_cmd_buffer_add_bo(cmd_buffer, bo, NULL); - } + for (size_t i = 0; i < list->num_relocs; i++) + anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL); } static void @@ -2456,12 +2537,19 @@ VkResult anv_EndCommandBuffer( anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END); /* Round batch up to an even number of dwords. */ - if ((batch->next - batch->bo.map) & 4) + if ((batch->next - batch->start) & 4) anv_batch_emit(batch, GEN8_MI_NOOP); + anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch); + cmd_buffer->bo_count = 0; cmd_buffer->need_reloc = false; + /* Find the first batch bo in the list */ + struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo; + while (batch_bo->prev_batch_bo) + batch_bo = batch_bo->prev_batch_bo; + /* Lock for access to bo->index. */ pthread_mutex_lock(&device->mutex); @@ -2470,15 +2558,15 @@ VkResult anv_EndCommandBuffer( &cmd_buffer->surface_relocs); anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs); - anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->cmd_relocs); - anv_cmd_buffer_add_bo(cmd_buffer, &batch->bo, &batch->cmd_relocs); + anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs); + anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo, &batch->relocs); anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); - anv_cmd_buffer_process_relocs(cmd_buffer, &batch->cmd_relocs); + anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs); cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects; cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count; cmd_buffer->execbuf.batch_start_offset = 0; - cmd_buffer->execbuf.batch_len = batch->next - batch->bo.map; + cmd_buffer->execbuf.batch_len = batch->next - batch->start; cmd_buffer->execbuf.cliprects_ptr = 0; cmd_buffer->execbuf.num_cliprects = 0; cmd_buffer->execbuf.DR1 = 0; @@ 
-2501,7 +2589,18 @@ VkResult anv_ResetCommandBuffer( { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - anv_batch_reset(&cmd_buffer->batch); + /* Delete all but the first batch bo */ + while (cmd_buffer->last_batch_bo->prev_batch_bo) { + struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo; + anv_batch_bo_destroy(cmd_buffer->last_batch_bo, cmd_buffer->device); + cmd_buffer->last_batch_bo = prev; + } + assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL); + + cmd_buffer->batch.relocs.num_relocs = 0; + anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + cmd_buffer->surface_next = 0; cmd_buffer->surface_relocs.num_relocs = 0; diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index c0606dabba7..e8441e6efd5 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -378,7 +378,7 @@ anv_pipeline_destroy(struct anv_device *device, assert(obj_type == VK_OBJECT_TYPE_PIPELINE); anv_compiler_free(pipeline); - anv_batch_finish(&pipeline->batch); + anv_reloc_list_finish(&pipeline->batch.relocs, pipeline->device); anv_device_free(pipeline->device, pipeline); } @@ -412,9 +412,14 @@ anv_pipeline_create( pipeline->device = device; pipeline->layout = (struct anv_pipeline_layout *) pCreateInfo->layout; memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); - result = anv_batch_init(&pipeline->batch, device); - if (result != VK_SUCCESS) - goto fail; + + result = anv_reloc_list_init(&pipeline->batch.relocs, device); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); anv_state_stream_init(&pipeline->program_stream, &device->instruction_block_pool); @@ -672,11 +677,6 @@ anv_pipeline_create( *pPipeline = (VkPipeline) pipeline; return VK_SUCCESS; - - fail: - anv_device_free(device, 
pipeline); - - return result; } VkResult anv_CreateGraphicsPipelineDerivative( diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 8289772cfe1..9a56c1949b8 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -401,18 +401,35 @@ VkResult anv_reloc_list_init(struct anv_reloc_list *list, void anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device); +struct anv_batch_bo { + struct anv_bo bo; + + /* Bytes actually consumed in this batch BO */ + size_t length; + + /* These offsets reference the per-batch reloc list */ + size_t first_reloc; + size_t num_relocs; + + struct anv_batch_bo * prev_batch_bo; +}; + struct anv_batch { struct anv_device * device; - struct anv_bo bo; + void * start; + void * end; void * next; - struct anv_reloc_list cmd_relocs; + struct anv_reloc_list relocs; + + /* This callback is called (with the associated user data) in the event + * that the batch runs out of space. + */ + VkResult (*extend_cb)(struct anv_batch *, void *); + void * user_data; }; -VkResult anv_batch_init(struct anv_batch *batch, struct anv_device *device); -void anv_batch_finish(struct anv_batch *batch); -void anv_batch_reset(struct anv_batch *batch); void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); uint64_t anv_batch_emit_reloc(struct anv_batch *batch, @@ -433,13 +450,12 @@ __gen_combine_address(struct anv_batch *batch, void *location, if (address.bo == NULL) { return delta; } else { - assert(batch->bo.map <= location && - (char *) location < (char *) batch->bo.map + batch->bo.size); + assert(batch->start <= location && location < batch->end); return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta); } } - + #include "gen7_pack.h" #include "gen75_pack.h" #undef GEN8_3DSTATE_MULTISAMPLE @@ -597,11 +613,13 @@ struct anv_cmd_buffer { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 * 
exec2_objects; struct anv_bo ** exec2_bos; + uint32_t exec2_array_length; bool need_reloc; uint32_t serial; uint32_t bo_count; struct anv_batch batch; + struct anv_batch_bo * last_batch_bo; struct anv_bo surface_bo; uint32_t surface_next; struct anv_reloc_list surface_relocs; @@ -642,6 +660,7 @@ struct anv_pipeline { struct anv_object base; struct anv_device * device; struct anv_batch batch; + uint32_t batch_data[256]; struct anv_shader * shaders[VK_NUM_SHADER_STAGE]; struct anv_pipeline_layout * layout; bool use_repclear; -- cgit v1.2.3 From b23885857feec96ef4d57f34b0fcc06f58e7c49b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 May 2015 12:06:30 -0700 Subject: vk/meta: Actually create the CB state for blits --- src/vulkan/meta.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index f8b66b0b2fe..83e37c43c7b 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -482,6 +482,12 @@ anv_device_init_meta_blit_state(struct anv_device *device) .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, }, &device->blit_state.rs_state); + + anv_CreateDynamicColorBlendState((VkDevice) device, + &(VkDynamicCbStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO + }, + &device->blit_state.cb_state); } static void -- cgit v1.2.3 From df4b02f4eda57842e790d5cade521ccdd0fd89e2 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 May 2015 16:02:50 -0700 Subject: vk/glsl_scraper: Fix code style for imports Python style is one module imported per line, and imports are at the top of the file. --- src/vulkan/glsl_scraper.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index ca5bad4a8a1..d476b3c0947 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -1,5 +1,14 @@ #! 
/usr/bin/env python +import cStringIO +import os +import re +import shutil +import struct +import subprocess +import sys +import tempfile + def print_usage(err): print """\ glsl_scraper.py [options] file @@ -16,8 +25,6 @@ Options: --with-glslang=PATH Full path to the glslangValidator program""" exit(err) -import os, sys, re, cStringIO, tempfile, subprocess, struct, shutil - class Shader: def __init__(self, stage): self.stream = cStringIO.StringIO() -- cgit v1.2.3 From fd8b5e0df2b07a1404cd4d94429663871024ff12 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 May 2015 16:09:26 -0700 Subject: vk/glsl_scraper: Indent large text blocks Indent them to the same level as if the text was code. No changes in entrypoints.{c,h} after a clean build. --- src/vulkan/glsl_scraper.py | 81 +++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 40 deletions(-) (limited to 'src') diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index d476b3c0947..c2f0495d4bc 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -8,21 +8,22 @@ import struct import subprocess import sys import tempfile +from textwrap import dedent def print_usage(err): - print """\ -glsl_scraper.py [options] file + print(dedent("""\ + glsl_scraper.py [options] file -This program scrapes a C file for any instance of the GLSL_VK_SHADER macro, -grabs the GLSL source code, compiles it to SPIR-V. The resulting SPIR-V -code is written to another C file as an array of 32-bit words. + This program scrapes a C file for any instance of the GLSL_VK_SHADER macro, + grabs the GLSL source code, compiles it to SPIR-V. The resulting SPIR-V + code is written to another C file as an array of 32-bit words. -If '-' is passed as the input file or output file, stdin or stdout will be -used instead of a file on disc. + If '-' is passed as the input file or output file, stdin or stdout will be + used instead of a file on disc. 
-Options: - -o outfile Output to the given file (default: stdout) - --with-glslang=PATH Full path to the glslangValidator program""" + Options: + -o outfile Output to the given file (default: stdout) + --with-glslang=PATH Full path to the glslangValidator program""")) exit(err) class Shader: @@ -245,36 +246,36 @@ if not glsl_only: shutil.rmtree(tmpdir) with open_file(outfname, 'w') as outfile: - outfile.write("""\ -/* =========================== DO NOT EDIT! =========================== - * This file is autogenerated by glsl_scraper.py. - */ - -#include - -#define _ANV_SPIRV_MAGIC "\\x03\\x02\\x23\\x07\\0\\0\\0\\0" - -#define _ANV_SPIRV_VERTEX _ANV_SPIRV_MAGIC "\\0\\0\\0\\0" -#define _ANV_SPIRV_TESS_CONTROL _ANV_SPIRV_MAGIC "\\1\\0\\0\\0" -#define _ANV_SPIRV_TESS_EVALUATION _ANV_SPIRV_MAGIC "\\2\\0\\0\\0" -#define _ANV_SPIRV_GEOMETRY _ANV_SPIRV_MAGIC "\\3\\0\\0\\0" -#define _ANV_SPIRV_FRAGMENT _ANV_SPIRV_MAGIC "\\4\\0\\0\\0" -#define _ANV_SPIRV_COMPUTE _ANV_SPIRV_MAGIC "\\5\\0\\0\\0" - -#define _ANV_GLSL_SRC_VAR2(_line) _glsl_helpers_shader ## _line ## _glsl_src -#define _ANV_GLSL_SRC_VAR(_line) _ANV_GLSL_SRC_VAR2(_line) - -#define GLSL_VK_SHADER(device, stage, ...) ({ \\ - VkShader __shader; \\ - VkShaderCreateInfo __shader_create_info = { \\ - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, \\ - .codeSize = sizeof(_ANV_GLSL_SRC_VAR(__LINE__)), \\ - .pCode = _ANV_GLSL_SRC_VAR(__LINE__), \\ - }; \\ - vkCreateShader((VkDevice) device, &__shader_create_info, &__shader); \\ - __shader; \\ -}) -""") + outfile.write(dedent("""\ + /* =========================== DO NOT EDIT! =========================== + * This file is autogenerated by glsl_scraper.py. 
+ */ + + #include + + #define _ANV_SPIRV_MAGIC "\\x03\\x02\\x23\\x07\\0\\0\\0\\0" + + #define _ANV_SPIRV_VERTEX _ANV_SPIRV_MAGIC "\\0\\0\\0\\0" + #define _ANV_SPIRV_TESS_CONTROL _ANV_SPIRV_MAGIC "\\1\\0\\0\\0" + #define _ANV_SPIRV_TESS_EVALUATION _ANV_SPIRV_MAGIC "\\2\\0\\0\\0" + #define _ANV_SPIRV_GEOMETRY _ANV_SPIRV_MAGIC "\\3\\0\\0\\0" + #define _ANV_SPIRV_FRAGMENT _ANV_SPIRV_MAGIC "\\4\\0\\0\\0" + #define _ANV_SPIRV_COMPUTE _ANV_SPIRV_MAGIC "\\5\\0\\0\\0" + + #define _ANV_GLSL_SRC_VAR2(_line) _glsl_helpers_shader ## _line ## _glsl_src + #define _ANV_GLSL_SRC_VAR(_line) _ANV_GLSL_SRC_VAR2(_line) + + #define GLSL_VK_SHADER(device, stage, ...) ({ \\ + VkShader __shader; \\ + VkShaderCreateInfo __shader_create_info = { \\ + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, \\ + .codeSize = sizeof(_ANV_GLSL_SRC_VAR(__LINE__)), \\ + .pCode = _ANV_GLSL_SRC_VAR(__LINE__), \\ + }; \\ + vkCreateShader((VkDevice) device, &__shader_create_info, &__shader); \\ + __shader; \\ + }) + """)) for shader in parser.shaders: shader.dump_c_code(outfile, glsl_only) -- cgit v1.2.3 From 4514e63893bf3dada0fb88cdb127ae6fcc1c9bfd Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 May 2015 16:14:26 -0700 Subject: vk/glsl: Reject invalid options The script incorrectly interpreted --blah as the input filename. 
--- src/vulkan/glsl_scraper.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index c2f0495d4bc..c213f702b18 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -26,6 +26,11 @@ def print_usage(err): --with-glslang=PATH Full path to the glslangValidator program""")) exit(err) +def usage_error(msg): + print('usage error: {}'.format(msg)) + print('') + print_usage(1) + class Shader: def __init__(self, stage): self.stream = cStringIO.StringIO() @@ -217,6 +222,8 @@ while arg_idx < len(sys.argv): glslang = sys.argv[arg_idx][len('--with-glslang='):] elif sys.argv[arg_idx] == '--glsl-only': glsl_only = True; + elif sys.argv[arg_idx].startswith('-'): + usage_error('unknown option {!r}'.format(sys.argv[arg_idx])) else: infname = sys.argv[arg_idx] break -- cgit v1.2.3 From c0739043b3ec0df99a64c49af0aa109a7af45c25 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 May 2015 16:15:47 -0700 Subject: vk/image: Remove trailing whitespace --- src/vulkan/image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 5404dd29ea7..43556409ad1 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -81,7 +81,7 @@ VkResult anv_image_create( default: break; } - + if (extra) image->tile_mode = extra->tile_mode; -- cgit v1.2.3 From fab9011c4462d1665c088c5509a82181ce653b7e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 May 2015 16:21:04 -0700 Subject: vk/image: Assert that VkImageTiling is valid --- src/vulkan/image.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 43556409ad1..42bb28f9823 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -79,6 +79,7 @@ VkResult anv_image_create( image->tile_mode = YMAJOR; break; default: + assert(!"bad VKImageTiling"); break; } -- cgit v1.2.3 From 
466f61e9f677e9158d4a440f5bcace0200050640 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 May 2015 16:52:29 -0700 Subject: vk/glsl_scraper: Replace adhoc arg parsing with argparse --- src/vulkan/glsl_scraper.py | 74 ++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index c213f702b18..f72aaf8299c 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -1,5 +1,6 @@ #! /usr/bin/env python +import argparse import cStringIO import os import re @@ -10,27 +11,6 @@ import sys import tempfile from textwrap import dedent -def print_usage(err): - print(dedent("""\ - glsl_scraper.py [options] file - - This program scrapes a C file for any instance of the GLSL_VK_SHADER macro, - grabs the GLSL source code, compiles it to SPIR-V. The resulting SPIR-V - code is written to another C file as an array of 32-bit words. - - If '-' is passed as the input file or output file, stdin or stdout will be - used instead of a file on disc. 
- - Options: - -o outfile Output to the given file (default: stdout) - --with-glslang=PATH Full path to the glslangValidator program""")) - exit(err) - -def usage_error(msg): - print('usage error: {}'.format(msg)) - print('') - print_usage(1) - class Shader: def __init__(self, stage): self.stream = cStringIO.StringIO() @@ -206,31 +186,35 @@ def open_file(name, mode): else: return open(name, mode) -infname = None -outfname = '-' -glslang = 'glslangValidator' -glsl_only = False - -arg_idx = 1 -while arg_idx < len(sys.argv): - if sys.argv[arg_idx] == '-h': - print_usage(0) - elif sys.argv[arg_idx] == '-o': - arg_idx += 1 - outfname = sys.argv[arg_idx] - elif sys.argv[arg_idx].startswith('--with-glslang='): - glslang = sys.argv[arg_idx][len('--with-glslang='):] - elif sys.argv[arg_idx] == '--glsl-only': - glsl_only = True; - elif sys.argv[arg_idx].startswith('-'): - usage_error('unknown option {!r}'.format(sys.argv[arg_idx])) - else: - infname = sys.argv[arg_idx] - break - arg_idx += 1 +def parse_args(): + description = dedent("""\ + This program scrapes a C file for any instance of the GLSL_VK_SHADER + macro, grabs the GLSL source code, compiles it to SPIR-V. The resulting + SPIR-V code is written to another C file as an array of 32-bit words. 
-if arg_idx < len(sys.argv) - 1 or not infname or not outfname: - print_usage(1) + If '-' is passed as the input file or output file, stdin or stdout will be + used instead of a file on disc.""") + + p = argparse.ArgumentParser( + description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + p.add_argument('-o', '--outfile', default='-', + help='Output to the given file (default: stdout).') + p.add_argument('--with-glslang', metavar='PATH', + default='glslangValidator', + dest='glslang', + help='Full path to the glslangValidator program.') + p.add_argument('--glsl-only', action='store_true') + p.add_argument('infile', metavar='INFILE') + + return p.parse_args() + + +args = parse_args() +infname = args.infile +outfname = args.outfile +glslang = args.glslang +glsl_only = args.glsl_only with open_file(infname, 'r') as infile: parser = Parser(infile) -- cgit v1.2.3 From ca385dcf2a11297d7efe2d10208c42fb9ec7b2bf Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 May 2015 16:57:31 -0700 Subject: vk: gitignore generated source files --- src/vulkan/.gitignore | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore index 441005687f8..ef1acb7cbe2 100644 --- a/src/vulkan/.gitignore +++ b/src/vulkan/.gitignore @@ -1 +1,4 @@ /vk + +# Generated source files +/*-spirv.h -- cgit v1.2.3 From f559fe9134951b5e0fdcda50a3692a436b22b216 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 May 2015 16:59:53 -0700 Subject: .gitignore: Scope Vulkan's generated source files Don't ignore any file named entrypoints.{c,h}. Ignore it only if it's in src/vulkan. 
--- .gitignore | 2 -- src/vulkan/.gitignore | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/.gitignore b/.gitignore index e395fd18803..21aa35cd36d 100644 --- a/.gitignore +++ b/.gitignore @@ -46,5 +46,3 @@ manifest.txt Makefile Makefile.in .install-mesa-links -entrypoints.c -entrypoints.h diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore index ef1acb7cbe2..772daa8bf78 100644 --- a/src/vulkan/.gitignore +++ b/src/vulkan/.gitignore @@ -2,3 +2,5 @@ # Generated source files /*-spirv.h +/entrypoints.c +/entrypoints.h -- cgit v1.2.3 From 1435bf4bc44325d7b7caa392b60877f2b4610691 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 May 2015 17:01:09 -0700 Subject: .gitignore: Ignore spirv2nir binary --- src/glsl/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/.gitignore b/src/glsl/.gitignore index dda423f83db..e80f8af6bfc 100644 --- a/src/glsl/.gitignore +++ b/src/glsl/.gitignore @@ -4,6 +4,7 @@ glsl_parser.cpp glsl_parser.h glsl_parser.output glsl_test +spirv2nir subtest-cr/ subtest-lf/ subtest-cr-lf/ -- cgit v1.2.3 From 5a317ef4cba1fbd04fe2ec446a3556de2f384e63 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 27 May 2015 21:45:23 -0700 Subject: vk: Initialize dynamic state binding points to NULL We rely on these being initialized to NULL so meta can reliably detect whether or not they've been set. ds_state is also allowed to not be present so we need a well-defined value for that. 
--- src/vulkan/device.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 3c9c6d3e043..e2a2eebff2c 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2353,6 +2353,9 @@ VkResult anv_CreateCommandBuffer( cmd_buffer->dirty = 0; cmd_buffer->vb_dirty = 0; cmd_buffer->pipeline = NULL; + cmd_buffer->vp_state = NULL; + cmd_buffer->rs_state = NULL; + cmd_buffer->ds_state = NULL; *pCmdBuffer = (VkCmdBuffer) cmd_buffer; -- cgit v1.2.3 From 6eefeb1f8430bfa3f6f141dd563f2a4f82caede8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 May 2015 10:00:38 -0700 Subject: vk/meta: Share the dummy RS and CB state between clear and blit --- src/vulkan/meta.c | 62 +++++++++++++++++++++------------------------------- src/vulkan/private.h | 26 ++++++++++++---------- 2 files changed, 39 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 83e37c43c7b..4ce917124f9 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -145,22 +145,9 @@ anv_device_init_meta_clear_state(struct anv_device *device) .disable_viewport = true, .use_rectlist = true }, - &device->clear_state.pipeline); + &device->meta_state.clear.pipeline); anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, fs); - - anv_CreateDynamicRasterState((VkDevice) device, - &(VkDynamicRsStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, - }, - &device->clear_state.rs_state); - - anv_CreateDynamicColorBlendState((VkDevice) device, - &(VkDynamicCbStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO - }, - &device->clear_state.cb_state); - } #define NUM_VB_USED 2 @@ -287,15 +274,16 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, sizeof(vertex_data) }); - if ((VkPipeline) cmd_buffer->pipeline != device->clear_state.pipeline) + if ((VkPipeline) cmd_buffer->pipeline != device->meta_state.clear.pipeline) 
anv_CmdBindPipeline((VkCmdBuffer) cmd_buffer, - VK_PIPELINE_BIND_POINT_GRAPHICS, device->clear_state.pipeline); + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.clear.pipeline); /* We don't need anything here, only set if not already set. */ if (cmd_buffer->rs_state == NULL) anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, VK_STATE_BIND_POINT_RASTER, - device->clear_state.rs_state); + device->meta_state.shared.rs_state); if (cmd_buffer->vp_state == NULL) anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, @@ -425,12 +413,12 @@ anv_device_init_meta_blit_state(struct anv_device *device) } }; anv_CreateDescriptorSetLayout((VkDevice) device, &ds_layout_info, - &device->blit_state.ds_layout); + &device->meta_state.blit.ds_layout); VkPipelineLayoutCreateInfo pipeline_layout_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .descriptorSetCount = 1, - .pSetLayouts = &device->blit_state.ds_layout, + .pSetLayouts = &device->meta_state.blit.ds_layout, }; VkPipelineLayout pipeline_layout; @@ -472,22 +460,10 @@ anv_device_init_meta_blit_state(struct anv_device *device) .disable_vs = true, .use_rectlist = true }, - &device->blit_state.pipeline); + &device->meta_state.blit.pipeline); anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, vs); anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, fs); - - anv_CreateDynamicRasterState((VkDevice) device, - &(VkDynamicRsStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, - }, - &device->blit_state.rs_state); - - anv_CreateDynamicColorBlendState((VkDevice) device, - &(VkDynamicCbStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO - }, - &device->blit_state.cb_state); } static void @@ -498,21 +474,21 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_save(cmd_buffer, saved_state); - if ((VkPipeline) cmd_buffer->pipeline != device->blit_state.pipeline) + if ((VkPipeline) cmd_buffer->pipeline != 
device->meta_state.blit.pipeline) anv_CmdBindPipeline((VkCmdBuffer) cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - device->blit_state.pipeline); + device->meta_state.blit.pipeline); /* We don't need anything here, only set if not already set. */ if (cmd_buffer->rs_state == NULL) anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, VK_STATE_BIND_POINT_RASTER, - device->blit_state.rs_state); + device->meta_state.shared.rs_state); saved_state->cb_state = (VkDynamicCbState) cmd_buffer->cb_state; anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, VK_STATE_BIND_POINT_COLOR_BLEND, - device->blit_state.cb_state); + device->meta_state.shared.cb_state); } struct blit_region { @@ -599,7 +575,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VkDescriptorSet set; anv_AllocDescriptorSets((VkDevice) device, 0 /* pool */, VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, - 1, &device->blit_state.ds_layout, &set, &count); + 1, &device->meta_state.blit.ds_layout, &set, &count); anv_UpdateDescriptors((VkDevice) device, set, 1, (const void * []) { &(VkUpdateImages) { @@ -1238,4 +1214,16 @@ anv_device_init_meta(struct anv_device *device) { anv_device_init_meta_clear_state(device); anv_device_init_meta_blit_state(device); + + anv_CreateDynamicRasterState((VkDevice) device, + &(VkDynamicRsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, + }, + &device->meta_state.shared.rs_state); + + anv_CreateDynamicColorBlendState((VkDevice) device, + &(VkDynamicCbStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO + }, + &device->meta_state.shared.cb_state); } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 9a56c1949b8..f2835ba660e 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -298,17 +298,20 @@ struct anv_instance { struct anv_physical_device physicalDevice; }; -struct anv_clear_state { - VkPipeline pipeline; - VkDynamicRsState rs_state; - VkDynamicCbState cb_state; -}; +struct anv_meta_state { + struct { + 
VkPipeline pipeline; + } clear; -struct anv_blit_state { - VkPipeline pipeline; - VkDynamicRsState rs_state; - VkDescriptorSetLayout ds_layout; - VkDynamicCbState cb_state; + struct { + VkPipeline pipeline; + VkDescriptorSetLayout ds_layout; + } blit; + + struct { + VkDynamicRsState rs_state; + VkDynamicCbState cb_state; + } shared; }; struct anv_device { @@ -330,8 +333,7 @@ struct anv_device { struct anv_block_pool binding_table_block_pool; struct anv_state_pool surface_state_pool; - struct anv_clear_state clear_state; - struct anv_blit_state blit_state; + struct anv_meta_state meta_state; struct anv_compiler * compiler; struct anv_aub_writer * aub_writer; -- cgit v1.2.3 From de221a672d9585510b7525610c1dce0c6d069e92 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 May 2015 10:06:45 -0700 Subject: meta: Add a default ds_state and use it when no ds state is set --- src/vulkan/meta.c | 15 +++++++++++++++ src/vulkan/private.h | 1 + 2 files changed, 16 insertions(+) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 4ce917124f9..8e9529595a2 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -290,6 +290,11 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, VK_STATE_BIND_POINT_VIEWPORT, cmd_buffer->framebuffer->vp_state); + if (cmd_buffer->ds_state == NULL) + anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_DEPTH_STENCIL, + device->meta_state.shared.ds_state); + anv_CmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, pass->num_clear_layers); /* Restore API state */ @@ -484,6 +489,10 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, VK_STATE_BIND_POINT_RASTER, device->meta_state.shared.rs_state); + if (cmd_buffer->ds_state == NULL) + anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_DEPTH_STENCIL, + device->meta_state.shared.ds_state); saved_state->cb_state = (VkDynamicCbState) cmd_buffer->cb_state; 
anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, @@ -1226,4 +1235,10 @@ anv_device_init_meta(struct anv_device *device) .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO }, &device->meta_state.shared.cb_state); + + anv_CreateDynamicDepthStencilState((VkDevice) device, + &(VkDynamicDsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO + }, + &device->meta_state.shared.ds_state); } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index f2835ba660e..96b72596a1f 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -311,6 +311,7 @@ struct anv_meta_state { struct { VkDynamicRsState rs_state; VkDynamicCbState cb_state; + VkDynamicDsState ds_state; } shared; }; -- cgit v1.2.3 From 730ca0efb17feecd7179a81a9ea53905350ad405 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 May 2015 10:20:18 -0700 Subject: vk/device: Fixups for batch buffer chaining Some how these didn't get merged with the other batch buffer chaining stuff. Oh well, it's here now. 
--- src/vulkan/device.c | 48 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index e2a2eebff2c..a5e4d9ffe6c 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -303,7 +303,7 @@ parse_debug_flags(struct anv_device *device) } } -static const uint32_t BATCH_SIZE = 1 << 15; +static const uint32_t BATCH_SIZE = 8192; VkResult anv_CreateDevice( VkPhysicalDevice _physicalDevice, @@ -2264,6 +2264,7 @@ static VkResult anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) { struct anv_cmd_buffer *cmd_buffer = _data; + struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->last_batch_bo; VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); @@ -2281,8 +2282,10 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) batch->next += GEN8_MI_BATCH_BUFFER_START_length * 4; /* Pad out to a 2-dword aligned boundary with zeros */ - if ((uintptr_t)batch->next % 8 != 0) + if ((uintptr_t)batch->next % 8 != 0) { *(uint32_t *)batch->next = 0; + batch->next += 4; + } anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch); @@ -2438,7 +2441,9 @@ VkResult anv_BeginCommandBuffer( static VkResult anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, struct anv_reloc_list *list) + struct anv_bo *bo, + struct drm_i915_gem_relocation_entry *relocs, + size_t num_relocs) { struct drm_i915_gem_exec_object2 *obj; @@ -2491,9 +2496,9 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, obj->rsvd1 = 0; obj->rsvd2 = 0; - if (list) { - obj->relocation_count = list->num_relocs; - obj->relocs_ptr = (uintptr_t) list->relocs; + if (relocs) { + obj->relocation_count = num_relocs; + obj->relocs_ptr = (uintptr_t) relocs; } return VK_SUCCESS; @@ -2504,7 +2509,7 @@ anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer, struct anv_reloc_list *list) { for (size_t i = 0; i < list->num_relocs; i++) - 
anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL); + anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL, 0); } static void @@ -2548,21 +2553,36 @@ VkResult anv_EndCommandBuffer( cmd_buffer->bo_count = 0; cmd_buffer->need_reloc = false; - /* Find the first batch bo in the list */ - struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo; - while (batch_bo->prev_batch_bo) - batch_bo = batch_bo->prev_batch_bo; - /* Lock for access to bo->index. */ pthread_mutex_lock(&device->mutex); /* Add block pool bos first so we can add them with their relocs. */ anv_cmd_buffer_add_bo(cmd_buffer, &cmd_buffer->surface_bo, - &cmd_buffer->surface_relocs); + cmd_buffer->surface_relocs.relocs, + cmd_buffer->surface_relocs.num_relocs); + /* Add all of the BOs referenced by surface state */ anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs); + + /* Add all but the first batch BO */ + struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo; + while (batch_bo->prev_batch_bo) { + anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo, + &batch->relocs.relocs[batch_bo->first_reloc], + batch_bo->num_relocs); + batch_bo = batch_bo->prev_batch_bo; + } + + /* Add everything referenced by the batches */ anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs); - anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo, &batch->relocs); + + /* Add the first batch bo last */ + assert(batch_bo->prev_batch_bo == NULL && batch_bo->first_reloc == 0); + anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo, + &batch->relocs.relocs[batch_bo->first_reloc], + batch_bo->num_relocs); + assert(batch_bo->bo.index == cmd_buffer->bo_count - 1); + anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs); -- cgit v1.2.3 From 8cf932fd254b3c35e93b2898fdda95f611ea9f7a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 May 2015 10:27:50 -0700 Subject: vk/query: Don't emit a CS stall by itself Both the bspec and 
the simulator don't like this. I'm not sure if stalling at the scoreboard is right but it at least shuts up the simulator. --- src/vulkan/device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index a5e4d9ffe6c..9bd3f60bfde 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3317,7 +3317,8 @@ void anv_CmdCopyQueryPoolResults( /* FIXME: If we're not waiting, should we just do this on the CPU? */ if (flags & VK_QUERY_RESULT_WAIT_BIT) anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .CommandStreamerStallEnable = true); + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); dst_offset = buffer->offset + destOffset; for (uint32_t i = 0; i < queryCount; i++) { -- cgit v1.2.3 From 2dc0f7fe5b447f9b0cb5a9e8bb7a867cfe6951e9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 May 2015 13:08:21 -0700 Subject: vk/device: Actually destroy batch buffers --- src/vulkan/device.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 9bd3f60bfde..fa913677403 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2249,6 +2249,14 @@ anv_cmd_buffer_destroy(struct anv_device *device, assert(obj_type == VK_OBJECT_TYPE_COMMAND_BUFFER); + /* Destroy all of the batch buffers */ + struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; + while (bbo->prev_batch_bo) { + struct anv_batch_bo *prev = bbo->prev_batch_bo; + anv_batch_bo_destroy(bbo, cmd_buffer->device); + bbo = prev; + } + anv_bo_pool_free(&device->batch_bo_pool, &cmd_buffer->surface_bo); anv_reloc_list_finish(&cmd_buffer->surface_relocs, device); anv_state_stream_finish(&cmd_buffer->surface_state_stream); -- cgit v1.2.3 From 468c89a3517a197f419335bfc8c0cf8614c1772c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 May 2015 15:25:02 -0700 Subject: vk/device: Use anv_batch_emit for MI_BATCH_BUFFER_START --- 
src/vulkan/device.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index fa913677403..0a6f0c6e6b9 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2279,15 +2279,19 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) if (result != VK_SUCCESS) return result; - struct GEN8_MI_BATCH_BUFFER_START cmd = { + /* We set the end of the batch a little short so we would be sure we + * have room for the chaining command. Since we're about to emit the + * chaining command, let's set it back where it should go. + */ + batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(batch->end == old_bbo->bo.map + old_bbo->bo.size); + + anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, GEN8_MI_BATCH_BUFFER_START_header, ._2ndLevelBatchBuffer = _1stlevelbatch, .AddressSpaceIndicator = ASI_PPGTT, .BatchBufferStartAddress = { &new_bbo->bo, 0 }, - }; - GEN8_MI_BATCH_BUFFER_START_pack(batch, batch->next, &cmd); - - batch->next += GEN8_MI_BATCH_BUFFER_START_length * 4; + ); /* Pad out to a 2-dword aligned boundary with zeros */ if ((uintptr_t)batch->next % 8 != 0) { -- cgit v1.2.3 From 9ffc1bed154807316d12fba7970261918fc22538 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 May 2015 15:34:08 -0700 Subject: vk/device: Split state base address emit into its own function --- src/vulkan/device.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 0a6f0c6e6b9..ed2b8d14b7f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2386,17 +2386,11 @@ VkResult anv_CreateCommandBuffer( return result; } -VkResult anv_BeginCommandBuffer( - VkCmdBuffer cmdBuffer, - const VkCmdBufferBeginInfo* pBeginInfo) +static void +anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) 
cmdBuffer; struct anv_device *device = cmd_buffer->device; - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, - .PipelineSelection = _3D); - anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_SIP); - anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, .GeneralStateBaseAddress = { NULL, 0 }, .GeneralStateMemoryObjectControlState = GEN8_MOCS, @@ -2419,12 +2413,25 @@ VkResult anv_BeginCommandBuffer( .IndirectObjectBaseAddressModifyEnable = true, .IndirectObjectBufferSize = 0xfffff, .IndirectObjectBufferSizeModifyEnable = true, - + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, .InstructionMemoryObjectControlState = GEN8_MOCS, .InstructionBaseAddressModifyEnable = true, .InstructionBufferSize = 0xfffff, .InstructionBuffersizeModifyEnable = true); +} + +VkResult anv_BeginCommandBuffer( + VkCmdBuffer cmdBuffer, + const VkCmdBufferBeginInfo* pBeginInfo) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = _3D); + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_SIP); + + anv_cmd_buffer_emit_state_base_address(cmd_buffer); anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VF_STATISTICS, .StatisticsEnable = true); -- cgit v1.2.3 From fad418ff47894a1d579b28346d605d1d57de9b74 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 27 May 2015 14:05:50 -0700 Subject: vk: Implement dynamic buffer offsets We do this by creating a surface state on the fly that incorporates the dynamic offset. This patch also refactor the descriptor set layout constructor a bit to be less clever with switch statement fall through. Instead of duplicating the subtle code to update the sampler and surface slot map, we just use two switch statements. 
--- src/vulkan/device.c | 151 +++++++++++++++++++++++++++++++-------------------- src/vulkan/private.h | 12 +++- 2 files changed, 101 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index ed2b8d14b7f..3f3319bb50a 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1565,37 +1565,21 @@ VkResult anv_CreateBuffer( // Buffer view functions -VkResult anv_CreateBufferView( - VkDevice _device, - const VkBufferViewCreateInfo* pCreateInfo, - VkBufferView* pView) +static void +fill_buffer_surface_state(void *state, VkFormat format, + uint32_t offset, uint32_t range) { - struct anv_device *device = (struct anv_device *) _device; - struct anv_buffer *buffer = (struct anv_buffer *) pCreateInfo->buffer; - struct anv_surface_view *view; - const struct anv_format *format; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); - - view = anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - view->bo = buffer->bo; - view->offset = buffer->offset + pCreateInfo->offset; - view->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - view->format = pCreateInfo->format; + const struct anv_format *info; - format = anv_format_for_vk_format(pCreateInfo->format); + info = anv_format_for_vk_format(format); /* This assumes RGBA float format. 
*/ uint32_t stride = 4; - uint32_t num_elements = pCreateInfo->range / stride; + uint32_t num_elements = range / stride; + struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = SURFTYPE_BUFFER, .SurfaceArray = false, - .SurfaceFormat = format->format, + .SurfaceFormat = info->format, .SurfaceVerticalAlignment = VALIGN4, .SurfaceHorizontalAlignment = HALIGN4, .TileMode = LINEAR, @@ -1627,10 +1611,37 @@ VkResult anv_CreateBufferView( .ShaderChannelSelectAlpha = SCS_ALPHA, .ResourceMinLOD = 0, /* FIXME: We assume that the image must be bound at this time. */ - .SurfaceBaseAddress = { NULL, view->offset }, + .SurfaceBaseAddress = { NULL, offset }, }; - GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); + GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); +} + +VkResult anv_CreateBufferView( + VkDevice _device, + const VkBufferViewCreateInfo* pCreateInfo, + VkBufferView* pView) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_buffer *buffer = (struct anv_buffer *) pCreateInfo->buffer; + struct anv_surface_view *view; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); + + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + view->bo = buffer->bo; + view->offset = buffer->offset + pCreateInfo->offset; + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + view->format = pCreateInfo->format; + view->range = pCreateInfo->range; + + fill_buffer_surface_state(view->surface_state.map, + pCreateInfo->format, view->offset, pCreateInfo->range); *pView = (VkImageView) view; @@ -1748,16 +1759,16 @@ VkResult anv_CreateDescriptorSetLayout( for (uint32_t i = 0; i < pCreateInfo->count; i++) { switch (pCreateInfo->pBinding[i].descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: for_each_bit(s, 
pCreateInfo->pBinding[i].stageFlags) sampler_count[s] += pCreateInfo->pBinding[i].count; break; + default: + break; + } + switch (pCreateInfo->pBinding[i].descriptorType) { case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - sampler_count[s] += pCreateInfo->pBinding[i].count; - - /* fall through */ - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: @@ -1773,18 +1784,16 @@ VkResult anv_CreateDescriptorSetLayout( break; } - count += pCreateInfo->pBinding[i].count; - } - - for (uint32_t i = 0; i < pCreateInfo->count; i++) { switch (pCreateInfo->pBinding[i].descriptorType) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - num_dynamic_buffers++; + num_dynamic_buffers += pCreateInfo->pBinding[i].count; break; default: break; } + + count += pCreateInfo->pBinding[i].count; } uint32_t sampler_total = 0; @@ -1795,7 +1804,7 @@ VkResult anv_CreateDescriptorSetLayout( } size_t size = sizeof(*set_layout) + - (sampler_total + surface_total) * sizeof(uint32_t); + (sampler_total + surface_total) * sizeof(set_layout->entries[0]); set_layout = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (!set_layout) @@ -1804,9 +1813,9 @@ VkResult anv_CreateDescriptorSetLayout( set_layout->num_dynamic_buffers = num_dynamic_buffers; set_layout->count = count; - uint32_t *p = set_layout->entries; - uint32_t *sampler[VK_NUM_SHADER_STAGE]; - uint32_t *surface[VK_NUM_SHADER_STAGE]; + struct anv_descriptor_slot *p = set_layout->entries; + struct anv_descriptor_slot *sampler[VK_NUM_SHADER_STAGE]; + struct anv_descriptor_slot *surface[VK_NUM_SHADER_STAGE]; for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { set_layout->stage[s].surface_count = surface_count[s]; set_layout->stage[s].surface_start = surface[s] = p; @@ -1817,21 +1826,34 @@ VkResult anv_CreateDescriptorSetLayout( } uint32_t descriptor = 0; + 
bool dynamic; for (uint32_t i = 0; i < pCreateInfo->count; i++) { switch (pCreateInfo->pBinding[i].descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: - for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) - *(sampler[s])++ = descriptor + j; - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) - *(sampler[s])++ = descriptor + j; + for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) { + sampler[s]->index = descriptor + j; + sampler[s]->dynamic = false; + sampler[s]++; + } + break; + default: + break; + } - /* fallthrough */ + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + dynamic = true; + break; + default: + dynamic = false; + break; + } + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: @@ -1842,11 +1864,13 @@ VkResult anv_CreateDescriptorSetLayout( case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) { - *(surface[s])++ = descriptor + j; + surface[s]->index = descriptor + j; + surface[s]->dynamic = dynamic; + surface[s]++; } break; default: - unreachable(""); + break; } descriptor += pCreateInfo->pBinding[i].count; } @@ -2731,41 +2755,52 @@ void anv_CmdBindDescriptorSets( struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; struct anv_bindings *bindings = cmd_buffer->bindings; - uint32_t offset = 0; + uint32_t dynamic_slot = 0; for (uint32_t i = 0; i < setCount; i++) { struct anv_descriptor_set *set = (struct anv_descriptor_set *) pDescriptorSets[i]; struct 
anv_descriptor_set_layout *set_layout = layout->set[firstSet + i].layout; for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { - uint32_t *surface_to_desc = set_layout->stage[s].surface_start; - uint32_t *sampler_to_desc = set_layout->stage[s].sampler_start; + struct anv_descriptor_slot *surface_slots = set_layout->stage[s].surface_start; + struct anv_descriptor_slot *sampler_slots = set_layout->stage[s].sampler_start; uint32_t bias = s == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0; uint32_t start; start = bias + layout->set[firstSet + i].surface_start[s]; for (uint32_t b = 0; b < set_layout->stage[s].surface_count; b++) { - struct anv_surface_view *view = set->descriptors[surface_to_desc[b]].view; + struct anv_surface_view *view = set->descriptors[surface_slots[b].index].view; if (!view) continue; struct anv_state state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - memcpy(state.map, view->surface_state.map, 64); + + uint32_t offset; + if (surface_slots[b].dynamic) { + offset = view->offset + pDynamicOffsets[dynamic_slot]; + fill_buffer_surface_state(state.map, view->format, offset, + view->range - pDynamicOffsets[dynamic_slot]); + dynamic_slot++; + } else { + offset = view->offset; + memcpy(state.map, view->surface_state.map, 64); + } /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ *(uint64_t *)(state.map + 8 * 4) = anv_reloc_list_add(&cmd_buffer->surface_relocs, cmd_buffer->device, state.offset + 8 * 4, - view->bo, view->offset); + view->bo, offset); bindings->descriptors[s].surfaces[start + b] = state.offset; } start = layout->set[firstSet + i].sampler_start[s]; for (uint32_t b = 0; b < set_layout->stage[s].sampler_count; b++) { - struct anv_sampler *sampler = set->descriptors[sampler_to_desc[b]].sampler; + + struct anv_sampler *sampler = set->descriptors[sampler_slots[b].index].sampler; if (!sampler) continue; @@ -2773,8 +2808,6 @@ void anv_CmdBindDescriptorSets( sampler->state, sizeof(sampler->state)); } } - - offset += 
layout->set[firstSet + i].layout->num_dynamic_buffers; } cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 96b72596a1f..38b908aa6b3 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -541,17 +541,22 @@ struct anv_query_pool { struct anv_bo bo; }; +struct anv_descriptor_slot { + bool dynamic; + uint8_t index; +} entries[0]; + struct anv_descriptor_set_layout { struct { uint32_t surface_count; - uint32_t *surface_start; + struct anv_descriptor_slot *surface_start; uint32_t sampler_count; - uint32_t *sampler_start; + struct anv_descriptor_slot *sampler_start; } stage[VK_NUM_SHADER_STAGE]; uint32_t count; uint32_t num_dynamic_buffers; - uint32_t entries[0]; + struct anv_descriptor_slot entries[0]; }; struct anv_descriptor { @@ -754,6 +759,7 @@ struct anv_surface_view { struct anv_state surface_state; struct anv_bo * bo; uint32_t offset; + uint32_t range; VkExtent3D extent; VkFormat format; }; -- cgit v1.2.3 From 4aecec0bd6475057bb8a8e234c1dce115a08b24c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 29 May 2015 11:32:53 -0700 Subject: vk: Store dynamic slot index with struct anv_descriptor_slot We need to make sure we use the right index into dynamic offset array. Dynamic descriptors can be present or not in different stages and to get the right offset, we need to compute the index at vkCreateDescriptorSetLayout time. 
--- src/vulkan/device.c | 31 +++++++++++++++++++++---------- src/vulkan/private.h | 2 +- 2 files changed, 22 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 3f3319bb50a..cabb5b83814 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1826,7 +1826,8 @@ VkResult anv_CreateDescriptorSetLayout( } uint32_t descriptor = 0; - bool dynamic; + int8_t dynamic_slot = 0; + bool is_dynamic; for (uint32_t i = 0; i < pCreateInfo->count; i++) { switch (pCreateInfo->pBinding[i].descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: @@ -1834,7 +1835,7 @@ VkResult anv_CreateDescriptorSetLayout( for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) { sampler[s]->index = descriptor + j; - sampler[s]->dynamic = false; + sampler[s]->dynamic_slot = -1; sampler[s]++; } break; @@ -1845,10 +1846,10 @@ VkResult anv_CreateDescriptorSetLayout( switch (pCreateInfo->pBinding[i].descriptorType) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - dynamic = true; + is_dynamic = true; break; default: - dynamic = false; + is_dynamic = false; break; } @@ -1865,13 +1866,20 @@ VkResult anv_CreateDescriptorSetLayout( for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) { surface[s]->index = descriptor + j; - surface[s]->dynamic = dynamic; + if (is_dynamic) + surface[s]->dynamic_slot = dynamic_slot + j; + else + surface[s]->dynamic_slot = -1; surface[s]++; } break; default: break; } + + if (is_dynamic) + dynamic_slot += pCreateInfo->pBinding[i].count; + descriptor += pCreateInfo->pBinding[i].count; } @@ -2754,8 +2762,8 @@ void anv_CmdBindDescriptorSets( struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; struct anv_bindings *bindings = cmd_buffer->bindings; + uint32_t dynamic_base = 
0; - uint32_t dynamic_slot = 0; for (uint32_t i = 0; i < setCount; i++) { struct anv_descriptor_set *set = (struct anv_descriptor_set *) pDescriptorSets[i]; @@ -2777,11 +2785,12 @@ void anv_CmdBindDescriptorSets( anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); uint32_t offset; - if (surface_slots[b].dynamic) { - offset = view->offset + pDynamicOffsets[dynamic_slot]; + if (surface_slots[b].dynamic_slot != -1) { + uint32_t dynamic_offset = + pDynamicOffsets[dynamic_base + surface_slots[b].dynamic_slot]; + offset = view->offset + dynamic_offset; fill_buffer_surface_state(state.map, view->format, offset, - view->range - pDynamicOffsets[dynamic_slot]); - dynamic_slot++; + view->range - dynamic_offset); } else { offset = view->offset; memcpy(state.map, view->surface_state.map, 64); @@ -2808,6 +2817,8 @@ void anv_CmdBindDescriptorSets( sampler->state, sizeof(sampler->state)); } } + + dynamic_base += set_layout->num_dynamic_buffers; } cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 38b908aa6b3..167a8c058b1 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -542,7 +542,7 @@ struct anv_query_pool { }; struct anv_descriptor_slot { - bool dynamic; + int8_t dynamic_slot; uint8_t index; } entries[0]; -- cgit v1.2.3 From c4bd5f87a0ef6af54e4f03c252dd95000ea026e3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 29 May 2015 15:16:58 -0700 Subject: vk/device: Do lazy surface state emission for binding tables Before, we were emitting surface states up-front when binding tables were updated. Now, we wait to emit the surface states until we emit the binding table. This makes meta simpler and should make it easier to deal with swapping out the surface state buffer. 
--- src/vulkan/device.c | 326 ++++++++++++++++++++++++++++----------------------- src/vulkan/meta.c | 30 ++--- src/vulkan/private.h | 27 ++--- 3 files changed, 198 insertions(+), 185 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index cabb5b83814..54e70d3a5de 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2360,8 +2360,7 @@ VkResult anv_CreateCommandBuffer( cmd_buffer->device = device; cmd_buffer->rs_state = NULL; cmd_buffer->vp_state = NULL; - memset(&cmd_buffer->default_bindings, 0, sizeof(cmd_buffer->default_bindings)); - cmd_buffer->bindings = &cmd_buffer->default_bindings; + memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors)); result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo); if (result != VK_SUCCESS) @@ -2761,64 +2760,25 @@ void anv_CmdBindDescriptorSets( { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; - struct anv_bindings *bindings = cmd_buffer->bindings; - uint32_t dynamic_base = 0; - - for (uint32_t i = 0; i < setCount; i++) { - struct anv_descriptor_set *set = - (struct anv_descriptor_set *) pDescriptorSets[i]; - struct anv_descriptor_set_layout *set_layout = layout->set[firstSet + i].layout; - - for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { - struct anv_descriptor_slot *surface_slots = set_layout->stage[s].surface_start; - struct anv_descriptor_slot *sampler_slots = set_layout->stage[s].sampler_start; - uint32_t bias = s == VK_SHADER_STAGE_FRAGMENT ? 
MAX_RTS : 0; - uint32_t start; - - start = bias + layout->set[firstSet + i].surface_start[s]; - for (uint32_t b = 0; b < set_layout->stage[s].surface_count; b++) { - struct anv_surface_view *view = set->descriptors[surface_slots[b].index].view; - if (!view) - continue; - - struct anv_state state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - - uint32_t offset; - if (surface_slots[b].dynamic_slot != -1) { - uint32_t dynamic_offset = - pDynamicOffsets[dynamic_base + surface_slots[b].dynamic_slot]; - offset = view->offset + dynamic_offset; - fill_buffer_surface_state(state.map, view->format, offset, - view->range - dynamic_offset); - } else { - offset = view->offset; - memcpy(state.map, view->surface_state.map, 64); - } - - /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ - *(uint64_t *)(state.map + 8 * 4) = - anv_reloc_list_add(&cmd_buffer->surface_relocs, - cmd_buffer->device, - state.offset + 8 * 4, - view->bo, offset); + struct anv_descriptor_set *set; + struct anv_descriptor_set_layout *set_layout; - bindings->descriptors[s].surfaces[start + b] = state.offset; - } + assert(firstSet + setCount < MAX_SETS); - start = layout->set[firstSet + i].sampler_start[s]; - for (uint32_t b = 0; b < set_layout->stage[s].sampler_count; b++) { + uint32_t dynamic_slot = 0; + for (uint32_t i = 0; i < setCount; i++) { + set = (struct anv_descriptor_set *) pDescriptorSets[i]; + set_layout = layout->set[firstSet + i].layout; - struct anv_sampler *sampler = set->descriptors[sampler_slots[b].index].sampler; - if (!sampler) - continue; + cmd_buffer->descriptors[firstSet + i].set = set; - memcpy(&bindings->descriptors[s].samplers[start + b], - sampler->state, sizeof(sampler->state)); - } - } + assert(set_layout->num_dynamic_buffers < + ARRAY_SIZE(cmd_buffer->descriptors[0].dynamic_offsets)); + memcpy(cmd_buffer->descriptors[firstSet + i].dynamic_offsets, + pDynamicOffsets + dynamic_slot, + set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); - 
dynamic_base += set_layout->num_dynamic_buffers; + dynamic_slot += set_layout->num_dynamic_buffers; } cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; @@ -2854,89 +2814,186 @@ void anv_CmdBindVertexBuffers( const VkDeviceSize* pOffsets) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_bindings *bindings = cmd_buffer->bindings; + struct anv_vertex_binding *vb = cmd_buffer->vertex_bindings; /* We have to defer setting up vertex buffer since we need the buffer * stride from the pipeline. */ + assert(startBinding + bindingCount < MAX_VBS); for (uint32_t i = 0; i < bindingCount; i++) { - bindings->vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i]; - bindings->vb[startBinding + i].offset = pOffsets[i]; + vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i]; + vb[startBinding + i].offset = pOffsets[i]; cmd_buffer->vb_dirty |= 1 << (startBinding + i); } } static void -flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) +cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + unsigned stage) { struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; - struct anv_bindings *bindings = cmd_buffer->bindings; - uint32_t layers = cmd_buffer->framebuffer->layers; + uint32_t color_attachments, bias, size; + struct anv_state bt_state; - for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { - uint32_t bias; + if (stage == VK_SHADER_STAGE_FRAGMENT) { + bias = MAX_RTS; + color_attachments = cmd_buffer->framebuffer->color_attachment_count; + } else { + bias = 0; + color_attachments = 0; + } - if (s == VK_SHADER_STAGE_FRAGMENT) { - bias = MAX_RTS; - layers = cmd_buffer->framebuffer->layers; - } else { - bias = 0; - layers = 0; - } + /* This is a little awkward: layout can be NULL but we still have to + * allocate and set a binding table for the PS stage for render + * targets. */ + uint32_t surface_count = layout ? 
layout->stage[stage].surface_count : 0; - /* This is a little awkward: layout can be NULL but we still have to - * allocate and set a binding table for the PS stage for render - * targets. */ - uint32_t surface_count = layout ? layout->stage[s].surface_count : 0; - - if (layers + surface_count > 0) { - struct anv_state state; - uint32_t size; - - size = (bias + surface_count) * sizeof(uint32_t); - state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); - memcpy(state.map, bindings->descriptors[s].surfaces, size); - - static const uint32_t binding_table_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 38, - [VK_SHADER_STAGE_TESS_CONTROL] = 39, - [VK_SHADER_STAGE_TESS_EVALUATION] = 40, - [VK_SHADER_STAGE_GEOMETRY] = 41, - [VK_SHADER_STAGE_FRAGMENT] = 42, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; + if (color_attachments + surface_count == 0) + return; + + size = (bias + surface_count) * sizeof(uint32_t); + bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); + uint32_t *bt_map = bt_state.map; + + static const uint32_t binding_table_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[stage], + .PointertoVSBindingTable = bt_state.offset); + + for (uint32_t ca = 0; ca < color_attachments; ca++) { + const struct anv_surface_view *view = + cmd_buffer->framebuffer->color_attachments[ca]; + + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, - ._3DCommandSubOpcode = binding_table_opcodes[s], - .PointertoVSBindingTable = state.offset); + memcpy(state.map, view->surface_state.map, 64); + + /* The address goes in dwords 8 and 9 of the 
SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(&cmd_buffer->surface_relocs, + cmd_buffer->device, + state.offset + 8 * 4, + view->bo, view->offset); + + bt_map[ca] = state.offset; + } + + if (layout == NULL) + return; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set]; + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct anv_descriptor_slot *surface_slots = + set_layout->stage[stage].surface_start; + + uint32_t start = bias + layout->set[set].surface_start[stage]; + + for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) { + struct anv_surface_view *view = + d->set->descriptors[surface_slots[b].index].view; + + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); + + uint32_t offset; + if (surface_slots[b].dynamic_slot >= 0) { + uint32_t dynamic_offset = + d->dynamic_offsets[surface_slots[b].dynamic_slot]; + + offset = view->offset + dynamic_offset; + fill_buffer_surface_state(state.map, view->format, offset, + view->range - dynamic_offset); + } else { + offset = view->offset; + memcpy(state.map, view->surface_state.map, 64); + } + + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(&cmd_buffer->surface_relocs, + cmd_buffer->device, + state.offset + 8 * 4, + view->bo, offset); + + bt_map[start + b] = state.offset; } + } +} - if (layout && layout->stage[s].sampler_count > 0) { - struct anv_state state; - size_t size; - - size = layout->stage[s].sampler_count * 16; - state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); - memcpy(state.map, bindings->descriptors[s].samplers, size); - - static const uint32_t sampler_state_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 43, - [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ - [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ - [VK_SHADER_STAGE_GEOMETRY] = 
46, - [VK_SHADER_STAGE_FRAGMENT] = 47, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; +static void +cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage) +{ + struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; + struct anv_state state; + + if (!layout) + return; + + uint32_t sampler_count = layout->stage[stage].sampler_count; + + if (sampler_count == 0) + return; + + uint32_t size = sampler_count * 16; + state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); + + static const uint32_t sampler_state_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 43, + [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 46, + [VK_SHADER_STAGE_FRAGMENT] = 47, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, - ._3DCommandSubOpcode = sampler_state_opcodes[s], - .PointertoVSSamplerState = state.offset); + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[stage], + .PointertoVSSamplerState = state.offset); + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set]; + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct anv_descriptor_slot *sampler_slots = + set_layout->stage[stage].sampler_start; + + uint32_t start = layout->set[set].sampler_start[stage]; + + for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) { + struct anv_sampler *sampler = + d->set->descriptors[sampler_slots[b].index].sampler; + + if (!sampler) + continue; + + memcpy(state.map + (start + b) * 16, + sampler->state, sizeof(sampler->state)); } } } +static void +flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) +{ + for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + cmd_buffer_emit_binding_table(cmd_buffer, s); + 
cmd_buffer_emit_samplers(cmd_buffer, s); + } + + cmd_buffer->dirty &= ~ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; +} + static struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, uint32_t *a, uint32_t dwords, uint32_t alignment) @@ -2970,20 +3027,20 @@ static void anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->pipeline; - struct anv_bindings *bindings = cmd_buffer->bindings; uint32_t *p; uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used; - const uint32_t num_buffers = __builtin_popcount(vb_emit); - const uint32_t num_dwords = 1 + num_buffers * 4; if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, GEN8_3DSTATE_VERTEX_BUFFERS); uint32_t vb, i = 0; for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = bindings->vb[vb].buffer; - uint32_t offset = bindings->vb[vb].offset; + struct anv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->vertex_bindings[vb].offset; struct GEN8_VERTEX_BUFFER_STATE state = { .VertexBufferIndex = vb, @@ -3561,31 +3618,6 @@ VkResult anv_CreateRenderPass( return VK_SUCCESS; } -void -anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_framebuffer *framebuffer = cmd_buffer->framebuffer; - struct anv_bindings *bindings = cmd_buffer->bindings; - - for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) { - const struct anv_surface_view *view = framebuffer->color_attachments[i]; - - struct anv_state state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - memcpy(state.map, view->surface_state.map, 64); - - /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ - *(uint64_t *)(state.map + 8 * 4) = - anv_reloc_list_add(&cmd_buffer->surface_relocs, - cmd_buffer->device, - state.offset + 8 * 4, - view->bo, view->offset); - - 
bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = state.offset; - } - cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; -} - static void anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer, struct anv_render_pass *pass) @@ -3637,6 +3669,8 @@ void anv_CmdBeginRenderPass( cmd_buffer->framebuffer = framebuffer; + cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, .ClippedDrawingRectangleYMin = pass->render_area.offset.y, .ClippedDrawingRectangleXMin = pass->render_area.offset.x, @@ -3647,8 +3681,6 @@ void anv_CmdBeginRenderPass( .DrawingRectangleOriginY = 0, .DrawingRectangleOriginX = 0); - anv_cmd_buffer_fill_render_targets(cmd_buffer); - anv_cmd_buffer_emit_depth_stencil(cmd_buffer, pass); anv_cmd_buffer_clear(cmd_buffer, pass); diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 8e9529595a2..78617f05b0c 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -152,8 +152,8 @@ anv_device_init_meta_clear_state(struct anv_device *device) #define NUM_VB_USED 2 struct anv_saved_state { - struct anv_bindings bindings; - struct anv_bindings *old_bindings; + struct anv_vertex_binding old_vertex_bindings[NUM_VB_USED]; + struct anv_descriptor_set *old_descriptor_set0; struct anv_pipeline *old_pipeline; VkDynamicCbState cb_state; }; @@ -162,39 +162,26 @@ static void anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, struct anv_saved_state *state) { - state->old_bindings = cmd_buffer->bindings; - cmd_buffer->bindings = &state->bindings; state->old_pipeline = cmd_buffer->pipeline; + state->old_descriptor_set0 = cmd_buffer->descriptors[0].set; + memcpy(state->old_vertex_bindings, cmd_buffer->vertex_bindings, + sizeof(state->old_vertex_bindings)); } static void anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, const struct anv_saved_state *state) { - cmd_buffer->bindings = state->old_bindings; cmd_buffer->pipeline = state->old_pipeline; + 
cmd_buffer->descriptors[0].set = state->old_descriptor_set0; + memcpy(cmd_buffer->vertex_bindings, state->old_vertex_bindings, + sizeof(state->old_vertex_bindings)); cmd_buffer->vb_dirty |= (1 << NUM_VB_USED) - 1; cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; } -static void -anv_cmd_buffer_copy_render_targets(struct anv_cmd_buffer *cmd_buffer, - struct anv_saved_state *state) -{ - struct anv_framebuffer *fb = cmd_buffer->framebuffer; - struct anv_bindings *old_bindings = state->old_bindings; - struct anv_bindings *bindings = cmd_buffer->bindings; - - for (uint32_t i = 0; i < fb->color_attachment_count; i++) { - bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = - old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i]; - } - - cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; -} - struct vue_header { uint32_t Reserved; uint32_t RTAIndex; @@ -262,7 +249,6 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, }; anv_cmd_buffer_save(cmd_buffer, &saved_state); - anv_cmd_buffer_copy_render_targets(cmd_buffer, &saved_state); anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, (VkBuffer[]) { diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 167a8c058b1..72a10e5c0a8 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -544,7 +544,7 @@ struct anv_query_pool { struct anv_descriptor_slot { int8_t dynamic_slot; uint8_t index; -} entries[0]; +}; struct anv_descriptor_set_layout { struct { @@ -601,17 +601,15 @@ struct anv_buffer { #define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) #define ANV_CMD_BUFFER_DS_DIRTY (1 << 3) #define ANV_CMD_BUFFER_CB_DIRTY (1 << 4) - -struct anv_bindings { - struct { - struct anv_buffer *buffer; - VkDeviceSize offset; - } vb[MAX_VBS]; - struct { - uint32_t surfaces[256]; - struct { uint32_t dwords[4]; } samplers[16]; - } descriptors[VK_NUM_SHADER_STAGE]; +struct anv_vertex_binding { + struct anv_buffer * buffer; + VkDeviceSize offset; +}; + +struct 
anv_descriptor_set_binding { + struct anv_descriptor_set * set; + uint32_t dynamic_offsets[256]; }; struct anv_cmd_buffer { @@ -644,8 +642,8 @@ struct anv_cmd_buffer { struct anv_dynamic_ds_state * ds_state; struct anv_dynamic_vp_state * vp_state; struct anv_dynamic_cb_state * cb_state; - struct anv_bindings * bindings; - struct anv_bindings default_bindings; + struct anv_vertex_binding vertex_bindings[MAX_VBS]; + struct anv_descriptor_set_binding descriptors[MAX_SETS]; }; void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); @@ -832,9 +830,6 @@ void anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, struct anv_render_pass *pass); -void -anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer); - void * anv_lookup_entrypoint(const char *name); -- cgit v1.2.3 From 4ffbab5ae06531d088404245e860822b758970e3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 29 May 2015 09:40:03 -0700 Subject: vk/device: Allow for starting a new surface state buffer This commit allows for us to create a whole new surface state buffer when the old one runs out of room. We simply re-emit the state base address for the new state, re-emit binding tables, and keep going. 
--- src/vulkan/aub.c | 11 +++-- src/vulkan/device.c | 137 +++++++++++++++++++++++++++++++++++++++++++-------- src/vulkan/private.h | 4 +- 3 files changed, 126 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/vulkan/aub.c b/src/vulkan/aub.c index dfe52213a40..e3f333a54aa 100644 --- a/src/vulkan/aub.c +++ b/src/vulkan/aub.c @@ -273,9 +273,14 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) } assert(first_bbo->prev_batch_bo == NULL); - relocate_bo(&cmd_buffer->surface_bo, - cmd_buffer->surface_relocs.relocs, - cmd_buffer->surface_relocs.num_relocs, aub_bos); + for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo; + bbo != NULL; bbo = bbo->prev_batch_bo) { + + /* Handle relocations for this surface state BO */ + relocate_bo(&bbo->bo, + &cmd_buffer->surface_relocs.relocs[bbo->first_reloc], + bbo->num_relocs, aub_bos); + } for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) { bo = cmd_buffer->exec2_bos[i]; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 54e70d3a5de..27165e229b6 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2285,16 +2285,23 @@ anv_cmd_buffer_destroy(struct anv_device *device, struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; while (bbo->prev_batch_bo) { struct anv_batch_bo *prev = bbo->prev_batch_bo; - anv_batch_bo_destroy(bbo, cmd_buffer->device); + anv_batch_bo_destroy(bbo, device); bbo = prev; } + anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); - anv_bo_pool_free(&device->batch_bo_pool, &cmd_buffer->surface_bo); + /* Destroy all of the surface state buffers */ + bbo = cmd_buffer->surface_batch_bo; + while (bbo->prev_batch_bo) { + struct anv_batch_bo *prev = bbo->prev_batch_bo; + anv_batch_bo_destroy(bbo, device); + bbo = prev; + } anv_reloc_list_finish(&cmd_buffer->surface_relocs, device); + anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); anv_state_stream_finish(&cmd_buffer->binding_table_state_stream); 
- anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); anv_device_free(device, cmd_buffer->exec2_objects); anv_device_free(device, cmd_buffer->exec2_bos); anv_device_free(device, cmd_buffer); @@ -2377,13 +2384,17 @@ VkResult anv_CreateCommandBuffer( anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START_length * 4); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &cmd_buffer->surface_bo); + result = anv_batch_bo_create(device, &cmd_buffer->surface_batch_bo); if (result != VK_SUCCESS) goto fail_batch_relocs; + cmd_buffer->surface_batch_bo->first_reloc = 0; + + result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device); + if (result != VK_SUCCESS) + goto fail_ss_batch_bo; /* Start surface_next at 1 so surface offset 0 is invalid. */ cmd_buffer->surface_next = 1; - anv_reloc_list_init(&cmd_buffer->surface_relocs, device); cmd_buffer->exec2_objects = NULL; cmd_buffer->exec2_bos = NULL; @@ -2407,6 +2418,8 @@ VkResult anv_CreateCommandBuffer( return VK_SUCCESS; + fail_ss_batch_bo: + anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device); fail_batch_relocs: anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); fail_batch_bo: @@ -2429,7 +2442,7 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) .GeneralStateBufferSize = 0xfffff, .GeneralStateBufferSizeModifyEnable = true, - .SurfaceStateBaseAddress = { &cmd_buffer->surface_bo, 0 }, + .SurfaceStateBaseAddress = { &cmd_buffer->surface_batch_bo->bo, 0 }, .SurfaceStateMemoryObjectControlState = GEN8_MOCS, .SurfaceStateBaseAddressModifyEnable = true, @@ -2599,6 +2612,9 @@ VkResult anv_EndCommandBuffer( anv_batch_emit(batch, GEN8_MI_NOOP); anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch); + cmd_buffer->surface_batch_bo->num_relocs = + cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc; + cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next; cmd_buffer->bo_count = 0; 
cmd_buffer->need_reloc = false; @@ -2606,10 +2622,13 @@ VkResult anv_EndCommandBuffer( /* Lock for access to bo->index. */ pthread_mutex_lock(&device->mutex); - /* Add block pool bos first so we can add them with their relocs. */ - anv_cmd_buffer_add_bo(cmd_buffer, &cmd_buffer->surface_bo, - cmd_buffer->surface_relocs.relocs, - cmd_buffer->surface_relocs.num_relocs); + /* Add surface state bos first so we can add them with their relocs. */ + for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo; + bbo != NULL; bbo = bbo->prev_batch_bo) { + anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, + &cmd_buffer->surface_relocs.relocs[bbo->first_reloc], + bbo->num_relocs); + } /* Add all of the BOs referenced by surface state */ anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs); @@ -2674,7 +2693,15 @@ VkResult anv_ResetCommandBuffer( anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START_length * 4); - cmd_buffer->surface_next = 0; + /* Delete all but the first batch bo */ + while (cmd_buffer->surface_batch_bo->prev_batch_bo) { + struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo; + anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, cmd_buffer->device); + cmd_buffer->surface_batch_bo = prev; + } + assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL); + + cmd_buffer->surface_next = 1; cmd_buffer->surface_relocs.num_relocs = 0; return VK_SUCCESS; @@ -2740,15 +2767,46 @@ anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, struct anv_state state; state.offset = ALIGN_U32(cmd_buffer->surface_next, alignment); - state.map = cmd_buffer->surface_bo.map + state.offset; + if (state.offset + size > cmd_buffer->surface_batch_bo->bo.size) + return (struct anv_state) { 0 }; + + state.map = cmd_buffer->surface_batch_bo->bo.map + state.offset; state.alloc_size = size; cmd_buffer->surface_next = state.offset + size; - assert(state.offset + size < cmd_buffer->surface_bo.size); + 
assert(state.offset + size <= cmd_buffer->surface_batch_bo->bo.size); return state; } +static VkResult +anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->surface_batch_bo; + + /* Finish off the old buffer */ + old_bbo->num_relocs = + cmd_buffer->surface_relocs.num_relocs - old_bbo->first_reloc; + old_bbo->length = cmd_buffer->surface_next; + + VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + if (result != VK_SUCCESS) + return result; + + new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs; + cmd_buffer->surface_next = 1; + + new_bbo->prev_batch_bo = old_bbo; + cmd_buffer->surface_batch_bo = new_bbo; + + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. + */ + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + return VK_SUCCESS; +} + void anv_CmdBindDescriptorSets( VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, @@ -2827,7 +2885,7 @@ void anv_CmdBindVertexBuffers( } } -static void +static VkResult cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, unsigned stage) { @@ -2849,12 +2907,15 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, uint32_t surface_count = layout ? 
layout->stage[stage].surface_count : 0; if (color_attachments + surface_count == 0) - return; + return VK_SUCCESS; size = (bias + surface_count) * sizeof(uint32_t); bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); uint32_t *bt_map = bt_state.map; + if (bt_state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + static const uint32_t binding_table_opcodes[] = { [VK_SHADER_STAGE_VERTEX] = 38, [VK_SHADER_STAGE_TESS_CONTROL] = 39, @@ -2876,6 +2937,9 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_state state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); + if (state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + memcpy(state.map, view->surface_state.map, 64); /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ @@ -2889,7 +2953,7 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, } if (layout == NULL) - return; + return VK_SUCCESS; for (uint32_t set = 0; set < layout->num_sets; set++) { struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set]; @@ -2906,6 +2970,9 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_state state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); + if (state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + uint32_t offset; if (surface_slots[b].dynamic_slot >= 0) { uint32_t dynamic_offset = @@ -2929,25 +2996,30 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, bt_map[start + b] = state.offset; } } + + return VK_SUCCESS; } -static void +static VkResult cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage) { struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; struct anv_state state; if (!layout) - return; + return VK_SUCCESS; uint32_t sampler_count = layout->stage[stage].sampler_count; if (sampler_count == 0) - return; + return VK_SUCCESS; uint32_t size = sampler_count * 16; state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, 
size, 32); + if (state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + static const uint32_t sampler_state_opcodes[] = { [VK_SHADER_STAGE_VERTEX] = 43, [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ @@ -2981,14 +3053,37 @@ cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage) sampler->state, sizeof(sampler->state)); } } + + return VK_SUCCESS; } static void flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { + VkResult result; for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { - cmd_buffer_emit_binding_table(cmd_buffer, s); - cmd_buffer_emit_samplers(cmd_buffer, s); + result = cmd_buffer_emit_binding_table(cmd_buffer, s); + if (result != VK_SUCCESS) + break; + + result = cmd_buffer_emit_samplers(cmd_buffer, s); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) { + assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); + + result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); + assert(result == VK_SUCCESS); + + for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + result = cmd_buffer_emit_binding_table(cmd_buffer, s); + result = cmd_buffer_emit_samplers(cmd_buffer, s); + } + + /* It had better succeed this time */ + assert(result == VK_SUCCESS); } cmd_buffer->dirty &= ~ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 72a10e5c0a8..153cb22d245 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -609,7 +609,7 @@ struct anv_vertex_binding { struct anv_descriptor_set_binding { struct anv_descriptor_set * set; - uint32_t dynamic_offsets[256]; + uint32_t dynamic_offsets[128]; }; struct anv_cmd_buffer { @@ -626,7 +626,7 @@ struct anv_cmd_buffer { uint32_t bo_count; struct anv_batch batch; struct anv_batch_bo * last_batch_bo; - struct anv_bo surface_bo; + struct anv_batch_bo * surface_batch_bo; uint32_t surface_next; struct anv_reloc_list surface_relocs; struct anv_state_stream binding_table_state_stream; -- cgit v1.2.3 From 
03ffa9ca31f4ffcb7d4eebe45e2541de202cb985 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 29 May 2015 20:43:10 -0700 Subject: vk: Don't crash on partial descriptor sets --- src/vulkan/device.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 27165e229b6..13137751a75 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2967,6 +2967,9 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_surface_view *view = d->set->descriptors[surface_slots[b].index].view; + if (!view) + continue; + struct anv_state state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); -- cgit v1.2.3 From b2b9fc9fadb1afeae64357c76829a5afd9a0c758 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 29 May 2015 21:15:47 -0700 Subject: vk/allocator: Don't call VALGRIND_MALLOCLIKE_BLOCK on fresh gem_mmap's --- src/vulkan/allocator.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index aeb08ce1cec..950b23c857a 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -700,7 +700,11 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) return vk_error(VK_ERROR_MEMORY_MAP_FAILED); } - VG(VALGRIND_MALLOCLIKE_BLOCK(new_bo.map, pool->bo_size, 0, false)); + /* We don't need to call VALGRIND_MALLOCLIKE_BLOCK here because valgrind + * already picks up on the gem_mmap and treats that as a malloc. If we + * really want to be pedantic we could do a VALGRIND_FREELIKE_BLOCK + * right after the mmap, but there's no good reason. 
+ */ *bo = new_bo; return VK_SUCCESS; -- cgit v1.2.3 From 33cccbbb738ee71b4b4827707ebd63c791b39d91 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 30 May 2015 08:02:52 -0700 Subject: vk/device: Emit PIPE_CONTROL flushes surrounding new STATE_BASE_ADDRESS According to the bspec, you're supposed to emit a PIPE_CONTROL with a CS stall and a render target flush prior to changing STATE_BASE_ADDRESS. A little experimentation, however, shows that this is not enough. It also appears as if you have to flush the texture cache after changing base address or things won't propagate properly. --- src/vulkan/device.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 13137751a75..87e2e6f85ab 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2802,8 +2802,23 @@ anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) /* Re-emit state base addresses so we get the new surface state base * address before we start emitting binding tables etc. */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .CommandStreamerStallEnable = true, + .RenderTargetCacheFlushEnable = true); anv_cmd_buffer_emit_state_base_address(cmd_buffer); + /* It seems like just chainging the state base addresses isn't enough. + * If we don't do another PIPE_CONTROL afterwards to invalidate the + * texture cache, we still don't always get the right results. I have + * no idea if this is actually what we are supposed to do, but it seems + * to work. + * + * FIXME: We should look into this more. Maybe there is something more + * specific we're supposed to be doing.
+ */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .TextureCacheInvalidationEnable = true); + return VK_SUCCESS; } -- cgit v1.2.3 From 2251305e1ac14417a634e175e53736a6f01dad0f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 30 May 2015 10:07:29 -0700 Subject: vk/cmd_buffer: Track descriptor set dirtying per-stage --- src/vulkan/device.c | 22 +++++++++++++++------- src/vulkan/meta.c | 4 ++-- src/vulkan/private.h | 3 ++- 3 files changed, 19 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 87e2e6f85ab..6dc411c46be 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1754,6 +1754,7 @@ VkResult anv_CreateDescriptorSetLayout( uint32_t surface_count[VK_NUM_SHADER_STAGE] = { 0, }; uint32_t num_dynamic_buffers = 0; uint32_t count = 0; + uint32_t stages = 0; uint32_t s; for (uint32_t i = 0; i < pCreateInfo->count; i++) { @@ -1793,6 +1794,7 @@ VkResult anv_CreateDescriptorSetLayout( break; } + stages |= pCreateInfo->pBinding[i].stageFlags; count += pCreateInfo->pBinding[i].count; } @@ -1812,6 +1814,7 @@ VkResult anv_CreateDescriptorSetLayout( set_layout->num_dynamic_buffers = num_dynamic_buffers; set_layout->count = count; + set_layout->shader_stages = stages; struct anv_descriptor_slot *p = set_layout->entries; struct anv_descriptor_slot *sampler[VK_NUM_SHADER_STAGE]; @@ -2409,6 +2412,7 @@ VkResult anv_CreateCommandBuffer( cmd_buffer->dirty = 0; cmd_buffer->vb_dirty = 0; + cmd_buffer->descriptors_dirty = 0; cmd_buffer->pipeline = NULL; cmd_buffer->vp_state = NULL; cmd_buffer->rs_state = NULL; @@ -2851,10 +2855,10 @@ void anv_CmdBindDescriptorSets( pDynamicOffsets + dynamic_slot, set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); + cmd_buffer->descriptors_dirty |= set_layout->shader_stages; + dynamic_slot += set_layout->num_dynamic_buffers; } - - cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; } void anv_CmdBindIndexBuffer( @@ -3078,8 +3082,11 @@ 
cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage) static void flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { + uint32_t s, dirty = cmd_buffer->descriptors_dirty & + cmd_buffer->pipeline->active_stages; + VkResult result; - for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + for_each_bit(s, dirty) { result = cmd_buffer_emit_binding_table(cmd_buffer, s); if (result != VK_SUCCESS) break; @@ -3095,7 +3102,8 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); assert(result == VK_SUCCESS); - for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + /* Re-emit all active binding tables */ + for_each_bit(s, cmd_buffer->pipeline->active_stages) { result = cmd_buffer_emit_binding_table(cmd_buffer, s); result = cmd_buffer_emit_samplers(cmd_buffer, s); } @@ -3104,7 +3112,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) assert(result == VK_SUCCESS); } - cmd_buffer->dirty &= ~ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; + cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages; } static struct anv_state @@ -3172,7 +3180,7 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - if (cmd_buffer->dirty & ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY) + if (cmd_buffer->descriptors_dirty) flush_descriptor_sets(cmd_buffer); if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { @@ -3782,7 +3790,7 @@ void anv_CmdBeginRenderPass( cmd_buffer->framebuffer = framebuffer; - cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; + cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, .ClippedDrawingRectangleYMin = pass->render_area.offset.y, diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 78617f05b0c..ee9593ae995 100644 --- 
a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -178,8 +178,8 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, sizeof(state->old_vertex_bindings)); cmd_buffer->vb_dirty |= (1 << NUM_VB_USED) - 1; - cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; + cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; } struct vue_header { diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 153cb22d245..4b7c84262d3 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -556,6 +556,7 @@ struct anv_descriptor_set_layout { uint32_t count; uint32_t num_dynamic_buffers; + uint32_t shader_stages; struct anv_descriptor_slot entries[0]; }; @@ -597,7 +598,6 @@ struct anv_buffer { }; #define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 0) -#define ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY (1 << 1) #define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) #define ANV_CMD_BUFFER_DS_DIRTY (1 << 3) #define ANV_CMD_BUFFER_CB_DIRTY (1 << 4) @@ -636,6 +636,7 @@ struct anv_cmd_buffer { /* State required while building cmd buffer */ uint32_t vb_dirty; uint32_t dirty; + uint32_t descriptors_dirty; struct anv_pipeline * pipeline; struct anv_framebuffer * framebuffer; struct anv_dynamic_rs_state * rs_state; -- cgit v1.2.3 From e497ac2c62ad5299bbf032220a287c144add7cda Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 30 May 2015 18:04:48 -0700 Subject: vk/device: Only flush the texture cache when setting state base address After further examination, it appears that the other flushes and stalls weren't actually needed. 
--- src/vulkan/device.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 6dc411c46be..f0e73b3687a 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2806,19 +2806,11 @@ anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) /* Re-emit state base addresses so we get the new surface state base * address before we start emitting binding tables etc. */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .CommandStreamerStallEnable = true, - .RenderTargetCacheFlushEnable = true); anv_cmd_buffer_emit_state_base_address(cmd_buffer); - /* It seems like just chainging the state base addresses isn't enough. - * If we don't do another PIPE_CONTROL afterwards to invalidate the - * texture cache, we still don't always get the right results. I have - * no idea if this is actually what we are supposed to do, but it seems - * to work. - * - * FIXME: We should look into this more. Maybe there is something more - * specific we're supposed to be doing. + /* It seems like just changing the state base addresses isn't enough. + * Invalidating the cache seems to be enough to cause things to + * propagate. However, I'm not 100% sure what we're supposed to do. */ anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, .TextureCacheInvalidationEnable = true); -- cgit v1.2.3 From dc56e4f7b88ab79be12632fbf032ef85fd32f602 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 29 May 2015 16:06:06 -0700 Subject: vk: Implement support for sampler border colors This supports the three Vulkan border color types for float color formats. The support for integer formats is a little trickier, as we don't know the format of the texture at this time. 
--- src/vulkan/device.c | 37 ++++++++++++++++++++++++++++++++++++- src/vulkan/private.h | 3 +++ 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index f0e73b3687a..1622bb8853e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -303,6 +303,35 @@ parse_debug_flags(struct anv_device *device) } } +static void +anv_device_init_border_colors(struct anv_device *device) +{ + float float_border_colors[][4] = { + [VK_BORDER_COLOR_OPAQUE_WHITE] = { 1.0, 1.0, 1.0, 1.0 }, + [VK_BORDER_COLOR_TRANSPARENT_BLACK] = { 0.0, 0.0, 0.0, 0.0 }, + [VK_BORDER_COLOR_OPAQUE_BLACK] = { 0.0, 0.0, 0.0, 1.0 } + }; + + uint32_t uint32_border_colors[][4] = { + [VK_BORDER_COLOR_OPAQUE_WHITE] = { 1, 1, 1, 1 }, + [VK_BORDER_COLOR_TRANSPARENT_BLACK] = { 0, 0, 0, 0 }, + [VK_BORDER_COLOR_OPAQUE_BLACK] = { 0, 0, 0, 1 } + }; + + device->float_border_colors = + anv_state_pool_alloc(&device->dynamic_state_pool, + sizeof(float_border_colors), 32); + memcpy(device->float_border_colors.map, + float_border_colors, sizeof(float_border_colors)); + + device->uint32_border_colors = + anv_state_pool_alloc(&device->dynamic_state_pool, + sizeof(uint32_border_colors), 32); + memcpy(device->uint32_border_colors.map, + uint32_border_colors, sizeof(uint32_border_colors)); + +} + static const uint32_t BATCH_SIZE = 8192; VkResult anv_CreateDevice( @@ -366,6 +395,8 @@ VkResult anv_CreateDevice( anv_device_init_meta(device); + anv_device_init_border_colors(device); + *pDevice = (VkDevice) device; return VK_SUCCESS; @@ -1715,7 +1746,11 @@ VkResult anv_CreateSampler( .ChromaKeyMode = 0, .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], .CubeSurfaceControlMode = 0, - .IndirectStatePointer = 0, + + .IndirectStatePointer = + device->float_border_colors.offset + + pCreateInfo->borderColor * sizeof(float) * 4; + .LODClampMagnificationMode = MIPNONE, .MaximumAnisotropy = 0, .RAddressMinFilterRoundingEnable = 0, diff --git 
a/src/vulkan/private.h b/src/vulkan/private.h index 4b7c84262d3..25f8a230398 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -336,6 +336,9 @@ struct anv_device { struct anv_meta_state meta_state; + struct anv_state float_border_colors; + struct anv_state uint32_border_colors; + struct anv_compiler * compiler; struct anv_aub_writer * aub_writer; pthread_mutex_t mutex; -- cgit v1.2.3 From 76bb658518bf06a706d258710501bde61ef54071 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 31 May 2015 22:15:34 -0700 Subject: vk: Add support for anisotropic bits --- src/vulkan/device.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 1622bb8853e..9a87aafe053 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1688,6 +1688,7 @@ VkResult anv_CreateSampler( { struct anv_device *device = (struct anv_device *) _device; struct anv_sampler *sampler; + uint32_t mag_filter, min_filter, max_anisotropy; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); @@ -1726,17 +1727,24 @@ VkResult anv_CreateSampler( [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, }; - if (pCreateInfo->maxAnisotropy > 0) - anv_finishme("missing support for anisotropic filtering"); - + if (pCreateInfo->maxAnisotropy > 1) { + mag_filter = MAPFILTER_ANISOTROPIC; + min_filter = MAPFILTER_ANISOTROPIC; + max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2; + } else { + mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter]; + min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; + max_anisotropy = RATIO21; + } + struct GEN8_SAMPLER_STATE sampler_state = { .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, .LODPreClampMode = 0, .BaseMipLevel = 0, .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], - .MagModeFilter = vk_to_gen_tex_filter[pCreateInfo->magFilter], - .MinModeFilter = vk_to_gen_tex_filter[pCreateInfo->minFilter], + 
.MagModeFilter = mag_filter, + .MinModeFilter = min_filter, .TextureLODBias = pCreateInfo->mipLodBias * 256, .AnisotropicAlgorithm = EWAApproximation, .MinLOD = pCreateInfo->minLod * 256, @@ -1749,10 +1757,10 @@ VkResult anv_CreateSampler( .IndirectStatePointer = device->float_border_colors.offset + - pCreateInfo->borderColor * sizeof(float) * 4; + pCreateInfo->borderColor * sizeof(float) * 4, .LODClampMagnificationMode = MIPNONE, - .MaximumAnisotropy = 0, + .MaximumAnisotropy = max_anisotropy, .RAddressMinFilterRoundingEnable = 0, .RAddressMagFilterRoundingEnable = 0, .VAddressMinFilterRoundingEnable = 0, -- cgit v1.2.3 From 5caa40857984d445d1cf6576d869f803af240b2c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 31 May 2015 22:35:11 -0700 Subject: vk: Indent tables to align '=' at column 48 --- src/vulkan/device.c | 66 +++++++++++++++++++++++----------------------- src/vulkan/image.c | 12 ++++----- src/vulkan/pipeline.c | 72 +++++++++++++++++++++++++-------------------------- 3 files changed, 75 insertions(+), 75 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 9a87aafe053..07563d8640e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1698,33 +1698,33 @@ VkResult anv_CreateSampler( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); static const uint32_t vk_to_gen_tex_filter[] = { - [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST, - [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR + [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST, + [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR }; static const uint32_t vk_to_gen_mipmap_mode[] = { - [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, - [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, - [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR + [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, + [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR }; static const uint32_t vk_to_gen_tex_address[] = { - [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, - 
[VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, - [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, - [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, - [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, + [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, + [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, + [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, + [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, + [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, }; static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, }; if (pCreateInfo->maxAnisotropy > 1) { @@ -2906,9 +2906,9 @@ void anv_CmdBindIndexBuffer( struct anv_buffer *buffer = (struct anv_buffer *) _buffer; static const uint32_t vk_to_gen_index_type[] = { - [VK_INDEX_TYPE_UINT8] = INDEX_BYTE, - [VK_INDEX_TYPE_UINT16] = INDEX_WORD, - [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + [VK_INDEX_TYPE_UINT8] = INDEX_BYTE, + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, }; anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, @@ -2971,12 +2971,12 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, return VK_ERROR_OUT_OF_DEVICE_MEMORY; static const uint32_t binding_table_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 38, - [VK_SHADER_STAGE_TESS_CONTROL] = 39, - [VK_SHADER_STAGE_TESS_EVALUATION] = 40, - 
[VK_SHADER_STAGE_GEOMETRY] = 41, - [VK_SHADER_STAGE_FRAGMENT] = 42, - [VK_SHADER_STAGE_COMPUTE] = 0, + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, }; anv_batch_emit(&cmd_buffer->batch, @@ -3078,12 +3078,12 @@ cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage) return VK_ERROR_OUT_OF_DEVICE_MEMORY; static const uint32_t sampler_state_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 43, - [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ - [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ - [VK_SHADER_STAGE_GEOMETRY] = 46, - [VK_SHADER_STAGE_FRAGMENT] = 47, - [VK_SHADER_STAGE_COMPUTE] = 0, + [VK_SHADER_STAGE_VERTEX] = 43, + [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 46, + [VK_SHADER_STAGE_FRAGMENT] = 47, + [VK_SHADER_STAGE_COMPUTE] = 0, }; anv_batch_emit(&cmd_buffer->batch, diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 42bb28f9823..14e9e3d5e61 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -180,12 +180,12 @@ anv_image_view_init(struct anv_surface_view *view, view->extent = image->extent; static const uint32_t vk_to_gen_swizzle[] = { - [VK_CHANNEL_SWIZZLE_ZERO] = SCS_ZERO, - [VK_CHANNEL_SWIZZLE_ONE] = SCS_ONE, - [VK_CHANNEL_SWIZZLE_R] = SCS_RED, - [VK_CHANNEL_SWIZZLE_G] = SCS_GREEN, - [VK_CHANNEL_SWIZZLE_B] = SCS_BLUE, - [VK_CHANNEL_SWIZZLE_A] = SCS_ALPHA + [VK_CHANNEL_SWIZZLE_ZERO] = SCS_ZERO, + [VK_CHANNEL_SWIZZLE_ONE] = SCS_ONE, + [VK_CHANNEL_SWIZZLE_R] = SCS_RED, + [VK_CHANNEL_SWIZZLE_G] = SCS_GREEN, + [VK_CHANNEL_SWIZZLE_B] = SCS_BLUE, + [VK_CHANNEL_SWIZZLE_A] = SCS_ALPHA }; struct GEN8_RENDER_SURFACE_STATE surface_state = { diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index e8441e6efd5..6babd183f1f 100644 --- a/src/vulkan/pipeline.c +++ 
b/src/vulkan/pipeline.c @@ -129,17 +129,17 @@ emit_ia_state(struct anv_pipeline *pipeline, const struct anv_pipeline_create_info *extra) { static const uint32_t vk_to_gen_primitive_type[] = { - [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LISTSTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LISTSTRIP_ADJ, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 + [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 }; uint32_t topology = vk_to_gen_primitive_type[info->topology]; @@ -158,26 +158,26 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, const struct anv_pipeline_create_info *extra) { static const uint32_t vk_to_gen_cullmode[] = { - [VK_CULL_MODE_NONE] = CULLMODE_NONE, - [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, - [VK_CULL_MODE_BACK] = CULLMODE_BACK, - [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH }; static 
const uint32_t vk_to_gen_fillmode[] = { - [VK_FILL_MODE_POINTS] = RASTER_POINT, - [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, - [VK_FILL_MODE_SOLID] = RASTER_SOLID + [VK_FILL_MODE_POINTS] = RASTER_POINT, + [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, + [VK_FILL_MODE_SOLID] = RASTER_SOLID }; static const uint32_t vk_to_gen_front_face[] = { - [VK_FRONT_FACE_CCW] = CounterClockwise, - [VK_FRONT_FACE_CW] = Clockwise + [VK_FRONT_FACE_CCW] = CounterClockwise, + [VK_FRONT_FACE_CW] = Clockwise }; static const uint32_t vk_to_gen_coordinate_origin[] = { - [VK_COORDINATE_ORIGIN_UPPER_LEFT] = UPPERLEFT, - [VK_COORDINATE_ORIGIN_LOWER_LEFT] = LOWERLEFT + [VK_COORDINATE_ORIGIN_UPPER_LEFT] = UPPERLEFT, + [VK_COORDINATE_ORIGIN_LOWER_LEFT] = LOWERLEFT }; struct GEN8_3DSTATE_SF sf = { @@ -314,25 +314,25 @@ emit_cb_state(struct anv_pipeline *pipeline, VkPipelineCbStateCreateInfo *info) } static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER, - [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS, - [VK_COMPARE_OP_EQUAL] = COMPAREFUNCTION_EQUAL, - [VK_COMPARE_OP_LESS_EQUAL] = COMPAREFUNCTION_LEQUAL, - [VK_COMPARE_OP_GREATER] = COMPAREFUNCTION_GREATER, - [VK_COMPARE_OP_NOT_EQUAL] = COMPAREFUNCTION_NOTEQUAL, - [VK_COMPARE_OP_GREATER_EQUAL] = COMPAREFUNCTION_GEQUAL, - [VK_COMPARE_OP_ALWAYS] = COMPAREFUNCTION_ALWAYS, + [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER, + [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS, + [VK_COMPARE_OP_EQUAL] = COMPAREFUNCTION_EQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = COMPAREFUNCTION_LEQUAL, + [VK_COMPARE_OP_GREATER] = COMPAREFUNCTION_GREATER, + [VK_COMPARE_OP_NOT_EQUAL] = COMPAREFUNCTION_NOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = COMPAREFUNCTION_GEQUAL, + [VK_COMPARE_OP_ALWAYS] = COMPAREFUNCTION_ALWAYS, }; static const uint32_t vk_to_gen_stencil_op[] = { - [VK_STENCIL_OP_KEEP] = 0, - [VK_STENCIL_OP_ZERO] = 0, - [VK_STENCIL_OP_REPLACE] = 0, - [VK_STENCIL_OP_INC_CLAMP] = 0, - [VK_STENCIL_OP_DEC_CLAMP] = 0, - [VK_STENCIL_OP_INVERT] = 
0, - [VK_STENCIL_OP_INC_WRAP] = 0, - [VK_STENCIL_OP_DEC_WRAP] = 0 + [VK_STENCIL_OP_KEEP] = 0, + [VK_STENCIL_OP_ZERO] = 0, + [VK_STENCIL_OP_REPLACE] = 0, + [VK_STENCIL_OP_INC_CLAMP] = 0, + [VK_STENCIL_OP_DEC_CLAMP] = 0, + [VK_STENCIL_OP_INVERT] = 0, + [VK_STENCIL_OP_INC_WRAP] = 0, + [VK_STENCIL_OP_DEC_WRAP] = 0 }; static void -- cgit v1.2.3 From aded32bf04ed4b5f64bd2181f4fc885aabfe5701 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 1 Jun 2015 10:00:44 -0700 Subject: NIR: Add a helper for doing sampler lowering for vulkan --- src/glsl/nir/nir.h | 1 + src/glsl/nir/nir_lower_samplers.cpp | 94 ++++++++++++++++++++++++++++--------- 2 files changed, 74 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 61306e9b7e0..38dbe13ac03 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1670,6 +1670,7 @@ void nir_lower_phis_to_scalar(nir_shader *shader); void nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program, gl_shader_stage stage); +void nir_lower_samplers_for_vk(nir_shader *shader); void nir_lower_system_values(nir_shader *shader); void nir_lower_tex_projector(nir_shader *shader); diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 8fc5909b711..6ed5a4cb2b5 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -35,6 +35,30 @@ extern "C" { #include "program/program.h" } +static void +add_indirect_to_tex(nir_tex_instr *instr, nir_src indirect) +{ + /* First, we have to resize the array of texture sources */ + nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, + instr->num_srcs + 1); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + new_srcs[i].src_type = instr->src[i].src_type; + nir_instr_move_src(&instr->instr, &new_srcs[i].src, &instr->src[i].src); + } + + ralloc_free(instr->src); + instr->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * 
first-class texture source. + */ + instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; + instr->num_srcs++; + nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src, + indirect); +} + static unsigned get_sampler_index(const struct gl_shader_program *shader_program, gl_shader_stage stage, const char *name) @@ -83,27 +107,9 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset); break; case nir_deref_array_type_indirect: { - /* First, we have to resize the array of texture sources */ - nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, - instr->num_srcs + 1); - - for (unsigned i = 0; i < instr->num_srcs; i++) { - new_srcs[i].src_type = instr->src[i].src_type; - nir_instr_move_src(&instr->instr, &new_srcs[i].src, - &instr->src[i].src); - } - - ralloc_free(instr->src); - instr->src = new_srcs; - - /* Now we can go ahead and move the source over to being a - * first-class texture source. 
- */ - instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; - instr->num_srcs++; - nir_instr_move_src(&instr->instr, - &instr->src[instr->num_srcs - 1].src, - &deref_array->indirect); + add_indirect_to_tex(instr, deref_array->indirect); + nir_instr_rewrite_src(&instr->instr, &deref_array->indirect, + NIR_SRC_INIT); instr->sampler_array_size = glsl_get_length(deref->type); @@ -183,3 +189,49 @@ nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_pr lower_impl(overload->impl, shader_program, stage); } } + +static bool +lower_samplers_for_vk_block(nir_block *block, void *data) +{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + + assert(tex->sampler); + + tex->sampler_set = tex->sampler->var->data.descriptor_set; + tex->sampler_index = tex->sampler->var->data.binding; + + if (tex->sampler->deref.child) { + assert(tex->sampler->deref.child->deref_type == nir_deref_type_array); + nir_deref_array *arr = nir_deref_as_array(tex->sampler->deref.child); + + /* Only one-level arrays are allowed in vulkan */ + assert(arr->deref.child == NULL); + + tex->sampler_index += arr->base_offset; + if (arr->deref_array_type == nir_deref_array_type_indirect) { + add_indirect_to_tex(tex, arr->indirect); + nir_instr_rewrite_src(instr, &arr->indirect, NIR_SRC_INIT); + + tex->sampler_array_size = glsl_get_length(tex->sampler->deref.type); + } + } + + tex->sampler = NULL; + } + + return true; +} + +extern "C" void +nir_lower_samplers_for_vk(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) { + nir_foreach_block(overload->impl, lower_samplers_for_vk_block, NULL); + } + } +} -- cgit v1.2.3 From 510b5c3bedced1fe4cda0f5d459c45b83914b622 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 1 Jun 2015 10:01:23 -0700 Subject: vk/HACK: Plumb real descriptor set/index into textures --- src/glsl/nir/glsl_to_nir.cpp | 1 + 
src/glsl/nir/nir.h | 4 ++++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 4 +++- src/mesa/drivers/dri/i965/brw_nir.c | 2 +- src/vulkan/compiler.cpp | 2 -- 5 files changed, 9 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 7a20e1a36f5..7c30be3fa72 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -321,6 +321,7 @@ nir_visitor::visit(ir_variable *ir) } var->data.index = ir->data.index; + var->data.descriptor_set = ir->data.set; var->data.binding = ir->data.binding; /* XXX Get rid of buffer_index */ var->data.atomic.buffer_index = ir->data.binding; diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 38dbe13ac03..c666d93e66b 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -300,6 +300,7 @@ typedef struct { * * For array types, this represents the binding point for the first element. */ + int descriptor_set; int binding; /** @@ -985,6 +986,9 @@ typedef struct { /* gather component selector */ unsigned component : 2; + /* The descriptor set containing this texture */ + unsigned sampler_set; + /** The sampler index * * If this texture instruction has a nir_tex_src_sampler_offset source, diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 82dbca3cf43..2623241a056 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1622,7 +1622,9 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) void fs_visitor::nir_emit_texture(nir_tex_instr *instr) { - unsigned sampler = instr->sampler_index; + uint32_t set = instr->sampler_set; + uint32_t index = instr->sampler_index; + unsigned sampler = stage_prog_data->bind_map[set][index]; fs_reg sampler_reg(sampler); /* FINISHME: We're failing to recompile our programs when the sampler is diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index de4d7aafd44..e4119b1aa3f 100644 
--- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -144,7 +144,7 @@ brw_create_nir(struct brw_context *brw, nir_validate_shader(nir); if (shader_prog) { - nir_lower_samplers(nir, shader_prog, stage); + nir_lower_samplers_for_vk(nir); nir_validate_shader(nir); } diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 2bd197e352b..42c54fbe8d3 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -71,8 +71,6 @@ set_binding_table_layout(struct brw_stage_prog_data *prog_data, else bias = 0; - prog_data->binding_table.texture_start = bias; - count = layout->stage[stage].surface_count; prog_data->map_entries = (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0])); -- cgit v1.2.3 From d4cbf6a728a149e312f00c92f3d20b1da4df8117 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 1 Jun 2015 12:24:31 -0700 Subject: vk/compiler: Add an index_count to the bind map and check for OOB --- src/mesa/drivers/dri/i965/brw_context.h | 5 ++++- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 +++++++++++++--- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 ++-- src/vulkan/compiler.cpp | 5 ++++- 4 files changed, 23 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index b6cdc82444c..5a35e48a481 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -360,7 +360,10 @@ struct brw_stage_prog_data { } binding_table; uint32_t *map_entries; - uint32_t *bind_map[8]; /* MAX_SETS from vulkan/private.h */ + struct { + uint32_t index_count; + uint32_t *index; + } bind_map[8]; /* MAX_SETS from vulkan/private.h */ GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 2623241a056..270131a73d1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1403,7 +1403,13 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) uint32_t set = shader->base.UniformBlocks[index].Set; uint32_t binding = shader->base.UniformBlocks[index].Binding; - surf_index = fs_reg(stage_prog_data->bind_map[set][binding]); + /* FIXME: We should probably assert here, but dota2 seems to hit + * it and we'd like to keep going. + */ + if (binding >= stage_prog_data->bind_map[set].index_count) + binding = 0; + + surf_index = fs_reg(stage_prog_data->bind_map[set].index[binding]); } else { assert(0 && "need more info from the ir for this."); /* The block index is not a constant. Evaluate the index expression @@ -1623,8 +1629,12 @@ void fs_visitor::nir_emit_texture(nir_tex_instr *instr) { uint32_t set = instr->sampler_set; - uint32_t index = instr->sampler_index; - unsigned sampler = stage_prog_data->bind_map[set][index]; + uint32_t binding = instr->sampler_index; + + assert(binding < stage_prog_data->bind_map[set].index_count); + assert(stage_prog_data->bind_map[set].index[binding] < 1000); + + unsigned sampler = stage_prog_data->bind_map[set].index[binding]; fs_reg sampler_reg(sampler); /* FINISHME: We're failing to recompile our programs when the sampler is diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 07853c0d0d6..e1f47d4ec44 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1207,7 +1207,7 @@ fs_visitor::visit(ir_expression *ir) index = const_uniform_block->value.u[0]; set = shader->base.UniformBlocks[index].Set; set_index = shader->base.UniformBlocks[index].Binding; - binding = stage_prog_data->bind_map[set][set_index]; + binding = stage_prog_data->bind_map[set].index[set_index]; surf_index = fs_reg(binding); } else { assert(0 && "need more info from the ir for this."); @@ -2302,7 +2302,7 @@ fs_visitor::visit(ir_texture *ir) assert(deref_var); ir_variable *var = 
deref_var->var; - sampler = stage_prog_data->bind_map[var->data.set][var->data.index]; + sampler = stage_prog_data->bind_map[var->data.set].index[var->data.index]; ir_rvalue *nonconst_sampler_index = _mesa_get_sampler_array_nonconst_index(ir->sampler); diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 42c54fbe8d3..ead4117479c 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -80,9 +80,12 @@ set_binding_table_layout(struct brw_stage_prog_data *prog_data, k = bias; map = prog_data->map_entries; for (uint32_t i = 0; i < layout->num_sets; i++) { - prog_data->bind_map[i] = map; + prog_data->bind_map[i].index = map; for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++) *map++ = k++; + + prog_data->bind_map[i].index_count = + layout->set[i].layout->stage[stage].surface_count; } return VK_SUCCESS; -- cgit v1.2.3 From 08748e3a0c069926627ea21d8077ca50a9a2fc92 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 7 May 2015 15:07:49 -0700 Subject: i965: Use NIR by default for vertex shaders on GEN8+ GLSL IR vs. NIR shader-db results for SIMD8 vertex shaders on Broadwell: total instructions in shared programs: 2742062 -> 2681339 (-2.21%) instructions in affected programs: 1514770 -> 1454047 (-4.01%) helped: 5813 HURT: 1120 The gained programs are ARB vertex programs that were previously going through the vec4 backend. Now that we have prog_to_nir, ARB vertex programs can go through the scalar backend so they show up as "gained" in the shader-db results. 
Acked-by: Kenneth Graunke Reviewed-by: Ian Romanick Acked-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 18a30a5925e..e01a7dbabee 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -588,7 +588,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true; ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false; - if (brw_env_var_as_boolean("INTEL_USE_NIR", false)) + if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions = &nir_options; } -- cgit v1.2.3 From f98c89ef310bba6607935e36f67637d38d1cff89 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 1 Jun 2015 21:52:45 -0700 Subject: vk: Move query related functionality to new file query.c --- src/vulkan/Makefile.am | 3 +- src/vulkan/device.c | 315 ------------------------------------------- src/vulkan/private.h | 13 -- src/vulkan/query.c | 356 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 358 insertions(+), 329 deletions(-) create mode 100644 src/vulkan/query.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index ec670c00f6a..ae61b67802c 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -68,7 +68,8 @@ libvulkan_la_SOURCES = \ entrypoints.h \ x11.c \ formats.c \ - compiler.cpp + compiler.cpp \ + query.c BUILT_SOURCES = \ entrypoints.h \ diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 07563d8640e..b76942f5de5 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1452,122 +1452,6 @@ VkResult anv_ResetEvent( stub_return(VK_UNSUPPORTED); } -// Query functions - -static void -anv_query_pool_destroy(struct anv_device 
*device, - struct anv_object *object, - VkObjectType obj_type) -{ - struct anv_query_pool *pool = (struct anv_query_pool *) object; - - assert(obj_type == VK_OBJECT_TYPE_QUERY_POOL); - - anv_gem_munmap(pool->bo.map, pool->bo.size); - anv_gem_close(device, pool->bo.gem_handle); - anv_device_free(device, pool); -} - -VkResult anv_CreateQueryPool( - VkDevice _device, - const VkQueryPoolCreateInfo* pCreateInfo, - VkQueryPool* pQueryPool) -{ - struct anv_device *device = (struct anv_device *) _device; - struct anv_query_pool *pool; - VkResult result; - size_t size; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); - - switch (pCreateInfo->queryType) { - case VK_QUERY_TYPE_OCCLUSION: - break; - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - return VK_UNSUPPORTED; - default: - unreachable(""); - } - - pool = anv_device_alloc(device, sizeof(*pool), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (pool == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - pool->base.destructor = anv_query_pool_destroy; - - pool->type = pCreateInfo->queryType; - size = pCreateInfo->slots * sizeof(struct anv_query_pool_slot); - result = anv_bo_init_new(&pool->bo, device, size); - if (result != VK_SUCCESS) - goto fail; - - pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size); - - *pQueryPool = (VkQueryPool) pool; - - return VK_SUCCESS; - - fail: - anv_device_free(device, pool); - - return result; -} - -VkResult anv_GetQueryPoolResults( - VkDevice _device, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount, - size_t* pDataSize, - void* pData, - VkQueryResultFlags flags) -{ - struct anv_device *device = (struct anv_device *) _device; - struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; - struct anv_query_pool_slot *slot = pool->bo.map; - int64_t timeout = INT64_MAX; - uint32_t *dst32 = pData; - uint64_t *dst64 = pData; - uint64_t result; - int ret; - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - /* Where is the 
availabilty info supposed to go? */ - anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); - return VK_UNSUPPORTED; - } - - assert(pool->type == VK_QUERY_TYPE_OCCLUSION); - - if (flags & VK_QUERY_RESULT_64_BIT) - *pDataSize = queryCount * sizeof(uint64_t); - else - *pDataSize = queryCount * sizeof(uint32_t); - - if (pData == NULL) - return VK_SUCCESS; - - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); - if (ret == -1) - return vk_error(VK_ERROR_UNKNOWN); - } - - for (uint32_t i = 0; i < queryCount; i++) { - result = slot[startQuery + i].end - slot[startQuery + i].begin; - if (flags & VK_QUERY_RESULT_64_BIT) { - *dst64++ = result; - } else { - if (result > UINT32_MAX) - result = UINT32_MAX; - *dst32++ = result; - } - } - - return VK_SUCCESS; -} - // Buffer functions VkResult anv_CreateBuffer( @@ -3428,205 +3312,6 @@ void anv_CmdPipelineBarrier( stub(); } -static void -anv_batch_emit_ps_depth_count(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_PIPE_CONTROL, - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WritePSDepthCount, - .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ -} - -void anv_CmdBeginQuery( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t slot, - VkQueryControlFlags flags) -{ - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - slot * sizeof(struct anv_query_pool_slot)); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -void anv_CmdEndQuery( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t slot) -{ - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; - - 
switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - slot * sizeof(struct anv_query_pool_slot) + 8); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -void anv_CmdResetQueryPool( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount) -{ - stub(); -} - -#define TIMESTAMP 0x2358 - -void anv_CmdWriteTimestamp( - VkCmdBuffer cmdBuffer, - VkTimestampType timestampType, - VkBuffer destBuffer, - VkDeviceSize destOffset) -{ - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_buffer *buffer = (struct anv_buffer *) destBuffer; - struct anv_bo *bo = buffer->bo; - - switch (timestampType) { - case VK_TIMESTAMP_TYPE_TOP: - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = TIMESTAMP, - .MemoryAddress = { bo, buffer->offset + destOffset }); - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = TIMESTAMP + 4, - .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); - break; - - case VK_TIMESTAMP_TYPE_BOTTOM: - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteTimestamp, - .Address = /* FIXME: This is only lower 32 bits */ - { bo, buffer->offset + destOffset }); - break; - - default: - break; - } -} - -#define alu_opcode(v) __gen_field((v), 20, 31) -#define alu_operand1(v) __gen_field((v), 10, 19) -#define alu_operand2(v) __gen_field((v), 0, 9) -#define alu(opcode, operand1, operand2) \ - alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) - -#define OPCODE_NOOP 0x000 -#define OPCODE_LOAD 0x080 -#define OPCODE_LOADINV 0x480 -#define OPCODE_LOAD0 0x081 -#define OPCODE_LOAD1 0x481 -#define OPCODE_ADD 0x100 -#define OPCODE_SUB 0x101 -#define OPCODE_AND 0x102 -#define OPCODE_OR 0x103 -#define OPCODE_XOR 0x104 -#define OPCODE_STORE 0x180 -#define 
OPCODE_STOREINV 0x580 - -#define OPERAND_R0 0x00 -#define OPERAND_R1 0x01 -#define OPERAND_R2 0x02 -#define OPERAND_R3 0x03 -#define OPERAND_R4 0x04 -#define OPERAND_SRCA 0x20 -#define OPERAND_SRCB 0x21 -#define OPERAND_ACCU 0x31 -#define OPERAND_ZF 0x32 -#define OPERAND_CF 0x33 - -#define CS_GPR(n) (0x2600 + (n) * 8) - -static void -emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg + 4, - .MemoryAddress = { bo, offset + 4 }); -} - -void anv_CmdCopyQueryPoolResults( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize destStride, - VkQueryResultFlags flags) -{ - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; - struct anv_buffer *buffer = (struct anv_buffer *) destBuffer; - uint32_t slot_offset, dst_offset; - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - /* Where is the availabilty info supposed to go? */ - anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); - return; - } - - assert(pool->type == VK_QUERY_TYPE_OCCLUSION); - - /* FIXME: If we're not waiting, should we just do this on the CPU? 
*/ - if (flags & VK_QUERY_RESULT_WAIT_BIT) - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .CommandStreamerStallEnable = true, - .StallAtPixelScoreboard = true); - - dst_offset = buffer->offset + destOffset; - for (uint32_t i = 0; i < queryCount; i++) { - - slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); - - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); - - /* FIXME: We need to clamp the result for 32 bit. */ - - uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); - dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); - dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); - dw[3] = alu(OPCODE_SUB, 0, 0); - dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = CS_GPR(2), - /* FIXME: This is only lower 32 bits */ - .MemoryAddress = { buffer->bo, dst_offset }); - - if (flags & VK_QUERY_RESULT_64_BIT) - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = CS_GPR(2) + 4, - /* FIXME: This is only lower 32 bits */ - .MemoryAddress = { buffer->bo, dst_offset + 4 }); - - dst_offset += destStride; - } -} - void anv_CmdInitAtomicCounters( VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 25f8a230398..5ae39cf7cc9 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -531,19 +531,6 @@ struct anv_dynamic_cb_state { }; -struct anv_query_pool_slot { - uint64_t begin; - uint64_t end; - uint64_t available; -}; - -struct anv_query_pool { - struct anv_object base; - VkQueryType type; - uint32_t slots; - struct anv_bo bo; -}; - struct anv_descriptor_slot { int8_t dynamic_slot; uint8_t index; diff --git a/src/vulkan/query.c b/src/vulkan/query.c new file mode 100644 index 00000000000..759f76c8f59 --- /dev/null +++ 
b/src/vulkan/query.c @@ -0,0 +1,356 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "private.h" + +struct anv_query_pool_slot { + uint64_t begin; + uint64_t end; + uint64_t available; +}; + +struct anv_query_pool { + struct anv_object base; + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + +static void +anv_query_pool_destroy(struct anv_device *device, + struct anv_object *object, + VkObjectType obj_type) +{ + struct anv_query_pool *pool = (struct anv_query_pool *) object; + + assert(obj_type == VK_OBJECT_TYPE_QUERY_POOL); + + anv_gem_munmap(pool->bo.map, pool->bo.size); + anv_gem_close(device, pool->bo.gem_handle); + anv_device_free(device, pool); +} + +VkResult anv_CreateQueryPool( + VkDevice _device, + const VkQueryPoolCreateInfo* pCreateInfo, + VkQueryPool* pQueryPool) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_query_pool *pool; + VkResult result; + size_t size; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); + + switch (pCreateInfo->queryType) { + case VK_QUERY_TYPE_OCCLUSION: + break; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + return VK_UNSUPPORTED; + default: + unreachable(""); + } + + pool = anv_device_alloc(device, sizeof(*pool), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pool->base.destructor = anv_query_pool_destroy; + + pool->type = pCreateInfo->queryType; + size = pCreateInfo->slots * sizeof(struct anv_query_pool_slot); + result = anv_bo_init_new(&pool->bo, device, size); + if (result != VK_SUCCESS) + goto fail; + + pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size); + + *pQueryPool = (VkQueryPool) pool; + + return VK_SUCCESS; + + fail: + anv_device_free(device, pool); + + return result; +} + +VkResult anv_GetQueryPoolResults( + VkDevice _device, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + size_t* pDataSize, + void* pData, + VkQueryResultFlags flags) +{ + struct anv_device *device 
= (struct anv_device *) _device; + struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + struct anv_query_pool_slot *slot = pool->bo.map; + int64_t timeout = INT64_MAX; + uint32_t *dst32 = pData; + uint64_t *dst64 = pData; + uint64_t result; + int ret; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availabilty info supposed to go? */ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return VK_UNSUPPORTED; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + if (flags & VK_QUERY_RESULT_64_BIT) + *pDataSize = queryCount * sizeof(uint64_t); + else + *pDataSize = queryCount * sizeof(uint32_t); + + if (pData == NULL) + return VK_SUCCESS; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); + if (ret == -1) + return vk_error(VK_ERROR_UNKNOWN); + } + + for (uint32_t i = 0; i < queryCount; i++) { + result = slot[startQuery + i].end - slot[startQuery + i].begin; + if (flags & VK_QUERY_RESULT_64_BIT) { + *dst64++ = result; + } else { + if (result > UINT32_MAX) + result = UINT32_MAX; + *dst32++ = result; + } + } + + return VK_SUCCESS; +} + +static void +anv_batch_emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ +} + +void anv_CmdBeginQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot, + VkQueryControlFlags flags) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void 
anv_CmdEndQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot) + 8); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void anv_CmdResetQueryPool( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount) +{ + stub(); +} + +#define TIMESTAMP 0x2358 + +void anv_CmdWriteTimestamp( + VkCmdBuffer cmdBuffer, + VkTimestampType timestampType, + VkBuffer destBuffer, + VkDeviceSize destOffset) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_buffer *buffer = (struct anv_buffer *) destBuffer; + struct anv_bo *bo = buffer->bo; + + switch (timestampType) { + case VK_TIMESTAMP_TYPE_TOP: + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { bo, buffer->offset + destOffset }); + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); + break; + + case VK_TIMESTAMP_TYPE_BOTTOM: + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = /* FIXME: This is only lower 32 bits */ + { bo, buffer->offset + destOffset }); + break; + + default: + break; + } +} + +#define alu_opcode(v) __gen_field((v), 20, 31) +#define alu_operand1(v) __gen_field((v), 10, 19) +#define alu_operand2(v) __gen_field((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define 
OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +void anv_CmdCopyQueryPoolResults( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + struct anv_buffer *buffer = (struct anv_buffer *) destBuffer; + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availabilty info supposed to go? */ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + /* FIXME: If we're not waiting, should we just do this on the CPU? 
*/ + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); + + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2), + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2) + 4, + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset + 4 }); + + dst_offset += destStride; + } +} -- cgit v1.2.3 From fbafc946c60f16ffa128ec8c310991d533a6febf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 2 Jun 2015 11:01:41 -0700 Subject: vk/formats: Rework the formats table --- src/vulkan/formats.c | 351 ++++++++++++++++++++++++++------------------------- 1 file changed, 177 insertions(+), 174 deletions(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index 66208f23ea9..fb491d07327 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -25,187 +25,190 @@ #define UNSUPPORTED 0xffff +#define fmt(__vk_fmt, ...) 
\ + [VK_FORMAT_##__vk_fmt] = { __VA_ARGS__ } + static const struct anv_format anv_formats[] = { - [VK_FORMAT_UNDEFINED] = { .format = RAW, .cpp = 1, .channels = 1 }, - [VK_FORMAT_R4G4_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_R4G4_USCALED] = { .format = UNSUPPORTED }, - [VK_FORMAT_R4G4B4A4_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_R4G4B4A4_USCALED] = { .format = UNSUPPORTED }, - [VK_FORMAT_R5G6B5_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_R5G6B5_USCALED] = { .format = UNSUPPORTED }, - [VK_FORMAT_R5G5B5A1_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_R5G5B5A1_USCALED] = { .format = UNSUPPORTED }, - [VK_FORMAT_R8_UNORM] = { .format = R8_UNORM, .cpp = 1, .channels = 1 }, - [VK_FORMAT_R8_SNORM] = { .format = R8_SNORM, .cpp = 1, .channels = 1, }, - [VK_FORMAT_R8_USCALED] = { .format = R8_USCALED, .cpp = 1, .channels = 1 }, - [VK_FORMAT_R8_SSCALED] = { .format = R8_SSCALED, .cpp = 1, .channels = 1 }, - [VK_FORMAT_R8_UINT] = { .format = R8_UINT, .cpp = 1, .channels = 1 }, - [VK_FORMAT_R8_SINT] = { .format = R8_SINT, .cpp = 1, .channels = 1 }, - [VK_FORMAT_R8_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_R8G8_UNORM] = { .format = R8G8_UNORM, .cpp = 2, .channels = 2 }, - [VK_FORMAT_R8G8_SNORM] = { .format = R8G8_SNORM, .cpp = 2, .channels = 2 }, - [VK_FORMAT_R8G8_USCALED] = { .format = R8G8_USCALED, .cpp = 2, .channels = 2 }, - [VK_FORMAT_R8G8_SSCALED] = { .format = R8G8_SSCALED, .cpp = 2, .channels = 2 }, - [VK_FORMAT_R8G8_UINT] = { .format = R8G8_UINT, .cpp = 2, .channels = 2 }, - [VK_FORMAT_R8G8_SINT] = { .format = R8G8_SINT, .cpp = 2, .channels = 2 }, - [VK_FORMAT_R8G8_SRGB] = { .format = UNSUPPORTED }, /* L8A8_UNORM_SRGB */ - [VK_FORMAT_R8G8B8_UNORM] = { .format = R8G8B8X8_UNORM, .cpp = 3, .channels = 3 }, - [VK_FORMAT_R8G8B8_SNORM] = { .format = R8G8B8_SNORM, .cpp = 4 }, - [VK_FORMAT_R8G8B8_USCALED] = { .format = R8G8B8_USCALED, .cpp = 3, .channels = 3 }, - [VK_FORMAT_R8G8B8_SSCALED] = { .format = R8G8B8_SSCALED, .cpp = 3, .channels = 3 }, - 
[VK_FORMAT_R8G8B8_UINT] = { .format = R8G8B8_UINT, .cpp = 3, .channels = 3 }, - [VK_FORMAT_R8G8B8_SINT] = { .format = R8G8B8_SINT, .cpp = 3, .channels = 3 }, - [VK_FORMAT_R8G8B8_SRGB] = { .format = UNSUPPORTED }, /* B8G8R8A8_UNORM_SRGB */ - [VK_FORMAT_R8G8B8A8_UNORM] = { .format = R8G8B8A8_UNORM, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R8G8B8A8_SNORM] = { .format = R8G8B8A8_SNORM, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R8G8B8A8_USCALED] = { .format = R8G8B8A8_USCALED, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R8G8B8A8_SSCALED] = { .format = R8G8B8A8_SSCALED, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R8G8B8A8_UINT] = { .format = R8G8B8A8_UINT, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R8G8B8A8_SINT] = { .format = R8G8B8A8_SINT, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R8G8B8A8_SRGB] = { .format = R8G8B8A8_UNORM_SRGB, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R10G10B10A2_UNORM] = { .format = R10G10B10A2_UNORM, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R10G10B10A2_SNORM] = { .format = R10G10B10A2_SNORM, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R10G10B10A2_USCALED] = { .format = R10G10B10A2_USCALED, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R10G10B10A2_SSCALED] = { .format = R10G10B10A2_SSCALED, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R10G10B10A2_UINT] = { .format = R10G10B10A2_UINT, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R10G10B10A2_SINT] = { .format = R10G10B10A2_SINT, .cpp = 4, .channels = 4 }, - [VK_FORMAT_R16_UNORM] = { .format = R16_UNORM, .cpp = 2, .channels = 1 }, - [VK_FORMAT_R16_SNORM] = { .format = R16_SNORM, .cpp = 2, .channels = 1 }, - [VK_FORMAT_R16_USCALED] = { .format = R16_USCALED, .cpp = 2, .channels = 1 }, - [VK_FORMAT_R16_SSCALED] = { .format = R16_SSCALED, .cpp = 2, .channels = 1 }, - [VK_FORMAT_R16_UINT] = { .format = R16_UINT, .cpp = 2, .channels = 1 }, - [VK_FORMAT_R16_SINT] = { .format = R16_SINT, .cpp = 2, .channels = 1 }, - [VK_FORMAT_R16_SFLOAT] = { .format = R16_FLOAT, .cpp = 2, .channels = 1 }, - [VK_FORMAT_R16G16_UNORM] = { .format = R16G16_UNORM, 
.cpp = 4, .channels = 2 }, - [VK_FORMAT_R16G16_SNORM] = { .format = R16G16_SNORM, .cpp = 4, .channels = 2 }, - [VK_FORMAT_R16G16_USCALED] = { .format = R16G16_USCALED, .cpp = 4, .channels = 2 }, - [VK_FORMAT_R16G16_SSCALED] = { .format = R16G16_SSCALED, .cpp = 4, .channels = 2 }, - [VK_FORMAT_R16G16_UINT] = { .format = R16G16_UINT, .cpp = 4, .channels = 2 }, - [VK_FORMAT_R16G16_SINT] = { .format = R16G16_SINT, .cpp = 4, .channels = 2 }, - [VK_FORMAT_R16G16_SFLOAT] = { .format = R16G16_FLOAT, .cpp = 4, .channels = 2 }, - [VK_FORMAT_R16G16B16_UNORM] = { .format = R16G16B16_UNORM, .cpp = 6, .channels = 3 }, - [VK_FORMAT_R16G16B16_SNORM] = { .format = R16G16B16_SNORM, .cpp = 6, .channels = 3 }, - [VK_FORMAT_R16G16B16_USCALED] = { .format = R16G16B16_USCALED, .cpp = 6, .channels = 3 }, - [VK_FORMAT_R16G16B16_SSCALED] = { .format = R16G16B16_SSCALED, .cpp = 6, .channels = 3 }, - [VK_FORMAT_R16G16B16_UINT] = { .format = R16G16B16_UINT, .cpp = 6, .channels = 3 }, - [VK_FORMAT_R16G16B16_SINT] = { .format = R16G16B16_SINT, .cpp = 6, .channels = 3 }, - [VK_FORMAT_R16G16B16_SFLOAT] = { .format = R16G16B16_FLOAT, .cpp = 6, .channels = 3 }, - [VK_FORMAT_R16G16B16A16_UNORM] = { .format = R16G16B16A16_UNORM, .cpp = 8, .channels = 4 }, - [VK_FORMAT_R16G16B16A16_SNORM] = { .format = R16G16B16A16_SNORM, .cpp = 8, .channels = 4 }, - [VK_FORMAT_R16G16B16A16_USCALED] = { .format = R16G16B16A16_USCALED, .cpp = 8, .channels = 4 }, - [VK_FORMAT_R16G16B16A16_SSCALED] = { .format = R16G16B16A16_SSCALED, .cpp = 8, .channels = 4 }, - [VK_FORMAT_R16G16B16A16_UINT] = { .format = R16G16B16A16_UINT, .cpp = 8, .channels = 4 }, - [VK_FORMAT_R16G16B16A16_SINT] = { .format = R16G16B16A16_SINT, .cpp = 8, .channels = 4 }, - [VK_FORMAT_R16G16B16A16_SFLOAT] = { .format = R16G16B16A16_FLOAT, .cpp = 8, .channels = 4 }, - [VK_FORMAT_R32_UINT] = { .format = R32_UINT, .cpp = 4, .channels = 1, }, - [VK_FORMAT_R32_SINT] = { .format = R32_SINT, .cpp = 4, .channels = 1, }, - [VK_FORMAT_R32_SFLOAT] = { .format = 
R32_FLOAT, .cpp = 4, .channels = 1, }, - [VK_FORMAT_R32G32_UINT] = { .format = R32G32_UINT, .cpp = 8, .channels = 2, }, - [VK_FORMAT_R32G32_SINT] = { .format = R32G32_SINT, .cpp = 8, .channels = 2, }, - [VK_FORMAT_R32G32_SFLOAT] = { .format = R32G32_FLOAT, .cpp = 8, .channels = 2, }, - [VK_FORMAT_R32G32B32_UINT] = { .format = R32G32B32_UINT, .cpp = 12, .channels = 3, }, - [VK_FORMAT_R32G32B32_SINT] = { .format = R32G32B32_SINT, .cpp = 12, .channels = 3, }, - [VK_FORMAT_R32G32B32_SFLOAT] = { .format = R32G32B32_FLOAT, .cpp = 12, .channels = 3, }, - [VK_FORMAT_R32G32B32A32_UINT] = { .format = R32G32B32A32_UINT, .cpp = 16, .channels = 4, }, - [VK_FORMAT_R32G32B32A32_SINT] = { .format = R32G32B32A32_SINT, .cpp = 16, .channels = 4, }, - [VK_FORMAT_R32G32B32A32_SFLOAT] = { .format = R32G32B32A32_FLOAT, .cpp = 16, .channels = 4, }, - [VK_FORMAT_R64_SFLOAT] = { .format = R64_FLOAT, .cpp = 8, .channels = 1 }, - [VK_FORMAT_R64G64_SFLOAT] = { .format = R64G64_FLOAT, .cpp = 16, .channels = 2 }, - [VK_FORMAT_R64G64B64_SFLOAT] = { .format = R64G64B64_FLOAT, .cpp = 24, .channels = 3 }, - [VK_FORMAT_R64G64B64A64_SFLOAT] = { .format = R64G64B64A64_FLOAT, .cpp = 32, .channels = 4 }, - [VK_FORMAT_R11G11B10_UFLOAT] = { .format = R11G11B10_FLOAT, .cpp = 4, .channels = 3 }, - [VK_FORMAT_R9G9B9E5_UFLOAT] = { .format = R9G9B9E5_SHAREDEXP, .cpp = 4, .channels = 3 }, + fmt(UNDEFINED, .format = RAW, .cpp = 1, .channels = 1), + fmt(R4G4_UNORM, .format = UNSUPPORTED), + fmt(R4G4_USCALED, .format = UNSUPPORTED), + fmt(R4G4B4A4_UNORM, .format = UNSUPPORTED), + fmt(R4G4B4A4_USCALED, .format = UNSUPPORTED), + fmt(R5G6B5_UNORM, .format = UNSUPPORTED), + fmt(R5G6B5_USCALED, .format = UNSUPPORTED), + fmt(R5G5B5A1_UNORM, .format = UNSUPPORTED), + fmt(R5G5B5A1_USCALED, .format = UNSUPPORTED), + fmt(R8_UNORM, .format = R8_UNORM, .cpp = 1, .channels = 1), + fmt(R8_SNORM, .format = R8_SNORM, .cpp = 1, .channels = 1,), + fmt(R8_USCALED, .format = R8_USCALED, .cpp = 1, .channels = 1), + fmt(R8_SSCALED, 
.format = R8_SSCALED, .cpp = 1, .channels = 1), + fmt(R8_UINT, .format = R8_UINT, .cpp = 1, .channels = 1), + fmt(R8_SINT, .format = R8_SINT, .cpp = 1, .channels = 1), + fmt(R8_SRGB, .format = UNSUPPORTED), + fmt(R8G8_UNORM, .format = R8G8_UNORM, .cpp = 2, .channels = 2), + fmt(R8G8_SNORM, .format = R8G8_SNORM, .cpp = 2, .channels = 2), + fmt(R8G8_USCALED, .format = R8G8_USCALED, .cpp = 2, .channels = 2), + fmt(R8G8_SSCALED, .format = R8G8_SSCALED, .cpp = 2, .channels = 2), + fmt(R8G8_UINT, .format = R8G8_UINT, .cpp = 2, .channels = 2), + fmt(R8G8_SINT, .format = R8G8_SINT, .cpp = 2, .channels = 2), + fmt(R8G8_SRGB, .format = UNSUPPORTED), /* L8A8_UNORM_SRGB */ + fmt(R8G8B8_UNORM, .format = R8G8B8X8_UNORM, .cpp = 3, .channels = 3), + fmt(R8G8B8_SNORM, .format = R8G8B8_SNORM, .cpp = 4), + fmt(R8G8B8_USCALED, .format = R8G8B8_USCALED, .cpp = 3, .channels = 3), + fmt(R8G8B8_SSCALED, .format = R8G8B8_SSCALED, .cpp = 3, .channels = 3), + fmt(R8G8B8_UINT, .format = R8G8B8_UINT, .cpp = 3, .channels = 3), + fmt(R8G8B8_SINT, .format = R8G8B8_SINT, .cpp = 3, .channels = 3), + fmt(R8G8B8_SRGB, .format = UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ + fmt(R8G8B8A8_UNORM, .format = R8G8B8A8_UNORM, .cpp = 4, .channels = 4), + fmt(R8G8B8A8_SNORM, .format = R8G8B8A8_SNORM, .cpp = 4, .channels = 4), + fmt(R8G8B8A8_USCALED, .format = R8G8B8A8_USCALED, .cpp = 4, .channels = 4), + fmt(R8G8B8A8_SSCALED, .format = R8G8B8A8_SSCALED, .cpp = 4, .channels = 4), + fmt(R8G8B8A8_UINT, .format = R8G8B8A8_UINT, .cpp = 4, .channels = 4), + fmt(R8G8B8A8_SINT, .format = R8G8B8A8_SINT, .cpp = 4, .channels = 4), + fmt(R8G8B8A8_SRGB, .format = R8G8B8A8_UNORM_SRGB, .cpp = 4, .channels = 4), + fmt(R10G10B10A2_UNORM, .format = R10G10B10A2_UNORM, .cpp = 4, .channels = 4), + fmt(R10G10B10A2_SNORM, .format = R10G10B10A2_SNORM, .cpp = 4, .channels = 4), + fmt(R10G10B10A2_USCALED, .format = R10G10B10A2_USCALED, .cpp = 4, .channels = 4), + fmt(R10G10B10A2_SSCALED, .format = R10G10B10A2_SSCALED, .cpp = 4, .channels = 
4), + fmt(R10G10B10A2_UINT, .format = R10G10B10A2_UINT, .cpp = 4, .channels = 4), + fmt(R10G10B10A2_SINT, .format = R10G10B10A2_SINT, .cpp = 4, .channels = 4), + fmt(R16_UNORM, .format = R16_UNORM, .cpp = 2, .channels = 1), + fmt(R16_SNORM, .format = R16_SNORM, .cpp = 2, .channels = 1), + fmt(R16_USCALED, .format = R16_USCALED, .cpp = 2, .channels = 1), + fmt(R16_SSCALED, .format = R16_SSCALED, .cpp = 2, .channels = 1), + fmt(R16_UINT, .format = R16_UINT, .cpp = 2, .channels = 1), + fmt(R16_SINT, .format = R16_SINT, .cpp = 2, .channels = 1), + fmt(R16_SFLOAT, .format = R16_FLOAT, .cpp = 2, .channels = 1), + fmt(R16G16_UNORM, .format = R16G16_UNORM, .cpp = 4, .channels = 2), + fmt(R16G16_SNORM, .format = R16G16_SNORM, .cpp = 4, .channels = 2), + fmt(R16G16_USCALED, .format = R16G16_USCALED, .cpp = 4, .channels = 2), + fmt(R16G16_SSCALED, .format = R16G16_SSCALED, .cpp = 4, .channels = 2), + fmt(R16G16_UINT, .format = R16G16_UINT, .cpp = 4, .channels = 2), + fmt(R16G16_SINT, .format = R16G16_SINT, .cpp = 4, .channels = 2), + fmt(R16G16_SFLOAT, .format = R16G16_FLOAT, .cpp = 4, .channels = 2), + fmt(R16G16B16_UNORM, .format = R16G16B16_UNORM, .cpp = 6, .channels = 3), + fmt(R16G16B16_SNORM, .format = R16G16B16_SNORM, .cpp = 6, .channels = 3), + fmt(R16G16B16_USCALED, .format = R16G16B16_USCALED, .cpp = 6, .channels = 3), + fmt(R16G16B16_SSCALED, .format = R16G16B16_SSCALED, .cpp = 6, .channels = 3), + fmt(R16G16B16_UINT, .format = R16G16B16_UINT, .cpp = 6, .channels = 3), + fmt(R16G16B16_SINT, .format = R16G16B16_SINT, .cpp = 6, .channels = 3), + fmt(R16G16B16_SFLOAT, .format = R16G16B16_FLOAT, .cpp = 6, .channels = 3), + fmt(R16G16B16A16_UNORM, .format = R16G16B16A16_UNORM, .cpp = 8, .channels = 4), + fmt(R16G16B16A16_SNORM, .format = R16G16B16A16_SNORM, .cpp = 8, .channels = 4), + fmt(R16G16B16A16_USCALED, .format = R16G16B16A16_USCALED, .cpp = 8, .channels = 4), + fmt(R16G16B16A16_SSCALED, .format = R16G16B16A16_SSCALED, .cpp = 8, .channels = 4), + 
fmt(R16G16B16A16_UINT, .format = R16G16B16A16_UINT, .cpp = 8, .channels = 4), + fmt(R16G16B16A16_SINT, .format = R16G16B16A16_SINT, .cpp = 8, .channels = 4), + fmt(R16G16B16A16_SFLOAT, .format = R16G16B16A16_FLOAT, .cpp = 8, .channels = 4), + fmt(R32_UINT, .format = R32_UINT, .cpp = 4, .channels = 1,), + fmt(R32_SINT, .format = R32_SINT, .cpp = 4, .channels = 1,), + fmt(R32_SFLOAT, .format = R32_FLOAT, .cpp = 4, .channels = 1,), + fmt(R32G32_UINT, .format = R32G32_UINT, .cpp = 8, .channels = 2,), + fmt(R32G32_SINT, .format = R32G32_SINT, .cpp = 8, .channels = 2,), + fmt(R32G32_SFLOAT, .format = R32G32_FLOAT, .cpp = 8, .channels = 2,), + fmt(R32G32B32_UINT, .format = R32G32B32_UINT, .cpp = 12, .channels = 3,), + fmt(R32G32B32_SINT, .format = R32G32B32_SINT, .cpp = 12, .channels = 3,), + fmt(R32G32B32_SFLOAT, .format = R32G32B32_FLOAT, .cpp = 12, .channels = 3,), + fmt(R32G32B32A32_UINT, .format = R32G32B32A32_UINT, .cpp = 16, .channels = 4,), + fmt(R32G32B32A32_SINT, .format = R32G32B32A32_SINT, .cpp = 16, .channels = 4,), + fmt(R32G32B32A32_SFLOAT, .format = R32G32B32A32_FLOAT, .cpp = 16, .channels = 4,), + fmt(R64_SFLOAT, .format = R64_FLOAT, .cpp = 8, .channels = 1), + fmt(R64G64_SFLOAT, .format = R64G64_FLOAT, .cpp = 16, .channels = 2), + fmt(R64G64B64_SFLOAT, .format = R64G64B64_FLOAT, .cpp = 24, .channels = 3), + fmt(R64G64B64A64_SFLOAT, .format = R64G64B64A64_FLOAT, .cpp = 32, .channels = 4), + fmt(R11G11B10_UFLOAT, .format = R11G11B10_FLOAT, .cpp = 4, .channels = 3), + fmt(R9G9B9E5_UFLOAT, .format = R9G9B9E5_SHAREDEXP, .cpp = 4, .channels = 3), /* For depth/stencil formats, the .format and .cpp fields describe the * depth format. The field .has_stencil indicates whether or not there's a * stencil buffer. 
*/ - [VK_FORMAT_D16_UNORM] = { .format = D16_UNORM, .cpp = 2, .channels = 1 }, - [VK_FORMAT_D24_UNORM] = { .format = D24_UNORM_X8_UINT, .cpp = 4, .channels = 1 }, - [VK_FORMAT_D32_SFLOAT] = { .format = D32_FLOAT, .cpp = 4, .channels = 1 }, - [VK_FORMAT_S8_UINT] = { .format = UNSUPPORTED, .cpp = 0, .channels = 1, .has_stencil = true }, - [VK_FORMAT_D16_UNORM_S8_UINT] = { .format = D16_UNORM, .cpp = 2, .channels = 2, .has_stencil = true }, - [VK_FORMAT_D24_UNORM_S8_UINT] = { .format = D24_UNORM_X8_UINT, .cpp = 4, .channels = 2, .has_stencil = true }, - [VK_FORMAT_D32_SFLOAT_S8_UINT] = { .format = D32_FLOAT, .cpp = 4, .channels = 2, .has_stencil = true }, + fmt(D16_UNORM, .format = D16_UNORM, .cpp = 2, .channels = 1), + fmt(D24_UNORM, .format = D24_UNORM_X8_UINT, .cpp = 4, .channels = 1), + fmt(D32_SFLOAT, .format = D32_FLOAT, .cpp = 4, .channels = 1), + fmt(S8_UINT, .format = UNSUPPORTED, .cpp = 0, .channels = 1, .has_stencil = true), + fmt(D16_UNORM_S8_UINT, .format = D16_UNORM, .cpp = 2, .channels = 2, .has_stencil = true), + fmt(D24_UNORM_S8_UINT, .format = D24_UNORM_X8_UINT, .cpp = 4, .channels = 2, .has_stencil = true), + fmt(D32_SFLOAT_S8_UINT, .format = D32_FLOAT, .cpp = 4, .channels = 2, .has_stencil = true), - [VK_FORMAT_BC1_RGB_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC1_RGB_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC1_RGBA_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC1_RGBA_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC2_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC2_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC3_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC3_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC4_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC4_SNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC5_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC5_SNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC6H_UFLOAT] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC6H_SFLOAT] = { .format = UNSUPPORTED }, - 
[VK_FORMAT_BC7_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_BC7_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ETC2_R8G8B8_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ETC2_R8G8B8_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ETC2_R8G8B8A1_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ETC2_R8G8B8A1_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ETC2_R8G8B8A8_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ETC2_R8G8B8A8_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_EAC_R11_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_EAC_R11_SNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_EAC_R11G11_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_EAC_R11G11_SNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_4x4_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_4x4_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_5x4_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_5x4_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_5x5_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_5x5_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_6x5_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_6x5_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_6x6_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_6x6_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_8x5_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_8x5_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_8x6_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_8x6_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_8x8_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_8x8_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_10x5_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_10x5_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_10x6_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_10x6_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_10x8_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_10x8_SRGB] = { .format = UNSUPPORTED }, - 
[VK_FORMAT_ASTC_10x10_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_10x10_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_12x10_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_12x10_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_12x12_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_ASTC_12x12_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_B4G4R4A4_UNORM] = { .format = B4G4R4A4_UNORM, .cpp = 2, .channels = 4 }, - [VK_FORMAT_B5G5R5A1_UNORM] = { .format = B5G5R5A1_UNORM, .cpp = 2, .channels = 4 }, - [VK_FORMAT_B5G6R5_UNORM] = { .format = B5G6R5_UNORM, .cpp = 2, .channels = 3 }, - [VK_FORMAT_B5G6R5_USCALED] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8_UNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8_SNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8_USCALED] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8_SSCALED] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8_UINT] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8_SINT] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8_SRGB] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8A8_UNORM] = { .format = B8G8R8A8_UNORM, .cpp = 4, .channels = 4 }, - [VK_FORMAT_B8G8R8A8_SNORM] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8A8_USCALED] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8A8_SSCALED] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8A8_UINT] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8A8_SINT] = { .format = UNSUPPORTED }, - [VK_FORMAT_B8G8R8A8_SRGB] = { .format = B8G8R8A8_UNORM_SRGB, .cpp = 4, .channels = 4 }, - [VK_FORMAT_B10G10R10A2_UNORM] = { .format = B10G10R10A2_UNORM, .cpp = 4, .channels = 4 }, - [VK_FORMAT_B10G10R10A2_SNORM] = { .format = B10G10R10A2_SNORM, .cpp = 4, .channels = 4 }, - [VK_FORMAT_B10G10R10A2_USCALED] = { .format = B10G10R10A2_USCALED, .cpp = 4, .channels = 4 }, - [VK_FORMAT_B10G10R10A2_SSCALED] = { .format = B10G10R10A2_SSCALED, .cpp = 4, .channels = 4 }, - [VK_FORMAT_B10G10R10A2_UINT] = { .format = B10G10R10A2_UINT, .cpp = 4, .channels = 4 }, - 
[VK_FORMAT_B10G10R10A2_SINT] = { .format = B10G10R10A2_SINT, .cpp = 4, .channels = 4 } + fmt(BC1_RGB_UNORM, .format = UNSUPPORTED), + fmt(BC1_RGB_SRGB, .format = UNSUPPORTED), + fmt(BC1_RGBA_UNORM, .format = UNSUPPORTED), + fmt(BC1_RGBA_SRGB, .format = UNSUPPORTED), + fmt(BC2_UNORM, .format = UNSUPPORTED), + fmt(BC2_SRGB, .format = UNSUPPORTED), + fmt(BC3_UNORM, .format = UNSUPPORTED), + fmt(BC3_SRGB, .format = UNSUPPORTED), + fmt(BC4_UNORM, .format = UNSUPPORTED), + fmt(BC4_SNORM, .format = UNSUPPORTED), + fmt(BC5_UNORM, .format = UNSUPPORTED), + fmt(BC5_SNORM, .format = UNSUPPORTED), + fmt(BC6H_UFLOAT, .format = UNSUPPORTED), + fmt(BC6H_SFLOAT, .format = UNSUPPORTED), + fmt(BC7_UNORM, .format = UNSUPPORTED), + fmt(BC7_SRGB, .format = UNSUPPORTED), + fmt(ETC2_R8G8B8_UNORM, .format = UNSUPPORTED), + fmt(ETC2_R8G8B8_SRGB, .format = UNSUPPORTED), + fmt(ETC2_R8G8B8A1_UNORM, .format = UNSUPPORTED), + fmt(ETC2_R8G8B8A1_SRGB, .format = UNSUPPORTED), + fmt(ETC2_R8G8B8A8_UNORM, .format = UNSUPPORTED), + fmt(ETC2_R8G8B8A8_SRGB, .format = UNSUPPORTED), + fmt(EAC_R11_UNORM, .format = UNSUPPORTED), + fmt(EAC_R11_SNORM, .format = UNSUPPORTED), + fmt(EAC_R11G11_UNORM, .format = UNSUPPORTED), + fmt(EAC_R11G11_SNORM, .format = UNSUPPORTED), + fmt(ASTC_4x4_UNORM, .format = UNSUPPORTED), + fmt(ASTC_4x4_SRGB, .format = UNSUPPORTED), + fmt(ASTC_5x4_UNORM, .format = UNSUPPORTED), + fmt(ASTC_5x4_SRGB, .format = UNSUPPORTED), + fmt(ASTC_5x5_UNORM, .format = UNSUPPORTED), + fmt(ASTC_5x5_SRGB, .format = UNSUPPORTED), + fmt(ASTC_6x5_UNORM, .format = UNSUPPORTED), + fmt(ASTC_6x5_SRGB, .format = UNSUPPORTED), + fmt(ASTC_6x6_UNORM, .format = UNSUPPORTED), + fmt(ASTC_6x6_SRGB, .format = UNSUPPORTED), + fmt(ASTC_8x5_UNORM, .format = UNSUPPORTED), + fmt(ASTC_8x5_SRGB, .format = UNSUPPORTED), + fmt(ASTC_8x6_UNORM, .format = UNSUPPORTED), + fmt(ASTC_8x6_SRGB, .format = UNSUPPORTED), + fmt(ASTC_8x8_UNORM, .format = UNSUPPORTED), + fmt(ASTC_8x8_SRGB, .format = UNSUPPORTED), + fmt(ASTC_10x5_UNORM, 
.format = UNSUPPORTED), + fmt(ASTC_10x5_SRGB, .format = UNSUPPORTED), + fmt(ASTC_10x6_UNORM, .format = UNSUPPORTED), + fmt(ASTC_10x6_SRGB, .format = UNSUPPORTED), + fmt(ASTC_10x8_UNORM, .format = UNSUPPORTED), + fmt(ASTC_10x8_SRGB, .format = UNSUPPORTED), + fmt(ASTC_10x10_UNORM, .format = UNSUPPORTED), + fmt(ASTC_10x10_SRGB, .format = UNSUPPORTED), + fmt(ASTC_12x10_UNORM, .format = UNSUPPORTED), + fmt(ASTC_12x10_SRGB, .format = UNSUPPORTED), + fmt(ASTC_12x12_UNORM, .format = UNSUPPORTED), + fmt(ASTC_12x12_SRGB, .format = UNSUPPORTED), + fmt(B4G4R4A4_UNORM, .format = B4G4R4A4_UNORM, .cpp = 2, .channels = 4), + fmt(B5G5R5A1_UNORM, .format = B5G5R5A1_UNORM, .cpp = 2, .channels = 4), + fmt(B5G6R5_UNORM, .format = B5G6R5_UNORM, .cpp = 2, .channels = 3), + fmt(B5G6R5_USCALED, .format = UNSUPPORTED), + fmt(B8G8R8_UNORM, .format = UNSUPPORTED), + fmt(B8G8R8_SNORM, .format = UNSUPPORTED), + fmt(B8G8R8_USCALED, .format = UNSUPPORTED), + fmt(B8G8R8_SSCALED, .format = UNSUPPORTED), + fmt(B8G8R8_UINT, .format = UNSUPPORTED), + fmt(B8G8R8_SINT, .format = UNSUPPORTED), + fmt(B8G8R8_SRGB, .format = UNSUPPORTED), + fmt(B8G8R8A8_UNORM, .format = B8G8R8A8_UNORM, .cpp = 4, .channels = 4), + fmt(B8G8R8A8_SNORM, .format = UNSUPPORTED), + fmt(B8G8R8A8_USCALED, .format = UNSUPPORTED), + fmt(B8G8R8A8_SSCALED, .format = UNSUPPORTED), + fmt(B8G8R8A8_UINT, .format = UNSUPPORTED), + fmt(B8G8R8A8_SINT, .format = UNSUPPORTED), + fmt(B8G8R8A8_SRGB, .format = B8G8R8A8_UNORM_SRGB, .cpp = 4, .channels = 4), + fmt(B10G10R10A2_UNORM, .format = B10G10R10A2_UNORM, .cpp = 4, .channels = 4), + fmt(B10G10R10A2_SNORM, .format = B10G10R10A2_SNORM, .cpp = 4, .channels = 4), + fmt(B10G10R10A2_USCALED, .format = B10G10R10A2_USCALED, .cpp = 4, .channels = 4), + fmt(B10G10R10A2_SSCALED, .format = B10G10R10A2_SSCALED, .cpp = 4, .channels = 4), + fmt(B10G10R10A2_UINT, .format = B10G10R10A2_UINT, .cpp = 4, .channels = 4), + fmt(B10G10R10A2_SINT, .format = B10G10R10A2_SINT, .cpp = 4, .channels = 4) }; const struct 
anv_format * -- cgit v1.2.3 From e702197e3fdc46f3dea72211cbde4a9812371d9f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 2 Jun 2015 11:03:22 -0700 Subject: vk/formats: Add a name to the metadata and better logging --- src/vulkan/formats.c | 14 +++++++++++++- src/vulkan/private.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index fb491d07327..c5e52c47b5f 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -26,7 +26,7 @@ #define UNSUPPORTED 0xffff #define fmt(__vk_fmt, ...) \ - [VK_FORMAT_##__vk_fmt] = { __VA_ARGS__ } + [VK_FORMAT_##__vk_fmt] = { .name = "VK_FORMAT_" #__vk_fmt, __VA_ARGS__ } static const struct anv_format anv_formats[] = { fmt(UNDEFINED, .format = RAW, .cpp = 1, .channels = 1), @@ -234,6 +234,18 @@ struct surface_format_info { extern const struct surface_format_info surface_formats[]; +VkResult anv_validate_GetFormatInfo( + VkDevice _device, + VkFormat _format, + VkFormatInfoType infoType, + size_t* pDataSize, + void* pData) +{ + const struct anv_format *format = anv_format_for_vk_format(_format); + fprintf(stderr, "vkGetFormatInfo(%s)\n", format->name); + return anv_GetFormatInfo(_device, _format, infoType, pDataSize, pData); +} + VkResult anv_GetFormatInfo( VkDevice _device, VkFormat _format, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 5ae39cf7cc9..1a6c3e0ca2f 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -716,6 +716,7 @@ int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipelin void anv_compiler_free(struct anv_pipeline *pipeline); struct anv_format { + const char * name; uint16_t format; uint8_t cpp; uint8_t channels; -- cgit v1.2.3 From c8f078537e66ee4eba28bd5e381ef8a1a679f1be Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 2 Jun 2015 22:35:47 -0700 Subject: vk: Implement vertexOffset parameter of vkCmdDrawIndexed() As exposed by the func.draw_indexed test, 
we were ignoring the argument and hardcoding 0. --- src/vulkan/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index b76942f5de5..e44f063237f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3179,7 +3179,7 @@ void anv_CmdDrawIndexed( .StartVertexLocation = firstIndex, .InstanceCount = instanceCount, .StartInstanceLocation = firstInstance, - .BaseVertexLocation = 0); + .BaseVertexLocation = vertexOffset); } static void -- cgit v1.2.3 From 5744d1763c8a094c5141f37c2affe24f5feb63f9 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 2 Jun 2015 22:51:42 -0700 Subject: vk: Set cb_state to NULL at cmd buffer create time Dynamic color/blend state can be NULL in case we're not rendering to color targets (only output to depth and/or stencil). Initialize cmd_buffer->cb_state to NULL so we can reliably detect whether it's been set or not. --- src/vulkan/device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index e44f063237f..091d9280792 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2297,6 +2297,7 @@ VkResult anv_CreateCommandBuffer( cmd_buffer->device = device; cmd_buffer->rs_state = NULL; cmd_buffer->vp_state = NULL; + cmd_buffer->cb_state = NULL; memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors)); result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo); -- cgit v1.2.3 From 2f6aa424e9c7bbef5e330f9cbd724e64e8f61b69 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 2 Jun 2015 22:53:43 -0700 Subject: vk: Update generated headers with support for 64 bit fields --- src/vulkan/gen75_pack.h | 33 ++---- src/vulkan/gen7_pack.h | 24 +--- src/vulkan/gen8_pack.h | 286 +++++++++++++++++++++++++++++++++--------------- 3 files changed, 209 insertions(+), 134 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h 
b/src/vulkan/gen75_pack.h index 1f041ab1a72..aa586df7785 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -317,9 +317,7 @@ GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); uint32_t dw1 = - /* Struct GeneralStateMemoryObjectControlState (8..11): */ __gen_field(dw_GeneralStateMemoryObjectControlState, 8, 11) | - /* Struct StatelessDataPortAccessMemoryObjectControlState (4..7): */ __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 4, 7) | __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | 0; @@ -330,7 +328,6 @@ GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_SurfaceStateMemoryObjectControlState; GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); uint32_t dw2 = - /* Struct SurfaceStateMemoryObjectControlState (8..11): */ __gen_field(dw_SurfaceStateMemoryObjectControlState, 8, 11) | __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | 0; @@ -341,7 +338,6 @@ GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_DynamicStateMemoryObjectControlState; GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); uint32_t dw3 = - /* Struct DynamicStateMemoryObjectControlState (8..11): */ __gen_field(dw_DynamicStateMemoryObjectControlState, 8, 11) | __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | 0; @@ -352,7 +348,6 @@ GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_IndirectObjectMemoryObjectControlState; GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, 
&values->IndirectObjectMemoryObjectControlState); uint32_t dw4 = - /* Struct IndirectObjectMemoryObjectControlState (8..11): */ __gen_field(dw_IndirectObjectMemoryObjectControlState, 8, 11) | __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | 0; @@ -363,7 +358,6 @@ GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_InstructionMemoryObjectControlState; GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); uint32_t dw5 = - /* Struct InstructionMemoryObjectControlState (8..11): */ __gen_field(dw_InstructionMemoryObjectControlState, 8, 11) | __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | 0; @@ -517,7 +511,6 @@ GEN75_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_SWTessellationMemoryObjectControlState; GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); uint32_t dw1 = - /* Struct SWTessellationMemoryObjectControlState (8..11): */ __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | 0; @@ -1110,7 +1103,6 @@ GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restri GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); uint32_t dw1 = __gen_field(values->BindingTablePoolEnable, 11, 11) | - /* Struct SurfaceObjectControlState (7..10): */ __gen_field(dw_SurfaceObjectControlState, 7, 10) | 0; @@ -1434,7 +1426,6 @@ GEN75_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_ConstantBufferObjectControlState; GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); uint32_t dw2 = - /* Struct ConstantBufferObjectControlState (0..4): */ __gen_field(dw_ConstantBufferObjectControlState, 0, 4) | 0; @@ -1709,7 +1700,6 @@ 
GEN75_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, dw[4] = __gen_field(values->Depth, 21, 31) | __gen_field(values->MinimumArrayElement, 10, 20) | - /* Struct DepthBufferObjectControlState (0..3): */ __gen_field(dw_DepthBufferObjectControlState, 0, 3) | 0; @@ -2210,7 +2200,6 @@ GEN75_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); uint32_t dw1 = __gen_field(values->GatherPoolEnable, 11, 11) | - /* Struct MemoryObjectControlState (0..3): */ __gen_field(dw_MemoryObjectControlState, 0, 3) | 0; @@ -2397,7 +2386,6 @@ GEN75_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_HierarchicalDepthBufferObjectControlState; GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); dw[1] = - /* Struct HierarchicalDepthBufferObjectControlState (25..28): */ __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 28) | __gen_field(values->SurfacePitch, 0, 16) | 0; @@ -2551,7 +2539,6 @@ GEN75_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct MemoryObjectControlState (12..15): */ __gen_field(dw_MemoryObjectControlState, 12, 15) | __gen_field(values->IndexFormat, 8, 9) | __gen_field(values->DwordLength, 0, 7) | @@ -3949,7 +3936,6 @@ GEN75_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); dw[1] = __gen_field(values->SOBufferIndex, 29, 30) | - /* Struct SOBufferObjectControlState (25..28): */ __gen_field(dw_SOBufferObjectControlState, 25, 28) | __gen_field(values->SurfacePitch, 0, 11) | 0; @@ 
-4018,18 +4004,16 @@ GEN75_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, GEN75_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); uint32_t dw_Stream0Decl; GEN75_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); - dw[0] = - /* Struct Stream3Decl (48..63): */ + uint64_t qw0 = __gen_field(dw_Stream3Decl, 48, 63) | - /* Struct Stream2Decl (32..47): */ __gen_field(dw_Stream2Decl, 32, 47) | - /* Struct Stream1Decl (16..31): */ __gen_field(dw_Stream1Decl, 16, 31) | - /* Struct Stream0Decl (0..15): */ __gen_field(dw_Stream0Decl, 0, 15) | 0; - GEN75_SO_DECL_pack(data, &dw[0], &values->Stream0Decl); + dw[0] = qw0; + dw[1] = qw0 >> 32; + } struct GEN75_3DSTATE_SO_DECL_LIST { @@ -4119,7 +4103,6 @@ GEN75_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); dw[1] = __gen_field(values->StencilBufferEnable, 31, 31) | - /* Struct StencilBufferObjectControlState (25..28): */ __gen_field(dw_StencilBufferObjectControlState, 25, 28) | __gen_field(values->SurfacePitch, 0, 16) | 0; @@ -4434,7 +4417,6 @@ GEN75_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, dw[0] = __gen_field(values->VertexBufferIndex, 26, 31) | __gen_field(values->BufferAccessType, 20, 20) | - /* Struct VertexBufferMemoryObjectControlState (16..19): */ __gen_field(dw_VertexBufferMemoryObjectControlState, 16, 19) | __gen_field(values->AddressModifyEnable, 14, 14) | __gen_field(values->NullVertexBuffer, 13, 13) | @@ -7616,7 +7598,6 @@ GEN75_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, dw[5] = __gen_offset(values->XOffset, 25, 31) | __gen_offset(values->YOffset, 20, 23) | - /* Struct SurfaceObjectControlState (16..19): */ __gen_field(dw_SurfaceObjectControlState, 16, 19) | __gen_field(values->SurfaceMinLOD, 4, 7) | __gen_field(values->MIPCountLOD, 0, 3) | @@ -7655,9 +7636,9 @@ struct 
GEN75_SAMPLER_BORDER_COLOR_STATE { uint32_t BorderColorGreen0; uint32_t BorderColorBlue0; uint32_t BorderColorAlpha0; - uint32_t BorderColor; - uint32_t BorderColor0; - uint32_t BorderColor1; + uint64_t BorderColor; + uint64_t BorderColor0; + uint64_t BorderColor1; }; static inline void diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 34203d1d820..0c9ca3fb248 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -278,9 +278,7 @@ GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); uint32_t dw1 = - /* Struct GeneralStateMemoryObjectControlState (8..11): */ __gen_field(dw_GeneralStateMemoryObjectControlState, 8, 11) | - /* Struct StatelessDataPortAccessMemoryObjectControlState (4..7): */ __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 4, 7) | __gen_field(values->StatelessDataPortAccessForceWriteThru, 3, 3) | __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | @@ -292,7 +290,6 @@ GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_SurfaceStateMemoryObjectControlState; GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); uint32_t dw2 = - /* Struct SurfaceStateMemoryObjectControlState (8..11): */ __gen_field(dw_SurfaceStateMemoryObjectControlState, 8, 11) | __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | 0; @@ -303,7 +300,6 @@ GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_DynamicStateMemoryObjectControlState; GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); uint32_t dw3 = - /* Struct DynamicStateMemoryObjectControlState 
(8..11): */ __gen_field(dw_DynamicStateMemoryObjectControlState, 8, 11) | __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | 0; @@ -314,7 +310,6 @@ GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_IndirectObjectMemoryObjectControlState; GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); uint32_t dw4 = - /* Struct IndirectObjectMemoryObjectControlState (8..11): */ __gen_field(dw_IndirectObjectMemoryObjectControlState, 8, 11) | __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | 0; @@ -325,7 +320,6 @@ GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_InstructionMemoryObjectControlState; GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); uint32_t dw5 = - /* Struct InstructionMemoryObjectControlState (8..11): */ __gen_field(dw_InstructionMemoryObjectControlState, 8, 11) | __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | 0; @@ -479,7 +473,6 @@ GEN7_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_SWTessellationMemoryObjectControlState; GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); uint32_t dw1 = - /* Struct SWTessellationMemoryObjectControlState (8..11): */ __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | 0; @@ -1107,7 +1100,6 @@ GEN7_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_ConstantBufferObjectControlState; GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); uint32_t dw2 = - /* Struct ConstantBufferObjectControlState (0..4): */ __gen_field(dw_ConstantBufferObjectControlState, 0, 4) | 0; @@ -1382,7 +1374,6 @@ 
GEN7_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, dw[4] = __gen_field(values->Depth, 21, 31) | __gen_field(values->MinimumArrayElement, 10, 20) | - /* Struct DepthBufferObjectControlState (0..3): */ __gen_field(dw_DepthBufferObjectControlState, 0, 3) | 0; @@ -1752,7 +1743,6 @@ GEN7_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_HierarchicalDepthBufferObjectControlState; GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); dw[1] = - /* Struct HierarchicalDepthBufferObjectControlState (25..28): */ __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 28) | __gen_field(values->SurfacePitch, 0, 16) | 0; @@ -1901,7 +1891,6 @@ GEN7_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct MemoryObjectControlState (12..15): */ __gen_field(dw_MemoryObjectControlState, 12, 15) | __gen_field(values->CutIndexEnable, 10, 10) | __gen_field(values->IndexFormat, 8, 9) | @@ -3179,7 +3168,6 @@ GEN7_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); dw[1] = __gen_field(values->SOBufferIndex, 29, 30) | - /* Struct SOBufferObjectControlState (25..28): */ __gen_field(dw_SOBufferObjectControlState, 25, 28) | __gen_field(values->SurfacePitch, 0, 11) | 0; @@ -3248,18 +3236,16 @@ GEN7_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, GEN7_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); uint32_t dw_Stream0Decl; GEN7_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); - dw[0] = - /* Struct Stream3Decl (48..63): */ + uint64_t qw0 = __gen_field(dw_Stream3Decl, 48, 63) | - /* Struct Stream2Decl (32..47): */ 
__gen_field(dw_Stream2Decl, 32, 47) | - /* Struct Stream1Decl (16..31): */ __gen_field(dw_Stream1Decl, 16, 31) | - /* Struct Stream0Decl (0..15): */ __gen_field(dw_Stream0Decl, 0, 15) | 0; - GEN7_SO_DECL_pack(data, &dw[0], &values->Stream0Decl); + dw[0] = qw0; + dw[1] = qw0 >> 32; + } struct GEN7_3DSTATE_SO_DECL_LIST { @@ -3347,7 +3333,6 @@ GEN7_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_StencilBufferObjectControlState; GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); dw[1] = - /* Struct StencilBufferObjectControlState (25..28): */ __gen_field(dw_StencilBufferObjectControlState, 25, 28) | __gen_field(values->SurfacePitch, 0, 16) | 0; @@ -3662,7 +3647,6 @@ GEN7_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, dw[0] = __gen_field(values->VertexBufferIndex, 26, 31) | __gen_field(values->BufferAccessType, 20, 20) | - /* Struct VertexBufferMemoryObjectControlState (16..19): */ __gen_field(dw_VertexBufferMemoryObjectControlState, 16, 19) | __gen_field(values->AddressModifyEnable, 14, 14) | __gen_field(values->NullVertexBuffer, 13, 13) | diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index 59dbce8d4f6..aeb5dadd020 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -136,7 +136,7 @@ struct GEN8_3DSTATE_VS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t KernelStartPointer; + uint64_t KernelStartPointer; #define Multiple 0 #define Single 1 uint32_t SingleVertexDispatch; @@ -159,7 +159,7 @@ struct GEN8_3DSTATE_VS { uint32_t IllegalOpcodeExceptionEnable; uint32_t AccessesUAV; uint32_t SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; + uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t VertexURBEntryReadLength; @@ -189,10 +189,13 @@ GEN8_3DSTATE_VS_pack(__gen_user_data *data, void * restrict 
dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = + uint64_t qw1 = __gen_offset(values->KernelStartPointer, 6, 63) | 0; + dw[1] = qw1; + dw[2] = qw1 >> 32; + dw[3] = __gen_field(values->SingleVertexDispatch, 31, 31) | __gen_field(values->VectorMaskEnable, 30, 30) | @@ -205,11 +208,14 @@ GEN8_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - dw[4] = + uint64_t qw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[4] = qw4; + dw[5] = qw4 >> 32; + dw[6] = __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | __gen_field(values->VertexURBEntryReadLength, 11, 16) | @@ -268,9 +274,12 @@ GEN8_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw1 = 0; - dw[1] = + uint64_t qw1 = __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddressHigh, dw1); + dw[1] = qw1; + dw[2] = qw1 >> 32; + } #define GEN8_MI_ATOMIC_length 0x00000003 @@ -499,9 +508,12 @@ GEN8_MI_SEMAPHORE_WAIT_pack(__gen_user_data *data, void * restrict dst, uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = __gen_combine_address(data, &dw[2], values->SemaphoreAddress, dw2); + dw[2] = qw2; + dw[3] = qw2 >> 32; + } #define GEN8_MI_STORE_REGISTER_MEM_length 0x00000004 @@ -542,9 +554,12 @@ GEN8_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + dw[2] = qw2; + dw[3] = qw2 >> 32; + } #define GEN8_PIPELINE_SELECT_length 0x00000001 @@ -668,65 +683,74 @@ GEN8_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_GeneralStateMemoryObjectControlState; GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); uint32_t dw1 = - /* Struct GeneralStateMemoryObjectControlState (4..10): */ 
__gen_field(dw_GeneralStateMemoryObjectControlState, 4, 10) | __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | 0; - dw[1] = + uint64_t qw1 = __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + dw[1] = qw1; + dw[2] = qw1 >> 32; + uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); dw[3] = - /* Struct StatelessDataPortAccessMemoryObjectControlState (16..22): */ __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 16, 22) | 0; uint32_t dw_SurfaceStateMemoryObjectControlState; GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); uint32_t dw4 = - /* Struct SurfaceStateMemoryObjectControlState (4..10): */ __gen_field(dw_SurfaceStateMemoryObjectControlState, 4, 10) | __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | 0; - dw[4] = + uint64_t qw4 = __gen_combine_address(data, &dw[4], values->SurfaceStateBaseAddress, dw4); + dw[4] = qw4; + dw[5] = qw4 >> 32; + uint32_t dw_DynamicStateMemoryObjectControlState; GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); uint32_t dw6 = - /* Struct DynamicStateMemoryObjectControlState (4..10): */ __gen_field(dw_DynamicStateMemoryObjectControlState, 4, 10) | __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | 0; - dw[6] = + uint64_t qw6 = __gen_combine_address(data, &dw[6], values->DynamicStateBaseAddress, dw6); + dw[6] = qw6; + dw[7] = qw6 >> 32; + uint32_t dw_IndirectObjectMemoryObjectControlState; GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); uint32_t dw8 = - /* Struct IndirectObjectMemoryObjectControlState (4..10): */ 
__gen_field(dw_IndirectObjectMemoryObjectControlState, 4, 10) | __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | 0; - dw[8] = + uint64_t qw8 = __gen_combine_address(data, &dw[8], values->IndirectObjectBaseAddress, dw8); + dw[8] = qw8; + dw[9] = qw8 >> 32; + uint32_t dw_InstructionMemoryObjectControlState; GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); uint32_t dw10 = - /* Struct InstructionMemoryObjectControlState (4..10): */ __gen_field(dw_InstructionMemoryObjectControlState, 4, 10) | __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | 0; - dw[10] = + uint64_t qw10 = __gen_combine_address(data, &dw[10], values->InstructionBaseAddress, dw10); + dw[10] = qw10; + dw[11] = qw10 >> 32; + dw[12] = __gen_field(values->GeneralStateBufferSize, 12, 31) | __gen_field(values->GeneralStateBufferSizeModifyEnable, 0, 0) | @@ -806,7 +830,7 @@ struct GEN8_STATE_SIP { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t SystemInstructionPointer; + uint64_t SystemInstructionPointer; }; static inline void @@ -823,10 +847,13 @@ GEN8_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = + uint64_t qw1 = __gen_offset(values->SystemInstructionPointer, 4, 63) | 0; + dw[1] = qw1; + dw[2] = qw1 >> 32; + } #define GEN8_SWTESS_BASE_ADDRESS_length 0x00000002 @@ -866,7 +893,6 @@ GEN8_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_SWTessellationMemoryObjectControlState; GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); uint32_t dw1 = - /* Struct SWTessellationMemoryObjectControlState (8..11): */ __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | 0; @@ -1474,13 +1500,15 @@ GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restric 
GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); uint32_t dw1 = __gen_field(values->BindingTablePoolEnable, 11, 11) | - /* Struct SurfaceObjectControlState (0..6): */ __gen_field(dw_SurfaceObjectControlState, 0, 6) | 0; - dw[1] = + uint64_t qw1 = __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); + dw[1] = qw1; + dw[2] = qw1 >> 32; + dw[3] = __gen_field(values->BindingTablePoolBufferSize, 12, 31) | 0; @@ -1795,27 +1823,39 @@ GEN8_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + dw[2] = qw2; + dw[3] = qw2 >> 32; + uint32_t dw4 = 0; - dw[4] = + uint64_t qw4 = __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer1, dw4); + dw[4] = qw4; + dw[5] = qw4 >> 32; + uint32_t dw6 = 0; - dw[6] = + uint64_t qw6 = __gen_combine_address(data, &dw[6], values->PointerToConstantBuffer2, dw6); + dw[6] = qw6; + dw[7] = qw6 >> 32; + uint32_t dw8 = 0; - dw[8] = + uint64_t qw8 = __gen_combine_address(data, &dw[8], values->PointerToConstantBuffer3, dw8); + dw[8] = qw8; + dw[9] = qw8 >> 32; + } struct GEN8_3DSTATE_CONSTANT_DS { @@ -1841,7 +1881,6 @@ GEN8_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct ConstantBufferObjectControlState (8..14): */ __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | __gen_field(values->DwordLength, 0, 7) | 0; @@ -1881,7 +1920,6 @@ GEN8_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct ConstantBufferObjectControlState (8..14): */ 
__gen_field(dw_ConstantBufferObjectControlState, 8, 14) | __gen_field(values->DwordLength, 0, 7) | 0; @@ -1921,7 +1959,6 @@ GEN8_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct ConstantBufferObjectControlState (8..14): */ __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | __gen_field(values->DwordLength, 0, 7) | 0; @@ -1961,7 +1998,6 @@ GEN8_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct ConstantBufferObjectControlState (8..14): */ __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | __gen_field(values->DwordLength, 0, 7) | 0; @@ -2001,7 +2037,6 @@ GEN8_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->CommandSubType, 27, 28) | __gen_field(values->_3DCommandOpcode, 24, 26) | __gen_field(values->_3DCommandSubOpcode, 16, 23) | - /* Struct ConstantBufferObjectControlState (8..14): */ __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | __gen_field(values->DwordLength, 0, 7) | 0; @@ -2075,9 +2110,12 @@ GEN8_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + dw[2] = qw2; + dw[3] = qw2 >> 32; + dw[4] = __gen_field(values->Height, 18, 31) | __gen_field(values->Width, 4, 17) | @@ -2089,7 +2127,6 @@ GEN8_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, dw[5] = __gen_field(values->Depth, 21, 31) | __gen_field(values->MinimumArrayElement, 10, 20) | - /* Struct DepthBufferObjectControlState (0..6): */ __gen_field(dw_DepthBufferObjectControlState, 0, 6) | 0; @@ -2177,7 +2214,7 @@ struct GEN8_3DSTATE_DS { uint32_t 
_3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t KernelStartPointer; + uint64_t KernelStartPointer; #define Multiple 0 #define Single 1 uint32_t SingleDomainPointDispatch; @@ -2200,7 +2237,7 @@ struct GEN8_3DSTATE_DS { uint32_t AccessesUAV; uint32_t IllegalOpcodeExceptionEnable; uint32_t SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; + uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t PatchURBEntryReadLength; @@ -2231,10 +2268,13 @@ GEN8_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = + uint64_t qw1 = __gen_offset(values->KernelStartPointer, 6, 63) | 0; + dw[1] = qw1; + dw[2] = qw1 >> 32; + dw[3] = __gen_field(values->SingleDomainPointDispatch, 31, 31) | __gen_field(values->VectorMaskEnable, 30, 30) | @@ -2247,11 +2287,14 @@ GEN8_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - dw[4] = + uint64_t qw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[4] = qw4; + dw[5] = qw4 >> 32; + dw[6] = __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | __gen_field(values->PatchURBEntryReadLength, 11, 17) | @@ -2574,13 +2617,15 @@ GEN8_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); uint32_t dw1 = __gen_field(values->GatherPoolEnable, 11, 11) | - /* Struct MemoryObjectControlState (0..6): */ __gen_field(dw_MemoryObjectControlState, 0, 6) | 0; - dw[1] = + uint64_t qw1 = __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); + dw[1] = qw1; + dw[2] = qw1 >> 32; + dw[3] = __gen_field(values->GatherPoolBufferSize, 12, 31) | 0; @@ -2602,7 +2647,7 @@ struct GEN8_3DSTATE_GS { uint32_t _3DCommandOpcode; 
uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t KernelStartPointer; + uint64_t KernelStartPointer; uint32_t SingleProgramFlow; #define Dmask 0 #define Vmask 1 @@ -2625,7 +2670,7 @@ struct GEN8_3DSTATE_GS { uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; uint32_t ExpectedVertexCount; - uint32_t ScratchSpaceBasePointer; + uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t OutputVertexSize; uint32_t OutputTopology; @@ -2676,10 +2721,13 @@ GEN8_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = + uint64_t qw1 = __gen_offset(values->KernelStartPointer, 6, 63) | 0; + dw[1] = qw1; + dw[2] = qw1 >> 32; + dw[3] = __gen_field(values->SingleProgramFlow, 31, 31) | __gen_field(values->VectorMaskEnable, 30, 30) | @@ -2694,11 +2742,14 @@ GEN8_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->ExpectedVertexCount, 0, 5) | 0; - dw[4] = + uint64_t qw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[4] = qw4; + dw[5] = qw4 >> 32; + dw[6] = __gen_field(values->OutputVertexSize, 23, 28) | __gen_field(values->OutputTopology, 17, 22) | @@ -2776,7 +2827,6 @@ GEN8_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_HierarchicalDepthBufferObjectControlState; GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); dw[1] = - /* Struct HierarchicalDepthBufferObjectControlState (25..31): */ __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 31) | __gen_field(values->SurfacePitch, 0, 16) | 0; @@ -2784,9 +2834,12 @@ GEN8_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + dw[2] = qw2; + dw[3] = qw2 >> 32; + 
dw[4] = __gen_field(values->SurfaceQPitch, 0, 14) | 0; @@ -2827,8 +2880,8 @@ struct GEN8_3DSTATE_HS { uint32_t StatisticsEnable; uint32_t MaximumNumberofThreads; uint32_t InstanceCount; - uint32_t KernelStartPointer; - uint32_t ScratchSpaceBasePointer; + uint64_t KernelStartPointer; + uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t SingleProgramFlow; #define Dmask 0 @@ -2871,15 +2924,21 @@ GEN8_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->InstanceCount, 0, 3) | 0; - dw[3] = + uint64_t qw3 = __gen_offset(values->KernelStartPointer, 6, 63) | 0; - dw[5] = + dw[3] = qw3; + dw[4] = qw3 >> 32; + + uint64_t qw5 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[5] = qw5; + dw[6] = qw5 >> 32; + dw[7] = __gen_field(values->SingleProgramFlow, 27, 27) | __gen_field(values->VectorMaskEnable, 26, 26) | @@ -2937,16 +2996,18 @@ GEN8_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); dw[1] = __gen_field(values->IndexFormat, 8, 9) | - /* Struct MemoryObjectControlState (0..6): */ __gen_field(dw_MemoryObjectControlState, 0, 6) | 0; uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = __gen_combine_address(data, &dw[2], values->BufferStartingAddress, dw2); + dw[2] = qw2; + dw[3] = qw2 >> 32; + dw[4] = __gen_field(values->BufferSize, 0, 31) | 0; @@ -3181,7 +3242,7 @@ struct GEN8_3DSTATE_PS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t KernelStartPointer0; + uint64_t KernelStartPointer0; #define Multiple 0 #define Single 1 uint32_t SingleProgramFlow; @@ -3212,7 +3273,7 @@ struct GEN8_3DSTATE_PS { uint32_t IllegalOpcodeExceptionEnable; uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; + uint64_t ScratchSpaceBasePointer; uint32_t 
PerThreadScratchSpace; uint32_t MaximumNumberofThreadsPerPSD; uint32_t PushConstantEnable; @@ -3228,8 +3289,8 @@ struct GEN8_3DSTATE_PS { uint32_t DispatchGRFStartRegisterForConstantSetupData0; uint32_t DispatchGRFStartRegisterForConstantSetupData1; uint32_t DispatchGRFStartRegisterForConstantSetupData2; - uint32_t KernelStartPointer1; - uint32_t KernelStartPointer2; + uint64_t KernelStartPointer1; + uint64_t KernelStartPointer2; }; static inline void @@ -3246,10 +3307,13 @@ GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = + uint64_t qw1 = __gen_offset(values->KernelStartPointer0, 6, 63) | 0; + dw[1] = qw1; + dw[2] = qw1 >> 32; + dw[3] = __gen_field(values->SingleProgramFlow, 31, 31) | __gen_field(values->VectorMaskEnable, 30, 30) | @@ -3264,11 +3328,14 @@ GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - dw[4] = + uint64_t qw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[4] = qw4; + dw[5] = qw4 >> 32; + dw[6] = __gen_field(values->MaximumNumberofThreadsPerPSD, 23, 31) | __gen_field(values->PushConstantEnable, 11, 11) | @@ -3286,14 +3353,20 @@ GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DispatchGRFStartRegisterForConstantSetupData2, 0, 6) | 0; - dw[8] = + uint64_t qw8 = __gen_offset(values->KernelStartPointer1, 6, 63) | 0; - dw[10] = + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint64_t qw10 = __gen_offset(values->KernelStartPointer2, 6, 63) | 0; + dw[10] = qw10; + dw[11] = qw10 >> 32; + } #define GEN8_3DSTATE_PS_BLEND_length 0x00000002 @@ -4313,12 +4386,10 @@ GEN8_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_Attribute; GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute, &values->Attribute); dw[1] = - /* Struct Attribute (0..15): */ __gen_field(dw_Attribute, 0, 15) | 0; - 
GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw[1], &values->Attribute); - dw[9] = + uint64_t qw9 = __gen_field(values->Attribute15WrapShortestEnables, 60, 63) | __gen_field(values->Attribute14WrapShortestEnables, 56, 59) | __gen_field(values->Attribute13WrapShortestEnables, 52, 55) | @@ -4337,6 +4408,9 @@ GEN8_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->Attribute00WrapShortestEnables, 0, 3) | 0; + dw[9] = qw9; + dw[10] = qw9 >> 32; + } #define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 @@ -4499,7 +4573,6 @@ GEN8_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, dw[1] = __gen_field(values->SOBufferEnable, 31, 31) | __gen_field(values->SOBufferIndex, 29, 30) | - /* Struct SOBufferObjectControlState (22..28): */ __gen_field(dw_SOBufferObjectControlState, 22, 28) | __gen_field(values->StreamOffsetWriteEnable, 21, 21) | __gen_field(values->StreamOutputBufferOffsetAddressEnable, 20, 20) | @@ -4508,9 +4581,12 @@ GEN8_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + dw[2] = qw2; + dw[3] = qw2 >> 32; + dw[4] = __gen_field(values->SurfaceSize, 0, 29) | 0; @@ -4518,9 +4594,12 @@ GEN8_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, uint32_t dw5 = 0; - dw[5] = + uint64_t qw5 = __gen_combine_address(data, &dw[5], values->StreamOutputBufferOffsetAddress, dw5); + dw[5] = qw5; + dw[6] = qw5 >> 32; + dw[7] = __gen_field(values->StreamOffset, 0, 31) | 0; @@ -4577,18 +4656,16 @@ GEN8_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, GEN8_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); uint32_t dw_Stream0Decl; GEN8_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); - dw[0] = - /* Struct Stream3Decl (48..63): */ + uint64_t qw0 = __gen_field(dw_Stream3Decl, 48, 63) | - /* Struct Stream2Decl (32..47): */ __gen_field(dw_Stream2Decl, 32, 
47) | - /* Struct Stream1Decl (16..31): */ __gen_field(dw_Stream1Decl, 16, 31) | - /* Struct Stream0Decl (0..15): */ __gen_field(dw_Stream0Decl, 0, 15) | 0; - GEN8_SO_DECL_pack(data, &dw[0], &values->Stream0Decl); + dw[0] = qw0; + dw[1] = qw0 >> 32; + } struct GEN8_3DSTATE_SO_DECL_LIST { @@ -4679,7 +4756,6 @@ GEN8_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); dw[1] = __gen_field(values->StencilBufferEnable, 31, 31) | - /* Struct StencilBufferObjectControlState (22..28): */ __gen_field(dw_StencilBufferObjectControlState, 22, 28) | __gen_field(values->SurfacePitch, 0, 16) | 0; @@ -4687,9 +4763,12 @@ GEN8_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + dw[2] = qw2; + dw[3] = qw2 >> 32; + dw[4] = __gen_field(values->SurfaceQPitch, 0, 14) | 0; @@ -5004,7 +5083,6 @@ GEN8_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); dw[0] = __gen_field(values->VertexBufferIndex, 26, 31) | - /* Struct MemoryObjectControlState (16..22): */ __gen_field(dw_MemoryObjectControlState, 16, 22) | __gen_field(values->AddressModifyEnable, 14, 14) | __gen_field(values->NullVertexBuffer, 13, 13) | @@ -5014,9 +5092,12 @@ GEN8_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, uint32_t dw1 = 0; - dw[1] = + uint64_t qw1 = __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + dw[1] = qw1; + dw[2] = qw1 >> 32; + dw[3] = __gen_field(values->BufferSize, 0, 31) | 0; @@ -6666,15 +6747,21 @@ GEN8_MI_COPY_MEM_MEM_pack(__gen_user_data *data, void * restrict dst, uint32_t dw1 = 0; - dw[1] = + uint64_t qw1 = __gen_combine_address(data, &dw[1], 
values->DestinationMemoryAddress, dw1); + dw[1] = qw1; + dw[2] = qw1 >> 32; + uint32_t dw3 = 0; - dw[3] = + uint64_t qw3 = __gen_combine_address(data, &dw[3], values->SourceMemoryAddress, dw3); + dw[3] = qw3; + dw[4] = qw3 >> 32; + } #define GEN8_MI_LOAD_REGISTER_IMM_length 0x00000003 @@ -6754,9 +6841,12 @@ GEN8_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + dw[2] = qw2; + dw[3] = qw2 >> 32; + } #define GEN8_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 @@ -6876,9 +6966,12 @@ GEN8_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + dw[2] = qw2; + dw[3] = qw2 >> 32; + } #define GEN8_MI_MATH_length_bias 0x00000002 @@ -7097,9 +7190,12 @@ GEN8_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->CoreModeEnable, 0, 0) | 0; - dw[1] = + uint64_t qw1 = __gen_combine_address(data, &dw[1], values->DestinationAddress, dw1); + dw[1] = qw1; + dw[2] = qw1 >> 32; + dw[3] = __gen_field(values->DataDWord0, 0, 31) | 0; @@ -7224,9 +7320,12 @@ GEN8_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->CoreModeEnable, 0, 0) | 0; - dw[1] = + uint64_t qw1 = __gen_combine_address(data, &dw[1], values->Address, dw1); + dw[1] = qw1; + dw[2] = qw1 >> 32; + dw[3] = __gen_field(values->DataDWord0, 0, 31) | 0; @@ -7315,9 +7414,12 @@ GEN8_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + dw[2] = qw2; + dw[3] = qw2 >> 32; + } #define GEN8_MI_SUSPEND_FLUSH_length 0x00000001 @@ -7592,7 +7694,7 @@ struct GEN8_PIPE_CONTROL { uint32_t DepthCacheFlushEnable; __gen_address_type Address; __gen_address_type AddressHigh; - uint32_t ImmediateData; + uint64_t 
ImmediateData; }; static inline void @@ -7645,10 +7747,13 @@ GEN8_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, dw[3] = __gen_combine_address(data, &dw[3], values->AddressHigh, dw3); - dw[4] = + uint64_t qw4 = __gen_field(values->ImmediateData, 0, 63) | 0; + dw[4] = qw4; + dw[5] = qw4 >> 32; + } #define GEN8_3DSTATE_CONSTANT_BODY_length 0x0000000a @@ -8217,7 +8322,6 @@ GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, uint32_t dw_MemoryObjectControlState; GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); dw[1] = - /* Struct MemoryObjectControlState (24..30): */ __gen_field(dw_MemoryObjectControlState, 24, 30) | __gen_field(values->BaseMipLevel, 19, 23) | __gen_field(values->SurfaceQPitch, 0, 14) | @@ -8275,18 +8379,24 @@ GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, uint32_t dw8 = 0; - dw[8] = + uint64_t qw8 = __gen_combine_address(data, &dw[8], values->SurfaceBaseAddress, dw8); + dw[8] = qw8; + dw[9] = qw8 >> 32; + uint32_t dw10 = __gen_field(values->XOffsetforVPlane, 48, 61) | __gen_field(values->YOffsetforVPlane, 32, 45) | __gen_field(values->AuxiliaryTableIndexforMediaCompressedSurface, 21, 31) | 0; - dw[10] = + uint64_t qw10 = __gen_combine_address(data, &dw[10], values->AuxiliarySurfaceBaseAddress, dw10); + dw[10] = qw10; + dw[11] = qw10 >> 32; + dw[12] = 0; -- cgit v1.2.3 From 1286bd3160bc1e70fa3bba2ec12999b2a02ffad9 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 2 Jun 2015 22:56:39 -0700 Subject: vk: Delete vk.c test case We now have crucible up and running and all vk sub-cases have been moved over. Delete this crufty old hack of a test case. 
--- src/vulkan/.gitignore | 2 - src/vulkan/Makefile.am | 5 - src/vulkan/vk.c | 1578 ------------------------------------------------ 3 files changed, 1585 deletions(-) delete mode 100644 src/vulkan/vk.c (limited to 'src') diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore index 772daa8bf78..617b6d4ebb9 100644 --- a/src/vulkan/.gitignore +++ b/src/vulkan/.gitignore @@ -1,5 +1,3 @@ -/vk - # Generated source files /*-spirv.h /entrypoints.c diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index ae61b67802c..ce3f6b06391 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -88,11 +88,6 @@ entrypoints.c : vk_gen.py $(vulkan_include_HEADERS) CLEANFILES = entrypoints.h entrypoints.c -bin_PROGRAMS = vk - -vk_SOURCES = vk.c -vk_LDADD = libvulkan.la -lpng16 - libvulkan_la_LIBADD = -lxcb -lxcb-dri3 \ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c deleted file mode 100644 index e7d21cca9b0..00000000000 --- a/src/vulkan/vk.c +++ /dev/null @@ -1,1578 +0,0 @@ -#include -#include -#include -#include - -#define VK_PROTOTYPES -#include - -#include -#include -#include -#include -#include -#include - -#include "vk-spirv.h" - -#define for_each_bit(b, dword) \ - for (uint32_t __dword = (dword); \ - (b) = __builtin_ffs(__dword) - 1, __dword; \ - __dword &= ~(1 << (b))) - -static inline uint32_t -align_u32(uint32_t value, uint32_t alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} - -static void -fail_if(int cond, const char *format, ...) 
-{ - va_list args; - - if (!cond) - return; - - va_start(args, format); - vfprintf(stderr, format, args); - va_end(args); - - exit(1); -} - -static void -write_png(char *path, int32_t width, int32_t height, int32_t stride, void *pixels) -{ - FILE *f = NULL; - png_structp png_writer = NULL; - png_infop png_info = NULL; - - uint8_t *rows[height]; - - for (int32_t y = 0; y < height; y++) - rows[y] = pixels + y * stride; - - f = fopen(path, "wb"); - fail_if(!f, "failed to open file for writing: %s", path); - - png_writer = png_create_write_struct(PNG_LIBPNG_VER_STRING, - NULL, NULL, NULL); - fail_if (!png_writer, "failed to create png writer"); - - png_info = png_create_info_struct(png_writer); - fail_if(!png_info, "failed to create png writer info"); - - png_init_io(png_writer, f); - png_set_IHDR(png_writer, png_info, - width, height, - 8, PNG_COLOR_TYPE_RGBA, - PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, - PNG_FILTER_TYPE_DEFAULT); - png_write_info(png_writer, png_info); - png_set_rows(png_writer, png_info, rows); - png_write_png(png_writer, png_info, PNG_TRANSFORM_IDENTITY, NULL); - - png_destroy_write_struct(&png_writer, &png_info); - - fclose(f); -} - -static void * -test_alloc(void* pUserData, - size_t size, - size_t alignment, - VkSystemAllocType allocType) -{ - return malloc(size); -} - -static void -test_free(void* pUserData, - void* pMem) -{ - free(pMem); -} - -#define GLSL(src) "#version 330\n" #src - -static void -create_pipeline(VkDevice device, VkPipeline *pipeline, - VkPipelineLayout pipeline_layout) -{ - VkPipelineIaStateCreateInfo ia_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .disableVertexReuse = false, - .primitiveRestartEnable = false, - .primitiveRestartIndex = 0 - }; - - VkShader vs = GLSL_VK_SHADER(device, VERTEX, - layout(location = 0) in vec4 a_position; - layout(location = 1) in vec4 a_color; - layout(set = 0, binding = 0) uniform block1 { - vec4 
color; - } u1; - layout(set = 0, binding = 1) uniform block2 { - vec4 color; - } u2; - layout(set = 1, binding = 0) uniform block3 { - vec4 color; - } u3; - out vec4 v_color; - void main() - { - gl_Position = a_position; - v_color = a_color + u1.color + u2.color + u3.color; - } - ); - - VkShader fs = GLSL_VK_SHADER(device, FRAGMENT, - out vec4 f_color; - in vec4 v_color; - layout(set = 0, binding = 0) uniform sampler2D tex; - void main() - { - f_color = v_color + texture(tex, vec2(0.1, 0.1)); - } - ); - - VkPipelineShaderStageCreateInfo vs_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = &ia_create_info, - .shader = { - .stage = VK_SHADER_STAGE_VERTEX, - .shader = vs, - .linkConstBufferCount = 0, - .pLinkConstBufferInfo = NULL, - .pSpecializationInfo = NULL - } - }; - - VkPipelineShaderStageCreateInfo fs_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = &vs_create_info, - .shader = { - .stage = VK_SHADER_STAGE_FRAGMENT, - .shader = fs, - .linkConstBufferCount = 0, - .pLinkConstBufferInfo = NULL, - .pSpecializationInfo = NULL - } - }; - - VkPipelineVertexInputCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO, - .pNext = &fs_create_info, - .bindingCount = 2, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .strideInBytes = 16, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX - }, - { - .binding = 1, - .strideInBytes = 0, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX - } - }, - .attributeCount = 2, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offsetInBytes = 0 - }, - { - .location = 1, - .binding = 1, - .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offsetInBytes = 0 - } - } - }; - - VkPipelineRsStateCreateInfo rs_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, - 
.pNext = &vi_create_info, - - .depthClipEnable = true, - .rasterizerDiscardEnable = false, - .fillMode = VK_FILL_MODE_SOLID, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CCW - }; - - VkPipelineCbStateCreateInfo cb_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO, - .pNext = &rs_create_info, - .attachmentCount = 1, - .pAttachments = (VkPipelineCbAttachmentState []) { - { .channelWriteMask = VK_CHANNEL_A_BIT | - VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, - } - }; - - vkCreateGraphicsPipeline(device, - &(VkGraphicsPipelineCreateInfo) { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = &cb_create_info, - .flags = 0, - .layout = pipeline_layout - }, - pipeline); - - - vkDestroyObject(device, VK_OBJECT_TYPE_SHADER, fs); - vkDestroyObject(device, VK_OBJECT_TYPE_SHADER, vs); -} - -static void -test_timestamp(VkDevice device, VkQueue queue) -{ - VkBuffer buffer; - vkCreateBuffer(device, - &(VkBufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = 1024, - .usage = VK_BUFFER_USAGE_GENERAL, - .flags = 0 - }, - &buffer); - - VkMemoryRequirements buffer_requirements; - size_t size = sizeof(buffer_requirements); - vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, buffer, - VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, - &size, &buffer_requirements); - - VkDeviceMemory mem; - vkAllocMemory(device, - &(VkMemoryAllocInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, - .allocationSize = buffer_requirements.size, - .memProps = VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT, - .memPriority = VK_MEMORY_PRIORITY_NORMAL - }, - &mem); - - void *map; - vkMapMemory(device, mem, 0, buffer_requirements.size, 0, &map); - memset(map, 0x11, buffer_requirements.size); - - vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, - buffer, - 0, /* allocation index; for objects which need to bind to multiple mems */ - mem, 0); - - VkCmdBuffer cmdBuffer; - vkCreateCommandBuffer(device, - 
&(VkCmdBufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO, - .queueNodeIndex = 0, - .flags = 0 - }, - &cmdBuffer); - - vkBeginCommandBuffer(cmdBuffer, - &(VkCmdBufferBeginInfo) { - .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO, - .flags = 0 - }); - - vkCmdWriteTimestamp(cmdBuffer, VK_TIMESTAMP_TYPE_TOP, buffer, 0); - vkCmdWriteTimestamp(cmdBuffer, VK_TIMESTAMP_TYPE_BOTTOM, buffer, 8); - - vkEndCommandBuffer(cmdBuffer); - - vkQueueSubmit(queue, 1, &cmdBuffer, 0); - - vkQueueWaitIdle(queue); - - vkDestroyObject(device, VK_OBJECT_TYPE_BUFFER, buffer); - vkDestroyObject(device, VK_OBJECT_TYPE_COMMAND_BUFFER, cmdBuffer); - - uint64_t *results = map; - printf("top timestamp: %20ld (%016lx)\n", results[0], results[0]); - printf("bottom timestamp: %20ld (%016lx)\n", results[1], results[1]); - - vkUnmapMemory(device, mem); - vkFreeMemory(device, mem); -} - -static void -test_buffer_copy(VkDevice device, VkQueue queue) -{ - /* We'll test copying 1000k buffers */ - const int buffer_size = 1024000; - - VkBufferCreateInfo buffer_info = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = buffer_size, - .usage = VK_BUFFER_USAGE_GENERAL, - .flags = 0 - }; - - VkBuffer buffer1, buffer2; - vkCreateBuffer(device, &buffer_info, &buffer1); - vkCreateBuffer(device, &buffer_info, &buffer2); - - VkMemoryRequirements buffer_requirements; - size_t size = sizeof(buffer_requirements); - vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, buffer1, - VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, - &size, &buffer_requirements); - - const int memory_size = buffer_requirements.size * 2; - - VkDeviceMemory mem; - vkAllocMemory(device, - &(VkMemoryAllocInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, - .allocationSize = memory_size, - .memProps = VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT, - .memPriority = VK_MEMORY_PRIORITY_NORMAL - }, &mem); - - void *map; - vkMapMemory(device, mem, 0, buffer_requirements.size * 2, 0, &map); - - /* Fill the first buffer_size of the 
memory with a pattern */ - uint32_t *map32 = map; - for (unsigned i = 0; i < buffer_size / sizeof(*map32); i++) - map32[i] = i; - - /* Fill the rest with 0 */ - memset((char *)map + buffer_size, 0, memory_size - buffer_size); - - vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, - buffer1, - 0, /* allocation index; for objects which need to bind to multiple mems */ - mem, 0); - - vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, - buffer2, - 0, /* allocation index; for objects which need to bind to multiple mems */ - mem, buffer_requirements.size); - - VkCmdBuffer cmdBuffer; - vkCreateCommandBuffer(device, - &(VkCmdBufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO, - .queueNodeIndex = 0, - .flags = 0 - }, &cmdBuffer); - - vkBeginCommandBuffer(cmdBuffer, - &(VkCmdBufferBeginInfo) { - .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO, - .flags = 0 - }); - - vkCmdCopyBuffer(cmdBuffer, buffer1, buffer2, 1, - &(VkBufferCopy) { - .srcOffset = 0, - .destOffset = 0, - .copySize = buffer_size, - }); - - vkEndCommandBuffer(cmdBuffer); - - vkQueueSubmit(queue, 1, &cmdBuffer, 0); - - vkQueueWaitIdle(queue); - - vkDestroyObject(device, VK_OBJECT_TYPE_BUFFER, buffer1); - vkDestroyObject(device, VK_OBJECT_TYPE_BUFFER, buffer2); - vkDestroyObject(device, VK_OBJECT_TYPE_COMMAND_BUFFER, cmdBuffer); - - uint32_t *map32_2 = map + buffer_requirements.size; - for (unsigned i = 0; i < buffer_size / sizeof(*map32); i++) { - if (map32[i] != map32_2[i]) { - printf("buffer mismatch at dword %d: found 0x%x, expected 0x%x\n", - i, map32_2[i], map32[i]); - } - } - - vkUnmapMemory(device, mem); - vkFreeMemory(device, mem); -} - -static void -test_formats(VkDevice device, VkQueue queue) -{ - VkFormatProperties properties; - size_t size = sizeof(properties); - uint32_t f; - - static const char *features[] = { - "sampled_image", - "storage_image", - "storage_image_atomic", - "uniform_texel_buffer", - "storage_texel_buffer", - "storage_texel_buffer_atomic", - 
"vertex_buffer", - "color_attachment", - "color_attachment_blend", - "depth_stencil_attachment", - "conversion" - }; - - VkFormat format = VK_FORMAT_R32G32B32A32_SFLOAT; - vkGetFormatInfo(device, format, - VK_FORMAT_INFO_TYPE_PROPERTIES, - &size, &properties); - - printf("format 0x%x:\n", format); - - printf(" linear tiling features (0x%x):", properties.linearTilingFeatures); - for_each_bit(f, properties.linearTilingFeatures) - printf(" %s", features[f]); - - printf("\n optimal tiling features (0x%x):", properties.optimalTilingFeatures); - for_each_bit(f, properties.optimalTilingFeatures) - printf(" %s", features[f]); - printf("\n"); -} - -#define TEST_DEPTH_FLAG 0x01 - -struct test_context { - uint32_t width, height; - VkDevice device; - VkQueue queue; - VkCmdBuffer cmdBuffer; - VkPipeline pipeline; - VkImage rt; - VkImage ds; - VkBuffer vertex_buffer; - VkBuffer image_buffer; - VkDeviceMemory mem; - void *map; - void *rt_map; - void *vertex_map; - void *image_map; - VkDynamicVpState vp_state; - VkDynamicRsState rs_state; - VkDynamicDsState ds_state; - VkDynamicCbState cb_state; - VkColorAttachmentView rt_view; - VkDepthStencilView ds_view; - uint32_t rt_size; - VkFramebuffer framebuffer; - VkRenderPass pass; -}; - -static void -test_prepare(struct test_context *ctx, VkDevice device, VkQueue queue, uint32_t flags) -{ - ctx->device = device; - ctx->queue = queue; - - vkCreateCommandBuffer(ctx->device, - &(VkCmdBufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO, - .queueNodeIndex = 0, - .flags = 0 - }, - &ctx->cmdBuffer); - - vkCreateBuffer(ctx->device, - &(VkBufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = 4096, - .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - .flags = 0 - }, - &ctx->vertex_buffer); - - VkMemoryRequirements vb_requirements; - size_t size = sizeof(vb_requirements); - vkGetObjectInfo(ctx->device, VK_OBJECT_TYPE_BUFFER, ctx->vertex_buffer, - VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, - &size, 
&vb_requirements); - - vkCreateImage(ctx->device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_R8G8B8A8_UNORM, - .extent = { .width = ctx->width, .height = ctx->height, .depth = 1 }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }, - &ctx->rt); - - VkMemoryRequirements rt_requirements; - size = sizeof(rt_requirements); - vkGetObjectInfo(ctx->device, VK_OBJECT_TYPE_IMAGE, ctx->rt, - VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, - &size, &rt_requirements); - ctx->rt_size = rt_requirements.size; - - VkDepthStencilBindInfo *ds_attachment; - VkDepthStencilBindInfo ds_bind_info; - VkMemoryRequirements ds_requirements; - - if (flags & TEST_DEPTH_FLAG) { - vkCreateImage(ctx->device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_D24_UNORM, - .extent = { .width = ctx->width, .height = ctx->height, .depth = 1 }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_BIT, - .flags = 0, - }, - &ctx->ds); - - size = sizeof(ds_requirements); - vkGetObjectInfo(ctx->device, VK_OBJECT_TYPE_IMAGE, ctx->ds, - VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, - &size, &ds_requirements); - } else { - ds_requirements.size = 0; - } - - vkCreateBuffer(ctx->device, - &(VkBufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = ctx->width * ctx->height * 4, - .usage = VK_BUFFER_USAGE_TRANSFER_DESTINATION_BIT, - .flags = 0 - }, - &ctx->image_buffer); - - VkMemoryRequirements ib_requirements; - size = sizeof(ib_requirements); - vkGetObjectInfo(ctx->device, VK_OBJECT_TYPE_BUFFER, ctx->image_buffer, - VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, - &size, &ib_requirements); - - size_t mem_size = - align_u32(vb_requirements.size, 4096) + - 
align_u32(rt_requirements.size, 4096) + - align_u32(ds_requirements.size, 4096) + - align_u32(ib_requirements.size, 4096); - - printf("mem size %ld\n", mem_size); - - vkAllocMemory(ctx->device, - &(VkMemoryAllocInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, - .allocationSize = mem_size, - .memProps = VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT, - .memPriority = VK_MEMORY_PRIORITY_NORMAL - }, - &ctx->mem); - - vkMapMemory(ctx->device, ctx->mem, 0, mem_size, 0, &ctx->map); - memset(ctx->map, 0, mem_size); - - uint32_t offset = 0; - printf("vb: %ldb at %d\n", vb_requirements.size, offset); - vkQueueBindObjectMemory(ctx->queue, VK_OBJECT_TYPE_BUFFER, - ctx->vertex_buffer, 0, ctx->mem, offset); - ctx->vertex_map = ctx->map + offset; - offset = align_u32(offset + vb_requirements.size, 4096); - - printf("rt: %ldb at %d\n", rt_requirements.size, offset); - vkQueueBindObjectMemory(ctx->queue, VK_OBJECT_TYPE_IMAGE, - ctx->rt, 0, ctx->mem, offset); - ctx->rt_map = ctx->map + offset; - offset = align_u32(offset + rt_requirements.size, 4096); - - if (flags & TEST_DEPTH_FLAG) { - printf("ds: %ldb at %d\n", ds_requirements.size, offset); - vkQueueBindObjectMemory(ctx->queue, VK_OBJECT_TYPE_IMAGE, - ctx->ds, 0, ctx->mem, offset); - offset = align_u32(offset + ds_requirements.size, 4096); - } - - printf("ib: %ldb at %d\n", ib_requirements.size, offset); - vkQueueBindObjectMemory(ctx->queue, VK_OBJECT_TYPE_BUFFER, - ctx->image_buffer, 0, ctx->mem, offset); - ctx->image_map = ctx->map + offset; - offset = align_u32(offset + ib_requirements.size, 4096); - - vkCreateDynamicViewportState(ctx->device, - &(VkDynamicVpStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, - .viewportAndScissorCount = 1, - .pViewports = (VkViewport[]) { - { - .originX = 0, - .originY = 0, - .width = ctx->width, - .height = ctx->height, - .minDepth = -1, - .maxDepth = 1 - }, - }, - .pScissors = (VkRect[]) { - { { 0, 0 }, { ctx->width, ctx->height } }, - } - }, - 
&ctx->vp_state); - - vkCreateDynamicRasterState(ctx->device, - &(VkDynamicRsStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, - }, - &ctx->rs_state); - - vkCreateDynamicDepthStencilState(ctx->device, - &(VkDynamicDsStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO, - }, - &ctx->ds_state); - - vkCreateDynamicColorBlendState(ctx->device, - &(VkDynamicCbStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO - }, - &ctx->cb_state); - - vkCreateColorAttachmentView(ctx->device, - &(VkColorAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, - .image = ctx->rt, - .format = VK_FORMAT_R8G8B8A8_UNORM, - .mipLevel = 0, - .baseArraySlice = 0, - .arraySize = 1, - .msaaResolveImage = 0, - .msaaResolveSubResource = { 0, } - }, - &ctx->rt_view); - - if (flags & TEST_DEPTH_FLAG) { - vkCreateDepthStencilView(ctx->device, - &(VkDepthStencilViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DEPTH_STENCIL_VIEW_CREATE_INFO, - .image = ctx->ds, - .mipLevel = 0, - .baseArraySlice = 0, - .arraySize = 1, - .msaaResolveImage = 0, - .msaaResolveSubResource = { 0, } - }, - &ctx->ds_view); - ds_bind_info.view = ctx->ds_view; - ds_bind_info.layout = 0; - ds_attachment = &ds_bind_info; - } else { - ds_attachment = NULL; - } - - vkCreateFramebuffer(ctx->device, - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .colorAttachmentCount = 1, - .pColorAttachments = (VkColorAttachmentBindInfo[]) { - { - .view = ctx->rt_view, - .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL - } - }, - .pDepthStencilAttachment = ds_attachment, - .sampleCount = 1, - .width = ctx->width, - .height = ctx->height, - .layers = 1 - }, - &ctx->framebuffer); - - vkCreateRenderPass(ctx->device, - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .renderArea = { { 0, 0 }, { ctx->width, ctx->height } }, - .colorAttachmentCount = 1, - .extent = { }, - 
.sampleCount = 1, - .layers = 1, - .pColorFormats = (VkFormat[]) { VK_FORMAT_R8G8B8A8_UNORM }, - .pColorLayouts = (VkImageLayout[]) { VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL }, - .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_CLEAR }, - .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, - .pColorLoadClearValues = (VkClearColor[]) { - { .color = { .floatColor = { 0.2, 0.2, 0.2, 1.0 } }, .useRawValue = false } - }, - .depthStencilFormat = VK_FORMAT_D24_UNORM, - .depthStencilLayout = 0, - .depthLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, - .depthLoadClearValue = 0.5, - .depthStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, - }, - &ctx->pass); - - vkBeginCommandBuffer(ctx->cmdBuffer, - &(VkCmdBufferBeginInfo) { - .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO, - .flags = 0 - }); - - vkCmdBeginRenderPass(ctx->cmdBuffer, - &(VkRenderPassBegin) { - .renderPass = ctx->pass, - .framebuffer = ctx->framebuffer - }); -} - -static void -test_finish(struct test_context *ctx) -{ - vkCmdEndRenderPass(ctx->cmdBuffer, ctx->pass); - - VkBufferImageCopy copy = { - .bufferOffset = 0, - .imageSubresource = { - .aspect = VK_IMAGE_ASPECT_COLOR, - .mipLevel = 0, - .arraySlice = 0, - }, - .imageOffset = { .x = 0, .y = 0, .z = 0 }, - .imageExtent = { .width = ctx->width, .height = ctx->height, .depth = 1 }, - }; - - vkCmdCopyImageToBuffer(ctx->cmdBuffer, ctx->rt, VK_IMAGE_LAYOUT_GENERAL, - ctx->image_buffer, 1, ©); - - - vkEndCommandBuffer(ctx->cmdBuffer); - - vkQueueSubmit(ctx->queue, 1, &ctx->cmdBuffer, 0); - - vkQueueWaitIdle(ctx->queue); - - write_png("vk-map.png", ctx->width, ctx->height, 1024, ctx->rt_map); - write_png("vk-copy.png", ctx->width, ctx->height, 1024, ctx->image_map); - - vkDestroyObject(ctx->device, VK_OBJECT_TYPE_COMMAND_BUFFER, ctx->cmdBuffer); - vkDestroyObject(ctx->device, VK_OBJECT_TYPE_PIPELINE, ctx->pipeline); - vkDestroyObject(ctx->device, VK_OBJECT_TYPE_IMAGE, ctx->rt); - vkDestroyObject(ctx->device, VK_OBJECT_TYPE_BUFFER, 
ctx->vertex_buffer); - vkDestroyObject(ctx->device, VK_OBJECT_TYPE_BUFFER, ctx->image_buffer); - vkUnmapMemory(ctx->device, ctx->mem); - vkFreeMemory(ctx->device, ctx->mem); - vkDestroyObject(ctx->device, VK_OBJECT_TYPE_DYNAMIC_VP_STATE, ctx->vp_state); - vkDestroyObject(ctx->device, VK_OBJECT_TYPE_DYNAMIC_RS_STATE, ctx->rs_state); - vkDestroyObject(ctx->device, VK_OBJECT_TYPE_DYNAMIC_CB_STATE, ctx->cb_state); - vkDestroyObject(ctx->device, VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW, ctx->rt_view); - vkDestroyObject(ctx->device, VK_OBJECT_TYPE_FRAMEBUFFER, ctx->framebuffer); - vkDestroyObject(ctx->device, VK_OBJECT_TYPE_RENDER_PASS, ctx->pass); -} - -static void -test_create_solid_color_pipeline(struct test_context *ctx) -{ - VkPipelineIaStateCreateInfo ia_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .disableVertexReuse = false, - .primitiveRestartEnable = false, - .primitiveRestartIndex = 0 - }; - - VkShader vs = GLSL_VK_SHADER(ctx->device, VERTEX, - layout(location = 0) in vec4 a_position; - layout(location = 1) in vec4 a_color; - out vec4 v_color; - void main() - { - gl_Position = a_position; - v_color = a_color; - } - ); - - VkShader fs = GLSL_VK_SHADER(ctx->device, FRAGMENT, - out vec4 f_color; - in vec4 v_color; - void main() - { - f_color = v_color; - } - ); - - VkPipelineShaderStageCreateInfo vs_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = &ia_create_info, - .shader = { - .stage = VK_SHADER_STAGE_VERTEX, - .shader = vs, - .linkConstBufferCount = 0, - .pLinkConstBufferInfo = NULL, - .pSpecializationInfo = NULL - } - }; - - VkPipelineShaderStageCreateInfo fs_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = &vs_create_info, - .shader = { - .stage = VK_SHADER_STAGE_FRAGMENT, - .shader = fs, - .linkConstBufferCount = 0, - .pLinkConstBufferInfo = NULL, - .pSpecializationInfo = NULL - } - }; - - 
VkPipelineVertexInputCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO, - .pNext = &fs_create_info, - .bindingCount = 2, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .strideInBytes = 16, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX - }, - { - .binding = 1, - .strideInBytes = 16, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_INSTANCE - } - }, - .attributeCount = 2, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offsetInBytes = 0 - }, - { - .location = 1, - .binding = 1, - .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offsetInBytes = 0 - } - } - }; - - VkPipelineRsStateCreateInfo rs_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, - .pNext = &vi_create_info, - - .depthClipEnable = true, - .rasterizerDiscardEnable = false, - .fillMode = VK_FILL_MODE_SOLID, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CCW - }; - - VkPipelineDsStateCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DS_STATE_CREATE_INFO, - .pNext = &rs_create_info, - .format = VK_FORMAT_D24_UNORM, - .depthTestEnable = true, - .depthWriteEnable = true, - .depthCompareOp = VK_COMPARE_OP_GREATER - }; - - VkPipelineCbStateCreateInfo cb_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO, - .pNext = &ds_create_info, - .attachmentCount = 1, - .pAttachments = (VkPipelineCbAttachmentState []) { - { .channelWriteMask = VK_CHANNEL_A_BIT | - VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, - } - }; - - vkCreateGraphicsPipeline(ctx->device, - &(VkGraphicsPipelineCreateInfo) { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = &cb_create_info, - .flags = 0, - .layout = VK_NULL_HANDLE - }, - &ctx->pipeline); - - vkDestroyObject(ctx->device, VK_OBJECT_TYPE_SHADER, fs); - vkDestroyObject(ctx->device, 
VK_OBJECT_TYPE_SHADER, vs); -} - -static void -test_depth_stencil(VkDevice device, VkQueue queue) -{ - struct test_context ctx; - - ctx.width = 256; - ctx.height = 256; - - test_prepare(&ctx, device, queue, TEST_DEPTH_FLAG); - test_create_solid_color_pipeline(&ctx); - - static const float vertex_data[] = { - /* Triangle coordinates */ - -0.5, -0.5, 0.5, 1.0, - 0.5, -0.5, 0.5, 1.0, - 0.0, 0.5, 0.5, 1.0, - - /* Triangle coordinates */ - -0.3, -0.3, 0.0, 1.0, - 0.7, -0.3, 0.0, 1.0, - 0.2, 0.7, 0.8, 1.0, - - /* Color */ - 1.0, 1.0, 0.2, 1.0, - 0.2, 0.2, 1.0, 1.0, - }; - memcpy(ctx.vertex_map, vertex_data, sizeof(vertex_data)); - - vkCmdBindVertexBuffers(ctx.cmdBuffer, 0, 2, - (VkBuffer[]) { ctx.vertex_buffer, ctx.vertex_buffer }, - (VkDeviceSize[]) { 0, 6 * 4 * sizeof(float) }); - - vkCmdBindPipeline(ctx.cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ctx.pipeline); - - vkCmdBindDynamicStateObject(ctx.cmdBuffer, - VK_STATE_BIND_POINT_VIEWPORT, ctx.vp_state); - vkCmdBindDynamicStateObject(ctx.cmdBuffer, - VK_STATE_BIND_POINT_RASTER, ctx.rs_state); - vkCmdBindDynamicStateObject(ctx.cmdBuffer, - VK_STATE_BIND_POINT_DEPTH_STENCIL, ctx.ds_state); - vkCmdBindDynamicStateObject(ctx.cmdBuffer, - VK_STATE_BIND_POINT_COLOR_BLEND, ctx.cb_state); - - vkCmdDraw(ctx.cmdBuffer, 0, 3, 0, 1); - vkCmdDraw(ctx.cmdBuffer, 3, 3, 1, 1); - - test_finish(&ctx); -} - -static void -test_triangle(VkDevice device, VkQueue queue) -{ - uint32_t count; - - VkCmdBuffer cmdBuffer; - vkCreateCommandBuffer(device, - &(VkCmdBufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO, - .queueNodeIndex = 0, - .flags = 0 - }, - &cmdBuffer); - - - VkDescriptorSetLayout set_layout[2]; - vkCreateDescriptorSetLayout(device, - &(VkDescriptorSetLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .count = 3, - .pBinding = (VkDescriptorSetLayoutBinding[]) { - { - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .count = 2, - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, 
- .pImmutableSamplers = NULL - }, - { - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .count = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - }, - { - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, - .count = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - } - } - }, - &set_layout[0]); - - vkCreateDescriptorSetLayout(device, - &(VkDescriptorSetLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .count = 1, - .pBinding = (VkDescriptorSetLayoutBinding[]) { - { - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .count = 1, - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, - .pImmutableSamplers = NULL - } - } - }, - &set_layout[1]); - - VkPipelineLayout pipeline_layout; - vkCreatePipelineLayout(device, - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .descriptorSetCount = 2, - .pSetLayouts = set_layout, - }, - &pipeline_layout); - - VkPipeline pipeline; - create_pipeline(device, &pipeline, pipeline_layout); - - VkDescriptorSet set[2]; - vkAllocDescriptorSets(device, 0 /* pool */, - VK_DESCRIPTOR_SET_USAGE_STATIC, - 2, set_layout, set, &count); - - VkBuffer buffer; - vkCreateBuffer(device, - &(VkBufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = 1024, - .usage = VK_BUFFER_USAGE_GENERAL, - .flags = 0 - }, - &buffer); - - VkMemoryRequirements buffer_requirements; - size_t size = sizeof(buffer_requirements); - vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, buffer, - VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, - &size, &buffer_requirements); - - int32_t width = 256, height = 256; - - VkImage rt; - vkCreateImage(device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_R8G8B8A8_UNORM, - .extent = { .width = width, .height = height, .depth = 1 }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - .tiling = 
VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }, - &rt); - - VkMemoryRequirements rt_requirements; - size = sizeof(rt_requirements); - vkGetObjectInfo(device, VK_OBJECT_TYPE_IMAGE, rt, - VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, - &size, &rt_requirements); - - VkBuffer vertex_buffer; - vkCreateBuffer(device, - &(VkBufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = 1024, - .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - .flags = 0 - }, - &vertex_buffer); - - VkMemoryRequirements vb_requirements; - size = sizeof(vb_requirements); - vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, vertex_buffer, - VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, - &size, &vb_requirements); - - VkBuffer image_buffer; - vkCreateBuffer(device, - &(VkBufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = width * height * 4, - .usage = VK_BUFFER_USAGE_TRANSFER_DESTINATION_BIT, - .flags = 0 - }, - &image_buffer); - - VkMemoryRequirements ib_requirements; - size = sizeof(ib_requirements); - vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, image_buffer, - VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, - &size, &ib_requirements); - - printf("buffer size: %lu, buffer alignment: %lu\n", - buffer_requirements.size, buffer_requirements.alignment); - printf("rt size: %lu, rt alignment: %lu\n", - rt_requirements.size, rt_requirements.alignment); - printf("vb size: %lu vb alignment: %lu\n", - vb_requirements.size, vb_requirements.alignment); - printf("ib size: %lu ib alignment: %lu\n", - ib_requirements.size, ib_requirements.alignment); - - size_t mem_size = rt_requirements.size + ib_requirements.size + - 2048 + 16 * 16 * 4; - VkDeviceMemory mem; - vkAllocMemory(device, - &(VkMemoryAllocInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, - .allocationSize = mem_size, - .memProps = VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT, - .memPriority = VK_MEMORY_PRIORITY_NORMAL - }, - &mem); - - void *map; - vkMapMemory(device, mem, 
0, mem_size, 0, &map); - memset(map, 192, mem_size); - - vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, - buffer, - 0, /* allocation index; for objects which need to bind to multiple mems */ - mem, 128); - - float color[12] = { - 0.0, 0.2, 0.0, 0.0, - 0.0, 0.0, 0.5, 0.0, - 0.0, 0.0, 0.5, 0.5 - }; - memcpy(map + 128 + 16, color, sizeof(color)); - VkBufferView buffer_view[3]; - vkCreateBufferView(device, - &(VkBufferViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .buffer = buffer, - .viewType = VK_BUFFER_VIEW_TYPE_RAW, - .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offset = 16, - .range = 64 - }, - &buffer_view[0]); - - vkCreateBufferView(device, - &(VkBufferViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .buffer = buffer, - .viewType = VK_BUFFER_VIEW_TYPE_RAW, - .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offset = 32, - .range = 64 - }, - &buffer_view[1]); - - vkCreateBufferView(device, - &(VkBufferViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .buffer = buffer, - .viewType = VK_BUFFER_VIEW_TYPE_RAW, - .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offset = 48, - .range = 64 - }, - &buffer_view[2]); - - vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, - vertex_buffer, - 0, /* allocation index; for objects which need to bind to multiple mems */ - mem, 1024); - static const float vertex_data[] = { - /* Triangle coordinates */ - -0.5, -0.5, 0.0, 1.0, - 0.5, -0.5, 0.0, 1.0, - 0.0, 0.5, 0.0, 1.0, - /* Color */ - 1.0, 0.0, 0.0, 0.2, - }; - memcpy(map + 1024, vertex_data, sizeof(vertex_data)); - - VkDynamicVpState vp_state; - vkCreateDynamicViewportState(device, - &(VkDynamicVpStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, - .viewportAndScissorCount = 2, - .pViewports = (VkViewport[]) { - { - .originX = 0, - .originY = 0, - .width = width, - .height = height, - .minDepth = 0, - .maxDepth = 1 - }, - { - .originX = -10, - .originY = -10, - .width = 20, - 
.height = 20, - .minDepth = -1, - .maxDepth = 1 - }, - }, - .pScissors = (VkRect[]) { - { { 0, 0 }, { width, height } }, - { { 10, 10 }, { 236, 236 } } - } - }, - &vp_state); - - VkDynamicRsState rs_state; - vkCreateDynamicRasterState(device, - &(VkDynamicRsStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, - }, - &rs_state); - - VkDynamicCbState cb_state; - vkCreateDynamicColorBlendState(device, - &(VkDynamicCbStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO - }, - &cb_state); - - /* FIXME: Need to query memory info before binding to memory */ - vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_IMAGE, - rt, - 0, /* allocation index; for objects which need to bind to multiple mems */ - mem, 2048); - - vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, - image_buffer, - 0, /* allocation index; for objects which need to bind to multiple mems */ - mem, 2048 + rt_requirements.size); - - const uint32_t texture_width = 16, texture_height = 16; - VkImage texture; - vkCreateImage(device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_R8G8B8A8_UNORM, - .extent = { .width = texture_width, .height = texture_height, .depth = 1 }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT, - .flags = 0, - }, - &texture); - - vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_IMAGE, - texture, - 0, /* allocation index; for objects which need to bind to multiple mems */ - mem, 2048 + 256 * 256 * 4); - - VkImageView image_view; - vkCreateImageView(device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = texture, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = VK_FORMAT_R8G8B8A8_UNORM, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspect = 
VK_IMAGE_ASPECT_COLOR, - .baseMipLevel = 0, - .mipLevels = 1, - .baseArraySlice = 0, - .arraySize = 1 - }, - .minLod = 0 - }, - &image_view); - - VkSampler sampler; - vkCreateSampler(device, - &(VkSamplerCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = VK_TEX_FILTER_LINEAR, - .minFilter = VK_TEX_FILTER_LINEAR, - .mipMode = VK_TEX_MIPMAP_MODE_NEAREST, - .addressU = VK_TEX_ADDRESS_CLAMP, - .addressV = VK_TEX_ADDRESS_CLAMP, - .addressW = VK_TEX_ADDRESS_CLAMP, - .mipLodBias = 0, - .maxAnisotropy = 0, - .compareOp = VK_COMPARE_OP_GREATER, - .minLod = 0, - .maxLod = 0, - .borderColor = VK_BORDER_COLOR_TRANSPARENT_BLACK - }, - &sampler); - - vkUpdateDescriptors(device, set[0], 3, - (const void * []) { - &(VkUpdateBuffers) { - .sType = VK_STRUCTURE_TYPE_UPDATE_BUFFERS, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .arrayIndex = 0, - .binding = 0, - .count = 2, - .pBufferViews = (VkBufferViewAttachInfo[]) { - { - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_ATTACH_INFO, - .view = buffer_view[0] - }, - { - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_ATTACH_INFO, - .view = buffer_view[1] - } - } - }, - &(VkUpdateImages) { - .sType = VK_STRUCTURE_TYPE_UPDATE_IMAGES, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .binding = 2, - .count = 1, - .pImageViews = (VkImageViewAttachInfo[]) { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_ATTACH_INFO, - .view = image_view, - .layout = VK_IMAGE_LAYOUT_GENERAL, - } - } - }, - &(const VkUpdateSamplers) { - .sType = VK_STRUCTURE_TYPE_UPDATE_SAMPLERS, - .binding = 3, - .count = 1, - .pSamplers = (const VkSampler[]) { sampler } - } - }); - - vkUpdateDescriptors(device, set[1], 1, - (const void * []) { - &(VkUpdateBuffers) { - .sType = VK_STRUCTURE_TYPE_UPDATE_BUFFERS, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .arrayIndex = 0, - .count = 1, - .pBufferViews = (VkBufferViewAttachInfo[]) { - { - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_ATTACH_INFO, - .view = buffer_view[2] - } - } - } - }); - - 
VkColorAttachmentView view; - vkCreateColorAttachmentView(device, - &(VkColorAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, - .image = rt, - .format = VK_FORMAT_R8G8B8A8_UNORM, - .mipLevel = 0, - .baseArraySlice = 0, - .arraySize = 1, - .msaaResolveImage = 0, - .msaaResolveSubResource = { 0, } - }, - &view); - - VkFramebuffer framebuffer; - vkCreateFramebuffer(device, - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .colorAttachmentCount = 1, - .pColorAttachments = (VkColorAttachmentBindInfo[]) { - { - .view = view, - .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL - } - }, - .pDepthStencilAttachment = NULL, - .sampleCount = 1, - .width = width, - .height = height, - .layers = 1 - }, - &framebuffer); - - VkRenderPass pass; - vkCreateRenderPass(device, - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .renderArea = { { 0, 0 }, { width, height } }, - .colorAttachmentCount = 1, - .extent = { }, - .sampleCount = 1, - .layers = 1, - .pColorFormats = (VkFormat[]) { VK_FORMAT_R8G8B8A8_UNORM }, - .pColorLayouts = (VkImageLayout[]) { VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL }, - .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_CLEAR }, - .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, - .pColorLoadClearValues = (VkClearColor[]) { - { .color = { .floatColor = { 1.0, 0.0, 0.0, 1.0 } }, .useRawValue = false } - }, - .depthStencilFormat = VK_FORMAT_UNDEFINED, - }, - &pass); - - VkQueryPool query_pool; - vkCreateQueryPool(device, - &(VkQueryPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, - .queryType = VK_QUERY_TYPE_OCCLUSION, - .slots = 4, - .pipelineStatistics = 0 - }, - &query_pool); - - vkBeginCommandBuffer(cmdBuffer, - &(VkCmdBufferBeginInfo) { - .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO, - .flags = 0 - }); - - vkCmdBeginRenderPass(cmdBuffer, - &(VkRenderPassBegin) { - .renderPass = 
pass, - .framebuffer = framebuffer - }); - - vkCmdBindVertexBuffers(cmdBuffer, 0, 2, - (VkBuffer[]) { vertex_buffer, vertex_buffer }, - (VkDeviceSize[]) { 0, 3 * 4 * sizeof(float) }); - - vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - - vkCmdBindDescriptorSets(cmdBuffer, - VK_PIPELINE_BIND_POINT_GRAPHICS, 0, 1, - &set[0], 0, NULL); - vkCmdBindDescriptorSets(cmdBuffer, - VK_PIPELINE_BIND_POINT_GRAPHICS, 1, 1, - &set[1], 0, NULL); - - vkCmdBindDynamicStateObject(cmdBuffer, - VK_STATE_BIND_POINT_VIEWPORT, vp_state); - vkCmdBindDynamicStateObject(cmdBuffer, - VK_STATE_BIND_POINT_RASTER, rs_state); - vkCmdBindDynamicStateObject(cmdBuffer, - VK_STATE_BIND_POINT_COLOR_BLEND, cb_state); - - vkCmdBeginQuery(cmdBuffer, query_pool, 0 /*slot*/, 0 /* flags */); - - vkCmdDraw(cmdBuffer, 0, 3, 0, 1); - - vkCmdEndQuery(cmdBuffer, query_pool, 0); - - vkCmdEndRenderPass(cmdBuffer, pass); - - vkCmdCopyQueryPoolResults(cmdBuffer, query_pool, 0, 1, buffer, 16, 8, - VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT); - - VkBufferImageCopy copy = { - .bufferOffset = 0, - .imageSubresource = { - .aspect = VK_IMAGE_ASPECT_COLOR, - .mipLevel = 0, - .arraySlice = 0, - }, - .imageOffset = { .x = 0, .y = 0, .z = 0 }, - .imageExtent = { .width = width, .height = height, .depth = 1 }, - }; - - vkCmdCopyImageToBuffer(cmdBuffer, rt, VK_IMAGE_LAYOUT_GENERAL, - image_buffer, 1, ©); - - vkEndCommandBuffer(cmdBuffer); - - vkQueueSubmit(queue, 1, &cmdBuffer, 0); - - vkQueueWaitIdle(queue); - - /* Result gets written to buffer at offset 0. 
The buffer is bound to the - * memory object at offset 128 */ - uint64_t *results = map + 128; - - uint64_t get_result; - size = sizeof(get_result); - vkGetQueryPoolResults(device, query_pool, 0, 1, &size, &get_result, - VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); - - printf("oc query (copy): %20ld (%016lx)\n", results[2], results[2]); - printf("oc query (get): %20ld (%016lx)\n", get_result, get_result); - - write_png("vk-map.png", width, height, 1024, map + 2048); - write_png("vk-copy.png", width, height, 1024, - map + 2048 + rt_requirements.size); - - vkDestroyObject(device, VK_OBJECT_TYPE_IMAGE, texture); - vkDestroyObject(device, VK_OBJECT_TYPE_IMAGE, rt); - vkDestroyObject(device, VK_OBJECT_TYPE_BUFFER, buffer); - vkDestroyObject(device, VK_OBJECT_TYPE_COMMAND_BUFFER, cmdBuffer); - vkDestroyObject(device, VK_OBJECT_TYPE_PIPELINE, pipeline); - vkDestroyObject(device, VK_OBJECT_TYPE_QUERY_POOL, query_pool); -} - -int main(int argc, char *argv[]) -{ - VkInstance instance; - vkCreateInstance(&(VkInstanceCreateInfo) { - .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, - .pAllocCb = &(VkAllocCallbacks) { - .pUserData = NULL, - .pfnAlloc = test_alloc, - .pfnFree = test_free - }, - .pAppInfo = &(VkApplicationInfo) { - .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, - .pAppName = "vk", - .apiVersion = 1 - } - }, - &instance); - - uint32_t count = 1; - VkPhysicalDevice physicalDevices[1]; - vkEnumeratePhysicalDevices(instance, &count, physicalDevices); - printf("%d physical devices\n", count); - - VkPhysicalDeviceProperties properties; - size_t size = sizeof(properties); - vkGetPhysicalDeviceInfo(physicalDevices[0], - VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES, - &size, &properties); - printf("vendor id %04x, device name %s\n", - properties.vendorId, properties.deviceName); - - VkDevice device; - vkCreateDevice(physicalDevices[0], - &(VkDeviceCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - .queueRecordCount = 1, - .pRequestedQueues = 
&(VkDeviceQueueCreateInfo) { - .queueNodeIndex = 0, - .queueCount = 1 - } - }, - &device); - - VkQueue queue; - vkGetDeviceQueue(device, 0, 0, &queue); - - if (argc > 1 && strcmp(argv[1], "timestamp") == 0) { - test_timestamp(device, queue); - } else if (argc > 1 && strcmp(argv[1], "formats") == 0) { - test_formats(device, queue); - } else if (argc > 1 && strcmp(argv[1], "buffer-copy") == 0) { - test_buffer_copy(device, queue); - } else if (argc > 1 && strcmp(argv[1], "depth-stencil") == 0) { - test_depth_stencil(device, queue); - } else { - test_triangle(device, queue); - } - - vkDestroyDevice(device); - - vkDestroyInstance(instance); - - return 0; -} -- cgit v1.2.3 From a37d122e883e183fba770c73f4611f00e0a8410f Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 2 Jun 2015 23:08:05 -0700 Subject: vk: Set color/blend state in meta clear if not set yet --- src/vulkan/meta.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index ee9593ae995..285c0202dee 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -281,6 +281,11 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, VK_STATE_BIND_POINT_DEPTH_STENCIL, device->meta_state.shared.ds_state); + if (cmd_buffer->cb_state == NULL) + anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + VK_STATE_BIND_POINT_COLOR_BLEND, + device->meta_state.shared.cb_state); + anv_CmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, pass->num_clear_layers); /* Restore API state */ -- cgit v1.2.3 From 47bd462b0caa46a2dd1fb88e57f6a968aff6cfc4 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 3 Jun 2015 14:10:28 -0400 Subject: awesome control flow bugfixes/clarifications --- src/glsl/nir/spirv_to_nir.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 1fc1b8bc5dc..979df2019e0 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ 
b/src/glsl/nir/spirv_to_nir.c @@ -1355,20 +1355,11 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, { struct vtn_block *block = start; while (block != end_block) { - if (block->block != NULL) { - /* We've already visited this block once before so this is a - * back-edge. Back-edges are only allowed to point to a loop - * merge. - */ - assert(block == cont_block); - return; - } - if (block->merge_op == SpvOpLoopMerge) { /* This is the jump into a loop. */ - cont_block = block; - break_block = vtn_value(b, block->merge_block_id, - vtn_value_type_block)->block; + struct vtn_block *new_cont_block = block; + struct vtn_block *new_break_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; nir_loop *loop = nir_loop_create(b->shader); nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node); @@ -1379,7 +1370,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, block->merge_op = SpvOpNop; nir_builder_insert_after_cf_list(&b->nb, &loop->body); - vtn_walk_blocks(b, block, break_block, cont_block, NULL); + vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); nir_builder_insert_after_cf_list(&b->nb, old_list); block = break_block; @@ -1411,8 +1402,16 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, return; } else if (branch_block == end_block) { + /* We're branching to the merge block of an if, since for loops + * and functions end_block == NULL, so we're done here. + */ return; } else { + /* We're branching to another block, and according to the rules, + * we can only branch to another block with one predecessor (so + * we're the only one jumping to it) so we can just process it + * next. 
+ */ block = branch_block; continue; } @@ -1454,7 +1453,10 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, &jump->instr); block = then_block; } else { - /* Conventional if statement */ + /* According to the rules we're branching to two blocks that don't + * have any other predecessors, so we can handle this as a + * conventional if. + */ assert(block->merge_op == SpvOpSelectionMerge); struct vtn_block *merge_block = vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; -- cgit v1.2.3 From 251aea80b029cdd18b9c263073a4149b9331ad2a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 3 Jun 2015 16:59:13 -0700 Subject: vk/DS: Mask stencil masks to 8 bits --- src/vulkan/device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 091d9280792..654dc872bf6 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2178,11 +2178,11 @@ VkResult anv_CreateDynamicDepthStencilState( /* Is this what we need to do? */ .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0, - .StencilTestMask = pCreateInfo->stencilReadMask, - .StencilWriteMask = pCreateInfo->stencilWriteMask, + .StencilTestMask = pCreateInfo->stencilReadMask & 0xff, + .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, - .BackfaceStencilTestMask = pCreateInfo->stencilReadMask, - .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask, + .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff, + .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, }; GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil, -- cgit v1.2.3 From 9cd42b3deabe6d5fecf0c946e22188d355b56159 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 4 Jun 2015 09:01:30 -0700 Subject: vk: Fix build Commit 1286bd, which deleted vk.c, broke the build. Update the Makefile to fix it. 
--- src/vulkan/Makefile.am | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index ce3f6b06391..f8517053f4f 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -74,8 +74,7 @@ libvulkan_la_SOURCES = \ BUILT_SOURCES = \ entrypoints.h \ entrypoints.c \ - meta-spirv.h \ - vk-spirv.h + meta-spirv.h entrypoints.h : vk_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< header > $@ -- cgit v1.2.3 From 7f90e56e42eb1ca3f2c43a8339657dd964c5e743 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 4 Jun 2015 09:06:59 -0700 Subject: vk/device: Dissalow device destruction --- src/vulkan/device.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 654dc872bf6..0f511917205 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -414,6 +414,9 @@ VkResult anv_DestroyDevice( { struct anv_device *device = (struct anv_device *) _device; + /* FIXME: We should make device destruction actually safe. 
*/ + return VK_UNSUPPORTED; + anv_compiler_destroy(device->compiler); -- cgit v1.2.3 From 8d930da35df494f78e658cc8969a40a91286d6d6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 4 Jun 2015 09:12:37 -0700 Subject: vk/allocator: Remove an unneeded VG() wrapper --- src/vulkan/allocator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 950b23c857a..027108695fd 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -626,7 +626,7 @@ anv_state_stream_alloc(struct anv_state_stream *stream, if (vg_ptr == NULL) { vg_ptr = state.map; VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); - VG(VALGRIND_MALLOCLIKE_BLOCK(vg_ptr, size, 0, false)); + VALGRIND_MALLOCLIKE_BLOCK(vg_ptr, size, 0, false); } else { ptrdiff_t vg_offset = vg_ptr - current_map; assert(vg_offset >= stream->current_block && -- cgit v1.2.3 From b981379bcf3cb88f1119e465adc195d2783af7cc Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 4 Jun 2015 14:26:03 -0700 Subject: vk: Make `make clean` remove generated spirv headers --- src/vulkan/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index f8517053f4f..6d10f84adf4 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -85,7 +85,7 @@ entrypoints.c : vk_gen.py $(vulkan_include_HEADERS) %-spirv.h: %.c glsl_scraper.py $(AM_V_GEN) $(PYTHON2) $(srcdir)/glsl_scraper.py --glsl-only -o $@ $< -CLEANFILES = entrypoints.h entrypoints.c +CLEANFILES = $(BUILT_SOURCES) libvulkan_la_LIBADD = -lxcb -lxcb-dri3 \ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la -- cgit v1.2.3 From 87d98e19357c84a816bb737357e22b02c0e7d40c Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 4 Jun 2015 14:31:53 -0700 Subject: vk: Fix 2 incorrect typecasts The compiler didn't find the cast errors because all Vulkan types are just integers. 
--- src/vulkan/device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 0f511917205..6a93766158f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1331,7 +1331,7 @@ VkResult anv_CreateFence( fence->execbuf.rsvd1 = device->context_id; fence->execbuf.rsvd2 = 0; - *pFence = (VkQueryPool) fence; + *pFence = (VkFence) fence; return VK_SUCCESS; @@ -1561,7 +1561,7 @@ VkResult anv_CreateBufferView( fill_buffer_surface_state(view->surface_state.map, pCreateInfo->format, view->offset, pCreateInfo->range); - *pView = (VkImageView) view; + *pView = (VkBufferView) view; return VK_SUCCESS; } -- cgit v1.2.3 From aa523d3c627a3e86aaef48671f1925f666ba33fd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 5 Jun 2015 16:41:49 -0700 Subject: vk/gem: Call VALGRIND_FREELIKE_BLOCK before unmapping --- src/vulkan/gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gem.c b/src/vulkan/gem.c index 8598deedc71..ab6f5c4dfbd 100644 --- a/src/vulkan/gem.c +++ b/src/vulkan/gem.c @@ -123,8 +123,8 @@ anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, void anv_gem_munmap(void *p, uint64_t size) { - munmap(p, size); VG(VALGRIND_FREELIKE_BLOCK(p, 0)); + munmap(p, size); } int -- cgit v1.2.3 From ed2ca020f84213a5f0f7faf1115a0b6f545b3e62 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 5 Jun 2015 17:11:49 -0700 Subject: vk/allocator: Avoid double-free in the bo pool --- src/vulkan/allocator.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 027108695fd..7fe7423b531 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -663,6 +663,13 @@ anv_bo_pool_finish(struct anv_bo_pool *pool) struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); while (link != NULL) { struct bo_pool_bo_link link_copy = 
VG_NOACCESS_READ(link); + + /* The anv_gem_m[un]map() functions are also valgrind-safe so they + * act as an alloc/free. In order to avoid a double-free warning, we + * need to mark thiss as malloc'd before we unmap it. + */ + VG(VALGRIND_MALLOCLIKE_BLOCK(link_copy.bo.map, pool->bo_size, 0, false)); + anv_gem_munmap(link_copy.bo.map, pool->bo_size); anv_gem_close(pool->device, link_copy.bo.gem_handle); link = link_copy.next; @@ -700,10 +707,10 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) return vk_error(VK_ERROR_MEMORY_MAP_FAILED); } - /* We don't need to call VALGRIND_MALLOCLIKE_BLOCK here because valgrind - * already picks up on the gem_mmap and treats that as a malloc. If we - * really want to be pedantic we could do a VALGRIND_FREELIKE_BLOCK - * right after the mmap, but there's no good reason. + /* We don't need to call VALGRIND_MALLOCLIKE_BLOCK here because gem_mmap + * calls it for us. If we really want to be pedantic we could do a + * VALGRIND_FREELIKE_BLOCK right after the mmap, but there's no good + * reason. 
*/ *bo = new_bo; -- cgit v1.2.3 From c2eeab305b0dd07969a0d1ba66451f5a3d07dcfe Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 5 Jun 2015 17:14:57 -0700 Subject: vk/pipeline: Actually free the program stream and dynamic pool --- src/vulkan/pipeline.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 6babd183f1f..2e965245789 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -379,6 +379,8 @@ anv_pipeline_destroy(struct anv_device *device, anv_compiler_free(pipeline); anv_reloc_list_finish(&pipeline->batch.relocs, pipeline->device); + anv_state_stream_finish(&pipeline->program_stream); + anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); anv_device_free(pipeline->device, pipeline); } -- cgit v1.2.3 From e69588b764ecac9cdb13cd1f5c0041d41b2d8652 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 5 Jun 2015 17:26:01 -0700 Subject: vk/device: Use a 64-byte alignment for CC state --- src/vulkan/device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 6a93766158f..39047aa3505 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3124,16 +3124,16 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->ds_state == NULL) state = anv_cmd_buffer_emit_dynamic(cmd_buffer, cmd_buffer->cb_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 32); + GEN8_COLOR_CALC_STATE_length, 64); else if (cmd_buffer->cb_state == NULL) state = anv_cmd_buffer_emit_dynamic(cmd_buffer, cmd_buffer->ds_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 32); + GEN8_COLOR_CALC_STATE_length, 64); else state = anv_cmd_buffer_merge_dynamic(cmd_buffer, cmd_buffer->ds_state->state_color_calc, cmd_buffer->cb_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 32); + GEN8_COLOR_CALC_STATE_length, 64); anv_batch_emit(&cmd_buffer->batch, 
GEN8_3DSTATE_CC_STATE_POINTERS, -- cgit v1.2.3 From ce00233c137f272263f28e531cb70eb68250f5ca Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 5 Jun 2015 17:14:41 -0700 Subject: vk/cmd_buffer: Use the dynamic state stream in emit_dynamic and merge_dynamic --- src/vulkan/device.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 39047aa3505..7262481f8b5 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3042,10 +3042,10 @@ static struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, uint32_t *a, uint32_t dwords, uint32_t alignment) { - struct anv_device *device = cmd_buffer->device; struct anv_state state; - state = anv_state_pool_alloc(&device->dynamic_state_pool, dwords * 4, alignment); + state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + dwords * 4, alignment); memcpy(state.map, a, dwords * 4); return state; @@ -3053,13 +3053,14 @@ anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, static struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, - uint32_t *a, uint32_t *b, uint32_t dwords, uint32_t alignment) + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment) { - struct anv_device *device = cmd_buffer->device; struct anv_state state; uint32_t *p; - state = anv_state_pool_alloc(&device->dynamic_state_pool, dwords * 4, alignment); + state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + dwords * 4, alignment); p = state.map; for (uint32_t i = 0; i < dwords; i++) p[i] = a[i] | b[i]; -- cgit v1.2.3 From 9eab70e54f40a13f55b29974ca7f765c1164f1be Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 3 Jun 2015 23:03:29 -0700 Subject: vk: Create a minimal context for the compiler This avoids the full brw context initialization and just sets up context constants, initializes extensions and sets a few driver vfuncs for the front-end GLSL 
compiler. --- src/mesa/drivers/dri/i965/brw_context.c | 5 ++- src/mesa/drivers/dri/i965/brw_context.h | 3 ++ src/mesa/drivers/dri/i965/brw_program.c | 2 +- src/mesa/drivers/dri/i965/intel_extensions.c | 18 +++++--- src/vulkan/compiler.cpp | 63 +++++++++++++++++++++------- src/vulkan/device.c | 6 +-- src/vulkan/private.h | 2 +- 7 files changed, 70 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index e01a7dbabee..23838056690 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -310,7 +310,7 @@ brw_init_driver_functions(struct brw_context *brw, functions->GetSamplePosition = gen6_get_sample_position; } -static void +void brw_initialize_context_constants(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; @@ -389,7 +389,8 @@ brw_initialize_context_constants(struct brw_context *brw) int max_samples; const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen); const int clamp_max_samples = - driQueryOptioni(&brw->optionCache, "clamp_max_samples"); + brw->optionCache.info != NULL ? 
+ driQueryOptioni(&brw->optionCache, "clamp_max_samples") : -1; if (clamp_max_samples < 0) { max_samples = msaa_modes[0]; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 5a35e48a481..cb4cc7fb36b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1988,6 +1988,9 @@ void intel_screen_destroy(struct intel_screen *screen); struct brw_context *intel_context_create(struct intel_screen *screen); void intel_context_destroy(struct brw_context *brw); +void +brw_initialize_context_constants(struct brw_context *brw); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index e5c0d3c7604..b056fbfc427 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -275,7 +275,7 @@ brw_get_scratch_bo(struct brw_context *brw, void brwInitFragProgFuncs( struct dd_function_table *functions ) { - assert(functions->ProgramStringNotify == _tnl_program_string); + /* assert(functions->ProgramStringNotify == _tnl_program_string); */ functions->NewProgram = brwNewProgram; functions->DeleteProgram = brwDeleteProgram; diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index c3eee31d017..d6da34c7065 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -275,9 +275,11 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130; ctx->Extensions.EXT_timer_query = true; - if (brw->gen == 5 || can_write_oacontrol(brw)) { - ctx->Extensions.AMD_performance_monitor = true; - ctx->Extensions.INTEL_performance_query = true; + if (brw->bufmgr) { + if (brw->gen == 5 || can_write_oacontrol(brw)) { + ctx->Extensions.AMD_performance_monitor = true; + ctx->Extensions.INTEL_performance_query = true; + } } } @@ -285,6 +287,7 @@ 
intelInitExtensions(struct gl_context *ctx) uint64_t dummy; ctx->Extensions.ARB_blend_func_extended = + brw->optionCache.info == NULL || !driQueryOptionb(&brw->optionCache, "disable_blend_func_extended"); ctx->Extensions.ARB_conditional_render_inverted = true; ctx->Extensions.ARB_draw_buffers_blend = true; @@ -308,7 +311,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.OES_depth_texture_cube_map = true; /* Test if the kernel has the ioctl. */ - if (drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &dummy) == 0) + if (brw->bufmgr && drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &dummy) == 0) ctx->Extensions.ARB_timer_query = true; /* Only enable this in core profile because other parts of Mesa behave @@ -328,7 +331,8 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_texture_compression_bptc = true; ctx->Extensions.ARB_texture_view = true; - if (can_do_pipelined_register_writes(brw)) { + if (brw->bufmgr && + can_do_pipelined_register_writes(brw)) { ctx->Extensions.ARB_draw_indirect = true; ctx->Extensions.ARB_transform_feedback2 = true; ctx->Extensions.ARB_transform_feedback3 = true; @@ -353,7 +357,9 @@ intelInitExtensions(struct gl_context *ctx) if (ctx->API != API_OPENGL_CORE) ctx->Extensions.ARB_color_buffer_float = true; - if (ctx->Mesa_DXTn || driQueryOptionb(&brw->optionCache, "force_s3tc_enable")) + if (ctx->Mesa_DXTn || + (brw->optionCache.info != NULL && + driQueryOptionb(&brw->optionCache, "force_s3tc_enable"))) ctx->Extensions.EXT_texture_compression_s3tc = true; ctx->Extensions.ANGLE_texture_compression_dxt = true; diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index ead4117479c..a140217c4f4 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -35,6 +35,7 @@ #include #include +#include #include #include @@ -620,43 +621,73 @@ fail_on_compile_error(int status, const char *msg) struct anv_compiler { struct intel_screen *screen; struct brw_context *brw; + struct gl_pipeline_object pipeline; }; - extern 
"C" { struct anv_compiler * -anv_compiler_create(int fd) +anv_compiler_create(struct anv_device *device) { + const struct brw_device_info *devinfo = &device->info; struct anv_compiler *compiler; + struct gl_context *ctx; - compiler = (struct anv_compiler *) malloc(sizeof *compiler); + compiler = rzalloc(NULL, struct anv_compiler); if (compiler == NULL) return NULL; - compiler->screen = intel_screen_create(fd); - if (compiler->screen == NULL) { - free(compiler); - return NULL; - } + compiler->screen = rzalloc(compiler, struct intel_screen); + if (compiler->screen == NULL) + goto fail; - compiler->brw = intel_context_create(compiler->screen); - if (compiler->brw == NULL) { - free(compiler); - return NULL; - } + compiler->brw = rzalloc(compiler, struct brw_context); + if (compiler->brw == NULL) + goto fail; + + compiler->brw->optionCache.info = NULL; + compiler->brw->bufmgr = NULL; + compiler->brw->gen = devinfo->gen; + compiler->brw->is_g4x = devinfo->is_g4x; + compiler->brw->is_baytrail = devinfo->is_baytrail; + compiler->brw->is_haswell = devinfo->is_haswell; + compiler->brw->is_cherryview = devinfo->is_cherryview; + compiler->brw->intelScreen = compiler->screen; + compiler->screen->devinfo = &device->info; + + brw_process_intel_debug_variable(compiler->brw); + + if (device->info.gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS)) + compiler->brw->scalar_vs = true; + + ctx = &compiler->brw->ctx; + _mesa_init_shader_object_functions(&ctx->Driver); + + _mesa_init_constants(&ctx->Const, API_OPENGL_CORE); + + brw_initialize_context_constants(compiler->brw); + + intelInitExtensions(ctx); + + /* Set dd::NewShader */ + brwInitFragProgFuncs(&ctx->Driver); + + compiler->screen->compiler = brw_compiler_create(compiler, &device->info); + ctx->_Shader = &compiler->pipeline; compiler->brw->precompile = false; return compiler; + + fail: + ralloc_free(compiler); + return NULL; } void anv_compiler_destroy(struct anv_compiler *compiler) { - intel_context_destroy(compiler->brw); - 
intel_screen_destroy(compiler->screen); - free(compiler); + ralloc_free(compiler); } /* From gen7_urb.c */ diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 7262481f8b5..3aa1c39aaec 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -386,11 +386,11 @@ VkResult anv_CreateDevice( anv_state_pool_init(&device->surface_state_pool, &device->surface_state_block_pool); - device->compiler = anv_compiler_create(device->fd); - device->aub_writer = NULL; - device->info = *physicalDevice->info; + device->compiler = anv_compiler_create(device); + device->aub_writer = NULL; + pthread_mutex_init(&device->mutex, NULL); anv_device_init_meta(device); diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 1a6c3e0ca2f..65b81b3e918 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -710,7 +710,7 @@ anv_pipeline_create(VkDevice device, const struct anv_pipeline_create_info *extra, VkPipeline *pPipeline); -struct anv_compiler *anv_compiler_create(int fd); +struct anv_compiler *anv_compiler_create(struct anv_device *device); void anv_compiler_destroy(struct anv_compiler *compiler); int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline); void anv_compiler_free(struct anv_pipeline *pipeline); -- cgit v1.2.3 From 52637c0996a27ede5ec8fcd84d41e4875fb1b368 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 5 Jun 2015 11:51:30 -0700 Subject: vk: Quiet a few warnings --- src/vulkan/device.c | 2 +- src/vulkan/image.c | 2 +- src/vulkan/pipeline.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 3aa1c39aaec..32de1b4a230 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1348,7 +1348,7 @@ VkResult anv_ResetFences( { struct anv_fence **fences = (struct anv_fence **) pFences; - for (uint32_t i; i < fenceCount; i++) + for (uint32_t i = 0; i < fenceCount; i++) fences[i]->ready = false; return VK_SUCCESS; diff --git 
a/src/vulkan/image.c b/src/vulkan/image.c index 14e9e3d5e61..a7d1d49114d 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -172,7 +172,7 @@ anv_image_view_init(struct anv_surface_view *view, format = info->format; break; default: - assert(0); + unreachable(""); break; } diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 2e965245789..1a193d91e09 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -399,7 +399,7 @@ anv_pipeline_create( VkPipelineRsStateCreateInfo *rs_info = NULL; VkPipelineDsStateCreateInfo *ds_info = NULL; VkPipelineCbStateCreateInfo *cb_info = NULL; - VkPipelineVertexInputCreateInfo *vi_info; + VkPipelineVertexInputCreateInfo *vi_info = NULL; VkResult result; uint32_t offset, length; -- cgit v1.2.3 From 920fb771d499df1b951e9c302ebb22eb413d9fa8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 8 Jun 2015 13:50:52 -0700 Subject: vk/allocator: Make the use of NULL_BLOCK in state_stream_finish explicit --- src/vulkan/allocator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 7fe7423b531..1f16cb5b6a7 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -583,7 +583,7 @@ anv_state_stream_finish(struct anv_state_stream *stream) uint32_t block, next_block; block = stream->current_block; - while (block != 1) { + while (block != NULL_BLOCK) { sb = stream->block_pool->map + block; next_block = VG_NOACCESS_READ(&sb->next); VG(VALGRIND_FREELIKE_BLOCK(VG_NOACCESS_READ(&sb->_vg_ptr), 0)); -- cgit v1.2.3 From 66a4dab89ad0c5788a4bfa23b1c91f7ed977db06 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 8 Jun 2015 13:52:34 -0700 Subject: vk/pipeline: Don't destroy the program stream It's freed in compiler.cpp and we don't want to free it twice. 
--- src/vulkan/pipeline.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 1a193d91e09..f5f43d5a76e 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -379,7 +379,6 @@ anv_pipeline_destroy(struct anv_device *device, anv_compiler_free(pipeline); anv_reloc_list_finish(&pipeline->batch.relocs, pipeline->device); - anv_state_stream_finish(&pipeline->program_stream); anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); anv_device_free(pipeline->device, pipeline); } -- cgit v1.2.3 From 531549d9fc7b34c4b36e0ce3d2be4e35e1040ae6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 8 Jun 2015 14:27:02 -0700 Subject: vk/pipeline: Move freeing the program stream to pipeline.c It's created in pipeline.c so we should free it there. --- src/vulkan/compiler.cpp | 2 -- src/vulkan/pipeline.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index a140217c4f4..28271edaabe 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -1006,8 +1006,6 @@ anv_compiler_free(struct anv_pipeline *pipeline) for (uint32_t stage = 0; stage < VK_NUM_SHADER_STAGE; stage++) if (pipeline->prog_data[stage]) free(pipeline->prog_data[stage]->map_entries); - - anv_state_stream_finish(&pipeline->program_stream); } } diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index f5f43d5a76e..1a193d91e09 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -379,6 +379,7 @@ anv_pipeline_destroy(struct anv_device *device, anv_compiler_free(pipeline); anv_reloc_list_finish(&pipeline->batch.relocs, pipeline->device); + anv_state_stream_finish(&pipeline->program_stream); anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); anv_device_free(pipeline->device, pipeline); } -- cgit v1.2.3 From b6363c3f1232c06d24239d2919ba56f497e24224 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand 
Date: Mon, 8 Jun 2015 17:45:57 -0700 Subject: vk/device: Remove the binding table pools/streams --- src/vulkan/device.c | 12 ------------ src/vulkan/private.h | 2 -- 2 files changed, 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 32de1b4a230..46c6b10c401 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -374,15 +374,6 @@ VkResult anv_CreateDevice( anv_block_pool_init(&device->instruction_block_pool, device, 2048); anv_block_pool_init(&device->surface_state_block_pool, device, 2048); - - /* Binding table pointers are only 16 bits so we have to make sure that - * they get allocated at the beginning of the surface state BO. To - * handle this, we create a separate block pool that works out of the - * first 64 KB of the surface state BO. - */ - anv_block_pool_init_slave(&device->binding_table_block_pool, - &device->surface_state_block_pool, 32); - anv_state_pool_init(&device->surface_state_pool, &device->surface_state_block_pool); @@ -2234,7 +2225,6 @@ anv_cmd_buffer_destroy(struct anv_device *device, anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - anv_state_stream_finish(&cmd_buffer->binding_table_state_stream); anv_device_free(device, cmd_buffer->exec2_objects); anv_device_free(device, cmd_buffer->exec2_bos); anv_device_free(device, cmd_buffer); @@ -2334,8 +2324,6 @@ VkResult anv_CreateCommandBuffer( cmd_buffer->exec2_bos = NULL; cmd_buffer->exec2_array_length = 0; - anv_state_stream_init(&cmd_buffer->binding_table_state_stream, - &device->binding_table_block_pool); anv_state_stream_init(&cmd_buffer->surface_state_stream, &device->surface_state_block_pool); anv_state_stream_init(&cmd_buffer->dynamic_state_stream, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 65b81b3e918..dffec5d248a 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -331,7 +331,6 @@ struct anv_device { struct anv_block_pool 
instruction_block_pool; struct anv_block_pool surface_state_block_pool; - struct anv_block_pool binding_table_block_pool; struct anv_state_pool surface_state_pool; struct anv_meta_state meta_state; @@ -619,7 +618,6 @@ struct anv_cmd_buffer { struct anv_batch_bo * surface_batch_bo; uint32_t surface_next; struct anv_reloc_list surface_relocs; - struct anv_state_stream binding_table_state_stream; struct anv_state_stream surface_state_stream; struct anv_state_stream dynamic_state_stream; -- cgit v1.2.3 From 58afc24e57bc96d159de84f1727e7239c7576edd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 8 Jun 2015 17:46:32 -0700 Subject: vk/allocator: Remove the concept of a slave block pool This reverts commit d24f8245db3418d8d146f373e085780d2217335c. --- src/vulkan/allocator.c | 39 --------------------------------------- src/vulkan/private.h | 3 --- 2 files changed, 42 deletions(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 1f16cb5b6a7..80079ad82e4 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -267,39 +267,6 @@ anv_block_pool_init(struct anv_block_pool *pool, anv_block_pool_grow(pool); } -/** Initializes a block pool that is a slave of another - * - * The newly initialized pool is not a block pool on its own but it rather - * takes a fixed number of blocks from the master pool and hands them out. - * In some sense, it's nothing more than a glorified free list. However, - * since it is a block pool, it can be used to back a pool or stream. 
- */ -void -anv_block_pool_init_slave(struct anv_block_pool *pool, - struct anv_block_pool *master_pool, - uint32_t num_blocks) -{ - pool->device = NULL; - - /* We don't have backing storage */ - pool->bo.gem_handle = 0; - pool->bo.offset = 0; - pool->size = 0; - pool->next_block = 0; - - pool->block_size = master_pool->block_size; - pool->free_list = ANV_FREE_LIST_EMPTY; - anv_vector_init(&pool->mmap_cleanups, - round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); - - /* Pull N blocks off the master pool and put them on this pool */ - for (uint32_t i = 0; i < num_blocks; i++) { - uint32_t block = anv_block_pool_alloc(master_pool); - pool->map = master_pool->map; - anv_block_pool_free(pool, block); - } -} - /* The memfd path lets us create a map for an fd and lets us grow and remap * without copying. It breaks valgrind however, so we have a MAP_ANONYMOUS * path we can take for valgrind debugging. */ @@ -333,12 +300,6 @@ anv_block_pool_grow(struct anv_block_pool *pool) int gem_handle; struct anv_mmap_cleanup *cleanup; - /* If we don't have a device then we can't resize the pool. This can be - * the case if the pool is a slave pool. 
- */ - if (pool->device == NULL) - return -1; - if (pool->size == 0) { size = 32 * pool->block_size; } else { diff --git a/src/vulkan/private.h b/src/vulkan/private.h index dffec5d248a..82dd5738316 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -234,9 +234,6 @@ struct anv_state_stream { void anv_block_pool_init(struct anv_block_pool *pool, struct anv_device *device, uint32_t block_size); -void anv_block_pool_init_slave(struct anv_block_pool *pool, - struct anv_block_pool *master_pool, - uint32_t num_blocks); void anv_block_pool_finish(struct anv_block_pool *pool); uint32_t anv_block_pool_alloc(struct anv_block_pool *pool); void anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset); -- cgit v1.2.3 From e6162c2feff6dee48cd5c1e99e2fe70b5ce07f9f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Jun 2015 12:11:46 -0700 Subject: vk/image: Add anv_image::h_align,v_align Use the new fields to compute RENDER_SURFACE_STATE.Surface*Alignment. We still hardcode them to 4, though. --- src/vulkan/image.c | 24 ++++++++++++++++++++---- src/vulkan/private.h | 15 +++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index a7d1d49114d..529f7ad9774 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -31,6 +31,18 @@ // Image functions +static const uint8_t anv_halign[] = { + [4] = HALIGN4, + [8] = HALIGN8, + [16] = HALIGN16, +}; + +static const uint8_t anv_valign[] = { + [4] = VALIGN4, + [8] = VALIGN8, + [16] = VALIGN16, +}; + static const struct anv_tile_mode_info { int32_t tile_width; int32_t tile_height; @@ -86,6 +98,10 @@ VkResult anv_image_create( if (extra) image->tile_mode = extra->tile_mode; + /* FINISHME: Stop hardcoding miptree image alignment */ + image->h_align = 4; + image->v_align = 4; + if (image->tile_mode == LINEAR) { /* Linear depth buffers must be 64 byte aligned, which is the strictest * requirement for all kinds of linear surfaces. 
@@ -192,8 +208,8 @@ anv_image_view_init(struct anv_surface_view *view, .SurfaceType = SURFTYPE_2D, .SurfaceArray = false, .SurfaceFormat = format, - .SurfaceVerticalAlignment = VALIGN4, - .SurfaceHorizontalAlignment = HALIGN4, + .SurfaceVerticalAlignment = anv_valign[image->v_align], + .SurfaceHorizontalAlignment = anv_halign[image->h_align], .TileMode = tile_mode, .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, @@ -283,8 +299,8 @@ anv_color_attachment_view_init(struct anv_surface_view *view, .SurfaceType = SURFTYPE_2D, .SurfaceArray = false, .SurfaceFormat = format->format, - .SurfaceVerticalAlignment = VALIGN4, - .SurfaceHorizontalAlignment = HALIGN4, + .SurfaceVerticalAlignment = anv_valign[image->v_align], + .SurfaceHorizontalAlignment = anv_halign[image->h_align], .TileMode = image->tile_mode, .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 82dd5738316..794449c27c6 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -738,6 +738,21 @@ struct anv_image { VkDeviceSize offset; struct anv_swap_chain * swap_chain; + + /** + * \name Alignment of miptree images, in units of pixels. + * + * These fields contain the actual alignment values, not the values the + * hardware expects. For example, if h_align is 4, then program the hardware + * with HALIGN_4. 
+ * + * \see RENDER_SURFACE_STATE.SurfaceHorizontalAlignment + * \see RENDER_SURFACE_STATE.SurfaceVerticalAlignment + * \{ + */ + uint8_t h_align; + uint8_t v_align; + /** \} */ }; struct anv_surface_view { -- cgit v1.2.3 From 9d6f55dedf92697d551b699e3fe52389b5fb9c27 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 11:08:03 -0700 Subject: vk/surface_view: Add a destructor --- src/vulkan/device.c | 8 +++++--- src/vulkan/image.c | 19 +++++++++++++++++++ src/vulkan/private.h | 5 +++++ 3 files changed, 29 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 46c6b10c401..acb0822c3df 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1080,10 +1080,7 @@ VkResult anv_DestroyObject( return VK_SUCCESS; case VK_OBJECT_TYPE_BUFFER: - case VK_OBJECT_TYPE_BUFFER_VIEW: case VK_OBJECT_TYPE_IMAGE: - case VK_OBJECT_TYPE_IMAGE_VIEW: - case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW: case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW: case VK_OBJECT_TYPE_SHADER: case VK_OBJECT_TYPE_PIPELINE_LAYOUT: @@ -1104,6 +1101,9 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_FENCE: case VK_OBJECT_TYPE_QUERY_POOL: case VK_OBJECT_TYPE_FRAMEBUFFER: + case VK_OBJECT_TYPE_BUFFER_VIEW: + case VK_OBJECT_TYPE_IMAGE_VIEW: + case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW: (object->destructor)(device, object, objType); return VK_SUCCESS; @@ -1542,6 +1542,8 @@ VkResult anv_CreateBufferView( if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + view->base.destructor = anv_surface_view_destroy; + view->bo = buffer->bo; view->offset = buffer->offset + pCreateInfo->offset; view->surface_state = diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 529f7ad9774..83f4a1bf0cb 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -162,6 +162,21 @@ VkResult anv_GetImageSubresourceInfo( stub_return(VK_UNSUPPORTED); } +void +anv_surface_view_destroy(struct anv_device *device, + struct anv_object *obj, VkObjectType obj_type) +{ + 
struct anv_surface_view *view = (struct anv_surface_view *)obj; + + assert(obj_type == VK_OBJECT_TYPE_BUFFER_VIEW || + obj_type == VK_OBJECT_TYPE_IMAGE_VIEW || + obj_type == VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW); + + anv_state_pool_free(&device->surface_state_pool, view->surface_state); + + anv_device_free(device, view); +} + void anv_image_view_init(struct anv_surface_view *view, struct anv_device *device, @@ -268,6 +283,8 @@ VkResult anv_CreateImageView( anv_image_view_init(view, device, pCreateInfo, NULL); + view->base.destructor = anv_surface_view_destroy; + *pView = (VkImageView) view; return VK_SUCCESS; @@ -352,6 +369,8 @@ VkResult anv_CreateColorAttachmentView( anv_color_attachment_view_init(view, device, pCreateInfo, NULL); + view->base.destructor = anv_surface_view_destroy; + *pView = (VkColorAttachmentView) view; return VK_SUCCESS; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 794449c27c6..05016d21792 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -756,6 +756,8 @@ struct anv_image { }; struct anv_surface_view { + struct anv_object base; + struct anv_state surface_state; struct anv_bo * bo; uint32_t offset; @@ -783,6 +785,9 @@ void anv_color_attachment_view_init(struct anv_surface_view *view, const VkColorAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); +void anv_surface_view_destroy(struct anv_device *device, + struct anv_object *obj, VkObjectType obj_type); + struct anv_sampler { uint32_t state[4]; }; -- cgit v1.2.3 From 3a38b0db5fc7ab732fc6d830a2787b0b3c1bb294 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 11:08:51 -0700 Subject: vk/meta: Clean up temporary objects --- src/vulkan/device.c | 1 + src/vulkan/meta.c | 57 +++++++++++++++++++++++++++++++++++++++++++--------- src/vulkan/private.h | 2 ++ 3 files changed, 50 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index acb0822c3df..d6bd7899919 100644 --- 
a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -410,6 +410,7 @@ VkResult anv_DestroyDevice( anv_compiler_destroy(device->compiler); + anv_device_finish_meta(device); anv_bo_pool_finish(&device->batch_bo_pool); anv_block_pool_finish(&device->dynamic_state_block_pool); diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 285c0202dee..2167d29da6a 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -411,15 +411,13 @@ anv_device_init_meta_blit_state(struct anv_device *device) anv_CreateDescriptorSetLayout((VkDevice) device, &ds_layout_info, &device->meta_state.blit.ds_layout); - VkPipelineLayoutCreateInfo pipeline_layout_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit.ds_layout, - }; - - VkPipelineLayout pipeline_layout; - anv_CreatePipelineLayout((VkDevice) device, &pipeline_layout_info, - &pipeline_layout); + anv_CreatePipelineLayout((VkDevice) device, + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout, + }, + &device->meta_state.blit.pipeline_layout); VkPipelineRsStateCreateInfo rs_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, @@ -445,7 +443,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = &cb_create_info, .flags = 0, - .layout = pipeline_layout, + .layout = device->meta_state.blit.pipeline_layout, }; anv_pipeline_create((VkDevice) device, &pipeline_info, @@ -647,6 +645,14 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, anv_CmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, 1); anv_CmdEndRenderPass((VkCmdBuffer) cmd_buffer, pass); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. 
+ */ + anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_DESCRIPTOR_SET, set); + anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_FRAMEBUFFER, + (VkFramebuffer) fb); + anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_RENDER_PASS, pass); } static void @@ -756,6 +762,9 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, &dest_view, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }); + + anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, (VkImage) src_image); + anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, (VkImage) dest_image); } void anv_CmdCopyBuffer( @@ -1049,6 +1058,8 @@ void anv_CmdCopyBufferToImage( &dest_view, pRegions[r].imageOffset, pRegions[r].imageExtent); + + anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, (VkImage) src_image); } meta_finish_blit(cmd_buffer, &saved_state); @@ -1139,6 +1150,8 @@ void anv_CmdCopyImageToBuffer( &dest_view, (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent); + + anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, (VkImage) dest_image); } meta_finish_blit(cmd_buffer, &saved_state); @@ -1233,3 +1246,27 @@ anv_device_init_meta(struct anv_device *device) }, &device->meta_state.shared.ds_state); } + +void +anv_device_finish_meta(struct anv_device *device) +{ + /* Clear */ + anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_PIPELINE, + device->meta_state.clear.pipeline); + + /* Blit */ + anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_PIPELINE, + device->meta_state.blit.pipeline); + anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_PIPELINE_LAYOUT, + device->meta_state.blit.pipeline_layout); + anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, + device->meta_state.blit.ds_layout); + + /* Shared */ + anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_DYNAMIC_RS_STATE, + device->meta_state.shared.rs_state); + anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_DYNAMIC_CB_STATE, + device->meta_state.shared.cb_state); + anv_DestroyObject((VkDevice) device, 
VK_OBJECT_TYPE_DYNAMIC_DS_STATE, + device->meta_state.shared.ds_state); +} diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 05016d21792..873efaa48f5 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -302,6 +302,7 @@ struct anv_meta_state { struct { VkPipeline pipeline; + VkPipelineLayout pipeline_layout; VkDescriptorSetLayout ds_layout; } blit; @@ -832,6 +833,7 @@ struct anv_render_pass { }; void anv_device_init_meta(struct anv_device *device); +void anv_device_finish_meta(struct anv_device *device); void anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, -- cgit v1.2.3 From 999b56c507fa9542725556aa9409d029fdc19c94 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 11:40:22 -0700 Subject: vk/device: Destroy all batch buffers Due to a copy+paste error, we were destroying all but the first batch or surface state buffer. Now we destroy them all. --- src/vulkan/device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index d6bd7899919..843c1ed61f0 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2210,7 +2210,7 @@ anv_cmd_buffer_destroy(struct anv_device *device, /* Destroy all of the batch buffers */ struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; - while (bbo->prev_batch_bo) { + while (bbo) { struct anv_batch_bo *prev = bbo->prev_batch_bo; anv_batch_bo_destroy(bbo, device); bbo = prev; @@ -2219,7 +2219,7 @@ anv_cmd_buffer_destroy(struct anv_device *device, /* Destroy all of the surface state buffers */ bbo = cmd_buffer->surface_batch_bo; - while (bbo->prev_batch_bo) { + while (bbo) { struct anv_batch_bo *prev = bbo->prev_batch_bo; anv_batch_bo_destroy(bbo, device); bbo = prev; -- cgit v1.2.3 From 38f5eef59de0be41c4c5bd77fb221c75981e1e5c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 11:41:31 -0700 Subject: vk/device: Free border color states when we have valgrind --- src/vulkan/device.c | 10 
++++++++++ 1 file changed, 10 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 843c1ed61f0..37449d7ae4f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -412,6 +412,16 @@ VkResult anv_DestroyDevice( anv_device_finish_meta(device); +#ifdef HAVE_VALGRIND + /* We only need to free these to prevent valgrind errors. The backing + * BO will go away in a couple of lines so we don't actually leak. + */ + anv_state_pool_free(&device->dynamic_state_pool, + device->float_border_colors); + anv_state_pool_free(&device->dynamic_state_pool, + device->uint32_border_colors); +#endif + anv_bo_pool_finish(&device->batch_bo_pool); anv_block_pool_finish(&device->dynamic_state_block_pool); anv_block_pool_finish(&device->instruction_block_pool); -- cgit v1.2.3 From 66b00d5e5a64dfa63f34514b01f6d01168f9a6a7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 12:28:58 -0700 Subject: vk/queue: Embed the queue in and allocate it with the device --- src/vulkan/device.c | 48 +++++++++++++++++++++++++++++++++--------------- src/vulkan/private.h | 42 ++++++++++++++++++++++-------------------- 2 files changed, 55 insertions(+), 35 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 37449d7ae4f..9924baa84cb 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -303,6 +303,33 @@ parse_debug_flags(struct anv_device *device) } } +static VkResult +anv_queue_init(struct anv_device *device, struct anv_queue *queue) +{ + queue->device = device; + queue->pool = &device->surface_state_pool; + + queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4); + if (queue->completed_serial.map == NULL) + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + + *(uint32_t *)queue->completed_serial.map = 0; + queue->next_serial = 1; + + return VK_SUCCESS; +} + +static void +anv_queue_finish(struct anv_queue *queue) +{ +#ifdef HAVE_VALGRIND + /* This gets torn down with the device so we only 
need to do this if + * valgrind is present. + */ + anv_state_pool_free(queue->pool, queue->completed_serial); +#endif +} + static void anv_device_init_border_colors(struct anv_device *device) { @@ -384,6 +411,8 @@ VkResult anv_CreateDevice( pthread_mutex_init(&device->mutex, NULL); + anv_queue_init(device, &device->queue); + anv_device_init_meta(device); anv_device_init_border_colors(device); @@ -410,6 +439,8 @@ VkResult anv_DestroyDevice( anv_compiler_destroy(device->compiler); + anv_queue_finish(&device->queue); + anv_device_finish_meta(device); #ifdef HAVE_VALGRIND @@ -516,23 +547,10 @@ VkResult anv_GetDeviceQueue( VkQueue* pQueue) { struct anv_device *device = (struct anv_device *) _device; - struct anv_queue *queue; - - /* FIXME: Should allocate these at device create time. */ - - queue = anv_device_alloc(device, sizeof(*queue), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (queue == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - queue->device = device; - queue->pool = &device->surface_state_pool; - queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4); - *(uint32_t *)queue->completed_serial.map = 0; - queue->next_serial = 1; + assert(queueIndex == 0); - *pQueue = (VkQueue) queue; + *pQueue = (VkQueue) &device->queue; return VK_SUCCESS; } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 873efaa48f5..645e96d75a9 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -313,6 +313,26 @@ struct anv_meta_state { } shared; }; +struct anv_queue { + struct anv_device * device; + + struct anv_state_pool * pool; + + /** + * Serial number of the most recently completed batch executed on the + * engine. + */ + struct anv_state completed_serial; + + /** + * The next batch submitted to the engine will be assigned this serial + * number. 
+ */ + uint32_t next_serial; + + uint32_t last_collected_serial; +}; + struct anv_device { struct anv_instance * instance; uint32_t chipset_id; @@ -336,31 +356,13 @@ struct anv_device { struct anv_state float_border_colors; struct anv_state uint32_border_colors; + struct anv_queue queue; + struct anv_compiler * compiler; struct anv_aub_writer * aub_writer; pthread_mutex_t mutex; }; -struct anv_queue { - struct anv_device * device; - - struct anv_state_pool * pool; - - /** - * Serial number of the most recently completed batch executed on the - * engine. - */ - struct anv_state completed_serial; - - /** - * The next batch submitted to the engine will be assigned this serial - * number. - */ - uint32_t next_serial; - - uint32_t last_collected_serial; -}; - void * anv_device_alloc(struct anv_device * device, size_t size, -- cgit v1.2.3 From 9f292219bfd7dbbbfe5a94096143f0942e72fd7e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 12:29:37 -0700 Subject: vk/compiler: Free more of prog_data when tearing down a pipeline --- src/vulkan/compiler.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 28271edaabe..de4d8839f70 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -1003,9 +1003,13 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) void anv_compiler_free(struct anv_pipeline *pipeline) { - for (uint32_t stage = 0; stage < VK_NUM_SHADER_STAGE; stage++) - if (pipeline->prog_data[stage]) + for (uint32_t stage = 0; stage < VK_NUM_SHADER_STAGE; stage++) { + if (pipeline->prog_data[stage]) { free(pipeline->prog_data[stage]->map_entries); + ralloc_free(pipeline->prog_data[stage]->param); + ralloc_free(pipeline->prog_data[stage]->pull_param); + } + } } } -- cgit v1.2.3 From d842a6965fe6990c7c4f52abf59008b2cb214a17 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 12:35:21 -0700 Subject: 
vk/compiler: Free the GL errors data --- src/vulkan/compiler.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index de4d8839f70..6adf92b3662 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -687,6 +687,7 @@ anv_compiler_create(struct anv_device *device) void anv_compiler_destroy(struct anv_compiler *compiler) { + _mesa_free_errors_data(&compiler->brw->ctx); ralloc_free(compiler); } -- cgit v1.2.3 From 5b777e2bcf8efd813a312e4c3208d0ddb7440abb Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Jun 2015 13:13:32 -0700 Subject: vk/image: Delete an old comment --- src/vulkan/image.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 83f4a1bf0cb..077b32650d6 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -29,8 +29,6 @@ #include "private.h" -// Image functions - static const uint8_t anv_halign[] = { [4] = HALIGN4, [8] = HALIGN8, -- cgit v1.2.3 From e6bd568f36ca04c34fdf566a40ad939039d207a6 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Jun 2015 13:29:08 -0700 Subject: vk/image: Rewrite tile info table - Reduce the number of table lookups in anv_image_create from 4 to 1. - Add field for surface alignment. - Shorten field names tile_width, tile_height -> width, height. 
--- src/vulkan/image.c | 55 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 077b32650d6..89fd1ebf270 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -41,14 +41,27 @@ static const uint8_t anv_valign[] = { [16] = VALIGN16, }; -static const struct anv_tile_mode_info { - int32_t tile_width; - int32_t tile_height; -} tile_mode_info[] = { - [LINEAR] = { 1, 1 }, - [XMAJOR] = { 512, 8 }, - [YMAJOR] = { 128, 32 }, - [WMAJOR] = { 128, 32 } +static const struct anv_tile_info { + uint32_t width; + uint32_t height; + + /** + * Alignment for RENDER_SURFACE_STATE.SurfaceBaseAddress. + * + * To simplify calculations, the alignments defined in the table are + * sometimes larger than required. For example, Skylake requires that X and + * Y tiled buffers be aligned to 4K, but Broadwell permits smaller + * alignment. We choose 4K to accomodate both chipsets. The alignment of + * a linear buffer depends on its element type and usage. Linear depth + * buffers have the largest alignment, 64B, so we choose that for all linear + * buffers. + */ + uint32_t surface_alignment; +} anv_tile_info_table[] = { + [LINEAR] = { 1, 1, 64 }, + [XMAJOR] = { 512, 8, 4096 }, + [YMAJOR] = { 128, 32, 4096 }, + [WMAJOR] = { 128, 32, 4096 }, }; VkResult anv_image_create( @@ -96,27 +109,22 @@ VkResult anv_image_create( if (extra) image->tile_mode = extra->tile_mode; + const struct anv_tile_info *tile_info = + &anv_tile_info_table[image->tile_mode]; + + image->alignment = tile_info->surface_alignment; + /* FINISHME: Stop hardcoding miptree image alignment */ image->h_align = 4; image->v_align = 4; - if (image->tile_mode == LINEAR) { - /* Linear depth buffers must be 64 byte aligned, which is the strictest - * requirement for all kinds of linear surfaces. 
- */ - image->alignment = 64; - } else { - image->alignment = 4096; - } - info = anv_format_for_vk_format(pCreateInfo->format); assert(info->cpp > 0 || info->has_stencil); if (info->cpp > 0) { image->stride = ALIGN_I32(image->extent.width * info->cpp, - tile_mode_info[image->tile_mode].tile_width); - aligned_height = ALIGN_I32(image->extent.height, - tile_mode_info[image->tile_mode].tile_height); + tile_info->width); + aligned_height = ALIGN_I32(image->extent.height, tile_info->height); image->size = image->stride * aligned_height; } else { image->size = 0; @@ -124,11 +132,10 @@ VkResult anv_image_create( } if (info->has_stencil) { + const struct anv_tile_info *w_info = &anv_tile_info_table[WMAJOR]; image->stencil_offset = ALIGN_U32(image->size, 4096); - image->stencil_stride = ALIGN_I32(image->extent.width, - tile_mode_info[WMAJOR].tile_width); - aligned_height = ALIGN_I32(image->extent.height, - tile_mode_info[WMAJOR].tile_height); + image->stencil_stride = ALIGN_I32(image->extent.width, w_info->width); + aligned_height = ALIGN_I32(image->extent.height, w_info->height); stencil_size = image->stencil_stride * aligned_height; image->size = image->stencil_offset + stencil_size; } else { -- cgit v1.2.3 From 081f617b5afebba8348c1718f70dfeb92e0f954c Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Jun 2015 14:03:44 -0700 Subject: vk/image: Stop hardcoding alignment of stencil surfaces Look up the alignment from anv_tile_info_table. 
--- src/vulkan/image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 89fd1ebf270..6e123e38530 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -133,7 +133,7 @@ VkResult anv_image_create( if (info->has_stencil) { const struct anv_tile_info *w_info = &anv_tile_info_table[WMAJOR]; - image->stencil_offset = ALIGN_U32(image->size, 4096); + image->stencil_offset = ALIGN_U32(image->size, w_info->surface_alignment); image->stencil_stride = ALIGN_I32(image->extent.width, w_info->width); aligned_height = ALIGN_I32(image->extent.height, w_info->height); stencil_size = image->stencil_stride * aligned_height; -- cgit v1.2.3 From 822cb16abe9b1446b218f58d7aa679da02efca7f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Jun 2015 14:45:58 -0700 Subject: vk: Define anv_printflike() macro --- src/vulkan/private.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 645e96d75a9..8cffdf1f91b 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -46,6 +46,8 @@ extern "C" { #endif +#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) + static inline uint32_t ALIGN_U32(uint32_t v, uint32_t a) { -- cgit v1.2.3 From d57c4cf999381e4f6fbd8b5c85ead6bb5938290d Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Jun 2015 14:46:18 -0700 Subject: vk/util: Annotate anv_finishme() as printflike --- src/vulkan/private.h | 3 ++- src/vulkan/util.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 8cffdf1f91b..822cd890073 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -88,7 +88,8 @@ vk_error(VkResult error) return error; } -void __anv_finishme(const char *file, int line, const char *format, ...); +void __anv_finishme(const char *file, int line, const char *format, ...) 
+ anv_printflike(3, 4); /** * Print a FINISHME message, including its source location. diff --git a/src/vulkan/util.c b/src/vulkan/util.c index 082499d6093..928f191847f 100644 --- a/src/vulkan/util.c +++ b/src/vulkan/util.c @@ -30,7 +30,7 @@ #include "private.h" -void +void anv_printflike(3, 4) __anv_finishme(const char *file, int line, const char *format, ...) { va_list ap; -- cgit v1.2.3 From 0599d39dd908959746e4a89e4e1aeafa160844bd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 15:53:10 -0700 Subject: vk/device: Dedent the vkCreateDynamicViewportState call --- src/vulkan/device.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 9924baa84cb..423f165e400 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3423,25 +3423,25 @@ VkResult anv_CreateFramebuffer( framebuffer->layers = pCreateInfo->layers; vkCreateDynamicViewportState((VkDevice) device, - &(VkDynamicVpStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, - .viewportAndScissorCount = 1, - .pViewports = (VkViewport[]) { - { - .originX = 0, - .originY = 0, - .width = pCreateInfo->width, - .height = pCreateInfo->height, - .minDepth = 0, - .maxDepth = 1 - }, - }, - .pScissors = (VkRect[]) { - { { 0, 0 }, - { pCreateInfo->width, pCreateInfo->height } }, - } - }, - &framebuffer->vp_state); + &(VkDynamicVpStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, + .viewportAndScissorCount = 1, + .pViewports = (VkViewport[]) { + { + .originX = 0, + .originY = 0, + .width = pCreateInfo->width, + .height = pCreateInfo->height, + .minDepth = 0, + .maxDepth = 1 + }, + }, + .pScissors = (VkRect[]) { + { { 0, 0 }, + { pCreateInfo->width, pCreateInfo->height } }, + } + }, + &framebuffer->vp_state); *pFramebuffer = (VkFramebuffer) framebuffer; -- cgit v1.2.3 From 919e7b75514cae0b36ce5559ea3fe92ecbee815d Mon Sep 17 00:00:00 
2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 16:01:56 -0700 Subject: vk/device: Use anv_CreateDynamicViewportState instead of the vk one --- src/vulkan/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 423f165e400..9eba8709a89 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3422,7 +3422,7 @@ VkResult anv_CreateFramebuffer( framebuffer->height = pCreateInfo->height; framebuffer->layers = pCreateInfo->layers; - vkCreateDynamicViewportState((VkDevice) device, + anv_CreateDynamicViewportState((VkDevice) device, &(VkDynamicVpStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, .viewportAndScissorCount = 1, -- cgit v1.2.3 From 634a6150b9302bd53c484759750854ce0fc0efb4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 16:26:55 -0700 Subject: vk/pipeline: Zero out the depth-stencil state when not in use --- src/vulkan/pipeline.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 1a193d91e09..6f0b5b4aafd 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -338,6 +338,15 @@ static const uint32_t vk_to_gen_stencil_op[] = { static void emit_ds_state(struct anv_pipeline *pipeline, VkPipelineDsStateCreateInfo *info) { + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. 
+ */ + memset(pipeline->state_wm_depth_stencil, 0, + sizeof(pipeline->state_wm_depth_stencil)); + return; + } + /* bool32_t depthBoundsEnable; // optional (depth_bounds_test) */ struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { @@ -479,11 +488,7 @@ anv_pipeline_create( emit_ia_state(pipeline, ia_info, extra); assert(rs_info); emit_rs_state(pipeline, rs_info, extra); - /* ds_info is optional if we're not using depth or stencil buffers, ps is - * optional for depth-only rendering. */ - if (ds_info) - emit_ds_state(pipeline, ds_info); - + emit_ds_state(pipeline, ds_info); emit_cb_state(pipeline, cb_info); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, -- cgit v1.2.3 From 5d4b6a01af4ceda37dbde0159247a45cfbff404e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 16:27:55 -0700 Subject: vk/cmd_buffer: Properly initialize/reset dynamic states --- src/vulkan/device.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 9eba8709a89..731cd8f32a4 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2322,6 +2322,7 @@ VkResult anv_CreateCommandBuffer( cmd_buffer->rs_state = NULL; cmd_buffer->vp_state = NULL; cmd_buffer->cb_state = NULL; + cmd_buffer->ds_state = NULL; memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors)); result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo); @@ -2658,6 +2659,11 @@ VkResult anv_ResetCommandBuffer( cmd_buffer->surface_next = 1; cmd_buffer->surface_relocs.num_relocs = 0; + cmd_buffer->rs_state = NULL; + cmd_buffer->vp_state = NULL; + cmd_buffer->cb_state = NULL; + cmd_buffer->ds_state = NULL; + return VK_SUCCESS; } -- cgit v1.2.3 From 11e941900ad222bebc0142f58f78c70fe335caa9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 16:28:46 -0700 Subject: vk/device: Actually allow destruction --- src/vulkan/device.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src') diff --git 
a/src/vulkan/device.c b/src/vulkan/device.c index 731cd8f32a4..4dc527e9f0f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -434,9 +434,6 @@ VkResult anv_DestroyDevice( { struct anv_device *device = (struct anv_device *) _device; - /* FIXME: We should make device destruction actually safe. */ - return VK_UNSUPPORTED; - anv_compiler_destroy(device->compiler); anv_queue_finish(&device->queue); -- cgit v1.2.3 From f1db3b386990513041fd8ca89950d8aa3f9a0133 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Jun 2015 14:33:05 -0700 Subject: vk/image: Factor tile mode selection into separate function Because it will eventually need to get smarter. --- src/vulkan/image.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 6e123e38530..2de5be93e1b 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -64,6 +64,24 @@ static const struct anv_tile_info { [WMAJOR] = { 128, 32, 4096 }, }; +static uint32_t +anv_image_choose_tile_mode(const VkImageCreateInfo *vk_info, + const struct anv_image_create_info *anv_info) +{ + if (anv_info) + return anv_info->tile_mode; + + switch (vk_info->tiling) { + case VK_IMAGE_TILING_LINEAR: + return LINEAR; + case VK_IMAGE_TILING_OPTIMAL: + return YMAJOR; + default: + assert(!"bad VKImageTiling"); + return LINEAR; + } +} + VkResult anv_image_create( VkDevice _device, const VkImageCreateInfo* pCreateInfo, @@ -89,26 +107,12 @@ VkResult anv_image_create( image->format = pCreateInfo->format; image->extent = pCreateInfo->extent; image->swap_chain = NULL; + image->tile_mode = anv_image_choose_tile_mode(pCreateInfo, extra); assert(image->extent.width > 0); assert(image->extent.height > 0); assert(image->extent.depth > 0); - switch (pCreateInfo->tiling) { - case VK_IMAGE_TILING_LINEAR: - image->tile_mode = LINEAR; - break; - case VK_IMAGE_TILING_OPTIMAL: - image->tile_mode = YMAJOR; - break; - default: - assert(!"bad 
VKImageTiling"); - break; - } - - if (extra) - image->tile_mode = extra->tile_mode; - const struct anv_tile_info *tile_info = &anv_tile_info_table[image->tile_mode]; -- cgit v1.2.3 From ffb1ee5d2072ef853d44f5a706a8b78ffd815d23 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Jun 2015 14:54:40 -0700 Subject: vk: Define anv_noreturn macro --- src/vulkan/private.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 822cd890073..26296c931fd 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -46,6 +46,7 @@ extern "C" { #endif +#define anv_noreturn __attribute__((__noreturn__)) #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) static inline uint32_t -- cgit v1.2.3 From 2d2e1489527401afb915496f525fcdd636e28a56 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Jun 2015 14:54:53 -0700 Subject: vk/util: Add anv_abortf(), anv_abortfv() Convenience functions to print an error message then abort. --- src/vulkan/private.h | 3 +++ src/vulkan/util.c | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 26296c931fd..36279fac2cb 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -98,6 +98,9 @@ void __anv_finishme(const char *file, int line, const char *format, ...) #define anv_finishme(format, ...) \ __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); +void anv_abortf(const char *format, ...) anv_noreturn anv_printflike(1, 2); +void anv_abortfv(const char *format, va_list va) anv_noreturn; + #define stub_return(v) \ do { \ anv_finishme("stub %s", __func__); \ diff --git a/src/vulkan/util.c b/src/vulkan/util.c index 928f191847f..13af882cffd 100644 --- a/src/vulkan/util.c +++ b/src/vulkan/util.c @@ -43,6 +43,25 @@ __anv_finishme(const char *file, int line, const char *format, ...) 
fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); } +void anv_noreturn anv_printflike(1, 2) +anv_abortf(const char *format, ...) +{ + va_list va; + + va_start(va, format); + anv_abortfv(format, va); + va_end(va); +} + +void anv_noreturn +anv_abortfv(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); + abort(); +} + int anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) { -- cgit v1.2.3 From 1ee2d1c3fc3a5d567847d0ffdc1d5f888df7859e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Jun 2015 14:56:58 -0700 Subject: vk/image: Teach anv_image_choose_tile_mode about WMAJOR --- src/vulkan/image.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 2de5be93e1b..81426588945 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -73,9 +73,16 @@ anv_image_choose_tile_mode(const VkImageCreateInfo *vk_info, switch (vk_info->tiling) { case VK_IMAGE_TILING_LINEAR: + if (unlikely(vk_info->format == VK_FORMAT_S8_UINT)) { + anv_abortf("requested linear stencil buffer"); + } return LINEAR; case VK_IMAGE_TILING_OPTIMAL: - return YMAJOR; + if (unlikely(vk_info->format == VK_FORMAT_S8_UINT)) { + return WMAJOR; + } else { + return YMAJOR; + } default: assert(!"bad VKImageTiling"); return LINEAR; -- cgit v1.2.3 From e17ed04b03e109acabe209a85c04031dbf40ad51 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Jun 2015 15:01:18 -0700 Subject: vk/image: Don't double-allocate stencil buffers If the main surface has format S8_UINT, then don't allocate the auxiliary stencil surface. 
--- src/vulkan/image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 81426588945..40d5024a0eb 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -142,7 +142,7 @@ VkResult anv_image_create( image->stride = 0; } - if (info->has_stencil) { + if (info->has_stencil && pCreateInfo->format != VK_FORMAT_S8_UINT) { const struct anv_tile_info *w_info = &anv_tile_info_table[WMAJOR]; image->stencil_offset = ALIGN_U32(image->size, w_info->surface_alignment); image->stencil_stride = ALIGN_I32(image->extent.width, w_info->width); -- cgit v1.2.3 From d5ad24e39b704525f1f39dd75b2092801c1c6124 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 21:13:36 -0700 Subject: vk: Move the valgrind include and VG() macro to private.h --- src/vulkan/allocator.c | 4 ---- src/vulkan/gem.c | 8 -------- src/vulkan/private.h | 8 ++++++++ 3 files changed, 8 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 80079ad82e4..8752edac91d 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -37,9 +37,6 @@ #include "private.h" #ifdef HAVE_VALGRIND -#include -#include -#define VG(x) x #define VG_NOACCESS_READ(__ptr) ({ \ VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \ __typeof(*(__ptr)) __val = *(__ptr); \ @@ -52,7 +49,6 @@ VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \ }) #else -#define VG(x) #define VG_NOACCESS_READ(__ptr) (*(__ptr)) #define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val)) #endif diff --git a/src/vulkan/gem.c b/src/vulkan/gem.c index ab6f5c4dfbd..7bc5e49a810 100644 --- a/src/vulkan/gem.c +++ b/src/vulkan/gem.c @@ -32,14 +32,6 @@ #include "private.h" -#ifdef HAVE_VALGRIND -#include -#include -#define VG(x) x -#else -#define VG(x) -#endif - #define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) static int diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 
36279fac2cb..84c0524ad3c 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -30,6 +30,14 @@ #include #include +#ifdef HAVE_VALGRIND +#include +#include +#define VG(x) x +#else +#define VG(x) +#endif + #include "brw_device_info.h" #include "util/macros.h" -- cgit v1.2.3 From 9cae3d18ac44677fb1c1a4d9c01a1630e62fcb7a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Jun 2015 21:36:12 -0700 Subject: vk: Add valgrind checks in various emit functions The check in batch_bo_finish should catch any undefined values in the batch but isn't that great for debugging. The checks in the various emit functions will help get better granularity. --- src/vulkan/device.c | 5 +++++ src/vulkan/private.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 4dc527e9f0f..39d818d2656 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -659,6 +659,7 @@ anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) { assert(batch->start == bbo->bo.map); bbo->length = batch->next - batch->start; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); bbo->num_relocs = batch->relocs.num_relocs - bbo->first_reloc; } @@ -3070,6 +3071,8 @@ anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, dwords * 4, alignment); memcpy(state.map, a, dwords * 4); + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4)); + return state; } @@ -3087,6 +3090,8 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, for (uint32_t i = 0; i < dwords; i++) p[i] = a[i] | b[i]; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); + return state; } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 84c0524ad3c..7ffba4f63b6 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -488,6 +488,7 @@ __gen_combine_address(struct anv_batch *batch, void *location, }; \ void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \ cmd ## _pack(batch, __dst, &__template); \ + 
VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, (cmd ## _length) * 4)); \ } while (0) #define anv_batch_emitn(batch, n, cmd, ...) ({ \ @@ -509,6 +510,7 @@ __gen_combine_address(struct anv_batch *batch, void *location, dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \ for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \ dw[i] = (dwords0)[i] | (dwords1)[i]; \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ } while (0) #define GEN8_MOCS { \ -- cgit v1.2.3 From 047ed02723071d7eccbed3210b5be6ae73603a53 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 10 Jun 2015 12:43:02 -0700 Subject: vk/emit: Use valgrind to validate every packed field --- src/vulkan/gen8_pack.h | 7 +++++++ src/vulkan/private.h | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index aeb5dadd020..54ad55ab490 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -32,6 +32,10 @@ #include #include +#ifndef __gen_validate_value +#define __gen_validate_value(x) +#endif + #ifndef __gen_field_functions #define __gen_field_functions @@ -43,6 +47,7 @@ union __gen_value { static inline uint64_t __gen_field(uint64_t v, uint32_t start, uint32_t end) { + __gen_validate_value(v); #if DEBUG if (end - start + 1 < 64) assert(v < 1ul << (end - start + 1)); @@ -54,6 +59,7 @@ __gen_field(uint64_t v, uint32_t start, uint32_t end) static inline uint64_t __gen_offset(uint64_t v, uint32_t start, uint32_t end) { + __gen_validate_value(v); #if DEBUG uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; @@ -66,6 +72,7 @@ __gen_offset(uint64_t v, uint32_t start, uint32_t end) static inline uint32_t __gen_float(float v) { + __gen_validate_value(v); return ((union __gen_value) { .f = (v) }).dw; } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 7ffba4f63b6..095f8f0f86e 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -34,6 +34,7 @@ #include #include #define VG(x) x 
+#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) #else #define VG(x) #endif @@ -488,7 +489,6 @@ __gen_combine_address(struct anv_batch *batch, void *location, }; \ void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \ cmd ## _pack(batch, __dst, &__template); \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, (cmd ## _length) * 4)); \ } while (0) #define anv_batch_emitn(batch, n, cmd, ...) ({ \ -- cgit v1.2.3 From 29d2bbb2b512dae78eb7eb29d1562c7a550806d3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 10 Jun 2015 16:37:31 -0700 Subject: vk/cmd: Add an initial implementation of PipelineBarrier We may want to do something more inteligent here later such as actually handling image layout transitions. However, this should do for now. --- src/vulkan/device.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 113 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 39d818d2656..8cb5b3f8c93 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3341,7 +3341,119 @@ void anv_CmdPipelineBarrier( uint32_t memBarrierCount, const void** ppMemBarriers) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; + uint32_t b, *dw; + + struct GEN8_PIPE_CONTROL cmd = { + GEN8_PIPE_CONTROL_header, + .PostSyncOperation = NoWrite, + }; + + /* XXX: I think waitEvent is a no-op on our HW. We should verify that. 
*/ + + for (uint32_t i = 0; i < pipeEventCount; i++) { + switch (pPipeEvents[i]) { + case VK_PIPE_EVENT_TOP_OF_PIPE: + /* This is just what PIPE_CONTROL does */ + break; + case VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE: + case VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE: + case VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE: + cmd.StallAtPixelScoreboard = true; + break; + case VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE: + case VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE: + case VK_PIPE_EVENT_TRANSFER_COMPLETE: + case VK_PIPE_EVENT_COMMANDS_COMPLETE: + cmd.CommandStreamerStallEnable = true; + break; + default: + unreachable("Invalid VkPipeEvent"); + } + } + + /* XXX: Right now, we're really dumb and just flush whatever categories + * the app asks for. One of these days we may make this a bit better + * but right now that's all the hardware allows for in most areas. + */ + VkMemoryOutputFlags out_flags = 0; + VkMemoryInputFlags in_flags = 0; + + for (uint32_t i = 0; i < memBarrierCount; i++) { + const struct anv_common *common = ppMemBarriers[i]; + switch (common->sType) { + case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { + const VkMemoryBarrier *barrier = (VkMemoryBarrier *)common; + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { + const VkBufferMemoryBarrier *barrier = (VkBufferMemoryBarrier *)common; + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { + const VkImageMemoryBarrier *barrier = (VkImageMemoryBarrier *)common; + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + default: + unreachable("Invalid memory barrier type"); + } + } + + for_each_bit(b, out_flags) { + switch ((VkMemoryOutputFlags)(1 << b)) { + case VK_MEMORY_OUTPUT_CPU_WRITE_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: + cmd.DCFlushEnable = true; + break; + case 
VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: + cmd.RenderTargetCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + cmd.DepthCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_TRANSFER_BIT: + cmd.RenderTargetCacheFlushEnable = true; + cmd.DepthCacheFlushEnable = true; + break; + default: + unreachable("Invalid memory output flag"); + } + } + + for_each_bit(b, out_flags) { + switch ((VkMemoryInputFlags)(1 << b)) { + case VK_MEMORY_INPUT_CPU_READ_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: + case VK_MEMORY_INPUT_INDEX_FETCH_BIT: + case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: + cmd.VFCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_UNIFORM_READ_BIT: + cmd.ConstantCacheInvalidationEnable = true; + /* fallthrough */ + case VK_MEMORY_INPUT_SHADER_READ_BIT: + cmd.DCFlushEnable = true; + cmd.TextureCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: + case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + break; /* XXX: Hunh? */ + case VK_MEMORY_INPUT_TRANSFER_BIT: + cmd.TextureCacheInvalidationEnable = true; + break; + } + } + + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length); + GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd); } void anv_CmdInitAtomicCounters( -- cgit v1.2.3 From 7153b56abc18f1a305cc368eaaace20b6df80766 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 10 Jun 2015 21:03:50 -0700 Subject: vk/private: Add a non-fatal assert --- src/vulkan/private.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 095f8f0f86e..9f1345fd8ed 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -107,6 +107,16 @@ void __anv_finishme(const char *file, int line, const char *format, ...) #define anv_finishme(format, ...) \ __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); +/* A non-fatal assert. 
Useful for debugging. */ +#ifdef DEBUG +#define anv_assert(x) ({ \ + if (unlikely(!(x))) \ + fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ +}) +#else +#define anv_assert(x) +#endif + void anv_abortf(const char *format, ...) anv_noreturn anv_printflike(1, 2); void anv_abortfv(const char *format, va_list va) anv_noreturn; -- cgit v1.2.3 From c8b62d109b9c4efc7bd493d220d959a16a456e5c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 10 Jun 2015 21:04:13 -0700 Subject: vk: Add a couple vk_error calls --- src/vulkan/device.c | 2 +- src/vulkan/formats.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 8cb5b3f8c93..3df707429e1 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1221,7 +1221,7 @@ VkResult anv_GetObjectInfo( return VK_SUCCESS; default: - return VK_UNSUPPORTED; + return vk_error(VK_UNSUPPORTED); } } diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index c5e52c47b5f..e915156c1be 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -302,6 +302,6 @@ VkResult anv_GetFormatInfo( return VK_SUCCESS; default: - return VK_ERROR_INVALID_VALUE; + return vk_error(VK_ERROR_INVALID_VALUE); } } -- cgit v1.2.3 From 2a3c29698c43601751e43c0344d8f5ba1f57937d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 10 Jun 2015 21:04:51 -0700 Subject: vk/image: Add a bunch of asserts --- src/vulkan/image.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 40d5024a0eb..b385be48917 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -108,6 +108,13 @@ VkResult anv_image_create( if (image == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + /* XXX: We don't handle any of these */ + anv_assert(pCreateInfo->imageType == VK_IMAGE_TYPE_2D); + anv_assert(pCreateInfo->mipLevels == 1); + anv_assert(pCreateInfo->arraySize == 1); + 
anv_assert(pCreateInfo->samples == 1); + anv_assert(pCreateInfo->extent.depth == 1); + image->bo = NULL; image->offset = 0; image->type = pCreateInfo->imageType; @@ -204,6 +211,13 @@ anv_image_view_init(struct anv_surface_view *view, anv_format_for_vk_format(pCreateInfo->format); uint32_t tile_mode, format; + /* XXX: We don't handle any of these */ + anv_assert(pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_2D); + anv_assert(pCreateInfo->subresourceRange.baseMipLevel == 0); + anv_assert(pCreateInfo->subresourceRange.mipLevels == 1); + anv_assert(pCreateInfo->subresourceRange.baseArraySlice == 0); + anv_assert(pCreateInfo->subresourceRange.arraySize == 1); + view->bo = image->bo; switch (pCreateInfo->subresourceRange.aspect) { case VK_IMAGE_ASPECT_STENCIL: @@ -316,6 +330,12 @@ anv_color_attachment_view_init(struct anv_surface_view *view, const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); + /* XXX: We don't handle any of these */ + anv_assert(pCreateInfo->mipLevel == 0); + anv_assert(pCreateInfo->baseArraySlice == 0); + anv_assert(pCreateInfo->arraySize == 1); + anv_assert(pCreateInfo->msaaResolveImage == 0); + view->bo = image->bo; view->offset = image->offset; view->extent = image->extent; @@ -410,6 +430,12 @@ VkResult anv_CreateDepthStencilView( if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + /* XXX: We don't handle any of these */ + anv_assert(pCreateInfo->mipLevel == 0); + anv_assert(pCreateInfo->baseArraySlice == 0); + anv_assert(pCreateInfo->arraySize == 1); + anv_assert(pCreateInfo->msaaResolveImage == 0); + view->bo = image->bo; view->depth_stride = image->stride; -- cgit v1.2.3 From a566b1e08a269850189011ec3cd06f6408c7adf6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 11 Jun 2015 09:11:14 -0700 Subject: vk/formats: Refactor format properties code Along with the refactor, we now do the right thing when we hit an unsupported format: Set the flags to 0 and return VK_SUCCESS. 
--- src/vulkan/formats.c | 50 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index e915156c1be..c119be6d041 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -246,16 +246,11 @@ VkResult anv_validate_GetFormatInfo( return anv_GetFormatInfo(_device, _format, infoType, pDataSize, pData); } -VkResult anv_GetFormatInfo( - VkDevice _device, - VkFormat _format, - VkFormatInfoType infoType, - size_t* pDataSize, - void* pData) +static void +anv_format_get_properties(struct anv_device *device, + const struct anv_format *format, + VkFormatProperties *properties) { - struct anv_device *device = (struct anv_device *) _device; - VkFormatProperties *properties; - const struct anv_format *format; const struct surface_format_info *info; int gen; @@ -263,14 +258,12 @@ VkResult anv_GetFormatInfo( if (device->info.is_haswell) gen += 5; - format = anv_format_for_vk_format(_format); - if (format == 0) - return vk_error(VK_ERROR_INVALID_VALUE); if (format->format == UNSUPPORTED) - return VK_UNSUPPORTED; + goto unsupported; + info = &surface_formats[format->format]; if (!info->exists) - return VK_UNSUPPORTED; + goto unsupported; uint32_t linear = 0, tiled = 0; if (info->sampling <= gen) { @@ -289,16 +282,39 @@ VkResult anv_GetFormatInfo( linear |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; } + properties->linearTilingFeatures = linear; + properties->optimalTilingFeatures = tiled; + return; + + unsupported: + properties->linearTilingFeatures = 0; + properties->optimalTilingFeatures = 0; +} + +VkResult anv_GetFormatInfo( + VkDevice _device, + VkFormat _format, + VkFormatInfoType infoType, + size_t* pDataSize, + void* pData) +{ + struct anv_device *device = (struct anv_device *) _device; + const struct anv_format *format; + VkFormatProperties *properties; + + format = anv_format_for_vk_format(_format); + if (format == 0) + return 
vk_error(VK_ERROR_INVALID_VALUE); + switch (infoType) { case VK_FORMAT_INFO_TYPE_PROPERTIES: - properties = pData; + properties = (VkFormatProperties *)pData; *pDataSize = sizeof(*properties); if (pData == NULL) return VK_SUCCESS; - properties->linearTilingFeatures = linear; - properties->optimalTilingFeatures = tiled; + anv_format_get_properties(device, format, properties); return VK_SUCCESS; default: -- cgit v1.2.3 From ea7ef46cf9c91c27e70c837db0d162382efaddcb Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 10 Jun 2015 22:06:46 -0700 Subject: vk: Regenerate headers with __gen_validate_value() --- src/vulkan/gen75_pack.h | 7 +++++++ src/vulkan/gen7_pack.h | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index aa586df7785..f9b34d89df8 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -32,6 +32,10 @@ #include #include +#ifndef __gen_validate_value +#define __gen_validate_value(x) +#endif + #ifndef __gen_field_functions #define __gen_field_functions @@ -43,6 +47,7 @@ union __gen_value { static inline uint64_t __gen_field(uint64_t v, uint32_t start, uint32_t end) { + __gen_validate_value(v); #if DEBUG if (end - start + 1 < 64) assert(v < 1ul << (end - start + 1)); @@ -54,6 +59,7 @@ __gen_field(uint64_t v, uint32_t start, uint32_t end) static inline uint64_t __gen_offset(uint64_t v, uint32_t start, uint32_t end) { + __gen_validate_value(v); #if DEBUG uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; @@ -66,6 +72,7 @@ __gen_offset(uint64_t v, uint32_t start, uint32_t end) static inline uint32_t __gen_float(float v) { + __gen_validate_value(v); return ((union __gen_value) { .f = (v) }).dw; } diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 0c9ca3fb248..5e56ce2d367 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -32,6 +32,10 @@ #include #include +#ifndef __gen_validate_value +#define 
__gen_validate_value(x) +#endif + #ifndef __gen_field_functions #define __gen_field_functions @@ -43,6 +47,7 @@ union __gen_value { static inline uint64_t __gen_field(uint64_t v, uint32_t start, uint32_t end) { + __gen_validate_value(v); #if DEBUG if (end - start + 1 < 64) assert(v < 1ul << (end - start + 1)); @@ -54,6 +59,7 @@ __gen_field(uint64_t v, uint32_t start, uint32_t end) static inline uint64_t __gen_offset(uint64_t v, uint32_t start, uint32_t end) { + __gen_validate_value(v); #if DEBUG uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; @@ -66,6 +72,7 @@ __gen_offset(uint64_t v, uint32_t start, uint32_t end) static inline uint32_t __gen_float(float v) { + __gen_validate_value(v); return ((union __gen_value) { .f = (v) }).dw; } -- cgit v1.2.3 From a5b49d2799a5d52c3a86f97efc2dfeecf6c9c8bf Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 10 Jun 2015 23:11:37 -0700 Subject: vk: Use generated headers with fixed point support The generated headers now convert float in the template struct to the correct fixed point format. 
--- src/vulkan/device.c | 10 +- src/vulkan/gen75_pack.h | 244 ++++++++++++++++++++++++------------------------ src/vulkan/gen7_pack.h | 112 +++++++++++----------- src/vulkan/gen8_pack.h | 192 ++++++++++++++++++------------------- src/vulkan/image.c | 8 +- 5 files changed, 283 insertions(+), 283 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 3df707429e1..7ee820b1bc5 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1524,7 +1524,7 @@ fill_buffer_surface_state(void *state, VkFormat format, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, .MemoryObjectControlState = GEN8_MOCS, - .BaseMipLevel = 0, + .BaseMipLevel = 0.0, .SurfaceQPitch = 0, .Height = (num_elements >> 7) & 0x3fff, .Width = num_elements & 0x7f, @@ -1545,7 +1545,7 @@ fill_buffer_surface_state(void *state, VkFormat format, .ShaderChannelSelectGreen = SCS_GREEN, .ShaderChannelSelectBlue = SCS_BLUE, .ShaderChannelSelectAlpha = SCS_ALPHA, - .ResourceMinLOD = 0, + .ResourceMinLOD = 0.0, /* FIXME: We assume that the image must be bound at this time. 
*/ .SurfaceBaseAddress = { NULL, offset }, }; @@ -1648,14 +1648,14 @@ VkResult anv_CreateSampler( .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, .LODPreClampMode = 0, - .BaseMipLevel = 0, + .BaseMipLevel = 0.0, .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], .MagModeFilter = mag_filter, .MinModeFilter = min_filter, .TextureLODBias = pCreateInfo->mipLodBias * 256, .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = pCreateInfo->minLod * 256, - .MaxLOD = pCreateInfo->maxLod * 256, + .MinLOD = pCreateInfo->minLod, + .MaxLOD = pCreateInfo->maxLod, .ChromaKeyEnable = 0, .ChromaKeyIndex = 0, .ChromaKeyMode = 0, diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index f9b34d89df8..3796d89967d 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -616,10 +616,10 @@ struct GEN75_3DSTATE_AA_LINE_PARAMETERS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t AACoverageBias; - uint32_t AACoverageSlope; - uint32_t AACoverageEndCapBias; - uint32_t AACoverageEndCapSlope; + float AACoverageBias; + float AACoverageSlope; + float AACoverageEndCapBias; + float AACoverageEndCapSlope; }; static inline void @@ -637,13 +637,13 @@ GEN75_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst 0; dw[1] = - __gen_field(values->AACoverageBias, 16, 23) | - __gen_field(values->AACoverageSlope, 0, 7) | + __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | + __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | 0; dw[2] = - __gen_field(values->AACoverageEndCapBias, 16, 23) | - __gen_field(values->AACoverageEndCapSlope, 0, 7) | + __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | + __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | 0; } @@ -1340,8 +1340,8 @@ struct GEN75_3DSTATE_CLIP { #define Vertex1 1 #define Vertex2 2 uint32_t TriangleFanProvokingVertexSelect; - uint32_t MinimumPointWidth; - uint32_t MaximumPointWidth; + float MinimumPointWidth; + 
float MaximumPointWidth; uint32_t ForceZeroRTAIndexEnable; uint32_t MaximumVPIndex; }; @@ -1385,8 +1385,8 @@ GEN75_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, 0; dw[3] = - __gen_field(values->MinimumPointWidth, 17, 27) | - __gen_field(values->MaximumPointWidth, 6, 16) | + __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | + __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | __gen_field(values->MaximumVPIndex, 0, 3) | 0; @@ -2584,7 +2584,7 @@ struct GEN75_3DSTATE_LINE_STIPPLE { uint32_t CurrentRepeatCounter; uint32_t CurrentStippleIndex; uint32_t LineStipplePattern; - uint32_t LineStippleInverseRepeatCount; + float LineStippleInverseRepeatCount; uint32_t LineStippleRepeatCount; }; @@ -2610,7 +2610,7 @@ GEN75_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, 0; dw[2] = - __gen_field(values->LineStippleInverseRepeatCount, 15, 31) | + __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | __gen_field(values->LineStippleRepeatCount, 0, 8) | 0; @@ -2679,22 +2679,22 @@ struct GEN75_3DSTATE_MULTISAMPLE { #define NUMSAMPLES_4 2 #define NUMSAMPLES_8 3 uint32_t NumberofMultisamples; - uint32_t Sample3XOffset; - uint32_t Sample3YOffset; - uint32_t Sample2XOffset; - uint32_t Sample2YOffset; - uint32_t Sample1XOffset; - uint32_t Sample1YOffset; - uint32_t Sample0XOffset; - uint32_t Sample0YOffset; - uint32_t Sample7XOffset; - uint32_t Sample7YOffset; - uint32_t Sample6XOffset; - uint32_t Sample6YOffset; - uint32_t Sample5XOffset; - uint32_t Sample5YOffset; - uint32_t Sample4XOffset; - uint32_t Sample4YOffset; + float Sample3XOffset; + float Sample3YOffset; + float Sample2XOffset; + float Sample2YOffset; + float Sample1XOffset; + float Sample1YOffset; + float Sample0XOffset; + float Sample0YOffset; + float Sample7XOffset; + float Sample7YOffset; + float Sample6XOffset; + float Sample6YOffset; + float Sample5XOffset; + float Sample5YOffset; + float 
Sample4XOffset; + float Sample4YOffset; }; static inline void @@ -2718,25 +2718,25 @@ GEN75_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, 0; dw[2] = - __gen_field(values->Sample3XOffset, 28, 31) | - __gen_field(values->Sample3YOffset, 24, 27) | - __gen_field(values->Sample2XOffset, 20, 23) | - __gen_field(values->Sample2YOffset, 16, 19) | - __gen_field(values->Sample1XOffset, 12, 15) | - __gen_field(values->Sample1YOffset, 8, 11) | - __gen_field(values->Sample0XOffset, 4, 7) | - __gen_field(values->Sample0YOffset, 0, 3) | + __gen_field(values->Sample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample0YOffset * (1 << 4), 0, 3) | 0; dw[3] = - __gen_field(values->Sample7XOffset, 28, 31) | - __gen_field(values->Sample7YOffset, 24, 27) | - __gen_field(values->Sample6XOffset, 20, 23) | - __gen_field(values->Sample6YOffset, 16, 19) | - __gen_field(values->Sample5XOffset, 12, 15) | - __gen_field(values->Sample5YOffset, 8, 11) | - __gen_field(values->Sample4XOffset, 4, 7) | - __gen_field(values->Sample4YOffset, 0, 3) | + __gen_field(values->Sample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample4YOffset * (1 << 4), 0, 3) | 0; } @@ -3175,38 +3175,38 @@ struct GEN75_3DSTATE_RAST_MULTISAMPLE { #define NRM_NUMRASTSAMPLES_8 3 #define 
NRM_NUMRASTSAMPLES_16 4 uint32_t NumberofRasterizationMultisamples; - uint32_t Sample3XOffset; - uint32_t Sample3YOffset; - uint32_t Sample2XOffset; - uint32_t Sample2YOffset; - uint32_t Sample1XOffset; - uint32_t Sample1YOffset; - uint32_t Sample0XOffset; - uint32_t Sample0YOffset; - uint32_t Sample7XOffset; - uint32_t Sample7YOffset; - uint32_t Sample6XOffset; - uint32_t Sample6YOffset; - uint32_t Sample5XOffset; - uint32_t Sample5YOffset; - uint32_t Sample4XOffset; - uint32_t Sample4YOffset; - uint32_t Sample11XOffset; - uint32_t Sample11YOffset; - uint32_t Sample10XOffset; - uint32_t Sample10YOffset; - uint32_t Sample9XOffset; - uint32_t Sample9YOffset; - uint32_t Sample8XOffset; - uint32_t Sample8YOffset; - uint32_t Sample15XOffset; - uint32_t Sample15YOffset; - uint32_t Sample14XOffset; - uint32_t Sample14YOffset; - uint32_t Sample13XOffset; - uint32_t Sample13YOffset; - uint32_t Sample12XOffset; - uint32_t Sample12YOffset; + float Sample3XOffset; + float Sample3YOffset; + float Sample2XOffset; + float Sample2YOffset; + float Sample1XOffset; + float Sample1YOffset; + float Sample0XOffset; + float Sample0YOffset; + float Sample7XOffset; + float Sample7YOffset; + float Sample6XOffset; + float Sample6YOffset; + float Sample5XOffset; + float Sample5YOffset; + float Sample4XOffset; + float Sample4YOffset; + float Sample11XOffset; + float Sample11YOffset; + float Sample10XOffset; + float Sample10YOffset; + float Sample9XOffset; + float Sample9YOffset; + float Sample8XOffset; + float Sample8YOffset; + float Sample15XOffset; + float Sample15YOffset; + float Sample14XOffset; + float Sample14YOffset; + float Sample13XOffset; + float Sample13YOffset; + float Sample12XOffset; + float Sample12YOffset; }; static inline void @@ -3228,47 +3228,47 @@ GEN75_3DSTATE_RAST_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, 0; dw[2] = - __gen_field(values->Sample3XOffset, 28, 31) | - __gen_field(values->Sample3YOffset, 24, 27) | - __gen_field(values->Sample2XOffset, 20, 
23) | - __gen_field(values->Sample2YOffset, 16, 19) | - __gen_field(values->Sample1XOffset, 12, 15) | - __gen_field(values->Sample1YOffset, 8, 11) | - __gen_field(values->Sample0XOffset, 4, 7) | - __gen_field(values->Sample0YOffset, 0, 3) | + __gen_field(values->Sample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample0YOffset * (1 << 4), 0, 3) | 0; dw[3] = - __gen_field(values->Sample7XOffset, 28, 31) | - __gen_field(values->Sample7YOffset, 24, 27) | - __gen_field(values->Sample6XOffset, 20, 23) | - __gen_field(values->Sample6YOffset, 16, 19) | - __gen_field(values->Sample5XOffset, 12, 15) | - __gen_field(values->Sample5YOffset, 8, 11) | - __gen_field(values->Sample4XOffset, 4, 7) | - __gen_field(values->Sample4YOffset, 0, 3) | + __gen_field(values->Sample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample4YOffset * (1 << 4), 0, 3) | 0; dw[4] = - __gen_field(values->Sample11XOffset, 28, 31) | - __gen_field(values->Sample11YOffset, 24, 27) | - __gen_field(values->Sample10XOffset, 20, 23) | - __gen_field(values->Sample10YOffset, 16, 19) | - __gen_field(values->Sample9XOffset, 12, 15) | - __gen_field(values->Sample9YOffset, 8, 11) | - __gen_field(values->Sample8XOffset, 4, 7) | - __gen_field(values->Sample8YOffset, 0, 3) | + __gen_field(values->Sample11XOffset * 
(1 << 4), 28, 31) | + __gen_field(values->Sample11YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample10XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample10YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample9XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample9YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample8XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample8YOffset * (1 << 4), 0, 3) | 0; dw[5] = - __gen_field(values->Sample15XOffset, 28, 31) | - __gen_field(values->Sample15YOffset, 24, 27) | - __gen_field(values->Sample14XOffset, 20, 23) | - __gen_field(values->Sample14YOffset, 16, 19) | - __gen_field(values->Sample13XOffset, 12, 15) | - __gen_field(values->Sample13YOffset, 8, 11) | - __gen_field(values->Sample12XOffset, 4, 7) | - __gen_field(values->Sample12YOffset, 0, 3) | + __gen_field(values->Sample15XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample15YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample14XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample14YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample13XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample13YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample12XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample12YOffset * (1 << 4), 0, 3) | 0; } @@ -3814,7 +3814,7 @@ struct GEN75_3DSTATE_SF { #define CULLMODE_FRONT 2 #define CULLMODE_BACK 3 uint32_t CullMode; - uint32_t LineWidth; + float LineWidth; uint32_t LineEndCapAntialiasingRegionWidth; uint32_t LineStippleEnable; uint32_t ScissorRectangleEnable; @@ -3834,7 +3834,7 @@ struct GEN75_3DSTATE_SF { uint32_t AALineDistanceMode; uint32_t VertexSubPixelPrecisionSelect; uint32_t UsePointWidthState; - uint32_t PointWidth; + float PointWidth; uint32_t GlobalDepthOffsetConstant; uint32_t GlobalDepthOffsetScale; uint32_t GlobalDepthOffsetClamp; @@ -3870,7 +3870,7 @@ GEN75_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, dw[2] = __gen_field(values->AntiAliasingEnable, 
31, 31) | __gen_field(values->CullMode, 29, 30) | - __gen_field(values->LineWidth, 18, 27) | + __gen_field(values->LineWidth * (1 << 7), 18, 27) | __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | __gen_field(values->LineStippleEnable, 14, 14) | __gen_field(values->ScissorRectangleEnable, 11, 11) | @@ -3886,7 +3886,7 @@ GEN75_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->AALineDistanceMode, 14, 14) | __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | __gen_field(values->UsePointWidthState, 11, 11) | - __gen_field(values->PointWidth, 0, 10) | + __gen_field(values->PointWidth * (1 << 3), 0, 10) | 0; dw[4] = @@ -7548,7 +7548,7 @@ struct GEN75_RENDER_SURFACE_STATE { uint32_t ShaderChannelSelectG; uint32_t ShaderChannelSelectB; uint32_t ShaderChannelSelectA; - uint32_t ResourceMinLOD; + float ResourceMinLOD; }; static inline void @@ -7627,7 +7627,7 @@ GEN75_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->ShaderChannelSelectG, 22, 24) | __gen_field(values->ShaderChannelSelectB, 19, 21) | __gen_field(values->ShaderChannelSelectA, 16, 18) | - __gen_field(values->ResourceMinLOD, 0, 11) | + __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | 0; } @@ -7694,7 +7694,7 @@ struct GEN75_SAMPLER_STATE { uint32_t TextureBorderColorMode; #define OGL 1 uint32_t LODPreClampEnable; - uint32_t BaseMipLevel; + float BaseMipLevel; #define MIPFILTER_NONE 0 #define MIPFILTER_NEAREST 1 #define MIPFILTER_LINEAR 3 @@ -7713,8 +7713,8 @@ struct GEN75_SAMPLER_STATE { #define LEGACY 0 #define EWAApproximation 1 uint32_t AnisotropicAlgorithm; - uint32_t MinLOD; - uint32_t MaxLOD; + float MinLOD; + float MaxLOD; #define PREFILTEROPALWAYS 0 #define PREFILTEROPNEVER 1 #define PREFILTEROPLESS 2 @@ -7769,7 +7769,7 @@ GEN75_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SamplerDisable, 31, 31) | __gen_field(values->TextureBorderColorMode, 29, 29) | 
__gen_field(values->LODPreClampEnable, 28, 28) | - __gen_field(values->BaseMipLevel, 22, 26) | + __gen_field(values->BaseMipLevel * (1 << 1), 22, 26) | __gen_field(values->MipModeFilter, 20, 21) | __gen_field(values->MagModeFilter, 17, 19) | __gen_field(values->MinModeFilter, 14, 16) | @@ -7778,8 +7778,8 @@ GEN75_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->MinLOD, 20, 31) | - __gen_field(values->MaxLOD, 8, 19) | + __gen_field(values->MinLOD * (1 << 8), 20, 31) | + __gen_field(values->MaxLOD * (1 << 8), 8, 19) | __gen_field(values->ShadowFunction, 1, 3) | __gen_field(values->CubeSurfaceControlMode, 0, 0) | 0; diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 5e56ce2d367..09a477bc663 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -576,10 +576,10 @@ struct GEN7_3DSTATE_AA_LINE_PARAMETERS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t AACoverageBias; - uint32_t AACoverageSlope; - uint32_t AACoverageEndCapBias; - uint32_t AACoverageEndCapSlope; + float AACoverageBias; + float AACoverageSlope; + float AACoverageEndCapBias; + float AACoverageEndCapSlope; }; static inline void @@ -597,13 +597,13 @@ GEN7_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->AACoverageBias, 16, 23) | - __gen_field(values->AACoverageSlope, 0, 7) | + __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | + __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | 0; dw[2] = - __gen_field(values->AACoverageEndCapBias, 16, 23) | - __gen_field(values->AACoverageEndCapSlope, 0, 7) | + __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | + __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | 0; } @@ -1014,8 +1014,8 @@ struct GEN7_3DSTATE_CLIP { #define Vertex1 1 #define Vertex2 2 uint32_t TriangleFanProvokingVertexSelect; - uint32_t MinimumPointWidth; - uint32_t MaximumPointWidth; + float 
MinimumPointWidth; + float MaximumPointWidth; uint32_t ForceZeroRTAIndexEnable; uint32_t MaximumVPIndex; }; @@ -1059,8 +1059,8 @@ GEN7_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, 0; dw[3] = - __gen_field(values->MinimumPointWidth, 17, 27) | - __gen_field(values->MaximumPointWidth, 6, 16) | + __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | + __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | __gen_field(values->MaximumVPIndex, 0, 3) | 0; @@ -1937,7 +1937,7 @@ struct GEN7_3DSTATE_LINE_STIPPLE { uint32_t CurrentRepeatCounter; uint32_t CurrentStippleIndex; uint32_t LineStipplePattern; - uint32_t LineStippleInverseRepeatCount; + float LineStippleInverseRepeatCount; uint32_t LineStippleRepeatCount; }; @@ -1963,7 +1963,7 @@ GEN7_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, 0; dw[2] = - __gen_field(values->LineStippleInverseRepeatCount, 15, 31) | + __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | __gen_field(values->LineStippleRepeatCount, 0, 8) | 0; @@ -2031,22 +2031,22 @@ struct GEN7_3DSTATE_MULTISAMPLE { #define NUMSAMPLES_4 2 #define NUMSAMPLES_8 3 uint32_t NumberofMultisamples; - uint32_t Sample3XOffset; - uint32_t Sample3YOffset; - uint32_t Sample2XOffset; - uint32_t Sample2YOffset; - uint32_t Sample1XOffset; - uint32_t Sample1YOffset; - uint32_t Sample0XOffset; - uint32_t Sample0YOffset; - uint32_t Sample7XOffset; - uint32_t Sample7YOffset; - uint32_t Sample6XOffset; - uint32_t Sample6YOffset; - uint32_t Sample5XOffset; - uint32_t Sample5YOffset; - uint32_t Sample4XOffset; - uint32_t Sample4YOffset; + float Sample3XOffset; + float Sample3YOffset; + float Sample2XOffset; + float Sample2YOffset; + float Sample1XOffset; + float Sample1YOffset; + float Sample0XOffset; + float Sample0YOffset; + float Sample7XOffset; + float Sample7YOffset; + float Sample6XOffset; + float Sample6YOffset; + float Sample5XOffset; + float Sample5YOffset; 
+ float Sample4XOffset; + float Sample4YOffset; }; static inline void @@ -2069,25 +2069,25 @@ GEN7_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, 0; dw[2] = - __gen_field(values->Sample3XOffset, 28, 31) | - __gen_field(values->Sample3YOffset, 24, 27) | - __gen_field(values->Sample2XOffset, 20, 23) | - __gen_field(values->Sample2YOffset, 16, 19) | - __gen_field(values->Sample1XOffset, 12, 15) | - __gen_field(values->Sample1YOffset, 8, 11) | - __gen_field(values->Sample0XOffset, 4, 7) | - __gen_field(values->Sample0YOffset, 0, 3) | + __gen_field(values->Sample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample0YOffset * (1 << 4), 0, 3) | 0; dw[3] = - __gen_field(values->Sample7XOffset, 28, 31) | - __gen_field(values->Sample7YOffset, 24, 27) | - __gen_field(values->Sample6XOffset, 20, 23) | - __gen_field(values->Sample6YOffset, 16, 19) | - __gen_field(values->Sample5XOffset, 12, 15) | - __gen_field(values->Sample5YOffset, 8, 11) | - __gen_field(values->Sample4XOffset, 4, 7) | - __gen_field(values->Sample4YOffset, 0, 3) | + __gen_field(values->Sample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample4YOffset * (1 << 4), 0, 3) | 0; } @@ -3050,7 +3050,7 @@ struct GEN7_3DSTATE_SF { #define CULLMODE_FRONT 2 #define CULLMODE_BACK 3 uint32_t 
CullMode; - uint32_t LineWidth; + float LineWidth; uint32_t LineEndCapAntialiasingRegionWidth; uint32_t ScissorRectangleEnable; uint32_t MultisampleRasterizationMode; @@ -3068,7 +3068,7 @@ struct GEN7_3DSTATE_SF { uint32_t AALineDistanceMode; uint32_t VertexSubPixelPrecisionSelect; uint32_t UsePointWidthState; - uint32_t PointWidth; + float PointWidth; uint32_t GlobalDepthOffsetConstant; uint32_t GlobalDepthOffsetScale; uint32_t GlobalDepthOffsetClamp; @@ -3104,7 +3104,7 @@ GEN7_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, dw[2] = __gen_field(values->AntiAliasingEnable, 31, 31) | __gen_field(values->CullMode, 29, 30) | - __gen_field(values->LineWidth, 18, 27) | + __gen_field(values->LineWidth * (1 << 7), 18, 27) | __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | __gen_field(values->ScissorRectangleEnable, 11, 11) | __gen_field(values->MultisampleRasterizationMode, 8, 9) | @@ -3118,7 +3118,7 @@ GEN7_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->AALineDistanceMode, 14, 14) | __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | __gen_field(values->UsePointWidthState, 11, 11) | - __gen_field(values->PointWidth, 0, 10) | + __gen_field(values->PointWidth * (1 << 3), 0, 10) | 0; dw[4] = @@ -6249,7 +6249,7 @@ struct GEN7_SAMPLER_STATE { uint32_t TextureBorderColorMode; #define OGL 1 uint32_t LODPreClampEnable; - uint32_t BaseMipLevel; + float BaseMipLevel; #define MIPFILTER_NONE 0 #define MIPFILTER_NEAREST 1 #define MIPFILTER_LINEAR 3 @@ -6268,8 +6268,8 @@ struct GEN7_SAMPLER_STATE { #define LEGACY 0 #define EWAApproximation 1 uint32_t AnisotropicAlgorithm; - uint32_t MinLOD; - uint32_t MaxLOD; + float MinLOD; + float MaxLOD; #define PREFILTEROPALWAYS 0 #define PREFILTEROPNEVER 1 #define PREFILTEROPLESS 2 @@ -6323,7 +6323,7 @@ GEN7_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SamplerDisable, 31, 31) | __gen_field(values->TextureBorderColorMode, 29, 29) | 
__gen_field(values->LODPreClampEnable, 28, 28) | - __gen_field(values->BaseMipLevel, 22, 26) | + __gen_field(values->BaseMipLevel * (1 << 1), 22, 26) | __gen_field(values->MipModeFilter, 20, 21) | __gen_field(values->MagModeFilter, 17, 19) | __gen_field(values->MinModeFilter, 14, 16) | @@ -6332,8 +6332,8 @@ GEN7_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->MinLOD, 20, 31) | - __gen_field(values->MaxLOD, 8, 19) | + __gen_field(values->MinLOD * (1 << 8), 20, 31) | + __gen_field(values->MaxLOD * (1 << 8), 8, 19) | __gen_field(values->ShadowFunction, 1, 3) | __gen_field(values->CubeSurfaceControlMode, 0, 0) | 0; diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index 54ad55ab490..117e64ace7b 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -1004,14 +1004,14 @@ struct GEN8_3DSTATE_AA_LINE_PARAMETERS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t AAPointCoverageBias; - uint32_t AACoverageBias; - uint32_t AAPointCoverageSlope; - uint32_t AACoverageSlope; - uint32_t AAPointCoverageEndCapBias; - uint32_t AACoverageEndCapBias; - uint32_t AAPointCoverageEndCapSlope; - uint32_t AACoverageEndCapSlope; + float AAPointCoverageBias; + float AACoverageBias; + float AAPointCoverageSlope; + float AACoverageSlope; + float AAPointCoverageEndCapBias; + float AACoverageEndCapBias; + float AAPointCoverageEndCapSlope; + float AACoverageEndCapSlope; }; static inline void @@ -1029,17 +1029,17 @@ GEN8_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->AAPointCoverageBias, 24, 31) | - __gen_field(values->AACoverageBias, 16, 23) | - __gen_field(values->AAPointCoverageSlope, 8, 15) | - __gen_field(values->AACoverageSlope, 0, 7) | + __gen_field(values->AAPointCoverageBias * (1 << 8), 24, 31) | + __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | + __gen_field(values->AAPointCoverageSlope * (1 << 8), 8, 15) | + 
__gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | 0; dw[2] = - __gen_field(values->AAPointCoverageEndCapBias, 24, 31) | - __gen_field(values->AACoverageEndCapBias, 16, 23) | - __gen_field(values->AAPointCoverageEndCapSlope, 8, 15) | - __gen_field(values->AACoverageEndCapSlope, 0, 7) | + __gen_field(values->AAPointCoverageEndCapBias * (1 << 8), 24, 31) | + __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | + __gen_field(values->AAPointCoverageEndCapSlope * (1 << 8), 8, 15) | + __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | 0; } @@ -1738,8 +1738,8 @@ struct GEN8_3DSTATE_CLIP { uint32_t TriangleStripListProvokingVertexSelect; uint32_t LineStripListProvokingVertexSelect; uint32_t TriangleFanProvokingVertexSelect; - uint32_t MinimumPointWidth; - uint32_t MaximumPointWidth; + float MinimumPointWidth; + float MaximumPointWidth; uint32_t ForceZeroRTAIndexEnable; uint32_t MaximumVPIndex; }; @@ -1783,8 +1783,8 @@ GEN8_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, 0; dw[3] = - __gen_field(values->MinimumPointWidth, 17, 27) | - __gen_field(values->MaximumPointWidth, 6, 16) | + __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | + __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | __gen_field(values->MaximumVPIndex, 0, 3) | 0; @@ -3040,7 +3040,7 @@ struct GEN8_3DSTATE_LINE_STIPPLE { uint32_t CurrentRepeatCounter; uint32_t CurrentStippleIndex; uint32_t LineStipplePattern; - uint32_t LineStippleInverseRepeatCount; + float LineStippleInverseRepeatCount; uint32_t LineStippleRepeatCount; }; @@ -3066,7 +3066,7 @@ GEN8_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, 0; dw[2] = - __gen_field(values->LineStippleInverseRepeatCount, 15, 31) | + __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | __gen_field(values->LineStippleRepeatCount, 0, 8) | 0; @@ -4139,36 +4139,36 @@ struct GEN8_3DSTATE_SAMPLE_PATTERN { uint32_t 
_3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t _8xSample7XOffset; - uint32_t _8xSample7YOffset; - uint32_t _8xSample6XOffset; - uint32_t _8xSample6YOffset; - uint32_t _8xSample5XOffset; - uint32_t _8xSample5YOffset; - uint32_t _8xSample4XOffset; - uint32_t _8xSample4YOffset; - uint32_t _8xSample3XOffset; - uint32_t _8xSample3YOffset; - uint32_t _8xSample2XOffset; - uint32_t _8xSample2YOffset; - uint32_t _8xSample1XOffset; - uint32_t _8xSample1YOffset; - uint32_t _8xSample0XOffset; - uint32_t _8xSample0YOffset; - uint32_t _4xSample3XOffset; - uint32_t _4xSample3YOffset; - uint32_t _4xSample2XOffset; - uint32_t _4xSample2YOffset; - uint32_t _4xSample1XOffset; - uint32_t _4xSample1YOffset; - uint32_t _4xSample0XOffset; - uint32_t _4xSample0YOffset; - uint32_t _1xSample0XOffset; - uint32_t _1xSample0YOffset; - uint32_t _2xSample1XOffset; - uint32_t _2xSample1YOffset; - uint32_t _2xSample0XOffset; - uint32_t _2xSample0YOffset; + float _8xSample7XOffset; + float _8xSample7YOffset; + float _8xSample6XOffset; + float _8xSample6YOffset; + float _8xSample5XOffset; + float _8xSample5YOffset; + float _8xSample4XOffset; + float _8xSample4YOffset; + float _8xSample3XOffset; + float _8xSample3YOffset; + float _8xSample2XOffset; + float _8xSample2YOffset; + float _8xSample1XOffset; + float _8xSample1YOffset; + float _8xSample0XOffset; + float _8xSample0YOffset; + float _4xSample3XOffset; + float _4xSample3YOffset; + float _4xSample2XOffset; + float _4xSample2YOffset; + float _4xSample1XOffset; + float _4xSample1YOffset; + float _4xSample0XOffset; + float _4xSample0YOffset; + float _1xSample0XOffset; + float _1xSample0YOffset; + float _2xSample1XOffset; + float _2xSample1YOffset; + float _2xSample0XOffset; + float _2xSample0YOffset; }; static inline void @@ -4189,45 +4189,45 @@ GEN8_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, 0; dw[5] = - __gen_field(values->_8xSample7XOffset, 28, 31) | - 
__gen_field(values->_8xSample7YOffset, 24, 27) | - __gen_field(values->_8xSample6XOffset, 20, 23) | - __gen_field(values->_8xSample6YOffset, 16, 19) | - __gen_field(values->_8xSample5XOffset, 12, 15) | - __gen_field(values->_8xSample5YOffset, 8, 11) | - __gen_field(values->_8xSample4XOffset, 4, 7) | - __gen_field(values->_8xSample4YOffset, 0, 3) | + __gen_field(values->_8xSample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->_8xSample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->_8xSample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->_8xSample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->_8xSample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->_8xSample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->_8xSample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->_8xSample4YOffset * (1 << 4), 0, 3) | 0; dw[6] = - __gen_field(values->_8xSample3XOffset, 28, 31) | - __gen_field(values->_8xSample3YOffset, 24, 27) | - __gen_field(values->_8xSample2XOffset, 20, 23) | - __gen_field(values->_8xSample2YOffset, 16, 19) | - __gen_field(values->_8xSample1XOffset, 12, 15) | - __gen_field(values->_8xSample1YOffset, 8, 11) | - __gen_field(values->_8xSample0XOffset, 4, 7) | - __gen_field(values->_8xSample0YOffset, 0, 3) | + __gen_field(values->_8xSample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->_8xSample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->_8xSample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->_8xSample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->_8xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_8xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_8xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_8xSample0YOffset * (1 << 4), 0, 3) | 0; dw[7] = - __gen_field(values->_4xSample3XOffset, 28, 31) | - __gen_field(values->_4xSample3YOffset, 24, 27) | - __gen_field(values->_4xSample2XOffset, 20, 23) | - __gen_field(values->_4xSample2YOffset, 16, 19) | - 
__gen_field(values->_4xSample1XOffset, 12, 15) | - __gen_field(values->_4xSample1YOffset, 8, 11) | - __gen_field(values->_4xSample0XOffset, 4, 7) | - __gen_field(values->_4xSample0YOffset, 0, 3) | + __gen_field(values->_4xSample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->_4xSample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->_4xSample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->_4xSample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->_4xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_4xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_4xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_4xSample0YOffset * (1 << 4), 0, 3) | 0; dw[8] = - __gen_field(values->_1xSample0XOffset, 20, 23) | - __gen_field(values->_1xSample0YOffset, 16, 19) | - __gen_field(values->_2xSample1XOffset, 12, 15) | - __gen_field(values->_2xSample1YOffset, 8, 11) | - __gen_field(values->_2xSample0XOffset, 4, 7) | - __gen_field(values->_2xSample0YOffset, 0, 3) | + __gen_field(values->_1xSample0XOffset * (1 << 4), 20, 23) | + __gen_field(values->_1xSample0YOffset * (1 << 4), 16, 19) | + __gen_field(values->_2xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_2xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_2xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_2xSample0YOffset * (1 << 4), 0, 3) | 0; } @@ -4476,7 +4476,7 @@ struct GEN8_3DSTATE_SF { uint32_t LegacyGlobalDepthBiasEnable; uint32_t StatisticsEnable; uint32_t ViewportTransformEnable; - uint32_t LineWidth; + float LineWidth; #define _05pixels 0 #define _10pixels 1 #define _20pixels 2 @@ -4493,7 +4493,7 @@ struct GEN8_3DSTATE_SF { #define Vertex 0 #define State 1 uint32_t PointWidthSource; - uint32_t PointWidth; + float PointWidth; }; static inline void @@ -4517,7 +4517,7 @@ GEN8_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, 0; dw[2] = - __gen_field(values->LineWidth, 18, 27) | + __gen_field(values->LineWidth * (1 << 7), 18, 27) | 
__gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | 0; @@ -4530,7 +4530,7 @@ GEN8_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SmoothPointEnable, 13, 13) | __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | __gen_field(values->PointWidthSource, 11, 11) | - __gen_field(values->PointWidth, 0, 10) | + __gen_field(values->PointWidth * (1 << 3), 0, 10) | 0; } @@ -8245,7 +8245,7 @@ struct GEN8_RENDER_SURFACE_STATE { uint32_t CubeFaceEnablePositiveX; uint32_t CubeFaceEnableNegativeX; struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; - uint32_t BaseMipLevel; + float BaseMipLevel; uint32_t SurfaceQPitch; uint32_t Height; uint32_t Width; @@ -8292,7 +8292,7 @@ struct GEN8_RENDER_SURFACE_STATE { uint32_t ShaderChannelSelectGreen; uint32_t ShaderChannelSelectBlue; uint32_t ShaderChannelSelectAlpha; - uint32_t ResourceMinLOD; + float ResourceMinLOD; __gen_address_type SurfaceBaseAddress; uint32_t XOffsetforVPlane; uint32_t YOffsetforVPlane; @@ -8330,7 +8330,7 @@ GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); dw[1] = __gen_field(dw_MemoryObjectControlState, 24, 30) | - __gen_field(values->BaseMipLevel, 19, 23) | + __gen_field(values->BaseMipLevel * (1 << 1), 19, 23) | __gen_field(values->SurfaceQPitch, 0, 14) | 0; @@ -8380,7 +8380,7 @@ GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->ShaderChannelSelectGreen, 22, 24) | __gen_field(values->ShaderChannelSelectBlue, 19, 21) | __gen_field(values->ShaderChannelSelectAlpha, 16, 18) | - __gen_field(values->ResourceMinLOD, 0, 11) | + __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | 0; uint32_t dw8 = @@ -8428,7 +8428,7 @@ struct GEN8_SAMPLER_STATE { #define CLAMP_NONE 0 #define CLAMP_OGL 2 uint32_t LODPreClampMode; - uint32_t BaseMipLevel; + float BaseMipLevel; #define 
MIPFILTER_NONE 0 #define MIPFILTER_NEAREST 1 #define MIPFILTER_LINEAR 3 @@ -8447,8 +8447,8 @@ struct GEN8_SAMPLER_STATE { #define LEGACY 0 #define EWAApproximation 1 uint32_t AnisotropicAlgorithm; - uint32_t MinLOD; - uint32_t MaxLOD; + float MinLOD; + float MaxLOD; uint32_t ChromaKeyEnable; uint32_t ChromaKeyIndex; #define KEYFILTER_KILL_ON_ANY_MATCH 0 @@ -8506,7 +8506,7 @@ GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SamplerDisable, 31, 31) | __gen_field(values->TextureBorderColorMode, 29, 29) | __gen_field(values->LODPreClampMode, 27, 28) | - __gen_field(values->BaseMipLevel, 22, 26) | + __gen_field(values->BaseMipLevel * (1 << 1), 22, 26) | __gen_field(values->MipModeFilter, 20, 21) | __gen_field(values->MagModeFilter, 17, 19) | __gen_field(values->MinModeFilter, 14, 16) | @@ -8515,8 +8515,8 @@ GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->MinLOD, 20, 31) | - __gen_field(values->MaxLOD, 8, 19) | + __gen_field(values->MinLOD * (1 << 8), 20, 31) | + __gen_field(values->MaxLOD * (1 << 8), 8, 19) | __gen_field(values->ChromaKeyEnable, 7, 7) | __gen_field(values->ChromaKeyIndex, 5, 6) | __gen_field(values->ChromaKeyMode, 4, 4) | diff --git a/src/vulkan/image.c b/src/vulkan/image.c index b385be48917..b8a15a1aa59 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -261,7 +261,7 @@ anv_image_view_init(struct anv_surface_view *view, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, .MemoryObjectControlState = GEN8_MOCS, - .BaseMipLevel = 0, + .BaseMipLevel = 0.0, .SurfaceQPitch = 0, .Height = image->extent.height - 1, .Width = image->extent.width - 1, @@ -282,7 +282,7 @@ anv_image_view_init(struct anv_surface_view *view, .ShaderChannelSelectGreen = vk_to_gen_swizzle[pCreateInfo->channels.g], .ShaderChannelSelectBlue = vk_to_gen_swizzle[pCreateInfo->channels.b], .ShaderChannelSelectAlpha = vk_to_gen_swizzle[pCreateInfo->channels.a], - 
.ResourceMinLOD = 0, + .ResourceMinLOD = 0.0, .SurfaceBaseAddress = { NULL, view->offset }, }; @@ -360,7 +360,7 @@ anv_color_attachment_view_init(struct anv_surface_view *view, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, .MemoryObjectControlState = GEN8_MOCS, - .BaseMipLevel = 0, + .BaseMipLevel = 0.0, .SurfaceQPitch = 0, .Height = image->extent.height - 1, .Width = image->extent.width - 1, @@ -381,7 +381,7 @@ anv_color_attachment_view_init(struct anv_surface_view *view, .ShaderChannelSelectGreen = SCS_GREEN, .ShaderChannelSelectBlue = SCS_BLUE, .ShaderChannelSelectAlpha = SCS_ALPHA, - .ResourceMinLOD = 0, + .ResourceMinLOD = 0.0, .SurfaceBaseAddress = { NULL, view->offset }, }; -- cgit v1.2.3 From d76ea7644ac5df281ae26e8ce1f8c5201539781b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 10 Jun 2015 23:13:24 -0700 Subject: vk: Set maximum point size range We set both minimum and maximum point size to 0 in 3DSTATE_CLIP, which will clip away all points. 
--- src/vulkan/pipeline.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 6f0b5b4aafd..95123bf931e 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -493,7 +493,9 @@ anv_pipeline_create( anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, .ClipEnable = true, - .ViewportXYClipTestEnable = !(extra && extra->disable_viewport)); + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM, .StatisticsEnable = true, -- cgit v1.2.3 From b581e924b6a9060ed4ef8e512d423d81cd3907b6 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Jun 2015 09:16:46 -0700 Subject: vk: Remove left-over trp call --- src/vulkan/pipeline.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 95123bf931e..dcb28984c1b 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -578,8 +578,6 @@ anv_pipeline_create( .VertexURBEntryOutputReadOffset = offset, .VertexURBEntryOutputLength = length); - //trp_generate_blend_hw_cmds(batch, pipeline); - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; /* Skip the VUE header and position slots */ offset = 1; -- cgit v1.2.3 From 1dd63fcbed5be028551e8a1d8cb8d3be6f7d544c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 11 Jun 2015 10:09:58 -0700 Subject: vk/entrypoints: Don't print every single function call --- src/vulkan/vk_gen.py | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/vk_gen.py b/src/vulkan/vk_gen.py index b97b50d4d00..9b50dbd766e 100644 --- a/src/vulkan/vk_gen.py +++ b/src/vulkan/vk_gen.py @@ -190,19 +190,6 @@ for type, name, args, num, h in entrypoints: print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % 
(type, name, args, name) -logger = """%s __attribute__ ((weak)) anv_validate_%s%s -{ - fprintf(stderr, "%%s\\n", strings + %d); - void *args = __builtin_apply_args(); - void *result = __builtin_apply((void *) anv_%s, args, 100); - __builtin_return(result); -} -""" - -for type, name, args, num, h in entrypoints: - print logger % (type, name, args, offsets[num], name) - - # Now generate the hash table used for entry point look up. This is a # uint16_t table of entry point indices. We use 0xffff to indicate an entry # in the hash table is empty. -- cgit v1.2.3 From 923e923bbceb76376edb9ddf138ee185105cb75b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Jun 2015 14:55:50 -0700 Subject: vk: Compile fragment shader after VS and GS Just moving code around to do shader stages in the natual order. --- src/vulkan/compiler.cpp | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 6adf92b3662..b5a287b622f 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -944,18 +944,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) } bool success; - struct brw_wm_prog_key wm_key; - struct gl_fragment_program *fp = (struct gl_fragment_program *) - program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; - struct brw_fragment_program *bfp = brw_fragment_program(fp); - - brw_wm_populate_key(brw, bfp, &wm_key); - - success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); - fail_if(!success, "do_wm_prog failed\n"); - pipeline->prog_data[VK_SHADER_STAGE_FRAGMENT] = &pipeline->wm_prog_data.base; - pipeline->active_stages = VK_SHADER_STAGE_FRAGMENT_BIT; - + pipeline->active_stages = 0; if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) { struct brw_vs_prog_key vs_key; @@ -991,6 +980,20 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) pipeline->gs_vec4 = NO_KERNEL; } + 
if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) { + struct brw_wm_prog_key wm_key; + struct gl_fragment_program *fp = (struct gl_fragment_program *) + program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; + struct brw_fragment_program *bfp = brw_fragment_program(fp); + + brw_wm_populate_key(brw, bfp, &wm_key); + + success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + pipeline->prog_data[VK_SHADER_STAGE_FRAGMENT] = &pipeline->wm_prog_data.base; + pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT; + } + brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); gen7_compute_urb_partition(pipeline); -- cgit v1.2.3 From 9aae480cc45d0f73c9d0d33e19d1244e703b323b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Jun 2015 14:56:29 -0700 Subject: vk: Don't emit STATE_SIP We don't have a SIP kernel and don't enable exceptions. --- src/vulkan/device.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 7ee820b1bc5..d06d83ab168 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2426,7 +2426,6 @@ VkResult anv_BeginCommandBuffer( anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, .PipelineSelection = _3D); - anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_SIP); anv_cmd_buffer_emit_state_base_address(cmd_buffer); -- cgit v1.2.3 From f7fe06cf0a6f39df406d7b18fa81f9dc31e05fea Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Jun 2015 14:58:31 -0700 Subject: vk: Disable shader stages in the graphics pipeline batch We need to move this into the graphics pipeline batch so we don't emit it for compute pipelines. 
--- src/vulkan/device.c | 22 ---------------------- src/vulkan/pipeline.c | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index d06d83ab168..f5ca70b83ec 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2429,28 +2429,6 @@ VkResult anv_BeginCommandBuffer( anv_cmd_buffer_emit_state_base_address(cmd_buffer); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VF_STATISTICS, - .StatisticsEnable = true); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HS, .Enable = false); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_TE, .TEEnable = false); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DS, .FunctionEnable = false); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS, - .ConstantBufferOffset = 0, - .ConstantBufferSize = 4); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS, - .ConstantBufferOffset = 4, - .ConstantBufferSize = 4); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS, - .ConstantBufferOffset = 8, - .ConstantBufferSize = 4); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_WM_CHROMAKEY, - .ChromaKeyKillEnable = false); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SBE_SWIZ); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); - return VK_SUCCESS; } diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index dcb28984c1b..28818152ae0 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -491,6 +491,28 @@ anv_pipeline_create( emit_ds_state(pipeline, ds_info); emit_cb_state(pipeline, cb_info); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_STATISTICS, + .StatisticsEnable = true); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_HS, .Enable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_TE, .TEEnable = false); + 
anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_DS, .FunctionEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS, + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS, + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS, + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM_CHROMAKEY, + .ChromaKeyKillEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE_SWIZ); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, .ClipEnable = true, .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), -- cgit v1.2.3 From e7edde60ba2ec1e9a98641a7926ad847df10f73c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Jun 2015 15:04:09 -0700 Subject: vk: Defer setting viewport dynamic state We can't emit this until we've done a 3D pipeline select. --- src/vulkan/device.c | 22 +++++++++++----------- src/vulkan/private.h | 1 + 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index f5ca70b83ec..9e872de89b6 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2663,20 +2663,11 @@ void anv_CmdBindDynamicStateObject( VkDynamicStateObject dynamicState) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_dynamic_vp_state *vp_state; switch (stateBindPoint) { case VK_STATE_BIND_POINT_VIEWPORT: - vp_state = (struct anv_dynamic_vp_state *) dynamicState; - /* We emit state immediately, but set cmd_buffer->vp_state to indicate - * that vp state has been set in this command buffer. 
*/ - cmd_buffer->vp_state = vp_state; - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, - .ScissorRectPointer = vp_state->scissor.offset); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, - .CCViewportPointer = vp_state->cc_vp.offset); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, - .SFClipViewportPointer = vp_state->sf_clip_vp.offset); + cmd_buffer->vp_state = (struct anv_dynamic_vp_state *) dynamicState; + cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY; break; case VK_STATE_BIND_POINT_RASTER: cmd_buffer->rs_state = (struct anv_dynamic_rs_state *) dynamicState; @@ -3111,6 +3102,15 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->descriptors_dirty) flush_descriptor_sets(cmd_buffer); + if (cmd_buffer->dirty & ANV_CMD_BUFFER_VP_DIRTY) { + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = cmd_buffer->vp_state->scissor.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = cmd_buffer->vp_state->cc_vp.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = cmd_buffer->vp_state->sf_clip_vp.offset); + } + if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { anv_batch_emit_merge(&cmd_buffer->batch, cmd_buffer->rs_state->state_sf, pipeline->state_sf); diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 9f1345fd8ed..3e4dc796420 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -617,6 +617,7 @@ struct anv_buffer { #define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) #define ANV_CMD_BUFFER_DS_DIRTY (1 << 3) #define ANV_CMD_BUFFER_CB_DIRTY (1 << 4) +#define ANV_CMD_BUFFER_VP_DIRTY (1 << 5) struct anv_vertex_binding { struct anv_buffer * buffer; -- cgit v1.2.3 From 405697eb3d3a08c5718dd7ec6a36dd8c416bd9b4 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg 
Kristensen Date: Thu, 11 Jun 2015 15:07:38 -0700 Subject: vk: Stop asserting we have a fragment shader Even for graphics, this is not a requirement, we can have a depth-only output pipeline. --- src/vulkan/compiler.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index b5a287b622f..28c4874a438 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -923,8 +923,6 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) assert(pipeline->shaders[i]->size % 4 == 0); } - assert(pipeline->shaders[VK_SHADER_STAGE_FRAGMENT] != NULL); - if (all_spirv) { for (unsigned i = 0; i < VK_NUM_SHADER_STAGE; i++) { if (pipeline->shaders[i]) -- cgit v1.2.3 From 7637b02aaa4df9efaf856ee855ba03b5fe81d821 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Jun 2015 15:21:49 -0700 Subject: vk: Emit PIPELINE_SELECT on demand --- src/vulkan/device.c | 12 +++++++++--- src/vulkan/private.h | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 9e872de89b6..e44fb2b1cd9 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2424,10 +2424,8 @@ VkResult anv_BeginCommandBuffer( { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, - .PipelineSelection = _3D); - anv_cmd_buffer_emit_state_base_address(cmd_buffer); + cmd_buffer->current_pipeline = UINT32_MAX; return VK_SUCCESS; } @@ -3071,6 +3069,14 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used; + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + if (cmd_buffer->current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = _3D); + cmd_buffer->current_pipeline = _3D; + } + if (vb_emit) { const uint32_t 
num_buffers = __builtin_popcount(vb_emit); const uint32_t num_dwords = 1 + num_buffers * 4; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 3e4dc796420..8bc5fd10880 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -650,6 +650,7 @@ struct anv_cmd_buffer { struct anv_state_stream dynamic_state_stream; /* State required while building cmd buffer */ + uint32_t current_pipeline; uint32_t vb_dirty; uint32_t dirty; uint32_t descriptors_dirty; -- cgit v1.2.3 From 765175f5d195df727ed9d171720f4e843809744e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Jun 2015 15:31:42 -0700 Subject: vk: Implement basic compute shader support --- src/glsl/glsl_parser_extras.cpp | 2 + src/mesa/drivers/dri/i965/brw_cs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_cs.h | 9 ++ src/vulkan/compiler.cpp | 93 +++++++++++ src/vulkan/device.c | 291 +++++++++++++++++++++++++++-------- src/vulkan/pipeline.c | 54 ++++++- src/vulkan/private.h | 5 + 7 files changed, 393 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index be6713c46a2..982ade6a70a 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -87,6 +87,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->extensions = &ctx->Extensions; + this->ARB_compute_shader_enable = true; + this->Const.MaxLights = ctx->Const.MaxLights; this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes; this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits; diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 70731e4a4ff..2432875d0f4 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -55,7 +55,7 @@ brw_cs_prog_data_compare(const void *in_a, const void *in_b) } -static const unsigned * +const unsigned * brw_cs_emit(struct brw_context *brw, void *mem_ctx, const struct brw_cs_prog_key *key, diff 
--git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h index 8404aa3e824..b83d49a0635 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ b/src/mesa/drivers/dri/i965/brw_cs.h @@ -41,6 +41,15 @@ bool brw_cs_prog_data_compare(const void *a, const void *b); void brw_upload_cs_prog(struct brw_context *brw); +const unsigned * +brw_cs_emit(struct brw_context *brw, + void *mem_ctx, + const struct brw_cs_prog_key *key, + struct brw_cs_prog_data *prog_data, + struct gl_compute_program *cp, + struct gl_shader_program *prog, + unsigned *final_assembly_size); + #ifdef __cplusplus } #endif diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 28c4874a438..0db7935f085 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -603,6 +604,68 @@ really_do_gs_prog(struct brw_context *brw, return true; } +static bool +brw_codegen_cs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_compute_program *cp, + struct brw_cs_prog_key *key, struct anv_pipeline *pipeline) +{ + struct gl_context *ctx = &brw->ctx; + const GLuint *program; + void *mem_ctx = ralloc_context(NULL); + GLuint program_size; + struct brw_cs_prog_data prog_data; + + struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + assert (cs); + + memset(&prog_data, 0, sizeof(prog_data)); + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + int param_count = cs->num_uniform_components; + + /* The backend also sometimes adds params for texture size. 
*/ + param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; + prog_data.base.param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.pull_param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.nr_params = param_count; + + program = brw_cs_emit(brw, mem_ctx, key, &prog_data, + &cp->program, prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + if (unlikely(INTEL_DEBUG & DEBUG_CS)) + fprintf(stderr, "\n"); + + struct anv_state cs_state = anv_state_stream_alloc(&pipeline->program_stream, + program_size, 64); + memcpy(cs_state.map, program, program_size); + + pipeline->cs_simd = cs_state.offset; + + ralloc_free(mem_ctx); + + return true; +} + +static void +brw_cs_populate_key(struct brw_context *brw, + struct brw_compute_program *bcp, struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* The unique compute program ID */ + key->program_string_id = bcp->id; +} + static void fail_on_compile_error(int status, const char *msg) { @@ -652,6 +715,22 @@ anv_compiler_create(struct anv_device *device) compiler->brw->is_baytrail = devinfo->is_baytrail; compiler->brw->is_haswell = devinfo->is_haswell; compiler->brw->is_cherryview = devinfo->is_cherryview; + + /* We need this at least for CS, which will check brw->max_cs_threads + * against the work group size. 
*/ + compiler->brw->max_vs_threads = devinfo->max_vs_threads; + compiler->brw->max_hs_threads = devinfo->max_hs_threads; + compiler->brw->max_ds_threads = devinfo->max_ds_threads; + compiler->brw->max_gs_threads = devinfo->max_gs_threads; + compiler->brw->max_wm_threads = devinfo->max_wm_threads; + compiler->brw->max_cs_threads = devinfo->max_cs_threads; + compiler->brw->urb.size = devinfo->urb.size; + compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries; + compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries; + compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries; + compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries; + compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries; + compiler->brw->intelScreen = compiler->screen; compiler->screen->devinfo = &device->info; @@ -992,6 +1071,20 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT; } + if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) { + struct brw_cs_prog_key cs_key; + struct gl_compute_program *cp = (struct gl_compute_program *) + program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program; + struct brw_compute_program *bcp = brw_compute_program(cp); + + brw_cs_populate_key(brw, bcp, &cs_key); + + success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline); + fail_if(!success, "brw_codegen_cs_prog failed\n"); + pipeline->prog_data[VK_SHADER_STAGE_COMPUTE] = &pipeline->cs_prog_data.base; + pipeline->active_stages |= VK_SHADER_STAGE_COMPUTE_BIT; + } + brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); gen7_compute_urb_partition(pipeline); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index e44fb2b1cd9..b27bd6d765b 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2650,9 +2650,22 @@ void anv_CmdBindPipeline( struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; struct anv_pipeline *pipeline = (struct anv_pipeline *) _pipeline; 
- cmd_buffer->pipeline = pipeline; - cmd_buffer->vb_dirty |= pipeline->vb_used; - cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + cmd_buffer->compute_pipeline = pipeline; + cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; + + case VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd_buffer->pipeline = pipeline; + cmd_buffer->vb_dirty |= pipeline->vb_used; + cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; + + default: + assert(!"invalid bind point"); + break; + } } void anv_CmdBindDynamicStateObject( @@ -2818,11 +2831,15 @@ void anv_CmdBindVertexBuffers( static VkResult cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - unsigned stage) + unsigned stage, struct anv_state *bt_state) { - struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; + struct anv_pipeline_layout *layout; uint32_t color_attachments, bias, size; - struct anv_state bt_state; + + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->compute_pipeline->layout; + else + layout = cmd_buffer->pipeline->layout; if (stage == VK_SHADER_STAGE_FRAGMENT) { bias = MAX_RTS; @@ -2841,26 +2858,12 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, return VK_SUCCESS; size = (bias + surface_count) * sizeof(uint32_t); - bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); - uint32_t *bt_map = bt_state.map; + *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); + uint32_t *bt_map = bt_state->map; - if (bt_state.map == NULL) + if (bt_state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - static const uint32_t binding_table_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 38, - [VK_SHADER_STAGE_TESS_CONTROL] = 39, - [VK_SHADER_STAGE_TESS_EVALUATION] = 40, - [VK_SHADER_STAGE_GEOMETRY] = 41, - [VK_SHADER_STAGE_FRAGMENT] = 42, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; - - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, - 
._3DCommandSubOpcode = binding_table_opcodes[stage], - .PointertoVSBindingTable = bt_state.offset); - for (uint32_t ca = 0; ca < color_attachments; ca++) { const struct anv_surface_view *view = cmd_buffer->framebuffer->color_attachments[ca]; @@ -2935,39 +2938,27 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, } static VkResult -cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage) +cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *state) { - struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; - struct anv_state state; + struct anv_pipeline_layout *layout; + uint32_t sampler_count; - if (!layout) - return VK_SUCCESS; - - uint32_t sampler_count = layout->stage[stage].sampler_count; + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->compute_pipeline->layout; + else + layout = cmd_buffer->pipeline->layout; + sampler_count = layout ? layout->stage[stage].sampler_count : 0; if (sampler_count == 0) return VK_SUCCESS; uint32_t size = sampler_count * 16; - state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); + *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); - if (state.map == NULL) + if (state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - static const uint32_t sampler_state_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 43, - [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ - [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ - [VK_SHADER_STAGE_GEOMETRY] = 46, - [VK_SHADER_STAGE_FRAGMENT] = 47, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; - - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, - ._3DCommandSubOpcode = sampler_state_opcodes[stage], - .PointertoVSSamplerState = state.offset); - for (uint32_t set = 0; set < layout->num_sets; set++) { struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set]; struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; @@ 
-2983,7 +2974,7 @@ cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage) if (!sampler) continue; - memcpy(state.map + (start + b) * 16, + memcpy(state->map + (start + b) * 16, sampler->state, sizeof(sampler->state)); } } @@ -2991,6 +2982,54 @@ cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage) return VK_SUCCESS; } +static VkResult +flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) +{ + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); + if (result != VK_SUCCESS) + return result; + + static const uint32_t sampler_state_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 43, + [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 46, + [VK_SHADER_STAGE_FRAGMENT] = 47, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + static const uint32_t binding_table_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + if (samplers.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[stage], + .PointertoVSSamplerState = samplers.offset); + } + + if (surfaces.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[stage], + .PointertoVSBindingTable = surfaces.offset); + } + + return VK_SUCCESS; +} + static void flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { @@ -2999,11 +3038,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) VkResult result; for_each_bit(s, dirty) { - 
result = cmd_buffer_emit_binding_table(cmd_buffer, s); - if (result != VK_SUCCESS) - break; - - result = cmd_buffer_emit_samplers(cmd_buffer, s); + result = flush_descriptor_set(cmd_buffer, s); if (result != VK_SUCCESS) break; } @@ -3016,12 +3051,11 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) /* Re-emit all active binding tables */ for_each_bit(s, cmd_buffer->pipeline->active_stages) { - result = cmd_buffer_emit_binding_table(cmd_buffer, s); - result = cmd_buffer_emit_samplers(cmd_buffer, s); - } + result = flush_descriptor_set(cmd_buffer, s); - /* It had better succeed this time */ - assert(result == VK_SUCCESS); + /* It had better succeed this time */ + assert(result == VK_SUCCESS); + } } cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages; @@ -3061,6 +3095,78 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, return state; } +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = cmd_buffer_emit_samplers(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = { + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? 
*/ + }; + + uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + struct anv_state state = + anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); + + GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +static void +anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + if (cmd_buffer->current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->current_pipeline = GPGPU; + } + + if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { + result = flush_compute_descriptor_set(cmd_buffer); + if (result != VK_SUCCESS) { + result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); + assert(result == VK_SUCCESS); + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + } + cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; + } + + cmd_buffer->compute_dirty = 0; +} + static void anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { @@ -3278,15 +3384,80 @@ void anv_CmdDispatch( uint32_t y, uint32_t z) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + uint32_t size = SIMD8; /* FIXME */ + uint32_t right_mask = 0; /* FIXME */ + uint32_t thread_width_max = 0; /* FIXME */ + + anv_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + + .InterfaceDescriptorOffset = 
0, + .IndirectDataLength = 0, + .IndirectDataStartAddress = 0, + + .SIMDSize = size, + + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = thread_width_max, + + .ThreadGroupIDStartingX = 0, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDStartingY = 0, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDStartingResumeZ = 0, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); } +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + void anv_CmdDispatchIndirect( VkCmdBuffer cmdBuffer, - VkBuffer buffer, + VkBuffer _buffer, VkDeviceSize offset) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_buffer *buffer = (struct anv_buffer *) _buffer; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + uint32_t size = SIMD8; /* FIXME */ + uint32_t right_mask = 0; /* FIXME */ + uint32_t thread_width_max = 0; /* FIXME */ + + /* FIXME: We can't compute thread_width_max for indirect, looks like it + * depends on DIMX. 
*/ + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .IndirectParameterEnable = true, + .InterfaceDescriptorOffset = 0, + .IndirectDataLength = 0, + .IndirectDataStartAddress = 0, + + .SIMDSize = size, + + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = thread_width_max, + + .RightExecutionMask = right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); } void anv_CmdSetEvent( diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 28818152ae0..cf7562ae496 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -718,11 +718,61 @@ VkResult anv_CreateGraphicsPipelineDerivative( } VkResult anv_CreateComputePipeline( - VkDevice device, + VkDevice _device, const VkComputePipelineCreateInfo* pCreateInfo, VkPipeline* pPipeline) { - stub_return(VK_UNSUPPORTED); + struct anv_device *device = (struct anv_device *) _device; + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->base.destructor = anv_pipeline_destroy; + pipeline->device = device; + pipeline->layout = (struct anv_pipeline_layout *) pCreateInfo->layout; + + result = anv_reloc_list_init(&pipeline->batch.relocs, device); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + + pipeline->shaders[VK_SHADER_STAGE_COMPUTE] = + (struct anv_shader *) pCreateInfo->cs.shader; + + pipeline->use_repclear = 
false; + + anv_compiler_run(device->compiler, pipeline); + + anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE, + .ScratchSpaceBasePointer = 0, /* FIXME: Scratch bo, this should be a reloc? */ + .StackSize = 0, + .PerThreadScratchSpace = 0, + .ScratchSpaceBasePointerHigh = 0, + + .MaximumNumberofThreads = device->info.max_cs_threads - 1, + .NumberofURBEntries = 2, + .ResetGatewayTimer = true, + .BypassGatewayControl = true, + .URBEntryAllocationSize = 2, + .CURBEAllocationSize = 0); + + *pPipeline = (VkPipeline) pipeline; + + return VK_SUCCESS; } VkResult anv_StorePipeline( diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 8bc5fd10880..cf1cf4c1268 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -653,8 +653,10 @@ struct anv_cmd_buffer { uint32_t current_pipeline; uint32_t vb_dirty; uint32_t dirty; + uint32_t compute_dirty; uint32_t descriptors_dirty; struct anv_pipeline * pipeline; + struct anv_pipeline * compute_pipeline; struct anv_framebuffer * framebuffer; struct anv_dynamic_rs_state * rs_state; struct anv_dynamic_ds_state * ds_state; @@ -692,6 +694,7 @@ struct anv_pipeline { struct brw_vs_prog_data vs_prog_data; struct brw_wm_prog_data wm_prog_data; struct brw_gs_prog_data gs_prog_data; + struct brw_cs_prog_data cs_prog_data; struct brw_stage_prog_data * prog_data[VK_NUM_SHADER_STAGE]; struct { uint32_t vs_start; @@ -705,6 +708,7 @@ struct anv_pipeline { struct anv_bo vs_scratch_bo; struct anv_bo ps_scratch_bo; struct anv_bo gs_scratch_bo; + struct anv_bo cs_scratch_bo; uint32_t active_stages; struct anv_state_stream program_stream; @@ -714,6 +718,7 @@ struct anv_pipeline { uint32_t ps_simd16; uint32_t gs_vec4; uint32_t gs_vertex_count; + uint32_t cs_simd; uint32_t vb_used; uint32_t binding_stride[MAX_VBS]; -- cgit v1.2.3 From fbc9fe3c92580208896d2799f117f23d477896f7 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Jun 2015 21:57:43 -0700 Subject: vk: Use compute pipeline layout when binding compute sets 
--- src/vulkan/device.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index b27bd6d765b..0126b248233 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2761,12 +2761,17 @@ void anv_CmdBindDescriptorSets( const uint32_t* pDynamicOffsets) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; + struct anv_pipeline_layout *layout; struct anv_descriptor_set *set; struct anv_descriptor_set_layout *set_layout; assert(firstSet + setCount < MAX_SETS); + if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) + layout = cmd_buffer->pipeline->layout; + else + layout = cmd_buffer->compute_pipeline->layout; + uint32_t dynamic_slot = 0; for (uint32_t i = 0; i < setCount; i++) { set = (struct anv_descriptor_set *) pDescriptorSets[i]; -- cgit v1.2.3 From 00494c6cb7ec0ffd3fe12c0b450744627564f8be Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Jun 2015 22:07:16 -0700 Subject: vk: Document how depth/stencil formats work in anv_image_create() This reverts commits e17ed04 * vk/image: Don't double-allocate stencil buffers 1ee2d1c * vk/image: Teach anv_image_choose_tile_mode about WMAJOR and instead adds a comment to describe the subtlety of how we create images for stencil only formats. 
--- src/vulkan/image.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index b8a15a1aa59..eaa0f24f732 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -73,16 +73,9 @@ anv_image_choose_tile_mode(const VkImageCreateInfo *vk_info, switch (vk_info->tiling) { case VK_IMAGE_TILING_LINEAR: - if (unlikely(vk_info->format == VK_FORMAT_S8_UINT)) { - anv_abortf("requested linear stencil buffer"); - } return LINEAR; case VK_IMAGE_TILING_OPTIMAL: - if (unlikely(vk_info->format == VK_FORMAT_S8_UINT)) { - return WMAJOR; - } else { - return YMAJOR; - } + return YMAJOR; default: assert(!"bad VKImageTiling"); return LINEAR; @@ -139,6 +132,11 @@ VkResult anv_image_create( info = anv_format_for_vk_format(pCreateInfo->format); assert(info->cpp > 0 || info->has_stencil); + /* First allocate space for the color or depth buffer. info->cpp gives us + * the cpp of the color or depth in case of depth/stencil formats. Stencil + * only (VK_FORMAT_S8_UINT) has info->cpp == 0 and doesn't allocate + * anything here. + */ if (info->cpp > 0) { image->stride = ALIGN_I32(image->extent.width * info->cpp, tile_info->width); @@ -149,7 +147,13 @@ VkResult anv_image_create( image->stride = 0; } - if (info->has_stencil && pCreateInfo->format != VK_FORMAT_S8_UINT) { + /* Formats with a stencil buffer (either combined depth/stencil or + * VK_FORMAT_S8_UINT) have info->has_stencil == true. The stencil buffer is + * placed after the depth buffer and is a separate buffer from the GPU + * point of view, but as far as the API is concerned, depth and stencil are + * in the same image. 
+ */ + if (info->has_stencil) { const struct anv_tile_info *w_info = &anv_tile_info_table[WMAJOR]; image->stencil_offset = ALIGN_U32(image->size, w_info->surface_alignment); image->stencil_stride = ALIGN_I32(image->extent.width, w_info->width); -- cgit v1.2.3 From 2fdd17d259f2b6abb9a76df9be70c3a467e3fb65 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 12 Jun 2015 17:17:01 -0700 Subject: vk: Generate CS prog_data into the pipeline instance We were generating the prog_data into a local variable and never initializing the pipeline->cs_prog_data one. --- src/vulkan/compiler.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 0db7935f085..2a193e396e6 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -614,12 +614,13 @@ brw_codegen_cs_prog(struct brw_context *brw, const GLuint *program; void *mem_ctx = ralloc_context(NULL); GLuint program_size; - struct brw_cs_prog_data prog_data; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; assert (cs); - memset(&prog_data, 0, sizeof(prog_data)); + memset(prog_data, 0, sizeof(*prog_data)); + /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed @@ -629,13 +630,13 @@ brw_codegen_cs_prog(struct brw_context *brw, /* The backend also sometimes adds params for texture size. 
*/ param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; - prog_data.base.param = + prog_data->base.param = rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.pull_param = + prog_data->base.pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.nr_params = param_count; + prog_data->base.nr_params = param_count; - program = brw_cs_emit(brw, mem_ctx, key, &prog_data, + program = brw_cs_emit(brw, mem_ctx, key, prog_data, &cp->program, prog, &program_size); if (program == NULL) { ralloc_free(mem_ctx); -- cgit v1.2.3 From c103c4990c9a647891ed3ee1667c129ee3e16996 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 12 Jun 2015 17:17:43 -0700 Subject: vk: Set binding table layout for CS We weren't setting the binding table layout for the backend compiler. --- src/vulkan/compiler.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 2a193e396e6..5f5dacb5406 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -621,6 +621,7 @@ brw_codegen_cs_prog(struct brw_context *brw, memset(prog_data, 0, sizeof(*prog_data)); + set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE); /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed -- cgit v1.2.3 From fa8a07748d96525b208146065b5264821a9ef338 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 12 Jun 2015 17:21:01 -0700 Subject: vk: Compute CS exec mask and thread width max in pipeline We compute the right mask and thread width max parameters as part of pipeline creation and set them accordingly at vkCmdDispatch() and vkCmdDispatchIndirect() time. These parameters depend only on the local group size and the dispatch width of the program so we can figure this out at pipeline create time. 
--- src/vulkan/device.c | 42 ++++++++++-------------------------------- src/vulkan/pipeline.c | 12 ++++++++++++ src/vulkan/private.h | 3 +++ 3 files changed, 25 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 0126b248233..e55e66fd74f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3390,31 +3390,20 @@ void anv_CmdDispatch( uint32_t z) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - uint32_t size = SIMD8; /* FIXME */ - uint32_t right_mask = 0; /* FIXME */ - uint32_t thread_width_max = 0; /* FIXME */ + struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; anv_cmd_buffer_flush_compute_state(cmd_buffer); anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, - - .InterfaceDescriptorOffset = 0, - .IndirectDataLength = 0, - .IndirectDataStartAddress = 0, - - .SIMDSize = size, - + .SIMDSize = prog_data->simd_size / 16, .ThreadDepthCounterMaximum = 0, .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = thread_width_max, - - .ThreadGroupIDStartingX = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, .ThreadGroupIDXDimension = x, - .ThreadGroupIDStartingY = 0, .ThreadGroupIDYDimension = y, - .ThreadGroupIDStartingResumeZ = 0, .ThreadGroupIDZDimension = z, - .RightExecutionMask = right_mask, + .RightExecutionMask = pipeline->cs_right_mask, .BottomExecutionMask = 0xffffffff); anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); @@ -3430,6 +3419,8 @@ void anv_CmdDispatchIndirect( VkDeviceSize offset) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; struct anv_buffer *buffer = (struct anv_buffer *) _buffer; struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; @@ -3440,26 +3431,13 @@ void 
anv_CmdDispatchIndirect( anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - uint32_t size = SIMD8; /* FIXME */ - uint32_t right_mask = 0; /* FIXME */ - uint32_t thread_width_max = 0; /* FIXME */ - - /* FIXME: We can't compute thread_width_max for indirect, looks like it - * depends on DIMX. */ - anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, .IndirectParameterEnable = true, - .InterfaceDescriptorOffset = 0, - .IndirectDataLength = 0, - .IndirectDataStartAddress = 0, - - .SIMDSize = size, - + .SIMDSize = prog_data->simd_size / 16, .ThreadDepthCounterMaximum = 0, .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = thread_width_max, - - .RightExecutionMask = right_mask, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .RightExecutionMask = pipeline->cs_right_mask, .BottomExecutionMask = 0xffffffff); anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index cf7562ae496..aa24ad43004 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -770,6 +770,18 @@ VkResult anv_CreateComputePipeline( .URBEntryAllocationSize = 2, .CURBEAllocationSize = 0); + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + uint32_t group_size = prog_data->local_size[0] * + prog_data->local_size[1] * prog_data->local_size[2]; + pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); + uint32_t remainder = group_size & (prog_data->simd_size - 1); + + if (remainder > 0) + pipeline->cs_right_mask = ~0u >> (32 - remainder); + else + pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); + + *pPipeline = (VkPipeline) pipeline; return VK_SUCCESS; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index cf1cf4c1268..08dea1526e8 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -726,6 +726,9 @@ struct anv_pipeline { uint32_t 
state_sf[GEN8_3DSTATE_SF_length]; uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + + uint32_t cs_thread_width_max; + uint32_t cs_right_mask; }; struct anv_pipeline_create_info { -- cgit v1.2.3 From bf5a6156591797cd4783e73201298254b14914af Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 17 Jun 2015 16:25:38 -0700 Subject: composites composites composites --- src/glsl/nir/spirv_to_nir.c | 659 +++++++++++++++++++++++++++++++----- src/glsl/nir/spirv_to_nir_private.h | 21 +- 2 files changed, 593 insertions(+), 87 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 979df2019e0..e84d7564300 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -28,24 +28,94 @@ #include "spirv_to_nir_private.h" #include "nir_vla.h" -nir_ssa_def * -vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +static struct vtn_ssa_value * +vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, + const struct glsl_type *type) { - struct vtn_value *val = vtn_untyped_value(b, value_id); - switch (val->value_type) { - case vtn_value_type_constant: { - assert(glsl_type_is_vector_or_scalar(val->type)); - unsigned num_components = glsl_get_vector_elements(val->type); - nir_load_const_instr *load = - nir_load_const_instr_create(b->shader, num_components); + struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); + + if (entry) + return entry->data; + + struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + val->type = type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, num_components); + + for (unsigned i = 0; i < 
num_components; i++) + load->value.u[i] = constant->value.u[i]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + val->def = &load->def; + } else { + assert(glsl_type_is_matrix(type)); + unsigned rows = glsl_get_vector_elements(val->type); + unsigned columns = glsl_get_matrix_columns(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); - for (unsigned i = 0; i < num_components; i++) - load->value.u[0] = val->constant->value.u[0]; + for (unsigned i = 0; i < columns; i++) { + struct vtn_ssa_value *col_val = ralloc(b, struct vtn_ssa_value); + col_val->type = glsl_get_column_type(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, rows); - nir_builder_instr_insert(&b->nb, &load->instr); - return &load->def; + for (unsigned j = 0; j < rows; j++) + load->value.u[j] = constant->value.u[rows * i + j]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + col_val->def = &load->def; + + val->elems[i] = col_val; + } + } + break; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + const struct glsl_type *elem_type = glsl_get_array_element(val->type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + break; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = + glsl_get_struct_field(val->type, i); + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + } + break; } + default: + unreachable("bad constant type"); + } + + return val; +} + +struct vtn_ssa_value * +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + switch (val->value_type) { + case vtn_value_type_constant: + return 
vtn_const_ssa_value(b, val->constant, val->type); + case vtn_value_type_ssa: return val->ssa; default: @@ -451,6 +521,204 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, } } +static struct vtn_ssa_value * +_vtn_variable_load(struct vtn_builder *b, + nir_deref_var *src_deref, nir_deref *src_deref_tail) +{ + struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + val->type = src_deref_tail->type; + + /* The deref tail may contain a deref to select a component of a vector (in + * other words, it might not be an actual tail) so we have to save it away + * here since we overwrite it later. + */ + nir_deref *old_child = src_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->variables[0] = + nir_deref_as_var(nir_copy_deref(load, &src_deref->deref)); + load->num_components = glsl_get_vector_elements(val->type); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + + nir_builder_instr_insert(&b->nb, &load->instr); + + if (src_deref->var->data.mode == nir_var_uniform && + glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) { + /* Uniform boolean loads need to be fixed up since they're defined + * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. 
+ */ + val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->def = &load->dest.ssa; + } + } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(val->type)) { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(val->type); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } else { + assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(val->type, i); + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } + + src_deref_tail->child = old_child; + + return val; +} + +static void +_vtn_variable_store(struct vtn_builder *b, nir_deref_var *dest_deref, + nir_deref *dest_deref_tail, struct vtn_ssa_value *src) +{ + nir_deref *old_child = dest_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(src->type)) { + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->variables[0] = + nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); + store->src[0] = nir_src_for_ssa(src->def); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(src->type)) { + unsigned elems = glsl_get_length(src->type); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = 
nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(src->type); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } else { + assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(src->type); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(src->type, i); + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } + + dest_deref_tail->child = old_child; +} + +/* + * Gets the NIR-level deref tail, which may have as a child an array deref + * selecting which component due to OpAccessChain supporting per-component + * indexing in SPIR-V. + */ + +static nir_deref * +get_deref_tail(nir_deref_var *deref) +{ + nir_deref *cur = &deref->deref; + while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) + cur = cur->child; + + return cur; +} + +static nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, + nir_ssa_def *src, unsigned index); + +static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *index); + +static struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, nir_deref_var *src) +{ + nir_deref *src_tail = get_deref_tail(src); + struct vtn_ssa_value *val = _vtn_variable_load(b, src, src_tail); + + if (src_tail->child) { + nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); + assert(vec_deref->deref.child == NULL); + val->type = vec_deref->deref.type; + if (vec_deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); + else + val->def = vtn_vector_extract_dynamic(b, val->def, + vec_deref->indirect.ssa); + } + + return val; +} + +static nir_ssa_def * 
vtn_vector_insert(struct vtn_builder *b, + nir_ssa_def *src, nir_ssa_def *insert, + unsigned index); + +static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *insert, + nir_ssa_def *index); +static void +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest) +{ + nir_deref *dest_tail = get_deref_tail(dest); + if (dest_tail->child) { + struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_variable_store(b, dest, dest_tail, val); + } else { + _vtn_variable_store(b, dest, dest_tail, src); + } +} + +static void +vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, + nir_deref_var *dest) +{ + nir_deref *src_tail = get_deref_tail(src); + + if (src_tail->child) { + assert(get_deref_tail(dest)->child); + struct vtn_ssa_value *val = vtn_variable_load(b, src); + vtn_variable_store(b, val, dest); + } else { + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + + nir_builder_instr_insert(&b->nb, ©->instr); + } +} + static void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -552,7 +820,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, assert(idx_val->value_type == vtn_value_type_ssa); deref_arr->deref_array_type = nir_deref_array_type_indirect; deref_arr->base_offset = 0; - deref_arr->indirect = nir_src_for_ssa(vtn_ssa_value(b, w[1])); + deref_arr->indirect = + 
nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); } tail->child = &deref_arr->deref; break; @@ -578,12 +847,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); - copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); - - nir_builder_instr_insert(&b->nb, ©->instr); + vtn_variable_copy(b, src, dest); break; } @@ -596,61 +860,15 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, return; } - assert(glsl_type_is_vector_or_scalar(src_type)); struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); - load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src->deref)); - load->num_components = glsl_get_vector_elements(src_type); - nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, - val->name); - - nir_builder_instr_insert(&b->nb, &load->instr); - val->type = src_type; - - if (src->var->data.mode == nir_var_uniform && - glsl_get_base_type(src_type) == GLSL_TYPE_BOOL) { - /* Uniform boolean loads need to be fixed up since they're defined - * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. 
- */ - val->ssa = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); - } else { - val->ssa = &load->dest.ssa; - } + val->ssa = vtn_variable_load(b, src); break; } case SpvOpStore: { nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; - const struct glsl_type *dest_type = nir_deref_tail(&dest->deref)->type; - struct vtn_value *src_val = vtn_untyped_value(b, w[2]); - if (src_val->value_type == vtn_value_type_ssa) { - assert(glsl_type_is_vector_or_scalar(dest_type)); - nir_intrinsic_instr *store = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); - store->src[0] = nir_src_for_ssa(src_val->ssa); - store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest->deref)); - store->num_components = glsl_get_vector_elements(dest_type); - - nir_builder_instr_insert(&b->nb, &store->instr); - } else { - assert(src_val->value_type == vtn_value_type_constant); - - nir_variable *const_tmp = rzalloc(b->shader, nir_variable); - const_tmp->type = dest_type; - const_tmp->name = "const_temp"; - const_tmp->data.mode = nir_var_local; - const_tmp->data.read_only = true; - exec_list_push_tail(&b->impl->locals, &const_tmp->node); - - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); - copy->variables[1] = nir_deref_var_create(copy, const_tmp); - - nir_builder_instr_insert(&b->nb, ©->instr); - } + struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); + vtn_variable_store(b, src, dest); break; } @@ -674,7 +892,7 @@ static nir_tex_src vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) { nir_tex_src src; - src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa); + src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa->def); src.src_type = type; return src; } @@ -715,7 +933,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, /* All these types have the coordinate as 
their first real argument */ struct vtn_value *coord = vtn_value(b, w[4], vtn_value_type_ssa); coord_components = glsl_get_vector_elements(coord->type); - p->src = nir_src_for_ssa(coord->ssa); + p->src = nir_src_for_ssa(coord->ssa->def); p->src_type = nir_tex_src_coord; p++; break; @@ -786,7 +1004,8 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, instr->sampler = sampler; nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); - val->ssa = &instr->dest.ssa; + val->ssa->def = &instr->dest.ssa; + val->ssa->type = val->type; nir_builder_instr_insert(&b->nb, &instr->instr); } @@ -804,12 +1023,13 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa->type = val->type; /* Collect the various SSA sources */ unsigned num_inputs = count - 3; nir_ssa_def *src[4]; for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 3]); + src[i] = vtn_ssa_value(b, w[i + 3])->def; /* Indicates that the first two arguments should be swapped. This is * used for implementing greater-than and less-than-or-equal. 
@@ -921,24 +1141,24 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; case SpvOpFwidth: - val->ssa = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); return; case SpvOpFwidthFine: - val->ssa = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); return; case SpvOpFwidthCoarse: - val->ssa = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); return; case SpvOpVectorTimesScalar: /* The builder will take care of splatting for us. 
*/ - val->ssa = nir_fmul(&b->nb, src[0], src[1]); + val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); return; case SpvOpSRem: @@ -966,7 +1186,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, glsl_get_vector_elements(val->type), val->name); - val->ssa = &instr->dest.dest.ssa; + val->ssa->def = &instr->dest.dest.ssa; for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) instr->src[i].src = nir_src_for_ssa(src[i]); @@ -974,6 +1194,263 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_builder_instr_insert(&b->nb, &instr->instr); } +static nir_ssa_def * +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) +{ + nir_alu_src alu_src; + alu_src.src = nir_src_for_ssa(src); + alu_src.swizzle[0] = index; + return nir_fmov_alu(&b->nb, alu_src, 1); +} + +static nir_alu_instr * +create_vec(void *mem_ctx, unsigned num_components) +{ + nir_op op; + switch (num_components) { + case 1: op = nir_op_fmov; break; + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("bad vector size"); + } + + nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); + + return vec; +} + +static nir_ssa_def * +vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, + unsigned index) +{ + nir_alu_instr *vec = create_vec(b->shader, src->num_components); + + for (unsigned i = 0; i < src->num_components; i++) { + if (i == index) { + vec->src[i].src = nir_src_for_ssa(insert); + } else { + vec->src[i].src = nir_src_for_ssa(src); + vec->src[i].swizzle[0] = i; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static nir_ssa_def * +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index) +{ + nir_ssa_def *dest = 
vtn_vector_extract(b, src, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_extract(b, src, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_insert(b, src, insert, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, + nir_ssa_def *src0, nir_ssa_def *src1, + const uint32_t *indices) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1); + nir_builder_instr_insert(&b->nb, &undef->instr); + + for (unsigned i = 0; i < num_components; i++) { + uint32_t index = indices[i]; + if (index == 0xffffffff) { + vec->src[i].src = nir_src_for_ssa(&undef->def); + } else if (index < src0->num_components) { + vec->src[i].src = nir_src_for_ssa(src0); + vec->src[i].swizzle[0] = index; + } else { + vec->src[i].src = nir_src_for_ssa(src1); + vec->src[i].swizzle[0] = index - src0->num_components; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +/* + * Concatentates a number of vectors/scalars together to produce a vector + */ +static nir_ssa_def * +vtn_vector_construct(struct vtn_builder *b, unsigned num_components, + unsigned num_srcs, nir_ssa_def **srcs) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + unsigned dest_idx = 0; + for (unsigned i = 0; i < num_srcs; i++) { + nir_ssa_def *src = srcs[i]; + for (unsigned j = 0; j < src->num_components; j++) { + vec->src[dest_idx].src = nir_src_for_ssa(src); + vec->src[dest_idx].swizzle[0] = j; + dest_idx++; + } + } 
+ + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static struct vtn_ssa_value * +vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) +{ + struct vtn_ssa_value *dest = ralloc(mem_ctx, struct vtn_ssa_value); + dest->type = src->type; + + if (glsl_type_is_vector_or_scalar(src->type)) { + dest->def = src->def; + } else { + unsigned elems = glsl_get_length(src->type); + + dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_ssa_value *insert, const uint32_t *indices, + unsigned num_indices) +{ + struct vtn_ssa_value *dest = vtn_composite_copy(b, src); + + struct vtn_ssa_value *cur = dest; + unsigned i; + for (i = 0; i < num_indices - 1; i++) { + cur = cur->elems[indices[i]]; + } + + if (glsl_type_is_vector_or_scalar(cur->type)) { + /* According to the SPIR-V spec, OpCompositeInsert may work down to + * the component granularity. In that case, the last index will be + * the index to insert the scalar into the vector. + */ + + cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); + } else { + cur->elems[indices[i]] = insert; + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, + const uint32_t *indices, unsigned num_indices) +{ + struct vtn_ssa_value *cur = src; + for (unsigned i = 0; i < num_indices; i++) { + if (glsl_type_is_vector_or_scalar(cur->type)) { + assert(i == num_indices - 1); + /* According to the SPIR-V spec, OpCompositeExtract may work down to + * the component granularity. The last index will be the index of the + * vector to extract. 
+ */ + + struct vtn_ssa_value *ret = ralloc(b, struct vtn_ssa_value); + ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); + ret->def = vtn_vector_extract(b, cur->def, indices[i]); + return ret; + } + } + + return cur; +} + +static void +vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + + switch (opcode) { + case SpvOpVectorExtractDynamic: + val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def); + break; + + case SpvOpVectorInsertDynamic: + val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + vtn_ssa_value(b, w[5])->def); + break; + + case SpvOpVectorShuffle: + val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(val->type), + vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + w + 5); + break; + + case SpvOpCompositeConstruct: { + val->ssa = ralloc(b, struct vtn_ssa_value); + unsigned elems = count - 3; + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < elems; i++) + srcs[i] = vtn_ssa_value(b, w[3 + i])->def; + val->ssa->def = + vtn_vector_construct(b, glsl_get_vector_elements(val->type), + elems, srcs); + } else { + val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); + } + break; + } + case SpvOpCompositeExtract: + val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), + w + 4, count - 4); + break; + + case SpvOpCompositeInsert: + val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), + vtn_ssa_value(b, w[3]), + w + 5, count - 5); + break; + + case SpvOpCopyObject: + val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + break; + + default: + unreachable("unknown composite operation"); 
+ } + + val->ssa->type = val->type; +} + static bool vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -1341,6 +1818,16 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_matrix_alu(b, opcode, w, count); break; + case SpvOpVectorExtractDynamic: + case SpvOpVectorInsertDynamic: + case SpvOpVectorShuffle: + case SpvOpCompositeConstruct: + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: + case SpvOpCopyObject: + vtn_handle_composite(b, opcode, w, count); + break; + default: unreachable("Unhandled opcode"); } @@ -1425,7 +1912,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_value(b, w[3], vtn_value_type_block)->block; nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])); + if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); nir_cf_node_insert_end(b->nb.cf_node_list, &if_stmt->cf_node); if (then_block == break_block) { @@ -1563,6 +2050,8 @@ spirv_to_nir(const uint32_t *words, size_t word_count, foreach_list_typed(struct vtn_function, func, node, &b->functions) { b->impl = nir_function_impl_create(func->overload); + b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); nir_builder_init(&b->nb, b->impl); nir_builder_insert_after_cf_list(&b->nb, &b->impl->body); vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index d2b364bdfeb..a4760620d46 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -65,6 +65,15 @@ struct vtn_function { typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, const uint32_t *, unsigned); +struct vtn_ssa_value { + union { + nir_ssa_def *def; + struct vtn_ssa_value **elems; + }; + + const struct glsl_type *type; +}; + struct vtn_value { enum vtn_value_type value_type; const char 
*name; @@ -77,7 +86,7 @@ struct vtn_value { nir_deref_var *deref; struct vtn_function *func; struct vtn_block *block; - nir_ssa_def *ssa; + struct vtn_ssa_value *ssa; vtn_instruction_handler ext_handler; }; }; @@ -96,6 +105,14 @@ struct vtn_builder { nir_function_impl *impl; struct vtn_block *block; + /* + * In SPIR-V, constants are global, whereas in NIR, the load_const + * instruction we use is per-function. So while we parse each function, we + * keep a hash table of constants we've resolved to nir_ssa_value's so + * far, and we lazily resolve them when we see them used in a function. + */ + struct hash_table *const_table; + unsigned value_id_bound; struct vtn_value *values; @@ -134,7 +151,7 @@ vtn_value(struct vtn_builder *b, uint32_t value_id, return val; } -nir_ssa_def *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); +struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, struct vtn_value *, -- cgit v1.2.3 From aedd3c957940f9fd062495494ff6991a8eb4b548 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sat, 13 Jun 2015 23:12:58 -0700 Subject: vk: Add missing gen7 RENDER_SURFACE_STATE struct --- src/vulkan/gen75_pack.h | 18 +++++ src/vulkan/gen7_pack.h | 173 ++++++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/gen8_pack.h | 18 +++++ 3 files changed, 209 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index 3796d89967d..1653cb11ae3 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -7472,6 +7472,24 @@ GEN75_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, #define GEN75_PALETTE_ENTRY_length 0x00000001 +#define GEN75_BINDING_TABLE_STATE_length 0x00000001 + +struct GEN75_BINDING_TABLE_STATE { + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN75_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN75_BINDING_TABLE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->SurfaceStatePointer, 5, 31) | + 0; + +} + #define GEN75_RENDER_SURFACE_STATE_length 0x00000008 struct GEN75_RENDER_SURFACE_STATE { diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 09a477bc663..75f193891d6 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -6199,6 +6199,179 @@ GEN7_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, #define GEN7_PALETTE_ENTRY_length 0x00000001 +#define GEN7_BINDING_TABLE_STATE_length 0x00000001 + +struct GEN7_BINDING_TABLE_STATE { + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN7_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_BINDING_TABLE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->SurfaceStatePointer, 5, 31) | + 0; + +} + +#define GEN7_RENDER_SURFACE_STATE_length 0x00000008 + +struct GEN7_RENDER_SURFACE_STATE { +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_BUFFER 4 +#define SURFTYPE_STRBUF 5 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + uint32_t SurfaceArray; + uint32_t SurfaceFormat; + uint32_t SurfaceVerticalAlignment; +#define HALIGN_4 0 +#define HALIGN_8 1 + uint32_t SurfaceHorizontalAlignment; + uint32_t TiledSurface; +#define TILEWALK_XMAJOR 0 +#define TILEWALK_YMAJOR 1 + uint32_t TileWalk; + uint32_t VerticalLineStride; + uint32_t VerticalLineStrideOffset; +#define ARYSPC_FULL 0 +#define ARYSPC_LOD0 1 + uint32_t SurfaceArraySpacing; + uint32_t RenderCacheReadWriteMode; +#define NORMAL_MODE 0 +#define PROGRESSIVE_FRAME 2 +#define INTERLACED_FRAME 3 + uint32_t MediaBoundaryPixelMode; + uint32_t CubeFaceEnables; + uint32_t SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t Depth; + uint32_t SurfacePitch; +#define RTROTATE_0DEG 0 +#define 
RTROTATE_90DEG 1 +#define RTROTATE_270DEG 3 + uint32_t RenderTargetRotation; + uint32_t MinimumArrayElement; + uint32_t RenderTargetViewExtent; +#define MSFMT_MSS 0 +#define MSFMT_DEPTH_STENCIL 1 + uint32_t MultisampledSurfaceStorageFormat; +#define MULTISAMPLECOUNT_1 0 +#define MULTISAMPLECOUNT_4 2 +#define MULTISAMPLECOUNT_8 3 + uint32_t NumberofMultisamples; + uint32_t MultisamplePositionPaletteIndex; + uint32_t MinimumArrayElement0; + uint32_t XOffset; + uint32_t YOffset; + uint32_t SurfaceObjectControlState; + uint32_t SurfaceMinLOD; + uint32_t MIPCountLOD; + uint32_t MCSBaseAddress; + uint32_t MCSSurfacePitch; + uint32_t AppendCounterAddress; + uint32_t AppendCounterEnable; + uint32_t MCSEnable; + uint32_t ReservedMBZ; + uint32_t XOffsetforUVPlane; + uint32_t YOffsetforUVPlane; +#define CC_ZERO 0 +#define CC_ONE 1 + uint32_t RedClearColor; +#define CC_ZERO 0 +#define CC_ONE 1 + uint32_t GreenClearColor; +#define CC_ZERO 0 +#define CC_ONE 1 + uint32_t BlueClearColor; +#define CC_ZERO 0 +#define CC_ONE 1 + uint32_t AlphaClearColor; + uint32_t ResourceMinLOD; +}; + +static inline void +GEN7_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_RENDER_SURFACE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->SurfaceArray, 28, 28) | + __gen_field(values->SurfaceFormat, 18, 26) | + __gen_field(values->SurfaceVerticalAlignment, 16, 17) | + __gen_field(values->SurfaceHorizontalAlignment, 15, 15) | + __gen_field(values->TiledSurface, 14, 14) | + __gen_field(values->TileWalk, 13, 13) | + __gen_field(values->VerticalLineStride, 12, 12) | + __gen_field(values->VerticalLineStrideOffset, 11, 11) | + __gen_field(values->SurfaceArraySpacing, 10, 10) | + __gen_field(values->RenderCacheReadWriteMode, 8, 8) | + __gen_field(values->MediaBoundaryPixelMode, 6, 7) | + __gen_field(values->CubeFaceEnables, 0, 5) | + 0; + + dw[1] = + 
__gen_field(values->SurfaceBaseAddress, 0, 31) | + 0; + + dw[2] = + __gen_field(values->Height, 16, 29) | + __gen_field(values->Width, 0, 13) | + 0; + + dw[3] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + dw[4] = + __gen_field(values->RenderTargetRotation, 29, 30) | + __gen_field(values->MinimumArrayElement, 18, 28) | + __gen_field(values->RenderTargetViewExtent, 7, 17) | + __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | + __gen_field(values->NumberofMultisamples, 3, 5) | + __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | + __gen_field(values->MinimumArrayElement, 0, 26) | + 0; + + dw[5] = + __gen_field(values->XOffset, 25, 31) | + __gen_field(values->YOffset, 20, 23) | + __gen_field(values->SurfaceObjectControlState, 16, 19) | + __gen_field(values->SurfaceMinLOD, 4, 7) | + __gen_field(values->MIPCountLOD, 0, 3) | + 0; + + dw[6] = + __gen_field(values->MCSBaseAddress, 12, 31) | + __gen_field(values->MCSSurfacePitch, 3, 11) | + __gen_field(values->AppendCounterAddress, 6, 31) | + __gen_field(values->AppendCounterEnable, 1, 1) | + __gen_field(values->MCSEnable, 0, 0) | + __gen_field(values->ReservedMBZ, 30, 31) | + __gen_field(values->XOffsetforUVPlane, 16, 29) | + __gen_field(values->YOffsetforUVPlane, 0, 13) | + 0; + + dw[7] = + __gen_field(values->RedClearColor, 31, 31) | + __gen_field(values->GreenClearColor, 30, 30) | + __gen_field(values->BlueClearColor, 29, 29) | + __gen_field(values->AlphaClearColor, 28, 28) | + __gen_field(values->ResourceMinLOD, 0, 11) | + 0; + +} + #define GEN7_SAMPLER_BORDER_COLOR_STATE_length 0x00000004 struct GEN7_SAMPLER_BORDER_COLOR_STATE { diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index 117e64ace7b..3b9e6235975 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -8202,6 +8202,24 @@ GEN8_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, #define GEN8_PALETTE_ENTRY_length 0x00000001 +#define 
GEN8_BINDING_TABLE_STATE_length 0x00000001 + +struct GEN8_BINDING_TABLE_STATE { + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN8_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BINDING_TABLE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->SurfaceStatePointer, 6, 31) | + 0; + +} + #define GEN8_RENDER_SURFACE_STATE_length 0x00000010 struct GEN8_RENDER_SURFACE_STATE { -- cgit v1.2.3 From de4c31a085507719c9985035c885d72bd52cf3ac Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Jun 2015 17:33:08 -0700 Subject: fix glsl450 for composites --- src/glsl/nir/spirv_glsl450_to_nir.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c index 3b9d0940aad..100fde9ce7f 100644 --- a/src/glsl/nir/spirv_glsl450_to_nir.c +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -140,12 +140,14 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->type = val->type; /* Collect the various SSA sources */ unsigned num_inputs = count - 5; nir_ssa_def *src[3]; for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 5]); + src[i] = vtn_ssa_value(b, w[i + 5])->def; nir_op op; switch (entrypoint) { @@ -158,16 +160,16 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case Ceil: op = nir_op_fceil; break; case Fract: op = nir_op_ffract; break; case Radians: - val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); + val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); return; case Degrees: - val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 
57.2957795131)); + val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); return; case Sin: op = nir_op_fsin; break; case Cos: op = nir_op_fcos; break; case Tan: - val->ssa = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), - nir_fcos(&b->nb, src[0])); + val->ssa->def = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), + nir_fcos(&b->nb, src[0])); return; case Pow: op = nir_op_fpow; break; case Exp2: op = nir_op_fexp2; break; @@ -180,7 +182,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case Max: op = nir_op_fmax; break; case Mix: op = nir_op_flrp; break; case Step: - val->ssa = nir_sge(&b->nb, src[1], src[0]); + val->ssa->def = nir_sge(&b->nb, src[1], src[0]); return; case FloatBitsToInt: @@ -188,7 +190,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case IntBitsToFloat: case UintBitsToFloat: /* Probably going to be removed from the final version of the spec. */ - val->ssa = src[0]; + val->ssa->def = src[0]; return; case Fma: op = nir_op_ffma; break; @@ -207,13 +209,13 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; case Length: - val->ssa = build_length(&b->nb, src[0]); + val->ssa->def = build_length(&b->nb, src[0]); return; case Distance: - val->ssa = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); + val->ssa->def = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); return; case Normalize: - val->ssa = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); + val->ssa->def = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); return; case UaddCarry: op = nir_op_uadd_carry; break; @@ -256,7 +258,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, glsl_get_vector_elements(val->type), val->name); - val->ssa = &instr->dest.dest.ssa; + 
val->ssa->def = &instr->dest.dest.ssa; for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) instr->src[i].src = nir_src_for_ssa(src[i]); -- cgit v1.2.3 From 0e86ab7c0a438d86c2177a0e02847798ef81e343 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Jun 2015 17:34:12 -0700 Subject: nir/types: add a helper to transpose a matrix type --- src/glsl/nir/nir_types.cpp | 7 +++++++ src/glsl/nir/nir_types.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 35421506545..d44d48095da 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -270,3 +270,10 @@ glsl_function_type(const glsl_type *return_type, { return glsl_type::get_function_instance(return_type, params, num_params); } + +const glsl_type * +glsl_transposed_type(const struct glsl_type *type) +{ + return glsl_type::get_instance(type->base_type, type->matrix_columns, + type->vector_elements); +} diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index ceb131c9f47..60e1d9d96fc 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -103,6 +103,8 @@ const struct glsl_type * glsl_function_type(const struct glsl_type *return_type, const struct glsl_function_param *params, unsigned num_params); +const struct glsl_type *glsl_transposed_type(const struct glsl_type *type); + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 22854a60efc3ee2442b9150e2bf328e7441d6794 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Jun 2015 17:34:55 -0700 Subject: nir/builder: add a nir_fdot() convenience function --- src/glsl/nir/nir_builder.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 9223e838095..cffe38abf0e 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -240,6 +240,23 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], 
nir_imov_alu(build, alu_src, num_components); } +/* Selects the right fdot given the number of components in each source. */ +static inline nir_ssa_def * +nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) +{ + assert(src0->num_components == src1->num_components); + switch (src0->num_components) { + case 1: return nir_fmul(build, src0, src1); + case 2: return nir_fdot2(build, src0, src1); + case 3: return nir_fdot3(build, src0, src1); + case 4: return nir_fdot4(build, src0, src1); + default: + unreachable("bad component size"); + } + + return NULL; +} + /** * Turns a nir_src into a nir_ssa_def * so it can be passed to * nir_build_alu()-based builder calls. -- cgit v1.2.3 From d0fc04aacf12ad834f9542289f3c915d5ec9b945 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Jun 2015 18:51:51 -0700 Subject: nir/types: be less strict about constructing matrix types --- src/glsl/nir/nir_types.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index d44d48095da..3c00bdb3c18 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -240,7 +240,7 @@ glsl_vector_type(enum glsl_base_type base_type, unsigned components) const glsl_type * glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns) { - assert(rows > 1 && rows <= 4 && columns > 1 && columns <= 4); + assert(rows >= 1 && rows <= 4 && columns >= 1 && columns <= 4); return glsl_type::get_instance(base_type, rows, columns); } -- cgit v1.2.3 From 841aab6f50c88d54d1a3a68a311621f948d09126 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Jun 2015 18:52:44 -0700 Subject: matrices matrices matrices --- src/glsl/nir/spirv_to_nir.c | 291 ++++++++++++++++++++++++++++++++---- src/glsl/nir/spirv_to_nir_private.h | 5 + 2 files changed, 268 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 
e84d7564300..a0a75040771 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -37,7 +37,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, if (entry) return entry->data; - struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); val->type = type; switch (glsl_get_base_type(type)) { @@ -63,7 +63,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); for (unsigned i = 0; i < columns; i++) { - struct vtn_ssa_value *col_val = ralloc(b, struct vtn_ssa_value); + struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); col_val->type = glsl_get_column_type(val->type); nir_load_const_instr *load = nir_load_const_instr_create(b->shader, rows); @@ -516,6 +516,7 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, case SpvDecorationFPFastMathMode: case SpvDecorationLinkageAttributes: case SpvDecorationSpecId: + break; default: unreachable("Unhandled variable decoration"); } @@ -525,7 +526,7 @@ static struct vtn_ssa_value * _vtn_variable_load(struct vtn_builder *b, nir_deref_var *src_deref, nir_deref *src_deref_tail) { - struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); val->type = src_deref_tail->type; /* The deref tail may contain a deref to select a component of a vector (in @@ -1010,11 +1011,264 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, nir_builder_instr_insert(&b->nb, &instr->instr); } +static struct vtn_ssa_value * +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (!glsl_type_is_vector_or_scalar(type)) { + unsigned elems = glsl_get_length(type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + 
const struct glsl_type *child_type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + child_type = glsl_get_column_type(type); + break; + case GLSL_TYPE_ARRAY: + child_type = glsl_get_array_element(type); + break; + case GLSL_TYPE_STRUCT: + child_type = glsl_get_struct_field(type, i); + break; + default: + unreachable("unkown base type"); + } + + val->elems[i] = vtn_create_ssa_value(b, child_type); + } + } + + return val; +} + +static nir_alu_instr * +create_vec(void *mem_ctx, unsigned num_components) +{ + nir_op op; + switch (num_components) { + case 1: op = nir_op_fmov; break; + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("bad vector size"); + } + + nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); + + return vec; +} + +static struct vtn_ssa_value * +vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + if (src->transposed) + return src->transposed; + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_transposed_type(src->type)); + + for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { + nir_alu_instr *vec = create_vec(b, glsl_get_matrix_columns(src->type)); + if (glsl_type_is_vector_or_scalar(src->type)) { + vec->src[0].src = nir_src_for_ssa(src->def); + vec->src[0].swizzle[0] = i; + } else { + for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { + vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); + vec->src[j].swizzle[0] = i; + } + } + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + + dest->transposed = src; + + return dest; +} + +/* + * Normally, column vectors in SPIR-V correspond to a single NIR SSA + * definition. 
But for matrix multiplies, we want to do one routine for + * multiplying a matrix by a matrix and then pretend that vectors are matrices + * with one column. So we "wrap" these things, and unwrap the result before we + * send it off. + */ + +static struct vtn_ssa_value * +vtn_wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (val == NULL) + return NULL; + + if (glsl_type_is_matrix(val->type)) + return val; + + struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); + dest->type = val->type; + dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); + dest->elems[0] = val; + + return dest; +} + +static struct vtn_ssa_value * +vtn_unwrap_matrix(struct vtn_ssa_value *val) +{ + if (glsl_type_is_matrix(val->type)) + return val; + + return val->elems[0]; +} + +static struct vtn_ssa_value * +vtn_matrix_multiply(struct vtn_builder *b, + struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) +{ + + struct vtn_ssa_value *src0 = vtn_wrap_matrix(b, _src0); + struct vtn_ssa_value *src1 = vtn_wrap_matrix(b, _src1); + struct vtn_ssa_value *src0_transpose = vtn_wrap_matrix(b, _src0->transposed); + struct vtn_ssa_value *src1_transpose = vtn_wrap_matrix(b, _src1->transposed); + + unsigned src0_rows = glsl_get_vector_elements(src0->type); + unsigned src0_columns = glsl_get_matrix_columns(src0->type); + unsigned src1_columns = glsl_get_matrix_columns(src1->type); + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns)); + + dest = vtn_wrap_matrix(b, dest); + + bool transpose_result = false; + if (src0_transpose && src1_transpose) { + /* transpose(A) * transpose(B) = transpose(B * A) */ + src1 = src0_transpose; + src0 = src1_transpose; + src0_transpose = NULL; + src1_transpose = NULL; + transpose_result = true; + } + + if (src0_transpose && !src1_transpose && + glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { + /* We already have the rows of src0 and the columns of src1 
available, + * so we can just take the dot product of each row with each column to + * get the result. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + nir_alu_instr *vec = create_vec(b, src0_rows); + for (unsigned j = 0; j < src0_rows; j++) { + vec->src[j].src = + nir_src_for_ssa(nir_fdot(&b->nb, src0_transpose->elems[j]->def, + src1->elems[i]->def)); + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + } else { + /* We don't handle the case where src1 is transposed but not src0, since + * the general case only uses individual components of src1 so the + * optimizer should chew through the transpose we emitted for src1. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[0]->def, + vtn_vector_extract(b, src1->elems[i]->def, 0)); + for (unsigned j = 1; j < src0_columns; j++) { + dest->elems[i]->def = + nir_fadd(&b->nb, dest->elems[i]->def, + nir_fmul(&b->nb, src0->elems[j]->def, + vtn_vector_extract(b, + src1->elems[i]->def, j))); + } + } + } + + dest = vtn_unwrap_matrix(dest); + + if (transpose_result) + dest = vtn_transpose(b, dest); + + return dest; +} + +static struct vtn_ssa_value * +vtn_mat_times_scalar(struct vtn_builder *b, + struct vtn_ssa_value *mat, + nir_ssa_def *scalar) +{ + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); + for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { + if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) + dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); + else + dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); + } + + return dest; +} + static void vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Matrix math not handled"); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->type = vtn_value(b, w[1], 
vtn_value_type_type)->type; + + switch (opcode) { + case SpvOpTranspose: { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[3]); + val->ssa = vtn_transpose(b, src); + break; + } + + case SpvOpOuterProduct: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, vtn_transpose(b, src1)); + break; + } + + case SpvOpMatrixTimesScalar: { + struct vtn_ssa_value *mat = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *scalar = vtn_ssa_value(b, w[4]); + + if (mat->transposed) { + val->ssa = vtn_transpose(b, vtn_mat_times_scalar(b, mat->transposed, + scalar->def)); + } else { + val->ssa = vtn_mat_times_scalar(b, mat, scalar->def); + } + break; + } + + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, src1); + break; + } + + default: unreachable("unknown matrix opcode"); + } } static void @@ -1197,29 +1451,10 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, static nir_ssa_def * vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) { - nir_alu_src alu_src; - alu_src.src = nir_src_for_ssa(src); - alu_src.swizzle[0] = index; - return nir_fmov_alu(&b->nb, alu_src, 1); + unsigned swiz[4] = { index }; + return nir_swizzle(&b->nb, src, swiz, 1, true); } -static nir_alu_instr * -create_vec(void *mem_ctx, unsigned num_components) -{ - nir_op op; - switch (num_components) { - case 1: op = nir_op_fmov; break; - case 2: op = nir_op_vec2; break; - case 3: op = nir_op_vec3; break; - case 4: op = nir_op_vec4; break; - default: unreachable("bad vector size"); - } - - nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op); - nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); - - return vec; -} static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, 
nir_ssa_def *src, nir_ssa_def *insert, @@ -1320,7 +1555,7 @@ vtn_vector_construct(struct vtn_builder *b, unsigned num_components, static struct vtn_ssa_value * vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) { - struct vtn_ssa_value *dest = ralloc(mem_ctx, struct vtn_ssa_value); + struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); dest->type = src->type; if (glsl_type_is_vector_or_scalar(src->type)) { @@ -1376,7 +1611,7 @@ vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, * vector to extract. */ - struct vtn_ssa_value *ret = ralloc(b, struct vtn_ssa_value); + struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); ret->def = vtn_vector_extract(b, cur->def, indices[i]); return ret; @@ -1413,7 +1648,7 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, break; case SpvOpCompositeConstruct: { - val->ssa = ralloc(b, struct vtn_ssa_value); + val->ssa = rzalloc(b, struct vtn_ssa_value); unsigned elems = count - 3; if (glsl_type_is_vector_or_scalar(val->type)) { nir_ssa_def *srcs[4]; diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index a4760620d46..937c45b08c2 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -71,6 +71,11 @@ struct vtn_ssa_value { struct vtn_ssa_value **elems; }; + /* For matrices, a transposed version of the value, or NULL if it hasn't + * been computed + */ + struct vtn_ssa_value *transposed; + const struct glsl_type *type; }; -- cgit v1.2.3 From aba75d054660a5c1636534128a93b7794625ef83 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 18 Jun 2015 11:12:36 -0700 Subject: vk/headers: Make General State offsets relocations --- src/vulkan/gen75_pack.h | 42 ++++++++++++++++++++++++++++++------------ src/vulkan/gen7_pack.h | 42 ++++++++++++++++++++++++++++++------------ src/vulkan/gen8_pack.h | 49 
+++++++++++++++++++++++++++++++++++-------------- 3 files changed, 95 insertions(+), 38 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index 1653cb11ae3..c4e71686c2a 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -1856,7 +1856,7 @@ struct GEN75_3DSTATE_DS { uint32_t AccessesUAV; uint32_t IllegalOpcodeExceptionEnable; uint32_t SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t PatchURBEntryReadLength; @@ -1898,11 +1898,14 @@ GEN75_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - dw[3] = + uint32_t dw3 = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[3] = + __gen_combine_address(data, &dw[3], values->ScratchSpaceBasePointer, dw3); + dw[4] = __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | __gen_field(values->PatchURBEntryReadLength, 11, 17) | @@ -2258,7 +2261,7 @@ struct GEN75_3DSTATE_GS { uint32_t GSaccessesUAV; uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t OutputVertexSize; uint32_t OutputTopology; @@ -2320,11 +2323,14 @@ GEN75_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - dw[3] = + uint32_t dw3 = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[3] = + __gen_combine_address(data, &dw[3], values->ScratchSpaceBasePointer, dw3); + dw[4] = __gen_field(values->OutputVertexSize, 23, 28) | __gen_field(values->OutputTopology, 17, 22) | @@ -2440,7 +2446,7 @@ struct GEN75_3DSTATE_HS { uint32_t StatisticsEnable; uint32_t InstanceCount; 
uint32_t KernelStartPointer; - uint32_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t SingleProgramFlow; #define Dmask 0 @@ -2488,11 +2494,14 @@ GEN75_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, __gen_offset(values->KernelStartPointer, 6, 31) | 0; - dw[4] = + uint32_t dw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[4] = + __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, dw4); + dw[5] = __gen_field(values->SingleProgramFlow, 27, 27) | __gen_field(values->VectorMaskEnable, 26, 26) | @@ -2860,7 +2869,7 @@ struct GEN75_3DSTATE_PS { uint32_t IllegalOpcodeExceptionEnable; uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreads; uint32_t SampleMask; @@ -2917,11 +2926,14 @@ GEN75_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - dw[3] = + uint32_t dw3 = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[3] = + __gen_combine_address(data, &dw[3], values->ScratchSpaceBasePointer, dw3); + dw[4] = __gen_field(values->MaximumNumberofThreads, 23, 31) | __gen_field(values->SampleMask, 12, 19) | @@ -4729,7 +4741,7 @@ struct GEN75_3DSTATE_VS { uint32_t IllegalOpcodeExceptionEnable; uint32_t VSaccessesUAV; uint32_t SoftwareExceptionEnable; - uint32_t ScratchSpaceBaseOffset; + __gen_address_type ScratchSpaceBaseOffset; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterforURBData; uint32_t VertexURBEntryReadLength; @@ -4770,11 +4782,14 @@ GEN75_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - dw[3] = + uint32_t dw3 = 
__gen_offset(values->ScratchSpaceBaseOffset, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[3] = + __gen_combine_address(data, &dw[3], values->ScratchSpaceBaseOffset, dw3); + dw[4] = __gen_field(values->DispatchGRFStartRegisterforURBData, 20, 24) | __gen_field(values->VertexURBEntryReadLength, 11, 16) | @@ -5517,7 +5532,7 @@ struct GEN75_MEDIA_VFE_STATE { uint32_t MediaCommandOpcode; uint32_t SubOpcode; uint32_t DwordLength; - uint32_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t StackSize; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreads; @@ -5571,12 +5586,15 @@ GEN75_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 15) | 0; - dw[1] = + uint32_t dw1 = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->StackSize, 4, 7) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[1] = + __gen_combine_address(data, &dw[1], values->ScratchSpaceBasePointer, dw1); + dw[2] = __gen_field(values->MaximumNumberofThreads, 16, 31) | __gen_field(values->NumberofURBEntries, 8, 15) | diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 75f193891d6..88780322540 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -1521,7 +1521,7 @@ struct GEN7_3DSTATE_DS { uint32_t FloatingPointMode; uint32_t IllegalOpcodeExceptionEnable; uint32_t SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t PatchURBEntryReadLength; @@ -1561,11 +1561,14 @@ GEN7_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - dw[3] = + uint32_t dw3 = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[3] = + __gen_combine_address(data, &dw[3], values->ScratchSpaceBasePointer, 
dw3); + dw[4] = __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | __gen_field(values->PatchURBEntryReadLength, 11, 17) | @@ -1618,7 +1621,7 @@ struct GEN7_3DSTATE_GS { uint32_t IllegalOpcodeExceptionEnable; uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t OutputVertexSize; uint32_t OutputTopology; @@ -1677,11 +1680,14 @@ GEN7_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - dw[3] = + uint32_t dw3 = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[3] = + __gen_combine_address(data, &dw[3], values->ScratchSpaceBasePointer, dw3); + dw[4] = __gen_field(values->OutputVertexSize, 23, 28) | __gen_field(values->OutputTopology, 17, 22) | @@ -1794,7 +1800,7 @@ struct GEN7_3DSTATE_HS { uint32_t StatisticsEnable; uint32_t InstanceCount; uint32_t KernelStartPointer; - uint32_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t SingleProgramFlow; #define Dmask 0 @@ -1840,11 +1846,14 @@ GEN7_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, __gen_offset(values->KernelStartPointer, 6, 31) | 0; - dw[4] = + uint32_t dw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[4] = + __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, dw4); + dw[5] = __gen_field(values->SingleProgramFlow, 27, 27) | __gen_field(values->VectorMaskEnable, 26, 26) | @@ -2208,7 +2217,7 @@ struct GEN7_3DSTATE_PS { uint32_t IllegalOpcodeExceptionEnable; uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreads; uint32_t 
PushConstantEnable; @@ -2262,11 +2271,14 @@ GEN7_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - dw[3] = + uint32_t dw3 = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[3] = + __gen_combine_address(data, &dw[3], values->ScratchSpaceBasePointer, dw3); + dw[4] = __gen_field(values->MaximumNumberofThreads, 24, 31) | __gen_field(values->PushConstantEnable, 11, 11) | @@ -3915,7 +3927,7 @@ struct GEN7_3DSTATE_VS { uint32_t FloatingPointMode; uint32_t IllegalOpcodeExceptionEnable; uint32_t SoftwareExceptionEnable; - uint32_t ScratchSpaceBaseOffset; + __gen_address_type ScratchSpaceBaseOffset; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterforURBData; uint32_t VertexURBEntryReadLength; @@ -3954,11 +3966,14 @@ GEN7_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - dw[3] = + uint32_t dw3 = __gen_offset(values->ScratchSpaceBaseOffset, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[3] = + __gen_combine_address(data, &dw[3], values->ScratchSpaceBaseOffset, dw3); + dw[4] = __gen_field(values->DispatchGRFStartRegisterforURBData, 20, 24) | __gen_field(values->VertexURBEntryReadLength, 11, 16) | @@ -4684,7 +4699,7 @@ struct GEN7_MEDIA_VFE_STATE { uint32_t MediaCommandOpcode; uint32_t SubOpcode; uint32_t DwordLength; - uint32_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreads; uint32_t NumberofURBEntries; @@ -4739,11 +4754,14 @@ GEN7_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 15) | 0; - dw[1] = + uint32_t dw1 = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + dw[1] = + __gen_combine_address(data, &dw[1], values->ScratchSpaceBasePointer, 
dw1); + dw[2] = __gen_field(values->MaximumNumberofThreads, 16, 31) | __gen_field(values->NumberofURBEntries, 8, 15) | diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index 3b9e6235975..04d4e6126f9 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -166,7 +166,7 @@ struct GEN8_3DSTATE_VS { uint32_t IllegalOpcodeExceptionEnable; uint32_t AccessesUAV; uint32_t SoftwareExceptionEnable; - uint64_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t VertexURBEntryReadLength; @@ -215,11 +215,14 @@ GEN8_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint64_t qw4 = + uint32_t dw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + uint64_t qw4 = + __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, dw4); + dw[4] = qw4; dw[5] = qw4 >> 32; @@ -2244,7 +2247,7 @@ struct GEN8_3DSTATE_DS { uint32_t AccessesUAV; uint32_t IllegalOpcodeExceptionEnable; uint32_t SoftwareExceptionEnable; - uint64_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t PatchURBEntryReadLength; @@ -2294,11 +2297,14 @@ GEN8_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint64_t qw4 = + uint32_t dw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + uint64_t qw4 = + __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, dw4); + dw[4] = qw4; dw[5] = qw4 >> 32; @@ -2677,7 +2683,7 @@ struct GEN8_3DSTATE_GS { uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; uint32_t ExpectedVertexCount; - uint64_t ScratchSpaceBasePointer; + __gen_address_type 
ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t OutputVertexSize; uint32_t OutputTopology; @@ -2749,11 +2755,14 @@ GEN8_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->ExpectedVertexCount, 0, 5) | 0; - uint64_t qw4 = + uint32_t dw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + uint64_t qw4 = + __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, dw4); + dw[4] = qw4; dw[5] = qw4 >> 32; @@ -2888,7 +2897,7 @@ struct GEN8_3DSTATE_HS { uint32_t MaximumNumberofThreads; uint32_t InstanceCount; uint64_t KernelStartPointer; - uint64_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t SingleProgramFlow; #define Dmask 0 @@ -2938,11 +2947,14 @@ GEN8_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, dw[3] = qw3; dw[4] = qw3 >> 32; - uint64_t qw5 = + uint32_t dw5 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + uint64_t qw5 = + __gen_combine_address(data, &dw[5], values->ScratchSpaceBasePointer, dw5); + dw[5] = qw5; dw[6] = qw5 >> 32; @@ -3280,7 +3292,7 @@ struct GEN8_3DSTATE_PS { uint32_t IllegalOpcodeExceptionEnable; uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; - uint64_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreadsPerPSD; uint32_t PushConstantEnable; @@ -3335,11 +3347,14 @@ GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint64_t qw4 = + uint32_t dw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; + uint64_t qw4 = + __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, dw4); + dw[4] = qw4; dw[5] = qw4 >> 32; @@ -6404,10 +6419,10 @@ struct 
GEN8_MEDIA_VFE_STATE { uint32_t MediaCommandOpcode; uint32_t SubOpcode; uint32_t DwordLength; - uint32_t ScratchSpaceBasePointer; + __gen_address_type ScratchSpaceBasePointer; uint32_t StackSize; uint32_t PerThreadScratchSpace; - uint32_t ScratchSpaceBasePointerHigh; + __gen_address_type ScratchSpaceBasePointerHigh; uint32_t MaximumNumberofThreads; uint32_t NumberofURBEntries; #define Maintainingtheexistingtimestampstate 0 @@ -6458,16 +6473,22 @@ GEN8_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 15) | 0; - dw[1] = + uint32_t dw1 = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->StackSize, 4, 7) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[2] = + dw[1] = + __gen_combine_address(data, &dw[1], values->ScratchSpaceBasePointer, dw1); + + uint32_t dw2 = __gen_offset(values->ScratchSpaceBasePointerHigh, 0, 15) | 0; + dw[2] = + __gen_combine_address(data, &dw[2], values->ScratchSpaceBasePointerHigh, dw2); + dw[3] = __gen_field(values->MaximumNumberofThreads, 16, 31) | __gen_field(values->NumberofURBEntries, 8, 15) | -- cgit v1.2.3 From b20794cfa81191993e702b333cef4efe908f5841 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 18 Jun 2015 15:50:47 -0700 Subject: vk/allocator: Get rid of non-memfd path We can just use modern valgrind now. --- src/vulkan/allocator.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 8752edac91d..6a7e845ef37 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -263,12 +263,6 @@ anv_block_pool_init(struct anv_block_pool *pool, anv_block_pool_grow(pool); } -/* The memfd path lets us create a map for an fd and lets us grow and remap - * without copying. It breaks valgrind however, so we have a MAP_ANONYMOUS - * path we can take for valgrind debugging. 
*/ - -#define USE_MEMFD 1 - void anv_block_pool_finish(struct anv_block_pool *pool) { @@ -283,9 +277,7 @@ anv_block_pool_finish(struct anv_block_pool *pool) anv_vector_finish(&pool->mmap_cleanups); -#if USE_MEMFD close(pool->fd); -#endif } static int @@ -307,7 +299,6 @@ anv_block_pool_grow(struct anv_block_pool *pool) return -1; *cleanup = ANV_MMAP_CLEANUP_INIT; -#if USE_MEMFD if (pool->size == 0) pool->fd = memfd_create("block pool", MFD_CLOEXEC); @@ -335,17 +326,6 @@ anv_block_pool_grow(struct anv_block_pool *pool) } if (map == MAP_FAILED) return -1; -#else - /* The MAP_ANONYMOUS fallback can't grow without races, so just bail here - * if we're trying to grow the pool. */ - assert(pool->size == 0); - map = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0); - if (map == MAP_FAILED) - return -1; - cleanup->map = map; - cleanup->size = size; -#endif gem_handle = anv_gem_userptr(pool->device, map, size); if (gem_handle == 0) -- cgit v1.2.3 From 9e59003fb198ecf23a386c21a0c8a7696d69cc95 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 18 Jun 2015 16:17:58 -0700 Subject: vk: Undo relocs for scratch bos --- src/vulkan/gen75_pack.h | 42 ++++++++++++------------------------------ src/vulkan/gen7_pack.h | 42 ++++++++++++------------------------------ src/vulkan/gen8_pack.h | 49 ++++++++++++++----------------------------------- 3 files changed, 38 insertions(+), 95 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index c4e71686c2a..1653cb11ae3 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -1856,7 +1856,7 @@ struct GEN75_3DSTATE_DS { uint32_t AccessesUAV; uint32_t IllegalOpcodeExceptionEnable; uint32_t SoftwareExceptionEnable; - __gen_address_type ScratchSpaceBasePointer; + uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t PatchURBEntryReadLength; @@ -1898,14 +1898,11 @@ 
GEN75_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint32_t dw3 = + dw[3] = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[3] = - __gen_combine_address(data, &dw[3], values->ScratchSpaceBasePointer, dw3); - dw[4] = __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | __gen_field(values->PatchURBEntryReadLength, 11, 17) | @@ -2261,7 +2258,7 @@ struct GEN75_3DSTATE_GS { uint32_t GSaccessesUAV; uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; - __gen_address_type ScratchSpaceBasePointer; + uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t OutputVertexSize; uint32_t OutputTopology; @@ -2323,14 +2320,11 @@ GEN75_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint32_t dw3 = + dw[3] = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[3] = - __gen_combine_address(data, &dw[3], values->ScratchSpaceBasePointer, dw3); - dw[4] = __gen_field(values->OutputVertexSize, 23, 28) | __gen_field(values->OutputTopology, 17, 22) | @@ -2446,7 +2440,7 @@ struct GEN75_3DSTATE_HS { uint32_t StatisticsEnable; uint32_t InstanceCount; uint32_t KernelStartPointer; - __gen_address_type ScratchSpaceBasePointer; + uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t SingleProgramFlow; #define Dmask 0 @@ -2494,14 +2488,11 @@ GEN75_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, __gen_offset(values->KernelStartPointer, 6, 31) | 0; - uint32_t dw4 = + dw[4] = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[4] = - __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, dw4); - dw[5] = __gen_field(values->SingleProgramFlow, 27, 27) | 
__gen_field(values->VectorMaskEnable, 26, 26) | @@ -2869,7 +2860,7 @@ struct GEN75_3DSTATE_PS { uint32_t IllegalOpcodeExceptionEnable; uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; - __gen_address_type ScratchSpaceBasePointer; + uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreads; uint32_t SampleMask; @@ -2926,14 +2917,11 @@ GEN75_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint32_t dw3 = + dw[3] = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[3] = - __gen_combine_address(data, &dw[3], values->ScratchSpaceBasePointer, dw3); - dw[4] = __gen_field(values->MaximumNumberofThreads, 23, 31) | __gen_field(values->SampleMask, 12, 19) | @@ -4741,7 +4729,7 @@ struct GEN75_3DSTATE_VS { uint32_t IllegalOpcodeExceptionEnable; uint32_t VSaccessesUAV; uint32_t SoftwareExceptionEnable; - __gen_address_type ScratchSpaceBaseOffset; + uint32_t ScratchSpaceBaseOffset; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterforURBData; uint32_t VertexURBEntryReadLength; @@ -4782,14 +4770,11 @@ GEN75_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint32_t dw3 = + dw[3] = __gen_offset(values->ScratchSpaceBaseOffset, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[3] = - __gen_combine_address(data, &dw[3], values->ScratchSpaceBaseOffset, dw3); - dw[4] = __gen_field(values->DispatchGRFStartRegisterforURBData, 20, 24) | __gen_field(values->VertexURBEntryReadLength, 11, 16) | @@ -5532,7 +5517,7 @@ struct GEN75_MEDIA_VFE_STATE { uint32_t MediaCommandOpcode; uint32_t SubOpcode; uint32_t DwordLength; - __gen_address_type ScratchSpaceBasePointer; + uint32_t ScratchSpaceBasePointer; uint32_t StackSize; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreads; @@ -5586,15 +5571,12 @@ 
GEN75_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 15) | 0; - uint32_t dw1 = + dw[1] = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->StackSize, 4, 7) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[1] = - __gen_combine_address(data, &dw[1], values->ScratchSpaceBasePointer, dw1); - dw[2] = __gen_field(values->MaximumNumberofThreads, 16, 31) | __gen_field(values->NumberofURBEntries, 8, 15) | diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 88780322540..75f193891d6 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -1521,7 +1521,7 @@ struct GEN7_3DSTATE_DS { uint32_t FloatingPointMode; uint32_t IllegalOpcodeExceptionEnable; uint32_t SoftwareExceptionEnable; - __gen_address_type ScratchSpaceBasePointer; + uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t PatchURBEntryReadLength; @@ -1561,14 +1561,11 @@ GEN7_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint32_t dw3 = + dw[3] = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[3] = - __gen_combine_address(data, &dw[3], values->ScratchSpaceBasePointer, dw3); - dw[4] = __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | __gen_field(values->PatchURBEntryReadLength, 11, 17) | @@ -1621,7 +1618,7 @@ struct GEN7_3DSTATE_GS { uint32_t IllegalOpcodeExceptionEnable; uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; - __gen_address_type ScratchSpaceBasePointer; + uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t OutputVertexSize; uint32_t OutputTopology; @@ -1680,14 +1677,11 @@ GEN7_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint32_t dw3 = + dw[3] = 
__gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[3] = - __gen_combine_address(data, &dw[3], values->ScratchSpaceBasePointer, dw3); - dw[4] = __gen_field(values->OutputVertexSize, 23, 28) | __gen_field(values->OutputTopology, 17, 22) | @@ -1800,7 +1794,7 @@ struct GEN7_3DSTATE_HS { uint32_t StatisticsEnable; uint32_t InstanceCount; uint32_t KernelStartPointer; - __gen_address_type ScratchSpaceBasePointer; + uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t SingleProgramFlow; #define Dmask 0 @@ -1846,14 +1840,11 @@ GEN7_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, __gen_offset(values->KernelStartPointer, 6, 31) | 0; - uint32_t dw4 = + dw[4] = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[4] = - __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, dw4); - dw[5] = __gen_field(values->SingleProgramFlow, 27, 27) | __gen_field(values->VectorMaskEnable, 26, 26) | @@ -2217,7 +2208,7 @@ struct GEN7_3DSTATE_PS { uint32_t IllegalOpcodeExceptionEnable; uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; - __gen_address_type ScratchSpaceBasePointer; + uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreads; uint32_t PushConstantEnable; @@ -2271,14 +2262,11 @@ GEN7_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint32_t dw3 = + dw[3] = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[3] = - __gen_combine_address(data, &dw[3], values->ScratchSpaceBasePointer, dw3); - dw[4] = __gen_field(values->MaximumNumberofThreads, 24, 31) | __gen_field(values->PushConstantEnable, 11, 11) | @@ -3927,7 +3915,7 @@ struct GEN7_3DSTATE_VS { uint32_t FloatingPointMode; uint32_t IllegalOpcodeExceptionEnable; uint32_t 
SoftwareExceptionEnable; - __gen_address_type ScratchSpaceBaseOffset; + uint32_t ScratchSpaceBaseOffset; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterforURBData; uint32_t VertexURBEntryReadLength; @@ -3966,14 +3954,11 @@ GEN7_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint32_t dw3 = + dw[3] = __gen_offset(values->ScratchSpaceBaseOffset, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[3] = - __gen_combine_address(data, &dw[3], values->ScratchSpaceBaseOffset, dw3); - dw[4] = __gen_field(values->DispatchGRFStartRegisterforURBData, 20, 24) | __gen_field(values->VertexURBEntryReadLength, 11, 16) | @@ -4699,7 +4684,7 @@ struct GEN7_MEDIA_VFE_STATE { uint32_t MediaCommandOpcode; uint32_t SubOpcode; uint32_t DwordLength; - __gen_address_type ScratchSpaceBasePointer; + uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreads; uint32_t NumberofURBEntries; @@ -4754,14 +4739,11 @@ GEN7_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 15) | 0; - uint32_t dw1 = + dw[1] = __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[1] = - __gen_combine_address(data, &dw[1], values->ScratchSpaceBasePointer, dw1); - dw[2] = __gen_field(values->MaximumNumberofThreads, 16, 31) | __gen_field(values->NumberofURBEntries, 8, 15) | diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index 04d4e6126f9..3b9e6235975 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -166,7 +166,7 @@ struct GEN8_3DSTATE_VS { uint32_t IllegalOpcodeExceptionEnable; uint32_t AccessesUAV; uint32_t SoftwareExceptionEnable; - __gen_address_type ScratchSpaceBasePointer; + uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t VertexURBEntryReadLength; @@ -215,14 
+215,11 @@ GEN8_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint32_t dw4 = + uint64_t qw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - uint64_t qw4 = - __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, dw4); - dw[4] = qw4; dw[5] = qw4 >> 32; @@ -2247,7 +2244,7 @@ struct GEN8_3DSTATE_DS { uint32_t AccessesUAV; uint32_t IllegalOpcodeExceptionEnable; uint32_t SoftwareExceptionEnable; - __gen_address_type ScratchSpaceBasePointer; + uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t PatchURBEntryReadLength; @@ -2297,14 +2294,11 @@ GEN8_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint32_t dw4 = + uint64_t qw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - uint64_t qw4 = - __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, dw4); - dw[4] = qw4; dw[5] = qw4 >> 32; @@ -2683,7 +2677,7 @@ struct GEN8_3DSTATE_GS { uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; uint32_t ExpectedVertexCount; - __gen_address_type ScratchSpaceBasePointer; + uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t OutputVertexSize; uint32_t OutputTopology; @@ -2755,14 +2749,11 @@ GEN8_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->ExpectedVertexCount, 0, 5) | 0; - uint32_t dw4 = + uint64_t qw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - uint64_t qw4 = - __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, dw4); - dw[4] = qw4; dw[5] = qw4 >> 32; @@ -2897,7 +2888,7 @@ struct GEN8_3DSTATE_HS { uint32_t MaximumNumberofThreads; uint32_t InstanceCount; uint64_t 
KernelStartPointer; - __gen_address_type ScratchSpaceBasePointer; + uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t SingleProgramFlow; #define Dmask 0 @@ -2947,14 +2938,11 @@ GEN8_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, dw[3] = qw3; dw[4] = qw3 >> 32; - uint32_t dw5 = + uint64_t qw5 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - uint64_t qw5 = - __gen_combine_address(data, &dw[5], values->ScratchSpaceBasePointer, dw5); - dw[5] = qw5; dw[6] = qw5 >> 32; @@ -3292,7 +3280,7 @@ struct GEN8_3DSTATE_PS { uint32_t IllegalOpcodeExceptionEnable; uint32_t MaskStackExceptionEnable; uint32_t SoftwareExceptionEnable; - __gen_address_type ScratchSpaceBasePointer; + uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreadsPerPSD; uint32_t PushConstantEnable; @@ -3347,14 +3335,11 @@ GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->SoftwareExceptionEnable, 7, 7) | 0; - uint32_t dw4 = + uint64_t qw4 = __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - uint64_t qw4 = - __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, dw4); - dw[4] = qw4; dw[5] = qw4 >> 32; @@ -6419,10 +6404,10 @@ struct GEN8_MEDIA_VFE_STATE { uint32_t MediaCommandOpcode; uint32_t SubOpcode; uint32_t DwordLength; - __gen_address_type ScratchSpaceBasePointer; + uint32_t ScratchSpaceBasePointer; uint32_t StackSize; uint32_t PerThreadScratchSpace; - __gen_address_type ScratchSpaceBasePointerHigh; + uint32_t ScratchSpaceBasePointerHigh; uint32_t MaximumNumberofThreads; uint32_t NumberofURBEntries; #define Maintainingtheexistingtimestampstate 0 @@ -6473,22 +6458,16 @@ GEN8_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 15) | 0; - uint32_t dw1 = + dw[1] = __gen_offset(values->ScratchSpaceBasePointer, 10, 
31) | __gen_field(values->StackSize, 4, 7) | __gen_field(values->PerThreadScratchSpace, 0, 3) | 0; - dw[1] = - __gen_combine_address(data, &dw[1], values->ScratchSpaceBasePointer, dw1); - - uint32_t dw2 = + dw[2] = __gen_offset(values->ScratchSpaceBasePointerHigh, 0, 15) | 0; - dw[2] = - __gen_combine_address(data, &dw[2], values->ScratchSpaceBasePointerHigh, dw2); - dw[3] = __gen_field(values->MaximumNumberofThreads, 16, 31) | __gen_field(values->NumberofURBEntries, 8, 15) | -- cgit v1.2.3 From 9b9f973ca6d3cc1ec5be27857def00a83c032464 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 19 Jun 2015 15:41:30 -0700 Subject: vk: Implement scratch buffers to make spilling work --- src/vulkan/compiler.cpp | 67 ++++++++++++++++++++++++++++--------------------- src/vulkan/device.c | 19 ++++++++++++-- src/vulkan/pipeline.c | 23 ++++++++--------- src/vulkan/private.h | 10 ++++---- 4 files changed, 72 insertions(+), 47 deletions(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 5f5dacb5406..19a403aa1c1 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -248,13 +248,6 @@ really_do_vs_prog(struct brw_context *brw, ralloc_free(mem_ctx); - if (stage_prog_data->total_scratch > 0) - if (!anv_bo_init_new(&pipeline->vs_scratch_bo, - pipeline->device, - stage_prog_data->total_scratch)) - return false; - - return true; } @@ -535,12 +528,6 @@ really_do_wm_prog(struct brw_context *brw, ralloc_free(mem_ctx); - if (prog_data->base.total_scratch > 0) - if (!anv_bo_init_new(&pipeline->ps_scratch_bo, - pipeline->device, - prog_data->base.total_scratch)) - return false; - return true; } @@ -594,13 +581,6 @@ really_do_gs_prog(struct brw_context *brw, ralloc_free(output.mem_ctx); - if (output.prog_data.base.base.total_scratch) { - if (!anv_bo_init_new(&pipeline->gs_scratch_bo, - pipeline->device, - output.prog_data.base.base.total_scratch)) - return false; - } - return true; } @@ -684,6 +664,7 @@ 
fail_on_compile_error(int status, const char *msg) } struct anv_compiler { + struct anv_device *device; struct intel_screen *screen; struct brw_context *brw; struct gl_pipeline_object pipeline; @@ -710,6 +691,8 @@ anv_compiler_create(struct anv_device *device) if (compiler->brw == NULL) goto fail; + compiler->device = device; + compiler->brw->optionCache.info = NULL; compiler->brw->bufmgr = NULL; compiler->brw->gen = devinfo->gen; @@ -967,6 +950,28 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, unreachable("SPIR-V is not supported yet!"); } +static void +add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads, + [VK_SHADER_STAGE_TESS_CONTROL] = 0, + [VK_SHADER_STAGE_TESS_EVALUATION] = 0, + [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads, + [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads, + [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= 1 << stage; + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + ALIGN_U32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) { @@ -978,6 +983,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) * of various prog_data pointers. Make them NULL by default. 
*/ memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); brw->use_rep_send = pipeline->use_repclear; brw->no_simd8 = pipeline->use_repclear; @@ -1024,6 +1030,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) bool success; pipeline->active_stages = 0; + pipeline->total_scratch = 0; if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) { struct brw_vs_prog_key vs_key; @@ -1035,8 +1042,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline); fail_if(!success, "do_wm_prog failed\n"); - pipeline->prog_data[VK_SHADER_STAGE_VERTEX] = &pipeline->vs_prog_data.base.base; - pipeline->active_stages |= VK_SHADER_STAGE_VERTEX_BIT;; + add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX, + &pipeline->vs_prog_data.base.base); } else { memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); pipeline->vs_simd8 = NO_KERNEL; @@ -1053,8 +1060,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline); fail_if(!success, "do_gs_prog failed\n"); - pipeline->active_stages |= VK_SHADER_STAGE_GEOMETRY_BIT; - pipeline->prog_data[VK_SHADER_STAGE_GEOMETRY] = &pipeline->gs_prog_data.base.base; + add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, + &pipeline->gs_prog_data.base.base); } else { pipeline->gs_vec4 = NO_KERNEL; } @@ -1069,8 +1076,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); fail_if(!success, "do_wm_prog failed\n"); - pipeline->prog_data[VK_SHADER_STAGE_FRAGMENT] = &pipeline->wm_prog_data.base; - pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT; + add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT, + &pipeline->wm_prog_data.base); } if 
(pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) { @@ -1083,12 +1090,16 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline); fail_if(!success, "brw_codegen_cs_prog failed\n"); - pipeline->prog_data[VK_SHADER_STAGE_COMPUTE] = &pipeline->cs_prog_data.base; - pipeline->active_stages |= VK_SHADER_STAGE_COMPUTE_BIT; + add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE, + &pipeline->cs_prog_data.base); } brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); + struct anv_device *device = compiler->device; + while (device->scratch_block_pool.bo.size < pipeline->total_scratch) + anv_block_pool_alloc(&device->scratch_block_pool); + gen7_compute_urb_partition(pipeline); return 0; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index e55e66fd74f..c9e66743def 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -404,6 +404,8 @@ VkResult anv_CreateDevice( anv_state_pool_init(&device->surface_state_pool, &device->surface_state_block_pool); + anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); + device->info = *physicalDevice->info; device->compiler = anv_compiler_create(device); @@ -2387,9 +2389,14 @@ static void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->scratch_size = device->scratch_block_pool.size; + if (cmd_buffer->scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, - .GeneralStateBaseAddress = { NULL, 0 }, + .GeneralStateBaseAddress = { scratch_bo, 0 }, .GeneralStateMemoryObjectControlState = GEN8_MOCS, .GeneralStateBaseAddressModifyEnable = true, .GeneralStateBufferSize = 0xfffff, @@ -3213,8 +3220,16 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) } } - if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + if 
(cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } if (cmd_buffer->descriptors_dirty) flush_descriptor_sets(cmd_buffer); diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index aa24ad43004..8be47c91205 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -573,8 +573,8 @@ anv_pipeline_create( .BindingTableEntryCount = 0, .ExpectedVertexCount = pipeline->gs_vertex_count, - .PerThreadScratchSpace = 0, - .ScratchSpaceBasePointer = 0, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], + .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048), .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, .OutputTopology = gs_prog_data->output_topology, @@ -628,11 +628,8 @@ anv_pipeline_create( .AccessesUAV = false, .SoftwareExceptionEnable = false, - /* FIXME: pointer needs to be assigned outside as it aliases - * PerThreadScratchSpace. 
- */ - .ScratchSpaceBasePointer = 0, - .PerThreadScratchSpace = 0, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], + .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048), .DispatchGRFStartRegisterForURBData = vue_prog_data->base.dispatch_grf_start_reg, @@ -676,8 +673,8 @@ anv_pipeline_create( .VectorMaskEnable = true, .SamplerCount = 1, - .ScratchSpaceBasePointer = 0, - .PerThreadScratchSpace = 0, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), .MaximumNumberofThreadsPerPSD = 64 - 2, .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? @@ -757,11 +754,13 @@ VkResult anv_CreateComputePipeline( anv_compiler_run(device->compiler, pipeline); + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE, - .ScratchSpaceBasePointer = 0, /* FIXME: Scratch bo, this should be a reloc? 
*/ - .StackSize = 0, - .PerThreadScratchSpace = 0, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), .ScratchSpaceBasePointerHigh = 0, + .StackSize = 0, .MaximumNumberofThreads = device->info.max_cs_threads - 1, .NumberofURBEntries = 2, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 08dea1526e8..abc0cfb8ec2 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -384,6 +384,8 @@ struct anv_device { struct anv_queue queue; + struct anv_block_pool scratch_block_pool; + struct anv_compiler * compiler; struct anv_aub_writer * aub_writer; pthread_mutex_t mutex; @@ -655,6 +657,7 @@ struct anv_cmd_buffer { uint32_t dirty; uint32_t compute_dirty; uint32_t descriptors_dirty; + uint32_t scratch_size; struct anv_pipeline * pipeline; struct anv_pipeline * compute_pipeline; struct anv_framebuffer * framebuffer; @@ -696,6 +699,8 @@ struct anv_pipeline { struct brw_gs_prog_data gs_prog_data; struct brw_cs_prog_data cs_prog_data; struct brw_stage_prog_data * prog_data[VK_NUM_SHADER_STAGE]; + uint32_t scratch_start[VK_NUM_SHADER_STAGE]; + uint32_t total_scratch; struct { uint32_t vs_start; uint32_t vs_size; @@ -705,11 +710,6 @@ struct anv_pipeline { uint32_t nr_gs_entries; } urb; - struct anv_bo vs_scratch_bo; - struct anv_bo ps_scratch_bo; - struct anv_bo gs_scratch_bo; - struct anv_bo cs_scratch_bo; - uint32_t active_stages; struct anv_state_stream program_stream; struct anv_state blend_state; -- cgit v1.2.3 From e9c21d0ca01eaada2d61bd73e97f59cd2835a0b0 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 22 Jun 2015 11:59:55 -0700 Subject: unbreak things --- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a0a75040771..e0f280ceaae 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1277,7 +1277,7 @@ 
vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); val->type = vtn_value(b, w[1], vtn_value_type_type)->type; - val->ssa->type = val->type; + val->ssa = vtn_create_ssa_value(b, val->type); /* Collect the various SSA sources */ unsigned num_inputs = count - 3; -- cgit v1.2.3 From 9a3dda101e88142cb3b3eebb18883edecb21b375 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 22 Jun 2015 13:53:08 -0700 Subject: nir/vtn: fix emitting code after loops When we're done emitting the code for a loop, we need to visit the new break block, which is the merge block of the current loop, rather than the old merge block, which is the merge block of the loop containing the one we just emitted code for. --- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index e0f280ceaae..4aabf3cc4e0 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2095,7 +2095,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); nir_builder_insert_after_cf_list(&b->nb, old_list); - block = break_block; + block = new_break_block; continue; } -- cgit v1.2.3 From fe1269cf28784450f0032432eb1f32e379beec17 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 23 Jun 2015 10:34:22 -0700 Subject: nir/builder: add support for inserting before/after blocks --- src/glsl/nir/nir_builder.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index cffe38abf0e..7d449262585 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -28,6 +28,10 @@ struct exec_list; typedef struct nir_builder { struct exec_list *cf_node_list; + + nir_block *before_block; + nir_block *after_block; + nir_instr 
*before_instr; nir_instr *after_instr; @@ -48,6 +52,30 @@ nir_builder_insert_after_cf_list(nir_builder *build, struct exec_list *cf_node_list) { build->cf_node_list = cf_node_list; + build->before_block = NULL; + build->after_block = NULL; + build->before_instr = NULL; + build->after_instr = NULL; +} + +static inline void +nir_builder_insert_before_block(nir_builder *build, + nir_block *block) +{ + build->cf_node_list = NULL; + build->before_block = block; + build->after_block = NULL; + build->before_instr = NULL; + build->after_instr = NULL; +} + +static inline void +nir_builder_insert_after_block(nir_builder *build, + nir_block *block) +{ + build->cf_node_list = NULL; + build->before_block = NULL; + build->after_block = block; build->before_instr = NULL; build->after_instr = NULL; } @@ -56,6 +84,8 @@ static inline void nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr) { build->cf_node_list = NULL; + build->before_block = NULL; + build->after_block = NULL; build->before_instr = before_instr; build->after_instr = NULL; } @@ -64,6 +94,8 @@ static inline void nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr) { build->cf_node_list = NULL; + build->before_block = NULL; + build->after_block = NULL; build->before_instr = NULL; build->after_instr = after_instr; } @@ -73,6 +105,10 @@ nir_builder_instr_insert(nir_builder *build, nir_instr *instr) { if (build->cf_node_list) { nir_instr_insert_after_cf_list(build->cf_node_list, instr); + } else if (build->before_block) { + nir_instr_insert_before_block(build->before_block, instr); + } else if (build->after_block) { + nir_instr_insert_after_block(build->after_block, instr); } else if (build->before_instr) { nir_instr_insert_before(build->before_instr, instr); } else { -- cgit v1.2.3 From dee4a94e69d724e984386f81fe1897bc6e3a021c Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 23 Jun 2015 10:34:55 -0700 Subject: nir/vtn: add support for phi nodes --- 
src/glsl/nir/spirv_to_nir.c | 121 ++++++++++++++++++++++++++++++++++++ src/glsl/nir/spirv_to_nir_private.h | 8 +++ 2 files changed, 129 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 4aabf3cc4e0..a5e9c4aa8ae 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1686,6 +1686,112 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, val->ssa->type = val->type; } +static void +vtn_phi_node_init(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + glsl_get_vector_elements(val->type), NULL); + exec_list_make_empty(&phi->srcs); + nir_builder_instr_insert(&b->nb, &phi->instr); + val->def = &phi->dest.ssa; + } else { + unsigned elems = glsl_get_length(val->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_init(b, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_phi_node_create(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = vtn_create_ssa_value(b, type); + vtn_phi_node_init(b, val); + return val; +} + +static void +vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa = vtn_phi_node_create(b, val->type); +} + +static void +vtn_phi_node_add_src(struct vtn_ssa_value *phi, const nir_block *pred, + struct vtn_ssa_value *val) +{ + assert(phi->type == val->type); + if (glsl_type_is_vector_or_scalar(phi->type)) { + nir_phi_instr *phi_instr = nir_instr_as_phi(phi->def->parent_instr); + nir_phi_src *src = ralloc(phi_instr, nir_phi_src); + src->pred = (nir_block *) pred; + src->src = nir_src_for_ssa(val->def); + exec_list_push_tail(&phi_instr->srcs, &src->node); + } else { + unsigned elems = glsl_get_length(phi->type); + 
for (unsigned i = 0; i < elems; i++) + vtn_phi_node_add_src(phi->elems[i], pred, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block, + const struct glsl_type *type, const uint32_t *w, + unsigned count) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->block_table, block); + if (entry) { + struct vtn_block *spv_block = entry->data; + for (unsigned off = 4; off < count; off += 2) { + if (spv_block == vtn_value(b, w[off], vtn_value_type_block)->block) { + return vtn_ssa_value(b, w[off - 1]); + } + } + } + + nir_builder_insert_before_block(&b->nb, block); + struct vtn_ssa_value *phi = vtn_phi_node_create(b, type); + + struct set_entry *entry2; + set_foreach(block->predecessors, entry2) { + nir_block *pred = (nir_block *) entry2->key; + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return phi; +} + +static bool +vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpLabel) { + b->block = vtn_value(b, w[1], vtn_value_type_block)->block; + return true; + } + + if (opcode != SpvOpPhi) + return true; + + struct vtn_ssa_value *phi = vtn_value(b, w[2], vtn_value_type_ssa)->ssa; + + struct set_entry *entry; + set_foreach(b->block->block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, phi->type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return true; +} + static bool vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -1835,6 +1941,7 @@ vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, } case SpvOpFunctionEnd: + b->func->end = w; b->func = NULL; break; @@ -2063,6 +2170,10 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_composite(b, opcode, w, 
count); break; + case SpvOpPhi: + vtn_handle_phi_first_pass(b, w); + break; + default: unreachable("Unhandled opcode"); } @@ -2106,6 +2217,12 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_foreach_instruction(b, block->label, block->branch, vtn_handle_body_instruction); + nir_cf_node *cur_cf_node = + exec_node_data(nir_cf_node, exec_list_get_tail(b->nb.cf_node_list), + node); + nir_block *cur_block = nir_cf_node_as_block(cur_cf_node); + _mesa_hash_table_insert(b->block_table, cur_block, block); + switch (branch_op) { case SpvOpBranch: { struct vtn_block *branch_block = @@ -2287,9 +2404,13 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->impl = nir_function_impl_create(func->overload); b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, _mesa_key_pointer_equal); + b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); nir_builder_init(&b->nb, b->impl); nir_builder_insert_after_cf_list(&b->nb, &b->impl->body); vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); + vtn_foreach_instruction(b, func->start_block->label, func->end, + vtn_handle_phi_second_pass); } ralloc_free(b); diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index 937c45b08c2..b157e023a68 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -60,6 +60,8 @@ struct vtn_function { nir_function_overload *overload; struct vtn_block *start_block; + + const uint32_t *end; }; typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, @@ -118,6 +120,12 @@ struct vtn_builder { */ struct hash_table *const_table; + /* + * Map from nir_block to the vtn_block which ends with it -- used for + * handling phi nodes. 
+ */ + struct hash_table *block_table; + unsigned value_id_bound; struct vtn_value *values; -- cgit v1.2.3 From 24dff4f8faa9c66f8813f759aca04fc4225ccb4c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 21 Jun 2015 23:47:10 -0700 Subject: vk/headers: Handle MBO fields These must be set to one. --- src/vulkan/gen75_pack.h | 10 ++++++++++ src/vulkan/gen7_pack.h | 9 +++++++++ src/vulkan/gen8_pack.h | 6 ++++++ 3 files changed, 25 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index 1653cb11ae3..5d89a44efa5 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -44,6 +44,12 @@ union __gen_value { uint32_t dw; }; +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ul >> (64 - (end - start + 1))) << start; +} + static inline uint64_t __gen_field(uint64_t v, uint32_t start, uint32_t end) { @@ -1158,6 +1164,7 @@ GEN75_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict d dw[1] = __gen_offset(values->BlendStatePointer, 6, 31) | + __gen_mbo(0, 0) | 0; } @@ -1196,6 +1203,7 @@ GEN75_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, dw[1] = __gen_offset(values->ColorCalcStatePointer, 6, 31) | + __gen_mbo(0, 0) | 0; } @@ -1755,6 +1763,7 @@ GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * re dw[1] = __gen_offset(values->PointertoDEPTH_STENCIL_STATE, 6, 31) | + __gen_mbo(0, 0) | 0; } @@ -2207,6 +2216,7 @@ GEN75_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); uint32_t dw1 = __gen_field(values->GatherPoolEnable, 11, 11) | + __gen_mbo(4, 5) | __gen_field(dw_MemoryObjectControlState, 0, 3) | 0; diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 75f193891d6..db3ddb22d2e 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -44,6 +44,12 
@@ union __gen_value { uint32_t dw; }; +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ul >> (64 - (end - start + 1))) << start; +} + static inline uint64_t __gen_field(uint64_t v, uint32_t start, uint32_t end) { @@ -832,6 +838,7 @@ GEN7_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict ds dw[1] = __gen_offset(values->BlendStatePointer, 6, 31) | + __gen_mbo(0, 0) | 0; } @@ -870,6 +877,7 @@ GEN7_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, dw[1] = __gen_offset(values->ColorCalcStatePointer, 6, 31) | + __gen_mbo(0, 0) | 0; } @@ -1429,6 +1437,7 @@ GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * res dw[1] = __gen_offset(values->PointertoDEPTH_STENCIL_STATE, 6, 31) | + __gen_mbo(0, 0) | 0; } diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index 3b9e6235975..bf6392ded4f 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -44,6 +44,12 @@ union __gen_value { uint32_t dw; }; +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ul >> (64 - (end - start + 1))) << start; +} + static inline uint64_t __gen_field(uint64_t v, uint32_t start, uint32_t end) { -- cgit v1.2.3 From 88d02a1b27613ebd67905333763ca743f238c629 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Jun 2015 15:58:45 -0700 Subject: vk: Build xmlconfig stuff into libi965_compiler --- src/mesa/drivers/dri/i965/Makefile.am | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 9c947be88a0..3c8197226fe 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -58,6 +58,7 @@ TEST_LIBS = \ libi965_compiler_la_SOURCES = $(i965_FILES) libi965_compiler_la_LIBADD = $(INTEL_LIBS) \ ../common/libdricommon.la \ + ../common/libxmlconfig.la \ ../common/libmegadriver_stub.la \ ../../../libmesa.la \ $(DRI_LIB_DEPS) \ 
-- cgit v1.2.3 From 1bc0a1ad98fe23238e0eb819ede4aabd6c2a27e2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Jun 2015 19:01:10 -0700 Subject: nir/spirv: Make the header file C++ safe --- src/glsl/nir/nir_spirv.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_spirv.h b/src/glsl/nir/nir_spirv.h index 789d30cd672..3254f10a88d 100644 --- a/src/glsl/nir/nir_spirv.h +++ b/src/glsl/nir/nir_spirv.h @@ -32,7 +32,15 @@ #include "nir.h" +#ifdef __cplusplus +extern "C" { +#endif + nir_shader *spirv_to_nir(const uint32_t *words, size_t word_count, const nir_shader_compiler_options *options); +#ifdef __cplusplus +} +#endif + #endif /* _NIR_SPIRV_H_ */ -- cgit v1.2.3 From ba0d9d33d4f3038e3a07022619f00599eff099e8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Jun 2015 20:29:33 -0700 Subject: nir/spirv: Add support for some more decorations including built-in --- src/glsl/nir/spirv_to_nir.c | 71 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 1fc1b8bc5dc..dcb94fc43c2 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -415,6 +415,74 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, var->data.explicit_binding = true; var->data.binding = dec->literals[0]; break; + case SpvDecorationDescriptorSet: + var->data.descriptor_set = dec->literals[0]; + break; + case SpvDecorationBuiltIn: + var->data.read_only = true; + switch ((SpvBuiltIn)dec->literals[0]) { + case SpvBuiltInFrontFacing: + var->data.location = SYSTEM_VALUE_FRONT_FACE; + break; + case SpvBuiltInVertexId: + var->data.location = SYSTEM_VALUE_VERTEX_ID; + break; + case SpvBuiltInInstanceId: + var->data.location = SYSTEM_VALUE_INSTANCE_ID; + break; + case SpvBuiltInSampleId: + var->data.location = SYSTEM_VALUE_SAMPLE_ID; + break; + case SpvBuiltInSamplePosition: + 
var->data.location = SYSTEM_VALUE_SAMPLE_POS; + break; + case SpvBuiltInSampleMask: + var->data.location = SYSTEM_VALUE_SAMPLE_MASK_IN; + break; + case SpvBuiltInInvocationId: + var->data.location = SYSTEM_VALUE_INVOCATION_ID; + break; + case SpvBuiltInPrimitiveId: + case SpvBuiltInPosition: + case SpvBuiltInPointSize: + case SpvBuiltInClipVertex: + case SpvBuiltInClipDistance: + case SpvBuiltInCullDistance: + case SpvBuiltInLayer: + case SpvBuiltInViewportIndex: + case SpvBuiltInTessLevelOuter: + case SpvBuiltInTessLevelInner: + case SpvBuiltInTessCoord: + case SpvBuiltInPatchVertices: + case SpvBuiltInFragCoord: + case SpvBuiltInPointCoord: + case SpvBuiltInFragColor: + case SpvBuiltInFragDepth: + case SpvBuiltInHelperInvocation: + case SpvBuiltInNumWorkgroups: + case SpvBuiltInWorkgroupSize: + case SpvBuiltInWorkgroupId: + case SpvBuiltInLocalInvocationId: + case SpvBuiltInGlobalInvocationId: + case SpvBuiltInLocalInvocationIndex: + case SpvBuiltInWorkDim: + case SpvBuiltInGlobalSize: + case SpvBuiltInEnqueuedWorkgroupSize: + case SpvBuiltInGlobalOffset: + case SpvBuiltInGlobalLinearId: + case SpvBuiltInWorkgroupLinearId: + case SpvBuiltInSubgroupSize: + case SpvBuiltInSubgroupMaxSize: + case SpvBuiltInNumSubgroups: + case SpvBuiltInNumEnqueuedSubgroups: + case SpvBuiltInSubgroupId: + case SpvBuiltInSubgroupLocalInvocationId: + unreachable("Unhandled builtin enum"); + } + break; + case SpvDecorationNoStaticUse: + /* This can safely be ignored */ + break; case SpvDecorationBlock: case SpvDecorationBufferBlock: case SpvDecorationRowMajor: @@ -431,16 +499,13 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, case SpvDecorationNonreadable: case SpvDecorationUniform: /* This is really nice but we have no use for it right now. 
*/ - case SpvDecorationNoStaticUse: case SpvDecorationCPacked: case SpvDecorationSaturatedConversion: case SpvDecorationStream: - case SpvDecorationDescriptorSet: case SpvDecorationOffset: case SpvDecorationAlignment: case SpvDecorationXfbBuffer: case SpvDecorationStride: - case SpvDecorationBuiltIn: case SpvDecorationFuncParamAttr: case SpvDecorationFPRoundingMode: case SpvDecorationFPFastMathMode: -- cgit v1.2.3 From d0bd2bc60432363ae4bf1aa8c0d94a7ef53aa9f5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Jun 2015 20:32:03 -0700 Subject: nir/spirv: Add support for the Uniform storage class This is kida sketchy. I'm not really sure this is the way it's supposed to be used. --- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index dcb94fc43c2..8394ab7bd66 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -532,6 +532,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, var->name = ralloc_strdup(var, val->name); switch ((SpvStorageClass)w[3]) { + case SpvStorageClassUniform: case SpvStorageClassUniformConstant: var->data.mode = nir_var_uniform; var->data.read_only = true; @@ -549,7 +550,6 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvStorageClassFunction: var->data.mode = nir_var_local; break; - case SpvStorageClassUniform: case SpvStorageClassWorkgroupLocal: case SpvStorageClassWorkgroupGlobal: case SpvStorageClassGeneric: -- cgit v1.2.3 From e369a0eb41a07929fa53193e930190397084eb51 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Jun 2015 20:38:52 -0700 Subject: nir/spirv: Use vtn_ssa_value for texture coordinates --- src/glsl/nir/spirv_to_nir.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 8394ab7bd66..37334edd0df 100644 --- 
a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -776,15 +776,13 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, case SpvOpTextureGather: case SpvOpTextureGatherOffset: case SpvOpTextureGatherOffsets: - case SpvOpTextureQueryLod: { + case SpvOpTextureQueryLod: /* All these types have the coordinate as their first real argument */ - struct vtn_value *coord = vtn_value(b, w[4], vtn_value_type_ssa); - coord_components = glsl_get_vector_elements(coord->type); - p->src = nir_src_for_ssa(coord->ssa); + coord_components = glsl_get_vector_elements(b->values[w[4]].type); + p->src = nir_src_for_ssa(vtn_ssa_value(b, w[4])); p->src_type = nir_tex_src_coord; p++; break; - } default: break; } -- cgit v1.2.3 From 2ecac045a4140176b402dbbde44ad06530bea2e2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Jun 2015 21:22:05 -0700 Subject: i965/nir: Split NIR shader handling into two functions The brw_create_nir function takes a GLSL or ARB shader and turns it into a NIR shader. The guts of the optimization and lowering code is now split into a new brw_process_shader function. --- src/mesa/drivers/dri/i965/brw_nir.c | 42 ++++++++++++++++++++++++------------- src/mesa/drivers/dri/i965/brw_nir.h | 6 ++++++ 2 files changed, 33 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index b7bb2315b97..e7e16b6686a 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -89,7 +89,6 @@ brw_create_nir(struct brw_context *brw, const nir_shader_compiler_options *options = ctx->Const.ShaderCompilerOptions[stage].NirOptions; struct gl_shader *shader = shader_prog ? 
shader_prog->_LinkedShaders[stage] : NULL; - bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); nir_shader *nir; /* First, lower the GLSL IR or Mesa IR to NIR */ @@ -101,6 +100,28 @@ brw_create_nir(struct brw_context *brw, } nir_validate_shader(nir); + brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage); + + static GLuint msg_id = 0; + _mesa_gl_debug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_OTHER, + MESA_DEBUG_SEVERITY_NOTIFICATION, + "%s NIR shader: %d inst\n", + _mesa_shader_stage_to_abbrev(stage), + count_nir_instrs(nir)); + + return nir; +} + +void +brw_process_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + gl_shader_stage stage) +{ + bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); + nir_lower_global_vars_to_local(nir); nir_validate_shader(nir); @@ -135,9 +156,11 @@ brw_create_nir(struct brw_context *brw, nir_validate_shader(nir); if (shader_prog) { + nir_lower_samplers(nir, shader_prog, stage); + } else { nir_lower_samplers_for_vk(nir); - nir_validate_shader(nir); } + nir_validate_shader(nir); nir_lower_system_values(nir); nir_validate_shader(nir); @@ -147,7 +170,7 @@ brw_create_nir(struct brw_context *brw, nir_optimize(nir); - if (brw->gen >= 6) { + if (devinfo->gen >= 6) { /* Try and fuse multiply-adds */ nir_opt_peephole_ffma(nir); nir_validate_shader(nir); @@ -178,15 +201,6 @@ brw_create_nir(struct brw_context *brw, nir_print_shader(nir, stderr); } - static GLuint msg_id = 0; - _mesa_gl_debug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, - "%s NIR shader: %d inst\n", - _mesa_shader_stage_to_abbrev(stage), - count_nir_instrs(nir)); - nir_convert_from_ssa(nir); nir_validate_shader(nir); @@ -195,7 +209,7 @@ brw_create_nir(struct brw_context *brw, * run it last because it stashes data in instr->pass_flags and we don't * 
want that to be squashed by other NIR passes. */ - if (brw->gen <= 5) + if (devinfo->gen <= 5) brw_nir_analyze_boolean_resolves(nir); nir_sweep(nir); @@ -205,6 +219,4 @@ brw_create_nir(struct brw_context *brw, _mesa_shader_stage_to_string(stage)); nir_print_shader(nir, stderr); } - - return nir; } diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 313110997bf..8487cef0901 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -79,6 +79,12 @@ nir_shader *brw_create_nir(struct brw_context *brw, const struct gl_program *prog, gl_shader_stage stage); +void +brw_process_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + gl_shader_stage stage); + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 845002e163e394efa9536e726d09f78b2497606e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Jun 2015 21:38:49 -0700 Subject: i965/nir: Handle returns as long as they're at the end of a function --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index a378019af5b..46c30fcae26 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1774,6 +1774,12 @@ fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr) bld.emit(BRW_OPCODE_CONTINUE); break; case nir_jump_return: + /* This has to be the last block in the shader. We don't handle + * early returns. 
+ */ + assert(nir_cf_node_next(&instr->instr.block->cf_node) == NULL && + instr->instr.block->cf_node.parent->type == nir_cf_node_function); + break; default: unreachable("unknown jump"); } -- cgit v1.2.3 From d178e15567e6c8c4629ae0af7854bdecb129831a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Jun 2015 21:39:07 -0700 Subject: nir/spirv: Fix up some dererf ralloc parenting --- src/glsl/nir/spirv_to_nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 37334edd0df..2b7eda39008 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -571,7 +571,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, exec_list_push_tail(&b->shader->globals, &var->node); } - val->deref = nir_deref_var_create(b->shader, var); + val->deref = nir_deref_var_create(b, var); vtn_foreach_decoration(b, val, var_decoration_cb, var); break; @@ -846,7 +846,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, instr->is_array = glsl_sampler_type_is_array(sampler_type); instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); - instr->sampler = sampler; + instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); val->ssa = &instr->dest.ssa; -- cgit v1.2.3 From 7cec6c5dfdc44363f340bf4f8aa93e57a5140ca3 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 28 May 2015 09:50:18 -0700 Subject: vk: Define MAX(a, b) macro --- src/vulkan/private.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index abc0cfb8ec2..db7f5f30af3 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -58,6 +58,8 @@ extern "C" { #define anv_noreturn __attribute__((__noreturn__)) #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) +#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) + static inline uint32_t ALIGN_U32(uint32_t v, uint32_t a) { -- cgit v1.2.3 From f7fb7575ef6ebdcb88d4e62d48730a88de999410 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 28 May 2015 09:50:26 -0700 Subject: vk: Add anv_minify() --- src/vulkan/private.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index db7f5f30af3..6693a288866 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -72,6 +72,15 @@ ALIGN_I32(int32_t v, int32_t a) return (v + a - 1) & ~(a - 1); } +static inline uint32_t +anv_minify(uint32_t n, uint32_t levels) +{ + if (unlikely(n == 0)) + return 0; + else + return MAX(n >> levels, 1); +} + #define for_each_bit(b, dword) \ for (uint32_t __dword = (dword); \ (b) = __builtin_ffs(__dword) - 1, __dword; \ -- cgit v1.2.3 From 5f2d469e37aef1de8a127f0541e69285988748d4 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 25 Jun 2015 16:26:07 -0700 Subject: vk: Add func anv_is_aligned() --- src/vulkan/private.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 6693a288866..0d199c836a3 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -72,6 +72,14 @@ ALIGN_I32(int32_t v, int32_t a) return (v + a - 1) & ~(a - 1); } +/** Alignment must be a power of 2. 
*/ +static inline bool +anv_is_aligned(uintmax_t n, uintmax_t a) +{ + assert(a == (a & -a)); + return (n & (a - 1)) == 0; +} + static inline uint32_t anv_minify(uint32_t n, uint32_t levels) { -- cgit v1.2.3 From 1132080d5d5ea4c337a5fc4e4a24a06e14a7d9cd Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 28 May 2015 08:07:54 -0700 Subject: vk/util: Add anv_loge() for logging error messages --- src/vulkan/private.h | 2 ++ src/vulkan/util.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 0d199c836a3..fc7f65598f5 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -119,6 +119,8 @@ vk_error(VkResult error) void __anv_finishme(const char *file, int line, const char *format, ...) anv_printflike(3, 4); +void anv_loge(const char *format, ...) anv_printflike(1, 2); +void anv_loge_v(const char *format, va_list va); /** * Print a FINISHME message, including its source location. diff --git a/src/vulkan/util.c b/src/vulkan/util.c index 13af882cffd..cbeb663b5ef 100644 --- a/src/vulkan/util.c +++ b/src/vulkan/util.c @@ -30,6 +30,26 @@ #include "private.h" +/** Log an error message. */ +void anv_printflike(1, 2) +anv_loge(const char *format, ...) +{ + va_list va; + + va_start(va, format); + anv_loge_v(format, va); + va_end(va); +} + +/** \see anv_loge() */ +void +anv_loge_v(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); +} + void anv_printflike(3, 4) __anv_finishme(const char *file, int line, const char *format, ...) { -- cgit v1.2.3 From cb30acacedbe51a590b14be25c4a345d92413403 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 28 May 2015 07:37:59 -0700 Subject: vk/image: Add tables for gen SurfaceType Tables for mapping VkImageType and VkImageViewType to gen SurfaceType. Tables are unused. 
--- src/vulkan/image.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index eaa0f24f732..4beb58ef05f 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -41,6 +41,19 @@ static const uint8_t anv_valign[] = { [16] = VALIGN16, }; +static const uint8_t anv_surf_type_from_image_type[] = { + [VK_IMAGE_TYPE_1D] = SURFTYPE_1D, + [VK_IMAGE_TYPE_2D] = SURFTYPE_2D, + [VK_IMAGE_TYPE_3D] = SURFTYPE_3D, +}; + +static const uint8_t anv_surf_type_from_image_view_type[] = { + [VK_IMAGE_VIEW_TYPE_1D] = SURFTYPE_1D, + [VK_IMAGE_VIEW_TYPE_2D] = SURFTYPE_2D, + [VK_IMAGE_VIEW_TYPE_3D] = SURFTYPE_3D, + [VK_IMAGE_VIEW_TYPE_CUBE] = SURFTYPE_CUBE, +}; + static const struct anv_tile_info { uint32_t width; uint32_t height; -- cgit v1.2.3 From 7ea121687c9048788dc42ccdf4d0bbfc29810df1 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 28 May 2015 07:45:31 -0700 Subject: vk/image: Add anv_image::surf_type This the gen SurfaceType, such as SURFTYPE_2D. --- src/vulkan/image.c | 3 +++ src/vulkan/private.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 4beb58ef05f..8996d9cbcba 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -129,6 +129,9 @@ VkResult anv_image_create( image->swap_chain = NULL; image->tile_mode = anv_image_choose_tile_mode(pCreateInfo, extra); + /* TODO(chadv): How should we validate inputs? 
*/ + image->surf_type = anv_surf_type_from_image_type[pCreateInfo->imageType]; + assert(image->extent.width > 0); assert(image->extent.height > 0); assert(image->extent.depth > 0); diff --git a/src/vulkan/private.h b/src/vulkan/private.h index fc7f65598f5..3cd8f3eab60 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -814,6 +814,9 @@ struct anv_image { uint8_t h_align; uint8_t v_align; /** \} */ + + /** RENDER_SURFACE_STATE.SurfaceType */ + uint8_t surf_type; }; struct anv_surface_view { -- cgit v1.2.3 From 99031aa0f320302f186576ed916651276f250613 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 28 May 2015 07:46:31 -0700 Subject: vk/image: Stop hardcoding SurfaceType of VkImageView Instead, translate VkImageViewType to a gen SurfaceType. --- src/vulkan/image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 8996d9cbcba..2a441a47053 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -270,7 +270,7 @@ anv_image_view_init(struct anv_surface_view *view, }; struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = SURFTYPE_2D, + .SurfaceType = anv_surf_type_from_image_view_type[pCreateInfo->viewType], .SurfaceArray = false, .SurfaceFormat = format, .SurfaceVerticalAlignment = anv_valign[image->v_align], -- cgit v1.2.3 From fa352969a2ef1ebc65a29624aa0dde362c3b4367 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 28 May 2015 07:40:22 -0700 Subject: vk/image: Check extent does not exceed surface type limits --- src/vulkan/image.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 2a441a47053..e2f8c4039b4 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -54,6 +54,19 @@ static const uint8_t anv_surf_type_from_image_view_type[] = { [VK_IMAGE_VIEW_TYPE_CUBE] = SURFTYPE_CUBE, }; +static const struct anv_surf_type_limits { + int32_t width; + int32_t 
height; + int32_t depth; +} anv_surf_type_limits[] = { + [SURFTYPE_1D] = {16384, 0, 2048}, + [SURFTYPE_2D] = {16384, 16384, 2048}, + [SURFTYPE_3D] = {2048, 2048, 2048}, + [SURFTYPE_CUBE] = {16384, 16384, 340}, + [SURFTYPE_BUFFER] = {128, 16384, 64}, + [SURFTYPE_STRBUF] = {128, 16384, 64}, +}; + static const struct anv_tile_info { uint32_t width; uint32_t height; @@ -132,6 +145,9 @@ VkResult anv_image_create( /* TODO(chadv): How should we validate inputs? */ image->surf_type = anv_surf_type_from_image_type[pCreateInfo->imageType]; + const struct anv_surf_type_limits *limits = + &anv_surf_type_limits[image->surf_type]; + assert(image->extent.width > 0); assert(image->extent.height > 0); assert(image->extent.depth > 0); @@ -139,6 +155,16 @@ VkResult anv_image_create( const struct anv_tile_info *tile_info = &anv_tile_info_table[image->tile_mode]; + if (image->extent.width > limits->width || + image->extent.height > limits->height || + image->extent.depth > limits->depth) { + anv_loge("image extent is too large"); + free(image); + + /* TODO(chadv): What is the correct error? */ + return vk_error(VK_ERROR_INVALID_MEMORY_SIZE); + } + image->alignment = tile_info->surface_alignment; /* FINISHME: Stop hardcoding miptree image alignment */ -- cgit v1.2.3 From ee0a8f23e4c7dd03012bde788ee0769e5d3ffc32 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 25 Jun 2015 16:37:12 -0700 Subject: glsl: Move vert_attrib varying_slot and frag_result enums to shader_enums.h --- src/glsl/shader_enums.h | 207 ++++++++++++++++++++++++++++++++++++++++++++++++ src/mesa/main/mtypes.h | 205 ----------------------------------------------- 2 files changed, 207 insertions(+), 205 deletions(-) (limited to 'src') diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h index 79e0f6b5f78..7ebc3b74b0e 100644 --- a/src/glsl/shader_enums.h +++ b/src/glsl/shader_enums.h @@ -23,6 +23,8 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include "main/config.h" + #ifndef SHADER_ENUMS_H #define SHADER_ENUMS_H @@ -184,4 +186,209 @@ enum glsl_interp_qualifier }; +/** + * Indexes for vertex program attributes. + * GL_NV_vertex_program aliases generic attributes over the conventional + * attributes. In GL_ARB_vertex_program shader the aliasing is optional. + * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the + * generic attributes are distinct/separate). + */ +typedef enum +{ + VERT_ATTRIB_POS = 0, + VERT_ATTRIB_WEIGHT = 1, + VERT_ATTRIB_NORMAL = 2, + VERT_ATTRIB_COLOR0 = 3, + VERT_ATTRIB_COLOR1 = 4, + VERT_ATTRIB_FOG = 5, + VERT_ATTRIB_COLOR_INDEX = 6, + VERT_ATTRIB_EDGEFLAG = 7, + VERT_ATTRIB_TEX0 = 8, + VERT_ATTRIB_TEX1 = 9, + VERT_ATTRIB_TEX2 = 10, + VERT_ATTRIB_TEX3 = 11, + VERT_ATTRIB_TEX4 = 12, + VERT_ATTRIB_TEX5 = 13, + VERT_ATTRIB_TEX6 = 14, + VERT_ATTRIB_TEX7 = 15, + VERT_ATTRIB_POINT_SIZE = 16, + VERT_ATTRIB_GENERIC0 = 17, + VERT_ATTRIB_GENERIC1 = 18, + VERT_ATTRIB_GENERIC2 = 19, + VERT_ATTRIB_GENERIC3 = 20, + VERT_ATTRIB_GENERIC4 = 21, + VERT_ATTRIB_GENERIC5 = 22, + VERT_ATTRIB_GENERIC6 = 23, + VERT_ATTRIB_GENERIC7 = 24, + VERT_ATTRIB_GENERIC8 = 25, + VERT_ATTRIB_GENERIC9 = 26, + VERT_ATTRIB_GENERIC10 = 27, + VERT_ATTRIB_GENERIC11 = 28, + VERT_ATTRIB_GENERIC12 = 29, + VERT_ATTRIB_GENERIC13 = 30, + VERT_ATTRIB_GENERIC14 = 31, + VERT_ATTRIB_GENERIC15 = 32, + VERT_ATTRIB_MAX = 33 +} gl_vert_attrib; + +/** + * Symbolic constats to help iterating over + * specific blocks of vertex attributes. + * + * VERT_ATTRIB_FF + * includes all fixed function attributes as well as + * the aliased GL_NV_vertex_program shader attributes. + * VERT_ATTRIB_TEX + * include the classic texture coordinate attributes. + * Is a subset of VERT_ATTRIB_FF. + * VERT_ATTRIB_GENERIC + * include the OpenGL 2.0+ GLSL generic shader attributes. + * These alias the generic GL_ARB_vertex_shader attributes. 
+ */ +#define VERT_ATTRIB_FF(i) (VERT_ATTRIB_POS + (i)) +#define VERT_ATTRIB_FF_MAX VERT_ATTRIB_GENERIC0 + +#define VERT_ATTRIB_TEX(i) (VERT_ATTRIB_TEX0 + (i)) +#define VERT_ATTRIB_TEX_MAX MAX_TEXTURE_COORD_UNITS + +#define VERT_ATTRIB_GENERIC(i) (VERT_ATTRIB_GENERIC0 + (i)) +#define VERT_ATTRIB_GENERIC_MAX MAX_VERTEX_GENERIC_ATTRIBS + +/** + * Bitflags for vertex attributes. + * These are used in bitfields in many places. + */ +/*@{*/ +#define VERT_BIT_POS BITFIELD64_BIT(VERT_ATTRIB_POS) +#define VERT_BIT_WEIGHT BITFIELD64_BIT(VERT_ATTRIB_WEIGHT) +#define VERT_BIT_NORMAL BITFIELD64_BIT(VERT_ATTRIB_NORMAL) +#define VERT_BIT_COLOR0 BITFIELD64_BIT(VERT_ATTRIB_COLOR0) +#define VERT_BIT_COLOR1 BITFIELD64_BIT(VERT_ATTRIB_COLOR1) +#define VERT_BIT_FOG BITFIELD64_BIT(VERT_ATTRIB_FOG) +#define VERT_BIT_COLOR_INDEX BITFIELD64_BIT(VERT_ATTRIB_COLOR_INDEX) +#define VERT_BIT_EDGEFLAG BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG) +#define VERT_BIT_TEX0 BITFIELD64_BIT(VERT_ATTRIB_TEX0) +#define VERT_BIT_TEX1 BITFIELD64_BIT(VERT_ATTRIB_TEX1) +#define VERT_BIT_TEX2 BITFIELD64_BIT(VERT_ATTRIB_TEX2) +#define VERT_BIT_TEX3 BITFIELD64_BIT(VERT_ATTRIB_TEX3) +#define VERT_BIT_TEX4 BITFIELD64_BIT(VERT_ATTRIB_TEX4) +#define VERT_BIT_TEX5 BITFIELD64_BIT(VERT_ATTRIB_TEX5) +#define VERT_BIT_TEX6 BITFIELD64_BIT(VERT_ATTRIB_TEX6) +#define VERT_BIT_TEX7 BITFIELD64_BIT(VERT_ATTRIB_TEX7) +#define VERT_BIT_POINT_SIZE BITFIELD64_BIT(VERT_ATTRIB_POINT_SIZE) +#define VERT_BIT_GENERIC0 BITFIELD64_BIT(VERT_ATTRIB_GENERIC0) + +#define VERT_BIT(i) BITFIELD64_BIT(i) +#define VERT_BIT_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_MAX) + +#define VERT_BIT_FF(i) VERT_BIT(i) +#define VERT_BIT_FF_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_FF_MAX) +#define VERT_BIT_TEX(i) VERT_BIT(VERT_ATTRIB_TEX(i)) +#define VERT_BIT_TEX_ALL \ + BITFIELD64_RANGE(VERT_ATTRIB_TEX(0), VERT_ATTRIB_TEX_MAX) + +#define VERT_BIT_GENERIC(i) VERT_BIT(VERT_ATTRIB_GENERIC(i)) +#define VERT_BIT_GENERIC_ALL \ + BITFIELD64_RANGE(VERT_ATTRIB_GENERIC(0), 
VERT_ATTRIB_GENERIC_MAX) +/*@}*/ + + +/** + * Indexes for vertex shader outputs, geometry shader inputs/outputs, and + * fragment shader inputs. + * + * Note that some of these values are not available to all pipeline stages. + * + * When this enum is updated, the following code must be updated too: + * - vertResults (in prog_print.c's arb_output_attrib_string()) + * - fragAttribs (in prog_print.c's arb_input_attrib_string()) + * - _mesa_varying_slot_in_fs() + */ +typedef enum +{ + VARYING_SLOT_POS, + VARYING_SLOT_COL0, /* COL0 and COL1 must be contiguous */ + VARYING_SLOT_COL1, + VARYING_SLOT_FOGC, + VARYING_SLOT_TEX0, /* TEX0-TEX7 must be contiguous */ + VARYING_SLOT_TEX1, + VARYING_SLOT_TEX2, + VARYING_SLOT_TEX3, + VARYING_SLOT_TEX4, + VARYING_SLOT_TEX5, + VARYING_SLOT_TEX6, + VARYING_SLOT_TEX7, + VARYING_SLOT_PSIZ, /* Does not appear in FS */ + VARYING_SLOT_BFC0, /* Does not appear in FS */ + VARYING_SLOT_BFC1, /* Does not appear in FS */ + VARYING_SLOT_EDGE, /* Does not appear in FS */ + VARYING_SLOT_CLIP_VERTEX, /* Does not appear in FS */ VARYING_SLOT_CLIP_DIST0, + VARYING_SLOT_CLIP_DIST1, + VARYING_SLOT_PRIMITIVE_ID, /* Does not appear in VS */ + VARYING_SLOT_LAYER, /* Appears as VS or GS output */ + VARYING_SLOT_VIEWPORT, /* Appears as VS or GS output */ + VARYING_SLOT_FACE, /* FS only */ + VARYING_SLOT_PNTC, /* FS only */ + VARYING_SLOT_VAR0, /* First generic varying slot */ + VARYING_SLOT_MAX = VARYING_SLOT_VAR0 + MAX_VARYING +} gl_varying_slot; + + +/** + * Bitflags for varying slots. 
+ */ +/*@{*/ +#define VARYING_BIT_POS BITFIELD64_BIT(VARYING_SLOT_POS) +#define VARYING_BIT_COL0 BITFIELD64_BIT(VARYING_SLOT_COL0) +#define VARYING_BIT_COL1 BITFIELD64_BIT(VARYING_SLOT_COL1) +#define VARYING_BIT_FOGC BITFIELD64_BIT(VARYING_SLOT_FOGC) +#define VARYING_BIT_TEX0 BITFIELD64_BIT(VARYING_SLOT_TEX0) +#define VARYING_BIT_TEX1 BITFIELD64_BIT(VARYING_SLOT_TEX1) +#define VARYING_BIT_TEX2 BITFIELD64_BIT(VARYING_SLOT_TEX2) +#define VARYING_BIT_TEX3 BITFIELD64_BIT(VARYING_SLOT_TEX3) +#define VARYING_BIT_TEX4 BITFIELD64_BIT(VARYING_SLOT_TEX4) +#define VARYING_BIT_TEX5 BITFIELD64_BIT(VARYING_SLOT_TEX5) +#define VARYING_BIT_TEX6 BITFIELD64_BIT(VARYING_SLOT_TEX6) +#define VARYING_BIT_TEX7 BITFIELD64_BIT(VARYING_SLOT_TEX7) +#define VARYING_BIT_TEX(U) BITFIELD64_BIT(VARYING_SLOT_TEX0 + (U)) +#define VARYING_BITS_TEX_ANY BITFIELD64_RANGE(VARYING_SLOT_TEX0, \ + MAX_TEXTURE_COORD_UNITS) +#define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ) +#define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0) +#define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1) +#define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE) +#define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX) +#define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0) +#define VARYING_BIT_CLIP_DIST1 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1) +#define VARYING_BIT_PRIMITIVE_ID BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID) +#define VARYING_BIT_LAYER BITFIELD64_BIT(VARYING_SLOT_LAYER) +#define VARYING_BIT_VIEWPORT BITFIELD64_BIT(VARYING_SLOT_VIEWPORT) +#define VARYING_BIT_FACE BITFIELD64_BIT(VARYING_SLOT_FACE) +#define VARYING_BIT_PNTC BITFIELD64_BIT(VARYING_SLOT_PNTC) +#define VARYING_BIT_VAR(V) BITFIELD64_BIT(VARYING_SLOT_VAR0 + (V)) +/*@}*/ + + +/** + * Fragment program results + */ +typedef enum +{ + FRAG_RESULT_DEPTH = 0, + FRAG_RESULT_STENCIL = 1, + /* If a single color should be written to all render targets, this + * register is written. No FRAG_RESULT_DATAn will be written. 
+ */ + FRAG_RESULT_COLOR = 2, + FRAG_RESULT_SAMPLE_MASK = 3, + + /* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n] + * or ARB_fragment_program fragment.color[n]) color results. If + * any are written, FRAG_RESULT_COLOR will not be written. + */ + FRAG_RESULT_DATA0 = 4, + FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) +} gl_frag_result; + + #endif /* SHADER_ENUMS_H */ diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 481fd5e7fdf..2d285b87a78 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -96,189 +96,6 @@ struct vbo_context; -/** - * Indexes for vertex program attributes. - * GL_NV_vertex_program aliases generic attributes over the conventional - * attributes. In GL_ARB_vertex_program shader the aliasing is optional. - * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the - * generic attributes are distinct/separate). - */ -typedef enum -{ - VERT_ATTRIB_POS = 0, - VERT_ATTRIB_WEIGHT = 1, - VERT_ATTRIB_NORMAL = 2, - VERT_ATTRIB_COLOR0 = 3, - VERT_ATTRIB_COLOR1 = 4, - VERT_ATTRIB_FOG = 5, - VERT_ATTRIB_COLOR_INDEX = 6, - VERT_ATTRIB_EDGEFLAG = 7, - VERT_ATTRIB_TEX0 = 8, - VERT_ATTRIB_TEX1 = 9, - VERT_ATTRIB_TEX2 = 10, - VERT_ATTRIB_TEX3 = 11, - VERT_ATTRIB_TEX4 = 12, - VERT_ATTRIB_TEX5 = 13, - VERT_ATTRIB_TEX6 = 14, - VERT_ATTRIB_TEX7 = 15, - VERT_ATTRIB_POINT_SIZE = 16, - VERT_ATTRIB_GENERIC0 = 17, - VERT_ATTRIB_GENERIC1 = 18, - VERT_ATTRIB_GENERIC2 = 19, - VERT_ATTRIB_GENERIC3 = 20, - VERT_ATTRIB_GENERIC4 = 21, - VERT_ATTRIB_GENERIC5 = 22, - VERT_ATTRIB_GENERIC6 = 23, - VERT_ATTRIB_GENERIC7 = 24, - VERT_ATTRIB_GENERIC8 = 25, - VERT_ATTRIB_GENERIC9 = 26, - VERT_ATTRIB_GENERIC10 = 27, - VERT_ATTRIB_GENERIC11 = 28, - VERT_ATTRIB_GENERIC12 = 29, - VERT_ATTRIB_GENERIC13 = 30, - VERT_ATTRIB_GENERIC14 = 31, - VERT_ATTRIB_GENERIC15 = 32, - VERT_ATTRIB_MAX = 33 -} gl_vert_attrib; - -/** - * Symbolic constats to help iterating over - * specific blocks of vertex attributes. 
- * - * VERT_ATTRIB_FF - * includes all fixed function attributes as well as - * the aliased GL_NV_vertex_program shader attributes. - * VERT_ATTRIB_TEX - * include the classic texture coordinate attributes. - * Is a subset of VERT_ATTRIB_FF. - * VERT_ATTRIB_GENERIC - * include the OpenGL 2.0+ GLSL generic shader attributes. - * These alias the generic GL_ARB_vertex_shader attributes. - */ -#define VERT_ATTRIB_FF(i) (VERT_ATTRIB_POS + (i)) -#define VERT_ATTRIB_FF_MAX VERT_ATTRIB_GENERIC0 - -#define VERT_ATTRIB_TEX(i) (VERT_ATTRIB_TEX0 + (i)) -#define VERT_ATTRIB_TEX_MAX MAX_TEXTURE_COORD_UNITS - -#define VERT_ATTRIB_GENERIC(i) (VERT_ATTRIB_GENERIC0 + (i)) -#define VERT_ATTRIB_GENERIC_MAX MAX_VERTEX_GENERIC_ATTRIBS - -/** - * Bitflags for vertex attributes. - * These are used in bitfields in many places. - */ -/*@{*/ -#define VERT_BIT_POS BITFIELD64_BIT(VERT_ATTRIB_POS) -#define VERT_BIT_WEIGHT BITFIELD64_BIT(VERT_ATTRIB_WEIGHT) -#define VERT_BIT_NORMAL BITFIELD64_BIT(VERT_ATTRIB_NORMAL) -#define VERT_BIT_COLOR0 BITFIELD64_BIT(VERT_ATTRIB_COLOR0) -#define VERT_BIT_COLOR1 BITFIELD64_BIT(VERT_ATTRIB_COLOR1) -#define VERT_BIT_FOG BITFIELD64_BIT(VERT_ATTRIB_FOG) -#define VERT_BIT_COLOR_INDEX BITFIELD64_BIT(VERT_ATTRIB_COLOR_INDEX) -#define VERT_BIT_EDGEFLAG BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG) -#define VERT_BIT_TEX0 BITFIELD64_BIT(VERT_ATTRIB_TEX0) -#define VERT_BIT_TEX1 BITFIELD64_BIT(VERT_ATTRIB_TEX1) -#define VERT_BIT_TEX2 BITFIELD64_BIT(VERT_ATTRIB_TEX2) -#define VERT_BIT_TEX3 BITFIELD64_BIT(VERT_ATTRIB_TEX3) -#define VERT_BIT_TEX4 BITFIELD64_BIT(VERT_ATTRIB_TEX4) -#define VERT_BIT_TEX5 BITFIELD64_BIT(VERT_ATTRIB_TEX5) -#define VERT_BIT_TEX6 BITFIELD64_BIT(VERT_ATTRIB_TEX6) -#define VERT_BIT_TEX7 BITFIELD64_BIT(VERT_ATTRIB_TEX7) -#define VERT_BIT_POINT_SIZE BITFIELD64_BIT(VERT_ATTRIB_POINT_SIZE) -#define VERT_BIT_GENERIC0 BITFIELD64_BIT(VERT_ATTRIB_GENERIC0) - -#define VERT_BIT(i) BITFIELD64_BIT(i) -#define VERT_BIT_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_MAX) - 
-#define VERT_BIT_FF(i) VERT_BIT(i) -#define VERT_BIT_FF_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_FF_MAX) -#define VERT_BIT_TEX(i) VERT_BIT(VERT_ATTRIB_TEX(i)) -#define VERT_BIT_TEX_ALL \ - BITFIELD64_RANGE(VERT_ATTRIB_TEX(0), VERT_ATTRIB_TEX_MAX) - -#define VERT_BIT_GENERIC(i) VERT_BIT(VERT_ATTRIB_GENERIC(i)) -#define VERT_BIT_GENERIC_ALL \ - BITFIELD64_RANGE(VERT_ATTRIB_GENERIC(0), VERT_ATTRIB_GENERIC_MAX) -/*@}*/ - - -/** - * Indexes for vertex shader outputs, geometry shader inputs/outputs, and - * fragment shader inputs. - * - * Note that some of these values are not available to all pipeline stages. - * - * When this enum is updated, the following code must be updated too: - * - vertResults (in prog_print.c's arb_output_attrib_string()) - * - fragAttribs (in prog_print.c's arb_input_attrib_string()) - * - _mesa_varying_slot_in_fs() - */ -typedef enum -{ - VARYING_SLOT_POS, - VARYING_SLOT_COL0, /* COL0 and COL1 must be contiguous */ - VARYING_SLOT_COL1, - VARYING_SLOT_FOGC, - VARYING_SLOT_TEX0, /* TEX0-TEX7 must be contiguous */ - VARYING_SLOT_TEX1, - VARYING_SLOT_TEX2, - VARYING_SLOT_TEX3, - VARYING_SLOT_TEX4, - VARYING_SLOT_TEX5, - VARYING_SLOT_TEX6, - VARYING_SLOT_TEX7, - VARYING_SLOT_PSIZ, /* Does not appear in FS */ - VARYING_SLOT_BFC0, /* Does not appear in FS */ - VARYING_SLOT_BFC1, /* Does not appear in FS */ - VARYING_SLOT_EDGE, /* Does not appear in FS */ - VARYING_SLOT_CLIP_VERTEX, /* Does not appear in FS */ - VARYING_SLOT_CLIP_DIST0, - VARYING_SLOT_CLIP_DIST1, - VARYING_SLOT_PRIMITIVE_ID, /* Does not appear in VS */ - VARYING_SLOT_LAYER, /* Appears as VS or GS output */ - VARYING_SLOT_VIEWPORT, /* Appears as VS or GS output */ - VARYING_SLOT_FACE, /* FS only */ - VARYING_SLOT_PNTC, /* FS only */ - VARYING_SLOT_VAR0, /* First generic varying slot */ - VARYING_SLOT_MAX = VARYING_SLOT_VAR0 + MAX_VARYING -} gl_varying_slot; - - -/** - * Bitflags for varying slots. 
- */ -/*@{*/ -#define VARYING_BIT_POS BITFIELD64_BIT(VARYING_SLOT_POS) -#define VARYING_BIT_COL0 BITFIELD64_BIT(VARYING_SLOT_COL0) -#define VARYING_BIT_COL1 BITFIELD64_BIT(VARYING_SLOT_COL1) -#define VARYING_BIT_FOGC BITFIELD64_BIT(VARYING_SLOT_FOGC) -#define VARYING_BIT_TEX0 BITFIELD64_BIT(VARYING_SLOT_TEX0) -#define VARYING_BIT_TEX1 BITFIELD64_BIT(VARYING_SLOT_TEX1) -#define VARYING_BIT_TEX2 BITFIELD64_BIT(VARYING_SLOT_TEX2) -#define VARYING_BIT_TEX3 BITFIELD64_BIT(VARYING_SLOT_TEX3) -#define VARYING_BIT_TEX4 BITFIELD64_BIT(VARYING_SLOT_TEX4) -#define VARYING_BIT_TEX5 BITFIELD64_BIT(VARYING_SLOT_TEX5) -#define VARYING_BIT_TEX6 BITFIELD64_BIT(VARYING_SLOT_TEX6) -#define VARYING_BIT_TEX7 BITFIELD64_BIT(VARYING_SLOT_TEX7) -#define VARYING_BIT_TEX(U) BITFIELD64_BIT(VARYING_SLOT_TEX0 + (U)) -#define VARYING_BITS_TEX_ANY BITFIELD64_RANGE(VARYING_SLOT_TEX0, \ - MAX_TEXTURE_COORD_UNITS) -#define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ) -#define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0) -#define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1) -#define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE) -#define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX) -#define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0) -#define VARYING_BIT_CLIP_DIST1 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1) -#define VARYING_BIT_PRIMITIVE_ID BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID) -#define VARYING_BIT_LAYER BITFIELD64_BIT(VARYING_SLOT_LAYER) -#define VARYING_BIT_VIEWPORT BITFIELD64_BIT(VARYING_SLOT_VIEWPORT) -#define VARYING_BIT_FACE BITFIELD64_BIT(VARYING_SLOT_FACE) -#define VARYING_BIT_PNTC BITFIELD64_BIT(VARYING_SLOT_PNTC) -#define VARYING_BIT_VAR(V) BITFIELD64_BIT(VARYING_SLOT_VAR0 + (V)) -/*@}*/ - /** * Determine if the given gl_varying_slot appears in the fragment shader. 
*/ @@ -299,28 +116,6 @@ _mesa_varying_slot_in_fs(gl_varying_slot slot) } -/** - * Fragment program results - */ -typedef enum -{ - FRAG_RESULT_DEPTH = 0, - FRAG_RESULT_STENCIL = 1, - /* If a single color should be written to all render targets, this - * register is written. No FRAG_RESULT_DATAn will be written. - */ - FRAG_RESULT_COLOR = 2, - FRAG_RESULT_SAMPLE_MASK = 3, - - /* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n] - * or ARB_fragment_program fragment.color[n]) color results. If - * any are written, FRAG_RESULT_COLOR will not be written. - */ - FRAG_RESULT_DATA0 = 4, - FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) -} gl_frag_result; - - /** * Indexes for all renderbuffers */ -- cgit v1.2.3 From b72936fdadfe9bd2484e142b7b5573d7e4adad34 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 25 Jun 2015 17:31:07 -0700 Subject: nir/spirv: Actually put variables on the right linked list --- src/glsl/nir/spirv_to_nir.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 2b7eda39008..fbf6969f2fd 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -565,15 +565,30 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, vtn_value(b, w[4], vtn_value_type_constant)->constant; } - if (var->data.mode == nir_var_local) { - exec_list_push_tail(&b->impl->locals, &var->node); - } else { - exec_list_push_tail(&b->shader->globals, &var->node); - } - val->deref = nir_deref_var_create(b, var); vtn_foreach_decoration(b, val, var_decoration_cb, var); + + switch (var->data.mode) { + case nir_var_shader_in: + exec_list_push_tail(&b->shader->inputs, &var->node); + break; + case nir_var_shader_out: + exec_list_push_tail(&b->shader->outputs, &var->node); + break; + case nir_var_global: + exec_list_push_tail(&b->shader->globals, &var->node); + break; + case nir_var_local: + 
exec_list_push_tail(&b->impl->locals, &var->node); + break; + case nir_var_uniform: + exec_list_push_tail(&b->shader->uniforms, &var->node); + break; + case nir_var_system_value: + exec_list_push_tail(&b->shader->system_values, &var->node); + break; + } break; } -- cgit v1.2.3 From 7e1792b1b75b29e6d1087ceb865c5080005e8fe9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 25 Jun 2015 17:31:56 -0700 Subject: nir/spirv: Set the system value mode on builtins --- src/glsl/nir/spirv_to_nir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index fbf6969f2fd..7632ca2bf18 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -419,6 +419,7 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, var->data.descriptor_set = dec->literals[0]; break; case SpvDecorationBuiltIn: + var->data.mode = nir_var_system_value; var->data.read_only = true; switch ((SpvBuiltIn)dec->literals[0]) { case SpvBuiltInFrontFacing: -- cgit v1.2.3 From 333b8ddd6bf189bb6398edd602bdda40d7914a34 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 25 Jun 2015 17:32:10 -0700 Subject: nir/spirv: Set the interface type on uniform blocks --- src/glsl/nir/spirv_to_nir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 7632ca2bf18..2dd3c42db03 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -537,6 +537,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvStorageClassUniformConstant: var->data.mode = nir_var_uniform; var->data.read_only = true; + var->interface_type = type; break; case SpvStorageClassInput: var->data.mode = nir_var_shader_in; -- cgit v1.2.3 From 588acdb431ff672394b5d488d7cdbd7b3f42adf2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 25 Jun 2015 17:32:25 -0700 Subject: nir/spirv: Set the right location for shader input/outputs We need 
to add FRAG_RESULT_DATA0 etc. to the input/output location. --- src/glsl/nir/spirv_to_nir.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 2dd3c42db03..d8523fd5a52 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -571,6 +571,17 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, vtn_foreach_decoration(b, val, var_decoration_cb, var); + if (b->execution_model == SpvExecutionModelFragment && + var->data.mode == nir_var_shader_out) { + var->data.location += FRAG_RESULT_DATA0; + } else if (b->execution_model == SpvExecutionModelVertex && + var->data.mode == nir_var_shader_in) { + var->data.location += VERT_ATTRIB_GENERIC0; + } else if (var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out) { + var->data.location += VARYING_SLOT_VAR0; + } + switch (var->data.mode) { case nir_var_shader_in: exec_list_push_tail(&b->shader->inputs, &var->node); -- cgit v1.2.3 From 4f5ef945e010422ab96b13573cd002a5cc82d4ca Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 25 Jun 2015 17:33:18 -0700 Subject: i965: Don't print the GLSL IR if it doesn't exist --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 2c0ff961182..b43dafc798c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4015,7 +4015,7 @@ brw_wm_fs_emit(struct brw_context *brw, if (prog) shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - if (unlikely(INTEL_DEBUG & DEBUG_WM)) + if (unlikely(INTEL_DEBUG & DEBUG_WM) && shader->base.ir) brw_dump_ir("fragment", prog, &shader->base, &fp->Base); int st_index8 = -1, st_index16 = -1; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp index a5c686ceaaf..05f188fe116 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1882,7 +1882,7 @@ brw_vs_emit(struct brw_context *brw, st_index = brw_get_shader_time_index(brw, prog, &c->vp->program.Base, ST_VS); - if (unlikely(INTEL_DEBUG & DEBUG_VS)) + if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir) brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base); if (brw->intelScreen->compiler->scalar_vs) { -- cgit v1.2.3 From c4c1d96a0195f3520bd73c46a0c2f0a513035f72 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 25 Jun 2015 17:34:11 -0700 Subject: HACK: Get rid of sanity_param_count for FS --- src/mesa/drivers/dri/i965/brw_fs.cpp | 9 --------- 1 file changed, 9 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b43dafc798c..8984b4cb3ca 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3863,8 +3863,6 @@ fs_visitor::run_fs(bool do_rep_send) assert(stage == MESA_SHADER_FRAGMENT); - sanity_param_count = prog->Parameters->NumParameters; - if (prog_data->map_entries == NULL) assign_binding_table_offsets(); @@ -3936,13 +3934,6 @@ fs_visitor::run_fs(bool do_rep_send) else wm_prog_data->reg_blocks_16 = brw_register_blocks(grf_used); - /* If any state parameters were appended, then ParameterValues could have - * been realloced, in which case the driver uniform storage set up by - * _mesa_associate_uniform_storage() would point to freed memory. Make - * sure that didn't happen. 
- */ - assert(sanity_param_count == prog->Parameters->NumParameters); - return !failed; } -- cgit v1.2.3 From d5e41a3a9959da6bce41a6ac4bd8eec5c07ebda9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 25 Jun 2015 17:36:22 -0700 Subject: vk/compiler: Add the initial hacks to get SPIR-V up and going --- src/vulkan/compiler.cpp | 66 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 0ea44ac6ce5..144dd986260 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -29,6 +29,7 @@ #include #include /* brw_new_shader_program is here */ +#include #include #include @@ -40,6 +41,14 @@ #include #include +/* XXX: We need this to keep symbols in nir.h from conflicting with the + * generated GEN command packing headers. We need to fix *both* to not + * define something as generic as LOAD. + */ +#undef LOAD + +#include + #define SPIR_V_MAGIC_NUMBER 0x07230203 static void @@ -883,14 +892,15 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) static const struct { uint32_t token; + gl_shader_stage stage; const char *name; } stage_info[] = { - { GL_VERTEX_SHADER, "vertex" }, - { GL_TESS_CONTROL_SHADER, "tess control" }, - { GL_TESS_EVALUATION_SHADER, "tess evaluation" }, - { GL_GEOMETRY_SHADER, "geometry" }, - { GL_FRAGMENT_SHADER, "fragment" }, - { GL_COMPUTE_SHADER, "compute" }, + { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" }, + { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1,"tess control" }, + { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" }, + { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" }, + { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" }, + { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" }, }; struct spirv_header{ @@ -945,7 +955,38 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, struct gl_shader_program *program, struct anv_pipeline *pipeline, uint32_t 
stage) { - unreachable("SPIR-V is not supported yet!"); + struct brw_context *brw = compiler->brw; + struct anv_shader *shader = pipeline->shaders[stage]; + struct gl_shader *mesa_shader; + int name = 0; + + mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); + fail_if(mesa_shader == NULL, + "failed to create %s shader\n", stage_info[stage].name); + + mesa_shader->Program = rzalloc(mesa_shader, struct gl_program); + mesa_shader->Type = stage_info[stage].token; + mesa_shader->Stage = stage_info[stage].stage; + + assert(shader->size % 4 == 0); + + struct gl_shader_compiler_options *glsl_options = + &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage]; + + mesa_shader->Program->nir = + spirv_to_nir((uint32_t *)shader->data, shader->size / 4, + glsl_options->NirOptions); + nir_validate_shader(mesa_shader->Program->nir); + + brw_process_nir(mesa_shader->Program->nir, + compiler->screen->devinfo, + NULL, mesa_shader->Stage); + + fail_if(mesa_shader->Program->nir == NULL, + "failed to translate SPIR-V to NIR\n"); + + program->Shaders[program->NumShaders] = mesa_shader; + program->NumShaders++; } static void @@ -1014,7 +1055,10 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) anv_compile_shader_spirv(compiler, program, pipeline, i); } - /* TODO: nir_link_shader? */ + for (unsigned i = 0; i < program->NumShaders; i++) { + struct gl_shader *shader = program->Shaders[i]; + program->_LinkedShaders[shader->Stage] = shader; + } } else { for (unsigned i = 0; i < VK_NUM_SHADER_STAGE; i++) { if (pipeline->shaders[i]) @@ -1092,7 +1136,11 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) &pipeline->cs_prog_data.base); } - brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); + /* XXX: Deleting the shader is broken with our current SPIR-V hacks. We + * need to fix this ASAP. 
+ */ + if (!all_spirv) + brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); struct anv_device *device = compiler->device; while (device->scratch_block_pool.bo.size < pipeline->total_scratch) -- cgit v1.2.3 From ae29fd1b5533ff8c996aff7d21cc1d8b59a7fead Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 25 Jun 2015 17:56:20 -0700 Subject: vk/formats: Don't abbreviate tokens in the format table Abbreviating the VK_FORMAT_* tokens doesn't help much. To the contrary, it means grep and ctags can't find them. --- src/vulkan/formats.c | 350 +++++++++++++++++++++++++-------------------------- 1 file changed, 175 insertions(+), 175 deletions(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index c119be6d041..2fc85c81487 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -26,189 +26,189 @@ #define UNSUPPORTED 0xffff #define fmt(__vk_fmt, ...) \ - [VK_FORMAT_##__vk_fmt] = { .name = "VK_FORMAT_" #__vk_fmt, __VA_ARGS__ } + [__vk_fmt] = { .name = #__vk_fmt, __VA_ARGS__ } static const struct anv_format anv_formats[] = { - fmt(UNDEFINED, .format = RAW, .cpp = 1, .channels = 1), - fmt(R4G4_UNORM, .format = UNSUPPORTED), - fmt(R4G4_USCALED, .format = UNSUPPORTED), - fmt(R4G4B4A4_UNORM, .format = UNSUPPORTED), - fmt(R4G4B4A4_USCALED, .format = UNSUPPORTED), - fmt(R5G6B5_UNORM, .format = UNSUPPORTED), - fmt(R5G6B5_USCALED, .format = UNSUPPORTED), - fmt(R5G5B5A1_UNORM, .format = UNSUPPORTED), - fmt(R5G5B5A1_USCALED, .format = UNSUPPORTED), - fmt(R8_UNORM, .format = R8_UNORM, .cpp = 1, .channels = 1), - fmt(R8_SNORM, .format = R8_SNORM, .cpp = 1, .channels = 1,), - fmt(R8_USCALED, .format = R8_USCALED, .cpp = 1, .channels = 1), - fmt(R8_SSCALED, .format = R8_SSCALED, .cpp = 1, .channels = 1), - fmt(R8_UINT, .format = R8_UINT, .cpp = 1, .channels = 1), - fmt(R8_SINT, .format = R8_SINT, .cpp = 1, .channels = 1), - fmt(R8_SRGB, .format = UNSUPPORTED), - fmt(R8G8_UNORM, .format = R8G8_UNORM, .cpp = 2, .channels = 2), - fmt(R8G8_SNORM, 
.format = R8G8_SNORM, .cpp = 2, .channels = 2), - fmt(R8G8_USCALED, .format = R8G8_USCALED, .cpp = 2, .channels = 2), - fmt(R8G8_SSCALED, .format = R8G8_SSCALED, .cpp = 2, .channels = 2), - fmt(R8G8_UINT, .format = R8G8_UINT, .cpp = 2, .channels = 2), - fmt(R8G8_SINT, .format = R8G8_SINT, .cpp = 2, .channels = 2), - fmt(R8G8_SRGB, .format = UNSUPPORTED), /* L8A8_UNORM_SRGB */ - fmt(R8G8B8_UNORM, .format = R8G8B8X8_UNORM, .cpp = 3, .channels = 3), - fmt(R8G8B8_SNORM, .format = R8G8B8_SNORM, .cpp = 4), - fmt(R8G8B8_USCALED, .format = R8G8B8_USCALED, .cpp = 3, .channels = 3), - fmt(R8G8B8_SSCALED, .format = R8G8B8_SSCALED, .cpp = 3, .channels = 3), - fmt(R8G8B8_UINT, .format = R8G8B8_UINT, .cpp = 3, .channels = 3), - fmt(R8G8B8_SINT, .format = R8G8B8_SINT, .cpp = 3, .channels = 3), - fmt(R8G8B8_SRGB, .format = UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ - fmt(R8G8B8A8_UNORM, .format = R8G8B8A8_UNORM, .cpp = 4, .channels = 4), - fmt(R8G8B8A8_SNORM, .format = R8G8B8A8_SNORM, .cpp = 4, .channels = 4), - fmt(R8G8B8A8_USCALED, .format = R8G8B8A8_USCALED, .cpp = 4, .channels = 4), - fmt(R8G8B8A8_SSCALED, .format = R8G8B8A8_SSCALED, .cpp = 4, .channels = 4), - fmt(R8G8B8A8_UINT, .format = R8G8B8A8_UINT, .cpp = 4, .channels = 4), - fmt(R8G8B8A8_SINT, .format = R8G8B8A8_SINT, .cpp = 4, .channels = 4), - fmt(R8G8B8A8_SRGB, .format = R8G8B8A8_UNORM_SRGB, .cpp = 4, .channels = 4), - fmt(R10G10B10A2_UNORM, .format = R10G10B10A2_UNORM, .cpp = 4, .channels = 4), - fmt(R10G10B10A2_SNORM, .format = R10G10B10A2_SNORM, .cpp = 4, .channels = 4), - fmt(R10G10B10A2_USCALED, .format = R10G10B10A2_USCALED, .cpp = 4, .channels = 4), - fmt(R10G10B10A2_SSCALED, .format = R10G10B10A2_SSCALED, .cpp = 4, .channels = 4), - fmt(R10G10B10A2_UINT, .format = R10G10B10A2_UINT, .cpp = 4, .channels = 4), - fmt(R10G10B10A2_SINT, .format = R10G10B10A2_SINT, .cpp = 4, .channels = 4), - fmt(R16_UNORM, .format = R16_UNORM, .cpp = 2, .channels = 1), - fmt(R16_SNORM, .format = R16_SNORM, .cpp = 2, .channels = 1), - 
fmt(R16_USCALED, .format = R16_USCALED, .cpp = 2, .channels = 1), - fmt(R16_SSCALED, .format = R16_SSCALED, .cpp = 2, .channels = 1), - fmt(R16_UINT, .format = R16_UINT, .cpp = 2, .channels = 1), - fmt(R16_SINT, .format = R16_SINT, .cpp = 2, .channels = 1), - fmt(R16_SFLOAT, .format = R16_FLOAT, .cpp = 2, .channels = 1), - fmt(R16G16_UNORM, .format = R16G16_UNORM, .cpp = 4, .channels = 2), - fmt(R16G16_SNORM, .format = R16G16_SNORM, .cpp = 4, .channels = 2), - fmt(R16G16_USCALED, .format = R16G16_USCALED, .cpp = 4, .channels = 2), - fmt(R16G16_SSCALED, .format = R16G16_SSCALED, .cpp = 4, .channels = 2), - fmt(R16G16_UINT, .format = R16G16_UINT, .cpp = 4, .channels = 2), - fmt(R16G16_SINT, .format = R16G16_SINT, .cpp = 4, .channels = 2), - fmt(R16G16_SFLOAT, .format = R16G16_FLOAT, .cpp = 4, .channels = 2), - fmt(R16G16B16_UNORM, .format = R16G16B16_UNORM, .cpp = 6, .channels = 3), - fmt(R16G16B16_SNORM, .format = R16G16B16_SNORM, .cpp = 6, .channels = 3), - fmt(R16G16B16_USCALED, .format = R16G16B16_USCALED, .cpp = 6, .channels = 3), - fmt(R16G16B16_SSCALED, .format = R16G16B16_SSCALED, .cpp = 6, .channels = 3), - fmt(R16G16B16_UINT, .format = R16G16B16_UINT, .cpp = 6, .channels = 3), - fmt(R16G16B16_SINT, .format = R16G16B16_SINT, .cpp = 6, .channels = 3), - fmt(R16G16B16_SFLOAT, .format = R16G16B16_FLOAT, .cpp = 6, .channels = 3), - fmt(R16G16B16A16_UNORM, .format = R16G16B16A16_UNORM, .cpp = 8, .channels = 4), - fmt(R16G16B16A16_SNORM, .format = R16G16B16A16_SNORM, .cpp = 8, .channels = 4), - fmt(R16G16B16A16_USCALED, .format = R16G16B16A16_USCALED, .cpp = 8, .channels = 4), - fmt(R16G16B16A16_SSCALED, .format = R16G16B16A16_SSCALED, .cpp = 8, .channels = 4), - fmt(R16G16B16A16_UINT, .format = R16G16B16A16_UINT, .cpp = 8, .channels = 4), - fmt(R16G16B16A16_SINT, .format = R16G16B16A16_SINT, .cpp = 8, .channels = 4), - fmt(R16G16B16A16_SFLOAT, .format = R16G16B16A16_FLOAT, .cpp = 8, .channels = 4), - fmt(R32_UINT, .format = R32_UINT, .cpp = 4, .channels = 1,), - 
fmt(R32_SINT, .format = R32_SINT, .cpp = 4, .channels = 1,), - fmt(R32_SFLOAT, .format = R32_FLOAT, .cpp = 4, .channels = 1,), - fmt(R32G32_UINT, .format = R32G32_UINT, .cpp = 8, .channels = 2,), - fmt(R32G32_SINT, .format = R32G32_SINT, .cpp = 8, .channels = 2,), - fmt(R32G32_SFLOAT, .format = R32G32_FLOAT, .cpp = 8, .channels = 2,), - fmt(R32G32B32_UINT, .format = R32G32B32_UINT, .cpp = 12, .channels = 3,), - fmt(R32G32B32_SINT, .format = R32G32B32_SINT, .cpp = 12, .channels = 3,), - fmt(R32G32B32_SFLOAT, .format = R32G32B32_FLOAT, .cpp = 12, .channels = 3,), - fmt(R32G32B32A32_UINT, .format = R32G32B32A32_UINT, .cpp = 16, .channels = 4,), - fmt(R32G32B32A32_SINT, .format = R32G32B32A32_SINT, .cpp = 16, .channels = 4,), - fmt(R32G32B32A32_SFLOAT, .format = R32G32B32A32_FLOAT, .cpp = 16, .channels = 4,), - fmt(R64_SFLOAT, .format = R64_FLOAT, .cpp = 8, .channels = 1), - fmt(R64G64_SFLOAT, .format = R64G64_FLOAT, .cpp = 16, .channels = 2), - fmt(R64G64B64_SFLOAT, .format = R64G64B64_FLOAT, .cpp = 24, .channels = 3), - fmt(R64G64B64A64_SFLOAT, .format = R64G64B64A64_FLOAT, .cpp = 32, .channels = 4), - fmt(R11G11B10_UFLOAT, .format = R11G11B10_FLOAT, .cpp = 4, .channels = 3), - fmt(R9G9B9E5_UFLOAT, .format = R9G9B9E5_SHAREDEXP, .cpp = 4, .channels = 3), + fmt(VK_FORMAT_UNDEFINED, .format = RAW, .cpp = 1, .channels = 1), + fmt(VK_FORMAT_R4G4_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_R4G4_USCALED, .format = UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_USCALED, .format = UNSUPPORTED), + fmt(VK_FORMAT_R5G6B5_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_R5G6B5_USCALED, .format = UNSUPPORTED), + fmt(VK_FORMAT_R5G5B5A1_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_R5G5B5A1_USCALED, .format = UNSUPPORTED), + fmt(VK_FORMAT_R8_UNORM, .format = R8_UNORM, .cpp = 1, .channels = 1), + fmt(VK_FORMAT_R8_SNORM, .format = R8_SNORM, .cpp = 1, .channels = 1,), + fmt(VK_FORMAT_R8_USCALED, .format = R8_USCALED, .cpp = 1, .channels 
= 1), + fmt(VK_FORMAT_R8_SSCALED, .format = R8_SSCALED, .cpp = 1, .channels = 1), + fmt(VK_FORMAT_R8_UINT, .format = R8_UINT, .cpp = 1, .channels = 1), + fmt(VK_FORMAT_R8_SINT, .format = R8_SINT, .cpp = 1, .channels = 1), + fmt(VK_FORMAT_R8_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_R8G8_UNORM, .format = R8G8_UNORM, .cpp = 2, .channels = 2), + fmt(VK_FORMAT_R8G8_SNORM, .format = R8G8_SNORM, .cpp = 2, .channels = 2), + fmt(VK_FORMAT_R8G8_USCALED, .format = R8G8_USCALED, .cpp = 2, .channels = 2), + fmt(VK_FORMAT_R8G8_SSCALED, .format = R8G8_SSCALED, .cpp = 2, .channels = 2), + fmt(VK_FORMAT_R8G8_UINT, .format = R8G8_UINT, .cpp = 2, .channels = 2), + fmt(VK_FORMAT_R8G8_SINT, .format = R8G8_SINT, .cpp = 2, .channels = 2), + fmt(VK_FORMAT_R8G8_SRGB, .format = UNSUPPORTED), /* L8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8_UNORM, .format = R8G8B8X8_UNORM, .cpp = 3, .channels = 3), + fmt(VK_FORMAT_R8G8B8_SNORM, .format = R8G8B8_SNORM, .cpp = 4), + fmt(VK_FORMAT_R8G8B8_USCALED, .format = R8G8B8_USCALED, .cpp = 3, .channels = 3), + fmt(VK_FORMAT_R8G8B8_SSCALED, .format = R8G8B8_SSCALED, .cpp = 3, .channels = 3), + fmt(VK_FORMAT_R8G8B8_UINT, .format = R8G8B8_UINT, .cpp = 3, .channels = 3), + fmt(VK_FORMAT_R8G8B8_SINT, .format = R8G8B8_SINT, .cpp = 3, .channels = 3), + fmt(VK_FORMAT_R8G8B8_SRGB, .format = UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8A8_UNORM, .format = R8G8B8A8_UNORM, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SNORM, .format = R8G8B8A8_SNORM, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R8G8B8A8_USCALED, .format = R8G8B8A8_USCALED, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SSCALED, .format = R8G8B8A8_SSCALED, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R8G8B8A8_UINT, .format = R8G8B8A8_UINT, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SINT, .format = R8G8B8A8_SINT, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SRGB, .format = R8G8B8A8_UNORM_SRGB, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UNORM, .format = 
R10G10B10A2_UNORM, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SNORM, .format = R10G10B10A2_SNORM, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R10G10B10A2_USCALED, .format = R10G10B10A2_USCALED, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SSCALED, .format = R10G10B10A2_SSCALED, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UINT, .format = R10G10B10A2_UINT, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SINT, .format = R10G10B10A2_SINT, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_R16_UNORM, .format = R16_UNORM, .cpp = 2, .channels = 1), + fmt(VK_FORMAT_R16_SNORM, .format = R16_SNORM, .cpp = 2, .channels = 1), + fmt(VK_FORMAT_R16_USCALED, .format = R16_USCALED, .cpp = 2, .channels = 1), + fmt(VK_FORMAT_R16_SSCALED, .format = R16_SSCALED, .cpp = 2, .channels = 1), + fmt(VK_FORMAT_R16_UINT, .format = R16_UINT, .cpp = 2, .channels = 1), + fmt(VK_FORMAT_R16_SINT, .format = R16_SINT, .cpp = 2, .channels = 1), + fmt(VK_FORMAT_R16_SFLOAT, .format = R16_FLOAT, .cpp = 2, .channels = 1), + fmt(VK_FORMAT_R16G16_UNORM, .format = R16G16_UNORM, .cpp = 4, .channels = 2), + fmt(VK_FORMAT_R16G16_SNORM, .format = R16G16_SNORM, .cpp = 4, .channels = 2), + fmt(VK_FORMAT_R16G16_USCALED, .format = R16G16_USCALED, .cpp = 4, .channels = 2), + fmt(VK_FORMAT_R16G16_SSCALED, .format = R16G16_SSCALED, .cpp = 4, .channels = 2), + fmt(VK_FORMAT_R16G16_UINT, .format = R16G16_UINT, .cpp = 4, .channels = 2), + fmt(VK_FORMAT_R16G16_SINT, .format = R16G16_SINT, .cpp = 4, .channels = 2), + fmt(VK_FORMAT_R16G16_SFLOAT, .format = R16G16_FLOAT, .cpp = 4, .channels = 2), + fmt(VK_FORMAT_R16G16B16_UNORM, .format = R16G16B16_UNORM, .cpp = 6, .channels = 3), + fmt(VK_FORMAT_R16G16B16_SNORM, .format = R16G16B16_SNORM, .cpp = 6, .channels = 3), + fmt(VK_FORMAT_R16G16B16_USCALED, .format = R16G16B16_USCALED, .cpp = 6, .channels = 3), + fmt(VK_FORMAT_R16G16B16_SSCALED, .format = R16G16B16_SSCALED, .cpp = 6, .channels = 3), + fmt(VK_FORMAT_R16G16B16_UINT, .format = R16G16B16_UINT, 
.cpp = 6, .channels = 3), + fmt(VK_FORMAT_R16G16B16_SINT, .format = R16G16B16_SINT, .cpp = 6, .channels = 3), + fmt(VK_FORMAT_R16G16B16_SFLOAT, .format = R16G16B16_FLOAT, .cpp = 6, .channels = 3), + fmt(VK_FORMAT_R16G16B16A16_UNORM, .format = R16G16B16A16_UNORM, .cpp = 8, .channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SNORM, .format = R16G16B16A16_SNORM, .cpp = 8, .channels = 4), + fmt(VK_FORMAT_R16G16B16A16_USCALED, .format = R16G16B16A16_USCALED, .cpp = 8, .channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SSCALED, .format = R16G16B16A16_SSCALED, .cpp = 8, .channels = 4), + fmt(VK_FORMAT_R16G16B16A16_UINT, .format = R16G16B16A16_UINT, .cpp = 8, .channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SINT, .format = R16G16B16A16_SINT, .cpp = 8, .channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, .format = R16G16B16A16_FLOAT, .cpp = 8, .channels = 4), + fmt(VK_FORMAT_R32_UINT, .format = R32_UINT, .cpp = 4, .channels = 1,), + fmt(VK_FORMAT_R32_SINT, .format = R32_SINT, .cpp = 4, .channels = 1,), + fmt(VK_FORMAT_R32_SFLOAT, .format = R32_FLOAT, .cpp = 4, .channels = 1,), + fmt(VK_FORMAT_R32G32_UINT, .format = R32G32_UINT, .cpp = 8, .channels = 2,), + fmt(VK_FORMAT_R32G32_SINT, .format = R32G32_SINT, .cpp = 8, .channels = 2,), + fmt(VK_FORMAT_R32G32_SFLOAT, .format = R32G32_FLOAT, .cpp = 8, .channels = 2,), + fmt(VK_FORMAT_R32G32B32_UINT, .format = R32G32B32_UINT, .cpp = 12, .channels = 3,), + fmt(VK_FORMAT_R32G32B32_SINT, .format = R32G32B32_SINT, .cpp = 12, .channels = 3,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, .format = R32G32B32_FLOAT, .cpp = 12, .channels = 3,), + fmt(VK_FORMAT_R32G32B32A32_UINT, .format = R32G32B32A32_UINT, .cpp = 16, .channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SINT, .format = R32G32B32A32_SINT, .cpp = 16, .channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, .format = R32G32B32A32_FLOAT, .cpp = 16, .channels = 4,), + fmt(VK_FORMAT_R64_SFLOAT, .format = R64_FLOAT, .cpp = 8, .channels = 1), + fmt(VK_FORMAT_R64G64_SFLOAT, .format = R64G64_FLOAT, .cpp = 16, .channels 
= 2), + fmt(VK_FORMAT_R64G64B64_SFLOAT, .format = R64G64B64_FLOAT, .cpp = 24, .channels = 3), + fmt(VK_FORMAT_R64G64B64A64_SFLOAT, .format = R64G64B64A64_FLOAT, .cpp = 32, .channels = 4), + fmt(VK_FORMAT_R11G11B10_UFLOAT, .format = R11G11B10_FLOAT, .cpp = 4, .channels = 3), + fmt(VK_FORMAT_R9G9B9E5_UFLOAT, .format = R9G9B9E5_SHAREDEXP, .cpp = 4, .channels = 3), /* For depth/stencil formats, the .format and .cpp fields describe the * depth format. The field .has_stencil indicates whether or not there's a * stencil buffer. */ - fmt(D16_UNORM, .format = D16_UNORM, .cpp = 2, .channels = 1), - fmt(D24_UNORM, .format = D24_UNORM_X8_UINT, .cpp = 4, .channels = 1), - fmt(D32_SFLOAT, .format = D32_FLOAT, .cpp = 4, .channels = 1), - fmt(S8_UINT, .format = UNSUPPORTED, .cpp = 0, .channels = 1, .has_stencil = true), - fmt(D16_UNORM_S8_UINT, .format = D16_UNORM, .cpp = 2, .channels = 2, .has_stencil = true), - fmt(D24_UNORM_S8_UINT, .format = D24_UNORM_X8_UINT, .cpp = 4, .channels = 2, .has_stencil = true), - fmt(D32_SFLOAT_S8_UINT, .format = D32_FLOAT, .cpp = 4, .channels = 2, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM, .format = D16_UNORM, .cpp = 2, .channels = 1), + fmt(VK_FORMAT_D24_UNORM, .format = D24_UNORM_X8_UINT, .cpp = 4, .channels = 1), + fmt(VK_FORMAT_D32_SFLOAT, .format = D32_FLOAT, .cpp = 4, .channels = 1), + fmt(VK_FORMAT_S8_UINT, .format = UNSUPPORTED, .cpp = 0, .channels = 1, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, .format = D16_UNORM, .cpp = 2, .channels = 2, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, .format = D24_UNORM_X8_UINT, .cpp = 4, .channels = 2, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, .format = D32_FLOAT, .cpp = 4, .channels = 2, .has_stencil = true), - fmt(BC1_RGB_UNORM, .format = UNSUPPORTED), - fmt(BC1_RGB_SRGB, .format = UNSUPPORTED), - fmt(BC1_RGBA_UNORM, .format = UNSUPPORTED), - fmt(BC1_RGBA_SRGB, .format = UNSUPPORTED), - fmt(BC2_UNORM, .format = UNSUPPORTED), - fmt(BC2_SRGB, .format = 
UNSUPPORTED), - fmt(BC3_UNORM, .format = UNSUPPORTED), - fmt(BC3_SRGB, .format = UNSUPPORTED), - fmt(BC4_UNORM, .format = UNSUPPORTED), - fmt(BC4_SNORM, .format = UNSUPPORTED), - fmt(BC5_UNORM, .format = UNSUPPORTED), - fmt(BC5_SNORM, .format = UNSUPPORTED), - fmt(BC6H_UFLOAT, .format = UNSUPPORTED), - fmt(BC6H_SFLOAT, .format = UNSUPPORTED), - fmt(BC7_UNORM, .format = UNSUPPORTED), - fmt(BC7_SRGB, .format = UNSUPPORTED), - fmt(ETC2_R8G8B8_UNORM, .format = UNSUPPORTED), - fmt(ETC2_R8G8B8_SRGB, .format = UNSUPPORTED), - fmt(ETC2_R8G8B8A1_UNORM, .format = UNSUPPORTED), - fmt(ETC2_R8G8B8A1_SRGB, .format = UNSUPPORTED), - fmt(ETC2_R8G8B8A8_UNORM, .format = UNSUPPORTED), - fmt(ETC2_R8G8B8A8_SRGB, .format = UNSUPPORTED), - fmt(EAC_R11_UNORM, .format = UNSUPPORTED), - fmt(EAC_R11_SNORM, .format = UNSUPPORTED), - fmt(EAC_R11G11_UNORM, .format = UNSUPPORTED), - fmt(EAC_R11G11_SNORM, .format = UNSUPPORTED), - fmt(ASTC_4x4_UNORM, .format = UNSUPPORTED), - fmt(ASTC_4x4_SRGB, .format = UNSUPPORTED), - fmt(ASTC_5x4_UNORM, .format = UNSUPPORTED), - fmt(ASTC_5x4_SRGB, .format = UNSUPPORTED), - fmt(ASTC_5x5_UNORM, .format = UNSUPPORTED), - fmt(ASTC_5x5_SRGB, .format = UNSUPPORTED), - fmt(ASTC_6x5_UNORM, .format = UNSUPPORTED), - fmt(ASTC_6x5_SRGB, .format = UNSUPPORTED), - fmt(ASTC_6x6_UNORM, .format = UNSUPPORTED), - fmt(ASTC_6x6_SRGB, .format = UNSUPPORTED), - fmt(ASTC_8x5_UNORM, .format = UNSUPPORTED), - fmt(ASTC_8x5_SRGB, .format = UNSUPPORTED), - fmt(ASTC_8x6_UNORM, .format = UNSUPPORTED), - fmt(ASTC_8x6_SRGB, .format = UNSUPPORTED), - fmt(ASTC_8x8_UNORM, .format = UNSUPPORTED), - fmt(ASTC_8x8_SRGB, .format = UNSUPPORTED), - fmt(ASTC_10x5_UNORM, .format = UNSUPPORTED), - fmt(ASTC_10x5_SRGB, .format = UNSUPPORTED), - fmt(ASTC_10x6_UNORM, .format = UNSUPPORTED), - fmt(ASTC_10x6_SRGB, .format = UNSUPPORTED), - fmt(ASTC_10x8_UNORM, .format = UNSUPPORTED), - fmt(ASTC_10x8_SRGB, .format = UNSUPPORTED), - fmt(ASTC_10x10_UNORM, .format = UNSUPPORTED), - fmt(ASTC_10x10_SRGB, .format = 
UNSUPPORTED), - fmt(ASTC_12x10_UNORM, .format = UNSUPPORTED), - fmt(ASTC_12x10_SRGB, .format = UNSUPPORTED), - fmt(ASTC_12x12_UNORM, .format = UNSUPPORTED), - fmt(ASTC_12x12_SRGB, .format = UNSUPPORTED), - fmt(B4G4R4A4_UNORM, .format = B4G4R4A4_UNORM, .cpp = 2, .channels = 4), - fmt(B5G5R5A1_UNORM, .format = B5G5R5A1_UNORM, .cpp = 2, .channels = 4), - fmt(B5G6R5_UNORM, .format = B5G6R5_UNORM, .cpp = 2, .channels = 3), - fmt(B5G6R5_USCALED, .format = UNSUPPORTED), - fmt(B8G8R8_UNORM, .format = UNSUPPORTED), - fmt(B8G8R8_SNORM, .format = UNSUPPORTED), - fmt(B8G8R8_USCALED, .format = UNSUPPORTED), - fmt(B8G8R8_SSCALED, .format = UNSUPPORTED), - fmt(B8G8R8_UINT, .format = UNSUPPORTED), - fmt(B8G8R8_SINT, .format = UNSUPPORTED), - fmt(B8G8R8_SRGB, .format = UNSUPPORTED), - fmt(B8G8R8A8_UNORM, .format = B8G8R8A8_UNORM, .cpp = 4, .channels = 4), - fmt(B8G8R8A8_SNORM, .format = UNSUPPORTED), - fmt(B8G8R8A8_USCALED, .format = UNSUPPORTED), - fmt(B8G8R8A8_SSCALED, .format = UNSUPPORTED), - fmt(B8G8R8A8_UINT, .format = UNSUPPORTED), - fmt(B8G8R8A8_SINT, .format = UNSUPPORTED), - fmt(B8G8R8A8_SRGB, .format = B8G8R8A8_UNORM_SRGB, .cpp = 4, .channels = 4), - fmt(B10G10R10A2_UNORM, .format = B10G10R10A2_UNORM, .cpp = 4, .channels = 4), - fmt(B10G10R10A2_SNORM, .format = B10G10R10A2_SNORM, .cpp = 4, .channels = 4), - fmt(B10G10R10A2_USCALED, .format = B10G10R10A2_USCALED, .cpp = 4, .channels = 4), - fmt(B10G10R10A2_SSCALED, .format = B10G10R10A2_SSCALED, .cpp = 4, .channels = 4), - fmt(B10G10R10A2_UINT, .format = B10G10R10A2_UINT, .cpp = 4, .channels = 4), - fmt(B10G10R10A2_SINT, .format = B10G10R10A2_SINT, .cpp = 4, .channels = 4) + fmt(VK_FORMAT_BC1_RGB_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGB_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGBA_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGBA_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC2_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC2_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC3_UNORM, .format 
= UNSUPPORTED), + fmt(VK_FORMAT_BC3_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC4_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC4_SNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC5_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC5_SNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC6H_UFLOAT, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC6H_SFLOAT, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC7_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_BC7_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11_SNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11G11_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11G11_SNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_UNORM, .format = 
UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_B4G4R4A4_UNORM, .format = B4G4R4A4_UNORM, .cpp = 2, .channels = 4), + fmt(VK_FORMAT_B5G5R5A1_UNORM, .format = B5G5R5A1_UNORM, .cpp = 2, .channels = 4), + fmt(VK_FORMAT_B5G6R5_UNORM, .format = B5G6R5_UNORM, .cpp = 2, .channels = 3), + fmt(VK_FORMAT_B5G6R5_USCALED, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_USCALED, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SSCALED, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UINT, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SINT, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SRGB, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UNORM, .format = B8G8R8A8_UNORM, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_B8G8R8A8_SNORM, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_USCALED, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SSCALED, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UINT, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SINT, .format = UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SRGB, .format = B8G8R8A8_UNORM_SRGB, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UNORM, .format = B10G10R10A2_UNORM, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SNORM, .format = B10G10R10A2_SNORM, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_B10G10R10A2_USCALED, 
.format = B10G10R10A2_USCALED, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SSCALED, .format = B10G10R10A2_SSCALED, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UINT, .format = B10G10R10A2_UINT, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SINT, .format = B10G10R10A2_SINT, .cpp = 4, .channels = 4) }; const struct anv_format * -- cgit v1.2.3 From af0ade0d6c76eaba7ef7c0f4d4bbadea4c3d3cbe Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 25 Jun 2015 17:58:17 -0700 Subject: vk: Reindent struct anv_format --- src/vulkan/private.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 3cd8f3eab60..620c5e468cc 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -772,11 +772,11 @@ int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipelin void anv_compiler_free(struct anv_pipeline *pipeline); struct anv_format { - const char * name; - uint16_t format; - uint8_t cpp; - uint8_t channels; - bool has_stencil; + const char *name; + uint16_t format; + uint8_t cpp; + uint8_t channels; + bool has_stencil; }; const struct anv_format * -- cgit v1.2.3 From 4b8b451a1dc6bd67704813ae885e2e8c351647b0 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 25 Jun 2015 18:01:08 -0700 Subject: vk/formats: Rename anv_format::channels -> num_channels I misinterpreted anv_format::channels as a bitmask of channels. Renaming it to 'num_channels' makes it unambiguous. 
--- src/vulkan/formats.c | 190 +++++++++++++++++++++++++------------------------- src/vulkan/pipeline.c | 6 +- src/vulkan/private.h | 2 +- 3 files changed, 99 insertions(+), 99 deletions(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index 2fc85c81487..d0ee51360dd 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -29,7 +29,7 @@ [__vk_fmt] = { .name = #__vk_fmt, __VA_ARGS__ } static const struct anv_format anv_formats[] = { - fmt(VK_FORMAT_UNDEFINED, .format = RAW, .cpp = 1, .channels = 1), + fmt(VK_FORMAT_UNDEFINED, .format = RAW, .cpp = 1, .num_channels = 1), fmt(VK_FORMAT_R4G4_UNORM, .format = UNSUPPORTED), fmt(VK_FORMAT_R4G4_USCALED, .format = UNSUPPORTED), fmt(VK_FORMAT_R4G4B4A4_UNORM, .format = UNSUPPORTED), @@ -38,98 +38,98 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_R5G6B5_USCALED, .format = UNSUPPORTED), fmt(VK_FORMAT_R5G5B5A1_UNORM, .format = UNSUPPORTED), fmt(VK_FORMAT_R5G5B5A1_USCALED, .format = UNSUPPORTED), - fmt(VK_FORMAT_R8_UNORM, .format = R8_UNORM, .cpp = 1, .channels = 1), - fmt(VK_FORMAT_R8_SNORM, .format = R8_SNORM, .cpp = 1, .channels = 1,), - fmt(VK_FORMAT_R8_USCALED, .format = R8_USCALED, .cpp = 1, .channels = 1), - fmt(VK_FORMAT_R8_SSCALED, .format = R8_SSCALED, .cpp = 1, .channels = 1), - fmt(VK_FORMAT_R8_UINT, .format = R8_UINT, .cpp = 1, .channels = 1), - fmt(VK_FORMAT_R8_SINT, .format = R8_SINT, .cpp = 1, .channels = 1), + fmt(VK_FORMAT_R8_UNORM, .format = R8_UNORM, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SNORM, .format = R8_SNORM, .cpp = 1, .num_channels = 1,), + fmt(VK_FORMAT_R8_USCALED, .format = R8_USCALED, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SSCALED, .format = R8_SSCALED, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_UINT, .format = R8_UINT, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SINT, .format = R8_SINT, .cpp = 1, .num_channels = 1), fmt(VK_FORMAT_R8_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_R8G8_UNORM, .format = R8G8_UNORM, 
.cpp = 2, .channels = 2), - fmt(VK_FORMAT_R8G8_SNORM, .format = R8G8_SNORM, .cpp = 2, .channels = 2), - fmt(VK_FORMAT_R8G8_USCALED, .format = R8G8_USCALED, .cpp = 2, .channels = 2), - fmt(VK_FORMAT_R8G8_SSCALED, .format = R8G8_SSCALED, .cpp = 2, .channels = 2), - fmt(VK_FORMAT_R8G8_UINT, .format = R8G8_UINT, .cpp = 2, .channels = 2), - fmt(VK_FORMAT_R8G8_SINT, .format = R8G8_SINT, .cpp = 2, .channels = 2), + fmt(VK_FORMAT_R8G8_UNORM, .format = R8G8_UNORM, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SNORM, .format = R8G8_SNORM, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_USCALED, .format = R8G8_USCALED, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SSCALED, .format = R8G8_SSCALED, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_UINT, .format = R8G8_UINT, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SINT, .format = R8G8_SINT, .cpp = 2, .num_channels = 2), fmt(VK_FORMAT_R8G8_SRGB, .format = UNSUPPORTED), /* L8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8_UNORM, .format = R8G8B8X8_UNORM, .cpp = 3, .channels = 3), + fmt(VK_FORMAT_R8G8B8_UNORM, .format = R8G8B8X8_UNORM, .cpp = 3, .num_channels = 3), fmt(VK_FORMAT_R8G8B8_SNORM, .format = R8G8B8_SNORM, .cpp = 4), - fmt(VK_FORMAT_R8G8B8_USCALED, .format = R8G8B8_USCALED, .cpp = 3, .channels = 3), - fmt(VK_FORMAT_R8G8B8_SSCALED, .format = R8G8B8_SSCALED, .cpp = 3, .channels = 3), - fmt(VK_FORMAT_R8G8B8_UINT, .format = R8G8B8_UINT, .cpp = 3, .channels = 3), - fmt(VK_FORMAT_R8G8B8_SINT, .format = R8G8B8_SINT, .cpp = 3, .channels = 3), + fmt(VK_FORMAT_R8G8B8_USCALED, .format = R8G8B8_USCALED, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SSCALED, .format = R8G8B8_SSCALED, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_UINT, .format = R8G8B8_UINT, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SINT, .format = R8G8B8_SINT, .cpp = 3, .num_channels = 3), fmt(VK_FORMAT_R8G8B8_SRGB, .format = UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8A8_UNORM, .format = R8G8B8A8_UNORM, .cpp 
= 4, .channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SNORM, .format = R8G8B8A8_SNORM, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_R8G8B8A8_USCALED, .format = R8G8B8A8_USCALED, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SSCALED, .format = R8G8B8A8_SSCALED, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_R8G8B8A8_UINT, .format = R8G8B8A8_UINT, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SINT, .format = R8G8B8A8_SINT, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SRGB, .format = R8G8B8A8_UNORM_SRGB, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UNORM, .format = R10G10B10A2_UNORM, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SNORM, .format = R10G10B10A2_SNORM, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_R10G10B10A2_USCALED, .format = R10G10B10A2_USCALED, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SSCALED, .format = R10G10B10A2_SSCALED, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UINT, .format = R10G10B10A2_UINT, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SINT, .format = R10G10B10A2_SINT, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_R16_UNORM, .format = R16_UNORM, .cpp = 2, .channels = 1), - fmt(VK_FORMAT_R16_SNORM, .format = R16_SNORM, .cpp = 2, .channels = 1), - fmt(VK_FORMAT_R16_USCALED, .format = R16_USCALED, .cpp = 2, .channels = 1), - fmt(VK_FORMAT_R16_SSCALED, .format = R16_SSCALED, .cpp = 2, .channels = 1), - fmt(VK_FORMAT_R16_UINT, .format = R16_UINT, .cpp = 2, .channels = 1), - fmt(VK_FORMAT_R16_SINT, .format = R16_SINT, .cpp = 2, .channels = 1), - fmt(VK_FORMAT_R16_SFLOAT, .format = R16_FLOAT, .cpp = 2, .channels = 1), - fmt(VK_FORMAT_R16G16_UNORM, .format = R16G16_UNORM, .cpp = 4, .channels = 2), - fmt(VK_FORMAT_R16G16_SNORM, .format = R16G16_SNORM, .cpp = 4, .channels = 2), - fmt(VK_FORMAT_R16G16_USCALED, .format = R16G16_USCALED, .cpp = 4, .channels = 2), - fmt(VK_FORMAT_R16G16_SSCALED, .format = R16G16_SSCALED, .cpp = 4, .channels = 2), - fmt(VK_FORMAT_R16G16_UINT, .format = R16G16_UINT, .cpp = 4, .channels 
= 2), - fmt(VK_FORMAT_R16G16_SINT, .format = R16G16_SINT, .cpp = 4, .channels = 2), - fmt(VK_FORMAT_R16G16_SFLOAT, .format = R16G16_FLOAT, .cpp = 4, .channels = 2), - fmt(VK_FORMAT_R16G16B16_UNORM, .format = R16G16B16_UNORM, .cpp = 6, .channels = 3), - fmt(VK_FORMAT_R16G16B16_SNORM, .format = R16G16B16_SNORM, .cpp = 6, .channels = 3), - fmt(VK_FORMAT_R16G16B16_USCALED, .format = R16G16B16_USCALED, .cpp = 6, .channels = 3), - fmt(VK_FORMAT_R16G16B16_SSCALED, .format = R16G16B16_SSCALED, .cpp = 6, .channels = 3), - fmt(VK_FORMAT_R16G16B16_UINT, .format = R16G16B16_UINT, .cpp = 6, .channels = 3), - fmt(VK_FORMAT_R16G16B16_SINT, .format = R16G16B16_SINT, .cpp = 6, .channels = 3), - fmt(VK_FORMAT_R16G16B16_SFLOAT, .format = R16G16B16_FLOAT, .cpp = 6, .channels = 3), - fmt(VK_FORMAT_R16G16B16A16_UNORM, .format = R16G16B16A16_UNORM, .cpp = 8, .channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SNORM, .format = R16G16B16A16_SNORM, .cpp = 8, .channels = 4), - fmt(VK_FORMAT_R16G16B16A16_USCALED, .format = R16G16B16A16_USCALED, .cpp = 8, .channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SSCALED, .format = R16G16B16A16_SSCALED, .cpp = 8, .channels = 4), - fmt(VK_FORMAT_R16G16B16A16_UINT, .format = R16G16B16A16_UINT, .cpp = 8, .channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SINT, .format = R16G16B16A16_SINT, .cpp = 8, .channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SFLOAT, .format = R16G16B16A16_FLOAT, .cpp = 8, .channels = 4), - fmt(VK_FORMAT_R32_UINT, .format = R32_UINT, .cpp = 4, .channels = 1,), - fmt(VK_FORMAT_R32_SINT, .format = R32_SINT, .cpp = 4, .channels = 1,), - fmt(VK_FORMAT_R32_SFLOAT, .format = R32_FLOAT, .cpp = 4, .channels = 1,), - fmt(VK_FORMAT_R32G32_UINT, .format = R32G32_UINT, .cpp = 8, .channels = 2,), - fmt(VK_FORMAT_R32G32_SINT, .format = R32G32_SINT, .cpp = 8, .channels = 2,), - fmt(VK_FORMAT_R32G32_SFLOAT, .format = R32G32_FLOAT, .cpp = 8, .channels = 2,), - fmt(VK_FORMAT_R32G32B32_UINT, .format = R32G32B32_UINT, .cpp = 12, .channels = 3,), - 
fmt(VK_FORMAT_R32G32B32_SINT, .format = R32G32B32_SINT, .cpp = 12, .channels = 3,), - fmt(VK_FORMAT_R32G32B32_SFLOAT, .format = R32G32B32_FLOAT, .cpp = 12, .channels = 3,), - fmt(VK_FORMAT_R32G32B32A32_UINT, .format = R32G32B32A32_UINT, .cpp = 16, .channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SINT, .format = R32G32B32A32_SINT, .cpp = 16, .channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SFLOAT, .format = R32G32B32A32_FLOAT, .cpp = 16, .channels = 4,), - fmt(VK_FORMAT_R64_SFLOAT, .format = R64_FLOAT, .cpp = 8, .channels = 1), - fmt(VK_FORMAT_R64G64_SFLOAT, .format = R64G64_FLOAT, .cpp = 16, .channels = 2), - fmt(VK_FORMAT_R64G64B64_SFLOAT, .format = R64G64B64_FLOAT, .cpp = 24, .channels = 3), - fmt(VK_FORMAT_R64G64B64A64_SFLOAT, .format = R64G64B64A64_FLOAT, .cpp = 32, .channels = 4), - fmt(VK_FORMAT_R11G11B10_UFLOAT, .format = R11G11B10_FLOAT, .cpp = 4, .channels = 3), - fmt(VK_FORMAT_R9G9B9E5_UFLOAT, .format = R9G9B9E5_SHAREDEXP, .cpp = 4, .channels = 3), + fmt(VK_FORMAT_R8G8B8A8_UNORM, .format = R8G8B8A8_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SNORM, .format = R8G8B8A8_SNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_USCALED, .format = R8G8B8A8_USCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SSCALED, .format = R8G8B8A8_SSCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_UINT, .format = R8G8B8A8_UINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SINT, .format = R8G8B8A8_SINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SRGB, .format = R8G8B8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UNORM, .format = R10G10B10A2_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SNORM, .format = R10G10B10A2_SNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_USCALED, .format = R10G10B10A2_USCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SSCALED, .format = R10G10B10A2_SSCALED, .cpp = 4, .num_channels = 4), + 
fmt(VK_FORMAT_R10G10B10A2_UINT, .format = R10G10B10A2_UINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SINT, .format = R10G10B10A2_SINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R16_UNORM, .format = R16_UNORM, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SNORM, .format = R16_SNORM, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_USCALED, .format = R16_USCALED, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SSCALED, .format = R16_SSCALED, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_UINT, .format = R16_UINT, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SINT, .format = R16_SINT, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SFLOAT, .format = R16_FLOAT, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16G16_UNORM, .format = R16G16_UNORM, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SNORM, .format = R16G16_SNORM, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_USCALED, .format = R16G16_USCALED, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SSCALED, .format = R16G16_SSCALED, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_UINT, .format = R16G16_UINT, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SINT, .format = R16G16_SINT, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SFLOAT, .format = R16G16_FLOAT, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16B16_UNORM, .format = R16G16B16_UNORM, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SNORM, .format = R16G16B16_SNORM, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_USCALED, .format = R16G16B16_USCALED, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SSCALED, .format = R16G16B16_SSCALED, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_UINT, .format = R16G16B16_UINT, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SINT, .format = R16G16B16_SINT, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SFLOAT, .format = R16G16B16_FLOAT, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16A16_UNORM, 
.format = R16G16B16A16_UNORM, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SNORM, .format = R16G16B16A16_SNORM, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_USCALED, .format = R16G16B16A16_USCALED, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SSCALED, .format = R16G16B16A16_SSCALED, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_UINT, .format = R16G16B16A16_UINT, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SINT, .format = R16G16B16A16_SINT, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, .format = R16G16B16A16_FLOAT, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R32_UINT, .format = R32_UINT, .cpp = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32_SINT, .format = R32_SINT, .cpp = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32_SFLOAT, .format = R32_FLOAT, .cpp = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32G32_UINT, .format = R32G32_UINT, .cpp = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SINT, .format = R32G32_SINT, .cpp = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SFLOAT, .format = R32G32_FLOAT, .cpp = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32B32_UINT, .format = R32G32B32_UINT, .cpp = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SINT, .format = R32G32B32_SINT, .cpp = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, .format = R32G32B32_FLOAT, .cpp = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32A32_UINT, .format = R32G32B32A32_UINT, .cpp = 16, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SINT, .format = R32G32B32A32_SINT, .cpp = 16, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, .format = R32G32B32A32_FLOAT, .cpp = 16, .num_channels = 4,), + fmt(VK_FORMAT_R64_SFLOAT, .format = R64_FLOAT, .cpp = 8, .num_channels = 1), + fmt(VK_FORMAT_R64G64_SFLOAT, .format = R64G64_FLOAT, .cpp = 16, .num_channels = 2), + fmt(VK_FORMAT_R64G64B64_SFLOAT, .format = R64G64B64_FLOAT, .cpp = 24, .num_channels = 3), + fmt(VK_FORMAT_R64G64B64A64_SFLOAT, .format = 
R64G64B64A64_FLOAT, .cpp = 32, .num_channels = 4), + fmt(VK_FORMAT_R11G11B10_UFLOAT, .format = R11G11B10_FLOAT, .cpp = 4, .num_channels = 3), + fmt(VK_FORMAT_R9G9B9E5_UFLOAT, .format = R9G9B9E5_SHAREDEXP, .cpp = 4, .num_channels = 3), /* For depth/stencil formats, the .format and .cpp fields describe the * depth format. The field .has_stencil indicates whether or not there's a * stencil buffer. */ - fmt(VK_FORMAT_D16_UNORM, .format = D16_UNORM, .cpp = 2, .channels = 1), - fmt(VK_FORMAT_D24_UNORM, .format = D24_UNORM_X8_UINT, .cpp = 4, .channels = 1), - fmt(VK_FORMAT_D32_SFLOAT, .format = D32_FLOAT, .cpp = 4, .channels = 1), - fmt(VK_FORMAT_S8_UINT, .format = UNSUPPORTED, .cpp = 0, .channels = 1, .has_stencil = true), - fmt(VK_FORMAT_D16_UNORM_S8_UINT, .format = D16_UNORM, .cpp = 2, .channels = 2, .has_stencil = true), - fmt(VK_FORMAT_D24_UNORM_S8_UINT, .format = D24_UNORM_X8_UINT, .cpp = 4, .channels = 2, .has_stencil = true), - fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, .format = D32_FLOAT, .cpp = 4, .channels = 2, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM, .format = D16_UNORM, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_D24_UNORM, .format = D24_UNORM_X8_UINT, .cpp = 4, .num_channels = 1), + fmt(VK_FORMAT_D32_SFLOAT, .format = D32_FLOAT, .cpp = 4, .num_channels = 1), + fmt(VK_FORMAT_S8_UINT, .format = UNSUPPORTED, .cpp = 0, .num_channels = 1, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, .format = D16_UNORM, .cpp = 2, .num_channels = 2, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, .format = D24_UNORM_X8_UINT, .cpp = 4, .num_channels = 2, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, .format = D32_FLOAT, .cpp = 4, .num_channels = 2, .has_stencil = true), fmt(VK_FORMAT_BC1_RGB_UNORM, .format = UNSUPPORTED), fmt(VK_FORMAT_BC1_RGB_SRGB, .format = UNSUPPORTED), @@ -185,9 +185,9 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_ASTC_12x10_SRGB, .format = UNSUPPORTED), fmt(VK_FORMAT_ASTC_12x12_UNORM, .format = 
UNSUPPORTED), fmt(VK_FORMAT_ASTC_12x12_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_B4G4R4A4_UNORM, .format = B4G4R4A4_UNORM, .cpp = 2, .channels = 4), - fmt(VK_FORMAT_B5G5R5A1_UNORM, .format = B5G5R5A1_UNORM, .cpp = 2, .channels = 4), - fmt(VK_FORMAT_B5G6R5_UNORM, .format = B5G6R5_UNORM, .cpp = 2, .channels = 3), + fmt(VK_FORMAT_B4G4R4A4_UNORM, .format = B4G4R4A4_UNORM, .cpp = 2, .num_channels = 4), + fmt(VK_FORMAT_B5G5R5A1_UNORM, .format = B5G5R5A1_UNORM, .cpp = 2, .num_channels = 4), + fmt(VK_FORMAT_B5G6R5_UNORM, .format = B5G6R5_UNORM, .cpp = 2, .num_channels = 3), fmt(VK_FORMAT_B5G6R5_USCALED, .format = UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_UNORM, .format = UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_SNORM, .format = UNSUPPORTED), @@ -196,19 +196,19 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_B8G8R8_UINT, .format = UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_SINT, .format = UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UNORM, .format = B8G8R8A8_UNORM, .cpp = 4, .channels = 4), + fmt(VK_FORMAT_B8G8R8A8_UNORM, .format = B8G8R8A8_UNORM, .cpp = 4, .num_channels = 4), fmt(VK_FORMAT_B8G8R8A8_SNORM, .format = UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_USCALED, .format = UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_SSCALED, .format = UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_UINT, .format = UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_SINT, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SRGB, .format = B8G8R8A8_UNORM_SRGB, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UNORM, .format = B10G10R10A2_UNORM, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SNORM, .format = B10G10R10A2_SNORM, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_B10G10R10A2_USCALED, .format = B10G10R10A2_USCALED, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SSCALED, .format = B10G10R10A2_SSCALED, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UINT, .format = B10G10R10A2_UINT, .cpp = 4, .channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SINT, .format = 
B10G10R10A2_SINT, .cpp = 4, .channels = 4) + fmt(VK_FORMAT_B8G8R8A8_SRGB, .format = B8G8R8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UNORM, .format = B10G10R10A2_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SNORM, .format = B10G10R10A2_SNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_USCALED, .format = B10G10R10A2_USCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SSCALED, .format = B10G10R10A2_SSCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UINT, .format = B10G10R10A2_UINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SINT, .format = B10G10R10A2_SINT, .cpp = 4, .num_channels = 4) }; const struct anv_format * diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 8be47c91205..ae57d5ea4a2 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -100,9 +100,9 @@ emit_vertex_input(struct anv_pipeline *pipeline, VkPipelineVertexInputCreateInfo .EdgeFlagEnable = false, .SourceElementOffset = desc->offsetInBytes, .Component0Control = VFCOMP_STORE_SRC, - .Component1Control = format->channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component2Control = format->channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component3Control = format->channels >= 4 ? VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP + .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component3Control = format->num_channels >= 4 ? 
VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP }; GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 620c5e468cc..0c0fe071704 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -775,7 +775,7 @@ struct anv_format { const char *name; uint16_t format; uint8_t cpp; - uint8_t channels; + uint8_t num_channels; bool has_stencil; }; -- cgit v1.2.3 From 4c8146313f110a3af2b2e17bb3f17512cd6601e8 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 25 Jun 2015 18:18:06 -0700 Subject: vk/formats: Rename anv_format::format -> surface_format I misinterpreted anv_format::format as a VkFormat. Instead, it is a hardware surface format (RENDER_SURFACE_STATE.SurfaceFormat). Rename the field to 'surface_format' to make it unambiguous. --- src/vulkan/device.c | 2 +- src/vulkan/formats.c | 352 +++++++++++++++++++++++++------------------------- src/vulkan/image.c | 6 +- src/vulkan/pipeline.c | 2 +- src/vulkan/private.h | 2 +- 5 files changed, 182 insertions(+), 182 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index c9e66743def..3c5691f3cb3 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1517,7 +1517,7 @@ fill_buffer_surface_state(void *state, VkFormat format, struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = SURFTYPE_BUFFER, .SurfaceArray = false, - .SurfaceFormat = info->format, + .SurfaceFormat = info->surface_format, .SurfaceVerticalAlignment = VALIGN4, .SurfaceHorizontalAlignment = HALIGN4, .TileMode = LINEAR, diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index d0ee51360dd..d93802b23e1 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -29,186 +29,186 @@ [__vk_fmt] = { .name = #__vk_fmt, __VA_ARGS__ } static const struct anv_format anv_formats[] = { - fmt(VK_FORMAT_UNDEFINED, .format = RAW, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R4G4_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_R4G4_USCALED, .format = 
UNSUPPORTED), - fmt(VK_FORMAT_R4G4B4A4_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_R4G4B4A4_USCALED, .format = UNSUPPORTED), - fmt(VK_FORMAT_R5G6B5_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_R5G6B5_USCALED, .format = UNSUPPORTED), - fmt(VK_FORMAT_R5G5B5A1_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_R5G5B5A1_USCALED, .format = UNSUPPORTED), - fmt(VK_FORMAT_R8_UNORM, .format = R8_UNORM, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SNORM, .format = R8_SNORM, .cpp = 1, .num_channels = 1,), - fmt(VK_FORMAT_R8_USCALED, .format = R8_USCALED, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SSCALED, .format = R8_SSCALED, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_UINT, .format = R8_UINT, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SINT, .format = R8_SINT, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_R8G8_UNORM, .format = R8G8_UNORM, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SNORM, .format = R8G8_SNORM, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_USCALED, .format = R8G8_USCALED, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SSCALED, .format = R8G8_SSCALED, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_UINT, .format = R8G8_UINT, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SINT, .format = R8G8_SINT, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SRGB, .format = UNSUPPORTED), /* L8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8_UNORM, .format = R8G8B8X8_UNORM, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SNORM, .format = R8G8B8_SNORM, .cpp = 4), - fmt(VK_FORMAT_R8G8B8_USCALED, .format = R8G8B8_USCALED, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SSCALED, .format = R8G8B8_SSCALED, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_UINT, .format = R8G8B8_UINT, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SINT, .format = R8G8B8_SINT, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SRGB, .format = UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ - 
fmt(VK_FORMAT_R8G8B8A8_UNORM, .format = R8G8B8A8_UNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SNORM, .format = R8G8B8A8_SNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_USCALED, .format = R8G8B8A8_USCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SSCALED, .format = R8G8B8A8_SSCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_UINT, .format = R8G8B8A8_UINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SINT, .format = R8G8B8A8_SINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SRGB, .format = R8G8B8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UNORM, .format = R10G10B10A2_UNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SNORM, .format = R10G10B10A2_SNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_USCALED, .format = R10G10B10A2_USCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SSCALED, .format = R10G10B10A2_SSCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UINT, .format = R10G10B10A2_UINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SINT, .format = R10G10B10A2_SINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R16_UNORM, .format = R16_UNORM, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SNORM, .format = R16_SNORM, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_USCALED, .format = R16_USCALED, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SSCALED, .format = R16_SSCALED, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_UINT, .format = R16_UINT, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SINT, .format = R16_SINT, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SFLOAT, .format = R16_FLOAT, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16G16_UNORM, .format = R16G16_UNORM, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SNORM, .format = R16G16_SNORM, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_USCALED, .format = R16G16_USCALED, .cpp = 4, .num_channels = 2), - 
fmt(VK_FORMAT_R16G16_SSCALED, .format = R16G16_SSCALED, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_UINT, .format = R16G16_UINT, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SINT, .format = R16G16_SINT, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SFLOAT, .format = R16G16_FLOAT, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16B16_UNORM, .format = R16G16B16_UNORM, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SNORM, .format = R16G16B16_SNORM, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_USCALED, .format = R16G16B16_USCALED, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SSCALED, .format = R16G16B16_SSCALED, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_UINT, .format = R16G16B16_UINT, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SINT, .format = R16G16B16_SINT, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SFLOAT, .format = R16G16B16_FLOAT, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16A16_UNORM, .format = R16G16B16A16_UNORM, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SNORM, .format = R16G16B16A16_SNORM, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_USCALED, .format = R16G16B16A16_USCALED, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SSCALED, .format = R16G16B16A16_SSCALED, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_UINT, .format = R16G16B16A16_UINT, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SINT, .format = R16G16B16A16_SINT, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SFLOAT, .format = R16G16B16A16_FLOAT, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R32_UINT, .format = R32_UINT, .cpp = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32_SINT, .format = R32_SINT, .cpp = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32_SFLOAT, .format = R32_FLOAT, .cpp = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32G32_UINT, .format = R32G32_UINT, .cpp = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32_SINT, 
.format = R32G32_SINT, .cpp = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32_SFLOAT, .format = R32G32_FLOAT, .cpp = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32B32_UINT, .format = R32G32B32_UINT, .cpp = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32_SINT, .format = R32G32B32_SINT, .cpp = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32_SFLOAT, .format = R32G32B32_FLOAT, .cpp = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32A32_UINT, .format = R32G32B32A32_UINT, .cpp = 16, .num_channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SINT, .format = R32G32B32A32_SINT, .cpp = 16, .num_channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SFLOAT, .format = R32G32B32A32_FLOAT, .cpp = 16, .num_channels = 4,), - fmt(VK_FORMAT_R64_SFLOAT, .format = R64_FLOAT, .cpp = 8, .num_channels = 1), - fmt(VK_FORMAT_R64G64_SFLOAT, .format = R64G64_FLOAT, .cpp = 16, .num_channels = 2), - fmt(VK_FORMAT_R64G64B64_SFLOAT, .format = R64G64B64_FLOAT, .cpp = 24, .num_channels = 3), - fmt(VK_FORMAT_R64G64B64A64_SFLOAT, .format = R64G64B64A64_FLOAT, .cpp = 32, .num_channels = 4), - fmt(VK_FORMAT_R11G11B10_UFLOAT, .format = R11G11B10_FLOAT, .cpp = 4, .num_channels = 3), - fmt(VK_FORMAT_R9G9B9E5_UFLOAT, .format = R9G9B9E5_SHAREDEXP, .cpp = 4, .num_channels = 3), + fmt(VK_FORMAT_UNDEFINED, RAW, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R4G4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_R4G4_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_R5G6B5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_R5G6B5_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_R5G5B5A1_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_R5G5B5A1_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_R8_UNORM, R8_UNORM, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SNORM, R8_SNORM, .cpp = 1, .num_channels = 1,), + fmt(VK_FORMAT_R8_USCALED, R8_USCALED, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SSCALED, R8_SSCALED, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_UINT, R8_UINT, .cpp = 1, .num_channels 
= 1), + fmt(VK_FORMAT_R8_SINT, R8_SINT, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_R8G8_UNORM, R8G8_UNORM, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SNORM, R8G8_SNORM, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_USCALED, R8G8_USCALED, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SSCALED, R8G8_SSCALED, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_UINT, R8G8_UINT, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SINT, R8G8_SINT, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SRGB, UNSUPPORTED), /* L8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8_UNORM, R8G8B8X8_UNORM, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SNORM, R8G8B8_SNORM, .cpp = 4), + fmt(VK_FORMAT_R8G8B8_USCALED, R8G8B8_USCALED, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SSCALED, R8G8B8_SSCALED, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_UINT, R8G8B8_UINT, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SINT, R8G8B8_SINT, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SRGB, UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8A8_UNORM, R8G8B8A8_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SNORM, R8G8B8A8_SNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_USCALED, R8G8B8A8_USCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SSCALED, R8G8B8A8_SSCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_UINT, R8G8B8A8_UINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SINT, R8G8B8A8_SINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SRGB, R8G8B8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UNORM, R10G10B10A2_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SNORM, R10G10B10A2_SNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_USCALED, R10G10B10A2_USCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SSCALED, R10G10B10A2_SSCALED, .cpp = 4, .num_channels = 4), + 
fmt(VK_FORMAT_R10G10B10A2_UINT, R10G10B10A2_UINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SINT, R10G10B10A2_SINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SNORM, R16_SNORM, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_USCALED, R16_USCALED, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SSCALED, R16_SSCALED, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_UINT, R16_UINT, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SINT, R16_SINT, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SFLOAT, R16_FLOAT, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16G16_UNORM, R16G16_UNORM, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SNORM, R16G16_SNORM, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_USCALED, R16G16_USCALED, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SSCALED, R16G16_SSCALED, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_UINT, R16G16_UINT, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SINT, R16G16_SINT, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SFLOAT, R16G16_FLOAT, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16B16_UNORM, R16G16B16_UNORM, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SNORM, R16G16B16_SNORM, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_USCALED, R16G16B16_USCALED, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SSCALED, R16G16B16_SSCALED, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_UINT, R16G16B16_UINT, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SINT, R16G16B16_SINT, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SFLOAT, R16G16B16_FLOAT, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16A16_UNORM, R16G16B16A16_UNORM, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SNORM, R16G16B16A16_SNORM, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_USCALED, R16G16B16A16_USCALED, .cpp = 8, .num_channels = 4), + 
fmt(VK_FORMAT_R16G16B16A16_SSCALED, R16G16B16A16_SSCALED, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_UINT, R16G16B16A16_UINT, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SINT, R16G16B16A16_SINT, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, R16G16B16A16_FLOAT, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R32_UINT, R32_UINT, .cpp = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32_SINT, R32_SINT, .cpp = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32G32_UINT, R32G32_UINT, .cpp = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SINT, R32G32_SINT, .cpp = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SFLOAT, R32G32_FLOAT, .cpp = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32B32_UINT, R32G32B32_UINT, .cpp = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SINT, R32G32B32_SINT, .cpp = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, R32G32B32_FLOAT, .cpp = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32A32_UINT, R32G32B32A32_UINT, .cpp = 16, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SINT, R32G32B32A32_SINT, .cpp = 16, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, R32G32B32A32_FLOAT, .cpp = 16, .num_channels = 4,), + fmt(VK_FORMAT_R64_SFLOAT, R64_FLOAT, .cpp = 8, .num_channels = 1), + fmt(VK_FORMAT_R64G64_SFLOAT, R64G64_FLOAT, .cpp = 16, .num_channels = 2), + fmt(VK_FORMAT_R64G64B64_SFLOAT, R64G64B64_FLOAT, .cpp = 24, .num_channels = 3), + fmt(VK_FORMAT_R64G64B64A64_SFLOAT, R64G64B64A64_FLOAT, .cpp = 32, .num_channels = 4), + fmt(VK_FORMAT_R11G11B10_UFLOAT, R11G11B10_FLOAT, .cpp = 4, .num_channels = 3), + fmt(VK_FORMAT_R9G9B9E5_UFLOAT, R9G9B9E5_SHAREDEXP, .cpp = 4, .num_channels = 3), /* For depth/stencil formats, the .format and .cpp fields describe the * depth format. The field .has_stencil indicates whether or not there's a * stencil buffer. 
*/ - fmt(VK_FORMAT_D16_UNORM, .format = D16_UNORM, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_D24_UNORM, .format = D24_UNORM_X8_UINT, .cpp = 4, .num_channels = 1), - fmt(VK_FORMAT_D32_SFLOAT, .format = D32_FLOAT, .cpp = 4, .num_channels = 1), - fmt(VK_FORMAT_S8_UINT, .format = UNSUPPORTED, .cpp = 0, .num_channels = 1, .has_stencil = true), - fmt(VK_FORMAT_D16_UNORM_S8_UINT, .format = D16_UNORM, .cpp = 2, .num_channels = 2, .has_stencil = true), - fmt(VK_FORMAT_D24_UNORM_S8_UINT, .format = D24_UNORM_X8_UINT, .cpp = 4, .num_channels = 2, .has_stencil = true), - fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, .format = D32_FLOAT, .cpp = 4, .num_channels = 2, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM, D16_UNORM, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_D24_UNORM, D24_UNORM_X8_UINT, .cpp = 4, .num_channels = 1), + fmt(VK_FORMAT_D32_SFLOAT, D32_FLOAT, .cpp = 4, .num_channels = 1), + fmt(VK_FORMAT_S8_UINT, UNSUPPORTED, .cpp = 0, .num_channels = 1, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, D16_UNORM, .cpp = 2, .num_channels = 2, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, D24_UNORM_X8_UINT, .cpp = 4, .num_channels = 2, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, D32_FLOAT, .cpp = 4, .num_channels = 2, .has_stencil = true), - fmt(VK_FORMAT_BC1_RGB_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGB_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGBA_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGBA_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC2_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC2_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC3_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC3_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC4_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC4_SNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC5_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC5_SNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC6H_UFLOAT, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC6H_SFLOAT, .format = 
UNSUPPORTED), - fmt(VK_FORMAT_BC7_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_BC7_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_EAC_R11_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_EAC_R11_SNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_EAC_R11G11_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_EAC_R11G11_SNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_4x4_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_4x4_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x4_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x4_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x5_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x5_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x5_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x5_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x6_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x6_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x5_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x5_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x6_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x6_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x8_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x8_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x5_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x5_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x6_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x6_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x8_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x8_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x10_UNORM, .format = UNSUPPORTED), - 
fmt(VK_FORMAT_ASTC_10x10_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x10_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x10_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x12_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x12_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_B4G4R4A4_UNORM, .format = B4G4R4A4_UNORM, .cpp = 2, .num_channels = 4), - fmt(VK_FORMAT_B5G5R5A1_UNORM, .format = B5G5R5A1_UNORM, .cpp = 2, .num_channels = 4), - fmt(VK_FORMAT_B5G6R5_UNORM, .format = B5G6R5_UNORM, .cpp = 2, .num_channels = 3), - fmt(VK_FORMAT_B5G6R5_USCALED, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_UNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_USCALED, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SSCALED, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_UINT, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SINT, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SRGB, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UNORM, .format = B8G8R8A8_UNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B8G8R8A8_SNORM, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_USCALED, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SSCALED, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UINT, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SINT, .format = UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SRGB, .format = B8G8R8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UNORM, .format = B10G10R10A2_UNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SNORM, .format = B10G10R10A2_SNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_USCALED, .format = B10G10R10A2_USCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SSCALED, .format = B10G10R10A2_SSCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UINT, .format = B10G10R10A2_UINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SINT, .format = B10G10R10A2_SINT, .cpp = 4, 
.num_channels = 4) + fmt(VK_FORMAT_BC1_RGB_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGB_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGBA_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGBA_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_BC2_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC2_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_BC3_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC3_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_BC4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC4_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC5_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC6H_UFLOAT, UNSUPPORTED), + fmt(VK_FORMAT_BC6H_SFLOAT, UNSUPPORTED), + fmt(VK_FORMAT_BC7_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC7_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11G11_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11G11_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_SRGB, UNSUPPORTED), + 
fmt(VK_FORMAT_ASTC_10x6_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_B4G4R4A4_UNORM, B4G4R4A4_UNORM, .cpp = 2, .num_channels = 4), + fmt(VK_FORMAT_B5G5R5A1_UNORM, B5G5R5A1_UNORM, .cpp = 2, .num_channels = 4), + fmt(VK_FORMAT_B5G6R5_UNORM, B5G6R5_UNORM, .cpp = 2, .num_channels = 3), + fmt(VK_FORMAT_B5G6R5_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SSCALED, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UINT, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SINT, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UNORM, B8G8R8A8_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B8G8R8A8_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SSCALED, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UINT, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SINT, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SRGB, B8G8R8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UNORM, B10G10R10A2_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SNORM, B10G10R10A2_SNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_USCALED, B10G10R10A2_USCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SSCALED, B10G10R10A2_SSCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UINT, B10G10R10A2_UINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SINT, B10G10R10A2_SINT, .cpp = 4, .num_channels = 4) }; const struct anv_format * @@ -258,10 
+258,10 @@ anv_format_get_properties(struct anv_device *device, if (device->info.is_haswell) gen += 5; - if (format->format == UNSUPPORTED) + if (format->surface_format == UNSUPPORTED) goto unsupported; - info = &surface_formats[format->format]; + info = &surface_formats[format->surface_format]; if (!info->exists) goto unsupported; diff --git a/src/vulkan/image.c b/src/vulkan/image.c index e2f8c4039b4..9f1ee97b950 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -276,7 +276,7 @@ anv_image_view_init(struct anv_surface_view *view, case VK_IMAGE_ASPECT_COLOR: view->offset = image->offset; tile_mode = image->tile_mode; - format = info->format; + format = info->surface_format; break; default: unreachable(""); @@ -397,7 +397,7 @@ anv_color_attachment_view_init(struct anv_surface_view *view, struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = SURFTYPE_2D, .SurfaceArray = false, - .SurfaceFormat = format->format, + .SurfaceFormat = format->surface_format, .SurfaceVerticalAlignment = anv_valign[image->v_align], .SurfaceHorizontalAlignment = anv_halign[image->h_align], .TileMode = image->tile_mode, @@ -486,7 +486,7 @@ VkResult anv_CreateDepthStencilView( view->depth_stride = image->stride; view->depth_offset = image->offset; - view->depth_format = format->format; + view->depth_format = format->surface_format; view->stencil_stride = image->stencil_stride; view->stencil_offset = image->offset + image->stencil_offset; diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index ae57d5ea4a2..f574038f83f 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -96,7 +96,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, VkPipelineVertexInputCreateInfo struct GEN8_VERTEX_ELEMENT_STATE element = { .VertexBufferIndex = desc->binding, .Valid = true, - .SourceElementFormat = format->format, + .SourceElementFormat = format->surface_format, .EdgeFlagEnable = false, .SourceElementOffset = desc->offsetInBytes, .Component0Control = VFCOMP_STORE_SRC, 
diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 0c0fe071704..f89e92af2d1 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -773,7 +773,7 @@ void anv_compiler_free(struct anv_pipeline *pipeline); struct anv_format { const char *name; - uint16_t format; + uint16_t surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ uint8_t cpp; uint8_t num_channels; bool has_stencil; -- cgit v1.2.3 From 528071f00478b7e77881cbbb4fe6125fcdcae386 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 25 Jun 2015 18:22:59 -0700 Subject: vk/formats: Fix table entry for R8G8B8_SNORM Now that anv_formats[] is formatted like a table, buggy entries are easier to see. --- src/vulkan/formats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index d93802b23e1..c88c7ce1ab9 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -53,7 +53,7 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_R8G8_SINT, R8G8_SINT, .cpp = 2, .num_channels = 2), fmt(VK_FORMAT_R8G8_SRGB, UNSUPPORTED), /* L8A8_UNORM_SRGB */ fmt(VK_FORMAT_R8G8B8_UNORM, R8G8B8X8_UNORM, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SNORM, R8G8B8_SNORM, .cpp = 4), + fmt(VK_FORMAT_R8G8B8_SNORM, R8G8B8_SNORM, .cpp = 3, .num_channels = 3), fmt(VK_FORMAT_R8G8B8_USCALED, R8G8B8_USCALED, .cpp = 3, .num_channels = 3), fmt(VK_FORMAT_R8G8B8_SSCALED, R8G8B8_SSCALED, .cpp = 3, .num_channels = 3), fmt(VK_FORMAT_R8G8B8_UINT, R8G8B8_UINT, .cpp = 3, .num_channels = 3), -- cgit v1.2.3 From 45b804a049c9c28be9e6c4b3ac92464cffee4997 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 25 Jun 2015 19:03:43 -0700 Subject: vk/image: Rename local variable in anv_image_create() This function has many local variables for info structs. Having one named simply 'info' is confusing. Rename it to 'format_info'. 
--- src/vulkan/image.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 9f1ee97b950..8a6f1952a4a 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -116,7 +116,7 @@ VkResult anv_image_create( { struct anv_device *device = (struct anv_device *) _device; struct anv_image *image; - const struct anv_format *info; + const struct anv_format *format_info; int32_t aligned_height; uint32_t stencil_size; @@ -171,16 +171,16 @@ VkResult anv_image_create( image->h_align = 4; image->v_align = 4; - info = anv_format_for_vk_format(pCreateInfo->format); - assert(info->cpp > 0 || info->has_stencil); + format_info = anv_format_for_vk_format(pCreateInfo->format); + assert(format_info->cpp > 0 || format_info->has_stencil); /* First allocate space for the color or depth buffer. info->cpp gives us * the cpp of the color or depth in case of depth/stencil formats. Stencil * only (VK_FORMAT_S8_UINT) has info->cpp == 0 and doesn't allocate * anything here. */ - if (info->cpp > 0) { - image->stride = ALIGN_I32(image->extent.width * info->cpp, + if (format_info->cpp > 0) { + image->stride = ALIGN_I32(image->extent.width * format_info->cpp, tile_info->width); aligned_height = ALIGN_I32(image->extent.height, tile_info->height); image->size = image->stride * aligned_height; @@ -195,7 +195,7 @@ VkResult anv_image_create( * point of view, but as far as the API is concerned, depth and stencil are * in the same image. 
*/ - if (info->has_stencil) { + if (format_info->has_stencil) { const struct anv_tile_info *w_info = &anv_tile_info_table[WMAJOR]; image->stencil_offset = ALIGN_U32(image->size, w_info->surface_alignment); image->stencil_stride = ALIGN_I32(image->extent.width, w_info->width); -- cgit v1.2.3 From ebe1e768b8f0c3a7e2958c6e82182016a1c9bb5c Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 25 Jun 2015 19:29:59 -0700 Subject: vk/formats: Fix incorrect depth formats anv_format::surface_format was incorrect for Vulkan depth formats. For example, the format table mapped VK_FORMAT_D24_UNORM -> .surface_format = D24_UNORM_X8_UINT VK_FORMAT_D32_FLOAT -> .surface_format = D32_FLOAT but should have mapped VK_FORMAT_D24_UNORM -> .surface_format = R24_UNORM_X8_TYPELESS VK_FORMAT_D32_FLOAT -> .surface_format = R32_FLOAT The Crucible test func.depthstencil.basic passed despite the bug, but only because it did not attempt to texture from the depth surface. The core problem is that RENDER_SURFACE_STATE.SurfaceFormat and 3DSTATE_DEPTH_BUFFER.SurfaceFormat are distinct types. Considering them as enum spaces, the two enum spaces have incompatible collisions. Fix this by adding a new field 'depth_format' to struct anv_format. Refer to brw_surface_formats.c:translate_tex_format() for precedent. --- src/vulkan/formats.c | 14 +++++++------- src/vulkan/image.c | 2 +- src/vulkan/private.h | 1 + 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index c88c7ce1ab9..c5cac6db7f7 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -123,13 +123,13 @@ static const struct anv_format anv_formats[] = { * depth format. The field .has_stencil indicates whether or not there's a * stencil buffer. 
*/ - fmt(VK_FORMAT_D16_UNORM, D16_UNORM, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_D24_UNORM, D24_UNORM_X8_UINT, .cpp = 4, .num_channels = 1), - fmt(VK_FORMAT_D32_SFLOAT, D32_FLOAT, .cpp = 4, .num_channels = 1), - fmt(VK_FORMAT_S8_UINT, UNSUPPORTED, .cpp = 0, .num_channels = 1, .has_stencil = true), - fmt(VK_FORMAT_D16_UNORM_S8_UINT, D16_UNORM, .cpp = 2, .num_channels = 2, .has_stencil = true), - fmt(VK_FORMAT_D24_UNORM_S8_UINT, D24_UNORM_X8_UINT, .cpp = 4, .num_channels = 2, .has_stencil = true), - fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, D32_FLOAT, .cpp = 4, .num_channels = 2, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1, .depth_format = D16_UNORM), + fmt(VK_FORMAT_D24_UNORM, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), + fmt(VK_FORMAT_D32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1, .depth_format = D32_FLOAT), + fmt(VK_FORMAT_S8_UINT, UNSUPPORTED, .cpp = 0, .num_channels = 1, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, R16_UNORM, .cpp = 2, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, R32_FLOAT, .cpp = 4, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), fmt(VK_FORMAT_BC1_RGB_UNORM, UNSUPPORTED), fmt(VK_FORMAT_BC1_RGB_SRGB, UNSUPPORTED), diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 8a6f1952a4a..5999318baaa 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -486,7 +486,7 @@ VkResult anv_CreateDepthStencilView( view->depth_stride = image->stride; view->depth_offset = image->offset; - view->depth_format = format->surface_format; + view->depth_format = format->depth_format; view->stencil_stride = image->stencil_stride; view->stencil_offset = image->offset + image->stencil_offset; diff --git a/src/vulkan/private.h 
b/src/vulkan/private.h index f89e92af2d1..7833f080b53 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -776,6 +776,7 @@ struct anv_format { uint16_t surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ uint8_t cpp; uint8_t num_channels; + uint8_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ bool has_stencil; }; -- cgit v1.2.3 From b91a76de980fd3f1c9006f406b7be65c236c1ab5 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 25 Jun 2015 19:42:09 -0700 Subject: vk: Reindent and document struct anv_depth_stencil_view --- src/vulkan/private.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 7833f080b53..397881f75a4 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -858,14 +858,14 @@ struct anv_sampler { }; struct anv_depth_stencil_view { - struct anv_bo * bo; + struct anv_bo *bo; - uint32_t depth_offset; - uint32_t depth_stride; - uint32_t depth_format; + uint32_t depth_offset; /**< Offset into bo. */ + uint32_t depth_stride; /**< 3DSTATE_DEPTH_BUFFER.SurfacePitch */ + uint32_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ - uint32_t stencil_offset; - uint32_t stencil_stride; + uint32_t stencil_offset; /**< Offset into bo. */ + uint32_t stencil_stride; /**< 3DSTATE_STENCIL_BUFFER.SurfacePitch */ }; struct anv_framebuffer { -- cgit v1.2.3 From 7ea707a42a21bced33de482f95b4a3dca781ae19 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 25 Jun 2015 19:46:42 -0700 Subject: vk/image: Add qpitch fields to anv_depth_stencil_view For now, hard-code them to 0. 
--- src/vulkan/device.c | 4 ++-- src/vulkan/image.c | 2 ++ src/vulkan/private.h | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 3c5691f3cb3..e637d2064ec 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3779,7 +3779,7 @@ anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer, .MinimumArrayElement = 0, .DepthBufferObjectControlState = GEN8_MOCS, .RenderTargetViewExtent = 1 - 1, - .SurfaceQPitch = 0); + .SurfaceQPitch = view->depth_qpitch >> 2); /* Disable hierarchial depth buffers. */ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER); @@ -3789,7 +3789,7 @@ anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer, .StencilBufferObjectControlState = GEN8_MOCS, .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, .SurfaceBaseAddress = { view->bo, view->stencil_offset }, - .SurfaceQPitch = 0); + .SurfaceQPitch = view->stencil_qpitch >> 2); /* Clear the clear params. */ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS); diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 5999318baaa..f3487302f28 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -487,9 +487,11 @@ VkResult anv_CreateDepthStencilView( view->depth_stride = image->stride; view->depth_offset = image->offset; view->depth_format = format->depth_format; + view->depth_qpitch = 0; /* FINISHME: QPitch */ view->stencil_stride = image->stencil_stride; view->stencil_offset = image->offset + image->stencil_offset; + view->stencil_qpitch = 0; /* FINISHME: QPitch */ *pView = (VkDepthStencilView) view; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 397881f75a4..353a51401eb 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -863,9 +863,11 @@ struct anv_depth_stencil_view { uint32_t depth_offset; /**< Offset into bo. 
*/ uint32_t depth_stride; /**< 3DSTATE_DEPTH_BUFFER.SurfacePitch */ uint32_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ + uint16_t depth_qpitch; /**< 3DSTATE_DEPTH_BUFFER.SurfaceQPitch */ uint32_t stencil_offset; /**< Offset into bo. */ uint32_t stencil_stride; /**< 3DSTATE_STENCIL_BUFFER.SurfacePitch */ + uint16_t stencil_qpitch; /**< 3DSTATE_DEPTH_BUFFER.SurfaceQPitch */ }; struct anv_framebuffer { -- cgit v1.2.3 From 068b8a41e2bbb2e4a7097be439c8281a0dece278 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 07:31:57 -0700 Subject: vk: Fix comment for anv_depth_stencil_view::stencil_qpitch s/DEPTH/STENCIL/ --- src/vulkan/private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 353a51401eb..23755c7e9ac 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -867,7 +867,7 @@ struct anv_depth_stencil_view { uint32_t stencil_offset; /**< Offset into bo. */ uint32_t stencil_stride; /**< 3DSTATE_STENCIL_BUFFER.SurfacePitch */ - uint16_t stencil_qpitch; /**< 3DSTATE_DEPTH_BUFFER.SurfaceQPitch */ + uint16_t stencil_qpitch; /**< 3DSTATE_STENCIL_BUFFER.SurfaceQPitch */ }; struct anv_framebuffer { -- cgit v1.2.3 From 0349e8d60794c6a71fb8081a256ed5d3e747b4b4 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 07:38:02 -0700 Subject: vk/formats: #undef fmt at end of format table --- src/vulkan/formats.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index c5cac6db7f7..a40181414d0 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -211,6 +211,8 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_B10G10R10A2_SINT, B10G10R10A2_SINT, .cpp = 4, .num_channels = 4) }; +#undef fmt + const struct anv_format * anv_format_for_vk_format(VkFormat format) { -- cgit v1.2.3 From 5d7103ee15593dda3811c251a1c5239a5d82cac5 Mon Sep 17 00:00:00 2001 From: Chad 
Versace Date: Fri, 26 Jun 2015 09:05:46 -0700 Subject: vk/image: Group some assertions closer together In anv_image_create(), group together the assertions on VkImageCreateInfo. --- src/vulkan/image.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index f3487302f28..607454fb498 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -132,6 +132,8 @@ VkResult anv_image_create( anv_assert(pCreateInfo->mipLevels == 1); anv_assert(pCreateInfo->arraySize == 1); anv_assert(pCreateInfo->samples == 1); + anv_assert(pCreateInfo->extent.width > 0); + anv_assert(pCreateInfo->extent.height > 0); anv_assert(pCreateInfo->extent.depth == 1); image->bo = NULL; @@ -148,10 +150,6 @@ VkResult anv_image_create( const struct anv_surf_type_limits *limits = &anv_surf_type_limits[image->surf_type]; - assert(image->extent.width > 0); - assert(image->extent.height > 0); - assert(image->extent.depth > 0); - const struct anv_tile_info *tile_info = &anv_tile_info_table[image->tile_mode]; -- cgit v1.2.3 From 67a7659d69a6e2c0927ca218efcc3c99086ae31b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 09:17:52 -0700 Subject: vk/image: Refactor anv_image_create() From my experience with intel_mipmap_tree.c, I learned that for struct's like anv_image and intel_mipmap_tree, which have sprawling multi-function construction codepaths, it's easy to mistakenly use unitialized struct members during construction. Let's eliminate the risk of using unitialized anv_image members during construction. Fill the struct at the function bottom instead of piecemeal throughout the constructor. 
--- src/vulkan/image.c | 113 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 47 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 607454fb498..c8a26f66130 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -115,18 +115,10 @@ VkResult anv_image_create( VkImage* pImage) { struct anv_device *device = (struct anv_device *) _device; - struct anv_image *image; - const struct anv_format *format_info; - int32_t aligned_height; - uint32_t stencil_size; + const VkExtent3D *restrict extent = &pCreateInfo->extent; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); - image = anv_device_alloc(device, sizeof(*image), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (image == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - /* XXX: We don't handle any of these */ anv_assert(pCreateInfo->imageType == VK_IMAGE_TYPE_2D); anv_assert(pCreateInfo->mipLevels == 1); @@ -136,55 +128,48 @@ VkResult anv_image_create( anv_assert(pCreateInfo->extent.height > 0); anv_assert(pCreateInfo->extent.depth == 1); - image->bo = NULL; - image->offset = 0; - image->type = pCreateInfo->imageType; - image->format = pCreateInfo->format; - image->extent = pCreateInfo->extent; - image->swap_chain = NULL; - image->tile_mode = anv_image_choose_tile_mode(pCreateInfo, extra); + const uint32_t tile_mode = + anv_image_choose_tile_mode(pCreateInfo, extra); /* TODO(chadv): How should we validate inputs? 
*/ - image->surf_type = anv_surf_type_from_image_type[pCreateInfo->imageType]; + const uint8_t surf_type = + anv_surf_type_from_image_type[pCreateInfo->imageType]; const struct anv_surf_type_limits *limits = - &anv_surf_type_limits[image->surf_type]; + &anv_surf_type_limits[surf_type]; const struct anv_tile_info *tile_info = - &anv_tile_info_table[image->tile_mode]; - - if (image->extent.width > limits->width || - image->extent.height > limits->height || - image->extent.depth > limits->depth) { - anv_loge("image extent is too large"); - free(image); + &anv_tile_info_table[tile_mode]; + if (extent->width > limits->width || + extent->height > limits->height || + extent->depth > limits->depth) { /* TODO(chadv): What is the correct error? */ + anv_loge("image extent is too large"); return vk_error(VK_ERROR_INVALID_MEMORY_SIZE); } - image->alignment = tile_info->surface_alignment; - - /* FINISHME: Stop hardcoding miptree image alignment */ - image->h_align = 4; - image->v_align = 4; - - format_info = anv_format_for_vk_format(pCreateInfo->format); + const struct anv_format *format_info = + anv_format_for_vk_format(pCreateInfo->format); assert(format_info->cpp > 0 || format_info->has_stencil); + uint32_t image_stride = 0; + uint32_t image_size = 0; + uint32_t stencil_offset = 0; + uint32_t stencil_stride = 0; + /* First allocate space for the color or depth buffer. info->cpp gives us * the cpp of the color or depth in case of depth/stencil formats. Stencil * only (VK_FORMAT_S8_UINT) has info->cpp == 0 and doesn't allocate * anything here. 
*/ if (format_info->cpp > 0) { - image->stride = ALIGN_I32(image->extent.width * format_info->cpp, - tile_info->width); - aligned_height = ALIGN_I32(image->extent.height, tile_info->height); - image->size = image->stride * aligned_height; - } else { - image->size = 0; - image->stride = 0; + uint32_t aligned_height; + + image_stride = ALIGN_I32(extent->width * format_info->cpp, + tile_info->width); + aligned_height = ALIGN_I32(extent->height, tile_info->height); + image_size = image_stride * aligned_height; } /* Formats with a stencil buffer (either combined depth/stencil or @@ -195,16 +180,50 @@ VkResult anv_image_create( */ if (format_info->has_stencil) { const struct anv_tile_info *w_info = &anv_tile_info_table[WMAJOR]; - image->stencil_offset = ALIGN_U32(image->size, w_info->surface_alignment); - image->stencil_stride = ALIGN_I32(image->extent.width, w_info->width); - aligned_height = ALIGN_I32(image->extent.height, w_info->height); - stencil_size = image->stencil_stride * aligned_height; - image->size = image->stencil_offset + stencil_size; - } else { - image->stencil_offset = 0; - image->stencil_stride = 0; + uint32_t aligned_height; + uint32_t stencil_size; + + stencil_offset = ALIGN_U32(image_size, w_info->surface_alignment); + stencil_stride = ALIGN_I32(extent->width, w_info->width); + aligned_height = ALIGN_I32(extent->height, w_info->height); + stencil_size = stencil_stride * aligned_height; + image_size = stencil_offset + stencil_size; } + struct anv_image *image = anv_device_alloc(device, sizeof(*image), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!image) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* To eliminate the risk of using unitialized struct members above, fill the + * image struct here at the function bottom instead of piecemeal throughout + * the function body. 
+ */ + *image = (struct anv_image) { + .type = pCreateInfo->imageType, + .extent = pCreateInfo->extent, + .format = pCreateInfo->format, + + .size = image_size, + .alignment = tile_info->surface_alignment, + .stride = image_stride, + + .bo = NULL, + .offset = 0, + + .stencil_offset = stencil_offset, + .stencil_stride = stencil_stride, + + .tile_mode = tile_mode, + .surf_type = surf_type, + + /* FINISHME: Stop hardcoding miptree image alignment */ + .h_align = 4, + .v_align = 4, + + .swap_chain = NULL, + }; + *pImage = (VkImage) image; return VK_SUCCESS; -- cgit v1.2.3 From 37d6e04ba14eae96b57fc1f4f0da5daf16821060 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 09:47:17 -0700 Subject: vk/formats: Remove the cpp=0 stencil hack The format table defined cpp = 0 for stencil-only formats. The real cpp is 1. When code begins to lie, especially about stencil buffers, code becomes increasingly fragile as time progresses, and the damage becomes increasingly hard to undo. (For precedent, see the painful history of stencil buffer cpp in the git log for gen6 and gen7 in the i965 driver). Let's undo the stencil buffer cpp lie now to avoid future pain. In the format table, set cpp = 1 for VK_FORMAT_S8; replace checks for cpp == 0; and delete all comments about the hack. --- src/vulkan/formats.c | 6 +----- src/vulkan/image.c | 20 +++++++------------- 2 files changed, 8 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index a40181414d0..eb96e94bb61 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -119,14 +119,10 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_R11G11B10_UFLOAT, R11G11B10_FLOAT, .cpp = 4, .num_channels = 3), fmt(VK_FORMAT_R9G9B9E5_UFLOAT, R9G9B9E5_SHAREDEXP, .cpp = 4, .num_channels = 3), - /* For depth/stencil formats, the .format and .cpp fields describe the - * depth format. The field .has_stencil indicates whether or not there's a - * stencil buffer. 
- */ fmt(VK_FORMAT_D16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1, .depth_format = D16_UNORM), fmt(VK_FORMAT_D24_UNORM, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), fmt(VK_FORMAT_D32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1, .depth_format = D32_FLOAT), - fmt(VK_FORMAT_S8_UINT, UNSUPPORTED, .cpp = 0, .num_channels = 1, .has_stencil = true), + fmt(VK_FORMAT_S8_UINT, UNSUPPORTED, .cpp = 1, .num_channels = 1, .has_stencil = true), fmt(VK_FORMAT_D16_UNORM_S8_UINT, R16_UNORM, .cpp = 2, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), fmt(VK_FORMAT_D24_UNORM_S8_UINT, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, R32_FLOAT, .cpp = 4, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), diff --git a/src/vulkan/image.c b/src/vulkan/image.c index c8a26f66130..9cf9d35a0f7 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -151,19 +151,14 @@ VkResult anv_image_create( const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); - assert(format_info->cpp > 0 || format_info->has_stencil); uint32_t image_stride = 0; uint32_t image_size = 0; uint32_t stencil_offset = 0; uint32_t stencil_stride = 0; - /* First allocate space for the color or depth buffer. info->cpp gives us - * the cpp of the color or depth in case of depth/stencil formats. Stencil - * only (VK_FORMAT_S8_UINT) has info->cpp == 0 and doesn't allocate - * anything here. - */ - if (format_info->cpp > 0) { + if (!format_info->has_stencil || format_info->depth_format) { + /* The format has a color or depth component. Calculate space for it. 
*/ uint32_t aligned_height; image_stride = ALIGN_I32(extent->width * format_info->cpp, @@ -172,13 +167,12 @@ VkResult anv_image_create( image_size = image_stride * aligned_height; } - /* Formats with a stencil buffer (either combined depth/stencil or - * VK_FORMAT_S8_UINT) have info->has_stencil == true. The stencil buffer is - * placed after the depth buffer and is a separate buffer from the GPU - * point of view, but as far as the API is concerned, depth and stencil are - * in the same image. - */ if (format_info->has_stencil) { + /* From the GPU's perspective, the depth buffer and stencil buffer are + * separate buffers. From Vulkan's perspective, though, depth and + * stencil reside in the same image. To satisfy Vulkan and the GPU, we + * place the depth and stencil buffers in the same bo. + */ const struct anv_tile_info *w_info = &anv_tile_info_table[WMAJOR]; uint32_t aligned_height; uint32_t stencil_size; -- cgit v1.2.3 From 55752fe94a2001433631cc504f37d23f8f050b8f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 15:07:59 -0700 Subject: vk: Rename functions ALIGN_*32 -> align_*32 ALIGN_U32 and ALIGN_I32 are functions, not macros. So stop using allcaps. 
--- src/vulkan/allocator.c | 4 ++-- src/vulkan/aub.c | 4 ++-- src/vulkan/compiler.cpp | 2 +- src/vulkan/device.c | 2 +- src/vulkan/image.c | 10 +++++----- src/vulkan/private.h | 4 ++-- src/vulkan/util.c | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c index 6a7e845ef37..2d0d255721b 100644 --- a/src/vulkan/allocator.c +++ b/src/vulkan/allocator.c @@ -537,7 +537,7 @@ anv_state_stream_alloc(struct anv_state_stream *stream, struct anv_state state; uint32_t block; - state.offset = ALIGN_U32(stream->next, alignment); + state.offset = align_u32(stream->next, alignment); if (state.offset + size > stream->end) { block = anv_block_pool_alloc(stream->block_pool); void *current_map = stream->block_pool->map; @@ -548,7 +548,7 @@ anv_state_stream_alloc(struct anv_state_stream *stream, stream->current_block = block; stream->next = block + sizeof(*sb); stream->end = block + stream->block_pool->block_size; - state.offset = ALIGN_U32(stream->next, alignment); + state.offset = align_u32(stream->next, alignment); assert(state.offset + size <= stream->end); } diff --git a/src/vulkan/aub.c b/src/vulkan/aub.c index e3f333a54aa..42d4611eb6e 100644 --- a/src/vulkan/aub.c +++ b/src/vulkan/aub.c @@ -149,7 +149,7 @@ aub_write_trace_block(struct anv_aub_writer *writer, uint32_t type, type | AUB_TRACE_OP_DATA_WRITE); aub_out(writer, subtype); aub_out(writer, gtt_offset + offset); - aub_out(writer, ALIGN_U32(block_size, 4)); + aub_out(writer, align_u32(block_size, 4)); if (writer->gen >= 8) aub_out(writer, 0); @@ -258,7 +258,7 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) aub_bos[i].map = anv_gem_mmap(device, bo->gem_handle, 0, bo->size); aub_bos[i].relocated = aub_bos[i].map; aub_bos[i].offset = offset; - offset = ALIGN_U32(offset + bo->size + 4095, 4096); + offset = align_u32(offset + bo->size + 4095, 4096); } struct anv_batch_bo *first_bbo; diff --git a/src/vulkan/compiler.cpp 
b/src/vulkan/compiler.cpp index 144dd986260..b220331408c 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -1007,7 +1007,7 @@ add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage, pipeline->active_stages |= 1 << stage; pipeline->scratch_start[stage] = pipeline->total_scratch; pipeline->total_scratch = - ALIGN_U32(pipeline->total_scratch, 1024) + + align_u32(pipeline->total_scratch, 1024) + prog_data->total_scratch * max_threads[stage]; } diff --git a/src/vulkan/device.c b/src/vulkan/device.c index e637d2064ec..b6d0984d93d 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2710,7 +2710,7 @@ anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, { struct anv_state state; - state.offset = ALIGN_U32(cmd_buffer->surface_next, alignment); + state.offset = align_u32(cmd_buffer->surface_next, alignment); if (state.offset + size > cmd_buffer->surface_batch_bo->bo.size) return (struct anv_state) { 0 }; diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 9cf9d35a0f7..a2f76ef27d6 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -161,9 +161,9 @@ VkResult anv_image_create( /* The format has a color or depth component. Calculate space for it. 
*/ uint32_t aligned_height; - image_stride = ALIGN_I32(extent->width * format_info->cpp, + image_stride = align_i32(extent->width * format_info->cpp, tile_info->width); - aligned_height = ALIGN_I32(extent->height, tile_info->height); + aligned_height = align_i32(extent->height, tile_info->height); image_size = image_stride * aligned_height; } @@ -177,9 +177,9 @@ VkResult anv_image_create( uint32_t aligned_height; uint32_t stencil_size; - stencil_offset = ALIGN_U32(image_size, w_info->surface_alignment); - stencil_stride = ALIGN_I32(extent->width, w_info->width); - aligned_height = ALIGN_I32(extent->height, w_info->height); + stencil_offset = align_u32(image_size, w_info->surface_alignment); + stencil_stride = align_i32(extent->width, w_info->width); + aligned_height = align_i32(extent->height, w_info->height); stencil_size = stencil_stride * aligned_height; image_size = stencil_offset + stencil_size; } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 23755c7e9ac..e9943062807 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -61,13 +61,13 @@ extern "C" { #define MAX(a, b) ((a) > (b) ? 
(a) : (b)) static inline uint32_t -ALIGN_U32(uint32_t v, uint32_t a) +align_u32(uint32_t v, uint32_t a) { return (v + a - 1) & ~(a - 1); } static inline int32_t -ALIGN_I32(int32_t v, int32_t a) +align_i32(int32_t v, int32_t a) { return (v + a - 1) & ~(a - 1); } diff --git a/src/vulkan/util.c b/src/vulkan/util.c index cbeb663b5ef..21cb6484670 100644 --- a/src/vulkan/util.c +++ b/src/vulkan/util.c @@ -108,7 +108,7 @@ anv_vector_add(struct anv_vector *vector) data = malloc(size); if (data == NULL) return NULL; - split = ALIGN_U32(vector->tail, vector->size); + split = align_u32(vector->tail, vector->size); tail = vector->tail & (vector->size - 1); if (vector->head - split < vector->size) { memcpy(data + tail, -- cgit v1.2.3 From 74e3eb304f72ac826d48f54a69ab03816796a7bc Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 15:06:09 -0700 Subject: vk: Define MIN(a, b) macro --- src/vulkan/private.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index e9943062807..65986155317 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -58,6 +58,7 @@ extern "C" { #define anv_noreturn __attribute__((__noreturn__)) #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? 
(a) : (b)) static inline uint32_t -- cgit v1.2.3 From 667529fbaaac1f8ccccf56dd9255422d5c914425 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 15:27:20 -0700 Subject: vk: Reindent struct anv_image --- src/vulkan/private.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 65986155317..6e122899e2c 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -785,22 +785,22 @@ const struct anv_format * anv_format_for_vk_format(VkFormat format); struct anv_image { - VkImageType type; - VkExtent3D extent; - VkFormat format; - uint32_t tile_mode; - VkDeviceSize size; - uint32_t alignment; - uint32_t stride; + VkImageType type; + VkExtent3D extent; + VkFormat format; + uint32_t tile_mode; + VkDeviceSize size; + uint32_t alignment; + uint32_t stride; - uint32_t stencil_offset; - uint32_t stencil_stride; + uint32_t stencil_offset; + uint32_t stencil_stride; /* Set when bound */ - struct anv_bo * bo; - VkDeviceSize offset; + struct anv_bo *bo; + VkDeviceSize offset; - struct anv_swap_chain * swap_chain; + struct anv_swap_chain *swap_chain; /** * \name Alignment of miptree images, in units of pixels. -- cgit v1.2.3 From 9c46ba9ca20c4d6ce0dd969f7a09b74d276f6149 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 19:23:21 -0700 Subject: vk/image: Abort on stencil image views The code doesn't work. Not even close. Replace the broken code with a FINISHME and abort. --- src/vulkan/image.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index a2f76ef27d6..dc05b694c8a 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -278,10 +278,8 @@ anv_image_view_init(struct anv_surface_view *view, view->bo = image->bo; switch (pCreateInfo->subresourceRange.aspect) { case VK_IMAGE_ASPECT_STENCIL: - /* FIXME: How is stencil texturing formed? 
*/ - view->offset = image->offset + image->stencil_offset; - tile_mode = WMAJOR; - format = R8_UINT; + anv_finishme("stencil image views"); + abort(); break; case VK_IMAGE_ASPECT_DEPTH: case VK_IMAGE_ASPECT_COLOR: -- cgit v1.2.3 From ca6cef330290eb3ce20187d4b39f13fc07e6c708 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 19:50:04 -0700 Subject: vk/image: Drop some tmp vars in anv_image_view_init() Variables 'tile_mode' and 'format' are unneeded. --- src/vulkan/image.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index dc05b694c8a..b55fb9fde5c 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -264,9 +264,9 @@ anv_image_view_init(struct anv_surface_view *view, struct anv_cmd_buffer *cmd_buffer) { struct anv_image *image = (struct anv_image *) pCreateInfo->image; - const struct anv_format *info = + + const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); - uint32_t tile_mode, format; /* XXX: We don't handle any of these */ anv_assert(pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_2D); @@ -276,6 +276,7 @@ anv_image_view_init(struct anv_surface_view *view, anv_assert(pCreateInfo->subresourceRange.arraySize == 1); view->bo = image->bo; + switch (pCreateInfo->subresourceRange.aspect) { case VK_IMAGE_ASPECT_STENCIL: anv_finishme("stencil image views"); @@ -284,8 +285,6 @@ anv_image_view_init(struct anv_surface_view *view, case VK_IMAGE_ASPECT_DEPTH: case VK_IMAGE_ASPECT_COLOR: view->offset = image->offset; - tile_mode = image->tile_mode; - format = info->surface_format; break; default: unreachable(""); @@ -307,10 +306,10 @@ anv_image_view_init(struct anv_surface_view *view, struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = anv_surf_type_from_image_view_type[pCreateInfo->viewType], .SurfaceArray = false, - .SurfaceFormat = format, + .SurfaceFormat = format_info->surface_format, .SurfaceVerticalAlignment = 
anv_valign[image->v_align], .SurfaceHorizontalAlignment = anv_halign[image->h_align], - .TileMode = tile_mode, + .TileMode = image->tile_mode, .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, .SamplerL2BypassModeDisable = true, -- cgit v1.2.3 From fdcd71f71d62a9d4bb08b54173121289b67b19ec Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 20:06:08 -0700 Subject: vk/image: Embed VkImageCreateInfo* into anv_image_create_info All function signatures that matched this pattern, old: f(const VkImageCreateInfo *, const struct anv_image_create_info *) were rewritten as new: f(const struct anv_image_create_info *) --- src/vulkan/image.c | 20 +++++++++++--------- src/vulkan/private.h | 7 ++++--- src/vulkan/x11.c | 43 ++++++++++++++++++++++--------------------- 3 files changed, 37 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index b55fb9fde5c..dcea1dc8863 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -91,13 +91,12 @@ static const struct anv_tile_info { }; static uint32_t -anv_image_choose_tile_mode(const VkImageCreateInfo *vk_info, - const struct anv_image_create_info *anv_info) +anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) { - if (anv_info) + if (anv_info->force_tile_mode) return anv_info->tile_mode; - switch (vk_info->tiling) { + switch (anv_info->vk_info->tiling) { case VK_IMAGE_TILING_LINEAR: return LINEAR; case VK_IMAGE_TILING_OPTIMAL: @@ -110,11 +109,11 @@ anv_image_choose_tile_mode(const VkImageCreateInfo *vk_info, VkResult anv_image_create( VkDevice _device, - const VkImageCreateInfo* pCreateInfo, - const struct anv_image_create_info * extra, + const struct anv_image_create_info * create_info, VkImage* pImage) { struct anv_device *device = (struct anv_device *) _device; + const VkImageCreateInfo *pCreateInfo = create_info->vk_info; const VkExtent3D *restrict extent = &pCreateInfo->extent; assert(pCreateInfo->sType == 
VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); @@ -128,8 +127,7 @@ VkResult anv_image_create( anv_assert(pCreateInfo->extent.height > 0); anv_assert(pCreateInfo->extent.depth == 1); - const uint32_t tile_mode = - anv_image_choose_tile_mode(pCreateInfo, extra); + const uint32_t tile_mode = anv_image_choose_tile_mode(create_info); /* TODO(chadv): How should we validate inputs? */ const uint8_t surf_type = @@ -228,7 +226,11 @@ VkResult anv_CreateImage( const VkImageCreateInfo* pCreateInfo, VkImage* pImage) { - return anv_image_create(device, pCreateInfo, NULL, pImage); + return anv_image_create(device, + &(struct anv_image_create_info) { + .vk_info = pCreateInfo, + }, + pImage); } VkResult anv_GetImageSubresourceInfo( diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 6e122899e2c..d55912d7543 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -833,12 +833,13 @@ struct anv_surface_view { }; struct anv_image_create_info { - uint32_t tile_mode; + const VkImageCreateInfo *vk_info; + bool force_tile_mode; + uint8_t tile_mode; }; VkResult anv_image_create(VkDevice _device, - const VkImageCreateInfo *pCreateInfo, - const struct anv_image_create_info *extra, + const struct anv_image_create_info *info, VkImage *pImage); void anv_image_view_init(struct anv_surface_view *view, diff --git a/src/vulkan/x11.c b/src/vulkan/x11.c index 898aba056c7..6aeb34b9b56 100644 --- a/src/vulkan/x11.c +++ b/src/vulkan/x11.c @@ -107,27 +107,28 @@ VkResult anv_CreateSwapChainWSI( struct anv_device_memory *memory; anv_image_create((VkDevice) device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = pCreateInfo->imageFormat, - .extent = { - .width = pCreateInfo->imageExtent.width, - .height = pCreateInfo->imageExtent.height, - .depth = 1 - }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - /* FIXME: Need a way to use X tiling to allow scanout */ - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }, - &(struct anv_image_create_info) { - .tile_mode = XMAJOR - }, - (VkImage *) &image); + &(struct anv_image_create_info) { + .force_tile_mode = true, + .tile_mode = XMAJOR, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->imageFormat, + .extent = { + .width = pCreateInfo->imageExtent.width, + .height = pCreateInfo->imageExtent.height, + .depth = 1 + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + (VkImage *) &image); anv_AllocMemory((VkDevice) device, &(VkMemoryAllocInfo) { -- cgit v1.2.3 From 127cb3f6c5f29c4cb23ba23a294fc5b65702ffdf Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 20:12:42 -0700 Subject: vk/image: Reformat function signatures Reformat them to match Mesa code-style. 
--- src/vulkan/image.c | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index dcea1dc8863..584e4bc62f3 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -107,10 +107,10 @@ anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) } } -VkResult anv_image_create( - VkDevice _device, - const struct anv_image_create_info * create_info, - VkImage* pImage) +VkResult +anv_image_create(VkDevice _device, + const struct anv_image_create_info *create_info, + VkImage *pImage) { struct anv_device *device = (struct anv_device *) _device; const VkImageCreateInfo *pCreateInfo = create_info->vk_info; @@ -221,10 +221,10 @@ VkResult anv_image_create( return VK_SUCCESS; } -VkResult anv_CreateImage( - VkDevice device, - const VkImageCreateInfo* pCreateInfo, - VkImage* pImage) +VkResult +anv_CreateImage(VkDevice device, + const VkImageCreateInfo *pCreateInfo, + VkImage *pImage) { return anv_image_create(device, &(struct anv_image_create_info) { @@ -233,13 +233,13 @@ VkResult anv_CreateImage( pImage); } -VkResult anv_GetImageSubresourceInfo( - VkDevice device, - VkImage image, - const VkImageSubresource* pSubresource, - VkSubresourceInfoType infoType, - size_t* pDataSize, - void* pData) +VkResult +anv_GetImageSubresourceInfo(VkDevice device, + VkImage image, + const VkImageSubresource *pSubresource, + VkSubresourceInfoType infoType, + size_t *pDataSize, + void *pData) { stub_return(VK_UNSUPPORTED); } @@ -352,10 +352,10 @@ anv_image_view_init(struct anv_surface_view *view, GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); } -VkResult anv_CreateImageView( - VkDevice _device, - const VkImageViewCreateInfo* pCreateInfo, - VkImageView* pView) +VkResult +anv_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + VkImageView *pView) { struct anv_device *device = (struct anv_device *) 
_device; struct anv_surface_view *view; @@ -444,10 +444,10 @@ anv_color_attachment_view_init(struct anv_surface_view *view, GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); } -VkResult anv_CreateColorAttachmentView( - VkDevice _device, - const VkColorAttachmentViewCreateInfo* pCreateInfo, - VkColorAttachmentView* pView) +VkResult +anv_CreateColorAttachmentView(VkDevice _device, + const VkColorAttachmentViewCreateInfo *pCreateInfo, + VkColorAttachmentView *pView) { struct anv_device *device = (struct anv_device *) _device; struct anv_surface_view *view; @@ -468,10 +468,10 @@ VkResult anv_CreateColorAttachmentView( return VK_SUCCESS; } -VkResult anv_CreateDepthStencilView( - VkDevice _device, - const VkDepthStencilViewCreateInfo* pCreateInfo, - VkDepthStencilView* pView) +VkResult +anv_CreateDepthStencilView(VkDevice _device, + const VkDepthStencilViewCreateInfo *pCreateInfo, + VkDepthStencilView *pView) { struct anv_device *device = (struct anv_device *) _device; struct anv_depth_stencil_view *view; -- cgit v1.2.3 From c6e76aed9dc28aae05cb11857201b5dab0c0cae8 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 18:48:34 -0700 Subject: vk/image: Define anv_surface, refactor anv_image This prepares for upcoming miptree support. anv_surface is a proxy for color surfaces, depth surfaces, and stencil surfaces. Embed two instances of anv_surface into anv_image: the primary surface (color or depth), and an optional stencil surface. 
--- src/vulkan/image.c | 152 +++++++++++++++++++++++++++------------------------ src/vulkan/intel.c | 20 ++++--- src/vulkan/private.h | 52 +++++++++++------- src/vulkan/x11.c | 7 ++- 4 files changed, 131 insertions(+), 100 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 584e4bc62f3..4a42aecba41 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -96,6 +96,9 @@ anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) if (anv_info->force_tile_mode) return anv_info->tile_mode; + if (anv_info->vk_info->format == VK_FORMAT_S8_UINT) + return WMAJOR; + switch (anv_info->vk_info->tiling) { case VK_IMAGE_TILING_LINEAR: return LINEAR; @@ -107,6 +110,43 @@ anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) } } +static void +anv_image_make_surface(const struct anv_image_create_info *create_info, + uint64_t *inout_image_size, + uint32_t *inout_image_alignment, + struct anv_surface *out_surface) +{ + const VkExtent3D *restrict extent = &create_info->vk_info->extent; + + const uint8_t tile_mode = anv_image_choose_tile_mode(create_info); + + const struct anv_tile_info *tile_info = + &anv_tile_info_table[tile_mode]; + + const struct anv_format *format_info = + anv_format_for_vk_format(create_info->vk_info->format); + + uint32_t stride = align_u32(extent->width * format_info->cpp, + tile_info->width); + uint32_t size = stride * align_u32(extent->height, tile_info->height); + uint32_t offset = align_u32(*inout_image_size, + tile_info->surface_alignment); + + *inout_image_size = offset + size; + *inout_image_alignment = MAX(*inout_image_alignment, + tile_info->surface_alignment); + + *out_surface = (struct anv_surface) { + .offset = offset, + .stride = stride, + .tile_mode = tile_mode, + + /* FINISHME: Stop hardcoding miptree subimage alignment */ + .h_align = 4, + .v_align = 4, + }; +} + VkResult anv_image_create(VkDevice _device, const struct anv_image_create_info *create_info, @@ -115,6 +155,7 
@@ anv_image_create(VkDevice _device, struct anv_device *device = (struct anv_device *) _device; const VkImageCreateInfo *pCreateInfo = create_info->vk_info; const VkExtent3D *restrict extent = &pCreateInfo->extent; + struct anv_image *image; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); @@ -127,8 +168,6 @@ anv_image_create(VkDevice _device, anv_assert(pCreateInfo->extent.height > 0); anv_assert(pCreateInfo->extent.depth == 1); - const uint32_t tile_mode = anv_image_choose_tile_mode(create_info); - /* TODO(chadv): How should we validate inputs? */ const uint8_t surf_type = anv_surf_type_from_image_type[pCreateInfo->imageType]; @@ -136,9 +175,6 @@ anv_image_create(VkDevice _device, const struct anv_surf_type_limits *limits = &anv_surf_type_limits[surf_type]; - const struct anv_tile_info *tile_info = - &anv_tile_info_table[tile_mode]; - if (extent->width > limits->width || extent->height > limits->height || extent->depth > limits->depth) { @@ -150,19 +186,21 @@ anv_image_create(VkDevice _device, const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); - uint32_t image_stride = 0; - uint32_t image_size = 0; - uint32_t stencil_offset = 0; - uint32_t stencil_stride = 0; + image = anv_device_alloc(device, sizeof(*image), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!image) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - if (!format_info->has_stencil || format_info->depth_format) { - /* The format has a color or depth component. Calculate space for it. 
*/ - uint32_t aligned_height; + memset(image, 0, sizeof(*image)); + image->type = pCreateInfo->imageType; + image->extent = pCreateInfo->extent; + image->format = pCreateInfo->format; + image->surf_type = surf_type; - image_stride = align_i32(extent->width * format_info->cpp, - tile_info->width); - aligned_height = align_i32(extent->height, tile_info->height); - image_size = image_stride * aligned_height; + if (likely(!format_info->has_stencil || format_info->depth_format)) { + /* The image's primary surface is a color or depth surface. */ + anv_image_make_surface(create_info, &image->size, &image->alignment, + &image->primary_surface); } if (format_info->has_stencil) { @@ -171,51 +209,16 @@ anv_image_create(VkDevice _device, * stencil reside in the same image. To satisfy Vulkan and the GPU, we * place the depth and stencil buffers in the same bo. */ - const struct anv_tile_info *w_info = &anv_tile_info_table[WMAJOR]; - uint32_t aligned_height; - uint32_t stencil_size; - - stencil_offset = align_u32(image_size, w_info->surface_alignment); - stencil_stride = align_i32(extent->width, w_info->width); - aligned_height = align_i32(extent->height, w_info->height); - stencil_size = stencil_stride * aligned_height; - image_size = stencil_offset + stencil_size; + VkImageCreateInfo stencil_info = *pCreateInfo; + stencil_info.format = VK_FORMAT_S8_UINT; + + anv_image_make_surface( + &(struct anv_image_create_info) { + .vk_info = &stencil_info, + }, + &image->size, &image->alignment, &image->stencil_surface); } - struct anv_image *image = anv_device_alloc(device, sizeof(*image), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!image) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* To eliminate the risk of using unitialized struct members above, fill the - * image struct here at the function bottom instead of piecemeal throughout - * the function body. 
- */ - *image = (struct anv_image) { - .type = pCreateInfo->imageType, - .extent = pCreateInfo->extent, - .format = pCreateInfo->format, - - .size = image_size, - .alignment = tile_info->surface_alignment, - .stride = image_stride, - - .bo = NULL, - .offset = 0, - - .stencil_offset = stencil_offset, - .stencil_stride = stencil_stride, - - .tile_mode = tile_mode, - .surf_type = surf_type, - - /* FINISHME: Stop hardcoding miptree image alignment */ - .h_align = 4, - .v_align = 4, - - .swap_chain = NULL, - }; - *pImage = (VkImage) image; return VK_SUCCESS; @@ -266,6 +269,7 @@ anv_image_view_init(struct anv_surface_view *view, struct anv_cmd_buffer *cmd_buffer) { struct anv_image *image = (struct anv_image *) pCreateInfo->image; + struct anv_surface *surface = NULL; const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); @@ -287,12 +291,15 @@ anv_image_view_init(struct anv_surface_view *view, case VK_IMAGE_ASPECT_DEPTH: case VK_IMAGE_ASPECT_COLOR: view->offset = image->offset; + surface = &image->primary_surface; break; default: unreachable(""); break; } + view->offset = image->offset + surface->offset; + /* TODO: Miplevels */ view->extent = image->extent; @@ -309,9 +316,9 @@ anv_image_view_init(struct anv_surface_view *view, .SurfaceType = anv_surf_type_from_image_view_type[pCreateInfo->viewType], .SurfaceArray = false, .SurfaceFormat = format_info->surface_format, - .SurfaceVerticalAlignment = anv_valign[image->v_align], - .SurfaceHorizontalAlignment = anv_halign[image->h_align], - .TileMode = image->tile_mode, + .SurfaceVerticalAlignment = anv_valign[surface->v_align], + .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .TileMode = surface->tile_mode, .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, .SamplerL2BypassModeDisable = true, @@ -322,7 +329,7 @@ anv_image_view_init(struct anv_surface_view *view, .Height = image->extent.height - 1, .Width = image->extent.width - 1, .Depth = image->extent.depth - 1, - 
.SurfacePitch = image->stride - 1, + .SurfacePitch = surface->stride - 1, .MinimumArrayElement = 0, .NumberofMultisamples = MULTISAMPLECOUNT_1, .XOffset = 0, @@ -383,6 +390,7 @@ anv_color_attachment_view_init(struct anv_surface_view *view, struct anv_cmd_buffer *cmd_buffer) { struct anv_image *image = (struct anv_image *) pCreateInfo->image; + struct anv_surface *surface = &image->primary_surface; const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); @@ -408,9 +416,9 @@ anv_color_attachment_view_init(struct anv_surface_view *view, .SurfaceType = SURFTYPE_2D, .SurfaceArray = false, .SurfaceFormat = format->surface_format, - .SurfaceVerticalAlignment = anv_valign[image->v_align], - .SurfaceHorizontalAlignment = anv_halign[image->h_align], - .TileMode = image->tile_mode, + .SurfaceVerticalAlignment = anv_valign[surface->v_align], + .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .TileMode = surface->tile_mode, .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, .SamplerL2BypassModeDisable = true, @@ -421,7 +429,7 @@ anv_color_attachment_view_init(struct anv_surface_view *view, .Height = image->extent.height - 1, .Width = image->extent.width - 1, .Depth = image->extent.depth - 1, - .SurfacePitch = image->stride - 1, + .SurfacePitch = surface->stride - 1, .MinimumArrayElement = 0, .NumberofMultisamples = MULTISAMPLECOUNT_1, .XOffset = 0, @@ -476,6 +484,8 @@ anv_CreateDepthStencilView(VkDevice _device, struct anv_device *device = (struct anv_device *) _device; struct anv_depth_stencil_view *view; struct anv_image *image = (struct anv_image *) pCreateInfo->image; + struct anv_surface *depth_surface = &image->primary_surface; + struct anv_surface *stencil_surface = &image->stencil_surface; const struct anv_format *format = anv_format_for_vk_format(image->format); @@ -494,13 +504,13 @@ anv_CreateDepthStencilView(VkDevice _device, view->bo = image->bo; - view->depth_stride = image->stride; - view->depth_offset = image->offset; + 
view->depth_stride = depth_surface->stride; + view->depth_offset = image->offset + depth_surface->offset; view->depth_format = format->depth_format; view->depth_qpitch = 0; /* FINISHME: QPitch */ - view->stencil_stride = image->stencil_stride; - view->stencil_offset = image->offset + image->stencil_offset; + view->stencil_stride = stencil_surface->stride; + view->stencil_offset = image->offset + stencil_surface->offset; view->stencil_qpitch = 0; /* FINISHME: QPitch */ *pView = (VkDepthStencilView) view; diff --git a/src/vulkan/intel.c b/src/vulkan/intel.c index fbfed596397..06e79591ee4 100644 --- a/src/vulkan/intel.c +++ b/src/vulkan/intel.c @@ -65,13 +65,19 @@ VkResult anv_CreateDmaBufImageINTEL( goto fail_mem; } - image->bo = &mem->bo; - image->offset = 0; - image->type = VK_IMAGE_TYPE_2D; - image->extent = pCreateInfo->extent; - image->tile_mode = XMAJOR; - image->stride = pCreateInfo->strideInBytes; - image->size = mem->bo.size; + *image = (struct anv_image) { + .bo = &mem->bo, + .offset = 0, + .type = VK_IMAGE_TYPE_2D, + .extent = pCreateInfo->extent, + .size = mem->bo.size, + + .primary_surface = { + .offset = 0, + .stride = pCreateInfo->strideInBytes, + .tile_mode = XMAJOR, + }, + }; assert(image->extent.width > 0); assert(image->extent.height > 0); diff --git a/src/vulkan/private.h b/src/vulkan/private.h index d55912d7543..9a1d0a2c53a 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -784,17 +784,38 @@ struct anv_format { const struct anv_format * anv_format_for_vk_format(VkFormat format); +/** + * A proxy for the color surfaces, depth surfaces, and stencil surfaces. + */ +struct anv_surface { + /** + * Offset from VkImage's base address, as bound by vkBindImageMemory(). + */ + uint32_t offset; + + uint32_t stride; /**< RENDER_SURFACE_STATE.SurfacePitch */ + + /** + * \name Alignment of miptree images, in units of pixels. + * + * These fields contain the real alignment values, not the values to be + * given to the GPU. 
For example, if h_align is 4, then program the GPU + * with HALIGN_4. + * \{ + */ + uint8_t h_align; /**< RENDER_SURFACE_STATE.SurfaceHorizontalAlignment */ + uint8_t v_align; /**< RENDER_SURFACE_STATE.SurfaceVerticalAlignment */ + /** \} */ + + uint8_t tile_mode; /**< RENDER_SURFACE_STATE.TileMode */ +}; + struct anv_image { VkImageType type; VkExtent3D extent; VkFormat format; - uint32_t tile_mode; VkDeviceSize size; uint32_t alignment; - uint32_t stride; - - uint32_t stencil_offset; - uint32_t stencil_stride; /* Set when bound */ struct anv_bo *bo; @@ -802,23 +823,14 @@ struct anv_image { struct anv_swap_chain *swap_chain; - /** - * \name Alignment of miptree images, in units of pixels. - * - * These fields contain the actual alignment values, not the values the - * hardware expects. For example, if h_align is 4, then program the hardware - * with HALIGN_4. - * - * \see RENDER_SURFACE_STATE.SurfaceHorizontalAlignment - * \see RENDER_SURFACE_STATE.SurfaceVerticalAlignment - * \{ - */ - uint8_t h_align; - uint8_t v_align; - /** \} */ - /** RENDER_SURFACE_STATE.SurfaceType */ uint8_t surf_type; + + /** Primary surface is either color or depth. */ + struct anv_surface primary_surface; + + /** Stencil surface is optional. 
*/ + struct anv_surface stencil_surface; }; struct anv_surface_view { diff --git a/src/vulkan/x11.c b/src/vulkan/x11.c index 6aeb34b9b56..d49b93331a8 100644 --- a/src/vulkan/x11.c +++ b/src/vulkan/x11.c @@ -104,6 +104,7 @@ VkResult anv_CreateSwapChainWSI( for (uint32_t i = 0; i < chain->count; i++) { struct anv_image *image; + struct anv_surface *surface; struct anv_device_memory *memory; anv_image_create((VkDevice) device, @@ -130,6 +131,8 @@ VkResult anv_CreateSwapChainWSI( }}, (VkImage *) &image); + surface = &image->primary_surface; + anv_AllocMemory((VkDevice) device, &(VkMemoryAllocInfo) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, @@ -143,7 +146,7 @@ VkResult anv_CreateSwapChainWSI( (VkDeviceMemory) memory, 0); ret = anv_gem_set_tiling(device, memory->bo.gem_handle, - image->stride, I915_TILING_X); + surface->stride, I915_TILING_X); if (ret) { result = vk_error(VK_ERROR_UNKNOWN); goto fail; @@ -166,7 +169,7 @@ VkResult anv_CreateSwapChainWSI( image->size, pCreateInfo->imageExtent.width, pCreateInfo->imageExtent.height, - image->stride, + surface->stride, depth, bpp, fd); chain->images[i].image = image; -- cgit v1.2.3 From 5b3a1ceb83a2a47a4fcde38688d26e008ddda0eb Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 21:27:54 -0700 Subject: vk/image: Enable 2d single-sample color miptrees What's been tested, for both image views and color attachment views: - VK_FORMAT_R8G8B8A8_UNORM - VK_IMAGE_VIEW_TYPE_2D - mipLevels: 1, 2 - baseMipLevel: 0, 1 - arraySize: 1, 2 - baseArraySlice: 0, 1 What's known to be broken: - Depth and stencil miptrees. To fix this, anv_depth_stencil_view needs major rework. 
- VkImageViewType != 2D - MSAA Fixes Crucible tests: func.miptree.view-2d.levels02.array01.* func.miptree.view-2d.levels01.array02.* func.miptree.view-2d.levels02.array02.* --- src/vulkan/image.c | 200 +++++++++++++++++++++++++++++++++++++-------------- src/vulkan/private.h | 4 ++ 2 files changed, 152 insertions(+), 52 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 4a42aecba41..9930d90aa41 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -110,13 +110,19 @@ anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) } } -static void +static VkResult anv_image_make_surface(const struct anv_image_create_info *create_info, uint64_t *inout_image_size, uint32_t *inout_image_alignment, struct anv_surface *out_surface) { + /* See RENDER_SURFACE_STATE.SurfaceQPitch */ + static const uint16_t min_qpitch UNUSED = 0x4; + static const uint16_t max_qpitch UNUSED = 0x1ffc; + const VkExtent3D *restrict extent = &create_info->vk_info->extent; + const uint32_t levels = create_info->vk_info->mipLevels; + const uint32_t array_size = create_info->vk_info->arraySize; const uint8_t tile_mode = anv_image_choose_tile_mode(create_info); @@ -126,11 +132,47 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, const struct anv_format *format_info = anv_format_for_vk_format(create_info->vk_info->format); - uint32_t stride = align_u32(extent->width * format_info->cpp, - tile_info->width); - uint32_t size = stride * align_u32(extent->height, tile_info->height); - uint32_t offset = align_u32(*inout_image_size, - tile_info->surface_alignment); + const uint32_t i = 4; /* FINISHME: Stop hardcoding subimage alignment */ + const uint32_t j = 4; /* FINISHME: Stop hardcoding subimage alignment */ + const uint32_t w0 = align_u32(extent->width, i); + const uint32_t h0 = align_u32(extent->height, j); + + uint16_t qpitch; + uint32_t mt_width; + uint32_t mt_height; + + if (levels == 1 && array_size == 1) { + qpitch = 
min_qpitch; + mt_width = w0; + mt_height = h0; + } else { + uint32_t w1 = align_u32(anv_minify(extent->width, 1), i); + uint32_t h1 = align_u32(anv_minify(extent->height, 1), j); + uint32_t w2 = align_u32(anv_minify(extent->width, 2), i); + + qpitch = h0 + h1 + 11 * j; + mt_width = MAX(w0, w1 + w2); + mt_height = array_size * qpitch; + } + + assert(qpitch >= min_qpitch); + if (qpitch > max_qpitch) { + anv_loge("image qpitch > 0x%x\n", max_qpitch); + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + + /* From the Broadwell PRM, RENDER_SURFACE_STATE.SurfaceQpitch: + * + * This field must be set an integer multiple of the Surface Vertical + * Alignment. + */ + assert(anv_is_aligned(qpitch, j)); + + const uint32_t stride = align_u32(mt_width * format_info->cpp, + tile_info->width); + const uint32_t size = stride * align_u32(mt_height, tile_info->height); + const uint32_t offset = align_u32(*inout_image_size, + tile_info->surface_alignment); *inout_image_size = offset + size; *inout_image_alignment = MAX(*inout_image_alignment, @@ -140,11 +182,12 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, .offset = offset, .stride = stride, .tile_mode = tile_mode, - - /* FINISHME: Stop hardcoding miptree subimage alignment */ - .h_align = 4, - .v_align = 4, + .qpitch = qpitch, + .h_align = i, + .v_align = j, }; + + return VK_SUCCESS; } VkResult @@ -155,18 +198,19 @@ anv_image_create(VkDevice _device, struct anv_device *device = (struct anv_device *) _device; const VkImageCreateInfo *pCreateInfo = create_info->vk_info; const VkExtent3D *restrict extent = &pCreateInfo->extent; - struct anv_image *image; + struct anv_image *image = NULL; + VkResult r; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); /* XXX: We don't handle any of these */ anv_assert(pCreateInfo->imageType == VK_IMAGE_TYPE_2D); - anv_assert(pCreateInfo->mipLevels == 1); - anv_assert(pCreateInfo->arraySize == 1); + anv_assert(pCreateInfo->mipLevels > 0); + 
anv_assert(pCreateInfo->arraySize > 0); anv_assert(pCreateInfo->samples == 1); anv_assert(pCreateInfo->extent.width > 0); anv_assert(pCreateInfo->extent.height > 0); - anv_assert(pCreateInfo->extent.depth == 1); + anv_assert(pCreateInfo->extent.depth > 0); /* TODO(chadv): How should we validate inputs? */ const uint8_t surf_type = @@ -195,12 +239,16 @@ anv_image_create(VkDevice _device, image->type = pCreateInfo->imageType; image->extent = pCreateInfo->extent; image->format = pCreateInfo->format; + image->levels = pCreateInfo->mipLevels; + image->array_size = pCreateInfo->arraySize; image->surf_type = surf_type; if (likely(!format_info->has_stencil || format_info->depth_format)) { /* The image's primary surface is a color or depth surface. */ - anv_image_make_surface(create_info, &image->size, &image->alignment, - &image->primary_surface); + r = anv_image_make_surface(create_info, &image->size, &image->alignment, + &image->primary_surface); + if (r != VK_SUCCESS) + goto fail; } if (format_info->has_stencil) { @@ -212,16 +260,25 @@ anv_image_create(VkDevice _device, VkImageCreateInfo stencil_info = *pCreateInfo; stencil_info.format = VK_FORMAT_S8_UINT; - anv_image_make_surface( - &(struct anv_image_create_info) { - .vk_info = &stencil_info, - }, - &image->size, &image->alignment, &image->stencil_surface); + r = anv_image_make_surface( + &(struct anv_image_create_info) { + .vk_info = &stencil_info, + }, + &image->size, &image->alignment, &image->stencil_surface); + + if (r != VK_SUCCESS) + goto fail; } *pImage = (VkImage) image; return VK_SUCCESS; + +fail: + if (image) + anv_device_free(device, image); + + return r; } VkResult @@ -268,20 +325,20 @@ anv_image_view_init(struct anv_surface_view *view, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; struct anv_image *image = (struct anv_image *) pCreateInfo->image; - struct anv_surface *surface = NULL; + struct 
anv_surface *surface; const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); - /* XXX: We don't handle any of these */ - anv_assert(pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_2D); - anv_assert(pCreateInfo->subresourceRange.baseMipLevel == 0); - anv_assert(pCreateInfo->subresourceRange.mipLevels == 1); - anv_assert(pCreateInfo->subresourceRange.baseArraySlice == 0); - anv_assert(pCreateInfo->subresourceRange.arraySize == 1); + anv_assert(range->mipLevels > 0); + anv_assert(range->arraySize > 0); + anv_assert(range->baseMipLevel + range->mipLevels <= image->levels); + anv_assert(range->baseArraySlice + range->arraySize <= image->array_size); - view->bo = image->bo; + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) + anv_finishme("non-2D image views"); switch (pCreateInfo->subresourceRange.aspect) { case VK_IMAGE_ASPECT_STENCIL: @@ -298,10 +355,22 @@ anv_image_view_init(struct anv_surface_view *view, break; } + view->bo = image->bo; view->offset = image->offset + surface->offset; + view->format = pCreateInfo->format; - /* TODO: Miplevels */ - view->extent = image->extent; + view->extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, range->baseMipLevel), + .height = anv_minify(image->extent.height, range->baseMipLevel), + .depth = anv_minify(image->extent.depth, range->baseMipLevel), + }; + + uint32_t depth = 1; + if (range->arraySize > 1) { + depth = range->arraySize; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } static const uint32_t vk_to_gen_swizzle[] = { [VK_CHANNEL_SWIZZLE_ZERO] = SCS_ZERO, @@ -314,7 +383,7 @@ anv_image_view_init(struct anv_surface_view *view, struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = anv_surf_type_from_image_view_type[pCreateInfo->viewType], - .SurfaceArray = false, + .SurfaceArray = image->array_size > 1, .SurfaceFormat = format_info->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], .SurfaceHorizontalAlignment = 
anv_halign[surface->h_align], @@ -324,18 +393,24 @@ anv_image_view_init(struct anv_surface_view *view, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, .MemoryObjectControlState = GEN8_MOCS, - .BaseMipLevel = 0.0, - .SurfaceQPitch = 0, + .BaseMipLevel = (float) pCreateInfo->minLod, + .SurfaceQPitch = surface->qpitch >> 2, .Height = image->extent.height - 1, .Width = image->extent.width - 1, - .Depth = image->extent.depth - 1, + .Depth = depth - 1, .SurfacePitch = surface->stride - 1, - .MinimumArrayElement = 0, + .MinimumArrayElement = range->baseArraySlice, .NumberofMultisamples = MULTISAMPLECOUNT_1, .XOffset = 0, .YOffset = 0, - .SurfaceMinLOD = 0, - .MIPCountLOD = 0, + + /* For sampler surfaces, the hardware interprets field MIPCount/LOD as + * MIPCount. The range of levels accessible by the sampler engine is + * [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. + */ + .MIPCountLOD = range->mipLevels - 1, + .SurfaceMinLOD = range->baseMipLevel, + .AuxiliarySurfaceMode = AUX_NONE, .RedClearColor = 0, .GreenClearColor = 0, @@ -391,20 +466,33 @@ anv_color_attachment_view_init(struct anv_surface_view *view, { struct anv_image *image = (struct anv_image *) pCreateInfo->image; struct anv_surface *surface = &image->primary_surface; - const struct anv_format *format = + const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); - /* XXX: We don't handle any of these */ - anv_assert(pCreateInfo->mipLevel == 0); - anv_assert(pCreateInfo->baseArraySlice == 0); - anv_assert(pCreateInfo->arraySize == 1); - anv_assert(pCreateInfo->msaaResolveImage == 0); + anv_assert(pCreateInfo->arraySize > 0); + anv_assert(pCreateInfo->mipLevel < image->levels); + anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); + + if (pCreateInfo->msaaResolveImage) + anv_finishme("msaaResolveImage"); view->bo = image->bo; - view->offset = image->offset; - view->extent = image->extent; + view->offset = image->offset 
+ surface->offset; view->format = pCreateInfo->format; + view->extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), + .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), + .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), + }; + + uint32_t depth = 1; + if (pCreateInfo->arraySize > 1) { + depth = pCreateInfo->arraySize; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + if (cmd_buffer) view->surface_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); @@ -414,8 +502,8 @@ anv_color_attachment_view_init(struct anv_surface_view *view, struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = SURFTYPE_2D, - .SurfaceArray = false, - .SurfaceFormat = format->surface_format, + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = format_info->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], .SurfaceHorizontalAlignment = anv_halign[surface->h_align], .TileMode = surface->tile_mode, @@ -425,17 +513,25 @@ anv_color_attachment_view_init(struct anv_surface_view *view, .RenderCacheReadWriteMode = WriteOnlyCache, .MemoryObjectControlState = GEN8_MOCS, .BaseMipLevel = 0.0, - .SurfaceQPitch = 0, + .SurfaceQPitch = surface->qpitch >> 2, .Height = image->extent.height - 1, .Width = image->extent.width - 1, - .Depth = image->extent.depth - 1, + .Depth = depth - 1, .SurfacePitch = surface->stride - 1, - .MinimumArrayElement = 0, + .MinimumArrayElement = pCreateInfo->baseArraySlice, .NumberofMultisamples = MULTISAMPLECOUNT_1, .XOffset = 0, .YOffset = 0, + + /* For render target surfaces, the hardware interprets field MIPCount/LOD as + * LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. 
+ */ .SurfaceMinLOD = 0, - .MIPCountLOD = 0, + .MIPCountLOD = pCreateInfo->mipLevel, + .AuxiliarySurfaceMode = AUX_NONE, .RedClearColor = 0, .GreenClearColor = 0, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 9a1d0a2c53a..efb97391c56 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -794,6 +794,7 @@ struct anv_surface { uint32_t offset; uint32_t stride; /**< RENDER_SURFACE_STATE.SurfacePitch */ + uint16_t qpitch; /**< RENDER_SURFACE_STATE.QPitch */ /** * \name Alignment of miptree images, in units of pixels. @@ -814,6 +815,9 @@ struct anv_image { VkImageType type; VkExtent3D extent; VkFormat format; + uint32_t levels; + uint32_t array_size; + VkDeviceSize size; uint32_t alignment; -- cgit v1.2.3 From 709fa463ec5d0591622c6ba1653631195a93f6b8 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 26 Jun 2015 22:15:03 -0700 Subject: vk/depth: Add a FIXME 3DSTATE_DEPTH_BUFFER.Width,Height are wrong. --- src/vulkan/device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index b6d0984d93d..74771e30031 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3763,6 +3763,7 @@ anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->framebuffer->depth_stencil; /* FIXME: Implement the PMA stall W/A */ + /* FIXME: Width and Height are wrong */ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, .SurfaceType = SURFTYPE_2D, -- cgit v1.2.3 From 6720b47717de31186bc4fc6f2f5a25d6353464d7 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 30 Jun 2015 09:20:07 -0700 Subject: vk/formats: Document new meaning of anv_format::cpp The way the code currently works is that anv_format::cpp is the cpp of anv_format::surface_format. Me and Kristian disagree about how the code *should* work. Despite that, I think it's in our discussion's best interest to document how the code *currently* works. That should eliminate confusion. 
If and when the code begins to work differently, then we'll update the anv_format comments. --- src/vulkan/private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index efb97391c56..0f5986b8b8a 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -775,7 +775,7 @@ void anv_compiler_free(struct anv_pipeline *pipeline); struct anv_format { const char *name; uint16_t surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ - uint8_t cpp; + uint8_t cpp; /**< Bytes-per-pixel of anv_format::surface_format. */ uint8_t num_channels; uint8_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ bool has_stencil; -- cgit v1.2.3 From bba767a9af24d75212c16989400f05a77d2ba386 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 30 Jun 2015 09:41:06 -0700 Subject: vk/formats: Fix entry for S8_UINT I forgot to update this when fixing the depth formats. --- src/vulkan/formats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index eb96e94bb61..75432c9e915 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -122,7 +122,7 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_D16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1, .depth_format = D16_UNORM), fmt(VK_FORMAT_D24_UNORM, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), fmt(VK_FORMAT_D32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1, .depth_format = D32_FLOAT), - fmt(VK_FORMAT_S8_UINT, UNSUPPORTED, .cpp = 1, .num_channels = 1, .has_stencil = true), + fmt(VK_FORMAT_S8_UINT, R8_UINT, .cpp = 1, .num_channels = 1, .has_stencil = true), fmt(VK_FORMAT_D16_UNORM_S8_UINT, R16_UNORM, .cpp = 2, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), fmt(VK_FORMAT_D24_UNORM_S8_UINT, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = 
true), fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, R32_FLOAT, .cpp = 4, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), -- cgit v1.2.3 From 682eb9489d3d3705a56abd82a3bea9c26047bee6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 1 Jul 2015 14:17:52 -0700 Subject: vk/x11: Allow for the client querying the size of the format properties --- src/vulkan/x11.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/x11.c b/src/vulkan/x11.c index d49b93331a8..a98d5e2607b 100644 --- a/src/vulkan/x11.c +++ b/src/vulkan/x11.c @@ -48,10 +48,17 @@ VkResult anv_GetDisplayInfoWSI( switch (infoType) { case VK_DISPLAY_INFO_TYPE_FORMAT_PROPERTIES_WSI: size = sizeof(properties[0]) * ARRAY_SIZE(formats); - if (pData && *pDataSize < size) + + if (pData == NULL) { + *pDataSize = size; + return VK_SUCCESS; + } + + if (*pDataSize < size) return vk_error(VK_ERROR_INVALID_VALUE); *pDataSize = size; + for (uint32_t i = 0; i < ARRAY_SIZE(formats); i++) properties[i].swapChainFormat = formats[i]; -- cgit v1.2.3 From 7a749aa4baec6e80d0e2780452afdf7a9de7f27f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 1 Jul 2015 14:17:24 -0700 Subject: nir/spirv: Add basic support for Op[Group]MemberDecorate --- src/glsl/nir/spirv_to_nir.c | 45 ++++++++++++++++++++++++------------- src/glsl/nir/spirv_to_nir_private.h | 2 ++ 2 files changed, 32 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a47c683ffdf..8eeae84cc54 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -180,15 +180,21 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, static void _foreach_decoration_helper(struct vtn_builder *b, struct vtn_value *base_value, + int member, struct vtn_value *value, vtn_decoration_foreach_cb cb, void *data) { for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + if (dec->member >= 0) { + assert(member 
== -1); + member = dec->member; + } + if (dec->group) { assert(dec->group->value_type == vtn_value_type_decoration_group); - _foreach_decoration_helper(b, base_value, dec->group, cb, data); + _foreach_decoration_helper(b, base_value, member, dec->group, cb, data); } else { - cb(b, base_value, dec, data); + cb(b, base_value, member, dec, data); } } } @@ -203,24 +209,33 @@ void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, vtn_decoration_foreach_cb cb, void *data) { - _foreach_decoration_helper(b, value, value, cb, data); + _foreach_decoration_helper(b, value, -1, value, cb, data); } static void vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { + const uint32_t *w_end = w + count; + const uint32_t target = w[1]; + w += 2; + + int member = -1; switch (opcode) { case SpvOpDecorationGroup: - vtn_push_value(b, w[1], vtn_value_type_undef); + vtn_push_value(b, target, vtn_value_type_undef); break; + case SpvOpMemberDecorate: + member = *(w++); + /* fallthrough */ case SpvOpDecorate: { - struct vtn_value *val = &b->values[w[1]]; + struct vtn_value *val = &b->values[target]; struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); - dec->decoration = w[2]; - dec->literals = &w[3]; + dec->member = member; + dec->decoration = *(w++); + dec->literals = w; /* Link into the list */ dec->next = val->decoration; @@ -228,13 +243,17 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, break; } + case SpvOpGroupMemberDecorate: + member = *(w++); + /* fallthrough */ case SpvOpGroupDecorate: { - struct vtn_value *group = &b->values[w[1]]; + struct vtn_value *group = &b->values[target]; assert(group->value_type == vtn_value_type_decoration_group); - for (unsigned i = 2; i < count; i++) { - struct vtn_value *val = &b->values[w[i]]; + for (; w < w_end; w++) { + struct vtn_value *val = &b->values[*w]; struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->member = member; dec->group = 
group; /* Link into the list */ @@ -244,10 +263,6 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, break; } - case SpvOpGroupMemberDecorate: - assert(!"Bad instruction. Khronos Bug #13513"); - break; - default: unreachable("Unhandled opcode"); } @@ -432,7 +447,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, } static void -var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, const struct vtn_decoration *dec, void *void_var) { assert(val->value_type == vtn_value_type_deref); diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index b157e023a68..950979f21f1 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -100,6 +100,7 @@ struct vtn_value { struct vtn_decoration { struct vtn_decoration *next; + int member; /* -1 if not a member decoration */ const uint32_t *literals; struct vtn_value *group; SpvDecoration decoration; @@ -168,6 +169,7 @@ struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, struct vtn_value *, + int member, const struct vtn_decoration *, void *); -- cgit v1.2.3 From e3d60d479bd85433564d1dd594a56465e7c6458e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 1 Jul 2015 15:34:08 -0700 Subject: nir/spirv: Make vtn_handle_type match the other handler functions Previously, the caller of vtn_handle_type had to handle actually inserting the type. However, this didn't really work if the type was decorated in any way. 
--- src/glsl/nir/spirv_to_nir.c | 84 ++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 8eeae84cc54..a892c178bdc 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -268,48 +268,57 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, } } -static const struct glsl_type * +static void vtn_handle_type(struct vtn_builder *b, SpvOp opcode, - const uint32_t *args, unsigned count) + const uint32_t *w, unsigned count) { + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); + switch (opcode) { case SpvOpTypeVoid: - return glsl_void_type(); + val->type = glsl_void_type(); + return; case SpvOpTypeBool: - return glsl_bool_type(); + val->type = glsl_bool_type(); + return; case SpvOpTypeInt: - return glsl_int_type(); + val->type = glsl_int_type(); + return; case SpvOpTypeFloat: - return glsl_float_type(); + val->type = glsl_float_type(); + return; case SpvOpTypeVector: { const struct glsl_type *base = - vtn_value(b, args[0], vtn_value_type_type)->type; - unsigned elems = args[1]; + vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned elems = w[3]; assert(glsl_type_is_scalar(base)); - return glsl_vector_type(glsl_get_base_type(base), elems); + val->type = glsl_vector_type(glsl_get_base_type(base), elems); + return; } case SpvOpTypeMatrix: { const struct glsl_type *base = - vtn_value(b, args[0], vtn_value_type_type)->type; - unsigned columns = args[1]; + vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned columns = w[3]; assert(glsl_type_is_vector(base)); - return glsl_matrix_type(glsl_get_base_type(base), - glsl_get_vector_elements(base), - columns); + val->type = glsl_matrix_type(glsl_get_base_type(base), + glsl_get_vector_elements(base), + columns); + return; } case SpvOpTypeArray: - return glsl_array_type(b->values[args[0]].type, args[1]); + val->type = 
glsl_array_type(b->values[w[2]].type, w[3]); + return; case SpvOpTypeStruct: { NIR_VLA(struct glsl_struct_field, fields, count); - for (unsigned i = 0; i < count; i++) { + for (unsigned i = 0; i < count - 2; i++) { /* TODO: Handle decorators */ - fields[i].type = vtn_value(b, args[i], vtn_value_type_type)->type; + fields[i].type = vtn_value(b, w[i + 2], vtn_value_type_type)->type; fields[i].name = ralloc_asprintf(b, "field%d", i); fields[i].location = -1; fields[i].interpolation = 0; @@ -318,20 +327,22 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, fields[i].matrix_layout = 2; fields[i].stream = -1; } - return glsl_struct_type(fields, count, "struct"); + val->type = glsl_struct_type(fields, count, "struct"); + return; } case SpvOpTypeFunction: { - const struct glsl_type *return_type = b->values[args[0]].type; - NIR_VLA(struct glsl_function_param, params, count - 1); - for (unsigned i = 1; i < count; i++) { - params[i - 1].type = vtn_value(b, args[i], vtn_value_type_type)->type; + const struct glsl_type *return_type = b->values[w[2]].type; + NIR_VLA(struct glsl_function_param, params, count - 3); + for (unsigned i = 0; i < count - 3; i++) { + params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type; /* FIXME: */ - params[i - 1].in = true; - params[i - 1].out = true; + params[i].in = true; + params[i].out = true; } - return glsl_function_type(return_type, params, count - 1); + val->type = glsl_function_type(return_type, params, count - 3); + return; } case SpvOpTypePointer: @@ -339,16 +350,17 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, * the same type. 
The validator should ensure that the proper number * of dereferences happen */ - return vtn_value(b, args[1], vtn_value_type_type)->type; + val->type = vtn_value(b, w[3], vtn_value_type_type)->type; + return; case SpvOpTypeSampler: { const struct glsl_type *sampled_type = - vtn_value(b, args[0], vtn_value_type_type)->type; + vtn_value(b, w[2], vtn_value_type_type)->type; assert(glsl_type_is_vector_or_scalar(sampled_type)); enum glsl_sampler_dim dim; - switch ((SpvDim)args[1]) { + switch ((SpvDim)w[3]) { case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; @@ -360,15 +372,16 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, } /* TODO: Handle the various texture image/filter options */ - (void)args[2]; + (void)w[4]; - bool is_array = args[3]; - bool is_shadow = args[4]; + bool is_array = w[5]; + bool is_shadow = w[6]; - assert(args[5] == 0 && "FIXME: Handl multi-sampled textures"); + assert(w[7] == 0 && "FIXME: Handl multi-sampled textures"); - return glsl_sampler_type(dim, is_shadow, is_array, - glsl_get_base_type(sampled_type)); + val->type = glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); + return; } case SpvOpTypeRuntimeArray: @@ -1974,8 +1987,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpTypeReserveId: case SpvOpTypeQueue: case SpvOpTypePipe: - vtn_push_value(b, w[1], vtn_value_type_type)->type = - vtn_handle_type(b, opcode, &w[2], count - 2); + vtn_handle_type(b, opcode, w, count); break; case SpvOpConstantTrue: -- cgit v1.2.3 From fa663c27f5df3b96a8448a6c7512f7b000b248e0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 1 Jul 2015 15:38:26 -0700 Subject: nir/spirv: Add initial structure member decoration support --- src/glsl/nir/spirv_to_nir.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'src') diff --git 
a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a892c178bdc..e02a7af7acd 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -268,6 +268,44 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, } } +static void +struct_member_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_fields) +{ + struct glsl_struct_field *fields = void_fields; + + if (member < 0) + return; + + switch (dec->decoration) { + case SpvDecorationPrecisionLow: + case SpvDecorationPrecisionMedium: + case SpvDecorationPrecisionHigh: + break; /* FIXME: Do nothing with these for now. */ + case SpvDecorationSmooth: + fields[member].interpolation = INTERP_QUALIFIER_SMOOTH; + break; + case SpvDecorationNoperspective: + fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + fields[member].interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + fields[member].centroid = true; + break; + case SpvDecorationSample: + fields[member].sample = true; + break; + case SpvDecorationLocation: + fields[member].location = dec->literals[0]; + break; + default: + unreachable("Unhandled member decoration"); + } +} + static void vtn_handle_type(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -327,7 +365,12 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, fields[i].matrix_layout = 2; fields[i].stream = -1; } - val->type = glsl_struct_type(fields, count, "struct"); + + vtn_foreach_decoration(b, val, struct_member_decoration_cb, fields); + + const char *name = val->name ? val->name : "struct"; + + val->type = glsl_struct_type(fields, count, name); return; } -- cgit v1.2.3 From beb0e253275d320e11657f725c1bf6c1d4ffc5dd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 1 Jul 2015 16:44:10 -0700 Subject: vk: Roll back to API v90 This is what version 0.1 of the Vulkan SDK is built against. 
--- include/vulkan/vulkan.h | 6 +++--- src/vulkan/meta.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index e5be978c6d3..3e729963e7c 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -33,7 +33,7 @@ #include "vk_platform.h" // Vulkan API version supported by this file -#define VK_API_VERSION VK_MAKE_VERSION(0, 91, 0) +#define VK_API_VERSION VK_MAKE_VERSION(0, 90, 0) #ifdef __cplusplus extern "C" @@ -2220,7 +2220,7 @@ typedef void (VKAPI *PFN_vkCmdCopyImageToBuffer)(VkCmdBuffer cmdBuffer, VkIm typedef void (VKAPI *PFN_vkCmdCloneImageData)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout); typedef void (VKAPI *PFN_vkCmdUpdateBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); typedef void (VKAPI *PFN_vkCmdFillBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); -typedef void (VKAPI *PFN_vkCmdClearColorImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColor* color, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); +typedef void (VKAPI *PFN_vkCmdClearColorImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, VkClearColor color, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); typedef void (VKAPI *PFN_vkCmdClearDepthStencil)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, float depth, uint32_t stencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); typedef void (VKAPI *PFN_vkCmdResolveImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); typedef void (VKAPI *PFN_vkCmdSetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, 
VkPipeEvent pipeEvent); @@ -2853,7 +2853,7 @@ void VKAPI vkCmdClearColorImage( VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, - const VkClearColor* color, + VkClearColor color, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 2167d29da6a..afda3bcd4a6 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -1191,7 +1191,7 @@ void anv_CmdClearColorImage( VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, - const VkClearColor* color, + VkClearColor color, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { -- cgit v1.2.3 From 8a6c8177e0a8230b9ee01f63384eaeceee2cdc35 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 2 Jul 2015 17:10:37 -0700 Subject: vk/meta: Factor the guts out of cmd_buffer_clear --- src/vulkan/meta.c | 85 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index afda3bcd4a6..7bc24b693de 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -189,24 +189,21 @@ struct vue_header { float PointWidth; }; -void -anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, - struct anv_render_pass *pass) +struct clear_instance_data { + struct vue_header vue_header; + float color[4]; +}; + +static void +meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, + int num_instances, + struct clear_instance_data *instance_data) { struct anv_device *device = cmd_buffer->device; struct anv_framebuffer *fb = cmd_buffer->framebuffer; - struct anv_saved_state saved_state; struct anv_state state; uint32_t size; - struct instance_data { - struct vue_header vue_header; - float color[4]; - } *instance_data; - - if (pass->num_clear_layers == 0) - return; - const float vertex_data[] = { /* Rect-list coordinates */ 0.0, 0.0, @@ -217,29 +214,13 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, 0.0, 0.0, }; - size = 
sizeof(vertex_data) + pass->num_clear_layers * sizeof(instance_data[0]); + size = sizeof(vertex_data) + num_instances * sizeof(*instance_data); state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, size, 16); + /* Copy in the vertex and instance data */ memcpy(state.map, vertex_data, sizeof(vertex_data)); - instance_data = state.map + sizeof(vertex_data); - - for (uint32_t i = 0; i < pass->num_layers; i++) { - if (pass->layers[i].color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - *instance_data++ = (struct instance_data) { - .vue_header = { - .RTAIndex = i, - .ViewportIndex = 0, - .PointWidth = 0.0 - }, - .color = { - pass->layers[i].clear_color.color.floatColor[0], - pass->layers[i].clear_color.color.floatColor[1], - pass->layers[i].clear_color.color.floatColor[2], - pass->layers[i].clear_color.color.floatColor[3], - } - }; - } - } + memcpy(state.map + sizeof(vertex_data), instance_data, + num_instances * sizeof(*instance_data)); struct anv_buffer vertex_buffer = { .device = cmd_buffer->device, @@ -248,8 +229,6 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, .offset = state.offset }; - anv_cmd_buffer_save(cmd_buffer, &saved_state); - anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, (VkBuffer[]) { (VkBuffer) &vertex_buffer, @@ -286,11 +265,45 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, VK_STATE_BIND_POINT_COLOR_BLEND, device->meta_state.shared.cb_state); - anv_CmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, pass->num_clear_layers); + anv_CmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, num_instances); +} + +void +anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass) +{ + struct anv_saved_state saved_state; + + int num_clear_layers = 0; + struct clear_instance_data instance_data[MAX_RTS]; + + for (uint32_t i = 0; i < pass->num_layers; i++) { + if (pass->layers[i].color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + instance_data[num_clear_layers++] = (struct clear_instance_data) { + .vue_header = { + 
.RTAIndex = i, + .ViewportIndex = 0, + .PointWidth = 0.0 + }, + .color = { + pass->layers[i].clear_color.color.floatColor[0], + pass->layers[i].clear_color.color.floatColor[1], + pass->layers[i].clear_color.color.floatColor[2], + pass->layers[i].clear_color.color.floatColor[3], + } + }; + } + } + + if (num_clear_layers == 0) + return; + + anv_cmd_buffer_save(cmd_buffer, &saved_state); + + meta_emit_clear(cmd_buffer, num_clear_layers, instance_data); /* Restore API state */ anv_cmd_buffer_restore(cmd_buffer, &saved_state); - } static void -- cgit v1.2.3 From 1f1465f077144692277cf1d14607aade0fe8e289 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 2 Jul 2015 18:15:06 -0700 Subject: vk/meta: Add an initial implementation of ClearColorImage --- src/vulkan/meta.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 7bc24b693de..058c5b70a25 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -1202,13 +1202,97 @@ void anv_CmdFillBuffer( void anv_CmdClearColorImage( VkCmdBuffer cmdBuffer, - VkImage image, + VkImage _image, VkImageLayout imageLayout, VkClearColor color, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; + struct anv_image *image = (struct anv_image *)_image; + struct anv_saved_state saved_state; + + anv_cmd_buffer_save(cmd_buffer, &saved_state); + + for (uint32_t r = 0; r < rangeCount; r++) { + for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { + for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { + struct anv_surface_view view; + anv_color_attachment_view_init(&view, cmd_buffer->device, + &(VkColorAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = _image, + .format = image->format, + .mipLevel = pRanges[r].baseMipLevel + l, + .baseArraySlice = 
pRanges[r].baseArraySlice + s, + .arraySize = 1, + }, + cmd_buffer); + + VkFramebuffer fb; + anv_CreateFramebuffer((VkDevice) cmd_buffer->device, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .colorAttachmentCount = 1, + .pColorAttachments = (VkColorAttachmentBindInfo[]) { + { + .view = (VkColorAttachmentView) &view, + .layout = VK_IMAGE_LAYOUT_GENERAL + } + }, + .pDepthStencilAttachment = NULL, + .sampleCount = 1, + .width = view.extent.width, + .height = view.extent.height, + .layers = 1 + }, &fb); + + VkRenderPass pass; + anv_CreateRenderPass((VkDevice) cmd_buffer->device, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .renderArea = { { 0, 0 }, { view.extent.width, view.extent.height } }, + .colorAttachmentCount = 1, + .extent = { 0, }, + .sampleCount = 1, + .layers = 1, + .pColorFormats = (VkFormat[]) { image->format }, + .pColorLayouts = (VkImageLayout[]) { imageLayout }, + .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_DONT_CARE }, + .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, + .pColorLoadClearValues = &color, + .depthStencilFormat = VK_FORMAT_UNDEFINED, + }, &pass); + + anv_CmdBeginRenderPass((VkCmdBuffer) cmd_buffer, + &(VkRenderPassBegin) { + .renderPass = pass, + .framebuffer = (VkFramebuffer) fb, + }); + + struct clear_instance_data instance_data = { + .vue_header = { + .RTAIndex = 0, + .ViewportIndex = 0, + .PointWidth = 0.0 + }, + .color = { + color.color.floatColor[0], + color.color.floatColor[1], + color.color.floatColor[2], + color.color.floatColor[3], + } + }; + + meta_emit_clear(cmd_buffer, 1, &instance_data); + + anv_CmdEndRenderPass((VkCmdBuffer) cmd_buffer, pass); + } + } + } + + /* Restore API state */ + anv_cmd_buffer_restore(cmd_buffer, &saved_state); } void anv_CmdClearDepthStencil( -- cgit v1.2.3 From 4a42f45514a9adb24ef6587235a27abb48d359f8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 
14:38:45 -0700 Subject: vk: Remove atomic counters stubs --- include/vulkan/vulkan.h | 27 --------------------------- src/vulkan/device.c | 33 --------------------------------- 2 files changed, 60 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 4ecdb4bc2d5..ea8357a25a9 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1348,7 +1348,6 @@ typedef struct VkPhysicalDeviceQueueProperties_ { VkQueueFlags queueFlags; // Queue flags uint32_t queueCount; - uint32_t maxAtomicCounters; bool32_t supportsTimestamps; uint32_t maxMemReferences; // Tells how many memory references can be active for the given queue } VkPhysicalDeviceQueueProperties; @@ -2218,9 +2217,6 @@ typedef void (VKAPI *PFN_vkCmdEndQuery)(VkCmdBuffer cmdBuffer, VkQueryPool query typedef void (VKAPI *PFN_vkCmdResetQueryPool)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); typedef void (VKAPI *PFN_vkCmdWriteTimestamp)(VkCmdBuffer cmdBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset); typedef void (VKAPI *PFN_vkCmdCopyQueryPoolResults)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize destStride, VkQueryResultFlags flags); -typedef void (VKAPI *PFN_vkCmdInitAtomicCounters)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, uint32_t startCounter, uint32_t counterCount, const uint32_t* pData); -typedef void (VKAPI *PFN_vkCmdLoadAtomicCounters)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, uint32_t startCounter, uint32_t counterCount, VkBuffer srcBuffer, VkDeviceSize srcOffset); -typedef void (VKAPI *PFN_vkCmdSaveAtomicCounters)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, uint32_t startCounter, uint32_t counterCount, VkBuffer destBuffer, VkDeviceSize destOffset); typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, 
const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); typedef void (VKAPI *PFN_vkCmdBeginRenderPass)(VkCmdBuffer cmdBuffer, const VkRenderPassBegin* pRenderPassBegin); @@ -2871,29 +2867,6 @@ void VKAPI vkCmdCopyQueryPoolResults( VkDeviceSize destStride, VkQueryResultFlags flags); -void VKAPI vkCmdInitAtomicCounters( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - uint32_t startCounter, - uint32_t counterCount, - const uint32_t* pData); - -void VKAPI vkCmdLoadAtomicCounters( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - uint32_t startCounter, - uint32_t counterCount, - VkBuffer srcBuffer, - VkDeviceSize srcOffset); - -void VKAPI vkCmdSaveAtomicCounters( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - uint32_t startCounter, - uint32_t counterCount, - VkBuffer destBuffer, - VkDeviceSize destOffset); - VkResult VKAPI vkCreateFramebuffer( VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 74771e30031..437eb5cfa1a 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -237,7 +237,6 @@ VkResult anv_GetPhysicalDeviceInfo( queue_properties->queueFlags = 0; queue_properties->queueCount = 1; - queue_properties->maxAtomicCounters = 0; queue_properties->supportsTimestamps = true; queue_properties->maxMemReferences = 256; return VK_SUCCESS; @@ -3608,38 +3607,6 @@ void anv_CmdPipelineBarrier( GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd); } -void anv_CmdInitAtomicCounters( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - uint32_t startCounter, - uint32_t counterCount, - const uint32_t* pData) -{ - stub(); -} - -void anv_CmdLoadAtomicCounters( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - uint32_t startCounter, - uint32_t counterCount, - 
VkBuffer srcBuffer, - VkDeviceSize srcOffset) -{ - stub(); -} - -void anv_CmdSaveAtomicCounters( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - uint32_t startCounter, - uint32_t counterCount, - VkBuffer destBuffer, - VkDeviceSize destOffset) -{ - stub(); -} - static void anv_framebuffer_destroy(struct anv_device *device, struct anv_object *object, -- cgit v1.2.3 From f9bb95ad4a492727484a268844ee2cfff4252ac9 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 2 Jul 2015 12:55:25 -0700 Subject: nir/spirv: move 'type' into the union Since SSA values now have their own types, it's more convenient to make 'type' only used when we want to look up an actual SPIR-V type, since we're going to change its type soon to support various decorations that are handled at the SPIR-V -> NIR level. --- src/glsl/nir/spirv_to_nir.c | 120 ++++++++++++++++++------------------ src/glsl/nir/spirv_to_nir_private.h | 7 ++- 2 files changed, 65 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index e02a7af7acd..9420c9106c8 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -114,7 +114,7 @@ vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) struct vtn_value *val = vtn_untyped_value(b, value_id); switch (val->value_type) { case vtn_value_type_constant: - return vtn_const_ssa_value(b, val->constant, val->type); + return vtn_const_ssa_value(b, val->constant, val->const_type); case vtn_value_type_ssa: return val->ssa; @@ -444,19 +444,19 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); - val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type; val->constant = ralloc(b, nir_constant); switch (opcode) { case SpvOpConstantTrue: - assert(val->type == glsl_bool_type()); + 
assert(val->const_type == glsl_bool_type()); val->constant->value.u[0] = NIR_TRUE; break; case SpvOpConstantFalse: - assert(val->type == glsl_bool_type()); + assert(val->const_type == glsl_bool_type()); val->constant->value.u[0] = NIR_FALSE; break; case SpvOpConstant: - assert(glsl_type_is_scalar(val->type)); + assert(glsl_type_is_scalar(val->const_type)); val->constant->value.u[0] = w[3]; break; case SpvOpConstantComposite: { @@ -465,20 +465,20 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, for (unsigned i = 0; i < elem_count; i++) elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; - switch (glsl_get_base_type(val->type)) { + switch (glsl_get_base_type(val->const_type)) { case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_FLOAT: case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(val->type)) { - unsigned rows = glsl_get_vector_elements(val->type); - assert(glsl_get_matrix_columns(val->type) == elem_count); + if (glsl_type_is_matrix(val->const_type)) { + unsigned rows = glsl_get_vector_elements(val->const_type); + assert(glsl_get_matrix_columns(val->const_type) == elem_count); for (unsigned i = 0; i < elem_count; i++) for (unsigned j = 0; j < rows; j++) val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; } else { - assert(glsl_type_is_vector(val->type)); - assert(glsl_get_vector_elements(val->type) == elem_count); + assert(glsl_type_is_vector(val->const_type)); + assert(glsl_get_vector_elements(val->const_type) == elem_count); for (unsigned i = 0; i < elem_count; i++) val->constant->value.u[i] = elems[i]->value.u[0]; } @@ -1053,6 +1053,43 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, unreachable("Unhandled opcode"); } +static struct vtn_ssa_value * +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (!glsl_type_is_vector_or_scalar(type)) { + unsigned elems = glsl_get_length(type); + 
val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *child_type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + child_type = glsl_get_column_type(type); + break; + case GLSL_TYPE_ARRAY: + child_type = glsl_get_array_element(type); + break; + case GLSL_TYPE_STRUCT: + child_type = glsl_get_struct_field(type, i); + break; + default: + unreachable("unkown base type"); + } + + val->elems[i] = vtn_create_ssa_value(b, child_type); + } + } + + return val; +} + static nir_tex_src vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) { @@ -1170,48 +1207,12 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); + val->ssa = vtn_create_ssa_value(b, glsl_vector_type(GLSL_TYPE_FLOAT, 4)); val->ssa->def = &instr->dest.ssa; - val->ssa->type = val->type; nir_builder_instr_insert(&b->nb, &instr->instr); } -static struct vtn_ssa_value * -vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) -{ - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = type; - - if (!glsl_type_is_vector_or_scalar(type)) { - unsigned elems = glsl_get_length(type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) { - const struct glsl_type *child_type; - - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - child_type = glsl_get_column_type(type); - break; - case GLSL_TYPE_ARRAY: - child_type = glsl_get_array_element(type); - break; - case GLSL_TYPE_STRUCT: - child_type = glsl_get_struct_field(type, i); - break; - default: - unreachable("unkown base type"); - } - - 
val->elems[i] = vtn_create_ssa_value(b, child_type); - } - } - - return val; -} static nir_alu_instr * create_vec(void *mem_ctx, unsigned num_components) @@ -1392,7 +1393,6 @@ vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->type = vtn_value(b, w[1], vtn_value_type_type)->type; switch (opcode) { case SpvOpTranspose: { @@ -1441,8 +1441,9 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->type = vtn_value(b, w[1], vtn_value_type_type)->type; - val->ssa = vtn_create_ssa_value(b, val->type); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa = vtn_create_ssa_value(b, type); /* Collect the various SSA sources */ unsigned num_inputs = count - 3; @@ -1604,7 +1605,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, - glsl_get_vector_elements(val->type), val->name); + glsl_get_vector_elements(type), val->name); val->ssa->def = &instr->dest.dest.ssa; for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) @@ -1791,7 +1792,8 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + const struct glsl_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa = vtn_create_ssa_value(b, type); switch (opcode) { case SpvOpVectorExtractDynamic: @@ -1806,7 +1808,7 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, break; case SpvOpVectorShuffle: - val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(val->type), + val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), 
vtn_ssa_value(b, w[3])->def, vtn_ssa_value(b, w[4])->def, w + 5); @@ -1815,12 +1817,12 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, case SpvOpCompositeConstruct: { val->ssa = rzalloc(b, struct vtn_ssa_value); unsigned elems = count - 3; - if (glsl_type_is_vector_or_scalar(val->type)) { + if (glsl_type_is_vector_or_scalar(type)) { nir_ssa_def *srcs[4]; for (unsigned i = 0; i < elems; i++) srcs[i] = vtn_ssa_value(b, w[3 + i])->def; val->ssa->def = - vtn_vector_construct(b, glsl_get_vector_elements(val->type), + vtn_vector_construct(b, glsl_get_vector_elements(type), elems, srcs); } else { val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); @@ -1847,8 +1849,6 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, default: unreachable("unknown composite operation"); } - - val->ssa->type = val->type; } static void @@ -1880,8 +1880,8 @@ static void vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->type = vtn_value(b, w[1], vtn_value_type_type)->type; - val->ssa = vtn_phi_node_create(b, val->type); + const struct glsl_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa = vtn_phi_node_create(b, type); } static void diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index 950979f21f1..aa8dea9d3fc 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -85,11 +85,14 @@ struct vtn_value { enum vtn_value_type value_type; const char *name; struct vtn_decoration *decoration; - const struct glsl_type *type; union { void *ptr; char *str; - nir_constant *constant; + const struct glsl_type *type; + struct { + nir_constant *constant; + const struct glsl_type *const_type; + }; nir_deref_var *deref; struct vtn_function *func; struct vtn_block *block; -- cgit v1.2.3 From 23c179be75b0d00e08a5b7ff2349d1272751e93d Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 
2 Jul 2015 14:24:12 -0700 Subject: nir/spirv: add a vtn_type struct This will handle decorations that aren't in the glsl_type. --- src/glsl/nir/spirv_glsl450_to_nir.c | 5 +-- src/glsl/nir/spirv_to_nir.c | 74 +++++++++++++++++++++++-------------- src/glsl/nir/spirv_to_nir_private.h | 26 ++++++++++++- 3 files changed, 73 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c index 100fde9ce7f..52b048820f3 100644 --- a/src/glsl/nir/spirv_glsl450_to_nir.c +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -139,9 +139,8 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, const uint32_t *w, unsigned count) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->type = vtn_value(b, w[1], vtn_value_type_type)->type; val->ssa = rzalloc(b, struct vtn_ssa_value); - val->ssa->type = val->type; + val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; /* Collect the various SSA sources */ unsigned num_inputs = count - 5; @@ -257,7 +256,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, - glsl_get_vector_elements(val->type), val->name); + glsl_get_vector_elements(val->ssa->type), val->name); val->ssa->def = &instr->dest.dest.ssa; for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 9420c9106c8..41182a7003e 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -312,51 +312,66 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, { struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); + val->type = ralloc(b, struct vtn_type); + val->type->is_builtin = false; + switch (opcode) { case SpvOpTypeVoid: - val->type = glsl_void_type(); + val->type->type = glsl_void_type(); return; case 
SpvOpTypeBool: - val->type = glsl_bool_type(); + val->type->type = glsl_bool_type(); return; case SpvOpTypeInt: - val->type = glsl_int_type(); + val->type->type = glsl_int_type(); return; case SpvOpTypeFloat: - val->type = glsl_float_type(); + val->type->type = glsl_float_type(); return; case SpvOpTypeVector: { const struct glsl_type *base = - vtn_value(b, w[2], vtn_value_type_type)->type; + vtn_value(b, w[2], vtn_value_type_type)->type->type; unsigned elems = w[3]; assert(glsl_type_is_scalar(base)); - val->type = glsl_vector_type(glsl_get_base_type(base), elems); + val->type->type = glsl_vector_type(glsl_get_base_type(base), elems); return; } case SpvOpTypeMatrix: { const struct glsl_type *base = - vtn_value(b, w[2], vtn_value_type_type)->type; + vtn_value(b, w[2], vtn_value_type_type)->type->type; unsigned columns = w[3]; assert(glsl_type_is_vector(base)); - val->type = glsl_matrix_type(glsl_get_base_type(base), - glsl_get_vector_elements(base), - columns); + val->type->type = glsl_matrix_type(glsl_get_base_type(base), + glsl_get_vector_elements(base), + columns); + val->type->row_major = false; + val->type->stride = 0; return; } - case SpvOpTypeArray: - val->type = glsl_array_type(b->values[w[2]].type, w[3]); + case SpvOpTypeArray: { + struct vtn_type *array_element = + vtn_value(b, w[2], vtn_value_type_type)->type; + val->type->type = glsl_array_type(array_element->type, w[3]); + val->type->array_element = array_element; + val->type->stride = 0; return; + } case SpvOpTypeStruct: { + unsigned num_fields = count - 2; + val->type->members = ralloc_array(b, struct vtn_type *, num_fields); + NIR_VLA(struct glsl_struct_field, fields, count); - for (unsigned i = 0; i < count - 2; i++) { + for (unsigned i = 0; i < num_fields; i++) { /* TODO: Handle decorators */ - fields[i].type = vtn_value(b, w[i + 2], vtn_value_type_type)->type; + val->type->members[i] = + vtn_value(b, w[i + 2], vtn_value_type_type)->type; + fields[i].type = val->type->members[i]->type; 
fields[i].name = ralloc_asprintf(b, "field%d", i); fields[i].location = -1; fields[i].interpolation = 0; @@ -370,21 +385,22 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, const char *name = val->name ? val->name : "struct"; - val->type = glsl_struct_type(fields, count, name); + val->type->type = glsl_struct_type(fields, count, name); return; } case SpvOpTypeFunction: { - const struct glsl_type *return_type = b->values[w[2]].type; + const struct glsl_type *return_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; NIR_VLA(struct glsl_function_param, params, count - 3); for (unsigned i = 0; i < count - 3; i++) { - params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type; + params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; /* FIXME: */ params[i].in = true; params[i].out = true; } - val->type = glsl_function_type(return_type, params, count - 3); + val->type->type = glsl_function_type(return_type, params, count - 3); return; } @@ -398,7 +414,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, case SpvOpTypeSampler: { const struct glsl_type *sampled_type = - vtn_value(b, w[2], vtn_value_type_type)->type; + vtn_value(b, w[2], vtn_value_type_type)->type->type; assert(glsl_type_is_vector_or_scalar(sampled_type)); @@ -422,8 +438,8 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, assert(w[7] == 0 && "FIXME: Handl multi-sampled textures"); - val->type = glsl_sampler_type(dim, is_shadow, is_array, - glsl_get_base_type(sampled_type)); + val->type->type = glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); return; } @@ -444,7 +460,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); - val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; val->constant = ralloc(b, nir_constant); switch 
(opcode) { case SpvOpConstantTrue: @@ -864,7 +880,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, switch (opcode) { case SpvOpVariable: { const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type; + vtn_value(b, w[1], vtn_value_type_type)->type->type; struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); nir_variable *var = ralloc(b->shader, nir_variable); @@ -1442,7 +1458,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type; + vtn_value(b, w[1], vtn_value_type_type)->type->type; val->ssa = vtn_create_ssa_value(b, type); /* Collect the various SSA sources */ @@ -1792,7 +1808,8 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; val->ssa = vtn_create_ssa_value(b, type); switch (opcode) { @@ -1880,7 +1897,8 @@ static void vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; val->ssa = vtn_phi_node_create(b, type); } @@ -2068,10 +2086,10 @@ vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, b->func = rzalloc(b, struct vtn_function); const struct glsl_type *result_type = - vtn_value(b, w[1], vtn_value_type_type)->type; + vtn_value(b, w[1], vtn_value_type_type)->type->type; struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); const struct glsl_type *func_type = - vtn_value(b, w[4], 
vtn_value_type_type)->type; + vtn_value(b, w[4], vtn_value_type_type)->type->type; assert(glsl_get_function_return_type(func_type) == result_type); diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index aa8dea9d3fc..d02762cc111 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -81,6 +81,30 @@ struct vtn_ssa_value { const struct glsl_type *type; }; +struct vtn_type { + const struct glsl_type *type; + + /* for matrices, whether the matrix is stored row-major */ + bool row_major; + + /* for structs, the offset of each member */ + unsigned *offsets; + + /* for arrays and matrices, the array stride */ + unsigned stride; + + /* for arrays, the vtn_type for the elements of the array */ + struct vtn_type *array_element; + + /* for structures, the vtn_type for each member */ + struct vtn_type **members; + + /* Whether this type, or a parent type, has been decorated as a builtin */ + bool is_builtin; + + SpvBuiltIn builtin; +}; + struct vtn_value { enum vtn_value_type value_type; const char *name; @@ -88,7 +112,7 @@ struct vtn_value { union { void *ptr; char *str; - const struct glsl_type *type; + struct vtn_type *type; struct { nir_constant *constant; const struct glsl_type *const_type; -- cgit v1.2.3 From 66375e2852a6d458db77a292e06858d109ea7924 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 2 Jul 2015 15:22:27 -0700 Subject: nir/spirv: handle structure member builtin decorations --- src/glsl/nir/spirv_to_nir.c | 81 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 41182a7003e..531a6cf388e 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -268,12 +268,64 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, } } +struct member_decoration_ctx { + struct glsl_struct_field *fields; + struct vtn_type *type; +}; + 
+/* does a shallow copy of a vtn_type */ + +static struct vtn_type * +vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) +{ + struct vtn_type *dest = ralloc(b, struct vtn_type); + dest->type = src->type; + dest->is_builtin = src->is_builtin; + if (src->is_builtin) + dest->builtin = src->builtin; + + if (!glsl_type_is_vector_or_scalar(src->type)) { + switch (glsl_get_base_type(src->type)) { + case GLSL_TYPE_ARRAY: + dest->array_element = src->array_element; + dest->stride = src->stride; + break; + + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + /* matrices */ + dest->row_major = src->row_major; + dest->stride = src->stride; + break; + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(src->type); + + dest->members = ralloc_array(b, struct vtn_type *, elems); + memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); + + dest->offsets = ralloc_array(b, unsigned, elems); + memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); + break; + } + + default: + unreachable("unhandled type"); + } + } + + return dest; +} + static void struct_member_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *void_fields) + const struct vtn_decoration *dec, void *void_ctx) { - struct glsl_struct_field *fields = void_fields; + struct member_decoration_ctx *ctx = void_ctx; if (member < 0) return; @@ -284,22 +336,28 @@ struct_member_decoration_cb(struct vtn_builder *b, case SpvDecorationPrecisionHigh: break; /* FIXME: Do nothing with these for now. 
*/ case SpvDecorationSmooth: - fields[member].interpolation = INTERP_QUALIFIER_SMOOTH; + ctx->fields[member].interpolation = INTERP_QUALIFIER_SMOOTH; break; case SpvDecorationNoperspective: - fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; break; case SpvDecorationFlat: - fields[member].interpolation = INTERP_QUALIFIER_FLAT; + ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; break; case SpvDecorationCentroid: - fields[member].centroid = true; + ctx->fields[member].centroid = true; break; case SpvDecorationSample: - fields[member].sample = true; + ctx->fields[member].sample = true; break; case SpvDecorationLocation: - fields[member].location = dec->literals[0]; + ctx->fields[member].location = dec->literals[0]; + break; + case SpvDecorationBuiltIn: + ctx->type->members[member] = vtn_type_copy(b, + ctx->type->members[member]); + ctx->type->members[member]->is_builtin = true; + ctx->type->members[member]->builtin = dec->literals[0]; break; default: unreachable("Unhandled member decoration"); @@ -381,7 +439,12 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, fields[i].stream = -1; } - vtn_foreach_decoration(b, val, struct_member_decoration_cb, fields); + struct member_decoration_ctx ctx = { + .fields = fields, + .type = val->type + }; + + vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); const char *name = val->name ? val->name : "struct"; -- cgit v1.2.3 From aca5fc6af160ca557235e37fe49043ce1f8d3e99 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 2 Jul 2015 15:49:08 -0700 Subject: nir/spirv: plumb through the type of dereferences We need this to know if a deref is of a builtin. 
--- src/glsl/nir/spirv_to_nir.c | 36 +++++++++++++++++++++--------------- src/glsl/nir/spirv_to_nir_private.h | 5 ++++- 2 files changed, 25 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 531a6cf388e..9fdad4702c4 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -398,14 +398,15 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, } case SpvOpTypeMatrix: { - const struct glsl_type *base = - vtn_value(b, w[2], vtn_value_type_type)->type->type; + struct vtn_type *base = + vtn_value(b, w[2], vtn_value_type_type)->type; unsigned columns = w[3]; - assert(glsl_type_is_vector(base)); - val->type->type = glsl_matrix_type(glsl_get_base_type(base), - glsl_get_vector_elements(base), + assert(glsl_type_is_vector(base->type)); + val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), + glsl_get_vector_elements(base->type), columns); + val->type->array_element = base; val->type->row_major = false; val->type->stride = 0; return; @@ -942,13 +943,13 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, { switch (opcode) { case SpvOpVariable: { - const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; + struct vtn_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type; struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); nir_variable *var = ralloc(b->shader, nir_variable); - var->type = type; + var->type = type->type; var->name = ralloc_strdup(var, val->name); switch ((SpvStorageClass)w[3]) { @@ -956,7 +957,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvStorageClassUniformConstant: var->data.mode = nir_var_uniform; var->data.read_only = true; - var->interface_type = type; + var->interface_type = type->type; break; case SpvStorageClassInput: var->data.mode = nir_var_shader_in; @@ -987,6 +988,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } val->deref = 
nir_deref_var_create(b, var); + val->deref_type = type; vtn_foreach_decoration(b, val, var_decoration_cb, var); @@ -1029,6 +1031,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); nir_deref_var *base = vtn_value(b, w[3], vtn_value_type_deref)->deref; val->deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); + val->deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; nir_deref *tail = &val->deref->deref; while (tail->child) @@ -1047,15 +1050,17 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case GLSL_TYPE_BOOL: case GLSL_TYPE_ARRAY: { nir_deref_array *deref_arr = nir_deref_array_create(b); - if (base_type == GLSL_TYPE_ARRAY) { - deref_arr->deref.type = glsl_get_array_element(tail->type); - } else if (glsl_type_is_matrix(tail->type)) { - deref_arr->deref.type = glsl_get_column_type(tail->type); + if (base_type == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(tail->type)) { + val->deref_type = val->deref_type->array_element; } else { assert(glsl_type_is_vector(tail->type)); - deref_arr->deref.type = glsl_scalar_type(base_type); + val->deref_type = ralloc(b, struct vtn_type); + val->deref_type->type = glsl_scalar_type(base_type); } + deref_arr->deref.type = val->deref_type->type; + if (idx_val->value_type == vtn_value_type_constant) { unsigned idx = idx_val->constant->value.u[0]; deref_arr->deref_array_type = nir_deref_array_type_direct; @@ -1074,8 +1079,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case GLSL_TYPE_STRUCT: { assert(idx_val->value_type == vtn_value_type_constant); unsigned idx = idx_val->constant->value.u[0]; + val->deref_type = val->deref_type->members[idx]; nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); - deref_struct->deref.type = glsl_get_struct_field(tail->type, idx); + deref_struct->deref.type = val->deref_type->type; tail->child = &deref_struct->deref; break; } diff --git 
a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index d02762cc111..126955ae877 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -117,7 +117,10 @@ struct vtn_value { nir_constant *constant; const struct glsl_type *const_type; }; - nir_deref_var *deref; + struct { + nir_deref_var *deref; + struct vtn_type *deref_type; + }; struct vtn_function *func; struct vtn_block *block; struct vtn_ssa_value *ssa; -- cgit v1.2.3 From 70d2336e7ef38112ab2b72abce4e2c97be00eb96 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Sat, 4 Jul 2015 10:49:12 -0700 Subject: nir/spirv: pull out logic for getting builtin locations Also add support for more builtins. --- src/glsl/nir/spirv_to_nir.c | 169 ++++++++++++++++++++++++++++---------------- 1 file changed, 107 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 9fdad4702c4..a7f0837e67f 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -582,6 +582,104 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, } } +static void +vtn_get_builtin_location(SpvBuiltIn builtin, int *location, + nir_variable_mode *mode) +{ + switch (builtin) { + case SpvBuiltInPosition: + *location = VARYING_SLOT_POS; + *mode = nir_var_shader_out; + break; + case SpvBuiltInPointSize: + *location = VARYING_SLOT_PSIZ; + *mode = nir_var_shader_out; + break; + case SpvBuiltInClipVertex: + *location = VARYING_SLOT_CLIP_VERTEX; + *mode = nir_var_shader_out; + break; + case SpvBuiltInClipDistance: + *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? 
*/ + *mode = nir_var_shader_in; + break; + case SpvBuiltInCullDistance: + /* XXX figure this out */ + unreachable("unhandled builtin"); + case SpvBuiltInVertexId: + *location = SYSTEM_VALUE_VERTEX_ID; + *mode = nir_var_system_value; + break; + case SpvBuiltInInstanceId: + *location = SYSTEM_VALUE_INSTANCE_ID; + *mode = nir_var_system_value; + break; + case SpvBuiltInPrimitiveId: + *location = VARYING_SLOT_PRIMITIVE_ID; + *mode = nir_var_shader_out; + break; + case SpvBuiltInInvocationId: + *location = SYSTEM_VALUE_INVOCATION_ID; + *mode = nir_var_system_value; + break; + case SpvBuiltInLayer: + *location = VARYING_SLOT_LAYER; + *mode = nir_var_shader_out; + break; + case SpvBuiltInTessLevelOuter: + case SpvBuiltInTessLevelInner: + case SpvBuiltInTessCoord: + case SpvBuiltInPatchVertices: + unreachable("no tessellation support"); + case SpvBuiltInFragCoord: + *location = VARYING_SLOT_POS; + *mode = nir_var_shader_in; + break; + case SpvBuiltInPointCoord: + *location = VARYING_SLOT_PNTC; + *mode = nir_var_shader_out; + break; + case SpvBuiltInFrontFacing: + *location = VARYING_SLOT_FACE; + *mode = nir_var_shader_out; + break; + case SpvBuiltInSampleId: + *location = SYSTEM_VALUE_SAMPLE_ID; + *mode = nir_var_shader_in; + break; + case SpvBuiltInSamplePosition: + *location = SYSTEM_VALUE_SAMPLE_POS; + *mode = nir_var_shader_in; + break; + case SpvBuiltInSampleMask: + *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? 
*/ + *mode = nir_var_shader_in; + break; + case SpvBuiltInFragColor: + *location = FRAG_RESULT_COLOR; + *mode = nir_var_shader_out; + break; + case SpvBuiltInFragDepth: + *location = FRAG_RESULT_DEPTH; + *mode = nir_var_shader_out; + break; + case SpvBuiltInHelperInvocation: + unreachable("unsupported builtin"); /* XXX */ + break; + case SpvBuiltInNumWorkgroups: + case SpvBuiltInWorkgroupSize: + /* these are constants, need to be handled specially */ + unreachable("unsupported builtin"); + case SpvBuiltInWorkgroupId: + case SpvBuiltInLocalInvocationId: + case SpvBuiltInGlobalInvocationId: + case SpvBuiltInLocalInvocationIndex: + unreachable("no compute shader support"); + default: + unreachable("unsupported builtin"); + } +} + static void var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, const struct vtn_decoration *dec, void *void_var) @@ -639,69 +737,16 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, case SpvDecorationDescriptorSet: var->data.descriptor_set = dec->literals[0]; break; - case SpvDecorationBuiltIn: - var->data.mode = nir_var_system_value; - var->data.read_only = true; - switch ((SpvBuiltIn)dec->literals[0]) { - case SpvBuiltInFrontFacing: - var->data.location = SYSTEM_VALUE_FRONT_FACE; - break; - case SpvBuiltInVertexId: - var->data.location = SYSTEM_VALUE_VERTEX_ID; - break; - case SpvBuiltInInstanceId: - var->data.location = SYSTEM_VALUE_INSTANCE_ID; - break; - case SpvBuiltInSampleId: - var->data.location = SYSTEM_VALUE_SAMPLE_ID; - break; - case SpvBuiltInSamplePosition: - var->data.location = SYSTEM_VALUE_SAMPLE_POS; - break; - case SpvBuiltInSampleMask: - var->data.location = SYSTEM_VALUE_SAMPLE_MASK_IN; - break; - case SpvBuiltInInvocationId: - var->data.location = SYSTEM_VALUE_INVOCATION_ID; - break; - case SpvBuiltInPrimitiveId: - case SpvBuiltInPosition: - case SpvBuiltInPointSize: - case SpvBuiltInClipVertex: - case SpvBuiltInClipDistance: - case SpvBuiltInCullDistance: - case 
SpvBuiltInLayer: - case SpvBuiltInViewportIndex: - case SpvBuiltInTessLevelOuter: - case SpvBuiltInTessLevelInner: - case SpvBuiltInTessCoord: - case SpvBuiltInPatchVertices: - case SpvBuiltInFragCoord: - case SpvBuiltInPointCoord: - case SpvBuiltInFragColor: - case SpvBuiltInFragDepth: - case SpvBuiltInHelperInvocation: - case SpvBuiltInNumWorkgroups: - case SpvBuiltInWorkgroupSize: - case SpvBuiltInWorkgroupId: - case SpvBuiltInLocalInvocationId: - case SpvBuiltInGlobalInvocationId: - case SpvBuiltInLocalInvocationIndex: - case SpvBuiltInWorkDim: - case SpvBuiltInGlobalSize: - case SpvBuiltInEnqueuedWorkgroupSize: - case SpvBuiltInGlobalOffset: - case SpvBuiltInGlobalLinearId: - case SpvBuiltInWorkgroupLinearId: - case SpvBuiltInSubgroupSize: - case SpvBuiltInSubgroupMaxSize: - case SpvBuiltInNumSubgroups: - case SpvBuiltInNumEnqueuedSubgroups: - case SpvBuiltInSubgroupId: - case SpvBuiltInSubgroupLocalInvocationId: - unreachable("Unhandled builtin enum"); - } + case SpvDecorationBuiltIn: { + nir_variable_mode mode; + vtn_get_builtin_location(dec->literals[0], &var->data.location, + &mode); + var->data.mode = mode; + if (mode == nir_var_shader_in || mode == nir_var_system_value) + var->data.read_only = true; + b->builtins[dec->literals[0]] = var; break; + } case SpvDecorationNoStaticUse: /* This can safely be ignored */ break; -- cgit v1.2.3 From 73351c6a18ee0104739b75555f82dc407885a559 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Sat, 4 Jul 2015 15:46:58 -0700 Subject: nir/spirv: fix a bad assertion in the decoration handling We should be asserting that the parent decoration didn't hand us a member if the child decoration did, but different child decorations may obviously have different members. 
--- src/glsl/nir/spirv_to_nir.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a7f0837e67f..ed23a9cb747 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -184,17 +184,20 @@ _foreach_decoration_helper(struct vtn_builder *b, struct vtn_value *value, vtn_decoration_foreach_cb cb, void *data) { + int new_member = member; + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { if (dec->member >= 0) { assert(member == -1); - member = dec->member; + new_member = dec->member; } if (dec->group) { assert(dec->group->value_type == vtn_value_type_decoration_group); - _foreach_decoration_helper(b, base_value, member, dec->group, cb, data); + _foreach_decoration_helper(b, base_value, new_member, dec->group, + cb, data); } else { - cb(b, base_value, member, dec, data); + cb(b, base_value, new_member, dec, data); } } } -- cgit v1.2.3 From 15047514c9503313427b2467334624f2e697430d Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Sat, 4 Jul 2015 15:51:24 -0700 Subject: nir/spirv: fix a bug with structure creation We were creating 2 extra bogus fields. --- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index ed23a9cb747..4b97a862eda 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -452,7 +452,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, const char *name = val->name ? 
val->name : "struct"; - val->type->type = glsl_struct_type(fields, count, name); + val->type->type = glsl_struct_type(fields, num_fields, name); return; } -- cgit v1.2.3 From f3ea3b6e58db743097e87216e860f1666590416e Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Sat, 4 Jul 2015 15:53:47 -0700 Subject: nir/spirv: add support for builtins inside structures We may be able to revert this depending on the outcome of bug 14190, but for now it gets vertex shaders working with SPIR-V. --- src/glsl/nir/spirv_to_nir.c | 141 ++++++++++++++++++++++++++++++------ src/glsl/nir/spirv_to_nir_private.h | 5 ++ 2 files changed, 125 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 4b97a862eda..23b80b12e53 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -787,13 +787,93 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, } } +static nir_variable * +get_builtin_variable(struct vtn_builder *b, + const struct glsl_type *type, + SpvBuiltIn builtin) +{ + nir_variable *var = b->builtins[builtin]; + + if (!var) { + var = ralloc(b->shader, nir_variable); + var->type = type; + + nir_variable_mode mode; + vtn_get_builtin_location(builtin, &var->data.location, &mode); + var->data.mode = mode; + var->name = ralloc_strdup(b->shader, "builtin"); + + switch (mode) { + case nir_var_shader_in: + exec_list_push_tail(&b->shader->inputs, &var->node); + break; + case nir_var_shader_out: + exec_list_push_tail(&b->shader->outputs, &var->node); + break; + case nir_var_system_value: + exec_list_push_tail(&b->shader->system_values, &var->node); + break; + default: + unreachable("bad builtin mode"); + } + + b->builtins[builtin] = var; + } + + return var; +} + +static void +vtn_builtin_load(struct vtn_builder *b, + struct vtn_ssa_value *val, + SpvBuiltIn builtin) +{ + assert(glsl_type_is_vector_or_scalar(val->type)); + + nir_variable *var = get_builtin_variable(b, val->type, 
builtin); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + nir_ssa_dest_init(&load->instr, &load->dest, + glsl_get_vector_elements(val->type), NULL); + + load->variables[0] = nir_deref_var_create(b->shader, var); + load->num_components = glsl_get_vector_elements(val->type); + nir_builder_instr_insert(&b->nb, &load->instr); + val->def = &load->dest.ssa; +} + +static void +vtn_builtin_store(struct vtn_builder *b, + struct vtn_ssa_value *val, + SpvBuiltIn builtin) +{ + assert(glsl_type_is_vector_or_scalar(val->type)); + + nir_variable *var = get_builtin_variable(b, val->type, builtin); + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + + store->variables[0] = nir_deref_var_create(b->shader, var); + store->num_components = glsl_get_vector_elements(val->type); + store->src[0] = nir_src_for_ssa(val->def); + nir_builder_instr_insert(&b->nb, &store->instr); +} + static struct vtn_ssa_value * _vtn_variable_load(struct vtn_builder *b, - nir_deref_var *src_deref, nir_deref *src_deref_tail) + nir_deref_var *src_deref, struct vtn_type *src_type, + nir_deref *src_deref_tail) { struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); val->type = src_deref_tail->type; + if (src_type->is_builtin) { + vtn_builtin_load(b, val, src_type->builtin); + return val; + } + /* The deref tail may contain a deref to select a component of a vector (in * other words, it might not be an actual tail) so we have to save it away * here since we overwrite it later. 
@@ -830,7 +910,9 @@ _vtn_variable_load(struct vtn_builder *b, src_deref_tail->child = &deref->deref; for (unsigned i = 0; i < elems; i++) { deref->base_offset = i; - val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + val->elems[i] = _vtn_variable_load(b, src_deref, + src_type->array_element, + &deref->deref); } } else { assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); @@ -842,7 +924,9 @@ _vtn_variable_load(struct vtn_builder *b, for (unsigned i = 0; i < elems; i++) { deref->index = i; deref->deref.type = glsl_get_struct_field(val->type, i); - val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + val->elems[i] = _vtn_variable_load(b, src_deref, + src_type->members[i], + &deref->deref); } } @@ -852,9 +936,15 @@ _vtn_variable_load(struct vtn_builder *b, } static void -_vtn_variable_store(struct vtn_builder *b, nir_deref_var *dest_deref, - nir_deref *dest_deref_tail, struct vtn_ssa_value *src) +_vtn_variable_store(struct vtn_builder *b, struct vtn_type *dest_type, + nir_deref_var *dest_deref, nir_deref *dest_deref_tail, + struct vtn_ssa_value *src) { + if (dest_type->is_builtin) { + vtn_builtin_store(b, src, dest_type->builtin); + return; + } + nir_deref *old_child = dest_deref_tail->child; if (glsl_type_is_vector_or_scalar(src->type)) { @@ -875,7 +965,8 @@ _vtn_variable_store(struct vtn_builder *b, nir_deref_var *dest_deref, dest_deref_tail->child = &deref->deref; for (unsigned i = 0; i < elems; i++) { deref->base_offset = i; - _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + _vtn_variable_store(b, dest_type->array_element, dest_deref, + &deref->deref, src->elems[i]); } } else { assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); @@ -886,7 +977,8 @@ _vtn_variable_store(struct vtn_builder *b, nir_deref_var *dest_deref, for (unsigned i = 0; i < elems; i++) { deref->index = i; deref->deref.type = glsl_get_struct_field(src->type, i); - _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + 
_vtn_variable_store(b, dest_type->members[i], dest_deref, + &deref->deref, src->elems[i]); } } @@ -917,10 +1009,11 @@ static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *index); static struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, nir_deref_var *src) +vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *src_type) { nir_deref *src_tail = get_deref_tail(src); - struct vtn_ssa_value *val = _vtn_variable_load(b, src, src_tail); + struct vtn_ssa_value *val = _vtn_variable_load(b, src, src_type, src_tail); if (src_tail->child) { nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); @@ -946,11 +1039,12 @@ static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *index); static void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, - nir_deref_var *dest) + nir_deref_var *dest, struct vtn_type *dest_type) { nir_deref *dest_tail = get_deref_tail(dest); if (dest_tail->child) { - struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); + struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_type, + dest_tail); nir_deref_array *deref = nir_deref_as_array(dest_tail->child); assert(deref->deref.child == NULL); if (deref->deref_array_type == nir_deref_array_type_direct) @@ -959,22 +1053,22 @@ vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, else val->def = vtn_vector_insert_dynamic(b, val->def, src->def, deref->indirect.ssa); - _vtn_variable_store(b, dest, dest_tail, val); + _vtn_variable_store(b, dest_type, dest, dest_tail, val); } else { - _vtn_variable_store(b, dest, dest_tail, src); + _vtn_variable_store(b, dest_type, dest, dest_tail, src); } } static void vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, - nir_deref_var *dest) + nir_deref_var *dest, struct vtn_type *type) { nir_deref *src_tail = get_deref_tail(src); if (src_tail->child) { assert(get_deref_tail(dest)->child); - struct vtn_ssa_value 
*val = vtn_variable_load(b, src); - vtn_variable_store(b, val, dest); + struct vtn_ssa_value *val = vtn_variable_load(b, src, type); + vtn_variable_store(b, val, dest, type); } else { nir_intrinsic_instr *copy = nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); @@ -1144,29 +1238,34 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvOpCopyMemory: { nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; + struct vtn_type *type = + vtn_value(b, w[1], vtn_value_type_deref)->deref_type; - vtn_variable_copy(b, src, dest); + vtn_variable_copy(b, src, dest, type); break; } case SpvOpLoad: { nir_deref_var *src = vtn_value(b, w[3], vtn_value_type_deref)->deref; - const struct glsl_type *src_type = nir_deref_tail(&src->deref)->type; + struct vtn_type *src_type = + vtn_value(b, w[3], vtn_value_type_deref)->deref_type; - if (glsl_get_base_type(src_type) == GLSL_TYPE_SAMPLER) { + if (glsl_get_base_type(src_type->type) == GLSL_TYPE_SAMPLER) { vtn_push_value(b, w[2], vtn_value_type_deref)->deref = src; return; } struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = vtn_variable_load(b, src); + val->ssa = vtn_variable_load(b, src, src_type); break; } case SpvOpStore: { nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + struct vtn_type *dest_type = + vtn_value(b, w[1], vtn_value_type_deref)->deref_type; struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); - vtn_variable_store(b, src, dest); + vtn_variable_store(b, src, dest, dest_type); break; } diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index 126955ae877..7262755d019 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -157,6 +157,11 @@ struct vtn_builder { */ struct hash_table *block_table; + /* + * NIR variable for each SPIR-V builtin. 
+ */ + nir_variable *builtins[42]; /* XXX need symbolic constant from SPIR-V header */ + unsigned value_id_bound; struct vtn_value *values; -- cgit v1.2.3 From f9dbc34a187946331a13475b0d34ecdb161025e7 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 6 Jul 2015 14:58:55 -0700 Subject: nir/spirv: fix some bugs --- src/glsl/nir/spirv_to_nir.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 23b80b12e53..ec26111930a 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -837,7 +837,7 @@ vtn_builtin_load(struct vtn_builder *b, nir_ssa_dest_init(&load->instr, &load->dest, glsl_get_vector_elements(val->type), NULL); - load->variables[0] = nir_deref_var_create(b->shader, var); + load->variables[0] = nir_deref_var_create(load, var); load->num_components = glsl_get_vector_elements(val->type); nir_builder_instr_insert(&b->nb, &load->instr); val->def = &load->dest.ssa; @@ -855,7 +855,7 @@ vtn_builtin_store(struct vtn_builder *b, nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); - store->variables[0] = nir_deref_var_create(b->shader, var); + store->variables[0] = nir_deref_var_create(store, var); store->num_components = glsl_get_vector_elements(val->type); store->src[0] = nir_src_for_ssa(val->def); nir_builder_instr_insert(&b->nb, &store->instr); @@ -952,6 +952,7 @@ _vtn_variable_store(struct vtn_builder *b, struct vtn_type *dest_type, nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); + store->num_components = glsl_get_vector_elements(src->type); store->src[0] = nir_src_for_ssa(src->def); nir_builder_instr_insert(&b->nb, &store->instr); @@ -1460,6 +1461,7 @@ create_vec(void *mem_ctx, unsigned num_components) nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op); nir_ssa_dest_init(&vec->instr, 
&vec->dest.dest, num_components, NULL); + vec->dest.write_mask = (1 << num_components) - 1; return vec; } @@ -2048,7 +2050,6 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, break; case SpvOpCompositeConstruct: { - val->ssa = rzalloc(b, struct vtn_ssa_value); unsigned elems = count - 3; if (glsl_type_is_vector_or_scalar(type)) { nir_ssa_def *srcs[4]; -- cgit v1.2.3 From 11cf2145782d7fe78a610fa71c2207bcb1847d32 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 15:27:58 -0700 Subject: vk: Remove the stub support for explicit memory references --- include/vulkan/vulkan.h | 13 ------------- src/vulkan/device.c | 17 ----------------- 2 files changed, 30 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 976d6e7a9d9..6bb9d674719 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1260,7 +1260,6 @@ typedef struct { VkQueueFlags queueFlags; uint32_t queueCount; bool32_t supportsTimestamps; - uint32_t maxMemReferences; } VkPhysicalDeviceQueueProperties; typedef struct { @@ -2003,8 +2002,6 @@ typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceExtensionInfo)(VkPhysicalDevice typedef VkResult (VKAPI *PFN_vkEnumerateLayers)(VkPhysicalDevice physicalDevice, size_t maxStringSize, size_t* pLayerCount, char* const* pOutLayers, void* pReserved); typedef VkResult (VKAPI *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueNodeIndex, uint32_t queueIndex, VkQueue* pQueue); typedef VkResult (VKAPI *PFN_vkQueueSubmit)(VkQueue queue, uint32_t cmdBufferCount, const VkCmdBuffer* pCmdBuffers, VkFence fence); -typedef VkResult (VKAPI *PFN_vkQueueAddMemReferences)(VkQueue queue, uint32_t count, const VkDeviceMemory* pMems); -typedef VkResult (VKAPI *PFN_vkQueueRemoveMemReferences)(VkQueue queue, uint32_t count, const VkDeviceMemory* pMems); typedef VkResult (VKAPI *PFN_vkQueueWaitIdle)(VkQueue queue); typedef VkResult (VKAPI *PFN_vkDeviceWaitIdle)(VkDevice device); typedef VkResult (VKAPI 
*PFN_vkAllocMemory)(VkDevice device, const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem); @@ -2171,16 +2168,6 @@ VkResult VKAPI vkQueueSubmit( const VkCmdBuffer* pCmdBuffers, VkFence fence); -VkResult VKAPI vkQueueAddMemReferences( - VkQueue queue, - uint32_t count, - const VkDeviceMemory* pMems); - -VkResult VKAPI vkQueueRemoveMemReferences( - VkQueue queue, - uint32_t count, - const VkDeviceMemory* pMems); - VkResult VKAPI vkQueueWaitIdle( VkQueue queue); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 437eb5cfa1a..bc91c907d25 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -238,7 +238,6 @@ VkResult anv_GetPhysicalDeviceInfo( queue_properties->queueFlags = 0; queue_properties->queueCount = 1; queue_properties->supportsTimestamps = true; - queue_properties->maxMemReferences = 256; return VK_SUCCESS; case VK_PHYSICAL_DEVICE_INFO_TYPE_MEMORY_PROPERTIES: @@ -796,22 +795,6 @@ VkResult anv_QueueSubmit( return VK_SUCCESS; } -VkResult anv_QueueAddMemReferences( - VkQueue queue, - uint32_t count, - const VkDeviceMemory* pMems) -{ - return VK_SUCCESS; -} - -VkResult anv_QueueRemoveMemReferences( - VkQueue queue, - uint32_t count, - const VkDeviceMemory* pMems) -{ - return VK_SUCCESS; -} - VkResult anv_QueueWaitIdle( VkQueue _queue) { -- cgit v1.2.3 From 78a0d23d4e92297b7885ebc32fe4469e05cda37e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 15:28:10 -0700 Subject: vk: Remove the stub support for memory priorities --- include/vulkan/vulkan.h | 19 ------------------- src/vulkan/device.c | 8 -------- 2 files changed, 27 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 6bb9d674719..1ac6b2a8e08 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -118,18 +118,6 @@ VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkRenderPass, VkNonDispatchable) // ------------------------------------------------------------------------------------------------ // Enumerations -typedef enum 
VkMemoryPriority_ -{ - VK_MEMORY_PRIORITY_UNUSED = 0x00000000, - VK_MEMORY_PRIORITY_VERY_LOW = 0x00000001, - VK_MEMORY_PRIORITY_LOW = 0x00000002, - VK_MEMORY_PRIORITY_NORMAL = 0x00000003, - VK_MEMORY_PRIORITY_HIGH = 0x00000004, - VK_MEMORY_PRIORITY_VERY_HIGH = 0x00000005, - - VK_ENUM_RANGE(MEMORY_PRIORITY, UNUSED, VERY_HIGH) -} VkMemoryPriority; - typedef enum VkImageLayout_ { VK_IMAGE_LAYOUT_UNDEFINED = 0x00000000, // Implicit layout an image is when its contents are undefined due to various reasons (e.g. right after creation) @@ -1292,7 +1280,6 @@ typedef struct { const void* pNext; VkDeviceSize allocationSize; VkMemoryPropertyFlags memProps; - VkMemoryPriority memPriority; } VkMemoryAllocInfo; typedef struct { @@ -2006,7 +1993,6 @@ typedef VkResult (VKAPI *PFN_vkQueueWaitIdle)(VkQueue queue); typedef VkResult (VKAPI *PFN_vkDeviceWaitIdle)(VkDevice device); typedef VkResult (VKAPI *PFN_vkAllocMemory)(VkDevice device, const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem); typedef VkResult (VKAPI *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory mem); -typedef VkResult (VKAPI *PFN_vkSetMemoryPriority)(VkDevice device, VkDeviceMemory mem, VkMemoryPriority priority); typedef VkResult (VKAPI *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData); typedef VkResult (VKAPI *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem); typedef VkResult (VKAPI *PFN_vkFlushMappedMemory)(VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, VkDeviceSize size); @@ -2183,11 +2169,6 @@ VkResult VKAPI vkFreeMemory( VkDevice device, VkDeviceMemory mem); -VkResult VKAPI vkSetMemoryPriority( - VkDevice device, - VkDeviceMemory mem, - VkMemoryPriority priority); - VkResult VKAPI vkMapMemory( VkDevice device, VkDeviceMemory mem, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index bc91c907d25..7c0f759460f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -954,14 +954,6 @@ VkResult 
anv_FreeMemory( return VK_SUCCESS; } -VkResult anv_SetMemoryPriority( - VkDevice device, - VkDeviceMemory mem, - VkMemoryPriority priority) -{ - return VK_SUCCESS; -} - VkResult anv_MapMemory( VkDevice _device, VkDeviceMemory _mem, -- cgit v1.2.3 From 9a798fa9464e2bfbdca0ea0335181e5b7446786c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 15:30:05 -0700 Subject: vk: Remove stub for CloneImageData --- include/vulkan/vulkan.h | 8 -------- src/vulkan/meta.c | 10 ---------- 2 files changed, 18 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 1ac6b2a8e08..8f4833ae965 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2069,7 +2069,6 @@ typedef void (VKAPI *PFN_vkCmdCopyImage)(VkCmdBuffer cmdBuffer, VkImage srcImage typedef void (VKAPI *PFN_vkCmdBlitImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageBlit* pRegions); typedef void (VKAPI *PFN_vkCmdCopyBufferToImage)(VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); typedef void (VKAPI *PFN_vkCmdCopyImageToBuffer)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer destBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdCloneImageData)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout); typedef void (VKAPI *PFN_vkCmdUpdateBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); typedef void (VKAPI *PFN_vkCmdFillBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); typedef void (VKAPI *PFN_vkCmdClearColorImage)(VkCmdBuffer cmdBuffer, VkImage image, 
VkImageLayout imageLayout, VkClearColor color, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); @@ -2623,13 +2622,6 @@ void VKAPI vkCmdCopyImageToBuffer( uint32_t regionCount, const VkBufferImageCopy* pRegions); -void VKAPI vkCmdCloneImageData( - VkCmdBuffer cmdBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout); - void VKAPI vkCmdUpdateBuffer( VkCmdBuffer cmdBuffer, VkBuffer destBuffer, diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 058c5b70a25..2c5a4548dc1 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -1170,16 +1170,6 @@ void anv_CmdCopyImageToBuffer( meta_finish_blit(cmd_buffer, &saved_state); } -void anv_CmdCloneImageData( - VkCmdBuffer cmdBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout) -{ - stub(); -} - void anv_CmdUpdateBuffer( VkCmdBuffer cmdBuffer, VkBuffer destBuffer, -- cgit v1.2.3 From ea5fbe19570d6ba006561f79b9a20245e2dcd870 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 15:32:27 -0700 Subject: vk: Remove begin/end descriptor pool update --- include/vulkan/vulkan.h | 10 ---------- src/vulkan/device.c | 14 -------------- 2 files changed, 24 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 8f4833ae965..93696178e87 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2038,8 +2038,6 @@ typedef VkResult (VKAPI *PFN_vkLoadPipelineDerivative)(VkDevice device, size_t d typedef VkResult (VKAPI *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout); typedef VkResult (VKAPI *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); typedef VkResult (VKAPI *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); -typedef 
VkResult (VKAPI *PFN_vkBeginDescriptorPoolUpdate)(VkDevice device, VkDescriptorUpdateMode updateMode); -typedef VkResult (VKAPI *PFN_vkEndDescriptorPoolUpdate)(VkDevice device, VkCmdBuffer cmd); typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, VkDescriptorPoolUsage poolUsage, uint32_t maxSets, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets, uint32_t* pCount); @@ -2424,14 +2422,6 @@ VkResult VKAPI vkCreateDescriptorSetLayout( const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); -VkResult VKAPI vkBeginDescriptorPoolUpdate( - VkDevice device, - VkDescriptorUpdateMode updateMode); - -VkResult VKAPI vkEndDescriptorPoolUpdate( - VkDevice device, - VkCmdBuffer cmd); - VkResult VKAPI vkCreateDescriptorPool( VkDevice device, VkDescriptorPoolUsage poolUsage, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 7c0f759460f..53db70e19d5 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1817,20 +1817,6 @@ VkResult anv_CreateDescriptorSetLayout( return VK_SUCCESS; } -VkResult anv_BeginDescriptorPoolUpdate( - VkDevice device, - VkDescriptorUpdateMode updateMode) -{ - return VK_SUCCESS; -} - -VkResult anv_EndDescriptorPoolUpdate( - VkDevice device, - VkCmdBuffer cmd) -{ - return VK_SUCCESS; -} - VkResult anv_CreateDescriptorPool( VkDevice device, VkDescriptorPoolUsage poolUsage, -- cgit v1.2.3 From c5ab5925df3a68f56fc82939461ad6ef59addebd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 15:32:40 -0700 Subject: vk: Remove ClearDescriptorSets --- include/vulkan/vulkan.h | 7 ------- src/vulkan/device.c | 8 -------- 2 files changed, 
15 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 93696178e87..aeab77ee6ae 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2041,7 +2041,6 @@ typedef VkResult (VKAPI *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, VkDescriptorPoolUsage poolUsage, uint32_t maxSets, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets, uint32_t* pCount); -typedef void (VKAPI *PFN_vkClearDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t count, const VkDescriptorSet* pDescriptorSets); typedef void (VKAPI *PFN_vkUpdateDescriptors)(VkDevice device, VkDescriptorSet descriptorSet, uint32_t updateCount, const void** ppUpdateArray); typedef VkResult (VKAPI *PFN_vkCreateDynamicViewportState)(VkDevice device, const VkDynamicVpStateCreateInfo* pCreateInfo, VkDynamicVpState* pState); typedef VkResult (VKAPI *PFN_vkCreateDynamicRasterState)(VkDevice device, const VkDynamicRsStateCreateInfo* pCreateInfo, VkDynamicRsState* pState); @@ -2442,12 +2441,6 @@ VkResult VKAPI vkAllocDescriptorSets( VkDescriptorSet* pDescriptorSets, uint32_t* pCount); -void VKAPI vkClearDescriptorSets( - VkDevice device, - VkDescriptorPool descriptorPool, - uint32_t count, - const VkDescriptorSet* pDescriptorSets); - void VKAPI vkUpdateDescriptors( VkDevice device, VkDescriptorSet descriptorSet, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 53db70e19d5..280ce55bbf1 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1873,14 +1873,6 @@ VkResult anv_AllocDescriptorSets( 
return VK_SUCCESS; } -void anv_ClearDescriptorSets( - VkDevice device, - VkDescriptorPool descriptorPool, - uint32_t count, - const VkDescriptorSet* pDescriptorSets) -{ -} - void anv_UpdateDescriptors( VkDevice _device, VkDescriptorSet descriptorSet, -- cgit v1.2.3 From c5ffcc9958589da0e4b1dcfa82d10107adb2af45 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 15:34:55 -0700 Subject: vk: Remove multi-device stuff --- include/vulkan/vulkan.h | 31 ------------------------------- src/vulkan/device.c | 41 ----------------------------------------- 2 files changed, 72 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index aeab77ee6ae..b463796e1a9 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1997,11 +1997,6 @@ typedef VkResult (VKAPI *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory mem, V typedef VkResult (VKAPI *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem); typedef VkResult (VKAPI *PFN_vkFlushMappedMemory)(VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, VkDeviceSize size); typedef VkResult (VKAPI *PFN_vkPinSystemMemory)(VkDevice device, const void* pSysMem, size_t memSize, VkDeviceMemory* pMem); -typedef VkResult (VKAPI *PFN_vkGetMultiDeviceCompatibility)(VkPhysicalDevice physicalDevice0, VkPhysicalDevice physicalDevice1, VkPhysicalDeviceCompatibilityInfo* pInfo); -typedef VkResult (VKAPI *PFN_vkOpenSharedMemory)(VkDevice device, const VkMemoryOpenInfo* pOpenInfo, VkDeviceMemory* pMem); -typedef VkResult (VKAPI *PFN_vkOpenSharedSemaphore)(VkDevice device, const VkSemaphoreOpenInfo* pOpenInfo, VkSemaphore* pSemaphore); -typedef VkResult (VKAPI *PFN_vkOpenPeerMemory)(VkDevice device, const VkPeerMemoryOpenInfo* pOpenInfo, VkDeviceMemory* pMem); -typedef VkResult (VKAPI *PFN_vkOpenPeerImage)(VkDevice device, const VkPeerImageOpenInfo* pOpenInfo, VkImage* pImage, VkDeviceMemory* pMem); typedef VkResult (VKAPI *PFN_vkDestroyObject)(VkDevice device, VkObjectType 
objType, VkObject object); typedef VkResult (VKAPI *PFN_vkGetObjectInfo)(VkDevice device, VkObjectType objType, VkObject object, VkObjectInfoType infoType, size_t* pDataSize, void* pData); typedef VkResult (VKAPI *PFN_vkQueueBindObjectMemory)(VkQueue queue, VkObjectType objType, VkObject object, uint32_t allocationIdx, VkDeviceMemory mem, VkDeviceSize offset); @@ -2189,32 +2184,6 @@ VkResult VKAPI vkPinSystemMemory( size_t memSize, VkDeviceMemory* pMem); -VkResult VKAPI vkGetMultiDeviceCompatibility( - VkPhysicalDevice physicalDevice0, - VkPhysicalDevice physicalDevice1, - VkPhysicalDeviceCompatibilityInfo* pInfo); - -VkResult VKAPI vkOpenSharedMemory( - VkDevice device, - const VkMemoryOpenInfo* pOpenInfo, - VkDeviceMemory* pMem); - -VkResult VKAPI vkOpenSharedSemaphore( - VkDevice device, - const VkSemaphoreOpenInfo* pOpenInfo, - VkSemaphore* pSemaphore); - -VkResult VKAPI vkOpenPeerMemory( - VkDevice device, - const VkPeerMemoryOpenInfo* pOpenInfo, - VkDeviceMemory* pMem); - -VkResult VKAPI vkOpenPeerImage( - VkDevice device, - const VkPeerImageOpenInfo* pOpenInfo, - VkImage* pImage, - VkDeviceMemory* pMem); - VkResult VKAPI vkDestroyObject( VkDevice device, VkObjectType objType, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 280ce55bbf1..64ef36a3041 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1010,47 +1010,6 @@ VkResult anv_PinSystemMemory( return VK_SUCCESS; } -VkResult anv_GetMultiDeviceCompatibility( - VkPhysicalDevice physicalDevice0, - VkPhysicalDevice physicalDevice1, - VkPhysicalDeviceCompatibilityInfo* pInfo) -{ - return VK_UNSUPPORTED; -} - -VkResult anv_OpenSharedMemory( - VkDevice device, - const VkMemoryOpenInfo* pOpenInfo, - VkDeviceMemory* pMem) -{ - return VK_UNSUPPORTED; -} - -VkResult anv_OpenSharedSemaphore( - VkDevice device, - const VkSemaphoreOpenInfo* pOpenInfo, - VkSemaphore* pSemaphore) -{ - return VK_UNSUPPORTED; -} - -VkResult anv_OpenPeerMemory( - VkDevice device, - const VkPeerMemoryOpenInfo* 
pOpenInfo, - VkDeviceMemory* pMem) -{ - return VK_UNSUPPORTED; -} - -VkResult anv_OpenPeerImage( - VkDevice device, - const VkPeerImageOpenInfo* pOpenInfo, - VkImage* pImage, - VkDeviceMemory* pMem) -{ - return VK_UNSUPPORTED; -} - VkResult anv_DestroyObject( VkDevice _device, VkObjectType objType, -- cgit v1.2.3 From 8c5e48f3074d11d3541c0d336723cc550800a9e5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 16:43:28 -0700 Subject: vk: Rename NUM_SHADER_STAGE to SHADER_STAGE_NUM This is a refactor of more than just the header but it lets us finish reformating the shader stage enum. --- include/vulkan/vulkan.h | 6 ++++-- src/vulkan/compiler.cpp | 10 +++++----- src/vulkan/device.c | 12 ++++++------ src/vulkan/pipeline.c | 8 ++++---- src/vulkan/private.h | 14 +++++++------- 5 files changed, 26 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 7c3aae9d344..1d78177c155 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -544,8 +544,10 @@ typedef enum { VK_SHADER_STAGE_GEOMETRY = 3, VK_SHADER_STAGE_FRAGMENT = 4, VK_SHADER_STAGE_COMPUTE = 5, - - VK_ENUM_RANGE(SHADER_STAGE, VERTEX, COMPUTE) + VK_SHADER_STAGE_BEGIN_RANGE = VK_SHADER_STAGE_VERTEX, + VK_SHADER_STAGE_END_RANGE = VK_SHADER_STAGE_COMPUTE, + VK_SHADER_STAGE_NUM = (VK_SHADER_STAGE_COMPUTE - VK_SHADER_STAGE_VERTEX + 1), + VK_SHADER_STAGE_MAX_ENUM = 0x7FFFFFFF } VkShaderStage; typedef enum { diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index b220331408c..7ba42151c19 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -1029,12 +1029,12 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) program = brw->ctx.Driver.NewShaderProgram(name); program->Shaders = (struct gl_shader **) - calloc(VK_NUM_SHADER_STAGE, sizeof(struct gl_shader *)); + calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *)); fail_if(program == NULL || program->Shaders == NULL, 
"failed to create program\n"); bool all_spirv = true; - for (unsigned i = 0; i < VK_NUM_SHADER_STAGE; i++) { + for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { if (pipeline->shaders[i] == NULL) continue; @@ -1050,7 +1050,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) } if (all_spirv) { - for (unsigned i = 0; i < VK_NUM_SHADER_STAGE; i++) { + for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { if (pipeline->shaders[i]) anv_compile_shader_spirv(compiler, program, pipeline, i); } @@ -1060,7 +1060,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) program->_LinkedShaders[shader->Stage] = shader; } } else { - for (unsigned i = 0; i < VK_NUM_SHADER_STAGE; i++) { + for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { if (pipeline->shaders[i]) anv_compile_shader_glsl(compiler, program, pipeline, i); } @@ -1157,7 +1157,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) void anv_compiler_free(struct anv_pipeline *pipeline) { - for (uint32_t stage = 0; stage < VK_NUM_SHADER_STAGE; stage++) { + for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) { if (pipeline->prog_data[stage]) { free(pipeline->prog_data[stage]->map_entries); ralloc_free(pipeline->prog_data[stage]->param); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 64ef36a3041..35eaad21774 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1635,8 +1635,8 @@ VkResult anv_CreateDescriptorSetLayout( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); - uint32_t sampler_count[VK_NUM_SHADER_STAGE] = { 0, }; - uint32_t surface_count[VK_NUM_SHADER_STAGE] = { 0, }; + uint32_t sampler_count[VK_SHADER_STAGE_NUM] = { 0, }; + uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, }; uint32_t num_dynamic_buffers = 0; uint32_t count = 0; uint32_t stages = 0; @@ -1685,7 +1685,7 @@ VkResult anv_CreateDescriptorSetLayout( uint32_t sampler_total = 0; uint32_t 
surface_total = 0; - for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { sampler_total += sampler_count[s]; surface_total += surface_count[s]; } @@ -1702,9 +1702,9 @@ VkResult anv_CreateDescriptorSetLayout( set_layout->shader_stages = stages; struct anv_descriptor_slot *p = set_layout->entries; - struct anv_descriptor_slot *sampler[VK_NUM_SHADER_STAGE]; - struct anv_descriptor_slot *surface[VK_NUM_SHADER_STAGE]; - for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + struct anv_descriptor_slot *sampler[VK_SHADER_STAGE_NUM]; + struct anv_descriptor_slot *surface[VK_SHADER_STAGE_NUM]; + for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { set_layout->stage[s].surface_count = surface_count[s]; set_layout->stage[s].surface_start = surface[s] = p; p += surface_count[s]; diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index f574038f83f..cd4294bfa08 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -833,10 +833,10 @@ VkResult anv_CreatePipelineLayout( layout->num_sets = pCreateInfo->descriptorSetCount; - uint32_t surface_start[VK_NUM_SHADER_STAGE] = { 0, }; - uint32_t sampler_start[VK_NUM_SHADER_STAGE] = { 0, }; + uint32_t surface_start[VK_SHADER_STAGE_NUM] = { 0, }; + uint32_t sampler_start[VK_SHADER_STAGE_NUM] = { 0, }; - for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { layout->stage[s].surface_count = 0; layout->stage[s].sampler_count = 0; } @@ -846,7 +846,7 @@ VkResult anv_CreatePipelineLayout( (struct anv_descriptor_set_layout *) pCreateInfo->pSetLayouts[i]; layout->set[i].layout = set_layout; - for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { layout->set[i].surface_start[s] = surface_start[s]; surface_start[s] += set_layout->stage[s].surface_count; layout->set[i].sampler_start[s] = sampler_start[s]; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 
0f5986b8b8a..a0ccb903224 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -592,7 +592,7 @@ struct anv_descriptor_set_layout { struct anv_descriptor_slot *surface_start; uint32_t sampler_count; struct anv_descriptor_slot *sampler_start; - } stage[VK_NUM_SHADER_STAGE]; + } stage[VK_SHADER_STAGE_NUM]; uint32_t count; uint32_t num_dynamic_buffers; @@ -616,8 +616,8 @@ struct anv_descriptor_set { struct anv_pipeline_layout { struct { struct anv_descriptor_set_layout *layout; - uint32_t surface_start[VK_NUM_SHADER_STAGE]; - uint32_t sampler_start[VK_NUM_SHADER_STAGE]; + uint32_t surface_start[VK_SHADER_STAGE_NUM]; + uint32_t sampler_start[VK_SHADER_STAGE_NUM]; } set[MAX_SETS]; uint32_t num_sets; @@ -625,7 +625,7 @@ struct anv_pipeline_layout { struct { uint32_t surface_count; uint32_t sampler_count; - } stage[VK_NUM_SHADER_STAGE]; + } stage[VK_SHADER_STAGE_NUM]; }; struct anv_buffer { @@ -712,7 +712,7 @@ struct anv_pipeline { struct anv_device * device; struct anv_batch batch; uint32_t batch_data[256]; - struct anv_shader * shaders[VK_NUM_SHADER_STAGE]; + struct anv_shader * shaders[VK_SHADER_STAGE_NUM]; struct anv_pipeline_layout * layout; bool use_repclear; @@ -720,8 +720,8 @@ struct anv_pipeline { struct brw_wm_prog_data wm_prog_data; struct brw_gs_prog_data gs_prog_data; struct brw_cs_prog_data cs_prog_data; - struct brw_stage_prog_data * prog_data[VK_NUM_SHADER_STAGE]; - uint32_t scratch_start[VK_NUM_SHADER_STAGE]; + struct brw_stage_prog_data * prog_data[VK_SHADER_STAGE_NUM]; + uint32_t scratch_start[VK_SHADER_STAGE_NUM]; uint32_t total_scratch; struct { uint32_t vs_start; -- cgit v1.2.3 From 2de388c49cfb3786936ce9ca1f2e479aa82fd26b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 17:12:47 -0700 Subject: vk: Remove SHAREABLE bits They were removed from the Vulkan API and we don't really use them because there are no multi-GPU i965 systems. 
--- include/vulkan/vulkan.h | 15 ++++----------- src/vulkan/device.c | 3 +-- 2 files changed, 5 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 5f0906f932e..cdd60288fe1 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -911,7 +911,6 @@ typedef enum { VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT = 0x00000004, VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT = 0x00000008, VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL = 0x00000010, - VK_MEMORY_PROPERTY_SHAREABLE_BIT = 0x00000011, } VkMemoryPropertyFlagBits; typedef VkFlags VkMemoryPropertyFlags; @@ -926,10 +925,6 @@ typedef enum { VK_FENCE_CREATE_SIGNALED_BIT = 0x00000001, } VkFenceCreateFlagBits; typedef VkFlags VkFenceCreateFlags; - -typedef enum { - VK_SEMAPHORE_CREATE_SHAREABLE_BIT = 0x00000001, -} VkSemaphoreCreateFlagBits; typedef VkFlags VkSemaphoreCreateFlags; typedef VkFlags VkEventCreateFlags; @@ -973,8 +968,7 @@ typedef enum { typedef VkFlags VkBufferUsageFlags; typedef enum { - VK_BUFFER_CREATE_SHAREABLE_BIT = 0x00000001, - VK_BUFFER_CREATE_SPARSE_BIT = 0x00000002, + VK_BUFFER_CREATE_SPARSE_BIT = 0x00000001, } VkBufferCreateFlagBits; typedef VkFlags VkBufferCreateFlags; @@ -993,10 +987,9 @@ typedef VkFlags VkImageUsageFlags; typedef enum { VK_IMAGE_CREATE_INVARIANT_DATA_BIT = 0x00000001, VK_IMAGE_CREATE_CLONEABLE_BIT = 0x00000002, - VK_IMAGE_CREATE_SHAREABLE_BIT = 0x00000004, - VK_IMAGE_CREATE_SPARSE_BIT = 0x00000008, - VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000010, - VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000020, + VK_IMAGE_CREATE_SPARSE_BIT = 0x00000004, + VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000008, + VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000010, } VkImageCreateFlagBits; typedef VkFlags VkImageCreateFlags; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 35eaad21774..35f3f31e67b 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1092,8 +1092,7 @@ fill_memory_requirements( 
VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT | /* VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT | */ VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT | - VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL | - VK_MEMORY_PROPERTY_SHAREABLE_BIT; + VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL; memory_requirements->memPropsRequired = 0; -- cgit v1.2.3 From 2b404e5d00a257b527db7cd830112960a10ad465 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 17:18:25 -0700 Subject: vk: Rename CPU_READ/WRITE_BIT to HOST_READ/WRITE_BIT --- include/vulkan/vulkan.h | 4 ++-- src/vulkan/device.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index f509f4178b2..2d9990f2fde 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1039,7 +1039,7 @@ typedef enum { typedef VkFlags VkQueryControlFlags; typedef enum { - VK_MEMORY_OUTPUT_CPU_WRITE_BIT = 0x00000001, + VK_MEMORY_OUTPUT_HOST_WRITE_BIT = 0x00000001, VK_MEMORY_OUTPUT_SHADER_WRITE_BIT = 0x00000002, VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT = 0x00000004, VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000008, @@ -1048,7 +1048,7 @@ typedef enum { typedef VkFlags VkMemoryOutputFlags; typedef enum { - VK_MEMORY_INPUT_CPU_READ_BIT = 0x00000001, + VK_MEMORY_INPUT_HOST_READ_BIT = 0x00000001, VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT = 0x00000002, VK_MEMORY_INPUT_INDEX_FETCH_BIT = 0x00000004, VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT = 0x00000008, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 35f3f31e67b..c833d24a997 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3469,7 +3469,7 @@ void anv_CmdPipelineBarrier( for_each_bit(b, out_flags) { switch ((VkMemoryOutputFlags)(1 << b)) { - case VK_MEMORY_OUTPUT_CPU_WRITE_BIT: + case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: break; /* FIXME: Little-core systems */ case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: cmd.DCFlushEnable = true; @@ -3491,7 +3491,7 @@ void anv_CmdPipelineBarrier( for_each_bit(b, 
out_flags) { switch ((VkMemoryInputFlags)(1 << b)) { - case VK_MEMORY_INPUT_CPU_READ_BIT: + case VK_MEMORY_INPUT_HOST_READ_BIT: break; /* FIXME: Little-core systems */ case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: case VK_MEMORY_INPUT_INDEX_FETCH_BIT: -- cgit v1.2.3 From 68fa750f2e1c94be6021d39b744cb0a2f37dcd5f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 17:32:28 -0700 Subject: vk/vulkan.h: Replace DEVICE_COHERENT_BIT with DEVICE_NON_COHERENT_BIT --- include/vulkan/vulkan.h | 2 +- src/vulkan/device.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 70ce0e4c3bd..7f6776ebbeb 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -905,7 +905,7 @@ typedef VkFlags VkQueueFlags; typedef enum { VK_MEMORY_PROPERTY_DEVICE_ONLY = 0, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT = 0x00000001, - VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT = 0x00000002, + VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT = 0x00000002, VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT = 0x00000004, VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT = 0x00000008, VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL = 0x00000010, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index c833d24a997..088a3f6bb81 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1089,7 +1089,7 @@ fill_memory_requirements( memory_requirements->memPropsAllowed = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT | + /* VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT | */ /* VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT | */ VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT | VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL; -- cgit v1.2.3 From 65f9ccb4e70da0386e14399a1f31135a64ad9f94 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 17:33:43 -0700 Subject: vk/vulkan.h: Remove VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL_BIT We weren't doing anything with it, so this is a no-op --- include/vulkan/vulkan.h | 1 - 
src/vulkan/device.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 7f6776ebbeb..bb6bdf9d566 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -908,7 +908,6 @@ typedef enum { VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT = 0x00000002, VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT = 0x00000004, VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT = 0x00000008, - VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL = 0x00000010, } VkMemoryPropertyFlagBits; typedef VkFlags VkMemoryPropertyFlags; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 088a3f6bb81..3c73a404f67 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1091,8 +1091,7 @@ fill_memory_requirements( VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | /* VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT | */ /* VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT | */ - VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT | - VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL; + VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT; memory_requirements->memPropsRequired = 0; -- cgit v1.2.3 From 63c1190e4734894b95fd62e3e4ee75bc0ff6dde0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 17:43:58 -0700 Subject: vk/vulkan.h: Rename count to arraySize in VkDescriptorSetLayoutBinding --- include/vulkan/vulkan.h | 2 +- src/vulkan/device.c | 16 ++++++++-------- src/vulkan/meta.c | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index e3c29b55b10..a0f50e0f024 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1573,7 +1573,7 @@ typedef struct { typedef struct { VkDescriptorType descriptorType; - uint32_t count; + uint32_t arraySize; VkShaderStageFlags stageFlags; const VkSampler* pImmutableSamplers; } VkDescriptorSetLayoutBinding; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 3c73a404f67..cfe9201e2e6 100644 --- a/src/vulkan/device.c +++ 
b/src/vulkan/device.c @@ -1645,7 +1645,7 @@ VkResult anv_CreateDescriptorSetLayout( case VK_DESCRIPTOR_TYPE_SAMPLER: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - sampler_count[s] += pCreateInfo->pBinding[i].count; + sampler_count[s] += pCreateInfo->pBinding[i].arraySize; break; default: break; @@ -1662,7 +1662,7 @@ VkResult anv_CreateDescriptorSetLayout( case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - surface_count[s] += pCreateInfo->pBinding[i].count; + surface_count[s] += pCreateInfo->pBinding[i].arraySize; break; default: break; @@ -1671,14 +1671,14 @@ VkResult anv_CreateDescriptorSetLayout( switch (pCreateInfo->pBinding[i].descriptorType) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - num_dynamic_buffers += pCreateInfo->pBinding[i].count; + num_dynamic_buffers += pCreateInfo->pBinding[i].arraySize; break; default: break; } stages |= pCreateInfo->pBinding[i].stageFlags; - count += pCreateInfo->pBinding[i].count; + count += pCreateInfo->pBinding[i].arraySize; } uint32_t sampler_total = 0; @@ -1719,7 +1719,7 @@ VkResult anv_CreateDescriptorSetLayout( case VK_DESCRIPTOR_TYPE_SAMPLER: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) { + for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) { sampler[s]->index = descriptor + j; sampler[s]->dynamic_slot = -1; sampler[s]++; @@ -1750,7 +1750,7 @@ VkResult anv_CreateDescriptorSetLayout( case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) { + for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) { surface[s]->index = 
descriptor + j; if (is_dynamic) surface[s]->dynamic_slot = dynamic_slot + j; @@ -1764,9 +1764,9 @@ VkResult anv_CreateDescriptorSetLayout( } if (is_dynamic) - dynamic_slot += pCreateInfo->pBinding[i].count; + dynamic_slot += pCreateInfo->pBinding[i].arraySize; - descriptor += pCreateInfo->pBinding[i].count; + descriptor += pCreateInfo->pBinding[i].arraySize; } *pSetLayout = (VkDescriptorSetLayout) set_layout; diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 2c5a4548dc1..b782279e7b9 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -415,7 +415,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) .pBinding = (VkDescriptorSetLayoutBinding[]) { { .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .count = 1, + .arraySize = 1, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, .pImmutableSamplers = NULL }, -- cgit v1.2.3 From 1f1b26bceb2a0bea92734d911a7282119ca684bf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 6 Jul 2015 17:47:18 -0700 Subject: vk/vulkan.h: Rename VkRect to VkRect2D --- include/vulkan/vulkan.h | 7 +++---- src/vulkan/device.c | 4 ++-- src/vulkan/private.h | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index a0f50e0f024..6711ba2ab44 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1619,14 +1619,14 @@ typedef struct { typedef struct { VkOffset2D offset; VkExtent2D extent; -} VkRect; +} VkRect2D; typedef struct { VkStructureType sType; const void* pNext; uint32_t viewportAndScissorCount; const VkViewport* pViewports; - const VkRect* pScissors; + const VkRect2D* pScissors; } VkDynamicVpStateCreateInfo; typedef struct { @@ -1694,8 +1694,7 @@ typedef struct { typedef struct { VkStructureType sType; const void* pNext; - - VkRect renderArea; + VkRect2D renderArea; uint32_t colorAttachmentCount; VkExtent2D extent; uint32_t sampleCount; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 
cfe9201e2e6..b1cc618fdd3 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1956,7 +1956,7 @@ VkResult anv_CreateDynamicViewportState( for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) { const VkViewport *vp = &pCreateInfo->pViewports[i]; - const VkRect *s = &pCreateInfo->pScissors[i]; + const VkRect2D *s = &pCreateInfo->pScissors[i]; struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = { .ViewportMatrixElementm00 = vp->width / 2, @@ -3585,7 +3585,7 @@ VkResult anv_CreateFramebuffer( .maxDepth = 1 }, }, - .pScissors = (VkRect[]) { + .pScissors = (VkRect2D[]) { { { 0, 0 }, { pCreateInfo->width, pCreateInfo->height } }, } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index a0ccb903224..cb290ffea99 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -909,7 +909,7 @@ struct anv_render_pass_layer { }; struct anv_render_pass { - VkRect render_area; + VkRect2D render_area; uint32_t num_clear_layers; uint32_t num_layers; -- cgit v1.2.3 From 5b04db71ff9416fad7d8399b3bad4d754deeacc8 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 6 Jul 2015 16:24:28 -0700 Subject: vk/image: Move validation for vkCreateImageView Move the validation from anv_CreateImageView() and anv_image_view_init() to anv_validate_CreateImageView(). No new validation is added. 
--- src/vulkan/image.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 9930d90aa41..00effbbc87b 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -332,11 +332,6 @@ anv_image_view_init(struct anv_surface_view *view, const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); - anv_assert(range->mipLevels > 0); - anv_assert(range->arraySize > 0); - anv_assert(range->baseMipLevel + range->mipLevels <= image->levels); - anv_assert(range->baseArraySlice + range->arraySize <= image->array_size); - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) anv_finishme("non-2D image views"); @@ -434,6 +429,29 @@ anv_image_view_init(struct anv_surface_view *view, GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); } +VkResult +anv_validate_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + VkImageView *pView) +{ + const struct anv_image *image; + const VkImageSubresourceRange *range; + + assert(pCreateInfo); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); + assert(pView); + + image = (struct anv_image *) pCreateInfo->image; + range = &pCreateInfo->subresourceRange; + + assert(range->mipLevels > 0); + assert(range->arraySize > 0); + assert(range->baseMipLevel + range->mipLevels <= image->levels); + assert(range->baseArraySlice + range->arraySize <= image->array_size); + + return anv_CreateImageView(_device, pCreateInfo, pView); +} + VkResult anv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, @@ -442,8 +460,6 @@ anv_CreateImageView(VkDevice _device, struct anv_device *device = (struct anv_device *) _device; struct anv_surface_view *view; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); - view = anv_device_alloc(device, sizeof(*view), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (view == NULL) -- cgit 
v1.2.3 From 69e11adeccce1ef054f01dbb1f4c062e2b3eeea2 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 6 Jul 2015 16:25:59 -0700 Subject: vk/image: Add more info to VkImageViewType table Convert the table from the direct mapping VkImageViewType -> SurfaceType into a mapping to an info struct VkImageViewType -> struct anv_image_view_info --- src/vulkan/image.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 00effbbc87b..51b874d41c1 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -29,6 +29,12 @@ #include "private.h" +struct anv_image_view_info { + uint8_t surface_type; /**< RENDER_SURFACE_STATE.SurfaceType */ + bool is_array:1; /**< RENDER_SURFACE_STATE.SurfaceArray */ + bool is_cube:1; /**< RENDER_SURFACE_STATE.CubeFaceEnable* */ +}; + static const uint8_t anv_halign[] = { [4] = HALIGN4, [8] = HALIGN8, @@ -45,13 +51,20 @@ static const uint8_t anv_surf_type_from_image_type[] = { [VK_IMAGE_TYPE_1D] = SURFTYPE_1D, [VK_IMAGE_TYPE_2D] = SURFTYPE_2D, [VK_IMAGE_TYPE_3D] = SURFTYPE_3D, + }; -static const uint8_t anv_surf_type_from_image_view_type[] = { - [VK_IMAGE_VIEW_TYPE_1D] = SURFTYPE_1D, - [VK_IMAGE_VIEW_TYPE_2D] = SURFTYPE_2D, - [VK_IMAGE_VIEW_TYPE_3D] = SURFTYPE_3D, - [VK_IMAGE_VIEW_TYPE_CUBE] = SURFTYPE_CUBE, +static const struct anv_image_view_info +anv_image_view_info_table[] = { + #define INFO(s, ...) 
{ .surface_type = s, __VA_ARGS__ } + [VK_IMAGE_VIEW_TYPE_1D] = INFO(SURFTYPE_1D), + [VK_IMAGE_VIEW_TYPE_2D] = INFO(SURFTYPE_2D), + [VK_IMAGE_VIEW_TYPE_3D] = INFO(SURFTYPE_3D), + [VK_IMAGE_VIEW_TYPE_CUBE] = INFO(SURFTYPE_CUBE, .is_cube = 1), + [VK_IMAGE_VIEW_TYPE_1D_ARRAY] = INFO(SURFTYPE_1D, .is_array = 1), + [VK_IMAGE_VIEW_TYPE_2D_ARRAY] = INFO(SURFTYPE_2D, .is_array = 1), + [VK_IMAGE_VIEW_TYPE_CUBE_ARRAY] = INFO(SURFTYPE_CUBE, .is_array = 1, .is_cube = 1), + #undef INFO }; static const struct anv_surf_type_limits { @@ -332,6 +345,9 @@ anv_image_view_init(struct anv_surface_view *view, const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); + const struct anv_image_view_info *view_type_info + = &anv_image_view_info_table[pCreateInfo->viewType]; + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) anv_finishme("non-2D image views"); @@ -377,7 +393,7 @@ anv_image_view_init(struct anv_surface_view *view, }; struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = anv_surf_type_from_image_view_type[pCreateInfo->viewType], + .SurfaceType = view_type_info->surface_type, .SurfaceArray = image->array_size > 1, .SurfaceFormat = format_info->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], -- cgit v1.2.3 From 23075bccb37d0df48f27f3be7ddc8a3484e3e760 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 6 Jul 2015 17:04:13 -0700 Subject: vk/image: Validate vkCreateImageView more Exhaustively validate the function input. If it's not validated and doesn't have an anv_finishme(), then I overlooked it. 
--- src/vulkan/image.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 70 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 51b874d41c1..81d5f100ba6 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -451,19 +451,82 @@ anv_validate_CreateImageView(VkDevice _device, VkImageView *pView) { const struct anv_image *image; - const VkImageSubresourceRange *range; + const VkImageSubresourceRange *subresource; + const struct anv_image_view_info *view_info; + const struct anv_format *view_format_info; + const struct anv_format *image_format_info; + /* Validate structure type before dereferencing it. */ assert(pCreateInfo); assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); - assert(pView); + subresource = &pCreateInfo->subresourceRange; + /* Validate image pointer before dereferencing it. */ + assert(pCreateInfo->image != 0); image = (struct anv_image *) pCreateInfo->image; - range = &pCreateInfo->subresourceRange; - assert(range->mipLevels > 0); - assert(range->arraySize > 0); - assert(range->baseMipLevel + range->mipLevels <= image->levels); - assert(range->baseArraySlice + range->arraySize <= image->array_size); + /* Validate viewType is in range before using it. */ + assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); + assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); + view_info = &anv_image_view_info_table[pCreateInfo->viewType]; + + /* Validate format is in range before using it. */ + assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); + assert(pCreateInfo->format <= VK_FORMAT_END_RANGE); + image_format_info = anv_format_for_vk_format(image->format); + view_format_info = anv_format_for_vk_format(pCreateInfo->format); + + /* Validate channel swizzles. 
*/ + assert(pCreateInfo->channels.r >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->channels.r <= VK_CHANNEL_SWIZZLE_END_RANGE); + assert(pCreateInfo->channels.g >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->channels.g <= VK_CHANNEL_SWIZZLE_END_RANGE); + assert(pCreateInfo->channels.b >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->channels.b <= VK_CHANNEL_SWIZZLE_END_RANGE); + assert(pCreateInfo->channels.a >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->channels.a <= VK_CHANNEL_SWIZZLE_END_RANGE); + + /* Validate subresource. */ + assert(subresource->aspect >= VK_IMAGE_ASPECT_BEGIN_RANGE); + assert(subresource->aspect <= VK_IMAGE_ASPECT_END_RANGE); + assert(subresource->mipLevels > 0); + assert(subresource->arraySize > 0); + assert(subresource->baseMipLevel < image->levels); + assert(subresource->baseMipLevel + subresource->mipLevels <= image->levels); + assert(subresource->baseArraySlice < image->array_size); + assert(subresource->baseArraySlice + subresource->arraySize <= image->array_size); + assert(pCreateInfo->minLod >= 0); + assert(pCreateInfo->minLod < image->levels); + assert(pView); + + if (view_info->is_cube) { + assert(subresource->baseArraySlice % 6 == 0); + assert(subresource->arraySize % 6 == 0); + } + + /* Validate format. */ + switch (subresource->aspect) { + case VK_IMAGE_ASPECT_COLOR: + assert(!image_format_info->depth_format); + assert(!image_format_info->has_stencil); + assert(!view_format_info->depth_format); + assert(!view_format_info->has_stencil); + assert(view_format_info->cpp == image_format_info->cpp); + break; + case VK_IMAGE_ASPECT_DEPTH: + assert(image_format_info->depth_format); + assert(view_format_info->depth_format); + assert(view_format_info->cpp == image_format_info->cpp); + break; + case VK_IMAGE_ASPECT_STENCIL: + /* FINISHME: Is it legal to have an R8 view of S8? 
*/ + assert(image_format_info->has_stencil); + assert(view_format_info->has_stencil); + break; + default: + assert(!"bad VkImageAspect"); + break; + } return anv_CreateImageView(_device, pCreateInfo, pView); } -- cgit v1.2.3 From c325bb24b5b17ac94a7128335475ac82d20f4b35 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 21 Jun 2015 23:47:10 -0700 Subject: vk: Pull in new generated headers The new headers use stdbool for enable/disable fields which implicitly converts expressions like (flags & 8) to 0 or 1. Also handles MBO (must-be-one) fields by setting them to one, corrects a bspec typo (_3DPRIM_LISTSTRIP_ADJ -> LINESTRIP) and makes a few enum values less clashy. --- src/vulkan/gen75_pack.h | 434 +++++++++++++++++++-------------------- src/vulkan/gen7_pack.h | 384 +++++++++++++++++----------------- src/vulkan/gen8_pack.h | 532 ++++++++++++++++++++++++------------------------ src/vulkan/pipeline.c | 2 +- 4 files changed, 675 insertions(+), 677 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index 5d89a44efa5..9ea8c2bcf65 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -184,7 +184,7 @@ GEN75_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, struct GEN75_MI_STORE_REGISTER_MEM { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t UseGlobalGTT; + bool UseGlobalGTT; uint32_t PredicateEnable; uint32_t DwordLength; uint32_t RegisterAddress; @@ -288,27 +288,27 @@ struct GEN75_STATE_BASE_ADDRESS { __gen_address_type GeneralStateBaseAddress; struct GEN75_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; struct GEN75_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; - uint32_t GeneralStateBaseAddressModifyEnable; + bool GeneralStateBaseAddressModifyEnable; __gen_address_type SurfaceStateBaseAddress; struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; - uint32_t 
SurfaceStateBaseAddressModifyEnable; + bool SurfaceStateBaseAddressModifyEnable; __gen_address_type DynamicStateBaseAddress; struct GEN75_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; - uint32_t DynamicStateBaseAddressModifyEnable; + bool DynamicStateBaseAddressModifyEnable; __gen_address_type IndirectObjectBaseAddress; struct GEN75_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; - uint32_t IndirectObjectBaseAddressModifyEnable; + bool IndirectObjectBaseAddressModifyEnable; __gen_address_type InstructionBaseAddress; struct GEN75_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; - uint32_t InstructionBaseAddressModifyEnable; + bool InstructionBaseAddressModifyEnable; __gen_address_type GeneralStateAccessUpperBound; - uint32_t GeneralStateAccessUpperBoundModifyEnable; + bool GeneralStateAccessUpperBoundModifyEnable; __gen_address_type DynamicStateAccessUpperBound; - uint32_t DynamicStateAccessUpperBoundModifyEnable; + bool DynamicStateAccessUpperBoundModifyEnable; __gen_address_type IndirectObjectAccessUpperBound; - uint32_t IndirectObjectAccessUpperBoundModifyEnable; + bool IndirectObjectAccessUpperBoundModifyEnable; __gen_address_type InstructionAccessUpperBound; - uint32_t InstructionAccessUpperBoundModifyEnable; + bool InstructionAccessUpperBoundModifyEnable; }; static inline void @@ -546,11 +546,11 @@ struct GEN75_3DPRIMITIVE { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t IndirectParameterEnable; + bool IndirectParameterEnable; uint32_t UAVCoherencyRequired; - uint32_t PredicateEnable; + bool PredicateEnable; uint32_t DwordLength; - uint32_t EndOffsetEnable; + bool EndOffsetEnable; #define SEQUENTIAL 0 #define RANDOM 1 uint32_t VertexAccessType; @@ -1272,7 +1272,7 @@ struct GEN75_3DSTATE_CLEAR_PARAMS { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; uint32_t DepthClearValue; - uint32_t DepthClearValueValid; + bool DepthClearValueValid; }; static inline void 
@@ -1316,27 +1316,27 @@ struct GEN75_3DSTATE_CLIP { uint32_t DwordLength; uint32_t FrontWinding; uint32_t VertexSubPixelPrecisionSelect; - uint32_t EarlyCullEnable; + bool EarlyCullEnable; #define CULLMODE_BOTH 0 #define CULLMODE_NONE 1 #define CULLMODE_FRONT 2 #define CULLMODE_BACK 3 uint32_t CullMode; - uint32_t ClipperStatisticsEnable; + bool ClipperStatisticsEnable; uint32_t UserClipDistanceCullTestEnableBitmask; - uint32_t ClipEnable; + bool ClipEnable; #define APIMODE_OGL 0 uint32_t APIMode; - uint32_t ViewportXYClipTestEnable; - uint32_t ViewportZClipTestEnable; - uint32_t GuardbandClipTestEnable; + bool ViewportXYClipTestEnable; + bool ViewportZClipTestEnable; + bool GuardbandClipTestEnable; uint32_t UserClipDistanceClipTestEnableBitmask; #define CLIPMODE_NORMAL 0 #define CLIPMODE_REJECT_ALL 3 #define CLIPMODE_ACCEPT_ALL 4 uint32_t ClipMode; - uint32_t PerspectiveDivideDisable; - uint32_t NonPerspectiveBarycentricEnable; + bool PerspectiveDivideDisable; + bool NonPerspectiveBarycentricEnable; #define Vertex0 0 #define Vertex1 1 #define Vertex2 2 @@ -1350,7 +1350,7 @@ struct GEN75_3DSTATE_CLIP { uint32_t TriangleFanProvokingVertexSelect; float MinimumPointWidth; float MaximumPointWidth; - uint32_t ForceZeroRTAIndexEnable; + bool ForceZeroRTAIndexEnable; uint32_t MaximumVPIndex; }; @@ -1654,9 +1654,9 @@ struct GEN75_3DSTATE_DEPTH_BUFFER { #define SURFTYPE_CUBE 3 #define SURFTYPE_NULL 7 uint32_t SurfaceType; - uint32_t DepthWriteEnable; - uint32_t StencilWriteEnable; - uint32_t HierarchicalDepthBufferEnable; + bool DepthWriteEnable; + bool StencilWriteEnable; + bool HierarchicalDepthBufferEnable; #define D32_FLOAT 1 #define D24_UNORM_X8_UINT 3 #define D16_UNORM 5 @@ -1862,19 +1862,19 @@ struct GEN75_3DSTATE_DS { #define IEEE754 0 #define Alternate 1 uint32_t FloatingPointMode; - uint32_t AccessesUAV; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool AccessesUAV; + bool IllegalOpcodeExceptionEnable; + bool 
SoftwareExceptionEnable; uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t PatchURBEntryReadLength; uint32_t PatchURBEntryReadOffset; uint32_t MaximumNumberofThreads; - uint32_t StatisticsEnable; - uint32_t ComputeWCoordinateEnable; - uint32_t DSCacheDisable; - uint32_t DSFunctionEnable; + bool StatisticsEnable; + bool ComputeWCoordinateEnable; + bool DSCacheDisable; + bool DSFunctionEnable; }; static inline void @@ -2099,7 +2099,7 @@ struct GEN75_3DSTATE_GATHER_CONSTANT_PS { uint32_t ConstantBufferValid; uint32_t ConstantBufferBindingTableBlock; uint32_t GatherBufferOffset; - uint32_t ConstantBufferDx9Enable; + bool ConstantBufferDx9Enable; /* variable length fields follow */ }; @@ -2146,7 +2146,7 @@ struct GEN75_3DSTATE_GATHER_CONSTANT_VS { uint32_t ConstantBufferValid; uint32_t ConstantBufferBindingTableBlock; uint32_t GatherBufferOffset; - uint32_t ConstantBufferDx9Enable; + bool ConstantBufferDx9Enable; /* variable length fields follow */ }; @@ -2193,7 +2193,7 @@ struct GEN75_3DSTATE_GATHER_POOL_ALLOC { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; __gen_address_type GatherPoolBaseAddress; - uint32_t GatherPoolEnable; + bool GatherPoolEnable; struct GEN75_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; __gen_address_type GatherPoolUpperBound; }; @@ -2264,16 +2264,16 @@ struct GEN75_3DSTATE_GS { #define IEEE754 0 #define alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; + bool IllegalOpcodeExceptionEnable; uint32_t GSaccessesUAV; - uint32_t MaskStackExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t OutputVertexSize; uint32_t OutputTopology; uint32_t VertexURBEntryReadLength; - uint32_t IncludeVertexHandles; + bool IncludeVertexHandles; uint32_t VertexURBEntryReadOffset; uint32_t 
DispatchGRFStartRegisterforURBData; uint32_t MaximumNumberofThreads; @@ -2286,13 +2286,13 @@ struct GEN75_3DSTATE_GS { uint32_t DispatchMode; uint32_t GSStatisticsEnable; uint32_t GSInvocationsIncrementValue; - uint32_t IncludePrimitiveID; + bool IncludePrimitiveID; uint32_t Hint; #define REORDER_LEADING 0 #define REORDER_TRAILING 1 uint32_t ReorderMode; - uint32_t DiscardAdjacency; - uint32_t GSEnable; + bool DiscardAdjacency; + bool GSEnable; #define GSCTL_CUT 0 #define GSCTL_SID 1 uint32_t ControlDataFormat; @@ -2443,11 +2443,11 @@ struct GEN75_3DSTATE_HS { #define IEEE754 0 #define alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; uint32_t MaximumNumberofThreads; - uint32_t Enable; - uint32_t StatisticsEnable; + bool Enable; + bool StatisticsEnable; uint32_t InstanceCount; uint32_t KernelStartPointer; uint32_t ScratchSpaceBasePointer; @@ -2456,8 +2456,8 @@ struct GEN75_3DSTATE_HS { #define Dmask 0 #define Vmask 1 uint32_t VectorMaskEnable; - uint32_t HSaccessesUAV; - uint32_t IncludeVertexHandles; + bool HSaccessesUAV; + bool IncludeVertexHandles; uint32_t DispatchGRFStartRegisterForURBData; uint32_t VertexURBEntryReadLength; uint32_t VertexURBEntryReadOffset; @@ -2590,7 +2590,7 @@ struct GEN75_3DSTATE_LINE_STIPPLE { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; uint32_t CurrentRepeatCounter; uint32_t CurrentStippleIndex; uint32_t LineStipplePattern; @@ -2681,7 +2681,7 @@ struct GEN75_3DSTATE_MULTISAMPLE { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t MultiSampleEnable; + bool MultiSampleEnable; #define PIXLOC_CENTER 0 #define PIXLOC_UL_CORNER 1 uint32_t PixelLocation; @@ -2867,27 +2867,27 @@ struct GEN75_3DSTATE_PS { #define RD 2 
#define RTZ 3 uint32_t RoundingMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t MaskStackExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreads; uint32_t SampleMask; - uint32_t PushConstantEnable; - uint32_t AttributeEnable; - uint32_t oMaskPresenttoRenderTarget; - uint32_t RenderTargetFastClearEnable; - uint32_t DualSourceBlendEnable; - uint32_t RenderTargetResolveEnable; - uint32_t PSAccessesUAV; + bool PushConstantEnable; + bool AttributeEnable; + bool oMaskPresenttoRenderTarget; + bool RenderTargetFastClearEnable; + bool DualSourceBlendEnable; + bool RenderTargetResolveEnable; + bool PSAccessesUAV; #define POSOFFSET_NONE 0 #define POSOFFSET_CENTROID 2 #define POSOFFSET_SAMPLE 3 uint32_t PositionXYOffsetSelect; - uint32_t _32PixelDispatchEnable; - uint32_t _16PixelDispatchEnable; - uint32_t _8PixelDispatchEnable; + bool _32PixelDispatchEnable; + bool _16PixelDispatchEnable; + bool _8PixelDispatchEnable; uint32_t DispatchGRFStartRegisterforConstantSetupData0; uint32_t DispatchGRFStartRegisterforConstantSetupData1; uint32_t DispatchGRFStartRegisterforConstantSetupData2; @@ -3616,16 +3616,16 @@ struct GEN75_3DSTATE_SBE { uint32_t DwordLength; uint32_t AttributeSwizzleControlMode; uint32_t NumberofSFOutputAttributes; - uint32_t AttributeSwizzleEnable; + bool AttributeSwizzleEnable; #define UPPERLEFT 0 #define LOWERLEFT 1 uint32_t PointSpriteTextureCoordinateOrigin; uint32_t VertexURBEntryReadLength; uint32_t VertexURBEntryReadOffset; - uint32_t Attribute2n1ComponentOverrideW; - uint32_t Attribute2n1ComponentOverrideZ; - uint32_t Attribute2n1ComponentOverrideY; - uint32_t Attribute2n1ComponentOverrideX; + bool Attribute2n1ComponentOverrideW; + bool Attribute2n1ComponentOverrideZ; + bool Attribute2n1ComponentOverrideY; + bool Attribute2n1ComponentOverrideX; #define 
CONST_0000 0 #define CONST_0001_FLOAT 1 #define CONST_1111_FLOAT 2 @@ -3637,10 +3637,10 @@ struct GEN75_3DSTATE_SBE { #define INPUTATTR_FACING_W 3 uint32_t Attribute2n1SwizzleSelect; uint32_t Attribute2n1SourceAttribute; - uint32_t Attribute2nComponentOverrideW; - uint32_t Attribute2nComponentOverrideZ; - uint32_t Attribute2nComponentOverrideY; - uint32_t Attribute2nComponentOverrideX; + bool Attribute2nComponentOverrideW; + bool Attribute2nComponentOverrideZ; + bool Attribute2nComponentOverrideY; + bool Attribute2nComponentOverrideX; #define CONST_0000 0 #define CONST_0001_FLOAT 1 #define CONST_1111_FLOAT 2 @@ -3803,11 +3803,11 @@ struct GEN75_3DSTATE_SF { #define D24_UNORM_X8_UINT 3 #define D16_UNORM 5 uint32_t DepthBufferSurfaceFormat; - uint32_t LegacyGlobalDepthBiasEnable; - uint32_t StatisticsEnable; - uint32_t GlobalDepthOffsetEnableSolid; - uint32_t GlobalDepthOffsetEnableWireframe; - uint32_t GlobalDepthOffsetEnablePoint; + bool LegacyGlobalDepthBiasEnable; + bool StatisticsEnable; + bool GlobalDepthOffsetEnableSolid; + bool GlobalDepthOffsetEnableWireframe; + bool GlobalDepthOffsetEnablePoint; #define RASTER_SOLID 0 #define RASTER_WIREFRAME 1 #define RASTER_POINT 2 @@ -3816,9 +3816,9 @@ struct GEN75_3DSTATE_SF { #define RASTER_WIREFRAME 1 #define RASTER_POINT 2 uint32_t BackFaceFillMode; - uint32_t ViewTransformEnable; + bool ViewTransformEnable; uint32_t FrontWinding; - uint32_t AntiAliasingEnable; + bool AntiAliasingEnable; #define CULLMODE_BOTH 0 #define CULLMODE_NONE 1 #define CULLMODE_FRONT 2 @@ -3826,11 +3826,11 @@ struct GEN75_3DSTATE_SF { uint32_t CullMode; float LineWidth; uint32_t LineEndCapAntialiasingRegionWidth; - uint32_t LineStippleEnable; - uint32_t ScissorRectangleEnable; - uint32_t RTIndependentRasterizationEnable; + bool LineStippleEnable; + bool ScissorRectangleEnable; + bool RTIndependentRasterizationEnable; uint32_t MultisampleRasterizationMode; - uint32_t LastPixelEnable; + bool LastPixelEnable; #define Vertex0 0 #define Vertex1 1 
#define Vertex2 2 @@ -4153,7 +4153,7 @@ struct GEN75_3DSTATE_STREAMOUT { #define LEADING 0 #define TRAILING 1 uint32_t ReorderMode; - uint32_t SOStatisticsEnable; + bool SOStatisticsEnable; uint32_t SOBufferEnable3; uint32_t SOBufferEnable2; uint32_t SOBufferEnable1; @@ -4227,9 +4227,9 @@ struct GEN75_3DSTATE_TE { #define EVEN_FRACTIONAL 2 uint32_t Partitioning; #define POINT 0 -#define LINE 1 -#define TRI_CW 2 -#define TRI_CCW 3 +#define OUTPUT_LINE 1 +#define OUTPUT_TRI_CW 2 +#define OUTPUT_TRI_CCW 3 uint32_t OutputTopology; #define QUAD 0 #define TRI 1 @@ -4238,7 +4238,7 @@ struct GEN75_3DSTATE_TE { #define HW_TESS 0 #define SW_TESS 1 uint32_t TEMode; - uint32_t TEEnable; + bool TEEnable; float MaximumTessellationFactorOdd; float MaximumTessellationFactorNotOdd; }; @@ -4415,7 +4415,7 @@ struct GEN75_VERTEX_BUFFER_STATE { uint32_t BufferAccessType; struct GEN75_MEMORY_OBJECT_CONTROL_STATE VertexBufferMemoryObjectControlState; uint32_t AddressModifyEnable; - uint32_t NullVertexBuffer; + bool NullVertexBuffer; uint32_t VertexFetchInvalidate; uint32_t BufferPitch; __gen_address_type BufferStartingAddress; @@ -4494,9 +4494,9 @@ GEN75_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, struct GEN75_VERTEX_ELEMENT_STATE { uint32_t VertexBufferIndex; - uint32_t Valid; + bool Valid; uint32_t SourceElementFormat; - uint32_t EdgeFlagEnable; + bool EdgeFlagEnable; uint32_t SourceElementOffset; uint32_t Component0Control; uint32_t Component1Control; @@ -4567,7 +4567,7 @@ struct GEN75_3DSTATE_VF { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t IndexedDrawCutIndexEnable; + bool IndexedDrawCutIndexEnable; uint32_t DwordLength; uint32_t CutIndex; }; @@ -4606,7 +4606,7 @@ struct GEN75_3DSTATE_VF_STATISTICS { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t StatisticsEnable; + bool StatisticsEnable; }; static inline void @@ -4736,18 +4736,18 @@ struct GEN75_3DSTATE_VS { 
#define IEEE754 0 #define Alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t VSaccessesUAV; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool VSaccessesUAV; + bool SoftwareExceptionEnable; uint32_t ScratchSpaceBaseOffset; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterforURBData; uint32_t VertexURBEntryReadLength; uint32_t VertexURBEntryReadOffset; uint32_t MaximumNumberofThreads; - uint32_t StatisticsEnable; - uint32_t VertexCacheDisable; - uint32_t VSFunctionEnable; + bool StatisticsEnable; + bool VertexCacheDisable; + bool VSFunctionEnable; }; static inline void @@ -4815,13 +4815,13 @@ struct GEN75_3DSTATE_WM { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t StatisticsEnable; - uint32_t DepthBufferClear; - uint32_t ThreadDispatchEnable; - uint32_t DepthBufferResolveEnable; - uint32_t HierarchicalDepthBufferResolveEnable; - uint32_t LegacyDiamondLineRasterization; - uint32_t PixelShaderKillPixel; + bool StatisticsEnable; + bool DepthBufferClear; + bool ThreadDispatchEnable; + bool DepthBufferResolveEnable; + bool HierarchicalDepthBufferResolveEnable; + bool LegacyDiamondLineRasterization; + bool PixelShaderKillPixel; #define PSCDEPTH_OFF 0 #define PSCDEPTH_ON 1 #define PSCDEPTH_ON_GE 2 @@ -4831,19 +4831,19 @@ struct GEN75_3DSTATE_WM { #define EDSC_PSEXEC 1 #define EDSC_PREPS 2 uint32_t EarlyDepthStencilControl; - uint32_t PixelShaderUsesSourceDepth; - uint32_t PixelShaderUsesSourceW; + bool PixelShaderUsesSourceDepth; + bool PixelShaderUsesSourceW; #define INTERP_PIXEL 0 #define INTERP_CENTROID 2 #define INTERP_SAMPLE 3 uint32_t PositionZWInterpolationMode; uint32_t BarycentricInterpolationMode; - uint32_t PixelShaderUsesInputCoverageMask; + bool PixelShaderUsesInputCoverageMask; uint32_t LineEndCapAntialiasingRegionWidth; uint32_t LineAntialiasingRegionWidth; - uint32_t RTIndependentRasterizationEnable; - uint32_t 
PolygonStippleEnable; - uint32_t LineStippleEnable; + bool RTIndependentRasterizationEnable; + bool PolygonStippleEnable; + bool LineStippleEnable; #define RASTRULE_UPPER_LEFT 0 #define RASTRULE_UPPER_RIGHT 1 uint32_t PointRasterizationRule; @@ -4919,7 +4919,7 @@ struct GEN75_GPGPU_OBJECT { uint32_t Pipeline; uint32_t MediaCommandOpcode; uint32_t SubOpcode; - uint32_t PredicateEnable; + bool PredicateEnable; uint32_t DwordLength; uint32_t SharedLocalMemoryFixedOffset; uint32_t InterfaceDescriptorOffset; @@ -5004,8 +5004,8 @@ struct GEN75_GPGPU_WALKER { uint32_t Pipeline; uint32_t MediaCommandOpcode; uint32_t SubOpcodeA; - uint32_t IndirectParameterEnable; - uint32_t PredicateEnable; + bool IndirectParameterEnable; + bool PredicateEnable; uint32_t DwordLength; uint32_t InterfaceDescriptorOffset; #define SIMD8 0 @@ -5192,7 +5192,7 @@ struct GEN75_MEDIA_OBJECT { uint32_t MediaCommandSubOpcode; uint32_t DwordLength; uint32_t InterfaceDescriptorOffset; - uint32_t ChildrenPresent; + bool ChildrenPresent; #define Nothreadsynchronization 0 #define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 uint32_t ThreadSynchronization; @@ -5212,7 +5212,7 @@ struct GEN75_MEDIA_OBJECT { uint32_t ScoredboardY; uint32_t ScoreboardX; uint32_t ScoreboardColor; - uint32_t ScoreboardMask; + bool ScoreboardMask; /* variable length fields follow */ }; @@ -5278,8 +5278,8 @@ struct GEN75_MEDIA_OBJECT_PRT { uint32_t SubOpcode; uint32_t DwordLength; uint32_t InterfaceDescriptorOffset; - uint32_t ChildrenPresent; - uint32_t PRT_FenceNeeded; + bool ChildrenPresent; + bool PRT_FenceNeeded; #define Rootthreadqueue 0 #define VFEstateflush 1 uint32_t PRT_FenceType; @@ -5333,7 +5333,7 @@ struct GEN75_MEDIA_OBJECT_WALKER { uint32_t SubOpcode; uint32_t DwordLength; uint32_t InterfaceDescriptorOffset; - uint32_t ChildrenPresent; + bool ChildrenPresent; #define Nothreadsynchronization 0 #define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 uint32_t ThreadSynchronization; @@ -5342,10 
+5342,10 @@ struct GEN75_MEDIA_OBJECT_WALKER { uint32_t UseScoreboard; uint32_t IndirectDataLength; uint32_t IndirectDataStartAddress; - uint32_t ScoreboardMask; - uint32_t DualMode; - uint32_t Repel; - uint32_t QuadMode; + bool ScoreboardMask; + bool DualMode; + bool Repel; + bool QuadMode; uint32_t ColorCountMinusOne; uint32_t MiddleLoopExtraSteps; uint32_t LocalMidLoopUnitY; @@ -5483,8 +5483,8 @@ struct GEN75_MEDIA_STATE_FLUSH { uint32_t MediaCommandOpcode; uint32_t SubOpcode; uint32_t DwordLength; - uint32_t DisablePreemption; - uint32_t FlushtoGO; + bool DisablePreemption; + bool FlushtoGO; uint32_t WatermarkRequired; uint32_t InterfaceDescriptorOffset; }; @@ -5667,7 +5667,7 @@ GEN75_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, struct GEN75_MI_ARB_ON_OFF { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t ArbitrationEnable; + bool ArbitrationEnable; }; static inline void @@ -5721,11 +5721,11 @@ struct GEN75_MI_BATCH_BUFFER_START { #define _1stlevelbatch 0 #define _2ndlevelbatch 1 uint32_t _2ndLevelBatchBuffer; - uint32_t AddOffsetEnable; - uint32_t PredicationEnable; + bool AddOffsetEnable; + bool PredicationEnable; uint32_t NonPrivileged; - uint32_t ClearCommandBufferEnable; - uint32_t ResourceStreamerEnable; + bool ClearCommandBufferEnable; + bool ResourceStreamerEnable; #define ASI_GGTT 0 #define ASI_PPGTT 1 uint32_t AddressSpaceIndicator; @@ -5861,17 +5861,17 @@ GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restric struct GEN75_MI_FLUSH { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t IndirectStatePointersDisable; - uint32_t GenericMediaStateClear; + bool IndirectStatePointersDisable; + bool GenericMediaStateClear; #define DontReset 0 #define Reset 1 - uint32_t GlobalSnapshotCountReset; + bool GlobalSnapshotCountReset; #define Flush 0 #define DontFlush 1 - uint32_t RenderCacheFlushInhibit; + bool RenderCacheFlushInhibit; #define DontInvalidate 0 #define Invalidate 1 - uint32_t 
StateInstructionCacheInvalidate; + bool StateInstructionCacheInvalidate; }; static inline void @@ -5941,7 +5941,7 @@ GEN75_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, struct GEN75_MI_LOAD_REGISTER_MEM { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t UseGlobalGTT; + bool UseGlobalGTT; uint32_t AsyncModeEnable; uint32_t DwordLength; uint32_t RegisterAddress; @@ -6174,7 +6174,7 @@ GEN75_MI_MATH_pack(__gen_user_data *data, void * restrict dst, struct GEN75_MI_NOOP { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t IdentificationNumberRegisterWriteEnable; + bool IdentificationNumberRegisterWriteEnable; uint32_t IdentificationNumber; }; @@ -6202,9 +6202,9 @@ GEN75_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, struct GEN75_MI_PREDICATE { uint32_t CommandType; uint32_t MICommandOpcode; -#define KEEP 0 -#define LOAD 2 -#define LOADINV 3 +#define LOAD_KEEP 0 +#define LOAD_LOAD 2 +#define LOAD_LOADINV 3 uint32_t LoadOperation; #define COMBINE_SET 0 #define COMBINE_AND 1 @@ -6265,8 +6265,8 @@ GEN75_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, struct GEN75_MI_RS_CONTEXT { uint32_t CommandType; uint32_t MICommandOpcode; -#define Restore 0 -#define Save 1 +#define RS_RESTORE 0 +#define RS_SAVE 1 uint32_t ResourceStreamerSave; }; @@ -6293,8 +6293,8 @@ GEN75_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, struct GEN75_MI_RS_CONTROL { uint32_t CommandType; uint32_t MICommandOpcode; -#define Stop 0 -#define Start 1 +#define RS_STOP 0 +#define RS_START 1 uint32_t ResourceStreamerControl; }; @@ -6412,9 +6412,9 @@ struct GEN75_MI_SET_CONTEXT { uint32_t DwordLength; __gen_address_type LogicalContextAddress; uint32_t ReservedMustbe1; - uint32_t CoreModeEnable; - uint32_t ResourceStreamerStateSaveEnable; - uint32_t ResourceStreamerStateRestoreEnable; + bool CoreModeEnable; + bool ResourceStreamerStateSaveEnable; + bool ResourceStreamerStateRestoreEnable; uint32_t ForceRestore; uint32_t 
RestoreInhibit; }; @@ -6459,7 +6459,7 @@ struct GEN75_MI_SET_PREDICATE { #define PredicateonClear 1 #define PredicateonSet 2 #define PredicateDisable 3 - uint32_t PREDICATEENABLE; + bool PREDICATEENABLE; }; static inline void @@ -6486,7 +6486,7 @@ GEN75_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, struct GEN75_MI_STORE_DATA_IMM { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t UseGlobalGTT; + bool UseGlobalGTT; uint32_t DwordLength; uint32_t Address; uint32_t CoreModeEnable; @@ -6615,7 +6615,7 @@ GEN75_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, struct GEN75_MI_SUSPEND_FLUSH { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t SuspendFlush; + bool SuspendFlush; }; static inline void @@ -6791,23 +6791,23 @@ GEN75_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, struct GEN75_MI_WAIT_FOR_EVENT { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t DisplayPipeCHorizontalBlankWaitEnable; - uint32_t DisplayPipeCVerticalBlankWaitEnable; - uint32_t DisplaySpriteCFlipPendingWaitEnable; + bool DisplayPipeCHorizontalBlankWaitEnable; + bool DisplayPipeCVerticalBlankWaitEnable; + bool DisplaySpriteCFlipPendingWaitEnable; #define Notenabled 0 uint32_t ConditionCodeWaitSelect; - uint32_t DisplayPlaneCFlipPendingWaitEnable; - uint32_t DisplayPipeCScanLineWaitEnable; - uint32_t DisplayPipeBHorizontalBlankWaitEnable; - uint32_t DisplayPipeBVerticalBlankWaitEnable; - uint32_t DisplaySpriteBFlipPendingWaitEnable; - uint32_t DisplayPlaneBFlipPendingWaitEnable; - uint32_t DisplayPipeBScanLineWaitEnable; - uint32_t DisplayPipeAHorizontalBlankWaitEnable; - uint32_t DisplayPipeAVerticalBlankWaitEnable; - uint32_t DisplaySpriteAFlipPendingWaitEnable; - uint32_t DisplayPlaneAFlipPendingWaitEnable; - uint32_t DisplayPipeAScanLineWaitEnable; + bool DisplayPlaneCFlipPendingWaitEnable; + bool DisplayPipeCScanLineWaitEnable; + bool DisplayPipeBHorizontalBlankWaitEnable; + bool 
DisplayPipeBVerticalBlankWaitEnable; + bool DisplaySpriteBFlipPendingWaitEnable; + bool DisplayPlaneBFlipPendingWaitEnable; + bool DisplayPipeBScanLineWaitEnable; + bool DisplayPipeAHorizontalBlankWaitEnable; + bool DisplayPipeAVerticalBlankWaitEnable; + bool DisplaySpriteAFlipPendingWaitEnable; + bool DisplayPlaneAFlipPendingWaitEnable; + bool DisplayPipeAScanLineWaitEnable; }; static inline void @@ -6866,29 +6866,29 @@ struct GEN75_PIPE_CONTROL { #define Reset 1 uint32_t GlobalSnapshotCountReset; uint32_t TLBInvalidate; - uint32_t GenericMediaStateClear; + bool GenericMediaStateClear; #define NoWrite 0 #define WriteImmediateData 1 #define WritePSDepthCount 2 #define WriteTimestamp 3 uint32_t PostSyncOperation; - uint32_t DepthStallEnable; + bool DepthStallEnable; #define DisableFlush 0 #define EnableFlush 1 - uint32_t RenderTargetCacheFlushEnable; - uint32_t InstructionCacheInvalidateEnable; - uint32_t TextureCacheInvalidationEnable; - uint32_t IndirectStatePointersDisable; - uint32_t NotifyEnable; - uint32_t PipeControlFlushEnable; - uint32_t DCFlushEnable; - uint32_t VFCacheInvalidationEnable; - uint32_t ConstantCacheInvalidationEnable; - uint32_t StateCacheInvalidationEnable; - uint32_t StallAtPixelScoreboard; + bool RenderTargetCacheFlushEnable; + bool InstructionCacheInvalidateEnable; + bool TextureCacheInvalidationEnable; + bool IndirectStatePointersDisable; + bool NotifyEnable; + bool PipeControlFlushEnable; + bool DCFlushEnable; + bool VFCacheInvalidationEnable; + bool ConstantCacheInvalidationEnable; + bool StateCacheInvalidationEnable; + bool StallAtPixelScoreboard; #define FlushDisabled 0 #define FlushEnabled 1 - uint32_t DepthCacheFlushEnable; + bool DepthCacheFlushEnable; __gen_address_type Address; uint32_t ImmediateData; uint32_t ImmediateData0; @@ -7064,8 +7064,8 @@ GEN75_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, #define GEN75_BLEND_STATE_length 0x00000002 struct GEN75_BLEND_STATE { - uint32_t ColorBufferBlendEnable; - 
uint32_t IndependentAlphaBlendEnable; + bool ColorBufferBlendEnable; + bool IndependentAlphaBlendEnable; #define BLENDFUNCTION_ADD 0 #define BLENDFUNCTION_SUBTRACT 1 #define BLENDFUNCTION_REVERSE_SUBTRACT 2 @@ -7101,14 +7101,14 @@ struct GEN75_BLEND_STATE { uint32_t ColorBlendFunction; uint32_t SourceBlendFactor; uint32_t DestinationBlendFactor; - uint32_t AlphaToCoverageEnable; - uint32_t AlphaToOneEnable; - uint32_t AlphaToCoverageDitherEnable; - uint32_t WriteDisableAlpha; - uint32_t WriteDisableRed; - uint32_t WriteDisableGreen; - uint32_t WriteDisableBlue; - uint32_t LogicOpEnable; + bool AlphaToCoverageEnable; + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; + bool WriteDisableAlpha; + bool WriteDisableRed; + bool WriteDisableGreen; + bool WriteDisableBlue; + bool LogicOpEnable; #define LOGICOP_CLEAR 0 #define LOGICOP_NOR 1 #define LOGICOP_AND_INVERTED 2 @@ -7126,7 +7126,7 @@ struct GEN75_BLEND_STATE { #define LOGICOP_OR 14 #define LOGICOP_SET 15 uint32_t LogicOpFunction; - uint32_t AlphaTestEnable; + bool AlphaTestEnable; #define COMPAREFUNCTION_ALWAYS 0 #define COMPAREFUNCTION_NEVER 1 #define COMPAREFUNCTION_LESS 2 @@ -7136,15 +7136,15 @@ struct GEN75_BLEND_STATE { #define COMPAREFUNCTION_NOTEQUAL 6 #define COMPAREFUNCTION_GEQUAL 7 uint32_t AlphaTestFunction; - uint32_t ColorDitherEnable; + bool ColorDitherEnable; uint32_t XDitherOffset; uint32_t YDitherOffset; #define COLORCLAMP_UNORM 0 #define COLORCLAMP_SNORM 1 #define COLORCLAMP_RTFORMAT 2 uint32_t ColorClampRange; - uint32_t PreBlendColorClampEnable; - uint32_t PostBlendColorClampEnable; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; }; static inline void @@ -7267,7 +7267,7 @@ GEN75_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, #define GEN75_DEPTH_STENCIL_STATE_length 0x00000003 struct GEN75_DEPTH_STENCIL_STATE { - uint32_t StencilTestEnable; + bool StencilTestEnable; #define COMPAREFUNCTION_ALWAYS 0 #define COMPAREFUNCTION_NEVER 1 #define 
COMPAREFUNCTION_LESS 2 @@ -7288,8 +7288,8 @@ struct GEN75_DEPTH_STENCIL_STATE { uint32_t StencilFailOp; uint32_t StencilPassDepthFailOp; uint32_t StencilPassDepthPassOp; - uint32_t StencilBufferWriteEnable; - uint32_t DoubleSidedStencilEnable; + bool StencilBufferWriteEnable; + bool DoubleSidedStencilEnable; #define COMPAREFUNCTION_ALWAYS 0 #define COMPAREFUNCTION_NEVER 1 #define COMPAREFUNCTION_LESS 2 @@ -7314,7 +7314,7 @@ struct GEN75_DEPTH_STENCIL_STATE { uint32_t StencilWriteMask; uint32_t BackfaceStencilTestMask; uint32_t BackfaceStencilWriteMask; - uint32_t DepthTestEnable; + bool DepthTestEnable; #define COMPAREFUNCTION_ALWAYS 0 #define COMPAREFUNCTION_NEVER 1 #define COMPAREFUNCTION_LESS 2 @@ -7324,7 +7324,7 @@ struct GEN75_DEPTH_STENCIL_STATE { #define COMPAREFUNCTION_NOTEQUAL 6 #define COMPAREFUNCTION_GEQUAL 7 uint32_t DepthTestFunction; - uint32_t DepthBufferWriteEnable; + bool DepthBufferWriteEnable; }; static inline void @@ -7407,9 +7407,9 @@ struct GEN75_INTERFACE_DESCRIPTOR_DATA { #define IEEE754 0 #define Alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t MaskStackExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; uint32_t SamplerStatePointer; #define Nosamplersused 0 #define Between1and4samplersused 1 @@ -7425,7 +7425,7 @@ struct GEN75_INTERFACE_DESCRIPTOR_DATA { #define RD 2 #define RTZ 3 uint32_t RoundingMode; - uint32_t BarrierEnable; + bool BarrierEnable; uint32_t SharedLocalMemorySize; uint32_t NumberofThreadsinGPGPUThreadGroup; uint32_t CrossThreadConstantDataReadLength; @@ -7511,7 +7511,7 @@ struct GEN75_RENDER_SURFACE_STATE { #define SURFTYPE_STRBUF 5 #define SURFTYPE_NULL 7 uint32_t SurfaceType; - uint32_t SurfaceArray; + bool SurfaceArray; uint32_t SurfaceFormat; uint32_t SurfaceVerticalAlignment; #define HALIGN_4 0 @@ -7561,8 +7561,8 @@ struct GEN75_RENDER_SURFACE_STATE { __gen_address_type 
MCSBaseAddress; uint32_t MCSSurfacePitch; __gen_address_type AppendCounterAddress; - uint32_t AppendCounterEnable; - uint32_t MCSEnable; + bool AppendCounterEnable; + bool MCSEnable; uint32_t ReservedMBZ; uint32_t XOffsetforUVPlane; uint32_t YOffsetforUVPlane; @@ -7716,7 +7716,7 @@ GEN75_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst #define GEN75_SAMPLER_STATE_length 0x00000004 struct GEN75_SAMPLER_STATE { - uint32_t SamplerDisable; + bool SamplerDisable; #define DX10OGL 0 #define DX9 1 uint32_t TextureBorderColorMode; @@ -7756,7 +7756,7 @@ struct GEN75_SAMPLER_STATE { #define OVERRIDE 1 uint32_t CubeSurfaceControlMode; uint32_t BorderColorPointer; - uint32_t ChromaKeyEnable; + bool ChromaKeyEnable; uint32_t ChromaKeyIndex; #define KEYFILTER_KILL_ON_ANY_MATCH 0 #define KEYFILTER_REPLACE_BLACK 1 @@ -7770,18 +7770,18 @@ struct GEN75_SAMPLER_STATE { #define RATIO141 6 #define RATIO161 7 uint32_t MaximumAnisotropy; - uint32_t RAddressMinFilterRoundingEnable; - uint32_t RAddressMagFilterRoundingEnable; - uint32_t VAddressMinFilterRoundingEnable; - uint32_t VAddressMagFilterRoundingEnable; - uint32_t UAddressMinFilterRoundingEnable; - uint32_t UAddressMagFilterRoundingEnable; + bool RAddressMinFilterRoundingEnable; + bool RAddressMagFilterRoundingEnable; + bool VAddressMinFilterRoundingEnable; + bool VAddressMagFilterRoundingEnable; + bool UAddressMinFilterRoundingEnable; + bool UAddressMagFilterRoundingEnable; #define FULL 0 #define TRIQUAL_HIGHMAG_CLAMP_MIPFILTER 1 #define MED 2 #define LOW 3 uint32_t TrilinearFilterQuality; - uint32_t NonnormalizedCoordinateEnable; + bool NonnormalizedCoordinateEnable; uint32_t TCXAddressControlMode; uint32_t TCYAddressControlMode; uint32_t TCZAddressControlMode; @@ -7846,7 +7846,7 @@ GEN75_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, #define _3DPRIM_QUADLIST 7 #define _3DPRIM_QUADSTRIP 8 #define _3DPRIM_LINELIST_ADJ 9 -#define _3DPRIM_LISTSTRIP_ADJ 10 +#define _3DPRIM_LINESTRIP_ADJ 10 
#define _3DPRIM_TRILIST_ADJ 11 #define _3DPRIM_TRISTRIP_ADJ 12 #define _3DPRIM_TRISTRIP_REVERSE 13 diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index db3ddb22d2e..f9121b78868 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -144,7 +144,7 @@ GEN7_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, struct GEN7_MI_STORE_REGISTER_MEM { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t UseGlobalGTT; + bool UseGlobalGTT; uint32_t DwordLength; uint32_t RegisterAddress; __gen_address_type MemoryAddress; @@ -249,27 +249,27 @@ struct GEN7_STATE_BASE_ADDRESS { struct GEN7_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; struct GEN7_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; uint32_t StatelessDataPortAccessForceWriteThru; - uint32_t GeneralStateBaseAddressModifyEnable; + bool GeneralStateBaseAddressModifyEnable; __gen_address_type SurfaceStateBaseAddress; struct GEN7_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; - uint32_t SurfaceStateBaseAddressModifyEnable; + bool SurfaceStateBaseAddressModifyEnable; __gen_address_type DynamicStateBaseAddress; struct GEN7_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; - uint32_t DynamicStateBaseAddressModifyEnable; + bool DynamicStateBaseAddressModifyEnable; __gen_address_type IndirectObjectBaseAddress; struct GEN7_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; - uint32_t IndirectObjectBaseAddressModifyEnable; + bool IndirectObjectBaseAddressModifyEnable; __gen_address_type InstructionBaseAddress; struct GEN7_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; - uint32_t InstructionBaseAddressModifyEnable; + bool InstructionBaseAddressModifyEnable; __gen_address_type GeneralStateAccessUpperBound; - uint32_t GeneralStateAccessUpperBoundModifyEnable; + bool GeneralStateAccessUpperBoundModifyEnable; __gen_address_type DynamicStateAccessUpperBound; - uint32_t 
DynamicStateAccessUpperBoundModifyEnable; + bool DynamicStateAccessUpperBoundModifyEnable; __gen_address_type IndirectObjectAccessUpperBound; - uint32_t IndirectObjectAccessUpperBoundModifyEnable; + bool IndirectObjectAccessUpperBoundModifyEnable; __gen_address_type InstructionAccessUpperBound; - uint32_t InstructionAccessUpperBoundModifyEnable; + bool InstructionAccessUpperBoundModifyEnable; }; static inline void @@ -508,10 +508,10 @@ struct GEN7_3DPRIMITIVE { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t IndirectParameterEnable; - uint32_t PredicateEnable; + bool IndirectParameterEnable; + bool PredicateEnable; uint32_t DwordLength; - uint32_t EndOffsetEnable; + bool EndOffsetEnable; #define SEQUENTIAL 0 #define RANDOM 1 uint32_t VertexAccessType; @@ -946,7 +946,7 @@ struct GEN7_3DSTATE_CLEAR_PARAMS { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; uint32_t DepthClearValue; - uint32_t DepthClearValueValid; + bool DepthClearValueValid; }; static inline void @@ -990,27 +990,27 @@ struct GEN7_3DSTATE_CLIP { uint32_t DwordLength; uint32_t FrontWinding; uint32_t VertexSubPixelPrecisionSelect; - uint32_t EarlyCullEnable; + bool EarlyCullEnable; #define CULLMODE_BOTH 0 #define CULLMODE_NONE 1 #define CULLMODE_FRONT 2 #define CULLMODE_BACK 3 uint32_t CullMode; - uint32_t ClipperStatisticsEnable; + bool ClipperStatisticsEnable; uint32_t UserClipDistanceCullTestEnableBitmask; - uint32_t ClipEnable; + bool ClipEnable; #define APIMODE_OGL 0 uint32_t APIMode; - uint32_t ViewportXYClipTestEnable; - uint32_t ViewportZClipTestEnable; - uint32_t GuardbandClipTestEnable; + bool ViewportXYClipTestEnable; + bool ViewportZClipTestEnable; + bool GuardbandClipTestEnable; uint32_t UserClipDistanceClipTestEnableBitmask; #define CLIPMODE_NORMAL 0 #define CLIPMODE_REJECT_ALL 3 #define CLIPMODE_ACCEPT_ALL 4 uint32_t ClipMode; - uint32_t PerspectiveDivideDisable; - uint32_t NonPerspectiveBarycentricEnable; + bool PerspectiveDivideDisable; + 
bool NonPerspectiveBarycentricEnable; #define Vertex0 0 #define Vertex1 1 #define Vertex2 2 @@ -1024,7 +1024,7 @@ struct GEN7_3DSTATE_CLIP { uint32_t TriangleFanProvokingVertexSelect; float MinimumPointWidth; float MaximumPointWidth; - uint32_t ForceZeroRTAIndexEnable; + bool ForceZeroRTAIndexEnable; uint32_t MaximumVPIndex; }; @@ -1328,9 +1328,9 @@ struct GEN7_3DSTATE_DEPTH_BUFFER { #define SURFTYPE_CUBE 3 #define SURFTYPE_NULL 7 uint32_t SurfaceType; - uint32_t DepthWriteEnable; - uint32_t StencilWriteEnable; - uint32_t HierarchicalDepthBufferEnable; + bool DepthWriteEnable; + bool StencilWriteEnable; + bool HierarchicalDepthBufferEnable; #define D32_FLOAT 1 #define D24_UNORM_X8_UINT 3 #define D16_UNORM 5 @@ -1528,18 +1528,18 @@ struct GEN7_3DSTATE_DS { #define IEEE754 0 #define Alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t PatchURBEntryReadLength; uint32_t PatchURBEntryReadOffset; uint32_t MaximumNumberofThreads; - uint32_t StatisticsEnable; - uint32_t ComputeWCoordinateEnable; - uint32_t DSCacheDisable; - uint32_t DSFunctionEnable; + bool StatisticsEnable; + bool ComputeWCoordinateEnable; + bool DSCacheDisable; + bool DSFunctionEnable; }; static inline void @@ -1624,15 +1624,15 @@ struct GEN7_3DSTATE_GS { #define IEEE754 0 #define alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t MaskStackExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t OutputVertexSize; uint32_t OutputTopology; uint32_t VertexURBEntryReadLength; - uint32_t IncludeVertexHandles; + bool IncludeVertexHandles; uint32_t 
VertexURBEntryReadOffset; uint32_t DispatchGRFStartRegisterforURBData; uint32_t MaximumNumberofThreads; @@ -1648,11 +1648,11 @@ struct GEN7_3DSTATE_GS { uint32_t DispatchMode; uint32_t GSStatisticsEnable; uint32_t GSInvocationsIncrementValue; - uint32_t IncludePrimitiveID; + bool IncludePrimitiveID; uint32_t Hint; - uint32_t ReorderEnable; - uint32_t DiscardAdjacency; - uint32_t GSEnable; + bool ReorderEnable; + bool DiscardAdjacency; + bool GSEnable; uint32_t SemaphoreHandle; }; @@ -1796,11 +1796,11 @@ struct GEN7_3DSTATE_HS { #define IEEE754 0 #define alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; uint32_t MaximumNumberofThreads; - uint32_t Enable; - uint32_t StatisticsEnable; + bool Enable; + bool StatisticsEnable; uint32_t InstanceCount; uint32_t KernelStartPointer; uint32_t ScratchSpaceBasePointer; @@ -1809,7 +1809,7 @@ struct GEN7_3DSTATE_HS { #define Dmask 0 #define Vmask 1 uint32_t VectorMaskEnable; - uint32_t IncludeVertexHandles; + bool IncludeVertexHandles; uint32_t DispatchGRFStartRegisterForURBData; uint32_t VertexURBEntryReadLength; uint32_t VertexURBEntryReadOffset; @@ -1884,7 +1884,7 @@ struct GEN7_3DSTATE_INDEX_BUFFER { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; struct GEN7_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; - uint32_t CutIndexEnable; + bool CutIndexEnable; #define INDEX_BYTE 0 #define INDEX_WORD 1 #define INDEX_DWORD 2 @@ -1942,7 +1942,7 @@ struct GEN7_3DSTATE_LINE_STIPPLE { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; uint32_t CurrentRepeatCounter; uint32_t CurrentStippleIndex; uint32_t LineStipplePattern; @@ -2214,25 +2214,25 @@ struct GEN7_3DSTATE_PS { #define RD 2 #define RTZ 3 uint32_t RoundingMode; - uint32_t 
IllegalOpcodeExceptionEnable; - uint32_t MaskStackExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; uint32_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreads; - uint32_t PushConstantEnable; - uint32_t AttributeEnable; - uint32_t oMaskPresenttoRenderTarget; - uint32_t RenderTargetFastClearEnable; - uint32_t DualSourceBlendEnable; - uint32_t RenderTargetResolveEnable; + bool PushConstantEnable; + bool AttributeEnable; + bool oMaskPresenttoRenderTarget; + bool RenderTargetFastClearEnable; + bool DualSourceBlendEnable; + bool RenderTargetResolveEnable; #define POSOFFSET_NONE 0 #define POSOFFSET_CENTROID 2 #define POSOFFSET_SAMPLE 3 uint32_t PositionXYOffsetSelect; - uint32_t _32PixelDispatchEnable; - uint32_t _16PixelDispatchEnable; - uint32_t _8PixelDispatchEnable; + bool _32PixelDispatchEnable; + bool _16PixelDispatchEnable; + bool _8PixelDispatchEnable; uint32_t DispatchGRFStartRegisterforConstantSetupData0; uint32_t DispatchGRFStartRegisterforConstantSetupData1; uint32_t DispatchGRFStartRegisterforConstantSetupData2; @@ -2851,16 +2851,16 @@ struct GEN7_3DSTATE_SBE { #define SWIZ_16_31 1 uint32_t AttributeSwizzleControlMode; uint32_t NumberofSFOutputAttributes; - uint32_t AttributeSwizzleEnable; + bool AttributeSwizzleEnable; #define UPPERLEFT 0 #define LOWERLEFT 1 uint32_t PointSpriteTextureCoordinateOrigin; uint32_t VertexURBEntryReadLength; uint32_t VertexURBEntryReadOffset; - uint32_t Attribute2n1ComponentOverrideW; - uint32_t Attribute2n1ComponentOverrideZ; - uint32_t Attribute2n1ComponentOverrideY; - uint32_t Attribute2n1ComponentOverrideX; + bool Attribute2n1ComponentOverrideW; + bool Attribute2n1ComponentOverrideZ; + bool Attribute2n1ComponentOverrideY; + bool Attribute2n1ComponentOverrideX; #define CONST_0000 0 #define CONST_0001_FLOAT 1 #define CONST_1111_FLOAT 2 @@ -2872,10 +2872,10 @@ struct GEN7_3DSTATE_SBE { 
#define INPUTATTR_FACING_W 3 uint32_t Attribute2n1SwizzleSelect; uint32_t Attribute2n1SourceAttribute; - uint32_t Attribute2nComponentOverrideW; - uint32_t Attribute2nComponentOverrideZ; - uint32_t Attribute2nComponentOverrideY; - uint32_t Attribute2nComponentOverrideX; + bool Attribute2nComponentOverrideW; + bool Attribute2nComponentOverrideZ; + bool Attribute2nComponentOverrideY; + bool Attribute2nComponentOverrideX; #define CONST_0000 0 #define CONST_0001_FLOAT 1 #define CONST_1111_FLOAT 2 @@ -3038,11 +3038,11 @@ struct GEN7_3DSTATE_SF { #define D24_UNORM_X8_UINT 3 #define D16_UNORM 5 uint32_t DepthBufferSurfaceFormat; - uint32_t LegacyGlobalDepthBiasEnable; - uint32_t StatisticsEnable; - uint32_t GlobalDepthOffsetEnableSolid; - uint32_t GlobalDepthOffsetEnableWireframe; - uint32_t GlobalDepthOffsetEnablePoint; + bool LegacyGlobalDepthBiasEnable; + bool StatisticsEnable; + bool GlobalDepthOffsetEnableSolid; + bool GlobalDepthOffsetEnableWireframe; + bool GlobalDepthOffsetEnablePoint; #define RASTER_SOLID 0 #define RASTER_WIREFRAME 1 #define RASTER_POINT 2 @@ -3051,9 +3051,9 @@ struct GEN7_3DSTATE_SF { #define RASTER_WIREFRAME 1 #define RASTER_POINT 2 uint32_t BackFaceFillMode; - uint32_t ViewTransformEnable; + bool ViewTransformEnable; uint32_t FrontWinding; - uint32_t AntiAliasingEnable; + bool AntiAliasingEnable; #define CULLMODE_BOTH 0 #define CULLMODE_NONE 1 #define CULLMODE_FRONT 2 @@ -3061,9 +3061,9 @@ struct GEN7_3DSTATE_SF { uint32_t CullMode; float LineWidth; uint32_t LineEndCapAntialiasingRegionWidth; - uint32_t ScissorRectangleEnable; + bool ScissorRectangleEnable; uint32_t MultisampleRasterizationMode; - uint32_t LastPixelEnable; + bool LastPixelEnable; #define Vertex0 0 #define Vertex1 1 #define Vertex2 2 @@ -3382,7 +3382,7 @@ struct GEN7_3DSTATE_STREAMOUT { #define LEADING 0 #define TRAILING 1 uint32_t ReorderMode; - uint32_t SOStatisticsEnable; + bool SOStatisticsEnable; uint32_t SOBufferEnable3; uint32_t SOBufferEnable2; uint32_t SOBufferEnable1; 
@@ -3456,9 +3456,9 @@ struct GEN7_3DSTATE_TE { #define EVEN_FRACTIONAL 2 uint32_t Partitioning; #define POINT 0 -#define LINE 1 -#define TRI_CW 2 -#define TRI_CCW 3 +#define OUTPUT_LINE 1 +#define OUTPUT_TRI_CW 2 +#define OUTPUT_TRI_CCW 3 uint32_t OutputTopology; #define QUAD 0 #define TRI 1 @@ -3467,7 +3467,7 @@ struct GEN7_3DSTATE_TE { #define HW_TESS 0 #define SW_TESS 1 uint32_t TEMode; - uint32_t TEEnable; + bool TEEnable; float MaximumTessellationFactorOdd; float MaximumTessellationFactorNotOdd; }; @@ -3644,7 +3644,7 @@ struct GEN7_VERTEX_BUFFER_STATE { uint32_t BufferAccessType; struct GEN7_MEMORY_OBJECT_CONTROL_STATE VertexBufferMemoryObjectControlState; uint32_t AddressModifyEnable; - uint32_t NullVertexBuffer; + bool NullVertexBuffer; uint32_t VertexFetchInvalidate; uint32_t BufferPitch; __gen_address_type BufferStartingAddress; @@ -3723,9 +3723,9 @@ GEN7_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, struct GEN7_VERTEX_ELEMENT_STATE { uint32_t VertexBufferIndex; - uint32_t Valid; + bool Valid; uint32_t SourceElementFormat; - uint32_t EdgeFlagEnable; + bool EdgeFlagEnable; uint32_t SourceElementOffset; uint32_t Component0Control; uint32_t Component1Control; @@ -3795,7 +3795,7 @@ struct GEN7_3DSTATE_VF_STATISTICS { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t StatisticsEnable; + bool StatisticsEnable; }; static inline void @@ -3922,17 +3922,17 @@ struct GEN7_3DSTATE_VS { #define IEEE754 0 #define Alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; uint32_t ScratchSpaceBaseOffset; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterforURBData; uint32_t VertexURBEntryReadLength; uint32_t VertexURBEntryReadOffset; uint32_t MaximumNumberofThreads; - uint32_t StatisticsEnable; - uint32_t VertexCacheDisable; - uint32_t VSFunctionEnable; + bool 
StatisticsEnable; + bool VertexCacheDisable; + bool VSFunctionEnable; }; static inline void @@ -3998,13 +3998,13 @@ struct GEN7_3DSTATE_WM { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t StatisticsEnable; - uint32_t DepthBufferClear; - uint32_t ThreadDispatchEnable; - uint32_t DepthBufferResolveEnable; - uint32_t HierarchicalDepthBufferResolveEnable; - uint32_t LegacyDiamondLineRasterization; - uint32_t PixelShaderKillPixel; + bool StatisticsEnable; + bool DepthBufferClear; + bool ThreadDispatchEnable; + bool DepthBufferResolveEnable; + bool HierarchicalDepthBufferResolveEnable; + bool LegacyDiamondLineRasterization; + bool PixelShaderKillPixel; #define PSCDEPTH_OFF 0 #define PSCDEPTH_ON 1 #define PSCDEPTH_ON_GE 2 @@ -4014,18 +4014,18 @@ struct GEN7_3DSTATE_WM { #define EDSC_PSEXEC 1 #define EDSC_PREPS 2 uint32_t EarlyDepthStencilControl; - uint32_t PixelShaderUsesSourceDepth; - uint32_t PixelShaderUsesSourceW; + bool PixelShaderUsesSourceDepth; + bool PixelShaderUsesSourceW; #define INTERP_PIXEL 0 #define INTERP_CENTROID 2 #define INTERP_SAMPLE 3 uint32_t PositionZWInterpolationMode; uint32_t BarycentricInterpolationMode; - uint32_t PixelShaderUsesInputCoverageMask; + bool PixelShaderUsesInputCoverageMask; uint32_t LineEndCapAntialiasingRegionWidth; uint32_t LineAntialiasingRegionWidth; - uint32_t PolygonStippleEnable; - uint32_t LineStippleEnable; + bool PolygonStippleEnable; + bool LineStippleEnable; #define RASTRULE_UPPER_LEFT 0 #define RASTRULE_UPPER_RIGHT 1 uint32_t PointRasterizationRule; @@ -4096,7 +4096,7 @@ struct GEN7_GPGPU_OBJECT { uint32_t Pipeline; uint32_t MediaCommandOpcode; uint32_t SubOpcode; - uint32_t PredicateEnable; + bool PredicateEnable; uint32_t DwordLength; uint32_t SharedLocalMemoryFixedOffset; uint32_t InterfaceDescriptorOffset; @@ -4177,8 +4177,8 @@ struct GEN7_GPGPU_WALKER { uint32_t Pipeline; uint32_t MediaCommandOpcode; uint32_t SubOpcodeA; - uint32_t IndirectParameterEnable; - uint32_t 
PredicateEnable; + bool IndirectParameterEnable; + bool PredicateEnable; uint32_t DwordLength; uint32_t InterfaceDescriptorOffset; #define SIMD8 0 @@ -4365,7 +4365,7 @@ struct GEN7_MEDIA_OBJECT { uint32_t MediaCommandSubOpcode; uint32_t DwordLength; uint32_t InterfaceDescriptorOffset; - uint32_t ChildrenPresent; + bool ChildrenPresent; #define Nothreadsynchronization 0 #define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 uint32_t ThreadSynchronization; @@ -4381,7 +4381,7 @@ struct GEN7_MEDIA_OBJECT { uint32_t ScoredboardY; uint32_t ScoreboardX; uint32_t ScoreboardColor; - uint32_t ScoreboardMask; + bool ScoreboardMask; /* variable length fields follow */ }; @@ -4446,8 +4446,8 @@ struct GEN7_MEDIA_OBJECT_PRT { uint32_t SubOpcode; uint32_t DwordLength; uint32_t InterfaceDescriptorOffset; - uint32_t ChildrenPresent; - uint32_t PRT_FenceNeeded; + bool ChildrenPresent; + bool PRT_FenceNeeded; #define Rootthreadqueue 0 #define VFEstateflush 1 uint32_t PRT_FenceType; @@ -4501,7 +4501,7 @@ struct GEN7_MEDIA_OBJECT_WALKER { uint32_t SubOpcode; uint32_t DwordLength; uint32_t InterfaceDescriptorOffset; - uint32_t ChildrenPresent; + bool ChildrenPresent; #define Nothreadsynchronization 0 #define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 uint32_t ThreadSynchronization; @@ -4510,9 +4510,9 @@ struct GEN7_MEDIA_OBJECT_WALKER { uint32_t UseScoreboard; uint32_t IndirectDataLength; uint32_t IndirectDataStartAddress; - uint32_t ScoreboardMask; - uint32_t DualMode; - uint32_t Repel; + bool ScoreboardMask; + bool DualMode; + bool Repel; uint32_t ColorCountMinusOne; uint32_t MiddleLoopExtraSteps; uint32_t LocalMidLoopUnitY; @@ -4833,7 +4833,7 @@ GEN7_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, struct GEN7_MI_ARB_ON_OFF { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t ArbitrationEnable; + bool ArbitrationEnable; }; static inline void @@ -4884,7 +4884,7 @@ GEN7_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, 
struct GEN7_MI_BATCH_BUFFER_START { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t ClearCommandBufferEnable; + bool ClearCommandBufferEnable; #define ASI_GGTT 0 #define ASI_PPGTT 1 uint32_t AddressSpaceIndicator; @@ -5015,17 +5015,17 @@ GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict struct GEN7_MI_FLUSH { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t IndirectStatePointersDisable; - uint32_t GenericMediaStateClear; + bool IndirectStatePointersDisable; + bool GenericMediaStateClear; #define DontReset 0 #define Reset 1 - uint32_t GlobalSnapshotCountReset; + bool GlobalSnapshotCountReset; #define Flush 0 #define DontFlush 1 - uint32_t RenderCacheFlushInhibit; + bool RenderCacheFlushInhibit; #define DontInvalidate 0 #define Invalidate 1 - uint32_t StateInstructionCacheInvalidate; + bool StateInstructionCacheInvalidate; }; static inline void @@ -5095,7 +5095,7 @@ GEN7_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, struct GEN7_MI_LOAD_REGISTER_MEM { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t UseGlobalGTT; + bool UseGlobalGTT; uint32_t AsyncModeEnable; uint32_t DwordLength; uint32_t RegisterAddress; @@ -5137,7 +5137,7 @@ GEN7_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, struct GEN7_MI_NOOP { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t IdentificationNumberRegisterWriteEnable; + bool IdentificationNumberRegisterWriteEnable; uint32_t IdentificationNumber; }; @@ -5165,9 +5165,9 @@ GEN7_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, struct GEN7_MI_PREDICATE { uint32_t CommandType; uint32_t MICommandOpcode; -#define KEEP 0 -#define LOAD 2 -#define LOADINV 3 +#define LOAD_KEEP 0 +#define LOAD_LOAD 2 +#define LOAD_LOADINV 3 uint32_t LoadOperation; #define COMBINE_SET 0 #define COMBINE_AND 1 @@ -5272,8 +5272,8 @@ struct GEN7_MI_SET_CONTEXT { uint32_t DwordLength; __gen_address_type LogicalContextAddress; uint32_t ReservedMustbe1; - 
uint32_t ExtendedStateSaveEnable; - uint32_t ExtendedStateRestoreEnable; + bool ExtendedStateSaveEnable; + bool ExtendedStateRestoreEnable; uint32_t ForceRestore; uint32_t RestoreInhibit; }; @@ -5313,7 +5313,7 @@ GEN7_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, struct GEN7_MI_STORE_DATA_IMM { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t UseGlobalGTT; + bool UseGlobalGTT; uint32_t DwordLength; uint32_t Address; uint32_t CoreModeEnable; @@ -5403,7 +5403,7 @@ GEN7_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, struct GEN7_MI_SUSPEND_FLUSH { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t SuspendFlush; + bool SuspendFlush; }; static inline void @@ -5551,23 +5551,23 @@ GEN7_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, struct GEN7_MI_WAIT_FOR_EVENT { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t DisplayPipeCHorizontalBlankWaitEnable; - uint32_t DisplayPipeCVerticalBlankWaitEnable; - uint32_t DisplaySpriteCFlipPendingWaitEnable; + bool DisplayPipeCHorizontalBlankWaitEnable; + bool DisplayPipeCVerticalBlankWaitEnable; + bool DisplaySpriteCFlipPendingWaitEnable; #define Notenabled 0 uint32_t ConditionCodeWaitSelect; - uint32_t DisplayPlaneCFlipPendingWaitEnable; - uint32_t DisplayPipeCScanLineWaitEnable; - uint32_t DisplayPipeBHorizontalBlankWaitEnable; - uint32_t DisplayPipeBVerticalBlankWaitEnable; - uint32_t DisplaySpriteBFlipPendingWaitEnable; - uint32_t DisplayPlaneBFlipPendingWaitEnable; - uint32_t DisplayPipeBScanLineWaitEnable; - uint32_t DisplayPipeAHorizontalBlankWaitEnable; - uint32_t DisplayPipeAVerticalBlankWaitEnable; - uint32_t DisplaySpriteAFlipPendingWaitEnable; - uint32_t DisplayPlaneAFlipPendingWaitEnable; - uint32_t DisplayPipeAScanLineWaitEnable; + bool DisplayPlaneCFlipPendingWaitEnable; + bool DisplayPipeCScanLineWaitEnable; + bool DisplayPipeBHorizontalBlankWaitEnable; + bool DisplayPipeBVerticalBlankWaitEnable; + bool 
DisplaySpriteBFlipPendingWaitEnable; + bool DisplayPlaneBFlipPendingWaitEnable; + bool DisplayPipeBScanLineWaitEnable; + bool DisplayPipeAHorizontalBlankWaitEnable; + bool DisplayPipeAVerticalBlankWaitEnable; + bool DisplaySpriteAFlipPendingWaitEnable; + bool DisplayPlaneAFlipPendingWaitEnable; + bool DisplayPipeAScanLineWaitEnable; }; static inline void @@ -5626,29 +5626,29 @@ struct GEN7_PIPE_CONTROL { #define Reset 1 uint32_t GlobalSnapshotCountReset; uint32_t TLBInvalidate; - uint32_t GenericMediaStateClear; + bool GenericMediaStateClear; #define NoWrite 0 #define WriteImmediateData 1 #define WritePSDepthCount 2 #define WriteTimestamp 3 uint32_t PostSyncOperation; - uint32_t DepthStallEnable; + bool DepthStallEnable; #define DisableFlush 0 #define EnableFlush 1 - uint32_t RenderTargetCacheFlushEnable; - uint32_t InstructionCacheInvalidateEnable; - uint32_t TextureCacheInvalidationEnable; - uint32_t IndirectStatePointersDisable; - uint32_t NotifyEnable; - uint32_t PipeControlFlushEnable; - uint32_t DCFlushEnable; - uint32_t VFCacheInvalidationEnable; - uint32_t ConstantCacheInvalidationEnable; - uint32_t StateCacheInvalidationEnable; - uint32_t StallAtPixelScoreboard; + bool RenderTargetCacheFlushEnable; + bool InstructionCacheInvalidateEnable; + bool TextureCacheInvalidationEnable; + bool IndirectStatePointersDisable; + bool NotifyEnable; + bool PipeControlFlushEnable; + bool DCFlushEnable; + bool VFCacheInvalidationEnable; + bool ConstantCacheInvalidationEnable; + bool StateCacheInvalidationEnable; + bool StallAtPixelScoreboard; #define FlushDisabled 0 #define FlushEnabled 1 - uint32_t DepthCacheFlushEnable; + bool DepthCacheFlushEnable; __gen_address_type Address; uint32_t ImmediateData; uint32_t ImmediateData0; @@ -5820,8 +5820,8 @@ GEN7_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, #define GEN7_BLEND_STATE_length 0x00000002 struct GEN7_BLEND_STATE { - uint32_t ColorBufferBlendEnable; - uint32_t IndependentAlphaBlendEnable; + bool 
ColorBufferBlendEnable; + bool IndependentAlphaBlendEnable; #define BLENDFUNCTION_ADD 0 #define BLENDFUNCTION_SUBTRACT 1 #define BLENDFUNCTION_REVERSE_SUBTRACT 2 @@ -5857,14 +5857,14 @@ struct GEN7_BLEND_STATE { uint32_t ColorBlendFunction; uint32_t SourceBlendFactor; uint32_t DestinationBlendFactor; - uint32_t AlphaToCoverageEnable; - uint32_t AlphaToOneEnable; - uint32_t AlphaToCoverageDitherEnable; - uint32_t WriteDisableAlpha; - uint32_t WriteDisableRed; - uint32_t WriteDisableGreen; - uint32_t WriteDisableBlue; - uint32_t LogicOpEnable; + bool AlphaToCoverageEnable; + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; + bool WriteDisableAlpha; + bool WriteDisableRed; + bool WriteDisableGreen; + bool WriteDisableBlue; + bool LogicOpEnable; #define LOGICOP_CLEAR 0 #define LOGICOP_NOR 1 #define LOGICOP_AND_INVERTED 2 @@ -5882,7 +5882,7 @@ struct GEN7_BLEND_STATE { #define LOGICOP_OR 14 #define LOGICOP_SET 15 uint32_t LogicOpFunction; - uint32_t AlphaTestEnable; + bool AlphaTestEnable; #define COMPAREFUNCTION_ALWAYS 0 #define COMPAREFUNCTION_NEVER 1 #define COMPAREFUNCTION_LESS 2 @@ -5892,15 +5892,15 @@ struct GEN7_BLEND_STATE { #define COMPAREFUNCTION_NOTEQUAL 6 #define COMPAREFUNCTION_GEQUAL 7 uint32_t AlphaTestFunction; - uint32_t ColorDitherEnable; + bool ColorDitherEnable; uint32_t XDitherOffset; uint32_t YDitherOffset; #define COLORCLAMP_UNORM 0 #define COLORCLAMP_SNORM 1 #define COLORCLAMP_RTFORMAT 2 uint32_t ColorClampRange; - uint32_t PreBlendColorClampEnable; - uint32_t PostBlendColorClampEnable; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; }; static inline void @@ -6023,7 +6023,7 @@ GEN7_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, #define GEN7_DEPTH_STENCIL_STATE_length 0x00000003 struct GEN7_DEPTH_STENCIL_STATE { - uint32_t StencilTestEnable; + bool StencilTestEnable; #define COMPAREFUNCTION_ALWAYS 0 #define COMPAREFUNCTION_NEVER 1 #define COMPAREFUNCTION_LESS 2 @@ -6044,8 +6044,8 @@ struct 
GEN7_DEPTH_STENCIL_STATE { uint32_t StencilFailOp; uint32_t StencilPassDepthFailOp; uint32_t StencilPassDepthPassOp; - uint32_t StencilBufferWriteEnable; - uint32_t DoubleSidedStencilEnable; + bool StencilBufferWriteEnable; + bool DoubleSidedStencilEnable; #define COMPAREFUNCTION_ALWAYS 0 #define COMPAREFUNCTION_NEVER 1 #define COMPAREFUNCTION_LESS 2 @@ -6070,7 +6070,7 @@ struct GEN7_DEPTH_STENCIL_STATE { uint32_t StencilWriteMask; uint32_t BackfaceStencilTestMask; uint32_t BackfaceStencilWriteMask; - uint32_t DepthTestEnable; + bool DepthTestEnable; #define COMPAREFUNCTION_ALWAYS 0 #define COMPAREFUNCTION_NEVER 1 #define COMPAREFUNCTION_LESS 2 @@ -6080,7 +6080,7 @@ struct GEN7_DEPTH_STENCIL_STATE { #define COMPAREFUNCTION_NOTEQUAL 6 #define COMPAREFUNCTION_GEQUAL 7 uint32_t DepthTestFunction; - uint32_t DepthBufferWriteEnable; + bool DepthBufferWriteEnable; }; static inline void @@ -6133,9 +6133,9 @@ struct GEN7_INTERFACE_DESCRIPTOR_DATA { #define IEEE754 0 #define Alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t MaskStackExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; uint32_t SamplerStatePointer; #define Nosamplersused 0 #define Between1and4samplersused 1 @@ -6152,7 +6152,7 @@ struct GEN7_INTERFACE_DESCRIPTOR_DATA { #define RD 2 #define RTZ 3 uint32_t RoundingMode; - uint32_t BarrierEnable; + bool BarrierEnable; uint32_t SharedLocalMemorySize; uint32_t NumberofThreadsinGPGPUThreadGroup; }; @@ -6425,7 +6425,7 @@ GEN7_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, #define GEN7_SAMPLER_STATE_length 0x00000004 struct GEN7_SAMPLER_STATE { - uint32_t SamplerDisable; + bool SamplerDisable; #define DX10OGL 0 #define DX9 1 uint32_t TextureBorderColorMode; @@ -6465,7 +6465,7 @@ struct GEN7_SAMPLER_STATE { #define OVERRIDE 1 uint32_t CubeSurfaceControlMode; uint32_t BorderColorPointer; - uint32_t 
ChromaKeyEnable; + bool ChromaKeyEnable; uint32_t ChromaKeyIndex; #define KEYFILTER_KILL_ON_ANY_MATCH 0 #define KEYFILTER_REPLACE_BLACK 1 @@ -6479,17 +6479,17 @@ struct GEN7_SAMPLER_STATE { #define RATIO141 6 #define RATIO161 7 uint32_t MaximumAnisotropy; - uint32_t RAddressMinFilterRoundingEnable; - uint32_t RAddressMagFilterRoundingEnable; - uint32_t VAddressMinFilterRoundingEnable; - uint32_t VAddressMagFilterRoundingEnable; - uint32_t UAddressMinFilterRoundingEnable; - uint32_t UAddressMagFilterRoundingEnable; + bool RAddressMinFilterRoundingEnable; + bool RAddressMagFilterRoundingEnable; + bool VAddressMinFilterRoundingEnable; + bool VAddressMagFilterRoundingEnable; + bool UAddressMinFilterRoundingEnable; + bool UAddressMagFilterRoundingEnable; #define FULL 0 #define MED 2 #define LOW 3 uint32_t TrilinearFilterQuality; - uint32_t NonnormalizedCoordinateEnable; + bool NonnormalizedCoordinateEnable; uint32_t TCXAddressControlMode; uint32_t TCYAddressControlMode; uint32_t TCZAddressControlMode; @@ -6554,7 +6554,7 @@ GEN7_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, #define _3DPRIM_QUADLIST 7 #define _3DPRIM_QUADSTRIP 8 #define _3DPRIM_LINELIST_ADJ 9 -#define _3DPRIM_LISTSTRIP_ADJ 10 +#define _3DPRIM_LINESTRIP_ADJ 10 #define _3DPRIM_TRILIST_ADJ 11 #define _3DPRIM_TRISTRIP_ADJ 12 #define _3DPRIM_TRISTRIP_REVERSE 13 diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index bf6392ded4f..dd7f1b55a50 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -169,19 +169,19 @@ struct GEN8_3DSTATE_VS { #define IEEE754 0 #define Alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t AccessesUAV; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool AccessesUAV; + bool SoftwareExceptionEnable; uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t VertexURBEntryReadLength; uint32_t VertexURBEntryReadOffset; 
uint32_t MaximumNumberofThreads; - uint32_t StatisticsEnable; - uint32_t SIMD8DispatchEnable; - uint32_t VertexCacheDisable; - uint32_t FunctionEnable; + bool StatisticsEnable; + bool SIMD8DispatchEnable; + bool VertexCacheDisable; + bool FunctionEnable; uint32_t VertexURBEntryOutputReadOffset; uint32_t VertexURBEntryOutputLength; uint32_t UserClipDistanceClipTestEnableBitmask; @@ -539,7 +539,7 @@ GEN8_MI_SEMAPHORE_WAIT_pack(__gen_user_data *data, void * restrict dst, struct GEN8_MI_STORE_REGISTER_MEM { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t UseGlobalGTT; + bool UseGlobalGTT; uint32_t PredicateEnable; uint32_t DwordLength; uint32_t RegisterAddress; @@ -655,28 +655,28 @@ struct GEN8_STATE_BASE_ADDRESS { uint32_t DwordLength; __gen_address_type GeneralStateBaseAddress; struct GEN8_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; - uint32_t GeneralStateBaseAddressModifyEnable; + bool GeneralStateBaseAddressModifyEnable; struct GEN8_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; __gen_address_type SurfaceStateBaseAddress; struct GEN8_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; - uint32_t SurfaceStateBaseAddressModifyEnable; + bool SurfaceStateBaseAddressModifyEnable; __gen_address_type DynamicStateBaseAddress; struct GEN8_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; - uint32_t DynamicStateBaseAddressModifyEnable; + bool DynamicStateBaseAddressModifyEnable; __gen_address_type IndirectObjectBaseAddress; struct GEN8_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; - uint32_t IndirectObjectBaseAddressModifyEnable; + bool IndirectObjectBaseAddressModifyEnable; __gen_address_type InstructionBaseAddress; struct GEN8_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; - uint32_t InstructionBaseAddressModifyEnable; + bool InstructionBaseAddressModifyEnable; uint32_t GeneralStateBufferSize; - uint32_t GeneralStateBufferSizeModifyEnable; + 
bool GeneralStateBufferSizeModifyEnable; uint32_t DynamicStateBufferSize; - uint32_t DynamicStateBufferSizeModifyEnable; + bool DynamicStateBufferSizeModifyEnable; uint32_t IndirectObjectBufferSize; - uint32_t IndirectObjectBufferSizeModifyEnable; + bool IndirectObjectBufferSizeModifyEnable; uint32_t InstructionBufferSize; - uint32_t InstructionBuffersizeModifyEnable; + bool InstructionBuffersizeModifyEnable; }; static inline void @@ -934,11 +934,11 @@ struct GEN8_3DPRIMITIVE { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t IndirectParameterEnable; + bool IndirectParameterEnable; uint32_t UAVCoherencyRequired; - uint32_t PredicateEnable; + bool PredicateEnable; uint32_t DwordLength; - uint32_t EndOffsetEnable; + bool EndOffsetEnable; #define SEQUENTIAL 0 #define RANDOM 1 uint32_t VertexAccessType; @@ -1544,7 +1544,7 @@ struct GEN8_3DSTATE_BLEND_STATE_POINTERS { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; uint32_t BlendStatePointer; - uint32_t BlendStatePointerValid; + bool BlendStatePointerValid; }; static inline void @@ -1584,7 +1584,7 @@ struct GEN8_3DSTATE_CC_STATE_POINTERS { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; uint32_t ColorCalcStatePointer; - uint32_t ColorCalcStatePointerValid; + bool ColorCalcStatePointerValid; }; static inline void @@ -1672,7 +1672,7 @@ struct GEN8_3DSTATE_CLEAR_PARAMS { uint32_t _3DCommandSubOpcode; uint32_t DwordLength; float DepthClearValue; - uint32_t DepthClearValueValid; + bool DepthClearValueValid; }; static inline void @@ -1716,37 +1716,37 @@ struct GEN8_3DSTATE_CLIP { uint32_t DwordLength; #define Normal 0 #define Force 1 - uint32_t ForceUserClipDistanceCullTestEnableBitmask; + bool ForceUserClipDistanceCullTestEnableBitmask; #define _8Bit 0 #define _4Bit 1 uint32_t VertexSubPixelPrecisionSelect; - uint32_t EarlyCullEnable; + bool EarlyCullEnable; #define Normal 0 #define Force 1 - uint32_t ForceUserClipDistanceClipTestEnableBitmask; + bool 
ForceUserClipDistanceClipTestEnableBitmask; #define Normal 0 #define Force 1 - uint32_t ForceClipMode; - uint32_t ClipperStatisticsEnable; + bool ForceClipMode; + bool ClipperStatisticsEnable; uint32_t UserClipDistanceCullTestEnableBitmask; - uint32_t ClipEnable; + bool ClipEnable; #define API_OGL 0 uint32_t APIMode; - uint32_t ViewportXYClipTestEnable; - uint32_t GuardbandClipTestEnable; + bool ViewportXYClipTestEnable; + bool GuardbandClipTestEnable; uint32_t UserClipDistanceClipTestEnableBitmask; #define NORMAL 0 #define REJECT_ALL 3 #define ACCEPT_ALL 4 uint32_t ClipMode; - uint32_t PerspectiveDivideDisable; - uint32_t NonPerspectiveBarycentricEnable; + bool PerspectiveDivideDisable; + bool NonPerspectiveBarycentricEnable; uint32_t TriangleStripListProvokingVertexSelect; uint32_t LineStripListProvokingVertexSelect; uint32_t TriangleFanProvokingVertexSelect; float MinimumPointWidth; float MaximumPointWidth; - uint32_t ForceZeroRTAIndexEnable; + bool ForceZeroRTAIndexEnable; uint32_t MaximumVPIndex; }; @@ -2078,9 +2078,9 @@ struct GEN8_3DSTATE_DEPTH_BUFFER { #define SURFTYPE_CUBE 3 #define SURFTYPE_NULL 7 uint32_t SurfaceType; - uint32_t DepthWriteEnable; - uint32_t StencilWriteEnable; - uint32_t HierarchicalDepthBufferEnable; + bool DepthWriteEnable; + bool StencilWriteEnable; + bool HierarchicalDepthBufferEnable; #define D32_FLOAT 1 #define D24_UNORM_X8_UINT 3 #define D16_UNORM 5 @@ -2247,20 +2247,20 @@ struct GEN8_3DSTATE_DS { #define IEEE754 0 #define Alternate 1 uint32_t FloatingPointMode; - uint32_t AccessesUAV; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool AccessesUAV; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t DispatchGRFStartRegisterForURBData; uint32_t PatchURBEntryReadLength; uint32_t PatchURBEntryReadOffset; uint32_t MaximumNumberofThreads; - uint32_t StatisticsEnable; - uint32_t SIMD8DispatchEnable; - uint32_t 
ComputeWCoordinateEnable; - uint32_t CacheDisable; - uint32_t FunctionEnable; + bool StatisticsEnable; + bool SIMD8DispatchEnable; + bool ComputeWCoordinateEnable; + bool CacheDisable; + bool FunctionEnable; uint32_t VertexURBEntryOutputReadOffset; uint32_t VertexURBEntryOutputLength; uint32_t UserClipDistanceClipTestEnableBitmask; @@ -2368,7 +2368,7 @@ struct GEN8_3DSTATE_GATHER_CONSTANT_DS { uint32_t ConstantBufferValid; uint32_t ConstantBufferBindingTableBlock; uint32_t GatherBufferOffset; - uint32_t ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9GenerateStall; /* variable length fields follow */ }; @@ -2415,7 +2415,7 @@ struct GEN8_3DSTATE_GATHER_CONSTANT_GS { uint32_t ConstantBufferValid; uint32_t ConstantBufferBindingTableBlock; uint32_t GatherBufferOffset; - uint32_t ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9GenerateStall; /* variable length fields follow */ }; @@ -2462,7 +2462,7 @@ struct GEN8_3DSTATE_GATHER_CONSTANT_HS { uint32_t ConstantBufferValid; uint32_t ConstantBufferBindingTableBlock; uint32_t GatherBufferOffset; - uint32_t ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9GenerateStall; /* variable length fields follow */ }; @@ -2509,8 +2509,8 @@ struct GEN8_3DSTATE_GATHER_CONSTANT_PS { uint32_t ConstantBufferValid; uint32_t ConstantBufferBindingTableBlock; uint32_t GatherBufferOffset; - uint32_t ConstantBufferDx9GenerateStall; - uint32_t ConstantBufferDx9Enable; + bool ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9Enable; /* variable length fields follow */ }; @@ -2558,8 +2558,8 @@ struct GEN8_3DSTATE_GATHER_CONSTANT_VS { uint32_t ConstantBufferValid; uint32_t ConstantBufferBindingTableBlock; uint32_t GatherBufferOffset; - uint32_t ConstantBufferDx9GenerateStall; - uint32_t ConstantBufferDx9Enable; + bool ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9Enable; /* variable length fields follow */ }; @@ -2607,7 +2607,7 @@ struct GEN8_3DSTATE_GATHER_POOL_ALLOC { uint32_t _3DCommandSubOpcode; uint32_t 
DwordLength; __gen_address_type GatherPoolBaseAddress; - uint32_t GatherPoolEnable; + bool GatherPoolEnable; struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; uint32_t GatherPoolBufferSize; }; @@ -2678,17 +2678,17 @@ struct GEN8_3DSTATE_GS { #define IEEE754 0 #define Alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t AccessesUAV; - uint32_t MaskStackExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool AccessesUAV; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; uint32_t ExpectedVertexCount; uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t OutputVertexSize; uint32_t OutputTopology; uint32_t VertexURBEntryReadLength; - uint32_t IncludeVertexHandles; + bool IncludeVertexHandles; uint32_t VertexURBEntryReadOffset; uint32_t DispatchGRFStartRegisterForURBData; uint32_t MaximumNumberofThreads; @@ -2700,19 +2700,19 @@ struct GEN8_3DSTATE_GS { #define DispatchModeDualObject 2 #define DispatchModeSIMD8 3 uint32_t DispatchMode; - uint32_t StatisticsEnable; + bool StatisticsEnable; uint32_t InvocationsIncrementValue; - uint32_t IncludePrimitiveID; + bool IncludePrimitiveID; uint32_t Hint; #define LEADING 0 #define TRAILING 1 uint32_t ReorderMode; - uint32_t DiscardAdjacency; - uint32_t Enable; + bool DiscardAdjacency; + bool Enable; #define CUT 0 #define SID 1 uint32_t ControlDataFormat; - uint32_t StaticOutput; + bool StaticOutput; uint32_t StaticOutputVertexCount; uint32_t VertexURBEntryOutputReadOffset; uint32_t VertexURBEntryOutputLength; @@ -2887,21 +2887,21 @@ struct GEN8_3DSTATE_HS { #define IEEE754 0 #define alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t SoftwareExceptionEnable; - uint32_t Enable; - uint32_t StatisticsEnable; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + bool Enable; + bool StatisticsEnable; uint32_t MaximumNumberofThreads; uint32_t 
InstanceCount; uint64_t KernelStartPointer; uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; - uint32_t SingleProgramFlow; + bool SingleProgramFlow; #define Dmask 0 #define Vmask 1 uint32_t VectorMaskEnable; - uint32_t AccessesUAV; - uint32_t IncludeVertexHandles; + bool AccessesUAV; + bool IncludeVertexHandles; uint32_t DispatchGRFStartRegisterForURBData; uint32_t VertexURBEntryReadLength; uint32_t VertexURBEntryReadOffset; @@ -3042,7 +3042,7 @@ struct GEN8_3DSTATE_LINE_STIPPLE { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; uint32_t CurrentRepeatCounter; uint32_t CurrentStippleIndex; uint32_t LineStipplePattern; @@ -3283,22 +3283,22 @@ struct GEN8_3DSTATE_PS { #define RD 2 #define RTZ 3 uint32_t RoundingMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t MaskStackExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; uint64_t ScratchSpaceBasePointer; uint32_t PerThreadScratchSpace; uint32_t MaximumNumberofThreadsPerPSD; - uint32_t PushConstantEnable; - uint32_t RenderTargetFastClearEnable; - uint32_t RenderTargetResolveEnable; + bool PushConstantEnable; + bool RenderTargetFastClearEnable; + bool RenderTargetResolveEnable; #define POSOFFSET_NONE 0 #define POSOFFSET_CENTROID 2 #define POSOFFSET_SAMPLE 3 uint32_t PositionXYOffsetSelect; - uint32_t _32PixelDispatchEnable; - uint32_t _16PixelDispatchEnable; - uint32_t _8PixelDispatchEnable; + bool _32PixelDispatchEnable; + bool _16PixelDispatchEnable; + bool _8PixelDispatchEnable; uint32_t DispatchGRFStartRegisterForConstantSetupData0; uint32_t DispatchGRFStartRegisterForConstantSetupData1; uint32_t DispatchGRFStartRegisterForConstantSetupData2; @@ -3397,15 +3397,15 @@ struct GEN8_3DSTATE_PS_BLEND { uint32_t _3DCommandOpcode; uint32_t 
_3DCommandSubOpcode; uint32_t DwordLength; - uint32_t AlphaToCoverageEnable; - uint32_t HasWriteableRT; - uint32_t ColorBufferBlendEnable; + bool AlphaToCoverageEnable; + bool HasWriteableRT; + bool ColorBufferBlendEnable; uint32_t SourceAlphaBlendFactor; uint32_t DestinationAlphaBlendFactor; uint32_t SourceBlendFactor; uint32_t DestinationBlendFactor; - uint32_t AlphaTestEnable; - uint32_t IndependentAlphaBlendEnable; + bool AlphaTestEnable; + bool IndependentAlphaBlendEnable; }; static inline void @@ -3451,24 +3451,24 @@ struct GEN8_3DSTATE_PS_EXTRA { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t PixelShaderValid; - uint32_t PixelShaderDoesnotwritetoRT; - uint32_t oMaskPresenttoRenderTarget; - uint32_t PixelShaderKillsPixel; + bool PixelShaderValid; + bool PixelShaderDoesnotwritetoRT; + bool oMaskPresenttoRenderTarget; + bool PixelShaderKillsPixel; #define PSCDEPTH_OFF 0 #define PSCDEPTH_ON 1 #define PSCDEPTH_ON_GE 2 #define PSCDEPTH_ON_LE 3 uint32_t PixelShaderComputedDepthMode; - uint32_t ForceComputedDepth; - uint32_t PixelShaderUsesSourceDepth; - uint32_t PixelShaderUsesSourceW; + bool ForceComputedDepth; + bool PixelShaderUsesSourceDepth; + bool PixelShaderUsesSourceW; uint32_t Removed; - uint32_t AttributeEnable; - uint32_t PixelShaderDisablesAlphaToCoverage; - uint32_t PixelShaderIsPerSample; - uint32_t PixelShaderHasUAV; - uint32_t PixelShaderUsesInputCoverageMask; + bool AttributeEnable; + bool PixelShaderDisablesAlphaToCoverage; + bool PixelShaderIsPerSample; + bool PixelShaderHasUAV; + bool PixelShaderUsesInputCoverageMask; }; static inline void @@ -3741,16 +3741,16 @@ struct GEN8_3DSTATE_RASTER { #define Normal 0 #define Force 1 uint32_t ForceMultisampling; - uint32_t SmoothPointEnable; - uint32_t DXMultisampleRasterizationEnable; + bool SmoothPointEnable; + bool DXMultisampleRasterizationEnable; #define MSRASTMODE_OFF_PIXEL 0 #define MSRASTMODE_OFF_PATTERN 1 #define MSRASTMODE_ON_PIXEL 2 #define 
MSRASTMODE_ON_PATTERN 3 uint32_t DXMultisampleRasterizationMode; - uint32_t GlobalDepthOffsetEnableSolid; - uint32_t GlobalDepthOffsetEnableWireframe; - uint32_t GlobalDepthOffsetEnablePoint; + bool GlobalDepthOffsetEnableSolid; + bool GlobalDepthOffsetEnableWireframe; + bool GlobalDepthOffsetEnablePoint; #define RASTER_SOLID 0 #define RASTER_WIREFRAME 1 #define RASTER_POINT 2 @@ -3759,9 +3759,9 @@ struct GEN8_3DSTATE_RASTER { #define RASTER_WIREFRAME 1 #define RASTER_POINT 2 uint32_t BackFaceFillMode; - uint32_t AntialiasingEnable; - uint32_t ScissorRectangleEnable; - uint32_t ViewportZClipTestEnable; + bool AntialiasingEnable; + bool ScissorRectangleEnable; + bool ViewportZClipTestEnable; float GlobalDepthOffsetConstant; float GlobalDepthOffsetScale; float GlobalDepthOffsetClamp; @@ -4253,17 +4253,17 @@ struct GEN8_3DSTATE_SBE { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ForceVertexURBEntryReadLength; - uint32_t ForceVertexURBEntryReadOffset; + bool ForceVertexURBEntryReadLength; + bool ForceVertexURBEntryReadOffset; uint32_t NumberofSFOutputAttributes; - uint32_t AttributeSwizzleEnable; + bool AttributeSwizzleEnable; #define UPPERLEFT 0 #define LOWERLEFT 1 uint32_t PointSpriteTextureCoordinateOrigin; - uint32_t PrimitiveIDOverrideComponentW; - uint32_t PrimitiveIDOverrideComponentZ; - uint32_t PrimitiveIDOverrideComponentY; - uint32_t PrimitiveIDOverrideComponentX; + bool PrimitiveIDOverrideComponentW; + bool PrimitiveIDOverrideComponentZ; + bool PrimitiveIDOverrideComponentY; + bool PrimitiveIDOverrideComponentX; uint32_t VertexURBEntryReadLength; uint32_t VertexURBEntryReadOffset; uint32_t PrimitiveIDOverrideAttributeSelect; @@ -4320,10 +4320,10 @@ GEN8_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, .DwordLength = 9 struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL { - uint32_t ComponentOverrideW; - uint32_t ComponentOverrideZ; - uint32_t ComponentOverrideY; - uint32_t ComponentOverrideX; + bool 
ComponentOverrideW; + bool ComponentOverrideZ; + bool ComponentOverrideY; + bool ComponentOverrideX; uint32_t SwizzleControlMode; #define CONST_0000 0 #define CONST_0001_FLOAT 1 @@ -4479,22 +4479,22 @@ struct GEN8_3DSTATE_SF { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t LegacyGlobalDepthBiasEnable; - uint32_t StatisticsEnable; - uint32_t ViewportTransformEnable; + bool LegacyGlobalDepthBiasEnable; + bool StatisticsEnable; + bool ViewportTransformEnable; float LineWidth; #define _05pixels 0 #define _10pixels 1 #define _20pixels 2 #define _40pixels 3 uint32_t LineEndCapAntialiasingRegionWidth; - uint32_t LastPixelEnable; + bool LastPixelEnable; uint32_t TriangleStripListProvokingVertexSelect; uint32_t LineStripListProvokingVertexSelect; uint32_t TriangleFanProvokingVertexSelect; #define AALINEDISTANCE_TRUE 1 uint32_t AALineDistanceMode; - uint32_t SmoothPointEnable; + bool SmoothPointEnable; uint32_t VertexSubPixelPrecisionSelect; #define Vertex 0 #define State 1 @@ -4556,11 +4556,11 @@ struct GEN8_3DSTATE_SO_BUFFER { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t SOBufferEnable; + bool SOBufferEnable; uint32_t SOBufferIndex; struct GEN8_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; - uint32_t StreamOffsetWriteEnable; - uint32_t StreamOutputBufferOffsetAddressEnable; + bool StreamOffsetWriteEnable; + bool StreamOutputBufferOffsetAddressEnable; __gen_address_type SurfaceBaseAddress; uint32_t SurfaceSize; __gen_address_type StreamOutputBufferOffsetAddress; @@ -4809,7 +4809,7 @@ struct GEN8_3DSTATE_STREAMOUT { #define LEADING 0 #define TRAILING 1 uint32_t ReorderMode; - uint32_t SOStatisticsEnable; + bool SOStatisticsEnable; #define Normal 0 #define Resreved 1 #define Force_Off 2 @@ -4895,9 +4895,9 @@ struct GEN8_3DSTATE_TE { #define EVEN_FRACTIONAL 2 uint32_t Partitioning; #define POINT 0 -#define LINE 1 -#define TRI_CW 2 -#define TRI_CCW 3 +#define OUTPUT_LINE 1 +#define 
OUTPUT_TRI_CW 2 +#define OUTPUT_TRI_CCW 3 uint32_t OutputTopology; #define QUAD 0 #define TRI 1 @@ -4906,7 +4906,7 @@ struct GEN8_3DSTATE_TE { #define HW_TESS 0 #define SW_TESS 1 uint32_t TEMode; - uint32_t TEEnable; + bool TEEnable; float MaximumTessellationFactorOdd; float MaximumTessellationFactorNotOdd; }; @@ -5080,7 +5080,7 @@ struct GEN8_VERTEX_BUFFER_STATE { uint32_t VertexBufferIndex; struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; uint32_t AddressModifyEnable; - uint32_t NullVertexBuffer; + bool NullVertexBuffer; uint32_t BufferPitch; __gen_address_type BufferStartingAddress; uint32_t BufferSize; @@ -5152,9 +5152,9 @@ GEN8_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, struct GEN8_VERTEX_ELEMENT_STATE { uint32_t VertexBufferIndex; - uint32_t Valid; + bool Valid; uint32_t SourceElementFormat; - uint32_t EdgeFlagEnable; + bool EdgeFlagEnable; uint32_t SourceElementOffset; uint32_t Component0Control; uint32_t Component1Control; @@ -5225,7 +5225,7 @@ struct GEN8_3DSTATE_VF { uint32_t CommandSubType; uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t IndexedDrawCutIndexEnable; + bool IndexedDrawCutIndexEnable; uint32_t DwordLength; uint32_t CutIndex; }; @@ -5266,7 +5266,7 @@ struct GEN8_3DSTATE_VF_INSTANCING { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t InstancingEnable; + bool InstancingEnable; uint32_t VertexElementIndex; uint32_t InstanceDataStepRate; }; @@ -5311,14 +5311,14 @@ struct GEN8_3DSTATE_VF_SGVS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t InstanceIDEnable; + bool InstanceIDEnable; #define COMP_0 0 #define COMP_1 1 #define COMP_2 2 #define COMP_3 3 uint32_t InstanceIDComponentNumber; uint32_t InstanceIDElementOffset; - uint32_t VertexIDEnable; + bool VertexIDEnable; #define COMP_0 0 #define COMP_1 1 #define COMP_2 2 @@ -5365,7 +5365,7 @@ struct GEN8_3DSTATE_VF_STATISTICS { uint32_t CommandSubType; 
uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; - uint32_t StatisticsEnable; + bool StatisticsEnable; }; static inline void @@ -5513,11 +5513,11 @@ struct GEN8_3DSTATE_WM { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t StatisticsEnable; - uint32_t LegacyDepthBufferClearEnable; - uint32_t LegacyDepthBufferResolveEnable; - uint32_t LegacyHierarchicalDepthBufferResolveEnable; - uint32_t LegacyDiamondLineRasterization; + bool StatisticsEnable; + bool LegacyDepthBufferClearEnable; + bool LegacyDepthBufferResolveEnable; + bool LegacyHierarchicalDepthBufferResolveEnable; + bool LegacyDiamondLineRasterization; #define NORMAL 0 #define PSEXEC 1 #define PREPS 2 @@ -5541,8 +5541,8 @@ struct GEN8_3DSTATE_WM { #define _20pixels 2 #define _40pixels 3 uint32_t LineAntialiasingRegionWidth; - uint32_t PolygonStippleEnable; - uint32_t LineStippleEnable; + bool PolygonStippleEnable; + bool LineStippleEnable; #define RASTRULE_UPPER_LEFT 0 #define RASTRULE_UPPER_RIGHT 1 uint32_t PointRasterizationRule; @@ -5601,7 +5601,7 @@ struct GEN8_3DSTATE_WM_CHROMAKEY { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t ChromaKeyKillEnable; + bool ChromaKeyKillEnable; }; static inline void @@ -5648,13 +5648,11 @@ struct GEN8_3DSTATE_WM_DEPTH_STENCIL { uint32_t BackfaceStencilPassDepthPassOp; uint32_t StencilTestFunction; uint32_t DepthTestFunction; -#define False 0 -#define True 1 - uint32_t DoubleSidedStencilEnable; - uint32_t StencilTestEnable; - uint32_t StencilBufferWriteEnable; - uint32_t DepthTestEnable; - uint32_t DepthBufferWriteEnable; + bool DoubleSidedStencilEnable; + bool StencilTestEnable; + bool StencilBufferWriteEnable; + bool DepthTestEnable; + bool DepthBufferWriteEnable; uint32_t StencilTestMask; uint32_t StencilWriteMask; uint32_t BackfaceStencilTestMask; @@ -5716,13 +5714,13 @@ struct GEN8_3DSTATE_WM_HZ_OP { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - 
uint32_t StencilBufferClearEnable; - uint32_t DepthBufferClearEnable; - uint32_t ScissorRectangleEnable; - uint32_t DepthBufferResolveEnable; - uint32_t HierarchicalDepthBufferResolveEnable; + bool StencilBufferClearEnable; + bool DepthBufferClearEnable; + bool ScissorRectangleEnable; + bool DepthBufferResolveEnable; + bool HierarchicalDepthBufferResolveEnable; uint32_t PixelPositionOffsetEnable; - uint32_t FullSurfaceDepthClear; + bool FullSurfaceDepthClear; uint32_t StencilClearValue; uint32_t NumberofMultisamples; uint32_t ClearRectangleYMin; @@ -5788,8 +5786,8 @@ struct GEN8_GPGPU_WALKER { uint32_t Pipeline; uint32_t MediaCommandOpcode; uint32_t SubOpcode; - uint32_t IndirectParameterEnable; - uint32_t PredicateEnable; + bool IndirectParameterEnable; + bool PredicateEnable; uint32_t DwordLength; uint32_t InterfaceDescriptorOffset; uint32_t IndirectDataLength; @@ -5992,7 +5990,7 @@ struct GEN8_MEDIA_OBJECT { uint32_t MediaCommandSubOpcode; uint32_t DwordLength; uint32_t InterfaceDescriptorOffset; - uint32_t ChildrenPresent; + bool ChildrenPresent; #define Nothreadsynchronization 0 #define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 uint32_t ThreadSynchronization; @@ -6013,7 +6011,7 @@ struct GEN8_MEDIA_OBJECT { uint32_t ScoredboardY; uint32_t ScoreboardX; uint32_t ScoreboardColor; - uint32_t ScoreboardMask; + bool ScoreboardMask; /* variable length fields follow */ }; @@ -6096,7 +6094,7 @@ struct GEN8_MEDIA_OBJECT_GRPID { uint32_t ScoredboardY; uint32_t ScoreboardX; uint32_t ScoreboardColor; - uint32_t ScoreboardMask; + bool ScoreboardMask; uint32_t GroupID; /* variable length fields follow */ }; @@ -6167,8 +6165,8 @@ struct GEN8_MEDIA_OBJECT_PRT { uint32_t SubOpcode; uint32_t DwordLength; uint32_t InterfaceDescriptorOffset; - uint32_t ChildrenPresent; - uint32_t PRT_FenceNeeded; + bool ChildrenPresent; + bool PRT_FenceNeeded; #define Rootthreadqueue 0 #define VFEstateflush 1 uint32_t PRT_FenceType; @@ -6222,7 +6220,7 @@ struct 
GEN8_MEDIA_OBJECT_WALKER { uint32_t SubOpcode; uint32_t DwordLength; uint32_t InterfaceDescriptorOffset; - uint32_t ChildrenPresent; + bool ChildrenPresent; #define Nothreadsynchronization 0 #define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 uint32_t ThreadSynchronization; @@ -6232,7 +6230,7 @@ struct GEN8_MEDIA_OBJECT_WALKER { uint32_t IndirectDataLength; uint32_t IndirectDataStartAddress; uint32_t GroupIDLoopSelect; - uint32_t ScoreboardMask; + bool ScoreboardMask; uint32_t ColorCountMinusOne; uint32_t MiddleLoopExtraSteps; uint32_t LocalMidLoopUnitY; @@ -6368,7 +6366,7 @@ struct GEN8_MEDIA_STATE_FLUSH { uint32_t MediaCommandOpcode; uint32_t SubOpcode; uint32_t DwordLength; - uint32_t FlushtoGO; + bool FlushtoGO; uint32_t WatermarkRequired; uint32_t InterfaceDescriptorOffset; }; @@ -6581,9 +6579,9 @@ struct GEN8_MI_BATCH_BUFFER_START { #define _1stlevelbatch 0 #define _2ndlevelbatch 1 uint32_t _2ndLevelBatchBuffer; - uint32_t AddOffsetEnable; + bool AddOffsetEnable; uint32_t PredicationEnable; - uint32_t ResourceStreamerEnable; + bool ResourceStreamerEnable; #define ASI_GGTT 0 #define ASI_PPGTT 1 uint32_t AddressSpaceIndicator; @@ -6826,7 +6824,7 @@ GEN8_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, struct GEN8_MI_LOAD_REGISTER_MEM { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t UseGlobalGTT; + bool UseGlobalGTT; uint32_t AsyncModeEnable; uint32_t DwordLength; uint32_t RegisterAddress; @@ -6918,7 +6916,7 @@ struct GEN8_MI_LOAD_SCAN_LINES_INCL { #define NeverForward 0 #define AlwaysForward 1 #define ConditionallyForward 2 - uint32_t ScanLineEventDoneForward; + bool ScanLineEventDoneForward; uint32_t DwordLength; uint32_t StartScanLineNumber; uint32_t EndScanLineNumber; @@ -7033,7 +7031,7 @@ GEN8_MI_MATH_pack(__gen_user_data *data, void * restrict dst, struct GEN8_MI_NOOP { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t IdentificationNumberRegisterWriteEnable; + bool 
IdentificationNumberRegisterWriteEnable; uint32_t IdentificationNumber; }; @@ -7061,9 +7059,9 @@ GEN8_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, struct GEN8_MI_PREDICATE { uint32_t CommandType; uint32_t MICommandOpcode; -#define KEEP 0 -#define LOAD 2 -#define LOADINV 3 +#define LOAD_KEEP 0 +#define LOAD_LOAD 2 +#define LOAD_LOADINV 3 uint32_t LoadOperation; #define COMBINE_SET 0 #define COMBINE_AND 1 @@ -7124,8 +7122,8 @@ GEN8_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, struct GEN8_MI_RS_CONTEXT { uint32_t CommandType; uint32_t MICommandOpcode; -#define Restore 0 -#define Save 1 +#define RS_RESTORE 0 +#define RS_SAVE 1 uint32_t ResourceStreamerSave; }; @@ -7152,8 +7150,8 @@ GEN8_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, struct GEN8_MI_RS_CONTROL { uint32_t CommandType; uint32_t MICommandOpcode; -#define Stop 0 -#define Start 1 +#define RS_STOP 0 +#define RS_START 1 uint32_t ResourceStreamerControl; }; @@ -7228,9 +7226,9 @@ struct GEN8_MI_SET_CONTEXT { uint32_t DwordLength; __gen_address_type LogicalContextAddress; uint32_t ReservedMustbe1; - uint32_t CoreModeEnable; - uint32_t ResourceStreamerStateSaveEnable; - uint32_t ResourceStreamerStateRestoreEnable; + bool CoreModeEnable; + bool ResourceStreamerStateSaveEnable; + bool ResourceStreamerStateRestoreEnable; uint32_t ForceRestore; uint32_t RestoreInhibit; }; @@ -7306,8 +7304,8 @@ GEN8_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, struct GEN8_MI_STORE_DATA_IMM { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t UseGlobalGTT; - uint32_t StoreQword; + bool UseGlobalGTT; + bool StoreQword; uint32_t DwordLength; __gen_address_type Address; uint32_t CoreModeEnable; @@ -7444,7 +7442,7 @@ GEN8_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, struct GEN8_MI_SUSPEND_FLUSH { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t SuspendFlush; + bool SuspendFlush; }; static inline void @@ -7616,18 +7614,18 @@ 
GEN8_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, struct GEN8_MI_WAIT_FOR_EVENT { uint32_t CommandType; uint32_t MICommandOpcode; - uint32_t DisplayPipeCVerticalBlankWaitEnable; - uint32_t DisplaySpriteCFlipPendingWaitEnable; - uint32_t DisplayPlaneCFlipPendingWaitEnable; - uint32_t DisplayPipeCScanLineWaitEnable; - uint32_t DisplayPipeBVerticalBlankWaitEnable; - uint32_t DisplaySpriteBFlipPendingWaitEnable; - uint32_t DisplayPlaneBFlipPendingWaitEnable; - uint32_t DisplayPipeBScanLineWaitEnable; - uint32_t DisplayPipeAVerticalBlankWaitEnable; - uint32_t DisplaySpriteAFlipPendingWaitEnable; - uint32_t DisplayPlaneAFlipPendingWaitEnable; - uint32_t DisplayPipeAScanLineWaitEnable; + bool DisplayPipeCVerticalBlankWaitEnable; + bool DisplaySpriteCFlipPendingWaitEnable; + bool DisplayPlaneCFlipPendingWaitEnable; + bool DisplayPipeCScanLineWaitEnable; + bool DisplayPipeBVerticalBlankWaitEnable; + bool DisplaySpriteBFlipPendingWaitEnable; + bool DisplayPlaneBFlipPendingWaitEnable; + bool DisplayPipeBScanLineWaitEnable; + bool DisplayPipeAVerticalBlankWaitEnable; + bool DisplaySpriteAFlipPendingWaitEnable; + bool DisplayPlaneAFlipPendingWaitEnable; + bool DisplayPipeAScanLineWaitEnable; }; static inline void @@ -7682,29 +7680,29 @@ struct GEN8_PIPE_CONTROL { #define Reset 1 uint32_t GlobalSnapshotCountReset; uint32_t TLBInvalidate; - uint32_t GenericMediaStateClear; + bool GenericMediaStateClear; #define NoWrite 0 #define WriteImmediateData 1 #define WritePSDepthCount 2 #define WriteTimestamp 3 uint32_t PostSyncOperation; - uint32_t DepthStallEnable; + bool DepthStallEnable; #define DisableFlush 0 #define EnableFlush 1 - uint32_t RenderTargetCacheFlushEnable; - uint32_t InstructionCacheInvalidateEnable; - uint32_t TextureCacheInvalidationEnable; - uint32_t IndirectStatePointersDisable; - uint32_t NotifyEnable; - uint32_t PipeControlFlushEnable; - uint32_t DCFlushEnable; - uint32_t VFCacheInvalidationEnable; - uint32_t ConstantCacheInvalidationEnable; 
- uint32_t StateCacheInvalidationEnable; - uint32_t StallAtPixelScoreboard; + bool RenderTargetCacheFlushEnable; + bool InstructionCacheInvalidateEnable; + bool TextureCacheInvalidationEnable; + bool IndirectStatePointersDisable; + bool NotifyEnable; + bool PipeControlFlushEnable; + bool DCFlushEnable; + bool VFCacheInvalidationEnable; + bool ConstantCacheInvalidationEnable; + bool StateCacheInvalidationEnable; + bool StallAtPixelScoreboard; #define FlushDisabled 0 #define FlushEnabled 1 - uint32_t DepthCacheFlushEnable; + bool DepthCacheFlushEnable; __gen_address_type Address; __gen_address_type AddressHigh; uint64_t ImmediateData; @@ -7904,26 +7902,26 @@ GEN8_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, #define GEN8_BLEND_STATE_length 0x00000011 struct GEN8_BLEND_STATE_ENTRY { - uint32_t LogicOpEnable; + bool LogicOpEnable; uint32_t LogicOpFunction; uint32_t PreBlendSourceOnlyClampEnable; #define COLORCLAMP_UNORM 0 #define COLORCLAMP_SNORM 1 #define COLORCLAMP_RTFORMAT 2 uint32_t ColorClampRange; - uint32_t PreBlendColorClampEnable; - uint32_t PostBlendColorClampEnable; - uint32_t ColorBufferBlendEnable; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; + bool ColorBufferBlendEnable; uint32_t SourceBlendFactor; uint32_t DestinationBlendFactor; uint32_t ColorBlendFunction; uint32_t SourceAlphaBlendFactor; uint32_t DestinationAlphaBlendFactor; uint32_t AlphaBlendFunction; - uint32_t WriteDisableAlpha; - uint32_t WriteDisableRed; - uint32_t WriteDisableGreen; - uint32_t WriteDisableBlue; + bool WriteDisableAlpha; + bool WriteDisableRed; + bool WriteDisableGreen; + bool WriteDisableBlue; }; static inline void @@ -7955,13 +7953,13 @@ GEN8_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, } struct GEN8_BLEND_STATE { - uint32_t AlphaToCoverageEnable; - uint32_t IndependentAlphaBlendEnable; - uint32_t AlphaToOneEnable; - uint32_t AlphaToCoverageDitherEnable; - uint32_t AlphaTestEnable; + bool AlphaToCoverageEnable; + 
bool IndependentAlphaBlendEnable; + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; + bool AlphaTestEnable; uint32_t AlphaTestFunction; - uint32_t ColorDitherEnable; + bool ColorDitherEnable; uint32_t XDitherOffset; uint32_t YDitherOffset; struct GEN8_BLEND_STATE_ENTRY Entry; @@ -8083,12 +8081,12 @@ struct GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS { #define LLCeLLCAllowed 2 #define L3LLCeLLCAllowed 3 uint32_t TargetCacheTC; - uint32_t EncryptedData; + bool EncryptedData; #define PoorChance 3 #define NormalChance 2 #define BetterChance 1 #define BestChance 0 - uint32_t AgeforQUADLRUAGE; + bool AgeforQUADLRUAGE; }; static inline void @@ -8123,9 +8121,9 @@ struct GEN8_INTERFACE_DESCRIPTOR_DATA { #define IEEE754 0 #define Alternate 1 uint32_t FloatingPointMode; - uint32_t IllegalOpcodeExceptionEnable; - uint32_t MaskStackExceptionEnable; - uint32_t SoftwareExceptionEnable; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; uint32_t SamplerStatePointer; #define Nosamplersused 0 #define Between1and4samplersused 1 @@ -8142,7 +8140,7 @@ struct GEN8_INTERFACE_DESCRIPTOR_DATA { #define RD 2 #define RTZ 3 uint32_t RoundingMode; - uint32_t BarrierEnable; + bool BarrierEnable; #define Encodes0k 0 #define Encodes4k 1 #define Encodes8k 2 @@ -8237,7 +8235,7 @@ struct GEN8_RENDER_SURFACE_STATE { #define SURFTYPE_STRBUF 5 #define SURFTYPE_NULL 7 uint32_t SurfaceType; - uint32_t SurfaceArray; + bool SurfaceArray; uint32_t SurfaceFormat; #define VALIGN4 1 #define VALIGN8 2 @@ -8254,7 +8252,7 @@ struct GEN8_RENDER_SURFACE_STATE { uint32_t TileMode; uint32_t VerticalLineStride; uint32_t VerticalLineStrideOffset; - uint32_t SamplerL2BypassModeDisable; + bool SamplerL2BypassModeDisable; #define WriteOnlyCache 0 #define ReadWriteCache 1 uint32_t RenderCacheReadWriteMode; @@ -8262,12 +8260,12 @@ struct GEN8_RENDER_SURFACE_STATE { #define PROGRESSIVE_FRAME 2 #define INTERLACED_FRAME 3 uint32_t MediaBoundaryPixelMode; - 
uint32_t CubeFaceEnablePositiveZ; - uint32_t CubeFaceEnableNegativeZ; - uint32_t CubeFaceEnablePositiveY; - uint32_t CubeFaceEnableNegativeY; - uint32_t CubeFaceEnablePositiveX; - uint32_t CubeFaceEnableNegativeX; + bool CubeFaceEnablePositiveZ; + bool CubeFaceEnableNegativeZ; + bool CubeFaceEnablePositiveY; + bool CubeFaceEnableNegativeY; + bool CubeFaceEnablePositiveX; + bool CubeFaceEnableNegativeX; struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; float BaseMipLevel; uint32_t SurfaceQPitch; @@ -8292,7 +8290,7 @@ struct GEN8_RENDER_SURFACE_STATE { uint32_t MultisamplePositionPaletteIndex; uint32_t XOffset; uint32_t YOffset; - uint32_t EWADisableForCube; + bool EWADisableForCube; #define GPUcoherent 0 #define IAcoherent 1 uint32_t CoherencyType; @@ -8305,7 +8303,7 @@ struct GEN8_RENDER_SURFACE_STATE { #define AUX_APPEND 2 #define AUX_HIZ 3 uint32_t AuxiliarySurfaceMode; - uint32_t SeparateUVPlaneEnable; + bool SeparateUVPlaneEnable; uint32_t XOffsetforUorUVPlane; uint32_t YOffsetforUorUVPlane; uint32_t RedClearColor; @@ -8445,7 +8443,7 @@ GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, #define GEN8_SAMPLER_STATE_length 0x00000004 struct GEN8_SAMPLER_STATE { - uint32_t SamplerDisable; + bool SamplerDisable; #define DX10OGL 0 #define DX9 1 uint32_t TextureBorderColorMode; @@ -8473,7 +8471,7 @@ struct GEN8_SAMPLER_STATE { uint32_t AnisotropicAlgorithm; float MinLOD; float MaxLOD; - uint32_t ChromaKeyEnable; + bool ChromaKeyEnable; uint32_t ChromaKeyIndex; #define KEYFILTER_KILL_ON_ANY_MATCH 0 #define KEYFILTER_REPLACE_BLACK 1 @@ -8503,18 +8501,18 @@ struct GEN8_SAMPLER_STATE { #define RATIO141 6 #define RATIO161 7 uint32_t MaximumAnisotropy; - uint32_t RAddressMinFilterRoundingEnable; - uint32_t RAddressMagFilterRoundingEnable; - uint32_t VAddressMinFilterRoundingEnable; - uint32_t VAddressMagFilterRoundingEnable; - uint32_t UAddressMinFilterRoundingEnable; - uint32_t UAddressMagFilterRoundingEnable; + bool 
RAddressMinFilterRoundingEnable; + bool RAddressMagFilterRoundingEnable; + bool VAddressMinFilterRoundingEnable; + bool VAddressMagFilterRoundingEnable; + bool UAddressMinFilterRoundingEnable; + bool UAddressMagFilterRoundingEnable; #define FULL 0 #define HIGH 1 #define MED 2 #define LOW 3 uint32_t TrilinearFilterQuality; - uint32_t NonnormalizedCoordinateEnable; + bool NonnormalizedCoordinateEnable; uint32_t TCXAddressControlMode; uint32_t TCYAddressControlMode; uint32_t TCZAddressControlMode; @@ -8580,7 +8578,7 @@ GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, #define _3DPRIM_QUADLIST 7 #define _3DPRIM_QUADSTRIP 8 #define _3DPRIM_LINELIST_ADJ 9 -#define _3DPRIM_LISTSTRIP_ADJ 10 +#define _3DPRIM_LINESTRIP_ADJ 10 #define _3DPRIM_TRILIST_ADJ 11 #define _3DPRIM_TRISTRIP_ADJ 12 #define _3DPRIM_TRISTRIP_REVERSE 13 @@ -8634,21 +8632,21 @@ GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, #define VFCOMP_STORE_PID 7 /* Enum WRAP_SHORTEST_ENABLE */ -#define X 1 -#define Y 2 -#define XY 3 -#define Z 4 -#define XZ 5 -#define YZ 6 -#define XYZ 7 -#define W 8 -#define XW 9 -#define YW 10 -#define XYW 11 -#define ZW 12 -#define XZW 13 -#define YZW 14 -#define XYZW 15 +#define WSE_X 1 +#define WSE_Y 2 +#define WSE_XY 3 +#define WSE_Z 4 +#define WSE_XZ 5 +#define WSE_YZ 6 +#define WSE_XYZ 7 +#define WSE_W 8 +#define WSE_XW 9 +#define WSE_YW 10 +#define WSE_XYW 11 +#define WSE_ZW 12 +#define WSE_XZW 13 +#define WSE_YZW 14 +#define WSE_XYZW 15 /* Enum 3D_Stencil_Operation */ #define STENCILOP_KEEP 0 diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index cd4294bfa08..bf8b3a0bea2 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -136,7 +136,7 @@ emit_ia_state(struct anv_pipeline *pipeline, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = 
_3DPRIM_LISTSTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 -- cgit v1.2.3 From a1eea996d4f851a68e1ade138237f22be2749470 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 6 Jul 2015 22:21:51 -0700 Subject: vk: Emit 3DSTATE_SAMPLE_MASK This was missing and was causing the driver to not work with execlists. Presumably we get a different initial hw context with execlists enabled, that has sample mask 0 initially. Set this to 0xffff for now. When we add MS support, we need to take the value from VkPipelineMsStateCreateInfo::sampleMask. --- src/vulkan/pipeline.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index bf8b3a0bea2..5003156e914 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -538,6 +538,9 @@ anv_pipeline_create( .PixelLocation = CENTER, .NumberofMultisamples = log2_samples); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SAMPLE_MASK, + .SampleMask = 0xffff); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_VS, .VSURBStartingAddress = pipeline->urb.vs_start, .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, -- cgit v1.2.3 From 245583075ce09fe19eec6ef9e7dd1740dfec6e46 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 11:26:26 -0700 Subject: vk/vulkan.h: Remove UINT8 index buffers --- include/vulkan/vulkan.h | 9 ++++----- src/vulkan/device.c | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index baa5c6a4b5e..a150f0f875e 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -839,12 +839,11 @@ typedef enum { } VkStateBindPoint; typedef enum { - VK_INDEX_TYPE_UINT8 = 0, - VK_INDEX_TYPE_UINT16 = 1, - VK_INDEX_TYPE_UINT32 
= 2, - VK_INDEX_TYPE_BEGIN_RANGE = VK_INDEX_TYPE_UINT8, + VK_INDEX_TYPE_UINT16 = 0, + VK_INDEX_TYPE_UINT32 = 1, + VK_INDEX_TYPE_BEGIN_RANGE = VK_INDEX_TYPE_UINT16, VK_INDEX_TYPE_END_RANGE = VK_INDEX_TYPE_UINT32, - VK_INDEX_TYPE_NUM = (VK_INDEX_TYPE_UINT32 - VK_INDEX_TYPE_UINT8 + 1), + VK_INDEX_TYPE_NUM = (VK_INDEX_TYPE_UINT32 - VK_INDEX_TYPE_UINT16 + 1), VK_INDEX_TYPE_MAX_ENUM = 0x7FFFFFFF } VkIndexType; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index b1cc618fdd3..5237f2d6f82 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2717,7 +2717,6 @@ void anv_CmdBindIndexBuffer( struct anv_buffer *buffer = (struct anv_buffer *) _buffer; static const uint32_t vk_to_gen_index_type[] = { - [VK_INDEX_TYPE_UINT8] = INDEX_BYTE, [VK_INDEX_TYPE_UINT16] = INDEX_WORD, [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, }; -- cgit v1.2.3 From 1fb859e4b279a95ad8f3f3c00dc3597e8acb6f5b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 12:35:32 -0700 Subject: vk/vulkan.h: Remove client-settable pointSize from DynamicRsState --- include/vulkan/vulkan.h | 2 -- src/vulkan/device.c | 7 +------ 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index a150f0f875e..e2f5fd08195 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1637,8 +1637,6 @@ typedef struct { float depthBias; float depthBiasClamp; float slopeScaledDepthBias; - float pointSize; - float pointFadeThreshold; float lineWidth; } VkDynamicRsStateCreateInfo; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 5237f2d6f82..60d6910577e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2031,15 +2031,10 @@ VkResult anv_CreateDynamicRasterState( if (state == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - /* Missing these: - * float pointFadeThreshold; - * // optional (GL45) - Size of point fade threshold - */ - struct GEN8_3DSTATE_SF sf = { GEN8_3DSTATE_SF_header, .LineWidth = 
pCreateInfo->lineWidth, - .PointWidth = pCreateInfo->pointSize, + .PointWidth = 1.0, }; GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); -- cgit v1.2.3 From 7fbed521bb0fba037a6f4b0f7a8adad77cda0736 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 15:11:56 -0700 Subject: vk/vulkan.h: Remove the explicit primitive restart index Unfortunately, this requires some non-trivial changes to the driver. Now that the primitive restart index isn't given explicitly by the client, we always use ~0 for everything like D3D does. Unfortunately, our hardware is awesome and a 32-bit version of ~0 doesn't match any 16-bit values. This means, we have to set it to either UINT16_MAX or UINT32_MAX depending on the size of the index type. Since we get the index type from CmdBindIndexBuffer and the rest of the VF packet from the pipeline, we need to lazy-emit the VF packet. --- include/vulkan/vulkan.h | 1 - src/vulkan/device.c | 14 ++++++++++++++ src/vulkan/meta.c | 2 -- src/vulkan/pipeline.c | 9 ++++++--- src/vulkan/private.h | 3 +++ 5 files changed, 23 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 7a11f96a9b3..4e2d8dfc7cc 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1452,7 +1452,6 @@ typedef struct { VkPrimitiveTopology topology; bool32_t disableVertexReuse; bool32_t primitiveRestartEnable; - uint32_t primitiveRestartIndex; } VkPipelineIaStateCreateInfo; typedef struct { diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 60d6910577e..11b820dc2ce 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2227,6 +2227,7 @@ VkResult anv_CreateCommandBuffer( cmd_buffer->vp_state = NULL; cmd_buffer->cb_state = NULL; cmd_buffer->ds_state = NULL; + memset(&cmd_buffer->state_vf, 0, sizeof(cmd_buffer->state_vf)); memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors)); result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo); @@ -2716,6 +2717,14 
@@ void anv_CmdBindIndexBuffer( [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, }; + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX, + }; + GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state_vf, &vf); + + cmd_buffer->dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, .IndexFormat = vk_to_gen_index_type[indexType], .MemoryObjectControlState = GEN8_MOCS, @@ -3181,6 +3190,11 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) .ColorCalcStatePointerValid = true); } + if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state_vf, pipeline->state_vf); + } + cmd_buffer->vb_dirty &= ~vb_emit; cmd_buffer->dirty = 0; } diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index b782279e7b9..6c1f57a6956 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -38,7 +38,6 @@ anv_device_init_meta_clear_state(struct anv_device *device) .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, .disableVertexReuse = false, .primitiveRestartEnable = false, - .primitiveRestartIndex = 0 }; /* We don't use a vertex shader for clearing, but instead build and pass @@ -314,7 +313,6 @@ anv_device_init_meta_blit_state(struct anv_device *device) .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, .disableVertexReuse = false, .primitiveRestartEnable = false, - .primitiveRestartIndex = 0 }; /* We don't use a vertex shader for clearing, but instead build and pass diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 5003156e914..f0f578706e9 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -146,9 +146,12 @@ emit_ia_state(struct anv_pipeline *pipeline, if (extra && extra->use_rectlist) topology = _3DPRIM_RECTLIST; - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF, - .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, - .CutIndex = 
info->primitiveRestartIndex); + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, + }; + GEN8_3DSTATE_VF_pack(NULL, pipeline->state_vf, &vf); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY, .PrimitiveTopologyType = topology); } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index cb290ffea99..e0b18eaeddf 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -642,6 +642,7 @@ struct anv_buffer { #define ANV_CMD_BUFFER_DS_DIRTY (1 << 3) #define ANV_CMD_BUFFER_CB_DIRTY (1 << 4) #define ANV_CMD_BUFFER_VP_DIRTY (1 << 5) +#define ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY (1 << 6) struct anv_vertex_binding { struct anv_buffer * buffer; @@ -687,6 +688,7 @@ struct anv_cmd_buffer { struct anv_dynamic_ds_state * ds_state; struct anv_dynamic_vp_state * vp_state; struct anv_dynamic_cb_state * cb_state; + uint32_t state_vf[GEN8_3DSTATE_VF_length]; struct anv_vertex_binding vertex_bindings[MAX_VBS]; struct anv_descriptor_set_binding descriptors[MAX_SETS]; }; @@ -746,6 +748,7 @@ struct anv_pipeline { uint32_t binding_stride[MAX_VBS]; uint32_t state_sf[GEN8_3DSTATE_SF_length]; + uint32_t state_vf[GEN8_3DSTATE_VF_length]; uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; -- cgit v1.2.3 From 788a8352b9f8db54304f68e15a568563e0341bce Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 14:42:47 -0700 Subject: vk/vulkan.h: Remove some unused fields. 
In particular, the following are removed: - disableVertexReuse - clipOrigin - depthMode - pointOrigin - provokingVertex --- include/vulkan/vulkan.h | 26 -------------------------- src/vulkan/meta.c | 2 -- src/vulkan/pipeline.c | 19 +++++-------------- 3 files changed, 5 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 4e2d8dfc7cc..bef5df4785b 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1061,27 +1061,6 @@ typedef enum { } VkMemoryInputFlagBits; typedef VkFlags VkMemoryInputFlags; -typedef enum { - VK_PROVOKING_VERTEX_FIRST = 0x00000000, - VK_PROVOKING_VERTEX_LAST = 0x00000001, - - VK_ENUM_RANGE(PROVOKING_VERTEX, FIRST, LAST) -} VkProvokingVertex; - -typedef enum { - VK_COORDINATE_ORIGIN_UPPER_LEFT = 0x00000000, - VK_COORDINATE_ORIGIN_LOWER_LEFT = 0x00000001, - - VK_ENUM_RANGE(COORDINATE_ORIGIN, UPPER_LEFT, LOWER_LEFT) -} VkCoordinateOrigin; - -typedef enum { - VK_DEPTH_MODE_ZERO_TO_ONE = 0x00000000, - VK_DEPTH_MODE_NEGATIVE_ONE_TO_ONE = 0x00000001, - - VK_ENUM_RANGE(DEPTH_MODE, ZERO_TO_ONE, NEGATIVE_ONE_TO_ONE) -} VkDepthMode; - typedef enum { // Info type for vkGetPhysicalDeviceInfo() VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES = 0x00000000, @@ -1450,7 +1429,6 @@ typedef struct { VkStructureType sType; const void* pNext; VkPrimitiveTopology topology; - bool32_t disableVertexReuse; bool32_t primitiveRestartEnable; } VkPipelineIaStateCreateInfo; @@ -1464,8 +1442,6 @@ typedef struct { VkStructureType sType; const void* pNext; uint32_t viewportCount; - VkCoordinateOrigin clipOrigin; - VkDepthMode depthMode; } VkPipelineVpStateCreateInfo; typedef struct { @@ -1474,8 +1450,6 @@ typedef struct { bool32_t depthClipEnable; bool32_t rasterizerDiscardEnable; bool32_t programPointSize; - VkCoordinateOrigin pointOrigin; - VkProvokingVertex provokingVertex; VkFillMode fillMode; VkCullMode cullMode; VkFrontFace frontFace; diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 
6c1f57a6956..1613e4356cf 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -36,7 +36,6 @@ anv_device_init_meta_clear_state(struct anv_device *device) VkPipelineIaStateCreateInfo ia_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .disableVertexReuse = false, .primitiveRestartEnable = false, }; @@ -311,7 +310,6 @@ anv_device_init_meta_blit_state(struct anv_device *device) VkPipelineIaStateCreateInfo ia_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .disableVertexReuse = false, .primitiveRestartEnable = false, }; diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index f0f578706e9..c500c7a0a33 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -177,21 +177,13 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, [VK_FRONT_FACE_CCW] = CounterClockwise, [VK_FRONT_FACE_CW] = Clockwise }; - - static const uint32_t vk_to_gen_coordinate_origin[] = { - [VK_COORDINATE_ORIGIN_UPPER_LEFT] = UPPERLEFT, - [VK_COORDINATE_ORIGIN_LOWER_LEFT] = LOWERLEFT - }; struct GEN8_3DSTATE_SF sf = { GEN8_3DSTATE_SF_header, .ViewportTransformEnable = !(extra && extra->disable_viewport), - .TriangleStripListProvokingVertexSelect = - info->provokingVertex == VK_PROVOKING_VERTEX_FIRST ? 0 : 2, - .LineStripListProvokingVertexSelect = - info->provokingVertex == VK_PROVOKING_VERTEX_FIRST ? 0 : 1, - .TriangleFanProvokingVertexSelect = - info->provokingVertex == VK_PROVOKING_VERTEX_FIRST ? 0 : 2, + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, .PointWidthSource = info->programPointSize ? 
Vertex : State, }; @@ -214,8 +206,7 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, .ForceVertexURBEntryReadLength = false, .ForceVertexURBEntryReadOffset = false, - .PointSpriteTextureCoordinateOrigin = - vk_to_gen_coordinate_origin[info->pointOrigin], + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs); @@ -645,7 +636,7 @@ anv_pipeline_create( .MaximumNumberofThreads = device->info.max_vs_threads - 1, .StatisticsEnable = false, .SIMD8DispatchEnable = true, - .VertexCacheDisable = ia_info->disableVertexReuse, + .VertexCacheDisable = false, .FunctionEnable = true, .VertexURBEntryOutputReadOffset = offset, -- cgit v1.2.3 From 4af79ab076942092ce66c746e67513c46857b85a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 7 Jul 2015 15:42:04 -0700 Subject: vk: Add func anv_clear_mask() A little helper func for inspecting and clearing bitmasks. 
--- src/vulkan/private.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index e0b18eaeddf..cee152bada0 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -90,6 +90,17 @@ anv_minify(uint32_t n, uint32_t levels) return MAX(n >> levels, 1); } +static inline bool +anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) +{ + if (*inout_mask & clear_mask) { + *inout_mask &= ~clear_mask; + return true; + } else { + return false; + } +} + #define for_each_bit(b, dword) \ for (uint32_t __dword = (dword); \ (b) = __builtin_ffs(__dword) - 1, __dword; \ -- cgit v1.2.3 From 18ee32ef9da1dc960e9c0cb34512b23f4a6983ce Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 7 Jul 2015 15:42:38 -0700 Subject: vk: Update vkCmdPipelineBarrier to 0.130 header --- include/vulkan/vulkan.h | 20 ++++++++++++++++---- src/vulkan/device.c | 49 +++++++++++++++++++++++++++---------------------- 2 files changed, 43 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index bef5df4785b..29386263bed 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1034,6 +1034,19 @@ typedef enum { } VkCmdBufferOptimizeFlagBits; typedef VkFlags VkCmdBufferOptimizeFlags; +typedef enum { + VK_PIPE_EVENT_TOP_OF_PIPE_BIT = 0x00000001, + VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE_BIT = 0x00000002, + VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE_BIT = 0x00000004, + VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE_BIT = 0x00000008, + VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE_BIT = 0x00000010, + VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE_BIT = 0x00000020, + VK_PIPE_EVENT_TRANSFER_COMPLETE_BIT = 0x00000040, + VK_PIPE_EVENT_COMMANDS_COMPLETE_BIT = 0x00000080, + VK_PIPE_EVENT_CPU_SIGNAL_BIT = 0x00000100, +} VkPipeEventFlagBits; +typedef VkFlags VkPipeEventFlags; + typedef enum { VK_QUERY_CONTROL_CONSERVATIVE_BIT = 0x00000001, } VkQueryControlFlagBits; @@ -1974,7 
+1987,7 @@ typedef void (VKAPI *PFN_vkCmdResolveImage)(VkCmdBuffer cmdBuffer, VkImage srcIm typedef void (VKAPI *PFN_vkCmdSetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipeEvent pipeEvent); typedef void (VKAPI *PFN_vkCmdResetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipeEvent pipeEvent); typedef void (VKAPI *PFN_vkCmdWaitEvents)(VkCmdBuffer cmdBuffer, VkWaitEvent waitEvent, uint32_t eventCount, const VkEvent* pEvents, uint32_t memBarrierCount, const void** ppMemBarriers); -typedef void (VKAPI *PFN_vkCmdPipelineBarrier)(VkCmdBuffer cmdBuffer, VkWaitEvent waitEvent, uint32_t pipeEventCount, const VkPipeEvent* pPipeEvents, uint32_t memBarrierCount, const void** ppMemBarriers); +typedef void (VKAPI *PFN_vkCmdPipelineBarrier)(VkCmdBuffer cmdBuffer, VkWaitEvent waitEvent, VkPipeEventFlags pipeEventMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); typedef void (VKAPI *PFN_vkCmdBeginQuery)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot, VkQueryControlFlags flags); typedef void (VKAPI *PFN_vkCmdEndQuery)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot); typedef void (VKAPI *PFN_vkCmdResetQueryPool)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); @@ -2536,10 +2549,9 @@ void VKAPI vkCmdWaitEvents( void VKAPI vkCmdPipelineBarrier( VkCmdBuffer cmdBuffer, VkWaitEvent waitEvent, - uint32_t pipeEventCount, - const VkPipeEvent* pPipeEvents, + VkPipeEventFlags pipeEventMask, uint32_t memBarrierCount, - const void** ppMemBarriers); + const void* const* ppMemBarriers); void VKAPI vkCmdBeginQuery( VkCmdBuffer cmdBuffer, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 11b820dc2ce..39229586d51 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3405,10 +3405,9 @@ void anv_CmdWaitEvents( void anv_CmdPipelineBarrier( VkCmdBuffer cmdBuffer, VkWaitEvent waitEvent, - uint32_t pipeEventCount, - const VkPipeEvent* pPipeEvents, + VkPipeEventFlags pipeEventMask, uint32_t 
memBarrierCount, - const void** ppMemBarriers) + const void* const* ppMemBarriers) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; uint32_t b, *dw; @@ -3420,27 +3419,33 @@ void anv_CmdPipelineBarrier( /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */ - for (uint32_t i = 0; i < pipeEventCount; i++) { - switch (pPipeEvents[i]) { - case VK_PIPE_EVENT_TOP_OF_PIPE: - /* This is just what PIPE_CONTROL does */ - break; - case VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE: - case VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE: - case VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE: - cmd.StallAtPixelScoreboard = true; - break; - case VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE: - case VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE: - case VK_PIPE_EVENT_TRANSFER_COMPLETE: - case VK_PIPE_EVENT_COMMANDS_COMPLETE: - cmd.CommandStreamerStallEnable = true; - break; - default: - unreachable("Invalid VkPipeEvent"); - } + if (anv_clear_mask(&pipeEventMask, VK_PIPE_EVENT_TOP_OF_PIPE_BIT)) { + /* This is just what PIPE_CONTROL does */ + } + + if (anv_clear_mask(&pipeEventMask, + VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE_BIT | + VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE_BIT | + VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE_BIT)) { + cmd.StallAtPixelScoreboard = true; + } + + + if (anv_clear_mask(&pipeEventMask, + VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE_BIT | + VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE_BIT | + VK_PIPE_EVENT_TRANSFER_COMPLETE_BIT | + VK_PIPE_EVENT_COMMANDS_COMPLETE_BIT)) { + cmd.CommandStreamerStallEnable = true; } + if (anv_clear_mask(&pipeEventMask, VK_PIPE_EVENT_CPU_SIGNAL_BIT)) { + anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); + } + + /* We checked all known VkPipeEventFlags. */ + anv_assert(pipeEventMask == 0); + /* XXX: Right now, we're really dumb and just flush whatever categories * the app asks for. One of these days we may make this a bit better * but right now that's all the hardware allows for in most areas. 
-- cgit v1.2.3 From f78d68477290d05fd67042cdb31c8f9330645290 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 7 Jul 2015 15:46:19 -0700 Subject: vk: Stub vkCmdPushConstants() from 0.130 header --- include/vulkan/vulkan.h | 9 +++++++++ src/vulkan/device.c | 11 +++++++++++ 2 files changed, 20 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 29386263bed..d03fb9928a0 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1993,6 +1993,7 @@ typedef void (VKAPI *PFN_vkCmdEndQuery)(VkCmdBuffer cmdBuffer, VkQueryPool query typedef void (VKAPI *PFN_vkCmdResetQueryPool)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); typedef void (VKAPI *PFN_vkCmdWriteTimestamp)(VkCmdBuffer cmdBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset); typedef void (VKAPI *PFN_vkCmdCopyQueryPoolResults)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize destStride, VkQueryResultFlags flags); +typedef void (VKAPI *PFN_vkCmdPushConstants)(VkCmdBuffer cmdBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t start, uint32_t length, const void* values); typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); typedef void (VKAPI *PFN_vkCmdBeginRenderPass)(VkCmdBuffer cmdBuffer, const VkRenderPassBegin* pRenderPassBegin); @@ -2586,6 +2587,14 @@ void VKAPI vkCmdCopyQueryPoolResults( VkDeviceSize destStride, VkQueryResultFlags flags); +void VKAPI vkCmdPushConstants( + VkCmdBuffer cmdBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t start, + uint32_t length, + const void* values); + void VKAPI 
vkCmdBeginRenderPass( VkCmdBuffer cmdBuffer, const VkRenderPassBegin* pRenderPassBegin); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 39229586d51..8b1894c933d 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3686,6 +3686,17 @@ anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer, anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS); } +void anv_CmdPushConstants( + VkCmdBuffer cmdBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t start, + uint32_t length, + const void* values) +{ + stub(); +} + void anv_CmdBeginRenderPass( VkCmdBuffer cmdBuffer, const VkRenderPassBegin* pRenderPassBegin) -- cgit v1.2.3 From 0ecb789b7177ce5b1e2a275aa94990c67ead9136 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 7 Jul 2015 15:47:24 -0700 Subject: vk: Remove unused 'v' param from stub() macro --- src/vulkan/private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index cee152bada0..a9c9f26bc73 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -159,7 +159,7 @@ void anv_abortfv(const char *format, va_list va) anv_noreturn; return (v); \ } while (0) -#define stub(v) \ +#define stub() \ do { \ anv_finishme("stub %s", __func__); \ return; \ -- cgit v1.2.3 From 85c0d69be96b4bcd1404e2782064a33254a46c2b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 7 Jul 2015 15:49:57 -0700 Subject: vk/0.130: Update vkCmdWaitEvents() signature --- include/vulkan/vulkan.h | 5 +++-- src/vulkan/device.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index d03fb9928a0..e8e117edff5 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1986,7 +1986,7 @@ typedef void (VKAPI *PFN_vkCmdClearDepthStencil)(VkCmdBuffer cmdBuffer, VkImage typedef void (VKAPI *PFN_vkCmdResolveImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, 
VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); typedef void (VKAPI *PFN_vkCmdSetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipeEvent pipeEvent); typedef void (VKAPI *PFN_vkCmdResetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipeEvent pipeEvent); -typedef void (VKAPI *PFN_vkCmdWaitEvents)(VkCmdBuffer cmdBuffer, VkWaitEvent waitEvent, uint32_t eventCount, const VkEvent* pEvents, uint32_t memBarrierCount, const void** ppMemBarriers); +typedef void (VKAPI *PFN_vkCmdWaitEvents)(VkCmdBuffer cmdBuffer, VkWaitEvent waitEvent, uint32_t eventCount, const VkEvent* pEvents, VkPipeEventFlags pipeEventMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); typedef void (VKAPI *PFN_vkCmdPipelineBarrier)(VkCmdBuffer cmdBuffer, VkWaitEvent waitEvent, VkPipeEventFlags pipeEventMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); typedef void (VKAPI *PFN_vkCmdBeginQuery)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot, VkQueryControlFlags flags); typedef void (VKAPI *PFN_vkCmdEndQuery)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot); @@ -2544,8 +2544,9 @@ void VKAPI vkCmdWaitEvents( VkWaitEvent waitEvent, uint32_t eventCount, const VkEvent* pEvents, + VkPipeEventFlags pipeEventMask, uint32_t memBarrierCount, - const void** ppMemBarriers); + const void* const* ppMemBarriers); void VKAPI vkCmdPipelineBarrier( VkCmdBuffer cmdBuffer, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 8b1894c933d..cc00057b826 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3396,8 +3396,9 @@ void anv_CmdWaitEvents( VkWaitEvent waitEvent, uint32_t eventCount, const VkEvent* pEvents, + VkPipeEventFlags pipeEventMask, uint32_t memBarrierCount, - const void** ppMemBarriers) + const void* const* ppMemBarriers) { stub(); } -- cgit v1.2.3 From dff32238c760c45756cf8d621d113115c021f026 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 7 Jul 
2015 15:51:55 -0700 Subject: vk/0.130: Stub vkCmdExecuteCommands() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 8 ++++++++ 2 files changed, 13 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index e8e117edff5..f8f30aa205f 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1998,6 +1998,7 @@ typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFrame typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); typedef void (VKAPI *PFN_vkCmdBeginRenderPass)(VkCmdBuffer cmdBuffer, const VkRenderPassBegin* pRenderPassBegin); typedef void (VKAPI *PFN_vkCmdEndRenderPass)(VkCmdBuffer cmdBuffer, VkRenderPass renderPass); +typedef void (VKAPI *PFN_vkCmdExecuteCommands)(VkCmdBuffer cmdBuffer, uint32_t cmdBuffersCount, const VkCmdBuffer* pCmdBuffers); #ifdef VK_PROTOTYPES VkResult VKAPI vkCreateInstance( @@ -2604,6 +2605,10 @@ void VKAPI vkCmdEndRenderPass( VkCmdBuffer cmdBuffer, VkRenderPass renderPass); +void VKAPI vkCmdExecuteCommands( + VkCmdBuffer cmdBuffer, + uint32_t cmdBuffersCount, + const VkCmdBuffer* pCmdBuffers); #endif #ifdef __cplusplus diff --git a/src/vulkan/device.c b/src/vulkan/device.c index cc00057b826..4c0082779a8 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3746,6 +3746,14 @@ void anv_CmdEndRenderPass( .CommandStreamerStallEnable = true); } +void anv_CmdExecuteCommands( + VkCmdBuffer cmdBuffer, + uint32_t cmdBuffersCount, + const VkCmdBuffer* pCmdBuffers) +{ + stub(); +} + void vkCmdDbgMarkerBegin( VkCmdBuffer cmdBuffer, const char* pMarker) -- cgit v1.2.3 From 11901a9100a0e78c101de59609229600cd841a55 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 7 Jul 2015 15:53:35 -0700 Subject: vk/0.130: Update name of vkCmdClearDepthStencilImage() --- include/vulkan/vulkan.h | 4 ++-- src/vulkan/meta.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited 
to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index f8f30aa205f..af0f78a2e50 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1982,7 +1982,7 @@ typedef void (VKAPI *PFN_vkCmdCopyImageToBuffer)(VkCmdBuffer cmdBuffer, VkImage typedef void (VKAPI *PFN_vkCmdUpdateBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); typedef void (VKAPI *PFN_vkCmdFillBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); typedef void (VKAPI *PFN_vkCmdClearColorImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, VkClearColor color, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -typedef void (VKAPI *PFN_vkCmdClearDepthStencil)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, float depth, uint32_t stencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); +typedef void (VKAPI *PFN_vkCmdClearDepthStencilImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, float depth, uint32_t stencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); typedef void (VKAPI *PFN_vkCmdResolveImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); typedef void (VKAPI *PFN_vkCmdSetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipeEvent pipeEvent); typedef void (VKAPI *PFN_vkCmdResetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipeEvent pipeEvent); @@ -2512,7 +2512,7 @@ void VKAPI vkCmdClearColorImage( uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -void VKAPI vkCmdClearDepthStencil( +void VKAPI vkCmdClearDepthStencilImage( VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 1613e4356cf..6d3a6f07439 100644 --- a/src/vulkan/meta.c +++ 
b/src/vulkan/meta.c @@ -1281,7 +1281,7 @@ void anv_CmdClearColorImage( anv_cmd_buffer_restore(cmd_buffer, &saved_state); } -void anv_CmdClearDepthStencil( +void anv_CmdClearDepthStencilImage( VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, -- cgit v1.2.3 From e7ddfe03ab5d2f25f585664f4adab54c62c8843a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 7 Jul 2015 15:56:12 -0700 Subject: vk/0.130: Stub vkCmdClear*Attachment() funcs vkCmdClearColorAttachment vkCmdClearDepthStencilAttachment --- include/vulkan/vulkan.h | 19 +++++++++++++++++++ src/vulkan/meta.c | 23 +++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 581e741138e..234814e92ca 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1995,6 +1995,8 @@ typedef void (VKAPI *PFN_vkCmdUpdateBuffer)(VkCmdBuffer cmdBuffer, VkBuffer dest typedef void (VKAPI *PFN_vkCmdFillBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); typedef void (VKAPI *PFN_vkCmdClearColorImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, VkClearColor color, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); typedef void (VKAPI *PFN_vkCmdClearDepthStencilImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, float depth, uint32_t stencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); +typedef void (VKAPI *PFN_vkCmdClearColorAttachment)(VkCmdBuffer cmdBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rectCount, const VkRect3D* pRects); +typedef void (VKAPI *PFN_vkCmdClearDepthStencilAttachment)(VkCmdBuffer cmdBuffer, VkImageAspectFlags imageAspectMask, VkImageLayout imageLayout, float depth, uint32_t stencil, uint32_t rectCount, const VkRect3D* pRects); typedef void (VKAPI *PFN_vkCmdResolveImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, 
VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); typedef void (VKAPI *PFN_vkCmdSetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipeEvent pipeEvent); typedef void (VKAPI *PFN_vkCmdResetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipeEvent pipeEvent); @@ -2533,6 +2535,23 @@ void VKAPI vkCmdClearDepthStencilImage( uint32_t rangeCount, const VkImageSubresourceRange* pRanges); +void VKAPI vkCmdClearColorAttachment( + VkCmdBuffer cmdBuffer, + uint32_t colorAttachment, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rectCount, + const VkRect3D* pRects); + +void VKAPI vkCmdClearDepthStencilAttachment( + VkCmdBuffer cmdBuffer, + VkImageAspectFlags imageAspectMask, + VkImageLayout imageLayout, + float depth, + uint32_t stencil, + uint32_t rectCount, + const VkRect3D* pRects); + void VKAPI vkCmdResolveImage( VkCmdBuffer cmdBuffer, VkImage srcImage, diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 6d3a6f07439..8a6bc377447 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -1293,6 +1293,29 @@ void anv_CmdClearDepthStencilImage( stub(); } +void anv_CmdClearColorAttachment( + VkCmdBuffer cmdBuffer, + uint32_t colorAttachment, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rectCount, + const VkRect3D* pRects) +{ + stub(); +} + +void anv_CmdClearDepthStencilAttachment( + VkCmdBuffer cmdBuffer, + VkImageAspectFlags imageAspectMask, + VkImageLayout imageLayout, + float depth, + uint32_t stencil, + uint32_t rectCount, + const VkRect3D* pRects) +{ + stub(); +} + void anv_CmdResolveImage( VkCmdBuffer cmdBuffer, VkImage srcImage, -- cgit v1.2.3 From abbb776bbefd148d9f67b41abe142205fb657a4e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 15:57:03 -0700 Subject: vk/vulkan.h: Remove programPointSize Instead, we auto-detect whether or not your shader writes gl_PointSize. 
If it does, we use 1.0, otherwise we take it from the shader. --- include/vulkan/vulkan.h | 1 - src/vulkan/compiler.cpp | 8 ++++++++ src/vulkan/device.c | 1 - src/vulkan/pipeline.c | 3 ++- src/vulkan/private.h | 1 + 5 files changed, 11 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 234814e92ca..3057a982f3e 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1469,7 +1469,6 @@ typedef struct { const void* pNext; bool32_t depthClipEnable; bool32_t rasterizerDiscardEnable; - bool32_t programPointSize; VkFillMode fillMode; VkCullMode cullMode; VkFrontFace frontFace; diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 7ba42151c19..558a31001e2 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -1018,6 +1018,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) int name = 0; struct brw_context *brw = compiler->brw; + pipeline->writes_point_size = false; + /* When we free the pipeline, we detect stages based on the NULL status * of various prog_data pointers. Make them NULL by default. 
*/ @@ -1086,6 +1088,9 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) fail_if(!success, "do_wm_prog failed\n"); add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX, &pipeline->vs_prog_data.base.base); + + if (vp->Base.OutputsWritten & VARYING_SLOT_PSIZ) + pipeline->writes_point_size = true; } else { memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); pipeline->vs_simd8 = NO_KERNEL; @@ -1104,6 +1109,9 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) fail_if(!success, "do_gs_prog failed\n"); add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, &pipeline->gs_prog_data.base.base); + + if (gp->Base.OutputsWritten & VARYING_SLOT_PSIZ) + pipeline->writes_point_size = true; } else { pipeline->gs_vec4 = NO_KERNEL; } diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 4c0082779a8..9654637b6e1 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2034,7 +2034,6 @@ VkResult anv_CreateDynamicRasterState( struct GEN8_3DSTATE_SF sf = { GEN8_3DSTATE_SF_header, .LineWidth = pCreateInfo->lineWidth, - .PointWidth = 1.0, }; GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index c500c7a0a33..665ee773fe4 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -184,7 +184,8 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, .TriangleFanProvokingVertexSelect = 0, - .PointWidthSource = info->programPointSize ? Vertex : State, + .PointWidthSource = pipeline->writes_point_size ? 
Vertex : State, + .PointWidth = 1.0, }; /* FINISHME: bool32_t rasterizerDiscardEnable; */ diff --git a/src/vulkan/private.h b/src/vulkan/private.h index a9c9f26bc73..2663d97834f 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -733,6 +733,7 @@ struct anv_pipeline { struct brw_wm_prog_data wm_prog_data; struct brw_gs_prog_data gs_prog_data; struct brw_cs_prog_data cs_prog_data; + bool writes_point_size; struct brw_stage_prog_data * prog_data[VK_SHADER_STAGE_NUM]; uint32_t scratch_start[VK_SHADER_STAGE_NUM]; uint32_t total_scratch; -- cgit v1.2.3 From d2ca7e24b448ade7f153a3097c800be6595c7417 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 16:15:55 -0700 Subject: vk/vulkan.h: Rename VertexInputStateInfo to VertexInputStateCreateInfo --- include/vulkan/vulkan.h | 4 ++-- src/vulkan/meta.c | 8 ++++---- src/vulkan/pipeline.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 3057a982f3e..a243f41f61f 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -194,7 +194,7 @@ typedef enum { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO = 23, VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 24, VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 25, - VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO = 26, + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 26, VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO = 27, VK_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO = 28, VK_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO = 29, @@ -1443,7 +1443,7 @@ typedef struct { const VkVertexInputBindingDescription* pVertexBindingDescriptions; uint32_t attributeCount; const VkVertexInputAttributeDescription* pVertexAttributeDescriptions; -} VkPipelineVertexInputCreateInfo; +} VkPipelineVertexInputStateCreateInfo; typedef struct { VkStructureType sType; diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 8a6bc377447..f4479512ea6 
100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -69,8 +69,8 @@ anv_device_init_meta_clear_state(struct anv_device *device) * per-instance data, which consists of the VUE header (which selects the * layer) and the color (Vulkan supports per-RT clear colors). */ - VkPipelineVertexInputCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO, + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .pNext = &fs_create_info, .bindingCount = 2, .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { @@ -363,8 +363,8 @@ anv_device_init_meta_blit_state(struct anv_device *device) } }; - VkPipelineVertexInputCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO, + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .pNext = &fs_create_info, .bindingCount = 2, .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 665ee773fe4..7df34d16a0a 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -57,7 +57,7 @@ VkResult anv_CreateShader( // Pipeline functions static void -emit_vertex_input(struct anv_pipeline *pipeline, VkPipelineVertexInputCreateInfo *info) +emit_vertex_input(struct anv_pipeline *pipeline, VkPipelineVertexInputStateCreateInfo *info) { const uint32_t num_dwords = 1 + info->attributeCount * 2; uint32_t *p; @@ -403,7 +403,7 @@ anv_pipeline_create( VkPipelineRsStateCreateInfo *rs_info = NULL; VkPipelineDsStateCreateInfo *ds_info = NULL; VkPipelineCbStateCreateInfo *cb_info = NULL; - VkPipelineVertexInputCreateInfo *vi_info = NULL; + VkPipelineVertexInputStateCreateInfo *vi_info = NULL; VkResult result; uint32_t offset, length; @@ -432,8 +432,8 @@ anv_pipeline_create( for (common = pCreateInfo->pNext; common; common = common->pNext) { switch 
(common->sType) { - case VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO: - vi_info = (VkPipelineVertexInputCreateInfo *) common; + case VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO: + vi_info = (VkPipelineVertexInputStateCreateInfo *) common; break; case VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO: ia_info = (VkPipelineIaStateCreateInfo *) common; -- cgit v1.2.3 From a35fef1ab27cd7b1d896738f8eddd6b4fa23c0cf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 16:22:23 -0700 Subject: vk/vulkan.h: Remove the pass argument from EndRenderPass --- include/vulkan/vulkan.h | 5 ++--- src/vulkan/device.c | 3 +-- src/vulkan/meta.c | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index a243f41f61f..0c50b877573 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2010,7 +2010,7 @@ typedef void (VKAPI *PFN_vkCmdPushConstants)(VkCmdBuffer cmdBuffer, VkPipelineLa typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); typedef void (VKAPI *PFN_vkCmdBeginRenderPass)(VkCmdBuffer cmdBuffer, const VkRenderPassBegin* pRenderPassBegin); -typedef void (VKAPI *PFN_vkCmdEndRenderPass)(VkCmdBuffer cmdBuffer, VkRenderPass renderPass); +typedef void (VKAPI *PFN_vkCmdEndRenderPass)(VkCmdBuffer cmdBuffer); typedef void (VKAPI *PFN_vkCmdExecuteCommands)(VkCmdBuffer cmdBuffer, uint32_t cmdBuffersCount, const VkCmdBuffer* pCmdBuffers); #ifdef VK_PROTOTYPES @@ -2632,8 +2632,7 @@ void VKAPI vkCmdBeginRenderPass( const VkRenderPassBegin* pRenderPassBegin); void VKAPI vkCmdEndRenderPass( - VkCmdBuffer cmdBuffer, - VkRenderPass renderPass); + VkCmdBuffer cmdBuffer); void VKAPI vkCmdExecuteCommands( VkCmdBuffer cmdBuffer, diff --git 
a/src/vulkan/device.c b/src/vulkan/device.c index 9654637b6e1..69fd335634b 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3726,8 +3726,7 @@ void anv_CmdBeginRenderPass( } void anv_CmdEndRenderPass( - VkCmdBuffer cmdBuffer, - VkRenderPass renderPass) + VkCmdBuffer cmdBuffer) { /* Emit a flushing pipe control at the end of a pass. This is kind of a * hack but it ensures that render targets always actually get written. diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index f4479512ea6..9bba3540a37 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -653,7 +653,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, anv_CmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, 1); - anv_CmdEndRenderPass((VkCmdBuffer) cmd_buffer, pass); + anv_CmdEndRenderPass((VkCmdBuffer) cmd_buffer); /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. @@ -1272,7 +1272,7 @@ void anv_CmdClearColorImage( meta_emit_clear(cmd_buffer, 1, &instance_data); - anv_CmdEndRenderPass((VkCmdBuffer) cmd_buffer, pass); + anv_CmdEndRenderPass((VkCmdBuffer) cmd_buffer); } } } -- cgit v1.2.3 From 3e4b00d283a7fb9059753a1b267efa723e8e70ad Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 16:27:06 -0700 Subject: meta: Use the VkClearColorValue structure for the color attribute --- src/vulkan/meta.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 9bba3540a37..57e0ec5093a 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -189,7 +189,7 @@ struct vue_header { struct clear_instance_data { struct vue_header vue_header; - float color[4]; + VkClearColorValue color; }; static void @@ -283,12 +283,7 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, .ViewportIndex = 0, .PointWidth = 0.0 }, - .color = { - pass->layers[i].clear_color.color.floatColor[0], - pass->layers[i].clear_color.color.floatColor[1], 
- pass->layers[i].clear_color.color.floatColor[2], - pass->layers[i].clear_color.color.floatColor[3], - } + .color = pass->layers[i].clear_color.color, }; } } @@ -1262,12 +1257,7 @@ void anv_CmdClearColorImage( .ViewportIndex = 0, .PointWidth = 0.0 }, - .color = { - color.color.floatColor[0], - color.color.floatColor[1], - color.color.floatColor[2], - color.color.floatColor[3], - } + .color = color.color, }; meta_emit_clear(cmd_buffer, 1, &instance_data); -- cgit v1.2.3 From 80046a7d54591e4a68a10413df47d1661e0dd31f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 16:37:43 -0700 Subject: vk/vulkan.h: Update clear color handling to 130 --- include/vulkan/vulkan.h | 19 +++++++------------ src/vulkan/meta.c | 12 ++++++------ src/vulkan/private.h | 2 +- 3 files changed, 14 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 0c50b877573..03d49d61828 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1671,17 +1671,12 @@ typedef struct { uint32_t layers; } VkFramebufferCreateInfo; -typedef union VkClearColorValue_ -{ - float floatColor[4]; - uint32_t rawColor[4]; +typedef union { + float f32[4]; + int32_t s32[4]; + uint32_t u32[4]; } VkClearColorValue; -typedef struct { - VkClearColorValue color; - bool32_t useRawValue; -} VkClearColor; - typedef struct { VkStructureType sType; const void* pNext; @@ -1694,7 +1689,7 @@ typedef struct { const VkImageLayout* pColorLayouts; const VkAttachmentLoadOp* pColorLoadOps; const VkAttachmentStoreOp* pColorStoreOps; - const VkClearColor* pColorLoadClearValues; + const VkClearColorValue* pColorLoadClearValues; VkFormat depthStencilFormat; VkImageLayout depthStencilLayout; VkAttachmentLoadOp depthLoadOp; @@ -1992,7 +1987,7 @@ typedef void (VKAPI *PFN_vkCmdCopyBufferToImage)(VkCmdBuffer cmdBuffer, VkBuffer typedef void (VKAPI *PFN_vkCmdCopyImageToBuffer)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer 
destBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); typedef void (VKAPI *PFN_vkCmdUpdateBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); typedef void (VKAPI *PFN_vkCmdFillBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); -typedef void (VKAPI *PFN_vkCmdClearColorImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, VkClearColor color, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); +typedef void (VKAPI *PFN_vkCmdClearColorImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); typedef void (VKAPI *PFN_vkCmdClearDepthStencilImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, float depth, uint32_t stencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); typedef void (VKAPI *PFN_vkCmdClearColorAttachment)(VkCmdBuffer cmdBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rectCount, const VkRect3D* pRects); typedef void (VKAPI *PFN_vkCmdClearDepthStencilAttachment)(VkCmdBuffer cmdBuffer, VkImageAspectFlags imageAspectMask, VkImageLayout imageLayout, float depth, uint32_t stencil, uint32_t rectCount, const VkRect3D* pRects); @@ -2521,7 +2516,7 @@ void VKAPI vkCmdClearColorImage( VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, - VkClearColor color, + const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 57e0ec5093a..7e22269e368 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -283,7 +283,7 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, .ViewportIndex = 0, .PointWidth = 0.0 }, - .color = pass->layers[i].clear_color.color, + .color = 
pass->layers[i].clear_color, }; } } @@ -627,8 +627,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .pColorLayouts = (VkImageLayout[]) { VK_IMAGE_LAYOUT_GENERAL }, .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_LOAD }, .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, - .pColorLoadClearValues = (VkClearColor[]) { - { .color = { .floatColor = { 1.0, 0.0, 0.0, 1.0 } }, .useRawValue = false } + .pColorLoadClearValues = (VkClearColorValue[]) { + { .f32 = { 1.0, 0.0, 0.0, 1.0 } } }, .depthStencilFormat = VK_FORMAT_UNDEFINED, }, &pass); @@ -1185,7 +1185,7 @@ void anv_CmdClearColorImage( VkCmdBuffer cmdBuffer, VkImage _image, VkImageLayout imageLayout, - VkClearColor color, + const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { @@ -1241,7 +1241,7 @@ void anv_CmdClearColorImage( .pColorLayouts = (VkImageLayout[]) { imageLayout }, .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_DONT_CARE }, .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, - .pColorLoadClearValues = &color, + .pColorLoadClearValues = pColor, .depthStencilFormat = VK_FORMAT_UNDEFINED, }, &pass); @@ -1257,7 +1257,7 @@ void anv_CmdClearColorImage( .ViewportIndex = 0, .PointWidth = 0.0 }, - .color = color.color, + .color = *pColor, }; meta_emit_clear(cmd_buffer, 1, &instance_data); diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 2663d97834f..77de59340cf 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -920,7 +920,7 @@ struct anv_framebuffer { struct anv_render_pass_layer { VkAttachmentLoadOp color_load_op; - VkClearColor clear_color; + VkClearColorValue clear_color; }; struct anv_render_pass { -- cgit v1.2.3 From 435b062b2616124df4d4352b78799e42a9f6a621 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 17:06:10 -0700 Subject: vk/vulkan.h: Add a PipelineLayout parameter to BindDescriptorSets --- include/vulkan/vulkan.h | 3 ++- 
src/vulkan/device.c | 8 ++------ src/vulkan/meta.c | 3 ++- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 80edb316cc7..856dbe499df 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1971,7 +1971,7 @@ typedef VkResult (VKAPI *PFN_vkEndCommandBuffer)(VkCmdBuffer cmdBuffer); typedef VkResult (VKAPI *PFN_vkResetCommandBuffer)(VkCmdBuffer cmdBuffer); typedef void (VKAPI *PFN_vkCmdBindPipeline)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); typedef void (VKAPI *PFN_vkCmdBindDynamicStateObject)(VkCmdBuffer cmdBuffer, VkStateBindPoint stateBindPoint, VkDynamicStateObject dynamicState); -typedef void (VKAPI *PFN_vkCmdBindDescriptorSets)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); +typedef void (VKAPI *PFN_vkCmdBindDescriptorSets)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); typedef void (VKAPI *PFN_vkCmdBindIndexBuffer)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); typedef void (VKAPI *PFN_vkCmdBindVertexBuffers)(VkCmdBuffer cmdBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); typedef void (VKAPI *PFN_vkCmdDraw)(VkCmdBuffer cmdBuffer, uint32_t firstVertex, uint32_t vertexCount, uint32_t firstInstance, uint32_t instanceCount); @@ -2398,6 +2398,7 @@ void VKAPI vkCmdBindDynamicStateObject( void VKAPI vkCmdBindDescriptorSets( VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, diff --git 
a/src/vulkan/device.c b/src/vulkan/device.c index 69fd335634b..a43bd04b645 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2665,6 +2665,7 @@ anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) void anv_CmdBindDescriptorSets( VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, @@ -2672,17 +2673,12 @@ void anv_CmdBindDescriptorSets( const uint32_t* pDynamicOffsets) { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_pipeline_layout *layout; + struct anv_pipeline_layout *layout = (struct anv_pipeline_layout *) _layout; struct anv_descriptor_set *set; struct anv_descriptor_set_layout *set_layout; assert(firstSet + setCount < MAX_SETS); - if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) - layout = cmd_buffer->pipeline->layout; - else - layout = cmd_buffer->compute_pipeline->layout; - uint32_t dynamic_slot = 0; for (uint32_t i = 0; i < setCount; i++) { set = (struct anv_descriptor_set *) pDescriptorSets[i]; diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 7e22269e368..37c8421a46e 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -643,7 +643,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VK_STATE_BIND_POINT_VIEWPORT, fb->vp_state); anv_CmdBindDescriptorSets((VkCmdBuffer) cmd_buffer, - VK_PIPELINE_BIND_POINT_GRAPHICS, 0, 1, + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, &set, 0, NULL); anv_CmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, 1); -- cgit v1.2.3 From 0ff06540aed13719f0af0c3be4feb7b635cb981a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 17:11:35 -0700 Subject: vk: Implement the GetRenderAreaGranularity function At the moment, we're just going to scissor clears so a granularity of 1x1 is all we need. 
--- include/vulkan/vulkan.h | 6 ++++++ src/vulkan/device.c | 10 ++++++++++ 2 files changed, 16 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 856dbe499df..e5784faa22d 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1965,6 +1965,7 @@ typedef VkResult (VKAPI *PFN_vkCreateDynamicViewportState)(VkDevice device, cons typedef VkResult (VKAPI *PFN_vkCreateDynamicRasterState)(VkDevice device, const VkDynamicRsStateCreateInfo* pCreateInfo, VkDynamicRsState* pState); typedef VkResult (VKAPI *PFN_vkCreateDynamicColorBlendState)(VkDevice device, const VkDynamicCbStateCreateInfo* pCreateInfo, VkDynamicCbState* pState); typedef VkResult (VKAPI *PFN_vkCreateDynamicDepthStencilState)(VkDevice device, const VkDynamicDsStateCreateInfo* pCreateInfo, VkDynamicDsState* pState); +typedef VkResult (VKAPI *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); typedef VkResult (VKAPI *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCmdBufferCreateInfo* pCreateInfo, VkCmdBuffer* pCmdBuffer); typedef VkResult (VKAPI *PFN_vkBeginCommandBuffer)(VkCmdBuffer cmdBuffer, const VkCmdBufferBeginInfo* pBeginInfo); typedef VkResult (VKAPI *PFN_vkEndCommandBuffer)(VkCmdBuffer cmdBuffer); @@ -2370,6 +2371,11 @@ VkResult VKAPI vkCreateRenderPass( const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); +VkResult VKAPI vkGetRenderAreaGranularity( + VkDevice device, + VkRenderPass renderPass, + VkExtent2D* pGranularity); + VkResult VKAPI vkCreateCommandBuffer( VkDevice device, const VkCmdBufferCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index a43bd04b645..65f8ce9778f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3641,6 +3641,16 @@ VkResult anv_CreateRenderPass( return VK_SUCCESS; } +VkResult anv_GetRenderAreaGranularity( + VkDevice device, + VkRenderPass renderPass, + VkExtent2D* pGranularity) +{ + *pGranularity = 
(VkExtent2D) { 1, 1 }; + + return VK_SUCCESS; +} + static void anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer, struct anv_render_pass *pass) -- cgit v1.2.3 From d5349b1b18b2c9bece7f6491e5bf3918a53fedd2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 17:18:00 -0700 Subject: vk/vulkan.h: Constify the pFences parameter to ResetFences --- include/vulkan/vulkan.h | 4 ++-- src/vulkan/device.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index f196ba1bd58..865c2d5635a 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1927,7 +1927,7 @@ typedef VkResult (VKAPI *PFN_vkQueueBindObjectMemory)(VkQueue queue, VkObjectTyp typedef VkResult (VKAPI *PFN_vkQueueBindObjectMemoryRange)(VkQueue queue, VkObjectType objType, VkObject object, uint32_t allocationIdx, VkDeviceSize rangeOffset, VkDeviceSize rangeSize, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkQueueBindImageMemoryRange)(VkQueue queue, VkImage image, uint32_t allocationIdx, const VkImageMemoryBindInfo* pBindInfo, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); -typedef VkResult (VKAPI *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, VkFence* pFences); +typedef VkResult (VKAPI *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); typedef VkResult (VKAPI *PFN_vkGetFenceStatus)(VkDevice device, VkFence fence); typedef VkResult (VKAPI *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, bool32_t waitAll, uint64_t timeout); typedef VkResult (VKAPI *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore); @@ -2158,7 +2158,7 @@ VkResult VKAPI vkCreateFence( VkResult VKAPI vkResetFences( VkDevice device, uint32_t 
fenceCount, - VkFence* pFences); + const VkFence* pFences); VkResult VKAPI vkGetFenceStatus( VkDevice device, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 65f8ce9778f..ca0d41290a5 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1295,7 +1295,7 @@ VkResult anv_CreateFence( VkResult anv_ResetFences( VkDevice _device, uint32_t fenceCount, - VkFence* pFences) + const VkFence* pFences) { struct anv_fence **fences = (struct anv_fence **) pFences; -- cgit v1.2.3 From d9c2caea6ae6538095196b590f7eec8da1aa1fea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 17:22:29 -0700 Subject: vk: Update memory flushing functions to 130 This involves updating the prototype for FlushMappedMemory, adding InvalidateMappedMemoryRanges, and removing PinSystemMemory. --- include/vulkan/vulkan.h | 26 ++++++++++++++++---------- src/vulkan/device.c | 16 +++++++--------- 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 865c2d5635a..8cb9827eae2 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1236,6 +1236,14 @@ typedef struct { VkMemoryPropertyFlags memProps; } VkMemoryAllocInfo; +typedef struct { + VkStructureType sType; + const void* pNext; + VkDeviceMemory mem; + VkDeviceSize offset; + VkDeviceSize size; +} VkMappedMemoryRange; + typedef struct { VkDeviceSize size; VkDeviceSize alignment; @@ -1919,8 +1927,8 @@ typedef VkResult (VKAPI *PFN_vkAllocMemory)(VkDevice device, const VkMemoryAlloc typedef VkResult (VKAPI *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory mem); typedef VkResult (VKAPI *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData); typedef VkResult (VKAPI *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem); -typedef VkResult (VKAPI *PFN_vkFlushMappedMemory)(VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, VkDeviceSize size); 
-typedef VkResult (VKAPI *PFN_vkPinSystemMemory)(VkDevice device, const void* pSysMem, size_t memSize, VkDeviceMemory* pMem); +typedef VkResult (VKAPI *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); +typedef VkResult (VKAPI *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); typedef VkResult (VKAPI *PFN_vkDestroyObject)(VkDevice device, VkObjectType objType, VkObject object); typedef VkResult (VKAPI *PFN_vkGetObjectInfo)(VkDevice device, VkObjectType objType, VkObject object, VkObjectInfoType infoType, size_t* pDataSize, void* pData); typedef VkResult (VKAPI *PFN_vkQueueBindObjectMemory)(VkQueue queue, VkObjectType objType, VkObject object, uint32_t allocationIdx, VkDeviceMemory mem, VkDeviceSize offset); @@ -2099,17 +2107,15 @@ VkResult VKAPI vkUnmapMemory( VkDevice device, VkDeviceMemory mem); -VkResult VKAPI vkFlushMappedMemory( +VkResult VKAPI vkFlushMappedMemoryRanges( VkDevice device, - VkDeviceMemory mem, - VkDeviceSize offset, - VkDeviceSize size); + uint32_t memRangeCount, + const VkMappedMemoryRange* pMemRanges); -VkResult VKAPI vkPinSystemMemory( +VkResult VKAPI vkInvalidateMappedMemoryRanges( VkDevice device, - const void* pSysMem, - size_t memSize, - VkDeviceMemory* pMem); + uint32_t memRangeCount, + const VkMappedMemoryRange* pMemRanges); VkResult VKAPI vkDestroyObject( VkDevice device, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index ca0d41290a5..83c3908b4fd 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -990,24 +990,22 @@ VkResult anv_UnmapMemory( return VK_SUCCESS; } -VkResult anv_FlushMappedMemory( +VkResult anv_FlushMappedMemoryRanges( VkDevice device, - VkDeviceMemory mem, - VkDeviceSize offset, - VkDeviceSize size) + uint32_t memRangeCount, + const VkMappedMemoryRange* pMemRanges) { /* clflush here for !llc platforms */ return VK_SUCCESS; } -VkResult anv_PinSystemMemory( +VkResult 
anv_InvalidateMappedMemoryRanges( VkDevice device, - const void* pSysMem, - size_t memSize, - VkDeviceMemory* pMem) + uint32_t memRangeCount, + const VkMappedMemoryRange* pMemRanges) { - return VK_SUCCESS; + return anv_FlushMappedMemoryRanges(device, memRangeCount, pMemRanges); } VkResult anv_DestroyObject( -- cgit v1.2.3 From ef8980e256c30fa672efd515e768a1405752d803 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 18:16:42 -0700 Subject: vk/vulkan.h: Switch from GetObjectInfo to GetMemoryRequirements --- include/vulkan/vulkan.h | 24 +++++---------- src/vulkan/device.c | 80 +++++++++++-------------------------------------- 2 files changed, 24 insertions(+), 80 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 8cb9827eae2..8a593556414 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1113,14 +1113,6 @@ typedef enum { VK_ENUM_RANGE(SUBRESOURCE_INFO_TYPE, LAYOUT, LAYOUT) } VkSubresourceInfoType; -typedef enum { - // Info type for vkGetObjectInfo() - VK_OBJECT_INFO_TYPE_MEMORY_ALLOCATION_COUNT = 0x00000000, - VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS = 0x00000001, - - VK_ENUM_RANGE(OBJECT_INFO_TYPE, MEMORY_ALLOCATION_COUNT, MEMORY_REQUIREMENTS) -} VkObjectInfoType; - // Physical device compatibility flags typedef VkFlags VkPhysicalDeviceCompatibilityFlags; @@ -1930,8 +1922,8 @@ typedef VkResult (VKAPI *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem) typedef VkResult (VKAPI *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); typedef VkResult (VKAPI *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); typedef VkResult (VKAPI *PFN_vkDestroyObject)(VkDevice device, VkObjectType objType, VkObject object); -typedef VkResult (VKAPI *PFN_vkGetObjectInfo)(VkDevice device, VkObjectType objType, VkObject object, VkObjectInfoType infoType, size_t* pDataSize, 
void* pData); typedef VkResult (VKAPI *PFN_vkQueueBindObjectMemory)(VkQueue queue, VkObjectType objType, VkObject object, uint32_t allocationIdx, VkDeviceMemory mem, VkDeviceSize offset); +typedef VkResult (VKAPI *PFN_vkGetObjectMemoryRequirements)(VkDevice device, VkObjectType objType, VkObject object, VkMemoryRequirements* pMemoryRequirements); typedef VkResult (VKAPI *PFN_vkQueueBindObjectMemoryRange)(VkQueue queue, VkObjectType objType, VkObject object, uint32_t allocationIdx, VkDeviceSize rangeOffset, VkDeviceSize rangeSize, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkQueueBindImageMemoryRange)(VkQueue queue, VkImage image, uint32_t allocationIdx, const VkImageMemoryBindInfo* pBindInfo, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); @@ -2122,14 +2114,6 @@ VkResult VKAPI vkDestroyObject( VkObjectType objType, VkObject object); -VkResult VKAPI vkGetObjectInfo( - VkDevice device, - VkObjectType objType, - VkObject object, - VkObjectInfoType infoType, - size_t* pDataSize, - void* pData); - VkResult VKAPI vkQueueBindObjectMemory( VkQueue queue, VkObjectType objType, @@ -2138,6 +2122,12 @@ VkResult VKAPI vkQueueBindObjectMemory( VkDeviceMemory mem, VkDeviceSize memOffset); +VkResult VKAPI vkGetObjectMemoryRequirements( + VkDevice device, + VkObjectType objType, + VkObject object, + VkMemoryRequirements* pMemoryRequirements); + VkResult VKAPI vkQueueBindObjectMemoryRange( VkQueue queue, VkObjectType objType, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 83c3908b4fd..53c29cafb3d 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1076,85 +1076,39 @@ VkResult anv_DestroyObject( } } -static void -fill_memory_requirements( +VkResult anv_GetObjectMemoryRequirements( + VkDevice device, VkObjectType objType, VkObject object, - VkMemoryRequirements * memory_requirements) + VkMemoryRequirements* 
pMemoryRequirements) { - struct anv_buffer *buffer; - struct anv_image *image; - - memory_requirements->memPropsAllowed = + pMemoryRequirements->memPropsAllowed = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | /* VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT | */ /* VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT | */ VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT; - memory_requirements->memPropsRequired = 0; + pMemoryRequirements->memPropsRequired = 0; switch (objType) { - case VK_OBJECT_TYPE_BUFFER: - buffer = (struct anv_buffer *) object; - memory_requirements->size = buffer->size; - memory_requirements->alignment = 16; - break; - case VK_OBJECT_TYPE_IMAGE: - image = (struct anv_image *) object; - memory_requirements->size = image->size; - memory_requirements->alignment = image->alignment; - break; - default: - memory_requirements->size = 0; + case VK_OBJECT_TYPE_BUFFER: { + struct anv_buffer *buffer = (struct anv_buffer *) object; + pMemoryRequirements->size = buffer->size; + pMemoryRequirements->alignment = 16; break; } -} - -static uint32_t -get_allocation_count(VkObjectType objType) -{ - switch (objType) { - case VK_OBJECT_TYPE_BUFFER: - case VK_OBJECT_TYPE_IMAGE: - return 1; - default: - return 0; + case VK_OBJECT_TYPE_IMAGE: { + struct anv_image *image = (struct anv_image *) object; + pMemoryRequirements->size = image->size; + pMemoryRequirements->alignment = image->alignment; + break; } -} - -VkResult anv_GetObjectInfo( - VkDevice _device, - VkObjectType objType, - VkObject object, - VkObjectInfoType infoType, - size_t* pDataSize, - void* pData) -{ - VkMemoryRequirements memory_requirements; - uint32_t *count; - - switch (infoType) { - case VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS: - *pDataSize = sizeof(memory_requirements); - if (pData == NULL) - return VK_SUCCESS; - - fill_memory_requirements(objType, object, pData); - return VK_SUCCESS; - - case VK_OBJECT_INFO_TYPE_MEMORY_ALLOCATION_COUNT: - *pDataSize = sizeof(count); - if (pData == NULL) - return VK_SUCCESS; - - count = pData; - 
*count = get_allocation_count(objType); - return VK_SUCCESS; - default: - return vk_error(VK_UNSUPPORTED); + pMemoryRequirements->size = 0; + break; } + return VK_SUCCESS; } VkResult anv_QueueBindObjectMemory( -- cgit v1.2.3 From db24afee2f3057f198f972838c91ecdf464c7f72 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 18:20:18 -0700 Subject: vk/vulkan.h: Switch from GetImageSubresourceInfo to GetImageSubresourceLayout --- include/vulkan/vulkan.h | 15 +++------------ src/vulkan/image.c | 12 +++++------- 2 files changed, 8 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 8a593556414..96695e37acd 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1106,13 +1106,6 @@ typedef enum { VK_ENUM_RANGE(FORMAT_INFO_TYPE, PROPERTIES, PROPERTIES) } VkFormatInfoType; -typedef enum { - // Info type for vkGetImageSubresourceInfo() - VK_SUBRESOURCE_INFO_TYPE_LAYOUT = 0x00000000, - - VK_ENUM_RANGE(SUBRESOURCE_INFO_TYPE, LAYOUT, LAYOUT) -} VkSubresourceInfoType; - // Physical device compatibility flags typedef VkFlags VkPhysicalDeviceCompatibilityFlags; @@ -1943,7 +1936,7 @@ typedef VkResult (VKAPI *PFN_vkGetFormatInfo)(VkDevice device, VkFormat format, typedef VkResult (VKAPI *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); typedef VkResult (VKAPI *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); typedef VkResult (VKAPI *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, VkImage* pImage); -typedef VkResult (VKAPI *PFN_vkGetImageSubresourceInfo)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceInfoType infoType, size_t* pDataSize, void* pData); +typedef VkResult (VKAPI *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); typedef 
VkResult (VKAPI *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); typedef VkResult (VKAPI *PFN_vkCreateColorAttachmentView)(VkDevice device, const VkColorAttachmentViewCreateInfo* pCreateInfo, VkColorAttachmentView* pView); typedef VkResult (VKAPI *PFN_vkCreateDepthStencilView)(VkDevice device, const VkDepthStencilViewCreateInfo* pCreateInfo, VkDepthStencilView* pView); @@ -2233,13 +2226,11 @@ VkResult VKAPI vkCreateImage( const VkImageCreateInfo* pCreateInfo, VkImage* pImage); -VkResult VKAPI vkGetImageSubresourceInfo( +VkResult VKAPI vkGetImageSubresourceLayout( VkDevice device, VkImage image, const VkImageSubresource* pSubresource, - VkSubresourceInfoType infoType, - size_t* pDataSize, - void* pData); + VkSubresourceLayout* pLayout); VkResult VKAPI vkCreateImageView( VkDevice device, diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 81d5f100ba6..be80a5d53dc 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -306,13 +306,11 @@ anv_CreateImage(VkDevice device, pImage); } -VkResult -anv_GetImageSubresourceInfo(VkDevice device, - VkImage image, - const VkImageSubresource *pSubresource, - VkSubresourceInfoType infoType, - size_t *pDataSize, - void *pData) +VkResult anv_GetImageSubresourceLayout( + VkDevice device, + VkImage image, + const VkImageSubresource* pSubresource, + VkSubresourceLayout* pLayout) { stub_return(VK_UNSUPPORTED); } -- cgit v1.2.3 From e7acdda184de2d2d21a2413097cf11b2c489b387 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 7 Jul 2015 18:51:53 -0700 Subject: vk/vulkan.h: Switch to the split ProcAddr functions in 130 --- include/vulkan/vulkan.h | 12 +++++++++--- src/vulkan/device.c | 11 +++++++++-- src/vulkan/vk_gen.py | 2 ++ 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 96695e37acd..ee8e4114509 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1194,6 +1194,7 
@@ typedef struct { bool32_t supportsPinning; } VkPhysicalDeviceMemoryProperties; +typedef void (VKAPI *PFN_vkVoidFunction)(void); typedef struct { uint32_t queueNodeIndex; uint32_t queueCount; @@ -1898,7 +1899,8 @@ typedef VkResult (VKAPI *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCrea typedef VkResult (VKAPI *PFN_vkDestroyInstance)(VkInstance instance); typedef VkResult (VKAPI *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceInfo)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceInfoType infoType, size_t* pDataSize, void* pData); -typedef void * (VKAPI *PFN_vkGetProcAddr)(VkPhysicalDevice physicalDevice, const char * pName); +typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); +typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); typedef VkResult (VKAPI *PFN_vkDestroyDevice)(VkDevice device); typedef VkResult (VKAPI *PFN_vkGetGlobalExtensionInfo)(VkExtensionInfoType infoType, uint32_t extensionIndex, size_t* pDataSize, void* pData); @@ -2021,8 +2023,12 @@ VkResult VKAPI vkGetPhysicalDeviceInfo( size_t* pDataSize, void* pData); -void * VKAPI vkGetProcAddr( - VkPhysicalDevice physicalDevice, +PFN_vkVoidFunction VKAPI vkGetInstanceProcAddr( + VkInstance instance, + const char* pName); + +PFN_vkVoidFunction VKAPI vkGetDeviceProcAddr( + VkDevice device, const char* pName); VkResult VKAPI vkCreateDevice( diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 53c29cafb3d..22fbb0e6431 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -274,8 +274,15 @@ VkResult anv_GetPhysicalDeviceInfo( } -void * vkGetProcAddr( - VkPhysicalDevice physicalDevice, +PFN_vkVoidFunction anv_GetInstanceProcAddr( + 
VkInstance instance, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +PFN_vkVoidFunction anv_GetDeviceProcAddr( + VkDevice device, const char* pName) { return anv_lookup_entrypoint(pName); diff --git a/src/vulkan/vk_gen.py b/src/vulkan/vk_gen.py index 9b50dbd766e..d481af74ef0 100644 --- a/src/vulkan/vk_gen.py +++ b/src/vulkan/vk_gen.py @@ -67,6 +67,8 @@ i = 0 for line in fileinput.input(): m = p.match(line) if (m): + if m.group(2) == 'VoidFunction': + continue fullname = "vk" + m.group(2) h = hash(fullname) entrypoints.append((m.group(1), m.group(2), m.group(3), i, h)) -- cgit v1.2.3 From bb6567f5d157983d2de2a6a863f217eba6506593 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 8 Jul 2015 09:04:16 -0700 Subject: vk/vulkan.h: Switch BindObjectMemory to a device function and remove the index --- include/vulkan/vulkan.h | 7 +++---- src/vulkan/device.c | 7 +++---- src/vulkan/x11.c | 8 ++++---- 3 files changed, 10 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index ee8e4114509..c0e087aae06 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1917,7 +1917,7 @@ typedef VkResult (VKAPI *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem) typedef VkResult (VKAPI *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); typedef VkResult (VKAPI *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); typedef VkResult (VKAPI *PFN_vkDestroyObject)(VkDevice device, VkObjectType objType, VkObject object); -typedef VkResult (VKAPI *PFN_vkQueueBindObjectMemory)(VkQueue queue, VkObjectType objType, VkObject object, uint32_t allocationIdx, VkDeviceMemory mem, VkDeviceSize offset); +typedef VkResult (VKAPI *PFN_vkBindObjectMemory)(VkDevice device, VkObjectType objType, VkObject object, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI 
*PFN_vkGetObjectMemoryRequirements)(VkDevice device, VkObjectType objType, VkObject object, VkMemoryRequirements* pMemoryRequirements); typedef VkResult (VKAPI *PFN_vkQueueBindObjectMemoryRange)(VkQueue queue, VkObjectType objType, VkObject object, uint32_t allocationIdx, VkDeviceSize rangeOffset, VkDeviceSize rangeSize, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkQueueBindImageMemoryRange)(VkQueue queue, VkImage image, uint32_t allocationIdx, const VkImageMemoryBindInfo* pBindInfo, VkDeviceMemory mem, VkDeviceSize memOffset); @@ -2113,11 +2113,10 @@ VkResult VKAPI vkDestroyObject( VkObjectType objType, VkObject object); -VkResult VKAPI vkQueueBindObjectMemory( - VkQueue queue, +VkResult VKAPI vkBindObjectMemory( + VkDevice device, VkObjectType objType, VkObject object, - uint32_t allocationIdx, VkDeviceMemory mem, VkDeviceSize memOffset); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 22fbb0e6431..0963acf9856 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1118,11 +1118,10 @@ VkResult anv_GetObjectMemoryRequirements( return VK_SUCCESS; } -VkResult anv_QueueBindObjectMemory( - VkQueue queue, +VkResult anv_BindObjectMemory( + VkDevice device, VkObjectType objType, VkObject object, - uint32_t allocationIdx, VkDeviceMemory _mem, VkDeviceSize memOffset) { @@ -1144,7 +1143,7 @@ VkResult anv_QueueBindObjectMemory( default: break; } - + return VK_SUCCESS; } diff --git a/src/vulkan/x11.c b/src/vulkan/x11.c index a98d5e2607b..ee9cdcdd51c 100644 --- a/src/vulkan/x11.c +++ b/src/vulkan/x11.c @@ -147,10 +147,10 @@ VkResult anv_CreateSwapChainWSI( }, (VkDeviceMemory *) &memory); - anv_QueueBindObjectMemory(VK_NULL_HANDLE, - VK_OBJECT_TYPE_IMAGE, - (VkImage) image, 0, - (VkDeviceMemory) memory, 0); + anv_BindObjectMemory(VK_NULL_HANDLE, + VK_OBJECT_TYPE_IMAGE, + (VkImage) image, + (VkDeviceMemory) memory, 0); ret = anv_gem_set_tiling(device, memory->bo.gem_handle, surface->stride, I915_TILING_X); -- cgit v1.2.3 From 
3c65a1ac143277382dd48015570b602965ba5954 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 8 Jul 2015 09:16:48 -0700 Subject: vk/vulkan.h: Remove the MemoryRange stubs and add sparse stubs --- include/vulkan/vulkan.h | 13 +++++-------- src/vulkan/device.c | 9 +++------ 2 files changed, 8 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index c0e087aae06..35876502d2e 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1919,8 +1919,8 @@ typedef VkResult (VKAPI *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, ui typedef VkResult (VKAPI *PFN_vkDestroyObject)(VkDevice device, VkObjectType objType, VkObject object); typedef VkResult (VKAPI *PFN_vkBindObjectMemory)(VkDevice device, VkObjectType objType, VkObject object, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkGetObjectMemoryRequirements)(VkDevice device, VkObjectType objType, VkObject object, VkMemoryRequirements* pMemoryRequirements); -typedef VkResult (VKAPI *PFN_vkQueueBindObjectMemoryRange)(VkQueue queue, VkObjectType objType, VkObject object, uint32_t allocationIdx, VkDeviceSize rangeOffset, VkDeviceSize rangeSize, VkDeviceMemory mem, VkDeviceSize memOffset); -typedef VkResult (VKAPI *PFN_vkQueueBindImageMemoryRange)(VkQueue queue, VkImage image, uint32_t allocationIdx, const VkImageMemoryBindInfo* pBindInfo, VkDeviceMemory mem, VkDeviceSize memOffset); +typedef VkResult (VKAPI *PFN_vkQueueBindSparseBufferMemory)(VkQueue queue, VkBuffer buffer, VkDeviceSize rangeOffset, VkDeviceSize rangeSize, VkDeviceMemory mem, VkDeviceSize memOffset); +typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageMemory)(VkQueue queue, VkImage image, const VkImageMemoryBindInfo* pBindInfo, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); typedef VkResult (VKAPI *PFN_vkResetFences)(VkDevice device, 
uint32_t fenceCount, const VkFence* pFences); typedef VkResult (VKAPI *PFN_vkGetFenceStatus)(VkDevice device, VkFence fence); @@ -2126,20 +2126,17 @@ VkResult VKAPI vkGetObjectMemoryRequirements( VkObject object, VkMemoryRequirements* pMemoryRequirements); -VkResult VKAPI vkQueueBindObjectMemoryRange( +VkResult VKAPI vkQueueBindSparseBufferMemory( VkQueue queue, - VkObjectType objType, - VkObject object, - uint32_t allocationIdx, + VkBuffer buffer, VkDeviceSize rangeOffset, VkDeviceSize rangeSize, VkDeviceMemory mem, VkDeviceSize memOffset); -VkResult VKAPI vkQueueBindImageMemoryRange( +VkResult VKAPI vkQueueBindSparseImageMemory( VkQueue queue, VkImage image, - uint32_t allocationIdx, const VkImageMemoryBindInfo* pBindInfo, VkDeviceMemory mem, VkDeviceSize memOffset); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 0963acf9856..8e4694b1beb 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1147,11 +1147,9 @@ VkResult anv_BindObjectMemory( return VK_SUCCESS; } -VkResult anv_QueueBindObjectMemoryRange( +VkResult anv_QueueBindSparseBufferMemory( VkQueue queue, - VkObjectType objType, - VkObject object, - uint32_t allocationIdx, + VkBuffer buffer, VkDeviceSize rangeOffset, VkDeviceSize rangeSize, VkDeviceMemory mem, @@ -1160,10 +1158,9 @@ VkResult anv_QueueBindObjectMemoryRange( stub_return(VK_UNSUPPORTED); } -VkResult anv_QueueBindImageMemoryRange( +VkResult anv_QueueBindSparseImageMemory( VkQueue queue, VkImage image, - uint32_t allocationIdx, const VkImageMemoryBindInfo* pBindInfo, VkDeviceMemory mem, VkDeviceSize memOffset) -- cgit v1.2.3 From cc29a5f4be5316c7e4254e1e0a69a3eb23ca7c99 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 8 Jul 2015 09:34:47 -0700 Subject: vk/vulkan.h: Move format quering to the physical device --- include/vulkan/vulkan.h | 14 +++++----- src/vulkan/formats.c | 70 ++++++++++++++++--------------------------------- 2 files changed, 29 insertions(+), 55 deletions(-) (limited to 'src') diff --git 
a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index ee4811a7caf..d073d5929df 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1890,6 +1890,7 @@ typedef VkResult (VKAPI *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCrea typedef VkResult (VKAPI *PFN_vkDestroyInstance)(VkInstance instance); typedef VkResult (VKAPI *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceInfo)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceInfoType infoType, size_t* pDataSize, void* pData); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatInfo)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatInfo); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); @@ -1925,7 +1926,6 @@ typedef VkResult (VKAPI *PFN_vkSetEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI *PFN_vkResetEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, VkQueryPool* pQueryPool); typedef VkResult (VKAPI *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, size_t* pDataSize, void* pData, VkQueryResultFlags flags); -typedef VkResult (VKAPI *PFN_vkGetFormatInfo)(VkDevice device, VkFormat format, VkFormatInfoType infoType, size_t* pDataSize, void* pData); typedef VkResult (VKAPI *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); typedef VkResult (VKAPI *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); 
typedef VkResult (VKAPI *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, VkImage* pImage); @@ -2014,6 +2014,11 @@ VkResult VKAPI vkGetPhysicalDeviceInfo( size_t* pDataSize, void* pData); +VkResult VKAPI vkGetPhysicalDeviceFormatInfo( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties* pFormatInfo); + PFN_vkVoidFunction VKAPI vkGetInstanceProcAddr( VkInstance instance, const char* pName); @@ -2197,13 +2202,6 @@ VkResult VKAPI vkGetQueryPoolResults( void* pData, VkQueryResultFlags flags); -VkResult VKAPI vkGetFormatInfo( - VkDevice device, - VkFormat format, - VkFormatInfoType infoType, - size_t* pDataSize, - void* pData); - VkResult VKAPI vkCreateBuffer( VkDevice device, const VkBufferCreateInfo* pCreateInfo, diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index 75432c9e915..00300e19c23 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -232,28 +232,32 @@ struct surface_format_info { extern const struct surface_format_info surface_formats[]; -VkResult anv_validate_GetFormatInfo( - VkDevice _device, +VkResult anv_validate_GetPhysicalDeviceFormatInfo( + VkPhysicalDevice physicalDevice, VkFormat _format, - VkFormatInfoType infoType, - size_t* pDataSize, - void* pData) + VkFormatProperties* pFormatInfo) { const struct anv_format *format = anv_format_for_vk_format(_format); fprintf(stderr, "vkGetFormatInfo(%s)\n", format->name); - return anv_GetFormatInfo(_device, _format, infoType, pDataSize, pData); + return anv_GetPhysicalDeviceFormatInfo(physicalDevice, _format, pFormatInfo); } -static void -anv_format_get_properties(struct anv_device *device, - const struct anv_format *format, - VkFormatProperties *properties) +VkResult anv_GetPhysicalDeviceFormatInfo( + VkPhysicalDevice physicalDevice, + VkFormat _format, + VkFormatProperties* pFormatInfo) { + struct anv_physical_device *physical_device = + (struct anv_physical_device *) physicalDevice; const struct surface_format_info *info; int gen; - gen = 
device->info.gen * 10; - if (device->info.is_haswell) + const struct anv_format *format = anv_format_for_vk_format(_format); + if (format == NULL) + return vk_error(VK_ERROR_INVALID_VALUE); + + gen = physical_device->info->gen * 10; + if (physical_device->info->is_haswell) gen += 5; if (format->surface_format == UNSUPPORTED) @@ -280,42 +284,14 @@ anv_format_get_properties(struct anv_device *device, linear |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; } - properties->linearTilingFeatures = linear; - properties->optimalTilingFeatures = tiled; - return; - - unsupported: - properties->linearTilingFeatures = 0; - properties->optimalTilingFeatures = 0; -} - -VkResult anv_GetFormatInfo( - VkDevice _device, - VkFormat _format, - VkFormatInfoType infoType, - size_t* pDataSize, - void* pData) -{ - struct anv_device *device = (struct anv_device *) _device; - const struct anv_format *format; - VkFormatProperties *properties; - - format = anv_format_for_vk_format(_format); - if (format == 0) - return vk_error(VK_ERROR_INVALID_VALUE); + pFormatInfo->linearTilingFeatures = linear; + pFormatInfo->optimalTilingFeatures = tiled; - switch (infoType) { - case VK_FORMAT_INFO_TYPE_PROPERTIES: - properties = (VkFormatProperties *)pData; + return VK_SUCCESS; - *pDataSize = sizeof(*properties); - if (pData == NULL) - return VK_SUCCESS; - - anv_format_get_properties(device, format, properties); - return VK_SUCCESS; + unsupported: + pFormatInfo->linearTilingFeatures = 0; + pFormatInfo->optimalTilingFeatures = 0; - default: - return vk_error(VK_ERROR_INVALID_VALUE); - } + return VK_SUCCESS; } -- cgit v1.2.3 From 8e05bbeee9f75e1902bd1592c27f5965124ee11b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 8 Jul 2015 10:38:07 -0700 Subject: vk/vulkan.h: Update extension handling to rev 130 --- include/vulkan/vulkan.h | 33 ++++++++++---------- src/vulkan/device.c | 80 ++++++++++++++++++++----------------------------- 2 files changed, 48 insertions(+), 65 deletions(-) (limited to 'src') diff --git 
a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index d073d5929df..a463349dc48 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1088,14 +1088,6 @@ typedef enum { VK_ENUM_RANGE(PHYSICAL_DEVICE_INFO_TYPE, PROPERTIES, MEMORY_PROPERTIES) } VkPhysicalDeviceInfoType; -typedef enum { - // Info type for vkGetGlobalExtensionInfo() and vkGetPhysicalDeviceExtensionInfo() - VK_EXTENSION_INFO_TYPE_COUNT = 0x00000000, - VK_EXTENSION_INFO_TYPE_PROPERTIES = 0x00000001, - - VK_ENUM_RANGE(EXTENSION_INFO_TYPE, COUNT, PROPERTIES) -} VkExtensionInfoType; - typedef enum { // Info type for vkGetFormatInfo() VK_FORMAT_INFO_TYPE_PROPERTIES = 0x00000000, @@ -1895,8 +1887,10 @@ typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instanc typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); typedef VkResult (VKAPI *PFN_vkDestroyDevice)(VkDevice device); -typedef VkResult (VKAPI *PFN_vkGetGlobalExtensionInfo)(VkExtensionInfoType infoType, uint32_t extensionIndex, size_t* pDataSize, void* pData); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceExtensionInfo)(VkPhysicalDevice physicalDevice, VkExtensionInfoType infoType, uint32_t extensionIndex, size_t* pDataSize, void* pData); +typedef VkResult (VKAPI *PFN_vkGetGlobalExtensionCount)(uint32_t* pCount); +typedef VkResult (VKAPI *PFN_vkGetGlobalExtensionProperties)(uint32_t extensionIndex, VkExtensionProperties* pProperties); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceExtensionCount)(VkPhysicalDevice physicalDevice, uint32_t* pCount); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, uint32_t extensionIndex, VkExtensionProperties* pProperties); typedef VkResult (VKAPI *PFN_vkEnumerateLayers)(VkPhysicalDevice physicalDevice, size_t maxStringSize, size_t* pLayerCount, 
char* const* pOutLayers, void* pReserved); typedef VkResult (VKAPI *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueNodeIndex, uint32_t queueIndex, VkQueue* pQueue); typedef VkResult (VKAPI *PFN_vkQueueSubmit)(VkQueue queue, uint32_t cmdBufferCount, const VkCmdBuffer* pCmdBuffers, VkFence fence); @@ -2035,18 +2029,21 @@ VkResult VKAPI vkCreateDevice( VkResult VKAPI vkDestroyDevice( VkDevice device); -VkResult VKAPI vkGetGlobalExtensionInfo( - VkExtensionInfoType infoType, +VkResult VKAPI vkGetGlobalExtensionCount( + uint32_t* pCount); + +VkResult VKAPI vkGetGlobalExtensionProperties( uint32_t extensionIndex, - size_t* pDataSize, - void* pData); + VkExtensionProperties* pProperties); + +VkResult VKAPI vkGetPhysicalDeviceExtensionCount( + VkPhysicalDevice physicalDevice, + uint32_t* pCount); -VkResult VKAPI vkGetPhysicalDeviceExtensionInfo( +VkResult VKAPI vkGetPhysicalDeviceExtensionProperties( VkPhysicalDevice physicalDevice, - VkExtensionInfoType infoType, uint32_t extensionIndex, - size_t* pDataSize, - void* pData); + VkExtensionProperties* pProperties); VkResult VKAPI vkEnumerateLayers( VkPhysicalDevice physicalDevice, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 8e4694b1beb..ebd04e205b5 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -472,64 +472,50 @@ VkResult anv_DestroyDevice( return VK_SUCCESS; } -VkResult anv_GetGlobalExtensionInfo( - VkExtensionInfoType infoType, - uint32_t extensionIndex, - size_t* pDataSize, - void* pData) +static const VkExtensionProperties global_extensions[] = { + { + .extName = "VK_WSI_LunarG", + .version = 3 + } +}; + +VkResult anv_GetGlobalExtensionCount( + uint32_t* pCount) { - static const VkExtensionProperties extensions[] = { - { - .extName = "VK_WSI_LunarG", - .version = 3 - } - }; - uint32_t count = ARRAY_SIZE(extensions); + *pCount = ARRAY_SIZE(global_extensions); - switch (infoType) { - case VK_EXTENSION_INFO_TYPE_COUNT: - memcpy(pData, &count, sizeof(count)); - *pDataSize = 
sizeof(count); - return VK_SUCCESS; + return VK_SUCCESS; +} - case VK_EXTENSION_INFO_TYPE_PROPERTIES: - if (extensionIndex >= count) - return vk_error(VK_ERROR_INVALID_EXTENSION); - memcpy(pData, &extensions[extensionIndex], sizeof(extensions[0])); - *pDataSize = sizeof(extensions[0]); - return VK_SUCCESS; +VkResult anv_GetGlobalExtensionProperties( + uint32_t extensionIndex, + VkExtensionProperties* pProperties) +{ + assert(extensionIndex < ARRAY_SIZE(global_extensions)); - default: - return VK_UNSUPPORTED; - } + *pProperties = global_extensions[extensionIndex]; + + return VK_SUCCESS; } -VkResult anv_GetPhysicalDeviceExtensionInfo( +VkResult anv_GetPhysicalDeviceExtensionCount( VkPhysicalDevice physicalDevice, - VkExtensionInfoType infoType, - uint32_t extensionIndex, - size_t* pDataSize, - void* pData) + uint32_t* pCount) { - uint32_t *count; + /* None supported at this time */ + *pCount = 0; - switch (infoType) { - case VK_EXTENSION_INFO_TYPE_COUNT: - *pDataSize = 4; - if (pData == NULL) - return VK_SUCCESS; + return VK_SUCCESS; +} - count = pData; - *count = 0; - return VK_SUCCESS; - - case VK_EXTENSION_INFO_TYPE_PROPERTIES: - return vk_error(VK_ERROR_INVALID_EXTENSION); - - default: - return VK_UNSUPPORTED; - } +VkResult anv_GetPhysicalDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, + uint32_t extensionIndex, + VkExtensionProperties* pProperties) +{ + /* None supported at this time */ + return vk_error(VK_ERROR_INVALID_EXTENSION); } VkResult anv_EnumerateLayers( -- cgit v1.2.3 From e19d6be2a93104762bf5840456a695f708c56a57 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 8 Jul 2015 10:53:32 -0700 Subject: vk/vulkan.h: Add command buffer levels --- include/vulkan/vulkan.h | 11 +++++++++++ src/vulkan/device.c | 2 ++ 2 files changed, 13 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 8d4c6a15250..b8e2b687bd0 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -818,6 
+818,15 @@ typedef enum { VK_ATTACHMENT_STORE_OP_MAX_ENUM = 0x7FFFFFFF } VkAttachmentStoreOp; +typedef enum { + VK_CMD_BUFFER_LEVEL_PRIMARY = 0, + VK_CMD_BUFFER_LEVEL_SECONDARY = 1, + VK_CMD_BUFFER_LEVEL_BEGIN_RANGE = VK_CMD_BUFFER_LEVEL_PRIMARY, + VK_CMD_BUFFER_LEVEL_END_RANGE = VK_CMD_BUFFER_LEVEL_SECONDARY, + VK_CMD_BUFFER_LEVEL_NUM = (VK_CMD_BUFFER_LEVEL_SECONDARY - VK_CMD_BUFFER_LEVEL_PRIMARY + 1), + VK_CMD_BUFFER_LEVEL_MAX_ENUM = 0x7FFFFFFF +} VkCmdBufferLevel; + typedef enum { VK_PIPELINE_BIND_POINT_COMPUTE = 0, VK_PIPELINE_BIND_POINT_GRAPHICS = 1, @@ -1029,6 +1038,7 @@ typedef enum { VK_CMD_BUFFER_OPTIMIZE_PIPELINE_SWITCH_BIT = 0x00000002, VK_CMD_BUFFER_OPTIMIZE_ONE_TIME_SUBMIT_BIT = 0x00000004, VK_CMD_BUFFER_OPTIMIZE_DESCRIPTOR_SET_SWITCH_BIT = 0x00000008, + VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT = 0x00000010, } VkCmdBufferOptimizeFlagBits; typedef VkFlags VkCmdBufferOptimizeFlags; @@ -1680,6 +1690,7 @@ typedef struct { VkStructureType sType; const void* pNext; uint32_t queueNodeIndex; + VkCmdBufferLevel level; VkCmdBufferCreateFlags flags; } VkCmdBufferCreateInfo; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index ebd04e205b5..38d922767bb 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2155,6 +2155,8 @@ VkResult anv_CreateCommandBuffer( struct anv_cmd_buffer *cmd_buffer; VkResult result; + assert(pCreateInfo->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (cmd_buffer == NULL) -- cgit v1.2.3 From 52940e8fcfec9ed2c232bd196404d709b5bbac57 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 8 Jul 2015 10:57:13 -0700 Subject: vk/vulkan.h: Add RenderPassBeginContents --- include/vulkan/vulkan.h | 11 +++++++++++ src/vulkan/device.c | 2 ++ 2 files changed, 13 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index b8e2b687bd0..96873b14818 100644 --- a/include/vulkan/vulkan.h +++ 
b/include/vulkan/vulkan.h @@ -890,6 +890,16 @@ typedef enum { VK_TIMESTAMP_TYPE_MAX_ENUM = 0x7FFFFFFF } VkTimestampType; +typedef enum { + VK_RENDER_PASS_CONTENTS_INLINE = 0, + VK_RENDER_PASS_CONTENTS_SECONDARY_CMD_BUFFERS = 1, + VK_RENDER_PASS_CONTENTS_BEGIN_RANGE = VK_RENDER_PASS_CONTENTS_INLINE, + VK_RENDER_PASS_CONTENTS_END_RANGE = VK_RENDER_PASS_CONTENTS_SECONDARY_CMD_BUFFERS, + VK_RENDER_PASS_CONTENTS_NUM = (VK_RENDER_PASS_CONTENTS_SECONDARY_CMD_BUFFERS - VK_RENDER_PASS_CONTENTS_INLINE + 1), + VK_RENDER_PASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF +} VkRenderPassContents; + + typedef enum { VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT = 0x00000001, VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT = 0x00000002, @@ -1746,6 +1756,7 @@ typedef struct { typedef struct { VkRenderPass renderPass; VkFramebuffer framebuffer; + VkRenderPassContents contents; } VkRenderPassBegin; typedef struct { diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 38d922767bb..ce12c0defa8 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3655,6 +3655,8 @@ void anv_CmdBeginRenderPass( struct anv_framebuffer *framebuffer = (struct anv_framebuffer *) pRenderPassBegin->framebuffer; + assert(pRenderPassBegin->contents == VK_RENDER_PASS_CONTENTS_INLINE); + cmd_buffer->framebuffer = framebuffer; cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; -- cgit v1.2.3 From 522ab835d6f14da3d6f08ed0bbd81573fcee1b25 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 8 Jul 2015 11:44:52 -0700 Subject: vk/vulkan.h: Move over to the new border color enums --- include/vulkan/vulkan.h | 15 +++++++++------ src/vulkan/device.c | 38 ++++++++++++-------------------------- src/vulkan/private.h | 3 +-- 3 files changed, 22 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 3ff56aae5ea..14d4bd52706 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -732,12 +732,15 @@ typedef enum { } VkTexAddress; typedef enum { - 
VK_BORDER_COLOR_OPAQUE_WHITE = 0, - VK_BORDER_COLOR_TRANSPARENT_BLACK = 1, - VK_BORDER_COLOR_OPAQUE_BLACK = 2, - VK_BORDER_COLOR_BEGIN_RANGE = VK_BORDER_COLOR_OPAQUE_WHITE, - VK_BORDER_COLOR_END_RANGE = VK_BORDER_COLOR_OPAQUE_BLACK, - VK_BORDER_COLOR_NUM = (VK_BORDER_COLOR_OPAQUE_BLACK - VK_BORDER_COLOR_OPAQUE_WHITE + 1), + VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK = 0, + VK_BORDER_COLOR_INT_TRANSPARENT_BLACK = 1, + VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK = 2, + VK_BORDER_COLOR_INT_OPAQUE_BLACK = 3, + VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE = 4, + VK_BORDER_COLOR_INT_OPAQUE_WHITE = 5, + VK_BORDER_COLOR_BEGIN_RANGE = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, + VK_BORDER_COLOR_END_RANGE = VK_BORDER_COLOR_INT_OPAQUE_WHITE, + VK_BORDER_COLOR_NUM = (VK_BORDER_COLOR_INT_OPAQUE_WHITE - VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK + 1), VK_BORDER_COLOR_MAX_ENUM = 0x7FFFFFFF } VkBorderColor; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index ce12c0defa8..4040745c25a 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -338,30 +338,19 @@ anv_queue_finish(struct anv_queue *queue) static void anv_device_init_border_colors(struct anv_device *device) { - float float_border_colors[][4] = { - [VK_BORDER_COLOR_OPAQUE_WHITE] = { 1.0, 1.0, 1.0, 1.0 }, - [VK_BORDER_COLOR_TRANSPARENT_BLACK] = { 0.0, 0.0, 0.0, 0.0 }, - [VK_BORDER_COLOR_OPAQUE_BLACK] = { 0.0, 0.0, 0.0, 1.0 } + static const VkClearColorValue border_colors[] = { + [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 0.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 1.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .f32 = { 1.0, 1.0, 1.0, 1.0 } }, + [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .u32 = { 0, 0, 0, 0 } }, + [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .u32 = { 0, 0, 0, 1 } }, + [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .u32 = { 1, 1, 1, 1 } }, }; - uint32_t uint32_border_colors[][4] = { - [VK_BORDER_COLOR_OPAQUE_WHITE] = { 1, 1, 1, 1 }, - [VK_BORDER_COLOR_TRANSPARENT_BLACK] = 
{ 0, 0, 0, 0 }, - [VK_BORDER_COLOR_OPAQUE_BLACK] = { 0, 0, 0, 1 } - }; - - device->float_border_colors = + device->border_colors = anv_state_pool_alloc(&device->dynamic_state_pool, - sizeof(float_border_colors), 32); - memcpy(device->float_border_colors.map, - float_border_colors, sizeof(float_border_colors)); - - device->uint32_border_colors = - anv_state_pool_alloc(&device->dynamic_state_pool, - sizeof(uint32_border_colors), 32); - memcpy(device->uint32_border_colors.map, - uint32_border_colors, sizeof(uint32_border_colors)); - + sizeof(border_colors), 32); + memcpy(device->border_colors.map, border_colors, sizeof(border_colors)); } static const uint32_t BATCH_SIZE = 8192; @@ -451,10 +440,7 @@ VkResult anv_DestroyDevice( /* We only need to free these to prevent valgrind errors. The backing * BO will go away in a couple of lines so we don't actually leak. */ - anv_state_pool_free(&device->dynamic_state_pool, - device->float_border_colors); - anv_state_pool_free(&device->dynamic_state_pool, - device->uint32_border_colors); + anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); #endif anv_bo_pool_finish(&device->batch_bo_pool); @@ -1537,7 +1523,7 @@ VkResult anv_CreateSampler( .CubeSurfaceControlMode = 0, .IndirectStatePointer = - device->float_border_colors.offset + + device->border_colors.offset + pCreateInfo->borderColor * sizeof(float) * 4, .LODClampMagnificationMode = MIPNONE, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 77de59340cf..4eb3a953173 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -412,8 +412,7 @@ struct anv_device { struct anv_meta_state meta_state; - struct anv_state float_border_colors; - struct anv_state uint32_border_colors; + struct anv_state border_colors; struct anv_queue queue; -- cgit v1.2.3 From b4ef7f354b11f6e10e6cd27862c67ac908f1145e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 8 Jul 2015 11:49:43 -0700 Subject: vk/0.130: Remove msaa members of VkDepthStencilViewCreateInfo 
--- include/vulkan/vulkan.h | 2 -- src/vulkan/image.c | 1 - 2 files changed, 3 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 14d4bd52706..99489fe58cc 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1382,8 +1382,6 @@ typedef struct { uint32_t mipLevel; uint32_t baseArraySlice; uint32_t arraySize; - VkImage msaaResolveImage; - VkImageSubresourceRange msaaResolveSubResource; VkDepthStencilViewCreateFlags flags; } VkDepthStencilViewCreateInfo; diff --git a/src/vulkan/image.c b/src/vulkan/image.c index be80a5d53dc..33ce8151fe9 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -689,7 +689,6 @@ anv_CreateDepthStencilView(VkDevice _device, anv_assert(pCreateInfo->mipLevel == 0); anv_assert(pCreateInfo->baseArraySlice == 0); anv_assert(pCreateInfo->arraySize == 1); - anv_assert(pCreateInfo->msaaResolveImage == 0); view->bo = image->bo; -- cgit v1.2.3 From aae45ab583aec2c62503fc692d34f987b395ec81 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 8 Jul 2015 11:51:12 -0700 Subject: vk/vulkan.h: Add packing parameters to BufferImageCopy --- include/vulkan/vulkan.h | 2 ++ src/vulkan/meta.c | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 99489fe58cc..2c45b201e15 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1741,6 +1741,8 @@ typedef struct { typedef struct { VkDeviceSize bufferOffset; + uint32_t bufferRowLength; + uint32_t bufferImageHeight; VkImageSubresource imageSubresource; VkOffset3D imageOffset; VkExtent3D imageExtent; diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 37c8421a46e..b759510e992 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -995,6 +995,11 @@ void anv_CmdCopyBufferToImage( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { + if (pRegions[r].bufferRowLength != 0) + anv_finishme("bufferRowLength not 
supported in CopyBufferToImage"); + if (pRegions[r].bufferImageHeight != 0) + anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); + struct anv_image *src_image; anv_CreateImage(vk_device, &(VkImageCreateInfo) { @@ -1087,6 +1092,11 @@ void anv_CmdCopyImageToBuffer( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { + if (pRegions[r].bufferRowLength != 0) + anv_finishme("bufferRowLength not supported in CopyBufferToImage"); + if (pRegions[r].bufferImageHeight != 0) + anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); + struct anv_surface_view src_view; anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { -- cgit v1.2.3 From e9034ed875b7b370319241c623a811da0292811a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 8 Jul 2015 12:46:35 -0700 Subject: vk/0.130: Update vkCmdBlitImage signature Add VkTexFilter param. Ignored for now. --- include/vulkan/vulkan.h | 5 +++-- src/vulkan/meta.c | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 2c45b201e15..eee665d1dac 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1983,7 +1983,7 @@ typedef void (VKAPI *PFN_vkCmdDispatch)(VkCmdBuffer cmdBuffer, uint32_t x, uint3 typedef void (VKAPI *PFN_vkCmdDispatchIndirect)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset); typedef void (VKAPI *PFN_vkCmdCopyBuffer)(VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, VkBuffer destBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); typedef void (VKAPI *PFN_vkCmdCopyImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdBlitImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t 
regionCount, const VkImageBlit* pRegions); +typedef void (VKAPI *PFN_vkCmdBlitImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkTexFilter filter); typedef void (VKAPI *PFN_vkCmdCopyBufferToImage)(VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); typedef void (VKAPI *PFN_vkCmdCopyImageToBuffer)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer destBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); typedef void (VKAPI *PFN_vkCmdUpdateBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); @@ -2480,7 +2480,8 @@ void VKAPI vkCmdBlitImage( VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, - const VkImageBlit* pRegions); + const VkImageBlit* pRegions, + VkTexFilter filter); void VKAPI vkCmdCopyBufferToImage( VkCmdBuffer cmdBuffer, diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index b759510e992..d28468ff173 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -920,13 +920,17 @@ void anv_CmdBlitImage( VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, - const VkImageBlit* pRegions) + const VkImageBlit* pRegions, + VkTexFilter filter) + { struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; struct anv_image *src_image = (struct anv_image *)srcImage; struct anv_image *dest_image = (struct anv_image *)destImage; struct anv_saved_state saved_state; + anv_finishme("respect VkTexFilter"); + meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { -- cgit v1.2.3 From c8577b5f5243ee696a9f534e2cab0646474456a2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 8 Jul 2015 14:24:12 -0700 Subject: vk: Add a macro for creating anv 
variables from vulkan handles This is very helpful for doing the mass bunch of casts at the top of a function. It will also be invaluable when we get type saftey in the API. --- src/vulkan/device.c | 21 ++++++++++----------- src/vulkan/private.h | 3 +++ 2 files changed, 13 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 4040745c25a..f303878bc1e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1260,15 +1260,15 @@ VkResult anv_WaitForFences( bool32_t waitAll, uint64_t timeout) { - struct anv_device *device = (struct anv_device *) _device; - struct anv_fence **fences = (struct anv_fence **) pFences; + ANV_FROM_HANDLE(anv_device, device, _device); int64_t t = timeout; int ret; /* FIXME: handle !waitAll */ for (uint32_t i = 0; i < fenceCount; i++) { - ret = anv_gem_wait(device, fences[i]->bo.gem_handle, &t); + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + ret = anv_gem_wait(device, fence->bo.gem_handle, &t); if (ret == -1 && errno == ETIME) return VK_TIMEOUT; else if (ret == -1) @@ -1340,7 +1340,7 @@ VkResult anv_CreateBuffer( const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_buffer *buffer; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); @@ -1418,8 +1418,8 @@ VkResult anv_CreateBufferView( const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView) { - struct anv_device *device = (struct anv_device *) _device; - struct anv_buffer *buffer = (struct anv_buffer *) pCreateInfo->buffer; + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); struct anv_surface_view *view; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); @@ -1453,7 +1453,7 @@ VkResult anv_CreateSampler( const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler) { - struct anv_device *device = (struct 
anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_sampler *sampler; uint32_t mag_filter, min_filter, max_anisotropy; @@ -1555,7 +1555,7 @@ VkResult anv_CreateDescriptorSetLayout( const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_descriptor_set_layout *set_layout; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); @@ -1729,13 +1729,12 @@ VkResult anv_AllocDescriptorSets( VkDescriptorSet* pDescriptorSets, uint32_t* pCount) { - struct anv_device *device = (struct anv_device *) _device; - const struct anv_descriptor_set_layout *layout; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_descriptor_set *set; size_t size; for (uint32_t i = 0; i < count; i++) { - layout = (struct anv_descriptor_set_layout *) pSetLayouts[i]; + ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pSetLayouts[i]); size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]); set = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 4eb3a953173..6f1f350a5b6 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -940,6 +940,9 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, void * anv_lookup_entrypoint(const char *name); +#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ + struct __anv_type *__name = (struct __anv_type *) __handle + #ifdef __cplusplus } #endif -- cgit v1.2.3 From d29ec8fa36dfa6565c7950dc53815a73217ae72f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 8 Jul 2015 14:24:56 -0700 Subject: vk/vulkan.h: Update to the new UpdateDescriptorSets api --- include/vulkan/vulkan.h | 114 ++++++++++++++++-------------------------------- src/vulkan/device.c | 98 +++++++++++++++++++---------------------- src/vulkan/meta.c | 26 ++++++----- 3 files changed, 
98 insertions(+), 140 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index fd57e1ad22a..0faad2b51eb 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -211,13 +211,10 @@ typedef enum { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 43, VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 44, VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO = 45, - VK_STRUCTURE_TYPE_UPDATE_SAMPLERS = 46, - VK_STRUCTURE_TYPE_UPDATE_SAMPLER_TEXTURES = 47, - VK_STRUCTURE_TYPE_UPDATE_IMAGES = 48, - VK_STRUCTURE_TYPE_UPDATE_BUFFERS = 49, - VK_STRUCTURE_TYPE_UPDATE_AS_COPY = 50, VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 51, VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 52, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, VK_ENUM_RANGE(STRUCTURE_TYPE, APPLICATION_INFO, PIPELINE_LAYOUT_CREATE_INFO) } VkStructureType; @@ -1584,6 +1581,36 @@ typedef struct { const VkDescriptorTypeCount* pTypeCount; } VkDescriptorPoolCreateInfo; +typedef struct { + VkBufferView bufferView; + VkSampler sampler; + VkImageView imageView; + VkImageLayout imageLayout; +} VkDescriptorInfo; + +typedef struct { + VkStructureType sType; + const void* pNext; + VkDescriptorSet destSet; + uint32_t destBinding; + uint32_t destArrayElement; + uint32_t count; + VkDescriptorType descriptorType; + const VkDescriptorInfo* pDescriptors; +} VkWriteDescriptorSet; + +typedef struct { + VkStructureType sType; + const void* pNext; + VkDescriptorSet srcSet; + uint32_t srcBinding; + uint32_t srcArrayElement; + VkDescriptorSet destSet; + uint32_t destBinding; + uint32_t destArrayElement; + uint32_t count; +} VkCopyDescriptorSet; + typedef struct { float originX; float originY; @@ -1814,72 +1841,6 @@ typedef struct { VkMemoryInputFlags inputMask; } VkMemoryBarrier; -typedef struct { - VkStructureType sType; - const void* pNext; - VkBufferView view; -} VkBufferViewAttachInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkImageView 
view; - VkImageLayout layout; -} VkImageViewAttachInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t binding; - uint32_t arrayIndex; - uint32_t count; - const VkSampler* pSamplers; -} VkUpdateSamplers; - -typedef struct { - VkSampler sampler; - const VkImageViewAttachInfo* pImageView; -} VkSamplerImageViewInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t binding; - uint32_t arrayIndex; - uint32_t count; - const VkSamplerImageViewInfo* pSamplerImageViews; -} VkUpdateSamplerTextures; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkDescriptorType descriptorType; - uint32_t binding; - uint32_t arrayIndex; - uint32_t count; - const VkImageViewAttachInfo* pImageViews; -} VkUpdateImages; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkDescriptorType descriptorType; - uint32_t binding; - uint32_t arrayIndex; - uint32_t count; - const VkBufferViewAttachInfo* pBufferViews; -} VkUpdateBuffers; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkDescriptorType descriptorType; - VkDescriptorSet descriptorSet; - uint32_t binding; - uint32_t arrayElement; - uint32_t count; -} VkUpdateAsCopy; - typedef struct { VkStructureType sType; const void* pNext; @@ -1949,7 +1910,7 @@ typedef VkResult (VKAPI *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, VkDescriptorPoolUsage poolUsage, uint32_t maxSets, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets, uint32_t* pCount); -typedef void (VKAPI *PFN_vkUpdateDescriptors)(VkDevice device, 
VkDescriptorSet descriptorSet, uint32_t updateCount, const void** ppUpdateArray); +typedef VkResult (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); typedef VkResult (VKAPI *PFN_vkCreateDynamicViewportState)(VkDevice device, const VkDynamicVpStateCreateInfo* pCreateInfo, VkDynamicVpState* pState); typedef VkResult (VKAPI *PFN_vkCreateDynamicRasterState)(VkDevice device, const VkDynamicRsStateCreateInfo* pCreateInfo, VkDynamicRsState* pState); typedef VkResult (VKAPI *PFN_vkCreateDynamicColorBlendState)(VkDevice device, const VkDynamicCbStateCreateInfo* pCreateInfo, VkDynamicCbState* pState); @@ -2319,11 +2280,12 @@ VkResult VKAPI vkAllocDescriptorSets( VkDescriptorSet* pDescriptorSets, uint32_t* pCount); -void VKAPI vkUpdateDescriptors( +VkResult VKAPI vkUpdateDescriptorSets( VkDevice device, - VkDescriptorSet descriptorSet, - uint32_t updateCount, - const void** ppUpdateArray); + uint32_t writeCount, + const VkWriteDescriptorSet* pDescriptorWrites, + uint32_t copyCount, + const VkCopyDescriptorSet* pDescriptorCopies); VkResult VKAPI vkCreateDynamicViewportState( VkDevice device, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index f303878bc1e..379095c24da 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1756,74 +1756,68 @@ VkResult anv_AllocDescriptorSets( return VK_SUCCESS; } -void anv_UpdateDescriptors( - VkDevice _device, - VkDescriptorSet descriptorSet, - uint32_t updateCount, - const void** ppUpdateArray) +VkResult anv_UpdateDescriptorSets( + VkDevice device, + uint32_t writeCount, + const VkWriteDescriptorSet* pDescriptorWrites, + uint32_t copyCount, + const VkCopyDescriptorSet* pDescriptorCopies) { - struct anv_descriptor_set *set = (struct anv_descriptor_set *) descriptorSet; - VkUpdateSamplers *update_samplers; - VkUpdateSamplerTextures *update_sampler_textures; - VkUpdateImages *update_images; - 
VkUpdateBuffers *update_buffers; - VkUpdateAsCopy *update_as_copy; - - for (uint32_t i = 0; i < updateCount; i++) { - const struct anv_common *common = ppUpdateArray[i]; - - switch (common->sType) { - case VK_STRUCTURE_TYPE_UPDATE_SAMPLERS: - update_samplers = (VkUpdateSamplers *) common; + for (uint32_t i = 0; i < writeCount; i++) { + const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; + ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet); - for (uint32_t j = 0; j < update_samplers->count; j++) { - set->descriptors[update_samplers->binding + j].sampler = - (struct anv_sampler *) update_samplers->pSamplers[j]; + switch (write->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for (uint32_t j = 0; j < write->count; j++) { + set->descriptors[write->destBinding + j].sampler = + (struct anv_sampler *) write->pDescriptors[j].sampler; } - break; - case VK_STRUCTURE_TYPE_UPDATE_SAMPLER_TEXTURES: - /* FIXME: Shouldn't this be *_UPDATE_SAMPLER_IMAGES? 
*/ - update_sampler_textures = (VkUpdateSamplerTextures *) common; - - for (uint32_t j = 0; j < update_sampler_textures->count; j++) { - set->descriptors[update_sampler_textures->binding + j].view = - (struct anv_surface_view *) - update_sampler_textures->pSamplerImageViews[j].pImageView->view; - set->descriptors[update_sampler_textures->binding + j].sampler = - (struct anv_sampler *) - update_sampler_textures->pSamplerImageViews[j].sampler; - } - break; + if (write->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) + break; - case VK_STRUCTURE_TYPE_UPDATE_IMAGES: - update_images = (VkUpdateImages *) common; + /* fallthrough */ - for (uint32_t j = 0; j < update_images->count; j++) { - set->descriptors[update_images->binding + j].view = - (struct anv_surface_view *) update_images->pImageViews[j].view; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (uint32_t j = 0; j < write->count; j++) { + set->descriptors[write->destBinding + j].view = + (struct anv_surface_view *) write->pDescriptors[j].imageView; } break; - case VK_STRUCTURE_TYPE_UPDATE_BUFFERS: - update_buffers = (VkUpdateBuffers *) common; - - for (uint32_t j = 0; j < update_buffers->count; j++) { - set->descriptors[update_buffers->binding + j].view = - (struct anv_surface_view *) update_buffers->pBufferViews[j].view; - } - /* FIXME: descriptor arrays? 
*/ + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + anv_finishme("texel buffers not implemented"); break; - case VK_STRUCTURE_TYPE_UPDATE_AS_COPY: - update_as_copy = (VkUpdateAsCopy *) common; - (void) update_as_copy; - break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < write->count; j++) { + set->descriptors[write->destBinding + j].view = + (struct anv_surface_view *) write->pDescriptors[j].bufferView; + } default: break; } } + + for (uint32_t i = 0; i < copyCount; i++) { + const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; + ANV_FROM_HANDLE(anv_descriptor_set, src, copy->destSet); + ANV_FROM_HANDLE(anv_descriptor_set, dest, copy->destSet); + for (uint32_t j = 0; j < copy->count; j++) { + dest->descriptors[copy->destBinding + j] = + src->descriptors[copy->srcBinding + j]; + } + } + + return VK_SUCCESS; } // State object functions diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index d28468ff173..60f263d3d8f 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -578,22 +578,24 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, anv_AllocDescriptorSets((VkDevice) device, 0 /* pool */, VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, 1, &device->meta_state.blit.ds_layout, &set, &count); - anv_UpdateDescriptors((VkDevice) device, set, 1, - (const void * []) { - &(VkUpdateImages) { - .sType = VK_STRUCTURE_TYPE_UPDATE_IMAGES, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .binding = 0, + anv_UpdateDescriptorSets((VkDevice) device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .destSet = set, + .destBinding = 0, + .destArrayElement = 0, .count = 1, - .pImageViews = (VkImageViewAttachInfo[]) { + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pDescriptors = (VkDescriptorInfo[]) { { - 
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_ATTACH_INFO, - .view = (VkImageView) src, - .layout = VK_IMAGE_LAYOUT_GENERAL, - } + .imageView = (VkImageView) src, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL + }, } } - }); + }, 0, NULL); struct anv_framebuffer *fb; anv_CreateFramebuffer((VkDevice) device, -- cgit v1.2.3 From 4fcb32a17df153c6bf696c8812d33c932f889b65 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 8 Jul 2015 14:39:05 -0700 Subject: vk/0.130: Remove VkImageViewCreateInfo::minLod It's now set solely through VkSampler. --- include/vulkan/vulkan.h | 1 - src/vulkan/image.c | 16 +++++++++++++--- src/vulkan/meta.c | 5 ----- 3 files changed, 13 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 9dfd6571f0b..fd226c5570a 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1348,7 +1348,6 @@ typedef struct { VkFormat format; VkChannelMapping channels; VkImageSubresourceRange subresourceRange; - float minLod; } VkImageViewCreateInfo; typedef struct { diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 33ce8151fe9..70d7c5321d5 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -402,7 +402,13 @@ anv_image_view_init(struct anv_surface_view *view, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, .MemoryObjectControlState = GEN8_MOCS, - .BaseMipLevel = (float) pCreateInfo->minLod, + + /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in + * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have + * both Base Mip Level fields nonzero". 
+ */ + .BaseMipLevel = 0.0, + .SurfaceQPitch = surface->qpitch >> 2, .Height = image->extent.height - 1, .Width = image->extent.width - 1, @@ -493,8 +499,6 @@ anv_validate_CreateImageView(VkDevice _device, assert(subresource->baseMipLevel + subresource->mipLevels <= image->levels); assert(subresource->baseArraySlice < image->array_size); assert(subresource->baseArraySlice + subresource->arraySize <= image->array_size); - assert(pCreateInfo->minLod >= 0); - assert(pCreateInfo->minLod < image->levels); assert(pView); if (view_info->is_cube) { @@ -605,7 +609,13 @@ anv_color_attachment_view_init(struct anv_surface_view *view, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, .MemoryObjectControlState = GEN8_MOCS, + + /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in + * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have + * both Base Mip Level fields nonzero". + */ .BaseMipLevel = 0.0, + .SurfaceQPitch = surface->qpitch >> 2, .Height = image->extent.height - 1, .Width = image->extent.width - 1, diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 60f263d3d8f..c89373f3316 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -746,7 +746,6 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .baseArraySlice = 0, .arraySize = 1 }, - .minLod = 0 }, cmd_buffer); @@ -887,7 +886,6 @@ void anv_CmdCopyImage( .baseArraySlice = pRegions[r].srcSubresource.arraySlice, .arraySize = 1 }, - .minLod = 0 }, cmd_buffer); @@ -956,7 +954,6 @@ void anv_CmdBlitImage( .baseArraySlice = pRegions[r].srcSubresource.arraySlice, .arraySize = 1 }, - .minLod = 0 }, cmd_buffer); @@ -1051,7 +1048,6 @@ void anv_CmdCopyBufferToImage( .baseArraySlice = 0, .arraySize = 1 }, - .minLod = 0 }, cmd_buffer); @@ -1123,7 +1119,6 @@ void anv_CmdCopyImageToBuffer( .baseArraySlice = pRegions[r].imageSubresource.arraySlice, .arraySize = 1 }, - .minLod = 0 }, cmd_buffer); -- cgit v1.2.3 From 5a4ebf6bc12b01fdecdb04c1e4a1df573d75ce8a Mon Sep 17 00:00:00 
2001 From: Jason Ekstrand Date: Wed, 8 Jul 2015 17:29:49 -0700 Subject: vk: Move to the new pipeline creation API's --- include/vulkan/vulkan.h | 130 ++++++++++++++--------- src/vulkan/compiler.cpp | 12 +-- src/vulkan/device.c | 4 +- src/vulkan/glsl_scraper.py | 24 +++-- src/vulkan/meta.c | 213 +++++++++++++++++++------------------ src/vulkan/pipeline.c | 254 +++++++++++++++++++++++++++------------------ src/vulkan/private.h | 9 +- 7 files changed, 368 insertions(+), 278 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index fd226c5570a..98146b1276a 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -215,6 +215,8 @@ typedef enum { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 52, VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, + VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, + VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, VK_ENUM_RANGE(STRUCTURE_TYPE, APPLICATION_INFO, PIPELINE_LAYOUT_CREATE_INFO) } VkStructureType; @@ -437,7 +439,7 @@ typedef enum { VK_OBJECT_TYPE_IMAGE_VIEW = 9, VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW = 10, VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW = 11, - + VK_OBJECT_TYPE_SHADER_MODULE = 12, VK_OBJECT_TYPE_SHADER = 13, VK_OBJECT_TYPE_PIPELINE = 14, VK_OBJECT_TYPE_PIPELINE_LAYOUT = 15, @@ -455,9 +457,10 @@ typedef enum { VK_OBJECT_TYPE_QUERY_POOL = 27, VK_OBJECT_TYPE_FRAMEBUFFER = 28, VK_OBJECT_TYPE_RENDER_PASS = 29, + VK_OBJECT_TYPE_PIPELINE_CACHE = 30, VK_OBJECT_TYPE_BEGIN_RANGE = VK_OBJECT_TYPE_INSTANCE, - VK_OBJECT_TYPE_END_RANGE = VK_OBJECT_TYPE_RENDER_PASS, - VK_NUM_OBJECT_TYPE = (VK_OBJECT_TYPE_END_RANGE - VK_OBJECT_TYPE_BEGIN_RANGE + 1), + VK_OBJECT_TYPE_END_RANGE = VK_OBJECT_TYPE_PIPELINE_CACHE, + VK_OBJECT_TYPE_NUM = (VK_OBJECT_TYPE_PIPELINE_CACHE - VK_OBJECT_TYPE_INSTANCE + 1), VK_OBJECT_TYPE_MAX_ENUM = 0x7FFFFFFF } VkObjectType; @@ -1006,6 +1009,7 @@ typedef enum { VK_DEPTH_STENCIL_VIEW_CREATE_READ_ONLY_STENCIL_BIT = 0x00000002, } 
VkDepthStencilViewCreateFlagBits; typedef VkFlags VkDepthStencilViewCreateFlags; +typedef VkFlags VkShaderModuleCreateFlags; typedef VkFlags VkShaderCreateFlags; typedef enum { @@ -1019,6 +1023,7 @@ typedef VkFlags VkChannelFlags; typedef enum { VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT = 0x00000001, VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT = 0x00000002, + VK_PIPELINE_CREATE_DERIVATIVE_BIT = 0x00000004, } VkPipelineCreateFlagBits; typedef VkFlags VkPipelineCreateFlags; @@ -1377,20 +1382,30 @@ typedef struct { const void* pNext; size_t codeSize; const void* pCode; + VkShaderModuleCreateFlags flags; +} VkShaderModuleCreateInfo; + +typedef struct { + VkStructureType sType; + const void* pNext; + VkShaderModule module; + const char* pName; VkShaderCreateFlags flags; } VkShaderCreateInfo; +typedef struct { + VkStructureType sType; + const void* pNext; + size_t initialSize; + const void* initialData; + size_t maxSize; +} VkPipelineCacheCreateInfo; + typedef struct { uint32_t constantId; uint32_t offset; } VkSpecializationMapEntry; -typedef struct { - uint32_t bufferId; - size_t bufferSize; - const void* pBufferData; -} VkLinkConstBuffer; - typedef struct { uint32_t mapEntryCount; const VkSpecializationMapEntry* pMap; @@ -1398,17 +1413,11 @@ typedef struct { } VkSpecializationInfo; typedef struct { + VkStructureType sType; + const void* pNext; VkShaderStage stage; VkShader shader; - uint32_t linkConstBufferCount; - const VkLinkConstBuffer* pLinkConstBufferInfo; const VkSpecializationInfo* pSpecializationInfo; -} VkPipelineShader; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkPipelineShader shader; } VkPipelineShaderStageCreateInfo; typedef struct { @@ -1517,23 +1526,45 @@ typedef struct { typedef struct { VkStructureType sType; const void* pNext; + uint32_t stageCount; + const VkPipelineShaderStageCreateInfo* pStages; + const VkPipelineVertexInputStateCreateInfo* pVertexInputState; + const VkPipelineIaStateCreateInfo* pIaState; + const 
VkPipelineTessStateCreateInfo* pTessState; + const VkPipelineVpStateCreateInfo* pVpState; + const VkPipelineRsStateCreateInfo* pRsState; + const VkPipelineMsStateCreateInfo* pMsState; + const VkPipelineDsStateCreateInfo* pDsState; + const VkPipelineCbStateCreateInfo* pCbState; VkPipelineCreateFlags flags; VkPipelineLayout layout; + VkPipeline basePipelineHandle; + int32_t basePipelineIndex; } VkGraphicsPipelineCreateInfo; typedef struct { VkStructureType sType; const void* pNext; - VkPipelineShader cs; + VkPipelineShaderStageCreateInfo cs; VkPipelineCreateFlags flags; VkPipelineLayout layout; + VkPipeline basePipelineHandle; + int32_t basePipelineIndex; } VkComputePipelineCreateInfo; +typedef struct { + VkShaderStageFlags stageFlags; + uint32_t start; + uint32_t length; +} VkPushConstantRange; + typedef struct { VkStructureType sType; const void* pNext; uint32_t descriptorSetCount; const VkDescriptorSetLayout* pSetLayouts; + uint32_t pushConstantRangeCount; + const VkPushConstantRange* pPushConstantRanges; } VkPipelineLayoutCreateInfo; typedef struct { @@ -1892,13 +1923,14 @@ typedef VkResult (VKAPI *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkIma typedef VkResult (VKAPI *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); typedef VkResult (VKAPI *PFN_vkCreateColorAttachmentView)(VkDevice device, const VkColorAttachmentViewCreateInfo* pCreateInfo, VkColorAttachmentView* pView); typedef VkResult (VKAPI *PFN_vkCreateDepthStencilView)(VkDevice device, const VkDepthStencilViewCreateInfo* pCreateInfo, VkDepthStencilView* pView); +typedef VkResult (VKAPI *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); typedef VkResult (VKAPI *PFN_vkCreateShader)(VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); -typedef VkResult (VKAPI *PFN_vkCreateGraphicsPipeline)(VkDevice device, const VkGraphicsPipelineCreateInfo* pCreateInfo, 
VkPipeline* pPipeline); -typedef VkResult (VKAPI *PFN_vkCreateGraphicsPipelineDerivative)(VkDevice device, const VkGraphicsPipelineCreateInfo* pCreateInfo, VkPipeline basePipeline, VkPipeline* pPipeline); -typedef VkResult (VKAPI *PFN_vkCreateComputePipeline)(VkDevice device, const VkComputePipelineCreateInfo* pCreateInfo, VkPipeline* pPipeline); -typedef VkResult (VKAPI *PFN_vkStorePipeline)(VkDevice device, VkPipeline pipeline, size_t* pDataSize, void* pData); -typedef VkResult (VKAPI *PFN_vkLoadPipeline)(VkDevice device, size_t dataSize, const void* pData, VkPipeline* pPipeline); -typedef VkResult (VKAPI *PFN_vkLoadPipelineDerivative)(VkDevice device, size_t dataSize, const void* pData, VkPipeline basePipeline, VkPipeline* pPipeline); +typedef VkResult (VKAPI *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); +typedef size_t (VKAPI *PFN_vkGetPipelineCacheSize)(VkDevice device, VkPipelineCache pipelineCache); +typedef VkResult (VKAPI *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, void* pData); +typedef VkResult (VKAPI *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache destCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); +typedef VkResult (VKAPI *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); +typedef VkResult (VKAPI *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); typedef VkResult (VKAPI *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout); typedef VkResult (VKAPI *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); typedef VkResult (VKAPI 
*PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); @@ -2200,45 +2232,49 @@ VkResult VKAPI vkCreateDepthStencilView( const VkDepthStencilViewCreateInfo* pCreateInfo, VkDepthStencilView* pView); +VkResult VKAPI vkCreateShaderModule( + VkDevice device, + const VkShaderModuleCreateInfo* pCreateInfo, + VkShaderModule* pShaderModule); + VkResult VKAPI vkCreateShader( VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); -VkResult VKAPI vkCreateGraphicsPipeline( +VkResult VKAPI vkCreatePipelineCache( VkDevice device, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - VkPipeline* pPipeline); + const VkPipelineCacheCreateInfo* pCreateInfo, + VkPipelineCache* pPipelineCache); -VkResult VKAPI vkCreateGraphicsPipelineDerivative( +size_t VKAPI vkGetPipelineCacheSize( VkDevice device, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - VkPipeline basePipeline, - VkPipeline* pPipeline); + VkPipelineCache pipelineCache); -VkResult VKAPI vkCreateComputePipeline( +VkResult VKAPI vkGetPipelineCacheData( VkDevice device, - const VkComputePipelineCreateInfo* pCreateInfo, - VkPipeline* pPipeline); + VkPipelineCache pipelineCache, + void* pData); -VkResult VKAPI vkStorePipeline( +VkResult VKAPI vkMergePipelineCaches( VkDevice device, - VkPipeline pipeline, - size_t* pDataSize, - void* pData); + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches); -VkResult VKAPI vkLoadPipeline( +VkResult VKAPI vkCreateGraphicsPipelines( VkDevice device, - size_t dataSize, - const void* pData, - VkPipeline* pPipeline); + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + VkPipeline* pPipelines); -VkResult VKAPI vkLoadPipelineDerivative( +VkResult VKAPI vkCreateComputePipelines( VkDevice device, - size_t dataSize, - const void* pData, - VkPipeline basePipeline, - VkPipeline* pPipeline); + VkPipelineCache 
pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo* pCreateInfos, + VkPipeline* pPipelines); VkResult VKAPI vkCreatePipelineLayout( VkDevice device, diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 558a31001e2..fb9615306e6 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -942,7 +942,7 @@ anv_compile_shader_glsl(struct anv_compiler *compiler, shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); fail_if(shader == NULL, "failed to create %s shader\n", stage_info[stage].name); - shader->Source = strdup(src_as_glsl(pipeline->shaders[stage]->data)); + shader->Source = strdup(src_as_glsl(pipeline->shaders[stage]->module->data)); _mesa_glsl_compile_shader(&brw->ctx, shader, false, false); fail_on_compile_error(shader->CompileStatus, shader->InfoLog); @@ -968,13 +968,13 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, mesa_shader->Type = stage_info[stage].token; mesa_shader->Stage = stage_info[stage].stage; - assert(shader->size % 4 == 0); + assert(shader->module->size % 4 == 0); struct gl_shader_compiler_options *glsl_options = &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage]; mesa_shader->Program->nir = - spirv_to_nir((uint32_t *)shader->data, shader->size / 4, + spirv_to_nir((uint32_t *)shader->module->data, shader->module->size / 4, glsl_options->NirOptions); nir_validate_shader(mesa_shader->Program->nir); @@ -1041,14 +1041,14 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) continue; /* You need at least this much for "void main() { }" anyway */ - assert(pipeline->shaders[i]->size >= 12); + assert(pipeline->shaders[i]->module->size >= 12); - if (src_as_glsl(pipeline->shaders[i]->data)) { + if (src_as_glsl(pipeline->shaders[i]->module->data)) { all_spirv = false; break; } - assert(pipeline->shaders[i]->size % 4 == 0); + assert(pipeline->shaders[i]->module->size % 4 == 0); } if (all_spirv) { diff --git a/src/vulkan/device.c 
b/src/vulkan/device.c index 379095c24da..61e29a78015 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1015,6 +1015,7 @@ VkResult anv_DestroyObject( return anv_FreeMemory(_device, (VkDeviceMemory) _object); case VK_OBJECT_TYPE_DESCRIPTOR_POOL: + case VK_OBJECT_TYPE_PIPELINE_CACHE: /* These are just dummys anyway, so we don't need to destroy them */ return VK_SUCCESS; @@ -1022,6 +1023,7 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_IMAGE: case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW: case VK_OBJECT_TYPE_SHADER: + case VK_OBJECT_TYPE_SHADER_MODULE: case VK_OBJECT_TYPE_PIPELINE_LAYOUT: case VK_OBJECT_TYPE_SAMPLER: case VK_OBJECT_TYPE_DESCRIPTOR_SET: @@ -2877,7 +2879,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) uint32_t s, dirty = cmd_buffer->descriptors_dirty & cmd_buffer->pipeline->active_stages; - VkResult result; + VkResult result = VK_SUCCESS; for_each_bit(s, dirty) { result = flush_descriptor_set(cmd_buffer, s); if (result != VK_SUCCESS) diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index f72aaf8299c..918c156027a 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -101,7 +101,7 @@ class Shader: line_start += 6 f.write('\n};\n') -token_exp = re.compile(r'(GLSL_VK_SHADER|\(|\)|,)') +token_exp = re.compile(r'(GLSL_VK_SHADER_MODULE|\(|\)|,)') class Parser: def __init__(self, f): @@ -172,7 +172,7 @@ class Parser: def run(self): for t in self.token_iter: - if t == 'GLSL_VK_SHADER': + if t == 'GLSL_VK_SHADER_MODULE': self.handle_macro() def open_file(name, mode): @@ -188,9 +188,10 @@ def open_file(name, mode): def parse_args(): description = dedent("""\ - This program scrapes a C file for any instance of the GLSL_VK_SHADER - macro, grabs the GLSL source code, compiles it to SPIR-V. The resulting - SPIR-V code is written to another C file as an array of 32-bit words. 
+ This program scrapes a C file for any instance of the + GLSL_VK_SHADER_MODULE macro, grabs the GLSL source code, compiles it + to SPIR-V. The resulting SPIR-V code is written to another C file as + an array of 32-bit words. If '-' is passed as the input file or output file, stdin or stdout will be used instead of a file on disc.""") @@ -256,15 +257,16 @@ with open_file(outfname, 'w') as outfile: #define _ANV_GLSL_SRC_VAR2(_line) _glsl_helpers_shader ## _line ## _glsl_src #define _ANV_GLSL_SRC_VAR(_line) _ANV_GLSL_SRC_VAR2(_line) - #define GLSL_VK_SHADER(device, stage, ...) ({ \\ - VkShader __shader; \\ - VkShaderCreateInfo __shader_create_info = { \\ - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, \\ + #define GLSL_VK_SHADER_MODULE(device, stage, ...) ({ \\ + VkShaderModule __module; \\ + VkShaderModuleCreateInfo __shader_create_info = { \\ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, \\ .codeSize = sizeof(_ANV_GLSL_SRC_VAR(__LINE__)), \\ .pCode = _ANV_GLSL_SRC_VAR(__LINE__), \\ }; \\ - vkCreateShader((VkDevice) device, &__shader_create_info, &__shader); \\ - __shader; \\ + vkCreateShaderModule((VkDevice) device, \\ + &__shader_create_info, &__module); \\ + __module; \\ }) """)) diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index c89373f3316..7f17adee733 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -33,16 +33,10 @@ static void anv_device_init_meta_clear_state(struct anv_device *device) { - VkPipelineIaStateCreateInfo ia_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }; - /* We don't use a vertex shader for clearing, but instead build and pass * the VUEs directly to the rasterization backend. 
*/ - VkShader fs = GLSL_VK_SHADER(device, FRAGMENT, + VkShader fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, out vec4 f_color; flat in vec4 v_color; void main() @@ -51,17 +45,13 @@ anv_device_init_meta_clear_state(struct anv_device *device) } ); - VkPipelineShaderStageCreateInfo fs_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = &ia_create_info, - .shader = { - .stage = VK_SHADER_STAGE_FRAGMENT, - .shader = fs, - .linkConstBufferCount = 0, - .pLinkConstBufferInfo = NULL, - .pSpecializationInfo = NULL - } - }; + VkShader fs; + anv_CreateShader((VkDevice) device, + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = fsm, + .pName = "main", + }, &fs); /* We use instanced rendering to clear multiple render targets. We have two * vertex buffers: the first vertex buffer holds per-vertex data and @@ -71,7 +61,6 @@ anv_device_init_meta_clear_state(struct anv_device *device) */ VkPipelineVertexInputStateCreateInfo vi_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = &fs_create_info, .bindingCount = 2, .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { { @@ -111,32 +100,39 @@ anv_device_init_meta_clear_state(struct anv_device *device) } }; - VkPipelineRsStateCreateInfo rs_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, - .pNext = &vi_create_info, - .depthClipEnable = true, - .rasterizerDiscardEnable = false, - .fillMode = VK_FILL_MODE_SOLID, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CCW - }; - - VkPipelineCbStateCreateInfo cb_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO, - .pNext = &rs_create_info, - .attachmentCount = 1, - .pAttachments = (VkPipelineCbAttachmentState []) { - { .channelWriteMask = VK_CHANNEL_A_BIT | - VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, - } - }; - anv_pipeline_create((VkDevice) device, &(VkGraphicsPipelineCreateInfo) { 
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = &cb_create_info, + .stageCount = 1, + .pStages = &(VkPipelineShaderStageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .pSpecializationInfo = NULL, + }, + .pVertexInputState = &vi_create_info, + .pIaState = &(VkPipelineIaStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pRsState = &(VkPipelineRsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }, + .pCbState = &(VkPipelineCbStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineCbAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }, .flags = 0, - .layout = 0 }, &(struct anv_pipeline_create_info) { .use_repclear = true, @@ -302,18 +298,12 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, static void anv_device_init_meta_blit_state(struct anv_device *device) { - VkPipelineIaStateCreateInfo ia_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }; - /* We don't use a vertex shader for clearing, but instead build and pass * the VUEs directly to the rasterization backend. However, we do need * to provide GLSL source for the vertex shader so that the compiler * does not dead-code our inputs. 
*/ - VkShader vs = GLSL_VK_SHADER(device, VERTEX, + VkShaderModule vsm = GLSL_VK_SHADER_MODULE(device, VERTEX, in vec2 a_pos; in vec2 a_tex_coord; out vec4 v_tex_coord; @@ -324,7 +314,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) } ); - VkShader fs = GLSL_VK_SHADER(device, FRAGMENT, + VkShaderModule fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, out vec4 f_color; in vec4 v_tex_coord; layout(set = 0, binding = 0) uniform sampler2D u_tex; @@ -334,33 +324,24 @@ anv_device_init_meta_blit_state(struct anv_device *device) } ); - VkPipelineShaderStageCreateInfo vs_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = &ia_create_info, - .shader = { - .stage = VK_SHADER_STAGE_VERTEX, - .shader = vs, - .linkConstBufferCount = 0, - .pLinkConstBufferInfo = NULL, - .pSpecializationInfo = NULL - } - }; - - VkPipelineShaderStageCreateInfo fs_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = &vs_create_info, - .shader = { - .stage = VK_SHADER_STAGE_FRAGMENT, - .shader = fs, - .linkConstBufferCount = 0, - .pLinkConstBufferInfo = NULL, - .pSpecializationInfo = NULL - } - }; + VkShader vs; + anv_CreateShader((VkDevice) device, + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = vsm, + .pName = "main", + }, &vs); + + VkShader fs; + anv_CreateShader((VkDevice) device, + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = fsm, + .pName = "main", + }, &fs); VkPipelineVertexInputStateCreateInfo vi_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = &fs_create_info, .bindingCount = 2, .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { { @@ -423,42 +404,56 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, &device->meta_state.blit.pipeline_layout); - VkPipelineRsStateCreateInfo rs_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, - 
.pNext = &vi_create_info, - .depthClipEnable = true, - .rasterizerDiscardEnable = false, - .fillMode = VK_FILL_MODE_SOLID, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CCW - }; - - VkPipelineCbStateCreateInfo cb_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO, - .pNext = &rs_create_info, - .attachmentCount = 1, - .pAttachments = (VkPipelineCbAttachmentState []) { - { .channelWriteMask = VK_CHANNEL_A_BIT | - VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, - } - }; - - VkGraphicsPipelineCreateInfo pipeline_info = { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = &cb_create_info, - .flags = 0, - .layout = device->meta_state.blit.pipeline_layout, - }; - - anv_pipeline_create((VkDevice) device, &pipeline_info, - &(struct anv_pipeline_create_info) { - .use_repclear = false, - .disable_viewport = true, - .disable_scissor = true, - .disable_vs = true, - .use_rectlist = true - }, - &device->meta_state.blit.pipeline); + anv_pipeline_create((VkDevice) device, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX, + .shader = vs, + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .pSpecializationInfo = NULL + }, + }, + .pVertexInputState = &vi_create_info, + .pIaState = &(VkPipelineIaStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pRsState = &(VkPipelineRsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + 
.frontFace = VK_FRONT_FACE_CCW + }, + .pCbState = &(VkPipelineCbStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineCbAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + }, + &(struct anv_pipeline_create_info) { + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }, + &device->meta_state.blit.pipeline); anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, vs); anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, fs); diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 7df34d16a0a..9307a452bc9 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -31,33 +31,100 @@ // Shader functions +VkResult anv_CreateShaderModule( + VkDevice _device, + const VkShaderModuleCreateInfo* pCreateInfo, + VkShader* pShaderModule) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + module = anv_device_alloc(device, sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (module == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + module->size = pCreateInfo->codeSize; + memcpy(module->data, pCreateInfo->pCode, module->size); + + *pShaderModule = (VkShaderModule) module; + + return VK_SUCCESS; +} + VkResult anv_CreateShader( VkDevice _device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->module); struct anv_shader *shader; assert(pCreateInfo->sType == 
VK_STRUCTURE_TYPE_SHADER_CREATE_INFO); + assert(pCreateInfo->flags == 0); - shader = anv_device_alloc(device, sizeof(*shader) + pCreateInfo->codeSize, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + size_t name_len = strlen(pCreateInfo->pName); + + if (strcmp(pCreateInfo->pName, "main") != 0) { + anv_finishme("Multiple shaders per module not really supported"); + } + + shader = anv_device_alloc(device, sizeof(*shader) + name_len + 1, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (shader == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - shader->size = pCreateInfo->codeSize; - memcpy(shader->data, pCreateInfo->pCode, shader->size); + shader->module = module; + memcpy(shader->entrypoint, pCreateInfo->pName, name_len + 1); *pShader = (VkShader) shader; return VK_SUCCESS; } +VkResult anv_CreatePipelineCache( + VkDevice device, + const VkPipelineCacheCreateInfo* pCreateInfo, + VkPipelineCache* pPipelineCache) +{ + *pPipelineCache = 1; + + stub_return(VK_SUCCESS); +} + +size_t anv_GetPipelineCacheSize( + VkDevice device, + VkPipelineCache pipelineCache) +{ + stub_return(0); +} + +VkResult anv_GetPipelineCacheData( + VkDevice device, + VkPipelineCache pipelineCache, + void* pData) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_MergePipelineCaches( + VkDevice device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches) +{ + stub_return(VK_UNSUPPORTED); +} + // Pipeline functions static void -emit_vertex_input(struct anv_pipeline *pipeline, VkPipelineVertexInputStateCreateInfo *info) +emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info) { const uint32_t num_dwords = 1 + info->attributeCount * 2; uint32_t *p; @@ -125,7 +192,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, VkPipelineVertexInputStateCreat static void emit_ia_state(struct anv_pipeline *pipeline, - VkPipelineIaStateCreateInfo *info, + const VkPipelineIaStateCreateInfo *info, const struct anv_pipeline_create_info *extra) 
{ static const uint32_t vk_to_gen_primitive_type[] = { @@ -157,7 +224,8 @@ emit_ia_state(struct anv_pipeline *pipeline, } static void -emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, +emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRsStateCreateInfo *info, const struct anv_pipeline_create_info *extra) { static const uint32_t vk_to_gen_cullmode[] = { @@ -214,7 +282,8 @@ emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info, } static void -emit_cb_state(struct anv_pipeline *pipeline, VkPipelineCbStateCreateInfo *info) +emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineCbStateCreateInfo *info) { struct anv_device *device = pipeline->device; @@ -331,7 +400,8 @@ static const uint32_t vk_to_gen_stencil_op[] = { }; static void -emit_ds_state(struct anv_pipeline *pipeline, VkPipelineDsStateCreateInfo *info) +emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDsStateCreateInfo *info) { if (info == NULL) { /* We're going to OR this together with the dynamic state. 
We need @@ -364,14 +434,6 @@ emit_ds_state(struct anv_pipeline *pipeline, VkPipelineDsStateCreateInfo *info) GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->state_wm_depth_stencil, &wm_depth_stencil); } -VkResult anv_CreateGraphicsPipeline( - VkDevice device, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - VkPipeline* pPipeline) -{ - return anv_pipeline_create(device, pCreateInfo, NULL, pPipeline); -} - static void anv_pipeline_destroy(struct anv_device *device, struct anv_object *object, @@ -397,13 +459,6 @@ anv_pipeline_create( { struct anv_device *device = (struct anv_device *) _device; struct anv_pipeline *pipeline; - const struct anv_common *common; - VkPipelineShaderStageCreateInfo *shader_create_info; - VkPipelineIaStateCreateInfo *ia_info = NULL; - VkPipelineRsStateCreateInfo *rs_info = NULL; - VkPipelineDsStateCreateInfo *ds_info = NULL; - VkPipelineCbStateCreateInfo *cb_info = NULL; - VkPipelineVertexInputStateCreateInfo *vi_info = NULL; VkResult result; uint32_t offset, length; @@ -430,42 +485,18 @@ anv_pipeline_create( anv_state_stream_init(&pipeline->program_stream, &device->instruction_block_pool); - for (common = pCreateInfo->pNext; common; common = common->pNext) { - switch (common->sType) { - case VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO: - vi_info = (VkPipelineVertexInputStateCreateInfo *) common; - break; - case VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO: - ia_info = (VkPipelineIaStateCreateInfo *) common; - break; - case VK_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO: - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO"); - break; - case VK_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO: - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO"); - break; - case VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO: - rs_info = (VkPipelineRsStateCreateInfo *) common; - break; - case VK_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO: - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO"); - 
break; - case VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO: - cb_info = (VkPipelineCbStateCreateInfo *) common; - break; - case VK_STRUCTURE_TYPE_PIPELINE_DS_STATE_CREATE_INFO: - ds_info = (VkPipelineDsStateCreateInfo *) common; - break; - case VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO: - shader_create_info = (VkPipelineShaderStageCreateInfo *) common; - pipeline->shaders[shader_create_info->shader.stage] = - (struct anv_shader *) shader_create_info->shader.shader; - break; - default: - break; - } + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + pipeline->shaders[pCreateInfo->pStages[i].stage] = + (struct anv_shader *) pCreateInfo->pStages[i].shader; } + if (pCreateInfo->pTessState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO"); + if (pCreateInfo->pVpState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO"); + if (pCreateInfo->pMsState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO"); + pipeline->use_repclear = extra && extra->use_repclear; anv_compiler_run(device->compiler, pipeline); @@ -474,17 +505,19 @@ anv_pipeline_create( * hard code this to num_attributes - 2. This is because the attributes * include VUE header and position, which aren't counted as varying * inputs. 
*/ - if (pipeline->vs_simd8 == NO_KERNEL) - pipeline->wm_prog_data.num_varying_inputs = vi_info->attributeCount - 2; - - assert(vi_info); - emit_vertex_input(pipeline, vi_info); - assert(ia_info); - emit_ia_state(pipeline, ia_info, extra); - assert(rs_info); - emit_rs_state(pipeline, rs_info, extra); - emit_ds_state(pipeline, ds_info); - emit_cb_state(pipeline, cb_info); + if (pipeline->vs_simd8 == NO_KERNEL) { + pipeline->wm_prog_data.num_varying_inputs = + pCreateInfo->pVertexInputState->attributeCount - 2; + } + + assert(pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); + assert(pCreateInfo->pIaState); + emit_ia_state(pipeline, pCreateInfo->pIaState, extra); + assert(pCreateInfo->pRsState); + emit_rs_state(pipeline, pCreateInfo->pRsState, extra); + emit_ds_state(pipeline, pCreateInfo->pDsState); + emit_cb_state(pipeline, pCreateInfo->pCbState); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_STATISTICS, .StatisticsEnable = true); @@ -611,7 +644,7 @@ anv_pipeline_create( * vertex data to read from this field. We use attribute * count - 1, as we don't count the VUE header here. 
*/ .VertexURBEntryOutputLength = - DIV_ROUND_UP(vi_info->attributeCount - 1, 2)); + DIV_ROUND_UP(pCreateInfo->pVertexInputState->attributeCount - 1, 2)); else anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, .KernelStartPointer = pipeline->vs_simd8, @@ -703,16 +736,34 @@ anv_pipeline_create( return VK_SUCCESS; } -VkResult anv_CreateGraphicsPipelineDerivative( - VkDevice device, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - VkPipeline basePipeline, - VkPipeline* pPipeline) +VkResult anv_CreateGraphicsPipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + VkPipeline* pPipelines) { - stub_return(VK_UNSUPPORTED); + ANV_FROM_HANDLE(anv_device, device, _device); + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_pipeline_create(_device, &pCreateInfos[i], + NULL, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_pipeline_destroy(device, (struct anv_object *)pPipelines[j], + VK_OBJECT_TYPE_PIPELINE); + } + + return result; + } + } + + return VK_SUCCESS; } -VkResult anv_CreateComputePipeline( +static VkResult anv_compute_pipeline_create( VkDevice _device, const VkComputePipelineCreateInfo* pCreateInfo, VkPipeline* pPipeline) @@ -784,32 +835,31 @@ VkResult anv_CreateComputePipeline( return VK_SUCCESS; } -VkResult anv_StorePipeline( - VkDevice device, - VkPipeline pipeline, - size_t* pDataSize, - void* pData) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_LoadPipeline( - VkDevice device, - size_t dataSize, - const void* pData, - VkPipeline* pPipeline) +VkResult anv_CreateComputePipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo* pCreateInfos, + VkPipeline* pPipelines) { - stub_return(VK_UNSUPPORTED); -} + ANV_FROM_HANDLE(anv_device, device, _device); + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + 
result = anv_compute_pipeline_create(_device, &pCreateInfos[i], + &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_pipeline_destroy(device, (struct anv_object *)pPipelines[j], + VK_OBJECT_TYPE_PIPELINE); + } + + return result; + } + } -VkResult anv_LoadPipelineDerivative( - VkDevice device, - size_t dataSize, - const void* pData, - VkPipeline basePipeline, - VkPipeline* pPipeline) -{ - stub_return(VK_UNSUPPORTED); + return VK_SUCCESS; } // Pipeline layout functions diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 6f1f350a5b6..0965b999a65 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -714,9 +714,14 @@ struct anv_fence { bool ready; }; +struct anv_shader_module { + uint32_t size; + char data[0]; +}; + struct anv_shader { - uint32_t size; - char data[0]; + struct anv_shader_module * module; + char entrypoint[0]; }; struct anv_pipeline { -- cgit v1.2.3 From a841e2c747d71916cd6d5cdfd3e66e17912ab9d7 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 8 Jul 2015 21:47:43 -0400 Subject: vk/compiler: mark inputs/outputs as read/written This doesn't handle inputs and outputs larger than a vec4, but we plan to add a varyiing splitting/packing pass to handle those anyways. 
--- src/vulkan/compiler.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index fb9615306e6..0a8ec87415f 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -950,6 +950,23 @@ anv_compile_shader_glsl(struct anv_compiler *compiler, program->NumShaders++; } +static void +setup_nir_io(struct gl_program *prog, + nir_shader *shader) +{ + foreach_list_typed(nir_variable, var, node, &shader->inputs) { + prog->InputsRead |= BITFIELD64_BIT(var->data.location); + } + + foreach_list_typed(nir_variable, var, node, &shader->outputs) { + /* XXX glslang gives us this but we never use it */ + if (!strcmp(var->name, "gl_PerVertex")) + continue; + + prog->OutputsWritten |= BITFIELD64_BIT(var->data.location); + } +} + static void anv_compile_shader_spirv(struct anv_compiler *compiler, struct gl_shader_program *program, @@ -982,6 +999,8 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, compiler->screen->devinfo, NULL, mesa_shader->Stage); + setup_nir_io(mesa_shader->Program, mesa_shader->Program->nir); + fail_if(mesa_shader->Program->nir == NULL, "failed to translate SPIR-V to NIR\n"); -- cgit v1.2.3 From 997831868fea1f56223b5fbf59526d0b538527c0 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 8 Jul 2015 21:48:57 -0400 Subject: vk/compiler: create the right kind of program struct This fixes Valgrind errors and gets all the tests to pass with --use-spir-v. 
--- src/vulkan/compiler.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 0a8ec87415f..216da89c697 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -981,7 +981,21 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, fail_if(mesa_shader == NULL, "failed to create %s shader\n", stage_info[stage].name); - mesa_shader->Program = rzalloc(mesa_shader, struct gl_program); + switch (stage) { + case VK_SHADER_STAGE_VERTEX: + mesa_shader->Program = &rzalloc(mesa_shader, struct brw_vertex_program)->program.Base; + break; + case VK_SHADER_STAGE_GEOMETRY: + mesa_shader->Program = &rzalloc(mesa_shader, struct brw_geometry_program)->program.Base; + break; + case VK_SHADER_STAGE_FRAGMENT: + mesa_shader->Program = &rzalloc(mesa_shader, struct brw_fragment_program)->program.Base; + break; + case VK_SHADER_STAGE_COMPUTE: + mesa_shader->Program = &rzalloc(mesa_shader, struct brw_compute_program)->program.Base; + break; + } + mesa_shader->Type = stage_info[stage].token; mesa_shader->Stage = stage_info[stage].stage; -- cgit v1.2.3 From e4292ac03929485c6419a40541201e14b874f160 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 8 Jul 2015 21:50:16 -0400 Subject: nir/spirv: zero out value array Before values are pushed or annotated with a name, decoration, etc., they need to have an invalid type, NULL name, NULL decoration, etc. ralloc zero's everything by accident, so this wasn't an issue in practice, but we should be explicitly zero'ing it. 
--- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index ec26111930a..1c25493b551 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2788,7 +2788,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); b->shader = shader; b->value_id_bound = value_id_bound; - b->values = ralloc_array(b, struct vtn_value, value_id_bound); + b->values = rzalloc_array(b, struct vtn_value, value_id_bound); exec_list_make_empty(&b->functions); /* Handle all the preamble instructions */ -- cgit v1.2.3 From b8fedc19f5f4cccac827160b7e9ae3068e9982f6 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 8 Jul 2015 22:01:33 -0400 Subject: nir/spirv: fix memory context for builtin variable Fixes valgrind errors with func.depthstencil.basic. --- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 1c25493b551..5401908b15b 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -801,7 +801,7 @@ get_builtin_variable(struct vtn_builder *b, nir_variable_mode mode; vtn_get_builtin_location(builtin, &var->data.location, &mode); var->data.mode = mode; - var->name = ralloc_strdup(b->shader, "builtin"); + var->name = ralloc_strdup(var, "builtin"); switch (mode) { case nir_var_shader_in: -- cgit v1.2.3 From 3318a86d12cfd2d436bf5418b73d363018678d84 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 9 Jul 2015 14:28:39 -0400 Subject: nir/spirv: fix wrong writemask for ALU operations --- src/glsl/nir/spirv_to_nir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 5401908b15b..6819f88833a 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ 
-1840,6 +1840,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, glsl_get_vector_elements(type), val->name); + instr->dest.write_mask = (1 << glsl_get_vector_elements(type)) - 1; val->ssa->def = &instr->dest.dest.ssa; for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) -- cgit v1.2.3 From cff06bbe7d78a48e3b9aa2ae36edd3408d24ca2c Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 9 Jul 2015 14:29:23 -0400 Subject: vk/compiler: create an empty parameters list Prevents problems when initializing the sanity_param_count. --- src/vulkan/compiler.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 216da89c697..cf34e7b4414 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -996,6 +996,9 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, break; } + mesa_shader->Program->Parameters = + rzalloc(mesa_shader, struct gl_program_parameter_list); + mesa_shader->Type = stage_info[stage].token; mesa_shader->Stage = stage_info[stage].stage; -- cgit v1.2.3 From c34d314db32edf6cbeed799e5d992086ee53e159 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 9 Jul 2015 15:23:25 -0700 Subject: vk/device: Be consistent about path to DRM device Function fill_physical_device() has a 'path' parameter, and struct anv_physical_device has a 'path' member. Sometimes these are used; sometimes hardcoded "/dev/dri/renderD128" is used instead. Be consistent. Hardcode "/dev/dri/renderD128" in exactly one location, during initialization of the physical device. 
--- src/vulkan/device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 61e29a78015..ca8fd843f52 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -47,7 +47,7 @@ fill_physical_device(struct anv_physical_device *device, { int fd; - fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC); + fd = open(path, O_RDWR | O_CLOEXEC); if (fd < 0) return vk_error(VK_ERROR_UNAVAILABLE); @@ -377,7 +377,7 @@ VkResult anv_CreateDevice( parse_debug_flags(device); device->instance = physicalDevice->instance; - device->fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC); + device->fd = open(physicalDevice->path, O_RDWR | O_CLOEXEC); if (device->fd == -1) goto fail_device; -- cgit v1.2.3 From fa915b661d4159014c0ed3c593505ddf57f27ee5 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 9 Jul 2015 15:38:58 -0700 Subject: vk/device: Move device enumeration to vkEnumeratePhysicalDevices() Don't enumerate devices in vkCreateInstance(). That's where global, device-independent initialization should happen. Move device enumeration to the more logical location, vkEnumeratePhysicalDevices(). 
--- src/vulkan/device.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index ca8fd843f52..bbe4ff1c87e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -121,7 +121,6 @@ VkResult anv_CreateInstance( struct anv_instance *instance; const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks; void *user_data = NULL; - VkResult result; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); @@ -138,15 +137,8 @@ VkResult anv_CreateInstance( instance->pfnAlloc = alloc_callbacks->pfnAlloc; instance->pfnFree = alloc_callbacks->pfnFree; instance->apiVersion = pCreateInfo->pAppInfo->apiVersion; - instance->physicalDeviceCount = 0; - result = fill_physical_device(&instance->physicalDevice, - instance, "/dev/dri/renderD128"); - - if (result != VK_SUCCESS) - return result; - instance->physicalDeviceCount++; *pInstance = (VkInstance) instance; return VK_SUCCESS; @@ -168,6 +160,16 @@ VkResult anv_EnumeratePhysicalDevices( VkPhysicalDevice* pPhysicalDevices) { struct anv_instance *instance = (struct anv_instance *) _instance; + VkResult result; + + if (instance->physicalDeviceCount == 0) { + result = fill_physical_device(&instance->physicalDevice, + instance, "/dev/dri/renderD128"); + if (result != VK_SUCCESS) + return result; + + instance->physicalDeviceCount++; + } if (*pPhysicalDeviceCount >= 1) pPhysicalDevices[0] = (VkPhysicalDevice) &instance->physicalDevice; -- cgit v1.2.3 From 5b75dffd040210aa6968e4543b15162869d0127d Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 9 Jul 2015 15:51:06 -0700 Subject: vk/device: Fix vkEnumeratePhysicalDevices() The Vulkan spec says that pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; otherwise it's an inout parameter. Mesa incorrectly treated it unconditionally as an inout parameter, which could have lead to reading unitialized data. 
--- src/vulkan/device.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index bbe4ff1c87e..85f1cba23dd 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -168,12 +168,34 @@ VkResult anv_EnumeratePhysicalDevices( if (result != VK_SUCCESS) return result; - instance->physicalDeviceCount++; - } - - if (*pPhysicalDeviceCount >= 1) + instance->physicalDeviceCount = 1; + } + + /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; + * otherwise it's an inout parameter. + * + * The Vulkan spec (git aaed022) says: + * + * pPhysicalDeviceCount is a pointer to an unsigned integer variable + * that is initialized with the number of devices the application is + * prepared to receive handles to. pname:pPhysicalDevices is pointer to + * an array of at least this many VkPhysicalDevice handles [...]. + * + * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices + * overwrites the contents of the variable pointed to by + * pPhysicalDeviceCount with the number of physical devices in in the + * instance; otherwise, vkEnumeratePhysicalDevices overwrites + * pPhysicalDeviceCount with the number of physical handles written to + * pPhysicalDevices. 
+ */ + if (!pPhysicalDevices) { + *pPhysicalDeviceCount = instance->physicalDeviceCount; + } else if (*pPhysicalDeviceCount >= 1) { pPhysicalDevices[0] = (VkPhysicalDevice) &instance->physicalDevice; - *pPhysicalDeviceCount = instance->physicalDeviceCount; + *pPhysicalDeviceCount = 1; + } else { + *pPhysicalDeviceCount = 0; + } return VK_SUCCESS; } -- cgit v1.2.3 From f6d51f3fd3c2677418de677897c724f1bae12d3a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 13:54:08 -0700 Subject: vk: Add GetPhysicalDeviceFeatures --- include/vulkan/vulkan.h | 49 +++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/device.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 5497e069c37..4f467bb99ac 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1152,6 +1152,50 @@ typedef struct { const char*const* ppEnabledExtensionNames; } VkInstanceCreateInfo; +typedef struct { + bool32_t robustBufferAccess; + bool32_t fullDrawIndexUint32; + bool32_t imageCubeArray; + bool32_t independentBlend; + bool32_t geometryShader; + bool32_t tessellationShader; + bool32_t sampleRateShading; + bool32_t dualSourceBlend; + bool32_t logicOp; + bool32_t instancedDrawIndirect; + bool32_t depthClip; + bool32_t depthBiasClamp; + bool32_t fillModeNonSolid; + bool32_t depthBounds; + bool32_t wideLines; + bool32_t largePoints; + bool32_t textureCompressionETC2; + bool32_t textureCompressionASTC_LDR; + bool32_t textureCompressionBC; + bool32_t pipelineStatisticsQuery; + bool32_t vertexSideEffects; + bool32_t tessellationSideEffects; + bool32_t geometrySideEffects; + bool32_t fragmentSideEffects; + bool32_t shaderTessellationPointSize; + bool32_t shaderGeometryPointSize; + bool32_t shaderTextureGatherExtended; + bool32_t shaderStorageImageExtendedFormats; + bool32_t shaderStorageImageMultisample; + bool32_t shaderStorageBufferArrayConstantIndexing; 
+ bool32_t shaderStorageImageArrayConstantIndexing; + bool32_t shaderUniformBufferArrayDynamicIndexing; + bool32_t shaderSampledImageArrayDynamicIndexing; + bool32_t shaderStorageBufferArrayDynamicIndexing; + bool32_t shaderStorageImageArrayDynamicIndexing; + bool32_t shaderClipDistance; + bool32_t shaderCullDistance; + bool32_t shaderFloat64; + bool32_t shaderInt64; + bool32_t shaderFloat16; + bool32_t shaderInt16; +} VkPhysicalDeviceFeatures; + typedef struct { VkFormatFeatureFlags linearTilingFeatures; VkFormatFeatureFlags optimalTilingFeatures; @@ -1877,6 +1921,7 @@ typedef struct { typedef VkResult (VKAPI *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCreateInfo, VkInstance* pInstance); typedef VkResult (VKAPI *PFN_vkDestroyInstance)(VkInstance instance); typedef VkResult (VKAPI *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceInfo)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceInfoType infoType, size_t* pDataSize, void* pData); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatInfo)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatInfo); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); @@ -1999,6 +2044,10 @@ VkResult VKAPI vkEnumeratePhysicalDevices( uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); +VkResult VKAPI vkGetPhysicalDeviceFeatures( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures* pFeatures); + VkResult VKAPI vkGetPhysicalDeviceInfo( VkPhysicalDevice physicalDevice, VkPhysicalDeviceInfoType infoType, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 85f1cba23dd..7df4374c153 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -200,6 +200,59 @@ VkResult 
anv_EnumeratePhysicalDevices( return VK_SUCCESS; } +VkResult anv_GetPhysicalDeviceFeatures( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures* pFeatures) +{ + anv_finishme("Get correct values for PhysicalDeviceFeatures"); + + *pFeatures = (VkPhysicalDeviceFeatures) { + .robustBufferAccess = false, + .fullDrawIndexUint32 = false, + .imageCubeArray = false, + .independentBlend = false, + .geometryShader = true, + .tessellationShader = false, + .sampleRateShading = false, + .dualSourceBlend = true, + .logicOp = true, + .instancedDrawIndirect = true, + .depthClip = false, + .depthBiasClamp = false, + .fillModeNonSolid = true, + .depthBounds = false, + .wideLines = true, + .largePoints = true, + .textureCompressionETC2 = true, + .textureCompressionASTC_LDR = true, + .textureCompressionBC = true, + .pipelineStatisticsQuery = true, + .vertexSideEffects = false, + .tessellationSideEffects = false, + .geometrySideEffects = false, + .fragmentSideEffects = false, + .shaderTessellationPointSize = false, + .shaderGeometryPointSize = true, + .shaderTextureGatherExtended = true, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = false, + .shaderStorageBufferArrayConstantIndexing = false, + .shaderStorageImageArrayConstantIndexing = false, + .shaderUniformBufferArrayDynamicIndexing = true, + .shaderSampledImageArrayDynamicIndexing = false, + .shaderStorageBufferArrayDynamicIndexing = false, + .shaderStorageImageArrayDynamicIndexing = false, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderFloat16 = false, + .shaderInt16 = false, + }; + + return VK_SUCCESS; +} + VkResult anv_GetPhysicalDeviceInfo( VkPhysicalDevice physicalDevice, VkPhysicalDeviceInfoType infoType, -- cgit v1.2.3 From 65e0b304b6ccd0ac516fe12b2bc3f2a1cbc0926a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 15:38:30 -0700 Subject: vk: Add support for GetPhysicalDeviceLimits --- 
include/vulkan/vulkan.h | 99 +++++++++++++++++++++++++++++++++++++++++ src/vulkan/device.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 4f467bb99ac..173a697ff62 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1201,6 +1201,100 @@ typedef struct { VkFormatFeatureFlags optimalTilingFeatures; } VkFormatProperties; +typedef struct { + uint32_t maxImageDimension1D; + uint32_t maxImageDimension2D; + uint32_t maxImageDimension3D; + uint32_t maxImageDimensionCube; + uint32_t maxImageArrayLayers; + uint32_t maxTexelBufferSize; + uint32_t maxUniformBufferSize; + uint32_t maxStorageBufferSize; + uint32_t maxPushConstantsSize; + uint32_t maxMemoryAllocationCount; + uint32_t maxBoundDescriptorSets; + uint32_t maxDescriptorSets; + uint32_t maxPerStageDescriptorSamplers; + uint32_t maxPerStageDescriptorUniformBuffers; + uint32_t maxPerStageDescriptorStorageBuffers; + uint32_t maxPerStageDescriptorSampledImages; + uint32_t maxPerStageDescriptorStorageImages; + uint32_t maxDescriptorSetSamplers; + uint32_t maxDescriptorSetUniformBuffers; + uint32_t maxDescriptorSetStorageBuffers; + uint32_t maxDescriptorSetSampledImages; + uint32_t maxDescriptorSetStorageImages; + uint32_t maxVertexInputAttributes; + uint32_t maxVertexInputAttributeOffset; + uint32_t maxVertexInputBindingStride; + uint32_t maxVertexOutputComponents; + uint32_t maxTessGenLevel; + uint32_t maxTessPatchSize; + uint32_t maxTessControlPerVertexInputComponents; + uint32_t maxTessControlPerVertexOutputComponents; + uint32_t maxTessControlPerPatchOutputComponents; + uint32_t maxTessControlTotalOutputComponents; + uint32_t maxTessEvaluationInputComponents; + uint32_t maxTessEvaluationOutputComponents; + uint32_t maxGeometryShaderInvocations; + uint32_t maxGeometryInputComponents; + uint32_t maxGeometryOutputComponents; + uint32_t maxGeometryOutputVertices; + uint32_t 
maxGeometryTotalOutputComponents; + uint32_t maxFragmentInputComponents; + uint32_t maxFragmentOutputBuffers; + uint32_t maxFragmentDualSourceBuffers; + uint32_t maxFragmentCombinedOutputResources; + uint32_t maxComputeSharedMemorySize; + uint32_t maxComputeWorkGroupCount[3]; + uint32_t maxComputeWorkGroupInvocations; + uint32_t maxComputeWorkGroupSize[3]; + uint32_t subPixelPrecisionBits; + uint32_t subTexelPrecisionBits; + uint32_t mipmapPrecisionBits; + uint32_t maxDrawIndexedIndexValue; + uint32_t maxDrawIndirectInstanceCount; + bool32_t primitiveRestartForPatches; + float maxSamplerLodBias; + uint32_t maxSamplerAnisotropy; + uint32_t maxViewports; + uint32_t maxDynamicViewportStates; + uint32_t maxViewportDimensions[2]; + float viewportBoundsRange[2]; + uint32_t viewportSubPixelBits; + uint32_t minMemoryMapAlignment; + uint32_t minTexelBufferOffsetAlignment; + uint32_t minUniformBufferOffsetAlignment; + uint32_t minStorageBufferOffsetAlignment; + uint32_t minTexelOffset; + uint32_t maxTexelOffset; + uint32_t minTexelGatherOffset; + uint32_t maxTexelGatherOffset; + float minInterpolationOffset; + float maxInterpolationOffset; + uint32_t subPixelInterpolationOffsetBits; + uint32_t maxFramebufferWidth; + uint32_t maxFramebufferHeight; + uint32_t maxFramebufferLayers; + uint32_t maxFramebufferColorSamples; + uint32_t maxFramebufferDepthSamples; + uint32_t maxFramebufferStencilSamples; + uint32_t maxColorAttachments; + uint32_t maxSampledImageColorSamples; + uint32_t maxSampledImageDepthSamples; + uint32_t maxSampledImageIntegerSamples; + uint32_t maxStorageImageSamples; + uint32_t maxSampleMaskWords; + uint64_t timestampFrequency; + uint32_t maxClipDistances; + uint32_t maxCullDistances; + uint32_t maxCombinedClipAndCullDistances; + float pointSizeRange[2]; + float lineWidthRange[2]; + float pointSizeGranularity; + float lineWidthGranularity; +} VkPhysicalDeviceLimits; + typedef struct { uint32_t apiVersion; uint32_t driverVersion; @@ -1924,6 +2018,7 @@ typedef 
VkResult (VKAPI *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, ui typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceInfo)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceInfoType infoType, size_t* pDataSize, void* pData); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatInfo)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatInfo); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceLimits)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceLimits* pLimits); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); @@ -2059,6 +2154,10 @@ VkResult VKAPI vkGetPhysicalDeviceFormatInfo( VkFormat format, VkFormatProperties* pFormatInfo); +VkResult VKAPI vkGetPhysicalDeviceLimits( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceLimits* pLimits); + PFN_vkVoidFunction VKAPI vkGetInstanceProcAddr( VkInstance instance, const char* pName); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 7df4374c153..3f39f61a0da 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -253,6 +253,120 @@ VkResult anv_GetPhysicalDeviceFeatures( return VK_SUCCESS; } +VkResult anv_GetPhysicalDeviceLimits( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceLimits* pLimits) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + const struct brw_device_info *devinfo = physical_device->info; + + anv_finishme("Get correct values for PhysicalDeviceLimits"); + + *pLimits = (VkPhysicalDeviceLimits) { + .maxImageDimension1D = (1 << 14), + .maxImageDimension2D = (1 << 14), + .maxImageDimension3D = (1 << 10), + 
.maxImageDimensionCube = (1 << 14), + .maxImageArrayLayers = (1 << 10), + .maxTexelBufferSize = (1 << 14), + .maxUniformBufferSize = UINT32_MAX, + .maxStorageBufferSize = UINT32_MAX, + .maxPushConstantsSize = 128, + .maxMemoryAllocationCount = UINT32_MAX, + .maxBoundDescriptorSets = MAX_SETS, + .maxDescriptorSets = UINT32_MAX, + .maxPerStageDescriptorSamplers = 64, + .maxPerStageDescriptorUniformBuffers = 64, + .maxPerStageDescriptorStorageBuffers = 64, + .maxPerStageDescriptorSampledImages = 64, + .maxPerStageDescriptorStorageImages = 64, + .maxDescriptorSetSamplers = 256, + .maxDescriptorSetUniformBuffers = 256, + .maxDescriptorSetStorageBuffers = 256, + .maxDescriptorSetSampledImages = 256, + .maxDescriptorSetStorageImages = 256, + .maxVertexInputAttributes = 32, + .maxVertexInputAttributeOffset = 256, + .maxVertexInputBindingStride = 256, + .maxVertexOutputComponents = 32, + .maxTessGenLevel = 0, + .maxTessPatchSize = 0, + .maxTessControlPerVertexInputComponents = 0, + .maxTessControlPerVertexOutputComponents = 0, + .maxTessControlPerPatchOutputComponents = 0, + .maxTessControlTotalOutputComponents = 0, + .maxTessEvaluationInputComponents = 0, + .maxTessEvaluationOutputComponents = 0, + .maxGeometryShaderInvocations = 6, + .maxGeometryInputComponents = 16, + .maxGeometryOutputComponents = 16, + .maxGeometryOutputVertices = 16, + .maxGeometryTotalOutputComponents = 16, + .maxFragmentInputComponents = 16, + .maxFragmentOutputBuffers = 8, + .maxFragmentDualSourceBuffers = 2, + .maxFragmentCombinedOutputResources = 8, + .maxComputeSharedMemorySize = 1024, + .maxComputeWorkGroupCount = { + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + }, + .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, + .maxComputeWorkGroupSize = { + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + }, + .subPixelPrecisionBits = 4 /* FIXME */, + .subTexelPrecisionBits = 4 /* FIXME */, + 
.mipmapPrecisionBits = 4 /* FIXME */, + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectInstanceCount = UINT32_MAX, + .primitiveRestartForPatches = UINT32_MAX, + .maxSamplerLodBias = 16, + .maxSamplerAnisotropy = 16, + .maxViewports = 32, + .maxDynamicViewportStates = UINT32_MAX, + .maxViewportDimensions = { (1 << 14), (1 << 14) }, + .viewportBoundsRange = { -1.0, 1.0 }, /* FIXME */ + .viewportSubPixelBits = 13, /* We take a float? */ + .minMemoryMapAlignment = 64, /* A cache line */ + .minTexelBufferOffsetAlignment = 1, + .minUniformBufferOffsetAlignment = 1, + .minStorageBufferOffsetAlignment = 1, + .minTexelOffset = 0, /* FIXME */ + .maxTexelOffset = 0, /* FIXME */ + .minTexelGatherOffset = 0, /* FIXME */ + .maxTexelGatherOffset = 0, /* FIXME */ + .minInterpolationOffset = 0, /* FIXME */ + .maxInterpolationOffset = 0, /* FIXME */ + .subPixelInterpolationOffsetBits = 0, /* FIXME */ + .maxFramebufferWidth = (1 << 14), + .maxFramebufferHeight = (1 << 14), + .maxFramebufferLayers = (1 << 10), + .maxFramebufferColorSamples = 8, + .maxFramebufferDepthSamples = 8, + .maxFramebufferStencilSamples = 8, + .maxColorAttachments = MAX_RTS, + .maxSampledImageColorSamples = 8, + .maxSampledImageDepthSamples = 8, + .maxSampledImageIntegerSamples = 1, + .maxStorageImageSamples = 1, + .maxSampleMaskWords = 1, + .timestampFrequency = 0 /* FIXME */, + .maxClipDistances = 0 /* FIXME */, + .maxCullDistances = 0 /* FIXME */, + .maxCombinedClipAndCullDistances = 0 /* FIXME */, + .pointSizeRange = { 0.125, 255.875 }, + .lineWidthRange = { 0.0, 7.9921875 }, + .pointSizeGranularity = (1.0 / 8.0), + .lineWidthGranularity = (1.0 / 128.0), + }; + + return VK_SUCCESS; +} + VkResult anv_GetPhysicalDeviceInfo( VkPhysicalDevice physicalDevice, VkPhysicalDeviceInfoType infoType, -- cgit v1.2.3 From 977a469bce41df42508121433c6e2b86534ec362 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 15:53:03 -0700 Subject: vk: Support GetPhysicalDeviceProperties --- 
include/vulkan/vulkan.h | 14 ++++++------- src/vulkan/device.c | 52 +++++++++++++++++++++++-------------------------- 2 files changed, 30 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 173a697ff62..1fec157a054 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1302,14 +1302,7 @@ typedef struct { uint32_t deviceId; VkPhysicalDeviceType deviceType; char deviceName[VK_MAX_PHYSICAL_DEVICE_NAME]; - VkDeviceSize maxInlineMemoryUpdateSize; - uint32_t maxBoundDescriptorSets; - uint32_t maxThreadGroupSize; - uint64_t timestampFrequency; - bool32_t multiColorAttachmentClears; - uint32_t maxDescriptorSets; - uint32_t maxViewports; - uint32_t maxColorAttachments; + uint8_t pipelineCacheUUID[VK_UUID_LENGTH]; } VkPhysicalDeviceProperties; typedef struct { @@ -2019,6 +2012,7 @@ typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physi typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceInfo)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceInfoType infoType, size_t* pDataSize, void* pData); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatInfo)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatInfo); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceLimits)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceLimits* pLimits); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); @@ -2158,6 +2152,10 @@ VkResult VKAPI vkGetPhysicalDeviceLimits( VkPhysicalDevice physicalDevice, VkPhysicalDeviceLimits* pLimits); +VkResult VKAPI 
vkGetPhysicalDeviceProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties* pProperties); + PFN_vkVoidFunction VKAPI vkGetInstanceProcAddr( VkInstance instance, const char* pName); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 3f39f61a0da..c5e35bcc049 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -28,6 +28,7 @@ #include #include "private.h" +#include "mesa/main/git_sha1.h" static int anv_env_get_int(const char *name) @@ -326,7 +327,7 @@ VkResult anv_GetPhysicalDeviceLimits( .primitiveRestartForPatches = UINT32_MAX, .maxSamplerLodBias = 16, .maxSamplerAnisotropy = 16, - .maxViewports = 32, + .maxViewports = 16, .maxDynamicViewportStates = UINT32_MAX, .maxViewportDimensions = { (1 << 14), (1 << 14) }, .viewportBoundsRange = { -1.0, 1.0 }, /* FIXME */ @@ -354,7 +355,7 @@ VkResult anv_GetPhysicalDeviceLimits( .maxSampledImageIntegerSamples = 1, .maxStorageImageSamples = 1, .maxSampleMaskWords = 1, - .timestampFrequency = 0 /* FIXME */, + .timestampFrequency = 1000 * 1000 * 1000 / 80, .maxClipDistances = 0 /* FIXME */, .maxCullDistances = 0 /* FIXME */, .maxCombinedClipAndCullDistances = 0 /* FIXME */, @@ -367,44 +368,39 @@ VkResult anv_GetPhysicalDeviceLimits( return VK_SUCCESS; } +VkResult anv_GetPhysicalDeviceProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties* pProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + + *pProperties = (VkPhysicalDeviceProperties) { + .apiVersion = 1, + .driverVersion = 1, + .vendorId = 0x8086, + .deviceId = pdevice->chipset_id, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + }; + + strcpy(pProperties->deviceName, pdevice->name); + snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_LENGTH, + "anv-%s", MESA_GIT_SHA1 + 4); + + return VK_SUCCESS; +} + VkResult anv_GetPhysicalDeviceInfo( VkPhysicalDevice physicalDevice, VkPhysicalDeviceInfoType infoType, size_t* pDataSize, void* pData) { - struct anv_physical_device *device 
= (struct anv_physical_device *) physicalDevice; - VkPhysicalDeviceProperties *properties; VkPhysicalDevicePerformance *performance; VkPhysicalDeviceQueueProperties *queue_properties; VkPhysicalDeviceMemoryProperties *memory_properties; VkDisplayPropertiesWSI *display_properties; - uint64_t ns_per_tick = 80; switch ((uint32_t) infoType) { - case VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES: - properties = pData; - - *pDataSize = sizeof(*properties); - if (pData == NULL) - return VK_SUCCESS; - - properties->apiVersion = 1; - properties->driverVersion = 1; - properties->vendorId = 0x8086; - properties->deviceId = device->chipset_id; - properties->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; - strcpy(properties->deviceName, device->name); - properties->maxInlineMemoryUpdateSize = 0; - properties->maxBoundDescriptorSets = MAX_SETS; - properties->maxThreadGroupSize = 512; - properties->timestampFrequency = 1000 * 1000 * 1000 / ns_per_tick; - properties->multiColorAttachmentClears = true; - properties->maxDescriptorSets = 8; - properties->maxViewports = 16; - properties->maxColorAttachments = 8; - return VK_SUCCESS; - case VK_PHYSICAL_DEVICE_INFO_TYPE_PERFORMANCE: performance = pData; -- cgit v1.2.3 From 1f907011a344d7e05fa54933cb17942ecd990602 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 16:11:24 -0700 Subject: vk: Add the new PhysicalDeviceQueue queries --- include/vulkan/vulkan.h | 11 +++++++++++ src/vulkan/device.c | 27 +++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 1fec157a054..d21c56bd191 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2013,6 +2013,8 @@ typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceInfo)(VkPhysicalDevice physicalD typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatInfo)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatInfo); typedef VkResult (VKAPI 
*PFN_vkGetPhysicalDeviceLimits)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceLimits* pLimits); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueCount)(VkPhysicalDevice physicalDevice, uint32_t* pCount); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueProperties)(VkPhysicalDevice physicalDevice, uint32_t count, VkPhysicalDeviceQueueProperties* pQueueProperties); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); @@ -2156,6 +2158,15 @@ VkResult VKAPI vkGetPhysicalDeviceProperties( VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); +VkResult VKAPI vkGetPhysicalDeviceQueueCount( + VkPhysicalDevice physicalDevice, + uint32_t* pCount); + +VkResult VKAPI vkGetPhysicalDeviceQueueProperties( + VkPhysicalDevice physicalDevice, + uint32_t count, + VkPhysicalDeviceQueueProperties* pQueueProperties); + PFN_vkVoidFunction VKAPI vkGetInstanceProcAddr( VkInstance instance, const char* pName); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index c5e35bcc049..3cb04a1d4c2 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -389,6 +389,33 @@ VkResult anv_GetPhysicalDeviceProperties( return VK_SUCCESS; } +VkResult anv_GetPhysicalDeviceQueueCount( + VkPhysicalDevice physicalDevice, + uint32_t* pCount) +{ + *pCount = 1; + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceQueueProperties( + VkPhysicalDevice physicalDevice, + uint32_t count, + VkPhysicalDeviceQueueProperties* pQueueProperties) +{ + assert(count == 1); + + *pQueueProperties = (VkPhysicalDeviceQueueProperties) { + .queueFlags = VK_QUEUE_GRAPHICS_BIT 
| + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_DMA_BIT, + .queueCount = 1, + .supportsTimestamps = true, + }; + + return VK_SUCCESS; +} + VkResult anv_GetPhysicalDeviceInfo( VkPhysicalDevice physicalDevice, VkPhysicalDeviceInfoType infoType, -- cgit v1.2.3 From 8c2c37fae74cc40072f103f49e1d9c2c235912f3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 16:14:31 -0700 Subject: vk: Remove the old GetPhysicalDeviceInfo call --- include/vulkan/vk_wsi_lunarg.h | 19 +++++++++++ include/vulkan/vulkan.h | 59 ---------------------------------- src/vulkan/device.c | 72 ------------------------------------------ 3 files changed, 19 insertions(+), 131 deletions(-) (limited to 'src') diff --git a/include/vulkan/vk_wsi_lunarg.h b/include/vulkan/vk_wsi_lunarg.h index 84de8d2c6d1..a439012e2cb 100644 --- a/include/vulkan/vk_wsi_lunarg.h +++ b/include/vulkan/vk_wsi_lunarg.h @@ -37,6 +37,25 @@ extern "C" { #endif // __cplusplus +// This macro defines INT_MAX in enumerations to force compilers to use 32 bits +// to represent them. This may or may not be necessary on some compilers. The +// option to compile it out may allow compilers that warn about missing enumerants +// in switch statements to be silenced. +// Using this macro is not needed for flag bit enums because those aren't used +// as storage type anywhere. +#define VK_MAX_ENUM(Prefix) VK_##Prefix##_MAX_ENUM = 0x7FFFFFFF + +// This macro defines the BEGIN_RANGE, END_RANGE, NUM, and MAX_ENUM constants for +// the enumerations. +#define VK_ENUM_RANGE(Prefix, First, Last) \ + VK_##Prefix##_BEGIN_RANGE = VK_##Prefix##_##First, \ + VK_##Prefix##_END_RANGE = VK_##Prefix##_##Last, \ + VK_NUM_##Prefix = (VK_##Prefix##_END_RANGE - VK_##Prefix##_BEGIN_RANGE + 1), \ + VK_MAX_ENUM(Prefix) + +// This is a helper macro to define the value of flag bit enum values. 
+#define VK_BIT(bit) (1 << (bit)) + // ------------------------------------------------------------------------------------------------ // Objects diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index d21c56bd191..5ae5f6e781f 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -103,24 +103,6 @@ VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicDsState, VkDynamicStateObject) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkFramebuffer, VkNonDispatchable) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkRenderPass, VkNonDispatchable) -// This macro defines INT_MAX in enumerations to force compilers to use 32 bits -// to represent them. This may or may not be necessary on some compilers. The -// option to compile it out may allow compilers that warn about missing enumerants -// in switch statements to be silenced. -// Using this macro is not needed for flag bit enums because those aren't used -// as storage type anywhere. -#define VK_MAX_ENUM(Prefix) VK_##Prefix##_MAX_ENUM = 0x7FFFFFFF - -// This macro defines the BEGIN_RANGE, END_RANGE, NUM, and MAX_ENUM constants for -// the enumerations. -#define VK_ENUM_RANGE(Prefix, First, Last) \ - VK_##Prefix##_BEGIN_RANGE = VK_##Prefix##_##First, \ - VK_##Prefix##_END_RANGE = VK_##Prefix##_##Last, \ - VK_NUM_##Prefix = (VK_##Prefix##_END_RANGE - VK_##Prefix##_BEGIN_RANGE + 1), \ - VK_MAX_ENUM(Prefix) - -// This is a helper macro to define the value of flag bit enum values. 
-#define VK_BIT(bit) (1 << (bit)) typedef enum { VK_SUCCESS = 0, @@ -1095,28 +1077,6 @@ typedef enum { } VkMemoryInputFlagBits; typedef VkFlags VkMemoryInputFlags; -typedef enum { - // Info type for vkGetPhysicalDeviceInfo() - VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES = 0x00000000, - VK_PHYSICAL_DEVICE_INFO_TYPE_PERFORMANCE = 0x00000001, - VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PROPERTIES = 0x00000002, - VK_PHYSICAL_DEVICE_INFO_TYPE_MEMORY_PROPERTIES = 0x00000003, - - VK_ENUM_RANGE(PHYSICAL_DEVICE_INFO_TYPE, PROPERTIES, MEMORY_PROPERTIES) -} VkPhysicalDeviceInfoType; - -// Physical device compatibility flags -typedef VkFlags VkPhysicalDeviceCompatibilityFlags; -typedef enum { - VK_PHYSICAL_DEVICE_COMPATIBILITY_FEATURES_BIT = VK_BIT(0), - VK_PHYSICAL_DEVICE_COMPATIBILITY_IQ_MATCH_BIT = VK_BIT(1), - VK_PHYSICAL_DEVICE_COMPATIBILITY_PEER_TRANSFER_BIT = VK_BIT(2), - VK_PHYSICAL_DEVICE_COMPATIBILITY_SHARED_MEMORY_BIT = VK_BIT(3), - VK_PHYSICAL_DEVICE_COMPATIBILITY_SHARED_SYNC_BIT = VK_BIT(4), - VK_PHYSICAL_DEVICE_COMPATIBILITY_SHARED_DEVICE0_DISPLAY_BIT = VK_BIT(5), - VK_PHYSICAL_DEVICE_COMPATIBILITY_SHARED_DEVICE1_DISPLAY_BIT = VK_BIT(6), -} VkPhysicalDeviceCompatibilityFlagBits; - typedef struct { VkStructureType sType; const void* pNext; @@ -1305,18 +1265,6 @@ typedef struct { uint8_t pipelineCacheUUID[VK_UUID_LENGTH]; } VkPhysicalDeviceProperties; -typedef struct { - float maxDeviceClock; - float aluPerClock; - float texPerClock; - float primsPerClock; - float pixelsPerClock; -} VkPhysicalDevicePerformance; - -typedef struct { - VkPhysicalDeviceCompatibilityFlags compatibilityFlags; -} VkPhysicalDeviceCompatibilityInfo; - typedef struct { VkQueueFlags queueFlags; uint32_t queueCount; @@ -2009,7 +1957,6 @@ typedef VkResult (VKAPI *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCrea typedef VkResult (VKAPI *PFN_vkDestroyInstance)(VkInstance instance); typedef VkResult (VKAPI *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, 
VkPhysicalDevice* pPhysicalDevices); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceInfo)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceInfoType infoType, size_t* pDataSize, void* pData); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatInfo)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatInfo); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceLimits)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceLimits* pLimits); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); @@ -2139,12 +2086,6 @@ VkResult VKAPI vkGetPhysicalDeviceFeatures( VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); -VkResult VKAPI vkGetPhysicalDeviceInfo( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceInfoType infoType, - size_t* pDataSize, - void* pData); - VkResult VKAPI vkGetPhysicalDeviceFormatInfo( VkPhysicalDevice physicalDevice, VkFormat format, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 3cb04a1d4c2..c574c5fdbbc 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -416,78 +416,6 @@ VkResult anv_GetPhysicalDeviceQueueProperties( return VK_SUCCESS; } -VkResult anv_GetPhysicalDeviceInfo( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceInfoType infoType, - size_t* pDataSize, - void* pData) -{ - VkPhysicalDevicePerformance *performance; - VkPhysicalDeviceQueueProperties *queue_properties; - VkPhysicalDeviceMemoryProperties *memory_properties; - VkDisplayPropertiesWSI *display_properties; - - switch ((uint32_t) infoType) { - case VK_PHYSICAL_DEVICE_INFO_TYPE_PERFORMANCE: - performance = pData; - - *pDataSize = sizeof(*performance); - if (pData == NULL) - return VK_SUCCESS; - - performance->maxDeviceClock = 1.0; - performance->aluPerClock = 1.0; - performance->texPerClock = 1.0; - 
performance->primsPerClock = 1.0; - performance->pixelsPerClock = 1.0; - return VK_SUCCESS; - - case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PROPERTIES: - queue_properties = pData; - - *pDataSize = sizeof(*queue_properties); - if (pData == NULL) - return VK_SUCCESS; - - queue_properties->queueFlags = 0; - queue_properties->queueCount = 1; - queue_properties->supportsTimestamps = true; - return VK_SUCCESS; - - case VK_PHYSICAL_DEVICE_INFO_TYPE_MEMORY_PROPERTIES: - memory_properties = pData; - - *pDataSize = sizeof(*memory_properties); - if (pData == NULL) - return VK_SUCCESS; - - memory_properties->supportsMigration = false; - memory_properties->supportsPinning = false; - return VK_SUCCESS; - - case VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI: - anv_finishme("VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI"); - - *pDataSize = sizeof(*display_properties); - if (pData == NULL) - return VK_SUCCESS; - - display_properties = pData; - display_properties->display = 0; - display_properties->physicalResolution = (VkExtent2D) { 0, 0 }; - return VK_SUCCESS; - - case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI: - anv_finishme("VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI"); - return VK_SUCCESS; - - - default: - return VK_UNSUPPORTED; - } - -} - PFN_vkVoidFunction anv_GetInstanceProcAddr( VkInstance instance, const char* pName) -- cgit v1.2.3 From c95f9b61f2cc395a8b8d972d2796e4e7969cfd2c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 18:20:10 -0700 Subject: vk/device.c: Use ANV_FROM_HANDLE a bunch of places --- src/vulkan/device.c | 123 +++++++++++++++++++++++++--------------------------- 1 file changed, 60 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index c574c5fdbbc..f542dc3bf07 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -498,13 +498,12 @@ anv_device_init_border_colors(struct anv_device *device) static const uint32_t BATCH_SIZE = 8192; VkResult 
anv_CreateDevice( - VkPhysicalDevice _physicalDevice, + VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice) { - struct anv_physical_device *physicalDevice = - (struct anv_physical_device *) _physicalDevice; - struct anv_instance *instance = physicalDevice->instance; + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + struct anv_instance *instance = physical_device->instance; struct anv_device *device; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); @@ -515,11 +514,11 @@ VkResult anv_CreateDevice( if (!device) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - device->no_hw = physicalDevice->no_hw; + device->no_hw = physical_device->no_hw; parse_debug_flags(device); - device->instance = physicalDevice->instance; - device->fd = open(physicalDevice->path, O_RDWR | O_CLOEXEC); + device->instance = physical_device->instance; + device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); if (device->fd == -1) goto fail_device; @@ -542,7 +541,7 @@ VkResult anv_CreateDevice( anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); - device->info = *physicalDevice->info; + device->info = *physical_device->info; device->compiler = anv_compiler_create(device); device->aub_writer = NULL; @@ -570,7 +569,7 @@ VkResult anv_CreateDevice( VkResult anv_DestroyDevice( VkDevice _device) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); anv_compiler_destroy(device->compiler); @@ -664,7 +663,7 @@ VkResult anv_GetDeviceQueue( uint32_t queueIndex, VkQueue* pQueue) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); assert(queueIndex == 0); @@ -883,14 +882,13 @@ VkResult anv_QueueSubmit( const VkCmdBuffer* pCmdBuffers, VkFence _fence) { - struct anv_queue *queue = (struct anv_queue *) _queue; + ANV_FROM_HANDLE(anv_queue, queue, _queue); + ANV_FROM_HANDLE(anv_fence, fence, _fence); 
struct anv_device *device = queue->device; - struct anv_fence *fence = (struct anv_fence *) _fence; int ret; for (uint32_t i = 0; i < cmdBufferCount; i++) { - struct anv_cmd_buffer *cmd_buffer = - (struct anv_cmd_buffer *) pCmdBuffers[i]; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCmdBuffers[i]); if (device->dump_aub) anv_cmd_buffer_dump(cmd_buffer); @@ -919,7 +917,7 @@ VkResult anv_QueueSubmit( VkResult anv_QueueWaitIdle( VkQueue _queue) { - struct anv_queue *queue = (struct anv_queue *) _queue; + ANV_FROM_HANDLE(anv_queue, queue, _queue); return vkDeviceWaitIdle((VkDevice) queue->device); } @@ -927,7 +925,7 @@ VkResult anv_QueueWaitIdle( VkResult anv_DeviceWaitIdle( VkDevice _device) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_state state; struct anv_batch batch; struct drm_i915_gem_execbuffer2 execbuf; @@ -1032,7 +1030,7 @@ VkResult anv_AllocMemory( const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_device_memory *mem; VkResult result; @@ -1049,7 +1047,7 @@ VkResult anv_AllocMemory( *pMem = (VkDeviceMemory) mem; - return VK_SUCCESS; + return VK_SUCCESS; fail: anv_device_free(device, mem); @@ -1061,8 +1059,8 @@ VkResult anv_FreeMemory( VkDevice _device, VkDeviceMemory _mem) { - struct anv_device *device = (struct anv_device *) _device; - struct anv_device_memory *mem = (struct anv_device_memory *) _mem; + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); if (mem->bo.map) anv_gem_munmap(mem->bo.map, mem->bo.size); @@ -1083,8 +1081,8 @@ VkResult anv_MapMemory( VkMemoryMapFlags flags, void** ppData) { - struct anv_device *device = (struct anv_device *) _device; - struct anv_device_memory *mem = (struct anv_device_memory *) _mem; + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, 
mem, _mem); /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only * takes a VkDeviceMemory pointer, it seems like only one map of the memory @@ -1104,7 +1102,7 @@ VkResult anv_UnmapMemory( VkDevice _device, VkDeviceMemory _mem) { - struct anv_device_memory *mem = (struct anv_device_memory *) _mem; + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); anv_gem_munmap(mem->map, mem->map_size); @@ -1134,7 +1132,7 @@ VkResult anv_DestroyObject( VkObjectType objType, VkObject _object) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_object *object = (struct anv_object *) _object; switch (objType) { @@ -1241,9 +1239,9 @@ VkResult anv_BindObjectMemory( VkDeviceMemory _mem, VkDeviceSize memOffset) { + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); struct anv_buffer *buffer; struct anv_image *image; - struct anv_device_memory *mem = (struct anv_device_memory *) _mem; switch (objType) { case VK_OBJECT_TYPE_BUFFER: @@ -1303,7 +1301,7 @@ VkResult anv_CreateFence( const VkFenceCreateInfo* pCreateInfo, VkFence* pFence) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_fence *fence; struct anv_batch batch; VkResult result; @@ -1380,8 +1378,8 @@ VkResult anv_GetFenceStatus( VkDevice _device, VkFence _fence) { - struct anv_device *device = (struct anv_device *) _device; - struct anv_fence *fence = (struct anv_fence *) _fence; + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); int64_t t = 0; int ret; @@ -1998,7 +1996,7 @@ VkResult anv_CreateDynamicViewportState( const VkDynamicVpStateCreateInfo* pCreateInfo, VkDynamicVpState* pState) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_dynamic_vp_state *state; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO); @@ -2085,7 +2083,7 @@ VkResult 
anv_CreateDynamicRasterState( const VkDynamicRsStateCreateInfo* pCreateInfo, VkDynamicRsState* pState) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_dynamic_rs_state *state; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO); @@ -2125,7 +2123,7 @@ VkResult anv_CreateDynamicColorBlendState( const VkDynamicCbStateCreateInfo* pCreateInfo, VkDynamicCbState* pState) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_dynamic_cb_state *state; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO); @@ -2154,7 +2152,7 @@ VkResult anv_CreateDynamicDepthStencilState( const VkDynamicDsStateCreateInfo* pCreateInfo, VkDynamicDsState* pState) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_dynamic_ds_state *state; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO); @@ -2274,7 +2272,7 @@ VkResult anv_CreateCommandBuffer( const VkCmdBufferCreateInfo* pCreateInfo, VkCmdBuffer* pCmdBuffer) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_cmd_buffer *cmd_buffer; VkResult result; @@ -2399,7 +2397,7 @@ VkResult anv_BeginCommandBuffer( VkCmdBuffer cmdBuffer, const VkCmdBufferBeginInfo* pBeginInfo) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); anv_cmd_buffer_emit_state_base_address(cmd_buffer); cmd_buffer->current_pipeline = UINT32_MAX; @@ -2506,7 +2504,7 @@ anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, VkResult anv_EndCommandBuffer( VkCmdBuffer cmdBuffer) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); struct anv_device *device = 
cmd_buffer->device; struct anv_batch *batch = &cmd_buffer->batch; @@ -2584,7 +2582,7 @@ VkResult anv_EndCommandBuffer( VkResult anv_ResetCommandBuffer( VkCmdBuffer cmdBuffer) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); /* Delete all but the first batch bo */ while (cmd_buffer->last_batch_bo->prev_batch_bo) { @@ -2624,8 +2622,8 @@ void anv_CmdBindPipeline( VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_pipeline *pipeline = (struct anv_pipeline *) _pipeline; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); switch (pipelineBindPoint) { case VK_PIPELINE_BIND_POINT_COMPUTE: @@ -2650,7 +2648,7 @@ void anv_CmdBindDynamicStateObject( VkStateBindPoint stateBindPoint, VkDynamicStateObject dynamicState) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); switch (stateBindPoint) { case VK_STATE_BIND_POINT_VIEWPORT: @@ -2738,16 +2736,15 @@ void anv_CmdBindDescriptorSets( uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_pipeline_layout *layout = (struct anv_pipeline_layout *) _layout; - struct anv_descriptor_set *set; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); struct anv_descriptor_set_layout *set_layout; assert(firstSet + setCount < MAX_SETS); uint32_t dynamic_slot = 0; for (uint32_t i = 0; i < setCount; i++) { - set = (struct anv_descriptor_set *) pDescriptorSets[i]; + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); set_layout = layout->set[firstSet + i].layout; cmd_buffer->descriptors[firstSet + i].set = set; @@ -2770,8 +2767,8 @@ void 
anv_CmdBindIndexBuffer( VkDeviceSize offset, VkIndexType indexType) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_buffer *buffer = (struct anv_buffer *) _buffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); static const uint32_t vk_to_gen_index_type[] = { [VK_INDEX_TYPE_UINT16] = INDEX_WORD, @@ -2800,7 +2797,7 @@ void anv_CmdBindVertexBuffers( const VkBuffer* pBuffers, const VkDeviceSize* pOffsets) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); struct anv_vertex_binding *vb = cmd_buffer->vertex_bindings; /* We have to defer setting up vertex buffer since we need the buffer @@ -3267,7 +3264,7 @@ void anv_CmdDraw( uint32_t firstInstance, uint32_t instanceCount) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); anv_cmd_buffer_flush_state(cmd_buffer); @@ -3288,7 +3285,7 @@ void anv_CmdDrawIndexed( uint32_t firstInstance, uint32_t instanceCount) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); anv_cmd_buffer_flush_state(cmd_buffer); @@ -3333,8 +3330,8 @@ void anv_CmdDrawIndirect( uint32_t count, uint32_t stride) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_buffer *buffer = (struct anv_buffer *) _buffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; @@ -3358,8 +3355,8 @@ void anv_CmdDrawIndexedIndirect( uint32_t count, uint32_t stride) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_buffer *buffer = (struct anv_buffer *) _buffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + 
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; @@ -3382,7 +3379,7 @@ void anv_CmdDispatch( uint32_t y, uint32_t z) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; @@ -3411,10 +3408,10 @@ void anv_CmdDispatchIndirect( VkBuffer _buffer, VkDeviceSize offset) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - struct anv_buffer *buffer = (struct anv_buffer *) _buffer; struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; @@ -3471,7 +3468,7 @@ void anv_CmdPipelineBarrier( uint32_t memBarrierCount, const void* const* ppMemBarriers) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); uint32_t b, *dw; struct GEN8_PIPE_CONTROL cmd = { @@ -3613,7 +3610,7 @@ VkResult anv_CreateFramebuffer( const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_framebuffer *framebuffer; static const struct anv_depth_stencil_view null_view = @@ -3677,7 +3674,7 @@ VkResult anv_CreateRenderPass( const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_render_pass *pass; size_t size; @@ -3773,10 +3770,9 @@ void anv_CmdBeginRenderPass( VkCmdBuffer cmdBuffer, const VkRenderPassBegin* 
pRenderPassBegin) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_render_pass *pass = (struct anv_render_pass *) pRenderPassBegin->renderPass; - struct anv_framebuffer *framebuffer = - (struct anv_framebuffer *) pRenderPassBegin->framebuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); assert(pRenderPassBegin->contents == VK_RENDER_PASS_CONTENTS_INLINE); @@ -3802,12 +3798,13 @@ void anv_CmdBeginRenderPass( void anv_CmdEndRenderPass( VkCmdBuffer cmdBuffer) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + /* Emit a flushing pipe control at the end of a pass. This is kind of a * hack but it ensures that render targets always actually get written. * Eventually, we should do flushing based on image format transitions * or something of that nature. */ - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, .PostSyncOperation = NoWrite, .RenderTargetCacheFlushEnable = true, -- cgit v1.2.3 From 5c49730164e0607008d3f82336b1b5ea3c5f855a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 18:20:28 -0700 Subject: vk/device.c: Fix whitespace issues --- src/vulkan/device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index f542dc3bf07..8aadf0344fa 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1391,7 +1391,7 @@ VkResult anv_GetFenceStatus( fence->ready = true; return VK_SUCCESS; } - + return VK_NOT_READY; } @@ -1415,7 +1415,7 @@ VkResult anv_WaitForFences( return VK_TIMEOUT; else if (ret == -1) return vk_error(VK_ERROR_UNKNOWN); - } + } return VK_SUCCESS; } @@ -1645,7 +1645,7 @@ VkResult anv_CreateSampler( min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; 
max_anisotropy = RATIO21; } - + struct GEN8_SAMPLER_STATE sampler_state = { .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, -- cgit v1.2.3 From 7d24fab4ef1519b4ef523188b91e5257a7cd1482 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 18:35:22 -0700 Subject: vk/private.h: Add a bunch of static inline casting functions We will need these as soon as we turn on type saftey. We might as well define and start using them now rather than later. --- src/vulkan/private.h | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 0965b999a65..baa2e7f7901 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -945,8 +945,44 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, void * anv_lookup_entrypoint(const char *name); +#define ANV_DEFINE_CASTS(__anv_type, __VkType) \ +static inline struct __anv_type * \ +__anv_type ## _from_handle(__VkType _handle) \ +{ \ + return (struct __anv_type *) _handle; \ +} \ + \ +static inline __VkType \ +__anv_type ## _to_handle(struct __anv_type *_obj) \ +{ \ + return (__VkType) _obj; \ +} + +ANV_DEFINE_CASTS(anv_physical_device, VkPhysicalDevice) +ANV_DEFINE_CASTS(anv_instance, VkInstance) +ANV_DEFINE_CASTS(anv_queue, VkQueue) +ANV_DEFINE_CASTS(anv_device, VkDevice) +ANV_DEFINE_CASTS(anv_device_memory, VkDeviceMemory) +ANV_DEFINE_CASTS(anv_dynamic_vp_state, VkDynamicVpState) +ANV_DEFINE_CASTS(anv_dynamic_rs_state, VkDynamicRsState) +ANV_DEFINE_CASTS(anv_dynamic_ds_state, VkDynamicDsState) +ANV_DEFINE_CASTS(anv_dynamic_cb_state, VkDynamicCbState) +ANV_DEFINE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) +ANV_DEFINE_CASTS(anv_descriptor_set, VkDescriptorSet) +ANV_DEFINE_CASTS(anv_pipeline_layout, VkPipelineLayout) +ANV_DEFINE_CASTS(anv_buffer, VkBuffer) +ANV_DEFINE_CASTS(anv_cmd_buffer, VkCmdBuffer) +ANV_DEFINE_CASTS(anv_fence, VkFence) +ANV_DEFINE_CASTS(anv_shader_module, 
VkShaderModule) +ANV_DEFINE_CASTS(anv_shader, VkShader) +ANV_DEFINE_CASTS(anv_pipeline, VkPipeline) +ANV_DEFINE_CASTS(anv_image, VkImage) +ANV_DEFINE_CASTS(anv_depth_stencil_view, VkDepthStencilView) +ANV_DEFINE_CASTS(anv_framebuffer, VkFramebuffer) +ANV_DEFINE_CASTS(anv_render_pass, VkRenderPass) + #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ - struct __anv_type *__name = (struct __anv_type *) __handle + struct __anv_type *__name = __anv_type ## _from_handle(__handle) #ifdef __cplusplus } -- cgit v1.2.3 From 73f9187e33582613fd1f255321891578cd9b97b0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 18:41:27 -0700 Subject: device.c: Use the cast helpers --- src/vulkan/device.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 8aadf0344fa..6451f3cfb32 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -148,7 +148,7 @@ VkResult anv_CreateInstance( VkResult anv_DestroyInstance( VkInstance _instance) { - struct anv_instance *instance = (struct anv_instance *) _instance; + ANV_FROM_HANDLE(anv_instance, instance, _instance); instance->pfnFree(instance->pAllocUserData, instance); @@ -160,7 +160,7 @@ VkResult anv_EnumeratePhysicalDevices( uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices) { - struct anv_instance *instance = (struct anv_instance *) _instance; + ANV_FROM_HANDLE(anv_instance, instance, _instance); VkResult result; if (instance->physicalDeviceCount == 0) { @@ -1213,13 +1213,13 @@ VkResult anv_GetObjectMemoryRequirements( switch (objType) { case VK_OBJECT_TYPE_BUFFER: { - struct anv_buffer *buffer = (struct anv_buffer *) object; + struct anv_buffer *buffer = anv_buffer_from_handle(object); pMemoryRequirements->size = buffer->size; pMemoryRequirements->alignment = 16; break; } case VK_OBJECT_TYPE_IMAGE: { - struct anv_image *image = (struct anv_image *) object; + struct anv_image *image = 
anv_buffer_from_handle(object); pMemoryRequirements->size = image->size; pMemoryRequirements->alignment = image->alignment; break; @@ -1245,12 +1245,12 @@ VkResult anv_BindObjectMemory( switch (objType) { case VK_OBJECT_TYPE_BUFFER: - buffer = (struct anv_buffer *) object; + buffer = anv_buffer_from_handle(object); buffer->bo = &mem->bo; buffer->offset = memOffset; break; case VK_OBJECT_TYPE_IMAGE: - image = (struct anv_image *) object; + image = anv_image_from_handle(object); image->bo = &mem->bo; image->offset = memOffset; break; @@ -1914,7 +1914,7 @@ VkResult anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: for (uint32_t j = 0; j < write->count; j++) { set->descriptors[write->destBinding + j].sampler = - (struct anv_sampler *) write->pDescriptors[j].sampler; + anv_sampler_from_handle(write->pDescriptors[j].sampler); } if (write->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) @@ -1926,7 +1926,7 @@ VkResult anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: for (uint32_t j = 0; j < write->count; j++) { set->descriptors[write->destBinding + j].view = - (struct anv_surface_view *) write->pDescriptors[j].imageView; + anv_surface_view_from_handle(write->pDescriptors[j].imageView); } break; @@ -1941,7 +1941,7 @@ VkResult anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: for (uint32_t j = 0; j < write->count; j++) { set->descriptors[write->destBinding + j].view = - (struct anv_surface_view *) write->pDescriptors[j].bufferView; + anv_surface_view_from_handle(write->pDescriptors[j].bufferView); } default: @@ -2652,19 +2652,19 @@ void anv_CmdBindDynamicStateObject( switch (stateBindPoint) { case VK_STATE_BIND_POINT_VIEWPORT: - cmd_buffer->vp_state = (struct anv_dynamic_vp_state *) dynamicState; + cmd_buffer->vp_state = anv_dynamic_vp_state_from_handle(dynamicState); cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY; break; case VK_STATE_BIND_POINT_RASTER: - cmd_buffer->rs_state = (struct anv_dynamic_rs_state *) 
dynamicState; + cmd_buffer->rs_state = anv_dynamic_rs_state_from_handle(dynamicState); cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY; break; case VK_STATE_BIND_POINT_COLOR_BLEND: - cmd_buffer->cb_state = (struct anv_dynamic_cb_state *) dynamicState; + cmd_buffer->cb_state = anv_dynamic_cb_state_from_handle(dynamicState); cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY; break; case VK_STATE_BIND_POINT_DEPTH_STENCIL: - cmd_buffer->ds_state = (struct anv_dynamic_ds_state *) dynamicState; + cmd_buffer->ds_state = anv_dynamic_ds_state_from_handle(dynamicState); cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY; break; default: @@ -2805,7 +2805,7 @@ void anv_CmdBindVertexBuffers( assert(startBinding + bindingCount < MAX_VBS); for (uint32_t i = 0; i < bindingCount; i++) { - vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i]; + vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); vb[startBinding + i].offset = pOffsets[i]; cmd_buffer->vb_dirty |= 1 << (startBinding + i); } @@ -3633,7 +3633,7 @@ VkResult anv_CreateFramebuffer( if (pCreateInfo->pDepthStencilAttachment) { framebuffer->depth_stencil = - (struct anv_depth_stencil_view *) pCreateInfo->pDepthStencilAttachment->view; + anv_depth_stencil_view_from_handle(pCreateInfo->pDepthStencilAttachment->view); } else { framebuffer->depth_stencil = &null_view; } -- cgit v1.2.3 From 098209eedf9532bb442caee7c28cb57f6b959004 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 18:41:27 -0700 Subject: device.c: Use the cast helpers a bunch of places --- src/vulkan/device.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 6451f3cfb32..d8adfe0e159 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -140,7 +140,7 @@ VkResult anv_CreateInstance( instance->apiVersion = pCreateInfo->pAppInfo->apiVersion; instance->physicalDeviceCount = 0; - *pInstance = (VkInstance) 
instance; + *pInstance = anv_instance_to_handle(instance); return VK_SUCCESS; } @@ -192,7 +192,7 @@ VkResult anv_EnumeratePhysicalDevices( if (!pPhysicalDevices) { *pPhysicalDeviceCount = instance->physicalDeviceCount; } else if (*pPhysicalDeviceCount >= 1) { - pPhysicalDevices[0] = (VkPhysicalDevice) &instance->physicalDevice; + pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice); *pPhysicalDeviceCount = 1; } else { *pPhysicalDeviceCount = 0; @@ -554,7 +554,7 @@ VkResult anv_CreateDevice( anv_device_init_border_colors(device); - *pDevice = (VkDevice) device; + *pDevice = anv_device_to_handle(device); return VK_SUCCESS; @@ -667,7 +667,7 @@ VkResult anv_GetDeviceQueue( assert(queueIndex == 0); - *pQueue = (VkQueue) &device->queue; + *pQueue = anv_queue_to_handle(&device->queue); return VK_SUCCESS; } @@ -919,7 +919,7 @@ VkResult anv_QueueWaitIdle( { ANV_FROM_HANDLE(anv_queue, queue, _queue); - return vkDeviceWaitIdle((VkDevice) queue->device); + return vkDeviceWaitIdle(anv_device_to_handle(queue->device)); } VkResult anv_DeviceWaitIdle( @@ -1045,7 +1045,7 @@ VkResult anv_AllocMemory( if (result != VK_SUCCESS) goto fail; - *pMem = (VkDeviceMemory) mem; + *pMem = anv_device_memory_to_handle(mem); return VK_SUCCESS; @@ -1351,7 +1351,7 @@ VkResult anv_CreateFence( fence->execbuf.rsvd1 = device->context_id; fence->execbuf.rsvd2 = 0; - *pFence = (VkFence) fence; + *pFence = anv_fence_to_handle(fence); return VK_SUCCESS; @@ -1496,7 +1496,7 @@ VkResult anv_CreateBuffer( buffer->bo = NULL; buffer->offset = 0; - *pBuffer = (VkBuffer) buffer; + *pBuffer = anv_buffer_to_handle(buffer); return VK_SUCCESS; } @@ -1685,7 +1685,7 @@ VkResult anv_CreateSampler( GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); - *pSampler = (VkSampler) sampler; + *pSampler = anv_sampler_to_handle(sampler); return VK_SUCCESS; } @@ -1838,7 +1838,7 @@ VkResult anv_CreateDescriptorSetLayout( descriptor += pCreateInfo->pBinding[i].arraySize; } - *pSetLayout = 
(VkDescriptorSetLayout) set_layout; + *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); return VK_SUCCESS; } @@ -1890,7 +1890,7 @@ VkResult anv_AllocDescriptorSets( */ memset(set, 0, size); - pDescriptorSets[i] = (VkDescriptorSet) set; + pDescriptorSets[i] = anv_descriptor_set_to_handle(set); } *pCount = count; @@ -2073,7 +2073,7 @@ VkResult anv_CreateDynamicViewportState( } } - *pState = (VkDynamicVpState) state; + *pState = anv_descriptor_set_to_handle(state); return VK_SUCCESS; } @@ -2113,7 +2113,7 @@ VkResult anv_CreateDynamicRasterState( GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster); - *pState = (VkDynamicRsState) state; + *pState = anv_dynamic_rs_state_to_handle(state); return VK_SUCCESS; } @@ -2142,7 +2142,7 @@ VkResult anv_CreateDynamicColorBlendState( GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); - *pState = (VkDynamicCbState) state; + *pState = anv_dynamic_cb_state_to_handle(state); return VK_SUCCESS; } @@ -2185,7 +2185,7 @@ VkResult anv_CreateDynamicDepthStencilState( GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); - *pState = (VkDynamicDsState) state; + *pState = anv_dynamic_ds_state_to_handle(state); return VK_SUCCESS; } @@ -2337,7 +2337,7 @@ VkResult anv_CreateCommandBuffer( cmd_buffer->rs_state = NULL; cmd_buffer->ds_state = NULL; - *pCmdBuffer = (VkCmdBuffer) cmd_buffer; + *pState = anv_cmd_buffer_to_handle(cmd_buffer); return VK_SUCCESS; @@ -3643,7 +3643,7 @@ VkResult anv_CreateFramebuffer( framebuffer->height = pCreateInfo->height; framebuffer->layers = pCreateInfo->layers; - anv_CreateDynamicViewportState((VkDevice) device, + anv_CreateDynamicViewportState(anv_device_to_handle(device), &(VkDynamicVpStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, .viewportAndScissorCount = 1, @@ -3664,7 +3664,7 @@ VkResult anv_CreateFramebuffer( }, &framebuffer->vp_state); - *pFramebuffer = (VkFramebuffer) framebuffer; + *pFramebuffer = 
anv_framebuffer_to_handle(framebuffer); return VK_SUCCESS; } @@ -3699,7 +3699,7 @@ VkResult anv_CreateRenderPass( pass->num_clear_layers++; } - *pRenderPass = (VkRenderPass) pass; + *pRenderPass = anv_render_pass_to_handle(pass); return VK_SUCCESS; } -- cgit v1.2.3 From 92556c77f49dfcf36e39fa72c66bddc9ef54952c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 18:59:05 -0700 Subject: vk: Fix the build --- src/vulkan/device.c | 10 +++++----- src/vulkan/private.h | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index d8adfe0e159..bc3bdb64a10 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1219,7 +1219,7 @@ VkResult anv_GetObjectMemoryRequirements( break; } case VK_OBJECT_TYPE_IMAGE: { - struct anv_image *image = anv_buffer_from_handle(object); + struct anv_image *image = anv_image_from_handle(object); pMemoryRequirements->size = image->size; pMemoryRequirements->alignment = image->alignment; break; @@ -1926,7 +1926,7 @@ VkResult anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: for (uint32_t j = 0; j < write->count; j++) { set->descriptors[write->destBinding + j].view = - anv_surface_view_from_handle(write->pDescriptors[j].imageView); + (struct anv_surface_view *)write->pDescriptors[j].imageView; } break; @@ -1941,7 +1941,7 @@ VkResult anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: for (uint32_t j = 0; j < write->count; j++) { set->descriptors[write->destBinding + j].view = - anv_surface_view_from_handle(write->pDescriptors[j].bufferView); + (struct anv_surface_view *)write->pDescriptors[j].bufferView; } default: @@ -2073,7 +2073,7 @@ VkResult anv_CreateDynamicViewportState( } } - *pState = anv_descriptor_set_to_handle(state); + *pState = anv_dynamic_vp_state_to_handle(state); return VK_SUCCESS; } @@ -2337,7 +2337,7 @@ VkResult anv_CreateCommandBuffer( cmd_buffer->rs_state = NULL; cmd_buffer->ds_state = NULL; - 
*pState = anv_cmd_buffer_to_handle(cmd_buffer); + *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); return VK_SUCCESS; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index baa2e7f7901..635ec287712 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -977,6 +977,7 @@ ANV_DEFINE_CASTS(anv_shader_module, VkShaderModule) ANV_DEFINE_CASTS(anv_shader, VkShader) ANV_DEFINE_CASTS(anv_pipeline, VkPipeline) ANV_DEFINE_CASTS(anv_image, VkImage) +ANV_DEFINE_CASTS(anv_sampler, VkSampler) ANV_DEFINE_CASTS(anv_depth_stencil_view, VkDepthStencilView) ANV_DEFINE_CASTS(anv_framebuffer, VkFramebuffer) ANV_DEFINE_CASTS(anv_render_pass, VkRenderPass) -- cgit v1.2.3 From 8739e8fbe28b3cfe2b138705c8c2a5aa42197257 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 20:16:13 -0700 Subject: vk/meta.c: Use the casting functions --- src/vulkan/meta.c | 198 +++++++++++++++++++++++++++++------------------------- 1 file changed, 105 insertions(+), 93 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 7f17adee733..6b580dc9c49 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -46,7 +46,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) ); VkShader fs; - anv_CreateShader((VkDevice) device, + anv_CreateShader(anv_device_to_handle(device), &(VkShaderCreateInfo) { .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, .module = fsm, @@ -100,7 +100,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) } }; - anv_pipeline_create((VkDevice) device, + anv_pipeline_create(anv_device_to_handle(device), &(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .stageCount = 1, @@ -141,7 +141,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) }, &device->meta_state.clear.pipeline); - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, fs); + anv_DestroyObject(anv_device_to_handle(device), VK_OBJECT_TYPE_SHADER, fs); } #define NUM_VB_USED 2 @@ -223,43 
+223,43 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, .offset = state.offset }; - anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, (VkBuffer[]) { - (VkBuffer) &vertex_buffer, - (VkBuffer) &vertex_buffer + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) }, (VkDeviceSize[]) { 0, sizeof(vertex_data) }); - if ((VkPipeline) cmd_buffer->pipeline != device->meta_state.clear.pipeline) - anv_CmdBindPipeline((VkCmdBuffer) cmd_buffer, + if (cmd_buffer->pipeline != anv_pipeline_from_handle(device->meta_state.clear.pipeline)) + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.clear.pipeline); /* We don't need anything here, only set if not already set. */ if (cmd_buffer->rs_state == NULL) - anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), VK_STATE_BIND_POINT_RASTER, device->meta_state.shared.rs_state); if (cmd_buffer->vp_state == NULL) - anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), VK_STATE_BIND_POINT_VIEWPORT, cmd_buffer->framebuffer->vp_state); if (cmd_buffer->ds_state == NULL) - anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), VK_STATE_BIND_POINT_DEPTH_STENCIL, device->meta_state.shared.ds_state); if (cmd_buffer->cb_state == NULL) - anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), VK_STATE_BIND_POINT_COLOR_BLEND, device->meta_state.shared.cb_state); - anv_CmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, num_instances); + anv_CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, num_instances); } void @@ -325,7 +325,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) ); VkShader vs; - 
anv_CreateShader((VkDevice) device, + anv_CreateShader(anv_device_to_handle(device), &(VkShaderCreateInfo) { .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, .module = vsm, @@ -333,7 +333,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, &vs); VkShader fs; - anv_CreateShader((VkDevice) device, + anv_CreateShader(anv_device_to_handle(device), &(VkShaderCreateInfo) { .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, .module = fsm, @@ -393,10 +393,10 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, } }; - anv_CreateDescriptorSetLayout((VkDevice) device, &ds_layout_info, + anv_CreateDescriptorSetLayout(anv_device_to_handle(device), &ds_layout_info, &device->meta_state.blit.ds_layout); - anv_CreatePipelineLayout((VkDevice) device, + anv_CreatePipelineLayout(anv_device_to_handle(device), &(VkPipelineLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .descriptorSetCount = 1, @@ -404,7 +404,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, &device->meta_state.blit.pipeline_layout); - anv_pipeline_create((VkDevice) device, + anv_pipeline_create(anv_device_to_handle(device), &(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .stageCount = 2, @@ -455,8 +455,8 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, &device->meta_state.blit.pipeline); - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, vs); - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_SHADER, fs); + anv_DestroyObject(anv_device_to_handle(device), VK_OBJECT_TYPE_SHADER, vs); + anv_DestroyObject(anv_device_to_handle(device), VK_OBJECT_TYPE_SHADER, fs); } static void @@ -467,23 +467,23 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_save(cmd_buffer, saved_state); - if ((VkPipeline) cmd_buffer->pipeline != device->meta_state.blit.pipeline) - anv_CmdBindPipeline((VkCmdBuffer) cmd_buffer, + if (cmd_buffer->pipeline != 
anv_pipeline_from_handle(device->meta_state.blit.pipeline)) + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit.pipeline); /* We don't need anything here, only set if not already set. */ if (cmd_buffer->rs_state == NULL) - anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), VK_STATE_BIND_POINT_RASTER, device->meta_state.shared.rs_state); if (cmd_buffer->ds_state == NULL) - anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), VK_STATE_BIND_POINT_DEPTH_STENCIL, device->meta_state.shared.ds_state); - saved_state->cb_state = (VkDynamicCbState) cmd_buffer->cb_state; - anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + saved_state->cb_state = anv_dynamic_cb_state_to_handle(cmd_buffer->cb_state); + anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), VK_STATE_BIND_POINT_COLOR_BLEND, device->meta_state.shared.cb_state); } @@ -558,10 +558,10 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .offset = vb_state.offset, }; - anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, (VkBuffer[]) { - (VkBuffer) &vertex_buffer, - (VkBuffer) &vertex_buffer + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) }, (VkDeviceSize[]) { 0, @@ -570,10 +570,10 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, uint32_t count; VkDescriptorSet set; - anv_AllocDescriptorSets((VkDevice) device, 0 /* pool */, + anv_AllocDescriptorSets(anv_device_to_handle(device), 0 /* pool */, VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, 1, &device->meta_state.blit.ds_layout, &set, &count); - anv_UpdateDescriptorSets((VkDevice) device, + anv_UpdateDescriptorSets(anv_device_to_handle(device), 1, /* writeCount */ (VkWriteDescriptorSet[]) { { @@ -592,8 +592,8 @@ meta_emit_blit(struct 
anv_cmd_buffer *cmd_buffer, } }, 0, NULL); - struct anv_framebuffer *fb; - anv_CreateFramebuffer((VkDevice) device, + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(device), &(VkFramebufferCreateInfo) { .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .colorAttachmentCount = 1, @@ -608,11 +608,11 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .width = dest->extent.width, .height = dest->extent.height, .layers = 1 - }, (VkFramebuffer *)&fb); + }, &fb); VkRenderPass pass; - anv_CreateRenderPass((VkDevice )device, + anv_CreateRenderPass(anv_device_to_handle(device), &(VkRenderPassCreateInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .renderArea = { { 0, 0 }, { dest->extent.width, dest->extent.height } }, @@ -630,31 +630,34 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .depthStencilFormat = VK_FORMAT_UNDEFINED, }, &pass); - anv_CmdBeginRenderPass((VkCmdBuffer) cmd_buffer, + anv_CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBegin) { .renderPass = pass, - .framebuffer = (VkFramebuffer) fb, + .framebuffer = fb, }); - anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, - VK_STATE_BIND_POINT_VIEWPORT, fb->vp_state); + anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), + VK_STATE_BIND_POINT_VIEWPORT, + anv_framebuffer_from_handle(fb)->vp_state); - anv_CmdBindDescriptorSets((VkCmdBuffer) cmd_buffer, + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit.pipeline_layout, 0, 1, &set, 0, NULL); - anv_CmdDraw((VkCmdBuffer) cmd_buffer, 0, 3, 0, 1); + anv_CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, 1); - anv_CmdEndRenderPass((VkCmdBuffer) cmd_buffer); + anv_CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. 
*/ - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_DESCRIPTOR_SET, set); - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_FRAMEBUFFER, - (VkFramebuffer) fb); - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_RENDER_PASS, pass); + anv_DestroyObject(anv_device_to_handle(device), + VK_OBJECT_TYPE_DESCRIPTOR_SET, set); + anv_DestroyObject(anv_device_to_handle(device), + VK_OBJECT_TYPE_FRAMEBUFFER, fb); + anv_DestroyObject(anv_device_to_handle(device), + VK_OBJECT_TYPE_RENDER_PASS, pass); } static void @@ -662,7 +665,7 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, const struct anv_saved_state *saved_state) { anv_cmd_buffer_restore(cmd_buffer, saved_state); - anv_CmdBindDynamicStateObject((VkCmdBuffer) cmd_buffer, + anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), VK_STATE_BIND_POINT_COLOR_BLEND, saved_state->cb_state); } @@ -690,7 +693,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *dest, uint64_t dest_offset, int width, int height, VkFormat copy_format) { - VkDevice vk_device = (VkDevice)cmd_buffer->device; + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); VkImageCreateInfo image_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, @@ -709,23 +712,23 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .flags = 0, }; - struct anv_image *src_image, *dest_image; - anv_CreateImage(vk_device, &image_info, (VkImage *)&src_image); - anv_CreateImage(vk_device, &image_info, (VkImage *)&dest_image); + VkImage src_image, dest_image; + anv_CreateImage(vk_device, &image_info, &src_image); + anv_CreateImage(vk_device, &image_info, &dest_image); /* We could use a vk call to bind memory, but that would require * creating a dummy memory object etc. so there's really no point. 
*/ - src_image->bo = src; - src_image->offset = src_offset; - dest_image->bo = dest; - dest_image->offset = dest_offset; + anv_image_from_handle(src_image)->bo = src; + anv_image_from_handle(src_image)->offset = src_offset; + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; struct anv_surface_view src_view; anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = (VkImage)src_image, + .image = src_image, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = copy_format, .channels = { @@ -748,7 +751,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, anv_color_attachment_view_init(&dest_view, cmd_buffer->device, &(VkColorAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, - .image = (VkImage)dest_image, + .image = dest_image, .format = copy_format, .mipLevel = 0, .baseArraySlice = 0, @@ -764,8 +767,8 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }); - anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, (VkImage) src_image); - anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, (VkImage) dest_image); + anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, src_image); + anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, dest_image); } void anv_CmdCopyBuffer( @@ -984,10 +987,10 @@ void anv_CmdCopyBufferToImage( uint32_t regionCount, const VkBufferImageCopy* pRegions) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; - VkDevice vk_device = (VkDevice) cmd_buffer->device; - struct anv_buffer *src_buffer = (struct anv_buffer *)srcBuffer; - struct anv_image *dest_image = (struct anv_image *)destImage; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + VkDevice vk_device = 
anv_device_to_handle(cmd_buffer->device); struct anv_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); @@ -998,7 +1001,7 @@ void anv_CmdCopyBufferToImage( if (pRegions[r].bufferImageHeight != 0) anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); - struct anv_image *src_image; + VkImage srcImage; anv_CreateImage(vk_device, &(VkImageCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, @@ -1015,7 +1018,9 @@ void anv_CmdCopyBufferToImage( .tiling = VK_IMAGE_TILING_LINEAR, .usage = VK_IMAGE_USAGE_SAMPLED_BIT, .flags = 0, - }, (VkImage *)&src_image); + }, &srcImage); + + ANV_FROM_HANDLE(anv_image, src_image, srcImage); /* We could use a vk call to bind memory, but that would require * creating a dummy memory object etc. so there's really no point. @@ -1027,7 +1032,7 @@ void anv_CmdCopyBufferToImage( anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = (VkImage)src_image, + .image = anv_image_to_handle(src_image), .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = dest_image->format, .channels = { @@ -1050,7 +1055,7 @@ void anv_CmdCopyBufferToImage( anv_color_attachment_view_init(&dest_view, cmd_buffer->device, &(VkColorAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, - .image = (VkImage)dest_image, + .image = anv_image_to_handle(dest_image), .format = dest_image->format, .mipLevel = pRegions[r].imageSubresource.mipLevel, .baseArraySlice = pRegions[r].imageSubresource.arraySlice, @@ -1066,7 +1071,7 @@ void anv_CmdCopyBufferToImage( pRegions[r].imageOffset, pRegions[r].imageExtent); - anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, (VkImage) src_image); + anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, srcImage); } meta_finish_blit(cmd_buffer, &saved_state); @@ -1080,10 +1085,10 @@ void anv_CmdCopyImageToBuffer( uint32_t regionCount, const VkBufferImageCopy* pRegions) { - struct anv_cmd_buffer 
*cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; - VkDevice vk_device = (VkDevice) cmd_buffer->device; - struct anv_image *src_image = (struct anv_image *)srcImage; - struct anv_buffer *dest_buffer = (struct anv_buffer *)destBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); struct anv_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); @@ -1117,7 +1122,7 @@ void anv_CmdCopyImageToBuffer( }, cmd_buffer); - struct anv_image *dest_image; + VkImage destImage; anv_CreateImage(vk_device, &(VkImageCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, @@ -1134,7 +1139,9 @@ void anv_CmdCopyImageToBuffer( .tiling = VK_IMAGE_TILING_LINEAR, .usage = VK_IMAGE_USAGE_SAMPLED_BIT, .flags = 0, - }, (VkImage *)&dest_image); + }, &destImage); + + ANV_FROM_HANDLE(anv_image, dest_image, destImage); /* We could use a vk call to bind memory, but that would require * creating a dummy memory object etc. so there's really no point. 
@@ -1146,7 +1153,7 @@ void anv_CmdCopyImageToBuffer( anv_color_attachment_view_init(&dest_view, cmd_buffer->device, &(VkColorAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, - .image = (VkImage)dest_image, + .image = destImage, .format = src_image->format, .mipLevel = 0, .baseArraySlice = 0, @@ -1162,7 +1169,7 @@ void anv_CmdCopyImageToBuffer( (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent); - anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, (VkImage) dest_image); + anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, destImage); } meta_finish_blit(cmd_buffer, &saved_state); @@ -1196,8 +1203,8 @@ void anv_CmdClearColorImage( uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; - struct anv_image *image = (struct anv_image *)_image; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, image, _image); struct anv_saved_state saved_state; anv_cmd_buffer_save(cmd_buffer, &saved_state); @@ -1218,7 +1225,7 @@ void anv_CmdClearColorImage( cmd_buffer); VkFramebuffer fb; - anv_CreateFramebuffer((VkDevice) cmd_buffer->device, + anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), &(VkFramebufferCreateInfo) { .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .colorAttachmentCount = 1, @@ -1236,7 +1243,7 @@ void anv_CmdClearColorImage( }, &fb); VkRenderPass pass; - anv_CreateRenderPass((VkDevice) cmd_buffer->device, + anv_CreateRenderPass(anv_device_to_handle(cmd_buffer->device), &(VkRenderPassCreateInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .renderArea = { { 0, 0 }, { view.extent.width, view.extent.height } }, @@ -1252,10 +1259,10 @@ void anv_CmdClearColorImage( .depthStencilFormat = VK_FORMAT_UNDEFINED, }, &pass); - anv_CmdBeginRenderPass((VkCmdBuffer) cmd_buffer, + anv_CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBegin) { .renderPass = pass, - .framebuffer = 
(VkFramebuffer) fb, + .framebuffer = fb, }); struct clear_instance_data instance_data = { @@ -1269,7 +1276,7 @@ void anv_CmdClearColorImage( meta_emit_clear(cmd_buffer, 1, &instance_data); - anv_CmdEndRenderPass((VkCmdBuffer) cmd_buffer); + anv_CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); } } } @@ -1331,19 +1338,19 @@ anv_device_init_meta(struct anv_device *device) anv_device_init_meta_clear_state(device); anv_device_init_meta_blit_state(device); - anv_CreateDynamicRasterState((VkDevice) device, + anv_CreateDynamicRasterState(anv_device_to_handle(device), &(VkDynamicRsStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, }, &device->meta_state.shared.rs_state); - anv_CreateDynamicColorBlendState((VkDevice) device, + anv_CreateDynamicColorBlendState(anv_device_to_handle(device), &(VkDynamicCbStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO }, &device->meta_state.shared.cb_state); - anv_CreateDynamicDepthStencilState((VkDevice) device, + anv_CreateDynamicDepthStencilState(anv_device_to_handle(device), &(VkDynamicDsStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO }, @@ -1354,22 +1361,27 @@ void anv_device_finish_meta(struct anv_device *device) { /* Clear */ - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_PIPELINE, + anv_DestroyObject(anv_device_to_handle(device), VK_OBJECT_TYPE_PIPELINE, device->meta_state.clear.pipeline); /* Blit */ - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_PIPELINE, + anv_DestroyObject(anv_device_to_handle(device), VK_OBJECT_TYPE_PIPELINE, device->meta_state.blit.pipeline); - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_PIPELINE_LAYOUT, + anv_DestroyObject(anv_device_to_handle(device), + VK_OBJECT_TYPE_PIPELINE_LAYOUT, device->meta_state.blit.pipeline_layout); - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, + anv_DestroyObject(anv_device_to_handle(device), + VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, 
device->meta_state.blit.ds_layout); /* Shared */ - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_DYNAMIC_RS_STATE, + anv_DestroyObject(anv_device_to_handle(device), + VK_OBJECT_TYPE_DYNAMIC_RS_STATE, device->meta_state.shared.rs_state); - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_DYNAMIC_CB_STATE, + anv_DestroyObject(anv_device_to_handle(device), + VK_OBJECT_TYPE_DYNAMIC_CB_STATE, device->meta_state.shared.cb_state); - anv_DestroyObject((VkDevice) device, VK_OBJECT_TYPE_DYNAMIC_DS_STATE, + anv_DestroyObject(anv_device_to_handle(device), + VK_OBJECT_TYPE_DYNAMIC_DS_STATE, device->meta_state.shared.ds_state); } -- cgit v1.2.3 From b1de1d4f6ef4fec7c8bd78b48bfbd3492bda4cb0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 20:23:46 -0700 Subject: vk/device.c: One more use of a casting function --- src/vulkan/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index bc3bdb64a10..312105a946d 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3598,7 +3598,7 @@ anv_framebuffer_destroy(struct anv_device *device, assert(obj_type == VK_OBJECT_TYPE_FRAMEBUFFER); - anv_DestroyObject((VkDevice) device, + anv_DestroyObject(anv_device_to_handle(device), VK_OBJECT_TYPE_DYNAMIC_VP_STATE, fb->vp_state); -- cgit v1.2.3 From a52e2082038a9f2c1ef510cb78594cdd341e65ea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 20:24:07 -0700 Subject: vk/image.c: Use the casting functions --- src/vulkan/image.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 70d7c5321d5..d63ae1dfed8 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -208,7 +208,7 @@ anv_image_create(VkDevice _device, const struct anv_image_create_info *create_info, VkImage *pImage) { - struct anv_device *device = (struct anv_device *) _device; + 
ANV_FROM_HANDLE(anv_device, device, _device); const VkImageCreateInfo *pCreateInfo = create_info->vk_info; const VkExtent3D *restrict extent = &pCreateInfo->extent; struct anv_image *image = NULL; @@ -283,7 +283,7 @@ anv_image_create(VkDevice _device, goto fail; } - *pImage = (VkImage) image; + *pImage = anv_image_to_handle(image); return VK_SUCCESS; @@ -336,8 +336,8 @@ anv_image_view_init(struct anv_surface_view *view, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - struct anv_image *image = (struct anv_image *) pCreateInfo->image; struct anv_surface *surface; const struct anv_format *format_info = @@ -454,7 +454,7 @@ anv_validate_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, VkImageView *pView) { - const struct anv_image *image; + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *subresource; const struct anv_image_view_info *view_info; const struct anv_format *view_format_info; @@ -465,10 +465,6 @@ anv_validate_CreateImageView(VkDevice _device, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); subresource = &pCreateInfo->subresourceRange; - /* Validate image pointer before dereferencing it. */ - assert(pCreateInfo->image != 0); - image = (struct anv_image *) pCreateInfo->image; - /* Validate viewType is in range before using it. 
*/ assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); @@ -538,7 +534,7 @@ anv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, VkImageView *pView) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_surface_view *view; view = anv_device_alloc(device, sizeof(*view), 8, @@ -561,7 +557,7 @@ anv_color_attachment_view_init(struct anv_surface_view *view, const VkColorAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { - struct anv_image *image = (struct anv_image *) pCreateInfo->image; + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); struct anv_surface *surface = &image->primary_surface; const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); @@ -656,7 +652,7 @@ anv_CreateColorAttachmentView(VkDevice _device, const VkColorAttachmentViewCreateInfo *pCreateInfo, VkColorAttachmentView *pView) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_surface_view *view; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO); @@ -680,9 +676,9 @@ anv_CreateDepthStencilView(VkDevice _device, const VkDepthStencilViewCreateInfo *pCreateInfo, VkDepthStencilView *pView) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); struct anv_depth_stencil_view *view; - struct anv_image *image = (struct anv_image *) pCreateInfo->image; struct anv_surface *depth_surface = &image->primary_surface; struct anv_surface *stencil_surface = &image->stencil_surface; const struct anv_format *format = @@ -711,7 +707,7 @@ anv_CreateDepthStencilView(VkDevice _device, view->stencil_offset = image->offset + stencil_surface->offset; view->stencil_qpitch = 0; /* FINISHME: 
QPitch */ - *pView = (VkDepthStencilView) view; + *pView = anv_depth_stencil_view_to_handle(view); return VK_SUCCESS; } -- cgit v1.2.3 From fb4e2195ecd8565a8222d6182fb5aba91b6a92d4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 20:24:17 -0700 Subject: vk/formats.c: Use the casting functions --- src/vulkan/formats.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index 00300e19c23..0fa47fda681 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -247,8 +247,7 @@ VkResult anv_GetPhysicalDeviceFormatInfo( VkFormat _format, VkFormatProperties* pFormatInfo) { - struct anv_physical_device *physical_device = - (struct anv_physical_device *) physicalDevice; + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); const struct surface_format_info *info; int gen; -- cgit v1.2.3 From 6eb221c884a28786b68904faf950cf6f7881d9d9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 20:28:08 -0700 Subject: vk/pipeline.c: Use the casting functions --- src/vulkan/pipeline.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 9307a452bc9..8fc6b0daef6 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -50,7 +50,7 @@ VkResult anv_CreateShaderModule( module->size = pCreateInfo->codeSize; memcpy(module->data, pCreateInfo->pCode, module->size); - *pShaderModule = (VkShaderModule) module; + *pShaderModule = anv_shader_module_to_handle(module); return VK_SUCCESS; } @@ -81,7 +81,7 @@ VkResult anv_CreateShader( shader->module = module; memcpy(shader->entrypoint, pCreateInfo->pName, name_len + 1); - *pShader = (VkShader) shader; + *pShader = anv_shader_to_handle(shader); return VK_SUCCESS; } @@ -457,7 +457,7 @@ anv_pipeline_create( const struct anv_pipeline_create_info * extra, VkPipeline* pPipeline) { - struct anv_device 
*device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_pipeline *pipeline; VkResult result; uint32_t offset, length; @@ -471,7 +471,7 @@ anv_pipeline_create( pipeline->base.destructor = anv_pipeline_destroy; pipeline->device = device; - pipeline->layout = (struct anv_pipeline_layout *) pCreateInfo->layout; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); result = anv_reloc_list_init(&pipeline->batch.relocs, device); @@ -487,7 +487,7 @@ anv_pipeline_create( for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { pipeline->shaders[pCreateInfo->pStages[i].stage] = - (struct anv_shader *) pCreateInfo->pStages[i].shader; + anv_shader_from_handle(pCreateInfo->pStages[i].shader); } if (pCreateInfo->pTessState) @@ -731,7 +731,7 @@ anv_pipeline_create( .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, .PixelShaderIsPerSample = per_sample_ps); - *pPipeline = (VkPipeline) pipeline; + *pPipeline = anv_pipeline_to_handle(pipeline); return VK_SUCCESS; } @@ -768,7 +768,7 @@ static VkResult anv_compute_pipeline_create( const VkComputePipelineCreateInfo* pCreateInfo, VkPipeline* pPipeline) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_pipeline *pipeline; VkResult result; @@ -781,7 +781,7 @@ static VkResult anv_compute_pipeline_create( pipeline->base.destructor = anv_pipeline_destroy; pipeline->device = device; - pipeline->layout = (struct anv_pipeline_layout *) pCreateInfo->layout; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); result = anv_reloc_list_init(&pipeline->batch.relocs, device); if (result != VK_SUCCESS) { @@ -797,7 +797,7 @@ static VkResult anv_compute_pipeline_create( memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); pipeline->shaders[VK_SHADER_STAGE_COMPUTE] = - (struct anv_shader *) pCreateInfo->cs.shader; + 
anv_shader_from_handle(pCreateInfo->cs.shader); pipeline->use_repclear = false; @@ -830,7 +830,7 @@ static VkResult anv_compute_pipeline_create( pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); - *pPipeline = (VkPipeline) pipeline; + *pPipeline = anv_pipeline_to_handle(pipeline); return VK_SUCCESS; } @@ -869,7 +869,7 @@ VkResult anv_CreatePipelineLayout( const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_pipeline_layout *layout; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); @@ -890,8 +890,8 @@ VkResult anv_CreatePipelineLayout( } for (uint32_t i = 0; i < pCreateInfo->descriptorSetCount; i++) { - struct anv_descriptor_set_layout *set_layout = - (struct anv_descriptor_set_layout *) pCreateInfo->pSetLayouts[i]; + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, + pCreateInfo->pSetLayouts[i]); layout->set[i].layout = set_layout; for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { @@ -905,7 +905,7 @@ VkResult anv_CreatePipelineLayout( } } - *pPipelineLayout = (VkPipelineLayout) layout; + *pPipelineLayout = anv_pipeline_layout_to_handle(layout); return VK_SUCCESS; } -- cgit v1.2.3 From 19f0a9b58210de2f0fb736b56050e2de86e40f6a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 9 Jul 2015 20:32:44 -0700 Subject: vk/query.c: Use the casting functions --- src/vulkan/private.h | 1 + src/vulkan/query.c | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 635ec287712..ef4b183056e 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -981,6 +981,7 @@ ANV_DEFINE_CASTS(anv_sampler, VkSampler) ANV_DEFINE_CASTS(anv_depth_stencil_view, VkDepthStencilView) ANV_DEFINE_CASTS(anv_framebuffer, VkFramebuffer) ANV_DEFINE_CASTS(anv_render_pass, VkRenderPass) 
+ANV_DEFINE_CASTS(anv_query_pool, VkQueryPool) #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ struct __anv_type *__name = __anv_type ## _from_handle(__handle) diff --git a/src/vulkan/query.c b/src/vulkan/query.c index 759f76c8f59..2c68e30eeee 100644 --- a/src/vulkan/query.c +++ b/src/vulkan/query.c @@ -61,13 +61,13 @@ VkResult anv_CreateQueryPool( const VkQueryPoolCreateInfo* pCreateInfo, VkQueryPool* pQueryPool) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_query_pool *pool; VkResult result; size_t size; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); - + switch (pCreateInfo->queryType) { case VK_QUERY_TYPE_OCCLUSION: break; @@ -92,7 +92,7 @@ VkResult anv_CreateQueryPool( pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size); - *pQueryPool = (VkQueryPool) pool; + *pQueryPool = anv_query_pool_to_handle(pool); return VK_SUCCESS; @@ -111,8 +111,8 @@ VkResult anv_GetQueryPoolResults( void* pData, VkQueryResultFlags flags) { - struct anv_device *device = (struct anv_device *) _device; - struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); struct anv_query_pool_slot *slot = pool->bo.map; int64_t timeout = INT64_MAX; uint32_t *dst32 = pData; @@ -172,8 +172,8 @@ void anv_CmdBeginQuery( uint32_t slot, VkQueryControlFlags flags) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: @@ -192,8 +192,8 @@ void anv_CmdEndQuery( VkQueryPool queryPool, uint32_t slot) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_query_pool *pool = (struct anv_query_pool *) 
queryPool; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: @@ -224,8 +224,8 @@ void anv_CmdWriteTimestamp( VkBuffer destBuffer, VkDeviceSize destOffset) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_buffer *buffer = (struct anv_buffer *) destBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); struct anv_bo *bo = buffer->bo; switch (timestampType) { @@ -305,9 +305,9 @@ void anv_CmdCopyQueryPoolResults( VkDeviceSize destStride, VkQueryResultFlags flags) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; - struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; - struct anv_buffer *buffer = (struct anv_buffer *) destBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); uint32_t slot_offset, dst_offset; if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { -- cgit v1.2.3 From 9e64a2a8e4821dd637daac54ba83895a490d4790 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 9 Jul 2015 18:46:21 -0700 Subject: mesa: Fix generation of git_sha1.h.tmp for gitlinks Don't assume that $(top_srcdir)/.git is a directory. It may be a gitlink file [1] if $(top_srcdir) is a submodule checkout or a linked worktree [2]. [1] A "gitlink" is a text file that specifies the real location of the gitdir. [2] Linked worktrees are a new feature in Git 2.5. 
Cc: "10.6, 10.5" Reviewed-by: Iago Toral Quiroga (cherry picked from commit 75784243df1f5bb0652fb243b37d69f36d493a86) --- src/mesa/Makefile.am | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am index 71794b5dada..4ba5b2fac29 100644 --- a/src/mesa/Makefile.am +++ b/src/mesa/Makefile.am @@ -40,8 +40,11 @@ gl_HEADERS = $(top_srcdir)/include/GL/*.h .PHONY: main/git_sha1.h.tmp main/git_sha1.h.tmp: + @# Don't assume that $(top_srcdir)/.git is a directory. It may be + @# a gitlink file if $(top_srcdir) is a submodule checkout or a linked + @# worktree. @touch main/git_sha1.h.tmp - @if test -d $(top_srcdir)/.git; then \ + @if test -e $(top_srcdir)/.git; then \ if which git > /dev/null; then \ git --git-dir=$(top_srcdir)/.git log -n 1 --oneline | \ sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \ -- cgit v1.2.3 From 18340883e3c9736db5ed319dfb036af00c39ba82 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 10 Jul 2015 10:59:09 -0700 Subject: nir: Add C++ versions of NIR_(SRC|DEST)_INIT --- src/glsl/nir/nir.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index c666d93e66b..7a088c44e8b 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -511,7 +511,11 @@ typedef struct nir_src { bool is_ssa; } nir_src; -#define NIR_SRC_INIT (nir_src) { { NULL } } +#ifdef __cplusplus +# define NIR_SRC_INIT nir_src() +#else +# define NIR_SRC_INIT (nir_src) { { NULL } } +#endif #define nir_foreach_use(reg_or_ssa_def, src) \ list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) @@ -534,7 +538,11 @@ typedef struct { bool is_ssa; } nir_dest; -#define NIR_DEST_INIT (nir_dest) { { { NULL } } } +#ifdef __cplusplus +# define NIR_DEST_INIT nir_dest() +#else +# define NIR_DEST_INIT (nir_dest) { { { NULL } } } +#endif #define nir_foreach_def(reg, dest) \ list_for_each_entry(nir_dest, dest, 
&(reg)->defs, reg.def_link) -- cgit v1.2.3 From b94b8dfad5ca8168db0436721100254d2e61b4b7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 10 Jul 2015 12:25:30 -0700 Subject: vk/image: Add explicit constructors for buffer/image view types --- src/vulkan/device.c | 29 +++++++++++++++++++++++------ src/vulkan/image.c | 43 ++++++++++++++++++++++++++++++++----------- src/vulkan/private.h | 10 +++++++--- 3 files changed, 62 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 312105a946d..e9c3eebea96 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1159,9 +1159,20 @@ VkResult anv_DestroyObject( /* These are just dummys anyway, so we don't need to destroy them */ return VK_SUCCESS; + case VK_OBJECT_TYPE_BUFFER_VIEW: + return anv_DestroyBufferView(_device, _object); + + case VK_OBJECT_TYPE_IMAGE_VIEW: + return anv_DestroyImageView(_device, _object); + + case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW: + return anv_DestroyColorAttachmentView(_device, _object); + + case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW: + return anv_DestroyDepthStencilView(_device, _object); + case VK_OBJECT_TYPE_BUFFER: case VK_OBJECT_TYPE_IMAGE: - case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW: case VK_OBJECT_TYPE_SHADER: case VK_OBJECT_TYPE_SHADER_MODULE: case VK_OBJECT_TYPE_PIPELINE_LAYOUT: @@ -1182,9 +1193,6 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_FENCE: case VK_OBJECT_TYPE_QUERY_POOL: case VK_OBJECT_TYPE_FRAMEBUFFER: - case VK_OBJECT_TYPE_BUFFER_VIEW: - case VK_OBJECT_TYPE_IMAGE_VIEW: - case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW: (object->destructor)(device, object, objType); return VK_SUCCESS; @@ -1571,8 +1579,6 @@ VkResult anv_CreateBufferView( if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - view->base.destructor = anv_surface_view_destroy; - view->bo = buffer->bo; view->offset = buffer->offset + pCreateInfo->offset; view->surface_state = @@ -1588,6 +1594,17 @@ VkResult anv_CreateBufferView( return 
VK_SUCCESS; } +VkResult anv_DestroyBufferView( + VkDevice _device, + VkBufferView _view) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_surface_view_destroy(device, (struct anv_surface_view *)_view); + + return VK_SUCCESS; +} + // Sampler functions VkResult anv_CreateSampler( diff --git a/src/vulkan/image.c b/src/vulkan/image.c index d63ae1dfed8..5f82ed369cb 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -317,14 +317,8 @@ VkResult anv_GetImageSubresourceLayout( void anv_surface_view_destroy(struct anv_device *device, - struct anv_object *obj, VkObjectType obj_type) + struct anv_surface_view *view) { - struct anv_surface_view *view = (struct anv_surface_view *)obj; - - assert(obj_type == VK_OBJECT_TYPE_BUFFER_VIEW || - obj_type == VK_OBJECT_TYPE_IMAGE_VIEW || - obj_type == VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW); - anv_state_pool_free(&device->surface_state_pool, view->surface_state); anv_device_free(device, view); @@ -544,13 +538,21 @@ anv_CreateImageView(VkDevice _device, anv_image_view_init(view, device, pCreateInfo, NULL); - view->base.destructor = anv_surface_view_destroy; - *pView = (VkImageView) view; return VK_SUCCESS; } +VkResult +anv_DestroyImageView(VkDevice _device, VkImageView _view) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_surface_view_destroy(device, (struct anv_surface_view *)_view); + + return VK_SUCCESS; +} + void anv_color_attachment_view_init(struct anv_surface_view *view, struct anv_device *device, @@ -664,13 +666,21 @@ anv_CreateColorAttachmentView(VkDevice _device, anv_color_attachment_view_init(view, device, pCreateInfo, NULL); - view->base.destructor = anv_surface_view_destroy; - *pView = (VkColorAttachmentView) view; return VK_SUCCESS; } +VkResult +anv_DestroyColorAttachmentView(VkDevice _device, VkColorAttachmentView _view) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_surface_view_destroy(device, (struct anv_surface_view *)_view); + + return VK_SUCCESS; +} + VkResult 
anv_CreateDepthStencilView(VkDevice _device, const VkDepthStencilViewCreateInfo *pCreateInfo, @@ -711,3 +721,14 @@ anv_CreateDepthStencilView(VkDevice _device, return VK_SUCCESS; } + +VkResult +anv_DestroyDepthStencilView(VkDevice _device, VkDepthStencilView _view) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_depth_stencil_view, view, _view); + + anv_device_free(device, view); + + return VK_SUCCESS; +} diff --git a/src/vulkan/private.h b/src/vulkan/private.h index ef4b183056e..91f3e50b5d8 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -857,8 +857,6 @@ struct anv_image { }; struct anv_surface_view { - struct anv_object base; - struct anv_state surface_state; struct anv_bo * bo; uint32_t offset; @@ -888,7 +886,7 @@ void anv_color_attachment_view_init(struct anv_surface_view *view, struct anv_cmd_buffer *cmd_buffer); void anv_surface_view_destroy(struct anv_device *device, - struct anv_object *obj, VkObjectType obj_type); + struct anv_surface_view *view); struct anv_sampler { uint32_t state[4]; @@ -945,6 +943,12 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, void * anv_lookup_entrypoint(const char *name); +VkResult anv_DestroyImageView(VkDevice device, VkImageView imageView); +VkResult anv_DestroyBufferView(VkDevice device, VkBufferView bufferView); +VkResult anv_DestroyColorAttachmentView(VkDevice device, + VkColorAttachmentView view); +VkResult anv_DestroyDepthStencilView(VkDevice device, VkDepthStencilView view); + #define ANV_DEFINE_CASTS(__anv_type, __VkType) \ static inline struct __anv_type * \ __anv_type ## _from_handle(__VkType _handle) \ -- cgit v1.2.3 From 8b342b39a3f2779fdf112fb839af5b0d69808235 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 10 Jul 2015 12:30:58 -0700 Subject: vk/image: Add an explicit DestroyImage function --- src/vulkan/device.c | 4 +++- src/vulkan/image.c | 10 ++++++++++ src/vulkan/private.h | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'src') diff 
--git a/src/vulkan/device.c b/src/vulkan/device.c index e9c3eebea96..6efcfbdf470 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1171,8 +1171,10 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW: return anv_DestroyDepthStencilView(_device, _object); - case VK_OBJECT_TYPE_BUFFER: case VK_OBJECT_TYPE_IMAGE: + return anv_DestroyImage(_device, _object); + + case VK_OBJECT_TYPE_BUFFER: case VK_OBJECT_TYPE_SHADER: case VK_OBJECT_TYPE_SHADER_MODULE: case VK_OBJECT_TYPE_PIPELINE_LAYOUT: diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 5f82ed369cb..5b042a0e297 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -306,6 +306,16 @@ anv_CreateImage(VkDevice device, pImage); } +VkResult +anv_DestroyImage(VkDevice _device, VkImage _image) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_device_free(device, anv_image_from_handle(_image)); + + return VK_SUCCESS; +} + VkResult anv_GetImageSubresourceLayout( VkDevice device, VkImage image, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 91f3e50b5d8..c374311fc99 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -943,6 +943,7 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, void * anv_lookup_entrypoint(const char *name); +VkResult anv_DestroyImage(VkDevice device, VkImage image); VkResult anv_DestroyImageView(VkDevice device, VkImageView imageView); VkResult anv_DestroyBufferView(VkDevice device, VkBufferView bufferView); VkResult anv_DestroyColorAttachmentView(VkDevice device, -- cgit v1.2.3 From 7552e026da51846c8d8755dbd95b830f82086080 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 10 Jul 2015 12:33:04 -0700 Subject: vk/device: Add an explicit destructor for RenderPass --- src/vulkan/device.c | 11 +++++++++++ src/vulkan/private.h | 1 + 2 files changed, 12 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 6efcfbdf470..25df95853ad 100644 --- a/src/vulkan/device.c +++ 
b/src/vulkan/device.c @@ -3723,6 +3723,17 @@ VkResult anv_CreateRenderPass( return VK_SUCCESS; } +VkResult anv_DestroyRenderPass( + VkDevice _device, + VkRenderPass renderPass) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_device_free(device, anv_render_pass_from_handle(renderPass)); + + return VK_SUCCESS; +} + VkResult anv_GetRenderAreaGranularity( VkDevice device, VkRenderPass renderPass, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index c374311fc99..ea846bfc06e 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -949,6 +949,7 @@ VkResult anv_DestroyBufferView(VkDevice device, VkBufferView bufferView); VkResult anv_DestroyColorAttachmentView(VkDevice device, VkColorAttachmentView view); VkResult anv_DestroyDepthStencilView(VkDevice device, VkDepthStencilView view); +VkResult anv_DestroyRenderPass(VkDevice device, VkRenderPass renderPass); #define ANV_DEFINE_CASTS(__anv_type, __VkType) \ static inline struct __anv_type * \ -- cgit v1.2.3 From 4422bd4cf61c4f05f853dc790c03ab04c138d7b0 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 9 Jul 2015 16:22:18 -0700 Subject: vk/device: Add func anv_physical_device_finish() Because in a follow-up patch I need to do some non-trival teardown on anv_physical_device. Currently, however, anv_physical_device_finish() is currently a no-op that's just called in the right place. Also, rename function fill_physical_device -> anv_physical_device_init for symmetry. 
--- src/vulkan/device.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 25df95853ad..0856bcec400 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -41,10 +41,16 @@ anv_env_get_int(const char *name) return strtol(val, NULL, 0); } +static void +anv_physical_device_finish(struct anv_physical_device *device) +{ + /* Nothing to do */ +} + static VkResult -fill_physical_device(struct anv_physical_device *device, - struct anv_instance *instance, - const char *path) +anv_physical_device_init(struct anv_physical_device *device, + struct anv_instance *instance, + const char *path) { int fd; @@ -150,6 +156,10 @@ VkResult anv_DestroyInstance( { ANV_FROM_HANDLE(anv_instance, instance, _instance); + if (instance->physicalDeviceCount > 0) { + anv_physical_device_finish(&instance->physicalDevice); + } + instance->pfnFree(instance->pAllocUserData, instance); return VK_SUCCESS; @@ -164,8 +174,8 @@ VkResult anv_EnumeratePhysicalDevices( VkResult result; if (instance->physicalDeviceCount == 0) { - result = fill_physical_device(&instance->physicalDevice, - instance, "/dev/dri/renderD128"); + result = anv_physical_device_init(&instance->physicalDevice, + instance, "/dev/dri/renderD128"); if (result != VK_SUCCESS) return result; -- cgit v1.2.3 From 8cda3e9b1bc0df5b2acfb8e3afac854a8d0defe7 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 9 Jul 2015 16:31:39 -0700 Subject: vk/device: Add member anv_physical_device::fd During anv_physical_device_init(), we opend the DRM device to do some queries, then promptly closed it. Now we keep it open for the lifetime of the anv_physical_device so that we can query it some more during vkGetPhysicalDevice*Properties() [which will happen in follow-up commits]. 
--- src/vulkan/device.c | 28 +++++++++++++--------------- src/vulkan/private.h | 1 + 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 0856bcec400..c5079dfebd9 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -44,7 +44,8 @@ anv_env_get_int(const char *name) static void anv_physical_device_finish(struct anv_physical_device *device) { - /* Nothing to do */ + if (device->fd >= 0) + close(device->fd); } static VkResult @@ -52,10 +53,8 @@ anv_physical_device_init(struct anv_physical_device *device, struct anv_instance *instance, const char *path) { - int fd; - - fd = open(path, O_RDWR | O_CLOEXEC); - if (fd < 0) + device->fd = open(path, O_RDWR | O_CLOEXEC); + if (device->fd < 0) return vk_error(VK_ERROR_UNAVAILABLE); device->instance = instance; @@ -67,7 +66,7 @@ anv_physical_device_init(struct anv_physical_device *device, /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. */ device->no_hw = true; } else { - device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); + device->chipset_id = anv_gem_get_param(device->fd, I915_PARAM_CHIPSET_ID); } if (!device->chipset_id) goto fail; @@ -77,25 +76,22 @@ anv_physical_device_init(struct anv_physical_device *device, if (!device->info) goto fail; - if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) + if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_WAIT_TIMEOUT)) goto fail; - if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) + if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_EXECBUF2)) goto fail; - if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC)) + if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_LLC)) goto fail; - if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS)) + if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_EXEC_CONSTANTS)) goto fail; - - close(fd); return VK_SUCCESS; - fail: - close(fd); - +fail: + anv_physical_device_finish(device); return vk_error(VK_ERROR_UNAVAILABLE); } @@ -528,6 +524,8 @@ VkResult 
anv_CreateDevice( parse_debug_flags(device); device->instance = physical_device->instance; + + /* XXX(chadv): Can we dup() physicalDevice->fd here? */ device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); if (device->fd == -1) goto fail_device; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index ea846bfc06e..b4f7e3f7a45 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -343,6 +343,7 @@ struct anv_physical_device { const char * path; const char * name; const struct brw_device_info * info; + int fd; }; struct anv_instance { -- cgit v1.2.3 From c7f512721c5b835e255e82b0a2aa7f0c40053ab7 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 9 Jul 2015 19:38:39 -0700 Subject: vk/gem: Change signature of anv_gem_get_aperture() Replace the anv_device parameter with anv_physical_device, because this needs querying before vkCreateDevice. --- src/vulkan/gem.c | 4 ++-- src/vulkan/private.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/gem.c b/src/vulkan/gem.c index 7bc5e49a810..db0d29f42c7 100644 --- a/src/vulkan/gem.c +++ b/src/vulkan/gem.c @@ -230,13 +230,13 @@ anv_gem_destroy_context(struct anv_device *device, int context) } int -anv_gem_get_aperture(struct anv_device *device, uint64_t *size) +anv_gem_get_aperture(struct anv_physical_device *physical_dev, uint64_t *size) { struct drm_i915_gem_get_aperture aperture; int ret; VG_CLEAR(aperture); - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + ret = anv_ioctl(physical_dev->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); if (ret == -1) return -1; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index b4f7e3f7a45..38679edaa2b 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -448,7 +448,7 @@ int anv_gem_set_tiling(struct anv_device *device, int gem_handle, int anv_gem_create_context(struct anv_device *device); int anv_gem_destroy_context(struct anv_device *device, int context); int 
anv_gem_get_param(int fd, uint32_t param); -int anv_gem_get_aperture(struct anv_device *device, uint64_t *size); +int anv_gem_get_aperture(struct anv_physical_device *physical_dev, uint64_t *size); int anv_gem_handle_to_fd(struct anv_device *device, int gem_handle); int anv_gem_fd_to_handle(struct anv_device *device, int fd); int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); -- cgit v1.2.3 From df2a013881532c9a29f6c9fd36b628ddc8565749 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 9 Jul 2015 19:49:19 -0700 Subject: vk/0.130: Implement vkGetPhysicalDeviceMemoryProperties() --- include/vulkan/vulkan.h | 26 ++++++++++++++++++++++++-- src/vulkan/device.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 26f5a235a84..72eef2ade08 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -910,6 +910,11 @@ typedef enum { } VkMemoryPropertyFlagBits; typedef VkFlags VkMemoryPropertyFlags; +typedef enum { + VK_MEMORY_HEAP_HOST_LOCAL = 0x00000001, +} VkMemoryHeapFlagBits; +typedef VkFlags VkMemoryHeapFlags; + typedef enum { VK_DEVICE_CREATE_VALIDATION_BIT = 0x00000001, } VkDeviceCreateFlagBits; @@ -1272,8 +1277,20 @@ typedef struct { } VkPhysicalDeviceQueueProperties; typedef struct { - bool32_t supportsMigration; - bool32_t supportsPinning; + VkMemoryPropertyFlags propertyFlags; + uint32_t heapIndex; +} VkMemoryType; + +typedef struct { + VkDeviceSize size; + VkMemoryHeapFlags flags; +} VkMemoryHeap; + +typedef struct { + uint32_t memoryTypeCount; + VkMemoryType memoryTypes[VK_MAX_MEMORY_TYPES]; + uint32_t memoryHeapCount; + VkMemoryHeap memoryHeaps[VK_MAX_MEMORY_HEAPS]; } VkPhysicalDeviceMemoryProperties; typedef void (VKAPI *PFN_vkVoidFunction)(void); @@ -1962,6 +1979,7 @@ typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceLimits)(VkPhysicalDevice physica typedef VkResult (VKAPI 
*PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueCount)(VkPhysicalDevice physicalDevice, uint32_t* pCount); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueProperties)(VkPhysicalDevice physicalDevice, uint32_t count, VkPhysicalDeviceQueueProperties* pQueueProperties); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperies); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); @@ -2108,6 +2126,10 @@ VkResult VKAPI vkGetPhysicalDeviceQueueProperties( uint32_t count, VkPhysicalDeviceQueueProperties* pQueueProperties); +VkResult VKAPI vkGetPhysicalDeviceMemoryProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties* pMemoryProperies); + PFN_vkVoidFunction VKAPI vkGetInstanceProcAddr( VkInstance instance, const char* pName); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index c5079dfebd9..15cdc1dca83 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -422,6 +422,39 @@ VkResult anv_GetPhysicalDeviceQueueProperties( return VK_SUCCESS; } +VkResult anv_GetPhysicalDeviceMemoryProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties* pMemoryProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + + size_t aperture_size; + size_t heap_size; + + if (anv_gem_get_aperture(physical_device, &aperture_size) == -1) + return vk_error(VK_ERROR_UNAVAILABLE); + + /* Reserve some wiggle room for the driver by exposing only 75% of the + * aperture to the heap. 
+ */ + heap_size = 3 * aperture_size / 4; + + /* The property flags below are valid only for llc platforms. */ + pMemoryProperties->memoryTypeCount = 1; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + .heapIndex = 1, + }; + + pMemoryProperties->memoryHeapCount = 1; + pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { + .size = heap_size, + .flags = VK_MEMORY_HEAP_HOST_LOCAL, + }; + + return VK_SUCCESS; +} + PFN_vkVoidFunction anv_GetInstanceProcAddr( VkInstance instance, const char* pName) -- cgit v1.2.3 From f43a304dc6284c8c98b9bcef32283b7715f1f820 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 9 Jul 2015 19:59:44 -0700 Subject: vk/0.130: Update vkAllocMemory to use VkMemoryType --- include/vulkan/vulkan.h | 5 ++--- src/vulkan/device.c | 23 +++++++++++++++++------ src/vulkan/x11.c | 1 + 3 files changed, 20 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 72eef2ade08..805e03680e7 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1319,7 +1319,7 @@ typedef struct { VkStructureType sType; const void* pNext; VkDeviceSize allocationSize; - VkMemoryPropertyFlags memProps; + uint32_t memoryTypeIndex; } VkMemoryAllocInfo; typedef struct { @@ -1334,8 +1334,7 @@ typedef struct { VkDeviceSize size; VkDeviceSize alignment; VkDeviceSize granularity; - VkMemoryPropertyFlags memPropsAllowed; - VkMemoryPropertyFlags memPropsRequired; + uint32_t memoryTypeBits; } VkMemoryRequirements; typedef struct { diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 15cdc1dca83..43202a0867f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1077,6 +1077,13 @@ VkResult anv_AllocMemory( assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO); + if (pAllocInfo->memoryTypeIndex != 0) { + /* We support exactly one memory heap. 
 */ + return vk_error(VK_ERROR_INVALID_VALUE); + } + + /* FINISHME: Fail if allocation request exceeds heap size. */ + mem = anv_device_alloc(device, sizeof(*mem), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (mem == NULL) @@ -1254,13 +1261,17 @@ VkResult anv_GetObjectMemoryRequirements( VkObject object, VkMemoryRequirements* pMemoryRequirements) { - pMemoryRequirements->memPropsAllowed = - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - /* VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT | */ - /* VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT | */ - VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT; - pMemoryRequirements->memPropsRequired = 0; + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. The bit `1<<i` is set if and + * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties + * structure for the device is supported for the resource. + * + * We support exactly one memory type. + */ + pMemoryRequirements->memoryTypeBits = 1; switch (objType) { case VK_OBJECT_TYPE_BUFFER: { diff --git a/src/vulkan/x11.c b/src/vulkan/x11.c index ee9cdcdd51c..7b6cee011a4 100644 --- a/src/vulkan/x11.c +++ b/src/vulkan/x11.c @@ -144,6 +144,7 @@ VkResult anv_CreateSwapChainWSI( &(VkMemoryAllocInfo) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, .allocationSize = image->size, + .memoryTypeIndex = 0, }, (VkDeviceMemory *) &memory); -- cgit v1.2.3 From 8f3b2187e1add8fabe30e0da93d6bfbf2c89673d Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 13 Jul 2015 12:59:42 -0700 Subject: vk/0.132: Rename bool32_t -> VkBool32 sed -i 's/bool32_t/VkBool32/g' \ $(git ls-files src/vulkan include/vulkan) --- include/vulkan/vk_platform.h | 2 +- include/vulkan/vk_wsi_lunarg.h | 2 +- include/vulkan/vulkan.h | 114 ++++++++++++++++++++--------------------- src/vulkan/device.c | 2 +- src/vulkan/pipeline.c | 4 +- 5 files changed, 62 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/include/vulkan/vk_platform.h b/include/vulkan/vk_platform.h index 162ec06092e..7ba8d77b875 100644 --- a/include/vulkan/vk_platform.h +++ b/include/vulkan/vk_platform.h @@ -68,7 +68,7 @@ extern "C" #endif // !defined(VK_NO_STDINT_H) typedef uint64_t
VkDeviceSize; -typedef uint32_t bool32_t; +typedef uint32_t VkBool32; typedef uint32_t VkSampleMask; typedef uint32_t VkFlags; diff --git a/include/vulkan/vk_wsi_lunarg.h b/include/vulkan/vk_wsi_lunarg.h index a439012e2cb..c1af9f50fb0 100644 --- a/include/vulkan/vk_wsi_lunarg.h +++ b/include/vulkan/vk_wsi_lunarg.h @@ -158,7 +158,7 @@ typedef struct VkSwapChainImageInfoWSI_ typedef struct VkPhysicalDeviceQueuePresentPropertiesWSI_ { - bool32_t supportsPresent; // Tells whether the queue supports presenting + VkBool32 supportsPresent; // Tells whether the queue supports presenting } VkPhysicalDeviceQueuePresentPropertiesWSI; typedef struct VkPresentInfoWSI_ diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index bb78cae9c79..5070134b2cf 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1125,47 +1125,47 @@ typedef struct { } VkInstanceCreateInfo; typedef struct { - bool32_t robustBufferAccess; - bool32_t fullDrawIndexUint32; - bool32_t imageCubeArray; - bool32_t independentBlend; - bool32_t geometryShader; - bool32_t tessellationShader; - bool32_t sampleRateShading; - bool32_t dualSourceBlend; - bool32_t logicOp; - bool32_t instancedDrawIndirect; - bool32_t depthClip; - bool32_t depthBiasClamp; - bool32_t fillModeNonSolid; - bool32_t depthBounds; - bool32_t wideLines; - bool32_t largePoints; - bool32_t textureCompressionETC2; - bool32_t textureCompressionASTC_LDR; - bool32_t textureCompressionBC; - bool32_t pipelineStatisticsQuery; - bool32_t vertexSideEffects; - bool32_t tessellationSideEffects; - bool32_t geometrySideEffects; - bool32_t fragmentSideEffects; - bool32_t shaderTessellationPointSize; - bool32_t shaderGeometryPointSize; - bool32_t shaderTextureGatherExtended; - bool32_t shaderStorageImageExtendedFormats; - bool32_t shaderStorageImageMultisample; - bool32_t shaderStorageBufferArrayConstantIndexing; - bool32_t shaderStorageImageArrayConstantIndexing; - bool32_t shaderUniformBufferArrayDynamicIndexing; - bool32_t 
shaderSampledImageArrayDynamicIndexing; - bool32_t shaderStorageBufferArrayDynamicIndexing; - bool32_t shaderStorageImageArrayDynamicIndexing; - bool32_t shaderClipDistance; - bool32_t shaderCullDistance; - bool32_t shaderFloat64; - bool32_t shaderInt64; - bool32_t shaderFloat16; - bool32_t shaderInt16; + VkBool32 robustBufferAccess; + VkBool32 fullDrawIndexUint32; + VkBool32 imageCubeArray; + VkBool32 independentBlend; + VkBool32 geometryShader; + VkBool32 tessellationShader; + VkBool32 sampleRateShading; + VkBool32 dualSourceBlend; + VkBool32 logicOp; + VkBool32 instancedDrawIndirect; + VkBool32 depthClip; + VkBool32 depthBiasClamp; + VkBool32 fillModeNonSolid; + VkBool32 depthBounds; + VkBool32 wideLines; + VkBool32 largePoints; + VkBool32 textureCompressionETC2; + VkBool32 textureCompressionASTC_LDR; + VkBool32 textureCompressionBC; + VkBool32 pipelineStatisticsQuery; + VkBool32 vertexSideEffects; + VkBool32 tessellationSideEffects; + VkBool32 geometrySideEffects; + VkBool32 fragmentSideEffects; + VkBool32 shaderTessellationPointSize; + VkBool32 shaderGeometryPointSize; + VkBool32 shaderTextureGatherExtended; + VkBool32 shaderStorageImageExtendedFormats; + VkBool32 shaderStorageImageMultisample; + VkBool32 shaderStorageBufferArrayConstantIndexing; + VkBool32 shaderStorageImageArrayConstantIndexing; + VkBool32 shaderUniformBufferArrayDynamicIndexing; + VkBool32 shaderSampledImageArrayDynamicIndexing; + VkBool32 shaderStorageBufferArrayDynamicIndexing; + VkBool32 shaderStorageImageArrayDynamicIndexing; + VkBool32 shaderClipDistance; + VkBool32 shaderCullDistance; + VkBool32 shaderFloat64; + VkBool32 shaderInt64; + VkBool32 shaderFloat16; + VkBool32 shaderInt16; } VkPhysicalDeviceFeatures; typedef struct { @@ -1226,7 +1226,7 @@ typedef struct { uint32_t mipmapPrecisionBits; uint32_t maxDrawIndexedIndexValue; uint32_t maxDrawIndirectInstanceCount; - bool32_t primitiveRestartForPatches; + VkBool32 primitiveRestartForPatches; float maxSamplerLodBias; uint32_t 
maxSamplerAnisotropy; uint32_t maxViewports; @@ -1280,7 +1280,7 @@ typedef struct { typedef struct { VkQueueFlags queueFlags; uint32_t queueCount; - bool32_t supportsTimestamps; + VkBool32 supportsTimestamps; } VkPhysicalDeviceQueueProperties; typedef struct { @@ -1549,7 +1549,7 @@ typedef struct { VkStructureType sType; const void* pNext; VkPrimitiveTopology topology; - bool32_t primitiveRestartEnable; + VkBool32 primitiveRestartEnable; } VkPipelineIaStateCreateInfo; typedef struct { @@ -1567,8 +1567,8 @@ typedef struct { typedef struct { VkStructureType sType; const void* pNext; - bool32_t depthClipEnable; - bool32_t rasterizerDiscardEnable; + VkBool32 depthClipEnable; + VkBool32 rasterizerDiscardEnable; VkFillMode fillMode; VkCullMode cullMode; VkFrontFace frontFace; @@ -1578,7 +1578,7 @@ typedef struct { VkStructureType sType; const void* pNext; uint32_t rasterSamples; - bool32_t sampleShadingEnable; + VkBool32 sampleShadingEnable; float minSampleShading; VkSampleMask sampleMask; } VkPipelineMsStateCreateInfo; @@ -1594,17 +1594,17 @@ typedef struct { VkStructureType sType; const void* pNext; VkFormat format; - bool32_t depthTestEnable; - bool32_t depthWriteEnable; + VkBool32 depthTestEnable; + VkBool32 depthWriteEnable; VkCompareOp depthCompareOp; - bool32_t depthBoundsEnable; - bool32_t stencilTestEnable; + VkBool32 depthBoundsEnable; + VkBool32 stencilTestEnable; VkStencilOpState front; VkStencilOpState back; } VkPipelineDsStateCreateInfo; typedef struct { - bool32_t blendEnable; + VkBool32 blendEnable; VkFormat format; VkBlend srcBlendColor; VkBlend destBlendColor; @@ -1618,8 +1618,8 @@ typedef struct { typedef struct { VkStructureType sType; const void* pNext; - bool32_t alphaToCoverageEnable; - bool32_t logicOpEnable; + VkBool32 alphaToCoverageEnable; + VkBool32 logicOpEnable; VkLogicOp logicOp; uint32_t attachmentCount; const VkPipelineCbAttachmentState* pAttachments; @@ -1680,7 +1680,7 @@ typedef struct { VkTexAddress addressW; float mipLodBias; uint32_t 
maxAnisotropy; - bool32_t compareEnable; + VkBool32 compareEnable; VkCompareOp compareOp; float minLod; float maxLod; @@ -2013,7 +2013,7 @@ typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageMemory)(VkQueue queue, VkImag typedef VkResult (VKAPI *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); typedef VkResult (VKAPI *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); typedef VkResult (VKAPI *PFN_vkGetFenceStatus)(VkDevice device, VkFence fence); -typedef VkResult (VKAPI *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, bool32_t waitAll, uint64_t timeout); +typedef VkResult (VKAPI *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); typedef VkResult (VKAPI *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore); typedef VkResult (VKAPI *PFN_vkQueueSignalSemaphore)(VkQueue queue, VkSemaphore semaphore); typedef VkResult (VKAPI *PFN_vkQueueWaitSemaphore)(VkQueue queue, VkSemaphore semaphore); @@ -2275,7 +2275,7 @@ VkResult VKAPI vkWaitForFences( VkDevice device, uint32_t fenceCount, const VkFence* pFences, - bool32_t waitAll, + VkBool32 waitAll, uint64_t timeout); VkResult VKAPI vkCreateSemaphore( diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 43202a0867f..9173d6d5e58 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1461,7 +1461,7 @@ VkResult anv_WaitForFences( VkDevice _device, uint32_t fenceCount, const VkFence* pFences, - bool32_t waitAll, + VkBool32 waitAll, uint64_t timeout) { ANV_FROM_HANDLE(anv_device, device, _device); diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 8fc6b0daef6..6396e6cc0f6 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -256,7 +256,7 @@ emit_rs_state(struct anv_pipeline *pipeline, .PointWidth = 1.0, }; - /* FINISHME: bool32_t rasterizerDiscardEnable; */ + 
/* FINISHME: VkBool32 rasterizerDiscardEnable; */ GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); @@ -412,7 +412,7 @@ emit_ds_state(struct anv_pipeline *pipeline, return; } - /* bool32_t depthBoundsEnable; // optional (depth_bounds_test) */ + /* VkBool32 depthBoundsEnable; // optional (depth_bounds_test) */ struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { .DepthTestEnable = info->depthTestEnable, -- cgit v1.2.3 From ffb51fd112393b4bfa67da0d7c42964c83d8a9da Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 13 Jul 2015 10:19:32 -0700 Subject: nir/spirv: update to SPIR-V revision 31 This means that now the internal version of glslangValidator is required. This includes some changes due to the sampler/texture rework, but doesn't actually enable anything more yet. We also don't yet handle UBO's correctly, and don't handle matrix stride and row major/column major yet. --- src/glsl/nir/spirv.h | 1474 ++++++++++++++----------------------------- src/glsl/nir/spirv_to_nir.c | 217 +++---- 2 files changed, 599 insertions(+), 1092 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv.h b/src/glsl/nir/spirv.h index da717ecd342..55bdcbee8b5 100644 --- a/src/glsl/nir/spirv.h +++ b/src/glsl/nir/spirv.h @@ -1,20 +1,20 @@ /* ** Copyright (c) 2015 The Khronos Group Inc. -** +** ** Permission is hereby granted, free of charge, to any person obtaining a copy ** of this software and/or associated documentation files (the "Materials"), ** to deal in the Materials without restriction, including without limitation ** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** and/or sell copies of the Materials, and to permit persons to whom the ** Materials are furnished to do so, subject to the following conditions: -** +** ** The above copyright notice and this permission notice shall be included in ** all copies or substantial portions of the Materials. 
-** +** ** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS ** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** ** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -30,11 +30,16 @@ */ /* -** Specification revision 30. -** Enumeration tokens for SPIR-V, in three styles: C, C++, generic. -** - C++ will have the tokens in the "spv" name space, with no prefix. -** - C will have tokens with as "Spv" prefix. -** +** Specification revision 31. +** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +** ** Some tokens act like mask values, which can be OR'd together, ** while others are mutually exclusive. 
The mask-like ones have ** "Mask" in their name, and a parallel enum that has the shift @@ -44,645 +49,11 @@ #ifndef spirv_H #define spirv_H -#ifdef __cplusplus - -namespace spv { - -static const int MagicNumber = 0x07230203; -static const int Version = 99; - -typedef unsigned int Id; - -static const unsigned int OpCodeMask = 0xFFFF; -static const unsigned int WordCountShift = 16; - -enum SourceLanguage { - SourceLanguageUnknown = 0, - SourceLanguageESSL = 1, - SourceLanguageGLSL = 2, - SourceLanguageOpenCL = 3, -}; - -enum ExecutionModel { - ExecutionModelVertex = 0, - ExecutionModelTessellationControl = 1, - ExecutionModelTessellationEvaluation = 2, - ExecutionModelGeometry = 3, - ExecutionModelFragment = 4, - ExecutionModelGLCompute = 5, - ExecutionModelKernel = 6, -}; - -enum AddressingModel { - AddressingModelLogical = 0, - AddressingModelPhysical32 = 1, - AddressingModelPhysical64 = 2, -}; - -enum MemoryModel { - MemoryModelSimple = 0, - MemoryModelGLSL450 = 1, - MemoryModelOpenCL12 = 2, - MemoryModelOpenCL20 = 3, - MemoryModelOpenCL21 = 4, -}; - -enum ExecutionMode { - ExecutionModeInvocations = 0, - ExecutionModeSpacingEqual = 1, - ExecutionModeSpacingFractionalEven = 2, - ExecutionModeSpacingFractionalOdd = 3, - ExecutionModeVertexOrderCw = 4, - ExecutionModeVertexOrderCcw = 5, - ExecutionModePixelCenterInteger = 6, - ExecutionModeOriginUpperLeft = 7, - ExecutionModeEarlyFragmentTests = 8, - ExecutionModePointMode = 9, - ExecutionModeXfb = 10, - ExecutionModeDepthReplacing = 11, - ExecutionModeDepthAny = 12, - ExecutionModeDepthGreater = 13, - ExecutionModeDepthLess = 14, - ExecutionModeDepthUnchanged = 15, - ExecutionModeLocalSize = 16, - ExecutionModeLocalSizeHint = 17, - ExecutionModeInputPoints = 18, - ExecutionModeInputLines = 19, - ExecutionModeInputLinesAdjacency = 20, - ExecutionModeInputTriangles = 21, - ExecutionModeInputTrianglesAdjacency = 22, - ExecutionModeInputQuads = 23, - ExecutionModeInputIsolines = 24, - ExecutionModeOutputVertices = 25, 
- ExecutionModeOutputPoints = 26, - ExecutionModeOutputLineStrip = 27, - ExecutionModeOutputTriangleStrip = 28, - ExecutionModeVecTypeHint = 29, - ExecutionModeContractionOff = 30, -}; - -enum StorageClass { - StorageClassUniformConstant = 0, - StorageClassInput = 1, - StorageClassUniform = 2, - StorageClassOutput = 3, - StorageClassWorkgroupLocal = 4, - StorageClassWorkgroupGlobal = 5, - StorageClassPrivateGlobal = 6, - StorageClassFunction = 7, - StorageClassGeneric = 8, - StorageClassPrivate = 9, - StorageClassAtomicCounter = 10, -}; - -enum Dim { - Dim1D = 0, - Dim2D = 1, - Dim3D = 2, - DimCube = 3, - DimRect = 4, - DimBuffer = 5, -}; - -enum SamplerAddressingMode { - SamplerAddressingModeNone = 0, - SamplerAddressingModeClampToEdge = 1, - SamplerAddressingModeClamp = 2, - SamplerAddressingModeRepeat = 3, - SamplerAddressingModeRepeatMirrored = 4, -}; - -enum SamplerFilterMode { - SamplerFilterModeNearest = 0, - SamplerFilterModeLinear = 1, -}; - -enum FPFastMathModeShift { - FPFastMathModeNotNaNShift = 0, - FPFastMathModeNotInfShift = 1, - FPFastMathModeNSZShift = 2, - FPFastMathModeAllowRecipShift = 3, - FPFastMathModeFastShift = 4, -}; - -enum FPFastMathModeMask { - FPFastMathModeMaskNone = 0, - FPFastMathModeNotNaNMask = 0x00000001, - FPFastMathModeNotInfMask = 0x00000002, - FPFastMathModeNSZMask = 0x00000004, - FPFastMathModeAllowRecipMask = 0x00000008, - FPFastMathModeFastMask = 0x00000010, -}; - -enum FPRoundingMode { - FPRoundingModeRTE = 0, - FPRoundingModeRTZ = 1, - FPRoundingModeRTP = 2, - FPRoundingModeRTN = 3, -}; - -enum LinkageType { - LinkageTypeExport = 0, - LinkageTypeImport = 1, -}; - -enum AccessQualifier { - AccessQualifierReadOnly = 0, - AccessQualifierWriteOnly = 1, - AccessQualifierReadWrite = 2, -}; - -enum FunctionParameterAttribute { - FunctionParameterAttributeZext = 0, - FunctionParameterAttributeSext = 1, - FunctionParameterAttributeByVal = 2, - FunctionParameterAttributeSret = 3, - FunctionParameterAttributeNoAlias = 4, - 
FunctionParameterAttributeNoCapture = 5, - FunctionParameterAttributeSVM = 6, - FunctionParameterAttributeNoWrite = 7, - FunctionParameterAttributeNoReadWrite = 8, -}; - -enum Decoration { - DecorationPrecisionLow = 0, - DecorationPrecisionMedium = 1, - DecorationPrecisionHigh = 2, - DecorationBlock = 3, - DecorationBufferBlock = 4, - DecorationRowMajor = 5, - DecorationColMajor = 6, - DecorationGLSLShared = 7, - DecorationGLSLStd140 = 8, - DecorationGLSLStd430 = 9, - DecorationGLSLPacked = 10, - DecorationSmooth = 11, - DecorationNoperspective = 12, - DecorationFlat = 13, - DecorationPatch = 14, - DecorationCentroid = 15, - DecorationSample = 16, - DecorationInvariant = 17, - DecorationRestrict = 18, - DecorationAliased = 19, - DecorationVolatile = 20, - DecorationConstant = 21, - DecorationCoherent = 22, - DecorationNonwritable = 23, - DecorationNonreadable = 24, - DecorationUniform = 25, - DecorationNoStaticUse = 26, - DecorationCPacked = 27, - DecorationSaturatedConversion = 28, - DecorationStream = 29, - DecorationLocation = 30, - DecorationComponent = 31, - DecorationIndex = 32, - DecorationBinding = 33, - DecorationDescriptorSet = 34, - DecorationOffset = 35, - DecorationAlignment = 36, - DecorationXfbBuffer = 37, - DecorationStride = 38, - DecorationBuiltIn = 39, - DecorationFuncParamAttr = 40, - DecorationFPRoundingMode = 41, - DecorationFPFastMathMode = 42, - DecorationLinkageAttributes = 43, - DecorationSpecId = 44, -}; - -enum BuiltIn { - BuiltInPosition = 0, - BuiltInPointSize = 1, - BuiltInClipVertex = 2, - BuiltInClipDistance = 3, - BuiltInCullDistance = 4, - BuiltInVertexId = 5, - BuiltInInstanceId = 6, - BuiltInPrimitiveId = 7, - BuiltInInvocationId = 8, - BuiltInLayer = 9, - BuiltInViewportIndex = 10, - BuiltInTessLevelOuter = 11, - BuiltInTessLevelInner = 12, - BuiltInTessCoord = 13, - BuiltInPatchVertices = 14, - BuiltInFragCoord = 15, - BuiltInPointCoord = 16, - BuiltInFrontFacing = 17, - BuiltInSampleId = 18, - BuiltInSamplePosition = 19, - 
BuiltInSampleMask = 20, - BuiltInFragColor = 21, - BuiltInFragDepth = 22, - BuiltInHelperInvocation = 23, - BuiltInNumWorkgroups = 24, - BuiltInWorkgroupSize = 25, - BuiltInWorkgroupId = 26, - BuiltInLocalInvocationId = 27, - BuiltInGlobalInvocationId = 28, - BuiltInLocalInvocationIndex = 29, - BuiltInWorkDim = 30, - BuiltInGlobalSize = 31, - BuiltInEnqueuedWorkgroupSize = 32, - BuiltInGlobalOffset = 33, - BuiltInGlobalLinearId = 34, - BuiltInWorkgroupLinearId = 35, - BuiltInSubgroupSize = 36, - BuiltInSubgroupMaxSize = 37, - BuiltInNumSubgroups = 38, - BuiltInNumEnqueuedSubgroups = 39, - BuiltInSubgroupId = 40, - BuiltInSubgroupLocalInvocationId = 41, -}; - -enum SelectionControlShift { - SelectionControlFlattenShift = 0, - SelectionControlDontFlattenShift = 1, -}; - -enum SelectionControlMask { - SelectionControlMaskNone = 0, - SelectionControlFlattenMask = 0x00000001, - SelectionControlDontFlattenMask = 0x00000002, -}; - -enum LoopControlShift { - LoopControlUnrollShift = 0, - LoopControlDontUnrollShift = 1, -}; - -enum LoopControlMask { - LoopControlMaskNone = 0, - LoopControlUnrollMask = 0x00000001, - LoopControlDontUnrollMask = 0x00000002, -}; - -enum FunctionControlShift { - FunctionControlInlineShift = 0, - FunctionControlDontInlineShift = 1, - FunctionControlPureShift = 2, - FunctionControlConstShift = 3, -}; - -enum FunctionControlMask { - FunctionControlMaskNone = 0, - FunctionControlInlineMask = 0x00000001, - FunctionControlDontInlineMask = 0x00000002, - FunctionControlPureMask = 0x00000004, - FunctionControlConstMask = 0x00000008, -}; - -enum MemorySemanticsShift { - MemorySemanticsRelaxedShift = 0, - MemorySemanticsSequentiallyConsistentShift = 1, - MemorySemanticsAcquireShift = 2, - MemorySemanticsReleaseShift = 3, - MemorySemanticsUniformMemoryShift = 4, - MemorySemanticsSubgroupMemoryShift = 5, - MemorySemanticsWorkgroupLocalMemoryShift = 6, - MemorySemanticsWorkgroupGlobalMemoryShift = 7, - MemorySemanticsAtomicCounterMemoryShift = 8, - 
MemorySemanticsImageMemoryShift = 9, -}; - -enum MemorySemanticsMask { - MemorySemanticsMaskNone = 0, - MemorySemanticsRelaxedMask = 0x00000001, - MemorySemanticsSequentiallyConsistentMask = 0x00000002, - MemorySemanticsAcquireMask = 0x00000004, - MemorySemanticsReleaseMask = 0x00000008, - MemorySemanticsUniformMemoryMask = 0x00000010, - MemorySemanticsSubgroupMemoryMask = 0x00000020, - MemorySemanticsWorkgroupLocalMemoryMask = 0x00000040, - MemorySemanticsWorkgroupGlobalMemoryMask = 0x00000080, - MemorySemanticsAtomicCounterMemoryMask = 0x00000100, - MemorySemanticsImageMemoryMask = 0x00000200, -}; - -enum MemoryAccessShift { - MemoryAccessVolatileShift = 0, - MemoryAccessAlignedShift = 1, -}; - -enum MemoryAccessMask { - MemoryAccessMaskNone = 0, - MemoryAccessVolatileMask = 0x00000001, - MemoryAccessAlignedMask = 0x00000002, -}; - -enum ExecutionScope { - ExecutionScopeCrossDevice = 0, - ExecutionScopeDevice = 1, - ExecutionScopeWorkgroup = 2, - ExecutionScopeSubgroup = 3, -}; - -enum GroupOperation { - GroupOperationReduce = 0, - GroupOperationInclusiveScan = 1, - GroupOperationExclusiveScan = 2, -}; - -enum KernelEnqueueFlags { - KernelEnqueueFlagsNoWait = 0, - KernelEnqueueFlagsWaitKernel = 1, - KernelEnqueueFlagsWaitWorkGroup = 2, -}; - -enum KernelProfilingInfoShift { - KernelProfilingInfoCmdExecTimeShift = 0, -}; - -enum KernelProfilingInfoMask { - KernelProfilingInfoMaskNone = 0, - KernelProfilingInfoCmdExecTimeMask = 0x00000001, -}; - -enum Op { - OpNop = 0, - OpSource = 1, - OpSourceExtension = 2, - OpExtension = 3, - OpExtInstImport = 4, - OpMemoryModel = 5, - OpEntryPoint = 6, - OpExecutionMode = 7, - OpTypeVoid = 8, - OpTypeBool = 9, - OpTypeInt = 10, - OpTypeFloat = 11, - OpTypeVector = 12, - OpTypeMatrix = 13, - OpTypeSampler = 14, - OpTypeFilter = 15, - OpTypeArray = 16, - OpTypeRuntimeArray = 17, - OpTypeStruct = 18, - OpTypeOpaque = 19, - OpTypePointer = 20, - OpTypeFunction = 21, - OpTypeEvent = 22, - OpTypeDeviceEvent = 23, - OpTypeReserveId = 
24, - OpTypeQueue = 25, - OpTypePipe = 26, - OpConstantTrue = 27, - OpConstantFalse = 28, - OpConstant = 29, - OpConstantComposite = 30, - OpConstantSampler = 31, - OpConstantNullPointer = 32, - OpConstantNullObject = 33, - OpSpecConstantTrue = 34, - OpSpecConstantFalse = 35, - OpSpecConstant = 36, - OpSpecConstantComposite = 37, - OpVariable = 38, - OpVariableArray = 39, - OpFunction = 40, - OpFunctionParameter = 41, - OpFunctionEnd = 42, - OpFunctionCall = 43, - OpExtInst = 44, - OpUndef = 45, - OpLoad = 46, - OpStore = 47, - OpPhi = 48, - OpDecorationGroup = 49, - OpDecorate = 50, - OpMemberDecorate = 51, - OpGroupDecorate = 52, - OpGroupMemberDecorate = 53, - OpName = 54, - OpMemberName = 55, - OpString = 56, - OpLine = 57, - OpVectorExtractDynamic = 58, - OpVectorInsertDynamic = 59, - OpVectorShuffle = 60, - OpCompositeConstruct = 61, - OpCompositeExtract = 62, - OpCompositeInsert = 63, - OpCopyObject = 64, - OpCopyMemory = 65, - OpCopyMemorySized = 66, - OpSampler = 67, - OpTextureSample = 68, - OpTextureSampleDref = 69, - OpTextureSampleLod = 70, - OpTextureSampleProj = 71, - OpTextureSampleGrad = 72, - OpTextureSampleOffset = 73, - OpTextureSampleProjLod = 74, - OpTextureSampleProjGrad = 75, - OpTextureSampleLodOffset = 76, - OpTextureSampleProjOffset = 77, - OpTextureSampleGradOffset = 78, - OpTextureSampleProjLodOffset = 79, - OpTextureSampleProjGradOffset = 80, - OpTextureFetchTexelLod = 81, - OpTextureFetchTexelOffset = 82, - OpTextureFetchSample = 83, - OpTextureFetchTexel = 84, - OpTextureGather = 85, - OpTextureGatherOffset = 86, - OpTextureGatherOffsets = 87, - OpTextureQuerySizeLod = 88, - OpTextureQuerySize = 89, - OpTextureQueryLod = 90, - OpTextureQueryLevels = 91, - OpTextureQuerySamples = 92, - OpAccessChain = 93, - OpInBoundsAccessChain = 94, - OpSNegate = 95, - OpFNegate = 96, - OpNot = 97, - OpAny = 98, - OpAll = 99, - OpConvertFToU = 100, - OpConvertFToS = 101, - OpConvertSToF = 102, - OpConvertUToF = 103, - OpUConvert = 104, - OpSConvert 
= 105, - OpFConvert = 106, - OpConvertPtrToU = 107, - OpConvertUToPtr = 108, - OpPtrCastToGeneric = 109, - OpGenericCastToPtr = 110, - OpBitcast = 111, - OpTranspose = 112, - OpIsNan = 113, - OpIsInf = 114, - OpIsFinite = 115, - OpIsNormal = 116, - OpSignBitSet = 117, - OpLessOrGreater = 118, - OpOrdered = 119, - OpUnordered = 120, - OpArrayLength = 121, - OpIAdd = 122, - OpFAdd = 123, - OpISub = 124, - OpFSub = 125, - OpIMul = 126, - OpFMul = 127, - OpUDiv = 128, - OpSDiv = 129, - OpFDiv = 130, - OpUMod = 131, - OpSRem = 132, - OpSMod = 133, - OpFRem = 134, - OpFMod = 135, - OpVectorTimesScalar = 136, - OpMatrixTimesScalar = 137, - OpVectorTimesMatrix = 138, - OpMatrixTimesVector = 139, - OpMatrixTimesMatrix = 140, - OpOuterProduct = 141, - OpDot = 142, - OpShiftRightLogical = 143, - OpShiftRightArithmetic = 144, - OpShiftLeftLogical = 145, - OpLogicalOr = 146, - OpLogicalXor = 147, - OpLogicalAnd = 148, - OpBitwiseOr = 149, - OpBitwiseXor = 150, - OpBitwiseAnd = 151, - OpSelect = 152, - OpIEqual = 153, - OpFOrdEqual = 154, - OpFUnordEqual = 155, - OpINotEqual = 156, - OpFOrdNotEqual = 157, - OpFUnordNotEqual = 158, - OpULessThan = 159, - OpSLessThan = 160, - OpFOrdLessThan = 161, - OpFUnordLessThan = 162, - OpUGreaterThan = 163, - OpSGreaterThan = 164, - OpFOrdGreaterThan = 165, - OpFUnordGreaterThan = 166, - OpULessThanEqual = 167, - OpSLessThanEqual = 168, - OpFOrdLessThanEqual = 169, - OpFUnordLessThanEqual = 170, - OpUGreaterThanEqual = 171, - OpSGreaterThanEqual = 172, - OpFOrdGreaterThanEqual = 173, - OpFUnordGreaterThanEqual = 174, - OpDPdx = 175, - OpDPdy = 176, - OpFwidth = 177, - OpDPdxFine = 178, - OpDPdyFine = 179, - OpFwidthFine = 180, - OpDPdxCoarse = 181, - OpDPdyCoarse = 182, - OpFwidthCoarse = 183, - OpEmitVertex = 184, - OpEndPrimitive = 185, - OpEmitStreamVertex = 186, - OpEndStreamPrimitive = 187, - OpControlBarrier = 188, - OpMemoryBarrier = 189, - OpImagePointer = 190, - OpAtomicInit = 191, - OpAtomicLoad = 192, - OpAtomicStore = 193, - 
OpAtomicExchange = 194, - OpAtomicCompareExchange = 195, - OpAtomicCompareExchangeWeak = 196, - OpAtomicIIncrement = 197, - OpAtomicIDecrement = 198, - OpAtomicIAdd = 199, - OpAtomicISub = 200, - OpAtomicUMin = 201, - OpAtomicUMax = 202, - OpAtomicAnd = 203, - OpAtomicOr = 204, - OpAtomicXor = 205, - OpLoopMerge = 206, - OpSelectionMerge = 207, - OpLabel = 208, - OpBranch = 209, - OpBranchConditional = 210, - OpSwitch = 211, - OpKill = 212, - OpReturn = 213, - OpReturnValue = 214, - OpUnreachable = 215, - OpLifetimeStart = 216, - OpLifetimeStop = 217, - OpCompileFlag = 218, - OpAsyncGroupCopy = 219, - OpWaitGroupEvents = 220, - OpGroupAll = 221, - OpGroupAny = 222, - OpGroupBroadcast = 223, - OpGroupIAdd = 224, - OpGroupFAdd = 225, - OpGroupFMin = 226, - OpGroupUMin = 227, - OpGroupSMin = 228, - OpGroupFMax = 229, - OpGroupUMax = 230, - OpGroupSMax = 231, - OpGenericCastToPtrExplicit = 232, - OpGenericPtrMemSemantics = 233, - OpReadPipe = 234, - OpWritePipe = 235, - OpReservedReadPipe = 236, - OpReservedWritePipe = 237, - OpReserveReadPipePackets = 238, - OpReserveWritePipePackets = 239, - OpCommitReadPipe = 240, - OpCommitWritePipe = 241, - OpIsValidReserveId = 242, - OpGetNumPipePackets = 243, - OpGetMaxPipePackets = 244, - OpGroupReserveReadPipePackets = 245, - OpGroupReserveWritePipePackets = 246, - OpGroupCommitReadPipe = 247, - OpGroupCommitWritePipe = 248, - OpEnqueueMarker = 249, - OpEnqueueKernel = 250, - OpGetKernelNDrangeSubGroupCount = 251, - OpGetKernelNDrangeMaxSubGroupSize = 252, - OpGetKernelWorkGroupSize = 253, - OpGetKernelPreferredWorkGroupSizeMultiple = 254, - OpRetainEvent = 255, - OpReleaseEvent = 256, - OpCreateUserEvent = 257, - OpIsValidEvent = 258, - OpSetUserEventStatus = 259, - OpCaptureEventProfilingInfo = 260, - OpGetDefaultQueue = 261, - OpBuildNDRange = 262, - OpSatConvertSToU = 263, - OpSatConvertUToS = 264, - OpAtomicIMin = 265, - OpAtomicIMax = 266, -}; - -}; // end namespace spv - -#endif // #ifdef __cplusplus - - -#ifndef 
__cplusplus - -static const int SpvMagicNumber = 0x07230203; -static const int SpvVersion = 99; - typedef unsigned int SpvId; -static const unsigned int SpvOpCodeMask = 0xFFFF; +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 99; +static const unsigned int SpvOpCodeMask = 0xffff; static const unsigned int SpvWordCountShift = 16; typedef enum SpvSourceLanguage_ { @@ -711,9 +82,7 @@ typedef enum SpvAddressingModel_ { typedef enum SpvMemoryModel_ { SpvMemoryModelSimple = 0, SpvMemoryModelGLSL450 = 1, - SpvMemoryModelOpenCL12 = 2, - SpvMemoryModelOpenCL20 = 3, - SpvMemoryModelOpenCL21 = 4, + SpvMemoryModelOpenCL = 2, } SpvMemoryModel; typedef enum SpvExecutionMode_ { @@ -725,29 +94,30 @@ typedef enum SpvExecutionMode_ { SpvExecutionModeVertexOrderCcw = 5, SpvExecutionModePixelCenterInteger = 6, SpvExecutionModeOriginUpperLeft = 7, - SpvExecutionModeEarlyFragmentTests = 8, - SpvExecutionModePointMode = 9, - SpvExecutionModeXfb = 10, - SpvExecutionModeDepthReplacing = 11, - SpvExecutionModeDepthAny = 12, - SpvExecutionModeDepthGreater = 13, - SpvExecutionModeDepthLess = 14, - SpvExecutionModeDepthUnchanged = 15, - SpvExecutionModeLocalSize = 16, - SpvExecutionModeLocalSizeHint = 17, - SpvExecutionModeInputPoints = 18, - SpvExecutionModeInputLines = 19, - SpvExecutionModeInputLinesAdjacency = 20, - SpvExecutionModeInputTriangles = 21, - SpvExecutionModeInputTrianglesAdjacency = 22, - SpvExecutionModeInputQuads = 23, - SpvExecutionModeInputIsolines = 24, - SpvExecutionModeOutputVertices = 25, - SpvExecutionModeOutputPoints = 26, - SpvExecutionModeOutputLineStrip = 27, - SpvExecutionModeOutputTriangleStrip = 28, - SpvExecutionModeVecTypeHint = 29, - SpvExecutionModeContractionOff = 30, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthAny = 13, + SpvExecutionModeDepthGreater 
= 14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeInputTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + SpvExecutionModeInputQuads = 24, + SpvExecutionModeInputIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, } SpvExecutionMode; typedef enum SpvStorageClass_ { @@ -760,8 +130,9 @@ typedef enum SpvStorageClass_ { SpvStorageClassPrivateGlobal = 6, SpvStorageClassFunction = 7, SpvStorageClassGeneric = 8, - SpvStorageClassPrivate = 9, + SpvStorageClassPushConstant = 9, SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, } SpvStorageClass; typedef enum SpvDim_ { @@ -786,6 +157,109 @@ typedef enum SpvSamplerFilterMode_ { SpvSamplerFilterModeLinear = 1, } SpvSamplerFilterMode; +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + 
SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, +} SpvImageFormat; + +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, +} SpvImageChannelOrder; + +typedef enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 14, + SpvImageChannelDataTypeUnormInt24 = 15, +} SpvImageChannelDataType; + +typedef enum SpvImageOperandsShift_ { + SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsOffsetShift = 3, + 
SpvImageOperandsOffsetsShift = 4, + SpvImageOperandsSampleShift = 5, +} SpvImageOperandsShift; + +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsOffsetMask = 0x00000008, + SpvImageOperandsOffsetsMask = 0x00000010, + SpvImageOperandsSampleMask = 0x00000020, +} SpvImageOperandsMask; + typedef enum SpvFPFastMathModeShift_ { SpvFPFastMathModeNotNaNShift = 0, SpvFPFastMathModeNotInfShift = 1, @@ -834,34 +308,34 @@ typedef enum SpvFunctionParameterAttribute_ { } SpvFunctionParameterAttribute; typedef enum SpvDecoration_ { - SpvDecorationPrecisionLow = 0, - SpvDecorationPrecisionMedium = 1, - SpvDecorationPrecisionHigh = 2, - SpvDecorationBlock = 3, - SpvDecorationBufferBlock = 4, - SpvDecorationRowMajor = 5, - SpvDecorationColMajor = 6, - SpvDecorationGLSLShared = 7, - SpvDecorationGLSLStd140 = 8, - SpvDecorationGLSLStd430 = 9, - SpvDecorationGLSLPacked = 10, - SpvDecorationSmooth = 11, - SpvDecorationNoperspective = 12, - SpvDecorationFlat = 13, - SpvDecorationPatch = 14, - SpvDecorationCentroid = 15, - SpvDecorationSample = 16, - SpvDecorationInvariant = 17, - SpvDecorationRestrict = 18, - SpvDecorationAliased = 19, - SpvDecorationVolatile = 20, - SpvDecorationConstant = 21, - SpvDecorationCoherent = 22, - SpvDecorationNonwritable = 23, - SpvDecorationNonreadable = 24, - SpvDecorationUniform = 25, - SpvDecorationNoStaticUse = 26, - SpvDecorationCPacked = 27, + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + SpvDecorationBufferBlock = 3, + SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + SpvDecorationBuiltIn = 11, + SpvDecorationSmooth = 12, + SpvDecorationNoperspective = 13, + SpvDecorationFlat = 14, + 
SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonwritable = 24, + SpvDecorationNonreadable = 25, + SpvDecorationUniform = 26, + SpvDecorationNoStaticUse = 27, SpvDecorationSaturatedConversion = 28, SpvDecorationStream = 29, SpvDecorationLocation = 30, @@ -870,15 +344,12 @@ typedef enum SpvDecoration_ { SpvDecorationBinding = 33, SpvDecorationDescriptorSet = 34, SpvDecorationOffset = 35, - SpvDecorationAlignment = 36, - SpvDecorationXfbBuffer = 37, - SpvDecorationStride = 38, - SpvDecorationBuiltIn = 39, - SpvDecorationFuncParamAttr = 40, - SpvDecorationFPRoundingMode = 41, - SpvDecorationFPFastMathMode = 42, - SpvDecorationLinkageAttributes = 43, - SpvDecorationSpecId = 44, + SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, } SpvDecoration; typedef enum SpvBuiltIn_ { @@ -1001,12 +472,13 @@ typedef enum SpvMemoryAccessMask_ { SpvMemoryAccessAlignedMask = 0x00000002, } SpvMemoryAccessMask; -typedef enum SpvExecutionScope_ { - SpvExecutionScopeCrossDevice = 0, - SpvExecutionScopeDevice = 1, - SpvExecutionScopeWorkgroup = 2, - SpvExecutionScopeSubgroup = 3, -} SpvExecutionScope; +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, + SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, +} SpvScope; typedef enum SpvGroupOperation_ { SpvGroupOperationReduce = 0, @@ -1029,276 +501,308 @@ typedef enum SpvKernelProfilingInfoMask_ { SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, } SpvKernelProfilingInfoMask; +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation 
= 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityImageSRGBWrite = 16, + SpvCapabilityPipes = 17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, +} SpvCapability; + typedef enum SpvOp_ { SpvOpNop = 0, - SpvOpSource = 1, - SpvOpSourceExtension = 2, - SpvOpExtension = 3, - SpvOpExtInstImport = 4, - SpvOpMemoryModel = 5, - SpvOpEntryPoint = 6, - SpvOpExecutionMode = 7, - SpvOpTypeVoid = 8, - SpvOpTypeBool = 9, - SpvOpTypeInt = 10, - SpvOpTypeFloat = 11, - SpvOpTypeVector = 12, - SpvOpTypeMatrix = 13, - SpvOpTypeSampler = 14, - SpvOpTypeFilter = 15, - SpvOpTypeArray = 16, - SpvOpTypeRuntimeArray = 17, - SpvOpTypeStruct = 18, - SpvOpTypeOpaque = 19, - SpvOpTypePointer = 20, - SpvOpTypeFunction = 21, - SpvOpTypeEvent = 22, - SpvOpTypeDeviceEvent = 23, - SpvOpTypeReserveId = 24, - SpvOpTypeQueue = 25, - SpvOpTypePipe = 26, - SpvOpConstantTrue = 27, - SpvOpConstantFalse = 28, - SpvOpConstant = 29, - SpvOpConstantComposite = 30, - SpvOpConstantSampler = 31, - SpvOpConstantNullPointer = 32, - SpvOpConstantNullObject = 33, - SpvOpSpecConstantTrue = 34, - SpvOpSpecConstantFalse = 35, - SpvOpSpecConstant = 36, - SpvOpSpecConstantComposite = 37, - SpvOpVariable = 38, - SpvOpVariableArray = 39, - SpvOpFunction = 40, - SpvOpFunctionParameter = 41, - SpvOpFunctionEnd = 42, - SpvOpFunctionCall = 43, - SpvOpExtInst = 44, - SpvOpUndef = 45, - SpvOpLoad = 46, - SpvOpStore = 47, - SpvOpPhi = 48, - SpvOpDecorationGroup = 49, - SpvOpDecorate = 50, - SpvOpMemberDecorate = 51, - SpvOpGroupDecorate = 52, - SpvOpGroupMemberDecorate = 53, - SpvOpName 
= 54, - SpvOpMemberName = 55, - SpvOpString = 56, - SpvOpLine = 57, - SpvOpVectorExtractDynamic = 58, - SpvOpVectorInsertDynamic = 59, - SpvOpVectorShuffle = 60, - SpvOpCompositeConstruct = 61, - SpvOpCompositeExtract = 62, - SpvOpCompositeInsert = 63, - SpvOpCopyObject = 64, - SpvOpCopyMemory = 65, - SpvOpCopyMemorySized = 66, - SpvOpSampler = 67, - SpvOpTextureSample = 68, - SpvOpTextureSampleDref = 69, - SpvOpTextureSampleLod = 70, - SpvOpTextureSampleProj = 71, - SpvOpTextureSampleGrad = 72, - SpvOpTextureSampleOffset = 73, - SpvOpTextureSampleProjLod = 74, - SpvOpTextureSampleProjGrad = 75, - SpvOpTextureSampleLodOffset = 76, - SpvOpTextureSampleProjOffset = 77, - SpvOpTextureSampleGradOffset = 78, - SpvOpTextureSampleProjLodOffset = 79, - SpvOpTextureSampleProjGradOffset = 80, - SpvOpTextureFetchTexelLod = 81, - SpvOpTextureFetchTexelOffset = 82, - SpvOpTextureFetchSample = 83, - SpvOpTextureFetchTexel = 84, - SpvOpTextureGather = 85, - SpvOpTextureGatherOffset = 86, - SpvOpTextureGatherOffsets = 87, - SpvOpTextureQuerySizeLod = 88, - SpvOpTextureQuerySize = 89, - SpvOpTextureQueryLod = 90, - SpvOpTextureQueryLevels = 91, - SpvOpTextureQuerySamples = 92, - SpvOpAccessChain = 93, - SpvOpInBoundsAccessChain = 94, - SpvOpSNegate = 95, - SpvOpFNegate = 96, - SpvOpNot = 97, - SpvOpAny = 98, - SpvOpAll = 99, - SpvOpConvertFToU = 100, - SpvOpConvertFToS = 101, - SpvOpConvertSToF = 102, - SpvOpConvertUToF = 103, - SpvOpUConvert = 104, - SpvOpSConvert = 105, - SpvOpFConvert = 106, - SpvOpConvertPtrToU = 107, - SpvOpConvertUToPtr = 108, - SpvOpPtrCastToGeneric = 109, - SpvOpGenericCastToPtr = 110, - SpvOpBitcast = 111, - SpvOpTranspose = 112, - SpvOpIsNan = 113, - SpvOpIsInf = 114, - SpvOpIsFinite = 115, - SpvOpIsNormal = 116, - SpvOpSignBitSet = 117, - SpvOpLessOrGreater = 118, - SpvOpOrdered = 119, - SpvOpUnordered = 120, - SpvOpArrayLength = 121, - SpvOpIAdd = 122, - SpvOpFAdd = 123, - SpvOpISub = 124, - SpvOpFSub = 125, - SpvOpIMul = 126, - SpvOpFMul = 127, - 
SpvOpUDiv = 128, - SpvOpSDiv = 129, - SpvOpFDiv = 130, - SpvOpUMod = 131, - SpvOpSRem = 132, - SpvOpSMod = 133, - SpvOpFRem = 134, - SpvOpFMod = 135, - SpvOpVectorTimesScalar = 136, - SpvOpMatrixTimesScalar = 137, - SpvOpVectorTimesMatrix = 138, - SpvOpMatrixTimesVector = 139, - SpvOpMatrixTimesMatrix = 140, - SpvOpOuterProduct = 141, - SpvOpDot = 142, - SpvOpShiftRightLogical = 143, - SpvOpShiftRightArithmetic = 144, - SpvOpShiftLeftLogical = 145, - SpvOpLogicalOr = 146, - SpvOpLogicalXor = 147, - SpvOpLogicalAnd = 148, - SpvOpBitwiseOr = 149, - SpvOpBitwiseXor = 150, - SpvOpBitwiseAnd = 151, - SpvOpSelect = 152, - SpvOpIEqual = 153, - SpvOpFOrdEqual = 154, - SpvOpFUnordEqual = 155, - SpvOpINotEqual = 156, - SpvOpFOrdNotEqual = 157, - SpvOpFUnordNotEqual = 158, - SpvOpULessThan = 159, - SpvOpSLessThan = 160, - SpvOpFOrdLessThan = 161, - SpvOpFUnordLessThan = 162, - SpvOpUGreaterThan = 163, - SpvOpSGreaterThan = 164, - SpvOpFOrdGreaterThan = 165, - SpvOpFUnordGreaterThan = 166, - SpvOpULessThanEqual = 167, - SpvOpSLessThanEqual = 168, - SpvOpFOrdLessThanEqual = 169, - SpvOpFUnordLessThanEqual = 170, - SpvOpUGreaterThanEqual = 171, - SpvOpSGreaterThanEqual = 172, - SpvOpFOrdGreaterThanEqual = 173, - SpvOpFUnordGreaterThanEqual = 174, - SpvOpDPdx = 175, - SpvOpDPdy = 176, - SpvOpFwidth = 177, - SpvOpDPdxFine = 178, - SpvOpDPdyFine = 179, - SpvOpFwidthFine = 180, - SpvOpDPdxCoarse = 181, - SpvOpDPdyCoarse = 182, - SpvOpFwidthCoarse = 183, - SpvOpEmitVertex = 184, - SpvOpEndPrimitive = 185, - SpvOpEmitStreamVertex = 186, - SpvOpEndStreamPrimitive = 187, - SpvOpControlBarrier = 188, - SpvOpMemoryBarrier = 189, - SpvOpImagePointer = 190, - SpvOpAtomicInit = 191, - SpvOpAtomicLoad = 192, - SpvOpAtomicStore = 193, - SpvOpAtomicExchange = 194, - SpvOpAtomicCompareExchange = 195, - SpvOpAtomicCompareExchangeWeak = 196, - SpvOpAtomicIIncrement = 197, - SpvOpAtomicIDecrement = 198, - SpvOpAtomicIAdd = 199, - SpvOpAtomicISub = 200, - SpvOpAtomicUMin = 201, - SpvOpAtomicUMax = 
202, - SpvOpAtomicAnd = 203, - SpvOpAtomicOr = 204, - SpvOpAtomicXor = 205, - SpvOpLoopMerge = 206, - SpvOpSelectionMerge = 207, - SpvOpLabel = 208, - SpvOpBranch = 209, - SpvOpBranchConditional = 210, - SpvOpSwitch = 211, - SpvOpKill = 212, - SpvOpReturn = 213, - SpvOpReturnValue = 214, - SpvOpUnreachable = 215, - SpvOpLifetimeStart = 216, - SpvOpLifetimeStop = 217, - SpvOpCompileFlag = 218, - SpvOpAsyncGroupCopy = 219, - SpvOpWaitGroupEvents = 220, - SpvOpGroupAll = 221, - SpvOpGroupAny = 222, - SpvOpGroupBroadcast = 223, - SpvOpGroupIAdd = 224, - SpvOpGroupFAdd = 225, - SpvOpGroupFMin = 226, - SpvOpGroupUMin = 227, - SpvOpGroupSMin = 228, - SpvOpGroupFMax = 229, - SpvOpGroupUMax = 230, - SpvOpGroupSMax = 231, - SpvOpGenericCastToPtrExplicit = 232, - SpvOpGenericPtrMemSemantics = 233, - SpvOpReadPipe = 234, - SpvOpWritePipe = 235, - SpvOpReservedReadPipe = 236, - SpvOpReservedWritePipe = 237, - SpvOpReserveReadPipePackets = 238, - SpvOpReserveWritePipePackets = 239, - SpvOpCommitReadPipe = 240, - SpvOpCommitWritePipe = 241, - SpvOpIsValidReserveId = 242, - SpvOpGetNumPipePackets = 243, - SpvOpGetMaxPipePackets = 244, - SpvOpGroupReserveReadPipePackets = 245, - SpvOpGroupReserveWritePipePackets = 246, - SpvOpGroupCommitReadPipe = 247, - SpvOpGroupCommitWritePipe = 248, - SpvOpEnqueueMarker = 249, - SpvOpEnqueueKernel = 250, - SpvOpGetKernelNDrangeSubGroupCount = 251, - SpvOpGetKernelNDrangeMaxSubGroupSize = 252, - SpvOpGetKernelWorkGroupSize = 253, - SpvOpGetKernelPreferredWorkGroupSizeMultiple = 254, - SpvOpRetainEvent = 255, - SpvOpReleaseEvent = 256, - SpvOpCreateUserEvent = 257, - SpvOpIsValidEvent = 258, - SpvOpSetUserEventStatus = 259, - SpvOpCaptureEventProfilingInfo = 260, - SpvOpGetDefaultQueue = 261, - SpvOpBuildNDRange = 262, - SpvOpSatConvertSToU = 263, - SpvOpSatConvertUToS = 264, - SpvOpAtomicIMin = 265, - SpvOpAtomicIMax = 266, + SpvOpUndef = 1, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, 
+ SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + 
SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImageQueryDim = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpIMulExtended = 151, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + 
SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicIMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicIMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + 
SpvOpAsyncGroupCopy = 259, + SpvOpWaitGroupEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, } SpvOp; -#endif // #ifndef __cplusplus - #endif // #ifndef spirv_H + diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 6819f88833a..3b9253d2aef 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -334,10 +334,8 @@ struct_member_decoration_cb(struct vtn_builder *b, return; switch (dec->decoration) { - case SpvDecorationPrecisionLow: - case SpvDecorationPrecisionMedium: - case SpvDecorationPrecisionHigh: - break; /* FIXME: Do nothing with these for now. */ + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ case SpvDecorationSmooth: ctx->fields[member].interpolation = INTERP_QUALIFIER_SMOOTH; break; @@ -362,11 +360,32 @@ struct_member_decoration_cb(struct vtn_builder *b, ctx->type->members[member]->is_builtin = true; ctx->type->members[member]->builtin = dec->literals[0]; break; + case SpvDecorationOffset: + ctx->type->offsets[member] = dec->literals[0]; + break; default: unreachable("Unhandled member decoration"); } } +static void +array_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *ctx) +{ + struct vtn_type *type = val->type; + + assert(member == -1); + switch (dec->decoration) { + case SpvDecorationArrayStride: + type->stride = dec->literals[0]; + break; + + default: + unreachable("Unhandled array type decoration"); + } +} + static void vtn_handle_type(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -421,12 +440,14 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type->type = glsl_array_type(array_element->type, w[3]); val->type->array_element = array_element; val->type->stride = 0; + vtn_foreach_decoration(b, val, array_decoration_cb, NULL); return; } case SpvOpTypeStruct: { unsigned num_fields = count - 2; val->type->members = ralloc_array(b, struct vtn_type *, num_fields); + val->type->offsets = ralloc_array(b, unsigned, num_fields); NIR_VLA(struct glsl_struct_field, fields, count); for (unsigned i = 0; i < num_fields; i++) { @@ -479,7 +500,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type = vtn_value(b, w[3], vtn_value_type_type)->type; return; - case SpvOpTypeSampler: { + case SpvOpTypeImage: { const struct glsl_type *sampled_type = vtn_value(b, w[2], vtn_value_type_type)->type->type; @@ -497,19 +518,21 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, unreachable("Invalid SPIR-V Sampler dimension"); } - /* TODO: Handle the various texture image/filter options */ - (void)w[4]; - + bool is_shadow = w[4]; bool is_array = w[5]; - 
bool is_shadow = w[6]; - assert(w[7] == 0 && "FIXME: Handl multi-sampled textures"); + assert(w[6] == 0 && "FIXME: Handl multi-sampled textures"); + assert(w[7] == 1 && "FIXME: Add support for non-sampled images"); val->type->type = glsl_sampler_type(dim, is_shadow, is_array, glsl_get_base_type(sampled_type)); return; } + case SpvOpTypeSampledImage: + val->type = vtn_value(b, w[2], vtn_value_type_type)->type; + break; + case SpvOpTypeRuntimeArray: case SpvOpTypeOpaque: case SpvOpTypeEvent: @@ -693,10 +716,8 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, nir_variable *var = void_var; switch (dec->decoration) { - case SpvDecorationPrecisionLow: - case SpvDecorationPrecisionMedium: - case SpvDecorationPrecisionHigh: - break; /* FIXME: Do nothing with these for now. */ + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. */ case SpvDecorationSmooth: var->data.interpolation = INTERP_QUALIFIER_SMOOTH; break; @@ -758,9 +779,6 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, case SpvDecorationRowMajor: case SpvDecorationColMajor: case SpvDecorationGLSLShared: - case SpvDecorationGLSLStd140: - case SpvDecorationGLSLStd430: - case SpvDecorationGLSLPacked: case SpvDecorationPatch: case SpvDecorationRestrict: case SpvDecorationAliased: @@ -773,9 +791,7 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, case SpvDecorationSaturatedConversion: case SpvDecorationStream: case SpvDecorationOffset: - case SpvDecorationAlignment: case SpvDecorationXfbBuffer: - case SpvDecorationStride: case SpvDecorationFuncParamAttr: case SpvDecorationFPRoundingMode: case SpvDecorationFPFastMathMode: @@ -1118,7 +1134,6 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvStorageClassWorkgroupLocal: case SpvStorageClassWorkgroupGlobal: case SpvStorageClassGeneric: - case SpvStorageClassPrivate: case SpvStorageClassAtomicCounter: default: unreachable("Unhandled 
variable storage class"); @@ -1270,10 +1285,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, break; } - case SpvOpVariableArray: case SpvOpCopyMemorySized: case SpvOpArrayLength: - case SpvOpImagePointer: + case SpvOpImageTexelPointer: default: unreachable("Unhandled opcode"); } @@ -1342,31 +1356,24 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, nir_tex_src srcs[8]; /* 8 should be enough */ nir_tex_src *p = srcs; + unsigned idx = 4; + unsigned coord_components = 0; switch (opcode) { - case SpvOpTextureSample: - case SpvOpTextureSampleDref: - case SpvOpTextureSampleLod: - case SpvOpTextureSampleProj: - case SpvOpTextureSampleGrad: - case SpvOpTextureSampleOffset: - case SpvOpTextureSampleProjLod: - case SpvOpTextureSampleProjGrad: - case SpvOpTextureSampleLodOffset: - case SpvOpTextureSampleProjOffset: - case SpvOpTextureSampleGradOffset: - case SpvOpTextureSampleProjLodOffset: - case SpvOpTextureSampleProjGradOffset: - case SpvOpTextureFetchTexelLod: - case SpvOpTextureFetchTexelOffset: - case SpvOpTextureFetchSample: - case SpvOpTextureFetchTexel: - case SpvOpTextureGather: - case SpvOpTextureGatherOffset: - case SpvOpTextureGatherOffsets: - case SpvOpTextureQueryLod: { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQueryLod: { /* All these types have the coordinate as their first real argument */ - struct vtn_ssa_value *coord = vtn_ssa_value(b, w[4]); + struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); coord_components = glsl_get_vector_elements(coord->type); p->src = nir_src_for_ssa(coord->def); p->src_type = nir_tex_src_coord; @@ -1380,43 +1387,36 @@ 
vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, nir_texop texop; switch (opcode) { - case SpvOpTextureSample: + case SpvOpImageSampleImplicitLod: texop = nir_texop_tex; - - if (count == 6) { - texop = nir_texop_txb; - *p++ = vtn_tex_src(b, w[5], nir_tex_src_bias); - } break; - case SpvOpTextureSampleDref: - case SpvOpTextureSampleLod: - case SpvOpTextureSampleProj: - case SpvOpTextureSampleGrad: - case SpvOpTextureSampleOffset: - case SpvOpTextureSampleProjLod: - case SpvOpTextureSampleProjGrad: - case SpvOpTextureSampleLodOffset: - case SpvOpTextureSampleProjOffset: - case SpvOpTextureSampleGradOffset: - case SpvOpTextureSampleProjLodOffset: - case SpvOpTextureSampleProjGradOffset: - case SpvOpTextureFetchTexelLod: - case SpvOpTextureFetchTexelOffset: - case SpvOpTextureFetchSample: - case SpvOpTextureFetchTexel: - case SpvOpTextureGather: - case SpvOpTextureGatherOffset: - case SpvOpTextureGatherOffsets: - case SpvOpTextureQuerySizeLod: - case SpvOpTextureQuerySize: - case SpvOpTextureQueryLod: - case SpvOpTextureQueryLevels: - case SpvOpTextureQuerySamples: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + case SpvOpImageQueryLod: + case SpvOpImageQueryLevels: + case SpvOpImageQuerySamples: default: unreachable("Unhandled opcode"); } + /* From now on, the remaining sources are "Optional Image Operands." */ + if (idx < count) { + /* XXX handle these (bias, lod, etc.) 
*/ + assert(0); + } + + nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); const struct glsl_type *sampler_type = nir_deref_tail(&sampler->deref)->type; @@ -1742,7 +1742,8 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; case SpvOpShiftLeftLogical: op = nir_op_ishl; break; case SpvOpLogicalOr: op = nir_op_ior; break; - case SpvOpLogicalXor: op = nir_op_ixor; break; + case SpvOpLogicalEqual: op = nir_op_ieq; break; + case SpvOpLogicalNotEqual: op = nir_op_ine; break; case SpvOpLogicalAnd: op = nir_op_iand; break; case SpvOpBitwiseOr: op = nir_op_ior; break; case SpvOpBitwiseXor: op = nir_op_ixor; break; @@ -2200,11 +2201,19 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, switch (opcode) { case SpvOpSource: case SpvOpSourceExtension: - case SpvOpCompileFlag: case SpvOpExtension: /* Unhandled, but these are for debug so that's ok. */ break; + case SpvOpCapability: + /* + * TODO properly handle these and give a real error if asking for too + * much. 
+ */ + assert(w[1] == SpvCapabilityMatrix || + w[1] == SpvCapabilityShader); + break; + case SpvOpExtInstImport: vtn_handle_extension(b, opcode, w, count); break; @@ -2221,7 +2230,10 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpExecutionMode: - unreachable("Execution modes not yet implemented"); + /* + * TODO handle these - for Vulkan OriginUpperLeft is always set for + * fragment shaders, so we can ignore this for now + */ break; case SpvOpString: @@ -2254,7 +2266,9 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpTypeFloat: case SpvOpTypeVector: case SpvOpTypeMatrix: + case SpvOpTypeImage: case SpvOpTypeSampler: + case SpvOpTypeSampledImage: case SpvOpTypeArray: case SpvOpTypeRuntimeArray: case SpvOpTypeStruct: @@ -2274,8 +2288,6 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpConstant: case SpvOpConstantComposite: case SpvOpConstantSampler: - case SpvOpConstantNullPointer: - case SpvOpConstantNullObject: case SpvOpSpecConstantTrue: case SpvOpSpecConstantFalse: case SpvOpSpecConstant: @@ -2422,7 +2434,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpVariable: - case SpvOpVariableArray: case SpvOpLoad: case SpvOpStore: case SpvOpCopyMemory: @@ -2430,7 +2441,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpAccessChain: case SpvOpInBoundsAccessChain: case SpvOpArrayLength: - case SpvOpImagePointer: + case SpvOpImageTexelPointer: vtn_handle_variables(b, opcode, w, count); break; @@ -2438,31 +2449,22 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_function_call(b, opcode, w, count); break; - case SpvOpTextureSample: - case SpvOpTextureSampleDref: - case SpvOpTextureSampleLod: - case SpvOpTextureSampleProj: - case SpvOpTextureSampleGrad: - case SpvOpTextureSampleOffset: - case SpvOpTextureSampleProjLod: - case SpvOpTextureSampleProjGrad: - case 
SpvOpTextureSampleLodOffset: - case SpvOpTextureSampleProjOffset: - case SpvOpTextureSampleGradOffset: - case SpvOpTextureSampleProjLodOffset: - case SpvOpTextureSampleProjGradOffset: - case SpvOpTextureFetchTexelLod: - case SpvOpTextureFetchTexelOffset: - case SpvOpTextureFetchSample: - case SpvOpTextureFetchTexel: - case SpvOpTextureGather: - case SpvOpTextureGatherOffset: - case SpvOpTextureGatherOffsets: - case SpvOpTextureQuerySizeLod: - case SpvOpTextureQuerySize: - case SpvOpTextureQueryLod: - case SpvOpTextureQueryLevels: - case SpvOpTextureQuerySamples: + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + case SpvOpImageQueryLod: + case SpvOpImageQueryLevels: + case SpvOpImageQuerySamples: vtn_handle_texture(b, opcode, w, count); break; @@ -2511,7 +2513,8 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpShiftRightArithmetic: case SpvOpShiftLeftLogical: case SpvOpLogicalOr: - case SpvOpLogicalXor: + case SpvOpLogicalEqual: + case SpvOpLogicalNotEqual: case SpvOpLogicalAnd: case SpvOpBitwiseOr: case SpvOpBitwiseXor: -- cgit v1.2.3 From 435ccf4056ac3aafeb3975b214b651f45a2d9dba Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 13 Jul 2015 13:19:33 -0700 Subject: vk/0.132: Rename VkDynamic*State types sed -i -e 's/VkDynamicVpState/VkDynamicViewportState/g' \ -e 's/VkDynamicRsState/VkDynamicRasterState/g' \ -e 's/VkDynamicCbState/VkDynamicColorBlendState/g' \ -e 's/VkDynamicDsState/VkDynamicDepthStencilState/g' \ $(git ls-files include/vulkan src/vulkan) --- include/vulkan/vulkan.h | 40 ++++++++++++++++++++-------------------- 
src/vulkan/device.c | 18 +++++++++--------- src/vulkan/meta.c | 8 ++++---- src/vulkan/private.h | 16 ++++++++-------- 4 files changed, 41 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 9f0b438f02e..c32e41103ea 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -103,10 +103,10 @@ VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkSampler, VkNonDispatchable) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDescriptorPool, VkNonDispatchable) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDescriptorSet, VkNonDispatchable) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicStateObject, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicVpState, VkDynamicStateObject) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicRsState, VkDynamicStateObject) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicCbState, VkDynamicStateObject) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicDsState, VkDynamicStateObject) +VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicViewportState, VkDynamicStateObject) +VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicRasterState, VkDynamicStateObject) +VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicColorBlendState, VkDynamicStateObject) +VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicDepthStencilState, VkDynamicStateObject) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkFramebuffer, VkNonDispatchable) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkRenderPass, VkNonDispatchable) @@ -1772,7 +1772,7 @@ typedef struct { uint32_t viewportAndScissorCount; const VkViewport* pViewports; const VkRect2D* pScissors; -} VkDynamicVpStateCreateInfo; +} VkDynamicViewportStateCreateInfo; typedef struct { VkStructureType sType; @@ -1781,13 +1781,13 @@ typedef struct { float depthBiasClamp; float slopeScaledDepthBias; float lineWidth; -} VkDynamicRsStateCreateInfo; +} VkDynamicRasterStateCreateInfo; typedef struct { VkStructureType sType; const void* pNext; float blendConst[4]; -} VkDynamicCbStateCreateInfo; +} VkDynamicColorBlendStateCreateInfo; typedef struct { 
VkStructureType sType; @@ -1798,7 +1798,7 @@ typedef struct { uint32_t stencilWriteMask; uint32_t stencilFrontRef; uint32_t stencilBackRef; -} VkDynamicDsStateCreateInfo; +} VkDynamicDepthStencilStateCreateInfo; typedef struct { VkColorAttachmentView view; @@ -2044,10 +2044,10 @@ typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, VkDescript typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets, uint32_t* pCount); typedef VkResult (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); -typedef VkResult (VKAPI *PFN_vkCreateDynamicViewportState)(VkDevice device, const VkDynamicVpStateCreateInfo* pCreateInfo, VkDynamicVpState* pState); -typedef VkResult (VKAPI *PFN_vkCreateDynamicRasterState)(VkDevice device, const VkDynamicRsStateCreateInfo* pCreateInfo, VkDynamicRsState* pState); -typedef VkResult (VKAPI *PFN_vkCreateDynamicColorBlendState)(VkDevice device, const VkDynamicCbStateCreateInfo* pCreateInfo, VkDynamicCbState* pState); -typedef VkResult (VKAPI *PFN_vkCreateDynamicDepthStencilState)(VkDevice device, const VkDynamicDsStateCreateInfo* pCreateInfo, VkDynamicDsState* pState); +typedef VkResult (VKAPI *PFN_vkCreateDynamicViewportState)(VkDevice device, const VkDynamicViewportStateCreateInfo* pCreateInfo, VkDynamicViewportState* pState); +typedef VkResult (VKAPI *PFN_vkCreateDynamicRasterState)(VkDevice device, const VkDynamicRasterStateCreateInfo* pCreateInfo, VkDynamicRasterState* pState); +typedef VkResult (VKAPI *PFN_vkCreateDynamicColorBlendState)(VkDevice device, const VkDynamicColorBlendStateCreateInfo* pCreateInfo, 
VkDynamicColorBlendState* pState); +typedef VkResult (VKAPI *PFN_vkCreateDynamicDepthStencilState)(VkDevice device, const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, VkDynamicDepthStencilState* pState); typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); typedef VkResult (VKAPI *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); @@ -2445,23 +2445,23 @@ VkResult VKAPI vkUpdateDescriptorSets( VkResult VKAPI vkCreateDynamicViewportState( VkDevice device, - const VkDynamicVpStateCreateInfo* pCreateInfo, - VkDynamicVpState* pState); + const VkDynamicViewportStateCreateInfo* pCreateInfo, + VkDynamicViewportState* pState); VkResult VKAPI vkCreateDynamicRasterState( VkDevice device, - const VkDynamicRsStateCreateInfo* pCreateInfo, - VkDynamicRsState* pState); + const VkDynamicRasterStateCreateInfo* pCreateInfo, + VkDynamicRasterState* pState); VkResult VKAPI vkCreateDynamicColorBlendState( VkDevice device, - const VkDynamicCbStateCreateInfo* pCreateInfo, - VkDynamicCbState* pState); + const VkDynamicColorBlendStateCreateInfo* pCreateInfo, + VkDynamicColorBlendState* pState); VkResult VKAPI vkCreateDynamicDepthStencilState( VkDevice device, - const VkDynamicDsStateCreateInfo* pCreateInfo, - VkDynamicDsState* pState); + const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, + VkDynamicDepthStencilState* pState); VkResult VKAPI vkCreateFramebuffer( VkDevice device, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 9173d6d5e58..34574d0f8c1 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2064,8 +2064,8 @@ anv_dynamic_vp_state_destroy(struct anv_device *device, VkResult anv_CreateDynamicViewportState( VkDevice _device, - const VkDynamicVpStateCreateInfo* pCreateInfo, - 
VkDynamicVpState* pState) + const VkDynamicViewportStateCreateInfo* pCreateInfo, + VkDynamicViewportState* pState) { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_dynamic_vp_state *state; @@ -2151,8 +2151,8 @@ VkResult anv_CreateDynamicViewportState( VkResult anv_CreateDynamicRasterState( VkDevice _device, - const VkDynamicRsStateCreateInfo* pCreateInfo, - VkDynamicRsState* pState) + const VkDynamicRasterStateCreateInfo* pCreateInfo, + VkDynamicRasterState* pState) { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_dynamic_rs_state *state; @@ -2191,8 +2191,8 @@ VkResult anv_CreateDynamicRasterState( VkResult anv_CreateDynamicColorBlendState( VkDevice _device, - const VkDynamicCbStateCreateInfo* pCreateInfo, - VkDynamicCbState* pState) + const VkDynamicColorBlendStateCreateInfo* pCreateInfo, + VkDynamicColorBlendState* pState) { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_dynamic_cb_state *state; @@ -2220,8 +2220,8 @@ VkResult anv_CreateDynamicColorBlendState( VkResult anv_CreateDynamicDepthStencilState( VkDevice _device, - const VkDynamicDsStateCreateInfo* pCreateInfo, - VkDynamicDsState* pState) + const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, + VkDynamicDepthStencilState* pState) { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_dynamic_ds_state *state; @@ -3715,7 +3715,7 @@ VkResult anv_CreateFramebuffer( framebuffer->layers = pCreateInfo->layers; anv_CreateDynamicViewportState(anv_device_to_handle(device), - &(VkDynamicVpStateCreateInfo) { + &(VkDynamicViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, .viewportAndScissorCount = 1, .pViewports = (VkViewport[]) { diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 6b580dc9c49..4b15026f6d9 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -149,7 +149,7 @@ struct anv_saved_state { struct anv_vertex_binding old_vertex_bindings[NUM_VB_USED]; struct anv_descriptor_set *old_descriptor_set0; struct anv_pipeline 
*old_pipeline; - VkDynamicCbState cb_state; + VkDynamicColorBlendState cb_state; }; static void @@ -1339,19 +1339,19 @@ anv_device_init_meta(struct anv_device *device) anv_device_init_meta_blit_state(device); anv_CreateDynamicRasterState(anv_device_to_handle(device), - &(VkDynamicRsStateCreateInfo) { + &(VkDynamicRasterStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, }, &device->meta_state.shared.rs_state); anv_CreateDynamicColorBlendState(anv_device_to_handle(device), - &(VkDynamicCbStateCreateInfo) { + &(VkDynamicColorBlendStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO }, &device->meta_state.shared.cb_state); anv_CreateDynamicDepthStencilState(anv_device_to_handle(device), - &(VkDynamicDsStateCreateInfo) { + &(VkDynamicDepthStencilStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO }, &device->meta_state.shared.ds_state); diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 38679edaa2b..c853a0f5f58 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -367,9 +367,9 @@ struct anv_meta_state { } blit; struct { - VkDynamicRsState rs_state; - VkDynamicCbState cb_state; - VkDynamicDsState ds_state; + VkDynamicRasterState rs_state; + VkDynamicColorBlendState cb_state; + VkDynamicDepthStencilState ds_state; } shared; }; @@ -918,7 +918,7 @@ struct anv_framebuffer { uint32_t layers; /* Viewport for clears */ - VkDynamicVpState vp_state; + VkDynamicViewportState vp_state; }; struct anv_render_pass_layer { @@ -970,10 +970,10 @@ ANV_DEFINE_CASTS(anv_instance, VkInstance) ANV_DEFINE_CASTS(anv_queue, VkQueue) ANV_DEFINE_CASTS(anv_device, VkDevice) ANV_DEFINE_CASTS(anv_device_memory, VkDeviceMemory) -ANV_DEFINE_CASTS(anv_dynamic_vp_state, VkDynamicVpState) -ANV_DEFINE_CASTS(anv_dynamic_rs_state, VkDynamicRsState) -ANV_DEFINE_CASTS(anv_dynamic_ds_state, VkDynamicDsState) -ANV_DEFINE_CASTS(anv_dynamic_cb_state, VkDynamicCbState) +ANV_DEFINE_CASTS(anv_dynamic_vp_state, 
VkDynamicViewportState) +ANV_DEFINE_CASTS(anv_dynamic_rs_state, VkDynamicRasterState) +ANV_DEFINE_CASTS(anv_dynamic_ds_state, VkDynamicDepthStencilState) +ANV_DEFINE_CASTS(anv_dynamic_cb_state, VkDynamicColorBlendState) ANV_DEFINE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) ANV_DEFINE_CASTS(anv_descriptor_set, VkDescriptorSet) ANV_DEFINE_CASTS(anv_pipeline_layout, VkPipelineLayout) -- cgit v1.2.3 From ebb191f14576d870bda8b66f6d5cadc67924bf42 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 09:29:35 -0700 Subject: vk/0.132: Add vkDestroyFence() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 19 ++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index c32e41103ea..476c0523a91 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2010,6 +2010,7 @@ typedef VkResult (VKAPI *PFN_vkGetObjectMemoryRequirements)(VkDevice device, VkO typedef VkResult (VKAPI *PFN_vkQueueBindSparseBufferMemory)(VkQueue queue, VkBuffer buffer, VkDeviceSize rangeOffset, VkDeviceSize rangeSize, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageMemory)(VkQueue queue, VkImage image, const VkImageMemoryBindInfo* pBindInfo, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); +typedef VkResult (VKAPI *PFN_vkDestroyFence)(VkDevice device, VkFence fence); typedef VkResult (VKAPI *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); typedef VkResult (VKAPI *PFN_vkGetFenceStatus)(VkDevice device, VkFence fence); typedef VkResult (VKAPI *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); @@ -2261,6 +2262,10 @@ VkResult VKAPI vkCreateFence( const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); 
+VkResult VKAPI vkDestroyFence( + VkDevice device, + VkFence fence); + VkResult VKAPI vkResetFences( VkDevice device, uint32_t fenceCount, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 34574d0f8c1..14221127fc5 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1353,9 +1353,8 @@ anv_fence_destroy(struct anv_device *device, assert(obj_type == VK_OBJECT_TYPE_FENCE); - anv_gem_munmap(fence->bo.map, fence->bo.size); - anv_gem_close(device, fence->bo.gem_handle); - anv_device_free(device, fence); + anv_DestroyFence(anv_device_to_handle(device), + anv_fence_to_handle(fence)); } VkResult anv_CreateFence( @@ -1423,6 +1422,20 @@ VkResult anv_CreateFence( return result; } +VkResult anv_DestroyFence( + VkDevice _device, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + + anv_gem_munmap(fence->bo.map, fence->bo.size); + anv_gem_close(device, fence->bo.gem_handle); + anv_device_free(device, fence); + + return VK_SUCCESS; +} + VkResult anv_ResetFences( VkDevice _device, uint32_t fenceCount, -- cgit v1.2.3 From 549070b18c301e6f80ce0964037778b6936564c0 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 09:31:34 -0700 Subject: vk/0.132: Add vkDestroySemaphore() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 7 +++++++ 2 files changed, 12 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 476c0523a91..07947d63aeb 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2015,6 +2015,7 @@ typedef VkResult (VKAPI *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount typedef VkResult (VKAPI *PFN_vkGetFenceStatus)(VkDevice device, VkFence fence); typedef VkResult (VKAPI *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); typedef VkResult (VKAPI *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, 
VkSemaphore* pSemaphore); +typedef VkResult (VKAPI *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore semaphore); typedef VkResult (VKAPI *PFN_vkQueueSignalSemaphore)(VkQueue queue, VkSemaphore semaphore); typedef VkResult (VKAPI *PFN_vkQueueWaitSemaphore)(VkQueue queue, VkSemaphore semaphore); typedef VkResult (VKAPI *PFN_vkCreateEvent)(VkDevice device, const VkEventCreateInfo* pCreateInfo, VkEvent* pEvent); @@ -2287,6 +2288,10 @@ VkResult VKAPI vkCreateSemaphore( const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore); +VkResult VKAPI vkDestroySemaphore( + VkDevice device, + VkSemaphore semaphore); + VkResult VKAPI vkQueueSignalSemaphore( VkQueue queue, VkSemaphore semaphore); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 14221127fc5..08d7e9f146c 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1505,6 +1505,13 @@ VkResult anv_CreateSemaphore( stub_return(VK_UNSUPPORTED); } +VkResult anv_DestroySemaphore( + VkDevice device, + VkSemaphore semaphore) +{ + stub_return(VK_UNSUPPORTED); +} + VkResult anv_QueueSignalSemaphore( VkQueue queue, VkSemaphore semaphore) -- cgit v1.2.3 From 68c7ef502da6c99d87e37973cdd1ff92995e77df Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 09:33:47 -0700 Subject: vk/0.132: Add vkDestroyEvent() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 7 +++++++ 2 files changed, 12 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 07947d63aeb..72d20dbeb52 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2019,6 +2019,7 @@ typedef VkResult (VKAPI *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore se typedef VkResult (VKAPI *PFN_vkQueueSignalSemaphore)(VkQueue queue, VkSemaphore semaphore); typedef VkResult (VKAPI *PFN_vkQueueWaitSemaphore)(VkQueue queue, VkSemaphore semaphore); typedef VkResult (VKAPI *PFN_vkCreateEvent)(VkDevice device, const VkEventCreateInfo* pCreateInfo, VkEvent* pEvent); +typedef 
VkResult (VKAPI *PFN_vkDestroyEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI *PFN_vkGetEventStatus)(VkDevice device, VkEvent event); typedef VkResult (VKAPI *PFN_vkSetEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI *PFN_vkResetEvent)(VkDevice device, VkEvent event); @@ -2305,6 +2306,10 @@ VkResult VKAPI vkCreateEvent( const VkEventCreateInfo* pCreateInfo, VkEvent* pEvent); +VkResult VKAPI vkDestroyEvent( + VkDevice device, + VkEvent event); + VkResult VKAPI vkGetEventStatus( VkDevice device, VkEvent event); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 08d7e9f146c..133ffda3926 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1536,6 +1536,13 @@ VkResult anv_CreateEvent( stub_return(VK_UNSUPPORTED); } +VkResult anv_DestroyEvent( + VkDevice device, + VkEvent event) +{ + stub_return(VK_UNSUPPORTED); +} + VkResult anv_GetEventStatus( VkDevice device, VkEvent event) -- cgit v1.2.3 From 584cb7a16f4c1ec1b20760247069c3634232bc8a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 09:44:58 -0700 Subject: vk/0.132: Add vkDestroyQueryPool() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/query.c | 19 ++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 72d20dbeb52..a2ebe4b106e 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2024,6 +2024,7 @@ typedef VkResult (VKAPI *PFN_vkGetEventStatus)(VkDevice device, VkEvent event); typedef VkResult (VKAPI *PFN_vkSetEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI *PFN_vkResetEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, VkQueryPool* pQueryPool); +typedef VkResult (VKAPI *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool); typedef VkResult (VKAPI *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool 
queryPool, uint32_t startQuery, uint32_t queryCount, size_t* pDataSize, void* pData, VkQueryResultFlags flags); typedef VkResult (VKAPI *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); typedef VkResult (VKAPI *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); @@ -2327,6 +2328,10 @@ VkResult VKAPI vkCreateQueryPool( const VkQueryPoolCreateInfo* pCreateInfo, VkQueryPool* pQueryPool); +VkResult VKAPI vkDestroyQueryPool( + VkDevice device, + VkQueryPool queryPool); + VkResult VKAPI vkGetQueryPoolResults( VkDevice device, VkQueryPool queryPool, diff --git a/src/vulkan/query.c b/src/vulkan/query.c index 2c68e30eeee..3a1f594816d 100644 --- a/src/vulkan/query.c +++ b/src/vulkan/query.c @@ -51,9 +51,8 @@ anv_query_pool_destroy(struct anv_device *device, assert(obj_type == VK_OBJECT_TYPE_QUERY_POOL); - anv_gem_munmap(pool->bo.map, pool->bo.size); - anv_gem_close(device, pool->bo.gem_handle); - anv_device_free(device, pool); + anv_DestroyQueryPool(anv_device_to_handle(device), + anv_query_pool_to_handle(pool)); } VkResult anv_CreateQueryPool( @@ -102,6 +101,20 @@ VkResult anv_CreateQueryPool( return result; } +VkResult anv_DestroyQueryPool( + VkDevice _device, + VkQueryPool _pool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_query_pool, pool, _pool); + + anv_gem_munmap(pool->bo.map, pool->bo.size); + anv_gem_close(device, pool->bo.gem_handle); + anv_device_free(device, pool); + + return VK_SUCCESS; +} + VkResult anv_GetQueryPoolResults( VkDevice _device, VkQueryPool queryPool, -- cgit v1.2.3 From e93b6d8eb114b2fc7c7b7ef624874214a37dc184 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 09:47:45 -0700 Subject: vk/0.132: Add vkDestroyBuffer() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 12 ++++++++++++ 2 files changed, 17 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h 
index a2ebe4b106e..d4a0469aa30 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2027,6 +2027,7 @@ typedef VkResult (VKAPI *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPo typedef VkResult (VKAPI *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool); typedef VkResult (VKAPI *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, size_t* pDataSize, void* pData, VkQueryResultFlags flags); typedef VkResult (VKAPI *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); +typedef VkResult (VKAPI *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer); typedef VkResult (VKAPI *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); typedef VkResult (VKAPI *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, VkImage* pImage); typedef VkResult (VKAPI *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); @@ -2346,6 +2347,10 @@ VkResult VKAPI vkCreateBuffer( const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); +VkResult VKAPI vkDestroyBuffer( + VkDevice device, + VkBuffer buffer); + VkResult VKAPI vkCreateBufferView( VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 133ffda3926..02e00c4a326 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1590,6 +1590,18 @@ VkResult anv_CreateBuffer( return VK_SUCCESS; } +VkResult anv_DestroyBuffer( + VkDevice _device, + VkBuffer _buffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + anv_device_free(device, buffer); + + return VK_SUCCESS; +} + // Buffer view functions static void -- cgit v1.2.3 From e18377f43538f487ea79728e0a9b744f89d0bd49 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 
09:56:48 -0700 Subject: vk/0.132: Dispatch vkDestroyObject to new destructors Oops. My recent commits added new destructors, but forgot to teach vkDestroyObject about them. They are: vkDestroyFence vkDestroyEvent vkDestroySemaphore vkDestroyQueryPool vkDestroyBuffer --- src/vulkan/device.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 02e00c4a326..0256b66791f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1184,6 +1184,9 @@ VkResult anv_DestroyObject( struct anv_object *object = (struct anv_object *) _object; switch (objType) { + case VK_OBJECT_TYPE_FENCE: + return anv_DestroyFence(_device, (VkFence) _object); + case VK_OBJECT_TYPE_INSTANCE: return anv_DestroyInstance((VkInstance) _object); @@ -1223,6 +1226,8 @@ VkResult anv_DestroyObject( return anv_DestroyImage(_device, _object); case VK_OBJECT_TYPE_BUFFER: + return anv_DestroyBuffer(_device, (VkBuffer) _object); + case VK_OBJECT_TYPE_SHADER: case VK_OBJECT_TYPE_SHADER_MODULE: case VK_OBJECT_TYPE_PIPELINE_LAYOUT: @@ -1240,15 +1245,18 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_COMMAND_BUFFER: case VK_OBJECT_TYPE_PIPELINE: case VK_OBJECT_TYPE_DYNAMIC_VP_STATE: - case VK_OBJECT_TYPE_FENCE: - case VK_OBJECT_TYPE_QUERY_POOL: case VK_OBJECT_TYPE_FRAMEBUFFER: (object->destructor)(device, object, objType); return VK_SUCCESS; + case VK_OBJECT_TYPE_QUERY_POOL: + return anv_DestroyQueryPool(_device, (VkQueryPool) _object); + case VK_OBJECT_TYPE_SEMAPHORE: + return anv_DestroySemaphore(_device, (VkSemaphore) _object); + case VK_OBJECT_TYPE_EVENT: - stub_return(VK_UNSUPPORTED); + return anv_DestroyEvent(_device, (VkEvent) _object); default: unreachable("Invalid object type"); -- cgit v1.2.3 From dd67c134ade2fe7ee7b4a4db7a68f6d89560f37e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:08:04 -0700 Subject: vk/0.132: Add vkDestroyImage() We only need to add it to vulkan.h because 
Jason defined the function earlier in image.c. --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/private.h | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index d4a0469aa30..c4ab8c7da2a 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2030,6 +2030,7 @@ typedef VkResult (VKAPI *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCrea typedef VkResult (VKAPI *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer); typedef VkResult (VKAPI *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); typedef VkResult (VKAPI *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, VkImage* pImage); +typedef VkResult (VKAPI *PFN_vkDestroyImage)(VkDevice device, VkImage image); typedef VkResult (VKAPI *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); typedef VkResult (VKAPI *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); typedef VkResult (VKAPI *PFN_vkCreateColorAttachmentView)(VkDevice device, const VkColorAttachmentViewCreateInfo* pCreateInfo, VkColorAttachmentView* pView); @@ -2361,6 +2362,10 @@ VkResult VKAPI vkCreateImage( const VkImageCreateInfo* pCreateInfo, VkImage* pImage); +VkResult VKAPI vkDestroyImage( + VkDevice device, + VkImage image); + VkResult VKAPI vkGetImageSubresourceLayout( VkDevice device, VkImage image, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index c853a0f5f58..6b0576440b5 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -944,7 +944,6 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, void * anv_lookup_entrypoint(const char *name); -VkResult anv_DestroyImage(VkDevice device, VkImage image); VkResult anv_DestroyImageView(VkDevice device, VkImageView imageView); VkResult anv_DestroyBufferView(VkDevice 
device, VkBufferView bufferView); VkResult anv_DestroyColorAttachmentView(VkDevice device, -- cgit v1.2.3 From 8ae8e14ba7e5628f4bf3e396fe48f3b46649107b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:12:10 -0700 Subject: vk/0.132: Add vkDestroyShaderModule() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 4 +++- src/vulkan/pipeline.c | 12 ++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index c4ab8c7da2a..015ab11d890 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2036,6 +2036,7 @@ typedef VkResult (VKAPI *PFN_vkCreateImageView)(VkDevice device, const VkImageVi typedef VkResult (VKAPI *PFN_vkCreateColorAttachmentView)(VkDevice device, const VkColorAttachmentViewCreateInfo* pCreateInfo, VkColorAttachmentView* pView); typedef VkResult (VKAPI *PFN_vkCreateDepthStencilView)(VkDevice device, const VkDepthStencilViewCreateInfo* pCreateInfo, VkDepthStencilView* pView); typedef VkResult (VKAPI *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); +typedef VkResult (VKAPI *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule); typedef VkResult (VKAPI *PFN_vkCreateShader)(VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); typedef VkResult (VKAPI *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); typedef size_t (VKAPI *PFN_vkGetPipelineCacheSize)(VkDevice device, VkPipelineCache pipelineCache); @@ -2392,6 +2393,10 @@ VkResult VKAPI vkCreateShaderModule( const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); +VkResult VKAPI vkDestroyShaderModule( + VkDevice device, + VkShaderModule shaderModule); + VkResult VKAPI vkCreateShader( VkDevice device, const VkShaderCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c 
b/src/vulkan/device.c index 0256b66791f..76c44ba507e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1228,8 +1228,10 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_BUFFER: return anv_DestroyBuffer(_device, (VkBuffer) _object); - case VK_OBJECT_TYPE_SHADER: case VK_OBJECT_TYPE_SHADER_MODULE: + return anv_DestroyShaderModule(_device, (VkShaderModule) _object); + + case VK_OBJECT_TYPE_SHADER: case VK_OBJECT_TYPE_PIPELINE_LAYOUT: case VK_OBJECT_TYPE_SAMPLER: case VK_OBJECT_TYPE_DESCRIPTOR_SET: diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 6396e6cc0f6..76a38250701 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -55,6 +55,18 @@ VkResult anv_CreateShaderModule( return VK_SUCCESS; } +VkResult anv_DestroyShaderModule( + VkDevice _device, + VkShaderModule _module) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, _module); + + anv_device_free(device, module); + + return VK_SUCCESS; +} + VkResult anv_CreateShader( VkDevice _device, const VkShaderCreateInfo* pCreateInfo, -- cgit v1.2.3 From cb57bff36c5b8f449f4f9738e71d5359d9090fc8 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:16:22 -0700 Subject: vk/0.132: Add vkDestroyShader() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 2 ++ src/vulkan/pipeline.c | 13 +++++++++++++ 3 files changed, 20 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 015ab11d890..91d67156126 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2038,6 +2038,7 @@ typedef VkResult (VKAPI *PFN_vkCreateDepthStencilView)(VkDevice device, const Vk typedef VkResult (VKAPI *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); typedef VkResult (VKAPI *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule); typedef VkResult (VKAPI *PFN_vkCreateShader)(VkDevice device, const 
VkShaderCreateInfo* pCreateInfo, VkShader* pShader); +typedef VkResult (VKAPI *PFN_vkDestroyShader)(VkDevice device, VkShader shader); typedef VkResult (VKAPI *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); typedef size_t (VKAPI *PFN_vkGetPipelineCacheSize)(VkDevice device, VkPipelineCache pipelineCache); typedef VkResult (VKAPI *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, void* pData); @@ -2402,6 +2403,10 @@ VkResult VKAPI vkCreateShader( const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); +VkResult VKAPI vkDestroyShader( + VkDevice device, + VkShader shader); + VkResult VKAPI vkCreatePipelineCache( VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 76c44ba507e..2c110726d5f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1232,6 +1232,8 @@ VkResult anv_DestroyObject( return anv_DestroyShaderModule(_device, (VkShaderModule) _object); case VK_OBJECT_TYPE_SHADER: + return anv_DestroyShader(_device, (VkShader) _object); + case VK_OBJECT_TYPE_PIPELINE_LAYOUT: case VK_OBJECT_TYPE_SAMPLER: case VK_OBJECT_TYPE_DESCRIPTOR_SET: diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 76a38250701..991ab9f21c2 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -98,6 +98,19 @@ VkResult anv_CreateShader( return VK_SUCCESS; } +VkResult anv_DestroyShader( + VkDevice _device, + VkShader _shader) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader, shader, _shader); + + anv_device_free(device, shader); + + return VK_SUCCESS; +} + + VkResult anv_CreatePipelineCache( VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, -- cgit v1.2.3 From 114015321eb3998e2814687e3584994bc061a33b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:19:27 -0700 Subject: vk/0.132: Add vkDestroyPipelineCache() --- 
include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 4 +++- src/vulkan/pipeline.c | 8 ++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 91d67156126..35d8e45888d 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2040,6 +2040,7 @@ typedef VkResult (VKAPI *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderMod typedef VkResult (VKAPI *PFN_vkCreateShader)(VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); typedef VkResult (VKAPI *PFN_vkDestroyShader)(VkDevice device, VkShader shader); typedef VkResult (VKAPI *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); +typedef VkResult (VKAPI *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache); typedef size_t (VKAPI *PFN_vkGetPipelineCacheSize)(VkDevice device, VkPipelineCache pipelineCache); typedef VkResult (VKAPI *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, void* pData); typedef VkResult (VKAPI *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache destCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); @@ -2412,6 +2413,10 @@ VkResult VKAPI vkCreatePipelineCache( const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); +VkResult VKAPI vkDestroyPipelineCache( + VkDevice device, + VkPipelineCache pipelineCache); + size_t VKAPI vkGetPipelineCacheSize( VkDevice device, VkPipelineCache pipelineCache); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 2c110726d5f..599475135e7 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1206,10 +1206,12 @@ VkResult anv_DestroyObject( return anv_FreeMemory(_device, (VkDeviceMemory) _object); case VK_OBJECT_TYPE_DESCRIPTOR_POOL: - case VK_OBJECT_TYPE_PIPELINE_CACHE: /* These are just dummys anyway, so we don't need to destroy them */ return VK_SUCCESS; + 
case VK_OBJECT_TYPE_PIPELINE_CACHE: + return anv_DestroyPipelineCache(_device, (VkPipelineCache) _object); + case VK_OBJECT_TYPE_BUFFER_VIEW: return anv_DestroyBufferView(_device, _object); diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 991ab9f21c2..d21af0ff8b4 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -121,6 +121,14 @@ VkResult anv_CreatePipelineCache( stub_return(VK_SUCCESS); } +VkResult anv_DestroyPipelineCache( + VkDevice _device, + VkPipelineCache _cache) +{ + /* VkPipelineCache is a dummy object. */ + return VK_SUCCESS; +} + size_t anv_GetPipelineCacheSize( VkDevice device, VkPipelineCache pipelineCache) -- cgit v1.2.3 From 6e5ab5cf1bdac9292b512bbbe10da44558fe1bc0 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:26:17 -0700 Subject: vk/0.132: Add vkDestroyPipeline() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 4 +++- src/vulkan/pipeline.c | 23 ++++++++++++++++++----- 3 files changed, 26 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 35d8e45888d..bbb75001d45 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2046,6 +2046,7 @@ typedef VkResult (VKAPI *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipeline typedef VkResult (VKAPI *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache destCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); typedef VkResult (VKAPI *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); typedef VkResult (VKAPI *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); +typedef VkResult (VKAPI *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline); typedef VkResult (VKAPI *PFN_vkCreatePipelineLayout)(VkDevice device, 
const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout); typedef VkResult (VKAPI *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); typedef VkResult (VKAPI *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); @@ -2446,6 +2447,10 @@ VkResult VKAPI vkCreateComputePipelines( const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); +VkResult VKAPI vkDestroyPipeline( + VkDevice device, + VkPipeline pipeline); + VkResult VKAPI vkCreatePipelineLayout( VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 599475135e7..43724641c08 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1249,12 +1249,14 @@ VkResult anv_DestroyObject( return VK_SUCCESS; case VK_OBJECT_TYPE_COMMAND_BUFFER: - case VK_OBJECT_TYPE_PIPELINE: case VK_OBJECT_TYPE_DYNAMIC_VP_STATE: case VK_OBJECT_TYPE_FRAMEBUFFER: (object->destructor)(device, object, objType); return VK_SUCCESS; + case VK_OBJECT_TYPE_PIPELINE: + return anv_DestroyPipeline(_device, (VkPipeline) _object); + case VK_OBJECT_TYPE_QUERY_POOL: return anv_DestroyQueryPool(_device, (VkQueryPool) _object); diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index d21af0ff8b4..3d67a3a1b6d 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -476,11 +476,8 @@ anv_pipeline_destroy(struct anv_device *device, assert(obj_type == VK_OBJECT_TYPE_PIPELINE); - anv_compiler_free(pipeline); - anv_reloc_list_finish(&pipeline->batch.relocs, pipeline->device); - anv_state_stream_finish(&pipeline->program_stream); - anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); - anv_device_free(pipeline->device, pipeline); + anv_DestroyPipeline(anv_device_to_handle(device), + anv_pipeline_to_handle(pipeline)); } VkResult @@ -769,6 +766,22 @@ anv_pipeline_create( return 
VK_SUCCESS; } +VkResult anv_DestroyPipeline( + VkDevice _device, + VkPipeline _pipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + anv_compiler_free(pipeline); + anv_reloc_list_finish(&pipeline->batch.relocs, pipeline->device); + anv_state_stream_finish(&pipeline->program_stream); + anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + anv_device_free(pipeline->device, pipeline); + + return VK_SUCCESS; +} + VkResult anv_CreateGraphicsPipelines( VkDevice _device, VkPipelineCache pipelineCache, -- cgit v1.2.3 From a6841989357d23b7736065a7d34e673f8831aae9 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:28:41 -0700 Subject: vk/0.132: Add vkDestroyPipelineLayout() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 2 ++ src/vulkan/pipeline.c | 12 ++++++++++++ 3 files changed, 19 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index bbb75001d45..2d1dff11441 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2048,6 +2048,7 @@ typedef VkResult (VKAPI *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipel typedef VkResult (VKAPI *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); typedef VkResult (VKAPI *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline); typedef VkResult (VKAPI *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout); +typedef VkResult (VKAPI *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelineLayout pipelineLayout); typedef VkResult (VKAPI *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); typedef VkResult (VKAPI *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, 
VkDescriptorSetLayout* pSetLayout); typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, VkDescriptorPoolUsage poolUsage, uint32_t maxSets, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); @@ -2456,6 +2457,10 @@ VkResult VKAPI vkCreatePipelineLayout( const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout); +VkResult VKAPI vkDestroyPipelineLayout( + VkDevice device, + VkPipelineLayout pipelineLayout); + VkResult VKAPI vkCreateSampler( VkDevice device, const VkSamplerCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 43724641c08..7c896cdd20c 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1237,6 +1237,8 @@ VkResult anv_DestroyObject( return anv_DestroyShader(_device, (VkShader) _object); case VK_OBJECT_TYPE_PIPELINE_LAYOUT: + return anv_DestroyPipelineLayout(_device, (VkPipelineLayout) _object); + case VK_OBJECT_TYPE_SAMPLER: case VK_OBJECT_TYPE_DESCRIPTOR_SET: case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT: diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 3d67a3a1b6d..42a900d7ab0 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -955,3 +955,15 @@ VkResult anv_CreatePipelineLayout( return VK_SUCCESS; } + +VkResult anv_DestroyPipelineLayout( + VkDevice _device, + VkPipelineLayout _pipelineLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); + + anv_device_free(device, pipeline_layout); + + return VK_SUCCESS; +} -- cgit v1.2.3 From ec5e2f4992a5ddd7aeb4385324fb4d674d168d36 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:34:00 -0700 Subject: vk/0.132: Add vkDestroySampler() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 14 ++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 2d1dff11441..1cc4d8c2988 100644 --- 
a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2050,6 +2050,7 @@ typedef VkResult (VKAPI *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipe typedef VkResult (VKAPI *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout); typedef VkResult (VKAPI *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelineLayout pipelineLayout); typedef VkResult (VKAPI *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); +typedef VkResult (VKAPI *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler); typedef VkResult (VKAPI *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, VkDescriptorPoolUsage poolUsage, uint32_t maxSets, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); @@ -2466,6 +2467,10 @@ VkResult VKAPI vkCreateSampler( const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); +VkResult VKAPI vkDestroySampler( + VkDevice device, + VkSampler sampler); + VkResult VKAPI vkCreateDescriptorSetLayout( VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 7c896cdd20c..b42d6f4f3b4 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1240,6 +1240,8 @@ VkResult anv_DestroyObject( return anv_DestroyPipelineLayout(_device, (VkPipelineLayout) _object); case VK_OBJECT_TYPE_SAMPLER: + return anv_DestroySampler(_device, (VkSampler) _object); + case VK_OBJECT_TYPE_DESCRIPTOR_SET: case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT: case VK_OBJECT_TYPE_DYNAMIC_RS_STATE: @@ -1818,6 +1820,18 @@ VkResult anv_CreateSampler( return VK_SUCCESS; } +VkResult anv_DestroySampler( + VkDevice _device, 
+ VkSampler _sampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); + + anv_device_free(device, sampler); + + return VK_SUCCESS; +} + // Descriptor set functions VkResult anv_CreateDescriptorSetLayout( -- cgit v1.2.3 From f925ea31e7085dd7a873103fe8e485c602235935 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:36:49 -0700 Subject: vk/0.132: Add vkDestroyDescriptorSetLayout() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 1cc4d8c2988..33885c09abc 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2052,6 +2052,7 @@ typedef VkResult (VKAPI *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelin typedef VkResult (VKAPI *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); typedef VkResult (VKAPI *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler); typedef VkResult (VKAPI *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); +typedef VkResult (VKAPI *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout); typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, VkDescriptorPoolUsage poolUsage, uint32_t maxSets, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets, uint32_t* pCount); @@ -2476,6 +2477,10 @@ VkResult VKAPI vkCreateDescriptorSetLayout( const 
VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); +VkResult VKAPI vkDestroyDescriptorSetLayout( + VkDevice device, + VkDescriptorSetLayout descriptorSetLayout); + VkResult VKAPI vkCreateDescriptorPool( VkDevice device, VkDescriptorPoolUsage poolUsage, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index b42d6f4f3b4..e54920efcf6 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1242,8 +1242,10 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_SAMPLER: return anv_DestroySampler(_device, (VkSampler) _object); - case VK_OBJECT_TYPE_DESCRIPTOR_SET: case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT: + return anv_DestroyDescriptorSetLayout(_device, (VkDescriptorSetLayout) _object); + + case VK_OBJECT_TYPE_DESCRIPTOR_SET: case VK_OBJECT_TYPE_DYNAMIC_RS_STATE: case VK_OBJECT_TYPE_DYNAMIC_CB_STATE: case VK_OBJECT_TYPE_DYNAMIC_DS_STATE: @@ -1985,6 +1987,18 @@ VkResult anv_CreateDescriptorSetLayout( return VK_SUCCESS; } +VkResult anv_DestroyDescriptorSetLayout( + VkDevice _device, + VkDescriptorSetLayout _set_layout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); + + anv_device_free(device, set_layout); + + return VK_SUCCESS; +} + VkResult anv_CreateDescriptorPool( VkDevice device, VkDescriptorPoolUsage poolUsage, -- cgit v1.2.3 From 9250e1e9e515f68a5f19b5ac2f7b8fad6e7db1bd Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:38:22 -0700 Subject: vk/0.132: Add vkDestroyDescriptorPool() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 11 +++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 33885c09abc..eb5514c59eb 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2054,6 +2054,7 @@ typedef VkResult (VKAPI *PFN_vkDestroySampler)(VkDevice device, VkSampler sample typedef VkResult (VKAPI 
*PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); typedef VkResult (VKAPI *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout); typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, VkDescriptorPoolUsage poolUsage, uint32_t maxSets, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); +typedef VkResult (VKAPI *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets, uint32_t* pCount); typedef VkResult (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); @@ -2488,6 +2489,10 @@ VkResult VKAPI vkCreateDescriptorPool( const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); +VkResult VKAPI vkDestroyDescriptorPool( + VkDevice device, + VkDescriptorPool descriptorPool); + VkResult VKAPI vkResetDescriptorPool( VkDevice device, VkDescriptorPool descriptorPool); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index e54920efcf6..9d1c0019a68 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1206,8 +1206,7 @@ VkResult anv_DestroyObject( return anv_FreeMemory(_device, (VkDeviceMemory) _object); case VK_OBJECT_TYPE_DESCRIPTOR_POOL: - /* These are just dummys anyway, so we don't need to destroy them */ - return VK_SUCCESS; + return anv_DestroyDescriptorPool(_device, (VkDescriptorPool) _object); case VK_OBJECT_TYPE_PIPELINE_CACHE: return anv_DestroyPipelineCache(_device, 
(VkPipelineCache) _object); @@ -2011,6 +2010,14 @@ VkResult anv_CreateDescriptorPool( return VK_SUCCESS; } +VkResult anv_DestroyDescriptorPool( + VkDevice _device, + VkDescriptorPool _pool) +{ + /* VkDescriptorPool is a dummy object. */ + return VK_SUCCESS; +} + VkResult anv_ResetDescriptorPool( VkDevice device, VkDescriptorPool descriptorPool) -- cgit v1.2.3 From d80fea1af603672a774fdffbbcf0b4e2011a4526 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:42:45 -0700 Subject: vk/0.132: Add vkDestroyDynamicViewportState() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 29 ++++++++++++++++++++++------- 2 files changed, 27 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index eb5514c59eb..949b7df8c5f 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2059,6 +2059,7 @@ typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescripto typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets, uint32_t* pCount); typedef VkResult (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); typedef VkResult (VKAPI *PFN_vkCreateDynamicViewportState)(VkDevice device, const VkDynamicViewportStateCreateInfo* pCreateInfo, VkDynamicViewportState* pState); +typedef VkResult (VKAPI *PFN_vkDestroyDynamicViewportState)(VkDevice device, VkDynamicViewportState dynamicViewportState); typedef VkResult (VKAPI *PFN_vkCreateDynamicRasterState)(VkDevice device, const VkDynamicRasterStateCreateInfo* pCreateInfo, VkDynamicRasterState* pState); typedef VkResult (VKAPI *PFN_vkCreateDynamicColorBlendState)(VkDevice device, const VkDynamicColorBlendStateCreateInfo* pCreateInfo, 
VkDynamicColorBlendState* pState); typedef VkResult (VKAPI *PFN_vkCreateDynamicDepthStencilState)(VkDevice device, const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, VkDynamicDepthStencilState* pState); @@ -2518,6 +2519,10 @@ VkResult VKAPI vkCreateDynamicViewportState( const VkDynamicViewportStateCreateInfo* pCreateInfo, VkDynamicViewportState* pState); +VkResult VKAPI vkDestroyDynamicViewportState( + VkDevice device, + VkDynamicViewportState dynamicViewportState); + VkResult VKAPI vkCreateDynamicRasterState( VkDevice device, const VkDynamicRasterStateCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 9d1c0019a68..0b2914e7d8e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1253,8 +1253,10 @@ VkResult anv_DestroyObject( anv_device_free(device, (void *) _object); return VK_SUCCESS; - case VK_OBJECT_TYPE_COMMAND_BUFFER: case VK_OBJECT_TYPE_DYNAMIC_VP_STATE: + return anv_DestroyDynamicViewportState(_device, (VkDynamicViewportState) _object); + + case VK_OBJECT_TYPE_COMMAND_BUFFER: case VK_OBJECT_TYPE_FRAMEBUFFER: (object->destructor)(device, object, objType); return VK_SUCCESS; @@ -2143,15 +2145,12 @@ anv_dynamic_vp_state_destroy(struct anv_device *device, struct anv_object *object, VkObjectType obj_type) { - struct anv_dynamic_vp_state *state = (void *)object; + struct anv_dynamic_vp_state *vp_state = (void *) object; assert(obj_type == VK_OBJECT_TYPE_DYNAMIC_VP_STATE); - anv_state_pool_free(&device->dynamic_state_pool, state->sf_clip_vp); - anv_state_pool_free(&device->dynamic_state_pool, state->cc_vp); - anv_state_pool_free(&device->dynamic_state_pool, state->scissor); - - anv_device_free(device, state); + anv_DestroyDynamicViewportState(anv_device_to_handle(device), + anv_dynamic_vp_state_to_handle(vp_state)); } VkResult anv_CreateDynamicViewportState( @@ -2241,6 +2240,22 @@ VkResult anv_CreateDynamicViewportState( return VK_SUCCESS; } +VkResult anv_DestroyDynamicViewportState( + VkDevice _device, + 
VkDynamicViewportState _vp_state) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, _vp_state); + + anv_state_pool_free(&device->dynamic_state_pool, vp_state->sf_clip_vp); + anv_state_pool_free(&device->dynamic_state_pool, vp_state->cc_vp); + anv_state_pool_free(&device->dynamic_state_pool, vp_state->scissor); + + anv_device_free(device, vp_state); + + return VK_SUCCESS; +} + VkResult anv_CreateDynamicRasterState( VkDevice _device, const VkDynamicRasterStateCreateInfo* pCreateInfo, -- cgit v1.2.3 From 5e1737c42f183fb374fc1a76a6cf4ff9d828f850 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:51:08 -0700 Subject: vk/0.132: Add vkDestroyDynamicRasterState() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 949b7df8c5f..9e352da8e4a 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2061,6 +2061,7 @@ typedef VkResult (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t w typedef VkResult (VKAPI *PFN_vkCreateDynamicViewportState)(VkDevice device, const VkDynamicViewportStateCreateInfo* pCreateInfo, VkDynamicViewportState* pState); typedef VkResult (VKAPI *PFN_vkDestroyDynamicViewportState)(VkDevice device, VkDynamicViewportState dynamicViewportState); typedef VkResult (VKAPI *PFN_vkCreateDynamicRasterState)(VkDevice device, const VkDynamicRasterStateCreateInfo* pCreateInfo, VkDynamicRasterState* pState); +typedef VkResult (VKAPI *PFN_vkDestroyDynamicRasterState)(VkDevice device, VkDynamicRasterState dynamicRasterState); typedef VkResult (VKAPI *PFN_vkCreateDynamicColorBlendState)(VkDevice device, const VkDynamicColorBlendStateCreateInfo* pCreateInfo, VkDynamicColorBlendState* pState); typedef VkResult (VKAPI *PFN_vkCreateDynamicDepthStencilState)(VkDevice device, const VkDynamicDepthStencilStateCreateInfo* 
pCreateInfo, VkDynamicDepthStencilState* pState); typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); @@ -2528,6 +2529,10 @@ VkResult VKAPI vkCreateDynamicRasterState( const VkDynamicRasterStateCreateInfo* pCreateInfo, VkDynamicRasterState* pState); +VkResult VKAPI vkDestroyDynamicRasterState( + VkDevice device, + VkDynamicRasterState dynamicRasterState); + VkResult VKAPI vkCreateDynamicColorBlendState( VkDevice device, const VkDynamicColorBlendStateCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 0b2914e7d8e..53e998c226d 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1245,7 +1245,6 @@ VkResult anv_DestroyObject( return anv_DestroyDescriptorSetLayout(_device, (VkDescriptorSetLayout) _object); case VK_OBJECT_TYPE_DESCRIPTOR_SET: - case VK_OBJECT_TYPE_DYNAMIC_RS_STATE: case VK_OBJECT_TYPE_DYNAMIC_CB_STATE: case VK_OBJECT_TYPE_DYNAMIC_DS_STATE: case VK_OBJECT_TYPE_RENDER_PASS: @@ -1256,6 +1255,9 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_DYNAMIC_VP_STATE: return anv_DestroyDynamicViewportState(_device, (VkDynamicViewportState) _object); + case VK_OBJECT_TYPE_DYNAMIC_RS_STATE: + return anv_DestroyDynamicRasterState(_device, (VkDynamicRasterState) _object); + case VK_OBJECT_TYPE_COMMAND_BUFFER: case VK_OBJECT_TYPE_FRAMEBUFFER: (object->destructor)(device, object, objType); @@ -2296,6 +2298,18 @@ VkResult anv_CreateDynamicRasterState( return VK_SUCCESS; } +VkResult anv_DestroyDynamicRasterState( + VkDevice _device, + VkDynamicRasterState _rs_state) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, _rs_state); + + anv_device_free(device, rs_state); + + return VK_SUCCESS; +} + VkResult anv_CreateDynamicColorBlendState( VkDevice _device, const VkDynamicColorBlendStateCreateInfo* pCreateInfo, -- cgit v1.2.3 From b29c929e8e2a7266833e4e1000613a770d5e67ae Mon Sep 17 00:00:00 2001 
From: Chad Versace Date: Tue, 14 Jul 2015 10:52:45 -0700 Subject: vk/0.132: Add vkDestroyDynamicColorBlendState() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 9e352da8e4a..538b3f2e661 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2063,6 +2063,7 @@ typedef VkResult (VKAPI *PFN_vkDestroyDynamicViewportState)(VkDevice device, VkD typedef VkResult (VKAPI *PFN_vkCreateDynamicRasterState)(VkDevice device, const VkDynamicRasterStateCreateInfo* pCreateInfo, VkDynamicRasterState* pState); typedef VkResult (VKAPI *PFN_vkDestroyDynamicRasterState)(VkDevice device, VkDynamicRasterState dynamicRasterState); typedef VkResult (VKAPI *PFN_vkCreateDynamicColorBlendState)(VkDevice device, const VkDynamicColorBlendStateCreateInfo* pCreateInfo, VkDynamicColorBlendState* pState); +typedef VkResult (VKAPI *PFN_vkDestroyDynamicColorBlendState)(VkDevice device, VkDynamicColorBlendState dynamicColorBlendState); typedef VkResult (VKAPI *PFN_vkCreateDynamicDepthStencilState)(VkDevice device, const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, VkDynamicDepthStencilState* pState); typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); @@ -2538,6 +2539,10 @@ VkResult VKAPI vkCreateDynamicColorBlendState( const VkDynamicColorBlendStateCreateInfo* pCreateInfo, VkDynamicColorBlendState* pState); +VkResult VKAPI vkDestroyDynamicColorBlendState( + VkDevice device, + VkDynamicColorBlendState dynamicColorBlendState); + VkResult VKAPI vkCreateDynamicDepthStencilState( VkDevice device, const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c 
b/src/vulkan/device.c index 53e998c226d..bbeb22a60b0 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1245,7 +1245,6 @@ VkResult anv_DestroyObject( return anv_DestroyDescriptorSetLayout(_device, (VkDescriptorSetLayout) _object); case VK_OBJECT_TYPE_DESCRIPTOR_SET: - case VK_OBJECT_TYPE_DYNAMIC_CB_STATE: case VK_OBJECT_TYPE_DYNAMIC_DS_STATE: case VK_OBJECT_TYPE_RENDER_PASS: /* These are trivially destroyable */ @@ -1258,6 +1257,9 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_DYNAMIC_RS_STATE: return anv_DestroyDynamicRasterState(_device, (VkDynamicRasterState) _object); + case VK_OBJECT_TYPE_DYNAMIC_CB_STATE: + return anv_DestroyDynamicColorBlendState(_device, (VkDynamicColorBlendState) _object); + case VK_OBJECT_TYPE_COMMAND_BUFFER: case VK_OBJECT_TYPE_FRAMEBUFFER: (object->destructor)(device, object, objType); @@ -2339,6 +2341,18 @@ VkResult anv_CreateDynamicColorBlendState( return VK_SUCCESS; } +VkResult anv_DestroyDynamicColorBlendState( + VkDevice _device, + VkDynamicColorBlendState _cb_state) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, _cb_state); + + anv_device_free(device, cb_state); + + return VK_SUCCESS; +} + VkResult anv_CreateDynamicDepthStencilState( VkDevice _device, const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, -- cgit v1.2.3 From 0c8456ef1e33fafa6074966e8f19dae4d19740ed Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:54:16 -0700 Subject: vk/0.132: Add vkDestroyDynamicDepthStencilState() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 538b3f2e661..4527f2b51a7 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2065,6 +2065,7 @@ typedef VkResult (VKAPI *PFN_vkDestroyDynamicRasterState)(VkDevice device, VkDyn typedef VkResult (VKAPI 
*PFN_vkCreateDynamicColorBlendState)(VkDevice device, const VkDynamicColorBlendStateCreateInfo* pCreateInfo, VkDynamicColorBlendState* pState); typedef VkResult (VKAPI *PFN_vkDestroyDynamicColorBlendState)(VkDevice device, VkDynamicColorBlendState dynamicColorBlendState); typedef VkResult (VKAPI *PFN_vkCreateDynamicDepthStencilState)(VkDevice device, const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, VkDynamicDepthStencilState* pState); +typedef VkResult (VKAPI *PFN_vkDestroyDynamicDepthStencilState)(VkDevice device, VkDynamicDepthStencilState dynamicDepthStencilState); typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); typedef VkResult (VKAPI *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); @@ -2548,6 +2549,10 @@ VkResult VKAPI vkCreateDynamicDepthStencilState( const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, VkDynamicDepthStencilState* pState); +VkResult VKAPI vkDestroyDynamicDepthStencilState( + VkDevice device, + VkDynamicDepthStencilState dynamicDepthStencilState); + VkResult VKAPI vkCreateFramebuffer( VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index bbeb22a60b0..b35e0ea97b5 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1245,7 +1245,6 @@ VkResult anv_DestroyObject( return anv_DestroyDescriptorSetLayout(_device, (VkDescriptorSetLayout) _object); case VK_OBJECT_TYPE_DESCRIPTOR_SET: - case VK_OBJECT_TYPE_DYNAMIC_DS_STATE: case VK_OBJECT_TYPE_RENDER_PASS: /* These are trivially destroyable */ anv_device_free(device, (void *) _object); @@ -1260,6 +1259,9 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_DYNAMIC_CB_STATE: return anv_DestroyDynamicColorBlendState(_device, 
(VkDynamicColorBlendState) _object); + case VK_OBJECT_TYPE_DYNAMIC_DS_STATE: + return anv_DestroyDynamicDepthStencilState(_device, (VkDynamicDepthStencilState) _object); + case VK_OBJECT_TYPE_COMMAND_BUFFER: case VK_OBJECT_TYPE_FRAMEBUFFER: (object->destructor)(device, object, objType); @@ -2396,6 +2398,18 @@ VkResult anv_CreateDynamicDepthStencilState( return VK_SUCCESS; } +VkResult anv_DestroyDynamicDepthStencilState( + VkDevice _device, + VkDynamicDepthStencilState _ds_state) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, _ds_state); + + anv_device_free(device, ds_state); + + return VK_SUCCESS; +} + // Command buffer functions static void -- cgit v1.2.3 From 08f7731f67dfb7926d66928619e84e4dd33624d3 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 10:59:30 -0700 Subject: vk/0.132: Add vkDestroyFramebuffer() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 25 +++++++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 4527f2b51a7..13d93094978 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2067,6 +2067,7 @@ typedef VkResult (VKAPI *PFN_vkDestroyDynamicColorBlendState)(VkDevice device, V typedef VkResult (VKAPI *PFN_vkCreateDynamicDepthStencilState)(VkDevice device, const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, VkDynamicDepthStencilState* pState); typedef VkResult (VKAPI *PFN_vkDestroyDynamicDepthStencilState)(VkDevice device, VkDynamicDepthStencilState dynamicDepthStencilState); typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); +typedef VkResult (VKAPI *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer); typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); 
typedef VkResult (VKAPI *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); typedef VkResult (VKAPI *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCmdBufferCreateInfo* pCreateInfo, VkCmdBuffer* pCmdBuffer); @@ -2558,6 +2559,10 @@ VkResult VKAPI vkCreateFramebuffer( const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); +VkResult VKAPI vkDestroyFramebuffer( + VkDevice device, + VkFramebuffer framebuffer); + VkResult VKAPI vkCreateRenderPass( VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index b35e0ea97b5..a632a4f7e37 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1262,8 +1262,10 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_DYNAMIC_DS_STATE: return anv_DestroyDynamicDepthStencilState(_device, (VkDynamicDepthStencilState) _object); - case VK_OBJECT_TYPE_COMMAND_BUFFER: case VK_OBJECT_TYPE_FRAMEBUFFER: + return anv_DestroyFramebuffer(_device, (VkFramebuffer) _object); + + case VK_OBJECT_TYPE_COMMAND_BUFFER: (object->destructor)(device, object, objType); return VK_SUCCESS; @@ -3818,11 +3820,8 @@ anv_framebuffer_destroy(struct anv_device *device, assert(obj_type == VK_OBJECT_TYPE_FRAMEBUFFER); - anv_DestroyObject(anv_device_to_handle(device), - VK_OBJECT_TYPE_DYNAMIC_VP_STATE, - fb->vp_state); - - anv_device_free(device, fb); + anv_DestroyFramebuffer(anv_device_to_handle(device), + anv_framebuffer_to_handle(fb)); } VkResult anv_CreateFramebuffer( @@ -3889,6 +3888,20 @@ VkResult anv_CreateFramebuffer( return VK_SUCCESS; } +VkResult anv_DestroyFramebuffer( + VkDevice _device, + VkFramebuffer _fb) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); + + anv_DestroyDynamicViewportState(anv_device_to_handle(device), + fb->vp_state); + anv_device_free(device, fb); + + return VK_SUCCESS; +} + VkResult anv_CreateRenderPass( VkDevice _device, const VkRenderPassCreateInfo* 
pCreateInfo, -- cgit v1.2.3 From 4b2c5a98f02699e396312f580e23cb91c9d233a3 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 11:01:38 -0700 Subject: vk/0.132: Add vkDestroyBufferView() Just declare it in vulkan.h. Jason already defined the function earlier in vulkan.c. --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/private.h | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 13d93094978..0fbb0f1c370 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2029,6 +2029,7 @@ typedef VkResult (VKAPI *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool typedef VkResult (VKAPI *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); typedef VkResult (VKAPI *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer); typedef VkResult (VKAPI *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); +typedef VkResult (VKAPI *PFN_vkDestroyBufferView)(VkDevice device, VkBufferView bufferView); typedef VkResult (VKAPI *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, VkImage* pImage); typedef VkResult (VKAPI *PFN_vkDestroyImage)(VkDevice device, VkImage image); typedef VkResult (VKAPI *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); @@ -2370,6 +2371,10 @@ VkResult VKAPI vkCreateBufferView( const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); +VkResult VKAPI vkDestroyBufferView( + VkDevice device, + VkBufferView bufferView); + VkResult VKAPI vkCreateImage( VkDevice device, const VkImageCreateInfo* pCreateInfo, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 6b0576440b5..d50cc6175b0 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -945,7 +945,6 @@ void * anv_lookup_entrypoint(const char *name); VkResult 
anv_DestroyImageView(VkDevice device, VkImageView imageView); -VkResult anv_DestroyBufferView(VkDevice device, VkBufferView bufferView); VkResult anv_DestroyColorAttachmentView(VkDevice device, VkColorAttachmentView view); VkResult anv_DestroyDepthStencilView(VkDevice device, VkDepthStencilView view); -- cgit v1.2.3 From 6eec0b186ccabd6ffe7ec5ec90582d49198e1f0b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 11:02:33 -0700 Subject: vk/0.132: Add vkDestroyImageView() Just declare it in vulkan.h. Jason defined the function earlier in image.c. --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/private.h | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 0fbb0f1c370..cbb42b2c707 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2034,6 +2034,7 @@ typedef VkResult (VKAPI *PFN_vkCreateImage)(VkDevice device, const VkImageCreate typedef VkResult (VKAPI *PFN_vkDestroyImage)(VkDevice device, VkImage image); typedef VkResult (VKAPI *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); typedef VkResult (VKAPI *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); +typedef VkResult (VKAPI *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView); typedef VkResult (VKAPI *PFN_vkCreateColorAttachmentView)(VkDevice device, const VkColorAttachmentViewCreateInfo* pCreateInfo, VkColorAttachmentView* pView); typedef VkResult (VKAPI *PFN_vkCreateDepthStencilView)(VkDevice device, const VkDepthStencilViewCreateInfo* pCreateInfo, VkDepthStencilView* pView); typedef VkResult (VKAPI *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); @@ -2395,6 +2396,10 @@ VkResult VKAPI vkCreateImageView( const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); 
+VkResult VKAPI vkDestroyImageView( + VkDevice device, + VkImageView imageView); + VkResult VKAPI vkCreateColorAttachmentView( VkDevice device, const VkColorAttachmentViewCreateInfo* pCreateInfo, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index d50cc6175b0..fd4296c17d9 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -944,7 +944,6 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, void * anv_lookup_entrypoint(const char *name); -VkResult anv_DestroyImageView(VkDevice device, VkImageView imageView); VkResult anv_DestroyColorAttachmentView(VkDevice device, VkColorAttachmentView view); VkResult anv_DestroyDepthStencilView(VkDevice device, VkDepthStencilView view); -- cgit v1.2.3 From 1ca611cbad697cab2ece9c101ec25c212e19cdc2 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 11:06:14 -0700 Subject: vk/0.132: Add vkDestroyCommandBuffer() --- include/vulkan/vulkan.h | 5 ++++ src/vulkan/device.c | 62 +++++++++++++++++++++++++++++-------------------- 2 files changed, 42 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index cbb42b2c707..c8b5cd8daea 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2073,6 +2073,7 @@ typedef VkResult (VKAPI *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffe typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); typedef VkResult (VKAPI *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); typedef VkResult (VKAPI *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCmdBufferCreateInfo* pCreateInfo, VkCmdBuffer* pCmdBuffer); +typedef VkResult (VKAPI *PFN_vkDestroyCommandBuffer)(VkDevice device, VkCmdBuffer commandBuffer); typedef VkResult (VKAPI *PFN_vkBeginCommandBuffer)(VkCmdBuffer cmdBuffer, const VkCmdBufferBeginInfo* pBeginInfo); typedef VkResult (VKAPI 
*PFN_vkEndCommandBuffer)(VkCmdBuffer cmdBuffer); typedef VkResult (VKAPI *PFN_vkResetCommandBuffer)(VkCmdBuffer cmdBuffer); @@ -2588,6 +2589,10 @@ VkResult VKAPI vkCreateCommandBuffer( const VkCmdBufferCreateInfo* pCreateInfo, VkCmdBuffer* pCmdBuffer); +VkResult VKAPI vkDestroyCommandBuffer( + VkDevice device, + VkCmdBuffer commandBuffer); + VkResult VKAPI vkBeginCommandBuffer( VkCmdBuffer cmdBuffer, const VkCmdBufferBeginInfo* pBeginInfo); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index a632a4f7e37..7d764f33643 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1181,7 +1181,6 @@ VkResult anv_DestroyObject( VkObject _object) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_object *object = (struct anv_object *) _object; switch (objType) { case VK_OBJECT_TYPE_FENCE: @@ -1266,7 +1265,7 @@ VkResult anv_DestroyObject( return anv_DestroyFramebuffer(_device, (VkFramebuffer) _object); case VK_OBJECT_TYPE_COMMAND_BUFFER: - (object->destructor)(device, object, objType); + return anv_DestroyCommandBuffer(_device, (VkCmdBuffer) _object); return VK_SUCCESS; case VK_OBJECT_TYPE_PIPELINE: @@ -2423,29 +2422,8 @@ anv_cmd_buffer_destroy(struct anv_device *device, assert(obj_type == VK_OBJECT_TYPE_COMMAND_BUFFER); - /* Destroy all of the batch buffers */ - struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; - while (bbo) { - struct anv_batch_bo *prev = bbo->prev_batch_bo; - anv_batch_bo_destroy(bbo, device); - bbo = prev; - } - anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); - - /* Destroy all of the surface state buffers */ - bbo = cmd_buffer->surface_batch_bo; - while (bbo) { - struct anv_batch_bo *prev = bbo->prev_batch_bo; - anv_batch_bo_destroy(bbo, device); - bbo = prev; - } - anv_reloc_list_finish(&cmd_buffer->surface_relocs, device); - - anv_state_stream_finish(&cmd_buffer->surface_state_stream); - anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - anv_device_free(device, cmd_buffer->exec2_objects); - 
anv_device_free(device, cmd_buffer->exec2_bos); - anv_device_free(device, cmd_buffer); + anv_DestroyCommandBuffer(anv_device_to_handle(device), + anv_cmd_buffer_to_handle(cmd_buffer)); } static VkResult @@ -2575,6 +2553,40 @@ VkResult anv_CreateCommandBuffer( return result; } +VkResult anv_DestroyCommandBuffer( + VkDevice _device, + VkCmdBuffer _cmd_buffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); + + /* Destroy all of the batch buffers */ + struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; + while (bbo) { + struct anv_batch_bo *prev = bbo->prev_batch_bo; + anv_batch_bo_destroy(bbo, device); + bbo = prev; + } + anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); + + /* Destroy all of the surface state buffers */ + bbo = cmd_buffer->surface_batch_bo; + while (bbo) { + struct anv_batch_bo *prev = bbo->prev_batch_bo; + anv_batch_bo_destroy(bbo, device); + bbo = prev; + } + anv_reloc_list_finish(&cmd_buffer->surface_relocs, device); + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_device_free(device, cmd_buffer->exec2_objects); + anv_device_free(device, cmd_buffer->exec2_bos); + anv_device_free(device, cmd_buffer); + + return VK_SUCCESS; +} + static void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { -- cgit v1.2.3 From 6d0ed38db57df9e8c0953c90eb3917b1adc80078 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 11:15:39 -0700 Subject: vk/0.132: Add vkDestroy*View() vkDestroyColorAttachmentView vkDestroyDepthStencilView These functions are not in the 0.132 header, but adding them will help us attain the type-safety API updates more quickly. 
--- include/vulkan/vulkan.h | 10 ++++++++++ src/vulkan/private.h | 3 --- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index c8b5cd8daea..23c87de9dd9 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2036,7 +2036,9 @@ typedef VkResult (VKAPI *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkIma typedef VkResult (VKAPI *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); typedef VkResult (VKAPI *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView); typedef VkResult (VKAPI *PFN_vkCreateColorAttachmentView)(VkDevice device, const VkColorAttachmentViewCreateInfo* pCreateInfo, VkColorAttachmentView* pView); +typedef VkResult (VKAPI *PFN_vkDestroyColorAttachmentView)(VkDevice device, VkColorAttachmentView view); typedef VkResult (VKAPI *PFN_vkCreateDepthStencilView)(VkDevice device, const VkDepthStencilViewCreateInfo* pCreateInfo, VkDepthStencilView* pView); +typedef VkResult (VKAPI *PFN_vkDestroyDepthStencilView)(VkDevice device, VkDepthStencilView view); typedef VkResult (VKAPI *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); typedef VkResult (VKAPI *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule); typedef VkResult (VKAPI *PFN_vkCreateShader)(VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); @@ -2406,11 +2408,19 @@ VkResult VKAPI vkCreateColorAttachmentView( const VkColorAttachmentViewCreateInfo* pCreateInfo, VkColorAttachmentView* pView); +VkResult VKAPI vkDestroyColorAttachmentView( + VkDevice device, + VkColorAttachmentView view); + VkResult VKAPI vkCreateDepthStencilView( VkDevice device, const VkDepthStencilViewCreateInfo* pCreateInfo, VkDepthStencilView* pView); +VkResult VKAPI vkDestroyDepthStencilView( + VkDevice device, + VkDepthStencilView view); + VkResult VKAPI 
vkCreateShaderModule( VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index fd4296c17d9..d5a2da2e35d 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -944,9 +944,6 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, void * anv_lookup_entrypoint(const char *name); -VkResult anv_DestroyColorAttachmentView(VkDevice device, - VkColorAttachmentView view); -VkResult anv_DestroyDepthStencilView(VkDevice device, VkDepthStencilView view); VkResult anv_DestroyRenderPass(VkDevice device, VkRenderPass renderPass); #define ANV_DEFINE_CASTS(__anv_type, __VkType) \ -- cgit v1.2.3 From 66cbb7f76d55a1ad7e8d2d0315ce58da99c4f8ed Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 11:21:01 -0700 Subject: vk/0.132: Add vkDestroyRenderPass() --- include/vulkan/vulkan.h | 5 +++++ src/vulkan/device.c | 5 +++-- src/vulkan/private.h | 2 -- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 23c87de9dd9..20672e325a3 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2073,6 +2073,7 @@ typedef VkResult (VKAPI *PFN_vkDestroyDynamicDepthStencilState)(VkDevice device, typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); typedef VkResult (VKAPI *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer); typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); +typedef VkResult (VKAPI *PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass); typedef VkResult (VKAPI *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); typedef VkResult (VKAPI *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCmdBufferCreateInfo* pCreateInfo, VkCmdBuffer* pCmdBuffer); 
typedef VkResult (VKAPI *PFN_vkDestroyCommandBuffer)(VkDevice device, VkCmdBuffer commandBuffer); @@ -2589,6 +2590,10 @@ VkResult VKAPI vkCreateRenderPass( const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); +VkResult VKAPI vkDestroyRenderPass( + VkDevice device, + VkRenderPass renderPass); + VkResult VKAPI vkGetRenderAreaGranularity( VkDevice device, VkRenderPass renderPass, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 7d764f33643..b7cbac97b78 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3951,11 +3951,12 @@ VkResult anv_CreateRenderPass( VkResult anv_DestroyRenderPass( VkDevice _device, - VkRenderPass renderPass) + VkRenderPass _pass) { ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_render_pass, pass, _pass); - anv_device_free(device, anv_render_pass_from_handle(renderPass)); + anv_device_free(device, pass); return VK_SUCCESS; } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index d5a2da2e35d..a8ff24214fd 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -944,8 +944,6 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, void * anv_lookup_entrypoint(const char *name); -VkResult anv_DestroyRenderPass(VkDevice device, VkRenderPass renderPass); - #define ANV_DEFINE_CASTS(__anv_type, __VkType) \ static inline struct __anv_type * \ __anv_type ## _from_handle(__VkType _handle) \ -- cgit v1.2.3 From 84783509926f01dcc6f96ddf8a86e4b9cac1c48f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 10 Jul 2015 20:18:52 -0700 Subject: vk: Implement Multipass --- include/vulkan/vulkan.h | 330 ++++++++++++++++++++++++++---------------------- src/vulkan/device.c | 196 ++++++++++++++++++++-------- src/vulkan/formats.c | 13 ++ src/vulkan/image.c | 111 ++++++++-------- src/vulkan/meta.c | 217 ++++++++++++++++++++----------- src/vulkan/private.h | 98 +++++++++----- 6 files changed, 602 insertions(+), 363 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h 
b/include/vulkan/vulkan.h index 7bbc3a3baa0..16ad456325e 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -31,7 +31,7 @@ extern "C" { /* ** This header is generated from the Khronos Vulkan XML API Registry. ** -** Generated on date 20150620 +** Generated on date 20150624 */ @@ -42,7 +42,7 @@ extern "C" { ((major << 22) | (minor << 12) | patch) // Vulkan API version supported by this file -#define VK_API_VERSION VK_MAKE_VERSION(0, 130, 0) +#define VK_API_VERSION VK_MAKE_VERSION(0, 131, 0) #if defined (__cplusplus) && (VK_UINTPTRLEAST64_MAX == UINTPTR_MAX) #define VK_TYPE_SAFE_COMPATIBLE_HANDLES 1 @@ -71,6 +71,7 @@ extern "C" { #define VK_LAST_MIP_LEVEL UINT32_MAX #define VK_LAST_ARRAY_SLICE UINT32_MAX #define VK_WHOLE_SIZE UINT64_MAX +#define VK_ATTACHMENT_UNUSED UINT32_MAX #define VK_TRUE 1 #define VK_FALSE 0 #define VK_NULL_HANDLE 0 @@ -91,8 +92,7 @@ VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkEvent, VkNonDispatchable) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkQueryPool, VkNonDispatchable) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkBufferView, VkNonDispatchable) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkImageView, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkColorAttachmentView, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDepthStencilView, VkNonDispatchable) +VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkAttachmentView, VkNonDispatchable) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkShaderModule, VkNonDispatchable) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkShader, VkNonDispatchable) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkPipelineCache, VkNonDispatchable) @@ -161,52 +161,55 @@ typedef enum { VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO = 1, VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO = 2, VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO = 3, - VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO = 4, - VK_STRUCTURE_TYPE_DEPTH_STENCIL_VIEW_CREATE_INFO = 5, - VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 6, - VK_STRUCTURE_TYPE_SHADER_CREATE_INFO = 7, - VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 
8, - VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 9, - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 10, - VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO = 11, - VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO = 12, - VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO = 13, - VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO = 14, - VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO = 15, - VK_STRUCTURE_TYPE_EVENT_CREATE_INFO = 16, - VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 17, - VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 18, - VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO = 19, - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 20, - VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 21, - VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 22, - VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO = 23, - VK_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO = 24, - VK_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO = 25, - VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO = 26, - VK_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO = 27, - VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO = 28, - VK_STRUCTURE_TYPE_PIPELINE_DS_STATE_CREATE_INFO = 29, - VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO = 30, - VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 31, - VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO = 32, - VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 33, - VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO = 34, - VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 35, - VK_STRUCTURE_TYPE_LAYER_CREATE_INFO = 36, - VK_STRUCTURE_TYPE_MEMORY_BARRIER = 37, - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 38, - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 39, - VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO = 40, - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET = 41, - VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET = 42, - VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 43, - VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 44, - VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE = 45, - VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO = 46, + VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO 
= 4, + VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 5, + VK_STRUCTURE_TYPE_SHADER_CREATE_INFO = 6, + VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 7, + VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 8, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 9, + VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO = 10, + VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO = 11, + VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO = 12, + VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO = 13, + VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO = 14, + VK_STRUCTURE_TYPE_EVENT_CREATE_INFO = 15, + VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 16, + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 17, + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO = 18, + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 19, + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 20, + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 21, + VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO = 22, + VK_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO = 23, + VK_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO = 24, + VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO = 25, + VK_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO = 26, + VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO = 27, + VK_STRUCTURE_TYPE_PIPELINE_DS_STATE_CREATE_INFO = 28, + VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO = 29, + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 30, + VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO = 31, + VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 32, + VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO = 33, + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 34, + VK_STRUCTURE_TYPE_LAYER_CREATE_INFO = 35, + VK_STRUCTURE_TYPE_MEMORY_BARRIER = 36, + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 37, + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 38, + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO = 39, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET = 40, + VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET = 41, + VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 42, + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 
43, + VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE = 44, + VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO = 45, + VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION = 46, + VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION = 47, + VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY = 48, + VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 49, VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, - VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, - VK_STRUCTURE_TYPE_NUM = (VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), + VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + VK_STRUCTURE_TYPE_NUM = (VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF } VkStructureType; @@ -445,27 +448,26 @@ typedef enum { VK_OBJECT_TYPE_BUFFER_VIEW = 7, VK_OBJECT_TYPE_IMAGE = 8, VK_OBJECT_TYPE_IMAGE_VIEW = 9, - VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW = 10, - VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW = 11, - VK_OBJECT_TYPE_SHADER_MODULE = 12, - VK_OBJECT_TYPE_SHADER = 13, - VK_OBJECT_TYPE_PIPELINE = 14, - VK_OBJECT_TYPE_PIPELINE_LAYOUT = 15, - VK_OBJECT_TYPE_SAMPLER = 16, - VK_OBJECT_TYPE_DESCRIPTOR_SET = 17, - VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT = 18, - VK_OBJECT_TYPE_DESCRIPTOR_POOL = 19, - VK_OBJECT_TYPE_DYNAMIC_VP_STATE = 20, - VK_OBJECT_TYPE_DYNAMIC_RS_STATE = 21, - VK_OBJECT_TYPE_DYNAMIC_CB_STATE = 22, - VK_OBJECT_TYPE_DYNAMIC_DS_STATE = 23, - VK_OBJECT_TYPE_FENCE = 24, - VK_OBJECT_TYPE_SEMAPHORE = 25, - VK_OBJECT_TYPE_EVENT = 26, - VK_OBJECT_TYPE_QUERY_POOL = 27, - VK_OBJECT_TYPE_FRAMEBUFFER = 28, - VK_OBJECT_TYPE_RENDER_PASS = 29, - VK_OBJECT_TYPE_PIPELINE_CACHE = 30, + VK_OBJECT_TYPE_ATTACHMENT_VIEW = 10, + VK_OBJECT_TYPE_SHADER_MODULE = 11, + VK_OBJECT_TYPE_SHADER = 12, + VK_OBJECT_TYPE_PIPELINE = 13, + VK_OBJECT_TYPE_PIPELINE_LAYOUT = 14, + VK_OBJECT_TYPE_SAMPLER = 15, + VK_OBJECT_TYPE_DESCRIPTOR_SET = 16, + VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT = 17, + 
VK_OBJECT_TYPE_DESCRIPTOR_POOL = 18, + VK_OBJECT_TYPE_DYNAMIC_VP_STATE = 19, + VK_OBJECT_TYPE_DYNAMIC_RS_STATE = 20, + VK_OBJECT_TYPE_DYNAMIC_CB_STATE = 21, + VK_OBJECT_TYPE_DYNAMIC_DS_STATE = 22, + VK_OBJECT_TYPE_FENCE = 23, + VK_OBJECT_TYPE_SEMAPHORE = 24, + VK_OBJECT_TYPE_EVENT = 25, + VK_OBJECT_TYPE_QUERY_POOL = 26, + VK_OBJECT_TYPE_FRAMEBUFFER = 27, + VK_OBJECT_TYPE_RENDER_PASS = 28, + VK_OBJECT_TYPE_PIPELINE_CACHE = 29, VK_OBJECT_TYPE_BEGIN_RANGE = VK_OBJECT_TYPE_INSTANCE, VK_OBJECT_TYPE_END_RANGE = VK_OBJECT_TYPE_PIPELINE_CACHE, VK_OBJECT_TYPE_NUM = (VK_OBJECT_TYPE_PIPELINE_CACHE - VK_OBJECT_TYPE_INSTANCE + 1), @@ -743,9 +745,10 @@ typedef enum { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER = 7, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC = 8, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = 9, + VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = 10, VK_DESCRIPTOR_TYPE_BEGIN_RANGE = VK_DESCRIPTOR_TYPE_SAMPLER, - VK_DESCRIPTOR_TYPE_END_RANGE = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, - VK_DESCRIPTOR_TYPE_NUM = (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC - VK_DESCRIPTOR_TYPE_SAMPLER + 1), + VK_DESCRIPTOR_TYPE_END_RANGE = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, + VK_DESCRIPTOR_TYPE_NUM = (VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT - VK_DESCRIPTOR_TYPE_SAMPLER + 1), VK_DESCRIPTOR_TYPE_MAX_ENUM = 0x7FFFFFFF } VkDescriptorType; @@ -998,10 +1001,10 @@ typedef enum { typedef VkFlags VkImageCreateFlags; typedef enum { - VK_DEPTH_STENCIL_VIEW_CREATE_READ_ONLY_DEPTH_BIT = 0x00000001, - VK_DEPTH_STENCIL_VIEW_CREATE_READ_ONLY_STENCIL_BIT = 0x00000002, -} VkDepthStencilViewCreateFlagBits; -typedef VkFlags VkDepthStencilViewCreateFlags; + VK_ATTACHMENT_VIEW_CREATE_READ_ONLY_DEPTH_BIT = 0x00000001, + VK_ATTACHMENT_VIEW_CREATE_READ_ONLY_STENCIL_BIT = 0x00000002, +} VkAttachmentViewCreateFlagBits; +typedef VkFlags VkAttachmentViewCreateFlags; typedef VkFlags VkShaderModuleCreateFlags; typedef VkFlags VkShaderCreateFlags; @@ -1031,6 +1034,11 @@ typedef enum { } VkShaderStageFlagBits; typedef VkFlags 
VkShaderStageFlags; +typedef enum { + VK_SUBPASS_DESCRIPTION_NO_OVERDRAW_BIT = 0x00000001, +} VkSubpassDescriptionFlagBits; +typedef VkFlags VkSubpassDescriptionFlags; + typedef enum { VK_PIPE_EVENT_TOP_OF_PIPE_BIT = 0x00000001, VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE_BIT = 0x00000002, @@ -1062,7 +1070,8 @@ typedef enum { VK_MEMORY_INPUT_SHADER_READ_BIT = 0x00000020, VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT = 0x00000040, VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000080, - VK_MEMORY_INPUT_TRANSFER_BIT = 0x00000100, + VK_MEMORY_INPUT_INPUT_ATTACHMENT_BIT = 0x00000100, + VK_MEMORY_INPUT_TRANSFER_BIT = 0x00000200, } VkMemoryInputFlagBits; typedef VkFlags VkMemoryInputFlags; typedef VkFlags VkCmdBufferCreateFlags; @@ -1465,19 +1474,8 @@ typedef struct { uint32_t mipLevel; uint32_t baseArraySlice; uint32_t arraySize; - VkImage msaaResolveImage; - VkImageSubresourceRange msaaResolveSubResource; -} VkColorAttachmentViewCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkImage image; - uint32_t mipLevel; - uint32_t baseArraySlice; - uint32_t arraySize; - VkDepthStencilViewCreateFlags flags; -} VkDepthStencilViewCreateInfo; + VkAttachmentViewCreateFlags flags; +} VkAttachmentViewCreateInfo; typedef struct { VkStructureType sType; @@ -1592,7 +1590,6 @@ typedef struct { typedef struct { VkStructureType sType; const void* pNext; - VkFormat format; VkBool32 depthTestEnable; VkBool32 depthWriteEnable; VkCompareOp depthCompareOp; @@ -1604,7 +1601,6 @@ typedef struct { typedef struct { VkBool32 blendEnable; - VkFormat format; VkBlend srcBlendColor; VkBlend destBlendColor; VkBlendOp blendOpColor; @@ -1639,6 +1635,8 @@ typedef struct { const VkPipelineCbStateCreateInfo* pCbState; VkPipelineCreateFlags flags; VkPipelineLayout layout; + VkRenderPass renderPass; + uint32_t subpass; VkPipeline basePipelineHandle; int32_t basePipelineIndex; } VkGraphicsPipelineCreateInfo; @@ -1716,6 +1714,7 @@ typedef struct { VkBufferView bufferView; VkSampler sampler; 
VkImageView imageView; + VkAttachmentView attachmentView; VkImageLayout imageLayout; } VkDescriptorInfo; @@ -1801,54 +1800,74 @@ typedef struct { } VkDynamicDepthStencilStateCreateInfo; typedef struct { - VkColorAttachmentView view; + VkAttachmentView view; VkImageLayout layout; -} VkColorAttachmentBindInfo; - -typedef struct { - VkDepthStencilView view; - VkImageLayout layout; -} VkDepthStencilBindInfo; +} VkAttachmentBindInfo; typedef struct { VkStructureType sType; const void* pNext; - uint32_t colorAttachmentCount; - const VkColorAttachmentBindInfo* pColorAttachments; - const VkDepthStencilBindInfo* pDepthStencilAttachment; - uint32_t sampleCount; + VkRenderPass renderPass; + uint32_t attachmentCount; + const VkAttachmentBindInfo* pAttachments; uint32_t width; uint32_t height; uint32_t layers; } VkFramebufferCreateInfo; -typedef union { - float f32[4]; - int32_t s32[4]; - uint32_t u32[4]; -} VkClearColorValue; - typedef struct { VkStructureType sType; const void* pNext; - VkRect2D renderArea; - uint32_t colorAttachmentCount; - VkExtent2D extent; - uint32_t sampleCount; - uint32_t layers; - const VkFormat* pColorFormats; - const VkImageLayout* pColorLayouts; - const VkAttachmentLoadOp* pColorLoadOps; - const VkAttachmentStoreOp* pColorStoreOps; - const VkClearColorValue* pColorLoadClearValues; - VkFormat depthStencilFormat; - VkImageLayout depthStencilLayout; - VkAttachmentLoadOp depthLoadOp; - float depthLoadClearValue; - VkAttachmentStoreOp depthStoreOp; + VkFormat format; + uint32_t samples; + VkAttachmentLoadOp loadOp; + VkAttachmentStoreOp storeOp; VkAttachmentLoadOp stencilLoadOp; - uint32_t stencilLoadClearValue; VkAttachmentStoreOp stencilStoreOp; + VkImageLayout initialLayout; + VkImageLayout finalLayout; +} VkAttachmentDescription; + +typedef struct { + uint32_t attachment; + VkImageLayout layout; +} VkAttachmentReference; + +typedef struct { + VkStructureType sType; + const void* pNext; + VkPipelineBindPoint pipelineBindPoint; + 
VkSubpassDescriptionFlags flags; + uint32_t inputCount; + const VkAttachmentReference* inputAttachments; + uint32_t colorCount; + const VkAttachmentReference* colorAttachments; + const VkAttachmentReference* resolveAttachments; + VkAttachmentReference depthStencilAttachment; + uint32_t preserveCount; + const VkAttachmentReference* preserveAttachments; +} VkSubpassDescription; + +typedef struct { + VkStructureType sType; + const void* pNext; + uint32_t srcSubpass; + uint32_t dstSubpass; + VkWaitEvent waitEvent; + VkPipeEventFlags pipeEventMask; + VkMemoryOutputFlags outputMask; + VkMemoryInputFlags inputMask; +} VkSubpassDependency; + +typedef struct { + VkStructureType sType; + const void* pNext; + uint32_t attachmentCount; + const VkAttachmentDescription* pAttachments; + uint32_t subpassCount; + const VkSubpassDescription* pSubpasses; + uint32_t dependencyCount; + const VkSubpassDependency* pDependencies; } VkRenderPassCreateInfo; typedef struct { @@ -1899,6 +1918,12 @@ typedef struct { VkExtent3D imageExtent; } VkBufferImageCopy; +typedef union { + float f32[4]; + int32_t s32[4]; + uint32_t u32[4]; +} VkClearColorValue; + typedef struct { VkOffset3D offset; VkExtent3D extent; @@ -1913,10 +1938,24 @@ typedef struct { } VkImageResolve; typedef struct { + float depth; + uint32_t stencil; +} VkClearDepthStencilValue; + +typedef union { + VkClearColorValue color; + VkClearDepthStencilValue ds; +} VkClearValue; + +typedef struct { + VkStructureType sType; + const void* pNext; VkRenderPass renderPass; VkFramebuffer framebuffer; - VkRenderPassContents contents; -} VkRenderPassBegin; + VkRect2D renderArea; + uint32_t attachmentCount; + const VkClearValue* pAttachmentClearValues; +} VkRenderPassBeginInfo; typedef struct { VkStructureType sType; @@ -2034,11 +2073,9 @@ typedef VkResult (VKAPI *PFN_vkCreateImage)(VkDevice device, const VkImageCreate typedef VkResult (VKAPI *PFN_vkDestroyImage)(VkDevice device, VkImage image); typedef VkResult (VKAPI 
*PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); typedef VkResult (VKAPI *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); -typedef VkResult (VKAPI *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView); -typedef VkResult (VKAPI *PFN_vkCreateColorAttachmentView)(VkDevice device, const VkColorAttachmentViewCreateInfo* pCreateInfo, VkColorAttachmentView* pView); -typedef VkResult (VKAPI *PFN_vkDestroyColorAttachmentView)(VkDevice device, VkColorAttachmentView view); -typedef VkResult (VKAPI *PFN_vkCreateDepthStencilView)(VkDevice device, const VkDepthStencilViewCreateInfo* pCreateInfo, VkDepthStencilView* pView); -typedef VkResult (VKAPI *PFN_vkDestroyDepthStencilView)(VkDevice device, VkDepthStencilView view); +typedef VkResult (VKAPI *PFN_vkDestroyImageView)(VkDevice device, VkImageView view); +typedef VkResult (VKAPI *PFN_vkCreateAttachmentView)(VkDevice device, const VkAttachmentViewCreateInfo* pCreateInfo, VkAttachmentView* pView); +typedef VkResult (VKAPI *PFN_vkDestroyAttachmentView)(VkDevice device, VkAttachmentView view); typedef VkResult (VKAPI *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); typedef VkResult (VKAPI *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule); typedef VkResult (VKAPI *PFN_vkCreateShader)(VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); @@ -2113,7 +2150,8 @@ typedef void (VKAPI *PFN_vkCmdResetQueryPool)(VkCmdBuffer cmdBuffer, VkQueryPool typedef void (VKAPI *PFN_vkCmdWriteTimestamp)(VkCmdBuffer cmdBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset); typedef void (VKAPI *PFN_vkCmdCopyQueryPoolResults)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer destBuffer, VkDeviceSize destOffset, 
VkDeviceSize destStride, VkQueryResultFlags flags); typedef void (VKAPI *PFN_vkCmdPushConstants)(VkCmdBuffer cmdBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t start, uint32_t length, const void* values); -typedef void (VKAPI *PFN_vkCmdBeginRenderPass)(VkCmdBuffer cmdBuffer, const VkRenderPassBegin* pRenderPassBegin); +typedef void (VKAPI *PFN_vkCmdBeginRenderPass)(VkCmdBuffer cmdBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkRenderPassContents contents); +typedef void (VKAPI *PFN_vkCmdNextSubpass)(VkCmdBuffer cmdBuffer, VkRenderPassContents contents); typedef void (VKAPI *PFN_vkCmdEndRenderPass)(VkCmdBuffer cmdBuffer); typedef void (VKAPI *PFN_vkCmdExecuteCommands)(VkCmdBuffer cmdBuffer, uint32_t cmdBuffersCount, const VkCmdBuffer* pCmdBuffers); @@ -2404,23 +2442,14 @@ VkResult VKAPI vkDestroyImageView( VkDevice device, VkImageView imageView); -VkResult VKAPI vkCreateColorAttachmentView( - VkDevice device, - const VkColorAttachmentViewCreateInfo* pCreateInfo, - VkColorAttachmentView* pView); - -VkResult VKAPI vkDestroyColorAttachmentView( +VkResult VKAPI vkCreateAttachmentView( VkDevice device, - VkColorAttachmentView view); + const VkAttachmentViewCreateInfo* pCreateInfo, + VkAttachmentView* pView); -VkResult VKAPI vkCreateDepthStencilView( +VkResult VKAPI vkDestroyAttachmentView( VkDevice device, - const VkDepthStencilViewCreateInfo* pCreateInfo, - VkDepthStencilView* pView); - -VkResult VKAPI vkDestroyDepthStencilView( - VkDevice device, - VkDepthStencilView view); + VkAttachmentView view); VkResult VKAPI vkCreateShaderModule( VkDevice device, @@ -2859,7 +2888,12 @@ void VKAPI vkCmdPushConstants( void VKAPI vkCmdBeginRenderPass( VkCmdBuffer cmdBuffer, - const VkRenderPassBegin* pRenderPassBegin); + const VkRenderPassBeginInfo* pRenderPassBegin, + VkRenderPassContents contents); + +void VKAPI vkCmdNextSubpass( + VkCmdBuffer cmdBuffer, + VkRenderPassContents contents); void VKAPI vkCmdEndRenderPass( VkCmdBuffer cmdBuffer); diff 
--git a/src/vulkan/device.c b/src/vulkan/device.c index b7cbac97b78..90ac956c040 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1216,11 +1216,8 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_IMAGE_VIEW: return anv_DestroyImageView(_device, _object); - case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW: - return anv_DestroyColorAttachmentView(_device, _object); - - case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW: - return anv_DestroyDepthStencilView(_device, _object); + case VK_OBJECT_TYPE_ATTACHMENT_VIEW: + return anv_DestroyAttachmentView(_device, _object); case VK_OBJECT_TYPE_IMAGE: return anv_DestroyImage(_device, _object); @@ -1722,8 +1719,10 @@ VkResult anv_DestroyBufferView( VkBufferView _view) { ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_surface_view *view = (struct anv_surface_view *)_view; - anv_surface_view_destroy(device, (struct anv_surface_view *)_view); + anv_surface_view_fini(device, view); + anv_device_free(device, view); return VK_SUCCESS; } @@ -1882,6 +1881,7 @@ VkResult anv_CreateDescriptorSetLayout( case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) surface_count[s] += pCreateInfo->pBinding[i].arraySize; break; @@ -1970,6 +1970,7 @@ VkResult anv_CreateDescriptorSetLayout( case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) { surface[s]->index = descriptor + j; @@ -2107,6 +2108,10 @@ VkResult anv_UpdateDescriptorSets( anv_finishme("texel buffers not implemented"); break; + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + anv_finishme("input attachments not implemented"); + break; + case 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: @@ -3049,8 +3054,10 @@ static VkResult cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, unsigned stage, struct anv_state *bt_state) { + struct anv_framebuffer *fb = cmd_buffer->framebuffer; + struct anv_subpass *subpass = cmd_buffer->subpass; struct anv_pipeline_layout *layout; - uint32_t color_attachments, bias, size; + uint32_t attachments, bias, size; if (stage == VK_SHADER_STAGE_COMPUTE) layout = cmd_buffer->compute_pipeline->layout; @@ -3059,10 +3066,10 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (stage == VK_SHADER_STAGE_FRAGMENT) { bias = MAX_RTS; - color_attachments = cmd_buffer->framebuffer->color_attachment_count; + attachments = subpass->color_count; } else { bias = 0; - color_attachments = 0; + attachments = 0; } /* This is a little awkward: layout can be NULL but we still have to @@ -3070,7 +3077,7 @@ * targets. */ uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0; - if (color_attachments + surface_count == 0) + if (attachments + surface_count == 0) return VK_SUCCESS; size = (bias + surface_count) * sizeof(uint32_t); @@ -3080,9 +3087,19 @@ if (bt_state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - for (uint32_t ca = 0; ca < color_attachments; ca++) { - const struct anv_surface_view *view = - cmd_buffer->framebuffer->color_attachments[ca]; + /* This is highly annoying. The Vulkan spec puts the depth-stencil + * attachments in with the color attachments. Unfortunately, thanks to + * other aspects of the API, we can't really separate them before this + * point. Therefore, we have to walk all of the attachments but only + * put the color attachments into the binding table.
+ */ + for (uint32_t a = 0; a < attachments; a++) { + const struct anv_attachment_view *attachment = + fb->attachments[subpass->color_attachments[a]]; + + assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); + const struct anv_color_attachment_view *view = + (const struct anv_color_attachment_view *)attachment; struct anv_state state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); @@ -3090,16 +3107,16 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (state.map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - memcpy(state.map, view->surface_state.map, 64); + memcpy(state.map, view->view.surface_state.map, 64); /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ *(uint64_t *)(state.map + 8 * 4) = anv_reloc_list_add(&cmd_buffer->surface_relocs, cmd_buffer->device, state.offset + 8 * 4, - view->bo, view->offset); + view->view.bo, view->view.offset); - bt_map[ca] = state.offset; + bt_map[a] = state.offset; } if (layout == NULL) @@ -3844,32 +3861,25 @@ VkResult anv_CreateFramebuffer( ANV_FROM_HANDLE(anv_device, device, _device); struct anv_framebuffer *framebuffer; - static const struct anv_depth_stencil_view null_view = - { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); - framebuffer = anv_device_alloc(device, sizeof(*framebuffer), 8, + size_t size = sizeof(*framebuffer) + + sizeof(struct anv_attachment_view *) * pCreateInfo->attachmentCount; + framebuffer = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (framebuffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); framebuffer->base.destructor = anv_framebuffer_destroy; - framebuffer->color_attachment_count = pCreateInfo->colorAttachmentCount; - for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) { - framebuffer->color_attachments[i] = - (struct anv_surface_view *) pCreateInfo->pColorAttachments[i].view; - } + 
framebuffer->attachment_count = pCreateInfo->attachmentCount; + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + ANV_FROM_HANDLE(anv_attachment_view, view, + pCreateInfo->pAttachments[i].view); - if (pCreateInfo->pDepthStencilAttachment) { - framebuffer->depth_stencil = - anv_depth_stencil_view_from_handle(pCreateInfo->pDepthStencilAttachment->view); - } else { - framebuffer->depth_stencil = &null_view; + framebuffer->attachments[i] = view; } - framebuffer->sample_count = pCreateInfo->sampleCount; framebuffer->width = pCreateInfo->width; framebuffer->height = pCreateInfo->height; framebuffer->layers = pCreateInfo->layers; @@ -3926,22 +3936,52 @@ VkResult anv_CreateRenderPass( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); size = sizeof(*pass) + - pCreateInfo->layers * sizeof(struct anv_render_pass_layer); + pCreateInfo->subpassCount * sizeof(struct anv_subpass); pass = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (pass == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - pass->render_area = pCreateInfo->renderArea; + pass->attachment_count = pCreateInfo->attachmentCount; + size = pCreateInfo->attachmentCount * sizeof(*pass->attachments); + pass->attachments = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + pass->attachments[i].format = pCreateInfo->pAttachments[i].format; + pass->attachments[i].samples = pCreateInfo->pAttachments[i].samples; + pass->attachments[i].load_op = pCreateInfo->pAttachments[i].loadOp; + pass->attachments[i].stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + // pass->attachments[i].store_op = pCreateInfo->pAttachments[i].storeOp; + // pass->attachments[i].stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + } - pass->num_layers = pCreateInfo->layers; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = 
&pCreateInfo->pSubpasses[i]; + struct anv_subpass *subpass = &pass->subpasses[i]; + + subpass->input_count = desc->inputCount; + subpass->input_attachments = + anv_device_alloc(device, desc->inputCount * sizeof(uint32_t), + 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + for (uint32_t j = 0; j < desc->inputCount; j++) + subpass->input_attachments[j] = desc->inputAttachments[j].attachment; + + subpass->color_count = desc->colorCount; + subpass->color_attachments = + anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), + 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + for (uint32_t j = 0; j < desc->colorCount; j++) + subpass->color_attachments[j] = desc->colorAttachments[j].attachment; + + if (desc->resolveAttachments) { + subpass->resolve_attachments = + anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), + 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + for (uint32_t j = 0; j < desc->colorCount; j++) + subpass->resolve_attachments[j] = desc->resolveAttachments[j].attachment; + } - pass->num_clear_layers = 0; - for (uint32_t i = 0; i < pCreateInfo->layers; i++) { - pass->layers[i].color_load_op = pCreateInfo->pColorLoadOps[i]; - pass->layers[i].clear_color = pCreateInfo->pColorLoadClearValues[i]; - if (pass->layers[i].color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) - pass->num_clear_layers++; + subpass->depth_stencil_attachment = desc->depthStencilAttachment.attachment; } *pRenderPass = anv_render_pass_to_handle(pass); @@ -3956,6 +3996,14 @@ VkResult anv_DestroyRenderPass( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_render_pass, pass, _pass); + anv_device_free(device, pass->attachments); + + for (uint32_t i = 0; i < pass->attachment_count; i++) { + anv_device_free(device, pass->subpasses[i].input_attachments); + anv_device_free(device, pass->subpasses[i].color_attachments); + anv_device_free(device, pass->subpasses[i].resolve_attachments); + } + anv_device_free(device, pass); return VK_SUCCESS; @@ -3972,11 +4020,23 @@ VkResult 
anv_GetRenderAreaGranularity( } static void -anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer, - struct anv_render_pass *pass) +anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { - const struct anv_depth_stencil_view *view = - cmd_buffer->framebuffer->depth_stencil; + struct anv_subpass *subpass = cmd_buffer->subpass; + struct anv_framebuffer *fb = cmd_buffer->framebuffer; + const struct anv_depth_stencil_view *view; + + static const struct anv_depth_stencil_view null_view = + { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; + + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + const struct anv_attachment_view *aview = + fb->attachments[subpass->depth_stencil_attachment]; + assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); + view = (const struct anv_depth_stencil_view *)aview; + } else { + view = &null_view; + } /* FIXME: Implement the PMA stall W/A */ /* FIXME: Width and Height are wrong */ @@ -3989,8 +4049,8 @@ anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer, .SurfaceFormat = view->depth_format, .SurfacePitch = view->depth_stride > 0 ? 
view->depth_stride - 1 : 0, .SurfaceBaseAddress = { view->bo, view->depth_offset }, - .Height = pass->render_area.extent.height - 1, - .Width = pass->render_area.extent.width - 1, + .Height = cmd_buffer->framebuffer->height - 1, + .Width = cmd_buffer->framebuffer->width - 1, .LOD = 0, .Depth = 1 - 1, .MinimumArrayElement = 0, @@ -4023,33 +4083,59 @@ void anv_CmdPushConstants( stub(); } +void +anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + cmd_buffer->subpass = subpass; + + cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + anv_cmd_buffer_emit_depth_stencil(cmd_buffer); +} + void anv_CmdBeginRenderPass( VkCmdBuffer cmdBuffer, - const VkRenderPassBegin* pRenderPassBegin) + const VkRenderPassBeginInfo* pRenderPassBegin, + VkRenderPassContents contents) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - assert(pRenderPassBegin->contents == VK_RENDER_PASS_CONTENTS_INLINE); + assert(contents == VK_RENDER_PASS_CONTENTS_INLINE); cmd_buffer->framebuffer = framebuffer; + cmd_buffer->pass = pass; - cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + const VkRect2D *render_area = &pRenderPassBegin->renderArea; anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, - .ClippedDrawingRectangleYMin = pass->render_area.offset.y, - .ClippedDrawingRectangleXMin = pass->render_area.offset.x, + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, .ClippedDrawingRectangleYMax = - pass->render_area.offset.y + pass->render_area.extent.height - 1, + render_area->offset.y + render_area->extent.height - 1, .ClippedDrawingRectangleXMax = - pass->render_area.offset.x + pass->render_area.extent.width - 1, + render_area->offset.x + render_area->extent.width - 1, .DrawingRectangleOriginY = 0, 
.DrawingRectangleOriginX = 0); - anv_cmd_buffer_emit_depth_stencil(cmd_buffer, pass); + anv_cmd_buffer_clear_attachments(cmd_buffer, pass, + pRenderPassBegin->pAttachmentClearValues); + + anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); +} + +void anv_CmdNextSubpass( + VkCmdBuffer cmdBuffer, + VkRenderPassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + assert(contents == VK_RENDER_PASS_CONTENTS_INLINE); - anv_cmd_buffer_clear(cmd_buffer, pass); + cmd_buffer->subpass++; + anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->subpass + 1); } void anv_CmdEndRenderPass( diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index 0fa47fda681..a00dc8df75a 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -215,6 +215,19 @@ anv_format_for_vk_format(VkFormat format) return &anv_formats[format]; } +bool +anv_is_vk_format_depth_or_stencil(VkFormat format) +{ + const struct anv_format *format_info = + anv_format_for_vk_format(format); + + if (format_info->depth_format != UNSUPPORTED && + format_info->depth_format != 0) + return true; + + return format_info->has_stencil; +} + // Format capabilities struct surface_format_info { diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 5b042a0e297..c29c6939ffb 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -326,12 +326,10 @@ VkResult anv_GetImageSubresourceLayout( } void -anv_surface_view_destroy(struct anv_device *device, - struct anv_surface_view *view) +anv_surface_view_fini(struct anv_device *device, + struct anv_surface_view *view) { anv_state_pool_free(&device->surface_state_pool, view->surface_state); - - anv_device_free(device, view); } void @@ -557,30 +555,32 @@ VkResult anv_DestroyImageView(VkDevice _device, VkImageView _view) { ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_surface_view *view = (struct anv_surface_view *)_view; - anv_surface_view_destroy(device, (struct anv_surface_view *)_view); + anv_surface_view_fini(device, 
view); + anv_device_free(device, view); return VK_SUCCESS; } void -anv_color_attachment_view_init(struct anv_surface_view *view, +anv_color_attachment_view_init(struct anv_color_attachment_view *aview, struct anv_device *device, - const VkColorAttachmentViewCreateInfo* pCreateInfo, + const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + struct anv_surface_view *view = &aview->view; struct anv_surface *surface = &image->primary_surface; const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); + aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; + anv_assert(pCreateInfo->arraySize > 0); anv_assert(pCreateInfo->mipLevel < image->levels); anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); - if (pCreateInfo->msaaResolveImage) - anv_finishme("msaaResolveImage"); - view->bo = image->bo; view->offset = image->offset + surface->offset; view->format = pCreateInfo->format; @@ -659,57 +659,17 @@ anv_color_attachment_view_init(struct anv_surface_view *view, GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); } -VkResult -anv_CreateColorAttachmentView(VkDevice _device, - const VkColorAttachmentViewCreateInfo *pCreateInfo, - VkColorAttachmentView *pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_surface_view *view; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO); - - view = anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_color_attachment_view_init(view, device, pCreateInfo, NULL); - - *pView = (VkColorAttachmentView) view; - - return VK_SUCCESS; -} - -VkResult -anv_DestroyColorAttachmentView(VkDevice _device, VkColorAttachmentView _view) +static void +anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, + const 
VkAttachmentViewCreateInfo *pCreateInfo) { - ANV_FROM_HANDLE(anv_device, device, _device); - - anv_surface_view_destroy(device, (struct anv_surface_view *)_view); - - return VK_SUCCESS; -} - -VkResult -anv_CreateDepthStencilView(VkDevice _device, - const VkDepthStencilViewCreateInfo *pCreateInfo, - VkDepthStencilView *pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_depth_stencil_view *view; struct anv_surface *depth_surface = &image->primary_surface; struct anv_surface *stencil_surface = &image->stencil_surface; const struct anv_format *format = anv_format_for_vk_format(image->format); - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEPTH_STENCIL_VIEW_CREATE_INFO); - - view = anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + view->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL; /* XXX: We don't handle any of these */ anv_assert(pCreateInfo->mipLevel == 0); @@ -726,17 +686,54 @@ anv_CreateDepthStencilView(VkDevice _device, view->stencil_stride = stencil_surface->stride; view->stencil_offset = image->offset + stencil_surface->offset; view->stencil_qpitch = 0; /* FINISHME: QPitch */ +} - *pView = anv_depth_stencil_view_to_handle(view); +VkResult +anv_CreateAttachmentView(VkDevice _device, + const VkAttachmentViewCreateInfo *pCreateInfo, + VkAttachmentView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO); + + if (anv_is_vk_format_depth_or_stencil(pCreateInfo->format)) { + struct anv_depth_stencil_view *view = + anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_depth_stencil_view_init(view, pCreateInfo); + + *pView = anv_attachment_view_to_handle(&view->base); + } else { + struct 
anv_color_attachment_view *view = + anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_color_attachment_view_init(view, device, pCreateInfo, NULL); + + *pView = anv_attachment_view_to_handle(&view->base); + } return VK_SUCCESS; } VkResult -anv_DestroyDepthStencilView(VkDevice _device, VkDepthStencilView _view) +anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _view) { ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_depth_stencil_view, view, _view); + ANV_FROM_HANDLE(anv_attachment_view, view, _view); + + if (view->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR) { + struct anv_color_attachment_view *aview = + (struct anv_color_attachment_view *)view; + + anv_surface_view_fini(device, &aview->view); + } anv_device_free(device, view); diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 4b15026f6d9..0844565a996 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -263,32 +263,57 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, } void -anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, - struct anv_render_pass *pass) +anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass, + const VkClearValue *clear_values) { struct anv_saved_state saved_state; int num_clear_layers = 0; - struct clear_instance_data instance_data[MAX_RTS]; + for (uint32_t i = 0; i < pass->attachment_count; i++) { + if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + if (anv_is_vk_format_depth_or_stencil(pass->attachments[i].format)) { + anv_finishme("Can't clear depth-stencil yet"); + continue; + } + num_clear_layers++; + } + } + + if (num_clear_layers == 0) + return; - for (uint32_t i = 0; i < pass->num_layers; i++) { - if (pass->layers[i].color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - instance_data[num_clear_layers++] = (struct clear_instance_data) { + struct clear_instance_data 
instance_data[num_clear_layers]; + uint32_t color_attachments[num_clear_layers]; + + int layer = 0; + for (uint32_t i = 0; i < pass->attachment_count; i++) { + if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR && + !anv_is_vk_format_depth_or_stencil(pass->attachments[i].format)) { + instance_data[layer] = (struct clear_instance_data) { .vue_header = { .RTAIndex = i, .ViewportIndex = 0, .PointWidth = 0.0 }, - .color = pass->layers[i].clear_color, + .color = clear_values[i].color, }; + color_attachments[layer] = i; + layer++; } } - if (num_clear_layers == 0) - return; - anv_cmd_buffer_save(cmd_buffer, &saved_state); + struct anv_subpass subpass = { + .input_count = 0, + .color_count = num_clear_layers, + .color_attachments = color_attachments, + .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, + }; + + anv_cmd_buffer_begin_subpass(cmd_buffer, &subpass); + meta_emit_clear(cmd_buffer, num_clear_layers, instance_data); /* Restore API state */ @@ -500,7 +525,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_surface_view *src, VkOffset3D src_offset, VkExtent3D src_extent, - struct anv_surface_view *dest, + struct anv_color_attachment_view *dest, VkOffset3D dest_offset, VkExtent3D dest_extent) { @@ -596,45 +621,67 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, anv_CreateFramebuffer(anv_device_to_handle(device), &(VkFramebufferCreateInfo) { .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .colorAttachmentCount = 1, - .pColorAttachments = (VkColorAttachmentBindInfo[]) { + .attachmentCount = 1, + .pAttachments = (VkAttachmentBindInfo[]) { { - .view = (VkColorAttachmentView) dest, + .view = anv_attachment_view_to_handle(&dest->base), .layout = VK_IMAGE_LAYOUT_GENERAL } }, - .pDepthStencilAttachment = NULL, - .sampleCount = 1, - .width = dest->extent.width, - .height = dest->extent.height, + .width = dest->view.extent.width, + .height = dest->view.extent.height, .layers = 1 }, &fb); - VkRenderPass pass; 
anv_CreateRenderPass(anv_device_to_handle(device), &(VkRenderPassCreateInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .renderArea = { { 0, 0 }, { dest->extent.width, dest->extent.height } }, - .colorAttachmentCount = 1, - .extent = { 0, }, - .sampleCount = 1, - .layers = 1, - .pColorFormats = (VkFormat[]) { dest->format }, - .pColorLayouts = (VkImageLayout[]) { VK_IMAGE_LAYOUT_GENERAL }, - .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_LOAD }, - .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, - .pColorLoadClearValues = (VkClearColorValue[]) { - { .f32 = { 1.0, 0.0, 0.0, 1.0 } } + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, + .format = dest->view.format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputCount = 0, + .colorCount = 1, + .colorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .resolveAttachments = NULL, + .depthStencilAttachment = (VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveCount = 1, + .preserveAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, }, - .depthStencilFormat = VK_FORMAT_UNDEFINED, + .dependencyCount = 0, }, &pass); anv_CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBegin) { + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderPass = pass, .framebuffer = fb, - }); + .renderArea = { + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, 
dest_extent.height }, + }, + .attachmentCount = 1, + .pAttachmentClearValues = NULL, + }, VK_RENDER_PASS_CONTENTS_INLINE); anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), VK_STATE_BIND_POINT_VIEWPORT, @@ -747,10 +794,10 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, }, cmd_buffer); - struct anv_surface_view dest_view; + struct anv_color_attachment_view dest_view; anv_color_attachment_view_init(&dest_view, cmd_buffer->device, - &(VkColorAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = dest_image, .format = copy_format, .mipLevel = 0, @@ -887,10 +934,10 @@ void anv_CmdCopyImage( }, cmd_buffer); - struct anv_surface_view dest_view; + struct anv_color_attachment_view dest_view; anv_color_attachment_view_init(&dest_view, cmd_buffer->device, - &(VkColorAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, .format = src_image->format, .mipLevel = pRegions[r].destSubresource.mipLevel, @@ -955,10 +1002,10 @@ void anv_CmdBlitImage( }, cmd_buffer); - struct anv_surface_view dest_view; + struct anv_color_attachment_view dest_view; anv_color_attachment_view_init(&dest_view, cmd_buffer->device, - &(VkColorAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, .format = dest_image->format, .mipLevel = pRegions[r].destSubresource.mipLevel, @@ -1051,10 +1098,10 @@ void anv_CmdCopyBufferToImage( }, cmd_buffer); - struct anv_surface_view dest_view; + struct anv_color_attachment_view dest_view; anv_color_attachment_view_init(&dest_view, cmd_buffer->device, - &(VkColorAttachmentViewCreateInfo) { - .sType = 
VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = anv_image_to_handle(dest_image), .format = dest_image->format, .mipLevel = pRegions[r].imageSubresource.mipLevel, @@ -1149,10 +1196,10 @@ void anv_CmdCopyImageToBuffer( dest_image->bo = dest_buffer->bo; dest_image->offset = dest_buffer->offset + pRegions[r].bufferOffset; - struct anv_surface_view dest_view; + struct anv_color_attachment_view dest_view; anv_color_attachment_view_init(&dest_view, cmd_buffer->device, - &(VkColorAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, .format = src_image->format, .mipLevel = 0, @@ -1212,10 +1259,10 @@ void anv_CmdClearColorImage( for (uint32_t r = 0; r < rangeCount; r++) { for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { - struct anv_surface_view view; + struct anv_color_attachment_view view; anv_color_attachment_view_init(&view, cmd_buffer->device, - &(VkColorAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = _image, .format = image->format, .mipLevel = pRanges[r].baseMipLevel + l, @@ -1228,17 +1275,15 @@ void anv_CmdClearColorImage( anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), &(VkFramebufferCreateInfo) { .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .colorAttachmentCount = 1, - .pColorAttachments = (VkColorAttachmentBindInfo[]) { + .attachmentCount = 1, + .pAttachments = (VkAttachmentBindInfo[]) { { - .view = (VkColorAttachmentView) &view, + .view = anv_attachment_view_to_handle(&view.base), .layout = VK_IMAGE_LAYOUT_GENERAL } }, - .pDepthStencilAttachment = NULL, - .sampleCount = 1, - .width = 
view.extent.width, - .height = view.extent.height, + .width = view.view.extent.width, + .height = view.view.extent.height, .layers = 1 }, &fb); @@ -1246,24 +1291,54 @@ void anv_CmdClearColorImage( anv_CreateRenderPass(anv_device_to_handle(cmd_buffer->device), &(VkRenderPassCreateInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .renderArea = { { 0, 0 }, { view.extent.width, view.extent.height } }, - .colorAttachmentCount = 1, - .extent = { 0, }, - .sampleCount = 1, - .layers = 1, - .pColorFormats = (VkFormat[]) { image->format }, - .pColorLayouts = (VkImageLayout[]) { imageLayout }, - .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_DONT_CARE }, - .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, - .pColorLoadClearValues = pColor, - .depthStencilFormat = VK_FORMAT_UNDEFINED, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, + .format = view.view.format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputCount = 0, + .colorCount = 1, + .colorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .resolveAttachments = NULL, + .depthStencilAttachment = (VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveCount = 1, + .preserveAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + .dependencyCount = 0, }, &pass); anv_CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBegin) { + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + 
.renderArea = { + .offset = { 0, 0, }, + .extent = { + .width = view.view.extent.width, + .height = view.view.extent.height, + }, + }, .renderPass = pass, .framebuffer = fb, - }); + .attachmentCount = 1, + .pAttachmentClearValues = NULL, + }, VK_RENDER_PASS_CONTENTS_INLINE); struct clear_instance_data instance_data = { .vue_header = { diff --git a/src/vulkan/private.h b/src/vulkan/private.h index a8ff24214fd..7dc420643fe 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -695,6 +695,8 @@ struct anv_cmd_buffer { struct anv_pipeline * pipeline; struct anv_pipeline * compute_pipeline; struct anv_framebuffer * framebuffer; + struct anv_render_pass * pass; + struct anv_subpass * subpass; struct anv_dynamic_rs_state * rs_state; struct anv_dynamic_ds_state * ds_state; struct anv_dynamic_vp_state * vp_state; @@ -797,12 +799,13 @@ struct anv_format { uint16_t surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ uint8_t cpp; /**< Bytes-per-pixel of anv_format::surface_format. */ uint8_t num_channels; - uint8_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ + uint16_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ bool has_stencil; }; const struct anv_format * anv_format_for_vk_format(VkFormat format); +bool anv_is_vk_format_depth_or_stencil(VkFormat format); /** * A proxy for the color surfaces, depth surfaces, and stencil surfaces. @@ -866,6 +869,36 @@ struct anv_surface_view { VkFormat format; }; +enum anv_attachment_view_type { + ANV_ATTACHMENT_VIEW_TYPE_COLOR, + ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL, +}; + +struct anv_attachment_view { + enum anv_attachment_view_type attachment_type; +}; + +struct anv_color_attachment_view { + struct anv_attachment_view base; + + struct anv_surface_view view; +}; + +struct anv_depth_stencil_view { + struct anv_attachment_view base; + + struct anv_bo *bo; + + uint32_t depth_offset; /**< Offset into bo. 
*/ + uint32_t depth_stride; /**< 3DSTATE_DEPTH_BUFFER.SurfacePitch */ + uint32_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ + uint16_t depth_qpitch; /**< 3DSTATE_DEPTH_BUFFER.SurfaceQPitch */ + + uint32_t stencil_offset; /**< Offset into bo. */ + uint32_t stencil_stride; /**< 3DSTATE_STENCIL_BUFFER.SurfacePitch */ + uint16_t stencil_qpitch; /**< 3DSTATE_STENCIL_BUFFER.SurfaceQPitch */ +}; + struct anv_image_create_info { const VkImageCreateInfo *vk_info; bool force_tile_mode; @@ -881,65 +914,66 @@ void anv_image_view_init(struct anv_surface_view *view, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void anv_color_attachment_view_init(struct anv_surface_view *view, +void anv_color_attachment_view_init(struct anv_color_attachment_view *view, struct anv_device *device, - const VkColorAttachmentViewCreateInfo* pCreateInfo, + const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void anv_surface_view_destroy(struct anv_device *device, - struct anv_surface_view *view); +void anv_surface_view_fini(struct anv_device *device, + struct anv_surface_view *view); struct anv_sampler { uint32_t state[4]; }; -struct anv_depth_stencil_view { - struct anv_bo *bo; - - uint32_t depth_offset; /**< Offset into bo. */ - uint32_t depth_stride; /**< 3DSTATE_DEPTH_BUFFER.SurfacePitch */ - uint32_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ - uint16_t depth_qpitch; /**< 3DSTATE_DEPTH_BUFFER.SurfaceQPitch */ - - uint32_t stencil_offset; /**< Offset into bo. 
*/ - uint32_t stencil_stride; /**< 3DSTATE_STENCIL_BUFFER.SurfacePitch */ - uint16_t stencil_qpitch; /**< 3DSTATE_STENCIL_BUFFER.SurfaceQPitch */ -}; - struct anv_framebuffer { struct anv_object base; - uint32_t color_attachment_count; - const struct anv_surface_view * color_attachments[MAX_RTS]; - const struct anv_depth_stencil_view * depth_stencil; - uint32_t sample_count; uint32_t width; uint32_t height; uint32_t layers; /* Viewport for clears */ VkDynamicViewportState vp_state; + + uint32_t attachment_count; + const struct anv_attachment_view * attachments[0]; }; -struct anv_render_pass_layer { - VkAttachmentLoadOp color_load_op; - VkClearColorValue clear_color; +struct anv_subpass { + uint32_t input_count; + uint32_t * input_attachments; + uint32_t color_count; + uint32_t * color_attachments; + uint32_t * resolve_attachments; + uint32_t depth_stencil_attachment; +}; + +struct anv_render_pass_attachment { + VkFormat format; + uint32_t samples; + VkAttachmentLoadOp load_op; + VkAttachmentLoadOp stencil_load_op; }; struct anv_render_pass { - VkRect2D render_area; + uint32_t attachment_count; + struct anv_render_pass_attachment * attachments; - uint32_t num_clear_layers; - uint32_t num_layers; - struct anv_render_pass_layer layers[0]; + struct anv_subpass subpasses[0]; }; void anv_device_init_meta(struct anv_device *device); void anv_device_finish_meta(struct anv_device *device); void -anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer, - struct anv_render_pass *pass); +anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + +void +anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass, + const VkClearValue *clear_values); void * anv_lookup_entrypoint(const char *name); @@ -977,7 +1011,7 @@ ANV_DEFINE_CASTS(anv_shader, VkShader) ANV_DEFINE_CASTS(anv_pipeline, VkPipeline) ANV_DEFINE_CASTS(anv_image, VkImage) ANV_DEFINE_CASTS(anv_sampler, VkSampler) 
-ANV_DEFINE_CASTS(anv_depth_stencil_view, VkDepthStencilView) +ANV_DEFINE_CASTS(anv_attachment_view, VkAttachmentView) ANV_DEFINE_CASTS(anv_framebuffer, VkFramebuffer) ANV_DEFINE_CASTS(anv_render_pass, VkRenderPass) ANV_DEFINE_CASTS(anv_query_pool, VkQueryPool) -- cgit v1.2.3 From 30445f8f7a27b80cb344343dc193b5276fc1e1ca Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 14 Jul 2015 14:26:10 -0700 Subject: vk: Split the dynamic state binding function into one per state --- include/vulkan/vulkan.h | 33 ++++++++++++++------------ src/vulkan/device.c | 61 ++++++++++++++++++++++++++++++------------------- src/vulkan/meta.c | 41 +++++++++++++-------------------- 3 files changed, 72 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 2a1e484cd8e..a2410d1ff5c 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -831,17 +831,6 @@ typedef enum { VK_CMD_BUFFER_LEVEL_MAX_ENUM = 0x7FFFFFFF } VkCmdBufferLevel; -typedef enum { - VK_STATE_BIND_POINT_VIEWPORT = 0, - VK_STATE_BIND_POINT_RASTER = 1, - VK_STATE_BIND_POINT_COLOR_BLEND = 2, - VK_STATE_BIND_POINT_DEPTH_STENCIL = 3, - VK_STATE_BIND_POINT_BEGIN_RANGE = VK_STATE_BIND_POINT_VIEWPORT, - VK_STATE_BIND_POINT_END_RANGE = VK_STATE_BIND_POINT_DEPTH_STENCIL, - VK_STATE_BIND_POINT_NUM = (VK_STATE_BIND_POINT_DEPTH_STENCIL - VK_STATE_BIND_POINT_VIEWPORT + 1), - VK_STATE_BIND_POINT_MAX_ENUM = 0x7FFFFFFF -} VkStateBindPoint; - typedef enum { VK_INDEX_TYPE_UINT16 = 0, VK_INDEX_TYPE_UINT32 = 1, @@ -2118,7 +2107,10 @@ typedef VkResult (VKAPI *PFN_vkBeginCommandBuffer)(VkCmdBuffer cmdBuffer, const typedef VkResult (VKAPI *PFN_vkEndCommandBuffer)(VkCmdBuffer cmdBuffer); typedef VkResult (VKAPI *PFN_vkResetCommandBuffer)(VkCmdBuffer cmdBuffer); typedef void (VKAPI *PFN_vkCmdBindPipeline)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); -typedef void (VKAPI *PFN_vkCmdBindDynamicStateObject)(VkCmdBuffer cmdBuffer, 
VkStateBindPoint stateBindPoint, VkDynamicStateObject dynamicState); +typedef void (VKAPI *PFN_vkCmdBindDynamicViewportState)(VkCmdBuffer cmdBuffer, VkDynamicViewportState dynamicViewportState); +typedef void (VKAPI *PFN_vkCmdBindDynamicRasterState)(VkCmdBuffer cmdBuffer, VkDynamicRasterState dynamicRasterState); +typedef void (VKAPI *PFN_vkCmdBindDynamicColorBlendState)(VkCmdBuffer cmdBuffer, VkDynamicColorBlendState dynamicColorBlendState); +typedef void (VKAPI *PFN_vkCmdBindDynamicDepthStencilState)(VkCmdBuffer cmdBuffer, VkDynamicDepthStencilState dynamicDepthStencilState); typedef void (VKAPI *PFN_vkCmdBindDescriptorSets)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); typedef void (VKAPI *PFN_vkCmdBindIndexBuffer)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); typedef void (VKAPI *PFN_vkCmdBindVertexBuffers)(VkCmdBuffer cmdBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); @@ -2652,10 +2644,21 @@ void VKAPI vkCmdBindPipeline( VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); -void VKAPI vkCmdBindDynamicStateObject( +void VKAPI vkCmdBindDynamicViewportState( + VkCmdBuffer cmdBuffer, + VkDynamicViewportState dynamicViewportState); + +void VKAPI vkCmdBindDynamicRasterState( + VkCmdBuffer cmdBuffer, + VkDynamicRasterState dynamicRasterState); + +void VKAPI vkCmdBindDynamicColorBlendState( + VkCmdBuffer cmdBuffer, + VkDynamicColorBlendState dynamicColorBlendState); + +void VKAPI vkCmdBindDynamicDepthStencilState( VkCmdBuffer cmdBuffer, - VkStateBindPoint stateBindPoint, - VkDynamicStateObject dynamicState); + VkDynamicDepthStencilState dynamicDepthStencilState); void VKAPI vkCmdBindDescriptorSets( VkCmdBuffer cmdBuffer, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 
90ac956c040..d29e35ba20c 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2882,33 +2882,48 @@ void anv_CmdBindPipeline( } } -void anv_CmdBindDynamicStateObject( +void anv_CmdBindDynamicViewportState( VkCmdBuffer cmdBuffer, - VkStateBindPoint stateBindPoint, - VkDynamicStateObject dynamicState) + VkDynamicViewportState dynamicViewportState) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState); - switch (stateBindPoint) { - case VK_STATE_BIND_POINT_VIEWPORT: - cmd_buffer->vp_state = anv_dynamic_vp_state_from_handle(dynamicState); - cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY; - break; - case VK_STATE_BIND_POINT_RASTER: - cmd_buffer->rs_state = anv_dynamic_rs_state_from_handle(dynamicState); - cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY; - break; - case VK_STATE_BIND_POINT_COLOR_BLEND: - cmd_buffer->cb_state = anv_dynamic_cb_state_from_handle(dynamicState); - cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY; - break; - case VK_STATE_BIND_POINT_DEPTH_STENCIL: - cmd_buffer->ds_state = anv_dynamic_ds_state_from_handle(dynamicState); - cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY; - break; - default: - break; - }; + cmd_buffer->vp_state = vp_state; + cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY; +} + +void anv_CmdBindDynamicRasterState( + VkCmdBuffer cmdBuffer, + VkDynamicRasterState dynamicRasterState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState); + + cmd_buffer->rs_state = rs_state; + cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY; +} + +void anv_CmdBindDynamicColorBlendState( + VkCmdBuffer cmdBuffer, + VkDynamicColorBlendState dynamicColorBlendState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState); + + cmd_buffer->cb_state = cb_state; + cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY; +} + +void 
anv_CmdBindDynamicDepthStencilState( + VkCmdBuffer cmdBuffer, + VkDynamicDepthStencilState dynamicDepthStencilState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState); + + cmd_buffer->ds_state = ds_state; + cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY; } static struct anv_state diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 0844565a996..140b6418377 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -240,24 +240,20 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, /* We don't need anything here, only set if not already set. */ if (cmd_buffer->rs_state == NULL) - anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), - VK_STATE_BIND_POINT_RASTER, + anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.rs_state); if (cmd_buffer->vp_state == NULL) - anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), - VK_STATE_BIND_POINT_VIEWPORT, - cmd_buffer->framebuffer->vp_state); + anv_CmdBindDynamicViewportState(anv_cmd_buffer_to_handle(cmd_buffer), + cmd_buffer->framebuffer->vp_state); if (cmd_buffer->ds_state == NULL) - anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), - VK_STATE_BIND_POINT_DEPTH_STENCIL, - device->meta_state.shared.ds_state); + anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.ds_state); if (cmd_buffer->cb_state == NULL) - anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), - VK_STATE_BIND_POINT_COLOR_BLEND, - device->meta_state.shared.cb_state); + anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.cb_state); anv_CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, num_instances); } @@ -499,18 +495,15 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, /* We don't need anything here, only set if not already set. 
*/ if (cmd_buffer->rs_state == NULL) - anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), - VK_STATE_BIND_POINT_RASTER, + anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.rs_state); if (cmd_buffer->ds_state == NULL) - anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), - VK_STATE_BIND_POINT_DEPTH_STENCIL, - device->meta_state.shared.ds_state); + anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.ds_state); saved_state->cb_state = anv_dynamic_cb_state_to_handle(cmd_buffer->cb_state); - anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), - VK_STATE_BIND_POINT_COLOR_BLEND, - device->meta_state.shared.cb_state); + anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.cb_state); } struct blit_region { @@ -683,9 +676,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .pAttachmentClearValues = NULL, }, VK_RENDER_PASS_CONTENTS_INLINE); - anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), - VK_STATE_BIND_POINT_VIEWPORT, - anv_framebuffer_from_handle(fb)->vp_state); + anv_CmdBindDynamicViewportState(anv_cmd_buffer_to_handle(cmd_buffer), + anv_framebuffer_from_handle(fb)->vp_state); anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, @@ -712,9 +704,8 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, const struct anv_saved_state *saved_state) { anv_cmd_buffer_restore(cmd_buffer, saved_state); - anv_CmdBindDynamicStateObject(anv_cmd_buffer_to_handle(cmd_buffer), - VK_STATE_BIND_POINT_COLOR_BLEND, - saved_state->cb_state); + anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), + saved_state->cb_state); } static VkFormat -- cgit v1.2.3 From ccb2e5cd625d9fe7f62f4ba35a50b53bc15a74be Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 14 Jul 2015 14:50:35 -0700 Subject: vk: Make barriers more precise 
(rev. 133) --- include/vulkan/vulkan.h | 81 ++++++++++++++++++++----------------------------- src/vulkan/device.c | 46 +++++++++++++++++----------- 2 files changed, 61 insertions(+), 66 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index a2410d1ff5c..2ace82ed3a0 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -813,15 +813,6 @@ typedef enum { VK_PIPELINE_BIND_POINT_MAX_ENUM = 0x7FFFFFFF } VkPipelineBindPoint; -typedef enum { - VK_WAIT_EVENT_TOP_OF_PIPE = 1, - VK_WAIT_EVENT_BEFORE_RASTERIZATION = 2, - VK_WAIT_EVENT_BEGIN_RANGE = VK_WAIT_EVENT_TOP_OF_PIPE, - VK_WAIT_EVENT_END_RANGE = VK_WAIT_EVENT_BEFORE_RASTERIZATION, - VK_WAIT_EVENT_NUM = (VK_WAIT_EVENT_BEFORE_RASTERIZATION - VK_WAIT_EVENT_TOP_OF_PIPE + 1), - VK_WAIT_EVENT_MAX_ENUM = 0x7FFFFFFF -} VkWaitEvent; - typedef enum { VK_CMD_BUFFER_LEVEL_PRIMARY = 0, VK_CMD_BUFFER_LEVEL_SECONDARY = 1, @@ -840,22 +831,6 @@ typedef enum { VK_INDEX_TYPE_MAX_ENUM = 0x7FFFFFFF } VkIndexType; -typedef enum { - VK_PIPE_EVENT_TOP_OF_PIPE = 0, - VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE = 1, - VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE = 2, - VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE = 3, - VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE = 4, - VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE = 5, - VK_PIPE_EVENT_TRANSFER_COMPLETE = 6, - VK_PIPE_EVENT_COMMANDS_COMPLETE = 7, - VK_PIPE_EVENT_CPU_SIGNAL = 8, - VK_PIPE_EVENT_BEGIN_RANGE = VK_PIPE_EVENT_TOP_OF_PIPE, - VK_PIPE_EVENT_END_RANGE = VK_PIPE_EVENT_CPU_SIGNAL, - VK_PIPE_EVENT_NUM = (VK_PIPE_EVENT_CPU_SIGNAL - VK_PIPE_EVENT_TOP_OF_PIPE + 1), - VK_PIPE_EVENT_MAX_ENUM = 0x7FFFFFFF -} VkPipeEvent; - typedef enum { VK_TIMESTAMP_TYPE_TOP = 0, VK_TIMESTAMP_TYPE_BOTTOM = 1, @@ -1029,17 +1004,25 @@ typedef enum { typedef VkFlags VkSubpassDescriptionFlags; typedef enum { - VK_PIPE_EVENT_TOP_OF_PIPE_BIT = 0x00000001, - VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE_BIT = 0x00000002, - 
VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE_BIT = 0x00000004, - VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE_BIT = 0x00000008, - VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE_BIT = 0x00000010, - VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE_BIT = 0x00000020, - VK_PIPE_EVENT_TRANSFER_COMPLETE_BIT = 0x00000040, - VK_PIPE_EVENT_COMMANDS_COMPLETE_BIT = 0x00000080, - VK_PIPE_EVENT_CPU_SIGNAL_BIT = 0x00000100, -} VkPipeEventFlagBits; -typedef VkFlags VkPipeEventFlags; + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT = 0x00000001, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT = 0x00000002, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT = 0x00000004, + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT = 0x00000008, + VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT = 0x00000010, + VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT = 0x00000020, + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT = 0x00000040, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT = 0x00000080, + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT = 0x00000100, + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT = 0x00000200, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT = 0x00000400, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT = 0x00000800, + VK_PIPELINE_STAGE_TRANSFER_BIT = 0x00001000, + VK_PIPELINE_STAGE_TRANSITION_BIT = 0x00002000, + VK_PIPELINE_STAGE_HOST_BIT = 0x00004000, + VK_PIPELINE_STAGE_ALL_GRAPHICS = 0x000007FF, + VK_PIPELINE_STAGE_ALL_GPU_COMMANDS = 0x00003FFF, +} VkPipelineStageFlagBits; +typedef VkFlags VkPipelineStageFlags; typedef enum { VK_MEMORY_OUTPUT_HOST_WRITE_BIT = 0x00000001, @@ -1842,10 +1825,11 @@ typedef struct { const void* pNext; uint32_t srcSubpass; uint32_t dstSubpass; - VkWaitEvent waitEvent; - VkPipeEventFlags pipeEventMask; + VkPipelineStageFlags srcStageMask; + VkPipelineStageFlags destStageMask; VkMemoryOutputFlags outputMask; VkMemoryInputFlags inputMask; + VkBool32 byRegion; } VkSubpassDependency; typedef struct { @@ -2132,10 +2116,10 @@ typedef void (VKAPI *PFN_vkCmdClearDepthStencilImage)(VkCmdBuffer cmdBuffer, VkI typedef void (VKAPI 
*PFN_vkCmdClearColorAttachment)(VkCmdBuffer cmdBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rectCount, const VkRect3D* pRects); typedef void (VKAPI *PFN_vkCmdClearDepthStencilAttachment)(VkCmdBuffer cmdBuffer, VkImageAspectFlags imageAspectMask, VkImageLayout imageLayout, float depth, uint32_t stencil, uint32_t rectCount, const VkRect3D* pRects); typedef void (VKAPI *PFN_vkCmdResolveImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); -typedef void (VKAPI *PFN_vkCmdSetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipeEvent pipeEvent); -typedef void (VKAPI *PFN_vkCmdResetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipeEvent pipeEvent); -typedef void (VKAPI *PFN_vkCmdWaitEvents)(VkCmdBuffer cmdBuffer, VkWaitEvent waitEvent, uint32_t eventCount, const VkEvent* pEvents, VkPipeEventFlags pipeEventMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); -typedef void (VKAPI *PFN_vkCmdPipelineBarrier)(VkCmdBuffer cmdBuffer, VkWaitEvent waitEvent, VkPipeEventFlags pipeEventMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); +typedef void (VKAPI *PFN_vkCmdSetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipelineStageFlags stageMask); +typedef void (VKAPI *PFN_vkCmdResetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipelineStageFlags stageMask); +typedef void (VKAPI *PFN_vkCmdWaitEvents)(VkCmdBuffer cmdBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); +typedef void (VKAPI *PFN_vkCmdPipelineBarrier)(VkCmdBuffer cmdBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, VkBool32 byRegion, uint32_t memBarrierCount, const void* const* ppMemBarriers); typedef void (VKAPI *PFN_vkCmdBeginQuery)(VkCmdBuffer 
cmdBuffer, VkQueryPool queryPool, uint32_t slot, VkQueryControlFlags flags); typedef void (VKAPI *PFN_vkCmdEndQuery)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot); typedef void (VKAPI *PFN_vkCmdResetQueryPool)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); @@ -2825,26 +2809,27 @@ void VKAPI vkCmdResolveImage( void VKAPI vkCmdSetEvent( VkCmdBuffer cmdBuffer, VkEvent event, - VkPipeEvent pipeEvent); + VkPipelineStageFlags stageMask); void VKAPI vkCmdResetEvent( VkCmdBuffer cmdBuffer, VkEvent event, - VkPipeEvent pipeEvent); + VkPipelineStageFlags stageMask); void VKAPI vkCmdWaitEvents( VkCmdBuffer cmdBuffer, - VkWaitEvent waitEvent, uint32_t eventCount, const VkEvent* pEvents, - VkPipeEventFlags pipeEventMask, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); void VKAPI vkCmdPipelineBarrier( VkCmdBuffer cmdBuffer, - VkWaitEvent waitEvent, - VkPipeEventFlags pipeEventMask, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, uint32_t memBarrierCount, const void* const* ppMemBarriers); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index d29e35ba20c..2aa9aee9e50 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3702,7 +3702,7 @@ void anv_CmdDispatchIndirect( void anv_CmdSetEvent( VkCmdBuffer cmdBuffer, VkEvent event, - VkPipeEvent pipeEvent) + VkPipelineStageFlags stageMask) { stub(); } @@ -3710,17 +3710,17 @@ void anv_CmdSetEvent( void anv_CmdResetEvent( VkCmdBuffer cmdBuffer, VkEvent event, - VkPipeEvent pipeEvent) + VkPipelineStageFlags stageMask) { stub(); } void anv_CmdWaitEvents( VkCmdBuffer cmdBuffer, - VkWaitEvent waitEvent, uint32_t eventCount, const VkEvent* pEvents, - VkPipeEventFlags pipeEventMask, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, uint32_t memBarrierCount, const void* const* ppMemBarriers) { @@ -3729,8 +3729,9 @@ 
void anv_CmdWaitEvents( void anv_CmdPipelineBarrier( VkCmdBuffer cmdBuffer, - VkWaitEvent waitEvent, - VkPipeEventFlags pipeEventMask, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, uint32_t memBarrierCount, const void* const* ppMemBarriers) { @@ -3744,32 +3745,41 @@ void anv_CmdPipelineBarrier( /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */ - if (anv_clear_mask(&pipeEventMask, VK_PIPE_EVENT_TOP_OF_PIPE_BIT)) { + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { /* This is just what PIPE_CONTROL does */ } - if (anv_clear_mask(&pipeEventMask, - VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE_BIT | - VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE_BIT | - VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE_BIT)) { + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { cmd.StallAtPixelScoreboard = true; } - if (anv_clear_mask(&pipeEventMask, - VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE_BIT | - VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE_BIT | - VK_PIPE_EVENT_TRANSFER_COMPLETE_BIT | - VK_PIPE_EVENT_COMMANDS_COMPLETE_BIT)) { + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_TRANSITION_BIT)) { cmd.CommandStreamerStallEnable = true; } - if (anv_clear_mask(&pipeEventMask, VK_PIPE_EVENT_CPU_SIGNAL_BIT)) { + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); } + /* On our hardware, all stages will wait for execution as needed. 
*/ + (void)destStageMask; + /* We checked all known VkPipeEventFlags. */ - anv_assert(pipeEventMask == 0); + anv_assert(srcStageMask == 0); /* XXX: Right now, we're really dumb and just flush whatever categories * the app asks for. One of these days we may make this a bit better -- cgit v1.2.3 From 55723e97f1f4e830a8a0f05ed4dd85108c658a45 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 14 Jul 2015 14:59:39 -0700 Subject: vk: Split the memory requirements/binding functions --- include/vulkan/vulkan.h | 27 ++++++++++----- src/vulkan/device.c | 88 +++++++++++++++++++++++++++---------------------- src/vulkan/x11.c | 7 ++-- 3 files changed, 71 insertions(+), 51 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 2ace82ed3a0..68704a43fb4 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2017,8 +2017,10 @@ typedef VkResult (VKAPI *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem) typedef VkResult (VKAPI *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); typedef VkResult (VKAPI *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); typedef VkResult (VKAPI *PFN_vkDestroyObject)(VkDevice device, VkObjectType objType, VkObject object); -typedef VkResult (VKAPI *PFN_vkBindObjectMemory)(VkDevice device, VkObjectType objType, VkObject object, VkDeviceMemory mem, VkDeviceSize memOffset); -typedef VkResult (VKAPI *PFN_vkGetObjectMemoryRequirements)(VkDevice device, VkObjectType objType, VkObject object, VkMemoryRequirements* pMemoryRequirements); +typedef VkResult (VKAPI *PFN_vkBindBufferMemory)(VkDevice device, VkBuffer buffer, VkDeviceMemory mem, VkDeviceSize memOffset); +typedef VkResult (VKAPI *PFN_vkBindImageMemory)(VkDevice device, VkImage image, VkDeviceMemory mem, VkDeviceSize memOffset); +typedef VkResult (VKAPI *PFN_vkGetBufferMemoryRequirements)(VkDevice device, 
VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); +typedef VkResult (VKAPI *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); typedef VkResult (VKAPI *PFN_vkQueueBindSparseBufferMemory)(VkQueue queue, VkBuffer buffer, VkDeviceSize rangeOffset, VkDeviceSize rangeSize, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageMemory)(VkQueue queue, VkImage image, const VkImageMemoryBindInfo* pBindInfo, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); @@ -2267,17 +2269,26 @@ VkResult VKAPI vkDestroyObject( VkObjectType objType, VkObject object); -VkResult VKAPI vkBindObjectMemory( +VkResult VKAPI vkBindBufferMemory( VkDevice device, - VkObjectType objType, - VkObject object, + VkBuffer buffer, VkDeviceMemory mem, VkDeviceSize memOffset); -VkResult VKAPI vkGetObjectMemoryRequirements( +VkResult VKAPI vkBindImageMemory( VkDevice device, - VkObjectType objType, - VkObject object, + VkImage image, + VkDeviceMemory mem, + VkDeviceSize memOffset); + +VkResult VKAPI vkGetBufferMemoryRequirements( + VkDevice device, + VkBuffer buffer, + VkMemoryRequirements* pMemoryRequirements); + +VkResult VKAPI vkGetImageMemoryRequirements( + VkDevice device, + VkImage image, VkMemoryRequirements* pMemoryRequirements); VkResult VKAPI vkQueueBindSparseBufferMemory( diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 2aa9aee9e50..d566bb0b092 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1282,12 +1282,12 @@ VkResult anv_DestroyObject( } } -VkResult anv_GetObjectMemoryRequirements( +VkResult anv_GetBufferMemoryRequirements( VkDevice device, - VkObjectType objType, - VkObject object, + VkBuffer _buffer, VkMemoryRequirements* pMemoryRequirements) { + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); /* The Vulkan spec (git aaed022) says: * @@ -1300,52 
+1300,62 @@ VkResult anv_GetObjectMemoryRequirements( */ pMemoryRequirements->memoryTypeBits = 1; - switch (objType) { - case VK_OBJECT_TYPE_BUFFER: { - struct anv_buffer *buffer = anv_buffer_from_handle(object); - pMemoryRequirements->size = buffer->size; - pMemoryRequirements->alignment = 16; - break; - } - case VK_OBJECT_TYPE_IMAGE: { - struct anv_image *image = anv_image_from_handle(object); - pMemoryRequirements->size = image->size; - pMemoryRequirements->alignment = image->alignment; - break; - } - default: - pMemoryRequirements->size = 0; - break; - } + pMemoryRequirements->size = buffer->size; + pMemoryRequirements->alignment = 16; return VK_SUCCESS; } -VkResult anv_BindObjectMemory( +VkResult anv_GetImageMemoryRequirements( VkDevice device, - VkObjectType objType, - VkObject object, + VkImage _image, + VkMemoryRequirements* pMemoryRequirements) +{ + ANV_FROM_HANDLE(anv_image, image, _image); + + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. 
The bit `1<memoryTypeBits = 1; + + pMemoryRequirements->size = image->size; + pMemoryRequirements->alignment = image->alignment; + + return VK_SUCCESS; +} + +VkResult anv_BindBufferMemory( + VkDevice device, + VkBuffer _buffer, VkDeviceMemory _mem, VkDeviceSize memOffset) { ANV_FROM_HANDLE(anv_device_memory, mem, _mem); - struct anv_buffer *buffer; - struct anv_image *image; + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - switch (objType) { - case VK_OBJECT_TYPE_BUFFER: - buffer = anv_buffer_from_handle(object); - buffer->bo = &mem->bo; - buffer->offset = memOffset; - break; - case VK_OBJECT_TYPE_IMAGE: - image = anv_image_from_handle(object); - image->bo = &mem->bo; - image->offset = memOffset; - break; - default: - break; - } + buffer->bo = &mem->bo; + buffer->offset = memOffset; + + return VK_SUCCESS; +} + +VkResult anv_BindImageMemory( + VkDevice device, + VkImage _image, + VkDeviceMemory _mem, + VkDeviceSize memOffset) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + ANV_FROM_HANDLE(anv_image, image, _image); + + image->bo = &mem->bo; + image->offset = memOffset; return VK_SUCCESS; } diff --git a/src/vulkan/x11.c b/src/vulkan/x11.c index 7b6cee011a4..d1eaab3bf46 100644 --- a/src/vulkan/x11.c +++ b/src/vulkan/x11.c @@ -148,10 +148,9 @@ VkResult anv_CreateSwapChainWSI( }, (VkDeviceMemory *) &memory); - anv_BindObjectMemory(VK_NULL_HANDLE, - VK_OBJECT_TYPE_IMAGE, - (VkImage) image, - (VkDeviceMemory) memory, 0); + anv_BindImageMemory(VK_NULL_HANDLE, + anv_image_to_handle(image), + anv_device_memory_to_handle(memory), 0); ret = anv_gem_set_tiling(device, memory->bo.gem_handle, surface->stride, I915_TILING_X); -- cgit v1.2.3 From 02db21ae1188dd1e28efc8045de095be3e214bc8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 14 Jul 2015 16:11:21 -0700 Subject: vk: Add the new extension/layer enumeration entrypoints --- include/vulkan/vulkan.h | 63 +++++++++++++++++++++++--------------------- src/vulkan/device.c | 69 
++++++++++++++++++++++++++++--------------------- 2 files changed, 72 insertions(+), 60 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 60afdf6fa6b..aeb0e2bdec9 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -65,6 +65,7 @@ extern "C" { #define VK_MAX_PHYSICAL_DEVICE_NAME 256 #define VK_UUID_LENGTH 16 #define VK_MAX_EXTENSION_NAME 256 +#define VK_MAX_DESCRIPTION 256 #define VK_MAX_MEMORY_TYPES 32 #define VK_MAX_MEMORY_HEAPS 16 #define VK_LOD_CLAMP_NONE MAX_FLOAT @@ -118,6 +119,7 @@ typedef enum { VK_TIMEOUT = 3, VK_EVENT_SET = 4, VK_EVENT_RESET = 5, + VK_INCOMPLETE = 6, VK_ERROR_UNKNOWN = -1, VK_ERROR_UNAVAILABLE = -2, VK_ERROR_INITIALIZATION_FAILED = -3, @@ -150,9 +152,10 @@ typedef enum { VK_ERROR_BUILDING_COMMAND_BUFFER = -30, VK_ERROR_MEMORY_NOT_BOUND = -31, VK_ERROR_INCOMPATIBLE_QUEUE = -32, - VK_RESULT_BEGIN_RANGE = VK_ERROR_INCOMPATIBLE_QUEUE, - VK_RESULT_END_RANGE = VK_EVENT_RESET, - VK_RESULT_NUM = (VK_EVENT_RESET - VK_ERROR_INCOMPATIBLE_QUEUE + 1), + VK_ERROR_INVALID_LAYER = -33, + VK_RESULT_BEGIN_RANGE = VK_ERROR_INVALID_LAYER, + VK_RESULT_END_RANGE = VK_INCOMPLETE, + VK_RESULT_NUM = (VK_INCOMPLETE - VK_ERROR_INVALID_LAYER + 1), VK_RESULT_MAX_ENUM = 0x7FFFFFFF } VkResult; @@ -1101,6 +1104,8 @@ typedef struct { const void* pNext; const VkApplicationInfo* pAppInfo; const VkAllocCallbacks* pAllocCb; + uint32_t layerCount; + const char*const* ppEnabledLayerNames; uint32_t extensionCount; const char*const* ppEnabledExtensionNames; } VkInstanceCreateInfo; @@ -1292,6 +1297,8 @@ typedef struct { const void* pNext; uint32_t queueRecordCount; const VkDeviceQueueCreateInfo* pRequestedQueues; + uint32_t layerCount; + const char*const* ppEnabledLayerNames; uint32_t extensionCount; const char*const* ppEnabledExtensionNames; const VkPhysicalDeviceFeatures* pEnabledFeatures; @@ -1300,9 +1307,16 @@ typedef struct { typedef struct { char extName[VK_MAX_EXTENSION_NAME]; - uint32_t version; + 
uint32_t specVersion; } VkExtensionProperties; +typedef struct { + char layerName[VK_MAX_EXTENSION_NAME]; + uint32_t specVersion; + uint32_t implVersion; + const char* description[VK_MAX_DESCRIPTION]; +} VkLayerProperties; + typedef struct { VkStructureType sType; const void* pNext; @@ -1973,13 +1987,6 @@ typedef struct { VkImageSubresourceRange subresourceRange; } VkImageMemoryBarrier; -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t layerCount; - const char *const* ppActiveLayerNames; -} VkLayerCreateInfo; - typedef struct { VkStructureType sType; const void* pNext; @@ -2002,11 +2009,10 @@ typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instanc typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); typedef VkResult (VKAPI *PFN_vkDestroyDevice)(VkDevice device); -typedef VkResult (VKAPI *PFN_vkGetGlobalExtensionCount)(uint32_t* pCount); -typedef VkResult (VKAPI *PFN_vkGetGlobalExtensionProperties)(uint32_t extensionIndex, VkExtensionProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceExtensionCount)(VkPhysicalDevice physicalDevice, uint32_t* pCount); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, uint32_t extensionIndex, VkExtensionProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkEnumerateLayers)(VkPhysicalDevice physicalDevice, size_t maxStringSize, size_t* pLayerCount, char* const* pOutLayers, void* pReserved); +typedef VkResult (VKAPI *PFN_vkGetGlobalExtensionProperties)(const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); +typedef VkResult 
(VKAPI *PFN_vkGetGlobalLayerProperties)(uint32_t* pCount, VkLayerProperties* pProperties); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceLayerProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkLayerProperties* pProperties); typedef VkResult (VKAPI *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueNodeIndex, uint32_t queueIndex, VkQueue* pQueue); typedef VkResult (VKAPI *PFN_vkQueueSubmit)(VkQueue queue, uint32_t cmdBufferCount, const VkCmdBuffer* pCmdBuffers, VkFence fence); typedef VkResult (VKAPI *PFN_vkQueueWaitIdle)(VkQueue queue); @@ -2193,28 +2199,25 @@ VkResult VKAPI vkCreateDevice( VkResult VKAPI vkDestroyDevice( VkDevice device); -VkResult VKAPI vkGetGlobalExtensionCount( - uint32_t* pCount); - VkResult VKAPI vkGetGlobalExtensionProperties( - uint32_t extensionIndex, + const char* pLayerName, + uint32_t* pCount, VkExtensionProperties* pProperties); -VkResult VKAPI vkGetPhysicalDeviceExtensionCount( - VkPhysicalDevice physicalDevice, - uint32_t* pCount); - VkResult VKAPI vkGetPhysicalDeviceExtensionProperties( VkPhysicalDevice physicalDevice, - uint32_t extensionIndex, + const char* pLayerName, + uint32_t* pCount, VkExtensionProperties* pProperties); -VkResult VKAPI vkEnumerateLayers( +VkResult VKAPI vkGetGlobalLayerProperties( + uint32_t* pCount, + VkLayerProperties* pProperties); + +VkResult VKAPI vkGetPhysicalDeviceLayerProperties( VkPhysicalDevice physicalDevice, - size_t maxStringSize, - size_t* pLayerCount, - char* const* pOutLayers, - void* pReserved); + uint32_t* pCount, + VkLayerProperties* pProperties); VkResult VKAPI vkGetDeviceQueue( VkDevice device, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index d566bb0b092..752b7813396 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -643,59 +643,68 @@ VkResult anv_DestroyDevice( static const VkExtensionProperties global_extensions[] = { { .extName = "VK_WSI_LunarG", - .version = 3 + .specVersion = 3 } }; -VkResult anv_GetGlobalExtensionCount( - uint32_t* pCount) 
-{ - *pCount = ARRAY_SIZE(global_extensions); - - return VK_SUCCESS; -} - - VkResult anv_GetGlobalExtensionProperties( - uint32_t extensionIndex, + const char* pLayerName, + uint32_t* pCount, VkExtensionProperties* pProperties) { - assert(extensionIndex < ARRAY_SIZE(global_extensions)); - - *pProperties = global_extensions[extensionIndex]; + if (pProperties == NULL) { + *pCount = ARRAY_SIZE(global_extensions); + return VK_SUCCESS; + } - return VK_SUCCESS; -} + assert(*pCount < ARRAY_SIZE(global_extensions)); -VkResult anv_GetPhysicalDeviceExtensionCount( - VkPhysicalDevice physicalDevice, - uint32_t* pCount) -{ - /* None supported at this time */ - *pCount = 0; + *pCount = ARRAY_SIZE(global_extensions); + memcpy(pProperties, global_extensions, sizeof(global_extensions)); return VK_SUCCESS; } VkResult anv_GetPhysicalDeviceExtensionProperties( VkPhysicalDevice physicalDevice, - uint32_t extensionIndex, + const char* pLayerName, + uint32_t* pCount, VkExtensionProperties* pProperties) { + if (pProperties == NULL) { + *pCount = 0; + return VK_SUCCESS; + } + /* None supported at this time */ return vk_error(VK_ERROR_INVALID_EXTENSION); } -VkResult anv_EnumerateLayers( +VkResult anv_GetGlobalLayerProperties( + uint32_t* pCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_INVALID_LAYER); +} + +VkResult anv_GetPhysicalDeviceLayerProperties( VkPhysicalDevice physicalDevice, - size_t maxStringSize, - size_t* pLayerCount, - char* const* pOutLayers, - void* pReserved) + uint32_t* pCount, + VkLayerProperties* pProperties) { - *pLayerCount = 0; + if (pProperties == NULL) { + *pCount = 0; + return VK_SUCCESS; + } - return VK_SUCCESS; + /* None supported at this time */ + return vk_error(VK_ERROR_INVALID_LAYER); } VkResult anv_GetDeviceQueue( -- cgit v1.2.3 From 2e2f48f840b844569de29d04225bec70c81087f4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: 
Tue, 14 Jul 2015 16:34:31 -0700 Subject: vk: Remove abreviations --- include/vulkan/vulkan.h | 54 ++++++++++++++++++++++++------------------------- src/vulkan/device.c | 10 ++++----- src/vulkan/meta.c | 34 +++++++++++++++---------------- src/vulkan/pipeline.c | 34 +++++++++++++++---------------- 4 files changed, 66 insertions(+), 66 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index aeb0e2bdec9..d67b163c302 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -170,10 +170,10 @@ typedef enum { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 7, VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 8, VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 9, - VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO = 10, - VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO = 11, - VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO = 12, - VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO = 13, + VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO = 10, + VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO = 11, + VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO = 12, + VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO = 13, VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO = 14, VK_STRUCTURE_TYPE_EVENT_CREATE_INFO = 15, VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 16, @@ -182,13 +182,13 @@ typedef enum { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 19, VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 20, VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 21, - VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO = 22, - VK_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO = 23, - VK_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO = 24, - VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO = 25, - VK_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO = 26, - VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO = 27, - VK_STRUCTURE_TYPE_PIPELINE_DS_STATE_CREATE_INFO = 28, + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO = 22, + 
VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO = 23, + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO = 24, + VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO = 25, + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO = 26, + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO = 27, + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO = 28, VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO = 29, VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 30, VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO = 31, @@ -1534,19 +1534,19 @@ typedef struct { const void* pNext; VkPrimitiveTopology topology; VkBool32 primitiveRestartEnable; -} VkPipelineIaStateCreateInfo; +} VkPipelineInputAssemblyStateCreateInfo; typedef struct { VkStructureType sType; const void* pNext; uint32_t patchControlPoints; -} VkPipelineTessStateCreateInfo; +} VkPipelineTessellationStateCreateInfo; typedef struct { VkStructureType sType; const void* pNext; uint32_t viewportCount; -} VkPipelineVpStateCreateInfo; +} VkPipelineViewportStateCreateInfo; typedef struct { VkStructureType sType; @@ -1556,7 +1556,7 @@ typedef struct { VkFillMode fillMode; VkCullMode cullMode; VkFrontFace frontFace; -} VkPipelineRsStateCreateInfo; +} VkPipelineRasterStateCreateInfo; typedef struct { VkStructureType sType; @@ -1565,7 +1565,7 @@ typedef struct { VkBool32 sampleShadingEnable; float minSampleShading; VkSampleMask sampleMask; -} VkPipelineMsStateCreateInfo; +} VkPipelineMultisampleStateCreateInfo; typedef struct { VkStencilOp stencilFailOp; @@ -1584,7 +1584,7 @@ typedef struct { VkBool32 stencilTestEnable; VkStencilOpState front; VkStencilOpState back; -} VkPipelineDsStateCreateInfo; +} VkPipelineDepthStencilStateCreateInfo; typedef struct { VkBool32 blendEnable; @@ -1595,7 +1595,7 @@ typedef struct { VkBlend destBlendAlpha; VkBlendOp blendOpAlpha; VkChannelFlags channelWriteMask; -} VkPipelineCbAttachmentState; +} VkPipelineColorBlendAttachmentState; typedef struct { VkStructureType sType; @@ -1604,8 +1604,8 @@ 
typedef struct { VkBool32 logicOpEnable; VkLogicOp logicOp; uint32_t attachmentCount; - const VkPipelineCbAttachmentState* pAttachments; -} VkPipelineCbStateCreateInfo; + const VkPipelineColorBlendAttachmentState* pAttachments; +} VkPipelineColorBlendStateCreateInfo; typedef struct { VkStructureType sType; @@ -1613,13 +1613,13 @@ typedef struct { uint32_t stageCount; const VkPipelineShaderStageCreateInfo* pStages; const VkPipelineVertexInputStateCreateInfo* pVertexInputState; - const VkPipelineIaStateCreateInfo* pIaState; - const VkPipelineTessStateCreateInfo* pTessState; - const VkPipelineVpStateCreateInfo* pVpState; - const VkPipelineRsStateCreateInfo* pRsState; - const VkPipelineMsStateCreateInfo* pMsState; - const VkPipelineDsStateCreateInfo* pDsState; - const VkPipelineCbStateCreateInfo* pCbState; + const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState; + const VkPipelineTessellationStateCreateInfo* pTessellationState; + const VkPipelineViewportStateCreateInfo* pViewportState; + const VkPipelineRasterStateCreateInfo* pRasterState; + const VkPipelineMultisampleStateCreateInfo* pMultisampleState; + const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState; + const VkPipelineColorBlendStateCreateInfo* pColorBlendState; VkPipelineCreateFlags flags; VkPipelineLayout layout; VkRenderPass renderPass; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 752b7813396..7ab7a05706f 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2192,7 +2192,7 @@ VkResult anv_CreateDynamicViewportState( ANV_FROM_HANDLE(anv_device, device, _device); struct anv_dynamic_vp_state *state; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO); state = anv_device_alloc(device, sizeof(*state), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); @@ -2295,7 +2295,7 @@ VkResult anv_CreateDynamicRasterState( ANV_FROM_HANDLE(anv_device, device, _device); struct 
anv_dynamic_rs_state *state; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO); state = anv_device_alloc(device, sizeof(*state), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); @@ -2347,7 +2347,7 @@ VkResult anv_CreateDynamicColorBlendState( ANV_FROM_HANDLE(anv_device, device, _device); struct anv_dynamic_cb_state *state; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO); state = anv_device_alloc(device, sizeof(*state), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); @@ -2388,7 +2388,7 @@ VkResult anv_CreateDynamicDepthStencilState( ANV_FROM_HANDLE(anv_device, device, _device); struct anv_dynamic_ds_state *state; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO); state = anv_device_alloc(device, sizeof(*state), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); @@ -3930,7 +3930,7 @@ VkResult anv_CreateFramebuffer( anv_CreateDynamicViewportState(anv_device_to_handle(device), &(VkDynamicViewportStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, + .sType = VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO, .viewportAndScissorCount = 1, .pViewports = (VkViewport[]) { { diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 140b6418377..90447dc81d7 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -111,23 +111,23 @@ anv_device_init_meta_clear_state(struct anv_device *device) .pSpecializationInfo = NULL, }, .pVertexInputState = &vi_create_info, - .pIaState = &(VkPipelineIaStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .topology = 
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, .primitiveRestartEnable = false, }, - .pRsState = &(VkPipelineRsStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, + .pRasterState = &(VkPipelineRasterStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, .depthClipEnable = true, .rasterizerDiscardEnable = false, .fillMode = VK_FILL_MODE_SOLID, .cullMode = VK_CULL_MODE_NONE, .frontFace = VK_FRONT_FACE_CCW }, - .pCbState = &(VkPipelineCbStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 1, - .pAttachments = (VkPipelineCbAttachmentState []) { + .pAttachments = (VkPipelineColorBlendAttachmentState []) { { .channelWriteMask = VK_CHANNEL_A_BIT | VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, } @@ -443,23 +443,23 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, }, .pVertexInputState = &vi_create_info, - .pIaState = &(VkPipelineIaStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, .primitiveRestartEnable = false, }, - .pRsState = &(VkPipelineRsStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, + .pRasterState = &(VkPipelineRasterStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, .depthClipEnable = true, .rasterizerDiscardEnable = false, .fillMode = VK_FILL_MODE_SOLID, .cullMode = VK_CULL_MODE_NONE, .frontFace = VK_FRONT_FACE_CCW }, - .pCbState = &(VkPipelineCbStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 1, - .pAttachments = (VkPipelineCbAttachmentState []) { + .pAttachments = (VkPipelineColorBlendAttachmentState []) { { .channelWriteMask = VK_CHANNEL_A_BIT | VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, } @@ -1406,19 +1406,19 @@ anv_device_init_meta(struct anv_device *device) anv_CreateDynamicRasterState(anv_device_to_handle(device), &(VkDynamicRasterStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO, }, &device->meta_state.shared.rs_state); anv_CreateDynamicColorBlendState(anv_device_to_handle(device), &(VkDynamicColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO + .sType = VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO }, &device->meta_state.shared.cb_state); anv_CreateDynamicDepthStencilState(anv_device_to_handle(device), &(VkDynamicDepthStencilStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO + .sType = VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO }, &device->meta_state.shared.ds_state); } diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 42a900d7ab0..c6dc499e52c 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -225,7 +225,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, static void emit_ia_state(struct anv_pipeline *pipeline, - const VkPipelineIaStateCreateInfo *info, + const VkPipelineInputAssemblyStateCreateInfo *info, const struct anv_pipeline_create_info *extra) { static const uint32_t vk_to_gen_primitive_type[] = { @@ -258,7 +258,7 @@ emit_ia_state(struct anv_pipeline *pipeline, static void emit_rs_state(struct anv_pipeline *pipeline, - const VkPipelineRsStateCreateInfo *info, + const VkPipelineRasterStateCreateInfo *info, const struct anv_pipeline_create_info *extra) { static const uint32_t vk_to_gen_cullmode[] = { @@ -316,7 +316,7 @@ emit_rs_state(struct 
anv_pipeline *pipeline, static void emit_cb_state(struct anv_pipeline *pipeline, - const VkPipelineCbStateCreateInfo *info) + const VkPipelineColorBlendStateCreateInfo *info) { struct anv_device *device = pipeline->device; @@ -381,7 +381,7 @@ emit_cb_state(struct anv_pipeline *pipeline, GEN8_BLEND_STATE_pack(NULL, state, &blend_state); for (uint32_t i = 0; i < info->attachmentCount; i++) { - const VkPipelineCbAttachmentState *a = &info->pAttachments[i]; + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; struct GEN8_BLEND_STATE_ENTRY entry = { .LogicOpEnable = info->logicOpEnable, @@ -434,7 +434,7 @@ static const uint32_t vk_to_gen_stencil_op[] = { static void emit_ds_state(struct anv_pipeline *pipeline, - const VkPipelineDsStateCreateInfo *info) + const VkPipelineDepthStencilStateCreateInfo *info) { if (info == NULL) { /* We're going to OR this together with the dynamic state. We need @@ -520,12 +520,12 @@ anv_pipeline_create( anv_shader_from_handle(pCreateInfo->pStages[i].shader); } - if (pCreateInfo->pTessState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO"); - if (pCreateInfo->pVpState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO"); - if (pCreateInfo->pMsState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO"); + if (pCreateInfo->pTessellationState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + if (pCreateInfo->pViewportState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO"); + if (pCreateInfo->pMultisampleState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); pipeline->use_repclear = extra && extra->use_repclear; @@ -542,12 +542,12 @@ anv_pipeline_create( assert(pCreateInfo->pVertexInputState); emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); - assert(pCreateInfo->pIaState); - emit_ia_state(pipeline, pCreateInfo->pIaState, extra); - assert(pCreateInfo->pRsState); - emit_rs_state(pipeline, 
pCreateInfo->pRsState, extra); - emit_ds_state(pipeline, pCreateInfo->pDsState); - emit_cb_state(pipeline, pCreateInfo->pCbState); + assert(pCreateInfo->pInputAssemblyState); + emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); + assert(pCreateInfo->pRasterState); + emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); + emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + emit_cb_state(pipeline, pCreateInfo->pColorBlendState); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_STATISTICS, .StatisticsEnable = true); -- cgit v1.2.3 From 2c4dc92f405e705a5487bab0bae32b350608f960 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 14 Jul 2015 17:04:46 -0700 Subject: vk/vulkan.h: Rename FormatInfo to FormatProperties --- include/vulkan/vulkan.h | 6 +++--- src/vulkan/formats.c | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index cd270b3b1ad..b5526286112 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1999,7 +1999,7 @@ typedef VkResult (VKAPI *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCrea typedef VkResult (VKAPI *PFN_vkDestroyInstance)(VkInstance instance); typedef VkResult (VKAPI *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatInfo)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatInfo); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceLimits)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceLimits* pLimits); typedef VkResult (VKAPI 
*PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueCount)(VkPhysicalDevice physicalDevice, uint32_t* pCount); @@ -2157,10 +2157,10 @@ VkResult VKAPI vkGetPhysicalDeviceFeatures( VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); -VkResult VKAPI vkGetPhysicalDeviceFormatInfo( +VkResult VKAPI vkGetPhysicalDeviceFormatProperties( VkPhysicalDevice physicalDevice, VkFormat format, - VkFormatProperties* pFormatInfo); + VkFormatProperties* pFormatProperties); VkResult VKAPI vkGetPhysicalDeviceLimits( VkPhysicalDevice physicalDevice, diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index a00dc8df75a..361dd70fa3e 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -245,20 +245,20 @@ struct surface_format_info { extern const struct surface_format_info surface_formats[]; -VkResult anv_validate_GetPhysicalDeviceFormatInfo( +VkResult anv_validate_GetPhysicalDeviceFormatProperties( VkPhysicalDevice physicalDevice, VkFormat _format, - VkFormatProperties* pFormatInfo) + VkFormatProperties* pFormatProperties) { const struct anv_format *format = anv_format_for_vk_format(_format); - fprintf(stderr, "vkGetFormatInfo(%s)\n", format->name); - return anv_GetPhysicalDeviceFormatInfo(physicalDevice, _format, pFormatInfo); + fprintf(stderr, "vkGetFormatProperties(%s)\n", format->name); + return anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); } -VkResult anv_GetPhysicalDeviceFormatInfo( +VkResult anv_GetPhysicalDeviceFormatProperties( VkPhysicalDevice physicalDevice, VkFormat _format, - VkFormatProperties* pFormatInfo) + VkFormatProperties* pFormatProperties) { ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); const struct surface_format_info *info; @@ -296,14 +296,14 @@ VkResult anv_GetPhysicalDeviceFormatInfo( linear |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; } - 
pFormatInfo->linearTilingFeatures = linear; - pFormatInfo->optimalTilingFeatures = tiled; + pFormatProperties->linearTilingFeatures = linear; + pFormatProperties->optimalTilingFeatures = tiled; return VK_SUCCESS; unsupported: - pFormatInfo->linearTilingFeatures = 0; - pFormatInfo->optimalTilingFeatures = 0; + pFormatProperties->linearTilingFeatures = 0; + pFormatProperties->optimalTilingFeatures = 0; return VK_SUCCESS; } -- cgit v1.2.3 From c7fcfebd5bb5e1fb51b15615ecb091dbc986885d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 14 Jul 2015 17:06:11 -0700 Subject: vk: Add stubs for all the sparse resource stuff --- include/vulkan/vulkan.h | 137 +++++++++++++++++++++++++++++++++++++++--------- src/vulkan/device.c | 38 +++++++++++--- src/vulkan/formats.c | 25 +++++++++ 3 files changed, 168 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index c5e4a8a3df0..b9db32954a8 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -31,7 +31,6 @@ extern "C" { /* ** This header is generated from the Khronos Vulkan XML API Registry. 
** -** Generated on date 20150624 */ @@ -481,9 +480,10 @@ typedef enum { VK_IMAGE_ASPECT_COLOR = 0, VK_IMAGE_ASPECT_DEPTH = 1, VK_IMAGE_ASPECT_STENCIL = 2, + VK_IMAGE_ASPECT_METADATA = 3, VK_IMAGE_ASPECT_BEGIN_RANGE = VK_IMAGE_ASPECT_COLOR, - VK_IMAGE_ASPECT_END_RANGE = VK_IMAGE_ASPECT_STENCIL, - VK_IMAGE_ASPECT_NUM = (VK_IMAGE_ASPECT_STENCIL - VK_IMAGE_ASPECT_COLOR + 1), + VK_IMAGE_ASPECT_END_RANGE = VK_IMAGE_ASPECT_METADATA, + VK_IMAGE_ASPECT_NUM = (VK_IMAGE_ASPECT_METADATA - VK_IMAGE_ASPECT_COLOR + 1), VK_IMAGE_ASPECT_MAX_ENUM = 0x7FFFFFFF } VkImageAspect; @@ -896,6 +896,7 @@ typedef enum { VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT = 0x00000002, VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT = 0x00000004, VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT = 0x00000008, + VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT = 0x00000010, } VkMemoryPropertyFlagBits; typedef VkFlags VkMemoryPropertyFlags; @@ -910,6 +911,18 @@ typedef enum { typedef VkFlags VkDeviceCreateFlags; typedef VkFlags VkMemoryMapFlags; +typedef enum { + VK_SPARSE_IMAGE_FMT_SINGLE_MIPTAIL_BIT = 0x00000001, + VK_SPARSE_IMAGE_FMT_ALIGNED_MIP_SIZE_BIT = 0x00000002, + VK_SPARSE_IMAGE_FMT_NONSTD_BLOCK_SIZE_BIT = 0x00000004, +} VkSparseImageFormatFlagBits; +typedef VkFlags VkSparseImageFormatFlags; + +typedef enum { + VK_SPARSE_MEMORY_BIND_REPLICATE_64KIB_BLOCK_BIT = 0x00000001, +} VkSparseMemoryBindFlagBits; +typedef VkFlags VkSparseMemoryBindFlags; + typedef enum { VK_FENCE_CREATE_SIGNALED_BIT = 0x00000001, } VkFenceCreateFlagBits; @@ -957,14 +970,18 @@ typedef VkFlags VkBufferUsageFlags; typedef enum { VK_BUFFER_CREATE_SPARSE_BIT = 0x00000001, + VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, + VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004, } VkBufferCreateFlagBits; typedef VkFlags VkBufferCreateFlags; typedef enum { - VK_IMAGE_CREATE_INVARIANT_DATA_BIT = 0x00000001, - VK_IMAGE_CREATE_SPARSE_BIT = 0x00000002, - VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000004, - VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000008, + 
VK_IMAGE_CREATE_SPARSE_BIT = 0x00000001, + VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, + VK_IMAGE_CREATE_SPARSE_ALIASED_BIT = 0x00000004, + VK_IMAGE_CREATE_INVARIANT_DATA_BIT = 0x00000008, + VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000010, + VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000020, } VkImageCreateFlagBits; typedef VkFlags VkImageCreateFlags; @@ -1065,6 +1082,7 @@ typedef enum { VK_IMAGE_ASPECT_COLOR_BIT = 0x00000001, VK_IMAGE_ASPECT_DEPTH_BIT = 0x00000002, VK_IMAGE_ASPECT_STENCIL_BIT = 0x00000004, + VK_IMAGE_ASPECT_METADATA_BIT = 0x00000008, } VkImageAspectFlagBits; typedef VkFlags VkImageAspectFlags; @@ -1152,6 +1170,23 @@ typedef struct { VkBool32 shaderInt64; VkBool32 shaderFloat16; VkBool32 shaderInt16; + VkBool32 shaderResourceResidency; + VkBool32 shaderResourceMinLOD; + VkBool32 sparse; + VkBool32 sparseResidencyBuffer; + VkBool32 sparseResidencyImage2D; + VkBool32 sparseResidencyImage3D; + VkBool32 sparseResidency2Samples; + VkBool32 sparseResidency4Samples; + VkBool32 sparseResidency8Samples; + VkBool32 sparseResidency16Samples; + VkBool32 sparseResidencyStandard2DBlockShape; + VkBool32 sparseResidencyStandard2DMSBlockShape; + VkBool32 sparseResidencyStandard3DBlockShape; + VkBool32 sparseResidencyAlignedMipSize; + VkBool32 sparseResidencyNonResident; + VkBool32 sparseResidencyNonResidentStrict; + VkBool32 sparseResidencyAliased; } VkPhysicalDeviceFeatures; typedef struct { @@ -1344,6 +1379,33 @@ typedef struct { uint32_t memoryTypeBits; } VkMemoryRequirements; +typedef struct { + int32_t width; + int32_t height; + int32_t depth; +} VkExtent3D; + +typedef struct { + VkImageAspect aspect; + VkExtent3D imageGranularity; + VkSparseImageFormatFlags flags; +} VkSparseImageFormatProperties; + +typedef struct { + VkSparseImageFormatProperties formatProps; + uint32_t imageMipTailStartLOD; + VkDeviceSize imageMipTailSize; + VkDeviceSize imageMipTailOffset; + VkDeviceSize imageMipTailStride; +} VkSparseImageMemoryRequirements; + +typedef struct { + 
VkDeviceSize offset; + VkDeviceSize memOffset; + VkDeviceMemory mem; + VkSparseMemoryBindFlags flags; +} VkSparseMemoryBindInfo; + typedef struct { VkImageAspect aspect; uint32_t mipLevel; @@ -1356,17 +1418,14 @@ typedef struct { int32_t z; } VkOffset3D; -typedef struct { - int32_t width; - int32_t height; - int32_t depth; -} VkExtent3D; - typedef struct { VkImageSubresource subresource; VkOffset3D offset; VkExtent3D extent; -} VkImageMemoryBindInfo; + VkDeviceSize memOffset; + VkDeviceMemory mem; + VkSparseMemoryBindFlags flags; +} VkSparseImageMemoryBindInfo; typedef struct { VkStructureType sType; @@ -2010,7 +2069,7 @@ typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceLimits)(VkPhysicalDevice physica typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueCount)(VkPhysicalDevice physicalDevice, uint32_t* pCount); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueProperties)(VkPhysicalDevice physicalDevice, uint32_t count, VkPhysicalDeviceQueueProperties* pQueueProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperies); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); @@ -2030,12 +2089,16 @@ typedef VkResult (VKAPI *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem) typedef VkResult (VKAPI *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* 
pMemRanges); typedef VkResult (VKAPI *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); typedef VkResult (VKAPI *PFN_vkDestroyObject)(VkDevice device, VkObjectType objType, VkObject object); +typedef VkResult (VKAPI *PFN_vkGetDeviceMemoryCommitment)(VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes); typedef VkResult (VKAPI *PFN_vkBindBufferMemory)(VkDevice device, VkBuffer buffer, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkBindImageMemory)(VkDevice device, VkImage image, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkGetBufferMemoryRequirements)(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); typedef VkResult (VKAPI *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); -typedef VkResult (VKAPI *PFN_vkQueueBindSparseBufferMemory)(VkQueue queue, VkBuffer buffer, VkDeviceSize rangeOffset, VkDeviceSize rangeSize, VkDeviceMemory mem, VkDeviceSize memOffset); -typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageMemory)(VkQueue queue, VkImage image, const VkImageMemoryBindInfo* pBindInfo, VkDeviceMemory mem, VkDeviceSize memOffset); +typedef VkResult (VKAPI *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t* pNumRequirements, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceSparseImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, uint32_t samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pNumProperties, VkSparseImageFormatProperties* pProperties); +typedef VkResult (VKAPI *PFN_vkQueueBindSparseBufferMemory)(VkQueue queue, VkBuffer buffer, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); +typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageOpaqueMemory)(VkQueue queue, 
VkImage image, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); +typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageMemory)(VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseImageMemoryBindInfo* pBindInfo); typedef VkResult (VKAPI *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); typedef VkResult (VKAPI *PFN_vkDestroyFence)(VkDevice device, VkFence fence); typedef VkResult (VKAPI *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); @@ -2195,7 +2258,7 @@ VkResult VKAPI vkGetPhysicalDeviceQueueProperties( VkResult VKAPI vkGetPhysicalDeviceMemoryProperties( VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties* pMemoryProperies); + VkPhysicalDeviceMemoryProperties* pMemoryProperties); PFN_vkVoidFunction VKAPI vkGetInstanceProcAddr( VkInstance instance, @@ -2287,6 +2350,11 @@ VkResult VKAPI vkDestroyObject( VkObjectType objType, VkObject object); +VkResult VKAPI vkGetDeviceMemoryCommitment( + VkDevice device, + VkDeviceMemory memory, + VkDeviceSize* pCommittedMemoryInBytes); + VkResult VKAPI vkBindBufferMemory( VkDevice device, VkBuffer buffer, @@ -2309,20 +2377,39 @@ VkResult VKAPI vkGetImageMemoryRequirements( VkImage image, VkMemoryRequirements* pMemoryRequirements); +VkResult VKAPI vkGetImageSparseMemoryRequirements( + VkDevice device, + VkImage image, + uint32_t* pNumRequirements, + VkSparseImageMemoryRequirements* pSparseMemoryRequirements); + +VkResult VKAPI vkGetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + uint32_t samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t* pNumProperties, + VkSparseImageFormatProperties* pProperties); + VkResult VKAPI vkQueueBindSparseBufferMemory( VkQueue queue, VkBuffer buffer, - VkDeviceSize rangeOffset, - VkDeviceSize rangeSize, - VkDeviceMemory mem, - VkDeviceSize memOffset); + uint32_t numBindings, + const 
VkSparseMemoryBindInfo* pBindInfo); + +VkResult VKAPI vkQueueBindSparseImageOpaqueMemory( + VkQueue queue, + VkImage image, + uint32_t numBindings, + const VkSparseMemoryBindInfo* pBindInfo); VkResult VKAPI vkQueueBindSparseImageMemory( VkQueue queue, VkImage image, - const VkImageMemoryBindInfo* pBindInfo, - VkDeviceMemory mem, - VkDeviceSize memOffset); + uint32_t numBindings, + const VkSparseImageMemoryBindInfo* pBindInfo); VkResult VKAPI vkCreateFence( VkDevice device, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 7ab7a05706f..b53000a5372 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1339,6 +1339,24 @@ VkResult anv_GetImageMemoryRequirements( return VK_SUCCESS; } +VkResult anv_GetImageSparseMemoryRequirements( + VkDevice device, + VkImage image, + uint32_t* pNumRequirements, + VkSparseImageMemoryRequirements* pSparseMemoryRequirements) +{ + return vk_error(VK_UNSUPPORTED); +} + +VkResult anv_GetDeviceMemoryCommitment( + VkDevice device, + VkDeviceMemory memory, + VkDeviceSize* pCommittedMemoryInBytes) +{ + *pCommittedMemoryInBytes = 0; + stub_return(VK_SUCCESS); +} + VkResult anv_BindBufferMemory( VkDevice device, VkBuffer _buffer, @@ -1372,10 +1390,17 @@ VkResult anv_BindImageMemory( VkResult anv_QueueBindSparseBufferMemory( VkQueue queue, VkBuffer buffer, - VkDeviceSize rangeOffset, - VkDeviceSize rangeSize, - VkDeviceMemory mem, - VkDeviceSize memOffset) + uint32_t numBindings, + const VkSparseMemoryBindInfo* pBindInfo) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_QueueBindSparseImageOpaqueMemory( + VkQueue queue, + VkImage image, + uint32_t numBindings, + const VkSparseMemoryBindInfo* pBindInfo) { stub_return(VK_UNSUPPORTED); } @@ -1383,9 +1408,8 @@ VkResult anv_QueueBindSparseBufferMemory( VkResult anv_QueueBindSparseImageMemory( VkQueue queue, VkImage image, - const VkImageMemoryBindInfo* pBindInfo, - VkDeviceMemory mem, - VkDeviceSize memOffset) + uint32_t numBindings, + const VkSparseImageMemoryBindInfo* 
pBindInfo) { stub_return(VK_UNSUPPORTED); } diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c index 361dd70fa3e..9d9294b7ebd 100644 --- a/src/vulkan/formats.c +++ b/src/vulkan/formats.c @@ -307,3 +307,28 @@ VkResult anv_GetPhysicalDeviceFormatProperties( return VK_SUCCESS; } + +VkResult anv_GetPhysicalDeviceImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageFormatProperties* pImageFormatProperties) +{ + /* TODO: We should do something here. Chad? */ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_GetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + uint32_t samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t* pNumProperties, + VkSparseImageFormatProperties* pProperties) +{ + stub_return(VK_UNSUPPORTED); +} -- cgit v1.2.3 From e5db209d54c313965b50fbe7403e0df8df551109 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 14 Jul 2015 17:10:37 -0700 Subject: vk/vulkan.h: Move around buffer image granularities --- include/vulkan/vulkan.h | 2 +- src/vulkan/device.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index b9db32954a8..fa9f6085d75 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1210,6 +1210,7 @@ typedef struct { uint32_t maxStorageBufferSize; uint32_t maxPushConstantsSize; uint32_t maxMemoryAllocationCount; + VkDeviceSize bufferImageGranularity; uint32_t maxBoundDescriptorSets; uint32_t maxDescriptorSets; uint32_t maxPerStageDescriptorSamplers; @@ -1375,7 +1376,6 @@ typedef struct { typedef struct { VkDeviceSize size; VkDeviceSize alignment; - VkDeviceSize granularity; uint32_t memoryTypeBits; } VkMemoryRequirements; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index b53000a5372..d86a297e11f 100644 --- a/src/vulkan/device.c +++ 
b/src/vulkan/device.c @@ -280,6 +280,7 @@ VkResult anv_GetPhysicalDeviceLimits( .maxStorageBufferSize = UINT32_MAX, .maxPushConstantsSize = 128, .maxMemoryAllocationCount = UINT32_MAX, + .bufferImageGranularity = 64, /* A cache line */ .maxBoundDescriptorSets = MAX_SETS, .maxDescriptorSets = UINT32_MAX, .maxPerStageDescriptorSamplers = 64, -- cgit v1.2.3 From 1f658bed70f92d7ecd5c5466f7c37513b331b522 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 14 Jul 2015 17:39:58 -0700 Subject: vk/device: Add stub support for command pools Real support isn't really that far away. We just need a data structure with a linked list and a few tests. --- include/vulkan/vulkan.h | 52 +++++++++++++++++++++++++++++++++++++++++++++---- src/vulkan/device.c | 31 ++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 12c6b8db016..174ec1b354c 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -109,6 +109,7 @@ VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicRasterState, VkDynamicStateObject) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicColorBlendState, VkDynamicStateObject) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicDepthStencilState, VkDynamicStateObject) VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkFramebuffer, VkNonDispatchable) +VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkCmdPool, VkNonDispatchable) typedef enum { @@ -209,9 +210,10 @@ typedef enum { VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION = 47, VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY = 48, VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 49, + VK_STRUCTURE_TYPE_CMD_POOL_CREATE_INFO = 50, VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, - VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - VK_STRUCTURE_TYPE_NUM = (VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), + VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_CMD_POOL_CREATE_INFO, + VK_STRUCTURE_TYPE_NUM = 
(VK_STRUCTURE_TYPE_CMD_POOL_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF } VkStructureType; @@ -470,6 +472,7 @@ typedef enum { VK_OBJECT_TYPE_FRAMEBUFFER = 27, VK_OBJECT_TYPE_RENDER_PASS = 28, VK_OBJECT_TYPE_PIPELINE_CACHE = 29, + VK_OBJECT_TYPE_CMD_POOL = 30, VK_OBJECT_TYPE_BEGIN_RANGE = VK_OBJECT_TYPE_INSTANCE, VK_OBJECT_TYPE_END_RANGE = VK_OBJECT_TYPE_PIPELINE_CACHE, VK_OBJECT_TYPE_NUM = (VK_OBJECT_TYPE_PIPELINE_CACHE - VK_OBJECT_TYPE_INSTANCE + 1), @@ -1076,6 +1079,17 @@ typedef enum { VK_MEMORY_INPUT_TRANSFER_BIT = 0x00000200, } VkMemoryInputFlagBits; typedef VkFlags VkMemoryInputFlags; + +typedef enum { + VK_CMD_POOL_CREATE_TRANSIENT_BIT = 0x00000001, + VK_CMD_POOL_CREATE_RESET_COMMAND_BUFFER_BIT = 0x00000002, +} VkCmdPoolCreateFlagBits; +typedef VkFlags VkCmdPoolCreateFlags; + +typedef enum { + VK_CMD_POOL_RESET_RELEASE_RESOURCES = 0x00000001, +} VkCmdPoolResetFlagBits; +typedef VkFlags VkCmdPoolResetFlags; typedef VkFlags VkCmdBufferCreateFlags; typedef enum { @@ -1087,6 +1101,11 @@ typedef enum { } VkCmdBufferOptimizeFlagBits; typedef VkFlags VkCmdBufferOptimizeFlags; +typedef enum { + VK_CMD_BUFFER_RESET_RELEASE_RESOURCES = 0x00000001, +} VkCmdBufferResetFlagBits; +typedef VkFlags VkCmdBufferResetFlags; + typedef enum { VK_IMAGE_ASPECT_COLOR_BIT = 0x00000001, VK_IMAGE_ASPECT_DEPTH_BIT = 0x00000002, @@ -1943,6 +1962,13 @@ typedef struct { VkStructureType sType; const void* pNext; uint32_t queueFamilyIndex; + VkCmdPoolCreateFlags flags; +} VkCmdPoolCreateInfo; + +typedef struct { + VkStructureType sType; + const void* pNext; + VkCmdPool cmdPool; VkCmdBufferLevel level; VkCmdBufferCreateFlags flags; } VkCmdBufferCreateInfo; @@ -2184,11 +2210,14 @@ typedef VkResult (VKAPI *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffe typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); typedef VkResult (VKAPI 
*PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass); typedef VkResult (VKAPI *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); +typedef VkResult (VKAPI *PFN_vkCreateCommandPool)(VkDevice device, const VkCmdPoolCreateInfo* pCreateInfo, VkCmdPool* pCmdPool); +typedef VkResult (VKAPI *PFN_vkDestroyCommandPool)(VkDevice device, VkCmdPool cmdPool); +typedef VkResult (VKAPI *PFN_vkResetCommandPool)(VkDevice device, VkCmdPool cmdPool, VkCmdPoolResetFlags flags); typedef VkResult (VKAPI *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCmdBufferCreateInfo* pCreateInfo, VkCmdBuffer* pCmdBuffer); typedef VkResult (VKAPI *PFN_vkDestroyCommandBuffer)(VkDevice device, VkCmdBuffer commandBuffer); typedef VkResult (VKAPI *PFN_vkBeginCommandBuffer)(VkCmdBuffer cmdBuffer, const VkCmdBufferBeginInfo* pBeginInfo); typedef VkResult (VKAPI *PFN_vkEndCommandBuffer)(VkCmdBuffer cmdBuffer); -typedef VkResult (VKAPI *PFN_vkResetCommandBuffer)(VkCmdBuffer cmdBuffer); +typedef VkResult (VKAPI *PFN_vkResetCommandBuffer)(VkCmdBuffer cmdBuffer, VkCmdBufferResetFlags flags); typedef void (VKAPI *PFN_vkCmdBindPipeline)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); typedef void (VKAPI *PFN_vkCmdBindDynamicViewportState)(VkCmdBuffer cmdBuffer, VkDynamicViewportState dynamicViewportState); typedef void (VKAPI *PFN_vkCmdBindDynamicRasterState)(VkCmdBuffer cmdBuffer, VkDynamicRasterState dynamicRasterState); @@ -2741,6 +2770,20 @@ VkResult VKAPI vkGetRenderAreaGranularity( VkRenderPass renderPass, VkExtent2D* pGranularity); +VkResult VKAPI vkCreateCommandPool( + VkDevice device, + const VkCmdPoolCreateInfo* pCreateInfo, + VkCmdPool* pCmdPool); + +VkResult VKAPI vkDestroyCommandPool( + VkDevice device, + VkCmdPool cmdPool); + +VkResult VKAPI vkResetCommandPool( + VkDevice device, + VkCmdPool cmdPool, + VkCmdPoolResetFlags flags); + VkResult VKAPI vkCreateCommandBuffer( VkDevice device, const 
VkCmdBufferCreateInfo* pCreateInfo, @@ -2758,7 +2801,8 @@ VkResult VKAPI vkEndCommandBuffer( VkCmdBuffer cmdBuffer); VkResult VKAPI vkResetCommandBuffer( - VkCmdBuffer cmdBuffer); + VkCmdBuffer cmdBuffer, + VkCmdBufferResetFlags flags); void VKAPI vkCmdBindPipeline( VkCmdBuffer cmdBuffer, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index d86a297e11f..d09a12b1b31 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1287,6 +1287,9 @@ VkResult anv_DestroyObject( case VK_OBJECT_TYPE_EVENT: return anv_DestroyEvent(_device, (VkEvent) _object); + case VK_OBJECT_TYPE_CMD_POOL: + return anv_DestroyCommandPool(_device, (VkCmdPool) _object); + default: unreachable("Invalid object type"); } @@ -2516,6 +2519,31 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) return VK_SUCCESS; } +VkResult anv_CreateCommandPool( + VkDevice device, + const VkCmdPoolCreateInfo* pCreateInfo, + VkCmdPool* pCmdPool) +{ + *pCmdPool = 7; + + stub_return(VK_SUCCESS); +} + +VkResult anv_DestroyCommandPool( + VkDevice device, + VkCmdPool cmdPool) +{ + stub_return(VK_SUCCESS); +} + +VkResult anv_ResetCommandPool( + VkDevice device, + VkCmdPool cmdPool, + VkCmdPoolResetFlags flags) +{ + stub_return(VK_UNSUPPORTED); +} + VkResult anv_CreateCommandBuffer( VkDevice _device, const VkCmdBufferCreateInfo* pCreateInfo, @@ -2863,7 +2891,8 @@ VkResult anv_EndCommandBuffer( } VkResult anv_ResetCommandBuffer( - VkCmdBuffer cmdBuffer) + VkCmdBuffer cmdBuffer, + VkCmdBufferResetFlags flags) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); -- cgit v1.2.3 From 0eeba6b80c192e12f7b7e2c0eeb6c5a9589bb5b7 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 16:01:42 -0700 Subject: vk: Add finishmes for VkDescriptorPool VkDescriptorPool is a stub object. As a consequence, it's impossible to free descriptor set memory. 
--- src/vulkan/device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index d09a12b1b31..1de60d4fc8e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2062,8 +2062,8 @@ VkResult anv_CreateDescriptorPool( const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool) { + anv_finishme("VkDescriptorPool is a stub"); *pDescriptorPool = 1; - return VK_SUCCESS; } @@ -2071,7 +2071,7 @@ VkResult anv_DestroyDescriptorPool( VkDevice _device, VkDescriptorPool _pool) { - /* VkDescriptorPool is a dummy object. */ + anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); return VK_SUCCESS; } @@ -2079,6 +2079,7 @@ VkResult anv_ResetDescriptorPool( VkDevice device, VkDescriptorPool descriptorPool) { + anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); return VK_SUCCESS; } -- cgit v1.2.3 From 5f46c4608fddbdd4f9fc8e1fc54fa52148487fc9 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 14 Jul 2015 16:04:56 -0700 Subject: vk: Fix indentation of anv_dynamic_cb_state --- src/vulkan/private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 7dc420643fe..b7d313d2933 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -588,7 +588,7 @@ struct anv_dynamic_ds_state { }; struct anv_dynamic_cb_state { - uint32_t state_color_calc[GEN8_COLOR_CALC_STATE_length]; + uint32_t state_color_calc[GEN8_COLOR_CALC_STATE_length]; }; -- cgit v1.2.3 From 9aabe69028d0923c339bb448fe700e6b74409280 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 14 Jul 2015 22:59:27 -0700 Subject: vk/device: explain why a flush is necessary Jason found this from experimenting, but the docs give a reasonable explanation of why it's necessary. 
--- src/vulkan/device.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 1de60d4fc8e..51d729a6f49 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3044,9 +3044,16 @@ anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) */ anv_cmd_buffer_emit_state_base_address(cmd_buffer); - /* It seems like just changing the state base addresses isn't enough. - * Invalidating the cache seems to be enough to cause things to - * propagate. However, I'm not 100% sure what we're supposed to do. + /* The sampler unit caches SURFACE_STATE and RENDER_SURFACE_STATE entries, + * and the data port uses the same cache. When changing the Surface State + * Base Address, we need to flush the texture cache so that it can pick up + * on the new SURFACE_STATE's. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache + * must be invalidated to ensure the new surface or sampler state is + * fetched from system memory. 
*/ anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, .TextureCacheInvalidationEnable = true); -- cgit v1.2.3 From e375f722a697286b235d1699bb1bf73021215ed4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 15 Jul 2015 11:09:02 -0700 Subject: vk/device: More documentation on surface state flushing --- src/vulkan/device.c | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 51d729a6f49..fd96457033e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -3044,16 +3044,42 @@ anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) */ anv_cmd_buffer_emit_state_base_address(cmd_buffer); - /* The sampler unit caches SURFACE_STATE and RENDER_SURFACE_STATE entries, - * and the data port uses the same cache. When changing the Surface State - * Base Address, we need to flush the texture cache so that it can pick up - * on the new SURFACE_STATE's. From the Broadwell PRM, + /* After re-setting the surface state base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, * Shared Function > 3D Sampler > State > State Caching (page 96): * - * Whenever the value of the Dynamic_State_Base_Addr, - * Surface_State_Base_Addr are altered, the L1 state cache - * must be invalidated to ensure the new surface or sampler state is - * fetched from system memory. + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] 
+ * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. */ anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, .TextureCacheInvalidationEnable = true); -- cgit v1.2.3 From 94e473c99399f45a556c62e9d8bbe979e018d9c6 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 11:26:31 -0700 Subject: vk: Remove struct anv_object Trivial removal because vkDestroyObject() no longer uses it. 
--- src/vulkan/device.c | 60 --------------------------------------------------- src/vulkan/pipeline.c | 23 ++------------------ src/vulkan/private.h | 17 --------------- src/vulkan/query.c | 17 --------------- 4 files changed, 2 insertions(+), 115 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index fd96457033e..fd0b80d272b 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1418,19 +1418,6 @@ VkResult anv_QueueBindSparseImageMemory( stub_return(VK_UNSUPPORTED); } -static void -anv_fence_destroy(struct anv_device *device, - struct anv_object *object, - VkObjectType obj_type) -{ - struct anv_fence *fence = (struct anv_fence *) object; - - assert(obj_type == VK_OBJECT_TYPE_FENCE); - - anv_DestroyFence(anv_device_to_handle(device), - anv_fence_to_handle(fence)); -} - VkResult anv_CreateFence( VkDevice _device, const VkFenceCreateInfo* pCreateInfo, @@ -1454,8 +1441,6 @@ VkResult anv_CreateFence( if (result != VK_SUCCESS) goto fail; - fence->base.destructor = anv_fence_destroy; - fence->bo.map = anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size); batch.next = batch.start = fence->bo.map; @@ -2200,19 +2185,6 @@ clamp_int64(int64_t x, int64_t min, int64_t max) return max; } -static void -anv_dynamic_vp_state_destroy(struct anv_device *device, - struct anv_object *object, - VkObjectType obj_type) -{ - struct anv_dynamic_vp_state *vp_state = (void *) object; - - assert(obj_type == VK_OBJECT_TYPE_DYNAMIC_VP_STATE); - - anv_DestroyDynamicViewportState(anv_device_to_handle(device), - anv_dynamic_vp_state_to_handle(vp_state)); -} - VkResult anv_CreateDynamicViewportState( VkDevice _device, const VkDynamicViewportStateCreateInfo* pCreateInfo, @@ -2228,8 +2200,6 @@ VkResult anv_CreateDynamicViewportState( if (state == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - state->base.destructor = anv_dynamic_vp_state_destroy; - unsigned count = pCreateInfo->viewportAndScissorCount; state->sf_clip_vp = 
anv_state_pool_alloc(&device->dynamic_state_pool, count * 64, 64); @@ -2466,19 +2436,6 @@ VkResult anv_DestroyDynamicDepthStencilState( // Command buffer functions -static void -anv_cmd_buffer_destroy(struct anv_device *device, - struct anv_object *object, - VkObjectType obj_type) -{ - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object; - - assert(obj_type == VK_OBJECT_TYPE_COMMAND_BUFFER); - - anv_DestroyCommandBuffer(anv_device_to_handle(device), - anv_cmd_buffer_to_handle(cmd_buffer)); -} - static VkResult anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) { @@ -2561,8 +2518,6 @@ VkResult anv_CreateCommandBuffer( if (cmd_buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - cmd_buffer->base.destructor = anv_cmd_buffer_destroy; - cmd_buffer->device = device; cmd_buffer->rs_state = NULL; cmd_buffer->vp_state = NULL; @@ -3972,19 +3927,6 @@ void anv_CmdPipelineBarrier( GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd); } -static void -anv_framebuffer_destroy(struct anv_device *device, - struct anv_object *object, - VkObjectType obj_type) -{ - struct anv_framebuffer *fb = (struct anv_framebuffer *)object; - - assert(obj_type == VK_OBJECT_TYPE_FRAMEBUFFER); - - anv_DestroyFramebuffer(anv_device_to_handle(device), - anv_framebuffer_to_handle(fb)); -} - VkResult anv_CreateFramebuffer( VkDevice _device, const VkFramebufferCreateInfo* pCreateInfo, @@ -4002,8 +3944,6 @@ VkResult anv_CreateFramebuffer( if (framebuffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - framebuffer->base.destructor = anv_framebuffer_destroy; - framebuffer->attachment_count = pCreateInfo->attachmentCount; for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { ANV_FROM_HANDLE(anv_attachment_view, view, diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index c6dc499e52c..afe4f5b767b 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -467,19 +467,6 @@ emit_ds_state(struct anv_pipeline *pipeline, 
GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->state_wm_depth_stencil, &wm_depth_stencil); } -static void -anv_pipeline_destroy(struct anv_device *device, - struct anv_object *object, - VkObjectType obj_type) -{ - struct anv_pipeline *pipeline = (struct anv_pipeline*) object; - - assert(obj_type == VK_OBJECT_TYPE_PIPELINE); - - anv_DestroyPipeline(anv_device_to_handle(device), - anv_pipeline_to_handle(pipeline)); -} - VkResult anv_pipeline_create( VkDevice _device, @@ -499,7 +486,6 @@ anv_pipeline_create( if (pipeline == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - pipeline->base.destructor = anv_pipeline_destroy; pipeline->device = device; pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); @@ -789,7 +775,6 @@ VkResult anv_CreateGraphicsPipelines( const VkGraphicsPipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines) { - ANV_FROM_HANDLE(anv_device, device, _device); VkResult result = VK_SUCCESS; unsigned i = 0; @@ -798,8 +783,7 @@ VkResult anv_CreateGraphicsPipelines( NULL, &pPipelines[i]); if (result != VK_SUCCESS) { for (unsigned j = 0; j < i; j++) { - anv_pipeline_destroy(device, (struct anv_object *)pPipelines[j], - VK_OBJECT_TYPE_PIPELINE); + anv_DestroyPipeline(_device, pPipelines[j]); } return result; @@ -825,7 +809,6 @@ static VkResult anv_compute_pipeline_create( if (pipeline == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - pipeline->base.destructor = anv_pipeline_destroy; pipeline->device = device; pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); @@ -888,7 +871,6 @@ VkResult anv_CreateComputePipelines( const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines) { - ANV_FROM_HANDLE(anv_device, device, _device); VkResult result = VK_SUCCESS; unsigned i = 0; @@ -897,8 +879,7 @@ VkResult anv_CreateComputePipelines( &pPipelines[i]); if (result != VK_SUCCESS) { for (unsigned j = 0; j < i; j++) { - 
anv_pipeline_destroy(device, (struct anv_object *)pPipelines[j], - VK_OBJECT_TYPE_PIPELINE); + anv_DestroyPipeline(_device, pPipelines[j]); } return result; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index b7d313d2933..c1f04dfbc19 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -325,17 +325,6 @@ void anv_bo_pool_finish(struct anv_bo_pool *pool); VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); -struct anv_object; -struct anv_device; - -typedef void (*anv_object_destructor_cb)(struct anv_device *, - struct anv_object *, - VkObjectType); - -struct anv_object { - anv_object_destructor_cb destructor; -}; - struct anv_physical_device { struct anv_instance * instance; uint32_t chipset_id; @@ -571,7 +560,6 @@ struct anv_device_memory { }; struct anv_dynamic_vp_state { - struct anv_object base; struct anv_state sf_clip_vp; struct anv_state cc_vp; struct anv_state scissor; @@ -666,7 +654,6 @@ struct anv_descriptor_set_binding { }; struct anv_cmd_buffer { - struct anv_object base; struct anv_device * device; struct drm_i915_gem_execbuffer2 execbuf; @@ -710,7 +697,6 @@ void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); void anv_aub_writer_destroy(struct anv_aub_writer *writer); struct anv_fence { - struct anv_object base; struct anv_bo bo; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec2_objects[1]; @@ -728,7 +714,6 @@ struct anv_shader { }; struct anv_pipeline { - struct anv_object base; struct anv_device * device; struct anv_batch batch; uint32_t batch_data[256]; @@ -927,8 +912,6 @@ struct anv_sampler { }; struct anv_framebuffer { - struct anv_object base; - uint32_t width; uint32_t height; uint32_t layers; diff --git a/src/vulkan/query.c b/src/vulkan/query.c index 3a1f594816d..b3b85897814 100644 --- a/src/vulkan/query.c +++ b/src/vulkan/query.c @@ -36,25 +36,11 @@ struct anv_query_pool_slot { }; struct 
anv_query_pool { - struct anv_object base; VkQueryType type; uint32_t slots; struct anv_bo bo; }; -static void -anv_query_pool_destroy(struct anv_device *device, - struct anv_object *object, - VkObjectType obj_type) -{ - struct anv_query_pool *pool = (struct anv_query_pool *) object; - - assert(obj_type == VK_OBJECT_TYPE_QUERY_POOL); - - anv_DestroyQueryPool(anv_device_to_handle(device), - anv_query_pool_to_handle(pool)); -} - VkResult anv_CreateQueryPool( VkDevice _device, const VkQueryPoolCreateInfo* pCreateInfo, @@ -81,9 +67,6 @@ VkResult anv_CreateQueryPool( if (pool == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - pool->base.destructor = anv_query_pool_destroy; - - pool->type = pCreateInfo->queryType; size = pCreateInfo->slots * sizeof(struct anv_query_pool_slot); result = anv_bo_init_new(&pool->bo, device, size); if (result != VK_SUCCESS) -- cgit v1.2.3 From 43241a24bc03aba6d25c7c751ec2f1c721b48331 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 11:49:37 -0700 Subject: vk/meta: Fix declared type of a shader module s/VkShader/VkShaderModule/ I'm looking forward to a type-safe vulkan.h ;) --- src/vulkan/meta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 90447dc81d7..bf8bb6f9d30 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -36,7 +36,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) /* We don't use a vertex shader for clearing, but instead build and pass * the VUEs directly to the rasterization backend. */ - VkShader fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, + VkShaderModule fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, out vec4 f_color; flat in vec4 v_color; void main() -- cgit v1.2.3 From 8213be790e1e037195cb85aa9dec5a5918384438 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 12:00:27 -0700 Subject: vk: Define struct anv_image_view, anv_buffer_view Follow the pattern of anv_attachment_view. 
We need these structs to implement the type-safety that arrived in the 0.132 header. --- src/vulkan/device.c | 28 ++++++++++++++++------------ src/vulkan/image.c | 16 +++++++++------- src/vulkan/meta.c | 20 ++++++++++---------- src/vulkan/private.h | 13 ++++++++++++- 4 files changed, 47 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index fd0b80d272b..c59814fb4e6 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1722,15 +1722,17 @@ VkResult anv_CreateBufferView( { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); + struct anv_buffer_view *bview; struct anv_surface_view *view; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); - view = anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) + bview = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (bview == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + view = &bview->view; view->bo = buffer->bo; view->offset = buffer->offset + pCreateInfo->offset; view->surface_state = @@ -1741,20 +1743,20 @@ VkResult anv_CreateBufferView( fill_buffer_surface_state(view->surface_state.map, pCreateInfo->format, view->offset, pCreateInfo->range); - *pView = (VkBufferView) view; + *pView = anv_buffer_view_to_handle(bview); return VK_SUCCESS; } VkResult anv_DestroyBufferView( VkDevice _device, - VkBufferView _view) + VkBufferView _bview) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_surface_view *view = (struct anv_surface_view *)_view; + ANV_FROM_HANDLE(anv_buffer_view, bview, _bview); - anv_surface_view_fini(device, view); - anv_device_free(device, view); + anv_surface_view_fini(device, &bview->view); + anv_device_free(device, bview); return VK_SUCCESS; } @@ -2131,8 +2133,9 @@ VkResult anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 
for (uint32_t j = 0; j < write->count; j++) { - set->descriptors[write->destBinding + j].view = - (struct anv_surface_view *)write->pDescriptors[j].imageView; + ANV_FROM_HANDLE(anv_image_view, iview, + write->pDescriptors[j].imageView); + set->descriptors[write->destBinding + j].view = &iview->view; } break; @@ -2150,8 +2153,9 @@ VkResult anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: for (uint32_t j = 0; j < write->count; j++) { - set->descriptors[write->destBinding + j].view = - (struct anv_surface_view *)write->pDescriptors[j].bufferView; + ANV_FROM_HANDLE(anv_buffer_view, bview, + write->pDescriptors[j].bufferView); + set->descriptors[write->destBinding + j].view = &bview->view; } default: diff --git a/src/vulkan/image.c b/src/vulkan/image.c index c29c6939ffb..d05f42667ea 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -333,13 +333,15 @@ anv_surface_view_fini(struct anv_device *device, } void -anv_image_view_init(struct anv_surface_view *view, +anv_image_view_init(struct anv_image_view *iview, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + struct anv_surface_view *view = &iview->view; struct anv_surface *surface; const struct anv_format *format_info = @@ -537,7 +539,7 @@ anv_CreateImageView(VkDevice _device, VkImageView *pView) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_surface_view *view; + struct anv_image_view *view; view = anv_device_alloc(device, sizeof(*view), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); @@ -546,19 +548,19 @@ anv_CreateImageView(VkDevice _device, anv_image_view_init(view, device, pCreateInfo, NULL); - *pView = (VkImageView) view; + *pView = anv_image_view_to_handle(view); return VK_SUCCESS; } VkResult -anv_DestroyImageView(VkDevice _device, VkImageView 
_view) +anv_DestroyImageView(VkDevice _device, VkImageView _iview) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_surface_view *view = (struct anv_surface_view *)_view; + ANV_FROM_HANDLE(anv_image_view, iview, _iview); - anv_surface_view_fini(device, view); - anv_device_free(device, view); + anv_surface_view_fini(device, &iview->view); + anv_device_free(device, iview); return VK_SUCCESS; } diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index bf8bb6f9d30..c467b1961f6 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -762,7 +762,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, anv_image_from_handle(dest_image)->bo = dest; anv_image_from_handle(dest_image)->offset = dest_offset; - struct anv_surface_view src_view; + struct anv_image_view src_view; anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -798,7 +798,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, cmd_buffer); meta_emit_blit(cmd_buffer, - &src_view, + &src_view.view, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }, &dest_view, @@ -902,7 +902,7 @@ void anv_CmdCopyImage( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - struct anv_surface_view src_view; + struct anv_image_view src_view; anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -938,7 +938,7 @@ void anv_CmdCopyImage( cmd_buffer); meta_emit_blit(cmd_buffer, - &src_view, + &src_view.view, pRegions[r].srcOffset, pRegions[r].extent, &dest_view, @@ -970,7 +970,7 @@ void anv_CmdBlitImage( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - struct anv_surface_view src_view; + struct anv_image_view src_view; anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -1006,7 +1006,7 @@ void 
anv_CmdBlitImage( cmd_buffer); meta_emit_blit(cmd_buffer, - &src_view, + &src_view.view, pRegions[r].srcOffset, pRegions[r].srcExtent, &dest_view, @@ -1066,7 +1066,7 @@ void anv_CmdCopyBufferToImage( src_image->bo = src_buffer->bo; src_image->offset = src_buffer->offset + pRegions[r].bufferOffset; - struct anv_surface_view src_view; + struct anv_image_view src_view; anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -1102,7 +1102,7 @@ void anv_CmdCopyBufferToImage( cmd_buffer); meta_emit_blit(cmd_buffer, - &src_view, + &src_view.view, (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent, &dest_view, @@ -1137,7 +1137,7 @@ void anv_CmdCopyImageToBuffer( if (pRegions[r].bufferImageHeight != 0) anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); - struct anv_surface_view src_view; + struct anv_image_view src_view; anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -1200,7 +1200,7 @@ void anv_CmdCopyImageToBuffer( cmd_buffer); meta_emit_blit(cmd_buffer, - &src_view, + &src_view.view, pRegions[r].imageOffset, pRegions[r].imageExtent, &dest_view, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index c1f04dfbc19..5693a80af3d 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -854,6 +854,15 @@ struct anv_surface_view { VkFormat format; }; +struct anv_buffer_view { + /* FINISHME: Trim unneeded data from this struct. 
*/ + struct anv_surface_view view; +}; + +struct anv_image_view { + struct anv_surface_view view; +}; + enum anv_attachment_view_type { ANV_ATTACHMENT_VIEW_TYPE_COLOR, ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL, @@ -894,7 +903,7 @@ VkResult anv_image_create(VkDevice _device, const struct anv_image_create_info *info, VkImage *pImage); -void anv_image_view_init(struct anv_surface_view *view, +void anv_image_view_init(struct anv_image_view *view, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); @@ -995,6 +1004,8 @@ ANV_DEFINE_CASTS(anv_pipeline, VkPipeline) ANV_DEFINE_CASTS(anv_image, VkImage) ANV_DEFINE_CASTS(anv_sampler, VkSampler) ANV_DEFINE_CASTS(anv_attachment_view, VkAttachmentView) +ANV_DEFINE_CASTS(anv_buffer_view, VkBufferView); +ANV_DEFINE_CASTS(anv_image_view, VkImageView); ANV_DEFINE_CASTS(anv_framebuffer, VkFramebuffer) ANV_DEFINE_CASTS(anv_render_pass, VkRenderPass) ANV_DEFINE_CASTS(anv_query_pool, VkQueryPool) -- cgit v1.2.3 From 7529e7ce860fc5dd64b93b53e5676136e0af3e4e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 13:55:28 -0700 Subject: vk: Correct anv_CreateShaderModule's prototype s/VkShader/VkShaderModule/ :sigh: I look forward to type-safety. 
--- src/vulkan/pipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index afe4f5b767b..218733aaf30 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -34,7 +34,7 @@ VkResult anv_CreateShaderModule( VkDevice _device, const VkShaderModuleCreateInfo* pCreateInfo, - VkShader* pShaderModule) + VkShaderModule* pShaderModule) { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_shader_module *module; -- cgit v1.2.3 From 365d80a91e822f28c35aeec09c8ca787163b73bf Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 14:00:21 -0700 Subject: vk: Replace some raw casts with safe casts That is, replace some instances of (VkFoo) foo with anv_foo_to_handle(foo) --- src/vulkan/intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/intel.c b/src/vulkan/intel.c index 06e79591ee4..6457b211f73 100644 --- a/src/vulkan/intel.c +++ b/src/vulkan/intel.c @@ -83,8 +83,8 @@ VkResult anv_CreateDmaBufImageINTEL( assert(image->extent.height > 0); assert(image->extent.depth == 1); - *pMem = (VkDeviceMemory) mem; - *pImage = (VkImage) image; + *pMem = anv_device_memory_to_handle(mem); + *pImage = anv_image_to_handle(image); return VK_SUCCESS; -- cgit v1.2.3 From c6270e8044782080a0c38b5290eac96caa608168 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 14:14:04 -0700 Subject: vk: Refactor create/destroy code for anv_descriptor_set Define two new functions: anv_descriptor_set_create anv_descriptor_set_destroy --- src/vulkan/device.c | 49 ++++++++++++++++++++++++++++++++++++++----------- src/vulkan/private.h | 9 +++++++++ 2 files changed, 47 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index c59814fb4e6..7e230249d07 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1251,6 +1251,9 @@ VkResult anv_DestroyObject( return 
anv_DestroyDescriptorSetLayout(_device, (VkDescriptorSetLayout) _object); case VK_OBJECT_TYPE_DESCRIPTOR_SET: + anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(_object)); + return VK_SUCCESS; + case VK_OBJECT_TYPE_RENDER_PASS: /* These are trivially destroyable */ anv_device_free(device, (void *) _object); @@ -2070,6 +2073,35 @@ VkResult anv_ResetDescriptorPool( return VK_SUCCESS; } +VkResult +anv_descriptor_set_create(struct anv_device *device, + const struct anv_descriptor_set_layout *layout, + struct anv_descriptor_set **out_set) +{ + struct anv_descriptor_set *set; + size_t size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]); + + set = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!set) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* A descriptor set may not be 100% filled. Clear the set so we can can + * later detect holes in it. + */ + memset(set, 0, size); + + *out_set = set; + + return VK_SUCCESS; +} + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set) +{ + anv_device_free(device, set); +} + VkResult anv_AllocDescriptorSets( VkDevice _device, VkDescriptorPool descriptorPool, @@ -2080,24 +2112,19 @@ VkResult anv_AllocDescriptorSets( uint32_t* pCount) { ANV_FROM_HANDLE(anv_device, device, _device); + + VkResult result; struct anv_descriptor_set *set; - size_t size; for (uint32_t i = 0; i < count; i++) { ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pSetLayouts[i]); - size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]); - set = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!set) { + + result = anv_descriptor_set_create(device, layout, &set); + if (result != VK_SUCCESS) { *pCount = i; - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + return result; } - /* Descriptor sets may not be 100% filled out so we need to memset to - * ensure that we can properly detect and handle holes. 
- */ - memset(set, 0, size); - pDescriptorSets[i] = anv_descriptor_set_to_handle(set); } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 5693a80af3d..949962b8c04 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -608,6 +608,15 @@ struct anv_descriptor_set { struct anv_descriptor descriptors[0]; }; +VkResult +anv_descriptor_set_create(struct anv_device *device, + const struct anv_descriptor_set_layout *layout, + struct anv_descriptor_set **out_set); + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set); + #define MAX_VBS 32 #define MAX_SETS 8 #define MAX_RTS 8 -- cgit v1.2.3 From 188f2328de8d28cf67428e6707af74c07f783166 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 15:02:47 -0700 Subject: vk: Fix vkCreate/DestroyRenderPass While updating vkDestroyObject, I discovered that vkDestroyPass reliably crashes. That hasn't been an issue yet, though, because it is never called. In vkCreateRenderPass: - Don't allocate empty attachment arrays. - Ensure that pointers to empty attachment arrays are NULL. - Store VkRenderPassCreateInfo::subpassCount as anv_render_pass::subpass_count. In vkDestroyRenderPass: - Fix loop bounds: s/attachment_count/subpass_count/ - Don't call anv_device_free on null pointers. --- src/vulkan/device.c | 63 ++++++++++++++++++++++++++++++++++++++-------------- src/vulkan/private.h | 3 ++- 2 files changed, 48 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 7e230249d07..a4165ac1c81 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -4045,7 +4045,14 @@ VkResult anv_CreateRenderPass( if (pass == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + /* Clear the subpasses along with the parent pass. This required because + * each array member of anv_subpass must be a valid pointer if not NULL. 
+ */ + memset(pass, 0, size); + pass->attachment_count = pCreateInfo->attachmentCount; + pass->subpass_count = pCreateInfo->subpassCount; + size = pCreateInfo->attachmentCount * sizeof(*pass->attachments); pass->attachments = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); @@ -4063,25 +4070,39 @@ VkResult anv_CreateRenderPass( struct anv_subpass *subpass = &pass->subpasses[i]; subpass->input_count = desc->inputCount; - subpass->input_attachments = - anv_device_alloc(device, desc->inputCount * sizeof(uint32_t), - 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - for (uint32_t j = 0; j < desc->inputCount; j++) - subpass->input_attachments[j] = desc->inputAttachments[j].attachment; - subpass->color_count = desc->colorCount; - subpass->color_attachments = - anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), - 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - for (uint32_t j = 0; j < desc->colorCount; j++) - subpass->color_attachments[j] = desc->colorAttachments[j].attachment; + + if (desc->inputCount > 0) { + subpass->input_attachments = + anv_device_alloc(device, desc->inputCount * sizeof(uint32_t), + 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + + for (uint32_t j = 0; j < desc->inputCount; j++) { + subpass->input_attachments[j] + = desc->inputAttachments[j].attachment; + } + } + + if (desc->colorCount > 0) { + subpass->color_attachments = + anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), + 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + + for (uint32_t j = 0; j < desc->colorCount; j++) { + subpass->color_attachments[j] + = desc->colorAttachments[j].attachment; + } + } if (desc->resolveAttachments) { subpass->resolve_attachments = anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - for (uint32_t j = 0; j < desc->colorCount; j++) - subpass->resolve_attachments[j] = desc->resolveAttachments[j].attachment; + + for (uint32_t j = 0; j < desc->colorCount; j++) { + subpass->resolve_attachments[j] + = 
desc->resolveAttachments[j].attachment; + } } subpass->depth_stencil_attachment = desc->depthStencilAttachment.attachment; @@ -4101,10 +4122,18 @@ VkResult anv_DestroyRenderPass( anv_device_free(device, pass->attachments); - for (uint32_t i = 0; i < pass->attachment_count; i++) { - anv_device_free(device, pass->subpasses[i].input_attachments); - anv_device_free(device, pass->subpasses[i].color_attachments); - anv_device_free(device, pass->subpasses[i].resolve_attachments); + for (uint32_t i = 0; i < pass->subpass_count; i++) { + /* In VkSubpassCreateInfo, each of the attachment arrays may be null. + * Don't free the null arrays. + */ + struct anv_subpass *subpass = &pass->subpasses[i]; + + if (subpass->input_attachments) + anv_device_free(device, subpass->input_attachments); + if (subpass->color_attachments) + anv_device_free(device, subpass->color_attachments); + if (subpass->resolve_attachments) + anv_device_free(device, subpass->resolve_attachments); } anv_device_free(device, pass); diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 949962b8c04..2990634dc6f 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -959,8 +959,9 @@ struct anv_render_pass_attachment { struct anv_render_pass { uint32_t attachment_count; - struct anv_render_pass_attachment * attachments; + uint32_t subpass_count; + struct anv_render_pass_attachment * attachments; struct anv_subpass subpasses[0]; }; -- cgit v1.2.3 From f5ad06eb7894604c1c9d755a9efb9983ad3bf695 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 14:33:10 -0700 Subject: vk: Fix vkDestroyObject dispatch for VkRenderPass It called anv_device_free() instead of anv_DestroyRenderPass(). 
--- src/vulkan/device.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index a4165ac1c81..2b8bcbe9e82 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1255,9 +1255,7 @@ VkResult anv_DestroyObject( return VK_SUCCESS; case VK_OBJECT_TYPE_RENDER_PASS: - /* These are trivially destroyable */ - anv_device_free(device, (void *) _object); - return VK_SUCCESS; + return anv_DestroyRenderPass(_device, (VkRenderPass) _object); case VK_OBJECT_TYPE_DYNAMIC_VP_STATE: return anv_DestroyDynamicViewportState(_device, (VkDynamicViewportState) _object); -- cgit v1.2.3 From 4c8e1e58883c6e5616cc61587d854e06ec75b7a8 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 14:01:30 -0700 Subject: vk: Stop internally calling anv_DestroyObject() Replace each anv_DestroyObject() with anv_DestroyFoo(). Let vkDestroyObject() live for a while longer for Crucible's sake. --- src/vulkan/meta.c | 56 ++++++++++++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index c467b1961f6..b10ba17a719 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -141,7 +141,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) }, &device->meta_state.clear.pipeline); - anv_DestroyObject(anv_device_to_handle(device), VK_OBJECT_TYPE_SHADER, fs); + anv_DestroyShader(anv_device_to_handle(device), fs); } #define NUM_VB_USED 2 @@ -476,8 +476,8 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, &device->meta_state.blit.pipeline); - anv_DestroyObject(anv_device_to_handle(device), VK_OBJECT_TYPE_SHADER, vs); - anv_DestroyObject(anv_device_to_handle(device), VK_OBJECT_TYPE_SHADER, fs); + anv_DestroyShader(anv_device_to_handle(device), vs); + anv_DestroyShader(anv_device_to_handle(device), fs); } static void @@ -691,12 +691,9 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, /* 
At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. */ - anv_DestroyObject(anv_device_to_handle(device), - VK_OBJECT_TYPE_DESCRIPTOR_SET, set); - anv_DestroyObject(anv_device_to_handle(device), - VK_OBJECT_TYPE_FRAMEBUFFER, fb); - anv_DestroyObject(anv_device_to_handle(device), - VK_OBJECT_TYPE_RENDER_PASS, pass); + anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb); + anv_DestroyRenderPass(anv_device_to_handle(device), pass); } static void @@ -805,8 +802,8 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }); - anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, src_image); - anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, dest_image); + anv_DestroyImage(vk_device, src_image); + anv_DestroyImage(vk_device, dest_image); } void anv_CmdCopyBuffer( @@ -1109,7 +1106,7 @@ void anv_CmdCopyBufferToImage( pRegions[r].imageOffset, pRegions[r].imageExtent); - anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, srcImage); + anv_DestroyImage(vk_device, srcImage); } meta_finish_blit(cmd_buffer, &saved_state); @@ -1207,7 +1204,7 @@ void anv_CmdCopyImageToBuffer( (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent); - anv_DestroyObject(vk_device, VK_OBJECT_TYPE_IMAGE, destImage); + anv_DestroyImage(vk_device, destImage); } meta_finish_blit(cmd_buffer, &saved_state); @@ -1427,27 +1424,22 @@ void anv_device_finish_meta(struct anv_device *device) { /* Clear */ - anv_DestroyObject(anv_device_to_handle(device), VK_OBJECT_TYPE_PIPELINE, - device->meta_state.clear.pipeline); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.clear.pipeline); /* Blit */ - anv_DestroyObject(anv_device_to_handle(device), VK_OBJECT_TYPE_PIPELINE, - device->meta_state.blit.pipeline); - anv_DestroyObject(anv_device_to_handle(device), - VK_OBJECT_TYPE_PIPELINE_LAYOUT, - 
device->meta_state.blit.pipeline_layout); - anv_DestroyObject(anv_device_to_handle(device), - VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, - device->meta_state.blit.ds_layout); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline); + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout); + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout); /* Shared */ - anv_DestroyObject(anv_device_to_handle(device), - VK_OBJECT_TYPE_DYNAMIC_RS_STATE, - device->meta_state.shared.rs_state); - anv_DestroyObject(anv_device_to_handle(device), - VK_OBJECT_TYPE_DYNAMIC_CB_STATE, - device->meta_state.shared.cb_state); - anv_DestroyObject(anv_device_to_handle(device), - VK_OBJECT_TYPE_DYNAMIC_DS_STATE, - device->meta_state.shared.ds_state); + anv_DestroyDynamicRasterState(anv_device_to_handle(device), + device->meta_state.shared.rs_state); + anv_DestroyDynamicColorBlendState(anv_device_to_handle(device), + device->meta_state.shared.cb_state); + anv_DestroyDynamicDepthStencilState(anv_device_to_handle(device), + device->meta_state.shared.ds_state); } -- cgit v1.2.3 From f4748bff59b21c51092a081df48d7d00f2bfbfef Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 15 Jul 2015 15:15:54 -0700 Subject: vk/device: Provide proper NULL handling in anv_device_free The Vulkan spec does not specify that the free function provided to CreateInstance must handle NULL properly so we do it in the wrapper. If this ever changes in the spec, we can delete the extra 2 lines. 
--- src/vulkan/device.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 2b8bcbe9e82..454e8f48f42 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1057,6 +1057,9 @@ void anv_device_free(struct anv_device * device, void * mem) { + if (mem == NULL) + return; + return device->instance->pfnFree(device->instance->pAllocUserData, mem); } -- cgit v1.2.3 From e1c78ebe53614466b5a45b636aa4b7f625d88acf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 15 Jul 2015 15:19:59 -0700 Subject: vk/device: Remove unneeded checks for NULL --- src/vulkan/device.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 454e8f48f42..14a93c9af88 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -4129,12 +4129,9 @@ VkResult anv_DestroyRenderPass( */ struct anv_subpass *subpass = &pass->subpasses[i]; - if (subpass->input_attachments) - anv_device_free(device, subpass->input_attachments); - if (subpass->color_attachments) - anv_device_free(device, subpass->color_attachments); - if (subpass->resolve_attachments) - anv_device_free(device, subpass->resolve_attachments); + anv_device_free(device, subpass->input_attachments); + anv_device_free(device, subpass->color_attachments); + anv_device_free(device, subpass->resolve_attachments); } anv_device_free(device, pass); -- cgit v1.2.3 From 61a4bfe25386dbdeb26ec1747f41bc936d3af17c Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 15:24:06 -0700 Subject: vk: Delete vkDbgSetObjectTag() Because VkObject is going away. 
--- src/vulkan/device.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 14a93c9af88..be5705f68c5 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -4305,14 +4305,6 @@ void vkCmdDbgMarkerEnd( VkCmdBuffer cmdBuffer) __attribute__ ((visibility ("default"))); -VkResult vkDbgSetObjectTag( - VkDevice device, - VkObject object, - size_t tagSize, - const void* pTag) - __attribute__ ((visibility ("default"))); - - void vkCmdDbgMarkerBegin( VkCmdBuffer cmdBuffer, const char* pMarker) @@ -4323,12 +4315,3 @@ void vkCmdDbgMarkerEnd( VkCmdBuffer cmdBuffer) { } - -VkResult vkDbgSetObjectTag( - VkDevice device, - VkObject object, - size_t tagSize, - const void* pTag) -{ - return VK_SUCCESS; -} -- cgit v1.2.3 From badbf0c94a6444c2a62994dc8a5c0ebb3d4be678 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 15:33:12 -0700 Subject: vk/x11: Remove raw casts The raw casts in the WSI functions will break the build when the type-safety changes arrive. 
--- src/vulkan/x11.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/x11.c b/src/vulkan/x11.c index d1eaab3bf46..73a7e2fc8ac 100644 --- a/src/vulkan/x11.c +++ b/src/vulkan/x11.c @@ -88,7 +88,8 @@ VkResult anv_CreateSwapChainWSI( const VkSwapChainCreateInfoWSI* pCreateInfo, VkSwapChainWSI* pSwapChain) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_swap_chain *chain; xcb_void_cookie_t cookie; VkResult result; @@ -110,11 +111,13 @@ VkResult anv_CreateSwapChainWSI( chain->extent = pCreateInfo->imageExtent; for (uint32_t i = 0; i < chain->count; i++) { + VkDeviceMemory memory_h; + VkImage image_h; struct anv_image *image; struct anv_surface *surface; struct anv_device_memory *memory; - anv_image_create((VkDevice) device, + anv_image_create(_device, &(struct anv_image_create_info) { .force_tile_mode = true, .tile_mode = XMAJOR, @@ -136,21 +139,23 @@ VkResult anv_CreateSwapChainWSI( .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, .flags = 0, }}, - (VkImage *) &image); + &image_h); + image = anv_image_from_handle(image_h); surface = &image->primary_surface; - anv_AllocMemory((VkDevice) device, + anv_AllocMemory(_device, &(VkMemoryAllocInfo) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, .allocationSize = image->size, .memoryTypeIndex = 0, }, - (VkDeviceMemory *) &memory); + &memory_h); + + memory = anv_device_memory_from_handle(memory_h); - anv_BindImageMemory(VK_NULL_HANDLE, - anv_image_to_handle(image), - anv_device_memory_to_handle(memory), 0); + anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), + memory_h, 0); ret = anv_gem_set_tiling(device, memory->bo.gem_handle, surface->stride, I915_TILING_X); @@ -241,8 +246,8 @@ VkResult anv_GetSwapChainInfoWSI( images = pData; for (uint32_t i = 0; i < chain->count; i++) { - images[i].image = (VkImage) chain->images[i].image; - images[i].memory = (VkDeviceMemory) 
chain->images[i].memory; + images[i].image = anv_image_to_handle(chain->images[i].image); + images[i].memory = anv_device_memory_to_handle(chain->images[i].memory); } return VK_SUCCESS; @@ -256,7 +261,8 @@ VkResult anv_QueuePresentWSI( VkQueue queue_, const VkPresentInfoWSI* pPresentInfo) { - struct anv_image *image = (struct anv_image *) pPresentInfo->image; + ANV_FROM_HANDLE(anv_image, image, pPresentInfo->image); + struct anv_swap_chain *chain = image->swap_chain; xcb_void_cookie_t cookie; xcb_pixmap_t pixmap; @@ -268,7 +274,7 @@ VkResult anv_QueuePresentWSI( pixmap = XCB_NONE; for (uint32_t i = 0; i < chain->count; i++) { - if ((VkImage) chain->images[i].image == pPresentInfo->image) { + if (image == chain->images[i].image) { pixmap = chain->images[i].pixmap; break; } -- cgit v1.2.3 From 6f140e8af1ecd157b09c2fb1239dca66dc9dc5c7 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 15:33:40 -0700 Subject: vk/meta: Remove raw casts Needed for upcoming type-safety changes. 
--- src/vulkan/meta.c | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index b10ba17a719..226b40a9dfb 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -515,7 +515,7 @@ struct blit_region { static void meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_surface_view *src, + struct anv_image_view *src, VkOffset3D src_offset, VkExtent3D src_extent, struct anv_color_attachment_view *dest, @@ -542,8 +542,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)(src_offset.x + src_extent.width) / (float)src->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src->extent.height, + (float)(src_offset.x + src_extent.width) / (float)src->view.extent.width, + (float)(src_offset.y + src_extent.height) / (float)src->view.extent.height, }, }; @@ -553,8 +553,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)src_offset.x / (float)src->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src->extent.height, + (float)src_offset.x / (float)src->view.extent.width, + (float)(src_offset.y + src_extent.height) / (float)src->view.extent.height, }, }; @@ -564,8 +564,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y, }, .tex_coord = { - (float)src_offset.x / (float)src->extent.width, - (float)src_offset.y / (float)src->extent.height, + (float)src_offset.x / (float)src->view.extent.width, + (float)src_offset.y / (float)src->view.extent.height, }, }; @@ -603,7 +603,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, .pDescriptors = (VkDescriptorInfo[]) { { - .imageView = (VkImageView) src, + .imageView = anv_image_view_to_handle(src), .imageLayout = VK_IMAGE_LAYOUT_GENERAL }, } @@ -795,7 +795,7 @@ do_buffer_copy(struct 
anv_cmd_buffer *cmd_buffer, cmd_buffer); meta_emit_blit(cmd_buffer, - &src_view.view, + &src_view, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }, &dest_view, @@ -813,9 +813,10 @@ void anv_CmdCopyBuffer( uint32_t regionCount, const VkBufferCopy* pRegions) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; - struct anv_buffer *src_buffer = (struct anv_buffer *)srcBuffer; - struct anv_buffer *dest_buffer = (struct anv_buffer *)destBuffer; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + struct anv_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); @@ -892,8 +893,9 @@ void anv_CmdCopyImage( uint32_t regionCount, const VkImageCopy* pRegions) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; - struct anv_image *src_image = (struct anv_image *)srcImage; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + struct anv_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); @@ -935,7 +937,7 @@ void anv_CmdCopyImage( cmd_buffer); meta_emit_blit(cmd_buffer, - &src_view.view, + &src_view, pRegions[r].srcOffset, pRegions[r].extent, &dest_view, @@ -957,9 +959,10 @@ void anv_CmdBlitImage( VkTexFilter filter) { - struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer; - struct anv_image *src_image = (struct anv_image *)srcImage; - struct anv_image *dest_image = (struct anv_image *)destImage; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_saved_state saved_state; anv_finishme("respect VkTexFilter"); @@ -1003,7 +1006,7 @@ void anv_CmdBlitImage( cmd_buffer); meta_emit_blit(cmd_buffer, - &src_view.view, + &src_view, pRegions[r].srcOffset, pRegions[r].srcExtent, &dest_view, 
@@ -1099,7 +1102,7 @@ void anv_CmdCopyBufferToImage( cmd_buffer); meta_emit_blit(cmd_buffer, - &src_view.view, + &src_view, (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent, &dest_view, @@ -1197,7 +1200,7 @@ void anv_CmdCopyImageToBuffer( cmd_buffer); meta_emit_blit(cmd_buffer, - &src_view.view, + &src_view, pRegions[r].imageOffset, pRegions[r].imageExtent, &dest_view, -- cgit v1.2.3 From 498ae009d3a9bae5e670bdc9f33a26e5b9e7f267 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 15:33:27 -0700 Subject: vk/glsl: Replace raw casts Needed for upcoming type-safety changes. --- src/vulkan/glsl_scraper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index 918c156027a..d1514712762 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -264,7 +264,7 @@ with open_file(outfname, 'w') as outfile: .codeSize = sizeof(_ANV_GLSL_SRC_VAR(__LINE__)), \\ .pCode = _ANV_GLSL_SRC_VAR(__LINE__), \\ }; \\ - vkCreateShaderModule((VkDevice) device, \\ + vkCreateShaderModule(anv_device_to_handle(device), \\ &__shader_create_info, &__module); \\ __module; \\ }) -- cgit v1.2.3 From d8620991989224b8a363be8e998910748b098a57 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 15 Jul 2015 12:09:52 -0700 Subject: vk: Pull the guts of anv_cmd_buffer into its own file --- src/vulkan/Makefile.am | 1 + src/vulkan/anv_cmd_buffer.c | 716 ++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/device.c | 670 +---------------------------------------- src/vulkan/private.h | 28 +- 4 files changed, 738 insertions(+), 677 deletions(-) create mode 100644 src/vulkan/anv_cmd_buffer.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 6d10f84adf4..9d136bdb208 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -57,6 +57,7 @@ libvulkan_la_SOURCES = \ private.h \ gem.c \ device.c \ + anv_cmd_buffer.c \ aub.c \ allocator.c \ 
util.c \ diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c new file mode 100644 index 00000000000..2df14ae65ad --- /dev/null +++ b/src/vulkan/anv_cmd_buffer.c @@ -0,0 +1,716 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "private.h" + +/** \file anv_cmd_buffer.c + * + * This file contains functions related to anv_cmd_buffer as a data + * structure. This involves everything required to create and destroy + * the actual batch buffers as well as link them together and handle + * relocations and surface state. It specifically does *not* contain any + * handling of actual vkCmd calls beyond vkCmdExecuteCommands. 
+ */ + +/*-----------------------------------------------------------------------* + * Functions related to anv_reloc_list + *-----------------------------------------------------------------------*/ + +VkResult +anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) +{ + list->num_relocs = 0; + list->array_length = 256; + list->relocs = + anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + + if (list->relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + list->reloc_bos = + anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + + if (list->relocs == NULL) { + anv_device_free(device, list->relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + return VK_SUCCESS; +} + +void +anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device) +{ + anv_device_free(device, list->relocs); + anv_device_free(device, list->reloc_bos); +} + +static VkResult +anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, + size_t num_additional_relocs) +{ + if (list->num_relocs + num_additional_relocs <= list->array_length) + return VK_SUCCESS; + + size_t new_length = list->array_length * 2; + while (new_length < list->num_relocs + num_additional_relocs) + new_length *= 2; + + struct drm_i915_gem_relocation_entry *new_relocs = + anv_device_alloc(device, new_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_reloc_bos = + anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_relocs == NULL) { + anv_device_free(device, new_relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); + memcpy(new_reloc_bos, list->reloc_bos, + list->num_relocs * 
sizeof(*list->reloc_bos)); + + anv_device_free(device, list->relocs); + anv_device_free(device, list->reloc_bos); + + list->relocs = new_relocs; + list->reloc_bos = new_reloc_bos; + + return VK_SUCCESS; +} + +uint64_t +anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device, + uint32_t offset, struct anv_bo *target_bo, uint32_t delta) +{ + struct drm_i915_gem_relocation_entry *entry; + int index; + + anv_reloc_list_grow(list, device, 1); + /* TODO: Handle failure */ + + /* XXX: Can we use I915_EXEC_HANDLE_LUT? */ + index = list->num_relocs++; + list->reloc_bos[index] = target_bo; + entry = &list->relocs[index]; + entry->target_handle = target_bo->gem_handle; + entry->delta = delta; + entry->offset = offset; + entry->presumed_offset = target_bo->offset; + entry->read_domains = 0; + entry->write_domain = 0; + + return target_bo->offset + delta; +} + +static void +anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device, + struct anv_reloc_list *other, uint32_t offset) +{ + anv_reloc_list_grow(list, device, other->num_relocs); + /* TODO: Handle failure */ + + memcpy(&list->relocs[list->num_relocs], &other->relocs[0], + other->num_relocs * sizeof(other->relocs[0])); + memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], + other->num_relocs * sizeof(other->reloc_bos[0])); + + for (uint32_t i = 0; i < other->num_relocs; i++) + list->relocs[i + list->num_relocs].offset += offset; + + list->num_relocs += other->num_relocs; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch + *-----------------------------------------------------------------------*/ + +void * +anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) +{ + if (batch->next + num_dwords * 4 > batch->end) + batch->extend_cb(batch, batch->user_data); + + void *p = batch->next; + + batch->next += num_dwords * 4; + assert(batch->next <= batch->end); + + return p; +} + +uint64_t 
+anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t delta) +{ + return anv_reloc_list_add(&batch->relocs, batch->device, + location - batch->start, bo, delta); +} + +void +anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +{ + uint32_t size, offset; + + size = other->next - other->start; + assert(size % 4 == 0); + + if (batch->next + size > batch->end) + batch->extend_cb(batch, batch->user_data); + + assert(batch->next + size <= batch->end); + + memcpy(batch->next, other->start, size); + + offset = batch->next - batch->start; + anv_reloc_list_append(&batch->relocs, batch->device, + &other->relocs, offset); + + batch->next += size; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static VkResult +anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = + anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + bbo->num_relocs = 0; + bbo->prev_batch_bo = NULL; + + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) { + anv_device_free(device, bbo); + return result; + } + + *bbo_out = bbo; + + return VK_SUCCESS; +} + +static void +anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->next = batch->start = bbo->bo.map; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + bbo->first_reloc = batch->relocs.num_relocs; +} + +static void +anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) +{ + assert(batch->start == bbo->bo.map); + bbo->length = batch->next - batch->start; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); + bbo->num_relocs = batch->relocs.num_relocs - 
bbo->first_reloc; +} + +static void +anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) +{ + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + anv_device_free(device, bbo); +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static VkResult +anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) +{ + struct anv_cmd_buffer *cmd_buffer = _data; + + struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->last_batch_bo; + + VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + if (result != VK_SUCCESS) + return result; + + /* We set the end of the batch a little short so we would be sure we + * have room for the chaining command. Since we're about to emit the + * chaining command, let's set it back where it should go. + */ + batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(batch->end == old_bbo->bo.map + old_bbo->bo.size); + + anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &new_bbo->bo, 0 }, + ); + + /* Pad out to a 2-dword aligned boundary with zeros */ + if ((uintptr_t)batch->next % 8 != 0) { + *(uint32_t *)batch->next = 0; + batch->next += 4; + } + + anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch); + + new_bbo->prev_batch_bo = old_bbo; + cmd_buffer->last_batch_bo = new_bbo; + + anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment) +{ + struct anv_state state; + + state.offset = align_u32(cmd_buffer->surface_next, alignment); + if (state.offset + size > cmd_buffer->surface_batch_bo->bo.size) + return (struct anv_state) { 0 }; + + 
state.map = cmd_buffer->surface_batch_bo->bo.map + state.offset; + state.alloc_size = size; + cmd_buffer->surface_next = state.offset + size; + + assert(state.offset + size <= cmd_buffer->surface_batch_bo->bo.size); + + return state; +} + +VkResult +anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->surface_batch_bo; + + /* Finish off the old buffer */ + old_bbo->num_relocs = + cmd_buffer->surface_relocs.num_relocs - old_bbo->first_reloc; + old_bbo->length = cmd_buffer->surface_next; + + VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + if (result != VK_SUCCESS) + return result; + + new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs; + cmd_buffer->surface_next = 1; + + new_bbo->prev_batch_bo = old_bbo; + cmd_buffer->surface_batch_bo = new_bbo; + + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. + */ + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + /* After re-setting the surface state base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. 
+ * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. + */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .TextureCacheInvalidationEnable = true); + + return VK_SUCCESS; +} + +VkResult anv_CreateCommandBuffer( + VkDevice _device, + const VkCmdBufferCreateInfo* pCreateInfo, + VkCmdBuffer* pCmdBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_cmd_buffer *cmd_buffer; + VkResult result; + + assert(pCreateInfo->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + + cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cmd_buffer->device = device; + cmd_buffer->rs_state = NULL; + cmd_buffer->vp_state = NULL; + cmd_buffer->cb_state = NULL; + cmd_buffer->ds_state = NULL; + memset(&cmd_buffer->state_vf, 0, sizeof(cmd_buffer->state_vf)); + memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors)); + + result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo); + if (result != VK_SUCCESS) + goto fail; + + result = 
anv_reloc_list_init(&cmd_buffer->batch.relocs, device); + if (result != VK_SUCCESS) + goto fail_batch_bo; + + cmd_buffer->batch.device = device; + cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; + cmd_buffer->batch.user_data = cmd_buffer; + + anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + result = anv_batch_bo_create(device, &cmd_buffer->surface_batch_bo); + if (result != VK_SUCCESS) + goto fail_batch_relocs; + cmd_buffer->surface_batch_bo->first_reloc = 0; + + result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device); + if (result != VK_SUCCESS) + goto fail_ss_batch_bo; + + /* Start surface_next at 1 so surface offset 0 is invalid. */ + cmd_buffer->surface_next = 1; + + cmd_buffer->exec2_objects = NULL; + cmd_buffer->exec2_bos = NULL; + cmd_buffer->exec2_array_length = 0; + + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &device->dynamic_state_block_pool); + + cmd_buffer->dirty = 0; + cmd_buffer->vb_dirty = 0; + cmd_buffer->descriptors_dirty = 0; + cmd_buffer->pipeline = NULL; + cmd_buffer->vp_state = NULL; + cmd_buffer->rs_state = NULL; + cmd_buffer->ds_state = NULL; + + *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); + + return VK_SUCCESS; + + fail_ss_batch_bo: + anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device); + fail_batch_relocs: + anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); + fail_batch_bo: + anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device); + fail: + anv_device_free(device, cmd_buffer); + + return result; +} + +VkResult anv_DestroyCommandBuffer( + VkDevice _device, + VkCmdBuffer _cmd_buffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); + + /* Destroy all of the batch buffers */ + struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; + while (bbo) { + struct 
anv_batch_bo *prev = bbo->prev_batch_bo; + anv_batch_bo_destroy(bbo, device); + bbo = prev; + } + anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); + + /* Destroy all of the surface state buffers */ + bbo = cmd_buffer->surface_batch_bo; + while (bbo) { + struct anv_batch_bo *prev = bbo->prev_batch_bo; + anv_batch_bo_destroy(bbo, device); + bbo = prev; + } + anv_reloc_list_finish(&cmd_buffer->surface_relocs, device); + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_device_free(device, cmd_buffer->exec2_objects); + anv_device_free(device, cmd_buffer->exec2_bos); + anv_device_free(device, cmd_buffer); + + return VK_SUCCESS; +} + +static VkResult +anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, + struct drm_i915_gem_relocation_entry *relocs, + size_t num_relocs) +{ + struct drm_i915_gem_exec_object2 *obj; + + if (bo->index < cmd_buffer->bo_count && + cmd_buffer->exec2_bos[bo->index] == bo) + return VK_SUCCESS; + + if (cmd_buffer->bo_count >= cmd_buffer->exec2_array_length) { + uint32_t new_len = cmd_buffer->exec2_objects ? 
+ cmd_buffer->exec2_array_length * 2 : 64; + + struct drm_i915_gem_exec_object2 *new_objects = + anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_objects == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_bos = + anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_objects == NULL) { + anv_device_free(cmd_buffer->device, new_objects); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (cmd_buffer->exec2_objects) { + memcpy(new_objects, cmd_buffer->exec2_objects, + cmd_buffer->bo_count * sizeof(*new_objects)); + memcpy(new_bos, cmd_buffer->exec2_bos, + cmd_buffer->bo_count * sizeof(*new_bos)); + } + + cmd_buffer->exec2_objects = new_objects; + cmd_buffer->exec2_bos = new_bos; + cmd_buffer->exec2_array_length = new_len; + } + + assert(cmd_buffer->bo_count < cmd_buffer->exec2_array_length); + + bo->index = cmd_buffer->bo_count++; + obj = &cmd_buffer->exec2_objects[bo->index]; + cmd_buffer->exec2_bos[bo->index] = bo; + + obj->handle = bo->gem_handle; + obj->relocation_count = 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = 0; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + + if (relocs) { + obj->relocation_count = num_relocs; + obj->relocs_ptr = (uintptr_t) relocs; + } + + return VK_SUCCESS; +} + +static void +anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer, + struct anv_reloc_list *list) +{ + for (size_t i = 0; i < list->num_relocs; i++) + anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL, 0); +} + +static void +anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, + struct anv_reloc_list *list) +{ + struct anv_bo *bo; + + /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in + * struct drm_i915_gem_exec_object2 against the bos current offset and if + * all bos haven't moved it will skip relocation processing 
alltogether. + * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming + * value of offset so we can set it either way. For that to work we need + * to make sure all relocs use the same presumed offset. + */ + + for (size_t i = 0; i < list->num_relocs; i++) { + bo = list->reloc_bos[i]; + if (bo->offset != list->relocs[i].presumed_offset) + cmd_buffer->need_reloc = true; + + list->relocs[i].target_handle = bo->index; + } +} + +VkResult anv_EndCommandBuffer( + VkCmdBuffer cmdBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_device *device = cmd_buffer->device; + struct anv_batch *batch = &cmd_buffer->batch; + + anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END); + + /* Round batch up to an even number of dwords. */ + if ((batch->next - batch->start) & 4) + anv_batch_emit(batch, GEN8_MI_NOOP); + + anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch); + cmd_buffer->surface_batch_bo->num_relocs = + cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc; + cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next; + + cmd_buffer->bo_count = 0; + cmd_buffer->need_reloc = false; + + /* Lock for access to bo->index. */ + pthread_mutex_lock(&device->mutex); + + /* Add surface state bos first so we can add them with their relocs. 
*/ + for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo; + bbo != NULL; bbo = bbo->prev_batch_bo) { + anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, + &cmd_buffer->surface_relocs.relocs[bbo->first_reloc], + bbo->num_relocs); + } + + /* Add all of the BOs referenced by surface state */ + anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs); + + /* Add all but the first batch BO */ + struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo; + while (batch_bo->prev_batch_bo) { + anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo, + &batch->relocs.relocs[batch_bo->first_reloc], + batch_bo->num_relocs); + batch_bo = batch_bo->prev_batch_bo; + } + + /* Add everything referenced by the batches */ + anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs); + + /* Add the first batch bo last */ + assert(batch_bo->prev_batch_bo == NULL && batch_bo->first_reloc == 0); + anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo, + &batch->relocs.relocs[batch_bo->first_reloc], + batch_bo->num_relocs); + assert(batch_bo->bo.index == cmd_buffer->bo_count - 1); + + anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); + anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs); + + cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects; + cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count; + cmd_buffer->execbuf.batch_start_offset = 0; + cmd_buffer->execbuf.batch_len = batch->next - batch->start; + cmd_buffer->execbuf.cliprects_ptr = 0; + cmd_buffer->execbuf.num_cliprects = 0; + cmd_buffer->execbuf.DR1 = 0; + cmd_buffer->execbuf.DR4 = 0; + + cmd_buffer->execbuf.flags = I915_EXEC_HANDLE_LUT; + if (!cmd_buffer->need_reloc) + cmd_buffer->execbuf.flags |= I915_EXEC_NO_RELOC; + cmd_buffer->execbuf.flags |= I915_EXEC_RENDER; + cmd_buffer->execbuf.rsvd1 = device->context_id; + cmd_buffer->execbuf.rsvd2 = 0; + + pthread_mutex_unlock(&device->mutex); + + return VK_SUCCESS; +} + +VkResult anv_ResetCommandBuffer( + VkCmdBuffer 
cmdBuffer, + VkCmdBufferResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + /* Delete all but the first batch bo */ + while (cmd_buffer->last_batch_bo->prev_batch_bo) { + struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo; + anv_batch_bo_destroy(cmd_buffer->last_batch_bo, cmd_buffer->device); + cmd_buffer->last_batch_bo = prev; + } + assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL); + + cmd_buffer->batch.relocs.num_relocs = 0; + anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + /* Delete all but the first batch bo */ + while (cmd_buffer->surface_batch_bo->prev_batch_bo) { + struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo; + anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, cmd_buffer->device); + cmd_buffer->surface_batch_bo = prev; + } + assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL); + + cmd_buffer->surface_next = 1; + cmd_buffer->surface_relocs.num_relocs = 0; + + cmd_buffer->rs_state = NULL; + cmd_buffer->vp_state = NULL; + cmd_buffer->cb_state = NULL; + cmd_buffer->ds_state = NULL; + + return VK_SUCCESS; +} diff --git a/src/vulkan/device.c b/src/vulkan/device.c index be5705f68c5..a2182800cde 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -723,210 +723,6 @@ VkResult anv_GetDeviceQueue( return VK_SUCCESS; } -VkResult -anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) -{ - list->num_relocs = 0; - list->array_length = 256; - list->relocs = - anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - - if (list->relocs == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - list->reloc_bos = - anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - - if (list->relocs == NULL) { - anv_device_free(device, list->relocs); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 
- } - - return VK_SUCCESS; -} - -void -anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device) -{ - anv_device_free(device, list->relocs); - anv_device_free(device, list->reloc_bos); -} - -static VkResult -anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, - size_t num_additional_relocs) -{ - if (list->num_relocs + num_additional_relocs <= list->array_length) - return VK_SUCCESS; - - size_t new_length = list->array_length * 2; - while (new_length < list->num_relocs + num_additional_relocs) - new_length *= 2; - - struct drm_i915_gem_relocation_entry *new_relocs = - anv_device_alloc(device, new_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_relocs == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct anv_bo **new_reloc_bos = - anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_relocs == NULL) { - anv_device_free(device, new_relocs); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); - memcpy(new_reloc_bos, list->reloc_bos, - list->num_relocs * sizeof(*list->reloc_bos)); - - anv_device_free(device, list->relocs); - anv_device_free(device, list->reloc_bos); - - list->relocs = new_relocs; - list->reloc_bos = new_reloc_bos; - - return VK_SUCCESS; -} - -static VkResult -anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) -{ - VkResult result; - - struct anv_batch_bo *bbo = - anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (bbo == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - bbo->num_relocs = 0; - bbo->prev_batch_bo = NULL; - - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); - if (result != VK_SUCCESS) { - anv_device_free(device, bbo); - return result; - } - - *bbo_out = bbo; - - return VK_SUCCESS; -} - -static void -anv_batch_bo_start(struct anv_batch_bo *bbo, 
struct anv_batch *batch, - size_t batch_padding) -{ - batch->next = batch->start = bbo->bo.map; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - bbo->first_reloc = batch->relocs.num_relocs; -} - -static void -anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) -{ - assert(batch->start == bbo->bo.map); - bbo->length = batch->next - batch->start; - VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); - bbo->num_relocs = batch->relocs.num_relocs - bbo->first_reloc; -} - -static void -anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) -{ - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); - anv_device_free(device, bbo); -} - -void * -anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) -{ - if (batch->next + num_dwords * 4 > batch->end) - batch->extend_cb(batch, batch->user_data); - - void *p = batch->next; - - batch->next += num_dwords * 4; - assert(batch->next <= batch->end); - - return p; -} - -static void -anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device, - struct anv_reloc_list *other, uint32_t offset) -{ - anv_reloc_list_grow(list, device, other->num_relocs); - /* TODO: Handle failure */ - - memcpy(&list->relocs[list->num_relocs], &other->relocs[0], - other->num_relocs * sizeof(other->relocs[0])); - memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], - other->num_relocs * sizeof(other->reloc_bos[0])); - - for (uint32_t i = 0; i < other->num_relocs; i++) - list->relocs[i + list->num_relocs].offset += offset; - - list->num_relocs += other->num_relocs; -} - -static uint64_t -anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device, - uint32_t offset, struct anv_bo *target_bo, uint32_t delta) -{ - struct drm_i915_gem_relocation_entry *entry; - int index; - - anv_reloc_list_grow(list, device, 1); - /* TODO: Handle failure */ - - /* XXX: Can we use I915_EXEC_HANDLE_LUT? 
*/ - index = list->num_relocs++; - list->reloc_bos[index] = target_bo; - entry = &list->relocs[index]; - entry->target_handle = target_bo->gem_handle; - entry->delta = delta; - entry->offset = offset; - entry->presumed_offset = target_bo->offset; - entry->read_domains = 0; - entry->write_domain = 0; - - return target_bo->offset + delta; -} - -void -anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) -{ - uint32_t size, offset; - - size = other->next - other->start; - assert(size % 4 == 0); - - if (batch->next + size > batch->end) - batch->extend_cb(batch, batch->user_data); - - assert(batch->next + size <= batch->end); - - memcpy(batch->next, other->start, size); - - offset = batch->next - batch->start; - anv_reloc_list_append(&batch->relocs, batch->device, - &other->relocs, offset); - - batch->next += size; -} - -uint64_t -anv_batch_emit_reloc(struct anv_batch *batch, - void *location, struct anv_bo *bo, uint32_t delta) -{ - return anv_reloc_list_add(&batch->relocs, batch->device, - location - batch->start, bo, delta); -} - VkResult anv_QueueSubmit( VkQueue _queue, uint32_t cmdBufferCount, @@ -2468,47 +2264,6 @@ VkResult anv_DestroyDynamicDepthStencilState( // Command buffer functions -static VkResult -anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) -{ - struct anv_cmd_buffer *cmd_buffer = _data; - - struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->last_batch_bo; - - VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); - if (result != VK_SUCCESS) - return result; - - /* We set the end of the batch a little short so we would be sure we - * have room for the chaining command. Since we're about to emit the - * chaining command, let's set it back where it should go. 
- */ - batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; - assert(batch->end == old_bbo->bo.map + old_bbo->bo.size); - - anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, - GEN8_MI_BATCH_BUFFER_START_header, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &new_bbo->bo, 0 }, - ); - - /* Pad out to a 2-dword aligned boundary with zeros */ - if ((uintptr_t)batch->next % 8 != 0) { - *(uint32_t *)batch->next = 0; - batch->next += 4; - } - - anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch); - - new_bbo->prev_batch_bo = old_bbo; - cmd_buffer->last_batch_bo = new_bbo; - - anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); - - return VK_SUCCESS; -} - VkResult anv_CreateCommandPool( VkDevice device, const VkCmdPoolCreateInfo* pCreateInfo, @@ -2534,125 +2289,7 @@ VkResult anv_ResetCommandPool( stub_return(VK_UNSUPPORTED); } -VkResult anv_CreateCommandBuffer( - VkDevice _device, - const VkCmdBufferCreateInfo* pCreateInfo, - VkCmdBuffer* pCmdBuffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_cmd_buffer *cmd_buffer; - VkResult result; - - assert(pCreateInfo->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - - cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (cmd_buffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - cmd_buffer->device = device; - cmd_buffer->rs_state = NULL; - cmd_buffer->vp_state = NULL; - cmd_buffer->cb_state = NULL; - cmd_buffer->ds_state = NULL; - memset(&cmd_buffer->state_vf, 0, sizeof(cmd_buffer->state_vf)); - memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors)); - - result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo); - if (result != VK_SUCCESS) - goto fail; - - result = anv_reloc_list_init(&cmd_buffer->batch.relocs, device); - if (result != VK_SUCCESS) - goto fail_batch_bo; - - cmd_buffer->batch.device = device; - cmd_buffer->batch.extend_cb = 
anv_cmd_buffer_chain_batch; - cmd_buffer->batch.user_data = cmd_buffer; - - anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); - - result = anv_batch_bo_create(device, &cmd_buffer->surface_batch_bo); - if (result != VK_SUCCESS) - goto fail_batch_relocs; - cmd_buffer->surface_batch_bo->first_reloc = 0; - - result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device); - if (result != VK_SUCCESS) - goto fail_ss_batch_bo; - - /* Start surface_next at 1 so surface offset 0 is invalid. */ - cmd_buffer->surface_next = 1; - - cmd_buffer->exec2_objects = NULL; - cmd_buffer->exec2_bos = NULL; - cmd_buffer->exec2_array_length = 0; - - anv_state_stream_init(&cmd_buffer->surface_state_stream, - &device->surface_state_block_pool); - anv_state_stream_init(&cmd_buffer->dynamic_state_stream, - &device->dynamic_state_block_pool); - - cmd_buffer->dirty = 0; - cmd_buffer->vb_dirty = 0; - cmd_buffer->descriptors_dirty = 0; - cmd_buffer->pipeline = NULL; - cmd_buffer->vp_state = NULL; - cmd_buffer->rs_state = NULL; - cmd_buffer->ds_state = NULL; - - *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); - - return VK_SUCCESS; - - fail_ss_batch_bo: - anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device); - fail_batch_relocs: - anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); - fail_batch_bo: - anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device); - fail: - anv_device_free(device, cmd_buffer); - - return result; -} - -VkResult anv_DestroyCommandBuffer( - VkDevice _device, - VkCmdBuffer _cmd_buffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); - - /* Destroy all of the batch buffers */ - struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; - while (bbo) { - struct anv_batch_bo *prev = bbo->prev_batch_bo; - anv_batch_bo_destroy(bbo, device); - bbo = prev; - } - anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); - - /* Destroy all of the 
surface state buffers */ - bbo = cmd_buffer->surface_batch_bo; - while (bbo) { - struct anv_batch_bo *prev = bbo->prev_batch_bo; - anv_batch_bo_destroy(bbo, device); - bbo = prev; - } - anv_reloc_list_finish(&cmd_buffer->surface_relocs, device); - - anv_state_stream_finish(&cmd_buffer->surface_state_stream); - anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - anv_device_free(device, cmd_buffer->exec2_objects); - anv_device_free(device, cmd_buffer->exec2_bos); - anv_device_free(device, cmd_buffer); - - return VK_SUCCESS; -} - -static void +void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; @@ -2704,217 +2341,6 @@ VkResult anv_BeginCommandBuffer( return VK_SUCCESS; } -static VkResult -anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, - struct drm_i915_gem_relocation_entry *relocs, - size_t num_relocs) -{ - struct drm_i915_gem_exec_object2 *obj; - - if (bo->index < cmd_buffer->bo_count && - cmd_buffer->exec2_bos[bo->index] == bo) - return VK_SUCCESS; - - if (cmd_buffer->bo_count >= cmd_buffer->exec2_array_length) { - uint32_t new_len = cmd_buffer->exec2_objects ? 
- cmd_buffer->exec2_array_length * 2 : 64; - - struct drm_i915_gem_exec_object2 *new_objects = - anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects), - 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_objects == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct anv_bo **new_bos = - anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos), - 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_objects == NULL) { - anv_device_free(cmd_buffer->device, new_objects); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - if (cmd_buffer->exec2_objects) { - memcpy(new_objects, cmd_buffer->exec2_objects, - cmd_buffer->bo_count * sizeof(*new_objects)); - memcpy(new_bos, cmd_buffer->exec2_bos, - cmd_buffer->bo_count * sizeof(*new_bos)); - } - - cmd_buffer->exec2_objects = new_objects; - cmd_buffer->exec2_bos = new_bos; - cmd_buffer->exec2_array_length = new_len; - } - - assert(cmd_buffer->bo_count < cmd_buffer->exec2_array_length); - - bo->index = cmd_buffer->bo_count++; - obj = &cmd_buffer->exec2_objects[bo->index]; - cmd_buffer->exec2_bos[bo->index] = bo; - - obj->handle = bo->gem_handle; - obj->relocation_count = 0; - obj->relocs_ptr = 0; - obj->alignment = 0; - obj->offset = bo->offset; - obj->flags = 0; - obj->rsvd1 = 0; - obj->rsvd2 = 0; - - if (relocs) { - obj->relocation_count = num_relocs; - obj->relocs_ptr = (uintptr_t) relocs; - } - - return VK_SUCCESS; -} - -static void -anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer, - struct anv_reloc_list *list) -{ - for (size_t i = 0; i < list->num_relocs; i++) - anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL, 0); -} - -static void -anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, - struct anv_reloc_list *list) -{ - struct anv_bo *bo; - - /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in - * struct drm_i915_gem_exec_object2 against the bos current offset and if - * all bos haven't moved it will skip relocation processing 
alltogether. - * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming - * value of offset so we can set it either way. For that to work we need - * to make sure all relocs use the same presumed offset. - */ - - for (size_t i = 0; i < list->num_relocs; i++) { - bo = list->reloc_bos[i]; - if (bo->offset != list->relocs[i].presumed_offset) - cmd_buffer->need_reloc = true; - - list->relocs[i].target_handle = bo->index; - } -} - -VkResult anv_EndCommandBuffer( - VkCmdBuffer cmdBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_device *device = cmd_buffer->device; - struct anv_batch *batch = &cmd_buffer->batch; - - anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END); - - /* Round batch up to an even number of dwords. */ - if ((batch->next - batch->start) & 4) - anv_batch_emit(batch, GEN8_MI_NOOP); - - anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch); - cmd_buffer->surface_batch_bo->num_relocs = - cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc; - cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next; - - cmd_buffer->bo_count = 0; - cmd_buffer->need_reloc = false; - - /* Lock for access to bo->index. */ - pthread_mutex_lock(&device->mutex); - - /* Add surface state bos first so we can add them with their relocs. 
*/ - for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo; - bbo != NULL; bbo = bbo->prev_batch_bo) { - anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, - &cmd_buffer->surface_relocs.relocs[bbo->first_reloc], - bbo->num_relocs); - } - - /* Add all of the BOs referenced by surface state */ - anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs); - - /* Add all but the first batch BO */ - struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo; - while (batch_bo->prev_batch_bo) { - anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo, - &batch->relocs.relocs[batch_bo->first_reloc], - batch_bo->num_relocs); - batch_bo = batch_bo->prev_batch_bo; - } - - /* Add everything referenced by the batches */ - anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs); - - /* Add the first batch bo last */ - assert(batch_bo->prev_batch_bo == NULL && batch_bo->first_reloc == 0); - anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo, - &batch->relocs.relocs[batch_bo->first_reloc], - batch_bo->num_relocs); - assert(batch_bo->bo.index == cmd_buffer->bo_count - 1); - - anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); - anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs); - - cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects; - cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count; - cmd_buffer->execbuf.batch_start_offset = 0; - cmd_buffer->execbuf.batch_len = batch->next - batch->start; - cmd_buffer->execbuf.cliprects_ptr = 0; - cmd_buffer->execbuf.num_cliprects = 0; - cmd_buffer->execbuf.DR1 = 0; - cmd_buffer->execbuf.DR4 = 0; - - cmd_buffer->execbuf.flags = I915_EXEC_HANDLE_LUT; - if (!cmd_buffer->need_reloc) - cmd_buffer->execbuf.flags |= I915_EXEC_NO_RELOC; - cmd_buffer->execbuf.flags |= I915_EXEC_RENDER; - cmd_buffer->execbuf.rsvd1 = device->context_id; - cmd_buffer->execbuf.rsvd2 = 0; - - pthread_mutex_unlock(&device->mutex); - - return VK_SUCCESS; -} - -VkResult anv_ResetCommandBuffer( - VkCmdBuffer 
cmdBuffer, - VkCmdBufferResetFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - /* Delete all but the first batch bo */ - while (cmd_buffer->last_batch_bo->prev_batch_bo) { - struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo; - anv_batch_bo_destroy(cmd_buffer->last_batch_bo, cmd_buffer->device); - cmd_buffer->last_batch_bo = prev; - } - assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL); - - cmd_buffer->batch.relocs.num_relocs = 0; - anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); - - /* Delete all but the first batch bo */ - while (cmd_buffer->surface_batch_bo->prev_batch_bo) { - struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo; - anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, cmd_buffer->device); - cmd_buffer->surface_batch_bo = prev; - } - assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL); - - cmd_buffer->surface_next = 1; - cmd_buffer->surface_relocs.num_relocs = 0; - - cmd_buffer->rs_state = NULL; - cmd_buffer->vp_state = NULL; - cmd_buffer->cb_state = NULL; - cmd_buffer->ds_state = NULL; - - return VK_SUCCESS; -} - // Command buffer building functions void anv_CmdBindPipeline( @@ -2987,93 +2413,6 @@ void anv_CmdBindDynamicDepthStencilState( cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY; } -static struct anv_state -anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment) -{ - struct anv_state state; - - state.offset = align_u32(cmd_buffer->surface_next, alignment); - if (state.offset + size > cmd_buffer->surface_batch_bo->bo.size) - return (struct anv_state) { 0 }; - - state.map = cmd_buffer->surface_batch_bo->bo.map + state.offset; - state.alloc_size = size; - cmd_buffer->surface_next = state.offset + size; - - assert(state.offset + size <= cmd_buffer->surface_batch_bo->bo.size); - - return state; -} - -static VkResult -anv_cmd_buffer_new_surface_state_bo(struct 
anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->surface_batch_bo; - - /* Finish off the old buffer */ - old_bbo->num_relocs = - cmd_buffer->surface_relocs.num_relocs - old_bbo->first_reloc; - old_bbo->length = cmd_buffer->surface_next; - - VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); - if (result != VK_SUCCESS) - return result; - - new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs; - cmd_buffer->surface_next = 1; - - new_bbo->prev_batch_bo = old_bbo; - cmd_buffer->surface_batch_bo = new_bbo; - - /* Re-emit state base addresses so we get the new surface state base - * address before we start emitting binding tables etc. - */ - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - /* After re-setting the surface state base address, we have to do some - * cache flusing so that the sampler engine will pick up the new - * SURFACE_STATE objects and binding tables. From the Broadwell PRM, - * Shared Function > 3D Sampler > State > State Caching (page 96): - * - * Coherency with system memory in the state cache, like the texture - * cache is handled partially by software. It is expected that the - * command stream or shader will issue Cache Flush operation or - * Cache_Flush sampler message to ensure that the L1 cache remains - * coherent with system memory. - * - * [...] - * - * Whenever the value of the Dynamic_State_Base_Addr, - * Surface_State_Base_Addr are altered, the L1 state cache must be - * invalidated to ensure the new surface or sampler state is fetched - * from system memory. - * - * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit - * which, according the PIPE_CONTROL instruction documentation in the - * Broadwell PRM: - * - * Setting this bit is independent of any other bit in this packet. - * This bit controls the invalidation of the L1 and L2 state caches - * at the top of the pipe i.e. at the parsing time. 
- * - * Unfortunately, experimentation seems to indicate that state cache - * invalidation through a PIPE_CONTROL does nothing whatsoever in - * regards to surface state and binding tables. In stead, it seems that - * invalidating the texture cache is what is actually needed. - * - * XXX: As far as we have been able to determine through - * experimentation, shows that flush the texture cache appears to be - * sufficient. The theory here is that all of the sampling/rendering - * units cache the binding table in the texture cache. However, we have - * yet to be able to actually confirm this. - */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .TextureCacheInvalidationEnable = true); - - return VK_SUCCESS; -} - void anv_CmdBindDescriptorSets( VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, @@ -3497,12 +2836,7 @@ anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { result = flush_compute_descriptor_set(cmd_buffer); - if (result != VK_SUCCESS) { - result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); - assert(result == VK_SUCCESS); - result = flush_compute_descriptor_set(cmd_buffer); - assert(result == VK_SUCCESS); - } + assert(result == VK_SUCCESS); cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 2990634dc6f..f4ccc92f7e9 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -456,6 +456,11 @@ VkResult anv_reloc_list_init(struct anv_reloc_list *list, void anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device); +uint64_t anv_reloc_list_add(struct anv_reloc_list *list, + struct anv_device *device, + uint32_t offset, struct anv_bo *target_bo, + uint32_t delta); + struct anv_batch_bo { struct anv_bo bo; @@ -968,17 +973,22 @@ struct anv_render_pass { void anv_device_init_meta(struct anv_device 
*device); void anv_device_finish_meta(struct anv_device *device); -void -anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment); -void -anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, - struct anv_render_pass *pass, - const VkClearValue *clear_values); +VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer); -void * -anv_lookup_entrypoint(const char *name); +void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + +void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass, + const VkClearValue *clear_values); + +void *anv_lookup_entrypoint(const char *name); #define ANV_DEFINE_CASTS(__anv_type, __VkType) \ static inline struct __anv_type * \ -- cgit v1.2.3 From da4d9f6c7c739156eed5e887dd43df9c4d6e567f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 15 Jul 2015 15:34:41 -0700 Subject: vk: Move most of the anv_Cmd related stuff to its own file --- src/vulkan/Makefile.am | 1 + src/vulkan/anv_cmd_emit.c | 1188 +++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/device.c | 1162 +------------------------------------------- src/vulkan/private.h | 2 + 4 files changed, 1197 insertions(+), 1156 deletions(-) create mode 100644 src/vulkan/anv_cmd_emit.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 9d136bdb208..5147f6c69d0 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -58,6 +58,7 @@ libvulkan_la_SOURCES = \ gem.c \ device.c \ anv_cmd_buffer.c \ + anv_cmd_emit.c \ aub.c \ allocator.c \ util.c \ diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c new file mode 100644 index 00000000000..3a67c02bde7 --- 
/dev/null +++ b/src/vulkan/anv_cmd_emit.c @@ -0,0 +1,1188 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "private.h" + +/** \file anv_cmd_buffer.c + * + * This file contains all of the stuff for emitting commands into a command + * buffer. This includes implementations of most of the vkCmd* + * entrypoints. This file is concerned entirely with state emission and + * not with the command buffer data structure itself. As far as this file + * is concerned, most of anv_cmd_buffer is magic. 
+ */ + +void +anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->scratch_size = device->scratch_block_pool.size; + if (cmd_buffer->scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, + .GeneralStateBaseAddress = { scratch_bo, 0 }, + .GeneralStateMemoryObjectControlState = GEN8_MOCS, + .GeneralStateBaseAddressModifyEnable = true, + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + + .SurfaceStateBaseAddress = { &cmd_buffer->surface_batch_bo->bo, 0 }, + .SurfaceStateMemoryObjectControlState = GEN8_MOCS, + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GEN8_MOCS, + .DynamicStateBaseAddressModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GEN8_MOCS, + .IndirectObjectBaseAddressModifyEnable = true, + .IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GEN8_MOCS, + .InstructionBaseAddressModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true); +} + +VkResult anv_BeginCommandBuffer( + VkCmdBuffer cmdBuffer, + const VkCmdBufferBeginInfo* pBeginInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + cmd_buffer->current_pipeline = UINT32_MAX; + + return VK_SUCCESS; +} + +void anv_CmdBindPipeline( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, 
cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + cmd_buffer->compute_pipeline = pipeline; + cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; + + case VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd_buffer->pipeline = pipeline; + cmd_buffer->vb_dirty |= pipeline->vb_used; + cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; + + default: + assert(!"invalid bind point"); + break; + } +} + +void anv_CmdBindDynamicViewportState( + VkCmdBuffer cmdBuffer, + VkDynamicViewportState dynamicViewportState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState); + + cmd_buffer->vp_state = vp_state; + cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY; +} + +void anv_CmdBindDynamicRasterState( + VkCmdBuffer cmdBuffer, + VkDynamicRasterState dynamicRasterState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState); + + cmd_buffer->rs_state = rs_state; + cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY; +} + +void anv_CmdBindDynamicColorBlendState( + VkCmdBuffer cmdBuffer, + VkDynamicColorBlendState dynamicColorBlendState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState); + + cmd_buffer->cb_state = cb_state; + cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY; +} + +void anv_CmdBindDynamicDepthStencilState( + VkCmdBuffer cmdBuffer, + VkDynamicDepthStencilState dynamicDepthStencilState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState); + + cmd_buffer->ds_state = ds_state; + cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY; +} + +void anv_CmdBindDescriptorSets( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + 
uint32_t firstSet, + uint32_t setCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t* pDynamicOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + struct anv_descriptor_set_layout *set_layout; + + assert(firstSet + setCount < MAX_SETS); + + uint32_t dynamic_slot = 0; + for (uint32_t i = 0; i < setCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + set_layout = layout->set[firstSet + i].layout; + + cmd_buffer->descriptors[firstSet + i].set = set; + + assert(set_layout->num_dynamic_buffers < + ARRAY_SIZE(cmd_buffer->descriptors[0].dynamic_offsets)); + memcpy(cmd_buffer->descriptors[firstSet + i].dynamic_offsets, + pDynamicOffsets + dynamic_slot, + set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); + + cmd_buffer->descriptors_dirty |= set_layout->shader_stages; + + dynamic_slot += set_layout->num_dynamic_buffers; + } +} + +void anv_CmdBindIndexBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; + + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? 
UINT16_MAX : UINT32_MAX, + }; + GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state_vf, &vf); + + cmd_buffer->dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = GEN8_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); +} + +void anv_CmdBindVertexBuffers( + VkCmdBuffer cmdBuffer, + uint32_t startBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_vertex_binding *vb = cmd_buffer->vertex_bindings; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ + + assert(startBinding + bindingCount < MAX_VBS); + for (uint32_t i = 0; i < bindingCount; i++) { + vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); + vb[startBinding + i].offset = pOffsets[i]; + cmd_buffer->vb_dirty |= 1 << (startBinding + i); + } +} + +static VkResult +cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *bt_state) +{ + struct anv_framebuffer *fb = cmd_buffer->framebuffer; + struct anv_subpass *subpass = cmd_buffer->subpass; + struct anv_pipeline_layout *layout; + uint32_t attachments, bias, size; + + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->compute_pipeline->layout; + else + layout = cmd_buffer->pipeline->layout; + + if (stage == VK_SHADER_STAGE_FRAGMENT) { + bias = MAX_RTS; + attachments = subpass->color_count; + } else { + bias = 0; + attachments = 0; + } + + /* This is a little awkward: layout can be NULL but we still have to + * allocate and set a binding table for the PS stage for render + * targets. */ + uint32_t surface_count = layout ? 
layout->stage[stage].surface_count : 0; + + if (attachments + surface_count == 0) + return VK_SUCCESS; + + size = (bias + surface_count) * sizeof(uint32_t); + *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); + uint32_t *bt_map = bt_state->map; + + if (bt_state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + /* This is highly annoying. The Vulkan spec puts the depth-stencil + * attachments in with the color attachments. Unfortunately, thanks to + * other aspects of the API, we cana't really saparate them before this + * point. Therefore, we have to walk all of the attachments but only + * put the color attachments into the binding table. + */ + for (uint32_t a = 0; a < attachments; a++) { + const struct anv_attachment_view *attachment = + fb->attachments[subpass->color_attachments[a]]; + + assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); + const struct anv_color_attachment_view *view = + (const struct anv_color_attachment_view *)attachment; + + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); + + if (state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + memcpy(state.map, view->view.surface_state.map, 64); + + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(&cmd_buffer->surface_relocs, + cmd_buffer->device, + state.offset + 8 * 4, + view->view.bo, view->view.offset); + + bt_map[a] = state.offset; + } + + if (layout == NULL) + return VK_SUCCESS; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set]; + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct anv_descriptor_slot *surface_slots = + set_layout->stage[stage].surface_start; + + uint32_t start = bias + layout->set[set].surface_start[stage]; + + for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) { + struct anv_surface_view 
*view = + d->set->descriptors[surface_slots[b].index].view; + + if (!view) + continue; + + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); + + if (state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + uint32_t offset; + if (surface_slots[b].dynamic_slot >= 0) { + uint32_t dynamic_offset = + d->dynamic_offsets[surface_slots[b].dynamic_slot]; + + offset = view->offset + dynamic_offset; + anv_fill_buffer_surface_state(state.map, view->format, offset, + view->range - dynamic_offset); + } else { + offset = view->offset; + memcpy(state.map, view->surface_state.map, 64); + } + + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(&cmd_buffer->surface_relocs, + cmd_buffer->device, + state.offset + 8 * 4, + view->bo, offset); + + bt_map[start + b] = state.offset; + } + } + + return VK_SUCCESS; +} + +static VkResult +cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *state) +{ + struct anv_pipeline_layout *layout; + uint32_t sampler_count; + + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->compute_pipeline->layout; + else + layout = cmd_buffer->pipeline->layout; + + sampler_count = layout ? 
layout->stage[stage].sampler_count : 0; + if (sampler_count == 0) + return VK_SUCCESS; + + uint32_t size = sampler_count * 16; + *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); + + if (state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set]; + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct anv_descriptor_slot *sampler_slots = + set_layout->stage[stage].sampler_start; + + uint32_t start = layout->set[set].sampler_start[stage]; + + for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) { + struct anv_sampler *sampler = + d->set->descriptors[sampler_slots[b].index].sampler; + + if (!sampler) + continue; + + memcpy(state->map + (start + b) * 16, + sampler->state, sizeof(sampler->state)); + } + } + + return VK_SUCCESS; +} + +static VkResult +flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) +{ + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); + if (result != VK_SUCCESS) + return result; + + static const uint32_t sampler_state_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 43, + [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 46, + [VK_SHADER_STAGE_FRAGMENT] = 47, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + static const uint32_t binding_table_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + if (samplers.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + 
GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[stage], + .PointertoVSSamplerState = samplers.offset); + } + + if (surfaces.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[stage], + .PointertoVSBindingTable = surfaces.offset); + } + + return VK_SUCCESS; +} + +static void +flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t s, dirty = cmd_buffer->descriptors_dirty & + cmd_buffer->pipeline->active_stages; + + VkResult result = VK_SUCCESS; + for_each_bit(s, dirty) { + result = flush_descriptor_set(cmd_buffer, s); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) { + assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); + + result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); + assert(result == VK_SUCCESS); + + /* Re-emit all active binding tables */ + for_each_bit(s, cmd_buffer->pipeline->active_stages) { + result = flush_descriptor_set(cmd_buffer, s); + + /* It had better succeed this time */ + assert(result == VK_SUCCESS); + } + } + + cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages; +} + +static struct anv_state +anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t dwords, uint32_t alignment) +{ + struct anv_state state; + + state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + dwords * 4, alignment); + memcpy(state.map, a, dwords * 4); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4)); + + return state; +} + +static struct anv_state +anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment) +{ + struct anv_state state; + uint32_t *p; + + state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + dwords * 4, alignment); + p = state.map; + for (uint32_t i = 0; i < dwords; i++) + p[i] = a[i] | b[i]; + + 
VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); + + return state; +} + +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = cmd_buffer_emit_samplers(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = { + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */ + }; + + uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + struct anv_state state = + anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); + + GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +static void +anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + if (cmd_buffer->current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->current_pipeline = GPGPU; + } + + if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->descriptors_dirty & 
VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; + } + + cmd_buffer->compute_dirty = 0; +} + +static void +anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + if (cmd_buffer->current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = _3D); + cmd_buffer->current_pipeline = _3D; + } + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN8_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->vertex_bindings[vb].offset; + + struct GEN8_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = GEN8_MOCS, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. 
*/ + if (cmd_buffer->scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + if (cmd_buffer->descriptors_dirty) + flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->dirty & ANV_CMD_BUFFER_VP_DIRTY) { + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = cmd_buffer->vp_state->scissor.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = cmd_buffer->vp_state->cc_vp.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = cmd_buffer->vp_state->sf_clip_vp.offset); + } + + if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->rs_state->state_sf, pipeline->state_sf); + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->rs_state->state_raster, pipeline->state_raster); + } + + if (cmd_buffer->ds_state && + (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY))) + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->ds_state->state_wm_depth_stencil, + pipeline->state_wm_depth_stencil); + + if (cmd_buffer->dirty & (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) { + struct anv_state state; + if (cmd_buffer->ds_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + else if (cmd_buffer->cb_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->ds_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + else + state = anv_cmd_buffer_merge_dynamic(cmd_buffer, + cmd_buffer->ds_state->state_color_calc, + cmd_buffer->cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_CC_STATE_POINTERS, + 
.ColorCalcStatePointer = state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state_vf, pipeline->state_vf); + } + + cmd_buffer->vb_dirty &= ~vb_emit; + cmd_buffer->dirty = 0; +} + +void anv_CmdDraw( + VkCmdBuffer cmdBuffer, + uint32_t firstVertex, + uint32_t vertexCount, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = SEQUENTIAL, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void anv_CmdDrawIndexed( + VkCmdBuffer cmdBuffer, + uint32_t firstIndex, + uint32_t indexCount, + int32_t vertexOffset, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = RANDOM, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} + +static void +anv_batch_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, + .RegisterOffset = reg, + .DataDWord = imm); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 
0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void anv_CmdDrawIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL); +} + +void anv_CmdDrawIndexedIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM); +} + +void anv_CmdDispatch( + VkCmdBuffer cmdBuffer, + 
uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + anv_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void anv_CmdDispatchIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); +} + +void 
anv_CmdSetEvent( + VkCmdBuffer cmdBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void anv_CmdResetEvent( + VkCmdBuffer cmdBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void anv_CmdWaitEvents( + VkCmdBuffer cmdBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) +{ + stub(); +} + +void anv_CmdPipelineBarrier( + VkCmdBuffer cmdBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + uint32_t b, *dw; + + struct GEN8_PIPE_CONTROL cmd = { + GEN8_PIPE_CONTROL_header, + .PostSyncOperation = NoWrite, + }; + + /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */ + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { + /* This is just what PIPE_CONTROL does */ + } + + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { + cmd.StallAtPixelScoreboard = true; + } + + + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_TRANSITION_BIT)) { + cmd.CommandStreamerStallEnable = true; + } + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { + anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); + } + + /* On our hardware, all stages will wait for execution as needed. 
*/ + (void)destStageMask; + + /* We checked all known VkPipeEventFlags. */ + anv_assert(srcStageMask == 0); + + /* XXX: Right now, we're really dumb and just flush whatever categories + * the app asks for. One of these days we may make this a bit better + * but right now that's all the hardware allows for in most areas. + */ + VkMemoryOutputFlags out_flags = 0; + VkMemoryInputFlags in_flags = 0; + + for (uint32_t i = 0; i < memBarrierCount; i++) { + const struct anv_common *common = ppMemBarriers[i]; + switch (common->sType) { + case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { + const VkMemoryBarrier *barrier = (VkMemoryBarrier *)common; + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { + const VkBufferMemoryBarrier *barrier = (VkBufferMemoryBarrier *)common; + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { + const VkImageMemoryBarrier *barrier = (VkImageMemoryBarrier *)common; + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + default: + unreachable("Invalid memory barrier type"); + } + } + + for_each_bit(b, out_flags) { + switch ((VkMemoryOutputFlags)(1 << b)) { + case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: + cmd.DCFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: + cmd.RenderTargetCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + cmd.DepthCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_TRANSFER_BIT: + cmd.RenderTargetCacheFlushEnable = true; + cmd.DepthCacheFlushEnable = true; + break; + default: + unreachable("Invalid memory output flag"); + } + } + + for_each_bit(b, out_flags) { + switch ((VkMemoryInputFlags)(1 << b)) { + case VK_MEMORY_INPUT_HOST_READ_BIT: + break; /* FIXME: Little-core systems */ + case 
VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: + case VK_MEMORY_INPUT_INDEX_FETCH_BIT: + case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: + cmd.VFCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_UNIFORM_READ_BIT: + cmd.ConstantCacheInvalidationEnable = true; + /* fallthrough */ + case VK_MEMORY_INPUT_SHADER_READ_BIT: + cmd.DCFlushEnable = true; + cmd.TextureCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: + case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + break; /* XXX: Hunh? */ + case VK_MEMORY_INPUT_TRANSFER_BIT: + cmd.TextureCacheInvalidationEnable = true; + break; + } + } + + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length); + GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd); +} + +void anv_CmdPushConstants( + VkCmdBuffer cmdBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t start, + uint32_t length, + const void* values) +{ + stub(); +} + +static void +anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_subpass *subpass = cmd_buffer->subpass; + struct anv_framebuffer *fb = cmd_buffer->framebuffer; + const struct anv_depth_stencil_view *view; + + static const struct anv_depth_stencil_view null_view = + { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; + + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + const struct anv_attachment_view *aview = + fb->attachments[subpass->depth_stencil_attachment]; + assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); + view = (const struct anv_depth_stencil_view *)aview; + } else { + view = &null_view; + } + + /* FIXME: Implement the PMA stall W/A */ + /* FIXME: Width and Height are wrong */ + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = view->depth_stride > 0, + .StencilWriteEnable = view->stencil_stride > 0, + .HierarchicalDepthBufferEnable = false, + 
.SurfaceFormat = view->depth_format, + .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->depth_offset }, + .Height = cmd_buffer->framebuffer->height - 1, + .Width = cmd_buffer->framebuffer->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GEN8_MOCS, + .RenderTargetViewExtent = 1 - 1, + .SurfaceQPitch = view->depth_qpitch >> 2); + + /* Disable hierarchial depth buffers. */ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, + .StencilBufferEnable = view->stencil_stride > 0, + .StencilBufferObjectControlState = GEN8_MOCS, + .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->stencil_offset }, + .SurfaceQPitch = view->stencil_qpitch >> 2); + + /* Clear the clear params. */ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS); +} + +void +anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + cmd_buffer->subpass = subpass; + + cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + anv_cmd_buffer_emit_depth_stencil(cmd_buffer); +} + +void anv_CmdBeginRenderPass( + VkCmdBuffer cmdBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkRenderPassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + + assert(contents == VK_RENDER_PASS_CONTENTS_INLINE); + + cmd_buffer->framebuffer = framebuffer; + cmd_buffer->pass = pass; + + const VkRect2D *render_area = &pRenderPassBegin->renderArea; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, 
+ .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + anv_cmd_buffer_clear_attachments(cmd_buffer, pass, + pRenderPassBegin->pAttachmentClearValues); + + anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); +} + +void anv_CmdNextSubpass( + VkCmdBuffer cmdBuffer, + VkRenderPassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + assert(contents == VK_RENDER_PASS_CONTENTS_INLINE); + + cmd_buffer->subpass++; + anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->subpass + 1); +} + +void anv_CmdEndRenderPass( + VkCmdBuffer cmdBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + /* Emit a flushing pipe control at the end of a pass. This is kind of a + * hack but it ensures that render targets always actually get written. + * Eventually, we should do flushing based on image format transitions + * or something of that nature. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .PostSyncOperation = NoWrite, + .RenderTargetCacheFlushEnable = true, + .InstructionCacheInvalidateEnable = true, + .DepthCacheFlushEnable = true, + .VFCacheInvalidationEnable = true, + .TextureCacheInvalidationEnable = true, + .CommandStreamerStallEnable = true); +} + +void anv_CmdExecuteCommands( + VkCmdBuffer cmdBuffer, + uint32_t cmdBuffersCount, + const VkCmdBuffer* pCmdBuffers) +{ + stub(); +} diff --git a/src/vulkan/device.c b/src/vulkan/device.c index a2182800cde..c168219a40b 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1463,9 +1463,9 @@ VkResult anv_DestroyBuffer( // Buffer view functions -static void -fill_buffer_surface_state(void *state, VkFormat format, - uint32_t offset, uint32_t range) +void +anv_fill_buffer_surface_state(void *state, VkFormat format, + uint32_t offset, uint32_t range) { const struct anv_format *info; @@ -1540,8 +1540,9 @@ VkResult anv_CreateBufferView( view->format = pCreateInfo->format; view->range = pCreateInfo->range; - fill_buffer_surface_state(view->surface_state.map, - pCreateInfo->format, view->offset, pCreateInfo->range); + anv_fill_buffer_surface_state(view->surface_state.map, + pCreateInfo->format, + view->offset, pCreateInfo->range); *pView = anv_buffer_view_to_handle(bview); @@ -2289,1010 +2290,6 @@ VkResult anv_ResetCommandPool( stub_return(VK_UNSUPPORTED); } -void -anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_bo *scratch_bo = NULL; - - cmd_buffer->scratch_size = device->scratch_block_pool.size; - if (cmd_buffer->scratch_size > 0) - scratch_bo = &device->scratch_block_pool.bo; - - anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, - .GeneralStateBaseAddress = { scratch_bo, 0 }, - .GeneralStateMemoryObjectControlState = GEN8_MOCS, - .GeneralStateBaseAddressModifyEnable = true, - .GeneralStateBufferSize = 0xfffff, - 
.GeneralStateBufferSizeModifyEnable = true, - - .SurfaceStateBaseAddress = { &cmd_buffer->surface_batch_bo->bo, 0 }, - .SurfaceStateMemoryObjectControlState = GEN8_MOCS, - .SurfaceStateBaseAddressModifyEnable = true, - - .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, - .DynamicStateMemoryObjectControlState = GEN8_MOCS, - .DynamicStateBaseAddressModifyEnable = true, - .DynamicStateBufferSize = 0xfffff, - .DynamicStateBufferSizeModifyEnable = true, - - .IndirectObjectBaseAddress = { NULL, 0 }, - .IndirectObjectMemoryObjectControlState = GEN8_MOCS, - .IndirectObjectBaseAddressModifyEnable = true, - .IndirectObjectBufferSize = 0xfffff, - .IndirectObjectBufferSizeModifyEnable = true, - - .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, - .InstructionMemoryObjectControlState = GEN8_MOCS, - .InstructionBaseAddressModifyEnable = true, - .InstructionBufferSize = 0xfffff, - .InstructionBuffersizeModifyEnable = true); -} - -VkResult anv_BeginCommandBuffer( - VkCmdBuffer cmdBuffer, - const VkCmdBufferBeginInfo* pBeginInfo) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - cmd_buffer->current_pipeline = UINT32_MAX; - - return VK_SUCCESS; -} - -// Command buffer building functions - -void anv_CmdBindPipeline( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipeline _pipeline) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - - switch (pipelineBindPoint) { - case VK_PIPELINE_BIND_POINT_COMPUTE: - cmd_buffer->compute_pipeline = pipeline; - cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; - break; - - case VK_PIPELINE_BIND_POINT_GRAPHICS: - cmd_buffer->pipeline = pipeline; - cmd_buffer->vb_dirty |= pipeline->vb_used; - cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; - break; - - default: - assert(!"invalid bind point"); - break; - } -} - -void 
anv_CmdBindDynamicViewportState( - VkCmdBuffer cmdBuffer, - VkDynamicViewportState dynamicViewportState) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState); - - cmd_buffer->vp_state = vp_state; - cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY; -} - -void anv_CmdBindDynamicRasterState( - VkCmdBuffer cmdBuffer, - VkDynamicRasterState dynamicRasterState) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState); - - cmd_buffer->rs_state = rs_state; - cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY; -} - -void anv_CmdBindDynamicColorBlendState( - VkCmdBuffer cmdBuffer, - VkDynamicColorBlendState dynamicColorBlendState) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState); - - cmd_buffer->cb_state = cb_state; - cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY; -} - -void anv_CmdBindDynamicDepthStencilState( - VkCmdBuffer cmdBuffer, - VkDynamicDepthStencilState dynamicDepthStencilState) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState); - - cmd_buffer->ds_state = ds_state; - cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY; -} - -void anv_CmdBindDescriptorSets( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout _layout, - uint32_t firstSet, - uint32_t setCount, - const VkDescriptorSet* pDescriptorSets, - uint32_t dynamicOffsetCount, - const uint32_t* pDynamicOffsets) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); - struct anv_descriptor_set_layout *set_layout; - - assert(firstSet + setCount < MAX_SETS); - - uint32_t dynamic_slot = 0; - for (uint32_t i = 0; i < setCount; i++) { - ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); - set_layout = 
layout->set[firstSet + i].layout; - - cmd_buffer->descriptors[firstSet + i].set = set; - - assert(set_layout->num_dynamic_buffers < - ARRAY_SIZE(cmd_buffer->descriptors[0].dynamic_offsets)); - memcpy(cmd_buffer->descriptors[firstSet + i].dynamic_offsets, - pDynamicOffsets + dynamic_slot, - set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); - - cmd_buffer->descriptors_dirty |= set_layout->shader_stages; - - dynamic_slot += set_layout->num_dynamic_buffers; - } -} - -void anv_CmdBindIndexBuffer( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - VkIndexType indexType) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - static const uint32_t vk_to_gen_index_type[] = { - [VK_INDEX_TYPE_UINT16] = INDEX_WORD, - [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, - }; - - struct GEN8_3DSTATE_VF vf = { - GEN8_3DSTATE_VF_header, - .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX, - }; - GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state_vf, &vf); - - cmd_buffer->dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, - .IndexFormat = vk_to_gen_index_type[indexType], - .MemoryObjectControlState = GEN8_MOCS, - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset); -} - -void anv_CmdBindVertexBuffers( - VkCmdBuffer cmdBuffer, - uint32_t startBinding, - uint32_t bindingCount, - const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_vertex_binding *vb = cmd_buffer->vertex_bindings; - - /* We have to defer setting up vertex buffer since we need the buffer - * stride from the pipeline. 
*/ - - assert(startBinding + bindingCount < MAX_VBS); - for (uint32_t i = 0; i < bindingCount; i++) { - vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); - vb[startBinding + i].offset = pOffsets[i]; - cmd_buffer->vb_dirty |= 1 << (startBinding + i); - } -} - -static VkResult -cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *bt_state) -{ - struct anv_framebuffer *fb = cmd_buffer->framebuffer; - struct anv_subpass *subpass = cmd_buffer->subpass; - struct anv_pipeline_layout *layout; - uint32_t attachments, bias, size; - - if (stage == VK_SHADER_STAGE_COMPUTE) - layout = cmd_buffer->compute_pipeline->layout; - else - layout = cmd_buffer->pipeline->layout; - - if (stage == VK_SHADER_STAGE_FRAGMENT) { - bias = MAX_RTS; - attachments = subpass->color_count; - } else { - bias = 0; - attachments = 0; - } - - /* This is a little awkward: layout can be NULL but we still have to - * allocate and set a binding table for the PS stage for render - * targets. */ - uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0; - - if (attachments + surface_count == 0) - return VK_SUCCESS; - - size = (bias + surface_count) * sizeof(uint32_t); - *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); - uint32_t *bt_map = bt_state->map; - - if (bt_state->map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - /* This is highly annoying. The Vulkan spec puts the depth-stencil - * attachments in with the color attachments. Unfortunately, thanks to - * other aspects of the API, we cana't really saparate them before this - * point. Therefore, we have to walk all of the attachments but only - * put the color attachments into the binding table. 
- */ - for (uint32_t a = 0; a < attachments; a++) { - const struct anv_attachment_view *attachment = - fb->attachments[subpass->color_attachments[a]]; - - assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); - const struct anv_color_attachment_view *view = - (const struct anv_color_attachment_view *)attachment; - - struct anv_state state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - - if (state.map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - memcpy(state.map, view->view.surface_state.map, 64); - - /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ - *(uint64_t *)(state.map + 8 * 4) = - anv_reloc_list_add(&cmd_buffer->surface_relocs, - cmd_buffer->device, - state.offset + 8 * 4, - view->view.bo, view->view.offset); - - bt_map[a] = state.offset; - } - - if (layout == NULL) - return VK_SUCCESS; - - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set]; - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - struct anv_descriptor_slot *surface_slots = - set_layout->stage[stage].surface_start; - - uint32_t start = bias + layout->set[set].surface_start[stage]; - - for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) { - struct anv_surface_view *view = - d->set->descriptors[surface_slots[b].index].view; - - if (!view) - continue; - - struct anv_state state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - - if (state.map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - uint32_t offset; - if (surface_slots[b].dynamic_slot >= 0) { - uint32_t dynamic_offset = - d->dynamic_offsets[surface_slots[b].dynamic_slot]; - - offset = view->offset + dynamic_offset; - fill_buffer_surface_state(state.map, view->format, offset, - view->range - dynamic_offset); - } else { - offset = view->offset; - memcpy(state.map, view->surface_state.map, 64); - } - - /* The address goes in dwords 8 and 9 of the SURFACE_STATE 
*/ - *(uint64_t *)(state.map + 8 * 4) = - anv_reloc_list_add(&cmd_buffer->surface_relocs, - cmd_buffer->device, - state.offset + 8 * 4, - view->bo, offset); - - bt_map[start + b] = state.offset; - } - } - - return VK_SUCCESS; -} - -static VkResult -cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *state) -{ - struct anv_pipeline_layout *layout; - uint32_t sampler_count; - - if (stage == VK_SHADER_STAGE_COMPUTE) - layout = cmd_buffer->compute_pipeline->layout; - else - layout = cmd_buffer->pipeline->layout; - - sampler_count = layout ? layout->stage[stage].sampler_count : 0; - if (sampler_count == 0) - return VK_SUCCESS; - - uint32_t size = sampler_count * 16; - *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); - - if (state->map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set]; - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - struct anv_descriptor_slot *sampler_slots = - set_layout->stage[stage].sampler_start; - - uint32_t start = layout->set[set].sampler_start[stage]; - - for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) { - struct anv_sampler *sampler = - d->set->descriptors[sampler_slots[b].index].sampler; - - if (!sampler) - continue; - - memcpy(state->map + (start + b) * 16, - sampler->state, sizeof(sampler->state)); - } - } - - return VK_SUCCESS; -} - -static VkResult -flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) -{ - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); - if (result != VK_SUCCESS) - return result; - result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); - if (result != VK_SUCCESS) - return result; - - static const uint32_t sampler_state_opcodes[] = { - 
[VK_SHADER_STAGE_VERTEX] = 43, - [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ - [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ - [VK_SHADER_STAGE_GEOMETRY] = 46, - [VK_SHADER_STAGE_FRAGMENT] = 47, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; - - static const uint32_t binding_table_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 38, - [VK_SHADER_STAGE_TESS_CONTROL] = 39, - [VK_SHADER_STAGE_TESS_EVALUATION] = 40, - [VK_SHADER_STAGE_GEOMETRY] = 41, - [VK_SHADER_STAGE_FRAGMENT] = 42, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; - - if (samplers.alloc_size > 0) { - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, - ._3DCommandSubOpcode = sampler_state_opcodes[stage], - .PointertoVSSamplerState = samplers.offset); - } - - if (surfaces.alloc_size > 0) { - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, - ._3DCommandSubOpcode = binding_table_opcodes[stage], - .PointertoVSBindingTable = surfaces.offset); - } - - return VK_SUCCESS; -} - -static void -flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) -{ - uint32_t s, dirty = cmd_buffer->descriptors_dirty & - cmd_buffer->pipeline->active_stages; - - VkResult result = VK_SUCCESS; - for_each_bit(s, dirty) { - result = flush_descriptor_set(cmd_buffer, s); - if (result != VK_SUCCESS) - break; - } - - if (result != VK_SUCCESS) { - assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); - - result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); - assert(result == VK_SUCCESS); - - /* Re-emit all active binding tables */ - for_each_bit(s, cmd_buffer->pipeline->active_stages) { - result = flush_descriptor_set(cmd_buffer, s); - - /* It had better succeed this time */ - assert(result == VK_SUCCESS); - } - } - - cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages; -} - -static struct anv_state -anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, - uint32_t *a, uint32_t dwords, uint32_t alignment) -{ - struct anv_state state; - - state = 
anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, - dwords * 4, alignment); - memcpy(state.map, a, dwords * 4); - - VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4)); - - return state; -} - -static struct anv_state -anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, - uint32_t *a, uint32_t *b, - uint32_t dwords, uint32_t alignment) -{ - struct anv_state state; - uint32_t *p; - - state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, - dwords * 4, alignment); - p = state.map; - for (uint32_t i = 0; i < dwords; i++) - p[i] = a[i] | b[i]; - - VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); - - return state; -} - -static VkResult -flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = cmd_buffer_emit_samplers(cmd_buffer, - VK_SHADER_STAGE_COMPUTE, &samplers); - if (result != VK_SUCCESS) - return result; - result = cmd_buffer_emit_binding_table(cmd_buffer, - VK_SHADER_STAGE_COMPUTE, &surfaces); - if (result != VK_SUCCESS) - return result; - - struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = { - .KernelStartPointer = pipeline->cs_simd, - .KernelStartPointerHigh = 0, - .BindingTablePointer = surfaces.offset, - .BindingTableEntryCount = 0, - .SamplerStatePointer = samplers.offset, - .SamplerCount = 0, - .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? 
*/ - }; - - uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); - struct anv_state state = - anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); - - GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, - .InterfaceDescriptorTotalLength = size, - .InterfaceDescriptorDataStartAddress = state.offset); - - return VK_SUCCESS; -} - -static void -anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; - VkResult result; - - assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - - if (cmd_buffer->current_pipeline != GPGPU) { - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, - .PipelineSelection = GPGPU); - cmd_buffer->current_pipeline = GPGPU; - } - - if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { - result = flush_compute_descriptor_set(cmd_buffer); - assert(result == VK_SUCCESS); - cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; - } - - cmd_buffer->compute_dirty = 0; -} - -static void -anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->pipeline; - uint32_t *p; - - uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used; - - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - - if (cmd_buffer->current_pipeline != _3D) { - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, - .PipelineSelection = _3D); - cmd_buffer->current_pipeline = _3D; - } - - if (vb_emit) { - const uint32_t num_buffers = __builtin_popcount(vb_emit); - const uint32_t num_dwords = 1 + num_buffers * 4; - - p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - 
GEN8_3DSTATE_VERTEX_BUFFERS); - uint32_t vb, i = 0; - for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer; - uint32_t offset = cmd_buffer->vertex_bindings[vb].offset; - - struct GEN8_VERTEX_BUFFER_STATE state = { - .VertexBufferIndex = vb, - .MemoryObjectControlState = GEN8_MOCS, - .AddressModifyEnable = true, - .BufferPitch = pipeline->binding_stride[vb], - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset - }; - - GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); - i++; - } - } - - if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { - /* If somebody compiled a pipeline after starting a command buffer the - * scratch bo may have grown since we started this cmd buffer (and - * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, - * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ - if (cmd_buffer->scratch_size < pipeline->total_scratch) - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - } - - if (cmd_buffer->descriptors_dirty) - flush_descriptor_sets(cmd_buffer); - - if (cmd_buffer->dirty & ANV_CMD_BUFFER_VP_DIRTY) { - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, - .ScissorRectPointer = cmd_buffer->vp_state->scissor.offset); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, - .CCViewportPointer = cmd_buffer->vp_state->cc_vp.offset); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, - .SFClipViewportPointer = cmd_buffer->vp_state->sf_clip_vp.offset); - } - - if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->rs_state->state_sf, pipeline->state_sf); - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->rs_state->state_raster, pipeline->state_raster); - } - - if 
(cmd_buffer->ds_state && - (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY))) - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->ds_state->state_wm_depth_stencil, - pipeline->state_wm_depth_stencil); - - if (cmd_buffer->dirty & (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) { - struct anv_state state; - if (cmd_buffer->ds_state == NULL) - state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->cb_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 64); - else if (cmd_buffer->cb_state == NULL) - state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->ds_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 64); - else - state = anv_cmd_buffer_merge_dynamic(cmd_buffer, - cmd_buffer->ds_state->state_color_calc, - cmd_buffer->cb_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 64); - - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_CC_STATE_POINTERS, - .ColorCalcStatePointer = state.offset, - .ColorCalcStatePointerValid = true); - } - - if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state_vf, pipeline->state_vf); - } - - cmd_buffer->vb_dirty &= ~vb_emit; - cmd_buffer->dirty = 0; -} - -void anv_CmdDraw( - VkCmdBuffer cmdBuffer, - uint32_t firstVertex, - uint32_t vertexCount, - uint32_t firstInstance, - uint32_t instanceCount) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - anv_cmd_buffer_flush_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, - .VertexAccessType = SEQUENTIAL, - .VertexCountPerInstance = vertexCount, - .StartVertexLocation = firstVertex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = 0); -} - -void anv_CmdDrawIndexed( - VkCmdBuffer cmdBuffer, - uint32_t firstIndex, - uint32_t indexCount, - int32_t vertexOffset, - uint32_t firstInstance, - uint32_t instanceCount) -{ - 
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - anv_cmd_buffer_flush_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, - .VertexAccessType = RANDOM, - .VertexCountPerInstance = indexCount, - .StartVertexLocation = firstIndex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = vertexOffset); -} - -static void -anv_batch_lrm(struct anv_batch *batch, - uint32_t reg, struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); -} - -static void -anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, - .RegisterOffset = reg, - .DataDWord = imm); -} - -/* Auto-Draw / Indirect Registers */ -#define GEN7_3DPRIM_END_OFFSET 0x2420 -#define GEN7_3DPRIM_START_VERTEX 0x2430 -#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 -#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 -#define GEN7_3DPRIM_START_INSTANCE 0x243C -#define GEN7_3DPRIM_BASE_VERTEX 0x2440 - -void anv_CmdDrawIndirect( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t count, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - anv_cmd_buffer_flush_state(cmd_buffer); - - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); - anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, - .IndirectParameterEnable = true, - .VertexAccessType = SEQUENTIAL); -} - -void 
anv_CmdDrawIndexedIndirect( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t count, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - anv_cmd_buffer_flush_state(cmd_buffer); - - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, - .IndirectParameterEnable = true, - .VertexAccessType = RANDOM); -} - -void anv_CmdDispatch( - VkCmdBuffer cmdBuffer, - uint32_t x, - uint32_t y, - uint32_t z) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - - anv_cmd_buffer_flush_compute_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, - .ThreadGroupIDXDimension = x, - .ThreadGroupIDYDimension = y, - .ThreadGroupIDZDimension = z, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); -} - -#define GPGPU_DISPATCHDIMX 0x2500 -#define GPGPU_DISPATCHDIMY 0x2504 -#define GPGPU_DISPATCHDIMZ 0x2508 - -void anv_CmdDispatchIndirect( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - 
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - anv_cmd_buffer_flush_compute_state(cmd_buffer); - - anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); - anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); - anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - - anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, - .IndirectParameterEnable = true, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); -} - -void anv_CmdSetEvent( - VkCmdBuffer cmdBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - stub(); -} - -void anv_CmdResetEvent( - VkCmdBuffer cmdBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - stub(); -} - -void anv_CmdWaitEvents( - VkCmdBuffer cmdBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - uint32_t memBarrierCount, - const void* const* ppMemBarriers) -{ - stub(); -} - -void anv_CmdPipelineBarrier( - VkCmdBuffer cmdBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - VkBool32 byRegion, - uint32_t memBarrierCount, - const void* const* ppMemBarriers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - uint32_t b, *dw; - - struct GEN8_PIPE_CONTROL cmd = { - GEN8_PIPE_CONTROL_header, - .PostSyncOperation = NoWrite, - }; - - /* XXX: I think waitEvent is a no-op on our HW. We should verify that. 
*/ - - if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { - /* This is just what PIPE_CONTROL does */ - } - - if (anv_clear_mask(&srcStageMask, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { - cmd.StallAtPixelScoreboard = true; - } - - - if (anv_clear_mask(&srcStageMask, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT | - VK_PIPELINE_STAGE_TRANSITION_BIT)) { - cmd.CommandStreamerStallEnable = true; - } - - if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { - anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); - } - - /* On our hardware, all stages will wait for execution as needed. */ - (void)destStageMask; - - /* We checked all known VkPipeEventFlags. */ - anv_assert(srcStageMask == 0); - - /* XXX: Right now, we're really dumb and just flush whatever categories - * the app asks for. One of these days we may make this a bit better - * but right now that's all the hardware allows for in most areas. 
- */ - VkMemoryOutputFlags out_flags = 0; - VkMemoryInputFlags in_flags = 0; - - for (uint32_t i = 0; i < memBarrierCount; i++) { - const struct anv_common *common = ppMemBarriers[i]; - switch (common->sType) { - case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { - const VkMemoryBarrier *barrier = (VkMemoryBarrier *)common; - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { - const VkBufferMemoryBarrier *barrier = (VkBufferMemoryBarrier *)common; - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { - const VkImageMemoryBarrier *barrier = (VkImageMemoryBarrier *)common; - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - default: - unreachable("Invalid memory barrier type"); - } - } - - for_each_bit(b, out_flags) { - switch ((VkMemoryOutputFlags)(1 << b)) { - case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: - break; /* FIXME: Little-core systems */ - case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: - cmd.DCFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: - cmd.RenderTargetCacheFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: - cmd.DepthCacheFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_TRANSFER_BIT: - cmd.RenderTargetCacheFlushEnable = true; - cmd.DepthCacheFlushEnable = true; - break; - default: - unreachable("Invalid memory output flag"); - } - } - - for_each_bit(b, out_flags) { - switch ((VkMemoryInputFlags)(1 << b)) { - case VK_MEMORY_INPUT_HOST_READ_BIT: - break; /* FIXME: Little-core systems */ - case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: - case VK_MEMORY_INPUT_INDEX_FETCH_BIT: - case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: - cmd.VFCacheInvalidationEnable = true; - break; - case VK_MEMORY_INPUT_UNIFORM_READ_BIT: - cmd.ConstantCacheInvalidationEnable = true; - /* fallthrough */ - case VK_MEMORY_INPUT_SHADER_READ_BIT: - 
cmd.DCFlushEnable = true; - cmd.TextureCacheInvalidationEnable = true; - break; - case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: - case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: - break; /* XXX: Hunh? */ - case VK_MEMORY_INPUT_TRANSFER_BIT: - cmd.TextureCacheInvalidationEnable = true; - break; - } - } - - dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length); - GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd); -} - VkResult anv_CreateFramebuffer( VkDevice _device, const VkFramebufferCreateInfo* pCreateInfo, @@ -3483,153 +2480,6 @@ VkResult anv_GetRenderAreaGranularity( return VK_SUCCESS; } -static void -anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_subpass *subpass = cmd_buffer->subpass; - struct anv_framebuffer *fb = cmd_buffer->framebuffer; - const struct anv_depth_stencil_view *view; - - static const struct anv_depth_stencil_view null_view = - { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; - - if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { - const struct anv_attachment_view *aview = - fb->attachments[subpass->depth_stencil_attachment]; - assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); - view = (const struct anv_depth_stencil_view *)aview; - } else { - view = &null_view; - } - - /* FIXME: Implement the PMA stall W/A */ - /* FIXME: Width and Height are wrong */ - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, - .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = view->depth_stride > 0, - .StencilWriteEnable = view->stencil_stride > 0, - .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = view->depth_format, - .SurfacePitch = view->depth_stride > 0 ? 
view->depth_stride - 1 : 0, - .SurfaceBaseAddress = { view->bo, view->depth_offset }, - .Height = cmd_buffer->framebuffer->height - 1, - .Width = cmd_buffer->framebuffer->width - 1, - .LOD = 0, - .Depth = 1 - 1, - .MinimumArrayElement = 0, - .DepthBufferObjectControlState = GEN8_MOCS, - .RenderTargetViewExtent = 1 - 1, - .SurfaceQPitch = view->depth_qpitch >> 2); - - /* Disable hierarchial depth buffers. */ - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, - .StencilBufferEnable = view->stencil_stride > 0, - .StencilBufferObjectControlState = GEN8_MOCS, - .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, - .SurfaceBaseAddress = { view->bo, view->stencil_offset }, - .SurfaceQPitch = view->stencil_qpitch >> 2); - - /* Clear the clear params. */ - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS); -} - -void anv_CmdPushConstants( - VkCmdBuffer cmdBuffer, - VkPipelineLayout layout, - VkShaderStageFlags stageFlags, - uint32_t start, - uint32_t length, - const void* values) -{ - stub(); -} - -void -anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) -{ - cmd_buffer->subpass = subpass; - - cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - - anv_cmd_buffer_emit_depth_stencil(cmd_buffer); -} - -void anv_CmdBeginRenderPass( - VkCmdBuffer cmdBuffer, - const VkRenderPassBeginInfo* pRenderPassBegin, - VkRenderPassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); - ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - - assert(contents == VK_RENDER_PASS_CONTENTS_INLINE); - - cmd_buffer->framebuffer = framebuffer; - cmd_buffer->pass = pass; - - const VkRect2D *render_area = &pRenderPassBegin->renderArea; - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, - 
.ClippedDrawingRectangleYMin = render_area->offset.y, - .ClippedDrawingRectangleXMin = render_area->offset.x, - .ClippedDrawingRectangleYMax = - render_area->offset.y + render_area->extent.height - 1, - .ClippedDrawingRectangleXMax = - render_area->offset.x + render_area->extent.width - 1, - .DrawingRectangleOriginY = 0, - .DrawingRectangleOriginX = 0); - - anv_cmd_buffer_clear_attachments(cmd_buffer, pass, - pRenderPassBegin->pAttachmentClearValues); - - anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); -} - -void anv_CmdNextSubpass( - VkCmdBuffer cmdBuffer, - VkRenderPassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - assert(contents == VK_RENDER_PASS_CONTENTS_INLINE); - - cmd_buffer->subpass++; - anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->subpass + 1); -} - -void anv_CmdEndRenderPass( - VkCmdBuffer cmdBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - /* Emit a flushing pipe control at the end of a pass. This is kind of a - * hack but it ensures that render targets always actually get written. - * Eventually, we should do flushing based on image format transitions - * or something of that nature. 
- */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .PostSyncOperation = NoWrite, - .RenderTargetCacheFlushEnable = true, - .InstructionCacheInvalidateEnable = true, - .DepthCacheFlushEnable = true, - .VFCacheInvalidationEnable = true, - .TextureCacheInvalidationEnable = true, - .CommandStreamerStallEnable = true); -} - -void anv_CmdExecuteCommands( - VkCmdBuffer cmdBuffer, - uint32_t cmdBuffersCount, - const VkCmdBuffer* pCmdBuffers) -{ - stub(); -} - void vkCmdDbgMarkerBegin( VkCmdBuffer cmdBuffer, const char* pMarker) diff --git a/src/vulkan/private.h b/src/vulkan/private.h index f4ccc92f7e9..262c98e1ca2 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -926,6 +926,8 @@ void anv_color_attachment_view_init(struct anv_color_attachment_view *view, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); +void anv_fill_buffer_surface_state(void *state, VkFormat format, + uint32_t offset, uint32_t range); void anv_surface_view_fini(struct anv_device *device, struct anv_surface_view *view); -- cgit v1.2.3 From d22d5f25fcf667d0e3998ee1a9692f79e0e20eab Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 15 Jul 2015 16:22:04 -0700 Subject: vk: Split command buffer state into its own structure Everything else in anv_cmd_buffer is the actual guts of the datastructure. 
--- src/vulkan/anv_cmd_buffer.c | 22 ++--- src/vulkan/anv_cmd_emit.c | 208 ++++++++++++++++++++++++++------------------ src/vulkan/meta.c | 40 ++++----- src/vulkan/private.h | 45 ++++++---- 4 files changed, 172 insertions(+), 143 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 2df14ae65ad..86d8f08971e 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -403,12 +403,6 @@ VkResult anv_CreateCommandBuffer( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); cmd_buffer->device = device; - cmd_buffer->rs_state = NULL; - cmd_buffer->vp_state = NULL; - cmd_buffer->cb_state = NULL; - cmd_buffer->ds_state = NULL; - memset(&cmd_buffer->state_vf, 0, sizeof(cmd_buffer->state_vf)); - memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors)); result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo); if (result != VK_SUCCESS) @@ -446,13 +440,7 @@ VkResult anv_CreateCommandBuffer( anv_state_stream_init(&cmd_buffer->dynamic_state_stream, &device->dynamic_state_block_pool); - cmd_buffer->dirty = 0; - cmd_buffer->vb_dirty = 0; - cmd_buffer->descriptors_dirty = 0; - cmd_buffer->pipeline = NULL; - cmd_buffer->vp_state = NULL; - cmd_buffer->rs_state = NULL; - cmd_buffer->ds_state = NULL; + anv_cmd_state_init(&cmd_buffer->state); *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); @@ -477,6 +465,8 @@ VkResult anv_DestroyCommandBuffer( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); + anv_cmd_state_fini(&cmd_buffer->state); + /* Destroy all of the batch buffers */ struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; while (bbo) { @@ -707,10 +697,8 @@ VkResult anv_ResetCommandBuffer( cmd_buffer->surface_next = 1; cmd_buffer->surface_relocs.num_relocs = 0; - cmd_buffer->rs_state = NULL; - cmd_buffer->vp_state = NULL; - cmd_buffer->cb_state = NULL; - cmd_buffer->ds_state = NULL; + anv_cmd_state_fini(&cmd_buffer->state); + 
anv_cmd_state_init(&cmd_buffer->state); return VK_SUCCESS; } diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index 3a67c02bde7..b4cb0c048d0 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -38,14 +38,41 @@ * is concerned, most of anv_cmd_buffer is magic. */ +VkResult +anv_cmd_state_init(struct anv_cmd_state *state) +{ + state->rs_state = NULL; + state->vp_state = NULL; + state->cb_state = NULL; + state->ds_state = NULL; + memset(&state->state_vf, 0, sizeof(state->state_vf)); + memset(&state->descriptors, 0, sizeof(state->descriptors)); + + state->dirty = 0; + state->vb_dirty = 0; + state->descriptors_dirty = 0; + state->pipeline = NULL; + state->vp_state = NULL; + state->rs_state = NULL; + state->ds_state = NULL; + + return VK_SUCCESS; +} + +void +anv_cmd_state_fini(struct anv_cmd_state *state) +{ + /* Nothing we need to finish right now */ +} + void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; struct anv_bo *scratch_bo = NULL; - cmd_buffer->scratch_size = device->scratch_block_pool.size; - if (cmd_buffer->scratch_size > 0) + cmd_buffer->state.scratch_size = device->scratch_block_pool.size; + if (cmd_buffer->state.scratch_size > 0) scratch_bo = &device->scratch_block_pool.bo; anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, @@ -85,7 +112,7 @@ VkResult anv_BeginCommandBuffer( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); anv_cmd_buffer_emit_state_base_address(cmd_buffer); - cmd_buffer->current_pipeline = UINT32_MAX; + cmd_buffer->state.current_pipeline = UINT32_MAX; return VK_SUCCESS; } @@ -100,14 +127,14 @@ void anv_CmdBindPipeline( switch (pipelineBindPoint) { case VK_PIPELINE_BIND_POINT_COMPUTE: - cmd_buffer->compute_pipeline = pipeline; - cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->state.compute_pipeline = pipeline; + cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; break; 
case VK_PIPELINE_BIND_POINT_GRAPHICS: - cmd_buffer->pipeline = pipeline; - cmd_buffer->vb_dirty |= pipeline->vb_used; - cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->state.pipeline = pipeline; + cmd_buffer->state.vb_dirty |= pipeline->vb_used; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; break; default: @@ -123,8 +150,8 @@ void anv_CmdBindDynamicViewportState( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState); - cmd_buffer->vp_state = vp_state; - cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY; + cmd_buffer->state.vp_state = vp_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY; } void anv_CmdBindDynamicRasterState( @@ -134,8 +161,8 @@ void anv_CmdBindDynamicRasterState( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState); - cmd_buffer->rs_state = rs_state; - cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY; + cmd_buffer->state.rs_state = rs_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY; } void anv_CmdBindDynamicColorBlendState( @@ -145,8 +172,8 @@ void anv_CmdBindDynamicColorBlendState( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState); - cmd_buffer->cb_state = cb_state; - cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY; + cmd_buffer->state.cb_state = cb_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY; } void anv_CmdBindDynamicDepthStencilState( @@ -156,8 +183,8 @@ void anv_CmdBindDynamicDepthStencilState( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState); - cmd_buffer->ds_state = ds_state; - cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY; + cmd_buffer->state.ds_state = ds_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY; } void anv_CmdBindDescriptorSets( @@ -181,15 +208,15 @@ void 
anv_CmdBindDescriptorSets( ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); set_layout = layout->set[firstSet + i].layout; - cmd_buffer->descriptors[firstSet + i].set = set; + cmd_buffer->state.descriptors[firstSet + i].set = set; assert(set_layout->num_dynamic_buffers < - ARRAY_SIZE(cmd_buffer->descriptors[0].dynamic_offsets)); - memcpy(cmd_buffer->descriptors[firstSet + i].dynamic_offsets, + ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets)); + memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets, pDynamicOffsets + dynamic_slot, set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); - cmd_buffer->descriptors_dirty |= set_layout->shader_stages; + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; dynamic_slot += set_layout->num_dynamic_buffers; } @@ -213,9 +240,9 @@ void anv_CmdBindIndexBuffer( GEN8_3DSTATE_VF_header, .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX, }; - GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state_vf, &vf); + GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); - cmd_buffer->dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, .IndexFormat = vk_to_gen_index_type[indexType], @@ -232,7 +259,7 @@ void anv_CmdBindVertexBuffers( const VkDeviceSize* pOffsets) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_vertex_binding *vb = cmd_buffer->vertex_bindings; + struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; /* We have to defer setting up vertex buffer since we need the buffer * stride from the pipeline. 
*/ @@ -241,7 +268,7 @@ void anv_CmdBindVertexBuffers( for (uint32_t i = 0; i < bindingCount; i++) { vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); vb[startBinding + i].offset = pOffsets[i]; - cmd_buffer->vb_dirty |= 1 << (startBinding + i); + cmd_buffer->state.vb_dirty |= 1 << (startBinding + i); } } @@ -249,15 +276,15 @@ static VkResult cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, unsigned stage, struct anv_state *bt_state) { - struct anv_framebuffer *fb = cmd_buffer->framebuffer; - struct anv_subpass *subpass = cmd_buffer->subpass; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; struct anv_pipeline_layout *layout; uint32_t attachments, bias, size; if (stage == VK_SHADER_STAGE_COMPUTE) - layout = cmd_buffer->compute_pipeline->layout; + layout = cmd_buffer->state.compute_pipeline->layout; else - layout = cmd_buffer->pipeline->layout; + layout = cmd_buffer->state.pipeline->layout; if (stage == VK_SHADER_STAGE_FRAGMENT) { bias = MAX_RTS; @@ -318,7 +345,7 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, return VK_SUCCESS; for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set]; + struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; struct anv_descriptor_slot *surface_slots = set_layout->stage[stage].surface_start; @@ -373,9 +400,9 @@ cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, uint32_t sampler_count; if (stage == VK_SHADER_STAGE_COMPUTE) - layout = cmd_buffer->compute_pipeline->layout; + layout = cmd_buffer->state.compute_pipeline->layout; else - layout = cmd_buffer->pipeline->layout; + layout = cmd_buffer->state.pipeline->layout; sampler_count = layout ? 
layout->stage[stage].sampler_count : 0; if (sampler_count == 0) @@ -388,7 +415,7 @@ cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, return VK_ERROR_OUT_OF_DEVICE_MEMORY; for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set]; + struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; struct anv_descriptor_slot *sampler_slots = set_layout->stage[stage].sampler_start; @@ -461,8 +488,8 @@ flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) static void flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { - uint32_t s, dirty = cmd_buffer->descriptors_dirty & - cmd_buffer->pipeline->active_stages; + uint32_t s, dirty = cmd_buffer->state.descriptors_dirty & + cmd_buffer->state.pipeline->active_stages; VkResult result = VK_SUCCESS; for_each_bit(s, dirty) { @@ -478,7 +505,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) assert(result == VK_SUCCESS); /* Re-emit all active binding tables */ - for_each_bit(s, cmd_buffer->pipeline->active_stages) { + for_each_bit(s, cmd_buffer->state.pipeline->active_stages) { result = flush_descriptor_set(cmd_buffer, s); /* It had better succeed this time */ @@ -486,7 +513,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) } } - cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages; + cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages; } static struct anv_state @@ -527,7 +554,7 @@ static VkResult flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; - struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; struct anv_state surfaces = { 0, }, samplers = { 0, }; VkResult result; @@ -566,44 +593,44 @@ flush_compute_descriptor_set(struct 
anv_cmd_buffer *cmd_buffer) static void anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) { - struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; VkResult result; assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - if (cmd_buffer->current_pipeline != GPGPU) { + if (cmd_buffer->state.current_pipeline != GPGPU) { anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, .PipelineSelection = GPGPU); - cmd_buffer->current_pipeline = GPGPU; + cmd_buffer->state.current_pipeline = GPGPU; } - if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { result = flush_compute_descriptor_set(cmd_buffer); assert(result == VK_SUCCESS); - cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; } - cmd_buffer->compute_dirty = 0; + cmd_buffer->state.compute_dirty = 0; } static void anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { - struct anv_pipeline *pipeline = cmd_buffer->pipeline; + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; uint32_t *p; - uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used; + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - if (cmd_buffer->current_pipeline != _3D) { + if (cmd_buffer->state.current_pipeline != _3D) { anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, .PipelineSelection = _3D); - cmd_buffer->current_pipeline = _3D; + 
cmd_buffer->state.current_pipeline = _3D; } if (vb_emit) { @@ -614,8 +641,8 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) GEN8_3DSTATE_VERTEX_BUFFERS); uint32_t vb, i = 0; for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer; - uint32_t offset = cmd_buffer->vertex_bindings[vb].offset; + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; struct GEN8_VERTEX_BUFFER_STATE state = { .VertexBufferIndex = vb, @@ -631,56 +658,63 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) } } - if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { /* If somebody compiled a pipeline after starting a command buffer the * scratch bo may have grown since we started this cmd buffer (and * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. 
*/ - if (cmd_buffer->scratch_size < pipeline->total_scratch) + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) anv_cmd_buffer_emit_state_base_address(cmd_buffer); anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); } - if (cmd_buffer->descriptors_dirty) + if (cmd_buffer->state.descriptors_dirty) flush_descriptor_sets(cmd_buffer); - if (cmd_buffer->dirty & ANV_CMD_BUFFER_VP_DIRTY) { + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { + struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, - .ScissorRectPointer = cmd_buffer->vp_state->scissor.offset); + .ScissorRectPointer = vp_state->scissor.offset); anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, - .CCViewportPointer = cmd_buffer->vp_state->cc_vp.offset); + .CCViewportPointer = vp_state->cc_vp.offset); anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, - .SFClipViewportPointer = cmd_buffer->vp_state->sf_clip_vp.offset); + .SFClipViewportPointer = vp_state->sf_clip_vp.offset); } - if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_RS_DIRTY)) { anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->rs_state->state_sf, pipeline->state_sf); + cmd_buffer->state.rs_state->state_sf, + pipeline->state_sf); anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->rs_state->state_raster, pipeline->state_raster); + cmd_buffer->state.rs_state->state_raster, + pipeline->state_raster); } - if (cmd_buffer->ds_state && - (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY))) + if (cmd_buffer->state.ds_state && + (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_DS_DIRTY))) { anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->ds_state->state_wm_depth_stencil, + 
cmd_buffer->state.ds_state->state_wm_depth_stencil, pipeline->state_wm_depth_stencil); + } - if (cmd_buffer->dirty & (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) { + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY | + ANV_CMD_BUFFER_DS_DIRTY)) { struct anv_state state; - if (cmd_buffer->ds_state == NULL) + if (cmd_buffer->state.ds_state == NULL) state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->cb_state->state_color_calc, + cmd_buffer->state.cb_state->state_color_calc, GEN8_COLOR_CALC_STATE_length, 64); - else if (cmd_buffer->cb_state == NULL) + else if (cmd_buffer->state.cb_state == NULL) state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->ds_state->state_color_calc, + cmd_buffer->state.ds_state->state_color_calc, GEN8_COLOR_CALC_STATE_length, 64); else state = anv_cmd_buffer_merge_dynamic(cmd_buffer, - cmd_buffer->ds_state->state_color_calc, - cmd_buffer->cb_state->state_color_calc, + cmd_buffer->state.ds_state->state_color_calc, + cmd_buffer->state.cb_state->state_color_calc, GEN8_COLOR_CALC_STATE_length, 64); anv_batch_emit(&cmd_buffer->batch, @@ -689,13 +723,14 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) .ColorCalcStatePointerValid = true); } - if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state_vf, pipeline->state_vf); + cmd_buffer->state.state_vf, pipeline->state_vf); } - cmd_buffer->vb_dirty &= ~vb_emit; - cmd_buffer->dirty = 0; + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; } void anv_CmdDraw( @@ -821,7 +856,7 @@ void anv_CmdDispatch( uint32_t z) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; struct brw_cs_prog_data *prog_data 
= &pipeline->cs_prog_data; anv_cmd_buffer_flush_compute_state(cmd_buffer); @@ -851,7 +886,7 @@ void anv_CmdDispatchIndirect( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; @@ -1054,8 +1089,8 @@ void anv_CmdPushConstants( static void anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { - struct anv_subpass *subpass = cmd_buffer->subpass; - struct anv_framebuffer *fb = cmd_buffer->framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_depth_stencil_view *view; static const struct anv_depth_stencil_view null_view = @@ -1081,8 +1116,8 @@ anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .SurfaceFormat = view->depth_format, .SurfacePitch = view->depth_stride > 0 ? 
view->depth_stride - 1 : 0, .SurfaceBaseAddress = { view->bo, view->depth_offset }, - .Height = cmd_buffer->framebuffer->height - 1, - .Width = cmd_buffer->framebuffer->width - 1, + .Height = cmd_buffer->state.framebuffer->height - 1, + .Width = cmd_buffer->state.framebuffer->width - 1, .LOD = 0, .Depth = 1 - 1, .MinimumArrayElement = 0, @@ -1108,9 +1143,9 @@ void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass) { - cmd_buffer->subpass = subpass; + cmd_buffer->state.subpass = subpass; - cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; anv_cmd_buffer_emit_depth_stencil(cmd_buffer); } @@ -1126,8 +1161,8 @@ void anv_CmdBeginRenderPass( assert(contents == VK_RENDER_PASS_CONTENTS_INLINE); - cmd_buffer->framebuffer = framebuffer; - cmd_buffer->pass = pass; + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; const VkRect2D *render_area = &pRenderPassBegin->renderArea; @@ -1155,8 +1190,7 @@ void anv_CmdNextSubpass( assert(contents == VK_RENDER_PASS_CONTENTS_INLINE); - cmd_buffer->subpass++; - anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->subpass + 1); + anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); } void anv_CmdEndRenderPass( diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 226b40a9dfb..49c9aa9c563 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -156,9 +156,9 @@ static void anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, struct anv_saved_state *state) { - state->old_pipeline = cmd_buffer->pipeline; - state->old_descriptor_set0 = cmd_buffer->descriptors[0].set; - memcpy(state->old_vertex_bindings, cmd_buffer->vertex_bindings, + state->old_pipeline = cmd_buffer->state.pipeline; + state->old_descriptor_set0 = cmd_buffer->state.descriptors[0].set; + memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, sizeof(state->old_vertex_bindings)); } @@ 
-166,14 +166,14 @@ static void anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, const struct anv_saved_state *state) { - cmd_buffer->pipeline = state->old_pipeline; - cmd_buffer->descriptors[0].set = state->old_descriptor_set0; - memcpy(cmd_buffer->vertex_bindings, state->old_vertex_bindings, + cmd_buffer->state.pipeline = state->old_pipeline; + cmd_buffer->state.descriptors[0].set = state->old_descriptor_set0; + memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, sizeof(state->old_vertex_bindings)); - cmd_buffer->vb_dirty |= (1 << NUM_VB_USED) - 1; - cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; - cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; + cmd_buffer->state.vb_dirty |= (1 << NUM_VB_USED) - 1; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; } struct vue_header { @@ -194,7 +194,7 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, struct clear_instance_data *instance_data) { struct anv_device *device = cmd_buffer->device; - struct anv_framebuffer *fb = cmd_buffer->framebuffer; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_state state; uint32_t size; @@ -233,25 +233,25 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, sizeof(vertex_data) }); - if (cmd_buffer->pipeline != anv_pipeline_from_handle(device->meta_state.clear.pipeline)) + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.clear.pipeline)) anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.clear.pipeline); /* We don't need anything here, only set if not already set. 
*/ - if (cmd_buffer->rs_state == NULL) + if (cmd_buffer->state.rs_state == NULL) anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.rs_state); - if (cmd_buffer->vp_state == NULL) + if (cmd_buffer->state.vp_state == NULL) anv_CmdBindDynamicViewportState(anv_cmd_buffer_to_handle(cmd_buffer), - cmd_buffer->framebuffer->vp_state); + cmd_buffer->state.framebuffer->vp_state); - if (cmd_buffer->ds_state == NULL) + if (cmd_buffer->state.ds_state == NULL) anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.ds_state); - if (cmd_buffer->cb_state == NULL) + if (cmd_buffer->state.cb_state == NULL) anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.cb_state); @@ -488,20 +488,20 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_save(cmd_buffer, saved_state); - if (cmd_buffer->pipeline != anv_pipeline_from_handle(device->meta_state.blit.pipeline)) + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.blit.pipeline)) anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit.pipeline); /* We don't need anything here, only set if not already set. 
*/ - if (cmd_buffer->rs_state == NULL) + if (cmd_buffer->state.rs_state == NULL) anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.rs_state); - if (cmd_buffer->ds_state == NULL) + if (cmd_buffer->state.ds_state == NULL) anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.ds_state); - saved_state->cb_state = anv_dynamic_cb_state_to_handle(cmd_buffer->cb_state); + saved_state->cb_state = anv_dynamic_cb_state_to_handle(cmd_buffer->state.cb_state); anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.cb_state); } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 262c98e1ca2..814ad504470 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -667,6 +667,31 @@ struct anv_descriptor_set_binding { uint32_t dynamic_offsets[128]; }; +/** State required while building cmd buffer */ +struct anv_cmd_state { + uint32_t current_pipeline; + uint32_t vb_dirty; + uint32_t dirty; + uint32_t compute_dirty; + uint32_t descriptors_dirty; + uint32_t scratch_size; + struct anv_pipeline * pipeline; + struct anv_pipeline * compute_pipeline; + struct anv_framebuffer * framebuffer; + struct anv_render_pass * pass; + struct anv_subpass * subpass; + struct anv_dynamic_rs_state * rs_state; + struct anv_dynamic_ds_state * ds_state; + struct anv_dynamic_vp_state * vp_state; + struct anv_dynamic_cb_state * cb_state; + uint32_t state_vf[GEN8_3DSTATE_VF_length]; + struct anv_vertex_binding vertex_bindings[MAX_VBS]; + struct anv_descriptor_set_binding descriptors[MAX_SETS]; +}; + +VkResult anv_cmd_state_init(struct anv_cmd_state *state); +void anv_cmd_state_fini(struct anv_cmd_state *state); + struct anv_cmd_buffer { struct anv_device * device; @@ -686,25 +711,7 @@ struct anv_cmd_buffer { struct anv_state_stream surface_state_stream; struct anv_state_stream dynamic_state_stream; - /* State required while building cmd buffer */ - uint32_t 
current_pipeline; - uint32_t vb_dirty; - uint32_t dirty; - uint32_t compute_dirty; - uint32_t descriptors_dirty; - uint32_t scratch_size; - struct anv_pipeline * pipeline; - struct anv_pipeline * compute_pipeline; - struct anv_framebuffer * framebuffer; - struct anv_render_pass * pass; - struct anv_subpass * subpass; - struct anv_dynamic_rs_state * rs_state; - struct anv_dynamic_ds_state * ds_state; - struct anv_dynamic_vp_state * vp_state; - struct anv_dynamic_cb_state * cb_state; - uint32_t state_vf[GEN8_3DSTATE_VF_length]; - struct anv_vertex_binding vertex_bindings[MAX_VBS]; - struct anv_descriptor_set_binding descriptors[MAX_SETS]; + struct anv_cmd_state state; }; void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); -- cgit v1.2.3 From 7ccc8dd24ae254e10994b89e78f55d2003e0a12e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 15 Jul 2015 16:29:40 -0700 Subject: vk/private.h: Move cmd_buffer functions to near the cmd_buffer struct --- src/vulkan/private.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 814ad504470..cd80c896f35 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -714,6 +714,21 @@ struct anv_cmd_buffer { struct anv_cmd_state state; }; +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment); + +VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + +void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass, + const VkClearValue *clear_values); + void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); void anv_aub_writer_destroy(struct anv_aub_writer *writer); @@ -982,21 
+997,6 @@ struct anv_render_pass { void anv_device_init_meta(struct anv_device *device); void anv_device_finish_meta(struct anv_device *device); -struct anv_state -anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment); - -VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer); - -void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); - -void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); - -void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, - struct anv_render_pass *pass, - const VkClearValue *clear_values); - void *anv_lookup_entrypoint(const char *name); #define ANV_DEFINE_CASTS(__anv_type, __VkType) \ -- cgit v1.2.3 From 6037b5d6100aeea9f0970fccd4ec38c3f60ffcc9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 15 Jul 2015 16:33:47 -0700 Subject: vk/cmd_buffer: Add a helper for allocating dynamic state This matches what we do for surface state and makes the dynamic state pool more opaque to things that need to get dynamic state. 
--- src/vulkan/anv_cmd_buffer.c | 8 ++++++++ src/vulkan/anv_cmd_emit.c | 10 +++++----- src/vulkan/private.h | 3 +++ 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 86d8f08971e..e6e60e14189 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -318,6 +318,14 @@ anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, return state; } +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment) +{ + return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + size, alignment); +} + VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) { diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index b4cb0c048d0..4c9f29ffaab 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -409,7 +409,7 @@ cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, return VK_SUCCESS; uint32_t size = sampler_count * 16; - *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); + *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); if (state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; @@ -522,8 +522,8 @@ anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, { struct anv_state state; - state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, - dwords * 4, alignment); + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); memcpy(state.map, a, dwords * 4); VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4)); @@ -539,8 +539,8 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, struct anv_state state; uint32_t *p; - state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, - dwords * 4, alignment); + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); p = state.map; for (uint32_t i = 0; 
i < dwords; i++) p[i] = a[i] | b[i]; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index cd80c896f35..cfc46ee8f60 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -717,6 +717,9 @@ struct anv_cmd_buffer { struct anv_state anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment); +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment); VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer); -- cgit v1.2.3 From 9c0db9d349dda6ec8788007eaa4545009adf3e26 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 15 Jul 2015 16:48:13 -0700 Subject: vk/cmd_buffer: Rename bo_count to exec2_bo_count --- src/vulkan/anv_cmd_buffer.c | 18 +++++++++--------- src/vulkan/aub.c | 8 ++++---- src/vulkan/device.c | 2 +- src/vulkan/private.h | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index e6e60e14189..8ccecd8f63b 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -510,11 +510,11 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, { struct drm_i915_gem_exec_object2 *obj; - if (bo->index < cmd_buffer->bo_count && + if (bo->index < cmd_buffer->exec2_bo_count && cmd_buffer->exec2_bos[bo->index] == bo) return VK_SUCCESS; - if (cmd_buffer->bo_count >= cmd_buffer->exec2_array_length) { + if (cmd_buffer->exec2_bo_count >= cmd_buffer->exec2_array_length) { uint32_t new_len = cmd_buffer->exec2_objects ? 
cmd_buffer->exec2_array_length * 2 : 64; @@ -534,9 +534,9 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, if (cmd_buffer->exec2_objects) { memcpy(new_objects, cmd_buffer->exec2_objects, - cmd_buffer->bo_count * sizeof(*new_objects)); + cmd_buffer->exec2_bo_count * sizeof(*new_objects)); memcpy(new_bos, cmd_buffer->exec2_bos, - cmd_buffer->bo_count * sizeof(*new_bos)); + cmd_buffer->exec2_bo_count * sizeof(*new_bos)); } cmd_buffer->exec2_objects = new_objects; @@ -544,9 +544,9 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->exec2_array_length = new_len; } - assert(cmd_buffer->bo_count < cmd_buffer->exec2_array_length); + assert(cmd_buffer->exec2_bo_count < cmd_buffer->exec2_array_length); - bo->index = cmd_buffer->bo_count++; + bo->index = cmd_buffer->exec2_bo_count++; obj = &cmd_buffer->exec2_objects[bo->index]; cmd_buffer->exec2_bos[bo->index] = bo; @@ -616,7 +616,7 @@ VkResult anv_EndCommandBuffer( cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc; cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next; - cmd_buffer->bo_count = 0; + cmd_buffer->exec2_bo_count = 0; cmd_buffer->need_reloc = false; /* Lock for access to bo->index. 
*/ @@ -650,13 +650,13 @@ VkResult anv_EndCommandBuffer( anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo, &batch->relocs.relocs[batch_bo->first_reloc], batch_bo->num_relocs); - assert(batch_bo->bo.index == cmd_buffer->bo_count - 1); + assert(batch_bo->bo.index == cmd_buffer->exec2_bo_count - 1); anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs); cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects; - cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count; + cmd_buffer->execbuf.buffer_count = cmd_buffer->exec2_bo_count; cmd_buffer->execbuf.batch_start_offset = 0; cmd_buffer->execbuf.batch_len = batch->next - batch->start; cmd_buffer->execbuf.cliprects_ptr = 0; diff --git a/src/vulkan/aub.c b/src/vulkan/aub.c index 42d4611eb6e..c33defd48f5 100644 --- a/src/vulkan/aub.c +++ b/src/vulkan/aub.c @@ -248,9 +248,9 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) if (writer == NULL) return; - aub_bos = malloc(cmd_buffer->bo_count * sizeof(aub_bos[0])); + aub_bos = malloc(cmd_buffer->exec2_bo_count * sizeof(aub_bos[0])); offset = writer->offset; - for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) { + for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) { bo = cmd_buffer->exec2_bos[i]; if (bo->map) aub_bos[i].map = bo->map; @@ -282,9 +282,9 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) bbo->num_relocs, aub_bos); } - for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) { + for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) { bo = cmd_buffer->exec2_bos[i]; - if (i == cmd_buffer->bo_count - 1) { + if (i == cmd_buffer->exec2_bo_count - 1) { assert(bo == &first_bbo->bo); aub_write_trace_block(writer, AUB_TRACE_TYPE_BATCH, aub_bos[i].relocated, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index c168219a40b..052b725348e 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -751,7 +751,7 @@ VkResult anv_QueueSubmit( return 
vk_error(VK_ERROR_UNKNOWN); } - for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) + for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset; } else { *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index cfc46ee8f60..9c9190ab62a 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -697,12 +697,12 @@ struct anv_cmd_buffer { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 * exec2_objects; + uint32_t exec2_bo_count; struct anv_bo ** exec2_bos; uint32_t exec2_array_length; bool need_reloc; uint32_t serial; - uint32_t bo_count; struct anv_batch batch; struct anv_batch_bo * last_batch_bo; struct anv_batch_bo * surface_batch_bo; -- cgit v1.2.3 From 927f54de68c6241076b18244dbf264b81b09bca0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 15 Jul 2015 17:10:58 -0700 Subject: vk/cmd_buffer: Move batch buffer padding to anv_batch_bo_finish() --- src/vulkan/anv_cmd_buffer.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 8ccecd8f63b..0d24d0bda02 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -241,6 +241,10 @@ anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, static void anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) { + /* Round batch up to an even number of dwords. 
*/ + if ((batch->next - batch->start) & 4) + anv_batch_emit(batch, GEN8_MI_NOOP); + assert(batch->start == bbo->bo.map); bbo->length = batch->next - batch->start; VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); @@ -283,12 +287,6 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) .BatchBufferStartAddress = { &new_bbo->bo, 0 }, ); - /* Pad out to a 2-dword aligned boundary with zeros */ - if ((uintptr_t)batch->next % 8 != 0) { - *(uint32_t *)batch->next = 0; - batch->next += 4; - } - anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch); new_bbo->prev_batch_bo = old_bbo; @@ -607,10 +605,6 @@ VkResult anv_EndCommandBuffer( anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END); - /* Round batch up to an even number of dwords. */ - if ((batch->next - batch->start) & 4) - anv_batch_emit(batch, GEN8_MI_NOOP); - anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch); cmd_buffer->surface_batch_bo->num_relocs = cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc; -- cgit v1.2.3 From 756d8064c103b9abe131ca5f6679d79426557424 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 15 Jul 2015 15:48:41 -0700 Subject: vk/0.132: Do type-safety --- include/vulkan/vk_wsi_lunarg.h | 8 +-- include/vulkan/vulkan.h | 152 ++++++++++++++++------------------------- src/vulkan/device.c | 115 +------------------------------ src/vulkan/meta.c | 3 +- src/vulkan/pipeline.c | 2 +- src/vulkan/private.h | 92 ++++++++++++++----------- 6 files changed, 121 insertions(+), 251 deletions(-) (limited to 'src') diff --git a/include/vulkan/vk_wsi_lunarg.h b/include/vulkan/vk_wsi_lunarg.h index c1af9f50fb0..9587952d067 100644 --- a/include/vulkan/vk_wsi_lunarg.h +++ b/include/vulkan/vk_wsi_lunarg.h @@ -59,8 +59,8 @@ extern "C" // ------------------------------------------------------------------------------------------------ // Objects -VK_DEFINE_DISP_SUBCLASS_HANDLE(VkDisplayWSI, VkObject) -VK_DEFINE_DISP_SUBCLASS_HANDLE(VkSwapChainWSI, 
VkObject) +VK_DEFINE_HANDLE(VkDisplayWSI) +VK_DEFINE_HANDLE(VkSwapChainWSI) // ------------------------------------------------------------------------------------------------ // Enumeration constants @@ -78,10 +78,6 @@ VK_DEFINE_DISP_SUBCLASS_HANDLE(VkSwapChainWSI, VkObject) // Extend VkImageLayout enum with extension specific constants #define VK_IMAGE_LAYOUT_PRESENT_SOURCE_WSI VK_WSI_LUNARG_ENUM(VkImageLayout, 0) -// Extend VkObjectType enum for new objects -#define VK_OBJECT_TYPE_DISPLAY_WSI VK_WSI_LUNARG_ENUM(VkObjectType, 0) -#define VK_OBJECT_TYPE_SWAP_CHAIN_WSI VK_WSI_LUNARG_ENUM(VkObjectType, 1) - // ------------------------------------------------------------------------------------------------ // Enumerations diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 174ec1b354c..3e4af9d0070 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -43,22 +43,37 @@ extern "C" { // Vulkan API version supported by this file #define VK_API_VERSION VK_MAKE_VERSION(0, 131, 0) -#if defined (__cplusplus) && (VK_UINTPTRLEAST64_MAX == UINTPTR_MAX) - #define VK_TYPE_SAFE_COMPATIBLE_HANDLES 1 -#endif - -#if defined(VK_TYPE_SAFE_COMPATIBLE_HANDLES) && !defined(VK_DISABLE_TYPE_SAFE_HANDLES) - #define VK_DEFINE_PTR_HANDLE(_obj) struct _obj##_T { char _dummy; }; typedef _obj##_T* _obj; - #define VK_DEFINE_PTR_SUBCLASS_HANDLE(_obj, _base) struct _obj##_T : public _base##_T {}; typedef _obj##_T* _obj; - #define VK_DEFINE_BASE_HANDLE(_obj) VK_DEFINE_PTR_HANDLE(_obj) - #define VK_DEFINE_DISP_SUBCLASS_HANDLE(_obj, _base) VK_DEFINE_PTR_SUBCLASS_HANDLE(_obj, _base) - #define VK_DEFINE_NONDISP_SUBCLASS_HANDLE(_obj, _base) VK_DEFINE_PTR_SUBCLASS_HANDLE(_obj, _base) +#define VK_DEFINE_HANDLE(obj) typedef struct obj##_T* obj; + + +#if defined(__cplusplus) + #if (_MSC_VER >= 1800 || __cplusplus >= 201103L) + // The bool operator only works if there are no implicit conversions from an obj to + // a bool-compatible type, which can then be used to 
unintentionally violate type safety. + // C++11 and above supports the "explicit" keyword on conversion operators to stop this + // from happening. Otherwise users of C++ below C++11 won't get direct access to evaluating + // the object handle as a bool in expressions like: + // if (obj) vkDestroy(obj); + #define VK_NONDISP_HANDLE_OPERATOR_BOOL() explicit operator bool() const { return handle != 0; } + #else + #define VK_NONDISP_HANDLE_OPERATOR_BOOL() + #endif + #define VK_DEFINE_NONDISP_HANDLE(obj) \ + struct obj { \ + obj() { } \ + obj(uint64_t x) { handle = x; } \ + obj& operator =(uint64_t x) { handle = x; return *this; } \ + bool operator==(const obj& other) const { return handle == other.handle; } \ + bool operator!=(const obj& other) const { return handle != other.handle; } \ + bool operator!() const { return !handle; } \ + VK_NONDISP_HANDLE_OPERATOR_BOOL() \ + uint64_t handle; \ + }; #else - #define VK_DEFINE_BASE_HANDLE(_obj) typedef VkUintPtrLeast64 _obj; - #define VK_DEFINE_DISP_SUBCLASS_HANDLE(_obj, _base) typedef uintptr_t _obj; - #define VK_DEFINE_NONDISP_SUBCLASS_HANDLE(_obj, _base) typedef VkUintPtrLeast64 _obj; + #define VK_DEFINE_NONDISP_HANDLE(obj) typedef struct obj##_T { uint64_t handle; } obj; #endif + #define VK_LOD_CLAMP_NONE MAX_FLOAT @@ -76,40 +91,37 @@ extern "C" { #define VK_MAX_EXTENSION_NAME 256 #define VK_MAX_DESCRIPTION 256 -VK_DEFINE_BASE_HANDLE(VkObject) -VK_DEFINE_DISP_SUBCLASS_HANDLE(VkInstance, VkObject) -VK_DEFINE_DISP_SUBCLASS_HANDLE(VkPhysicalDevice, VkObject) -VK_DEFINE_DISP_SUBCLASS_HANDLE(VkDevice, VkObject) -VK_DEFINE_DISP_SUBCLASS_HANDLE(VkQueue, VkObject) -VK_DEFINE_DISP_SUBCLASS_HANDLE(VkCmdBuffer, VkObject) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkNonDispatchable, VkObject) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkFence, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDeviceMemory, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkBuffer, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkImage, 
VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkSemaphore, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkEvent, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkQueryPool, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkBufferView, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkImageView, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkAttachmentView, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkShaderModule, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkShader, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkPipelineCache, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkPipelineLayout, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkRenderPass, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkPipeline, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDescriptorSetLayout, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkSampler, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDescriptorPool, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDescriptorSet, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicStateObject, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicViewportState, VkDynamicStateObject) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicRasterState, VkDynamicStateObject) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicColorBlendState, VkDynamicStateObject) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkDynamicDepthStencilState, VkDynamicStateObject) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkFramebuffer, VkNonDispatchable) -VK_DEFINE_NONDISP_SUBCLASS_HANDLE(VkCmdPool, VkNonDispatchable) +VK_DEFINE_HANDLE(VkInstance) +VK_DEFINE_HANDLE(VkPhysicalDevice) +VK_DEFINE_HANDLE(VkDevice) +VK_DEFINE_HANDLE(VkQueue) +VK_DEFINE_HANDLE(VkCmdBuffer) +VK_DEFINE_NONDISP_HANDLE(VkFence) +VK_DEFINE_NONDISP_HANDLE(VkDeviceMemory) +VK_DEFINE_NONDISP_HANDLE(VkBuffer) +VK_DEFINE_NONDISP_HANDLE(VkImage) +VK_DEFINE_NONDISP_HANDLE(VkSemaphore) 
+VK_DEFINE_NONDISP_HANDLE(VkEvent) +VK_DEFINE_NONDISP_HANDLE(VkQueryPool) +VK_DEFINE_NONDISP_HANDLE(VkBufferView) +VK_DEFINE_NONDISP_HANDLE(VkImageView) +VK_DEFINE_NONDISP_HANDLE(VkAttachmentView) +VK_DEFINE_NONDISP_HANDLE(VkShaderModule) +VK_DEFINE_NONDISP_HANDLE(VkShader) +VK_DEFINE_NONDISP_HANDLE(VkPipelineCache) +VK_DEFINE_NONDISP_HANDLE(VkPipelineLayout) +VK_DEFINE_NONDISP_HANDLE(VkRenderPass) +VK_DEFINE_NONDISP_HANDLE(VkPipeline) +VK_DEFINE_NONDISP_HANDLE(VkDescriptorSetLayout) +VK_DEFINE_NONDISP_HANDLE(VkSampler) +VK_DEFINE_NONDISP_HANDLE(VkDescriptorPool) +VK_DEFINE_NONDISP_HANDLE(VkDescriptorSet) +VK_DEFINE_NONDISP_HANDLE(VkDynamicViewportState) +VK_DEFINE_NONDISP_HANDLE(VkDynamicRasterState) +VK_DEFINE_NONDISP_HANDLE(VkDynamicColorBlendState) +VK_DEFINE_NONDISP_HANDLE(VkDynamicDepthStencilState) +VK_DEFINE_NONDISP_HANDLE(VkFramebuffer) +VK_DEFINE_NONDISP_HANDLE(VkCmdPool) typedef enum { @@ -139,7 +151,7 @@ typedef enum { VK_ERROR_INVALID_IMAGE = -17, VK_ERROR_INVALID_DESCRIPTOR_SET_DATA = -18, VK_ERROR_INVALID_QUEUE_TYPE = -19, - VK_ERROR_INVALID_OBJECT_TYPE = -20, + VK_ERROR_UNSUPPORTED_SHADER_IL_VERSION = -21, VK_ERROR_BAD_SHADER_CODE = -22, VK_ERROR_BAD_PIPELINE_DATA = -23, @@ -441,44 +453,6 @@ typedef enum { VK_PHYSICAL_DEVICE_TYPE_MAX_ENUM = 0x7FFFFFFF } VkPhysicalDeviceType; -typedef enum { - VK_OBJECT_TYPE_INSTANCE = 0, - VK_OBJECT_TYPE_PHYSICAL_DEVICE = 1, - VK_OBJECT_TYPE_DEVICE = 2, - VK_OBJECT_TYPE_QUEUE = 3, - VK_OBJECT_TYPE_COMMAND_BUFFER = 4, - VK_OBJECT_TYPE_DEVICE_MEMORY = 5, - VK_OBJECT_TYPE_BUFFER = 6, - VK_OBJECT_TYPE_BUFFER_VIEW = 7, - VK_OBJECT_TYPE_IMAGE = 8, - VK_OBJECT_TYPE_IMAGE_VIEW = 9, - VK_OBJECT_TYPE_ATTACHMENT_VIEW = 10, - VK_OBJECT_TYPE_SHADER_MODULE = 11, - VK_OBJECT_TYPE_SHADER = 12, - VK_OBJECT_TYPE_PIPELINE = 13, - VK_OBJECT_TYPE_PIPELINE_LAYOUT = 14, - VK_OBJECT_TYPE_SAMPLER = 15, - VK_OBJECT_TYPE_DESCRIPTOR_SET = 16, - VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT = 17, - VK_OBJECT_TYPE_DESCRIPTOR_POOL = 18, - 
VK_OBJECT_TYPE_DYNAMIC_VP_STATE = 19, - VK_OBJECT_TYPE_DYNAMIC_RS_STATE = 20, - VK_OBJECT_TYPE_DYNAMIC_CB_STATE = 21, - VK_OBJECT_TYPE_DYNAMIC_DS_STATE = 22, - VK_OBJECT_TYPE_FENCE = 23, - VK_OBJECT_TYPE_SEMAPHORE = 24, - VK_OBJECT_TYPE_EVENT = 25, - VK_OBJECT_TYPE_QUERY_POOL = 26, - VK_OBJECT_TYPE_FRAMEBUFFER = 27, - VK_OBJECT_TYPE_RENDER_PASS = 28, - VK_OBJECT_TYPE_PIPELINE_CACHE = 29, - VK_OBJECT_TYPE_CMD_POOL = 30, - VK_OBJECT_TYPE_BEGIN_RANGE = VK_OBJECT_TYPE_INSTANCE, - VK_OBJECT_TYPE_END_RANGE = VK_OBJECT_TYPE_PIPELINE_CACHE, - VK_OBJECT_TYPE_NUM = (VK_OBJECT_TYPE_PIPELINE_CACHE - VK_OBJECT_TYPE_INSTANCE + 1), - VK_OBJECT_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkObjectType; - typedef enum { VK_IMAGE_ASPECT_COLOR = 0, VK_IMAGE_ASPECT_DEPTH = 1, @@ -2135,7 +2109,6 @@ typedef VkResult (VKAPI *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory mem, V typedef VkResult (VKAPI *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem); typedef VkResult (VKAPI *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); typedef VkResult (VKAPI *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); -typedef VkResult (VKAPI *PFN_vkDestroyObject)(VkDevice device, VkObjectType objType, VkObject object); typedef VkResult (VKAPI *PFN_vkGetDeviceMemoryCommitment)(VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes); typedef VkResult (VKAPI *PFN_vkBindBufferMemory)(VkDevice device, VkBuffer buffer, VkDeviceMemory mem, VkDeviceSize memOffset); typedef VkResult (VKAPI *PFN_vkBindImageMemory)(VkDevice device, VkImage image, VkDeviceMemory mem, VkDeviceSize memOffset); @@ -2395,11 +2368,6 @@ VkResult VKAPI vkInvalidateMappedMemoryRanges( uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); -VkResult VKAPI vkDestroyObject( - VkDevice device, - VkObjectType objType, - VkObject object); - VkResult VKAPI vkGetDeviceMemoryCommitment( VkDevice 
device, VkDeviceMemory memory, diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 052b725348e..0691c61c038 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -984,117 +984,6 @@ VkResult anv_InvalidateMappedMemoryRanges( return anv_FlushMappedMemoryRanges(device, memRangeCount, pMemRanges); } -VkResult anv_DestroyObject( - VkDevice _device, - VkObjectType objType, - VkObject _object) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - switch (objType) { - case VK_OBJECT_TYPE_FENCE: - return anv_DestroyFence(_device, (VkFence) _object); - - case VK_OBJECT_TYPE_INSTANCE: - return anv_DestroyInstance((VkInstance) _object); - - case VK_OBJECT_TYPE_PHYSICAL_DEVICE: - /* We don't want to actually destroy physical devices */ - return VK_SUCCESS; - - case VK_OBJECT_TYPE_DEVICE: - assert(_device == (VkDevice) _object); - return anv_DestroyDevice((VkDevice) _object); - - case VK_OBJECT_TYPE_QUEUE: - /* TODO */ - return VK_SUCCESS; - - case VK_OBJECT_TYPE_DEVICE_MEMORY: - return anv_FreeMemory(_device, (VkDeviceMemory) _object); - - case VK_OBJECT_TYPE_DESCRIPTOR_POOL: - return anv_DestroyDescriptorPool(_device, (VkDescriptorPool) _object); - - case VK_OBJECT_TYPE_PIPELINE_CACHE: - return anv_DestroyPipelineCache(_device, (VkPipelineCache) _object); - - case VK_OBJECT_TYPE_BUFFER_VIEW: - return anv_DestroyBufferView(_device, _object); - - case VK_OBJECT_TYPE_IMAGE_VIEW: - return anv_DestroyImageView(_device, _object); - - case VK_OBJECT_TYPE_ATTACHMENT_VIEW: - return anv_DestroyAttachmentView(_device, _object); - - case VK_OBJECT_TYPE_IMAGE: - return anv_DestroyImage(_device, _object); - - case VK_OBJECT_TYPE_BUFFER: - return anv_DestroyBuffer(_device, (VkBuffer) _object); - - case VK_OBJECT_TYPE_SHADER_MODULE: - return anv_DestroyShaderModule(_device, (VkShaderModule) _object); - - case VK_OBJECT_TYPE_SHADER: - return anv_DestroyShader(_device, (VkShader) _object); - - case VK_OBJECT_TYPE_PIPELINE_LAYOUT: - return anv_DestroyPipelineLayout(_device, 
(VkPipelineLayout) _object); - - case VK_OBJECT_TYPE_SAMPLER: - return anv_DestroySampler(_device, (VkSampler) _object); - - case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT: - return anv_DestroyDescriptorSetLayout(_device, (VkDescriptorSetLayout) _object); - - case VK_OBJECT_TYPE_DESCRIPTOR_SET: - anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(_object)); - return VK_SUCCESS; - - case VK_OBJECT_TYPE_RENDER_PASS: - return anv_DestroyRenderPass(_device, (VkRenderPass) _object); - - case VK_OBJECT_TYPE_DYNAMIC_VP_STATE: - return anv_DestroyDynamicViewportState(_device, (VkDynamicViewportState) _object); - - case VK_OBJECT_TYPE_DYNAMIC_RS_STATE: - return anv_DestroyDynamicRasterState(_device, (VkDynamicRasterState) _object); - - case VK_OBJECT_TYPE_DYNAMIC_CB_STATE: - return anv_DestroyDynamicColorBlendState(_device, (VkDynamicColorBlendState) _object); - - case VK_OBJECT_TYPE_DYNAMIC_DS_STATE: - return anv_DestroyDynamicDepthStencilState(_device, (VkDynamicDepthStencilState) _object); - - case VK_OBJECT_TYPE_FRAMEBUFFER: - return anv_DestroyFramebuffer(_device, (VkFramebuffer) _object); - - case VK_OBJECT_TYPE_COMMAND_BUFFER: - return anv_DestroyCommandBuffer(_device, (VkCmdBuffer) _object); - return VK_SUCCESS; - - case VK_OBJECT_TYPE_PIPELINE: - return anv_DestroyPipeline(_device, (VkPipeline) _object); - - case VK_OBJECT_TYPE_QUERY_POOL: - return anv_DestroyQueryPool(_device, (VkQueryPool) _object); - - case VK_OBJECT_TYPE_SEMAPHORE: - return anv_DestroySemaphore(_device, (VkSemaphore) _object); - - case VK_OBJECT_TYPE_EVENT: - return anv_DestroyEvent(_device, (VkEvent) _object); - - case VK_OBJECT_TYPE_CMD_POOL: - return anv_DestroyCommandPool(_device, (VkCmdPool) _object); - - default: - unreachable("Invalid object type"); - } -} - VkResult anv_GetBufferMemoryRequirements( VkDevice device, VkBuffer _buffer, @@ -1851,7 +1740,7 @@ VkResult anv_CreateDescriptorPool( VkDescriptorPool* pDescriptorPool) { anv_finishme("VkDescriptorPool is a stub"); - 
*pDescriptorPool = 1; + pDescriptorPool->handle = 1; return VK_SUCCESS; } @@ -2270,7 +2159,7 @@ VkResult anv_CreateCommandPool( const VkCmdPoolCreateInfo* pCreateInfo, VkCmdPool* pCmdPool) { - *pCmdPool = 7; + pCmdPool->handle = 7; stub_return(VK_SUCCESS); } diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 49c9aa9c563..8e2556b8218 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -523,6 +523,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VkExtent3D dest_extent) { struct anv_device *device = cmd_buffer->device; + VkDescriptorPool dummy_desc_pool = { .handle = 1 }; struct blit_vb_data { float pos[2]; @@ -588,7 +589,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, uint32_t count; VkDescriptorSet set; - anv_AllocDescriptorSets(anv_device_to_handle(device), 0 /* pool */, + anv_AllocDescriptorSets(anv_device_to_handle(device), dummy_desc_pool, VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, 1, &device->meta_state.blit.ds_layout, &set, &count); anv_UpdateDescriptorSets(anv_device_to_handle(device), diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 218733aaf30..fdb632cd330 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -116,7 +116,7 @@ VkResult anv_CreatePipelineCache( const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache) { - *pPipelineCache = 1; + pPipelineCache->handle = 1; stub_return(VK_SUCCESS); } diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 9c9190ab62a..cf8a869cdfd 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -1002,49 +1002,65 @@ void anv_device_finish_meta(struct anv_device *device); void *anv_lookup_entrypoint(const char *name); -#define ANV_DEFINE_CASTS(__anv_type, __VkType) \ -static inline struct __anv_type * \ -__anv_type ## _from_handle(__VkType _handle) \ -{ \ - return (struct __anv_type *) _handle; \ -} \ - \ -static inline __VkType \ -__anv_type ## _to_handle(struct __anv_type *_obj) \ -{ \ - return (__VkType) _obj; \ -} +#define 
ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *) _handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType) _obj; \ + } -ANV_DEFINE_CASTS(anv_physical_device, VkPhysicalDevice) -ANV_DEFINE_CASTS(anv_instance, VkInstance) -ANV_DEFINE_CASTS(anv_queue, VkQueue) -ANV_DEFINE_CASTS(anv_device, VkDevice) -ANV_DEFINE_CASTS(anv_device_memory, VkDeviceMemory) -ANV_DEFINE_CASTS(anv_dynamic_vp_state, VkDynamicViewportState) -ANV_DEFINE_CASTS(anv_dynamic_rs_state, VkDynamicRasterState) -ANV_DEFINE_CASTS(anv_dynamic_ds_state, VkDynamicDepthStencilState) -ANV_DEFINE_CASTS(anv_dynamic_cb_state, VkDynamicColorBlendState) -ANV_DEFINE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) -ANV_DEFINE_CASTS(anv_descriptor_set, VkDescriptorSet) -ANV_DEFINE_CASTS(anv_pipeline_layout, VkPipelineLayout) -ANV_DEFINE_CASTS(anv_buffer, VkBuffer) -ANV_DEFINE_CASTS(anv_cmd_buffer, VkCmdBuffer) -ANV_DEFINE_CASTS(anv_fence, VkFence) -ANV_DEFINE_CASTS(anv_shader_module, VkShaderModule) -ANV_DEFINE_CASTS(anv_shader, VkShader) -ANV_DEFINE_CASTS(anv_pipeline, VkPipeline) -ANV_DEFINE_CASTS(anv_image, VkImage) -ANV_DEFINE_CASTS(anv_sampler, VkSampler) -ANV_DEFINE_CASTS(anv_attachment_view, VkAttachmentView) -ANV_DEFINE_CASTS(anv_buffer_view, VkBufferView); -ANV_DEFINE_CASTS(anv_image_view, VkImageView); -ANV_DEFINE_CASTS(anv_framebuffer, VkFramebuffer) -ANV_DEFINE_CASTS(anv_render_pass, VkRenderPass) -ANV_DEFINE_CASTS(anv_query_pool, VkQueryPool) +#define ANV_DEFINE_NONDISP_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *) _handle.handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType) { .handle = (uint64_t) _obj }; \ + } 
#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ struct __anv_type *__name = __anv_type ## _from_handle(__handle) +ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCmdBuffer) +ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) +ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) +ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) +ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) + +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_attachment_view, VkAttachmentView) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView); +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_cb_state, VkDynamicColorBlendState) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_ds_state, VkDynamicDepthStencilState) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_rs_state, VkDynamicRasterState) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_vp_state, VkDynamicViewportState) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView); +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader, VkShader) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 9fa0989ff2266315d7dc8469dab601ebc2289fea Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 15 Jul 2015 16:47:51 -0700 Subject: nir: move to two-level 
binding model for UBO's The GLSL layer above is still hacky, so we're really just moving the hack into GLSL-to-NIR. I'd rather not go all the way and make GLSL support the Vulkan binding model too, since presumably we'll be switching to SPIR-V exclusively, and so working on proper GLSL support will be a waste of time. For now, doing this keeps it working as we add SPIR-V->NIR support though. --- src/glsl/nir/glsl_to_nir.cpp | 26 ++++++++++++++++++++------ src/glsl/nir/nir_intrinsics.h | 23 ++++++++++++----------- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 9 ++++----- 3 files changed, 36 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 0338af67567..54e56145c89 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -43,7 +43,7 @@ namespace { class nir_visitor : public ir_visitor { public: - nir_visitor(nir_shader *shader, gl_shader_stage stage); + nir_visitor(nir_shader *shader, struct gl_shader *sh, gl_shader_stage stage); ~nir_visitor(); virtual void visit(ir_variable *); @@ -83,6 +83,8 @@ private: bool supports_ints; + struct gl_shader *sh; + nir_shader *shader; gl_shader_stage stage; nir_function_impl *impl; @@ -133,7 +135,7 @@ glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options) { nir_shader *shader = nir_shader_create(NULL, options); - nir_visitor v1(shader, sh->Stage); + nir_visitor v1(shader, sh, sh->Stage); nir_function_visitor v2(&v1); v2.run(sh->ir); visit_exec_list(sh->ir, &v1); @@ -141,10 +143,12 @@ glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options) return shader; } -nir_visitor::nir_visitor(nir_shader *shader, gl_shader_stage stage) +nir_visitor::nir_visitor(nir_shader *shader, struct gl_shader *sh, + gl_shader_stage stage) { this->supports_ints = shader->options->native_integers; this->shader = shader; + this->sh = sh; this->stage = stage; this->is_global = true; this->var_table = 
_mesa_hash_table_create(NULL, _mesa_hash_pointer, @@ -987,11 +991,21 @@ nir_visitor::visit(ir_expression *ir) } else { op = nir_intrinsic_load_ubo_indirect; } + + ir_constant *const_block = ir->operands[0]->as_constant(); + assert(const_block && "can't figure out descriptor set index"); + unsigned index = const_block->value.u[0]; + unsigned set = sh->UniformBlocks[index].Set; + unsigned binding = sh->UniformBlocks[index].Binding; + nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op); load->num_components = ir->type->vector_elements; - load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */ - load->const_index[1] = 1; /* number of vec4's */ - load->src[0] = evaluate_rvalue(ir->operands[0]); + load->const_index[0] = set; + load->const_index[1] = const_index ? const_index->value.u[0] : 0; /* base offset */ + nir_load_const_instr *load_binding = nir_load_const_instr_create(shader, 1); + load_binding->value.u[0] = binding; + nir_instr_insert_after_cf_list(this->cf_node_list, &load_binding->instr); + load->src[0] = nir_src_for_ssa(&load_binding->def); if (!const_index) load->src[1] = evaluate_rvalue(ir->operands[1]); add_instr(&load->instr, ir->type->vector_elements); diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index bc6e6b8f498..64861300b55 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -139,11 +139,12 @@ SYSTEM_VALUE(sample_mask_in, 1) SYSTEM_VALUE(invocation_id, 1) /* - * The first and only index is the base address to load from. Indirect - * loads have an additional register input, which is added to the constant - * address to compute the final address to load from. For UBO's (and - * SSBO's), the first source is the (possibly constant) UBO buffer index - * and the indirect (if it exists) is the second source. + * The last index is the base address to load from. 
Indirect loads have an + * additional register input, which is added to the constant address to + * compute the final address to load from. For UBO's (and SSBO's), the first + * source is the (possibly constant) UBO buffer index and the indirect (if it + * exists) is the second source, and the first index is the descriptor set + * index. * * For vector backends, the address is in terms of one vec4, and so each array * element is +4 scalar components from the previous array element. For scalar @@ -151,14 +152,14 @@ SYSTEM_VALUE(invocation_id, 1) * elements begin immediately after the previous array element. */ -#define LOAD(name, extra_srcs, flags) \ - INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1, flags) \ +#define LOAD(name, extra_srcs, extra_indices, flags) \ + INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1 + extra_indices, flags) \ INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \ - true, 0, 0, 1, flags) + true, 0, 0, 1 + extra_indices, flags) -LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(uniform, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(input, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* LOAD(ssbo, 1, 0) */ /* diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 46c30fcae26..4d98b048433 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1364,13 +1364,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr has_indirect = true; /* fallthrough */ case nir_intrinsic_load_ubo: { + uint32_t set = instr->const_index[0]; nir_const_value *const_index = nir_src_as_const_value(instr->src[0]); fs_reg 
surf_index; if (const_index) { - uint32_t index = const_index->u[0]; - uint32_t set = shader->base.UniformBlocks[index].Set; - uint32_t binding = shader->base.UniformBlocks[index].Binding; + uint32_t binding = const_index->u[0]; /* FIXME: We should probably assert here, but dota2 seems to hit * it and we'd like to keep going. @@ -1405,7 +1404,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr BRW_REGISTER_TYPE_D), fs_reg(2)); - unsigned vec4_offset = instr->const_index[0] / 4; + unsigned vec4_offset = instr->const_index[1] / 4; for (int i = 0; i < instr->num_components; i++) VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, i), surf_index, base_offset, vec4_offset + i); @@ -1413,7 +1412,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg packed_consts = vgrf(glsl_type::float_type); packed_consts.type = dest.type; - fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15); + fs_reg const_offset_reg((unsigned) instr->const_index[1] & ~15); bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, surf_index, const_offset_reg); -- cgit v1.2.3 From 513ee7fa48bb2fb2ed1f07b6f18ebff402c882f2 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 15 Jul 2015 21:58:32 -0700 Subject: nir/types: add more nir_type_is_xxx() wrappers --- src/glsl/nir/nir_types.cpp | 12 ++++++++++++ src/glsl/nir/nir_types.h | 2 ++ 2 files changed, 14 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 3c00bdb3c18..f3f3af97fde 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -168,6 +168,18 @@ glsl_type_is_matrix(const struct glsl_type *type) return type->is_matrix(); } +bool +glsl_type_is_array(const struct glsl_type *type) +{ + return type->is_array(); +} + +bool +glsl_type_is_struct(const struct glsl_type *type) +{ + return type->is_record() || type->is_interface(); +} + bool glsl_type_is_sampler(const struct glsl_type *type) { diff 
--git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index 60e1d9d96fc..a2fa7934e16 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -76,6 +76,8 @@ bool glsl_type_is_vector(const struct glsl_type *type); bool glsl_type_is_scalar(const struct glsl_type *type); bool glsl_type_is_vector_or_scalar(const struct glsl_type *type); bool glsl_type_is_matrix(const struct glsl_type *type); +bool glsl_type_is_array(const struct glsl_type *type); +bool glsl_type_is_struct(const struct glsl_type *type); bool glsl_type_is_sampler(const struct glsl_type *type); bool glsl_sampler_type_is_shadow(const struct glsl_type *type); bool glsl_sampler_type_is_array(const struct glsl_type *type); -- cgit v1.2.3 From b599735be442368060f57af0a513f8ad8b2a09a1 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 15 Jul 2015 21:58:57 -0700 Subject: nir/spirv: add support for loading UBO's We directly emit ubo load intrinsics based off of the offset information handed to us from SPIR-V. 
--- src/glsl/nir/spirv_to_nir.c | 200 +++++++++++++++++++++++++++++++----- src/glsl/nir/spirv_to_nir_private.h | 6 ++ 2 files changed, 178 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 3b9253d2aef..ee44a3f291e 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -369,20 +369,32 @@ struct_member_decoration_cb(struct vtn_builder *b, } static void -array_decoration_cb(struct vtn_builder *b, - struct vtn_value *val, int member, +type_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, const struct vtn_decoration *dec, void *ctx) { struct vtn_type *type = val->type; - assert(member == -1); + if (member != -1) + return; + switch (dec->decoration) { case SpvDecorationArrayStride: type->stride = dec->literals[0]; break; + case SpvDecorationBlock: + type->block = true; + break; + case SpvDecorationBufferBlock: + type->buffer_block = true; + break; + case SpvDecorationGLSLShared: + case SpvDecorationGLSLPacked: + /* Ignore these, since we get explicit offsets anyways */ + break; default: - unreachable("Unhandled array type decoration"); + unreachable("Unhandled type decoration"); } } @@ -398,16 +410,16 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, switch (opcode) { case SpvOpTypeVoid: val->type->type = glsl_void_type(); - return; + break; case SpvOpTypeBool: val->type->type = glsl_bool_type(); - return; + break; case SpvOpTypeInt: val->type->type = glsl_int_type(); - return; + break; case SpvOpTypeFloat: val->type->type = glsl_float_type(); - return; + break; case SpvOpTypeVector: { const struct glsl_type *base = @@ -416,7 +428,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, assert(glsl_type_is_scalar(base)); val->type->type = glsl_vector_type(glsl_get_base_type(base), elems); - return; + break; } case SpvOpTypeMatrix: { @@ -431,7 +443,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type->array_element = base; 
val->type->row_major = false; val->type->stride = 0; - return; + break; } case SpvOpTypeArray: { @@ -440,8 +452,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type->type = glsl_array_type(array_element->type, w[3]); val->type->array_element = array_element; val->type->stride = 0; - vtn_foreach_decoration(b, val, array_decoration_cb, NULL); - return; + break; } case SpvOpTypeStruct: { @@ -474,7 +485,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, const char *name = val->name ? val->name : "struct"; val->type->type = glsl_struct_type(fields, num_fields, name); - return; + break; } case SpvOpTypeFunction: { @@ -489,7 +500,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, params[i].out = true; } val->type->type = glsl_function_type(return_type, params, count - 3); - return; + break; } case SpvOpTypePointer: @@ -498,7 +509,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, * of dereferences happen */ val->type = vtn_value(b, w[3], vtn_value_type_type)->type; - return; + break; case SpvOpTypeImage: { const struct glsl_type *sampled_type = @@ -526,7 +537,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type->type = glsl_sampler_type(dim, is_shadow, is_array, glsl_get_base_type(sampled_type)); - return; + break; } case SpvOpTypeSampledImage: @@ -543,6 +554,8 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, default: unreachable("Unhandled opcode"); } + + vtn_foreach_decoration(b, val, type_decoration_cb, NULL); } static void @@ -774,8 +787,6 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, case SpvDecorationNoStaticUse: /* This can safely be ignored */ break; - case SpvDecorationBlock: - case SpvDecorationBufferBlock: case SpvDecorationRowMajor: case SpvDecorationColMajor: case SpvDecorationGLSLShared: @@ -1002,6 +1013,109 @@ _vtn_variable_store(struct vtn_builder *b, struct vtn_type *dest_type, dest_deref_tail->child = old_child; } +static struct vtn_ssa_value * 
+_vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, + unsigned set, nir_ssa_def *binding, + unsigned offset, nir_ssa_def *indirect, + struct vtn_type *type) +{ + struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + val->type = type->type; + val->transposed = NULL; + if (glsl_type_is_vector_or_scalar(type->type)) { + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); + load->num_components = glsl_get_vector_elements(type->type); + load->const_index[0] = set; + load->src[0] = nir_src_for_ssa(binding); + load->const_index[1] = offset; + if (indirect) + load->src[1] = nir_src_for_ssa(indirect); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + nir_builder_instr_insert(&b->nb, &load->instr); + val->def = &load->dest.ssa; + } else { + unsigned elems = glsl_get_length(type->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + if (glsl_type_is_struct(type->type)) { + for (unsigned i = 0; i < elems; i++) { + val->elems[i] = _vtn_block_load(b, op, set, binding, + offset + type->offsets[i], + indirect, type->members[i]); + } + } else { + for (unsigned i = 0; i < elems; i++) { + val->elems[i] = _vtn_block_load(b, op, set, binding, + offset + i * type->stride, + indirect, type->array_element); + } + } + } + + return val; +} + +static struct vtn_ssa_value * +vtn_block_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *type, nir_deref *src_tail) +{ + unsigned set = src->var->data.descriptor_set; + + nir_ssa_def *binding = nir_imm_int(&b->nb, src->var->data.binding); + nir_deref *deref = &src->deref; + + /* The block variable may be an array, in which case the array index adds + * an offset to the binding. Figure out that index now. 
+ */ + + if (deref->child->deref_type == nir_deref_type_array) { + deref = deref->child; + type = type->array_element; + nir_deref_array *deref_array = nir_deref_as_array(deref); + if (deref_array->deref_array_type == nir_deref_array_type_direct) { + binding = nir_imm_int(&b->nb, src->var->data.binding + + deref_array->base_offset); + } else { + binding = nir_iadd(&b->nb, binding, deref_array->indirect.ssa); + } + } + + unsigned offset = 0; + nir_ssa_def *indirect = NULL; + while (deref != src_tail) { + deref = deref->child; + switch (deref->deref_type) { + case nir_deref_type_array: { + nir_deref_array *deref_array = nir_deref_as_array(deref); + if (deref_array->deref_array_type == nir_deref_array_type_direct) { + offset += type->stride * deref_array->base_offset; + } else { + nir_ssa_def *offset = nir_imul(&b->nb, deref_array->indirect.ssa, + nir_imm_int(&b->nb, type->stride)); + indirect = indirect ? nir_iadd(&b->nb, indirect, offset) : offset; + } + type = type->array_element; + break; + } + + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(deref); + offset += type->offsets[deref_struct->index]; + type = type->members[deref_struct->index]; + break; + } + + default: + unreachable("unknown deref type"); + } + } + + /* TODO SSBO's */ + nir_intrinsic_op op = indirect ? 
nir_intrinsic_load_ubo_indirect + : nir_intrinsic_load_ubo; + + return _vtn_block_load(b, op, set, binding, offset, indirect, type); +} + /* * Gets the NIR-level deref tail, which may have as a child an array deref * selecting which component due to OpAccessChain supporting per-component @@ -1030,7 +1144,12 @@ vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, struct vtn_type *src_type) { nir_deref *src_tail = get_deref_tail(src); - struct vtn_ssa_value *val = _vtn_variable_load(b, src, src_type, src_tail); + + struct vtn_ssa_value *val; + if (src->var->interface_type) + val = vtn_block_load(b, src, src_type, src_tail); + else + val = _vtn_variable_load(b, src, src_type, src_tail); if (src_tail->child) { nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); @@ -1082,7 +1201,7 @@ vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, { nir_deref *src_tail = get_deref_tail(src); - if (src_tail->child) { + if (src_tail->child || src->var->interface_type) { assert(get_deref_tail(dest)->child); struct vtn_ssa_value *val = vtn_variable_load(b, src, type); vtn_variable_store(b, val, dest, type); @@ -1111,12 +1230,19 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, var->type = type->type; var->name = ralloc_strdup(var, val->name); + if (type->block) + var->interface_type = type->type; + else if (glsl_type_is_array(type->type) && + (type->array_element->block || type->array_element->buffer_block)) + var->interface_type = type->array_element->type; + else + var->interface_type = NULL; + switch ((SpvStorageClass)w[3]) { case SpvStorageClassUniform: case SpvStorageClassUniformConstant: var->data.mode = nir_var_uniform; var->data.read_only = true; - var->interface_type = type->type; break; case SpvStorageClassInput: var->data.mode = nir_var_shader_in; @@ -1161,6 +1287,13 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, var->data.location += VARYING_SLOT_VAR0; } + /* If this was a uniform block, then we're not going to actually 
use the + * variable (we're only going to use it to compute offsets), so don't + * declare it in the shader. + */ + if (var->data.mode == nir_var_uniform && var->interface_type) + break; + switch (var->data.mode) { case nir_var_shader_in: exec_list_push_tail(&b->shader->inputs, &var->node); @@ -1189,7 +1322,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); nir_deref_var *base = vtn_value(b, w[3], vtn_value_type_deref)->deref; val->deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); - val->deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + struct vtn_type *deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; nir_deref *tail = &val->deref->deref; while (tail->child) @@ -1210,14 +1343,14 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_deref_array *deref_arr = nir_deref_array_create(b); if (base_type == GLSL_TYPE_ARRAY || glsl_type_is_matrix(tail->type)) { - val->deref_type = val->deref_type->array_element; + deref_type = deref_type->array_element; } else { assert(glsl_type_is_vector(tail->type)); - val->deref_type = ralloc(b, struct vtn_type); - val->deref_type->type = glsl_scalar_type(base_type); + deref_type = ralloc(b, struct vtn_type); + deref_type->type = glsl_scalar_type(base_type); } - deref_arr->deref.type = val->deref_type->type; + deref_arr->deref.type = deref_type->type; if (idx_val->value_type == vtn_value_type_constant) { unsigned idx = idx_val->constant->value.u[0]; @@ -1237,9 +1370,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case GLSL_TYPE_STRUCT: { assert(idx_val->value_type == vtn_value_type_constant); unsigned idx = idx_val->constant->value.u[0]; - val->deref_type = val->deref_type->members[idx]; + deref_type = deref_type->members[idx]; nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); - deref_struct->deref.type = val->deref_type->type; + deref_struct->deref.type = 
deref_type->type; tail->child = &deref_struct->deref; break; } @@ -1248,6 +1381,17 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } tail = tail->child; } + + /* For uniform blocks, we don't resolve the access chain until we + * actually access the variable, so we need to keep around the original + * type of the variable. + */ + if (base->var->interface_type && base->var->data.mode == nir_var_uniform) + val->deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + else + val->deref_type = deref_type; + + break; } diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index 7262755d019..fa0489b877d 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -90,6 +90,12 @@ struct vtn_type { /* for structs, the offset of each member */ unsigned *offsets; + /* for structs, whether it was decorated as a "non-SSBO-like" block */ + bool block; + + /* for structs, whether it was decorated as an "SSBO-like" block */ + bool buffer_block; + /* for arrays and matrices, the array stride */ unsigned stride; -- cgit v1.2.3 From b2cfd8506034e733f71722334330b5df0bf94ef8 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 16 Jul 2015 11:04:22 -0700 Subject: nir/spirv: don't declare builtin blocks They aren't used, and the backend was barfing on them. Also, remove a hack in in compiler.cpp now that they're gone. 
--- src/glsl/nir/spirv_to_nir.c | 22 +++++++++++++++++----- src/glsl/nir/spirv_to_nir_private.h | 5 +++++ src/vulkan/compiler.cpp | 4 ---- 3 files changed, 22 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index ee44a3f291e..65a995c29de 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -359,6 +359,7 @@ struct_member_decoration_cb(struct vtn_builder *b, ctx->type->members[member]); ctx->type->members[member]->is_builtin = true; ctx->type->members[member]->builtin = dec->literals[0]; + ctx->type->builtin_block = true; break; case SpvDecorationOffset: ctx->type->offsets[member] = dec->literals[0]; @@ -404,7 +405,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, { struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); - val->type = ralloc(b, struct vtn_type); + val->type = rzalloc(b, struct vtn_type); val->type->is_builtin = false; switch (opcode) { @@ -1230,13 +1231,18 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, var->type = type->type; var->name = ralloc_strdup(var, val->name); - if (type->block) + bool builtin_block = false; + if (type->block) { var->interface_type = type->type; - else if (glsl_type_is_array(type->type) && - (type->array_element->block || type->array_element->buffer_block)) + builtin_block = type->builtin_block; + } else if (glsl_type_is_array(type->type) && + (type->array_element->block || + type->array_element->buffer_block)) { var->interface_type = type->array_element->type; - else + builtin_block = type->array_element->builtin_block; + } else { var->interface_type = NULL; + } switch ((SpvStorageClass)w[3]) { case SpvStorageClassUniform: @@ -1294,6 +1300,12 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, if (var->data.mode == nir_var_uniform && var->interface_type) break; + /* Builtin blocks are lowered to individual variables during SPIR-V -> + * NIR, so don't declare them either. 
+ */ + if (builtin_block) + break; + switch (var->data.mode) { case nir_var_shader_in: exec_list_push_tail(&b->shader->inputs, &var->node); diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index fa0489b877d..a964cc80fad 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -96,6 +96,11 @@ struct vtn_type { /* for structs, whether it was decorated as an "SSBO-like" block */ bool buffer_block; + /* for structs with block == true, whether this is a builtin block (i.e. a + * block that contains only builtins). + */ + bool builtin_block; + /* for arrays and matrices, the array stride */ unsigned stride; diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index cf34e7b4414..9152de63ec9 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -959,10 +959,6 @@ setup_nir_io(struct gl_program *prog, } foreach_list_typed(nir_variable, var, node, &shader->outputs) { - /* XXX glslang gives us this but we never use it */ - if (!strcmp(var->name, "gl_PerVertex")) - continue; - prog->OutputsWritten |= BITFIELD64_BIT(var->data.location); } } -- cgit v1.2.3 From fc3838376b920b5bfc3b1afe31c326063f4bfb84 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 17 Jul 2015 13:38:09 -0700 Subject: vk/image: Add braces around multi-line ifs --- src/vulkan/image.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index d05f42667ea..340df8a5af1 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -443,12 +443,13 @@ anv_image_view_init(struct anv_image_view *iview, .SurfaceBaseAddress = { NULL, view->offset }, }; - if (cmd_buffer) + if (cmd_buffer) { view->surface_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); - else + } else { view->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, 
&surface_state); } @@ -600,12 +601,13 @@ anv_color_attachment_view_init(struct anv_color_attachment_view *aview, depth = image->extent.depth; } - if (cmd_buffer) + if (cmd_buffer) { view->surface_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); - else + } else { view->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = SURFTYPE_2D, -- cgit v1.2.3 From 169251bff0ec4bdafa84da50b6144e6cf02bf0da Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 17 Jul 2015 13:59:48 -0700 Subject: vk: Remove more raw casts This removes nearly all the remaining raw Anvil<->Vulkan casts from the C source files. (File compiler.cpp still contains many raw casts, and I plan on ignoring that). As far as I can tell, the only remaining raw casts are: anv_attachment_view -> anv_depth_stencil_view anv_attachment_view -> anv_color_attachment_view --- src/vulkan/anv_cmd_emit.c | 6 +++--- src/vulkan/device.c | 8 ++++---- src/vulkan/intel.c | 2 +- src/vulkan/private.h | 16 ++++++++++++++++ src/vulkan/x11.c | 14 +++++++------- 5 files changed, 31 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index 4c9f29ffaab..6f8788c3481 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -1002,19 +1002,19 @@ void anv_CmdPipelineBarrier( const struct anv_common *common = ppMemBarriers[i]; switch (common->sType) { case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { - const VkMemoryBarrier *barrier = (VkMemoryBarrier *)common; + ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); out_flags |= barrier->outputMask; in_flags |= barrier->inputMask; break; } case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { - const VkBufferMemoryBarrier *barrier = (VkBufferMemoryBarrier *)common; + ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); out_flags |= barrier->outputMask; in_flags |= barrier->inputMask; break; } 
case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { - const VkImageMemoryBarrier *barrier = (VkImageMemoryBarrier *)common; + ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); out_flags |= barrier->outputMask; in_flags |= barrier->inputMask; break; diff --git a/src/vulkan/device.c b/src/vulkan/device.c index 0691c61c038..1073ab00ad4 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -1189,10 +1189,10 @@ VkResult anv_ResetFences( uint32_t fenceCount, const VkFence* pFences) { - struct anv_fence **fences = (struct anv_fence **) pFences; - - for (uint32_t i = 0; i < fenceCount; i++) - fences[i]->ready = false; + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + fence->ready = false; + } return VK_SUCCESS; } diff --git a/src/vulkan/intel.c b/src/vulkan/intel.c index 6457b211f73..e314ba60586 100644 --- a/src/vulkan/intel.c +++ b/src/vulkan/intel.c @@ -35,7 +35,7 @@ VkResult anv_CreateDmaBufImageINTEL( VkDeviceMemory* pMem, VkImage* pImage) { - struct anv_device *device = (struct anv_device *) _device; + ANV_FROM_HANDLE(anv_device, device, _device); struct anv_device_memory *mem; struct anv_image *image; VkResult result; diff --git a/src/vulkan/private.h b/src/vulkan/private.h index cf8a869cdfd..a1fe5304572 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -1038,6 +1038,7 @@ ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) +ANV_DEFINE_HANDLE_CASTS(anv_swap_chain, VkSwapChainWSI); ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_attachment_view, VkAttachmentView) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) @@ -1061,6 +1062,21 @@ ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader, VkShader) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) +#define ANV_DEFINE_STRUCT_CASTS(__anv_type, 
__VkType) \ + \ + static inline const __VkType * \ + __anv_type ## _to_ ## __VkType(const struct __anv_type *__anv_obj) \ + { \ + return (const __VkType *) __anv_obj; \ + } + +#define ANV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name) \ + const __VkType *__vk_name = anv_common_to_ ## __VkType(__common_name) + +ANV_DEFINE_STRUCT_CASTS(anv_common, VkMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkBufferMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkImageMemoryBarrier) + #ifdef __cplusplus } #endif diff --git a/src/vulkan/x11.c b/src/vulkan/x11.c index 73a7e2fc8ac..1e0bdb12bd0 100644 --- a/src/vulkan/x11.c +++ b/src/vulkan/x11.c @@ -205,7 +205,7 @@ VkResult anv_CreateSwapChainWSI( (uint32_t []) { 0 }); xcb_discard_reply(chain->conn, cookie.sequence); - *pSwapChain = (VkSwapChainWSI) chain; + *pSwapChain = anv_swap_chain_to_handle(chain); return VK_SUCCESS; @@ -214,23 +214,23 @@ VkResult anv_CreateSwapChainWSI( } VkResult anv_DestroySwapChainWSI( - VkSwapChainWSI swapChain) + VkSwapChainWSI _chain) { - struct anv_swap_chain *chain = (struct anv_swap_chain *) swapChain; - struct anv_device *device = chain->device; + ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); - anv_device_free(device, chain); + anv_device_free(chain->device, chain); return VK_SUCCESS; } VkResult anv_GetSwapChainInfoWSI( - VkSwapChainWSI swapChain, + VkSwapChainWSI _chain, VkSwapChainInfoTypeWSI infoType, size_t* pDataSize, void* pData) { - struct anv_swap_chain *chain = (struct anv_swap_chain *) swapChain; + ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); + VkSwapChainImageInfoWSI *images; size_t size; -- cgit v1.2.3 From 194b77d426cf7cc8ace24eba079fbe25102c4f44 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 17 Jul 2015 14:39:05 -0700 Subject: vk: Document members of anv_surface_view --- src/vulkan/private.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 
a1fe5304572..16ddaefc57e 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -885,12 +885,12 @@ struct anv_image { }; struct anv_surface_view { - struct anv_state surface_state; - struct anv_bo * bo; - uint32_t offset; - uint32_t range; - VkExtent3D extent; - VkFormat format; + struct anv_state surface_state; /**< RENDER_SURFACE_STATE */ + struct anv_bo *bo; + uint32_t offset; /**< VkBufferCreateInfo::offset */ + uint32_t range; /**< VkBufferCreateInfo::range */ + VkExtent3D extent; + VkFormat format; /**< VkBufferCreateInfo::format */ }; struct anv_buffer_view { -- cgit v1.2.3 From f70d0798546aca1be89dc93c0fac20b82f9df834 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 17 Jul 2015 14:48:23 -0700 Subject: vk/image: Remove unneeded data from anv_buffer_view This completes the FINISHME to trim unneeded data from anv_buffer_view. A VkExtent3D doesn't make sense for a VkBufferView. So remove the member anv_surface_view::extent, and push it up to the two objects that actually need it, anv_image_view and anv_attachment_view. 
--- src/vulkan/image.c | 4 ++-- src/vulkan/meta.c | 24 ++++++++++++------------ src/vulkan/private.h | 5 ++--- 3 files changed, 16 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/image.c b/src/vulkan/image.c index 340df8a5af1..4b37de54dde 100644 --- a/src/vulkan/image.c +++ b/src/vulkan/image.c @@ -372,7 +372,7 @@ anv_image_view_init(struct anv_image_view *iview, view->offset = image->offset + surface->offset; view->format = pCreateInfo->format; - view->extent = (VkExtent3D) { + iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, range->baseMipLevel), .height = anv_minify(image->extent.height, range->baseMipLevel), .depth = anv_minify(image->extent.depth, range->baseMipLevel), @@ -588,7 +588,7 @@ anv_color_attachment_view_init(struct anv_color_attachment_view *aview, view->offset = image->offset + surface->offset; view->format = pCreateInfo->format; - view->extent = (VkExtent3D) { + aview->base.extent = (VkExtent3D) { .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c index 8e2556b8218..30809581ad6 100644 --- a/src/vulkan/meta.c +++ b/src/vulkan/meta.c @@ -543,8 +543,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)(src_offset.x + src_extent.width) / (float)src->view.extent.width, - (float)(src_offset.y + src_extent.height) / (float)src->view.extent.height, + (float)(src_offset.x + src_extent.width) / (float)src->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src->extent.height, }, }; @@ -554,8 +554,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)src_offset.x / (float)src->view.extent.width, - (float)(src_offset.y + src_extent.height) / (float)src->view.extent.height, + 
(float)src_offset.x / (float)src->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src->extent.height, }, }; @@ -565,8 +565,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y, }, .tex_coord = { - (float)src_offset.x / (float)src->view.extent.width, - (float)src_offset.y / (float)src->view.extent.height, + (float)src_offset.x / (float)src->extent.width, + (float)src_offset.y / (float)src->extent.height, }, }; @@ -622,8 +622,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .layout = VK_IMAGE_LAYOUT_GENERAL } }, - .width = dest->view.extent.width, - .height = dest->view.extent.height, + .width = dest->base.extent.width, + .height = dest->base.extent.height, .layers = 1 }, &fb); @@ -1274,8 +1274,8 @@ void anv_CmdClearColorImage( .layout = VK_IMAGE_LAYOUT_GENERAL } }, - .width = view.view.extent.width, - .height = view.view.extent.height, + .width = view.base.extent.width, + .height = view.base.extent.height, .layers = 1 }, &fb); @@ -1322,8 +1322,8 @@ void anv_CmdClearColorImage( .renderArea = { .offset = { 0, 0, }, .extent = { - .width = view.view.extent.width, - .height = view.view.extent.height, + .width = view.base.extent.width, + .height = view.base.extent.height, }, }, .renderPass = pass, diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 16ddaefc57e..ac64f294c2d 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -889,17 +889,16 @@ struct anv_surface_view { struct anv_bo *bo; uint32_t offset; /**< VkBufferCreateInfo::offset */ uint32_t range; /**< VkBufferCreateInfo::range */ - VkExtent3D extent; VkFormat format; /**< VkBufferCreateInfo::format */ }; struct anv_buffer_view { - /* FINISHME: Trim unneeded data from this struct. 
*/ struct anv_surface_view view; }; struct anv_image_view { struct anv_surface_view view; + VkExtent3D extent; }; enum anv_attachment_view_type { @@ -909,11 +908,11 @@ enum anv_attachment_view_type { struct anv_attachment_view { enum anv_attachment_view_type attachment_type; + VkExtent3D extent; }; struct anv_color_attachment_view { struct anv_attachment_view base; - struct anv_surface_view view; }; -- cgit v1.2.3 From 2c2233e328341700b7bc5c574f9de21ab4e4116a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 17 Jul 2015 15:04:27 -0700 Subject: vk: Prefix most filenames with anv Jason started the task by creating anv_cmd_buffer.c and anv_cmd_emit.c. This patch finishes the task by renaming all other files except gen*_pack.h and glsl_scraper.py. --- src/vulkan/.gitignore | 6 +- src/vulkan/Makefile.am | 56 +- src/vulkan/allocator.c | 665 ----------- src/vulkan/anv_allocator.c | 665 +++++++++++ src/vulkan/anv_aub.c | 310 +++++ src/vulkan/anv_aub.h | 153 +++ src/vulkan/anv_cmd_buffer.c | 2 +- src/vulkan/anv_cmd_emit.c | 2 +- src/vulkan/anv_compiler.cpp | 1209 +++++++++++++++++++ src/vulkan/anv_device.c | 2390 +++++++++++++++++++++++++++++++++++++ src/vulkan/anv_entrypoints_gen.py | 269 +++++ src/vulkan/anv_formats.c | 334 ++++++ src/vulkan/anv_gem.c | 279 +++++ src/vulkan/anv_image.c | 745 ++++++++++++ src/vulkan/anv_intel.c | 97 ++ src/vulkan/anv_meta.c | 1449 ++++++++++++++++++++++ src/vulkan/anv_pipeline.c | 950 +++++++++++++++ src/vulkan/anv_private.h | 1081 +++++++++++++++++ src/vulkan/anv_query.c | 352 ++++++ src/vulkan/anv_util.c | 151 +++ src/vulkan/anv_x11.c | 299 +++++ src/vulkan/aub.c | 310 ----- src/vulkan/aub.h | 153 --- src/vulkan/compiler.cpp | 1209 ------------------- src/vulkan/device.c | 2390 ------------------------------------- src/vulkan/formats.c | 334 ------ src/vulkan/gem.c | 279 ----- src/vulkan/image.c | 745 ------------ src/vulkan/intel.c | 97 -- src/vulkan/meta.c | 1449 ---------------------- src/vulkan/pipeline.c | 950 --------------- 
src/vulkan/private.h | 1081 ----------------- src/vulkan/query.c | 352 ------ src/vulkan/util.c | 151 --- src/vulkan/vk_gen.py | 269 ----- src/vulkan/x11.c | 299 ----- 36 files changed, 10766 insertions(+), 10766 deletions(-) delete mode 100644 src/vulkan/allocator.c create mode 100644 src/vulkan/anv_allocator.c create mode 100644 src/vulkan/anv_aub.c create mode 100644 src/vulkan/anv_aub.h create mode 100644 src/vulkan/anv_compiler.cpp create mode 100644 src/vulkan/anv_device.c create mode 100644 src/vulkan/anv_entrypoints_gen.py create mode 100644 src/vulkan/anv_formats.c create mode 100644 src/vulkan/anv_gem.c create mode 100644 src/vulkan/anv_image.c create mode 100644 src/vulkan/anv_intel.c create mode 100644 src/vulkan/anv_meta.c create mode 100644 src/vulkan/anv_pipeline.c create mode 100644 src/vulkan/anv_private.h create mode 100644 src/vulkan/anv_query.c create mode 100644 src/vulkan/anv_util.c create mode 100644 src/vulkan/anv_x11.c delete mode 100644 src/vulkan/aub.c delete mode 100644 src/vulkan/aub.h delete mode 100644 src/vulkan/compiler.cpp delete mode 100644 src/vulkan/device.c delete mode 100644 src/vulkan/formats.c delete mode 100644 src/vulkan/gem.c delete mode 100644 src/vulkan/image.c delete mode 100644 src/vulkan/intel.c delete mode 100644 src/vulkan/meta.c delete mode 100644 src/vulkan/pipeline.c delete mode 100644 src/vulkan/private.h delete mode 100644 src/vulkan/query.c delete mode 100644 src/vulkan/util.c delete mode 100644 src/vulkan/vk_gen.py delete mode 100644 src/vulkan/x11.c (limited to 'src') diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore index 617b6d4ebb9..30c614497e5 100644 --- a/src/vulkan/.gitignore +++ b/src/vulkan/.gitignore @@ -1,4 +1,4 @@ # Generated source files -/*-spirv.h -/entrypoints.c -/entrypoints.h +/*_spirv.h +/anv_entrypoints.c +/anv_entrypoints.h diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 5147f6c69d0..6d1212c532c 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am 
@@ -53,38 +53,38 @@ libvulkan_la_CFLAGS = \ libvulkan_la_CXXFLAGS = \ -Wall -Wextra -Wno-unused-parameter -fvisibility=hidden -O0 -g -libvulkan_la_SOURCES = \ - private.h \ - gem.c \ - device.c \ - anv_cmd_buffer.c \ - anv_cmd_emit.c \ - aub.c \ - allocator.c \ - util.c \ - pipeline.c \ - image.c \ - meta.c \ - intel.c \ - entrypoints.c \ - entrypoints.h \ - x11.c \ - formats.c \ - compiler.cpp \ - query.c - -BUILT_SOURCES = \ - entrypoints.h \ - entrypoints.c \ - meta-spirv.h - -entrypoints.h : vk_gen.py $(vulkan_include_HEADERS) +libvulkan_la_SOURCES = \ + anv_allocator.c \ + anv_aub.c \ + anv_cmd_buffer.c \ + anv_cmd_emit.c \ + anv_compiler.cpp \ + anv_device.c \ + anv_entrypoints.c \ + anv_entrypoints.h \ + anv_formats.c \ + anv_gem.c \ + anv_image.c \ + anv_intel.c \ + anv_meta.c \ + anv_pipeline.c \ + anv_private.h \ + anv_query.c \ + anv_util.c \ + anv_x11.c + +BUILT_SOURCES = \ + anv_entrypoints.h \ + anv_entrypoints.c \ + anv_meta_spirv.h + +anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< header > $@ -entrypoints.c : vk_gen.py $(vulkan_include_HEADERS) +anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< code > $@ -%-spirv.h: %.c glsl_scraper.py +%_spirv.h: %.c glsl_scraper.py $(AM_V_GEN) $(PYTHON2) $(srcdir)/glsl_scraper.py --glsl-only -o $@ $< CLEANFILES = $(BUILT_SOURCES) diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c deleted file mode 100644 index 2d0d255721b..00000000000 --- a/src/vulkan/allocator.c +++ /dev/null @@ -1,665 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * 
and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#define _DEFAULT_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "private.h" - -#ifdef HAVE_VALGRIND -#define VG_NOACCESS_READ(__ptr) ({ \ - VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \ - __typeof(*(__ptr)) __val = *(__ptr); \ - VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\ - __val; \ -}) -#define VG_NOACCESS_WRITE(__ptr, __val) ({ \ - VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \ - *(__ptr) = (__val); \ - VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \ -}) -#else -#define VG_NOACCESS_READ(__ptr) (*(__ptr)) -#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val)) -#endif - -/* Design goals: - * - * - Lock free (except when resizing underlying bos) - * - * - Constant time allocation with typically only one atomic - * - * - Multiple allocation sizes without fragmentation - * - * - Can grow while keeping addresses and offset of contents stable - * - * - All allocations within one bo so we can point one of the - * STATE_BASE_ADDRESS pointers at it. 
- * - * The overall design is a two-level allocator: top level is a fixed size, big - * block (8k) allocator, which operates out of a bo. Allocation is done by - * either pulling a block from the free list or growing the used range of the - * bo. Growing the range may run out of space in the bo which we then need to - * grow. Growing the bo is tricky in a multi-threaded, lockless environment: - * we need to keep all pointers and contents in the old map valid. GEM bos in - * general can't grow, but we use a trick: we create a memfd and use ftruncate - * to grow it as necessary. We mmap the new size and then create a gem bo for - * it using the new gem userptr ioctl. Without heavy-handed locking around - * our allocation fast-path, there isn't really a way to munmap the old mmap, - * so we just keep it around until garbage collection time. While the block - * allocator is lockless for normal operations, we block other threads trying - * to allocate while we're growing the map. It sholdn't happen often, and - * growing is fast anyway. - * - * At the next level we can use various sub-allocators. The state pool is a - * pool of smaller, fixed size objects, which operates much like the block - * pool. It uses a free list for freeing objects, but when it runs out of - * space it just allocates a new block from the block pool. This allocator is - * intended for longer lived state objects such as SURFACE_STATE and most - * other persistent state objects in the API. We may need to track more info - * with these object and a pointer back to the CPU object (eg VkImage). In - * those cases we just allocate a slightly bigger object and put the extra - * state after the GPU state object. - * - * The state stream allocator works similar to how the i965 DRI driver streams - * all its state. Even with Vulkan, we need to emit transient state (whether - * surface state base or dynamic state base), and for that we can just get a - * block and fill it up. 
These cases are local to a command buffer and the - * sub-allocator need not be thread safe. The streaming allocator gets a new - * block when it runs out of space and chains them together so they can be - * easily freed. - */ - -/* Allocations are always at least 64 byte aligned, so 1 is an invalid value. - * We use it to indicate the free list is empty. */ -#define EMPTY 1 - -struct anv_mmap_cleanup { - void *map; - size_t size; - uint32_t gem_handle; -}; - -#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0}) - -static inline long -sys_futex(void *addr1, int op, int val1, - struct timespec *timeout, void *addr2, int val3) -{ - return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); -} - -static inline int -futex_wake(uint32_t *addr, int count) -{ - return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0); -} - -static inline int -futex_wait(uint32_t *addr, int32_t value) -{ - return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); -} - -static inline int -memfd_create(const char *name, unsigned int flags) -{ - return syscall(SYS_memfd_create, name, flags); -} - -static inline uint32_t -ilog2_round_up(uint32_t value) -{ - assert(value != 0); - return 32 - __builtin_clz(value - 1); -} - -static inline uint32_t -round_to_power_of_two(uint32_t value) -{ - return 1 << ilog2_round_up(value); -} - -static bool -anv_free_list_pop(union anv_free_list *list, void **map, uint32_t *offset) -{ - union anv_free_list current, next, old; - - current = *list; - while (current.offset != EMPTY) { - /* We have to add a memory barrier here so that the list head (and - * offset) gets read before we read the map pointer. This way we - * know that the map pointer is valid for the given offset at the - * point where we read it. 
- */ - __sync_synchronize(); - - uint32_t *next_ptr = *map + current.offset; - next.offset = VG_NOACCESS_READ(next_ptr); - next.count = current.count + 1; - old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, next.u64); - if (old.u64 == current.u64) { - *offset = current.offset; - return true; - } - current = old; - } - - return false; -} - -static void -anv_free_list_push(union anv_free_list *list, void *map, uint32_t offset) -{ - union anv_free_list current, old, new; - uint32_t *next_ptr = map + offset; - - old = *list; - do { - current = old; - VG_NOACCESS_WRITE(next_ptr, current.offset); - new.offset = offset; - new.count = current.count + 1; - old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); - } while (old.u64 != current.u64); -} - -/* All pointers in the ptr_free_list are assumed to be page-aligned. This - * means that the bottom 12 bits should all be zero. - */ -#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) -#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) -#define PFL_PACK(ptr, count) ({ \ - assert(((uintptr_t)(ptr) & 0xfff) == 0); \ - (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ -}) - -static bool -anv_ptr_free_list_pop(void **list, void **elem) -{ - void *current = *list; - while (PFL_PTR(current) != NULL) { - void **next_ptr = PFL_PTR(current); - void *new_ptr = VG_NOACCESS_READ(next_ptr); - unsigned new_count = PFL_COUNT(current) + 1; - void *new = PFL_PACK(new_ptr, new_count); - void *old = __sync_val_compare_and_swap(list, current, new); - if (old == current) { - *elem = PFL_PTR(current); - return true; - } - current = old; - } - - return false; -} - -static void -anv_ptr_free_list_push(void **list, void *elem) -{ - void *old, *current; - void **next_ptr = elem; - - old = *list; - do { - current = old; - VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current)); - unsigned new_count = PFL_COUNT(current) + 1; - void *new = PFL_PACK(elem, new_count); - old = __sync_val_compare_and_swap(list, current, new); - 
} while (old != current); -} - -static int -anv_block_pool_grow(struct anv_block_pool *pool); - -void -anv_block_pool_init(struct anv_block_pool *pool, - struct anv_device *device, uint32_t block_size) -{ - assert(is_power_of_two(block_size)); - - pool->device = device; - pool->bo.gem_handle = 0; - pool->bo.offset = 0; - pool->size = 0; - pool->block_size = block_size; - pool->next_block = 0; - pool->free_list = ANV_FREE_LIST_EMPTY; - anv_vector_init(&pool->mmap_cleanups, - round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); - - /* Immediately grow the pool so we'll have a backing bo. */ - anv_block_pool_grow(pool); -} - -void -anv_block_pool_finish(struct anv_block_pool *pool) -{ - struct anv_mmap_cleanup *cleanup; - - anv_vector_foreach(cleanup, &pool->mmap_cleanups) { - if (cleanup->map) - munmap(cleanup->map, cleanup->size); - if (cleanup->gem_handle) - anv_gem_close(pool->device, cleanup->gem_handle); - } - - anv_vector_finish(&pool->mmap_cleanups); - - close(pool->fd); -} - -static int -anv_block_pool_grow(struct anv_block_pool *pool) -{ - size_t size; - void *map; - int gem_handle; - struct anv_mmap_cleanup *cleanup; - - if (pool->size == 0) { - size = 32 * pool->block_size; - } else { - size = pool->size * 2; - } - - cleanup = anv_vector_add(&pool->mmap_cleanups); - if (!cleanup) - return -1; - *cleanup = ANV_MMAP_CLEANUP_INIT; - - if (pool->size == 0) - pool->fd = memfd_create("block pool", MFD_CLOEXEC); - - if (pool->fd == -1) - return -1; - - if (ftruncate(pool->fd, size) == -1) - return -1; - - /* First try to see if mremap can grow the map in place. */ - map = MAP_FAILED; - if (pool->size > 0) - map = mremap(pool->map, pool->size, size, 0); - if (map == MAP_FAILED) { - /* Just leak the old map until we destroy the pool. We can't munmap it - * without races or imposing locking on the block allocate fast path. On - * the whole the leaked maps adds up to less than the size of the - * current map. 
MAP_POPULATE seems like the right thing to do, but we - * should try to get some numbers. - */ - map = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, pool->fd, 0); - cleanup->map = map; - cleanup->size = size; - } - if (map == MAP_FAILED) - return -1; - - gem_handle = anv_gem_userptr(pool->device, map, size); - if (gem_handle == 0) - return -1; - cleanup->gem_handle = gem_handle; - - /* Now that we successfull allocated everything, we can write the new - * values back into pool. */ - pool->map = map; - pool->bo.gem_handle = gem_handle; - pool->bo.size = size; - pool->bo.map = map; - pool->bo.index = 0; - - /* Write size last and after the memory barrier here. We need the memory - * barrier to make sure map and gem_handle are written before other threads - * see the new size. A thread could allocate a block and then go try using - * the old pool->map and access out of bounds. */ - - __sync_synchronize(); - pool->size = size; - - return 0; -} - -uint32_t -anv_block_pool_alloc(struct anv_block_pool *pool) -{ - uint32_t offset, block, size; - - /* Try free list first. */ - if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { - assert(pool->map); - return offset; - } - - restart: - size = pool->size; - block = __sync_fetch_and_add(&pool->next_block, pool->block_size); - if (block < size) { - assert(pool->map); - return block; - } else if (block == size) { - /* We allocated the first block outside the pool, we have to grow it. - * pool->next_block acts a mutex: threads who try to allocate now will - * get block indexes above the current limit and hit futex_wait - * below. 
*/ - int err = anv_block_pool_grow(pool); - assert(err == 0); - (void) err; - futex_wake(&pool->size, INT_MAX); - } else { - futex_wait(&pool->size, size); - __sync_fetch_and_add(&pool->next_block, -pool->block_size); - goto restart; - } - - return block; -} - -void -anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset) -{ - anv_free_list_push(&pool->free_list, pool->map, offset); -} - -static void -anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool, - size_t state_size) -{ - /* At least a cache line and must divide the block size. */ - assert(state_size >= 64 && is_power_of_two(state_size)); - - pool->state_size = state_size; - pool->free_list = ANV_FREE_LIST_EMPTY; - pool->block.next = 0; - pool->block.end = 0; -} - -static uint32_t -anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, - struct anv_block_pool *block_pool) -{ - uint32_t offset; - struct anv_block_state block, old, new; - - /* Try free list first. */ - if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) - return offset; - - /* If free list was empty (or somebody raced us and took the items) we - * allocate a new item from the end of the block */ - restart: - block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size); - - if (block.next < block.end) { - return block.next; - } else if (block.next == block.end) { - new.next = anv_block_pool_alloc(block_pool); - new.end = new.next + block_pool->block_size; - old.u64 = __sync_fetch_and_add(&pool->block.u64, new.u64 - block.u64); - if (old.next != block.next) - futex_wake(&pool->block.end, INT_MAX); - return new.next; - } else { - futex_wait(&pool->block.end, block.end); - __sync_fetch_and_add(&pool->block.u64, -pool->state_size); - goto restart; - } -} - -static void -anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool, - struct anv_block_pool *block_pool, - uint32_t offset) -{ - anv_free_list_push(&pool->free_list, block_pool->map, offset); -} - -void 
-anv_state_pool_init(struct anv_state_pool *pool, - struct anv_block_pool *block_pool) -{ - pool->block_pool = block_pool; - for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) { - size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i); - anv_fixed_size_state_pool_init(&pool->buckets[i], size); - } -} - -struct anv_state -anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) -{ - unsigned size_log2 = ilog2_round_up(size < align ? align : size); - assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2); - if (size_log2 < ANV_MIN_STATE_SIZE_LOG2) - size_log2 = ANV_MIN_STATE_SIZE_LOG2; - unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; - - struct anv_state state; - state.alloc_size = 1 << size_log2; - state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], - pool->block_pool); - state.map = pool->block_pool->map + state.offset; - VG(VALGRIND_MALLOCLIKE_BLOCK(state.map, size, 0, false)); - return state; -} - -void -anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) -{ - assert(is_power_of_two(state.alloc_size)); - unsigned size_log2 = ilog2_round_up(state.alloc_size); - assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 && - size_log2 <= ANV_MAX_STATE_SIZE_LOG2); - unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; - - VG(VALGRIND_FREELIKE_BLOCK(state.map, 0)); - anv_fixed_size_state_pool_free(&pool->buckets[bucket], - pool->block_pool, state.offset); -} - -#define NULL_BLOCK 1 -struct stream_block { - uint32_t next; - - /* The map for the BO at the time the block was givne to us */ - void *current_map; - -#ifdef HAVE_VALGRIND - void *_vg_ptr; -#endif -}; - -/* The state stream allocator is a one-shot, single threaded allocator for - * variable sized blocks. We use it for allocating dynamic state. 
- */ -void -anv_state_stream_init(struct anv_state_stream *stream, - struct anv_block_pool *block_pool) -{ - stream->block_pool = block_pool; - stream->next = 0; - stream->end = 0; - stream->current_block = NULL_BLOCK; -} - -void -anv_state_stream_finish(struct anv_state_stream *stream) -{ - struct stream_block *sb; - uint32_t block, next_block; - - block = stream->current_block; - while (block != NULL_BLOCK) { - sb = stream->block_pool->map + block; - next_block = VG_NOACCESS_READ(&sb->next); - VG(VALGRIND_FREELIKE_BLOCK(VG_NOACCESS_READ(&sb->_vg_ptr), 0)); - anv_block_pool_free(stream->block_pool, block); - block = next_block; - } -} - -struct anv_state -anv_state_stream_alloc(struct anv_state_stream *stream, - uint32_t size, uint32_t alignment) -{ - struct stream_block *sb; - struct anv_state state; - uint32_t block; - - state.offset = align_u32(stream->next, alignment); - if (state.offset + size > stream->end) { - block = anv_block_pool_alloc(stream->block_pool); - void *current_map = stream->block_pool->map; - sb = current_map + block; - VG_NOACCESS_WRITE(&sb->current_map, current_map); - VG_NOACCESS_WRITE(&sb->next, stream->current_block); - VG(VG_NOACCESS_WRITE(&sb->_vg_ptr, 0)); - stream->current_block = block; - stream->next = block + sizeof(*sb); - stream->end = block + stream->block_pool->block_size; - state.offset = align_u32(stream->next, alignment); - assert(state.offset + size <= stream->end); - } - - sb = stream->block_pool->map + stream->current_block; - void *current_map = VG_NOACCESS_READ(&sb->current_map); - - state.map = current_map + state.offset; - state.alloc_size = size; - -#ifdef HAVE_VALGRIND - void *vg_ptr = VG_NOACCESS_READ(&sb->_vg_ptr); - if (vg_ptr == NULL) { - vg_ptr = state.map; - VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); - VALGRIND_MALLOCLIKE_BLOCK(vg_ptr, size, 0, false); - } else { - ptrdiff_t vg_offset = vg_ptr - current_map; - assert(vg_offset >= stream->current_block && - vg_offset < stream->end); - 
VALGRIND_RESIZEINPLACE_BLOCK(vg_ptr, - stream->next - vg_offset, - (state.offset + size) - vg_offset, - 0); - } -#endif - - stream->next = state.offset + size; - - return state; -} - -struct bo_pool_bo_link { - struct bo_pool_bo_link *next; - struct anv_bo bo; -}; - -void -anv_bo_pool_init(struct anv_bo_pool *pool, - struct anv_device *device, uint32_t bo_size) -{ - pool->device = device; - pool->bo_size = bo_size; - pool->free_list = NULL; -} - -void -anv_bo_pool_finish(struct anv_bo_pool *pool) -{ - struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); - while (link != NULL) { - struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); - - /* The anv_gem_m[un]map() functions are also valgrind-safe so they - * act as an alloc/free. In order to avoid a double-free warning, we - * need to mark thiss as malloc'd before we unmap it. - */ - VG(VALGRIND_MALLOCLIKE_BLOCK(link_copy.bo.map, pool->bo_size, 0, false)); - - anv_gem_munmap(link_copy.bo.map, pool->bo_size); - anv_gem_close(pool->device, link_copy.bo.gem_handle); - link = link_copy.next; - } -} - -VkResult -anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) -{ - VkResult result; - - void *next_free_void; - if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { - struct bo_pool_bo_link *next_free = next_free_void; - *bo = VG_NOACCESS_READ(&next_free->bo); - assert(bo->map == next_free); - assert(bo->size == pool->bo_size); - - VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, pool->bo_size, 0, false)); - - return VK_SUCCESS; - } - - struct anv_bo new_bo; - - result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size); - if (result != VK_SUCCESS) - return result; - - assert(new_bo.size == pool->bo_size); - - new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size); - if (new_bo.map == NULL) { - anv_gem_close(pool->device, new_bo.gem_handle); - return vk_error(VK_ERROR_MEMORY_MAP_FAILED); - } - - /* We don't need to call VALGRIND_MALLOCLIKE_BLOCK here because gem_mmap - * calls 
it for us. If we really want to be pedantic we could do a - * VALGRIND_FREELIKE_BLOCK right after the mmap, but there's no good - * reason. - */ - - *bo = new_bo; - return VK_SUCCESS; -} - -void -anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) -{ - struct bo_pool_bo_link *link = bo->map; - link->bo = *bo; - - VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0)); - anv_ptr_free_list_push(&pool->free_list, link); -} diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c new file mode 100644 index 00000000000..d85b919154d --- /dev/null +++ b/src/vulkan/anv_allocator.c @@ -0,0 +1,665 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define _DEFAULT_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#ifdef HAVE_VALGRIND +#define VG_NOACCESS_READ(__ptr) ({ \ + VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \ + __typeof(*(__ptr)) __val = *(__ptr); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\ + __val; \ +}) +#define VG_NOACCESS_WRITE(__ptr, __val) ({ \ + VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \ + *(__ptr) = (__val); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \ +}) +#else +#define VG_NOACCESS_READ(__ptr) (*(__ptr)) +#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val)) +#endif + +/* Design goals: + * + * - Lock free (except when resizing underlying bos) + * + * - Constant time allocation with typically only one atomic + * + * - Multiple allocation sizes without fragmentation + * + * - Can grow while keeping addresses and offset of contents stable + * + * - All allocations within one bo so we can point one of the + * STATE_BASE_ADDRESS pointers at it. + * + * The overall design is a two-level allocator: top level is a fixed size, big + * block (8k) allocator, which operates out of a bo. Allocation is done by + * either pulling a block from the free list or growing the used range of the + * bo. Growing the range may run out of space in the bo which we then need to + * grow. Growing the bo is tricky in a multi-threaded, lockless environment: + * we need to keep all pointers and contents in the old map valid. GEM bos in + * general can't grow, but we use a trick: we create a memfd and use ftruncate + * to grow it as necessary. We mmap the new size and then create a gem bo for + * it using the new gem userptr ioctl. Without heavy-handed locking around + * our allocation fast-path, there isn't really a way to munmap the old mmap, + * so we just keep it around until garbage collection time. 
While the block + * allocator is lockless for normal operations, we block other threads trying + * to allocate while we're growing the map. It sholdn't happen often, and + * growing is fast anyway. + * + * At the next level we can use various sub-allocators. The state pool is a + * pool of smaller, fixed size objects, which operates much like the block + * pool. It uses a free list for freeing objects, but when it runs out of + * space it just allocates a new block from the block pool. This allocator is + * intended for longer lived state objects such as SURFACE_STATE and most + * other persistent state objects in the API. We may need to track more info + * with these object and a pointer back to the CPU object (eg VkImage). In + * those cases we just allocate a slightly bigger object and put the extra + * state after the GPU state object. + * + * The state stream allocator works similar to how the i965 DRI driver streams + * all its state. Even with Vulkan, we need to emit transient state (whether + * surface state base or dynamic state base), and for that we can just get a + * block and fill it up. These cases are local to a command buffer and the + * sub-allocator need not be thread safe. The streaming allocator gets a new + * block when it runs out of space and chains them together so they can be + * easily freed. + */ + +/* Allocations are always at least 64 byte aligned, so 1 is an invalid value. + * We use it to indicate the free list is empty. 
*/ +#define EMPTY 1 + +struct anv_mmap_cleanup { + void *map; + size_t size; + uint32_t gem_handle; +}; + +#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0}) + +static inline long +sys_futex(void *addr1, int op, int val1, + struct timespec *timeout, void *addr2, int val3) +{ + return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); +} + +static inline int +futex_wake(uint32_t *addr, int count) +{ + return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0); +} + +static inline int +futex_wait(uint32_t *addr, int32_t value) +{ + return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); +} + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +static inline uint32_t +ilog2_round_up(uint32_t value) +{ + assert(value != 0); + return 32 - __builtin_clz(value - 1); +} + +static inline uint32_t +round_to_power_of_two(uint32_t value) +{ + return 1 << ilog2_round_up(value); +} + +static bool +anv_free_list_pop(union anv_free_list *list, void **map, uint32_t *offset) +{ + union anv_free_list current, next, old; + + current = *list; + while (current.offset != EMPTY) { + /* We have to add a memory barrier here so that the list head (and + * offset) gets read before we read the map pointer. This way we + * know that the map pointer is valid for the given offset at the + * point where we read it. 
+ */ + __sync_synchronize(); + + uint32_t *next_ptr = *map + current.offset; + next.offset = VG_NOACCESS_READ(next_ptr); + next.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, next.u64); + if (old.u64 == current.u64) { + *offset = current.offset; + return true; + } + current = old; + } + + return false; +} + +static void +anv_free_list_push(union anv_free_list *list, void *map, uint32_t offset) +{ + union anv_free_list current, old, new; + uint32_t *next_ptr = map + offset; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, current.offset); + new.offset = offset; + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + } while (old.u64 != current.u64); +} + +/* All pointers in the ptr_free_list are assumed to be page-aligned. This + * means that the bottom 12 bits should all be zero. + */ +#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) +#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) +#define PFL_PACK(ptr, count) ({ \ + assert(((uintptr_t)(ptr) & 0xfff) == 0); \ + (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ +}) + +static bool +anv_ptr_free_list_pop(void **list, void **elem) +{ + void *current = *list; + while (PFL_PTR(current) != NULL) { + void **next_ptr = PFL_PTR(current); + void *new_ptr = VG_NOACCESS_READ(next_ptr); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(new_ptr, new_count); + void *old = __sync_val_compare_and_swap(list, current, new); + if (old == current) { + *elem = PFL_PTR(current); + return true; + } + current = old; + } + + return false; +} + +static void +anv_ptr_free_list_push(void **list, void *elem) +{ + void *old, *current; + void **next_ptr = elem; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current)); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(elem, new_count); + old = __sync_val_compare_and_swap(list, current, new); + 
} while (old != current); +} + +static int +anv_block_pool_grow(struct anv_block_pool *pool); + +void +anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size) +{ + assert(is_power_of_two(block_size)); + + pool->device = device; + pool->bo.gem_handle = 0; + pool->bo.offset = 0; + pool->size = 0; + pool->block_size = block_size; + pool->next_block = 0; + pool->free_list = ANV_FREE_LIST_EMPTY; + anv_vector_init(&pool->mmap_cleanups, + round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); + + /* Immediately grow the pool so we'll have a backing bo. */ + anv_block_pool_grow(pool); +} + +void +anv_block_pool_finish(struct anv_block_pool *pool) +{ + struct anv_mmap_cleanup *cleanup; + + anv_vector_foreach(cleanup, &pool->mmap_cleanups) { + if (cleanup->map) + munmap(cleanup->map, cleanup->size); + if (cleanup->gem_handle) + anv_gem_close(pool->device, cleanup->gem_handle); + } + + anv_vector_finish(&pool->mmap_cleanups); + + close(pool->fd); +} + +static int +anv_block_pool_grow(struct anv_block_pool *pool) +{ + size_t size; + void *map; + int gem_handle; + struct anv_mmap_cleanup *cleanup; + + if (pool->size == 0) { + size = 32 * pool->block_size; + } else { + size = pool->size * 2; + } + + cleanup = anv_vector_add(&pool->mmap_cleanups); + if (!cleanup) + return -1; + *cleanup = ANV_MMAP_CLEANUP_INIT; + + if (pool->size == 0) + pool->fd = memfd_create("block pool", MFD_CLOEXEC); + + if (pool->fd == -1) + return -1; + + if (ftruncate(pool->fd, size) == -1) + return -1; + + /* First try to see if mremap can grow the map in place. */ + map = MAP_FAILED; + if (pool->size > 0) + map = mremap(pool->map, pool->size, size, 0); + if (map == MAP_FAILED) { + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps adds up to less than the size of the + * current map. 
MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. + */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, pool->fd, 0); + cleanup->map = map; + cleanup->size = size; + } + if (map == MAP_FAILED) + return -1; + + gem_handle = anv_gem_userptr(pool->device, map, size); + if (gem_handle == 0) + return -1; + cleanup->gem_handle = gem_handle; + + /* Now that we successfull allocated everything, we can write the new + * values back into pool. */ + pool->map = map; + pool->bo.gem_handle = gem_handle; + pool->bo.size = size; + pool->bo.map = map; + pool->bo.index = 0; + + /* Write size last and after the memory barrier here. We need the memory + * barrier to make sure map and gem_handle are written before other threads + * see the new size. A thread could allocate a block and then go try using + * the old pool->map and access out of bounds. */ + + __sync_synchronize(); + pool->size = size; + + return 0; +} + +uint32_t +anv_block_pool_alloc(struct anv_block_pool *pool) +{ + uint32_t offset, block, size; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { + assert(pool->map); + return offset; + } + + restart: + size = pool->size; + block = __sync_fetch_and_add(&pool->next_block, pool->block_size); + if (block < size) { + assert(pool->map); + return block; + } else if (block == size) { + /* We allocated the first block outside the pool, we have to grow it. + * pool->next_block acts a mutex: threads who try to allocate now will + * get block indexes above the current limit and hit futex_wait + * below. 
*/ + int err = anv_block_pool_grow(pool); + assert(err == 0); + (void) err; + futex_wake(&pool->size, INT_MAX); + } else { + futex_wait(&pool->size, size); + __sync_fetch_and_add(&pool->next_block, -pool->block_size); + goto restart; + } + + return block; +} + +void +anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset) +{ + anv_free_list_push(&pool->free_list, pool->map, offset); +} + +static void +anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool, + size_t state_size) +{ + /* At least a cache line and must divide the block size. */ + assert(state_size >= 64 && is_power_of_two(state_size)); + + pool->state_size = state_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + pool->block.next = 0; + pool->block.end = 0; +} + +static uint32_t +anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool) +{ + uint32_t offset; + struct anv_block_state block, old, new; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) + return offset; + + /* If free list was empty (or somebody raced us and took the items) we + * allocate a new item from the end of the block */ + restart: + block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size); + + if (block.next < block.end) { + return block.next; + } else if (block.next == block.end) { + new.next = anv_block_pool_alloc(block_pool); + new.end = new.next + block_pool->block_size; + old.u64 = __sync_fetch_and_add(&pool->block.u64, new.u64 - block.u64); + if (old.next != block.next) + futex_wake(&pool->block.end, INT_MAX); + return new.next; + } else { + futex_wait(&pool->block.end, block.end); + __sync_fetch_and_add(&pool->block.u64, -pool->state_size); + goto restart; + } +} + +static void +anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool, + uint32_t offset) +{ + anv_free_list_push(&pool->free_list, block_pool->map, offset); +} + +void 
+anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool) +{ + pool->block_pool = block_pool; + for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) { + size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i); + anv_fixed_size_state_pool_init(&pool->buckets[i], size); + } +} + +struct anv_state +anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) +{ + unsigned size_log2 = ilog2_round_up(size < align ? align : size); + assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + if (size_log2 < ANV_MIN_STATE_SIZE_LOG2) + size_log2 = ANV_MIN_STATE_SIZE_LOG2; + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + struct anv_state state; + state.alloc_size = 1 << size_log2; + state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], + pool->block_pool); + state.map = pool->block_pool->map + state.offset; + VG(VALGRIND_MALLOCLIKE_BLOCK(state.map, size, 0, false)); + return state; +} + +void +anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) +{ + assert(is_power_of_two(state.alloc_size)); + unsigned size_log2 = ilog2_round_up(state.alloc_size); + assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 && + size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + VG(VALGRIND_FREELIKE_BLOCK(state.map, 0)); + anv_fixed_size_state_pool_free(&pool->buckets[bucket], + pool->block_pool, state.offset); +} + +#define NULL_BLOCK 1 +struct stream_block { + uint32_t next; + + /* The map for the BO at the time the block was givne to us */ + void *current_map; + +#ifdef HAVE_VALGRIND + void *_vg_ptr; +#endif +}; + +/* The state stream allocator is a one-shot, single threaded allocator for + * variable sized blocks. We use it for allocating dynamic state. 
+ */ +void +anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool) +{ + stream->block_pool = block_pool; + stream->next = 0; + stream->end = 0; + stream->current_block = NULL_BLOCK; +} + +void +anv_state_stream_finish(struct anv_state_stream *stream) +{ + struct stream_block *sb; + uint32_t block, next_block; + + block = stream->current_block; + while (block != NULL_BLOCK) { + sb = stream->block_pool->map + block; + next_block = VG_NOACCESS_READ(&sb->next); + VG(VALGRIND_FREELIKE_BLOCK(VG_NOACCESS_READ(&sb->_vg_ptr), 0)); + anv_block_pool_free(stream->block_pool, block); + block = next_block; + } +} + +struct anv_state +anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment) +{ + struct stream_block *sb; + struct anv_state state; + uint32_t block; + + state.offset = align_u32(stream->next, alignment); + if (state.offset + size > stream->end) { + block = anv_block_pool_alloc(stream->block_pool); + void *current_map = stream->block_pool->map; + sb = current_map + block; + VG_NOACCESS_WRITE(&sb->current_map, current_map); + VG_NOACCESS_WRITE(&sb->next, stream->current_block); + VG(VG_NOACCESS_WRITE(&sb->_vg_ptr, 0)); + stream->current_block = block; + stream->next = block + sizeof(*sb); + stream->end = block + stream->block_pool->block_size; + state.offset = align_u32(stream->next, alignment); + assert(state.offset + size <= stream->end); + } + + sb = stream->block_pool->map + stream->current_block; + void *current_map = VG_NOACCESS_READ(&sb->current_map); + + state.map = current_map + state.offset; + state.alloc_size = size; + +#ifdef HAVE_VALGRIND + void *vg_ptr = VG_NOACCESS_READ(&sb->_vg_ptr); + if (vg_ptr == NULL) { + vg_ptr = state.map; + VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); + VALGRIND_MALLOCLIKE_BLOCK(vg_ptr, size, 0, false); + } else { + ptrdiff_t vg_offset = vg_ptr - current_map; + assert(vg_offset >= stream->current_block && + vg_offset < stream->end); + 
VALGRIND_RESIZEINPLACE_BLOCK(vg_ptr, + stream->next - vg_offset, + (state.offset + size) - vg_offset, + 0); + } +#endif + + stream->next = state.offset + size; + + return state; +} + +struct bo_pool_bo_link { + struct bo_pool_bo_link *next; + struct anv_bo bo; +}; + +void +anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t bo_size) +{ + pool->device = device; + pool->bo_size = bo_size; + pool->free_list = NULL; +} + +void +anv_bo_pool_finish(struct anv_bo_pool *pool) +{ + struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); + while (link != NULL) { + struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); + + /* The anv_gem_m[un]map() functions are also valgrind-safe so they + * act as an alloc/free. In order to avoid a double-free warning, we + * need to mark thiss as malloc'd before we unmap it. + */ + VG(VALGRIND_MALLOCLIKE_BLOCK(link_copy.bo.map, pool->bo_size, 0, false)); + + anv_gem_munmap(link_copy.bo.map, pool->bo_size); + anv_gem_close(pool->device, link_copy.bo.gem_handle); + link = link_copy.next; + } +} + +VkResult +anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) +{ + VkResult result; + + void *next_free_void; + if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { + struct bo_pool_bo_link *next_free = next_free_void; + *bo = VG_NOACCESS_READ(&next_free->bo); + assert(bo->map == next_free); + assert(bo->size == pool->bo_size); + + VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, pool->bo_size, 0, false)); + + return VK_SUCCESS; + } + + struct anv_bo new_bo; + + result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size); + if (result != VK_SUCCESS) + return result; + + assert(new_bo.size == pool->bo_size); + + new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size); + if (new_bo.map == NULL) { + anv_gem_close(pool->device, new_bo.gem_handle); + return vk_error(VK_ERROR_MEMORY_MAP_FAILED); + } + + /* We don't need to call VALGRIND_MALLOCLIKE_BLOCK here because gem_mmap + * calls 
it for us. If we really want to be pedantic we could do a + * VALGRIND_FREELIKE_BLOCK right after the mmap, but there's no good + * reason. + */ + + *bo = new_bo; + return VK_SUCCESS; +} + +void +anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) +{ + struct bo_pool_bo_link *link = bo->map; + link->bo = *bo; + + VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0)); + anv_ptr_free_list_push(&pool->free_list, link); +} diff --git a/src/vulkan/anv_aub.c b/src/vulkan/anv_aub.c new file mode 100644 index 00000000000..97f124a0aad --- /dev/null +++ b/src/vulkan/anv_aub.c @@ -0,0 +1,310 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "anv_private.h" +#include "anv_aub.h" + +struct anv_aub_writer { + FILE *file; + uint32_t offset; + int gen; +}; + +static void +aub_out(struct anv_aub_writer *writer, uint32_t data) +{ + fwrite(&data, 1, 4, writer->file); +} + +static void +aub_out_data(struct anv_aub_writer *writer, const void *data, size_t size) +{ + fwrite(data, 1, size, writer->file); +} + +static struct anv_aub_writer * +get_anv_aub_writer(struct anv_device *device) +{ + struct anv_aub_writer *writer = device->aub_writer; + int entry = 0x200003; + int i; + int gtt_size = 0x10000; + const char *filename; + + if (geteuid() != getuid()) + return NULL; + + if (writer) + return writer; + + writer = malloc(sizeof(*writer)); + if (writer == NULL) + return NULL; + + filename = "intel.aub"; + writer->gen = device->info.gen; + writer->file = fopen(filename, "w+"); + if (!writer->file) { + free(writer); + return NULL; + } + + /* Start allocating objects from just after the GTT. */ + writer->offset = gtt_size; + + /* Start with a (required) version packet. */ + aub_out(writer, CMD_AUB_HEADER | (13 - 2)); + aub_out(writer, + (4 << AUB_HEADER_MAJOR_SHIFT) | + (0 << AUB_HEADER_MINOR_SHIFT)); + for (i = 0; i < 8; i++) { + aub_out(writer, 0); /* app name */ + } + aub_out(writer, 0); /* timestamp */ + aub_out(writer, 0); /* timestamp */ + aub_out(writer, 0); /* comment len */ + + /* Set up the GTT. The max we can handle is 256M */ + aub_out(writer, CMD_AUB_TRACE_HEADER_BLOCK | ((writer->gen >= 8 ? 
6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT_ENTRY | + AUB_TRACE_TYPE_NOTYPE | AUB_TRACE_OP_DATA_WRITE); + aub_out(writer, 0); /* subtype */ + aub_out(writer, 0); /* offset */ + aub_out(writer, gtt_size); /* size */ + if (writer->gen >= 8) + aub_out(writer, 0); + for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { + aub_out(writer, entry); + } + + return device->aub_writer = writer; +} + +void +anv_aub_writer_destroy(struct anv_aub_writer *writer) +{ + fclose(writer->file); + free(writer); +} + + +/** + * Break up large objects into multiple writes. Otherwise a 128kb VBO + * would overflow the 16 bits of size field in the packet header and + * everything goes badly after that. + */ +static void +aub_write_trace_block(struct anv_aub_writer *writer, uint32_t type, + void *virtual, uint32_t size, uint32_t gtt_offset) +{ + uint32_t block_size; + uint32_t offset; + uint32_t subtype = 0; + static const char null_block[8 * 4096]; + + for (offset = 0; offset < size; offset += block_size) { + block_size = size - offset; + + if (block_size > 8 * 4096) + block_size = 8 * 4096; + + aub_out(writer, + CMD_AUB_TRACE_HEADER_BLOCK | + ((writer->gen >= 8 ? 6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT | + type | AUB_TRACE_OP_DATA_WRITE); + aub_out(writer, subtype); + aub_out(writer, gtt_offset + offset); + aub_out(writer, align_u32(block_size, 4)); + if (writer->gen >= 8) + aub_out(writer, 0); + + if (virtual) + aub_out_data(writer, (char *) virtual + offset, block_size); + else + aub_out_data(writer, null_block, block_size); + + /* Pad to a multiple of 4 bytes. 
*/ + aub_out_data(writer, null_block, -block_size & 3); + } +} + +/* + * Make a ringbuffer on fly and dump it + */ +static void +aub_build_dump_ringbuffer(struct anv_aub_writer *writer, + uint32_t batch_offset, uint32_t offset, + int ring_flag) +{ + uint32_t ringbuffer[4096]; + int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ + int ring_count = 0; + + if (ring_flag == I915_EXEC_BSD) + ring = AUB_TRACE_TYPE_RING_PRB1; + else if (ring_flag == I915_EXEC_BLT) + ring = AUB_TRACE_TYPE_RING_PRB2; + + /* Make a ring buffer to execute our batchbuffer. */ + memset(ringbuffer, 0, sizeof(ringbuffer)); + if (writer->gen >= 8) { + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); + ringbuffer[ring_count++] = batch_offset; + ringbuffer[ring_count++] = 0; + } else { + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; + ringbuffer[ring_count++] = batch_offset; + } + + /* Write out the ring. This appears to trigger execution of + * the ring in the simulator. + */ + aub_out(writer, + CMD_AUB_TRACE_HEADER_BLOCK | + ((writer->gen >= 8 ? 6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); + aub_out(writer, 0); /* general/surface subtype */ + aub_out(writer, offset); + aub_out(writer, ring_count * 4); + if (writer->gen >= 8) + aub_out(writer, 0); + + /* FIXME: Need some flush operations here? 
*/ + aub_out_data(writer, ringbuffer, ring_count * 4); +} + +struct aub_bo { + uint32_t offset; + void *map; + void *relocated; +}; + +static void +relocate_bo(struct anv_bo *bo, struct drm_i915_gem_relocation_entry *relocs, + size_t num_relocs, struct aub_bo *bos) +{ + struct aub_bo *aub_bo = &bos[bo->index]; + struct drm_i915_gem_relocation_entry *reloc; + uint32_t *dw; + + aub_bo->relocated = malloc(bo->size); + memcpy(aub_bo->relocated, aub_bo->map, bo->size); + for (size_t i = 0; i < num_relocs; i++) { + reloc = &relocs[i]; + assert(reloc->offset < bo->size); + dw = aub_bo->relocated + reloc->offset; + *dw = bos[reloc->target_handle].offset + reloc->delta; + } +} + +void +anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_aub_writer *writer; + struct anv_bo *bo; + uint32_t ring_flag = 0; + uint32_t offset; + struct aub_bo *aub_bos; + + writer = get_anv_aub_writer(device); + if (writer == NULL) + return; + + aub_bos = malloc(cmd_buffer->exec2_bo_count * sizeof(aub_bos[0])); + offset = writer->offset; + for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) { + bo = cmd_buffer->exec2_bos[i]; + if (bo->map) + aub_bos[i].map = bo->map; + else + aub_bos[i].map = anv_gem_mmap(device, bo->gem_handle, 0, bo->size); + aub_bos[i].relocated = aub_bos[i].map; + aub_bos[i].offset = offset; + offset = align_u32(offset + bo->size + 4095, 4096); + } + + struct anv_batch_bo *first_bbo; + for (struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; + bbo != NULL; bbo = bbo->prev_batch_bo) { + /* Keep stashing the current BO until we get to the beginning */ + first_bbo = bbo; + + /* Handle relocations for this batch BO */ + relocate_bo(&bbo->bo, &batch->relocs.relocs[bbo->first_reloc], + bbo->num_relocs, aub_bos); + } + assert(first_bbo->prev_batch_bo == NULL); + + for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo; + bbo != NULL; bbo = 
bbo->prev_batch_bo) { + + /* Handle relocations for this surface state BO */ + relocate_bo(&bbo->bo, + &cmd_buffer->surface_relocs.relocs[bbo->first_reloc], + bbo->num_relocs, aub_bos); + } + + for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) { + bo = cmd_buffer->exec2_bos[i]; + if (i == cmd_buffer->exec2_bo_count - 1) { + assert(bo == &first_bbo->bo); + aub_write_trace_block(writer, AUB_TRACE_TYPE_BATCH, + aub_bos[i].relocated, + first_bbo->length, aub_bos[i].offset); + } else { + aub_write_trace_block(writer, AUB_TRACE_TYPE_NOTYPE, + aub_bos[i].relocated, + bo->size, aub_bos[i].offset); + } + if (aub_bos[i].relocated != aub_bos[i].map) + free(aub_bos[i].relocated); + if (aub_bos[i].map != bo->map) + anv_gem_munmap(aub_bos[i].map, bo->size); + } + + /* Dump ring buffer */ + aub_build_dump_ringbuffer(writer, aub_bos[first_bbo->bo.index].offset, + offset, ring_flag); + + free(aub_bos); + + fflush(writer->file); +} diff --git a/src/vulkan/anv_aub.h b/src/vulkan/anv_aub.h new file mode 100644 index 00000000000..7a67712ff9c --- /dev/null +++ b/src/vulkan/anv_aub.h @@ -0,0 +1,153 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +/** @file intel_aub.h + * + * The AUB file is a file format used by Intel's internal simulation + * and other validation tools. It can be used at various levels by a + * driver to input state to the simulated hardware or a replaying + * debugger. + * + * We choose to dump AUB files using the trace block format for ease + * of implementation -- dump out the blocks of memory as plain blobs + * and insert ring commands to execute the batchbuffer blob. + */ + +#ifndef _INTEL_AUB_H +#define _INTEL_AUB_H + +#define AUB_MI_NOOP (0) +#define AUB_MI_BATCH_BUFFER_START (0x31 << 23) +#define AUB_PIPE_CONTROL (0x7a000002) + +/* DW0: instruction type. */ + +#define CMD_AUB (7 << 29) + +#define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16)) +/* DW1 */ +# define AUB_HEADER_MAJOR_SHIFT 24 +# define AUB_HEADER_MINOR_SHIFT 16 + +#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16)) +#define CMD_AUB_DUMP_BMP (CMD_AUB | (1 << 23) | (0x9e << 16)) + +/* DW1 */ +#define AUB_TRACE_OPERATION_MASK 0x000000ff +#define AUB_TRACE_OP_COMMENT 0x00000000 +#define AUB_TRACE_OP_DATA_WRITE 0x00000001 +#define AUB_TRACE_OP_COMMAND_WRITE 0x00000002 +#define AUB_TRACE_OP_MMIO_WRITE 0x00000003 +// operation = TRACE_DATA_WRITE, Type +#define AUB_TRACE_TYPE_MASK 0x0000ff00 +#define AUB_TRACE_TYPE_NOTYPE (0 << 8) +#define AUB_TRACE_TYPE_BATCH (1 << 8) +#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) +#define AUB_TRACE_TYPE_2D_MAP (6 << 8) +#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8) +#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) +#define AUB_TRACE_TYPE_1D_MAP (10 << 8) +#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) +#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8) +#define 
AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8) +#define AUB_TRACE_TYPE_GENERAL (14 << 8) +#define AUB_TRACE_TYPE_SURFACE (15 << 8) + + +// operation = TRACE_COMMAND_WRITE, Type = +#define AUB_TRACE_TYPE_RING_HWB (1 << 8) +#define AUB_TRACE_TYPE_RING_PRB0 (2 << 8) +#define AUB_TRACE_TYPE_RING_PRB1 (3 << 8) +#define AUB_TRACE_TYPE_RING_PRB2 (4 << 8) + +// Address space +#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff0000 +#define AUB_TRACE_MEMTYPE_GTT (0 << 16) +#define AUB_TRACE_MEMTYPE_LOCAL (1 << 16) +#define AUB_TRACE_MEMTYPE_NONLOCAL (2 << 16) +#define AUB_TRACE_MEMTYPE_PCI (3 << 16) +#define AUB_TRACE_MEMTYPE_GTT_ENTRY (4 << 16) + +/* DW2 */ + +/** + * aub_state_struct_type enum values are encoded with the top 16 bits + * representing the type to be delivered to the .aub file, and the bottom 16 + * bits representing the subtype. This macro performs the encoding. + */ +#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype)) + +enum aub_state_struct_type { + AUB_TRACE_VS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1), + AUB_TRACE_GS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2), + AUB_TRACE_CLIP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 3), + AUB_TRACE_SF_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 4), + AUB_TRACE_WM_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 5), + AUB_TRACE_CC_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 6), + AUB_TRACE_CLIP_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 7), + AUB_TRACE_SF_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 8), + AUB_TRACE_CC_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x9), + AUB_TRACE_SAMPLER_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xa), + AUB_TRACE_KERNEL_INSTRUCTIONS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xb), + AUB_TRACE_SCRATCH_SPACE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xc), + AUB_TRACE_SAMPLER_DEFAULT_COLOR = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xd), + + AUB_TRACE_SCISSOR_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x15), + AUB_TRACE_BLEND_STATE = 
ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x16), + AUB_TRACE_DEPTH_STENCIL_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x17), + + AUB_TRACE_VERTEX_BUFFER = ENCODE_SS_TYPE(AUB_TRACE_TYPE_VERTEX_BUFFER, 0), + AUB_TRACE_BINDING_TABLE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x100), + AUB_TRACE_SURFACE_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x200), + AUB_TRACE_VS_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 0), + AUB_TRACE_WM_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 1), +}; + +#undef ENCODE_SS_TYPE + +/** + * Decode a aub_state_struct_type value to determine the type that should be + * stored in the .aub file. + */ +static inline uint32_t AUB_TRACE_TYPE(enum aub_state_struct_type ss_type) +{ + return (ss_type & 0xFFFF0000) >> 16; +} + +/** + * Decode a state_struct_type value to determine the subtype that should be + * stored in the .aub file. + */ +static inline uint32_t AUB_TRACE_SUBTYPE(enum aub_state_struct_type ss_type) +{ + return ss_type & 0xFFFF; +} + +/* DW3: address */ +/* DW4: len */ + +#endif /* _INTEL_AUB_H */ diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 0d24d0bda02..4d4dfa9fb53 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -27,7 +27,7 @@ #include #include -#include "private.h" +#include "anv_private.h" /** \file anv_cmd_buffer.c * diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index 6f8788c3481..8654c4a0ac7 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -27,7 +27,7 @@ #include #include -#include "private.h" +#include "anv_private.h" /** \file anv_cmd_buffer.c * diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp new file mode 100644 index 00000000000..a50ecfde517 --- /dev/null +++ b/src/vulkan/anv_compiler.cpp @@ -0,0 +1,1209 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "anv_private.h" + +#include +#include /* brw_new_shader_program is here */ +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +/* XXX: We need this to keep symbols in nir.h from conflicting with the + * generated GEN command packing headers. We need to fix *both* to not + * define something as generic as LOAD. + */ +#undef LOAD + +#include + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +static void +fail_if(int cond, const char *format, ...) +{ + va_list args; + + if (!cond) + return; + + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + + exit(1); +} + +static VkResult +set_binding_table_layout(struct brw_stage_prog_data *prog_data, + struct anv_pipeline *pipeline, uint32_t stage) +{ + uint32_t bias, count, k, *map; + struct anv_pipeline_layout *layout = pipeline->layout; + + /* No layout is valid for shaders that don't bind any resources. 
*/ + if (pipeline->layout == NULL) + return VK_SUCCESS; + + if (stage == VK_SHADER_STAGE_FRAGMENT) + bias = MAX_RTS; + else + bias = 0; + + count = layout->stage[stage].surface_count; + prog_data->map_entries = + (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0])); + if (prog_data->map_entries == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + k = bias; + map = prog_data->map_entries; + for (uint32_t i = 0; i < layout->num_sets; i++) { + prog_data->bind_map[i].index = map; + for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++) + *map++ = k++; + + prog_data->bind_map[i].index_count = + layout->set[i].layout->stage[stage].surface_count; + } + + return VK_SUCCESS; +} + +static void +brw_vs_populate_key(struct brw_context *brw, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_VERTEX_PROGRAM */ + struct gl_program *prog = (struct gl_program *) vp; + + memset(key, 0, sizeof(*key)); + + /* Just upload the program verbatim for now. Always send it all + * the inputs it asks for, whether they are varying or not. 
+ */ + key->base.program_string_id = vp->id; + brw_setup_vue_key_clip_info(brw, &key->base, + vp->program.Base.UsesClipDistanceOut); + + /* _NEW_POLYGON */ + if (brw->gen < 6) { + key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL); + } + + if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 | + VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) { + /* _NEW_LIGHT | _NEW_BUFFERS */ + key->clamp_vertex_color = ctx->Light._ClampVertexColor; + } + + /* _NEW_POINT */ + if (brw->gen < 6 && ctx->Point.PointSprite) { + for (int i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key->point_coord_replace |= (1 << i); + } + } + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count, + &key->base.tex); +} + +static bool +really_do_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key, struct anv_pipeline *pipeline) +{ + GLuint program_size; + const GLuint *program; + struct brw_vs_compile c; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + struct brw_stage_prog_data *stage_prog_data = &prog_data->base.base; + void *mem_ctx; + struct gl_shader *vs = NULL; + + if (prog) + vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; + + memset(&c, 0, sizeof(c)); + memcpy(&c.key, key, sizeof(*key)); + memset(prog_data, 0, sizeof(*prog_data)); + + mem_ctx = ralloc_context(NULL); + + c.vp = vp; + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + int param_count; + if (vs) { + /* We add padding around uniform values below vec4 size, with the worst + * case being a float value that gets blown up to a vec4, so be + * conservative here. 
+ */ + param_count = vs->num_uniform_components * 4; + + } else { + param_count = vp->program.Base.Parameters->NumParameters * 4; + } + /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip + * planes as uniforms. + */ + param_count += c.key.base.nr_userclip_plane_consts * 4; + + /* Setting nr_params here NOT to the size of the param and pull_param + * arrays, but to the number of uniform components vec4_visitor + * needs. vec4_visitor::setup_uniforms() will set it back to a proper value. + */ + stage_prog_data->nr_params = ALIGN(param_count, 4) / 4; + if (vs) { + stage_prog_data->nr_params += vs->num_samplers; + } + + GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; + prog_data->inputs_read = vp->program.Base.InputsRead; + + if (c.key.copy_edgeflag) { + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); + prog_data->inputs_read |= VERT_BIT_EDGEFLAG; + } + + if (brw->gen < 6) { + /* Put dummy slots into the VUE for the SF to put the replaced + * point sprite coords in. We shouldn't need these dummy slots, + * which take up precious URB space, but it would mean that the SF + * doesn't get nice aligned pairs of input coords into output + * coords, which would be a pain to handle. + */ + for (int i = 0; i < 8; i++) { + if (c.key.point_coord_replace & (1 << i)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); + } + + /* if back colors are written, allocate slots for front colors too */ + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); + } + + /* In order for legacy clipping to work, we need to populate the clip + * distance varying slots whenever clipping is enabled, even if the vertex + * shader doesn't write to gl_ClipDistance. 
+ */ + if (c.key.base.userclip_active) { + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); + } + + brw_compute_vue_map(brw->intelScreen->devinfo, + &prog_data->base.vue_map, outputs_written); +\ + set_binding_table_layout(&prog_data->base.base, pipeline, + VK_SHADER_STAGE_VERTEX); + + /* Emit GEN4 code. + */ + program = brw_vs_emit(brw, prog, &c, prog_data, mem_ctx, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + struct anv_state vs_state = anv_state_stream_alloc(&pipeline->program_stream, + program_size, 64); + memcpy(vs_state.map, program, program_size); + + pipeline->vs_simd8 = vs_state.offset; + + ralloc_free(mem_ctx); + + return true; +} + +void brw_wm_populate_key(struct brw_context *brw, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + struct gl_program *prog = (struct gl_program *) brw->fragment_program; + GLuint lookup = 0; + GLuint line_aa; + bool program_uses_dfdy = fp->program.UsesDFdy; + struct gl_framebuffer draw_buffer; + bool multisample_fbo; + + memset(key, 0, sizeof(*key)); + + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. */ + key->tex.swizzles[i] = SWIZZLE_XYZW; + } + + /* A non-zero framebuffer name indicates that the framebuffer was created by + * the user rather than the window system. 
*/ + draw_buffer.Name = 1; + draw_buffer.Visual.samples = 1; + draw_buffer._NumColorDrawBuffers = 1; + draw_buffer._NumColorDrawBuffers = 1; + draw_buffer.Width = 400; + draw_buffer.Height = 400; + ctx->DrawBuffer = &draw_buffer; + + multisample_fbo = ctx->DrawBuffer->Visual.samples > 1; + + /* Build the index for table lookup + */ + if (brw->gen < 6) { + /* _NEW_COLOR */ + if (fp->program.UsesKill || ctx->Color.AlphaEnabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* _NEW_DEPTH */ + if (ctx->Depth.Test) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + /* _NEW_STENCIL | _NEW_BUFFERS */ + if (ctx->Stencil._Enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (ctx->Stencil.WriteMask[0] || + ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + key->iz_lookup = lookup; + } + + line_aa = AA_NEVER; + + /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ + if (ctx->Line.SmoothFlag) { + if (brw->reduced_primitive == GL_LINES) { + line_aa = AA_ALWAYS; + } + else if (brw->reduced_primitive == GL_TRIANGLES) { + if (ctx->Polygon.FrontMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if (ctx->Polygon.BackMode == GL_LINE || + (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_BACK)) + line_aa = AA_ALWAYS; + } + else if (ctx->Polygon.BackMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if ((ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT)) + line_aa = AA_ALWAYS; + } + } + } + + key->line_aa = line_aa; + + /* _NEW_HINT */ + key->high_quality_derivatives = + ctx->Hint.FragmentShaderDerivative == GL_NICEST; + + if (brw->gen < 6) + key->stats_wm = brw->stats_wm; + + /* _NEW_LIGHT */ + key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + + /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ + key->clamp_fragment_color = 
ctx->Color._ClampFragmentColor; + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count, + &key->tex); + + /* _NEW_BUFFERS */ + /* + * Include the draw buffer origin and height so that we can calculate + * fragment position values relative to the bottom left of the drawable, + * from the incoming screen origin relative position we get as part of our + * payload. + * + * This is only needed for the WM_WPOSXY opcode when the fragment program + * uses the gl_FragCoord input. + * + * We could avoid recompiling by including this as a constant referenced by + * our program, but if we were to do that it would also be nice to handle + * getting that constant updated at batchbuffer submit time (when we + * hold the lock and know where the buffer really is) rather than at emit + * time when we don't hold the lock and are just guessing. We could also + * just avoid using this as key data if the program doesn't use + * fragment.position. + * + * For DRI2 the origin_x/y will always be (0,0) but we still need the + * drawable height in order to invert the Y axis. + */ + if (fp->program.Base.InputsRead & VARYING_BIT_POS) { + key->drawable_height = ctx->DrawBuffer->Height; + } + + if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { + key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); + } + + /* _NEW_BUFFERS */ + key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; + + /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */ + key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 && + (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled); + + /* _NEW_BUFFERS _NEW_MULTISAMPLE */ + /* Ignore sample qualifier while computing this flag. 
*/ + key->persample_shading = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1; + if (key->persample_shading) + key->persample_2x = ctx->DrawBuffer->Visual.samples == 2; + + key->compute_pos_offset = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 && + fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS; + + key->compute_sample_id = + multisample_fbo && + ctx->Multisample.Enabled && + (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID); + + /* BRW_NEW_VUE_MAP_GEOM_OUT */ + if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) + key->input_slots_valid = brw->vue_map_geom_out.slots_valid; + + + /* _NEW_COLOR | _NEW_BUFFERS */ + /* Pre-gen6, the hardware alpha test always used each render + * target's alpha to do alpha test, as opposed to render target 0's alpha + * like GL requires. Fix that by building the alpha test into the + * shader, and we'll skip enabling the fixed function alpha test. 
+ */ + if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) { + key->alpha_test_func = ctx->Color.AlphaFunc; + key->alpha_test_ref = ctx->Color.AlphaRef; + } + + /* The unique fragment program ID */ + key->program_string_id = fp->id; + + ctx->DrawBuffer = NULL; +} + +static uint8_t +computed_depth_mode(struct gl_fragment_program *fp) +{ + if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + switch (fp->FragDepthLayout) { + case FRAG_DEPTH_LAYOUT_NONE: + case FRAG_DEPTH_LAYOUT_ANY: + return BRW_PSCDEPTH_ON; + case FRAG_DEPTH_LAYOUT_GREATER: + return BRW_PSCDEPTH_ON_GE; + case FRAG_DEPTH_LAYOUT_LESS: + return BRW_PSCDEPTH_ON_LE; + case FRAG_DEPTH_LAYOUT_UNCHANGED: + return BRW_PSCDEPTH_OFF; + } + } + return BRW_PSCDEPTH_OFF; +} + +static bool +really_do_wm_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key, struct anv_pipeline *pipeline) +{ + struct gl_context *ctx = &brw->ctx; + void *mem_ctx = ralloc_context(NULL); + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct gl_shader *fs = NULL; + unsigned int program_size; + const uint32_t *program; + + if (prog) + fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + + memset(prog_data, 0, sizeof(*prog_data)); + + /* key->alpha_test_func means simulating alpha testing via discards, + * so the shader definitely kills pixels. + */ + prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func; + + prog_data->computed_depth_mode = computed_depth_mode(&fp->program); + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + int param_count; + if (fs) { + param_count = fs->num_uniform_components; + } else { + param_count = fp->program.Base.Parameters->NumParameters * 4; + } + /* The backend also sometimes adds params for texture size. 
*/ + param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; + prog_data->base.param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data->base.pull_param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data->base.nr_params = param_count; + + prog_data->barycentric_interp_modes = + brw_compute_barycentric_interp_modes(brw, key->flat_shade, + key->persample_shading, + &fp->program); + + set_binding_table_layout(&prog_data->base, pipeline, + VK_SHADER_STAGE_FRAGMENT); + /* This needs to come after shader time and pull constant entries, but we + * don't have those set up now, so just put it after the layout entries. + */ + prog_data->binding_table.render_target_start = 0; + + program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data, + &fp->program, prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + struct anv_state ps_state = anv_state_stream_alloc(&pipeline->program_stream, + program_size, 64); + memcpy(ps_state.map, program, program_size); + + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = ps_state.offset; + + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = ps_state.offset + prog_data->prog_offset_16; + } else { + pipeline->ps_simd16 = NO_KERNEL; + } + + ralloc_free(mem_ctx); + + return true; +} + +static void +brw_gs_populate_key(struct brw_context *brw, + struct anv_pipeline *pipeline, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + struct brw_stage_state *stage_state = &brw->gs.base; + struct gl_program *prog = &gp->program.Base; + + memset(key, 0, sizeof(*key)); + + key->base.program_string_id = gp->id; + brw_setup_vue_key_clip_info(brw, &key->base, + gp->program.Base.UsesClipDistanceOut); + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count, + &key->base.tex); + + struct 
brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + + /* BRW_NEW_VUE_MAP_VS */ + key->input_varyings = prog_data->base.vue_map.slots_valid; +} + +static bool +really_do_gs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key, struct anv_pipeline *pipeline) +{ + struct brw_gs_compile_output output; + + /* FIXME: We pass the bind map to the compile in the output struct. Need + * something better. */ + set_binding_table_layout(&output.prog_data.base.base, + pipeline, VK_SHADER_STAGE_GEOMETRY); + + brw_compile_gs_prog(brw, prog, gp, key, &output); + + struct anv_state gs_state = anv_state_stream_alloc(&pipeline->program_stream, + output.program_size, 64); + memcpy(gs_state.map, output.program, output.program_size); + + pipeline->gs_vec4 = gs_state.offset; + pipeline->gs_vertex_count = gp->program.VerticesIn; + + ralloc_free(output.mem_ctx); + + return true; +} + +static bool +brw_codegen_cs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_compute_program *cp, + struct brw_cs_prog_key *key, struct anv_pipeline *pipeline) +{ + struct gl_context *ctx = &brw->ctx; + const GLuint *program; + void *mem_ctx = ralloc_context(NULL); + GLuint program_size; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + assert (cs); + + memset(prog_data, 0, sizeof(*prog_data)); + + set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE); + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + int param_count = cs->num_uniform_components; + + /* The backend also sometimes adds params for texture size. 
*/ + param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; + prog_data->base.param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data->base.pull_param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data->base.nr_params = param_count; + + program = brw_cs_emit(brw, mem_ctx, key, prog_data, + &cp->program, prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + if (unlikely(INTEL_DEBUG & DEBUG_CS)) + fprintf(stderr, "\n"); + + struct anv_state cs_state = anv_state_stream_alloc(&pipeline->program_stream, + program_size, 64); + memcpy(cs_state.map, program, program_size); + + pipeline->cs_simd = cs_state.offset; + + ralloc_free(mem_ctx); + + return true; +} + +static void +brw_cs_populate_key(struct brw_context *brw, + struct brw_compute_program *bcp, struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* The unique compute program ID */ + key->program_string_id = bcp->id; +} + +static void +fail_on_compile_error(int status, const char *msg) +{ + int source, line, column; + char error[256]; + + if (status) + return; + + if (sscanf(msg, "%d:%d(%d): error: %255[^\n]", &source, &line, &column, error) == 4) + fail_if(!status, "%d:%s\n", line, error); + else + fail_if(!status, "%s\n", msg); +} + +struct anv_compiler { + struct anv_device *device; + struct intel_screen *screen; + struct brw_context *brw; + struct gl_pipeline_object pipeline; +}; + +extern "C" { + +struct anv_compiler * +anv_compiler_create(struct anv_device *device) +{ + const struct brw_device_info *devinfo = &device->info; + struct anv_compiler *compiler; + struct gl_context *ctx; + + compiler = rzalloc(NULL, struct anv_compiler); + if (compiler == NULL) + return NULL; + + compiler->screen = rzalloc(compiler, struct intel_screen); + if (compiler->screen == NULL) + goto fail; + + compiler->brw = rzalloc(compiler, struct brw_context); + if (compiler->brw == NULL) + goto fail; + 
+ compiler->device = device; + + compiler->brw->optionCache.info = NULL; + compiler->brw->bufmgr = NULL; + compiler->brw->gen = devinfo->gen; + compiler->brw->is_g4x = devinfo->is_g4x; + compiler->brw->is_baytrail = devinfo->is_baytrail; + compiler->brw->is_haswell = devinfo->is_haswell; + compiler->brw->is_cherryview = devinfo->is_cherryview; + + /* We need this at least for CS, which will check brw->max_cs_threads + * against the work group size. */ + compiler->brw->max_vs_threads = devinfo->max_vs_threads; + compiler->brw->max_hs_threads = devinfo->max_hs_threads; + compiler->brw->max_ds_threads = devinfo->max_ds_threads; + compiler->brw->max_gs_threads = devinfo->max_gs_threads; + compiler->brw->max_wm_threads = devinfo->max_wm_threads; + compiler->brw->max_cs_threads = devinfo->max_cs_threads; + compiler->brw->urb.size = devinfo->urb.size; + compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries; + compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries; + compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries; + compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries; + compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries; + + compiler->brw->intelScreen = compiler->screen; + compiler->screen->devinfo = &device->info; + + brw_process_intel_debug_variable(compiler->screen); + + compiler->screen->compiler = brw_compiler_create(compiler, &device->info); + + ctx = &compiler->brw->ctx; + _mesa_init_shader_object_functions(&ctx->Driver); + + _mesa_init_constants(&ctx->Const, API_OPENGL_CORE); + + brw_initialize_context_constants(compiler->brw); + + intelInitExtensions(ctx); + + /* Set dd::NewShader */ + brwInitFragProgFuncs(&ctx->Driver); + + ctx->_Shader = &compiler->pipeline; + + compiler->brw->precompile = false; + + return compiler; + + fail: + ralloc_free(compiler); + return NULL; +} + +void +anv_compiler_destroy(struct anv_compiler *compiler) +{ + _mesa_free_errors_data(&compiler->brw->ctx); + ralloc_free(compiler); 
+} + +/* From gen7_urb.c */ + +/* FIXME: Add to struct intel_device_info */ + +static const int gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + bool vs_present = pipeline->vs_simd8 != NO_KERNEL; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->gs_vec4 != NO_KERNEL; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. */ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). 
+ */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. 
+ */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. + */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static const struct { + uint32_t token; + gl_shader_stage stage; + const char *name; +} stage_info[] = { + { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" }, + { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1,"tess control" }, + { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" }, + { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" }, + { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" }, + { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" }, +}; + +struct spirv_header{ + uint32_t magic; + uint32_t version; + uint32_t gen_magic; +}; + +static const char * +src_as_glsl(const char *data) +{ + const struct spirv_header *as_spirv = (const struct spirv_header *)data; + + /* Check alignment */ + if ((intptr_t)data & 0x3) { + return data; 
+ } + + if (as_spirv->magic == SPIR_V_MAGIC_NUMBER) { + /* LunarG back-door */ + if (as_spirv->version == 0) + return data + 12; + else + return NULL; + } else { + return data; + } +} + +static void +anv_compile_shader_glsl(struct anv_compiler *compiler, + struct gl_shader_program *program, + struct anv_pipeline *pipeline, uint32_t stage) +{ + struct brw_context *brw = compiler->brw; + struct gl_shader *shader; + int name = 0; + + shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); + fail_if(shader == NULL, "failed to create %s shader\n", stage_info[stage].name); + + shader->Source = strdup(src_as_glsl(pipeline->shaders[stage]->module->data)); + _mesa_glsl_compile_shader(&brw->ctx, shader, false, false); + fail_on_compile_error(shader->CompileStatus, shader->InfoLog); + + program->Shaders[program->NumShaders] = shader; + program->NumShaders++; +} + +static void +setup_nir_io(struct gl_program *prog, + nir_shader *shader) +{ + foreach_list_typed(nir_variable, var, node, &shader->inputs) { + prog->InputsRead |= BITFIELD64_BIT(var->data.location); + } + + foreach_list_typed(nir_variable, var, node, &shader->outputs) { + prog->OutputsWritten |= BITFIELD64_BIT(var->data.location); + } +} + +static void +anv_compile_shader_spirv(struct anv_compiler *compiler, + struct gl_shader_program *program, + struct anv_pipeline *pipeline, uint32_t stage) +{ + struct brw_context *brw = compiler->brw; + struct anv_shader *shader = pipeline->shaders[stage]; + struct gl_shader *mesa_shader; + int name = 0; + + mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); + fail_if(mesa_shader == NULL, + "failed to create %s shader\n", stage_info[stage].name); + + switch (stage) { + case VK_SHADER_STAGE_VERTEX: + mesa_shader->Program = &rzalloc(mesa_shader, struct brw_vertex_program)->program.Base; + break; + case VK_SHADER_STAGE_GEOMETRY: + mesa_shader->Program = &rzalloc(mesa_shader, struct brw_geometry_program)->program.Base; + break; + case 
VK_SHADER_STAGE_FRAGMENT: + mesa_shader->Program = &rzalloc(mesa_shader, struct brw_fragment_program)->program.Base; + break; + case VK_SHADER_STAGE_COMPUTE: + mesa_shader->Program = &rzalloc(mesa_shader, struct brw_compute_program)->program.Base; + break; + } + + mesa_shader->Program->Parameters = + rzalloc(mesa_shader, struct gl_program_parameter_list); + + mesa_shader->Type = stage_info[stage].token; + mesa_shader->Stage = stage_info[stage].stage; + + assert(shader->module->size % 4 == 0); + + struct gl_shader_compiler_options *glsl_options = + &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage]; + + mesa_shader->Program->nir = + spirv_to_nir((uint32_t *)shader->module->data, shader->module->size / 4, + glsl_options->NirOptions); + nir_validate_shader(mesa_shader->Program->nir); + + brw_process_nir(mesa_shader->Program->nir, + compiler->screen->devinfo, + NULL, mesa_shader->Stage); + + setup_nir_io(mesa_shader->Program, mesa_shader->Program->nir); + + fail_if(mesa_shader->Program->nir == NULL, + "failed to translate SPIR-V to NIR\n"); + + program->Shaders[program->NumShaders] = mesa_shader; + program->NumShaders++; +} + +static void +add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads, + [VK_SHADER_STAGE_TESS_CONTROL] = 0, + [VK_SHADER_STAGE_TESS_EVALUATION] = 0, + [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads, + [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads, + [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= 1 << stage; + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + align_u32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + +int +anv_compiler_run(struct anv_compiler 
*compiler, struct anv_pipeline *pipeline) +{ + struct gl_shader_program *program; + int name = 0; + struct brw_context *brw = compiler->brw; + + pipeline->writes_point_size = false; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. + */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + brw->use_rep_send = pipeline->use_repclear; + brw->no_simd8 = pipeline->use_repclear; + + program = brw->ctx.Driver.NewShaderProgram(name); + program->Shaders = (struct gl_shader **) + calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *)); + fail_if(program == NULL || program->Shaders == NULL, + "failed to create program\n"); + + bool all_spirv = true; + for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { + if (pipeline->shaders[i] == NULL) + continue; + + /* You need at least this much for "void main() { }" anyway */ + assert(pipeline->shaders[i]->module->size >= 12); + + if (src_as_glsl(pipeline->shaders[i]->module->data)) { + all_spirv = false; + break; + } + + assert(pipeline->shaders[i]->module->size % 4 == 0); + } + + if (all_spirv) { + for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { + if (pipeline->shaders[i]) + anv_compile_shader_spirv(compiler, program, pipeline, i); + } + + for (unsigned i = 0; i < program->NumShaders; i++) { + struct gl_shader *shader = program->Shaders[i]; + program->_LinkedShaders[shader->Stage] = shader; + } + } else { + for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { + if (pipeline->shaders[i]) + anv_compile_shader_glsl(compiler, program, pipeline, i); + } + + _mesa_glsl_link_shader(&brw->ctx, program); + fail_on_compile_error(program->LinkStatus, + program->InfoLog); + } + + bool success; + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) { + struct brw_vs_prog_key vs_key; + struct gl_vertex_program *vp = 
(struct gl_vertex_program *) + program->_LinkedShaders[MESA_SHADER_VERTEX]->Program; + struct brw_vertex_program *bvp = brw_vertex_program(vp); + + brw_vs_populate_key(brw, bvp, &vs_key); + + success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX, + &pipeline->vs_prog_data.base.base); + + if (vp->Base.OutputsWritten & VARYING_SLOT_PSIZ) + pipeline->writes_point_size = true; + } else { + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); + pipeline->vs_simd8 = NO_KERNEL; + } + + + if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) { + struct brw_gs_prog_key gs_key; + struct gl_geometry_program *gp = (struct gl_geometry_program *) + program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program; + struct brw_geometry_program *bgp = brw_geometry_program(gp); + + brw_gs_populate_key(brw, pipeline, bgp, &gs_key); + + success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline); + fail_if(!success, "do_gs_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, + &pipeline->gs_prog_data.base.base); + + if (gp->Base.OutputsWritten & VARYING_SLOT_PSIZ) + pipeline->writes_point_size = true; + } else { + pipeline->gs_vec4 = NO_KERNEL; + } + + if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) { + struct brw_wm_prog_key wm_key; + struct gl_fragment_program *fp = (struct gl_fragment_program *) + program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; + struct brw_fragment_program *bfp = brw_fragment_program(fp); + + brw_wm_populate_key(brw, bfp, &wm_key); + + success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT, + &pipeline->wm_prog_data.base); + } + + if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) { + struct brw_cs_prog_key cs_key; + struct gl_compute_program *cp = (struct gl_compute_program *) + 
program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program; + struct brw_compute_program *bcp = brw_compute_program(cp); + + brw_cs_populate_key(brw, bcp, &cs_key); + + success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline); + fail_if(!success, "brw_codegen_cs_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE, + &pipeline->cs_prog_data.base); + } + + /* XXX: Deleting the shader is broken with our current SPIR-V hacks. We + * need to fix this ASAP. + */ + if (!all_spirv) + brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); + + struct anv_device *device = compiler->device; + while (device->scratch_block_pool.bo.size < pipeline->total_scratch) + anv_block_pool_alloc(&device->scratch_block_pool); + + gen7_compute_urb_partition(pipeline); + + return 0; +} + +/* This badly named function frees the struct anv_pipeline data that the compiler + * allocates. Currently just the prog_data structs. + */ +void +anv_compiler_free(struct anv_pipeline *pipeline) +{ + for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) { + if (pipeline->prog_data[stage]) { + free(pipeline->prog_data[stage]->map_entries); + ralloc_free(pipeline->prog_data[stage]->param); + ralloc_free(pipeline->prog_data[stage]->pull_param); + } + } +} + +} diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c new file mode 100644 index 00000000000..1847303a1cb --- /dev/null +++ b/src/vulkan/anv_device.c @@ -0,0 +1,2390 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" +#include "mesa/main/git_sha1.h" + +static int +anv_env_get_int(const char *name) +{ + const char *val = getenv(name); + + if (!val) + return 0; + + return strtol(val, NULL, 0); +} + +static void +anv_physical_device_finish(struct anv_physical_device *device) +{ + if (device->fd >= 0) + close(device->fd); +} + +static VkResult +anv_physical_device_init(struct anv_physical_device *device, + struct anv_instance *instance, + const char *path) +{ + device->fd = open(path, O_RDWR | O_CLOEXEC); + if (device->fd < 0) + return vk_error(VK_ERROR_UNAVAILABLE); + + device->instance = instance; + device->path = path; + + device->chipset_id = anv_env_get_int("INTEL_DEVID_OVERRIDE"); + device->no_hw = false; + if (device->chipset_id) { + /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. 
*/ + device->no_hw = true; + } else { + device->chipset_id = anv_gem_get_param(device->fd, I915_PARAM_CHIPSET_ID); + } + if (!device->chipset_id) + goto fail; + + device->name = brw_get_device_name(device->chipset_id); + device->info = brw_get_device_info(device->chipset_id, -1); + if (!device->info) + goto fail; + + if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_WAIT_TIMEOUT)) + goto fail; + + if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_EXECBUF2)) + goto fail; + + if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_LLC)) + goto fail; + + if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_EXEC_CONSTANTS)) + goto fail; + + return VK_SUCCESS; + +fail: + anv_physical_device_finish(device); + return vk_error(VK_ERROR_UNAVAILABLE); +} + +static void *default_alloc( + void* pUserData, + size_t size, + size_t alignment, + VkSystemAllocType allocType) +{ + return malloc(size); +} + +static void default_free( + void* pUserData, + void* pMem) +{ + free(pMem); +} + +static const VkAllocCallbacks default_alloc_callbacks = { + .pUserData = NULL, + .pfnAlloc = default_alloc, + .pfnFree = default_free +}; + +VkResult anv_CreateInstance( + const VkInstanceCreateInfo* pCreateInfo, + VkInstance* pInstance) +{ + struct anv_instance *instance; + const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks; + void *user_data = NULL; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); + + if (pCreateInfo->pAllocCb) { + alloc_callbacks = pCreateInfo->pAllocCb; + user_data = pCreateInfo->pAllocCb->pUserData; + } + instance = alloc_callbacks->pfnAlloc(user_data, sizeof(*instance), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!instance) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + instance->pAllocUserData = alloc_callbacks->pUserData; + instance->pfnAlloc = alloc_callbacks->pfnAlloc; + instance->pfnFree = alloc_callbacks->pfnFree; + instance->apiVersion = pCreateInfo->pAppInfo->apiVersion; + instance->physicalDeviceCount = 0; + + *pInstance = 
anv_instance_to_handle(instance); + + return VK_SUCCESS; +} + +VkResult anv_DestroyInstance( + VkInstance _instance) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + if (instance->physicalDeviceCount > 0) { + anv_physical_device_finish(&instance->physicalDevice); + } + + instance->pfnFree(instance->pAllocUserData, instance); + + return VK_SUCCESS; +} + +VkResult anv_EnumeratePhysicalDevices( + VkInstance _instance, + uint32_t* pPhysicalDeviceCount, + VkPhysicalDevice* pPhysicalDevices) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + VkResult result; + + if (instance->physicalDeviceCount == 0) { + result = anv_physical_device_init(&instance->physicalDevice, + instance, "/dev/dri/renderD128"); + if (result != VK_SUCCESS) + return result; + + instance->physicalDeviceCount = 1; + } + + /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; + * otherwise it's an inout parameter. + * + * The Vulkan spec (git aaed022) says: + * + * pPhysicalDeviceCount is a pointer to an unsigned integer variable + * that is initialized with the number of devices the application is + * prepared to receive handles to. pname:pPhysicalDevices is pointer to + * an array of at least this many VkPhysicalDevice handles [...]. + * + * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices + * overwrites the contents of the variable pointed to by + * pPhysicalDeviceCount with the number of physical devices in in the + * instance; otherwise, vkEnumeratePhysicalDevices overwrites + * pPhysicalDeviceCount with the number of physical handles written to + * pPhysicalDevices. 
+ */ + if (!pPhysicalDevices) { + *pPhysicalDeviceCount = instance->physicalDeviceCount; + } else if (*pPhysicalDeviceCount >= 1) { + pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice); + *pPhysicalDeviceCount = 1; + } else { + *pPhysicalDeviceCount = 0; + } + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceFeatures( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures* pFeatures) +{ + anv_finishme("Get correct values for PhysicalDeviceFeatures"); + + *pFeatures = (VkPhysicalDeviceFeatures) { + .robustBufferAccess = false, + .fullDrawIndexUint32 = false, + .imageCubeArray = false, + .independentBlend = false, + .geometryShader = true, + .tessellationShader = false, + .sampleRateShading = false, + .dualSourceBlend = true, + .logicOp = true, + .instancedDrawIndirect = true, + .depthClip = false, + .depthBiasClamp = false, + .fillModeNonSolid = true, + .depthBounds = false, + .wideLines = true, + .largePoints = true, + .textureCompressionETC2 = true, + .textureCompressionASTC_LDR = true, + .textureCompressionBC = true, + .pipelineStatisticsQuery = true, + .vertexSideEffects = false, + .tessellationSideEffects = false, + .geometrySideEffects = false, + .fragmentSideEffects = false, + .shaderTessellationPointSize = false, + .shaderGeometryPointSize = true, + .shaderTextureGatherExtended = true, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = false, + .shaderStorageBufferArrayConstantIndexing = false, + .shaderStorageImageArrayConstantIndexing = false, + .shaderUniformBufferArrayDynamicIndexing = true, + .shaderSampledImageArrayDynamicIndexing = false, + .shaderStorageBufferArrayDynamicIndexing = false, + .shaderStorageImageArrayDynamicIndexing = false, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderFloat16 = false, + .shaderInt16 = false, + }; + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceLimits( + 
VkPhysicalDevice physicalDevice, + VkPhysicalDeviceLimits* pLimits) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + const struct brw_device_info *devinfo = physical_device->info; + + anv_finishme("Get correct values for PhysicalDeviceLimits"); + + *pLimits = (VkPhysicalDeviceLimits) { + .maxImageDimension1D = (1 << 14), + .maxImageDimension2D = (1 << 14), + .maxImageDimension3D = (1 << 10), + .maxImageDimensionCube = (1 << 14), + .maxImageArrayLayers = (1 << 10), + .maxTexelBufferSize = (1 << 14), + .maxUniformBufferSize = UINT32_MAX, + .maxStorageBufferSize = UINT32_MAX, + .maxPushConstantsSize = 128, + .maxMemoryAllocationCount = UINT32_MAX, + .bufferImageGranularity = 64, /* A cache line */ + .maxBoundDescriptorSets = MAX_SETS, + .maxDescriptorSets = UINT32_MAX, + .maxPerStageDescriptorSamplers = 64, + .maxPerStageDescriptorUniformBuffers = 64, + .maxPerStageDescriptorStorageBuffers = 64, + .maxPerStageDescriptorSampledImages = 64, + .maxPerStageDescriptorStorageImages = 64, + .maxDescriptorSetSamplers = 256, + .maxDescriptorSetUniformBuffers = 256, + .maxDescriptorSetStorageBuffers = 256, + .maxDescriptorSetSampledImages = 256, + .maxDescriptorSetStorageImages = 256, + .maxVertexInputAttributes = 32, + .maxVertexInputAttributeOffset = 256, + .maxVertexInputBindingStride = 256, + .maxVertexOutputComponents = 32, + .maxTessGenLevel = 0, + .maxTessPatchSize = 0, + .maxTessControlPerVertexInputComponents = 0, + .maxTessControlPerVertexOutputComponents = 0, + .maxTessControlPerPatchOutputComponents = 0, + .maxTessControlTotalOutputComponents = 0, + .maxTessEvaluationInputComponents = 0, + .maxTessEvaluationOutputComponents = 0, + .maxGeometryShaderInvocations = 6, + .maxGeometryInputComponents = 16, + .maxGeometryOutputComponents = 16, + .maxGeometryOutputVertices = 16, + .maxGeometryTotalOutputComponents = 16, + .maxFragmentInputComponents = 16, + .maxFragmentOutputBuffers = 8, + .maxFragmentDualSourceBuffers = 2, + 
.maxFragmentCombinedOutputResources = 8, + .maxComputeSharedMemorySize = 1024, + .maxComputeWorkGroupCount = { + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + }, + .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, + .maxComputeWorkGroupSize = { + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + }, + .subPixelPrecisionBits = 4 /* FIXME */, + .subTexelPrecisionBits = 4 /* FIXME */, + .mipmapPrecisionBits = 4 /* FIXME */, + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectInstanceCount = UINT32_MAX, + .primitiveRestartForPatches = UINT32_MAX, + .maxSamplerLodBias = 16, + .maxSamplerAnisotropy = 16, + .maxViewports = 16, + .maxDynamicViewportStates = UINT32_MAX, + .maxViewportDimensions = { (1 << 14), (1 << 14) }, + .viewportBoundsRange = { -1.0, 1.0 }, /* FIXME */ + .viewportSubPixelBits = 13, /* We take a float? */ + .minMemoryMapAlignment = 64, /* A cache line */ + .minTexelBufferOffsetAlignment = 1, + .minUniformBufferOffsetAlignment = 1, + .minStorageBufferOffsetAlignment = 1, + .minTexelOffset = 0, /* FIXME */ + .maxTexelOffset = 0, /* FIXME */ + .minTexelGatherOffset = 0, /* FIXME */ + .maxTexelGatherOffset = 0, /* FIXME */ + .minInterpolationOffset = 0, /* FIXME */ + .maxInterpolationOffset = 0, /* FIXME */ + .subPixelInterpolationOffsetBits = 0, /* FIXME */ + .maxFramebufferWidth = (1 << 14), + .maxFramebufferHeight = (1 << 14), + .maxFramebufferLayers = (1 << 10), + .maxFramebufferColorSamples = 8, + .maxFramebufferDepthSamples = 8, + .maxFramebufferStencilSamples = 8, + .maxColorAttachments = MAX_RTS, + .maxSampledImageColorSamples = 8, + .maxSampledImageDepthSamples = 8, + .maxSampledImageIntegerSamples = 1, + .maxStorageImageSamples = 1, + .maxSampleMaskWords = 1, + .timestampFrequency = 1000 * 1000 * 1000 / 80, + .maxClipDistances = 0 /* FIXME */, + .maxCullDistances = 0 /* FIXME */, + .maxCombinedClipAndCullDistances = 0 /* FIXME */, + 
.pointSizeRange = { 0.125, 255.875 }, + .lineWidthRange = { 0.0, 7.9921875 }, + .pointSizeGranularity = (1.0 / 8.0), + .lineWidthGranularity = (1.0 / 128.0), + }; + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties* pProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + + *pProperties = (VkPhysicalDeviceProperties) { + .apiVersion = 1, + .driverVersion = 1, + .vendorId = 0x8086, + .deviceId = pdevice->chipset_id, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + }; + + strcpy(pProperties->deviceName, pdevice->name); + snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_LENGTH, + "anv-%s", MESA_GIT_SHA1 + 4); + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceQueueCount( + VkPhysicalDevice physicalDevice, + uint32_t* pCount) +{ + *pCount = 1; + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceQueueProperties( + VkPhysicalDevice physicalDevice, + uint32_t count, + VkPhysicalDeviceQueueProperties* pQueueProperties) +{ + assert(count == 1); + + *pQueueProperties = (VkPhysicalDeviceQueueProperties) { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_DMA_BIT, + .queueCount = 1, + .supportsTimestamps = true, + }; + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceMemoryProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties* pMemoryProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + + size_t aperture_size; + size_t heap_size; + + if (anv_gem_get_aperture(physical_device, &aperture_size) == -1) + return vk_error(VK_ERROR_UNAVAILABLE); + + /* Reserve some wiggle room for the driver by exposing only 75% of the + * aperture to the heap. + */ + heap_size = 3 * aperture_size / 4; + + /* The property flags below are valid only for llc platforms. 
*/ + pMemoryProperties->memoryTypeCount = 1; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + .heapIndex = 1, + }; + + pMemoryProperties->memoryHeapCount = 1; + pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { + .size = heap_size, + .flags = VK_MEMORY_HEAP_HOST_LOCAL, + }; + + return VK_SUCCESS; +} + +PFN_vkVoidFunction anv_GetInstanceProcAddr( + VkInstance instance, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +PFN_vkVoidFunction anv_GetDeviceProcAddr( + VkDevice device, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +static void +parse_debug_flags(struct anv_device *device) +{ + const char *debug, *p, *end; + + debug = getenv("INTEL_DEBUG"); + device->dump_aub = false; + if (debug) { + for (p = debug; *p; p = end + 1) { + end = strchrnul(p, ','); + if (end - p == 3 && memcmp(p, "aub", 3) == 0) + device->dump_aub = true; + if (end - p == 5 && memcmp(p, "no_hw", 5) == 0) + device->no_hw = true; + if (*end == '\0') + break; + } + } +} + +static VkResult +anv_queue_init(struct anv_device *device, struct anv_queue *queue) +{ + queue->device = device; + queue->pool = &device->surface_state_pool; + + queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4); + if (queue->completed_serial.map == NULL) + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + + *(uint32_t *)queue->completed_serial.map = 0; + queue->next_serial = 1; + + return VK_SUCCESS; +} + +static void +anv_queue_finish(struct anv_queue *queue) +{ +#ifdef HAVE_VALGRIND + /* This gets torn down with the device so we only need to do this if + * valgrind is present. 
+ */ + anv_state_pool_free(queue->pool, queue->completed_serial); +#endif +} + +static void +anv_device_init_border_colors(struct anv_device *device) +{ + static const VkClearColorValue border_colors[] = { + [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 0.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 1.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .f32 = { 1.0, 1.0, 1.0, 1.0 } }, + [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .u32 = { 0, 0, 0, 0 } }, + [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .u32 = { 0, 0, 0, 1 } }, + [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .u32 = { 1, 1, 1, 1 } }, + }; + + device->border_colors = + anv_state_pool_alloc(&device->dynamic_state_pool, + sizeof(border_colors), 32); + memcpy(device->border_colors.map, border_colors, sizeof(border_colors)); +} + +static const uint32_t BATCH_SIZE = 8192; + +VkResult anv_CreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + VkDevice* pDevice) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + struct anv_instance *instance = physical_device->instance; + struct anv_device *device; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); + + device = instance->pfnAlloc(instance->pAllocUserData, + sizeof(*device), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!device) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + device->no_hw = physical_device->no_hw; + parse_debug_flags(device); + + device->instance = physical_device->instance; + + /* XXX(chadv): Can we dup() physicalDevice->fd here? 
*/ + device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); + if (device->fd == -1) + goto fail_device; + + device->context_id = anv_gem_create_context(device); + if (device->context_id == -1) + goto fail_fd; + + anv_bo_pool_init(&device->batch_bo_pool, device, BATCH_SIZE); + + anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048); + + anv_state_pool_init(&device->dynamic_state_pool, + &device->dynamic_state_block_pool); + + anv_block_pool_init(&device->instruction_block_pool, device, 2048); + anv_block_pool_init(&device->surface_state_block_pool, device, 2048); + + anv_state_pool_init(&device->surface_state_pool, + &device->surface_state_block_pool); + + anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); + + device->info = *physical_device->info; + + device->compiler = anv_compiler_create(device); + device->aub_writer = NULL; + + pthread_mutex_init(&device->mutex, NULL); + + anv_queue_init(device, &device->queue); + + anv_device_init_meta(device); + + anv_device_init_border_colors(device); + + *pDevice = anv_device_to_handle(device); + + return VK_SUCCESS; + + fail_fd: + close(device->fd); + fail_device: + anv_device_free(device, device); + + return vk_error(VK_ERROR_UNAVAILABLE); +} + +VkResult anv_DestroyDevice( + VkDevice _device) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_compiler_destroy(device->compiler); + + anv_queue_finish(&device->queue); + + anv_device_finish_meta(device); + +#ifdef HAVE_VALGRIND + /* We only need to free these to prevent valgrind errors. The backing + * BO will go away in a couple of lines so we don't actually leak. 
+ */ + anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); +#endif + + anv_bo_pool_finish(&device->batch_bo_pool); + anv_block_pool_finish(&device->dynamic_state_block_pool); + anv_block_pool_finish(&device->instruction_block_pool); + anv_block_pool_finish(&device->surface_state_block_pool); + + close(device->fd); + + if (device->aub_writer) + anv_aub_writer_destroy(device->aub_writer); + + anv_device_free(device, device); + + return VK_SUCCESS; +} + +static const VkExtensionProperties global_extensions[] = { + { + .extName = "VK_WSI_LunarG", + .specVersion = 3 + } +}; + +VkResult anv_GetGlobalExtensionProperties( + const char* pLayerName, + uint32_t* pCount, + VkExtensionProperties* pProperties) +{ + if (pProperties == NULL) { + *pCount = ARRAY_SIZE(global_extensions); + return VK_SUCCESS; + } + + assert(*pCount < ARRAY_SIZE(global_extensions)); + + *pCount = ARRAY_SIZE(global_extensions); + memcpy(pProperties, global_extensions, sizeof(global_extensions)); + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, + const char* pLayerName, + uint32_t* pCount, + VkExtensionProperties* pProperties) +{ + if (pProperties == NULL) { + *pCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_INVALID_EXTENSION); +} + +VkResult anv_GetGlobalLayerProperties( + uint32_t* pCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_INVALID_LAYER); +} + +VkResult anv_GetPhysicalDeviceLayerProperties( + VkPhysicalDevice physicalDevice, + uint32_t* pCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_INVALID_LAYER); +} + +VkResult anv_GetDeviceQueue( + VkDevice _device, + uint32_t queueNodeIndex, + uint32_t 
queueIndex, + VkQueue* pQueue) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + assert(queueIndex == 0); + + *pQueue = anv_queue_to_handle(&device->queue); + + return VK_SUCCESS; +} + +VkResult anv_QueueSubmit( + VkQueue _queue, + uint32_t cmdBufferCount, + const VkCmdBuffer* pCmdBuffers, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + struct anv_device *device = queue->device; + int ret; + + for (uint32_t i = 0; i < cmdBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCmdBuffers[i]); + + if (device->dump_aub) + anv_cmd_buffer_dump(cmd_buffer); + + if (!device->no_hw) { + ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf); + if (ret != 0) + return vk_error(VK_ERROR_UNKNOWN); + + if (fence) { + ret = anv_gem_execbuffer(device, &fence->execbuf); + if (ret != 0) + return vk_error(VK_ERROR_UNKNOWN); + } + + for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) + cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset; + } else { + *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial; + } + } + + return VK_SUCCESS; +} + +VkResult anv_QueueWaitIdle( + VkQueue _queue) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + + return vkDeviceWaitIdle(anv_device_to_handle(queue->device)); +} + +VkResult anv_DeviceWaitIdle( + VkDevice _device) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_state state; + struct anv_batch batch; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + struct anv_bo *bo = NULL; + VkResult result; + int64_t timeout; + int ret; + + state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32); + bo = &device->dynamic_state_pool.block_pool->bo; + batch.start = batch.next = state.map; + batch.end = state.map + 32; + anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN8_MI_NOOP); + + exec2_objects[0].handle = bo->gem_handle; + 
exec2_objects[0].relocation_count = 0; + exec2_objects[0].relocs_ptr = 0; + exec2_objects[0].alignment = 0; + exec2_objects[0].offset = bo->offset; + exec2_objects[0].flags = 0; + exec2_objects[0].rsvd1 = 0; + exec2_objects[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t) exec2_objects; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = state.offset; + execbuf.batch_len = batch.next - state.map; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + + execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + execbuf.rsvd1 = device->context_id; + execbuf.rsvd2 = 0; + + if (!device->no_hw) { + ret = anv_gem_execbuffer(device, &execbuf); + if (ret != 0) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + timeout = INT64_MAX; + ret = anv_gem_wait(device, bo->gem_handle, &timeout); + if (ret != 0) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + } + + anv_state_pool_free(&device->dynamic_state_pool, state); + + return VK_SUCCESS; + + fail: + anv_state_pool_free(&device->dynamic_state_pool, state); + + return result; +} + +void * +anv_device_alloc(struct anv_device * device, + size_t size, + size_t alignment, + VkSystemAllocType allocType) +{ + return device->instance->pfnAlloc(device->instance->pAllocUserData, + size, + alignment, + allocType); +} + +void +anv_device_free(struct anv_device * device, + void * mem) +{ + if (mem == NULL) + return; + + return device->instance->pfnFree(device->instance->pAllocUserData, + mem); +} + +VkResult +anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) +{ + bo->gem_handle = anv_gem_create(device, size); + if (!bo->gem_handle) + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + + bo->map = NULL; + bo->index = 0; + bo->offset = 0; + bo->size = size; + + return VK_SUCCESS; +} + +VkResult anv_AllocMemory( + VkDevice _device, + const VkMemoryAllocInfo* pAllocInfo, + VkDeviceMemory* pMem) +{ + 
ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_device_memory *mem; + VkResult result; + + assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO); + + if (pAllocInfo->memoryTypeIndex != 0) { + /* We support exactly one memory heap. */ + return vk_error(VK_ERROR_INVALID_VALUE); + } + + /* FINISHME: Fail if allocation request exceeds heap size. */ + + mem = anv_device_alloc(device, sizeof(*mem), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_init_new(&mem->bo, device, pAllocInfo->allocationSize); + if (result != VK_SUCCESS) + goto fail; + + *pMem = anv_device_memory_to_handle(mem); + + return VK_SUCCESS; + + fail: + anv_device_free(device, mem); + + return result; +} + +VkResult anv_FreeMemory( + VkDevice _device, + VkDeviceMemory _mem) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + + if (mem->bo.map) + anv_gem_munmap(mem->bo.map, mem->bo.size); + + if (mem->bo.gem_handle != 0) + anv_gem_close(device, mem->bo.gem_handle); + + anv_device_free(device, mem); + + return VK_SUCCESS; +} + +VkResult anv_MapMemory( + VkDevice _device, + VkDeviceMemory _mem, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void** ppData) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + + /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only + * takes a VkDeviceMemory pointer, it seems like only one map of the memory + * at a time is valid. We could just mmap up front and return an offset + * pointer here, but that may exhaust virtual memory on 32 bit + * userspace. 
*/ + + mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size); + mem->map_size = size; + + *ppData = mem->map; + + return VK_SUCCESS; +} + +VkResult anv_UnmapMemory( + VkDevice _device, + VkDeviceMemory _mem) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + + anv_gem_munmap(mem->map, mem->map_size); + + return VK_SUCCESS; +} + +VkResult anv_FlushMappedMemoryRanges( + VkDevice device, + uint32_t memRangeCount, + const VkMappedMemoryRange* pMemRanges) +{ + /* clflush here for !llc platforms */ + + return VK_SUCCESS; +} + +VkResult anv_InvalidateMappedMemoryRanges( + VkDevice device, + uint32_t memRangeCount, + const VkMappedMemoryRange* pMemRanges) +{ + return anv_FlushMappedMemoryRanges(device, memRangeCount, pMemRanges); +} + +VkResult anv_GetBufferMemoryRequirements( + VkDevice device, + VkBuffer _buffer, + VkMemoryRequirements* pMemoryRequirements) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. The bit `1<memoryTypeBits = 1; + + pMemoryRequirements->size = buffer->size; + pMemoryRequirements->alignment = 16; + + return VK_SUCCESS; +} + +VkResult anv_GetImageMemoryRequirements( + VkDevice device, + VkImage _image, + VkMemoryRequirements* pMemoryRequirements) +{ + ANV_FROM_HANDLE(anv_image, image, _image); + + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. 
The bit `1<memoryTypeBits = 1; + + pMemoryRequirements->size = image->size; + pMemoryRequirements->alignment = image->alignment; + + return VK_SUCCESS; +} + +VkResult anv_GetImageSparseMemoryRequirements( + VkDevice device, + VkImage image, + uint32_t* pNumRequirements, + VkSparseImageMemoryRequirements* pSparseMemoryRequirements) +{ + return vk_error(VK_UNSUPPORTED); +} + +VkResult anv_GetDeviceMemoryCommitment( + VkDevice device, + VkDeviceMemory memory, + VkDeviceSize* pCommittedMemoryInBytes) +{ + *pCommittedMemoryInBytes = 0; + stub_return(VK_SUCCESS); +} + +VkResult anv_BindBufferMemory( + VkDevice device, + VkBuffer _buffer, + VkDeviceMemory _mem, + VkDeviceSize memOffset) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + buffer->bo = &mem->bo; + buffer->offset = memOffset; + + return VK_SUCCESS; +} + +VkResult anv_BindImageMemory( + VkDevice device, + VkImage _image, + VkDeviceMemory _mem, + VkDeviceSize memOffset) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + ANV_FROM_HANDLE(anv_image, image, _image); + + image->bo = &mem->bo; + image->offset = memOffset; + + return VK_SUCCESS; +} + +VkResult anv_QueueBindSparseBufferMemory( + VkQueue queue, + VkBuffer buffer, + uint32_t numBindings, + const VkSparseMemoryBindInfo* pBindInfo) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_QueueBindSparseImageOpaqueMemory( + VkQueue queue, + VkImage image, + uint32_t numBindings, + const VkSparseMemoryBindInfo* pBindInfo) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_QueueBindSparseImageMemory( + VkQueue queue, + VkImage image, + uint32_t numBindings, + const VkSparseImageMemoryBindInfo* pBindInfo) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_CreateFence( + VkDevice _device, + const VkFenceCreateInfo* pCreateInfo, + VkFence* pFence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_fence *fence; + struct anv_batch batch; + VkResult result; + + const uint32_t fence_size = 128; + 
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); + + fence = anv_device_alloc(device, sizeof(*fence), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (fence == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_init_new(&fence->bo, device, fence_size); + if (result != VK_SUCCESS) + goto fail; + + fence->bo.map = + anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size); + batch.next = batch.start = fence->bo.map; + batch.end = fence->bo.map + fence->bo.size; + anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN8_MI_NOOP); + + fence->exec2_objects[0].handle = fence->bo.gem_handle; + fence->exec2_objects[0].relocation_count = 0; + fence->exec2_objects[0].relocs_ptr = 0; + fence->exec2_objects[0].alignment = 0; + fence->exec2_objects[0].offset = fence->bo.offset; + fence->exec2_objects[0].flags = 0; + fence->exec2_objects[0].rsvd1 = 0; + fence->exec2_objects[0].rsvd2 = 0; + + fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; + fence->execbuf.buffer_count = 1; + fence->execbuf.batch_start_offset = 0; + fence->execbuf.batch_len = batch.next - fence->bo.map; + fence->execbuf.cliprects_ptr = 0; + fence->execbuf.num_cliprects = 0; + fence->execbuf.DR1 = 0; + fence->execbuf.DR4 = 0; + + fence->execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + fence->execbuf.rsvd1 = device->context_id; + fence->execbuf.rsvd2 = 0; + + *pFence = anv_fence_to_handle(fence); + + return VK_SUCCESS; + + fail: + anv_device_free(device, fence); + + return result; +} + +VkResult anv_DestroyFence( + VkDevice _device, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + + anv_gem_munmap(fence->bo.map, fence->bo.size); + anv_gem_close(device, fence->bo.gem_handle); + anv_device_free(device, fence); + + return VK_SUCCESS; +} + +VkResult anv_ResetFences( + VkDevice _device, + uint32_t fenceCount, + const VkFence* pFences) +{ + 
for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + fence->ready = false; + } + + return VK_SUCCESS; +} + +VkResult anv_GetFenceStatus( + VkDevice _device, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + int64_t t = 0; + int ret; + + if (fence->ready) + return VK_SUCCESS; + + ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == 0) { + fence->ready = true; + return VK_SUCCESS; + } + + return VK_NOT_READY; +} + +VkResult anv_WaitForFences( + VkDevice _device, + uint32_t fenceCount, + const VkFence* pFences, + VkBool32 waitAll, + uint64_t timeout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + int64_t t = timeout; + int ret; + + /* FIXME: handle !waitAll */ + + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == -1 && errno == ETIME) + return VK_TIMEOUT; + else if (ret == -1) + return vk_error(VK_ERROR_UNKNOWN); + } + + return VK_SUCCESS; +} + +// Queue semaphore functions + +VkResult anv_CreateSemaphore( + VkDevice device, + const VkSemaphoreCreateInfo* pCreateInfo, + VkSemaphore* pSemaphore) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_DestroySemaphore( + VkDevice device, + VkSemaphore semaphore) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_QueueSignalSemaphore( + VkQueue queue, + VkSemaphore semaphore) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_QueueWaitSemaphore( + VkQueue queue, + VkSemaphore semaphore) +{ + stub_return(VK_UNSUPPORTED); +} + +// Event functions + +VkResult anv_CreateEvent( + VkDevice device, + const VkEventCreateInfo* pCreateInfo, + VkEvent* pEvent) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_DestroyEvent( + VkDevice device, + VkEvent event) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_GetEventStatus( + VkDevice device, + VkEvent event) +{ + 
stub_return(VK_UNSUPPORTED); +} + +VkResult anv_SetEvent( + VkDevice device, + VkEvent event) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_ResetEvent( + VkDevice device, + VkEvent event) +{ + stub_return(VK_UNSUPPORTED); +} + +// Buffer functions + +VkResult anv_CreateBuffer( + VkDevice _device, + const VkBufferCreateInfo* pCreateInfo, + VkBuffer* pBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_buffer *buffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); + + buffer = anv_device_alloc(device, sizeof(*buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + buffer->size = pCreateInfo->size; + buffer->bo = NULL; + buffer->offset = 0; + + *pBuffer = anv_buffer_to_handle(buffer); + + return VK_SUCCESS; +} + +VkResult anv_DestroyBuffer( + VkDevice _device, + VkBuffer _buffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + anv_device_free(device, buffer); + + return VK_SUCCESS; +} + +// Buffer view functions + +void +anv_fill_buffer_surface_state(void *state, VkFormat format, + uint32_t offset, uint32_t range) +{ + const struct anv_format *info; + + info = anv_format_for_vk_format(format); + /* This assumes RGBA float format. 
*/ + uint32_t stride = 4; + uint32_t num_elements = range / stride; + + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceArray = false, + .SurfaceFormat = info->surface_format, + .SurfaceVerticalAlignment = VALIGN4, + .SurfaceHorizontalAlignment = HALIGN4, + .TileMode = LINEAR, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GEN8_MOCS, + .BaseMipLevel = 0.0, + .SurfaceQPitch = 0, + .Height = (num_elements >> 7) & 0x3fff, + .Width = num_elements & 0x7f, + .Depth = (num_elements >> 21) & 0x3f, + .SurfacePitch = stride - 1, + .MinimumArrayElement = 0, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + .SurfaceMinLOD = 0, + .MIPCountLOD = 0, + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + .ResourceMinLOD = 0.0, + /* FIXME: We assume that the image must be bound at this time. 
*/ + .SurfaceBaseAddress = { NULL, offset }, + }; + + GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); +} + +VkResult anv_CreateBufferView( + VkDevice _device, + const VkBufferViewCreateInfo* pCreateInfo, + VkBufferView* pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); + struct anv_buffer_view *bview; + struct anv_surface_view *view; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); + + bview = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (bview == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + view = &bview->view; + view->bo = buffer->bo; + view->offset = buffer->offset + pCreateInfo->offset; + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + view->format = pCreateInfo->format; + view->range = pCreateInfo->range; + + anv_fill_buffer_surface_state(view->surface_state.map, + pCreateInfo->format, + view->offset, pCreateInfo->range); + + *pView = anv_buffer_view_to_handle(bview); + + return VK_SUCCESS; +} + +VkResult anv_DestroyBufferView( + VkDevice _device, + VkBufferView _bview) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer_view, bview, _bview); + + anv_surface_view_fini(device, &bview->view); + anv_device_free(device, bview); + + return VK_SUCCESS; +} + +// Sampler functions + +VkResult anv_CreateSampler( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + uint32_t mag_filter, min_filter, max_anisotropy; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_device_alloc(device, sizeof(*sampler), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + static const uint32_t vk_to_gen_tex_filter[] = { + [VK_TEX_FILTER_NEAREST] = 
MAPFILTER_NEAREST, + [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR + }; + + static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, + [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR + }; + + static const uint32_t vk_to_gen_tex_address[] = { + [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, + [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, + [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, + [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, + [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, + }; + + static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, + }; + + if (pCreateInfo->maxAnisotropy > 1) { + mag_filter = MAPFILTER_ANISOTROPIC; + min_filter = MAPFILTER_ANISOTROPIC; + max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2; + } else { + mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter]; + min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; + max_anisotropy = RATIO21; + } + + struct GEN8_SAMPLER_STATE sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .LODPreClampMode = 0, + .BaseMipLevel = 0.0, + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], + .MagModeFilter = mag_filter, + .MinModeFilter = min_filter, + .TextureLODBias = pCreateInfo->mipLodBias * 256, + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = pCreateInfo->minLod, + .MaxLOD = pCreateInfo->maxLod, + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = 0, + + .IndirectStatePointer = + device->border_colors.offset + + 
pCreateInfo->borderColor * sizeof(float) * 4, + + .LODClampMagnificationMode = MIPNONE, + .MaximumAnisotropy = max_anisotropy, + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = 0, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW], + }; + + GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} + +VkResult anv_DestroySampler( + VkDevice _device, + VkSampler _sampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); + + anv_device_free(device, sampler); + + return VK_SUCCESS; +} + +// Descriptor set functions + +VkResult anv_CreateDescriptorSetLayout( + VkDevice _device, + const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + VkDescriptorSetLayout* pSetLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_descriptor_set_layout *set_layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + uint32_t sampler_count[VK_SHADER_STAGE_NUM] = { 0, }; + uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, }; + uint32_t num_dynamic_buffers = 0; + uint32_t count = 0; + uint32_t stages = 0; + uint32_t s; + + for (uint32_t i = 0; i < pCreateInfo->count; i++) { + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + sampler_count[s] += pCreateInfo->pBinding[i].arraySize; + break; + default: + break; + } + + switch 
(pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + surface_count[s] += pCreateInfo->pBinding[i].arraySize; + break; + default: + break; + } + + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + num_dynamic_buffers += pCreateInfo->pBinding[i].arraySize; + break; + default: + break; + } + + stages |= pCreateInfo->pBinding[i].stageFlags; + count += pCreateInfo->pBinding[i].arraySize; + } + + uint32_t sampler_total = 0; + uint32_t surface_total = 0; + for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { + sampler_total += sampler_count[s]; + surface_total += surface_count[s]; + } + + size_t size = sizeof(*set_layout) + + (sampler_total + surface_total) * sizeof(set_layout->entries[0]); + set_layout = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!set_layout) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + set_layout->num_dynamic_buffers = num_dynamic_buffers; + set_layout->count = count; + set_layout->shader_stages = stages; + + struct anv_descriptor_slot *p = set_layout->entries; + struct anv_descriptor_slot *sampler[VK_SHADER_STAGE_NUM]; + struct anv_descriptor_slot *surface[VK_SHADER_STAGE_NUM]; + for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { + set_layout->stage[s].surface_count = surface_count[s]; + set_layout->stage[s].surface_start = surface[s] = p; + p += surface_count[s]; + set_layout->stage[s].sampler_count = 
sampler_count[s]; + set_layout->stage[s].sampler_start = sampler[s] = p; + p += sampler_count[s]; + } + + uint32_t descriptor = 0; + int8_t dynamic_slot = 0; + bool is_dynamic; + for (uint32_t i = 0; i < pCreateInfo->count; i++) { + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) { + sampler[s]->index = descriptor + j; + sampler[s]->dynamic_slot = -1; + sampler[s]++; + } + break; + default: + break; + } + + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + is_dynamic = true; + break; + default: + is_dynamic = false; + break; + } + + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) { + surface[s]->index = descriptor + j; + if (is_dynamic) + surface[s]->dynamic_slot = dynamic_slot + j; + else + surface[s]->dynamic_slot = -1; + surface[s]++; + } + break; + default: + break; + } + + if (is_dynamic) + dynamic_slot += pCreateInfo->pBinding[i].arraySize; + + descriptor += pCreateInfo->pBinding[i].arraySize; + } + + *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); + + return VK_SUCCESS; +} + +VkResult anv_DestroyDescriptorSetLayout( + VkDevice _device, + VkDescriptorSetLayout 
_set_layout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); + + anv_device_free(device, set_layout); + + return VK_SUCCESS; +} + +VkResult anv_CreateDescriptorPool( + VkDevice device, + VkDescriptorPoolUsage poolUsage, + uint32_t maxSets, + const VkDescriptorPoolCreateInfo* pCreateInfo, + VkDescriptorPool* pDescriptorPool) +{ + anv_finishme("VkDescriptorPool is a stub"); + pDescriptorPool->handle = 1; + return VK_SUCCESS; +} + +VkResult anv_DestroyDescriptorPool( + VkDevice _device, + VkDescriptorPool _pool) +{ + anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); + return VK_SUCCESS; +} + +VkResult anv_ResetDescriptorPool( + VkDevice device, + VkDescriptorPool descriptorPool) +{ + anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); + return VK_SUCCESS; +} + +VkResult +anv_descriptor_set_create(struct anv_device *device, + const struct anv_descriptor_set_layout *layout, + struct anv_descriptor_set **out_set) +{ + struct anv_descriptor_set *set; + size_t size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]); + + set = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!set) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* A descriptor set may not be 100% filled. Clear the set so we can + * later detect holes in it. 
+ */ + memset(set, 0, size); + + *out_set = set; + + return VK_SUCCESS; +} + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set) +{ + anv_device_free(device, set); +} + +VkResult anv_AllocDescriptorSets( + VkDevice _device, + VkDescriptorPool descriptorPool, + VkDescriptorSetUsage setUsage, + uint32_t count, + const VkDescriptorSetLayout* pSetLayouts, + VkDescriptorSet* pDescriptorSets, + uint32_t* pCount) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + VkResult result; + struct anv_descriptor_set *set; + + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pSetLayouts[i]); + + result = anv_descriptor_set_create(device, layout, &set); + if (result != VK_SUCCESS) { + *pCount = i; + return result; + } + + pDescriptorSets[i] = anv_descriptor_set_to_handle(set); + } + + *pCount = count; + + return VK_SUCCESS; +} + +VkResult anv_UpdateDescriptorSets( + VkDevice device, + uint32_t writeCount, + const VkWriteDescriptorSet* pDescriptorWrites, + uint32_t copyCount, + const VkCopyDescriptorSet* pDescriptorCopies) +{ + for (uint32_t i = 0; i < writeCount; i++) { + const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; + ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet); + + switch (write->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for (uint32_t j = 0; j < write->count; j++) { + set->descriptors[write->destBinding + j].sampler = + anv_sampler_from_handle(write->pDescriptors[j].sampler); + } + + if (write->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) + break; + + /* fallthrough */ + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (uint32_t j = 0; j < write->count; j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pDescriptors[j].imageView); + set->descriptors[write->destBinding + j].view = &iview->view; + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + 
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + anv_finishme("texel buffers not implemented"); + break; + + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + anv_finishme("input attachments not implemented"); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < write->count; j++) { + ANV_FROM_HANDLE(anv_buffer_view, bview, + write->pDescriptors[j].bufferView); + set->descriptors[write->destBinding + j].view = &bview->view; + } + + default: + break; + } + } + + for (uint32_t i = 0; i < copyCount; i++) { + const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; + ANV_FROM_HANDLE(anv_descriptor_set, src, copy->destSet); + ANV_FROM_HANDLE(anv_descriptor_set, dest, copy->destSet); + for (uint32_t j = 0; j < copy->count; j++) { + dest->descriptors[copy->destBinding + j] = + src->descriptors[copy->srcBinding + j]; + } + } + + return VK_SUCCESS; +} + +// State object functions + +static inline int64_t +clamp_int64(int64_t x, int64_t min, int64_t max) +{ + if (x < min) + return min; + else if (x < max) + return x; + else + return max; +} + +VkResult anv_CreateDynamicViewportState( + VkDevice _device, + const VkDynamicViewportStateCreateInfo* pCreateInfo, + VkDynamicViewportState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_vp_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + unsigned count = pCreateInfo->viewportAndScissorCount; + state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool, + count * 64, 64); + state->cc_vp = anv_state_pool_alloc(&device->dynamic_state_pool, + count * 8, 32); + state->scissor = 
anv_state_pool_alloc(&device->dynamic_state_pool, + count * 32, 32); + + for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) { + const VkViewport *vp = &pCreateInfo->pViewports[i]; + const VkRect2D *s = &pCreateInfo->pScissors[i]; + + struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, + .ViewportMatrixElementm30 = vp->originX + vp->width / 2, + .ViewportMatrixElementm31 = vp->originY + vp->height / 2, + .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->originX, + .XMaxViewPort = vp->originX + vp->width - 1, + .YMinViewPort = vp->originY, + .YMaxViewPort = vp->originY + vp->height - 1, + }; + + struct GEN8_CC_VIEWPORT cc_viewport = { + .MinimumDepth = vp->minDepth, + .MaximumDepth = vp->maxDepth + }; + + /* Since xmax and ymax are inclusive, we have to have xmax < xmin or + * ymax < ymin for empty clips. In case clip x, y, width height are all + * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't + * what we want. Just special case empty clips and produce a canonical + * empty clip. */ + static const struct GEN8_SCISSOR_RECT empty_scissor = { + .ScissorRectangleYMin = 1, + .ScissorRectangleXMin = 1, + .ScissorRectangleYMax = 0, + .ScissorRectangleXMax = 0 + }; + + const int max = 0xffff; + struct GEN8_SCISSOR_RECT scissor = { + /* Do this math using int64_t so overflow gets clamped correctly. 
*/ + .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), + .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), + .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), + .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) + }; + + GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport); + GEN8_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 32, &cc_viewport); + + if (s->extent.width <= 0 || s->extent.height <= 0) { + GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor); + } else { + GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor); + } + } + + *pState = anv_dynamic_vp_state_to_handle(state); + + return VK_SUCCESS; +} + +VkResult anv_DestroyDynamicViewportState( + VkDevice _device, + VkDynamicViewportState _vp_state) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, _vp_state); + + anv_state_pool_free(&device->dynamic_state_pool, vp_state->sf_clip_vp); + anv_state_pool_free(&device->dynamic_state_pool, vp_state->cc_vp); + anv_state_pool_free(&device->dynamic_state_pool, vp_state->scissor); + + anv_device_free(device, vp_state); + + return VK_SUCCESS; +} + +VkResult anv_CreateDynamicRasterState( + VkDevice _device, + const VkDynamicRasterStateCreateInfo* pCreateInfo, + VkDynamicRasterState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_rs_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .LineWidth = pCreateInfo->lineWidth, + }; + + GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); + + bool enable_bias = pCreateInfo->depthBias != 0.0f || + 
pCreateInfo->slopeScaledDepthBias != 0.0f; + struct GEN8_3DSTATE_RASTER raster = { + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = pCreateInfo->depthBias, + .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias, + .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp + }; + + GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster); + + *pState = anv_dynamic_rs_state_to_handle(state); + + return VK_SUCCESS; +} + +VkResult anv_DestroyDynamicRasterState( + VkDevice _device, + VkDynamicRasterState _rs_state) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, _rs_state); + + anv_device_free(device, rs_state); + + return VK_SUCCESS; +} + +VkResult anv_CreateDynamicColorBlendState( + VkDevice _device, + const VkDynamicColorBlendStateCreateInfo* pCreateInfo, + VkDynamicColorBlendState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_cb_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_COLOR_CALC_STATE color_calc_state = { + .BlendConstantColorRed = pCreateInfo->blendConst[0], + .BlendConstantColorGreen = pCreateInfo->blendConst[1], + .BlendConstantColorBlue = pCreateInfo->blendConst[2], + .BlendConstantColorAlpha = pCreateInfo->blendConst[3] + }; + + GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); + + *pState = anv_dynamic_cb_state_to_handle(state); + + return VK_SUCCESS; +} + +VkResult anv_DestroyDynamicColorBlendState( + VkDevice _device, + VkDynamicColorBlendState _cb_state) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, _cb_state); + + 
anv_device_free(device, cb_state); + + return VK_SUCCESS; +} + +VkResult anv_CreateDynamicDepthStencilState( + VkDevice _device, + const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, + VkDynamicDepthStencilState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_ds_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + + /* Is this what we need to do? */ + .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0, + + .StencilTestMask = pCreateInfo->stencilReadMask & 0xff, + .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, + + .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff, + .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, + }; + + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil, + &wm_depth_stencil); + + struct GEN8_COLOR_CALC_STATE color_calc_state = { + .StencilReferenceValue = pCreateInfo->stencilFrontRef, + .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef + }; + + GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); + + *pState = anv_dynamic_ds_state_to_handle(state); + + return VK_SUCCESS; +} + +VkResult anv_DestroyDynamicDepthStencilState( + VkDevice _device, + VkDynamicDepthStencilState _ds_state) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, _ds_state); + + anv_device_free(device, ds_state); + + return VK_SUCCESS; +} + +// Command buffer functions + +VkResult anv_CreateCommandPool( + VkDevice device, + const VkCmdPoolCreateInfo* pCreateInfo, + VkCmdPool* pCmdPool) +{ + pCmdPool->handle = 7; + + stub_return(VK_SUCCESS); +} + +VkResult 
anv_DestroyCommandPool( + VkDevice device, + VkCmdPool cmdPool) +{ + stub_return(VK_SUCCESS); +} + +VkResult anv_ResetCommandPool( + VkDevice device, + VkCmdPool cmdPool, + VkCmdPoolResetFlags flags) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_CreateFramebuffer( + VkDevice _device, + const VkFramebufferCreateInfo* pCreateInfo, + VkFramebuffer* pFramebuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_framebuffer *framebuffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); + + size_t size = sizeof(*framebuffer) + + sizeof(struct anv_attachment_view *) * pCreateInfo->attachmentCount; + framebuffer = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (framebuffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + framebuffer->attachment_count = pCreateInfo->attachmentCount; + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + ANV_FROM_HANDLE(anv_attachment_view, view, + pCreateInfo->pAttachments[i].view); + + framebuffer->attachments[i] = view; + } + + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + + anv_CreateDynamicViewportState(anv_device_to_handle(device), + &(VkDynamicViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO, + .viewportAndScissorCount = 1, + .pViewports = (VkViewport[]) { + { + .originX = 0, + .originY = 0, + .width = pCreateInfo->width, + .height = pCreateInfo->height, + .minDepth = 0, + .maxDepth = 1 + }, + }, + .pScissors = (VkRect2D[]) { + { { 0, 0 }, + { pCreateInfo->width, pCreateInfo->height } }, + } + }, + &framebuffer->vp_state); + + *pFramebuffer = anv_framebuffer_to_handle(framebuffer); + + return VK_SUCCESS; +} + +VkResult anv_DestroyFramebuffer( + VkDevice _device, + VkFramebuffer _fb) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); + + 
anv_DestroyDynamicViewportState(anv_device_to_handle(device), + fb->vp_state); + anv_device_free(device, fb); + + return VK_SUCCESS; +} + +VkResult anv_CreateRenderPass( + VkDevice _device, + const VkRenderPassCreateInfo* pCreateInfo, + VkRenderPass* pRenderPass) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_render_pass *pass; + size_t size; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + + size = sizeof(*pass) + + pCreateInfo->subpassCount * sizeof(struct anv_subpass); + pass = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pass == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Clear the subpasses along with the parent pass. This required because + * each array member of anv_subpass must be a valid pointer if not NULL. + */ + memset(pass, 0, size); + + pass->attachment_count = pCreateInfo->attachmentCount; + pass->subpass_count = pCreateInfo->subpassCount; + + size = pCreateInfo->attachmentCount * sizeof(*pass->attachments); + pass->attachments = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + pass->attachments[i].format = pCreateInfo->pAttachments[i].format; + pass->attachments[i].samples = pCreateInfo->pAttachments[i].samples; + pass->attachments[i].load_op = pCreateInfo->pAttachments[i].loadOp; + pass->attachments[i].stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + // pass->attachments[i].store_op = pCreateInfo->pAttachments[i].storeOp; + // pass->attachments[i].stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + } + + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + struct anv_subpass *subpass = &pass->subpasses[i]; + + subpass->input_count = desc->inputCount; + subpass->color_count = desc->colorCount; + + if (desc->inputCount > 0) { + subpass->input_attachments = + 
anv_device_alloc(device, desc->inputCount * sizeof(uint32_t), + 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + + for (uint32_t j = 0; j < desc->inputCount; j++) { + subpass->input_attachments[j] + = desc->inputAttachments[j].attachment; + } + } + + if (desc->colorCount > 0) { + subpass->color_attachments = + anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), + 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + + for (uint32_t j = 0; j < desc->colorCount; j++) { + subpass->color_attachments[j] + = desc->colorAttachments[j].attachment; + } + } + + if (desc->resolveAttachments) { + subpass->resolve_attachments = + anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), + 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + + for (uint32_t j = 0; j < desc->colorCount; j++) { + subpass->resolve_attachments[j] + = desc->resolveAttachments[j].attachment; + } + } + + subpass->depth_stencil_attachment = desc->depthStencilAttachment.attachment; + } + + *pRenderPass = anv_render_pass_to_handle(pass); + + return VK_SUCCESS; +} + +VkResult anv_DestroyRenderPass( + VkDevice _device, + VkRenderPass _pass) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_render_pass, pass, _pass); + + anv_device_free(device, pass->attachments); + + for (uint32_t i = 0; i < pass->subpass_count; i++) { + /* In VkSubpassCreateInfo, each of the attachment arrays may be null. + * Don't free the null arrays. 
+ */ + struct anv_subpass *subpass = &pass->subpasses[i]; + + anv_device_free(device, subpass->input_attachments); + anv_device_free(device, subpass->color_attachments); + anv_device_free(device, subpass->resolve_attachments); + } + + anv_device_free(device, pass); + + return VK_SUCCESS; +} + +VkResult anv_GetRenderAreaGranularity( + VkDevice device, + VkRenderPass renderPass, + VkExtent2D* pGranularity) +{ + *pGranularity = (VkExtent2D) { 1, 1 }; + + return VK_SUCCESS; +} + +void vkCmdDbgMarkerBegin( + VkCmdBuffer cmdBuffer, + const char* pMarker) + __attribute__ ((visibility ("default"))); + +void vkCmdDbgMarkerEnd( + VkCmdBuffer cmdBuffer) + __attribute__ ((visibility ("default"))); + +void vkCmdDbgMarkerBegin( + VkCmdBuffer cmdBuffer, + const char* pMarker) +{ +} + +void vkCmdDbgMarkerEnd( + VkCmdBuffer cmdBuffer) +{ +} diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py new file mode 100644 index 00000000000..96c4884d158 --- /dev/null +++ b/src/vulkan/anv_entrypoints_gen.py @@ -0,0 +1,269 @@ +# coding=utf-8 +# +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# + +import fileinput, re, sys + +# Each function typedef in the vulkan.h header is all on one line and matches +# this regepx. We hope that won't change. + +p = re.compile('typedef ([^ ]*) *\(VKAPI \*PFN_vk([^(]*)\)(.*);') + +entrypoints = [] + +# We generate a static hash table for entry point lookup +# (vkGetProcAddress). We use a linear congruential generator for our hash +# function and a power-of-two size table. The prime numbers are determined +# experimentally. + +none = 0xffff +hash_size = 256 +u32_mask = 2**32 - 1 +hash_mask = hash_size - 1 + +prime_factor = 5024183 +prime_step = 19 + +def hash(name): + h = 0; + for c in name: + h = (h * prime_factor + ord(c)) & u32_mask + + return h + +opt_header = False +opt_code = False + +if (sys.argv[1] == "header"): + opt_header = True + sys.argv.pop() +elif (sys.argv[1] == "code"): + opt_code = True + sys.argv.pop() + +# Parse the entry points in the header + +i = 0 +for line in fileinput.input(): + m = p.match(line) + if (m): + if m.group(2) == 'VoidFunction': + continue + fullname = "vk" + m.group(2) + h = hash(fullname) + entrypoints.append((m.group(1), m.group(2), m.group(3), i, h)) + i = i + 1 + +# For outputting entrypoints.h we generate a anv_EntryPoint() prototype +# per entry point. 
+ +if opt_header: + for type, name, args, num, h in entrypoints: + print "%s anv_%s%s;" % (type, name, args) + print "%s anv_validate_%s%s;" % (type, name, args) + exit() + + + +print """/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* DO NOT EDIT! This is a generated file. */ + +#include "anv_private.h" + +struct anv_entrypoint { + uint32_t name; + uint32_t hash; + void *function; + void *validate; +}; + +/* We use a big string constant to avoid lots of reloctions from the entry + * point table to lots of little strings. The entries in the entry point table + * store the index into this big string. + */ + +static const char strings[] =""" + +offsets = [] +i = 0; +for type, name, args, num, h in entrypoints: + print " \"vk%s\\0\"" % name + offsets.append(i) + i += 2 + len(name) + 1 +print """ ; + +/* Weak aliases for all potential validate functions. 
These will resolve to + * NULL if they're not defined, which lets the resolve_entrypoint() function + * either pick a validate wrapper if available or just plug in the actual + * entry point. + */ +""" + +for type, name, args, num, h in entrypoints: + print "%s anv_validate_%s%s __attribute__ ((weak));" % (type, name, args) + +# Now generate the table of all entry points and their validation functions + +print "\nstatic const struct anv_entrypoint entrypoints[] = {" +for type, name, args, num, h in entrypoints: + print " { %5d, 0x%08x, anv_%s, anv_validate_%s }," % (offsets[num], h, name, name) +print "};\n" + +print """ +#ifdef DEBUG +static bool enable_validate = true; +#else +static bool enable_validate = false; +#endif + +/* We can't use symbols that need resolving (like, oh, getenv) in the resolve + * function. This means that we have to determine whether or not to use the + * validation layer sometime before that. The constructor function attribute asks + * the dynamic linker to invoke determine_validate() at dlopen() time which + * works. + */ +static void __attribute__ ((constructor)) +determine_validate(void) +{ + const char *s = getenv("ANV_VALIDATE"); + + if (s) + enable_validate = atoi(s); +} + +static void * __attribute__ ((noinline)) +resolve_entrypoint(uint32_t index) +{ + if (enable_validate && entrypoints[index].validate) + return entrypoints[index].validate; + + return entrypoints[index].function; +} +""" + +# Now output ifuncs and their resolve helpers for all entry points. The +# resolve helper calls resolve_entrypoint() with the entry point index, which +# lets the resolver look it up in the table. + +for type, name, args, num, h in entrypoints: + print "static void *resolve_%s(void) { return resolve_entrypoint(%d); }" % (name, num) + print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name) + + +# Now generate the hash table used for entry point look up. 
This is a +# uint16_t table of entry point indices. We use 0xffff to indicate an entry +# in the hash table is empty. + +map = [none for f in xrange(hash_size)] +collisions = [0 for f in xrange(10)] +for type, name, args, num, h in entrypoints: + level = 0 + while map[h & hash_mask] != none: + h = h + prime_step + level = level + 1 + if level > 9: + collisions[9] += 1 + else: + collisions[level] += 1 + map[h & hash_mask] = num + +print "/* Hash table stats:" +print " * size %d entries" % hash_size +print " * collisions entries" +for i in xrange(10): + if (i == 9): + plus = "+" + else: + plus = " " + + print " * %2d%s %4d" % (i, plus, collisions[i]) +print " */\n" + +print "#define none 0x%04x\n" % none + +print "static const uint16_t map[] = {" +for i in xrange(0, hash_size, 8): + print " ", + for j in xrange(i, i + 8): + if map[j] & 0xffff == 0xffff: + print " none,", + else: + print "0x%04x," % (map[j] & 0xffff), + print + +print "};" + +# Finally we generate the hash table lookup function. The hash function and +# linear probing algorithm matches the hash table generated above. 
+ +print """ +void * +anv_lookup_entrypoint(const char *name) +{ + static const uint32_t prime_factor = %d; + static const uint32_t prime_step = %d; + const struct anv_entrypoint *e; + uint32_t hash, h, i; + const char *p; + + hash = 0; + for (p = name; *p; p++) + hash = hash * prime_factor + *p; + + h = hash; + do { + i = map[h & %d]; + if (i == none) + return NULL; + e = &entrypoints[i]; + h += prime_step; + } while (e->hash != hash); + + if (strcmp(name, strings + e->name) != 0) + return NULL; + + return resolve_entrypoint(i); +} +""" % (prime_factor, prime_step, hash_mask) diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c new file mode 100644 index 00000000000..3cbcff5730f --- /dev/null +++ b/src/vulkan/anv_formats.c @@ -0,0 +1,334 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_private.h" + +#define UNSUPPORTED 0xffff + +#define fmt(__vk_fmt, ...) \ + [__vk_fmt] = { .name = #__vk_fmt, __VA_ARGS__ } + +static const struct anv_format anv_formats[] = { + fmt(VK_FORMAT_UNDEFINED, RAW, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R4G4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_R4G4_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_R5G6B5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_R5G6B5_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_R5G5B5A1_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_R5G5B5A1_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_R8_UNORM, R8_UNORM, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SNORM, R8_SNORM, .cpp = 1, .num_channels = 1,), + fmt(VK_FORMAT_R8_USCALED, R8_USCALED, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SSCALED, R8_SSCALED, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_UINT, R8_UINT, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SINT, R8_SINT, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_R8G8_UNORM, R8G8_UNORM, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SNORM, R8G8_SNORM, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_USCALED, R8G8_USCALED, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SSCALED, R8G8_SSCALED, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_UINT, R8G8_UINT, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SINT, R8G8_SINT, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SRGB, UNSUPPORTED), /* L8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8_UNORM, R8G8B8X8_UNORM, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SNORM, R8G8B8_SNORM, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_USCALED, R8G8B8_USCALED, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SSCALED, R8G8B8_SSCALED, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_UINT, R8G8B8_UINT, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SINT, R8G8B8_SINT, .cpp = 3, .num_channels = 3), 
+ fmt(VK_FORMAT_R8G8B8_SRGB, UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8A8_UNORM, R8G8B8A8_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SNORM, R8G8B8A8_SNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_USCALED, R8G8B8A8_USCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SSCALED, R8G8B8A8_SSCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_UINT, R8G8B8A8_UINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SINT, R8G8B8A8_SINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SRGB, R8G8B8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UNORM, R10G10B10A2_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SNORM, R10G10B10A2_SNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_USCALED, R10G10B10A2_USCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SSCALED, R10G10B10A2_SSCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UINT, R10G10B10A2_UINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SINT, R10G10B10A2_SINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SNORM, R16_SNORM, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_USCALED, R16_USCALED, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SSCALED, R16_SSCALED, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_UINT, R16_UINT, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SINT, R16_SINT, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SFLOAT, R16_FLOAT, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16G16_UNORM, R16G16_UNORM, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SNORM, R16G16_SNORM, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_USCALED, R16G16_USCALED, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SSCALED, R16G16_SSCALED, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_UINT, R16G16_UINT, .cpp = 4, .num_channels = 2), + 
fmt(VK_FORMAT_R16G16_SINT, R16G16_SINT, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SFLOAT, R16G16_FLOAT, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16B16_UNORM, R16G16B16_UNORM, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SNORM, R16G16B16_SNORM, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_USCALED, R16G16B16_USCALED, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SSCALED, R16G16B16_SSCALED, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_UINT, R16G16B16_UINT, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SINT, R16G16B16_SINT, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SFLOAT, R16G16B16_FLOAT, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16A16_UNORM, R16G16B16A16_UNORM, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SNORM, R16G16B16A16_SNORM, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_USCALED, R16G16B16A16_USCALED, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SSCALED, R16G16B16A16_SSCALED, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_UINT, R16G16B16A16_UINT, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SINT, R16G16B16A16_SINT, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, R16G16B16A16_FLOAT, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R32_UINT, R32_UINT, .cpp = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32_SINT, R32_SINT, .cpp = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32G32_UINT, R32G32_UINT, .cpp = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SINT, R32G32_SINT, .cpp = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SFLOAT, R32G32_FLOAT, .cpp = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32B32_UINT, R32G32B32_UINT, .cpp = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SINT, R32G32B32_SINT, .cpp = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, R32G32B32_FLOAT, .cpp = 12, .num_channels = 3,), + 
fmt(VK_FORMAT_R32G32B32A32_UINT, R32G32B32A32_UINT, .cpp = 16, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SINT, R32G32B32A32_SINT, .cpp = 16, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, R32G32B32A32_FLOAT, .cpp = 16, .num_channels = 4,), + fmt(VK_FORMAT_R64_SFLOAT, R64_FLOAT, .cpp = 8, .num_channels = 1), + fmt(VK_FORMAT_R64G64_SFLOAT, R64G64_FLOAT, .cpp = 16, .num_channels = 2), + fmt(VK_FORMAT_R64G64B64_SFLOAT, R64G64B64_FLOAT, .cpp = 24, .num_channels = 3), + fmt(VK_FORMAT_R64G64B64A64_SFLOAT, R64G64B64A64_FLOAT, .cpp = 32, .num_channels = 4), + fmt(VK_FORMAT_R11G11B10_UFLOAT, R11G11B10_FLOAT, .cpp = 4, .num_channels = 3), + fmt(VK_FORMAT_R9G9B9E5_UFLOAT, R9G9B9E5_SHAREDEXP, .cpp = 4, .num_channels = 3), + + fmt(VK_FORMAT_D16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1, .depth_format = D16_UNORM), + fmt(VK_FORMAT_D24_UNORM, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), + fmt(VK_FORMAT_D32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1, .depth_format = D32_FLOAT), + fmt(VK_FORMAT_S8_UINT, R8_UINT, .cpp = 1, .num_channels = 1, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, R16_UNORM, .cpp = 2, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, R32_FLOAT, .cpp = 4, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), + + fmt(VK_FORMAT_BC1_RGB_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGB_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGBA_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGBA_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_BC2_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC2_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_BC3_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC3_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_BC4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC4_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC5_UNORM, UNSUPPORTED), + 
fmt(VK_FORMAT_BC5_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC6H_UFLOAT, UNSUPPORTED), + fmt(VK_FORMAT_BC6H_SFLOAT, UNSUPPORTED), + fmt(VK_FORMAT_BC7_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC7_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11G11_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11G11_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_SRGB, UNSUPPORTED), + 
fmt(VK_FORMAT_B4G4R4A4_UNORM, B4G4R4A4_UNORM, .cpp = 2, .num_channels = 4), + fmt(VK_FORMAT_B5G5R5A1_UNORM, B5G5R5A1_UNORM, .cpp = 2, .num_channels = 4), + fmt(VK_FORMAT_B5G6R5_UNORM, B5G6R5_UNORM, .cpp = 2, .num_channels = 3), + fmt(VK_FORMAT_B5G6R5_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SSCALED, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UINT, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SINT, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UNORM, B8G8R8A8_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B8G8R8A8_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SSCALED, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UINT, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SINT, UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SRGB, B8G8R8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UNORM, B10G10R10A2_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SNORM, B10G10R10A2_SNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_USCALED, B10G10R10A2_USCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SSCALED, B10G10R10A2_SSCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UINT, B10G10R10A2_UINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SINT, B10G10R10A2_SINT, .cpp = 4, .num_channels = 4) +}; + +#undef fmt + +const struct anv_format * +anv_format_for_vk_format(VkFormat format) +{ + return &anv_formats[format]; +} + +bool +anv_is_vk_format_depth_or_stencil(VkFormat format) +{ + const struct anv_format *format_info = + anv_format_for_vk_format(format); + + if (format_info->depth_format != UNSUPPORTED && + format_info->depth_format != 0) + return true; + + return format_info->has_stencil; +} + +// Format capabilities + +struct surface_format_info { + bool exists; + int sampling; + int filtering; + 
int shadow_compare; + int chroma_key; + int render_target; + int alpha_blend; + int input_vb; + int streamed_output_vb; + int color_processing; +}; + +extern const struct surface_format_info surface_formats[]; + +VkResult anv_validate_GetPhysicalDeviceFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat _format, + VkFormatProperties* pFormatProperties) +{ + const struct anv_format *format = anv_format_for_vk_format(_format); + fprintf(stderr, "vkGetFormatProperties(%s)\n", format->name); + return anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); +} + +VkResult anv_GetPhysicalDeviceFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat _format, + VkFormatProperties* pFormatProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + const struct surface_format_info *info; + int gen; + + const struct anv_format *format = anv_format_for_vk_format(_format); + if (format == NULL) + return vk_error(VK_ERROR_INVALID_VALUE); + + gen = physical_device->info->gen * 10; + if (physical_device->info->is_haswell) + gen += 5; + + if (format->surface_format == UNSUPPORTED) + goto unsupported; + + info = &surface_formats[format->surface_format]; + if (!info->exists) + goto unsupported; + + uint32_t linear = 0, tiled = 0; + if (info->sampling <= gen) { + linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + } + if (info->render_target <= gen) { + linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + } + if (info->alpha_blend <= gen) { + linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + } + if (info->input_vb <= gen) { + linear |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + } + + pFormatProperties->linearTilingFeatures = linear; + pFormatProperties->optimalTilingFeatures = tiled; + + return VK_SUCCESS; + + unsupported: + 
pFormatProperties->linearTilingFeatures = 0; + pFormatProperties->optimalTilingFeatures = 0; + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageFormatProperties* pImageFormatProperties) +{ + /* TODO: We should do something here. Chad? */ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_GetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + uint32_t samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t* pNumProperties, + VkSparseImageFormatProperties* pProperties) +{ + stub_return(VK_UNSUPPORTED); +} diff --git a/src/vulkan/anv_gem.c b/src/vulkan/anv_gem.c new file mode 100644 index 00000000000..4ce857e2a5f --- /dev/null +++ b/src/vulkan/anv_gem.c @@ -0,0 +1,279 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define _DEFAULT_SOURCE + +#include +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) + +static int +anv_ioctl(int fd, unsigned long request, void *arg) +{ + int ret; + + do { + ret = ioctl(fd, request, arg); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_CREATE. + * + * Return gem handle, or 0 on failure. Gem handles are never 0. + */ +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + struct drm_i915_gem_create gem_create; + int ret; + + VG_CLEAR(gem_create); + gem_create.size = size; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); + if (ret != 0) { + /* FIXME: What do we do if this fails? */ + return 0; + } + + return gem_create.handle; +} + +void +anv_gem_close(struct anv_device *device, int gem_handle) +{ + struct drm_gem_close close; + + VG_CLEAR(close); + close.handle = gem_handle; + anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_MMAP. + */ +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size) +{ + struct drm_i915_gem_mmap gem_mmap; + int ret; + + gem_mmap.handle = gem_handle; + VG_CLEAR(gem_mmap.pad); + gem_mmap.offset = offset; + gem_mmap.size = size; + VG_CLEAR(gem_mmap.addr_ptr); + +#ifdef I915_MMAP_WC + gem_mmap.flags = 0; +#endif + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); + if (ret != 0) { + /* FIXME: Is NULL the right error return? 
Cf MAP_INVALID */ + return NULL; + } + + VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1)); + return (void *)(uintptr_t) gem_mmap.addr_ptr; +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). + */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + VG(VALGRIND_FREELIKE_BLOCK(p, 0)); + munmap(p, size); +} + +int +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + struct drm_i915_gem_userptr userptr; + int ret; + + VG_CLEAR(userptr); + userptr.user_ptr = (__u64)((unsigned long) mem); + userptr.user_size = size; + userptr.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); + if (ret == -1) + return 0; + + return userptr.handle; +} + +/** + * On error, \a timeout_ns holds the remaining time. + */ +int +anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns) +{ + struct drm_i915_gem_wait wait; + int ret; + + VG_CLEAR(wait); + wait.bo_handle = gem_handle; + wait.timeout_ns = *timeout_ns; + wait.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + *timeout_ns = wait.timeout_ns; + + return ret; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); +} + +int +anv_gem_set_tiling(struct anv_device *device, + int gem_handle, uint32_t stride, uint32_t tiling) +{ + struct drm_i915_gem_set_tiling set_tiling; + int ret; + + /* set_tiling overwrites the input on the error path, so we have to open + * code anv_ioctl. 
+ */ + + do { + VG_CLEAR(set_tiling); + set_tiling.handle = gem_handle; + set_tiling.tiling_mode = I915_TILING_X; + set_tiling.stride = stride; + + ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + drm_i915_getparam_t gp; + int ret, tmp; + + VG_CLEAR(gp); + gp.param = param; + gp.value = &tmp; + ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0) + return tmp; + + return 0; +} + +int +anv_gem_create_context(struct anv_device *device) +{ + struct drm_i915_gem_context_create create; + int ret; + + VG_CLEAR(create); + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret == -1) + return -1; + + return create.ctx_id; +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + struct drm_i915_gem_context_destroy destroy; + + VG_CLEAR(destroy); + destroy.ctx_id = context; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy); +} + +int +anv_gem_get_aperture(struct anv_physical_device *physical_dev, uint64_t *size) +{ + struct drm_i915_gem_get_aperture aperture; + int ret; + + VG_CLEAR(aperture); + ret = anv_ioctl(physical_dev->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (ret == -1) + return -1; + + *size = aperture.aper_available_size; + + return 0; +} + +int +anv_gem_handle_to_fd(struct anv_device *device, int gem_handle) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.handle = gem_handle; + args.flags = DRM_CLOEXEC; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + if (ret == -1) + return -1; + + return args.fd; +} + +int +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.fd = fd; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); + if (ret == -1) + return 0; + + return 
args.handle; +} diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c new file mode 100644 index 00000000000..21099cb7730 --- /dev/null +++ b/src/vulkan/anv_image.c @@ -0,0 +1,745 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +struct anv_image_view_info { + uint8_t surface_type; /**< RENDER_SURFACE_STATE.SurfaceType */ + bool is_array:1; /**< RENDER_SURFACE_STATE.SurfaceArray */ + bool is_cube:1; /**< RENDER_SURFACE_STATE.CubeFaceEnable* */ +}; + +static const uint8_t anv_halign[] = { + [4] = HALIGN4, + [8] = HALIGN8, + [16] = HALIGN16, +}; + +static const uint8_t anv_valign[] = { + [4] = VALIGN4, + [8] = VALIGN8, + [16] = VALIGN16, +}; + +static const uint8_t anv_surf_type_from_image_type[] = { + [VK_IMAGE_TYPE_1D] = SURFTYPE_1D, + [VK_IMAGE_TYPE_2D] = SURFTYPE_2D, + [VK_IMAGE_TYPE_3D] = SURFTYPE_3D, + +}; + +static const struct anv_image_view_info +anv_image_view_info_table[] = { + #define INFO(s, ...) { .surface_type = s, __VA_ARGS__ } + [VK_IMAGE_VIEW_TYPE_1D] = INFO(SURFTYPE_1D), + [VK_IMAGE_VIEW_TYPE_2D] = INFO(SURFTYPE_2D), + [VK_IMAGE_VIEW_TYPE_3D] = INFO(SURFTYPE_3D), + [VK_IMAGE_VIEW_TYPE_CUBE] = INFO(SURFTYPE_CUBE, .is_cube = 1), + [VK_IMAGE_VIEW_TYPE_1D_ARRAY] = INFO(SURFTYPE_1D, .is_array = 1), + [VK_IMAGE_VIEW_TYPE_2D_ARRAY] = INFO(SURFTYPE_2D, .is_array = 1), + [VK_IMAGE_VIEW_TYPE_CUBE_ARRAY] = INFO(SURFTYPE_CUBE, .is_array = 1, .is_cube = 1), + #undef INFO +}; + +static const struct anv_surf_type_limits { + int32_t width; + int32_t height; + int32_t depth; +} anv_surf_type_limits[] = { + [SURFTYPE_1D] = {16384, 0, 2048}, + [SURFTYPE_2D] = {16384, 16384, 2048}, + [SURFTYPE_3D] = {2048, 2048, 2048}, + [SURFTYPE_CUBE] = {16384, 16384, 340}, + [SURFTYPE_BUFFER] = {128, 16384, 64}, + [SURFTYPE_STRBUF] = {128, 16384, 64}, +}; + +static const struct anv_tile_info { + uint32_t width; + uint32_t height; + + /** + * Alignment for RENDER_SURFACE_STATE.SurfaceBaseAddress. + * + * To simplify calculations, the alignments defined in the table are + * sometimes larger than required. 
For example, Skylake requires that X and + * Y tiled buffers be aligned to 4K, but Broadwell permits smaller + * alignment. We choose 4K to accomodate both chipsets. The alignment of + * a linear buffer depends on its element type and usage. Linear depth + * buffers have the largest alignment, 64B, so we choose that for all linear + * buffers. + */ + uint32_t surface_alignment; +} anv_tile_info_table[] = { + [LINEAR] = { 1, 1, 64 }, + [XMAJOR] = { 512, 8, 4096 }, + [YMAJOR] = { 128, 32, 4096 }, + [WMAJOR] = { 128, 32, 4096 }, +}; + +static uint32_t +anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) +{ + if (anv_info->force_tile_mode) + return anv_info->tile_mode; + + if (anv_info->vk_info->format == VK_FORMAT_S8_UINT) + return WMAJOR; + + switch (anv_info->vk_info->tiling) { + case VK_IMAGE_TILING_LINEAR: + return LINEAR; + case VK_IMAGE_TILING_OPTIMAL: + return YMAJOR; + default: + assert(!"bad VKImageTiling"); + return LINEAR; + } +} + +static VkResult +anv_image_make_surface(const struct anv_image_create_info *create_info, + uint64_t *inout_image_size, + uint32_t *inout_image_alignment, + struct anv_surface *out_surface) +{ + /* See RENDER_SURFACE_STATE.SurfaceQPitch */ + static const uint16_t min_qpitch UNUSED = 0x4; + static const uint16_t max_qpitch UNUSED = 0x1ffc; + + const VkExtent3D *restrict extent = &create_info->vk_info->extent; + const uint32_t levels = create_info->vk_info->mipLevels; + const uint32_t array_size = create_info->vk_info->arraySize; + + const uint8_t tile_mode = anv_image_choose_tile_mode(create_info); + + const struct anv_tile_info *tile_info = + &anv_tile_info_table[tile_mode]; + + const struct anv_format *format_info = + anv_format_for_vk_format(create_info->vk_info->format); + + const uint32_t i = 4; /* FINISHME: Stop hardcoding subimage alignment */ + const uint32_t j = 4; /* FINISHME: Stop hardcoding subimage alignment */ + const uint32_t w0 = align_u32(extent->width, i); + const uint32_t h0 = 
align_u32(extent->height, j); + + uint16_t qpitch; + uint32_t mt_width; + uint32_t mt_height; + + if (levels == 1 && array_size == 1) { + qpitch = min_qpitch; + mt_width = w0; + mt_height = h0; + } else { + uint32_t w1 = align_u32(anv_minify(extent->width, 1), i); + uint32_t h1 = align_u32(anv_minify(extent->height, 1), j); + uint32_t w2 = align_u32(anv_minify(extent->width, 2), i); + + qpitch = h0 + h1 + 11 * j; + mt_width = MAX(w0, w1 + w2); + mt_height = array_size * qpitch; + } + + assert(qpitch >= min_qpitch); + if (qpitch > max_qpitch) { + anv_loge("image qpitch > 0x%x\n", max_qpitch); + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + + /* From the Broadwell PRM, RENDER_SURFACE_STATE.SurfaceQpitch: + * + * This field must be set an integer multiple of the Surface Vertical + * Alignment. + */ + assert(anv_is_aligned(qpitch, j)); + + const uint32_t stride = align_u32(mt_width * format_info->cpp, + tile_info->width); + const uint32_t size = stride * align_u32(mt_height, tile_info->height); + const uint32_t offset = align_u32(*inout_image_size, + tile_info->surface_alignment); + + *inout_image_size = offset + size; + *inout_image_alignment = MAX(*inout_image_alignment, + tile_info->surface_alignment); + + *out_surface = (struct anv_surface) { + .offset = offset, + .stride = stride, + .tile_mode = tile_mode, + .qpitch = qpitch, + .h_align = i, + .v_align = j, + }; + + return VK_SUCCESS; +} + +VkResult +anv_image_create(VkDevice _device, + const struct anv_image_create_info *create_info, + VkImage *pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + const VkImageCreateInfo *pCreateInfo = create_info->vk_info; + const VkExtent3D *restrict extent = &pCreateInfo->extent; + struct anv_image *image = NULL; + VkResult r; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); + + /* XXX: We don't handle any of these */ + anv_assert(pCreateInfo->imageType == VK_IMAGE_TYPE_2D); + anv_assert(pCreateInfo->mipLevels > 0); + 
anv_assert(pCreateInfo->arraySize > 0); + anv_assert(pCreateInfo->samples == 1); + anv_assert(pCreateInfo->extent.width > 0); + anv_assert(pCreateInfo->extent.height > 0); + anv_assert(pCreateInfo->extent.depth > 0); + + /* TODO(chadv): How should we validate inputs? */ + const uint8_t surf_type = + anv_surf_type_from_image_type[pCreateInfo->imageType]; + + const struct anv_surf_type_limits *limits = + &anv_surf_type_limits[surf_type]; + + if (extent->width > limits->width || + extent->height > limits->height || + extent->depth > limits->depth) { + /* TODO(chadv): What is the correct error? */ + anv_loge("image extent is too large"); + return vk_error(VK_ERROR_INVALID_MEMORY_SIZE); + } + + const struct anv_format *format_info = + anv_format_for_vk_format(pCreateInfo->format); + + image = anv_device_alloc(device, sizeof(*image), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!image) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(image, 0, sizeof(*image)); + image->type = pCreateInfo->imageType; + image->extent = pCreateInfo->extent; + image->format = pCreateInfo->format; + image->levels = pCreateInfo->mipLevels; + image->array_size = pCreateInfo->arraySize; + image->surf_type = surf_type; + + if (likely(!format_info->has_stencil || format_info->depth_format)) { + /* The image's primary surface is a color or depth surface. */ + r = anv_image_make_surface(create_info, &image->size, &image->alignment, + &image->primary_surface); + if (r != VK_SUCCESS) + goto fail; + } + + if (format_info->has_stencil) { + /* From the GPU's perspective, the depth buffer and stencil buffer are + * separate buffers. From Vulkan's perspective, though, depth and + * stencil reside in the same image. To satisfy Vulkan and the GPU, we + * place the depth and stencil buffers in the same bo. 
+ */ + VkImageCreateInfo stencil_info = *pCreateInfo; + stencil_info.format = VK_FORMAT_S8_UINT; + + r = anv_image_make_surface( + &(struct anv_image_create_info) { + .vk_info = &stencil_info, + }, + &image->size, &image->alignment, &image->stencil_surface); + + if (r != VK_SUCCESS) + goto fail; + } + + *pImage = anv_image_to_handle(image); + + return VK_SUCCESS; + +fail: + if (image) + anv_device_free(device, image); + + return r; +} + +VkResult +anv_CreateImage(VkDevice device, + const VkImageCreateInfo *pCreateInfo, + VkImage *pImage) +{ + return anv_image_create(device, + &(struct anv_image_create_info) { + .vk_info = pCreateInfo, + }, + pImage); +} + +VkResult +anv_DestroyImage(VkDevice _device, VkImage _image) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_device_free(device, anv_image_from_handle(_image)); + + return VK_SUCCESS; +} + +VkResult anv_GetImageSubresourceLayout( + VkDevice device, + VkImage image, + const VkImageSubresource* pSubresource, + VkSubresourceLayout* pLayout) +{ + stub_return(VK_UNSUPPORTED); +} + +void +anv_surface_view_fini(struct anv_device *device, + struct anv_surface_view *view) +{ + anv_state_pool_free(&device->surface_state_pool, view->surface_state); +} + +void +anv_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + struct anv_surface_view *view = &iview->view; + struct anv_surface *surface; + + const struct anv_format *format_info = + anv_format_for_vk_format(pCreateInfo->format); + + const struct anv_image_view_info *view_type_info + = &anv_image_view_info_table[pCreateInfo->viewType]; + + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) + anv_finishme("non-2D image views"); + + switch (pCreateInfo->subresourceRange.aspect) { + case VK_IMAGE_ASPECT_STENCIL: + 
anv_finishme("stencil image views"); + abort(); + break; + case VK_IMAGE_ASPECT_DEPTH: + case VK_IMAGE_ASPECT_COLOR: + view->offset = image->offset; + surface = &image->primary_surface; + break; + default: + unreachable(""); + break; + } + + view->bo = image->bo; + view->offset = image->offset + surface->offset; + view->format = pCreateInfo->format; + + iview->extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, range->baseMipLevel), + .height = anv_minify(image->extent.height, range->baseMipLevel), + .depth = anv_minify(image->extent.depth, range->baseMipLevel), + }; + + uint32_t depth = 1; + if (range->arraySize > 1) { + depth = range->arraySize; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + static const uint32_t vk_to_gen_swizzle[] = { + [VK_CHANNEL_SWIZZLE_ZERO] = SCS_ZERO, + [VK_CHANNEL_SWIZZLE_ONE] = SCS_ONE, + [VK_CHANNEL_SWIZZLE_R] = SCS_RED, + [VK_CHANNEL_SWIZZLE_G] = SCS_GREEN, + [VK_CHANNEL_SWIZZLE_B] = SCS_BLUE, + [VK_CHANNEL_SWIZZLE_A] = SCS_ALPHA + }; + + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = view_type_info->surface_type, + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = format_info->surface_format, + .SurfaceVerticalAlignment = anv_valign[surface->v_align], + .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .TileMode = surface->tile_mode, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GEN8_MOCS, + + /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in + * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have + * both Base Mip Level fields nonzero". 
+ */ + .BaseMipLevel = 0.0, + + .SurfaceQPitch = surface->qpitch >> 2, + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->stride - 1, + .MinimumArrayElement = range->baseArraySlice, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + /* For sampler surfaces, the hardware interprets field MIPCount/LOD as + * MIPCount. The range of levels accessible by the sampler engine is + * [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. + */ + .MIPCountLOD = range->mipLevels - 1, + .SurfaceMinLOD = range->baseMipLevel, + + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = vk_to_gen_swizzle[pCreateInfo->channels.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[pCreateInfo->channels.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[pCreateInfo->channels.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[pCreateInfo->channels.a], + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, view->offset }, + }; + + if (cmd_buffer) { + view->surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + } else { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + + GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); +} + +VkResult +anv_validate_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *subresource; + const struct anv_image_view_info *view_info; + const struct anv_format *view_format_info; + const struct anv_format *image_format_info; + + /* Validate structure type before dereferencing it. 
*/ + assert(pCreateInfo); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); + subresource = &pCreateInfo->subresourceRange; + + /* Validate viewType is in range before using it. */ + assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); + assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); + view_info = &anv_image_view_info_table[pCreateInfo->viewType]; + + /* Validate format is in range before using it. */ + assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); + assert(pCreateInfo->format <= VK_FORMAT_END_RANGE); + image_format_info = anv_format_for_vk_format(image->format); + view_format_info = anv_format_for_vk_format(pCreateInfo->format); + + /* Validate channel swizzles. */ + assert(pCreateInfo->channels.r >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->channels.r <= VK_CHANNEL_SWIZZLE_END_RANGE); + assert(pCreateInfo->channels.g >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->channels.g <= VK_CHANNEL_SWIZZLE_END_RANGE); + assert(pCreateInfo->channels.b >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->channels.b <= VK_CHANNEL_SWIZZLE_END_RANGE); + assert(pCreateInfo->channels.a >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->channels.a <= VK_CHANNEL_SWIZZLE_END_RANGE); + + /* Validate subresource. */ + assert(subresource->aspect >= VK_IMAGE_ASPECT_BEGIN_RANGE); + assert(subresource->aspect <= VK_IMAGE_ASPECT_END_RANGE); + assert(subresource->mipLevels > 0); + assert(subresource->arraySize > 0); + assert(subresource->baseMipLevel < image->levels); + assert(subresource->baseMipLevel + subresource->mipLevels <= image->levels); + assert(subresource->baseArraySlice < image->array_size); + assert(subresource->baseArraySlice + subresource->arraySize <= image->array_size); + assert(pView); + + if (view_info->is_cube) { + assert(subresource->baseArraySlice % 6 == 0); + assert(subresource->arraySize % 6 == 0); + } + + /* Validate format. 
*/ + switch (subresource->aspect) { + case VK_IMAGE_ASPECT_COLOR: + assert(!image_format_info->depth_format); + assert(!image_format_info->has_stencil); + assert(!view_format_info->depth_format); + assert(!view_format_info->has_stencil); + assert(view_format_info->cpp == image_format_info->cpp); + break; + case VK_IMAGE_ASPECT_DEPTH: + assert(image_format_info->depth_format); + assert(view_format_info->depth_format); + assert(view_format_info->cpp == image_format_info->cpp); + break; + case VK_IMAGE_ASPECT_STENCIL: + /* FINISHME: Is it legal to have an R8 view of S8? */ + assert(image_format_info->has_stencil); + assert(view_format_info->has_stencil); + break; + default: + assert(!"bad VkImageAspect"); + break; + } + + return anv_CreateImageView(_device, pCreateInfo, pView); +} + +VkResult +anv_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_image_view *view; + + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_image_view_init(view, device, pCreateInfo, NULL); + + *pView = anv_image_view_to_handle(view); + + return VK_SUCCESS; +} + +VkResult +anv_DestroyImageView(VkDevice _device, VkImageView _iview) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image_view, iview, _iview); + + anv_surface_view_fini(device, &iview->view); + anv_device_free(device, iview); + + return VK_SUCCESS; +} + +void +anv_color_attachment_view_init(struct anv_color_attachment_view *aview, + struct anv_device *device, + const VkAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + struct anv_surface_view *view = &aview->view; + struct anv_surface *surface = &image->primary_surface; + const struct anv_format *format_info = + 
anv_format_for_vk_format(pCreateInfo->format); + + aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; + + anv_assert(pCreateInfo->arraySize > 0); + anv_assert(pCreateInfo->mipLevel < image->levels); + anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); + + view->bo = image->bo; + view->offset = image->offset + surface->offset; + view->format = pCreateInfo->format; + + aview->base.extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), + .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), + .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), + }; + + uint32_t depth = 1; + if (pCreateInfo->arraySize > 1) { + depth = pCreateInfo->arraySize; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + if (cmd_buffer) { + view->surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + } else { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_2D, + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = format_info->surface_format, + .SurfaceVerticalAlignment = anv_valign[surface->v_align], + .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .TileMode = surface->tile_mode, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GEN8_MOCS, + + /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in + * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have + * both Base Mip Level fields nonzero". 
+ */ + .BaseMipLevel = 0.0, + + .SurfaceQPitch = surface->qpitch >> 2, + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->stride - 1, + .MinimumArrayElement = pCreateInfo->baseArraySlice, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + /* For render target surfaces, the hardware interprets field MIPCount/LOD as + * LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + .SurfaceMinLOD = 0, + .MIPCountLOD = pCreateInfo->mipLevel, + + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, view->offset }, + }; + + GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); +} + +static void +anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, + const VkAttachmentViewCreateInfo *pCreateInfo) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + struct anv_surface *depth_surface = &image->primary_surface; + struct anv_surface *stencil_surface = &image->stencil_surface; + const struct anv_format *format = + anv_format_for_vk_format(image->format); + + view->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL; + + /* XXX: We don't handle any of these */ + anv_assert(pCreateInfo->mipLevel == 0); + anv_assert(pCreateInfo->baseArraySlice == 0); + anv_assert(pCreateInfo->arraySize == 1); + + view->bo = image->bo; + + view->depth_stride = depth_surface->stride; + view->depth_offset = image->offset + depth_surface->offset; + view->depth_format = format->depth_format; + view->depth_qpitch = 0; /* FINISHME: QPitch */ + + view->stencil_stride = stencil_surface->stride; + 
view->stencil_offset = image->offset + stencil_surface->offset; + view->stencil_qpitch = 0; /* FINISHME: QPitch */ +} + +VkResult +anv_CreateAttachmentView(VkDevice _device, + const VkAttachmentViewCreateInfo *pCreateInfo, + VkAttachmentView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO); + + if (anv_is_vk_format_depth_or_stencil(pCreateInfo->format)) { + struct anv_depth_stencil_view *view = + anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_depth_stencil_view_init(view, pCreateInfo); + + *pView = anv_attachment_view_to_handle(&view->base); + } else { + struct anv_color_attachment_view *view = + anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_color_attachment_view_init(view, device, pCreateInfo, NULL); + + *pView = anv_attachment_view_to_handle(&view->base); + } + + return VK_SUCCESS; +} + +VkResult +anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _view) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_attachment_view, view, _view); + + if (view->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR) { + struct anv_color_attachment_view *aview = + (struct anv_color_attachment_view *)view; + + anv_surface_view_fini(device, &aview->view); + } + + anv_device_free(device, view); + + return VK_SUCCESS; +} diff --git a/src/vulkan/anv_intel.c b/src/vulkan/anv_intel.c new file mode 100644 index 00000000000..9fc06aef6f8 --- /dev/null +++ b/src/vulkan/anv_intel.c @@ -0,0 +1,97 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including 
without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +VkResult anv_CreateDmaBufImageINTEL( + VkDevice _device, + const VkDmaBufImageCreateInfo* pCreateInfo, + VkDeviceMemory* pMem, + VkImage* pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_device_memory *mem; + struct anv_image *image; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); + + mem = anv_device_alloc(device, sizeof(*mem), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); + if (!mem->bo.gem_handle) { + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } + + mem->bo.map = NULL; + mem->bo.index = 0; + mem->bo.offset = 0; + mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; + + image = anv_device_alloc(device, sizeof(*image), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (image == NULL) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto 
fail_mem; + } + + *image = (struct anv_image) { + .bo = &mem->bo, + .offset = 0, + .type = VK_IMAGE_TYPE_2D, + .extent = pCreateInfo->extent, + .size = mem->bo.size, + + .primary_surface = { + .offset = 0, + .stride = pCreateInfo->strideInBytes, + .tile_mode = XMAJOR, + }, + }; + + assert(image->extent.width > 0); + assert(image->extent.height > 0); + assert(image->extent.depth == 1); + + *pMem = anv_device_memory_to_handle(mem); + *pImage = anv_image_to_handle(image); + + return VK_SUCCESS; + + fail_mem: + anv_gem_close(device, mem->bo.gem_handle); + fail: + anv_device_free(device, mem); + + return result; +} diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c new file mode 100644 index 00000000000..f2629156f3c --- /dev/null +++ b/src/vulkan/anv_meta.c @@ -0,0 +1,1449 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
 */

/* NOTE(review): the operands of the angle-bracket #include directives below
 * were lost in extraction (only the bare keyword survived); left as found.
 * Confirm the original header names against the upstream tree.
 */
#include
#include
#include
#include
#include

#include "anv_private.h"
#include "anv_meta_spirv.h"

/* Build the pipeline used by the meta clear path. Called once at device
 * creation; the resulting pipeline is cached in device->meta_state.clear.
 */
static void
anv_device_init_meta_clear_state(struct anv_device *device)
{
   /* We don't use a vertex shader for clearing, but instead build and pass
    * the VUEs directly to the rasterization backend.
    */
   VkShaderModule fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT,
      out vec4 f_color;
      flat in vec4 v_color;
      void main()
      {
         f_color = v_color;
      }
   );

   VkShader fs;
   anv_CreateShader(anv_device_to_handle(device),
      &(VkShaderCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO,
         .module = fsm,
         .pName = "main",
      }, &fs);

   /* We use instanced rendering to clear multiple render targets. We have two
    * vertex buffers: the first vertex buffer holds per-vertex data and
    * provides the vertices for the clear rectangle. The second one holds
    * per-instance data, which consists of the VUE header (which selects the
    * layer) and the color (Vulkan supports per-RT clear colors).
    */
   VkPipelineVertexInputStateCreateInfo vi_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .bindingCount = 2,
      .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
         {
            .binding = 0,
            .strideInBytes = 8,
            .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX
         },
         {
            .binding = 1,
            .strideInBytes = 32,
            .stepRate = VK_VERTEX_INPUT_STEP_RATE_INSTANCE
         },
      },
      .attributeCount = 3,
      .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
         {
            /* VUE Header */
            .location = 0,
            .binding = 1,
            .format = VK_FORMAT_R32G32B32A32_UINT,
            .offsetInBytes = 0
         },
         {
            /* Position */
            .location = 1,
            .binding = 0,
            .format = VK_FORMAT_R32G32_SFLOAT,
            .offsetInBytes = 0
         },
         {
            /* Color */
            .location = 2,
            .binding = 1,
            .format = VK_FORMAT_R32G32B32A32_SFLOAT,
            .offsetInBytes = 16
         }
      }
   };

   /* Fragment-only pipeline: repclear replaces the VS, rectlist topology is
    * substituted for the strip, and the viewport transform is disabled.
    */
   anv_pipeline_create(anv_device_to_handle(device),
      &(VkGraphicsPipelineCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
         .stageCount = 1,
         .pStages = &(VkPipelineShaderStageCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_FRAGMENT,
            .shader = fs,
            .pSpecializationInfo = NULL,
         },
         .pVertexInputState = &vi_create_info,
         .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
            .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
            .primitiveRestartEnable = false,
         },
         .pRasterState = &(VkPipelineRasterStateCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO,
            .depthClipEnable = true,
            .rasterizerDiscardEnable = false,
            .fillMode = VK_FILL_MODE_SOLID,
            .cullMode = VK_CULL_MODE_NONE,
            .frontFace = VK_FRONT_FACE_CCW
         },
         .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
            .attachmentCount = 1,
            .pAttachments = (VkPipelineColorBlendAttachmentState []) {
               { .channelWriteMask = VK_CHANNEL_A_BIT |
                    VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT },
            }
         },
         .flags = 0,
      },
      &(struct anv_pipeline_create_info) {
         .use_repclear = true,
         .disable_viewport = true,
         .use_rectlist = true
      },
      &device->meta_state.clear.pipeline);

   /* The pipeline holds its own reference to the compiled shader. */
   anv_DestroyShader(anv_device_to_handle(device), fs);
}

#define NUM_VB_USED 2

/* Snapshot of the command-buffer state the meta paths clobber, so it can
 * be restored after a meta operation.
 */
struct anv_saved_state {
   struct anv_vertex_binding old_vertex_bindings[NUM_VB_USED];
   struct anv_descriptor_set *old_descriptor_set0;
   struct anv_pipeline *old_pipeline;
   VkDynamicColorBlendState cb_state;
};

/* Save pipeline, descriptor set 0 and the first NUM_VB_USED vertex bindings. */
static void
anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_saved_state *state)
{
   state->old_pipeline = cmd_buffer->state.pipeline;
   state->old_descriptor_set0 = cmd_buffer->state.descriptors[0].set;
   memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings,
          sizeof(state->old_vertex_bindings));
}

/* Restore the state captured by anv_cmd_buffer_save() and mark everything
 * we touched dirty so it is re-emitted on the next draw.
 */
static void
anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer,
                       const struct anv_saved_state *state)
{
   cmd_buffer->state.pipeline = state->old_pipeline;
   cmd_buffer->state.descriptors[0].set = state->old_descriptor_set0;
   memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
          sizeof(state->old_vertex_bindings));

   cmd_buffer->state.vb_dirty |= (1 << NUM_VB_USED) - 1;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT;
}

/* Layout of the per-instance VUE header consumed by the clear pipeline
 * (binding 1, first 16 bytes).
 */
struct vue_header {
   uint32_t Reserved;
   uint32_t RTAIndex;
   uint32_t ViewportIndex;
   float PointWidth;
};

/* One instance record per render target: VUE header + clear color
 * (matches the 32-byte stride of binding 1 above).
 */
struct clear_instance_data {
   struct vue_header vue_header;
   VkClearColorValue color;
};

static void
meta_emit_clear(struct anv_cmd_buffer *cmd_buffer,
                int num_instances,
                struct clear_instance_data *instance_data)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_state state;
uint32_t size; + + const float vertex_data[] = { + /* Rect-list coordinates */ + 0.0, 0.0, + fb->width, 0.0, + fb->width, fb->height, + + /* Align to 16 bytes */ + 0.0, 0.0, + }; + + size = sizeof(vertex_data) + num_instances * sizeof(*instance_data); + state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, size, 16); + + /* Copy in the vertex and instance data */ + memcpy(state.map, vertex_data, sizeof(vertex_data)); + memcpy(state.map + sizeof(vertex_data), instance_data, + num_instances * sizeof(*instance_data)); + + struct anv_buffer vertex_buffer = { + .device = cmd_buffer->device, + .size = size, + .bo = &device->surface_state_block_pool.bo, + .offset = state.offset + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(vertex_data) + }); + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.clear.pipeline)) + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.clear.pipeline); + + /* We don't need anything here, only set if not already set. 
*/ + if (cmd_buffer->state.rs_state == NULL) + anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.rs_state); + + if (cmd_buffer->state.vp_state == NULL) + anv_CmdBindDynamicViewportState(anv_cmd_buffer_to_handle(cmd_buffer), + cmd_buffer->state.framebuffer->vp_state); + + if (cmd_buffer->state.ds_state == NULL) + anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.ds_state); + + if (cmd_buffer->state.cb_state == NULL) + anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.cb_state); + + anv_CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, num_instances); +} + +void +anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass, + const VkClearValue *clear_values) +{ + struct anv_saved_state saved_state; + + int num_clear_layers = 0; + for (uint32_t i = 0; i < pass->attachment_count; i++) { + if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + if (anv_is_vk_format_depth_or_stencil(pass->attachments[i].format)) { + anv_finishme("Can't clear depth-stencil yet"); + continue; + } + num_clear_layers++; + } + } + + if (num_clear_layers == 0) + return; + + struct clear_instance_data instance_data[num_clear_layers]; + uint32_t color_attachments[num_clear_layers]; + + int layer = 0; + for (uint32_t i = 0; i < pass->attachment_count; i++) { + if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR && + !anv_is_vk_format_depth_or_stencil(pass->attachments[i].format)) { + instance_data[layer] = (struct clear_instance_data) { + .vue_header = { + .RTAIndex = i, + .ViewportIndex = 0, + .PointWidth = 0.0 + }, + .color = clear_values[i].color, + }; + color_attachments[layer] = i; + layer++; + } + } + + anv_cmd_buffer_save(cmd_buffer, &saved_state); + + struct anv_subpass subpass = { + .input_count = 0, + .color_count = num_clear_layers, + .color_attachments = 
color_attachments, + .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, + }; + + anv_cmd_buffer_begin_subpass(cmd_buffer, &subpass); + + meta_emit_clear(cmd_buffer, num_clear_layers, instance_data); + + /* Restore API state */ + anv_cmd_buffer_restore(cmd_buffer, &saved_state); +} + +static void +anv_device_init_meta_blit_state(struct anv_device *device) +{ + /* We don't use a vertex shader for clearing, but instead build and pass + * the VUEs directly to the rasterization backend. However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. + */ + VkShaderModule vsm = GLSL_VK_SHADER_MODULE(device, VERTEX, + in vec2 a_pos; + in vec2 a_tex_coord; + out vec4 v_tex_coord; + void main() + { + v_tex_coord = vec4(a_tex_coord, 0, 1); + gl_Position = vec4(a_pos, 0, 1); + } + ); + + VkShaderModule fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, + out vec4 f_color; + in vec4 v_tex_coord; + layout(set = 0, binding = 0) uniform sampler2D u_tex; + void main() + { + f_color = texture(u_tex, v_tex_coord.xy); + } + ); + + VkShader vs; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = vsm, + .pName = "main", + }, &vs); + + VkShader fs; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = fsm, + .pName = "main", + }, &fs); + + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .bindingCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = 0, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + { + .binding = 1, + .strideInBytes = 16, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + }, + .attributeCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + 
.location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offsetInBytes = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offsetInBytes = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offsetInBytes = 8 + } + } + }; + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .count = 1, + .pBinding = (VkDescriptorSetLayoutBinding[]) { + { + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .arraySize = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }; + anv_CreateDescriptorSetLayout(anv_device_to_handle(device), &ds_layout_info, + &device->meta_state.blit.ds_layout); + + anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout, + }, + &device->meta_state.blit.pipeline_layout); + + anv_pipeline_create(anv_device_to_handle(device), + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX, + .shader = vs, + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .pSpecializationInfo = NULL + }, + }, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pRasterState = &(VkPipelineRasterStateCreateInfo) { + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + }, + &(struct anv_pipeline_create_info) { + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }, + &device->meta_state.blit.pipeline); + + anv_DestroyShader(anv_device_to_handle(device), vs); + anv_DestroyShader(anv_device_to_handle(device), fs); +} + +static void +meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_saved_state *saved_state) +{ + struct anv_device *device = cmd_buffer->device; + + anv_cmd_buffer_save(cmd_buffer, saved_state); + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.blit.pipeline)) + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline); + + /* We don't need anything here, only set if not already set. 
*/ + if (cmd_buffer->state.rs_state == NULL) + anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.rs_state); + if (cmd_buffer->state.ds_state == NULL) + anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.ds_state); + + saved_state->cb_state = anv_dynamic_cb_state_to_handle(cmd_buffer->state.cb_state); + anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.cb_state); +} + +struct blit_region { + VkOffset3D src_offset; + VkExtent3D src_extent; + VkOffset3D dest_offset; + VkExtent3D dest_extent; +}; + +static void +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image_view *src, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_color_attachment_view *dest, + VkOffset3D dest_offset, + VkExtent3D dest_extent) +{ + struct anv_device *device = cmd_buffer->device; + VkDescriptorPool dummy_desc_pool = { .handle = 1 }; + + struct blit_vb_data { + float pos[2]; + float tex_coord[2]; + } *vb_data; + + unsigned vb_size = sizeof(struct vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct vue_header)); + vb_data = vb_state.map + sizeof(struct vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + dest_offset.x + dest_extent.width, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)(src_offset.x + src_extent.width) / (float)src->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src->extent.height, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)src_offset.x / (float)src->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src->extent.height, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + 
dest_offset.y, + }, + .tex_coord = { + (float)src_offset.x / (float)src->extent.width, + (float)src_offset.y / (float)src->extent.height, + }, + }; + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->surface_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct vue_header), + }); + + uint32_t count; + VkDescriptorSet set; + anv_AllocDescriptorSets(anv_device_to_handle(device), dummy_desc_pool, + VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, + 1, &device->meta_state.blit.ds_layout, &set, &count); + anv_UpdateDescriptorSets(anv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .destSet = set, + .destBinding = 0, + .destArrayElement = 0, + .count = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pDescriptors = (VkDescriptorInfo[]) { + { + .imageView = anv_image_view_to_handle(src), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkAttachmentBindInfo[]) { + { + .view = anv_attachment_view_to_handle(&dest->base), + .layout = VK_IMAGE_LAYOUT_GENERAL + } + }, + .width = dest->base.extent.width, + .height = dest->base.extent.height, + .layers = 1 + }, &fb); + + VkRenderPass pass; + anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, + .format = dest->view.format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + 
.storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputCount = 0, + .colorCount = 1, + .colorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .resolveAttachments = NULL, + .depthStencilAttachment = (VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveCount = 1, + .preserveAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + .dependencyCount = 0, + }, &pass); + + anv_CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = pass, + .framebuffer = fb, + .renderArea = { + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, + }, + .attachmentCount = 1, + .pAttachmentClearValues = NULL, + }, VK_RENDER_PASS_CONTENTS_INLINE); + + anv_CmdBindDynamicViewportState(anv_cmd_buffer_to_handle(cmd_buffer), + anv_framebuffer_from_handle(fb)->vp_state); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, + &set, 0, NULL); + + anv_CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, 1); + + anv_CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. 
+ */ + anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb); + anv_DestroyRenderPass(anv_device_to_handle(device), pass); +} + +static void +meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, + const struct anv_saved_state *saved_state) +{ + anv_cmd_buffer_restore(cmd_buffer, saved_state); + anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), + saved_state->cb_state); +} + +static VkFormat +vk_format_for_cpp(int cpp) +{ + switch (cpp) { + case 1: return VK_FORMAT_R8_UINT; + case 2: return VK_FORMAT_R8G8_UINT; + case 3: return VK_FORMAT_R8G8B8_UINT; + case 4: return VK_FORMAT_R8G8B8A8_UINT; + case 6: return VK_FORMAT_R16G16B16_UINT; + case 8: return VK_FORMAT_R16G16B16A16_UINT; + case 12: return VK_FORMAT_R32G32B32_UINT; + case 16: return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Invalid format cpp"); + } +} + +static void +do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *src, uint64_t src_offset, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat copy_format) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = copy_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }; + + VkImage src_image, dest_image; + anv_CreateImage(vk_device, &image_info, &src_image); + anv_CreateImage(vk_device, &image_info, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + anv_image_from_handle(src_image)->bo = src; + anv_image_from_handle(src_image)->offset = src_offset; + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + struct anv_image_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = VK_IMAGE_ASPECT_COLOR, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArraySlice = 0, + .arraySize = 1 + }, + }, + cmd_buffer); + + struct anv_color_attachment_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + .image = dest_image, + .format = copy_format, + .mipLevel = 0, + .baseArraySlice = 0, + .arraySize = 1, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + &src_view, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + &dest_view, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }); + + anv_DestroyImage(vk_device, src_image); + anv_DestroyImage(vk_device, dest_image); +} + +void anv_CmdCopyBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + + struct anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; + uint64_t dest_offset = dest_buffer->offset + pRegions[r].destOffset; + uint64_t copy_size = 
pRegions[r].copySize; + + /* First, we compute the biggest format that can be used with the + * given offsets and size. + */ + int cpp = 16; + + int fs = ffs(src_offset) - 1; + if (fs != -1) + cpp = MIN2(cpp, 1 << fs); + assert(src_offset % cpp == 0); + + fs = ffs(dest_offset) - 1; + if (fs != -1) + cpp = MIN2(cpp, 1 << fs); + assert(dest_offset % cpp == 0); + + fs = ffs(pRegions[r].copySize) - 1; + if (fs != -1) + cpp = MIN2(cpp, 1 << fs); + assert(pRegions[r].copySize % cpp == 0); + + VkFormat copy_format = vk_format_for_cpp(cpp); + + /* This is maximum possible width/height our HW can handle */ + uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = max_surface_dim * max_surface_dim * cpp; + while (copy_size > max_copy_size) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, max_surface_dim, copy_format); + copy_size -= max_copy_size; + src_offset += max_copy_size; + dest_offset += max_copy_size; + } + + uint64_t height = copy_size / (max_surface_dim * cpp); + assert(height < max_surface_dim); + if (height != 0) { + uint64_t rect_copy_size = height * max_surface_dim * cpp; + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, height, copy_format); + copy_size -= rect_copy_size; + src_offset += rect_copy_size; + dest_offset += rect_copy_size; + } + + if (copy_size != 0) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + copy_size / cpp, 1, copy_format); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdCopyImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + + struct 
anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + struct anv_image_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].srcSubresource.aspect, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .mipLevels = 1, + .baseArraySlice = pRegions[r].srcSubresource.arraySlice, + .arraySize = 1 + }, + }, + cmd_buffer); + + struct anv_color_attachment_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + .image = destImage, + .format = src_image->format, + .mipLevel = pRegions[r].destSubresource.mipLevel, + .baseArraySlice = pRegions[r].destSubresource.arraySlice, + .arraySize = 1, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + &src_view, + pRegions[r].srcOffset, + pRegions[r].extent, + &dest_view, + pRegions[r].destOffset, + pRegions[r].extent); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdBlitImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit* pRegions, + VkTexFilter filter) + +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + + struct anv_saved_state saved_state; + + anv_finishme("respect VkTexFilter"); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + struct anv_image_view src_view; + 
anv_image_view_init(&src_view, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].srcSubresource.aspect, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .mipLevels = 1, + .baseArraySlice = pRegions[r].srcSubresource.arraySlice, + .arraySize = 1 + }, + }, + cmd_buffer); + + struct anv_color_attachment_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + .image = destImage, + .format = dest_image->format, + .mipLevel = pRegions[r].destSubresource.mipLevel, + .baseArraySlice = pRegions[r].destSubresource.arraySlice, + .arraySize = 1, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + &src_view, + pRegions[r].srcOffset, + pRegions[r].srcExtent, + &dest_view, + pRegions[r].destOffset, + pRegions[r].destExtent); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdCopyBufferToImage( + VkCmdBuffer cmdBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + if (pRegions[r].bufferRowLength != 0) + anv_finishme("bufferRowLength not supported in CopyBufferToImage"); + if (pRegions[r].bufferImageHeight != 0) + anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); + 
+ VkImage srcImage; + anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = dest_image->format, + .extent = { + .width = pRegions[r].imageExtent.width, + .height = pRegions[r].imageExtent.height, + .depth = 1, + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }, &srcImage); + + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + src_image->bo = src_buffer->bo; + src_image->offset = src_buffer->offset + pRegions[r].bufferOffset; + + struct anv_image_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(src_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].imageSubresource.aspect, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArraySlice = 0, + .arraySize = 1 + }, + }, + cmd_buffer); + + struct anv_color_attachment_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .format = dest_image->format, + .mipLevel = pRegions[r].imageSubresource.mipLevel, + .baseArraySlice = pRegions[r].imageSubresource.arraySlice, + .arraySize = 1, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + &src_view, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent, + &dest_view, + pRegions[r].imageOffset, + pRegions[r].imageExtent); + + anv_DestroyImage(vk_device, srcImage); + } + + 
meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdCopyImageToBuffer( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + if (pRegions[r].bufferRowLength != 0) + anv_finishme("bufferRowLength not supported in CopyBufferToImage"); + if (pRegions[r].bufferImageHeight != 0) + anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); + + struct anv_image_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].imageSubresource.aspect, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .mipLevels = 1, + .baseArraySlice = pRegions[r].imageSubresource.arraySlice, + .arraySize = 1 + }, + }, + cmd_buffer); + + VkImage destImage; + anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = src_image->format, + .extent = { + .width = pRegions[r].imageExtent.width, + .height = pRegions[r].imageExtent.height, + .depth = 1, + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }, &destImage); + + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + + /* We could 
use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + dest_image->bo = dest_buffer->bo; + dest_image->offset = dest_buffer->offset + pRegions[r].bufferOffset; + + struct anv_color_attachment_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + .image = destImage, + .format = src_image->format, + .mipLevel = 0, + .baseArraySlice = 0, + .arraySize = 1, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + &src_view, + pRegions[r].imageOffset, + pRegions[r].imageExtent, + &dest_view, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent); + + anv_DestroyImage(vk_device, destImage); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdUpdateBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ + stub(); +} + +void anv_CmdFillBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize fillSize, + uint32_t data) +{ + stub(); +} + +void anv_CmdClearColorImage( + VkCmdBuffer cmdBuffer, + VkImage _image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, image, _image); + struct anv_saved_state saved_state; + + anv_cmd_buffer_save(cmd_buffer, &saved_state); + + for (uint32_t r = 0; r < rangeCount; r++) { + for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { + for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { + struct anv_color_attachment_view view; + anv_color_attachment_view_init(&view, cmd_buffer->device, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + .image = _image, + .format = image->format, + .mipLevel = 
pRanges[r].baseMipLevel + l, + .baseArraySlice = pRanges[r].baseArraySlice + s, + .arraySize = 1, + }, + cmd_buffer); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkAttachmentBindInfo[]) { + { + .view = anv_attachment_view_to_handle(&view.base), + .layout = VK_IMAGE_LAYOUT_GENERAL + } + }, + .width = view.base.extent.width, + .height = view.base.extent.height, + .layers = 1 + }, &fb); + + VkRenderPass pass; + anv_CreateRenderPass(anv_device_to_handle(cmd_buffer->device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, + .format = view.view.format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputCount = 0, + .colorCount = 1, + .colorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .resolveAttachments = NULL, + .depthStencilAttachment = (VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveCount = 1, + .preserveAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + .dependencyCount = 0, + }, &pass); + + anv_CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderArea = { + .offset = { 0, 0, }, + .extent = { + .width = view.base.extent.width, + .height = view.base.extent.height, + }, + 
}, + .renderPass = pass, + .framebuffer = fb, + .attachmentCount = 1, + .pAttachmentClearValues = NULL, + }, VK_RENDER_PASS_CONTENTS_INLINE); + + struct clear_instance_data instance_data = { + .vue_header = { + .RTAIndex = 0, + .ViewportIndex = 0, + .PointWidth = 0.0 + }, + .color = *pColor, + }; + + meta_emit_clear(cmd_buffer, 1, &instance_data); + + anv_CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); + } + } + } + + /* Restore API state */ + anv_cmd_buffer_restore(cmd_buffer, &saved_state); +} + +void anv_CmdClearDepthStencilImage( + VkCmdBuffer cmdBuffer, + VkImage image, + VkImageLayout imageLayout, + float depth, + uint32_t stencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + stub(); +} + +void anv_CmdClearColorAttachment( + VkCmdBuffer cmdBuffer, + uint32_t colorAttachment, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rectCount, + const VkRect3D* pRects) +{ + stub(); +} + +void anv_CmdClearDepthStencilAttachment( + VkCmdBuffer cmdBuffer, + VkImageAspectFlags imageAspectMask, + VkImageLayout imageLayout, + float depth, + uint32_t stencil, + uint32_t rectCount, + const VkRect3D* pRects) +{ + stub(); +} + +void anv_CmdResolveImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageResolve* pRegions) +{ + stub(); +} + +void +anv_device_init_meta(struct anv_device *device) +{ + anv_device_init_meta_clear_state(device); + anv_device_init_meta_blit_state(device); + + anv_CreateDynamicRasterState(anv_device_to_handle(device), + &(VkDynamicRasterStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO, + }, + &device->meta_state.shared.rs_state); + + anv_CreateDynamicColorBlendState(anv_device_to_handle(device), + &(VkDynamicColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO + }, + &device->meta_state.shared.cb_state); 
+ + anv_CreateDynamicDepthStencilState(anv_device_to_handle(device), + &(VkDynamicDepthStencilStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO + }, + &device->meta_state.shared.ds_state); +} + +void +anv_device_finish_meta(struct anv_device *device) +{ + /* Clear */ + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.clear.pipeline); + + /* Blit */ + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline); + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout); + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout); + + /* Shared */ + anv_DestroyDynamicRasterState(anv_device_to_handle(device), + device->meta_state.shared.rs_state); + anv_DestroyDynamicColorBlendState(anv_device_to_handle(device), + device->meta_state.shared.cb_state); + anv_DestroyDynamicDepthStencilState(anv_device_to_handle(device), + device->meta_state.shared.ds_state); +} diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c new file mode 100644 index 00000000000..817b644eefb --- /dev/null +++ b/src/vulkan/anv_pipeline.c @@ -0,0 +1,950 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +// Shader functions + +VkResult anv_CreateShaderModule( + VkDevice _device, + const VkShaderModuleCreateInfo* pCreateInfo, + VkShaderModule* pShaderModule) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + module = anv_device_alloc(device, sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (module == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + module->size = pCreateInfo->codeSize; + memcpy(module->data, pCreateInfo->pCode, module->size); + + *pShaderModule = anv_shader_module_to_handle(module); + + return VK_SUCCESS; +} + +VkResult anv_DestroyShaderModule( + VkDevice _device, + VkShaderModule _module) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, _module); + + anv_device_free(device, module); + + return VK_SUCCESS; +} + +VkResult anv_CreateShader( + VkDevice _device, + const VkShaderCreateInfo* pCreateInfo, + VkShader* pShader) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->module); + struct anv_shader *shader; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + size_t name_len = strlen(pCreateInfo->pName); + + if 
(strcmp(pCreateInfo->pName, "main") != 0) { + anv_finishme("Multiple shaders per module not really supported"); + } + + shader = anv_device_alloc(device, sizeof(*shader) + name_len + 1, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (shader == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + shader->module = module; + memcpy(shader->entrypoint, pCreateInfo->pName, name_len + 1); + + *pShader = anv_shader_to_handle(shader); + + return VK_SUCCESS; +} + +VkResult anv_DestroyShader( + VkDevice _device, + VkShader _shader) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader, shader, _shader); + + anv_device_free(device, shader); + + return VK_SUCCESS; +} + + +VkResult anv_CreatePipelineCache( + VkDevice device, + const VkPipelineCacheCreateInfo* pCreateInfo, + VkPipelineCache* pPipelineCache) +{ + pPipelineCache->handle = 1; + + stub_return(VK_SUCCESS); +} + +VkResult anv_DestroyPipelineCache( + VkDevice _device, + VkPipelineCache _cache) +{ + /* VkPipelineCache is a dummy object. 
*/ + return VK_SUCCESS; +} + +size_t anv_GetPipelineCacheSize( + VkDevice device, + VkPipelineCache pipelineCache) +{ + stub_return(0); +} + +VkResult anv_GetPipelineCacheData( + VkDevice device, + VkPipelineCache pipelineCache, + void* pData) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_MergePipelineCaches( + VkDevice device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches) +{ + stub_return(VK_UNSUPPORTED); +} + +// Pipeline functions + +static void +emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info) +{ + const uint32_t num_dwords = 1 + info->attributeCount * 2; + uint32_t *p; + bool instancing_enable[32]; + + pipeline->vb_used = 0; + for (uint32_t i = 0; i < info->bindingCount; i++) { + const VkVertexInputBindingDescription *desc = + &info->pVertexBindingDescriptions[i]; + + pipeline->vb_used |= 1 << desc->binding; + pipeline->binding_stride[desc->binding] = desc->strideInBytes; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. 
*/ + switch (desc->stepRate) { + default: + case VK_VERTEX_INPUT_STEP_RATE_VERTEX: + instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: + instancing_enable[desc->binding] = true; + break; + } + } + + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GEN8_3DSTATE_VERTEX_ELEMENTS); + + for (uint32_t i = 0; i < info->attributeCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + const struct anv_format *format = anv_format_for_vk_format(desc->format); + + struct GEN8_VERTEX_ELEMENT_STATE element = { + .VertexBufferIndex = desc->binding, + .Valid = true, + .SourceElementFormat = format->surface_format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offsetInBytes, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component3Control = format->num_channels >= 4 ? VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP + }; + GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_INSTANCING, + .InstancingEnable = instancing_enable[desc->binding], + .VertexElementIndex = i, + /* Vulkan so far doesn't have an instance divisor, so + * this is always 1 (ignored if not instancing). 
*/ + .InstanceDataStepRate = 1); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_SGVS, + .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDComponentNumber = 2, + .VertexIDElementOffset = info->bindingCount, + .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDComponentNumber = 3, + .InstanceIDElementOffset = info->bindingCount); +} + +static void +emit_ia_state(struct anv_pipeline *pipeline, + const VkPipelineInputAssemblyStateCreateInfo *info, + const struct anv_pipeline_create_info *extra) +{ + static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 + }; + uint32_t topology = vk_to_gen_primitive_type[info->topology]; + + if (extra && extra->use_rectlist) + topology = _3DPRIM_RECTLIST; + + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, + }; + GEN8_3DSTATE_VF_pack(NULL, pipeline->state_vf, &vf); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY, + .PrimitiveTopologyType = topology); +} + +static void +emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterStateCreateInfo *info, + const struct anv_pipeline_create_info *extra) +{ + static const uint32_t vk_to_gen_cullmode[] = { + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, + 
[VK_CULL_MODE_BACK] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH + }; + + static const uint32_t vk_to_gen_fillmode[] = { + [VK_FILL_MODE_POINTS] = RASTER_POINT, + [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, + [VK_FILL_MODE_SOLID] = RASTER_SOLID + }; + + static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_CCW] = CounterClockwise, + [VK_FRONT_FACE_CW] = Clockwise + }; + + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .ViewportTransformEnable = !(extra && extra->disable_viewport), + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .PointWidthSource = pipeline->writes_point_size ? Vertex : State, + .PointWidth = 1.0, + }; + + /* FINISHME: VkBool32 rasterizerDiscardEnable; */ + + GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); + + struct GEN8_3DSTATE_RASTER raster = { + GEN8_3DSTATE_RASTER_header, + .FrontWinding = vk_to_gen_front_face[info->frontFace], + .CullMode = vk_to_gen_cullmode[info->cullMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .ScissorRectangleEnable = !(extra && extra->disable_scissor), + .ViewportZClipTestEnable = info->depthClipEnable + }; + + GEN8_3DSTATE_RASTER_pack(NULL, pipeline->state_raster, &raster); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = + pipeline->wm_prog_data.num_varying_inputs); + +} + +static void +emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info) +{ + struct anv_device *device = pipeline->device; + + static const uint32_t vk_to_gen_logic_op[] = { + [VK_LOGIC_OP_COPY] = LOGICOP_COPY, + [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, 
+ [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = LOGICOP_XOR, + [VK_LOGIC_OP_OR] = LOGICOP_OR, + [VK_LOGIC_OP_NOR] = LOGICOP_NOR, + [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = LOGICOP_NAND, + [VK_LOGIC_OP_SET] = LOGICOP_SET, + }; + + static const uint32_t vk_to_gen_blend[] = { + [VK_BLEND_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + [VK_BLEND_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, + }; + + static const uint32_t vk_to_gen_blend_op[] = { + [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, + [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, + [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, + [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, + [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, + }; + + uint32_t num_dwords = 1 + info->attachmentCount * 2; + pipeline->blend_state = + 
anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); + + struct GEN8_BLEND_STATE blend_state = { + .AlphaToCoverageEnable = info->alphaToCoverageEnable, + }; + + uint32_t *state = pipeline->blend_state.map; + GEN8_BLEND_STATE_pack(NULL, state, &blend_state); + + for (uint32_t i = 0; i < info->attachmentCount; i++) { + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; + + struct GEN8_BLEND_STATE_ENTRY entry = { + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .ColorBufferBlendEnable = a->blendEnable, + .PreBlendSourceOnlyClampEnable = false, + .PreBlendColorClampEnable = false, + .PostBlendColorClampEnable = false, + .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], + .DestinationBlendFactor = vk_to_gen_blend[a->destBlendColor], + .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], + .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], + .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), + .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), + .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), + .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), + }; + + GEN8_BLEND_STATE_ENTRY_pack(NULL, state + i * 2 + 1, &entry); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_BLEND_STATE_POINTERS, + .BlendStatePointer = pipeline->blend_state.offset, + .BlendStatePointerValid = true); +} + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER, + [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS, + [VK_COMPARE_OP_EQUAL] = COMPAREFUNCTION_EQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = COMPAREFUNCTION_LEQUAL, + [VK_COMPARE_OP_GREATER] = COMPAREFUNCTION_GREATER, + [VK_COMPARE_OP_NOT_EQUAL] = COMPAREFUNCTION_NOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = 
COMPAREFUNCTION_GEQUAL, + [VK_COMPARE_OP_ALWAYS] = COMPAREFUNCTION_ALWAYS, +}; + +static const uint32_t vk_to_gen_stencil_op[] = { + [VK_STENCIL_OP_KEEP] = 0, + [VK_STENCIL_OP_ZERO] = 0, + [VK_STENCIL_OP_REPLACE] = 0, + [VK_STENCIL_OP_INC_CLAMP] = 0, + [VK_STENCIL_OP_DEC_CLAMP] = 0, + [VK_STENCIL_OP_INVERT] = 0, + [VK_STENCIL_OP_INC_WRAP] = 0, + [VK_STENCIL_OP_DEC_WRAP] = 0 +}; + +static void +emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. + */ + memset(pipeline->state_wm_depth_stencil, 0, + sizeof(pipeline->state_wm_depth_stencil)); + return; + } + + /* VkBool32 depthBoundsEnable; // optional (depth_bounds_test) */ + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], + .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.stencilDepthFailOp], + .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], + }; + + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->state_wm_depth_stencil, &wm_depth_stencil); +} + +VkResult +anv_pipeline_create( + VkDevice _device, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct 
anv_pipeline_create_info * extra, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + uint32_t offset, length; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + + result = anv_reloc_list_init(&pipeline->batch.relocs, device); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + pipeline->shaders[pCreateInfo->pStages[i].stage] = + anv_shader_from_handle(pCreateInfo->pStages[i].shader); + } + + if (pCreateInfo->pTessellationState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + if (pCreateInfo->pViewportState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO"); + if (pCreateInfo->pMultisampleState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); + + pipeline->use_repclear = extra && extra->use_repclear; + + anv_compiler_run(device->compiler, pipeline); + + /* FIXME: The compiler dead-codes FS inputs when we don't have a VS, so we + * hard code this to num_attributes - 2. This is because the attributes + * include VUE header and position, which aren't counted as varying + * inputs. 
*/ + if (pipeline->vs_simd8 == NO_KERNEL) { + pipeline->wm_prog_data.num_varying_inputs = + pCreateInfo->pVertexInputState->attributeCount - 2; + } + + assert(pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); + assert(pCreateInfo->pInputAssemblyState); + emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); + assert(pCreateInfo->pRasterState); + emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); + emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + emit_cb_state(pipeline, pCreateInfo->pColorBlendState); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_STATISTICS, + .StatisticsEnable = true); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_HS, .Enable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_TE, .TEEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_DS, .FunctionEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS, + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS, + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS, + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM_CHROMAKEY, + .ChromaKeyKillEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE_SWIZ); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, + .ClipEnable = true, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM, + .StatisticsEnable = true, + .LineEndCapAntialiasingRegionWidth = _05pixels, + .LineAntialiasingRegionWidth = _10pixels, + 
.EarlyDepthStencilControl = NORMAL, + .ForceThreadDispatchEnable = NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .BarycentricInterpolationMode = + pipeline->wm_prog_data.barycentric_interp_modes); + + uint32_t samples = 1; + uint32_t log2_samples = __builtin_ffs(samples) - 1; + bool enable_sampling = samples > 1 ? true : false; + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_MULTISAMPLE, + .PixelPositionOffsetEnable = enable_sampling, + .PixelLocation = CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SAMPLE_MASK, + .SampleMask = 0xffff); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_VS, + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_GS, + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_HS, + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_DS, + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->gs_vec4 == NO_KERNEL) + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, .Enable = false); + else + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, + .SingleProgramFlow = false, + .KernelStartPointer = pipeline->gs_vec4, + .VectorMaskEnable = Vmask, + .SamplerCount = 0, + .BindingTableEntryCount = 0, + .ExpectedVertexCount = pipeline->gs_vertex_count, + + .ScratchSpaceBasePointer = 
pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], + .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .DispatchGRFStartRegisterForURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + //pipeline->gs_prog_data.dispatch_mode | + .StatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, + .ReorderMode = TRAILING, + .Enable = true, + + .ControlDataFormat = gs_prog_data->control_data_format, + + /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: + * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) + * UserClipDistanceCullTestEnableBitmask(v) + */ + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* Skip the VUE header and position slots */ + offset = 1; + length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->vs_simd8 == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + .FunctionEnable = false, + .VertexURBEntryOutputReadOffset = 1, + /* Even if VS is disabled, SBE still gets the amount of + * vertex data to read from this field. We use attribute + * count - 1, as we don't count the VUE header here. 
*/ + .VertexURBEntryOutputLength = + DIV_ROUND_UP(pCreateInfo->pVertexInputState->attributeCount - 1, 2)); + else + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + .KernelStartPointer = pipeline->vs_simd8, + .SingleVertexDispatch = Multiple, + .VectorMaskEnable = Dmask, + .SamplerCount = 0, + .BindingTableEntryCount = + vue_prog_data->base.binding_table.size_bytes / 4, + .ThreadDispatchPriority = Normal, + .FloatingPointMode = IEEE754, + .IllegalOpcodeExceptionEnable = false, + .AccessesUAV = false, + .SoftwareExceptionEnable = false, + + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], + .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048), + + .DispatchGRFStartRegisterForURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = false, + .SIMD8DispatchEnable = true, + .VertexCacheDisable = false, + .FunctionEnable = true, + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length, + .UserClipDistanceClipTestEnableBitmask = 0, + .UserClipDistanceCullTestEnableBitmask = 0); + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + uint32_t ksp0, ksp2, grf_start0, grf_start2; + + ksp2 = 0; + grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + ksp0 = pipeline->ps_simd8; + grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + ksp2 = pipeline->ps_simd16; + grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + ksp0 = pipeline->ps_simd16; + grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; + } else { + unreachable("no ps shader"); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, + .KernelStartPointer0 = ksp0, + + .SingleProgramFlow = false, + .VectorMaskEnable = true, + .SamplerCount = 
1, + + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), + + .MaximumNumberofThreadsPerPSD = 64 - 2, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE: POSOFFSET_NONE, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._32PixelDispatchEnable = false, + + .DispatchGRFStartRegisterForConstantSetupData0 = grf_start0, + .DispatchGRFStartRegisterForConstantSetupData1 = 0, + .DispatchGRFStartRegisterForConstantSetupData2 = grf_start2, + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = ksp2); + + bool per_sample_ps = false; + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA, + .PixelShaderValid = true, + .PixelShaderKillsPixel = wm_prog_data->uses_kill, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .PixelShaderIsPerSample = per_sample_ps); + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} + +VkResult anv_DestroyPipeline( + VkDevice _device, + VkPipeline _pipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + anv_compiler_free(pipeline); + anv_reloc_list_finish(&pipeline->batch.relocs, pipeline->device); + anv_state_stream_finish(&pipeline->program_stream); + anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + anv_device_free(pipeline->device, pipeline); + + return VK_SUCCESS; +} + +VkResult anv_CreateGraphicsPipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result 
= anv_pipeline_create(_device, &pCreateInfos[i], + NULL, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j]); + } + + return result; + } + } + + return VK_SUCCESS; +} + +static VkResult anv_compute_pipeline_create( + VkDevice _device, + const VkComputePipelineCreateInfo* pCreateInfo, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + result = anv_reloc_list_init(&pipeline->batch.relocs, device); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + + pipeline->shaders[VK_SHADER_STAGE_COMPUTE] = + anv_shader_from_handle(pCreateInfo->cs.shader); + + pipeline->use_repclear = false; + + anv_compiler_run(device->compiler, pipeline); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + + anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), + .ScratchSpaceBasePointerHigh = 0, + .StackSize = 0, + + .MaximumNumberofThreads = device->info.max_cs_threads - 1, + .NumberofURBEntries = 2, + .ResetGatewayTimer = true, + .BypassGatewayControl = true, + 
.URBEntryAllocationSize = 2, + .CURBEAllocationSize = 0); + + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + uint32_t group_size = prog_data->local_size[0] * + prog_data->local_size[1] * prog_data->local_size[2]; + pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); + uint32_t remainder = group_size & (prog_data->simd_size - 1); + + if (remainder > 0) + pipeline->cs_right_mask = ~0u >> (32 - remainder); + else + pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); + + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} + +VkResult anv_CreateComputePipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo* pCreateInfos, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_compute_pipeline_create(_device, &pCreateInfos[i], + &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j]); + } + + return result; + } + } + + return VK_SUCCESS; +} + +// Pipeline layout functions + +VkResult anv_CreatePipelineLayout( + VkDevice _device, + const VkPipelineLayoutCreateInfo* pCreateInfo, + VkPipelineLayout* pPipelineLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline_layout *layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); + + layout = anv_device_alloc(device, sizeof(*layout), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (layout == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + layout->num_sets = pCreateInfo->descriptorSetCount; + + uint32_t surface_start[VK_SHADER_STAGE_NUM] = { 0, }; + uint32_t sampler_start[VK_SHADER_STAGE_NUM] = { 0, }; + + for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { + layout->stage[s].surface_count = 0; + layout->stage[s].sampler_count = 0; + } + + for (uint32_t i = 0; i < 
pCreateInfo->descriptorSetCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, + pCreateInfo->pSetLayouts[i]); + + layout->set[i].layout = set_layout; + for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { + layout->set[i].surface_start[s] = surface_start[s]; + surface_start[s] += set_layout->stage[s].surface_count; + layout->set[i].sampler_start[s] = sampler_start[s]; + sampler_start[s] += set_layout->stage[s].sampler_count; + + layout->stage[s].surface_count += set_layout->stage[s].surface_count; + layout->stage[s].sampler_count += set_layout->stage[s].sampler_count; + } + } + + *pPipelineLayout = anv_pipeline_layout_to_handle(layout); + + return VK_SUCCESS; +} + +VkResult anv_DestroyPipelineLayout( + VkDevice _device, + VkPipelineLayout _pipelineLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); + + anv_device_free(device, pipeline_layout); + + return VK_SUCCESS; +} diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h new file mode 100644 index 00000000000..be24b514f30 --- /dev/null +++ b/src/vulkan/anv_private.h @@ -0,0 +1,1081 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_VALGRIND +#include +#include +#define VG(x) x +#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) +#else +#define VG(x) +#endif + +#include "brw_device_info.h" +#include "util/macros.h" + +#define VK_PROTOTYPES +#include +#include +#include + +#include "anv_entrypoints.h" + +#include "brw_context.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define anv_noreturn __attribute__((__noreturn__)) +#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static inline uint32_t +align_u32(uint32_t v, uint32_t a) +{ + return (v + a - 1) & ~(a - 1); +} + +static inline int32_t +align_i32(int32_t v, int32_t a) +{ + return (v + a - 1) & ~(a - 1); +} + +/** Alignment must be a power of 2. 
*/ +static inline bool +anv_is_aligned(uintmax_t n, uintmax_t a) +{ + assert(a == (a & -a)); + return (n & (a - 1)) == 0; +} + +static inline uint32_t +anv_minify(uint32_t n, uint32_t levels) +{ + if (unlikely(n == 0)) + return 0; + else + return MAX(n >> levels, 1); +} + +static inline bool +anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) +{ + if (*inout_mask & clear_mask) { + *inout_mask &= ~clear_mask; + return true; + } else { + return false; + } +} + +#define for_each_bit(b, dword) \ + for (uint32_t __dword = (dword); \ + (b) = __builtin_ffs(__dword) - 1, __dword; \ + __dword &= ~(1 << (b))) + +/* Define no kernel as 1, since that's an illegal offset for a kernel */ +#define NO_KERNEL 1 + +struct anv_common { + VkStructureType sType; + const void* pNext; +}; + +/* Whenever we generate an error, pass it through this function. Useful for + * debugging, where we can break on it. Only call at error site, not when + * propagating errors. Might be useful to plug in a stack trace here. + */ + +static inline VkResult +vk_error(VkResult error) +{ +#ifdef DEBUG + fprintf(stderr, "vk_error: %x\n", error); +#endif + + return error; +} + +void __anv_finishme(const char *file, int line, const char *format, ...) + anv_printflike(3, 4); +void anv_loge(const char *format, ...) anv_printflike(1, 2); +void anv_loge_v(const char *format, va_list va); + +/** + * Print a FINISHME message, including its source location. + */ +#define anv_finishme(format, ...) \ + __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); + +/* A non-fatal assert. Useful for debugging. */ +#ifdef DEBUG +#define anv_assert(x) ({ \ + if (unlikely(!(x))) \ + fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ +}) +#else +#define anv_assert(x) +#endif + +void anv_abortf(const char *format, ...) 
anv_noreturn anv_printflike(1, 2); +void anv_abortfv(const char *format, va_list va) anv_noreturn; + +#define stub_return(v) \ + do { \ + anv_finishme("stub %s", __func__); \ + return (v); \ + } while (0) + +#define stub() \ + do { \ + anv_finishme("stub %s", __func__); \ + return; \ + } while (0) + +/** + * A dynamically growable, circular buffer. Elements are added at head and + * removed from tail. head and tail are free-running uint32_t indices and we + * only compute the modulo with size when accessing the array. This way, + * number of bytes in the queue is always head - tail, even in case of + * wraparound. + */ + +struct anv_vector { + uint32_t head; + uint32_t tail; + uint32_t element_size; + uint32_t size; + void *data; +}; + +int anv_vector_init(struct anv_vector *queue, uint32_t element_size, uint32_t size); +void *anv_vector_add(struct anv_vector *queue); +void *anv_vector_remove(struct anv_vector *queue); + +static inline int +anv_vector_length(struct anv_vector *queue) +{ + return (queue->head - queue->tail) / queue->element_size; +} + +static inline void +anv_vector_finish(struct anv_vector *queue) +{ + free(queue->data); +} + +#define anv_vector_foreach(elem, queue) \ + static_assert(__builtin_types_compatible_p(__typeof__(queue), struct anv_vector *), ""); \ + for (uint32_t __anv_vector_offset = (queue)->tail; \ + elem = (queue)->data + (__anv_vector_offset & ((queue)->size - 1)), __anv_vector_offset < (queue)->head; \ + __anv_vector_offset += (queue)->element_size) + +struct anv_bo { + int gem_handle; + uint32_t index; + uint64_t offset; + uint64_t size; + + /* This field is here for the benefit of the aub dumper. It can (and for + * userptr bos it must) be set to the cpu map of the buffer. Destroying + * the bo won't clean up the mmap, it's still the responsibility of the bo + * user to do that. */ + void *map; +}; + +/* Represents a lock-free linked list of "free" things. This is used by + * both the block pool and the state pools. 
Unfortunately, in order to + * solve the ABA problem, we can't use a single uint32_t head. + */ +union anv_free_list { + struct { + uint32_t offset; + + /* A simple count that is incremented every time the head changes. */ + uint32_t count; + }; + uint64_t u64; +}; + +#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) + +struct anv_block_pool { + struct anv_device *device; + + struct anv_bo bo; + void *map; + int fd; + uint32_t size; + + /** + * Array of mmaps and gem handles owned by the block pool, reclaimed when + * the block pool is destroyed. + */ + struct anv_vector mmap_cleanups; + + uint32_t block_size; + + uint32_t next_block; + union anv_free_list free_list; +}; + +struct anv_block_state { + union { + struct { + uint32_t next; + uint32_t end; + }; + uint64_t u64; + }; +}; + +struct anv_state { + uint32_t offset; + uint32_t alloc_size; + void *map; +}; + +struct anv_fixed_size_state_pool { + size_t state_size; + union anv_free_list free_list; + struct anv_block_state block; +}; + +#define ANV_MIN_STATE_SIZE_LOG2 6 +#define ANV_MAX_STATE_SIZE_LOG2 10 + +#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2) + +struct anv_state_pool { + struct anv_block_pool *block_pool; + struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; +}; + +struct anv_state_stream { + struct anv_block_pool *block_pool; + uint32_t next; + uint32_t current_block; + uint32_t end; +}; + +void anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_block_pool_finish(struct anv_block_pool *pool); +uint32_t anv_block_pool_alloc(struct anv_block_pool *pool); +void anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset); +void anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool); +struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, + size_t state_size, size_t alignment); +void anv_state_pool_free(struct anv_state_pool *pool, struct 
anv_state state); +void anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool); +void anv_state_stream_finish(struct anv_state_stream *stream); +struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment); + +/** + * Implements a pool of re-usable BOs. The interface is identical to that + * of block_pool except that each block is its own BO. + */ +struct anv_bo_pool { + struct anv_device *device; + + uint32_t bo_size; + + void *free_list; +}; + +void anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_bo_pool_finish(struct anv_bo_pool *pool); +VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); +void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); + +struct anv_physical_device { + struct anv_instance * instance; + uint32_t chipset_id; + bool no_hw; + const char * path; + const char * name; + const struct brw_device_info * info; + int fd; +}; + +struct anv_instance { + void * pAllocUserData; + PFN_vkAllocFunction pfnAlloc; + PFN_vkFreeFunction pfnFree; + uint32_t apiVersion; + uint32_t physicalDeviceCount; + struct anv_physical_device physicalDevice; +}; + +struct anv_meta_state { + struct { + VkPipeline pipeline; + } clear; + + struct { + VkPipeline pipeline; + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } blit; + + struct { + VkDynamicRasterState rs_state; + VkDynamicColorBlendState cb_state; + VkDynamicDepthStencilState ds_state; + } shared; +}; + +struct anv_queue { + struct anv_device * device; + + struct anv_state_pool * pool; + + /** + * Serial number of the most recently completed batch executed on the + * engine. + */ + struct anv_state completed_serial; + + /** + * The next batch submitted to the engine will be assigned this serial + * number. 
+ */ + uint32_t next_serial; + + uint32_t last_collected_serial; +}; + +struct anv_device { + struct anv_instance * instance; + uint32_t chipset_id; + struct brw_device_info info; + int context_id; + int fd; + bool no_hw; + bool dump_aub; + + struct anv_bo_pool batch_bo_pool; + + struct anv_block_pool dynamic_state_block_pool; + struct anv_state_pool dynamic_state_pool; + + struct anv_block_pool instruction_block_pool; + struct anv_block_pool surface_state_block_pool; + struct anv_state_pool surface_state_pool; + + struct anv_meta_state meta_state; + + struct anv_state border_colors; + + struct anv_queue queue; + + struct anv_block_pool scratch_block_pool; + + struct anv_compiler * compiler; + struct anv_aub_writer * aub_writer; + pthread_mutex_t mutex; +}; + +void * +anv_device_alloc(struct anv_device * device, + size_t size, + size_t alignment, + VkSystemAllocType allocType); + +void +anv_device_free(struct anv_device * device, + void * mem); + +void* anv_gem_mmap(struct anv_device *device, + uint32_t gem_handle, uint64_t offset, uint64_t size); +void anv_gem_munmap(void *p, uint64_t size); +uint32_t anv_gem_create(struct anv_device *device, size_t size); +void anv_gem_close(struct anv_device *device, int gem_handle); +int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); +int anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns); +int anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf); +int anv_gem_set_tiling(struct anv_device *device, int gem_handle, + uint32_t stride, uint32_t tiling); +int anv_gem_create_context(struct anv_device *device); +int anv_gem_destroy_context(struct anv_device *device, int context); +int anv_gem_get_param(int fd, uint32_t param); +int anv_gem_get_aperture(struct anv_physical_device *physical_dev, uint64_t *size); +int anv_gem_handle_to_fd(struct anv_device *device, int gem_handle); +int anv_gem_fd_to_handle(struct anv_device *device, int fd); +int 
anv_gem_userptr(struct anv_device *device, void *mem, size_t size); + +VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); + +struct anv_reloc_list { + size_t num_relocs; + size_t array_length; + struct drm_i915_gem_relocation_entry * relocs; + struct anv_bo ** reloc_bos; +}; + +VkResult anv_reloc_list_init(struct anv_reloc_list *list, + struct anv_device *device); +void anv_reloc_list_finish(struct anv_reloc_list *list, + struct anv_device *device); + +uint64_t anv_reloc_list_add(struct anv_reloc_list *list, + struct anv_device *device, + uint32_t offset, struct anv_bo *target_bo, + uint32_t delta); + +struct anv_batch_bo { + struct anv_bo bo; + + /* Bytes actually consumed in this batch BO */ + size_t length; + + /* These offsets reference the per-batch reloc list */ + size_t first_reloc; + size_t num_relocs; + + struct anv_batch_bo * prev_batch_bo; +}; + +struct anv_batch { + struct anv_device * device; + + void * start; + void * end; + void * next; + + struct anv_reloc_list relocs; + + /* This callback is called (with the associated user data) in the event + * that the batch runs out of space. 
+ */ + VkResult (*extend_cb)(struct anv_batch *, void *); + void * user_data; +}; + +void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); +void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); +uint64_t anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t offset); + +struct anv_address { + struct anv_bo *bo; + uint32_t offset; +}; + +#define __gen_address_type struct anv_address +#define __gen_user_data struct anv_batch + +static inline uint64_t +__gen_combine_address(struct anv_batch *batch, void *location, + const struct anv_address address, uint32_t delta) +{ + if (address.bo == NULL) { + return delta; + } else { + assert(batch->start <= location && location < batch->end); + + return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta); + } +} + +#include "gen7_pack.h" +#include "gen75_pack.h" +#undef GEN8_3DSTATE_MULTISAMPLE +#include "gen8_pack.h" + +#define anv_batch_emit(batch, cmd, ...) do { \ + struct cmd __template = { \ + cmd ## _header, \ + __VA_ARGS__ \ + }; \ + void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \ + cmd ## _pack(batch, __dst, &__template); \ + } while (0) + +#define anv_batch_emitn(batch, n, cmd, ...) 
({ \ + struct cmd __template = { \ + cmd ## _header, \ + .DwordLength = n - cmd ## _length_bias, \ + __VA_ARGS__ \ + }; \ + void *__dst = anv_batch_emit_dwords(batch, n); \ + cmd ## _pack(batch, __dst, &__template); \ + __dst; \ + }) + +#define anv_batch_emit_merge(batch, dwords0, dwords1) \ + do { \ + uint32_t *dw; \ + \ + assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \ + dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \ + for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \ + dw[i] = (dwords0)[i] | (dwords1)[i]; \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ + } while (0) + +#define GEN8_MOCS { \ + .MemoryTypeLLCeLLCCacheabilityControl = WB, \ + .TargetCache = L3DefertoPATforLLCeLLCselection, \ + .AgeforQUADLRU = 0 \ + } + +struct anv_device_memory { + struct anv_bo bo; + VkDeviceSize map_size; + void * map; +}; + +struct anv_dynamic_vp_state { + struct anv_state sf_clip_vp; + struct anv_state cc_vp; + struct anv_state scissor; +}; + +struct anv_dynamic_rs_state { + uint32_t state_sf[GEN8_3DSTATE_SF_length]; + uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; +}; + +struct anv_dynamic_ds_state { + uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + uint32_t state_color_calc[GEN8_COLOR_CALC_STATE_length]; +}; + +struct anv_dynamic_cb_state { + uint32_t state_color_calc[GEN8_COLOR_CALC_STATE_length]; + +}; + +struct anv_descriptor_slot { + int8_t dynamic_slot; + uint8_t index; +}; + +struct anv_descriptor_set_layout { + struct { + uint32_t surface_count; + struct anv_descriptor_slot *surface_start; + uint32_t sampler_count; + struct anv_descriptor_slot *sampler_start; + } stage[VK_SHADER_STAGE_NUM]; + + uint32_t count; + uint32_t num_dynamic_buffers; + uint32_t shader_stages; + struct anv_descriptor_slot entries[0]; +}; + +struct anv_descriptor { + struct anv_sampler *sampler; + struct anv_surface_view *view; +}; + +struct anv_descriptor_set { + struct anv_descriptor descriptors[0]; +}; + +VkResult 
+anv_descriptor_set_create(struct anv_device *device, + const struct anv_descriptor_set_layout *layout, + struct anv_descriptor_set **out_set); + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set); + +#define MAX_VBS 32 +#define MAX_SETS 8 +#define MAX_RTS 8 + +struct anv_pipeline_layout { + struct { + struct anv_descriptor_set_layout *layout; + uint32_t surface_start[VK_SHADER_STAGE_NUM]; + uint32_t sampler_start[VK_SHADER_STAGE_NUM]; + } set[MAX_SETS]; + + uint32_t num_sets; + + struct { + uint32_t surface_count; + uint32_t sampler_count; + } stage[VK_SHADER_STAGE_NUM]; +}; + +struct anv_buffer { + struct anv_device * device; + VkDeviceSize size; + + /* Set when bound */ + struct anv_bo * bo; + VkDeviceSize offset; +}; + +#define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 0) +#define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) +#define ANV_CMD_BUFFER_DS_DIRTY (1 << 3) +#define ANV_CMD_BUFFER_CB_DIRTY (1 << 4) +#define ANV_CMD_BUFFER_VP_DIRTY (1 << 5) +#define ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY (1 << 6) + +struct anv_vertex_binding { + struct anv_buffer * buffer; + VkDeviceSize offset; +}; + +struct anv_descriptor_set_binding { + struct anv_descriptor_set * set; + uint32_t dynamic_offsets[128]; +}; + +/** State required while building cmd buffer */ +struct anv_cmd_state { + uint32_t current_pipeline; + uint32_t vb_dirty; + uint32_t dirty; + uint32_t compute_dirty; + uint32_t descriptors_dirty; + uint32_t scratch_size; + struct anv_pipeline * pipeline; + struct anv_pipeline * compute_pipeline; + struct anv_framebuffer * framebuffer; + struct anv_render_pass * pass; + struct anv_subpass * subpass; + struct anv_dynamic_rs_state * rs_state; + struct anv_dynamic_ds_state * ds_state; + struct anv_dynamic_vp_state * vp_state; + struct anv_dynamic_cb_state * cb_state; + uint32_t state_vf[GEN8_3DSTATE_VF_length]; + struct anv_vertex_binding vertex_bindings[MAX_VBS]; + struct anv_descriptor_set_binding descriptors[MAX_SETS]; +}; + +VkResult 
anv_cmd_state_init(struct anv_cmd_state *state); +void anv_cmd_state_fini(struct anv_cmd_state *state); + +struct anv_cmd_buffer { + struct anv_device * device; + + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 * exec2_objects; + uint32_t exec2_bo_count; + struct anv_bo ** exec2_bos; + uint32_t exec2_array_length; + bool need_reloc; + uint32_t serial; + + struct anv_batch batch; + struct anv_batch_bo * last_batch_bo; + struct anv_batch_bo * surface_batch_bo; + uint32_t surface_next; + struct anv_reloc_list surface_relocs; + struct anv_state_stream surface_state_stream; + struct anv_state_stream dynamic_state_stream; + + struct anv_cmd_state state; +}; + +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment); +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment); + +VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + +void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass, + const VkClearValue *clear_values); + +void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); +void anv_aub_writer_destroy(struct anv_aub_writer *writer); + +struct anv_fence { + struct anv_bo bo; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + bool ready; +}; + +struct anv_shader_module { + uint32_t size; + char data[0]; +}; + +struct anv_shader { + struct anv_shader_module * module; + char entrypoint[0]; +}; + +struct anv_pipeline { + struct anv_device * device; + struct anv_batch batch; + uint32_t batch_data[256]; + struct anv_shader * shaders[VK_SHADER_STAGE_NUM]; + struct anv_pipeline_layout * layout; + 
bool use_repclear; + + struct brw_vs_prog_data vs_prog_data; + struct brw_wm_prog_data wm_prog_data; + struct brw_gs_prog_data gs_prog_data; + struct brw_cs_prog_data cs_prog_data; + bool writes_point_size; + struct brw_stage_prog_data * prog_data[VK_SHADER_STAGE_NUM]; + uint32_t scratch_start[VK_SHADER_STAGE_NUM]; + uint32_t total_scratch; + struct { + uint32_t vs_start; + uint32_t vs_size; + uint32_t nr_vs_entries; + uint32_t gs_start; + uint32_t gs_size; + uint32_t nr_gs_entries; + } urb; + + uint32_t active_stages; + struct anv_state_stream program_stream; + struct anv_state blend_state; + uint32_t vs_simd8; + uint32_t ps_simd8; + uint32_t ps_simd16; + uint32_t gs_vec4; + uint32_t gs_vertex_count; + uint32_t cs_simd; + + uint32_t vb_used; + uint32_t binding_stride[MAX_VBS]; + + uint32_t state_sf[GEN8_3DSTATE_SF_length]; + uint32_t state_vf[GEN8_3DSTATE_VF_length]; + uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; + uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + + uint32_t cs_thread_width_max; + uint32_t cs_right_mask; +}; + +struct anv_pipeline_create_info { + bool use_repclear; + bool disable_viewport; + bool disable_scissor; + bool disable_vs; + bool use_rectlist; +}; + +VkResult +anv_pipeline_create(VkDevice device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_pipeline_create_info *extra, + VkPipeline *pPipeline); + +struct anv_compiler *anv_compiler_create(struct anv_device *device); +void anv_compiler_destroy(struct anv_compiler *compiler); +int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline); +void anv_compiler_free(struct anv_pipeline *pipeline); + +struct anv_format { + const char *name; + uint16_t surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ + uint8_t cpp; /**< Bytes-per-pixel of anv_format::surface_format. 
*/ + uint8_t num_channels; + uint16_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ + bool has_stencil; +}; + +const struct anv_format * +anv_format_for_vk_format(VkFormat format); +bool anv_is_vk_format_depth_or_stencil(VkFormat format); + +/** + * A proxy for the color surfaces, depth surfaces, and stencil surfaces. + */ +struct anv_surface { + /** + * Offset from VkImage's base address, as bound by vkBindImageMemory(). + */ + uint32_t offset; + + uint32_t stride; /**< RENDER_SURFACE_STATE.SurfacePitch */ + uint16_t qpitch; /**< RENDER_SURFACE_STATE.QPitch */ + + /** + * \name Alignment of miptree images, in units of pixels. + * + * These fields contain the real alignment values, not the values to be + * given to the GPU. For example, if h_align is 4, then program the GPU + * with HALIGN_4. + * \{ + */ + uint8_t h_align; /**< RENDER_SURFACE_STATE.SurfaceHorizontalAlignment */ + uint8_t v_align; /**< RENDER_SURFACE_STATE.SurfaceVerticalAlignment */ + /** \} */ + + uint8_t tile_mode; /**< RENDER_SURFACE_STATE.TileMode */ +}; + +struct anv_image { + VkImageType type; + VkExtent3D extent; + VkFormat format; + uint32_t levels; + uint32_t array_size; + + VkDeviceSize size; + uint32_t alignment; + + /* Set when bound */ + struct anv_bo *bo; + VkDeviceSize offset; + + struct anv_swap_chain *swap_chain; + + /** RENDER_SURFACE_STATE.SurfaceType */ + uint8_t surf_type; + + /** Primary surface is either color or depth. */ + struct anv_surface primary_surface; + + /** Stencil surface is optional. 
*/ + struct anv_surface stencil_surface; +}; + +struct anv_surface_view { + struct anv_state surface_state; /**< RENDER_SURFACE_STATE */ + struct anv_bo *bo; + uint32_t offset; /**< VkBufferCreateInfo::offset */ + uint32_t range; /**< VkBufferCreateInfo::range */ + VkFormat format; /**< VkBufferCreateInfo::format */ +}; + +struct anv_buffer_view { + struct anv_surface_view view; +}; + +struct anv_image_view { + struct anv_surface_view view; + VkExtent3D extent; +}; + +enum anv_attachment_view_type { + ANV_ATTACHMENT_VIEW_TYPE_COLOR, + ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL, +}; + +struct anv_attachment_view { + enum anv_attachment_view_type attachment_type; + VkExtent3D extent; +}; + +struct anv_color_attachment_view { + struct anv_attachment_view base; + struct anv_surface_view view; +}; + +struct anv_depth_stencil_view { + struct anv_attachment_view base; + + struct anv_bo *bo; + + uint32_t depth_offset; /**< Offset into bo. */ + uint32_t depth_stride; /**< 3DSTATE_DEPTH_BUFFER.SurfacePitch */ + uint32_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ + uint16_t depth_qpitch; /**< 3DSTATE_DEPTH_BUFFER.SurfaceQPitch */ + + uint32_t stencil_offset; /**< Offset into bo. 
*/ + uint32_t stencil_stride; /**< 3DSTATE_STENCIL_BUFFER.SurfacePitch */ + uint16_t stencil_qpitch; /**< 3DSTATE_STENCIL_BUFFER.SurfaceQPitch */ +}; + +struct anv_image_create_info { + const VkImageCreateInfo *vk_info; + bool force_tile_mode; + uint8_t tile_mode; +}; + +VkResult anv_image_create(VkDevice _device, + const struct anv_image_create_info *info, + VkImage *pImage); + +void anv_image_view_init(struct anv_image_view *view, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + +void anv_color_attachment_view_init(struct anv_color_attachment_view *view, + struct anv_device *device, + const VkAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); +void anv_fill_buffer_surface_state(void *state, VkFormat format, + uint32_t offset, uint32_t range); + +void anv_surface_view_fini(struct anv_device *device, + struct anv_surface_view *view); + +struct anv_sampler { + uint32_t state[4]; +}; + +struct anv_framebuffer { + uint32_t width; + uint32_t height; + uint32_t layers; + + /* Viewport for clears */ + VkDynamicViewportState vp_state; + + uint32_t attachment_count; + const struct anv_attachment_view * attachments[0]; +}; + +struct anv_subpass { + uint32_t input_count; + uint32_t * input_attachments; + uint32_t color_count; + uint32_t * color_attachments; + uint32_t * resolve_attachments; + uint32_t depth_stencil_attachment; +}; + +struct anv_render_pass_attachment { + VkFormat format; + uint32_t samples; + VkAttachmentLoadOp load_op; + VkAttachmentLoadOp stencil_load_op; +}; + +struct anv_render_pass { + uint32_t attachment_count; + uint32_t subpass_count; + + struct anv_render_pass_attachment * attachments; + struct anv_subpass subpasses[0]; +}; + +void anv_device_init_meta(struct anv_device *device); +void anv_device_finish_meta(struct anv_device *device); + +void *anv_lookup_entrypoint(const char *name); + +#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static 
inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *) _handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType) _obj; \ + } + +#define ANV_DEFINE_NONDISP_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *) _handle.handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType) { .handle = (uint64_t) _obj }; \ + } + +#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ + struct __anv_type *__name = __anv_type ## _from_handle(__handle) + +ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCmdBuffer) +ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) +ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) +ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) +ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) +ANV_DEFINE_HANDLE_CASTS(anv_swap_chain, VkSwapChainWSI); + +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_attachment_view, VkAttachmentView) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView); +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_cb_state, VkDynamicColorBlendState) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_ds_state, VkDynamicDepthStencilState) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_rs_state, VkDynamicRasterState) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_vp_state, VkDynamicViewportState) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) 
+ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView); +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader, VkShader) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) + +#define ANV_DEFINE_STRUCT_CASTS(__anv_type, __VkType) \ + \ + static inline const __VkType * \ + __anv_type ## _to_ ## __VkType(const struct __anv_type *__anv_obj) \ + { \ + return (const __VkType *) __anv_obj; \ + } + +#define ANV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name) \ + const __VkType *__vk_name = anv_common_to_ ## __VkType(__common_name) + +ANV_DEFINE_STRUCT_CASTS(anv_common, VkMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkBufferMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkImageMemoryBarrier) + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c new file mode 100644 index 00000000000..d7903ee2cb8 --- /dev/null +++ b/src/vulkan/anv_query.c @@ -0,0 +1,352 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +struct anv_query_pool_slot { + uint64_t begin; + uint64_t end; + uint64_t available; +}; + +struct anv_query_pool { + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + +VkResult anv_CreateQueryPool( + VkDevice _device, + const VkQueryPoolCreateInfo* pCreateInfo, + VkQueryPool* pQueryPool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_query_pool *pool; + VkResult result; + size_t size; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); + + switch (pCreateInfo->queryType) { + case VK_QUERY_TYPE_OCCLUSION: + break; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + return VK_UNSUPPORTED; + default: + unreachable(""); + } + + pool = anv_device_alloc(device, sizeof(*pool), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + size = pCreateInfo->slots * sizeof(struct anv_query_pool_slot); + result = anv_bo_init_new(&pool->bo, device, size); + if (result != VK_SUCCESS) + goto fail; + + pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size); + + *pQueryPool = anv_query_pool_to_handle(pool); + + return VK_SUCCESS; + + fail: + anv_device_free(device, pool); + + return result; +} + +VkResult anv_DestroyQueryPool( + VkDevice _device, + VkQueryPool _pool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_query_pool, pool, _pool); + + anv_gem_munmap(pool->bo.map, 
pool->bo.size); + anv_gem_close(device, pool->bo.gem_handle); + anv_device_free(device, pool); + + return VK_SUCCESS; +} + +VkResult anv_GetQueryPoolResults( + VkDevice _device, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + size_t* pDataSize, + void* pData, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + struct anv_query_pool_slot *slot = pool->bo.map; + int64_t timeout = INT64_MAX; + uint32_t *dst32 = pData; + uint64_t *dst64 = pData; + uint64_t result; + int ret; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availabilty info supposed to go? */ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return VK_UNSUPPORTED; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + if (flags & VK_QUERY_RESULT_64_BIT) + *pDataSize = queryCount * sizeof(uint64_t); + else + *pDataSize = queryCount * sizeof(uint32_t); + + if (pData == NULL) + return VK_SUCCESS; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); + if (ret == -1) + return vk_error(VK_ERROR_UNKNOWN); + } + + for (uint32_t i = 0; i < queryCount; i++) { + result = slot[startQuery + i].end - slot[startQuery + i].begin; + if (flags & VK_QUERY_RESULT_64_BIT) { + *dst64++ = result; + } else { + if (result > UINT32_MAX) + result = UINT32_MAX; + *dst32++ = result; + } + } + + return VK_SUCCESS; +} + +static void +anv_batch_emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ +} + +void anv_CmdBeginQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot, + VkQueryControlFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, 
queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void anv_CmdEndQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot) + 8); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void anv_CmdResetQueryPool( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount) +{ + stub(); +} + +#define TIMESTAMP 0x2358 + +void anv_CmdWriteTimestamp( + VkCmdBuffer cmdBuffer, + VkTimestampType timestampType, + VkBuffer destBuffer, + VkDeviceSize destOffset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + struct anv_bo *bo = buffer->bo; + + switch (timestampType) { + case VK_TIMESTAMP_TYPE_TOP: + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { bo, buffer->offset + destOffset }); + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); + break; + + case VK_TIMESTAMP_TYPE_BOTTOM: + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = /* FIXME: This is only lower 32 bits */ + { bo, buffer->offset + destOffset }); + break; + + default: + break; + } +} + +#define alu_opcode(v) __gen_field((v), 20, 31) +#define alu_operand1(v) __gen_field((v), 10, 19) +#define alu_operand2(v) 
__gen_field((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +void anv_CmdCopyQueryPoolResults( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availabilty info supposed to go? */ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + /* FIXME: If we're not waiting, should we just do this on the CPU? 
*/ + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); + + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2), + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2) + 4, + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset + 4 }); + + dst_offset += destStride; + } +} diff --git a/src/vulkan/anv_util.c b/src/vulkan/anv_util.c new file mode 100644 index 00000000000..820356675c7 --- /dev/null +++ b/src/vulkan/anv_util.c @@ -0,0 +1,151 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice 
and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/** Log an error message. */ +void anv_printflike(1, 2) +anv_loge(const char *format, ...) +{ + va_list va; + + va_start(va, format); + anv_loge_v(format, va); + va_end(va); +} + +/** \see anv_loge() */ +void +anv_loge_v(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); +} + +void anv_printflike(3, 4) +__anv_finishme(const char *file, int line, const char *format, ...) +{ + va_list ap; + char buffer[256]; + + va_start(ap, format); + vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); +} + +void anv_noreturn anv_printflike(1, 2) +anv_abortf(const char *format, ...) 
+{ + va_list va; + + va_start(va, format); + anv_abortfv(format, va); + va_end(va); +} + +void anv_noreturn +anv_abortfv(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); + abort(); +} + +int +anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) +{ + assert(is_power_of_two(size)); + assert(element_size < size && is_power_of_two(element_size)); + + vector->head = 0; + vector->tail = 0; + vector->element_size = element_size; + vector->size = size; + vector->data = malloc(size); + + return vector->data != NULL; +} + +void * +anv_vector_add(struct anv_vector *vector) +{ + uint32_t offset, size, split, tail; + void *data; + + if (vector->head - vector->tail == vector->size) { + size = vector->size * 2; + data = malloc(size); + if (data == NULL) + return NULL; + split = align_u32(vector->tail, vector->size); + tail = vector->tail & (vector->size - 1); + if (vector->head - split < vector->size) { + memcpy(data + tail, + vector->data + tail, + split - vector->tail); + memcpy(data + vector->size, + vector->data, vector->head - split); + } else { + memcpy(data + tail, + vector->data + tail, + vector->head - vector->tail); + } + free(vector->data); + vector->data = data; + vector->size = size; + } + + assert(vector->head - vector->tail < vector->size); + + offset = vector->head & (vector->size - 1); + vector->head += vector->element_size; + + return vector->data + offset; +} + +void * +anv_vector_remove(struct anv_vector *vector) +{ + uint32_t offset; + + if (vector->head == vector->tail) + return NULL; + + assert(vector->head - vector->tail <= vector->size); + + offset = vector->tail & (vector->size - 1); + vector->tail += vector->element_size; + + return vector->data + offset; +} diff --git a/src/vulkan/anv_x11.c b/src/vulkan/anv_x11.c new file mode 100644 index 00000000000..9ffce8d8cbf --- /dev/null +++ b/src/vulkan/anv_x11.c @@ -0,0 +1,299 @@ +/* + * Copyright © 2015 
Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_private.h" + +#include +#include +#include + +static const VkFormat formats[] = { + VK_FORMAT_B5G6R5_UNORM, + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SRGB, +}; + +VkResult anv_GetDisplayInfoWSI( + VkDisplayWSI display, + VkDisplayInfoTypeWSI infoType, + size_t* pDataSize, + void* pData) +{ + VkDisplayFormatPropertiesWSI *properties = pData; + size_t size; + + if (pDataSize == NULL) + return VK_ERROR_INVALID_POINTER; + + switch (infoType) { + case VK_DISPLAY_INFO_TYPE_FORMAT_PROPERTIES_WSI: + size = sizeof(properties[0]) * ARRAY_SIZE(formats); + + if (pData == NULL) { + *pDataSize = size; + return VK_SUCCESS; + } + + if (*pDataSize < size) + return vk_error(VK_ERROR_INVALID_VALUE); + + *pDataSize = size; + + for (uint32_t i = 0; i < ARRAY_SIZE(formats); i++) + properties[i].swapChainFormat = formats[i]; + + return VK_SUCCESS; + + default: + return VK_UNSUPPORTED; + } +} + +struct anv_swap_chain { + struct anv_device * device; + xcb_connection_t * conn; + xcb_window_t window; + xcb_gc_t gc; + VkExtent2D extent; + uint32_t count; + struct { + struct anv_image * image; + struct anv_device_memory * memory; + xcb_pixmap_t pixmap; + } images[0]; +}; + +VkResult anv_CreateSwapChainWSI( + VkDevice _device, + const VkSwapChainCreateInfoWSI* pCreateInfo, + VkSwapChainWSI* pSwapChain) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + struct anv_swap_chain *chain; + xcb_void_cookie_t cookie; + VkResult result; + size_t size; + int ret; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI); + + size = sizeof(*chain) + pCreateInfo->imageCount * sizeof(chain->images[0]); + chain = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->device = device; + chain->conn = (xcb_connection_t *) pCreateInfo->pNativeWindowSystemHandle; + chain->window = (xcb_window_t) (uintptr_t) pCreateInfo->pNativeWindowHandle; + chain->count = 
pCreateInfo->imageCount; + chain->extent = pCreateInfo->imageExtent; + + for (uint32_t i = 0; i < chain->count; i++) { + VkDeviceMemory memory_h; + VkImage image_h; + struct anv_image *image; + struct anv_surface *surface; + struct anv_device_memory *memory; + + anv_image_create(_device, + &(struct anv_image_create_info) { + .force_tile_mode = true, + .tile_mode = XMAJOR, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->imageFormat, + .extent = { + .width = pCreateInfo->imageExtent.width, + .height = pCreateInfo->imageExtent.height, + .depth = 1 + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + &image_h); + + image = anv_image_from_handle(image_h); + surface = &image->primary_surface; + + anv_AllocMemory(_device, + &(VkMemoryAllocInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + .allocationSize = image->size, + .memoryTypeIndex = 0, + }, + &memory_h); + + memory = anv_device_memory_from_handle(memory_h); + + anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), + memory_h, 0); + + ret = anv_gem_set_tiling(device, memory->bo.gem_handle, + surface->stride, I915_TILING_X); + if (ret) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); + if (fd == -1) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + uint32_t bpp = 32; + uint32_t depth = 24; + xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); + + cookie = + xcb_dri3_pixmap_from_buffer_checked(chain->conn, + pixmap, + chain->window, + image->size, + pCreateInfo->imageExtent.width, + pCreateInfo->imageExtent.height, + surface->stride, + depth, bpp, fd); + + chain->images[i].image = image; + chain->images[i].memory = memory; + chain->images[i].pixmap = 
pixmap; + image->swap_chain = chain; + + xcb_discard_reply(chain->conn, cookie.sequence); + } + + chain->gc = xcb_generate_id(chain->conn); + if (!chain->gc) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + cookie = xcb_create_gc(chain->conn, + chain->gc, + chain->window, + XCB_GC_GRAPHICS_EXPOSURES, + (uint32_t []) { 0 }); + xcb_discard_reply(chain->conn, cookie.sequence); + + *pSwapChain = anv_swap_chain_to_handle(chain); + + return VK_SUCCESS; + + fail: + return result; +} + +VkResult anv_DestroySwapChainWSI( + VkSwapChainWSI _chain) +{ + ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); + + anv_device_free(chain->device, chain); + + return VK_SUCCESS; +} + +VkResult anv_GetSwapChainInfoWSI( + VkSwapChainWSI _chain, + VkSwapChainInfoTypeWSI infoType, + size_t* pDataSize, + void* pData) +{ + ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); + + VkSwapChainImageInfoWSI *images; + size_t size; + + switch (infoType) { + case VK_SWAP_CHAIN_INFO_TYPE_PERSISTENT_IMAGES_WSI: + size = sizeof(*images) * chain->count; + if (pData && *pDataSize < size) + return VK_ERROR_INVALID_VALUE; + + *pDataSize = size; + if (!pData) + return VK_SUCCESS; + + images = pData; + for (uint32_t i = 0; i < chain->count; i++) { + images[i].image = anv_image_to_handle(chain->images[i].image); + images[i].memory = anv_device_memory_to_handle(chain->images[i].memory); + } + + return VK_SUCCESS; + + default: + return VK_UNSUPPORTED; + } +} + +VkResult anv_QueuePresentWSI( + VkQueue queue_, + const VkPresentInfoWSI* pPresentInfo) +{ + ANV_FROM_HANDLE(anv_image, image, pPresentInfo->image); + + struct anv_swap_chain *chain = image->swap_chain; + xcb_void_cookie_t cookie; + xcb_pixmap_t pixmap; + + assert(pPresentInfo->sType == VK_STRUCTURE_TYPE_PRESENT_INFO_WSI); + + if (chain == NULL) + return vk_error(VK_ERROR_INVALID_VALUE); + + pixmap = XCB_NONE; + for (uint32_t i = 0; i < chain->count; i++) { + if (image == chain->images[i].image) { + pixmap = chain->images[i].pixmap; + break; + } + } 
+ + if (pixmap == XCB_NONE) + return vk_error(VK_ERROR_INVALID_VALUE); + + cookie = xcb_copy_area(chain->conn, + pixmap, + chain->window, + chain->gc, + 0, 0, + 0, 0, + chain->extent.width, + chain->extent.height); + xcb_discard_reply(chain->conn, cookie.sequence); + + xcb_flush(chain->conn); + + return VK_SUCCESS; +} diff --git a/src/vulkan/aub.c b/src/vulkan/aub.c deleted file mode 100644 index c33defd48f5..00000000000 --- a/src/vulkan/aub.c +++ /dev/null @@ -1,310 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "private.h" -#include "aub.h" - -struct anv_aub_writer { - FILE *file; - uint32_t offset; - int gen; -}; - -static void -aub_out(struct anv_aub_writer *writer, uint32_t data) -{ - fwrite(&data, 1, 4, writer->file); -} - -static void -aub_out_data(struct anv_aub_writer *writer, const void *data, size_t size) -{ - fwrite(data, 1, size, writer->file); -} - -static struct anv_aub_writer * -get_anv_aub_writer(struct anv_device *device) -{ - struct anv_aub_writer *writer = device->aub_writer; - int entry = 0x200003; - int i; - int gtt_size = 0x10000; - const char *filename; - - if (geteuid() != getuid()) - return NULL; - - if (writer) - return writer; - - writer = malloc(sizeof(*writer)); - if (writer == NULL) - return NULL; - - filename = "intel.aub"; - writer->gen = device->info.gen; - writer->file = fopen(filename, "w+"); - if (!writer->file) { - free(writer); - return NULL; - } - - /* Start allocating objects from just after the GTT. */ - writer->offset = gtt_size; - - /* Start with a (required) version packet. */ - aub_out(writer, CMD_AUB_HEADER | (13 - 2)); - aub_out(writer, - (4 << AUB_HEADER_MAJOR_SHIFT) | - (0 << AUB_HEADER_MINOR_SHIFT)); - for (i = 0; i < 8; i++) { - aub_out(writer, 0); /* app name */ - } - aub_out(writer, 0); /* timestamp */ - aub_out(writer, 0); /* timestamp */ - aub_out(writer, 0); /* comment len */ - - /* Set up the GTT. The max we can handle is 256M */ - aub_out(writer, CMD_AUB_TRACE_HEADER_BLOCK | ((writer->gen >= 8 ? 
6 : 5) - 2)); - aub_out(writer, - AUB_TRACE_MEMTYPE_GTT_ENTRY | - AUB_TRACE_TYPE_NOTYPE | AUB_TRACE_OP_DATA_WRITE); - aub_out(writer, 0); /* subtype */ - aub_out(writer, 0); /* offset */ - aub_out(writer, gtt_size); /* size */ - if (writer->gen >= 8) - aub_out(writer, 0); - for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { - aub_out(writer, entry); - } - - return device->aub_writer = writer; -} - -void -anv_aub_writer_destroy(struct anv_aub_writer *writer) -{ - fclose(writer->file); - free(writer); -} - - -/** - * Break up large objects into multiple writes. Otherwise a 128kb VBO - * would overflow the 16 bits of size field in the packet header and - * everything goes badly after that. - */ -static void -aub_write_trace_block(struct anv_aub_writer *writer, uint32_t type, - void *virtual, uint32_t size, uint32_t gtt_offset) -{ - uint32_t block_size; - uint32_t offset; - uint32_t subtype = 0; - static const char null_block[8 * 4096]; - - for (offset = 0; offset < size; offset += block_size) { - block_size = size - offset; - - if (block_size > 8 * 4096) - block_size = 8 * 4096; - - aub_out(writer, - CMD_AUB_TRACE_HEADER_BLOCK | - ((writer->gen >= 8 ? 6 : 5) - 2)); - aub_out(writer, - AUB_TRACE_MEMTYPE_GTT | - type | AUB_TRACE_OP_DATA_WRITE); - aub_out(writer, subtype); - aub_out(writer, gtt_offset + offset); - aub_out(writer, align_u32(block_size, 4)); - if (writer->gen >= 8) - aub_out(writer, 0); - - if (virtual) - aub_out_data(writer, (char *) virtual + offset, block_size); - else - aub_out_data(writer, null_block, block_size); - - /* Pad to a multiple of 4 bytes. 
*/ - aub_out_data(writer, null_block, -block_size & 3); - } -} - -/* - * Make a ringbuffer on fly and dump it - */ -static void -aub_build_dump_ringbuffer(struct anv_aub_writer *writer, - uint32_t batch_offset, uint32_t offset, - int ring_flag) -{ - uint32_t ringbuffer[4096]; - int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ - int ring_count = 0; - - if (ring_flag == I915_EXEC_BSD) - ring = AUB_TRACE_TYPE_RING_PRB1; - else if (ring_flag == I915_EXEC_BLT) - ring = AUB_TRACE_TYPE_RING_PRB2; - - /* Make a ring buffer to execute our batchbuffer. */ - memset(ringbuffer, 0, sizeof(ringbuffer)); - if (writer->gen >= 8) { - ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); - ringbuffer[ring_count++] = batch_offset; - ringbuffer[ring_count++] = 0; - } else { - ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; - ringbuffer[ring_count++] = batch_offset; - } - - /* Write out the ring. This appears to trigger execution of - * the ring in the simulator. - */ - aub_out(writer, - CMD_AUB_TRACE_HEADER_BLOCK | - ((writer->gen >= 8 ? 6 : 5) - 2)); - aub_out(writer, - AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); - aub_out(writer, 0); /* general/surface subtype */ - aub_out(writer, offset); - aub_out(writer, ring_count * 4); - if (writer->gen >= 8) - aub_out(writer, 0); - - /* FIXME: Need some flush operations here? 
*/ - aub_out_data(writer, ringbuffer, ring_count * 4); -} - -struct aub_bo { - uint32_t offset; - void *map; - void *relocated; -}; - -static void -relocate_bo(struct anv_bo *bo, struct drm_i915_gem_relocation_entry *relocs, - size_t num_relocs, struct aub_bo *bos) -{ - struct aub_bo *aub_bo = &bos[bo->index]; - struct drm_i915_gem_relocation_entry *reloc; - uint32_t *dw; - - aub_bo->relocated = malloc(bo->size); - memcpy(aub_bo->relocated, aub_bo->map, bo->size); - for (size_t i = 0; i < num_relocs; i++) { - reloc = &relocs[i]; - assert(reloc->offset < bo->size); - dw = aub_bo->relocated + reloc->offset; - *dw = bos[reloc->target_handle].offset + reloc->delta; - } -} - -void -anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_batch *batch = &cmd_buffer->batch; - struct anv_aub_writer *writer; - struct anv_bo *bo; - uint32_t ring_flag = 0; - uint32_t offset; - struct aub_bo *aub_bos; - - writer = get_anv_aub_writer(device); - if (writer == NULL) - return; - - aub_bos = malloc(cmd_buffer->exec2_bo_count * sizeof(aub_bos[0])); - offset = writer->offset; - for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) { - bo = cmd_buffer->exec2_bos[i]; - if (bo->map) - aub_bos[i].map = bo->map; - else - aub_bos[i].map = anv_gem_mmap(device, bo->gem_handle, 0, bo->size); - aub_bos[i].relocated = aub_bos[i].map; - aub_bos[i].offset = offset; - offset = align_u32(offset + bo->size + 4095, 4096); - } - - struct anv_batch_bo *first_bbo; - for (struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; - bbo != NULL; bbo = bbo->prev_batch_bo) { - /* Keep stashing the current BO until we get to the beginning */ - first_bbo = bbo; - - /* Handle relocations for this batch BO */ - relocate_bo(&bbo->bo, &batch->relocs.relocs[bbo->first_reloc], - bbo->num_relocs, aub_bos); - } - assert(first_bbo->prev_batch_bo == NULL); - - for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo; - bbo != NULL; bbo = 
bbo->prev_batch_bo) { - - /* Handle relocations for this surface state BO */ - relocate_bo(&bbo->bo, - &cmd_buffer->surface_relocs.relocs[bbo->first_reloc], - bbo->num_relocs, aub_bos); - } - - for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) { - bo = cmd_buffer->exec2_bos[i]; - if (i == cmd_buffer->exec2_bo_count - 1) { - assert(bo == &first_bbo->bo); - aub_write_trace_block(writer, AUB_TRACE_TYPE_BATCH, - aub_bos[i].relocated, - first_bbo->length, aub_bos[i].offset); - } else { - aub_write_trace_block(writer, AUB_TRACE_TYPE_NOTYPE, - aub_bos[i].relocated, - bo->size, aub_bos[i].offset); - } - if (aub_bos[i].relocated != aub_bos[i].map) - free(aub_bos[i].relocated); - if (aub_bos[i].map != bo->map) - anv_gem_munmap(aub_bos[i].map, bo->size); - } - - /* Dump ring buffer */ - aub_build_dump_ringbuffer(writer, aub_bos[first_bbo->bo.index].offset, - offset, ring_flag); - - free(aub_bos); - - fflush(writer->file); -} diff --git a/src/vulkan/aub.h b/src/vulkan/aub.h deleted file mode 100644 index 7a67712ff9c..00000000000 --- a/src/vulkan/aub.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -/** @file intel_aub.h - * - * The AUB file is a file format used by Intel's internal simulation - * and other validation tools. It can be used at various levels by a - * driver to input state to the simulated hardware or a replaying - * debugger. - * - * We choose to dump AUB files using the trace block format for ease - * of implementation -- dump out the blocks of memory as plain blobs - * and insert ring commands to execute the batchbuffer blob. - */ - -#ifndef _INTEL_AUB_H -#define _INTEL_AUB_H - -#define AUB_MI_NOOP (0) -#define AUB_MI_BATCH_BUFFER_START (0x31 << 23) -#define AUB_PIPE_CONTROL (0x7a000002) - -/* DW0: instruction type. */ - -#define CMD_AUB (7 << 29) - -#define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16)) -/* DW1 */ -# define AUB_HEADER_MAJOR_SHIFT 24 -# define AUB_HEADER_MINOR_SHIFT 16 - -#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16)) -#define CMD_AUB_DUMP_BMP (CMD_AUB | (1 << 23) | (0x9e << 16)) - -/* DW1 */ -#define AUB_TRACE_OPERATION_MASK 0x000000ff -#define AUB_TRACE_OP_COMMENT 0x00000000 -#define AUB_TRACE_OP_DATA_WRITE 0x00000001 -#define AUB_TRACE_OP_COMMAND_WRITE 0x00000002 -#define AUB_TRACE_OP_MMIO_WRITE 0x00000003 -// operation = TRACE_DATA_WRITE, Type -#define AUB_TRACE_TYPE_MASK 0x0000ff00 -#define AUB_TRACE_TYPE_NOTYPE (0 << 8) -#define AUB_TRACE_TYPE_BATCH (1 << 8) -#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) -#define AUB_TRACE_TYPE_2D_MAP (6 << 8) -#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8) -#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) -#define AUB_TRACE_TYPE_1D_MAP (10 << 8) -#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) -#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8) -#define 
AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8) -#define AUB_TRACE_TYPE_GENERAL (14 << 8) -#define AUB_TRACE_TYPE_SURFACE (15 << 8) - - -// operation = TRACE_COMMAND_WRITE, Type = -#define AUB_TRACE_TYPE_RING_HWB (1 << 8) -#define AUB_TRACE_TYPE_RING_PRB0 (2 << 8) -#define AUB_TRACE_TYPE_RING_PRB1 (3 << 8) -#define AUB_TRACE_TYPE_RING_PRB2 (4 << 8) - -// Address space -#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff0000 -#define AUB_TRACE_MEMTYPE_GTT (0 << 16) -#define AUB_TRACE_MEMTYPE_LOCAL (1 << 16) -#define AUB_TRACE_MEMTYPE_NONLOCAL (2 << 16) -#define AUB_TRACE_MEMTYPE_PCI (3 << 16) -#define AUB_TRACE_MEMTYPE_GTT_ENTRY (4 << 16) - -/* DW2 */ - -/** - * aub_state_struct_type enum values are encoded with the top 16 bits - * representing the type to be delivered to the .aub file, and the bottom 16 - * bits representing the subtype. This macro performs the encoding. - */ -#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype)) - -enum aub_state_struct_type { - AUB_TRACE_VS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1), - AUB_TRACE_GS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2), - AUB_TRACE_CLIP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 3), - AUB_TRACE_SF_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 4), - AUB_TRACE_WM_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 5), - AUB_TRACE_CC_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 6), - AUB_TRACE_CLIP_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 7), - AUB_TRACE_SF_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 8), - AUB_TRACE_CC_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x9), - AUB_TRACE_SAMPLER_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xa), - AUB_TRACE_KERNEL_INSTRUCTIONS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xb), - AUB_TRACE_SCRATCH_SPACE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xc), - AUB_TRACE_SAMPLER_DEFAULT_COLOR = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xd), - - AUB_TRACE_SCISSOR_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x15), - AUB_TRACE_BLEND_STATE = 
ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x16), - AUB_TRACE_DEPTH_STENCIL_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x17), - - AUB_TRACE_VERTEX_BUFFER = ENCODE_SS_TYPE(AUB_TRACE_TYPE_VERTEX_BUFFER, 0), - AUB_TRACE_BINDING_TABLE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x100), - AUB_TRACE_SURFACE_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x200), - AUB_TRACE_VS_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 0), - AUB_TRACE_WM_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 1), -}; - -#undef ENCODE_SS_TYPE - -/** - * Decode a aub_state_struct_type value to determine the type that should be - * stored in the .aub file. - */ -static inline uint32_t AUB_TRACE_TYPE(enum aub_state_struct_type ss_type) -{ - return (ss_type & 0xFFFF0000) >> 16; -} - -/** - * Decode a state_struct_type value to determine the subtype that should be - * stored in the .aub file. - */ -static inline uint32_t AUB_TRACE_SUBTYPE(enum aub_state_struct_type ss_type) -{ - return ss_type & 0xFFFF; -} - -/* DW3: address */ -/* DW4: len */ - -#endif /* _INTEL_AUB_H */ diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp deleted file mode 100644 index 9152de63ec9..00000000000 --- a/src/vulkan/compiler.cpp +++ /dev/null @@ -1,1209 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include - -#include "private.h" - -#include -#include /* brw_new_shader_program is here */ -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - -/* XXX: We need this to keep symbols in nir.h from conflicting with the - * generated GEN command packing headers. We need to fix *both* to not - * define something as generic as LOAD. - */ -#undef LOAD - -#include - -#define SPIR_V_MAGIC_NUMBER 0x07230203 - -static void -fail_if(int cond, const char *format, ...) -{ - va_list args; - - if (!cond) - return; - - va_start(args, format); - vfprintf(stderr, format, args); - va_end(args); - - exit(1); -} - -static VkResult -set_binding_table_layout(struct brw_stage_prog_data *prog_data, - struct anv_pipeline *pipeline, uint32_t stage) -{ - uint32_t bias, count, k, *map; - struct anv_pipeline_layout *layout = pipeline->layout; - - /* No layout is valid for shaders that don't bind any resources. 
*/ - if (pipeline->layout == NULL) - return VK_SUCCESS; - - if (stage == VK_SHADER_STAGE_FRAGMENT) - bias = MAX_RTS; - else - bias = 0; - - count = layout->stage[stage].surface_count; - prog_data->map_entries = - (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0])); - if (prog_data->map_entries == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - k = bias; - map = prog_data->map_entries; - for (uint32_t i = 0; i < layout->num_sets; i++) { - prog_data->bind_map[i].index = map; - for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++) - *map++ = k++; - - prog_data->bind_map[i].index_count = - layout->set[i].layout->stage[stage].surface_count; - } - - return VK_SUCCESS; -} - -static void -brw_vs_populate_key(struct brw_context *brw, - struct brw_vertex_program *vp, - struct brw_vs_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_VERTEX_PROGRAM */ - struct gl_program *prog = (struct gl_program *) vp; - - memset(key, 0, sizeof(*key)); - - /* Just upload the program verbatim for now. Always send it all - * the inputs it asks for, whether they are varying or not. 
- */ - key->base.program_string_id = vp->id; - brw_setup_vue_key_clip_info(brw, &key->base, - vp->program.Base.UsesClipDistanceOut); - - /* _NEW_POLYGON */ - if (brw->gen < 6) { - key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); - } - - if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 | - VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) { - /* _NEW_LIGHT | _NEW_BUFFERS */ - key->clamp_vertex_color = ctx->Light._ClampVertexColor; - } - - /* _NEW_POINT */ - if (brw->gen < 6 && ctx->Point.PointSprite) { - for (int i = 0; i < 8; i++) { - if (ctx->Point.CoordReplace[i]) - key->point_coord_replace |= (1 << i); - } - } - - /* _NEW_TEXTURE */ - brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count, - &key->base.tex); -} - -static bool -really_do_vs_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_vertex_program *vp, - struct brw_vs_prog_key *key, struct anv_pipeline *pipeline) -{ - GLuint program_size; - const GLuint *program; - struct brw_vs_compile c; - struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; - struct brw_stage_prog_data *stage_prog_data = &prog_data->base.base; - void *mem_ctx; - struct gl_shader *vs = NULL; - - if (prog) - vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; - - memset(&c, 0, sizeof(c)); - memcpy(&c.key, key, sizeof(*key)); - memset(prog_data, 0, sizeof(*prog_data)); - - mem_ctx = ralloc_context(NULL); - - c.vp = vp; - - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - */ - int param_count; - if (vs) { - /* We add padding around uniform values below vec4 size, with the worst - * case being a float value that gets blown up to a vec4, so be - * conservative here. 
- */ - param_count = vs->num_uniform_components * 4; - - } else { - param_count = vp->program.Base.Parameters->NumParameters * 4; - } - /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip - * planes as uniforms. - */ - param_count += c.key.base.nr_userclip_plane_consts * 4; - - /* Setting nr_params here NOT to the size of the param and pull_param - * arrays, but to the number of uniform components vec4_visitor - * needs. vec4_visitor::setup_uniforms() will set it back to a proper value. - */ - stage_prog_data->nr_params = ALIGN(param_count, 4) / 4; - if (vs) { - stage_prog_data->nr_params += vs->num_samplers; - } - - GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; - prog_data->inputs_read = vp->program.Base.InputsRead; - - if (c.key.copy_edgeflag) { - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); - prog_data->inputs_read |= VERT_BIT_EDGEFLAG; - } - - if (brw->gen < 6) { - /* Put dummy slots into the VUE for the SF to put the replaced - * point sprite coords in. We shouldn't need these dummy slots, - * which take up precious URB space, but it would mean that the SF - * doesn't get nice aligned pairs of input coords into output - * coords, which would be a pain to handle. - */ - for (int i = 0; i < 8; i++) { - if (c.key.point_coord_replace & (1 << i)) - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); - } - - /* if back colors are written, allocate slots for front colors too */ - if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); - if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); - } - - /* In order for legacy clipping to work, we need to populate the clip - * distance varying slots whenever clipping is enabled, even if the vertex - * shader doesn't write to gl_ClipDistance. 
- */ - if (c.key.base.userclip_active) { - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); - } - - brw_compute_vue_map(brw->intelScreen->devinfo, - &prog_data->base.vue_map, outputs_written); -\ - set_binding_table_layout(&prog_data->base.base, pipeline, - VK_SHADER_STAGE_VERTEX); - - /* Emit GEN4 code. - */ - program = brw_vs_emit(brw, prog, &c, prog_data, mem_ctx, &program_size); - if (program == NULL) { - ralloc_free(mem_ctx); - return false; - } - - struct anv_state vs_state = anv_state_stream_alloc(&pipeline->program_stream, - program_size, 64); - memcpy(vs_state.map, program, program_size); - - pipeline->vs_simd8 = vs_state.offset; - - ralloc_free(mem_ctx); - - return true; -} - -void brw_wm_populate_key(struct brw_context *brw, - struct brw_fragment_program *fp, - struct brw_wm_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - struct gl_program *prog = (struct gl_program *) brw->fragment_program; - GLuint lookup = 0; - GLuint line_aa; - bool program_uses_dfdy = fp->program.UsesDFdy; - struct gl_framebuffer draw_buffer; - bool multisample_fbo; - - memset(key, 0, sizeof(*key)); - - for (int i = 0; i < MAX_SAMPLERS; i++) { - /* Assume color sampler, no swizzling. */ - key->tex.swizzles[i] = SWIZZLE_XYZW; - } - - /* A non-zero framebuffer name indicates that the framebuffer was created by - * the user rather than the window system. 
*/ - draw_buffer.Name = 1; - draw_buffer.Visual.samples = 1; - draw_buffer._NumColorDrawBuffers = 1; - draw_buffer._NumColorDrawBuffers = 1; - draw_buffer.Width = 400; - draw_buffer.Height = 400; - ctx->DrawBuffer = &draw_buffer; - - multisample_fbo = ctx->DrawBuffer->Visual.samples > 1; - - /* Build the index for table lookup - */ - if (brw->gen < 6) { - /* _NEW_COLOR */ - if (fp->program.UsesKill || ctx->Color.AlphaEnabled) - lookup |= IZ_PS_KILL_ALPHATEST_BIT; - - if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) - lookup |= IZ_PS_COMPUTES_DEPTH_BIT; - - /* _NEW_DEPTH */ - if (ctx->Depth.Test) - lookup |= IZ_DEPTH_TEST_ENABLE_BIT; - - if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */ - lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; - - /* _NEW_STENCIL | _NEW_BUFFERS */ - if (ctx->Stencil._Enabled) { - lookup |= IZ_STENCIL_TEST_ENABLE_BIT; - - if (ctx->Stencil.WriteMask[0] || - ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) - lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; - } - key->iz_lookup = lookup; - } - - line_aa = AA_NEVER; - - /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ - if (ctx->Line.SmoothFlag) { - if (brw->reduced_primitive == GL_LINES) { - line_aa = AA_ALWAYS; - } - else if (brw->reduced_primitive == GL_TRIANGLES) { - if (ctx->Polygon.FrontMode == GL_LINE) { - line_aa = AA_SOMETIMES; - - if (ctx->Polygon.BackMode == GL_LINE || - (ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_BACK)) - line_aa = AA_ALWAYS; - } - else if (ctx->Polygon.BackMode == GL_LINE) { - line_aa = AA_SOMETIMES; - - if ((ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_FRONT)) - line_aa = AA_ALWAYS; - } - } - } - - key->line_aa = line_aa; - - /* _NEW_HINT */ - key->high_quality_derivatives = - ctx->Hint.FragmentShaderDerivative == GL_NICEST; - - if (brw->gen < 6) - key->stats_wm = brw->stats_wm; - - /* _NEW_LIGHT */ - key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); - - /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ - key->clamp_fragment_color = 
ctx->Color._ClampFragmentColor; - - /* _NEW_TEXTURE */ - brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count, - &key->tex); - - /* _NEW_BUFFERS */ - /* - * Include the draw buffer origin and height so that we can calculate - * fragment position values relative to the bottom left of the drawable, - * from the incoming screen origin relative position we get as part of our - * payload. - * - * This is only needed for the WM_WPOSXY opcode when the fragment program - * uses the gl_FragCoord input. - * - * We could avoid recompiling by including this as a constant referenced by - * our program, but if we were to do that it would also be nice to handle - * getting that constant updated at batchbuffer submit time (when we - * hold the lock and know where the buffer really is) rather than at emit - * time when we don't hold the lock and are just guessing. We could also - * just avoid using this as key data if the program doesn't use - * fragment.position. - * - * For DRI2 the origin_x/y will always be (0,0) but we still need the - * drawable height in order to invert the Y axis. - */ - if (fp->program.Base.InputsRead & VARYING_BIT_POS) { - key->drawable_height = ctx->DrawBuffer->Height; - } - - if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { - key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); - } - - /* _NEW_BUFFERS */ - key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; - - /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */ - key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 && - (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled); - - /* _NEW_BUFFERS _NEW_MULTISAMPLE */ - /* Ignore sample qualifier while computing this flag. 
*/ - key->persample_shading = - _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1; - if (key->persample_shading) - key->persample_2x = ctx->DrawBuffer->Visual.samples == 2; - - key->compute_pos_offset = - _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 && - fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS; - - key->compute_sample_id = - multisample_fbo && - ctx->Multisample.Enabled && - (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID); - - /* BRW_NEW_VUE_MAP_GEOM_OUT */ - if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & - BRW_FS_VARYING_INPUT_MASK) > 16) - key->input_slots_valid = brw->vue_map_geom_out.slots_valid; - - - /* _NEW_COLOR | _NEW_BUFFERS */ - /* Pre-gen6, the hardware alpha test always used each render - * target's alpha to do alpha test, as opposed to render target 0's alpha - * like GL requires. Fix that by building the alpha test into the - * shader, and we'll skip enabling the fixed function alpha test. 
- */ - if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) { - key->alpha_test_func = ctx->Color.AlphaFunc; - key->alpha_test_ref = ctx->Color.AlphaRef; - } - - /* The unique fragment program ID */ - key->program_string_id = fp->id; - - ctx->DrawBuffer = NULL; -} - -static uint8_t -computed_depth_mode(struct gl_fragment_program *fp) -{ - if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - switch (fp->FragDepthLayout) { - case FRAG_DEPTH_LAYOUT_NONE: - case FRAG_DEPTH_LAYOUT_ANY: - return BRW_PSCDEPTH_ON; - case FRAG_DEPTH_LAYOUT_GREATER: - return BRW_PSCDEPTH_ON_GE; - case FRAG_DEPTH_LAYOUT_LESS: - return BRW_PSCDEPTH_ON_LE; - case FRAG_DEPTH_LAYOUT_UNCHANGED: - return BRW_PSCDEPTH_OFF; - } - } - return BRW_PSCDEPTH_OFF; -} - -static bool -really_do_wm_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_fragment_program *fp, - struct brw_wm_prog_key *key, struct anv_pipeline *pipeline) -{ - struct gl_context *ctx = &brw->ctx; - void *mem_ctx = ralloc_context(NULL); - struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; - struct gl_shader *fs = NULL; - unsigned int program_size; - const uint32_t *program; - - if (prog) - fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - - memset(prog_data, 0, sizeof(*prog_data)); - - /* key->alpha_test_func means simulating alpha testing via discards, - * so the shader definitely kills pixels. - */ - prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func; - - prog_data->computed_depth_mode = computed_depth_mode(&fp->program); - - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - */ - int param_count; - if (fs) { - param_count = fs->num_uniform_components; - } else { - param_count = fp->program.Base.Parameters->NumParameters * 4; - } - /* The backend also sometimes adds params for texture size. 
*/ - param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; - prog_data->base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data->base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data->base.nr_params = param_count; - - prog_data->barycentric_interp_modes = - brw_compute_barycentric_interp_modes(brw, key->flat_shade, - key->persample_shading, - &fp->program); - - set_binding_table_layout(&prog_data->base, pipeline, - VK_SHADER_STAGE_FRAGMENT); - /* This needs to come after shader time and pull constant entries, but we - * don't have those set up now, so just put it after the layout entries. - */ - prog_data->binding_table.render_target_start = 0; - - program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data, - &fp->program, prog, &program_size); - if (program == NULL) { - ralloc_free(mem_ctx); - return false; - } - - struct anv_state ps_state = anv_state_stream_alloc(&pipeline->program_stream, - program_size, 64); - memcpy(ps_state.map, program, program_size); - - if (prog_data->no_8) - pipeline->ps_simd8 = NO_KERNEL; - else - pipeline->ps_simd8 = ps_state.offset; - - if (prog_data->no_8 || prog_data->prog_offset_16) { - pipeline->ps_simd16 = ps_state.offset + prog_data->prog_offset_16; - } else { - pipeline->ps_simd16 = NO_KERNEL; - } - - ralloc_free(mem_ctx); - - return true; -} - -static void -brw_gs_populate_key(struct brw_context *brw, - struct anv_pipeline *pipeline, - struct brw_geometry_program *gp, - struct brw_gs_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_stage_state *stage_state = &brw->gs.base; - struct gl_program *prog = &gp->program.Base; - - memset(key, 0, sizeof(*key)); - - key->base.program_string_id = gp->id; - brw_setup_vue_key_clip_info(brw, &key->base, - gp->program.Base.UsesClipDistanceOut); - - /* _NEW_TEXTURE */ - brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count, - &key->base.tex); - - struct 
brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; - - /* BRW_NEW_VUE_MAP_VS */ - key->input_varyings = prog_data->base.vue_map.slots_valid; -} - -static bool -really_do_gs_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_geometry_program *gp, - struct brw_gs_prog_key *key, struct anv_pipeline *pipeline) -{ - struct brw_gs_compile_output output; - - /* FIXME: We pass the bind map to the compile in the output struct. Need - * something better. */ - set_binding_table_layout(&output.prog_data.base.base, - pipeline, VK_SHADER_STAGE_GEOMETRY); - - brw_compile_gs_prog(brw, prog, gp, key, &output); - - struct anv_state gs_state = anv_state_stream_alloc(&pipeline->program_stream, - output.program_size, 64); - memcpy(gs_state.map, output.program, output.program_size); - - pipeline->gs_vec4 = gs_state.offset; - pipeline->gs_vertex_count = gp->program.VerticesIn; - - ralloc_free(output.mem_ctx); - - return true; -} - -static bool -brw_codegen_cs_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_compute_program *cp, - struct brw_cs_prog_key *key, struct anv_pipeline *pipeline) -{ - struct gl_context *ctx = &brw->ctx; - const GLuint *program; - void *mem_ctx = ralloc_context(NULL); - GLuint program_size; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - - struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; - assert (cs); - - memset(prog_data, 0, sizeof(*prog_data)); - - set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE); - - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - */ - int param_count = cs->num_uniform_components; - - /* The backend also sometimes adds params for texture size. 
*/ - param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; - prog_data->base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data->base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data->base.nr_params = param_count; - - program = brw_cs_emit(brw, mem_ctx, key, prog_data, - &cp->program, prog, &program_size); - if (program == NULL) { - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(INTEL_DEBUG & DEBUG_CS)) - fprintf(stderr, "\n"); - - struct anv_state cs_state = anv_state_stream_alloc(&pipeline->program_stream, - program_size, 64); - memcpy(cs_state.map, program, program_size); - - pipeline->cs_simd = cs_state.offset; - - ralloc_free(mem_ctx); - - return true; -} - -static void -brw_cs_populate_key(struct brw_context *brw, - struct brw_compute_program *bcp, struct brw_cs_prog_key *key) -{ - memset(key, 0, sizeof(*key)); - - /* The unique compute program ID */ - key->program_string_id = bcp->id; -} - -static void -fail_on_compile_error(int status, const char *msg) -{ - int source, line, column; - char error[256]; - - if (status) - return; - - if (sscanf(msg, "%d:%d(%d): error: %255[^\n]", &source, &line, &column, error) == 4) - fail_if(!status, "%d:%s\n", line, error); - else - fail_if(!status, "%s\n", msg); -} - -struct anv_compiler { - struct anv_device *device; - struct intel_screen *screen; - struct brw_context *brw; - struct gl_pipeline_object pipeline; -}; - -extern "C" { - -struct anv_compiler * -anv_compiler_create(struct anv_device *device) -{ - const struct brw_device_info *devinfo = &device->info; - struct anv_compiler *compiler; - struct gl_context *ctx; - - compiler = rzalloc(NULL, struct anv_compiler); - if (compiler == NULL) - return NULL; - - compiler->screen = rzalloc(compiler, struct intel_screen); - if (compiler->screen == NULL) - goto fail; - - compiler->brw = rzalloc(compiler, struct brw_context); - if (compiler->brw == NULL) - goto fail; - 
- compiler->device = device; - - compiler->brw->optionCache.info = NULL; - compiler->brw->bufmgr = NULL; - compiler->brw->gen = devinfo->gen; - compiler->brw->is_g4x = devinfo->is_g4x; - compiler->brw->is_baytrail = devinfo->is_baytrail; - compiler->brw->is_haswell = devinfo->is_haswell; - compiler->brw->is_cherryview = devinfo->is_cherryview; - - /* We need this at least for CS, which will check brw->max_cs_threads - * against the work group size. */ - compiler->brw->max_vs_threads = devinfo->max_vs_threads; - compiler->brw->max_hs_threads = devinfo->max_hs_threads; - compiler->brw->max_ds_threads = devinfo->max_ds_threads; - compiler->brw->max_gs_threads = devinfo->max_gs_threads; - compiler->brw->max_wm_threads = devinfo->max_wm_threads; - compiler->brw->max_cs_threads = devinfo->max_cs_threads; - compiler->brw->urb.size = devinfo->urb.size; - compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries; - compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries; - compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries; - compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries; - compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries; - - compiler->brw->intelScreen = compiler->screen; - compiler->screen->devinfo = &device->info; - - brw_process_intel_debug_variable(compiler->screen); - - compiler->screen->compiler = brw_compiler_create(compiler, &device->info); - - ctx = &compiler->brw->ctx; - _mesa_init_shader_object_functions(&ctx->Driver); - - _mesa_init_constants(&ctx->Const, API_OPENGL_CORE); - - brw_initialize_context_constants(compiler->brw); - - intelInitExtensions(ctx); - - /* Set dd::NewShader */ - brwInitFragProgFuncs(&ctx->Driver); - - ctx->_Shader = &compiler->pipeline; - - compiler->brw->precompile = false; - - return compiler; - - fail: - ralloc_free(compiler); - return NULL; -} - -void -anv_compiler_destroy(struct anv_compiler *compiler) -{ - _mesa_free_errors_data(&compiler->brw->ctx); - ralloc_free(compiler); 
-} - -/* From gen7_urb.c */ - -/* FIXME: Add to struct intel_device_info */ - -static const int gen8_push_size = 32 * 1024; - -static void -gen7_compute_urb_partition(struct anv_pipeline *pipeline) -{ - const struct brw_device_info *devinfo = &pipeline->device->info; - bool vs_present = pipeline->vs_simd8 != NO_KERNEL; - unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; - unsigned vs_entry_size_bytes = vs_size * 64; - bool gs_present = pipeline->gs_vec4 != NO_KERNEL; - unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; - unsigned gs_entry_size_bytes = gs_size * 64; - - /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): - * - * VS Number of URB Entries must be divisible by 8 if the VS URB Entry - * Allocation Size is less than 9 512-bit URB entries. - * - * Similar text exists for GS. - */ - unsigned vs_granularity = (vs_size < 9) ? 8 : 1; - unsigned gs_granularity = (gs_size < 9) ? 8 : 1; - - /* URB allocations must be done in 8k chunks. */ - unsigned chunk_size_bytes = 8192; - - /* Determine the size of the URB in chunks. */ - unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; - - /* Reserve space for push constants */ - unsigned push_constant_bytes = gen8_push_size; - unsigned push_constant_chunks = - push_constant_bytes / chunk_size_bytes; - - /* Initially, assign each stage the minimum amount of URB space it needs, - * and make a note of how much additional space it "wants" (the amount of - * additional space it could actually make use of). 
- */ - - /* VS has a lower limit on the number of URB entries */ - unsigned vs_chunks = - ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes; - unsigned vs_wants = - ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes - vs_chunks; - - unsigned gs_chunks = 0; - unsigned gs_wants = 0; - if (gs_present) { - /* There are two constraints on the minimum amount of URB space we can - * allocate: - * - * (1) We need room for at least 2 URB entries, since we always operate - * the GS in DUAL_OBJECT mode. - * - * (2) We can't allocate less than nr_gs_entries_granularity. - */ - gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes; - gs_wants = - ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes - gs_chunks; - } - - /* There should always be enough URB space to satisfy the minimum - * requirements of each stage. - */ - unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; - assert(total_needs <= urb_chunks); - - /* Mete out remaining space (if any) in proportion to "wants". */ - unsigned total_wants = vs_wants + gs_wants; - unsigned remaining_space = urb_chunks - total_needs; - if (remaining_space > total_wants) - remaining_space = total_wants; - if (remaining_space > 0) { - unsigned vs_additional = (unsigned) - round(vs_wants * (((double) remaining_space) / total_wants)); - vs_chunks += vs_additional; - remaining_space -= vs_additional; - gs_chunks += remaining_space; - } - - /* Sanity check that we haven't over-allocated. */ - assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); - - /* Finally, compute the number of entries that can fit in the space - * allocated to each stage. 
- */ - unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; - unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; - - /* Since we rounded up when computing *_wants, this may be slightly more - * than the maximum allowed amount, so correct for that. - */ - nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); - nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); - - /* Ensure that we program a multiple of the granularity. */ - nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); - nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); - - /* Finally, sanity check to make sure we have at least the minimum number - * of entries needed for each stage. - */ - assert(nr_vs_entries >= devinfo->urb.min_vs_entries); - if (gs_present) - assert(nr_gs_entries >= 2); - - /* Lay out the URB in the following order: - * - push constants - * - VS - * - GS - */ - pipeline->urb.vs_start = push_constant_chunks; - pipeline->urb.vs_size = vs_size; - pipeline->urb.nr_vs_entries = nr_vs_entries; - - pipeline->urb.gs_start = push_constant_chunks + vs_chunks; - pipeline->urb.gs_size = gs_size; - pipeline->urb.nr_gs_entries = nr_gs_entries; -} - -static const struct { - uint32_t token; - gl_shader_stage stage; - const char *name; -} stage_info[] = { - { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" }, - { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1,"tess control" }, - { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" }, - { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" }, - { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" }, - { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" }, -}; - -struct spirv_header{ - uint32_t magic; - uint32_t version; - uint32_t gen_magic; -}; - -static const char * -src_as_glsl(const char *data) -{ - const struct spirv_header *as_spirv = (const struct spirv_header *)data; - - /* Check alignment */ - if ((intptr_t)data & 0x3) { - return data; 
- } - - if (as_spirv->magic == SPIR_V_MAGIC_NUMBER) { - /* LunarG back-door */ - if (as_spirv->version == 0) - return data + 12; - else - return NULL; - } else { - return data; - } -} - -static void -anv_compile_shader_glsl(struct anv_compiler *compiler, - struct gl_shader_program *program, - struct anv_pipeline *pipeline, uint32_t stage) -{ - struct brw_context *brw = compiler->brw; - struct gl_shader *shader; - int name = 0; - - shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); - fail_if(shader == NULL, "failed to create %s shader\n", stage_info[stage].name); - - shader->Source = strdup(src_as_glsl(pipeline->shaders[stage]->module->data)); - _mesa_glsl_compile_shader(&brw->ctx, shader, false, false); - fail_on_compile_error(shader->CompileStatus, shader->InfoLog); - - program->Shaders[program->NumShaders] = shader; - program->NumShaders++; -} - -static void -setup_nir_io(struct gl_program *prog, - nir_shader *shader) -{ - foreach_list_typed(nir_variable, var, node, &shader->inputs) { - prog->InputsRead |= BITFIELD64_BIT(var->data.location); - } - - foreach_list_typed(nir_variable, var, node, &shader->outputs) { - prog->OutputsWritten |= BITFIELD64_BIT(var->data.location); - } -} - -static void -anv_compile_shader_spirv(struct anv_compiler *compiler, - struct gl_shader_program *program, - struct anv_pipeline *pipeline, uint32_t stage) -{ - struct brw_context *brw = compiler->brw; - struct anv_shader *shader = pipeline->shaders[stage]; - struct gl_shader *mesa_shader; - int name = 0; - - mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); - fail_if(mesa_shader == NULL, - "failed to create %s shader\n", stage_info[stage].name); - - switch (stage) { - case VK_SHADER_STAGE_VERTEX: - mesa_shader->Program = &rzalloc(mesa_shader, struct brw_vertex_program)->program.Base; - break; - case VK_SHADER_STAGE_GEOMETRY: - mesa_shader->Program = &rzalloc(mesa_shader, struct brw_geometry_program)->program.Base; - break; - case 
VK_SHADER_STAGE_FRAGMENT: - mesa_shader->Program = &rzalloc(mesa_shader, struct brw_fragment_program)->program.Base; - break; - case VK_SHADER_STAGE_COMPUTE: - mesa_shader->Program = &rzalloc(mesa_shader, struct brw_compute_program)->program.Base; - break; - } - - mesa_shader->Program->Parameters = - rzalloc(mesa_shader, struct gl_program_parameter_list); - - mesa_shader->Type = stage_info[stage].token; - mesa_shader->Stage = stage_info[stage].stage; - - assert(shader->module->size % 4 == 0); - - struct gl_shader_compiler_options *glsl_options = - &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage]; - - mesa_shader->Program->nir = - spirv_to_nir((uint32_t *)shader->module->data, shader->module->size / 4, - glsl_options->NirOptions); - nir_validate_shader(mesa_shader->Program->nir); - - brw_process_nir(mesa_shader->Program->nir, - compiler->screen->devinfo, - NULL, mesa_shader->Stage); - - setup_nir_io(mesa_shader->Program, mesa_shader->Program->nir); - - fail_if(mesa_shader->Program->nir == NULL, - "failed to translate SPIR-V to NIR\n"); - - program->Shaders[program->NumShaders] = mesa_shader; - program->NumShaders++; -} - -static void -add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage, - struct brw_stage_prog_data *prog_data) -{ - struct brw_device_info *devinfo = &pipeline->device->info; - uint32_t max_threads[] = { - [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads, - [VK_SHADER_STAGE_TESS_CONTROL] = 0, - [VK_SHADER_STAGE_TESS_EVALUATION] = 0, - [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads, - [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads, - [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads, - }; - - pipeline->prog_data[stage] = prog_data; - pipeline->active_stages |= 1 << stage; - pipeline->scratch_start[stage] = pipeline->total_scratch; - pipeline->total_scratch = - align_u32(pipeline->total_scratch, 1024) + - prog_data->total_scratch * max_threads[stage]; -} - -int -anv_compiler_run(struct anv_compiler 
*compiler, struct anv_pipeline *pipeline) -{ - struct gl_shader_program *program; - int name = 0; - struct brw_context *brw = compiler->brw; - - pipeline->writes_point_size = false; - - /* When we free the pipeline, we detect stages based on the NULL status - * of various prog_data pointers. Make them NULL by default. - */ - memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); - - brw->use_rep_send = pipeline->use_repclear; - brw->no_simd8 = pipeline->use_repclear; - - program = brw->ctx.Driver.NewShaderProgram(name); - program->Shaders = (struct gl_shader **) - calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *)); - fail_if(program == NULL || program->Shaders == NULL, - "failed to create program\n"); - - bool all_spirv = true; - for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { - if (pipeline->shaders[i] == NULL) - continue; - - /* You need at least this much for "void main() { }" anyway */ - assert(pipeline->shaders[i]->module->size >= 12); - - if (src_as_glsl(pipeline->shaders[i]->module->data)) { - all_spirv = false; - break; - } - - assert(pipeline->shaders[i]->module->size % 4 == 0); - } - - if (all_spirv) { - for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { - if (pipeline->shaders[i]) - anv_compile_shader_spirv(compiler, program, pipeline, i); - } - - for (unsigned i = 0; i < program->NumShaders; i++) { - struct gl_shader *shader = program->Shaders[i]; - program->_LinkedShaders[shader->Stage] = shader; - } - } else { - for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { - if (pipeline->shaders[i]) - anv_compile_shader_glsl(compiler, program, pipeline, i); - } - - _mesa_glsl_link_shader(&brw->ctx, program); - fail_on_compile_error(program->LinkStatus, - program->InfoLog); - } - - bool success; - pipeline->active_stages = 0; - pipeline->total_scratch = 0; - - if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) { - struct brw_vs_prog_key vs_key; - struct gl_vertex_program *vp = 
(struct gl_vertex_program *) - program->_LinkedShaders[MESA_SHADER_VERTEX]->Program; - struct brw_vertex_program *bvp = brw_vertex_program(vp); - - brw_vs_populate_key(brw, bvp, &vs_key); - - success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline); - fail_if(!success, "do_wm_prog failed\n"); - add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX, - &pipeline->vs_prog_data.base.base); - - if (vp->Base.OutputsWritten & VARYING_SLOT_PSIZ) - pipeline->writes_point_size = true; - } else { - memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); - pipeline->vs_simd8 = NO_KERNEL; - } - - - if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) { - struct brw_gs_prog_key gs_key; - struct gl_geometry_program *gp = (struct gl_geometry_program *) - program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program; - struct brw_geometry_program *bgp = brw_geometry_program(gp); - - brw_gs_populate_key(brw, pipeline, bgp, &gs_key); - - success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline); - fail_if(!success, "do_gs_prog failed\n"); - add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, - &pipeline->gs_prog_data.base.base); - - if (gp->Base.OutputsWritten & VARYING_SLOT_PSIZ) - pipeline->writes_point_size = true; - } else { - pipeline->gs_vec4 = NO_KERNEL; - } - - if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) { - struct brw_wm_prog_key wm_key; - struct gl_fragment_program *fp = (struct gl_fragment_program *) - program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; - struct brw_fragment_program *bfp = brw_fragment_program(fp); - - brw_wm_populate_key(brw, bfp, &wm_key); - - success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); - fail_if(!success, "do_wm_prog failed\n"); - add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT, - &pipeline->wm_prog_data.base); - } - - if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) { - struct brw_cs_prog_key cs_key; - struct gl_compute_program *cp = (struct gl_compute_program *) - 
program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program; - struct brw_compute_program *bcp = brw_compute_program(cp); - - brw_cs_populate_key(brw, bcp, &cs_key); - - success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline); - fail_if(!success, "brw_codegen_cs_prog failed\n"); - add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE, - &pipeline->cs_prog_data.base); - } - - /* XXX: Deleting the shader is broken with our current SPIR-V hacks. We - * need to fix this ASAP. - */ - if (!all_spirv) - brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); - - struct anv_device *device = compiler->device; - while (device->scratch_block_pool.bo.size < pipeline->total_scratch) - anv_block_pool_alloc(&device->scratch_block_pool); - - gen7_compute_urb_partition(pipeline); - - return 0; -} - -/* This badly named function frees the struct anv_pipeline data that the compiler - * allocates. Currently just the prog_data structs. - */ -void -anv_compiler_free(struct anv_pipeline *pipeline) -{ - for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) { - if (pipeline->prog_data[stage]) { - free(pipeline->prog_data[stage]->map_entries); - ralloc_free(pipeline->prog_data[stage]->param); - ralloc_free(pipeline->prog_data[stage]->pull_param); - } - } -} - -} diff --git a/src/vulkan/device.c b/src/vulkan/device.c deleted file mode 100644 index 1073ab00ad4..00000000000 --- a/src/vulkan/device.c +++ /dev/null @@ -1,2390 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this 
permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "private.h" -#include "mesa/main/git_sha1.h" - -static int -anv_env_get_int(const char *name) -{ - const char *val = getenv(name); - - if (!val) - return 0; - - return strtol(val, NULL, 0); -} - -static void -anv_physical_device_finish(struct anv_physical_device *device) -{ - if (device->fd >= 0) - close(device->fd); -} - -static VkResult -anv_physical_device_init(struct anv_physical_device *device, - struct anv_instance *instance, - const char *path) -{ - device->fd = open(path, O_RDWR | O_CLOEXEC); - if (device->fd < 0) - return vk_error(VK_ERROR_UNAVAILABLE); - - device->instance = instance; - device->path = path; - - device->chipset_id = anv_env_get_int("INTEL_DEVID_OVERRIDE"); - device->no_hw = false; - if (device->chipset_id) { - /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. 
*/ - device->no_hw = true; - } else { - device->chipset_id = anv_gem_get_param(device->fd, I915_PARAM_CHIPSET_ID); - } - if (!device->chipset_id) - goto fail; - - device->name = brw_get_device_name(device->chipset_id); - device->info = brw_get_device_info(device->chipset_id, -1); - if (!device->info) - goto fail; - - if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_WAIT_TIMEOUT)) - goto fail; - - if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_EXECBUF2)) - goto fail; - - if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_LLC)) - goto fail; - - if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_EXEC_CONSTANTS)) - goto fail; - - return VK_SUCCESS; - -fail: - anv_physical_device_finish(device); - return vk_error(VK_ERROR_UNAVAILABLE); -} - -static void *default_alloc( - void* pUserData, - size_t size, - size_t alignment, - VkSystemAllocType allocType) -{ - return malloc(size); -} - -static void default_free( - void* pUserData, - void* pMem) -{ - free(pMem); -} - -static const VkAllocCallbacks default_alloc_callbacks = { - .pUserData = NULL, - .pfnAlloc = default_alloc, - .pfnFree = default_free -}; - -VkResult anv_CreateInstance( - const VkInstanceCreateInfo* pCreateInfo, - VkInstance* pInstance) -{ - struct anv_instance *instance; - const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks; - void *user_data = NULL; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - - if (pCreateInfo->pAllocCb) { - alloc_callbacks = pCreateInfo->pAllocCb; - user_data = pCreateInfo->pAllocCb->pUserData; - } - instance = alloc_callbacks->pfnAlloc(user_data, sizeof(*instance), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!instance) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - instance->pAllocUserData = alloc_callbacks->pUserData; - instance->pfnAlloc = alloc_callbacks->pfnAlloc; - instance->pfnFree = alloc_callbacks->pfnFree; - instance->apiVersion = pCreateInfo->pAppInfo->apiVersion; - instance->physicalDeviceCount = 0; - - *pInstance = 
anv_instance_to_handle(instance); - - return VK_SUCCESS; -} - -VkResult anv_DestroyInstance( - VkInstance _instance) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); - - if (instance->physicalDeviceCount > 0) { - anv_physical_device_finish(&instance->physicalDevice); - } - - instance->pfnFree(instance->pAllocUserData, instance); - - return VK_SUCCESS; -} - -VkResult anv_EnumeratePhysicalDevices( - VkInstance _instance, - uint32_t* pPhysicalDeviceCount, - VkPhysicalDevice* pPhysicalDevices) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); - VkResult result; - - if (instance->physicalDeviceCount == 0) { - result = anv_physical_device_init(&instance->physicalDevice, - instance, "/dev/dri/renderD128"); - if (result != VK_SUCCESS) - return result; - - instance->physicalDeviceCount = 1; - } - - /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; - * otherwise it's an inout parameter. - * - * The Vulkan spec (git aaed022) says: - * - * pPhysicalDeviceCount is a pointer to an unsigned integer variable - * that is initialized with the number of devices the application is - * prepared to receive handles to. pname:pPhysicalDevices is pointer to - * an array of at least this many VkPhysicalDevice handles [...]. - * - * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices - * overwrites the contents of the variable pointed to by - * pPhysicalDeviceCount with the number of physical devices in in the - * instance; otherwise, vkEnumeratePhysicalDevices overwrites - * pPhysicalDeviceCount with the number of physical handles written to - * pPhysicalDevices. 
- */ - if (!pPhysicalDevices) { - *pPhysicalDeviceCount = instance->physicalDeviceCount; - } else if (*pPhysicalDeviceCount >= 1) { - pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice); - *pPhysicalDeviceCount = 1; - } else { - *pPhysicalDeviceCount = 0; - } - - return VK_SUCCESS; -} - -VkResult anv_GetPhysicalDeviceFeatures( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures* pFeatures) -{ - anv_finishme("Get correct values for PhysicalDeviceFeatures"); - - *pFeatures = (VkPhysicalDeviceFeatures) { - .robustBufferAccess = false, - .fullDrawIndexUint32 = false, - .imageCubeArray = false, - .independentBlend = false, - .geometryShader = true, - .tessellationShader = false, - .sampleRateShading = false, - .dualSourceBlend = true, - .logicOp = true, - .instancedDrawIndirect = true, - .depthClip = false, - .depthBiasClamp = false, - .fillModeNonSolid = true, - .depthBounds = false, - .wideLines = true, - .largePoints = true, - .textureCompressionETC2 = true, - .textureCompressionASTC_LDR = true, - .textureCompressionBC = true, - .pipelineStatisticsQuery = true, - .vertexSideEffects = false, - .tessellationSideEffects = false, - .geometrySideEffects = false, - .fragmentSideEffects = false, - .shaderTessellationPointSize = false, - .shaderGeometryPointSize = true, - .shaderTextureGatherExtended = true, - .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = false, - .shaderStorageBufferArrayConstantIndexing = false, - .shaderStorageImageArrayConstantIndexing = false, - .shaderUniformBufferArrayDynamicIndexing = true, - .shaderSampledImageArrayDynamicIndexing = false, - .shaderStorageBufferArrayDynamicIndexing = false, - .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = false, - .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderFloat16 = false, - .shaderInt16 = false, - }; - - return VK_SUCCESS; -} - -VkResult anv_GetPhysicalDeviceLimits( - 
VkPhysicalDevice physicalDevice, - VkPhysicalDeviceLimits* pLimits) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - const struct brw_device_info *devinfo = physical_device->info; - - anv_finishme("Get correct values for PhysicalDeviceLimits"); - - *pLimits = (VkPhysicalDeviceLimits) { - .maxImageDimension1D = (1 << 14), - .maxImageDimension2D = (1 << 14), - .maxImageDimension3D = (1 << 10), - .maxImageDimensionCube = (1 << 14), - .maxImageArrayLayers = (1 << 10), - .maxTexelBufferSize = (1 << 14), - .maxUniformBufferSize = UINT32_MAX, - .maxStorageBufferSize = UINT32_MAX, - .maxPushConstantsSize = 128, - .maxMemoryAllocationCount = UINT32_MAX, - .bufferImageGranularity = 64, /* A cache line */ - .maxBoundDescriptorSets = MAX_SETS, - .maxDescriptorSets = UINT32_MAX, - .maxPerStageDescriptorSamplers = 64, - .maxPerStageDescriptorUniformBuffers = 64, - .maxPerStageDescriptorStorageBuffers = 64, - .maxPerStageDescriptorSampledImages = 64, - .maxPerStageDescriptorStorageImages = 64, - .maxDescriptorSetSamplers = 256, - .maxDescriptorSetUniformBuffers = 256, - .maxDescriptorSetStorageBuffers = 256, - .maxDescriptorSetSampledImages = 256, - .maxDescriptorSetStorageImages = 256, - .maxVertexInputAttributes = 32, - .maxVertexInputAttributeOffset = 256, - .maxVertexInputBindingStride = 256, - .maxVertexOutputComponents = 32, - .maxTessGenLevel = 0, - .maxTessPatchSize = 0, - .maxTessControlPerVertexInputComponents = 0, - .maxTessControlPerVertexOutputComponents = 0, - .maxTessControlPerPatchOutputComponents = 0, - .maxTessControlTotalOutputComponents = 0, - .maxTessEvaluationInputComponents = 0, - .maxTessEvaluationOutputComponents = 0, - .maxGeometryShaderInvocations = 6, - .maxGeometryInputComponents = 16, - .maxGeometryOutputComponents = 16, - .maxGeometryOutputVertices = 16, - .maxGeometryTotalOutputComponents = 16, - .maxFragmentInputComponents = 16, - .maxFragmentOutputBuffers = 8, - .maxFragmentDualSourceBuffers = 2, - 
.maxFragmentCombinedOutputResources = 8, - .maxComputeSharedMemorySize = 1024, - .maxComputeWorkGroupCount = { - 16 * devinfo->max_cs_threads, - 16 * devinfo->max_cs_threads, - 16 * devinfo->max_cs_threads, - }, - .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, - .maxComputeWorkGroupSize = { - 16 * devinfo->max_cs_threads, - 16 * devinfo->max_cs_threads, - 16 * devinfo->max_cs_threads, - }, - .subPixelPrecisionBits = 4 /* FIXME */, - .subTexelPrecisionBits = 4 /* FIXME */, - .mipmapPrecisionBits = 4 /* FIXME */, - .maxDrawIndexedIndexValue = UINT32_MAX, - .maxDrawIndirectInstanceCount = UINT32_MAX, - .primitiveRestartForPatches = UINT32_MAX, - .maxSamplerLodBias = 16, - .maxSamplerAnisotropy = 16, - .maxViewports = 16, - .maxDynamicViewportStates = UINT32_MAX, - .maxViewportDimensions = { (1 << 14), (1 << 14) }, - .viewportBoundsRange = { -1.0, 1.0 }, /* FIXME */ - .viewportSubPixelBits = 13, /* We take a float? */ - .minMemoryMapAlignment = 64, /* A cache line */ - .minTexelBufferOffsetAlignment = 1, - .minUniformBufferOffsetAlignment = 1, - .minStorageBufferOffsetAlignment = 1, - .minTexelOffset = 0, /* FIXME */ - .maxTexelOffset = 0, /* FIXME */ - .minTexelGatherOffset = 0, /* FIXME */ - .maxTexelGatherOffset = 0, /* FIXME */ - .minInterpolationOffset = 0, /* FIXME */ - .maxInterpolationOffset = 0, /* FIXME */ - .subPixelInterpolationOffsetBits = 0, /* FIXME */ - .maxFramebufferWidth = (1 << 14), - .maxFramebufferHeight = (1 << 14), - .maxFramebufferLayers = (1 << 10), - .maxFramebufferColorSamples = 8, - .maxFramebufferDepthSamples = 8, - .maxFramebufferStencilSamples = 8, - .maxColorAttachments = MAX_RTS, - .maxSampledImageColorSamples = 8, - .maxSampledImageDepthSamples = 8, - .maxSampledImageIntegerSamples = 1, - .maxStorageImageSamples = 1, - .maxSampleMaskWords = 1, - .timestampFrequency = 1000 * 1000 * 1000 / 80, - .maxClipDistances = 0 /* FIXME */, - .maxCullDistances = 0 /* FIXME */, - .maxCombinedClipAndCullDistances = 0 /* FIXME */, - 
.pointSizeRange = { 0.125, 255.875 }, - .lineWidthRange = { 0.0, 7.9921875 }, - .pointSizeGranularity = (1.0 / 8.0), - .lineWidthGranularity = (1.0 / 128.0), - }; - - return VK_SUCCESS; -} - -VkResult anv_GetPhysicalDeviceProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties* pProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - - *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = 1, - .driverVersion = 1, - .vendorId = 0x8086, - .deviceId = pdevice->chipset_id, - .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, - }; - - strcpy(pProperties->deviceName, pdevice->name); - snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_LENGTH, - "anv-%s", MESA_GIT_SHA1 + 4); - - return VK_SUCCESS; -} - -VkResult anv_GetPhysicalDeviceQueueCount( - VkPhysicalDevice physicalDevice, - uint32_t* pCount) -{ - *pCount = 1; - - return VK_SUCCESS; -} - -VkResult anv_GetPhysicalDeviceQueueProperties( - VkPhysicalDevice physicalDevice, - uint32_t count, - VkPhysicalDeviceQueueProperties* pQueueProperties) -{ - assert(count == 1); - - *pQueueProperties = (VkPhysicalDeviceQueueProperties) { - .queueFlags = VK_QUEUE_GRAPHICS_BIT | - VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_DMA_BIT, - .queueCount = 1, - .supportsTimestamps = true, - }; - - return VK_SUCCESS; -} - -VkResult anv_GetPhysicalDeviceMemoryProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties* pMemoryProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - - size_t aperture_size; - size_t heap_size; - - if (anv_gem_get_aperture(physical_device, &aperture_size) == -1) - return vk_error(VK_ERROR_UNAVAILABLE); - - /* Reserve some wiggle room for the driver by exposing only 75% of the - * aperture to the heap. - */ - heap_size = 3 * aperture_size / 4; - - /* The property flags below are valid only for llc platforms. 
*/ - pMemoryProperties->memoryTypeCount = 1; - pMemoryProperties->memoryTypes[0] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, - .heapIndex = 1, - }; - - pMemoryProperties->memoryHeapCount = 1; - pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { - .size = heap_size, - .flags = VK_MEMORY_HEAP_HOST_LOCAL, - }; - - return VK_SUCCESS; -} - -PFN_vkVoidFunction anv_GetInstanceProcAddr( - VkInstance instance, - const char* pName) -{ - return anv_lookup_entrypoint(pName); -} - -PFN_vkVoidFunction anv_GetDeviceProcAddr( - VkDevice device, - const char* pName) -{ - return anv_lookup_entrypoint(pName); -} - -static void -parse_debug_flags(struct anv_device *device) -{ - const char *debug, *p, *end; - - debug = getenv("INTEL_DEBUG"); - device->dump_aub = false; - if (debug) { - for (p = debug; *p; p = end + 1) { - end = strchrnul(p, ','); - if (end - p == 3 && memcmp(p, "aub", 3) == 0) - device->dump_aub = true; - if (end - p == 5 && memcmp(p, "no_hw", 5) == 0) - device->no_hw = true; - if (*end == '\0') - break; - } - } -} - -static VkResult -anv_queue_init(struct anv_device *device, struct anv_queue *queue) -{ - queue->device = device; - queue->pool = &device->surface_state_pool; - - queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4); - if (queue->completed_serial.map == NULL) - return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - - *(uint32_t *)queue->completed_serial.map = 0; - queue->next_serial = 1; - - return VK_SUCCESS; -} - -static void -anv_queue_finish(struct anv_queue *queue) -{ -#ifdef HAVE_VALGRIND - /* This gets torn down with the device so we only need to do this if - * valgrind is present. 
- */ - anv_state_pool_free(queue->pool, queue->completed_serial); -#endif -} - -static void -anv_device_init_border_colors(struct anv_device *device) -{ - static const VkClearColorValue border_colors[] = { - [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 0.0 } }, - [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 1.0 } }, - [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .f32 = { 1.0, 1.0, 1.0, 1.0 } }, - [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .u32 = { 0, 0, 0, 0 } }, - [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .u32 = { 0, 0, 0, 1 } }, - [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .u32 = { 1, 1, 1, 1 } }, - }; - - device->border_colors = - anv_state_pool_alloc(&device->dynamic_state_pool, - sizeof(border_colors), 32); - memcpy(device->border_colors.map, border_colors, sizeof(border_colors)); -} - -static const uint32_t BATCH_SIZE = 8192; - -VkResult anv_CreateDevice( - VkPhysicalDevice physicalDevice, - const VkDeviceCreateInfo* pCreateInfo, - VkDevice* pDevice) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - struct anv_instance *instance = physical_device->instance; - struct anv_device *device; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); - - device = instance->pfnAlloc(instance->pAllocUserData, - sizeof(*device), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!device) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - device->no_hw = physical_device->no_hw; - parse_debug_flags(device); - - device->instance = physical_device->instance; - - /* XXX(chadv): Can we dup() physicalDevice->fd here? 
*/ - device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); - if (device->fd == -1) - goto fail_device; - - device->context_id = anv_gem_create_context(device); - if (device->context_id == -1) - goto fail_fd; - - anv_bo_pool_init(&device->batch_bo_pool, device, BATCH_SIZE); - - anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048); - - anv_state_pool_init(&device->dynamic_state_pool, - &device->dynamic_state_block_pool); - - anv_block_pool_init(&device->instruction_block_pool, device, 2048); - anv_block_pool_init(&device->surface_state_block_pool, device, 2048); - - anv_state_pool_init(&device->surface_state_pool, - &device->surface_state_block_pool); - - anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); - - device->info = *physical_device->info; - - device->compiler = anv_compiler_create(device); - device->aub_writer = NULL; - - pthread_mutex_init(&device->mutex, NULL); - - anv_queue_init(device, &device->queue); - - anv_device_init_meta(device); - - anv_device_init_border_colors(device); - - *pDevice = anv_device_to_handle(device); - - return VK_SUCCESS; - - fail_fd: - close(device->fd); - fail_device: - anv_device_free(device, device); - - return vk_error(VK_ERROR_UNAVAILABLE); -} - -VkResult anv_DestroyDevice( - VkDevice _device) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - anv_compiler_destroy(device->compiler); - - anv_queue_finish(&device->queue); - - anv_device_finish_meta(device); - -#ifdef HAVE_VALGRIND - /* We only need to free these to prevent valgrind errors. The backing - * BO will go away in a couple of lines so we don't actually leak. 
- */ - anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); -#endif - - anv_bo_pool_finish(&device->batch_bo_pool); - anv_block_pool_finish(&device->dynamic_state_block_pool); - anv_block_pool_finish(&device->instruction_block_pool); - anv_block_pool_finish(&device->surface_state_block_pool); - - close(device->fd); - - if (device->aub_writer) - anv_aub_writer_destroy(device->aub_writer); - - anv_device_free(device, device); - - return VK_SUCCESS; -} - -static const VkExtensionProperties global_extensions[] = { - { - .extName = "VK_WSI_LunarG", - .specVersion = 3 - } -}; - -VkResult anv_GetGlobalExtensionProperties( - const char* pLayerName, - uint32_t* pCount, - VkExtensionProperties* pProperties) -{ - if (pProperties == NULL) { - *pCount = ARRAY_SIZE(global_extensions); - return VK_SUCCESS; - } - - assert(*pCount < ARRAY_SIZE(global_extensions)); - - *pCount = ARRAY_SIZE(global_extensions); - memcpy(pProperties, global_extensions, sizeof(global_extensions)); - - return VK_SUCCESS; -} - -VkResult anv_GetPhysicalDeviceExtensionProperties( - VkPhysicalDevice physicalDevice, - const char* pLayerName, - uint32_t* pCount, - VkExtensionProperties* pProperties) -{ - if (pProperties == NULL) { - *pCount = 0; - return VK_SUCCESS; - } - - /* None supported at this time */ - return vk_error(VK_ERROR_INVALID_EXTENSION); -} - -VkResult anv_GetGlobalLayerProperties( - uint32_t* pCount, - VkLayerProperties* pProperties) -{ - if (pProperties == NULL) { - *pCount = 0; - return VK_SUCCESS; - } - - /* None supported at this time */ - return vk_error(VK_ERROR_INVALID_LAYER); -} - -VkResult anv_GetPhysicalDeviceLayerProperties( - VkPhysicalDevice physicalDevice, - uint32_t* pCount, - VkLayerProperties* pProperties) -{ - if (pProperties == NULL) { - *pCount = 0; - return VK_SUCCESS; - } - - /* None supported at this time */ - return vk_error(VK_ERROR_INVALID_LAYER); -} - -VkResult anv_GetDeviceQueue( - VkDevice _device, - uint32_t queueNodeIndex, - uint32_t 
queueIndex, - VkQueue* pQueue) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - assert(queueIndex == 0); - - *pQueue = anv_queue_to_handle(&device->queue); - - return VK_SUCCESS; -} - -VkResult anv_QueueSubmit( - VkQueue _queue, - uint32_t cmdBufferCount, - const VkCmdBuffer* pCmdBuffers, - VkFence _fence) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - struct anv_device *device = queue->device; - int ret; - - for (uint32_t i = 0; i < cmdBufferCount; i++) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCmdBuffers[i]); - - if (device->dump_aub) - anv_cmd_buffer_dump(cmd_buffer); - - if (!device->no_hw) { - ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf); - if (ret != 0) - return vk_error(VK_ERROR_UNKNOWN); - - if (fence) { - ret = anv_gem_execbuffer(device, &fence->execbuf); - if (ret != 0) - return vk_error(VK_ERROR_UNKNOWN); - } - - for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) - cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset; - } else { - *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial; - } - } - - return VK_SUCCESS; -} - -VkResult anv_QueueWaitIdle( - VkQueue _queue) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - - return vkDeviceWaitIdle(anv_device_to_handle(queue->device)); -} - -VkResult anv_DeviceWaitIdle( - VkDevice _device) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_state state; - struct anv_batch batch; - struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 exec2_objects[1]; - struct anv_bo *bo = NULL; - VkResult result; - int64_t timeout; - int ret; - - state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32); - bo = &device->dynamic_state_pool.block_pool->bo; - batch.start = batch.next = state.map; - batch.end = state.map + 32; - anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); - anv_batch_emit(&batch, GEN8_MI_NOOP); - - exec2_objects[0].handle = bo->gem_handle; - 
exec2_objects[0].relocation_count = 0; - exec2_objects[0].relocs_ptr = 0; - exec2_objects[0].alignment = 0; - exec2_objects[0].offset = bo->offset; - exec2_objects[0].flags = 0; - exec2_objects[0].rsvd1 = 0; - exec2_objects[0].rsvd2 = 0; - - execbuf.buffers_ptr = (uintptr_t) exec2_objects; - execbuf.buffer_count = 1; - execbuf.batch_start_offset = state.offset; - execbuf.batch_len = batch.next - state.map; - execbuf.cliprects_ptr = 0; - execbuf.num_cliprects = 0; - execbuf.DR1 = 0; - execbuf.DR4 = 0; - - execbuf.flags = - I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - execbuf.rsvd1 = device->context_id; - execbuf.rsvd2 = 0; - - if (!device->no_hw) { - ret = anv_gem_execbuffer(device, &execbuf); - if (ret != 0) { - result = vk_error(VK_ERROR_UNKNOWN); - goto fail; - } - - timeout = INT64_MAX; - ret = anv_gem_wait(device, bo->gem_handle, &timeout); - if (ret != 0) { - result = vk_error(VK_ERROR_UNKNOWN); - goto fail; - } - } - - anv_state_pool_free(&device->dynamic_state_pool, state); - - return VK_SUCCESS; - - fail: - anv_state_pool_free(&device->dynamic_state_pool, state); - - return result; -} - -void * -anv_device_alloc(struct anv_device * device, - size_t size, - size_t alignment, - VkSystemAllocType allocType) -{ - return device->instance->pfnAlloc(device->instance->pAllocUserData, - size, - alignment, - allocType); -} - -void -anv_device_free(struct anv_device * device, - void * mem) -{ - if (mem == NULL) - return; - - return device->instance->pfnFree(device->instance->pAllocUserData, - mem); -} - -VkResult -anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) -{ - bo->gem_handle = anv_gem_create(device, size); - if (!bo->gem_handle) - return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - - bo->map = NULL; - bo->index = 0; - bo->offset = 0; - bo->size = size; - - return VK_SUCCESS; -} - -VkResult anv_AllocMemory( - VkDevice _device, - const VkMemoryAllocInfo* pAllocInfo, - VkDeviceMemory* pMem) -{ - 
ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_device_memory *mem; - VkResult result; - - assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO); - - if (pAllocInfo->memoryTypeIndex != 0) { - /* We support exactly one memory heap. */ - return vk_error(VK_ERROR_INVALID_VALUE); - } - - /* FINISHME: Fail if allocation request exceeds heap size. */ - - mem = anv_device_alloc(device, sizeof(*mem), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (mem == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_bo_init_new(&mem->bo, device, pAllocInfo->allocationSize); - if (result != VK_SUCCESS) - goto fail; - - *pMem = anv_device_memory_to_handle(mem); - - return VK_SUCCESS; - - fail: - anv_device_free(device, mem); - - return result; -} - -VkResult anv_FreeMemory( - VkDevice _device, - VkDeviceMemory _mem) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_device_memory, mem, _mem); - - if (mem->bo.map) - anv_gem_munmap(mem->bo.map, mem->bo.size); - - if (mem->bo.gem_handle != 0) - anv_gem_close(device, mem->bo.gem_handle); - - anv_device_free(device, mem); - - return VK_SUCCESS; -} - -VkResult anv_MapMemory( - VkDevice _device, - VkDeviceMemory _mem, - VkDeviceSize offset, - VkDeviceSize size, - VkMemoryMapFlags flags, - void** ppData) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_device_memory, mem, _mem); - - /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only - * takes a VkDeviceMemory pointer, it seems like only one map of the memory - * at a time is valid. We could just mmap up front and return an offset - * pointer here, but that may exhaust virtual memory on 32 bit - * userspace. 
*/ - - mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size); - mem->map_size = size; - - *ppData = mem->map; - - return VK_SUCCESS; -} - -VkResult anv_UnmapMemory( - VkDevice _device, - VkDeviceMemory _mem) -{ - ANV_FROM_HANDLE(anv_device_memory, mem, _mem); - - anv_gem_munmap(mem->map, mem->map_size); - - return VK_SUCCESS; -} - -VkResult anv_FlushMappedMemoryRanges( - VkDevice device, - uint32_t memRangeCount, - const VkMappedMemoryRange* pMemRanges) -{ - /* clflush here for !llc platforms */ - - return VK_SUCCESS; -} - -VkResult anv_InvalidateMappedMemoryRanges( - VkDevice device, - uint32_t memRangeCount, - const VkMappedMemoryRange* pMemRanges) -{ - return anv_FlushMappedMemoryRanges(device, memRangeCount, pMemRanges); -} - -VkResult anv_GetBufferMemoryRequirements( - VkDevice device, - VkBuffer _buffer, - VkMemoryRequirements* pMemoryRequirements) -{ - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - /* The Vulkan spec (git aaed022) says: - * - * memoryTypeBits is a bitfield and contains one bit set for every - * supported memory type for the resource. The bit `1<memoryTypeBits = 1; - - pMemoryRequirements->size = buffer->size; - pMemoryRequirements->alignment = 16; - - return VK_SUCCESS; -} - -VkResult anv_GetImageMemoryRequirements( - VkDevice device, - VkImage _image, - VkMemoryRequirements* pMemoryRequirements) -{ - ANV_FROM_HANDLE(anv_image, image, _image); - - /* The Vulkan spec (git aaed022) says: - * - * memoryTypeBits is a bitfield and contains one bit set for every - * supported memory type for the resource. 
The bit `1<memoryTypeBits = 1; - - pMemoryRequirements->size = image->size; - pMemoryRequirements->alignment = image->alignment; - - return VK_SUCCESS; -} - -VkResult anv_GetImageSparseMemoryRequirements( - VkDevice device, - VkImage image, - uint32_t* pNumRequirements, - VkSparseImageMemoryRequirements* pSparseMemoryRequirements) -{ - return vk_error(VK_UNSUPPORTED); -} - -VkResult anv_GetDeviceMemoryCommitment( - VkDevice device, - VkDeviceMemory memory, - VkDeviceSize* pCommittedMemoryInBytes) -{ - *pCommittedMemoryInBytes = 0; - stub_return(VK_SUCCESS); -} - -VkResult anv_BindBufferMemory( - VkDevice device, - VkBuffer _buffer, - VkDeviceMemory _mem, - VkDeviceSize memOffset) -{ - ANV_FROM_HANDLE(anv_device_memory, mem, _mem); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - buffer->bo = &mem->bo; - buffer->offset = memOffset; - - return VK_SUCCESS; -} - -VkResult anv_BindImageMemory( - VkDevice device, - VkImage _image, - VkDeviceMemory _mem, - VkDeviceSize memOffset) -{ - ANV_FROM_HANDLE(anv_device_memory, mem, _mem); - ANV_FROM_HANDLE(anv_image, image, _image); - - image->bo = &mem->bo; - image->offset = memOffset; - - return VK_SUCCESS; -} - -VkResult anv_QueueBindSparseBufferMemory( - VkQueue queue, - VkBuffer buffer, - uint32_t numBindings, - const VkSparseMemoryBindInfo* pBindInfo) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_QueueBindSparseImageOpaqueMemory( - VkQueue queue, - VkImage image, - uint32_t numBindings, - const VkSparseMemoryBindInfo* pBindInfo) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_QueueBindSparseImageMemory( - VkQueue queue, - VkImage image, - uint32_t numBindings, - const VkSparseImageMemoryBindInfo* pBindInfo) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_CreateFence( - VkDevice _device, - const VkFenceCreateInfo* pCreateInfo, - VkFence* pFence) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_fence *fence; - struct anv_batch batch; - VkResult result; - - const uint32_t fence_size = 128; - 
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); - - fence = anv_device_alloc(device, sizeof(*fence), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (fence == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_bo_init_new(&fence->bo, device, fence_size); - if (result != VK_SUCCESS) - goto fail; - - fence->bo.map = - anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size); - batch.next = batch.start = fence->bo.map; - batch.end = fence->bo.map + fence->bo.size; - anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); - anv_batch_emit(&batch, GEN8_MI_NOOP); - - fence->exec2_objects[0].handle = fence->bo.gem_handle; - fence->exec2_objects[0].relocation_count = 0; - fence->exec2_objects[0].relocs_ptr = 0; - fence->exec2_objects[0].alignment = 0; - fence->exec2_objects[0].offset = fence->bo.offset; - fence->exec2_objects[0].flags = 0; - fence->exec2_objects[0].rsvd1 = 0; - fence->exec2_objects[0].rsvd2 = 0; - - fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; - fence->execbuf.buffer_count = 1; - fence->execbuf.batch_start_offset = 0; - fence->execbuf.batch_len = batch.next - fence->bo.map; - fence->execbuf.cliprects_ptr = 0; - fence->execbuf.num_cliprects = 0; - fence->execbuf.DR1 = 0; - fence->execbuf.DR4 = 0; - - fence->execbuf.flags = - I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - fence->execbuf.rsvd1 = device->context_id; - fence->execbuf.rsvd2 = 0; - - *pFence = anv_fence_to_handle(fence); - - return VK_SUCCESS; - - fail: - anv_device_free(device, fence); - - return result; -} - -VkResult anv_DestroyFence( - VkDevice _device, - VkFence _fence) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - - anv_gem_munmap(fence->bo.map, fence->bo.size); - anv_gem_close(device, fence->bo.gem_handle); - anv_device_free(device, fence); - - return VK_SUCCESS; -} - -VkResult anv_ResetFences( - VkDevice _device, - uint32_t fenceCount, - const VkFence* pFences) -{ - 
for (uint32_t i = 0; i < fenceCount; i++) { - ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - fence->ready = false; - } - - return VK_SUCCESS; -} - -VkResult anv_GetFenceStatus( - VkDevice _device, - VkFence _fence) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - int64_t t = 0; - int ret; - - if (fence->ready) - return VK_SUCCESS; - - ret = anv_gem_wait(device, fence->bo.gem_handle, &t); - if (ret == 0) { - fence->ready = true; - return VK_SUCCESS; - } - - return VK_NOT_READY; -} - -VkResult anv_WaitForFences( - VkDevice _device, - uint32_t fenceCount, - const VkFence* pFences, - VkBool32 waitAll, - uint64_t timeout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - int64_t t = timeout; - int ret; - - /* FIXME: handle !waitAll */ - - for (uint32_t i = 0; i < fenceCount; i++) { - ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - ret = anv_gem_wait(device, fence->bo.gem_handle, &t); - if (ret == -1 && errno == ETIME) - return VK_TIMEOUT; - else if (ret == -1) - return vk_error(VK_ERROR_UNKNOWN); - } - - return VK_SUCCESS; -} - -// Queue semaphore functions - -VkResult anv_CreateSemaphore( - VkDevice device, - const VkSemaphoreCreateInfo* pCreateInfo, - VkSemaphore* pSemaphore) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_DestroySemaphore( - VkDevice device, - VkSemaphore semaphore) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_QueueSignalSemaphore( - VkQueue queue, - VkSemaphore semaphore) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_QueueWaitSemaphore( - VkQueue queue, - VkSemaphore semaphore) -{ - stub_return(VK_UNSUPPORTED); -} - -// Event functions - -VkResult anv_CreateEvent( - VkDevice device, - const VkEventCreateInfo* pCreateInfo, - VkEvent* pEvent) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_DestroyEvent( - VkDevice device, - VkEvent event) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_GetEventStatus( - VkDevice device, - VkEvent event) -{ - 
stub_return(VK_UNSUPPORTED); -} - -VkResult anv_SetEvent( - VkDevice device, - VkEvent event) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_ResetEvent( - VkDevice device, - VkEvent event) -{ - stub_return(VK_UNSUPPORTED); -} - -// Buffer functions - -VkResult anv_CreateBuffer( - VkDevice _device, - const VkBufferCreateInfo* pCreateInfo, - VkBuffer* pBuffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_buffer *buffer; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); - - buffer = anv_device_alloc(device, sizeof(*buffer), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (buffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - buffer->size = pCreateInfo->size; - buffer->bo = NULL; - buffer->offset = 0; - - *pBuffer = anv_buffer_to_handle(buffer); - - return VK_SUCCESS; -} - -VkResult anv_DestroyBuffer( - VkDevice _device, - VkBuffer _buffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - anv_device_free(device, buffer); - - return VK_SUCCESS; -} - -// Buffer view functions - -void -anv_fill_buffer_surface_state(void *state, VkFormat format, - uint32_t offset, uint32_t range) -{ - const struct anv_format *info; - - info = anv_format_for_vk_format(format); - /* This assumes RGBA float format. 
*/ - uint32_t stride = 4; - uint32_t num_elements = range / stride; - - struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = SURFTYPE_BUFFER, - .SurfaceArray = false, - .SurfaceFormat = info->surface_format, - .SurfaceVerticalAlignment = VALIGN4, - .SurfaceHorizontalAlignment = HALIGN4, - .TileMode = LINEAR, - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GEN8_MOCS, - .BaseMipLevel = 0.0, - .SurfaceQPitch = 0, - .Height = (num_elements >> 7) & 0x3fff, - .Width = num_elements & 0x7f, - .Depth = (num_elements >> 21) & 0x3f, - .SurfacePitch = stride - 1, - .MinimumArrayElement = 0, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, - .SurfaceMinLOD = 0, - .MIPCountLOD = 0, - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, - .ResourceMinLOD = 0.0, - /* FIXME: We assume that the image must be bound at this time. 
*/ - .SurfaceBaseAddress = { NULL, offset }, - }; - - GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); -} - -VkResult anv_CreateBufferView( - VkDevice _device, - const VkBufferViewCreateInfo* pCreateInfo, - VkBufferView* pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); - struct anv_buffer_view *bview; - struct anv_surface_view *view; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); - - bview = anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (bview == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - view = &bview->view; - view->bo = buffer->bo; - view->offset = buffer->offset + pCreateInfo->offset; - view->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - view->format = pCreateInfo->format; - view->range = pCreateInfo->range; - - anv_fill_buffer_surface_state(view->surface_state.map, - pCreateInfo->format, - view->offset, pCreateInfo->range); - - *pView = anv_buffer_view_to_handle(bview); - - return VK_SUCCESS; -} - -VkResult anv_DestroyBufferView( - VkDevice _device, - VkBufferView _bview) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_buffer_view, bview, _bview); - - anv_surface_view_fini(device, &bview->view); - anv_device_free(device, bview); - - return VK_SUCCESS; -} - -// Sampler functions - -VkResult anv_CreateSampler( - VkDevice _device, - const VkSamplerCreateInfo* pCreateInfo, - VkSampler* pSampler) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_sampler *sampler; - uint32_t mag_filter, min_filter, max_anisotropy; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = anv_device_alloc(device, sizeof(*sampler), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!sampler) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - static const uint32_t vk_to_gen_tex_filter[] = { - [VK_TEX_FILTER_NEAREST] = 
MAPFILTER_NEAREST, - [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR - }; - - static const uint32_t vk_to_gen_mipmap_mode[] = { - [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, - [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, - [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR - }; - - static const uint32_t vk_to_gen_tex_address[] = { - [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, - [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, - [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, - [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, - [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, - }; - - static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, - }; - - if (pCreateInfo->maxAnisotropy > 1) { - mag_filter = MAPFILTER_ANISOTROPIC; - min_filter = MAPFILTER_ANISOTROPIC; - max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2; - } else { - mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter]; - min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; - max_anisotropy = RATIO21; - } - - struct GEN8_SAMPLER_STATE sampler_state = { - .SamplerDisable = false, - .TextureBorderColorMode = DX10OGL, - .LODPreClampMode = 0, - .BaseMipLevel = 0.0, - .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], - .MagModeFilter = mag_filter, - .MinModeFilter = min_filter, - .TextureLODBias = pCreateInfo->mipLodBias * 256, - .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = pCreateInfo->minLod, - .MaxLOD = pCreateInfo->maxLod, - .ChromaKeyEnable = 0, - .ChromaKeyIndex = 0, - .ChromaKeyMode = 0, - .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = 0, - - .IndirectStatePointer = - device->border_colors.offset + - 
pCreateInfo->borderColor * sizeof(float) * 4, - - .LODClampMagnificationMode = MIPNONE, - .MaximumAnisotropy = max_anisotropy, - .RAddressMinFilterRoundingEnable = 0, - .RAddressMagFilterRoundingEnable = 0, - .VAddressMinFilterRoundingEnable = 0, - .VAddressMagFilterRoundingEnable = 0, - .UAddressMinFilterRoundingEnable = 0, - .UAddressMagFilterRoundingEnable = 0, - .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = 0, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU], - .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW], - }; - - GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); - - *pSampler = anv_sampler_to_handle(sampler); - - return VK_SUCCESS; -} - -VkResult anv_DestroySampler( - VkDevice _device, - VkSampler _sampler) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); - - anv_device_free(device, sampler); - - return VK_SUCCESS; -} - -// Descriptor set functions - -VkResult anv_CreateDescriptorSetLayout( - VkDevice _device, - const VkDescriptorSetLayoutCreateInfo* pCreateInfo, - VkDescriptorSetLayout* pSetLayout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_descriptor_set_layout *set_layout; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); - - uint32_t sampler_count[VK_SHADER_STAGE_NUM] = { 0, }; - uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, }; - uint32_t num_dynamic_buffers = 0; - uint32_t count = 0; - uint32_t stages = 0; - uint32_t s; - - for (uint32_t i = 0; i < pCreateInfo->count; i++) { - switch (pCreateInfo->pBinding[i].descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - sampler_count[s] += pCreateInfo->pBinding[i].arraySize; - break; - default: - break; - } - - switch 
(pCreateInfo->pBinding[i].descriptorType) { - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - surface_count[s] += pCreateInfo->pBinding[i].arraySize; - break; - default: - break; - } - - switch (pCreateInfo->pBinding[i].descriptorType) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - num_dynamic_buffers += pCreateInfo->pBinding[i].arraySize; - break; - default: - break; - } - - stages |= pCreateInfo->pBinding[i].stageFlags; - count += pCreateInfo->pBinding[i].arraySize; - } - - uint32_t sampler_total = 0; - uint32_t surface_total = 0; - for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { - sampler_total += sampler_count[s]; - surface_total += surface_count[s]; - } - - size_t size = sizeof(*set_layout) + - (sampler_total + surface_total) * sizeof(set_layout->entries[0]); - set_layout = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!set_layout) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - set_layout->num_dynamic_buffers = num_dynamic_buffers; - set_layout->count = count; - set_layout->shader_stages = stages; - - struct anv_descriptor_slot *p = set_layout->entries; - struct anv_descriptor_slot *sampler[VK_SHADER_STAGE_NUM]; - struct anv_descriptor_slot *surface[VK_SHADER_STAGE_NUM]; - for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { - set_layout->stage[s].surface_count = surface_count[s]; - set_layout->stage[s].surface_start = surface[s] = p; - p += surface_count[s]; - set_layout->stage[s].sampler_count = 
sampler_count[s]; - set_layout->stage[s].sampler_start = sampler[s] = p; - p += sampler_count[s]; - } - - uint32_t descriptor = 0; - int8_t dynamic_slot = 0; - bool is_dynamic; - for (uint32_t i = 0; i < pCreateInfo->count; i++) { - switch (pCreateInfo->pBinding[i].descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) { - sampler[s]->index = descriptor + j; - sampler[s]->dynamic_slot = -1; - sampler[s]++; - } - break; - default: - break; - } - - switch (pCreateInfo->pBinding[i].descriptorType) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - is_dynamic = true; - break; - default: - is_dynamic = false; - break; - } - - switch (pCreateInfo->pBinding[i].descriptorType) { - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) { - surface[s]->index = descriptor + j; - if (is_dynamic) - surface[s]->dynamic_slot = dynamic_slot + j; - else - surface[s]->dynamic_slot = -1; - surface[s]++; - } - break; - default: - break; - } - - if (is_dynamic) - dynamic_slot += pCreateInfo->pBinding[i].arraySize; - - descriptor += pCreateInfo->pBinding[i].arraySize; - } - - *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); - - return VK_SUCCESS; -} - -VkResult anv_DestroyDescriptorSetLayout( - VkDevice _device, - VkDescriptorSetLayout 
_set_layout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); - - anv_device_free(device, set_layout); - - return VK_SUCCESS; -} - -VkResult anv_CreateDescriptorPool( - VkDevice device, - VkDescriptorPoolUsage poolUsage, - uint32_t maxSets, - const VkDescriptorPoolCreateInfo* pCreateInfo, - VkDescriptorPool* pDescriptorPool) -{ - anv_finishme("VkDescriptorPool is a stub"); - pDescriptorPool->handle = 1; - return VK_SUCCESS; -} - -VkResult anv_DestroyDescriptorPool( - VkDevice _device, - VkDescriptorPool _pool) -{ - anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); - return VK_SUCCESS; -} - -VkResult anv_ResetDescriptorPool( - VkDevice device, - VkDescriptorPool descriptorPool) -{ - anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); - return VK_SUCCESS; -} - -VkResult -anv_descriptor_set_create(struct anv_device *device, - const struct anv_descriptor_set_layout *layout, - struct anv_descriptor_set **out_set) -{ - struct anv_descriptor_set *set; - size_t size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]); - - set = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!set) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* A descriptor set may not be 100% filled. Clear the set so we can can - * later detect holes in it. 
- */ - memset(set, 0, size); - - *out_set = set; - - return VK_SUCCESS; -} - -void -anv_descriptor_set_destroy(struct anv_device *device, - struct anv_descriptor_set *set) -{ - anv_device_free(device, set); -} - -VkResult anv_AllocDescriptorSets( - VkDevice _device, - VkDescriptorPool descriptorPool, - VkDescriptorSetUsage setUsage, - uint32_t count, - const VkDescriptorSetLayout* pSetLayouts, - VkDescriptorSet* pDescriptorSets, - uint32_t* pCount) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - VkResult result; - struct anv_descriptor_set *set; - - for (uint32_t i = 0; i < count; i++) { - ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pSetLayouts[i]); - - result = anv_descriptor_set_create(device, layout, &set); - if (result != VK_SUCCESS) { - *pCount = i; - return result; - } - - pDescriptorSets[i] = anv_descriptor_set_to_handle(set); - } - - *pCount = count; - - return VK_SUCCESS; -} - -VkResult anv_UpdateDescriptorSets( - VkDevice device, - uint32_t writeCount, - const VkWriteDescriptorSet* pDescriptorWrites, - uint32_t copyCount, - const VkCopyDescriptorSet* pDescriptorCopies) -{ - for (uint32_t i = 0; i < writeCount; i++) { - const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; - ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet); - - switch (write->descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for (uint32_t j = 0; j < write->count; j++) { - set->descriptors[write->destBinding + j].sampler = - anv_sampler_from_handle(write->pDescriptors[j].sampler); - } - - if (write->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) - break; - - /* fallthrough */ - - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - for (uint32_t j = 0; j < write->count; j++) { - ANV_FROM_HANDLE(anv_image_view, iview, - write->pDescriptors[j].imageView); - set->descriptors[write->destBinding + j].view = &iview->view; - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - 
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - anv_finishme("texel buffers not implemented"); - break; - - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - anv_finishme("input attachments not implemented"); - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - for (uint32_t j = 0; j < write->count; j++) { - ANV_FROM_HANDLE(anv_buffer_view, bview, - write->pDescriptors[j].bufferView); - set->descriptors[write->destBinding + j].view = &bview->view; - } - - default: - break; - } - } - - for (uint32_t i = 0; i < copyCount; i++) { - const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; - ANV_FROM_HANDLE(anv_descriptor_set, src, copy->destSet); - ANV_FROM_HANDLE(anv_descriptor_set, dest, copy->destSet); - for (uint32_t j = 0; j < copy->count; j++) { - dest->descriptors[copy->destBinding + j] = - src->descriptors[copy->srcBinding + j]; - } - } - - return VK_SUCCESS; -} - -// State object functions - -static inline int64_t -clamp_int64(int64_t x, int64_t min, int64_t max) -{ - if (x < min) - return min; - else if (x < max) - return x; - else - return max; -} - -VkResult anv_CreateDynamicViewportState( - VkDevice _device, - const VkDynamicViewportStateCreateInfo* pCreateInfo, - VkDynamicViewportState* pState) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_vp_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - unsigned count = pCreateInfo->viewportAndScissorCount; - state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool, - count * 64, 64); - state->cc_vp = anv_state_pool_alloc(&device->dynamic_state_pool, - count * 8, 32); - state->scissor = 
anv_state_pool_alloc(&device->dynamic_state_pool, - count * 32, 32); - - for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) { - const VkViewport *vp = &pCreateInfo->pViewports[i]; - const VkRect2D *s = &pCreateInfo->pScissors[i]; - - struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = { - .ViewportMatrixElementm00 = vp->width / 2, - .ViewportMatrixElementm11 = vp->height / 2, - .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, - .ViewportMatrixElementm30 = vp->originX + vp->width / 2, - .ViewportMatrixElementm31 = vp->originY + vp->height / 2, - .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2, - .XMinClipGuardband = -1.0f, - .XMaxClipGuardband = 1.0f, - .YMinClipGuardband = -1.0f, - .YMaxClipGuardband = 1.0f, - .XMinViewPort = vp->originX, - .XMaxViewPort = vp->originX + vp->width - 1, - .YMinViewPort = vp->originY, - .YMaxViewPort = vp->originY + vp->height - 1, - }; - - struct GEN8_CC_VIEWPORT cc_viewport = { - .MinimumDepth = vp->minDepth, - .MaximumDepth = vp->maxDepth - }; - - /* Since xmax and ymax are inclusive, we have to have xmax < xmin or - * ymax < ymin for empty clips. In case clip x, y, width height are all - * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't - * what we want. Just special case empty clips and produce a canonical - * empty clip. */ - static const struct GEN8_SCISSOR_RECT empty_scissor = { - .ScissorRectangleYMin = 1, - .ScissorRectangleXMin = 1, - .ScissorRectangleYMax = 0, - .ScissorRectangleXMax = 0 - }; - - const int max = 0xffff; - struct GEN8_SCISSOR_RECT scissor = { - /* Do this math using int64_t so overflow gets clamped correctly. 
*/ - .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), - .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), - .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), - .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) - }; - - GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport); - GEN8_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 32, &cc_viewport); - - if (s->extent.width <= 0 || s->extent.height <= 0) { - GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor); - } else { - GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor); - } - } - - *pState = anv_dynamic_vp_state_to_handle(state); - - return VK_SUCCESS; -} - -VkResult anv_DestroyDynamicViewportState( - VkDevice _device, - VkDynamicViewportState _vp_state) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, _vp_state); - - anv_state_pool_free(&device->dynamic_state_pool, vp_state->sf_clip_vp); - anv_state_pool_free(&device->dynamic_state_pool, vp_state->cc_vp); - anv_state_pool_free(&device->dynamic_state_pool, vp_state->scissor); - - anv_device_free(device, vp_state); - - return VK_SUCCESS; -} - -VkResult anv_CreateDynamicRasterState( - VkDevice _device, - const VkDynamicRasterStateCreateInfo* pCreateInfo, - VkDynamicRasterState* pState) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_rs_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GEN8_3DSTATE_SF sf = { - GEN8_3DSTATE_SF_header, - .LineWidth = pCreateInfo->lineWidth, - }; - - GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); - - bool enable_bias = pCreateInfo->depthBias != 0.0f || - 
pCreateInfo->slopeScaledDepthBias != 0.0f; - struct GEN8_3DSTATE_RASTER raster = { - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, - .GlobalDepthOffsetConstant = pCreateInfo->depthBias, - .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias, - .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp - }; - - GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster); - - *pState = anv_dynamic_rs_state_to_handle(state); - - return VK_SUCCESS; -} - -VkResult anv_DestroyDynamicRasterState( - VkDevice _device, - VkDynamicRasterState _rs_state) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, _rs_state); - - anv_device_free(device, rs_state); - - return VK_SUCCESS; -} - -VkResult anv_CreateDynamicColorBlendState( - VkDevice _device, - const VkDynamicColorBlendStateCreateInfo* pCreateInfo, - VkDynamicColorBlendState* pState) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_cb_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GEN8_COLOR_CALC_STATE color_calc_state = { - .BlendConstantColorRed = pCreateInfo->blendConst[0], - .BlendConstantColorGreen = pCreateInfo->blendConst[1], - .BlendConstantColorBlue = pCreateInfo->blendConst[2], - .BlendConstantColorAlpha = pCreateInfo->blendConst[3] - }; - - GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); - - *pState = anv_dynamic_cb_state_to_handle(state); - - return VK_SUCCESS; -} - -VkResult anv_DestroyDynamicColorBlendState( - VkDevice _device, - VkDynamicColorBlendState _cb_state) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, _cb_state); - - 
anv_device_free(device, cb_state); - - return VK_SUCCESS; -} - -VkResult anv_CreateDynamicDepthStencilState( - VkDevice _device, - const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, - VkDynamicDepthStencilState* pState) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_ds_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - GEN8_3DSTATE_WM_DEPTH_STENCIL_header, - - /* Is this what we need to do? */ - .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0, - - .StencilTestMask = pCreateInfo->stencilReadMask & 0xff, - .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, - - .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff, - .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, - }; - - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil, - &wm_depth_stencil); - - struct GEN8_COLOR_CALC_STATE color_calc_state = { - .StencilReferenceValue = pCreateInfo->stencilFrontRef, - .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef - }; - - GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); - - *pState = anv_dynamic_ds_state_to_handle(state); - - return VK_SUCCESS; -} - -VkResult anv_DestroyDynamicDepthStencilState( - VkDevice _device, - VkDynamicDepthStencilState _ds_state) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, _ds_state); - - anv_device_free(device, ds_state); - - return VK_SUCCESS; -} - -// Command buffer functions - -VkResult anv_CreateCommandPool( - VkDevice device, - const VkCmdPoolCreateInfo* pCreateInfo, - VkCmdPool* pCmdPool) -{ - pCmdPool->handle = 7; - - stub_return(VK_SUCCESS); -} - -VkResult 
anv_DestroyCommandPool( - VkDevice device, - VkCmdPool cmdPool) -{ - stub_return(VK_SUCCESS); -} - -VkResult anv_ResetCommandPool( - VkDevice device, - VkCmdPool cmdPool, - VkCmdPoolResetFlags flags) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_CreateFramebuffer( - VkDevice _device, - const VkFramebufferCreateInfo* pCreateInfo, - VkFramebuffer* pFramebuffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_framebuffer *framebuffer; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); - - size_t size = sizeof(*framebuffer) + - sizeof(struct anv_attachment_view *) * pCreateInfo->attachmentCount; - framebuffer = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (framebuffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - framebuffer->attachment_count = pCreateInfo->attachmentCount; - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - ANV_FROM_HANDLE(anv_attachment_view, view, - pCreateInfo->pAttachments[i].view); - - framebuffer->attachments[i] = view; - } - - framebuffer->width = pCreateInfo->width; - framebuffer->height = pCreateInfo->height; - framebuffer->layers = pCreateInfo->layers; - - anv_CreateDynamicViewportState(anv_device_to_handle(device), - &(VkDynamicViewportStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO, - .viewportAndScissorCount = 1, - .pViewports = (VkViewport[]) { - { - .originX = 0, - .originY = 0, - .width = pCreateInfo->width, - .height = pCreateInfo->height, - .minDepth = 0, - .maxDepth = 1 - }, - }, - .pScissors = (VkRect2D[]) { - { { 0, 0 }, - { pCreateInfo->width, pCreateInfo->height } }, - } - }, - &framebuffer->vp_state); - - *pFramebuffer = anv_framebuffer_to_handle(framebuffer); - - return VK_SUCCESS; -} - -VkResult anv_DestroyFramebuffer( - VkDevice _device, - VkFramebuffer _fb) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); - - 
anv_DestroyDynamicViewportState(anv_device_to_handle(device), - fb->vp_state); - anv_device_free(device, fb); - - return VK_SUCCESS; -} - -VkResult anv_CreateRenderPass( - VkDevice _device, - const VkRenderPassCreateInfo* pCreateInfo, - VkRenderPass* pRenderPass) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_render_pass *pass; - size_t size; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); - - size = sizeof(*pass) + - pCreateInfo->subpassCount * sizeof(struct anv_subpass); - pass = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (pass == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* Clear the subpasses along with the parent pass. This required because - * each array member of anv_subpass must be a valid pointer if not NULL. - */ - memset(pass, 0, size); - - pass->attachment_count = pCreateInfo->attachmentCount; - pass->subpass_count = pCreateInfo->subpassCount; - - size = pCreateInfo->attachmentCount * sizeof(*pass->attachments); - pass->attachments = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - pass->attachments[i].format = pCreateInfo->pAttachments[i].format; - pass->attachments[i].samples = pCreateInfo->pAttachments[i].samples; - pass->attachments[i].load_op = pCreateInfo->pAttachments[i].loadOp; - pass->attachments[i].stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; - // pass->attachments[i].store_op = pCreateInfo->pAttachments[i].storeOp; - // pass->attachments[i].stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; - } - - for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { - const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; - struct anv_subpass *subpass = &pass->subpasses[i]; - - subpass->input_count = desc->inputCount; - subpass->color_count = desc->colorCount; - - if (desc->inputCount > 0) { - subpass->input_attachments = - 
anv_device_alloc(device, desc->inputCount * sizeof(uint32_t), - 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - - for (uint32_t j = 0; j < desc->inputCount; j++) { - subpass->input_attachments[j] - = desc->inputAttachments[j].attachment; - } - } - - if (desc->colorCount > 0) { - subpass->color_attachments = - anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), - 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - - for (uint32_t j = 0; j < desc->colorCount; j++) { - subpass->color_attachments[j] - = desc->colorAttachments[j].attachment; - } - } - - if (desc->resolveAttachments) { - subpass->resolve_attachments = - anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), - 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - - for (uint32_t j = 0; j < desc->colorCount; j++) { - subpass->resolve_attachments[j] - = desc->resolveAttachments[j].attachment; - } - } - - subpass->depth_stencil_attachment = desc->depthStencilAttachment.attachment; - } - - *pRenderPass = anv_render_pass_to_handle(pass); - - return VK_SUCCESS; -} - -VkResult anv_DestroyRenderPass( - VkDevice _device, - VkRenderPass _pass) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_render_pass, pass, _pass); - - anv_device_free(device, pass->attachments); - - for (uint32_t i = 0; i < pass->subpass_count; i++) { - /* In VkSubpassCreateInfo, each of the attachment arrays may be null. - * Don't free the null arrays. 
- */ - struct anv_subpass *subpass = &pass->subpasses[i]; - - anv_device_free(device, subpass->input_attachments); - anv_device_free(device, subpass->color_attachments); - anv_device_free(device, subpass->resolve_attachments); - } - - anv_device_free(device, pass); - - return VK_SUCCESS; -} - -VkResult anv_GetRenderAreaGranularity( - VkDevice device, - VkRenderPass renderPass, - VkExtent2D* pGranularity) -{ - *pGranularity = (VkExtent2D) { 1, 1 }; - - return VK_SUCCESS; -} - -void vkCmdDbgMarkerBegin( - VkCmdBuffer cmdBuffer, - const char* pMarker) - __attribute__ ((visibility ("default"))); - -void vkCmdDbgMarkerEnd( - VkCmdBuffer cmdBuffer) - __attribute__ ((visibility ("default"))); - -void vkCmdDbgMarkerBegin( - VkCmdBuffer cmdBuffer, - const char* pMarker) -{ -} - -void vkCmdDbgMarkerEnd( - VkCmdBuffer cmdBuffer) -{ -} diff --git a/src/vulkan/formats.c b/src/vulkan/formats.c deleted file mode 100644 index 9d9294b7ebd..00000000000 --- a/src/vulkan/formats.c +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "private.h" - -#define UNSUPPORTED 0xffff - -#define fmt(__vk_fmt, ...) \ - [__vk_fmt] = { .name = #__vk_fmt, __VA_ARGS__ } - -static const struct anv_format anv_formats[] = { - fmt(VK_FORMAT_UNDEFINED, RAW, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R4G4_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_R4G4_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_R4G4B4A4_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_R4G4B4A4_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_R5G6B5_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_R5G6B5_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_R5G5B5A1_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_R5G5B5A1_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_R8_UNORM, R8_UNORM, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SNORM, R8_SNORM, .cpp = 1, .num_channels = 1,), - fmt(VK_FORMAT_R8_USCALED, R8_USCALED, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SSCALED, R8_SSCALED, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_UINT, R8_UINT, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SINT, R8_SINT, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_R8G8_UNORM, R8G8_UNORM, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SNORM, R8G8_SNORM, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_USCALED, R8G8_USCALED, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SSCALED, R8G8_SSCALED, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_UINT, R8G8_UINT, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SINT, R8G8_SINT, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SRGB, UNSUPPORTED), /* L8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8_UNORM, R8G8B8X8_UNORM, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SNORM, R8G8B8_SNORM, .cpp = 3, .num_channels = 3), - 
fmt(VK_FORMAT_R8G8B8_USCALED, R8G8B8_USCALED, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SSCALED, R8G8B8_SSCALED, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_UINT, R8G8B8_UINT, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SINT, R8G8B8_SINT, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SRGB, UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8A8_UNORM, R8G8B8A8_UNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SNORM, R8G8B8A8_SNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_USCALED, R8G8B8A8_USCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SSCALED, R8G8B8A8_SSCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_UINT, R8G8B8A8_UINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SINT, R8G8B8A8_SINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SRGB, R8G8B8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UNORM, R10G10B10A2_UNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SNORM, R10G10B10A2_SNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_USCALED, R10G10B10A2_USCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SSCALED, R10G10B10A2_SSCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UINT, R10G10B10A2_UINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SINT, R10G10B10A2_SINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SNORM, R16_SNORM, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_USCALED, R16_USCALED, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SSCALED, R16_SSCALED, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_UINT, R16_UINT, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SINT, R16_SINT, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SFLOAT, R16_FLOAT, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16G16_UNORM, R16G16_UNORM, .cpp = 4, .num_channels = 2), - 
fmt(VK_FORMAT_R16G16_SNORM, R16G16_SNORM, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_USCALED, R16G16_USCALED, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SSCALED, R16G16_SSCALED, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_UINT, R16G16_UINT, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SINT, R16G16_SINT, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SFLOAT, R16G16_FLOAT, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16B16_UNORM, R16G16B16_UNORM, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SNORM, R16G16B16_SNORM, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_USCALED, R16G16B16_USCALED, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SSCALED, R16G16B16_SSCALED, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_UINT, R16G16B16_UINT, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SINT, R16G16B16_SINT, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SFLOAT, R16G16B16_FLOAT, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16A16_UNORM, R16G16B16A16_UNORM, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SNORM, R16G16B16A16_SNORM, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_USCALED, R16G16B16A16_USCALED, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SSCALED, R16G16B16A16_SSCALED, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_UINT, R16G16B16A16_UINT, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SINT, R16G16B16A16_SINT, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SFLOAT, R16G16B16A16_FLOAT, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R32_UINT, R32_UINT, .cpp = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32_SINT, R32_SINT, .cpp = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32G32_UINT, R32G32_UINT, .cpp = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32_SINT, R32G32_SINT, .cpp = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32_SFLOAT, 
R32G32_FLOAT, .cpp = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32B32_UINT, R32G32B32_UINT, .cpp = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32_SINT, R32G32B32_SINT, .cpp = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32_SFLOAT, R32G32B32_FLOAT, .cpp = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32A32_UINT, R32G32B32A32_UINT, .cpp = 16, .num_channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SINT, R32G32B32A32_SINT, .cpp = 16, .num_channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SFLOAT, R32G32B32A32_FLOAT, .cpp = 16, .num_channels = 4,), - fmt(VK_FORMAT_R64_SFLOAT, R64_FLOAT, .cpp = 8, .num_channels = 1), - fmt(VK_FORMAT_R64G64_SFLOAT, R64G64_FLOAT, .cpp = 16, .num_channels = 2), - fmt(VK_FORMAT_R64G64B64_SFLOAT, R64G64B64_FLOAT, .cpp = 24, .num_channels = 3), - fmt(VK_FORMAT_R64G64B64A64_SFLOAT, R64G64B64A64_FLOAT, .cpp = 32, .num_channels = 4), - fmt(VK_FORMAT_R11G11B10_UFLOAT, R11G11B10_FLOAT, .cpp = 4, .num_channels = 3), - fmt(VK_FORMAT_R9G9B9E5_UFLOAT, R9G9B9E5_SHAREDEXP, .cpp = 4, .num_channels = 3), - - fmt(VK_FORMAT_D16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1, .depth_format = D16_UNORM), - fmt(VK_FORMAT_D24_UNORM, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), - fmt(VK_FORMAT_D32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1, .depth_format = D32_FLOAT), - fmt(VK_FORMAT_S8_UINT, R8_UINT, .cpp = 1, .num_channels = 1, .has_stencil = true), - fmt(VK_FORMAT_D16_UNORM_S8_UINT, R16_UNORM, .cpp = 2, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), - fmt(VK_FORMAT_D24_UNORM_S8_UINT, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), - fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, R32_FLOAT, .cpp = 4, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), - - fmt(VK_FORMAT_BC1_RGB_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGB_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGBA_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGBA_SRGB, UNSUPPORTED), - 
fmt(VK_FORMAT_BC2_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC2_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_BC3_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC3_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_BC4_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC4_SNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC5_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC5_SNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC6H_UFLOAT, UNSUPPORTED), - fmt(VK_FORMAT_BC6H_SFLOAT, UNSUPPORTED), - fmt(VK_FORMAT_BC7_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC7_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_EAC_R11_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_EAC_R11_SNORM, UNSUPPORTED), - fmt(VK_FORMAT_EAC_R11G11_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_EAC_R11G11_SNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_4x4_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_4x4_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x4_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x4_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x5_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x5_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x5_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x5_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x6_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x6_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x5_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x5_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x6_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x6_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x8_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x5_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x5_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x6_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x6_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x8_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x8_SRGB, UNSUPPORTED), - 
fmt(VK_FORMAT_ASTC_10x10_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x10_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x10_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x10_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x12_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x12_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_B4G4R4A4_UNORM, B4G4R4A4_UNORM, .cpp = 2, .num_channels = 4), - fmt(VK_FORMAT_B5G5R5A1_UNORM, B5G5R5A1_UNORM, .cpp = 2, .num_channels = 4), - fmt(VK_FORMAT_B5G6R5_UNORM, B5G6R5_UNORM, .cpp = 2, .num_channels = 3), - fmt(VK_FORMAT_B5G6R5_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SNORM, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SSCALED, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_UINT, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SINT, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UNORM, B8G8R8A8_UNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B8G8R8A8_SNORM, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SSCALED, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UINT, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SINT, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SRGB, B8G8R8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UNORM, B10G10R10A2_UNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SNORM, B10G10R10A2_SNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_USCALED, B10G10R10A2_USCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SSCALED, B10G10R10A2_SSCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UINT, B10G10R10A2_UINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SINT, B10G10R10A2_SINT, .cpp = 4, .num_channels = 4) -}; - -#undef fmt - -const struct anv_format * -anv_format_for_vk_format(VkFormat format) -{ - return &anv_formats[format]; -} - -bool -anv_is_vk_format_depth_or_stencil(VkFormat format) -{ - const struct anv_format *format_info = - 
anv_format_for_vk_format(format); - - if (format_info->depth_format != UNSUPPORTED && - format_info->depth_format != 0) - return true; - - return format_info->has_stencil; -} - -// Format capabilities - -struct surface_format_info { - bool exists; - int sampling; - int filtering; - int shadow_compare; - int chroma_key; - int render_target; - int alpha_blend; - int input_vb; - int streamed_output_vb; - int color_processing; -}; - -extern const struct surface_format_info surface_formats[]; - -VkResult anv_validate_GetPhysicalDeviceFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat _format, - VkFormatProperties* pFormatProperties) -{ - const struct anv_format *format = anv_format_for_vk_format(_format); - fprintf(stderr, "vkGetFormatProperties(%s)\n", format->name); - return anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); -} - -VkResult anv_GetPhysicalDeviceFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat _format, - VkFormatProperties* pFormatProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - const struct surface_format_info *info; - int gen; - - const struct anv_format *format = anv_format_for_vk_format(_format); - if (format == NULL) - return vk_error(VK_ERROR_INVALID_VALUE); - - gen = physical_device->info->gen * 10; - if (physical_device->info->is_haswell) - gen += 5; - - if (format->surface_format == UNSUPPORTED) - goto unsupported; - - info = &surface_formats[format->surface_format]; - if (!info->exists) - goto unsupported; - - uint32_t linear = 0, tiled = 0; - if (info->sampling <= gen) { - linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - } - if (info->render_target <= gen) { - linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; - tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; - } - if (info->alpha_blend <= gen) { - linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; - tiled |= 
VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; - } - if (info->input_vb <= gen) { - linear |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; - } - - pFormatProperties->linearTilingFeatures = linear; - pFormatProperties->optimalTilingFeatures = tiled; - - return VK_SUCCESS; - - unsupported: - pFormatProperties->linearTilingFeatures = 0; - pFormatProperties->optimalTilingFeatures = 0; - - return VK_SUCCESS; -} - -VkResult anv_GetPhysicalDeviceImageFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - VkImageTiling tiling, - VkImageUsageFlags usage, - VkImageFormatProperties* pImageFormatProperties) -{ - /* TODO: We should do something here. Chad? */ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_GetPhysicalDeviceSparseImageFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - uint32_t samples, - VkImageUsageFlags usage, - VkImageTiling tiling, - uint32_t* pNumProperties, - VkSparseImageFormatProperties* pProperties) -{ - stub_return(VK_UNSUPPORTED); -} diff --git a/src/vulkan/gem.c b/src/vulkan/gem.c deleted file mode 100644 index db0d29f42c7..00000000000 --- a/src/vulkan/gem.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#define _DEFAULT_SOURCE - -#include -#include -#include -#include -#include -#include - -#include "private.h" - -#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) - -static int -anv_ioctl(int fd, unsigned long request, void *arg) -{ - int ret; - - do { - ret = ioctl(fd, request, arg); - } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); - - return ret; -} - -/** - * Wrapper around DRM_IOCTL_I915_GEM_CREATE. - * - * Return gem handle, or 0 on failure. Gem handles are never 0. - */ -uint32_t -anv_gem_create(struct anv_device *device, size_t size) -{ - struct drm_i915_gem_create gem_create; - int ret; - - VG_CLEAR(gem_create); - gem_create.size = size; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); - if (ret != 0) { - /* FIXME: What do we do if this fails? */ - return 0; - } - - return gem_create.handle; -} - -void -anv_gem_close(struct anv_device *device, int gem_handle) -{ - struct drm_gem_close close; - - VG_CLEAR(close); - close.handle = gem_handle; - anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); -} - -/** - * Wrapper around DRM_IOCTL_I915_GEM_MMAP. 
- */ -void* -anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, - uint64_t offset, uint64_t size) -{ - struct drm_i915_gem_mmap gem_mmap; - int ret; - - gem_mmap.handle = gem_handle; - VG_CLEAR(gem_mmap.pad); - gem_mmap.offset = offset; - gem_mmap.size = size; - VG_CLEAR(gem_mmap.addr_ptr); - -#ifdef I915_MMAP_WC - gem_mmap.flags = 0; -#endif - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); - if (ret != 0) { - /* FIXME: Is NULL the right error return? Cf MAP_INVALID */ - return NULL; - } - - VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1)); - return (void *)(uintptr_t) gem_mmap.addr_ptr; -} - -/* This is just a wrapper around munmap, but it also notifies valgrind that - * this map is no longer valid. Pair this with anv_gem_mmap(). - */ -void -anv_gem_munmap(void *p, uint64_t size) -{ - VG(VALGRIND_FREELIKE_BLOCK(p, 0)); - munmap(p, size); -} - -int -anv_gem_userptr(struct anv_device *device, void *mem, size_t size) -{ - struct drm_i915_gem_userptr userptr; - int ret; - - VG_CLEAR(userptr); - userptr.user_ptr = (__u64)((unsigned long) mem); - userptr.user_size = size; - userptr.flags = 0; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); - if (ret == -1) - return 0; - - return userptr.handle; -} - -/** - * On error, \a timeout_ns holds the remaining time. 
- */ -int -anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns) -{ - struct drm_i915_gem_wait wait; - int ret; - - VG_CLEAR(wait); - wait.bo_handle = gem_handle; - wait.timeout_ns = *timeout_ns; - wait.flags = 0; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); - *timeout_ns = wait.timeout_ns; - - return ret; -} - -int -anv_gem_execbuffer(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf) -{ - return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); -} - -int -anv_gem_set_tiling(struct anv_device *device, - int gem_handle, uint32_t stride, uint32_t tiling) -{ - struct drm_i915_gem_set_tiling set_tiling; - int ret; - - /* set_tiling overwrites the input on the error path, so we have to open - * code anv_ioctl. - */ - - do { - VG_CLEAR(set_tiling); - set_tiling.handle = gem_handle; - set_tiling.tiling_mode = I915_TILING_X; - set_tiling.stride = stride; - - ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); - } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); - - return ret; -} - -int -anv_gem_get_param(int fd, uint32_t param) -{ - drm_i915_getparam_t gp; - int ret, tmp; - - VG_CLEAR(gp); - gp.param = param; - gp.value = &tmp; - ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); - if (ret == 0) - return tmp; - - return 0; -} - -int -anv_gem_create_context(struct anv_device *device) -{ - struct drm_i915_gem_context_create create; - int ret; - - VG_CLEAR(create); - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); - if (ret == -1) - return -1; - - return create.ctx_id; -} - -int -anv_gem_destroy_context(struct anv_device *device, int context) -{ - struct drm_i915_gem_context_destroy destroy; - - VG_CLEAR(destroy); - destroy.ctx_id = context; - - return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy); -} - -int -anv_gem_get_aperture(struct anv_physical_device *physical_dev, uint64_t *size) -{ - struct drm_i915_gem_get_aperture 
aperture; - int ret; - - VG_CLEAR(aperture); - ret = anv_ioctl(physical_dev->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); - if (ret == -1) - return -1; - - *size = aperture.aper_available_size; - - return 0; -} - -int -anv_gem_handle_to_fd(struct anv_device *device, int gem_handle) -{ - struct drm_prime_handle args; - int ret; - - VG_CLEAR(args); - args.handle = gem_handle; - args.flags = DRM_CLOEXEC; - - ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); - if (ret == -1) - return -1; - - return args.fd; -} - -int -anv_gem_fd_to_handle(struct anv_device *device, int fd) -{ - struct drm_prime_handle args; - int ret; - - VG_CLEAR(args); - args.fd = fd; - - ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); - if (ret == -1) - return 0; - - return args.handle; -} diff --git a/src/vulkan/image.c b/src/vulkan/image.c deleted file mode 100644 index 4b37de54dde..00000000000 --- a/src/vulkan/image.c +++ /dev/null @@ -1,745 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "private.h" - -struct anv_image_view_info { - uint8_t surface_type; /**< RENDER_SURFACE_STATE.SurfaceType */ - bool is_array:1; /**< RENDER_SURFACE_STATE.SurfaceArray */ - bool is_cube:1; /**< RENDER_SURFACE_STATE.CubeFaceEnable* */ -}; - -static const uint8_t anv_halign[] = { - [4] = HALIGN4, - [8] = HALIGN8, - [16] = HALIGN16, -}; - -static const uint8_t anv_valign[] = { - [4] = VALIGN4, - [8] = VALIGN8, - [16] = VALIGN16, -}; - -static const uint8_t anv_surf_type_from_image_type[] = { - [VK_IMAGE_TYPE_1D] = SURFTYPE_1D, - [VK_IMAGE_TYPE_2D] = SURFTYPE_2D, - [VK_IMAGE_TYPE_3D] = SURFTYPE_3D, - -}; - -static const struct anv_image_view_info -anv_image_view_info_table[] = { - #define INFO(s, ...) 
{ .surface_type = s, __VA_ARGS__ } - [VK_IMAGE_VIEW_TYPE_1D] = INFO(SURFTYPE_1D), - [VK_IMAGE_VIEW_TYPE_2D] = INFO(SURFTYPE_2D), - [VK_IMAGE_VIEW_TYPE_3D] = INFO(SURFTYPE_3D), - [VK_IMAGE_VIEW_TYPE_CUBE] = INFO(SURFTYPE_CUBE, .is_cube = 1), - [VK_IMAGE_VIEW_TYPE_1D_ARRAY] = INFO(SURFTYPE_1D, .is_array = 1), - [VK_IMAGE_VIEW_TYPE_2D_ARRAY] = INFO(SURFTYPE_2D, .is_array = 1), - [VK_IMAGE_VIEW_TYPE_CUBE_ARRAY] = INFO(SURFTYPE_CUBE, .is_array = 1, .is_cube = 1), - #undef INFO -}; - -static const struct anv_surf_type_limits { - int32_t width; - int32_t height; - int32_t depth; -} anv_surf_type_limits[] = { - [SURFTYPE_1D] = {16384, 0, 2048}, - [SURFTYPE_2D] = {16384, 16384, 2048}, - [SURFTYPE_3D] = {2048, 2048, 2048}, - [SURFTYPE_CUBE] = {16384, 16384, 340}, - [SURFTYPE_BUFFER] = {128, 16384, 64}, - [SURFTYPE_STRBUF] = {128, 16384, 64}, -}; - -static const struct anv_tile_info { - uint32_t width; - uint32_t height; - - /** - * Alignment for RENDER_SURFACE_STATE.SurfaceBaseAddress. - * - * To simplify calculations, the alignments defined in the table are - * sometimes larger than required. For example, Skylake requires that X and - * Y tiled buffers be aligned to 4K, but Broadwell permits smaller - * alignment. We choose 4K to accomodate both chipsets. The alignment of - * a linear buffer depends on its element type and usage. Linear depth - * buffers have the largest alignment, 64B, so we choose that for all linear - * buffers. 
- */ - uint32_t surface_alignment; -} anv_tile_info_table[] = { - [LINEAR] = { 1, 1, 64 }, - [XMAJOR] = { 512, 8, 4096 }, - [YMAJOR] = { 128, 32, 4096 }, - [WMAJOR] = { 128, 32, 4096 }, -}; - -static uint32_t -anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) -{ - if (anv_info->force_tile_mode) - return anv_info->tile_mode; - - if (anv_info->vk_info->format == VK_FORMAT_S8_UINT) - return WMAJOR; - - switch (anv_info->vk_info->tiling) { - case VK_IMAGE_TILING_LINEAR: - return LINEAR; - case VK_IMAGE_TILING_OPTIMAL: - return YMAJOR; - default: - assert(!"bad VKImageTiling"); - return LINEAR; - } -} - -static VkResult -anv_image_make_surface(const struct anv_image_create_info *create_info, - uint64_t *inout_image_size, - uint32_t *inout_image_alignment, - struct anv_surface *out_surface) -{ - /* See RENDER_SURFACE_STATE.SurfaceQPitch */ - static const uint16_t min_qpitch UNUSED = 0x4; - static const uint16_t max_qpitch UNUSED = 0x1ffc; - - const VkExtent3D *restrict extent = &create_info->vk_info->extent; - const uint32_t levels = create_info->vk_info->mipLevels; - const uint32_t array_size = create_info->vk_info->arraySize; - - const uint8_t tile_mode = anv_image_choose_tile_mode(create_info); - - const struct anv_tile_info *tile_info = - &anv_tile_info_table[tile_mode]; - - const struct anv_format *format_info = - anv_format_for_vk_format(create_info->vk_info->format); - - const uint32_t i = 4; /* FINISHME: Stop hardcoding subimage alignment */ - const uint32_t j = 4; /* FINISHME: Stop hardcoding subimage alignment */ - const uint32_t w0 = align_u32(extent->width, i); - const uint32_t h0 = align_u32(extent->height, j); - - uint16_t qpitch; - uint32_t mt_width; - uint32_t mt_height; - - if (levels == 1 && array_size == 1) { - qpitch = min_qpitch; - mt_width = w0; - mt_height = h0; - } else { - uint32_t w1 = align_u32(anv_minify(extent->width, 1), i); - uint32_t h1 = align_u32(anv_minify(extent->height, 1), j); - uint32_t w2 = 
align_u32(anv_minify(extent->width, 2), i); - - qpitch = h0 + h1 + 11 * j; - mt_width = MAX(w0, w1 + w2); - mt_height = array_size * qpitch; - } - - assert(qpitch >= min_qpitch); - if (qpitch > max_qpitch) { - anv_loge("image qpitch > 0x%x\n", max_qpitch); - return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - } - - /* From the Broadwell PRM, RENDER_SURFACE_STATE.SurfaceQpitch: - * - * This field must be set an integer multiple of the Surface Vertical - * Alignment. - */ - assert(anv_is_aligned(qpitch, j)); - - const uint32_t stride = align_u32(mt_width * format_info->cpp, - tile_info->width); - const uint32_t size = stride * align_u32(mt_height, tile_info->height); - const uint32_t offset = align_u32(*inout_image_size, - tile_info->surface_alignment); - - *inout_image_size = offset + size; - *inout_image_alignment = MAX(*inout_image_alignment, - tile_info->surface_alignment); - - *out_surface = (struct anv_surface) { - .offset = offset, - .stride = stride, - .tile_mode = tile_mode, - .qpitch = qpitch, - .h_align = i, - .v_align = j, - }; - - return VK_SUCCESS; -} - -VkResult -anv_image_create(VkDevice _device, - const struct anv_image_create_info *create_info, - VkImage *pImage) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - const VkImageCreateInfo *pCreateInfo = create_info->vk_info; - const VkExtent3D *restrict extent = &pCreateInfo->extent; - struct anv_image *image = NULL; - VkResult r; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); - - /* XXX: We don't handle any of these */ - anv_assert(pCreateInfo->imageType == VK_IMAGE_TYPE_2D); - anv_assert(pCreateInfo->mipLevels > 0); - anv_assert(pCreateInfo->arraySize > 0); - anv_assert(pCreateInfo->samples == 1); - anv_assert(pCreateInfo->extent.width > 0); - anv_assert(pCreateInfo->extent.height > 0); - anv_assert(pCreateInfo->extent.depth > 0); - - /* TODO(chadv): How should we validate inputs? 
*/ - const uint8_t surf_type = - anv_surf_type_from_image_type[pCreateInfo->imageType]; - - const struct anv_surf_type_limits *limits = - &anv_surf_type_limits[surf_type]; - - if (extent->width > limits->width || - extent->height > limits->height || - extent->depth > limits->depth) { - /* TODO(chadv): What is the correct error? */ - anv_loge("image extent is too large"); - return vk_error(VK_ERROR_INVALID_MEMORY_SIZE); - } - - const struct anv_format *format_info = - anv_format_for_vk_format(pCreateInfo->format); - - image = anv_device_alloc(device, sizeof(*image), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!image) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - memset(image, 0, sizeof(*image)); - image->type = pCreateInfo->imageType; - image->extent = pCreateInfo->extent; - image->format = pCreateInfo->format; - image->levels = pCreateInfo->mipLevels; - image->array_size = pCreateInfo->arraySize; - image->surf_type = surf_type; - - if (likely(!format_info->has_stencil || format_info->depth_format)) { - /* The image's primary surface is a color or depth surface. */ - r = anv_image_make_surface(create_info, &image->size, &image->alignment, - &image->primary_surface); - if (r != VK_SUCCESS) - goto fail; - } - - if (format_info->has_stencil) { - /* From the GPU's perspective, the depth buffer and stencil buffer are - * separate buffers. From Vulkan's perspective, though, depth and - * stencil reside in the same image. To satisfy Vulkan and the GPU, we - * place the depth and stencil buffers in the same bo. 
- */ - VkImageCreateInfo stencil_info = *pCreateInfo; - stencil_info.format = VK_FORMAT_S8_UINT; - - r = anv_image_make_surface( - &(struct anv_image_create_info) { - .vk_info = &stencil_info, - }, - &image->size, &image->alignment, &image->stencil_surface); - - if (r != VK_SUCCESS) - goto fail; - } - - *pImage = anv_image_to_handle(image); - - return VK_SUCCESS; - -fail: - if (image) - anv_device_free(device, image); - - return r; -} - -VkResult -anv_CreateImage(VkDevice device, - const VkImageCreateInfo *pCreateInfo, - VkImage *pImage) -{ - return anv_image_create(device, - &(struct anv_image_create_info) { - .vk_info = pCreateInfo, - }, - pImage); -} - -VkResult -anv_DestroyImage(VkDevice _device, VkImage _image) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - anv_device_free(device, anv_image_from_handle(_image)); - - return VK_SUCCESS; -} - -VkResult anv_GetImageSubresourceLayout( - VkDevice device, - VkImage image, - const VkImageSubresource* pSubresource, - VkSubresourceLayout* pLayout) -{ - stub_return(VK_UNSUPPORTED); -} - -void -anv_surface_view_fini(struct anv_device *device, - struct anv_surface_view *view) -{ - anv_state_pool_free(&device->surface_state_pool, view->surface_state); -} - -void -anv_image_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - struct anv_surface_view *view = &iview->view; - struct anv_surface *surface; - - const struct anv_format *format_info = - anv_format_for_vk_format(pCreateInfo->format); - - const struct anv_image_view_info *view_type_info - = &anv_image_view_info_table[pCreateInfo->viewType]; - - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) - anv_finishme("non-2D image views"); - - switch (pCreateInfo->subresourceRange.aspect) { - case VK_IMAGE_ASPECT_STENCIL: - 
anv_finishme("stencil image views"); - abort(); - break; - case VK_IMAGE_ASPECT_DEPTH: - case VK_IMAGE_ASPECT_COLOR: - view->offset = image->offset; - surface = &image->primary_surface; - break; - default: - unreachable(""); - break; - } - - view->bo = image->bo; - view->offset = image->offset + surface->offset; - view->format = pCreateInfo->format; - - iview->extent = (VkExtent3D) { - .width = anv_minify(image->extent.width, range->baseMipLevel), - .height = anv_minify(image->extent.height, range->baseMipLevel), - .depth = anv_minify(image->extent.depth, range->baseMipLevel), - }; - - uint32_t depth = 1; - if (range->arraySize > 1) { - depth = range->arraySize; - } else if (image->extent.depth > 1) { - depth = image->extent.depth; - } - - static const uint32_t vk_to_gen_swizzle[] = { - [VK_CHANNEL_SWIZZLE_ZERO] = SCS_ZERO, - [VK_CHANNEL_SWIZZLE_ONE] = SCS_ONE, - [VK_CHANNEL_SWIZZLE_R] = SCS_RED, - [VK_CHANNEL_SWIZZLE_G] = SCS_GREEN, - [VK_CHANNEL_SWIZZLE_B] = SCS_BLUE, - [VK_CHANNEL_SWIZZLE_A] = SCS_ALPHA - }; - - struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = view_type_info->surface_type, - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = format_info->surface_format, - .SurfaceVerticalAlignment = anv_valign[surface->v_align], - .SurfaceHorizontalAlignment = anv_halign[surface->h_align], - .TileMode = surface->tile_mode, - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GEN8_MOCS, - - /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in - * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have - * both Base Mip Level fields nonzero". 
- */ - .BaseMipLevel = 0.0, - - .SurfaceQPitch = surface->qpitch >> 2, - .Height = image->extent.height - 1, - .Width = image->extent.width - 1, - .Depth = depth - 1, - .SurfacePitch = surface->stride - 1, - .MinimumArrayElement = range->baseArraySlice, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, - - /* For sampler surfaces, the hardware interprets field MIPCount/LOD as - * MIPCount. The range of levels accessible by the sampler engine is - * [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. - */ - .MIPCountLOD = range->mipLevels - 1, - .SurfaceMinLOD = range->baseMipLevel, - - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ShaderChannelSelectRed = vk_to_gen_swizzle[pCreateInfo->channels.r], - .ShaderChannelSelectGreen = vk_to_gen_swizzle[pCreateInfo->channels.g], - .ShaderChannelSelectBlue = vk_to_gen_swizzle[pCreateInfo->channels.b], - .ShaderChannelSelectAlpha = vk_to_gen_swizzle[pCreateInfo->channels.a], - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, view->offset }, - }; - - if (cmd_buffer) { - view->surface_state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); - } else { - view->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } - - GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); -} - -VkResult -anv_validate_CreateImageView(VkDevice _device, - const VkImageViewCreateInfo *pCreateInfo, - VkImageView *pView) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *subresource; - const struct anv_image_view_info *view_info; - const struct anv_format *view_format_info; - const struct anv_format *image_format_info; - - /* Validate structure type before dereferencing it. 
*/ - assert(pCreateInfo); - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); - subresource = &pCreateInfo->subresourceRange; - - /* Validate viewType is in range before using it. */ - assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); - assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); - view_info = &anv_image_view_info_table[pCreateInfo->viewType]; - - /* Validate format is in range before using it. */ - assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); - assert(pCreateInfo->format <= VK_FORMAT_END_RANGE); - image_format_info = anv_format_for_vk_format(image->format); - view_format_info = anv_format_for_vk_format(pCreateInfo->format); - - /* Validate channel swizzles. */ - assert(pCreateInfo->channels.r >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->channels.r <= VK_CHANNEL_SWIZZLE_END_RANGE); - assert(pCreateInfo->channels.g >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->channels.g <= VK_CHANNEL_SWIZZLE_END_RANGE); - assert(pCreateInfo->channels.b >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->channels.b <= VK_CHANNEL_SWIZZLE_END_RANGE); - assert(pCreateInfo->channels.a >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->channels.a <= VK_CHANNEL_SWIZZLE_END_RANGE); - - /* Validate subresource. */ - assert(subresource->aspect >= VK_IMAGE_ASPECT_BEGIN_RANGE); - assert(subresource->aspect <= VK_IMAGE_ASPECT_END_RANGE); - assert(subresource->mipLevels > 0); - assert(subresource->arraySize > 0); - assert(subresource->baseMipLevel < image->levels); - assert(subresource->baseMipLevel + subresource->mipLevels <= image->levels); - assert(subresource->baseArraySlice < image->array_size); - assert(subresource->baseArraySlice + subresource->arraySize <= image->array_size); - assert(pView); - - if (view_info->is_cube) { - assert(subresource->baseArraySlice % 6 == 0); - assert(subresource->arraySize % 6 == 0); - } - - /* Validate format. 
*/ - switch (subresource->aspect) { - case VK_IMAGE_ASPECT_COLOR: - assert(!image_format_info->depth_format); - assert(!image_format_info->has_stencil); - assert(!view_format_info->depth_format); - assert(!view_format_info->has_stencil); - assert(view_format_info->cpp == image_format_info->cpp); - break; - case VK_IMAGE_ASPECT_DEPTH: - assert(image_format_info->depth_format); - assert(view_format_info->depth_format); - assert(view_format_info->cpp == image_format_info->cpp); - break; - case VK_IMAGE_ASPECT_STENCIL: - /* FINISHME: Is it legal to have an R8 view of S8? */ - assert(image_format_info->has_stencil); - assert(view_format_info->has_stencil); - break; - default: - assert(!"bad VkImageAspect"); - break; - } - - return anv_CreateImageView(_device, pCreateInfo, pView); -} - -VkResult -anv_CreateImageView(VkDevice _device, - const VkImageViewCreateInfo *pCreateInfo, - VkImageView *pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_image_view *view; - - view = anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_image_view_init(view, device, pCreateInfo, NULL); - - *pView = anv_image_view_to_handle(view); - - return VK_SUCCESS; -} - -VkResult -anv_DestroyImageView(VkDevice _device, VkImageView _iview) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_image_view, iview, _iview); - - anv_surface_view_fini(device, &iview->view); - anv_device_free(device, iview); - - return VK_SUCCESS; -} - -void -anv_color_attachment_view_init(struct anv_color_attachment_view *aview, - struct anv_device *device, - const VkAttachmentViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_surface_view *view = &aview->view; - struct anv_surface *surface = &image->primary_surface; - const struct anv_format *format_info = - 
anv_format_for_vk_format(pCreateInfo->format); - - aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; - - anv_assert(pCreateInfo->arraySize > 0); - anv_assert(pCreateInfo->mipLevel < image->levels); - anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); - - view->bo = image->bo; - view->offset = image->offset + surface->offset; - view->format = pCreateInfo->format; - - aview->base.extent = (VkExtent3D) { - .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), - .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), - .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), - }; - - uint32_t depth = 1; - if (pCreateInfo->arraySize > 1) { - depth = pCreateInfo->arraySize; - } else if (image->extent.depth > 1) { - depth = image->extent.depth; - } - - if (cmd_buffer) { - view->surface_state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); - } else { - view->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } - - struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = SURFTYPE_2D, - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = format_info->surface_format, - .SurfaceVerticalAlignment = anv_valign[surface->v_align], - .SurfaceHorizontalAlignment = anv_halign[surface->h_align], - .TileMode = surface->tile_mode, - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GEN8_MOCS, - - /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in - * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have - * both Base Mip Level fields nonzero". 
- */ - .BaseMipLevel = 0.0, - - .SurfaceQPitch = surface->qpitch >> 2, - .Height = image->extent.height - 1, - .Width = image->extent.width - 1, - .Depth = depth - 1, - .SurfacePitch = surface->stride - 1, - .MinimumArrayElement = pCreateInfo->baseArraySlice, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, - - /* For render target surfaces, the hardware interprets field MIPCount/LOD as - * LOD. The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. - */ - .SurfaceMinLOD = 0, - .MIPCountLOD = pCreateInfo->mipLevel, - - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, view->offset }, - }; - - GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); -} - -static void -anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, - const VkAttachmentViewCreateInfo *pCreateInfo) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_surface *depth_surface = &image->primary_surface; - struct anv_surface *stencil_surface = &image->stencil_surface; - const struct anv_format *format = - anv_format_for_vk_format(image->format); - - view->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL; - - /* XXX: We don't handle any of these */ - anv_assert(pCreateInfo->mipLevel == 0); - anv_assert(pCreateInfo->baseArraySlice == 0); - anv_assert(pCreateInfo->arraySize == 1); - - view->bo = image->bo; - - view->depth_stride = depth_surface->stride; - view->depth_offset = image->offset + depth_surface->offset; - view->depth_format = format->depth_format; - view->depth_qpitch = 0; /* FINISHME: QPitch */ - - view->stencil_stride = stencil_surface->stride; - 
view->stencil_offset = image->offset + stencil_surface->offset; - view->stencil_qpitch = 0; /* FINISHME: QPitch */ -} - -VkResult -anv_CreateAttachmentView(VkDevice _device, - const VkAttachmentViewCreateInfo *pCreateInfo, - VkAttachmentView *pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO); - - if (anv_is_vk_format_depth_or_stencil(pCreateInfo->format)) { - struct anv_depth_stencil_view *view = - anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_depth_stencil_view_init(view, pCreateInfo); - - *pView = anv_attachment_view_to_handle(&view->base); - } else { - struct anv_color_attachment_view *view = - anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_color_attachment_view_init(view, device, pCreateInfo, NULL); - - *pView = anv_attachment_view_to_handle(&view->base); - } - - return VK_SUCCESS; -} - -VkResult -anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _view) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_attachment_view, view, _view); - - if (view->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR) { - struct anv_color_attachment_view *aview = - (struct anv_color_attachment_view *)view; - - anv_surface_view_fini(device, &aview->view); - } - - anv_device_free(device, view); - - return VK_SUCCESS; -} diff --git a/src/vulkan/intel.c b/src/vulkan/intel.c deleted file mode 100644 index e314ba60586..00000000000 --- a/src/vulkan/intel.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without 
limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "private.h" - -VkResult anv_CreateDmaBufImageINTEL( - VkDevice _device, - const VkDmaBufImageCreateInfo* pCreateInfo, - VkDeviceMemory* pMem, - VkImage* pImage) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_device_memory *mem; - struct anv_image *image; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); - - mem = anv_device_alloc(device, sizeof(*mem), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (mem == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); - if (!mem->bo.gem_handle) { - result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - goto fail; - } - - mem->bo.map = NULL; - mem->bo.index = 0; - mem->bo.offset = 0; - mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; - - image = anv_device_alloc(device, sizeof(*image), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (image == NULL) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_mem; - } 
- - *image = (struct anv_image) { - .bo = &mem->bo, - .offset = 0, - .type = VK_IMAGE_TYPE_2D, - .extent = pCreateInfo->extent, - .size = mem->bo.size, - - .primary_surface = { - .offset = 0, - .stride = pCreateInfo->strideInBytes, - .tile_mode = XMAJOR, - }, - }; - - assert(image->extent.width > 0); - assert(image->extent.height > 0); - assert(image->extent.depth == 1); - - *pMem = anv_device_memory_to_handle(mem); - *pImage = anv_image_to_handle(image); - - return VK_SUCCESS; - - fail_mem: - anv_gem_close(device, mem->bo.gem_handle); - fail: - anv_device_free(device, mem); - - return result; -} diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c deleted file mode 100644 index 30809581ad6..00000000000 --- a/src/vulkan/meta.c +++ /dev/null @@ -1,1449 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "private.h" -#include "meta-spirv.h" - -static void -anv_device_init_meta_clear_state(struct anv_device *device) -{ - /* We don't use a vertex shader for clearing, but instead build and pass - * the VUEs directly to the rasterization backend. - */ - VkShaderModule fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, - out vec4 f_color; - flat in vec4 v_color; - void main() - { - f_color = v_color; - } - ); - - VkShader fs; - anv_CreateShader(anv_device_to_handle(device), - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = fsm, - .pName = "main", - }, &fs); - - /* We use instanced rendering to clear multiple render targets. We have two - * vertex buffers: the first vertex buffer holds per-vertex data and - * provides the vertices for the clear rectangle. The second one holds - * per-instance data, which consists of the VUE header (which selects the - * layer) and the color (Vulkan supports per-RT clear colors). 
- */ - VkPipelineVertexInputStateCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .bindingCount = 2, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .strideInBytes = 8, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX - }, - { - .binding = 1, - .strideInBytes = 32, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_INSTANCE - }, - }, - .attributeCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 1, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offsetInBytes = 0 - }, - { - /* Position */ - .location = 1, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offsetInBytes = 0 - }, - { - /* Color */ - .location = 2, - .binding = 1, - .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offsetInBytes = 16 - } - } - }; - - anv_pipeline_create(anv_device_to_handle(device), - &(VkGraphicsPipelineCreateInfo) { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = 1, - .pStages = &(VkPipelineShaderStageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT, - .shader = fs, - .pSpecializationInfo = NULL, - }, - .pVertexInputState = &vi_create_info, - .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }, - .pRasterState = &(VkPipelineRasterStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, - .depthClipEnable = true, - .rasterizerDiscardEnable = false, - .fillMode = VK_FILL_MODE_SOLID, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CCW - }, - .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = 
(VkPipelineColorBlendAttachmentState []) { - { .channelWriteMask = VK_CHANNEL_A_BIT | - VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, - } - }, - .flags = 0, - }, - &(struct anv_pipeline_create_info) { - .use_repclear = true, - .disable_viewport = true, - .use_rectlist = true - }, - &device->meta_state.clear.pipeline); - - anv_DestroyShader(anv_device_to_handle(device), fs); -} - -#define NUM_VB_USED 2 -struct anv_saved_state { - struct anv_vertex_binding old_vertex_bindings[NUM_VB_USED]; - struct anv_descriptor_set *old_descriptor_set0; - struct anv_pipeline *old_pipeline; - VkDynamicColorBlendState cb_state; -}; - -static void -anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, - struct anv_saved_state *state) -{ - state->old_pipeline = cmd_buffer->state.pipeline; - state->old_descriptor_set0 = cmd_buffer->state.descriptors[0].set; - memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, - sizeof(state->old_vertex_bindings)); -} - -static void -anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, - const struct anv_saved_state *state) -{ - cmd_buffer->state.pipeline = state->old_pipeline; - cmd_buffer->state.descriptors[0].set = state->old_descriptor_set0; - memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, - sizeof(state->old_vertex_bindings)); - - cmd_buffer->state.vb_dirty |= (1 << NUM_VB_USED) - 1; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; -} - -struct vue_header { - uint32_t Reserved; - uint32_t RTAIndex; - uint32_t ViewportIndex; - float PointWidth; -}; - -struct clear_instance_data { - struct vue_header vue_header; - VkClearColorValue color; -}; - -static void -meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, - int num_instances, - struct clear_instance_data *instance_data) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_state state; - 
uint32_t size; - - const float vertex_data[] = { - /* Rect-list coordinates */ - 0.0, 0.0, - fb->width, 0.0, - fb->width, fb->height, - - /* Align to 16 bytes */ - 0.0, 0.0, - }; - - size = sizeof(vertex_data) + num_instances * sizeof(*instance_data); - state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, size, 16); - - /* Copy in the vertex and instance data */ - memcpy(state.map, vertex_data, sizeof(vertex_data)); - memcpy(state.map + sizeof(vertex_data), instance_data, - num_instances * sizeof(*instance_data)); - - struct anv_buffer vertex_buffer = { - .device = cmd_buffer->device, - .size = size, - .bo = &device->surface_state_block_pool.bo, - .offset = state.offset - }; - - anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, - (VkBuffer[]) { - anv_buffer_to_handle(&vertex_buffer), - anv_buffer_to_handle(&vertex_buffer) - }, - (VkDeviceSize[]) { - 0, - sizeof(vertex_data) - }); - - if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.clear.pipeline)) - anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.clear.pipeline); - - /* We don't need anything here, only set if not already set. 
*/ - if (cmd_buffer->state.rs_state == NULL) - anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.shared.rs_state); - - if (cmd_buffer->state.vp_state == NULL) - anv_CmdBindDynamicViewportState(anv_cmd_buffer_to_handle(cmd_buffer), - cmd_buffer->state.framebuffer->vp_state); - - if (cmd_buffer->state.ds_state == NULL) - anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.shared.ds_state); - - if (cmd_buffer->state.cb_state == NULL) - anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.shared.cb_state); - - anv_CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, num_instances); -} - -void -anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, - struct anv_render_pass *pass, - const VkClearValue *clear_values) -{ - struct anv_saved_state saved_state; - - int num_clear_layers = 0; - for (uint32_t i = 0; i < pass->attachment_count; i++) { - if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - if (anv_is_vk_format_depth_or_stencil(pass->attachments[i].format)) { - anv_finishme("Can't clear depth-stencil yet"); - continue; - } - num_clear_layers++; - } - } - - if (num_clear_layers == 0) - return; - - struct clear_instance_data instance_data[num_clear_layers]; - uint32_t color_attachments[num_clear_layers]; - - int layer = 0; - for (uint32_t i = 0; i < pass->attachment_count; i++) { - if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR && - !anv_is_vk_format_depth_or_stencil(pass->attachments[i].format)) { - instance_data[layer] = (struct clear_instance_data) { - .vue_header = { - .RTAIndex = i, - .ViewportIndex = 0, - .PointWidth = 0.0 - }, - .color = clear_values[i].color, - }; - color_attachments[layer] = i; - layer++; - } - } - - anv_cmd_buffer_save(cmd_buffer, &saved_state); - - struct anv_subpass subpass = { - .input_count = 0, - .color_count = num_clear_layers, - .color_attachments = 
color_attachments, - .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, - }; - - anv_cmd_buffer_begin_subpass(cmd_buffer, &subpass); - - meta_emit_clear(cmd_buffer, num_clear_layers, instance_data); - - /* Restore API state */ - anv_cmd_buffer_restore(cmd_buffer, &saved_state); -} - -static void -anv_device_init_meta_blit_state(struct anv_device *device) -{ - /* We don't use a vertex shader for clearing, but instead build and pass - * the VUEs directly to the rasterization backend. However, we do need - * to provide GLSL source for the vertex shader so that the compiler - * does not dead-code our inputs. - */ - VkShaderModule vsm = GLSL_VK_SHADER_MODULE(device, VERTEX, - in vec2 a_pos; - in vec2 a_tex_coord; - out vec4 v_tex_coord; - void main() - { - v_tex_coord = vec4(a_tex_coord, 0, 1); - gl_Position = vec4(a_pos, 0, 1); - } - ); - - VkShaderModule fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, - out vec4 f_color; - in vec4 v_tex_coord; - layout(set = 0, binding = 0) uniform sampler2D u_tex; - void main() - { - f_color = texture(u_tex, v_tex_coord.xy); - } - ); - - VkShader vs; - anv_CreateShader(anv_device_to_handle(device), - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = vsm, - .pName = "main", - }, &vs); - - VkShader fs; - anv_CreateShader(anv_device_to_handle(device), - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = fsm, - .pName = "main", - }, &fs); - - VkPipelineVertexInputStateCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .bindingCount = 2, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .strideInBytes = 0, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX - }, - { - .binding = 1, - .strideInBytes = 16, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX - }, - }, - .attributeCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - 
.location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offsetInBytes = 0 - }, - { - /* Position */ - .location = 1, - .binding = 1, - .format = VK_FORMAT_R32G32_SFLOAT, - .offsetInBytes = 0 - }, - { - /* Texture Coordinate */ - .location = 2, - .binding = 1, - .format = VK_FORMAT_R32G32_SFLOAT, - .offsetInBytes = 8 - } - } - }; - - VkDescriptorSetLayoutCreateInfo ds_layout_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .count = 1, - .pBinding = (VkDescriptorSetLayoutBinding[]) { - { - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .arraySize = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - }, - } - }; - anv_CreateDescriptorSetLayout(anv_device_to_handle(device), &ds_layout_info, - &device->meta_state.blit.ds_layout); - - anv_CreatePipelineLayout(anv_device_to_handle(device), - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit.ds_layout, - }, - &device->meta_state.blit.pipeline_layout); - - anv_pipeline_create(anv_device_to_handle(device), - &(VkGraphicsPipelineCreateInfo) { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = 2, - .pStages = (VkPipelineShaderStageCreateInfo[]) { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX, - .shader = vs, - .pSpecializationInfo = NULL - }, { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT, - .shader = fs, - .pSpecializationInfo = NULL - }, - }, - .pVertexInputState = &vi_create_info, - .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }, - .pRasterState = &(VkPipelineRasterStateCreateInfo) { - .sType = 
VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, - .depthClipEnable = true, - .rasterizerDiscardEnable = false, - .fillMode = VK_FILL_MODE_SOLID, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CCW - }, - .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { .channelWriteMask = VK_CHANNEL_A_BIT | - VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, - } - }, - .flags = 0, - .layout = device->meta_state.blit.pipeline_layout, - }, - &(struct anv_pipeline_create_info) { - .use_repclear = false, - .disable_viewport = true, - .disable_scissor = true, - .disable_vs = true, - .use_rectlist = true - }, - &device->meta_state.blit.pipeline); - - anv_DestroyShader(anv_device_to_handle(device), vs); - anv_DestroyShader(anv_device_to_handle(device), fs); -} - -static void -meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_saved_state *saved_state) -{ - struct anv_device *device = cmd_buffer->device; - - anv_cmd_buffer_save(cmd_buffer, saved_state); - - if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.blit.pipeline)) - anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit.pipeline); - - /* We don't need anything here, only set if not already set. 
*/ - if (cmd_buffer->state.rs_state == NULL) - anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.shared.rs_state); - if (cmd_buffer->state.ds_state == NULL) - anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.shared.ds_state); - - saved_state->cb_state = anv_dynamic_cb_state_to_handle(cmd_buffer->state.cb_state); - anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.shared.cb_state); -} - -struct blit_region { - VkOffset3D src_offset; - VkExtent3D src_extent; - VkOffset3D dest_offset; - VkExtent3D dest_extent; -}; - -static void -meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_image_view *src, - VkOffset3D src_offset, - VkExtent3D src_extent, - struct anv_color_attachment_view *dest, - VkOffset3D dest_offset, - VkExtent3D dest_extent) -{ - struct anv_device *device = cmd_buffer->device; - VkDescriptorPool dummy_desc_pool = { .handle = 1 }; - - struct blit_vb_data { - float pos[2]; - float tex_coord[2]; - } *vb_data; - - unsigned vb_size = sizeof(struct vue_header) + 3 * sizeof(*vb_data); - - struct anv_state vb_state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, vb_size, 16); - memset(vb_state.map, 0, sizeof(struct vue_header)); - vb_data = vb_state.map + sizeof(struct vue_header); - - vb_data[0] = (struct blit_vb_data) { - .pos = { - dest_offset.x + dest_extent.width, - dest_offset.y + dest_extent.height, - }, - .tex_coord = { - (float)(src_offset.x + src_extent.width) / (float)src->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src->extent.height, - }, - }; - - vb_data[1] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y + dest_extent.height, - }, - .tex_coord = { - (float)src_offset.x / (float)src->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src->extent.height, - }, - }; - - vb_data[2] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - 
dest_offset.y, - }, - .tex_coord = { - (float)src_offset.x / (float)src->extent.width, - (float)src_offset.y / (float)src->extent.height, - }, - }; - - struct anv_buffer vertex_buffer = { - .device = device, - .size = vb_size, - .bo = &device->surface_state_block_pool.bo, - .offset = vb_state.offset, - }; - - anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, - (VkBuffer[]) { - anv_buffer_to_handle(&vertex_buffer), - anv_buffer_to_handle(&vertex_buffer) - }, - (VkDeviceSize[]) { - 0, - sizeof(struct vue_header), - }); - - uint32_t count; - VkDescriptorSet set; - anv_AllocDescriptorSets(anv_device_to_handle(device), dummy_desc_pool, - VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, - 1, &device->meta_state.blit.ds_layout, &set, &count); - anv_UpdateDescriptorSets(anv_device_to_handle(device), - 1, /* writeCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .destSet = set, - .destBinding = 0, - .destArrayElement = 0, - .count = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pDescriptors = (VkDescriptorInfo[]) { - { - .imageView = anv_image_view_to_handle(src), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL - }, - } - } - }, 0, NULL); - - VkFramebuffer fb; - anv_CreateFramebuffer(anv_device_to_handle(device), - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkAttachmentBindInfo[]) { - { - .view = anv_attachment_view_to_handle(&dest->base), - .layout = VK_IMAGE_LAYOUT_GENERAL - } - }, - .width = dest->base.extent.width, - .height = dest->base.extent.height, - .layers = 1 - }, &fb); - - VkRenderPass pass; - anv_CreateRenderPass(anv_device_to_handle(device), - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, - .format = dest->view.format, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - 
.storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputCount = 0, - .colorCount = 1, - .colorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .resolveAttachments = NULL, - .depthStencilAttachment = (VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveCount = 1, - .preserveAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - }, - .dependencyCount = 0, - }, &pass); - - anv_CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = pass, - .framebuffer = fb, - .renderArea = { - .offset = { dest_offset.x, dest_offset.y }, - .extent = { dest_extent.width, dest_extent.height }, - }, - .attachmentCount = 1, - .pAttachmentClearValues = NULL, - }, VK_RENDER_PASS_CONTENTS_INLINE); - - anv_CmdBindDynamicViewportState(anv_cmd_buffer_to_handle(cmd_buffer), - anv_framebuffer_from_handle(fb)->vp_state); - - anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit.pipeline_layout, 0, 1, - &set, 0, NULL); - - anv_CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, 1); - - anv_CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); - - /* At the point where we emit the draw call, all data from the - * descriptor sets, etc. has been used. We are free to delete it. 
- */ - anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); - anv_DestroyFramebuffer(anv_device_to_handle(device), fb); - anv_DestroyRenderPass(anv_device_to_handle(device), pass); -} - -static void -meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, - const struct anv_saved_state *saved_state) -{ - anv_cmd_buffer_restore(cmd_buffer, saved_state); - anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), - saved_state->cb_state); -} - -static VkFormat -vk_format_for_cpp(int cpp) -{ - switch (cpp) { - case 1: return VK_FORMAT_R8_UINT; - case 2: return VK_FORMAT_R8G8_UINT; - case 3: return VK_FORMAT_R8G8B8_UINT; - case 4: return VK_FORMAT_R8G8B8A8_UINT; - case 6: return VK_FORMAT_R16G16B16_UINT; - case 8: return VK_FORMAT_R16G16B16A16_UINT; - case 12: return VK_FORMAT_R32G32B32_UINT; - case 16: return VK_FORMAT_R32G32B32A32_UINT; - default: - unreachable("Invalid format cpp"); - } -} - -static void -do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *src, uint64_t src_offset, - struct anv_bo *dest, uint64_t dest_offset, - int width, int height, VkFormat copy_format) -{ - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - - VkImageCreateInfo image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = copy_format, - .extent = { - .width = width, - .height = height, - .depth = 1, - }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT, - .flags = 0, - }; - - VkImage src_image, dest_image; - anv_CreateImage(vk_device, &image_info, &src_image); - anv_CreateImage(vk_device, &image_info, &dest_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. 
- */ - anv_image_from_handle(src_image)->bo = src; - anv_image_from_handle(src_image)->offset = src_offset; - anv_image_from_handle(dest_image)->bo = dest; - anv_image_from_handle(dest_image)->offset = dest_offset; - - struct anv_image_view src_view; - anv_image_view_init(&src_view, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = src_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspect = VK_IMAGE_ASPECT_COLOR, - .baseMipLevel = 0, - .mipLevels = 1, - .baseArraySlice = 0, - .arraySize = 1 - }, - }, - cmd_buffer); - - struct anv_color_attachment_view dest_view; - anv_color_attachment_view_init(&dest_view, cmd_buffer->device, - &(VkAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, - .image = dest_image, - .format = copy_format, - .mipLevel = 0, - .baseArraySlice = 0, - .arraySize = 1, - }, - cmd_buffer); - - meta_emit_blit(cmd_buffer, - &src_view, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }, - &dest_view, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }); - - anv_DestroyImage(vk_device, src_image); - anv_DestroyImage(vk_device, dest_image); -} - -void anv_CmdCopyBuffer( - VkCmdBuffer cmdBuffer, - VkBuffer srcBuffer, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); - ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); - - struct anv_saved_state saved_state; - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; - uint64_t dest_offset = dest_buffer->offset + pRegions[r].destOffset; - uint64_t copy_size = 
pRegions[r].copySize; - - /* First, we compute the biggest format that can be used with the - * given offsets and size. - */ - int cpp = 16; - - int fs = ffs(src_offset) - 1; - if (fs != -1) - cpp = MIN2(cpp, 1 << fs); - assert(src_offset % cpp == 0); - - fs = ffs(dest_offset) - 1; - if (fs != -1) - cpp = MIN2(cpp, 1 << fs); - assert(dest_offset % cpp == 0); - - fs = ffs(pRegions[r].copySize) - 1; - if (fs != -1) - cpp = MIN2(cpp, 1 << fs); - assert(pRegions[r].copySize % cpp == 0); - - VkFormat copy_format = vk_format_for_cpp(cpp); - - /* This is maximum possible width/height our HW can handle */ - uint64_t max_surface_dim = 1 << 14; - - /* First, we make a bunch of max-sized copies */ - uint64_t max_copy_size = max_surface_dim * max_surface_dim * cpp; - while (copy_size > max_copy_size) { - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - max_surface_dim, max_surface_dim, copy_format); - copy_size -= max_copy_size; - src_offset += max_copy_size; - dest_offset += max_copy_size; - } - - uint64_t height = copy_size / (max_surface_dim * cpp); - assert(height < max_surface_dim); - if (height != 0) { - uint64_t rect_copy_size = height * max_surface_dim * cpp; - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - max_surface_dim, height, copy_format); - copy_size -= rect_copy_size; - src_offset += rect_copy_size; - dest_offset += rect_copy_size; - } - - if (copy_size != 0) { - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - copy_size / cpp, 1, copy_format); - } - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdCopyImage( - VkCmdBuffer cmdBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - - struct 
anv_saved_state saved_state; - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - struct anv_image_view src_view; - anv_image_view_init(&src_view, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspect = pRegions[r].srcSubresource.aspect, - .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .mipLevels = 1, - .baseArraySlice = pRegions[r].srcSubresource.arraySlice, - .arraySize = 1 - }, - }, - cmd_buffer); - - struct anv_color_attachment_view dest_view; - anv_color_attachment_view_init(&dest_view, cmd_buffer->device, - &(VkAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, - .image = destImage, - .format = src_image->format, - .mipLevel = pRegions[r].destSubresource.mipLevel, - .baseArraySlice = pRegions[r].destSubresource.arraySlice, - .arraySize = 1, - }, - cmd_buffer); - - meta_emit_blit(cmd_buffer, - &src_view, - pRegions[r].srcOffset, - pRegions[r].extent, - &dest_view, - pRegions[r].destOffset, - pRegions[r].extent); - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdBlitImage( - VkCmdBuffer cmdBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageBlit* pRegions, - VkTexFilter filter) - -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - - struct anv_saved_state saved_state; - - anv_finishme("respect VkTexFilter"); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - struct anv_image_view src_view; - 
anv_image_view_init(&src_view, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspect = pRegions[r].srcSubresource.aspect, - .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .mipLevels = 1, - .baseArraySlice = pRegions[r].srcSubresource.arraySlice, - .arraySize = 1 - }, - }, - cmd_buffer); - - struct anv_color_attachment_view dest_view; - anv_color_attachment_view_init(&dest_view, cmd_buffer->device, - &(VkAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, - .image = destImage, - .format = dest_image->format, - .mipLevel = pRegions[r].destSubresource.mipLevel, - .baseArraySlice = pRegions[r].destSubresource.arraySlice, - .arraySize = 1, - }, - cmd_buffer); - - meta_emit_blit(cmd_buffer, - &src_view, - pRegions[r].srcOffset, - pRegions[r].srcExtent, - &dest_view, - pRegions[r].destOffset, - pRegions[r].destExtent); - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdCopyBufferToImage( - VkCmdBuffer cmdBuffer, - VkBuffer srcBuffer, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkBufferImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_saved_state saved_state; - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - if (pRegions[r].bufferRowLength != 0) - anv_finishme("bufferRowLength not supported in CopyBufferToImage"); - if (pRegions[r].bufferImageHeight != 0) - anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); - 
- VkImage srcImage; - anv_CreateImage(vk_device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = dest_image->format, - .extent = { - .width = pRegions[r].imageExtent.width, - .height = pRegions[r].imageExtent.height, - .depth = 1, - }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT, - .flags = 0, - }, &srcImage); - - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. - */ - src_image->bo = src_buffer->bo; - src_image->offset = src_buffer->offset + pRegions[r].bufferOffset; - - struct anv_image_view src_view; - anv_image_view_init(&src_view, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(src_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = dest_image->format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspect = pRegions[r].imageSubresource.aspect, - .baseMipLevel = 0, - .mipLevels = 1, - .baseArraySlice = 0, - .arraySize = 1 - }, - }, - cmd_buffer); - - struct anv_color_attachment_view dest_view; - anv_color_attachment_view_init(&dest_view, cmd_buffer->device, - &(VkAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, - .image = anv_image_to_handle(dest_image), - .format = dest_image->format, - .mipLevel = pRegions[r].imageSubresource.mipLevel, - .baseArraySlice = pRegions[r].imageSubresource.arraySlice, - .arraySize = 1, - }, - cmd_buffer); - - meta_emit_blit(cmd_buffer, - &src_view, - (VkOffset3D) { 0, 0, 0 }, - pRegions[r].imageExtent, - &dest_view, - pRegions[r].imageOffset, - pRegions[r].imageExtent); - - anv_DestroyImage(vk_device, srcImage); - } - - 
meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdCopyImageToBuffer( - VkCmdBuffer cmdBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_saved_state saved_state; - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - if (pRegions[r].bufferRowLength != 0) - anv_finishme("bufferRowLength not supported in CopyBufferToImage"); - if (pRegions[r].bufferImageHeight != 0) - anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); - - struct anv_image_view src_view; - anv_image_view_init(&src_view, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspect = pRegions[r].imageSubresource.aspect, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .mipLevels = 1, - .baseArraySlice = pRegions[r].imageSubresource.arraySlice, - .arraySize = 1 - }, - }, - cmd_buffer); - - VkImage destImage; - anv_CreateImage(vk_device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = src_image->format, - .extent = { - .width = pRegions[r].imageExtent.width, - .height = pRegions[r].imageExtent.height, - .depth = 1, - }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT, - .flags = 0, - }, &destImage); - - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - - /* We could 
use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. - */ - dest_image->bo = dest_buffer->bo; - dest_image->offset = dest_buffer->offset + pRegions[r].bufferOffset; - - struct anv_color_attachment_view dest_view; - anv_color_attachment_view_init(&dest_view, cmd_buffer->device, - &(VkAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, - .image = destImage, - .format = src_image->format, - .mipLevel = 0, - .baseArraySlice = 0, - .arraySize = 1, - }, - cmd_buffer); - - meta_emit_blit(cmd_buffer, - &src_view, - pRegions[r].imageOffset, - pRegions[r].imageExtent, - &dest_view, - (VkOffset3D) { 0, 0, 0 }, - pRegions[r].imageExtent); - - anv_DestroyImage(vk_device, destImage); - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdUpdateBuffer( - VkCmdBuffer cmdBuffer, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize dataSize, - const uint32_t* pData) -{ - stub(); -} - -void anv_CmdFillBuffer( - VkCmdBuffer cmdBuffer, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize fillSize, - uint32_t data) -{ - stub(); -} - -void anv_CmdClearColorImage( - VkCmdBuffer cmdBuffer, - VkImage _image, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_image, image, _image); - struct anv_saved_state saved_state; - - anv_cmd_buffer_save(cmd_buffer, &saved_state); - - for (uint32_t r = 0; r < rangeCount; r++) { - for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { - for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { - struct anv_color_attachment_view view; - anv_color_attachment_view_init(&view, cmd_buffer->device, - &(VkAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, - .image = _image, - .format = image->format, - .mipLevel = 
pRanges[r].baseMipLevel + l, - .baseArraySlice = pRanges[r].baseArraySlice + s, - .arraySize = 1, - }, - cmd_buffer); - - VkFramebuffer fb; - anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkAttachmentBindInfo[]) { - { - .view = anv_attachment_view_to_handle(&view.base), - .layout = VK_IMAGE_LAYOUT_GENERAL - } - }, - .width = view.base.extent.width, - .height = view.base.extent.height, - .layers = 1 - }, &fb); - - VkRenderPass pass; - anv_CreateRenderPass(anv_device_to_handle(cmd_buffer->device), - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, - .format = view.view.format, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputCount = 0, - .colorCount = 1, - .colorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .resolveAttachments = NULL, - .depthStencilAttachment = (VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveCount = 1, - .preserveAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - }, - .dependencyCount = 0, - }, &pass); - - anv_CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderArea = { - .offset = { 0, 0, }, - .extent = { - .width = view.base.extent.width, - .height = view.base.extent.height, - }, - 
}, - .renderPass = pass, - .framebuffer = fb, - .attachmentCount = 1, - .pAttachmentClearValues = NULL, - }, VK_RENDER_PASS_CONTENTS_INLINE); - - struct clear_instance_data instance_data = { - .vue_header = { - .RTAIndex = 0, - .ViewportIndex = 0, - .PointWidth = 0.0 - }, - .color = *pColor, - }; - - meta_emit_clear(cmd_buffer, 1, &instance_data); - - anv_CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); - } - } - } - - /* Restore API state */ - anv_cmd_buffer_restore(cmd_buffer, &saved_state); -} - -void anv_CmdClearDepthStencilImage( - VkCmdBuffer cmdBuffer, - VkImage image, - VkImageLayout imageLayout, - float depth, - uint32_t stencil, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) -{ - stub(); -} - -void anv_CmdClearColorAttachment( - VkCmdBuffer cmdBuffer, - uint32_t colorAttachment, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rectCount, - const VkRect3D* pRects) -{ - stub(); -} - -void anv_CmdClearDepthStencilAttachment( - VkCmdBuffer cmdBuffer, - VkImageAspectFlags imageAspectMask, - VkImageLayout imageLayout, - float depth, - uint32_t stencil, - uint32_t rectCount, - const VkRect3D* pRects) -{ - stub(); -} - -void anv_CmdResolveImage( - VkCmdBuffer cmdBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageResolve* pRegions) -{ - stub(); -} - -void -anv_device_init_meta(struct anv_device *device) -{ - anv_device_init_meta_clear_state(device); - anv_device_init_meta_blit_state(device); - - anv_CreateDynamicRasterState(anv_device_to_handle(device), - &(VkDynamicRasterStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO, - }, - &device->meta_state.shared.rs_state); - - anv_CreateDynamicColorBlendState(anv_device_to_handle(device), - &(VkDynamicColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO - }, - &device->meta_state.shared.cb_state); 
- - anv_CreateDynamicDepthStencilState(anv_device_to_handle(device), - &(VkDynamicDepthStencilStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO - }, - &device->meta_state.shared.ds_state); -} - -void -anv_device_finish_meta(struct anv_device *device) -{ - /* Clear */ - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.clear.pipeline); - - /* Blit */ - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline); - anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout); - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout); - - /* Shared */ - anv_DestroyDynamicRasterState(anv_device_to_handle(device), - device->meta_state.shared.rs_state); - anv_DestroyDynamicColorBlendState(anv_device_to_handle(device), - device->meta_state.shared.cb_state); - anv_DestroyDynamicDepthStencilState(anv_device_to_handle(device), - device->meta_state.shared.ds_state); -} diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c deleted file mode 100644 index fdb632cd330..00000000000 --- a/src/vulkan/pipeline.c +++ /dev/null @@ -1,950 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "private.h" - -// Shader functions - -VkResult anv_CreateShaderModule( - VkDevice _device, - const VkShaderModuleCreateInfo* pCreateInfo, - VkShaderModule* pShaderModule) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_shader_module *module; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); - assert(pCreateInfo->flags == 0); - - module = anv_device_alloc(device, sizeof(*module) + pCreateInfo->codeSize, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (module == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - module->size = pCreateInfo->codeSize; - memcpy(module->data, pCreateInfo->pCode, module->size); - - *pShaderModule = anv_shader_module_to_handle(module); - - return VK_SUCCESS; -} - -VkResult anv_DestroyShaderModule( - VkDevice _device, - VkShaderModule _module) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_shader_module, module, _module); - - anv_device_free(device, module); - - return VK_SUCCESS; -} - -VkResult anv_CreateShader( - VkDevice _device, - const VkShaderCreateInfo* pCreateInfo, - VkShader* pShader) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->module); - struct anv_shader *shader; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_CREATE_INFO); - assert(pCreateInfo->flags == 0); - - size_t name_len = strlen(pCreateInfo->pName); - - if 
(strcmp(pCreateInfo->pName, "main") != 0) { - anv_finishme("Multiple shaders per module not really supported"); - } - - shader = anv_device_alloc(device, sizeof(*shader) + name_len + 1, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (shader == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - shader->module = module; - memcpy(shader->entrypoint, pCreateInfo->pName, name_len + 1); - - *pShader = anv_shader_to_handle(shader); - - return VK_SUCCESS; -} - -VkResult anv_DestroyShader( - VkDevice _device, - VkShader _shader) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_shader, shader, _shader); - - anv_device_free(device, shader); - - return VK_SUCCESS; -} - - -VkResult anv_CreatePipelineCache( - VkDevice device, - const VkPipelineCacheCreateInfo* pCreateInfo, - VkPipelineCache* pPipelineCache) -{ - pPipelineCache->handle = 1; - - stub_return(VK_SUCCESS); -} - -VkResult anv_DestroyPipelineCache( - VkDevice _device, - VkPipelineCache _cache) -{ - /* VkPipelineCache is a dummy object. 
*/ - return VK_SUCCESS; -} - -size_t anv_GetPipelineCacheSize( - VkDevice device, - VkPipelineCache pipelineCache) -{ - stub_return(0); -} - -VkResult anv_GetPipelineCacheData( - VkDevice device, - VkPipelineCache pipelineCache, - void* pData) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_MergePipelineCaches( - VkDevice device, - VkPipelineCache destCache, - uint32_t srcCacheCount, - const VkPipelineCache* pSrcCaches) -{ - stub_return(VK_UNSUPPORTED); -} - -// Pipeline functions - -static void -emit_vertex_input(struct anv_pipeline *pipeline, - const VkPipelineVertexInputStateCreateInfo *info) -{ - const uint32_t num_dwords = 1 + info->attributeCount * 2; - uint32_t *p; - bool instancing_enable[32]; - - pipeline->vb_used = 0; - for (uint32_t i = 0; i < info->bindingCount; i++) { - const VkVertexInputBindingDescription *desc = - &info->pVertexBindingDescriptions[i]; - - pipeline->vb_used |= 1 << desc->binding; - pipeline->binding_stride[desc->binding] = desc->strideInBytes; - - /* Step rate is programmed per vertex element (attribute), not - * binding. Set up a map of which bindings step per instance, for - * reference by vertex element setup. 
*/ - switch (desc->stepRate) { - default: - case VK_VERTEX_INPUT_STEP_RATE_VERTEX: - instancing_enable[desc->binding] = false; - break; - case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: - instancing_enable[desc->binding] = true; - break; - } - } - - p = anv_batch_emitn(&pipeline->batch, num_dwords, - GEN8_3DSTATE_VERTEX_ELEMENTS); - - for (uint32_t i = 0; i < info->attributeCount; i++) { - const VkVertexInputAttributeDescription *desc = - &info->pVertexAttributeDescriptions[i]; - const struct anv_format *format = anv_format_for_vk_format(desc->format); - - struct GEN8_VERTEX_ELEMENT_STATE element = { - .VertexBufferIndex = desc->binding, - .Valid = true, - .SourceElementFormat = format->surface_format, - .EdgeFlagEnable = false, - .SourceElementOffset = desc->offsetInBytes, - .Component0Control = VFCOMP_STORE_SRC, - .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component3Control = format->num_channels >= 4 ? VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP - }; - GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_INSTANCING, - .InstancingEnable = instancing_enable[desc->binding], - .VertexElementIndex = i, - /* Vulkan so far doesn't have an instance divisor, so - * this is always 1 (ignored if not instancing). 
*/ - .InstanceDataStepRate = 1); - } - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_SGVS, - .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, - .VertexIDComponentNumber = 2, - .VertexIDElementOffset = info->bindingCount, - .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, - .InstanceIDComponentNumber = 3, - .InstanceIDElementOffset = info->bindingCount); -} - -static void -emit_ia_state(struct anv_pipeline *pipeline, - const VkPipelineInputAssemblyStateCreateInfo *info, - const struct anv_pipeline_create_info *extra) -{ - static const uint32_t vk_to_gen_primitive_type[] = { - [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 - }; - uint32_t topology = vk_to_gen_primitive_type[info->topology]; - - if (extra && extra->use_rectlist) - topology = _3DPRIM_RECTLIST; - - struct GEN8_3DSTATE_VF vf = { - GEN8_3DSTATE_VF_header, - .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, - }; - GEN8_3DSTATE_VF_pack(NULL, pipeline->state_vf, &vf); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY, - .PrimitiveTopologyType = topology); -} - -static void -emit_rs_state(struct anv_pipeline *pipeline, - const VkPipelineRasterStateCreateInfo *info, - const struct anv_pipeline_create_info *extra) -{ - static const uint32_t vk_to_gen_cullmode[] = { - [VK_CULL_MODE_NONE] = CULLMODE_NONE, - [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, - 
[VK_CULL_MODE_BACK] = CULLMODE_BACK, - [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH - }; - - static const uint32_t vk_to_gen_fillmode[] = { - [VK_FILL_MODE_POINTS] = RASTER_POINT, - [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, - [VK_FILL_MODE_SOLID] = RASTER_SOLID - }; - - static const uint32_t vk_to_gen_front_face[] = { - [VK_FRONT_FACE_CCW] = CounterClockwise, - [VK_FRONT_FACE_CW] = Clockwise - }; - - struct GEN8_3DSTATE_SF sf = { - GEN8_3DSTATE_SF_header, - .ViewportTransformEnable = !(extra && extra->disable_viewport), - .TriangleStripListProvokingVertexSelect = 0, - .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, - .PointWidthSource = pipeline->writes_point_size ? Vertex : State, - .PointWidth = 1.0, - }; - - /* FINISHME: VkBool32 rasterizerDiscardEnable; */ - - GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); - - struct GEN8_3DSTATE_RASTER raster = { - GEN8_3DSTATE_RASTER_header, - .FrontWinding = vk_to_gen_front_face[info->frontFace], - .CullMode = vk_to_gen_cullmode[info->cullMode], - .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], - .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], - .ScissorRectangleEnable = !(extra && extra->disable_scissor), - .ViewportZClipTestEnable = info->depthClipEnable - }; - - GEN8_3DSTATE_RASTER_pack(NULL, pipeline->state_raster, &raster); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, - .ForceVertexURBEntryReadLength = false, - .ForceVertexURBEntryReadOffset = false, - .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = - pipeline->wm_prog_data.num_varying_inputs); - -} - -static void -emit_cb_state(struct anv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *info) -{ - struct anv_device *device = pipeline->device; - - static const uint32_t vk_to_gen_logic_op[] = { - [VK_LOGIC_OP_COPY] = LOGICOP_COPY, - [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, - [VK_LOGIC_OP_AND] = LOGICOP_AND, - [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, 
- [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, - [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP, - [VK_LOGIC_OP_XOR] = LOGICOP_XOR, - [VK_LOGIC_OP_OR] = LOGICOP_OR, - [VK_LOGIC_OP_NOR] = LOGICOP_NOR, - [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV, - [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, - [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, - [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, - [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, - [VK_LOGIC_OP_NAND] = LOGICOP_NAND, - [VK_LOGIC_OP_SET] = LOGICOP_SET, - }; - - static const uint32_t vk_to_gen_blend[] = { - [VK_BLEND_ZERO] = BLENDFACTOR_ZERO, - [VK_BLEND_ONE] = BLENDFACTOR_ONE, - [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, - [VK_BLEND_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, - [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR, - [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR, - [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, - [VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, - [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA, - [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, - [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, - [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, - [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, - [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, - [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, - [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, - [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, - [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, - [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, - }; - - static const uint32_t vk_to_gen_blend_op[] = { - [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, - [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, - [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, - [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, - [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, - }; - - uint32_t num_dwords = 1 + info->attachmentCount * 2; - pipeline->blend_state = - 
anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); - - struct GEN8_BLEND_STATE blend_state = { - .AlphaToCoverageEnable = info->alphaToCoverageEnable, - }; - - uint32_t *state = pipeline->blend_state.map; - GEN8_BLEND_STATE_pack(NULL, state, &blend_state); - - for (uint32_t i = 0; i < info->attachmentCount; i++) { - const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; - - struct GEN8_BLEND_STATE_ENTRY entry = { - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], - .ColorBufferBlendEnable = a->blendEnable, - .PreBlendSourceOnlyClampEnable = false, - .PreBlendColorClampEnable = false, - .PostBlendColorClampEnable = false, - .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], - .DestinationBlendFactor = vk_to_gen_blend[a->destBlendColor], - .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], - .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], - .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), - .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), - .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), - .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), - }; - - GEN8_BLEND_STATE_ENTRY_pack(NULL, state + i * 2 + 1, &entry); - } - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_BLEND_STATE_POINTERS, - .BlendStatePointer = pipeline->blend_state.offset, - .BlendStatePointerValid = true); -} - -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER, - [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS, - [VK_COMPARE_OP_EQUAL] = COMPAREFUNCTION_EQUAL, - [VK_COMPARE_OP_LESS_EQUAL] = COMPAREFUNCTION_LEQUAL, - [VK_COMPARE_OP_GREATER] = COMPAREFUNCTION_GREATER, - [VK_COMPARE_OP_NOT_EQUAL] = COMPAREFUNCTION_NOTEQUAL, - [VK_COMPARE_OP_GREATER_EQUAL] = 
COMPAREFUNCTION_GEQUAL, - [VK_COMPARE_OP_ALWAYS] = COMPAREFUNCTION_ALWAYS, -}; - -static const uint32_t vk_to_gen_stencil_op[] = { - [VK_STENCIL_OP_KEEP] = 0, - [VK_STENCIL_OP_ZERO] = 0, - [VK_STENCIL_OP_REPLACE] = 0, - [VK_STENCIL_OP_INC_CLAMP] = 0, - [VK_STENCIL_OP_DEC_CLAMP] = 0, - [VK_STENCIL_OP_INVERT] = 0, - [VK_STENCIL_OP_INC_WRAP] = 0, - [VK_STENCIL_OP_DEC_WRAP] = 0 -}; - -static void -emit_ds_state(struct anv_pipeline *pipeline, - const VkPipelineDepthStencilStateCreateInfo *info) -{ - if (info == NULL) { - /* We're going to OR this together with the dynamic state. We need - * to make sure it's initialized to something useful. - */ - memset(pipeline->state_wm_depth_stencil, 0, - sizeof(pipeline->state_wm_depth_stencil)); - return; - } - - /* VkBool32 depthBoundsEnable; // optional (depth_bounds_test) */ - - struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - .DepthTestEnable = info->depthTestEnable, - .DepthBufferWriteEnable = info->depthWriteEnable, - .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], - .DoubleSidedStencilEnable = true, - - .StencilTestEnable = info->stencilTestEnable, - .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], - .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], - .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], - .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], - .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], - .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], - .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.stencilDepthFailOp], - .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], - }; - - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->state_wm_depth_stencil, &wm_depth_stencil); -} - -VkResult -anv_pipeline_create( - VkDevice _device, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - const struct 
anv_pipeline_create_info * extra, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline *pipeline; - VkResult result; - uint32_t offset, length; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); - - pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); - - result = anv_reloc_list_init(&pipeline->batch.relocs, device); - if (result != VK_SUCCESS) { - anv_device_free(device, pipeline); - return result; - } - pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; - pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); - - anv_state_stream_init(&pipeline->program_stream, - &device->instruction_block_pool); - - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - pipeline->shaders[pCreateInfo->pStages[i].stage] = - anv_shader_from_handle(pCreateInfo->pStages[i].shader); - } - - if (pCreateInfo->pTessellationState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); - if (pCreateInfo->pViewportState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO"); - if (pCreateInfo->pMultisampleState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); - - pipeline->use_repclear = extra && extra->use_repclear; - - anv_compiler_run(device->compiler, pipeline); - - /* FIXME: The compiler dead-codes FS inputs when we don't have a VS, so we - * hard code this to num_attributes - 2. This is because the attributes - * include VUE header and position, which aren't counted as varying - * inputs. 
*/ - if (pipeline->vs_simd8 == NO_KERNEL) { - pipeline->wm_prog_data.num_varying_inputs = - pCreateInfo->pVertexInputState->attributeCount - 2; - } - - assert(pCreateInfo->pVertexInputState); - emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); - assert(pCreateInfo->pInputAssemblyState); - emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); - assert(pCreateInfo->pRasterState); - emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); - emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); - emit_cb_state(pipeline, pCreateInfo->pColorBlendState); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_STATISTICS, - .StatisticsEnable = true); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_HS, .Enable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_TE, .TEEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_DS, .FunctionEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS, - .ConstantBufferOffset = 0, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS, - .ConstantBufferOffset = 4, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS, - .ConstantBufferOffset = 8, - .ConstantBufferSize = 4); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM_CHROMAKEY, - .ChromaKeyKillEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE_SWIZ); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, - .ClipEnable = true, - .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), - .MinimumPointWidth = 0.125, - .MaximumPointWidth = 255.875); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM, - .StatisticsEnable = true, - .LineEndCapAntialiasingRegionWidth = _05pixels, - .LineAntialiasingRegionWidth = _10pixels, - 
.EarlyDepthStencilControl = NORMAL, - .ForceThreadDispatchEnable = NORMAL, - .PointRasterizationRule = RASTRULE_UPPER_RIGHT, - .BarycentricInterpolationMode = - pipeline->wm_prog_data.barycentric_interp_modes); - - uint32_t samples = 1; - uint32_t log2_samples = __builtin_ffs(samples) - 1; - bool enable_sampling = samples > 1 ? true : false; - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_MULTISAMPLE, - .PixelPositionOffsetEnable = enable_sampling, - .PixelLocation = CENTER, - .NumberofMultisamples = log2_samples); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SAMPLE_MASK, - .SampleMask = 0xffff); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_VS, - .VSURBStartingAddress = pipeline->urb.vs_start, - .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, - .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_GS, - .GSURBStartingAddress = pipeline->urb.gs_start, - .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, - .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_HS, - .HSURBStartingAddress = pipeline->urb.vs_start, - .HSURBEntryAllocationSize = 0, - .HSNumberofURBEntries = 0); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_DS, - .DSURBStartingAddress = pipeline->urb.vs_start, - .DSURBEntryAllocationSize = 0, - .DSNumberofURBEntries = 0); - - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; - offset = 1; - length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; - - if (pipeline->gs_vec4 == NO_KERNEL) - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, .Enable = false); - else - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, - .SingleProgramFlow = false, - .KernelStartPointer = pipeline->gs_vec4, - .VectorMaskEnable = Vmask, - .SamplerCount = 0, - .BindingTableEntryCount = 0, - .ExpectedVertexCount = pipeline->gs_vertex_count, - - .ScratchSpaceBasePointer = 
pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], - .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048), - - .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, - .OutputTopology = gs_prog_data->output_topology, - .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, - .DispatchGRFStartRegisterForURBData = - gs_prog_data->base.base.dispatch_grf_start_reg, - - .MaximumNumberofThreads = device->info.max_gs_threads, - .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, - //pipeline->gs_prog_data.dispatch_mode | - .StatisticsEnable = true, - .IncludePrimitiveID = gs_prog_data->include_primitive_id, - .ReorderMode = TRAILING, - .Enable = true, - - .ControlDataFormat = gs_prog_data->control_data_format, - - /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: - * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) - * UserClipDistanceCullTestEnableBitmask(v) - */ - - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length); - - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; - /* Skip the VUE header and position slots */ - offset = 1; - length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; - - if (pipeline->vs_simd8 == NO_KERNEL || (extra && extra->disable_vs)) - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, - .FunctionEnable = false, - .VertexURBEntryOutputReadOffset = 1, - /* Even if VS is disabled, SBE still gets the amount of - * vertex data to read from this field. We use attribute - * count - 1, as we don't count the VUE header here. 
*/ - .VertexURBEntryOutputLength = - DIV_ROUND_UP(pCreateInfo->pVertexInputState->attributeCount - 1, 2)); - else - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, - .KernelStartPointer = pipeline->vs_simd8, - .SingleVertexDispatch = Multiple, - .VectorMaskEnable = Dmask, - .SamplerCount = 0, - .BindingTableEntryCount = - vue_prog_data->base.binding_table.size_bytes / 4, - .ThreadDispatchPriority = Normal, - .FloatingPointMode = IEEE754, - .IllegalOpcodeExceptionEnable = false, - .AccessesUAV = false, - .SoftwareExceptionEnable = false, - - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], - .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048), - - .DispatchGRFStartRegisterForURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, - .VertexURBEntryReadOffset = 0, - - .MaximumNumberofThreads = device->info.max_vs_threads - 1, - .StatisticsEnable = false, - .SIMD8DispatchEnable = true, - .VertexCacheDisable = false, - .FunctionEnable = true, - - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length, - .UserClipDistanceClipTestEnableBitmask = 0, - .UserClipDistanceCullTestEnableBitmask = 0); - - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - uint32_t ksp0, ksp2, grf_start0, grf_start2; - - ksp2 = 0; - grf_start2 = 0; - if (pipeline->ps_simd8 != NO_KERNEL) { - ksp0 = pipeline->ps_simd8; - grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; - if (pipeline->ps_simd16 != NO_KERNEL) { - ksp2 = pipeline->ps_simd16; - grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; - } - } else if (pipeline->ps_simd16 != NO_KERNEL) { - ksp0 = pipeline->ps_simd16; - grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; - } else { - unreachable("no ps shader"); - } - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, - .KernelStartPointer0 = ksp0, - - .SingleProgramFlow = false, - .VectorMaskEnable = true, - .SamplerCount = 
1, - - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], - .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), - - .MaximumNumberofThreadsPerPSD = 64 - 2, - .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? - POSOFFSET_SAMPLE: POSOFFSET_NONE, - .PushConstantEnable = wm_prog_data->base.nr_params > 0, - ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, - ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, - ._32PixelDispatchEnable = false, - - .DispatchGRFStartRegisterForConstantSetupData0 = grf_start0, - .DispatchGRFStartRegisterForConstantSetupData1 = 0, - .DispatchGRFStartRegisterForConstantSetupData2 = grf_start2, - - .KernelStartPointer1 = 0, - .KernelStartPointer2 = ksp2); - - bool per_sample_ps = false; - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA, - .PixelShaderValid = true, - .PixelShaderKillsPixel = wm_prog_data->uses_kill, - .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, - .AttributeEnable = wm_prog_data->num_varying_inputs > 0, - .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, - .PixelShaderIsPerSample = per_sample_ps); - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; -} - -VkResult anv_DestroyPipeline( - VkDevice _device, - VkPipeline _pipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - - anv_compiler_free(pipeline); - anv_reloc_list_finish(&pipeline->batch.relocs, pipeline->device); - anv_state_stream_finish(&pipeline->program_stream); - anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); - anv_device_free(pipeline->device, pipeline); - - return VK_SUCCESS; -} - -VkResult anv_CreateGraphicsPipelines( - VkDevice _device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkGraphicsPipelineCreateInfo* pCreateInfos, - VkPipeline* pPipelines) -{ - VkResult result = VK_SUCCESS; - - unsigned i = 0; - for (; i < count; i++) { - result 
= anv_pipeline_create(_device, &pCreateInfos[i], - NULL, &pPipelines[i]); - if (result != VK_SUCCESS) { - for (unsigned j = 0; j < i; j++) { - anv_DestroyPipeline(_device, pPipelines[j]); - } - - return result; - } - } - - return VK_SUCCESS; -} - -static VkResult anv_compute_pipeline_create( - VkDevice _device, - const VkComputePipelineCreateInfo* pCreateInfo, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline *pipeline; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); - - pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - - result = anv_reloc_list_init(&pipeline->batch.relocs, device); - if (result != VK_SUCCESS) { - anv_device_free(device, pipeline); - return result; - } - pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; - pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); - - anv_state_stream_init(&pipeline->program_stream, - &device->instruction_block_pool); - - memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); - - pipeline->shaders[VK_SHADER_STAGE_COMPUTE] = - anv_shader_from_handle(pCreateInfo->cs.shader); - - pipeline->use_repclear = false; - - anv_compiler_run(device->compiler, pipeline); - - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - - anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE, - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], - .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), - .ScratchSpaceBasePointerHigh = 0, - .StackSize = 0, - - .MaximumNumberofThreads = device->info.max_cs_threads - 1, - .NumberofURBEntries = 2, - .ResetGatewayTimer = true, - .BypassGatewayControl = true, - 
.URBEntryAllocationSize = 2, - .CURBEAllocationSize = 0); - - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - uint32_t group_size = prog_data->local_size[0] * - prog_data->local_size[1] * prog_data->local_size[2]; - pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); - uint32_t remainder = group_size & (prog_data->simd_size - 1); - - if (remainder > 0) - pipeline->cs_right_mask = ~0u >> (32 - remainder); - else - pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); - - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; -} - -VkResult anv_CreateComputePipelines( - VkDevice _device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkComputePipelineCreateInfo* pCreateInfos, - VkPipeline* pPipelines) -{ - VkResult result = VK_SUCCESS; - - unsigned i = 0; - for (; i < count; i++) { - result = anv_compute_pipeline_create(_device, &pCreateInfos[i], - &pPipelines[i]); - if (result != VK_SUCCESS) { - for (unsigned j = 0; j < i; j++) { - anv_DestroyPipeline(_device, pPipelines[j]); - } - - return result; - } - } - - return VK_SUCCESS; -} - -// Pipeline layout functions - -VkResult anv_CreatePipelineLayout( - VkDevice _device, - const VkPipelineLayoutCreateInfo* pCreateInfo, - VkPipelineLayout* pPipelineLayout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline_layout *layout; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); - - layout = anv_device_alloc(device, sizeof(*layout), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (layout == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - layout->num_sets = pCreateInfo->descriptorSetCount; - - uint32_t surface_start[VK_SHADER_STAGE_NUM] = { 0, }; - uint32_t sampler_start[VK_SHADER_STAGE_NUM] = { 0, }; - - for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { - layout->stage[s].surface_count = 0; - layout->stage[s].sampler_count = 0; - } - - for (uint32_t i = 0; i < 
pCreateInfo->descriptorSetCount; i++) { - ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, - pCreateInfo->pSetLayouts[i]); - - layout->set[i].layout = set_layout; - for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { - layout->set[i].surface_start[s] = surface_start[s]; - surface_start[s] += set_layout->stage[s].surface_count; - layout->set[i].sampler_start[s] = sampler_start[s]; - sampler_start[s] += set_layout->stage[s].sampler_count; - - layout->stage[s].surface_count += set_layout->stage[s].surface_count; - layout->stage[s].sampler_count += set_layout->stage[s].sampler_count; - } - } - - *pPipelineLayout = anv_pipeline_layout_to_handle(layout); - - return VK_SUCCESS; -} - -VkResult anv_DestroyPipelineLayout( - VkDevice _device, - VkPipelineLayout _pipelineLayout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); - - anv_device_free(device, pipeline_layout); - - return VK_SUCCESS; -} diff --git a/src/vulkan/private.h b/src/vulkan/private.h deleted file mode 100644 index ac64f294c2d..00000000000 --- a/src/vulkan/private.h +++ /dev/null @@ -1,1081 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_VALGRIND -#include -#include -#define VG(x) x -#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) -#else -#define VG(x) -#endif - -#include "brw_device_info.h" -#include "util/macros.h" - -#define VK_PROTOTYPES -#include -#include -#include - -#include "entrypoints.h" - -#include "brw_context.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define anv_noreturn __attribute__((__noreturn__)) -#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - -static inline uint32_t -align_u32(uint32_t v, uint32_t a) -{ - return (v + a - 1) & ~(a - 1); -} - -static inline int32_t -align_i32(int32_t v, int32_t a) -{ - return (v + a - 1) & ~(a - 1); -} - -/** Alignment must be a power of 2. 
*/ -static inline bool -anv_is_aligned(uintmax_t n, uintmax_t a) -{ - assert(a == (a & -a)); - return (n & (a - 1)) == 0; -} - -static inline uint32_t -anv_minify(uint32_t n, uint32_t levels) -{ - if (unlikely(n == 0)) - return 0; - else - return MAX(n >> levels, 1); -} - -static inline bool -anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) -{ - if (*inout_mask & clear_mask) { - *inout_mask &= ~clear_mask; - return true; - } else { - return false; - } -} - -#define for_each_bit(b, dword) \ - for (uint32_t __dword = (dword); \ - (b) = __builtin_ffs(__dword) - 1, __dword; \ - __dword &= ~(1 << (b))) - -/* Define no kernel as 1, since that's an illegal offset for a kernel */ -#define NO_KERNEL 1 - -struct anv_common { - VkStructureType sType; - const void* pNext; -}; - -/* Whenever we generate an error, pass it through this function. Useful for - * debugging, where we can break on it. Only call at error site, not when - * propagating errors. Might be useful to plug in a stack trace here. - */ - -static inline VkResult -vk_error(VkResult error) -{ -#ifdef DEBUG - fprintf(stderr, "vk_error: %x\n", error); -#endif - - return error; -} - -void __anv_finishme(const char *file, int line, const char *format, ...) - anv_printflike(3, 4); -void anv_loge(const char *format, ...) anv_printflike(1, 2); -void anv_loge_v(const char *format, va_list va); - -/** - * Print a FINISHME message, including its source location. - */ -#define anv_finishme(format, ...) \ - __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); - -/* A non-fatal assert. Useful for debugging. */ -#ifdef DEBUG -#define anv_assert(x) ({ \ - if (unlikely(!(x))) \ - fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ -}) -#else -#define anv_assert(x) -#endif - -void anv_abortf(const char *format, ...) 
anv_noreturn anv_printflike(1, 2); -void anv_abortfv(const char *format, va_list va) anv_noreturn; - -#define stub_return(v) \ - do { \ - anv_finishme("stub %s", __func__); \ - return (v); \ - } while (0) - -#define stub() \ - do { \ - anv_finishme("stub %s", __func__); \ - return; \ - } while (0) - -/** - * A dynamically growable, circular buffer. Elements are added at head and - * removed from tail. head and tail are free-running uint32_t indices and we - * only compute the modulo with size when accessing the array. This way, - * number of bytes in the queue is always head - tail, even in case of - * wraparound. - */ - -struct anv_vector { - uint32_t head; - uint32_t tail; - uint32_t element_size; - uint32_t size; - void *data; -}; - -int anv_vector_init(struct anv_vector *queue, uint32_t element_size, uint32_t size); -void *anv_vector_add(struct anv_vector *queue); -void *anv_vector_remove(struct anv_vector *queue); - -static inline int -anv_vector_length(struct anv_vector *queue) -{ - return (queue->head - queue->tail) / queue->element_size; -} - -static inline void -anv_vector_finish(struct anv_vector *queue) -{ - free(queue->data); -} - -#define anv_vector_foreach(elem, queue) \ - static_assert(__builtin_types_compatible_p(__typeof__(queue), struct anv_vector *), ""); \ - for (uint32_t __anv_vector_offset = (queue)->tail; \ - elem = (queue)->data + (__anv_vector_offset & ((queue)->size - 1)), __anv_vector_offset < (queue)->head; \ - __anv_vector_offset += (queue)->element_size) - -struct anv_bo { - int gem_handle; - uint32_t index; - uint64_t offset; - uint64_t size; - - /* This field is here for the benefit of the aub dumper. It can (and for - * userptr bos it must) be set to the cpu map of the buffer. Destroying - * the bo won't clean up the mmap, it's still the responsibility of the bo - * user to do that. */ - void *map; -}; - -/* Represents a lock-free linked list of "free" things. This is used by - * both the block pool and the state pools. 
Unfortunately, in order to - * solve the ABA problem, we can't use a single uint32_t head. - */ -union anv_free_list { - struct { - uint32_t offset; - - /* A simple count that is incremented every time the head changes. */ - uint32_t count; - }; - uint64_t u64; -}; - -#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) - -struct anv_block_pool { - struct anv_device *device; - - struct anv_bo bo; - void *map; - int fd; - uint32_t size; - - /** - * Array of mmaps and gem handles owned by the block pool, reclaimed when - * the block pool is destroyed. - */ - struct anv_vector mmap_cleanups; - - uint32_t block_size; - - uint32_t next_block; - union anv_free_list free_list; -}; - -struct anv_block_state { - union { - struct { - uint32_t next; - uint32_t end; - }; - uint64_t u64; - }; -}; - -struct anv_state { - uint32_t offset; - uint32_t alloc_size; - void *map; -}; - -struct anv_fixed_size_state_pool { - size_t state_size; - union anv_free_list free_list; - struct anv_block_state block; -}; - -#define ANV_MIN_STATE_SIZE_LOG2 6 -#define ANV_MAX_STATE_SIZE_LOG2 10 - -#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2) - -struct anv_state_pool { - struct anv_block_pool *block_pool; - struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; -}; - -struct anv_state_stream { - struct anv_block_pool *block_pool; - uint32_t next; - uint32_t current_block; - uint32_t end; -}; - -void anv_block_pool_init(struct anv_block_pool *pool, - struct anv_device *device, uint32_t block_size); -void anv_block_pool_finish(struct anv_block_pool *pool); -uint32_t anv_block_pool_alloc(struct anv_block_pool *pool); -void anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset); -void anv_state_pool_init(struct anv_state_pool *pool, - struct anv_block_pool *block_pool); -struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, - size_t state_size, size_t alignment); -void anv_state_pool_free(struct anv_state_pool *pool, struct 
anv_state state); -void anv_state_stream_init(struct anv_state_stream *stream, - struct anv_block_pool *block_pool); -void anv_state_stream_finish(struct anv_state_stream *stream); -struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, - uint32_t size, uint32_t alignment); - -/** - * Implements a pool of re-usable BOs. The interface is identical to that - * of block_pool except that each block is its own BO. - */ -struct anv_bo_pool { - struct anv_device *device; - - uint32_t bo_size; - - void *free_list; -}; - -void anv_bo_pool_init(struct anv_bo_pool *pool, - struct anv_device *device, uint32_t block_size); -void anv_bo_pool_finish(struct anv_bo_pool *pool); -VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); -void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); - -struct anv_physical_device { - struct anv_instance * instance; - uint32_t chipset_id; - bool no_hw; - const char * path; - const char * name; - const struct brw_device_info * info; - int fd; -}; - -struct anv_instance { - void * pAllocUserData; - PFN_vkAllocFunction pfnAlloc; - PFN_vkFreeFunction pfnFree; - uint32_t apiVersion; - uint32_t physicalDeviceCount; - struct anv_physical_device physicalDevice; -}; - -struct anv_meta_state { - struct { - VkPipeline pipeline; - } clear; - - struct { - VkPipeline pipeline; - VkPipelineLayout pipeline_layout; - VkDescriptorSetLayout ds_layout; - } blit; - - struct { - VkDynamicRasterState rs_state; - VkDynamicColorBlendState cb_state; - VkDynamicDepthStencilState ds_state; - } shared; -}; - -struct anv_queue { - struct anv_device * device; - - struct anv_state_pool * pool; - - /** - * Serial number of the most recently completed batch executed on the - * engine. - */ - struct anv_state completed_serial; - - /** - * The next batch submitted to the engine will be assigned this serial - * number. 
- */ - uint32_t next_serial; - - uint32_t last_collected_serial; -}; - -struct anv_device { - struct anv_instance * instance; - uint32_t chipset_id; - struct brw_device_info info; - int context_id; - int fd; - bool no_hw; - bool dump_aub; - - struct anv_bo_pool batch_bo_pool; - - struct anv_block_pool dynamic_state_block_pool; - struct anv_state_pool dynamic_state_pool; - - struct anv_block_pool instruction_block_pool; - struct anv_block_pool surface_state_block_pool; - struct anv_state_pool surface_state_pool; - - struct anv_meta_state meta_state; - - struct anv_state border_colors; - - struct anv_queue queue; - - struct anv_block_pool scratch_block_pool; - - struct anv_compiler * compiler; - struct anv_aub_writer * aub_writer; - pthread_mutex_t mutex; -}; - -void * -anv_device_alloc(struct anv_device * device, - size_t size, - size_t alignment, - VkSystemAllocType allocType); - -void -anv_device_free(struct anv_device * device, - void * mem); - -void* anv_gem_mmap(struct anv_device *device, - uint32_t gem_handle, uint64_t offset, uint64_t size); -void anv_gem_munmap(void *p, uint64_t size); -uint32_t anv_gem_create(struct anv_device *device, size_t size); -void anv_gem_close(struct anv_device *device, int gem_handle); -int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); -int anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns); -int anv_gem_execbuffer(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf); -int anv_gem_set_tiling(struct anv_device *device, int gem_handle, - uint32_t stride, uint32_t tiling); -int anv_gem_create_context(struct anv_device *device); -int anv_gem_destroy_context(struct anv_device *device, int context); -int anv_gem_get_param(int fd, uint32_t param); -int anv_gem_get_aperture(struct anv_physical_device *physical_dev, uint64_t *size); -int anv_gem_handle_to_fd(struct anv_device *device, int gem_handle); -int anv_gem_fd_to_handle(struct anv_device *device, int fd); -int 
anv_gem_userptr(struct anv_device *device, void *mem, size_t size); - -VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); - -struct anv_reloc_list { - size_t num_relocs; - size_t array_length; - struct drm_i915_gem_relocation_entry * relocs; - struct anv_bo ** reloc_bos; -}; - -VkResult anv_reloc_list_init(struct anv_reloc_list *list, - struct anv_device *device); -void anv_reloc_list_finish(struct anv_reloc_list *list, - struct anv_device *device); - -uint64_t anv_reloc_list_add(struct anv_reloc_list *list, - struct anv_device *device, - uint32_t offset, struct anv_bo *target_bo, - uint32_t delta); - -struct anv_batch_bo { - struct anv_bo bo; - - /* Bytes actually consumed in this batch BO */ - size_t length; - - /* These offsets reference the per-batch reloc list */ - size_t first_reloc; - size_t num_relocs; - - struct anv_batch_bo * prev_batch_bo; -}; - -struct anv_batch { - struct anv_device * device; - - void * start; - void * end; - void * next; - - struct anv_reloc_list relocs; - - /* This callback is called (with the associated user data) in the event - * that the batch runs out of space. 
- */ - VkResult (*extend_cb)(struct anv_batch *, void *); - void * user_data; -}; - -void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); -void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); -uint64_t anv_batch_emit_reloc(struct anv_batch *batch, - void *location, struct anv_bo *bo, uint32_t offset); - -struct anv_address { - struct anv_bo *bo; - uint32_t offset; -}; - -#define __gen_address_type struct anv_address -#define __gen_user_data struct anv_batch - -static inline uint64_t -__gen_combine_address(struct anv_batch *batch, void *location, - const struct anv_address address, uint32_t delta) -{ - if (address.bo == NULL) { - return delta; - } else { - assert(batch->start <= location && location < batch->end); - - return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta); - } -} - -#include "gen7_pack.h" -#include "gen75_pack.h" -#undef GEN8_3DSTATE_MULTISAMPLE -#include "gen8_pack.h" - -#define anv_batch_emit(batch, cmd, ...) do { \ - struct cmd __template = { \ - cmd ## _header, \ - __VA_ARGS__ \ - }; \ - void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \ - cmd ## _pack(batch, __dst, &__template); \ - } while (0) - -#define anv_batch_emitn(batch, n, cmd, ...) 
({ \ - struct cmd __template = { \ - cmd ## _header, \ - .DwordLength = n - cmd ## _length_bias, \ - __VA_ARGS__ \ - }; \ - void *__dst = anv_batch_emit_dwords(batch, n); \ - cmd ## _pack(batch, __dst, &__template); \ - __dst; \ - }) - -#define anv_batch_emit_merge(batch, dwords0, dwords1) \ - do { \ - uint32_t *dw; \ - \ - assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \ - dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \ - for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \ - dw[i] = (dwords0)[i] | (dwords1)[i]; \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ - } while (0) - -#define GEN8_MOCS { \ - .MemoryTypeLLCeLLCCacheabilityControl = WB, \ - .TargetCache = L3DefertoPATforLLCeLLCselection, \ - .AgeforQUADLRU = 0 \ - } - -struct anv_device_memory { - struct anv_bo bo; - VkDeviceSize map_size; - void * map; -}; - -struct anv_dynamic_vp_state { - struct anv_state sf_clip_vp; - struct anv_state cc_vp; - struct anv_state scissor; -}; - -struct anv_dynamic_rs_state { - uint32_t state_sf[GEN8_3DSTATE_SF_length]; - uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; -}; - -struct anv_dynamic_ds_state { - uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; - uint32_t state_color_calc[GEN8_COLOR_CALC_STATE_length]; -}; - -struct anv_dynamic_cb_state { - uint32_t state_color_calc[GEN8_COLOR_CALC_STATE_length]; - -}; - -struct anv_descriptor_slot { - int8_t dynamic_slot; - uint8_t index; -}; - -struct anv_descriptor_set_layout { - struct { - uint32_t surface_count; - struct anv_descriptor_slot *surface_start; - uint32_t sampler_count; - struct anv_descriptor_slot *sampler_start; - } stage[VK_SHADER_STAGE_NUM]; - - uint32_t count; - uint32_t num_dynamic_buffers; - uint32_t shader_stages; - struct anv_descriptor_slot entries[0]; -}; - -struct anv_descriptor { - struct anv_sampler *sampler; - struct anv_surface_view *view; -}; - -struct anv_descriptor_set { - struct anv_descriptor descriptors[0]; -}; - -VkResult 
-anv_descriptor_set_create(struct anv_device *device, - const struct anv_descriptor_set_layout *layout, - struct anv_descriptor_set **out_set); - -void -anv_descriptor_set_destroy(struct anv_device *device, - struct anv_descriptor_set *set); - -#define MAX_VBS 32 -#define MAX_SETS 8 -#define MAX_RTS 8 - -struct anv_pipeline_layout { - struct { - struct anv_descriptor_set_layout *layout; - uint32_t surface_start[VK_SHADER_STAGE_NUM]; - uint32_t sampler_start[VK_SHADER_STAGE_NUM]; - } set[MAX_SETS]; - - uint32_t num_sets; - - struct { - uint32_t surface_count; - uint32_t sampler_count; - } stage[VK_SHADER_STAGE_NUM]; -}; - -struct anv_buffer { - struct anv_device * device; - VkDeviceSize size; - - /* Set when bound */ - struct anv_bo * bo; - VkDeviceSize offset; -}; - -#define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 0) -#define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) -#define ANV_CMD_BUFFER_DS_DIRTY (1 << 3) -#define ANV_CMD_BUFFER_CB_DIRTY (1 << 4) -#define ANV_CMD_BUFFER_VP_DIRTY (1 << 5) -#define ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY (1 << 6) - -struct anv_vertex_binding { - struct anv_buffer * buffer; - VkDeviceSize offset; -}; - -struct anv_descriptor_set_binding { - struct anv_descriptor_set * set; - uint32_t dynamic_offsets[128]; -}; - -/** State required while building cmd buffer */ -struct anv_cmd_state { - uint32_t current_pipeline; - uint32_t vb_dirty; - uint32_t dirty; - uint32_t compute_dirty; - uint32_t descriptors_dirty; - uint32_t scratch_size; - struct anv_pipeline * pipeline; - struct anv_pipeline * compute_pipeline; - struct anv_framebuffer * framebuffer; - struct anv_render_pass * pass; - struct anv_subpass * subpass; - struct anv_dynamic_rs_state * rs_state; - struct anv_dynamic_ds_state * ds_state; - struct anv_dynamic_vp_state * vp_state; - struct anv_dynamic_cb_state * cb_state; - uint32_t state_vf[GEN8_3DSTATE_VF_length]; - struct anv_vertex_binding vertex_bindings[MAX_VBS]; - struct anv_descriptor_set_binding descriptors[MAX_SETS]; -}; - -VkResult 
anv_cmd_state_init(struct anv_cmd_state *state); -void anv_cmd_state_fini(struct anv_cmd_state *state); - -struct anv_cmd_buffer { - struct anv_device * device; - - struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 * exec2_objects; - uint32_t exec2_bo_count; - struct anv_bo ** exec2_bos; - uint32_t exec2_array_length; - bool need_reloc; - uint32_t serial; - - struct anv_batch batch; - struct anv_batch_bo * last_batch_bo; - struct anv_batch_bo * surface_batch_bo; - uint32_t surface_next; - struct anv_reloc_list surface_relocs; - struct anv_state_stream surface_state_stream; - struct anv_state_stream dynamic_state_stream; - - struct anv_cmd_state state; -}; - -struct anv_state -anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment); -struct anv_state -anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment); - -VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer); - -void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); - -void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); - -void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, - struct anv_render_pass *pass, - const VkClearValue *clear_values); - -void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); -void anv_aub_writer_destroy(struct anv_aub_writer *writer); - -struct anv_fence { - struct anv_bo bo; - struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 exec2_objects[1]; - bool ready; -}; - -struct anv_shader_module { - uint32_t size; - char data[0]; -}; - -struct anv_shader { - struct anv_shader_module * module; - char entrypoint[0]; -}; - -struct anv_pipeline { - struct anv_device * device; - struct anv_batch batch; - uint32_t batch_data[256]; - struct anv_shader * shaders[VK_SHADER_STAGE_NUM]; - struct anv_pipeline_layout * layout; - 
bool use_repclear; - - struct brw_vs_prog_data vs_prog_data; - struct brw_wm_prog_data wm_prog_data; - struct brw_gs_prog_data gs_prog_data; - struct brw_cs_prog_data cs_prog_data; - bool writes_point_size; - struct brw_stage_prog_data * prog_data[VK_SHADER_STAGE_NUM]; - uint32_t scratch_start[VK_SHADER_STAGE_NUM]; - uint32_t total_scratch; - struct { - uint32_t vs_start; - uint32_t vs_size; - uint32_t nr_vs_entries; - uint32_t gs_start; - uint32_t gs_size; - uint32_t nr_gs_entries; - } urb; - - uint32_t active_stages; - struct anv_state_stream program_stream; - struct anv_state blend_state; - uint32_t vs_simd8; - uint32_t ps_simd8; - uint32_t ps_simd16; - uint32_t gs_vec4; - uint32_t gs_vertex_count; - uint32_t cs_simd; - - uint32_t vb_used; - uint32_t binding_stride[MAX_VBS]; - - uint32_t state_sf[GEN8_3DSTATE_SF_length]; - uint32_t state_vf[GEN8_3DSTATE_VF_length]; - uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; - uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; - - uint32_t cs_thread_width_max; - uint32_t cs_right_mask; -}; - -struct anv_pipeline_create_info { - bool use_repclear; - bool disable_viewport; - bool disable_scissor; - bool disable_vs; - bool use_rectlist; -}; - -VkResult -anv_pipeline_create(VkDevice device, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_pipeline_create_info *extra, - VkPipeline *pPipeline); - -struct anv_compiler *anv_compiler_create(struct anv_device *device); -void anv_compiler_destroy(struct anv_compiler *compiler); -int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline); -void anv_compiler_free(struct anv_pipeline *pipeline); - -struct anv_format { - const char *name; - uint16_t surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ - uint8_t cpp; /**< Bytes-per-pixel of anv_format::surface_format. 
*/ - uint8_t num_channels; - uint16_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ - bool has_stencil; -}; - -const struct anv_format * -anv_format_for_vk_format(VkFormat format); -bool anv_is_vk_format_depth_or_stencil(VkFormat format); - -/** - * A proxy for the color surfaces, depth surfaces, and stencil surfaces. - */ -struct anv_surface { - /** - * Offset from VkImage's base address, as bound by vkBindImageMemory(). - */ - uint32_t offset; - - uint32_t stride; /**< RENDER_SURFACE_STATE.SurfacePitch */ - uint16_t qpitch; /**< RENDER_SURFACE_STATE.QPitch */ - - /** - * \name Alignment of miptree images, in units of pixels. - * - * These fields contain the real alignment values, not the values to be - * given to the GPU. For example, if h_align is 4, then program the GPU - * with HALIGN_4. - * \{ - */ - uint8_t h_align; /**< RENDER_SURFACE_STATE.SurfaceHorizontalAlignment */ - uint8_t v_align; /**< RENDER_SURFACE_STATE.SurfaceVerticalAlignment */ - /** \} */ - - uint8_t tile_mode; /**< RENDER_SURFACE_STATE.TileMode */ -}; - -struct anv_image { - VkImageType type; - VkExtent3D extent; - VkFormat format; - uint32_t levels; - uint32_t array_size; - - VkDeviceSize size; - uint32_t alignment; - - /* Set when bound */ - struct anv_bo *bo; - VkDeviceSize offset; - - struct anv_swap_chain *swap_chain; - - /** RENDER_SURFACE_STATE.SurfaceType */ - uint8_t surf_type; - - /** Primary surface is either color or depth. */ - struct anv_surface primary_surface; - - /** Stencil surface is optional. 
*/ - struct anv_surface stencil_surface; -}; - -struct anv_surface_view { - struct anv_state surface_state; /**< RENDER_SURFACE_STATE */ - struct anv_bo *bo; - uint32_t offset; /**< VkBufferCreateInfo::offset */ - uint32_t range; /**< VkBufferCreateInfo::range */ - VkFormat format; /**< VkBufferCreateInfo::format */ -}; - -struct anv_buffer_view { - struct anv_surface_view view; -}; - -struct anv_image_view { - struct anv_surface_view view; - VkExtent3D extent; -}; - -enum anv_attachment_view_type { - ANV_ATTACHMENT_VIEW_TYPE_COLOR, - ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL, -}; - -struct anv_attachment_view { - enum anv_attachment_view_type attachment_type; - VkExtent3D extent; -}; - -struct anv_color_attachment_view { - struct anv_attachment_view base; - struct anv_surface_view view; -}; - -struct anv_depth_stencil_view { - struct anv_attachment_view base; - - struct anv_bo *bo; - - uint32_t depth_offset; /**< Offset into bo. */ - uint32_t depth_stride; /**< 3DSTATE_DEPTH_BUFFER.SurfacePitch */ - uint32_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ - uint16_t depth_qpitch; /**< 3DSTATE_DEPTH_BUFFER.SurfaceQPitch */ - - uint32_t stencil_offset; /**< Offset into bo. 
*/ - uint32_t stencil_stride; /**< 3DSTATE_STENCIL_BUFFER.SurfacePitch */ - uint16_t stencil_qpitch; /**< 3DSTATE_STENCIL_BUFFER.SurfaceQPitch */ -}; - -struct anv_image_create_info { - const VkImageCreateInfo *vk_info; - bool force_tile_mode; - uint8_t tile_mode; -}; - -VkResult anv_image_create(VkDevice _device, - const struct anv_image_create_info *info, - VkImage *pImage); - -void anv_image_view_init(struct anv_image_view *view, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer); - -void anv_color_attachment_view_init(struct anv_color_attachment_view *view, - struct anv_device *device, - const VkAttachmentViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer); -void anv_fill_buffer_surface_state(void *state, VkFormat format, - uint32_t offset, uint32_t range); - -void anv_surface_view_fini(struct anv_device *device, - struct anv_surface_view *view); - -struct anv_sampler { - uint32_t state[4]; -}; - -struct anv_framebuffer { - uint32_t width; - uint32_t height; - uint32_t layers; - - /* Viewport for clears */ - VkDynamicViewportState vp_state; - - uint32_t attachment_count; - const struct anv_attachment_view * attachments[0]; -}; - -struct anv_subpass { - uint32_t input_count; - uint32_t * input_attachments; - uint32_t color_count; - uint32_t * color_attachments; - uint32_t * resolve_attachments; - uint32_t depth_stencil_attachment; -}; - -struct anv_render_pass_attachment { - VkFormat format; - uint32_t samples; - VkAttachmentLoadOp load_op; - VkAttachmentLoadOp stencil_load_op; -}; - -struct anv_render_pass { - uint32_t attachment_count; - uint32_t subpass_count; - - struct anv_render_pass_attachment * attachments; - struct anv_subpass subpasses[0]; -}; - -void anv_device_init_meta(struct anv_device *device); -void anv_device_finish_meta(struct anv_device *device); - -void *anv_lookup_entrypoint(const char *name); - -#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \ - \ - static 
inline struct __anv_type * \ - __anv_type ## _from_handle(__VkType _handle) \ - { \ - return (struct __anv_type *) _handle; \ - } \ - \ - static inline __VkType \ - __anv_type ## _to_handle(struct __anv_type *_obj) \ - { \ - return (__VkType) _obj; \ - } - -#define ANV_DEFINE_NONDISP_HANDLE_CASTS(__anv_type, __VkType) \ - \ - static inline struct __anv_type * \ - __anv_type ## _from_handle(__VkType _handle) \ - { \ - return (struct __anv_type *) _handle.handle; \ - } \ - \ - static inline __VkType \ - __anv_type ## _to_handle(struct __anv_type *_obj) \ - { \ - return (__VkType) { .handle = (uint64_t) _obj }; \ - } - -#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ - struct __anv_type *__name = __anv_type ## _from_handle(__handle) - -ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCmdBuffer) -ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) -ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) -ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) -ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) -ANV_DEFINE_HANDLE_CASTS(anv_swap_chain, VkSwapChainWSI); - -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_attachment_view, VkAttachmentView) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView); -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_cb_state, VkDynamicColorBlendState) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_ds_state, VkDynamicDepthStencilState) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_rs_state, VkDynamicRasterState) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_vp_state, VkDynamicViewportState) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) 
-ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView); -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader, VkShader) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) - -#define ANV_DEFINE_STRUCT_CASTS(__anv_type, __VkType) \ - \ - static inline const __VkType * \ - __anv_type ## _to_ ## __VkType(const struct __anv_type *__anv_obj) \ - { \ - return (const __VkType *) __anv_obj; \ - } - -#define ANV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name) \ - const __VkType *__vk_name = anv_common_to_ ## __VkType(__common_name) - -ANV_DEFINE_STRUCT_CASTS(anv_common, VkMemoryBarrier) -ANV_DEFINE_STRUCT_CASTS(anv_common, VkBufferMemoryBarrier) -ANV_DEFINE_STRUCT_CASTS(anv_common, VkImageMemoryBarrier) - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/query.c b/src/vulkan/query.c deleted file mode 100644 index b3b85897814..00000000000 --- a/src/vulkan/query.c +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "private.h" - -struct anv_query_pool_slot { - uint64_t begin; - uint64_t end; - uint64_t available; -}; - -struct anv_query_pool { - VkQueryType type; - uint32_t slots; - struct anv_bo bo; -}; - -VkResult anv_CreateQueryPool( - VkDevice _device, - const VkQueryPoolCreateInfo* pCreateInfo, - VkQueryPool* pQueryPool) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_query_pool *pool; - VkResult result; - size_t size; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); - - switch (pCreateInfo->queryType) { - case VK_QUERY_TYPE_OCCLUSION: - break; - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - return VK_UNSUPPORTED; - default: - unreachable(""); - } - - pool = anv_device_alloc(device, sizeof(*pool), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (pool == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - size = pCreateInfo->slots * sizeof(struct anv_query_pool_slot); - result = anv_bo_init_new(&pool->bo, device, size); - if (result != VK_SUCCESS) - goto fail; - - pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size); - - *pQueryPool = anv_query_pool_to_handle(pool); - - return VK_SUCCESS; - - fail: - anv_device_free(device, pool); - - return result; -} - -VkResult anv_DestroyQueryPool( - VkDevice _device, - VkQueryPool _pool) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_query_pool, pool, _pool); - - anv_gem_munmap(pool->bo.map, 
pool->bo.size); - anv_gem_close(device, pool->bo.gem_handle); - anv_device_free(device, pool); - - return VK_SUCCESS; -} - -VkResult anv_GetQueryPoolResults( - VkDevice _device, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount, - size_t* pDataSize, - void* pData, - VkQueryResultFlags flags) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - struct anv_query_pool_slot *slot = pool->bo.map; - int64_t timeout = INT64_MAX; - uint32_t *dst32 = pData; - uint64_t *dst64 = pData; - uint64_t result; - int ret; - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - /* Where is the availabilty info supposed to go? */ - anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); - return VK_UNSUPPORTED; - } - - assert(pool->type == VK_QUERY_TYPE_OCCLUSION); - - if (flags & VK_QUERY_RESULT_64_BIT) - *pDataSize = queryCount * sizeof(uint64_t); - else - *pDataSize = queryCount * sizeof(uint32_t); - - if (pData == NULL) - return VK_SUCCESS; - - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); - if (ret == -1) - return vk_error(VK_ERROR_UNKNOWN); - } - - for (uint32_t i = 0; i < queryCount; i++) { - result = slot[startQuery + i].end - slot[startQuery + i].begin; - if (flags & VK_QUERY_RESULT_64_BIT) { - *dst64++ = result; - } else { - if (result > UINT32_MAX) - result = UINT32_MAX; - *dst32++ = result; - } - } - - return VK_SUCCESS; -} - -static void -anv_batch_emit_ps_depth_count(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_PIPE_CONTROL, - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WritePSDepthCount, - .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ -} - -void anv_CmdBeginQuery( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t slot, - VkQueryControlFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, 
queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - slot * sizeof(struct anv_query_pool_slot)); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -void anv_CmdEndQuery( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t slot) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - slot * sizeof(struct anv_query_pool_slot) + 8); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -void anv_CmdResetQueryPool( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount) -{ - stub(); -} - -#define TIMESTAMP 0x2358 - -void anv_CmdWriteTimestamp( - VkCmdBuffer cmdBuffer, - VkTimestampType timestampType, - VkBuffer destBuffer, - VkDeviceSize destOffset) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - struct anv_bo *bo = buffer->bo; - - switch (timestampType) { - case VK_TIMESTAMP_TYPE_TOP: - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = TIMESTAMP, - .MemoryAddress = { bo, buffer->offset + destOffset }); - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = TIMESTAMP + 4, - .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); - break; - - case VK_TIMESTAMP_TYPE_BOTTOM: - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteTimestamp, - .Address = /* FIXME: This is only lower 32 bits */ - { bo, buffer->offset + destOffset }); - break; - - default: - break; - } -} - -#define alu_opcode(v) __gen_field((v), 20, 31) -#define alu_operand1(v) __gen_field((v), 10, 19) -#define alu_operand2(v) 
__gen_field((v), 0, 9) -#define alu(opcode, operand1, operand2) \ - alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) - -#define OPCODE_NOOP 0x000 -#define OPCODE_LOAD 0x080 -#define OPCODE_LOADINV 0x480 -#define OPCODE_LOAD0 0x081 -#define OPCODE_LOAD1 0x481 -#define OPCODE_ADD 0x100 -#define OPCODE_SUB 0x101 -#define OPCODE_AND 0x102 -#define OPCODE_OR 0x103 -#define OPCODE_XOR 0x104 -#define OPCODE_STORE 0x180 -#define OPCODE_STOREINV 0x580 - -#define OPERAND_R0 0x00 -#define OPERAND_R1 0x01 -#define OPERAND_R2 0x02 -#define OPERAND_R3 0x03 -#define OPERAND_R4 0x04 -#define OPERAND_SRCA 0x20 -#define OPERAND_SRCB 0x21 -#define OPERAND_ACCU 0x31 -#define OPERAND_ZF 0x32 -#define OPERAND_CF 0x33 - -#define CS_GPR(n) (0x2600 + (n) * 8) - -static void -emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg + 4, - .MemoryAddress = { bo, offset + 4 }); -} - -void anv_CmdCopyQueryPoolResults( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize destStride, - VkQueryResultFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - uint32_t slot_offset, dst_offset; - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - /* Where is the availabilty info supposed to go? */ - anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); - return; - } - - assert(pool->type == VK_QUERY_TYPE_OCCLUSION); - - /* FIXME: If we're not waiting, should we just do this on the CPU? 
*/ - if (flags & VK_QUERY_RESULT_WAIT_BIT) - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .CommandStreamerStallEnable = true, - .StallAtPixelScoreboard = true); - - dst_offset = buffer->offset + destOffset; - for (uint32_t i = 0; i < queryCount; i++) { - - slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); - - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); - - /* FIXME: We need to clamp the result for 32 bit. */ - - uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); - dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); - dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); - dw[3] = alu(OPCODE_SUB, 0, 0); - dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = CS_GPR(2), - /* FIXME: This is only lower 32 bits */ - .MemoryAddress = { buffer->bo, dst_offset }); - - if (flags & VK_QUERY_RESULT_64_BIT) - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = CS_GPR(2) + 4, - /* FIXME: This is only lower 32 bits */ - .MemoryAddress = { buffer->bo, dst_offset + 4 }); - - dst_offset += destStride; - } -} diff --git a/src/vulkan/util.c b/src/vulkan/util.c deleted file mode 100644 index 21cb6484670..00000000000 --- a/src/vulkan/util.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and 
this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -#include "private.h" - -/** Log an error message. */ -void anv_printflike(1, 2) -anv_loge(const char *format, ...) -{ - va_list va; - - va_start(va, format); - anv_loge_v(format, va); - va_end(va); -} - -/** \see anv_loge() */ -void -anv_loge_v(const char *format, va_list va) -{ - fprintf(stderr, "vk: error: "); - vfprintf(stderr, format, va); - fprintf(stderr, "\n"); -} - -void anv_printflike(3, 4) -__anv_finishme(const char *file, int line, const char *format, ...) -{ - va_list ap; - char buffer[256]; - - va_start(ap, format); - vsnprintf(buffer, sizeof(buffer), format, ap); - va_end(ap); - - fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); -} - -void anv_noreturn anv_printflike(1, 2) -anv_abortf(const char *format, ...) 
-{ - va_list va; - - va_start(va, format); - anv_abortfv(format, va); - va_end(va); -} - -void anv_noreturn -anv_abortfv(const char *format, va_list va) -{ - fprintf(stderr, "vk: error: "); - vfprintf(stderr, format, va); - fprintf(stderr, "\n"); - abort(); -} - -int -anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) -{ - assert(is_power_of_two(size)); - assert(element_size < size && is_power_of_two(element_size)); - - vector->head = 0; - vector->tail = 0; - vector->element_size = element_size; - vector->size = size; - vector->data = malloc(size); - - return vector->data != NULL; -} - -void * -anv_vector_add(struct anv_vector *vector) -{ - uint32_t offset, size, split, tail; - void *data; - - if (vector->head - vector->tail == vector->size) { - size = vector->size * 2; - data = malloc(size); - if (data == NULL) - return NULL; - split = align_u32(vector->tail, vector->size); - tail = vector->tail & (vector->size - 1); - if (vector->head - split < vector->size) { - memcpy(data + tail, - vector->data + tail, - split - vector->tail); - memcpy(data + vector->size, - vector->data, vector->head - split); - } else { - memcpy(data + tail, - vector->data + tail, - vector->head - vector->tail); - } - free(vector->data); - vector->data = data; - vector->size = size; - } - - assert(vector->head - vector->tail < vector->size); - - offset = vector->head & (vector->size - 1); - vector->head += vector->element_size; - - return vector->data + offset; -} - -void * -anv_vector_remove(struct anv_vector *vector) -{ - uint32_t offset; - - if (vector->head == vector->tail) - return NULL; - - assert(vector->head - vector->tail <= vector->size); - - offset = vector->tail & (vector->size - 1); - vector->tail += vector->element_size; - - return vector->data + offset; -} diff --git a/src/vulkan/vk_gen.py b/src/vulkan/vk_gen.py deleted file mode 100644 index d481af74ef0..00000000000 --- a/src/vulkan/vk_gen.py +++ /dev/null @@ -1,269 +0,0 @@ -# coding=utf-8 -# -# 
Copyright © 2015 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. -# - -import fileinput, re, sys - -# Each function typedef in the vulkan.h header is all on one line and matches -# this regepx. We hope that won't change. - -p = re.compile('typedef ([^ ]*) *\(VKAPI \*PFN_vk([^(]*)\)(.*);') - -entrypoints = [] - -# We generate a static hash table for entry point lookup -# (vkGetProcAddress). We use a linear congruential generator for our hash -# function and a power-of-two size table. The prime numbers are determined -# experimentally. 
- -none = 0xffff -hash_size = 256 -u32_mask = 2**32 - 1 -hash_mask = hash_size - 1 - -prime_factor = 5024183 -prime_step = 19 - -def hash(name): - h = 0; - for c in name: - h = (h * prime_factor + ord(c)) & u32_mask - - return h - -opt_header = False -opt_code = False - -if (sys.argv[1] == "header"): - opt_header = True - sys.argv.pop() -elif (sys.argv[1] == "code"): - opt_code = True - sys.argv.pop() - -# Parse the entry points in the header - -i = 0 -for line in fileinput.input(): - m = p.match(line) - if (m): - if m.group(2) == 'VoidFunction': - continue - fullname = "vk" + m.group(2) - h = hash(fullname) - entrypoints.append((m.group(1), m.group(2), m.group(3), i, h)) - i = i + 1 - -# For outputting entrypoints.h we generate a anv_EntryPoint() prototype -# per entry point. - -if opt_header: - for type, name, args, num, h in entrypoints: - print "%s anv_%s%s;" % (type, name, args) - print "%s anv_validate_%s%s;" % (type, name, args) - exit() - - - -print """/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* This file generated from vk_gen.py, don't edit directly. */ - -#include "private.h" - -struct anv_entrypoint { - uint32_t name; - uint32_t hash; - void *function; - void *validate; -}; - -/* We use a big string constant to avoid lots of reloctions from the entry - * point table to lots of little strings. The entries in the entry point table - * store the index into this big string. - */ - -static const char strings[] =""" - -offsets = [] -i = 0; -for type, name, args, num, h in entrypoints: - print " \"vk%s\\0\"" % name - offsets.append(i) - i += 2 + len(name) + 1 -print """ ; - -/* Weak aliases for all potential validate functions. These will resolve to - * NULL if they're not defined, which lets the resolve_entrypoint() function - * either pick a validate wrapper if available or just plug in the actual - * entry point. - */ -""" - -for type, name, args, num, h in entrypoints: - print "%s anv_validate_%s%s __attribute__ ((weak));" % (type, name, args) - -# Now generate the table of all entry points and their validation functions - -print "\nstatic const struct anv_entrypoint entrypoints[] = {" -for type, name, args, num, h in entrypoints: - print " { %5d, 0x%08x, anv_%s, anv_validate_%s }," % (offsets[num], h, name, name) -print "};\n" - -print """ -#ifdef DEBUG -static bool enable_validate = true; -#else -static bool enable_validate = false; -#endif - -/* We can't use symbols that need resolving (like, oh, getenv) in the resolve - * function. This means that we have to determine whether or not to use the - * validation layer sometime before that. The constructor function attribute asks - * the dynamic linker to invoke determine_validate() at dlopen() time which - * works. 
- */ -static void __attribute__ ((constructor)) -determine_validate(void) -{ - const char *s = getenv("ANV_VALIDATE"); - - if (s) - enable_validate = atoi(s); -} - -static void * __attribute__ ((noinline)) -resolve_entrypoint(uint32_t index) -{ - if (enable_validate && entrypoints[index].validate) - return entrypoints[index].validate; - - return entrypoints[index].function; -} -""" - -# Now output ifuncs and their resolve helpers for all entry points. The -# resolve helper calls resolve_entrypoint() with the entry point index, which -# lets the resolver look it up in the table. - -for type, name, args, num, h in entrypoints: - print "static void *resolve_%s(void) { return resolve_entrypoint(%d); }" % (name, num) - print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name) - - -# Now generate the hash table used for entry point look up. This is a -# uint16_t table of entry point indices. We use 0xffff to indicate an entry -# in the hash table is empty. - -map = [none for f in xrange(hash_size)] -collisions = [0 for f in xrange(10)] -for type, name, args, num, h in entrypoints: - level = 0 - while map[h & hash_mask] != none: - h = h + prime_step - level = level + 1 - if level > 9: - collisions[9] += 1 - else: - collisions[level] += 1 - map[h & hash_mask] = num - -print "/* Hash table stats:" -print " * size %d entries" % hash_size -print " * collisions entries" -for i in xrange(10): - if (i == 9): - plus = "+" - else: - plus = " " - - print " * %2d%s %4d" % (i, plus, collisions[i]) -print " */\n" - -print "#define none 0x%04x\n" % none - -print "static const uint16_t map[] = {" -for i in xrange(0, hash_size, 8): - print " ", - for j in xrange(i, i + 8): - if map[j] & 0xffff == 0xffff: - print " none,", - else: - print "0x%04x," % (map[j] & 0xffff), - print - -print "};" - -# Finally we generate the hash table lookup function. 
The hash function and -# linear probing algorithm matches the hash table generated above. - -print """ -void * -anv_lookup_entrypoint(const char *name) -{ - static const uint32_t prime_factor = %d; - static const uint32_t prime_step = %d; - const struct anv_entrypoint *e; - uint32_t hash, h, i; - const char *p; - - hash = 0; - for (p = name; *p; p++) - hash = hash * prime_factor + *p; - - h = hash; - do { - i = map[h & %d]; - if (i == none) - return NULL; - e = &entrypoints[i]; - h += prime_step; - } while (e->hash != hash); - - if (strcmp(name, strings + e->name) != 0) - return NULL; - - return resolve_entrypoint(i); -} -""" % (prime_factor, prime_step, hash_mask) diff --git a/src/vulkan/x11.c b/src/vulkan/x11.c deleted file mode 100644 index 1e0bdb12bd0..00000000000 --- a/src/vulkan/x11.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "private.h" - -#include -#include -#include - -static const VkFormat formats[] = { - VK_FORMAT_B5G6R5_UNORM, - VK_FORMAT_B8G8R8A8_UNORM, - VK_FORMAT_B8G8R8A8_SRGB, -}; - -VkResult anv_GetDisplayInfoWSI( - VkDisplayWSI display, - VkDisplayInfoTypeWSI infoType, - size_t* pDataSize, - void* pData) -{ - VkDisplayFormatPropertiesWSI *properties = pData; - size_t size; - - if (pDataSize == NULL) - return VK_ERROR_INVALID_POINTER; - - switch (infoType) { - case VK_DISPLAY_INFO_TYPE_FORMAT_PROPERTIES_WSI: - size = sizeof(properties[0]) * ARRAY_SIZE(formats); - - if (pData == NULL) { - *pDataSize = size; - return VK_SUCCESS; - } - - if (*pDataSize < size) - return vk_error(VK_ERROR_INVALID_VALUE); - - *pDataSize = size; - - for (uint32_t i = 0; i < ARRAY_SIZE(formats); i++) - properties[i].swapChainFormat = formats[i]; - - return VK_SUCCESS; - - default: - return VK_UNSUPPORTED; - } -} - -struct anv_swap_chain { - struct anv_device * device; - xcb_connection_t * conn; - xcb_window_t window; - xcb_gc_t gc; - VkExtent2D extent; - uint32_t count; - struct { - struct anv_image * image; - struct anv_device_memory * memory; - xcb_pixmap_t pixmap; - } images[0]; -}; - -VkResult anv_CreateSwapChainWSI( - VkDevice _device, - const VkSwapChainCreateInfoWSI* pCreateInfo, - VkSwapChainWSI* pSwapChain) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - struct anv_swap_chain *chain; - xcb_void_cookie_t cookie; - VkResult result; - size_t size; - int ret; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI); - - size = sizeof(*chain) + pCreateInfo->imageCount * sizeof(chain->images[0]); - chain = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if 
(chain == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - chain->device = device; - chain->conn = (xcb_connection_t *) pCreateInfo->pNativeWindowSystemHandle; - chain->window = (xcb_window_t) (uintptr_t) pCreateInfo->pNativeWindowHandle; - chain->count = pCreateInfo->imageCount; - chain->extent = pCreateInfo->imageExtent; - - for (uint32_t i = 0; i < chain->count; i++) { - VkDeviceMemory memory_h; - VkImage image_h; - struct anv_image *image; - struct anv_surface *surface; - struct anv_device_memory *memory; - - anv_image_create(_device, - &(struct anv_image_create_info) { - .force_tile_mode = true, - .tile_mode = XMAJOR, - .vk_info = - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = pCreateInfo->imageFormat, - .extent = { - .width = pCreateInfo->imageExtent.width, - .height = pCreateInfo->imageExtent.height, - .depth = 1 - }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - /* FIXME: Need a way to use X tiling to allow scanout */ - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }}, - &image_h); - - image = anv_image_from_handle(image_h); - surface = &image->primary_surface; - - anv_AllocMemory(_device, - &(VkMemoryAllocInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, - .allocationSize = image->size, - .memoryTypeIndex = 0, - }, - &memory_h); - - memory = anv_device_memory_from_handle(memory_h); - - anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), - memory_h, 0); - - ret = anv_gem_set_tiling(device, memory->bo.gem_handle, - surface->stride, I915_TILING_X); - if (ret) { - result = vk_error(VK_ERROR_UNKNOWN); - goto fail; - } - - int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); - if (fd == -1) { - result = vk_error(VK_ERROR_UNKNOWN); - goto fail; - } - - uint32_t bpp = 32; - uint32_t depth = 24; - xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); - - cookie = - 
xcb_dri3_pixmap_from_buffer_checked(chain->conn, - pixmap, - chain->window, - image->size, - pCreateInfo->imageExtent.width, - pCreateInfo->imageExtent.height, - surface->stride, - depth, bpp, fd); - - chain->images[i].image = image; - chain->images[i].memory = memory; - chain->images[i].pixmap = pixmap; - image->swap_chain = chain; - - xcb_discard_reply(chain->conn, cookie.sequence); - } - - chain->gc = xcb_generate_id(chain->conn); - if (!chain->gc) { - result = vk_error(VK_ERROR_UNKNOWN); - goto fail; - } - - cookie = xcb_create_gc(chain->conn, - chain->gc, - chain->window, - XCB_GC_GRAPHICS_EXPOSURES, - (uint32_t []) { 0 }); - xcb_discard_reply(chain->conn, cookie.sequence); - - *pSwapChain = anv_swap_chain_to_handle(chain); - - return VK_SUCCESS; - - fail: - return result; -} - -VkResult anv_DestroySwapChainWSI( - VkSwapChainWSI _chain) -{ - ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); - - anv_device_free(chain->device, chain); - - return VK_SUCCESS; -} - -VkResult anv_GetSwapChainInfoWSI( - VkSwapChainWSI _chain, - VkSwapChainInfoTypeWSI infoType, - size_t* pDataSize, - void* pData) -{ - ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); - - VkSwapChainImageInfoWSI *images; - size_t size; - - switch (infoType) { - case VK_SWAP_CHAIN_INFO_TYPE_PERSISTENT_IMAGES_WSI: - size = sizeof(*images) * chain->count; - if (pData && *pDataSize < size) - return VK_ERROR_INVALID_VALUE; - - *pDataSize = size; - if (!pData) - return VK_SUCCESS; - - images = pData; - for (uint32_t i = 0; i < chain->count; i++) { - images[i].image = anv_image_to_handle(chain->images[i].image); - images[i].memory = anv_device_memory_to_handle(chain->images[i].memory); - } - - return VK_SUCCESS; - - default: - return VK_UNSUPPORTED; - } -} - -VkResult anv_QueuePresentWSI( - VkQueue queue_, - const VkPresentInfoWSI* pPresentInfo) -{ - ANV_FROM_HANDLE(anv_image, image, pPresentInfo->image); - - struct anv_swap_chain *chain = image->swap_chain; - xcb_void_cookie_t cookie; - xcb_pixmap_t pixmap; - - 
assert(pPresentInfo->sType == VK_STRUCTURE_TYPE_PRESENT_INFO_WSI); - - if (chain == NULL) - return vk_error(VK_ERROR_INVALID_VALUE); - - pixmap = XCB_NONE; - for (uint32_t i = 0; i < chain->count; i++) { - if (image == chain->images[i].image) { - pixmap = chain->images[i].pixmap; - break; - } - } - - if (pixmap == XCB_NONE) - return vk_error(VK_ERROR_INVALID_VALUE); - - cookie = xcb_copy_area(chain->conn, - pixmap, - chain->window, - chain->gc, - 0, 0, - 0, 0, - chain->extent.width, - chain->extent.height); - xcb_discard_reply(chain->conn, cookie.sequence); - - xcb_flush(chain->conn); - - return VK_SUCCESS; -} -- cgit v1.2.3 From c9dc1f409817379c8ae9655cade54d0872aa86fe Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 22 Jul 2015 15:26:53 -0700 Subject: vk/pipeline: Be more sloppy about shader entrypoint names The CTS passes in NULL names right now. It's not too hard to support that as just "main". With this, and a patch to vulkancts, we now pass all 6 tests. --- src/vulkan/anv_pipeline.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 817b644eefb..5aeacefddf6 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -79,9 +79,10 @@ VkResult anv_CreateShader( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_CREATE_INFO); assert(pCreateInfo->flags == 0); - size_t name_len = strlen(pCreateInfo->pName); + const char *name = pCreateInfo->pName ? 
pCreateInfo->pName : "main"; + size_t name_len = strlen(name); - if (strcmp(pCreateInfo->pName, "main") != 0) { + if (strcmp(name, "main") != 0) { anv_finishme("Multiple shaders per module not really supported"); } @@ -91,7 +92,7 @@ VkResult anv_CreateShader( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); shader->module = module; - memcpy(shader->entrypoint, pCreateInfo->pName, name_len + 1); + memcpy(shader->entrypoint, name, name_len + 1); *pShader = anv_shader_to_handle(shader); -- cgit v1.2.3 From 867f6cb90c7d3ccd79d5eb0c67537a83ae964dd2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 22 Jul 2015 17:33:09 -0700 Subject: vk: Add a FreeDescriptorSets function --- include/vulkan/vulkan.h | 7 +++++++ src/vulkan/anv_device.c | 17 +++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index a9f55a4927d..8af58d15a27 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2168,6 +2168,7 @@ typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, VkDescript typedef VkResult (VKAPI *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets, uint32_t* pCount); +typedef VkResult (VKAPI *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t count, const VkDescriptorSet* pDescriptorSets); typedef VkResult (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); typedef VkResult (VKAPI *PFN_vkCreateDynamicViewportState)(VkDevice device, const 
VkDynamicViewportStateCreateInfo* pCreateInfo, VkDynamicViewportState* pState); typedef VkResult (VKAPI *PFN_vkDestroyDynamicViewportState)(VkDevice device, VkDynamicViewportState dynamicViewportState); @@ -2671,6 +2672,12 @@ VkResult VKAPI vkAllocDescriptorSets( VkDescriptorSet* pDescriptorSets, uint32_t* pCount); +VkResult VKAPI vkFreeDescriptorSets( + VkDevice device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet* pDescriptorSets); + VkResult VKAPI vkUpdateDescriptorSets( VkDevice device, uint32_t writeCount, diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 1847303a1cb..22abddc0cd6 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1820,6 +1820,23 @@ VkResult anv_AllocDescriptorSets( return VK_SUCCESS; } +VkResult anv_FreeDescriptorSets( + VkDevice _device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet* pDescriptorSets) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + + anv_descriptor_set_destroy(device, set); + } + + return VK_SUCCESS; +} + VkResult anv_UpdateDescriptorSets( VkDevice device, uint32_t writeCount, -- cgit v1.2.3 From 3460e6cb2f8b4424e2b776518a6ad0fc190ecd73 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 22 Jul 2015 17:51:14 -0700 Subject: vk/device: Finish the scratch block pool on device destruction --- src/vulkan/anv_device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 22abddc0cd6..886118d1954 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -630,6 +630,7 @@ VkResult anv_DestroyDevice( anv_block_pool_finish(&device->dynamic_state_block_pool); anv_block_pool_finish(&device->instruction_block_pool); anv_block_pool_finish(&device->surface_state_block_pool); + anv_block_pool_finish(&device->scratch_block_pool); close(device->fd); 
-- cgit v1.2.3 From b1fcc30ff0b27b4d74f0a3e68846076dd1e22d15 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 22 Jul 2015 17:51:26 -0700 Subject: vk/meta: Destroy shader modules --- src/vulkan/anv_meta.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index f2629156f3c..8ee7eb012e6 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -141,6 +141,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) }, &device->meta_state.clear.pipeline); + anv_DestroyShaderModule(anv_device_to_handle(device), fsm); anv_DestroyShader(anv_device_to_handle(device), fs); } @@ -476,7 +477,9 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, &device->meta_state.blit.pipeline); + anv_DestroyShaderModule(anv_device_to_handle(device), vsm); anv_DestroyShader(anv_device_to_handle(device), vs); + anv_DestroyShaderModule(anv_device_to_handle(device), fsm); anv_DestroyShader(anv_device_to_handle(device), fs); } -- cgit v1.2.3 From e99773badd1f2f795bf93bf912d2e2a270e285ee Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 23 Jul 2015 08:57:54 -0700 Subject: vk: Add two more valgrind checks --- src/vulkan/anv_cmd_buffer.c | 1 + src/vulkan/anv_private.h | 1 + 2 files changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 4d4dfa9fb53..378d148acbe 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -192,6 +192,7 @@ anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) assert(batch->next + size <= batch->end); + VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); memcpy(batch->next, other->start, size); offset = batch->next - batch->start; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index be24b514f30..89c0bc29fab 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -528,6 +528,7 @@ __gen_combine_address(struct anv_batch *batch, 
void *location, }; \ void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \ cmd ## _pack(batch, __dst, &__template); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, cmd ## _length * 4)); \ } while (0) #define anv_batch_emitn(batch, n, cmd, ...) ({ \ -- cgit v1.2.3 From 50e86b577761bcfa19230e917f9d8c9c56fb3075 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 23 Jul 2015 10:44:27 -0700 Subject: vk: Actually advertise 0.138.1 at runtime --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 886118d1954..0bcf9b04b45 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -382,7 +382,7 @@ VkResult anv_GetPhysicalDeviceProperties( ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = 1, + .apiVersion = VK_MAKE_VERSION(0, 138, 1), .driverVersion = 1, .vendorId = 0x8086, .deviceId = pdevice->chipset_id, -- cgit v1.2.3 From 80ad578c4e05f0f67efdb6f96a6b97b5c409fca5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 27 Jul 2015 12:40:43 -0700 Subject: vk/private.h: Re-arrange and better comment anv_cmd_buffer --- src/vulkan/anv_private.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 89c0bc29fab..e72b8a3ae36 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -696,6 +696,16 @@ void anv_cmd_state_fini(struct anv_cmd_state *state); struct anv_cmd_buffer { struct anv_device * device; + /* Fields required for the actual chain of anv_batch_bo's */ + struct anv_batch batch; + struct anv_batch_bo * last_batch_bo; + struct anv_batch_bo * surface_batch_bo; + uint32_t surface_next; + struct anv_reloc_list surface_relocs; + + /* Information needed for execbuf that's generated when the command + * buffer is ended. 
+ */ struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 * exec2_objects; uint32_t exec2_bo_count; @@ -704,11 +714,7 @@ struct anv_cmd_buffer { bool need_reloc; uint32_t serial; - struct anv_batch batch; - struct anv_batch_bo * last_batch_bo; - struct anv_batch_bo * surface_batch_bo; - uint32_t surface_next; - struct anv_reloc_list surface_relocs; + /* Stream objects for storing temporary data */ struct anv_state_stream surface_state_stream; struct anv_state_stream dynamic_state_stream; -- cgit v1.2.3 From 8fb640571822205aba1b874b81bef89fd2dc652b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 27 Jul 2015 14:23:56 -0700 Subject: vk/cmd_buffer: Factor the guts of (Create|Reset|Destroy)CmdBuffer into helpers --- src/vulkan/anv_cmd_buffer.c | 148 +++++++++++++++++++++++++++----------------- src/vulkan/anv_private.h | 9 ++- 2 files changed, 98 insertions(+), 59 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 378d148acbe..085878bd69d 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -393,27 +393,15 @@ anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; } -VkResult anv_CreateCommandBuffer( - VkDevice _device, - const VkCmdBufferCreateInfo* pCreateInfo, - VkCmdBuffer* pCmdBuffer) +VkResult +anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) { - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_cmd_buffer *cmd_buffer; + struct anv_device *device = cmd_buffer->device; VkResult result; - assert(pCreateInfo->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - - cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (cmd_buffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - cmd_buffer->device = device; - result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo); if (result != VK_SUCCESS) - goto fail; + return result; result = 
anv_reloc_list_init(&cmd_buffer->batch.relocs, device); if (result != VK_SUCCESS) @@ -442,15 +430,6 @@ VkResult anv_CreateCommandBuffer( cmd_buffer->exec2_bos = NULL; cmd_buffer->exec2_array_length = 0; - anv_state_stream_init(&cmd_buffer->surface_state_stream, - &device->surface_state_block_pool); - anv_state_stream_init(&cmd_buffer->dynamic_state_stream, - &device->dynamic_state_block_pool); - - anv_cmd_state_init(&cmd_buffer->state); - - *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); - return VK_SUCCESS; fail_ss_batch_bo: @@ -459,20 +438,14 @@ VkResult anv_CreateCommandBuffer( anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); fail_batch_bo: anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device); - fail: - anv_device_free(device, cmd_buffer); return result; } -VkResult anv_DestroyCommandBuffer( - VkDevice _device, - VkCmdBuffer _cmd_buffer) +void +anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) { - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); - - anv_cmd_state_fini(&cmd_buffer->state); + struct anv_device *device = cmd_buffer->device; /* Destroy all of the batch buffers */ struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; @@ -492,10 +465,90 @@ VkResult anv_DestroyCommandBuffer( } anv_reloc_list_finish(&cmd_buffer->surface_relocs, device); - anv_state_stream_finish(&cmd_buffer->surface_state_stream); - anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); anv_device_free(device, cmd_buffer->exec2_objects); anv_device_free(device, cmd_buffer->exec2_bos); +} + +void +anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + + /* Delete all but the first batch bo */ + while (cmd_buffer->last_batch_bo->prev_batch_bo) { + struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo; + anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device); + cmd_buffer->last_batch_bo = prev; + } + 
assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL); + + cmd_buffer->batch.relocs.num_relocs = 0; + anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + /* Delete all but the first batch bo */ + while (cmd_buffer->surface_batch_bo->prev_batch_bo) { + struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo; + anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device); + cmd_buffer->surface_batch_bo = prev; + } + assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL); + + cmd_buffer->surface_next = 1; + cmd_buffer->surface_relocs.num_relocs = 0; +} + +VkResult anv_CreateCommandBuffer( + VkDevice _device, + const VkCmdBufferCreateInfo* pCreateInfo, + VkCmdBuffer* pCmdBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_cmd_buffer *cmd_buffer; + VkResult result; + + assert(pCreateInfo->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + + cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cmd_buffer->device = device; + + result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); + if (result != VK_SUCCESS) + goto fail; + + anv_cmd_state_init(&cmd_buffer->state); + + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &device->dynamic_state_block_pool); + + *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); + + return VK_SUCCESS; + + fail: anv_device_free(device, cmd_buffer); + + return result; +} + +VkResult anv_DestroyCommandBuffer( + VkDevice _device, + VkCmdBuffer _cmd_buffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); + + anv_cmd_state_fini(&cmd_buffer->state); + + anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + 
anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); anv_device_free(device, cmd_buffer); return VK_SUCCESS; @@ -677,28 +730,7 @@ VkResult anv_ResetCommandBuffer( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - /* Delete all but the first batch bo */ - while (cmd_buffer->last_batch_bo->prev_batch_bo) { - struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo; - anv_batch_bo_destroy(cmd_buffer->last_batch_bo, cmd_buffer->device); - cmd_buffer->last_batch_bo = prev; - } - assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL); - - cmd_buffer->batch.relocs.num_relocs = 0; - anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); - - /* Delete all but the first batch bo */ - while (cmd_buffer->surface_batch_bo->prev_batch_bo) { - struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo; - anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, cmd_buffer->device); - cmd_buffer->surface_batch_bo = prev; - } - assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL); - - cmd_buffer->surface_next = 1; - cmd_buffer->surface_relocs.num_relocs = 0; + anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); anv_cmd_state_fini(&cmd_buffer->state); anv_cmd_state_init(&cmd_buffer->state); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index e72b8a3ae36..1fc84e41d36 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -696,7 +696,10 @@ void anv_cmd_state_fini(struct anv_cmd_state *state); struct anv_cmd_buffer { struct anv_device * device; - /* Fields required for the actual chain of anv_batch_bo's */ + /* Fields required for the actual chain of anv_batch_bo's. + * + * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). 
+ */ struct anv_batch batch; struct anv_batch_bo * last_batch_bo; struct anv_batch_bo * surface_batch_bo; @@ -721,6 +724,10 @@ struct anv_cmd_buffer { struct anv_cmd_state state; }; +VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); + struct anv_state anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment); -- cgit v1.2.3 From 117d74b4e20d2fa9713ad83f85ca5c0f70b9b12e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 27 Jul 2015 14:52:16 -0700 Subject: vk/cmd_buffer: Factor the guts of CmdBufferEnd into two helpers --- src/vulkan/anv_cmd_buffer.c | 36 +++++++++++++++++++++++++----------- src/vulkan/anv_private.h | 2 ++ 2 files changed, 27 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 085878bd69d..f7d6990157b 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -650,26 +650,25 @@ anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, } } -VkResult anv_EndCommandBuffer( - VkCmdBuffer cmdBuffer) +void +anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_device *device = cmd_buffer->device; - struct anv_batch *batch = &cmd_buffer->batch; - - anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END); + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END); anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch); cmd_buffer->surface_batch_bo->num_relocs = cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc; cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next; +} + +void +anv_cmd_buffer_compute_validate_list(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch *batch = 
&cmd_buffer->batch; cmd_buffer->exec2_bo_count = 0; cmd_buffer->need_reloc = false; - /* Lock for access to bo->index. */ - pthread_mutex_lock(&device->mutex); - /* Add surface state bos first so we can add them with their relocs. */ for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo; bbo != NULL; bbo = bbo->prev_batch_bo) { @@ -716,9 +715,24 @@ VkResult anv_EndCommandBuffer( if (!cmd_buffer->need_reloc) cmd_buffer->execbuf.flags |= I915_EXEC_NO_RELOC; cmd_buffer->execbuf.flags |= I915_EXEC_RENDER; - cmd_buffer->execbuf.rsvd1 = device->context_id; + cmd_buffer->execbuf.rsvd1 = cmd_buffer->device->context_id; cmd_buffer->execbuf.rsvd2 = 0; +} + +VkResult anv_EndCommandBuffer( + VkCmdBuffer cmdBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_device *device = cmd_buffer->device; + anv_cmd_buffer_emit_batch_buffer_end(cmd_buffer); + + /* The algorithm used to compute the validate list is not threadsafe as + * it uses the bo->index field. We have to lock the device around it. + * Fortunately, the chances for contention here are probably very low. 
+ */ + pthread_mutex_lock(&device->mutex); + anv_cmd_buffer_compute_validate_list(cmd_buffer); pthread_mutex_unlock(&device->mutex); return VK_SUCCESS; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 1fc84e41d36..1ddc2722cea 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -727,6 +727,8 @@ struct anv_cmd_buffer { VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_compute_validate_list(struct anv_cmd_buffer *cmd_buffer); struct anv_state anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, -- cgit v1.2.3 From d4c249364dc7049cf91d8f626b289d51a53b3bc1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 27 Jul 2015 15:05:04 -0700 Subject: vk/cmd_buffer: Move the re-emission of STATE_BASE_ADDRESS to the flushing code This used to happen magically in cmd_buffer_new_surface_state_bo. However, according to Ken, STATE_BASE_ADDRESS is very gen-specific so we really shouldn't have it in the generic data-structure code. --- src/vulkan/anv_cmd_buffer.c | 45 --------------------------------------------- src/vulkan/anv_cmd_emit.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index f7d6990157b..34916386b6a 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -345,51 +345,6 @@ anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) new_bbo->prev_batch_bo = old_bbo; cmd_buffer->surface_batch_bo = new_bbo; - /* Re-emit state base addresses so we get the new surface state base - * address before we start emitting binding tables etc. 
- */ - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - /* After re-setting the surface state base address, we have to do some - * cache flusing so that the sampler engine will pick up the new - * SURFACE_STATE objects and binding tables. From the Broadwell PRM, - * Shared Function > 3D Sampler > State > State Caching (page 96): - * - * Coherency with system memory in the state cache, like the texture - * cache is handled partially by software. It is expected that the - * command stream or shader will issue Cache Flush operation or - * Cache_Flush sampler message to ensure that the L1 cache remains - * coherent with system memory. - * - * [...] - * - * Whenever the value of the Dynamic_State_Base_Addr, - * Surface_State_Base_Addr are altered, the L1 state cache must be - * invalidated to ensure the new surface or sampler state is fetched - * from system memory. - * - * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit - * which, according the PIPE_CONTROL instruction documentation in the - * Broadwell PRM: - * - * Setting this bit is independent of any other bit in this packet. - * This bit controls the invalidation of the L1 and L2 state caches - * at the top of the pipe i.e. at the parsing time. - * - * Unfortunately, experimentation seems to indicate that state cache - * invalidation through a PIPE_CONTROL does nothing whatsoever in - * regards to surface state and binding tables. In stead, it seems that - * invalidating the texture cache is what is actually needed. - * - * XXX: As far as we have been able to determine through - * experimentation, shows that flush the texture cache appears to be - * sufficient. The theory here is that all of the sampling/rendering - * units cache the binding table in the texture cache. However, we have - * yet to be able to actually confirm this. 
- */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .TextureCacheInvalidationEnable = true); - return VK_SUCCESS; } diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index 8654c4a0ac7..61d58be33a6 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -504,6 +504,51 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); assert(result == VK_SUCCESS); + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. + */ + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + /* After re-setting the surface state base address, we have to do + * some cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the + * texture cache is handled partially by software. It is expected + * that the command stream or shader will issue Cache Flush + * operation or Cache_Flush sampler message to ensure that the L1 + * cache remains coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is + * fetched from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" + * bit which, according the PIPE_CONTROL instruction documentation in + * the Broadwell PRM: + * + * Setting this bit is independent of any other bit in this + * packet. This bit controls the invalidation of the L1 and L2 + * state caches at the top of the pipe i.e. at the parsing time. 
+ * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems + * that invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we + * have yet to be able to actually confirm this. + */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .TextureCacheInvalidationEnable = true); + /* Re-emit all active binding tables */ for_each_bit(s, cmd_buffer->state.pipeline->active_stages) { result = flush_descriptor_set(cmd_buffer, s); -- cgit v1.2.3 From 4ced8650d492c8a346bdc51c494b2268d7bf473d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 27 Jul 2015 15:14:31 -0700 Subject: vk/cmd_buffer: Move the remaining entrypoints into cmd_emit.c --- src/vulkan/anv_cmd_buffer.c | 89 --------------------------------------------- src/vulkan/anv_cmd_emit.c | 86 +++++++++++++++++++++++++++++++++++++++++-- src/vulkan/anv_private.h | 3 -- 3 files changed, 82 insertions(+), 96 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 34916386b6a..bb58338c85f 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -453,62 +453,6 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->surface_relocs.num_relocs = 0; } -VkResult anv_CreateCommandBuffer( - VkDevice _device, - const VkCmdBufferCreateInfo* pCreateInfo, - VkCmdBuffer* pCmdBuffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_cmd_buffer *cmd_buffer; - VkResult result; - - assert(pCreateInfo->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - - cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 
8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (cmd_buffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - cmd_buffer->device = device; - - result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); - if (result != VK_SUCCESS) - goto fail; - - anv_cmd_state_init(&cmd_buffer->state); - - anv_state_stream_init(&cmd_buffer->surface_state_stream, - &device->surface_state_block_pool); - anv_state_stream_init(&cmd_buffer->dynamic_state_stream, - &device->dynamic_state_block_pool); - - *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); - - return VK_SUCCESS; - - fail: anv_device_free(device, cmd_buffer); - - return result; -} - -VkResult anv_DestroyCommandBuffer( - VkDevice _device, - VkCmdBuffer _cmd_buffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); - - anv_cmd_state_fini(&cmd_buffer->state); - - anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); - - anv_state_stream_finish(&cmd_buffer->surface_state_stream); - anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - anv_device_free(device, cmd_buffer); - - return VK_SUCCESS; -} - static VkResult anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *bo, @@ -673,36 +617,3 @@ anv_cmd_buffer_compute_validate_list(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->execbuf.rsvd1 = cmd_buffer->device->context_id; cmd_buffer->execbuf.rsvd2 = 0; } - -VkResult anv_EndCommandBuffer( - VkCmdBuffer cmdBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_device *device = cmd_buffer->device; - - anv_cmd_buffer_emit_batch_buffer_end(cmd_buffer); - - /* The algorithm used to compute the validate list is not threadsafe as - * it uses the bo->index field. We have to lock the device around it. - * Fortunately, the chances for contention here are probably very low. 
- */ - pthread_mutex_lock(&device->mutex); - anv_cmd_buffer_compute_validate_list(cmd_buffer); - pthread_mutex_unlock(&device->mutex); - - return VK_SUCCESS; -} - -VkResult anv_ResetCommandBuffer( - VkCmdBuffer cmdBuffer, - VkCmdBufferResetFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); - - anv_cmd_state_fini(&cmd_buffer->state); - anv_cmd_state_init(&cmd_buffer->state); - - return VK_SUCCESS; -} diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index 61d58be33a6..c624906ae36 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -38,7 +38,7 @@ * is concerned, most of anv_cmd_buffer is magic. */ -VkResult +static void anv_cmd_state_init(struct anv_cmd_state *state) { state->rs_state = NULL; @@ -55,14 +55,73 @@ anv_cmd_state_init(struct anv_cmd_state *state) state->vp_state = NULL; state->rs_state = NULL; state->ds_state = NULL; +} + +VkResult anv_CreateCommandBuffer( + VkDevice _device, + const VkCmdBufferCreateInfo* pCreateInfo, + VkCmdBuffer* pCmdBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_cmd_buffer *cmd_buffer; + VkResult result; + + assert(pCreateInfo->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + + cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cmd_buffer->device = device; + + result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); + if (result != VK_SUCCESS) + goto fail; + + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &device->dynamic_state_block_pool); + + anv_cmd_state_init(&cmd_buffer->state); + + *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); return VK_SUCCESS; + + fail: anv_device_free(device, cmd_buffer); + + return result; } -void -anv_cmd_state_fini(struct anv_cmd_state 
*state) +VkResult anv_DestroyCommandBuffer( + VkDevice _device, + VkCmdBuffer _cmd_buffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); + + anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_device_free(device, cmd_buffer); + + return VK_SUCCESS; +} + +VkResult anv_ResetCommandBuffer( + VkCmdBuffer cmdBuffer, + VkCmdBufferResetFlags flags) { - /* Nothing we need to finish right now */ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); + + anv_cmd_state_init(&cmd_buffer->state); + + return VK_SUCCESS; } void @@ -117,6 +176,25 @@ VkResult anv_BeginCommandBuffer( return VK_SUCCESS; } +VkResult anv_EndCommandBuffer( + VkCmdBuffer cmdBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_device *device = cmd_buffer->device; + + anv_cmd_buffer_emit_batch_buffer_end(cmd_buffer); + + /* The algorithm used to compute the validate list is not threadsafe as + * it uses the bo->index field. We have to lock the device around it. + * Fortunately, the chances for contention here are probably very low. 
+ */ + pthread_mutex_lock(&device->mutex); + anv_cmd_buffer_compute_validate_list(cmd_buffer); + pthread_mutex_unlock(&device->mutex); + + return VK_SUCCESS; +} + void anv_CmdBindPipeline( VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 1ddc2722cea..f8def1108cb 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -690,9 +690,6 @@ struct anv_cmd_state { struct anv_descriptor_set_binding descriptors[MAX_SETS]; }; -VkResult anv_cmd_state_init(struct anv_cmd_state *state); -void anv_cmd_state_fini(struct anv_cmd_state *state); - struct anv_cmd_buffer { struct anv_device * device; -- cgit v1.2.3 From 3c2743dcd10f2df459363a04a257b91ca7299092 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 27 Jul 2015 16:37:09 -0700 Subject: vk/cmd_buffer: Pull the execbuf stuff into a substruct --- src/vulkan/anv_aub.c | 12 +++---- src/vulkan/anv_cmd_buffer.c | 85 +++++++++++++++++++++++---------------------- src/vulkan/anv_cmd_emit.c | 2 +- src/vulkan/anv_device.c | 6 ++-- src/vulkan/anv_private.h | 22 ++++++++---- 5 files changed, 68 insertions(+), 59 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_aub.c b/src/vulkan/anv_aub.c index 97f124a0aad..c371cce1bb8 100644 --- a/src/vulkan/anv_aub.c +++ b/src/vulkan/anv_aub.c @@ -248,10 +248,10 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) if (writer == NULL) return; - aub_bos = malloc(cmd_buffer->exec2_bo_count * sizeof(aub_bos[0])); + aub_bos = malloc(cmd_buffer->execbuf2.bo_count * sizeof(aub_bos[0])); offset = writer->offset; - for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) { - bo = cmd_buffer->exec2_bos[i]; + for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) { + bo = cmd_buffer->execbuf2.bos[i]; if (bo->map) aub_bos[i].map = bo->map; else @@ -282,9 +282,9 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) bbo->num_relocs, aub_bos); } - for (uint32_t i = 0; i < 
cmd_buffer->exec2_bo_count; i++) { - bo = cmd_buffer->exec2_bos[i]; - if (i == cmd_buffer->exec2_bo_count - 1) { + for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) { + bo = cmd_buffer->execbuf2.bos[i]; + if (i == cmd_buffer->execbuf2.bo_count - 1) { assert(bo == &first_bbo->bo); aub_write_trace_block(writer, AUB_TRACE_TYPE_BATCH, aub_bos[i].relocated, diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index bb58338c85f..1c891e19ac6 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -381,9 +381,9 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) /* Start surface_next at 1 so surface offset 0 is invalid. */ cmd_buffer->surface_next = 1; - cmd_buffer->exec2_objects = NULL; - cmd_buffer->exec2_bos = NULL; - cmd_buffer->exec2_array_length = 0; + cmd_buffer->execbuf2.objects = NULL; + cmd_buffer->execbuf2.bos = NULL; + cmd_buffer->execbuf2.array_length = 0; return VK_SUCCESS; @@ -420,8 +420,8 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) } anv_reloc_list_finish(&cmd_buffer->surface_relocs, device); - anv_device_free(device, cmd_buffer->exec2_objects); - anv_device_free(device, cmd_buffer->exec2_bos); + anv_device_free(device, cmd_buffer->execbuf2.objects); + anv_device_free(device, cmd_buffer->execbuf2.bos); } void @@ -461,13 +461,13 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, { struct drm_i915_gem_exec_object2 *obj; - if (bo->index < cmd_buffer->exec2_bo_count && - cmd_buffer->exec2_bos[bo->index] == bo) + if (bo->index < cmd_buffer->execbuf2.bo_count && + cmd_buffer->execbuf2.bos[bo->index] == bo) return VK_SUCCESS; - if (cmd_buffer->exec2_bo_count >= cmd_buffer->exec2_array_length) { - uint32_t new_len = cmd_buffer->exec2_objects ? - cmd_buffer->exec2_array_length * 2 : 64; + if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { + uint32_t new_len = cmd_buffer->execbuf2.objects ? 
+ cmd_buffer->execbuf2.array_length * 2 : 64; struct drm_i915_gem_exec_object2 *new_objects = anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects), @@ -483,23 +483,23 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - if (cmd_buffer->exec2_objects) { - memcpy(new_objects, cmd_buffer->exec2_objects, - cmd_buffer->exec2_bo_count * sizeof(*new_objects)); - memcpy(new_bos, cmd_buffer->exec2_bos, - cmd_buffer->exec2_bo_count * sizeof(*new_bos)); + if (cmd_buffer->execbuf2.objects) { + memcpy(new_objects, cmd_buffer->execbuf2.objects, + cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); + memcpy(new_bos, cmd_buffer->execbuf2.bos, + cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); } - cmd_buffer->exec2_objects = new_objects; - cmd_buffer->exec2_bos = new_bos; - cmd_buffer->exec2_array_length = new_len; + cmd_buffer->execbuf2.objects = new_objects; + cmd_buffer->execbuf2.bos = new_bos; + cmd_buffer->execbuf2.array_length = new_len; } - assert(cmd_buffer->exec2_bo_count < cmd_buffer->exec2_array_length); + assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); - bo->index = cmd_buffer->exec2_bo_count++; - obj = &cmd_buffer->exec2_objects[bo->index]; - cmd_buffer->exec2_bos[bo->index] = bo; + bo->index = cmd_buffer->execbuf2.bo_count++; + obj = &cmd_buffer->execbuf2.objects[bo->index]; + cmd_buffer->execbuf2.bos[bo->index] = bo; obj->handle = bo->gem_handle; obj->relocation_count = 0; @@ -543,7 +543,7 @@ anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, for (size_t i = 0; i < list->num_relocs; i++) { bo = list->reloc_bos[i]; if (bo->offset != list->relocs[i].presumed_offset) - cmd_buffer->need_reloc = true; + cmd_buffer->execbuf2.need_reloc = true; list->relocs[i].target_handle = bo->index; } @@ -561,12 +561,12 @@ anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer) } void -anv_cmd_buffer_compute_validate_list(struct anv_cmd_buffer 
*cmd_buffer) +anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) { struct anv_batch *batch = &cmd_buffer->batch; - cmd_buffer->exec2_bo_count = 0; - cmd_buffer->need_reloc = false; + cmd_buffer->execbuf2.bo_count = 0; + cmd_buffer->execbuf2.need_reloc = false; /* Add surface state bos first so we can add them with their relocs. */ for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo; @@ -596,24 +596,25 @@ anv_cmd_buffer_compute_validate_list(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo, &batch->relocs.relocs[batch_bo->first_reloc], batch_bo->num_relocs); - assert(batch_bo->bo.index == cmd_buffer->exec2_bo_count - 1); + assert(batch_bo->bo.index == cmd_buffer->execbuf2.bo_count - 1); anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs); - cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects; - cmd_buffer->execbuf.buffer_count = cmd_buffer->exec2_bo_count; - cmd_buffer->execbuf.batch_start_offset = 0; - cmd_buffer->execbuf.batch_len = batch->next - batch->start; - cmd_buffer->execbuf.cliprects_ptr = 0; - cmd_buffer->execbuf.num_cliprects = 0; - cmd_buffer->execbuf.DR1 = 0; - cmd_buffer->execbuf.DR4 = 0; - - cmd_buffer->execbuf.flags = I915_EXEC_HANDLE_LUT; - if (!cmd_buffer->need_reloc) - cmd_buffer->execbuf.flags |= I915_EXEC_NO_RELOC; - cmd_buffer->execbuf.flags |= I915_EXEC_RENDER; - cmd_buffer->execbuf.rsvd1 = cmd_buffer->device->context_id; - cmd_buffer->execbuf.rsvd2 = 0; + cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, + .buffer_count = cmd_buffer->execbuf2.bo_count, + .batch_start_offset = 0, + .batch_len = batch->next - batch->start, + .cliprects_ptr = 0, + .num_cliprects = 0, + .DR1 = 0, + .DR4 = 0, + .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER, + .rsvd1 = cmd_buffer->device->context_id, + .rsvd2 = 0, + }; + + if 
(!cmd_buffer->execbuf2.need_reloc) + cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; } diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index c624906ae36..8178e697964 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -189,7 +189,7 @@ VkResult anv_EndCommandBuffer( * Fortunately, the chances for contention here are probably very low. */ pthread_mutex_lock(&device->mutex); - anv_cmd_buffer_compute_validate_list(cmd_buffer); + anv_cmd_buffer_prepare_execbuf(cmd_buffer); pthread_mutex_unlock(&device->mutex); return VK_SUCCESS; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 0bcf9b04b45..ee7682a5d9a 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -742,7 +742,7 @@ VkResult anv_QueueSubmit( anv_cmd_buffer_dump(cmd_buffer); if (!device->no_hw) { - ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf); + ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); if (ret != 0) return vk_error(VK_ERROR_UNKNOWN); @@ -752,8 +752,8 @@ VkResult anv_QueueSubmit( return vk_error(VK_ERROR_UNKNOWN); } - for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) - cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset; + for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) + cmd_buffer->execbuf2.bos[i]->offset = cmd_buffer->execbuf2.objects[i].offset; } else { *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial; } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index f8def1108cb..3d364ef30d4 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -706,12 +706,20 @@ struct anv_cmd_buffer { /* Information needed for execbuf that's generated when the command * buffer is ended. 
*/ - struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 * exec2_objects; - uint32_t exec2_bo_count; - struct anv_bo ** exec2_bos; - uint32_t exec2_array_length; - bool need_reloc; + struct { + struct drm_i915_gem_execbuffer2 execbuf; + + struct drm_i915_gem_exec_object2 * objects; + uint32_t bo_count; + struct anv_bo ** bos; + + /* Allocated length of the 'objects' and 'bos' arrays */ + uint32_t array_length; + + bool need_reloc; + } execbuf2; + + /* Serial for tracking buffer completion */ uint32_t serial; /* Stream objects for storing temporary data */ @@ -725,7 +733,7 @@ VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_compute_validate_list(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); struct anv_state anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, -- cgit v1.2.3 From 6aba52381a8bfad6f0f5bf7cda8cc77e57079e1a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 28 Jul 2015 16:49:19 -0700 Subject: vk/aub: Use the data directly from the execbuf2 Previously, we were crawling through the anv_cmd_buffer datastructure to pull out batch buffers and things. This meant that every time something in anv_cmd_buffer changed, we broke aub dumping. However, aub dumping should just dump the stuff the kernel knows about so we really don't need to be crawling driver internals. 
--- src/vulkan/anv_aub.c | 55 ++++++++++++++++++---------------------------------- 1 file changed, 19 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_aub.c b/src/vulkan/anv_aub.c index c371cce1bb8..e4a35873590 100644 --- a/src/vulkan/anv_aub.c +++ b/src/vulkan/anv_aub.c @@ -210,26 +210,27 @@ aub_build_dump_ringbuffer(struct anv_aub_writer *writer, } struct aub_bo { + uint32_t size; uint32_t offset; void *map; void *relocated; }; static void -relocate_bo(struct anv_bo *bo, struct drm_i915_gem_relocation_entry *relocs, - size_t num_relocs, struct aub_bo *bos) +relocate_bo(struct aub_bo *aub_bo, + const struct drm_i915_gem_exec_object2 *gem_obj, + struct aub_bo *aub_bos) { - struct aub_bo *aub_bo = &bos[bo->index]; - struct drm_i915_gem_relocation_entry *reloc; + const struct drm_i915_gem_relocation_entry *relocs = + (const struct drm_i915_gem_relocation_entry *) gem_obj->relocs_ptr; uint32_t *dw; - aub_bo->relocated = malloc(bo->size); - memcpy(aub_bo->relocated, aub_bo->map, bo->size); - for (size_t i = 0; i < num_relocs; i++) { - reloc = &relocs[i]; - assert(reloc->offset < bo->size); - dw = aub_bo->relocated + reloc->offset; - *dw = bos[reloc->target_handle].offset + reloc->delta; + aub_bo->relocated = malloc(aub_bo->size); + memcpy(aub_bo->relocated, aub_bo->map, aub_bo->size); + for (size_t i = 0; i < gem_obj->relocation_count; i++) { + assert(relocs[i].offset < aub_bo->size); + dw = aub_bo->relocated + relocs[i].offset; + *dw = aub_bos[relocs[i].target_handle].offset + relocs[i].delta; } } @@ -237,7 +238,6 @@ void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; - struct anv_batch *batch = &cmd_buffer->batch; struct anv_aub_writer *writer; struct anv_bo *bo; uint32_t ring_flag = 0; @@ -256,39 +256,23 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) aub_bos[i].map = bo->map; else aub_bos[i].map = anv_gem_mmap(device, bo->gem_handle, 0, bo->size); + aub_bos[i].size = 
bo->size; aub_bos[i].relocated = aub_bos[i].map; aub_bos[i].offset = offset; offset = align_u32(offset + bo->size + 4095, 4096); } - struct anv_batch_bo *first_bbo; - for (struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; - bbo != NULL; bbo = bbo->prev_batch_bo) { - /* Keep stashing the current BO until we get to the beginning */ - first_bbo = bbo; + for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) + relocate_bo(&aub_bos[i], &cmd_buffer->execbuf2.objects[i], aub_bos); - /* Handle relocations for this batch BO */ - relocate_bo(&bbo->bo, &batch->relocs.relocs[bbo->first_reloc], - bbo->num_relocs, aub_bos); - } - assert(first_bbo->prev_batch_bo == NULL); - - for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo; - bbo != NULL; bbo = bbo->prev_batch_bo) { - - /* Handle relocations for this surface state BO */ - relocate_bo(&bbo->bo, - &cmd_buffer->surface_relocs.relocs[bbo->first_reloc], - bbo->num_relocs, aub_bos); - } + struct aub_bo *batch_bo = &aub_bos[cmd_buffer->execbuf2.bo_count - 1]; for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) { bo = cmd_buffer->execbuf2.bos[i]; - if (i == cmd_buffer->execbuf2.bo_count - 1) { - assert(bo == &first_bbo->bo); + if (&aub_bos[i] == batch_bo) { aub_write_trace_block(writer, AUB_TRACE_TYPE_BATCH, aub_bos[i].relocated, - first_bbo->length, aub_bos[i].offset); + bo->size, aub_bos[i].offset); } else { aub_write_trace_block(writer, AUB_TRACE_TYPE_NOTYPE, aub_bos[i].relocated, @@ -301,8 +285,7 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) } /* Dump ring buffer */ - aub_build_dump_ringbuffer(writer, aub_bos[first_bbo->bo.index].offset, - offset, ring_flag); + aub_build_dump_ringbuffer(writer, batch_bo->offset, offset, ring_flag); free(aub_bos); -- cgit v1.2.3 From 86a53d2880b27ef9b0a75f3a33debc0cd18f1ff7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 28 Jul 2015 17:47:04 -0700 Subject: vk/cmd_buffer: Use a doubly-linked list for batch and surface buffers This is probably better 
than hand-rolling the list of buffers. --- src/vulkan/anv_cmd_buffer.c | 141 +++++++++++++++++++++++++++----------------- src/vulkan/anv_cmd_emit.c | 2 +- src/vulkan/anv_private.h | 15 +++-- 3 files changed, 98 insertions(+), 60 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 1c891e19ac6..a986df2860c 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -217,7 +217,6 @@ anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); bbo->num_relocs = 0; - bbo->prev_batch_bo = NULL; result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); if (result != VK_SUCCESS) { @@ -263,12 +262,30 @@ anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) * Functions related to anv_batch_bo *-----------------------------------------------------------------------*/ +static inline struct anv_batch_bo * +anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); +} + +static inline struct anv_batch_bo * +anv_cmd_buffer_current_surface_bbo(struct anv_cmd_buffer *cmd_buffer) +{ + return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->surface_bos.prev, link); +} + +struct anv_bo * +anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo; +} + static VkResult anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) { struct anv_cmd_buffer *cmd_buffer = _data; - - struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->last_batch_bo; + struct anv_batch_bo *new_bbo, *old_bbo = + anv_cmd_buffer_current_batch_bo(cmd_buffer); VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); if (result != VK_SUCCESS) @@ -288,10 +305,9 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) .BatchBufferStartAddress = { &new_bbo->bo, 0 }, ); - 
anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch); + anv_batch_bo_finish(old_bbo, batch); - new_bbo->prev_batch_bo = old_bbo; - cmd_buffer->last_batch_bo = new_bbo; + list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); @@ -302,17 +318,19 @@ struct anv_state anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment) { + struct anv_bo *surface_bo = + anv_cmd_buffer_current_surface_bo(cmd_buffer); struct anv_state state; state.offset = align_u32(cmd_buffer->surface_next, alignment); - if (state.offset + size > cmd_buffer->surface_batch_bo->bo.size) + if (state.offset + size > surface_bo->size) return (struct anv_state) { 0 }; - state.map = cmd_buffer->surface_batch_bo->bo.map + state.offset; + state.map = surface_bo->map + state.offset; state.alloc_size = size; cmd_buffer->surface_next = state.offset + size; - assert(state.offset + size <= cmd_buffer->surface_batch_bo->bo.size); + assert(state.offset + size <= surface_bo->size); return state; } @@ -328,7 +346,8 @@ anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) { - struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->surface_batch_bo; + struct anv_batch_bo *new_bbo, *old_bbo = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); /* Finish off the old buffer */ old_bbo->num_relocs = @@ -342,8 +361,7 @@ anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs; cmd_buffer->surface_next = 1; - new_bbo->prev_batch_bo = old_bbo; - cmd_buffer->surface_batch_bo = new_bbo; + list_addtail(&new_bbo->link, &cmd_buffer->surface_bos); return VK_SUCCESS; } @@ -351,13 +369,19 @@ anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) { + struct 
anv_batch_bo *batch_bo, *surface_bbo; struct anv_device *device = cmd_buffer->device; VkResult result; - result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo); + list_inithead(&cmd_buffer->batch_bos); + list_inithead(&cmd_buffer->surface_bos); + + result = anv_batch_bo_create(device, &batch_bo); if (result != VK_SUCCESS) return result; + list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); + result = anv_reloc_list_init(&cmd_buffer->batch.relocs, device); if (result != VK_SUCCESS) goto fail_batch_bo; @@ -366,13 +390,15 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; cmd_buffer->batch.user_data = cmd_buffer; - anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch, + anv_batch_bo_start(batch_bo, &cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START_length * 4); - result = anv_batch_bo_create(device, &cmd_buffer->surface_batch_bo); + result = anv_batch_bo_create(device, &surface_bbo); if (result != VK_SUCCESS) goto fail_batch_relocs; - cmd_buffer->surface_batch_bo->first_reloc = 0; + + surface_bbo->first_reloc = 0; + list_addtail(&surface_bbo->link, &cmd_buffer->surface_bos); result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device); if (result != VK_SUCCESS) @@ -388,11 +414,11 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; fail_ss_batch_bo: - anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device); + anv_batch_bo_destroy(surface_bbo, device); fail_batch_relocs: anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); fail_batch_bo: - anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device); + anv_batch_bo_destroy(batch_bo, device); return result; } @@ -403,20 +429,16 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) struct anv_device *device = cmd_buffer->device; /* Destroy all of the batch buffers */ - struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo; - while (bbo) { - struct 
anv_batch_bo *prev = bbo->prev_batch_bo; + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->batch_bos, link) { anv_batch_bo_destroy(bbo, device); - bbo = prev; } anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); /* Destroy all of the surface state buffers */ - bbo = cmd_buffer->surface_batch_bo; - while (bbo) { - struct anv_batch_bo *prev = bbo->prev_batch_bo; + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->surface_bos, link) { anv_batch_bo_destroy(bbo, device); - bbo = prev; } anv_reloc_list_finish(&cmd_buffer->surface_relocs, device); @@ -430,24 +452,27 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) struct anv_device *device = cmd_buffer->device; /* Delete all but the first batch bo */ - while (cmd_buffer->last_batch_bo->prev_batch_bo) { - struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo; - anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device); - cmd_buffer->last_batch_bo = prev; + assert(!list_empty(&cmd_buffer->batch_bos)); + while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, device); } - assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL); + assert(!list_empty(&cmd_buffer->batch_bos)); cmd_buffer->batch.relocs.num_relocs = 0; - anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch, + anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), + &cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START_length * 4); /* Delete all but the first batch bo */ - while (cmd_buffer->surface_batch_bo->prev_batch_bo) { - struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo; - anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device); - cmd_buffer->surface_batch_bo = prev; + assert(!list_empty(&cmd_buffer->batch_bos)); + while (cmd_buffer->surface_bos.next != cmd_buffer->surface_bos.prev) { + struct anv_batch_bo 
*bbo = anv_cmd_buffer_current_surface_bbo(cmd_buffer); + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, device); } - assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL); + assert(!list_empty(&cmd_buffer->batch_bos)); cmd_buffer->surface_next = 1; cmd_buffer->surface_relocs.num_relocs = 0; @@ -552,12 +577,17 @@ anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, void anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer) { + struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + struct anv_batch_bo *surface_bbo = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END); - anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch); - cmd_buffer->surface_batch_bo->num_relocs = - cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc; - cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next; + anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + + surface_bbo->num_relocs = + cmd_buffer->surface_relocs.num_relocs - surface_bbo->first_reloc; + surface_bbo->length = cmd_buffer->surface_next; } void @@ -569,8 +599,8 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->execbuf2.need_reloc = false; /* Add surface state bos first so we can add them with their relocs. 
*/ - for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo; - bbo != NULL; bbo = bbo->prev_batch_bo) { + list_for_each_entry(struct anv_batch_bo, bbo, + &cmd_buffer->surface_bos, link) { anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, &cmd_buffer->surface_relocs.relocs[bbo->first_reloc], bbo->num_relocs); @@ -579,24 +609,27 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) /* Add all of the BOs referenced by surface state */ anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs); + struct anv_batch_bo *first_batch_bo = + LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.next, link); + /* Add all but the first batch BO */ - struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo; - while (batch_bo->prev_batch_bo) { - anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo, - &batch->relocs.relocs[batch_bo->first_reloc], - batch_bo->num_relocs); - batch_bo = batch_bo->prev_batch_bo; + list_for_each_entry(struct anv_batch_bo, bbo, &cmd_buffer->batch_bos, link) { + if (bbo == first_batch_bo) + continue; + + anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, + &batch->relocs.relocs[bbo->first_reloc], + bbo->num_relocs); } /* Add everything referenced by the batches */ anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs); /* Add the first batch bo last */ - assert(batch_bo->prev_batch_bo == NULL && batch_bo->first_reloc == 0); - anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo, - &batch->relocs.relocs[batch_bo->first_reloc], - batch_bo->num_relocs); - assert(batch_bo->bo.index == cmd_buffer->execbuf2.bo_count - 1); + anv_cmd_buffer_add_bo(cmd_buffer, &first_batch_bo->bo, + &batch->relocs.relocs[first_batch_bo->first_reloc], + first_batch_bo->num_relocs); + assert(first_batch_bo->bo.index == cmd_buffer->execbuf2.bo_count - 1); anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs); diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index 
8178e697964..e5997f755e0 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -141,7 +141,7 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) .GeneralStateBufferSize = 0xfffff, .GeneralStateBufferSizeModifyEnable = true, - .SurfaceStateBaseAddress = { &cmd_buffer->surface_batch_bo->bo, 0 }, + .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, .SurfaceStateMemoryObjectControlState = GEN8_MOCS, .SurfaceStateBaseAddressModifyEnable = true, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 3d364ef30d4..14f4ac2004a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -41,6 +41,7 @@ #include "brw_device_info.h" #include "util/macros.h" +#include "util/list.h" #define VK_PROTOTYPES #include @@ -462,6 +463,9 @@ uint64_t anv_reloc_list_add(struct anv_reloc_list *list, uint32_t delta); struct anv_batch_bo { + /* Link in the anv_cmd_buffer.owned_batch_bos list */ + struct list_head link; + struct anv_bo bo; /* Bytes actually consumed in this batch BO */ @@ -470,8 +474,6 @@ struct anv_batch_bo { /* These offsets reference the per-batch reloc list */ size_t first_reloc; size_t num_relocs; - - struct anv_batch_bo * prev_batch_bo; }; struct anv_batch { @@ -693,13 +695,14 @@ struct anv_cmd_state { struct anv_cmd_buffer { struct anv_device * device; + struct anv_batch batch; + /* Fields required for the actual chain of anv_batch_bo's. * * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). 
*/ - struct anv_batch batch; - struct anv_batch_bo * last_batch_bo; - struct anv_batch_bo * surface_batch_bo; + struct list_head batch_bos; + struct list_head surface_bos; uint32_t surface_next; struct anv_reloc_list surface_relocs; @@ -735,6 +738,8 @@ void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); +struct anv_bo * +anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer); struct anv_state anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment); -- cgit v1.2.3 From 65f3d00cd6abb3ac1dec10808fbd1be693a3aaa3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Jul 2015 08:33:56 -0700 Subject: vk/cmd_buffer: Update a comment --- src/vulkan/anv_private.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 14f4ac2004a..3258e8c2adc 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -706,8 +706,9 @@ struct anv_cmd_buffer { uint32_t surface_next; struct anv_reloc_list surface_relocs; - /* Information needed for execbuf that's generated when the command - * buffer is ended. + /* Information needed for execbuf + * + * These fields are generated by anv_cmd_buffer_prepare_execbuf(). 
*/ struct { struct drm_i915_gem_execbuffer2 execbuf; -- cgit v1.2.3 From fcea3e2d23b7a02f9cc4b58870ac59107bcf0050 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 28 Jul 2015 16:32:52 -0700 Subject: vk/headers: Update to new generated gen headers This update fixes cases where a 48-bit address field was split into two parts: __gen_address_type MemoryAddress; uint32_t MemoryAddressHigh; which cases this pack code to be generated: dw[1] = __gen_combine_address(data, &dw[1], values->MemoryAddress, dw1); dw[2] = __gen_field(values->MemoryAddressHigh, 0, 15) | 0; which breaks for addresses above 4G. This update also fixes arrays of structs in commands and structs, for example, we now have: struct GEN8_BLEND_STATE_ENTRY Entry[8]; and the pack functions now write all dwords in the packet, making valgrind happy. Finally, we would try to pack 64 bits of blend state into a uint32_t - that's also fixed now. --- src/vulkan/anv_pipeline.c | 11 ++- src/vulkan/gen75_pack.h | 28 ++++---- src/vulkan/gen7_pack.h | 25 ++++--- src/vulkan/gen8_pack.h | 170 ++++++++++++++++++---------------------------- 4 files changed, 95 insertions(+), 139 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 5aeacefddf6..5a36faa29b5 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -370,7 +370,7 @@ emit_cb_state(struct anv_pipeline *pipeline, [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, }; - uint32_t num_dwords = 1 + info->attachmentCount * 2; + uint32_t num_dwords = GEN8_BLEND_STATE_length; pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); @@ -378,13 +378,10 @@ emit_cb_state(struct anv_pipeline *pipeline, .AlphaToCoverageEnable = info->alphaToCoverageEnable, }; - uint32_t *state = pipeline->blend_state.map; - GEN8_BLEND_STATE_pack(NULL, state, &blend_state); - for (uint32_t i = 0; i < info->attachmentCount; i++) { const VkPipelineColorBlendAttachmentState *a = 
&info->pAttachments[i]; - struct GEN8_BLEND_STATE_ENTRY entry = { + blend_state.Entry[i] = (struct GEN8_BLEND_STATE_ENTRY) { .LogicOpEnable = info->logicOpEnable, .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], .ColorBufferBlendEnable = a->blendEnable, @@ -402,10 +399,10 @@ emit_cb_state(struct anv_pipeline *pipeline, .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), }; - - GEN8_BLEND_STATE_ENTRY_pack(NULL, state + i * 2 + 1, &entry); } + GEN8_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_BLEND_STATE_POINTERS, .BlendStatePointer = pipeline->blend_state.offset, .BlendStatePointerValid = true); diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index 9ea8c2bcf65..583c5f25003 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -2806,7 +2806,7 @@ struct GEN75_3DSTATE_POLY_STIPPLE_PATTERN { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t PatternRow; + uint32_t PatternRow[32]; }; static inline void @@ -2823,9 +2823,11 @@ GEN75_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict d __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - __gen_field(values->PatternRow, 0, 31) | - 0; + for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { + dw[j] = + __gen_field(values->PatternRow[i + 0], 0, 31) | + 0; + } } @@ -5283,7 +5285,7 @@ struct GEN75_MEDIA_OBJECT_PRT { #define Rootthreadqueue 0 #define VFEstateflush 1 uint32_t PRT_FenceType; - uint32_t InlineData; + uint32_t InlineData[12]; }; static inline void @@ -5313,9 +5315,11 @@ GEN75_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, dw[3] = 0; - dw[4] = - __gen_field(values->InlineData, 0, 31) | - 0; + for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + __gen_field(values->InlineData[i + 0], 0, 31) | + 0; + } } @@ -7056,9 +7060,6 @@ 
GEN75_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, __gen_float(values->YMaxClipGuardband) | 0; - dw[12] = - 0; - } #define GEN75_BLEND_STATE_length 0x00000002 @@ -7563,7 +7564,6 @@ struct GEN75_RENDER_SURFACE_STATE { __gen_address_type AppendCounterAddress; bool AppendCounterEnable; bool MCSEnable; - uint32_t ReservedMBZ; uint32_t XOffsetforUVPlane; uint32_t YOffsetforUVPlane; #define SCS_ZERO 0 @@ -7642,7 +7642,6 @@ GEN75_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->MCSSurfacePitch, 3, 11) | __gen_field(values->AppendCounterEnable, 1, 1) | __gen_field(values->MCSEnable, 0, 0) | - __gen_field(values->ReservedMBZ, 30, 31) | __gen_field(values->XOffsetforUVPlane, 16, 29) | __gen_field(values->YOffsetforUVPlane, 0, 13) | 0; @@ -7702,9 +7701,6 @@ GEN75_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst __gen_field(values->BorderColorAlpha, 0, 31) | 0; - dw[4] = - 0; - dw[16] = __gen_field(values->BorderColor, 0, 127) | __gen_field(values->BorderColor, 0, 127) | diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index f9121b78868..05b800034e0 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -2156,7 +2156,7 @@ struct GEN7_3DSTATE_POLY_STIPPLE_PATTERN { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t PatternRow; + uint32_t PatternRow[32]; }; static inline void @@ -2173,9 +2173,11 @@ GEN7_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict ds __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - __gen_field(values->PatternRow, 0, 31) | - 0; + for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { + dw[j] = + __gen_field(values->PatternRow[i + 0], 0, 31) | + 0; + } } @@ -4451,7 +4453,7 @@ struct GEN7_MEDIA_OBJECT_PRT { #define Rootthreadqueue 0 #define VFEstateflush 1 uint32_t PRT_FenceType; - uint32_t InlineData; + uint32_t InlineData[12]; }; static inline void @@ -4481,9 +4483,11 @@ 
GEN7_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, dw[3] = 0; - dw[4] = - __gen_field(values->InlineData, 0, 31) | - 0; + for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + __gen_field(values->InlineData[i + 0], 0, 31) | + 0; + } } @@ -5812,9 +5816,6 @@ GEN7_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, __gen_float(values->YMaxClipGuardband) | 0; - dw[12] = - 0; - } #define GEN7_BLEND_STATE_length 0x00000002 @@ -6288,7 +6289,6 @@ struct GEN7_RENDER_SURFACE_STATE { uint32_t AppendCounterAddress; uint32_t AppendCounterEnable; uint32_t MCSEnable; - uint32_t ReservedMBZ; uint32_t XOffsetforUVPlane; uint32_t YOffsetforUVPlane; #define CC_ZERO 0 @@ -6366,7 +6366,6 @@ GEN7_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->AppendCounterAddress, 6, 31) | __gen_field(values->AppendCounterEnable, 1, 1) | __gen_field(values->MCSEnable, 0, 0) | - __gen_field(values->ReservedMBZ, 30, 31) | __gen_field(values->XOffsetforUVPlane, 16, 29) | __gen_field(values->YOffsetforUVPlane, 0, 13) | 0; diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index dd7f1b55a50..620b5a799c4 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -267,7 +267,7 @@ struct GEN8_GPGPU_CSR_BASE_ADDRESS { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - __gen_address_type GPGPUCSRBaseAddressHigh; + __gen_address_type GPGPUCSRBaseAddress; }; static inline void @@ -288,7 +288,7 @@ GEN8_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, 0; uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddressHigh, dw1); + __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddress, dw1); dw[1] = qw1; dw[2] = qw1 >> 32; @@ -319,7 +319,6 @@ struct GEN8_MI_ATOMIC { uint32_t ATOMICOPCODE; uint32_t DwordLength; __gen_address_type MemoryAddress; - uint32_t MemoryAddressHigh; uint32_t Operand1DataDword0; uint32_t Operand2DataDword0; 
uint32_t Operand1DataDword1; @@ -352,12 +351,11 @@ GEN8_MI_ATOMIC_pack(__gen_user_data *data, void * restrict dst, uint32_t dw1 = 0; - dw[1] = + uint64_t qw1 = __gen_combine_address(data, &dw[1], values->MemoryAddress, dw1); - dw[2] = - __gen_field(values->MemoryAddressHigh, 0, 15) | - 0; + dw[1] = qw1; + dw[2] = qw1 >> 32; dw[3] = __gen_field(values->Operand1DataDword0, 0, 31) | @@ -886,7 +884,6 @@ struct GEN8_SWTESS_BASE_ADDRESS { uint32_t DwordLength; __gen_address_type SWTessellationBaseAddress; struct GEN8_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; - __gen_address_type SWTessellationBaseAddressHigh; }; static inline void @@ -909,14 +906,11 @@ GEN8_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | 0; - dw[1] = + uint64_t qw1 = __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->SWTessellationBaseAddressHigh, dw2); + dw[1] = qw1; + dw[2] = qw1 >> 32; } @@ -3217,7 +3211,7 @@ struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN { uint32_t _3DCommandOpcode; uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - uint32_t PatternRow; + uint32_t PatternRow[32]; }; static inline void @@ -3234,9 +3228,11 @@ GEN8_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict ds __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - __gen_field(values->PatternRow, 0, 31) | - 0; + for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { + dw[j] = + __gen_field(values->PatternRow[i + 0], 0, 31) | + 0; + } } @@ -4191,9 +4187,6 @@ GEN8_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - dw[1] = - 0; - dw[5] = __gen_field(values->_8xSample7XOffset * (1 << 4), 28, 31) | __gen_field(values->_8xSample7YOffset * (1 << 4), 24, 27) | @@ -4363,23 +4356,8 @@ struct GEN8_3DSTATE_SBE_SWIZ { uint32_t _3DCommandOpcode; 
uint32_t _3DCommandSubOpcode; uint32_t DwordLength; - struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL Attribute; - uint32_t Attribute15WrapShortestEnables; - uint32_t Attribute14WrapShortestEnables; - uint32_t Attribute13WrapShortestEnables; - uint32_t Attribute12WrapShortestEnables; - uint32_t Attribute11WrapShortestEnables; - uint32_t Attribute10WrapShortestEnables; - uint32_t Attribute09WrapShortestEnables; - uint32_t Attribute08WrapShortestEnables; - uint32_t Attribute07WrapShortestEnables; - uint32_t Attribute06WrapShortestEnables; - uint32_t Attribute05WrapShortestEnables; - uint32_t Attribute04WrapShortestEnables; - uint32_t Attribute03WrapShortestEnables; - uint32_t Attribute02WrapShortestEnables; - uint32_t Attribute01WrapShortestEnables; - uint32_t Attribute00WrapShortestEnables; + struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL Attribute[16]; + uint32_t AttributeWrapShortestEnables[16]; }; static inline void @@ -4396,33 +4374,29 @@ GEN8_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; - uint32_t dw_Attribute; - GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute, &values->Attribute); - dw[1] = - __gen_field(dw_Attribute, 0, 15) | - 0; - - uint64_t qw9 = - __gen_field(values->Attribute15WrapShortestEnables, 60, 63) | - __gen_field(values->Attribute14WrapShortestEnables, 56, 59) | - __gen_field(values->Attribute13WrapShortestEnables, 52, 55) | - __gen_field(values->Attribute12WrapShortestEnables, 48, 51) | - __gen_field(values->Attribute11WrapShortestEnables, 44, 47) | - __gen_field(values->Attribute10WrapShortestEnables, 40, 43) | - __gen_field(values->Attribute09WrapShortestEnables, 36, 39) | - __gen_field(values->Attribute08WrapShortestEnables, 32, 35) | - __gen_field(values->Attribute07WrapShortestEnables, 28, 31) | - __gen_field(values->Attribute06WrapShortestEnables, 24, 27) | - __gen_field(values->Attribute05WrapShortestEnables, 20, 23) | - __gen_field(values->Attribute04WrapShortestEnables, 16, 19) | 
- __gen_field(values->Attribute03WrapShortestEnables, 12, 15) | - __gen_field(values->Attribute02WrapShortestEnables, 8, 11) | - __gen_field(values->Attribute01WrapShortestEnables, 4, 7) | - __gen_field(values->Attribute00WrapShortestEnables, 0, 3) | - 0; - - dw[9] = qw9; - dw[10] = qw9 >> 32; + for (uint32_t i = 0, j = 1; i < 16; i += 2, j++) { + uint32_t dw_Attribute0; + GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute0, &values->Attribute[i + 0]); + uint32_t dw_Attribute1; + GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute1, &values->Attribute[i + 1]); + dw[j] = + __gen_field(dw_Attribute0, 0, 15) | + __gen_field(dw_Attribute1, 16, 31) | + 0; + } + + for (uint32_t i = 0, j = 9; i < 16; i += 8, j++) { + dw[j] = + __gen_field(values->AttributeWrapShortestEnables[i + 0], 0, 3) | + __gen_field(values->AttributeWrapShortestEnables[i + 1], 4, 7) | + __gen_field(values->AttributeWrapShortestEnables[i + 2], 8, 11) | + __gen_field(values->AttributeWrapShortestEnables[i + 3], 12, 15) | + __gen_field(values->AttributeWrapShortestEnables[i + 4], 16, 19) | + __gen_field(values->AttributeWrapShortestEnables[i + 5], 20, 23) | + __gen_field(values->AttributeWrapShortestEnables[i + 6], 24, 27) | + __gen_field(values->AttributeWrapShortestEnables[i + 7], 28, 31) | + 0; + } } @@ -6091,7 +6065,7 @@ struct GEN8_MEDIA_OBJECT_GRPID { uint32_t SubSliceDestinationSelect; uint32_t IndirectDataLength; __gen_address_type IndirectDataStartAddress; - uint32_t ScoredboardY; + uint32_t ScoreboardY; uint32_t ScoreboardX; uint32_t ScoreboardColor; bool ScoreboardMask; @@ -6133,7 +6107,7 @@ GEN8_MEDIA_OBJECT_GRPID_pack(__gen_user_data *data, void * restrict dst, __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); dw[4] = - __gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardY, 16, 24) | __gen_field(values->ScoreboardX, 0, 8) | 0; @@ -6170,7 +6144,7 @@ struct GEN8_MEDIA_OBJECT_PRT { #define Rootthreadqueue 0 #define VFEstateflush 1 
uint32_t PRT_FenceType; - uint32_t InlineData; + uint32_t InlineData[12]; }; static inline void @@ -6200,9 +6174,11 @@ GEN8_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, dw[3] = 0; - dw[4] = - __gen_field(values->InlineData, 0, 31) | - 0; + for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + __gen_field(values->InlineData[i + 0], 0, 31) | + 0; + } } @@ -6587,7 +6563,6 @@ struct GEN8_MI_BATCH_BUFFER_START { uint32_t AddressSpaceIndicator; uint32_t DwordLength; __gen_address_type BatchBufferStartAddress; - __gen_address_type BatchBufferStartAddressHigh; }; static inline void @@ -6610,14 +6585,11 @@ GEN8_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, uint32_t dw1 = 0; - dw[1] = + uint64_t qw1 = __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->BatchBufferStartAddressHigh, dw2); + dw[1] = qw1; + dw[2] = qw1 >> 32; } @@ -6635,7 +6607,6 @@ struct GEN8_MI_CLFLUSH { uint32_t DwordLength; __gen_address_type PageBaseAddress; uint32_t StartingCachelineOffset; - __gen_address_type PageBaseAddressHigh; /* variable length fields follow */ }; @@ -6656,14 +6627,11 @@ GEN8_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->StartingCachelineOffset, 6, 11) | 0; - dw[1] = + uint64_t qw1 = __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->PageBaseAddressHigh, dw2); + dw[1] = qw1; + dw[2] = qw1 >> 32; /* variable length fields follow */ } @@ -6685,7 +6653,6 @@ struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END { uint32_t DwordLength; uint32_t CompareDataDword; __gen_address_type CompareAddress; - __gen_address_type CompareAddressHigh; }; static inline void @@ -6709,14 +6676,11 @@ GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = 
__gen_combine_address(data, &dw[2], values->CompareAddress, dw2); - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->CompareAddressHigh, dw3); + dw[2] = qw2; + dw[3] = qw2 >> 32; } @@ -7704,7 +7668,6 @@ struct GEN8_PIPE_CONTROL { #define FlushEnabled 1 bool DepthCacheFlushEnable; __gen_address_type Address; - __gen_address_type AddressHigh; uint64_t ImmediateData; }; @@ -7749,14 +7712,11 @@ GEN8_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, uint32_t dw2 = 0; - dw[2] = + uint64_t qw2 = __gen_combine_address(data, &dw[2], values->Address, dw2); - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->AddressHigh, dw3); + dw[2] = qw2; + dw[3] = qw2 >> 32; uint64_t qw4 = __gen_field(values->ImmediateData, 0, 63) | @@ -7930,7 +7890,7 @@ GEN8_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, { uint32_t *dw = (uint32_t * restrict) dst; - dw[0] = + uint64_t qw0 = __gen_field(values->LogicOpEnable, 63, 63) | __gen_field(values->LogicOpFunction, 59, 62) | __gen_field(values->PreBlendSourceOnlyClampEnable, 36, 36) | @@ -7950,6 +7910,9 @@ GEN8_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->WriteDisableBlue, 0, 0) | 0; + dw[0] = qw0; + dw[1] = qw0 >> 32; + } struct GEN8_BLEND_STATE { @@ -7962,7 +7925,7 @@ struct GEN8_BLEND_STATE { bool ColorDitherEnable; uint32_t XDitherOffset; uint32_t YDitherOffset; - struct GEN8_BLEND_STATE_ENTRY Entry; + struct GEN8_BLEND_STATE_ENTRY Entry[8]; }; static inline void @@ -7983,10 +7946,11 @@ GEN8_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->YDitherOffset, 19, 20) | 0; - GEN8_BLEND_STATE_ENTRY_pack(data, &dw[1], &values->Entry); + for (uint32_t i = 0, j = 1; i < 8; i++, j += 2) + GEN8_BLEND_STATE_ENTRY_pack(data, &dw[j], &values->Entry[i]); } -#define GEN8_BLEND_STATE_ENTRY_length 0x00000001 +#define GEN8_BLEND_STATE_ENTRY_length 0x00000002 #define GEN8_CC_VIEWPORT_length 0x00000002 -- 
cgit v1.2.3 From 7d507342403c775639383f0e228a081d2e3d4b44 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Jul 2015 09:23:11 -0700 Subject: vk/batch: Make relocs a pointer to a relocation list Previously anv_batch.relocs was an actual relocation list. However, this is limiting if the implementation of the batch wants to change the relocation list as the batch progresses. --- src/vulkan/anv_cmd_buffer.c | 27 ++++++++++++++------------- src/vulkan/anv_pipeline.c | 8 +++++--- src/vulkan/anv_private.h | 4 +++- 3 files changed, 22 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index a986df2860c..1c88ee5b6ab 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -175,7 +175,7 @@ uint64_t anv_batch_emit_reloc(struct anv_batch *batch, void *location, struct anv_bo *bo, uint32_t delta) { - return anv_reloc_list_add(&batch->relocs, batch->device, + return anv_reloc_list_add(batch->relocs, batch->device, location - batch->start, bo, delta); } @@ -196,8 +196,8 @@ anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) memcpy(batch->next, other->start, size); offset = batch->next - batch->start; - anv_reloc_list_append(&batch->relocs, batch->device, - &other->relocs, offset); + anv_reloc_list_append(batch->relocs, batch->device, + other->relocs, offset); batch->next += size; } @@ -235,7 +235,7 @@ anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, { batch->next = batch->start = bbo->bo.map; batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - bbo->first_reloc = batch->relocs.num_relocs; + bbo->first_reloc = batch->relocs->num_relocs; } static void @@ -248,7 +248,7 @@ anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) assert(batch->start == bbo->bo.map); bbo->length = batch->next - batch->start; VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); - bbo->num_relocs = batch->relocs.num_relocs - 
bbo->first_reloc; + bbo->num_relocs = batch->relocs->num_relocs - bbo->first_reloc; } static void @@ -382,13 +382,14 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); - result = anv_reloc_list_init(&cmd_buffer->batch.relocs, device); + result = anv_reloc_list_init(&cmd_buffer->batch_relocs, device); if (result != VK_SUCCESS) goto fail_batch_bo; cmd_buffer->batch.device = device; cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; cmd_buffer->batch.user_data = cmd_buffer; + cmd_buffer->batch.relocs = &cmd_buffer->batch_relocs; anv_batch_bo_start(batch_bo, &cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START_length * 4); @@ -416,7 +417,7 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) fail_ss_batch_bo: anv_batch_bo_destroy(surface_bbo, device); fail_batch_relocs: - anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); + anv_reloc_list_finish(&cmd_buffer->batch_relocs, device); fail_batch_bo: anv_batch_bo_destroy(batch_bo, device); @@ -433,7 +434,7 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) &cmd_buffer->batch_bos, link) { anv_batch_bo_destroy(bbo, device); } - anv_reloc_list_finish(&cmd_buffer->batch.relocs, device); + anv_reloc_list_finish(&cmd_buffer->batch_relocs, device); /* Destroy all of the surface state buffers */ list_for_each_entry_safe(struct anv_batch_bo, bbo, @@ -460,7 +461,7 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) } assert(!list_empty(&cmd_buffer->batch_bos)); - cmd_buffer->batch.relocs.num_relocs = 0; + cmd_buffer->batch_relocs.num_relocs = 0; anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), &cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START_length * 4); @@ -618,21 +619,21 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) continue; anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, - &batch->relocs.relocs[bbo->first_reloc], + 
&cmd_buffer->batch_relocs.relocs[bbo->first_reloc], bbo->num_relocs); } /* Add everything referenced by the batches */ - anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs); + anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->batch_relocs); /* Add the first batch bo last */ anv_cmd_buffer_add_bo(cmd_buffer, &first_batch_bo->bo, - &batch->relocs.relocs[first_batch_bo->first_reloc], + &cmd_buffer->batch_relocs.relocs[first_batch_bo->first_reloc], first_batch_bo->num_relocs); assert(first_batch_bo->bo.index == cmd_buffer->execbuf2.bo_count - 1); anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); - anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs); + anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->batch_relocs); cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 5a36faa29b5..3c9c14193de 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -488,13 +488,14 @@ anv_pipeline_create( pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); - result = anv_reloc_list_init(&pipeline->batch.relocs, device); + result = anv_reloc_list_init(&pipeline->batch_relocs, device); if (result != VK_SUCCESS) { anv_device_free(device, pipeline); return result; } pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; anv_state_stream_init(&pipeline->program_stream, &device->instruction_block_pool); @@ -758,7 +759,7 @@ VkResult anv_DestroyPipeline( ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); anv_compiler_free(pipeline); - anv_reloc_list_finish(&pipeline->batch.relocs, pipeline->device); + anv_reloc_list_finish(&pipeline->batch_relocs, pipeline->device); 
anv_state_stream_finish(&pipeline->program_stream); anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); anv_device_free(pipeline->device, pipeline); @@ -810,13 +811,14 @@ static VkResult anv_compute_pipeline_create( pipeline->device = device; pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - result = anv_reloc_list_init(&pipeline->batch.relocs, device); + result = anv_reloc_list_init(&pipeline->batch_relocs, device); if (result != VK_SUCCESS) { anv_device_free(device, pipeline); return result; } pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; anv_state_stream_init(&pipeline->program_stream, &device->instruction_block_pool); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 3258e8c2adc..cb302c96162 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -483,7 +483,7 @@ struct anv_batch { void * end; void * next; - struct anv_reloc_list relocs; + struct anv_reloc_list * relocs; /* This callback is called (with the associated user data) in the event * that the batch runs out of space. @@ -702,6 +702,7 @@ struct anv_cmd_buffer { * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). 
*/ struct list_head batch_bos; + struct anv_reloc_list batch_relocs; struct list_head surface_bos; uint32_t surface_next; struct anv_reloc_list surface_relocs; @@ -783,6 +784,7 @@ struct anv_pipeline { struct anv_device * device; struct anv_batch batch; uint32_t batch_data[256]; + struct anv_reloc_list batch_relocs; struct anv_shader * shaders[VK_SHADER_STAGE_NUM]; struct anv_pipeline_layout * layout; bool use_repclear; -- cgit v1.2.3 From 8208f01a359ee7037ae46b7617cc72baf7771044 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Jul 2015 11:57:44 -0700 Subject: vk/cmd_buffer: Store the relocation list in the anv_batch_bo struct Before, we were doing this thing where we had one big relocation list for the whole command buffer and each subbuffer took a chunk out of it. Now, we store the actual relocation list in the anv_batch_bo. This comes at the cost of more small allocations but makes a lot of things simpler. --- src/vulkan/anv_cmd_buffer.c | 110 ++++++++++++++++++-------------------------- src/vulkan/anv_cmd_emit.c | 4 +- src/vulkan/anv_private.h | 8 ++-- 3 files changed, 49 insertions(+), 73 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 1c88ee5b6ab..d7a006d9b7a 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -216,17 +216,24 @@ anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) if (bbo == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - bbo->num_relocs = 0; - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); - if (result != VK_SUCCESS) { - anv_device_free(device, bbo); - return result; - } + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init(&bbo->relocs, device); + if (result != VK_SUCCESS) + goto fail_bo_alloc; *bbo_out = bbo; return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_device_free(device, bbo); + + return result; } static 
void @@ -235,7 +242,8 @@ anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, { batch->next = batch->start = bbo->bo.map; batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - bbo->first_reloc = batch->relocs->num_relocs; + batch->relocs = &bbo->relocs; + bbo->relocs.num_relocs = 0; } static void @@ -248,12 +256,12 @@ anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) assert(batch->start == bbo->bo.map); bbo->length = batch->next - batch->start; VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); - bbo->num_relocs = batch->relocs->num_relocs - bbo->first_reloc; } static void anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) { + anv_reloc_list_finish(&bbo->relocs, device); anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); anv_device_free(device, bbo); } @@ -280,6 +288,12 @@ anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer) return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo; } +struct anv_reloc_list * +anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) +{ + return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs; +} + static VkResult anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) { @@ -350,15 +364,12 @@ anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_current_surface_bbo(cmd_buffer); /* Finish off the old buffer */ - old_bbo->num_relocs = - cmd_buffer->surface_relocs.num_relocs - old_bbo->first_reloc; old_bbo->length = cmd_buffer->surface_next; VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); if (result != VK_SUCCESS) return result; - new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs; cmd_buffer->surface_next = 1; list_addtail(&new_bbo->link, &cmd_buffer->surface_bos); @@ -382,29 +393,19 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); - result = 
anv_reloc_list_init(&cmd_buffer->batch_relocs, device); - if (result != VK_SUCCESS) - goto fail_batch_bo; - cmd_buffer->batch.device = device; cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; cmd_buffer->batch.user_data = cmd_buffer; - cmd_buffer->batch.relocs = &cmd_buffer->batch_relocs; anv_batch_bo_start(batch_bo, &cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START_length * 4); result = anv_batch_bo_create(device, &surface_bbo); if (result != VK_SUCCESS) - goto fail_batch_relocs; + goto fail_batch_bo; - surface_bbo->first_reloc = 0; list_addtail(&surface_bbo->link, &cmd_buffer->surface_bos); - result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device); - if (result != VK_SUCCESS) - goto fail_ss_batch_bo; - /* Start surface_next at 1 so surface offset 0 is invalid. */ cmd_buffer->surface_next = 1; @@ -414,10 +415,6 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; - fail_ss_batch_bo: - anv_batch_bo_destroy(surface_bbo, device); - fail_batch_relocs: - anv_reloc_list_finish(&cmd_buffer->batch_relocs, device); fail_batch_bo: anv_batch_bo_destroy(batch_bo, device); @@ -434,14 +431,12 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) &cmd_buffer->batch_bos, link) { anv_batch_bo_destroy(bbo, device); } - anv_reloc_list_finish(&cmd_buffer->batch_relocs, device); /* Destroy all of the surface state buffers */ list_for_each_entry_safe(struct anv_batch_bo, bbo, &cmd_buffer->surface_bos, link) { anv_batch_bo_destroy(bbo, device); } - anv_reloc_list_finish(&cmd_buffer->surface_relocs, device); anv_device_free(device, cmd_buffer->execbuf2.objects); anv_device_free(device, cmd_buffer->execbuf2.bos); @@ -461,7 +456,6 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) } assert(!list_empty(&cmd_buffer->batch_bos)); - cmd_buffer->batch_relocs.num_relocs = 0; anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), &cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START_length * 4); @@ 
-475,15 +469,15 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) } assert(!list_empty(&cmd_buffer->batch_bos)); + anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs.num_relocs = 0; + cmd_buffer->surface_next = 1; - cmd_buffer->surface_relocs.num_relocs = 0; } static VkResult anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *bo, - struct drm_i915_gem_relocation_entry *relocs, - size_t num_relocs) + struct anv_reloc_list *relocs) { struct drm_i915_gem_exec_object2 *obj; @@ -537,8 +531,8 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, obj->rsvd2 = 0; if (relocs) { - obj->relocation_count = num_relocs; - obj->relocs_ptr = (uintptr_t) relocs; + obj->relocation_count = relocs->num_relocs; + obj->relocs_ptr = (uintptr_t) relocs->relocs; } return VK_SUCCESS; @@ -549,7 +543,7 @@ anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer, struct anv_reloc_list *list) { for (size_t i = 0; i < list->num_relocs; i++) - anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL, 0); + anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL); } static void @@ -586,8 +580,6 @@ anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer) anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); - surface_bbo->num_relocs = - cmd_buffer->surface_relocs.num_relocs - surface_bbo->first_reloc; surface_bbo->length = cmd_buffer->surface_next; } @@ -602,39 +594,25 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) /* Add surface state bos first so we can add them with their relocs. 
*/ list_for_each_entry(struct anv_batch_bo, bbo, &cmd_buffer->surface_bos, link) { - anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, - &cmd_buffer->surface_relocs.relocs[bbo->first_reloc], - bbo->num_relocs); + anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, &bbo->relocs); + anv_cmd_buffer_add_validate_bos(cmd_buffer, &bbo->relocs); + anv_cmd_buffer_process_relocs(cmd_buffer, &bbo->relocs); } - /* Add all of the BOs referenced by surface state */ - anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs); - - struct anv_batch_bo *first_batch_bo = - LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.next, link); - - /* Add all but the first batch BO */ - list_for_each_entry(struct anv_batch_bo, bbo, &cmd_buffer->batch_bos, link) { - if (bbo == first_batch_bo) - continue; - - anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, - &cmd_buffer->batch_relocs.relocs[bbo->first_reloc], - bbo->num_relocs); + /* Walk the list of batch buffers backwards and add each one. There are + * two reasons for walking backwards. First, it guarantees that we add + * a given batch bo before we process the relocation pointing to it from + * the MI_BATCH_BUFFER_START command. Second, the kernel requires that + * the last bo on the list is the batch buffer to execute and walking + * backwards gives us this for free. 
+ */ + list_for_each_entry_rev(struct anv_batch_bo, bbo, + &cmd_buffer->batch_bos, link) { + anv_cmd_buffer_add_validate_bos(cmd_buffer, &bbo->relocs); + anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, &bbo->relocs); + anv_cmd_buffer_process_relocs(cmd_buffer, &bbo->relocs); } - /* Add everything referenced by the batches */ - anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->batch_relocs); - - /* Add the first batch bo last */ - anv_cmd_buffer_add_bo(cmd_buffer, &first_batch_bo->bo, - &cmd_buffer->batch_relocs.relocs[first_batch_bo->first_reloc], - first_batch_bo->num_relocs); - assert(first_batch_bo->bo.index == cmd_buffer->execbuf2.bo_count - 1); - - anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); - anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->batch_relocs); - cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, .buffer_count = cmd_buffer->execbuf2.bo_count, diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index e5997f755e0..abe7275e2bd 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -411,7 +411,7 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ *(uint64_t *)(state.map + 8 * 4) = - anv_reloc_list_add(&cmd_buffer->surface_relocs, + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), cmd_buffer->device, state.offset + 8 * 4, view->view.bo, view->view.offset); @@ -458,7 +458,7 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ *(uint64_t *)(state.map + 8 * 4) = - anv_reloc_list_add(&cmd_buffer->surface_relocs, + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), cmd_buffer->device, state.offset + 8 * 4, view->bo, offset); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index cb302c96162..1d04dfca9d7 100644 
--- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -471,9 +471,7 @@ struct anv_batch_bo { /* Bytes actually consumed in this batch BO */ size_t length; - /* These offsets reference the per-batch reloc list */ - size_t first_reloc; - size_t num_relocs; + struct anv_reloc_list relocs; }; struct anv_batch { @@ -702,10 +700,8 @@ struct anv_cmd_buffer { * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). */ struct list_head batch_bos; - struct anv_reloc_list batch_relocs; struct list_head surface_bos; uint32_t surface_next; - struct anv_reloc_list surface_relocs; /* Information needed for execbuf * @@ -742,6 +738,8 @@ void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); struct anv_bo * anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer); +struct anv_reloc_list * +anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer); struct anv_state anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment); -- cgit v1.2.3 From 4fc7510a7cb370bcbdeaa1d54b4f51222484bb31 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Jul 2015 12:01:02 -0700 Subject: vk/cmd_buffer: Move emit_batch_buffer_end higher in the file --- src/vulkan/anv_cmd_buffer.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index d7a006d9b7a..e673ac6f94e 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -474,6 +474,20 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->surface_next = 1; } +void +anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + struct anv_batch_bo *surface_bbo = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, 
GEN8_MI_BATCH_BUFFER_END); + + anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + + surface_bbo->length = cmd_buffer->surface_next; +} + static VkResult anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *bo, @@ -569,20 +583,6 @@ anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, } } -void -anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - struct anv_batch_bo *surface_bbo = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END); - - anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); - - surface_bbo->length = cmd_buffer->surface_next; -} - void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) { -- cgit v1.2.3 From 0f31c580bfaa2788e8d320c0bf4acb9b70a90e05 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Jul 2015 15:13:21 -0700 Subject: vk/cmd_buffer: Rework validate list creation The algorithm we used previously required us to call add_bo in a particular order in order to guarantee that we get the initial batch buffer as the last element in the validate list. The new algorithm does a recursive walk over the buffers and then re-orders the list. This should be much more robust as we start to add circular dependencies in the relocations. 
--- src/vulkan/anv_cmd_buffer.c | 158 ++++++++++++++++++++++++++------------------ 1 file changed, 95 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index e673ac6f94e..b47650aff73 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -493,73 +493,77 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *bo, struct anv_reloc_list *relocs) { - struct drm_i915_gem_exec_object2 *obj; + struct drm_i915_gem_exec_object2 *obj = NULL; if (bo->index < cmd_buffer->execbuf2.bo_count && cmd_buffer->execbuf2.bos[bo->index] == bo) - return VK_SUCCESS; - - if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { - uint32_t new_len = cmd_buffer->execbuf2.objects ? - cmd_buffer->execbuf2.array_length * 2 : 64; - - struct drm_i915_gem_exec_object2 *new_objects = - anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects), - 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_objects == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct anv_bo **new_bos = - anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos), - 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_objects == NULL) { - anv_device_free(cmd_buffer->device, new_objects); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + obj = &cmd_buffer->execbuf2.objects[bo->index]; + + if (obj == NULL) { + /* We've never seen this one before. Add it to the list and assign + * an id that we can use later. + */ + if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { + uint32_t new_len = cmd_buffer->execbuf2.objects ? 
+ cmd_buffer->execbuf2.array_length * 2 : 64; + + struct drm_i915_gem_exec_object2 *new_objects = + anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_objects == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_bos = + anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_objects == NULL) { + anv_device_free(cmd_buffer->device, new_objects); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (cmd_buffer->execbuf2.objects) { + memcpy(new_objects, cmd_buffer->execbuf2.objects, + cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); + memcpy(new_bos, cmd_buffer->execbuf2.bos, + cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); + } + + cmd_buffer->execbuf2.objects = new_objects; + cmd_buffer->execbuf2.bos = new_bos; + cmd_buffer->execbuf2.array_length = new_len; } - if (cmd_buffer->execbuf2.objects) { - memcpy(new_objects, cmd_buffer->execbuf2.objects, - cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); - memcpy(new_bos, cmd_buffer->execbuf2.bos, - cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); - } + assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); - cmd_buffer->execbuf2.objects = new_objects; - cmd_buffer->execbuf2.bos = new_bos; - cmd_buffer->execbuf2.array_length = new_len; - } + bo->index = cmd_buffer->execbuf2.bo_count++; + obj = &cmd_buffer->execbuf2.objects[bo->index]; + cmd_buffer->execbuf2.bos[bo->index] = bo; - assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); - - bo->index = cmd_buffer->execbuf2.bo_count++; - obj = &cmd_buffer->execbuf2.objects[bo->index]; - cmd_buffer->execbuf2.bos[bo->index] = bo; - - obj->handle = bo->gem_handle; - obj->relocation_count = 0; - obj->relocs_ptr = 0; - obj->alignment = 0; - obj->offset = bo->offset; - obj->flags = 0; - obj->rsvd1 = 0; - obj->rsvd2 = 0; + obj->handle = bo->gem_handle; + obj->relocation_count 
= 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = 0; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + } - if (relocs) { + if (relocs != NULL && obj->relocation_count == 0) { + /* This is the first time we've ever seen a list of relocations for + * this BO. Go ahead and set the relocations and then walk the list + * of relocations and add them all. + */ obj->relocation_count = relocs->num_relocs; obj->relocs_ptr = (uintptr_t) relocs->relocs; + + for (size_t i = 0; i < relocs->num_relocs; i++) + anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); } return VK_SUCCESS; } -static void -anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer, - struct anv_reloc_list *list) -{ - for (size_t i = 0; i < list->num_relocs; i++) - anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL); -} - static void anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, struct anv_reloc_list *list) @@ -591,25 +595,53 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->execbuf2.bo_count = 0; cmd_buffer->execbuf2.need_reloc = false; - /* Add surface state bos first so we can add them with their relocs. */ + list_for_each_entry(struct anv_batch_bo, bbo, + &cmd_buffer->batch_bos, link) { + anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, &bbo->relocs); + } + list_for_each_entry(struct anv_batch_bo, bbo, &cmd_buffer->surface_bos, link) { anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, &bbo->relocs); - anv_cmd_buffer_add_validate_bos(cmd_buffer, &bbo->relocs); - anv_cmd_buffer_process_relocs(cmd_buffer, &bbo->relocs); } - /* Walk the list of batch buffers backwards and add each one. There are - * two reasons for walking backwards. First, it guarantees that we add - * a given batch bo before we process the relocation pointing to it from - * the MI_BATCH_BUFFER_START command. 
Second, thed kernel requires that - * the last bo on the list is the batch buffer to execute and walking - * backwards gives us this for free. + struct anv_batch_bo *first_batch_bo = + list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); + + /* The kernel requires that the last entry in the validation list be the + * batch buffer to execute. We can simply swap the element + * corresponding to the first batch_bo in the chain with the last + * element in the list. */ + if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { + uint32_t idx = first_batch_bo->bo.index; + + struct drm_i915_gem_exec_object2 tmp_obj = + cmd_buffer->execbuf2.objects[idx]; + assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); + + cmd_buffer->execbuf2.objects[idx] = + cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1]; + cmd_buffer->execbuf2.bos[idx] = + cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1]; + cmd_buffer->execbuf2.bos[idx]->index = idx; + + cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1] = tmp_obj; + cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1] = + &first_batch_bo->bo; + first_batch_bo->bo.index = cmd_buffer->execbuf2.bo_count - 1; + } + + /* Now we go through and fixup all of the relocation lists to point to + * the correct indices in the object array. We have to do this after we + * reorder the list above as some of the indices may have changed. 
+ */ + list_for_each_entry(struct anv_batch_bo, bbo, + &cmd_buffer->surface_bos, link) { + anv_cmd_buffer_process_relocs(cmd_buffer, &bbo->relocs); + } list_for_each_entry_rev(struct anv_batch_bo, bbo, &cmd_buffer->batch_bos, link) { - anv_cmd_buffer_add_validate_bos(cmd_buffer, &bbo->relocs); - anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, &bbo->relocs); anv_cmd_buffer_process_relocs(cmd_buffer, &bbo->relocs); } -- cgit v1.2.3 From 3ed9cea84d42d7a5f37ed64d99a6a5ba064767f3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Jul 2015 15:28:51 -0700 Subject: vk/cmd_buffer: Use an array to track all known anv_batch_bo objects Instead of walking the list of batch and surface buffers, we simply keep track of all known batch and surface buffers as we build the command buffer. Then we use this new list to construct the validate list. --- src/vulkan/anv_cmd_buffer.c | 61 ++++++++++++++++++++++++++++++++------------- src/vulkan/anv_private.h | 7 ++++++ 2 files changed, 51 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index b47650aff73..2d46cc0c0ad 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -305,6 +305,13 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) if (result != VK_SUCCESS) return result; + struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer->device); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + /* We set the end of the batch a little short so we would be sure we * have room for the chaining command. Since we're about to emit the * chaining command, let's set it back where it should go. 
@@ -370,6 +377,13 @@ anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) if (result != VK_SUCCESS) return result; + struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer->device); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + cmd_buffer->surface_next = 1; list_addtail(&new_bbo->link, &cmd_buffer->surface_bos); @@ -406,6 +420,15 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) list_addtail(&surface_bbo->link, &cmd_buffer->surface_bos); + int success = anv_vector_init(&cmd_buffer->seen_bbos, + sizeof(struct anv_bo *), + 8 * sizeof(struct anv_bo *)); + if (!success) + goto fail_surface_bo; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = surface_bbo; + /* Start surface_next at 1 so surface offset 0 is invalid. */ cmd_buffer->surface_next = 1; @@ -415,6 +438,8 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; + fail_surface_bo: + anv_batch_bo_destroy(surface_bbo, device); fail_batch_bo: anv_batch_bo_destroy(batch_bo, device); @@ -426,6 +451,8 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; + anv_vector_finish(&cmd_buffer->seen_bbos); + /* Destroy all of the batch buffers */ list_for_each_entry_safe(struct anv_batch_bo, bbo, &cmd_buffer->batch_bos, link) { @@ -472,6 +499,15 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs.num_relocs = 0; cmd_buffer->surface_next = 1; + + /* Reset the list of seen buffers */ + cmd_buffer->seen_bbos.head = 0; + cmd_buffer->seen_bbos.tail = 0; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = + anv_cmd_buffer_current_batch_bo(cmd_buffer); + *(struct anv_batch_bo 
**)anv_vector_add(&cmd_buffer->seen_bbos) = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); } void @@ -595,15 +631,12 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->execbuf2.bo_count = 0; cmd_buffer->execbuf2.need_reloc = false; - list_for_each_entry(struct anv_batch_bo, bbo, - &cmd_buffer->batch_bos, link) { - anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, &bbo->relocs); - } - - list_for_each_entry(struct anv_batch_bo, bbo, - &cmd_buffer->surface_bos, link) { - anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo, &bbo->relocs); - } + /* First, we walk over all of the bos we've seen and add them and their + * relocations to the validate list. + */ + struct anv_batch_bo **bbo; + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); struct anv_batch_bo *first_batch_bo = list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); @@ -636,14 +669,8 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) * the correct indices in the object array. We have to do this after we * reorder the list above as some of the indices may have changed. 
*/ - list_for_each_entry(struct anv_batch_bo, bbo, - &cmd_buffer->surface_bos, link) { - anv_cmd_buffer_process_relocs(cmd_buffer, &bbo->relocs); - } - list_for_each_entry_rev(struct anv_batch_bo, bbo, - &cmd_buffer->batch_bos, link) { - anv_cmd_buffer_process_relocs(cmd_buffer, &bbo->relocs); - } + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 1d04dfca9d7..a3787229a74 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -703,6 +703,13 @@ struct anv_cmd_buffer { struct list_head surface_bos; uint32_t surface_next; + /* A vector of anv_batch_bo pointers for every batch or surface buffer + * referenced by this command buffer + * + * initialized by anv_cmd_buffer_init_batch_bo_chain() + */ + struct anv_vector seen_bbos; + /* Information needed for execbuf * * These fields are generated by anv_cmd_buffer_prepare_execbuf(). -- cgit v1.2.3 From 56ce896d730b24376bb4218e5cbaaa02120d12cb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Jul 2015 21:22:10 -0700 Subject: vk/cmd_buffer: Rename emit_batch_buffer_end to end_batch_buffer This is more generic and doesn't imply that it emits MI_BATCH_BUFFER_END. While we're at it, we'll move NOOP adding from bo_finish to end_batch_buffer. 
--- src/vulkan/anv_cmd_buffer.c | 10 +++++----- src/vulkan/anv_cmd_emit.c | 2 +- src/vulkan/anv_private.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 2d46cc0c0ad..a0a67af676c 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -249,10 +249,6 @@ anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, static void anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) { - /* Round batch up to an even number of dwords. */ - if ((batch->next - batch->start) & 4) - anv_batch_emit(batch, GEN8_MI_NOOP); - assert(batch->start == bbo->bo.map); bbo->length = batch->next - batch->start; VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); @@ -511,7 +507,7 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) } void -anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer) +anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) { struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); struct anv_batch_bo *surface_bbo = @@ -519,6 +515,10 @@ anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END); + /* Round batch up to an even number of dwords. 
*/ + if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP); + anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); surface_bbo->length = cmd_buffer->surface_next; diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index abe7275e2bd..93ffb8cf2c1 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -182,7 +182,7 @@ VkResult anv_EndCommandBuffer( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); struct anv_device *device = cmd_buffer->device; - anv_cmd_buffer_emit_batch_buffer_end(cmd_buffer); + anv_cmd_buffer_end_batch_buffer(cmd_buffer); /* The algorithm used to compute the validate list is not threadsafe as * it uses the bo->index field. We have to lock the device around it. diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a3787229a74..f3f50002568 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -740,7 +740,7 @@ struct anv_cmd_buffer { VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); struct anv_bo * -- cgit v1.2.3 From 82548a3acac29cc0fcf492a7dc743bedb11e2df2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Jul 2015 21:24:20 -0700 Subject: vk/cmd_buffer: Invalidate texture cache in emit_state_base_address Previously, the caller of emit_state_base_address was doing this. However, putting it directly in emit_state_base_address means that we'll never forget the flush at the cost of one PIPE_CONTROL at the top every batch (that should do nothing since the kernel just flushed for us). 
--- src/vulkan/anv_cmd_emit.c | 80 +++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 40 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index 93ffb8cf2c1..62b3cda6cd7 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -162,6 +162,46 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) .InstructionBaseAddressModifyEnable = true, .InstructionBufferSize = 0xfffff, .InstructionBuffersizeModifyEnable = true); + + /* After re-setting the surface state base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. 
+ * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. + */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .TextureCacheInvalidationEnable = true); } VkResult anv_BeginCommandBuffer( @@ -587,46 +627,6 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) */ anv_cmd_buffer_emit_state_base_address(cmd_buffer); - /* After re-setting the surface state base address, we have to do - * some cache flusing so that the sampler engine will pick up the new - * SURFACE_STATE objects and binding tables. From the Broadwell PRM, - * Shared Function > 3D Sampler > State > State Caching (page 96): - * - * Coherency with system memory in the state cache, like the - * texture cache is handled partially by software. It is expected - * that the command stream or shader will issue Cache Flush - * operation or Cache_Flush sampler message to ensure that the L1 - * cache remains coherent with system memory. - * - * [...] - * - * Whenever the value of the Dynamic_State_Base_Addr, - * Surface_State_Base_Addr are altered, the L1 state cache must be - * invalidated to ensure the new surface or sampler state is - * fetched from system memory. - * - * The PIPE_CONTROL command has a "State Cache Invalidation Enable" - * bit which, according the PIPE_CONTROL instruction documentation in - * the Broadwell PRM: - * - * Setting this bit is independent of any other bit in this - * packet. This bit controls the invalidation of the L1 and L2 - * state caches at the top of the pipe i.e. at the parsing time. - * - * Unfortunately, experimentation seems to indicate that state cache - * invalidation through a PIPE_CONTROL does nothing whatsoever in - * regards to surface state and binding tables. 
In stead, it seems - * that invalidating the texture cache is what is actually needed. - * - * XXX: As far as we have been able to determine through - * experimentation, shows that flush the texture cache appears to be - * sufficient. The theory here is that all of the sampling/rendering - * units cache the binding table in the texture cache. However, we - * have yet to be able to actually confirm this. - */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .TextureCacheInvalidationEnable = true); - /* Re-emit all active binding tables */ for_each_bit(s, cmd_buffer->state.pipeline->active_stages) { result = flush_descriptor_set(cmd_buffer, s); -- cgit v1.2.3 From e39d0b635c3d6fdce81496ca859b0bdd9de68724 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Jul 2015 14:05:06 -0700 Subject: CLONE --- src/vulkan/anv_cmd_buffer.c | 66 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index a0a67af676c..5126f4a981c 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -42,11 +42,19 @@ * Functions related to anv_reloc_list *-----------------------------------------------------------------------*/ -VkResult -anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) +static VkResult +anv_reloc_list_init_clone(struct anv_reloc_list *list, + struct anv_device *device, + const struct anv_reloc_list *other_list) { - list->num_relocs = 0; - list->array_length = 256; + if (other_list) { + list->num_relocs = other_list->num_relocs; + list->array_length = other_list->array_length; + } else { + list->num_relocs = 0; + list->array_length = 256; + } + list->relocs = anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); @@ -58,14 +66,27 @@ anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) anv_device_alloc(device, 
list->array_length * sizeof(*list->reloc_bos), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (list->relocs == NULL) { + if (list->reloc_bos == NULL) { anv_device_free(device, list->relocs); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + if (other_list) { + memcpy(list->relocs, other_list->relocs, + list->array_length * sizeof(*list->relocs)); + memcpy(list->reloc_bos, other_list->reloc_bos, + list->array_length * sizeof(*list->reloc_bos)); + } + return VK_SUCCESS; } +VkResult +anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) +{ + return anv_reloc_list_init_clone(list, device, NULL); +} + void anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device) { @@ -236,6 +257,41 @@ anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) return result; } +static VkResult +anv_batch_bo_clone(struct anv_device *device, + const struct anv_batch_bo *other_bbo, + struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = + anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init_clone(&bbo->relocs, device, &other_bbo->relocs); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + bbo->length = other_bbo->length; + memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_device_free(device, bbo); + + return result; +} + static void anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, size_t batch_padding) -- cgit v1.2.3 From f15be18c9210bfecd5d00819ed9edcdb4a667ffc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 30 Jul 2015 11:28:22 -0700 Subject: util/list: Add list splicing functions This adds functions 
for splicing one list into another. These have more-or-less the same API as the kernel list splicing functions. --- src/util/list.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src') diff --git a/src/util/list.h b/src/util/list.h index b98ce59ff77..d4b485174fc 100644 --- a/src/util/list.h +++ b/src/util/list.h @@ -108,6 +108,28 @@ static inline unsigned list_length(struct list_head *list) return length; } +static inline void list_splice(struct list_head *src, struct list_head *dst) +{ + if (list_empty(src)) + return; + + src->next->prev = dst; + src->prev->next = dst->next; + dst->next->prev = src->prev; + dst->next = src->next; +} + +static inline void list_splicetail(struct list_head *src, struct list_head *dst) +{ + if (list_empty(src)) + return; + + src->prev->next = dst; + src->next->prev = dst->prev; + dst->prev->next = src->next; + dst->prev = src->prev; +} + static inline void list_validate(struct list_head *list) { struct list_head *node; -- cgit v1.2.3 From 7af67e085f0f7d1f794c010b5291962027917602 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 30 Jul 2015 11:29:55 -0700 Subject: vk/reloc_list: Actually set the new length in reloc_list_grow --- src/vulkan/anv_cmd_buffer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 5126f4a981c..ee79e1b4924 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -126,6 +126,7 @@ anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, anv_device_free(device, list->relocs); anv_device_free(device, list->reloc_bos); + list->array_length = new_length; list->relocs = new_relocs; list->reloc_bos = new_reloc_bos; -- cgit v1.2.3 From ace093031d7516527e246c8da546f3ee1d9b7e7b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 30 Jul 2015 11:32:27 -0700 Subject: vk/cmd_buffer: Add functions for cloning a list of anv_batch_bo's We'll need this to implement 
secondary command buffers. --- src/vulkan/anv_cmd_buffer.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index ee79e1b4924..35540763d08 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -319,6 +319,44 @@ anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) anv_device_free(device, bbo); } +static VkResult +anv_batch_bo_list_clone(const struct list_head *list, struct anv_device *device, + struct list_head *new_list) +{ + VkResult result = VK_SUCCESS; + + list_inithead(new_list); + + struct anv_batch_bo *prev_bbo = NULL; + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo *new_bbo; + result = anv_batch_bo_clone(device, bbo, &new_bbo); + if (result != VK_SUCCESS) + break; + list_addtail(&new_bbo->link, new_list); + + if (prev_bbo) { + /* As we clone this list of batch_bo's, they chain one to the + * other using MI_BATCH_BUFFER_START commands. We need to fix up + * those relocations as we go. Fortunately, this is pretty easy + * as it will always be the last relocation in the list. 
+ */ + uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; + assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); + prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; + } + + prev_bbo = new_bbo; + } + + if (result != VK_SUCCESS) { + list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) + anv_batch_bo_destroy(bbo, device); + } + + return result; +} + /*-----------------------------------------------------------------------* * Functions related to anv_batch_bo *-----------------------------------------------------------------------*/ -- cgit v1.2.3 From 0c4a2dab7e70481868ef60bfad0946badb9057e0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 30 Jul 2015 11:34:09 -0700 Subject: vk/device: Make BATCH_SIZE a global #define --- src/vulkan/anv_device.c | 4 +--- src/vulkan/anv_private.h | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index ee7682a5d9a..a12076f790d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -535,8 +535,6 @@ anv_device_init_border_colors(struct anv_device *device) memcpy(device->border_colors.map, border_colors, sizeof(border_colors)); } -static const uint32_t BATCH_SIZE = 8192; - VkResult anv_CreateDevice( VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, @@ -568,7 +566,7 @@ VkResult anv_CreateDevice( if (device->context_id == -1) goto fail_fd; - anv_bo_pool_init(&device->batch_bo_pool, device, BATCH_SIZE); + anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index f3f50002568..0437677de0a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -690,6 +690,8 @@ struct anv_cmd_state { struct anv_descriptor_set_binding descriptors[MAX_SETS]; }; +#define ANV_CMD_BUFFER_BATCH_SIZE 8192 + struct anv_cmd_buffer { struct 
anv_device * device; -- cgit v1.2.3 From 5aee803b9785ee5c19c867e98bb0e9faddfbfd1e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 30 Jul 2015 11:34:58 -0700 Subject: vk/cmd_buffer: Split batch chaining into a helper function --- src/vulkan/anv_cmd_buffer.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 35540763d08..e7493d02839 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -385,12 +385,36 @@ anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs; } +static void +cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo *bbo) +{ + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_batch_bo *current_bbo = + anv_cmd_buffer_current_batch_bo(cmd_buffer); + + /* We set the end of the batch a little short so we would be sure we + * have room for the chaining command. Since we're about to emit the + * chaining command, let's set it back where it should go. 
+ */ + batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); + + anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &bbo->bo, 0 }, + ); + + anv_batch_bo_finish(current_bbo, batch); +} + static VkResult anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) { struct anv_cmd_buffer *cmd_buffer = _data; - struct anv_batch_bo *new_bbo, *old_bbo = - anv_cmd_buffer_current_batch_bo(cmd_buffer); + struct anv_batch_bo *new_bbo; VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); if (result != VK_SUCCESS) @@ -403,21 +427,7 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) } *seen_bbo = new_bbo; - /* We set the end of the batch a little short so we would be sure we - * have room for the chaining command. Since we're about to emit the - * chaining command, let's set it back where it should go. 
- */ - batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; - assert(batch->end == old_bbo->bo.map + old_bbo->bo.size); - - anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, - GEN8_MI_BATCH_BUFFER_START_header, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &new_bbo->bo, 0 }, - ); - - anv_batch_bo_finish(old_bbo, batch); + cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); -- cgit v1.2.3 From 21004f23bfe29a8a78a2ce61229bbcc05e50b16d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 30 Jul 2015 11:36:48 -0700 Subject: vk: Add initial support for secondary command buffers --- src/vulkan/anv_cmd_buffer.c | 75 ++++++++++++++++++++++++++++++++++++++++++--- src/vulkan/anv_cmd_emit.c | 52 +++++++++++++++++++++++-------- src/vulkan/anv_device.c | 2 ++ src/vulkan/anv_private.h | 5 +++ 4 files changed, 117 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index e7493d02839..cdde12a513b 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -303,6 +303,16 @@ anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, bbo->relocs.num_relocs = 0; } +static void +anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->start = bbo->bo.map; + batch->next = bbo->bo.map + bbo->length; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; +} + static void anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) { @@ -618,17 +628,74 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) struct anv_batch_bo *surface_bbo = anv_cmd_buffer_current_surface_bbo(cmd_buffer); - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END); + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { + anv_batch_emit(&cmd_buffer->batch, 
GEN8_MI_BATCH_BUFFER_END); - /* Round batch up to an even number of dwords. */ - if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP); + /* Round batch up to an even number of dwords. */ + if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP); + } anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); surface_bbo->length = cmd_buffer->surface_next; } +static inline VkResult +anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, + struct list_head *list) +{ + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos); + if (bbo_ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *bbo_ptr = bbo; + } + + return VK_SUCCESS; +} + +void +anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary) +{ + if ((secondary->batch_bos.next == secondary->batch_bos.prev) && + anv_cmd_buffer_current_batch_bo(secondary)->length < ANV_CMD_BUFFER_BATCH_SIZE / 2) { + /* If the secondary has exactly one batch buffer in its list *and* + * that batch buffer is less than half of the maximum size, we're + * probably better of simply copying it into our batch. 
+ */ + anv_batch_emit_batch(&primary->batch, &secondary->batch); + } else { + struct list_head copy_list; + VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, + secondary->device, + ©_list); + if (result != VK_SUCCESS) + return; /* FIXME */ + + anv_cmd_buffer_add_seen_bbos(primary, ©_list); + + struct anv_batch_bo *first_bbo = + list_first_entry(©_list, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(©_list, struct anv_batch_bo, link); + + cmd_buffer_chain_to_batch_bo(primary, first_bbo); + + list_splicetail(©_list, &primary->batch_bos); + + anv_batch_bo_continue(last_bbo, &primary->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + anv_cmd_buffer_emit_state_base_address(primary); + } + + /* Mark the surface buffer from the secondary as seen */ + anv_cmd_buffer_add_seen_bbos(primary, &secondary->surface_bos); +} + static VkResult anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *bo, diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index 62b3cda6cd7..64acec82154 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -66,8 +66,6 @@ VkResult anv_CreateCommandBuffer( struct anv_cmd_buffer *cmd_buffer; VkResult result; - assert(pCreateInfo->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (cmd_buffer == NULL) @@ -84,6 +82,9 @@ VkResult anv_CreateCommandBuffer( anv_state_stream_init(&cmd_buffer->dynamic_state_stream, &device->dynamic_state_block_pool); + cmd_buffer->level = pCreateInfo->level; + cmd_buffer->opt_flags = 0; + anv_cmd_state_init(&cmd_buffer->state); *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); @@ -210,6 +211,19 @@ VkResult anv_BeginCommandBuffer( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + cmd_buffer->opt_flags = pBeginInfo->flags; + + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { + cmd_buffer->state.framebuffer = + 
anv_framebuffer_from_handle(pBeginInfo->framebuffer); + cmd_buffer->state.pass = + anv_render_pass_from_handle(pBeginInfo->renderPass); + + /* FIXME: We shouldn't be starting on the first subpass */ + anv_cmd_buffer_begin_subpass(cmd_buffer, + &cmd_buffer->state.pass->subpasses[0]); + } + anv_cmd_buffer_emit_state_base_address(cmd_buffer); cmd_buffer->state.current_pipeline = UINT32_MAX; @@ -224,13 +238,15 @@ VkResult anv_EndCommandBuffer( anv_cmd_buffer_end_batch_buffer(cmd_buffer); - /* The algorithm used to compute the validate list is not threadsafe as - * it uses the bo->index field. We have to lock the device around it. - * Fortunately, the chances for contention here are probably very low. - */ - pthread_mutex_lock(&device->mutex); - anv_cmd_buffer_prepare_execbuf(cmd_buffer); - pthread_mutex_unlock(&device->mutex); + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { + /* The algorithm used to compute the validate list is not threadsafe as + * it uses the bo->index field. We have to lock the device around it. + * Fortunately, the chances for contention here are probably very low. 
+ */ + pthread_mutex_lock(&device->mutex); + anv_cmd_buffer_prepare_execbuf(cmd_buffer); + pthread_mutex_unlock(&device->mutex); + } return VK_SUCCESS; } @@ -1282,8 +1298,6 @@ void anv_CmdBeginRenderPass( ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - assert(contents == VK_RENDER_PASS_CONTENTS_INLINE); - cmd_buffer->state.framebuffer = framebuffer; cmd_buffer->state.pass = pass; @@ -1311,7 +1325,7 @@ void anv_CmdNextSubpass( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - assert(contents == VK_RENDER_PASS_CONTENTS_INLINE); + assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); } @@ -1341,5 +1355,17 @@ void anv_CmdExecuteCommands( uint32_t cmdBuffersCount, const VkCmdBuffer* pCmdBuffers) { - stub(); + ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer); + + assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + + anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]); + + for (uint32_t i = 0; i < cmdBuffersCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); + + assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY); + + anv_cmd_buffer_add_secondary(primary, secondary); + } } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index a12076f790d..46e2d9b6d1b 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -736,6 +736,8 @@ VkResult anv_QueueSubmit( for (uint32_t i = 0; i < cmdBufferCount; i++) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCmdBuffers[i]); + assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + if (device->dump_aub) anv_cmd_buffer_dump(cmd_buffer); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 0437677de0a..bece3bcec24 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -736,6 +736,9 @@ struct anv_cmd_buffer { struct anv_state_stream 
surface_state_stream; struct anv_state_stream dynamic_state_stream; + VkCmdBufferOptimizeFlags opt_flags; + VkCmdBufferLevel level; + struct anv_cmd_state state; }; @@ -743,6 +746,8 @@ VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary); void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); struct anv_bo * -- cgit v1.2.3 From 4c2a182a3646283cd99b6e7dd1060b107a9d3e0c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 30 Jul 2015 14:22:17 -0700 Subject: vk/cmd_buffer: Add support for zero-copy batch chaining --- src/vulkan/anv_cmd_buffer.c | 77 ++++++++++++++++++++++++++++++++++++++++----- src/vulkan/anv_private.h | 8 +++++ 2 files changed, 78 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index cdde12a513b..28a3af7a9b8 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -634,6 +634,37 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) /* Round batch up to an even number of dwords. */ if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP); + + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; + } else { + /* If this is a secondary command buffer, we need to determine the + * mode in which it will be executed with vkExecuteCommands. We + * determine this statically here so that this stays in sync with the + * actual ExecuteCommands implementation. 
+ */ + if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && + (anv_cmd_buffer_current_batch_bo(cmd_buffer)->length < + ANV_CMD_BUFFER_BATCH_SIZE / 2)) { + /* If the secondary has exactly one batch buffer in its list *and* + * that batch buffer is less than half of the maximum size, we're + * probably better of simply copying it into our batch. + */ + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; + } else if (cmd_buffer->opt_flags & + VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT) { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; + + /* For chaining mode, we need to increment the number of + * relocations. This is because, when we chain, we need to add + * an MI_BATCH_BUFFER_START command. Adding this command will + * also add a relocation. In order to handle theis we'll + * increment it here and decrement it right before adding the + * MI_BATCH_BUFFER_START command. + */ + anv_cmd_buffer_current_batch_bo(cmd_buffer)->relocs.num_relocs++; + } else { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; + } } anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); @@ -660,14 +691,42 @@ void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, struct anv_cmd_buffer *secondary) { - if ((secondary->batch_bos.next == secondary->batch_bos.prev) && - anv_cmd_buffer_current_batch_bo(secondary)->length < ANV_CMD_BUFFER_BATCH_SIZE / 2) { - /* If the secondary has exactly one batch buffer in its list *and* - * that batch buffer is less than half of the maximum size, we're - * probably better of simply copying it into our batch. 
- */ + switch (secondary->exec_mode) { + case ANV_CMD_BUFFER_EXEC_MODE_EMIT: anv_batch_emit_batch(&primary->batch, &secondary->batch); - } else { + break; + case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { + struct anv_batch_bo *first_bbo = + list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); + + anv_batch_emit(&primary->batch, GEN8_MI_BATCH_BUFFER_START, + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &first_bbo->bo, 0 }, + ); + + struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); + assert(primary->batch.start == this_bbo->bo.map); + uint32_t offset = primary->batch.next - primary->batch.start; + + struct GEN8_MI_BATCH_BUFFER_START ret = { + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &this_bbo->bo, offset }, + }; + last_bbo->relocs.num_relocs++; + GEN8_MI_BATCH_BUFFER_START_pack(&secondary->batch, + last_bbo->bo.map + last_bbo->length, + &ret); + + anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); + break; + } + case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { struct list_head copy_list; VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, secondary->device, @@ -690,6 +749,10 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, GEN8_MI_BATCH_BUFFER_START_length * 4); anv_cmd_buffer_emit_state_base_address(primary); + break; + } + default: + assert(!"Invalid execution mode"); } /* Mark the surface buffer from the secondary as seen */ diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index bece3bcec24..d2b4b70e97f 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -692,6 +692,13 @@ struct anv_cmd_state { #define ANV_CMD_BUFFER_BATCH_SIZE 8192 +enum 
anv_cmd_buffer_exec_mode { + ANV_CMD_BUFFER_EXEC_MODE_PRIMARY, + ANV_CMD_BUFFER_EXEC_MODE_EMIT, + ANV_CMD_BUFFER_EXEC_MODE_CHAIN, + ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, +}; + struct anv_cmd_buffer { struct anv_device * device; @@ -704,6 +711,7 @@ struct anv_cmd_buffer { struct list_head batch_bos; struct list_head surface_bos; uint32_t surface_next; + enum anv_cmd_buffer_exec_mode exec_mode; /* A vector of anv_batch_bo pointers for every batch or surface buffer * referenced by this command buffer -- cgit v1.2.3 From e379cd9a0e2822851c1745b473521e3a49bfbdd3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 30 Jul 2015 14:55:49 -0700 Subject: vk/cmd_buffer: Add a simple command pool implementation --- src/vulkan/anv_cmd_emit.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_device.c | 27 ------------------------ src/vulkan/anv_private.h | 7 ++++++ 3 files changed, 61 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c index 64acec82154..3b9e67fdd0f 100644 --- a/src/vulkan/anv_cmd_emit.c +++ b/src/vulkan/anv_cmd_emit.c @@ -63,6 +63,7 @@ VkResult anv_CreateCommandBuffer( VkCmdBuffer* pCmdBuffer) { ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool); struct anv_cmd_buffer *cmd_buffer; VkResult result; @@ -87,6 +88,8 @@ VkResult anv_CreateCommandBuffer( anv_cmd_state_init(&cmd_buffer->state); + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); return VK_SUCCESS; @@ -103,6 +106,8 @@ VkResult anv_DestroyCommandBuffer( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); + list_del(&cmd_buffer->pool_link); + anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); anv_state_stream_finish(&cmd_buffer->surface_state_stream); @@ -1369,3 +1374,52 @@ void anv_CmdExecuteCommands( anv_cmd_buffer_add_secondary(primary, secondary); } 
} + +VkResult anv_CreateCommandPool( + VkDevice _device, + const VkCmdPoolCreateInfo* pCreateInfo, + VkCmdPool* pCmdPool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_cmd_pool *pool; + + pool = anv_device_alloc(device, sizeof(*pool), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + list_inithead(&pool->cmd_buffers); + + *pCmdPool = anv_cmd_pool_to_handle(pool); + + return VK_SUCCESS; +} + +VkResult anv_DestroyCommandPool( + VkDevice _device, + VkCmdPool cmdPool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); + + anv_ResetCommandPool(_device, cmdPool, 0); + + anv_device_free(device, pool); + + return VK_SUCCESS; +} + +VkResult anv_ResetCommandPool( + VkDevice device, + VkCmdPool cmdPool, + VkCmdPoolResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); + + list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, + &pool->cmd_buffers, pool_link) { + anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer)); + } + + return VK_SUCCESS; +} diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 46e2d9b6d1b..01eff02d25d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -2170,33 +2170,6 @@ VkResult anv_DestroyDynamicDepthStencilState( return VK_SUCCESS; } -// Command buffer functions - -VkResult anv_CreateCommandPool( - VkDevice device, - const VkCmdPoolCreateInfo* pCreateInfo, - VkCmdPool* pCmdPool) -{ - pCmdPool->handle = 7; - - stub_return(VK_SUCCESS); -} - -VkResult anv_DestroyCommandPool( - VkDevice device, - VkCmdPool cmdPool) -{ - stub_return(VK_SUCCESS); -} - -VkResult anv_ResetCommandPool( - VkDevice device, - VkCmdPool cmdPool, - VkCmdPoolResetFlags flags) -{ - stub_return(VK_UNSUPPORTED); -} - VkResult anv_CreateFramebuffer( VkDevice _device, const VkFramebufferCreateInfo* pCreateInfo, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 
d2b4b70e97f..12f826c1ab1 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -690,6 +690,10 @@ struct anv_cmd_state { struct anv_descriptor_set_binding descriptors[MAX_SETS]; }; +struct anv_cmd_pool { + struct list_head cmd_buffers; +}; + #define ANV_CMD_BUFFER_BATCH_SIZE 8192 enum anv_cmd_buffer_exec_mode { @@ -702,6 +706,8 @@ enum anv_cmd_buffer_exec_mode { struct anv_cmd_buffer { struct anv_device * device; + struct list_head pool_link; + struct anv_batch batch; /* Fields required for the actual chain of anv_batch_bo's. @@ -1088,6 +1094,7 @@ ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) ANV_DEFINE_HANDLE_CASTS(anv_swap_chain, VkSwapChainWSI); +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCmdPool) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_attachment_view, VkAttachmentView) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView); -- cgit v1.2.3 From 26ba0ad54d6bef6237abfabf5a3f572c325951d3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 30 Jul 2015 14:59:02 -0700 Subject: vk: Re-name command buffer implementation files Previously, the command buffer implementation was split between anv_cmd_buffer.c and anv_cmd_emit.c. However, this naming convention was confusing because none of the Vulkan entrypoints for anv_cmd_buffer were actually in anv_cmd_buffer.c. This changes it so that anv_cmd_buffer.c is what you think it is and the internals are in anv_batch_chain.c. 
--- src/vulkan/Makefile.am | 2 +- src/vulkan/anv_batch_chain.c | 926 +++++++++++++++++++++ src/vulkan/anv_cmd_buffer.c | 1821 +++++++++++++++++++++++++++--------------- src/vulkan/anv_cmd_emit.c | 1425 --------------------------------- 4 files changed, 2087 insertions(+), 2087 deletions(-) create mode 100644 src/vulkan/anv_batch_chain.c delete mode 100644 src/vulkan/anv_cmd_emit.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 6d1212c532c..c816f97034f 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -57,7 +57,7 @@ libvulkan_la_SOURCES = \ anv_allocator.c \ anv_aub.c \ anv_cmd_buffer.c \ - anv_cmd_emit.c \ + anv_batch_chain.c \ anv_compiler.cpp \ anv_device.c \ anv_entrypoints.c \ diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c new file mode 100644 index 00000000000..2f09248acee --- /dev/null +++ b/src/vulkan/anv_batch_chain.c @@ -0,0 +1,926 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/** \file anv_batch_chain.c + * + * This file contains functions related to anv_cmd_buffer as a data + * structure. This involves everything required to create and destroy + * the actual batch buffers as well as link them together and handle + * relocations and surface state. It specifically does *not* contain any + * handling of actual vkCmd calls beyond vkCmdExecuteCommands. + */ + +/*-----------------------------------------------------------------------* + * Functions related to anv_reloc_list + *-----------------------------------------------------------------------*/ + +static VkResult +anv_reloc_list_init_clone(struct anv_reloc_list *list, + struct anv_device *device, + const struct anv_reloc_list *other_list) +{ + if (other_list) { + list->num_relocs = other_list->num_relocs; + list->array_length = other_list->array_length; + } else { + list->num_relocs = 0; + list->array_length = 256; + } + + list->relocs = + anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + + if (list->relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + list->reloc_bos = + anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + + if (list->reloc_bos == NULL) { + anv_device_free(device, list->relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (other_list) { + memcpy(list->relocs, other_list->relocs, + list->array_length * sizeof(*list->relocs)); + memcpy(list->reloc_bos, other_list->reloc_bos, + list->array_length * sizeof(*list->reloc_bos)); + } + + return VK_SUCCESS; +} + +VkResult 
+anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) +{ + return anv_reloc_list_init_clone(list, device, NULL); +} + +void +anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device) +{ + anv_device_free(device, list->relocs); + anv_device_free(device, list->reloc_bos); +} + +static VkResult +anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, + size_t num_additional_relocs) +{ + if (list->num_relocs + num_additional_relocs <= list->array_length) + return VK_SUCCESS; + + size_t new_length = list->array_length * 2; + while (new_length < list->num_relocs + num_additional_relocs) + new_length *= 2; + + struct drm_i915_gem_relocation_entry *new_relocs = + anv_device_alloc(device, new_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_reloc_bos = + anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_relocs == NULL) { + anv_device_free(device, new_relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); + memcpy(new_reloc_bos, list->reloc_bos, + list->num_relocs * sizeof(*list->reloc_bos)); + + anv_device_free(device, list->relocs); + anv_device_free(device, list->reloc_bos); + + list->array_length = new_length; + list->relocs = new_relocs; + list->reloc_bos = new_reloc_bos; + + return VK_SUCCESS; +} + +uint64_t +anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device, + uint32_t offset, struct anv_bo *target_bo, uint32_t delta) +{ + struct drm_i915_gem_relocation_entry *entry; + int index; + + anv_reloc_list_grow(list, device, 1); + /* TODO: Handle failure */ + + /* XXX: Can we use I915_EXEC_HANDLE_LUT? 
*/ + index = list->num_relocs++; + list->reloc_bos[index] = target_bo; + entry = &list->relocs[index]; + entry->target_handle = target_bo->gem_handle; + entry->delta = delta; + entry->offset = offset; + entry->presumed_offset = target_bo->offset; + entry->read_domains = 0; + entry->write_domain = 0; + + return target_bo->offset + delta; +} + +static void +anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device, + struct anv_reloc_list *other, uint32_t offset) +{ + anv_reloc_list_grow(list, device, other->num_relocs); + /* TODO: Handle failure */ + + memcpy(&list->relocs[list->num_relocs], &other->relocs[0], + other->num_relocs * sizeof(other->relocs[0])); + memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], + other->num_relocs * sizeof(other->reloc_bos[0])); + + for (uint32_t i = 0; i < other->num_relocs; i++) + list->relocs[i + list->num_relocs].offset += offset; + + list->num_relocs += other->num_relocs; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch + *-----------------------------------------------------------------------*/ + +void * +anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) +{ + if (batch->next + num_dwords * 4 > batch->end) + batch->extend_cb(batch, batch->user_data); + + void *p = batch->next; + + batch->next += num_dwords * 4; + assert(batch->next <= batch->end); + + return p; +} + +uint64_t +anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t delta) +{ + return anv_reloc_list_add(batch->relocs, batch->device, + location - batch->start, bo, delta); +} + +void +anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +{ + uint32_t size, offset; + + size = other->next - other->start; + assert(size % 4 == 0); + + if (batch->next + size > batch->end) + batch->extend_cb(batch, batch->user_data); + + assert(batch->next + size <= batch->end); + + 
VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); + memcpy(batch->next, other->start, size); + + offset = batch->next - batch->start; + anv_reloc_list_append(batch->relocs, batch->device, + other->relocs, offset); + + batch->next += size; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static VkResult +anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = + anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init(&bbo->relocs, device); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_device_free(device, bbo); + + return result; +} + +static VkResult +anv_batch_bo_clone(struct anv_device *device, + const struct anv_batch_bo *other_bbo, + struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = + anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init_clone(&bbo->relocs, device, &other_bbo->relocs); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + bbo->length = other_bbo->length; + memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_device_free(device, bbo); + + return result; +} 
+ +static void +anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->next = batch->start = bbo->bo.map; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; + bbo->relocs.num_relocs = 0; +} + +static void +anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->start = bbo->bo.map; + batch->next = bbo->bo.map + bbo->length; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; +} + +static void +anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) +{ + assert(batch->start == bbo->bo.map); + bbo->length = batch->next - batch->start; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); +} + +static void +anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) +{ + anv_reloc_list_finish(&bbo->relocs, device); + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + anv_device_free(device, bbo); +} + +static VkResult +anv_batch_bo_list_clone(const struct list_head *list, struct anv_device *device, + struct list_head *new_list) +{ + VkResult result = VK_SUCCESS; + + list_inithead(new_list); + + struct anv_batch_bo *prev_bbo = NULL; + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo *new_bbo; + result = anv_batch_bo_clone(device, bbo, &new_bbo); + if (result != VK_SUCCESS) + break; + list_addtail(&new_bbo->link, new_list); + + if (prev_bbo) { + /* As we clone this list of batch_bo's, they chain one to the + * other using MI_BATCH_BUFFER_START commands. We need to fix up + * those relocations as we go. Fortunately, this is pretty easy + * as it will always be the last relocation in the list. 
+ */ + uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; + assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); + prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; + } + + prev_bbo = new_bbo; + } + + if (result != VK_SUCCESS) { + list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) + anv_batch_bo_destroy(bbo, device); + } + + return result; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static inline struct anv_batch_bo * +anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); +} + +static inline struct anv_batch_bo * +anv_cmd_buffer_current_surface_bbo(struct anv_cmd_buffer *cmd_buffer) +{ + return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->surface_bos.prev, link); +} + +struct anv_bo * +anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo; +} + +struct anv_reloc_list * +anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) +{ + return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs; +} + +static void +cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo *bbo) +{ + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_batch_bo *current_bbo = + anv_cmd_buffer_current_batch_bo(cmd_buffer); + + /* We set the end of the batch a little short so we would be sure we + * have room for the chaining command. Since we're about to emit the + * chaining command, let's set it back where it should go. 
+ */ + batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); + + anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &bbo->bo, 0 }, + ); + + anv_batch_bo_finish(current_bbo, batch); +} + +static VkResult +anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) +{ + struct anv_cmd_buffer *cmd_buffer = _data; + struct anv_batch_bo *new_bbo; + + VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + if (result != VK_SUCCESS) + return result; + + struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer->device); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + + cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); + + list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); + + anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment) +{ + struct anv_bo *surface_bo = + anv_cmd_buffer_current_surface_bo(cmd_buffer); + struct anv_state state; + + state.offset = align_u32(cmd_buffer->surface_next, alignment); + if (state.offset + size > surface_bo->size) + return (struct anv_state) { 0 }; + + state.map = surface_bo->map + state.offset; + state.alloc_size = size; + cmd_buffer->surface_next = state.offset + size; + + assert(state.offset + size <= surface_bo->size); + + return state; +} + +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment) +{ + return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + size, alignment); +} + +VkResult +anv_cmd_buffer_new_surface_state_bo(struct 
anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *new_bbo, *old_bbo = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); + + /* Finish off the old buffer */ + old_bbo->length = cmd_buffer->surface_next; + + VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + if (result != VK_SUCCESS) + return result; + + struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer->device); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + + cmd_buffer->surface_next = 1; + + list_addtail(&new_bbo->link, &cmd_buffer->surface_bos); + + return VK_SUCCESS; +} + +VkResult +anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo, *surface_bbo; + struct anv_device *device = cmd_buffer->device; + VkResult result; + + list_inithead(&cmd_buffer->batch_bos); + list_inithead(&cmd_buffer->surface_bos); + + result = anv_batch_bo_create(device, &batch_bo); + if (result != VK_SUCCESS) + return result; + + list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); + + cmd_buffer->batch.device = device; + cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; + cmd_buffer->batch.user_data = cmd_buffer; + + anv_batch_bo_start(batch_bo, &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + result = anv_batch_bo_create(device, &surface_bbo); + if (result != VK_SUCCESS) + goto fail_batch_bo; + + list_addtail(&surface_bbo->link, &cmd_buffer->surface_bos); + + int success = anv_vector_init(&cmd_buffer->seen_bbos, + sizeof(struct anv_bo *), + 8 * sizeof(struct anv_bo *)); + if (!success) + goto fail_surface_bo; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = surface_bbo; + + /* Start surface_next at 1 so surface offset 0 is invalid. 
*/ + cmd_buffer->surface_next = 1; + + cmd_buffer->execbuf2.objects = NULL; + cmd_buffer->execbuf2.bos = NULL; + cmd_buffer->execbuf2.array_length = 0; + + return VK_SUCCESS; + + fail_surface_bo: + anv_batch_bo_destroy(surface_bbo, device); + fail_batch_bo: + anv_batch_bo_destroy(batch_bo, device); + + return result; +} + +void +anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + + anv_vector_finish(&cmd_buffer->seen_bbos); + + /* Destroy all of the batch buffers */ + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->batch_bos, link) { + anv_batch_bo_destroy(bbo, device); + } + + /* Destroy all of the surface state buffers */ + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->surface_bos, link) { + anv_batch_bo_destroy(bbo, device); + } + + anv_device_free(device, cmd_buffer->execbuf2.objects); + anv_device_free(device, cmd_buffer->execbuf2.bos); +} + +void +anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + + /* Delete all but the first batch bo */ + assert(!list_empty(&cmd_buffer->batch_bos)); + while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, device); + } + assert(!list_empty(&cmd_buffer->batch_bos)); + + anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), + &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + /* Delete all but the first batch bo */ + assert(!list_empty(&cmd_buffer->batch_bos)); + while (cmd_buffer->surface_bos.next != cmd_buffer->surface_bos.prev) { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_surface_bbo(cmd_buffer); + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, device); + } + assert(!list_empty(&cmd_buffer->batch_bos)); + + 
anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs.num_relocs = 0; + + cmd_buffer->surface_next = 1; + + /* Reset the list of seen buffers */ + cmd_buffer->seen_bbos.head = 0; + cmd_buffer->seen_bbos.tail = 0; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = + anv_cmd_buffer_current_batch_bo(cmd_buffer); + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); +} + +void +anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + struct anv_batch_bo *surface_bbo = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); + + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END); + + /* Round batch up to an even number of dwords. */ + if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP); + + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; + } else { + /* If this is a secondary command buffer, we need to determine the + * mode in which it will be executed with vkExecuteCommands. We + * determine this statically here so that this stays in sync with the + * actual ExecuteCommands implementation. + */ + if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && + (anv_cmd_buffer_current_batch_bo(cmd_buffer)->length < + ANV_CMD_BUFFER_BATCH_SIZE / 2)) { + /* If the secondary has exactly one batch buffer in its list *and* + * that batch buffer is less than half of the maximum size, we're + * probably better of simply copying it into our batch. + */ + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; + } else if (cmd_buffer->opt_flags & + VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT) { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; + + /* For chaining mode, we need to increment the number of + * relocations. 
This is because, when we chain, we need to add + * an MI_BATCH_BUFFER_START command. Adding this command will + * also add a relocation. In order to handle theis we'll + * increment it here and decrement it right before adding the + * MI_BATCH_BUFFER_START command. + */ + anv_cmd_buffer_current_batch_bo(cmd_buffer)->relocs.num_relocs++; + } else { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; + } + } + + anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + + surface_bbo->length = cmd_buffer->surface_next; +} + +static inline VkResult +anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, + struct list_head *list) +{ + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos); + if (bbo_ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *bbo_ptr = bbo; + } + + return VK_SUCCESS; +} + +void +anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary) +{ + switch (secondary->exec_mode) { + case ANV_CMD_BUFFER_EXEC_MODE_EMIT: + anv_batch_emit_batch(&primary->batch, &secondary->batch); + break; + case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { + struct anv_batch_bo *first_bbo = + list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); + + anv_batch_emit(&primary->batch, GEN8_MI_BATCH_BUFFER_START, + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &first_bbo->bo, 0 }, + ); + + struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); + assert(primary->batch.start == this_bbo->bo.map); + uint32_t offset = primary->batch.next - primary->batch.start; + + struct GEN8_MI_BATCH_BUFFER_START ret = { + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = 
ASI_PPGTT, + .BatchBufferStartAddress = { &this_bbo->bo, offset }, + }; + last_bbo->relocs.num_relocs++; + GEN8_MI_BATCH_BUFFER_START_pack(&secondary->batch, + last_bbo->bo.map + last_bbo->length, + &ret); + + anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); + break; + } + case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { + struct list_head copy_list; + VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, + secondary->device, + ©_list); + if (result != VK_SUCCESS) + return; /* FIXME */ + + anv_cmd_buffer_add_seen_bbos(primary, ©_list); + + struct anv_batch_bo *first_bbo = + list_first_entry(©_list, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(©_list, struct anv_batch_bo, link); + + cmd_buffer_chain_to_batch_bo(primary, first_bbo); + + list_splicetail(©_list, &primary->batch_bos); + + anv_batch_bo_continue(last_bbo, &primary->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + anv_cmd_buffer_emit_state_base_address(primary); + break; + } + default: + assert(!"Invalid execution mode"); + } + + /* Mark the surface buffer from the secondary as seen */ + anv_cmd_buffer_add_seen_bbos(primary, &secondary->surface_bos); +} + +static VkResult +anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, + struct anv_reloc_list *relocs) +{ + struct drm_i915_gem_exec_object2 *obj = NULL; + + if (bo->index < cmd_buffer->execbuf2.bo_count && + cmd_buffer->execbuf2.bos[bo->index] == bo) + obj = &cmd_buffer->execbuf2.objects[bo->index]; + + if (obj == NULL) { + /* We've never seen this one before. Add it to the list and assign + * an id that we can use later. + */ + if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { + uint32_t new_len = cmd_buffer->execbuf2.objects ? 
+ cmd_buffer->execbuf2.array_length * 2 : 64; + + struct drm_i915_gem_exec_object2 *new_objects = + anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_objects == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_bos = + anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_objects == NULL) { + anv_device_free(cmd_buffer->device, new_objects); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (cmd_buffer->execbuf2.objects) { + memcpy(new_objects, cmd_buffer->execbuf2.objects, + cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); + memcpy(new_bos, cmd_buffer->execbuf2.bos, + cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); + } + + cmd_buffer->execbuf2.objects = new_objects; + cmd_buffer->execbuf2.bos = new_bos; + cmd_buffer->execbuf2.array_length = new_len; + } + + assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); + + bo->index = cmd_buffer->execbuf2.bo_count++; + obj = &cmd_buffer->execbuf2.objects[bo->index]; + cmd_buffer->execbuf2.bos[bo->index] = bo; + + obj->handle = bo->gem_handle; + obj->relocation_count = 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = 0; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + } + + if (relocs != NULL && obj->relocation_count == 0) { + /* This is the first time we've ever seen a list of relocations for + * this BO. Go ahead and set the relocations and then walk the list + * of relocations and add them all. 
+ */ + obj->relocation_count = relocs->num_relocs; + obj->relocs_ptr = (uintptr_t) relocs->relocs; + + for (size_t i = 0; i < relocs->num_relocs; i++) + anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); + } + + return VK_SUCCESS; +} + +static void +anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, + struct anv_reloc_list *list) +{ + struct anv_bo *bo; + + /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in + * struct drm_i915_gem_exec_object2 against the bos current offset and if + * all bos haven't moved it will skip relocation processing alltogether. + * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming + * value of offset so we can set it either way. For that to work we need + * to make sure all relocs use the same presumed offset. + */ + + for (size_t i = 0; i < list->num_relocs; i++) { + bo = list->reloc_bos[i]; + if (bo->offset != list->relocs[i].presumed_offset) + cmd_buffer->execbuf2.need_reloc = true; + + list->relocs[i].target_handle = bo->index; + } +} + +void +anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch *batch = &cmd_buffer->batch; + + cmd_buffer->execbuf2.bo_count = 0; + cmd_buffer->execbuf2.need_reloc = false; + + /* First, we walk over all of the bos we've seen and add them and their + * relocations to the validate list. + */ + struct anv_batch_bo **bbo; + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); + + struct anv_batch_bo *first_batch_bo = + list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); + + /* The kernel requires that the last entry in the validation list be the + * batch buffer to execute. We can simply swap the element + * corresponding to the first batch_bo in the chain with the last + * element in the list. 
+ */ + if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { + uint32_t idx = first_batch_bo->bo.index; + + struct drm_i915_gem_exec_object2 tmp_obj = + cmd_buffer->execbuf2.objects[idx]; + assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); + + cmd_buffer->execbuf2.objects[idx] = + cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1]; + cmd_buffer->execbuf2.bos[idx] = + cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1]; + cmd_buffer->execbuf2.bos[idx]->index = idx; + + cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1] = tmp_obj; + cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1] = + &first_batch_bo->bo; + first_batch_bo->bo.index = cmd_buffer->execbuf2.bo_count - 1; + } + + /* Now we go through and fixup all of the relocation lists to point to + * the correct indices in the object array. We have to do this after we + * reorder the list above as some of the indices may have changed. + */ + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); + + cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, + .buffer_count = cmd_buffer->execbuf2.bo_count, + .batch_start_offset = 0, + .batch_len = batch->next - batch->start, + .cliprects_ptr = 0, + .num_cliprects = 0, + .DR1 = 0, + .DR4 = 0, + .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER, + .rsvd1 = cmd_buffer->device->context_id, + .rsvd2 = 0, + }; + + if (!cmd_buffer->execbuf2.need_reloc) + cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; +} diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 28a3af7a9b8..3b9e67fdd0f 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -31,896 +31,1395 @@ /** \file anv_cmd_buffer.c * - * This file contains functions related to anv_cmd_buffer as a data - * structure. 
This involves everything required to create and destroy - * the actual batch buffers as well as link them together and handle - * relocations and surface state. It specifically does *not* contain any - * handling of actual vkCmd calls beyond vkCmdExecuteCommands. + * This file contains all of the stuff for emitting commands into a command + * buffer. This includes implementations of most of the vkCmd* + * entrypoints. This file is concerned entirely with state emission and + * not with the command buffer data structure itself. As far as this file + * is concerned, most of anv_cmd_buffer is magic. */ -/*-----------------------------------------------------------------------* - * Functions related to anv_reloc_list - *-----------------------------------------------------------------------*/ - -static VkResult -anv_reloc_list_init_clone(struct anv_reloc_list *list, - struct anv_device *device, - const struct anv_reloc_list *other_list) +static void +anv_cmd_state_init(struct anv_cmd_state *state) { - if (other_list) { - list->num_relocs = other_list->num_relocs; - list->array_length = other_list->array_length; - } else { - list->num_relocs = 0; - list->array_length = 256; - } + state->rs_state = NULL; + state->vp_state = NULL; + state->cb_state = NULL; + state->ds_state = NULL; + memset(&state->state_vf, 0, sizeof(state->state_vf)); + memset(&state->descriptors, 0, sizeof(state->descriptors)); + + state->dirty = 0; + state->vb_dirty = 0; + state->descriptors_dirty = 0; + state->pipeline = NULL; + state->vp_state = NULL; + state->rs_state = NULL; + state->ds_state = NULL; +} - list->relocs = - anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); +VkResult anv_CreateCommandBuffer( + VkDevice _device, + const VkCmdBufferCreateInfo* pCreateInfo, + VkCmdBuffer* pCmdBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool); + struct anv_cmd_buffer *cmd_buffer; + 
VkResult result; - if (list->relocs == NULL) + cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - list->reloc_bos = - anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); + cmd_buffer->device = device; - if (list->reloc_bos == NULL) { - anv_device_free(device, list->relocs); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } + result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); + if (result != VK_SUCCESS) + goto fail; - if (other_list) { - memcpy(list->relocs, other_list->relocs, - list->array_length * sizeof(*list->relocs)); - memcpy(list->reloc_bos, other_list->reloc_bos, - list->array_length * sizeof(*list->reloc_bos)); - } + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &device->dynamic_state_block_pool); + + cmd_buffer->level = pCreateInfo->level; + cmd_buffer->opt_flags = 0; + + anv_cmd_state_init(&cmd_buffer->state); + + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + + *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); return VK_SUCCESS; + + fail: anv_device_free(device, cmd_buffer); + + return result; } -VkResult -anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) +VkResult anv_DestroyCommandBuffer( + VkDevice _device, + VkCmdBuffer _cmd_buffer) { - return anv_reloc_list_init_clone(list, device, NULL); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); + + list_del(&cmd_buffer->pool_link); + + anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_device_free(device, cmd_buffer); + + return VK_SUCCESS; +} + +VkResult anv_ResetCommandBuffer( + 
VkCmdBuffer cmdBuffer, + VkCmdBufferResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); + + anv_cmd_state_init(&cmd_buffer->state); + + return VK_SUCCESS; } void -anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device) +anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { - anv_device_free(device, list->relocs); - anv_device_free(device, list->reloc_bos); + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->state.scratch_size = device->scratch_block_pool.size; + if (cmd_buffer->state.scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, + .GeneralStateBaseAddress = { scratch_bo, 0 }, + .GeneralStateMemoryObjectControlState = GEN8_MOCS, + .GeneralStateBaseAddressModifyEnable = true, + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + + .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, + .SurfaceStateMemoryObjectControlState = GEN8_MOCS, + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GEN8_MOCS, + .DynamicStateBaseAddressModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GEN8_MOCS, + .IndirectObjectBaseAddressModifyEnable = true, + .IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GEN8_MOCS, + .InstructionBaseAddressModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true); + + /* After re-setting the surface state 
base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .TextureCacheInvalidationEnable = true); } -static VkResult -anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, - size_t num_additional_relocs) +VkResult anv_BeginCommandBuffer( + VkCmdBuffer cmdBuffer, + const VkCmdBufferBeginInfo* pBeginInfo) { - if (list->num_relocs + num_additional_relocs <= list->array_length) - return VK_SUCCESS; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - size_t new_length = list->array_length * 2; - while (new_length < list->num_relocs + num_additional_relocs) - new_length *= 2; + cmd_buffer->opt_flags = pBeginInfo->flags; - struct drm_i915_gem_relocation_entry *new_relocs = - anv_device_alloc(device, new_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_relocs == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { + cmd_buffer->state.framebuffer = + anv_framebuffer_from_handle(pBeginInfo->framebuffer); + cmd_buffer->state.pass = + anv_render_pass_from_handle(pBeginInfo->renderPass); - struct anv_bo **new_reloc_bos = - anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_relocs == NULL) { - anv_device_free(device, new_relocs); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + /* FIXME: We shouldn't be starting on the first subpass */ + anv_cmd_buffer_begin_subpass(cmd_buffer, + &cmd_buffer->state.pass->subpasses[0]); } - memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); - memcpy(new_reloc_bos, list->reloc_bos, - list->num_relocs * sizeof(*list->reloc_bos)); - - anv_device_free(device, list->relocs); - anv_device_free(device, list->reloc_bos); - - list->array_length = new_length; - list->relocs = new_relocs; - list->reloc_bos = new_reloc_bos; + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + cmd_buffer->state.current_pipeline = UINT32_MAX; return VK_SUCCESS; } 
-uint64_t -anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device, - uint32_t offset, struct anv_bo *target_bo, uint32_t delta) +VkResult anv_EndCommandBuffer( + VkCmdBuffer cmdBuffer) { - struct drm_i915_gem_relocation_entry *entry; - int index; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_device *device = cmd_buffer->device; - anv_reloc_list_grow(list, device, 1); - /* TODO: Handle failure */ + anv_cmd_buffer_end_batch_buffer(cmd_buffer); - /* XXX: Can we use I915_EXEC_HANDLE_LUT? */ - index = list->num_relocs++; - list->reloc_bos[index] = target_bo; - entry = &list->relocs[index]; - entry->target_handle = target_bo->gem_handle; - entry->delta = delta; - entry->offset = offset; - entry->presumed_offset = target_bo->offset; - entry->read_domains = 0; - entry->write_domain = 0; + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { + /* The algorithm used to compute the validate list is not threadsafe as + * it uses the bo->index field. We have to lock the device around it. + * Fortunately, the chances for contention here are probably very low. 
+ */ + pthread_mutex_lock(&device->mutex); + anv_cmd_buffer_prepare_execbuf(cmd_buffer); + pthread_mutex_unlock(&device->mutex); + } - return target_bo->offset + delta; + return VK_SUCCESS; } -static void -anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device, - struct anv_reloc_list *other, uint32_t offset) +void anv_CmdBindPipeline( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) { - anv_reloc_list_grow(list, device, other->num_relocs); - /* TODO: Handle failure */ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - memcpy(&list->relocs[list->num_relocs], &other->relocs[0], - other->num_relocs * sizeof(other->relocs[0])); - memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], - other->num_relocs * sizeof(other->reloc_bos[0])); + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + cmd_buffer->state.compute_pipeline = pipeline; + cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; - for (uint32_t i = 0; i < other->num_relocs; i++) - list->relocs[i + list->num_relocs].offset += offset; + case VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd_buffer->state.pipeline = pipeline; + cmd_buffer->state.vb_dirty |= pipeline->vb_used; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; - list->num_relocs += other->num_relocs; + default: + assert(!"invalid bind point"); + break; + } } -/*-----------------------------------------------------------------------* - * Functions related to anv_batch - *-----------------------------------------------------------------------*/ +void anv_CmdBindDynamicViewportState( + VkCmdBuffer cmdBuffer, + VkDynamicViewportState dynamicViewportState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState); -void * -anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) + 
cmd_buffer->state.vp_state = vp_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY; +} + +void anv_CmdBindDynamicRasterState( + VkCmdBuffer cmdBuffer, + VkDynamicRasterState dynamicRasterState) { - if (batch->next + num_dwords * 4 > batch->end) - batch->extend_cb(batch, batch->user_data); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState); - void *p = batch->next; + cmd_buffer->state.rs_state = rs_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY; +} - batch->next += num_dwords * 4; - assert(batch->next <= batch->end); +void anv_CmdBindDynamicColorBlendState( + VkCmdBuffer cmdBuffer, + VkDynamicColorBlendState dynamicColorBlendState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState); - return p; + cmd_buffer->state.cb_state = cb_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY; } -uint64_t -anv_batch_emit_reloc(struct anv_batch *batch, - void *location, struct anv_bo *bo, uint32_t delta) +void anv_CmdBindDynamicDepthStencilState( + VkCmdBuffer cmdBuffer, + VkDynamicDepthStencilState dynamicDepthStencilState) { - return anv_reloc_list_add(batch->relocs, batch->device, - location - batch->start, bo, delta); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState); + + cmd_buffer->state.ds_state = ds_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY; } -void -anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +void anv_CmdBindDescriptorSets( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t firstSet, + uint32_t setCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t* pDynamicOffsets) { - uint32_t size, offset; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + 
ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + struct anv_descriptor_set_layout *set_layout; - size = other->next - other->start; - assert(size % 4 == 0); + assert(firstSet + setCount < MAX_SETS); - if (batch->next + size > batch->end) - batch->extend_cb(batch, batch->user_data); + uint32_t dynamic_slot = 0; + for (uint32_t i = 0; i < setCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + set_layout = layout->set[firstSet + i].layout; - assert(batch->next + size <= batch->end); + cmd_buffer->state.descriptors[firstSet + i].set = set; - VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); - memcpy(batch->next, other->start, size); + assert(set_layout->num_dynamic_buffers < + ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets)); + memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets, + pDynamicOffsets + dynamic_slot, + set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); - offset = batch->next - batch->start; - anv_reloc_list_append(batch->relocs, batch->device, - other->relocs, offset); + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; - batch->next += size; + dynamic_slot += set_layout->num_dynamic_buffers; + } } -/*-----------------------------------------------------------------------* - * Functions related to anv_batch_bo - *-----------------------------------------------------------------------*/ - -static VkResult -anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) +void anv_CmdBindIndexBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) { - VkResult result; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_batch_bo *bbo = - anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (bbo == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + static const uint32_t vk_to_gen_index_type[] = { + 
[VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); - if (result != VK_SUCCESS) - goto fail_alloc; + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX, + }; + GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); - result = anv_reloc_list_init(&bbo->relocs, device); - if (result != VK_SUCCESS) - goto fail_bo_alloc; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; - *bbo_out = bbo; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = GEN8_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); +} - return VK_SUCCESS; +void anv_CmdBindVertexBuffers( + VkCmdBuffer cmdBuffer, + uint32_t startBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; - fail_bo_alloc: - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); - fail_alloc: - anv_device_free(device, bbo); + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. 
*/ - return result; + assert(startBinding + bindingCount < MAX_VBS); + for (uint32_t i = 0; i < bindingCount; i++) { + vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); + vb[startBinding + i].offset = pOffsets[i]; + cmd_buffer->state.vb_dirty |= 1 << (startBinding + i); + } } static VkResult -anv_batch_bo_clone(struct anv_device *device, - const struct anv_batch_bo *other_bbo, - struct anv_batch_bo **bbo_out) +cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *bt_state) { - VkResult result; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_pipeline_layout *layout; + uint32_t attachments, bias, size; + + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->state.compute_pipeline->layout; + else + layout = cmd_buffer->state.pipeline->layout; + + if (stage == VK_SHADER_STAGE_FRAGMENT) { + bias = MAX_RTS; + attachments = subpass->color_count; + } else { + bias = 0; + attachments = 0; + } - struct anv_batch_bo *bbo = - anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (bbo == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + /* This is a little awkward: layout can be NULL but we still have to + * allocate and set a binding table for the PS stage for render + * targets. */ + uint32_t surface_count = layout ? 
layout->stage[stage].surface_count : 0; - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); - if (result != VK_SUCCESS) - goto fail_alloc; + if (attachments + surface_count == 0) + return VK_SUCCESS; - result = anv_reloc_list_init_clone(&bbo->relocs, device, &other_bbo->relocs); - if (result != VK_SUCCESS) - goto fail_bo_alloc; + size = (bias + surface_count) * sizeof(uint32_t); + *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); + uint32_t *bt_map = bt_state->map; - bbo->length = other_bbo->length; - memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); + if (bt_state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; - *bbo_out = bbo; + /* This is highly annoying. The Vulkan spec puts the depth-stencil + * attachments in with the color attachments. Unfortunately, thanks to + * other aspects of the API, we cana't really saparate them before this + * point. Therefore, we have to walk all of the attachments but only + * put the color attachments into the binding table. 
+ */ + for (uint32_t a = 0; a < attachments; a++) { + const struct anv_attachment_view *attachment = + fb->attachments[subpass->color_attachments[a]]; - return VK_SUCCESS; + assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); + const struct anv_color_attachment_view *view = + (const struct anv_color_attachment_view *)attachment; - fail_bo_alloc: - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); - fail_alloc: - anv_device_free(device, bbo); + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - return result; -} + if (state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; -static void -anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, - size_t batch_padding) -{ - batch->next = batch->start = bbo->bo.map; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - batch->relocs = &bbo->relocs; - bbo->relocs.num_relocs = 0; + memcpy(state.map, view->view.surface_state.map, 64); + + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), + cmd_buffer->device, + state.offset + 8 * 4, + view->view.bo, view->view.offset); + + bt_map[a] = state.offset; + } + + if (layout == NULL) + return VK_SUCCESS; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct anv_descriptor_slot *surface_slots = + set_layout->stage[stage].surface_start; + + uint32_t start = bias + layout->set[set].surface_start[stage]; + + for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) { + struct anv_surface_view *view = + d->set->descriptors[surface_slots[b].index].view; + + if (!view) + continue; + + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); + + if (state.map == NULL) + return 
VK_ERROR_OUT_OF_DEVICE_MEMORY; + + uint32_t offset; + if (surface_slots[b].dynamic_slot >= 0) { + uint32_t dynamic_offset = + d->dynamic_offsets[surface_slots[b].dynamic_slot]; + + offset = view->offset + dynamic_offset; + anv_fill_buffer_surface_state(state.map, view->format, offset, + view->range - dynamic_offset); + } else { + offset = view->offset; + memcpy(state.map, view->surface_state.map, 64); + } + + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), + cmd_buffer->device, + state.offset + 8 * 4, + view->bo, offset); + + bt_map[start + b] = state.offset; + } + } + + return VK_SUCCESS; } -static void -anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, - size_t batch_padding) +static VkResult +cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *state) { - batch->start = bbo->bo.map; - batch->next = bbo->bo.map + bbo->length; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - batch->relocs = &bbo->relocs; + struct anv_pipeline_layout *layout; + uint32_t sampler_count; + + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->state.compute_pipeline->layout; + else + layout = cmd_buffer->state.pipeline->layout; + + sampler_count = layout ? 
layout->stage[stage].sampler_count : 0; + if (sampler_count == 0) + return VK_SUCCESS; + + uint32_t size = sampler_count * 16; + *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); + + if (state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct anv_descriptor_slot *sampler_slots = + set_layout->stage[stage].sampler_start; + + uint32_t start = layout->set[set].sampler_start[stage]; + + for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) { + struct anv_sampler *sampler = + d->set->descriptors[sampler_slots[b].index].sampler; + + if (!sampler) + continue; + + memcpy(state->map + (start + b) * 16, + sampler->state, sizeof(sampler->state)); + } + } + + return VK_SUCCESS; } -static void -anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) +static VkResult +flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) { - assert(batch->start == bbo->bo.map); - bbo->length = batch->next - batch->start; - VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); + if (result != VK_SUCCESS) + return result; + + static const uint32_t sampler_state_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 43, + [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 46, + [VK_SHADER_STAGE_FRAGMENT] = 47, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + static const uint32_t binding_table_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + 
[VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + if (samplers.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[stage], + .PointertoVSSamplerState = samplers.offset); + } + + if (surfaces.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[stage], + .PointertoVSBindingTable = surfaces.offset); + } + + return VK_SUCCESS; } static void -anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) +flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { - anv_reloc_list_finish(&bbo->relocs, device); - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); - anv_device_free(device, bbo); -} + uint32_t s, dirty = cmd_buffer->state.descriptors_dirty & + cmd_buffer->state.pipeline->active_stages; -static VkResult -anv_batch_bo_list_clone(const struct list_head *list, struct anv_device *device, - struct list_head *new_list) -{ VkResult result = VK_SUCCESS; - - list_inithead(new_list); - - struct anv_batch_bo *prev_bbo = NULL; - list_for_each_entry(struct anv_batch_bo, bbo, list, link) { - struct anv_batch_bo *new_bbo; - result = anv_batch_bo_clone(device, bbo, &new_bbo); + for_each_bit(s, dirty) { + result = flush_descriptor_set(cmd_buffer, s); if (result != VK_SUCCESS) break; - list_addtail(&new_bbo->link, new_list); - - if (prev_bbo) { - /* As we clone this list of batch_bo's, they chain one to the - * other using MI_BATCH_BUFFER_START commands. We need to fix up - * those relocations as we go. Fortunately, this is pretty easy - * as it will always be the last relocation in the list. 
- */ - uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; - assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); - prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; - } - - prev_bbo = new_bbo; } if (result != VK_SUCCESS) { - list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) - anv_batch_bo_destroy(bbo, device); - } + assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); - return result; -} + result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); + assert(result == VK_SUCCESS); -/*-----------------------------------------------------------------------* - * Functions related to anv_batch_bo - *-----------------------------------------------------------------------*/ + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. + */ + anv_cmd_buffer_emit_state_base_address(cmd_buffer); -static inline struct anv_batch_bo * -anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) -{ - return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); -} + /* Re-emit all active binding tables */ + for_each_bit(s, cmd_buffer->state.pipeline->active_stages) { + result = flush_descriptor_set(cmd_buffer, s); -static inline struct anv_batch_bo * -anv_cmd_buffer_current_surface_bbo(struct anv_cmd_buffer *cmd_buffer) -{ - return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->surface_bos.prev, link); -} + /* It had better succeed this time */ + assert(result == VK_SUCCESS); + } + } -struct anv_bo * -anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer) -{ - return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo; + cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages; } -struct anv_reloc_list * -anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) +static struct anv_state +anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t dwords, uint32_t alignment) { - return 
&anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs; + struct anv_state state; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + memcpy(state.map, a, dwords * 4); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4)); + + return state; } -static void -cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_batch_bo *bbo) +static struct anv_state +anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment) { - struct anv_batch *batch = &cmd_buffer->batch; - struct anv_batch_bo *current_bbo = - anv_cmd_buffer_current_batch_bo(cmd_buffer); + struct anv_state state; + uint32_t *p; - /* We set the end of the batch a little short so we would be sure we - * have room for the chaining command. Since we're about to emit the - * chaining command, let's set it back where it should go. - */ - batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; - assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + p = state.map; + for (uint32_t i = 0; i < dwords; i++) + p[i] = a[i] | b[i]; - anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, - GEN8_MI_BATCH_BUFFER_START_header, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &bbo->bo, 0 }, - ); + VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); - anv_batch_bo_finish(current_bbo, batch); + return state; } static VkResult -anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) { - struct anv_cmd_buffer *cmd_buffer = _data; - struct anv_batch_bo *new_bbo; + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; - VkResult result = 
anv_batch_bo_create(cmd_buffer->device, &new_bbo); + result = cmd_buffer_emit_samplers(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &surfaces); if (result != VK_SUCCESS) return result; - struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); - if (seen_bbo == NULL) { - anv_batch_bo_destroy(new_bbo, cmd_buffer->device); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - *seen_bbo = new_bbo; + struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = { + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */ + }; - cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); + uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + struct anv_state state = + anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); - list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); + GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); - anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); return VK_SUCCESS; } -struct anv_state -anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment) +static void +anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) { - struct anv_bo *surface_bo = - anv_cmd_buffer_current_surface_bo(cmd_buffer); - struct anv_state state; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; - state.offset = align_u32(cmd_buffer->surface_next, alignment); - if (state.offset + size > surface_bo->size) - 
return (struct anv_state) { 0 }; + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - state.map = surface_bo->map + state.offset; - state.alloc_size = size; - cmd_buffer->surface_next = state.offset + size; + if (cmd_buffer->state.current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } - assert(state.offset + size <= surface_bo->size); + if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - return state; -} + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; + } -struct anv_state -anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment) -{ - return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, - size, alignment); + cmd_buffer->state.compute_dirty = 0; } -VkResult -anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) +static void +anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { - struct anv_batch_bo *new_bbo, *old_bbo = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; - /* Finish off the old buffer */ - old_bbo->length = cmd_buffer->surface_next; + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); - if (result != VK_SUCCESS) - return result; + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); - if (seen_bbo == NULL) { - anv_batch_bo_destroy(new_bbo, cmd_buffer->device); 
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; } - *seen_bbo = new_bbo; - cmd_buffer->surface_next = 1; + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN8_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GEN8_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = GEN8_MOCS, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } - list_addtail(&new_bbo->link, &cmd_buffer->surface_bos); + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. 
*/ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); - return VK_SUCCESS; -} + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } -VkResult -anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch_bo *batch_bo, *surface_bbo; - struct anv_device *device = cmd_buffer->device; - VkResult result; + if (cmd_buffer->state.descriptors_dirty) + flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { + struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = vp_state->scissor.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = vp_state->cc_vp.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = vp_state->sf_clip_vp.offset); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_RS_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.rs_state->state_sf, + pipeline->state_sf); + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.rs_state->state_raster, + pipeline->state_raster); + } - list_inithead(&cmd_buffer->batch_bos); - list_inithead(&cmd_buffer->surface_bos); + if (cmd_buffer->state.ds_state && + (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_DS_DIRTY))) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.ds_state->state_wm_depth_stencil, + pipeline->state_wm_depth_stencil); + } - result = anv_batch_bo_create(device, &batch_bo); - if (result != VK_SUCCESS) - return result; + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY | + ANV_CMD_BUFFER_DS_DIRTY)) { + struct anv_state state; + if (cmd_buffer->state.ds_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + 
cmd_buffer->state.cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + else if (cmd_buffer->state.cb_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + else + state = anv_cmd_buffer_merge_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->state_color_calc, + cmd_buffer->state.cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = state.offset, + .ColorCalcStatePointerValid = true); + } - list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.state_vf, pipeline->state_vf); + } - cmd_buffer->batch.device = device; - cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; - cmd_buffer->batch.user_data = cmd_buffer; + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} - anv_batch_bo_start(batch_bo, &cmd_buffer->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); +void anv_CmdDraw( + VkCmdBuffer cmdBuffer, + uint32_t firstVertex, + uint32_t vertexCount, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - result = anv_batch_bo_create(device, &surface_bbo); - if (result != VK_SUCCESS) - goto fail_batch_bo; + anv_cmd_buffer_flush_state(cmd_buffer); - list_addtail(&surface_bbo->link, &cmd_buffer->surface_bos); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = SEQUENTIAL, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} - int success = anv_vector_init(&cmd_buffer->seen_bbos, - sizeof(struct anv_bo *), - 8 * sizeof(struct anv_bo *)); - if 
(!success) - goto fail_surface_bo; +void anv_CmdDrawIndexed( + VkCmdBuffer cmdBuffer, + uint32_t firstIndex, + uint32_t indexCount, + int32_t vertexOffset, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = surface_bbo; + anv_cmd_buffer_flush_state(cmd_buffer); - /* Start surface_next at 1 so surface offset 0 is invalid. */ - cmd_buffer->surface_next = 1; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = RANDOM, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} - cmd_buffer->execbuf2.objects = NULL; - cmd_buffer->execbuf2.bos = NULL; - cmd_buffer->execbuf2.array_length = 0; +static void +anv_batch_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} - return VK_SUCCESS; +static void +anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, + .RegisterOffset = reg, + .DataDWord = imm); +} - fail_surface_bo: - anv_batch_bo_destroy(surface_bbo, device); - fail_batch_bo: - anv_batch_bo_destroy(batch_bo, device); +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void anv_CmdDrawIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + 
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL); +} - return result; +void anv_CmdDrawIndexedIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM); } -void -anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +void anv_CmdDispatch( + VkCmdBuffer cmdBuffer, + uint32_t x, + uint32_t y, + uint32_t z) { - struct anv_device *device = cmd_buffer->device; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data 
*prog_data = &pipeline->cs_prog_data; + + anv_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); +} - anv_vector_finish(&cmd_buffer->seen_bbos); +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 - /* Destroy all of the batch buffers */ - list_for_each_entry_safe(struct anv_batch_bo, bbo, - &cmd_buffer->batch_bos, link) { - anv_batch_bo_destroy(bbo, device); - } +void anv_CmdDispatchIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, 
GEN8_MEDIA_STATE_FLUSH); +} - /* Destroy all of the surface state buffers */ - list_for_each_entry_safe(struct anv_batch_bo, bbo, - &cmd_buffer->surface_bos, link) { - anv_batch_bo_destroy(bbo, device); - } +void anv_CmdSetEvent( + VkCmdBuffer cmdBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} - anv_device_free(device, cmd_buffer->execbuf2.objects); - anv_device_free(device, cmd_buffer->execbuf2.bos); +void anv_CmdResetEvent( + VkCmdBuffer cmdBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); } -void -anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +void anv_CmdWaitEvents( + VkCmdBuffer cmdBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) { - struct anv_device *device = cmd_buffer->device; + stub(); +} - /* Delete all but the first batch bo */ - assert(!list_empty(&cmd_buffer->batch_bos)); - while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { - struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - list_del(&bbo->link); - anv_batch_bo_destroy(bbo, device); - } - assert(!list_empty(&cmd_buffer->batch_bos)); +void anv_CmdPipelineBarrier( + VkCmdBuffer cmdBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + uint32_t b, *dw; - anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), - &cmd_buffer->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); + struct GEN8_PIPE_CONTROL cmd = { + GEN8_PIPE_CONTROL_header, + .PostSyncOperation = NoWrite, + }; - /* Delete all but the first batch bo */ - assert(!list_empty(&cmd_buffer->batch_bos)); - while (cmd_buffer->surface_bos.next != cmd_buffer->surface_bos.prev) { - struct anv_batch_bo *bbo = 
anv_cmd_buffer_current_surface_bbo(cmd_buffer); - list_del(&bbo->link); - anv_batch_bo_destroy(bbo, device); + /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */ + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { + /* This is just what PIPE_CONTROL does */ } - assert(!list_empty(&cmd_buffer->batch_bos)); - anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs.num_relocs = 0; + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { + cmd.StallAtPixelScoreboard = true; + } - cmd_buffer->surface_next = 1; - /* Reset the list of seen buffers */ - cmd_buffer->seen_bbos.head = 0; - cmd_buffer->seen_bbos.tail = 0; + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_TRANSITION_BIT)) { + cmd.CommandStreamerStallEnable = true; + } - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = - anv_cmd_buffer_current_batch_bo(cmd_buffer); - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); -} + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { + anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); + } -void -anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - struct anv_batch_bo *surface_bbo = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); + /* On our hardware, all stages will wait for execution as needed. 
*/ + (void)destStageMask; - if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END); + /* We checked all known VkPipeEventFlags. */ + anv_assert(srcStageMask == 0); - /* Round batch up to an even number of dwords. */ - if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP); + /* XXX: Right now, we're really dumb and just flush whatever categories + * the app asks for. One of these days we may make this a bit better + * but right now that's all the hardware allows for in most areas. + */ + VkMemoryOutputFlags out_flags = 0; + VkMemoryInputFlags in_flags = 0; + + for (uint32_t i = 0; i < memBarrierCount; i++) { + const struct anv_common *common = ppMemBarriers[i]; + switch (common->sType) { + case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + default: + unreachable("Invalid memory barrier type"); + } + } - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; - } else { - /* If this is a secondary command buffer, we need to determine the - * mode in which it will be executed with vkExecuteCommands. We - * determine this statically here so that this stays in sync with the - * actual ExecuteCommands implementation. 
- */ - if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && - (anv_cmd_buffer_current_batch_bo(cmd_buffer)->length < - ANV_CMD_BUFFER_BATCH_SIZE / 2)) { - /* If the secondary has exactly one batch buffer in its list *and* - * that batch buffer is less than half of the maximum size, we're - * probably better of simply copying it into our batch. - */ - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; - } else if (cmd_buffer->opt_flags & - VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT) { - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; - - /* For chaining mode, we need to increment the number of - * relocations. This is because, when we chain, we need to add - * an MI_BATCH_BUFFER_START command. Adding this command will - * also add a relocation. In order to handle theis we'll - * increment it here and decrement it right before adding the - * MI_BATCH_BUFFER_START command. - */ - anv_cmd_buffer_current_batch_bo(cmd_buffer)->relocs.num_relocs++; - } else { - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; + for_each_bit(b, out_flags) { + switch ((VkMemoryOutputFlags)(1 << b)) { + case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: + cmd.DCFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: + cmd.RenderTargetCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + cmd.DepthCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_TRANSFER_BIT: + cmd.RenderTargetCacheFlushEnable = true; + cmd.DepthCacheFlushEnable = true; + break; + default: + unreachable("Invalid memory output flag"); } } - anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + for_each_bit(b, out_flags) { + switch ((VkMemoryInputFlags)(1 << b)) { + case VK_MEMORY_INPUT_HOST_READ_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: + case VK_MEMORY_INPUT_INDEX_FETCH_BIT: + case 
VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: + cmd.VFCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_UNIFORM_READ_BIT: + cmd.ConstantCacheInvalidationEnable = true; + /* fallthrough */ + case VK_MEMORY_INPUT_SHADER_READ_BIT: + cmd.DCFlushEnable = true; + cmd.TextureCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: + case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + break; /* XXX: Hunh? */ + case VK_MEMORY_INPUT_TRANSFER_BIT: + cmd.TextureCacheInvalidationEnable = true; + break; + } + } - surface_bbo->length = cmd_buffer->surface_next; + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length); + GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd); } -static inline VkResult -anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, - struct list_head *list) +void anv_CmdPushConstants( + VkCmdBuffer cmdBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t start, + uint32_t length, + const void* values) { - list_for_each_entry(struct anv_batch_bo, bbo, list, link) { - struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos); - if (bbo_ptr == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + stub(); +} - *bbo_ptr = bbo; +static void +anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_depth_stencil_view *view; + + static const struct anv_depth_stencil_view null_view = + { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; + + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + const struct anv_attachment_view *aview = + fb->attachments[subpass->depth_stencil_attachment]; + assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); + view = (const struct anv_depth_stencil_view *)aview; + } else { + view = &null_view; } - return VK_SUCCESS; + /* 
FIXME: Implement the PMA stall W/A */ + /* FIXME: Width and Height are wrong */ + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = view->depth_stride > 0, + .StencilWriteEnable = view->stencil_stride > 0, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = view->depth_format, + .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->depth_offset }, + .Height = cmd_buffer->state.framebuffer->height - 1, + .Width = cmd_buffer->state.framebuffer->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GEN8_MOCS, + .RenderTargetViewExtent = 1 - 1, + .SurfaceQPitch = view->depth_qpitch >> 2); + + /* Disable hierarchial depth buffers. */ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, + .StencilBufferEnable = view->stencil_stride > 0, + .StencilBufferObjectControlState = GEN8_MOCS, + .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->stencil_offset }, + .SurfaceQPitch = view->stencil_qpitch >> 2); + + /* Clear the clear params. 
*/ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS); } void -anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, - struct anv_cmd_buffer *secondary) +anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) { - switch (secondary->exec_mode) { - case ANV_CMD_BUFFER_EXEC_MODE_EMIT: - anv_batch_emit_batch(&primary->batch, &secondary->batch); - break; - case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { - struct anv_batch_bo *first_bbo = - list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); - struct anv_batch_bo *last_bbo = - list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); - - anv_batch_emit(&primary->batch, GEN8_MI_BATCH_BUFFER_START, - GEN8_MI_BATCH_BUFFER_START_header, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &first_bbo->bo, 0 }, - ); - - struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); - assert(primary->batch.start == this_bbo->bo.map); - uint32_t offset = primary->batch.next - primary->batch.start; - - struct GEN8_MI_BATCH_BUFFER_START ret = { - GEN8_MI_BATCH_BUFFER_START_header, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &this_bbo->bo, offset }, - }; - last_bbo->relocs.num_relocs++; - GEN8_MI_BATCH_BUFFER_START_pack(&secondary->batch, - last_bbo->bo.map + last_bbo->length, - &ret); - - anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); - break; - } - case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { - struct list_head copy_list; - VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, - secondary->device, - ©_list); - if (result != VK_SUCCESS) - return; /* FIXME */ + cmd_buffer->state.subpass = subpass; - anv_cmd_buffer_add_seen_bbos(primary, ©_list); + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - struct anv_batch_bo *first_bbo = - list_first_entry(©_list, struct 
anv_batch_bo, link); - struct anv_batch_bo *last_bbo = - list_last_entry(©_list, struct anv_batch_bo, link); + anv_cmd_buffer_emit_depth_stencil(cmd_buffer); +} - cmd_buffer_chain_to_batch_bo(primary, first_bbo); +void anv_CmdBeginRenderPass( + VkCmdBuffer cmdBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkRenderPassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - list_splicetail(©_list, &primary->batch_bos); + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; - anv_batch_bo_continue(last_bbo, &primary->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); + const VkRect2D *render_area = &pRenderPassBegin->renderArea; - anv_cmd_buffer_emit_state_base_address(primary); - break; - } - default: - assert(!"Invalid execution mode"); - } + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + anv_cmd_buffer_clear_attachments(cmd_buffer, pass, + pRenderPassBegin->pAttachmentClearValues); - /* Mark the surface buffer from the secondary as seen */ - anv_cmd_buffer_add_seen_bbos(primary, &secondary->surface_bos); + anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); } -static VkResult -anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, - struct anv_reloc_list *relocs) +void anv_CmdNextSubpass( + VkCmdBuffer cmdBuffer, + VkRenderPassContents contents) { - struct drm_i915_gem_exec_object2 *obj = NULL; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); 
- if (bo->index < cmd_buffer->execbuf2.bo_count && - cmd_buffer->execbuf2.bos[bo->index] == bo) - obj = &cmd_buffer->execbuf2.objects[bo->index]; + assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - if (obj == NULL) { - /* We've never seen this one before. Add it to the list and assign - * an id that we can use later. - */ - if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { - uint32_t new_len = cmd_buffer->execbuf2.objects ? - cmd_buffer->execbuf2.array_length * 2 : 64; - - struct drm_i915_gem_exec_object2 *new_objects = - anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects), - 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_objects == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct anv_bo **new_bos = - anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos), - 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_objects == NULL) { - anv_device_free(cmd_buffer->device, new_objects); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } + anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); +} - if (cmd_buffer->execbuf2.objects) { - memcpy(new_objects, cmd_buffer->execbuf2.objects, - cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); - memcpy(new_bos, cmd_buffer->execbuf2.bos, - cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); - } +void anv_CmdEndRenderPass( + VkCmdBuffer cmdBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - cmd_buffer->execbuf2.objects = new_objects; - cmd_buffer->execbuf2.bos = new_bos; - cmd_buffer->execbuf2.array_length = new_len; - } + /* Emit a flushing pipe control at the end of a pass. This is kind of a + * hack but it ensures that render targets always actually get written. + * Eventually, we should do flushing based on image format transitions + * or something of that nature. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .PostSyncOperation = NoWrite, + .RenderTargetCacheFlushEnable = true, + .InstructionCacheInvalidateEnable = true, + .DepthCacheFlushEnable = true, + .VFCacheInvalidationEnable = true, + .TextureCacheInvalidationEnable = true, + .CommandStreamerStallEnable = true); +} - assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); +void anv_CmdExecuteCommands( + VkCmdBuffer cmdBuffer, + uint32_t cmdBuffersCount, + const VkCmdBuffer* pCmdBuffers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer); - bo->index = cmd_buffer->execbuf2.bo_count++; - obj = &cmd_buffer->execbuf2.objects[bo->index]; - cmd_buffer->execbuf2.bos[bo->index] = bo; + assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - obj->handle = bo->gem_handle; - obj->relocation_count = 0; - obj->relocs_ptr = 0; - obj->alignment = 0; - obj->offset = bo->offset; - obj->flags = 0; - obj->rsvd1 = 0; - obj->rsvd2 = 0; - } + anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]); - if (relocs != NULL && obj->relocation_count == 0) { - /* This is the first time we've ever seen a list of relocations for - * this BO. Go ahead and set the relocations and then walk the list - * of relocations and add them all. 
- */ - obj->relocation_count = relocs->num_relocs; - obj->relocs_ptr = (uintptr_t) relocs->relocs; + for (uint32_t i = 0; i < cmdBuffersCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); - for (size_t i = 0; i < relocs->num_relocs; i++) - anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); - } + assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY); - return VK_SUCCESS; + anv_cmd_buffer_add_secondary(primary, secondary); + } } -static void -anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, - struct anv_reloc_list *list) +VkResult anv_CreateCommandPool( + VkDevice _device, + const VkCmdPoolCreateInfo* pCreateInfo, + VkCmdPool* pCmdPool) { - struct anv_bo *bo; + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_cmd_pool *pool; - /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in - * struct drm_i915_gem_exec_object2 against the bos current offset and if - * all bos haven't moved it will skip relocation processing alltogether. - * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming - * value of offset so we can set it either way. For that to work we need - * to make sure all relocs use the same presumed offset. 
- */ + pool = anv_device_alloc(device, sizeof(*pool), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - for (size_t i = 0; i < list->num_relocs; i++) { - bo = list->reloc_bos[i]; - if (bo->offset != list->relocs[i].presumed_offset) - cmd_buffer->execbuf2.need_reloc = true; + list_inithead(&pool->cmd_buffers); - list->relocs[i].target_handle = bo->index; - } + *pCmdPool = anv_cmd_pool_to_handle(pool); + + return VK_SUCCESS; } -void -anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) +VkResult anv_DestroyCommandPool( + VkDevice _device, + VkCmdPool cmdPool) { - struct anv_batch *batch = &cmd_buffer->batch; + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); - cmd_buffer->execbuf2.bo_count = 0; - cmd_buffer->execbuf2.need_reloc = false; - - /* First, we walk over all of the bos we've seen and add them and their - * relocations to the validate list. - */ - struct anv_batch_bo **bbo; - anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) - anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); + anv_ResetCommandPool(_device, cmdPool, 0); - struct anv_batch_bo *first_batch_bo = - list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); + anv_device_free(device, pool); - /* The kernel requires that the last entry in the validation list be the - * batch buffer to execute. We can simply swap the element - * corresponding to the first batch_bo in the chain with the last - * element in the list. 
- */ - if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { - uint32_t idx = first_batch_bo->bo.index; - - struct drm_i915_gem_exec_object2 tmp_obj = - cmd_buffer->execbuf2.objects[idx]; - assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); + return VK_SUCCESS; +} - cmd_buffer->execbuf2.objects[idx] = - cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1]; - cmd_buffer->execbuf2.bos[idx] = - cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1]; - cmd_buffer->execbuf2.bos[idx]->index = idx; +VkResult anv_ResetCommandPool( + VkDevice device, + VkCmdPool cmdPool, + VkCmdPoolResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); - cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1] = tmp_obj; - cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1] = - &first_batch_bo->bo; - first_batch_bo->bo.index = cmd_buffer->execbuf2.bo_count - 1; + list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, + &pool->cmd_buffers, pool_link) { + anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer)); } - /* Now we go through and fixup all of the relocation lists to point to - * the correct indices in the object array. We have to do this after we - * reorder the list above as some of the indices may have changed. 
- */ - anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) - anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); - - cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { - .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, - .buffer_count = cmd_buffer->execbuf2.bo_count, - .batch_start_offset = 0, - .batch_len = batch->next - batch->start, - .cliprects_ptr = 0, - .num_cliprects = 0, - .DR1 = 0, - .DR4 = 0, - .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER, - .rsvd1 = cmd_buffer->device->context_id, - .rsvd2 = 0, - }; - - if (!cmd_buffer->execbuf2.need_reloc) - cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; + return VK_SUCCESS; } diff --git a/src/vulkan/anv_cmd_emit.c b/src/vulkan/anv_cmd_emit.c deleted file mode 100644 index 3b9e67fdd0f..00000000000 --- a/src/vulkan/anv_cmd_emit.c +++ /dev/null @@ -1,1425 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -/** \file anv_cmd_buffer.c - * - * This file contains all of the stuff for emitting commands into a command - * buffer. This includes implementations of most of the vkCmd* - * entrypoints. This file is concerned entirely with state emission and - * not with the command buffer data structure itself. As far as this file - * is concerned, most of anv_cmd_buffer is magic. - */ - -static void -anv_cmd_state_init(struct anv_cmd_state *state) -{ - state->rs_state = NULL; - state->vp_state = NULL; - state->cb_state = NULL; - state->ds_state = NULL; - memset(&state->state_vf, 0, sizeof(state->state_vf)); - memset(&state->descriptors, 0, sizeof(state->descriptors)); - - state->dirty = 0; - state->vb_dirty = 0; - state->descriptors_dirty = 0; - state->pipeline = NULL; - state->vp_state = NULL; - state->rs_state = NULL; - state->ds_state = NULL; -} - -VkResult anv_CreateCommandBuffer( - VkDevice _device, - const VkCmdBufferCreateInfo* pCreateInfo, - VkCmdBuffer* pCmdBuffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool); - struct anv_cmd_buffer *cmd_buffer; - VkResult result; - - cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (cmd_buffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - cmd_buffer->device = device; - - result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); - if (result != VK_SUCCESS) - goto fail; - - anv_state_stream_init(&cmd_buffer->surface_state_stream, - &device->surface_state_block_pool); - anv_state_stream_init(&cmd_buffer->dynamic_state_stream, - 
&device->dynamic_state_block_pool); - - cmd_buffer->level = pCreateInfo->level; - cmd_buffer->opt_flags = 0; - - anv_cmd_state_init(&cmd_buffer->state); - - list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); - - *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); - - return VK_SUCCESS; - - fail: anv_device_free(device, cmd_buffer); - - return result; -} - -VkResult anv_DestroyCommandBuffer( - VkDevice _device, - VkCmdBuffer _cmd_buffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); - - list_del(&cmd_buffer->pool_link); - - anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); - - anv_state_stream_finish(&cmd_buffer->surface_state_stream); - anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - anv_device_free(device, cmd_buffer); - - return VK_SUCCESS; -} - -VkResult anv_ResetCommandBuffer( - VkCmdBuffer cmdBuffer, - VkCmdBufferResetFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); - - anv_cmd_state_init(&cmd_buffer->state); - - return VK_SUCCESS; -} - -void -anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_bo *scratch_bo = NULL; - - cmd_buffer->state.scratch_size = device->scratch_block_pool.size; - if (cmd_buffer->state.scratch_size > 0) - scratch_bo = &device->scratch_block_pool.bo; - - anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, - .GeneralStateBaseAddress = { scratch_bo, 0 }, - .GeneralStateMemoryObjectControlState = GEN8_MOCS, - .GeneralStateBaseAddressModifyEnable = true, - .GeneralStateBufferSize = 0xfffff, - .GeneralStateBufferSizeModifyEnable = true, - - .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, - .SurfaceStateMemoryObjectControlState = GEN8_MOCS, - .SurfaceStateBaseAddressModifyEnable = true, - - .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 
}, - .DynamicStateMemoryObjectControlState = GEN8_MOCS, - .DynamicStateBaseAddressModifyEnable = true, - .DynamicStateBufferSize = 0xfffff, - .DynamicStateBufferSizeModifyEnable = true, - - .IndirectObjectBaseAddress = { NULL, 0 }, - .IndirectObjectMemoryObjectControlState = GEN8_MOCS, - .IndirectObjectBaseAddressModifyEnable = true, - .IndirectObjectBufferSize = 0xfffff, - .IndirectObjectBufferSizeModifyEnable = true, - - .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, - .InstructionMemoryObjectControlState = GEN8_MOCS, - .InstructionBaseAddressModifyEnable = true, - .InstructionBufferSize = 0xfffff, - .InstructionBuffersizeModifyEnable = true); - - /* After re-setting the surface state base address, we have to do some - * cache flusing so that the sampler engine will pick up the new - * SURFACE_STATE objects and binding tables. From the Broadwell PRM, - * Shared Function > 3D Sampler > State > State Caching (page 96): - * - * Coherency with system memory in the state cache, like the texture - * cache is handled partially by software. It is expected that the - * command stream or shader will issue Cache Flush operation or - * Cache_Flush sampler message to ensure that the L1 cache remains - * coherent with system memory. - * - * [...] - * - * Whenever the value of the Dynamic_State_Base_Addr, - * Surface_State_Base_Addr are altered, the L1 state cache must be - * invalidated to ensure the new surface or sampler state is fetched - * from system memory. - * - * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit - * which, according the PIPE_CONTROL instruction documentation in the - * Broadwell PRM: - * - * Setting this bit is independent of any other bit in this packet. - * This bit controls the invalidation of the L1 and L2 state caches - * at the top of the pipe i.e. at the parsing time. 
- * - * Unfortunately, experimentation seems to indicate that state cache - * invalidation through a PIPE_CONTROL does nothing whatsoever in - * regards to surface state and binding tables. In stead, it seems that - * invalidating the texture cache is what is actually needed. - * - * XXX: As far as we have been able to determine through - * experimentation, shows that flush the texture cache appears to be - * sufficient. The theory here is that all of the sampling/rendering - * units cache the binding table in the texture cache. However, we have - * yet to be able to actually confirm this. - */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .TextureCacheInvalidationEnable = true); -} - -VkResult anv_BeginCommandBuffer( - VkCmdBuffer cmdBuffer, - const VkCmdBufferBeginInfo* pBeginInfo) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - cmd_buffer->opt_flags = pBeginInfo->flags; - - if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { - cmd_buffer->state.framebuffer = - anv_framebuffer_from_handle(pBeginInfo->framebuffer); - cmd_buffer->state.pass = - anv_render_pass_from_handle(pBeginInfo->renderPass); - - /* FIXME: We shouldn't be starting on the first subpass */ - anv_cmd_buffer_begin_subpass(cmd_buffer, - &cmd_buffer->state.pass->subpasses[0]); - } - - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - cmd_buffer->state.current_pipeline = UINT32_MAX; - - return VK_SUCCESS; -} - -VkResult anv_EndCommandBuffer( - VkCmdBuffer cmdBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_device *device = cmd_buffer->device; - - anv_cmd_buffer_end_batch_buffer(cmd_buffer); - - if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { - /* The algorithm used to compute the validate list is not threadsafe as - * it uses the bo->index field. We have to lock the device around it. - * Fortunately, the chances for contention here are probably very low. 
- */ - pthread_mutex_lock(&device->mutex); - anv_cmd_buffer_prepare_execbuf(cmd_buffer); - pthread_mutex_unlock(&device->mutex); - } - - return VK_SUCCESS; -} - -void anv_CmdBindPipeline( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipeline _pipeline) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - - switch (pipelineBindPoint) { - case VK_PIPELINE_BIND_POINT_COMPUTE: - cmd_buffer->state.compute_pipeline = pipeline; - cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; - break; - - case VK_PIPELINE_BIND_POINT_GRAPHICS: - cmd_buffer->state.pipeline = pipeline; - cmd_buffer->state.vb_dirty |= pipeline->vb_used; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; - break; - - default: - assert(!"invalid bind point"); - break; - } -} - -void anv_CmdBindDynamicViewportState( - VkCmdBuffer cmdBuffer, - VkDynamicViewportState dynamicViewportState) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState); - - cmd_buffer->state.vp_state = vp_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY; -} - -void anv_CmdBindDynamicRasterState( - VkCmdBuffer cmdBuffer, - VkDynamicRasterState dynamicRasterState) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState); - - cmd_buffer->state.rs_state = rs_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY; -} - -void anv_CmdBindDynamicColorBlendState( - VkCmdBuffer cmdBuffer, - VkDynamicColorBlendState dynamicColorBlendState) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState); - - cmd_buffer->state.cb_state = cb_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY; -} - -void anv_CmdBindDynamicDepthStencilState( - VkCmdBuffer cmdBuffer, - VkDynamicDepthStencilState 
dynamicDepthStencilState) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState); - - cmd_buffer->state.ds_state = ds_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY; -} - -void anv_CmdBindDescriptorSets( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout _layout, - uint32_t firstSet, - uint32_t setCount, - const VkDescriptorSet* pDescriptorSets, - uint32_t dynamicOffsetCount, - const uint32_t* pDynamicOffsets) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); - struct anv_descriptor_set_layout *set_layout; - - assert(firstSet + setCount < MAX_SETS); - - uint32_t dynamic_slot = 0; - for (uint32_t i = 0; i < setCount; i++) { - ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); - set_layout = layout->set[firstSet + i].layout; - - cmd_buffer->state.descriptors[firstSet + i].set = set; - - assert(set_layout->num_dynamic_buffers < - ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets)); - memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets, - pDynamicOffsets + dynamic_slot, - set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); - - cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; - - dynamic_slot += set_layout->num_dynamic_buffers; - } -} - -void anv_CmdBindIndexBuffer( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - VkIndexType indexType) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - static const uint32_t vk_to_gen_index_type[] = { - [VK_INDEX_TYPE_UINT16] = INDEX_WORD, - [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, - }; - - struct GEN8_3DSTATE_VF vf = { - GEN8_3DSTATE_VF_header, - .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? 
UINT16_MAX : UINT32_MAX, - }; - GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); - - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, - .IndexFormat = vk_to_gen_index_type[indexType], - .MemoryObjectControlState = GEN8_MOCS, - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset); -} - -void anv_CmdBindVertexBuffers( - VkCmdBuffer cmdBuffer, - uint32_t startBinding, - uint32_t bindingCount, - const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; - - /* We have to defer setting up vertex buffer since we need the buffer - * stride from the pipeline. */ - - assert(startBinding + bindingCount < MAX_VBS); - for (uint32_t i = 0; i < bindingCount; i++) { - vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); - vb[startBinding + i].offset = pOffsets[i]; - cmd_buffer->state.vb_dirty |= 1 << (startBinding + i); - } -} - -static VkResult -cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *bt_state) -{ - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_pipeline_layout *layout; - uint32_t attachments, bias, size; - - if (stage == VK_SHADER_STAGE_COMPUTE) - layout = cmd_buffer->state.compute_pipeline->layout; - else - layout = cmd_buffer->state.pipeline->layout; - - if (stage == VK_SHADER_STAGE_FRAGMENT) { - bias = MAX_RTS; - attachments = subpass->color_count; - } else { - bias = 0; - attachments = 0; - } - - /* This is a little awkward: layout can be NULL but we still have to - * allocate and set a binding table for the PS stage for render - * targets. */ - uint32_t surface_count = layout ? 
layout->stage[stage].surface_count : 0; - - if (attachments + surface_count == 0) - return VK_SUCCESS; - - size = (bias + surface_count) * sizeof(uint32_t); - *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); - uint32_t *bt_map = bt_state->map; - - if (bt_state->map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - /* This is highly annoying. The Vulkan spec puts the depth-stencil - * attachments in with the color attachments. Unfortunately, thanks to - * other aspects of the API, we cana't really saparate them before this - * point. Therefore, we have to walk all of the attachments but only - * put the color attachments into the binding table. - */ - for (uint32_t a = 0; a < attachments; a++) { - const struct anv_attachment_view *attachment = - fb->attachments[subpass->color_attachments[a]]; - - assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); - const struct anv_color_attachment_view *view = - (const struct anv_color_attachment_view *)attachment; - - struct anv_state state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - - if (state.map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - memcpy(state.map, view->view.surface_state.map, 64); - - /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ - *(uint64_t *)(state.map + 8 * 4) = - anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), - cmd_buffer->device, - state.offset + 8 * 4, - view->view.bo, view->view.offset); - - bt_map[a] = state.offset; - } - - if (layout == NULL) - return VK_SUCCESS; - - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - struct anv_descriptor_slot *surface_slots = - set_layout->stage[stage].surface_start; - - uint32_t start = bias + layout->set[set].surface_start[stage]; - - for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) { - 
struct anv_surface_view *view = - d->set->descriptors[surface_slots[b].index].view; - - if (!view) - continue; - - struct anv_state state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - - if (state.map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - uint32_t offset; - if (surface_slots[b].dynamic_slot >= 0) { - uint32_t dynamic_offset = - d->dynamic_offsets[surface_slots[b].dynamic_slot]; - - offset = view->offset + dynamic_offset; - anv_fill_buffer_surface_state(state.map, view->format, offset, - view->range - dynamic_offset); - } else { - offset = view->offset; - memcpy(state.map, view->surface_state.map, 64); - } - - /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ - *(uint64_t *)(state.map + 8 * 4) = - anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), - cmd_buffer->device, - state.offset + 8 * 4, - view->bo, offset); - - bt_map[start + b] = state.offset; - } - } - - return VK_SUCCESS; -} - -static VkResult -cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *state) -{ - struct anv_pipeline_layout *layout; - uint32_t sampler_count; - - if (stage == VK_SHADER_STAGE_COMPUTE) - layout = cmd_buffer->state.compute_pipeline->layout; - else - layout = cmd_buffer->state.pipeline->layout; - - sampler_count = layout ? 
layout->stage[stage].sampler_count : 0; - if (sampler_count == 0) - return VK_SUCCESS; - - uint32_t size = sampler_count * 16; - *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); - - if (state->map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - struct anv_descriptor_slot *sampler_slots = - set_layout->stage[stage].sampler_start; - - uint32_t start = layout->set[set].sampler_start[stage]; - - for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) { - struct anv_sampler *sampler = - d->set->descriptors[sampler_slots[b].index].sampler; - - if (!sampler) - continue; - - memcpy(state->map + (start + b) * 16, - sampler->state, sizeof(sampler->state)); - } - } - - return VK_SUCCESS; -} - -static VkResult -flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) -{ - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); - if (result != VK_SUCCESS) - return result; - result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); - if (result != VK_SUCCESS) - return result; - - static const uint32_t sampler_state_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 43, - [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ - [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ - [VK_SHADER_STAGE_GEOMETRY] = 46, - [VK_SHADER_STAGE_FRAGMENT] = 47, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; - - static const uint32_t binding_table_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 38, - [VK_SHADER_STAGE_TESS_CONTROL] = 39, - [VK_SHADER_STAGE_TESS_EVALUATION] = 40, - [VK_SHADER_STAGE_GEOMETRY] = 41, - [VK_SHADER_STAGE_FRAGMENT] = 42, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; - - if (samplers.alloc_size > 0) { - anv_batch_emit(&cmd_buffer->batch, - 
GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, - ._3DCommandSubOpcode = sampler_state_opcodes[stage], - .PointertoVSSamplerState = samplers.offset); - } - - if (surfaces.alloc_size > 0) { - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, - ._3DCommandSubOpcode = binding_table_opcodes[stage], - .PointertoVSBindingTable = surfaces.offset); - } - - return VK_SUCCESS; -} - -static void -flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) -{ - uint32_t s, dirty = cmd_buffer->state.descriptors_dirty & - cmd_buffer->state.pipeline->active_stages; - - VkResult result = VK_SUCCESS; - for_each_bit(s, dirty) { - result = flush_descriptor_set(cmd_buffer, s); - if (result != VK_SUCCESS) - break; - } - - if (result != VK_SUCCESS) { - assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); - - result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); - assert(result == VK_SUCCESS); - - /* Re-emit state base addresses so we get the new surface state base - * address before we start emitting binding tables etc. 
- */ - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - /* Re-emit all active binding tables */ - for_each_bit(s, cmd_buffer->state.pipeline->active_stages) { - result = flush_descriptor_set(cmd_buffer, s); - - /* It had better succeed this time */ - assert(result == VK_SUCCESS); - } - } - - cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages; -} - -static struct anv_state -anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, - uint32_t *a, uint32_t dwords, uint32_t alignment) -{ - struct anv_state state; - - state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - dwords * 4, alignment); - memcpy(state.map, a, dwords * 4); - - VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4)); - - return state; -} - -static struct anv_state -anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, - uint32_t *a, uint32_t *b, - uint32_t dwords, uint32_t alignment) -{ - struct anv_state state; - uint32_t *p; - - state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - dwords * 4, alignment); - p = state.map; - for (uint32_t i = 0; i < dwords; i++) - p[i] = a[i] | b[i]; - - VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); - - return state; -} - -static VkResult -flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = cmd_buffer_emit_samplers(cmd_buffer, - VK_SHADER_STAGE_COMPUTE, &samplers); - if (result != VK_SUCCESS) - return result; - result = cmd_buffer_emit_binding_table(cmd_buffer, - VK_SHADER_STAGE_COMPUTE, &surfaces); - if (result != VK_SUCCESS) - return result; - - struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = { - .KernelStartPointer = pipeline->cs_simd, - .KernelStartPointerHigh = 0, - .BindingTablePointer = surfaces.offset, - .BindingTableEntryCount = 0, - .SamplerStatePointer = samplers.offset, - 
.SamplerCount = 0, - .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */ - }; - - uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); - struct anv_state state = - anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); - - GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, - .InterfaceDescriptorTotalLength = size, - .InterfaceDescriptorDataStartAddress = state.offset); - - return VK_SUCCESS; -} - -static void -anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - VkResult result; - - assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - - if (cmd_buffer->state.current_pipeline != GPGPU) { - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, - .PipelineSelection = GPGPU); - cmd_buffer->state.current_pipeline = GPGPU; - } - - if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { - result = flush_compute_descriptor_set(cmd_buffer); - assert(result == VK_SUCCESS); - cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; - } - - cmd_buffer->state.compute_dirty = 0; -} - -static void -anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - uint32_t *p; - - uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - - if (cmd_buffer->state.current_pipeline != _3D) { - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, - .PipelineSelection = _3D); - cmd_buffer->state.current_pipeline = _3D; - } - - if (vb_emit) { - const uint32_t num_buffers = 
__builtin_popcount(vb_emit); - const uint32_t num_dwords = 1 + num_buffers * 4; - - p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GEN8_3DSTATE_VERTEX_BUFFERS); - uint32_t vb, i = 0; - for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; - uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; - - struct GEN8_VERTEX_BUFFER_STATE state = { - .VertexBufferIndex = vb, - .MemoryObjectControlState = GEN8_MOCS, - .AddressModifyEnable = true, - .BufferPitch = pipeline->binding_stride[vb], - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset - }; - - GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); - i++; - } - } - - if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { - /* If somebody compiled a pipeline after starting a command buffer the - * scratch bo may have grown since we started this cmd buffer (and - * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, - * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. 
*/ - if (cmd_buffer->state.scratch_size < pipeline->total_scratch) - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - } - - if (cmd_buffer->state.descriptors_dirty) - flush_descriptor_sets(cmd_buffer); - - if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { - struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, - .ScissorRectPointer = vp_state->scissor.offset); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, - .CCViewportPointer = vp_state->cc_vp.offset); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, - .SFClipViewportPointer = vp_state->sf_clip_vp.offset); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_CMD_BUFFER_RS_DIRTY)) { - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.rs_state->state_sf, - pipeline->state_sf); - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.rs_state->state_raster, - pipeline->state_raster); - } - - if (cmd_buffer->state.ds_state && - (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_CMD_BUFFER_DS_DIRTY))) { - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.ds_state->state_wm_depth_stencil, - pipeline->state_wm_depth_stencil); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY | - ANV_CMD_BUFFER_DS_DIRTY)) { - struct anv_state state; - if (cmd_buffer->state.ds_state == NULL) - state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->state.cb_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 64); - else if (cmd_buffer->state.cb_state == NULL) - state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->state.ds_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 64); - else - state = anv_cmd_buffer_merge_dynamic(cmd_buffer, - cmd_buffer->state.ds_state->state_color_calc, - 
cmd_buffer->state.cb_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 64); - - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_CC_STATE_POINTERS, - .ColorCalcStatePointer = state.offset, - .ColorCalcStatePointerValid = true); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.state_vf, pipeline->state_vf); - } - - cmd_buffer->state.vb_dirty &= ~vb_emit; - cmd_buffer->state.dirty = 0; -} - -void anv_CmdDraw( - VkCmdBuffer cmdBuffer, - uint32_t firstVertex, - uint32_t vertexCount, - uint32_t firstInstance, - uint32_t instanceCount) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - anv_cmd_buffer_flush_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, - .VertexAccessType = SEQUENTIAL, - .VertexCountPerInstance = vertexCount, - .StartVertexLocation = firstVertex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = 0); -} - -void anv_CmdDrawIndexed( - VkCmdBuffer cmdBuffer, - uint32_t firstIndex, - uint32_t indexCount, - int32_t vertexOffset, - uint32_t firstInstance, - uint32_t instanceCount) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - anv_cmd_buffer_flush_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, - .VertexAccessType = RANDOM, - .VertexCountPerInstance = indexCount, - .StartVertexLocation = firstIndex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = vertexOffset); -} - -static void -anv_batch_lrm(struct anv_batch *batch, - uint32_t reg, struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); -} - -static void -anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, - .RegisterOffset = 
reg, - .DataDWord = imm); -} - -/* Auto-Draw / Indirect Registers */ -#define GEN7_3DPRIM_END_OFFSET 0x2420 -#define GEN7_3DPRIM_START_VERTEX 0x2430 -#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 -#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 -#define GEN7_3DPRIM_START_INSTANCE 0x243C -#define GEN7_3DPRIM_BASE_VERTEX 0x2440 - -void anv_CmdDrawIndirect( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t count, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - anv_cmd_buffer_flush_state(cmd_buffer); - - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); - anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, - .IndirectParameterEnable = true, - .VertexAccessType = SEQUENTIAL); -} - -void anv_CmdDrawIndexedIndirect( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t count, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - anv_cmd_buffer_flush_state(cmd_buffer); - - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); 
- - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, - .IndirectParameterEnable = true, - .VertexAccessType = RANDOM); -} - -void anv_CmdDispatch( - VkCmdBuffer cmdBuffer, - uint32_t x, - uint32_t y, - uint32_t z) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - - anv_cmd_buffer_flush_compute_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, - .ThreadGroupIDXDimension = x, - .ThreadGroupIDYDimension = y, - .ThreadGroupIDZDimension = z, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); -} - -#define GPGPU_DISPATCHDIMX 0x2500 -#define GPGPU_DISPATCHDIMY 0x2504 -#define GPGPU_DISPATCHDIMZ 0x2508 - -void anv_CmdDispatchIndirect( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - anv_cmd_buffer_flush_compute_state(cmd_buffer); - - anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); - anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); - anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - - anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, - .IndirectParameterEnable = true, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = 
pipeline->cs_thread_width_max, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); -} - -void anv_CmdSetEvent( - VkCmdBuffer cmdBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - stub(); -} - -void anv_CmdResetEvent( - VkCmdBuffer cmdBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - stub(); -} - -void anv_CmdWaitEvents( - VkCmdBuffer cmdBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - uint32_t memBarrierCount, - const void* const* ppMemBarriers) -{ - stub(); -} - -void anv_CmdPipelineBarrier( - VkCmdBuffer cmdBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - VkBool32 byRegion, - uint32_t memBarrierCount, - const void* const* ppMemBarriers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - uint32_t b, *dw; - - struct GEN8_PIPE_CONTROL cmd = { - GEN8_PIPE_CONTROL_header, - .PostSyncOperation = NoWrite, - }; - - /* XXX: I think waitEvent is a no-op on our HW. We should verify that. 
*/ - - if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { - /* This is just what PIPE_CONTROL does */ - } - - if (anv_clear_mask(&srcStageMask, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { - cmd.StallAtPixelScoreboard = true; - } - - - if (anv_clear_mask(&srcStageMask, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT | - VK_PIPELINE_STAGE_TRANSITION_BIT)) { - cmd.CommandStreamerStallEnable = true; - } - - if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { - anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); - } - - /* On our hardware, all stages will wait for execution as needed. */ - (void)destStageMask; - - /* We checked all known VkPipeEventFlags. */ - anv_assert(srcStageMask == 0); - - /* XXX: Right now, we're really dumb and just flush whatever categories - * the app asks for. One of these days we may make this a bit better - * but right now that's all the hardware allows for in most areas. 
- */ - VkMemoryOutputFlags out_flags = 0; - VkMemoryInputFlags in_flags = 0; - - for (uint32_t i = 0; i < memBarrierCount; i++) { - const struct anv_common *common = ppMemBarriers[i]; - switch (common->sType) { - case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - default: - unreachable("Invalid memory barrier type"); - } - } - - for_each_bit(b, out_flags) { - switch ((VkMemoryOutputFlags)(1 << b)) { - case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: - break; /* FIXME: Little-core systems */ - case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: - cmd.DCFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: - cmd.RenderTargetCacheFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: - cmd.DepthCacheFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_TRANSFER_BIT: - cmd.RenderTargetCacheFlushEnable = true; - cmd.DepthCacheFlushEnable = true; - break; - default: - unreachable("Invalid memory output flag"); - } - } - - for_each_bit(b, out_flags) { - switch ((VkMemoryInputFlags)(1 << b)) { - case VK_MEMORY_INPUT_HOST_READ_BIT: - break; /* FIXME: Little-core systems */ - case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: - case VK_MEMORY_INPUT_INDEX_FETCH_BIT: - case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: - cmd.VFCacheInvalidationEnable = true; - break; - case VK_MEMORY_INPUT_UNIFORM_READ_BIT: - cmd.ConstantCacheInvalidationEnable = true; - /* fallthrough */ - case VK_MEMORY_INPUT_SHADER_READ_BIT: - cmd.DCFlushEnable = true; - 
cmd.TextureCacheInvalidationEnable = true; - break; - case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: - case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: - break; /* XXX: Hunh? */ - case VK_MEMORY_INPUT_TRANSFER_BIT: - cmd.TextureCacheInvalidationEnable = true; - break; - } - } - - dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length); - GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd); -} - -void anv_CmdPushConstants( - VkCmdBuffer cmdBuffer, - VkPipelineLayout layout, - VkShaderStageFlags stageFlags, - uint32_t start, - uint32_t length, - const void* values) -{ - stub(); -} - -static void -anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_depth_stencil_view *view; - - static const struct anv_depth_stencil_view null_view = - { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; - - if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { - const struct anv_attachment_view *aview = - fb->attachments[subpass->depth_stencil_attachment]; - assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); - view = (const struct anv_depth_stencil_view *)aview; - } else { - view = &null_view; - } - - /* FIXME: Implement the PMA stall W/A */ - /* FIXME: Width and Height are wrong */ - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, - .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = view->depth_stride > 0, - .StencilWriteEnable = view->stencil_stride > 0, - .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = view->depth_format, - .SurfacePitch = view->depth_stride > 0 ? 
view->depth_stride - 1 : 0, - .SurfaceBaseAddress = { view->bo, view->depth_offset }, - .Height = cmd_buffer->state.framebuffer->height - 1, - .Width = cmd_buffer->state.framebuffer->width - 1, - .LOD = 0, - .Depth = 1 - 1, - .MinimumArrayElement = 0, - .DepthBufferObjectControlState = GEN8_MOCS, - .RenderTargetViewExtent = 1 - 1, - .SurfaceQPitch = view->depth_qpitch >> 2); - - /* Disable hierarchial depth buffers. */ - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, - .StencilBufferEnable = view->stencil_stride > 0, - .StencilBufferObjectControlState = GEN8_MOCS, - .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, - .SurfaceBaseAddress = { view->bo, view->stencil_offset }, - .SurfaceQPitch = view->stencil_qpitch >> 2); - - /* Clear the clear params. */ - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS); -} - -void -anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) -{ - cmd_buffer->state.subpass = subpass; - - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - - anv_cmd_buffer_emit_depth_stencil(cmd_buffer); -} - -void anv_CmdBeginRenderPass( - VkCmdBuffer cmdBuffer, - const VkRenderPassBeginInfo* pRenderPassBegin, - VkRenderPassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); - ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - - cmd_buffer->state.framebuffer = framebuffer; - cmd_buffer->state.pass = pass; - - const VkRect2D *render_area = &pRenderPassBegin->renderArea; - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, - .ClippedDrawingRectangleYMin = render_area->offset.y, - .ClippedDrawingRectangleXMin = render_area->offset.x, - .ClippedDrawingRectangleYMax = - render_area->offset.y + render_area->extent.height - 1, - 
.ClippedDrawingRectangleXMax = - render_area->offset.x + render_area->extent.width - 1, - .DrawingRectangleOriginY = 0, - .DrawingRectangleOriginX = 0); - - anv_cmd_buffer_clear_attachments(cmd_buffer, pass, - pRenderPassBegin->pAttachmentClearValues); - - anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); -} - -void anv_CmdNextSubpass( - VkCmdBuffer cmdBuffer, - VkRenderPassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - - anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); -} - -void anv_CmdEndRenderPass( - VkCmdBuffer cmdBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - /* Emit a flushing pipe control at the end of a pass. This is kind of a - * hack but it ensures that render targets always actually get written. - * Eventually, we should do flushing based on image format transitions - * or something of that nature. - */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .PostSyncOperation = NoWrite, - .RenderTargetCacheFlushEnable = true, - .InstructionCacheInvalidateEnable = true, - .DepthCacheFlushEnable = true, - .VFCacheInvalidationEnable = true, - .TextureCacheInvalidationEnable = true, - .CommandStreamerStallEnable = true); -} - -void anv_CmdExecuteCommands( - VkCmdBuffer cmdBuffer, - uint32_t cmdBuffersCount, - const VkCmdBuffer* pCmdBuffers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer); - - assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - - anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]); - - for (uint32_t i = 0; i < cmdBuffersCount; i++) { - ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); - - assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY); - - anv_cmd_buffer_add_secondary(primary, secondary); - } -} - -VkResult anv_CreateCommandPool( - VkDevice _device, - const VkCmdPoolCreateInfo* pCreateInfo, - VkCmdPool* pCmdPool) -{ - 
ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_cmd_pool *pool; - - pool = anv_device_alloc(device, sizeof(*pool), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (pool == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - list_inithead(&pool->cmd_buffers); - - *pCmdPool = anv_cmd_pool_to_handle(pool); - - return VK_SUCCESS; -} - -VkResult anv_DestroyCommandPool( - VkDevice _device, - VkCmdPool cmdPool) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); - - anv_ResetCommandPool(_device, cmdPool, 0); - - anv_device_free(device, pool); - - return VK_SUCCESS; -} - -VkResult anv_ResetCommandPool( - VkDevice device, - VkCmdPool cmdPool, - VkCmdPoolResetFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); - - list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, - &pool->cmd_buffers, pool_link) { - anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer)); - } - - return VK_SUCCESS; -} -- cgit v1.2.3 From 220a01d525cc49380e97b87695f454f5e76ce69a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 31 Jul 2015 08:52:28 -0700 Subject: vk/batch_chain: Compute secondary exec mode after finishing the bo Figuring out whether or not to do a copy requires knowing the length of the final batch_bo. This gets set by anv_batch_bo_finish so we have to do it afterwards. Not sure how this was even working before. 
--- src/vulkan/anv_batch_chain.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 2f09248acee..04528bb06b6 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -636,7 +636,13 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP); cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; - } else { + } + + anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + + surface_bbo->length = cmd_buffer->surface_next; + + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { /* If this is a secondary command buffer, we need to determine the * mode in which it will be executed with vkExecuteCommands. We * determine this statically here so that this stays in sync with the @@ -666,10 +672,6 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; } } - - anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); - - surface_bbo->length = cmd_buffer->surface_next; } static inline VkResult -- cgit v1.2.3 From 1f49a7d9fc0eda5dfba03b0e3faca93e57b39132 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 31 Jul 2015 09:11:47 -0700 Subject: vk/batch_chain: Decrement num_relocs instead of incrementing it --- src/vulkan/anv_batch_chain.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 04528bb06b6..c34f58b2534 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -720,7 +720,15 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, .AddressSpaceIndicator = ASI_PPGTT, .BatchBufferStartAddress = { &this_bbo->bo, offset }, }; - last_bbo->relocs.num_relocs++; + /* The pack function below is going to insert a relocation. 
In order + * to allow us to splice this secondary into a primary multiple + * times, we can't have relocations from previous splices in this + * splice. In order to deal with this, we simply decrement the + * relocation count prior to inserting the next one. In order to + * handle the base case, num_relocs was artificially incremented in + * end_batch_buffer(). + */ + last_bbo->relocs.num_relocs--; GEN8_MI_BATCH_BUFFER_START_pack(&secondary->batch, last_bbo->bo.map + last_bbo->length, &ret); -- cgit v1.2.3 From 0f050aaa15ec6bd521eff013808ac151cec9abda Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 31 Jul 2015 09:44:40 -0700 Subject: vk/device: Mark newly allocated memory as undefined for valgrind This way valgrind still works even if the client gives us memory that has been initialized or re-uses memory for some reason. --- src/vulkan/anv_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 01eff02d25d..9217be79306 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -844,10 +844,10 @@ anv_device_alloc(struct anv_device * device, size_t alignment, VkSystemAllocType allocType) { - return device->instance->pfnAlloc(device->instance->pAllocUserData, - size, - alignment, - allocType); + void *mem = device->instance->pfnAlloc(device->instance->pAllocUserData, + size, alignment, allocType); + VG(VALGRIND_MAKE_MEM_UNDEFINED(mem, size)); + return mem; } void -- cgit v1.2.3 From e40bdcef1fb6127545999a0b671b49fa393652b4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 31 Jul 2015 10:13:24 -0700 Subject: vk/device: Add anv_instance_alloc/free helpers This way we can more consistently alloc/free the device and it will provide us a better place to put valgrind hooks in the next patch --- src/vulkan/anv_device.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'src') diff --git 
a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 9217be79306..9f2fcf37344 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -161,6 +161,25 @@ VkResult anv_DestroyInstance( return VK_SUCCESS; } +static void * +anv_instance_alloc(struct anv_instance *instance, size_t size, + size_t alignment, VkSystemAllocType allocType) +{ + void *mem = instance->pfnAlloc(instance->pAllocUserData, + size, alignment, allocType); + VG(VALGRIND_MAKE_MEM_UNDEFINED(mem, size)); + return mem; +} + +static void +anv_instance_free(struct anv_instance *instance, void *mem) +{ + if (mem == NULL) + return; + + instance->pfnFree(instance->pAllocUserData, mem); +} + VkResult anv_EnumeratePhysicalDevices( VkInstance _instance, uint32_t* pPhysicalDeviceCount, @@ -546,8 +565,7 @@ VkResult anv_CreateDevice( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); - device = instance->pfnAlloc(instance->pAllocUserData, - sizeof(*device), 8, + device = anv_instance_alloc(instance, sizeof(*device), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (!device) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -635,7 +653,7 @@ VkResult anv_DestroyDevice( if (device->aub_writer) anv_aub_writer_destroy(device->aub_writer); - anv_device_free(device, device); + anv_instance_free(device->instance, device); return VK_SUCCESS; } @@ -844,21 +862,14 @@ anv_device_alloc(struct anv_device * device, size_t alignment, VkSystemAllocType allocType) { - void *mem = device->instance->pfnAlloc(device->instance->pAllocUserData, - size, alignment, allocType); - VG(VALGRIND_MAKE_MEM_UNDEFINED(mem, size)); - return mem; + return anv_instance_alloc(device->instance, size, alignment, allocType); } void anv_device_free(struct anv_device * device, void * mem) { - if (mem == NULL) - return; - - return device->instance->pfnFree(device->instance->pAllocUserData, - mem); + anv_instance_free(device->instance, mem); } VkResult -- cgit v1.2.3 From 930598ad567155a7c35e7c5758844253232015a1 Mon Sep 17 00:00:00 
2001 From: Jason Ekstrand Date: Fri, 31 Jul 2015 10:18:00 -0700 Subject: vk/instance: valgrind-guard client-provided allocations --- src/vulkan/anv_device.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 9f2fcf37344..f633108895e 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -142,6 +142,8 @@ VkResult anv_CreateInstance( instance->apiVersion = pCreateInfo->pAppInfo->apiVersion; instance->physicalDeviceCount = 0; + VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); + *pInstance = anv_instance_to_handle(instance); return VK_SUCCESS; @@ -156,6 +158,8 @@ VkResult anv_DestroyInstance( anv_physical_device_finish(&instance->physicalDevice); } + VG(VALGRIND_DESTROY_MEMPOOL(instance)); + instance->pfnFree(instance->pAllocUserData, instance); return VK_SUCCESS; @@ -167,7 +171,10 @@ anv_instance_alloc(struct anv_instance *instance, size_t size, { void *mem = instance->pfnAlloc(instance->pAllocUserData, size, alignment, allocType); - VG(VALGRIND_MAKE_MEM_UNDEFINED(mem, size)); + if (mem) { + VALGRIND_MEMPOOL_ALLOC(instance, mem, size); + VALGRIND_MAKE_MEM_UNDEFINED(mem, size); + } return mem; } @@ -177,6 +184,8 @@ anv_instance_free(struct anv_instance *instance, void *mem) if (mem == NULL) return; + VALGRIND_MEMPOOL_FREE(instance, mem); + instance->pfnFree(instance->pAllocUserData, mem); } -- cgit v1.2.3 From 1920ef9675b5ebdab2050b1f37c6cfdd6d9c6f69 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 31 Jul 2015 10:30:57 -0700 Subject: vk/allocator: Add an anv_state_pool_finish function Currently this is a no-op but it gives us a place to put finalization things in the future. 
--- src/vulkan/anv_allocator.c | 5 +++++ src/vulkan/anv_device.c | 2 ++ src/vulkan/anv_private.h | 1 + 3 files changed, 8 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index d85b919154d..463660f12c0 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -456,6 +456,11 @@ anv_state_pool_init(struct anv_state_pool *pool, } } +void +anv_state_pool_finish(struct anv_state_pool *pool) +{ +} + struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) { diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index f633108895e..0c05c33f626 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -652,8 +652,10 @@ VkResult anv_DestroyDevice( #endif anv_bo_pool_finish(&device->batch_bo_pool); + anv_state_pool_finish(&device->dynamic_state_pool); anv_block_pool_finish(&device->dynamic_state_block_pool); anv_block_pool_finish(&device->instruction_block_pool); + anv_state_pool_finish(&device->surface_state_pool); anv_block_pool_finish(&device->surface_state_block_pool); anv_block_pool_finish(&device->scratch_block_pool); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 12f826c1ab1..563d7a573bd 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -299,6 +299,7 @@ uint32_t anv_block_pool_alloc(struct anv_block_pool *pool); void anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset); void anv_state_pool_init(struct anv_state_pool *pool, struct anv_block_pool *block_pool); +void anv_state_pool_finish(struct anv_state_pool *pool); struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, size_t state_size, size_t alignment); void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state); -- cgit v1.2.3 From e65953146c817e8bb98dd91788ad4d46775d40b8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 31 Jul 2015 10:36:51 -0700 Subject: vk/allocator: Use memory pools rather 
than (MALLOC|FREE)LIKE We have pools, so we should be using them. Also, I think this will help keep valgrind from getting confused when we have to end up fighting with system allocations such as those from malloc/free and mmap/munmap. --- src/vulkan/anv_allocator.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 463660f12c0..96c06fbcc18 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -454,11 +454,13 @@ anv_state_pool_init(struct anv_state_pool *pool, size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i); anv_fixed_size_state_pool_init(&pool->buckets[i], size); } + VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); } void anv_state_pool_finish(struct anv_state_pool *pool) { + VG(VALGRIND_DESTROY_MEMPOOL(pool)); } struct anv_state @@ -475,7 +477,7 @@ anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], pool->block_pool); state.map = pool->block_pool->map + state.offset; - VG(VALGRIND_MALLOCLIKE_BLOCK(state.map, size, 0, false)); + VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, size)); return state; } @@ -488,7 +490,7 @@ anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) size_log2 <= ANV_MAX_STATE_SIZE_LOG2); unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; - VG(VALGRIND_FREELIKE_BLOCK(state.map, 0)); + VG(VALGRIND_MEMPOOL_FREE(pool, state.map)); anv_fixed_size_state_pool_free(&pool->buckets[bucket], pool->block_pool, state.offset); } @@ -516,6 +518,8 @@ anv_state_stream_init(struct anv_state_stream *stream, stream->next = 0; stream->end = 0; stream->current_block = NULL_BLOCK; + + VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false)); } void @@ -528,10 +532,12 @@ anv_state_stream_finish(struct anv_state_stream *stream) while (block != NULL_BLOCK) { sb = stream->block_pool->map + block; next_block = 
VG_NOACCESS_READ(&sb->next); - VG(VALGRIND_FREELIKE_BLOCK(VG_NOACCESS_READ(&sb->_vg_ptr), 0)); + VG(VALGRIND_MEMPOOL_FREE(stream, VG_NOACCESS_READ(&sb->_vg_ptr))); anv_block_pool_free(stream->block_pool, block); block = next_block; } + + VG(VALGRIND_DESTROY_MEMPOOL(stream)); } struct anv_state @@ -568,15 +574,13 @@ anv_state_stream_alloc(struct anv_state_stream *stream, if (vg_ptr == NULL) { vg_ptr = state.map; VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); - VALGRIND_MALLOCLIKE_BLOCK(vg_ptr, size, 0, false); + VALGRIND_MEMPOOL_ALLOC(stream, vg_ptr, size); } else { ptrdiff_t vg_offset = vg_ptr - current_map; assert(vg_offset >= stream->current_block && vg_offset < stream->end); - VALGRIND_RESIZEINPLACE_BLOCK(vg_ptr, - stream->next - vg_offset, - (state.offset + size) - vg_offset, - 0); + VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, + (state.offset + size) - vg_offset); } #endif @@ -597,6 +601,8 @@ anv_bo_pool_init(struct anv_bo_pool *pool, pool->device = device; pool->bo_size = bo_size; pool->free_list = NULL; + + VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); } void @@ -606,16 +612,12 @@ anv_bo_pool_finish(struct anv_bo_pool *pool) while (link != NULL) { struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); - /* The anv_gem_m[un]map() functions are also valgrind-safe so they - * act as an alloc/free. In order to avoid a double-free warning, we - * need to mark thiss as malloc'd before we unmap it. 
- */ - VG(VALGRIND_MALLOCLIKE_BLOCK(link_copy.bo.map, pool->bo_size, 0, false)); - anv_gem_munmap(link_copy.bo.map, pool->bo_size); anv_gem_close(pool->device, link_copy.bo.gem_handle); link = link_copy.next; } + + VG(VALGRIND_DESTROY_MEMPOOL(pool)); } VkResult @@ -630,7 +632,7 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) assert(bo->map == next_free); assert(bo->size == pool->bo_size); - VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, pool->bo_size, 0, false)); + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); return VK_SUCCESS; } @@ -649,13 +651,10 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) return vk_error(VK_ERROR_MEMORY_MAP_FAILED); } - /* We don't need to call VALGRIND_MALLOCLIKE_BLOCK here because gem_mmap - * calls it for us. If we really want to be pedantic we could do a - * VALGRIND_FREELIKE_BLOCK right after the mmap, but there's no good - * reason. - */ - *bo = new_bo; + + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + return VK_SUCCESS; } @@ -665,6 +664,6 @@ anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) struct bo_pool_bo_link *link = bo->map; link->bo = *bo; - VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0)); + VG(VALGRIND_MEMPOOL_FREE(pool, bo->map)); anv_ptr_free_list_push(&pool->free_list, link); } -- cgit v1.2.3 From 481122f4ac11fff402fa5b0884757462bcb1e933 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 3 Aug 2015 00:35:19 -0700 Subject: vk/allocator: Make a few things more consistant --- src/vulkan/anv_allocator.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 96c06fbcc18..04293f0d9be 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -153,9 +153,9 @@ round_to_power_of_two(uint32_t value) static bool anv_free_list_pop(union anv_free_list *list, void **map, uint32_t *offset) { - union anv_free_list current, next, old; + union 
anv_free_list current, new, old; - current = *list; + current.u64 = list->u64; while (current.offset != EMPTY) { /* We have to add a memory barrier here so that the list head (and * offset) gets read before we read the map pointer. This way we @@ -165,9 +165,9 @@ anv_free_list_pop(union anv_free_list *list, void **map, uint32_t *offset) __sync_synchronize(); uint32_t *next_ptr = *map + current.offset; - next.offset = VG_NOACCESS_READ(next_ptr); - next.count = current.count + 1; - old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, next.u64); + new.offset = VG_NOACCESS_READ(next_ptr); + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); if (old.u64 == current.u64) { *offset = current.offset; return true; -- cgit v1.2.3 From fd64598462689ccc9ac14dccdddb96c8a6ff8364 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 3 Aug 2015 00:38:48 -0700 Subject: vk/allocator: Fix a data race in the state pool The previous algorithm had a race because of the way we were using __sync_fetch_and_add for everything. In particular, the concept of "returning" over-allocated states in the "next > end" case was completely bogus. If too many threads were hitting the state pool at the same time, it was possible to have the following sequence: A: Get an offset (next == end) B: Get an offset (next > end) A: Resize the pool (now next < end by a lot) C: Get an offset (next < end) B: Return the over-allocated offset D: Get an offset in which case D will get the same offset as C. The solution to this race is to get rid of the concept of "returning" over-allocated states. Instead, the thread that gets a new block simply sets the next and end offsets directly and threads that over-allocate don't return anything and just futex-wait. 
Since you can only ever hit the over-allocate case if someone else hit the "next == end" case and hasn't resized yet, you're guaranteed that the end value will get updated and the futex won't block forever. --- src/vulkan/anv_allocator.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 04293f0d9be..601539bcf25 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -424,15 +424,15 @@ anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, if (block.next < block.end) { return block.next; } else if (block.next == block.end) { - new.next = anv_block_pool_alloc(block_pool); - new.end = new.next + block_pool->block_size; - old.u64 = __sync_fetch_and_add(&pool->block.u64, new.u64 - block.u64); + offset = anv_block_pool_alloc(block_pool); + new.next = offset + pool->state_size; + new.end = offset + block_pool->block_size; + old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64); if (old.next != block.next) futex_wake(&pool->block.end, INT_MAX); - return new.next; + return offset; } else { futex_wait(&pool->block.end, block.end); - __sync_fetch_and_add(&pool->block.u64, -pool->state_size); goto restart; } } -- cgit v1.2.3 From 56ce2194934868b08e13fb9c483cf90bbe6d562d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 3 Aug 2015 01:03:53 -0700 Subject: vk/allocator: Make block_pool_grow take and return a size It takes the old size as an argument and returns the new size as the return value. On error, it returns a size of 0. 
--- src/vulkan/anv_allocator.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 601539bcf25..c8507e0f30c 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -240,8 +240,8 @@ anv_ptr_free_list_push(void **list, void *elem) } while (old != current); } -static int -anv_block_pool_grow(struct anv_block_pool *pool); +static uint32_t +anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size); void anv_block_pool_init(struct anv_block_pool *pool, @@ -260,7 +260,7 @@ anv_block_pool_init(struct anv_block_pool *pool, round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); /* Immediately grow the pool so we'll have a backing bo. */ - anv_block_pool_grow(pool); + anv_block_pool_grow(pool, 0); } void @@ -280,38 +280,38 @@ anv_block_pool_finish(struct anv_block_pool *pool) close(pool->fd); } -static int -anv_block_pool_grow(struct anv_block_pool *pool) +static uint32_t +anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) { size_t size; void *map; int gem_handle; struct anv_mmap_cleanup *cleanup; - if (pool->size == 0) { + if (old_size == 0) { size = 32 * pool->block_size; } else { - size = pool->size * 2; + size = old_size * 2; } cleanup = anv_vector_add(&pool->mmap_cleanups); if (!cleanup) - return -1; + return 0; *cleanup = ANV_MMAP_CLEANUP_INIT; - if (pool->size == 0) + if (old_size == 0) pool->fd = memfd_create("block pool", MFD_CLOEXEC); if (pool->fd == -1) - return -1; + return 0; if (ftruncate(pool->fd, size) == -1) - return -1; + return 0; /* First try to see if mremap can grow the map in place. */ map = MAP_FAILED; - if (pool->size > 0) - map = mremap(pool->map, pool->size, size, 0); + if (old_size > 0) + map = mremap(pool->map, old_size, size, 0); if (map == MAP_FAILED) { /* Just leak the old map until we destroy the pool. 
We can't munmap it * without races or imposing locking on the block allocate fast path. On @@ -325,11 +325,11 @@ anv_block_pool_grow(struct anv_block_pool *pool) cleanup->size = size; } if (map == MAP_FAILED) - return -1; + return 0; gem_handle = anv_gem_userptr(pool->device, map, size); if (gem_handle == 0) - return -1; + return 0; cleanup->gem_handle = gem_handle; /* Now that we successfull allocated everything, we can write the new @@ -348,7 +348,7 @@ anv_block_pool_grow(struct anv_block_pool *pool) __sync_synchronize(); pool->size = size; - return 0; + return size; } uint32_t @@ -373,9 +373,9 @@ anv_block_pool_alloc(struct anv_block_pool *pool) * pool->next_block acts a mutex: threads who try to allocate now will * get block indexes above the current limit and hit futex_wait * below. */ - int err = anv_block_pool_grow(pool); - assert(err == 0); - (void) err; + uint32_t new_size = anv_block_pool_grow(pool, size); + assert(new_size > 0); + (void) new_size; futex_wake(&pool->size, INT_MAX); } else { futex_wait(&pool->size, size); -- cgit v1.2.3 From 5e5a783530aba5c65e9d08683c905fb4cfd329c5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 3 Aug 2015 01:18:09 -0700 Subject: vk: Add and use an anv_block_pool_size() helper --- src/vulkan/anv_cmd_buffer.c | 3 ++- src/vulkan/anv_private.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 3b9e67fdd0f..9ee5cf8b247 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -136,7 +136,8 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) struct anv_device *device = cmd_buffer->device; struct anv_bo *scratch_bo = NULL; - cmd_buffer->state.scratch_size = device->scratch_block_pool.size; + cmd_buffer->state.scratch_size = + anv_block_pool_size(&device->scratch_block_pool); if (cmd_buffer->state.scratch_size > 0) scratch_bo = &device->scratch_block_pool.bo; diff --git 
a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 563d7a573bd..b30dd7d51ad 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -253,6 +253,12 @@ struct anv_block_pool { union anv_free_list free_list; }; +static inline uint32_t +anv_block_pool_size(struct anv_block_pool *pool) +{ + return pool->size; +} + struct anv_block_state { union { struct { -- cgit v1.2.3 From facf587deac3375e42338aa304d77b34a527b26e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 3 Aug 2015 01:19:34 -0700 Subject: vk/allocator: Solve a data race in anv_block_pool The anv_block_pool data structure suffered from the exact same race as the state pool. Namely, that the uniqueness of the blocks handed out depends on the next_block value increasing monotonically. However, this invariant did not hold thanks to our block "return" concept. --- src/vulkan/anv_allocator.c | 41 ++++++++++++++++------------------------- src/vulkan/anv_private.h | 25 ++++++++++++------------- 2 files changed, 28 insertions(+), 38 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index c8507e0f30c..0003b3737fc 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -252,15 +252,14 @@ anv_block_pool_init(struct anv_block_pool *pool, pool->device = device; pool->bo.gem_handle = 0; pool->bo.offset = 0; - pool->size = 0; pool->block_size = block_size; - pool->next_block = 0; pool->free_list = ANV_FREE_LIST_EMPTY; anv_vector_init(&pool->mmap_cleanups, round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); /* Immediately grow the pool so we'll have a backing bo. */ - anv_block_pool_grow(pool, 0); + pool->state.next = 0; + pool->state.end = anv_block_pool_grow(pool, 0); } void @@ -340,21 +339,14 @@ anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) pool->bo.map = map; pool->bo.index = 0; - /* Write size last and after the memory barrier here. 
We need the memory - * barrier to make sure map and gem_handle are written before other threads - * see the new size. A thread could allocate a block and then go try using - * the old pool->map and access out of bounds. */ - - __sync_synchronize(); - pool->size = size; - return size; } uint32_t anv_block_pool_alloc(struct anv_block_pool *pool) { - uint32_t offset, block, size; + uint32_t offset; + struct anv_block_state state, old, new; /* Try free list first. */ if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { @@ -363,27 +355,26 @@ anv_block_pool_alloc(struct anv_block_pool *pool) } restart: - size = pool->size; - block = __sync_fetch_and_add(&pool->next_block, pool->block_size); - if (block < size) { + state.u64 = __sync_fetch_and_add(&pool->state.u64, pool->block_size); + if (state.next < state.end) { assert(pool->map); - return block; - } else if (block == size) { + return state.next; + } else if (state.next == state.end) { /* We allocated the first block outside the pool, we have to grow it. * pool->next_block acts a mutex: threads who try to allocate now will * get block indexes above the current limit and hit futex_wait * below. 
*/ - uint32_t new_size = anv_block_pool_grow(pool, size); - assert(new_size > 0); - (void) new_size; - futex_wake(&pool->size, INT_MAX); + new.next = state.next + pool->block_size; + new.end = anv_block_pool_grow(pool, state.end); + assert(new.end > 0); + old.u64 = __sync_lock_test_and_set(&pool->state.u64, new.u64); + if (old.next != state.next) + futex_wake(&pool->state.end, INT_MAX); + return state.next; } else { - futex_wait(&pool->size, size); - __sync_fetch_and_add(&pool->next_block, -pool->block_size); + futex_wait(&pool->state.end, state.end); goto restart; } - - return block; } void diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index b30dd7d51ad..f14a6ca858f 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -233,13 +233,22 @@ union anv_free_list { #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) +struct anv_block_state { + union { + struct { + uint32_t next; + uint32_t end; + }; + uint64_t u64; + }; +}; + struct anv_block_pool { struct anv_device *device; struct anv_bo bo; void *map; int fd; - uint32_t size; /** * Array of mmaps and gem handles owned by the block pool, reclaimed when @@ -249,26 +258,16 @@ struct anv_block_pool { uint32_t block_size; - uint32_t next_block; union anv_free_list free_list; + struct anv_block_state state; }; static inline uint32_t anv_block_pool_size(struct anv_block_pool *pool) { - return pool->size; + return pool->state.end; } -struct anv_block_state { - union { - struct { - uint32_t next; - uint32_t end; - }; - uint64_t u64; - }; -}; - struct anv_state { uint32_t offset; uint32_t alloc_size; -- cgit v1.2.3 From fbb119061e7be6ecdaefd8630e3fae274886d297 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 3 Aug 2015 15:15:17 -0700 Subject: vk: Update generated headers This adds zeroing of reserved blocks of dwords and removes an instruction definition. 
--- src/vulkan/gen75_pack.h | 40 ++++++++++------------------------------ src/vulkan/gen7_pack.h | 5 +++++ src/vulkan/gen8_pack.h | 41 +++++------------------------------------ 3 files changed, 20 insertions(+), 66 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index 583c5f25003..82b4065b760 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -7060,6 +7060,11 @@ GEN75_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, __gen_float(values->YMaxClipGuardband) | 0; + for (uint32_t i = 0, j = 12; i < 4; i += 1, j++) { + dw[j] = + 0; + } + } #define GEN75_BLEND_STATE_length 0x00000002 @@ -7365,36 +7370,6 @@ GEN75_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, #define GEN75_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 -#define GEN75_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS_length 0x00000001 - -struct GEN75_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS { -#define Highestpriority 0 -#define Secondhighestpriority 1 -#define Thirdhighestpriority 2 -#define Lowestpriority 3 - uint32_t ArbitrationPriorityControl; -#define PTE 0 -#define UC 1 -#define LLCeLLCWBcacheable 2 -#define eLLCWBcacheable 3 - uint32_t LLCeLLCCacheabilityControlLLCCC; - uint32_t L3CacheabilityControlL3CC; -}; - -static inline void -GEN75_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ArbitrationPriorityControl, 4, 5) | - __gen_field(values->LLCeLLCCacheabilityControlLLCCC, 1, 2) | - __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | - 0; - -} - #define GEN75_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 struct GEN75_INTERFACE_DESCRIPTOR_DATA { @@ -7701,6 +7676,11 @@ GEN75_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst __gen_field(values->BorderColorAlpha, 0, 31) | 0; + for 
(uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + 0; + } + dw[16] = __gen_field(values->BorderColor, 0, 127) | __gen_field(values->BorderColor, 0, 127) | diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 05b800034e0..886a26c00a2 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -5816,6 +5816,11 @@ GEN7_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, __gen_float(values->YMaxClipGuardband) | 0; + for (uint32_t i = 0, j = 12; i < 4; i += 1, j++) { + dw[j] = + 0; + } + } #define GEN7_BLEND_STATE_length 0x00000002 diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index 620b5a799c4..cafccc94741 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -4187,6 +4187,11 @@ GEN8_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->DwordLength, 0, 7) | 0; + for (uint32_t i = 0, j = 1; i < 4; i += 1, j++) { + dw[j] = + 0; + } + dw[5] = __gen_field(values->_8xSample7XOffset * (1 << 4), 28, 31) | __gen_field(values->_8xSample7YOffset * (1 << 4), 24, 27) | @@ -8032,42 +8037,6 @@ GEN8_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, #define GEN8_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 -#define GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS_length 0x00000001 - -struct GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS { -#define UseCacheabilityControlsfrompagetableUCwithFenceifcoherentcycle 0 -#define UncacheableUCnoncacheable 1 -#define WritethroughWT 2 -#define WritebackWB 3 - uint32_t MemoryTypeLLCeLLCCacheabilityControlLeLLCCC; -#define eLLCOnly 0 -#define LLCOnly 1 -#define LLCeLLCAllowed 2 -#define L3LLCeLLCAllowed 3 - uint32_t TargetCacheTC; - bool EncryptedData; -#define PoorChance 3 -#define NormalChance 2 -#define BetterChance 1 -#define BestChance 0 - bool AgeforQUADLRUAGE; -}; - -static inline void -GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->MemoryTypeLLCeLLCCacheabilityControlLeLLCCC, 5, 6) | - __gen_field(values->TargetCacheTC, 3, 4) | - __gen_field(values->EncryptedData, 2, 2) | - __gen_field(values->AgeforQUADLRUAGE, 0, 1) | - 0; - -} - #define GEN8_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 struct GEN8_INTERFACE_DESCRIPTOR_DATA { -- cgit v1.2.3 From 4b097d73e6bc360235915d34b798284d09174929 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 31 Jul 2015 13:45:54 -0700 Subject: vk: Call anv_batch_emit_dwords() up front in anv_batch_emit() This avoids putting a memory barrier between the template struct and the pack function, which generates much better code. --- src/vulkan/anv_private.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index f14a6ca858f..48edc6c95ed 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -528,22 +528,22 @@ __gen_combine_address(struct anv_batch *batch, void *location, #include "gen8_pack.h" #define anv_batch_emit(batch, cmd, ...) do { \ + void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \ struct cmd __template = { \ cmd ## _header, \ __VA_ARGS__ \ }; \ - void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \ cmd ## _pack(batch, __dst, &__template); \ VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, cmd ## _length * 4)); \ } while (0) #define anv_batch_emitn(batch, n, cmd, ...) 
({ \ + void *__dst = anv_batch_emit_dwords(batch, n); \ struct cmd __template = { \ cmd ## _header, \ .DwordLength = n - cmd ## _length_bias, \ __VA_ARGS__ \ }; \ - void *__dst = anv_batch_emit_dwords(batch, n); \ cmd ## _pack(batch, __dst, &__template); \ __dst; \ }) -- cgit v1.2.3 From 6757e2f75c304afc958309d3084aa66ceb92bb10 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 4 Aug 2015 14:00:09 -0700 Subject: vk/cmd_buffer: Allow for null VkCmdPool's --- src/vulkan/anv_cmd_buffer.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 9ee5cf8b247..5178f6529ab 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -88,7 +88,14 @@ VkResult anv_CreateCommandBuffer( anv_cmd_state_init(&cmd_buffer->state); - list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + if (pool) { + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + } else { + /* Init the pool_link so we can safefly call list_del when we destroy + * the command buffer + */ + list_inithead(&cmd_buffer->pool_link); + } *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); -- cgit v1.2.3 From 8605ee60e086ceb936000d3942a902239105637d Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 10 Aug 2015 17:14:35 -0700 Subject: vk: Share upload logic and add size assert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This lets us hit an assert if we exceed the block pool size instead of GPU hanging. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_compiler.cpp | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index a50ecfde517..ff32b071af2 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -102,6 +102,19 @@ set_binding_table_layout(struct brw_stage_prog_data *prog_data, return VK_SUCCESS; } +static uint32_t +upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size) +{ + struct anv_state state = + anv_state_stream_alloc(&pipeline->program_stream, size, 64); + + assert(size < pipeline->program_stream.block_pool->block_size); + + memcpy(state.map, data, size); + + return state.offset; +} + static void brw_vs_populate_key(struct brw_context *brw, struct brw_vertex_program *vp, @@ -249,11 +262,7 @@ really_do_vs_prog(struct brw_context *brw, return false; } - struct anv_state vs_state = anv_state_stream_alloc(&pipeline->program_stream, - program_size, 64); - memcpy(vs_state.map, program, program_size); - - pipeline->vs_simd8 = vs_state.offset; + pipeline->vs_simd8 = upload_kernel(pipeline, program, program_size); ralloc_free(mem_ctx); @@ -520,17 +529,15 @@ really_do_wm_prog(struct brw_context *brw, return false; } - struct anv_state ps_state = anv_state_stream_alloc(&pipeline->program_stream, - program_size, 64); - memcpy(ps_state.map, program, program_size); + uint32_t offset = upload_kernel(pipeline, program, program_size); if (prog_data->no_8) pipeline->ps_simd8 = NO_KERNEL; else - pipeline->ps_simd8 = ps_state.offset; + pipeline->ps_simd8 = offset; if (prog_data->no_8 || prog_data->prog_offset_16) { - pipeline->ps_simd16 = ps_state.offset + prog_data->prog_offset_16; + pipeline->ps_simd16 = offset + prog_data->prog_offset_16; } else { pipeline->ps_simd16 = NO_KERNEL; } @@ -581,11 +588,7 @@ really_do_gs_prog(struct brw_context *brw, brw_compile_gs_prog(brw, prog, gp, key, 
&output); - struct anv_state gs_state = anv_state_stream_alloc(&pipeline->program_stream, - output.program_size, 64); - memcpy(gs_state.map, output.program, output.program_size); - - pipeline->gs_vec4 = gs_state.offset; + pipeline->gs_vec4 = upload_kernel(pipeline, output.program, output.program_size); pipeline->gs_vertex_count = gp->program.VerticesIn; ralloc_free(output.mem_ctx); @@ -636,11 +639,7 @@ brw_codegen_cs_prog(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_CS)) fprintf(stderr, "\n"); - struct anv_state cs_state = anv_state_stream_alloc(&pipeline->program_stream, - program_size, 64); - memcpy(cs_state.map, program, program_size); - - pipeline->cs_simd = cs_state.offset; + pipeline->cs_simd = upload_kernel(pipeline, program, program_size); ralloc_free(mem_ctx); -- cgit v1.2.3 From 9564dd37a00f642d2637ffa15c2f27fb8992aa50 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 21 Jul 2015 13:09:25 -0700 Subject: vk: Query aperture size up front in anv_physical_device_init() We already query the device in various ways here and we can just also get the aperture size. This avoids keeping an extra drm fd open during the life time of the driver. Also, we need to use explicit 64 bit types for the aperture size, not size_t. 
--- src/vulkan/anv_device.c | 43 +++++++++++++++++-------------------------- src/vulkan/anv_gem.c | 4 ++-- src/vulkan/anv_private.h | 4 ++-- 3 files changed, 21 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 0c05c33f626..7eed78c660e 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -41,20 +41,15 @@ anv_env_get_int(const char *name) return strtol(val, NULL, 0); } -static void -anv_physical_device_finish(struct anv_physical_device *device) -{ - if (device->fd >= 0) - close(device->fd); -} - static VkResult anv_physical_device_init(struct anv_physical_device *device, struct anv_instance *instance, const char *path) { - device->fd = open(path, O_RDWR | O_CLOEXEC); - if (device->fd < 0) + int fd; + + fd = open(path, O_RDWR | O_CLOEXEC); + if (fd < 0) return vk_error(VK_ERROR_UNAVAILABLE); device->instance = instance; @@ -66,7 +61,7 @@ anv_physical_device_init(struct anv_physical_device *device, /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. 
*/ device->no_hw = true; } else { - device->chipset_id = anv_gem_get_param(device->fd, I915_PARAM_CHIPSET_ID); + device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); } if (!device->chipset_id) goto fail; @@ -76,22 +71,27 @@ anv_physical_device_init(struct anv_physical_device *device, if (!device->info) goto fail; - if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_WAIT_TIMEOUT)) + if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) goto fail; - if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_EXECBUF2)) + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) goto fail; - if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_LLC)) + if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC)) goto fail; - if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_EXEC_CONSTANTS)) + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS)) goto fail; + close(fd); + return VK_SUCCESS; fail: - anv_physical_device_finish(device); + close(fd); return vk_error(VK_ERROR_UNAVAILABLE); } @@ -154,10 +154,6 @@ VkResult anv_DestroyInstance( { ANV_FROM_HANDLE(anv_instance, instance, _instance); - if (instance->physicalDeviceCount > 0) { - anv_physical_device_finish(&instance->physicalDevice); - } - VG(VALGRIND_DESTROY_MEMPOOL(instance)); instance->pfnFree(instance->pAllocUserData, instance); @@ -456,17 +452,12 @@ VkResult anv_GetPhysicalDeviceMemoryProperties( VkPhysicalDeviceMemoryProperties* pMemoryProperties) { ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - - size_t aperture_size; - size_t heap_size; - - if (anv_gem_get_aperture(physical_device, &aperture_size) == -1) - return vk_error(VK_ERROR_UNAVAILABLE); + VkDeviceSize heap_size; /* Reserve some wiggle room for the driver by exposing only 75% of the * aperture to the heap. */ - heap_size = 3 * aperture_size / 4; + heap_size = 3 * physical_device->aperture_size / 4; /* The property flags below are valid only for llc platforms. 
*/ pMemoryProperties->memoryTypeCount = 1; diff --git a/src/vulkan/anv_gem.c b/src/vulkan/anv_gem.c index 4ce857e2a5f..01671d2ea50 100644 --- a/src/vulkan/anv_gem.c +++ b/src/vulkan/anv_gem.c @@ -230,13 +230,13 @@ anv_gem_destroy_context(struct anv_device *device, int context) } int -anv_gem_get_aperture(struct anv_physical_device *physical_dev, uint64_t *size) +anv_gem_get_aperture(int fd, uint64_t *size) { struct drm_i915_gem_get_aperture aperture; int ret; VG_CLEAR(aperture); - ret = anv_ioctl(physical_dev->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); if (ret == -1) return -1; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 48edc6c95ed..d53f63d5d27 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -339,7 +339,7 @@ struct anv_physical_device { const char * path; const char * name; const struct brw_device_info * info; - int fd; + uint64_t aperture_size; }; struct anv_instance { @@ -444,7 +444,7 @@ int anv_gem_set_tiling(struct anv_device *device, int gem_handle, int anv_gem_create_context(struct anv_device *device); int anv_gem_destroy_context(struct anv_device *device, int context); int anv_gem_get_param(int fd, uint32_t param); -int anv_gem_get_aperture(struct anv_physical_device *physical_dev, uint64_t *size); +int anv_gem_get_aperture(int fd, uint64_t *size); int anv_gem_handle_to_fd(struct anv_device *device, int gem_handle); int anv_gem_fd_to_handle(struct anv_device *device, int fd); int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); -- cgit v1.2.3 From 30d82136bb360e339260cdb0d930dea5fdaf4b84 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 12 Aug 2015 21:05:18 -0700 Subject: vk: Update generated headers This update brings usable IVB/HSW RENDER_SURFACE_STATE structs and adds more float fields that we previously failed to recognize. 
--- src/vulkan/gen75_pack.h | 859 +++++++++++++++++++++++++++++++---------------- src/vulkan/gen7_pack.h | 627 ++++++++++++++++++++-------------- src/vulkan/gen8_pack.h | 874 ++++++++++++++++++++++++++++++++---------------- 3 files changed, 1515 insertions(+), 845 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index 82b4065b760..7602fb7bb76 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -92,7 +92,6 @@ __gen_float(float v) #endif -#define GEN75_3DSTATE_URB_VS_length 0x00000002 #define GEN75_3DSTATE_URB_VS_length_bias 0x00000002 #define GEN75_3DSTATE_URB_VS_header \ .CommandType = 3, \ @@ -101,6 +100,8 @@ __gen_float(float v) ._3DCommandSubOpcode = 48, \ .DwordLength = 0 +#define GEN75_3DSTATE_URB_VS_length 0x00000002 + struct GEN75_3DSTATE_URB_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -114,7 +115,7 @@ struct GEN75_3DSTATE_URB_VS { static inline void GEN75_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_URB_VS * restrict values) + const struct GEN75_3DSTATE_URB_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -134,7 +135,6 @@ GEN75_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_GPGPU_CSR_BASE_ADDRESS_length 0x00000002 #define GEN75_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 #define GEN75_GPGPU_CSR_BASE_ADDRESS_header \ .CommandType = 3, \ @@ -143,6 +143,8 @@ GEN75_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 4, \ .DwordLength = 0 +#define GEN75_GPGPU_CSR_BASE_ADDRESS_length 0x00000002 + struct GEN75_GPGPU_CSR_BASE_ADDRESS { uint32_t CommandType; uint32_t CommandSubType; @@ -154,7 +156,7 @@ struct GEN75_GPGPU_CSR_BASE_ADDRESS { static inline void GEN75_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_GPGPU_CSR_BASE_ADDRESS * restrict values) + const struct GEN75_GPGPU_CSR_BASE_ADDRESS * restrict 
values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -174,13 +176,14 @@ GEN75_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_STORE_REGISTER_MEM_length 0x00000003 #define GEN75_MI_STORE_REGISTER_MEM_length_bias 0x00000002 #define GEN75_MI_STORE_REGISTER_MEM_header \ .CommandType = 0, \ .MICommandOpcode = 36, \ .DwordLength = 1 +#define GEN75_MI_STORE_REGISTER_MEM_length 0x00000003 + struct GEN75_MI_STORE_REGISTER_MEM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -193,7 +196,7 @@ struct GEN75_MI_STORE_REGISTER_MEM { static inline void GEN75_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_STORE_REGISTER_MEM * restrict values) + const struct GEN75_MI_STORE_REGISTER_MEM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -217,7 +220,6 @@ GEN75_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_PIPELINE_SELECT_length 0x00000001 #define GEN75_PIPELINE_SELECT_length_bias 0x00000001 #define GEN75_PIPELINE_SELECT_header \ .CommandType = 3, \ @@ -225,6 +227,8 @@ GEN75_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 4 +#define GEN75_PIPELINE_SELECT_length 0x00000001 + struct GEN75_PIPELINE_SELECT { uint32_t CommandType; uint32_t CommandSubType; @@ -238,7 +242,7 @@ struct GEN75_PIPELINE_SELECT { static inline void GEN75_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_PIPELINE_SELECT * restrict values) + const struct GEN75_PIPELINE_SELECT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -252,7 +256,6 @@ GEN75_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_STATE_BASE_ADDRESS_length 0x0000000a #define GEN75_STATE_BASE_ADDRESS_length_bias 0x00000002 #define GEN75_STATE_BASE_ADDRESS_header \ .CommandType = 3, \ @@ -261,6 +264,10 @@ GEN75_PIPELINE_SELECT_pack(__gen_user_data 
*data, void * restrict dst, ._3DCommandSubOpcode = 1, \ .DwordLength = 8 +#define GEN75_STATE_BASE_ADDRESS_length 0x0000000a + +#define GEN75_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + struct GEN75_MEMORY_OBJECT_CONTROL_STATE { uint32_t LLCeLLCCacheabilityControlLLCCC; uint32_t L3CacheabilityControlL3CC; @@ -268,7 +275,7 @@ struct GEN75_MEMORY_OBJECT_CONTROL_STATE { static inline void GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEMORY_OBJECT_CONTROL_STATE * restrict values) + const struct GEN75_MEMORY_OBJECT_CONTROL_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -313,7 +320,7 @@ struct GEN75_STATE_BASE_ADDRESS { static inline void GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_STATE_BASE_ADDRESS * restrict values) + const struct GEN75_STATE_BASE_ADDRESS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -408,7 +415,6 @@ GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_STATE_PREFETCH_length 0x00000002 #define GEN75_STATE_PREFETCH_length_bias 0x00000002 #define GEN75_STATE_PREFETCH_header \ .CommandType = 3, \ @@ -417,6 +423,8 @@ GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 3, \ .DwordLength = 0 +#define GEN75_STATE_PREFETCH_length 0x00000002 + struct GEN75_STATE_PREFETCH { uint32_t CommandType; uint32_t CommandSubType; @@ -429,7 +437,7 @@ struct GEN75_STATE_PREFETCH { static inline void GEN75_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_STATE_PREFETCH * restrict values) + const struct GEN75_STATE_PREFETCH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -450,7 +458,6 @@ GEN75_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_STATE_SIP_length 0x00000002 #define GEN75_STATE_SIP_length_bias 0x00000002 #define GEN75_STATE_SIP_header \ 
.CommandType = 3, \ @@ -459,6 +466,8 @@ GEN75_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 2, \ .DwordLength = 0 +#define GEN75_STATE_SIP_length 0x00000002 + struct GEN75_STATE_SIP { uint32_t CommandType; uint32_t CommandSubType; @@ -470,7 +479,7 @@ struct GEN75_STATE_SIP { static inline void GEN75_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_STATE_SIP * restrict values) + const struct GEN75_STATE_SIP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -488,7 +497,6 @@ GEN75_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_SWTESS_BASE_ADDRESS_length 0x00000002 #define GEN75_SWTESS_BASE_ADDRESS_length_bias 0x00000002 #define GEN75_SWTESS_BASE_ADDRESS_header \ .CommandType = 3, \ @@ -497,6 +505,8 @@ GEN75_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 3, \ .DwordLength = 0 +#define GEN75_SWTESS_BASE_ADDRESS_length 0x00000002 + struct GEN75_SWTESS_BASE_ADDRESS { uint32_t CommandType; uint32_t CommandSubType; @@ -509,7 +519,7 @@ struct GEN75_SWTESS_BASE_ADDRESS { static inline void GEN75_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SWTESS_BASE_ADDRESS * restrict values) + const struct GEN75_SWTESS_BASE_ADDRESS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -532,7 +542,6 @@ GEN75_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DPRIMITIVE_length 0x00000007 #define GEN75_3DPRIMITIVE_length_bias 0x00000002 #define GEN75_3DPRIMITIVE_header \ .CommandType = 3, \ @@ -541,6 +550,8 @@ GEN75_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 0, \ .DwordLength = 5 +#define GEN75_3DPRIMITIVE_length 0x00000007 + struct GEN75_3DPRIMITIVE { uint32_t CommandType; uint32_t CommandSubType; @@ -564,7 +575,7 @@ struct GEN75_3DPRIMITIVE { static inline void GEN75_3DPRIMITIVE_pack(__gen_user_data 
*data, void * restrict dst, - const struct GEN75_3DPRIMITIVE * restrict values) + const struct GEN75_3DPRIMITIVE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -607,7 +618,6 @@ GEN75_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 #define GEN75_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 #define GEN75_3DSTATE_AA_LINE_PARAMETERS_header \ .CommandType = 3, \ @@ -616,6 +626,8 @@ GEN75_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 10, \ .DwordLength = 1 +#define GEN75_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 + struct GEN75_3DSTATE_AA_LINE_PARAMETERS { uint32_t CommandType; uint32_t CommandSubType; @@ -630,7 +642,7 @@ struct GEN75_3DSTATE_AA_LINE_PARAMETERS { static inline void GEN75_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_AA_LINE_PARAMETERS * restrict values) + const struct GEN75_3DSTATE_AA_LINE_PARAMETERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -661,6 +673,10 @@ GEN75_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 70 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_length 0x00000000 + +#define GEN75_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 + struct GEN75_BINDING_TABLE_EDIT_ENTRY { uint32_t BindingTableIndex; uint32_t SurfaceStatePointer; @@ -668,7 +684,7 @@ struct GEN75_BINDING_TABLE_EDIT_ENTRY { static inline void GEN75_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_BINDING_TABLE_EDIT_ENTRY * restrict values) + const struct GEN75_BINDING_TABLE_EDIT_ENTRY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -695,7 +711,7 @@ struct GEN75_3DSTATE_BINDING_TABLE_EDIT_DS { static inline void GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN75_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -722,6 +738,8 @@ GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 68 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_length 0x00000000 + struct GEN75_3DSTATE_BINDING_TABLE_EDIT_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -738,7 +756,7 @@ struct GEN75_3DSTATE_BINDING_TABLE_EDIT_GS { static inline void GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -765,6 +783,8 @@ GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 69 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_length 0x00000000 + struct GEN75_3DSTATE_BINDING_TABLE_EDIT_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -781,7 +801,7 @@ struct GEN75_3DSTATE_BINDING_TABLE_EDIT_HS { static inline void GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -808,6 +828,8 @@ GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 71 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_length 0x00000000 + struct GEN75_3DSTATE_BINDING_TABLE_EDIT_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -824,7 +846,7 @@ struct GEN75_3DSTATE_BINDING_TABLE_EDIT_PS { static inline void GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN75_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -851,6 +873,8 @@ GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 67 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_length 0x00000000 + struct GEN75_3DSTATE_BINDING_TABLE_EDIT_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -867,7 +891,7 @@ struct GEN75_3DSTATE_BINDING_TABLE_EDIT_VS { static inline void GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -887,7 +911,6 @@ GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict /* variable length fields follow */ } -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 #define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 #define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ .CommandType = 3, \ @@ -896,6 +919,8 @@ GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 40, \ .DwordLength = 0 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 + struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -907,7 +932,7 @@ struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS { static inline void GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -925,7 +950,6 @@ GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restr } -#define 
GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 #define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 #define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ .CommandType = 3, \ @@ -934,6 +958,8 @@ GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restr ._3DCommandSubOpcode = 41, \ .DwordLength = 0 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 + struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -945,7 +971,7 @@ struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS { static inline void GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -963,7 +989,6 @@ GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restr } -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 #define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 #define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ .CommandType = 3, \ @@ -972,6 +997,8 @@ GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restr ._3DCommandSubOpcode = 39, \ .DwordLength = 0 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 + struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -983,7 +1010,7 @@ struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS { static inline void GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1001,7 +1028,6 @@ GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restr } -#define 
GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 #define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 #define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ .CommandType = 3, \ @@ -1010,6 +1036,8 @@ GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restr ._3DCommandSubOpcode = 42, \ .DwordLength = 0 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 + struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -1021,7 +1049,7 @@ struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS { static inline void GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1039,7 +1067,6 @@ GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restr } -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 #define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 #define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ .CommandType = 3, \ @@ -1048,6 +1075,8 @@ GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restr ._3DCommandSubOpcode = 38, \ .DwordLength = 0 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 + struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -1059,7 +1088,7 @@ struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS { static inline void GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1077,7 +1106,6 @@ GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restr } 
-#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000003 #define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 #define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ .CommandType = 3, \ @@ -1086,6 +1114,8 @@ GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restr ._3DCommandSubOpcode = 25, \ .DwordLength = 1 +#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000003 + struct GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC { uint32_t CommandType; uint32_t CommandSubType; @@ -1100,7 +1130,7 @@ struct GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC { static inline void GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) + const struct GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1130,7 +1160,6 @@ GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restri } -#define GEN75_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 #define GEN75_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 #define GEN75_3DSTATE_BLEND_STATE_POINTERS_header\ .CommandType = 3, \ @@ -1139,6 +1168,8 @@ GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 36, \ .DwordLength = 0 +#define GEN75_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 + struct GEN75_3DSTATE_BLEND_STATE_POINTERS { uint32_t CommandType; uint32_t CommandSubType; @@ -1150,7 +1181,7 @@ struct GEN75_3DSTATE_BLEND_STATE_POINTERS { static inline void GEN75_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BLEND_STATE_POINTERS * restrict values) + const struct GEN75_3DSTATE_BLEND_STATE_POINTERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1169,7 +1200,6 @@ GEN75_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict d } -#define 
GEN75_3DSTATE_CC_STATE_POINTERS_length 0x00000002 #define GEN75_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 #define GEN75_3DSTATE_CC_STATE_POINTERS_header \ .CommandType = 3, \ @@ -1178,6 +1208,8 @@ GEN75_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict d ._3DCommandSubOpcode = 14, \ .DwordLength = 0 +#define GEN75_3DSTATE_CC_STATE_POINTERS_length 0x00000002 + struct GEN75_3DSTATE_CC_STATE_POINTERS { uint32_t CommandType; uint32_t CommandSubType; @@ -1189,7 +1221,7 @@ struct GEN75_3DSTATE_CC_STATE_POINTERS { static inline void GEN75_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CC_STATE_POINTERS * restrict values) + const struct GEN75_3DSTATE_CC_STATE_POINTERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1208,7 +1240,6 @@ GEN75_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_CHROMA_KEY_length 0x00000004 #define GEN75_3DSTATE_CHROMA_KEY_length_bias 0x00000002 #define GEN75_3DSTATE_CHROMA_KEY_header \ .CommandType = 3, \ @@ -1217,6 +1248,8 @@ GEN75_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 4, \ .DwordLength = 2 +#define GEN75_3DSTATE_CHROMA_KEY_length 0x00000004 + struct GEN75_3DSTATE_CHROMA_KEY { uint32_t CommandType; uint32_t CommandSubType; @@ -1230,7 +1263,7 @@ struct GEN75_3DSTATE_CHROMA_KEY { static inline void GEN75_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CHROMA_KEY * restrict values) + const struct GEN75_3DSTATE_CHROMA_KEY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1256,7 +1289,6 @@ GEN75_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_CLEAR_PARAMS_length 0x00000003 #define GEN75_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 #define GEN75_3DSTATE_CLEAR_PARAMS_header \ .CommandType = 3, \ @@ -1265,6 +1297,8 @@ 
GEN75_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 4, \ .DwordLength = 1 +#define GEN75_3DSTATE_CLEAR_PARAMS_length 0x00000003 + struct GEN75_3DSTATE_CLEAR_PARAMS { uint32_t CommandType; uint32_t CommandSubType; @@ -1277,7 +1311,7 @@ struct GEN75_3DSTATE_CLEAR_PARAMS { static inline void GEN75_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CLEAR_PARAMS * restrict values) + const struct GEN75_3DSTATE_CLEAR_PARAMS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1299,7 +1333,6 @@ GEN75_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_CLIP_length 0x00000004 #define GEN75_3DSTATE_CLIP_length_bias 0x00000002 #define GEN75_3DSTATE_CLIP_header \ .CommandType = 3, \ @@ -1308,6 +1341,8 @@ GEN75_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 18, \ .DwordLength = 2 +#define GEN75_3DSTATE_CLIP_length 0x00000004 + struct GEN75_3DSTATE_CLIP { uint32_t CommandType; uint32_t CommandSubType; @@ -1356,7 +1391,7 @@ struct GEN75_3DSTATE_CLIP { static inline void GEN75_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CLIP * restrict values) + const struct GEN75_3DSTATE_CLIP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1401,7 +1436,6 @@ GEN75_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_CONSTANT_DS_length 0x00000007 #define GEN75_3DSTATE_CONSTANT_DS_length_bias 0x00000002 #define GEN75_3DSTATE_CONSTANT_DS_header \ .CommandType = 3, \ @@ -1410,6 +1444,10 @@ GEN75_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 26, \ .DwordLength = 5 +#define GEN75_3DSTATE_CONSTANT_DS_length 0x00000007 + +#define GEN75_3DSTATE_CONSTANT_BODY_length 0x00000006 + struct GEN75_3DSTATE_CONSTANT_BODY { uint32_t ConstantBuffer1ReadLength; uint32_t 
ConstantBuffer0ReadLength; @@ -1424,7 +1462,7 @@ struct GEN75_3DSTATE_CONSTANT_BODY { static inline void GEN75_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CONSTANT_BODY * restrict values) + const struct GEN75_3DSTATE_CONSTANT_BODY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1478,7 +1516,7 @@ struct GEN75_3DSTATE_CONSTANT_DS { static inline void GEN75_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CONSTANT_DS * restrict values) + const struct GEN75_3DSTATE_CONSTANT_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1493,7 +1531,6 @@ GEN75_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN75_3DSTATE_CONSTANT_GS_length 0x00000007 #define GEN75_3DSTATE_CONSTANT_GS_length_bias 0x00000002 #define GEN75_3DSTATE_CONSTANT_GS_header \ .CommandType = 3, \ @@ -1502,6 +1539,8 @@ GEN75_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 22, \ .DwordLength = 5 +#define GEN75_3DSTATE_CONSTANT_GS_length 0x00000007 + struct GEN75_3DSTATE_CONSTANT_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -1513,7 +1552,7 @@ struct GEN75_3DSTATE_CONSTANT_GS { static inline void GEN75_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CONSTANT_GS * restrict values) + const struct GEN75_3DSTATE_CONSTANT_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1528,7 +1567,6 @@ GEN75_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN75_3DSTATE_CONSTANT_HS_length 0x00000007 #define GEN75_3DSTATE_CONSTANT_HS_length_bias 0x00000002 #define GEN75_3DSTATE_CONSTANT_HS_header \ .CommandType = 3, \ @@ -1537,6 +1575,8 @@ 
GEN75_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 25, \ .DwordLength = 5 +#define GEN75_3DSTATE_CONSTANT_HS_length 0x00000007 + struct GEN75_3DSTATE_CONSTANT_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -1548,7 +1588,7 @@ struct GEN75_3DSTATE_CONSTANT_HS { static inline void GEN75_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CONSTANT_HS * restrict values) + const struct GEN75_3DSTATE_CONSTANT_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1563,7 +1603,6 @@ GEN75_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN75_3DSTATE_CONSTANT_PS_length 0x00000007 #define GEN75_3DSTATE_CONSTANT_PS_length_bias 0x00000002 #define GEN75_3DSTATE_CONSTANT_PS_header \ .CommandType = 3, \ @@ -1572,6 +1611,8 @@ GEN75_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 23, \ .DwordLength = 5 +#define GEN75_3DSTATE_CONSTANT_PS_length 0x00000007 + struct GEN75_3DSTATE_CONSTANT_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -1583,7 +1624,7 @@ struct GEN75_3DSTATE_CONSTANT_PS { static inline void GEN75_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CONSTANT_PS * restrict values) + const struct GEN75_3DSTATE_CONSTANT_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1598,7 +1639,6 @@ GEN75_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN75_3DSTATE_CONSTANT_VS_length 0x00000007 #define GEN75_3DSTATE_CONSTANT_VS_length_bias 0x00000002 #define GEN75_3DSTATE_CONSTANT_VS_header \ .CommandType = 3, \ @@ -1607,6 +1647,8 @@ GEN75_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 21, \ .DwordLength = 5 
+#define GEN75_3DSTATE_CONSTANT_VS_length 0x00000007 + struct GEN75_3DSTATE_CONSTANT_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -1618,7 +1660,7 @@ struct GEN75_3DSTATE_CONSTANT_VS { static inline void GEN75_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CONSTANT_VS * restrict values) + const struct GEN75_3DSTATE_CONSTANT_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1633,7 +1675,6 @@ GEN75_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN75_3DSTATE_DEPTH_BUFFER_length 0x00000007 #define GEN75_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 #define GEN75_3DSTATE_DEPTH_BUFFER_header \ .CommandType = 3, \ @@ -1642,6 +1683,8 @@ GEN75_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 5, \ .DwordLength = 5 +#define GEN75_3DSTATE_DEPTH_BUFFER_length 0x00000007 + struct GEN75_3DSTATE_DEPTH_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -1677,7 +1720,7 @@ struct GEN75_3DSTATE_DEPTH_BUFFER { static inline void GEN75_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_DEPTH_BUFFER * restrict values) + const struct GEN75_3DSTATE_DEPTH_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1729,7 +1772,6 @@ GEN75_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length 0x00000002 #define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length_bias 0x00000002 #define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_header\ .CommandType = 3, \ @@ -1738,6 +1780,8 @@ GEN75_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 37, \ .DwordLength = 0 +#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length 0x00000002 + struct GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS { uint32_t 
CommandType; uint32_t CommandSubType; @@ -1749,7 +1793,7 @@ struct GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS { static inline void GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS * restrict values) + const struct GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1768,7 +1812,6 @@ GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * re } -#define GEN75_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 #define GEN75_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 #define GEN75_3DSTATE_DRAWING_RECTANGLE_header \ .CommandType = 3, \ @@ -1777,6 +1820,8 @@ GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * re ._3DCommandSubOpcode = 0, \ .DwordLength = 2 +#define GEN75_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 + struct GEN75_3DSTATE_DRAWING_RECTANGLE { uint32_t CommandType; uint32_t CommandSubType; @@ -1797,7 +1842,7 @@ struct GEN75_3DSTATE_DRAWING_RECTANGLE { static inline void GEN75_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_DRAWING_RECTANGLE * restrict values) + const struct GEN75_3DSTATE_DRAWING_RECTANGLE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1827,7 +1872,6 @@ GEN75_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_DS_length 0x00000006 #define GEN75_3DSTATE_DS_length_bias 0x00000002 #define GEN75_3DSTATE_DS_header \ .CommandType = 3, \ @@ -1836,6 +1880,8 @@ GEN75_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 29, \ .DwordLength = 4 +#define GEN75_3DSTATE_DS_length 0x00000006 + struct GEN75_3DSTATE_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -1879,7 +1925,7 @@ struct GEN75_3DSTATE_DS { static inline void GEN75_3DSTATE_DS_pack(__gen_user_data *data, void * restrict 
dst, - const struct GEN75_3DSTATE_DS * restrict values) + const struct GEN75_3DSTATE_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1935,6 +1981,10 @@ GEN75_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 55 +#define GEN75_3DSTATE_GATHER_CONSTANT_DS_length 0x00000000 + +#define GEN75_GATHER_CONSTANT_ENTRY_length 0x00000001 + struct GEN75_GATHER_CONSTANT_ENTRY { uint32_t ConstantBufferOffset; uint32_t ChannelMask; @@ -1943,7 +1993,7 @@ struct GEN75_GATHER_CONSTANT_ENTRY { static inline void GEN75_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_GATHER_CONSTANT_ENTRY * restrict values) + const struct GEN75_GATHER_CONSTANT_ENTRY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1969,7 +2019,7 @@ struct GEN75_3DSTATE_GATHER_CONSTANT_DS { static inline void GEN75_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GATHER_CONSTANT_DS * restrict values) + const struct GEN75_3DSTATE_GATHER_CONSTANT_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2000,6 +2050,8 @@ GEN75_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 53 +#define GEN75_3DSTATE_GATHER_CONSTANT_GS_length 0x00000000 + struct GEN75_3DSTATE_GATHER_CONSTANT_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -2014,7 +2066,7 @@ struct GEN75_3DSTATE_GATHER_CONSTANT_GS { static inline void GEN75_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GATHER_CONSTANT_GS * restrict values) + const struct GEN75_3DSTATE_GATHER_CONSTANT_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2045,6 +2097,8 @@ GEN75_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 54 +#define 
GEN75_3DSTATE_GATHER_CONSTANT_HS_length 0x00000000 + struct GEN75_3DSTATE_GATHER_CONSTANT_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -2059,7 +2113,7 @@ struct GEN75_3DSTATE_GATHER_CONSTANT_HS { static inline void GEN75_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GATHER_CONSTANT_HS * restrict values) + const struct GEN75_3DSTATE_GATHER_CONSTANT_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2090,6 +2144,8 @@ GEN75_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 56 +#define GEN75_3DSTATE_GATHER_CONSTANT_PS_length 0x00000000 + struct GEN75_3DSTATE_GATHER_CONSTANT_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -2105,7 +2161,7 @@ struct GEN75_3DSTATE_GATHER_CONSTANT_PS { static inline void GEN75_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GATHER_CONSTANT_PS * restrict values) + const struct GEN75_3DSTATE_GATHER_CONSTANT_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2137,6 +2193,8 @@ GEN75_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 52 +#define GEN75_3DSTATE_GATHER_CONSTANT_VS_length 0x00000000 + struct GEN75_3DSTATE_GATHER_CONSTANT_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -2152,7 +2210,7 @@ struct GEN75_3DSTATE_GATHER_CONSTANT_VS { static inline void GEN75_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GATHER_CONSTANT_VS * restrict values) + const struct GEN75_3DSTATE_GATHER_CONSTANT_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2177,7 +2235,6 @@ GEN75_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst /* variable length fields follow */ } -#define GEN75_3DSTATE_GATHER_POOL_ALLOC_length 0x00000003 #define 
GEN75_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 #define GEN75_3DSTATE_GATHER_POOL_ALLOC_header \ .CommandType = 3, \ @@ -2186,6 +2243,8 @@ GEN75_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst ._3DCommandSubOpcode = 26, \ .DwordLength = 1 +#define GEN75_3DSTATE_GATHER_POOL_ALLOC_length 0x00000003 + struct GEN75_3DSTATE_GATHER_POOL_ALLOC { uint32_t CommandType; uint32_t CommandSubType; @@ -2200,7 +2259,7 @@ struct GEN75_3DSTATE_GATHER_POOL_ALLOC { static inline void GEN75_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GATHER_POOL_ALLOC * restrict values) + const struct GEN75_3DSTATE_GATHER_POOL_ALLOC * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2231,7 +2290,6 @@ GEN75_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_GS_length 0x00000007 #define GEN75_3DSTATE_GS_length_bias 0x00000002 #define GEN75_3DSTATE_GS_header \ .CommandType = 3, \ @@ -2240,6 +2298,8 @@ GEN75_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 17, \ .DwordLength = 5 +#define GEN75_3DSTATE_GS_length 0x00000007 + struct GEN75_3DSTATE_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -2301,7 +2361,7 @@ struct GEN75_3DSTATE_GS { static inline void GEN75_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GS * restrict values) + const struct GEN75_3DSTATE_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2366,7 +2426,6 @@ GEN75_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000003 #define GEN75_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 #define GEN75_3DSTATE_HIER_DEPTH_BUFFER_header \ .CommandType = 3, \ @@ -2375,6 +2434,8 @@ GEN75_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 7, \ .DwordLength = 1 +#define 
GEN75_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000003 + struct GEN75_3DSTATE_HIER_DEPTH_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -2388,7 +2449,7 @@ struct GEN75_3DSTATE_HIER_DEPTH_BUFFER { static inline void GEN75_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_HIER_DEPTH_BUFFER * restrict values) + const struct GEN75_3DSTATE_HIER_DEPTH_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2415,7 +2476,6 @@ GEN75_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_HS_length 0x00000007 #define GEN75_3DSTATE_HS_length_bias 0x00000002 #define GEN75_3DSTATE_HS_header \ .CommandType = 3, \ @@ -2424,6 +2484,8 @@ GEN75_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 27, \ .DwordLength = 5 +#define GEN75_3DSTATE_HS_length 0x00000007 + struct GEN75_3DSTATE_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -2466,7 +2528,7 @@ struct GEN75_3DSTATE_HS { static inline void GEN75_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_HS * restrict values) + const struct GEN75_3DSTATE_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2519,7 +2581,6 @@ GEN75_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_INDEX_BUFFER_length 0x00000003 #define GEN75_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 #define GEN75_3DSTATE_INDEX_BUFFER_header \ .CommandType = 3, \ @@ -2528,6 +2589,8 @@ GEN75_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 10, \ .DwordLength = 1 +#define GEN75_3DSTATE_INDEX_BUFFER_length 0x00000003 + struct GEN75_3DSTATE_INDEX_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -2545,7 +2608,7 @@ struct GEN75_3DSTATE_INDEX_BUFFER { static inline void GEN75_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN75_3DSTATE_INDEX_BUFFER * restrict values) + const struct GEN75_3DSTATE_INDEX_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2575,7 +2638,6 @@ GEN75_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_LINE_STIPPLE_length 0x00000003 #define GEN75_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 #define GEN75_3DSTATE_LINE_STIPPLE_header \ .CommandType = 3, \ @@ -2584,6 +2646,8 @@ GEN75_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 8, \ .DwordLength = 1 +#define GEN75_3DSTATE_LINE_STIPPLE_length 0x00000003 + struct GEN75_3DSTATE_LINE_STIPPLE { uint32_t CommandType; uint32_t CommandSubType; @@ -2600,7 +2664,7 @@ struct GEN75_3DSTATE_LINE_STIPPLE { static inline void GEN75_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_LINE_STIPPLE * restrict values) + const struct GEN75_3DSTATE_LINE_STIPPLE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2626,7 +2690,6 @@ GEN75_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_MONOFILTER_SIZE_length 0x00000002 #define GEN75_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 #define GEN75_3DSTATE_MONOFILTER_SIZE_header \ .CommandType = 3, \ @@ -2635,6 +2698,8 @@ GEN75_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 17, \ .DwordLength = 0 +#define GEN75_3DSTATE_MONOFILTER_SIZE_length 0x00000002 + struct GEN75_3DSTATE_MONOFILTER_SIZE { uint32_t CommandType; uint32_t CommandSubType; @@ -2647,7 +2712,7 @@ struct GEN75_3DSTATE_MONOFILTER_SIZE { static inline void GEN75_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_MONOFILTER_SIZE * restrict values) + const struct GEN75_3DSTATE_MONOFILTER_SIZE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2666,7 +2731,6 @@ 
GEN75_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_MULTISAMPLE_length 0x00000004 #define GEN75_3DSTATE_MULTISAMPLE_length_bias 0x00000002 #define GEN75_3DSTATE_MULTISAMPLE_header \ .CommandType = 3, \ @@ -2675,6 +2739,8 @@ GEN75_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 13, \ .DwordLength = 2 +#define GEN75_3DSTATE_MULTISAMPLE_length 0x00000004 + struct GEN75_3DSTATE_MULTISAMPLE { uint32_t CommandType; uint32_t CommandSubType; @@ -2709,7 +2775,7 @@ struct GEN75_3DSTATE_MULTISAMPLE { static inline void GEN75_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_MULTISAMPLE * restrict values) + const struct GEN75_3DSTATE_MULTISAMPLE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2751,7 +2817,6 @@ GEN75_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 #define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 #define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_header\ .CommandType = 3, \ @@ -2760,6 +2825,8 @@ GEN75_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 6, \ .DwordLength = 0 +#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 + struct GEN75_3DSTATE_POLY_STIPPLE_OFFSET { uint32_t CommandType; uint32_t CommandSubType; @@ -2772,7 +2839,7 @@ struct GEN75_3DSTATE_POLY_STIPPLE_OFFSET { static inline void GEN75_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) + const struct GEN75_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2791,7 +2858,6 @@ GEN75_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict ds } -#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 #define 
GEN75_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 #define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_header\ .CommandType = 3, \ @@ -2800,6 +2866,8 @@ GEN75_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict ds ._3DCommandSubOpcode = 7, \ .DwordLength = 31 +#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 + struct GEN75_3DSTATE_POLY_STIPPLE_PATTERN { uint32_t CommandType; uint32_t CommandSubType; @@ -2811,7 +2879,7 @@ struct GEN75_3DSTATE_POLY_STIPPLE_PATTERN { static inline void GEN75_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) + const struct GEN75_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2831,7 +2899,6 @@ GEN75_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict d } -#define GEN75_3DSTATE_PS_length 0x00000008 #define GEN75_3DSTATE_PS_length_bias 0x00000002 #define GEN75_3DSTATE_PS_header \ .CommandType = 3, \ @@ -2840,6 +2907,8 @@ GEN75_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict d ._3DCommandSubOpcode = 32, \ .DwordLength = 6 +#define GEN75_3DSTATE_PS_length 0x00000008 + struct GEN75_3DSTATE_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -2899,7 +2968,7 @@ struct GEN75_3DSTATE_PS { static inline void GEN75_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_PS * restrict values) + const struct GEN75_3DSTATE_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2966,7 +3035,6 @@ GEN75_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 #define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 #define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ .CommandType = 3, \ @@ -2975,6 +3043,8 @@ GEN75_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 20, \ 
.DwordLength = 0 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 + struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -2987,7 +3057,7 @@ struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS { static inline void GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3006,7 +3076,6 @@ GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict } -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 #define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 #define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ .CommandType = 3, \ @@ -3015,6 +3084,8 @@ GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 21, \ .DwordLength = 0 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 + struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -3027,7 +3098,7 @@ struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS { static inline void GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3046,7 +3117,6 @@ GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict } -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 #define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 #define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ .CommandType = 3, \ @@ -3055,6 +3125,8 @@ GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 19, \ .DwordLength = 0 +#define 
GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 + struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -3067,7 +3139,7 @@ struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS { static inline void GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3086,7 +3158,6 @@ GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict } -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 #define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 #define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ .CommandType = 3, \ @@ -3095,6 +3166,8 @@ GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 22, \ .DwordLength = 0 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 + struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -3107,7 +3180,7 @@ struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS { static inline void GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3126,7 +3199,6 @@ GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict } -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 #define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 #define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ .CommandType = 3, \ @@ -3135,6 +3207,8 @@ GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 18, \ .DwordLength = 0 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 
0x00000002 + struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -3147,7 +3221,7 @@ struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS { static inline void GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3166,7 +3240,6 @@ GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict } -#define GEN75_3DSTATE_RAST_MULTISAMPLE_length 0x00000006 #define GEN75_3DSTATE_RAST_MULTISAMPLE_length_bias 0x00000002 #define GEN75_3DSTATE_RAST_MULTISAMPLE_header \ .CommandType = 3, \ @@ -3175,6 +3248,8 @@ GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 14, \ .DwordLength = 4 +#define GEN75_3DSTATE_RAST_MULTISAMPLE_length 0x00000006 + struct GEN75_3DSTATE_RAST_MULTISAMPLE { uint32_t CommandType; uint32_t CommandSubType; @@ -3223,7 +3298,7 @@ struct GEN75_3DSTATE_RAST_MULTISAMPLE { static inline void GEN75_3DSTATE_RAST_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_RAST_MULTISAMPLE * restrict values) + const struct GEN75_3DSTATE_RAST_MULTISAMPLE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3292,6 +3367,10 @@ GEN75_3DSTATE_RAST_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 2 +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 + +#define GEN75_PALETTE_ENTRY_length 0x00000001 + struct GEN75_PALETTE_ENTRY { uint32_t Alpha; uint32_t Red; @@ -3301,7 +3380,7 @@ struct GEN75_PALETTE_ENTRY { static inline void GEN75_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_PALETTE_ENTRY * restrict values) + const struct GEN75_PALETTE_ENTRY * restrict values) { uint32_t *dw = (uint32_t * 
restrict) dst; @@ -3325,7 +3404,7 @@ struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0 { static inline void GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) + const struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3347,6 +3426,8 @@ GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 12 +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 + struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1 { uint32_t CommandType; uint32_t CommandSubType; @@ -3358,7 +3439,7 @@ struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1 { static inline void GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) + const struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3373,7 +3454,6 @@ GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict /* variable length fields follow */ } -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 #define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 #define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ .CommandType = 3, \ @@ -3382,6 +3462,8 @@ GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 45, \ .DwordLength = 0 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 + struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -3393,7 +3475,7 @@ struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS { static inline void GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) + const struct 
GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3411,7 +3493,6 @@ GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restr } -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 #define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 #define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ .CommandType = 3, \ @@ -3420,6 +3501,8 @@ GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restr ._3DCommandSubOpcode = 46, \ .DwordLength = 0 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 + struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -3431,7 +3514,7 @@ struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS { static inline void GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3449,7 +3532,6 @@ GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restr } -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 #define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 #define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ .CommandType = 3, \ @@ -3458,6 +3540,8 @@ GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restr ._3DCommandSubOpcode = 44, \ .DwordLength = 0 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 + struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -3469,7 +3553,7 @@ struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS { static inline void GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) + 
const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3487,7 +3571,6 @@ GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restr } -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 #define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 #define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ .CommandType = 3, \ @@ -3496,6 +3579,8 @@ GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restr ._3DCommandSubOpcode = 47, \ .DwordLength = 0 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 + struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -3507,7 +3592,7 @@ struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS { static inline void GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3525,7 +3610,6 @@ GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restr } -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 #define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 #define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ .CommandType = 3, \ @@ -3534,6 +3618,8 @@ GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restr ._3DCommandSubOpcode = 43, \ .DwordLength = 0 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 + struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -3545,7 +3631,7 @@ struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS { static inline void GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict 
values) + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3563,7 +3649,6 @@ GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restr } -#define GEN75_3DSTATE_SAMPLE_MASK_length 0x00000002 #define GEN75_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 #define GEN75_3DSTATE_SAMPLE_MASK_header \ .CommandType = 3, \ @@ -3572,6 +3657,8 @@ GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restr ._3DCommandSubOpcode = 24, \ .DwordLength = 0 +#define GEN75_3DSTATE_SAMPLE_MASK_length 0x00000002 + struct GEN75_3DSTATE_SAMPLE_MASK { uint32_t CommandType; uint32_t CommandSubType; @@ -3583,7 +3670,7 @@ struct GEN75_3DSTATE_SAMPLE_MASK { static inline void GEN75_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLE_MASK * restrict values) + const struct GEN75_3DSTATE_SAMPLE_MASK * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3601,7 +3688,6 @@ GEN75_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_SBE_length 0x0000000e #define GEN75_3DSTATE_SBE_length_bias 0x00000002 #define GEN75_3DSTATE_SBE_header \ .CommandType = 3, \ @@ -3610,6 +3696,8 @@ GEN75_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 31, \ .DwordLength = 12 +#define GEN75_3DSTATE_SBE_length 0x0000000e + struct GEN75_3DSTATE_SBE { uint32_t CommandType; uint32_t CommandSubType; @@ -3676,7 +3764,7 @@ struct GEN75_3DSTATE_SBE { static inline void GEN75_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SBE * restrict values) + const struct GEN75_3DSTATE_SBE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3746,7 +3834,6 @@ GEN75_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 #define 
GEN75_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 #define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_header\ .CommandType = 3, \ @@ -3755,6 +3842,8 @@ GEN75_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 15, \ .DwordLength = 0 +#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 + struct GEN75_3DSTATE_SCISSOR_STATE_POINTERS { uint32_t CommandType; uint32_t CommandSubType; @@ -3766,7 +3855,7 @@ struct GEN75_3DSTATE_SCISSOR_STATE_POINTERS { static inline void GEN75_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) + const struct GEN75_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3784,7 +3873,6 @@ GEN75_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict } -#define GEN75_3DSTATE_SF_length 0x00000007 #define GEN75_3DSTATE_SF_length_bias 0x00000002 #define GEN75_3DSTATE_SF_header \ .CommandType = 3, \ @@ -3793,6 +3881,8 @@ GEN75_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 19, \ .DwordLength = 5 +#define GEN75_3DSTATE_SF_length 0x00000007 + struct GEN75_3DSTATE_SF { uint32_t CommandType; uint32_t CommandSubType; @@ -3847,14 +3937,14 @@ struct GEN75_3DSTATE_SF { uint32_t VertexSubPixelPrecisionSelect; uint32_t UsePointWidthState; float PointWidth; - uint32_t GlobalDepthOffsetConstant; - uint32_t GlobalDepthOffsetScale; - uint32_t GlobalDepthOffsetClamp; + float GlobalDepthOffsetConstant; + float GlobalDepthOffsetScale; + float GlobalDepthOffsetClamp; }; static inline void GEN75_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SF * restrict values) + const struct GEN75_3DSTATE_SF * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3902,20 +3992,19 @@ GEN75_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, 0; dw[4] = - 
__gen_field(values->GlobalDepthOffsetConstant, 0, 31) | + __gen_float(values->GlobalDepthOffsetConstant) | 0; dw[5] = - __gen_field(values->GlobalDepthOffsetScale, 0, 31) | + __gen_float(values->GlobalDepthOffsetScale) | 0; dw[6] = - __gen_field(values->GlobalDepthOffsetClamp, 0, 31) | + __gen_float(values->GlobalDepthOffsetClamp) | 0; } -#define GEN75_3DSTATE_SO_BUFFER_length 0x00000004 #define GEN75_3DSTATE_SO_BUFFER_length_bias 0x00000002 #define GEN75_3DSTATE_SO_BUFFER_header \ .CommandType = 3, \ @@ -3924,6 +4013,8 @@ GEN75_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 24, \ .DwordLength = 2 +#define GEN75_3DSTATE_SO_BUFFER_length 0x00000004 + struct GEN75_3DSTATE_SO_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -3939,7 +4030,7 @@ struct GEN75_3DSTATE_SO_BUFFER { static inline void GEN75_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SO_BUFFER * restrict values) + const struct GEN75_3DSTATE_SO_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3980,6 +4071,12 @@ GEN75_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 23 +#define GEN75_3DSTATE_SO_DECL_LIST_length 0x00000000 + +#define GEN75_SO_DECL_ENTRY_length 0x00000002 + +#define GEN75_SO_DECL_length 0x00000001 + struct GEN75_SO_DECL { uint32_t OutputBufferSlot; uint32_t HoleFlag; @@ -3989,7 +4086,7 @@ struct GEN75_SO_DECL { static inline void GEN75_SO_DECL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SO_DECL * restrict values) + const struct GEN75_SO_DECL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4011,7 +4108,7 @@ struct GEN75_SO_DECL_ENTRY { static inline void GEN75_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SO_DECL_ENTRY * restrict values) + const struct GEN75_SO_DECL_ENTRY * restrict values) { uint32_t *dw = (uint32_t * restrict) 
dst; @@ -4054,7 +4151,7 @@ struct GEN75_3DSTATE_SO_DECL_LIST { static inline void GEN75_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SO_DECL_LIST * restrict values) + const struct GEN75_3DSTATE_SO_DECL_LIST * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4083,7 +4180,6 @@ GEN75_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN75_3DSTATE_STENCIL_BUFFER_length 0x00000003 #define GEN75_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 #define GEN75_3DSTATE_STENCIL_BUFFER_header \ .CommandType = 3, \ @@ -4092,6 +4188,8 @@ GEN75_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 6, \ .DwordLength = 1 +#define GEN75_3DSTATE_STENCIL_BUFFER_length 0x00000003 + struct GEN75_3DSTATE_STENCIL_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -4106,7 +4204,7 @@ struct GEN75_3DSTATE_STENCIL_BUFFER { static inline void GEN75_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_STENCIL_BUFFER * restrict values) + const struct GEN75_3DSTATE_STENCIL_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4134,7 +4232,6 @@ GEN75_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_STREAMOUT_length 0x00000003 #define GEN75_3DSTATE_STREAMOUT_length_bias 0x00000002 #define GEN75_3DSTATE_STREAMOUT_header \ .CommandType = 3, \ @@ -4143,6 +4240,8 @@ GEN75_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 30, \ .DwordLength = 1 +#define GEN75_3DSTATE_STREAMOUT_length 0x00000003 + struct GEN75_3DSTATE_STREAMOUT { uint32_t CommandType; uint32_t CommandSubType; @@ -4172,7 +4271,7 @@ struct GEN75_3DSTATE_STREAMOUT { static inline void GEN75_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_STREAMOUT * restrict 
values) + const struct GEN75_3DSTATE_STREAMOUT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4209,7 +4308,6 @@ GEN75_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_TE_length 0x00000004 #define GEN75_3DSTATE_TE_length_bias 0x00000002 #define GEN75_3DSTATE_TE_header \ .CommandType = 3, \ @@ -4218,6 +4316,8 @@ GEN75_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 28, \ .DwordLength = 2 +#define GEN75_3DSTATE_TE_length 0x00000004 + struct GEN75_3DSTATE_TE { uint32_t CommandType; uint32_t CommandSubType; @@ -4247,7 +4347,7 @@ struct GEN75_3DSTATE_TE { static inline void GEN75_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_TE * restrict values) + const struct GEN75_3DSTATE_TE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4277,7 +4377,6 @@ GEN75_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_URB_DS_length 0x00000002 #define GEN75_3DSTATE_URB_DS_length_bias 0x00000002 #define GEN75_3DSTATE_URB_DS_header \ .CommandType = 3, \ @@ -4286,6 +4385,8 @@ GEN75_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 50, \ .DwordLength = 0 +#define GEN75_3DSTATE_URB_DS_length 0x00000002 + struct GEN75_3DSTATE_URB_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -4299,7 +4400,7 @@ struct GEN75_3DSTATE_URB_DS { static inline void GEN75_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_URB_DS * restrict values) + const struct GEN75_3DSTATE_URB_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4319,7 +4420,6 @@ GEN75_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_URB_GS_length 0x00000002 #define GEN75_3DSTATE_URB_GS_length_bias 0x00000002 #define GEN75_3DSTATE_URB_GS_header \ .CommandType = 3, \ @@ -4328,6 +4428,8 @@ 
GEN75_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 51, \ .DwordLength = 0 +#define GEN75_3DSTATE_URB_GS_length 0x00000002 + struct GEN75_3DSTATE_URB_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -4341,7 +4443,7 @@ struct GEN75_3DSTATE_URB_GS { static inline void GEN75_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_URB_GS * restrict values) + const struct GEN75_3DSTATE_URB_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4361,7 +4463,6 @@ GEN75_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_URB_HS_length 0x00000002 #define GEN75_3DSTATE_URB_HS_length_bias 0x00000002 #define GEN75_3DSTATE_URB_HS_header \ .CommandType = 3, \ @@ -4370,6 +4471,8 @@ GEN75_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 49, \ .DwordLength = 0 +#define GEN75_3DSTATE_URB_HS_length 0x00000002 + struct GEN75_3DSTATE_URB_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -4383,7 +4486,7 @@ struct GEN75_3DSTATE_URB_HS { static inline void GEN75_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_URB_HS * restrict values) + const struct GEN75_3DSTATE_URB_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4410,6 +4513,10 @@ GEN75_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 8 +#define GEN75_3DSTATE_VERTEX_BUFFERS_length 0x00000000 + +#define GEN75_VERTEX_BUFFER_STATE_length 0x00000004 + struct GEN75_VERTEX_BUFFER_STATE { uint32_t VertexBufferIndex; #define VERTEXDATA 0 @@ -4427,7 +4534,7 @@ struct GEN75_VERTEX_BUFFER_STATE { static inline void GEN75_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_VERTEX_BUFFER_STATE * restrict values) + const struct GEN75_VERTEX_BUFFER_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) 
dst; @@ -4472,7 +4579,7 @@ struct GEN75_3DSTATE_VERTEX_BUFFERS { static inline void GEN75_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VERTEX_BUFFERS * restrict values) + const struct GEN75_3DSTATE_VERTEX_BUFFERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4494,6 +4601,10 @@ GEN75_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 9 +#define GEN75_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 + +#define GEN75_VERTEX_ELEMENT_STATE_length 0x00000002 + struct GEN75_VERTEX_ELEMENT_STATE { uint32_t VertexBufferIndex; bool Valid; @@ -4508,7 +4619,7 @@ struct GEN75_VERTEX_ELEMENT_STATE { static inline void GEN75_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_VERTEX_ELEMENT_STATE * restrict values) + const struct GEN75_VERTEX_ELEMENT_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4540,7 +4651,7 @@ struct GEN75_3DSTATE_VERTEX_ELEMENTS { static inline void GEN75_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VERTEX_ELEMENTS * restrict values) + const struct GEN75_3DSTATE_VERTEX_ELEMENTS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4555,7 +4666,6 @@ GEN75_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN75_3DSTATE_VF_length 0x00000002 #define GEN75_3DSTATE_VF_length_bias 0x00000002 #define GEN75_3DSTATE_VF_header \ .CommandType = 3, \ @@ -4564,6 +4674,8 @@ GEN75_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 12, \ .DwordLength = 0 +#define GEN75_3DSTATE_VF_length 0x00000002 + struct GEN75_3DSTATE_VF { uint32_t CommandType; uint32_t CommandSubType; @@ -4576,7 +4688,7 @@ struct GEN75_3DSTATE_VF { static inline void GEN75_3DSTATE_VF_pack(__gen_user_data *data, void * 
restrict dst, - const struct GEN75_3DSTATE_VF * restrict values) + const struct GEN75_3DSTATE_VF * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4595,7 +4707,6 @@ GEN75_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_VF_STATISTICS_length 0x00000001 #define GEN75_3DSTATE_VF_STATISTICS_length_bias 0x00000001 #define GEN75_3DSTATE_VF_STATISTICS_header \ .CommandType = 3, \ @@ -4603,6 +4714,8 @@ GEN75_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 11 +#define GEN75_3DSTATE_VF_STATISTICS_length 0x00000001 + struct GEN75_3DSTATE_VF_STATISTICS { uint32_t CommandType; uint32_t CommandSubType; @@ -4613,7 +4726,7 @@ struct GEN75_3DSTATE_VF_STATISTICS { static inline void GEN75_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VF_STATISTICS * restrict values) + const struct GEN75_3DSTATE_VF_STATISTICS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4627,7 +4740,6 @@ GEN75_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 #define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 #define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ .CommandType = 3, \ @@ -4636,6 +4748,8 @@ GEN75_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 35, \ .DwordLength = 0 +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 + struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC { uint32_t CommandType; uint32_t CommandSubType; @@ -4647,7 +4761,7 @@ struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC { static inline void GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) + const struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) { 
uint32_t *dw = (uint32_t * restrict) dst; @@ -4665,7 +4779,6 @@ GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * rest } -#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 #define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 #define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ .CommandType = 3, \ @@ -4674,6 +4787,8 @@ GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * rest ._3DCommandSubOpcode = 33, \ .DwordLength = 0 +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 + struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { uint32_t CommandType; uint32_t CommandSubType; @@ -4685,7 +4800,7 @@ struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { static inline void GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) + const struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4703,7 +4818,6 @@ GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * } -#define GEN75_3DSTATE_VS_length 0x00000006 #define GEN75_3DSTATE_VS_length_bias 0x00000002 #define GEN75_3DSTATE_VS_header \ .CommandType = 3, \ @@ -4712,6 +4826,8 @@ GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * ._3DCommandSubOpcode = 16, \ .DwordLength = 4 +#define GEN75_3DSTATE_VS_length 0x00000006 + struct GEN75_3DSTATE_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -4754,7 +4870,7 @@ struct GEN75_3DSTATE_VS { static inline void GEN75_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VS * restrict values) + const struct GEN75_3DSTATE_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4802,7 +4918,6 @@ GEN75_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, } -#define 
GEN75_3DSTATE_WM_length 0x00000003 #define GEN75_3DSTATE_WM_length_bias 0x00000002 #define GEN75_3DSTATE_WM_header \ .CommandType = 3, \ @@ -4811,6 +4926,8 @@ GEN75_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 20, \ .DwordLength = 1 +#define GEN75_3DSTATE_WM_length 0x00000003 + struct GEN75_3DSTATE_WM { uint32_t CommandType; uint32_t CommandSubType; @@ -4864,7 +4981,7 @@ struct GEN75_3DSTATE_WM { static inline void GEN75_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_WM * restrict values) + const struct GEN75_3DSTATE_WM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4907,7 +5024,6 @@ GEN75_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_GPGPU_OBJECT_length 0x00000008 #define GEN75_GPGPU_OBJECT_length_bias 0x00000002 #define GEN75_GPGPU_OBJECT_header \ .CommandType = 3, \ @@ -4916,6 +5032,8 @@ GEN75_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 4, \ .DwordLength = 6 +#define GEN75_GPGPU_OBJECT_length 0x00000008 + struct GEN75_GPGPU_OBJECT { uint32_t CommandType; uint32_t Pipeline; @@ -4944,7 +5062,7 @@ struct GEN75_GPGPU_OBJECT { static inline void GEN75_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_GPGPU_OBJECT * restrict values) + const struct GEN75_GPGPU_OBJECT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4992,7 +5110,6 @@ GEN75_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_GPGPU_WALKER_length 0x0000000b #define GEN75_GPGPU_WALKER_length_bias 0x00000002 #define GEN75_GPGPU_WALKER_header \ .CommandType = 3, \ @@ -5001,6 +5118,8 @@ GEN75_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, .SubOpcodeA = 5, \ .DwordLength = 9 +#define GEN75_GPGPU_WALKER_length 0x0000000b + struct GEN75_GPGPU_WALKER { uint32_t CommandType; uint32_t Pipeline; @@ -5029,7 +5148,7 @@ struct GEN75_GPGPU_WALKER { static inline void 
GEN75_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_GPGPU_WALKER * restrict values) + const struct GEN75_GPGPU_WALKER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5088,7 +5207,6 @@ GEN75_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MEDIA_CURBE_LOAD_length 0x00000004 #define GEN75_MEDIA_CURBE_LOAD_length_bias 0x00000002 #define GEN75_MEDIA_CURBE_LOAD_header \ .CommandType = 3, \ @@ -5097,6 +5215,8 @@ GEN75_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 1, \ .DwordLength = 2 +#define GEN75_MEDIA_CURBE_LOAD_length 0x00000004 + struct GEN75_MEDIA_CURBE_LOAD { uint32_t CommandType; uint32_t Pipeline; @@ -5109,7 +5229,7 @@ struct GEN75_MEDIA_CURBE_LOAD { static inline void GEN75_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_CURBE_LOAD * restrict values) + const struct GEN75_MEDIA_CURBE_LOAD * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5134,7 +5254,6 @@ GEN75_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 #define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 #define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ .CommandType = 3, \ @@ -5143,6 +5262,8 @@ GEN75_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 2, \ .DwordLength = 2 +#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 + struct GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD { uint32_t CommandType; uint32_t Pipeline; @@ -5155,7 +5276,7 @@ struct GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD { static inline void GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) + const struct GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5187,6 +5308,8 @@ 
GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restric .MediaCommandOpcode = 1, \ .MediaCommandSubOpcode = 0 +#define GEN75_MEDIA_OBJECT_length 0x00000000 + struct GEN75_MEDIA_OBJECT { uint32_t CommandType; uint32_t MediaCommandPipeline; @@ -5220,7 +5343,7 @@ struct GEN75_MEDIA_OBJECT { static inline void GEN75_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_OBJECT * restrict values) + const struct GEN75_MEDIA_OBJECT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5264,7 +5387,6 @@ GEN75_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN75_MEDIA_OBJECT_PRT_length 0x00000010 #define GEN75_MEDIA_OBJECT_PRT_length_bias 0x00000002 #define GEN75_MEDIA_OBJECT_PRT_header \ .CommandType = 3, \ @@ -5273,6 +5395,8 @@ GEN75_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 2, \ .DwordLength = 14 +#define GEN75_MEDIA_OBJECT_PRT_length 0x00000010 + struct GEN75_MEDIA_OBJECT_PRT { uint32_t CommandType; uint32_t Pipeline; @@ -5290,7 +5414,7 @@ struct GEN75_MEDIA_OBJECT_PRT { static inline void GEN75_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_OBJECT_PRT * restrict values) + const struct GEN75_MEDIA_OBJECT_PRT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5330,6 +5454,8 @@ GEN75_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, .MediaCommandOpcode = 1, \ .SubOpcode = 3 +#define GEN75_MEDIA_OBJECT_WALKER_length 0x00000000 + struct GEN75_MEDIA_OBJECT_WALKER { uint32_t CommandType; uint32_t Pipeline; @@ -5377,7 +5503,7 @@ struct GEN75_MEDIA_OBJECT_WALKER { static inline void GEN75_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_OBJECT_WALKER * restrict values) + const struct GEN75_MEDIA_OBJECT_WALKER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5472,7 +5598,6 
@@ GEN75_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN75_MEDIA_STATE_FLUSH_length 0x00000002 #define GEN75_MEDIA_STATE_FLUSH_length_bias 0x00000002 #define GEN75_MEDIA_STATE_FLUSH_header \ .CommandType = 3, \ @@ -5481,6 +5606,8 @@ GEN75_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 4, \ .DwordLength = 0 +#define GEN75_MEDIA_STATE_FLUSH_length 0x00000002 + struct GEN75_MEDIA_STATE_FLUSH { uint32_t CommandType; uint32_t Pipeline; @@ -5495,7 +5622,7 @@ struct GEN75_MEDIA_STATE_FLUSH { static inline void GEN75_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_STATE_FLUSH * restrict values) + const struct GEN75_MEDIA_STATE_FLUSH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5516,7 +5643,6 @@ GEN75_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MEDIA_VFE_STATE_length 0x00000008 #define GEN75_MEDIA_VFE_STATE_length_bias 0x00000002 #define GEN75_MEDIA_VFE_STATE_header \ .CommandType = 3, \ @@ -5525,6 +5651,8 @@ GEN75_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 0, \ .DwordLength = 6 +#define GEN75_MEDIA_VFE_STATE_length 0x00000008 + struct GEN75_MEDIA_VFE_STATE { uint32_t CommandType; uint32_t Pipeline; @@ -5573,7 +5701,7 @@ struct GEN75_MEDIA_VFE_STATE { static inline void GEN75_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_VFE_STATE * restrict values) + const struct GEN75_MEDIA_VFE_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5638,12 +5766,13 @@ GEN75_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_ARB_CHECK_length 0x00000001 #define GEN75_MI_ARB_CHECK_length_bias 0x00000001 #define GEN75_MI_ARB_CHECK_header \ .CommandType = 0, \ .MICommandOpcode = 5 +#define GEN75_MI_ARB_CHECK_length 0x00000001 + struct 
GEN75_MI_ARB_CHECK { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5651,7 +5780,7 @@ struct GEN75_MI_ARB_CHECK { static inline void GEN75_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_ARB_CHECK * restrict values) + const struct GEN75_MI_ARB_CHECK * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5662,12 +5791,13 @@ GEN75_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_ARB_ON_OFF_length 0x00000001 #define GEN75_MI_ARB_ON_OFF_length_bias 0x00000001 #define GEN75_MI_ARB_ON_OFF_header \ .CommandType = 0, \ .MICommandOpcode = 8 +#define GEN75_MI_ARB_ON_OFF_length 0x00000001 + struct GEN75_MI_ARB_ON_OFF { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5676,7 +5806,7 @@ struct GEN75_MI_ARB_ON_OFF { static inline void GEN75_MI_ARB_ON_OFF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_ARB_ON_OFF * restrict values) + const struct GEN75_MI_ARB_ON_OFF * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5688,12 +5818,13 @@ GEN75_MI_ARB_ON_OFF_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_BATCH_BUFFER_END_length 0x00000001 #define GEN75_MI_BATCH_BUFFER_END_length_bias 0x00000001 #define GEN75_MI_BATCH_BUFFER_END_header \ .CommandType = 0, \ .MICommandOpcode = 10 +#define GEN75_MI_BATCH_BUFFER_END_length 0x00000001 + struct GEN75_MI_BATCH_BUFFER_END { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5701,7 +5832,7 @@ struct GEN75_MI_BATCH_BUFFER_END { static inline void GEN75_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_BATCH_BUFFER_END * restrict values) + const struct GEN75_MI_BATCH_BUFFER_END * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5712,13 +5843,14 @@ GEN75_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_BATCH_BUFFER_START_length 0x00000002 #define GEN75_MI_BATCH_BUFFER_START_length_bias 
0x00000002 #define GEN75_MI_BATCH_BUFFER_START_header \ .CommandType = 0, \ .MICommandOpcode = 49, \ .DwordLength = 0 +#define GEN75_MI_BATCH_BUFFER_START_length 0x00000002 + struct GEN75_MI_BATCH_BUFFER_START { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5739,7 +5871,7 @@ struct GEN75_MI_BATCH_BUFFER_START { static inline void GEN75_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_BATCH_BUFFER_START * restrict values) + const struct GEN75_MI_BATCH_BUFFER_START * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5769,6 +5901,8 @@ GEN75_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, .CommandType = 0, \ .MICommandOpcode = 39 +#define GEN75_MI_CLFLUSH_length 0x00000000 + struct GEN75_MI_CLFLUSH { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5784,7 +5918,7 @@ struct GEN75_MI_CLFLUSH { static inline void GEN75_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_CLFLUSH * restrict values) + const struct GEN75_MI_CLFLUSH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5811,7 +5945,6 @@ GEN75_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000002 #define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 #define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_header\ .CommandType = 0, \ @@ -5820,6 +5953,8 @@ GEN75_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, .CompareSemaphore = 0, \ .DwordLength = 0 +#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000002 + struct GEN75_MI_CONDITIONAL_BATCH_BUFFER_END { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5832,7 +5967,7 @@ struct GEN75_MI_CONDITIONAL_BATCH_BUFFER_END { static inline void GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) + 
const struct GEN75_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5856,12 +5991,13 @@ GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restric } -#define GEN75_MI_FLUSH_length 0x00000001 #define GEN75_MI_FLUSH_length_bias 0x00000001 #define GEN75_MI_FLUSH_header \ .CommandType = 0, \ .MICommandOpcode = 4 +#define GEN75_MI_FLUSH_length 0x00000001 + struct GEN75_MI_FLUSH { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5880,7 +6016,7 @@ struct GEN75_MI_FLUSH { static inline void GEN75_MI_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_FLUSH * restrict values) + const struct GEN75_MI_FLUSH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5896,13 +6032,14 @@ GEN75_MI_FLUSH_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_LOAD_REGISTER_IMM_length 0x00000003 #define GEN75_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 #define GEN75_MI_LOAD_REGISTER_IMM_header \ .CommandType = 0, \ .MICommandOpcode = 34, \ .DwordLength = 1 +#define GEN75_MI_LOAD_REGISTER_IMM_length 0x00000003 + struct GEN75_MI_LOAD_REGISTER_IMM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5914,7 +6051,7 @@ struct GEN75_MI_LOAD_REGISTER_IMM { static inline void GEN75_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_LOAD_REGISTER_IMM * restrict values) + const struct GEN75_MI_LOAD_REGISTER_IMM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5935,13 +6072,14 @@ GEN75_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_LOAD_REGISTER_MEM_length 0x00000003 #define GEN75_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 #define GEN75_MI_LOAD_REGISTER_MEM_header \ .CommandType = 0, \ .MICommandOpcode = 41, \ .DwordLength = 1 +#define GEN75_MI_LOAD_REGISTER_MEM_length 0x00000003 + struct GEN75_MI_LOAD_REGISTER_MEM { uint32_t CommandType; uint32_t MICommandOpcode; @@ 
-5954,7 +6092,7 @@ struct GEN75_MI_LOAD_REGISTER_MEM { static inline void GEN75_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_LOAD_REGISTER_MEM * restrict values) + const struct GEN75_MI_LOAD_REGISTER_MEM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5978,13 +6116,14 @@ GEN75_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_LOAD_REGISTER_REG_length 0x00000003 #define GEN75_MI_LOAD_REGISTER_REG_length_bias 0x00000002 #define GEN75_MI_LOAD_REGISTER_REG_header \ .CommandType = 0, \ .MICommandOpcode = 42, \ .DwordLength = 1 +#define GEN75_MI_LOAD_REGISTER_REG_length 0x00000003 + struct GEN75_MI_LOAD_REGISTER_REG { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5995,7 +6134,7 @@ struct GEN75_MI_LOAD_REGISTER_REG { static inline void GEN75_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_LOAD_REGISTER_REG * restrict values) + const struct GEN75_MI_LOAD_REGISTER_REG * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6015,13 +6154,14 @@ GEN75_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 #define GEN75_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 #define GEN75_MI_LOAD_SCAN_LINES_EXCL_header \ .CommandType = 0, \ .MICommandOpcode = 19, \ .DwordLength = 0 +#define GEN75_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 + struct GEN75_MI_LOAD_SCAN_LINES_EXCL { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6036,7 +6176,7 @@ struct GEN75_MI_LOAD_SCAN_LINES_EXCL { static inline void GEN75_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_LOAD_SCAN_LINES_EXCL * restrict values) + const struct GEN75_MI_LOAD_SCAN_LINES_EXCL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6054,13 +6194,14 @@ GEN75_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict 
dst, } -#define GEN75_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 #define GEN75_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 #define GEN75_MI_LOAD_SCAN_LINES_INCL_header \ .CommandType = 0, \ .MICommandOpcode = 18, \ .DwordLength = 0 +#define GEN75_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 + struct GEN75_MI_LOAD_SCAN_LINES_INCL { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6075,7 +6216,7 @@ struct GEN75_MI_LOAD_SCAN_LINES_INCL { static inline void GEN75_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_LOAD_SCAN_LINES_INCL * restrict values) + const struct GEN75_MI_LOAD_SCAN_LINES_INCL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6093,13 +6234,14 @@ GEN75_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_LOAD_URB_MEM_length 0x00000003 #define GEN75_MI_LOAD_URB_MEM_length_bias 0x00000002 #define GEN75_MI_LOAD_URB_MEM_header \ .CommandType = 0, \ .MICommandOpcode = 44, \ .DwordLength = 1 +#define GEN75_MI_LOAD_URB_MEM_length 0x00000003 + struct GEN75_MI_LOAD_URB_MEM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6110,7 +6252,7 @@ struct GEN75_MI_LOAD_URB_MEM { static inline void GEN75_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_LOAD_URB_MEM * restrict values) + const struct GEN75_MI_LOAD_URB_MEM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6137,6 +6279,8 @@ GEN75_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, .CommandType = 0, \ .MICommandOpcode = 26 +#define GEN75_MI_MATH_length 0x00000000 + struct GEN75_MI_MATH { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6148,7 +6292,7 @@ struct GEN75_MI_MATH { static inline void GEN75_MI_MATH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_MATH * restrict values) + const struct GEN75_MI_MATH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6169,12 +6313,13 @@ 
GEN75_MI_MATH_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN75_MI_NOOP_length 0x00000001 #define GEN75_MI_NOOP_length_bias 0x00000001 #define GEN75_MI_NOOP_header \ .CommandType = 0, \ .MICommandOpcode = 0 +#define GEN75_MI_NOOP_length 0x00000001 + struct GEN75_MI_NOOP { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6184,7 +6329,7 @@ struct GEN75_MI_NOOP { static inline void GEN75_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_NOOP * restrict values) + const struct GEN75_MI_NOOP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6197,12 +6342,13 @@ GEN75_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_PREDICATE_length 0x00000001 #define GEN75_MI_PREDICATE_length_bias 0x00000001 #define GEN75_MI_PREDICATE_header \ .CommandType = 0, \ .MICommandOpcode = 12 +#define GEN75_MI_PREDICATE_length 0x00000001 + struct GEN75_MI_PREDICATE { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6222,7 +6368,7 @@ struct GEN75_MI_PREDICATE { static inline void GEN75_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_PREDICATE * restrict values) + const struct GEN75_MI_PREDICATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6236,12 +6382,13 @@ GEN75_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_REPORT_HEAD_length 0x00000001 #define GEN75_MI_REPORT_HEAD_length_bias 0x00000001 #define GEN75_MI_REPORT_HEAD_header \ .CommandType = 0, \ .MICommandOpcode = 7 +#define GEN75_MI_REPORT_HEAD_length 0x00000001 + struct GEN75_MI_REPORT_HEAD { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6249,7 +6396,7 @@ struct GEN75_MI_REPORT_HEAD { static inline void GEN75_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_REPORT_HEAD * restrict values) + const struct GEN75_MI_REPORT_HEAD * restrict values) { uint32_t *dw = (uint32_t * 
restrict) dst; @@ -6260,12 +6407,13 @@ GEN75_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_RS_CONTEXT_length 0x00000001 #define GEN75_MI_RS_CONTEXT_length_bias 0x00000001 #define GEN75_MI_RS_CONTEXT_header \ .CommandType = 0, \ .MICommandOpcode = 15 +#define GEN75_MI_RS_CONTEXT_length 0x00000001 + struct GEN75_MI_RS_CONTEXT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6276,7 +6424,7 @@ struct GEN75_MI_RS_CONTEXT { static inline void GEN75_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_RS_CONTEXT * restrict values) + const struct GEN75_MI_RS_CONTEXT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6288,12 +6436,13 @@ GEN75_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_RS_CONTROL_length 0x00000001 #define GEN75_MI_RS_CONTROL_length_bias 0x00000001 #define GEN75_MI_RS_CONTROL_header \ .CommandType = 0, \ .MICommandOpcode = 6 +#define GEN75_MI_RS_CONTROL_length 0x00000001 + struct GEN75_MI_RS_CONTROL { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6304,7 +6453,7 @@ struct GEN75_MI_RS_CONTROL { static inline void GEN75_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_RS_CONTROL * restrict values) + const struct GEN75_MI_RS_CONTROL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6316,13 +6465,14 @@ GEN75_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_RS_STORE_DATA_IMM_length 0x00000004 #define GEN75_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 #define GEN75_MI_RS_STORE_DATA_IMM_header \ .CommandType = 0, \ .MICommandOpcode = 43, \ .DwordLength = 2 +#define GEN75_MI_RS_STORE_DATA_IMM_length 0x00000004 + struct GEN75_MI_RS_STORE_DATA_IMM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6334,7 +6484,7 @@ struct GEN75_MI_RS_STORE_DATA_IMM { static inline void GEN75_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, 
- const struct GEN75_MI_RS_STORE_DATA_IMM * restrict values) + const struct GEN75_MI_RS_STORE_DATA_IMM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6360,13 +6510,14 @@ GEN75_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_SEMAPHORE_MBOX_length 0x00000003 #define GEN75_MI_SEMAPHORE_MBOX_length_bias 0x00000002 #define GEN75_MI_SEMAPHORE_MBOX_header \ .CommandType = 0, \ .MICommandOpcode = 22, \ .DwordLength = 1 +#define GEN75_MI_SEMAPHORE_MBOX_length 0x00000003 + struct GEN75_MI_SEMAPHORE_MBOX { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6382,7 +6533,7 @@ struct GEN75_MI_SEMAPHORE_MBOX { static inline void GEN75_MI_SEMAPHORE_MBOX_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_SEMAPHORE_MBOX * restrict values) + const struct GEN75_MI_SEMAPHORE_MBOX * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6403,13 +6554,14 @@ GEN75_MI_SEMAPHORE_MBOX_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_SET_CONTEXT_length 0x00000002 #define GEN75_MI_SET_CONTEXT_length_bias 0x00000002 #define GEN75_MI_SET_CONTEXT_header \ .CommandType = 0, \ .MICommandOpcode = 24, \ .DwordLength = 0 +#define GEN75_MI_SET_CONTEXT_length 0x00000002 + struct GEN75_MI_SET_CONTEXT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6425,7 +6577,7 @@ struct GEN75_MI_SET_CONTEXT { static inline void GEN75_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_SET_CONTEXT * restrict values) + const struct GEN75_MI_SET_CONTEXT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6449,13 +6601,14 @@ GEN75_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_SET_PREDICATE_length 0x00000001 #define GEN75_MI_SET_PREDICATE_length_bias 0x00000001 #define GEN75_MI_SET_PREDICATE_header \ .CommandType = 0, \ .MICommandOpcode = 1, \ .PREDICATEENABLE = 6 +#define GEN75_MI_SET_PREDICATE_length 0x00000001 + 
struct GEN75_MI_SET_PREDICATE { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6468,7 +6621,7 @@ struct GEN75_MI_SET_PREDICATE { static inline void GEN75_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_SET_PREDICATE * restrict values) + const struct GEN75_MI_SET_PREDICATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6480,13 +6633,14 @@ GEN75_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_STORE_DATA_IMM_length 0x00000004 #define GEN75_MI_STORE_DATA_IMM_length_bias 0x00000002 #define GEN75_MI_STORE_DATA_IMM_header \ .CommandType = 0, \ .MICommandOpcode = 32, \ .DwordLength = 2 +#define GEN75_MI_STORE_DATA_IMM_length 0x00000004 + struct GEN75_MI_STORE_DATA_IMM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6500,7 +6654,7 @@ struct GEN75_MI_STORE_DATA_IMM { static inline void GEN75_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_STORE_DATA_IMM * restrict values) + const struct GEN75_MI_STORE_DATA_IMM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6529,13 +6683,14 @@ GEN75_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_STORE_DATA_INDEX_length 0x00000003 #define GEN75_MI_STORE_DATA_INDEX_length_bias 0x00000002 #define GEN75_MI_STORE_DATA_INDEX_header \ .CommandType = 0, \ .MICommandOpcode = 33, \ .DwordLength = 1 +#define GEN75_MI_STORE_DATA_INDEX_length 0x00000003 + struct GEN75_MI_STORE_DATA_INDEX { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6547,7 +6702,7 @@ struct GEN75_MI_STORE_DATA_INDEX { static inline void GEN75_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_STORE_DATA_INDEX * restrict values) + const struct GEN75_MI_STORE_DATA_INDEX * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6571,13 +6726,14 @@ GEN75_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, } 
-#define GEN75_MI_STORE_URB_MEM_length 0x00000003 #define GEN75_MI_STORE_URB_MEM_length_bias 0x00000002 #define GEN75_MI_STORE_URB_MEM_header \ .CommandType = 0, \ .MICommandOpcode = 45, \ .DwordLength = 1 +#define GEN75_MI_STORE_URB_MEM_length 0x00000003 + struct GEN75_MI_STORE_URB_MEM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6588,7 +6744,7 @@ struct GEN75_MI_STORE_URB_MEM { static inline void GEN75_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_STORE_URB_MEM * restrict values) + const struct GEN75_MI_STORE_URB_MEM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6610,12 +6766,13 @@ GEN75_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_SUSPEND_FLUSH_length 0x00000001 #define GEN75_MI_SUSPEND_FLUSH_length_bias 0x00000001 #define GEN75_MI_SUSPEND_FLUSH_header \ .CommandType = 0, \ .MICommandOpcode = 11 +#define GEN75_MI_SUSPEND_FLUSH_length 0x00000001 + struct GEN75_MI_SUSPEND_FLUSH { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6624,7 +6781,7 @@ struct GEN75_MI_SUSPEND_FLUSH { static inline void GEN75_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_SUSPEND_FLUSH * restrict values) + const struct GEN75_MI_SUSPEND_FLUSH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6636,12 +6793,13 @@ GEN75_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_TOPOLOGY_FILTER_length 0x00000001 #define GEN75_MI_TOPOLOGY_FILTER_length_bias 0x00000001 #define GEN75_MI_TOPOLOGY_FILTER_header \ .CommandType = 0, \ .MICommandOpcode = 13 +#define GEN75_MI_TOPOLOGY_FILTER_length 0x00000001 + struct GEN75_MI_TOPOLOGY_FILTER { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6650,7 +6808,7 @@ struct GEN75_MI_TOPOLOGY_FILTER { static inline void GEN75_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_TOPOLOGY_FILTER * restrict values) + 
const struct GEN75_MI_TOPOLOGY_FILTER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6667,6 +6825,8 @@ GEN75_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, .CommandType = 0, \ .MICommandOpcode = 35 +#define GEN75_MI_UPDATE_GTT_length 0x00000000 + struct GEN75_MI_UPDATE_GTT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6680,7 +6840,7 @@ struct GEN75_MI_UPDATE_GTT { static inline void GEN75_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_UPDATE_GTT * restrict values) + const struct GEN75_MI_UPDATE_GTT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6700,12 +6860,13 @@ GEN75_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN75_MI_URB_ATOMIC_ALLOC_length 0x00000001 #define GEN75_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 #define GEN75_MI_URB_ATOMIC_ALLOC_header \ .CommandType = 0, \ .MICommandOpcode = 9 +#define GEN75_MI_URB_ATOMIC_ALLOC_length 0x00000001 + struct GEN75_MI_URB_ATOMIC_ALLOC { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6715,7 +6876,7 @@ struct GEN75_MI_URB_ATOMIC_ALLOC { static inline void GEN75_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_URB_ATOMIC_ALLOC * restrict values) + const struct GEN75_MI_URB_ATOMIC_ALLOC * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6728,13 +6889,14 @@ GEN75_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_URB_CLEAR_length 0x00000002 #define GEN75_MI_URB_CLEAR_length_bias 0x00000002 #define GEN75_MI_URB_CLEAR_header \ .CommandType = 0, \ .MICommandOpcode = 25, \ .DwordLength = 0 +#define GEN75_MI_URB_CLEAR_length 0x00000002 + struct GEN75_MI_URB_CLEAR { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6745,7 +6907,7 @@ struct GEN75_MI_URB_CLEAR { static inline void GEN75_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, - const 
struct GEN75_MI_URB_CLEAR * restrict values) + const struct GEN75_MI_URB_CLEAR * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6762,12 +6924,13 @@ GEN75_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_USER_INTERRUPT_length 0x00000001 #define GEN75_MI_USER_INTERRUPT_length_bias 0x00000001 #define GEN75_MI_USER_INTERRUPT_header \ .CommandType = 0, \ .MICommandOpcode = 2 +#define GEN75_MI_USER_INTERRUPT_length 0x00000001 + struct GEN75_MI_USER_INTERRUPT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6775,7 +6938,7 @@ struct GEN75_MI_USER_INTERRUPT { static inline void GEN75_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_USER_INTERRUPT * restrict values) + const struct GEN75_MI_USER_INTERRUPT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6786,12 +6949,13 @@ GEN75_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MI_WAIT_FOR_EVENT_length 0x00000001 #define GEN75_MI_WAIT_FOR_EVENT_length_bias 0x00000001 #define GEN75_MI_WAIT_FOR_EVENT_header \ .CommandType = 0, \ .MICommandOpcode = 3 +#define GEN75_MI_WAIT_FOR_EVENT_length 0x00000001 + struct GEN75_MI_WAIT_FOR_EVENT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6816,7 +6980,7 @@ struct GEN75_MI_WAIT_FOR_EVENT { static inline void GEN75_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_WAIT_FOR_EVENT * restrict values) + const struct GEN75_MI_WAIT_FOR_EVENT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6843,7 +7007,6 @@ GEN75_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_PIPE_CONTROL_length 0x00000005 #define GEN75_PIPE_CONTROL_length_bias 0x00000002 #define GEN75_PIPE_CONTROL_header \ .CommandType = 3, \ @@ -6852,6 +7015,8 @@ GEN75_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 0, \ .DwordLength = 3 +#define 
GEN75_PIPE_CONTROL_length 0x00000005 + struct GEN75_PIPE_CONTROL { uint32_t CommandType; uint32_t CommandSubType; @@ -6900,7 +7065,7 @@ struct GEN75_PIPE_CONTROL { static inline void GEN75_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_PIPE_CONTROL * restrict values) + const struct GEN75_PIPE_CONTROL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6952,20 +7117,6 @@ GEN75_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_3DSTATE_CONSTANT_BODY_length 0x00000006 - -#define GEN75_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 - -#define GEN75_GATHER_CONSTANT_ENTRY_length 0x00000001 - -#define GEN75_VERTEX_BUFFER_STATE_length 0x00000004 - -#define GEN75_VERTEX_ELEMENT_STATE_length 0x00000002 - -#define GEN75_SO_DECL_ENTRY_length 0x00000002 - -#define GEN75_SO_DECL_length 0x00000001 - #define GEN75_SCISSOR_RECT_length 0x00000002 struct GEN75_SCISSOR_RECT { @@ -6977,7 +7128,7 @@ struct GEN75_SCISSOR_RECT { static inline void GEN75_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SCISSOR_RECT * restrict values) + const struct GEN75_SCISSOR_RECT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7010,7 +7161,7 @@ struct GEN75_SF_CLIP_VIEWPORT { static inline void GEN75_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SF_CLIP_VIEWPORT * restrict values) + const struct GEN75_SF_CLIP_VIEWPORT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7155,7 +7306,7 @@ struct GEN75_BLEND_STATE { static inline void GEN75_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_BLEND_STATE * restrict values) + const struct GEN75_BLEND_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7201,7 +7352,7 @@ struct GEN75_CC_VIEWPORT { static inline void GEN75_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_CC_VIEWPORT * restrict 
values) + const struct GEN75_CC_VIEWPORT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7236,7 +7387,7 @@ struct GEN75_COLOR_CALC_STATE { static inline void GEN75_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_COLOR_CALC_STATE * restrict values) + const struct GEN75_COLOR_CALC_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7335,7 +7486,7 @@ struct GEN75_DEPTH_STENCIL_STATE { static inline void GEN75_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_DEPTH_STENCIL_STATE * restrict values) + const struct GEN75_DEPTH_STENCIL_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7368,8 +7519,6 @@ GEN75_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 - #define GEN75_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 struct GEN75_INTERFACE_DESCRIPTOR_DATA { @@ -7409,7 +7558,7 @@ struct GEN75_INTERFACE_DESCRIPTOR_DATA { static inline void GEN75_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_INTERFACE_DESCRIPTOR_DATA * restrict values) + const struct GEN75_INTERFACE_DESCRIPTOR_DATA * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7456,8 +7605,6 @@ GEN75_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, } -#define GEN75_PALETTE_ENTRY_length 0x00000001 - #define GEN75_BINDING_TABLE_STATE_length 0x00000001 struct GEN75_BINDING_TABLE_STATE { @@ -7466,7 +7613,7 @@ struct GEN75_BINDING_TABLE_STATE { static inline void GEN75_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_BINDING_TABLE_STATE * restrict values) + const struct GEN75_BINDING_TABLE_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7556,7 +7703,7 @@ struct GEN75_RENDER_SURFACE_STATE { static inline void 
GEN75_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_RENDER_SURFACE_STATE * restrict values) + const struct GEN75_RENDER_SURFACE_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7636,28 +7783,155 @@ GEN75_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, #define GEN75_SAMPLER_BORDER_COLOR_STATE_length 0x00000014 +#define GEN75_BORDER_COLOR_UINT32_SINT32_length 0x00000004 + +struct GEN75_BORDER_COLOR_UINT32_SINT32 { + uint32_t BorderColorRedui32integerunclamp; + uint32_t BorderColorRedsi32integerunclamp; + uint32_t BorderColorGreenui32integerunclamp; + uint32_t BorderColorGreensi32integerunclamp; + uint32_t BorderColorBlueui32integerunclamp; + uint32_t BorderColorBluesi32integerunclamp; + uint32_t BorderColorGreenui32integerunclamp0; + uint32_t BorderColorGreensi32integerunclamp0; + uint32_t BorderColorAlphaui32integerunclamp; + uint32_t BorderColorAlphasi32integerunclamp; +}; + +static inline void +GEN75_BORDER_COLOR_UINT32_SINT32_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BORDER_COLOR_UINT32_SINT32 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BorderColorRedui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorRedsi32integerunclamp, 0, 31) | + 0; + + dw[1] = + __gen_field(values->BorderColorGreenui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorGreensi32integerunclamp, 0, 31) | + 0; + + dw[2] = + __gen_field(values->BorderColorBlueui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorBluesi32integerunclamp, 0, 31) | + __gen_field(values->BorderColorGreenui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorGreensi32integerunclamp, 0, 31) | + 0; + + dw[3] = + __gen_field(values->BorderColorAlphaui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorAlphasi32integerunclamp, 0, 31) | + 0; + +} + +#define GEN75_BORDER_COLOR_UINT16_SINT16_length 0x00000004 + 
+struct GEN75_BORDER_COLOR_UINT16_SINT16 { + uint32_t BorderColorGreenclamptouint16; + uint32_t BorderColorGreenclamptosint16; + uint32_t BorderColorRedclamptouint16; + uint32_t BorderColorRedclamptosint16; + uint32_t BorderColorAlphaclamptouint16; + uint32_t BorderColorAlphaclamptosint16; + uint32_t BorderColorBlueclamptouint16; + uint32_t BorderColorBlueclamptosint16; +}; + +static inline void +GEN75_BORDER_COLOR_UINT16_SINT16_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BORDER_COLOR_UINT16_SINT16 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BorderColorGreenclamptouint16, 16, 31) | + __gen_field(values->BorderColorGreenclamptosint16, 16, 31) | + __gen_field(values->BorderColorRedclamptouint16, 0, 15) | + __gen_field(values->BorderColorRedclamptosint16, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->BorderColorAlphaclamptouint16, 16, 31) | + __gen_field(values->BorderColorAlphaclamptosint16, 16, 31) | + __gen_field(values->BorderColorBlueclamptouint16, 0, 15) | + __gen_field(values->BorderColorBlueclamptosint16, 0, 15) | + 0; + + dw[3] = + 0; + +} + +#define GEN75_BORDER_COLOR_UINT8_SINT8_length 0x00000004 + +struct GEN75_BORDER_COLOR_UINT8_SINT8 { + uint32_t BorderColorAlphaclamptouint8; + uint32_t BorderColorAlphaclamptosint8; + uint32_t BorderColorBlueclamptouint8; + uint32_t BorderColorBlueclamptosint8; + uint32_t BorderColorGreenclamptouint8; + uint32_t BorderColorGreenclamptosint8; + uint32_t BorderRedAlphaclamptouint8; + uint32_t BorderRedAlphaclamptosint8; +}; + +static inline void +GEN75_BORDER_COLOR_UINT8_SINT8_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BORDER_COLOR_UINT8_SINT8 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BorderColorAlphaclamptouint8, 24, 31) | + __gen_field(values->BorderColorAlphaclamptosint8, 24, 31) | + __gen_field(values->BorderColorBlueclamptouint8, 16, 
23) | + __gen_field(values->BorderColorBlueclamptosint8, 16, 23) | + __gen_field(values->BorderColorGreenclamptouint8, 8, 15) | + __gen_field(values->BorderColorGreenclamptosint8, 8, 15) | + __gen_field(values->BorderRedAlphaclamptouint8, 0, 7) | + __gen_field(values->BorderRedAlphaclamptosint8, 0, 7) | + 0; + + dw[1] = + 0; + + dw[2] = + 0; + + dw[3] = + 0; + +} + struct GEN75_SAMPLER_BORDER_COLOR_STATE { - uint32_t BorderColorRedDX100GL; + float BorderColorRedDX100GL; uint32_t BorderColorAlpha; uint32_t BorderColorBlue; uint32_t BorderColorGreen; uint32_t BorderColorRedDX9; - uint32_t BorderColorGreen0; - uint32_t BorderColorBlue0; - uint32_t BorderColorAlpha0; - uint64_t BorderColor; - uint64_t BorderColor0; - uint64_t BorderColor1; + float BorderColorGreen0; + float BorderColorBlue0; + float BorderColorAlpha0; + struct GEN75_BORDER_COLOR_UINT32_SINT32 BorderColor; + struct GEN75_BORDER_COLOR_UINT16_SINT16 BorderColor0; + struct GEN75_BORDER_COLOR_UINT8_SINT8 BorderColor1; }; static inline void GEN75_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SAMPLER_BORDER_COLOR_STATE * restrict values) + const struct GEN75_SAMPLER_BORDER_COLOR_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; dw[0] = - __gen_field(values->BorderColorRedDX100GL, 0, 31) | + __gen_float(values->BorderColorRedDX100GL) | __gen_field(values->BorderColorAlpha, 24, 31) | __gen_field(values->BorderColorBlue, 16, 23) | __gen_field(values->BorderColorGreen, 8, 15) | @@ -7665,15 +7939,15 @@ GEN75_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst 0; dw[1] = - __gen_field(values->BorderColorGreen, 0, 31) | + __gen_float(values->BorderColorGreen) | 0; dw[2] = - __gen_field(values->BorderColorBlue, 0, 31) | + __gen_float(values->BorderColorBlue) | 0; dw[3] = - __gen_field(values->BorderColorAlpha, 0, 31) | + __gen_float(values->BorderColorAlpha) | 0; for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { @@ -7681,12 
+7955,7 @@ GEN75_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst 0; } - dw[16] = - __gen_field(values->BorderColor, 0, 127) | - __gen_field(values->BorderColor, 0, 127) | - __gen_field(values->BorderColor, 0, 127) | - 0; - + GEN75_BORDER_COLOR_UINT32_SINT32_pack(data, &dw[16], &values->BorderColor); } #define GEN75_SAMPLER_STATE_length 0x00000004 @@ -7765,7 +8034,7 @@ struct GEN75_SAMPLER_STATE { static inline void GEN75_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SAMPLER_STATE * restrict values) + const struct GEN75_SAMPLER_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 886a26c00a2..2204263e1dd 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -92,7 +92,6 @@ __gen_float(float v) #endif -#define GEN7_3DSTATE_URB_VS_length 0x00000002 #define GEN7_3DSTATE_URB_VS_length_bias 0x00000002 #define GEN7_3DSTATE_URB_VS_header \ .CommandType = 3, \ @@ -101,6 +100,8 @@ __gen_float(float v) ._3DCommandSubOpcode = 48, \ .DwordLength = 0 +#define GEN7_3DSTATE_URB_VS_length 0x00000002 + struct GEN7_3DSTATE_URB_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -114,7 +115,7 @@ struct GEN7_3DSTATE_URB_VS { static inline void GEN7_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_URB_VS * restrict values) + const struct GEN7_3DSTATE_URB_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -134,13 +135,14 @@ GEN7_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_STORE_REGISTER_MEM_length 0x00000003 #define GEN7_MI_STORE_REGISTER_MEM_length_bias 0x00000002 #define GEN7_MI_STORE_REGISTER_MEM_header \ .CommandType = 0, \ .MICommandOpcode = 36, \ .DwordLength = 1 +#define GEN7_MI_STORE_REGISTER_MEM_length 0x00000003 + struct GEN7_MI_STORE_REGISTER_MEM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -152,7 +154,7 @@ 
struct GEN7_MI_STORE_REGISTER_MEM { static inline void GEN7_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_STORE_REGISTER_MEM * restrict values) + const struct GEN7_MI_STORE_REGISTER_MEM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -175,7 +177,6 @@ GEN7_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_PIPELINE_SELECT_length 0x00000001 #define GEN7_PIPELINE_SELECT_length_bias 0x00000001 #define GEN7_PIPELINE_SELECT_header \ .CommandType = 3, \ @@ -183,6 +184,8 @@ GEN7_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 4 +#define GEN7_PIPELINE_SELECT_length 0x00000001 + struct GEN7_PIPELINE_SELECT { uint32_t CommandType; uint32_t CommandSubType; @@ -196,7 +199,7 @@ struct GEN7_PIPELINE_SELECT { static inline void GEN7_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_PIPELINE_SELECT * restrict values) + const struct GEN7_PIPELINE_SELECT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -210,7 +213,6 @@ GEN7_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_STATE_BASE_ADDRESS_length 0x0000000a #define GEN7_STATE_BASE_ADDRESS_length_bias 0x00000002 #define GEN7_STATE_BASE_ADDRESS_header \ .CommandType = 3, \ @@ -219,6 +221,10 @@ GEN7_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 1, \ .DwordLength = 8 +#define GEN7_STATE_BASE_ADDRESS_length 0x0000000a + +#define GEN7_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + struct GEN7_MEMORY_OBJECT_CONTROL_STATE { uint32_t GraphicsDataTypeGFDT; uint32_t LLCCacheabilityControlLLCCC; @@ -227,7 +233,7 @@ struct GEN7_MEMORY_OBJECT_CONTROL_STATE { static inline void GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEMORY_OBJECT_CONTROL_STATE * restrict values) + const struct 
GEN7_MEMORY_OBJECT_CONTROL_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -274,7 +280,7 @@ struct GEN7_STATE_BASE_ADDRESS { static inline void GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_STATE_BASE_ADDRESS * restrict values) + const struct GEN7_STATE_BASE_ADDRESS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -370,7 +376,6 @@ GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_STATE_PREFETCH_length 0x00000002 #define GEN7_STATE_PREFETCH_length_bias 0x00000002 #define GEN7_STATE_PREFETCH_header \ .CommandType = 3, \ @@ -379,6 +384,8 @@ GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 3, \ .DwordLength = 0 +#define GEN7_STATE_PREFETCH_length 0x00000002 + struct GEN7_STATE_PREFETCH { uint32_t CommandType; uint32_t CommandSubType; @@ -391,7 +398,7 @@ struct GEN7_STATE_PREFETCH { static inline void GEN7_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_STATE_PREFETCH * restrict values) + const struct GEN7_STATE_PREFETCH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -412,7 +419,6 @@ GEN7_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_STATE_SIP_length 0x00000002 #define GEN7_STATE_SIP_length_bias 0x00000002 #define GEN7_STATE_SIP_header \ .CommandType = 3, \ @@ -421,6 +427,8 @@ GEN7_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 2, \ .DwordLength = 0 +#define GEN7_STATE_SIP_length 0x00000002 + struct GEN7_STATE_SIP { uint32_t CommandType; uint32_t CommandSubType; @@ -432,7 +440,7 @@ struct GEN7_STATE_SIP { static inline void GEN7_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_STATE_SIP * restrict values) + const struct GEN7_STATE_SIP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -450,7 +458,6 @@ 
GEN7_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_SWTESS_BASE_ADDRESS_length 0x00000002 #define GEN7_SWTESS_BASE_ADDRESS_length_bias 0x00000002 #define GEN7_SWTESS_BASE_ADDRESS_header \ .CommandType = 3, \ @@ -459,6 +466,8 @@ GEN7_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 3, \ .DwordLength = 0 +#define GEN7_SWTESS_BASE_ADDRESS_length 0x00000002 + struct GEN7_SWTESS_BASE_ADDRESS { uint32_t CommandType; uint32_t CommandSubType; @@ -471,7 +480,7 @@ struct GEN7_SWTESS_BASE_ADDRESS { static inline void GEN7_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SWTESS_BASE_ADDRESS * restrict values) + const struct GEN7_SWTESS_BASE_ADDRESS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -494,7 +503,6 @@ GEN7_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DPRIMITIVE_length 0x00000007 #define GEN7_3DPRIMITIVE_length_bias 0x00000002 #define GEN7_3DPRIMITIVE_header \ .CommandType = 3, \ @@ -503,6 +511,8 @@ GEN7_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 0, \ .DwordLength = 5 +#define GEN7_3DPRIMITIVE_length 0x00000007 + struct GEN7_3DPRIMITIVE { uint32_t CommandType; uint32_t CommandSubType; @@ -525,7 +535,7 @@ struct GEN7_3DPRIMITIVE { static inline void GEN7_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DPRIMITIVE * restrict values) + const struct GEN7_3DPRIMITIVE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -567,7 +577,6 @@ GEN7_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 #define GEN7_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 #define GEN7_3DSTATE_AA_LINE_PARAMETERS_header \ .CommandType = 3, \ @@ -576,6 +585,8 @@ GEN7_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 10, \ .DwordLength = 1 
+#define GEN7_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 + struct GEN7_3DSTATE_AA_LINE_PARAMETERS { uint32_t CommandType; uint32_t CommandSubType; @@ -590,7 +601,7 @@ struct GEN7_3DSTATE_AA_LINE_PARAMETERS { static inline void GEN7_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_AA_LINE_PARAMETERS * restrict values) + const struct GEN7_3DSTATE_AA_LINE_PARAMETERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -614,7 +625,6 @@ GEN7_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 #define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 #define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ .CommandType = 3, \ @@ -623,6 +633,8 @@ GEN7_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 40, \ .DwordLength = 0 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 + struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -634,7 +646,7 @@ struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS { static inline void GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -652,7 +664,6 @@ GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restri } -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 #define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 #define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ .CommandType = 3, \ @@ -661,6 +672,8 @@ GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 41, \ .DwordLength = 0 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_length 
0x00000002 + struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -672,7 +685,7 @@ struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS { static inline void GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -690,7 +703,6 @@ GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restri } -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 #define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 #define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ .CommandType = 3, \ @@ -699,6 +711,8 @@ GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 39, \ .DwordLength = 0 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 + struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -710,7 +724,7 @@ struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS { static inline void GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -728,7 +742,6 @@ GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restri } -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 #define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 #define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ .CommandType = 3, \ @@ -737,6 +750,8 @@ GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 42, \ .DwordLength = 0 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 + struct 
GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -748,7 +763,7 @@ struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS { static inline void GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -766,7 +781,6 @@ GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restri } -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 #define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 #define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ .CommandType = 3, \ @@ -775,6 +789,8 @@ GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 38, \ .DwordLength = 0 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 + struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -786,7 +802,7 @@ struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS { static inline void GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -804,7 +820,6 @@ GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restri } -#define GEN7_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 #define GEN7_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 #define GEN7_3DSTATE_BLEND_STATE_POINTERS_header\ .CommandType = 3, \ @@ -813,6 +828,8 @@ GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 36, \ .DwordLength = 0 +#define GEN7_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 + struct GEN7_3DSTATE_BLEND_STATE_POINTERS { 
uint32_t CommandType; uint32_t CommandSubType; @@ -824,7 +841,7 @@ struct GEN7_3DSTATE_BLEND_STATE_POINTERS { static inline void GEN7_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_BLEND_STATE_POINTERS * restrict values) + const struct GEN7_3DSTATE_BLEND_STATE_POINTERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -843,7 +860,6 @@ GEN7_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict ds } -#define GEN7_3DSTATE_CC_STATE_POINTERS_length 0x00000002 #define GEN7_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 #define GEN7_3DSTATE_CC_STATE_POINTERS_header \ .CommandType = 3, \ @@ -852,6 +868,8 @@ GEN7_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict ds ._3DCommandSubOpcode = 14, \ .DwordLength = 0 +#define GEN7_3DSTATE_CC_STATE_POINTERS_length 0x00000002 + struct GEN7_3DSTATE_CC_STATE_POINTERS { uint32_t CommandType; uint32_t CommandSubType; @@ -863,7 +881,7 @@ struct GEN7_3DSTATE_CC_STATE_POINTERS { static inline void GEN7_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CC_STATE_POINTERS * restrict values) + const struct GEN7_3DSTATE_CC_STATE_POINTERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -882,7 +900,6 @@ GEN7_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_CHROMA_KEY_length 0x00000004 #define GEN7_3DSTATE_CHROMA_KEY_length_bias 0x00000002 #define GEN7_3DSTATE_CHROMA_KEY_header \ .CommandType = 3, \ @@ -891,6 +908,8 @@ GEN7_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 4, \ .DwordLength = 2 +#define GEN7_3DSTATE_CHROMA_KEY_length 0x00000004 + struct GEN7_3DSTATE_CHROMA_KEY { uint32_t CommandType; uint32_t CommandSubType; @@ -904,7 +923,7 @@ struct GEN7_3DSTATE_CHROMA_KEY { static inline void GEN7_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict 
dst, - const struct GEN7_3DSTATE_CHROMA_KEY * restrict values) + const struct GEN7_3DSTATE_CHROMA_KEY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -930,7 +949,6 @@ GEN7_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_CLEAR_PARAMS_length 0x00000003 #define GEN7_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 #define GEN7_3DSTATE_CLEAR_PARAMS_header \ .CommandType = 3, \ @@ -939,6 +957,8 @@ GEN7_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 4, \ .DwordLength = 1 +#define GEN7_3DSTATE_CLEAR_PARAMS_length 0x00000003 + struct GEN7_3DSTATE_CLEAR_PARAMS { uint32_t CommandType; uint32_t CommandSubType; @@ -951,7 +971,7 @@ struct GEN7_3DSTATE_CLEAR_PARAMS { static inline void GEN7_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CLEAR_PARAMS * restrict values) + const struct GEN7_3DSTATE_CLEAR_PARAMS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -973,7 +993,6 @@ GEN7_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_CLIP_length 0x00000004 #define GEN7_3DSTATE_CLIP_length_bias 0x00000002 #define GEN7_3DSTATE_CLIP_header \ .CommandType = 3, \ @@ -982,6 +1001,8 @@ GEN7_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 18, \ .DwordLength = 2 +#define GEN7_3DSTATE_CLIP_length 0x00000004 + struct GEN7_3DSTATE_CLIP { uint32_t CommandType; uint32_t CommandSubType; @@ -1030,7 +1051,7 @@ struct GEN7_3DSTATE_CLIP { static inline void GEN7_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CLIP * restrict values) + const struct GEN7_3DSTATE_CLIP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1075,7 +1096,6 @@ GEN7_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_CONSTANT_DS_length 0x00000007 #define 
GEN7_3DSTATE_CONSTANT_DS_length_bias 0x00000002 #define GEN7_3DSTATE_CONSTANT_DS_header \ .CommandType = 3, \ @@ -1084,6 +1104,10 @@ GEN7_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 26, \ .DwordLength = 5 +#define GEN7_3DSTATE_CONSTANT_DS_length 0x00000007 + +#define GEN7_3DSTATE_CONSTANT_BODY_length 0x00000006 + struct GEN7_3DSTATE_CONSTANT_BODY { uint32_t ConstantBuffer1ReadLength; uint32_t ConstantBuffer0ReadLength; @@ -1098,7 +1122,7 @@ struct GEN7_3DSTATE_CONSTANT_BODY { static inline void GEN7_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_BODY * restrict values) + const struct GEN7_3DSTATE_CONSTANT_BODY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1152,7 +1176,7 @@ struct GEN7_3DSTATE_CONSTANT_DS { static inline void GEN7_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_DS * restrict values) + const struct GEN7_3DSTATE_CONSTANT_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1167,7 +1191,6 @@ GEN7_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN7_3DSTATE_CONSTANT_GS_length 0x00000007 #define GEN7_3DSTATE_CONSTANT_GS_length_bias 0x00000002 #define GEN7_3DSTATE_CONSTANT_GS_header \ .CommandType = 3, \ @@ -1176,6 +1199,8 @@ GEN7_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 22, \ .DwordLength = 5 +#define GEN7_3DSTATE_CONSTANT_GS_length 0x00000007 + struct GEN7_3DSTATE_CONSTANT_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -1187,7 +1212,7 @@ struct GEN7_3DSTATE_CONSTANT_GS { static inline void GEN7_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_GS * restrict values) + const struct GEN7_3DSTATE_CONSTANT_GS * restrict values) { uint32_t *dw = 
(uint32_t * restrict) dst; @@ -1202,7 +1227,6 @@ GEN7_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN7_3DSTATE_CONSTANT_HS_length 0x00000007 #define GEN7_3DSTATE_CONSTANT_HS_length_bias 0x00000002 #define GEN7_3DSTATE_CONSTANT_HS_header \ .CommandType = 3, \ @@ -1211,6 +1235,8 @@ GEN7_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 25, \ .DwordLength = 5 +#define GEN7_3DSTATE_CONSTANT_HS_length 0x00000007 + struct GEN7_3DSTATE_CONSTANT_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -1222,7 +1248,7 @@ struct GEN7_3DSTATE_CONSTANT_HS { static inline void GEN7_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_HS * restrict values) + const struct GEN7_3DSTATE_CONSTANT_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1237,7 +1263,6 @@ GEN7_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN7_3DSTATE_CONSTANT_PS_length 0x00000007 #define GEN7_3DSTATE_CONSTANT_PS_length_bias 0x00000002 #define GEN7_3DSTATE_CONSTANT_PS_header \ .CommandType = 3, \ @@ -1246,6 +1271,8 @@ GEN7_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 23, \ .DwordLength = 5 +#define GEN7_3DSTATE_CONSTANT_PS_length 0x00000007 + struct GEN7_3DSTATE_CONSTANT_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -1257,7 +1284,7 @@ struct GEN7_3DSTATE_CONSTANT_PS { static inline void GEN7_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_PS * restrict values) + const struct GEN7_3DSTATE_CONSTANT_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1272,7 +1299,6 @@ GEN7_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, 
GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN7_3DSTATE_CONSTANT_VS_length 0x00000007 #define GEN7_3DSTATE_CONSTANT_VS_length_bias 0x00000002 #define GEN7_3DSTATE_CONSTANT_VS_header \ .CommandType = 3, \ @@ -1281,6 +1307,8 @@ GEN7_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 21, \ .DwordLength = 5 +#define GEN7_3DSTATE_CONSTANT_VS_length 0x00000007 + struct GEN7_3DSTATE_CONSTANT_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -1292,7 +1320,7 @@ struct GEN7_3DSTATE_CONSTANT_VS { static inline void GEN7_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_VS * restrict values) + const struct GEN7_3DSTATE_CONSTANT_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1307,7 +1335,6 @@ GEN7_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN7_3DSTATE_DEPTH_BUFFER_length 0x00000007 #define GEN7_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 #define GEN7_3DSTATE_DEPTH_BUFFER_header \ .CommandType = 3, \ @@ -1316,6 +1343,8 @@ GEN7_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 5, \ .DwordLength = 5 +#define GEN7_3DSTATE_DEPTH_BUFFER_length 0x00000007 + struct GEN7_3DSTATE_DEPTH_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -1351,7 +1380,7 @@ struct GEN7_3DSTATE_DEPTH_BUFFER { static inline void GEN7_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_DEPTH_BUFFER * restrict values) + const struct GEN7_3DSTATE_DEPTH_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1403,7 +1432,6 @@ GEN7_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length 0x00000002 #define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length_bias 
0x00000002 #define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_header\ .CommandType = 3, \ @@ -1412,6 +1440,8 @@ GEN7_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 37, \ .DwordLength = 0 +#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length 0x00000002 + struct GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS { uint32_t CommandType; uint32_t CommandSubType; @@ -1423,7 +1453,7 @@ struct GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS { static inline void GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS * restrict values) + const struct GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1442,7 +1472,6 @@ GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * res } -#define GEN7_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 #define GEN7_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 #define GEN7_3DSTATE_DRAWING_RECTANGLE_header \ .CommandType = 3, \ @@ -1451,6 +1480,8 @@ GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * res ._3DCommandSubOpcode = 0, \ .DwordLength = 2 +#define GEN7_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 + struct GEN7_3DSTATE_DRAWING_RECTANGLE { uint32_t CommandType; uint32_t CommandSubType; @@ -1467,7 +1498,7 @@ struct GEN7_3DSTATE_DRAWING_RECTANGLE { static inline void GEN7_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_DRAWING_RECTANGLE * restrict values) + const struct GEN7_3DSTATE_DRAWING_RECTANGLE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1496,7 +1527,6 @@ GEN7_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_DS_length 0x00000006 #define GEN7_3DSTATE_DS_length_bias 0x00000002 #define GEN7_3DSTATE_DS_header \ .CommandType = 3, \ @@ -1505,6 +1535,8 @@ 
GEN7_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 29, \ .DwordLength = 4 +#define GEN7_3DSTATE_DS_length 0x00000006 + struct GEN7_3DSTATE_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -1544,7 +1576,7 @@ struct GEN7_3DSTATE_DS { static inline void GEN7_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_DS * restrict values) + const struct GEN7_3DSTATE_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1591,7 +1623,6 @@ GEN7_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_GS_length 0x00000007 #define GEN7_3DSTATE_GS_length_bias 0x00000002 #define GEN7_3DSTATE_GS_header \ .CommandType = 3, \ @@ -1600,6 +1631,8 @@ GEN7_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 17, \ .DwordLength = 5 +#define GEN7_3DSTATE_GS_length 0x00000007 + struct GEN7_3DSTATE_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -1658,7 +1691,7 @@ struct GEN7_3DSTATE_GS { static inline void GEN7_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_GS * restrict values) + const struct GEN7_3DSTATE_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1722,7 +1755,6 @@ GEN7_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000003 #define GEN7_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 #define GEN7_3DSTATE_HIER_DEPTH_BUFFER_header \ .CommandType = 3, \ @@ -1731,6 +1763,8 @@ GEN7_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 7, \ .DwordLength = 1 +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000003 + struct GEN7_3DSTATE_HIER_DEPTH_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -1744,7 +1778,7 @@ struct GEN7_3DSTATE_HIER_DEPTH_BUFFER { static inline void GEN7_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const 
struct GEN7_3DSTATE_HIER_DEPTH_BUFFER * restrict values) + const struct GEN7_3DSTATE_HIER_DEPTH_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1771,7 +1805,6 @@ GEN7_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_HS_length 0x00000007 #define GEN7_3DSTATE_HS_length_bias 0x00000002 #define GEN7_3DSTATE_HS_header \ .CommandType = 3, \ @@ -1780,6 +1813,8 @@ GEN7_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 27, \ .DwordLength = 5 +#define GEN7_3DSTATE_HS_length 0x00000007 + struct GEN7_3DSTATE_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -1818,7 +1853,7 @@ struct GEN7_3DSTATE_HS { static inline void GEN7_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_HS * restrict values) + const struct GEN7_3DSTATE_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1869,7 +1904,6 @@ GEN7_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_INDEX_BUFFER_length 0x00000003 #define GEN7_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 #define GEN7_3DSTATE_INDEX_BUFFER_header \ .CommandType = 3, \ @@ -1878,6 +1912,8 @@ GEN7_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 10, \ .DwordLength = 1 +#define GEN7_3DSTATE_INDEX_BUFFER_length 0x00000003 + struct GEN7_3DSTATE_INDEX_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -1896,7 +1932,7 @@ struct GEN7_3DSTATE_INDEX_BUFFER { static inline void GEN7_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_INDEX_BUFFER * restrict values) + const struct GEN7_3DSTATE_INDEX_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1927,7 +1963,6 @@ GEN7_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_LINE_STIPPLE_length 0x00000003 #define GEN7_3DSTATE_LINE_STIPPLE_length_bias 
0x00000002 #define GEN7_3DSTATE_LINE_STIPPLE_header \ .CommandType = 3, \ @@ -1936,6 +1971,8 @@ GEN7_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 8, \ .DwordLength = 1 +#define GEN7_3DSTATE_LINE_STIPPLE_length 0x00000003 + struct GEN7_3DSTATE_LINE_STIPPLE { uint32_t CommandType; uint32_t CommandSubType; @@ -1952,7 +1989,7 @@ struct GEN7_3DSTATE_LINE_STIPPLE { static inline void GEN7_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_LINE_STIPPLE * restrict values) + const struct GEN7_3DSTATE_LINE_STIPPLE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1978,7 +2015,6 @@ GEN7_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_MONOFILTER_SIZE_length 0x00000002 #define GEN7_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 #define GEN7_3DSTATE_MONOFILTER_SIZE_header \ .CommandType = 3, \ @@ -1987,6 +2023,8 @@ GEN7_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 17, \ .DwordLength = 0 +#define GEN7_3DSTATE_MONOFILTER_SIZE_length 0x00000002 + struct GEN7_3DSTATE_MONOFILTER_SIZE { uint32_t CommandType; uint32_t CommandSubType; @@ -1999,7 +2037,7 @@ struct GEN7_3DSTATE_MONOFILTER_SIZE { static inline void GEN7_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_MONOFILTER_SIZE * restrict values) + const struct GEN7_3DSTATE_MONOFILTER_SIZE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2018,7 +2056,6 @@ GEN7_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_MULTISAMPLE_length 0x00000004 #define GEN7_3DSTATE_MULTISAMPLE_length_bias 0x00000002 #define GEN7_3DSTATE_MULTISAMPLE_header \ .CommandType = 3, \ @@ -2027,6 +2064,8 @@ GEN7_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 13, \ .DwordLength = 2 +#define 
GEN7_3DSTATE_MULTISAMPLE_length 0x00000004 + struct GEN7_3DSTATE_MULTISAMPLE { uint32_t CommandType; uint32_t CommandSubType; @@ -2060,7 +2099,7 @@ struct GEN7_3DSTATE_MULTISAMPLE { static inline void GEN7_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_MULTISAMPLE * restrict values) + const struct GEN7_3DSTATE_MULTISAMPLE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2101,7 +2140,6 @@ GEN7_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 #define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 #define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_header \ .CommandType = 3, \ @@ -2110,6 +2148,8 @@ GEN7_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 6, \ .DwordLength = 0 +#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 + struct GEN7_3DSTATE_POLY_STIPPLE_OFFSET { uint32_t CommandType; uint32_t CommandSubType; @@ -2122,7 +2162,7 @@ struct GEN7_3DSTATE_POLY_STIPPLE_OFFSET { static inline void GEN7_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) + const struct GEN7_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2141,7 +2181,6 @@ GEN7_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst } -#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 #define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 #define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_header\ .CommandType = 3, \ @@ -2150,6 +2189,8 @@ GEN7_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst ._3DCommandSubOpcode = 7, \ .DwordLength = 31 +#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 + struct GEN7_3DSTATE_POLY_STIPPLE_PATTERN { uint32_t CommandType; uint32_t CommandSubType; @@ -2161,7 +2202,7 @@ struct 
GEN7_3DSTATE_POLY_STIPPLE_PATTERN { static inline void GEN7_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) + const struct GEN7_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2181,7 +2222,6 @@ GEN7_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict ds } -#define GEN7_3DSTATE_PS_length 0x00000008 #define GEN7_3DSTATE_PS_length_bias 0x00000002 #define GEN7_3DSTATE_PS_header \ .CommandType = 3, \ @@ -2190,6 +2230,8 @@ GEN7_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict ds ._3DCommandSubOpcode = 32, \ .DwordLength = 6 +#define GEN7_3DSTATE_PS_length 0x00000008 + struct GEN7_3DSTATE_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -2244,7 +2286,7 @@ struct GEN7_3DSTATE_PS { static inline void GEN7_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_PS * restrict values) + const struct GEN7_3DSTATE_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2308,7 +2350,6 @@ GEN7_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ .CommandType = 3, \ @@ -2317,6 +2358,8 @@ GEN7_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 20, \ .DwordLength = 0 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 + struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -2331,7 +2374,7 @@ struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS { static inline void GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) 
{ uint32_t *dw = (uint32_t * restrict) dst; @@ -2350,7 +2393,6 @@ GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict } -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ .CommandType = 3, \ @@ -2359,6 +2401,8 @@ GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 21, \ .DwordLength = 0 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 + struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -2373,7 +2417,7 @@ struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS { static inline void GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2392,7 +2436,6 @@ GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict } -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ .CommandType = 3, \ @@ -2401,6 +2444,8 @@ GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 19, \ .DwordLength = 0 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 + struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -2415,7 +2460,7 @@ struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS { static inline void GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2434,7 +2479,6 @@ 
GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict } -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ .CommandType = 3, \ @@ -2443,6 +2487,8 @@ GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 22, \ .DwordLength = 0 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 + struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -2457,7 +2503,7 @@ struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS { static inline void GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2476,7 +2522,6 @@ GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict } -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ .CommandType = 3, \ @@ -2485,6 +2530,8 @@ GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 18, \ .DwordLength = 0 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 + struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -2499,7 +2546,7 @@ struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS { static inline void GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2525,6 +2572,10 @@ GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, 
void * restrict ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 2 +#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 + +#define GEN7_PALETTE_ENTRY_length 0x00000001 + struct GEN7_PALETTE_ENTRY { uint32_t Alpha; uint32_t Red; @@ -2534,7 +2585,7 @@ struct GEN7_PALETTE_ENTRY { static inline void GEN7_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_PALETTE_ENTRY * restrict values) + const struct GEN7_PALETTE_ENTRY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2558,7 +2609,7 @@ struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0 { static inline void GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) + const struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2580,6 +2631,8 @@ GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict d ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 12 +#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 + struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1 { uint32_t CommandType; uint32_t CommandSubType; @@ -2591,7 +2644,7 @@ struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1 { static inline void GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) + const struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2606,7 +2659,6 @@ GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict d /* variable length fields follow */ } -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ .CommandType = 3, \ @@ -2615,6 +2667,8 @@ GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict d 
._3DCommandSubOpcode = 45, \ .DwordLength = 0 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 + struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -2626,7 +2680,7 @@ struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS { static inline void GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2644,7 +2698,6 @@ GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restri } -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ .CommandType = 3, \ @@ -2653,6 +2706,8 @@ GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 46, \ .DwordLength = 0 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 + struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -2664,7 +2719,7 @@ struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS { static inline void GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2682,7 +2737,6 @@ GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restri } -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ .CommandType = 3, \ @@ -2691,6 +2745,8 @@ GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restri 
._3DCommandSubOpcode = 44, \ .DwordLength = 0 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 + struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -2702,7 +2758,7 @@ struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS { static inline void GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2720,7 +2776,6 @@ GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restri } -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ .CommandType = 3, \ @@ -2729,6 +2784,8 @@ GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 47, \ .DwordLength = 0 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 + struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -2740,7 +2797,7 @@ struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS { static inline void GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2758,7 +2815,6 @@ GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restri } -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ .CommandType = 3, \ @@ -2767,6 +2823,8 @@ GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restri 
._3DCommandSubOpcode = 43, \ .DwordLength = 0 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 + struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -2778,7 +2836,7 @@ struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS { static inline void GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2796,7 +2854,6 @@ GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restri } -#define GEN7_3DSTATE_SAMPLE_MASK_length 0x00000002 #define GEN7_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 #define GEN7_3DSTATE_SAMPLE_MASK_header \ .CommandType = 3, \ @@ -2805,6 +2862,8 @@ GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 24, \ .DwordLength = 0 +#define GEN7_3DSTATE_SAMPLE_MASK_length 0x00000002 + struct GEN7_3DSTATE_SAMPLE_MASK { uint32_t CommandType; uint32_t CommandSubType; @@ -2816,7 +2875,7 @@ struct GEN7_3DSTATE_SAMPLE_MASK { static inline void GEN7_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLE_MASK * restrict values) + const struct GEN7_3DSTATE_SAMPLE_MASK * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2834,7 +2893,6 @@ GEN7_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_SBE_length 0x0000000e #define GEN7_3DSTATE_SBE_length_bias 0x00000002 #define GEN7_3DSTATE_SBE_header \ .CommandType = 3, \ @@ -2843,6 +2901,8 @@ GEN7_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 31, \ .DwordLength = 12 +#define GEN7_3DSTATE_SBE_length 0x0000000e + struct GEN7_3DSTATE_SBE { uint32_t CommandType; uint32_t CommandSubType; @@ -2911,7 +2971,7 @@ struct GEN7_3DSTATE_SBE { 
static inline void GEN7_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SBE * restrict values) + const struct GEN7_3DSTATE_SBE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2981,7 +3041,6 @@ GEN7_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 #define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 #define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_header\ .CommandType = 3, \ @@ -2990,6 +3049,8 @@ GEN7_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 15, \ .DwordLength = 0 +#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 + struct GEN7_3DSTATE_SCISSOR_STATE_POINTERS { uint32_t CommandType; uint32_t CommandSubType; @@ -3001,7 +3062,7 @@ struct GEN7_3DSTATE_SCISSOR_STATE_POINTERS { static inline void GEN7_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) + const struct GEN7_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3019,7 +3080,6 @@ GEN7_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict } -#define GEN7_3DSTATE_SF_length 0x00000007 #define GEN7_3DSTATE_SF_length_bias 0x00000002 #define GEN7_3DSTATE_SF_header \ .CommandType = 3, \ @@ -3028,6 +3088,8 @@ GEN7_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 19, \ .DwordLength = 5 +#define GEN7_3DSTATE_SF_length 0x00000007 + struct GEN7_3DSTATE_SF { uint32_t CommandType; uint32_t CommandSubType; @@ -3080,14 +3142,14 @@ struct GEN7_3DSTATE_SF { uint32_t VertexSubPixelPrecisionSelect; uint32_t UsePointWidthState; float PointWidth; - uint32_t GlobalDepthOffsetConstant; - uint32_t GlobalDepthOffsetScale; - uint32_t GlobalDepthOffsetClamp; + float GlobalDepthOffsetConstant; + float GlobalDepthOffsetScale; + float 
GlobalDepthOffsetClamp; }; static inline void GEN7_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SF * restrict values) + const struct GEN7_3DSTATE_SF * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3133,20 +3195,19 @@ GEN7_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, 0; dw[4] = - __gen_field(values->GlobalDepthOffsetConstant, 0, 31) | + __gen_float(values->GlobalDepthOffsetConstant) | 0; dw[5] = - __gen_field(values->GlobalDepthOffsetScale, 0, 31) | + __gen_float(values->GlobalDepthOffsetScale) | 0; dw[6] = - __gen_field(values->GlobalDepthOffsetClamp, 0, 31) | + __gen_float(values->GlobalDepthOffsetClamp) | 0; } -#define GEN7_3DSTATE_SO_BUFFER_length 0x00000004 #define GEN7_3DSTATE_SO_BUFFER_length_bias 0x00000002 #define GEN7_3DSTATE_SO_BUFFER_header \ .CommandType = 3, \ @@ -3155,6 +3216,8 @@ GEN7_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 24, \ .DwordLength = 2 +#define GEN7_3DSTATE_SO_BUFFER_length 0x00000004 + struct GEN7_3DSTATE_SO_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -3170,7 +3233,7 @@ struct GEN7_3DSTATE_SO_BUFFER { static inline void GEN7_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SO_BUFFER * restrict values) + const struct GEN7_3DSTATE_SO_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3211,6 +3274,12 @@ GEN7_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 23 +#define GEN7_3DSTATE_SO_DECL_LIST_length 0x00000000 + +#define GEN7_SO_DECL_ENTRY_length 0x00000002 + +#define GEN7_SO_DECL_length 0x00000001 + struct GEN7_SO_DECL { uint32_t OutputBufferSlot; uint32_t HoleFlag; @@ -3220,7 +3289,7 @@ struct GEN7_SO_DECL { static inline void GEN7_SO_DECL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SO_DECL * restrict values) + const struct GEN7_SO_DECL * restrict 
values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3242,7 +3311,7 @@ struct GEN7_SO_DECL_ENTRY { static inline void GEN7_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SO_DECL_ENTRY * restrict values) + const struct GEN7_SO_DECL_ENTRY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3285,7 +3354,7 @@ struct GEN7_3DSTATE_SO_DECL_LIST { static inline void GEN7_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SO_DECL_LIST * restrict values) + const struct GEN7_3DSTATE_SO_DECL_LIST * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3314,7 +3383,6 @@ GEN7_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN7_3DSTATE_STENCIL_BUFFER_length 0x00000003 #define GEN7_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 #define GEN7_3DSTATE_STENCIL_BUFFER_header \ .CommandType = 3, \ @@ -3323,6 +3391,8 @@ GEN7_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 6, \ .DwordLength = 1 +#define GEN7_3DSTATE_STENCIL_BUFFER_length 0x00000003 + struct GEN7_3DSTATE_STENCIL_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -3336,7 +3406,7 @@ struct GEN7_3DSTATE_STENCIL_BUFFER { static inline void GEN7_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_STENCIL_BUFFER * restrict values) + const struct GEN7_3DSTATE_STENCIL_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3363,7 +3433,6 @@ GEN7_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_STREAMOUT_length 0x00000003 #define GEN7_3DSTATE_STREAMOUT_length_bias 0x00000002 #define GEN7_3DSTATE_STREAMOUT_header \ .CommandType = 3, \ @@ -3372,6 +3441,8 @@ GEN7_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 30, \ .DwordLength = 1 +#define 
GEN7_3DSTATE_STREAMOUT_length 0x00000003 + struct GEN7_3DSTATE_STREAMOUT { uint32_t CommandType; uint32_t CommandSubType; @@ -3401,7 +3472,7 @@ struct GEN7_3DSTATE_STREAMOUT { static inline void GEN7_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_STREAMOUT * restrict values) + const struct GEN7_3DSTATE_STREAMOUT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3438,7 +3509,6 @@ GEN7_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_TE_length 0x00000004 #define GEN7_3DSTATE_TE_length_bias 0x00000002 #define GEN7_3DSTATE_TE_header \ .CommandType = 3, \ @@ -3447,6 +3517,8 @@ GEN7_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 28, \ .DwordLength = 2 +#define GEN7_3DSTATE_TE_length 0x00000004 + struct GEN7_3DSTATE_TE { uint32_t CommandType; uint32_t CommandSubType; @@ -3476,7 +3548,7 @@ struct GEN7_3DSTATE_TE { static inline void GEN7_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_TE * restrict values) + const struct GEN7_3DSTATE_TE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3506,7 +3578,6 @@ GEN7_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_URB_DS_length 0x00000002 #define GEN7_3DSTATE_URB_DS_length_bias 0x00000002 #define GEN7_3DSTATE_URB_DS_header \ .CommandType = 3, \ @@ -3515,6 +3586,8 @@ GEN7_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 50, \ .DwordLength = 0 +#define GEN7_3DSTATE_URB_DS_length 0x00000002 + struct GEN7_3DSTATE_URB_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -3528,7 +3601,7 @@ struct GEN7_3DSTATE_URB_DS { static inline void GEN7_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_URB_DS * restrict values) + const struct GEN7_3DSTATE_URB_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3548,7 
+3621,6 @@ GEN7_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_URB_GS_length 0x00000002 #define GEN7_3DSTATE_URB_GS_length_bias 0x00000002 #define GEN7_3DSTATE_URB_GS_header \ .CommandType = 3, \ @@ -3557,6 +3629,8 @@ GEN7_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 51, \ .DwordLength = 0 +#define GEN7_3DSTATE_URB_GS_length 0x00000002 + struct GEN7_3DSTATE_URB_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -3570,7 +3644,7 @@ struct GEN7_3DSTATE_URB_GS { static inline void GEN7_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_URB_GS * restrict values) + const struct GEN7_3DSTATE_URB_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3590,7 +3664,6 @@ GEN7_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_URB_HS_length 0x00000002 #define GEN7_3DSTATE_URB_HS_length_bias 0x00000002 #define GEN7_3DSTATE_URB_HS_header \ .CommandType = 3, \ @@ -3599,6 +3672,8 @@ GEN7_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 49, \ .DwordLength = 0 +#define GEN7_3DSTATE_URB_HS_length 0x00000002 + struct GEN7_3DSTATE_URB_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -3612,7 +3687,7 @@ struct GEN7_3DSTATE_URB_HS { static inline void GEN7_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_URB_HS * restrict values) + const struct GEN7_3DSTATE_URB_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3639,6 +3714,10 @@ GEN7_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 8 +#define GEN7_3DSTATE_VERTEX_BUFFERS_length 0x00000000 + +#define GEN7_VERTEX_BUFFER_STATE_length 0x00000004 + struct GEN7_VERTEX_BUFFER_STATE { uint32_t VertexBufferIndex; #define VERTEXDATA 0 @@ -3656,7 +3735,7 @@ struct GEN7_VERTEX_BUFFER_STATE { static inline 
void GEN7_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_VERTEX_BUFFER_STATE * restrict values) + const struct GEN7_VERTEX_BUFFER_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3701,7 +3780,7 @@ struct GEN7_3DSTATE_VERTEX_BUFFERS { static inline void GEN7_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_VERTEX_BUFFERS * restrict values) + const struct GEN7_3DSTATE_VERTEX_BUFFERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3723,6 +3802,10 @@ GEN7_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 9 +#define GEN7_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 + +#define GEN7_VERTEX_ELEMENT_STATE_length 0x00000002 + struct GEN7_VERTEX_ELEMENT_STATE { uint32_t VertexBufferIndex; bool Valid; @@ -3737,7 +3820,7 @@ struct GEN7_VERTEX_ELEMENT_STATE { static inline void GEN7_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_VERTEX_ELEMENT_STATE * restrict values) + const struct GEN7_VERTEX_ELEMENT_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3769,7 +3852,7 @@ struct GEN7_3DSTATE_VERTEX_ELEMENTS { static inline void GEN7_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_VERTEX_ELEMENTS * restrict values) + const struct GEN7_3DSTATE_VERTEX_ELEMENTS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3784,7 +3867,6 @@ GEN7_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN7_3DSTATE_VF_STATISTICS_length 0x00000001 #define GEN7_3DSTATE_VF_STATISTICS_length_bias 0x00000001 #define GEN7_3DSTATE_VF_STATISTICS_header \ .CommandType = 3, \ @@ -3792,6 +3874,8 @@ GEN7_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ 
._3DCommandSubOpcode = 11 +#define GEN7_3DSTATE_VF_STATISTICS_length 0x00000001 + struct GEN7_3DSTATE_VF_STATISTICS { uint32_t CommandType; uint32_t CommandSubType; @@ -3802,7 +3886,7 @@ struct GEN7_3DSTATE_VF_STATISTICS { static inline void GEN7_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_VF_STATISTICS * restrict values) + const struct GEN7_3DSTATE_VF_STATISTICS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3816,7 +3900,6 @@ GEN7_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 #define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 #define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ .CommandType = 3, \ @@ -3825,6 +3908,8 @@ GEN7_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 35, \ .DwordLength = 0 +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 + struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC { uint32_t CommandType; uint32_t CommandSubType; @@ -3836,7 +3921,7 @@ struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC { static inline void GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) + const struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3854,7 +3939,6 @@ GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restr } -#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 #define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 #define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ .CommandType = 3, \ @@ -3863,6 +3947,8 @@ GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restr ._3DCommandSubOpcode = 33, \ .DwordLength = 0 +#define 
GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 + struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { uint32_t CommandType; uint32_t CommandSubType; @@ -3874,7 +3960,7 @@ struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { static inline void GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) + const struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3892,7 +3978,6 @@ GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * } -#define GEN7_3DSTATE_VS_length 0x00000006 #define GEN7_3DSTATE_VS_length_bias 0x00000002 #define GEN7_3DSTATE_VS_header \ .CommandType = 3, \ @@ -3901,6 +3986,8 @@ GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * ._3DCommandSubOpcode = 16, \ .DwordLength = 4 +#define GEN7_3DSTATE_VS_length 0x00000006 + struct GEN7_3DSTATE_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -3939,7 +4026,7 @@ struct GEN7_3DSTATE_VS { static inline void GEN7_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_VS * restrict values) + const struct GEN7_3DSTATE_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3985,7 +4072,6 @@ GEN7_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_WM_length 0x00000003 #define GEN7_3DSTATE_WM_length_bias 0x00000002 #define GEN7_3DSTATE_WM_header \ .CommandType = 3, \ @@ -3994,6 +4080,8 @@ GEN7_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 20, \ .DwordLength = 1 +#define GEN7_3DSTATE_WM_length 0x00000003 + struct GEN7_3DSTATE_WM { uint32_t CommandType; uint32_t CommandSubType; @@ -4043,7 +4131,7 @@ struct GEN7_3DSTATE_WM { static inline void GEN7_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_WM * restrict 
values) + const struct GEN7_3DSTATE_WM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4084,7 +4172,6 @@ GEN7_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_GPGPU_OBJECT_length 0x00000008 #define GEN7_GPGPU_OBJECT_length_bias 0x00000002 #define GEN7_GPGPU_OBJECT_header \ .CommandType = 3, \ @@ -4093,6 +4180,8 @@ GEN7_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 4, \ .DwordLength = 6 +#define GEN7_GPGPU_OBJECT_length 0x00000008 + struct GEN7_GPGPU_OBJECT { uint32_t CommandType; uint32_t Pipeline; @@ -4118,7 +4207,7 @@ struct GEN7_GPGPU_OBJECT { static inline void GEN7_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_GPGPU_OBJECT * restrict values) + const struct GEN7_GPGPU_OBJECT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4165,7 +4254,6 @@ GEN7_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_GPGPU_WALKER_length 0x0000000b #define GEN7_GPGPU_WALKER_length_bias 0x00000002 #define GEN7_GPGPU_WALKER_header \ .CommandType = 3, \ @@ -4174,6 +4262,8 @@ GEN7_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, .SubOpcodeA = 5, \ .DwordLength = 9 +#define GEN7_GPGPU_WALKER_length 0x0000000b + struct GEN7_GPGPU_WALKER { uint32_t CommandType; uint32_t Pipeline; @@ -4202,7 +4292,7 @@ struct GEN7_GPGPU_WALKER { static inline void GEN7_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_GPGPU_WALKER * restrict values) + const struct GEN7_GPGPU_WALKER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4261,7 +4351,6 @@ GEN7_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MEDIA_CURBE_LOAD_length 0x00000004 #define GEN7_MEDIA_CURBE_LOAD_length_bias 0x00000002 #define GEN7_MEDIA_CURBE_LOAD_header \ .CommandType = 3, \ @@ -4270,6 +4359,8 @@ GEN7_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 1, \ .DwordLength = 2 
+#define GEN7_MEDIA_CURBE_LOAD_length 0x00000004 + struct GEN7_MEDIA_CURBE_LOAD { uint32_t CommandType; uint32_t Pipeline; @@ -4282,7 +4373,7 @@ struct GEN7_MEDIA_CURBE_LOAD { static inline void GEN7_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_CURBE_LOAD * restrict values) + const struct GEN7_MEDIA_CURBE_LOAD * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4307,7 +4398,6 @@ GEN7_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 #define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 #define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ .CommandType = 3, \ @@ -4316,6 +4406,8 @@ GEN7_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 2, \ .DwordLength = 2 +#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 + struct GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD { uint32_t CommandType; uint32_t Pipeline; @@ -4328,7 +4420,7 @@ struct GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD { static inline void GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) + const struct GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4360,6 +4452,8 @@ GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict .MediaCommandOpcode = 1, \ .MediaCommandSubOpcode = 0 +#define GEN7_MEDIA_OBJECT_length 0x00000000 + struct GEN7_MEDIA_OBJECT { uint32_t CommandType; uint32_t MediaCommandPipeline; @@ -4389,7 +4483,7 @@ struct GEN7_MEDIA_OBJECT { static inline void GEN7_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_OBJECT * restrict values) + const struct GEN7_MEDIA_OBJECT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4432,7 +4526,6 @@ GEN7_MEDIA_OBJECT_pack(__gen_user_data *data, 
void * restrict dst, /* variable length fields follow */ } -#define GEN7_MEDIA_OBJECT_PRT_length 0x00000010 #define GEN7_MEDIA_OBJECT_PRT_length_bias 0x00000002 #define GEN7_MEDIA_OBJECT_PRT_header \ .CommandType = 3, \ @@ -4441,6 +4534,8 @@ GEN7_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 2, \ .DwordLength = 14 +#define GEN7_MEDIA_OBJECT_PRT_length 0x00000010 + struct GEN7_MEDIA_OBJECT_PRT { uint32_t CommandType; uint32_t Pipeline; @@ -4458,7 +4553,7 @@ struct GEN7_MEDIA_OBJECT_PRT { static inline void GEN7_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_OBJECT_PRT * restrict values) + const struct GEN7_MEDIA_OBJECT_PRT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4498,6 +4593,8 @@ GEN7_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, .MediaCommandOpcode = 1, \ .SubOpcode = 3 +#define GEN7_MEDIA_OBJECT_WALKER_length 0x00000000 + struct GEN7_MEDIA_OBJECT_WALKER { uint32_t CommandType; uint32_t Pipeline; @@ -4546,7 +4643,7 @@ struct GEN7_MEDIA_OBJECT_WALKER { static inline void GEN7_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_OBJECT_WALKER * restrict values) + const struct GEN7_MEDIA_OBJECT_WALKER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4642,7 +4739,6 @@ GEN7_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN7_MEDIA_STATE_FLUSH_length 0x00000002 #define GEN7_MEDIA_STATE_FLUSH_length_bias 0x00000002 #define GEN7_MEDIA_STATE_FLUSH_header \ .CommandType = 3, \ @@ -4651,6 +4747,8 @@ GEN7_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 4, \ .DwordLength = 0 +#define GEN7_MEDIA_STATE_FLUSH_length 0x00000002 + struct GEN7_MEDIA_STATE_FLUSH { uint32_t CommandType; uint32_t Pipeline; @@ -4663,7 +4761,7 @@ struct GEN7_MEDIA_STATE_FLUSH { static inline void 
GEN7_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_STATE_FLUSH * restrict values) + const struct GEN7_MEDIA_STATE_FLUSH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4682,7 +4780,6 @@ GEN7_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MEDIA_VFE_STATE_length 0x00000008 #define GEN7_MEDIA_VFE_STATE_length_bias 0x00000002 #define GEN7_MEDIA_VFE_STATE_header \ .CommandType = 3, \ @@ -4691,6 +4788,8 @@ GEN7_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 0, \ .DwordLength = 6 +#define GEN7_MEDIA_VFE_STATE_length 0x00000008 + struct GEN7_MEDIA_VFE_STATE { uint32_t CommandType; uint32_t Pipeline; @@ -4740,7 +4839,7 @@ struct GEN7_MEDIA_VFE_STATE { static inline void GEN7_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_VFE_STATE * restrict values) + const struct GEN7_MEDIA_VFE_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4804,12 +4903,13 @@ GEN7_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_ARB_CHECK_length 0x00000001 #define GEN7_MI_ARB_CHECK_length_bias 0x00000001 #define GEN7_MI_ARB_CHECK_header \ .CommandType = 0, \ .MICommandOpcode = 5 +#define GEN7_MI_ARB_CHECK_length 0x00000001 + struct GEN7_MI_ARB_CHECK { uint32_t CommandType; uint32_t MICommandOpcode; @@ -4817,7 +4917,7 @@ struct GEN7_MI_ARB_CHECK { static inline void GEN7_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_ARB_CHECK * restrict values) + const struct GEN7_MI_ARB_CHECK * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4828,12 +4928,13 @@ GEN7_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_ARB_ON_OFF_length 0x00000001 #define GEN7_MI_ARB_ON_OFF_length_bias 0x00000001 #define GEN7_MI_ARB_ON_OFF_header \ .CommandType = 0, \ .MICommandOpcode = 8 +#define 
GEN7_MI_ARB_ON_OFF_length 0x00000001 + struct GEN7_MI_ARB_ON_OFF { uint32_t CommandType; uint32_t MICommandOpcode; @@ -4842,7 +4943,7 @@ struct GEN7_MI_ARB_ON_OFF { static inline void GEN7_MI_ARB_ON_OFF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_ARB_ON_OFF * restrict values) + const struct GEN7_MI_ARB_ON_OFF * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4854,12 +4955,13 @@ GEN7_MI_ARB_ON_OFF_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_BATCH_BUFFER_END_length 0x00000001 #define GEN7_MI_BATCH_BUFFER_END_length_bias 0x00000001 #define GEN7_MI_BATCH_BUFFER_END_header \ .CommandType = 0, \ .MICommandOpcode = 10 +#define GEN7_MI_BATCH_BUFFER_END_length 0x00000001 + struct GEN7_MI_BATCH_BUFFER_END { uint32_t CommandType; uint32_t MICommandOpcode; @@ -4867,7 +4969,7 @@ struct GEN7_MI_BATCH_BUFFER_END { static inline void GEN7_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_BATCH_BUFFER_END * restrict values) + const struct GEN7_MI_BATCH_BUFFER_END * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4878,13 +4980,14 @@ GEN7_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_BATCH_BUFFER_START_length 0x00000002 #define GEN7_MI_BATCH_BUFFER_START_length_bias 0x00000002 #define GEN7_MI_BATCH_BUFFER_START_header \ .CommandType = 0, \ .MICommandOpcode = 49, \ .DwordLength = 0 +#define GEN7_MI_BATCH_BUFFER_START_length 0x00000002 + struct GEN7_MI_BATCH_BUFFER_START { uint32_t CommandType; uint32_t MICommandOpcode; @@ -4898,7 +5001,7 @@ struct GEN7_MI_BATCH_BUFFER_START { static inline void GEN7_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_BATCH_BUFFER_START * restrict values) + const struct GEN7_MI_BATCH_BUFFER_START * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4923,6 +5026,8 @@ GEN7_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * 
restrict dst, .CommandType = 0, \ .MICommandOpcode = 39 +#define GEN7_MI_CLFLUSH_length 0x00000000 + struct GEN7_MI_CLFLUSH { uint32_t CommandType; uint32_t MICommandOpcode; @@ -4938,7 +5043,7 @@ struct GEN7_MI_CLFLUSH { static inline void GEN7_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_CLFLUSH * restrict values) + const struct GEN7_MI_CLFLUSH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4965,7 +5070,6 @@ GEN7_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000002 #define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 #define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_header\ .CommandType = 0, \ @@ -4974,6 +5078,8 @@ GEN7_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, .CompareSemaphore = 0, \ .DwordLength = 0 +#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000002 + struct GEN7_MI_CONDITIONAL_BATCH_BUFFER_END { uint32_t CommandType; uint32_t MICommandOpcode; @@ -4986,7 +5092,7 @@ struct GEN7_MI_CONDITIONAL_BATCH_BUFFER_END { static inline void GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) + const struct GEN7_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5010,12 +5116,13 @@ GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict } -#define GEN7_MI_FLUSH_length 0x00000001 #define GEN7_MI_FLUSH_length_bias 0x00000001 #define GEN7_MI_FLUSH_header \ .CommandType = 0, \ .MICommandOpcode = 4 +#define GEN7_MI_FLUSH_length 0x00000001 + struct GEN7_MI_FLUSH { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5034,7 +5141,7 @@ struct GEN7_MI_FLUSH { static inline void GEN7_MI_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_FLUSH * restrict values) + const struct 
GEN7_MI_FLUSH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5050,13 +5157,14 @@ GEN7_MI_FLUSH_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_LOAD_REGISTER_IMM_length 0x00000003 #define GEN7_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 #define GEN7_MI_LOAD_REGISTER_IMM_header \ .CommandType = 0, \ .MICommandOpcode = 34, \ .DwordLength = 1 +#define GEN7_MI_LOAD_REGISTER_IMM_length 0x00000003 + struct GEN7_MI_LOAD_REGISTER_IMM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5068,7 +5176,7 @@ struct GEN7_MI_LOAD_REGISTER_IMM { static inline void GEN7_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_LOAD_REGISTER_IMM * restrict values) + const struct GEN7_MI_LOAD_REGISTER_IMM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5089,13 +5197,14 @@ GEN7_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_LOAD_REGISTER_MEM_length 0x00000003 #define GEN7_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 #define GEN7_MI_LOAD_REGISTER_MEM_header \ .CommandType = 0, \ .MICommandOpcode = 41, \ .DwordLength = 1 +#define GEN7_MI_LOAD_REGISTER_MEM_length 0x00000003 + struct GEN7_MI_LOAD_REGISTER_MEM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5108,7 +5217,7 @@ struct GEN7_MI_LOAD_REGISTER_MEM { static inline void GEN7_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_LOAD_REGISTER_MEM * restrict values) + const struct GEN7_MI_LOAD_REGISTER_MEM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5132,12 +5241,13 @@ GEN7_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_NOOP_length 0x00000001 #define GEN7_MI_NOOP_length_bias 0x00000001 #define GEN7_MI_NOOP_header \ .CommandType = 0, \ .MICommandOpcode = 0 +#define GEN7_MI_NOOP_length 0x00000001 + struct GEN7_MI_NOOP { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5147,7 +5257,7 @@ struct 
GEN7_MI_NOOP { static inline void GEN7_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_NOOP * restrict values) + const struct GEN7_MI_NOOP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5160,12 +5270,13 @@ GEN7_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_PREDICATE_length 0x00000001 #define GEN7_MI_PREDICATE_length_bias 0x00000001 #define GEN7_MI_PREDICATE_header \ .CommandType = 0, \ .MICommandOpcode = 12 +#define GEN7_MI_PREDICATE_length 0x00000001 + struct GEN7_MI_PREDICATE { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5185,7 +5296,7 @@ struct GEN7_MI_PREDICATE { static inline void GEN7_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_PREDICATE * restrict values) + const struct GEN7_MI_PREDICATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5199,12 +5310,13 @@ GEN7_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_REPORT_HEAD_length 0x00000001 #define GEN7_MI_REPORT_HEAD_length_bias 0x00000001 #define GEN7_MI_REPORT_HEAD_header \ .CommandType = 0, \ .MICommandOpcode = 7 +#define GEN7_MI_REPORT_HEAD_length 0x00000001 + struct GEN7_MI_REPORT_HEAD { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5212,7 +5324,7 @@ struct GEN7_MI_REPORT_HEAD { static inline void GEN7_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_REPORT_HEAD * restrict values) + const struct GEN7_MI_REPORT_HEAD * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5223,13 +5335,14 @@ GEN7_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_SEMAPHORE_MBOX_length 0x00000003 #define GEN7_MI_SEMAPHORE_MBOX_length_bias 0x00000002 #define GEN7_MI_SEMAPHORE_MBOX_header \ .CommandType = 0, \ .MICommandOpcode = 22, \ .DwordLength = 1 +#define GEN7_MI_SEMAPHORE_MBOX_length 0x00000003 + struct GEN7_MI_SEMAPHORE_MBOX { uint32_t CommandType; uint32_t 
MICommandOpcode; @@ -5243,7 +5356,7 @@ struct GEN7_MI_SEMAPHORE_MBOX { static inline void GEN7_MI_SEMAPHORE_MBOX_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_SEMAPHORE_MBOX * restrict values) + const struct GEN7_MI_SEMAPHORE_MBOX * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5263,13 +5376,14 @@ GEN7_MI_SEMAPHORE_MBOX_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_SET_CONTEXT_length 0x00000002 #define GEN7_MI_SET_CONTEXT_length_bias 0x00000002 #define GEN7_MI_SET_CONTEXT_header \ .CommandType = 0, \ .MICommandOpcode = 24, \ .DwordLength = 0 +#define GEN7_MI_SET_CONTEXT_length 0x00000002 + struct GEN7_MI_SET_CONTEXT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5284,7 +5398,7 @@ struct GEN7_MI_SET_CONTEXT { static inline void GEN7_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_SET_CONTEXT * restrict values) + const struct GEN7_MI_SET_CONTEXT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5307,13 +5421,14 @@ GEN7_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_STORE_DATA_IMM_length 0x00000004 #define GEN7_MI_STORE_DATA_IMM_length_bias 0x00000002 #define GEN7_MI_STORE_DATA_IMM_header \ .CommandType = 0, \ .MICommandOpcode = 32, \ .DwordLength = 2 +#define GEN7_MI_STORE_DATA_IMM_length 0x00000004 + struct GEN7_MI_STORE_DATA_IMM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5327,7 +5442,7 @@ struct GEN7_MI_STORE_DATA_IMM { static inline void GEN7_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_STORE_DATA_IMM * restrict values) + const struct GEN7_MI_STORE_DATA_IMM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5356,13 +5471,14 @@ GEN7_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_STORE_DATA_INDEX_length 0x00000003 #define GEN7_MI_STORE_DATA_INDEX_length_bias 0x00000002 #define 
GEN7_MI_STORE_DATA_INDEX_header \ .CommandType = 0, \ .MICommandOpcode = 33, \ .DwordLength = 1 +#define GEN7_MI_STORE_DATA_INDEX_length 0x00000003 + struct GEN7_MI_STORE_DATA_INDEX { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5374,7 +5490,7 @@ struct GEN7_MI_STORE_DATA_INDEX { static inline void GEN7_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_STORE_DATA_INDEX * restrict values) + const struct GEN7_MI_STORE_DATA_INDEX * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5398,12 +5514,13 @@ GEN7_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_SUSPEND_FLUSH_length 0x00000001 #define GEN7_MI_SUSPEND_FLUSH_length_bias 0x00000001 #define GEN7_MI_SUSPEND_FLUSH_header \ .CommandType = 0, \ .MICommandOpcode = 11 +#define GEN7_MI_SUSPEND_FLUSH_length 0x00000001 + struct GEN7_MI_SUSPEND_FLUSH { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5412,7 +5529,7 @@ struct GEN7_MI_SUSPEND_FLUSH { static inline void GEN7_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_SUSPEND_FLUSH * restrict values) + const struct GEN7_MI_SUSPEND_FLUSH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5424,12 +5541,13 @@ GEN7_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_TOPOLOGY_FILTER_length 0x00000001 #define GEN7_MI_TOPOLOGY_FILTER_length_bias 0x00000001 #define GEN7_MI_TOPOLOGY_FILTER_header \ .CommandType = 0, \ .MICommandOpcode = 13 +#define GEN7_MI_TOPOLOGY_FILTER_length 0x00000001 + struct GEN7_MI_TOPOLOGY_FILTER { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5438,7 +5556,7 @@ struct GEN7_MI_TOPOLOGY_FILTER { static inline void GEN7_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_TOPOLOGY_FILTER * restrict values) + const struct GEN7_MI_TOPOLOGY_FILTER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5455,6 
+5573,8 @@ GEN7_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, .CommandType = 0, \ .MICommandOpcode = 35 +#define GEN7_MI_UPDATE_GTT_length 0x00000000 + struct GEN7_MI_UPDATE_GTT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5468,7 +5588,7 @@ struct GEN7_MI_UPDATE_GTT { static inline void GEN7_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_UPDATE_GTT * restrict values) + const struct GEN7_MI_UPDATE_GTT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5488,13 +5608,14 @@ GEN7_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN7_MI_URB_CLEAR_length 0x00000002 #define GEN7_MI_URB_CLEAR_length_bias 0x00000002 #define GEN7_MI_URB_CLEAR_header \ .CommandType = 0, \ .MICommandOpcode = 25, \ .DwordLength = 0 +#define GEN7_MI_URB_CLEAR_length 0x00000002 + struct GEN7_MI_URB_CLEAR { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5505,7 +5626,7 @@ struct GEN7_MI_URB_CLEAR { static inline void GEN7_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_URB_CLEAR * restrict values) + const struct GEN7_MI_URB_CLEAR * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5522,12 +5643,13 @@ GEN7_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_USER_INTERRUPT_length 0x00000001 #define GEN7_MI_USER_INTERRUPT_length_bias 0x00000001 #define GEN7_MI_USER_INTERRUPT_header \ .CommandType = 0, \ .MICommandOpcode = 2 +#define GEN7_MI_USER_INTERRUPT_length 0x00000001 + struct GEN7_MI_USER_INTERRUPT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5535,7 +5657,7 @@ struct GEN7_MI_USER_INTERRUPT { static inline void GEN7_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_USER_INTERRUPT * restrict values) + const struct GEN7_MI_USER_INTERRUPT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5546,12 +5668,13 @@ 
GEN7_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MI_WAIT_FOR_EVENT_length 0x00000001 #define GEN7_MI_WAIT_FOR_EVENT_length_bias 0x00000001 #define GEN7_MI_WAIT_FOR_EVENT_header \ .CommandType = 0, \ .MICommandOpcode = 3 +#define GEN7_MI_WAIT_FOR_EVENT_length 0x00000001 + struct GEN7_MI_WAIT_FOR_EVENT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -5576,7 +5699,7 @@ struct GEN7_MI_WAIT_FOR_EVENT { static inline void GEN7_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_WAIT_FOR_EVENT * restrict values) + const struct GEN7_MI_WAIT_FOR_EVENT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5603,7 +5726,6 @@ GEN7_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_PIPE_CONTROL_length 0x00000005 #define GEN7_PIPE_CONTROL_length_bias 0x00000002 #define GEN7_PIPE_CONTROL_header \ .CommandType = 3, \ @@ -5612,6 +5734,8 @@ GEN7_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 0, \ .DwordLength = 3 +#define GEN7_PIPE_CONTROL_length 0x00000005 + struct GEN7_PIPE_CONTROL { uint32_t CommandType; uint32_t CommandSubType; @@ -5660,7 +5784,7 @@ struct GEN7_PIPE_CONTROL { static inline void GEN7_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_PIPE_CONTROL * restrict values) + const struct GEN7_PIPE_CONTROL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5712,16 +5836,6 @@ GEN7_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_3DSTATE_CONSTANT_BODY_length 0x00000006 - -#define GEN7_VERTEX_BUFFER_STATE_length 0x00000004 - -#define GEN7_VERTEX_ELEMENT_STATE_length 0x00000002 - -#define GEN7_SO_DECL_ENTRY_length 0x00000002 - -#define GEN7_SO_DECL_length 0x00000001 - #define GEN7_SCISSOR_RECT_length 0x00000002 struct GEN7_SCISSOR_RECT { @@ -5733,7 +5847,7 @@ struct GEN7_SCISSOR_RECT { static inline void 
GEN7_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SCISSOR_RECT * restrict values) + const struct GEN7_SCISSOR_RECT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5766,7 +5880,7 @@ struct GEN7_SF_CLIP_VIEWPORT { static inline void GEN7_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SF_CLIP_VIEWPORT * restrict values) + const struct GEN7_SF_CLIP_VIEWPORT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5911,7 +6025,7 @@ struct GEN7_BLEND_STATE { static inline void GEN7_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_BLEND_STATE * restrict values) + const struct GEN7_BLEND_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5957,7 +6071,7 @@ struct GEN7_CC_VIEWPORT { static inline void GEN7_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_CC_VIEWPORT * restrict values) + const struct GEN7_CC_VIEWPORT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5992,7 +6106,7 @@ struct GEN7_COLOR_CALC_STATE { static inline void GEN7_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_COLOR_CALC_STATE * restrict values) + const struct GEN7_COLOR_CALC_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6091,7 +6205,7 @@ struct GEN7_DEPTH_STENCIL_STATE { static inline void GEN7_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_DEPTH_STENCIL_STATE * restrict values) + const struct GEN7_DEPTH_STENCIL_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6124,8 +6238,6 @@ GEN7_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 - #define GEN7_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 struct GEN7_INTERFACE_DESCRIPTOR_DATA { @@ -6165,7 +6277,7 @@ struct GEN7_INTERFACE_DESCRIPTOR_DATA 
{ static inline void GEN7_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_INTERFACE_DESCRIPTOR_DATA * restrict values) + const struct GEN7_INTERFACE_DESCRIPTOR_DATA * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6212,8 +6324,6 @@ GEN7_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, } -#define GEN7_PALETTE_ENTRY_length 0x00000001 - #define GEN7_BINDING_TABLE_STATE_length 0x00000001 struct GEN7_BINDING_TABLE_STATE { @@ -6222,7 +6332,7 @@ struct GEN7_BINDING_TABLE_STATE { static inline void GEN7_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_BINDING_TABLE_STATE * restrict values) + const struct GEN7_BINDING_TABLE_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6243,7 +6353,7 @@ struct GEN7_RENDER_SURFACE_STATE { #define SURFTYPE_STRBUF 5 #define SURFTYPE_NULL 7 uint32_t SurfaceType; - uint32_t SurfaceArray; + bool SurfaceArray; uint32_t SurfaceFormat; uint32_t SurfaceVerticalAlignment; #define HALIGN_4 0 @@ -6264,7 +6374,7 @@ struct GEN7_RENDER_SURFACE_STATE { #define INTERLACED_FRAME 3 uint32_t MediaBoundaryPixelMode; uint32_t CubeFaceEnables; - uint32_t SurfaceBaseAddress; + __gen_address_type SurfaceBaseAddress; uint32_t Height; uint32_t Width; uint32_t Depth; @@ -6286,14 +6396,14 @@ struct GEN7_RENDER_SURFACE_STATE { uint32_t MinimumArrayElement0; uint32_t XOffset; uint32_t YOffset; - uint32_t SurfaceObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; uint32_t SurfaceMinLOD; uint32_t MIPCountLOD; - uint32_t MCSBaseAddress; + __gen_address_type MCSBaseAddress; uint32_t MCSSurfacePitch; - uint32_t AppendCounterAddress; - uint32_t AppendCounterEnable; - uint32_t MCSEnable; + __gen_address_type AppendCounterAddress; + bool AppendCounterEnable; + bool MCSEnable; uint32_t XOffsetforUVPlane; uint32_t YOffsetforUVPlane; #define CC_ZERO 0 @@ -6308,12 +6418,12 @@ struct 
GEN7_RENDER_SURFACE_STATE { #define CC_ZERO 0 #define CC_ONE 1 uint32_t AlphaClearColor; - uint32_t ResourceMinLOD; + float ResourceMinLOD; }; static inline void GEN7_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_RENDER_SURFACE_STATE * restrict values) + const struct GEN7_RENDER_SURFACE_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6333,10 +6443,12 @@ GEN7_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->CubeFaceEnables, 0, 5) | 0; - dw[1] = - __gen_field(values->SurfaceBaseAddress, 0, 31) | + uint32_t dw1 = 0; + dw[1] = + __gen_combine_address(data, &dw[1], values->SurfaceBaseAddress, dw1); + dw[2] = __gen_field(values->Height, 16, 29) | __gen_field(values->Width, 0, 13) | @@ -6357,30 +6469,33 @@ GEN7_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->MinimumArrayElement, 0, 26) | 0; + uint32_t dw_SurfaceObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); dw[5] = - __gen_field(values->XOffset, 25, 31) | - __gen_field(values->YOffset, 20, 23) | - __gen_field(values->SurfaceObjectControlState, 16, 19) | + __gen_offset(values->XOffset, 25, 31) | + __gen_offset(values->YOffset, 20, 23) | + __gen_field(dw_SurfaceObjectControlState, 16, 19) | __gen_field(values->SurfaceMinLOD, 4, 7) | __gen_field(values->MIPCountLOD, 0, 3) | 0; - dw[6] = - __gen_field(values->MCSBaseAddress, 12, 31) | + uint32_t dw6 = __gen_field(values->MCSSurfacePitch, 3, 11) | - __gen_field(values->AppendCounterAddress, 6, 31) | __gen_field(values->AppendCounterEnable, 1, 1) | __gen_field(values->MCSEnable, 0, 0) | __gen_field(values->XOffsetforUVPlane, 16, 29) | __gen_field(values->YOffsetforUVPlane, 0, 13) | 0; + dw[6] = + __gen_combine_address(data, &dw[6], values->AppendCounterAddress, dw6); + dw[7] = __gen_field(values->RedClearColor, 31, 31) | 
__gen_field(values->GreenClearColor, 30, 30) | __gen_field(values->BlueClearColor, 29, 29) | __gen_field(values->AlphaClearColor, 28, 28) | - __gen_field(values->ResourceMinLOD, 0, 11) | + __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | 0; } @@ -6388,24 +6503,24 @@ GEN7_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, #define GEN7_SAMPLER_BORDER_COLOR_STATE_length 0x00000004 struct GEN7_SAMPLER_BORDER_COLOR_STATE { - uint32_t BorderColorRedDX100GL; + float BorderColorRedDX100GL; uint32_t BorderColorAlpha; uint32_t BorderColorBlue; uint32_t BorderColorGreen; uint32_t BorderColorRedDX9; - uint32_t BorderColorGreen0; - uint32_t BorderColorBlue0; - uint32_t BorderColorAlpha0; + float BorderColorGreen0; + float BorderColorBlue0; + float BorderColorAlpha0; }; static inline void GEN7_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SAMPLER_BORDER_COLOR_STATE * restrict values) + const struct GEN7_SAMPLER_BORDER_COLOR_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; dw[0] = - __gen_field(values->BorderColorRedDX100GL, 0, 31) | + __gen_float(values->BorderColorRedDX100GL) | __gen_field(values->BorderColorAlpha, 24, 31) | __gen_field(values->BorderColorBlue, 16, 23) | __gen_field(values->BorderColorGreen, 8, 15) | @@ -6413,15 +6528,15 @@ GEN7_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, 0; dw[1] = - __gen_field(values->BorderColorGreen, 0, 31) | + __gen_float(values->BorderColorGreen) | 0; dw[2] = - __gen_field(values->BorderColorBlue, 0, 31) | + __gen_float(values->BorderColorBlue) | 0; dw[3] = - __gen_field(values->BorderColorAlpha, 0, 31) | + __gen_float(values->BorderColorAlpha) | 0; } @@ -6501,7 +6616,7 @@ struct GEN7_SAMPLER_STATE { static inline void GEN7_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SAMPLER_STATE * restrict values) + const struct GEN7_SAMPLER_STATE * restrict values) { uint32_t *dw = 
(uint32_t * restrict) dst; diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index cafccc94741..4d6b7c0a04d 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -92,7 +92,6 @@ __gen_float(float v) #endif -#define GEN8_3DSTATE_URB_VS_length 0x00000002 #define GEN8_3DSTATE_URB_VS_length_bias 0x00000002 #define GEN8_3DSTATE_URB_VS_header \ .CommandType = 3, \ @@ -101,6 +100,8 @@ __gen_float(float v) ._3DCommandSubOpcode = 48, \ .DwordLength = 0 +#define GEN8_3DSTATE_URB_VS_length 0x00000002 + struct GEN8_3DSTATE_URB_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -114,7 +115,7 @@ struct GEN8_3DSTATE_URB_VS { static inline void GEN8_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_URB_VS * restrict values) + const struct GEN8_3DSTATE_URB_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -134,7 +135,6 @@ GEN8_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_VS_length 0x00000009 #define GEN8_3DSTATE_VS_length_bias 0x00000002 #define GEN8_3DSTATE_VS_header \ .CommandType = 3, \ @@ -143,6 +143,8 @@ GEN8_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 16, \ .DwordLength = 7 +#define GEN8_3DSTATE_VS_length 0x00000009 + struct GEN8_3DSTATE_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -190,7 +192,7 @@ struct GEN8_3DSTATE_VS { static inline void GEN8_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VS * restrict values) + const struct GEN8_3DSTATE_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -252,7 +254,6 @@ GEN8_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_GPGPU_CSR_BASE_ADDRESS_length 0x00000003 #define GEN8_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 #define GEN8_GPGPU_CSR_BASE_ADDRESS_header \ .CommandType = 3, \ @@ -261,6 +262,8 @@ GEN8_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, 
._3DCommandSubOpcode = 4, \ .DwordLength = 1 +#define GEN8_GPGPU_CSR_BASE_ADDRESS_length 0x00000003 + struct GEN8_GPGPU_CSR_BASE_ADDRESS { uint32_t CommandType; uint32_t CommandSubType; @@ -272,7 +275,7 @@ struct GEN8_GPGPU_CSR_BASE_ADDRESS { static inline void GEN8_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_GPGPU_CSR_BASE_ADDRESS * restrict values) + const struct GEN8_GPGPU_CSR_BASE_ADDRESS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -295,12 +298,13 @@ GEN8_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_ATOMIC_length 0x00000003 #define GEN8_MI_ATOMIC_length_bias 0x00000002 #define GEN8_MI_ATOMIC_header \ .CommandType = 0, \ .MICommandOpcode = 47 +#define GEN8_MI_ATOMIC_length 0x00000003 + struct GEN8_MI_ATOMIC { uint32_t CommandType; uint32_t MICommandOpcode; @@ -331,7 +335,7 @@ struct GEN8_MI_ATOMIC { static inline void GEN8_MI_ATOMIC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_ATOMIC * restrict values) + const struct GEN8_MI_ATOMIC * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -391,13 +395,14 @@ GEN8_MI_ATOMIC_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_LOAD_REGISTER_REG_length 0x00000003 #define GEN8_MI_LOAD_REGISTER_REG_length_bias 0x00000002 #define GEN8_MI_LOAD_REGISTER_REG_header \ .CommandType = 0, \ .MICommandOpcode = 42, \ .DwordLength = 1 +#define GEN8_MI_LOAD_REGISTER_REG_length 0x00000003 + struct GEN8_MI_LOAD_REGISTER_REG { uint32_t CommandType; uint32_t MICommandOpcode; @@ -408,7 +413,7 @@ struct GEN8_MI_LOAD_REGISTER_REG { static inline void GEN8_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_LOAD_REGISTER_REG * restrict values) + const struct GEN8_MI_LOAD_REGISTER_REG * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -428,13 +433,14 @@ GEN8_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * 
restrict dst, } -#define GEN8_MI_SEMAPHORE_SIGNAL_length 0x00000002 #define GEN8_MI_SEMAPHORE_SIGNAL_length_bias 0x00000002 #define GEN8_MI_SEMAPHORE_SIGNAL_header \ .CommandType = 0, \ .MICommandOpcode = 27, \ .DwordLength = 0 +#define GEN8_MI_SEMAPHORE_SIGNAL_length 0x00000002 + struct GEN8_MI_SEMAPHORE_SIGNAL { uint32_t CommandType; uint32_t MICommandOpcode; @@ -451,7 +457,7 @@ struct GEN8_MI_SEMAPHORE_SIGNAL { static inline void GEN8_MI_SEMAPHORE_SIGNAL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_SEMAPHORE_SIGNAL * restrict values) + const struct GEN8_MI_SEMAPHORE_SIGNAL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -469,13 +475,14 @@ GEN8_MI_SEMAPHORE_SIGNAL_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_SEMAPHORE_WAIT_length 0x00000004 #define GEN8_MI_SEMAPHORE_WAIT_length_bias 0x00000002 #define GEN8_MI_SEMAPHORE_WAIT_header \ .CommandType = 0, \ .MICommandOpcode = 28, \ .DwordLength = 2 +#define GEN8_MI_SEMAPHORE_WAIT_length 0x00000004 + struct GEN8_MI_SEMAPHORE_WAIT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -499,7 +506,7 @@ struct GEN8_MI_SEMAPHORE_WAIT { static inline void GEN8_MI_SEMAPHORE_WAIT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_SEMAPHORE_WAIT * restrict values) + const struct GEN8_MI_SEMAPHORE_WAIT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -527,13 +534,14 @@ GEN8_MI_SEMAPHORE_WAIT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_STORE_REGISTER_MEM_length 0x00000004 #define GEN8_MI_STORE_REGISTER_MEM_length_bias 0x00000002 #define GEN8_MI_STORE_REGISTER_MEM_header \ .CommandType = 0, \ .MICommandOpcode = 36, \ .DwordLength = 2 +#define GEN8_MI_STORE_REGISTER_MEM_length 0x00000004 + struct GEN8_MI_STORE_REGISTER_MEM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -546,7 +554,7 @@ struct GEN8_MI_STORE_REGISTER_MEM { static inline void GEN8_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void 
* restrict dst, - const struct GEN8_MI_STORE_REGISTER_MEM * restrict values) + const struct GEN8_MI_STORE_REGISTER_MEM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -573,7 +581,6 @@ GEN8_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_PIPELINE_SELECT_length 0x00000001 #define GEN8_PIPELINE_SELECT_length_bias 0x00000001 #define GEN8_PIPELINE_SELECT_header \ .CommandType = 3, \ @@ -581,6 +588,8 @@ GEN8_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 4 +#define GEN8_PIPELINE_SELECT_length 0x00000001 + struct GEN8_PIPELINE_SELECT { uint32_t CommandType; uint32_t CommandSubType; @@ -594,7 +603,7 @@ struct GEN8_PIPELINE_SELECT { static inline void GEN8_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_PIPELINE_SELECT * restrict values) + const struct GEN8_PIPELINE_SELECT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -608,7 +617,6 @@ GEN8_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_STATE_BASE_ADDRESS_length 0x00000010 #define GEN8_STATE_BASE_ADDRESS_length_bias 0x00000002 #define GEN8_STATE_BASE_ADDRESS_header \ .CommandType = 3, \ @@ -617,6 +625,10 @@ GEN8_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 1, \ .DwordLength = 14 +#define GEN8_STATE_BASE_ADDRESS_length 0x00000010 + +#define GEN8_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + struct GEN8_MEMORY_OBJECT_CONTROL_STATE { #define UCwithFenceifcoherentcycle 0 #define UCUncacheable 1 @@ -633,7 +645,7 @@ struct GEN8_MEMORY_OBJECT_CONTROL_STATE { static inline void GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEMORY_OBJECT_CONTROL_STATE * restrict values) + const struct GEN8_MEMORY_OBJECT_CONTROL_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -679,7 +691,7 @@ struct 
GEN8_STATE_BASE_ADDRESS { static inline void GEN8_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_STATE_BASE_ADDRESS * restrict values) + const struct GEN8_STATE_BASE_ADDRESS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -784,7 +796,6 @@ GEN8_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_STATE_PREFETCH_length 0x00000002 #define GEN8_STATE_PREFETCH_length_bias 0x00000002 #define GEN8_STATE_PREFETCH_header \ .CommandType = 3, \ @@ -793,6 +804,8 @@ GEN8_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 3, \ .DwordLength = 0 +#define GEN8_STATE_PREFETCH_length 0x00000002 + struct GEN8_STATE_PREFETCH { uint32_t CommandType; uint32_t CommandSubType; @@ -805,7 +818,7 @@ struct GEN8_STATE_PREFETCH { static inline void GEN8_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_STATE_PREFETCH * restrict values) + const struct GEN8_STATE_PREFETCH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -826,7 +839,6 @@ GEN8_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_STATE_SIP_length 0x00000003 #define GEN8_STATE_SIP_length_bias 0x00000002 #define GEN8_STATE_SIP_header \ .CommandType = 3, \ @@ -835,6 +847,8 @@ GEN8_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 2, \ .DwordLength = 1 +#define GEN8_STATE_SIP_length 0x00000003 + struct GEN8_STATE_SIP { uint32_t CommandType; uint32_t CommandSubType; @@ -846,7 +860,7 @@ struct GEN8_STATE_SIP { static inline void GEN8_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_STATE_SIP * restrict values) + const struct GEN8_STATE_SIP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -867,7 +881,6 @@ GEN8_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_SWTESS_BASE_ADDRESS_length 0x00000002 #define 
GEN8_SWTESS_BASE_ADDRESS_length_bias 0x00000002 #define GEN8_SWTESS_BASE_ADDRESS_header \ .CommandType = 3, \ @@ -876,6 +889,8 @@ GEN8_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 3, \ .DwordLength = 0 +#define GEN8_SWTESS_BASE_ADDRESS_length 0x00000002 + struct GEN8_SWTESS_BASE_ADDRESS { uint32_t CommandType; uint32_t CommandSubType; @@ -888,7 +903,7 @@ struct GEN8_SWTESS_BASE_ADDRESS { static inline void GEN8_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SWTESS_BASE_ADDRESS * restrict values) + const struct GEN8_SWTESS_BASE_ADDRESS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -914,7 +929,6 @@ GEN8_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DPRIMITIVE_length 0x00000007 #define GEN8_3DPRIMITIVE_length_bias 0x00000002 #define GEN8_3DPRIMITIVE_header \ .CommandType = 3, \ @@ -923,6 +937,8 @@ GEN8_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 0, \ .DwordLength = 5 +#define GEN8_3DPRIMITIVE_length 0x00000007 + struct GEN8_3DPRIMITIVE { uint32_t CommandType; uint32_t CommandSubType; @@ -946,7 +962,7 @@ struct GEN8_3DPRIMITIVE { static inline void GEN8_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DPRIMITIVE * restrict values) + const struct GEN8_3DPRIMITIVE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -989,7 +1005,6 @@ GEN8_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 #define GEN8_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 #define GEN8_3DSTATE_AA_LINE_PARAMETERS_header \ .CommandType = 3, \ @@ -998,6 +1013,8 @@ GEN8_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 10, \ .DwordLength = 1 +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 + struct GEN8_3DSTATE_AA_LINE_PARAMETERS { uint32_t CommandType; 
uint32_t CommandSubType; @@ -1016,7 +1033,7 @@ struct GEN8_3DSTATE_AA_LINE_PARAMETERS { static inline void GEN8_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_AA_LINE_PARAMETERS * restrict values) + const struct GEN8_3DSTATE_AA_LINE_PARAMETERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1051,6 +1068,10 @@ GEN8_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 70 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_length 0x00000000 + +#define GEN8_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 + struct GEN8_BINDING_TABLE_EDIT_ENTRY { uint32_t BindingTableIndex; uint32_t SurfaceStatePointer; @@ -1058,7 +1079,7 @@ struct GEN8_BINDING_TABLE_EDIT_ENTRY { static inline void GEN8_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_BINDING_TABLE_EDIT_ENTRY * restrict values) + const struct GEN8_BINDING_TABLE_EDIT_ENTRY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1085,7 +1106,7 @@ struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS { static inline void GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1112,6 +1133,8 @@ GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict d ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 68 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_length 0x00000000 + struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -1128,7 +1151,7 @@ struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS { static inline void GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) + const struct 
GEN8_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1155,6 +1178,8 @@ GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict d ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 69 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_length 0x00000000 + struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -1171,7 +1196,7 @@ struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS { static inline void GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1198,6 +1223,8 @@ GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict d ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 71 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_length 0x00000000 + struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -1214,7 +1241,7 @@ struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS { static inline void GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1241,6 +1268,8 @@ GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict d ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 67 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_length 0x00000000 + struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -1257,7 +1286,7 @@ struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS { static inline void GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) + const struct 
GEN8_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1277,7 +1306,6 @@ GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict d /* variable length fields follow */ } -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 #define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 #define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ .CommandType = 3, \ @@ -1286,6 +1314,8 @@ GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict d ._3DCommandSubOpcode = 40, \ .DwordLength = 0 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 + struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -1297,7 +1327,7 @@ struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS { static inline void GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1315,7 +1345,6 @@ GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restri } -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 #define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 #define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ .CommandType = 3, \ @@ -1324,6 +1353,8 @@ GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 41, \ .DwordLength = 0 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 + struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -1335,7 +1366,7 @@ struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS { static inline void GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict 
values) + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1353,7 +1384,6 @@ GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restri } -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 #define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 #define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ .CommandType = 3, \ @@ -1362,6 +1392,8 @@ GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 39, \ .DwordLength = 0 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 + struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -1373,7 +1405,7 @@ struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS { static inline void GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1391,7 +1423,6 @@ GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restri } -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 #define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 #define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ .CommandType = 3, \ @@ -1400,6 +1431,8 @@ GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 42, \ .DwordLength = 0 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 + struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -1411,7 +1444,7 @@ struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS { static inline void GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) + 
const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1429,7 +1462,6 @@ GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restri } -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 #define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 #define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ .CommandType = 3, \ @@ -1438,6 +1470,8 @@ GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 38, \ .DwordLength = 0 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 + struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -1449,7 +1483,7 @@ struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS { static inline void GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1467,7 +1501,6 @@ GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restri } -#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000004 #define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 #define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ .CommandType = 3, \ @@ -1476,6 +1509,8 @@ GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 25, \ .DwordLength = 2 +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000004 + struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC { uint32_t CommandType; uint32_t CommandSubType; @@ -1491,7 +1526,7 @@ struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC { static inline void GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) + const struct 
GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1522,7 +1557,6 @@ GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restric } -#define GEN8_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 #define GEN8_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 #define GEN8_3DSTATE_BLEND_STATE_POINTERS_header\ .CommandType = 3, \ @@ -1531,6 +1565,8 @@ GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restric ._3DCommandSubOpcode = 36, \ .DwordLength = 0 +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 + struct GEN8_3DSTATE_BLEND_STATE_POINTERS { uint32_t CommandType; uint32_t CommandSubType; @@ -1543,7 +1579,7 @@ struct GEN8_3DSTATE_BLEND_STATE_POINTERS { static inline void GEN8_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BLEND_STATE_POINTERS * restrict values) + const struct GEN8_3DSTATE_BLEND_STATE_POINTERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1562,7 +1598,6 @@ GEN8_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict ds } -#define GEN8_3DSTATE_CC_STATE_POINTERS_length 0x00000002 #define GEN8_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 #define GEN8_3DSTATE_CC_STATE_POINTERS_header \ .CommandType = 3, \ @@ -1571,6 +1606,8 @@ GEN8_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict ds ._3DCommandSubOpcode = 14, \ .DwordLength = 0 +#define GEN8_3DSTATE_CC_STATE_POINTERS_length 0x00000002 + struct GEN8_3DSTATE_CC_STATE_POINTERS { uint32_t CommandType; uint32_t CommandSubType; @@ -1583,7 +1620,7 @@ struct GEN8_3DSTATE_CC_STATE_POINTERS { static inline void GEN8_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CC_STATE_POINTERS * restrict values) + const struct GEN8_3DSTATE_CC_STATE_POINTERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1602,7 +1639,6 @@ 
GEN8_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_CHROMA_KEY_length 0x00000004 #define GEN8_3DSTATE_CHROMA_KEY_length_bias 0x00000002 #define GEN8_3DSTATE_CHROMA_KEY_header \ .CommandType = 3, \ @@ -1611,6 +1647,8 @@ GEN8_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 4, \ .DwordLength = 2 +#define GEN8_3DSTATE_CHROMA_KEY_length 0x00000004 + struct GEN8_3DSTATE_CHROMA_KEY { uint32_t CommandType; uint32_t CommandSubType; @@ -1624,7 +1662,7 @@ struct GEN8_3DSTATE_CHROMA_KEY { static inline void GEN8_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CHROMA_KEY * restrict values) + const struct GEN8_3DSTATE_CHROMA_KEY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1650,7 +1688,6 @@ GEN8_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_CLEAR_PARAMS_length 0x00000003 #define GEN8_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 #define GEN8_3DSTATE_CLEAR_PARAMS_header \ .CommandType = 3, \ @@ -1659,6 +1696,8 @@ GEN8_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 4, \ .DwordLength = 1 +#define GEN8_3DSTATE_CLEAR_PARAMS_length 0x00000003 + struct GEN8_3DSTATE_CLEAR_PARAMS { uint32_t CommandType; uint32_t CommandSubType; @@ -1671,7 +1710,7 @@ struct GEN8_3DSTATE_CLEAR_PARAMS { static inline void GEN8_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CLEAR_PARAMS * restrict values) + const struct GEN8_3DSTATE_CLEAR_PARAMS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1693,7 +1732,6 @@ GEN8_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_CLIP_length 0x00000004 #define GEN8_3DSTATE_CLIP_length_bias 0x00000002 #define GEN8_3DSTATE_CLIP_header \ .CommandType = 3, \ @@ -1702,6 +1740,8 @@ 
GEN8_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 18, \ .DwordLength = 2 +#define GEN8_3DSTATE_CLIP_length 0x00000004 + struct GEN8_3DSTATE_CLIP { uint32_t CommandType; uint32_t CommandSubType; @@ -1746,7 +1786,7 @@ struct GEN8_3DSTATE_CLIP { static inline void GEN8_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CLIP * restrict values) + const struct GEN8_3DSTATE_CLIP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1791,7 +1831,6 @@ GEN8_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_CONSTANT_DS_length 0x0000000b #define GEN8_3DSTATE_CONSTANT_DS_length_bias 0x00000002 #define GEN8_3DSTATE_CONSTANT_DS_header \ .CommandType = 3, \ @@ -1800,6 +1839,10 @@ GEN8_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 26, \ .DwordLength = 9 +#define GEN8_3DSTATE_CONSTANT_DS_length 0x0000000b + +#define GEN8_3DSTATE_CONSTANT_BODY_length 0x0000000a + struct GEN8_3DSTATE_CONSTANT_BODY { uint32_t ConstantBuffer1ReadLength; uint32_t ConstantBuffer0ReadLength; @@ -1813,7 +1856,7 @@ struct GEN8_3DSTATE_CONSTANT_BODY { static inline void GEN8_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_BODY * restrict values) + const struct GEN8_3DSTATE_CONSTANT_BODY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1877,7 +1920,7 @@ struct GEN8_3DSTATE_CONSTANT_DS { static inline void GEN8_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_DS * restrict values) + const struct GEN8_3DSTATE_CONSTANT_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1895,7 +1938,6 @@ GEN8_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN8_3DSTATE_CONSTANT_GS_length 0x0000000b #define 
GEN8_3DSTATE_CONSTANT_GS_length_bias 0x00000002 #define GEN8_3DSTATE_CONSTANT_GS_header \ .CommandType = 3, \ @@ -1904,6 +1946,8 @@ GEN8_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 22, \ .DwordLength = 9 +#define GEN8_3DSTATE_CONSTANT_GS_length 0x0000000b + struct GEN8_3DSTATE_CONSTANT_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -1916,7 +1960,7 @@ struct GEN8_3DSTATE_CONSTANT_GS { static inline void GEN8_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_GS * restrict values) + const struct GEN8_3DSTATE_CONSTANT_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1934,7 +1978,6 @@ GEN8_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN8_3DSTATE_CONSTANT_HS_length 0x0000000b #define GEN8_3DSTATE_CONSTANT_HS_length_bias 0x00000002 #define GEN8_3DSTATE_CONSTANT_HS_header \ .CommandType = 3, \ @@ -1943,6 +1986,8 @@ GEN8_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 25, \ .DwordLength = 9 +#define GEN8_3DSTATE_CONSTANT_HS_length 0x0000000b + struct GEN8_3DSTATE_CONSTANT_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -1955,7 +2000,7 @@ struct GEN8_3DSTATE_CONSTANT_HS { static inline void GEN8_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_HS * restrict values) + const struct GEN8_3DSTATE_CONSTANT_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -1973,7 +2018,6 @@ GEN8_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN8_3DSTATE_CONSTANT_PS_length 0x0000000b #define GEN8_3DSTATE_CONSTANT_PS_length_bias 0x00000002 #define GEN8_3DSTATE_CONSTANT_PS_header \ .CommandType = 3, \ @@ -1982,6 +2026,8 @@ 
GEN8_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 23, \ .DwordLength = 9 +#define GEN8_3DSTATE_CONSTANT_PS_length 0x0000000b + struct GEN8_3DSTATE_CONSTANT_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -1994,7 +2040,7 @@ struct GEN8_3DSTATE_CONSTANT_PS { static inline void GEN8_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_PS * restrict values) + const struct GEN8_3DSTATE_CONSTANT_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2012,7 +2058,6 @@ GEN8_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN8_3DSTATE_CONSTANT_VS_length 0x0000000b #define GEN8_3DSTATE_CONSTANT_VS_length_bias 0x00000002 #define GEN8_3DSTATE_CONSTANT_VS_header \ .CommandType = 3, \ @@ -2021,6 +2066,8 @@ GEN8_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 21, \ .DwordLength = 9 +#define GEN8_3DSTATE_CONSTANT_VS_length 0x0000000b + struct GEN8_3DSTATE_CONSTANT_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -2033,7 +2080,7 @@ struct GEN8_3DSTATE_CONSTANT_VS { static inline void GEN8_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_VS * restrict values) + const struct GEN8_3DSTATE_CONSTANT_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2051,7 +2098,6 @@ GEN8_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); } -#define GEN8_3DSTATE_DEPTH_BUFFER_length 0x00000008 #define GEN8_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 #define GEN8_3DSTATE_DEPTH_BUFFER_header \ .CommandType = 3, \ @@ -2060,6 +2106,8 @@ GEN8_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 5, \ .DwordLength = 6 +#define 
GEN8_3DSTATE_DEPTH_BUFFER_length 0x00000008 + struct GEN8_3DSTATE_DEPTH_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -2093,7 +2141,7 @@ struct GEN8_3DSTATE_DEPTH_BUFFER { static inline void GEN8_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_DEPTH_BUFFER * restrict values) + const struct GEN8_3DSTATE_DEPTH_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2147,7 +2195,6 @@ GEN8_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 #define GEN8_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 #define GEN8_3DSTATE_DRAWING_RECTANGLE_header \ .CommandType = 3, \ @@ -2156,6 +2203,8 @@ GEN8_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 0, \ .DwordLength = 2 +#define GEN8_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 + struct GEN8_3DSTATE_DRAWING_RECTANGLE { uint32_t CommandType; uint32_t CommandSubType; @@ -2176,7 +2225,7 @@ struct GEN8_3DSTATE_DRAWING_RECTANGLE { static inline void GEN8_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_DRAWING_RECTANGLE * restrict values) + const struct GEN8_3DSTATE_DRAWING_RECTANGLE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2206,7 +2255,6 @@ GEN8_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_DS_length 0x00000009 #define GEN8_3DSTATE_DS_length_bias 0x00000002 #define GEN8_3DSTATE_DS_header \ .CommandType = 3, \ @@ -2215,6 +2263,8 @@ GEN8_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 29, \ .DwordLength = 7 +#define GEN8_3DSTATE_DS_length 0x00000009 + struct GEN8_3DSTATE_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -2263,7 +2313,7 @@ struct GEN8_3DSTATE_DS { static inline void GEN8_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, - 
const struct GEN8_3DSTATE_DS * restrict values) + const struct GEN8_3DSTATE_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2333,6 +2383,10 @@ GEN8_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 55 +#define GEN8_3DSTATE_GATHER_CONSTANT_DS_length 0x00000000 + +#define GEN8_GATHER_CONSTANT_ENTRY_length 0x00000001 + struct GEN8_GATHER_CONSTANT_ENTRY { uint32_t ConstantBufferOffset; uint32_t ChannelMask; @@ -2341,7 +2395,7 @@ struct GEN8_GATHER_CONSTANT_ENTRY { static inline void GEN8_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_GATHER_CONSTANT_ENTRY * restrict values) + const struct GEN8_GATHER_CONSTANT_ENTRY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2368,7 +2422,7 @@ struct GEN8_3DSTATE_GATHER_CONSTANT_DS { static inline void GEN8_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GATHER_CONSTANT_DS * restrict values) + const struct GEN8_3DSTATE_GATHER_CONSTANT_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2400,6 +2454,8 @@ GEN8_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 53 +#define GEN8_3DSTATE_GATHER_CONSTANT_GS_length 0x00000000 + struct GEN8_3DSTATE_GATHER_CONSTANT_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -2415,7 +2471,7 @@ struct GEN8_3DSTATE_GATHER_CONSTANT_GS { static inline void GEN8_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GATHER_CONSTANT_GS * restrict values) + const struct GEN8_3DSTATE_GATHER_CONSTANT_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2447,6 +2503,8 @@ GEN8_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 54 +#define GEN8_3DSTATE_GATHER_CONSTANT_HS_length 0x00000000 
+ struct GEN8_3DSTATE_GATHER_CONSTANT_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -2462,7 +2520,7 @@ struct GEN8_3DSTATE_GATHER_CONSTANT_HS { static inline void GEN8_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GATHER_CONSTANT_HS * restrict values) + const struct GEN8_3DSTATE_GATHER_CONSTANT_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2494,6 +2552,8 @@ GEN8_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 56 +#define GEN8_3DSTATE_GATHER_CONSTANT_PS_length 0x00000000 + struct GEN8_3DSTATE_GATHER_CONSTANT_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -2510,7 +2570,7 @@ struct GEN8_3DSTATE_GATHER_CONSTANT_PS { static inline void GEN8_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GATHER_CONSTANT_PS * restrict values) + const struct GEN8_3DSTATE_GATHER_CONSTANT_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2543,6 +2603,8 @@ GEN8_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 52 +#define GEN8_3DSTATE_GATHER_CONSTANT_VS_length 0x00000000 + struct GEN8_3DSTATE_GATHER_CONSTANT_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -2559,7 +2621,7 @@ struct GEN8_3DSTATE_GATHER_CONSTANT_VS { static inline void GEN8_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GATHER_CONSTANT_VS * restrict values) + const struct GEN8_3DSTATE_GATHER_CONSTANT_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2585,7 +2647,6 @@ GEN8_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN8_3DSTATE_GATHER_POOL_ALLOC_length 0x00000004 #define GEN8_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 #define 
GEN8_3DSTATE_GATHER_POOL_ALLOC_header \ .CommandType = 3, \ @@ -2594,6 +2655,8 @@ GEN8_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 26, \ .DwordLength = 2 +#define GEN8_3DSTATE_GATHER_POOL_ALLOC_length 0x00000004 + struct GEN8_3DSTATE_GATHER_POOL_ALLOC { uint32_t CommandType; uint32_t CommandSubType; @@ -2608,7 +2671,7 @@ struct GEN8_3DSTATE_GATHER_POOL_ALLOC { static inline void GEN8_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GATHER_POOL_ALLOC * restrict values) + const struct GEN8_3DSTATE_GATHER_POOL_ALLOC * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2639,7 +2702,6 @@ GEN8_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_GS_length 0x0000000a #define GEN8_3DSTATE_GS_length_bias 0x00000002 #define GEN8_3DSTATE_GS_header \ .CommandType = 3, \ @@ -2648,6 +2710,8 @@ GEN8_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 17, \ .DwordLength = 8 +#define GEN8_3DSTATE_GS_length 0x0000000a + struct GEN8_3DSTATE_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -2716,7 +2780,7 @@ struct GEN8_3DSTATE_GS { static inline void GEN8_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GS * restrict values) + const struct GEN8_3DSTATE_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2796,7 +2860,6 @@ GEN8_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000005 #define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 #define GEN8_3DSTATE_HIER_DEPTH_BUFFER_header \ .CommandType = 3, \ @@ -2805,6 +2868,8 @@ GEN8_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 7, \ .DwordLength = 3 +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000005 + struct GEN8_3DSTATE_HIER_DEPTH_BUFFER { uint32_t 
CommandType; uint32_t CommandSubType; @@ -2819,7 +2884,7 @@ struct GEN8_3DSTATE_HIER_DEPTH_BUFFER { static inline void GEN8_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_HIER_DEPTH_BUFFER * restrict values) + const struct GEN8_3DSTATE_HIER_DEPTH_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2853,7 +2918,6 @@ GEN8_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_HS_length 0x00000009 #define GEN8_3DSTATE_HS_length_bias 0x00000002 #define GEN8_3DSTATE_HS_header \ .CommandType = 3, \ @@ -2862,6 +2926,8 @@ GEN8_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 27, \ .DwordLength = 7 +#define GEN8_3DSTATE_HS_length 0x00000009 + struct GEN8_3DSTATE_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -2903,7 +2969,7 @@ struct GEN8_3DSTATE_HS { static inline void GEN8_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_HS * restrict values) + const struct GEN8_3DSTATE_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -2961,7 +3027,6 @@ GEN8_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_INDEX_BUFFER_length 0x00000005 #define GEN8_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 #define GEN8_3DSTATE_INDEX_BUFFER_header \ .CommandType = 3, \ @@ -2970,6 +3035,8 @@ GEN8_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 10, \ .DwordLength = 3 +#define GEN8_3DSTATE_INDEX_BUFFER_length 0x00000005 + struct GEN8_3DSTATE_INDEX_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -2987,7 +3054,7 @@ struct GEN8_3DSTATE_INDEX_BUFFER { static inline void GEN8_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_INDEX_BUFFER * restrict values) + const struct GEN8_3DSTATE_INDEX_BUFFER * restrict values) { uint32_t *dw = (uint32_t * 
restrict) dst; @@ -3021,7 +3088,6 @@ GEN8_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_LINE_STIPPLE_length 0x00000003 #define GEN8_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 #define GEN8_3DSTATE_LINE_STIPPLE_header \ .CommandType = 3, \ @@ -3030,6 +3096,8 @@ GEN8_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 8, \ .DwordLength = 1 +#define GEN8_3DSTATE_LINE_STIPPLE_length 0x00000003 + struct GEN8_3DSTATE_LINE_STIPPLE { uint32_t CommandType; uint32_t CommandSubType; @@ -3046,7 +3114,7 @@ struct GEN8_3DSTATE_LINE_STIPPLE { static inline void GEN8_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_LINE_STIPPLE * restrict values) + const struct GEN8_3DSTATE_LINE_STIPPLE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3072,7 +3140,6 @@ GEN8_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_MONOFILTER_SIZE_length 0x00000002 #define GEN8_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 #define GEN8_3DSTATE_MONOFILTER_SIZE_header \ .CommandType = 3, \ @@ -3081,6 +3148,8 @@ GEN8_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 17, \ .DwordLength = 0 +#define GEN8_3DSTATE_MONOFILTER_SIZE_length 0x00000002 + struct GEN8_3DSTATE_MONOFILTER_SIZE { uint32_t CommandType; uint32_t CommandSubType; @@ -3093,7 +3162,7 @@ struct GEN8_3DSTATE_MONOFILTER_SIZE { static inline void GEN8_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_MONOFILTER_SIZE * restrict values) + const struct GEN8_3DSTATE_MONOFILTER_SIZE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3112,7 +3181,6 @@ GEN8_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_MULTISAMPLE_length 0x00000002 #define GEN8_3DSTATE_MULTISAMPLE_length_bias 0x00000002 #define 
GEN8_3DSTATE_MULTISAMPLE_header \ .CommandType = 3, \ @@ -3121,6 +3189,8 @@ GEN8_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 13, \ .DwordLength = 0 +#define GEN8_3DSTATE_MULTISAMPLE_length 0x00000002 + struct GEN8_3DSTATE_MULTISAMPLE { uint32_t CommandType; uint32_t CommandSubType; @@ -3136,7 +3206,7 @@ struct GEN8_3DSTATE_MULTISAMPLE { static inline void GEN8_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_MULTISAMPLE * restrict values) + const struct GEN8_3DSTATE_MULTISAMPLE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3156,7 +3226,6 @@ GEN8_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 #define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 #define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_header \ .CommandType = 3, \ @@ -3165,6 +3234,8 @@ GEN8_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 6, \ .DwordLength = 0 +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 + struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET { uint32_t CommandType; uint32_t CommandSubType; @@ -3177,7 +3248,7 @@ struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET { static inline void GEN8_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) + const struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3196,7 +3267,6 @@ GEN8_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst } -#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 #define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 #define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_header\ .CommandType = 3, \ @@ -3205,6 +3275,8 @@ GEN8_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst ._3DCommandSubOpcode = 
7, \ .DwordLength = 31 +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 + struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN { uint32_t CommandType; uint32_t CommandSubType; @@ -3216,7 +3288,7 @@ struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN { static inline void GEN8_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) + const struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3236,7 +3308,6 @@ GEN8_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict ds } -#define GEN8_3DSTATE_PS_length 0x0000000c #define GEN8_3DSTATE_PS_length_bias 0x00000002 #define GEN8_3DSTATE_PS_header \ .CommandType = 3, \ @@ -3245,6 +3316,8 @@ GEN8_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict ds ._3DCommandSubOpcode = 32, \ .DwordLength = 10 +#define GEN8_3DSTATE_PS_length 0x0000000c + struct GEN8_3DSTATE_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -3304,7 +3377,7 @@ struct GEN8_3DSTATE_PS { static inline void GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PS * restrict values) + const struct GEN8_3DSTATE_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3378,7 +3451,6 @@ GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_PS_BLEND_length 0x00000002 #define GEN8_3DSTATE_PS_BLEND_length_bias 0x00000002 #define GEN8_3DSTATE_PS_BLEND_header \ .CommandType = 3, \ @@ -3387,6 +3459,8 @@ GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 77, \ .DwordLength = 0 +#define GEN8_3DSTATE_PS_BLEND_length 0x00000002 + struct GEN8_3DSTATE_PS_BLEND { uint32_t CommandType; uint32_t CommandSubType; @@ -3406,7 +3480,7 @@ struct GEN8_3DSTATE_PS_BLEND { static inline void GEN8_3DSTATE_PS_BLEND_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN8_3DSTATE_PS_BLEND * restrict values) + const struct GEN8_3DSTATE_PS_BLEND * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3432,7 +3506,6 @@ GEN8_3DSTATE_PS_BLEND_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_PS_EXTRA_length 0x00000002 #define GEN8_3DSTATE_PS_EXTRA_length_bias 0x00000002 #define GEN8_3DSTATE_PS_EXTRA_header \ .CommandType = 3, \ @@ -3441,6 +3514,8 @@ GEN8_3DSTATE_PS_BLEND_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 79, \ .DwordLength = 0 +#define GEN8_3DSTATE_PS_EXTRA_length 0x00000002 + struct GEN8_3DSTATE_PS_EXTRA { uint32_t CommandType; uint32_t CommandSubType; @@ -3469,7 +3544,7 @@ struct GEN8_3DSTATE_PS_EXTRA { static inline void GEN8_3DSTATE_PS_EXTRA_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PS_EXTRA * restrict values) + const struct GEN8_3DSTATE_PS_EXTRA * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3500,7 +3575,6 @@ GEN8_3DSTATE_PS_EXTRA_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 #define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 #define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ .CommandType = 3, \ @@ -3509,6 +3583,8 @@ GEN8_3DSTATE_PS_EXTRA_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 20, \ .DwordLength = 0 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 + struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -3521,7 +3597,7 @@ struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS { static inline void GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3540,7 +3616,6 @@ GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, 
void * restrict } -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 #define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 #define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ .CommandType = 3, \ @@ -3549,6 +3624,8 @@ GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 21, \ .DwordLength = 0 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 + struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -3561,7 +3638,7 @@ struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS { static inline void GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3580,7 +3657,6 @@ GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict } -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 #define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 #define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ .CommandType = 3, \ @@ -3589,6 +3665,8 @@ GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 19, \ .DwordLength = 0 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 + struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -3601,7 +3679,7 @@ struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS { static inline void GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3620,7 +3698,6 @@ GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict } -#define 
GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 #define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 #define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ .CommandType = 3, \ @@ -3629,6 +3706,8 @@ GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 22, \ .DwordLength = 0 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 + struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -3641,7 +3720,7 @@ struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS { static inline void GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3660,7 +3739,6 @@ GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict } -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 #define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 #define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ .CommandType = 3, \ @@ -3669,6 +3747,8 @@ GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 18, \ .DwordLength = 0 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 + struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -3681,7 +3761,7 @@ struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS { static inline void GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3700,7 +3780,6 @@ GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict } -#define GEN8_3DSTATE_RASTER_length 0x00000005 #define 
GEN8_3DSTATE_RASTER_length_bias 0x00000002 #define GEN8_3DSTATE_RASTER_header \ .CommandType = 3, \ @@ -3709,6 +3788,8 @@ GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 80, \ .DwordLength = 3 +#define GEN8_3DSTATE_RASTER_length 0x00000005 + struct GEN8_3DSTATE_RASTER { uint32_t CommandType; uint32_t CommandSubType; @@ -3765,7 +3846,7 @@ struct GEN8_3DSTATE_RASTER { static inline void GEN8_3DSTATE_RASTER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_RASTER * restrict values) + const struct GEN8_3DSTATE_RASTER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3817,6 +3898,10 @@ GEN8_3DSTATE_RASTER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 2 +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 + +#define GEN8_PALETTE_ENTRY_length 0x00000001 + struct GEN8_PALETTE_ENTRY { uint32_t Alpha; uint32_t Red; @@ -3826,7 +3911,7 @@ struct GEN8_PALETTE_ENTRY { static inline void GEN8_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_PALETTE_ENTRY * restrict values) + const struct GEN8_PALETTE_ENTRY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3850,7 +3935,7 @@ struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 { static inline void GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) + const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3872,6 +3957,8 @@ GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict d ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 12 +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 + struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 { uint32_t CommandType; uint32_t CommandSubType; @@ -3883,7 +3970,7 @@ struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 { static 
inline void GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) + const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3898,7 +3985,6 @@ GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict d /* variable length fields follow */ } -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 #define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 #define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ .CommandType = 3, \ @@ -3907,6 +3993,8 @@ GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict d ._3DCommandSubOpcode = 45, \ .DwordLength = 0 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 + struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -3918,7 +4006,7 @@ struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS { static inline void GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3936,7 +4024,6 @@ GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restri } -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 #define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 #define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ .CommandType = 3, \ @@ -3945,6 +4032,8 @@ GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 46, \ .DwordLength = 0 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 + struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -3956,7 +4045,7 @@ struct 
GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS { static inline void GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -3974,7 +4063,6 @@ GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restri } -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 #define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 #define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ .CommandType = 3, \ @@ -3983,6 +4071,8 @@ GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 44, \ .DwordLength = 0 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 + struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -3994,7 +4084,7 @@ struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS { static inline void GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4012,7 +4102,6 @@ GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restri } -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 #define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 #define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ .CommandType = 3, \ @@ -4021,6 +4110,8 @@ GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 47, \ .DwordLength = 0 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 + struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS { uint32_t CommandType; uint32_t CommandSubType; @@ -4032,7 +4123,7 @@ struct 
GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS { static inline void GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4050,7 +4141,6 @@ GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restri } -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 #define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 #define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ .CommandType = 3, \ @@ -4059,6 +4149,8 @@ GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 43, \ .DwordLength = 0 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 + struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS { uint32_t CommandType; uint32_t CommandSubType; @@ -4070,7 +4162,7 @@ struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS { static inline void GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4088,7 +4180,6 @@ GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restri } -#define GEN8_3DSTATE_SAMPLE_MASK_length 0x00000002 #define GEN8_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 #define GEN8_3DSTATE_SAMPLE_MASK_header \ .CommandType = 3, \ @@ -4097,6 +4188,8 @@ GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restri ._3DCommandSubOpcode = 24, \ .DwordLength = 0 +#define GEN8_3DSTATE_SAMPLE_MASK_length 0x00000002 + struct GEN8_3DSTATE_SAMPLE_MASK { uint32_t CommandType; uint32_t CommandSubType; @@ -4108,7 +4201,7 @@ struct GEN8_3DSTATE_SAMPLE_MASK { static inline void 
GEN8_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLE_MASK * restrict values) + const struct GEN8_3DSTATE_SAMPLE_MASK * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4126,7 +4219,6 @@ GEN8_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_SAMPLE_PATTERN_length 0x00000009 #define GEN8_3DSTATE_SAMPLE_PATTERN_length_bias 0x00000002 #define GEN8_3DSTATE_SAMPLE_PATTERN_header \ .CommandType = 3, \ @@ -4135,6 +4227,8 @@ GEN8_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 28, \ .DwordLength = 7 +#define GEN8_3DSTATE_SAMPLE_PATTERN_length 0x00000009 + struct GEN8_3DSTATE_SAMPLE_PATTERN { uint32_t CommandType; uint32_t CommandSubType; @@ -4175,7 +4269,7 @@ struct GEN8_3DSTATE_SAMPLE_PATTERN { static inline void GEN8_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLE_PATTERN * restrict values) + const struct GEN8_3DSTATE_SAMPLE_PATTERN * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4236,7 +4330,6 @@ GEN8_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_SBE_length 0x00000004 #define GEN8_3DSTATE_SBE_length_bias 0x00000002 #define GEN8_3DSTATE_SBE_header \ .CommandType = 3, \ @@ -4245,6 +4338,8 @@ GEN8_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 31, \ .DwordLength = 2 +#define GEN8_3DSTATE_SBE_length 0x00000004 + struct GEN8_3DSTATE_SBE { uint32_t CommandType; uint32_t CommandSubType; @@ -4271,7 +4366,7 @@ struct GEN8_3DSTATE_SBE { static inline void GEN8_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SBE * restrict values) + const struct GEN8_3DSTATE_SBE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4308,7 +4403,6 @@ GEN8_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, } 
-#define GEN8_3DSTATE_SBE_SWIZ_length 0x0000000b #define GEN8_3DSTATE_SBE_SWIZ_length_bias 0x00000002 #define GEN8_3DSTATE_SBE_SWIZ_header \ .CommandType = 3, \ @@ -4317,6 +4411,10 @@ GEN8_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 81, \ .DwordLength = 9 +#define GEN8_3DSTATE_SBE_SWIZ_length 0x0000000b + +#define GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_length 0x00000001 + struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL { bool ComponentOverrideW; bool ComponentOverrideZ; @@ -4338,7 +4436,7 @@ struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL { static inline void GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL * restrict values) + const struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4367,7 +4465,7 @@ struct GEN8_3DSTATE_SBE_SWIZ { static inline void GEN8_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SBE_SWIZ * restrict values) + const struct GEN8_3DSTATE_SBE_SWIZ * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4405,7 +4503,6 @@ GEN8_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 #define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 #define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_header\ .CommandType = 3, \ @@ -4414,6 +4511,8 @@ GEN8_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 15, \ .DwordLength = 0 +#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 + struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS { uint32_t CommandType; uint32_t CommandSubType; @@ -4425,7 +4524,7 @@ struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS { static inline void GEN8_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) + const struct 
GEN8_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4443,7 +4542,6 @@ GEN8_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict } -#define GEN8_3DSTATE_SF_length 0x00000004 #define GEN8_3DSTATE_SF_length_bias 0x00000002 #define GEN8_3DSTATE_SF_header \ .CommandType = 3, \ @@ -4452,6 +4550,8 @@ GEN8_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict ._3DCommandSubOpcode = 19, \ .DwordLength = 2 +#define GEN8_3DSTATE_SF_length 0x00000004 + struct GEN8_3DSTATE_SF { uint32_t CommandType; uint32_t CommandSubType; @@ -4483,7 +4583,7 @@ struct GEN8_3DSTATE_SF { static inline void GEN8_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SF * restrict values) + const struct GEN8_3DSTATE_SF * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4520,7 +4620,6 @@ GEN8_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_SO_BUFFER_length 0x00000008 #define GEN8_3DSTATE_SO_BUFFER_length_bias 0x00000002 #define GEN8_3DSTATE_SO_BUFFER_header \ .CommandType = 3, \ @@ -4529,6 +4628,8 @@ GEN8_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 24, \ .DwordLength = 6 +#define GEN8_3DSTATE_SO_BUFFER_length 0x00000008 + struct GEN8_3DSTATE_SO_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -4548,7 +4649,7 @@ struct GEN8_3DSTATE_SO_BUFFER { static inline void GEN8_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SO_BUFFER * restrict values) + const struct GEN8_3DSTATE_SO_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4605,6 +4706,12 @@ GEN8_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 1, \ ._3DCommandSubOpcode = 23 +#define GEN8_3DSTATE_SO_DECL_LIST_length 0x00000000 + +#define GEN8_SO_DECL_ENTRY_length 0x00000002 + +#define GEN8_SO_DECL_length 0x00000001 + 
struct GEN8_SO_DECL { uint32_t OutputBufferSlot; uint32_t HoleFlag; @@ -4614,7 +4721,7 @@ struct GEN8_SO_DECL { static inline void GEN8_SO_DECL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SO_DECL * restrict values) + const struct GEN8_SO_DECL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4636,7 +4743,7 @@ struct GEN8_SO_DECL_ENTRY { static inline void GEN8_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SO_DECL_ENTRY * restrict values) + const struct GEN8_SO_DECL_ENTRY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4679,7 +4786,7 @@ struct GEN8_3DSTATE_SO_DECL_LIST { static inline void GEN8_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SO_DECL_LIST * restrict values) + const struct GEN8_3DSTATE_SO_DECL_LIST * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4708,7 +4815,6 @@ GEN8_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN8_3DSTATE_STENCIL_BUFFER_length 0x00000005 #define GEN8_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 #define GEN8_3DSTATE_STENCIL_BUFFER_header \ .CommandType = 3, \ @@ -4717,6 +4823,8 @@ GEN8_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 6, \ .DwordLength = 3 +#define GEN8_3DSTATE_STENCIL_BUFFER_length 0x00000005 + struct GEN8_3DSTATE_STENCIL_BUFFER { uint32_t CommandType; uint32_t CommandSubType; @@ -4732,7 +4840,7 @@ struct GEN8_3DSTATE_STENCIL_BUFFER { static inline void GEN8_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_STENCIL_BUFFER * restrict values) + const struct GEN8_3DSTATE_STENCIL_BUFFER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4767,7 +4875,6 @@ GEN8_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_STREAMOUT_length 
0x00000005 #define GEN8_3DSTATE_STREAMOUT_length_bias 0x00000002 #define GEN8_3DSTATE_STREAMOUT_header \ .CommandType = 3, \ @@ -4776,6 +4883,8 @@ GEN8_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 30, \ .DwordLength = 3 +#define GEN8_3DSTATE_STREAMOUT_length 0x00000005 + struct GEN8_3DSTATE_STREAMOUT { uint32_t CommandType; uint32_t CommandSubType; @@ -4810,7 +4919,7 @@ struct GEN8_3DSTATE_STREAMOUT { static inline void GEN8_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_STREAMOUT * restrict values) + const struct GEN8_3DSTATE_STREAMOUT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4854,7 +4963,6 @@ GEN8_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_TE_length 0x00000004 #define GEN8_3DSTATE_TE_length_bias 0x00000002 #define GEN8_3DSTATE_TE_header \ .CommandType = 3, \ @@ -4863,6 +4971,8 @@ GEN8_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 28, \ .DwordLength = 2 +#define GEN8_3DSTATE_TE_length 0x00000004 + struct GEN8_3DSTATE_TE { uint32_t CommandType; uint32_t CommandSubType; @@ -4892,7 +5002,7 @@ struct GEN8_3DSTATE_TE { static inline void GEN8_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_TE * restrict values) + const struct GEN8_3DSTATE_TE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4922,7 +5032,6 @@ GEN8_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_URB_DS_length 0x00000002 #define GEN8_3DSTATE_URB_DS_length_bias 0x00000002 #define GEN8_3DSTATE_URB_DS_header \ .CommandType = 3, \ @@ -4931,6 +5040,8 @@ GEN8_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 50, \ .DwordLength = 0 +#define GEN8_3DSTATE_URB_DS_length 0x00000002 + struct GEN8_3DSTATE_URB_DS { uint32_t CommandType; uint32_t CommandSubType; @@ -4944,7 +5055,7 @@ 
struct GEN8_3DSTATE_URB_DS { static inline void GEN8_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_URB_DS * restrict values) + const struct GEN8_3DSTATE_URB_DS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -4964,7 +5075,6 @@ GEN8_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_URB_GS_length 0x00000002 #define GEN8_3DSTATE_URB_GS_length_bias 0x00000002 #define GEN8_3DSTATE_URB_GS_header \ .CommandType = 3, \ @@ -4973,6 +5083,8 @@ GEN8_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 51, \ .DwordLength = 0 +#define GEN8_3DSTATE_URB_GS_length 0x00000002 + struct GEN8_3DSTATE_URB_GS { uint32_t CommandType; uint32_t CommandSubType; @@ -4986,7 +5098,7 @@ struct GEN8_3DSTATE_URB_GS { static inline void GEN8_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_URB_GS * restrict values) + const struct GEN8_3DSTATE_URB_GS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5006,7 +5118,6 @@ GEN8_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_URB_HS_length 0x00000002 #define GEN8_3DSTATE_URB_HS_length_bias 0x00000002 #define GEN8_3DSTATE_URB_HS_header \ .CommandType = 3, \ @@ -5015,6 +5126,8 @@ GEN8_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 49, \ .DwordLength = 0 +#define GEN8_3DSTATE_URB_HS_length 0x00000002 + struct GEN8_3DSTATE_URB_HS { uint32_t CommandType; uint32_t CommandSubType; @@ -5028,7 +5141,7 @@ struct GEN8_3DSTATE_URB_HS { static inline void GEN8_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_URB_HS * restrict values) + const struct GEN8_3DSTATE_URB_HS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5055,6 +5168,10 @@ GEN8_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ 
._3DCommandSubOpcode = 8 +#define GEN8_3DSTATE_VERTEX_BUFFERS_length 0x00000000 + +#define GEN8_VERTEX_BUFFER_STATE_length 0x00000004 + struct GEN8_VERTEX_BUFFER_STATE { uint32_t VertexBufferIndex; struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; @@ -5067,7 +5184,7 @@ struct GEN8_VERTEX_BUFFER_STATE { static inline void GEN8_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_VERTEX_BUFFER_STATE * restrict values) + const struct GEN8_VERTEX_BUFFER_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5107,7 +5224,7 @@ struct GEN8_3DSTATE_VERTEX_BUFFERS { static inline void GEN8_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VERTEX_BUFFERS * restrict values) + const struct GEN8_3DSTATE_VERTEX_BUFFERS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5129,6 +5246,10 @@ GEN8_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 9 +#define GEN8_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 + +#define GEN8_VERTEX_ELEMENT_STATE_length 0x00000002 + struct GEN8_VERTEX_ELEMENT_STATE { uint32_t VertexBufferIndex; bool Valid; @@ -5143,7 +5264,7 @@ struct GEN8_VERTEX_ELEMENT_STATE { static inline void GEN8_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_VERTEX_ELEMENT_STATE * restrict values) + const struct GEN8_VERTEX_ELEMENT_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5175,7 +5296,7 @@ struct GEN8_3DSTATE_VERTEX_ELEMENTS { static inline void GEN8_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VERTEX_ELEMENTS * restrict values) + const struct GEN8_3DSTATE_VERTEX_ELEMENTS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5190,7 +5311,6 @@ GEN8_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, /* variable length 
fields follow */ } -#define GEN8_3DSTATE_VF_length 0x00000002 #define GEN8_3DSTATE_VF_length_bias 0x00000002 #define GEN8_3DSTATE_VF_header \ .CommandType = 3, \ @@ -5199,6 +5319,8 @@ GEN8_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 12, \ .DwordLength = 0 +#define GEN8_3DSTATE_VF_length 0x00000002 + struct GEN8_3DSTATE_VF { uint32_t CommandType; uint32_t CommandSubType; @@ -5211,7 +5333,7 @@ struct GEN8_3DSTATE_VF { static inline void GEN8_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VF * restrict values) + const struct GEN8_3DSTATE_VF * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5230,7 +5352,6 @@ GEN8_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_VF_INSTANCING_length 0x00000003 #define GEN8_3DSTATE_VF_INSTANCING_length_bias 0x00000002 #define GEN8_3DSTATE_VF_INSTANCING_header \ .CommandType = 3, \ @@ -5239,6 +5360,8 @@ GEN8_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 73, \ .DwordLength = 1 +#define GEN8_3DSTATE_VF_INSTANCING_length 0x00000003 + struct GEN8_3DSTATE_VF_INSTANCING { uint32_t CommandType; uint32_t CommandSubType; @@ -5252,7 +5375,7 @@ struct GEN8_3DSTATE_VF_INSTANCING { static inline void GEN8_3DSTATE_VF_INSTANCING_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VF_INSTANCING * restrict values) + const struct GEN8_3DSTATE_VF_INSTANCING * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5275,7 +5398,6 @@ GEN8_3DSTATE_VF_INSTANCING_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_VF_SGVS_length 0x00000002 #define GEN8_3DSTATE_VF_SGVS_length_bias 0x00000002 #define GEN8_3DSTATE_VF_SGVS_header \ .CommandType = 3, \ @@ -5284,6 +5406,8 @@ GEN8_3DSTATE_VF_INSTANCING_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 74, \ .DwordLength = 0 +#define GEN8_3DSTATE_VF_SGVS_length 
0x00000002 + struct GEN8_3DSTATE_VF_SGVS { uint32_t CommandType; uint32_t CommandSubType; @@ -5308,7 +5432,7 @@ struct GEN8_3DSTATE_VF_SGVS { static inline void GEN8_3DSTATE_VF_SGVS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VF_SGVS * restrict values) + const struct GEN8_3DSTATE_VF_SGVS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5331,7 +5455,6 @@ GEN8_3DSTATE_VF_SGVS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_VF_STATISTICS_length 0x00000001 #define GEN8_3DSTATE_VF_STATISTICS_length_bias 0x00000001 #define GEN8_3DSTATE_VF_STATISTICS_header \ .CommandType = 3, \ @@ -5339,6 +5462,8 @@ GEN8_3DSTATE_VF_SGVS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandOpcode = 0, \ ._3DCommandSubOpcode = 11 +#define GEN8_3DSTATE_VF_STATISTICS_length 0x00000001 + struct GEN8_3DSTATE_VF_STATISTICS { uint32_t CommandType; uint32_t CommandSubType; @@ -5349,7 +5474,7 @@ struct GEN8_3DSTATE_VF_STATISTICS { static inline void GEN8_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VF_STATISTICS * restrict values) + const struct GEN8_3DSTATE_VF_STATISTICS * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5363,7 +5488,6 @@ GEN8_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_VF_TOPOLOGY_length 0x00000002 #define GEN8_3DSTATE_VF_TOPOLOGY_length_bias 0x00000002 #define GEN8_3DSTATE_VF_TOPOLOGY_header \ .CommandType = 3, \ @@ -5372,6 +5496,8 @@ GEN8_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 75, \ .DwordLength = 0 +#define GEN8_3DSTATE_VF_TOPOLOGY_length 0x00000002 + struct GEN8_3DSTATE_VF_TOPOLOGY { uint32_t CommandType; uint32_t CommandSubType; @@ -5383,7 +5509,7 @@ struct GEN8_3DSTATE_VF_TOPOLOGY { static inline void GEN8_3DSTATE_VF_TOPOLOGY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VF_TOPOLOGY * 
restrict values) + const struct GEN8_3DSTATE_VF_TOPOLOGY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5401,7 +5527,6 @@ GEN8_3DSTATE_VF_TOPOLOGY_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 #define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 #define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ .CommandType = 3, \ @@ -5410,6 +5535,8 @@ GEN8_3DSTATE_VF_TOPOLOGY_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 35, \ .DwordLength = 0 +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 + struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC { uint32_t CommandType; uint32_t CommandSubType; @@ -5421,7 +5548,7 @@ struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC { static inline void GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) + const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5439,7 +5566,6 @@ GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restr } -#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 #define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 #define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ .CommandType = 3, \ @@ -5448,6 +5574,8 @@ GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restr ._3DCommandSubOpcode = 33, \ .DwordLength = 0 +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 + struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { uint32_t CommandType; uint32_t CommandSubType; @@ -5459,7 +5587,7 @@ struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { static inline void GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) + const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5477,7 +5605,6 @@ GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * } -#define GEN8_3DSTATE_WM_length 0x00000002 #define GEN8_3DSTATE_WM_length_bias 0x00000002 #define GEN8_3DSTATE_WM_header \ .CommandType = 3, \ @@ -5486,6 +5613,8 @@ GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * ._3DCommandSubOpcode = 20, \ .DwordLength = 0 +#define GEN8_3DSTATE_WM_length 0x00000002 + struct GEN8_3DSTATE_WM { uint32_t CommandType; uint32_t CommandSubType; @@ -5533,7 +5662,7 @@ struct GEN8_3DSTATE_WM { static inline void GEN8_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_WM * restrict values) + const struct GEN8_3DSTATE_WM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5565,7 +5694,6 @@ GEN8_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_WM_CHROMAKEY_length 0x00000002 #define GEN8_3DSTATE_WM_CHROMAKEY_length_bias 0x00000002 #define GEN8_3DSTATE_WM_CHROMAKEY_header \ .CommandType = 3, \ @@ -5574,6 +5702,8 @@ GEN8_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 76, \ .DwordLength = 0 +#define GEN8_3DSTATE_WM_CHROMAKEY_length 0x00000002 + struct GEN8_3DSTATE_WM_CHROMAKEY { uint32_t CommandType; uint32_t CommandSubType; @@ -5585,7 +5715,7 @@ struct GEN8_3DSTATE_WM_CHROMAKEY { static inline void GEN8_3DSTATE_WM_CHROMAKEY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_WM_CHROMAKEY * restrict values) + const struct GEN8_3DSTATE_WM_CHROMAKEY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5603,7 +5733,6 @@ GEN8_3DSTATE_WM_CHROMAKEY_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_WM_DEPTH_STENCIL_length 0x00000003 #define 
GEN8_3DSTATE_WM_DEPTH_STENCIL_length_bias 0x00000002 #define GEN8_3DSTATE_WM_DEPTH_STENCIL_header \ .CommandType = 3, \ @@ -5612,6 +5741,8 @@ GEN8_3DSTATE_WM_CHROMAKEY_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 78, \ .DwordLength = 1 +#define GEN8_3DSTATE_WM_DEPTH_STENCIL_length 0x00000003 + struct GEN8_3DSTATE_WM_DEPTH_STENCIL { uint32_t CommandType; uint32_t CommandSubType; @@ -5640,7 +5771,7 @@ struct GEN8_3DSTATE_WM_DEPTH_STENCIL { static inline void GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_WM_DEPTH_STENCIL * restrict values) + const struct GEN8_3DSTATE_WM_DEPTH_STENCIL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5678,7 +5809,6 @@ GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_WM_HZ_OP_length 0x00000005 #define GEN8_3DSTATE_WM_HZ_OP_length_bias 0x00000002 #define GEN8_3DSTATE_WM_HZ_OP_header \ .CommandType = 3, \ @@ -5687,6 +5817,8 @@ GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 82, \ .DwordLength = 3 +#define GEN8_3DSTATE_WM_HZ_OP_length 0x00000005 + struct GEN8_3DSTATE_WM_HZ_OP { uint32_t CommandType; uint32_t CommandSubType; @@ -5711,7 +5843,7 @@ struct GEN8_3DSTATE_WM_HZ_OP { static inline void GEN8_3DSTATE_WM_HZ_OP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_WM_HZ_OP * restrict values) + const struct GEN8_3DSTATE_WM_HZ_OP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5751,7 +5883,6 @@ GEN8_3DSTATE_WM_HZ_OP_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_GPGPU_WALKER_length 0x0000000f #define GEN8_GPGPU_WALKER_length_bias 0x00000002 #define GEN8_GPGPU_WALKER_header \ .CommandType = 3, \ @@ -5760,6 +5891,8 @@ GEN8_3DSTATE_WM_HZ_OP_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 5, \ .DwordLength = 13 +#define GEN8_GPGPU_WALKER_length 0x0000000f + 
struct GEN8_GPGPU_WALKER { uint32_t CommandType; uint32_t Pipeline; @@ -5790,7 +5923,7 @@ struct GEN8_GPGPU_WALKER { static inline void GEN8_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_GPGPU_WALKER * restrict values) + const struct GEN8_GPGPU_WALKER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5863,7 +5996,6 @@ GEN8_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MEDIA_CURBE_LOAD_length 0x00000004 #define GEN8_MEDIA_CURBE_LOAD_length_bias 0x00000002 #define GEN8_MEDIA_CURBE_LOAD_header \ .CommandType = 3, \ @@ -5872,6 +6004,8 @@ GEN8_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 1, \ .DwordLength = 2 +#define GEN8_MEDIA_CURBE_LOAD_length 0x00000004 + struct GEN8_MEDIA_CURBE_LOAD { uint32_t CommandType; uint32_t Pipeline; @@ -5884,7 +6018,7 @@ struct GEN8_MEDIA_CURBE_LOAD { static inline void GEN8_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_CURBE_LOAD * restrict values) + const struct GEN8_MEDIA_CURBE_LOAD * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5909,7 +6043,6 @@ GEN8_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 #define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 #define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ .CommandType = 3, \ @@ -5918,6 +6051,8 @@ GEN8_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 2, \ .DwordLength = 2 +#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 + struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD { uint32_t CommandType; uint32_t Pipeline; @@ -5930,7 +6065,7 @@ struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD { static inline void GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) + const struct 
GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -5962,6 +6097,8 @@ GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict .MediaCommandOpcode = 1, \ .MediaCommandSubOpcode = 0 +#define GEN8_MEDIA_OBJECT_length 0x00000000 + struct GEN8_MEDIA_OBJECT { uint32_t CommandType; uint32_t MediaCommandPipeline; @@ -5996,7 +6133,7 @@ struct GEN8_MEDIA_OBJECT { static inline void GEN8_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_OBJECT * restrict values) + const struct GEN8_MEDIA_OBJECT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6048,6 +6185,8 @@ GEN8_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, .MediaCommandOpcode = 1, \ .MediaCommandSubOpcode = 6 +#define GEN8_MEDIA_OBJECT_GRPID_length 0x00000000 + struct GEN8_MEDIA_OBJECT_GRPID { uint32_t CommandType; uint32_t MediaCommandPipeline; @@ -6080,7 +6219,7 @@ struct GEN8_MEDIA_OBJECT_GRPID { static inline void GEN8_MEDIA_OBJECT_GRPID_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_OBJECT_GRPID * restrict values) + const struct GEN8_MEDIA_OBJECT_GRPID * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6128,7 +6267,6 @@ GEN8_MEDIA_OBJECT_GRPID_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN8_MEDIA_OBJECT_PRT_length 0x00000010 #define GEN8_MEDIA_OBJECT_PRT_length_bias 0x00000002 #define GEN8_MEDIA_OBJECT_PRT_header \ .CommandType = 3, \ @@ -6137,6 +6275,8 @@ GEN8_MEDIA_OBJECT_GRPID_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 2, \ .DwordLength = 14 +#define GEN8_MEDIA_OBJECT_PRT_length 0x00000010 + struct GEN8_MEDIA_OBJECT_PRT { uint32_t CommandType; uint32_t Pipeline; @@ -6154,7 +6294,7 @@ struct GEN8_MEDIA_OBJECT_PRT { static inline void GEN8_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_OBJECT_PRT * restrict 
values) + const struct GEN8_MEDIA_OBJECT_PRT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6194,6 +6334,8 @@ GEN8_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, .MediaCommandOpcode = 1, \ .SubOpcode = 3 +#define GEN8_MEDIA_OBJECT_WALKER_length 0x00000000 + struct GEN8_MEDIA_OBJECT_WALKER { uint32_t CommandType; uint32_t Pipeline; @@ -6239,7 +6381,7 @@ struct GEN8_MEDIA_OBJECT_WALKER { static inline void GEN8_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_OBJECT_WALKER * restrict values) + const struct GEN8_MEDIA_OBJECT_WALKER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6332,7 +6474,6 @@ GEN8_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN8_MEDIA_STATE_FLUSH_length 0x00000002 #define GEN8_MEDIA_STATE_FLUSH_length_bias 0x00000002 #define GEN8_MEDIA_STATE_FLUSH_header \ .CommandType = 3, \ @@ -6341,6 +6482,8 @@ GEN8_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 4, \ .DwordLength = 0 +#define GEN8_MEDIA_STATE_FLUSH_length 0x00000002 + struct GEN8_MEDIA_STATE_FLUSH { uint32_t CommandType; uint32_t Pipeline; @@ -6354,7 +6497,7 @@ struct GEN8_MEDIA_STATE_FLUSH { static inline void GEN8_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_STATE_FLUSH * restrict values) + const struct GEN8_MEDIA_STATE_FLUSH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6374,7 +6517,6 @@ GEN8_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MEDIA_VFE_STATE_length 0x00000009 #define GEN8_MEDIA_VFE_STATE_length_bias 0x00000002 #define GEN8_MEDIA_VFE_STATE_header \ .CommandType = 3, \ @@ -6383,6 +6525,8 @@ GEN8_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, .SubOpcode = 0, \ .DwordLength = 7 +#define GEN8_MEDIA_VFE_STATE_length 0x00000009 + struct 
GEN8_MEDIA_VFE_STATE { uint32_t CommandType; uint32_t Pipeline; @@ -6431,7 +6575,7 @@ struct GEN8_MEDIA_VFE_STATE { static inline void GEN8_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_VFE_STATE * restrict values) + const struct GEN8_MEDIA_VFE_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6499,12 +6643,13 @@ GEN8_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_ARB_CHECK_length 0x00000001 #define GEN8_MI_ARB_CHECK_length_bias 0x00000001 #define GEN8_MI_ARB_CHECK_header \ .CommandType = 0, \ .MICommandOpcode = 5 +#define GEN8_MI_ARB_CHECK_length 0x00000001 + struct GEN8_MI_ARB_CHECK { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6512,7 +6657,7 @@ struct GEN8_MI_ARB_CHECK { static inline void GEN8_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_ARB_CHECK * restrict values) + const struct GEN8_MI_ARB_CHECK * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6523,12 +6668,13 @@ GEN8_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_BATCH_BUFFER_END_length 0x00000001 #define GEN8_MI_BATCH_BUFFER_END_length_bias 0x00000001 #define GEN8_MI_BATCH_BUFFER_END_header \ .CommandType = 0, \ .MICommandOpcode = 10 +#define GEN8_MI_BATCH_BUFFER_END_length 0x00000001 + struct GEN8_MI_BATCH_BUFFER_END { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6536,7 +6682,7 @@ struct GEN8_MI_BATCH_BUFFER_END { static inline void GEN8_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_BATCH_BUFFER_END * restrict values) + const struct GEN8_MI_BATCH_BUFFER_END * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6547,13 +6693,14 @@ GEN8_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_BATCH_BUFFER_START_length 0x00000003 #define GEN8_MI_BATCH_BUFFER_START_length_bias 0x00000002 #define 
GEN8_MI_BATCH_BUFFER_START_header \ .CommandType = 0, \ .MICommandOpcode = 49, \ .DwordLength = 1 +#define GEN8_MI_BATCH_BUFFER_START_length 0x00000003 + struct GEN8_MI_BATCH_BUFFER_START { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6572,7 +6719,7 @@ struct GEN8_MI_BATCH_BUFFER_START { static inline void GEN8_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_BATCH_BUFFER_START * restrict values) + const struct GEN8_MI_BATCH_BUFFER_START * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6603,6 +6750,8 @@ GEN8_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, .CommandType = 0, \ .MICommandOpcode = 39 +#define GEN8_MI_CLFLUSH_length 0x00000000 + struct GEN8_MI_CLFLUSH { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6617,7 +6766,7 @@ struct GEN8_MI_CLFLUSH { static inline void GEN8_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_CLFLUSH * restrict values) + const struct GEN8_MI_CLFLUSH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6641,7 +6790,6 @@ GEN8_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000003 #define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 #define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_header\ .CommandType = 0, \ @@ -6650,6 +6798,8 @@ GEN8_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, .CompareSemaphore = 0, \ .DwordLength = 1 +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000003 + struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6662,7 +6812,7 @@ struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END { static inline void GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) + const struct 
GEN8_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6689,13 +6839,14 @@ GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict } -#define GEN8_MI_COPY_MEM_MEM_length 0x00000005 #define GEN8_MI_COPY_MEM_MEM_length_bias 0x00000002 #define GEN8_MI_COPY_MEM_MEM_header \ .CommandType = 0, \ .MICommandOpcode = 46, \ .DwordLength = 3 +#define GEN8_MI_COPY_MEM_MEM_length 0x00000005 + struct GEN8_MI_COPY_MEM_MEM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6712,7 +6863,7 @@ struct GEN8_MI_COPY_MEM_MEM { static inline void GEN8_MI_COPY_MEM_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_COPY_MEM_MEM * restrict values) + const struct GEN8_MI_COPY_MEM_MEM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6744,13 +6895,14 @@ GEN8_MI_COPY_MEM_MEM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_LOAD_REGISTER_IMM_length 0x00000003 #define GEN8_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 #define GEN8_MI_LOAD_REGISTER_IMM_header \ .CommandType = 0, \ .MICommandOpcode = 34, \ .DwordLength = 1 +#define GEN8_MI_LOAD_REGISTER_IMM_length 0x00000003 + struct GEN8_MI_LOAD_REGISTER_IMM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6762,7 +6914,7 @@ struct GEN8_MI_LOAD_REGISTER_IMM { static inline void GEN8_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_LOAD_REGISTER_IMM * restrict values) + const struct GEN8_MI_LOAD_REGISTER_IMM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6783,13 +6935,14 @@ GEN8_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_LOAD_REGISTER_MEM_length 0x00000004 #define GEN8_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 #define GEN8_MI_LOAD_REGISTER_MEM_header \ .CommandType = 0, \ .MICommandOpcode = 41, \ .DwordLength = 2 +#define GEN8_MI_LOAD_REGISTER_MEM_length 0x00000004 + struct GEN8_MI_LOAD_REGISTER_MEM { 
uint32_t CommandType; uint32_t MICommandOpcode; @@ -6802,7 +6955,7 @@ struct GEN8_MI_LOAD_REGISTER_MEM { static inline void GEN8_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_LOAD_REGISTER_MEM * restrict values) + const struct GEN8_MI_LOAD_REGISTER_MEM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6829,13 +6982,14 @@ GEN8_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 #define GEN8_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 #define GEN8_MI_LOAD_SCAN_LINES_EXCL_header \ .CommandType = 0, \ .MICommandOpcode = 19, \ .DwordLength = 0 +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 + struct GEN8_MI_LOAD_SCAN_LINES_EXCL { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6850,7 +7004,7 @@ struct GEN8_MI_LOAD_SCAN_LINES_EXCL { static inline void GEN8_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_LOAD_SCAN_LINES_EXCL * restrict values) + const struct GEN8_MI_LOAD_SCAN_LINES_EXCL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6868,13 +7022,14 @@ GEN8_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 #define GEN8_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 #define GEN8_MI_LOAD_SCAN_LINES_INCL_header \ .CommandType = 0, \ .MICommandOpcode = 18, \ .DwordLength = 0 +#define GEN8_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 + struct GEN8_MI_LOAD_SCAN_LINES_INCL { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6893,7 +7048,7 @@ struct GEN8_MI_LOAD_SCAN_LINES_INCL { static inline void GEN8_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_LOAD_SCAN_LINES_INCL * restrict values) + const struct GEN8_MI_LOAD_SCAN_LINES_INCL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6912,13 +7067,14 @@ 
GEN8_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_LOAD_URB_MEM_length 0x00000004 #define GEN8_MI_LOAD_URB_MEM_length_bias 0x00000002 #define GEN8_MI_LOAD_URB_MEM_header \ .CommandType = 0, \ .MICommandOpcode = 44, \ .DwordLength = 2 +#define GEN8_MI_LOAD_URB_MEM_length 0x00000004 + struct GEN8_MI_LOAD_URB_MEM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6929,7 +7085,7 @@ struct GEN8_MI_LOAD_URB_MEM { static inline void GEN8_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_LOAD_URB_MEM * restrict values) + const struct GEN8_MI_LOAD_URB_MEM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6959,6 +7115,8 @@ GEN8_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, .CommandType = 0, \ .MICommandOpcode = 26 +#define GEN8_MI_MATH_length 0x00000000 + struct GEN8_MI_MATH { uint32_t CommandType; uint32_t MICommandOpcode; @@ -6970,7 +7128,7 @@ struct GEN8_MI_MATH { static inline void GEN8_MI_MATH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_MATH * restrict values) + const struct GEN8_MI_MATH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -6991,12 +7149,13 @@ GEN8_MI_MATH_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN8_MI_NOOP_length 0x00000001 #define GEN8_MI_NOOP_length_bias 0x00000001 #define GEN8_MI_NOOP_header \ .CommandType = 0, \ .MICommandOpcode = 0 +#define GEN8_MI_NOOP_length 0x00000001 + struct GEN8_MI_NOOP { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7006,7 +7165,7 @@ struct GEN8_MI_NOOP { static inline void GEN8_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_NOOP * restrict values) + const struct GEN8_MI_NOOP * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7019,12 +7178,13 @@ GEN8_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_PREDICATE_length 0x00000001 
#define GEN8_MI_PREDICATE_length_bias 0x00000001 #define GEN8_MI_PREDICATE_header \ .CommandType = 0, \ .MICommandOpcode = 12 +#define GEN8_MI_PREDICATE_length 0x00000001 + struct GEN8_MI_PREDICATE { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7044,7 +7204,7 @@ struct GEN8_MI_PREDICATE { static inline void GEN8_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_PREDICATE * restrict values) + const struct GEN8_MI_PREDICATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7058,12 +7218,13 @@ GEN8_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_REPORT_HEAD_length 0x00000001 #define GEN8_MI_REPORT_HEAD_length_bias 0x00000001 #define GEN8_MI_REPORT_HEAD_header \ .CommandType = 0, \ .MICommandOpcode = 7 +#define GEN8_MI_REPORT_HEAD_length 0x00000001 + struct GEN8_MI_REPORT_HEAD { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7071,7 +7232,7 @@ struct GEN8_MI_REPORT_HEAD { static inline void GEN8_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_REPORT_HEAD * restrict values) + const struct GEN8_MI_REPORT_HEAD * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7082,12 +7243,13 @@ GEN8_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_RS_CONTEXT_length 0x00000001 #define GEN8_MI_RS_CONTEXT_length_bias 0x00000001 #define GEN8_MI_RS_CONTEXT_header \ .CommandType = 0, \ .MICommandOpcode = 15 +#define GEN8_MI_RS_CONTEXT_length 0x00000001 + struct GEN8_MI_RS_CONTEXT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7098,7 +7260,7 @@ struct GEN8_MI_RS_CONTEXT { static inline void GEN8_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_RS_CONTEXT * restrict values) + const struct GEN8_MI_RS_CONTEXT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7110,12 +7272,13 @@ GEN8_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, } -#define 
GEN8_MI_RS_CONTROL_length 0x00000001 #define GEN8_MI_RS_CONTROL_length_bias 0x00000001 #define GEN8_MI_RS_CONTROL_header \ .CommandType = 0, \ .MICommandOpcode = 6 +#define GEN8_MI_RS_CONTROL_length 0x00000001 + struct GEN8_MI_RS_CONTROL { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7126,7 +7289,7 @@ struct GEN8_MI_RS_CONTROL { static inline void GEN8_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_RS_CONTROL * restrict values) + const struct GEN8_MI_RS_CONTROL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7138,13 +7301,14 @@ GEN8_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_RS_STORE_DATA_IMM_length 0x00000004 #define GEN8_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 #define GEN8_MI_RS_STORE_DATA_IMM_header \ .CommandType = 0, \ .MICommandOpcode = 43, \ .DwordLength = 2 +#define GEN8_MI_RS_STORE_DATA_IMM_length 0x00000004 + struct GEN8_MI_RS_STORE_DATA_IMM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7156,7 +7320,7 @@ struct GEN8_MI_RS_STORE_DATA_IMM { static inline void GEN8_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_RS_STORE_DATA_IMM * restrict values) + const struct GEN8_MI_RS_STORE_DATA_IMM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7182,13 +7346,14 @@ GEN8_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_SET_CONTEXT_length 0x00000002 #define GEN8_MI_SET_CONTEXT_length_bias 0x00000002 #define GEN8_MI_SET_CONTEXT_header \ .CommandType = 0, \ .MICommandOpcode = 24, \ .DwordLength = 0 +#define GEN8_MI_SET_CONTEXT_length 0x00000002 + struct GEN8_MI_SET_CONTEXT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7204,7 +7369,7 @@ struct GEN8_MI_SET_CONTEXT { static inline void GEN8_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_SET_CONTEXT * restrict values) + const struct GEN8_MI_SET_CONTEXT * restrict 
values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7228,12 +7393,13 @@ GEN8_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_SET_PREDICATE_length 0x00000001 #define GEN8_MI_SET_PREDICATE_length_bias 0x00000001 #define GEN8_MI_SET_PREDICATE_header \ .CommandType = 0, \ .MICommandOpcode = 1 +#define GEN8_MI_SET_PREDICATE_length 0x00000001 + struct GEN8_MI_SET_PREDICATE { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7251,7 +7417,7 @@ struct GEN8_MI_SET_PREDICATE { static inline void GEN8_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_SET_PREDICATE * restrict values) + const struct GEN8_MI_SET_PREDICATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7263,13 +7429,14 @@ GEN8_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_STORE_DATA_IMM_length 0x00000004 #define GEN8_MI_STORE_DATA_IMM_length_bias 0x00000002 #define GEN8_MI_STORE_DATA_IMM_header \ .CommandType = 0, \ .MICommandOpcode = 32, \ .DwordLength = 2 +#define GEN8_MI_STORE_DATA_IMM_length 0x00000004 + struct GEN8_MI_STORE_DATA_IMM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7284,7 +7451,7 @@ struct GEN8_MI_STORE_DATA_IMM { static inline void GEN8_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_STORE_DATA_IMM * restrict values) + const struct GEN8_MI_STORE_DATA_IMM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7316,13 +7483,14 @@ GEN8_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_STORE_DATA_INDEX_length 0x00000003 #define GEN8_MI_STORE_DATA_INDEX_length_bias 0x00000002 #define GEN8_MI_STORE_DATA_INDEX_header \ .CommandType = 0, \ .MICommandOpcode = 33, \ .DwordLength = 1 +#define GEN8_MI_STORE_DATA_INDEX_length 0x00000003 + struct GEN8_MI_STORE_DATA_INDEX { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7335,7 +7503,7 @@ struct GEN8_MI_STORE_DATA_INDEX { 
static inline void GEN8_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_STORE_DATA_INDEX * restrict values) + const struct GEN8_MI_STORE_DATA_INDEX * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7360,13 +7528,14 @@ GEN8_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_STORE_URB_MEM_length 0x00000004 #define GEN8_MI_STORE_URB_MEM_length_bias 0x00000002 #define GEN8_MI_STORE_URB_MEM_header \ .CommandType = 0, \ .MICommandOpcode = 45, \ .DwordLength = 2 +#define GEN8_MI_STORE_URB_MEM_length 0x00000004 + struct GEN8_MI_STORE_URB_MEM { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7377,7 +7546,7 @@ struct GEN8_MI_STORE_URB_MEM { static inline void GEN8_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_STORE_URB_MEM * restrict values) + const struct GEN8_MI_STORE_URB_MEM * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7402,12 +7571,13 @@ GEN8_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_SUSPEND_FLUSH_length 0x00000001 #define GEN8_MI_SUSPEND_FLUSH_length_bias 0x00000001 #define GEN8_MI_SUSPEND_FLUSH_header \ .CommandType = 0, \ .MICommandOpcode = 11 +#define GEN8_MI_SUSPEND_FLUSH_length 0x00000001 + struct GEN8_MI_SUSPEND_FLUSH { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7416,7 +7586,7 @@ struct GEN8_MI_SUSPEND_FLUSH { static inline void GEN8_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_SUSPEND_FLUSH * restrict values) + const struct GEN8_MI_SUSPEND_FLUSH * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7428,12 +7598,13 @@ GEN8_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_TOPOLOGY_FILTER_length 0x00000001 #define GEN8_MI_TOPOLOGY_FILTER_length_bias 0x00000001 #define GEN8_MI_TOPOLOGY_FILTER_header \ .CommandType = 0, \ .MICommandOpcode = 13 +#define 
GEN8_MI_TOPOLOGY_FILTER_length 0x00000001 + struct GEN8_MI_TOPOLOGY_FILTER { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7442,7 +7613,7 @@ struct GEN8_MI_TOPOLOGY_FILTER { static inline void GEN8_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_TOPOLOGY_FILTER * restrict values) + const struct GEN8_MI_TOPOLOGY_FILTER * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7459,6 +7630,8 @@ GEN8_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, .CommandType = 0, \ .MICommandOpcode = 35 +#define GEN8_MI_UPDATE_GTT_length 0x00000000 + struct GEN8_MI_UPDATE_GTT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7469,7 +7642,7 @@ struct GEN8_MI_UPDATE_GTT { static inline void GEN8_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_UPDATE_GTT * restrict values) + const struct GEN8_MI_UPDATE_GTT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7488,12 +7661,13 @@ GEN8_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, /* variable length fields follow */ } -#define GEN8_MI_URB_ATOMIC_ALLOC_length 0x00000001 #define GEN8_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 #define GEN8_MI_URB_ATOMIC_ALLOC_header \ .CommandType = 0, \ .MICommandOpcode = 9 +#define GEN8_MI_URB_ATOMIC_ALLOC_length 0x00000001 + struct GEN8_MI_URB_ATOMIC_ALLOC { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7503,7 +7677,7 @@ struct GEN8_MI_URB_ATOMIC_ALLOC { static inline void GEN8_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_URB_ATOMIC_ALLOC * restrict values) + const struct GEN8_MI_URB_ATOMIC_ALLOC * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7516,13 +7690,14 @@ GEN8_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_URB_CLEAR_length 0x00000002 #define GEN8_MI_URB_CLEAR_length_bias 0x00000002 #define GEN8_MI_URB_CLEAR_header \ .CommandType = 0, \ 
.MICommandOpcode = 25, \ .DwordLength = 0 +#define GEN8_MI_URB_CLEAR_length 0x00000002 + struct GEN8_MI_URB_CLEAR { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7533,7 +7708,7 @@ struct GEN8_MI_URB_CLEAR { static inline void GEN8_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_URB_CLEAR * restrict values) + const struct GEN8_MI_URB_CLEAR * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7550,12 +7725,13 @@ GEN8_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_USER_INTERRUPT_length 0x00000001 #define GEN8_MI_USER_INTERRUPT_length_bias 0x00000001 #define GEN8_MI_USER_INTERRUPT_header \ .CommandType = 0, \ .MICommandOpcode = 2 +#define GEN8_MI_USER_INTERRUPT_length 0x00000001 + struct GEN8_MI_USER_INTERRUPT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7563,7 +7739,7 @@ struct GEN8_MI_USER_INTERRUPT { static inline void GEN8_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_USER_INTERRUPT * restrict values) + const struct GEN8_MI_USER_INTERRUPT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7574,12 +7750,13 @@ GEN8_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MI_WAIT_FOR_EVENT_length 0x00000001 #define GEN8_MI_WAIT_FOR_EVENT_length_bias 0x00000001 #define GEN8_MI_WAIT_FOR_EVENT_header \ .CommandType = 0, \ .MICommandOpcode = 3 +#define GEN8_MI_WAIT_FOR_EVENT_length 0x00000001 + struct GEN8_MI_WAIT_FOR_EVENT { uint32_t CommandType; uint32_t MICommandOpcode; @@ -7599,7 +7776,7 @@ struct GEN8_MI_WAIT_FOR_EVENT { static inline void GEN8_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_WAIT_FOR_EVENT * restrict values) + const struct GEN8_MI_WAIT_FOR_EVENT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7622,7 +7799,6 @@ GEN8_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, } -#define 
GEN8_PIPE_CONTROL_length 0x00000006 #define GEN8_PIPE_CONTROL_length_bias 0x00000002 #define GEN8_PIPE_CONTROL_header \ .CommandType = 3, \ @@ -7631,6 +7807,8 @@ GEN8_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, ._3DCommandSubOpcode = 0, \ .DwordLength = 4 +#define GEN8_PIPE_CONTROL_length 0x00000006 + struct GEN8_PIPE_CONTROL { uint32_t CommandType; uint32_t CommandSubType; @@ -7678,7 +7856,7 @@ struct GEN8_PIPE_CONTROL { static inline void GEN8_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_PIPE_CONTROL * restrict values) + const struct GEN8_PIPE_CONTROL * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7732,22 +7910,6 @@ GEN8_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_3DSTATE_CONSTANT_BODY_length 0x0000000a - -#define GEN8_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 - -#define GEN8_GATHER_CONSTANT_ENTRY_length 0x00000001 - -#define GEN8_VERTEX_BUFFER_STATE_length 0x00000004 - -#define GEN8_VERTEX_ELEMENT_STATE_length 0x00000002 - -#define GEN8_SO_DECL_ENTRY_length 0x00000002 - -#define GEN8_SO_DECL_length 0x00000001 - -#define GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_length 0x00000001 - #define GEN8_SCISSOR_RECT_length 0x00000002 struct GEN8_SCISSOR_RECT { @@ -7759,7 +7921,7 @@ struct GEN8_SCISSOR_RECT { static inline void GEN8_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SCISSOR_RECT * restrict values) + const struct GEN8_SCISSOR_RECT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7796,7 +7958,7 @@ struct GEN8_SF_CLIP_VIEWPORT { static inline void GEN8_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SF_CLIP_VIEWPORT * restrict values) + const struct GEN8_SF_CLIP_VIEWPORT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7866,6 +8028,8 @@ GEN8_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, #define GEN8_BLEND_STATE_length 0x00000011 
+#define GEN8_BLEND_STATE_ENTRY_length 0x00000002 + struct GEN8_BLEND_STATE_ENTRY { bool LogicOpEnable; uint32_t LogicOpFunction; @@ -7891,7 +8055,7 @@ struct GEN8_BLEND_STATE_ENTRY { static inline void GEN8_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_BLEND_STATE_ENTRY * restrict values) + const struct GEN8_BLEND_STATE_ENTRY * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7935,7 +8099,7 @@ struct GEN8_BLEND_STATE { static inline void GEN8_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_BLEND_STATE * restrict values) + const struct GEN8_BLEND_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -7955,8 +8119,6 @@ GEN8_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, GEN8_BLEND_STATE_ENTRY_pack(data, &dw[j], &values->Entry[i]); } -#define GEN8_BLEND_STATE_ENTRY_length 0x00000002 - #define GEN8_CC_VIEWPORT_length 0x00000002 struct GEN8_CC_VIEWPORT { @@ -7966,7 +8128,7 @@ struct GEN8_CC_VIEWPORT { static inline void GEN8_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_CC_VIEWPORT * restrict values) + const struct GEN8_CC_VIEWPORT * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -8001,7 +8163,7 @@ struct GEN8_COLOR_CALC_STATE { static inline void GEN8_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_COLOR_CALC_STATE * restrict values) + const struct GEN8_COLOR_CALC_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -8035,7 +8197,30 @@ GEN8_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 +#define GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576_length 0x00000002 + +struct GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576 { + uint32_t BlackPointOffsetR; + uint32_t BlackPointOffsetG; + uint32_t BlackPointOffsetB; +}; + +static inline void 
+GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BlackPointOffsetR, 0, 12) | + 0; + + dw[1] = + __gen_field(values->BlackPointOffsetG, 13, 25) | + __gen_field(values->BlackPointOffsetB, 0, 12) | + 0; + +} #define GEN8_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 @@ -8087,7 +8272,7 @@ struct GEN8_INTERFACE_DESCRIPTOR_DATA { static inline void GEN8_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_INTERFACE_DESCRIPTOR_DATA * restrict values) + const struct GEN8_INTERFACE_DESCRIPTOR_DATA * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -8137,8 +8322,6 @@ GEN8_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, } -#define GEN8_PALETTE_ENTRY_length 0x00000001 - #define GEN8_BINDING_TABLE_STATE_length 0x00000001 struct GEN8_BINDING_TABLE_STATE { @@ -8147,7 +8330,7 @@ struct GEN8_BINDING_TABLE_STATE { static inline void GEN8_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_BINDING_TABLE_STATE * restrict values) + const struct GEN8_BINDING_TABLE_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -8257,7 +8440,7 @@ struct GEN8_RENDER_SURFACE_STATE { static inline void GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_RENDER_SURFACE_STATE * restrict values) + const struct GEN8_RENDER_SURFACE_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -8373,6 +8556,24 @@ GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, } +#define GEN8_FILTER_COEFFICIENT_length 0x00000001 + +struct GEN8_FILTER_COEFFICIENT { + uint32_t FilterCoefficient; +}; + +static inline void +GEN8_FILTER_COEFFICIENT_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN8_FILTER_COEFFICIENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->FilterCoefficient, 0, 7) | + 0; + +} + #define GEN8_SAMPLER_STATE_length 0x00000004 struct GEN8_SAMPLER_STATE { @@ -8453,7 +8654,7 @@ struct GEN8_SAMPLER_STATE { static inline void GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SAMPLER_STATE * restrict values) + const struct GEN8_SAMPLER_STATE * restrict values) { uint32_t *dw = (uint32_t * restrict) dst; @@ -8501,6 +8702,91 @@ GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, } +#define GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_length 0x00000008 + +struct GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS { + uint32_t Table0YFilterCoefficientn1; + uint32_t Table0XFilterCoefficientn1; + uint32_t Table0YFilterCoefficientn0; + uint32_t Table0XFilterCoefficientn0; + uint32_t Table0YFilterCoefficientn3; + uint32_t Table0XFilterCoefficientn3; + uint32_t Table0YFilterCoefficientn2; + uint32_t Table0XFilterCoefficientn2; + uint32_t Table0YFilterCoefficientn5; + uint32_t Table0XFilterCoefficientn5; + uint32_t Table0YFilterCoefficientn4; + uint32_t Table0XFilterCoefficientn4; + uint32_t Table0YFilterCoefficientn7; + uint32_t Table0XFilterCoefficientn7; + uint32_t Table0YFilterCoefficientn6; + uint32_t Table0XFilterCoefficientn6; + uint32_t Table1XFilterCoefficientn3; + uint32_t Table1XFilterCoefficientn2; + uint32_t Table1XFilterCoefficientn5; + uint32_t Table1XFilterCoefficientn4; + uint32_t Table1YFilterCoefficientn3; + uint32_t Table1YFilterCoefficientn2; + uint32_t Table1YFilterCoefficientn5; + uint32_t Table1YFilterCoefficientn4; +}; + +static inline void +GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Table0YFilterCoefficientn1, 24, 31) | + 
__gen_field(values->Table0XFilterCoefficientn1, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn0, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn0, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Table0YFilterCoefficientn3, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn3, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn2, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn2, 0, 7) | + 0; + + dw[2] = + __gen_field(values->Table0YFilterCoefficientn5, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn5, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn4, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn4, 0, 7) | + 0; + + dw[3] = + __gen_field(values->Table0YFilterCoefficientn7, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn7, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn6, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn6, 0, 7) | + 0; + + dw[4] = + __gen_field(values->Table1XFilterCoefficientn3, 24, 31) | + __gen_field(values->Table1XFilterCoefficientn2, 16, 23) | + 0; + + dw[5] = + __gen_field(values->Table1XFilterCoefficientn5, 8, 15) | + __gen_field(values->Table1XFilterCoefficientn4, 0, 7) | + 0; + + dw[6] = + __gen_field(values->Table1YFilterCoefficientn3, 24, 31) | + __gen_field(values->Table1YFilterCoefficientn2, 16, 23) | + 0; + + dw[7] = + __gen_field(values->Table1YFilterCoefficientn5, 8, 15) | + __gen_field(values->Table1YFilterCoefficientn4, 0, 7) | + 0; + +} + /* Enum 3D_Prim_Topo_Type */ #define _3DPRIM_POINTLIST 1 #define _3DPRIM_LINELIST 2 -- cgit v1.2.3 From 16c5b9f4eda04d22c3c7b4cba0fe6303f149f86d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 14 Aug 2015 15:22:57 -0700 Subject: vk: Build a version of the driver for linking into unit tests --- src/vulkan/Makefile.am | 19 ++++++- src/vulkan/anv_gem_stubs.c | 136 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+), 2 deletions(-) create mode 100644 
src/vulkan/anv_gem_stubs.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index c816f97034f..31367de42a2 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -30,6 +30,8 @@ vulkan_include_HEADERS = \ lib_LTLIBRARIES = libvulkan.la +check_LTLIBRARIES = libvulkan-test.la + # The gallium includes are for the util/u_math.h include from main/macros.h AM_CPPFLAGS = \ @@ -53,7 +55,7 @@ libvulkan_la_CFLAGS = \ libvulkan_la_CXXFLAGS = \ -Wall -Wextra -Wno-unused-parameter -fvisibility=hidden -O0 -g -libvulkan_la_SOURCES = \ +VULKAN_SOURCES = \ anv_allocator.c \ anv_aub.c \ anv_cmd_buffer.c \ @@ -63,7 +65,6 @@ libvulkan_la_SOURCES = \ anv_entrypoints.c \ anv_entrypoints.h \ anv_formats.c \ - anv_gem.c \ anv_image.c \ anv_intel.c \ anv_meta.c \ @@ -73,6 +74,10 @@ libvulkan_la_SOURCES = \ anv_util.c \ anv_x11.c +libvulkan_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem.c + BUILT_SOURCES = \ anv_entrypoints.h \ anv_entrypoints.c \ @@ -92,4 +97,14 @@ CLEANFILES = $(BUILT_SOURCES) libvulkan_la_LIBADD = -lxcb -lxcb-dri3 \ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la +# Libvulkan with dummy gem. Used for unit tests. 
+ +libvulkan_test_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem_stubs.c + +libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) +libvulkan_test_la_CXXFLAGS = $(libvulkan_la_CXXFLAGS) +libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) + include $(top_srcdir)/install-lib-links.mk diff --git a/src/vulkan/anv_gem_stubs.c b/src/vulkan/anv_gem_stubs.c new file mode 100644 index 00000000000..d036314c446 --- /dev/null +++ b/src/vulkan/anv_gem_stubs.c @@ -0,0 +1,136 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define _DEFAULT_SOURCE + +#include +#include +#include + +#include "anv_private.h" + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + int fd = memfd_create("fake bo", MFD_CLOEXEC); + if (fd == -1) + return 0; + + assert(fd != 0); + + if (ftruncate(fd, size) == -1) + return 0; + + return fd; +} + +void +anv_gem_close(struct anv_device *device, int gem_handle) +{ + close(gem_handle); +} + +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size) +{ + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + gem_handle, offset); +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). + */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + munmap(p, size); +} + +int +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + return -1; +} + +int +anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns) +{ + return 0; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return 0; +} + +int +anv_gem_set_tiling(struct anv_device *device, + int gem_handle, uint32_t stride, uint32_t tiling) +{ + return 0; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + unreachable("Unused"); +} + +int +anv_gem_create_context(struct anv_device *device) +{ + unreachable("Unused"); +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + unreachable("Unused"); +} + +int +anv_gem_get_aperture(int fd, uint64_t *size) +{ + unreachable("Unused"); +} + +int +anv_gem_handle_to_fd(struct anv_device *device, int gem_handle) +{ + unreachable("Unused"); +} + +int +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + unreachable("Unused"); +} -- cgit v1.2.3 From 
b4c02253c4e1a7bc5a7a6369045210932f5de605 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 14 Aug 2015 15:24:01 -0700 Subject: vk: Add four unit tests for our lock-free data-structures --- configure.ac | 1 + src/vulkan/Makefile.am | 1 + src/vulkan/tests/.gitignore | 4 + src/vulkan/tests/Makefile.am | 45 +++++++++++ src/vulkan/tests/block_pool_no_free.c | 104 ++++++++++++++++++++++++ src/vulkan/tests/state_pool.c | 53 ++++++++++++ src/vulkan/tests/state_pool_free_list_only.c | 64 +++++++++++++++ src/vulkan/tests/state_pool_no_free.c | 115 +++++++++++++++++++++++++++ src/vulkan/tests/state_pool_test_helper.h | 71 +++++++++++++++++ 9 files changed, 458 insertions(+) create mode 100644 src/vulkan/tests/.gitignore create mode 100644 src/vulkan/tests/Makefile.am create mode 100644 src/vulkan/tests/block_pool_no_free.c create mode 100644 src/vulkan/tests/state_pool.c create mode 100644 src/vulkan/tests/state_pool_free_list_only.c create mode 100644 src/vulkan/tests/state_pool_no_free.c create mode 100644 src/vulkan/tests/state_pool_test_helper.h (limited to 'src') diff --git a/configure.ac b/configure.ac index 33aacd2ec06..e78a4ba6325 100644 --- a/configure.ac +++ b/configure.ac @@ -2439,6 +2439,7 @@ AC_CONFIG_FILES([Makefile src/mesa/drivers/x11/Makefile src/mesa/main/tests/Makefile src/vulkan/Makefile + src/vulkan/tests/Makefile src/util/Makefile src/util/tests/hash_table/Makefile]) diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 31367de42a2..9b15871eb43 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -19,6 +19,7 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. +SUBDIRS = . 
tests vulkan_includedir = $(includedir)/vulkan diff --git a/src/vulkan/tests/.gitignore b/src/vulkan/tests/.gitignore new file mode 100644 index 00000000000..9f4be5270f6 --- /dev/null +++ b/src/vulkan/tests/.gitignore @@ -0,0 +1,4 @@ +block_pool +state_pool +state_pool_free_list_only +state_pool_no_free diff --git a/src/vulkan/tests/Makefile.am b/src/vulkan/tests/Makefile.am new file mode 100644 index 00000000000..7b15bb002be --- /dev/null +++ b/src/vulkan/tests/Makefile.am @@ -0,0 +1,45 @@ +# Copyright © 2009 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# on the rights to use, copy, modify, merge, publish, distribute, sub +# license, and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/vulkan + +LDADD = \ + $(top_builddir)/src/vulkan/libvulkan-test.la \ + $(PTHREAD_LIBS) -lm -lstdc++ + +check_PROGRAMS = \ + block_pool_no_free \ + state_pool_no_free \ + state_pool_free_list_only \ + state_pool + +TESTS = $(check_PROGRAMS) diff --git a/src/vulkan/tests/block_pool_no_free.c b/src/vulkan/tests/block_pool_no_free.c new file mode 100644 index 00000000000..898a82b0909 --- /dev/null +++ b/src/vulkan/tests/block_pool_no_free.c @@ -0,0 +1,104 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "anv_private.h" + +#define NUM_THREADS 16 +#define BLOCKS_PER_THREAD 1024 +#define NUM_RUNS 64 + +struct job { + pthread_t thread; + unsigned id; + struct anv_block_pool *pool; + uint32_t blocks[BLOCKS_PER_THREAD]; +} jobs[NUM_THREADS]; + +static void *alloc_blocks(void *_job) +{ + struct job *job = _job; + + for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) + job->blocks[i] = anv_block_pool_alloc(job->pool); + + return NULL; +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool pool; + + anv_block_pool_init(&pool, &device, 16); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + /* A list of indices, one per thread */ + unsigned next[NUM_THREADS]; + memset(next, 0, sizeof(next)); + + int highest = -1; + while (true) { + /* First, we find which thread has the highest next element */ + int thread_max = -1; + int max_thread_idx = -1; + for (unsigned i = 0; i < NUM_THREADS; i++) { + if (next[i] >= BLOCKS_PER_THREAD) + continue; + + if (thread_max < jobs[i].blocks[next[i]]) { + thread_max = jobs[i].blocks[next[i]]; + max_thread_idx = i; + } + } + + /* The only way this can happen is if all of the next[] values are at + * BLOCKS_PER_THREAD, in which case, we're done. 
+ */ + if (thread_max == -1) + break; + + /* That next element had better be higher than the previous highest */ + assert(jobs[max_thread_idx].blocks[next[max_thread_idx]] > highest); + + highest = jobs[max_thread_idx].blocks[next[max_thread_idx]]; + next[max_thread_idx]++; + } + + anv_block_pool_finish(&pool); +} + +int main(int argc, char **argv) +{ + for (unsigned i = 0; i < NUM_RUNS; i++) + run_test(); +} diff --git a/src/vulkan/tests/state_pool.c b/src/vulkan/tests/state_pool.c new file mode 100644 index 00000000000..e235ee9b394 --- /dev/null +++ b/src/vulkan/tests/state_pool.c @@ -0,0 +1,53 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include + +#include "anv_private.h" + +#define NUM_THREADS 8 +#define STATES_PER_THREAD_LOG2 10 +#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) +#define NUM_RUNS 64 + +#include "state_pool_test_helper.h" + +int main(int argc, char **argv) +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + for (unsigned i = 0; i < NUM_RUNS; i++) { + anv_block_pool_init(&block_pool, &device, 256); + anv_state_pool_init(&state_pool, &block_pool); + + /* Grab one so a zero offset is impossible */ + anv_state_pool_alloc(&state_pool, 16, 16); + + run_state_pool_test(&state_pool); + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + } +} diff --git a/src/vulkan/tests/state_pool_free_list_only.c b/src/vulkan/tests/state_pool_free_list_only.c new file mode 100644 index 00000000000..9e89cf6425f --- /dev/null +++ b/src/vulkan/tests/state_pool_free_list_only.c @@ -0,0 +1,64 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "anv_private.h" + +#define NUM_THREADS 8 +#define STATES_PER_THREAD_LOG2 12 +#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) + +#include "state_pool_test_helper.h" + +int main(int argc, char **argv) +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + anv_block_pool_init(&block_pool, &device, 4096); + anv_state_pool_init(&state_pool, &block_pool); + + /* Grab one so a zero offset is impossible */ + anv_state_pool_alloc(&state_pool, 16, 16); + + /* Grab and return enough states that the state pool test below won't + * actually ever resize anything. + */ + { + struct anv_state states[NUM_THREADS * STATES_PER_THREAD]; + for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) { + states[i] = anv_state_pool_alloc(&state_pool, 16, 16); + assert(states[i].offset != 0); + } + + for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) + anv_state_pool_free(&state_pool, states[i]); + } + + run_state_pool_test(&state_pool); + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); +} diff --git a/src/vulkan/tests/state_pool_no_free.c b/src/vulkan/tests/state_pool_no_free.c new file mode 100644 index 00000000000..4b3ca78974f --- /dev/null +++ b/src/vulkan/tests/state_pool_no_free.c @@ -0,0 +1,115 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the 
Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "anv_private.h" + +#define NUM_THREADS 16 +#define STATES_PER_THREAD 1024 +#define NUM_RUNS 64 + +struct job { + pthread_t thread; + unsigned id; + struct anv_state_pool *pool; + uint32_t offsets[STATES_PER_THREAD]; +} jobs[NUM_THREADS]; + +pthread_barrier_t barrier; + +static void *alloc_states(void *_job) +{ + struct job *job = _job; + + pthread_barrier_wait(&barrier); + + for (unsigned i = 0; i < STATES_PER_THREAD; i++) { + struct anv_state state = anv_state_pool_alloc(job->pool, 16, 16); + job->offsets[i] = state.offset; + } + + return NULL; +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + anv_block_pool_init(&block_pool, &device, 64); + anv_state_pool_init(&state_pool, &block_pool); + + pthread_barrier_init(&barrier, NULL, NUM_THREADS); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &state_pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + /* A list of indices, one per thread */ + unsigned next[NUM_THREADS]; + memset(next, 0, 
sizeof(next)); + + int highest = -1; + while (true) { + /* First, we find which thread has the highest next element */ + int thread_max = -1; + int max_thread_idx = -1; + for (unsigned i = 0; i < NUM_THREADS; i++) { + if (next[i] >= STATES_PER_THREAD) + continue; + + if (thread_max < jobs[i].offsets[next[i]]) { + thread_max = jobs[i].offsets[next[i]]; + max_thread_idx = i; + } + } + + /* The only way this can happen is if all of the next[] values are at + * BLOCKS_PER_THREAD, in which case, we're done. + */ + if (thread_max == -1) + break; + + /* That next element had better be higher than the previous highest */ + assert(jobs[max_thread_idx].offsets[next[max_thread_idx]] > highest); + + highest = jobs[max_thread_idx].offsets[next[max_thread_idx]]; + next[max_thread_idx]++; + } + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); +} + +int main(int argc, char **argv) +{ + for (unsigned i = 0; i < NUM_RUNS; i++) + run_test(); +} diff --git a/src/vulkan/tests/state_pool_test_helper.h b/src/vulkan/tests/state_pool_test_helper.h new file mode 100644 index 00000000000..0e56431303f --- /dev/null +++ b/src/vulkan/tests/state_pool_test_helper.h @@ -0,0 +1,71 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +struct job { + struct anv_state_pool *pool; + unsigned id; + pthread_t thread; +} jobs[NUM_THREADS]; + +pthread_barrier_t barrier; + +static void *alloc_states(void *void_job) +{ + struct job *job = void_job; + + const unsigned chunk_size = 1 << (job->id % STATES_PER_THREAD_LOG2); + const unsigned num_chunks = STATES_PER_THREAD / chunk_size; + + struct anv_state states[chunk_size]; + + pthread_barrier_wait(&barrier); + + for (unsigned c = 0; c < num_chunks; c++) { + for (unsigned i = 0; i < chunk_size; i++) { + states[i] = anv_state_pool_alloc(job->pool, 16, 16); + memset(states[i].map, 139, 16); + assert(states[i].offset != 0); + } + + for (unsigned i = 0; i < chunk_size; i++) + anv_state_pool_free(job->pool, states[i]); + } + + return NULL; +} + +static void run_state_pool_test(struct anv_state_pool *state_pool) +{ + pthread_barrier_init(&barrier, NULL, NUM_THREADS); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = state_pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); +} -- cgit v1.2.3 From 0deae66eb138ada869105a359ce0c87e412ed713 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 17 Aug 2015 11:40:13 -0700 Subject: vk: Add an _autogen suffix autogenerated spirv file names This prevents make from stomping on nir_spirv.h --- src/vulkan/Makefile.am | 4 ++-- src/vulkan/anv_meta.c | 2 +- 2 files changed, 3 
insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 9b15871eb43..aeebca4b9f5 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -82,7 +82,7 @@ libvulkan_la_SOURCES = \ BUILT_SOURCES = \ anv_entrypoints.h \ anv_entrypoints.c \ - anv_meta_spirv.h + anv_meta_spirv_autogen.h anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< header > $@ @@ -90,7 +90,7 @@ anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< code > $@ -%_spirv.h: %.c glsl_scraper.py +%_spirv_autogen.h: %.c glsl_scraper.py $(AM_V_GEN) $(PYTHON2) $(srcdir)/glsl_scraper.py --glsl-only -o $@ $< CLEANFILES = $(BUILT_SOURCES) diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 8ee7eb012e6..29995330b04 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -28,7 +28,7 @@ #include #include "anv_private.h" -#include "anv_meta_spirv.h" +#include "anv_meta_spirv_autogen.h" static void anv_device_init_meta_clear_state(struct anv_device *device) -- cgit v1.2.3 From 6d09d0644b4c3a9e4512c193686cfa556cdaf00f Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 14 Aug 2015 10:02:19 -0700 Subject: vk: Use anv_image_create() for creating dmabuf VkImage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to make sure we use the VkImage infrastructure for creating dmabuf images. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_image.c | 6 ++++-- src/vulkan/anv_intel.c | 36 ++++++++++++++++++++++++------------ src/vulkan/anv_private.h | 1 + src/vulkan/anv_x11.c | 1 + 4 files changed, 30 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 21099cb7730..2937c2e76e5 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -181,8 +181,10 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, */ assert(anv_is_aligned(qpitch, j)); - const uint32_t stride = align_u32(mt_width * format_info->cpp, - tile_info->width); + uint32_t stride = align_u32(mt_width * format_info->cpp, tile_info->width); + if (create_info->stride > 0) + stride = create_info->stride; + const uint32_t size = stride * align_u32(mt_height, tile_info->height); const uint32_t offset = align_u32(*inout_image_size, tile_info->surface_alignment); diff --git a/src/vulkan/anv_intel.c b/src/vulkan/anv_intel.c index 9fc06aef6f8..f64e2dcb1e4 100644 --- a/src/vulkan/anv_intel.c +++ b/src/vulkan/anv_intel.c @@ -39,6 +39,7 @@ VkResult anv_CreateDmaBufImageINTEL( struct anv_device_memory *mem; struct anv_image *image; VkResult result; + VkImage image_h; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); @@ -65,19 +66,30 @@ VkResult anv_CreateDmaBufImageINTEL( goto fail_mem; } - *image = (struct anv_image) { - .bo = &mem->bo, - .offset = 0, - .type = VK_IMAGE_TYPE_2D, - .extent = pCreateInfo->extent, - .size = mem->bo.size, - - .primary_surface = { - .offset = 0, - .stride = pCreateInfo->strideInBytes, + anv_image_create(_device, + &(struct anv_image_create_info) { + .force_tile_mode = true, .tile_mode = XMAJOR, - }, - }; + .stride = pCreateInfo->strideInBytes, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->format, + .extent = pCreateInfo->extent, + .mipLevels = 
1, + .arraySize = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + &image_h); + + image = anv_image_from_handle(image_h); + image->bo = &mem->bo; + image->offset = 0; assert(image->extent.width > 0); assert(image->extent.height > 0); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index d53f63d5d27..d517814847c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -996,6 +996,7 @@ struct anv_image_create_info { const VkImageCreateInfo *vk_info; bool force_tile_mode; uint8_t tile_mode; + uint32_t stride; }; VkResult anv_image_create(VkDevice _device, diff --git a/src/vulkan/anv_x11.c b/src/vulkan/anv_x11.c index 9ffce8d8cbf..cc11a5fc762 100644 --- a/src/vulkan/anv_x11.c +++ b/src/vulkan/anv_x11.c @@ -121,6 +121,7 @@ VkResult anv_CreateSwapChainWSI( &(struct anv_image_create_info) { .force_tile_mode = true, .tile_mode = XMAJOR, + .stride = 0, .vk_info = &(VkImageCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, -- cgit v1.2.3 From aac6f7c3bb4630c57bed25f3777bf81140206aad Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 14 Aug 2015 09:39:01 -0700 Subject: vk: Drop aub dumper and PCI ID override feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are now available in intel_aubdump from intel-gpu-tools. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/Makefile.am | 1 - src/vulkan/anv_aub.c | 293 ----------------------------------------------- src/vulkan/anv_device.c | 92 +++------------ src/vulkan/anv_private.h | 10 -- 4 files changed, 19 insertions(+), 377 deletions(-) delete mode 100644 src/vulkan/anv_aub.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index aeebca4b9f5..10179364bf8 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -58,7 +58,6 @@ libvulkan_la_CXXFLAGS = \ VULKAN_SOURCES = \ anv_allocator.c \ - anv_aub.c \ anv_cmd_buffer.c \ anv_batch_chain.c \ anv_compiler.cpp \ diff --git a/src/vulkan/anv_aub.c b/src/vulkan/anv_aub.c deleted file mode 100644 index e4a35873590..00000000000 --- a/src/vulkan/anv_aub.c +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "anv_private.h" -#include "anv_aub.h" - -struct anv_aub_writer { - FILE *file; - uint32_t offset; - int gen; -}; - -static void -aub_out(struct anv_aub_writer *writer, uint32_t data) -{ - fwrite(&data, 1, 4, writer->file); -} - -static void -aub_out_data(struct anv_aub_writer *writer, const void *data, size_t size) -{ - fwrite(data, 1, size, writer->file); -} - -static struct anv_aub_writer * -get_anv_aub_writer(struct anv_device *device) -{ - struct anv_aub_writer *writer = device->aub_writer; - int entry = 0x200003; - int i; - int gtt_size = 0x10000; - const char *filename; - - if (geteuid() != getuid()) - return NULL; - - if (writer) - return writer; - - writer = malloc(sizeof(*writer)); - if (writer == NULL) - return NULL; - - filename = "intel.aub"; - writer->gen = device->info.gen; - writer->file = fopen(filename, "w+"); - if (!writer->file) { - free(writer); - return NULL; - } - - /* Start allocating objects from just after the GTT. */ - writer->offset = gtt_size; - - /* Start with a (required) version packet. */ - aub_out(writer, CMD_AUB_HEADER | (13 - 2)); - aub_out(writer, - (4 << AUB_HEADER_MAJOR_SHIFT) | - (0 << AUB_HEADER_MINOR_SHIFT)); - for (i = 0; i < 8; i++) { - aub_out(writer, 0); /* app name */ - } - aub_out(writer, 0); /* timestamp */ - aub_out(writer, 0); /* timestamp */ - aub_out(writer, 0); /* comment len */ - - /* Set up the GTT. The max we can handle is 256M */ - aub_out(writer, CMD_AUB_TRACE_HEADER_BLOCK | ((writer->gen >= 8 ? 
6 : 5) - 2)); - aub_out(writer, - AUB_TRACE_MEMTYPE_GTT_ENTRY | - AUB_TRACE_TYPE_NOTYPE | AUB_TRACE_OP_DATA_WRITE); - aub_out(writer, 0); /* subtype */ - aub_out(writer, 0); /* offset */ - aub_out(writer, gtt_size); /* size */ - if (writer->gen >= 8) - aub_out(writer, 0); - for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { - aub_out(writer, entry); - } - - return device->aub_writer = writer; -} - -void -anv_aub_writer_destroy(struct anv_aub_writer *writer) -{ - fclose(writer->file); - free(writer); -} - - -/** - * Break up large objects into multiple writes. Otherwise a 128kb VBO - * would overflow the 16 bits of size field in the packet header and - * everything goes badly after that. - */ -static void -aub_write_trace_block(struct anv_aub_writer *writer, uint32_t type, - void *virtual, uint32_t size, uint32_t gtt_offset) -{ - uint32_t block_size; - uint32_t offset; - uint32_t subtype = 0; - static const char null_block[8 * 4096]; - - for (offset = 0; offset < size; offset += block_size) { - block_size = size - offset; - - if (block_size > 8 * 4096) - block_size = 8 * 4096; - - aub_out(writer, - CMD_AUB_TRACE_HEADER_BLOCK | - ((writer->gen >= 8 ? 6 : 5) - 2)); - aub_out(writer, - AUB_TRACE_MEMTYPE_GTT | - type | AUB_TRACE_OP_DATA_WRITE); - aub_out(writer, subtype); - aub_out(writer, gtt_offset + offset); - aub_out(writer, align_u32(block_size, 4)); - if (writer->gen >= 8) - aub_out(writer, 0); - - if (virtual) - aub_out_data(writer, (char *) virtual + offset, block_size); - else - aub_out_data(writer, null_block, block_size); - - /* Pad to a multiple of 4 bytes. 
*/ - aub_out_data(writer, null_block, -block_size & 3); - } -} - -/* - * Make a ringbuffer on fly and dump it - */ -static void -aub_build_dump_ringbuffer(struct anv_aub_writer *writer, - uint32_t batch_offset, uint32_t offset, - int ring_flag) -{ - uint32_t ringbuffer[4096]; - int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ - int ring_count = 0; - - if (ring_flag == I915_EXEC_BSD) - ring = AUB_TRACE_TYPE_RING_PRB1; - else if (ring_flag == I915_EXEC_BLT) - ring = AUB_TRACE_TYPE_RING_PRB2; - - /* Make a ring buffer to execute our batchbuffer. */ - memset(ringbuffer, 0, sizeof(ringbuffer)); - if (writer->gen >= 8) { - ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); - ringbuffer[ring_count++] = batch_offset; - ringbuffer[ring_count++] = 0; - } else { - ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; - ringbuffer[ring_count++] = batch_offset; - } - - /* Write out the ring. This appears to trigger execution of - * the ring in the simulator. - */ - aub_out(writer, - CMD_AUB_TRACE_HEADER_BLOCK | - ((writer->gen >= 8 ? 6 : 5) - 2)); - aub_out(writer, - AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); - aub_out(writer, 0); /* general/surface subtype */ - aub_out(writer, offset); - aub_out(writer, ring_count * 4); - if (writer->gen >= 8) - aub_out(writer, 0); - - /* FIXME: Need some flush operations here? 
*/ - aub_out_data(writer, ringbuffer, ring_count * 4); -} - -struct aub_bo { - uint32_t size; - uint32_t offset; - void *map; - void *relocated; -}; - -static void -relocate_bo(struct aub_bo *aub_bo, - const struct drm_i915_gem_exec_object2 *gem_obj, - struct aub_bo *aub_bos) -{ - const struct drm_i915_gem_relocation_entry *relocs = - (const struct drm_i915_gem_relocation_entry *) gem_obj->relocs_ptr; - uint32_t *dw; - - aub_bo->relocated = malloc(aub_bo->size); - memcpy(aub_bo->relocated, aub_bo->map, aub_bo->size); - for (size_t i = 0; i < gem_obj->relocation_count; i++) { - assert(relocs[i].offset < aub_bo->size); - dw = aub_bo->relocated + relocs[i].offset; - *dw = aub_bos[relocs[i].target_handle].offset + relocs[i].delta; - } -} - -void -anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_aub_writer *writer; - struct anv_bo *bo; - uint32_t ring_flag = 0; - uint32_t offset; - struct aub_bo *aub_bos; - - writer = get_anv_aub_writer(device); - if (writer == NULL) - return; - - aub_bos = malloc(cmd_buffer->execbuf2.bo_count * sizeof(aub_bos[0])); - offset = writer->offset; - for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) { - bo = cmd_buffer->execbuf2.bos[i]; - if (bo->map) - aub_bos[i].map = bo->map; - else - aub_bos[i].map = anv_gem_mmap(device, bo->gem_handle, 0, bo->size); - aub_bos[i].size = bo->size; - aub_bos[i].relocated = aub_bos[i].map; - aub_bos[i].offset = offset; - offset = align_u32(offset + bo->size + 4095, 4096); - } - - for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) - relocate_bo(&aub_bos[i], &cmd_buffer->execbuf2.objects[i], aub_bos); - - struct aub_bo *batch_bo = &aub_bos[cmd_buffer->execbuf2.bo_count - 1]; - - for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) { - bo = cmd_buffer->execbuf2.bos[i]; - if (&aub_bos[i] == batch_bo) { - aub_write_trace_block(writer, AUB_TRACE_TYPE_BATCH, - aub_bos[i].relocated, - bo->size, aub_bos[i].offset); - 
} else { - aub_write_trace_block(writer, AUB_TRACE_TYPE_NOTYPE, - aub_bos[i].relocated, - bo->size, aub_bos[i].offset); - } - if (aub_bos[i].relocated != aub_bos[i].map) - free(aub_bos[i].relocated); - if (aub_bos[i].map != bo->map) - anv_gem_munmap(aub_bos[i].map, bo->size); - } - - /* Dump ring buffer */ - aub_build_dump_ringbuffer(writer, batch_bo->offset, offset, ring_flag); - - free(aub_bos); - - fflush(writer->file); -} diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 76381e615d3..0653f8bab16 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -31,17 +31,6 @@ #include "mesa/main/git_sha1.h" #include "util/strtod.h" -static int -anv_env_get_int(const char *name) -{ - const char *val = getenv(name); - - if (!val) - return 0; - - return strtol(val, NULL, 0); -} - static VkResult anv_physical_device_init(struct anv_physical_device *device, struct anv_instance *instance, @@ -56,14 +45,7 @@ anv_physical_device_init(struct anv_physical_device *device, device->instance = instance; device->path = path; - device->chipset_id = anv_env_get_int("INTEL_DEVID_OVERRIDE"); - device->no_hw = false; - if (device->chipset_id) { - /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. 
*/ - device->no_hw = true; - } else { - device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); - } + device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); if (!device->chipset_id) goto fail; @@ -494,26 +476,6 @@ PFN_vkVoidFunction anv_GetDeviceProcAddr( return anv_lookup_entrypoint(pName); } -static void -parse_debug_flags(struct anv_device *device) -{ - const char *debug, *p, *end; - - debug = getenv("INTEL_DEBUG"); - device->dump_aub = false; - if (debug) { - for (p = debug; *p; p = end + 1) { - end = strchrnul(p, ','); - if (end - p == 3 && memcmp(p, "aub", 3) == 0) - device->dump_aub = true; - if (end - p == 5 && memcmp(p, "no_hw", 5) == 0) - device->no_hw = true; - if (*end == '\0') - break; - } - } -} - static VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue) { @@ -575,9 +537,6 @@ VkResult anv_CreateDevice( if (!device) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - device->no_hw = physical_device->no_hw; - parse_debug_flags(device); - device->instance = physical_device->instance; /* XXX(chadv): Can we dup() physicalDevice->fd here? 
*/ @@ -607,7 +566,6 @@ VkResult anv_CreateDevice( device->info = *physical_device->info; device->compiler = anv_compiler_create(device); - device->aub_writer = NULL; pthread_mutex_init(&device->mutex, NULL); @@ -657,9 +615,6 @@ VkResult anv_DestroyDevice( close(device->fd); - if (device->aub_writer) - anv_aub_writer_destroy(device->aub_writer); - anv_instance_free(device->instance, device); return VK_SUCCESS; @@ -763,25 +718,18 @@ VkResult anv_QueueSubmit( assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - if (device->dump_aub) - anv_cmd_buffer_dump(cmd_buffer); + ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); + if (ret != 0) + return vk_error(VK_ERROR_UNKNOWN); - if (!device->no_hw) { - ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); + if (fence) { + ret = anv_gem_execbuffer(device, &fence->execbuf); if (ret != 0) return vk_error(VK_ERROR_UNKNOWN); - - if (fence) { - ret = anv_gem_execbuffer(device, &fence->execbuf); - if (ret != 0) - return vk_error(VK_ERROR_UNKNOWN); - } - - for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) - cmd_buffer->execbuf2.bos[i]->offset = cmd_buffer->execbuf2.objects[i].offset; - } else { - *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial; } + + for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) + cmd_buffer->execbuf2.bos[i]->offset = cmd_buffer->execbuf2.objects[i].offset; } return VK_SUCCESS; @@ -838,19 +786,17 @@ VkResult anv_DeviceWaitIdle( execbuf.rsvd1 = device->context_id; execbuf.rsvd2 = 0; - if (!device->no_hw) { - ret = anv_gem_execbuffer(device, &execbuf); - if (ret != 0) { - result = vk_error(VK_ERROR_UNKNOWN); - goto fail; - } + ret = anv_gem_execbuffer(device, &execbuf); + if (ret != 0) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } - timeout = INT64_MAX; - ret = anv_gem_wait(device, bo->gem_handle, &timeout); - if (ret != 0) { - result = vk_error(VK_ERROR_UNKNOWN); - goto fail; - } + timeout = INT64_MAX; + ret = anv_gem_wait(device, 
bo->gem_handle, &timeout); + if (ret != 0) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; } anv_state_pool_free(&device->dynamic_state_pool, state); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index d517814847c..a94dd63b6ae 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -209,11 +209,6 @@ struct anv_bo { uint32_t index; uint64_t offset; uint64_t size; - - /* This field is here for the benefit of the aub dumper. It can (and for - * userptr bos it must) be set to the cpu map of the buffer. Destroying - * the bo won't clean up the mmap, it's still the responsibility of the bo - * user to do that. */ void *map; }; @@ -335,7 +330,6 @@ void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); struct anv_physical_device { struct anv_instance * instance; uint32_t chipset_id; - bool no_hw; const char * path; const char * name; const struct brw_device_info * info; @@ -395,8 +389,6 @@ struct anv_device { struct brw_device_info info; int context_id; int fd; - bool no_hw; - bool dump_aub; struct anv_bo_pool batch_bo_pool; @@ -416,7 +408,6 @@ struct anv_device { struct anv_block_pool scratch_block_pool; struct anv_compiler * compiler; - struct anv_aub_writer * aub_writer; pthread_mutex_t mutex; }; @@ -793,7 +784,6 @@ void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, const VkClearValue *clear_values); void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); -void anv_aub_writer_destroy(struct anv_aub_writer *writer); struct anv_fence { struct anv_bo bo; -- cgit v1.2.3 From e39e1f4d2413d6eadb38b4f9e793efa2d0ce87cd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 17 Aug 2015 11:47:20 -0700 Subject: vk: Update .gitignore for the autogenerated spirv changes --- src/vulkan/.gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore index 30c614497e5..8bc1c2eda93 100644 --- a/src/vulkan/.gitignore +++ 
b/src/vulkan/.gitignore @@ -1,4 +1,4 @@ # Generated source files -/*_spirv.h +/*_spirv_autogen.h /anv_entrypoints.c /anv_entrypoints.h -- cgit v1.2.3 From 4ae42c83ec46be3e200120e928a8060628a45bb6 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 17 Aug 2015 12:52:09 -0700 Subject: vk: Store the original VkFormat in anv_format Store the original VkFormat as anv_format::vk_format. This will be used to reduce format indirection, such as lookups into the VkFormat -> anv_format translation table. --- src/vulkan/anv_formats.c | 2 +- src/vulkan/anv_private.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 3cbcff5730f..1d1d1bbe135 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -26,7 +26,7 @@ #define UNSUPPORTED 0xffff #define fmt(__vk_fmt, ...) \ - [__vk_fmt] = { .name = #__vk_fmt, __VA_ARGS__ } + [__vk_fmt] = { .vk_format = __vk_fmt, .name = #__vk_fmt, __VA_ARGS__ } static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_UNDEFINED, RAW, .cpp = 1, .num_channels = 1), diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a94dd63b6ae..dc14dd2818a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -870,6 +870,7 @@ int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipelin void anv_compiler_free(struct anv_pipeline *pipeline); struct anv_format { + const VkFormat vk_format; const char *name; uint16_t surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ uint8_t cpp; /**< Bytes-per-pixel of anv_format::surface_format. */ -- cgit v1.2.3 From ded736f16a9980eff3009b25b4b89763237c6cea Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 17 Aug 2015 13:10:40 -0700 Subject: vk: Add anv_format reference to anv_image Change type of anv_image::format from VkFormat to const struct anv_format*. This reduces the number of lookups in the VkFormat -> anv_format table. 
--- src/vulkan/anv_image.c | 27 ++++++++++----------------- src/vulkan/anv_meta.c | 22 +++++++++++----------- src/vulkan/anv_private.h | 2 +- 3 files changed, 22 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 2937c2e76e5..329955fff59 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -242,9 +242,6 @@ anv_image_create(VkDevice _device, return vk_error(VK_ERROR_INVALID_MEMORY_SIZE); } - const struct anv_format *format_info = - anv_format_for_vk_format(pCreateInfo->format); - image = anv_device_alloc(device, sizeof(*image), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (!image) @@ -253,12 +250,12 @@ anv_image_create(VkDevice _device, memset(image, 0, sizeof(*image)); image->type = pCreateInfo->imageType; image->extent = pCreateInfo->extent; - image->format = pCreateInfo->format; + image->format = anv_format_for_vk_format(pCreateInfo->format); image->levels = pCreateInfo->mipLevels; image->array_size = pCreateInfo->arraySize; image->surf_type = surf_type; - if (likely(!format_info->has_stencil || format_info->depth_format)) { + if (likely(!image->format->has_stencil || image->format->depth_format)) { /* The image's primary surface is a color or depth surface. */ r = anv_image_make_surface(create_info, &image->size, &image->alignment, &image->primary_surface); @@ -266,7 +263,7 @@ anv_image_create(VkDevice _device, goto fail; } - if (format_info->has_stencil) { + if (image->format->has_stencil) { /* From the GPU's perspective, the depth buffer and stencil buffer are * separate buffers. From Vulkan's perspective, though, depth and * stencil reside in the same image. To satisfy Vulkan and the GPU, we @@ -465,7 +462,6 @@ anv_validate_CreateImageView(VkDevice _device, const VkImageSubresourceRange *subresource; const struct anv_image_view_info *view_info; const struct anv_format *view_format_info; - const struct anv_format *image_format_info; /* Validate structure type before dereferencing it. 
*/ assert(pCreateInfo); @@ -480,7 +476,6 @@ anv_validate_CreateImageView(VkDevice _device, /* Validate format is in range before using it. */ assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); assert(pCreateInfo->format <= VK_FORMAT_END_RANGE); - image_format_info = anv_format_for_vk_format(image->format); view_format_info = anv_format_for_vk_format(pCreateInfo->format); /* Validate channel swizzles. */ @@ -512,20 +507,20 @@ anv_validate_CreateImageView(VkDevice _device, /* Validate format. */ switch (subresource->aspect) { case VK_IMAGE_ASPECT_COLOR: - assert(!image_format_info->depth_format); - assert(!image_format_info->has_stencil); + assert(!image->format->depth_format); + assert(!image->format->has_stencil); assert(!view_format_info->depth_format); assert(!view_format_info->has_stencil); - assert(view_format_info->cpp == image_format_info->cpp); + assert(view_format_info->cpp == image->format->cpp); break; case VK_IMAGE_ASPECT_DEPTH: - assert(image_format_info->depth_format); + assert(image->format->depth_format); assert(view_format_info->depth_format); - assert(view_format_info->cpp == image_format_info->cpp); + assert(view_format_info->cpp == image->format->cpp); break; case VK_IMAGE_ASPECT_STENCIL: /* FINISHME: Is it legal to have an R8 view of S8? 
*/ - assert(image_format_info->has_stencil); + assert(image->format->has_stencil); assert(view_format_info->has_stencil); break; default: @@ -672,8 +667,6 @@ anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); struct anv_surface *depth_surface = &image->primary_surface; struct anv_surface *stencil_surface = &image->stencil_surface; - const struct anv_format *format = - anv_format_for_vk_format(image->format); view->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL; @@ -686,7 +679,7 @@ anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, view->depth_stride = depth_surface->stride; view->depth_offset = image->offset + depth_surface->offset; - view->depth_format = format->depth_format; + view->depth_format = image->format->depth_format; view->depth_qpitch = 0; /* FINISHME: QPitch */ view->stencil_stride = stencil_surface->stride; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 29995330b04..7fcabb29069 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -911,7 +911,7 @@ void anv_CmdCopyImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->format, + .format = src_image->format->vk_format, .channels = { VK_CHANNEL_SWIZZLE_R, VK_CHANNEL_SWIZZLE_G, @@ -933,7 +933,7 @@ void anv_CmdCopyImage( &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, - .format = src_image->format, + .format = src_image->format->vk_format, .mipLevel = pRegions[r].destSubresource.mipLevel, .baseArraySlice = pRegions[r].destSubresource.arraySlice, .arraySize = 1, @@ -980,7 +980,7 @@ void anv_CmdBlitImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->format, + .format = src_image->format->vk_format, .channels = { VK_CHANNEL_SWIZZLE_R, VK_CHANNEL_SWIZZLE_G, @@ 
-1002,7 +1002,7 @@ void anv_CmdBlitImage( &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, - .format = dest_image->format, + .format = dest_image->format->vk_format, .mipLevel = pRegions[r].destSubresource.mipLevel, .baseArraySlice = pRegions[r].destSubresource.arraySlice, .arraySize = 1, @@ -1048,7 +1048,7 @@ void anv_CmdCopyBufferToImage( &(VkImageCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, - .format = dest_image->format, + .format = dest_image->format->vk_format, .extent = { .width = pRegions[r].imageExtent.width, .height = pRegions[r].imageExtent.height, @@ -1076,7 +1076,7 @@ void anv_CmdCopyBufferToImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = anv_image_to_handle(src_image), .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = dest_image->format, + .format = dest_image->format->vk_format, .channels = { VK_CHANNEL_SWIZZLE_R, VK_CHANNEL_SWIZZLE_G, @@ -1098,7 +1098,7 @@ void anv_CmdCopyBufferToImage( &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = anv_image_to_handle(dest_image), - .format = dest_image->format, + .format = dest_image->format->vk_format, .mipLevel = pRegions[r].imageSubresource.mipLevel, .baseArraySlice = pRegions[r].imageSubresource.arraySlice, .arraySize = 1, @@ -1147,7 +1147,7 @@ void anv_CmdCopyImageToBuffer( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->format, + .format = src_image->format->vk_format, .channels = { VK_CHANNEL_SWIZZLE_R, VK_CHANNEL_SWIZZLE_G, @@ -1169,7 +1169,7 @@ void anv_CmdCopyImageToBuffer( &(VkImageCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, - .format = src_image->format, + .format = src_image->format->vk_format, .extent = { .width = pRegions[r].imageExtent.width, .height = pRegions[r].imageExtent.height, @@ -1196,7 +1196,7 @@ 
void anv_CmdCopyImageToBuffer( &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, - .format = src_image->format, + .format = src_image->format->vk_format, .mipLevel = 0, .baseArraySlice = 0, .arraySize = 1, @@ -1259,7 +1259,7 @@ void anv_CmdClearColorImage( &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = _image, - .format = image->format, + .format = image->format->vk_format, .mipLevel = pRanges[r].baseMipLevel + l, .baseArraySlice = pRanges[r].baseArraySlice + s, .arraySize = 1, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index dc14dd2818a..89e7a8d778c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -912,8 +912,8 @@ struct anv_surface { struct anv_image { VkImageType type; + const struct anv_format *format; VkExtent3D extent; - VkFormat format; uint32_t levels; uint32_t array_size; -- cgit v1.2.3 From c11094ec9a8e6580205e76399aa357fbbad01aed Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 17 Aug 2015 13:20:33 -0700 Subject: vk: Pass anv_format to anv_fill_buffer_surface_state() This moves the translation of VkFormat to anv_format from anv_fill_buffer_surface_state() to its caller. A prep commit to reduce more VkFormat -> anv_format translations. 
--- src/vulkan/anv_cmd_buffer.c | 5 ++++- src/vulkan/anv_device.c | 9 +++------ src/vulkan/anv_private.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 5178f6529ab..5d3d4a4353c 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -506,6 +506,9 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (!view) continue; + const struct anv_format *format = + anv_format_for_vk_format(view->format); + struct anv_state state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); @@ -518,7 +521,7 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, d->dynamic_offsets[surface_slots[b].dynamic_slot]; offset = view->offset + dynamic_offset; - anv_fill_buffer_surface_state(state.map, view->format, offset, + anv_fill_buffer_surface_state(state.map, format, offset, view->range - dynamic_offset); } else { offset = view->offset; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 0653f8bab16..677e277cdf7 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1318,12 +1318,9 @@ VkResult anv_DestroyBuffer( // Buffer view functions void -anv_fill_buffer_surface_state(void *state, VkFormat format, +anv_fill_buffer_surface_state(void *state, const struct anv_format *format, uint32_t offset, uint32_t range) { - const struct anv_format *info; - - info = anv_format_for_vk_format(format); /* This assumes RGBA float format. 
*/ uint32_t stride = 4; uint32_t num_elements = range / stride; @@ -1331,7 +1328,7 @@ anv_fill_buffer_surface_state(void *state, VkFormat format, struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = SURFTYPE_BUFFER, .SurfaceArray = false, - .SurfaceFormat = info->surface_format, + .SurfaceFormat = format->surface_format, .SurfaceVerticalAlignment = VALIGN4, .SurfaceHorizontalAlignment = HALIGN4, .TileMode = LINEAR, @@ -1395,7 +1392,7 @@ VkResult anv_CreateBufferView( view->range = pCreateInfo->range; anv_fill_buffer_surface_state(view->surface_state.map, - pCreateInfo->format, + anv_format_for_vk_format(pCreateInfo->format), view->offset, pCreateInfo->range); *pView = anv_buffer_view_to_handle(bview); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 89e7a8d778c..3cf6c67e7d8 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1003,7 +1003,7 @@ void anv_color_attachment_view_init(struct anv_color_attachment_view *view, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void anv_fill_buffer_surface_state(void *state, VkFormat format, +void anv_fill_buffer_surface_state(void *state, const struct anv_format *format, uint32_t offset, uint32_t range); void anv_surface_view_fini(struct anv_device *device, -- cgit v1.2.3 From 60c4ac57f28c655cd7b40b7f5be966f95c982a0b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 17 Aug 2015 13:26:28 -0700 Subject: vk: Add anv_format reference t anv_surface_view Change type of anv_surface_view::format from VkFormat to const struct anv_format*. This reduces the number of lookups in the VkFormat -> anv_format table. 
--- src/vulkan/anv_cmd_buffer.c | 5 +---- src/vulkan/anv_device.c | 5 ++--- src/vulkan/anv_image.c | 13 ++++--------- src/vulkan/anv_meta.c | 4 ++-- src/vulkan/anv_private.h | 2 +- 5 files changed, 10 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 5d3d4a4353c..5178f6529ab 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -506,9 +506,6 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (!view) continue; - const struct anv_format *format = - anv_format_for_vk_format(view->format); - struct anv_state state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); @@ -521,7 +518,7 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, d->dynamic_offsets[surface_slots[b].dynamic_slot]; offset = view->offset + dynamic_offset; - anv_fill_buffer_surface_state(state.map, format, offset, + anv_fill_buffer_surface_state(state.map, view->format, offset, view->range - dynamic_offset); } else { offset = view->offset; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 677e277cdf7..0f06f3e5a91 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1388,11 +1388,10 @@ VkResult anv_CreateBufferView( view->offset = buffer->offset + pCreateInfo->offset; view->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - view->format = pCreateInfo->format; + view->format = anv_format_for_vk_format(pCreateInfo->format); view->range = pCreateInfo->range; - anv_fill_buffer_surface_state(view->surface_state.map, - anv_format_for_vk_format(pCreateInfo->format), + anv_fill_buffer_surface_state(view->surface_state.map, view->format, view->offset, pCreateInfo->range); *pView = anv_buffer_view_to_handle(bview); diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 329955fff59..4c5032cea53 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -343,9 +343,6 @@ 
anv_image_view_init(struct anv_image_view *iview, struct anv_surface_view *view = &iview->view; struct anv_surface *surface; - const struct anv_format *format_info = - anv_format_for_vk_format(pCreateInfo->format); - const struct anv_image_view_info *view_type_info = &anv_image_view_info_table[pCreateInfo->viewType]; @@ -369,7 +366,7 @@ anv_image_view_init(struct anv_image_view *iview, view->bo = image->bo; view->offset = image->offset + surface->offset; - view->format = pCreateInfo->format; + view->format = anv_format_for_vk_format(pCreateInfo->format); iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, range->baseMipLevel), @@ -396,7 +393,7 @@ anv_image_view_init(struct anv_image_view *iview, struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = view_type_info->surface_type, .SurfaceArray = image->array_size > 1, - .SurfaceFormat = format_info->surface_format, + .SurfaceFormat = view->format->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], .SurfaceHorizontalAlignment = anv_halign[surface->h_align], .TileMode = surface->tile_mode, @@ -572,8 +569,6 @@ anv_color_attachment_view_init(struct anv_color_attachment_view *aview, ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); struct anv_surface_view *view = &aview->view; struct anv_surface *surface = &image->primary_surface; - const struct anv_format *format_info = - anv_format_for_vk_format(pCreateInfo->format); aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; @@ -583,7 +578,7 @@ anv_color_attachment_view_init(struct anv_color_attachment_view *aview, view->bo = image->bo; view->offset = image->offset + surface->offset; - view->format = pCreateInfo->format; + view->format = anv_format_for_vk_format(pCreateInfo->format); aview->base.extent = (VkExtent3D) { .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), @@ -609,7 +604,7 @@ anv_color_attachment_view_init(struct anv_color_attachment_view *aview, struct GEN8_RENDER_SURFACE_STATE 
surface_state = { .SurfaceType = SURFTYPE_2D, .SurfaceArray = image->array_size > 1, - .SurfaceFormat = format_info->surface_format, + .SurfaceFormat = view->format->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], .SurfaceHorizontalAlignment = anv_halign[surface->h_align], .TileMode = surface->tile_mode, diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 7fcabb29069..d3dbd6bafc7 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -637,7 +637,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, - .format = dest->view.format, + .format = dest->view.format->vk_format, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_GENERAL, @@ -1289,7 +1289,7 @@ void anv_CmdClearColorImage( .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, - .format = view.view.format, + .format = view.view.format->vk_format, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_GENERAL, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 3cf6c67e7d8..725f8d88566 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -941,7 +941,7 @@ struct anv_surface_view { struct anv_bo *bo; uint32_t offset; /**< VkBufferCreateInfo::offset */ uint32_t range; /**< VkBufferCreateInfo::range */ - VkFormat format; /**< VkBufferCreateInfo::format */ + const struct anv_format *format; /**< VkBufferCreateInfo::format */ }; struct anv_buffer_view { -- cgit v1.2.3 From a9c36daa83ce2305ea8e310306699dee4a3d5c19 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 17 Aug 2015 13:49:35 -0700 Subject: vk/formats: Add global pointer to anv_format for S8_UINT Stencil formats are often a special case. 
To reduce the number of lookups into the VkFormat-to-anv_format translation table when working with stencil, expose the table's entry for VK_FORMAT_S8_UINT as global variable anv_format_s8_uint. --- src/vulkan/anv_formats.c | 3 +++ src/vulkan/anv_private.h | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 1d1d1bbe135..9b971a18ac2 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -209,6 +209,9 @@ static const struct anv_format anv_formats[] = { #undef fmt +const struct anv_format *const +anv_format_s8_uint = &anv_formats[VK_FORMAT_S8_UINT]; + const struct anv_format * anv_format_for_vk_format(VkFormat format) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 725f8d88566..ee2700254cd 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -879,6 +879,13 @@ struct anv_format { bool has_stencil; }; +/** + * Stencil formats are often a special case. To reduce the number of lookups + * into the VkFormat-to-anv_format translation table when working with + * stencil, here is the handle to the table's entry for VK_FORMAT_S8_UINT. + */ +extern const struct anv_format *const anv_format_s8_uint; + const struct anv_format * anv_format_for_vk_format(VkFormat format); bool anv_is_vk_format_depth_or_stencil(VkFormat format); -- cgit v1.2.3 From 5a6b2e6df0ac63e20856dfcd1fc9f0ff73ae67e2 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 17 Aug 2015 13:50:43 -0700 Subject: vk/image: Simplify stencil case for anv_image_create() Stop creating a temporary VkImageCreateInfo with overriden format=VK_FORMAT_S8_UINT. Instead, just pass the format override directly to anv_image_make_surface(). 
--- src/vulkan/anv_image.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 4c5032cea53..3b706c68443 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -123,8 +123,14 @@ anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) } } + +/** + * The \a format argument is required and overrides any format in + * struct anv_image_create_info. + */ static VkResult anv_image_make_surface(const struct anv_image_create_info *create_info, + const struct anv_format *format, uint64_t *inout_image_size, uint32_t *inout_image_alignment, struct anv_surface *out_surface) @@ -142,9 +148,6 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, const struct anv_tile_info *tile_info = &anv_tile_info_table[tile_mode]; - const struct anv_format *format_info = - anv_format_for_vk_format(create_info->vk_info->format); - const uint32_t i = 4; /* FINISHME: Stop hardcoding subimage alignment */ const uint32_t j = 4; /* FINISHME: Stop hardcoding subimage alignment */ const uint32_t w0 = align_u32(extent->width, i); @@ -181,7 +184,7 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, */ assert(anv_is_aligned(qpitch, j)); - uint32_t stride = align_u32(mt_width * format_info->cpp, tile_info->width); + uint32_t stride = align_u32(mt_width * format->cpp, tile_info->width); if (create_info->stride > 0) stride = create_info->stride; @@ -257,7 +260,8 @@ anv_image_create(VkDevice _device, if (likely(!image->format->has_stencil || image->format->depth_format)) { /* The image's primary surface is a color or depth surface. 
*/ - r = anv_image_make_surface(create_info, &image->size, &image->alignment, + r = anv_image_make_surface(create_info, image->format, + &image->size, &image->alignment, &image->primary_surface); if (r != VK_SUCCESS) goto fail; @@ -269,15 +273,9 @@ anv_image_create(VkDevice _device, * stencil reside in the same image. To satisfy Vulkan and the GPU, we * place the depth and stencil buffers in the same bo. */ - VkImageCreateInfo stencil_info = *pCreateInfo; - stencil_info.format = VK_FORMAT_S8_UINT; - - r = anv_image_make_surface( - &(struct anv_image_create_info) { - .vk_info = &stencil_info, - }, - &image->size, &image->alignment, &image->stencil_surface); - + r = anv_image_make_surface(create_info, anv_format_s8_uint, + &image->size, &image->alignment, + &image->stencil_surface); if (r != VK_SUCCESS) goto fail; } -- cgit v1.2.3 From 6ff95bba8abd50b47117f733d3e46fb90333210f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 17 Aug 2015 14:03:52 -0700 Subject: vk: Add anv_format reference to anv_render_pass_attachment Change type of anv_render_pass_attachment::format from VkFormat to const struct anv_format*. This elimiates the repetitive lookups into the VkFormat -> anv_format table when looping over attachments during anv_cmd_buffer_clear_attachments(). 
--- src/vulkan/anv_device.c | 3 ++- src/vulkan/anv_formats.c | 13 ------------- src/vulkan/anv_image.c | 5 ++++- src/vulkan/anv_meta.c | 4 ++-- src/vulkan/anv_private.h | 9 +++++++-- 5 files changed, 15 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 0f06f3e5a91..145d16f485e 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -2229,7 +2229,8 @@ VkResult anv_CreateRenderPass( pass->attachments = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - pass->attachments[i].format = pCreateInfo->pAttachments[i].format; + pass->attachments[i].format = + anv_format_for_vk_format(pCreateInfo->pAttachments[i].format); pass->attachments[i].samples = pCreateInfo->pAttachments[i].samples; pass->attachments[i].load_op = pCreateInfo->pAttachments[i].loadOp; pass->attachments[i].stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 9b971a18ac2..f5d00a0f8ff 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -218,19 +218,6 @@ anv_format_for_vk_format(VkFormat format) return &anv_formats[format]; } -bool -anv_is_vk_format_depth_or_stencil(VkFormat format) -{ - const struct anv_format *format_info = - anv_format_for_vk_format(format); - - if (format_info->depth_format != UNSUPPORTED && - format_info->depth_format != 0) - return true; - - return format_info->has_stencil; -} - // Format capabilities struct surface_format_info { diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 3b706c68443..0152fef9ffb 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -689,7 +689,10 @@ anv_CreateAttachmentView(VkDevice _device, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO); - if (anv_is_vk_format_depth_or_stencil(pCreateInfo->format)) { + const struct anv_format *format = + 
anv_format_for_vk_format(pCreateInfo->format); + + if (anv_format_is_depth_or_stencil(format)) { struct anv_depth_stencil_view *view = anv_device_alloc(device, sizeof(*view), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index d3dbd6bafc7..8808d312db4 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -269,7 +269,7 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, int num_clear_layers = 0; for (uint32_t i = 0; i < pass->attachment_count; i++) { if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - if (anv_is_vk_format_depth_or_stencil(pass->attachments[i].format)) { + if (anv_format_is_depth_or_stencil(pass->attachments[i].format)) { anv_finishme("Can't clear depth-stencil yet"); continue; } @@ -286,7 +286,7 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, int layer = 0; for (uint32_t i = 0; i < pass->attachment_count; i++) { if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR && - !anv_is_vk_format_depth_or_stencil(pass->attachments[i].format)) { + !anv_format_is_depth_or_stencil(pass->attachments[i].format)) { instance_data[layer] = (struct clear_instance_data) { .vue_header = { .RTAIndex = i, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index ee2700254cd..1b2cfc6fa0c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -888,7 +888,12 @@ extern const struct anv_format *const anv_format_s8_uint; const struct anv_format * anv_format_for_vk_format(VkFormat format); -bool anv_is_vk_format_depth_or_stencil(VkFormat format); + +static inline bool +anv_format_is_depth_or_stencil(const struct anv_format *format) +{ + return format->depth_format || format->has_stencil; +} /** * A proxy for the color surfaces, depth surfaces, and stencil surfaces. 
@@ -1042,7 +1047,7 @@ struct anv_subpass { }; struct anv_render_pass_attachment { - VkFormat format; + const struct anv_format *format; uint32_t samples; VkAttachmentLoadOp load_op; VkAttachmentLoadOp stencil_load_op; -- cgit v1.2.3 From 50f7bf70dac9ffe8733cce6e65aba23655e1f8c8 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 18 Aug 2015 11:08:39 -0700 Subject: vk: Add anv_format_is_color() --- src/vulkan/anv_private.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 1b2cfc6fa0c..73bcd85e411 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -889,6 +889,12 @@ extern const struct anv_format *const anv_format_s8_uint; const struct anv_format * anv_format_for_vk_format(VkFormat format); +static inline bool +anv_format_is_color(const struct anv_format *format) +{ + return !format->depth_format && !format->has_stencil; +} + static inline bool anv_format_is_depth_or_stencil(const struct anv_format *format) { -- cgit v1.2.3 From e7d3a5df5a226caa060b822bb87dc00c572aaf7f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 18 Aug 2015 11:09:40 -0700 Subject: vk/meta: Use anv_format_is_color() That is, replace !anv_format_is_depth_or_stencil() with anv_format_is_color(). That conveys the meaning better. 
--- src/vulkan/anv_meta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 8808d312db4..bec8455c873 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -286,7 +286,7 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, int layer = 0; for (uint32_t i = 0; i < pass->attachment_count; i++) { if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR && - !anv_format_is_depth_or_stencil(pass->attachments[i].format)) { + anv_format_is_color(pass->attachments[i].format)) { instance_data[layer] = (struct clear_instance_data) { .vue_header = { .RTAIndex = i, -- cgit v1.2.3 From d52822541ee792516cfa5ab14e939ec66396f125 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 18 Aug 2015 11:30:38 -0700 Subject: vk/image: Don't set anv_surface_view::offset twice It was set twice a few lines apart, and the second setting always overrode the first. --- src/vulkan/anv_image.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 0152fef9ffb..f24acdb0fac 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -354,7 +354,6 @@ anv_image_view_init(struct anv_image_view *iview, break; case VK_IMAGE_ASPECT_DEPTH: case VK_IMAGE_ASPECT_COLOR: - view->offset = image->offset; surface = &image->primary_surface; break; default: -- cgit v1.2.3 From b0875aa911073ebab7eb38ced07bf62ed4ac5d9b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 17 Aug 2015 15:02:38 -0700 Subject: vk: Assert that swap chain format is a color format --- src/vulkan/anv_x11.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_x11.c b/src/vulkan/anv_x11.c index cc11a5fc762..aee5775c9de 100644 --- a/src/vulkan/anv_x11.c +++ b/src/vulkan/anv_x11.c @@ -143,6 +143,8 @@ VkResult anv_CreateSwapChainWSI( &image_h); image = anv_image_from_handle(image_h); + 
assert(anv_format_is_color(image->format)); + surface = &image->primary_surface; anv_AllocMemory(_device, -- cgit v1.2.3 From bd0aab9a5892387daf0a2a4badc39d27dda73033 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 18 Aug 2015 12:39:28 -0700 Subject: vk/meta: Fix dest format of vkCmdCopyImage The source image's format was incorrectly used for both the source view and destination view. For vkCmdCopyImage to correctly translate formats, the destination view's format must be that of the destination image's. --- src/vulkan/anv_meta.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index bec8455c873..a433d132562 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -899,6 +899,7 @@ void anv_CmdCopyImage( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); struct anv_saved_state saved_state; @@ -933,7 +934,7 @@ void anv_CmdCopyImage( &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, - .format = src_image->format->vk_format, + .format = dest_image->format->vk_format, .mipLevel = pRegions[r].destSubresource.mipLevel, .baseArraySlice = pRegions[r].destSubresource.arraySlice, .arraySize = 1, -- cgit v1.2.3 From 49d9e89d00fc19e44f7f9503a07592a126474b9d Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Mon, 10 Aug 2015 17:31:02 +0100 Subject: Add mesa.icd to the .gitignore Since 4d7e0fa8c731776 this file is generated by the configure script. 
Reviewed-by: Tapani Palli Reviewed-by: Ben Widawsky (cherry picked from commit 885762e18291eb4dc0b449297c3a78f7c036bcde) --- src/gallium/targets/opencl/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/gallium/targets/opencl/.gitignore (limited to 'src') diff --git a/src/gallium/targets/opencl/.gitignore b/src/gallium/targets/opencl/.gitignore new file mode 100644 index 00000000000..dad573fb98d --- /dev/null +++ b/src/gallium/targets/opencl/.gitignore @@ -0,0 +1 @@ +/mesa.icd -- cgit v1.2.3 From 9f908fcbdecce9221c0ffbcccf8481f889172c51 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 19 Aug 2015 11:58:50 -0700 Subject: vk/meta: Use consistent names and types in anv_saved_state In struct anv_saved_state, each member's type was a pointer to an Anvil struct and each member's name was prefixed with "old" except cb_state, which was a Vulkan handle whose name lacked "old". --- src/vulkan/anv_meta.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index a433d132562..ff82fbe5b38 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -150,7 +150,7 @@ struct anv_saved_state { struct anv_vertex_binding old_vertex_bindings[NUM_VB_USED]; struct anv_descriptor_set *old_descriptor_set0; struct anv_pipeline *old_pipeline; - VkDynamicColorBlendState cb_state; + struct anv_dynamic_cb_state *old_cb_state; }; static void @@ -504,7 +504,7 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.ds_state); - saved_state->cb_state = anv_dynamic_cb_state_to_handle(cmd_buffer->state.cb_state); + saved_state->old_cb_state = cmd_buffer->state.cb_state; anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.cb_state); } @@ -705,8 +705,9 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, const struct anv_saved_state 
*saved_state) { anv_cmd_buffer_restore(cmd_buffer, saved_state); - anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), - saved_state->cb_state); + anv_CmdBindDynamicColorBlendState( + anv_cmd_buffer_to_handle(cmd_buffer), + anv_dynamic_cb_state_to_handle(saved_state->old_cb_state)); } static VkFormat -- cgit v1.2.3 From 4aef5c62cd1b39ffb9ca26dffb18dc39444daf3a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 19 Aug 2015 12:07:20 -0700 Subject: vk/meta: Restore all saved state in anv_cmd_buffer_restore() anv_cmd_buffer_restore() did not restore the old VkDynamicColorBlendState. --- src/vulkan/anv_meta.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index ff82fbe5b38..c3538f0e455 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -161,6 +161,7 @@ anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, state->old_descriptor_set0 = cmd_buffer->state.descriptors[0].set; memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, sizeof(state->old_vertex_bindings)); + state->old_cb_state = cmd_buffer->state.cb_state; } static void @@ -175,6 +176,11 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.vb_dirty |= (1 << NUM_VB_USED) - 1; cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; + + if (cmd_buffer->state.cb_state != state->old_cb_state) { + cmd_buffer->state.cb_state = state->old_cb_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY; + } } struct vue_header { @@ -504,7 +510,6 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.ds_state); - saved_state->old_cb_state = cmd_buffer->state.cb_state; anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.cb_state); } @@ 
-705,9 +710,6 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, const struct anv_saved_state *saved_state) { anv_cmd_buffer_restore(cmd_buffer, saved_state); - anv_CmdBindDynamicColorBlendState( - anv_cmd_buffer_to_handle(cmd_buffer), - anv_dynamic_cb_state_to_handle(saved_state->old_cb_state)); } static VkFormat -- cgit v1.2.3 From 44ef4484c8ebffd2ef966cee1f8fc3327e955adb Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 19 Aug 2015 12:28:14 -0700 Subject: vk/meta: Add Z coord to clear vertices For now, the Z coordinate is always 0.0. Will later be used for depth clears. --- src/vulkan/anv_meta.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index c3538f0e455..add5f7d6d9b 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -65,7 +65,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { { .binding = 0, - .strideInBytes = 8, + .strideInBytes = 12, .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX }, { @@ -87,7 +87,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) /* Position */ .location = 1, .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, + .format = VK_FORMAT_R32G32B32_SFLOAT, .offsetInBytes = 0 }, { @@ -207,12 +207,12 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, const float vertex_data[] = { /* Rect-list coordinates */ - 0.0, 0.0, - fb->width, 0.0, - fb->width, fb->height, + 0.0, 0.0, 0.0, + fb->width, 0.0, 0.0, + fb->width, fb->height, 0.0, /* Align to 16 bytes */ - 0.0, 0.0, + 0.0, 0.0, 0.0, }; size = sizeof(vertex_data) + num_instances * sizeof(*instance_data); -- cgit v1.2.3 From 4eaf90effb871c119034c7e7a53c71bb71a11b1f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 19 Aug 2015 14:55:44 -0700 Subject: vk: Unharcode an argument to sizeof s/struct anv_subpass/pass->subpasses[0])/ --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 145d16f485e..89f6ec697d8 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -2211,7 +2211,7 @@ VkResult anv_CreateRenderPass( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); size = sizeof(*pass) + - pCreateInfo->subpassCount * sizeof(struct anv_subpass); + pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); pass = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (pass == NULL) -- cgit v1.2.3 From 1c24a191cd41133e72a8e8db605faf508a632079 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 19 Aug 2015 15:10:14 -0700 Subject: vk: Improve memory locality of anv_render_pass Allocate the pass's array of attachments, anv_render_pass::attachments, in the same allocation as the pass itself. --- src/vulkan/anv_device.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 89f6ec697d8..177986183e1 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -2207,11 +2207,15 @@ VkResult anv_CreateRenderPass( ANV_FROM_HANDLE(anv_device, device, _device); struct anv_render_pass *pass; size_t size; + size_t attachments_offset; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); - size = sizeof(*pass) + - pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); + size = sizeof(*pass); + size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); + attachments_offset = size; + size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); + pass = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (pass == NULL) @@ -2221,13 +2225,10 @@ VkResult anv_CreateRenderPass( * each array member of anv_subpass must be a valid pointer if not NULL. 
*/ memset(pass, 0, size); - pass->attachment_count = pCreateInfo->attachmentCount; pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = (void *) pass + attachments_offset; - size = pCreateInfo->attachmentCount * sizeof(*pass->attachments); - pass->attachments = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { pass->attachments[i].format = anv_format_for_vk_format(pCreateInfo->pAttachments[i].format); @@ -2293,8 +2294,6 @@ VkResult anv_DestroyRenderPass( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_render_pass, pass, _pass); - anv_device_free(device, pass->attachments); - for (uint32_t i = 0; i < pass->subpass_count; i++) { /* In VkSubpassCreateInfo, each of the attachment arrays may be null. * Don't free the null arrays. -- cgit v1.2.3 From 23872191012a0b95a1ef7bcefeb6609ce595fecf Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 20 Aug 2015 09:54:32 -0700 Subject: vk: Use temp var in vkCreateRenderPass's attachment loop Store the attachment in a temporary variable and s/pass->attachments[i]/att/ . 
--- src/vulkan/anv_device.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 177986183e1..5805ffab193 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -2230,13 +2230,14 @@ VkResult anv_CreateRenderPass( pass->attachments = (void *) pass + attachments_offset; for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - pass->attachments[i].format = - anv_format_for_vk_format(pCreateInfo->pAttachments[i].format); - pass->attachments[i].samples = pCreateInfo->pAttachments[i].samples; - pass->attachments[i].load_op = pCreateInfo->pAttachments[i].loadOp; - pass->attachments[i].stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; - // pass->attachments[i].store_op = pCreateInfo->pAttachments[i].storeOp; - // pass->attachments[i].stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + struct anv_render_pass_attachment *att = &pass->attachments[i]; + + att->format = anv_format_for_vk_format(pCreateInfo->pAttachments[i].format); + att->samples = pCreateInfo->pAttachments[i].samples; + att->load_op = pCreateInfo->pAttachments[i].loadOp; + att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + // att->store_op = pCreateInfo->pAttachments[i].storeOp; + // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; } for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { -- cgit v1.2.3 From 0db3d67a1488089602f787c281b2dff888fb7acd Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 20 Aug 2015 10:03:58 -0700 Subject: vk: Cache each render pass's number of clear ops During vkCreateRenderPass, count the number of clear ops and store them in new members of anv_render_pass: uint32_t num_color_clear_attachments bool has_depth_clear_attachment bool has_stencil_clear_attachment Cacheing these 8 bytes (including padding) reduces the number of times that anv_cmd_buffer_clear_attachments needs to loop over the 
pass's attachments. --- src/vulkan/anv_device.c | 11 +++++++++++ src/vulkan/anv_meta.c | 26 +++++++++++--------------- src/vulkan/anv_private.h | 4 ++++ 3 files changed, 26 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 5805ffab193..eaeae3b0a4f 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -2238,6 +2238,17 @@ VkResult anv_CreateRenderPass( att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; // att->store_op = pCreateInfo->pAttachments[i].storeOp; // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + if (anv_format_is_color(att->format)) { + ++pass->num_color_clear_attachments; + } else if (att->format->depth_format) { + pass->has_depth_clear_attachment = true; + } + } else if (att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + assert(att->format->has_stencil); + pass->has_stencil_clear_attachment = true; + } } for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index add5f7d6d9b..b1f3fe7191b 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -272,22 +272,17 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, { struct anv_saved_state saved_state; - int num_clear_layers = 0; - for (uint32_t i = 0; i < pass->attachment_count; i++) { - if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - if (anv_format_is_depth_or_stencil(pass->attachments[i].format)) { - anv_finishme("Can't clear depth-stencil yet"); - continue; - } - num_clear_layers++; - } - } + if (pass->has_depth_clear_attachment) + anv_finishme("depth clear"); + + if (pass->has_stencil_clear_attachment) + anv_finishme("stencil clear"); - if (num_clear_layers == 0) + if (pass->num_color_clear_attachments == 0) return; - struct clear_instance_data instance_data[num_clear_layers]; - uint32_t 
color_attachments[num_clear_layers]; + struct clear_instance_data instance_data[pass->num_color_clear_attachments]; + uint32_t color_attachments[pass->num_color_clear_attachments]; int layer = 0; for (uint32_t i = 0; i < pass->attachment_count; i++) { @@ -310,14 +305,15 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass subpass = { .input_count = 0, - .color_count = num_clear_layers, + .color_count = pass->num_color_clear_attachments, .color_attachments = color_attachments, .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, }; anv_cmd_buffer_begin_subpass(cmd_buffer, &subpass); - meta_emit_clear(cmd_buffer, num_clear_layers, instance_data); + meta_emit_clear(cmd_buffer, pass->num_color_clear_attachments, + instance_data); /* Restore API state */ anv_cmd_buffer_restore(cmd_buffer, &saved_state); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 73bcd85e411..1de97fda83a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1063,6 +1063,10 @@ struct anv_render_pass { uint32_t attachment_count; uint32_t subpass_count; + uint32_t num_color_clear_attachments; + bool has_depth_clear_attachment; + bool has_stencil_clear_attachment; + struct anv_render_pass_attachment * attachments; struct anv_subpass subpasses[0]; }; -- cgit v1.2.3 From c4e7ed91636769c0ca05a6360fe8c20b9176c1b1 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 19 Aug 2015 12:17:37 -0700 Subject: vk/meta: Implement depth clears Fixes Crucible test func.depthstencil.basic-depth.clear-1.0.op-greater. 
--- src/vulkan/anv_meta.c | 71 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index b1f3fe7191b..d4a0bd96a4c 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -124,6 +124,22 @@ anv_device_init_meta_clear_state(struct anv_device *device) .cullMode = VK_CULL_MODE_NONE, .frontFace = VK_FRONT_FACE_CCW }, + .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = true, + .depthWriteEnable = true, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthBoundsEnable = false, + .stencilTestEnable = true, + .front = (VkStencilOpState) { + .stencilPassOp = VK_STENCIL_OP_REPLACE, + .stencilCompareOp = VK_COMPARE_OP_ALWAYS, + }, + .back = (VkStencilOpState) { + .stencilPassOp = VK_STENCIL_OP_REPLACE, + .stencilCompareOp = VK_COMPARE_OP_ALWAYS, + }, + }, .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 1, @@ -198,7 +214,8 @@ struct clear_instance_data { static void meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, int num_instances, - struct clear_instance_data *instance_data) + struct clear_instance_data *instance_data, + VkClearDepthStencilValue ds_clear_value) { struct anv_device *device = cmd_buffer->device; struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; @@ -207,9 +224,9 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, const float vertex_data[] = { /* Rect-list coordinates */ - 0.0, 0.0, 0.0, - fb->width, 0.0, 0.0, - fb->width, fb->height, 0.0, + 0.0, 0.0, ds_clear_value.depth, + fb->width, 0.0, ds_clear_value.depth, + fb->width, fb->height, ds_clear_value.depth, /* Align to 16 bytes */ 0.0, 0.0, 0.0, @@ -272,9 +289,6 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, { struct anv_saved_state 
saved_state; - if (pass->has_depth_clear_attachment) - anv_finishme("depth clear"); - if (pass->has_stencil_clear_attachment) anv_finishme("stencil clear"); @@ -283,21 +297,33 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, struct clear_instance_data instance_data[pass->num_color_clear_attachments]; uint32_t color_attachments[pass->num_color_clear_attachments]; + uint32_t ds_attachment = VK_ATTACHMENT_UNUSED; + VkClearDepthStencilValue ds_clear_value = {0}; int layer = 0; for (uint32_t i = 0; i < pass->attachment_count; i++) { - if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR && - anv_format_is_color(pass->attachments[i].format)) { - instance_data[layer] = (struct clear_instance_data) { - .vue_header = { - .RTAIndex = i, - .ViewportIndex = 0, - .PointWidth = 0.0 - }, - .color = clear_values[i].color, - }; - color_attachments[layer] = i; - layer++; + const struct anv_render_pass_attachment *att = &pass->attachments[i]; + + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + if (anv_format_is_color(att->format)) { + instance_data[layer] = (struct clear_instance_data) { + .vue_header = { + .RTAIndex = i, + .ViewportIndex = 0, + .PointWidth = 0.0 + }, + .color = clear_values[i].color, + }; + color_attachments[layer] = i; + layer++; + } else if (att->format->depth_format) { + assert(ds_attachment == VK_ATTACHMENT_UNUSED); + ds_attachment = i; + ds_clear_value= clear_values[ds_attachment].ds; + } + } else if (att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + assert(att->format->has_stencil); + anv_finishme("stencil clear"); } } @@ -307,13 +333,13 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, .input_count = 0, .color_count = pass->num_color_clear_attachments, .color_attachments = color_attachments, - .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, + .depth_stencil_attachment = ds_attachment, }; anv_cmd_buffer_begin_subpass(cmd_buffer, &subpass); meta_emit_clear(cmd_buffer, 
pass->num_color_clear_attachments, - instance_data); + instance_data, ds_clear_value); /* Restore API state */ anv_cmd_buffer_restore(cmd_buffer, &saved_state); @@ -1344,7 +1370,8 @@ void anv_CmdClearColorImage( .color = *pColor, }; - meta_emit_clear(cmd_buffer, 1, &instance_data); + meta_emit_clear(cmd_buffer, 1, &instance_data, + (VkClearDepthStencilValue) {0}); anv_CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); } -- cgit v1.2.3 From ee9788973f069152905d23f65c6742efe4362124 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 30 Jul 2015 14:44:01 -0700 Subject: vk: Implement multi-gen dispatch mechanism --- src/vulkan/anv_device.c | 9 ++++++++ src/vulkan/anv_entrypoints_gen.py | 45 +++++++++++++++++++++++++++++++-------- 2 files changed, 45 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index eaeae3b0a4f..4db344bf08c 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -532,6 +532,15 @@ VkResult anv_CreateDevice( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); + switch (physical_device->info->gen) { + case 7: + driver_layer = &gen7_layer; + break; + case 8: + driver_layer = &gen8_layer; + break; + } + device = anv_instance_alloc(instance, sizeof(*device), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (!device) diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py index 96c4884d158..21f87f181e9 100644 --- a/src/vulkan/anv_entrypoints_gen.py +++ b/src/vulkan/anv_entrypoints_gen.py @@ -78,8 +78,27 @@ for line in fileinput.input(): # per entry point. if opt_header: + print "/* This file generated from vk_gen.py, don't edit directly. 
*/\n" + + print "struct anv_layer {" + print " union {" + print " void *entrypoints[%d];" % len(entrypoints) + print " struct {" + + for type, name, args, num, h in entrypoints: + print " %s (*%s)%s;" % (type, name, args) + print " };\n" + print " };\n" + print "};\n" + + print "extern const struct anv_layer gen7_layer;\n" + print "extern const struct anv_layer gen8_layer;\n" + print "extern const struct anv_layer *driver_layer;\n" + for type, name, args, num, h in entrypoints: print "%s anv_%s%s;" % (type, name, args) + print "%s gen7_%s%s;" % (type, name, args) + print "%s gen8_%s%s;" % (type, name, args) print "%s anv_validate_%s%s;" % (type, name, args) exit() @@ -115,8 +134,6 @@ print """/* struct anv_entrypoint { uint32_t name; uint32_t hash; - void *function; - void *validate; }; /* We use a big string constant to avoid lots of reloctions from the entry @@ -141,16 +158,21 @@ print """ ; */ """ -for type, name, args, num, h in entrypoints: - print "%s anv_validate_%s%s __attribute__ ((weak));" % (type, name, args) - # Now generate the table of all entry points and their validation functions print "\nstatic const struct anv_entrypoint entrypoints[] = {" for type, name, args, num, h in entrypoints: - print " { %5d, 0x%08x, anv_%s, anv_validate_%s }," % (offsets[num], h, name, name) + print " { %5d, 0x%08x }," % (offsets[num], h) print "};\n" +for layer in [ "anv", "validate", "gen7", "gen8" ]: + for type, name, args, num, h in entrypoints: + print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) + print "\nconst struct anv_layer %s_layer = {" % layer + for type, name, args, num, h in entrypoints: + print " .%s = %s_%s," % (name, layer, name) + print "};\n" + print """ #ifdef DEBUG static bool enable_validate = true; @@ -173,13 +195,18 @@ determine_validate(void) enable_validate = atoi(s); } +const struct anv_layer *driver_layer = &anv_layer; + static void * __attribute__ ((noinline)) resolve_entrypoint(uint32_t index) { - if (enable_validate && 
entrypoints[index].validate) - return entrypoints[index].validate; + if (enable_validate && validate_layer.entrypoints[index]) + return validate_layer.entrypoints[index]; + + if (driver_layer && driver_layer->entrypoints[index]) + return driver_layer->entrypoints[index]; - return entrypoints[index].function; + return anv_layer.entrypoints[index]; } """ -- cgit v1.2.3 From 74556b076a191fb1e6e0b0204a49ee33962e5112 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 13 Aug 2015 21:05:47 -0700 Subject: vk: Add new anv_gen8.c and move CreateDynamicRasterState there MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/Makefile.am | 3 ++- src/vulkan/anv_device.c | 38 +++------------------------ src/vulkan/anv_gen8.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 36 deletions(-) create mode 100644 src/vulkan/anv_gen8.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 10179364bf8..6c833d84753 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -72,7 +72,8 @@ VULKAN_SOURCES = \ anv_private.h \ anv_query.c \ anv_util.c \ - anv_x11.c + anv_x11.c \ + anv_gen8.c libvulkan_la_SOURCES = \ $(VULKAN_SOURCES) \ diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 4db344bf08c..ba0966aa3b2 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -769,8 +769,8 @@ VkResult anv_DeviceWaitIdle( bo = &device->dynamic_state_pool.block_pool->bo; batch.start = batch.next = state.map; batch.end = state.map + 32; - anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); - anv_batch_emit(&batch, GEN8_MI_NOOP); + anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN7_MI_NOOP); exec2_objects[0].handle = bo->gem_handle; exec2_objects[0].relocation_count = 0; @@ -1996,39 +1996,7 @@ VkResult anv_CreateDynamicRasterState( const 
VkDynamicRasterStateCreateInfo* pCreateInfo, VkDynamicRasterState* pState) { - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_rs_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GEN8_3DSTATE_SF sf = { - GEN8_3DSTATE_SF_header, - .LineWidth = pCreateInfo->lineWidth, - }; - - GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); - - bool enable_bias = pCreateInfo->depthBias != 0.0f || - pCreateInfo->slopeScaledDepthBias != 0.0f; - struct GEN8_3DSTATE_RASTER raster = { - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, - .GlobalDepthOffsetConstant = pCreateInfo->depthBias, - .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias, - .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp - }; - - GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster); - - *pState = anv_dynamic_rs_state_to_handle(state); - - return VK_SUCCESS; + return driver_layer->CreateDynamicRasterState(_device, pCreateInfo, pState); } VkResult anv_DestroyDynamicRasterState( diff --git a/src/vulkan/anv_gen8.c b/src/vulkan/anv_gen8.c new file mode 100644 index 00000000000..8d53ebb32f5 --- /dev/null +++ b/src/vulkan/anv_gen8.c @@ -0,0 +1,70 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +VkResult gen8_CreateDynamicRasterState( + VkDevice _device, + const VkDynamicRasterStateCreateInfo* pCreateInfo, + VkDynamicRasterState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_rs_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .LineWidth = pCreateInfo->lineWidth, + }; + + GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); + + bool enable_bias = pCreateInfo->depthBias != 0.0f || + pCreateInfo->slopeScaledDepthBias != 0.0f; + struct GEN8_3DSTATE_RASTER raster = { + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = pCreateInfo->depthBias, + .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias, + .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp + }; + + GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster); + + *pState = anv_dynamic_rs_state_to_handle(state); + + return VK_SUCCESS; +} -- cgit v1.2.3 From 
fb428727e03d9b819fc9ef510941e001ac03877b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 13 Aug 2015 21:48:19 -0700 Subject: vk: Move anv_CreateBufferView to anv_gen8.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_cmd_buffer.c | 4 +- src/vulkan/anv_device.c | 98 ++++++++++++--------------------------------- src/vulkan/anv_gen8.c | 74 ++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 9 ++++- 4 files changed, 109 insertions(+), 76 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 5178f6529ab..e0e850b8cc4 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -518,8 +518,8 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, d->dynamic_offsets[surface_slots[b].dynamic_slot]; offset = view->offset + dynamic_offset; - anv_fill_buffer_surface_state(state.map, view->format, offset, - view->range - dynamic_offset); + gen8_fill_buffer_surface_state(state.map, view->format, offset, + view->range - dynamic_offset); } else { offset = view->offset; memcpy(state.map, view->surface_state.map, 64); diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index ba0966aa3b2..78c35dd83fe 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1324,90 +1324,44 @@ VkResult anv_DestroyBuffer( return VK_SUCCESS; } -// Buffer view functions - -void -anv_fill_buffer_surface_state(void *state, const struct anv_format *format, - uint32_t offset, uint32_t range) -{ - /* This assumes RGBA float format. 
*/ - uint32_t stride = 4; - uint32_t num_elements = range / stride; - - struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = SURFTYPE_BUFFER, - .SurfaceArray = false, - .SurfaceFormat = format->surface_format, - .SurfaceVerticalAlignment = VALIGN4, - .SurfaceHorizontalAlignment = HALIGN4, - .TileMode = LINEAR, - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GEN8_MOCS, - .BaseMipLevel = 0.0, - .SurfaceQPitch = 0, - .Height = (num_elements >> 7) & 0x3fff, - .Width = num_elements & 0x7f, - .Depth = (num_elements >> 21) & 0x3f, - .SurfacePitch = stride - 1, - .MinimumArrayElement = 0, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, - .SurfaceMinLOD = 0, - .MIPCountLOD = 0, - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, - .ResourceMinLOD = 0.0, - /* FIXME: We assume that the image must be bound at this time. 
*/ - .SurfaceBaseAddress = { NULL, offset }, - }; - - GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); -} - -VkResult anv_CreateBufferView( - VkDevice _device, - const VkBufferViewCreateInfo* pCreateInfo, - VkBufferView* pView) +VkResult +anv_buffer_view_create( + struct anv_device * device, + const VkBufferViewCreateInfo* pCreateInfo, + struct anv_buffer_view ** view_out) { - ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); - struct anv_buffer_view *bview; - struct anv_surface_view *view; + struct anv_buffer_view *view; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); - bview = anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (bview == NULL) + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - view = &bview->view; - view->bo = buffer->bo; - view->offset = buffer->offset + pCreateInfo->offset; - view->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - view->format = anv_format_for_vk_format(pCreateInfo->format); - view->range = pCreateInfo->range; - - anv_fill_buffer_surface_state(view->surface_state.map, view->format, - view->offset, pCreateInfo->range); + view->view = (struct anv_surface_view) { + .bo = buffer->bo, + .offset = buffer->offset + pCreateInfo->offset, + .surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64), + .format = anv_format_for_vk_format(pCreateInfo->format), + .range = pCreateInfo->range, + }; - *pView = anv_buffer_view_to_handle(bview); + *view_out = view; return VK_SUCCESS; } + +VkResult anv_CreateBufferView( + VkDevice _device, + const VkBufferViewCreateInfo* pCreateInfo, + VkBufferView* pView) +{ + return driver_layer->CreateBufferView(_device, pCreateInfo, pView); +} + VkResult anv_DestroyBufferView( VkDevice _device, VkBufferView _bview) diff --git 
a/src/vulkan/anv_gen8.c b/src/vulkan/anv_gen8.c index 8d53ebb32f5..16f2da18006 100644 --- a/src/vulkan/anv_gen8.c +++ b/src/vulkan/anv_gen8.c @@ -68,3 +68,77 @@ VkResult gen8_CreateDynamicRasterState( return VK_SUCCESS; } + +void +gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, + uint32_t offset, uint32_t range) +{ + /* This assumes RGBA float format. */ + uint32_t stride = 4; + uint32_t num_elements = range / stride; + + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceArray = false, + .SurfaceFormat = format->surface_format, + .SurfaceVerticalAlignment = VALIGN4, + .SurfaceHorizontalAlignment = HALIGN4, + .TileMode = LINEAR, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GEN8_MOCS, + .BaseMipLevel = 0.0, + .SurfaceQPitch = 0, + .Height = (num_elements >> 7) & 0x3fff, + .Width = num_elements & 0x7f, + .Depth = (num_elements >> 21) & 0x3f, + .SurfacePitch = stride - 1, + .MinimumArrayElement = 0, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + .SurfaceMinLOD = 0, + .MIPCountLOD = 0, + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + .ResourceMinLOD = 0.0, + /* FIXME: We assume that the image must be bound at this time. 
*/ + .SurfaceBaseAddress = { NULL, offset }, + }; + + GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); +} + +VkResult gen8_CreateBufferView( + VkDevice _device, + const VkBufferViewCreateInfo* pCreateInfo, + VkBufferView* pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_buffer_view *view; + VkResult result; + + result = anv_buffer_view_create(device, pCreateInfo, &view); + if (result != VK_SUCCESS) + return result; + + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + gen8_fill_buffer_surface_state(view->view.surface_state.map, format, + view->view.offset, pCreateInfo->range); + + *pView = anv_buffer_view_to_handle(view); + + return VK_SUCCESS; +} + diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 1de97fda83a..518a3ccdd25 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1021,8 +1021,13 @@ void anv_color_attachment_view_init(struct anv_color_attachment_view *view, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void anv_fill_buffer_surface_state(void *state, const struct anv_format *format, - uint32_t offset, uint32_t range); + +VkResult anv_buffer_view_create(struct anv_device *device, + const VkBufferViewCreateInfo *pCreateInfo, + struct anv_buffer_view **view_out); + +void gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, + uint32_t offset, uint32_t range); void anv_surface_view_fini(struct anv_device *device, struct anv_surface_view *view); -- cgit v1.2.3 From ef0ab62486abfe4337870f861897fdb42a7bb66d Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 13 Aug 2015 21:52:31 -0700 Subject: vk: Move anv_CreateSampler to anv_gen8.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_device.c | 96 +---------------------------------------------- 
src/vulkan/anv_gen8.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 95 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 78c35dd83fe..3ca9318cdb0 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1375,106 +1375,12 @@ VkResult anv_DestroyBufferView( return VK_SUCCESS; } -// Sampler functions - VkResult anv_CreateSampler( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler) { - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_sampler *sampler; - uint32_t mag_filter, min_filter, max_anisotropy; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = anv_device_alloc(device, sizeof(*sampler), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!sampler) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - static const uint32_t vk_to_gen_tex_filter[] = { - [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST, - [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR - }; - - static const uint32_t vk_to_gen_mipmap_mode[] = { - [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, - [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, - [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR - }; - - static const uint32_t vk_to_gen_tex_address[] = { - [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, - [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, - [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, - [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, - [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, - }; - - static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, - }; - - if (pCreateInfo->maxAnisotropy > 1) { - 
mag_filter = MAPFILTER_ANISOTROPIC; - min_filter = MAPFILTER_ANISOTROPIC; - max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2; - } else { - mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter]; - min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; - max_anisotropy = RATIO21; - } - - struct GEN8_SAMPLER_STATE sampler_state = { - .SamplerDisable = false, - .TextureBorderColorMode = DX10OGL, - .LODPreClampMode = 0, - .BaseMipLevel = 0.0, - .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], - .MagModeFilter = mag_filter, - .MinModeFilter = min_filter, - .TextureLODBias = pCreateInfo->mipLodBias * 256, - .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = pCreateInfo->minLod, - .MaxLOD = pCreateInfo->maxLod, - .ChromaKeyEnable = 0, - .ChromaKeyIndex = 0, - .ChromaKeyMode = 0, - .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = 0, - - .IndirectStatePointer = - device->border_colors.offset + - pCreateInfo->borderColor * sizeof(float) * 4, - - .LODClampMagnificationMode = MIPNONE, - .MaximumAnisotropy = max_anisotropy, - .RAddressMinFilterRoundingEnable = 0, - .RAddressMagFilterRoundingEnable = 0, - .VAddressMinFilterRoundingEnable = 0, - .VAddressMagFilterRoundingEnable = 0, - .UAddressMinFilterRoundingEnable = 0, - .UAddressMagFilterRoundingEnable = 0, - .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = 0, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU], - .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW], - }; - - GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); - - *pSampler = anv_sampler_to_handle(sampler); - - return VK_SUCCESS; + return driver_layer->CreateSampler(_device, pCreateInfo, pSampler); } VkResult anv_DestroySampler( diff --git a/src/vulkan/anv_gen8.c b/src/vulkan/anv_gen8.c index 16f2da18006..21797b3e4bf 100644 --- a/src/vulkan/anv_gen8.c +++ 
b/src/vulkan/anv_gen8.c @@ -142,3 +142,102 @@ VkResult gen8_CreateBufferView( return VK_SUCCESS; } +VkResult gen8_CreateSampler( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + uint32_t mag_filter, min_filter, max_anisotropy; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_device_alloc(device, sizeof(*sampler), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + static const uint32_t vk_to_gen_tex_filter[] = { + [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST, + [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR + }; + + static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, + [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR + }; + + static const uint32_t vk_to_gen_tex_address[] = { + [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, + [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, + [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, + [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, + [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, + }; + + static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, + }; + + if (pCreateInfo->maxAnisotropy > 1) { + mag_filter = MAPFILTER_ANISOTROPIC; + min_filter = MAPFILTER_ANISOTROPIC; + max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2; + } else { + mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter]; + min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; + max_anisotropy = RATIO21; + } + + struct 
GEN8_SAMPLER_STATE sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .LODPreClampMode = 0, + .BaseMipLevel = 0.0, + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], + .MagModeFilter = mag_filter, + .MinModeFilter = min_filter, + .TextureLODBias = pCreateInfo->mipLodBias * 256, + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = pCreateInfo->minLod, + .MaxLOD = pCreateInfo->maxLod, + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = 0, + + .IndirectStatePointer = + device->border_colors.offset + + pCreateInfo->borderColor * sizeof(float) * 4, + + .LODClampMagnificationMode = MIPNONE, + .MaximumAnisotropy = max_anisotropy, + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = 0, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW], + }; + + GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} -- cgit v1.2.3 From 0bcf85d79f39242daa648fb88fcded9fba6fd3b4 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 14 Aug 2015 09:30:41 -0700 Subject: vk: Move pipeline creation to anv_gen8.c --- src/vulkan/anv_gen8.c | 667 ++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_meta.c | 8 +- src/vulkan/anv_pipeline.c | 685 ++-------------------------------------------- src/vulkan/anv_private.h | 20 +- 4 files changed, 710 insertions(+), 670 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_gen8.c 
b/src/vulkan/anv_gen8.c index 21797b3e4bf..1333e98f579 100644 --- a/src/vulkan/anv_gen8.c +++ b/src/vulkan/anv_gen8.c @@ -241,3 +241,670 @@ VkResult gen8_CreateSampler( return VK_SUCCESS; } + +static void +emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info) +{ + const uint32_t num_dwords = 1 + info->attributeCount * 2; + uint32_t *p; + bool instancing_enable[32]; + + pipeline->vb_used = 0; + for (uint32_t i = 0; i < info->bindingCount; i++) { + const VkVertexInputBindingDescription *desc = + &info->pVertexBindingDescriptions[i]; + + pipeline->vb_used |= 1 << desc->binding; + pipeline->binding_stride[desc->binding] = desc->strideInBytes; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. */ + switch (desc->stepRate) { + default: + case VK_VERTEX_INPUT_STEP_RATE_VERTEX: + instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: + instancing_enable[desc->binding] = true; + break; + } + } + + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GEN8_3DSTATE_VERTEX_ELEMENTS); + + for (uint32_t i = 0; i < info->attributeCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + const struct anv_format *format = anv_format_for_vk_format(desc->format); + + struct GEN8_VERTEX_ELEMENT_STATE element = { + .VertexBufferIndex = desc->binding, + .Valid = true, + .SourceElementFormat = format->surface_format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offsetInBytes, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component3Control = format->num_channels >= 4 ? 
VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP + }; + GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_INSTANCING, + .InstancingEnable = instancing_enable[desc->binding], + .VertexElementIndex = i, + /* Vulkan so far doesn't have an instance divisor, so + * this is always 1 (ignored if not instancing). */ + .InstanceDataStepRate = 1); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_SGVS, + .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDComponentNumber = 2, + .VertexIDElementOffset = info->bindingCount, + .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDComponentNumber = 3, + .InstanceIDElementOffset = info->bindingCount); +} + +static void +emit_ia_state(struct anv_pipeline *pipeline, + const VkPipelineInputAssemblyStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 + }; + uint32_t topology = vk_to_gen_primitive_type[info->topology]; + + if (extra && extra->use_rectlist) + topology = _3DPRIM_RECTLIST; + + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, + }; + GEN8_3DSTATE_VF_pack(NULL, pipeline->state_vf, &vf); + + anv_batch_emit(&pipeline->batch, 
GEN8_3DSTATE_VF_TOPOLOGY, + .PrimitiveTopologyType = topology); +} + +static void +emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + static const uint32_t vk_to_gen_cullmode[] = { + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH + }; + + static const uint32_t vk_to_gen_fillmode[] = { + [VK_FILL_MODE_POINTS] = RASTER_POINT, + [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, + [VK_FILL_MODE_SOLID] = RASTER_SOLID + }; + + static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_CCW] = CounterClockwise, + [VK_FRONT_FACE_CW] = Clockwise + }; + + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .ViewportTransformEnable = !(extra && extra->disable_viewport), + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .PointWidthSource = pipeline->writes_point_size ? 
Vertex : State, + .PointWidth = 1.0, + }; + + /* FINISHME: VkBool32 rasterizerDiscardEnable; */ + + GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); + + struct GEN8_3DSTATE_RASTER raster = { + GEN8_3DSTATE_RASTER_header, + .FrontWinding = vk_to_gen_front_face[info->frontFace], + .CullMode = vk_to_gen_cullmode[info->cullMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .ScissorRectangleEnable = !(extra && extra->disable_scissor), + .ViewportZClipTestEnable = info->depthClipEnable + }; + + GEN8_3DSTATE_RASTER_pack(NULL, pipeline->state_raster, &raster); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = + pipeline->wm_prog_data.num_varying_inputs); + +} + +static void +emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info) +{ + struct anv_device *device = pipeline->device; + + static const uint32_t vk_to_gen_logic_op[] = { + [VK_LOGIC_OP_COPY] = LOGICOP_COPY, + [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, + [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = LOGICOP_XOR, + [VK_LOGIC_OP_OR] = LOGICOP_OR, + [VK_LOGIC_OP_NOR] = LOGICOP_NOR, + [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = LOGICOP_NAND, + [VK_LOGIC_OP_SET] = LOGICOP_SET, + }; + + static const uint32_t vk_to_gen_blend[] = { + [VK_BLEND_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + [VK_BLEND_ONE_MINUS_SRC_COLOR] = 
BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, + }; + + static const uint32_t vk_to_gen_blend_op[] = { + [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, + [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, + [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, + [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, + [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, + }; + + uint32_t num_dwords = GEN8_BLEND_STATE_length; + pipeline->blend_state = + anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); + + struct GEN8_BLEND_STATE blend_state = { + .AlphaToCoverageEnable = info->alphaToCoverageEnable, + }; + + for (uint32_t i = 0; i < info->attachmentCount; i++) { + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; + + blend_state.Entry[i] = (struct GEN8_BLEND_STATE_ENTRY) { + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .ColorBufferBlendEnable = a->blendEnable, + .PreBlendSourceOnlyClampEnable = false, + .PreBlendColorClampEnable = false, + .PostBlendColorClampEnable = false, + .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], + .DestinationBlendFactor = 
vk_to_gen_blend[a->destBlendColor], + .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], + .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], + .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), + .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), + .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), + .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), + }; + } + + GEN8_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_BLEND_STATE_POINTERS, + .BlendStatePointer = pipeline->blend_state.offset, + .BlendStatePointerValid = true); +} + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER, + [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS, + [VK_COMPARE_OP_EQUAL] = COMPAREFUNCTION_EQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = COMPAREFUNCTION_LEQUAL, + [VK_COMPARE_OP_GREATER] = COMPAREFUNCTION_GREATER, + [VK_COMPARE_OP_NOT_EQUAL] = COMPAREFUNCTION_NOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = COMPAREFUNCTION_GEQUAL, + [VK_COMPARE_OP_ALWAYS] = COMPAREFUNCTION_ALWAYS, +}; + +static const uint32_t vk_to_gen_stencil_op[] = { + [VK_STENCIL_OP_KEEP] = 0, + [VK_STENCIL_OP_ZERO] = 0, + [VK_STENCIL_OP_REPLACE] = 0, + [VK_STENCIL_OP_INC_CLAMP] = 0, + [VK_STENCIL_OP_DEC_CLAMP] = 0, + [VK_STENCIL_OP_INVERT] = 0, + [VK_STENCIL_OP_INC_WRAP] = 0, + [VK_STENCIL_OP_DEC_WRAP] = 0 +}; + +static void +emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. 
+ */ + memset(pipeline->state_wm_depth_stencil, 0, + sizeof(pipeline->state_wm_depth_stencil)); + return; + } + + /* VkBool32 depthBoundsEnable; // optional (depth_bounds_test) */ + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], + .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.stencilDepthFailOp], + .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], + }; + + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->state_wm_depth_stencil, &wm_depth_stencil); +} + +VkResult +gen8_graphics_pipeline_create( + VkDevice _device, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + uint32_t offset, length; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + + result = 
anv_reloc_list_init(&pipeline->batch_relocs, device); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + pipeline->shaders[pCreateInfo->pStages[i].stage] = + anv_shader_from_handle(pCreateInfo->pStages[i].shader); + } + + if (pCreateInfo->pTessellationState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + if (pCreateInfo->pViewportState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO"); + if (pCreateInfo->pMultisampleState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); + + pipeline->use_repclear = extra && extra->use_repclear; + + anv_compiler_run(device->compiler, pipeline); + + /* FIXME: The compiler dead-codes FS inputs when we don't have a VS, so we + * hard code this to num_attributes - 2. This is because the attributes + * include VUE header and position, which aren't counted as varying + * inputs. 
*/ + if (pipeline->vs_simd8 == NO_KERNEL) { + pipeline->wm_prog_data.num_varying_inputs = + pCreateInfo->pVertexInputState->attributeCount - 2; + } + + assert(pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); + assert(pCreateInfo->pInputAssemblyState); + emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); + assert(pCreateInfo->pRasterState); + emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); + emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + emit_cb_state(pipeline, pCreateInfo->pColorBlendState); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_STATISTICS, + .StatisticsEnable = true); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_HS, .Enable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_TE, .TEEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_DS, .FunctionEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS, + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS, + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS, + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM_CHROMAKEY, + .ChromaKeyKillEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE_SWIZ); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, + .ClipEnable = true, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM, + .StatisticsEnable = true, + .LineEndCapAntialiasingRegionWidth = _05pixels, + .LineAntialiasingRegionWidth = _10pixels, + 
.EarlyDepthStencilControl = NORMAL, + .ForceThreadDispatchEnable = NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .BarycentricInterpolationMode = + pipeline->wm_prog_data.barycentric_interp_modes); + + uint32_t samples = 1; + uint32_t log2_samples = __builtin_ffs(samples) - 1; + bool enable_sampling = samples > 1 ? true : false; + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_MULTISAMPLE, + .PixelPositionOffsetEnable = enable_sampling, + .PixelLocation = CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SAMPLE_MASK, + .SampleMask = 0xffff); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_VS, + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_GS, + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_HS, + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_DS, + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->gs_vec4 == NO_KERNEL) + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, .Enable = false); + else + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, + .SingleProgramFlow = false, + .KernelStartPointer = pipeline->gs_vec4, + .VectorMaskEnable = Vmask, + .SamplerCount = 0, + .BindingTableEntryCount = 0, + .ExpectedVertexCount = pipeline->gs_vertex_count, + + .ScratchSpaceBasePointer = 
pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], + .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .DispatchGRFStartRegisterForURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + //pipeline->gs_prog_data.dispatch_mode | + .StatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, + .ReorderMode = TRAILING, + .Enable = true, + + .ControlDataFormat = gs_prog_data->control_data_format, + + /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: + * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) + * UserClipDistanceCullTestEnableBitmask(v) + */ + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* Skip the VUE header and position slots */ + offset = 1; + length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->vs_simd8 == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + .FunctionEnable = false, + .VertexURBEntryOutputReadOffset = 1, + /* Even if VS is disabled, SBE still gets the amount of + * vertex data to read from this field. We use attribute + * count - 1, as we don't count the VUE header here. 
*/ + .VertexURBEntryOutputLength = + DIV_ROUND_UP(pCreateInfo->pVertexInputState->attributeCount - 1, 2)); + else + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + .KernelStartPointer = pipeline->vs_simd8, + .SingleVertexDispatch = Multiple, + .VectorMaskEnable = Dmask, + .SamplerCount = 0, + .BindingTableEntryCount = + vue_prog_data->base.binding_table.size_bytes / 4, + .ThreadDispatchPriority = Normal, + .FloatingPointMode = IEEE754, + .IllegalOpcodeExceptionEnable = false, + .AccessesUAV = false, + .SoftwareExceptionEnable = false, + + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], + .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048), + + .DispatchGRFStartRegisterForURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = false, + .SIMD8DispatchEnable = true, + .VertexCacheDisable = false, + .FunctionEnable = true, + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length, + .UserClipDistanceClipTestEnableBitmask = 0, + .UserClipDistanceCullTestEnableBitmask = 0); + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + uint32_t ksp0, ksp2, grf_start0, grf_start2; + + ksp2 = 0; + grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + ksp0 = pipeline->ps_simd8; + grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + ksp2 = pipeline->ps_simd16; + grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + ksp0 = pipeline->ps_simd16; + grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; + } else { + unreachable("no ps shader"); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, + .KernelStartPointer0 = ksp0, + + .SingleProgramFlow = false, + .VectorMaskEnable = true, + .SamplerCount = 
1, + + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), + + .MaximumNumberofThreadsPerPSD = 64 - 2, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE: POSOFFSET_NONE, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._32PixelDispatchEnable = false, + + .DispatchGRFStartRegisterForConstantSetupData0 = grf_start0, + .DispatchGRFStartRegisterForConstantSetupData1 = 0, + .DispatchGRFStartRegisterForConstantSetupData2 = grf_start2, + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = ksp2); + + bool per_sample_ps = false; + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA, + .PixelShaderValid = true, + .PixelShaderKillsPixel = wm_prog_data->uses_kill, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .PixelShaderIsPerSample = per_sample_ps); + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} + +VkResult gen8_compute_pipeline_create( + VkDevice _device, + const VkComputePipelineCreateInfo* pCreateInfo, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + result = anv_reloc_list_init(&pipeline->batch_relocs, device); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + 
pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + + pipeline->shaders[VK_SHADER_STAGE_COMPUTE] = + anv_shader_from_handle(pCreateInfo->cs.shader); + + pipeline->use_repclear = false; + + anv_compiler_run(device->compiler, pipeline); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + + anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), + .ScratchSpaceBasePointerHigh = 0, + .StackSize = 0, + + .MaximumNumberofThreads = device->info.max_cs_threads - 1, + .NumberofURBEntries = 2, + .ResetGatewayTimer = true, + .BypassGatewayControl = true, + .URBEntryAllocationSize = 2, + .CURBEAllocationSize = 0); + + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + uint32_t group_size = prog_data->local_size[0] * + prog_data->local_size[1] * prog_data->local_size[2]; + pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); + uint32_t remainder = group_size & (prog_data->simd_size - 1); + + if (remainder > 0) + pipeline->cs_right_mask = ~0u >> (32 - remainder); + else + pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); + + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index d4a0bd96a4c..38906a916da 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -100,7 +100,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) } }; - anv_pipeline_create(anv_device_to_handle(device), + anv_graphics_pipeline_create(anv_device_to_handle(device), 
&(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .stageCount = 1, @@ -150,7 +150,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) }, .flags = 0, }, - &(struct anv_pipeline_create_info) { + &(struct anv_graphics_pipeline_create_info) { .use_repclear = true, .disable_viewport = true, .use_rectlist = true @@ -454,7 +454,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, &device->meta_state.blit.pipeline_layout); - anv_pipeline_create(anv_device_to_handle(device), + anv_graphics_pipeline_create(anv_device_to_handle(device), &(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .stageCount = 2, @@ -496,7 +496,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) .flags = 0, .layout = device->meta_state.blit.pipeline_layout, }, - &(struct anv_pipeline_create_info) { + &(struct anv_graphics_pipeline_create_info) { .use_repclear = false, .disable_viewport = true, .disable_scissor = true, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 3c9c14193de..9a96387795d 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -154,603 +154,6 @@ VkResult anv_MergePipelineCaches( stub_return(VK_UNSUPPORTED); } -// Pipeline functions - -static void -emit_vertex_input(struct anv_pipeline *pipeline, - const VkPipelineVertexInputStateCreateInfo *info) -{ - const uint32_t num_dwords = 1 + info->attributeCount * 2; - uint32_t *p; - bool instancing_enable[32]; - - pipeline->vb_used = 0; - for (uint32_t i = 0; i < info->bindingCount; i++) { - const VkVertexInputBindingDescription *desc = - &info->pVertexBindingDescriptions[i]; - - pipeline->vb_used |= 1 << desc->binding; - pipeline->binding_stride[desc->binding] = desc->strideInBytes; - - /* Step rate is programmed per vertex element (attribute), not - * binding. Set up a map of which bindings step per instance, for - * reference by vertex element setup. 
*/ - switch (desc->stepRate) { - default: - case VK_VERTEX_INPUT_STEP_RATE_VERTEX: - instancing_enable[desc->binding] = false; - break; - case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: - instancing_enable[desc->binding] = true; - break; - } - } - - p = anv_batch_emitn(&pipeline->batch, num_dwords, - GEN8_3DSTATE_VERTEX_ELEMENTS); - - for (uint32_t i = 0; i < info->attributeCount; i++) { - const VkVertexInputAttributeDescription *desc = - &info->pVertexAttributeDescriptions[i]; - const struct anv_format *format = anv_format_for_vk_format(desc->format); - - struct GEN8_VERTEX_ELEMENT_STATE element = { - .VertexBufferIndex = desc->binding, - .Valid = true, - .SourceElementFormat = format->surface_format, - .EdgeFlagEnable = false, - .SourceElementOffset = desc->offsetInBytes, - .Component0Control = VFCOMP_STORE_SRC, - .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component3Control = format->num_channels >= 4 ? VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP - }; - GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_INSTANCING, - .InstancingEnable = instancing_enable[desc->binding], - .VertexElementIndex = i, - /* Vulkan so far doesn't have an instance divisor, so - * this is always 1 (ignored if not instancing). 
*/ - .InstanceDataStepRate = 1); - } - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_SGVS, - .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, - .VertexIDComponentNumber = 2, - .VertexIDElementOffset = info->bindingCount, - .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, - .InstanceIDComponentNumber = 3, - .InstanceIDElementOffset = info->bindingCount); -} - -static void -emit_ia_state(struct anv_pipeline *pipeline, - const VkPipelineInputAssemblyStateCreateInfo *info, - const struct anv_pipeline_create_info *extra) -{ - static const uint32_t vk_to_gen_primitive_type[] = { - [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 - }; - uint32_t topology = vk_to_gen_primitive_type[info->topology]; - - if (extra && extra->use_rectlist) - topology = _3DPRIM_RECTLIST; - - struct GEN8_3DSTATE_VF vf = { - GEN8_3DSTATE_VF_header, - .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, - }; - GEN8_3DSTATE_VF_pack(NULL, pipeline->state_vf, &vf); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY, - .PrimitiveTopologyType = topology); -} - -static void -emit_rs_state(struct anv_pipeline *pipeline, - const VkPipelineRasterStateCreateInfo *info, - const struct anv_pipeline_create_info *extra) -{ - static const uint32_t vk_to_gen_cullmode[] = { - [VK_CULL_MODE_NONE] = CULLMODE_NONE, - [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, - 
[VK_CULL_MODE_BACK] = CULLMODE_BACK, - [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH - }; - - static const uint32_t vk_to_gen_fillmode[] = { - [VK_FILL_MODE_POINTS] = RASTER_POINT, - [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, - [VK_FILL_MODE_SOLID] = RASTER_SOLID - }; - - static const uint32_t vk_to_gen_front_face[] = { - [VK_FRONT_FACE_CCW] = CounterClockwise, - [VK_FRONT_FACE_CW] = Clockwise - }; - - struct GEN8_3DSTATE_SF sf = { - GEN8_3DSTATE_SF_header, - .ViewportTransformEnable = !(extra && extra->disable_viewport), - .TriangleStripListProvokingVertexSelect = 0, - .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, - .PointWidthSource = pipeline->writes_point_size ? Vertex : State, - .PointWidth = 1.0, - }; - - /* FINISHME: VkBool32 rasterizerDiscardEnable; */ - - GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); - - struct GEN8_3DSTATE_RASTER raster = { - GEN8_3DSTATE_RASTER_header, - .FrontWinding = vk_to_gen_front_face[info->frontFace], - .CullMode = vk_to_gen_cullmode[info->cullMode], - .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], - .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], - .ScissorRectangleEnable = !(extra && extra->disable_scissor), - .ViewportZClipTestEnable = info->depthClipEnable - }; - - GEN8_3DSTATE_RASTER_pack(NULL, pipeline->state_raster, &raster); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, - .ForceVertexURBEntryReadLength = false, - .ForceVertexURBEntryReadOffset = false, - .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = - pipeline->wm_prog_data.num_varying_inputs); - -} - -static void -emit_cb_state(struct anv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *info) -{ - struct anv_device *device = pipeline->device; - - static const uint32_t vk_to_gen_logic_op[] = { - [VK_LOGIC_OP_COPY] = LOGICOP_COPY, - [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, - [VK_LOGIC_OP_AND] = LOGICOP_AND, - [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, 
- [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, - [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP, - [VK_LOGIC_OP_XOR] = LOGICOP_XOR, - [VK_LOGIC_OP_OR] = LOGICOP_OR, - [VK_LOGIC_OP_NOR] = LOGICOP_NOR, - [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV, - [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, - [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, - [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, - [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, - [VK_LOGIC_OP_NAND] = LOGICOP_NAND, - [VK_LOGIC_OP_SET] = LOGICOP_SET, - }; - - static const uint32_t vk_to_gen_blend[] = { - [VK_BLEND_ZERO] = BLENDFACTOR_ZERO, - [VK_BLEND_ONE] = BLENDFACTOR_ONE, - [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, - [VK_BLEND_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, - [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR, - [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR, - [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, - [VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, - [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA, - [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, - [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, - [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, - [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, - [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, - [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, - [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, - [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, - [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, - [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, - }; - - static const uint32_t vk_to_gen_blend_op[] = { - [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, - [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, - [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, - [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, - [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, - }; - - uint32_t num_dwords = GEN8_BLEND_STATE_length; - pipeline->blend_state = - 
anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); - - struct GEN8_BLEND_STATE blend_state = { - .AlphaToCoverageEnable = info->alphaToCoverageEnable, - }; - - for (uint32_t i = 0; i < info->attachmentCount; i++) { - const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; - - blend_state.Entry[i] = (struct GEN8_BLEND_STATE_ENTRY) { - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], - .ColorBufferBlendEnable = a->blendEnable, - .PreBlendSourceOnlyClampEnable = false, - .PreBlendColorClampEnable = false, - .PostBlendColorClampEnable = false, - .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], - .DestinationBlendFactor = vk_to_gen_blend[a->destBlendColor], - .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], - .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], - .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), - .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), - .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), - .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), - }; - } - - GEN8_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_BLEND_STATE_POINTERS, - .BlendStatePointer = pipeline->blend_state.offset, - .BlendStatePointerValid = true); -} - -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER, - [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS, - [VK_COMPARE_OP_EQUAL] = COMPAREFUNCTION_EQUAL, - [VK_COMPARE_OP_LESS_EQUAL] = COMPAREFUNCTION_LEQUAL, - [VK_COMPARE_OP_GREATER] = COMPAREFUNCTION_GREATER, - [VK_COMPARE_OP_NOT_EQUAL] = COMPAREFUNCTION_NOTEQUAL, - [VK_COMPARE_OP_GREATER_EQUAL] = COMPAREFUNCTION_GEQUAL, - [VK_COMPARE_OP_ALWAYS] = COMPAREFUNCTION_ALWAYS, -}; - -static const 
uint32_t vk_to_gen_stencil_op[] = { - [VK_STENCIL_OP_KEEP] = 0, - [VK_STENCIL_OP_ZERO] = 0, - [VK_STENCIL_OP_REPLACE] = 0, - [VK_STENCIL_OP_INC_CLAMP] = 0, - [VK_STENCIL_OP_DEC_CLAMP] = 0, - [VK_STENCIL_OP_INVERT] = 0, - [VK_STENCIL_OP_INC_WRAP] = 0, - [VK_STENCIL_OP_DEC_WRAP] = 0 -}; - -static void -emit_ds_state(struct anv_pipeline *pipeline, - const VkPipelineDepthStencilStateCreateInfo *info) -{ - if (info == NULL) { - /* We're going to OR this together with the dynamic state. We need - * to make sure it's initialized to something useful. - */ - memset(pipeline->state_wm_depth_stencil, 0, - sizeof(pipeline->state_wm_depth_stencil)); - return; - } - - /* VkBool32 depthBoundsEnable; // optional (depth_bounds_test) */ - - struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - .DepthTestEnable = info->depthTestEnable, - .DepthBufferWriteEnable = info->depthWriteEnable, - .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], - .DoubleSidedStencilEnable = true, - - .StencilTestEnable = info->stencilTestEnable, - .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], - .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], - .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], - .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], - .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], - .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], - .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.stencilDepthFailOp], - .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], - }; - - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->state_wm_depth_stencil, &wm_depth_stencil); -} - -VkResult -anv_pipeline_create( - VkDevice _device, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - const struct anv_pipeline_create_info * extra, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, 
device, _device); - struct anv_pipeline *pipeline; - VkResult result; - uint32_t offset, length; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); - - pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); - - result = anv_reloc_list_init(&pipeline->batch_relocs, device); - if (result != VK_SUCCESS) { - anv_device_free(device, pipeline); - return result; - } - pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; - pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); - pipeline->batch.relocs = &pipeline->batch_relocs; - - anv_state_stream_init(&pipeline->program_stream, - &device->instruction_block_pool); - - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - pipeline->shaders[pCreateInfo->pStages[i].stage] = - anv_shader_from_handle(pCreateInfo->pStages[i].shader); - } - - if (pCreateInfo->pTessellationState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); - if (pCreateInfo->pViewportState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO"); - if (pCreateInfo->pMultisampleState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); - - pipeline->use_repclear = extra && extra->use_repclear; - - anv_compiler_run(device->compiler, pipeline); - - /* FIXME: The compiler dead-codes FS inputs when we don't have a VS, so we - * hard code this to num_attributes - 2. This is because the attributes - * include VUE header and position, which aren't counted as varying - * inputs. 
*/ - if (pipeline->vs_simd8 == NO_KERNEL) { - pipeline->wm_prog_data.num_varying_inputs = - pCreateInfo->pVertexInputState->attributeCount - 2; - } - - assert(pCreateInfo->pVertexInputState); - emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); - assert(pCreateInfo->pInputAssemblyState); - emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); - assert(pCreateInfo->pRasterState); - emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); - emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); - emit_cb_state(pipeline, pCreateInfo->pColorBlendState); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_STATISTICS, - .StatisticsEnable = true); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_HS, .Enable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_TE, .TEEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_DS, .FunctionEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS, - .ConstantBufferOffset = 0, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS, - .ConstantBufferOffset = 4, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS, - .ConstantBufferOffset = 8, - .ConstantBufferSize = 4); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM_CHROMAKEY, - .ChromaKeyKillEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE_SWIZ); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, - .ClipEnable = true, - .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), - .MinimumPointWidth = 0.125, - .MaximumPointWidth = 255.875); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM, - .StatisticsEnable = true, - .LineEndCapAntialiasingRegionWidth = _05pixels, - .LineAntialiasingRegionWidth = _10pixels, - 
.EarlyDepthStencilControl = NORMAL, - .ForceThreadDispatchEnable = NORMAL, - .PointRasterizationRule = RASTRULE_UPPER_RIGHT, - .BarycentricInterpolationMode = - pipeline->wm_prog_data.barycentric_interp_modes); - - uint32_t samples = 1; - uint32_t log2_samples = __builtin_ffs(samples) - 1; - bool enable_sampling = samples > 1 ? true : false; - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_MULTISAMPLE, - .PixelPositionOffsetEnable = enable_sampling, - .PixelLocation = CENTER, - .NumberofMultisamples = log2_samples); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SAMPLE_MASK, - .SampleMask = 0xffff); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_VS, - .VSURBStartingAddress = pipeline->urb.vs_start, - .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, - .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_GS, - .GSURBStartingAddress = pipeline->urb.gs_start, - .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, - .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_HS, - .HSURBStartingAddress = pipeline->urb.vs_start, - .HSURBEntryAllocationSize = 0, - .HSNumberofURBEntries = 0); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_DS, - .DSURBStartingAddress = pipeline->urb.vs_start, - .DSURBEntryAllocationSize = 0, - .DSNumberofURBEntries = 0); - - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; - offset = 1; - length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; - - if (pipeline->gs_vec4 == NO_KERNEL) - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, .Enable = false); - else - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, - .SingleProgramFlow = false, - .KernelStartPointer = pipeline->gs_vec4, - .VectorMaskEnable = Vmask, - .SamplerCount = 0, - .BindingTableEntryCount = 0, - .ExpectedVertexCount = pipeline->gs_vertex_count, - - .ScratchSpaceBasePointer = 
pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], - .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048), - - .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, - .OutputTopology = gs_prog_data->output_topology, - .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, - .DispatchGRFStartRegisterForURBData = - gs_prog_data->base.base.dispatch_grf_start_reg, - - .MaximumNumberofThreads = device->info.max_gs_threads, - .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, - //pipeline->gs_prog_data.dispatch_mode | - .StatisticsEnable = true, - .IncludePrimitiveID = gs_prog_data->include_primitive_id, - .ReorderMode = TRAILING, - .Enable = true, - - .ControlDataFormat = gs_prog_data->control_data_format, - - /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: - * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) - * UserClipDistanceCullTestEnableBitmask(v) - */ - - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length); - - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; - /* Skip the VUE header and position slots */ - offset = 1; - length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; - - if (pipeline->vs_simd8 == NO_KERNEL || (extra && extra->disable_vs)) - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, - .FunctionEnable = false, - .VertexURBEntryOutputReadOffset = 1, - /* Even if VS is disabled, SBE still gets the amount of - * vertex data to read from this field. We use attribute - * count - 1, as we don't count the VUE header here. 
*/ - .VertexURBEntryOutputLength = - DIV_ROUND_UP(pCreateInfo->pVertexInputState->attributeCount - 1, 2)); - else - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, - .KernelStartPointer = pipeline->vs_simd8, - .SingleVertexDispatch = Multiple, - .VectorMaskEnable = Dmask, - .SamplerCount = 0, - .BindingTableEntryCount = - vue_prog_data->base.binding_table.size_bytes / 4, - .ThreadDispatchPriority = Normal, - .FloatingPointMode = IEEE754, - .IllegalOpcodeExceptionEnable = false, - .AccessesUAV = false, - .SoftwareExceptionEnable = false, - - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], - .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048), - - .DispatchGRFStartRegisterForURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, - .VertexURBEntryReadOffset = 0, - - .MaximumNumberofThreads = device->info.max_vs_threads - 1, - .StatisticsEnable = false, - .SIMD8DispatchEnable = true, - .VertexCacheDisable = false, - .FunctionEnable = true, - - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length, - .UserClipDistanceClipTestEnableBitmask = 0, - .UserClipDistanceCullTestEnableBitmask = 0); - - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - uint32_t ksp0, ksp2, grf_start0, grf_start2; - - ksp2 = 0; - grf_start2 = 0; - if (pipeline->ps_simd8 != NO_KERNEL) { - ksp0 = pipeline->ps_simd8; - grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; - if (pipeline->ps_simd16 != NO_KERNEL) { - ksp2 = pipeline->ps_simd16; - grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; - } - } else if (pipeline->ps_simd16 != NO_KERNEL) { - ksp0 = pipeline->ps_simd16; - grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; - } else { - unreachable("no ps shader"); - } - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, - .KernelStartPointer0 = ksp0, - - .SingleProgramFlow = false, - .VectorMaskEnable = true, - .SamplerCount = 
1, - - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], - .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), - - .MaximumNumberofThreadsPerPSD = 64 - 2, - .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? - POSOFFSET_SAMPLE: POSOFFSET_NONE, - .PushConstantEnable = wm_prog_data->base.nr_params > 0, - ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, - ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, - ._32PixelDispatchEnable = false, - - .DispatchGRFStartRegisterForConstantSetupData0 = grf_start0, - .DispatchGRFStartRegisterForConstantSetupData1 = 0, - .DispatchGRFStartRegisterForConstantSetupData2 = grf_start2, - - .KernelStartPointer1 = 0, - .KernelStartPointer2 = ksp2); - - bool per_sample_ps = false; - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA, - .PixelShaderValid = true, - .PixelShaderKillsPixel = wm_prog_data->uses_kill, - .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, - .AttributeEnable = wm_prog_data->num_varying_inputs > 0, - .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, - .PixelShaderIsPerSample = per_sample_ps); - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; -} - VkResult anv_DestroyPipeline( VkDevice _device, VkPipeline _pipeline) @@ -767,6 +170,23 @@ VkResult anv_DestroyPipeline( return VK_SUCCESS; } +VkResult +anv_graphics_pipeline_create( + VkDevice _device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + VkPipeline *pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + switch (device->info.gen) { + case 8: + return gen8_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + VkResult anv_CreateGraphicsPipelines( VkDevice _device, VkPipelineCache pipelineCache, @@ -778,8 +198,8 @@ VkResult anv_CreateGraphicsPipelines( unsigned i = 0; for (; i < count; i++) { - result = 
anv_pipeline_create(_device, &pCreateInfos[i], - NULL, &pPipelines[i]); + result = anv_graphics_pipeline_create(_device, &pCreateInfos[i], + NULL, &pPipelines[i]); if (result != VK_SUCCESS) { for (unsigned j = 0; j < i; j++) { anv_DestroyPipeline(_device, pPipelines[j]); @@ -798,70 +218,13 @@ static VkResult anv_compute_pipeline_create( VkPipeline* pPipeline) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline *pipeline; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); - - pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - result = anv_reloc_list_init(&pipeline->batch_relocs, device); - if (result != VK_SUCCESS) { - anv_device_free(device, pipeline); - return result; + switch (device->info.gen) { + case 8: + return gen8_compute_pipeline_create(_device, pCreateInfo, pPipeline); + default: + unreachable("unsupported gen\n"); } - pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; - pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); - pipeline->batch.relocs = &pipeline->batch_relocs; - - anv_state_stream_init(&pipeline->program_stream, - &device->instruction_block_pool); - - memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); - - pipeline->shaders[VK_SHADER_STAGE_COMPUTE] = - anv_shader_from_handle(pCreateInfo->cs.shader); - - pipeline->use_repclear = false; - - anv_compiler_run(device->compiler, pipeline); - - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - - anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE, - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], - .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), - .ScratchSpaceBasePointerHigh = 0, - 
.StackSize = 0, - - .MaximumNumberofThreads = device->info.max_cs_threads - 1, - .NumberofURBEntries = 2, - .ResetGatewayTimer = true, - .BypassGatewayControl = true, - .URBEntryAllocationSize = 2, - .CURBEAllocationSize = 0); - - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - uint32_t group_size = prog_data->local_size[0] * - prog_data->local_size[1] * prog_data->local_size[2]; - pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); - uint32_t remainder = group_size & (prog_data->simd_size - 1); - - if (remainder > 0) - pipeline->cs_right_mask = ~0u >> (32 - remainder); - else - pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); - - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; } VkResult anv_CreateComputePipelines( diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 518a3ccdd25..69003c52a9a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -850,7 +850,7 @@ struct anv_pipeline { uint32_t cs_right_mask; }; -struct anv_pipeline_create_info { +struct anv_graphics_pipeline_create_info { bool use_repclear; bool disable_viewport; bool disable_scissor; @@ -859,10 +859,20 @@ struct anv_pipeline_create_info { }; VkResult -anv_pipeline_create(VkDevice device, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_pipeline_create_info *extra, - VkPipeline *pPipeline); +anv_graphics_pipeline_create(VkDevice device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + VkPipeline *pPipeline); + +VkResult +gen8_graphics_pipeline_create(VkDevice _device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + VkPipeline *pPipeline); +VkResult +gen8_compute_pipeline_create(VkDevice _device, + const VkComputePipelineCreateInfo *pCreateInfo, + VkPipeline *pPipeline); struct anv_compiler *anv_compiler_create(struct anv_device *device); void 
anv_compiler_destroy(struct anv_compiler *compiler); -- cgit v1.2.3 From 98126c021fafa6e9c35e48ae369e5dad1161a1da Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 14 Aug 2015 09:31:11 -0700 Subject: vk: Move dynamic depth stenctil to anv_gen8.c --- src/vulkan/anv_device.c | 37 +------------------------------------ src/vulkan/anv_gen8.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 3ca9318cdb0..1ab9d1a3c43 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1917,42 +1917,7 @@ VkResult anv_CreateDynamicDepthStencilState( const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, VkDynamicDepthStencilState* pState) { - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_ds_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - GEN8_3DSTATE_WM_DEPTH_STENCIL_header, - - /* Is this what we need to do? 
*/ - .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0, - - .StencilTestMask = pCreateInfo->stencilReadMask & 0xff, - .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, - - .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff, - .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, - }; - - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil, - &wm_depth_stencil); - - struct GEN8_COLOR_CALC_STATE color_calc_state = { - .StencilReferenceValue = pCreateInfo->stencilFrontRef, - .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef - }; - - GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); - - *pState = anv_dynamic_ds_state_to_handle(state); - - return VK_SUCCESS; + return driver_layer->CreateDynamicDepthStencilState(_device, pCreateInfo, pState); } VkResult anv_DestroyDynamicDepthStencilState( diff --git a/src/vulkan/anv_gen8.c b/src/vulkan/anv_gen8.c index 1333e98f579..e01bf52de95 100644 --- a/src/vulkan/anv_gen8.c +++ b/src/vulkan/anv_gen8.c @@ -908,3 +908,46 @@ VkResult gen8_compute_pipeline_create( return VK_SUCCESS; } + +VkResult gen8_CreateDynamicDepthStencilState( + VkDevice _device, + const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, + VkDynamicDepthStencilState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_ds_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + + /* Is this what we need to do? 
*/ + .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0, + + .StencilTestMask = pCreateInfo->stencilReadMask & 0xff, + .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, + + .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff, + .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, + }; + + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil, + &wm_depth_stencil); + + struct GEN8_COLOR_CALC_STATE color_calc_state = { + .StencilReferenceValue = pCreateInfo->stencilFrontRef, + .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef + }; + + GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); + + *pState = anv_dynamic_ds_state_to_handle(state); + + return VK_SUCCESS; +} -- cgit v1.2.3 From 130db307719901ff29677c3cfba53bd03dff9cb3 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 14 Aug 2015 10:03:27 -0700 Subject: vk: Move gen8 specific parts of queries to anv_gen8.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_gen8.c | 190 ++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 12 +++ src/vulkan/anv_query.c | 202 ----------------------------------------------- 3 files changed, 202 insertions(+), 202 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_gen8.c b/src/vulkan/anv_gen8.c index e01bf52de95..fd2b583ee47 100644 --- a/src/vulkan/anv_gen8.c +++ b/src/vulkan/anv_gen8.c @@ -951,3 +951,193 @@ VkResult gen8_CreateDynamicDepthStencilState( return VK_SUCCESS; } + +static void +emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ +} + +void gen8_CmdBeginQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool 
queryPool, + uint32_t slot, + VkQueryControlFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void gen8_CmdEndQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot) + 8); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +#define TIMESTAMP 0x2358 + +void gen8_CmdWriteTimestamp( + VkCmdBuffer cmdBuffer, + VkTimestampType timestampType, + VkBuffer destBuffer, + VkDeviceSize destOffset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + struct anv_bo *bo = buffer->bo; + + switch (timestampType) { + case VK_TIMESTAMP_TYPE_TOP: + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { bo, buffer->offset + destOffset }); + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); + break; + + case VK_TIMESTAMP_TYPE_BOTTOM: + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = /* FIXME: This is only lower 32 bits */ + { bo, buffer->offset + destOffset }); + break; + + default: + break; + } +} + +#define alu_opcode(v) __gen_field((v), 20, 31) +#define alu_operand1(v) __gen_field((v), 10, 19) +#define alu_operand2(v) 
__gen_field((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +void gen8_CmdCopyQueryPoolResults( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availabilty info supposed to go? */ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + /* FIXME: If we're not waiting, should we just do this on the CPU? 
*/ + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); + + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2), + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2) + 4, + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset + 4 }); + + dst_offset += destStride; + } +} diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 69003c52a9a..a399f86e689 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1086,6 +1086,18 @@ struct anv_render_pass { struct anv_subpass subpasses[0]; }; +struct anv_query_pool_slot { + uint64_t begin; + uint64_t end; + uint64_t available; +}; + +struct anv_query_pool { + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + void anv_device_init_meta(struct anv_device *device); void anv_device_finish_meta(struct anv_device *device); diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index d7903ee2cb8..3785560dc3b 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -29,18 
+29,6 @@ #include "anv_private.h" -struct anv_query_pool_slot { - uint64_t begin; - uint64_t end; - uint64_t available; -}; - -struct anv_query_pool { - VkQueryType type; - uint32_t slots; - struct anv_bo bo; -}; - VkResult anv_CreateQueryPool( VkDevice _device, const VkQueryPoolCreateInfo* pCreateInfo, @@ -152,57 +140,6 @@ VkResult anv_GetQueryPoolResults( return VK_SUCCESS; } -static void -anv_batch_emit_ps_depth_count(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_PIPE_CONTROL, - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WritePSDepthCount, - .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ -} - -void anv_CmdBeginQuery( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t slot, - VkQueryControlFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - slot * sizeof(struct anv_query_pool_slot)); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -void anv_CmdEndQuery( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t slot) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - slot * sizeof(struct anv_query_pool_slot) + 8); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - void anv_CmdResetQueryPool( VkCmdBuffer cmdBuffer, VkQueryPool queryPool, @@ -211,142 +148,3 @@ void anv_CmdResetQueryPool( { stub(); } - -#define TIMESTAMP 0x2358 - -void anv_CmdWriteTimestamp( - VkCmdBuffer cmdBuffer, - VkTimestampType timestampType, - VkBuffer destBuffer, - VkDeviceSize destOffset) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, 
cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - struct anv_bo *bo = buffer->bo; - - switch (timestampType) { - case VK_TIMESTAMP_TYPE_TOP: - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = TIMESTAMP, - .MemoryAddress = { bo, buffer->offset + destOffset }); - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = TIMESTAMP + 4, - .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); - break; - - case VK_TIMESTAMP_TYPE_BOTTOM: - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteTimestamp, - .Address = /* FIXME: This is only lower 32 bits */ - { bo, buffer->offset + destOffset }); - break; - - default: - break; - } -} - -#define alu_opcode(v) __gen_field((v), 20, 31) -#define alu_operand1(v) __gen_field((v), 10, 19) -#define alu_operand2(v) __gen_field((v), 0, 9) -#define alu(opcode, operand1, operand2) \ - alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) - -#define OPCODE_NOOP 0x000 -#define OPCODE_LOAD 0x080 -#define OPCODE_LOADINV 0x480 -#define OPCODE_LOAD0 0x081 -#define OPCODE_LOAD1 0x481 -#define OPCODE_ADD 0x100 -#define OPCODE_SUB 0x101 -#define OPCODE_AND 0x102 -#define OPCODE_OR 0x103 -#define OPCODE_XOR 0x104 -#define OPCODE_STORE 0x180 -#define OPCODE_STOREINV 0x580 - -#define OPERAND_R0 0x00 -#define OPERAND_R1 0x01 -#define OPERAND_R2 0x02 -#define OPERAND_R3 0x03 -#define OPERAND_R4 0x04 -#define OPERAND_SRCA 0x20 -#define OPERAND_SRCB 0x21 -#define OPERAND_ACCU 0x31 -#define OPERAND_ZF 0x32 -#define OPERAND_CF 0x33 - -#define CS_GPR(n) (0x2600 + (n) * 8) - -static void -emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg + 4, - 
.MemoryAddress = { bo, offset + 4 }); -} - -void anv_CmdCopyQueryPoolResults( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize destStride, - VkQueryResultFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - uint32_t slot_offset, dst_offset; - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - /* Where is the availabilty info supposed to go? */ - anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); - return; - } - - assert(pool->type == VK_QUERY_TYPE_OCCLUSION); - - /* FIXME: If we're not waiting, should we just do this on the CPU? */ - if (flags & VK_QUERY_RESULT_WAIT_BIT) - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .CommandStreamerStallEnable = true, - .StallAtPixelScoreboard = true); - - dst_offset = buffer->offset + destOffset; - for (uint32_t i = 0; i < queryCount; i++) { - - slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); - - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); - - /* FIXME: We need to clamp the result for 32 bit. 
*/ - - uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); - dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); - dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); - dw[3] = alu(OPCODE_SUB, 0, 0); - dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = CS_GPR(2), - /* FIXME: This is only lower 32 bits */ - .MemoryAddress = { buffer->bo, dst_offset }); - - if (flags & VK_QUERY_RESULT_64_BIT) - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = CS_GPR(2) + 4, - /* FIXME: This is only lower 32 bits */ - .MemoryAddress = { buffer->bo, dst_offset + 4 }); - - dst_offset += destStride; - } -} -- cgit v1.2.3 From a7649b2869dc19174bee2fe963bd792937dbb958 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 14 Aug 2015 14:50:11 -0700 Subject: vk: Move gen8_cmd_buffer_emit_state_base_address() to anv_gen8.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_cmd_buffer.c | 82 ++++----------------------------------------- src/vulkan/anv_gen8.c | 82 +++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 2 ++ 3 files changed, 90 insertions(+), 76 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index e0e850b8cc4..2c6e2d1869b 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -140,82 +140,12 @@ VkResult anv_ResetCommandBuffer( void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { - struct anv_device *device = cmd_buffer->device; - struct anv_bo *scratch_bo = NULL; - - cmd_buffer->state.scratch_size = - anv_block_pool_size(&device->scratch_block_pool); - if (cmd_buffer->state.scratch_size > 0) - scratch_bo = &device->scratch_block_pool.bo; - - anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, 
- .GeneralStateBaseAddress = { scratch_bo, 0 }, - .GeneralStateMemoryObjectControlState = GEN8_MOCS, - .GeneralStateBaseAddressModifyEnable = true, - .GeneralStateBufferSize = 0xfffff, - .GeneralStateBufferSizeModifyEnable = true, - - .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, - .SurfaceStateMemoryObjectControlState = GEN8_MOCS, - .SurfaceStateBaseAddressModifyEnable = true, - - .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, - .DynamicStateMemoryObjectControlState = GEN8_MOCS, - .DynamicStateBaseAddressModifyEnable = true, - .DynamicStateBufferSize = 0xfffff, - .DynamicStateBufferSizeModifyEnable = true, - - .IndirectObjectBaseAddress = { NULL, 0 }, - .IndirectObjectMemoryObjectControlState = GEN8_MOCS, - .IndirectObjectBaseAddressModifyEnable = true, - .IndirectObjectBufferSize = 0xfffff, - .IndirectObjectBufferSizeModifyEnable = true, - - .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, - .InstructionMemoryObjectControlState = GEN8_MOCS, - .InstructionBaseAddressModifyEnable = true, - .InstructionBufferSize = 0xfffff, - .InstructionBuffersizeModifyEnable = true); - - /* After re-setting the surface state base address, we have to do some - * cache flusing so that the sampler engine will pick up the new - * SURFACE_STATE objects and binding tables. From the Broadwell PRM, - * Shared Function > 3D Sampler > State > State Caching (page 96): - * - * Coherency with system memory in the state cache, like the texture - * cache is handled partially by software. It is expected that the - * command stream or shader will issue Cache Flush operation or - * Cache_Flush sampler message to ensure that the L1 cache remains - * coherent with system memory. - * - * [...] - * - * Whenever the value of the Dynamic_State_Base_Addr, - * Surface_State_Base_Addr are altered, the L1 state cache must be - * invalidated to ensure the new surface or sampler state is fetched - * from system memory. 
- * - * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit - * which, according the PIPE_CONTROL instruction documentation in the - * Broadwell PRM: - * - * Setting this bit is independent of any other bit in this packet. - * This bit controls the invalidation of the L1 and L2 state caches - * at the top of the pipe i.e. at the parsing time. - * - * Unfortunately, experimentation seems to indicate that state cache - * invalidation through a PIPE_CONTROL does nothing whatsoever in - * regards to surface state and binding tables. In stead, it seems that - * invalidating the texture cache is what is actually needed. - * - * XXX: As far as we have been able to determine through - * experimentation, shows that flush the texture cache appears to be - * sufficient. The theory here is that all of the sampling/rendering - * units cache the binding table in the texture cache. However, we have - * yet to be able to actually confirm this. - */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .TextureCacheInvalidationEnable = true); + switch (cmd_buffer->device->info.gen) { + case 8: + return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); + default: + unreachable("unsupported gen\n"); + } } VkResult anv_BeginCommandBuffer( diff --git a/src/vulkan/anv_gen8.c b/src/vulkan/anv_gen8.c index fd2b583ee47..3a83be53bc4 100644 --- a/src/vulkan/anv_gen8.c +++ b/src/vulkan/anv_gen8.c @@ -1141,3 +1141,85 @@ void gen8_CmdCopyQueryPoolResults( dst_offset += destStride; } } + +void +gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->state.scratch_size = + anv_block_pool_size(&device->scratch_block_pool); + if (cmd_buffer->state.scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, + .GeneralStateBaseAddress = { scratch_bo, 0 }, + 
.GeneralStateMemoryObjectControlState = GEN8_MOCS, + .GeneralStateBaseAddressModifyEnable = true, + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + + .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, + .SurfaceStateMemoryObjectControlState = GEN8_MOCS, + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GEN8_MOCS, + .DynamicStateBaseAddressModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GEN8_MOCS, + .IndirectObjectBaseAddressModifyEnable = true, + .IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GEN8_MOCS, + .InstructionBaseAddressModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true); + + /* After re-setting the surface state base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. 
+ * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. + */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .TextureCacheInvalidationEnable = true); +} + diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a399f86e689..0b422d257a9 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -776,6 +776,8 @@ VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); -- cgit v1.2.3 From 9f0bb5977b58c59e0f97f40b803d7749efd245a6 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 17 Aug 2015 11:36:10 -0700 Subject: vk: Move gen8_CmdBindIndexBuffer() to anv_gen8.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_cmd_buffer.c | 29 
----------------------------- src/vulkan/anv_gen8.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 2c6e2d1869b..b874ba76738 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -299,35 +299,6 @@ void anv_CmdBindDescriptorSets( } } -void anv_CmdBindIndexBuffer( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - VkIndexType indexType) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - static const uint32_t vk_to_gen_index_type[] = { - [VK_INDEX_TYPE_UINT16] = INDEX_WORD, - [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, - }; - - struct GEN8_3DSTATE_VF vf = { - GEN8_3DSTATE_VF_header, - .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX, - }; - GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); - - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, - .IndexFormat = vk_to_gen_index_type[indexType], - .MemoryObjectControlState = GEN8_MOCS, - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset); -} - void anv_CmdBindVertexBuffers( VkCmdBuffer cmdBuffer, uint32_t startBinding, diff --git a/src/vulkan/anv_gen8.c b/src/vulkan/anv_gen8.c index 3a83be53bc4..95afaadb8f0 100644 --- a/src/vulkan/anv_gen8.c +++ b/src/vulkan/anv_gen8.c @@ -1223,3 +1223,31 @@ gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) .TextureCacheInvalidationEnable = true); } +void gen8_CmdBindIndexBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + 
[VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; + + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX, + }; + GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); + + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = GEN8_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); +} -- cgit v1.2.3 From 64045eebfbf6a799b43b6a48a8b2c26bdb902ed5 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 17 Aug 2015 16:17:07 -0700 Subject: vk: Reorder gen8 specific code into three new files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll organize gen specific code in three files per gen: pipeline, cmd_buffer and state, eg: gen8_cmd_buffer.c gen8_pipeline.c gen8_state.c where gen8_cmd_buffer.c holds all vkCmd* entry points, gen8_pipeline.c all gen specific code related to pipeline building and remaining state code (sampler, surface state, dynamic state) in gen8_state.c. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/Makefile.am | 4 +- src/vulkan/anv_cmd_buffer.c | 509 +---------------- src/vulkan/anv_gen8.c | 1253 ------------------------------------------ src/vulkan/anv_meta.c | 12 +- src/vulkan/anv_private.h | 20 +- src/vulkan/gen8_cmd_buffer.c | 814 +++++++++++++++++++++++++++ src/vulkan/gen8_pipeline.c | 697 +++++++++++++++++++++++ src/vulkan/gen8_state.c | 286 ++++++++++ 8 files changed, 1841 insertions(+), 1754 deletions(-) delete mode 100644 src/vulkan/anv_gen8.c create mode 100644 src/vulkan/gen8_cmd_buffer.c create mode 100644 src/vulkan/gen8_pipeline.c create mode 100644 src/vulkan/gen8_state.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 6c833d84753..2359ffeeff1 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -73,7 +73,9 @@ VULKAN_SOURCES = \ anv_query.c \ anv_util.c \ anv_x11.c \ - anv_gen8.c + gen8_state.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c libvulkan_la_SOURCES = \ $(VULKAN_SOURCES) \ diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index b874ba76738..84e69032147 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -320,9 +320,9 @@ void anv_CmdBindVertexBuffers( } } -static VkResult -cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *bt_state) +VkResult +anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *bt_state) { struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; @@ -440,9 +440,9 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, return VK_SUCCESS; } -static VkResult -cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *state) +VkResult +anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *state) { struct 
anv_pipeline_layout *layout; uint32_t sampler_count; @@ -491,10 +491,10 @@ flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) struct anv_state surfaces = { 0, }, samplers = { 0, }; VkResult result; - result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); + result = anv_cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); if (result != VK_SUCCESS) return result; - result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); if (result != VK_SUCCESS) return result; @@ -533,8 +533,8 @@ flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) return VK_SUCCESS; } -static void -flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) +void +anv_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { uint32_t s, dirty = cmd_buffer->state.descriptors_dirty & cmd_buffer->state.pipeline->active_stages; @@ -569,7 +569,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages; } -static struct anv_state +struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, uint32_t *a, uint32_t dwords, uint32_t alignment) { @@ -584,7 +584,7 @@ anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, return state; } -static struct anv_state +struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, uint32_t *a, uint32_t *b, uint32_t dwords, uint32_t alignment) @@ -603,363 +603,11 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, return state; } -static VkResult -flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = cmd_buffer_emit_samplers(cmd_buffer, - VK_SHADER_STAGE_COMPUTE, 
&samplers); - if (result != VK_SUCCESS) - return result; - result = cmd_buffer_emit_binding_table(cmd_buffer, - VK_SHADER_STAGE_COMPUTE, &surfaces); - if (result != VK_SUCCESS) - return result; - - struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = { - .KernelStartPointer = pipeline->cs_simd, - .KernelStartPointerHigh = 0, - .BindingTablePointer = surfaces.offset, - .BindingTableEntryCount = 0, - .SamplerStatePointer = samplers.offset, - .SamplerCount = 0, - .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */ - }; - - uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); - struct anv_state state = - anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); - - GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, - .InterfaceDescriptorTotalLength = size, - .InterfaceDescriptorDataStartAddress = state.offset); - - return VK_SUCCESS; -} - -static void -anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - VkResult result; - - assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - - if (cmd_buffer->state.current_pipeline != GPGPU) { - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, - .PipelineSelection = GPGPU); - cmd_buffer->state.current_pipeline = GPGPU; - } - - if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { - result = flush_compute_descriptor_set(cmd_buffer); - assert(result == VK_SUCCESS); - cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; - } - - cmd_buffer->state.compute_dirty = 0; -} - -static void -anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = 
cmd_buffer->state.pipeline; - uint32_t *p; - - uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - - if (cmd_buffer->state.current_pipeline != _3D) { - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, - .PipelineSelection = _3D); - cmd_buffer->state.current_pipeline = _3D; - } - - if (vb_emit) { - const uint32_t num_buffers = __builtin_popcount(vb_emit); - const uint32_t num_dwords = 1 + num_buffers * 4; - - p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GEN8_3DSTATE_VERTEX_BUFFERS); - uint32_t vb, i = 0; - for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; - uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; - - struct GEN8_VERTEX_BUFFER_STATE state = { - .VertexBufferIndex = vb, - .MemoryObjectControlState = GEN8_MOCS, - .AddressModifyEnable = true, - .BufferPitch = pipeline->binding_stride[vb], - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset - }; - - GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); - i++; - } - } - - if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { - /* If somebody compiled a pipeline after starting a command buffer the - * scratch bo may have grown since we started this cmd buffer (and - * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, - * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. 
*/ - if (cmd_buffer->state.scratch_size < pipeline->total_scratch) - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - } - - if (cmd_buffer->state.descriptors_dirty) - flush_descriptor_sets(cmd_buffer); - - if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { - struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, - .ScissorRectPointer = vp_state->scissor.offset); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, - .CCViewportPointer = vp_state->cc_vp.offset); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, - .SFClipViewportPointer = vp_state->sf_clip_vp.offset); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_CMD_BUFFER_RS_DIRTY)) { - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.rs_state->state_sf, - pipeline->state_sf); - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.rs_state->state_raster, - pipeline->state_raster); - } - - if (cmd_buffer->state.ds_state && - (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_CMD_BUFFER_DS_DIRTY))) { - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.ds_state->state_wm_depth_stencil, - pipeline->state_wm_depth_stencil); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY | - ANV_CMD_BUFFER_DS_DIRTY)) { - struct anv_state state; - if (cmd_buffer->state.ds_state == NULL) - state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->state.cb_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 64); - else if (cmd_buffer->state.cb_state == NULL) - state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->state.ds_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 64); - else - state = anv_cmd_buffer_merge_dynamic(cmd_buffer, - cmd_buffer->state.ds_state->state_color_calc, - 
cmd_buffer->state.cb_state->state_color_calc, - GEN8_COLOR_CALC_STATE_length, 64); - - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_CC_STATE_POINTERS, - .ColorCalcStatePointer = state.offset, - .ColorCalcStatePointerValid = true); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.state_vf, pipeline->state_vf); - } - - cmd_buffer->state.vb_dirty &= ~vb_emit; - cmd_buffer->state.dirty = 0; -} - -void anv_CmdDraw( - VkCmdBuffer cmdBuffer, - uint32_t firstVertex, - uint32_t vertexCount, - uint32_t firstInstance, - uint32_t instanceCount) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - anv_cmd_buffer_flush_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, - .VertexAccessType = SEQUENTIAL, - .VertexCountPerInstance = vertexCount, - .StartVertexLocation = firstVertex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = 0); -} - -void anv_CmdDrawIndexed( - VkCmdBuffer cmdBuffer, - uint32_t firstIndex, - uint32_t indexCount, - int32_t vertexOffset, - uint32_t firstInstance, - uint32_t instanceCount) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - anv_cmd_buffer_flush_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, - .VertexAccessType = RANDOM, - .VertexCountPerInstance = indexCount, - .StartVertexLocation = firstIndex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = vertexOffset); -} - -static void -anv_batch_lrm(struct anv_batch *batch, - uint32_t reg, struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); -} - -static void -anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, - .RegisterOffset = 
reg, - .DataDWord = imm); -} - -/* Auto-Draw / Indirect Registers */ -#define GEN7_3DPRIM_END_OFFSET 0x2420 -#define GEN7_3DPRIM_START_VERTEX 0x2430 -#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 -#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 -#define GEN7_3DPRIM_START_INSTANCE 0x243C -#define GEN7_3DPRIM_BASE_VERTEX 0x2440 - -void anv_CmdDrawIndirect( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t count, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - anv_cmd_buffer_flush_state(cmd_buffer); - - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); - anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, - .IndirectParameterEnable = true, - .VertexAccessType = SEQUENTIAL); -} - -void anv_CmdDrawIndexedIndirect( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t count, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - anv_cmd_buffer_flush_state(cmd_buffer); - - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); - anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); 
- - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, - .IndirectParameterEnable = true, - .VertexAccessType = RANDOM); -} - -void anv_CmdDispatch( - VkCmdBuffer cmdBuffer, - uint32_t x, - uint32_t y, - uint32_t z) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - - anv_cmd_buffer_flush_compute_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, - .ThreadGroupIDXDimension = x, - .ThreadGroupIDYDimension = y, - .ThreadGroupIDZDimension = z, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); -} - -#define GPGPU_DISPATCHDIMX 0x2500 -#define GPGPU_DISPATCHDIMY 0x2504 -#define GPGPU_DISPATCHDIMZ 0x2508 - -void anv_CmdDispatchIndirect( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset) +void +anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - anv_cmd_buffer_flush_compute_state(cmd_buffer); - - anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); - anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); - anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - - anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, - .IndirectParameterEnable = true, - .SIMDSize = prog_data->simd_size / 16, - 
.ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); + gen8_cmd_buffer_begin_subpass(cmd_buffer, subpass); } void anv_CmdSetEvent( @@ -1139,131 +787,6 @@ void anv_CmdPushConstants( stub(); } -static void -anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_depth_stencil_view *view; - - static const struct anv_depth_stencil_view null_view = - { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; - - if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { - const struct anv_attachment_view *aview = - fb->attachments[subpass->depth_stencil_attachment]; - assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); - view = (const struct anv_depth_stencil_view *)aview; - } else { - view = &null_view; - } - - /* FIXME: Implement the PMA stall W/A */ - /* FIXME: Width and Height are wrong */ - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, - .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = view->depth_stride > 0, - .StencilWriteEnable = view->stencil_stride > 0, - .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = view->depth_format, - .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0, - .SurfaceBaseAddress = { view->bo, view->depth_offset }, - .Height = cmd_buffer->state.framebuffer->height - 1, - .Width = cmd_buffer->state.framebuffer->width - 1, - .LOD = 0, - .Depth = 1 - 1, - .MinimumArrayElement = 0, - .DepthBufferObjectControlState = GEN8_MOCS, - .RenderTargetViewExtent = 1 - 1, - .SurfaceQPitch = view->depth_qpitch >> 2); - - /* Disable hierarchial depth buffers. 
*/ - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, - .StencilBufferEnable = view->stencil_stride > 0, - .StencilBufferObjectControlState = GEN8_MOCS, - .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, - .SurfaceBaseAddress = { view->bo, view->stencil_offset }, - .SurfaceQPitch = view->stencil_qpitch >> 2); - - /* Clear the clear params. */ - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS); -} - -void -anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) -{ - cmd_buffer->state.subpass = subpass; - - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - - anv_cmd_buffer_emit_depth_stencil(cmd_buffer); -} - -void anv_CmdBeginRenderPass( - VkCmdBuffer cmdBuffer, - const VkRenderPassBeginInfo* pRenderPassBegin, - VkRenderPassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); - ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - - cmd_buffer->state.framebuffer = framebuffer; - cmd_buffer->state.pass = pass; - - const VkRect2D *render_area = &pRenderPassBegin->renderArea; - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, - .ClippedDrawingRectangleYMin = render_area->offset.y, - .ClippedDrawingRectangleXMin = render_area->offset.x, - .ClippedDrawingRectangleYMax = - render_area->offset.y + render_area->extent.height - 1, - .ClippedDrawingRectangleXMax = - render_area->offset.x + render_area->extent.width - 1, - .DrawingRectangleOriginY = 0, - .DrawingRectangleOriginX = 0); - - anv_cmd_buffer_clear_attachments(cmd_buffer, pass, - pRenderPassBegin->pAttachmentClearValues); - - anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); -} - -void anv_CmdNextSubpass( - VkCmdBuffer cmdBuffer, - VkRenderPassContents contents) -{ - 
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - - anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); -} - -void anv_CmdEndRenderPass( - VkCmdBuffer cmdBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - - /* Emit a flushing pipe control at the end of a pass. This is kind of a - * hack but it ensures that render targets always actually get written. - * Eventually, we should do flushing based on image format transitions - * or something of that nature. - */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .PostSyncOperation = NoWrite, - .RenderTargetCacheFlushEnable = true, - .InstructionCacheInvalidateEnable = true, - .DepthCacheFlushEnable = true, - .VFCacheInvalidationEnable = true, - .TextureCacheInvalidationEnable = true, - .CommandStreamerStallEnable = true); -} - void anv_CmdExecuteCommands( VkCmdBuffer cmdBuffer, uint32_t cmdBuffersCount, diff --git a/src/vulkan/anv_gen8.c b/src/vulkan/anv_gen8.c deleted file mode 100644 index 95afaadb8f0..00000000000 --- a/src/vulkan/anv_gen8.c +++ /dev/null @@ -1,1253 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -VkResult gen8_CreateDynamicRasterState( - VkDevice _device, - const VkDynamicRasterStateCreateInfo* pCreateInfo, - VkDynamicRasterState* pState) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_rs_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GEN8_3DSTATE_SF sf = { - GEN8_3DSTATE_SF_header, - .LineWidth = pCreateInfo->lineWidth, - }; - - GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); - - bool enable_bias = pCreateInfo->depthBias != 0.0f || - pCreateInfo->slopeScaledDepthBias != 0.0f; - struct GEN8_3DSTATE_RASTER raster = { - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, - .GlobalDepthOffsetConstant = pCreateInfo->depthBias, - .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias, - .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp - }; - - GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster); - - *pState = anv_dynamic_rs_state_to_handle(state); - - return VK_SUCCESS; -} - -void -gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, - uint32_t offset, uint32_t range) -{ - /* This assumes RGBA float format. 
*/ - uint32_t stride = 4; - uint32_t num_elements = range / stride; - - struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = SURFTYPE_BUFFER, - .SurfaceArray = false, - .SurfaceFormat = format->surface_format, - .SurfaceVerticalAlignment = VALIGN4, - .SurfaceHorizontalAlignment = HALIGN4, - .TileMode = LINEAR, - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GEN8_MOCS, - .BaseMipLevel = 0.0, - .SurfaceQPitch = 0, - .Height = (num_elements >> 7) & 0x3fff, - .Width = num_elements & 0x7f, - .Depth = (num_elements >> 21) & 0x3f, - .SurfacePitch = stride - 1, - .MinimumArrayElement = 0, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, - .SurfaceMinLOD = 0, - .MIPCountLOD = 0, - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, - .ResourceMinLOD = 0.0, - /* FIXME: We assume that the image must be bound at this time. 
*/ - .SurfaceBaseAddress = { NULL, offset }, - }; - - GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); -} - -VkResult gen8_CreateBufferView( - VkDevice _device, - const VkBufferViewCreateInfo* pCreateInfo, - VkBufferView* pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_buffer_view *view; - VkResult result; - - result = anv_buffer_view_create(device, pCreateInfo, &view); - if (result != VK_SUCCESS) - return result; - - const struct anv_format *format = - anv_format_for_vk_format(pCreateInfo->format); - - gen8_fill_buffer_surface_state(view->view.surface_state.map, format, - view->view.offset, pCreateInfo->range); - - *pView = anv_buffer_view_to_handle(view); - - return VK_SUCCESS; -} - -VkResult gen8_CreateSampler( - VkDevice _device, - const VkSamplerCreateInfo* pCreateInfo, - VkSampler* pSampler) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_sampler *sampler; - uint32_t mag_filter, min_filter, max_anisotropy; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = anv_device_alloc(device, sizeof(*sampler), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!sampler) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - static const uint32_t vk_to_gen_tex_filter[] = { - [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST, - [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR - }; - - static const uint32_t vk_to_gen_mipmap_mode[] = { - [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, - [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, - [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR - }; - - static const uint32_t vk_to_gen_tex_address[] = { - [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, - [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, - [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, - [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, - [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, - }; - - static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - 
[VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, - }; - - if (pCreateInfo->maxAnisotropy > 1) { - mag_filter = MAPFILTER_ANISOTROPIC; - min_filter = MAPFILTER_ANISOTROPIC; - max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2; - } else { - mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter]; - min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; - max_anisotropy = RATIO21; - } - - struct GEN8_SAMPLER_STATE sampler_state = { - .SamplerDisable = false, - .TextureBorderColorMode = DX10OGL, - .LODPreClampMode = 0, - .BaseMipLevel = 0.0, - .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], - .MagModeFilter = mag_filter, - .MinModeFilter = min_filter, - .TextureLODBias = pCreateInfo->mipLodBias * 256, - .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = pCreateInfo->minLod, - .MaxLOD = pCreateInfo->maxLod, - .ChromaKeyEnable = 0, - .ChromaKeyIndex = 0, - .ChromaKeyMode = 0, - .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = 0, - - .IndirectStatePointer = - device->border_colors.offset + - pCreateInfo->borderColor * sizeof(float) * 4, - - .LODClampMagnificationMode = MIPNONE, - .MaximumAnisotropy = max_anisotropy, - .RAddressMinFilterRoundingEnable = 0, - .RAddressMagFilterRoundingEnable = 0, - .VAddressMinFilterRoundingEnable = 0, - .VAddressMagFilterRoundingEnable = 0, - .UAddressMinFilterRoundingEnable = 0, - .UAddressMagFilterRoundingEnable = 0, - .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = 0, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU], - .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW], - }; - - 
GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); - - *pSampler = anv_sampler_to_handle(sampler); - - return VK_SUCCESS; -} - -static void -emit_vertex_input(struct anv_pipeline *pipeline, - const VkPipelineVertexInputStateCreateInfo *info) -{ - const uint32_t num_dwords = 1 + info->attributeCount * 2; - uint32_t *p; - bool instancing_enable[32]; - - pipeline->vb_used = 0; - for (uint32_t i = 0; i < info->bindingCount; i++) { - const VkVertexInputBindingDescription *desc = - &info->pVertexBindingDescriptions[i]; - - pipeline->vb_used |= 1 << desc->binding; - pipeline->binding_stride[desc->binding] = desc->strideInBytes; - - /* Step rate is programmed per vertex element (attribute), not - * binding. Set up a map of which bindings step per instance, for - * reference by vertex element setup. */ - switch (desc->stepRate) { - default: - case VK_VERTEX_INPUT_STEP_RATE_VERTEX: - instancing_enable[desc->binding] = false; - break; - case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: - instancing_enable[desc->binding] = true; - break; - } - } - - p = anv_batch_emitn(&pipeline->batch, num_dwords, - GEN8_3DSTATE_VERTEX_ELEMENTS); - - for (uint32_t i = 0; i < info->attributeCount; i++) { - const VkVertexInputAttributeDescription *desc = - &info->pVertexAttributeDescriptions[i]; - const struct anv_format *format = anv_format_for_vk_format(desc->format); - - struct GEN8_VERTEX_ELEMENT_STATE element = { - .VertexBufferIndex = desc->binding, - .Valid = true, - .SourceElementFormat = format->surface_format, - .EdgeFlagEnable = false, - .SourceElementOffset = desc->offsetInBytes, - .Component0Control = VFCOMP_STORE_SRC, - .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component3Control = format->num_channels >= 4 ? 
VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP - }; - GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_INSTANCING, - .InstancingEnable = instancing_enable[desc->binding], - .VertexElementIndex = i, - /* Vulkan so far doesn't have an instance divisor, so - * this is always 1 (ignored if not instancing). */ - .InstanceDataStepRate = 1); - } - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_SGVS, - .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, - .VertexIDComponentNumber = 2, - .VertexIDElementOffset = info->bindingCount, - .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, - .InstanceIDComponentNumber = 3, - .InstanceIDElementOffset = info->bindingCount); -} - -static void -emit_ia_state(struct anv_pipeline *pipeline, - const VkPipelineInputAssemblyStateCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra) -{ - static const uint32_t vk_to_gen_primitive_type[] = { - [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 - }; - uint32_t topology = vk_to_gen_primitive_type[info->topology]; - - if (extra && extra->use_rectlist) - topology = _3DPRIM_RECTLIST; - - struct GEN8_3DSTATE_VF vf = { - GEN8_3DSTATE_VF_header, - .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, - }; - GEN8_3DSTATE_VF_pack(NULL, pipeline->state_vf, &vf); - - anv_batch_emit(&pipeline->batch, 
GEN8_3DSTATE_VF_TOPOLOGY, - .PrimitiveTopologyType = topology); -} - -static void -emit_rs_state(struct anv_pipeline *pipeline, - const VkPipelineRasterStateCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra) -{ - static const uint32_t vk_to_gen_cullmode[] = { - [VK_CULL_MODE_NONE] = CULLMODE_NONE, - [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, - [VK_CULL_MODE_BACK] = CULLMODE_BACK, - [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH - }; - - static const uint32_t vk_to_gen_fillmode[] = { - [VK_FILL_MODE_POINTS] = RASTER_POINT, - [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, - [VK_FILL_MODE_SOLID] = RASTER_SOLID - }; - - static const uint32_t vk_to_gen_front_face[] = { - [VK_FRONT_FACE_CCW] = CounterClockwise, - [VK_FRONT_FACE_CW] = Clockwise - }; - - struct GEN8_3DSTATE_SF sf = { - GEN8_3DSTATE_SF_header, - .ViewportTransformEnable = !(extra && extra->disable_viewport), - .TriangleStripListProvokingVertexSelect = 0, - .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, - .PointWidthSource = pipeline->writes_point_size ? 
Vertex : State, - .PointWidth = 1.0, - }; - - /* FINISHME: VkBool32 rasterizerDiscardEnable; */ - - GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); - - struct GEN8_3DSTATE_RASTER raster = { - GEN8_3DSTATE_RASTER_header, - .FrontWinding = vk_to_gen_front_face[info->frontFace], - .CullMode = vk_to_gen_cullmode[info->cullMode], - .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], - .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], - .ScissorRectangleEnable = !(extra && extra->disable_scissor), - .ViewportZClipTestEnable = info->depthClipEnable - }; - - GEN8_3DSTATE_RASTER_pack(NULL, pipeline->state_raster, &raster); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, - .ForceVertexURBEntryReadLength = false, - .ForceVertexURBEntryReadOffset = false, - .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = - pipeline->wm_prog_data.num_varying_inputs); - -} - -static void -emit_cb_state(struct anv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *info) -{ - struct anv_device *device = pipeline->device; - - static const uint32_t vk_to_gen_logic_op[] = { - [VK_LOGIC_OP_COPY] = LOGICOP_COPY, - [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, - [VK_LOGIC_OP_AND] = LOGICOP_AND, - [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, - [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, - [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP, - [VK_LOGIC_OP_XOR] = LOGICOP_XOR, - [VK_LOGIC_OP_OR] = LOGICOP_OR, - [VK_LOGIC_OP_NOR] = LOGICOP_NOR, - [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV, - [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, - [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, - [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, - [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, - [VK_LOGIC_OP_NAND] = LOGICOP_NAND, - [VK_LOGIC_OP_SET] = LOGICOP_SET, - }; - - static const uint32_t vk_to_gen_blend[] = { - [VK_BLEND_ZERO] = BLENDFACTOR_ZERO, - [VK_BLEND_ONE] = BLENDFACTOR_ONE, - [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, - [VK_BLEND_ONE_MINUS_SRC_COLOR] = 
BLENDFACTOR_INV_SRC_COLOR, - [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR, - [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR, - [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, - [VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, - [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA, - [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, - [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, - [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, - [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, - [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, - [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, - [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, - [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, - [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, - [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, - }; - - static const uint32_t vk_to_gen_blend_op[] = { - [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, - [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, - [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, - [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, - [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, - }; - - uint32_t num_dwords = GEN8_BLEND_STATE_length; - pipeline->blend_state = - anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); - - struct GEN8_BLEND_STATE blend_state = { - .AlphaToCoverageEnable = info->alphaToCoverageEnable, - }; - - for (uint32_t i = 0; i < info->attachmentCount; i++) { - const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; - - blend_state.Entry[i] = (struct GEN8_BLEND_STATE_ENTRY) { - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], - .ColorBufferBlendEnable = a->blendEnable, - .PreBlendSourceOnlyClampEnable = false, - .PreBlendColorClampEnable = false, - .PostBlendColorClampEnable = false, - .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], - .DestinationBlendFactor = 
vk_to_gen_blend[a->destBlendColor], - .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], - .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], - .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), - .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), - .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), - .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), - }; - } - - GEN8_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_BLEND_STATE_POINTERS, - .BlendStatePointer = pipeline->blend_state.offset, - .BlendStatePointerValid = true); -} - -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER, - [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS, - [VK_COMPARE_OP_EQUAL] = COMPAREFUNCTION_EQUAL, - [VK_COMPARE_OP_LESS_EQUAL] = COMPAREFUNCTION_LEQUAL, - [VK_COMPARE_OP_GREATER] = COMPAREFUNCTION_GREATER, - [VK_COMPARE_OP_NOT_EQUAL] = COMPAREFUNCTION_NOTEQUAL, - [VK_COMPARE_OP_GREATER_EQUAL] = COMPAREFUNCTION_GEQUAL, - [VK_COMPARE_OP_ALWAYS] = COMPAREFUNCTION_ALWAYS, -}; - -static const uint32_t vk_to_gen_stencil_op[] = { - [VK_STENCIL_OP_KEEP] = 0, - [VK_STENCIL_OP_ZERO] = 0, - [VK_STENCIL_OP_REPLACE] = 0, - [VK_STENCIL_OP_INC_CLAMP] = 0, - [VK_STENCIL_OP_DEC_CLAMP] = 0, - [VK_STENCIL_OP_INVERT] = 0, - [VK_STENCIL_OP_INC_WRAP] = 0, - [VK_STENCIL_OP_DEC_WRAP] = 0 -}; - -static void -emit_ds_state(struct anv_pipeline *pipeline, - const VkPipelineDepthStencilStateCreateInfo *info) -{ - if (info == NULL) { - /* We're going to OR this together with the dynamic state. We need - * to make sure it's initialized to something useful. 
- */ - memset(pipeline->state_wm_depth_stencil, 0, - sizeof(pipeline->state_wm_depth_stencil)); - return; - } - - /* VkBool32 depthBoundsEnable; // optional (depth_bounds_test) */ - - struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - .DepthTestEnable = info->depthTestEnable, - .DepthBufferWriteEnable = info->depthWriteEnable, - .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], - .DoubleSidedStencilEnable = true, - - .StencilTestEnable = info->stencilTestEnable, - .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], - .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], - .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], - .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], - .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], - .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], - .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.stencilDepthFailOp], - .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], - }; - - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->state_wm_depth_stencil, &wm_depth_stencil); -} - -VkResult -gen8_graphics_pipeline_create( - VkDevice _device, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline *pipeline; - VkResult result; - uint32_t offset, length; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); - - pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); - - result = 
anv_reloc_list_init(&pipeline->batch_relocs, device); - if (result != VK_SUCCESS) { - anv_device_free(device, pipeline); - return result; - } - pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; - pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); - pipeline->batch.relocs = &pipeline->batch_relocs; - - anv_state_stream_init(&pipeline->program_stream, - &device->instruction_block_pool); - - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - pipeline->shaders[pCreateInfo->pStages[i].stage] = - anv_shader_from_handle(pCreateInfo->pStages[i].shader); - } - - if (pCreateInfo->pTessellationState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); - if (pCreateInfo->pViewportState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO"); - if (pCreateInfo->pMultisampleState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); - - pipeline->use_repclear = extra && extra->use_repclear; - - anv_compiler_run(device->compiler, pipeline); - - /* FIXME: The compiler dead-codes FS inputs when we don't have a VS, so we - * hard code this to num_attributes - 2. This is because the attributes - * include VUE header and position, which aren't counted as varying - * inputs. 
*/ - if (pipeline->vs_simd8 == NO_KERNEL) { - pipeline->wm_prog_data.num_varying_inputs = - pCreateInfo->pVertexInputState->attributeCount - 2; - } - - assert(pCreateInfo->pVertexInputState); - emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); - assert(pCreateInfo->pInputAssemblyState); - emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); - assert(pCreateInfo->pRasterState); - emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); - emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); - emit_cb_state(pipeline, pCreateInfo->pColorBlendState); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_STATISTICS, - .StatisticsEnable = true); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_HS, .Enable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_TE, .TEEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_DS, .FunctionEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS, - .ConstantBufferOffset = 0, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS, - .ConstantBufferOffset = 4, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS, - .ConstantBufferOffset = 8, - .ConstantBufferSize = 4); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM_CHROMAKEY, - .ChromaKeyKillEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE_SWIZ); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, - .ClipEnable = true, - .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), - .MinimumPointWidth = 0.125, - .MaximumPointWidth = 255.875); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM, - .StatisticsEnable = true, - .LineEndCapAntialiasingRegionWidth = _05pixels, - .LineAntialiasingRegionWidth = _10pixels, - 
.EarlyDepthStencilControl = NORMAL, - .ForceThreadDispatchEnable = NORMAL, - .PointRasterizationRule = RASTRULE_UPPER_RIGHT, - .BarycentricInterpolationMode = - pipeline->wm_prog_data.barycentric_interp_modes); - - uint32_t samples = 1; - uint32_t log2_samples = __builtin_ffs(samples) - 1; - bool enable_sampling = samples > 1 ? true : false; - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_MULTISAMPLE, - .PixelPositionOffsetEnable = enable_sampling, - .PixelLocation = CENTER, - .NumberofMultisamples = log2_samples); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SAMPLE_MASK, - .SampleMask = 0xffff); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_VS, - .VSURBStartingAddress = pipeline->urb.vs_start, - .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, - .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_GS, - .GSURBStartingAddress = pipeline->urb.gs_start, - .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, - .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_HS, - .HSURBStartingAddress = pipeline->urb.vs_start, - .HSURBEntryAllocationSize = 0, - .HSNumberofURBEntries = 0); - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_DS, - .DSURBStartingAddress = pipeline->urb.vs_start, - .DSURBEntryAllocationSize = 0, - .DSNumberofURBEntries = 0); - - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; - offset = 1; - length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; - - if (pipeline->gs_vec4 == NO_KERNEL) - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, .Enable = false); - else - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, - .SingleProgramFlow = false, - .KernelStartPointer = pipeline->gs_vec4, - .VectorMaskEnable = Vmask, - .SamplerCount = 0, - .BindingTableEntryCount = 0, - .ExpectedVertexCount = pipeline->gs_vertex_count, - - .ScratchSpaceBasePointer = 
pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], - .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048), - - .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, - .OutputTopology = gs_prog_data->output_topology, - .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, - .DispatchGRFStartRegisterForURBData = - gs_prog_data->base.base.dispatch_grf_start_reg, - - .MaximumNumberofThreads = device->info.max_gs_threads, - .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, - //pipeline->gs_prog_data.dispatch_mode | - .StatisticsEnable = true, - .IncludePrimitiveID = gs_prog_data->include_primitive_id, - .ReorderMode = TRAILING, - .Enable = true, - - .ControlDataFormat = gs_prog_data->control_data_format, - - /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: - * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) - * UserClipDistanceCullTestEnableBitmask(v) - */ - - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length); - - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; - /* Skip the VUE header and position slots */ - offset = 1; - length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; - - if (pipeline->vs_simd8 == NO_KERNEL || (extra && extra->disable_vs)) - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, - .FunctionEnable = false, - .VertexURBEntryOutputReadOffset = 1, - /* Even if VS is disabled, SBE still gets the amount of - * vertex data to read from this field. We use attribute - * count - 1, as we don't count the VUE header here. 
*/ - .VertexURBEntryOutputLength = - DIV_ROUND_UP(pCreateInfo->pVertexInputState->attributeCount - 1, 2)); - else - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, - .KernelStartPointer = pipeline->vs_simd8, - .SingleVertexDispatch = Multiple, - .VectorMaskEnable = Dmask, - .SamplerCount = 0, - .BindingTableEntryCount = - vue_prog_data->base.binding_table.size_bytes / 4, - .ThreadDispatchPriority = Normal, - .FloatingPointMode = IEEE754, - .IllegalOpcodeExceptionEnable = false, - .AccessesUAV = false, - .SoftwareExceptionEnable = false, - - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], - .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048), - - .DispatchGRFStartRegisterForURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, - .VertexURBEntryReadOffset = 0, - - .MaximumNumberofThreads = device->info.max_vs_threads - 1, - .StatisticsEnable = false, - .SIMD8DispatchEnable = true, - .VertexCacheDisable = false, - .FunctionEnable = true, - - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length, - .UserClipDistanceClipTestEnableBitmask = 0, - .UserClipDistanceCullTestEnableBitmask = 0); - - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - uint32_t ksp0, ksp2, grf_start0, grf_start2; - - ksp2 = 0; - grf_start2 = 0; - if (pipeline->ps_simd8 != NO_KERNEL) { - ksp0 = pipeline->ps_simd8; - grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; - if (pipeline->ps_simd16 != NO_KERNEL) { - ksp2 = pipeline->ps_simd16; - grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; - } - } else if (pipeline->ps_simd16 != NO_KERNEL) { - ksp0 = pipeline->ps_simd16; - grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; - } else { - unreachable("no ps shader"); - } - - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, - .KernelStartPointer0 = ksp0, - - .SingleProgramFlow = false, - .VectorMaskEnable = true, - .SamplerCount = 
1, - - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], - .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), - - .MaximumNumberofThreadsPerPSD = 64 - 2, - .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? - POSOFFSET_SAMPLE: POSOFFSET_NONE, - .PushConstantEnable = wm_prog_data->base.nr_params > 0, - ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, - ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, - ._32PixelDispatchEnable = false, - - .DispatchGRFStartRegisterForConstantSetupData0 = grf_start0, - .DispatchGRFStartRegisterForConstantSetupData1 = 0, - .DispatchGRFStartRegisterForConstantSetupData2 = grf_start2, - - .KernelStartPointer1 = 0, - .KernelStartPointer2 = ksp2); - - bool per_sample_ps = false; - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA, - .PixelShaderValid = true, - .PixelShaderKillsPixel = wm_prog_data->uses_kill, - .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, - .AttributeEnable = wm_prog_data->num_varying_inputs > 0, - .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, - .PixelShaderIsPerSample = per_sample_ps); - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; -} - -VkResult gen8_compute_pipeline_create( - VkDevice _device, - const VkComputePipelineCreateInfo* pCreateInfo, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline *pipeline; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); - - pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - - result = anv_reloc_list_init(&pipeline->batch_relocs, device); - if (result != VK_SUCCESS) { - anv_device_free(device, pipeline); - return result; - } - 
pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; - pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); - pipeline->batch.relocs = &pipeline->batch_relocs; - - anv_state_stream_init(&pipeline->program_stream, - &device->instruction_block_pool); - - memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); - - pipeline->shaders[VK_SHADER_STAGE_COMPUTE] = - anv_shader_from_handle(pCreateInfo->cs.shader); - - pipeline->use_repclear = false; - - anv_compiler_run(device->compiler, pipeline); - - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - - anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE, - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], - .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), - .ScratchSpaceBasePointerHigh = 0, - .StackSize = 0, - - .MaximumNumberofThreads = device->info.max_cs_threads - 1, - .NumberofURBEntries = 2, - .ResetGatewayTimer = true, - .BypassGatewayControl = true, - .URBEntryAllocationSize = 2, - .CURBEAllocationSize = 0); - - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - uint32_t group_size = prog_data->local_size[0] * - prog_data->local_size[1] * prog_data->local_size[2]; - pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); - uint32_t remainder = group_size & (prog_data->simd_size - 1); - - if (remainder > 0) - pipeline->cs_right_mask = ~0u >> (32 - remainder); - else - pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); - - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; -} - -VkResult gen8_CreateDynamicDepthStencilState( - VkDevice _device, - const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, - VkDynamicDepthStencilState* pState) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_ds_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO); - - state = 
anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - GEN8_3DSTATE_WM_DEPTH_STENCIL_header, - - /* Is this what we need to do? */ - .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0, - - .StencilTestMask = pCreateInfo->stencilReadMask & 0xff, - .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, - - .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff, - .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, - }; - - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil, - &wm_depth_stencil); - - struct GEN8_COLOR_CALC_STATE color_calc_state = { - .StencilReferenceValue = pCreateInfo->stencilFrontRef, - .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef - }; - - GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); - - *pState = anv_dynamic_ds_state_to_handle(state); - - return VK_SUCCESS; -} - -static void -emit_ps_depth_count(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_PIPE_CONTROL, - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WritePSDepthCount, - .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ -} - -void gen8_CmdBeginQuery( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t slot, - VkQueryControlFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - slot * sizeof(struct anv_query_pool_slot)); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -void gen8_CmdEndQuery( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t slot) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - 
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - slot * sizeof(struct anv_query_pool_slot) + 8); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -#define TIMESTAMP 0x2358 - -void gen8_CmdWriteTimestamp( - VkCmdBuffer cmdBuffer, - VkTimestampType timestampType, - VkBuffer destBuffer, - VkDeviceSize destOffset) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - struct anv_bo *bo = buffer->bo; - - switch (timestampType) { - case VK_TIMESTAMP_TYPE_TOP: - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = TIMESTAMP, - .MemoryAddress = { bo, buffer->offset + destOffset }); - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = TIMESTAMP + 4, - .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); - break; - - case VK_TIMESTAMP_TYPE_BOTTOM: - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteTimestamp, - .Address = /* FIXME: This is only lower 32 bits */ - { bo, buffer->offset + destOffset }); - break; - - default: - break; - } -} - -#define alu_opcode(v) __gen_field((v), 20, 31) -#define alu_operand1(v) __gen_field((v), 10, 19) -#define alu_operand2(v) __gen_field((v), 0, 9) -#define alu(opcode, operand1, operand2) \ - alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) - -#define OPCODE_NOOP 0x000 -#define OPCODE_LOAD 0x080 -#define OPCODE_LOADINV 0x480 -#define OPCODE_LOAD0 0x081 -#define OPCODE_LOAD1 0x481 -#define OPCODE_ADD 0x100 -#define OPCODE_SUB 0x101 -#define OPCODE_AND 0x102 -#define OPCODE_OR 0x103 -#define OPCODE_XOR 0x104 -#define OPCODE_STORE 0x180 -#define OPCODE_STOREINV 0x580 - -#define OPERAND_R0 0x00 -#define OPERAND_R1 0x01 -#define OPERAND_R2 0x02 -#define OPERAND_R3 0x03 
-#define OPERAND_R4 0x04 -#define OPERAND_SRCA 0x20 -#define OPERAND_SRCB 0x21 -#define OPERAND_ACCU 0x31 -#define OPERAND_ZF 0x32 -#define OPERAND_CF 0x33 - -#define CS_GPR(n) (0x2600 + (n) * 8) - -static void -emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg + 4, - .MemoryAddress = { bo, offset + 4 }); -} - -void gen8_CmdCopyQueryPoolResults( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize destStride, - VkQueryResultFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - uint32_t slot_offset, dst_offset; - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - /* Where is the availabilty info supposed to go? */ - anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); - return; - } - - assert(pool->type == VK_QUERY_TYPE_OCCLUSION); - - /* FIXME: If we're not waiting, should we just do this on the CPU? */ - if (flags & VK_QUERY_RESULT_WAIT_BIT) - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .CommandStreamerStallEnable = true, - .StallAtPixelScoreboard = true); - - dst_offset = buffer->offset + destOffset; - for (uint32_t i = 0; i < queryCount; i++) { - - slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); - - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); - - /* FIXME: We need to clamp the result for 32 bit. 
*/ - - uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); - dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); - dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); - dw[3] = alu(OPCODE_SUB, 0, 0); - dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = CS_GPR(2), - /* FIXME: This is only lower 32 bits */ - .MemoryAddress = { buffer->bo, dst_offset }); - - if (flags & VK_QUERY_RESULT_64_BIT) - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = CS_GPR(2) + 4, - /* FIXME: This is only lower 32 bits */ - .MemoryAddress = { buffer->bo, dst_offset + 4 }); - - dst_offset += destStride; - } -} - -void -gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_bo *scratch_bo = NULL; - - cmd_buffer->state.scratch_size = - anv_block_pool_size(&device->scratch_block_pool); - if (cmd_buffer->state.scratch_size > 0) - scratch_bo = &device->scratch_block_pool.bo; - - anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, - .GeneralStateBaseAddress = { scratch_bo, 0 }, - .GeneralStateMemoryObjectControlState = GEN8_MOCS, - .GeneralStateBaseAddressModifyEnable = true, - .GeneralStateBufferSize = 0xfffff, - .GeneralStateBufferSizeModifyEnable = true, - - .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, - .SurfaceStateMemoryObjectControlState = GEN8_MOCS, - .SurfaceStateBaseAddressModifyEnable = true, - - .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, - .DynamicStateMemoryObjectControlState = GEN8_MOCS, - .DynamicStateBaseAddressModifyEnable = true, - .DynamicStateBufferSize = 0xfffff, - .DynamicStateBufferSizeModifyEnable = true, - - .IndirectObjectBaseAddress = { NULL, 0 }, - .IndirectObjectMemoryObjectControlState = GEN8_MOCS, - .IndirectObjectBaseAddressModifyEnable = true, - 
.IndirectObjectBufferSize = 0xfffff, - .IndirectObjectBufferSizeModifyEnable = true, - - .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, - .InstructionMemoryObjectControlState = GEN8_MOCS, - .InstructionBaseAddressModifyEnable = true, - .InstructionBufferSize = 0xfffff, - .InstructionBuffersizeModifyEnable = true); - - /* After re-setting the surface state base address, we have to do some - * cache flusing so that the sampler engine will pick up the new - * SURFACE_STATE objects and binding tables. From the Broadwell PRM, - * Shared Function > 3D Sampler > State > State Caching (page 96): - * - * Coherency with system memory in the state cache, like the texture - * cache is handled partially by software. It is expected that the - * command stream or shader will issue Cache Flush operation or - * Cache_Flush sampler message to ensure that the L1 cache remains - * coherent with system memory. - * - * [...] - * - * Whenever the value of the Dynamic_State_Base_Addr, - * Surface_State_Base_Addr are altered, the L1 state cache must be - * invalidated to ensure the new surface or sampler state is fetched - * from system memory. - * - * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit - * which, according the PIPE_CONTROL instruction documentation in the - * Broadwell PRM: - * - * Setting this bit is independent of any other bit in this packet. - * This bit controls the invalidation of the L1 and L2 state caches - * at the top of the pipe i.e. at the parsing time. - * - * Unfortunately, experimentation seems to indicate that state cache - * invalidation through a PIPE_CONTROL does nothing whatsoever in - * regards to surface state and binding tables. In stead, it seems that - * invalidating the texture cache is what is actually needed. - * - * XXX: As far as we have been able to determine through - * experimentation, shows that flush the texture cache appears to be - * sufficient. 
The theory here is that all of the sampling/rendering - * units cache the binding table in the texture cache. However, we have - * yet to be able to actually confirm this. - */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .TextureCacheInvalidationEnable = true); -} - -void gen8_CmdBindIndexBuffer( - VkCmdBuffer cmdBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - VkIndexType indexType) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - static const uint32_t vk_to_gen_index_type[] = { - [VK_INDEX_TYPE_UINT16] = INDEX_WORD, - [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, - }; - - struct GEN8_3DSTATE_VF vf = { - GEN8_3DSTATE_VF_header, - .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX, - }; - GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); - - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, - .IndexFormat = vk_to_gen_index_type[indexType], - .MemoryObjectControlState = GEN8_MOCS, - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset); -} diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 38906a916da..8f681230292 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -279,7 +279,7 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.cb_state); - anv_CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, num_instances); + driver_layer->CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, num_instances); } void @@ -694,7 +694,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .dependencyCount = 0, }, &pass); - anv_CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), + driver_layer->CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = 
VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderPass = pass, @@ -715,9 +715,9 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, device->meta_state.blit.pipeline_layout, 0, 1, &set, 0, NULL); - anv_CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, 1); + driver_layer->CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, 1); - anv_CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); + driver_layer->CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. @@ -1345,7 +1345,7 @@ void anv_CmdClearColorImage( .dependencyCount = 0, }, &pass); - anv_CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), + driver_layer->CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderArea = { @@ -1373,7 +1373,7 @@ void anv_CmdClearColorImage( meta_emit_clear(cmd_buffer, 1, &instance_data, (VkClearDepthStencilValue) {0}); - anv_CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); + driver_layer->CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); } } } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 0b422d257a9..3a46e37bc3a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -761,6 +761,21 @@ void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, struct anv_cmd_buffer *secondary); void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); +VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *bt_state); +VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *state); +void anv_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer); + +struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t dwords, + uint32_t alignment); +struct 
anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment); +void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + struct anv_bo * anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer); struct anv_reloc_list * @@ -774,9 +789,12 @@ anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c new file mode 100644 index 00000000000..59a21081c87 --- /dev/null +++ b/src/vulkan/gen8_cmd_buffer.c @@ -0,0 +1,814 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +static void +gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; + } + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN8_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GEN8_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = GEN8_MOCS, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + /* If somebody compiled a pipeline after starting a 
command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + if (cmd_buffer->state.descriptors_dirty) + anv_flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { + struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = vp_state->scissor.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = vp_state->cc_vp.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = vp_state->sf_clip_vp.offset); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_RS_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.rs_state->state_sf, + pipeline->state_sf); + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.rs_state->state_raster, + pipeline->state_raster); + } + + if (cmd_buffer->state.ds_state && + (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_DS_DIRTY))) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.ds_state->state_wm_depth_stencil, + pipeline->state_wm_depth_stencil); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY | + ANV_CMD_BUFFER_DS_DIRTY)) { + struct anv_state state; + if (cmd_buffer->state.ds_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + else if (cmd_buffer->state.cb_state == NULL) + state = 
anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + else + state = anv_cmd_buffer_merge_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->state_color_calc, + cmd_buffer->state.cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.state_vf, pipeline->state_vf); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +void gen8_CmdDraw( + VkCmdBuffer cmdBuffer, + uint32_t firstVertex, + uint32_t vertexCount, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + gen8_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = SEQUENTIAL, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void gen8_CmdDrawIndexed( + VkCmdBuffer cmdBuffer, + uint32_t firstIndex, + uint32_t indexCount, + int32_t vertexOffset, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + gen8_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = RANDOM, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} + +static void +emit_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, 
GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, + .RegisterOffset = reg, + .DataDWord = imm); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void gen8_CmdDrawIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + gen8_cmd_buffer_flush_state(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL); +} + +void gen8_CmdBindIndexBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; + + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? 
UINT16_MAX : UINT32_MAX, + }; + GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); + + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = GEN8_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); +} + +static VkResult +gen8_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = { + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? 
 */ + + uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + struct anv_state state = + anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); + + GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +static void +gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + if (cmd_buffer->state.current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } + + if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { + result = gen8_flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + cmd_buffer->state.compute_dirty = 0; +} + +void gen8_CmdDrawIndexedIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + gen8_cmd_buffer_flush_state(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + 
emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM); +} + +void gen8_CmdDispatch( + VkCmdBuffer cmdBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + gen8_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void gen8_CmdDispatchIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + gen8_cmd_buffer_flush_compute_state(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .IndirectParameterEnable = true, + 
 .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); +} + +static void +gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_depth_stencil_view *view; + + static const struct anv_depth_stencil_view null_view = + { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; + + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + const struct anv_attachment_view *aview = + fb->attachments[subpass->depth_stencil_attachment]; + assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); + view = (const struct anv_depth_stencil_view *)aview; + } else { + view = &null_view; + } + + /* FIXME: Implement the PMA stall W/A */ + /* FIXME: Width and Height are wrong */ + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = view->depth_stride > 0, + .StencilWriteEnable = view->stencil_stride > 0, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = view->depth_format, + .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->depth_offset }, + .Height = cmd_buffer->state.framebuffer->height - 1, + .Width = cmd_buffer->state.framebuffer->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GEN8_MOCS, + .RenderTargetViewExtent = 1 - 1, + .SurfaceQPitch = view->depth_qpitch >> 2); + + /* Disable hierarchical depth buffers. 
*/ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, + .StencilBufferEnable = view->stencil_stride > 0, + .StencilBufferObjectControlState = GEN8_MOCS, + .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->stencil_offset }, + .SurfaceQPitch = view->stencil_qpitch >> 2); + + /* Clear the clear params. */ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS); +} + +void +gen8_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + cmd_buffer->state.subpass = subpass; + + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + gen8_cmd_buffer_emit_depth_stencil(cmd_buffer); +} + +void gen8_CmdBeginRenderPass( + VkCmdBuffer cmdBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkRenderPassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; + + const VkRect2D *render_area = &pRenderPassBegin->renderArea; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + anv_cmd_buffer_clear_attachments(cmd_buffer, pass, + pRenderPassBegin->pAttachmentClearValues); + + gen8_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); +} + +void gen8_CmdNextSubpass( + VkCmdBuffer cmdBuffer, + VkRenderPassContents contents) +{ + 
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + + gen8_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); +} + +void gen8_CmdEndRenderPass( + VkCmdBuffer cmdBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + /* Emit a flushing pipe control at the end of a pass. This is kind of a + * hack but it ensures that render targets always actually get written. + * Eventually, we should do flushing based on image format transitions + * or something of that nature. + */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .PostSyncOperation = NoWrite, + .RenderTargetCacheFlushEnable = true, + .InstructionCacheInvalidateEnable = true, + .DepthCacheFlushEnable = true, + .VFCacheInvalidationEnable = true, + .TextureCacheInvalidationEnable = true, + .CommandStreamerStallEnable = true); +} + +static void +emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ +} + +void gen8_CmdBeginQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot, + VkQueryControlFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void gen8_CmdEndQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, 
&pool->bo, + slot * sizeof(struct anv_query_pool_slot) + 8); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +#define TIMESTAMP 0x2358 + +void gen8_CmdWriteTimestamp( + VkCmdBuffer cmdBuffer, + VkTimestampType timestampType, + VkBuffer destBuffer, + VkDeviceSize destOffset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + struct anv_bo *bo = buffer->bo; + + switch (timestampType) { + case VK_TIMESTAMP_TYPE_TOP: + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { bo, buffer->offset + destOffset }); + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); + break; + + case VK_TIMESTAMP_TYPE_BOTTOM: + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = /* FIXME: This is only lower 32 bits */ + { bo, buffer->offset + destOffset }); + break; + + default: + break; + } +} + +#define alu_opcode(v) __gen_field((v), 20, 31) +#define alu_operand1(v) __gen_field((v), 10, 19) +#define alu_operand2(v) __gen_field((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 
 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +void gen8_CmdCopyQueryPoolResults( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availability info supposed to go? */ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + /* FIXME: If we're not waiting, should we just do this on the CPU? */ + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); + + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. 
*/ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2), + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2) + 4, + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset + 4 }); + + dst_offset += destStride; + } +} + +void +gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->state.scratch_size = + anv_block_pool_size(&device->scratch_block_pool); + if (cmd_buffer->state.scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, + .GeneralStateBaseAddress = { scratch_bo, 0 }, + .GeneralStateMemoryObjectControlState = GEN8_MOCS, + .GeneralStateBaseAddressModifyEnable = true, + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + + .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, + .SurfaceStateMemoryObjectControlState = GEN8_MOCS, + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GEN8_MOCS, + .DynamicStateBaseAddressModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GEN8_MOCS, + .IndirectObjectBaseAddressModifyEnable = true, + 
.IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GEN8_MOCS, + .InstructionBaseAddressModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true); + + /* After re-setting the surface state base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. 
The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. + */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .TextureCacheInvalidationEnable = true); +} diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c new file mode 100644 index 00000000000..05091831b98 --- /dev/null +++ b/src/vulkan/gen8_pipeline.c @@ -0,0 +1,697 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +static void +emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info) +{ + const uint32_t num_dwords = 1 + info->attributeCount * 2; + uint32_t *p; + bool instancing_enable[32]; + + pipeline->vb_used = 0; + for (uint32_t i = 0; i < info->bindingCount; i++) { + const VkVertexInputBindingDescription *desc = + &info->pVertexBindingDescriptions[i]; + + pipeline->vb_used |= 1 << desc->binding; + pipeline->binding_stride[desc->binding] = desc->strideInBytes; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. */ + switch (desc->stepRate) { + default: + case VK_VERTEX_INPUT_STEP_RATE_VERTEX: + instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: + instancing_enable[desc->binding] = true; + break; + } + } + + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GEN8_3DSTATE_VERTEX_ELEMENTS); + + for (uint32_t i = 0; i < info->attributeCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + const struct anv_format *format = anv_format_for_vk_format(desc->format); + + struct GEN8_VERTEX_ELEMENT_STATE element = { + .VertexBufferIndex = desc->binding, + .Valid = true, + .SourceElementFormat = format->surface_format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offsetInBytes, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component3Control = format->num_channels >= 4 ? 
VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP + }; + GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_INSTANCING, + .InstancingEnable = instancing_enable[desc->binding], + .VertexElementIndex = i, + /* Vulkan so far doesn't have an instance divisor, so + * this is always 1 (ignored if not instancing). */ + .InstanceDataStepRate = 1); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_SGVS, + .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDComponentNumber = 2, + .VertexIDElementOffset = info->bindingCount, + .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDComponentNumber = 3, + .InstanceIDElementOffset = info->bindingCount); +} + +static void +emit_ia_state(struct anv_pipeline *pipeline, + const VkPipelineInputAssemblyStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 + }; + uint32_t topology = vk_to_gen_primitive_type[info->topology]; + + if (extra && extra->use_rectlist) + topology = _3DPRIM_RECTLIST; + + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, + }; + GEN8_3DSTATE_VF_pack(NULL, pipeline->state_vf, &vf); + + anv_batch_emit(&pipeline->batch, 
GEN8_3DSTATE_VF_TOPOLOGY, + .PrimitiveTopologyType = topology); +} + +static void +emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + static const uint32_t vk_to_gen_cullmode[] = { + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH + }; + + static const uint32_t vk_to_gen_fillmode[] = { + [VK_FILL_MODE_POINTS] = RASTER_POINT, + [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, + [VK_FILL_MODE_SOLID] = RASTER_SOLID + }; + + static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_CCW] = CounterClockwise, + [VK_FRONT_FACE_CW] = Clockwise + }; + + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .ViewportTransformEnable = !(extra && extra->disable_viewport), + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .PointWidthSource = pipeline->writes_point_size ? 
Vertex : State, + .PointWidth = 1.0, + }; + + /* FINISHME: VkBool32 rasterizerDiscardEnable; */ + + GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); + + struct GEN8_3DSTATE_RASTER raster = { + GEN8_3DSTATE_RASTER_header, + .FrontWinding = vk_to_gen_front_face[info->frontFace], + .CullMode = vk_to_gen_cullmode[info->cullMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .ScissorRectangleEnable = !(extra && extra->disable_scissor), + .ViewportZClipTestEnable = info->depthClipEnable + }; + + GEN8_3DSTATE_RASTER_pack(NULL, pipeline->state_raster, &raster); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = + pipeline->wm_prog_data.num_varying_inputs); + +} + +static void +emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info) +{ + struct anv_device *device = pipeline->device; + + static const uint32_t vk_to_gen_logic_op[] = { + [VK_LOGIC_OP_COPY] = LOGICOP_COPY, + [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, + [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = LOGICOP_XOR, + [VK_LOGIC_OP_OR] = LOGICOP_OR, + [VK_LOGIC_OP_NOR] = LOGICOP_NOR, + [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = LOGICOP_NAND, + [VK_LOGIC_OP_SET] = LOGICOP_SET, + }; + + static const uint32_t vk_to_gen_blend[] = { + [VK_BLEND_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + [VK_BLEND_ONE_MINUS_SRC_COLOR] = 
BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, + }; + + static const uint32_t vk_to_gen_blend_op[] = { + [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, + [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, + [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, + [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, + [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, + }; + + uint32_t num_dwords = GEN8_BLEND_STATE_length; + pipeline->blend_state = + anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); + + struct GEN8_BLEND_STATE blend_state = { + .AlphaToCoverageEnable = info->alphaToCoverageEnable, + }; + + for (uint32_t i = 0; i < info->attachmentCount; i++) { + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; + + blend_state.Entry[i] = (struct GEN8_BLEND_STATE_ENTRY) { + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .ColorBufferBlendEnable = a->blendEnable, + .PreBlendSourceOnlyClampEnable = false, + .PreBlendColorClampEnable = false, + .PostBlendColorClampEnable = false, + .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], + .DestinationBlendFactor = 
vk_to_gen_blend[a->destBlendColor], + .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], + .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], + .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), + .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), + .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), + .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), + }; + } + + GEN8_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_BLEND_STATE_POINTERS, + .BlendStatePointer = pipeline->blend_state.offset, + .BlendStatePointerValid = true); +} + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER, + [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS, + [VK_COMPARE_OP_EQUAL] = COMPAREFUNCTION_EQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = COMPAREFUNCTION_LEQUAL, + [VK_COMPARE_OP_GREATER] = COMPAREFUNCTION_GREATER, + [VK_COMPARE_OP_NOT_EQUAL] = COMPAREFUNCTION_NOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = COMPAREFUNCTION_GEQUAL, + [VK_COMPARE_OP_ALWAYS] = COMPAREFUNCTION_ALWAYS, +}; + +static const uint32_t vk_to_gen_stencil_op[] = { + [VK_STENCIL_OP_KEEP] = 0, + [VK_STENCIL_OP_ZERO] = 0, + [VK_STENCIL_OP_REPLACE] = 0, + [VK_STENCIL_OP_INC_CLAMP] = 0, + [VK_STENCIL_OP_DEC_CLAMP] = 0, + [VK_STENCIL_OP_INVERT] = 0, + [VK_STENCIL_OP_INC_WRAP] = 0, + [VK_STENCIL_OP_DEC_WRAP] = 0 +}; + +static void +emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. 
+ */ + memset(pipeline->state_wm_depth_stencil, 0, + sizeof(pipeline->state_wm_depth_stencil)); + return; + } + + /* VkBool32 depthBoundsEnable; // optional (depth_bounds_test) */ + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], + .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.stencilDepthFailOp], + .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], + }; + + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->state_wm_depth_stencil, &wm_depth_stencil); +} + +VkResult +gen8_graphics_pipeline_create( + VkDevice _device, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + uint32_t offset, length; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + + result = 
anv_reloc_list_init(&pipeline->batch_relocs, device); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + pipeline->shaders[pCreateInfo->pStages[i].stage] = + anv_shader_from_handle(pCreateInfo->pStages[i].shader); + } + + if (pCreateInfo->pTessellationState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + if (pCreateInfo->pViewportState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO"); + if (pCreateInfo->pMultisampleState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); + + pipeline->use_repclear = extra && extra->use_repclear; + + anv_compiler_run(device->compiler, pipeline); + + /* FIXME: The compiler dead-codes FS inputs when we don't have a VS, so we + * hard code this to num_attributes - 2. This is because the attributes + * include VUE header and position, which aren't counted as varying + * inputs. 
*/ + if (pipeline->vs_simd8 == NO_KERNEL) { + pipeline->wm_prog_data.num_varying_inputs = + pCreateInfo->pVertexInputState->attributeCount - 2; + } + + assert(pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); + assert(pCreateInfo->pInputAssemblyState); + emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); + assert(pCreateInfo->pRasterState); + emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); + emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + emit_cb_state(pipeline, pCreateInfo->pColorBlendState); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_STATISTICS, + .StatisticsEnable = true); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_HS, .Enable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_TE, .TEEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_DS, .FunctionEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS, + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS, + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS, + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM_CHROMAKEY, + .ChromaKeyKillEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE_SWIZ); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, + .ClipEnable = true, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM, + .StatisticsEnable = true, + .LineEndCapAntialiasingRegionWidth = _05pixels, + .LineAntialiasingRegionWidth = _10pixels, + 
.EarlyDepthStencilControl = NORMAL, + .ForceThreadDispatchEnable = NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .BarycentricInterpolationMode = + pipeline->wm_prog_data.barycentric_interp_modes); + + uint32_t samples = 1; + uint32_t log2_samples = __builtin_ffs(samples) - 1; + bool enable_sampling = samples > 1 ? true : false; + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_MULTISAMPLE, + .PixelPositionOffsetEnable = enable_sampling, + .PixelLocation = CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SAMPLE_MASK, + .SampleMask = 0xffff); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_VS, + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_GS, + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_HS, + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_DS, + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->gs_vec4 == NO_KERNEL) + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, .Enable = false); + else + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, + .SingleProgramFlow = false, + .KernelStartPointer = pipeline->gs_vec4, + .VectorMaskEnable = Vmask, + .SamplerCount = 0, + .BindingTableEntryCount = 0, + .ExpectedVertexCount = pipeline->gs_vertex_count, + + .ScratchSpaceBasePointer = 
pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], + .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .DispatchGRFStartRegisterForURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + //pipeline->gs_prog_data.dispatch_mode | + .StatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, + .ReorderMode = TRAILING, + .Enable = true, + + .ControlDataFormat = gs_prog_data->control_data_format, + + /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: + * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) + * UserClipDistanceCullTestEnableBitmask(v) + */ + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* Skip the VUE header and position slots */ + offset = 1; + length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->vs_simd8 == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + .FunctionEnable = false, + .VertexURBEntryOutputReadOffset = 1, + /* Even if VS is disabled, SBE still gets the amount of + * vertex data to read from this field. We use attribute + * count - 1, as we don't count the VUE header here. 
*/ + .VertexURBEntryOutputLength = + DIV_ROUND_UP(pCreateInfo->pVertexInputState->attributeCount - 1, 2)); + else + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + .KernelStartPointer = pipeline->vs_simd8, + .SingleVertexDispatch = Multiple, + .VectorMaskEnable = Dmask, + .SamplerCount = 0, + .BindingTableEntryCount = + vue_prog_data->base.binding_table.size_bytes / 4, + .ThreadDispatchPriority = Normal, + .FloatingPointMode = IEEE754, + .IllegalOpcodeExceptionEnable = false, + .AccessesUAV = false, + .SoftwareExceptionEnable = false, + + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], + .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048), + + .DispatchGRFStartRegisterForURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = false, + .SIMD8DispatchEnable = true, + .VertexCacheDisable = false, + .FunctionEnable = true, + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length, + .UserClipDistanceClipTestEnableBitmask = 0, + .UserClipDistanceCullTestEnableBitmask = 0); + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + uint32_t ksp0, ksp2, grf_start0, grf_start2; + + ksp2 = 0; + grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + ksp0 = pipeline->ps_simd8; + grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + ksp2 = pipeline->ps_simd16; + grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + ksp0 = pipeline->ps_simd16; + grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; + } else { + unreachable("no ps shader"); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, + .KernelStartPointer0 = ksp0, + + .SingleProgramFlow = false, + .VectorMaskEnable = true, + .SamplerCount = 
1, + + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), + + .MaximumNumberofThreadsPerPSD = 64 - 2, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE: POSOFFSET_NONE, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._32PixelDispatchEnable = false, + + .DispatchGRFStartRegisterForConstantSetupData0 = grf_start0, + .DispatchGRFStartRegisterForConstantSetupData1 = 0, + .DispatchGRFStartRegisterForConstantSetupData2 = grf_start2, + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = ksp2); + + bool per_sample_ps = false; + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA, + .PixelShaderValid = true, + .PixelShaderKillsPixel = wm_prog_data->uses_kill, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .PixelShaderIsPerSample = per_sample_ps); + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} + +VkResult gen8_compute_pipeline_create( + VkDevice _device, + const VkComputePipelineCreateInfo* pCreateInfo, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + result = anv_reloc_list_init(&pipeline->batch_relocs, device); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + 
pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + + pipeline->shaders[VK_SHADER_STAGE_COMPUTE] = + anv_shader_from_handle(pCreateInfo->cs.shader); + + pipeline->use_repclear = false; + + anv_compiler_run(device->compiler, pipeline); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + + anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), + .ScratchSpaceBasePointerHigh = 0, + .StackSize = 0, + + .MaximumNumberofThreads = device->info.max_cs_threads - 1, + .NumberofURBEntries = 2, + .ResetGatewayTimer = true, + .BypassGatewayControl = true, + .URBEntryAllocationSize = 2, + .CURBEAllocationSize = 0); + + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + uint32_t group_size = prog_data->local_size[0] * + prog_data->local_size[1] * prog_data->local_size[2]; + pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); + uint32_t remainder = group_size & (prog_data->simd_size - 1); + + if (remainder > 0) + pipeline->cs_right_mask = ~0u >> (32 - remainder); + else + pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); + + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c new file mode 100644 index 00000000000..d7078a21696 --- /dev/null +++ b/src/vulkan/gen8_state.c @@ -0,0 +1,286 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +VkResult gen8_CreateDynamicRasterState( + VkDevice _device, + const VkDynamicRasterStateCreateInfo* pCreateInfo, + VkDynamicRasterState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_rs_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .LineWidth = pCreateInfo->lineWidth, + }; + + GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); + + bool enable_bias = pCreateInfo->depthBias != 0.0f || + pCreateInfo->slopeScaledDepthBias != 0.0f; + struct GEN8_3DSTATE_RASTER raster = { + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + 
.GlobalDepthOffsetConstant = pCreateInfo->depthBias, + .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias, + .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp + }; + + GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster); + + *pState = anv_dynamic_rs_state_to_handle(state); + + return VK_SUCCESS; +} + +void +gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, + uint32_t offset, uint32_t range) +{ + /* This assumes RGBA float format. */ + uint32_t stride = 4; + uint32_t num_elements = range / stride; + + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceArray = false, + .SurfaceFormat = format->surface_format, + .SurfaceVerticalAlignment = VALIGN4, + .SurfaceHorizontalAlignment = HALIGN4, + .TileMode = LINEAR, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GEN8_MOCS, + .BaseMipLevel = 0.0, + .SurfaceQPitch = 0, + .Height = (num_elements >> 7) & 0x3fff, + .Width = num_elements & 0x7f, + .Depth = (num_elements >> 21) & 0x3f, + .SurfacePitch = stride - 1, + .MinimumArrayElement = 0, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + .SurfaceMinLOD = 0, + .MIPCountLOD = 0, + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + .ResourceMinLOD = 0.0, + /* FIXME: We assume that the image must be bound at this time. 
*/ + .SurfaceBaseAddress = { NULL, offset }, + }; + + GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); +} + +VkResult gen8_CreateBufferView( + VkDevice _device, + const VkBufferViewCreateInfo* pCreateInfo, + VkBufferView* pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_buffer_view *view; + VkResult result; + + result = anv_buffer_view_create(device, pCreateInfo, &view); + if (result != VK_SUCCESS) + return result; + + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + gen8_fill_buffer_surface_state(view->view.surface_state.map, format, + view->view.offset, pCreateInfo->range); + + *pView = anv_buffer_view_to_handle(view); + + return VK_SUCCESS; +} + +VkResult gen8_CreateSampler( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + uint32_t mag_filter, min_filter, max_anisotropy; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_device_alloc(device, sizeof(*sampler), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + static const uint32_t vk_to_gen_tex_filter[] = { + [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST, + [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR + }; + + static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, + [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR + }; + + static const uint32_t vk_to_gen_tex_address[] = { + [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, + [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, + [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, + [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, + [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, + }; + + static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + 
[VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, + }; + + if (pCreateInfo->maxAnisotropy > 1) { + mag_filter = MAPFILTER_ANISOTROPIC; + min_filter = MAPFILTER_ANISOTROPIC; + max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2; + } else { + mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter]; + min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; + max_anisotropy = RATIO21; + } + + struct GEN8_SAMPLER_STATE sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .LODPreClampMode = 0, + .BaseMipLevel = 0.0, + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], + .MagModeFilter = mag_filter, + .MinModeFilter = min_filter, + .TextureLODBias = pCreateInfo->mipLodBias * 256, + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = pCreateInfo->minLod, + .MaxLOD = pCreateInfo->maxLod, + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = 0, + + .IndirectStatePointer = + device->border_colors.offset + + pCreateInfo->borderColor * sizeof(float) * 4, + + .LODClampMagnificationMode = MIPNONE, + .MaximumAnisotropy = max_anisotropy, + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = 0, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW], + }; + + 
GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} + +VkResult gen8_CreateDynamicDepthStencilState( + VkDevice _device, + const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, + VkDynamicDepthStencilState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_ds_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + + /* Is this what we need to do? */ + .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0, + + .StencilTestMask = pCreateInfo->stencilReadMask & 0xff, + .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, + + .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff, + .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, + }; + + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil, + &wm_depth_stencil); + + struct GEN8_COLOR_CALC_STATE color_calc_state = { + .StencilReferenceValue = pCreateInfo->stencilFrontRef, + .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef + }; + + GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); + + *pState = anv_dynamic_ds_state_to_handle(state); + + return VK_SUCCESS; +} -- cgit v1.2.3 From cff717c6493c5518d988244050d796f7820ef89c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 18 Aug 2015 11:04:19 -0700 Subject: vk: Downgrade state packet to gen7 where they're common MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_batch_chain.c | 4 ++-- src/vulkan/anv_cmd_buffer.c | 4 ++-- 
src/vulkan/anv_device.c | 22 ++++++++++++---------- 3 files changed, 16 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index c34f58b2534..0faf7877af1 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -629,11 +629,11 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_current_surface_bbo(cmd_buffer); if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END); + anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END); /* Round batch up to an even number of dwords. */ if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP); + anv_batch_emit(&cmd_buffer->batch, GEN7_MI_NOOP); cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; } diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 84e69032147..920d9f74ef7 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -518,14 +518,14 @@ flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) if (samplers.alloc_size > 0) { anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, + GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, ._3DCommandSubOpcode = sampler_state_opcodes[stage], .PointertoVSSamplerState = samplers.offset); } if (surfaces.alloc_size > 0) { anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, + GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, ._3DCommandSubOpcode = binding_table_opcodes[stage], .PointertoVSBindingTable = surfaces.offset); } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 1ab9d1a3c43..a33981d7350 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1108,8 +1108,8 @@ VkResult anv_CreateFence( anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size); batch.next = batch.start = fence->bo.map; batch.end = 
fence->bo.map + fence->bo.size; - anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); - anv_batch_emit(&batch, GEN8_MI_NOOP); + anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN7_MI_NOOP); fence->exec2_objects[0].handle = fence->bo.gem_handle; fence->exec2_objects[0].relocation_count = 0; @@ -1777,6 +1777,8 @@ VkResult anv_CreateDynamicViewportState( const VkViewport *vp = &pCreateInfo->pViewports[i]; const VkRect2D *s = &pCreateInfo->pScissors[i]; + /* The gen7 state struct has just the matrix and guardband fields, the + * gen8 struct adds the min/max viewport fields. */ struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = { .ViewportMatrixElementm00 = vp->width / 2, .ViewportMatrixElementm11 = vp->height / 2, @@ -1794,7 +1796,7 @@ VkResult anv_CreateDynamicViewportState( .YMaxViewPort = vp->originY + vp->height - 1, }; - struct GEN8_CC_VIEWPORT cc_viewport = { + struct GEN7_CC_VIEWPORT cc_viewport = { .MinimumDepth = vp->minDepth, .MaximumDepth = vp->maxDepth }; @@ -1804,7 +1806,7 @@ VkResult anv_CreateDynamicViewportState( * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't * what we want. Just special case empty clips and produce a canonical * empty clip. */ - static const struct GEN8_SCISSOR_RECT empty_scissor = { + static const struct GEN7_SCISSOR_RECT empty_scissor = { .ScissorRectangleYMin = 1, .ScissorRectangleXMin = 1, .ScissorRectangleYMax = 0, @@ -1812,7 +1814,7 @@ VkResult anv_CreateDynamicViewportState( }; const int max = 0xffff; - struct GEN8_SCISSOR_RECT scissor = { + struct GEN7_SCISSOR_RECT scissor = { /* Do this math using int64_t so overflow gets clamped correctly. 
*/ .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), @@ -1821,12 +1823,12 @@ VkResult anv_CreateDynamicViewportState( }; GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport); - GEN8_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 32, &cc_viewport); + GEN7_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 32, &cc_viewport); if (s->extent.width <= 0 || s->extent.height <= 0) { - GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor); + GEN7_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor); } else { - GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor); + GEN7_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor); } } @@ -1886,14 +1888,14 @@ VkResult anv_CreateDynamicColorBlendState( if (state == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - struct GEN8_COLOR_CALC_STATE color_calc_state = { + struct GEN7_COLOR_CALC_STATE color_calc_state = { .BlendConstantColorRed = pCreateInfo->blendConst[0], .BlendConstantColorGreen = pCreateInfo->blendConst[1], .BlendConstantColorBlue = pCreateInfo->blendConst[2], .BlendConstantColorAlpha = pCreateInfo->blendConst[3] }; - GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); + GEN7_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); *pState = anv_dynamic_cb_state_to_handle(state); -- cgit v1.2.3 From 97360ffc6c465fafb1445e344351103ad5fee20e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 18 Aug 2015 15:26:07 -0700 Subject: vk: Use anv_batch_emit() for chaining back to primary batch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to use a manual GEN8_MI_BATCH_BUFFER_START_pack() call, but this refactors the code to use anv_batch_emit(); Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_batch_chain.c | 36 ++++++++++++++---------------------- 1 
file changed, 14 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 0faf7877af1..ddb4ca6cc29 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -660,14 +660,13 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT) { cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; - /* For chaining mode, we need to increment the number of - * relocations. This is because, when we chain, we need to add - * an MI_BATCH_BUFFER_START command. Adding this command will - * also add a relocation. In order to handle theis we'll - * increment it here and decrement it right before adding the + /* When we chain, we need to add an MI_BATCH_BUFFER_START command + * with its relocation. In order to handle this we'll increment here + * so we can unconditionally decrement right before adding the * MI_BATCH_BUFFER_START command. */ anv_cmd_buffer_current_batch_bo(cmd_buffer)->relocs.num_relocs++; + cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4; } else { cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; } @@ -704,7 +703,6 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); anv_batch_emit(&primary->batch, GEN8_MI_BATCH_BUFFER_START, - GEN8_MI_BATCH_BUFFER_START_header, ._2ndLevelBatchBuffer = _1stlevelbatch, .AddressSpaceIndicator = ASI_PPGTT, .BatchBufferStartAddress = { &first_bbo->bo, 0 }, @@ -714,24 +712,18 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, assert(primary->batch.start == this_bbo->bo.map); uint32_t offset = primary->batch.next - primary->batch.start; - struct GEN8_MI_BATCH_BUFFER_START ret = { - GEN8_MI_BATCH_BUFFER_START_header, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &this_bbo->bo, offset }, - }; - /* The pack 
function below is going to insert a relocation. In order - * to allow us to splice this secondary into a primary multiple - * times, we can't have relocations from previous splices in this - * splice. In order to deal with this, we simply decrement the - * relocation count prior to inserting the next one. In order to - * handle the base case, num_relocs was artificially incremented in - * end_batch_buffer(). + /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we + * can emit a new command and relocation for the current splice. In + * order to handle the initial-use case, we incremented next and + * num_relocs in end_batch_buffer() so we can alyways just subtract + * here. */ last_bbo->relocs.num_relocs--; - GEN8_MI_BATCH_BUFFER_START_pack(&secondary->batch, - last_bbo->bo.map + last_bbo->length, - &ret); + secondary->batch.next -= GEN8_MI_BATCH_BUFFER_START_length * 4; + anv_batch_emit(&secondary->batch, GEN8_MI_BATCH_BUFFER_START, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &this_bbo->bo, offset }); anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); break; -- cgit v1.2.3 From b4ef2302a9794c33ad9af5c905c6c2002b5dd1cb Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 19 Aug 2015 11:24:52 -0700 Subject: vk: Use helper function for emitting MI_BATCH_BUFFER_START MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_batch_chain.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index ddb4ca6cc29..f1d7bea840a 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -395,6 +395,15 @@ anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) return 
&anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs; } +static void +emit_batch_buffer_start(struct anv_batch *batch, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { bo, offset }); +} + static void cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo) @@ -410,12 +419,7 @@ cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); - anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, - GEN8_MI_BATCH_BUFFER_START_header, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &bbo->bo, 0 }, - ); + emit_batch_buffer_start(batch, &bbo->bo, 0); anv_batch_bo_finish(current_bbo, batch); } @@ -702,11 +706,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, struct anv_batch_bo *last_bbo = list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); - anv_batch_emit(&primary->batch, GEN8_MI_BATCH_BUFFER_START, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &first_bbo->bo, 0 }, - ); + emit_batch_buffer_start(&primary->batch, &first_bbo->bo, 0); struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); assert(primary->batch.start == this_bbo->bo.map); @@ -720,11 +720,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, */ last_bbo->relocs.num_relocs--; secondary->batch.next -= GEN8_MI_BATCH_BUFFER_START_length * 4; - anv_batch_emit(&secondary->batch, GEN8_MI_BATCH_BUFFER_START, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &this_bbo->bo, offset }); - + emit_batch_buffer_start(&secondary->batch, &this_bbo->bo, offset); 
anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); break; } -- cgit v1.2.3 From 25ab43ee8cde04914eb19ed060cedf98c94287a3 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 19 Aug 2015 14:39:35 -0700 Subject: vk: Move vkCmdPipelineBarrier to gen8_cmd_buffer.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_cmd_buffer.c | 138 ------------------------------------------- src/vulkan/gen8_cmd_buffer.c | 138 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 138 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 920d9f74ef7..afc58dfece1 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -638,144 +638,6 @@ void anv_CmdWaitEvents( stub(); } -void anv_CmdPipelineBarrier( - VkCmdBuffer cmdBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - VkBool32 byRegion, - uint32_t memBarrierCount, - const void* const* ppMemBarriers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - uint32_t b, *dw; - - struct GEN8_PIPE_CONTROL cmd = { - GEN8_PIPE_CONTROL_header, - .PostSyncOperation = NoWrite, - }; - - /* XXX: I think waitEvent is a no-op on our HW. We should verify that. 
*/ - - if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { - /* This is just what PIPE_CONTROL does */ - } - - if (anv_clear_mask(&srcStageMask, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { - cmd.StallAtPixelScoreboard = true; - } - - - if (anv_clear_mask(&srcStageMask, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT | - VK_PIPELINE_STAGE_TRANSITION_BIT)) { - cmd.CommandStreamerStallEnable = true; - } - - if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { - anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); - } - - /* On our hardware, all stages will wait for execution as needed. */ - (void)destStageMask; - - /* We checked all known VkPipeEventFlags. */ - anv_assert(srcStageMask == 0); - - /* XXX: Right now, we're really dumb and just flush whatever categories - * the app asks for. One of these days we may make this a bit better - * but right now that's all the hardware allows for in most areas. 
- */ - VkMemoryOutputFlags out_flags = 0; - VkMemoryInputFlags in_flags = 0; - - for (uint32_t i = 0; i < memBarrierCount; i++) { - const struct anv_common *common = ppMemBarriers[i]; - switch (common->sType) { - case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - default: - unreachable("Invalid memory barrier type"); - } - } - - for_each_bit(b, out_flags) { - switch ((VkMemoryOutputFlags)(1 << b)) { - case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: - break; /* FIXME: Little-core systems */ - case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: - cmd.DCFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: - cmd.RenderTargetCacheFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: - cmd.DepthCacheFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_TRANSFER_BIT: - cmd.RenderTargetCacheFlushEnable = true; - cmd.DepthCacheFlushEnable = true; - break; - default: - unreachable("Invalid memory output flag"); - } - } - - for_each_bit(b, out_flags) { - switch ((VkMemoryInputFlags)(1 << b)) { - case VK_MEMORY_INPUT_HOST_READ_BIT: - break; /* FIXME: Little-core systems */ - case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: - case VK_MEMORY_INPUT_INDEX_FETCH_BIT: - case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: - cmd.VFCacheInvalidationEnable = true; - break; - case VK_MEMORY_INPUT_UNIFORM_READ_BIT: - cmd.ConstantCacheInvalidationEnable = true; - /* fallthrough */ - case VK_MEMORY_INPUT_SHADER_READ_BIT: - cmd.DCFlushEnable = true; - 
cmd.TextureCacheInvalidationEnable = true; - break; - case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: - case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: - break; /* XXX: Hunh? */ - case VK_MEMORY_INPUT_TRANSFER_BIT: - cmd.TextureCacheInvalidationEnable = true; - break; - } - } - - dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length); - GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd); -} - void anv_CmdPushConstants( VkCmdBuffer cmdBuffer, VkPipelineLayout layout, diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 59a21081c87..68d6d4a42d3 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -812,3 +812,141 @@ gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, .TextureCacheInvalidationEnable = true); } + +void gen8_CmdPipelineBarrier( + VkCmdBuffer cmdBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + uint32_t b, *dw; + + struct GEN8_PIPE_CONTROL cmd = { + GEN8_PIPE_CONTROL_header, + .PostSyncOperation = NoWrite, + }; + + /* XXX: I think waitEvent is a no-op on our HW. We should verify that. 
*/ + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { + /* This is just what PIPE_CONTROL does */ + } + + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { + cmd.StallAtPixelScoreboard = true; + } + + + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_TRANSITION_BIT)) { + cmd.CommandStreamerStallEnable = true; + } + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { + anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); + } + + /* On our hardware, all stages will wait for execution as needed. */ + (void)destStageMask; + + /* We checked all known VkPipeEventFlags. */ + anv_assert(srcStageMask == 0); + + /* XXX: Right now, we're really dumb and just flush whatever categories + * the app asks for. One of these days we may make this a bit better + * but right now that's all the hardware allows for in most areas. 
+ */ + VkMemoryOutputFlags out_flags = 0; + VkMemoryInputFlags in_flags = 0; + + for (uint32_t i = 0; i < memBarrierCount; i++) { + const struct anv_common *common = ppMemBarriers[i]; + switch (common->sType) { + case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + default: + unreachable("Invalid memory barrier type"); + } + } + + for_each_bit(b, out_flags) { + switch ((VkMemoryOutputFlags)(1 << b)) { + case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: + cmd.DCFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: + cmd.RenderTargetCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + cmd.DepthCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_TRANSFER_BIT: + cmd.RenderTargetCacheFlushEnable = true; + cmd.DepthCacheFlushEnable = true; + break; + default: + unreachable("Invalid memory output flag"); + } + } + + for_each_bit(b, out_flags) { + switch ((VkMemoryInputFlags)(1 << b)) { + case VK_MEMORY_INPUT_HOST_READ_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: + case VK_MEMORY_INPUT_INDEX_FETCH_BIT: + case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: + cmd.VFCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_UNIFORM_READ_BIT: + cmd.ConstantCacheInvalidationEnable = true; + /* fallthrough */ + case VK_MEMORY_INPUT_SHADER_READ_BIT: + cmd.DCFlushEnable = true; + 
cmd.TextureCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: + case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + break; /* XXX: Hunh? */ + case VK_MEMORY_INPUT_TRANSFER_BIT: + cmd.TextureCacheInvalidationEnable = true; + break; + } + } + + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length); + GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd); +} -- cgit v1.2.3 From e43fc871be2d60bd182c9c83372c61ff43d19ab1 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 19 Aug 2015 14:33:22 -0700 Subject: vk: Make batch chain code gen-agnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the extra dword in MI_BATCH_BUFFER_START added in gen8 is at the end of the struct, we can emit the gen8 packet on all gens as long as we set the instruction length correctly. Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_batch_chain.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index f1d7bea840a..af90fb7ae21 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -398,7 +398,21 @@ anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) static void emit_batch_buffer_start(struct anv_batch *batch, struct anv_bo *bo, uint32_t offset) { + /* In gen8+ the address field grew to two dwords to accomodate 48 bit + * offsets. The high 16 bits are in the last dword, so we can use the gen8 + * version in either case, as long as we set the instruction length in the + * header accordingly. This means that we always emit three dwords here + * and all the padding and adjustment we do in this file works for all + * gens. 
+ */ + + const uint32_t gen7_length = + GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias; + const uint32_t gen8_length = + GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias; + anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, + .DwordLength = batch->device->info.gen < 8 ? gen7_length : gen8_length, ._2ndLevelBatchBuffer = _1stlevelbatch, .AddressSpaceIndicator = ASI_PPGTT, .BatchBufferStartAddress = { bo, offset }); -- cgit v1.2.3 From a2b822185ecc8868e1cccfaa98f16129f36c686b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 19 Aug 2015 14:56:12 -0700 Subject: vk: Add helper for adding surface state reloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're going to have to do this differently for earlier gens, so lets do it in place only. Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_cmd_buffer.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index afc58dfece1..4e3264cda69 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -320,6 +320,17 @@ void anv_CmdBindVertexBuffers( } } +static void +add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, + struct anv_state state, struct anv_bo *bo, uint32_t offset) +{ + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), + cmd_buffer->device, state.offset + 8 * 4, bo, offset); + +} + VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, unsigned stage, struct anv_state *bt_state) @@ -379,12 +390,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, memcpy(state.map, view->view.surface_state.map, 64); - /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ - *(uint64_t 
*)(state.map + 8 * 4) = - anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), - cmd_buffer->device, - state.offset + 8 * 4, - view->view.bo, view->view.offset); + add_surface_state_reloc(cmd_buffer, state, view->view.bo, view->view.offset); bt_map[a] = state.offset; } @@ -426,12 +432,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, memcpy(state.map, view->surface_state.map, 64); } - /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ - *(uint64_t *)(state.map + 8 * 4) = - anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), - cmd_buffer->device, - state.offset + 8 * 4, - view->bo, offset); + add_surface_state_reloc(cmd_buffer, state, view->bo, offset); bt_map[start + b] = state.offset; } -- cgit v1.2.3 From 8fe74ec45c93b3ec64243f351a7e2173a76cf788 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 19 Aug 2015 16:01:33 -0700 Subject: vk: Add generic wrapper for filling out buffer surface state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need this for generating surface state on the fly for dynamic buffer views. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_cmd_buffer.c | 5 +++-- src/vulkan/anv_device.c | 14 ++++++++++++++ src/vulkan/anv_private.h | 4 ++++ 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 4e3264cda69..951bb103430 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -425,8 +425,9 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, d->dynamic_offsets[surface_slots[b].dynamic_slot]; offset = view->offset + dynamic_offset; - gen8_fill_buffer_surface_state(state.map, view->format, offset, - view->range - dynamic_offset); + anv_fill_buffer_surface_state(cmd_buffer->device, + state.map, view->format, offset, + view->range - dynamic_offset); } else { offset = view->offset; memcpy(state.map, view->surface_state.map, 64); diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index a33981d7350..bf256765bdf 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1324,6 +1324,20 @@ VkResult anv_DestroyBuffer( return VK_SUCCESS; } +void +anv_fill_buffer_surface_state(struct anv_device *device, void *state, + const struct anv_format *format, + uint32_t offset, uint32_t range) +{ + switch (device->info.gen) { + case 8: + gen8_fill_buffer_surface_state(state, format, offset, range); + break; + default: + unreachable("unsupported gen\n"); + } +} + VkResult anv_buffer_view_create( struct anv_device * device, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 3a46e37bc3a..2f741aca101 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1056,6 +1056,10 @@ VkResult anv_buffer_view_create(struct anv_device *device, const VkBufferViewCreateInfo *pCreateInfo, struct anv_buffer_view **view_out); +void anv_fill_buffer_surface_state(struct anv_device *device, void *state, + const struct anv_format *format, + uint32_t offset, uint32_t range); + void 
gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, uint32_t offset, uint32_t range); -- cgit v1.2.3 From bc568ee992ff272372dc8e57524ab12ec0b7d8a8 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 19 Aug 2015 21:30:08 -0700 Subject: vk: Make anv_cmd_buffer_begin_subpass() switch on gen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_cmd_buffer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 951bb103430..6cb98a21ca4 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -609,7 +609,13 @@ void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass) { - gen8_cmd_buffer_begin_subpass(cmd_buffer, subpass); + switch (cmd_buffer->device->info.gen) { + case 8: + gen8_cmd_buffer_begin_subpass(cmd_buffer, subpass); + break; + default: + unreachable("unsupported gen\n"); + } } void anv_CmdSetEvent( -- cgit v1.2.3 From 988341a73c8d6d4f839802b129e3c6df14ddcc3c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 19 Aug 2015 21:36:57 -0700 Subject: vk: Move anv_CreateImageView to gen8_state.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll probably want to move some code back into a shared init function, but this gets one GEN8 surface state initialization out of anv_image.c. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_image.c | 163 ++++++++--------------------------------------- src/vulkan/anv_private.h | 15 +++++ src/vulkan/gen8_state.c | 154 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 197 insertions(+), 135 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index f24acdb0fac..04fab514582 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -29,12 +29,6 @@ #include "anv_private.h" -struct anv_image_view_info { - uint8_t surface_type; /**< RENDER_SURFACE_STATE.SurfaceType */ - bool is_array:1; /**< RENDER_SURFACE_STATE.SurfaceArray */ - bool is_cube:1; /**< RENDER_SURFACE_STATE.CubeFaceEnable* */ -}; - static const uint8_t anv_halign[] = { [4] = HALIGN4, [8] = HALIGN8, @@ -67,6 +61,12 @@ anv_image_view_info_table[] = { #undef INFO }; +const struct anv_image_view_info * +anv_image_view_info_for_vk_image_view_type(VkImageViewType type) +{ + return &anv_image_view_info_table[type]; +} + static const struct anv_surf_type_limits { int32_t width; int32_t height; @@ -329,124 +329,6 @@ anv_surface_view_fini(struct anv_device *device, anv_state_pool_free(&device->surface_state_pool, view->surface_state); } -void -anv_image_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - struct anv_surface_view *view = &iview->view; - struct anv_surface *surface; - - const struct anv_image_view_info *view_type_info - = &anv_image_view_info_table[pCreateInfo->viewType]; - - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) - anv_finishme("non-2D image views"); - - switch (pCreateInfo->subresourceRange.aspect) { - case VK_IMAGE_ASPECT_STENCIL: - anv_finishme("stencil image views"); - abort(); - break; - case VK_IMAGE_ASPECT_DEPTH: - 
case VK_IMAGE_ASPECT_COLOR: - surface = &image->primary_surface; - break; - default: - unreachable(""); - break; - } - - view->bo = image->bo; - view->offset = image->offset + surface->offset; - view->format = anv_format_for_vk_format(pCreateInfo->format); - - iview->extent = (VkExtent3D) { - .width = anv_minify(image->extent.width, range->baseMipLevel), - .height = anv_minify(image->extent.height, range->baseMipLevel), - .depth = anv_minify(image->extent.depth, range->baseMipLevel), - }; - - uint32_t depth = 1; - if (range->arraySize > 1) { - depth = range->arraySize; - } else if (image->extent.depth > 1) { - depth = image->extent.depth; - } - - static const uint32_t vk_to_gen_swizzle[] = { - [VK_CHANNEL_SWIZZLE_ZERO] = SCS_ZERO, - [VK_CHANNEL_SWIZZLE_ONE] = SCS_ONE, - [VK_CHANNEL_SWIZZLE_R] = SCS_RED, - [VK_CHANNEL_SWIZZLE_G] = SCS_GREEN, - [VK_CHANNEL_SWIZZLE_B] = SCS_BLUE, - [VK_CHANNEL_SWIZZLE_A] = SCS_ALPHA - }; - - struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = view_type_info->surface_type, - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = view->format->surface_format, - .SurfaceVerticalAlignment = anv_valign[surface->v_align], - .SurfaceHorizontalAlignment = anv_halign[surface->h_align], - .TileMode = surface->tile_mode, - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GEN8_MOCS, - - /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in - * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have - * both Base Mip Level fields nonzero". 
- */ - .BaseMipLevel = 0.0, - - .SurfaceQPitch = surface->qpitch >> 2, - .Height = image->extent.height - 1, - .Width = image->extent.width - 1, - .Depth = depth - 1, - .SurfacePitch = surface->stride - 1, - .MinimumArrayElement = range->baseArraySlice, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, - - /* For sampler surfaces, the hardware interprets field MIPCount/LOD as - * MIPCount. The range of levels accessible by the sampler engine is - * [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. - */ - .MIPCountLOD = range->mipLevels - 1, - .SurfaceMinLOD = range->baseMipLevel, - - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ShaderChannelSelectRed = vk_to_gen_swizzle[pCreateInfo->channels.r], - .ShaderChannelSelectGreen = vk_to_gen_swizzle[pCreateInfo->channels.g], - .ShaderChannelSelectBlue = vk_to_gen_swizzle[pCreateInfo->channels.b], - .ShaderChannelSelectAlpha = vk_to_gen_swizzle[pCreateInfo->channels.a], - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, view->offset }, - }; - - if (cmd_buffer) { - view->surface_state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); - } else { - view->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } - - GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); -} - VkResult anv_validate_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, @@ -525,26 +407,37 @@ anv_validate_CreateImageView(VkDevice _device, return anv_CreateImageView(_device, pCreateInfo, pView); } +void +anv_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + switch (device->info.gen) { + case 8: + gen8_image_view_init(iview, device, pCreateInfo, cmd_buffer); + break; + default: + unreachable("unsupported gen\n"); + } +} + VkResult 
anv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, VkImageView *pView) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_image_view *view; - - view = anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - anv_image_view_init(view, device, pCreateInfo, NULL); - - *pView = anv_image_view_to_handle(view); - - return VK_SUCCESS; + switch (device->info.gen) { + case 8: + return gen8_CreateImageView(_device, pCreateInfo, pView); + default: + unreachable("unsupported gen\n"); + } } + VkResult anv_DestroyImageView(VkDevice _device, VkImageView _iview) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 2f741aca101..234cb6ebdc3 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -931,6 +931,15 @@ anv_format_is_depth_or_stencil(const struct anv_format *format) return format->depth_format || format->has_stencil; } +struct anv_image_view_info { + uint8_t surface_type; /**< RENDER_SURFACE_STATE.SurfaceType */ + bool is_array:1; /**< RENDER_SURFACE_STATE.SurfaceArray */ + bool is_cube:1; /**< RENDER_SURFACE_STATE.CubeFaceEnable* */ +}; + +const struct anv_image_view_info * +anv_image_view_info_for_vk_image_view_type(VkImageViewType type); + /** * A proxy for the color surfaces, depth surfaces, and stencil surfaces. 
*/ @@ -1047,6 +1056,12 @@ void anv_image_view_init(struct anv_image_view *view, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); +void +gen8_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + void anv_color_attachment_view_init(struct anv_color_attachment_view *view, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index d7078a21696..fb013cbf3a4 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -142,6 +142,160 @@ VkResult gen8_CreateBufferView( return VK_SUCCESS; } +static const uint8_t anv_halign[] = { + [4] = HALIGN4, + [8] = HALIGN8, + [16] = HALIGN16, +}; + +static const uint8_t anv_valign[] = { + [4] = VALIGN4, + [8] = VALIGN8, + [16] = VALIGN16, +}; + +void +gen8_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + struct anv_surface_view *view = &iview->view; + struct anv_surface *surface; + + const struct anv_format *format_info = + anv_format_for_vk_format(pCreateInfo->format); + + const struct anv_image_view_info *view_type_info = + anv_image_view_info_for_vk_image_view_type(pCreateInfo->viewType); + + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) + anv_finishme("non-2D image views"); + + switch (pCreateInfo->subresourceRange.aspect) { + case VK_IMAGE_ASPECT_STENCIL: + anv_finishme("stencil image views"); + abort(); + break; + case VK_IMAGE_ASPECT_DEPTH: + case VK_IMAGE_ASPECT_COLOR: + view->offset = image->offset; + surface = &image->primary_surface; + break; + default: + unreachable(""); + break; + } + + view->bo = image->bo; + view->offset = image->offset + 
surface->offset; + view->format = format_info; + + iview->extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, range->baseMipLevel), + .height = anv_minify(image->extent.height, range->baseMipLevel), + .depth = anv_minify(image->extent.depth, range->baseMipLevel), + }; + + uint32_t depth = 1; + if (range->arraySize > 1) { + depth = range->arraySize; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + static const uint32_t vk_to_gen_swizzle[] = { + [VK_CHANNEL_SWIZZLE_ZERO] = SCS_ZERO, + [VK_CHANNEL_SWIZZLE_ONE] = SCS_ONE, + [VK_CHANNEL_SWIZZLE_R] = SCS_RED, + [VK_CHANNEL_SWIZZLE_G] = SCS_GREEN, + [VK_CHANNEL_SWIZZLE_B] = SCS_BLUE, + [VK_CHANNEL_SWIZZLE_A] = SCS_ALPHA + }; + + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = view_type_info->surface_type, + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = format_info->surface_format, + .SurfaceVerticalAlignment = anv_valign[surface->v_align], + .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .TileMode = surface->tile_mode, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GEN8_MOCS, + + /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in + * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have + * both Base Mip Level fields nonzero". + */ + .BaseMipLevel = 0.0, + + .SurfaceQPitch = surface->qpitch >> 2, + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->stride - 1, + .MinimumArrayElement = range->baseArraySlice, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + /* For sampler surfaces, the hardware interprets field MIPCount/LOD as + * MIPCount. The range of levels accessible by the sampler engine is + * [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. 
+ */ + .MIPCountLOD = range->mipLevels - 1, + .SurfaceMinLOD = range->baseMipLevel, + + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = vk_to_gen_swizzle[pCreateInfo->channels.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[pCreateInfo->channels.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[pCreateInfo->channels.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[pCreateInfo->channels.a], + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, view->offset }, + }; + + if (cmd_buffer) { + view->surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + } else { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + + GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); +} + +VkResult +gen8_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_image_view *view; + + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_image_view_init(view, device, pCreateInfo, NULL); + + *pView = anv_image_view_to_handle(view); + + return VK_SUCCESS; +} + VkResult gen8_CreateSampler( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, -- cgit v1.2.3 From f5275f7eb35ad48f60f79eb60fe6f833c8ef7502 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 19 Aug 2015 22:19:21 -0700 Subject: vk: Move anv_color_attachment_view_init() to gen8_state.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'd prefer to move anv_CreateAttachmentView() as well, but it's a little too much generic code to just duplicate for each gen. 
For now, we'll add a anv_color_attachment_view_init() to dispatch to the gen specific implementation, which we then call from anv_CreateAttachmentView(). Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_image.c | 110 +++++++---------------------------------------- src/vulkan/anv_private.h | 5 +++ src/vulkan/gen8_state.c | 97 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 95 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 04fab514582..51f2cf5244c 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -450,101 +450,6 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview) return VK_SUCCESS; } -void -anv_color_attachment_view_init(struct anv_color_attachment_view *aview, - struct anv_device *device, - const VkAttachmentViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_surface_view *view = &aview->view; - struct anv_surface *surface = &image->primary_surface; - - aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; - - anv_assert(pCreateInfo->arraySize > 0); - anv_assert(pCreateInfo->mipLevel < image->levels); - anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); - - view->bo = image->bo; - view->offset = image->offset + surface->offset; - view->format = anv_format_for_vk_format(pCreateInfo->format); - - aview->base.extent = (VkExtent3D) { - .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), - .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), - .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), - }; - - uint32_t depth = 1; - if (pCreateInfo->arraySize > 1) { - depth = pCreateInfo->arraySize; - } else if (image->extent.depth > 1) { - depth = image->extent.depth; - } - - if (cmd_buffer) { - view->surface_state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); - } 
else { - view->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } - - struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = SURFTYPE_2D, - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = view->format->surface_format, - .SurfaceVerticalAlignment = anv_valign[surface->v_align], - .SurfaceHorizontalAlignment = anv_halign[surface->h_align], - .TileMode = surface->tile_mode, - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GEN8_MOCS, - - /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in - * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have - * both Base Mip Level fields nonzero". - */ - .BaseMipLevel = 0.0, - - .SurfaceQPitch = surface->qpitch >> 2, - .Height = image->extent.height - 1, - .Width = image->extent.width - 1, - .Depth = depth - 1, - .SurfacePitch = surface->stride - 1, - .MinimumArrayElement = pCreateInfo->baseArraySlice, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, - - /* For render target surfaces, the hardware interprets field MIPCount/LOD as - * LOD. The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. 
- */ - .SurfaceMinLOD = 0, - .MIPCountLOD = pCreateInfo->mipLevel, - - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, view->offset }, - }; - - GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); -} - static void anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, const VkAttachmentViewCreateInfo *pCreateInfo) @@ -572,6 +477,21 @@ anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, view->stencil_qpitch = 0; /* FINISHME: QPitch */ } +void +anv_color_attachment_view_init(struct anv_color_attachment_view *aview, + struct anv_device *device, + const VkAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + switch (device->info.gen) { + case 8: + gen8_color_attachment_view_init(aview, device, pCreateInfo, cmd_buffer); + break; + default: + unreachable("unsupported gen\n"); + } +} + VkResult anv_CreateAttachmentView(VkDevice _device, const VkAttachmentViewCreateInfo *pCreateInfo, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 234cb6ebdc3..58480aca818 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1067,6 +1067,11 @@ void anv_color_attachment_view_init(struct anv_color_attachment_view *view, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); +void gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, + struct anv_device *device, + const VkAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + VkResult anv_buffer_view_create(struct anv_device *device, const VkBufferViewCreateInfo *pCreateInfo, struct anv_buffer_view **view_out); diff --git a/src/vulkan/gen8_state.c 
b/src/vulkan/gen8_state.c index fb013cbf3a4..a22610c99e0 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -296,6 +296,103 @@ gen8_CreateImageView(VkDevice _device, return VK_SUCCESS; } +void +gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, + struct anv_device *device, + const VkAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + struct anv_surface_view *view = &aview->view; + struct anv_surface *surface = &image->primary_surface; + const struct anv_format *format_info = + anv_format_for_vk_format(pCreateInfo->format); + + aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; + + anv_assert(pCreateInfo->arraySize > 0); + anv_assert(pCreateInfo->mipLevel < image->levels); + anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); + + view->bo = image->bo; + view->offset = image->offset + surface->offset; + view->format = anv_format_for_vk_format(pCreateInfo->format); + + aview->base.extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), + .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), + .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), + }; + + uint32_t depth = 1; + if (pCreateInfo->arraySize > 1) { + depth = pCreateInfo->arraySize; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + if (cmd_buffer) { + view->surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + } else { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_2D, + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = format_info->surface_format, + .SurfaceVerticalAlignment = anv_valign[surface->v_align], + .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .TileMode = 
surface->tile_mode, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GEN8_MOCS, + + /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in + * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have + * both Base Mip Level fields nonzero". + */ + .BaseMipLevel = 0.0, + + .SurfaceQPitch = surface->qpitch >> 2, + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->stride - 1, + .MinimumArrayElement = pCreateInfo->baseArraySlice, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + /* For render target surfaces, the hardware interprets field MIPCount/LOD as + * LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + .SurfaceMinLOD = 0, + .MIPCountLOD = pCreateInfo->mipLevel, + + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, view->offset }, + }; + + GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); +} + VkResult gen8_CreateSampler( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, -- cgit v1.2.3 From 963a1e35e72003cea9f82402065119afbf8954da Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 20 Aug 2015 22:12:55 -0700 Subject: vk: Update generated headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds VALIGN_2 and VALIGN_4 defines for IVB and HSW RENDER_SURFACE_STATE. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/gen75_pack.h | 2 ++ src/vulkan/gen7_pack.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index 7602fb7bb76..3ed685bed0c 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -7636,6 +7636,8 @@ struct GEN75_RENDER_SURFACE_STATE { uint32_t SurfaceType; bool SurfaceArray; uint32_t SurfaceFormat; +#define VALIGN_2 0 +#define VALIGN_4 1 uint32_t SurfaceVerticalAlignment; #define HALIGN_4 0 #define HALIGN_8 1 diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 2204263e1dd..5f2dbc470ec 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -6355,6 +6355,8 @@ struct GEN7_RENDER_SURFACE_STATE { uint32_t SurfaceType; bool SurfaceArray; uint32_t SurfaceFormat; +#define VALIGN_2 0 +#define VALIGN_4 1 uint32_t SurfaceVerticalAlignment; #define HALIGN_4 0 #define HALIGN_8 1 -- cgit v1.2.3 From ac738ada7a319c202b59ad6beb878e04d2e7a2ac Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 20 Aug 2015 22:24:13 -0700 Subject: vk: Trim out irrelevant 0-initialized surface state fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many of of these fields aren't used for buffer surfaces, so leave them out for brevity. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/gen8_state.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index a22610c99e0..736a1d9455f 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -84,33 +84,18 @@ gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, .SurfaceVerticalAlignment = VALIGN4, .SurfaceHorizontalAlignment = HALIGN4, .TileMode = LINEAR, - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, .MemoryObjectControlState = GEN8_MOCS, - .BaseMipLevel = 0.0, - .SurfaceQPitch = 0, .Height = (num_elements >> 7) & 0x3fff, .Width = num_elements & 0x7f, .Depth = (num_elements >> 21) & 0x3f, .SurfacePitch = stride - 1, - .MinimumArrayElement = 0, .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, - .SurfaceMinLOD = 0, - .MIPCountLOD = 0, - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, .ShaderChannelSelectRed = SCS_RED, .ShaderChannelSelectGreen = SCS_GREEN, .ShaderChannelSelectBlue = SCS_BLUE, .ShaderChannelSelectAlpha = SCS_ALPHA, - .ResourceMinLOD = 0.0, /* FIXME: We assume that the image must be bound at this time. */ .SurfaceBaseAddress = { NULL, offset }, }; -- cgit v1.2.3 From 615da3795a76e7f18e518d2c896613c3f9c04d27 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 20 Aug 2015 22:32:28 -0700 Subject: vk: Always use a placeholder vertex shader in meta MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clear pipeline didn't have a vertex shader and relied on the clear shader being hardcoded by the compiler to accept one attribute. This necessitated a few special cases in the 3DSTATE_VS setup. Instead, always provide a vertex shader, even if we disable VS dispatch. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_meta.c | 44 +++++++++++++++++++++++++++++++++++++------- src/vulkan/gen8_pipeline.c | 8 +++----- 2 files changed, 40 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 8f681230292..858a3daf11c 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -34,8 +34,21 @@ static void anv_device_init_meta_clear_state(struct anv_device *device) { /* We don't use a vertex shader for clearing, but instead build and pass - * the VUEs directly to the rasterization backend. + * the VUEs directly to the rasterization backend. However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. */ + VkShaderModule vsm = GLSL_VK_SHADER_MODULE(device, VERTEX, + in vec2 a_pos; + in vec4 a_color; + flat out vec4 v_color; + void main() + { + v_color = a_color; + gl_Position = vec4(a_pos, 0, 1); + } + ); + VkShaderModule fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, out vec4 f_color; flat in vec4 v_color; @@ -45,6 +58,14 @@ anv_device_init_meta_clear_state(struct anv_device *device) } ); + VkShader vs; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = vsm, + .pName = "main", + }, &vs); + VkShader fs; anv_CreateShader(anv_device_to_handle(device), &(VkShaderCreateInfo) { @@ -103,12 +124,20 @@ anv_device_init_meta_clear_state(struct anv_device *device) anv_graphics_pipeline_create(anv_device_to_handle(device), &(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = 1, - .pStages = &(VkPipelineShaderStageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT, - .shader = fs, - .pSpecializationInfo = NULL, + + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX, + .shader = vs, + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .pSpecializationInfo = NULL, + } }, .pVertexInputState = &vi_create_info, .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { @@ -153,6 +182,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) &(struct anv_graphics_pipeline_create_info) { .use_repclear = true, .disable_viewport = true, + .disable_vs = true, .use_rectlist = true }, &device->meta_state.clear.pipeline); diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 05091831b98..220317c2d48 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -527,12 +527,10 @@ gen8_graphics_pipeline_create( if (pipeline->vs_simd8 == NO_KERNEL || (extra && extra->disable_vs)) anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, .FunctionEnable = false, - .VertexURBEntryOutputReadOffset = 1, /* Even if VS is disabled, SBE still gets the amount of - * vertex data to read from this field. We use attribute - * count - 1, as we don't count the VUE header here. */ - .VertexURBEntryOutputLength = - DIV_ROUND_UP(pCreateInfo->pVertexInputState->attributeCount - 1, 2)); + * vertex data to read from this field. */ + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); else anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, .KernelStartPointer = pipeline->vs_simd8, -- cgit v1.2.3 From 3800573fb5390e84eebc2ebdbcd85f5736626f65 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 20 Aug 2015 22:41:22 -0700 Subject: vk: Move gen8 specific state into gen8 sub-structs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit moves all occurances of gen8 specific state into a gen8 substruct. 
This clearly identifies the state as gen8 specific and prepares for adding gen7 state structs. In the process we also rename the field names to exactly match the command or state packet name, without the 3DSTATE prefix, eg: 3DSTATE_VF -> gen8.vf 3DSTATE_WM_DEPTH_STENCIL -> gen8.wm_depth_stencil Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_device.c | 2 +- src/vulkan/anv_private.h | 26 ++++++++++++++++---------- src/vulkan/gen8_cmd_buffer.c | 22 +++++++++++----------- src/vulkan/gen8_pipeline.c | 13 ++++++------- src/vulkan/gen8_state.c | 8 ++++---- 5 files changed, 38 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index bf256765bdf..27a51129a74 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1909,7 +1909,7 @@ VkResult anv_CreateDynamicColorBlendState( .BlendConstantColorAlpha = pCreateInfo->blendConst[3] }; - GEN7_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); + GEN7_COLOR_CALC_STATE_pack(NULL, state->color_calc_state, &color_calc_state); *pState = anv_dynamic_cb_state_to_handle(state); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 58480aca818..4d30bcb68ff 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -569,17 +569,21 @@ struct anv_dynamic_vp_state { }; struct anv_dynamic_rs_state { - uint32_t state_sf[GEN8_3DSTATE_SF_length]; - uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; + struct { + uint32_t sf[GEN8_3DSTATE_SF_length]; + uint32_t raster[GEN8_3DSTATE_RASTER_length]; + } gen8; }; struct anv_dynamic_ds_state { - uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; - uint32_t state_color_calc[GEN8_COLOR_CALC_STATE_length]; + struct { + uint32_t wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + uint32_t color_calc_state[GEN8_COLOR_CALC_STATE_length]; + } gen8; }; struct anv_dynamic_cb_state { - uint32_t state_color_calc[GEN8_COLOR_CALC_STATE_length]; + 
uint32_t color_calc_state[GEN8_COLOR_CALC_STATE_length]; }; @@ -861,13 +865,15 @@ struct anv_pipeline { uint32_t vb_used; uint32_t binding_stride[MAX_VBS]; - uint32_t state_sf[GEN8_3DSTATE_SF_length]; - uint32_t state_vf[GEN8_3DSTATE_VF_length]; - uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; - uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; - uint32_t cs_thread_width_max; uint32_t cs_right_mask; + + struct { + uint32_t sf[GEN8_3DSTATE_SF_length]; + uint32_t vf[GEN8_3DSTATE_VF_length]; + uint32_t raster[GEN8_3DSTATE_RASTER_length]; + uint32_t wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + } gen8; }; struct anv_graphics_pipeline_create_info { diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 68d6d4a42d3..6d5004a1ca2 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -97,19 +97,19 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.rs_state->state_sf, - pipeline->state_sf); + cmd_buffer->state.rs_state->gen8.sf, + pipeline->gen8.sf); anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.rs_state->state_raster, - pipeline->state_raster); + cmd_buffer->state.rs_state->gen8.raster, + pipeline->gen8.raster); } if (cmd_buffer->state.ds_state && (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY))) { anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.ds_state->state_wm_depth_stencil, - pipeline->state_wm_depth_stencil); + cmd_buffer->state.ds_state->gen8.wm_depth_stencil, + pipeline->gen8.wm_depth_stencil); } if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY | @@ -117,16 +117,16 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) struct anv_state state; if (cmd_buffer->state.ds_state == NULL) state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - 
cmd_buffer->state.cb_state->state_color_calc, + cmd_buffer->state.cb_state->color_calc_state, GEN8_COLOR_CALC_STATE_length, 64); else if (cmd_buffer->state.cb_state == NULL) state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->state.ds_state->state_color_calc, + cmd_buffer->state.ds_state->gen8.color_calc_state, GEN8_COLOR_CALC_STATE_length, 64); else state = anv_cmd_buffer_merge_dynamic(cmd_buffer, - cmd_buffer->state.ds_state->state_color_calc, - cmd_buffer->state.cb_state->state_color_calc, + cmd_buffer->state.ds_state->gen8.color_calc_state, + cmd_buffer->state.cb_state->color_calc_state, GEN8_COLOR_CALC_STATE_length, 64); anv_batch_emit(&cmd_buffer->batch, @@ -138,7 +138,7 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.state_vf, pipeline->state_vf); + cmd_buffer->state.state_vf, pipeline->gen8.vf); } cmd_buffer->state.vb_dirty &= ~vb_emit; diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 220317c2d48..a993552cfd3 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -124,7 +124,7 @@ emit_ia_state(struct anv_pipeline *pipeline, GEN8_3DSTATE_VF_header, .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, }; - GEN8_3DSTATE_VF_pack(NULL, pipeline->state_vf, &vf); + GEN8_3DSTATE_VF_pack(NULL, pipeline->gen8.vf, &vf); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY, .PrimitiveTopologyType = topology); @@ -165,7 +165,7 @@ emit_rs_state(struct anv_pipeline *pipeline, /* FINISHME: VkBool32 rasterizerDiscardEnable; */ - GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); + GEN8_3DSTATE_SF_pack(NULL, pipeline->gen8.sf, &sf); struct GEN8_3DSTATE_RASTER raster = { GEN8_3DSTATE_RASTER_header, @@ -177,8 +177,6 @@ emit_rs_state(struct anv_pipeline *pipeline, .ViewportZClipTestEnable = info->depthClipEnable }; - 
GEN8_3DSTATE_RASTER_pack(NULL, pipeline->state_raster, &raster); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, .ForceVertexURBEntryReadLength = false, .ForceVertexURBEntryReadOffset = false, @@ -186,6 +184,7 @@ emit_rs_state(struct anv_pipeline *pipeline, .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs); + GEN8_3DSTATE_RASTER_pack(NULL, pipeline->gen8.raster, &raster); } static void @@ -311,8 +310,8 @@ emit_ds_state(struct anv_pipeline *pipeline, /* We're going to OR this together with the dynamic state. We need * to make sure it's initialized to something useful. */ - memset(pipeline->state_wm_depth_stencil, 0, - sizeof(pipeline->state_wm_depth_stencil)); + memset(pipeline->gen8.wm_depth_stencil, 0, + sizeof(pipeline->gen8.wm_depth_stencil)); return; } @@ -335,7 +334,7 @@ emit_ds_state(struct anv_pipeline *pipeline, .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], }; - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->state_wm_depth_stencil, &wm_depth_stencil); + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->gen8.wm_depth_stencil, &wm_depth_stencil); } VkResult diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 736a1d9455f..f035baabf74 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -49,7 +49,7 @@ VkResult gen8_CreateDynamicRasterState( .LineWidth = pCreateInfo->lineWidth, }; - GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); + GEN8_3DSTATE_SF_pack(NULL, state->gen8.sf, &sf); bool enable_bias = pCreateInfo->depthBias != 0.0f || pCreateInfo->slopeScaledDepthBias != 0.0f; @@ -62,7 +62,7 @@ VkResult gen8_CreateDynamicRasterState( .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp }; - GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster); + GEN8_3DSTATE_RASTER_pack(NULL, state->gen8.raster, &raster); *pState = anv_dynamic_rs_state_to_handle(state); @@ -506,7 +506,7 @@ VkResult gen8_CreateDynamicDepthStencilState( .BackfaceStencilWriteMask = 
pCreateInfo->stencilWriteMask & 0xff, }; - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil, + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->gen8.wm_depth_stencil, &wm_depth_stencil); struct GEN8_COLOR_CALC_STATE color_calc_state = { @@ -514,7 +514,7 @@ VkResult gen8_CreateDynamicDepthStencilState( .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef }; - GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state); + GEN8_COLOR_CALC_STATE_pack(NULL, state->gen8.color_calc_state, &color_calc_state); *pState = anv_dynamic_ds_state_to_handle(state); -- cgit v1.2.3 From 9c752b5b38305acaf863fc0165565fcf6f8f390a Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 20 Aug 2015 22:53:54 -0700 Subject: vk: Move generic pipeline init to anv_pipeline.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This logic will be shared between multiple gens. Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_pipeline.c | 105 +++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 18 ++++++++ src/vulkan/gen8_pipeline.c | 76 +++++--------------------------- 3 files changed, 134 insertions(+), 65 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 9a96387795d..02343e489d1 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -170,6 +170,111 @@ VkResult anv_DestroyPipeline( return VK_SUCCESS; } +static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, + 
[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 +}; + +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra) +{ + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + VkResult result; + + result = anv_reloc_list_init(&pipeline->batch_relocs, device); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + pipeline->shaders[pCreateInfo->pStages[i].stage] = + anv_shader_from_handle(pCreateInfo->pStages[i].shader); + } + + if (pCreateInfo->pTessellationState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + if (pCreateInfo->pViewportState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO"); + if (pCreateInfo->pMultisampleState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); + + pipeline->use_repclear = extra && extra->use_repclear; + + anv_compiler_run(device->compiler, pipeline); + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + + pipeline->ps_ksp2 = 0; + pipeline->ps_grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd8; + pipeline->ps_grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; + 
if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp2 = pipeline->ps_simd16; + pipeline->ps_grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd16; + pipeline->ps_grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; + } else { + unreachable("no ps shader"); + } + + const VkPipelineVertexInputStateCreateInfo *vi_info = + pCreateInfo->pVertexInputState; + pipeline->vb_used = 0; + for (uint32_t i = 0; i < vi_info->bindingCount; i++) { + const VkVertexInputBindingDescription *desc = + &vi_info->pVertexBindingDescriptions[i]; + + pipeline->vb_used |= 1 << desc->binding; + pipeline->binding_stride[desc->binding] = desc->strideInBytes; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. */ + switch (desc->stepRate) { + default: + case VK_VERTEX_INPUT_STEP_RATE_VERTEX: + pipeline->instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: + pipeline->instancing_enable[desc->binding] = true; + break; + } + } + + const VkPipelineInputAssemblyStateCreateInfo *ia_info = + pCreateInfo->pInputAssemblyState; + pipeline->primitive_restart = ia_info->primitiveRestartEnable; + pipeline->topology = vk_to_gen_primitive_type[ia_info->topology]; + + if (extra && extra->use_rectlist) + pipeline->topology = _3DPRIM_RECTLIST; + + return VK_SUCCESS; +} + VkResult anv_graphics_pipeline_create( VkDevice _device, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 4d30bcb68ff..2da4c414b41 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -858,12 +858,19 @@ struct anv_pipeline { uint32_t vs_simd8; uint32_t ps_simd8; uint32_t ps_simd16; + uint32_t ps_ksp0; + uint32_t ps_ksp2; + uint32_t ps_grf_start0; + uint32_t ps_grf_start2; uint32_t gs_vec4; uint32_t gs_vertex_count; uint32_t cs_simd; uint32_t 
vb_used; uint32_t binding_stride[MAX_VBS]; + bool instancing_enable[MAX_VBS]; + bool primitive_restart; + uint32_t topology; uint32_t cs_thread_width_max; uint32_t cs_right_mask; @@ -884,12 +891,23 @@ struct anv_graphics_pipeline_create_info { bool use_rectlist; }; +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra); + VkResult anv_graphics_pipeline_create(VkDevice device, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, VkPipeline *pPipeline); +VkResult +gen7_graphics_pipeline_create(VkDevice _device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + VkPipeline *pPipeline); + VkResult gen8_graphics_pipeline_create(VkDevice _device, const VkGraphicsPipelineCreateInfo *pCreateInfo, diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index a993552cfd3..9060b3c744a 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -35,29 +35,6 @@ emit_vertex_input(struct anv_pipeline *pipeline, { const uint32_t num_dwords = 1 + info->attributeCount * 2; uint32_t *p; - bool instancing_enable[32]; - - pipeline->vb_used = 0; - for (uint32_t i = 0; i < info->bindingCount; i++) { - const VkVertexInputBindingDescription *desc = - &info->pVertexBindingDescriptions[i]; - - pipeline->vb_used |= 1 << desc->binding; - pipeline->binding_stride[desc->binding] = desc->strideInBytes; - - /* Step rate is programmed per vertex element (attribute), not - * binding. Set up a map of which bindings step per instance, for - * reference by vertex element setup. 
*/ - switch (desc->stepRate) { - default: - case VK_VERTEX_INPUT_STEP_RATE_VERTEX: - instancing_enable[desc->binding] = false; - break; - case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: - instancing_enable[desc->binding] = true; - break; - } - } p = anv_batch_emitn(&pipeline->batch, num_dwords, GEN8_3DSTATE_VERTEX_ELEMENTS); @@ -81,7 +58,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_INSTANCING, - .InstancingEnable = instancing_enable[desc->binding], + .InstancingEnable = pipeline->instancing_enable[desc->binding], .VertexElementIndex = i, /* Vulkan so far doesn't have an instance divisor, so * this is always 1 (ignored if not instancing). */ @@ -102,32 +79,14 @@ emit_ia_state(struct anv_pipeline *pipeline, const VkPipelineInputAssemblyStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { - static const uint32_t vk_to_gen_primitive_type[] = { - [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 - }; - uint32_t topology = vk_to_gen_primitive_type[info->topology]; - - if (extra && extra->use_rectlist) - topology = _3DPRIM_RECTLIST; - struct GEN8_3DSTATE_VF vf = { GEN8_3DSTATE_VF_header, - .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, + .IndexedDrawCutIndexEnable = pipeline->primitive_restart }; 
GEN8_3DSTATE_VF_pack(NULL, pipeline->gen8.vf, &vf); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY, - .PrimitiveTopologyType = topology); + .PrimitiveTopologyType = pipeline->topology); } static void @@ -356,6 +315,10 @@ gen8_graphics_pipeline_create( if (pipeline == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + result = anv_pipeline_init(pipeline, device, pCreateInfo, extra); + if (result != VK_SUCCESS) + return result; + pipeline->device = device; pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); @@ -564,26 +527,9 @@ gen8_graphics_pipeline_create( .UserClipDistanceCullTestEnableBitmask = 0); const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - uint32_t ksp0, ksp2, grf_start0, grf_start2; - - ksp2 = 0; - grf_start2 = 0; - if (pipeline->ps_simd8 != NO_KERNEL) { - ksp0 = pipeline->ps_simd8; - grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; - if (pipeline->ps_simd16 != NO_KERNEL) { - ksp2 = pipeline->ps_simd16; - grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; - } - } else if (pipeline->ps_simd16 != NO_KERNEL) { - ksp0 = pipeline->ps_simd16; - grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; - } else { - unreachable("no ps shader"); - } anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, - .KernelStartPointer0 = ksp0, + .KernelStartPointer0 = pipeline->ps_ksp0, .SingleProgramFlow = false, .VectorMaskEnable = true, @@ -600,12 +546,12 @@ gen8_graphics_pipeline_create( ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, ._32PixelDispatchEnable = false, - .DispatchGRFStartRegisterForConstantSetupData0 = grf_start0, + .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0, .DispatchGRFStartRegisterForConstantSetupData1 = 0, - .DispatchGRFStartRegisterForConstantSetupData2 = grf_start2, + .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2, .KernelStartPointer1 = 0, - .KernelStartPointer2 = 
ksp2); + .KernelStartPointer2 = pipeline->ps_ksp2); bool per_sample_ps = false; anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA, -- cgit v1.2.3 From 891995e55bbadf75699e659c9d5ded24419e3ad3 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 20 Aug 2015 22:55:08 -0700 Subject: vk: Move 3DSTATE_SBE setup to just before 3DSTATE_PS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a more logical place for it, between geometry front end state and pixel backend state. Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/gen8_pipeline.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 9060b3c744a..bd179fdc845 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -136,13 +136,6 @@ emit_rs_state(struct anv_pipeline *pipeline, .ViewportZClipTestEnable = info->depthClipEnable }; - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, - .ForceVertexURBEntryReadLength = false, - .ForceVertexURBEntryReadOffset = false, - .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = - pipeline->wm_prog_data.num_varying_inputs); - GEN8_3DSTATE_RASTER_pack(NULL, pipeline->gen8.raster, &raster); } @@ -528,6 +521,13 @@ gen8_graphics_pipeline_create( const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = + wm_prog_data->num_varying_inputs); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, .KernelStartPointer0 = pipeline->ps_ksp0, -- cgit v1.2.3 From f1455ffac78b1369d9c4187b7f1d36c2d96e0bab Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 20 Aug 2015 22:59:19 -0700 Subject: vk: Add gen7 support 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With all the previous commits in place, we can now drop in support for multiple platforms. First up is gen7 (Ivybridge). Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/Makefile.am | 5 +- src/vulkan/anv_cmd_buffer.c | 19 +- src/vulkan/anv_compiler.cpp | 10 +- src/vulkan/anv_device.c | 3 + src/vulkan/anv_image.c | 24 +- src/vulkan/anv_pipeline.c | 4 + src/vulkan/anv_private.h | 49 ++++ src/vulkan/gen7_cmd_buffer.c | 647 +++++++++++++++++++++++++++++++++++++++++++ src/vulkan/gen7_pipeline.c | 595 +++++++++++++++++++++++++++++++++++++++ src/vulkan/gen7_state.c | 455 ++++++++++++++++++++++++++++++ src/vulkan/gen8_state.c | 20 -- 11 files changed, 1798 insertions(+), 33 deletions(-) create mode 100644 src/vulkan/gen7_cmd_buffer.c create mode 100644 src/vulkan/gen7_pipeline.c create mode 100644 src/vulkan/gen7_state.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 2359ffeeff1..89880b77c01 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -75,7 +75,10 @@ VULKAN_SOURCES = \ anv_x11.c \ gen8_state.c \ gen8_cmd_buffer.c \ - gen8_pipeline.c + gen8_pipeline.c \ + gen7_state.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c libvulkan_la_SOURCES = \ $(VULKAN_SOURCES) \ diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 6cb98a21ca4..033c7872aaf 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -55,6 +55,8 @@ anv_cmd_state_init(struct anv_cmd_state *state) state->vp_state = NULL; state->rs_state = NULL; state->ds_state = NULL; + + state->gen7.index_buffer = NULL; } VkResult anv_CreateCommandBuffer( @@ -141,6 +143,8 @@ void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { switch (cmd_buffer->device->info.gen) { + case 7: + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); case 8: return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); 
default: @@ -324,11 +328,15 @@ static void add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, struct anv_state state, struct anv_bo *bo, uint32_t offset) { - /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ - *(uint64_t *)(state.map + 8 * 4) = - anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), - cmd_buffer->device, state.offset + 8 * 4, bo, offset); + /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and + * 9 for gen8+. We only write the first dword for gen8+ here and rely on + * the initial state to set the high bits to 0. */ + + const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 1 : 8; + *(uint32_t *)(state.map + dword * 4) = + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), + cmd_buffer->device, state.offset + dword * 4, bo, offset); } VkResult @@ -610,6 +618,9 @@ anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass) { switch (cmd_buffer->device->info.gen) { + case 7: + gen7_cmd_buffer_begin_subpass(cmd_buffer, subpass); + break; case 8: gen8_cmd_buffer_begin_subpass(cmd_buffer, subpass); break; diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 258abfb52be..2dbf59f991e 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -258,7 +258,14 @@ really_do_vs_prog(struct brw_context *brw, return false; } - pipeline->vs_simd8 = upload_kernel(pipeline, program, program_size); + const uint32_t offset = upload_kernel(pipeline, program, program_size); + if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + pipeline->vs_simd8 = offset; + pipeline->vs_vec4 = NO_KERNEL; + } else { + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = offset; + } ralloc_free(mem_ctx); @@ -1121,6 +1128,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) } else { memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); pipeline->vs_simd8 = NO_KERNEL; + 
pipeline->vs_vec4 = NO_KERNEL; } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 27a51129a74..6d2f58603b3 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1330,6 +1330,9 @@ anv_fill_buffer_surface_state(struct anv_device *device, void *state, uint32_t offset, uint32_t range) { switch (device->info.gen) { + case 7: + gen7_fill_buffer_surface_state(state, format, offset, range); + break; case 8: gen8_fill_buffer_surface_state(state, format, offset, range); break; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 51f2cf5244c..15a736c25bc 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -414,6 +414,9 @@ anv_image_view_init(struct anv_image_view *iview, struct anv_cmd_buffer *cmd_buffer) { switch (device->info.gen) { + case 7: + gen7_image_view_init(iview, device, pCreateInfo, cmd_buffer); + break; case 8: gen8_image_view_init(iview, device, pCreateInfo, cmd_buffer); break; @@ -428,15 +431,19 @@ anv_CreateImageView(VkDevice _device, VkImageView *pView) { ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_image_view *view; - switch (device->info.gen) { - case 8: - return gen8_CreateImageView(_device, pCreateInfo, pView); - default: - unreachable("unsupported gen\n"); - } -} + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_image_view_init(view, device, pCreateInfo, NULL); + + *pView = anv_image_view_to_handle(view); + return VK_SUCCESS; +} VkResult anv_DestroyImageView(VkDevice _device, VkImageView _iview) @@ -484,6 +491,9 @@ anv_color_attachment_view_init(struct anv_color_attachment_view *aview, struct anv_cmd_buffer *cmd_buffer) { switch (device->info.gen) { + case 7: + gen7_color_attachment_view_init(aview, device, pCreateInfo, cmd_buffer); + break; case 8: gen8_color_attachment_view_init(aview, device, pCreateInfo, cmd_buffer); break; diff --git 
a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 02343e489d1..39fcd235fa4 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -285,6 +285,8 @@ anv_graphics_pipeline_create( ANV_FROM_HANDLE(anv_device, device, _device); switch (device->info.gen) { + case 7: + return gen7_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); case 8: return gen8_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); default: @@ -325,6 +327,8 @@ static VkResult anv_compute_pipeline_create( ANV_FROM_HANDLE(anv_device, device, _device); switch (device->info.gen) { + case 7: + return gen7_compute_pipeline_create(_device, pCreateInfo, pPipeline); case 8: return gen8_compute_pipeline_create(_device, pCreateInfo, pPipeline); default: diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 2da4c414b41..5d5ab462d1b 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -550,6 +550,12 @@ __gen_combine_address(struct anv_batch *batch, void *location, VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ } while (0) +static const struct GEN7_MEMORY_OBJECT_CONTROL_STATE GEN7_MOCS = { + .GraphicsDataTypeGFDT = 0, + .LLCCacheabilityControlLLCCC = 0, + .L3CacheabilityControlL3CC = 0 +}; + #define GEN8_MOCS { \ .MemoryTypeLLCeLLCCacheabilityControl = WB, \ .TargetCache = L3DefertoPATforLLCeLLCselection, \ @@ -569,6 +575,10 @@ struct anv_dynamic_vp_state { }; struct anv_dynamic_rs_state { + struct { + uint32_t sf[GEN7_3DSTATE_SF_length]; + } gen7; + struct { uint32_t sf[GEN8_3DSTATE_SF_length]; uint32_t raster[GEN8_3DSTATE_RASTER_length]; @@ -576,6 +586,11 @@ struct anv_dynamic_rs_state { }; struct anv_dynamic_ds_state { + struct { + uint32_t depth_stencil_state[GEN7_DEPTH_STENCIL_STATE_length]; + uint32_t color_calc_state[GEN8_COLOR_CALC_STATE_length]; + } gen7; + struct { uint32_t wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; uint32_t color_calc_state[GEN8_COLOR_CALC_STATE_length]; @@ -689,6 
+704,12 @@ struct anv_cmd_state { uint32_t state_vf[GEN8_3DSTATE_VF_length]; struct anv_vertex_binding vertex_bindings[MAX_VBS]; struct anv_descriptor_set_binding descriptors[MAX_SETS]; + + struct { + struct anv_buffer * index_buffer; + uint32_t index_type; + uint32_t index_offset; + } gen7; }; struct anv_cmd_pool { @@ -793,10 +814,14 @@ anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + void gen8_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); @@ -856,6 +881,7 @@ struct anv_pipeline { struct anv_state_stream program_stream; struct anv_state blend_state; uint32_t vs_simd8; + uint32_t vs_vec4; uint32_t ps_simd8; uint32_t ps_simd16; uint32_t ps_ksp0; @@ -875,6 +901,11 @@ struct anv_pipeline { uint32_t cs_thread_width_max; uint32_t cs_right_mask; + struct { + uint32_t sf[GEN7_3DSTATE_SF_length]; + uint32_t depth_stencil_state[GEN7_DEPTH_STENCIL_STATE_length]; + } gen7; + struct { uint32_t sf[GEN8_3DSTATE_SF_length]; uint32_t vf[GEN8_3DSTATE_VF_length]; @@ -914,6 +945,11 @@ gen8_graphics_pipeline_create(VkDevice _device, const struct anv_graphics_pipeline_create_info *extra, VkPipeline *pPipeline); VkResult +gen7_compute_pipeline_create(VkDevice _device, + const VkComputePipelineCreateInfo *pCreateInfo, + VkPipeline *pPipeline); + +VkResult gen8_compute_pipeline_create(VkDevice _device, const VkComputePipelineCreateInfo *pCreateInfo, VkPipeline *pPipeline); @@ -1080,6 +1116,12 @@ void anv_image_view_init(struct anv_image_view *view, const VkImageViewCreateInfo* pCreateInfo, struct 
anv_cmd_buffer *cmd_buffer); +void +gen7_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + void gen8_image_view_init(struct anv_image_view *iview, struct anv_device *device, @@ -1091,6 +1133,11 @@ void anv_color_attachment_view_init(struct anv_color_attachment_view *view, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); +void gen7_color_attachment_view_init(struct anv_color_attachment_view *aview, + struct anv_device *device, + const VkAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + void gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, @@ -1104,6 +1151,8 @@ void anv_fill_buffer_surface_state(struct anv_device *device, void *state, const struct anv_format *format, uint32_t offset, uint32_t range); +void gen7_fill_buffer_surface_state(void *state, const struct anv_format *format, + uint32_t offset, uint32_t range); void gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, uint32_t offset, uint32_t range); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c new file mode 100644 index 00000000000..74fc60fa84c --- /dev/null +++ b/src/vulkan/gen7_cmd_buffer.c @@ -0,0 +1,647 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * 
paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + + +void +gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->state.scratch_size = + anv_block_pool_size(&device->scratch_block_pool); + if (cmd_buffer->state.scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + + anv_batch_emit(&cmd_buffer->batch, GEN7_STATE_BASE_ADDRESS, + .GeneralStateBaseAddress = { scratch_bo, 0 }, + .GeneralStateMemoryObjectControlState = GEN7_MOCS, + .GeneralStateBaseAddressModifyEnable = true, + .GeneralStateAccessUpperBound = { scratch_bo, scratch_bo->size }, + .GeneralStateAccessUpperBoundModifyEnable = true, + + .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, + .SurfaceStateMemoryObjectControlState = GEN7_MOCS, + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GEN7_MOCS, + .DynamicStateBaseAddressModifyEnable = true, + .DynamicStateAccessUpperBound = { &device->dynamic_state_block_pool.bo, + device->dynamic_state_block_pool.bo.size }, + .DynamicStateAccessUpperBoundModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GEN7_MOCS, + 
.IndirectObjectBaseAddressModifyEnable = true, + + .IndirectObjectAccessUpperBound = { NULL, 0xffffffff }, + .IndirectObjectAccessUpperBoundModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GEN7_MOCS, + .InstructionBaseAddressModifyEnable = true, + .InstructionAccessUpperBound = { &device->instruction_block_pool.bo, + device->instruction_block_pool.bo.size }, + .InstructionAccessUpperBoundModifyEnable = true); + + /* After re-setting the surface state base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. 
+ * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. + */ + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, + .TextureCacheInvalidationEnable = true); +} + +static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, +}; + +void gen7_CmdBindIndexBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; + cmd_buffer->state.gen7.index_buffer = buffer; + cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType]; + cmd_buffer->state.gen7.index_offset = offset; +} + +static VkResult +gen7_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct GEN7_INTERFACE_DESCRIPTOR_DATA desc = { + .KernelStartPointer = pipeline->cs_simd, + .BindingTablePointer = surfaces.offset, + .SamplerStatePointer = samplers.offset, + .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? 
*/ + }; + + uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + struct anv_state state = + anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); + + GEN7_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); + + anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +static void +gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + if (cmd_buffer->state.current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } + + if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { + /* FIXME: figure out descriptors for gen7 */ + result = gen7_flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; + } + + cmd_buffer->state.compute_dirty = 0; +} + +static void +gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; + } + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + 
const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN7_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GEN7_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA, + .VertexBufferMemoryObjectControlState = GEN7_MOCS, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1}, + .InstanceDataStepRate = 1 + }; + + GEN7_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. 
*/ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + if (cmd_buffer->state.descriptors_dirty) + anv_flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { + struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = vp_state->scissor.offset); + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = vp_state->cc_vp.offset); + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = vp_state->sf_clip_vp.offset); + } + + if (cmd_buffer->state.dirty & + (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.rs_state->gen7.sf, + pipeline->gen7.sf); + } + + if (cmd_buffer->state.dirty & + (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) { + struct anv_state state; + + if (cmd_buffer->state.ds_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + pipeline->gen7.depth_stencil_state, + GEN7_COLOR_CALC_STATE_length, 64); + else + state = anv_cmd_buffer_merge_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->gen7.depth_stencil_state, + pipeline->gen7.depth_stencil_state, + GEN7_DEPTH_STENCIL_STATE_length, 64); + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, + .PointertoDEPTH_STENCIL_STATE = state.offset); + } + + if (cmd_buffer->state.dirty & + (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) { + struct anv_state state; + if (cmd_buffer->state.ds_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.cb_state->color_calc_state, + GEN7_COLOR_CALC_STATE_length, 64); + else if (cmd_buffer->state.cb_state == NULL) + state = 
anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->gen7.color_calc_state, + GEN7_COLOR_CALC_STATE_length, 64); + else + state = anv_cmd_buffer_merge_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->gen7.color_calc_state, + cmd_buffer->state.cb_state->color_calc_state, + GEN7_COLOR_CALC_STATE_length, 64); + + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = state.offset); + } + + if (cmd_buffer->state.gen7.index_buffer && + cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { + struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer; + uint32_t offset = cmd_buffer->state.gen7.index_offset; + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_INDEX_BUFFER, + .CutIndexEnable = pipeline->primitive_restart, + .IndexFormat = cmd_buffer->state.gen7.index_type, + .MemoryObjectControlState = GEN7_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size }); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +void gen7_CmdDraw( + VkCmdBuffer cmdBuffer, + uint32_t firstVertex, + uint32_t vertexCount, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + gen7_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, + .VertexAccessType = SEQUENTIAL, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void gen7_CmdDrawIndexed( + VkCmdBuffer cmdBuffer, + uint32_t firstIndex, + uint32_t indexCount, + int32_t vertexOffset, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, 
cmd_buffer, cmdBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + gen7_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} + +static void +gen7_batch_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +gen7_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, + .RegisterOffset = reg, + .DataDWord = imm); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void gen7_CmdDrawIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + gen7_cmd_buffer_flush_state(cmd_buffer); + + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + gen7_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + 
anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL, + .PrimitiveTopologyType = pipeline->topology); +} + +void gen7_CmdDrawIndexedIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + gen7_cmd_buffer_flush_state(cmd_buffer); + + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology); +} + +void gen7_CmdDispatch( + VkCmdBuffer cmdBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + gen7_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, 
GEN7_MEDIA_STATE_FLUSH); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void gen7_CmdDispatchIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + gen7_cmd_buffer_flush_compute_state(cmd_buffer); + + gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH); +} + +void gen7_CmdPipelineBarrier( + VkCmdBuffer cmdBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) +{ + stub(); +} + +void +gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + static const struct anv_depth_stencil_view null_view = + { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; + + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_depth_stencil_view *view; + + cmd_buffer->state.subpass = subpass; + + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + if (subpass->depth_stencil_attachment 
!= VK_ATTACHMENT_UNUSED) { + const struct anv_attachment_view *aview = + fb->attachments[subpass->depth_stencil_attachment]; + assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); + view = (const struct anv_depth_stencil_view *)aview; + } else { + view = &null_view; + } + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = view->depth_stride > 0, + .StencilWriteEnable = view->stencil_stride > 0, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = view->depth_format, + .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->depth_offset }, + .Height = cmd_buffer->state.framebuffer->height - 1, + .Width = cmd_buffer->state.framebuffer->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GEN7_MOCS, + .RenderTargetViewExtent = 1 - 1); + + /* Disable hierarchial depth buffers. */ + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER, + .StencilBufferObjectControlState = GEN7_MOCS, + .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->stencil_offset }); + + /* Clear the clear params. 
*/ + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CLEAR_PARAMS); +} + +static void +begin_render_pass(struct anv_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo* pRenderPassBegin) +{ + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; + + const VkRect2D *render_area = &pRenderPassBegin->renderArea; + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DRAWING_RECTANGLE, + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + anv_cmd_buffer_clear_attachments(cmd_buffer, pass, + pRenderPassBegin->pAttachmentClearValues); +} + +void gen7_CmdBeginRenderPass( + VkCmdBuffer cmdBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkRenderPassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + + begin_render_pass(cmd_buffer, pRenderPassBegin); + + gen7_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); +} + +void gen7_CmdNextSubpass( + VkCmdBuffer cmdBuffer, + VkRenderPassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + + gen7_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); +} + +void gen7_CmdEndRenderPass( + VkCmdBuffer cmdBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + /* Emit a flushing pipe control at the end of a pass. This is kind of a + * hack but it ensures that render targets always actually get written. 
+ * Eventually, we should do flushing based on image format transitions + * or something of that nature. + */ + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, + .PostSyncOperation = NoWrite, + .RenderTargetCacheFlushEnable = true, + .InstructionCacheInvalidateEnable = true, + .DepthCacheFlushEnable = true, + .VFCacheInvalidationEnable = true, + .TextureCacheInvalidationEnable = true, + .CommandStreamerStallEnable = true); +} diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c new file mode 100644 index 00000000000..7a54d7eebe4 --- /dev/null +++ b/src/vulkan/gen7_pipeline.c @@ -0,0 +1,595 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +static void +gen7_emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info) +{ + const bool sgvs = pipeline->vs_prog_data.uses_vertexid || + pipeline->vs_prog_data.uses_instanceid; + const uint32_t element_count = info->attributeCount + (sgvs ? 1 : 0); + const uint32_t num_dwords = 1 + element_count * 2; + uint32_t *p; + + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GEN7_3DSTATE_VERTEX_ELEMENTS); + + for (uint32_t i = 0; i < info->attributeCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + const struct anv_format *format = anv_format_for_vk_format(desc->format); + + struct GEN7_VERTEX_ELEMENT_STATE element = { + .VertexBufferIndex = desc->binding, + .Valid = true, + .SourceElementFormat = format->surface_format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offsetInBytes, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component3Control = format->num_channels >= 4 ? VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP + }; + GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); + } + + if (sgvs) { + struct GEN7_VERTEX_ELEMENT_STATE element = { + .Valid = true, + /* FIXME: Do we need to provide the base vertex as component 0 here + * to support the correct base vertex ID? 
*/ + .Component0Control = VFCOMP_STORE_0, + .Component1Control = VFCOMP_STORE_0, + .Component2Control = VFCOMP_STORE_VID, + .Component3Control = VFCOMP_STORE_IID + }; + GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + info->attributeCount * 2], &element); + } +} + +static const uint32_t vk_to_gen_cullmode[] = { + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH +}; + +static const uint32_t vk_to_gen_fillmode[] = { + [VK_FILL_MODE_POINTS] = RASTER_POINT, + [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, + [VK_FILL_MODE_SOLID] = RASTER_SOLID +}; + +static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_CCW] = CounterClockwise, + [VK_FRONT_FACE_CW] = Clockwise +}; + +static void +gen7_emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + + /* FIXME: Get this from pass info */ + .DepthBufferSurfaceFormat = D24_UNORM_X8_UINT, + + /* LegacyGlobalDepthBiasEnable */ + + .StatisticsEnable = true, + .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .ViewTransformEnable = !(extra && extra->disable_viewport), + .FrontWinding = vk_to_gen_front_face[info->frontFace], + /* bool AntiAliasingEnable; */ + + .CullMode = vk_to_gen_cullmode[info->cullMode], + + /* uint32_t LineEndCapAntialiasingRegionWidth; */ + .ScissorRectangleEnable = !(extra && extra->disable_scissor), + + /* uint32_t MultisampleRasterizationMode; */ + /* bool LastPixelEnable; */ + + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + + /* uint32_t AALineDistanceMode; */ + /* uint32_t VertexSubPixelPrecisionSelect; */ + .UsePointWidthState = !pipeline->writes_point_size, + .PointWidth = 1.0, + }; + + 
GEN7_3DSTATE_SF_pack(NULL, &pipeline->gen7.sf, &sf); +} + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; + +static const uint32_t vk_to_gen_stencil_op[] = { + [VK_STENCIL_OP_KEEP] = 0, + [VK_STENCIL_OP_ZERO] = 0, + [VK_STENCIL_OP_REPLACE] = 0, + [VK_STENCIL_OP_INC_CLAMP] = 0, + [VK_STENCIL_OP_DEC_CLAMP] = 0, + [VK_STENCIL_OP_INVERT] = 0, + [VK_STENCIL_OP_INC_WRAP] = 0, + [VK_STENCIL_OP_DEC_WRAP] = 0 +}; + +static const uint32_t vk_to_gen_blend_op[] = { + [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, + [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, + [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, + [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, + [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, +}; + +static const uint32_t vk_to_gen_logic_op[] = { + [VK_LOGIC_OP_COPY] = LOGICOP_COPY, + [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, + [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = LOGICOP_XOR, + [VK_LOGIC_OP_OR] = LOGICOP_OR, + [VK_LOGIC_OP_NOR] = LOGICOP_NOR, + [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = LOGICOP_NAND, + [VK_LOGIC_OP_SET] = LOGICOP_SET, +}; + +static const uint32_t vk_to_gen_blend[] = { + [VK_BLEND_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + 
[VK_BLEND_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, +}; + +static void +gen7_emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. + */ + memset(pipeline->gen7.depth_stencil_state, 0, + sizeof(pipeline->gen7.depth_stencil_state)); + return; + } + + bool has_stencil = false; /* enable if subpass has stencil? */ + + struct GEN7_DEPTH_STENCIL_STATE state = { + /* Is this what we need to do? 
*/ + .StencilBufferWriteEnable = has_stencil, + + .StencilTestEnable = info->stencilTestEnable, + .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], + .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], + + .DoubleSidedStencilEnable = true, + + .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], + .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.stencilDepthFailOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], + + .DepthTestEnable = info->depthTestEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DepthBufferWriteEnable = info->depthWriteEnable, + }; + + GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state); +} + +static void +gen7_emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info) +{ + struct anv_device *device = pipeline->device; + + /* FIXME-GEN7: All render targets share blend state settings on gen7, we + * can't implement this. + */ + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; + + uint32_t num_dwords = GEN7_BLEND_STATE_length; + pipeline->blend_state = + anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); + + struct GEN7_BLEND_STATE blend_state = { + .ColorBufferBlendEnable = a->blendEnable, + .IndependentAlphaBlendEnable = true, /* FIXME: yes? 
*/ + .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], + + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], + + .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], + .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], + .DestinationBlendFactor = vk_to_gen_blend[a->destBlendColor], + .AlphaToCoverageEnable = info->alphaToCoverageEnable, + +#if 0 + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; +#endif + + .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), + .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), + .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), + .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), + + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + +#if 0 + bool AlphaTestEnable; + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; +#endif + }; + + GEN7_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS, + .BlendStatePointer = pipeline->blend_state.offset); +} + +static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = 
_3DPRIM_TRISTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 +}; + +static inline uint32_t +scratch_space(const struct brw_stage_prog_data *prog_data) +{ + return ffs(prog_data->total_scratch / 1024); +} + +VkResult +gen7_graphics_pipeline_create( + VkDevice _device, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_pipeline_init(pipeline, device, pCreateInfo, extra); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + + assert(pCreateInfo->pVertexInputState); + gen7_emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); + + assert(pCreateInfo->pRasterState); + gen7_emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); + + gen7_emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + + gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VF_STATISTICS, + .StatisticsEnable = true); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_HS, .Enable = false); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_TE, .TEEnable = false); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_DS, .DSFunctionEnable = false); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_STREAMOUT, .SOFunctionEnable = false); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS, + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS, + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, 
GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS, + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); + + const VkPipelineRasterStateCreateInfo *rs_info = pCreateInfo->pRasterState; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_CLIP, + .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], + .CullMode = vk_to_gen_cullmode[rs_info->cullMode], + .ClipEnable = true, + .APIMode = APIMODE_OGL, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .ClipMode = CLIPMODE_NORMAL, + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875); + + uint32_t samples = 1; + uint32_t log2_samples = __builtin_ffs(samples) - 1; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_MULTISAMPLE, + .PixelLocation = PIXLOC_CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SAMPLE_MASK, + .SampleMask = 0xff); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_VS, + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_GS, + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_HS, + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_DS, + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* The last geometry producing stage will set urb_offset and urb_length, 
+ * which we use in 3DSTATE_SBE. Skip the VUE header and position slots. */ + uint32_t urb_offset = 1; + uint32_t urb_length = (vue_prog_data->vue_map.num_slots + 1) / 2 - urb_offset; + +#if 0 + /* From gen7_vs_state.c */ + + /** + * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > + * Geometry > Geometry Shader > State: + * + * "Note: Because of corruption in IVB:GT2, software needs to flush the + * whole fixed function pipeline when the GS enable changes value in + * the 3DSTATE_GS." + * + * The hardware architects have clarified that in this context "flush the + * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS + * Stall" bit set. + */ + if (!brw->is_haswell && !brw->is_baytrail) + gen7_emit_vs_workaround_flush(brw); +#endif + + if (pipeline->vs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VS, .VSFunctionEnable = false); + else + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VS, + .KernelStartPointer = pipeline->vs_vec4, + .ScratchSpaceBaseOffset = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], + .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + + .DispatchGRFStartRegisterforURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = true, + .VSFunctionEnable = true); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + + if (pipeline->gs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) { + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_GS, .GSEnable = false); + } else { + urb_offset = 1; + urb_length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_GS, + .KernelStartPointer = pipeline->gs_vec4, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], + 
.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .DispatchGRFStartRegisterforURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads - 1, + /* This in the next dword on HSW. */ + .ControlDataFormat = gs_prog_data->control_data_format, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + .InstanceControl = gs_prog_data->invocations - 1, + .DispatchMode = gs_prog_data->base.dispatch_mode, + .GSStatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, + .ReorderEnable = true, + .GSEnable = true); + } + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || + wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) + anv_finishme("two-sided color needs sbe swizzling setup"); + if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) + anv_finishme("primitive_id needs sbe swizzling setup"); + + /* FIXME: generated header doesn't emit attr swizzle fields */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE, + .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, + .VertexURBEntryReadLength = urb_length, + .VertexURBEntryReadOffset = urb_offset, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PS, + .KernelStartPointer0 = pipeline->ps_ksp0, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), + + .MaximumNumberofThreads = device->info.max_wm_threads - 1, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = 
wm_prog_data->uses_omask, + + .RenderTargetFastClearEnable = false, + .DualSourceBlendEnable = false, + .RenderTargetResolveEnable = false, + + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE : POSOFFSET_NONE, + + ._32PixelDispatchEnable = false, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + + .DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterforConstantSetupData1 = 0, + .DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2, + +#if 0 + /* Haswell requires the sample mask to be set in this packet as well as + * in 3DSTATE_SAMPLE_MASK; the values should match. */ + /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ +#endif + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + .StatisticsEnable = true, + .ThreadDispatchEnable = true, + .LineEndCapAntialiasingRegionWidth = _05pixels, + .LineAntialiasingRegionWidth = _10pixels, + .EarlyDepthStencilControl = NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes); + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} + +VkResult gen7_CreateGraphicsPipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = gen7_graphics_pipeline_create(_device, &pCreateInfos[i], + NULL, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j]); + } + + return result; + } + } + + return VK_SUCCESS; +} + 
+VkResult gen7_compute_pipeline_create( + VkDevice _device, + const VkComputePipelineCreateInfo* pCreateInfo, + VkPipeline* pPipeline) +{ + anv_finishme("primitive_id needs sbe swizzling setup"); + + return vk_error(VK_ERROR_UNAVAILABLE); +} diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c new file mode 100644 index 00000000000..4cab54f2d36 --- /dev/null +++ b/src/vulkan/gen7_state.c @@ -0,0 +1,455 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +void +gen7_fill_buffer_surface_state(void *state, const struct anv_format *format, + uint32_t offset, uint32_t range) +{ + /* This assumes RGBA float format. */ + + uint32_t stride = 16; /* Depends on whether accessing shader is simd8 or + * vec4. 
Will need one of each for buffers that are + * used in both vec4 and simd8. */ + + uint32_t num_elements = range / stride; + + struct GEN7_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceFormat = format->surface_format, + .SurfaceVerticalAlignment = VALIGN_4, + .SurfaceHorizontalAlignment = HALIGN_4, + .TiledSurface = false, + .RenderCacheReadWriteMode = WriteOnlyCache, + .SurfaceObjectControlState = GEN7_MOCS, + .Height = (num_elements >> 7) & 0x3fff, + .Width = num_elements & 0x7f, + .Depth = (num_elements >> 21) & 0x3f, + .SurfacePitch = stride - 1, + .SurfaceBaseAddress = { NULL, offset }, + }; + + GEN7_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); +} + +VkResult gen7_CreateBufferView( + VkDevice _device, + const VkBufferViewCreateInfo* pCreateInfo, + VkBufferView* pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_buffer_view *view; + VkResult result; + + result = anv_buffer_view_create(device, pCreateInfo, &view); + if (result != VK_SUCCESS) + return result; + + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + gen7_fill_buffer_surface_state(view->view.surface_state.map, format, + view->view.offset, pCreateInfo->range); + + *pView = anv_buffer_view_to_handle(view); + + return VK_SUCCESS; +} + +static const uint32_t vk_to_gen_tex_filter[] = { + [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST, + [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR +}; + +static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, + [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR +}; + +static const uint32_t vk_to_gen_tex_address[] = { + [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, + [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, + [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, + [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, + [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + 
[VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; + +VkResult gen7_CreateSampler( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + uint32_t mag_filter, min_filter, max_anisotropy; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_device_alloc(device, sizeof(*sampler), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + if (pCreateInfo->maxAnisotropy > 1) { + mag_filter = MAPFILTER_ANISOTROPIC; + min_filter = MAPFILTER_ANISOTROPIC; + max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2; + } else { + mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter]; + min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; + max_anisotropy = RATIO21; + } + + struct GEN7_SAMPLER_STATE sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .BaseMipLevel = 0.0, + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], + .MagModeFilter = mag_filter, + .MinModeFilter = min_filter, + .TextureLODBias = pCreateInfo->mipLodBias * 256, + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = pCreateInfo->minLod, + .MaxLOD = pCreateInfo->maxLod, + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = 0, + + .BorderColorPointer = + device->border_colors.offset + + pCreateInfo->borderColor * sizeof(float) * 4, + + .MaximumAnisotropy = max_anisotropy, + .RAddressMinFilterRoundingEnable = 0, + 
.RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = 0, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW], + }; + + GEN7_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} +VkResult gen7_CreateDynamicRasterState( + VkDevice _device, + const VkDynamicRasterStateCreateInfo* pCreateInfo, + VkDynamicRasterState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_rs_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + bool enable_bias = pCreateInfo->depthBias != 0.0f || + pCreateInfo->slopeScaledDepthBias != 0.0f; + + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + .LineWidth = pCreateInfo->lineWidth, + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = pCreateInfo->depthBias, + .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias, + .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp + }; + + GEN7_3DSTATE_SF_pack(NULL, state->gen7.sf, &sf); + + *pState = anv_dynamic_rs_state_to_handle(state); + + return VK_SUCCESS; +} + +VkResult gen7_CreateDynamicDepthStencilState( + VkDevice _device, + const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, + VkDynamicDepthStencilState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct 
anv_dynamic_ds_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN7_DEPTH_STENCIL_STATE depth_stencil_state = { + .StencilTestMask = pCreateInfo->stencilReadMask & 0xff, + .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, + .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff, + .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, + }; + + GEN7_DEPTH_STENCIL_STATE_pack(NULL, state->gen7.depth_stencil_state, + &depth_stencil_state); + + struct GEN7_COLOR_CALC_STATE color_calc_state = { + .StencilReferenceValue = pCreateInfo->stencilFrontRef, + .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef + }; + + GEN7_COLOR_CALC_STATE_pack(NULL, state->gen7.color_calc_state, &color_calc_state); + + *pState = anv_dynamic_ds_state_to_handle(state); + + return VK_SUCCESS; +} + +static const uint8_t anv_halign[] = { + [4] = HALIGN_4, + [8] = HALIGN_8, +}; + +static const uint8_t anv_valign[] = { + [2] = VALIGN_2, + [4] = VALIGN_4, +}; + +void +gen7_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + struct anv_surface_view *view = &iview->view; + struct anv_surface *surface; + + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + const struct anv_image_view_info *view_type_info = + anv_image_view_info_for_vk_image_view_type(pCreateInfo->viewType); + + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) + anv_finishme("non-2D image views"); + + switch (pCreateInfo->subresourceRange.aspect) { + case VK_IMAGE_ASPECT_STENCIL: + 
anv_finishme("stencil image views"); + abort(); + break; + case VK_IMAGE_ASPECT_DEPTH: + case VK_IMAGE_ASPECT_COLOR: + view->offset = image->offset; + surface = &image->primary_surface; + break; + default: + unreachable(""); + break; + } + + view->bo = image->bo; + view->offset = image->offset + surface->offset; + view->format = anv_format_for_vk_format(pCreateInfo->format); + + iview->extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, range->baseMipLevel), + .height = anv_minify(image->extent.height, range->baseMipLevel), + .depth = anv_minify(image->extent.depth, range->baseMipLevel), + }; + + uint32_t depth = 1; + if (range->arraySize > 1) { + depth = range->arraySize; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + struct GEN7_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = view_type_info->surface_type, + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = format->surface_format, + .SurfaceVerticalAlignment = anv_valign[surface->v_align], + .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .TiledSurface = surface->tile_mode > LINEAR, + .TileWalk = surface->tile_mode == XMAJOR ? TILEWALK_XMAJOR : TILEWALK_YMAJOR, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .RenderCacheReadWriteMode = false, + + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->stride - 1, + .MinimumArrayElement = range->baseArraySlice, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + .SurfaceObjectControlState = GEN7_MOCS, + + /* For render target surfaces, the hardware interprets field MIPCount/LOD as + * LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. 
+ */ + .MIPCountLOD = range->mipLevels - 1, + .SurfaceMinLOD = range->baseMipLevel, + + .MCSEnable = false, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, view->offset }, + }; + + if (cmd_buffer) { + view->surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + } else { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + + GEN7_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); +} + +void +gen7_color_attachment_view_init(struct anv_color_attachment_view *aview, + struct anv_device *device, + const VkAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + struct anv_surface_view *view = &aview->view; + struct anv_surface *surface = &image->primary_surface; + + aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; + + anv_assert(pCreateInfo->arraySize > 0); + anv_assert(pCreateInfo->mipLevel < image->levels); + anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); + + view->bo = image->bo; + view->offset = image->offset + surface->offset; + view->format = anv_format_for_vk_format(pCreateInfo->format); + + aview->base.extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), + .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), + .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), + }; + + uint32_t depth = 1; + if (pCreateInfo->arraySize > 1) { + depth = pCreateInfo->arraySize; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + if (cmd_buffer) { + view->surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + } else { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + + struct 
GEN7_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_2D, + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = view->format->surface_format, + .SurfaceVerticalAlignment = anv_valign[surface->v_align], + .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .TiledSurface = surface->tile_mode > LINEAR, + .TileWalk = surface->tile_mode == XMAJOR ? TILEWALK_XMAJOR : TILEWALK_YMAJOR, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .RenderCacheReadWriteMode = WriteOnlyCache, + + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->stride - 1, + .MinimumArrayElement = pCreateInfo->baseArraySlice, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + .SurfaceObjectControlState = GEN7_MOCS, + + /* For render target surfaces, the hardware interprets field MIPCount/LOD as + * LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. 
+ */ + .SurfaceMinLOD = 0, + .MIPCountLOD = pCreateInfo->mipLevel, + + .MCSEnable = false, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, view->offset }, + + }; + + GEN7_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); +} diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index f035baabf74..0d65f169231 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -261,26 +261,6 @@ gen8_image_view_init(struct anv_image_view *iview, GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); } -VkResult -gen8_CreateImageView(VkDevice _device, - const VkImageViewCreateInfo *pCreateInfo, - VkImageView *pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_image_view *view; - - view = anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_image_view_init(view, device, pCreateInfo, NULL); - - *pView = anv_image_view_to_handle(view); - - return VK_SUCCESS; -} - void gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, struct anv_device *device, -- cgit v1.2.3 From 6a1098b2c2577b1a1b40c92d47e58d636ba39923 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 25 Aug 2015 10:48:43 -0700 Subject: vk/gen7: Use TILEWALK_XMAJOR for linear surfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit You wouldn't think the TileWalk mode matters when TiledSurface is false. However, it has to be TILEWALK_XMAJOR. Make it so. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/gen7_state.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 4cab54f2d36..042c4224c5f 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -322,8 +322,13 @@ gen7_image_view_init(struct anv_image_view *iview, .SurfaceFormat = format->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + + /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if + * Tiled Surface is False." + */ .TiledSurface = surface->tile_mode > LINEAR, - .TileWalk = surface->tile_mode == XMAJOR ? TILEWALK_XMAJOR : TILEWALK_YMAJOR, + .TileWalk = surface->tile_mode == YMAJOR ? TILEWALK_YMAJOR : TILEWALK_XMAJOR, + .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, .RenderCacheReadWriteMode = false, @@ -415,8 +420,13 @@ gen7_color_attachment_view_init(struct anv_color_attachment_view *aview, .SurfaceFormat = view->format->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + + /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if + * Tiled Surface is False." + */ .TiledSurface = surface->tile_mode > LINEAR, - .TileWalk = surface->tile_mode == XMAJOR ? TILEWALK_XMAJOR : TILEWALK_YMAJOR, + .TileWalk = surface->tile_mode == YMAJOR ? TILEWALK_YMAJOR : TILEWALK_XMAJOR, + .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, .RenderCacheReadWriteMode = WriteOnlyCache, -- cgit v1.2.3 From 00e7799c69b3cdc64e7badc670de98103418581c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 25 Aug 2015 15:55:56 -0700 Subject: vk/gen7: Enable L3 caching for GEN7 MOCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do what GL does here. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 5d5ab462d1b..f0d4233b046 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -553,7 +553,7 @@ __gen_combine_address(struct anv_batch *batch, void *location, static const struct GEN7_MEMORY_OBJECT_CONTROL_STATE GEN7_MOCS = { .GraphicsDataTypeGFDT = 0, .LLCCacheabilityControlLLCCC = 0, - .L3CacheabilityControlL3CC = 0 + .L3CacheabilityControlL3CC = 1 }; #define GEN8_MOCS { \ -- cgit v1.2.3 From 7e5afa75b5193c188551ed6a98c56691eae6841a Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 25 Aug 2015 15:57:12 -0700 Subject: vk: Support descriptor sets and bindings in vec4 ubo loads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Still incomplete, but at least we get the simplest case working. Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 923e2d30a4c..ee353b22330 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -612,17 +612,19 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) has_indirect = true; /* fallthrough */ case nir_intrinsic_load_ubo: { + const uint32_t set = instr->const_index[0]; nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]); src_reg surf_index; dest = get_nir_dest(instr->dest); if (const_block_index) { + uint32_t binding = const_block_index->u[0]; + /* The block index is a constant, so just emit the binding table entry * as an immediate. 
*/ - surf_index = src_reg(prog_data->base.binding_table.ubo_start + - const_block_index->u[0]); + surf_index = src_reg(stage_prog_data->bind_map[set].index[binding]); } else { /* The block index is not a constant. Evaluate the index expression * per-channel and add the base UBO index; we have to select a value -- cgit v1.2.3 From 647a60226d974288ca8ff106592f156dbd0b2645 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 25 Aug 2015 15:58:21 -0700 Subject: vk: Use true/false for RenderCacheReadWriteMode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This field in surface state is a bool, WriteOnlyCache is an enum from GEN8. Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/gen7_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 042c4224c5f..801716ea884 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -47,7 +47,7 @@ gen7_fill_buffer_surface_state(void *state, const struct anv_format *format, .SurfaceVerticalAlignment = VALIGN_4, .SurfaceHorizontalAlignment = HALIGN_4, .TiledSurface = false, - .RenderCacheReadWriteMode = WriteOnlyCache, + .RenderCacheReadWriteMode = false, .SurfaceObjectControlState = GEN7_MOCS, .Height = (num_elements >> 7) & 0x3fff, .Width = num_elements & 0x7f, -- cgit v1.2.3 From 5360edcb304e147341b934567f3bbf40e9d5a3b5 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 25 Aug 2015 16:14:59 -0700 Subject: vk/vec4: Use the right constant for offset into a UBO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were using constant 0, which is the set. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index ee353b22330..fd3d556bfb6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -644,7 +644,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) shader_prog->NumUniformBlocks - 1); } - unsigned const_offset = instr->const_index[0]; + unsigned const_offset = instr->const_index[1]; src_reg offset; if (!has_indirect) { -- cgit v1.2.3 From 4bb9915755f6066f24b826cb323739dbebe7dba5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 25 Aug 2015 18:32:56 -0700 Subject: vk/gen8: Don't duplicate generic pipeline setup gen8_graphics_pipeline_create had a bunch of stuff in it that's already set up by anv_pipeline_init. The duplication was causing double-initialization of a state stream and made valgrind very angry. 
--- src/vulkan/anv_pipeline.c | 3 ++- src/vulkan/gen8_pipeline.c | 32 -------------------------------- 2 files changed, 2 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 39fcd235fa4..9372fb318df 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -189,10 +189,11 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra) { + VkResult result; + pipeline->device = device; pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); - VkResult result; result = anv_reloc_list_init(&pipeline->batch_relocs, device); if (result != VK_SUCCESS) { diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index bd179fdc845..9e87a6951b6 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -312,38 +312,6 @@ gen8_graphics_pipeline_create( if (result != VK_SUCCESS) return result; - pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); - - result = anv_reloc_list_init(&pipeline->batch_relocs, device); - if (result != VK_SUCCESS) { - anv_device_free(device, pipeline); - return result; - } - pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; - pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); - pipeline->batch.relocs = &pipeline->batch_relocs; - - anv_state_stream_init(&pipeline->program_stream, - &device->instruction_block_pool); - - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - pipeline->shaders[pCreateInfo->pStages[i].stage] = - anv_shader_from_handle(pCreateInfo->pStages[i].shader); - } - - if (pCreateInfo->pTessellationState) - 
anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); - if (pCreateInfo->pViewportState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO"); - if (pCreateInfo->pMultisampleState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); - - pipeline->use_repclear = extra && extra->use_repclear; - - anv_compiler_run(device->compiler, pipeline); - /* FIXME: The compiler dead-codes FS inputs when we don't have a VS, so we * hard code this to num_attributes - 2. This is because the attributes * include VUE header and position, which aren't counted as varying -- cgit v1.2.3 From 74e076bba807884f9bbc14c1b2ad8cf501c7ed1f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 25 Aug 2015 18:51:26 -0700 Subject: vk/meta: Destroy vertex shaders when setting up clearing --- src/vulkan/anv_meta.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 858a3daf11c..cdd357ddcc9 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -187,7 +187,9 @@ anv_device_init_meta_clear_state(struct anv_device *device) }, &device->meta_state.clear.pipeline); + anv_DestroyShaderModule(anv_device_to_handle(device), vsm); anv_DestroyShaderModule(anv_device_to_handle(device), fsm); + anv_DestroyShader(anv_device_to_handle(device), vs); anv_DestroyShader(anv_device_to_handle(device), fs); } -- cgit v1.2.3 From 5446bf352e130ed4c61c33bddf2de7cf7899a5d7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 26 Aug 2015 15:01:38 -0700 Subject: vk: Add initial API support for setting push constants This doesn't add support for actually uploading them, it just ensures that we have and update the shadow copy. 
--- src/vulkan/anv_cmd_buffer.c | 21 ++++++++++++++++++++- src/vulkan/anv_device.c | 2 +- src/vulkan/anv_private.h | 13 +++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 033c7872aaf..a2a2833b62e 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -47,10 +47,12 @@ anv_cmd_state_init(struct anv_cmd_state *state) state->ds_state = NULL; memset(&state->state_vf, 0, sizeof(state->state_vf)); memset(&state->descriptors, 0, sizeof(state->descriptors)); + memset(&state->push_constants, 0, sizeof(state->push_constants)); state->dirty = 0; state->vb_dirty = 0; state->descriptors_dirty = 0; + state->push_constants_dirty = 0; state->pipeline = NULL; state->vp_state = NULL; state->rs_state = NULL; @@ -210,12 +212,14 @@ void anv_CmdBindPipeline( case VK_PIPELINE_BIND_POINT_COMPUTE: cmd_buffer->state.compute_pipeline = pipeline; cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; break; case VK_PIPELINE_BIND_POINT_GRAPHICS: cmd_buffer->state.pipeline = pipeline; cmd_buffer->state.vb_dirty |= pipeline->vb_used; cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; break; default: @@ -665,7 +669,22 @@ void anv_CmdPushConstants( uint32_t length, const void* values) { - stub(); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + uint32_t stage; + + for_each_bit(stage, stageFlags) { + if (cmd_buffer->state.push_constants[stage].data == NULL) { + cmd_buffer->state.push_constants[stage].data = + anv_device_alloc(cmd_buffer->device, + sizeof(struct anv_push_constant_data), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + } + + memcpy(cmd_buffer->state.push_constants[stage].data->client_data + start, + values, length); + } + + cmd_buffer->state.push_constants_dirty |= stageFlags; } void 
anv_CmdExecuteCommands( diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 6d2f58603b3..57b2681a2d1 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -289,7 +289,7 @@ VkResult anv_GetPhysicalDeviceLimits( .maxTexelBufferSize = (1 << 14), .maxUniformBufferSize = UINT32_MAX, .maxStorageBufferSize = UINT32_MAX, - .maxPushConstantsSize = 128, + .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, .maxMemoryAllocationCount = UINT32_MAX, .bufferImageGranularity = 64, /* A cache line */ .maxBoundDescriptorSets = MAX_SETS, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index f0d4233b046..cb8defd23b6 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -642,6 +642,7 @@ anv_descriptor_set_destroy(struct anv_device *device, #define MAX_VBS 32 #define MAX_SETS 8 #define MAX_RTS 8 +#define MAX_PUSH_CONSTANTS_SIZE 128 struct anv_pipeline_layout { struct { @@ -684,6 +685,16 @@ struct anv_descriptor_set_binding { uint32_t dynamic_offsets[128]; }; +struct anv_push_constant_data { + uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; + uint8_t driver_data[0]; +}; + +struct anv_push_constants { + uint32_t driver_data_size; + struct anv_push_constant_data *data; +}; + /** State required while building cmd buffer */ struct anv_cmd_state { uint32_t current_pipeline; @@ -691,6 +702,7 @@ struct anv_cmd_state { uint32_t dirty; uint32_t compute_dirty; uint32_t descriptors_dirty; + uint32_t push_constants_dirty; uint32_t scratch_size; struct anv_pipeline * pipeline; struct anv_pipeline * compute_pipeline; @@ -704,6 +716,7 @@ struct anv_cmd_state { uint32_t state_vf[GEN8_3DSTATE_VF_length]; struct anv_vertex_binding vertex_bindings[MAX_VBS]; struct anv_descriptor_set_binding descriptors[MAX_SETS]; + struct anv_push_constants push_constants[VK_SHADER_STAGE_NUM]; struct { struct anv_buffer * index_buffer; -- cgit v1.2.3 From 33cabeab01bb18c7ea9006419186c76a97c91b43 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 26 Aug 
2015 16:20:34 -0700 Subject: vk/compiler: Add a helper for setting up prog_data->param This new helper sets it up the way we'll want for handling push constants. --- src/vulkan/anv_compiler.cpp | 113 ++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 4cbf98afa1e..c0aed33bd12 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -115,6 +115,50 @@ upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size) return state.offset; } +static void +create_params_array(struct anv_device *device, + struct gl_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + unsigned num_client_params; + if (shader->num_uniform_components) { + /* If the shader uses any push constants at all, we'll just give + * them the maximum possible number + */ + num_client_params = MAX_PUSH_CONSTANTS_SIZE / sizeof(float); + } else { + num_client_params = 0; + } + + /* We'll need to add space here for images, texture rectangle, uniform + * offsets, etc. + */ + unsigned num_driver_params = 0; + + unsigned num_total_params = num_client_params + num_driver_params; + + if (num_total_params == 0) + return; + + prog_data->param = (const gl_constant_value **) + anv_device_alloc(device, num_total_params * sizeof(gl_constant_value *), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER); + + /* We now set the param values to be offsets into a + * anv_push_constant_data structure. Since the compiler doesn't + * actually dereference any of the gl_constant_value pointers in the + * params array, it doesn't really matter what we put here. 
+ */ + struct anv_push_constant_data *null_data = NULL; + for (unsigned i = 0; i < num_client_params; i++) + prog_data->param[i] = + (const gl_constant_value *)&null_data->client_data[i * sizeof(float)]; + + for (unsigned i = 0; i < num_driver_params; i++) + prog_data->param[num_client_params + i] = + (const gl_constant_value *)&null_data->driver_data[i * sizeof(float)]; +} + static void brw_vs_populate_key(struct brw_context *brw, struct brw_vertex_program *vp, @@ -178,34 +222,7 @@ really_do_vs_prog(struct brw_context *brw, mem_ctx = ralloc_context(NULL); - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - */ - int param_count; - if (vs) { - /* We add padding around uniform values below vec4 size, with the worst - * case being a float value that gets blown up to a vec4, so be - * conservative here. - */ - param_count = vs->num_uniform_components * 4; - - } else { - param_count = vp->program.Base.Parameters->NumParameters * 4; - } - /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip - * planes as uniforms. - */ - param_count += key->base.nr_userclip_plane_consts * 4; - - /* Setting nr_params here NOT to the size of the param and pull_param - * arrays, but to the number of uniform components vec4_visitor - * needs. vec4_visitor::setup_uniforms() will set it back to a proper value. 
- */ - stage_prog_data->nr_params = ALIGN(param_count, 4) / 4; - if (vs) { - stage_prog_data->nr_params += vs->num_samplers; - } + create_params_array(pipeline->device, vs, stage_prog_data); GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; prog_data->inputs_read = vp->program.Base.InputsRead; @@ -476,7 +493,6 @@ really_do_wm_prog(struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key, struct anv_pipeline *pipeline) { - struct gl_context *ctx = &brw->ctx; void *mem_ctx = ralloc_context(NULL); struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; struct gl_shader *fs = NULL; @@ -495,23 +511,7 @@ really_do_wm_prog(struct brw_context *brw, prog_data->computed_depth_mode = computed_depth_mode(&fp->program); - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - */ - int param_count; - if (fs) { - param_count = fs->num_uniform_components; - } else { - param_count = fp->program.Base.Parameters->NumParameters * 4; - } - /* The backend also sometimes adds params for texture size. 
*/ - param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; - prog_data->base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data->base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data->base.nr_params = param_count; + create_params_array(pipeline->device, fs, &prog_data->base); prog_data->barycentric_interp_modes = brw_compute_barycentric_interp_modes(brw, key->flat_shade, @@ -605,7 +605,6 @@ brw_codegen_cs_prog(struct brw_context *brw, struct brw_compute_program *cp, struct brw_cs_prog_key *key, struct anv_pipeline *pipeline) { - struct gl_context *ctx = &brw->ctx; const GLuint *program; void *mem_ctx = ralloc_context(NULL); GLuint program_size; @@ -618,19 +617,7 @@ brw_codegen_cs_prog(struct brw_context *brw, set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE); - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - */ - int param_count = cs->num_uniform_components; - - /* The backend also sometimes adds params for texture size. 
*/ - param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; - prog_data->base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data->base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data->base.nr_params = param_count; + create_params_array(pipeline->device, cs, &prog_data->base); program = brw_cs_emit(brw, mem_ctx, key, prog_data, &cp->program, prog, &program_size); @@ -1203,8 +1190,10 @@ anv_compiler_free(struct anv_pipeline *pipeline) for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) { if (pipeline->prog_data[stage]) { free(pipeline->prog_data[stage]->map_entries); - ralloc_free(pipeline->prog_data[stage]->param); - ralloc_free(pipeline->prog_data[stage]->pull_param); + /* We only ever set up the params array because we don't do + * non-UBO pull constants + */ + anv_device_free(pipeline->device, pipeline->prog_data[stage]->param); } } } -- cgit v1.2.3 From efc2cce01f7e938932b03e5e14f7ea0b8a63f734 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 26 Aug 2015 17:55:45 -0700 Subject: HACK: Don't call nir_setup_uniforms We're doing our own uniform setup and we don't need to call into the entire GL stack to mess with things. 
--- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 6272b61a98b..cd2b850581e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -42,7 +42,8 @@ fs_visitor::emit_nir_code() */ nir_setup_inputs(nir); nir_setup_outputs(nir); - nir_setup_uniforms(nir); + uniforms = nir->num_uniforms; + //nir_setup_uniforms(nir); nir_emit_system_values(nir); /* get the main function and emit it */ -- cgit v1.2.3 From c8365c55f551b357bdee0aace5569e970c6c3758 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 26 Aug 2015 17:56:40 -0700 Subject: vk/cmd_buffer: Set the CONSTANTS_REL_GENERAL flag on execbuf This tells the kernel that the push constant buffers are relative to the dynamic state base address. --- src/vulkan/anv_batch_chain.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index af90fb7ae21..77a5bd6f9ec 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -928,7 +928,8 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) .num_cliprects = 0, .DR1 = 0, .DR4 = 0, - .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER, + .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER | + I915_EXEC_CONSTANTS_REL_GENERAL, .rsvd1 = cmd_buffer->device->context_id, .rsvd2 = 0, }; -- cgit v1.2.3 From 375a65d5de218fc3686b6b2bc719acf1c82a410c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 26 Aug 2015 17:58:22 -0700 Subject: vk/private.h: Handle a NULL bo but valid offset in __gen_combine_address --- src/vulkan/anv_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index cb8defd23b6..05799276718 100644 --- a/src/vulkan/anv_private.h +++ 
b/src/vulkan/anv_private.h @@ -505,7 +505,7 @@ __gen_combine_address(struct anv_batch *batch, void *location, const struct anv_address address, uint32_t delta) { if (address.bo == NULL) { - return delta; + return address.offset + delta; } else { assert(batch->start <= location && location < batch->end); -- cgit v1.2.3 From 4e3ee043c0ab68faf5c853dd361e68974b55e8db Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 26 Aug 2015 17:57:51 -0700 Subject: vk/gen8: Add support for push constants --- src/vulkan/anv_cmd_buffer.c | 28 ++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 4 ++++ src/vulkan/gen8_cmd_buffer.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index a2a2833b62e..963edcf06ca 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -661,6 +661,34 @@ void anv_CmdWaitEvents( stub(); } +struct anv_state +anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, + VkShaderStage stage) +{ + struct anv_push_constant_data *data = + cmd_buffer->state.push_constants[stage].data; + struct brw_stage_prog_data *prog_data = + cmd_buffer->state.pipeline->prog_data[stage]; + + /* If we don't actually have any push constants, bail. 
*/ + if (data == NULL || prog_data->nr_params == 0) + return (struct anv_state) { .offset = 0 }; + + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + prog_data->nr_params * sizeof(float), + 32 /* bottom 5 bits MBZ */); + + /* Walk through the param array and fill the buffer with data */ + uint32_t *u32_map = state.map; + for (unsigned i = 0; i < prog_data->nr_params; i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); + } + + return state; +} + void anv_CmdPushConstants( VkCmdBuffer cmdBuffer, VkPipelineLayout layout, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 05799276718..55affd160d9 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -841,6 +841,10 @@ void gen8_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); +struct anv_state +anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, + VkShaderStage stage); + void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, struct anv_render_pass *pass, const VkClearValue *clear_values); diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 6d5004a1ca2..37a53c00156 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -29,6 +29,41 @@ #include "anv_private.h" +static void +gen8_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t stage; + + static const uint32_t push_constant_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 21, + [VK_SHADER_STAGE_TESS_CONTROL] = 25, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 26, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 22, + [VK_SHADER_STAGE_FRAGMENT] = 23, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + uint32_t flushed = 0; + + for_each_bit(stage, cmd_buffer->state.push_constants_dirty) { + struct anv_state state = 
anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CONSTANT_VS, + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer0 = { .offset = state.offset }, + .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + + flushed |= 1 << stage; + } + + cmd_buffer->state.push_constants_dirty &= ~flushed; +} + static void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { @@ -84,6 +119,9 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.descriptors_dirty) anv_flush_descriptor_sets(cmd_buffer); + if (cmd_buffer->state.push_constants_dirty) + gen8_cmd_buffer_flush_push_constants(cmd_buffer); + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, -- cgit v1.2.3 From c313a989b4354318e6842867af35e58f686978bc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 27 Aug 2015 15:24:04 -0700 Subject: spirv: Bump to the public revision 31 --- src/glsl/nir/spirv.h | 41 +++++++++++++++++++++++++++-------------- src/glsl/nir/spirv_to_nir.c | 4 ---- 2 files changed, 27 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv.h b/src/glsl/nir/spirv.h index 55bdcbee8b5..ea3799e2d8f 100644 --- a/src/glsl/nir/spirv.h +++ b/src/glsl/nir/spirv.h @@ -1,5 +1,5 @@ /* -** Copyright (c) 2015 The Khronos Group Inc. +** Copyright (c) 2014-2015 The Khronos Group Inc. 
** ** Permission is hereby granted, free of charge, to any person obtaining a copy ** of this software and/or associated documentation files (the "Materials"), @@ -53,6 +53,7 @@ typedef unsigned int SpvId; static const unsigned int SpvMagicNumber = 0x07230203; static const unsigned int SpvVersion = 99; +static const unsigned int SpvRevision = 31; static const unsigned int SpvOpCodeMask = 0xffff; static const unsigned int SpvWordCountShift = 16; @@ -130,7 +131,6 @@ typedef enum SpvStorageClass_ { SpvStorageClassPrivateGlobal = 6, SpvStorageClassFunction = 7, SpvStorageClassGeneric = 8, - SpvStorageClassPushConstant = 9, SpvStorageClassAtomicCounter = 10, SpvStorageClassImage = 11, } SpvStorageClass; @@ -245,9 +245,10 @@ typedef enum SpvImageOperandsShift_ { SpvImageOperandsBiasShift = 0, SpvImageOperandsLodShift = 1, SpvImageOperandsGradShift = 2, - SpvImageOperandsOffsetShift = 3, - SpvImageOperandsOffsetsShift = 4, - SpvImageOperandsSampleShift = 5, + SpvImageOperandsConstOffsetShift = 3, + SpvImageOperandsOffsetShift = 4, + SpvImageOperandsConstOffsetsShift = 5, + SpvImageOperandsSampleShift = 6, } SpvImageOperandsShift; typedef enum SpvImageOperandsMask_ { @@ -255,9 +256,10 @@ typedef enum SpvImageOperandsMask_ { SpvImageOperandsBiasMask = 0x00000001, SpvImageOperandsLodMask = 0x00000002, SpvImageOperandsGradMask = 0x00000004, - SpvImageOperandsOffsetMask = 0x00000008, - SpvImageOperandsOffsetsMask = 0x00000010, - SpvImageOperandsSampleMask = 0x00000020, + SpvImageOperandsConstOffsetMask = 0x00000008, + SpvImageOperandsOffsetMask = 0x00000010, + SpvImageOperandsConstOffsetsMask = 0x00000020, + SpvImageOperandsSampleMask = 0x00000040, } SpvImageOperandsMask; typedef enum SpvFPFastMathModeShift_ { @@ -302,9 +304,8 @@ typedef enum SpvFunctionParameterAttribute_ { SpvFunctionParameterAttributeSret = 3, SpvFunctionParameterAttributeNoAlias = 4, SpvFunctionParameterAttributeNoCapture = 5, - SpvFunctionParameterAttributeSVM = 6, - SpvFunctionParameterAttributeNoWrite = 
7, - SpvFunctionParameterAttributeNoReadWrite = 8, + SpvFunctionParameterAttributeNoWrite = 6, + SpvFunctionParameterAttributeNoReadWrite = 7, } SpvFunctionParameterAttribute; typedef enum SpvDecoration_ { @@ -355,7 +356,6 @@ typedef enum SpvDecoration_ { typedef enum SpvBuiltIn_ { SpvBuiltInPosition = 0, SpvBuiltInPointSize = 1, - SpvBuiltInClipVertex = 2, SpvBuiltInClipDistance = 3, SpvBuiltInCullDistance = 4, SpvBuiltInVertexId = 5, @@ -525,6 +525,19 @@ typedef enum SpvCapability_ { SpvCapabilityLiteralSampler = 20, SpvCapabilityAtomicStorage = 21, SpvCapabilityInt16 = 22, + SpvCapabilityTessellationPointSize = 23, + SpvCapabilityGeometryPointSize = 24, + SpvCapabilityImageGatherExtended = 25, + SpvCapabilityStorageImageExtendedFormats = 26, + SpvCapabilityStorageImageMultisample = 27, + SpvCapabilityUniformBufferArrayDynamicIndexing = 28, + SpvCapabilitySampledImageArrayDynamicIndexing = 29, + SpvCapabilityStorageBufferArrayDynamicIndexing = 30, + SpvCapabilityStorageImageArrayDynamicIndexing = 31, + SpvCapabilityClipDistance = 32, + SpvCapabilityCullDistance = 33, + SpvCapabilityImageCubeArray = 34, + SpvCapabilitySampleRateShading = 35, } SpvCapability; typedef enum SpvOp_ { @@ -740,9 +753,9 @@ typedef enum SpvOp_ { SpvOpAtomicIDecrement = 233, SpvOpAtomicIAdd = 234, SpvOpAtomicISub = 235, - SpvOpAtomicIMin = 236, + SpvOpAtomicSMin = 236, SpvOpAtomicUMin = 237, - SpvOpAtomicIMax = 238, + SpvOpAtomicSMax = 238, SpvOpAtomicUMax = 239, SpvOpAtomicAnd = 240, SpvOpAtomicOr = 241, diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 771637e8912..b09196ccd98 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -636,10 +636,6 @@ vtn_get_builtin_location(SpvBuiltIn builtin, int *location, *location = VARYING_SLOT_PSIZ; *mode = nir_var_shader_out; break; - case SpvBuiltInClipVertex: - *location = VARYING_SLOT_CLIP_VERTEX; - *mode = nir_var_shader_out; - break; case SpvBuiltInClipDistance: *location = 
VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */ *mode = nir_var_shader_in; -- cgit v1.2.3 From 941b48e992ac2272d3034e987a1313f8f5c1ca4f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 07:08:58 -0700 Subject: vk/image: Let anv_image have one anv_surface per aspect Split anv_image::primary_surface into two: anv_image::color_surface and depth_surface. --- src/vulkan/anv_image.c | 82 +++++++++++++++++++++++++++++++++++------------- src/vulkan/anv_private.h | 29 ++++++++++++++--- src/vulkan/anv_x11.c | 2 +- src/vulkan/gen7_state.c | 21 +++---------- src/vulkan/gen8_state.c | 21 +++---------- 5 files changed, 94 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 15a736c25bc..198f4a40212 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -258,26 +258,28 @@ anv_image_create(VkDevice _device, image->array_size = pCreateInfo->arraySize; image->surf_type = surf_type; - if (likely(!image->format->has_stencil || image->format->depth_format)) { - /* The image's primary surface is a color or depth surface. */ + if (likely(anv_format_is_color(image->format))) { r = anv_image_make_surface(create_info, image->format, &image->size, &image->alignment, - &image->primary_surface); - if (r != VK_SUCCESS) - goto fail; - } - - if (image->format->has_stencil) { - /* From the GPU's perspective, the depth buffer and stencil buffer are - * separate buffers. From Vulkan's perspective, though, depth and - * stencil reside in the same image. To satisfy Vulkan and the GPU, we - * place the depth and stencil buffers in the same bo. 
- */ - r = anv_image_make_surface(create_info, anv_format_s8_uint, - &image->size, &image->alignment, - &image->stencil_surface); + &image->color_surface); if (r != VK_SUCCESS) goto fail; + } else { + if (image->format->depth_format) { + r = anv_image_make_surface(create_info, image->format, + &image->size, &image->alignment, + &image->depth_surface); + if (r != VK_SUCCESS) + goto fail; + } + + if (image->format->has_stencil) { + r = anv_image_make_surface(create_info, anv_format_s8_uint, + &image->size, &image->alignment, + &image->stencil_surface); + if (r != VK_SUCCESS) + goto fail; + } } *pImage = anv_image_to_handle(image); @@ -462,28 +464,64 @@ anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, const VkAttachmentViewCreateInfo *pCreateInfo) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_surface *depth_surface = &image->primary_surface; - struct anv_surface *stencil_surface = &image->stencil_surface; view->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL; /* XXX: We don't handle any of these */ + assert(anv_format_is_depth_or_stencil(image->format)); anv_assert(pCreateInfo->mipLevel == 0); anv_assert(pCreateInfo->baseArraySlice == 0); anv_assert(pCreateInfo->arraySize == 1); view->bo = image->bo; - view->depth_stride = depth_surface->stride; - view->depth_offset = image->offset + depth_surface->offset; + view->depth_stride = image->depth_surface.stride; + view->depth_offset = image->offset + image->depth_surface.offset; view->depth_format = image->format->depth_format; view->depth_qpitch = 0; /* FINISHME: QPitch */ - view->stencil_stride = stencil_surface->stride; - view->stencil_offset = image->offset + stencil_surface->offset; + view->stencil_stride = image->stencil_surface.stride; + view->stencil_offset = image->offset + image->stencil_surface.offset; view->stencil_qpitch = 0; /* FINISHME: QPitch */ } +struct anv_surface * +anv_image_get_surface_for_aspect(struct anv_image *image, VkImageAspect aspect) +{ + 
switch (aspect) { + case VK_IMAGE_ASPECT_COLOR: + assert(anv_format_is_color(image->format)); + return &image->color_surface; + case VK_IMAGE_ASPECT_DEPTH: + assert(image->format->depth_format); + return &image->depth_surface; + case VK_IMAGE_ASPECT_STENCIL: + assert(image->format->has_stencil); + anv_finishme("stencil image views"); + abort(); + return &image->stencil_surface; + default: + unreachable("image does not have aspect"); + return NULL; + } +} + +/** The attachment may be a color view into a non-color image. */ +struct anv_surface * +anv_image_get_surface_for_color_attachment(struct anv_image *image) +{ + if (anv_format_is_color(image->format)) { + return &image->color_surface; + } else if (image->format->depth_format) { + return &image->depth_surface; + } else if (image->format->has_stencil) { + return &image->stencil_surface; + } else { + unreachable("image has bad format"); + return NULL; + } +} + void anv_color_attachment_view_init(struct anv_color_attachment_view *aview, struct anv_device *device, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 55affd160d9..2d037b4cbb3 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1063,11 +1063,26 @@ struct anv_image { /** RENDER_SURFACE_STATE.SurfaceType */ uint8_t surf_type; - /** Primary surface is either color or depth. */ - struct anv_surface primary_surface; + /** + * Image subsurfaces + * + * For each foo, anv_image::foo_surface is valid if and only if + * anv_image::format has a foo aspect. + * + * The hardware requires that the depth buffer and stencil buffer be + * separate surfaces. From Vulkan's perspective, though, depth and stencil + * reside in the same VkImage. To satisfy both the hardware and Vulkan, we + * allocate the depth and stencil buffers as separate surfaces in the same + * bo. + */ + union { + struct anv_surface color_surface; - /** Stencil surface is optional. 
*/ - struct anv_surface stencil_surface; + struct { + struct anv_surface depth_surface; + struct anv_surface stencil_surface; + }; + }; }; struct anv_surface_view { @@ -1128,6 +1143,12 @@ VkResult anv_image_create(VkDevice _device, const struct anv_image_create_info *info, VkImage *pImage); +struct anv_surface * +anv_image_get_surface_for_aspect(struct anv_image *image, VkImageAspect aspect); + +struct anv_surface * +anv_image_get_surface_for_color_attachment(struct anv_image *image); + void anv_image_view_init(struct anv_image_view *view, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, diff --git a/src/vulkan/anv_x11.c b/src/vulkan/anv_x11.c index aee5775c9de..f65a86487cb 100644 --- a/src/vulkan/anv_x11.c +++ b/src/vulkan/anv_x11.c @@ -145,7 +145,7 @@ VkResult anv_CreateSwapChainWSI( image = anv_image_from_handle(image_h); assert(anv_format_is_color(image->format)); - surface = &image->primary_surface; + surface = &image->color_surface; anv_AllocMemory(_device, &(VkMemoryAllocInfo) { diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 801716ea884..faf99a5aef8 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -273,7 +273,8 @@ gen7_image_view_init(struct anv_image_view *iview, const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; struct anv_surface_view *view = &iview->view; - struct anv_surface *surface; + struct anv_surface *surface = + anv_image_get_surface_for_aspect(image, range->aspect); const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); @@ -284,21 +285,6 @@ gen7_image_view_init(struct anv_image_view *iview, if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) anv_finishme("non-2D image views"); - switch (pCreateInfo->subresourceRange.aspect) { - case VK_IMAGE_ASPECT_STENCIL: - anv_finishme("stencil image views"); - abort(); - break; - case VK_IMAGE_ASPECT_DEPTH: - case VK_IMAGE_ASPECT_COLOR: - view->offset = image->offset; - surface = 
&image->primary_surface; - break; - default: - unreachable(""); - break; - } - view->bo = image->bo; view->offset = image->offset + surface->offset; view->format = anv_format_for_vk_format(pCreateInfo->format); @@ -381,7 +367,8 @@ gen7_color_attachment_view_init(struct anv_color_attachment_view *aview, { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); struct anv_surface_view *view = &aview->view; - struct anv_surface *surface = &image->primary_surface; + struct anv_surface *surface = + anv_image_get_surface_for_color_attachment(image); aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 0d65f169231..5e79a37f402 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -149,7 +149,8 @@ gen8_image_view_init(struct anv_image_view *iview, const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; struct anv_surface_view *view = &iview->view; - struct anv_surface *surface; + struct anv_surface *surface = + anv_image_get_surface_for_aspect(image, range->aspect); const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); @@ -160,21 +161,6 @@ gen8_image_view_init(struct anv_image_view *iview, if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) anv_finishme("non-2D image views"); - switch (pCreateInfo->subresourceRange.aspect) { - case VK_IMAGE_ASPECT_STENCIL: - anv_finishme("stencil image views"); - abort(); - break; - case VK_IMAGE_ASPECT_DEPTH: - case VK_IMAGE_ASPECT_COLOR: - view->offset = image->offset; - surface = &image->primary_surface; - break; - default: - unreachable(""); - break; - } - view->bo = image->bo; view->offset = image->offset + surface->offset; view->format = format_info; @@ -269,7 +255,8 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); struct anv_surface_view *view = &aview->view; - struct anv_surface *surface = 
&image->primary_surface; + struct anv_surface *surface = + anv_image_get_surface_for_color_attachment(image); const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); -- cgit v1.2.3 From 4461392343b76050180c9d2004d5b617fdbd2e1a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 07:35:39 -0700 Subject: vk: Remove dummy anv_depth_stencil_view --- src/vulkan/gen7_cmd_buffer.c | 74 ++++++++++++++++++++++++++-------------- src/vulkan/gen8_cmd_buffer.c | 80 ++++++++++++++++++++++++++++---------------- 2 files changed, 99 insertions(+), 55 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 74fc60fa84c..6eb5d48c039 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -529,11 +529,8 @@ void gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass) { - static const struct anv_depth_stencil_view null_view = - { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_depth_stencil_view *view; + const struct anv_depth_stencil_view *view = NULL; cmd_buffer->state.subpass = subpass; @@ -544,34 +541,59 @@ gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, fb->attachments[subpass->depth_stencil_attachment]; assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); view = (const struct anv_depth_stencil_view *)aview; - } else { - view = &null_view; } - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER, - .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = view->depth_stride > 0, - .StencilWriteEnable = view->stencil_stride > 0, - .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = view->depth_format, - .SurfacePitch = view->depth_stride > 0 ? 
view->depth_stride - 1 : 0, - .SurfaceBaseAddress = { view->bo, view->depth_offset }, - .Height = cmd_buffer->state.framebuffer->height - 1, - .Width = cmd_buffer->state.framebuffer->width - 1, - .LOD = 0, - .Depth = 1 - 1, - .MinimumArrayElement = 0, - .DepthBufferObjectControlState = GEN7_MOCS, - .RenderTargetViewExtent = 1 - 1); + if (view) { + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = view->depth_stride > 0, + .StencilWriteEnable = view->stencil_stride > 0, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = view->depth_format, + .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->depth_offset }, + .Height = fb->height - 1, + .Width = fb->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GEN7_MOCS, + .RenderTargetViewExtent = 1 - 1); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER, + .StencilBufferObjectControlState = GEN7_MOCS, + .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->stencil_offset }); + } else { + /* Even when no depth buffer is present, the hardware requires that + * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says: + * + * If a null depth buffer is bound, the driver must instead bind depth as: + * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D + * 3DSTATE_DEPTH.Width = 1 + * 3DSTATE_DEPTH.Height = 1 + * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM + * 3DSTATE_DEPTH.SurfaceBaseAddress = 0 + * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 + * + * The PRM is wrong, though. The width and height must be programmed to + * actual framebuffer's width and height. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .SurfaceFormat = D16_UNORM, + .Width = fb->width - 1, + .Height = fb->height - 1); + + /* Disable the stencil buffer. */ + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER); + } /* Disable hierarchial depth buffers. */ anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER); - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER, - .StencilBufferObjectControlState = GEN7_MOCS, - .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, - .SurfaceBaseAddress = { view->bo, view->stencil_offset }); - /* Clear the clear params. */ anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CLEAR_PARAMS); } diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 37a53c00156..57f061b9e8f 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -460,50 +460,72 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { struct anv_subpass *subpass = cmd_buffer->state.subpass; struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_depth_stencil_view *view; - - static const struct anv_depth_stencil_view null_view = - { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; + const struct anv_depth_stencil_view *view = NULL; if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { const struct anv_attachment_view *aview = fb->attachments[subpass->depth_stencil_attachment]; assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); view = (const struct anv_depth_stencil_view *)aview; - } else { - view = &null_view; } /* FIXME: Implement the PMA stall W/A */ /* FIXME: Width and Height are wrong */ - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, - .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = view->depth_stride > 0, - .StencilWriteEnable = view->stencil_stride > 0, - .HierarchicalDepthBufferEnable = 
false, - .SurfaceFormat = view->depth_format, - .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0, - .SurfaceBaseAddress = { view->bo, view->depth_offset }, - .Height = cmd_buffer->state.framebuffer->height - 1, - .Width = cmd_buffer->state.framebuffer->width - 1, - .LOD = 0, - .Depth = 1 - 1, - .MinimumArrayElement = 0, - .DepthBufferObjectControlState = GEN8_MOCS, - .RenderTargetViewExtent = 1 - 1, - .SurfaceQPitch = view->depth_qpitch >> 2); + if (view) { + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = view->depth_stride > 0, + .StencilWriteEnable = view->stencil_stride > 0, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = view->depth_format, + .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->depth_offset }, + .Height = fb->height - 1, + .Width = fb->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GEN8_MOCS, + .RenderTargetViewExtent = 1 - 1, + .SurfaceQPitch = view->depth_qpitch >> 2); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, + .StencilBufferEnable = view->stencil_stride > 0, + .StencilBufferObjectControlState = GEN8_MOCS, + .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->stencil_offset }, + .SurfaceQPitch = view->stencil_qpitch >> 2); + } else { + /* Even when no depth buffer is present, the hardware requires that + * 3DSTATE_DEPTH_BUFFER be programmed correctly. 
The Broadwell PRM says: + * + * If a null depth buffer is bound, the driver must instead bind depth as: + * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D + * 3DSTATE_DEPTH.Width = 1 + * 3DSTATE_DEPTH.Height = 1 + * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM + * 3DSTATE_DEPTH.SurfaceBaseAddress = 0 + * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 + * + * The PRM is wrong, though. The width and height must be programmed to + * actual framebuffer's width and height. + */ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .SurfaceFormat = D16_UNORM, + .Width = fb->width - 1, + .Height = fb->height - 1); + + /* Disable the stencil buffer. */ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER); + } /* Disable hierarchial depth buffers. */ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, - .StencilBufferEnable = view->stencil_stride > 0, - .StencilBufferObjectControlState = GEN8_MOCS, - .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, - .SurfaceBaseAddress = { view->bo, view->stencil_offset }, - .SurfaceQPitch = view->stencil_qpitch >> 2); - /* Clear the clear params. */ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS); } -- cgit v1.2.3 From 798acb2464f1a7baf58b0085d020d7b2b3d21de7 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 07:36:28 -0700 Subject: vk/gen7: Fix gen of emitted packet in gen7_batch_lri() Emit GEN7_MI_LOAD_REGISTER_IMM, not the GEN8 version. 
--- src/vulkan/gen7_cmd_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 6eb5d48c039..d9644e2a06a 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -387,7 +387,7 @@ gen7_batch_lrm(struct anv_batch *batch, static void gen7_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) { - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, + anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_IMM, .RegisterOffset = reg, .DataDWord = imm); } -- cgit v1.2.3 From b2ee317e24d9d236d0050b2a6188df379e913c45 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 07:44:32 -0700 Subject: vk: Fix format of anv_depth_stencil_view The format of the view itself and of the view's image may differ. Moreover, if the view's format has no depth aspect but the image's format does, we must not program the depth buffer. Ditto for stencil. --- src/vulkan/anv_image.c | 30 ++++++++++++++++++++++-------- src/vulkan/anv_private.h | 1 + src/vulkan/gen7_cmd_buffer.c | 34 +++++++++++++++++++++------------- src/vulkan/gen8_cmd_buffer.c | 38 +++++++++++++++++++++++--------------- 4 files changed, 67 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 198f4a40212..6ab541e5bce 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -475,14 +475,28 @@ anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, view->bo = image->bo; - view->depth_stride = image->depth_surface.stride; - view->depth_offset = image->offset + image->depth_surface.offset; - view->depth_format = image->format->depth_format; - view->depth_qpitch = 0; /* FINISHME: QPitch */ - - view->stencil_stride = image->stencil_surface.stride; - view->stencil_offset = image->offset + image->stencil_surface.offset; - view->stencil_qpitch = 0; /* FINISHME: QPitch */ + view->format = 
anv_format_for_vk_format(pCreateInfo->format); + assert(anv_format_is_depth_or_stencil(view->format)); + + if (view->format->depth_format) { + view->depth_stride = image->depth_surface.stride; + view->depth_offset = image->offset + image->depth_surface.offset; + view->depth_qpitch = 0; /* FINISHME: QPitch */ + } else { + view->depth_stride = 0; + view->depth_offset = 0; + view->depth_qpitch = 0; + } + + if (view->format->has_stencil) { + view->stencil_stride = image->stencil_surface.stride; + view->stencil_offset = image->offset + image->stencil_surface.offset; + view->stencil_qpitch = 0; /* FINISHME: QPitch */ + } else { + view->stencil_stride = 0; + view->stencil_offset = 0; + view->stencil_qpitch = 0; + } } struct anv_surface * diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 2d037b4cbb3..594b4fedc6c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1119,6 +1119,7 @@ struct anv_color_attachment_view { struct anv_depth_stencil_view { struct anv_attachment_view base; + const struct anv_format *format; /**< VkAttachmentViewCreateInfo::format */ struct anv_bo *bo; diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index d9644e2a06a..1028b6dae22 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -543,14 +543,18 @@ gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, view = (const struct anv_depth_stencil_view *)aview; } - if (view) { + const bool has_depth = view && view->format->depth_format; + const bool has_stencil = view && view->format->has_stencil; + + /* Emit 3DSTATE_DEPTH_BUFFER */ + if (has_depth) { anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER, .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = view->depth_stride > 0, - .StencilWriteEnable = view->stencil_stride > 0, + .DepthWriteEnable = view->format->depth_format, + .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = view->depth_format, - 
.SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0, + .SurfaceFormat = view->format->depth_format, + .SurfacePitch = view->depth_stride - 1, .SurfaceBaseAddress = { view->bo, view->depth_offset }, .Height = fb->height - 1, .Width = fb->width - 1, @@ -559,11 +563,6 @@ gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, .MinimumArrayElement = 0, .DepthBufferObjectControlState = GEN7_MOCS, .RenderTargetViewExtent = 1 - 1); - - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER, - .StencilBufferObjectControlState = GEN7_MOCS, - .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, - .SurfaceBaseAddress = { view->bo, view->stencil_offset }); } else { /* Even when no depth buffer is present, the hardware requires that * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says: @@ -579,15 +578,24 @@ gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 * * The PRM is wrong, though. The width and height must be programmed to - * actual framebuffer's width and height. + * actual framebuffer's width and height, even when neither depth buffer + * nor stencil buffer is present. */ anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER, .SurfaceType = SURFTYPE_2D, .SurfaceFormat = D16_UNORM, .Width = fb->width - 1, - .Height = fb->height - 1); + .Height = fb->height - 1, + .StencilWriteEnable = has_stencil); + } - /* Disable the stencil buffer. 
*/ + /* Emit 3DSTATE_STENCIL_BUFFER */ + if (has_stencil) { + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER, + .StencilBufferObjectControlState = GEN7_MOCS, + .SurfacePitch = view->stencil_stride - 1, + .SurfaceBaseAddress = { view->bo, view->stencil_offset }); + } else { anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER); } diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 57f061b9e8f..0ec0f666ae0 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -469,17 +469,21 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) view = (const struct anv_depth_stencil_view *)aview; } + const bool has_depth = view && view->format->depth_format; + const bool has_stencil = view && view->format->has_stencil; + /* FIXME: Implement the PMA stall W/A */ /* FIXME: Width and Height are wrong */ - if (view) { + /* Emit 3DSTATE_DEPTH_BUFFER */ + if (has_depth) { anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = view->depth_stride > 0, - .StencilWriteEnable = view->stencil_stride > 0, + .DepthWriteEnable = view->format->depth_format, + .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = view->depth_format, - .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0, + .SurfaceFormat = view->format->depth_format, + .SurfacePitch = view->depth_stride - 1, .SurfaceBaseAddress = { view->bo, view->depth_offset }, .Height = fb->height - 1, .Width = fb->width - 1, @@ -489,13 +493,6 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .DepthBufferObjectControlState = GEN8_MOCS, .RenderTargetViewExtent = 1 - 1, .SurfaceQPitch = view->depth_qpitch >> 2); - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, - .StencilBufferEnable = view->stencil_stride > 0, - .StencilBufferObjectControlState = GEN8_MOCS, - .SurfacePitch = view->stencil_stride > 0 ? 
view->stencil_stride - 1 : 0, - .SurfaceBaseAddress = { view->bo, view->stencil_offset }, - .SurfaceQPitch = view->stencil_qpitch >> 2); } else { /* Even when no depth buffer is present, the hardware requires that * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says: @@ -511,15 +508,26 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 * * The PRM is wrong, though. The width and height must be programmed to - * actual framebuffer's width and height. + * actual framebuffer's width and height, even when neither depth buffer + * nor stencil buffer is present. */ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, .SurfaceType = SURFTYPE_2D, .SurfaceFormat = D16_UNORM, .Width = fb->width - 1, - .Height = fb->height - 1); + .Height = fb->height - 1, + .StencilWriteEnable = has_stencil); + } - /* Disable the stencil buffer. */ + /* Emit 3DSTATE_STENCIL_BUFFER */ + if (has_stencil) { + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, + .StencilBufferEnable = true, + .StencilBufferObjectControlState = GEN8_MOCS, + .SurfacePitch = view->stencil_stride - 1, + .SurfaceBaseAddress = { view->bo, view->stencil_offset }, + .SurfaceQPitch = view->stencil_qpitch >> 2); + } else { anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER); } -- cgit v1.2.3 From 35b0262a2d80b8b21dfccaf9a099278d3f3f6a6c Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 07:46:16 -0700 Subject: vk/gen7: Add func gen7_cmd_buffer_emit_depth_stencil() This patch moves all the GEN7_3DSTATE_DEPTH_BUFFER code from gen7_cmd_buffer_begin_subpass() into a new function gen7_cmd_buffer_emit_depth_stencil(). 
--- src/vulkan/gen7_cmd_buffer.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 1028b6dae22..c157155cd99 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -525,17 +525,13 @@ void gen7_CmdPipelineBarrier( stub(); } -void -gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) +static void +gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { + struct anv_subpass *subpass = cmd_buffer->state.subpass; struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_depth_stencil_view *view = NULL; - cmd_buffer->state.subpass = subpass; - - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { const struct anv_attachment_view *aview = fb->attachments[subpass->depth_stencil_attachment]; @@ -606,6 +602,16 @@ gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CLEAR_PARAMS); } +void +gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + cmd_buffer->state.subpass = subpass; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + gen7_cmd_buffer_emit_depth_stencil(cmd_buffer); +} + static void begin_render_pass(struct anv_cmd_buffer *cmd_buffer, const VkRenderPassBeginInfo* pRenderPassBegin) -- cgit v1.2.3 From c6f19b42486dd8f17c19779e38f7ee84f4f14a52 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 07:52:19 -0700 Subject: vk: Don't duplicate anv_depth_stencil_view's surface data In anv_depth_stencil_view, replace the members bo depth_offset depth_stride depth_format depth_qpitch stencil_offset stencil_stride stencil_qpitch with the single member const struct anv_image *image The removed members duplicated data in 
anv_image::depth_surface and anv_image::stencil_surface. --- src/vulkan/anv_image.c | 26 +++----------------------- src/vulkan/anv_private.h | 12 +----------- src/vulkan/gen7_cmd_buffer.c | 15 +++++++++++---- src/vulkan/gen8_cmd_buffer.c | 19 +++++++++++++------ 4 files changed, 28 insertions(+), 44 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 6ab541e5bce..242de52f639 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -468,35 +468,15 @@ anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, view->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL; /* XXX: We don't handle any of these */ - assert(anv_format_is_depth_or_stencil(image->format)); anv_assert(pCreateInfo->mipLevel == 0); anv_assert(pCreateInfo->baseArraySlice == 0); anv_assert(pCreateInfo->arraySize == 1); - view->bo = image->bo; - + view->image = image; view->format = anv_format_for_vk_format(pCreateInfo->format); - assert(anv_format_is_depth_or_stencil(view->format)); - - if (view->format->depth_format) { - view->depth_stride = image->depth_surface.stride; - view->depth_offset = image->offset + image->depth_surface.offset; - view->depth_qpitch = 0; /* FINISHME: QPitch */ - } else { - view->depth_stride = 0; - view->depth_offset = 0; - view->depth_qpitch = 0; - } - if (view->format->has_stencil) { - view->stencil_stride = image->stencil_surface.stride; - view->stencil_offset = image->offset + image->stencil_surface.offset; - view->stencil_qpitch = 0; /* FINISHME: QPitch */ - } else { - view->stencil_stride = 0; - view->stencil_offset = 0; - view->stencil_qpitch = 0; - } + assert(anv_format_is_depth_or_stencil(image->format)); + assert(anv_format_is_depth_or_stencil(view->format)); } struct anv_surface * diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 594b4fedc6c..5cbc67b7c2c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1119,18 +1119,8 @@ struct 
anv_color_attachment_view { struct anv_depth_stencil_view { struct anv_attachment_view base; + const struct anv_image *image; /**< VkAttachmentViewCreateInfo::image */ const struct anv_format *format; /**< VkAttachmentViewCreateInfo::format */ - - struct anv_bo *bo; - - uint32_t depth_offset; /**< Offset into bo. */ - uint32_t depth_stride; /**< 3DSTATE_DEPTH_BUFFER.SurfacePitch */ - uint32_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ - uint16_t depth_qpitch; /**< 3DSTATE_DEPTH_BUFFER.SurfaceQPitch */ - - uint32_t stencil_offset; /**< Offset into bo. */ - uint32_t stencil_stride; /**< 3DSTATE_STENCIL_BUFFER.SurfacePitch */ - uint16_t stencil_qpitch; /**< 3DSTATE_STENCIL_BUFFER.SurfaceQPitch */ }; struct anv_image_create_info { diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index c157155cd99..84b733ca458 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -539,6 +539,7 @@ gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) view = (const struct anv_depth_stencil_view *)aview; } + const struct anv_image *image = view ? 
view->image : NULL; const bool has_depth = view && view->format->depth_format; const bool has_stencil = view && view->format->has_stencil; @@ -550,8 +551,11 @@ gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, .SurfaceFormat = view->format->depth_format, - .SurfacePitch = view->depth_stride - 1, - .SurfaceBaseAddress = { view->bo, view->depth_offset }, + .SurfacePitch = image->depth_surface.stride - 1, + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->depth_surface.offset, + }, .Height = fb->height - 1, .Width = fb->width - 1, .LOD = 0, @@ -589,8 +593,11 @@ gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) if (has_stencil) { anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER, .StencilBufferObjectControlState = GEN7_MOCS, - .SurfacePitch = view->stencil_stride - 1, - .SurfaceBaseAddress = { view->bo, view->stencil_offset }); + .SurfacePitch = image->stencil_surface.stride - 1, + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->offset + image->stencil_surface.offset, + }); } else { anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER); } diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 0ec0f666ae0..5aa78528f5c 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -469,6 +469,7 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) view = (const struct anv_depth_stencil_view *)aview; } + const struct anv_image *image = view ? 
view->image : NULL; const bool has_depth = view && view->format->depth_format; const bool has_stencil = view && view->format->has_stencil; @@ -483,8 +484,11 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, .SurfaceFormat = view->format->depth_format, - .SurfacePitch = view->depth_stride - 1, - .SurfaceBaseAddress = { view->bo, view->depth_offset }, + .SurfacePitch = image->depth_surface.stride - 1, + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->depth_surface.offset, + }, .Height = fb->height - 1, .Width = fb->width - 1, .LOD = 0, @@ -492,7 +496,7 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .MinimumArrayElement = 0, .DepthBufferObjectControlState = GEN8_MOCS, .RenderTargetViewExtent = 1 - 1, - .SurfaceQPitch = view->depth_qpitch >> 2); + .SurfaceQPitch = image->depth_surface.qpitch >> 2); } else { /* Even when no depth buffer is present, the hardware requires that * 3DSTATE_DEPTH_BUFFER be programmed correctly. 
The Broadwell PRM says: @@ -524,9 +528,12 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, .StencilBufferEnable = true, .StencilBufferObjectControlState = GEN8_MOCS, - .SurfacePitch = view->stencil_stride - 1, - .SurfaceBaseAddress = { view->bo, view->stencil_offset }, - .SurfaceQPitch = view->stencil_qpitch >> 2); + .SurfacePitch = image->stencil_surface.stride - 1, + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->offset + image->stencil_surface.offset, + }, + .SurfaceQPitch = image->stencil_surface.stride >> 2); } else { anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER); } -- cgit v1.2.3 From 641c25dd5505333ae4d260c6e821249c1be6da65 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 07:53:24 -0700 Subject: vk: Declare some local variables as const In anv_cmd_buffer_emit_depth_stencil(), declare 'subpass' and 'fb' as const. --- src/vulkan/gen7_cmd_buffer.c | 4 ++-- src/vulkan/gen8_cmd_buffer.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 84b733ca458..e98fac1f4d3 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -528,8 +528,8 @@ void gen7_CmdPipelineBarrier( static void gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { - struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_depth_stencil_view *view = NULL; if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 5aa78528f5c..87ec90e18aa 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -458,8 +458,8 
@@ void gen8_CmdDispatchIndirect( static void gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { - struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_depth_stencil_view *view = NULL; if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { -- cgit v1.2.3 From aacb7bb9b6080ac6a0c9166d5a5b615c6425b821 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 07:57:34 -0700 Subject: vk: Add func anv_cmd_buffer_get_depth_stencil_view() This function removes some duplicated code from genN_cmd_buffer_emit_depth_stencil(). --- src/vulkan/anv_cmd_buffer.c | 20 ++++++++++++++++++++ src/vulkan/anv_private.h | 2 ++ src/vulkan/gen7_cmd_buffer.c | 12 ++---------- src/vulkan/gen8_cmd_buffer.c | 12 ++---------- 4 files changed, 26 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 963edcf06ca..f8a630bece5 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -783,3 +783,23 @@ VkResult anv_ResetCommandPool( return VK_SUCCESS; } + +/** + * Return NULL if the current subpass has no depthstencil attachment. 
+ */ +const struct anv_depth_stencil_view * +anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + + if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED) + return NULL; + + const struct anv_attachment_view *aview = + fb->attachments[subpass->depth_stencil_attachment]; + + assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); + + return (const struct anv_depth_stencil_view *) aview; +} diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 5cbc67b7c2c..a27b2e5ed92 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -848,6 +848,8 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, struct anv_render_pass *pass, const VkClearValue *clear_values); +const struct anv_depth_stencil_view * +anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index e98fac1f4d3..8fe59bab1c8 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -528,17 +528,9 @@ void gen7_CmdPipelineBarrier( static void gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { - const struct anv_subpass *subpass = cmd_buffer->state.subpass; const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_depth_stencil_view *view = NULL; - - if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { - const struct anv_attachment_view *aview = - fb->attachments[subpass->depth_stencil_attachment]; - assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); - view = (const struct anv_depth_stencil_view *)aview; - } - + const struct anv_depth_stencil_view *view = 
+ anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); const struct anv_image *image = view ? view->image : NULL; const bool has_depth = view && view->format->depth_format; const bool has_stencil = view && view->format->has_stencil; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 87ec90e18aa..3a7f009dc66 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -458,17 +458,9 @@ void gen8_CmdDispatchIndirect( static void gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { - const struct anv_subpass *subpass = cmd_buffer->state.subpass; const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_depth_stencil_view *view = NULL; - - if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { - const struct anv_attachment_view *aview = - fb->attachments[subpass->depth_stencil_attachment]; - assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); - view = (const struct anv_depth_stencil_view *)aview; - } - + const struct anv_depth_stencil_view *view = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); const struct anv_image *image = view ? view->image : NULL; const bool has_depth = view && view->format->depth_format; const bool has_stencil = view && view->format->has_stencil; -- cgit v1.2.3 From 104c4e5ddfc05918badbbca143c161f6e5fb3691 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 07:58:51 -0700 Subject: vk/meta: Don't skip clearing when clearing only depth attachment anv_cmd_buffer_clear_attachments() skipped the clear renderpass if no color attachments needed to be cleared, even if a depth attachment needed to be cleared. 
--- src/vulkan/anv_meta.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index cdd357ddcc9..201e3da41b3 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -324,7 +324,8 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, if (pass->has_stencil_clear_attachment) anv_finishme("stencil clear"); - if (pass->num_color_clear_attachments == 0) + if (pass->num_color_clear_attachments == 0 && + !pass->has_depth_clear_attachment) return; struct clear_instance_data instance_data[pass->num_color_clear_attachments]; -- cgit v1.2.3 From 4f852c76dcf6f0cf64d5b362fdf2555ccb8019ae Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 07:59:29 -0700 Subject: vk/meta: Save/restore VkDynamicDepthStencilState --- src/vulkan/anv_meta.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 201e3da41b3..63d9b3f3e47 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -198,6 +198,7 @@ struct anv_saved_state { struct anv_vertex_binding old_vertex_bindings[NUM_VB_USED]; struct anv_descriptor_set *old_descriptor_set0; struct anv_pipeline *old_pipeline; + struct anv_dynamic_ds_state *old_ds_state; struct anv_dynamic_cb_state *old_cb_state; }; @@ -209,6 +210,7 @@ anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, state->old_descriptor_set0 = cmd_buffer->state.descriptors[0].set; memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, sizeof(state->old_vertex_bindings)); + state->old_ds_state = cmd_buffer->state.ds_state; state->old_cb_state = cmd_buffer->state.cb_state; } @@ -225,6 +227,11 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; + if (cmd_buffer->state.ds_state != state->old_ds_state) { + cmd_buffer->state.ds_state = 
state->old_ds_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY; + } + if (cmd_buffer->state.cb_state != state->old_cb_state) { cmd_buffer->state.cb_state = state->old_cb_state; cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY; -- cgit v1.2.3 From bff2879abe689fe03d9877d17706ae8c076b65b1 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 07:59:59 -0700 Subject: vk/image: Don't abort when creating stencil image views When creating a stencil image view, log a FINISHME but don't abort. We're sooooo close to having this working. --- src/vulkan/anv_image.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 242de52f639..656193f774e 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -492,7 +492,6 @@ anv_image_get_surface_for_aspect(struct anv_image *image, VkImageAspect aspect) case VK_IMAGE_ASPECT_STENCIL: assert(image->format->has_stencil); anv_finishme("stencil image views"); - abort(); return &image->stencil_surface; default: unreachable("image does not have aspect"); -- cgit v1.2.3 From 31af126229a4c2a5a532e43433c57fad263b03af Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 08:00:56 -0700 Subject: vk: Program stencil ops in 3DSTATE_WM_DEPTH_STENCIL The driver ignored the Vulkan stencil, always programming the hardware stencil op to 0 (STENCILOP_KEEP). 
--- src/vulkan/gen7_pipeline.c | 16 ++++++++-------- src/vulkan/gen8_pipeline.c | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 7a54d7eebe4..a5c7a201690 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -146,14 +146,14 @@ static const uint32_t vk_to_gen_compare_op[] = { }; static const uint32_t vk_to_gen_stencil_op[] = { - [VK_STENCIL_OP_KEEP] = 0, - [VK_STENCIL_OP_ZERO] = 0, - [VK_STENCIL_OP_REPLACE] = 0, - [VK_STENCIL_OP_INC_CLAMP] = 0, - [VK_STENCIL_OP_DEC_CLAMP] = 0, - [VK_STENCIL_OP_INVERT] = 0, - [VK_STENCIL_OP_INC_WRAP] = 0, - [VK_STENCIL_OP_DEC_WRAP] = 0 + [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, + [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, + [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, + [VK_STENCIL_OP_INC_CLAMP] = STENCILOP_INCRSAT, + [VK_STENCIL_OP_DEC_CLAMP] = STENCILOP_DECRSAT, + [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, + [VK_STENCIL_OP_INC_WRAP] = STENCILOP_INCR, + [VK_STENCIL_OP_DEC_WRAP] = STENCILOP_DECR, }; static const uint32_t vk_to_gen_blend_op[] = { diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 9e87a6951b6..b1cb16234d5 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -244,14 +244,14 @@ static const uint32_t vk_to_gen_compare_op[] = { }; static const uint32_t vk_to_gen_stencil_op[] = { - [VK_STENCIL_OP_KEEP] = 0, - [VK_STENCIL_OP_ZERO] = 0, - [VK_STENCIL_OP_REPLACE] = 0, - [VK_STENCIL_OP_INC_CLAMP] = 0, - [VK_STENCIL_OP_DEC_CLAMP] = 0, - [VK_STENCIL_OP_INVERT] = 0, - [VK_STENCIL_OP_INC_WRAP] = 0, - [VK_STENCIL_OP_DEC_WRAP] = 0 + [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, + [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, + [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, + [VK_STENCIL_OP_INC_CLAMP] = STENCILOP_INCRSAT, + [VK_STENCIL_OP_DEC_CLAMP] = STENCILOP_DECRSAT, + [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, + [VK_STENCIL_OP_INC_WRAP] = STENCILOP_INCR, + 
[VK_STENCIL_OP_DEC_WRAP] = STENCILOP_DECR, }; static void -- cgit v1.2.3 From 14e1d58fb79ccb5034587092eaa3678f2642edc5 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 08:03:31 -0700 Subject: vk: Fix stride of stencil buffers Stencil buffers have strange pitch. The PRM says: The pitch must be set to 2x the value computed based on width, as the stencil buffer is stored with two rows interleaved. --- src/vulkan/gen7_cmd_buffer.c | 9 ++++++++- src/vulkan/gen8_cmd_buffer.c | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 8fe59bab1c8..b149673f405 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -585,7 +585,14 @@ gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) if (has_stencil) { anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER, .StencilBufferObjectControlState = GEN7_MOCS, - .SurfacePitch = image->stencil_surface.stride - 1, + + /* Stencil buffers have strange pitch. The PRM says: + * + * The pitch must be set to 2x the value computed based on width, + * as the stencil buffer is stored with two rows interleaved. + */ + .SurfacePitch = 2 * image->stencil_surface.stride - 1, + .SurfaceBaseAddress = { .bo = image->bo, .offset = image->offset + image->stencil_surface.offset, diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 3a7f009dc66..5737879ffe7 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -520,7 +520,14 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, .StencilBufferEnable = true, .StencilBufferObjectControlState = GEN8_MOCS, - .SurfacePitch = image->stencil_surface.stride - 1, + + /* Stencil buffers have strange pitch. 
The PRM says: + * + * The pitch must be set to 2x the value computed based on width, + * as the stencil buffer is stored with two rows interleaved. + */ + .SurfacePitch = 2 * image->stencil_surface.stride - 1, + .SurfaceBaseAddress = { .bo = image->bo, .offset = image->offset + image->stencil_surface.offset, -- cgit v1.2.3 From 053d32d2a5906057519e12ffd8bc946bee2f5c60 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 08:04:59 -0700 Subject: vk/image: Linear stencil buffers are illegal The hardware requires that stencil buffer memory be W-tiled. From the Sandybridge PRM: This buffer is supported only in Tile W memory. --- src/vulkan/anv_image.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 656193f774e..736a76881ff 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -103,20 +103,32 @@ static const struct anv_tile_info { [WMAJOR] = { 128, 32, 4096 }, }; -static uint32_t +/** + * Return -1 on failure. + */ +static int8_t anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) { if (anv_info->force_tile_mode) return anv_info->tile_mode; - if (anv_info->vk_info->format == VK_FORMAT_S8_UINT) - return WMAJOR; + /* The Sandybridge PRM says that the stencil buffer "is supported + * only in Tile W memory". 
+ */ switch (anv_info->vk_info->tiling) { case VK_IMAGE_TILING_LINEAR: - return LINEAR; + if (unlikely(anv_info->vk_info->format == VK_FORMAT_S8_UINT)) { + return -1; + } else { + return LINEAR; + } case VK_IMAGE_TILING_OPTIMAL: - return YMAJOR; + if (unlikely(anv_info->vk_info->format == VK_FORMAT_S8_UINT)) { + return WMAJOR; + } else { + return YMAJOR; + } default: assert(!"bad VKImageTiling"); return LINEAR; @@ -143,7 +155,9 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, const uint32_t levels = create_info->vk_info->mipLevels; const uint32_t array_size = create_info->vk_info->arraySize; - const uint8_t tile_mode = anv_image_choose_tile_mode(create_info); + const int8_t tile_mode = anv_image_choose_tile_mode(create_info); + if (tile_mode == -1) + return vk_error(VK_ERROR_INVALID_IMAGE); const struct anv_tile_info *tile_info = &anv_tile_info_table[tile_mode]; -- cgit v1.2.3 From 84cfc08c1059f6190b0ada811003abe93c624e39 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 08:07:15 -0700 Subject: vk/pipeline: Fix crash when the pipeline has no attributes If there are no attributes, don't emit 3DSTATE_VERTEX_ELEMENTS. That packet does not allow 0 attributes. 
--- src/vulkan/gen7_pipeline.c | 6 ++++-- src/vulkan/gen8_pipeline.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index a5c7a201690..0ed4727c37f 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -39,8 +39,10 @@ gen7_emit_vertex_input(struct anv_pipeline *pipeline, const uint32_t num_dwords = 1 + element_count * 2; uint32_t *p; - p = anv_batch_emitn(&pipeline->batch, num_dwords, - GEN7_3DSTATE_VERTEX_ELEMENTS); + if (info->attributeCount > 0) { + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GEN7_3DSTATE_VERTEX_ELEMENTS); + } for (uint32_t i = 0; i < info->attributeCount; i++) { const VkVertexInputAttributeDescription *desc = diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index b1cb16234d5..fae09f3fbbe 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -36,8 +36,10 @@ emit_vertex_input(struct anv_pipeline *pipeline, const uint32_t num_dwords = 1 + info->attributeCount * 2; uint32_t *p; - p = anv_batch_emitn(&pipeline->batch, num_dwords, - GEN8_3DSTATE_VERTEX_ELEMENTS); + if (info->attributeCount > 0) { + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GEN8_3DSTATE_VERTEX_ELEMENTS); + } for (uint32_t i = 0; i < info->attributeCount; i++) { const VkVertexInputAttributeDescription *desc = -- cgit v1.2.3 From a2d15ee698a377c41c8c7b7414a6202d93ba1d96 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 28 Aug 2015 08:08:12 -0700 Subject: vk/meta: Support stencil in vkCmdCopyImageToBuffer At Crucible commit 12e64a4, fixes the func.depthstencil.stencil-triangles.* tests on Broadwell. 
--- src/vulkan/anv_meta.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 63d9b3f3e47..c1dcb771819 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1230,12 +1230,17 @@ void anv_CmdCopyImageToBuffer( }, cmd_buffer); + VkFormat dest_format = src_image->format->vk_format; + if (dest_format == VK_FORMAT_S8_UINT) { + dest_format = VK_FORMAT_R8_UINT; + } + VkImage destImage; anv_CreateImage(vk_device, &(VkImageCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, - .format = src_image->format->vk_format, + .format = dest_format, .extent = { .width = pRegions[r].imageExtent.width, .height = pRegions[r].imageExtent.height, @@ -1262,7 +1267,7 @@ void anv_CmdCopyImageToBuffer( &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, - .format = src_image->format->vk_format, + .format = dest_format, .mipLevel = 0, .baseArraySlice = 0, .arraySize = 1, -- cgit v1.2.3 From ea56d0cb1d1f8be93ec6ec9ce24f0be84482b885 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 28 Aug 2015 12:00:25 -0700 Subject: glsl/types: Fix up function type hash table insertion --- src/glsl/glsl_types.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 76814e894ed..c976826733c 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -988,7 +988,7 @@ glsl_type::get_function_instance(const glsl_type *return_type, const glsl_type *t = new glsl_type(return_type, params, num_params); mtx_lock(&glsl_type::mutex); - _mesa_hash_table_insert(function_types, t, (void *) t); + entry = _mesa_hash_table_insert(function_types, t, (void *) t); } const glsl_type *t = (const glsl_type *)entry->data; -- cgit v1.2.3 From dbc3eb5bb4ac966abeef3a8dbc5c2127e52710a1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: 
Fri, 28 Aug 2015 12:13:17 -0700 Subject: vk/compiler: Pass the correct is_scalar value to brw_process_nir --- src/vulkan/anv_compiler.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index c0aed33bd12..40cd4846857 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -966,19 +966,26 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, fail_if(mesa_shader == NULL, "failed to create %s shader\n", stage_info[stage].name); + bool is_scalar; switch (stage) { case VK_SHADER_STAGE_VERTEX: mesa_shader->Program = &rzalloc(mesa_shader, struct brw_vertex_program)->program.Base; + is_scalar = compiler->screen->compiler->scalar_vs; break; case VK_SHADER_STAGE_GEOMETRY: mesa_shader->Program = &rzalloc(mesa_shader, struct brw_geometry_program)->program.Base; + is_scalar = false; break; case VK_SHADER_STAGE_FRAGMENT: mesa_shader->Program = &rzalloc(mesa_shader, struct brw_fragment_program)->program.Base; + is_scalar = true; break; case VK_SHADER_STAGE_COMPUTE: mesa_shader->Program = &rzalloc(mesa_shader, struct brw_compute_program)->program.Base; + is_scalar = true; break; + default: + unreachable("Unsupported shader stage"); } mesa_shader->Program->Parameters = @@ -999,7 +1006,7 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, brw_process_nir(mesa_shader->Program->nir, compiler->screen->devinfo, - NULL, mesa_shader->Stage, false); + NULL, mesa_shader->Stage, is_scalar); setup_nir_io(mesa_shader->Program, mesa_shader->Program->nir); -- cgit v1.2.3 From 98abed2441440b6d86f245f6cf59c98e7ddb0f59 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 28 Aug 2015 16:08:29 -0700 Subject: spirv: Use VERTEX_ID_ZERO_BASE for vertex id --- src/glsl/nir/spirv_to_nir.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index b09196ccd98..3cbaf3c8616 
100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -644,7 +644,10 @@ vtn_get_builtin_location(SpvBuiltIn builtin, int *location, /* XXX figure this out */ unreachable("unhandled builtin"); case SpvBuiltInVertexId: - *location = SYSTEM_VALUE_VERTEX_ID; + /* Vulkan defines VertexID to be zero-based and reserves the new + * builtin keyword VertexIndex to indicate the non-zero-based value. + */ + *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; *mode = nir_var_system_value; break; case SpvBuiltInInstanceId: -- cgit v1.2.3 From 5e7c7b2a4e6b9cbdbcdaa95caac787b4d4953b68 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 28 Aug 2015 16:17:45 -0700 Subject: spirv: Only do a block load if you're actually loading a uniform --- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 3cbaf3c8616..ffaebf10502 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1147,7 +1147,7 @@ vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, nir_deref *src_tail = get_deref_tail(src); struct vtn_ssa_value *val; - if (src->var->interface_type) + if (src->var->interface_type && src->var->data.mode == nir_var_uniform) val = vtn_block_load(b, src, src_type, src_tail); else val = _vtn_variable_load(b, src, src_type, src_tail); -- cgit v1.2.3 From 9cebdd78d8cbdc4915c26811cadac4a57f9d6426 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 28 Aug 2015 17:09:02 -0700 Subject: nir: Add a pass to lower outputs to temporary variables This pass can be used as a helper for NIR producers so they don't have to worry about creating the temporaries themselves. 
--- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir.h | 2 + src/glsl/nir/nir_lower_outputs_to_temporaries.c | 97 +++++++++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 src/glsl/nir/nir_lower_outputs_to_temporaries.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index da38e3576bd..acd13ef7745 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -40,6 +40,7 @@ NIR_FILES = \ nir/nir_lower_locals_to_regs.c \ nir/nir_lower_idiv.c \ nir/nir_lower_io.c \ + nir/nir_lower_outputs_to_temporaries.c \ nir/nir_lower_phis_to_scalar.c \ nir/nir_lower_samplers.cpp \ nir/nir_lower_system_values.c \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index f78596d5cc0..8a2396422b9 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1652,6 +1652,8 @@ void nir_lower_global_vars_to_local(nir_shader *shader); void nir_lower_locals_to_regs(nir_shader *shader); +void nir_lower_outputs_to_temporaries(nir_shader *shader); + void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, int (*type_size)(const struct glsl_type *)); diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c b/src/glsl/nir/nir_lower_outputs_to_temporaries.c new file mode 100644 index 00000000000..715f1f84c48 --- /dev/null +++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c @@ -0,0 +1,97 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in 
all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +/* + * Implements a pass that lowers output variables to a temporary plus an + * output variable with a single copy at each exit point of the shader. + * This way the output variable is only ever written. + * + * Because valid NIR requires that output variables are never read, this + * pass is more of a helper for NIR producers and must be run before the + * shader is ever validated. 
+ */ + +#include "nir.h" + +static void +emit_output_copies(nir_shader *shader, nir_variable *temp, nir_variable *output) +{ + nir_foreach_overload(shader, overload) { + if (!overload->impl || strcmp(overload->function->name, "main")) + continue; + + struct set_entry *block_entry; + set_foreach(overload->impl->end_block->predecessors, block_entry) { + struct nir_block *block = (void *)block_entry->key; + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_var_create(copy, output); + copy->variables[1] = nir_deref_var_create(copy, temp); + + nir_instr *last_instr = nir_block_last_instr(block); + if (last_instr && last_instr->type == nir_instr_type_jump) + nir_instr_insert_before(last_instr, ©->instr); + else + nir_instr_insert_after_block(block, ©->instr); + } + } +} + +void +nir_lower_outputs_to_temporaries(nir_shader *shader) +{ + struct exec_list old_outputs; + + exec_list_move_nodes_to(&shader->outputs, &old_outputs); + + /* Walk over all of the outputs turn each output into a temporary and + * make a new variable for the actual output. 
+ */ + foreach_list_typed(nir_variable, var, node, &old_outputs) { + nir_variable *output = ralloc(shader, nir_variable); + memcpy(output, var, sizeof *output); + + /* The orignal is now the temporary */ + nir_variable *temp = var; + + /* Move the original name over to the new output */ + if (output->name) + ralloc_steal(output, output->name); + + /* Give the temporary a new name with @out-temp appended */ + temp->name = ralloc_asprintf(temp, "%s@out-temp", output->name); + temp->data.mode = nir_var_global; + temp->constant_initializer = NULL; + + exec_list_push_tail(&shader->outputs, &output->node); + + emit_output_copies(shader, temp, output); + } + + exec_list_append(&shader->globals, &old_outputs); +} -- cgit v1.2.3 From 44e6ea74b0d3119eb12537a284d9376fc91f4c21 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 28 Aug 2015 17:21:19 -0700 Subject: spirv: lower outputs to temporaries --- src/glsl/nir/spirv_to_nir.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index ffaebf10502..612d2fff293 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2973,6 +2973,11 @@ spirv_to_nir(const uint32_t *words, size_t word_count, vtn_handle_phi_second_pass); } + /* Because we can still have output reads in NIR, we need to lower + * outputs to temporaries before we are truely finished. 
+ */ + nir_lower_outputs_to_temporaries(shader); + ralloc_free(b); return shader; -- cgit v1.2.3 From 9f9628e9dd64b2bae62269cc4ebb0f920a5833c5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 28 Aug 2015 22:31:03 -0700 Subject: vk/SPIR-V: Pull num_uniform_components out of the NIR shader --- src/vulkan/anv_compiler.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 40cd4846857..b330b245870 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -940,9 +940,10 @@ anv_compile_shader_glsl(struct anv_compiler *compiler, } static void -setup_nir_io(struct gl_program *prog, +setup_nir_io(struct gl_shader *mesa_shader, nir_shader *shader) { + struct gl_program *prog = mesa_shader->Program; foreach_list_typed(nir_variable, var, node, &shader->inputs) { prog->InputsRead |= BITFIELD64_BIT(var->data.location); } @@ -950,6 +951,8 @@ setup_nir_io(struct gl_program *prog, foreach_list_typed(nir_variable, var, node, &shader->outputs) { prog->OutputsWritten |= BITFIELD64_BIT(var->data.location); } + + mesa_shader->num_uniform_components = shader->num_uniforms; } static void @@ -1008,7 +1011,7 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, compiler->screen->devinfo, NULL, mesa_shader->Stage, is_scalar); - setup_nir_io(mesa_shader->Program, mesa_shader->Program->nir); + setup_nir_io(mesa_shader, mesa_shader->Program->nir); fail_if(mesa_shader->Program->nir == NULL, "failed to translate SPIR-V to NIR\n"); -- cgit v1.2.3 From 9d92b4fd0e1ca655179619fbacf0325c735b6f38 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 27 Apr 2015 21:13:44 -0700 Subject: nir: Import the revision 30 SPIR-V header from Khronos --- src/glsl/nir/spirv.h | 1304 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1304 insertions(+) create mode 100644 src/glsl/nir/spirv.h (limited to 'src') diff --git a/src/glsl/nir/spirv.h 
b/src/glsl/nir/spirv.h new file mode 100644 index 00000000000..93135c09596 --- /dev/null +++ b/src/glsl/nir/spirv.h @@ -0,0 +1,1304 @@ +/* +** Copyright (c) 2015 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. +*/ + +/* +** Specification revision 30. +** Enumeration tokens for SPIR-V, in three styles: C, C++, generic. +** - C++ will have the tokens in the "spv" name space, with no prefix. +** - C will have tokens with as "Spv" prefix. 
+** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. +*/ + +#ifndef spirv_H +#define spirv_H + +#ifdef __cplusplus + +namespace spv { + +const int MagicNumber = 0x07230203; +const int Version = 99; + +typedef unsigned int Id; + +const unsigned int OpCodeMask = 0xFFFF; +const unsigned int WordCountShift = 16; + +enum SourceLanguage { + SourceLanguageUnknown = 0, + SourceLanguageESSL = 1, + SourceLanguageGLSL = 2, + SourceLanguageOpenCL = 3, +}; + +enum ExecutionModel { + ExecutionModelVertex = 0, + ExecutionModelTessellationControl = 1, + ExecutionModelTessellationEvaluation = 2, + ExecutionModelGeometry = 3, + ExecutionModelFragment = 4, + ExecutionModelGLCompute = 5, + ExecutionModelKernel = 6, +}; + +enum AddressingModel { + AddressingModelLogical = 0, + AddressingModelPhysical32 = 1, + AddressingModelPhysical64 = 2, +}; + +enum MemoryModel { + MemoryModelSimple = 0, + MemoryModelGLSL450 = 1, + MemoryModelOpenCL12 = 2, + MemoryModelOpenCL20 = 3, + MemoryModelOpenCL21 = 4, +}; + +enum ExecutionMode { + ExecutionModeInvocations = 0, + ExecutionModeSpacingEqual = 1, + ExecutionModeSpacingFractionalEven = 2, + ExecutionModeSpacingFractionalOdd = 3, + ExecutionModeVertexOrderCw = 4, + ExecutionModeVertexOrderCcw = 5, + ExecutionModePixelCenterInteger = 6, + ExecutionModeOriginUpperLeft = 7, + ExecutionModeEarlyFragmentTests = 8, + ExecutionModePointMode = 9, + ExecutionModeXfb = 10, + ExecutionModeDepthReplacing = 11, + ExecutionModeDepthAny = 12, + ExecutionModeDepthGreater = 13, + ExecutionModeDepthLess = 14, + ExecutionModeDepthUnchanged = 15, + ExecutionModeLocalSize = 16, + ExecutionModeLocalSizeHint = 17, + ExecutionModeInputPoints = 18, + ExecutionModeInputLines = 19, + ExecutionModeInputLinesAdjacency = 20, + ExecutionModeInputTriangles = 21, + 
ExecutionModeInputTrianglesAdjacency = 22, + ExecutionModeInputQuads = 23, + ExecutionModeInputIsolines = 24, + ExecutionModeOutputVertices = 25, + ExecutionModeOutputPoints = 26, + ExecutionModeOutputLineStrip = 27, + ExecutionModeOutputTriangleStrip = 28, + ExecutionModeVecTypeHint = 29, + ExecutionModeContractionOff = 30, +}; + +enum StorageClass { + StorageClassUniformConstant = 0, + StorageClassInput = 1, + StorageClassUniform = 2, + StorageClassOutput = 3, + StorageClassWorkgroupLocal = 4, + StorageClassWorkgroupGlobal = 5, + StorageClassPrivateGlobal = 6, + StorageClassFunction = 7, + StorageClassGeneric = 8, + StorageClassPrivate = 9, + StorageClassAtomicCounter = 10, +}; + +enum Dim { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + DimCube = 3, + DimRect = 4, + DimBuffer = 5, +}; + +enum SamplerAddressingMode { + SamplerAddressingModeNone = 0, + SamplerAddressingModeClampToEdge = 1, + SamplerAddressingModeClamp = 2, + SamplerAddressingModeRepeat = 3, + SamplerAddressingModeRepeatMirrored = 4, +}; + +enum SamplerFilterMode { + SamplerFilterModeNearest = 0, + SamplerFilterModeLinear = 1, +}; + +enum FPFastMathModeShift { + FPFastMathModeNotNaNShift = 0, + FPFastMathModeNotInfShift = 1, + FPFastMathModeNSZShift = 2, + FPFastMathModeAllowRecipShift = 3, + FPFastMathModeFastShift = 4, +}; + +enum FPFastMathModeMask { + FPFastMathModeMaskNone = 0, + FPFastMathModeNotNaNMask = 0x00000001, + FPFastMathModeNotInfMask = 0x00000002, + FPFastMathModeNSZMask = 0x00000004, + FPFastMathModeAllowRecipMask = 0x00000008, + FPFastMathModeFastMask = 0x00000010, +}; + +enum FPRoundingMode { + FPRoundingModeRTE = 0, + FPRoundingModeRTZ = 1, + FPRoundingModeRTP = 2, + FPRoundingModeRTN = 3, +}; + +enum LinkageType { + LinkageTypeExport = 0, + LinkageTypeImport = 1, +}; + +enum AccessQualifier { + AccessQualifierReadOnly = 0, + AccessQualifierWriteOnly = 1, + AccessQualifierReadWrite = 2, +}; + +enum FunctionParameterAttribute { + FunctionParameterAttributeZext = 0, + 
FunctionParameterAttributeSext = 1, + FunctionParameterAttributeByVal = 2, + FunctionParameterAttributeSret = 3, + FunctionParameterAttributeNoAlias = 4, + FunctionParameterAttributeNoCapture = 5, + FunctionParameterAttributeSVM = 6, + FunctionParameterAttributeNoWrite = 7, + FunctionParameterAttributeNoReadWrite = 8, +}; + +enum Decoration { + DecorationPrecisionLow = 0, + DecorationPrecisionMedium = 1, + DecorationPrecisionHigh = 2, + DecorationBlock = 3, + DecorationBufferBlock = 4, + DecorationRowMajor = 5, + DecorationColMajor = 6, + DecorationGLSLShared = 7, + DecorationGLSLStd140 = 8, + DecorationGLSLStd430 = 9, + DecorationGLSLPacked = 10, + DecorationSmooth = 11, + DecorationNoperspective = 12, + DecorationFlat = 13, + DecorationPatch = 14, + DecorationCentroid = 15, + DecorationSample = 16, + DecorationInvariant = 17, + DecorationRestrict = 18, + DecorationAliased = 19, + DecorationVolatile = 20, + DecorationConstant = 21, + DecorationCoherent = 22, + DecorationNonwritable = 23, + DecorationNonreadable = 24, + DecorationUniform = 25, + DecorationNoStaticUse = 26, + DecorationCPacked = 27, + DecorationSaturatedConversion = 28, + DecorationStream = 29, + DecorationLocation = 30, + DecorationComponent = 31, + DecorationIndex = 32, + DecorationBinding = 33, + DecorationDescriptorSet = 34, + DecorationOffset = 35, + DecorationAlignment = 36, + DecorationXfbBuffer = 37, + DecorationStride = 38, + DecorationBuiltIn = 39, + DecorationFuncParamAttr = 40, + DecorationFPRoundingMode = 41, + DecorationFPFastMathMode = 42, + DecorationLinkageAttributes = 43, + DecorationSpecId = 44, +}; + +enum BuiltIn { + BuiltInPosition = 0, + BuiltInPointSize = 1, + BuiltInClipVertex = 2, + BuiltInClipDistance = 3, + BuiltInCullDistance = 4, + BuiltInVertexId = 5, + BuiltInInstanceId = 6, + BuiltInPrimitiveId = 7, + BuiltInInvocationId = 8, + BuiltInLayer = 9, + BuiltInViewportIndex = 10, + BuiltInTessLevelOuter = 11, + BuiltInTessLevelInner = 12, + BuiltInTessCoord = 13, + 
BuiltInPatchVertices = 14, + BuiltInFragCoord = 15, + BuiltInPointCoord = 16, + BuiltInFrontFacing = 17, + BuiltInSampleId = 18, + BuiltInSamplePosition = 19, + BuiltInSampleMask = 20, + BuiltInFragColor = 21, + BuiltInFragDepth = 22, + BuiltInHelperInvocation = 23, + BuiltInNumWorkgroups = 24, + BuiltInWorkgroupSize = 25, + BuiltInWorkgroupId = 26, + BuiltInLocalInvocationId = 27, + BuiltInGlobalInvocationId = 28, + BuiltInLocalInvocationIndex = 29, + BuiltInWorkDim = 30, + BuiltInGlobalSize = 31, + BuiltInEnqueuedWorkgroupSize = 32, + BuiltInGlobalOffset = 33, + BuiltInGlobalLinearId = 34, + BuiltInWorkgroupLinearId = 35, + BuiltInSubgroupSize = 36, + BuiltInSubgroupMaxSize = 37, + BuiltInNumSubgroups = 38, + BuiltInNumEnqueuedSubgroups = 39, + BuiltInSubgroupId = 40, + BuiltInSubgroupLocalInvocationId = 41, +}; + +enum SelectionControlShift { + SelectionControlFlattenShift = 0, + SelectionControlDontFlattenShift = 1, +}; + +enum SelectionControlMask { + SelectionControlMaskNone = 0, + SelectionControlFlattenMask = 0x00000001, + SelectionControlDontFlattenMask = 0x00000002, +}; + +enum LoopControlShift { + LoopControlUnrollShift = 0, + LoopControlDontUnrollShift = 1, +}; + +enum LoopControlMask { + LoopControlMaskNone = 0, + LoopControlUnrollMask = 0x00000001, + LoopControlDontUnrollMask = 0x00000002, +}; + +enum FunctionControlShift { + FunctionControlInlineShift = 0, + FunctionControlDontInlineShift = 1, + FunctionControlPureShift = 2, + FunctionControlConstShift = 3, +}; + +enum FunctionControlMask { + FunctionControlMaskNone = 0, + FunctionControlInlineMask = 0x00000001, + FunctionControlDontInlineMask = 0x00000002, + FunctionControlPureMask = 0x00000004, + FunctionControlConstMask = 0x00000008, +}; + +enum MemorySemanticsShift { + MemorySemanticsRelaxedShift = 0, + MemorySemanticsSequentiallyConsistentShift = 1, + MemorySemanticsAcquireShift = 2, + MemorySemanticsReleaseShift = 3, + MemorySemanticsUniformMemoryShift = 4, + MemorySemanticsSubgroupMemoryShift 
= 5, + MemorySemanticsWorkgroupLocalMemoryShift = 6, + MemorySemanticsWorkgroupGlobalMemoryShift = 7, + MemorySemanticsAtomicCounterMemoryShift = 8, + MemorySemanticsImageMemoryShift = 9, +}; + +enum MemorySemanticsMask { + MemorySemanticsMaskNone = 0, + MemorySemanticsRelaxedMask = 0x00000001, + MemorySemanticsSequentiallyConsistentMask = 0x00000002, + MemorySemanticsAcquireMask = 0x00000004, + MemorySemanticsReleaseMask = 0x00000008, + MemorySemanticsUniformMemoryMask = 0x00000010, + MemorySemanticsSubgroupMemoryMask = 0x00000020, + MemorySemanticsWorkgroupLocalMemoryMask = 0x00000040, + MemorySemanticsWorkgroupGlobalMemoryMask = 0x00000080, + MemorySemanticsAtomicCounterMemoryMask = 0x00000100, + MemorySemanticsImageMemoryMask = 0x00000200, +}; + +enum MemoryAccessShift { + MemoryAccessVolatileShift = 0, + MemoryAccessAlignedShift = 1, +}; + +enum MemoryAccessMask { + MemoryAccessMaskNone = 0, + MemoryAccessVolatileMask = 0x00000001, + MemoryAccessAlignedMask = 0x00000002, +}; + +enum ExecutionScope { + ExecutionScopeCrossDevice = 0, + ExecutionScopeDevice = 1, + ExecutionScopeWorkgroup = 2, + ExecutionScopeSubgroup = 3, +}; + +enum GroupOperation { + GroupOperationReduce = 0, + GroupOperationInclusiveScan = 1, + GroupOperationExclusiveScan = 2, +}; + +enum KernelEnqueueFlags { + KernelEnqueueFlagsNoWait = 0, + KernelEnqueueFlagsWaitKernel = 1, + KernelEnqueueFlagsWaitWorkGroup = 2, +}; + +enum KernelProfilingInfoShift { + KernelProfilingInfoCmdExecTimeShift = 0, +}; + +enum KernelProfilingInfoMask { + KernelProfilingInfoMaskNone = 0, + KernelProfilingInfoCmdExecTimeMask = 0x00000001, +}; + +enum Op { + OpNop = 0, + OpSource = 1, + OpSourceExtension = 2, + OpExtension = 3, + OpExtInstImport = 4, + OpMemoryModel = 5, + OpEntryPoint = 6, + OpExecutionMode = 7, + OpTypeVoid = 8, + OpTypeBool = 9, + OpTypeInt = 10, + OpTypeFloat = 11, + OpTypeVector = 12, + OpTypeMatrix = 13, + OpTypeSampler = 14, + OpTypeFilter = 15, + OpTypeArray = 16, + OpTypeRuntimeArray = 17, + 
OpTypeStruct = 18, + OpTypeOpaque = 19, + OpTypePointer = 20, + OpTypeFunction = 21, + OpTypeEvent = 22, + OpTypeDeviceEvent = 23, + OpTypeReserveId = 24, + OpTypeQueue = 25, + OpTypePipe = 26, + OpConstantTrue = 27, + OpConstantFalse = 28, + OpConstant = 29, + OpConstantComposite = 30, + OpConstantSampler = 31, + OpConstantNullPointer = 32, + OpConstantNullObject = 33, + OpSpecConstantTrue = 34, + OpSpecConstantFalse = 35, + OpSpecConstant = 36, + OpSpecConstantComposite = 37, + OpVariable = 38, + OpVariableArray = 39, + OpFunction = 40, + OpFunctionParameter = 41, + OpFunctionEnd = 42, + OpFunctionCall = 43, + OpExtInst = 44, + OpUndef = 45, + OpLoad = 46, + OpStore = 47, + OpPhi = 48, + OpDecorationGroup = 49, + OpDecorate = 50, + OpMemberDecorate = 51, + OpGroupDecorate = 52, + OpGroupMemberDecorate = 53, + OpName = 54, + OpMemberName = 55, + OpString = 56, + OpLine = 57, + OpVectorExtractDynamic = 58, + OpVectorInsertDynamic = 59, + OpVectorShuffle = 60, + OpCompositeConstruct = 61, + OpCompositeExtract = 62, + OpCompositeInsert = 63, + OpCopyObject = 64, + OpCopyMemory = 65, + OpCopyMemorySized = 66, + OpSampler = 67, + OpTextureSample = 68, + OpTextureSampleDref = 69, + OpTextureSampleLod = 70, + OpTextureSampleProj = 71, + OpTextureSampleGrad = 72, + OpTextureSampleOffset = 73, + OpTextureSampleProjLod = 74, + OpTextureSampleProjGrad = 75, + OpTextureSampleLodOffset = 76, + OpTextureSampleProjOffset = 77, + OpTextureSampleGradOffset = 78, + OpTextureSampleProjLodOffset = 79, + OpTextureSampleProjGradOffset = 80, + OpTextureFetchTexelLod = 81, + OpTextureFetchTexelOffset = 82, + OpTextureFetchSample = 83, + OpTextureFetchTexel = 84, + OpTextureGather = 85, + OpTextureGatherOffset = 86, + OpTextureGatherOffsets = 87, + OpTextureQuerySizeLod = 88, + OpTextureQuerySize = 89, + OpTextureQueryLod = 90, + OpTextureQueryLevels = 91, + OpTextureQuerySamples = 92, + OpAccessChain = 93, + OpInBoundsAccessChain = 94, + OpSNegate = 95, + OpFNegate = 96, + OpNot = 97, + 
OpAny = 98, + OpAll = 99, + OpConvertFToU = 100, + OpConvertFToS = 101, + OpConvertSToF = 102, + OpConvertUToF = 103, + OpUConvert = 104, + OpSConvert = 105, + OpFConvert = 106, + OpConvertPtrToU = 107, + OpConvertUToPtr = 108, + OpPtrCastToGeneric = 109, + OpGenericCastToPtr = 110, + OpBitcast = 111, + OpTranspose = 112, + OpIsNan = 113, + OpIsInf = 114, + OpIsFinite = 115, + OpIsNormal = 116, + OpSignBitSet = 117, + OpLessOrGreater = 118, + OpOrdered = 119, + OpUnordered = 120, + OpArrayLength = 121, + OpIAdd = 122, + OpFAdd = 123, + OpISub = 124, + OpFSub = 125, + OpIMul = 126, + OpFMul = 127, + OpUDiv = 128, + OpSDiv = 129, + OpFDiv = 130, + OpUMod = 131, + OpSRem = 132, + OpSMod = 133, + OpFRem = 134, + OpFMod = 135, + OpVectorTimesScalar = 136, + OpMatrixTimesScalar = 137, + OpVectorTimesMatrix = 138, + OpMatrixTimesVector = 139, + OpMatrixTimesMatrix = 140, + OpOuterProduct = 141, + OpDot = 142, + OpShiftRightLogical = 143, + OpShiftRightArithmetic = 144, + OpShiftLeftLogical = 145, + OpLogicalOr = 146, + OpLogicalXor = 147, + OpLogicalAnd = 148, + OpBitwiseOr = 149, + OpBitwiseXor = 150, + OpBitwiseAnd = 151, + OpSelect = 152, + OpIEqual = 153, + OpFOrdEqual = 154, + OpFUnordEqual = 155, + OpINotEqual = 156, + OpFOrdNotEqual = 157, + OpFUnordNotEqual = 158, + OpULessThan = 159, + OpSLessThan = 160, + OpFOrdLessThan = 161, + OpFUnordLessThan = 162, + OpUGreaterThan = 163, + OpSGreaterThan = 164, + OpFOrdGreaterThan = 165, + OpFUnordGreaterThan = 166, + OpULessThanEqual = 167, + OpSLessThanEqual = 168, + OpFOrdLessThanEqual = 169, + OpFUnordLessThanEqual = 170, + OpUGreaterThanEqual = 171, + OpSGreaterThanEqual = 172, + OpFOrdGreaterThanEqual = 173, + OpFUnordGreaterThanEqual = 174, + OpDPdx = 175, + OpDPdy = 176, + OpFwidth = 177, + OpDPdxFine = 178, + OpDPdyFine = 179, + OpFwidthFine = 180, + OpDPdxCoarse = 181, + OpDPdyCoarse = 182, + OpFwidthCoarse = 183, + OpEmitVertex = 184, + OpEndPrimitive = 185, + OpEmitStreamVertex = 186, + OpEndStreamPrimitive = 
187, + OpControlBarrier = 188, + OpMemoryBarrier = 189, + OpImagePointer = 190, + OpAtomicInit = 191, + OpAtomicLoad = 192, + OpAtomicStore = 193, + OpAtomicExchange = 194, + OpAtomicCompareExchange = 195, + OpAtomicCompareExchangeWeak = 196, + OpAtomicIIncrement = 197, + OpAtomicIDecrement = 198, + OpAtomicIAdd = 199, + OpAtomicISub = 200, + OpAtomicUMin = 201, + OpAtomicUMax = 202, + OpAtomicAnd = 203, + OpAtomicOr = 204, + OpAtomicXor = 205, + OpLoopMerge = 206, + OpSelectionMerge = 207, + OpLabel = 208, + OpBranch = 209, + OpBranchConditional = 210, + OpSwitch = 211, + OpKill = 212, + OpReturn = 213, + OpReturnValue = 214, + OpUnreachable = 215, + OpLifetimeStart = 216, + OpLifetimeStop = 217, + OpCompileFlag = 218, + OpAsyncGroupCopy = 219, + OpWaitGroupEvents = 220, + OpGroupAll = 221, + OpGroupAny = 222, + OpGroupBroadcast = 223, + OpGroupIAdd = 224, + OpGroupFAdd = 225, + OpGroupFMin = 226, + OpGroupUMin = 227, + OpGroupSMin = 228, + OpGroupFMax = 229, + OpGroupUMax = 230, + OpGroupSMax = 231, + OpGenericCastToPtrExplicit = 232, + OpGenericPtrMemSemantics = 233, + OpReadPipe = 234, + OpWritePipe = 235, + OpReservedReadPipe = 236, + OpReservedWritePipe = 237, + OpReserveReadPipePackets = 238, + OpReserveWritePipePackets = 239, + OpCommitReadPipe = 240, + OpCommitWritePipe = 241, + OpIsValidReserveId = 242, + OpGetNumPipePackets = 243, + OpGetMaxPipePackets = 244, + OpGroupReserveReadPipePackets = 245, + OpGroupReserveWritePipePackets = 246, + OpGroupCommitReadPipe = 247, + OpGroupCommitWritePipe = 248, + OpEnqueueMarker = 249, + OpEnqueueKernel = 250, + OpGetKernelNDrangeSubGroupCount = 251, + OpGetKernelNDrangeMaxSubGroupSize = 252, + OpGetKernelWorkGroupSize = 253, + OpGetKernelPreferredWorkGroupSizeMultiple = 254, + OpRetainEvent = 255, + OpReleaseEvent = 256, + OpCreateUserEvent = 257, + OpIsValidEvent = 258, + OpSetUserEventStatus = 259, + OpCaptureEventProfilingInfo = 260, + OpGetDefaultQueue = 261, + OpBuildNDRange = 262, + OpSatConvertSToU = 263, + 
OpSatConvertUToS = 264, + OpAtomicIMin = 265, + OpAtomicIMax = 266, +}; + +}; // end namespace spv + +#endif // #ifdef __cplusplus + + +#ifndef __cplusplus + +const int SpvMagicNumber = 0x07230203; +const int SpvVersion = 99; + +typedef unsigned int SpvId; + +const unsigned int SpvOpCodeMask = 0xFFFF; +const unsigned int SpvWordCountShift = 16; + +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL = 3, +} SpvSourceLanguage; + +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, +} SpvExecutionModel; + +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, +} SpvAddressingModel; + +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + SpvMemoryModelOpenCL12 = 2, + SpvMemoryModelOpenCL20 = 3, + SpvMemoryModelOpenCL21 = 4, +} SpvMemoryModel; + +typedef enum SpvExecutionMode_ { + SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeEarlyFragmentTests = 8, + SpvExecutionModePointMode = 9, + SpvExecutionModeXfb = 10, + SpvExecutionModeDepthReplacing = 11, + SpvExecutionModeDepthAny = 12, + SpvExecutionModeDepthGreater = 13, + SpvExecutionModeDepthLess = 14, + SpvExecutionModeDepthUnchanged = 15, + SpvExecutionModeLocalSize = 16, + SpvExecutionModeLocalSizeHint = 17, + SpvExecutionModeInputPoints = 18, + SpvExecutionModeInputLines = 19, + 
SpvExecutionModeInputLinesAdjacency = 20, + SpvExecutionModeInputTriangles = 21, + SpvExecutionModeInputTrianglesAdjacency = 22, + SpvExecutionModeInputQuads = 23, + SpvExecutionModeInputIsolines = 24, + SpvExecutionModeOutputVertices = 25, + SpvExecutionModeOutputPoints = 26, + SpvExecutionModeOutputLineStrip = 27, + SpvExecutionModeOutputTriangleStrip = 28, + SpvExecutionModeVecTypeHint = 29, + SpvExecutionModeContractionOff = 30, +} SpvExecutionMode; + +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroupLocal = 4, + SpvStorageClassWorkgroupGlobal = 5, + SpvStorageClassPrivateGlobal = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPrivate = 9, + SpvStorageClassAtomicCounter = 10, +} SpvStorageClass; + +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, +} SpvDim; + +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, + SpvSamplerAddressingModeRepeatMirrored = 4, +} SpvSamplerAddressingMode; + +typedef enum SpvSamplerFilterMode_ { + SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, +} SpvSamplerFilterMode; + +typedef enum SpvFPFastMathModeShift_ { + SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, +} SpvFPFastMathModeShift; + +typedef enum SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, +} SpvFPFastMathModeMask; + 
+typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, +} SpvFPRoundingMode; + +typedef enum SpvLinkageType_ { + SpvLinkageTypeExport = 0, + SpvLinkageTypeImport = 1, +} SpvLinkageType; + +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, +} SpvAccessQualifier; + +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeSVM = 6, + SpvFunctionParameterAttributeNoWrite = 7, + SpvFunctionParameterAttributeNoReadWrite = 8, +} SpvFunctionParameterAttribute; + +typedef enum SpvDecoration_ { + SpvDecorationPrecisionLow = 0, + SpvDecorationPrecisionMedium = 1, + SpvDecorationPrecisionHigh = 2, + SpvDecorationBlock = 3, + SpvDecorationBufferBlock = 4, + SpvDecorationRowMajor = 5, + SpvDecorationColMajor = 6, + SpvDecorationGLSLShared = 7, + SpvDecorationGLSLStd140 = 8, + SpvDecorationGLSLStd430 = 9, + SpvDecorationGLSLPacked = 10, + SpvDecorationSmooth = 11, + SpvDecorationNoperspective = 12, + SpvDecorationFlat = 13, + SpvDecorationPatch = 14, + SpvDecorationCentroid = 15, + SpvDecorationSample = 16, + SpvDecorationInvariant = 17, + SpvDecorationRestrict = 18, + SpvDecorationAliased = 19, + SpvDecorationVolatile = 20, + SpvDecorationConstant = 21, + SpvDecorationCoherent = 22, + SpvDecorationNonwritable = 23, + SpvDecorationNonreadable = 24, + SpvDecorationUniform = 25, + SpvDecorationNoStaticUse = 26, + SpvDecorationCPacked = 27, + SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + 
SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + SpvDecorationAlignment = 36, + SpvDecorationXfbBuffer = 37, + SpvDecorationStride = 38, + SpvDecorationBuiltIn = 39, + SpvDecorationFuncParamAttr = 40, + SpvDecorationFPRoundingMode = 41, + SpvDecorationFPFastMathMode = 42, + SpvDecorationLinkageAttributes = 43, + SpvDecorationSpecId = 44, +} SpvDecoration; + +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipVertex = 2, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragColor = 21, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInWorkgroupLinearId = 35, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, +} SpvBuiltIn; + +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + SpvSelectionControlDontFlattenShift = 1, +} SpvSelectionControlShift; + +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 
0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; + +typedef enum SpvLoopControlShift_ { + SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, +} SpvLoopControlShift; + +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, +} SpvLoopControlMask; + +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, +} SpvFunctionControlShift; + +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, +} SpvFunctionControlMask; + +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsRelaxedShift = 0, + SpvMemorySemanticsSequentiallyConsistentShift = 1, + SpvMemorySemanticsAcquireShift = 2, + SpvMemorySemanticsReleaseShift = 3, + SpvMemorySemanticsUniformMemoryShift = 4, + SpvMemorySemanticsSubgroupMemoryShift = 5, + SpvMemorySemanticsWorkgroupLocalMemoryShift = 6, + SpvMemorySemanticsWorkgroupGlobalMemoryShift = 7, + SpvMemorySemanticsAtomicCounterMemoryShift = 8, + SpvMemorySemanticsImageMemoryShift = 9, +} SpvMemorySemanticsShift; + +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsRelaxedMask = 0x00000001, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000002, + SpvMemorySemanticsAcquireMask = 0x00000004, + SpvMemorySemanticsReleaseMask = 0x00000008, + SpvMemorySemanticsUniformMemoryMask = 0x00000010, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000020, + SpvMemorySemanticsWorkgroupLocalMemoryMask = 0x00000040, + SpvMemorySemanticsWorkgroupGlobalMemoryMask = 0x00000080, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000100, + 
SpvMemorySemanticsImageMemoryMask = 0x00000200, +} SpvMemorySemanticsMask; + +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift = 0, + SpvMemoryAccessAlignedShift = 1, +} SpvMemoryAccessShift; + +typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, +} SpvMemoryAccessMask; + +typedef enum SpvExecutionScope_ { + SpvExecutionScopeCrossDevice = 0, + SpvExecutionScopeDevice = 1, + SpvExecutionScopeWorkgroup = 2, + SpvExecutionScopeSubgroup = 3, +} SpvExecutionScope; + +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, +} SpvGroupOperation; + +typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, +} SpvKernelEnqueueFlags; + +typedef enum SpvKernelProfilingInfoShift_ { + SpvKernelProfilingInfoCmdExecTimeShift = 0, +} SpvKernelProfilingInfoShift; + +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; + +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpSource = 1, + SpvOpSourceExtension = 2, + SpvOpExtension = 3, + SpvOpExtInstImport = 4, + SpvOpMemoryModel = 5, + SpvOpEntryPoint = 6, + SpvOpExecutionMode = 7, + SpvOpTypeVoid = 8, + SpvOpTypeBool = 9, + SpvOpTypeInt = 10, + SpvOpTypeFloat = 11, + SpvOpTypeVector = 12, + SpvOpTypeMatrix = 13, + SpvOpTypeSampler = 14, + SpvOpTypeFilter = 15, + SpvOpTypeArray = 16, + SpvOpTypeRuntimeArray = 17, + SpvOpTypeStruct = 18, + SpvOpTypeOpaque = 19, + SpvOpTypePointer = 20, + SpvOpTypeFunction = 21, + SpvOpTypeEvent = 22, + SpvOpTypeDeviceEvent = 23, + SpvOpTypeReserveId = 24, + SpvOpTypeQueue = 25, + SpvOpTypePipe = 26, + SpvOpConstantTrue = 27, + SpvOpConstantFalse = 28, + SpvOpConstant = 29, + SpvOpConstantComposite = 
30, + SpvOpConstantSampler = 31, + SpvOpConstantNullPointer = 32, + SpvOpConstantNullObject = 33, + SpvOpSpecConstantTrue = 34, + SpvOpSpecConstantFalse = 35, + SpvOpSpecConstant = 36, + SpvOpSpecConstantComposite = 37, + SpvOpVariable = 38, + SpvOpVariableArray = 39, + SpvOpFunction = 40, + SpvOpFunctionParameter = 41, + SpvOpFunctionEnd = 42, + SpvOpFunctionCall = 43, + SpvOpExtInst = 44, + SpvOpUndef = 45, + SpvOpLoad = 46, + SpvOpStore = 47, + SpvOpPhi = 48, + SpvOpDecorationGroup = 49, + SpvOpDecorate = 50, + SpvOpMemberDecorate = 51, + SpvOpGroupDecorate = 52, + SpvOpGroupMemberDecorate = 53, + SpvOpName = 54, + SpvOpMemberName = 55, + SpvOpString = 56, + SpvOpLine = 57, + SpvOpVectorExtractDynamic = 58, + SpvOpVectorInsertDynamic = 59, + SpvOpVectorShuffle = 60, + SpvOpCompositeConstruct = 61, + SpvOpCompositeExtract = 62, + SpvOpCompositeInsert = 63, + SpvOpCopyObject = 64, + SpvOpCopyMemory = 65, + SpvOpCopyMemorySized = 66, + SpvOpSampler = 67, + SpvOpTextureSample = 68, + SpvOpTextureSampleDref = 69, + SpvOpTextureSampleLod = 70, + SpvOpTextureSampleProj = 71, + SpvOpTextureSampleGrad = 72, + SpvOpTextureSampleOffset = 73, + SpvOpTextureSampleProjLod = 74, + SpvOpTextureSampleProjGrad = 75, + SpvOpTextureSampleLodOffset = 76, + SpvOpTextureSampleProjOffset = 77, + SpvOpTextureSampleGradOffset = 78, + SpvOpTextureSampleProjLodOffset = 79, + SpvOpTextureSampleProjGradOffset = 80, + SpvOpTextureFetchTexelLod = 81, + SpvOpTextureFetchTexelOffset = 82, + SpvOpTextureFetchSample = 83, + SpvOpTextureFetchTexel = 84, + SpvOpTextureGather = 85, + SpvOpTextureGatherOffset = 86, + SpvOpTextureGatherOffsets = 87, + SpvOpTextureQuerySizeLod = 88, + SpvOpTextureQuerySize = 89, + SpvOpTextureQueryLod = 90, + SpvOpTextureQueryLevels = 91, + SpvOpTextureQuerySamples = 92, + SpvOpAccessChain = 93, + SpvOpInBoundsAccessChain = 94, + SpvOpSNegate = 95, + SpvOpFNegate = 96, + SpvOpNot = 97, + SpvOpAny = 98, + SpvOpAll = 99, + SpvOpConvertFToU = 100, + SpvOpConvertFToS = 101, 
+ SpvOpConvertSToF = 102, + SpvOpConvertUToF = 103, + SpvOpUConvert = 104, + SpvOpSConvert = 105, + SpvOpFConvert = 106, + SpvOpConvertPtrToU = 107, + SpvOpConvertUToPtr = 108, + SpvOpPtrCastToGeneric = 109, + SpvOpGenericCastToPtr = 110, + SpvOpBitcast = 111, + SpvOpTranspose = 112, + SpvOpIsNan = 113, + SpvOpIsInf = 114, + SpvOpIsFinite = 115, + SpvOpIsNormal = 116, + SpvOpSignBitSet = 117, + SpvOpLessOrGreater = 118, + SpvOpOrdered = 119, + SpvOpUnordered = 120, + SpvOpArrayLength = 121, + SpvOpIAdd = 122, + SpvOpFAdd = 123, + SpvOpISub = 124, + SpvOpFSub = 125, + SpvOpIMul = 126, + SpvOpFMul = 127, + SpvOpUDiv = 128, + SpvOpSDiv = 129, + SpvOpFDiv = 130, + SpvOpUMod = 131, + SpvOpSRem = 132, + SpvOpSMod = 133, + SpvOpFRem = 134, + SpvOpFMod = 135, + SpvOpVectorTimesScalar = 136, + SpvOpMatrixTimesScalar = 137, + SpvOpVectorTimesMatrix = 138, + SpvOpMatrixTimesVector = 139, + SpvOpMatrixTimesMatrix = 140, + SpvOpOuterProduct = 141, + SpvOpDot = 142, + SpvOpShiftRightLogical = 143, + SpvOpShiftRightArithmetic = 144, + SpvOpShiftLeftLogical = 145, + SpvOpLogicalOr = 146, + SpvOpLogicalXor = 147, + SpvOpLogicalAnd = 148, + SpvOpBitwiseOr = 149, + SpvOpBitwiseXor = 150, + SpvOpBitwiseAnd = 151, + SpvOpSelect = 152, + SpvOpIEqual = 153, + SpvOpFOrdEqual = 154, + SpvOpFUnordEqual = 155, + SpvOpINotEqual = 156, + SpvOpFOrdNotEqual = 157, + SpvOpFUnordNotEqual = 158, + SpvOpULessThan = 159, + SpvOpSLessThan = 160, + SpvOpFOrdLessThan = 161, + SpvOpFUnordLessThan = 162, + SpvOpUGreaterThan = 163, + SpvOpSGreaterThan = 164, + SpvOpFOrdGreaterThan = 165, + SpvOpFUnordGreaterThan = 166, + SpvOpULessThanEqual = 167, + SpvOpSLessThanEqual = 168, + SpvOpFOrdLessThanEqual = 169, + SpvOpFUnordLessThanEqual = 170, + SpvOpUGreaterThanEqual = 171, + SpvOpSGreaterThanEqual = 172, + SpvOpFOrdGreaterThanEqual = 173, + SpvOpFUnordGreaterThanEqual = 174, + SpvOpDPdx = 175, + SpvOpDPdy = 176, + SpvOpFwidth = 177, + SpvOpDPdxFine = 178, + SpvOpDPdyFine = 179, + SpvOpFwidthFine = 180, + 
SpvOpDPdxCoarse = 181, + SpvOpDPdyCoarse = 182, + SpvOpFwidthCoarse = 183, + SpvOpEmitVertex = 184, + SpvOpEndPrimitive = 185, + SpvOpEmitStreamVertex = 186, + SpvOpEndStreamPrimitive = 187, + SpvOpControlBarrier = 188, + SpvOpMemoryBarrier = 189, + SpvOpImagePointer = 190, + SpvOpAtomicInit = 191, + SpvOpAtomicLoad = 192, + SpvOpAtomicStore = 193, + SpvOpAtomicExchange = 194, + SpvOpAtomicCompareExchange = 195, + SpvOpAtomicCompareExchangeWeak = 196, + SpvOpAtomicIIncrement = 197, + SpvOpAtomicIDecrement = 198, + SpvOpAtomicIAdd = 199, + SpvOpAtomicISub = 200, + SpvOpAtomicUMin = 201, + SpvOpAtomicUMax = 202, + SpvOpAtomicAnd = 203, + SpvOpAtomicOr = 204, + SpvOpAtomicXor = 205, + SpvOpLoopMerge = 206, + SpvOpSelectionMerge = 207, + SpvOpLabel = 208, + SpvOpBranch = 209, + SpvOpBranchConditional = 210, + SpvOpSwitch = 211, + SpvOpKill = 212, + SpvOpReturn = 213, + SpvOpReturnValue = 214, + SpvOpUnreachable = 215, + SpvOpLifetimeStart = 216, + SpvOpLifetimeStop = 217, + SpvOpCompileFlag = 218, + SpvOpAsyncGroupCopy = 219, + SpvOpWaitGroupEvents = 220, + SpvOpGroupAll = 221, + SpvOpGroupAny = 222, + SpvOpGroupBroadcast = 223, + SpvOpGroupIAdd = 224, + SpvOpGroupFAdd = 225, + SpvOpGroupFMin = 226, + SpvOpGroupUMin = 227, + SpvOpGroupSMin = 228, + SpvOpGroupFMax = 229, + SpvOpGroupUMax = 230, + SpvOpGroupSMax = 231, + SpvOpGenericCastToPtrExplicit = 232, + SpvOpGenericPtrMemSemantics = 233, + SpvOpReadPipe = 234, + SpvOpWritePipe = 235, + SpvOpReservedReadPipe = 236, + SpvOpReservedWritePipe = 237, + SpvOpReserveReadPipePackets = 238, + SpvOpReserveWritePipePackets = 239, + SpvOpCommitReadPipe = 240, + SpvOpCommitWritePipe = 241, + SpvOpIsValidReserveId = 242, + SpvOpGetNumPipePackets = 243, + SpvOpGetMaxPipePackets = 244, + SpvOpGroupReserveReadPipePackets = 245, + SpvOpGroupReserveWritePipePackets = 246, + SpvOpGroupCommitReadPipe = 247, + SpvOpGroupCommitWritePipe = 248, + SpvOpEnqueueMarker = 249, + SpvOpEnqueueKernel = 250, + SpvOpGetKernelNDrangeSubGroupCount = 
251, + SpvOpGetKernelNDrangeMaxSubGroupSize = 252, + SpvOpGetKernelWorkGroupSize = 253, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 254, + SpvOpRetainEvent = 255, + SpvOpReleaseEvent = 256, + SpvOpCreateUserEvent = 257, + SpvOpIsValidEvent = 258, + SpvOpSetUserEventStatus = 259, + SpvOpCaptureEventProfilingInfo = 260, + SpvOpGetDefaultQueue = 261, + SpvOpBuildNDRange = 262, + SpvOpSatConvertSToU = 263, + SpvOpSatConvertUToS = 264, + SpvOpAtomicIMin = 265, + SpvOpAtomicIMax = 266, +} SpvOp; + +#endif // #ifndef __cplusplus + +#endif // #ifndef spirv_H -- cgit v1.2.3 From b20d9f564335134c79c973fc4b6d85225b9d54b0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 28 Apr 2015 17:43:16 -0700 Subject: nir: Add the start of a SPIR-V to NIR translator At the moment, it can handle the very basics of strings and can ignore debug instructions. It also has basic support for decorations. --- src/glsl/Makefile.sources | 2 + src/glsl/nir/nir_spirv.h | 39 ++++ src/glsl/nir/spirv_to_nir.c | 454 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 495 insertions(+) create mode 100644 src/glsl/nir/nir_spirv.h create mode 100644 src/glsl/nir/spirv_to_nir.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 05a12bc0721..c6a89362988 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -64,6 +64,7 @@ NIR_FILES = \ nir/nir_remove_dead_variables.c \ nir/nir_search.c \ nir/nir_search.h \ + nir/nir_spirv.h \ nir/nir_split_var_copies.c \ nir/nir_sweep.c \ nir/nir_to_ssa.c \ @@ -73,6 +74,7 @@ NIR_FILES = \ nir/nir_worklist.c \ nir/nir_worklist.h \ nir/nir_types.cpp \ + nir/spirv_to_nir.c \ $(NIR_GENERATED_FILES) # libglsl diff --git a/src/glsl/nir/nir_spirv.h b/src/glsl/nir/nir_spirv.h new file mode 100644 index 00000000000..5f4140db3d9 --- /dev/null +++ b/src/glsl/nir/nir_spirv.h @@ -0,0 +1,39 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person 
obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#pragma once + +#ifndef _NIR_SPIRV_H_ +#define _NIR_SPIRV_H_ + +#include "nir.h" + +nir_shader *spirv_to_nir(const uint32_t *words, size_t word_count, + gl_shader_stage stage, + const nir_shader_compiler_options *options); + +#endif /* _NIR_SPIRV_H_ */ diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c new file mode 100644 index 00000000000..3a49ad965ba --- /dev/null +++ b/src/glsl/nir/spirv_to_nir.c @@ -0,0 +1,454 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir_spirv.h" +#include "spirv.h" + +struct vtn_decoration; + +enum vtn_value_type { + vtn_value_type_invalid = 0, + vtn_value_type_undef, + vtn_value_type_string, + vtn_value_type_decoration_group, + vtn_value_type_ssa, + vtn_value_type_deref, +}; + +struct vtn_value { + enum vtn_value_type value_type; + const char *name; + struct vtn_decoration *decoration; + union { + void *ptr; + char *str; + nir_ssa_def *ssa; + nir_deref_var *deref; + }; +}; + +struct vtn_decoration { + struct vtn_decoration *next; + const uint32_t *literals; + struct vtn_value *group; + SpvDecoration decoration; +}; + +struct vtn_builder { + nir_shader *shader; + nir_function_impl *impl; + + unsigned value_id_bound; + struct vtn_value *values; +}; + +static void +vtn_push_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type, void *ptr) +{ + assert(value_id < b->value_id_bound); + assert(b->values[value_id].value_type == vtn_value_type_invalid); + + b->values[value_id].value_type = value_type; + b->values[value_id].ptr = ptr; +} + +static void +vtn_push_token(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + vtn_push_value(b, value_id, value_type, NULL); +} + +static char * +vtn_string_literal(struct vtn_builder *b, const uint32_t *words, + unsigned word_count) +{ + return ralloc_strndup(b, (char *)words, (word_count - 2) * sizeof(*words)); +} + +typedef void (*decoration_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + const struct vtn_decoration *, + void *); + +static void +_foreach_decoration_helper(struct vtn_builder *b, + struct vtn_value *base_value, + struct vtn_value *value, + decoration_foreach_cb cb, void *data) +{ + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + if (dec->group) { + assert(dec->group->value_type == vtn_value_type_decoration_group); + _foreach_decoration_helper(b, base_value, dec->group, 
cb, data); + } else { + cb(b, base_value, dec, data); + } + } +} + +/** Iterates (recursively if needed) over all of the decorations on a value + * + * This function iterates over all of the decorations applied to a given + * value. If it encounters a decoration group, it recurses into the group + * and iterates over all of those decorations as well. + */ +static void +vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + decoration_foreach_cb cb, void *data) +{ + _foreach_decoration_helper(b, value, value, cb, data); +} + +static void +vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpDecorationGroup: + vtn_push_token(b, w[1], vtn_value_type_undef); + break; + + case SpvOpDecorate: { + struct vtn_value *val = &b->values[w[1]]; + + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->decoration = w[2]; + dec->literals = &w[3]; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + break; + } + + case SpvOpGroupDecorate: { + struct vtn_value *group = &b->values[w[1]]; + assert(group->value_type == vtn_value_type_decoration_group); + + for (unsigned i = 2; i < count; i++) { + struct vtn_value *val = &b->values[w[i]]; + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->group = group; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + } + break; + } + + case SpvOpGroupMemberDecorate: + assert(!"Bad instruction. 
Khronos Bug #13513"); + break; + + default: + unreachable("Unhandled opcode"); + } +} + +static void +vtn_handle_type(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + +static void +vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + +static void +vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + +static void +vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + +static void +vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + +static void +vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceExtension: + case SpvOpMemberName: + case SpvOpLine: + /* Unhandled, but these are for debug so that's ok. 
*/ + break; + + case SpvOpName: + b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2); + break; + + case SpvOpString: + vtn_push_value(b, w[1], vtn_value_type_string, + vtn_string_literal(b, &w[2], count - 2)); + break; + + case SpvOpUndef: + vtn_push_token(b, w[2], vtn_value_type_undef); + break; + + case SpvOpTypeVoid: + case SpvOpTypeBool: + case SpvOpTypeInt: + case SpvOpTypeFloat: + case SpvOpTypeVector: + case SpvOpTypeMatrix: + case SpvOpTypeSampler: + case SpvOpTypeArray: + case SpvOpTypeRuntimeArray: + case SpvOpTypeStruct: + case SpvOpTypeOpaque: + case SpvOpTypePointer: + case SpvOpTypeFunction: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + vtn_handle_type(b, opcode, w, count); + break; + + case SpvOpConstantTrue: + case SpvOpConstantFalse: + case SpvOpConstant: + case SpvOpConstantComposite: + case SpvOpConstantSampler: + case SpvOpConstantNullPointer: + case SpvOpConstantNullObject: + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: + case SpvOpSpecConstant: + case SpvOpSpecConstantComposite: + vtn_handle_constant(b, opcode, w, count); + break; + + case SpvOpVariable: + case SpvOpVariableArray: + case SpvOpLoad: + case SpvOpStore: + case SpvOpCopyMemory: + case SpvOpCopyMemorySized: + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: + case SpvOpArrayLength: + case SpvOpImagePointer: + vtn_handle_variables(b, opcode, w, count); + break; + + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + vtn_handle_decoration(b, opcode, w, count); + break; + + case SpvOpTextureSample: + case SpvOpTextureSampleDref: + case SpvOpTextureSampleLod: + case SpvOpTextureSampleProj: + case SpvOpTextureSampleGrad: + case SpvOpTextureSampleOffset: + case SpvOpTextureSampleProjLod: + case SpvOpTextureSampleProjGrad: + case SpvOpTextureSampleLodOffset: + case 
SpvOpTextureSampleProjOffset: + case SpvOpTextureSampleGradOffset: + case SpvOpTextureSampleProjLodOffset: + case SpvOpTextureSampleProjGradOffset: + case SpvOpTextureFetchTexelLod: + case SpvOpTextureFetchTexelOffset: + case SpvOpTextureFetchSample: + case SpvOpTextureFetchTexel: + case SpvOpTextureGather: + case SpvOpTextureGatherOffset: + case SpvOpTextureGatherOffsets: + case SpvOpTextureQuerySizeLod: + case SpvOpTextureQuerySize: + case SpvOpTextureQueryLod: + case SpvOpTextureQueryLevels: + case SpvOpTextureQuerySamples: + vtn_handle_texture(b, opcode, w, count); + break; + + case SpvOpSNegate: + case SpvOpFNegate: + case SpvOpNot: + case SpvOpAny: + case SpvOpAll: + case SpvOpConvertFToU: + case SpvOpConvertFToS: + case SpvOpConvertSToF: + case SpvOpConvertUToF: + case SpvOpUConvert: + case SpvOpSConvert: + case SpvOpFConvert: + case SpvOpConvertPtrToU: + case SpvOpConvertUToPtr: + case SpvOpPtrCastToGeneric: + case SpvOpGenericCastToPtr: + case SpvOpBitcast: + case SpvOpTranspose: + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + case SpvOpIAdd: + case SpvOpFAdd: + case SpvOpISub: + case SpvOpFSub: + case SpvOpIMul: + case SpvOpFMul: + case SpvOpUDiv: + case SpvOpSDiv: + case SpvOpFDiv: + case SpvOpUMod: + case SpvOpSRem: + case SpvOpSMod: + case SpvOpFRem: + case SpvOpFMod: + case SpvOpVectorTimesScalar: + case SpvOpMatrixTimesScalar: + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + case SpvOpOuterProduct: + case SpvOpDot: + case SpvOpShiftRightLogical: + case SpvOpShiftRightArithmetic: + case SpvOpShiftLeftLogical: + case SpvOpLogicalOr: + case SpvOpLogicalXor: + case SpvOpLogicalAnd: + case SpvOpBitwiseOr: + case SpvOpBitwiseXor: + case SpvOpBitwiseAnd: + case SpvOpSelect: + case SpvOpIEqual: + case SpvOpFOrdEqual: + case SpvOpFUnordEqual: + case SpvOpINotEqual: + case 
SpvOpFOrdNotEqual: + case SpvOpFUnordNotEqual: + case SpvOpULessThan: + case SpvOpSLessThan: + case SpvOpFOrdLessThan: + case SpvOpFUnordLessThan: + case SpvOpUGreaterThan: + case SpvOpSGreaterThan: + case SpvOpFOrdGreaterThan: + case SpvOpFUnordGreaterThan: + case SpvOpULessThanEqual: + case SpvOpSLessThanEqual: + case SpvOpFOrdLessThanEqual: + case SpvOpFUnordLessThanEqual: + case SpvOpUGreaterThanEqual: + case SpvOpSGreaterThanEqual: + case SpvOpFOrdGreaterThanEqual: + case SpvOpFUnordGreaterThanEqual: + case SpvOpDPdx: + case SpvOpDPdy: + case SpvOpFwidth: + case SpvOpDPdxFine: + case SpvOpDPdyFine: + case SpvOpFwidthFine: + case SpvOpDPdxCoarse: + case SpvOpDPdyCoarse: + case SpvOpFwidthCoarse: + vtn_handle_alu(b, opcode, w, count); + break; + + default: + unreachable("Unhandled opcode"); + } +} + +nir_shader * +spirv_to_nir(const uint32_t *words, size_t word_count, + gl_shader_stage stage, + const nir_shader_compiler_options *options) +{ + /* Handle the SPIR-V header (first 4 dwords) */ + assert(word_count > 5); + + assert(words[0] == SpvMagicNumber); + assert(words[1] == 99); + /* words[2] == generator magic */ + unsigned value_id_bound = words[3]; + assert(words[4] == 0); + + words+= 5; + + nir_shader *shader = nir_shader_create(NULL, stage, options); + + /* Initialize the stn_builder object */ + struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); + b->shader = shader; + b->value_id_bound = value_id_bound; + b->values = ralloc_array(b, struct vtn_value, value_id_bound); + + /* Start handling instructions */ + const uint32_t *word_end = words + word_count; + while (words < word_end) { + SpvOp opcode = words[0] & SpvOpCodeMask; + unsigned count = words[0] >> SpvWordCountShift; + assert(words + count <= word_end); + + vtn_handle_instruction(b, opcode, words, count); + + words += count; + } + + ralloc_free(b); + + return shader; +} -- cgit v1.2.3 From 2c585a722d61257c29eb9160e51b140afb27a928 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 
2015 14:30:22 -0700 Subject: glsl/compiler: Move the error_no_memory stub to standalone_scaffolding.cpp --- src/glsl/standalone_scaffolding.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp index 6033364afc5..6ff9553d6fe 100644 --- a/src/glsl/standalone_scaffolding.cpp +++ b/src/glsl/standalone_scaffolding.cpp @@ -35,6 +35,12 @@ #include "util/ralloc.h" #include "util/strtod.h" +extern "C" void +_mesa_error_no_memory(const char *caller) +{ + fprintf(stderr, "Mesa error: out of memory in %s", caller); +} + void _mesa_warning(struct gl_context *ctx, const char *fmt, ...) { -- cgit v1.2.3 From 78eabc615323cf4728fb9ea72aa5bd9799828f04 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:29:38 -0700 Subject: REVERT: Add a simple helper program for testing SPIR-V -> NIR translation --- src/glsl/Makefile.am | 12 ++++++++++- src/glsl/nir/spirv2nir.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 src/glsl/nir/spirv2nir.c (limited to 'src') diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am index 98dcb37fc74..2ab40506e97 100644 --- a/src/glsl/Makefile.am +++ b/src/glsl/Makefile.am @@ -75,7 +75,7 @@ check_PROGRAMS = \ tests/sampler-types-test \ tests/uniform-initializer-test -noinst_PROGRAMS = glsl_compiler +noinst_PROGRAMS = glsl_compiler spirv2nir tests_blob_test_SOURCES = \ tests/blob_test.c @@ -156,6 +156,16 @@ glsl_compiler_LDADD = \ $(top_builddir)/src/libglsl_util.la \ $(PTHREAD_LIBS) +spirv2nir_SOURCES = \ + standalone_scaffolding.cpp \ + standalone_scaffolding.h \ + nir/spirv2nir.c + +spirv2nir_LDADD = \ + libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + glsl_test_SOURCES = \ standalone_scaffolding.cpp \ test.cpp \ diff --git a/src/glsl/nir/spirv2nir.c b/src/glsl/nir/spirv2nir.c new file mode 100644 index 00000000000..e06e82595a2 --- /dev/null 
+++ b/src/glsl/nir/spirv2nir.c @@ -0,0 +1,55 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +/* + * A simple executable that opens a SPIR-V shader, converts it to NIR, and + * dumps out the result. This should be useful for testing the + * spirv_to_nir code. 
+ */ + +#include "nir_spirv.h" + +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + int fd = open(argv[1], O_RDONLY); + off_t len = lseek(fd, 0, SEEK_END); + + assert(len % 4 == 0); + size_t word_count = len / 4; + + const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); + assert(map != NULL); + + nir_shader *shader = spirv_to_nir(map, MESA_SHADER_FRAGMENT, + word_count, NULL); + nir_print_shader(shader, stderr); +} -- cgit v1.2.3 From ac60aba351c7c1076803b07c6f546ab5b70ac083 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:32:55 -0700 Subject: nir/spirv: Add stub support for extension instructions --- src/glsl/nir/spirv_to_nir.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 3a49ad965ba..bc7b98f2e0d 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -64,6 +64,9 @@ struct vtn_builder { unsigned value_id_bound; struct vtn_value *values; + + SpvExecutionModel execution_model; + struct vtn_value *entry_point; }; static void @@ -91,6 +94,21 @@ vtn_string_literal(struct vtn_builder *b, const uint32_t *words, return ralloc_strndup(b, (char *)words, (word_count - 2) * sizeof(*words)); } +static void +vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpExtInstImport: + /* Do nothing for the moment */ + break; + + case SpvOpExtInst: + default: + unreachable("Unhandled opcode"); + } +} + typedef void (*decoration_foreach_cb)(struct vtn_builder *, struct vtn_value *, const struct vtn_decoration *, @@ -216,6 +234,7 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpSourceExtension: case SpvOpMemberName: case SpvOpLine: + case SpvOpExtension: /* Unhandled, but these are for debug so that's ok. 
*/ break; @@ -232,6 +251,22 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, vtn_push_token(b, w[2], vtn_value_type_undef); break; + case SpvOpMemoryModel: + assert(w[1] == SpvAddressingModelLogical); + assert(w[2] == SpvMemoryModelGLSL450); + break; + + case SpvOpEntryPoint: + assert(b->entry_point == NULL); + b->entry_point = &b->values[w[2]]; + b->execution_model = w[1]; + break; + + case SpvOpExtInstImport: + case SpvOpExtInst: + vtn_handle_extension(b, opcode, w, count); + break; + case SpvOpTypeVoid: case SpvOpTypeBool: case SpvOpTypeInt: -- cgit v1.2.3 From b79916dacc3d373d3d957a2e723a6e1d27932081 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:34:06 -0700 Subject: nir/spirv: Rework the way values are added Instead of having functions to add values and set various things, we just have a function that does a few asserts and then returns the value. The caller is then responsible for setting the various fields. --- src/glsl/nir/spirv_to_nir.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index bc7b98f2e0d..05193531107 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -69,22 +69,16 @@ struct vtn_builder { struct vtn_value *entry_point; }; -static void +static struct vtn_value * vtn_push_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type, void *ptr) + enum vtn_value_type value_type) { assert(value_id < b->value_id_bound); assert(b->values[value_id].value_type == vtn_value_type_invalid); b->values[value_id].value_type = value_type; - b->values[value_id].ptr = ptr; -} -static void -vtn_push_token(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) -{ - vtn_push_value(b, value_id, value_type, NULL); + return &b->values[value_id]; } static char * @@ -149,7 +143,7 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, { switch 
(opcode) { case SpvOpDecorationGroup: - vtn_push_token(b, w[1], vtn_value_type_undef); + vtn_push_value(b, w[1], vtn_value_type_undef); break; case SpvOpDecorate: { @@ -243,12 +237,12 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpString: - vtn_push_value(b, w[1], vtn_value_type_string, - vtn_string_literal(b, &w[2], count - 2)); + vtn_push_value(b, w[1], vtn_value_type_string)->str = + vtn_string_literal(b, &w[2], count - 2); break; case SpvOpUndef: - vtn_push_token(b, w[2], vtn_value_type_undef); + vtn_push_value(b, w[2], vtn_value_type_undef); break; case SpvOpMemoryModel: -- cgit v1.2.3 From 1169fcdb0579b1cba1d43854397079ee54022e11 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Feb 2015 16:27:32 -0800 Subject: glsl: Add GLSL_TYPE_FUNCTION to the base types enums --- src/glsl/ast_to_hir.cpp | 1 + src/glsl/glsl_types.cpp | 3 +++ src/glsl/glsl_types.h | 1 + src/glsl/ir_clone.cpp | 1 + src/glsl/link_uniform_initializers.cpp | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 1 + src/mesa/drivers/dri/i965/brw_shader.cpp | 1 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 1 + src/mesa/program/ir_to_mesa.cpp | 2 ++ 9 files changed, 12 insertions(+) (limited to 'src') diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 517841c99f8..81b44bd6786 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -1019,6 +1019,7 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_SUBROUTINE: /* I assume a comparison of a struct containing a sampler just diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 755618ac28b..50409dd50de 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -1029,6 +1029,8 @@ glsl_type::component_slots() const return 1; case GLSL_TYPE_SUBROUTINE: return 1; + + case GLSL_TYPE_FUNCTION: case 
GLSL_TYPE_SAMPLER: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_VOID: @@ -1402,6 +1404,7 @@ glsl_type::count_attribute_slots() const case GLSL_TYPE_ARRAY: return this->length * this->fields.array->count_attribute_slots(); + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 02a398f6112..3c5345955fa 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -56,6 +56,7 @@ enum glsl_base_type { GLSL_TYPE_IMAGE, GLSL_TYPE_ATOMIC_UINT, GLSL_TYPE_STRUCT, + GLSL_TYPE_FUNCTION, GLSL_TYPE_INTERFACE, GLSL_TYPE_ARRAY, GLSL_TYPE_VOID, diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp index a8fac183a8d..4edf70dba5d 100644 --- a/src/glsl/ir_clone.cpp +++ b/src/glsl/ir_clone.cpp @@ -363,6 +363,7 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const return c; } + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: diff --git a/src/glsl/link_uniform_initializers.cpp b/src/glsl/link_uniform_initializers.cpp index 05000fc39ef..f238513f174 100644 --- a/src/glsl/link_uniform_initializers.cpp +++ b/src/glsl/link_uniform_initializers.cpp @@ -88,6 +88,7 @@ copy_constant_to_storage(union gl_constant_value *storage, case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_VOID: case GLSL_TYPE_SUBROUTINE: case GLSL_TYPE_ERROR: diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 269914d64a8..39c3eb2d4a1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -491,6 +491,7 @@ type_size_scalar(const struct glsl_type *type) case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 
445764d3d06..0007e5c07a5 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -459,6 +459,7 @@ brw_type_for_base_type(const struct glsl_type *type) case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index ca86e8b6d56..499f6288aee 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -643,6 +643,7 @@ type_size_vec4(const struct glsl_type *type) case GLSL_TYPE_DOUBLE: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 8f58f3edf98..b8b082e2a59 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -543,6 +543,7 @@ type_size(const struct glsl_type *type) case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: assert(!"Invalid type in type_size"); break; } @@ -2463,6 +2464,7 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, case GLSL_TYPE_STRUCT: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: assert(!"Should not get here."); break; } -- cgit v1.2.3 From 0db3e4dd7269c9eee48b165ce3560c042e73e8c0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Feb 2015 16:29:33 -0800 Subject: glsl/types: Add support for function types --- src/glsl/glsl_types.cpp | 102 ++++++++++++++++++++++++++++++++++++++++++++++++ src/glsl/glsl_types.h | 23 ++++++++++- 2 files changed, 124 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 50409dd50de..c737fb6e366 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -32,6 +32,7 @@ mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP; 
hash_table *glsl_type::array_types = NULL; hash_table *glsl_type::record_types = NULL; hash_table *glsl_type::interface_types = NULL; +hash_table *glsl_type::function_types = NULL; hash_table *glsl_type::subroutine_types = NULL; void *glsl_type::mem_ctx = NULL; @@ -162,6 +163,39 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, mtx_unlock(&glsl_type::mutex); } +glsl_type::glsl_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params) : + gl_type(0), + base_type(GLSL_TYPE_FUNCTION), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(0), matrix_columns(0), + length(num_params) +{ + unsigned int i; + + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + + this->fields.parameters = rzalloc_array(this->mem_ctx, + glsl_function_param, num_params + 1); + + /* We store the return type as the first parameter */ + this->fields.parameters[0].type = return_type; + this->fields.parameters[0].in = false; + this->fields.parameters[0].out = true; + + /* We store the i'th parameter in slot i+1 */ + for (i = 0; i < length; i++) { + this->fields.parameters[i + 1].type = params[i].type; + this->fields.parameters[i + 1].in = params[i].in; + this->fields.parameters[i + 1].out = params[i].out; + } + + mtx_unlock(&glsl_type::mutex); +} + glsl_type::glsl_type(const char *subroutine_name) : gl_type(0), base_type(GLSL_TYPE_SUBROUTINE), @@ -900,6 +934,74 @@ glsl_type::get_subroutine_instance(const char *subroutine_name) } +static bool +function_key_compare(const void *a, const void *b) +{ + const glsl_type *const key1 = (glsl_type *) a; + const glsl_type *const key2 = (glsl_type *) b; + + if (key1->length != key2->length) + return 1; + + return memcmp(key1->fields.parameters, key2->fields.parameters, + (key1->length + 1) * sizeof(*key1->fields.parameters)); +} + + +static uint32_t +function_key_hash(const void *a) +{ + const glsl_type *const key = 
(glsl_type *) a; + char hash_key[128]; + unsigned size = 0; + + size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length); + + for (unsigned i = 0; i < key->length; i++) { + if (size >= sizeof(hash_key)) + break; + + size += snprintf(& hash_key[size], sizeof(hash_key) - size, + "%p", (void *) key->fields.structure[i].type); + } + + return _mesa_hash_string(hash_key); +} + +const glsl_type * +glsl_type::get_function_instance(const glsl_type *return_type, + const glsl_function_param *params, + unsigned num_params) +{ + const glsl_type key(return_type, params, num_params); + + mtx_lock(&glsl_type::mutex); + + if (function_types == NULL) { + function_types = _mesa_hash_table_create(NULL, function_key_hash, + function_key_compare); + } + + struct hash_entry *entry = _mesa_hash_table_search(function_types, &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(return_type, params, num_params); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(function_types, t, (void *) t); + } + + const glsl_type *t = (const glsl_type *)entry->data; + + assert(t->base_type == GLSL_TYPE_FUNCTION); + assert(t->length == num_params); + + mtx_unlock(&glsl_type::mutex); + + return t; +} + + const glsl_type * glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b) { diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 3c5345955fa..daa809e3bae 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -180,7 +180,7 @@ struct glsl_type { */ union { const struct glsl_type *array; /**< Type of array elements. */ - const struct glsl_type *parameters; /**< Parameters to function. */ + struct glsl_function_param *parameters; /**< Parameters to function. */ struct glsl_struct_field *structure; /**< List of struct fields. 
*/ } fields; @@ -270,6 +270,13 @@ struct glsl_type { */ static const glsl_type *get_subroutine_instance(const char *subroutine_name); + /** + * Get the instance of a function type + */ + static const glsl_type *get_function_instance(const struct glsl_type *return_type, + const glsl_function_param *parameters, + unsigned num_params); + /** * Get the type resulting from a multiplication of \p type_a * \p type_b */ @@ -690,6 +697,10 @@ private: glsl_type(const glsl_struct_field *fields, unsigned num_fields, enum glsl_interface_packing packing, const char *name); + /** Constructor for interface types */ + glsl_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params); + /** Constructor for array types */ glsl_type(const glsl_type *array, unsigned length); @@ -708,6 +719,9 @@ private: /** Hash table containing the known subroutine types. */ static struct hash_table *subroutine_types; + /** Hash table containing the known function types. */ + static struct hash_table *function_types; + static bool record_key_compare(const void *a, const void *b); static unsigned record_key_hash(const void *key); @@ -797,6 +811,13 @@ struct glsl_struct_field { } }; +struct glsl_function_param { + const struct glsl_type *type; + + bool in; + bool out; +}; + static inline unsigned int glsl_align(unsigned int a, unsigned int align) { -- cgit v1.2.3 From 53bff3e445b7c9697d7cfbbcd2207b21bbc6e08a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:48:12 -0700 Subject: glsl/types: Expose the function_param and struct_field structs to C Previously, they were hidden behind a #ifdef __cplusplus so C wouldn't find them. This commit simpliy moves the #ifdef and adds #ifdef's around constructors. 
--- src/glsl/glsl_types.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index daa809e3bae..0a8a0b97dc9 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -749,6 +749,10 @@ private: /*@}*/ }; +#undef DECL_TYPE +#undef STRUCT_TYPE +#endif /* __cplusplus */ + struct glsl_struct_field { const struct glsl_type *type; const char *name; @@ -797,6 +801,7 @@ struct glsl_struct_field { */ int stream; +#ifdef __cplusplus glsl_struct_field(const struct glsl_type *_type, const char *_name) : type(_type), name(_name), location(-1), interpolation(0), centroid(0), sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), @@ -809,6 +814,7 @@ struct glsl_struct_field { { /* empty */ } +#endif }; struct glsl_function_param { @@ -824,8 +830,4 @@ glsl_align(unsigned int a, unsigned int align) return (a + align - 1) / align * align; } -#undef DECL_TYPE -#undef STRUCT_TYPE -#endif /* __cplusplus */ - #endif /* GLSL_TYPES_H */ -- cgit v1.2.3 From 5bb94c9b120cfd93ed362c9fa77ea274376b1d3b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:28:37 -0700 Subject: nir/types: Add more helpers for creating types --- src/glsl/nir/nir_types.cpp | 44 ++++++++++++++++++++++++++++++++++++++++++-- src/glsl/nir/nir_types.h | 14 +++++++++++++- 2 files changed, 55 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 940c676005a..937a842d98e 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -149,9 +149,9 @@ glsl_float_type(void) } const glsl_type * -glsl_vec4_type(void) +glsl_int_type(void) { - return glsl_type::vec4_type; + return glsl_type::int_type; } const glsl_type * @@ -160,8 +160,48 @@ glsl_uint_type(void) return glsl_type::uint_type; } +const glsl_type * +glsl_bool_type(void) +{ + return glsl_type::bool_type; +} + +const glsl_type * +glsl_vec4_type(void) +{ + return 
glsl_type::vec4_type; +} + +const glsl_type * +glsl_vector_type(enum glsl_base_type base_type, unsigned components) +{ + assert(components > 1 && components <= 4); + return glsl_type::get_instance(base_type, components, 1); +} + +const glsl_type * +glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns) +{ + assert(rows > 1 && rows <= 4 && columns > 1 && columns <= 4); + return glsl_type::get_instance(base_type, rows, columns); +} + const glsl_type * glsl_array_type(const glsl_type *base, unsigned elements) { return glsl_type::get_array_instance(base, elements); } + +const glsl_type * +glsl_struct_type(const glsl_struct_field *fields, + unsigned num_fields, const char *name) +{ + return glsl_type::get_record_instance(fields, num_fields, name); +} + +const glsl_type * +glsl_function_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params) +{ + return glsl_type::get_function_instance(return_type, params, num_params); +} diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index a8ff8f2c606..aad43f7a8c0 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -70,10 +70,22 @@ bool glsl_type_is_matrix(const struct glsl_type *type); const struct glsl_type *glsl_void_type(void); const struct glsl_type *glsl_float_type(void); -const struct glsl_type *glsl_vec4_type(void); +const struct glsl_type *glsl_int_type(void); const struct glsl_type *glsl_uint_type(void); +const struct glsl_type *glsl_bool_type(void); + +const struct glsl_type *glsl_vec4_type(void); +const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type, + unsigned components); +const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type, + unsigned rows, unsigned columns); const struct glsl_type *glsl_array_type(const struct glsl_type *base, unsigned elements); +const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields, + unsigned num_fields, const char *name); +const struct 
glsl_type * glsl_function_type(const struct glsl_type *return_type, + const struct glsl_function_param *params, + unsigned num_params); #ifdef __cplusplus } -- cgit v1.2.3 From 2a023f30a644821550e0d529078a05af12188fcb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 14:36:01 -0700 Subject: nir/spirv: Add basic support for types --- src/glsl/nir/spirv_to_nir.c | 89 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 05193531107..6c9b776922a 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -26,6 +26,7 @@ */ #include "nir_spirv.h" +#include "nir_vla.h" #include "spirv.h" struct vtn_decoration; @@ -35,6 +36,7 @@ enum vtn_value_type { vtn_value_type_undef, vtn_value_type_string, vtn_value_type_decoration_group, + vtn_value_type_type, vtn_value_type_ssa, vtn_value_type_deref, }; @@ -46,6 +48,7 @@ struct vtn_value { union { void *ptr; char *str; + const struct glsl_type *type; nir_ssa_def *ssa; nir_deref_var *deref; }; @@ -184,11 +187,88 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, } } -static void +static const struct glsl_type * vtn_handle_type(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) + const uint32_t *args, unsigned count) { - unreachable("Unhandled opcode"); + switch (opcode) { + case SpvOpTypeVoid: + return glsl_void_type(); + case SpvOpTypeBool: + return glsl_bool_type(); + case SpvOpTypeInt: + return glsl_int_type(); + case SpvOpTypeFloat: + return glsl_float_type(); + + case SpvOpTypeVector: { + const struct glsl_type *base = b->values[args[0]].type; + unsigned elems = args[1]; + + assert(glsl_type_is_scalar(base)); + return glsl_vector_type(glsl_get_base_type(base), elems); + } + + case SpvOpTypeMatrix: { + const struct glsl_type *base = b->values[args[0]].type; + unsigned columns = args[1]; + + assert(glsl_type_is_vector(base)); 
+ return glsl_matrix_type(glsl_get_base_type(base), + glsl_get_vector_elements(base), + columns); + } + + case SpvOpTypeArray: + return glsl_array_type(b->values[args[0]].type, args[1]); + + case SpvOpTypeStruct: { + NIR_VLA(struct glsl_struct_field, fields, count); + for (unsigned i = 0; i < count; i++) { + /* TODO: Handle decorators */ + fields[i].type = b->values[args[i]].type; + fields[i].name = ralloc_asprintf(b, "field%d", i); + fields[i].location = -1; + fields[i].interpolation = 0; + fields[i].centroid = 0; + fields[i].sample = 0; + fields[i].matrix_layout = 2; + fields[i].stream = -1; + } + return glsl_struct_type(fields, count, "struct"); + } + + case SpvOpTypeFunction: { + const struct glsl_type *return_type = b->values[args[0]].type; + NIR_VLA(struct glsl_function_param, params, count - 1); + for (unsigned i = 1; i < count; i++) { + params[i - 1].type = b->values[args[i]].type; + + /* FIXME: */ + params[i - 1].in = true; + params[i - 1].out = true; + } + return glsl_function_type(return_type, params, count - 1); + } + + case SpvOpTypePointer: + /* FIXME: For now, we'll just do the really lame thing and return + * the same type. 
The validator should ensure that the proper number + * of dereferences happen + */ + return b->values[args[0]].type; + + case SpvOpTypeSampler: + case SpvOpTypeRuntimeArray: + case SpvOpTypeOpaque: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + default: + unreachable("Unhandled opcode"); + } } static void @@ -279,7 +359,8 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpTypeReserveId: case SpvOpTypeQueue: case SpvOpTypePipe: - vtn_handle_type(b, opcode, w, count); + vtn_push_value(b, w[1], vtn_value_type_type)->type = + vtn_handle_type(b, opcode, &w[2], count - 2); break; case SpvOpConstantTrue: -- cgit v1.2.3 From 24940556313ccb21c79fdbe6a0cdd061f49ba4c8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 18:14:11 -0700 Subject: nir/spirv: Add support for constants --- src/glsl/nir/spirv_to_nir.c | 68 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 6c9b776922a..f22ec77b68e 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -37,6 +37,7 @@ enum vtn_value_type { vtn_value_type_string, vtn_value_type_decoration_group, vtn_value_type_type, + vtn_value_type_constant, vtn_value_type_ssa, vtn_value_type_deref, }; @@ -49,6 +50,7 @@ struct vtn_value { void *ptr; char *str; const struct glsl_type *type; + nir_constant *constant; nir_ssa_def *ssa; nir_deref_var *deref; }; @@ -84,6 +86,15 @@ vtn_push_value(struct vtn_builder *b, uint32_t value_id, return &b->values[value_id]; } +static struct vtn_value * +vtn_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + assert(value_id < b->value_id_bound); + assert(b->values[value_id].value_type == value_type); + return &b->values[value_id]; +} + static char * vtn_string_literal(struct vtn_builder *b, const uint32_t 
*words, unsigned word_count) @@ -275,7 +286,62 @@ static void vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Unhandled opcode"); + const struct glsl_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_constant *constant = ralloc(b, nir_constant); + switch (opcode) { + case SpvOpConstantTrue: + assert(type == glsl_bool_type()); + constant->value.u[0] = NIR_TRUE; + break; + case SpvOpConstantFalse: + assert(type == glsl_bool_type()); + constant->value.u[0] = NIR_FALSE; + break; + case SpvOpConstant: + assert(glsl_type_is_scalar(type)); + constant->value.u[0] = w[3]; + break; + case SpvOpConstantComposite: { + unsigned elem_count = count - 3; + nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); + for (unsigned i = 0; i < elem_count; i++) + elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(type)) { + unsigned rows = glsl_get_vector_elements(type); + assert(glsl_get_matrix_columns(type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + for (unsigned j = 0; j < rows; j++) + constant->value.u[rows * i + j] = elems[i]->value.u[j]; + } else { + assert(glsl_type_is_vector(type)); + assert(glsl_get_vector_elements(type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + constant->value.u[i] = elems[i]->value.u[0]; + } + ralloc_free(elems); + break; + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + constant->elements = elems; + break; + + default: + unreachable("Unsupported type for constants"); + } + break; + } + + default: + unreachable("Unhandled opcode"); + } + vtn_push_value(b, w[2], vtn_value_type_constant)->constant = constant; } static void -- cgit v1.2.3 From 3a266a18ae5df27f78c442628a84ba2ab11dfb9d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 19:37:41 
-0700 Subject: nir/spirv: Add support for declaring variables Deref chains and variable load/store operations are still missing. --- src/glsl/nir/spirv_to_nir.c | 152 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index f22ec77b68e..ab8b4465324 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -38,6 +38,7 @@ enum vtn_value_type { vtn_value_type_decoration_group, vtn_value_type_type, vtn_value_type_constant, + vtn_value_type_variable, vtn_value_type_ssa, vtn_value_type_deref, }; @@ -51,6 +52,7 @@ struct vtn_value { char *str; const struct glsl_type *type; nir_constant *constant; + nir_variable *var; nir_ssa_def *ssa; nir_deref_var *deref; }; @@ -344,11 +346,159 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, vtn_push_value(b, w[2], vtn_value_type_constant)->constant = constant; } +static void +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, + const struct vtn_decoration *dec, void *unused) +{ + assert(val->value_type == vtn_value_type_variable); + nir_variable *var = val->var; + switch (dec->decoration) { + case SpvDecorationPrecisionLow: + case SpvDecorationPrecisionMedium: + case SpvDecorationPrecisionHigh: + break; /* FIXME: Do nothing with these for now. 
*/ + case SpvDecorationSmooth: + var->data.interpolation = INTERP_QUALIFIER_SMOOTH; + break; + case SpvDecorationNoperspective: + var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + var->data.interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + var->data.centroid = true; + break; + case SpvDecorationSample: + var->data.sample = true; + break; + case SpvDecorationInvariant: + var->data.invariant = true; + break; + case SpvDecorationConstant: + assert(var->constant_initializer != NULL); + var->data.read_only = true; + break; + case SpvDecorationNonwritable: + var->data.read_only = true; + break; + case SpvDecorationLocation: + var->data.explicit_location = true; + var->data.location = dec->literals[0]; + break; + case SpvDecorationComponent: + var->data.location_frac = dec->literals[0]; + break; + case SpvDecorationIndex: + var->data.explicit_index = true; + var->data.index = dec->literals[0]; + break; + case SpvDecorationBinding: + var->data.explicit_binding = true; + var->data.binding = dec->literals[0]; + break; + case SpvDecorationBlock: + case SpvDecorationBufferBlock: + case SpvDecorationRowMajor: + case SpvDecorationColMajor: + case SpvDecorationGLSLShared: + case SpvDecorationGLSLStd140: + case SpvDecorationGLSLStd430: + case SpvDecorationGLSLPacked: + case SpvDecorationPatch: + case SpvDecorationRestrict: + case SpvDecorationAliased: + case SpvDecorationVolatile: + case SpvDecorationCoherent: + case SpvDecorationNonreadable: + case SpvDecorationUniform: + /* This is really nice but we have no use for it right now. 
*/ + case SpvDecorationNoStaticUse: + case SpvDecorationCPacked: + case SpvDecorationSaturatedConversion: + case SpvDecorationStream: + case SpvDecorationDescriptorSet: + case SpvDecorationOffset: + case SpvDecorationAlignment: + case SpvDecorationXfbBuffer: + case SpvDecorationStride: + case SpvDecorationBuiltIn: + case SpvDecorationFuncParamAttr: + case SpvDecorationFPRoundingMode: + case SpvDecorationFPFastMathMode: + case SpvDecorationLinkageAttributes: + case SpvDecorationSpecId: + default: + unreachable("Unhandled variable decoration"); + } +} + static void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Unhandled opcode"); + switch (opcode) { + case SpvOpVariable: { + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_variable); + + nir_variable *var = ralloc(b->shader, nir_variable); + val->var = var; + + var->type = type; + var->name = ralloc_strdup(var, val->name); + + switch ((SpvStorageClass)w[3]) { + case SpvStorageClassUniformConstant: + var->data.mode = nir_var_uniform; + var->data.read_only = true; + break; + case SpvStorageClassInput: + var->data.mode = nir_var_shader_in; + var->data.read_only = true; + break; + case SpvStorageClassOutput: + var->data.mode = nir_var_shader_out; + break; + case SpvStorageClassPrivateGlobal: + var->data.mode = nir_var_global; + break; + case SpvStorageClassFunction: + var->data.mode = nir_var_local; + break; + case SpvStorageClassUniform: + case SpvStorageClassWorkgroupLocal: + case SpvStorageClassWorkgroupGlobal: + case SpvStorageClassGeneric: + case SpvStorageClassPrivate: + case SpvStorageClassAtomicCounter: + default: + unreachable("Unhandled variable storage class"); + } + + if (count > 4) { + assert(count == 5); + var->constant_initializer = + vtn_value(b, w[4], vtn_value_type_constant)->constant; + } + + vtn_foreach_decoration(b, val, var_decoration_cb, 
NULL); + break; + } + + case SpvOpVariableArray: + case SpvOpLoad: + case SpvOpStore: + case SpvOpCopyMemory: + case SpvOpCopyMemorySized: + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: + case SpvOpArrayLength: + case SpvOpImagePointer: + default: + unreachable("Unhandled opcode"); + } } static void -- cgit v1.2.3 From ac4d459aa2e018d8a4e1ff6b739dc6f93e5730f8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 20:09:36 -0700 Subject: nir/types: Add accessors for function parameter/return types --- src/glsl/nir/nir_types.cpp | 12 ++++++++++++ src/glsl/nir/nir_types.h | 6 ++++++ 2 files changed, 18 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 937a842d98e..f2894d40c78 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -70,6 +70,18 @@ glsl_get_struct_field(const glsl_type *type, unsigned index) return type->fields.structure[index].type; } +const glsl_type * +glsl_get_function_return_type(const glsl_type *type) +{ + return type->fields.parameters[0].type; +} + +const glsl_function_param * +glsl_get_function_param(const glsl_type *type, unsigned index) +{ + return &type->fields.parameters[index + 1]; +} + const struct glsl_type * glsl_get_column_type(const struct glsl_type *type) { diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index aad43f7a8c0..dd535770c9f 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -49,6 +49,12 @@ const struct glsl_type *glsl_get_array_element(const struct glsl_type *type); const struct glsl_type *glsl_get_column_type(const struct glsl_type *type); +const struct glsl_type * +glsl_get_function_return_type(const struct glsl_type *type); + +const struct glsl_function_param * +glsl_get_function_param(const struct glsl_type *type, unsigned index); + enum glsl_base_type glsl_get_base_type(const struct glsl_type *type); unsigned glsl_get_vector_elements(const struct glsl_type *type); -- cgit v1.2.3 
From 399e962d2553814c947f25773c07a5b2944fdb9b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 20:10:20 -0700 Subject: nir/spirv: Add support for declaring functions --- src/glsl/nir/spirv_to_nir.c | 67 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index ab8b4465324..67db9a99e9e 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -39,6 +39,7 @@ enum vtn_value_type { vtn_value_type_type, vtn_value_type_constant, vtn_value_type_variable, + vtn_value_type_function, vtn_value_type_ssa, vtn_value_type_deref, }; @@ -53,6 +54,7 @@ struct vtn_value { const struct glsl_type *type; nir_constant *constant; nir_variable *var; + nir_function_impl *impl; nir_ssa_def *ssa; nir_deref_var *deref; }; @@ -68,6 +70,7 @@ struct vtn_decoration { struct vtn_builder { nir_shader *shader; nir_function_impl *impl; + struct exec_list *cf_list; unsigned value_id_bound; struct vtn_value *values; @@ -501,6 +504,63 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } } +static void +vtn_handle_functions(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpFunction: { + assert(b->impl == NULL); + + const struct glsl_type *result_type = + vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + const struct glsl_type *func_type = + vtn_value(b, w[4], vtn_value_type_type)->type; + + assert(glsl_get_function_return_type(func_type) == result_type); + + nir_function *func = + nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); + + nir_function_overload *overload = nir_function_overload_create(func); + overload->num_params = glsl_get_length(func_type); + overload->params = ralloc_array(overload, nir_parameter, + overload->num_params); + for (unsigned i = 0; i < overload->num_params; i++) 
{ + const struct glsl_function_param *param = + glsl_get_function_param(func_type, i); + overload->params[i].type = param->type; + if (param->in) { + if (param->out) { + overload->params[i].param_type = nir_parameter_inout; + } else { + overload->params[i].param_type = nir_parameter_in; + } + } else { + if (param->out) { + overload->params[i].param_type = nir_parameter_out; + } else { + assert(!"Parameter is neither in nor out"); + } + } + } + + val->impl = b->impl = nir_function_impl_create(overload); + b->cf_list = &b->impl->body; + + break; + } + case SpvOpFunctionEnd: + b->impl = NULL; + break; + case SpvOpFunctionParameter: + case SpvOpFunctionCall: + default: + unreachable("Unhandled opcode"); + } +} + static void vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -614,6 +674,13 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_decoration(b, opcode, w, count); break; + case SpvOpFunction: + case SpvOpFunctionEnd: + case SpvOpFunctionParameter: + case SpvOpFunctionCall: + vtn_handle_functions(b, opcode, w, count); + break; + case SpvOpTextureSample: case SpvOpTextureSampleDref: case SpvOpTextureSampleLod: -- cgit v1.2.3 From befecb3c553fd7418ff9bc8a85e685b84621cb58 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 20:19:34 -0700 Subject: nir/spirv: Add support for OpLabel --- src/glsl/nir/spirv_to_nir.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 67db9a99e9e..5b171ece8bf 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -40,6 +40,7 @@ enum vtn_value_type { vtn_value_type_constant, vtn_value_type_variable, vtn_value_type_function, + vtn_value_type_block, vtn_value_type_ssa, vtn_value_type_deref, }; @@ -55,6 +56,7 @@ struct vtn_value { nir_constant *constant; nir_variable *var; nir_function_impl *impl; + nir_block *block; nir_ssa_def *ssa; 
nir_deref_var *deref; }; @@ -612,6 +614,17 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, b->execution_model = w[1]; break; + case SpvOpLabel: { + struct exec_node *list_tail = exec_list_get_tail(b->cf_list); + nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); + assert(tail_node->type == nir_cf_node_block); + nir_block *block = nir_cf_node_as_block(tail_node); + + assert(exec_list_is_empty(&block->instr_list)); + vtn_push_value(b, w[1], vtn_value_type_block)->block = block; + break; + } + case SpvOpExtInstImport: case SpvOpExtInst: vtn_handle_extension(b, opcode, w, count); -- cgit v1.2.3 From 366366c7f7fe24d9bc2a7f3d7464c49ccdb73813 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 20:56:17 -0700 Subject: nir/types: Add a scalar type constructor --- src/glsl/nir/nir_types.cpp | 6 ++++++ src/glsl/nir/nir_types.h | 1 + 2 files changed, 7 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index f2894d40c78..f93a52b5fa5 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -184,6 +184,12 @@ glsl_vec4_type(void) return glsl_type::vec4_type; } +const glsl_type * +glsl_scalar_type(enum glsl_base_type base_type) +{ + return glsl_type::get_instance(base_type, 1, 1); +} + const glsl_type * glsl_vector_type(enum glsl_base_type base_type, unsigned components) { diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index dd535770c9f..40a80ec7130 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -81,6 +81,7 @@ const struct glsl_type *glsl_uint_type(void); const struct glsl_type *glsl_bool_type(void); const struct glsl_type *glsl_vec4_type(void); +const struct glsl_type *glsl_scalar_type(enum glsl_base_type base_type); const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type, unsigned components); const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type, -- cgit v1.2.3 From 
5e6c5e3c8e7a68452d8f2afc27472337a2457114 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 Apr 2015 20:56:36 -0700 Subject: nir/spirv: Add support for deref chains --- src/glsl/nir/spirv_to_nir.c | 86 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 74 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 5b171ece8bf..a7ce17a77d5 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -38,11 +38,10 @@ enum vtn_value_type { vtn_value_type_decoration_group, vtn_value_type_type, vtn_value_type_constant, - vtn_value_type_variable, + vtn_value_type_deref, vtn_value_type_function, vtn_value_type_block, vtn_value_type_ssa, - vtn_value_type_deref, }; struct vtn_value { @@ -54,11 +53,10 @@ struct vtn_value { char *str; const struct glsl_type *type; nir_constant *constant; - nir_variable *var; + nir_deref_var *deref; nir_function_impl *impl; nir_block *block; nir_ssa_def *ssa; - nir_deref_var *deref; }; }; @@ -353,10 +351,13 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, static void var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, - const struct vtn_decoration *dec, void *unused) + const struct vtn_decoration *dec, void *void_var) { - assert(val->value_type == vtn_value_type_variable); - nir_variable *var = val->var; + assert(val->value_type == vtn_value_type_deref); + assert(val->deref->deref.child == NULL); + assert(val->deref->var == void_var); + + nir_variable *var = void_var; switch (dec->decoration) { case SpvDecorationPrecisionLow: case SpvDecorationPrecisionMedium: @@ -446,10 +447,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvOpVariable: { const struct glsl_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_variable); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); nir_variable *var = ralloc(b->shader, 
nir_variable); - val->var = var; var->type = type; var->name = ralloc_strdup(var, val->name); @@ -488,7 +488,71 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, vtn_value(b, w[4], vtn_value_type_constant)->constant; } - vtn_foreach_decoration(b, val, var_decoration_cb, NULL); + val->deref = nir_deref_var_create(b->shader, var); + + vtn_foreach_decoration(b, val, var_decoration_cb, var); + break; + } + + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + nir_deref_var *base = vtn_value(b, w[3], vtn_value_type_deref)->deref; + val->deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); + + nir_deref *tail = &val->deref->deref; + while (tail->child) + tail = tail->child; + + for (unsigned i = 0; i < count - 3; i++) { + assert(w[i + 3] < b->value_id_bound); + struct vtn_value *idx_val = &b->values[w[i + 3]]; + + enum glsl_base_type base_type = glsl_get_base_type(tail->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_ARRAY: { + nir_deref_array *deref_arr = nir_deref_array_create(b); + if (base_type == GLSL_TYPE_ARRAY) { + deref_arr->deref.type = glsl_get_array_element(tail->type); + } else if (glsl_type_is_matrix(tail->type)) { + deref_arr->deref.type = glsl_get_column_type(tail->type); + } else { + assert(glsl_type_is_vector(tail->type)); + deref_arr->deref.type = glsl_scalar_type(base_type); + } + + if (idx_val->value_type == vtn_value_type_constant) { + unsigned idx = idx_val->constant->value.u[0]; + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = idx; + } else { + assert(idx_val->value_type == vtn_value_type_ssa); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + /* TODO */ + unreachable("Indirect array accesses not implemented"); + } + tail->child = &deref_arr->deref; + break; + } + + case 
GLSL_TYPE_STRUCT: { + assert(idx_val->value_type == vtn_value_type_constant); + unsigned idx = idx_val->constant->value.u[0]; + nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); + deref_struct->deref.type = glsl_get_struct_field(tail->type, idx); + tail->child = &deref_struct->deref; + break; + } + default: + unreachable("Invalid type for deref"); + } + tail = tail->child; + } break; } @@ -497,8 +561,6 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvOpStore: case SpvOpCopyMemory: case SpvOpCopyMemorySized: - case SpvOpAccessChain: - case SpvOpInBoundsAccessChain: case SpvOpArrayLength: case SpvOpImagePointer: default: -- cgit v1.2.3 From 67af6c59f22cf5a223e539faa4215e7722bda75c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 May 2015 11:26:40 -0700 Subject: nir/types: Add an is_vector_or_scalar helper --- src/glsl/nir/nir_types.cpp | 6 ++++++ src/glsl/nir/nir_types.h | 1 + 2 files changed, 7 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index f93a52b5fa5..a6d35fe6179 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -142,6 +142,12 @@ glsl_type_is_scalar(const struct glsl_type *type) return type->is_scalar(); } +bool +glsl_type_is_vector_or_scalar(const struct glsl_type *type) +{ + return type->is_vector() || type->is_scalar(); +} + bool glsl_type_is_matrix(const struct glsl_type *type) { diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index 40a80ec7130..f19f0e5db5d 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -72,6 +72,7 @@ const char *glsl_get_struct_elem_name(const struct glsl_type *type, bool glsl_type_is_void(const struct glsl_type *type); bool glsl_type_is_vector(const struct glsl_type *type); bool glsl_type_is_scalar(const struct glsl_type *type); +bool glsl_type_is_vector_or_scalar(const struct glsl_type *type); bool glsl_type_is_matrix(const struct glsl_type *type); const struct 
glsl_type *glsl_void_type(void); -- cgit v1.2.3 From e709a4ebb8d3ebcdd101899223dd239b647a540b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 May 2015 11:27:21 -0700 Subject: nir/spirv: Use vtn_value in the types code and fix a off-by-one error --- src/glsl/nir/spirv_to_nir.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a7ce17a77d5..abcdd66a4f7 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -218,7 +218,8 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, return glsl_float_type(); case SpvOpTypeVector: { - const struct glsl_type *base = b->values[args[0]].type; + const struct glsl_type *base = + vtn_value(b, args[0], vtn_value_type_type)->type; unsigned elems = args[1]; assert(glsl_type_is_scalar(base)); @@ -226,7 +227,8 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, } case SpvOpTypeMatrix: { - const struct glsl_type *base = b->values[args[0]].type; + const struct glsl_type *base = + vtn_value(b, args[0], vtn_value_type_type)->type; unsigned columns = args[1]; assert(glsl_type_is_vector(base)); @@ -242,7 +244,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, NIR_VLA(struct glsl_struct_field, fields, count); for (unsigned i = 0; i < count; i++) { /* TODO: Handle decorators */ - fields[i].type = b->values[args[i]].type; + fields[i].type = vtn_value(b, args[i], vtn_value_type_type)->type; fields[i].name = ralloc_asprintf(b, "field%d", i); fields[i].location = -1; fields[i].interpolation = 0; @@ -258,7 +260,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, const struct glsl_type *return_type = b->values[args[0]].type; NIR_VLA(struct glsl_function_param, params, count - 1); for (unsigned i = 1; i < count; i++) { - params[i - 1].type = b->values[args[i]].type; + params[i - 1].type = vtn_value(b, args[i], vtn_value_type_type)->type; /* FIXME: */ params[i - 1].in = true; @@ -272,7 +274,7 
@@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, * the same type. The validator should ensure that the proper number * of dereferences happen */ - return b->values[args[0]].type; + return vtn_value(b, args[1], vtn_value_type_type)->type; case SpvOpTypeSampler: case SpvOpTypeRuntimeArray: -- cgit v1.2.3 From 4fa1366392ef3c9779ffe265bd6c39bd483249de Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 May 2015 11:27:44 -0700 Subject: nir/spirv: Add a vtn_untyped_value helper --- src/glsl/nir/spirv_to_nir.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index abcdd66a4f7..a71119fe524 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -92,14 +92,21 @@ vtn_push_value(struct vtn_builder *b, uint32_t value_id, } static struct vtn_value * -vtn_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) +vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) { assert(value_id < b->value_id_bound); - assert(b->values[value_id].value_type == value_type); return &b->values[value_id]; } +static struct vtn_value * +vtn_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + assert(val->value_type == value_type); + return val; +} + static char * vtn_string_literal(struct vtn_builder *b, const uint32_t *words, unsigned word_count) -- cgit v1.2.3 From 112c607216ae53ad19380970988309a96d5a1fd8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 May 2015 11:28:01 -0700 Subject: nir/spirv: Actaully add variables to the funciton or shader --- src/glsl/nir/spirv_to_nir.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a71119fe524..61514c1d469 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -497,6 
+497,12 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, vtn_value(b, w[4], vtn_value_type_constant)->constant; } + if (var->data.mode == nir_var_local) { + exec_list_push_tail(&b->impl->locals, &var->node); + } else { + exec_list_push_tail(&b->shader->globals, &var->node); + } + val->deref = nir_deref_var_create(b->shader, var); vtn_foreach_decoration(b, val, var_decoration_cb, var); -- cgit v1.2.3 From 7d64741a5e68c238e49486730b2aa3a32fefa49f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 10:19:24 -0700 Subject: nir: Add a helper for getting the tail of a deref chain --- src/glsl/nir/nir.h | 9 +++++++++ src/glsl/nir/nir_lower_var_copies.c | 15 ++------------- src/glsl/nir/nir_split_var_copies.c | 12 ++---------- 3 files changed, 13 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 9703372fcc0..fc8f27a7fe5 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -777,6 +777,15 @@ NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref) NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref) NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref) +/** Returns the tail of a deref chain */ +static inline nir_deref * +nir_deref_tail(nir_deref *deref) +{ + while (deref->child) + deref = deref->child; + return deref; +} + typedef struct { nir_instr instr; diff --git a/src/glsl/nir/nir_lower_var_copies.c b/src/glsl/nir/nir_lower_var_copies.c index 21672901f04..98c107aa50e 100644 --- a/src/glsl/nir/nir_lower_var_copies.c +++ b/src/glsl/nir/nir_lower_var_copies.c @@ -53,17 +53,6 @@ deref_next_wildcard_parent(nir_deref *deref) return NULL; } -/* Returns the last deref in the chain. 
- */ -static nir_deref * -get_deref_tail(nir_deref *deref) -{ - while (deref->child) - deref = deref->child; - - return deref; -} - /* This function recursively walks the given deref chain and replaces the * given copy instruction with an equivalent sequence load/store * operations. @@ -121,8 +110,8 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, } else { /* In this case, we have no wildcards anymore, so all we have to do * is just emit the load and store operations. */ - src_tail = get_deref_tail(src_tail); - dest_tail = get_deref_tail(dest_tail); + src_tail = nir_deref_tail(src_tail); + dest_tail = nir_deref_tail(dest_tail); assert(src_tail->type == dest_tail->type); diff --git a/src/glsl/nir/nir_split_var_copies.c b/src/glsl/nir/nir_split_var_copies.c index fc72c078c77..5c163b59819 100644 --- a/src/glsl/nir/nir_split_var_copies.c +++ b/src/glsl/nir/nir_split_var_copies.c @@ -66,14 +66,6 @@ struct split_var_copies_state { void *dead_ctx; }; -static nir_deref * -get_deref_tail(nir_deref *deref) -{ - while (deref->child != NULL) - deref = deref->child; - return deref; -} - /* Recursively constructs deref chains to split a copy instruction into * multiple (if needed) copy instructions with full-length deref chains. 
* External callers of this function should pass the tail and head of the @@ -225,8 +217,8 @@ split_var_copies_block(nir_block *block, void *void_state) nir_deref *dest_head = &intrinsic->variables[0]->deref; nir_deref *src_head = &intrinsic->variables[1]->deref; - nir_deref *dest_tail = get_deref_tail(dest_head); - nir_deref *src_tail = get_deref_tail(src_head); + nir_deref *dest_tail = nir_deref_tail(dest_head); + nir_deref *src_tail = nir_deref_tail(src_head); switch (glsl_get_base_type(src_tail->type)) { case GLSL_TYPE_ARRAY: -- cgit v1.2.3 From 7bf4b53f1ccb29c2beca24be19fcf76436cad0d1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 May 2015 11:28:18 -0700 Subject: nir/spirv: Implement load/store instructiosn --- src/glsl/nir/spirv_to_nir.c | 72 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 61514c1d469..b9d35766019 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -519,9 +519,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, while (tail->child) tail = tail->child; - for (unsigned i = 0; i < count - 3; i++) { - assert(w[i + 3] < b->value_id_bound); - struct vtn_value *idx_val = &b->values[w[i + 3]]; + for (unsigned i = 0; i < count - 4; i++) { + assert(w[i + 4] < b->value_id_bound); + struct vtn_value *idx_val = &b->values[w[i + 4]]; enum glsl_base_type base_type = glsl_get_base_type(tail->type); switch (base_type) { @@ -571,10 +571,70 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, break; } + case SpvOpCopyMemory: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + copy->variables[1] = 
nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + + nir_instr_insert_after_cf_list(b->cf_list, ©->instr); + break; + } + + case SpvOpLoad: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + nir_deref_var *src = vtn_value(b, w[3], vtn_value_type_deref)->deref; + const struct glsl_type *src_type = nir_deref_tail(&src->deref)->type; + assert(glsl_type_is_vector_or_scalar(src_type)); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src->deref)); + load->num_components = glsl_get_vector_elements(src_type); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, + val->name); + + nir_instr_insert_after_cf_list(b->cf_list, &load->instr); + val->ssa = &load->dest.ssa; + break; + } + + case SpvOpStore: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + const struct glsl_type *dest_type = nir_deref_tail(&dest->deref)->type; + struct vtn_value *src_val = vtn_untyped_value(b, w[2]); + if (src_val->value_type == vtn_value_type_ssa) { + assert(glsl_type_is_vector_or_scalar(dest_type)); + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->src[0] = nir_src_for_ssa(src_val->ssa); + store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest->deref)); + store->num_components = glsl_get_vector_elements(dest_type); + + nir_instr_insert_after_cf_list(b->cf_list, &store->instr); + } else { + assert(src_val->value_type == vtn_value_type_constant); + + nir_variable *const_tmp = rzalloc(b->shader, nir_variable); + const_tmp->type = dest_type; + const_tmp->data.mode = nir_var_local; + const_tmp->data.read_only = true; + exec_list_push_tail(&b->impl->locals, &const_tmp->node); + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + 
copy->variables[1] = nir_deref_var_create(copy, const_tmp); + + nir_instr_insert_after_cf_list(b->cf_list, ©->instr); + } + break; + } + case SpvOpVariableArray: - case SpvOpLoad: - case SpvOpStore: - case SpvOpCopyMemory: case SpvOpCopyMemorySized: case SpvOpArrayLength: case SpvOpImagePointer: -- cgit v1.2.3 From f36fabb7367e6d154c4ddb010206d17447eb1a2a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 May 2015 14:00:57 -0700 Subject: nir/spirv: Split instruction handling into preamble and body sections --- src/glsl/nir/spirv_to_nir.c | 138 +++++++++++++++++++++++++++++--------------- 1 file changed, 93 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index b9d35766019..ef42b14c41e 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -114,6 +114,28 @@ vtn_string_literal(struct vtn_builder *b, const uint32_t *words, return ralloc_strndup(b, (char *)words, (word_count - 2) * sizeof(*words)); } +typedef bool (*vtn_instruction_handler)(struct vtn_builder *, SpvOp, + const uint32_t *, unsigned); + +static const uint32_t * +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, + const uint32_t *end, vtn_instruction_handler handler) +{ + const uint32_t *w = start; + while (w < end) { + SpvOp opcode = w[0] & SpvOpCodeMask; + unsigned count = w[0] >> SpvWordCountShift; + assert(count >= 1 && w + count <= end); + + if (!handler(b, opcode, w, count)) + return w; + + w += count; + } + assert(w == end); + return w; +} + static void vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -714,32 +736,19 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, unreachable("Unhandled opcode"); } -static void -vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) +static bool +vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) 
{ switch (opcode) { case SpvOpSource: case SpvOpSourceExtension: - case SpvOpMemberName: - case SpvOpLine: + case SpvOpCompileFlag: case SpvOpExtension: + case SpvOpExtInstImport: /* Unhandled, but these are for debug so that's ok. */ break; - case SpvOpName: - b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2); - break; - - case SpvOpString: - vtn_push_value(b, w[1], vtn_value_type_string)->str = - vtn_string_literal(b, &w[2], count - 2); - break; - - case SpvOpUndef: - vtn_push_value(b, w[2], vtn_value_type_undef); - break; - case SpvOpMemoryModel: assert(w[1] == SpvAddressingModelLogical); assert(w[2] == SpvMemoryModelGLSL450); @@ -751,20 +760,32 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, b->execution_model = w[1]; break; - case SpvOpLabel: { - struct exec_node *list_tail = exec_list_get_tail(b->cf_list); - nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); - assert(tail_node->type == nir_cf_node_block); - nir_block *block = nir_cf_node_as_block(tail_node); + case SpvOpExecutionMode: + unreachable("Execution modes not yet implemented"); + break; - assert(exec_list_is_empty(&block->instr_list)); - vtn_push_value(b, w[1], vtn_value_type_block)->block = block; + case SpvOpString: + vtn_push_value(b, w[1], vtn_value_type_string)->str = + vtn_string_literal(b, &w[2], count - 2); break; - } - case SpvOpExtInstImport: - case SpvOpExtInst: - vtn_handle_extension(b, opcode, w, count); + case SpvOpName: + b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2); + break; + + case SpvOpMemberName: + /* TODO */ + break; + + case SpvOpLine: + break; /* Ignored for now */ + + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + vtn_handle_decoration(b, opcode, w, count); break; case SpvOpTypeVoid: @@ -803,6 +824,41 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_constant(b, opcode, w, count); break; + case 
SpvOpVariable: + vtn_handle_variables(b, opcode, w, count); + break; + + default: + return false; /* End of preamble */ + } + + return true; +} + +static bool +vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpLabel: { + struct exec_node *list_tail = exec_list_get_tail(b->cf_list); + nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); + assert(tail_node->type == nir_cf_node_block); + nir_block *block = nir_cf_node_as_block(tail_node); + + assert(exec_list_is_empty(&block->instr_list)); + vtn_push_value(b, w[1], vtn_value_type_block)->block = block; + break; + } + + case SpvOpUndef: + vtn_push_value(b, w[2], vtn_value_type_undef); + break; + + case SpvOpExtInst: + vtn_handle_extension(b, opcode, w, count); + break; + case SpvOpVariable: case SpvOpVariableArray: case SpvOpLoad: @@ -816,14 +872,6 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_variables(b, opcode, w, count); break; - case SpvOpDecorationGroup: - case SpvOpDecorate: - case SpvOpMemberDecorate: - case SpvOpGroupDecorate: - case SpvOpGroupMemberDecorate: - vtn_handle_decoration(b, opcode, w, count); - break; - case SpvOpFunction: case SpvOpFunctionEnd: case SpvOpFunctionParameter: @@ -953,6 +1001,8 @@ vtn_handle_instruction(struct vtn_builder *b, SpvOp opcode, default: unreachable("Unhandled opcode"); } + + return true; } nir_shader * @@ -979,17 +1029,15 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->value_id_bound = value_id_bound; b->values = ralloc_array(b, struct vtn_value, value_id_bound); - /* Start handling instructions */ const uint32_t *word_end = words + word_count; - while (words < word_end) { - SpvOp opcode = words[0] & SpvOpCodeMask; - unsigned count = words[0] >> SpvWordCountShift; - assert(words + count <= word_end); - vtn_handle_instruction(b, opcode, words, count); + /* Handle all the preamble instructions */ + words = 
vtn_foreach_instruction(b, words, word_end, + vtn_handle_preamble_instruction); - words += count; - } + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_body_instruction); + assert(words == word_end); ralloc_free(b); -- cgit v1.2.3 From d216dcee94d5adf72b8d7dee794ff7b3f1cd499f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 10:22:52 -0700 Subject: nir/spirv: Add a helper for getting a value as an SSA value --- src/glsl/nir/spirv_to_nir.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index ef42b14c41e..840b4c6fc65 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -107,6 +107,12 @@ vtn_value(struct vtn_builder *b, uint32_t value_id, return val; } +static nir_ssa_def * +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +{ + return vtn_value(b, value_id, vtn_value_type_ssa)->ssa; +} + static char * vtn_string_literal(struct vtn_builder *b, const uint32_t *words, unsigned word_count) -- cgit v1.2.3 From b7904b828115ca2e00dfdd814c9e83f2fc3df3ea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 10:23:09 -0700 Subject: nir/spirv: Handle OpBranchConditional We do control-flow handling as a two-step process. The first step is to walk the instructions list and record various information about blocks and functions. This is where the acutal nir_function_overload objects get created. We also record the start/stop instruction for each block. Then a second pass walks over each of the functions and over the blocks in each function in a way that's NIR-friendly and actually parses the instructions. 
--- src/glsl/nir/spirv_to_nir.c | 257 ++++++++++++++++++++++++++++++++------------ 1 file changed, 189 insertions(+), 68 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 840b4c6fc65..0bbae8ee874 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -44,6 +44,19 @@ enum vtn_value_type { vtn_value_type_ssa, }; +struct vtn_block { + const uint32_t *label; + const uint32_t *branch; + nir_block *block; +}; + +struct vtn_function { + struct exec_node node; + + nir_function_overload *overload; + struct vtn_block *start_block; +}; + struct vtn_value { enum vtn_value_type value_type; const char *name; @@ -54,8 +67,8 @@ struct vtn_value { const struct glsl_type *type; nir_constant *constant; nir_deref_var *deref; - nir_function_impl *impl; - nir_block *block; + struct vtn_function *func; + struct vtn_block *block; nir_ssa_def *ssa; }; }; @@ -71,12 +84,17 @@ struct vtn_builder { nir_shader *shader; nir_function_impl *impl; struct exec_list *cf_list; + struct vtn_block *block; + struct vtn_block *merge_block; unsigned value_id_bound; struct vtn_value *values; SpvExecutionModel execution_model; struct vtn_value *entry_point; + + struct vtn_function *func; + struct exec_list functions; }; static struct vtn_value * @@ -672,60 +690,10 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } static void -vtn_handle_functions(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) +vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) { - switch (opcode) { - case SpvOpFunction: { - assert(b->impl == NULL); - - const struct glsl_type *result_type = - vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); - const struct glsl_type *func_type = - vtn_value(b, w[4], vtn_value_type_type)->type; - - assert(glsl_get_function_return_type(func_type) == result_type); - - 
nir_function *func = - nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); - - nir_function_overload *overload = nir_function_overload_create(func); - overload->num_params = glsl_get_length(func_type); - overload->params = ralloc_array(overload, nir_parameter, - overload->num_params); - for (unsigned i = 0; i < overload->num_params; i++) { - const struct glsl_function_param *param = - glsl_get_function_param(func_type, i); - overload->params[i].type = param->type; - if (param->in) { - if (param->out) { - overload->params[i].param_type = nir_parameter_inout; - } else { - overload->params[i].param_type = nir_parameter_in; - } - } else { - if (param->out) { - overload->params[i].param_type = nir_parameter_out; - } else { - assert(!"Parameter is neither in nor out"); - } - } - } - - val->impl = b->impl = nir_function_impl_create(overload); - b->cf_list = &b->impl->body; - - break; - } - case SpvOpFunctionEnd: - b->impl = NULL; - break; - case SpvOpFunctionParameter: - case SpvOpFunctionCall: - default: - unreachable("Unhandled opcode"); - } + unreachable("Unhandled opcode"); } static void @@ -841,22 +809,118 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, return true; } +static bool +vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpFunction: { + assert(b->func == NULL); + b->func = rzalloc(b, struct vtn_function); + + const struct glsl_type *result_type = + vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + const struct glsl_type *func_type = + vtn_value(b, w[4], vtn_value_type_type)->type; + + assert(glsl_get_function_return_type(func_type) == result_type); + + nir_function *func = + nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); + + nir_function_overload *overload = nir_function_overload_create(func); + overload->num_params = 
glsl_get_length(func_type); + overload->params = ralloc_array(overload, nir_parameter, + overload->num_params); + for (unsigned i = 0; i < overload->num_params; i++) { + const struct glsl_function_param *param = + glsl_get_function_param(func_type, i); + overload->params[i].type = param->type; + if (param->in) { + if (param->out) { + overload->params[i].param_type = nir_parameter_inout; + } else { + overload->params[i].param_type = nir_parameter_in; + } + } else { + if (param->out) { + overload->params[i].param_type = nir_parameter_out; + } else { + assert(!"Parameter is neither in nor out"); + } + } + } + b->func->overload = overload; + break; + } + + case SpvOpFunctionEnd: + b->func = NULL; + break; + + case SpvOpFunctionParameter: + break; /* Does nothing */ + + case SpvOpLabel: { + assert(b->block == NULL); + b->block = rzalloc(b, struct vtn_block); + b->block->label = w; + vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; + + if (b->func->start_block == NULL) { + /* This is the first block encountered for this function. In this + * case, we set the start block and add it to the list of + * implemented functions that we'll walk later. 
+ */ + b->func->start_block = b->block; + exec_list_push_tail(&b->functions, &b->func->node); + } + break; + } + + case SpvOpBranch: + case SpvOpBranchConditional: + case SpvOpSwitch: + case SpvOpKill: + case SpvOpReturn: + case SpvOpReturnValue: + case SpvOpUnreachable: + assert(b->block); + b->block->branch = w; + b->block = NULL; + break; + + default: + /* Continue on as per normal */ + return true; + } + + return true; +} + static bool vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { switch (opcode) { case SpvOpLabel: { + struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; struct exec_node *list_tail = exec_list_get_tail(b->cf_list); nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); assert(tail_node->type == nir_cf_node_block); - nir_block *block = nir_cf_node_as_block(tail_node); - - assert(exec_list_is_empty(&block->instr_list)); - vtn_push_value(b, w[1], vtn_value_type_block)->block = block; + block->block = nir_cf_node_as_block(tail_node); + assert(exec_list_is_empty(&block->block->instr_list)); break; } + case SpvOpLoopMerge: + case SpvOpSelectionMerge: + assert(b->merge_block == NULL); + /* TODO: Selection Control */ + b->merge_block = vtn_value(b, w[1], vtn_value_type_block)->block; + break; + case SpvOpUndef: vtn_push_value(b, w[2], vtn_value_type_undef); break; @@ -878,11 +942,8 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_variables(b, opcode, w, count); break; - case SpvOpFunction: - case SpvOpFunctionEnd: - case SpvOpFunctionParameter: case SpvOpFunctionCall: - vtn_handle_functions(b, opcode, w, count); + vtn_handle_function_call(b, opcode, w, count); break; case SpvOpTextureSample: @@ -1011,11 +1072,66 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, return true; } +static void +vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, + struct vtn_block *end) +{ + struct vtn_block *block = 
start; + while (block != end) { + vtn_foreach_instruction(b, block->label, block->branch, + vtn_handle_body_instruction); + + const uint32_t *w = block->branch; + SpvOp branch_op = w[0] & SpvOpCodeMask; + switch (branch_op) { + case SpvOpBranch: { + assert(vtn_value(b, w[1], vtn_value_type_block)->block == end); + return; + } + + case SpvOpBranchConditional: { + /* Gather up the branch blocks */ + struct vtn_block *then_block = + vtn_value(b, w[2], vtn_value_type_block)->block; + struct vtn_block *else_block = + vtn_value(b, w[3], vtn_value_type_block)->block; + struct vtn_block *merge_block = b->merge_block; + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])); + nir_cf_node_insert_end(b->cf_list, &if_stmt->cf_node); + + struct exec_list *old_list = b->cf_list; + + b->cf_list = &if_stmt->then_list; + vtn_walk_blocks(b, then_block, merge_block); + + b->cf_list = &if_stmt->else_list; + vtn_walk_blocks(b, else_block, merge_block); + + b->cf_list = old_list; + block = merge_block; + continue; + } + + case SpvOpSwitch: + case SpvOpKill: + case SpvOpReturn: + case SpvOpReturnValue: + case SpvOpUnreachable: + default: + unreachable("Unhandled opcode"); + } + } +} + nir_shader * spirv_to_nir(const uint32_t *words, size_t word_count, gl_shader_stage stage, const nir_shader_compiler_options *options) { + const uint32_t *word_end = words + word_count; + /* Handle the SPIR-V header (first 4 dwords) */ assert(word_count > 5); @@ -1034,16 +1150,21 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->shader = shader; b->value_id_bound = value_id_bound; b->values = ralloc_array(b, struct vtn_value, value_id_bound); - - const uint32_t *word_end = words + word_count; + exec_list_make_empty(&b->functions); /* Handle all the preamble instructions */ words = vtn_foreach_instruction(b, words, word_end, vtn_handle_preamble_instruction); - words = vtn_foreach_instruction(b, words, word_end, - vtn_handle_body_instruction); 
- assert(words == word_end); + /* Do a very quick CFG analysis pass */ + vtn_foreach_instruction(b, words, word_end, + vtn_handle_first_cfg_pass_instruction); + + foreach_list_typed(struct vtn_function, func, node, &b->functions) { + b->impl = nir_function_impl_create(func->overload); + b->cf_list = &b->impl->body; + vtn_walk_blocks(b, func->start_block, NULL); + } ralloc_free(b); -- cgit v1.2.3 From 9197e3b9fcfe109c268f336fb260e3db6b11d21a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 12:02:24 -0700 Subject: nir/spirv: Explicitly type constants and SSA values --- src/glsl/nir/spirv_to_nir.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 0bbae8ee874..f36db8a3c27 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -61,10 +61,10 @@ struct vtn_value { enum vtn_value_type value_type; const char *name; struct vtn_decoration *decoration; + const struct glsl_type *type; union { void *ptr; char *str; - const struct glsl_type *type; nir_constant *constant; nir_deref_var *deref; struct vtn_function *func; @@ -346,20 +346,21 @@ static void vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - const struct glsl_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - nir_constant *constant = ralloc(b, nir_constant); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->constant = ralloc(b, nir_constant); switch (opcode) { case SpvOpConstantTrue: - assert(type == glsl_bool_type()); - constant->value.u[0] = NIR_TRUE; + assert(val->type == glsl_bool_type()); + val->constant->value.u[0] = NIR_TRUE; break; case SpvOpConstantFalse: - assert(type == glsl_bool_type()); - constant->value.u[0] = NIR_FALSE; + assert(val->type == glsl_bool_type()); + 
val->constant->value.u[0] = NIR_FALSE; break; case SpvOpConstant: - assert(glsl_type_is_scalar(type)); - constant->value.u[0] = w[3]; + assert(glsl_type_is_scalar(val->type)); + val->constant->value.u[0] = w[3]; break; case SpvOpConstantComposite: { unsigned elem_count = count - 3; @@ -367,29 +368,30 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, for (unsigned i = 0; i < elem_count; i++) elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; - switch (glsl_get_base_type(type)) { + switch (glsl_get_base_type(val->type)) { case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_FLOAT: case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(type)) { - unsigned rows = glsl_get_vector_elements(type); - assert(glsl_get_matrix_columns(type) == elem_count); + if (glsl_type_is_matrix(val->type)) { + unsigned rows = glsl_get_vector_elements(val->type); + assert(glsl_get_matrix_columns(val->type) == elem_count); for (unsigned i = 0; i < elem_count; i++) for (unsigned j = 0; j < rows; j++) - constant->value.u[rows * i + j] = elems[i]->value.u[j]; + val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; } else { - assert(glsl_type_is_vector(type)); - assert(glsl_get_vector_elements(type) == elem_count); + assert(glsl_type_is_vector(val->type)); + assert(glsl_get_vector_elements(val->type) == elem_count); for (unsigned i = 0; i < elem_count; i++) - constant->value.u[i] = elems[i]->value.u[0]; + val->constant->value.u[i] = elems[i]->value.u[0]; } ralloc_free(elems); break; case GLSL_TYPE_STRUCT: case GLSL_TYPE_ARRAY: - constant->elements = elems; + ralloc_steal(val->constant, elems); + val->constant->elements = elems; break; default: @@ -401,7 +403,6 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, default: unreachable("Unhandled opcode"); } - vtn_push_value(b, w[2], vtn_value_type_constant)->constant = constant; } static void @@ -644,6 +645,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, val->name); 
nir_instr_insert_after_cf_list(b->cf_list, &load->instr); + val->type = src_type; val->ssa = &load->dest.ssa; break; } -- cgit v1.2.3 From 91b3b46d8b5a1928bdea377acc51a3fcc85b498b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 12:02:57 -0700 Subject: nir/spirv: Add support for indirect array accesses --- src/glsl/nir/spirv_to_nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index f36db8a3c27..9c056da897f 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -595,8 +595,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } else { assert(idx_val->value_type == vtn_value_type_ssa); deref_arr->deref_array_type = nir_deref_array_type_indirect; - /* TODO */ - unreachable("Indirect array accesses not implemented"); + deref_arr->base_offset = 0; + deref_arr->indirect = nir_src_for_ssa(vtn_ssa_value(b, w[1])); } tail->child = &deref_arr->deref; break; -- cgit v1.2.3 From 9e03b6724cf75950181851196888f55725e537ad Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 12:04:02 -0700 Subject: nir/spirv: Add support for a bunch of ALU operations --- src/glsl/nir/spirv_to_nir.c | 202 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 195 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 9c056da897f..a88fa4591bd 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -27,6 +27,7 @@ #include "nir_spirv.h" #include "nir_vla.h" +#include "nir_builder.h" #include "spirv.h" struct vtn_decoration; @@ -81,6 +82,8 @@ struct vtn_decoration { }; struct vtn_builder { + nir_builder nb; + nir_shader *shader; nir_function_impl *impl; struct exec_list *cf_list; @@ -705,11 +708,192 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, unreachable("Unhandled opcode"); } +static void +vtn_handle_matrix_alu(struct 
vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Matrix math not handled"); +} + static void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Unhandled opcode"); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + + /* Collect the various SSA sources */ + unsigned num_inputs = count - 3; + nir_ssa_def *src[4]; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 3]); + + /* We use the builder for some of the instructions. Go ahead and + * initialize it with the current cf_list. + */ + nir_builder_insert_after_cf_list(&b->nb, b->cf_list); + + /* Indicates that the first two arguments should be swapped. This is + * used for implementing greater-than and less-than-or-equal. + */ + bool swap = false; + + nir_op op; + switch (opcode) { + /* Basic ALU operations */ + case SpvOpSNegate: op = nir_op_ineg; break; + case SpvOpFNegate: op = nir_op_fneg; break; + case SpvOpNot: op = nir_op_inot; break; + + case SpvOpAny: + switch (src[0]->num_components) { + case 1: op = nir_op_imov; break; + case 2: op = nir_op_bany2; break; + case 3: op = nir_op_bany3; break; + case 4: op = nir_op_bany4; break; + } + break; + + case SpvOpAll: + switch (src[0]->num_components) { + case 1: op = nir_op_imov; break; + case 2: op = nir_op_ball2; break; + case 3: op = nir_op_ball3; break; + case 4: op = nir_op_ball4; break; + } + break; + + case SpvOpIAdd: op = nir_op_iadd; break; + case SpvOpFAdd: op = nir_op_fadd; break; + case SpvOpISub: op = nir_op_isub; break; + case SpvOpFSub: op = nir_op_fsub; break; + case SpvOpIMul: op = nir_op_imul; break; + case SpvOpFMul: op = nir_op_fmul; break; + case SpvOpUDiv: op = nir_op_udiv; break; + case SpvOpSDiv: op = nir_op_idiv; break; + case SpvOpFDiv: op = nir_op_fdiv; break; + case SpvOpUMod: op = nir_op_umod; break; + case SpvOpSMod: op = nir_op_umod; break; 
/* FIXME? */ + case SpvOpFMod: op = nir_op_fmod; break; + + case SpvOpDot: + assert(src[0]->num_components == src[1]->num_components); + switch (src[0]->num_components) { + case 1: op = nir_op_fmul; break; + case 2: op = nir_op_fdot2; break; + case 3: op = nir_op_fdot3; break; + case 4: op = nir_op_fdot4; break; + } + break; + + case SpvOpShiftRightLogical: op = nir_op_ushr; break; + case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; + case SpvOpShiftLeftLogical: op = nir_op_ishl; break; + case SpvOpLogicalOr: op = nir_op_ior; break; + case SpvOpLogicalXor: op = nir_op_ixor; break; + case SpvOpLogicalAnd: op = nir_op_iand; break; + case SpvOpBitwiseOr: op = nir_op_ior; break; + case SpvOpBitwiseXor: op = nir_op_ixor; break; + case SpvOpBitwiseAnd: op = nir_op_iand; break; + case SpvOpSelect: op = nir_op_bcsel; break; + case SpvOpIEqual: op = nir_op_ieq; break; + + /* Comparisons: (TODO: How do we want to handled ordered/unordered?) */ + case SpvOpFOrdEqual: op = nir_op_feq; break; + case SpvOpFUnordEqual: op = nir_op_feq; break; + case SpvOpINotEqual: op = nir_op_ine; break; + case SpvOpFOrdNotEqual: op = nir_op_fne; break; + case SpvOpFUnordNotEqual: op = nir_op_fne; break; + case SpvOpULessThan: op = nir_op_ult; break; + case SpvOpSLessThan: op = nir_op_ilt; break; + case SpvOpFOrdLessThan: op = nir_op_flt; break; + case SpvOpFUnordLessThan: op = nir_op_flt; break; + case SpvOpUGreaterThan: op = nir_op_ult; swap = true; break; + case SpvOpSGreaterThan: op = nir_op_ilt; swap = true; break; + case SpvOpFOrdGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpFUnordGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpULessThanEqual: op = nir_op_uge; swap = true; break; + case SpvOpSLessThanEqual: op = nir_op_ige; swap = true; break; + case SpvOpFOrdLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpFUnordLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpUGreaterThanEqual: op = nir_op_uge; break; + case 
SpvOpSGreaterThanEqual: op = nir_op_ige; break; + case SpvOpFOrdGreaterThanEqual: op = nir_op_fge; break; + case SpvOpFUnordGreaterThanEqual:op = nir_op_fge; break; + + /* Conversions: */ + case SpvOpConvertFToU: op = nir_op_f2u; break; + case SpvOpConvertFToS: op = nir_op_f2i; break; + case SpvOpConvertSToF: op = nir_op_i2f; break; + case SpvOpConvertUToF: op = nir_op_u2f; break; + case SpvOpBitcast: op = nir_op_imov; break; + case SpvOpUConvert: + case SpvOpSConvert: + op = nir_op_imov; /* TODO: NIR is 32-bit only; these are no-ops. */ + break; + case SpvOpFConvert: + op = nir_op_fmov; + break; + + /* Derivatives: */ + case SpvOpDPdx: op = nir_op_fddx; break; + case SpvOpDPdy: op = nir_op_fddy; break; + case SpvOpDPdxFine: op = nir_op_fddx_fine; break; + case SpvOpDPdyFine: op = nir_op_fddy_fine; break; + case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; + case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; + case SpvOpFwidth: + val->ssa = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); + return; + case SpvOpFwidthFine: + val->ssa = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); + return; + case SpvOpFwidthCoarse: + val->ssa = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); + return; + + case SpvOpVectorTimesScalar: + /* The builder will take care of splatting for us. 
*/ + val->ssa = nir_fmul(&b->nb, src[0], src[1]); + return; + + case SpvOpSRem: + case SpvOpFRem: + unreachable("No NIR equivalent"); + + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + default: + unreachable("Unhandled opcode"); + } + + if (swap) { + nir_ssa_def *tmp = src[0]; + src[0] = src[1]; + src[1] = tmp; + } + + nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, + glsl_get_vector_elements(val->type), val->name); + val->ssa = &instr->dest.dest.ssa; + + for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) + instr->src[i].src = nir_src_for_ssa(src[i]); + + nir_instr_insert_after_cf_list(b->cf_list, &instr->instr); } static bool @@ -993,7 +1177,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpPtrCastToGeneric: case SpvOpGenericCastToPtr: case SpvOpBitcast: - case SpvOpTranspose: case SpvOpIsNan: case SpvOpIsInf: case SpvOpIsFinite: @@ -1017,11 +1200,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpFRem: case SpvOpFMod: case SpvOpVectorTimesScalar: - case SpvOpMatrixTimesScalar: - case SpvOpVectorTimesMatrix: - case SpvOpMatrixTimesVector: - case SpvOpMatrixTimesMatrix: - case SpvOpOuterProduct: case SpvOpDot: case SpvOpShiftRightLogical: case SpvOpShiftRightArithmetic: @@ -1067,6 +1245,15 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_alu(b, opcode, w, count); break; + case SpvOpTranspose: + case SpvOpOuterProduct: + case SpvOpMatrixTimesScalar: + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + vtn_handle_matrix_alu(b, opcode, w, count); + break; + default: unreachable("Unhandled opcode"); } @@ -1164,6 +1351,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, foreach_list_typed(struct vtn_function, func, node, 
&b->functions) { b->impl = nir_function_impl_create(func->overload); + nir_builder_init(&b->nb, b->impl); b->cf_list = &b->impl->body; vtn_walk_blocks(b, func->start_block, NULL); } -- cgit v1.2.3 From 66fc7f252f7e4e5efe4891b2eed72a4f0c2f968e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 12:12:23 -0700 Subject: nir/spirv: Use the builder for all instructions We don't actually use it to create all the instructions but we do use it for insertion always. This should make things far more consistent for implementing extended instructions. --- src/glsl/nir/spirv_to_nir.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a88fa4591bd..bd1052fc9c1 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -86,7 +86,6 @@ struct vtn_builder { nir_shader *shader; nir_function_impl *impl; - struct exec_list *cf_list; struct vtn_block *block; struct vtn_block *merge_block; @@ -630,7 +629,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); - nir_instr_insert_after_cf_list(b->cf_list, ©->instr); + nir_builder_instr_insert(&b->nb, ©->instr); break; } @@ -647,7 +646,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, val->name); - nir_instr_insert_after_cf_list(b->cf_list, &load->instr); + nir_builder_instr_insert(&b->nb, &load->instr); val->type = src_type; val->ssa = &load->dest.ssa; break; @@ -665,7 +664,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest->deref)); store->num_components = glsl_get_vector_elements(dest_type); - nir_instr_insert_after_cf_list(b->cf_list, &store->instr); + 
nir_builder_instr_insert(&b->nb, &store->instr); } else { assert(src_val->value_type == vtn_value_type_constant); @@ -680,7 +679,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); copy->variables[1] = nir_deref_var_create(copy, const_tmp); - nir_instr_insert_after_cf_list(b->cf_list, ©->instr); + nir_builder_instr_insert(&b->nb, ©->instr); } break; } @@ -728,11 +727,6 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, for (unsigned i = 0; i < num_inputs; i++) src[i] = vtn_ssa_value(b, w[i + 3]); - /* We use the builder for some of the instructions. Go ahead and - * initialize it with the current cf_list. - */ - nir_builder_insert_after_cf_list(&b->nb, b->cf_list); - /* Indicates that the first two arguments should be swapped. This is * used for implementing greater-than and less-than-or-equal. */ @@ -893,7 +887,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) instr->src[i].src = nir_src_for_ssa(src[i]); - nir_instr_insert_after_cf_list(b->cf_list, &instr->instr); + nir_builder_instr_insert(&b->nb, &instr->instr); } static bool @@ -1092,7 +1086,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, switch (opcode) { case SpvOpLabel: { struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; - struct exec_node *list_tail = exec_list_get_tail(b->cf_list); + struct exec_node *list_tail = exec_list_get_tail(b->nb.cf_node_list); nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); assert(tail_node->type == nir_cf_node_block); block->block = nir_cf_node_as_block(tail_node); @@ -1288,17 +1282,17 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, nir_if *if_stmt = nir_if_create(b->shader); if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])); - nir_cf_node_insert_end(b->cf_list, &if_stmt->cf_node); + nir_cf_node_insert_end(b->nb.cf_node_list, 
&if_stmt->cf_node); - struct exec_list *old_list = b->cf_list; + struct exec_list *old_list = b->nb.cf_node_list; - b->cf_list = &if_stmt->then_list; + nir_builder_insert_after_cf_list(&b->nb, &if_stmt->then_list); vtn_walk_blocks(b, then_block, merge_block); - b->cf_list = &if_stmt->else_list; + nir_builder_insert_after_cf_list(&b->nb, &if_stmt->else_list); vtn_walk_blocks(b, else_block, merge_block); - b->cf_list = old_list; + nir_builder_insert_after_cf_list(&b->nb, old_list); block = merge_block; continue; } @@ -1352,7 +1346,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, foreach_list_typed(struct vtn_function, func, node, &b->functions) { b->impl = nir_function_impl_create(func->overload); nir_builder_init(&b->nb, b->impl); - b->cf_list = &b->impl->body; + nir_builder_insert_after_cf_list(&b->nb, &b->impl->body); vtn_walk_blocks(b, func->start_block, NULL); } -- cgit v1.2.3 From 577c09fdadb789ee017ccf0b2bb397f3abec0bf7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 12:25:09 -0700 Subject: nir/spirv: Split the core datastructures into a header file --- src/glsl/nir/spirv_to_nir.c | 117 ++---------------------------- src/glsl/nir/spirv_to_nir_private.h | 141 ++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 112 deletions(-) create mode 100644 src/glsl/nir/spirv_to_nir_private.h (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index bd1052fc9c1..78bffda9e8c 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -25,109 +25,10 @@ * */ -#include "nir_spirv.h" +#include "spirv_to_nir_private.h" #include "nir_vla.h" -#include "nir_builder.h" -#include "spirv.h" - -struct vtn_decoration; - -enum vtn_value_type { - vtn_value_type_invalid = 0, - vtn_value_type_undef, - vtn_value_type_string, - vtn_value_type_decoration_group, - vtn_value_type_type, - vtn_value_type_constant, - vtn_value_type_deref, - vtn_value_type_function, - vtn_value_type_block, - 
vtn_value_type_ssa, -}; - -struct vtn_block { - const uint32_t *label; - const uint32_t *branch; - nir_block *block; -}; - -struct vtn_function { - struct exec_node node; - - nir_function_overload *overload; - struct vtn_block *start_block; -}; - -struct vtn_value { - enum vtn_value_type value_type; - const char *name; - struct vtn_decoration *decoration; - const struct glsl_type *type; - union { - void *ptr; - char *str; - nir_constant *constant; - nir_deref_var *deref; - struct vtn_function *func; - struct vtn_block *block; - nir_ssa_def *ssa; - }; -}; - -struct vtn_decoration { - struct vtn_decoration *next; - const uint32_t *literals; - struct vtn_value *group; - SpvDecoration decoration; -}; - -struct vtn_builder { - nir_builder nb; - - nir_shader *shader; - nir_function_impl *impl; - struct vtn_block *block; - struct vtn_block *merge_block; - - unsigned value_id_bound; - struct vtn_value *values; - - SpvExecutionModel execution_model; - struct vtn_value *entry_point; - - struct vtn_function *func; - struct exec_list functions; -}; - -static struct vtn_value * -vtn_push_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) -{ - assert(value_id < b->value_id_bound); - assert(b->values[value_id].value_type == vtn_value_type_invalid); - - b->values[value_id].value_type = value_type; - - return &b->values[value_id]; -} - -static struct vtn_value * -vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) -{ - assert(value_id < b->value_id_bound); - return &b->values[value_id]; -} -static struct vtn_value * -vtn_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) -{ - struct vtn_value *val = vtn_untyped_value(b, value_id); - assert(val->value_type == value_type); - return val; -} - -static nir_ssa_def * +nir_ssa_def * vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) { return vtn_value(b, value_id, vtn_value_type_ssa)->ssa; @@ -140,9 +41,6 @@ vtn_string_literal(struct vtn_builder *b, const uint32_t 
*words, return ralloc_strndup(b, (char *)words, (word_count - 2) * sizeof(*words)); } -typedef bool (*vtn_instruction_handler)(struct vtn_builder *, SpvOp, - const uint32_t *, unsigned); - static const uint32_t * vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, const uint32_t *end, vtn_instruction_handler handler) @@ -177,16 +75,11 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, } } -typedef void (*decoration_foreach_cb)(struct vtn_builder *, - struct vtn_value *, - const struct vtn_decoration *, - void *); - static void _foreach_decoration_helper(struct vtn_builder *b, struct vtn_value *base_value, struct vtn_value *value, - decoration_foreach_cb cb, void *data) + vtn_decoration_foreach_cb cb, void *data) { for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { if (dec->group) { @@ -204,9 +97,9 @@ _foreach_decoration_helper(struct vtn_builder *b, * value. If it encounters a decoration group, it recurses into the group * and iterates over all of those decorations as well. 
*/ -static void +void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, - decoration_foreach_cb cb, void *data) + vtn_decoration_foreach_cb cb, void *data) { _foreach_decoration_helper(b, value, value, cb, data); } diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h new file mode 100644 index 00000000000..0a07b377e72 --- /dev/null +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -0,0 +1,141 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir_spirv.h" +#include "nir_builder.h" +#include "spirv.h" + +struct vtn_builder; +struct vtn_decoration; + +enum vtn_value_type { + vtn_value_type_invalid = 0, + vtn_value_type_undef, + vtn_value_type_string, + vtn_value_type_decoration_group, + vtn_value_type_type, + vtn_value_type_constant, + vtn_value_type_deref, + vtn_value_type_function, + vtn_value_type_block, + vtn_value_type_ssa, +}; + +struct vtn_block { + const uint32_t *label; + const uint32_t *branch; + nir_block *block; +}; + +struct vtn_function { + struct exec_node node; + + nir_function_overload *overload; + struct vtn_block *start_block; +}; + +typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, + const uint32_t *, unsigned); + +struct vtn_value { + enum vtn_value_type value_type; + const char *name; + struct vtn_decoration *decoration; + const struct glsl_type *type; + union { + void *ptr; + char *str; + nir_constant *constant; + nir_deref_var *deref; + struct vtn_function *func; + struct vtn_block *block; + nir_ssa_def *ssa; + }; +}; + +struct vtn_decoration { + struct vtn_decoration *next; + const uint32_t *literals; + struct vtn_value *group; + SpvDecoration decoration; +}; + +struct vtn_builder { + nir_builder nb; + + nir_shader *shader; + nir_function_impl *impl; + struct vtn_block *block; + struct vtn_block *merge_block; + + unsigned value_id_bound; + struct vtn_value *values; + + SpvExecutionModel execution_model; + struct vtn_value *entry_point; + + struct vtn_function *func; + struct exec_list functions; +}; + +static inline struct vtn_value * +vtn_push_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + assert(value_id < b->value_id_bound); + assert(b->values[value_id].value_type == vtn_value_type_invalid); + + b->values[value_id].value_type = value_type; + + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_untyped_value(struct 
vtn_builder *b, uint32_t value_id) +{ + assert(value_id < b->value_id_bound); + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + assert(val->value_type == value_type); + return val; +} + +nir_ssa_def *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); + +typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + const struct vtn_decoration *, + void *); + +void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data); -- cgit v1.2.3 From 1feeee9cf476010f72cb973480f67de0764e6a90 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 May 2015 15:17:56 -0700 Subject: nir/spirv: Add initial support for GLSL 4.50 builtins --- src/glsl/Makefile.sources | 1 + src/glsl/nir/spirv_glsl450_to_nir.c | 284 ++++++++++++++++++++++++++++++++++++ src/glsl/nir/spirv_to_nir.c | 24 ++- src/glsl/nir/spirv_to_nir_private.h | 5 + 4 files changed, 310 insertions(+), 4 deletions(-) create mode 100644 src/glsl/nir/spirv_glsl450_to_nir.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index c6a89362988..da38e3576bd 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -75,6 +75,7 @@ NIR_FILES = \ nir/nir_worklist.h \ nir/nir_types.cpp \ nir/spirv_to_nir.c \ + nir/spirv_glsl450_to_nir.c \ $(NIR_GENERATED_FILES) # libglsl diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c new file mode 100644 index 00000000000..240ff012fe1 --- /dev/null +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -0,0 +1,284 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "spirv_to_nir_private.h" + +enum GLSL450Entrypoint { + Round = 0, + RoundEven = 1, + Trunc = 2, + Abs = 3, + Sign = 4, + Floor = 5, + Ceil = 6, + Fract = 7, + + Radians = 8, + Degrees = 9, + Sin = 10, + Cos = 11, + Tan = 12, + Asin = 13, + Acos = 14, + Atan = 15, + Sinh = 16, + Cosh = 17, + Tanh = 18, + Asinh = 19, + Acosh = 20, + Atanh = 21, + Atan2 = 22, + + Pow = 23, + Exp = 24, + Log = 25, + Exp2 = 26, + Log2 = 27, + Sqrt = 28, + InverseSqrt = 29, + + Determinant = 30, + MatrixInverse = 31, + + Modf = 32, // second argument needs the OpVariable = , not an OpLoad + Min = 33, + Max = 34, + Clamp = 35, + Mix = 36, + Step = 37, + SmoothStep = 38, + + FloatBitsToInt = 39, + FloatBitsToUint = 40, + IntBitsToFloat = 41, + UintBitsToFloat = 42, + + Fma = 43, + Frexp = 44, + Ldexp = 45, + + PackSnorm4x8 = 46, + PackUnorm4x8 = 47, + PackSnorm2x16 = 48, + PackUnorm2x16 = 49, + PackHalf2x16 = 50, + PackDouble2x32 = 51, + UnpackSnorm2x16 = 52, + UnpackUnorm2x16 = 53, + 
UnpackHalf2x16 = 54, + UnpackSnorm4x8 = 55, + UnpackUnorm4x8 = 56, + UnpackDouble2x32 = 57, + + Length = 58, + Distance = 59, + Cross = 60, + Normalize = 61, + Ftransform = 62, + FaceForward = 63, + Reflect = 64, + Refract = 65, + + UaddCarry = 66, + UsubBorrow = 67, + UmulExtended = 68, + ImulExtended = 69, + BitfieldExtract = 70, + BitfieldInsert = 71, + BitfieldReverse = 72, + BitCount = 73, + FindLSB = 74, + FindMSB = 75, + + InterpolateAtCentroid = 76, + InterpolateAtSample = 77, + InterpolateAtOffset = 78, + + Count +}; + +static nir_ssa_def* +build_length(nir_builder *b, nir_ssa_def *vec) +{ + switch (vec->num_components) { + case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec)); + case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec)); + case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec)); + case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec)); + default: + unreachable("Invalid number of components"); + } +} + +static void +handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + + /* Collect the various SSA sources */ + unsigned num_inputs = count - 5; + nir_ssa_def *src[3]; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 5]); + + nir_op op; + switch (entrypoint) { + case Round: op = nir_op_fround_even; break; /* TODO */ + case RoundEven: op = nir_op_fround_even; break; + case Trunc: op = nir_op_ftrunc; break; + case Abs: op = nir_op_fabs; break; + case Sign: op = nir_op_fsign; break; + case Floor: op = nir_op_ffloor; break; + case Ceil: op = nir_op_fceil; break; + case Fract: op = nir_op_ffract; break; + case Radians: + val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); + return; + case Degrees: + val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); + return; + case Sin: op = nir_op_fsin; break; + 
case Cos: op = nir_op_fcos; break; + case Tan: + val->ssa = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), + nir_fcos(&b->nb, src[0])); + return; + case Pow: op = nir_op_fpow; break; + case Exp: op = nir_op_fexp; break; + case Log: op = nir_op_flog; break; + case Exp2: op = nir_op_fexp2; break; + case Log2: op = nir_op_flog2; break; + case Sqrt: op = nir_op_fsqrt; break; + case InverseSqrt: op = nir_op_frsq; break; + + case Modf: op = nir_op_fmod; break; + case Min: op = nir_op_fmin; break; + case Max: op = nir_op_fmax; break; + case Mix: op = nir_op_flrp; break; + case Step: + val->ssa = nir_sge(&b->nb, src[1], src[0]); + return; + + case FloatBitsToInt: + case FloatBitsToUint: + case IntBitsToFloat: + case UintBitsToFloat: + /* Probably going to be removed from the final version of the spec. */ + val->ssa = src[0]; + return; + + case Fma: op = nir_op_ffma; break; + case Ldexp: op = nir_op_ldexp; break; + + /* Packing/Unpacking functions */ + case PackSnorm4x8: op = nir_op_pack_snorm_4x8; break; + case PackUnorm4x8: op = nir_op_pack_unorm_4x8; break; + case PackSnorm2x16: op = nir_op_pack_snorm_2x16; break; + case PackUnorm2x16: op = nir_op_pack_unorm_2x16; break; + case PackHalf2x16: op = nir_op_pack_half_2x16; break; + case UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break; + case UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break; + case UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break; + case UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break; + case UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; + + case Length: + val->ssa = build_length(&b->nb, src[0]); + return; + case Distance: + val->ssa = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); + return; + case Normalize: + val->ssa = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); + return; + + case UaddCarry: op = nir_op_uadd_carry; break; + case UsubBorrow: op = nir_op_usub_borrow; break; + case BitfieldExtract: op = nir_op_ubitfield_extract; break; /* TODO */ + case 
BitfieldInsert: op = nir_op_bitfield_insert; break; + case BitfieldReverse: op = nir_op_bitfield_reverse; break; + case BitCount: op = nir_op_bit_count; break; + case FindLSB: op = nir_op_find_lsb; break; + case FindMSB: op = nir_op_ufind_msb; break; /* TODO */ + + case Clamp: + case Asin: + case Acos: + case Atan: + case Atan2: + case Sinh: + case Cosh: + case Tanh: + case Asinh: + case Acosh: + case Atanh: + case SmoothStep: + case Frexp: + case PackDouble2x32: + case UnpackDouble2x32: + case Cross: + case Ftransform: + case FaceForward: + case Reflect: + case Refract: + case UmulExtended: + case ImulExtended: + default: + unreachable("Unhandled opcode"); + } + + nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, + glsl_get_vector_elements(val->type), val->name); + val->ssa = &instr->dest.dest.ssa; + + for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) + instr->src[i].src = nir_src_for_ssa(src[i]); + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +bool +vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *words, unsigned count) +{ + switch ((enum GLSL450Entrypoint)ext_opcode) { + case Determinant: + case MatrixInverse: + case InterpolateAtCentroid: + case InterpolateAtSample: + case InterpolateAtOffset: + unreachable("Unhandled opcode"); + + default: + handle_glsl450_alu(b, (enum GLSL450Entrypoint)ext_opcode, words, count); + } + + return true; +} diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 78bffda9e8c..0177be04537 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -65,11 +65,24 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { switch (opcode) { - case SpvOpExtInstImport: - /* Do nothing for the moment */ + case SpvOpExtInstImport: { + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); + if (strcmp((const char 
*)&w[2], "GLSL.std.450") == 0) { + val->ext_handler = vtn_handle_glsl450_instruction; + } else { + assert(!"Unsupported extension"); + } break; + } + + case SpvOpExtInst: { + struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); + bool handled = val->ext_handler(b, w[4], w, count); + (void)handled; + assert(handled); + break; + } - case SpvOpExtInst: default: unreachable("Unhandled opcode"); } @@ -792,10 +805,13 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpSourceExtension: case SpvOpCompileFlag: case SpvOpExtension: - case SpvOpExtInstImport: /* Unhandled, but these are for debug so that's ok. */ break; + case SpvOpExtInstImport: + vtn_handle_extension(b, opcode, w, count); + break; + case SpvOpMemoryModel: assert(w[1] == SpvAddressingModelLogical); assert(w[2] == SpvMemoryModelGLSL450); diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index 0a07b377e72..fd80dd4e161 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -43,6 +43,7 @@ enum vtn_value_type { vtn_value_type_function, vtn_value_type_block, vtn_value_type_ssa, + vtn_value_type_extension, }; struct vtn_block { @@ -74,6 +75,7 @@ struct vtn_value { struct vtn_function *func; struct vtn_block *block; nir_ssa_def *ssa; + vtn_instruction_handler ext_handler; }; }; @@ -139,3 +141,6 @@ typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, vtn_decoration_foreach_cb cb, void *data); + +bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *words, unsigned count); -- cgit v1.2.3 From 9da6d808bec836c413e308caa7b36d74429ecf8d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 12:35:30 -0700 Subject: nir/spirv: Make vtn_ssa_value handle constants as well as ssa values --- src/glsl/nir/spirv_to_nir.c | 21 ++++++++++++++++++++- 1 file changed, 20 
insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 0177be04537..03ef4c45dd1 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -31,7 +31,26 @@ nir_ssa_def * vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) { - return vtn_value(b, value_id, vtn_value_type_ssa)->ssa; + struct vtn_value *val = vtn_untyped_value(b, value_id); + switch (val->value_type) { + case vtn_value_type_constant: { + assert(glsl_type_is_vector_or_scalar(val->type)); + unsigned num_components = glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, num_components); + + for (unsigned i = 0; i < num_components; i++) + load->value.u[0] = val->constant->value.u[0]; + + nir_builder_instr_insert(&b->nb, &load->instr); + return &load->def; + } + + case vtn_value_type_ssa: + return val->ssa; + default: + unreachable("Invalid type for an SSA value"); + } } static char * -- cgit v1.2.3 From 6fc7911d1565214c9921cfc9da3df37718839184 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 12:36:09 -0700 Subject: nir/spirv: Use the correct length for copying string literals --- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 03ef4c45dd1..3d2a26c90b0 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -57,7 +57,7 @@ static char * vtn_string_literal(struct vtn_builder *b, const uint32_t *words, unsigned word_count) { - return ralloc_strndup(b, (char *)words, (word_count - 2) * sizeof(*words)); + return ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); } static const uint32_t * -- cgit v1.2.3 From 4a63761e1d7970f34be19b02b98a0be1a6f1af2c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 12:36:31 -0700 Subject: nir/spirv: Set a name on temporary variables --- 
src/glsl/nir/spirv_to_nir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 3d2a26c90b0..b5130f339c4 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -595,6 +595,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_variable *const_tmp = rzalloc(b->shader, nir_variable); const_tmp->type = dest_type; + const_tmp->name = "const_temp"; const_tmp->data.mode = nir_var_local; const_tmp->data.read_only = true; exec_list_push_tail(&b->impl->locals, &const_tmp->node); -- cgit v1.2.3 From b6562bbc300d75de71ef5ed03fd24bb6cbe8d51b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 12:37:10 -0700 Subject: nir/spirv: Handle control-flow with loops --- src/glsl/nir/spirv_to_nir.c | 168 +++++++++++++++++++++++++++++++----- src/glsl/nir/spirv_to_nir_private.h | 4 +- 2 files changed, 151 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index b5130f339c4..4a12b2930b2 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1000,6 +1000,13 @@ vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, b->block = NULL; break; + case SpvOpSelectionMerge: + case SpvOpLoopMerge: + assert(b->block && b->block->merge_op == SpvOpNop); + b->block->merge_op = opcode; + b->block->merge_block_id = w[1]; + break; + default: /* Continue on as per normal */ return true; @@ -1015,19 +1022,20 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, switch (opcode) { case SpvOpLabel: { struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; + assert(block->block == NULL); + struct exec_node *list_tail = exec_list_get_tail(b->nb.cf_node_list); nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); assert(tail_node->type == nir_cf_node_block); block->block = nir_cf_node_as_block(tail_node); + 
assert(exec_list_is_empty(&block->block->instr_list)); break; } case SpvOpLoopMerge: case SpvOpSelectionMerge: - assert(b->merge_block == NULL); - /* TODO: Selection Control */ - b->merge_block = vtn_value(b, w[1], vtn_value_type_block)->block; + /* This is handled by cfg pre-pass and walk_blocks */ break; case SpvOpUndef: @@ -1186,19 +1194,68 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, static void vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, - struct vtn_block *end) + struct vtn_block *break_block, struct vtn_block *cont_block, + struct vtn_block *end_block) { struct vtn_block *block = start; - while (block != end) { + while (block != end_block) { + const uint32_t *w = block->branch; + SpvOp branch_op = w[0] & SpvOpCodeMask; + + if (block->block != NULL) { + /* We've already visited this block once before so this is a + * back-edge. Back-edges are only allowed to point to a loop + * merge. + */ + assert(block == cont_block); + return; + } + + b->block = block; vtn_foreach_instruction(b, block->label, block->branch, vtn_handle_body_instruction); - const uint32_t *w = block->branch; - SpvOp branch_op = w[0] & SpvOpCodeMask; switch (branch_op) { case SpvOpBranch: { - assert(vtn_value(b, w[1], vtn_value_type_block)->block == end); - return; + struct vtn_block *branch_block = + vtn_value(b, w[1], vtn_value_type_block)->block; + + if (branch_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (branch_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (branch_block == end_block) { + return; + } else if (branch_block->merge_op == SpvOpLoopMerge) { + /* This is the jump into a loop. 
*/ + cont_block = branch_block; + break_block = vtn_value(b, branch_block->merge_block_id, + vtn_value_type_block)->block; + + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node); + + struct exec_list *old_list = b->nb.cf_node_list; + + nir_builder_insert_after_cf_list(&b->nb, &loop->body); + vtn_walk_blocks(b, branch_block, break_block, cont_block, NULL); + + nir_builder_insert_after_cf_list(&b->nb, old_list); + block = break_block; + continue; + } else { + /* TODO: Can this ever happen? */ + block = branch_block; + continue; + } } case SpvOpBranchConditional: { @@ -1207,28 +1264,99 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_value(b, w[2], vtn_value_type_block)->block; struct vtn_block *else_block = vtn_value(b, w[3], vtn_value_type_block)->block; - struct vtn_block *merge_block = b->merge_block; nir_if *if_stmt = nir_if_create(b->shader); if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])); nir_cf_node_insert_end(b->nb.cf_node_list, &if_stmt->cf_node); - struct exec_list *old_list = b->nb.cf_node_list; + if (then_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_instr_insert_after_cf_list(&if_stmt->then_list, + &jump->instr); + block = else_block; + } else if (else_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_instr_insert_after_cf_list(&if_stmt->else_list, + &jump->instr); + block = then_block; + } else if (then_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_instr_insert_after_cf_list(&if_stmt->then_list, + &jump->instr); + block = else_block; + } else if (else_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_instr_insert_after_cf_list(&if_stmt->else_list, + &jump->instr); + block = then_block; + } else { + /* Conventional if statement 
*/ + assert(block->merge_op == SpvOpSelectionMerge); + struct vtn_block *merge_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; + + struct exec_list *old_list = b->nb.cf_node_list; - nir_builder_insert_after_cf_list(&b->nb, &if_stmt->then_list); - vtn_walk_blocks(b, then_block, merge_block); + nir_builder_insert_after_cf_list(&b->nb, &if_stmt->then_list); + vtn_walk_blocks(b, then_block, break_block, cont_block, merge_block); - nir_builder_insert_after_cf_list(&b->nb, &if_stmt->else_list); - vtn_walk_blocks(b, else_block, merge_block); + nir_builder_insert_after_cf_list(&b->nb, &if_stmt->else_list); + vtn_walk_blocks(b, else_block, break_block, cont_block, merge_block); + + nir_builder_insert_after_cf_list(&b->nb, old_list); + block = merge_block; + continue; + } - nir_builder_insert_after_cf_list(&b->nb, old_list); - block = merge_block; + /* If we got here then we inserted a predicated break or continue + * above and we need to handle the other case. We already set + * `block` above to indicate what block to visit after the + * predicated break. + */ + + /* It's possible that the other branch is also a break/continue. + * If it is, we handle that here. + */ + if (block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } + + /* If we got here then there was a predicated break/continue but + * the other half of the if has stuff in it. `block` was already + * set above so there is nothing left for us to do. 
+ */ continue; } + case SpvOpReturn: { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_return); + nir_builder_instr_insert(&b->nb, &jump->instr); + return; + } + + case SpvOpKill: { + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard); + nir_builder_instr_insert(&b->nb, &discard->instr); + return; + } + case SpvOpSwitch: - case SpvOpKill: - case SpvOpReturn: case SpvOpReturnValue: case SpvOpUnreachable: default: @@ -1276,7 +1404,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->impl = nir_function_impl_create(func->overload); nir_builder_init(&b->nb, b->impl); nir_builder_insert_after_cf_list(&b->nb, &b->impl->body); - vtn_walk_blocks(b, func->start_block, NULL); + vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); } ralloc_free(b); diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index fd80dd4e161..d2b364bdfeb 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -47,6 +47,9 @@ enum vtn_value_type { }; struct vtn_block { + /* Merge opcode if this block contains a merge; SpvOpNop otherwise. 
*/ + SpvOp merge_op; + uint32_t merge_block_id; const uint32_t *label; const uint32_t *branch; nir_block *block; @@ -92,7 +95,6 @@ struct vtn_builder { nir_shader *shader; nir_function_impl *impl; struct vtn_block *block; - struct vtn_block *merge_block; unsigned value_id_bound; struct vtn_value *values; -- cgit v1.2.3 From ca51d926fd0a1080b114022b362cebb2107f2cbb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 12:54:02 -0700 Subject: nir/spirv: Handle boolean uniforms correctly --- src/glsl/nir/spirv_to_nir.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 4a12b2930b2..d1205f5ae07 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -573,7 +573,16 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_builder_instr_insert(&b->nb, &load->instr); val->type = src_type; - val->ssa = &load->dest.ssa; + + if (src->var->data.mode == nir_var_uniform && + glsl_get_base_type(src_type) == GLSL_TYPE_BOOL) { + /* Uniform boolean loads need to be fixed up since they're defined + * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. 
+ */ + val->ssa = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->ssa = &load->dest.ssa; + } break; } -- cgit v1.2.3 From 62b094a81c4f9ccd21622bea3aa18ccd1b9afb41 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 15:33:21 -0700 Subject: nir/spirv: Handle jump-to-loop in a more general way --- src/glsl/nir/spirv_to_nir.c | 46 ++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index d1205f5ae07..bcb77faa551 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1208,9 +1208,6 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, { struct vtn_block *block = start; while (block != end_block) { - const uint32_t *w = block->branch; - SpvOp branch_op = w[0] & SpvOpCodeMask; - if (block->block != NULL) { /* We've already visited this block once before so this is a * back-edge. Back-edges are only allowed to point to a loop @@ -1220,6 +1217,31 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, return; } + if (block->merge_op == SpvOpLoopMerge) { + /* This is the jump into a loop. 
*/ + cont_block = block; + break_block = vtn_value(b, block->merge_block_id, + vtn_value_type_block)->block; + + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node); + + struct exec_list *old_list = b->nb.cf_node_list; + + /* Reset the merge_op to prerevent infinite recursion */ + block->merge_op = SpvOpNop; + + nir_builder_insert_after_cf_list(&b->nb, &loop->body); + vtn_walk_blocks(b, block, break_block, cont_block, NULL); + + nir_builder_insert_after_cf_list(&b->nb, old_list); + block = break_block; + continue; + } + + const uint32_t *w = block->branch; + SpvOp branch_op = w[0] & SpvOpCodeMask; + b->block = block; vtn_foreach_instruction(b, block->label, block->branch, vtn_handle_body_instruction); @@ -1243,25 +1265,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, return; } else if (branch_block == end_block) { return; - } else if (branch_block->merge_op == SpvOpLoopMerge) { - /* This is the jump into a loop. */ - cont_block = branch_block; - break_block = vtn_value(b, branch_block->merge_block_id, - vtn_value_type_block)->block; - - nir_loop *loop = nir_loop_create(b->shader); - nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node); - - struct exec_list *old_list = b->nb.cf_node_list; - - nir_builder_insert_after_cf_list(&b->nb, &loop->body); - vtn_walk_blocks(b, branch_block, break_block, cont_block, NULL); - - nir_builder_insert_after_cf_list(&b->nb, old_list); - block = break_block; - continue; } else { - /* TODO: Can this ever happen? */ block = branch_block; continue; } -- cgit v1.2.3 From 2887e68f36e4782b2c255fd2fc11af328e2fe9fe Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 May 2015 15:36:54 -0700 Subject: nir/spirv: Make the global constants in spirv.h static I've been promissed in a bug that this will be fixed in a future version of the header. However, in the interest of my branch building, I'm adding these changes in myself for the moment. 
--- src/glsl/nir/spirv.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv.h b/src/glsl/nir/spirv.h index 93135c09596..da717ecd342 100644 --- a/src/glsl/nir/spirv.h +++ b/src/glsl/nir/spirv.h @@ -48,13 +48,13 @@ namespace spv { -const int MagicNumber = 0x07230203; -const int Version = 99; +static const int MagicNumber = 0x07230203; +static const int Version = 99; typedef unsigned int Id; -const unsigned int OpCodeMask = 0xFFFF; -const unsigned int WordCountShift = 16; +static const unsigned int OpCodeMask = 0xFFFF; +static const unsigned int WordCountShift = 16; enum SourceLanguage { SourceLanguageUnknown = 0, @@ -677,13 +677,13 @@ enum Op { #ifndef __cplusplus -const int SpvMagicNumber = 0x07230203; -const int SpvVersion = 99; +static const int SpvMagicNumber = 0x07230203; +static const int SpvVersion = 99; typedef unsigned int SpvId; -const unsigned int SpvOpCodeMask = 0xFFFF; -const unsigned int SpvWordCountShift = 16; +static const unsigned int SpvOpCodeMask = 0xFFFF; +static const unsigned int SpvWordCountShift = 16; typedef enum SpvSourceLanguage_ { SpvSourceLanguageUnknown = 0, -- cgit v1.2.3 From 45963c9c64bb18e8f8b4cde3ef2c5a93ed19d5fc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 May 2015 12:32:58 -0700 Subject: nir/types: Add support for sampler types --- src/glsl/nir/nir_types.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ src/glsl/nir/nir_types.h | 8 ++++++++ 2 files changed, 49 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index a6d35fe6179..35421506545 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -124,6 +124,20 @@ glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index) return type->fields.structure[index].name; } +glsl_sampler_dim +glsl_get_sampler_dim(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return 
(glsl_sampler_dim)type->sampler_dimensionality; +} + +glsl_base_type +glsl_get_sampler_result_type(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return (glsl_base_type)type->sampler_type; +} + bool glsl_type_is_void(const glsl_type *type) { @@ -154,6 +168,26 @@ glsl_type_is_matrix(const struct glsl_type *type) return type->is_matrix(); } +bool +glsl_type_is_sampler(const struct glsl_type *type) +{ + return type->is_sampler(); +} + +bool +glsl_sampler_type_is_shadow(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return type->sampler_shadow; +} + +bool +glsl_sampler_type_is_array(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return type->sampler_array; +} + const glsl_type * glsl_void_type(void) { @@ -223,6 +257,13 @@ glsl_struct_type(const glsl_struct_field *fields, return glsl_type::get_record_instance(fields, num_fields, name); } +const struct glsl_type * +glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array, + enum glsl_base_type base_type) +{ + return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type); +} + const glsl_type * glsl_function_type(const glsl_type *return_type, const glsl_function_param *params, unsigned num_params) diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index f19f0e5db5d..ceb131c9f47 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -68,12 +68,17 @@ unsigned glsl_get_length(const struct glsl_type *type); const char *glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index); +enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type); +enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type); bool glsl_type_is_void(const struct glsl_type *type); bool glsl_type_is_vector(const struct glsl_type *type); bool glsl_type_is_scalar(const struct glsl_type *type); bool glsl_type_is_vector_or_scalar(const struct glsl_type *type); bool 
glsl_type_is_matrix(const struct glsl_type *type); +bool glsl_type_is_sampler(const struct glsl_type *type); +bool glsl_sampler_type_is_shadow(const struct glsl_type *type); +bool glsl_sampler_type_is_array(const struct glsl_type *type); const struct glsl_type *glsl_void_type(void); const struct glsl_type *glsl_float_type(void); @@ -91,6 +96,9 @@ const struct glsl_type *glsl_array_type(const struct glsl_type *base, unsigned elements); const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields, unsigned num_fields, const char *name); +const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim, + bool is_shadow, bool is_array, + enum glsl_base_type base_type); const struct glsl_type * glsl_function_type(const struct glsl_type *return_type, const struct glsl_function_param *params, unsigned num_params); -- cgit v1.2.3 From a992909aaef77c55162c4e92b16f009fe6b40fe2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 May 2015 12:33:29 -0700 Subject: nir/spirv: Move Exp and Log to the list of currently unhandled ALU ops NIR doesn't have the native opcodes for them anymore --- src/glsl/nir/spirv_glsl450_to_nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c index 240ff012fe1..3b9d0940aad 100644 --- a/src/glsl/nir/spirv_glsl450_to_nir.c +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -170,8 +170,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, nir_fcos(&b->nb, src[0])); return; case Pow: op = nir_op_fpow; break; - case Exp: op = nir_op_fexp; break; - case Log: op = nir_op_flog; break; case Exp2: op = nir_op_fexp2; break; case Log2: op = nir_op_flog2; break; case Sqrt: op = nir_op_fsqrt; break; @@ -227,6 +225,8 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case FindLSB: op = nir_op_find_lsb; break; case FindMSB: op = nir_op_ufind_msb; break; /* TODO */ + case 
Exp: + case Log: case Clamp: case Asin: case Acos: -- cgit v1.2.3 From fe220ebd371308c89f969a89bcff2b0c63ff070a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 May 2015 12:34:15 -0700 Subject: nir/spirv: Add initial support for samplers --- src/glsl/nir/spirv_to_nir.c | 155 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 152 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index bcb77faa551..a0404dfd057 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -256,7 +256,36 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, */ return vtn_value(b, args[1], vtn_value_type_type)->type; - case SpvOpTypeSampler: + case SpvOpTypeSampler: { + const struct glsl_type *sampled_type = + vtn_value(b, args[0], vtn_value_type_type)->type; + + assert(glsl_type_is_vector_or_scalar(sampled_type)); + + enum glsl_sampler_dim dim; + switch ((SpvDim)args[1]) { + case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; + case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; + case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; + case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; + case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; + case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; + default: + unreachable("Invalid SPIR-V Sampler dimension"); + } + + /* TODO: Handle the various texture image/filter options */ + (void)args[2]; + + bool is_array = args[3]; + bool is_shadow = args[4]; + + assert(args[5] == 0 && "FIXME: Handl multi-sampled textures"); + + return glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); + } + case SpvOpTypeRuntimeArray: case SpvOpTypeOpaque: case SpvOpTypeEvent: @@ -559,10 +588,16 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } case SpvOpLoad: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); nir_deref_var *src = vtn_value(b, w[3], vtn_value_type_deref)->deref; const struct glsl_type 
*src_type = nir_deref_tail(&src->deref)->type; + + if (glsl_get_base_type(src_type) == GLSL_TYPE_SAMPLER) { + vtn_push_value(b, w[2], vtn_value_type_deref)->deref = src; + return; + } + assert(glsl_type_is_vector_or_scalar(src_type)); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); @@ -635,11 +670,125 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, unreachable("Unhandled opcode"); } +static nir_tex_src +vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) +{ + nir_tex_src src; + src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa); + src.src_type = type; + return src; +} + static void vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Unhandled opcode"); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + nir_deref_var *sampler = vtn_value(b, w[3], vtn_value_type_deref)->deref; + + nir_tex_src srcs[8]; /* 8 should be enough */ + nir_tex_src *p = srcs; + + unsigned coord_components = 0; + switch (opcode) { + case SpvOpTextureSample: + case SpvOpTextureSampleDref: + case SpvOpTextureSampleLod: + case SpvOpTextureSampleProj: + case SpvOpTextureSampleGrad: + case SpvOpTextureSampleOffset: + case SpvOpTextureSampleProjLod: + case SpvOpTextureSampleProjGrad: + case SpvOpTextureSampleLodOffset: + case SpvOpTextureSampleProjOffset: + case SpvOpTextureSampleGradOffset: + case SpvOpTextureSampleProjLodOffset: + case SpvOpTextureSampleProjGradOffset: + case SpvOpTextureFetchTexelLod: + case SpvOpTextureFetchTexelOffset: + case SpvOpTextureFetchSample: + case SpvOpTextureFetchTexel: + case SpvOpTextureGather: + case SpvOpTextureGatherOffset: + case SpvOpTextureGatherOffsets: + case SpvOpTextureQueryLod: { + /* All these types have the coordinate as their first real argument */ + struct vtn_value *coord = vtn_value(b, w[4], 
vtn_value_type_ssa); + coord_components = glsl_get_vector_elements(coord->type); + p->src = nir_src_for_ssa(coord->ssa); + p->src_type = nir_tex_src_coord; + p++; + break; + } + default: + break; + } + + nir_texop texop; + switch (opcode) { + case SpvOpTextureSample: + texop = nir_texop_tex; + + if (count == 6) { + texop = nir_texop_txb; + *p++ = vtn_tex_src(b, w[5], nir_tex_src_bias); + } + break; + + case SpvOpTextureSampleDref: + case SpvOpTextureSampleLod: + case SpvOpTextureSampleProj: + case SpvOpTextureSampleGrad: + case SpvOpTextureSampleOffset: + case SpvOpTextureSampleProjLod: + case SpvOpTextureSampleProjGrad: + case SpvOpTextureSampleLodOffset: + case SpvOpTextureSampleProjOffset: + case SpvOpTextureSampleGradOffset: + case SpvOpTextureSampleProjLodOffset: + case SpvOpTextureSampleProjGradOffset: + case SpvOpTextureFetchTexelLod: + case SpvOpTextureFetchTexelOffset: + case SpvOpTextureFetchSample: + case SpvOpTextureFetchTexel: + case SpvOpTextureGather: + case SpvOpTextureGatherOffset: + case SpvOpTextureGatherOffsets: + case SpvOpTextureQuerySizeLod: + case SpvOpTextureQuerySize: + case SpvOpTextureQueryLod: + case SpvOpTextureQueryLevels: + case SpvOpTextureQuerySamples: + default: + unreachable("Unhandled opcode"); + } + + nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); + + const struct glsl_type *sampler_type = nir_deref_tail(&sampler->deref)->type; + instr->sampler_dim = glsl_get_sampler_dim(sampler_type); + + switch (glsl_get_sampler_result_type(sampler_type)) { + case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; + case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; + case GLSL_TYPE_UINT: instr->dest_type = nir_type_unsigned; break; + case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; + default: + unreachable("Invalid base type for sampler result"); + } + + instr->op = texop; + memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); + instr->coord_components = coord_components; + 
instr->is_array = glsl_sampler_type_is_array(sampler_type); + instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); + + instr->sampler = sampler; + + nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); + val->ssa = &instr->dest.ssa; + + nir_builder_instr_insert(&b->nb, &instr->instr); } static void -- cgit v1.2.3 From 756b00389c4034331989299d0dd3505da02dfa78 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 May 2015 12:34:32 -0700 Subject: nir/spirv: Don't assert that the current block is empty It's possible that someone will give us SPIR-V code in which someone needlessly branches to new blocks. We should handle that ok now. --- src/glsl/nir/spirv_to_nir.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a0404dfd057..3f2ef15c74f 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1186,8 +1186,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); assert(tail_node->type == nir_cf_node_block); block->block = nir_cf_node_as_block(tail_node); - - assert(exec_list_is_empty(&block->block->instr_list)); break; } -- cgit v1.2.3 From a1e136711bc661f4471115396f6a477a5fe9f930 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Jun 2015 17:34:12 -0700 Subject: nir/types: add a helper to transpose a matrix type --- src/glsl/nir/nir_types.cpp | 7 +++++++ src/glsl/nir/nir_types.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 35421506545..d44d48095da 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -270,3 +270,10 @@ glsl_function_type(const glsl_type *return_type, { return glsl_type::get_function_instance(return_type, params, num_params); } + +const glsl_type * +glsl_transposed_type(const struct glsl_type *type) +{ + return 
glsl_type::get_instance(type->base_type, type->matrix_columns, + type->vector_elements); +} diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index ceb131c9f47..60e1d9d96fc 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -103,6 +103,8 @@ const struct glsl_type * glsl_function_type(const struct glsl_type *return_type, const struct glsl_function_param *params, unsigned num_params); +const struct glsl_type *glsl_transposed_type(const struct glsl_type *type); + #ifdef __cplusplus } #endif -- cgit v1.2.3 From c62be3828671706600f4b661e0b67fef78580cd2 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 15 Jul 2015 21:58:32 -0700 Subject: nir/types: add more nir_type_is_xxx() wrappers --- src/glsl/nir/nir_types.cpp | 12 ++++++++++++ src/glsl/nir/nir_types.h | 2 ++ 2 files changed, 14 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index d44d48095da..809a7cff79c 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -168,6 +168,18 @@ glsl_type_is_matrix(const struct glsl_type *type) return type->is_matrix(); } +bool +glsl_type_is_array(const struct glsl_type *type) +{ + return type->is_array(); +} + +bool +glsl_type_is_struct(const struct glsl_type *type) +{ + return type->is_record() || type->is_interface(); +} + bool glsl_type_is_sampler(const struct glsl_type *type) { diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index 60e1d9d96fc..a2fa7934e16 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -76,6 +76,8 @@ bool glsl_type_is_vector(const struct glsl_type *type); bool glsl_type_is_scalar(const struct glsl_type *type); bool glsl_type_is_vector_or_scalar(const struct glsl_type *type); bool glsl_type_is_matrix(const struct glsl_type *type); +bool glsl_type_is_array(const struct glsl_type *type); +bool glsl_type_is_struct(const struct glsl_type *type); bool glsl_type_is_sampler(const struct glsl_type *type); bool 
glsl_sampler_type_is_shadow(const struct glsl_type *type); bool glsl_sampler_type_is_array(const struct glsl_type *type); -- cgit v1.2.3 From 4956bbaa33858f2d67465421ac59dcfd66637c8b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 28 Aug 2015 17:17:39 -0700 Subject: nir/cursor: Add a constructor for the end of a block but before the jump --- src/glsl/nir/nir.h | 11 +++++++++++ src/glsl/nir/nir_from_ssa.c | 7 +------ 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index fc8f27a7fe5..0358f53737e 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1616,6 +1616,17 @@ nir_after_instr(nir_instr *instr) return cursor; } +static inline nir_cursor +nir_after_block_before_jump(nir_block *block) +{ + nir_instr *last_instr = nir_block_last_instr(block); + if (last_instr && last_instr->type == nir_instr_type_jump) { + return nir_before_instr(last_instr); + } else { + return nir_after_block(block); + } +} + static inline nir_cursor nir_before_cf_node(nir_cf_node *node) { diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c index 1fd8b24d33d..94002f18cd7 100644 --- a/src/glsl/nir/nir_from_ssa.c +++ b/src/glsl/nir/nir_from_ssa.c @@ -249,12 +249,7 @@ add_parallel_copy_to_end_of_block(nir_block *block, void *void_state) nir_parallel_copy_instr *pcopy = nir_parallel_copy_instr_create(state->dead_ctx); - nir_instr *last_instr = nir_block_last_instr(block); - if (last_instr && last_instr->type == nir_instr_type_jump) { - nir_instr_insert_before(last_instr, &pcopy->instr); - } else { - nir_instr_insert_after_block(block, &pcopy->instr); - } + nir_instr_insert(nir_after_block_before_jump(block), &pcopy->instr); } return true; -- cgit v1.2.3 From f6a0eff1ba3899276988cc115289d78363c25af0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 28 Aug 2015 17:09:02 -0700 Subject: nir: Add a pass to lower outputs to temporary variables This pass can be used as a helper for NIR 
producers so they don't have to worry about creating the temporaries themselves. --- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir.h | 2 + src/glsl/nir/nir_lower_outputs_to_temporaries.c | 93 +++++++++++++++++++++++++ 3 files changed, 96 insertions(+) create mode 100644 src/glsl/nir/nir_lower_outputs_to_temporaries.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index da38e3576bd..acd13ef7745 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -40,6 +40,7 @@ NIR_FILES = \ nir/nir_lower_locals_to_regs.c \ nir/nir_lower_idiv.c \ nir/nir_lower_io.c \ + nir/nir_lower_outputs_to_temporaries.c \ nir/nir_lower_phis_to_scalar.c \ nir/nir_lower_samplers.cpp \ nir/nir_lower_system_values.c \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 0358f53737e..ac80af37bae 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1791,6 +1791,8 @@ void nir_lower_global_vars_to_local(nir_shader *shader); void nir_lower_locals_to_regs(nir_shader *shader); +void nir_lower_outputs_to_temporaries(nir_shader *shader); + void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, int (*type_size)(const struct glsl_type *)); diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c b/src/glsl/nir/nir_lower_outputs_to_temporaries.c new file mode 100644 index 00000000000..1a3e7721c31 --- /dev/null +++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c @@ -0,0 +1,93 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and 
this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +/* + * Implements a pass that lowers output variables to a temporary plus an + * output variable with a single copy at each exit point of the shader. + * This way the output variable is only ever written. + * + * Because valid NIR requires that output variables are never read, this + * pass is more of a helper for NIR producers and must be run before the + * shader is ever validated. 
+ */ + +#include "nir.h" + +static void +emit_output_copies(nir_shader *shader, nir_variable *temp, nir_variable *output) +{ + nir_foreach_overload(shader, overload) { + if (!overload->impl || strcmp(overload->function->name, "main")) + continue; + + struct set_entry *block_entry; + set_foreach(overload->impl->end_block->predecessors, block_entry) { + struct nir_block *block = (void *)block_entry->key; + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_var_create(copy, output); + copy->variables[1] = nir_deref_var_create(copy, temp); + + nir_instr_insert(nir_after_block_before_jump(block), ©->instr); + } + } +} + +void +nir_lower_outputs_to_temporaries(nir_shader *shader) +{ + struct exec_list old_outputs; + + exec_list_move_nodes_to(&shader->outputs, &old_outputs); + + /* Walk over all of the outputs turn each output into a temporary and + * make a new variable for the actual output. + */ + foreach_list_typed(nir_variable, var, node, &old_outputs) { + nir_variable *output = ralloc(shader, nir_variable); + memcpy(output, var, sizeof *output); + + /* The orignal is now the temporary */ + nir_variable *temp = var; + + /* Move the original name over to the new output */ + if (output->name) + ralloc_steal(output, output->name); + + /* Give the output a new name with @out-temp appended */ + temp->name = ralloc_asprintf(var, "%s@out-temp", output->name); + temp->data.mode = nir_var_global; + temp->constant_initializer = NULL; + + exec_list_push_tail(&shader->outputs, &output->node); + + emit_output_copies(shader, temp, output); + } + + exec_list_append(&shader->globals, &old_outputs); +} -- cgit v1.2.3 From 024c49e95e04dc6ec86c851450bfc954e46265d6 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 18 Jun 2015 17:34:55 -0700 Subject: nir/builder: add a nir_fdot() convenience function --- src/glsl/nir/nir_builder.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'src') diff 
--git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 08b40f8ea7c..3aa0efded3c 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -217,6 +217,23 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], nir_imov_alu(build, alu_src, num_components); } +/* Selects the right fdot given the number of components in each source. */ +static inline nir_ssa_def * +nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) +{ + assert(src0->num_components == src1->num_components); + switch (src0->num_components) { + case 1: return nir_fmul(build, src0, src1); + case 2: return nir_fdot2(build, src0, src1); + case 3: return nir_fdot3(build, src0, src1); + case 4: return nir_fdot4(build, src0, src1); + default: + unreachable("bad component size"); + } + + return NULL; +} + /** * Turns a nir_src into a nir_ssa_def * so it can be passed to * nir_build_alu()-based builder calls. -- cgit v1.2.3 From de4f379a7092bb1710e205128447ff447b3868c5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 31 Aug 2015 14:48:10 -0700 Subject: nir/cursor: Add a helper for getting the current block --- src/glsl/nir/nir.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index ac80af37bae..2ad739523ba 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1580,6 +1580,17 @@ typedef struct { }; } nir_cursor; +static inline nir_block * +nir_cursor_current_block(nir_cursor cursor) +{ + if (cursor.option == nir_cursor_before_instr || + cursor.option == nir_cursor_after_instr) { + return cursor.instr->block; + } else { + return cursor.block; + } +} + static inline nir_cursor nir_before_block(nir_block *block) { -- cgit v1.2.3 From 85cf2385c525caa576199d2020a2faab57ee041b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 31 Aug 2015 14:55:49 -0700 Subject: mesa: Move gl_vert_attrib from mtypes.h to shader_enums.h It is a shader enum after all... 
--- src/glsl/shader_enums.h | 108 ++++++++++++++++++++++++++++++++++++++++++++++++ src/mesa/main/mtypes.h | 107 ----------------------------------------------- 2 files changed, 108 insertions(+), 107 deletions(-) (limited to 'src') diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h index c6f4678f56f..9bb163f3bb0 100644 --- a/src/glsl/shader_enums.h +++ b/src/glsl/shader_enums.h @@ -45,6 +45,114 @@ typedef enum #define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1) + +/** + * Indexes for vertex program attributes. + * GL_NV_vertex_program aliases generic attributes over the conventional + * attributes. In GL_ARB_vertex_program shader the aliasing is optional. + * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the + * generic attributes are distinct/separate). + */ +typedef enum +{ + VERT_ATTRIB_POS = 0, + VERT_ATTRIB_WEIGHT = 1, + VERT_ATTRIB_NORMAL = 2, + VERT_ATTRIB_COLOR0 = 3, + VERT_ATTRIB_COLOR1 = 4, + VERT_ATTRIB_FOG = 5, + VERT_ATTRIB_COLOR_INDEX = 6, + VERT_ATTRIB_EDGEFLAG = 7, + VERT_ATTRIB_TEX0 = 8, + VERT_ATTRIB_TEX1 = 9, + VERT_ATTRIB_TEX2 = 10, + VERT_ATTRIB_TEX3 = 11, + VERT_ATTRIB_TEX4 = 12, + VERT_ATTRIB_TEX5 = 13, + VERT_ATTRIB_TEX6 = 14, + VERT_ATTRIB_TEX7 = 15, + VERT_ATTRIB_POINT_SIZE = 16, + VERT_ATTRIB_GENERIC0 = 17, + VERT_ATTRIB_GENERIC1 = 18, + VERT_ATTRIB_GENERIC2 = 19, + VERT_ATTRIB_GENERIC3 = 20, + VERT_ATTRIB_GENERIC4 = 21, + VERT_ATTRIB_GENERIC5 = 22, + VERT_ATTRIB_GENERIC6 = 23, + VERT_ATTRIB_GENERIC7 = 24, + VERT_ATTRIB_GENERIC8 = 25, + VERT_ATTRIB_GENERIC9 = 26, + VERT_ATTRIB_GENERIC10 = 27, + VERT_ATTRIB_GENERIC11 = 28, + VERT_ATTRIB_GENERIC12 = 29, + VERT_ATTRIB_GENERIC13 = 30, + VERT_ATTRIB_GENERIC14 = 31, + VERT_ATTRIB_GENERIC15 = 32, + VERT_ATTRIB_MAX = 33 +} gl_vert_attrib; + +/** + * Symbolic constats to help iterating over + * specific blocks of vertex attributes. + * + * VERT_ATTRIB_FF + * includes all fixed function attributes as well as + * the aliased GL_NV_vertex_program shader attributes. 
+ * VERT_ATTRIB_TEX + * include the classic texture coordinate attributes. + * Is a subset of VERT_ATTRIB_FF. + * VERT_ATTRIB_GENERIC + * include the OpenGL 2.0+ GLSL generic shader attributes. + * These alias the generic GL_ARB_vertex_shader attributes. + */ +#define VERT_ATTRIB_FF(i) (VERT_ATTRIB_POS + (i)) +#define VERT_ATTRIB_FF_MAX VERT_ATTRIB_GENERIC0 + +#define VERT_ATTRIB_TEX(i) (VERT_ATTRIB_TEX0 + (i)) +#define VERT_ATTRIB_TEX_MAX MAX_TEXTURE_COORD_UNITS + +#define VERT_ATTRIB_GENERIC(i) (VERT_ATTRIB_GENERIC0 + (i)) +#define VERT_ATTRIB_GENERIC_MAX MAX_VERTEX_GENERIC_ATTRIBS + +/** + * Bitflags for vertex attributes. + * These are used in bitfields in many places. + */ +/*@{*/ +#define VERT_BIT_POS BITFIELD64_BIT(VERT_ATTRIB_POS) +#define VERT_BIT_WEIGHT BITFIELD64_BIT(VERT_ATTRIB_WEIGHT) +#define VERT_BIT_NORMAL BITFIELD64_BIT(VERT_ATTRIB_NORMAL) +#define VERT_BIT_COLOR0 BITFIELD64_BIT(VERT_ATTRIB_COLOR0) +#define VERT_BIT_COLOR1 BITFIELD64_BIT(VERT_ATTRIB_COLOR1) +#define VERT_BIT_FOG BITFIELD64_BIT(VERT_ATTRIB_FOG) +#define VERT_BIT_COLOR_INDEX BITFIELD64_BIT(VERT_ATTRIB_COLOR_INDEX) +#define VERT_BIT_EDGEFLAG BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG) +#define VERT_BIT_TEX0 BITFIELD64_BIT(VERT_ATTRIB_TEX0) +#define VERT_BIT_TEX1 BITFIELD64_BIT(VERT_ATTRIB_TEX1) +#define VERT_BIT_TEX2 BITFIELD64_BIT(VERT_ATTRIB_TEX2) +#define VERT_BIT_TEX3 BITFIELD64_BIT(VERT_ATTRIB_TEX3) +#define VERT_BIT_TEX4 BITFIELD64_BIT(VERT_ATTRIB_TEX4) +#define VERT_BIT_TEX5 BITFIELD64_BIT(VERT_ATTRIB_TEX5) +#define VERT_BIT_TEX6 BITFIELD64_BIT(VERT_ATTRIB_TEX6) +#define VERT_BIT_TEX7 BITFIELD64_BIT(VERT_ATTRIB_TEX7) +#define VERT_BIT_POINT_SIZE BITFIELD64_BIT(VERT_ATTRIB_POINT_SIZE) +#define VERT_BIT_GENERIC0 BITFIELD64_BIT(VERT_ATTRIB_GENERIC0) + +#define VERT_BIT(i) BITFIELD64_BIT(i) +#define VERT_BIT_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_MAX) + +#define VERT_BIT_FF(i) VERT_BIT(i) +#define VERT_BIT_FF_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_FF_MAX) +#define VERT_BIT_TEX(i) 
VERT_BIT(VERT_ATTRIB_TEX(i)) +#define VERT_BIT_TEX_ALL \ + BITFIELD64_RANGE(VERT_ATTRIB_TEX(0), VERT_ATTRIB_TEX_MAX) + +#define VERT_BIT_GENERIC(i) VERT_BIT(VERT_ATTRIB_GENERIC(i)) +#define VERT_BIT_GENERIC_ALL \ + BITFIELD64_RANGE(VERT_ATTRIB_GENERIC(0), VERT_ATTRIB_GENERIC_MAX) +/*@}*/ + + /** * Indexes for vertex shader outputs, geometry shader inputs/outputs, and * fragment shader inputs. diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index a172952c1fb..85a9f5dc5f1 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -94,113 +94,6 @@ struct vbo_context; #define PRIM_OUTSIDE_BEGIN_END (PRIM_MAX + 1) #define PRIM_UNKNOWN (PRIM_MAX + 2) -/** - * Indexes for vertex program attributes. - * GL_NV_vertex_program aliases generic attributes over the conventional - * attributes. In GL_ARB_vertex_program shader the aliasing is optional. - * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the - * generic attributes are distinct/separate). - */ -typedef enum -{ - VERT_ATTRIB_POS = 0, - VERT_ATTRIB_WEIGHT = 1, - VERT_ATTRIB_NORMAL = 2, - VERT_ATTRIB_COLOR0 = 3, - VERT_ATTRIB_COLOR1 = 4, - VERT_ATTRIB_FOG = 5, - VERT_ATTRIB_COLOR_INDEX = 6, - VERT_ATTRIB_EDGEFLAG = 7, - VERT_ATTRIB_TEX0 = 8, - VERT_ATTRIB_TEX1 = 9, - VERT_ATTRIB_TEX2 = 10, - VERT_ATTRIB_TEX3 = 11, - VERT_ATTRIB_TEX4 = 12, - VERT_ATTRIB_TEX5 = 13, - VERT_ATTRIB_TEX6 = 14, - VERT_ATTRIB_TEX7 = 15, - VERT_ATTRIB_POINT_SIZE = 16, - VERT_ATTRIB_GENERIC0 = 17, - VERT_ATTRIB_GENERIC1 = 18, - VERT_ATTRIB_GENERIC2 = 19, - VERT_ATTRIB_GENERIC3 = 20, - VERT_ATTRIB_GENERIC4 = 21, - VERT_ATTRIB_GENERIC5 = 22, - VERT_ATTRIB_GENERIC6 = 23, - VERT_ATTRIB_GENERIC7 = 24, - VERT_ATTRIB_GENERIC8 = 25, - VERT_ATTRIB_GENERIC9 = 26, - VERT_ATTRIB_GENERIC10 = 27, - VERT_ATTRIB_GENERIC11 = 28, - VERT_ATTRIB_GENERIC12 = 29, - VERT_ATTRIB_GENERIC13 = 30, - VERT_ATTRIB_GENERIC14 = 31, - VERT_ATTRIB_GENERIC15 = 32, - VERT_ATTRIB_MAX = 33 -} gl_vert_attrib; - -/** - * Symbolic constats to help 
iterating over - * specific blocks of vertex attributes. - * - * VERT_ATTRIB_FF - * includes all fixed function attributes as well as - * the aliased GL_NV_vertex_program shader attributes. - * VERT_ATTRIB_TEX - * include the classic texture coordinate attributes. - * Is a subset of VERT_ATTRIB_FF. - * VERT_ATTRIB_GENERIC - * include the OpenGL 2.0+ GLSL generic shader attributes. - * These alias the generic GL_ARB_vertex_shader attributes. - */ -#define VERT_ATTRIB_FF(i) (VERT_ATTRIB_POS + (i)) -#define VERT_ATTRIB_FF_MAX VERT_ATTRIB_GENERIC0 - -#define VERT_ATTRIB_TEX(i) (VERT_ATTRIB_TEX0 + (i)) -#define VERT_ATTRIB_TEX_MAX MAX_TEXTURE_COORD_UNITS - -#define VERT_ATTRIB_GENERIC(i) (VERT_ATTRIB_GENERIC0 + (i)) -#define VERT_ATTRIB_GENERIC_MAX MAX_VERTEX_GENERIC_ATTRIBS - -/** - * Bitflags for vertex attributes. - * These are used in bitfields in many places. - */ -/*@{*/ -#define VERT_BIT_POS BITFIELD64_BIT(VERT_ATTRIB_POS) -#define VERT_BIT_WEIGHT BITFIELD64_BIT(VERT_ATTRIB_WEIGHT) -#define VERT_BIT_NORMAL BITFIELD64_BIT(VERT_ATTRIB_NORMAL) -#define VERT_BIT_COLOR0 BITFIELD64_BIT(VERT_ATTRIB_COLOR0) -#define VERT_BIT_COLOR1 BITFIELD64_BIT(VERT_ATTRIB_COLOR1) -#define VERT_BIT_FOG BITFIELD64_BIT(VERT_ATTRIB_FOG) -#define VERT_BIT_COLOR_INDEX BITFIELD64_BIT(VERT_ATTRIB_COLOR_INDEX) -#define VERT_BIT_EDGEFLAG BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG) -#define VERT_BIT_TEX0 BITFIELD64_BIT(VERT_ATTRIB_TEX0) -#define VERT_BIT_TEX1 BITFIELD64_BIT(VERT_ATTRIB_TEX1) -#define VERT_BIT_TEX2 BITFIELD64_BIT(VERT_ATTRIB_TEX2) -#define VERT_BIT_TEX3 BITFIELD64_BIT(VERT_ATTRIB_TEX3) -#define VERT_BIT_TEX4 BITFIELD64_BIT(VERT_ATTRIB_TEX4) -#define VERT_BIT_TEX5 BITFIELD64_BIT(VERT_ATTRIB_TEX5) -#define VERT_BIT_TEX6 BITFIELD64_BIT(VERT_ATTRIB_TEX6) -#define VERT_BIT_TEX7 BITFIELD64_BIT(VERT_ATTRIB_TEX7) -#define VERT_BIT_POINT_SIZE BITFIELD64_BIT(VERT_ATTRIB_POINT_SIZE) -#define VERT_BIT_GENERIC0 BITFIELD64_BIT(VERT_ATTRIB_GENERIC0) - -#define VERT_BIT(i) BITFIELD64_BIT(i) -#define 
VERT_BIT_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_MAX) - -#define VERT_BIT_FF(i) VERT_BIT(i) -#define VERT_BIT_FF_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_FF_MAX) -#define VERT_BIT_TEX(i) VERT_BIT(VERT_ATTRIB_TEX(i)) -#define VERT_BIT_TEX_ALL \ - BITFIELD64_RANGE(VERT_ATTRIB_TEX(0), VERT_ATTRIB_TEX_MAX) - -#define VERT_BIT_GENERIC(i) VERT_BIT(VERT_ATTRIB_GENERIC(i)) -#define VERT_BIT_GENERIC_ALL \ - BITFIELD64_RANGE(VERT_ATTRIB_GENERIC(0), VERT_ATTRIB_GENERIC_MAX) -/*@}*/ - - #define VARYING_SLOT_MAX (VARYING_SLOT_VAR0 + MAX_VARYING) #define VARYING_SLOT_PATCH0 (VARYING_SLOT_MAX) #define VARYING_SLOT_TESS_MAX (VARYING_SLOT_PATCH0 + MAX_VARYING) -- cgit v1.2.3 From f4608bc530e61b7dbc8ebe7eff6ddbc70ac20a9d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 31 Aug 2015 15:34:56 -0700 Subject: nir/nir_variable: Add a descriptor set field We need this for SPIR-V --- src/glsl/nir/nir.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 2ad739523ba..4c60dbd645a 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -296,6 +296,11 @@ typedef struct { */ int index; + /** + * Descriptor set binding for sampler or UBO. + */ + int descriptor_set; + /** * Initial binding point for a sampler or UBO. * -- cgit v1.2.3 From 24b0c532319b9318e6e5794978c7e1c05e81d76e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 31 Aug 2015 15:54:21 -0700 Subject: nir/intrinsics: Move to a two-dimensional binding model for UBO's --- src/glsl/nir/nir_intrinsics.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index ed309b602c2..1f24f9f677d 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -145,8 +145,9 @@ SYSTEM_VALUE(invocation_id, 1) * the first index is the base address and the second index is an offset that * should be added to the base address. 
(This way you can determine in the * back-end which variable is being accessed even in an array.) For inputs, - * the one and only index corresponds to the attribute slot. UBO loads also - * have a single index which is the base address to load from. + * the one and only index corresponds to the attribute slot. UBO loads + * have two indices the first of which is the descriptor set and the second + * is the base address to load from. * * UBO loads have a (possibly constant) source which is the UBO buffer index. * For each type of load, the _indirect variant has one additional source @@ -165,7 +166,7 @@ SYSTEM_VALUE(invocation_id, 1) true, 0, 0, indices, flags) LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* LOAD(ssbo, 1, 0) */ -- cgit v1.2.3 From ce70cae7562c7651a7fb907b4bd2f6924a00b40c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 31 Aug 2015 16:54:02 -0700 Subject: nir/builder: Use nir_after_instr to advance the cursor This *should* ensure that the cursor gets properly advanced in all cases. We had a problem before where, if the cursor was created using nir_after_cf_node on a non-block cf_node, that would call nir_before_block on the block following the cf node. Instructions would then get inserted in backwards order at the top of the block which is not at all what you would expect from nir_after_cf_node. By just resetting to after_instr, we avoid all these problems. 
--- src/glsl/nir/nir_builder.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 3aa0efded3c..295a209b4e6 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -49,8 +49,7 @@ nir_builder_instr_insert(nir_builder *build, nir_instr *instr) nir_instr_insert(build->cursor, instr); /* Move the cursor forward. */ - if (build->cursor.option == nir_cursor_after_instr) - build->cursor.instr = instr; + build->cursor = nir_after_instr(instr); } static inline void -- cgit v1.2.3 From 22fdb2f8551330fea308ce9cfe151ec00201e2b1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 31 Aug 2015 12:48:04 -0700 Subject: nir/spirv: Update to the latest revision --- src/glsl/nir/nir_spirv.h | 8 + src/glsl/nir/spirv.h | 1484 +++++++++----------------- src/glsl/nir/spirv_glsl450_to_nir.c | 27 +- src/glsl/nir/spirv_to_nir.c | 2018 +++++++++++++++++++++++++++++------ src/glsl/nir/spirv_to_nir_private.h | 89 +- 5 files changed, 2313 insertions(+), 1313 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_spirv.h b/src/glsl/nir/nir_spirv.h index 5f4140db3d9..1f09174ad7f 100644 --- a/src/glsl/nir/nir_spirv.h +++ b/src/glsl/nir/nir_spirv.h @@ -32,8 +32,16 @@ #include "nir.h" +#ifdef __cplusplus +extern "C" { +#endif + nir_shader *spirv_to_nir(const uint32_t *words, size_t word_count, gl_shader_stage stage, const nir_shader_compiler_options *options); +#ifdef __cplusplus +} +#endif + #endif /* _NIR_SPIRV_H_ */ diff --git a/src/glsl/nir/spirv.h b/src/glsl/nir/spirv.h index da717ecd342..d289c687c76 100644 --- a/src/glsl/nir/spirv.h +++ b/src/glsl/nir/spirv.h @@ -1,5 +1,5 @@ /* -** Copyright (c) 2015 The Khronos Group Inc. +** Copyright (c) 2014-2015 The Khronos Group Inc. 
** ** Permission is hereby granted, free of charge, to any person obtaining a copy ** of this software and/or associated documentation files (the "Materials"), @@ -30,10 +30,15 @@ */ /* -** Specification revision 30. -** Enumeration tokens for SPIR-V, in three styles: C, C++, generic. -** - C++ will have the tokens in the "spv" name space, with no prefix. -** - C will have tokens with as "Spv" prefix. +** Specification revision 31. +** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] ** ** Some tokens act like mask values, which can be OR'd together, ** while others are mutually exclusive. 
The mask-like ones have @@ -44,645 +49,12 @@ #ifndef spirv_H #define spirv_H -#ifdef __cplusplus - -namespace spv { - -static const int MagicNumber = 0x07230203; -static const int Version = 99; - -typedef unsigned int Id; - -static const unsigned int OpCodeMask = 0xFFFF; -static const unsigned int WordCountShift = 16; - -enum SourceLanguage { - SourceLanguageUnknown = 0, - SourceLanguageESSL = 1, - SourceLanguageGLSL = 2, - SourceLanguageOpenCL = 3, -}; - -enum ExecutionModel { - ExecutionModelVertex = 0, - ExecutionModelTessellationControl = 1, - ExecutionModelTessellationEvaluation = 2, - ExecutionModelGeometry = 3, - ExecutionModelFragment = 4, - ExecutionModelGLCompute = 5, - ExecutionModelKernel = 6, -}; - -enum AddressingModel { - AddressingModelLogical = 0, - AddressingModelPhysical32 = 1, - AddressingModelPhysical64 = 2, -}; - -enum MemoryModel { - MemoryModelSimple = 0, - MemoryModelGLSL450 = 1, - MemoryModelOpenCL12 = 2, - MemoryModelOpenCL20 = 3, - MemoryModelOpenCL21 = 4, -}; - -enum ExecutionMode { - ExecutionModeInvocations = 0, - ExecutionModeSpacingEqual = 1, - ExecutionModeSpacingFractionalEven = 2, - ExecutionModeSpacingFractionalOdd = 3, - ExecutionModeVertexOrderCw = 4, - ExecutionModeVertexOrderCcw = 5, - ExecutionModePixelCenterInteger = 6, - ExecutionModeOriginUpperLeft = 7, - ExecutionModeEarlyFragmentTests = 8, - ExecutionModePointMode = 9, - ExecutionModeXfb = 10, - ExecutionModeDepthReplacing = 11, - ExecutionModeDepthAny = 12, - ExecutionModeDepthGreater = 13, - ExecutionModeDepthLess = 14, - ExecutionModeDepthUnchanged = 15, - ExecutionModeLocalSize = 16, - ExecutionModeLocalSizeHint = 17, - ExecutionModeInputPoints = 18, - ExecutionModeInputLines = 19, - ExecutionModeInputLinesAdjacency = 20, - ExecutionModeInputTriangles = 21, - ExecutionModeInputTrianglesAdjacency = 22, - ExecutionModeInputQuads = 23, - ExecutionModeInputIsolines = 24, - ExecutionModeOutputVertices = 25, - ExecutionModeOutputPoints = 26, - 
ExecutionModeOutputLineStrip = 27, - ExecutionModeOutputTriangleStrip = 28, - ExecutionModeVecTypeHint = 29, - ExecutionModeContractionOff = 30, -}; - -enum StorageClass { - StorageClassUniformConstant = 0, - StorageClassInput = 1, - StorageClassUniform = 2, - StorageClassOutput = 3, - StorageClassWorkgroupLocal = 4, - StorageClassWorkgroupGlobal = 5, - StorageClassPrivateGlobal = 6, - StorageClassFunction = 7, - StorageClassGeneric = 8, - StorageClassPrivate = 9, - StorageClassAtomicCounter = 10, -}; - -enum Dim { - Dim1D = 0, - Dim2D = 1, - Dim3D = 2, - DimCube = 3, - DimRect = 4, - DimBuffer = 5, -}; - -enum SamplerAddressingMode { - SamplerAddressingModeNone = 0, - SamplerAddressingModeClampToEdge = 1, - SamplerAddressingModeClamp = 2, - SamplerAddressingModeRepeat = 3, - SamplerAddressingModeRepeatMirrored = 4, -}; - -enum SamplerFilterMode { - SamplerFilterModeNearest = 0, - SamplerFilterModeLinear = 1, -}; - -enum FPFastMathModeShift { - FPFastMathModeNotNaNShift = 0, - FPFastMathModeNotInfShift = 1, - FPFastMathModeNSZShift = 2, - FPFastMathModeAllowRecipShift = 3, - FPFastMathModeFastShift = 4, -}; - -enum FPFastMathModeMask { - FPFastMathModeMaskNone = 0, - FPFastMathModeNotNaNMask = 0x00000001, - FPFastMathModeNotInfMask = 0x00000002, - FPFastMathModeNSZMask = 0x00000004, - FPFastMathModeAllowRecipMask = 0x00000008, - FPFastMathModeFastMask = 0x00000010, -}; - -enum FPRoundingMode { - FPRoundingModeRTE = 0, - FPRoundingModeRTZ = 1, - FPRoundingModeRTP = 2, - FPRoundingModeRTN = 3, -}; - -enum LinkageType { - LinkageTypeExport = 0, - LinkageTypeImport = 1, -}; - -enum AccessQualifier { - AccessQualifierReadOnly = 0, - AccessQualifierWriteOnly = 1, - AccessQualifierReadWrite = 2, -}; - -enum FunctionParameterAttribute { - FunctionParameterAttributeZext = 0, - FunctionParameterAttributeSext = 1, - FunctionParameterAttributeByVal = 2, - FunctionParameterAttributeSret = 3, - FunctionParameterAttributeNoAlias = 4, - FunctionParameterAttributeNoCapture = 5, - 
FunctionParameterAttributeSVM = 6, - FunctionParameterAttributeNoWrite = 7, - FunctionParameterAttributeNoReadWrite = 8, -}; - -enum Decoration { - DecorationPrecisionLow = 0, - DecorationPrecisionMedium = 1, - DecorationPrecisionHigh = 2, - DecorationBlock = 3, - DecorationBufferBlock = 4, - DecorationRowMajor = 5, - DecorationColMajor = 6, - DecorationGLSLShared = 7, - DecorationGLSLStd140 = 8, - DecorationGLSLStd430 = 9, - DecorationGLSLPacked = 10, - DecorationSmooth = 11, - DecorationNoperspective = 12, - DecorationFlat = 13, - DecorationPatch = 14, - DecorationCentroid = 15, - DecorationSample = 16, - DecorationInvariant = 17, - DecorationRestrict = 18, - DecorationAliased = 19, - DecorationVolatile = 20, - DecorationConstant = 21, - DecorationCoherent = 22, - DecorationNonwritable = 23, - DecorationNonreadable = 24, - DecorationUniform = 25, - DecorationNoStaticUse = 26, - DecorationCPacked = 27, - DecorationSaturatedConversion = 28, - DecorationStream = 29, - DecorationLocation = 30, - DecorationComponent = 31, - DecorationIndex = 32, - DecorationBinding = 33, - DecorationDescriptorSet = 34, - DecorationOffset = 35, - DecorationAlignment = 36, - DecorationXfbBuffer = 37, - DecorationStride = 38, - DecorationBuiltIn = 39, - DecorationFuncParamAttr = 40, - DecorationFPRoundingMode = 41, - DecorationFPFastMathMode = 42, - DecorationLinkageAttributes = 43, - DecorationSpecId = 44, -}; - -enum BuiltIn { - BuiltInPosition = 0, - BuiltInPointSize = 1, - BuiltInClipVertex = 2, - BuiltInClipDistance = 3, - BuiltInCullDistance = 4, - BuiltInVertexId = 5, - BuiltInInstanceId = 6, - BuiltInPrimitiveId = 7, - BuiltInInvocationId = 8, - BuiltInLayer = 9, - BuiltInViewportIndex = 10, - BuiltInTessLevelOuter = 11, - BuiltInTessLevelInner = 12, - BuiltInTessCoord = 13, - BuiltInPatchVertices = 14, - BuiltInFragCoord = 15, - BuiltInPointCoord = 16, - BuiltInFrontFacing = 17, - BuiltInSampleId = 18, - BuiltInSamplePosition = 19, - BuiltInSampleMask = 20, - BuiltInFragColor = 
21, - BuiltInFragDepth = 22, - BuiltInHelperInvocation = 23, - BuiltInNumWorkgroups = 24, - BuiltInWorkgroupSize = 25, - BuiltInWorkgroupId = 26, - BuiltInLocalInvocationId = 27, - BuiltInGlobalInvocationId = 28, - BuiltInLocalInvocationIndex = 29, - BuiltInWorkDim = 30, - BuiltInGlobalSize = 31, - BuiltInEnqueuedWorkgroupSize = 32, - BuiltInGlobalOffset = 33, - BuiltInGlobalLinearId = 34, - BuiltInWorkgroupLinearId = 35, - BuiltInSubgroupSize = 36, - BuiltInSubgroupMaxSize = 37, - BuiltInNumSubgroups = 38, - BuiltInNumEnqueuedSubgroups = 39, - BuiltInSubgroupId = 40, - BuiltInSubgroupLocalInvocationId = 41, -}; - -enum SelectionControlShift { - SelectionControlFlattenShift = 0, - SelectionControlDontFlattenShift = 1, -}; - -enum SelectionControlMask { - SelectionControlMaskNone = 0, - SelectionControlFlattenMask = 0x00000001, - SelectionControlDontFlattenMask = 0x00000002, -}; - -enum LoopControlShift { - LoopControlUnrollShift = 0, - LoopControlDontUnrollShift = 1, -}; - -enum LoopControlMask { - LoopControlMaskNone = 0, - LoopControlUnrollMask = 0x00000001, - LoopControlDontUnrollMask = 0x00000002, -}; - -enum FunctionControlShift { - FunctionControlInlineShift = 0, - FunctionControlDontInlineShift = 1, - FunctionControlPureShift = 2, - FunctionControlConstShift = 3, -}; - -enum FunctionControlMask { - FunctionControlMaskNone = 0, - FunctionControlInlineMask = 0x00000001, - FunctionControlDontInlineMask = 0x00000002, - FunctionControlPureMask = 0x00000004, - FunctionControlConstMask = 0x00000008, -}; - -enum MemorySemanticsShift { - MemorySemanticsRelaxedShift = 0, - MemorySemanticsSequentiallyConsistentShift = 1, - MemorySemanticsAcquireShift = 2, - MemorySemanticsReleaseShift = 3, - MemorySemanticsUniformMemoryShift = 4, - MemorySemanticsSubgroupMemoryShift = 5, - MemorySemanticsWorkgroupLocalMemoryShift = 6, - MemorySemanticsWorkgroupGlobalMemoryShift = 7, - MemorySemanticsAtomicCounterMemoryShift = 8, - MemorySemanticsImageMemoryShift = 9, -}; - -enum 
MemorySemanticsMask { - MemorySemanticsMaskNone = 0, - MemorySemanticsRelaxedMask = 0x00000001, - MemorySemanticsSequentiallyConsistentMask = 0x00000002, - MemorySemanticsAcquireMask = 0x00000004, - MemorySemanticsReleaseMask = 0x00000008, - MemorySemanticsUniformMemoryMask = 0x00000010, - MemorySemanticsSubgroupMemoryMask = 0x00000020, - MemorySemanticsWorkgroupLocalMemoryMask = 0x00000040, - MemorySemanticsWorkgroupGlobalMemoryMask = 0x00000080, - MemorySemanticsAtomicCounterMemoryMask = 0x00000100, - MemorySemanticsImageMemoryMask = 0x00000200, -}; - -enum MemoryAccessShift { - MemoryAccessVolatileShift = 0, - MemoryAccessAlignedShift = 1, -}; - -enum MemoryAccessMask { - MemoryAccessMaskNone = 0, - MemoryAccessVolatileMask = 0x00000001, - MemoryAccessAlignedMask = 0x00000002, -}; - -enum ExecutionScope { - ExecutionScopeCrossDevice = 0, - ExecutionScopeDevice = 1, - ExecutionScopeWorkgroup = 2, - ExecutionScopeSubgroup = 3, -}; - -enum GroupOperation { - GroupOperationReduce = 0, - GroupOperationInclusiveScan = 1, - GroupOperationExclusiveScan = 2, -}; - -enum KernelEnqueueFlags { - KernelEnqueueFlagsNoWait = 0, - KernelEnqueueFlagsWaitKernel = 1, - KernelEnqueueFlagsWaitWorkGroup = 2, -}; - -enum KernelProfilingInfoShift { - KernelProfilingInfoCmdExecTimeShift = 0, -}; - -enum KernelProfilingInfoMask { - KernelProfilingInfoMaskNone = 0, - KernelProfilingInfoCmdExecTimeMask = 0x00000001, -}; - -enum Op { - OpNop = 0, - OpSource = 1, - OpSourceExtension = 2, - OpExtension = 3, - OpExtInstImport = 4, - OpMemoryModel = 5, - OpEntryPoint = 6, - OpExecutionMode = 7, - OpTypeVoid = 8, - OpTypeBool = 9, - OpTypeInt = 10, - OpTypeFloat = 11, - OpTypeVector = 12, - OpTypeMatrix = 13, - OpTypeSampler = 14, - OpTypeFilter = 15, - OpTypeArray = 16, - OpTypeRuntimeArray = 17, - OpTypeStruct = 18, - OpTypeOpaque = 19, - OpTypePointer = 20, - OpTypeFunction = 21, - OpTypeEvent = 22, - OpTypeDeviceEvent = 23, - OpTypeReserveId = 24, - OpTypeQueue = 25, - OpTypePipe = 26, - 
OpConstantTrue = 27, - OpConstantFalse = 28, - OpConstant = 29, - OpConstantComposite = 30, - OpConstantSampler = 31, - OpConstantNullPointer = 32, - OpConstantNullObject = 33, - OpSpecConstantTrue = 34, - OpSpecConstantFalse = 35, - OpSpecConstant = 36, - OpSpecConstantComposite = 37, - OpVariable = 38, - OpVariableArray = 39, - OpFunction = 40, - OpFunctionParameter = 41, - OpFunctionEnd = 42, - OpFunctionCall = 43, - OpExtInst = 44, - OpUndef = 45, - OpLoad = 46, - OpStore = 47, - OpPhi = 48, - OpDecorationGroup = 49, - OpDecorate = 50, - OpMemberDecorate = 51, - OpGroupDecorate = 52, - OpGroupMemberDecorate = 53, - OpName = 54, - OpMemberName = 55, - OpString = 56, - OpLine = 57, - OpVectorExtractDynamic = 58, - OpVectorInsertDynamic = 59, - OpVectorShuffle = 60, - OpCompositeConstruct = 61, - OpCompositeExtract = 62, - OpCompositeInsert = 63, - OpCopyObject = 64, - OpCopyMemory = 65, - OpCopyMemorySized = 66, - OpSampler = 67, - OpTextureSample = 68, - OpTextureSampleDref = 69, - OpTextureSampleLod = 70, - OpTextureSampleProj = 71, - OpTextureSampleGrad = 72, - OpTextureSampleOffset = 73, - OpTextureSampleProjLod = 74, - OpTextureSampleProjGrad = 75, - OpTextureSampleLodOffset = 76, - OpTextureSampleProjOffset = 77, - OpTextureSampleGradOffset = 78, - OpTextureSampleProjLodOffset = 79, - OpTextureSampleProjGradOffset = 80, - OpTextureFetchTexelLod = 81, - OpTextureFetchTexelOffset = 82, - OpTextureFetchSample = 83, - OpTextureFetchTexel = 84, - OpTextureGather = 85, - OpTextureGatherOffset = 86, - OpTextureGatherOffsets = 87, - OpTextureQuerySizeLod = 88, - OpTextureQuerySize = 89, - OpTextureQueryLod = 90, - OpTextureQueryLevels = 91, - OpTextureQuerySamples = 92, - OpAccessChain = 93, - OpInBoundsAccessChain = 94, - OpSNegate = 95, - OpFNegate = 96, - OpNot = 97, - OpAny = 98, - OpAll = 99, - OpConvertFToU = 100, - OpConvertFToS = 101, - OpConvertSToF = 102, - OpConvertUToF = 103, - OpUConvert = 104, - OpSConvert = 105, - OpFConvert = 106, - OpConvertPtrToU 
= 107, - OpConvertUToPtr = 108, - OpPtrCastToGeneric = 109, - OpGenericCastToPtr = 110, - OpBitcast = 111, - OpTranspose = 112, - OpIsNan = 113, - OpIsInf = 114, - OpIsFinite = 115, - OpIsNormal = 116, - OpSignBitSet = 117, - OpLessOrGreater = 118, - OpOrdered = 119, - OpUnordered = 120, - OpArrayLength = 121, - OpIAdd = 122, - OpFAdd = 123, - OpISub = 124, - OpFSub = 125, - OpIMul = 126, - OpFMul = 127, - OpUDiv = 128, - OpSDiv = 129, - OpFDiv = 130, - OpUMod = 131, - OpSRem = 132, - OpSMod = 133, - OpFRem = 134, - OpFMod = 135, - OpVectorTimesScalar = 136, - OpMatrixTimesScalar = 137, - OpVectorTimesMatrix = 138, - OpMatrixTimesVector = 139, - OpMatrixTimesMatrix = 140, - OpOuterProduct = 141, - OpDot = 142, - OpShiftRightLogical = 143, - OpShiftRightArithmetic = 144, - OpShiftLeftLogical = 145, - OpLogicalOr = 146, - OpLogicalXor = 147, - OpLogicalAnd = 148, - OpBitwiseOr = 149, - OpBitwiseXor = 150, - OpBitwiseAnd = 151, - OpSelect = 152, - OpIEqual = 153, - OpFOrdEqual = 154, - OpFUnordEqual = 155, - OpINotEqual = 156, - OpFOrdNotEqual = 157, - OpFUnordNotEqual = 158, - OpULessThan = 159, - OpSLessThan = 160, - OpFOrdLessThan = 161, - OpFUnordLessThan = 162, - OpUGreaterThan = 163, - OpSGreaterThan = 164, - OpFOrdGreaterThan = 165, - OpFUnordGreaterThan = 166, - OpULessThanEqual = 167, - OpSLessThanEqual = 168, - OpFOrdLessThanEqual = 169, - OpFUnordLessThanEqual = 170, - OpUGreaterThanEqual = 171, - OpSGreaterThanEqual = 172, - OpFOrdGreaterThanEqual = 173, - OpFUnordGreaterThanEqual = 174, - OpDPdx = 175, - OpDPdy = 176, - OpFwidth = 177, - OpDPdxFine = 178, - OpDPdyFine = 179, - OpFwidthFine = 180, - OpDPdxCoarse = 181, - OpDPdyCoarse = 182, - OpFwidthCoarse = 183, - OpEmitVertex = 184, - OpEndPrimitive = 185, - OpEmitStreamVertex = 186, - OpEndStreamPrimitive = 187, - OpControlBarrier = 188, - OpMemoryBarrier = 189, - OpImagePointer = 190, - OpAtomicInit = 191, - OpAtomicLoad = 192, - OpAtomicStore = 193, - OpAtomicExchange = 194, - OpAtomicCompareExchange 
= 195, - OpAtomicCompareExchangeWeak = 196, - OpAtomicIIncrement = 197, - OpAtomicIDecrement = 198, - OpAtomicIAdd = 199, - OpAtomicISub = 200, - OpAtomicUMin = 201, - OpAtomicUMax = 202, - OpAtomicAnd = 203, - OpAtomicOr = 204, - OpAtomicXor = 205, - OpLoopMerge = 206, - OpSelectionMerge = 207, - OpLabel = 208, - OpBranch = 209, - OpBranchConditional = 210, - OpSwitch = 211, - OpKill = 212, - OpReturn = 213, - OpReturnValue = 214, - OpUnreachable = 215, - OpLifetimeStart = 216, - OpLifetimeStop = 217, - OpCompileFlag = 218, - OpAsyncGroupCopy = 219, - OpWaitGroupEvents = 220, - OpGroupAll = 221, - OpGroupAny = 222, - OpGroupBroadcast = 223, - OpGroupIAdd = 224, - OpGroupFAdd = 225, - OpGroupFMin = 226, - OpGroupUMin = 227, - OpGroupSMin = 228, - OpGroupFMax = 229, - OpGroupUMax = 230, - OpGroupSMax = 231, - OpGenericCastToPtrExplicit = 232, - OpGenericPtrMemSemantics = 233, - OpReadPipe = 234, - OpWritePipe = 235, - OpReservedReadPipe = 236, - OpReservedWritePipe = 237, - OpReserveReadPipePackets = 238, - OpReserveWritePipePackets = 239, - OpCommitReadPipe = 240, - OpCommitWritePipe = 241, - OpIsValidReserveId = 242, - OpGetNumPipePackets = 243, - OpGetMaxPipePackets = 244, - OpGroupReserveReadPipePackets = 245, - OpGroupReserveWritePipePackets = 246, - OpGroupCommitReadPipe = 247, - OpGroupCommitWritePipe = 248, - OpEnqueueMarker = 249, - OpEnqueueKernel = 250, - OpGetKernelNDrangeSubGroupCount = 251, - OpGetKernelNDrangeMaxSubGroupSize = 252, - OpGetKernelWorkGroupSize = 253, - OpGetKernelPreferredWorkGroupSizeMultiple = 254, - OpRetainEvent = 255, - OpReleaseEvent = 256, - OpCreateUserEvent = 257, - OpIsValidEvent = 258, - OpSetUserEventStatus = 259, - OpCaptureEventProfilingInfo = 260, - OpGetDefaultQueue = 261, - OpBuildNDRange = 262, - OpSatConvertSToU = 263, - OpSatConvertUToS = 264, - OpAtomicIMin = 265, - OpAtomicIMax = 266, -}; - -}; // end namespace spv - -#endif // #ifdef __cplusplus - - -#ifndef __cplusplus - -static const int SpvMagicNumber = 
0x07230203; -static const int SpvVersion = 99; - typedef unsigned int SpvId; -static const unsigned int SpvOpCodeMask = 0xFFFF; +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 99; +static const unsigned int SpvRevision = 31; +static const unsigned int SpvOpCodeMask = 0xffff; static const unsigned int SpvWordCountShift = 16; typedef enum SpvSourceLanguage_ { @@ -711,9 +83,7 @@ typedef enum SpvAddressingModel_ { typedef enum SpvMemoryModel_ { SpvMemoryModelSimple = 0, SpvMemoryModelGLSL450 = 1, - SpvMemoryModelOpenCL12 = 2, - SpvMemoryModelOpenCL20 = 3, - SpvMemoryModelOpenCL21 = 4, + SpvMemoryModelOpenCL = 2, } SpvMemoryModel; typedef enum SpvExecutionMode_ { @@ -725,29 +95,30 @@ typedef enum SpvExecutionMode_ { SpvExecutionModeVertexOrderCcw = 5, SpvExecutionModePixelCenterInteger = 6, SpvExecutionModeOriginUpperLeft = 7, - SpvExecutionModeEarlyFragmentTests = 8, - SpvExecutionModePointMode = 9, - SpvExecutionModeXfb = 10, - SpvExecutionModeDepthReplacing = 11, - SpvExecutionModeDepthAny = 12, - SpvExecutionModeDepthGreater = 13, - SpvExecutionModeDepthLess = 14, - SpvExecutionModeDepthUnchanged = 15, - SpvExecutionModeLocalSize = 16, - SpvExecutionModeLocalSizeHint = 17, - SpvExecutionModeInputPoints = 18, - SpvExecutionModeInputLines = 19, - SpvExecutionModeInputLinesAdjacency = 20, - SpvExecutionModeInputTriangles = 21, - SpvExecutionModeInputTrianglesAdjacency = 22, - SpvExecutionModeInputQuads = 23, - SpvExecutionModeInputIsolines = 24, - SpvExecutionModeOutputVertices = 25, - SpvExecutionModeOutputPoints = 26, - SpvExecutionModeOutputLineStrip = 27, - SpvExecutionModeOutputTriangleStrip = 28, - SpvExecutionModeVecTypeHint = 29, - SpvExecutionModeContractionOff = 30, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthAny = 13, + SpvExecutionModeDepthGreater = 
14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeInputTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + SpvExecutionModeInputQuads = 24, + SpvExecutionModeInputIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, } SpvExecutionMode; typedef enum SpvStorageClass_ { @@ -760,8 +131,8 @@ typedef enum SpvStorageClass_ { SpvStorageClassPrivateGlobal = 6, SpvStorageClassFunction = 7, SpvStorageClassGeneric = 8, - SpvStorageClassPrivate = 9, SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, } SpvStorageClass; typedef enum SpvDim_ { @@ -786,6 +157,111 @@ typedef enum SpvSamplerFilterMode_ { SpvSamplerFilterModeLinear = 1, } SpvSamplerFilterMode; +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + 
SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, +} SpvImageFormat; + +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, +} SpvImageChannelOrder; + +typedef enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 14, + SpvImageChannelDataTypeUnormInt24 = 15, +} SpvImageChannelDataType; + +typedef enum SpvImageOperandsShift_ { + SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsConstOffsetShift = 3, + SpvImageOperandsOffsetShift = 4, + SpvImageOperandsConstOffsetsShift = 5, 
+ SpvImageOperandsSampleShift = 6, +} SpvImageOperandsShift; + +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsConstOffsetMask = 0x00000008, + SpvImageOperandsOffsetMask = 0x00000010, + SpvImageOperandsConstOffsetsMask = 0x00000020, + SpvImageOperandsSampleMask = 0x00000040, +} SpvImageOperandsMask; + typedef enum SpvFPFastMathModeShift_ { SpvFPFastMathModeNotNaNShift = 0, SpvFPFastMathModeNotInfShift = 1, @@ -828,40 +304,39 @@ typedef enum SpvFunctionParameterAttribute_ { SpvFunctionParameterAttributeSret = 3, SpvFunctionParameterAttributeNoAlias = 4, SpvFunctionParameterAttributeNoCapture = 5, - SpvFunctionParameterAttributeSVM = 6, - SpvFunctionParameterAttributeNoWrite = 7, - SpvFunctionParameterAttributeNoReadWrite = 8, + SpvFunctionParameterAttributeNoWrite = 6, + SpvFunctionParameterAttributeNoReadWrite = 7, } SpvFunctionParameterAttribute; typedef enum SpvDecoration_ { - SpvDecorationPrecisionLow = 0, - SpvDecorationPrecisionMedium = 1, - SpvDecorationPrecisionHigh = 2, - SpvDecorationBlock = 3, - SpvDecorationBufferBlock = 4, - SpvDecorationRowMajor = 5, - SpvDecorationColMajor = 6, - SpvDecorationGLSLShared = 7, - SpvDecorationGLSLStd140 = 8, - SpvDecorationGLSLStd430 = 9, - SpvDecorationGLSLPacked = 10, - SpvDecorationSmooth = 11, - SpvDecorationNoperspective = 12, - SpvDecorationFlat = 13, - SpvDecorationPatch = 14, - SpvDecorationCentroid = 15, - SpvDecorationSample = 16, - SpvDecorationInvariant = 17, - SpvDecorationRestrict = 18, - SpvDecorationAliased = 19, - SpvDecorationVolatile = 20, - SpvDecorationConstant = 21, - SpvDecorationCoherent = 22, - SpvDecorationNonwritable = 23, - SpvDecorationNonreadable = 24, - SpvDecorationUniform = 25, - SpvDecorationNoStaticUse = 26, - SpvDecorationCPacked = 27, + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + 
SpvDecorationBufferBlock = 3, + SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + SpvDecorationBuiltIn = 11, + SpvDecorationSmooth = 12, + SpvDecorationNoperspective = 13, + SpvDecorationFlat = 14, + SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonwritable = 24, + SpvDecorationNonreadable = 25, + SpvDecorationUniform = 26, + SpvDecorationNoStaticUse = 27, SpvDecorationSaturatedConversion = 28, SpvDecorationStream = 29, SpvDecorationLocation = 30, @@ -870,21 +345,17 @@ typedef enum SpvDecoration_ { SpvDecorationBinding = 33, SpvDecorationDescriptorSet = 34, SpvDecorationOffset = 35, - SpvDecorationAlignment = 36, - SpvDecorationXfbBuffer = 37, - SpvDecorationStride = 38, - SpvDecorationBuiltIn = 39, - SpvDecorationFuncParamAttr = 40, - SpvDecorationFPRoundingMode = 41, - SpvDecorationFPFastMathMode = 42, - SpvDecorationLinkageAttributes = 43, - SpvDecorationSpecId = 44, + SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, } SpvDecoration; typedef enum SpvBuiltIn_ { SpvBuiltInPosition = 0, SpvBuiltInPointSize = 1, - SpvBuiltInClipVertex = 2, SpvBuiltInClipDistance = 3, SpvBuiltInCullDistance = 4, SpvBuiltInVertexId = 5, @@ -1001,12 +472,13 @@ typedef enum SpvMemoryAccessMask_ { SpvMemoryAccessAlignedMask = 0x00000002, } SpvMemoryAccessMask; -typedef enum SpvExecutionScope_ { - SpvExecutionScopeCrossDevice = 0, - SpvExecutionScopeDevice = 1, - SpvExecutionScopeWorkgroup = 2, - SpvExecutionScopeSubgroup = 3, -} 
SpvExecutionScope; +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, + SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, +} SpvScope; typedef enum SpvGroupOperation_ { SpvGroupOperationReduce = 0, @@ -1029,276 +501,320 @@ typedef enum SpvKernelProfilingInfoMask_ { SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, } SpvKernelProfilingInfoMask; +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation = 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityImageSRGBWrite = 16, + SpvCapabilityPipes = 17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, + SpvCapabilityTessellationPointSize = 23, + SpvCapabilityGeometryPointSize = 24, + SpvCapabilityImageGatherExtended = 25, + SpvCapabilityStorageImageExtendedFormats = 26, + SpvCapabilityStorageImageMultisample = 27, + SpvCapabilityUniformBufferArrayDynamicIndexing = 28, + SpvCapabilitySampledImageArrayDynamicIndexing = 29, + SpvCapabilityStorageBufferArrayDynamicIndexing = 30, + SpvCapabilityStorageImageArrayDynamicIndexing = 31, + SpvCapabilityClipDistance = 32, + SpvCapabilityCullDistance = 33, + SpvCapabilityImageCubeArray = 34, + SpvCapabilitySampleRateShading = 35, +} SpvCapability; + typedef enum SpvOp_ { SpvOpNop = 0, - SpvOpSource = 1, - SpvOpSourceExtension = 2, - SpvOpExtension = 3, - SpvOpExtInstImport = 4, - SpvOpMemoryModel = 5, - SpvOpEntryPoint = 6, - SpvOpExecutionMode = 7, - SpvOpTypeVoid = 8, - SpvOpTypeBool = 9, - SpvOpTypeInt = 10, - 
SpvOpTypeFloat = 11, - SpvOpTypeVector = 12, - SpvOpTypeMatrix = 13, - SpvOpTypeSampler = 14, - SpvOpTypeFilter = 15, - SpvOpTypeArray = 16, - SpvOpTypeRuntimeArray = 17, - SpvOpTypeStruct = 18, - SpvOpTypeOpaque = 19, - SpvOpTypePointer = 20, - SpvOpTypeFunction = 21, - SpvOpTypeEvent = 22, - SpvOpTypeDeviceEvent = 23, - SpvOpTypeReserveId = 24, - SpvOpTypeQueue = 25, - SpvOpTypePipe = 26, - SpvOpConstantTrue = 27, - SpvOpConstantFalse = 28, - SpvOpConstant = 29, - SpvOpConstantComposite = 30, - SpvOpConstantSampler = 31, - SpvOpConstantNullPointer = 32, - SpvOpConstantNullObject = 33, - SpvOpSpecConstantTrue = 34, - SpvOpSpecConstantFalse = 35, - SpvOpSpecConstant = 36, - SpvOpSpecConstantComposite = 37, - SpvOpVariable = 38, - SpvOpVariableArray = 39, - SpvOpFunction = 40, - SpvOpFunctionParameter = 41, - SpvOpFunctionEnd = 42, - SpvOpFunctionCall = 43, - SpvOpExtInst = 44, - SpvOpUndef = 45, - SpvOpLoad = 46, - SpvOpStore = 47, - SpvOpPhi = 48, - SpvOpDecorationGroup = 49, - SpvOpDecorate = 50, - SpvOpMemberDecorate = 51, - SpvOpGroupDecorate = 52, - SpvOpGroupMemberDecorate = 53, - SpvOpName = 54, - SpvOpMemberName = 55, - SpvOpString = 56, - SpvOpLine = 57, - SpvOpVectorExtractDynamic = 58, - SpvOpVectorInsertDynamic = 59, - SpvOpVectorShuffle = 60, - SpvOpCompositeConstruct = 61, - SpvOpCompositeExtract = 62, - SpvOpCompositeInsert = 63, - SpvOpCopyObject = 64, - SpvOpCopyMemory = 65, - SpvOpCopyMemorySized = 66, - SpvOpSampler = 67, - SpvOpTextureSample = 68, - SpvOpTextureSampleDref = 69, - SpvOpTextureSampleLod = 70, - SpvOpTextureSampleProj = 71, - SpvOpTextureSampleGrad = 72, - SpvOpTextureSampleOffset = 73, - SpvOpTextureSampleProjLod = 74, - SpvOpTextureSampleProjGrad = 75, - SpvOpTextureSampleLodOffset = 76, - SpvOpTextureSampleProjOffset = 77, - SpvOpTextureSampleGradOffset = 78, - SpvOpTextureSampleProjLodOffset = 79, - SpvOpTextureSampleProjGradOffset = 80, - SpvOpTextureFetchTexelLod = 81, - SpvOpTextureFetchTexelOffset = 82, - 
SpvOpTextureFetchSample = 83, - SpvOpTextureFetchTexel = 84, - SpvOpTextureGather = 85, - SpvOpTextureGatherOffset = 86, - SpvOpTextureGatherOffsets = 87, - SpvOpTextureQuerySizeLod = 88, - SpvOpTextureQuerySize = 89, - SpvOpTextureQueryLod = 90, - SpvOpTextureQueryLevels = 91, - SpvOpTextureQuerySamples = 92, - SpvOpAccessChain = 93, - SpvOpInBoundsAccessChain = 94, - SpvOpSNegate = 95, - SpvOpFNegate = 96, - SpvOpNot = 97, - SpvOpAny = 98, - SpvOpAll = 99, - SpvOpConvertFToU = 100, - SpvOpConvertFToS = 101, - SpvOpConvertSToF = 102, - SpvOpConvertUToF = 103, - SpvOpUConvert = 104, - SpvOpSConvert = 105, - SpvOpFConvert = 106, - SpvOpConvertPtrToU = 107, - SpvOpConvertUToPtr = 108, - SpvOpPtrCastToGeneric = 109, - SpvOpGenericCastToPtr = 110, - SpvOpBitcast = 111, - SpvOpTranspose = 112, - SpvOpIsNan = 113, - SpvOpIsInf = 114, - SpvOpIsFinite = 115, - SpvOpIsNormal = 116, - SpvOpSignBitSet = 117, - SpvOpLessOrGreater = 118, - SpvOpOrdered = 119, - SpvOpUnordered = 120, - SpvOpArrayLength = 121, - SpvOpIAdd = 122, - SpvOpFAdd = 123, - SpvOpISub = 124, - SpvOpFSub = 125, - SpvOpIMul = 126, - SpvOpFMul = 127, - SpvOpUDiv = 128, - SpvOpSDiv = 129, - SpvOpFDiv = 130, - SpvOpUMod = 131, - SpvOpSRem = 132, - SpvOpSMod = 133, - SpvOpFRem = 134, - SpvOpFMod = 135, - SpvOpVectorTimesScalar = 136, - SpvOpMatrixTimesScalar = 137, - SpvOpVectorTimesMatrix = 138, - SpvOpMatrixTimesVector = 139, - SpvOpMatrixTimesMatrix = 140, - SpvOpOuterProduct = 141, - SpvOpDot = 142, - SpvOpShiftRightLogical = 143, - SpvOpShiftRightArithmetic = 144, - SpvOpShiftLeftLogical = 145, - SpvOpLogicalOr = 146, - SpvOpLogicalXor = 147, - SpvOpLogicalAnd = 148, - SpvOpBitwiseOr = 149, - SpvOpBitwiseXor = 150, - SpvOpBitwiseAnd = 151, - SpvOpSelect = 152, - SpvOpIEqual = 153, - SpvOpFOrdEqual = 154, - SpvOpFUnordEqual = 155, - SpvOpINotEqual = 156, - SpvOpFOrdNotEqual = 157, - SpvOpFUnordNotEqual = 158, - SpvOpULessThan = 159, - SpvOpSLessThan = 160, - SpvOpFOrdLessThan = 161, - SpvOpFUnordLessThan = 
162, - SpvOpUGreaterThan = 163, - SpvOpSGreaterThan = 164, - SpvOpFOrdGreaterThan = 165, - SpvOpFUnordGreaterThan = 166, - SpvOpULessThanEqual = 167, - SpvOpSLessThanEqual = 168, - SpvOpFOrdLessThanEqual = 169, - SpvOpFUnordLessThanEqual = 170, - SpvOpUGreaterThanEqual = 171, - SpvOpSGreaterThanEqual = 172, - SpvOpFOrdGreaterThanEqual = 173, - SpvOpFUnordGreaterThanEqual = 174, - SpvOpDPdx = 175, - SpvOpDPdy = 176, - SpvOpFwidth = 177, - SpvOpDPdxFine = 178, - SpvOpDPdyFine = 179, - SpvOpFwidthFine = 180, - SpvOpDPdxCoarse = 181, - SpvOpDPdyCoarse = 182, - SpvOpFwidthCoarse = 183, - SpvOpEmitVertex = 184, - SpvOpEndPrimitive = 185, - SpvOpEmitStreamVertex = 186, - SpvOpEndStreamPrimitive = 187, - SpvOpControlBarrier = 188, - SpvOpMemoryBarrier = 189, - SpvOpImagePointer = 190, - SpvOpAtomicInit = 191, - SpvOpAtomicLoad = 192, - SpvOpAtomicStore = 193, - SpvOpAtomicExchange = 194, - SpvOpAtomicCompareExchange = 195, - SpvOpAtomicCompareExchangeWeak = 196, - SpvOpAtomicIIncrement = 197, - SpvOpAtomicIDecrement = 198, - SpvOpAtomicIAdd = 199, - SpvOpAtomicISub = 200, - SpvOpAtomicUMin = 201, - SpvOpAtomicUMax = 202, - SpvOpAtomicAnd = 203, - SpvOpAtomicOr = 204, - SpvOpAtomicXor = 205, - SpvOpLoopMerge = 206, - SpvOpSelectionMerge = 207, - SpvOpLabel = 208, - SpvOpBranch = 209, - SpvOpBranchConditional = 210, - SpvOpSwitch = 211, - SpvOpKill = 212, - SpvOpReturn = 213, - SpvOpReturnValue = 214, - SpvOpUnreachable = 215, - SpvOpLifetimeStart = 216, - SpvOpLifetimeStop = 217, - SpvOpCompileFlag = 218, - SpvOpAsyncGroupCopy = 219, - SpvOpWaitGroupEvents = 220, - SpvOpGroupAll = 221, - SpvOpGroupAny = 222, - SpvOpGroupBroadcast = 223, - SpvOpGroupIAdd = 224, - SpvOpGroupFAdd = 225, - SpvOpGroupFMin = 226, - SpvOpGroupUMin = 227, - SpvOpGroupSMin = 228, - SpvOpGroupFMax = 229, - SpvOpGroupUMax = 230, - SpvOpGroupSMax = 231, - SpvOpGenericCastToPtrExplicit = 232, - SpvOpGenericPtrMemSemantics = 233, - SpvOpReadPipe = 234, - SpvOpWritePipe = 235, - SpvOpReservedReadPipe = 
236, - SpvOpReservedWritePipe = 237, - SpvOpReserveReadPipePackets = 238, - SpvOpReserveWritePipePackets = 239, - SpvOpCommitReadPipe = 240, - SpvOpCommitWritePipe = 241, - SpvOpIsValidReserveId = 242, - SpvOpGetNumPipePackets = 243, - SpvOpGetMaxPipePackets = 244, - SpvOpGroupReserveReadPipePackets = 245, - SpvOpGroupReserveWritePipePackets = 246, - SpvOpGroupCommitReadPipe = 247, - SpvOpGroupCommitWritePipe = 248, - SpvOpEnqueueMarker = 249, - SpvOpEnqueueKernel = 250, - SpvOpGetKernelNDrangeSubGroupCount = 251, - SpvOpGetKernelNDrangeMaxSubGroupSize = 252, - SpvOpGetKernelWorkGroupSize = 253, - SpvOpGetKernelPreferredWorkGroupSizeMultiple = 254, - SpvOpRetainEvent = 255, - SpvOpReleaseEvent = 256, - SpvOpCreateUserEvent = 257, - SpvOpIsValidEvent = 258, - SpvOpSetUserEventStatus = 259, - SpvOpCaptureEventProfilingInfo = 260, - SpvOpGetDefaultQueue = 261, - SpvOpBuildNDRange = 262, - SpvOpSatConvertSToU = 263, - SpvOpSatConvertUToS = 264, - SpvOpAtomicIMin = 265, - SpvOpAtomicIMax = 266, + SpvOpUndef = 1, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + 
SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImageQueryDim = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + 
SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpIMulExtended = 151, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 
207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpAsyncGroupCopy = 259, + SpvOpWaitGroupEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + 
SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, } SpvOp; -#endif // #ifndef __cplusplus - #endif // #ifndef spirv_H diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c index 3b9d0940aad..52b048820f3 100644 --- a/src/glsl/nir/spirv_glsl450_to_nir.c +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -139,13 +139,14 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, const uint32_t *w, unsigned count) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; /* Collect the various SSA sources */ unsigned num_inputs = count - 5; nir_ssa_def *src[3]; for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 5]); + src[i] = vtn_ssa_value(b, w[i + 5])->def; nir_op op; switch (entrypoint) { @@ -158,16 +159,16 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case Ceil: op = nir_op_fceil; break; case Fract: op = nir_op_ffract; break; case Radians: - val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); + val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); return; case Degrees: - val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); + val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); return; case Sin: op = nir_op_fsin; break; case Cos: op = nir_op_fcos; break; case Tan: - val->ssa = nir_fdiv(&b->nb, nir_fsin(&b->nb, 
src[0]), - nir_fcos(&b->nb, src[0])); + val->ssa->def = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), + nir_fcos(&b->nb, src[0])); return; case Pow: op = nir_op_fpow; break; case Exp2: op = nir_op_fexp2; break; @@ -180,7 +181,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case Max: op = nir_op_fmax; break; case Mix: op = nir_op_flrp; break; case Step: - val->ssa = nir_sge(&b->nb, src[1], src[0]); + val->ssa->def = nir_sge(&b->nb, src[1], src[0]); return; case FloatBitsToInt: @@ -188,7 +189,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case IntBitsToFloat: case UintBitsToFloat: /* Probably going to be removed from the final version of the spec. */ - val->ssa = src[0]; + val->ssa->def = src[0]; return; case Fma: op = nir_op_ffma; break; @@ -207,13 +208,13 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; case Length: - val->ssa = build_length(&b->nb, src[0]); + val->ssa->def = build_length(&b->nb, src[0]); return; case Distance: - val->ssa = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); + val->ssa->def = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); return; case Normalize: - val->ssa = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); + val->ssa->def = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); return; case UaddCarry: op = nir_op_uadd_carry; break; @@ -255,8 +256,8 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, - glsl_get_vector_elements(val->type), val->name); - val->ssa = &instr->dest.dest.ssa; + glsl_get_vector_elements(val->ssa->type), val->name); + val->ssa->def = &instr->dest.dest.ssa; for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) instr->src[i].src = nir_src_for_ssa(src[i]); diff --git a/src/glsl/nir/spirv_to_nir.c 
b/src/glsl/nir/spirv_to_nir.c index 3f2ef15c74f..c3a16986fc1 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -27,25 +27,96 @@ #include "spirv_to_nir_private.h" #include "nir_vla.h" +#include "nir_control_flow.h" -nir_ssa_def * -vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +static struct vtn_ssa_value * +vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, + const struct glsl_type *type) { - struct vtn_value *val = vtn_untyped_value(b, value_id); - switch (val->value_type) { - case vtn_value_type_constant: { - assert(glsl_type_is_vector_or_scalar(val->type)); - unsigned num_components = glsl_get_vector_elements(val->type); - nir_load_const_instr *load = - nir_load_const_instr_create(b->shader, num_components); + struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); + + if (entry) + return entry->data; + + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, num_components); + + for (unsigned i = 0; i < num_components; i++) + load->value.u[i] = constant->value.u[i]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + val->def = &load->def; + } else { + assert(glsl_type_is_matrix(type)); + unsigned rows = glsl_get_vector_elements(val->type); + unsigned columns = glsl_get_matrix_columns(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); - for (unsigned i = 0; i < num_components; i++) - load->value.u[0] = val->constant->value.u[0]; + for (unsigned i = 0; i < columns; i++) { + struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); + col_val->type = 
glsl_get_column_type(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, rows); - nir_builder_instr_insert(&b->nb, &load->instr); - return &load->def; + for (unsigned j = 0; j < rows; j++) + load->value.u[j] = constant->value.u[rows * i + j]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + col_val->def = &load->def; + + val->elems[i] = col_val; + } + } + break; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + const struct glsl_type *elem_type = glsl_get_array_element(val->type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + break; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = + glsl_get_struct_field(val->type, i); + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + } + break; } + default: + unreachable("bad constant type"); + } + + return val; +} + +struct vtn_ssa_value * +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + switch (val->value_type) { + case vtn_value_type_constant: + return vtn_const_ssa_value(b, val->constant, val->const_type); + case vtn_value_type_ssa: return val->ssa; default: @@ -110,15 +181,24 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, static void _foreach_decoration_helper(struct vtn_builder *b, struct vtn_value *base_value, + int member, struct vtn_value *value, vtn_decoration_foreach_cb cb, void *data) { + int new_member = member; + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + if (dec->member >= 0) { + assert(member == -1); + new_member = dec->member; + } + if (dec->group) { assert(dec->group->value_type == 
vtn_value_type_decoration_group); - _foreach_decoration_helper(b, base_value, dec->group, cb, data); + _foreach_decoration_helper(b, base_value, new_member, dec->group, + cb, data); } else { - cb(b, base_value, dec, data); + cb(b, base_value, new_member, dec, data); } } } @@ -133,24 +213,33 @@ void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, vtn_decoration_foreach_cb cb, void *data) { - _foreach_decoration_helper(b, value, value, cb, data); + _foreach_decoration_helper(b, value, -1, value, cb, data); } static void vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { + const uint32_t *w_end = w + count; + const uint32_t target = w[1]; + w += 2; + + int member = -1; switch (opcode) { case SpvOpDecorationGroup: - vtn_push_value(b, w[1], vtn_value_type_undef); + vtn_push_value(b, target, vtn_value_type_undef); break; + case SpvOpMemberDecorate: + member = *(w++); + /* fallthrough */ case SpvOpDecorate: { - struct vtn_value *val = &b->values[w[1]]; + struct vtn_value *val = &b->values[target]; struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); - dec->decoration = w[2]; - dec->literals = &w[3]; + dec->member = member; + dec->decoration = *(w++); + dec->literals = w; /* Link into the list */ dec->next = val->decoration; @@ -158,13 +247,17 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, break; } + case SpvOpGroupMemberDecorate: + member = *(w++); + /* fallthrough */ case SpvOpGroupDecorate: { - struct vtn_value *group = &b->values[w[1]]; + struct vtn_value *group = &b->values[target]; assert(group->value_type == vtn_value_type_decoration_group); - for (unsigned i = 2; i < count; i++) { - struct vtn_value *val = &b->values[w[i]]; + for (; w < w_end; w++) { + struct vtn_value *val = &b->values[*w]; struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->member = member; dec->group = group; /* Link into the list */ @@ -174,57 +267,207 @@ vtn_handle_decoration(struct 
vtn_builder *b, SpvOp opcode, break; } - case SpvOpGroupMemberDecorate: - assert(!"Bad instruction. Khronos Bug #13513"); + default: + unreachable("Unhandled opcode"); + } +} + +struct member_decoration_ctx { + struct glsl_struct_field *fields; + struct vtn_type *type; +}; + +/* does a shallow copy of a vtn_type */ + +static struct vtn_type * +vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) +{ + struct vtn_type *dest = ralloc(b, struct vtn_type); + dest->type = src->type; + dest->is_builtin = src->is_builtin; + if (src->is_builtin) + dest->builtin = src->builtin; + + if (!glsl_type_is_vector_or_scalar(src->type)) { + switch (glsl_get_base_type(src->type)) { + case GLSL_TYPE_ARRAY: + dest->array_element = src->array_element; + dest->stride = src->stride; + break; + + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + /* matrices */ + dest->row_major = src->row_major; + dest->stride = src->stride; + break; + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(src->type); + + dest->members = ralloc_array(b, struct vtn_type *, elems); + memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); + + dest->offsets = ralloc_array(b, unsigned, elems); + memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); + break; + } + + default: + unreachable("unhandled type"); + } + } + + return dest; +} + +static void +struct_member_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_ctx) +{ + struct member_decoration_ctx *ctx = void_ctx; + + if (member < 0) + return; + + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ + case SpvDecorationSmooth: + ctx->fields[member].interpolation = INTERP_QUALIFIER_SMOOTH; + break; + case SpvDecorationNoperspective: + ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + ctx->fields[member].centroid = true; + break; + case SpvDecorationSample: + ctx->fields[member].sample = true; + break; + case SpvDecorationLocation: + ctx->fields[member].location = dec->literals[0]; + break; + case SpvDecorationBuiltIn: + ctx->type->members[member] = vtn_type_copy(b, + ctx->type->members[member]); + ctx->type->members[member]->is_builtin = true; + ctx->type->members[member]->builtin = dec->literals[0]; + ctx->type->builtin_block = true; + break; + case SpvDecorationOffset: + ctx->type->offsets[member] = dec->literals[0]; + break; + default: + unreachable("Unhandled member decoration"); + } +} + +static void +type_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *ctx) +{ + struct vtn_type *type = val->type; + + if (member != -1) + return; + + switch (dec->decoration) { + case SpvDecorationArrayStride: + type->stride = dec->literals[0]; + break; + case SpvDecorationBlock: + type->block = true; + break; + case SpvDecorationBufferBlock: + type->buffer_block = true; + break; + case SpvDecorationGLSLShared: + case SpvDecorationGLSLPacked: + /* Ignore these, since we get explicit offsets anyways */ break; default: - unreachable("Unhandled opcode"); + unreachable("Unhandled type decoration"); } } -static const struct glsl_type * +static void vtn_handle_type(struct vtn_builder *b, SpvOp opcode, - const uint32_t *args, unsigned count) + const uint32_t *w, unsigned count) { + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); + + val->type = rzalloc(b, struct vtn_type); + val->type->is_builtin = false; + switch (opcode) { case 
SpvOpTypeVoid: - return glsl_void_type(); + val->type->type = glsl_void_type(); + break; case SpvOpTypeBool: - return glsl_bool_type(); + val->type->type = glsl_bool_type(); + break; case SpvOpTypeInt: - return glsl_int_type(); + val->type->type = glsl_int_type(); + break; case SpvOpTypeFloat: - return glsl_float_type(); + val->type->type = glsl_float_type(); + break; case SpvOpTypeVector: { const struct glsl_type *base = - vtn_value(b, args[0], vtn_value_type_type)->type; - unsigned elems = args[1]; + vtn_value(b, w[2], vtn_value_type_type)->type->type; + unsigned elems = w[3]; assert(glsl_type_is_scalar(base)); - return glsl_vector_type(glsl_get_base_type(base), elems); + val->type->type = glsl_vector_type(glsl_get_base_type(base), elems); + break; } case SpvOpTypeMatrix: { - const struct glsl_type *base = - vtn_value(b, args[0], vtn_value_type_type)->type; - unsigned columns = args[1]; - - assert(glsl_type_is_vector(base)); - return glsl_matrix_type(glsl_get_base_type(base), - glsl_get_vector_elements(base), - columns); + struct vtn_type *base = + vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned columns = w[3]; + + assert(glsl_type_is_vector(base->type)); + val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), + glsl_get_vector_elements(base->type), + columns); + val->type->array_element = base; + val->type->row_major = false; + val->type->stride = 0; + break; } - case SpvOpTypeArray: - return glsl_array_type(b->values[args[0]].type, args[1]); + case SpvOpTypeArray: { + struct vtn_type *array_element = + vtn_value(b, w[2], vtn_value_type_type)->type; + val->type->type = glsl_array_type(array_element->type, w[3]); + val->type->array_element = array_element; + val->type->stride = 0; + break; + } case SpvOpTypeStruct: { + unsigned num_fields = count - 2; + val->type->members = ralloc_array(b, struct vtn_type *, num_fields); + val->type->offsets = ralloc_array(b, unsigned, num_fields); + NIR_VLA(struct glsl_struct_field, fields, count); - for 
(unsigned i = 0; i < count; i++) { + for (unsigned i = 0; i < num_fields; i++) { /* TODO: Handle decorators */ - fields[i].type = vtn_value(b, args[i], vtn_value_type_type)->type; + val->type->members[i] = + vtn_value(b, w[i + 2], vtn_value_type_type)->type; + fields[i].type = val->type->members[i]->type; fields[i].name = ralloc_asprintf(b, "field%d", i); fields[i].location = -1; fields[i].interpolation = 0; @@ -233,20 +476,33 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, fields[i].matrix_layout = 2; fields[i].stream = -1; } - return glsl_struct_type(fields, count, "struct"); + + struct member_decoration_ctx ctx = { + .fields = fields, + .type = val->type + }; + + vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); + + const char *name = val->name ? val->name : "struct"; + + val->type->type = glsl_struct_type(fields, num_fields, name); + break; } case SpvOpTypeFunction: { - const struct glsl_type *return_type = b->values[args[0]].type; - NIR_VLA(struct glsl_function_param, params, count - 1); - for (unsigned i = 1; i < count; i++) { - params[i - 1].type = vtn_value(b, args[i], vtn_value_type_type)->type; + const struct glsl_type *return_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + NIR_VLA(struct glsl_function_param, params, count - 3); + for (unsigned i = 0; i < count - 3; i++) { + params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; /* FIXME: */ - params[i - 1].in = true; - params[i - 1].out = true; + params[i].in = true; + params[i].out = true; } - return glsl_function_type(return_type, params, count - 1); + val->type->type = glsl_function_type(return_type, params, count - 3); + break; } case SpvOpTypePointer: @@ -254,16 +510,17 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, * the same type. 
The validator should ensure that the proper number * of dereferences happen */ - return vtn_value(b, args[1], vtn_value_type_type)->type; + val->type = vtn_value(b, w[3], vtn_value_type_type)->type; + break; - case SpvOpTypeSampler: { + case SpvOpTypeImage: { const struct glsl_type *sampled_type = - vtn_value(b, args[0], vtn_value_type_type)->type; + vtn_value(b, w[2], vtn_value_type_type)->type->type; assert(glsl_type_is_vector_or_scalar(sampled_type)); enum glsl_sampler_dim dim; - switch ((SpvDim)args[1]) { + switch ((SpvDim)w[3]) { case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; @@ -274,18 +531,21 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, unreachable("Invalid SPIR-V Sampler dimension"); } - /* TODO: Handle the various texture image/filter options */ - (void)args[2]; - - bool is_array = args[3]; - bool is_shadow = args[4]; + bool is_shadow = w[4]; + bool is_array = w[5]; - assert(args[5] == 0 && "FIXME: Handl multi-sampled textures"); + assert(w[6] == 0 && "FIXME: Handl multi-sampled textures"); + assert(w[7] == 1 && "FIXME: Add support for non-sampled images"); - return glsl_sampler_type(dim, is_shadow, is_array, - glsl_get_base_type(sampled_type)); + val->type->type = glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); + break; } + case SpvOpTypeSampledImage: + val->type = vtn_value(b, w[2], vtn_value_type_type)->type; + break; + case SpvOpTypeRuntimeArray: case SpvOpTypeOpaque: case SpvOpTypeEvent: @@ -296,6 +556,8 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, default: unreachable("Unhandled opcode"); } + + vtn_foreach_decoration(b, val, type_decoration_cb, NULL); } static void @@ -303,19 +565,19 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); - val->type = vtn_value(b, w[1], 
vtn_value_type_type)->type; + val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; val->constant = ralloc(b, nir_constant); switch (opcode) { case SpvOpConstantTrue: - assert(val->type == glsl_bool_type()); + assert(val->const_type == glsl_bool_type()); val->constant->value.u[0] = NIR_TRUE; break; case SpvOpConstantFalse: - assert(val->type == glsl_bool_type()); + assert(val->const_type == glsl_bool_type()); val->constant->value.u[0] = NIR_FALSE; break; case SpvOpConstant: - assert(glsl_type_is_scalar(val->type)); + assert(glsl_type_is_scalar(val->const_type)); val->constant->value.u[0] = w[3]; break; case SpvOpConstantComposite: { @@ -324,20 +586,20 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, for (unsigned i = 0; i < elem_count; i++) elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; - switch (glsl_get_base_type(val->type)) { + switch (glsl_get_base_type(val->const_type)) { case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_FLOAT: case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(val->type)) { - unsigned rows = glsl_get_vector_elements(val->type); - assert(glsl_get_matrix_columns(val->type) == elem_count); + if (glsl_type_is_matrix(val->const_type)) { + unsigned rows = glsl_get_vector_elements(val->const_type); + assert(glsl_get_matrix_columns(val->const_type) == elem_count); for (unsigned i = 0; i < elem_count; i++) for (unsigned j = 0; j < rows; j++) val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; } else { - assert(glsl_type_is_vector(val->type)); - assert(glsl_get_vector_elements(val->type) == elem_count); + assert(glsl_type_is_vector(val->const_type)); + assert(glsl_get_vector_elements(val->const_type) == elem_count); for (unsigned i = 0; i < elem_count; i++) val->constant->value.u[i] = elems[i]->value.u[0]; } @@ -362,7 +624,101 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, } static void -var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, 
+vtn_get_builtin_location(SpvBuiltIn builtin, int *location, + nir_variable_mode *mode) +{ + switch (builtin) { + case SpvBuiltInPosition: + *location = VARYING_SLOT_POS; + *mode = nir_var_shader_out; + break; + case SpvBuiltInPointSize: + *location = VARYING_SLOT_PSIZ; + *mode = nir_var_shader_out; + break; + case SpvBuiltInClipDistance: + *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */ + *mode = nir_var_shader_in; + break; + case SpvBuiltInCullDistance: + /* XXX figure this out */ + unreachable("unhandled builtin"); + case SpvBuiltInVertexId: + *location = SYSTEM_VALUE_VERTEX_ID; + *mode = nir_var_system_value; + break; + case SpvBuiltInInstanceId: + *location = SYSTEM_VALUE_INSTANCE_ID; + *mode = nir_var_system_value; + break; + case SpvBuiltInPrimitiveId: + *location = VARYING_SLOT_PRIMITIVE_ID; + *mode = nir_var_shader_out; + break; + case SpvBuiltInInvocationId: + *location = SYSTEM_VALUE_INVOCATION_ID; + *mode = nir_var_system_value; + break; + case SpvBuiltInLayer: + *location = VARYING_SLOT_LAYER; + *mode = nir_var_shader_out; + break; + case SpvBuiltInTessLevelOuter: + case SpvBuiltInTessLevelInner: + case SpvBuiltInTessCoord: + case SpvBuiltInPatchVertices: + unreachable("no tessellation support"); + case SpvBuiltInFragCoord: + *location = VARYING_SLOT_POS; + *mode = nir_var_shader_in; + break; + case SpvBuiltInPointCoord: + *location = VARYING_SLOT_PNTC; + *mode = nir_var_shader_out; + break; + case SpvBuiltInFrontFacing: + *location = VARYING_SLOT_FACE; + *mode = nir_var_shader_out; + break; + case SpvBuiltInSampleId: + *location = SYSTEM_VALUE_SAMPLE_ID; + *mode = nir_var_shader_in; + break; + case SpvBuiltInSamplePosition: + *location = SYSTEM_VALUE_SAMPLE_POS; + *mode = nir_var_shader_in; + break; + case SpvBuiltInSampleMask: + *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? 
*/ + *mode = nir_var_shader_in; + break; + case SpvBuiltInFragColor: + *location = FRAG_RESULT_COLOR; + *mode = nir_var_shader_out; + break; + case SpvBuiltInFragDepth: + *location = FRAG_RESULT_DEPTH; + *mode = nir_var_shader_out; + break; + case SpvBuiltInHelperInvocation: + unreachable("unsupported builtin"); /* XXX */ + break; + case SpvBuiltInNumWorkgroups: + case SpvBuiltInWorkgroupSize: + /* these are constants, need to be handled specially */ + unreachable("unsupported builtin"); + case SpvBuiltInWorkgroupId: + case SpvBuiltInLocalInvocationId: + case SpvBuiltInGlobalInvocationId: + case SpvBuiltInLocalInvocationIndex: + unreachable("no compute shader support"); + default: + unreachable("unsupported builtin"); + } +} + +static void +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, const struct vtn_decoration *dec, void *void_var) { assert(val->value_type == vtn_value_type_deref); @@ -371,10 +727,8 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, nir_variable *var = void_var; switch (dec->decoration) { - case SpvDecorationPrecisionLow: - case SpvDecorationPrecisionMedium: - case SpvDecorationPrecisionHigh: - break; /* FIXME: Do nothing with these for now. */ + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ case SpvDecorationSmooth: var->data.interpolation = INTERP_QUALIFIER_SMOOTH; break; @@ -415,14 +769,25 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, var->data.explicit_binding = true; var->data.binding = dec->literals[0]; break; - case SpvDecorationBlock: - case SpvDecorationBufferBlock: + case SpvDecorationDescriptorSet: + var->data.descriptor_set = dec->literals[0]; + break; + case SpvDecorationBuiltIn: { + nir_variable_mode mode; + vtn_get_builtin_location(dec->literals[0], &var->data.location, + &mode); + var->data.mode = mode; + if (mode == nir_var_shader_in || mode == nir_var_system_value) + var->data.read_only = true; + b->builtins[dec->literals[0]] = var; + break; + } + case SpvDecorationNoStaticUse: + /* This can safely be ignored */ + break; case SpvDecorationRowMajor: case SpvDecorationColMajor: case SpvDecorationGLSLShared: - case SpvDecorationGLSLStd140: - case SpvDecorationGLSLStd430: - case SpvDecorationGLSLPacked: case SpvDecorationPatch: case SpvDecorationRestrict: case SpvDecorationAliased: @@ -431,42 +796,453 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, case SpvDecorationNonreadable: case SpvDecorationUniform: /* This is really nice but we have no use for it right now. 
*/ - case SpvDecorationNoStaticUse: case SpvDecorationCPacked: case SpvDecorationSaturatedConversion: case SpvDecorationStream: - case SpvDecorationDescriptorSet: case SpvDecorationOffset: - case SpvDecorationAlignment: case SpvDecorationXfbBuffer: - case SpvDecorationStride: - case SpvDecorationBuiltIn: case SpvDecorationFuncParamAttr: case SpvDecorationFPRoundingMode: case SpvDecorationFPFastMathMode: case SpvDecorationLinkageAttributes: case SpvDecorationSpecId: + break; default: unreachable("Unhandled variable decoration"); } } +static nir_variable * +get_builtin_variable(struct vtn_builder *b, + const struct glsl_type *type, + SpvBuiltIn builtin) +{ + nir_variable *var = b->builtins[builtin]; + + if (!var) { + var = ralloc(b->shader, nir_variable); + var->type = type; + + nir_variable_mode mode; + vtn_get_builtin_location(builtin, &var->data.location, &mode); + var->data.mode = mode; + var->name = ralloc_strdup(var, "builtin"); + + switch (mode) { + case nir_var_shader_in: + exec_list_push_tail(&b->shader->inputs, &var->node); + break; + case nir_var_shader_out: + exec_list_push_tail(&b->shader->outputs, &var->node); + break; + case nir_var_system_value: + exec_list_push_tail(&b->shader->system_values, &var->node); + break; + default: + unreachable("bad builtin mode"); + } + + b->builtins[builtin] = var; + } + + return var; +} + +static void +vtn_builtin_load(struct vtn_builder *b, + struct vtn_ssa_value *val, + SpvBuiltIn builtin) +{ + assert(glsl_type_is_vector_or_scalar(val->type)); + + nir_variable *var = get_builtin_variable(b, val->type, builtin); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + nir_ssa_dest_init(&load->instr, &load->dest, + glsl_get_vector_elements(val->type), NULL); + + load->variables[0] = nir_deref_var_create(load, var); + load->num_components = glsl_get_vector_elements(val->type); + nir_builder_instr_insert(&b->nb, &load->instr); + val->def = &load->dest.ssa; +} + +static void 
+vtn_builtin_store(struct vtn_builder *b, + struct vtn_ssa_value *val, + SpvBuiltIn builtin) +{ + assert(glsl_type_is_vector_or_scalar(val->type)); + + nir_variable *var = get_builtin_variable(b, val->type, builtin); + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + + store->variables[0] = nir_deref_var_create(store, var); + store->num_components = glsl_get_vector_elements(val->type); + store->src[0] = nir_src_for_ssa(val->def); + nir_builder_instr_insert(&b->nb, &store->instr); +} + +static struct vtn_ssa_value * +_vtn_variable_load(struct vtn_builder *b, + nir_deref_var *src_deref, struct vtn_type *src_type, + nir_deref *src_deref_tail) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = src_deref_tail->type; + + if (src_type->is_builtin) { + vtn_builtin_load(b, val, src_type->builtin); + return val; + } + + /* The deref tail may contain a deref to select a component of a vector (in + * other words, it might not be an actual tail) so we have to save it away + * here since we overwrite it later. + */ + nir_deref *old_child = src_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->variables[0] = + nir_deref_as_var(nir_copy_deref(load, &src_deref->deref)); + load->num_components = glsl_get_vector_elements(val->type); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + + nir_builder_instr_insert(&b->nb, &load->instr); + + if (src_deref->var->data.mode == nir_var_uniform && + glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) { + /* Uniform boolean loads need to be fixed up since they're defined + * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. 
+ */ + val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->def = &load->dest.ssa; + } + } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(val->type)) { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(val->type); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + val->elems[i] = _vtn_variable_load(b, src_deref, + src_type->array_element, + &deref->deref); + } + } else { + assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(val->type, i); + val->elems[i] = _vtn_variable_load(b, src_deref, + src_type->members[i], + &deref->deref); + } + } + + src_deref_tail->child = old_child; + + return val; +} + +static void +_vtn_variable_store(struct vtn_builder *b, struct vtn_type *dest_type, + nir_deref_var *dest_deref, nir_deref *dest_deref_tail, + struct vtn_ssa_value *src) +{ + if (dest_type->is_builtin) { + vtn_builtin_store(b, src, dest_type->builtin); + return; + } + + nir_deref *old_child = dest_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(src->type)) { + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->variables[0] = + nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); + store->num_components = glsl_get_vector_elements(src->type); + store->src[0] = nir_src_for_ssa(src->def); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else 
if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(src->type)) { + unsigned elems = glsl_get_length(src->type); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(src->type); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + _vtn_variable_store(b, dest_type->array_element, dest_deref, + &deref->deref, src->elems[i]); + } + } else { + assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(src->type); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(src->type, i); + _vtn_variable_store(b, dest_type->members[i], dest_deref, + &deref->deref, src->elems[i]); + } + } + + dest_deref_tail->child = old_child; +} + +static struct vtn_ssa_value * +_vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, + unsigned set, nir_ssa_def *binding, + unsigned offset, nir_ssa_def *indirect, + struct vtn_type *type) +{ + struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + val->type = type->type; + val->transposed = NULL; + if (glsl_type_is_vector_or_scalar(type->type)) { + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); + load->num_components = glsl_get_vector_elements(type->type); + load->const_index[0] = set; + load->src[0] = nir_src_for_ssa(binding); + load->const_index[1] = offset; + if (indirect) + load->src[1] = nir_src_for_ssa(indirect); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + nir_builder_instr_insert(&b->nb, &load->instr); + val->def = &load->dest.ssa; + } else { + unsigned elems = glsl_get_length(type->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + if (glsl_type_is_struct(type->type)) { + for 
(unsigned i = 0; i < elems; i++) { + val->elems[i] = _vtn_block_load(b, op, set, binding, + offset + type->offsets[i], + indirect, type->members[i]); + } + } else { + for (unsigned i = 0; i < elems; i++) { + val->elems[i] = _vtn_block_load(b, op, set, binding, + offset + i * type->stride, + indirect, type->array_element); + } + } + } + + return val; +} + +static struct vtn_ssa_value * +vtn_block_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *type, nir_deref *src_tail) +{ + unsigned set = src->var->data.descriptor_set; + + nir_ssa_def *binding = nir_imm_int(&b->nb, src->var->data.binding); + nir_deref *deref = &src->deref; + + /* The block variable may be an array, in which case the array index adds + * an offset to the binding. Figure out that index now. + */ + + if (deref->child->deref_type == nir_deref_type_array) { + deref = deref->child; + type = type->array_element; + nir_deref_array *deref_array = nir_deref_as_array(deref); + if (deref_array->deref_array_type == nir_deref_array_type_direct) { + binding = nir_imm_int(&b->nb, src->var->data.binding + + deref_array->base_offset); + } else { + binding = nir_iadd(&b->nb, binding, deref_array->indirect.ssa); + } + } + + unsigned offset = 0; + nir_ssa_def *indirect = NULL; + while (deref != src_tail) { + deref = deref->child; + switch (deref->deref_type) { + case nir_deref_type_array: { + nir_deref_array *deref_array = nir_deref_as_array(deref); + if (deref_array->deref_array_type == nir_deref_array_type_direct) { + offset += type->stride * deref_array->base_offset; + } else { + nir_ssa_def *offset = nir_imul(&b->nb, deref_array->indirect.ssa, + nir_imm_int(&b->nb, type->stride)); + indirect = indirect ? 
nir_iadd(&b->nb, indirect, offset) : offset; + } + type = type->array_element; + break; + } + + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(deref); + offset += type->offsets[deref_struct->index]; + type = type->members[deref_struct->index]; + break; + } + + default: + unreachable("unknown deref type"); + } + } + + /* TODO SSBO's */ + nir_intrinsic_op op = indirect ? nir_intrinsic_load_ubo_indirect + : nir_intrinsic_load_ubo; + + return _vtn_block_load(b, op, set, binding, offset, indirect, type); +} + +/* + * Gets the NIR-level deref tail, which may have as a child an array deref + * selecting which component due to OpAccessChain supporting per-component + * indexing in SPIR-V. + */ + +static nir_deref * +get_deref_tail(nir_deref_var *deref) +{ + nir_deref *cur = &deref->deref; + while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) + cur = cur->child; + + return cur; +} + +static nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, + nir_ssa_def *src, unsigned index); + +static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *index); + +static struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *src_type) +{ + nir_deref *src_tail = get_deref_tail(src); + + struct vtn_ssa_value *val; + if (src->var->interface_type && src->var->data.mode == nir_var_uniform) + val = vtn_block_load(b, src, src_type, src_tail); + else + val = _vtn_variable_load(b, src, src_type, src_tail); + + if (src_tail->child) { + nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); + assert(vec_deref->deref.child == NULL); + val->type = vec_deref->deref.type; + if (vec_deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); + else + val->def = vtn_vector_extract_dynamic(b, val->def, + vec_deref->indirect.ssa); + } + + return val; +} + +static nir_ssa_def * 
vtn_vector_insert(struct vtn_builder *b, + nir_ssa_def *src, nir_ssa_def *insert, + unsigned index); + +static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *insert, + nir_ssa_def *index); +static void +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest, struct vtn_type *dest_type) +{ + nir_deref *dest_tail = get_deref_tail(dest); + if (dest_tail->child) { + struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_type, + dest_tail); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_variable_store(b, dest_type, dest, dest_tail, val); + } else { + _vtn_variable_store(b, dest_type, dest, dest_tail, src); + } +} + +static void +vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, + nir_deref_var *dest, struct vtn_type *type) +{ + nir_deref *src_tail = get_deref_tail(src); + + if (src_tail->child || src->var->interface_type) { + assert(get_deref_tail(dest)->child); + struct vtn_ssa_value *val = vtn_variable_load(b, src, type); + vtn_variable_store(b, val, dest, type); + } else { + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + + nir_builder_instr_insert(&b->nb, ©->instr); + } +} + static void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { switch (opcode) { case SpvOpVariable: { - const struct glsl_type *type = + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; struct vtn_value *val = vtn_push_value(b, w[2], 
vtn_value_type_deref); nir_variable *var = ralloc(b->shader, nir_variable); - var->type = type; + var->type = type->type; var->name = ralloc_strdup(var, val->name); + bool builtin_block = false; + if (type->block) { + var->interface_type = type->type; + builtin_block = type->builtin_block; + } else if (glsl_type_is_array(type->type) && + (type->array_element->block || + type->array_element->buffer_block)) { + var->interface_type = type->array_element->type; + builtin_block = type->array_element->builtin_block; + } else { + var->interface_type = NULL; + } + switch ((SpvStorageClass)w[3]) { + case SpvStorageClassUniform: case SpvStorageClassUniformConstant: var->data.mode = nir_var_uniform; var->data.read_only = true; @@ -484,11 +1260,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvStorageClassFunction: var->data.mode = nir_var_local; break; - case SpvStorageClassUniform: case SpvStorageClassWorkgroupLocal: case SpvStorageClassWorkgroupGlobal: case SpvStorageClassGeneric: - case SpvStorageClassPrivate: case SpvStorageClassAtomicCounter: default: unreachable("Unhandled variable storage class"); @@ -500,15 +1274,55 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, vtn_value(b, w[4], vtn_value_type_constant)->constant; } - if (var->data.mode == nir_var_local) { - exec_list_push_tail(&b->impl->locals, &var->node); - } else { - exec_list_push_tail(&b->shader->globals, &var->node); + val->deref = nir_deref_var_create(b, var); + val->deref_type = type; + + vtn_foreach_decoration(b, val, var_decoration_cb, var); + + if (b->execution_model == SpvExecutionModelFragment && + var->data.mode == nir_var_shader_out) { + var->data.location += FRAG_RESULT_DATA0; + } else if (b->execution_model == SpvExecutionModelVertex && + var->data.mode == nir_var_shader_in) { + var->data.location += VERT_ATTRIB_GENERIC0; + } else if (var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out) { + var->data.location += VARYING_SLOT_VAR0; } - 
val->deref = nir_deref_var_create(b->shader, var); + /* If this was a uniform block, then we're not going to actually use the + * variable (we're only going to use it to compute offsets), so don't + * declare it in the shader. + */ + if (var->data.mode == nir_var_uniform && var->interface_type) + break; - vtn_foreach_decoration(b, val, var_decoration_cb, var); + /* Builtin blocks are lowered to individual variables during SPIR-V -> + * NIR, so don't declare them either. + */ + if (builtin_block) + break; + + switch (var->data.mode) { + case nir_var_shader_in: + exec_list_push_tail(&b->shader->inputs, &var->node); + break; + case nir_var_shader_out: + exec_list_push_tail(&b->shader->outputs, &var->node); + break; + case nir_var_global: + exec_list_push_tail(&b->shader->globals, &var->node); + break; + case nir_var_local: + exec_list_push_tail(&b->impl->locals, &var->node); + break; + case nir_var_uniform: + exec_list_push_tail(&b->shader->uniforms, &var->node); + break; + case nir_var_system_value: + exec_list_push_tail(&b->shader->system_values, &var->node); + break; + } break; } @@ -517,6 +1331,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); nir_deref_var *base = vtn_value(b, w[3], vtn_value_type_deref)->deref; val->deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); + struct vtn_type *deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; nir_deref *tail = &val->deref->deref; while (tail->child) @@ -535,15 +1350,17 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case GLSL_TYPE_BOOL: case GLSL_TYPE_ARRAY: { nir_deref_array *deref_arr = nir_deref_array_create(b); - if (base_type == GLSL_TYPE_ARRAY) { - deref_arr->deref.type = glsl_get_array_element(tail->type); - } else if (glsl_type_is_matrix(tail->type)) { - deref_arr->deref.type = glsl_get_column_type(tail->type); + if (base_type == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(tail->type)) { + 
deref_type = deref_type->array_element; } else { assert(glsl_type_is_vector(tail->type)); - deref_arr->deref.type = glsl_scalar_type(base_type); + deref_type = ralloc(b, struct vtn_type); + deref_type->type = glsl_scalar_type(base_type); } + deref_arr->deref.type = deref_type->type; + if (idx_val->value_type == vtn_value_type_constant) { unsigned idx = idx_val->constant->value.u[0]; deref_arr->deref_array_type = nir_deref_array_type_direct; @@ -552,7 +1369,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, assert(idx_val->value_type == vtn_value_type_ssa); deref_arr->deref_array_type = nir_deref_array_type_indirect; deref_arr->base_offset = 0; - deref_arr->indirect = nir_src_for_ssa(vtn_ssa_value(b, w[1])); + deref_arr->indirect = + nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); } tail->child = &deref_arr->deref; break; @@ -561,8 +1379,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case GLSL_TYPE_STRUCT: { assert(idx_val->value_type == vtn_value_type_constant); unsigned idx = idx_val->constant->value.u[0]; + deref_type = deref_type->members[idx]; nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); - deref_struct->deref.type = glsl_get_struct_field(tail->type, idx); + deref_struct->deref.type = deref_type->type; tail->child = &deref_struct->deref; break; } @@ -571,93 +1390,57 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } tail = tail->child; } + + /* For uniform blocks, we don't resolve the access chain until we + * actually access the variable, so we need to keep around the original + * type of the variable. 
+ */ + if (base->var->interface_type && base->var->data.mode == nir_var_uniform) + val->deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + else + val->deref_type = deref_type; + + break; } case SpvOpCopyMemory: { nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; + struct vtn_type *type = + vtn_value(b, w[1], vtn_value_type_deref)->deref_type; - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); - copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); - - nir_builder_instr_insert(&b->nb, ©->instr); + vtn_variable_copy(b, src, dest, type); break; } case SpvOpLoad: { nir_deref_var *src = vtn_value(b, w[3], vtn_value_type_deref)->deref; - const struct glsl_type *src_type = nir_deref_tail(&src->deref)->type; + struct vtn_type *src_type = + vtn_value(b, w[3], vtn_value_type_deref)->deref_type; - if (glsl_get_base_type(src_type) == GLSL_TYPE_SAMPLER) { + if (glsl_get_base_type(src_type->type) == GLSL_TYPE_SAMPLER) { vtn_push_value(b, w[2], vtn_value_type_deref)->deref = src; return; } - assert(glsl_type_is_vector_or_scalar(src_type)); struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); - load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src->deref)); - load->num_components = glsl_get_vector_elements(src_type); - nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, - val->name); - - nir_builder_instr_insert(&b->nb, &load->instr); - val->type = src_type; - - if (src->var->data.mode == nir_var_uniform && - glsl_get_base_type(src_type) == GLSL_TYPE_BOOL) { - /* Uniform boolean loads need to be fixed up since they're defined - * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. 
- */ - val->ssa = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); - } else { - val->ssa = &load->dest.ssa; - } + val->ssa = vtn_variable_load(b, src, src_type); break; } - case SpvOpStore: { - nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; - const struct glsl_type *dest_type = nir_deref_tail(&dest->deref)->type; - struct vtn_value *src_val = vtn_untyped_value(b, w[2]); - if (src_val->value_type == vtn_value_type_ssa) { - assert(glsl_type_is_vector_or_scalar(dest_type)); - nir_intrinsic_instr *store = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); - store->src[0] = nir_src_for_ssa(src_val->ssa); - store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest->deref)); - store->num_components = glsl_get_vector_elements(dest_type); - - nir_builder_instr_insert(&b->nb, &store->instr); - } else { - assert(src_val->value_type == vtn_value_type_constant); - - nir_variable *const_tmp = rzalloc(b->shader, nir_variable); - const_tmp->type = dest_type; - const_tmp->name = "const_temp"; - const_tmp->data.mode = nir_var_local; - const_tmp->data.read_only = true; - exec_list_push_tail(&b->impl->locals, &const_tmp->node); - - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); - copy->variables[1] = nir_deref_var_create(copy, const_tmp); - - nir_builder_instr_insert(&b->nb, ©->instr); - } + case SpvOpStore: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + struct vtn_type *dest_type = + vtn_value(b, w[1], vtn_value_type_deref)->deref_type; + struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); + vtn_variable_store(b, src, dest, dest_type); break; } - case SpvOpVariableArray: case SpvOpCopyMemorySized: case SpvOpArrayLength: - case SpvOpImagePointer: + case SpvOpImageTexelPointer: default: unreachable("Unhandled opcode"); } @@ -670,11 +1453,48 @@ vtn_handle_function_call(struct 
vtn_builder *b, SpvOp opcode, unreachable("Unhandled opcode"); } +static struct vtn_ssa_value * +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (!glsl_type_is_vector_or_scalar(type)) { + unsigned elems = glsl_get_length(type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *child_type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + child_type = glsl_get_column_type(type); + break; + case GLSL_TYPE_ARRAY: + child_type = glsl_get_array_element(type); + break; + case GLSL_TYPE_STRUCT: + child_type = glsl_get_struct_field(type, i); + break; + default: + unreachable("unkown base type"); + } + + val->elems[i] = vtn_create_ssa_value(b, child_type); + } + } + + return val; +} + static nir_tex_src vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) { nir_tex_src src; - src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa); + src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa->def); src.src_type = type; return src; } @@ -689,80 +1509,67 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, nir_tex_src srcs[8]; /* 8 should be enough */ nir_tex_src *p = srcs; + unsigned idx = 4; + unsigned coord_components = 0; switch (opcode) { - case SpvOpTextureSample: - case SpvOpTextureSampleDref: - case SpvOpTextureSampleLod: - case SpvOpTextureSampleProj: - case SpvOpTextureSampleGrad: - case SpvOpTextureSampleOffset: - case SpvOpTextureSampleProjLod: - case SpvOpTextureSampleProjGrad: - case SpvOpTextureSampleLodOffset: - case SpvOpTextureSampleProjOffset: - case SpvOpTextureSampleGradOffset: - case SpvOpTextureSampleProjLodOffset: - case SpvOpTextureSampleProjGradOffset: - case SpvOpTextureFetchTexelLod: - case 
SpvOpTextureFetchTexelOffset: - case SpvOpTextureFetchSample: - case SpvOpTextureFetchTexel: - case SpvOpTextureGather: - case SpvOpTextureGatherOffset: - case SpvOpTextureGatherOffsets: - case SpvOpTextureQueryLod: { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQueryLod: { /* All these types have the coordinate as their first real argument */ - struct vtn_value *coord = vtn_value(b, w[4], vtn_value_type_ssa); + struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); coord_components = glsl_get_vector_elements(coord->type); - p->src = nir_src_for_ssa(coord->ssa); + p->src = nir_src_for_ssa(coord->def); p->src_type = nir_tex_src_coord; p++; break; } + default: break; } nir_texop texop; switch (opcode) { - case SpvOpTextureSample: + case SpvOpImageSampleImplicitLod: texop = nir_texop_tex; - - if (count == 6) { - texop = nir_texop_txb; - *p++ = vtn_tex_src(b, w[5], nir_tex_src_bias); - } break; - case SpvOpTextureSampleDref: - case SpvOpTextureSampleLod: - case SpvOpTextureSampleProj: - case SpvOpTextureSampleGrad: - case SpvOpTextureSampleOffset: - case SpvOpTextureSampleProjLod: - case SpvOpTextureSampleProjGrad: - case SpvOpTextureSampleLodOffset: - case SpvOpTextureSampleProjOffset: - case SpvOpTextureSampleGradOffset: - case SpvOpTextureSampleProjLodOffset: - case SpvOpTextureSampleProjGradOffset: - case SpvOpTextureFetchTexelLod: - case SpvOpTextureFetchTexelOffset: - case SpvOpTextureFetchSample: - case SpvOpTextureFetchTexel: - case SpvOpTextureGather: - case SpvOpTextureGatherOffset: - case SpvOpTextureGatherOffsets: - case SpvOpTextureQuerySizeLod: - case SpvOpTextureQuerySize: - case 
SpvOpTextureQueryLod: - case SpvOpTextureQueryLevels: - case SpvOpTextureQuerySamples: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + case SpvOpImageQueryLod: + case SpvOpImageQueryLevels: + case SpvOpImageQuerySamples: default: unreachable("Unhandled opcode"); } + /* From now on, the remaining sources are "Optional Image Operands." */ + if (idx < count) { + /* XXX handle these (bias, lod, etc.) */ + assert(0); + } + + nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); const struct glsl_type *sampler_type = nir_deref_tail(&sampler->deref)->type; @@ -783,19 +1590,237 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, instr->is_array = glsl_sampler_type_is_array(sampler_type); instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); - instr->sampler = sampler; + instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); - val->ssa = &instr->dest.ssa; + val->ssa = vtn_create_ssa_value(b, glsl_vector_type(GLSL_TYPE_FLOAT, 4)); + val->ssa->def = &instr->dest.ssa; nir_builder_instr_insert(&b->nb, &instr->instr); } + +static nir_alu_instr * +create_vec(void *mem_ctx, unsigned num_components) +{ + nir_op op; + switch (num_components) { + case 1: op = nir_op_fmov; break; + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("bad vector size"); + } + + nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); + vec->dest.write_mask = (1 << num_components) - 1; + + 
return vec; +} + +static struct vtn_ssa_value * +vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + if (src->transposed) + return src->transposed; + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_transposed_type(src->type)); + + for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { + nir_alu_instr *vec = create_vec(b, glsl_get_matrix_columns(src->type)); + if (glsl_type_is_vector_or_scalar(src->type)) { + vec->src[0].src = nir_src_for_ssa(src->def); + vec->src[0].swizzle[0] = i; + } else { + for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { + vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); + vec->src[j].swizzle[0] = i; + } + } + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + + dest->transposed = src; + + return dest; +} + +/* + * Normally, column vectors in SPIR-V correspond to a single NIR SSA + * definition. But for matrix multiplies, we want to do one routine for + * multiplying a matrix by a matrix and then pretend that vectors are matrices + * with one column. So we "wrap" these things, and unwrap the result before we + * send it off. 
+ */ + +static struct vtn_ssa_value * +vtn_wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (val == NULL) + return NULL; + + if (glsl_type_is_matrix(val->type)) + return val; + + struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); + dest->type = val->type; + dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); + dest->elems[0] = val; + + return dest; +} + +static struct vtn_ssa_value * +vtn_unwrap_matrix(struct vtn_ssa_value *val) +{ + if (glsl_type_is_matrix(val->type)) + return val; + + return val->elems[0]; +} + +static struct vtn_ssa_value * +vtn_matrix_multiply(struct vtn_builder *b, + struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) +{ + + struct vtn_ssa_value *src0 = vtn_wrap_matrix(b, _src0); + struct vtn_ssa_value *src1 = vtn_wrap_matrix(b, _src1); + struct vtn_ssa_value *src0_transpose = vtn_wrap_matrix(b, _src0->transposed); + struct vtn_ssa_value *src1_transpose = vtn_wrap_matrix(b, _src1->transposed); + + unsigned src0_rows = glsl_get_vector_elements(src0->type); + unsigned src0_columns = glsl_get_matrix_columns(src0->type); + unsigned src1_columns = glsl_get_matrix_columns(src1->type); + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns)); + + dest = vtn_wrap_matrix(b, dest); + + bool transpose_result = false; + if (src0_transpose && src1_transpose) { + /* transpose(A) * transpose(B) = transpose(B * A) */ + src1 = src0_transpose; + src0 = src1_transpose; + src0_transpose = NULL; + src1_transpose = NULL; + transpose_result = true; + } + + if (src0_transpose && !src1_transpose && + glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { + /* We already have the rows of src0 and the columns of src1 available, + * so we can just take the dot product of each row with each column to + * get the result. 
+ */ + + for (unsigned i = 0; i < src1_columns; i++) { + nir_alu_instr *vec = create_vec(b, src0_rows); + for (unsigned j = 0; j < src0_rows; j++) { + vec->src[j].src = + nir_src_for_ssa(nir_fdot(&b->nb, src0_transpose->elems[j]->def, + src1->elems[i]->def)); + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + } else { + /* We don't handle the case where src1 is transposed but not src0, since + * the general case only uses individual components of src1 so the + * optimizer should chew through the transpose we emitted for src1. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[0]->def, + vtn_vector_extract(b, src1->elems[i]->def, 0)); + for (unsigned j = 1; j < src0_columns; j++) { + dest->elems[i]->def = + nir_fadd(&b->nb, dest->elems[i]->def, + nir_fmul(&b->nb, src0->elems[j]->def, + vtn_vector_extract(b, + src1->elems[i]->def, j))); + } + } + } + + dest = vtn_unwrap_matrix(dest); + + if (transpose_result) + dest = vtn_transpose(b, dest); + + return dest; +} + +static struct vtn_ssa_value * +vtn_mat_times_scalar(struct vtn_builder *b, + struct vtn_ssa_value *mat, + nir_ssa_def *scalar) +{ + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); + for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { + if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) + dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); + else + dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); + } + + return dest; +} + static void vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Matrix math not handled"); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + switch (opcode) { + case SpvOpTranspose: { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[3]); + val->ssa = vtn_transpose(b, src); 
+ break; + } + + case SpvOpOuterProduct: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, vtn_transpose(b, src1)); + break; + } + + case SpvOpMatrixTimesScalar: { + struct vtn_ssa_value *mat = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *scalar = vtn_ssa_value(b, w[4]); + + if (mat->transposed) { + val->ssa = vtn_transpose(b, vtn_mat_times_scalar(b, mat->transposed, + scalar->def)); + } else { + val->ssa = vtn_mat_times_scalar(b, mat, scalar->def); + } + break; + } + + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, src1); + break; + } + + default: unreachable("unknown matrix opcode"); + } } static void @@ -803,13 +1828,15 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); /* Collect the various SSA sources */ unsigned num_inputs = count - 3; nir_ssa_def *src[4]; for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 3]); + src[i] = vtn_ssa_value(b, w[i + 3])->def; /* Indicates that the first two arguments should be swapped. This is * used for implementing greater-than and less-than-or-equal. 
@@ -868,7 +1895,8 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; case SpvOpShiftLeftLogical: op = nir_op_ishl; break; case SpvOpLogicalOr: op = nir_op_ior; break; - case SpvOpLogicalXor: op = nir_op_ixor; break; + case SpvOpLogicalEqual: op = nir_op_ieq; break; + case SpvOpLogicalNotEqual: op = nir_op_ine; break; case SpvOpLogicalAnd: op = nir_op_iand; break; case SpvOpBitwiseOr: op = nir_op_ior; break; case SpvOpBitwiseXor: op = nir_op_ixor; break; @@ -921,24 +1949,24 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; case SpvOpFwidth: - val->ssa = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); return; case SpvOpFwidthFine: - val->ssa = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); return; case SpvOpFwidthCoarse: - val->ssa = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); return; case SpvOpVectorTimesScalar: /* The builder will take care of splatting for us. 
*/ - val->ssa = nir_fmul(&b->nb, src[0], src[1]); + val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); return; case SpvOpSRem: @@ -965,8 +1993,9 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, - glsl_get_vector_elements(val->type), val->name); - val->ssa = &instr->dest.dest.ssa; + glsl_get_vector_elements(type), val->name); + instr->dest.write_mask = (1 << glsl_get_vector_elements(type)) - 1; + val->ssa->def = &instr->dest.dest.ssa; for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) instr->src[i].src = nir_src_for_ssa(src[i]); @@ -974,6 +2003,350 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_builder_instr_insert(&b->nb, &instr->instr); } +static nir_ssa_def * +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) +{ + unsigned swiz[4] = { index }; + return nir_swizzle(&b->nb, src, swiz, 1, true); +} + + +static nir_ssa_def * +vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, + unsigned index) +{ + nir_alu_instr *vec = create_vec(b->shader, src->num_components); + + for (unsigned i = 0; i < src->num_components; i++) { + if (i == index) { + vec->src[i].src = nir_src_for_ssa(insert); + } else { + vec->src[i].src = nir_src_for_ssa(src); + vec->src[i].swizzle[0] = i; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static nir_ssa_def * +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_extract(b, src, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_extract(b, src, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index) +{ + nir_ssa_def *dest = 
vtn_vector_insert(b, src, insert, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_insert(b, src, insert, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, + nir_ssa_def *src0, nir_ssa_def *src1, + const uint32_t *indices) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1); + nir_builder_instr_insert(&b->nb, &undef->instr); + + for (unsigned i = 0; i < num_components; i++) { + uint32_t index = indices[i]; + if (index == 0xffffffff) { + vec->src[i].src = nir_src_for_ssa(&undef->def); + } else if (index < src0->num_components) { + vec->src[i].src = nir_src_for_ssa(src0); + vec->src[i].swizzle[0] = index; + } else { + vec->src[i].src = nir_src_for_ssa(src1); + vec->src[i].swizzle[0] = index - src0->num_components; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +/* + * Concatenates a number of vectors/scalars together to produce a vector + */ +static nir_ssa_def * +vtn_vector_construct(struct vtn_builder *b, unsigned num_components, + unsigned num_srcs, nir_ssa_def **srcs) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + unsigned dest_idx = 0; + for (unsigned i = 0; i < num_srcs; i++) { + nir_ssa_def *src = srcs[i]; + for (unsigned j = 0; j < src->num_components; j++) { + vec->src[dest_idx].src = nir_src_for_ssa(src); + vec->src[dest_idx].swizzle[0] = j; + dest_idx++; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static struct vtn_ssa_value * +vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) +{ + struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); + dest->type = src->type; + + if (glsl_type_is_vector_or_scalar(src->type)) { + dest->def = src->def; + } else { + unsigned 
elems = glsl_get_length(src->type); + + dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_ssa_value *insert, const uint32_t *indices, + unsigned num_indices) +{ + struct vtn_ssa_value *dest = vtn_composite_copy(b, src); + + struct vtn_ssa_value *cur = dest; + unsigned i; + for (i = 0; i < num_indices - 1; i++) { + cur = cur->elems[indices[i]]; + } + + if (glsl_type_is_vector_or_scalar(cur->type)) { + /* According to the SPIR-V spec, OpCompositeInsert may work down to + * the component granularity. In that case, the last index will be + * the index to insert the scalar into the vector. + */ + + cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); + } else { + cur->elems[indices[i]] = insert; + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, + const uint32_t *indices, unsigned num_indices) +{ + struct vtn_ssa_value *cur = src; + for (unsigned i = 0; i < num_indices; i++) { + if (glsl_type_is_vector_or_scalar(cur->type)) { + assert(i == num_indices - 1); + /* According to the SPIR-V spec, OpCompositeExtract may work down to + * the component granularity. The last index will be the index of the + * vector to extract. 
+ */ + + struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); + ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); + ret->def = vtn_vector_extract(b, cur->def, indices[i]); + return ret; + } + } + + return cur; +} + +static void +vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); + + switch (opcode) { + case SpvOpVectorExtractDynamic: + val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def); + break; + + case SpvOpVectorInsertDynamic: + val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + vtn_ssa_value(b, w[5])->def); + break; + + case SpvOpVectorShuffle: + val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), + vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + w + 5); + break; + + case SpvOpCompositeConstruct: { + unsigned elems = count - 3; + if (glsl_type_is_vector_or_scalar(type)) { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < elems; i++) + srcs[i] = vtn_ssa_value(b, w[3 + i])->def; + val->ssa->def = + vtn_vector_construct(b, glsl_get_vector_elements(type), + elems, srcs); + } else { + val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); + } + break; + } + case SpvOpCompositeExtract: + val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), + w + 4, count - 4); + break; + + case SpvOpCompositeInsert: + val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), + vtn_ssa_value(b, w[3]), + w + 5, count - 5); + break; + + case SpvOpCopyObject: + val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + break; + + default: + unreachable("unknown composite 
operation"); + } +} + +static void +vtn_phi_node_init(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + glsl_get_vector_elements(val->type), NULL); + exec_list_make_empty(&phi->srcs); + nir_builder_instr_insert(&b->nb, &phi->instr); + val->def = &phi->dest.ssa; + } else { + unsigned elems = glsl_get_length(val->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_init(b, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_phi_node_create(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = vtn_create_ssa_value(b, type); + vtn_phi_node_init(b, val); + return val; +} + +static void +vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_phi_node_create(b, type); +} + +static void +vtn_phi_node_add_src(struct vtn_ssa_value *phi, const nir_block *pred, + struct vtn_ssa_value *val) +{ + assert(phi->type == val->type); + if (glsl_type_is_vector_or_scalar(phi->type)) { + nir_phi_instr *phi_instr = nir_instr_as_phi(phi->def->parent_instr); + nir_phi_src *src = ralloc(phi_instr, nir_phi_src); + src->pred = (nir_block *) pred; + src->src = nir_src_for_ssa(val->def); + exec_list_push_tail(&phi_instr->srcs, &src->node); + } else { + unsigned elems = glsl_get_length(phi->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_add_src(phi->elems[i], pred, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block, + const struct glsl_type *type, const uint32_t *w, + unsigned count) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->block_table, block); + if (entry) { + struct vtn_block *spv_block = entry->data; + for 
(unsigned off = 4; off < count; off += 2) { + if (spv_block == vtn_value(b, w[off], vtn_value_type_block)->block) { + return vtn_ssa_value(b, w[off - 1]); + } + } + } + + b->nb.cursor = nir_before_block(block); + struct vtn_ssa_value *phi = vtn_phi_node_create(b, type); + + struct set_entry *entry2; + set_foreach(block->predecessors, entry2) { + nir_block *pred = (nir_block *) entry2->key; + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return phi; +} + +static bool +vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpLabel) { + b->block = vtn_value(b, w[1], vtn_value_type_block)->block; + return true; + } + + if (opcode != SpvOpPhi) + return true; + + struct vtn_ssa_value *phi = vtn_value(b, w[2], vtn_value_type_ssa)->ssa; + + struct set_entry *entry; + set_foreach(b->block->block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, phi->type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return true; +} + static bool vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -981,11 +2354,19 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, switch (opcode) { case SpvOpSource: case SpvOpSourceExtension: - case SpvOpCompileFlag: case SpvOpExtension: /* Unhandled, but these are for debug so that's ok. */ break; + case SpvOpCapability: + /* + * TODO properly handle these and give a real error if asking for too + * much. 
+ */ + assert(w[1] == SpvCapabilityMatrix || + w[1] == SpvCapabilityShader); + break; + case SpvOpExtInstImport: vtn_handle_extension(b, opcode, w, count); break; @@ -1002,7 +2383,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpExecutionMode: - unreachable("Execution modes not yet implemented"); + /* TODO */ break; case SpvOpString: @@ -1035,7 +2416,9 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpTypeFloat: case SpvOpTypeVector: case SpvOpTypeMatrix: + case SpvOpTypeImage: case SpvOpTypeSampler: + case SpvOpTypeSampledImage: case SpvOpTypeArray: case SpvOpTypeRuntimeArray: case SpvOpTypeStruct: @@ -1047,8 +2430,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpTypeReserveId: case SpvOpTypeQueue: case SpvOpTypePipe: - vtn_push_value(b, w[1], vtn_value_type_type)->type = - vtn_handle_type(b, opcode, &w[2], count - 2); + vtn_handle_type(b, opcode, w, count); break; case SpvOpConstantTrue: @@ -1056,8 +2438,6 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpConstant: case SpvOpConstantComposite: case SpvOpConstantSampler: - case SpvOpConstantNullPointer: - case SpvOpConstantNullObject: case SpvOpSpecConstantTrue: case SpvOpSpecConstantFalse: case SpvOpSpecConstant: @@ -1086,10 +2466,10 @@ vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, b->func = rzalloc(b, struct vtn_function); const struct glsl_type *result_type = - vtn_value(b, w[1], vtn_value_type_type)->type; + vtn_value(b, w[1], vtn_value_type_type)->type->type; struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); const struct glsl_type *func_type = - vtn_value(b, w[4], vtn_value_type_type)->type; + vtn_value(b, w[4], vtn_value_type_type)->type->type; assert(glsl_get_function_return_type(func_type) == result_type); @@ -1123,6 +2503,7 @@ vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, } case 
SpvOpFunctionEnd: + b->func->end = w; b->func = NULL; break; @@ -1182,10 +2563,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; assert(block->block == NULL); - struct exec_node *list_tail = exec_list_get_tail(b->nb.cf_node_list); - nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); - assert(tail_node->type == nir_cf_node_block); - block->block = nir_cf_node_as_block(tail_node); + block->block = nir_cursor_current_block(b->nb.cursor); break; } @@ -1203,7 +2581,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpVariable: - case SpvOpVariableArray: case SpvOpLoad: case SpvOpStore: case SpvOpCopyMemory: @@ -1211,7 +2588,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpAccessChain: case SpvOpInBoundsAccessChain: case SpvOpArrayLength: - case SpvOpImagePointer: + case SpvOpImageTexelPointer: vtn_handle_variables(b, opcode, w, count); break; @@ -1219,31 +2596,22 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_function_call(b, opcode, w, count); break; - case SpvOpTextureSample: - case SpvOpTextureSampleDref: - case SpvOpTextureSampleLod: - case SpvOpTextureSampleProj: - case SpvOpTextureSampleGrad: - case SpvOpTextureSampleOffset: - case SpvOpTextureSampleProjLod: - case SpvOpTextureSampleProjGrad: - case SpvOpTextureSampleLodOffset: - case SpvOpTextureSampleProjOffset: - case SpvOpTextureSampleGradOffset: - case SpvOpTextureSampleProjLodOffset: - case SpvOpTextureSampleProjGradOffset: - case SpvOpTextureFetchTexelLod: - case SpvOpTextureFetchTexelOffset: - case SpvOpTextureFetchSample: - case SpvOpTextureFetchTexel: - case SpvOpTextureGather: - case SpvOpTextureGatherOffset: - case SpvOpTextureGatherOffsets: - case SpvOpTextureQuerySizeLod: - case SpvOpTextureQuerySize: - case SpvOpTextureQueryLod: - case SpvOpTextureQueryLevels: - case 
SpvOpTextureQuerySamples: + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + case SpvOpImageQueryLod: + case SpvOpImageQueryLevels: + case SpvOpImageQuerySamples: vtn_handle_texture(b, opcode, w, count); break; @@ -1292,7 +2660,8 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpShiftRightArithmetic: case SpvOpShiftLeftLogical: case SpvOpLogicalOr: - case SpvOpLogicalXor: + case SpvOpLogicalEqual: + case SpvOpLogicalNotEqual: case SpvOpLogicalAnd: case SpvOpBitwiseOr: case SpvOpBitwiseXor: @@ -1341,6 +2710,20 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_matrix_alu(b, opcode, w, count); break; + case SpvOpVectorExtractDynamic: + case SpvOpVectorInsertDynamic: + case SpvOpVectorShuffle: + case SpvOpCompositeConstruct: + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: + case SpvOpCopyObject: + vtn_handle_composite(b, opcode, w, count); + break; + + case SpvOpPhi: + vtn_handle_phi_first_pass(b, w); + break; + default: unreachable("Unhandled opcode"); } @@ -1355,34 +2738,23 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, { struct vtn_block *block = start; while (block != end_block) { - if (block->block != NULL) { - /* We've already visited this block once before so this is a - * back-edge. Back-edges are only allowed to point to a loop - * merge. - */ - assert(block == cont_block); - return; - } - if (block->merge_op == SpvOpLoopMerge) { /* This is the jump into a loop. 
*/ - cont_block = block; - break_block = vtn_value(b, block->merge_block_id, - vtn_value_type_block)->block; + struct vtn_block *new_cont_block = block; + struct vtn_block *new_break_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; nir_loop *loop = nir_loop_create(b->shader); - nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node); - - struct exec_list *old_list = b->nb.cf_node_list; + nir_cf_node_insert(b->nb.cursor, &loop->cf_node); /* Reset the merge_op to prerevent infinite recursion */ block->merge_op = SpvOpNop; - nir_builder_insert_after_cf_list(&b->nb, &loop->body); - vtn_walk_blocks(b, block, break_block, cont_block, NULL); + b->nb.cursor = nir_after_cf_list(&loop->body); + vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); - nir_builder_insert_after_cf_list(&b->nb, old_list); - block = break_block; + b->nb.cursor = nir_after_cf_node(&loop->cf_node); + block = new_break_block; continue; } @@ -1393,6 +2765,10 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_foreach_instruction(b, block->label, block->branch, vtn_handle_body_instruction); + nir_block *cur_block = nir_cursor_current_block(b->nb.cursor); + assert(cur_block == block->block); + _mesa_hash_table_insert(b->block_table, cur_block, block); + switch (branch_op) { case SpvOpBranch: { struct vtn_block *branch_block = @@ -1411,8 +2787,16 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, return; } else if (branch_block == end_block) { + /* We're branching to the merge block of an if, since for loops + * and functions end_block == NULL, so we're done here. + */ return; } else { + /* We're branching to another block, and according to the rules, + * we can only branch to another block with one predecessor (so + * we're the only one jumping to it) so we can just process it + * next. 
+ */ block = branch_block; continue; } @@ -1426,8 +2810,8 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_value(b, w[3], vtn_value_type_block)->block; nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])); - nir_cf_node_insert_end(b->nb.cf_node_list, &if_stmt->cf_node); + if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); + nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); if (then_block == break_block) { nir_jump_instr *jump = nir_jump_instr_create(b->shader, @@ -1454,20 +2838,21 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, &jump->instr); block = then_block; } else { - /* Conventional if statement */ + /* According to the rules we're branching to two blocks that don't + * have any other predecessors, so we can handle this as a + * conventional if. + */ assert(block->merge_op == SpvOpSelectionMerge); struct vtn_block *merge_block = vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; - struct exec_list *old_list = b->nb.cf_node_list; - - nir_builder_insert_after_cf_list(&b->nb, &if_stmt->then_list); + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); vtn_walk_blocks(b, then_block, break_block, cont_block, merge_block); - nir_builder_insert_after_cf_list(&b->nb, &if_stmt->else_list); + b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); vtn_walk_blocks(b, else_block, break_block, cont_block, merge_block); - nir_builder_insert_after_cf_list(&b->nb, old_list); + b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); block = merge_block; continue; } @@ -1549,7 +2934,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); b->shader = shader; b->value_id_bound = value_id_bound; - b->values = ralloc_array(b, struct vtn_value, value_id_bound); + b->values = rzalloc_array(b, struct vtn_value, value_id_bound); exec_list_make_empty(&b->functions); /* Handle all the preamble 
instructions */ @@ -1562,11 +2947,22 @@ spirv_to_nir(const uint32_t *words, size_t word_count, foreach_list_typed(struct vtn_function, func, node, &b->functions) { b->impl = nir_function_impl_create(func->overload); + b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); nir_builder_init(&b->nb, b->impl); - nir_builder_insert_after_cf_list(&b->nb, &b->impl->body); + b->nb.cursor = nir_after_cf_list(&b->impl->body); vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); + vtn_foreach_instruction(b, func->start_block->label, func->end, + vtn_handle_phi_second_pass); } + /* Because we can still have output reads in NIR, we need to lower + * outputs to temporaries before we are truely finished. + */ + nir_lower_outputs_to_temporaries(shader); + ralloc_free(b); return shader; diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index d2b364bdfeb..decceff65a6 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -25,6 +25,7 @@ * */ +#include "nir.h" #include "nir_spirv.h" #include "nir_builder.h" #include "spirv.h" @@ -60,30 +61,88 @@ struct vtn_function { nir_function_overload *overload; struct vtn_block *start_block; + + const uint32_t *end; }; typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, const uint32_t *, unsigned); +struct vtn_ssa_value { + union { + nir_ssa_def *def; + struct vtn_ssa_value **elems; + }; + + /* For matrices, a transposed version of the value, or NULL if it hasn't + * been computed + */ + struct vtn_ssa_value *transposed; + + const struct glsl_type *type; +}; + +struct vtn_type { + const struct glsl_type *type; + + /* for matrices, whether the matrix is stored row-major */ + bool row_major; + + /* for structs, the offset of each member */ + unsigned *offsets; + + /* for structs, whether it was decorated as a 
"non-SSBO-like" block */ + bool block; + + /* for structs, whether it was decorated as an "SSBO-like" block */ + bool buffer_block; + + /* for structs with block == true, whether this is a builtin block (i.e. a + * block that contains only builtins). + */ + bool builtin_block; + + /* for arrays and matrices, the array stride */ + unsigned stride; + + /* for arrays, the vtn_type for the elements of the array */ + struct vtn_type *array_element; + + /* for structures, the vtn_type for each member */ + struct vtn_type **members; + + /* Whether this type, or a parent type, has been decorated as a builtin */ + bool is_builtin; + + SpvBuiltIn builtin; +}; + struct vtn_value { enum vtn_value_type value_type; const char *name; struct vtn_decoration *decoration; - const struct glsl_type *type; union { void *ptr; char *str; - nir_constant *constant; - nir_deref_var *deref; + struct vtn_type *type; + struct { + nir_constant *constant; + const struct glsl_type *const_type; + }; + struct { + nir_deref_var *deref; + struct vtn_type *deref_type; + }; struct vtn_function *func; struct vtn_block *block; - nir_ssa_def *ssa; + struct vtn_ssa_value *ssa; vtn_instruction_handler ext_handler; }; }; struct vtn_decoration { struct vtn_decoration *next; + int member; /* -1 if not a member decoration */ const uint32_t *literals; struct vtn_value *group; SpvDecoration decoration; @@ -96,6 +155,25 @@ struct vtn_builder { nir_function_impl *impl; struct vtn_block *block; + /* + * In SPIR-V, constants are global, whereas in NIR, the load_const + * instruction we use is per-function. So while we parse each function, we + * keep a hash table of constants we've resolved to nir_ssa_value's so + * far, and we lazily resolve them when we see them used in a function. + */ + struct hash_table *const_table; + + /* + * Map from nir_block to the vtn_block which ends with it -- used for + * handling phi nodes. + */ + struct hash_table *block_table; + + /* + * NIR variable for each SPIR-V builtin. 
+ */ + nir_variable *builtins[42]; /* XXX need symbolic constant from SPIR-V header */ + unsigned value_id_bound; struct vtn_value *values; @@ -134,10 +212,11 @@ vtn_value(struct vtn_builder *b, uint32_t value_id, return val; } -nir_ssa_def *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); +struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, struct vtn_value *, + int member, const struct vtn_decoration *, void *); -- cgit v1.2.3 From 86c3476668775513a720f116bffb619e98ecfe6d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 31 Aug 2015 17:16:01 -0700 Subject: nir/spirv: Use VERTEX_ID_ZERO_BASE for VertexId In Vulkan, VertexId and InstanceId will be zero-based and new intrinsics, VertexIndex and InstanceIndex, will be added for non-zer-based. See also, Khronos bug #14255 --- src/glsl/nir/spirv_to_nir.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index c3a16986fc1..27a864f5993 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -644,7 +644,10 @@ vtn_get_builtin_location(SpvBuiltIn builtin, int *location, /* XXX figure this out */ unreachable("unhandled builtin"); case SpvBuiltInVertexId: - *location = SYSTEM_VALUE_VERTEX_ID; + /* Vulkan defines VertexID to be zero-based and reserves the new + * builtin keyword VertexIndex to indicate the non-zero-based value. 
+ */ + *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; *mode = nir_var_system_value; break; case SpvBuiltInInstanceId: -- cgit v1.2.3 From 16ebe883a4e15d1b5f9ce475454777e0d586d561 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 31 Aug 2015 21:12:59 -0700 Subject: vk/meta: Add a helper for making an image from a buffer --- src/vulkan/anv_meta.c | 114 ++++++++++++++++++++++---------------------------- 1 file changed, 49 insertions(+), 65 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index c1dcb771819..cfaf878eb2b 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1087,6 +1087,48 @@ void anv_CmdBlitImage( meta_finish_blit(cmd_buffer, &saved_state); } +static VkImage +make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, + const VkBufferImageCopy *copy) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); + + if (copy->bufferRowLength != 0) + anv_finishme("bufferRowLength not supported in CopyBufferToImage"); + if (copy->bufferImageHeight != 0) + anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); + + VkImage vk_image; + VkResult result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = format, + .extent = { + .width = copy->imageExtent.width, + .height = copy->imageExtent.height, + .depth = 1, + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }, &vk_image); + assert(result == VK_SUCCESS); + + ANV_FROM_HANDLE(anv_image, image, vk_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + image->bo = buffer->bo; + image->offset = buffer->offset + copy->bufferOffset; + + return anv_image_to_handle(image); +} + void anv_CmdCopyBufferToImage( VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, @@ -1096,7 +1138,6 @@ void anv_CmdCopyBufferToImage( const VkBufferImageCopy* pRegions) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); ANV_FROM_HANDLE(anv_image, dest_image, destImage); VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); struct anv_saved_state saved_state; @@ -1104,43 +1145,15 @@ void anv_CmdCopyBufferToImage( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - if (pRegions[r].bufferRowLength != 0) - anv_finishme("bufferRowLength not supported in CopyBufferToImage"); - if (pRegions[r].bufferImageHeight != 0) - anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); - - VkImage srcImage; - anv_CreateImage(vk_device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = dest_image->format->vk_format, - .extent = { - .width = pRegions[r].imageExtent.width, - .height = pRegions[r].imageExtent.height, - .depth = 1, - }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT, - .flags = 0, - }, &srcImage); - - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. 
- */ - src_image->bo = src_buffer->bo; - src_image->offset = src_buffer->offset + pRegions[r].bufferOffset; + VkImage srcImage = make_image_for_buffer(vk_device, srcBuffer, + dest_image->format->vk_format, + &pRegions[r]); struct anv_image_view src_view; anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(src_image), + .image = srcImage, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = dest_image->format->vk_format, .channels = { @@ -1195,18 +1208,12 @@ void anv_CmdCopyImageToBuffer( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); struct anv_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - if (pRegions[r].bufferRowLength != 0) - anv_finishme("bufferRowLength not supported in CopyBufferToImage"); - if (pRegions[r].bufferImageHeight != 0) - anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); - struct anv_image_view src_view; anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { @@ -1235,32 +1242,9 @@ void anv_CmdCopyImageToBuffer( dest_format = VK_FORMAT_R8_UINT; } - VkImage destImage; - anv_CreateImage(vk_device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = dest_format, - .extent = { - .width = pRegions[r].imageExtent.width, - .height = pRegions[r].imageExtent.height, - .depth = 1, - }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT, - .flags = 0, - }, &destImage); - - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. 
so there's really no point. - */ - dest_image->bo = dest_buffer->bo; - dest_image->offset = dest_buffer->offset + pRegions[r].bufferOffset; + VkImage destImage = make_image_for_buffer(vk_device, destBuffer, + dest_format, + &pRegions[r]); struct anv_color_attachment_view dest_view; anv_color_attachment_view_init(&dest_view, cmd_buffer->device, -- cgit v1.2.3 From 0c2d4769354752f7e8df9b0819ef3e444df914bc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Sep 2015 11:57:28 -0700 Subject: vk/compiler: Properly reference/delete programs when using SPIR-V --- src/vulkan/anv_compiler.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index b330b245870..e024bf0a0eb 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -969,27 +969,32 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, fail_if(mesa_shader == NULL, "failed to create %s shader\n", stage_info[stage].name); +#define CREATE_PROGRAM(stage) \ + _mesa_init_##stage##_program(&brw->ctx, &ralloc(mesa_shader, struct brw_##stage##_program)->program, 0, 0) + bool is_scalar; + struct gl_program *prog; switch (stage) { case VK_SHADER_STAGE_VERTEX: - mesa_shader->Program = &rzalloc(mesa_shader, struct brw_vertex_program)->program.Base; + prog = CREATE_PROGRAM(vertex); is_scalar = compiler->screen->compiler->scalar_vs; break; case VK_SHADER_STAGE_GEOMETRY: - mesa_shader->Program = &rzalloc(mesa_shader, struct brw_geometry_program)->program.Base; + prog = CREATE_PROGRAM(geometry); is_scalar = false; break; case VK_SHADER_STAGE_FRAGMENT: - mesa_shader->Program = &rzalloc(mesa_shader, struct brw_fragment_program)->program.Base; + prog = CREATE_PROGRAM(fragment); is_scalar = true; break; case VK_SHADER_STAGE_COMPUTE: - mesa_shader->Program = &rzalloc(mesa_shader, struct brw_compute_program)->program.Base; + prog = CREATE_PROGRAM(compute); is_scalar = true; break; 
default: unreachable("Unsupported shader stage"); } + _mesa_reference_program(&brw->ctx, &mesa_shader->Program, prog); mesa_shader->Program->Parameters = rzalloc(mesa_shader, struct gl_program_parameter_list); @@ -1016,7 +1021,8 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, fail_if(mesa_shader->Program->nir == NULL, "failed to translate SPIR-V to NIR\n"); - program->Shaders[program->NumShaders] = mesa_shader; + _mesa_reference_shader(&brw->ctx, &program->Shaders[program->NumShaders], + mesa_shader); program->NumShaders++; } @@ -1176,11 +1182,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) &pipeline->cs_prog_data.base); } - /* XXX: Deleting the shader is broken with our current SPIR-V hacks. We - * need to fix this ASAP. - */ - if (!all_spirv) - brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); + brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); struct anv_device *device = compiler->device; while (device->scratch_block_pool.bo.size < pipeline->total_scratch) -- cgit v1.2.3 From 126ade002373825ff9e6db0f6ee2d4f7f6579f13 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Sep 2015 12:23:48 -0700 Subject: vk/extensions: count needs to be <= number of extensions --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 57b2681a2d1..19f3c1ed3cc 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -646,7 +646,7 @@ VkResult anv_GetGlobalExtensionProperties( return VK_SUCCESS; } - assert(*pCount < ARRAY_SIZE(global_extensions)); + assert(*pCount <= ARRAY_SIZE(global_extensions)); *pCount = ARRAY_SIZE(global_extensions); memcpy(pProperties, global_extensions, sizeof(global_extensions)); -- cgit v1.2.3 From 362ab2d78880288a92682ffcd56942461df00245 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Sep 2015 15:15:04 -0700 Subject: vk/compiler: Handle interpolation qualifiers 
for SPIR-V shaders --- src/vulkan/anv_compiler.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index e024bf0a0eb..1f961eb043b 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -946,6 +946,16 @@ setup_nir_io(struct gl_shader *mesa_shader, struct gl_program *prog = mesa_shader->Program; foreach_list_typed(nir_variable, var, node, &shader->inputs) { prog->InputsRead |= BITFIELD64_BIT(var->data.location); + if (shader->stage == MESA_SHADER_FRAGMENT) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *)prog; + + fprog->InterpQualifier[var->data.location] = + (glsl_interp_qualifier)var->data.interpolation; + if (var->data.centroid) + fprog->IsCentroid |= BITFIELD64_BIT(var->data.location); + if (var->data.sample) + fprog->IsSample |= BITFIELD64_BIT(var->data.location); + } } foreach_list_typed(nir_variable, var, node, &shader->outputs) { -- cgit v1.2.3 From be0a4da6a571dc82dd491005c2cb01d5a3b09061 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Sep 2015 15:16:06 -0700 Subject: vk/meta: Use SPIR-V for shaders We are also now using glslc for compiling the Vulkan driver like we do in curcible. 
--- configure.ac | 6 + src/vulkan/Makefile.am | 2 +- src/vulkan/glsl_scraper.py | 432 +++++++++++++++++++++------------------------ 3 files changed, 212 insertions(+), 228 deletions(-) (limited to 'src') diff --git a/configure.ac b/configure.ac index 0c496136d3d..5828885e92a 100644 --- a/configure.ac +++ b/configure.ac @@ -99,6 +99,7 @@ AM_PROG_CC_C_O AM_PROG_AS AX_CHECK_GNU_MAKE AC_CHECK_PROGS([PYTHON2], [python2 python]) +AC_CHECK_PROGS([PYTHON3], [python3]) AC_PROG_SED AC_PROG_MKDIR_P @@ -1523,6 +1524,10 @@ AC_SUBST([GBM_PC_LIB_PRIV]) AM_CONDITIONAL(HAVE_VULKAN, true) +AC_ARG_VAR([GLSLC], [Path to the glslc executable]) +AC_CHECK_PROGS([GLSLC], [glslc]) +AC_SUBST([GLSLC]) + dnl dnl EGL configuration dnl @@ -2533,6 +2538,7 @@ if test "x$MESA_LLVM" = x1; then echo "" fi echo " PYTHON2: $PYTHON2" +echo " PYTHON3: $PYTHON3" echo "" echo " Run '${MAKE-make}' to build Mesa" diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 89880b77c01..13897090287 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -96,7 +96,7 @@ anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< code > $@ %_spirv_autogen.h: %.c glsl_scraper.py - $(AM_V_GEN) $(PYTHON2) $(srcdir)/glsl_scraper.py --glsl-only -o $@ $< + $(AM_V_GEN) $(PYTHON3) $(srcdir)/glsl_scraper.py --with-glslc=$(GLSLC) -o $@ $< CLEANFILES = $(BUILT_SOURCES) diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index d1514712762..4963742ea36 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -1,7 +1,7 @@ -#! /usr/bin/env python +#! 
/usr/bin/env python3 import argparse -import cStringIO +import io import os import re import shutil @@ -11,231 +11,223 @@ import sys import tempfile from textwrap import dedent +class ShaderCompileError(RuntimeError): + def __init__(self): + super(ShaderCompileError, self).__init__('Compile error') + class Shader: - def __init__(self, stage): - self.stream = cStringIO.StringIO() - self.stage = stage - - if self.stage == 'VERTEX': - self.ext = 'vert' - elif self.stage == 'TESS_CONTROL': - self.ext = 'tesc' - elif self.stage == 'TESS_EVALUATION': - self.ext = 'tese' - elif self.stage == 'GEOMETRY': - self.ext = 'geom' - elif self.stage == 'FRAGMENT': - self.ext = 'frag' - elif self.stage == 'COMPUTE': - self.ext = 'comp' - else: - assert False - - def add_text(self, s): - self.stream.write(s) - - def finish_text(self, line): - self.line = line - - def glsl_source(self): - return self.stream.getvalue() - - def compile(self): - # We can assume if we got here that we have a temp directory and that - # we're currently living in it. 
- glsl_fname = 'shader{0}.{1}'.format(self.line, self.ext) - spirv_fname = self.ext + '.spv' - - glsl_file = open(glsl_fname, 'w') - glsl_file.write('#version 420 core\n') - glsl_file.write(self.glsl_source()) - glsl_file.close() - - out = open('glslang.out', 'wb') - err = subprocess.call([glslang, '-V', glsl_fname], stdout=out) - if err != 0: - out = open('glslang.out', 'r') - sys.stderr.write(out.read()) - out.close() - exit(1) - - def dwords(f): - while True: - dword_str = f.read(4) - if not dword_str: - return - assert len(dword_str) == 4 - yield struct.unpack('I', dword_str)[0] - - spirv_file = open(spirv_fname, 'rb') - self.dwords = list(dwords(spirv_file)) - spirv_file.close() - - os.remove(glsl_fname) - os.remove(spirv_fname) - - def dump_c_code(self, f, glsl_only = False): - f.write('\n\n') - var_prefix = '_glsl_helpers_shader{0}'.format(self.line) - - # First dump the GLSL source as strings - f.write('static const char {0}_glsl_src[] ='.format(var_prefix)) - f.write('\n_ANV_SPIRV_' + self.stage) - f.write('\n"#version 330\\n"') - for line in self.glsl_source().splitlines(): - if not line.strip(): - continue - f.write('\n"{0}\\n"'.format(line)) - f.write(';\n\n') - - if glsl_only: - return - - # Now dump the SPIR-V source - f.write('static const uint32_t {0}_spir_v_src[] = {{'.format(var_prefix)) - line_start = 0 - while line_start < len(self.dwords): - f.write('\n ') - for i in range(line_start, min(line_start + 6, len(self.dwords))): - f.write(' 0x{:08x},'.format(self.dwords[i])) - line_start += 6 - f.write('\n};\n') + def __init__(self, stage): + self.stream = io.StringIO() + self.stage = stage + self.dwords = None + + def add_text(self, s): + self.stream.write(s) + + def finish_text(self, line): + self.line = line + + def glsl_source(self): + return dedent(self.stream.getvalue()) + + def __run_glslc(self, extra_args=[]): + stage_flag = '-fshader-stage=' + if self.stage == 'VERTEX': + stage_flag += 'vertex' + elif self.stage == 'TESS_CONTROL': + 
stage_flag += 'tesscontrol' + elif self.stage == 'TESS_EVALUATION': + stage_flag += 'tesseval' + elif self.stage == 'GEOMETRY': + stage_flag += 'geometry' + elif self.stage == 'FRAGMENT': + stage_flag += 'fragment' + elif self.stage == 'COMPUTE': + stage_flag += 'compute' + else: + assert False + + with subprocess.Popen([glslc] + extra_args + + [stage_flag, '-std=430core', '-o', '-', '-'], + stdout = subprocess.PIPE, + stdin = subprocess.PIPE) as proc: + + proc.stdin.write(self.glsl_source().encode('utf-8')) + out, err = proc.communicate(timeout=30) + + if proc.returncode != 0: + raise ShaderCompileError() + + return out + + def compile(self): + def dwords(f): + while True: + dword_str = f.read(4) + if not dword_str: + return + assert len(dword_str) == 4 + yield struct.unpack('I', dword_str)[0] + + spirv = self.__run_glslc() + self.dwords = list(dwords(io.BytesIO(spirv))) + self.assembly = str(self.__run_glslc(['-S']), 'utf-8') + + def dump_c_code(self, f): + f.write('\n\n') + prefix = '_anv_glsl_helpers_shader{0}'.format(self.line) + + f.write('/* GLSL Source code:\n') + for line in self.glsl_source().splitlines(): + f.write(' * ' + line + '\n') + + f.write(' *\n') + + f.write(' * SPIR-V Assembly:\n') + f.write(' *\n') + for line in self.assembly.splitlines(): + f.write(' * ' + line + '\n') + f.write(' */\n') + + f.write('static const uint32_t {0}_spirv_code[] = {{'.format(prefix)) + line_start = 0 + while line_start < len(self.dwords): + f.write('\n ') + for i in range(line_start, min(line_start + 6, len(self.dwords))): + f.write(' 0x{:08x},'.format(self.dwords[i])) + line_start += 6 + f.write('\n};\n') + + f.write(dedent("""\ + static const VkShaderModuleCreateInfo {0}_info = {{ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = sizeof({0}_spirv_code), + .pCode = {0}_spirv_code, + }}; + """.format(prefix))) token_exp = re.compile(r'(GLSL_VK_SHADER_MODULE|\(|\)|,)') class Parser: - def __init__(self, f): - self.infile = f - self.paren_depth = 0 
- self.shader = None - self.line_number = 1 - self.shaders = [] - - def tokenize(f): - leftover = '' - for line in f: - pos = 0 - while True: - m = token_exp.search(line, pos) - if m: - if m.start() > pos: - leftover += line[pos:m.start()] - pos = m.end() - - if leftover: - yield leftover - leftover = '' - - yield m.group(0) - - else: - leftover += line[pos:] - break - - self.line_number += 1 - - if leftover: - yield leftover - - self.token_iter = tokenize(self.infile) - - def handle_shader_src(self): - paren_depth = 1 - for t in self.token_iter: - if t == '(': - paren_depth += 1 - elif t == ')': - paren_depth -= 1 - if paren_depth == 0: - return - - self.current_shader.add_text(t) - - def handle_macro(self): - t = self.token_iter.next() - assert t == '(' - t = self.token_iter.next() - t = self.token_iter.next() - assert t == ',' + def __init__(self, f): + self.infile = f + self.paren_depth = 0 + self.shader = None + self.line_number = 1 + self.shaders = [] + + def tokenize(f): + leftover = '' + for line in f: + pos = 0 + while True: + m = token_exp.search(line, pos) + if m: + if m.start() > pos: + leftover += line[pos:m.start()] + pos = m.end() + + if leftover: + yield leftover + leftover = '' + + yield m.group(0) + + else: + leftover += line[pos:] + break + + self.line_number += 1 + + if leftover: + yield leftover + + self.token_iter = tokenize(self.infile) + + def handle_shader_src(self): + paren_depth = 1 + for t in self.token_iter: + if t == '(': + paren_depth += 1 + elif t == ')': + paren_depth -= 1 + if paren_depth == 0: + return + + self.current_shader.add_text(t) + + def handle_macro(self, macro): + t = next(self.token_iter) + assert t == '(' + + # Throw away the device parameter + t = next(self.token_iter) + t = next(self.token_iter) + assert t == ',' + + stage = next(self.token_iter).strip() + + t = next(self.token_iter) + assert t == ',' + + self.current_shader = Shader(stage) + self.handle_shader_src() + 
self.current_shader.finish_text(self.line_number) + + self.shaders.append(self.current_shader) + self.current_shader = None + + def run(self): + for t in self.token_iter: + if t == 'GLSL_VK_SHADER_MODULE': + self.handle_macro(t) - stage = self.token_iter.next().strip() - - t = self.token_iter.next() - assert t == ',' - - self.current_shader = Shader(stage) - self.handle_shader_src() - self.current_shader.finish_text(self.line_number) +def open_file(name, mode): + if name == '-': + if mode == 'w': + return sys.stdout + elif mode == 'r': + return sys.stdin + else: + assert False + else: + return open(name, mode) - self.shaders.append(self.current_shader) - self.current_shader = None +def parse_args(): + description = dedent("""\ + This program scrapes a C file for any instance of the + qoShaderCreateInfoGLSL and qoCreateShaderGLSL macaros, grabs the + GLSL source code, compiles it to SPIR-V. The resulting SPIR-V code + is written to another C file as an array of 32-bit words. - def run(self): - for t in self.token_iter: - if t == 'GLSL_VK_SHADER_MODULE': - self.handle_macro() + If '-' is passed as the input file or output file, stdin or stdout + will be used instead of a file on disc.""") -def open_file(name, mode): - if name == '-': - if mode == 'w': - return sys.stdout - elif mode == 'r': - return sys.stdin - else: - assert False - else: - return open(name, mode) + p = argparse.ArgumentParser( + description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + p.add_argument('-o', '--outfile', default='-', + help='Output to the given file (default: stdout).') + p.add_argument('--with-glslc', metavar='PATH', + default='glslc', + dest='glslc', + help='Full path to the glslc shader compiler.') + p.add_argument('infile', metavar='INFILE') -def parse_args(): - description = dedent("""\ - This program scrapes a C file for any instance of the - GLSL_VK_SHADER_MODULE macro, grabs the GLSL source code, compiles it - to SPIR-V. 
The resulting SPIR-V code is written to another C file as - an array of 32-bit words. - - If '-' is passed as the input file or output file, stdin or stdout will be - used instead of a file on disc.""") - - p = argparse.ArgumentParser( - description=description, - formatter_class=argparse.RawDescriptionHelpFormatter) - p.add_argument('-o', '--outfile', default='-', - help='Output to the given file (default: stdout).') - p.add_argument('--with-glslang', metavar='PATH', - default='glslangValidator', - dest='glslang', - help='Full path to the glslangValidator program.') - p.add_argument('--glsl-only', action='store_true') - p.add_argument('infile', metavar='INFILE') - - return p.parse_args() + return p.parse_args() args = parse_args() infname = args.infile outfname = args.outfile -glslang = args.glslang -glsl_only = args.glsl_only +glslc = args.glslc with open_file(infname, 'r') as infile: - parser = Parser(infile) - parser.run() + parser = Parser(infile) + parser.run() -if not glsl_only: - # glslangValidator has an absolutely *insane* interface. We pretty much - # have to run in a temporary directory. Sad day. 
- current_dir = os.getcwd() - tmpdir = tempfile.mkdtemp('glsl_scraper') - - try: - os.chdir(tmpdir) - - for shader in parser.shaders: - shader.compile() - - os.chdir(current_dir) - finally: - shutil.rmtree(tmpdir) +for shader in parser.shaders: + shader.compile() with open_file(outfname, 'w') as outfile: outfile.write(dedent("""\ @@ -245,30 +237,16 @@ with open_file(outfname, 'w') as outfile: #include - #define _ANV_SPIRV_MAGIC "\\x03\\x02\\x23\\x07\\0\\0\\0\\0" - - #define _ANV_SPIRV_VERTEX _ANV_SPIRV_MAGIC "\\0\\0\\0\\0" - #define _ANV_SPIRV_TESS_CONTROL _ANV_SPIRV_MAGIC "\\1\\0\\0\\0" - #define _ANV_SPIRV_TESS_EVALUATION _ANV_SPIRV_MAGIC "\\2\\0\\0\\0" - #define _ANV_SPIRV_GEOMETRY _ANV_SPIRV_MAGIC "\\3\\0\\0\\0" - #define _ANV_SPIRV_FRAGMENT _ANV_SPIRV_MAGIC "\\4\\0\\0\\0" - #define _ANV_SPIRV_COMPUTE _ANV_SPIRV_MAGIC "\\5\\0\\0\\0" - - #define _ANV_GLSL_SRC_VAR2(_line) _glsl_helpers_shader ## _line ## _glsl_src - #define _ANV_GLSL_SRC_VAR(_line) _ANV_GLSL_SRC_VAR2(_line) + #define _ANV_SPIRV_MODULE_INFO2(_line) _anv_glsl_helpers_shader ## _line ## _info + #define _ANV_SPIRV_MODULE_INFO(_line) _ANV_SPIRV_MODULE_INFO2(_line) #define GLSL_VK_SHADER_MODULE(device, stage, ...) ({ \\ VkShaderModule __module; \\ - VkShaderModuleCreateInfo __shader_create_info = { \\ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, \\ - .codeSize = sizeof(_ANV_GLSL_SRC_VAR(__LINE__)), \\ - .pCode = _ANV_GLSL_SRC_VAR(__LINE__), \\ - }; \\ vkCreateShaderModule(anv_device_to_handle(device), \\ - &__shader_create_info, &__module); \\ + &_ANV_SPIRV_MODULE_INFO(__LINE__), &__module); \\ __module; \\ }) """)) for shader in parser.shaders: - shader.dump_c_code(outfile, glsl_only) + shader.dump_c_code(outfile) -- cgit v1.2.3 From 28503191f11b689b8bcc04420713d28869268b1e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 31 Aug 2015 17:44:08 -0700 Subject: vk/meta: Partially fix vkCmdCopyBufferToImage for S8_UINT Create R8_UINT VkAttachmentView and VkImageView for the stencil data. 
This fixes a crash, but the pixels in the destination image are still incorrect. They are not properly tiled. Fixes crashes in Crucible tests func.miptree.s8-uint.aspect-stencil.* as of crucible-7471449. Test results improve 'lost' -> 'fail'. --- src/vulkan/anv_meta.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index cfaf878eb2b..d01001911ef 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1140,13 +1140,22 @@ void anv_CmdCopyBufferToImage( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); ANV_FROM_HANDLE(anv_image, dest_image, destImage); VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + const VkFormat orig_format = dest_image->format->vk_format; struct anv_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { + VkFormat proxy_format = orig_format; + VkImageAspect proxy_aspect = pRegions[r].imageSubresource.aspect; + + if (orig_format == VK_FORMAT_S8_UINT) { + proxy_format = VK_FORMAT_R8_UINT; + proxy_aspect = VK_IMAGE_ASPECT_COLOR; + } + VkImage srcImage = make_image_for_buffer(vk_device, srcBuffer, - dest_image->format->vk_format, + proxy_format, &pRegions[r]); struct anv_image_view src_view; @@ -1155,7 +1164,7 @@ void anv_CmdCopyBufferToImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = dest_image->format->vk_format, + .format = proxy_format, .channels = { VK_CHANNEL_SWIZZLE_R, VK_CHANNEL_SWIZZLE_G, @@ -1163,7 +1172,7 @@ void anv_CmdCopyBufferToImage( VK_CHANNEL_SWIZZLE_A }, .subresourceRange = { - .aspect = pRegions[r].imageSubresource.aspect, + .aspect = proxy_aspect, .baseMipLevel = 0, .mipLevels = 1, .baseArraySlice = 0, @@ -1177,7 +1186,7 @@ void anv_CmdCopyBufferToImage( &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = 
anv_image_to_handle(dest_image), - .format = dest_image->format->vk_format, + .format = proxy_format, .mipLevel = pRegions[r].imageSubresource.mipLevel, .baseArraySlice = pRegions[r].imageSubresource.arraySlice, .arraySize = 1, -- cgit v1.2.3 From 0cb26523d31e8f2bb12395b1a5e4b7551df31e4a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 3 Sep 2015 11:03:28 -0700 Subject: vk/image: Add PRM reference for QPitch equation Suggested-by: Nanley Chery --- src/vulkan/anv_image.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 736a76881ff..7a37497d0cc 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -180,6 +180,10 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, uint32_t h1 = align_u32(anv_minify(extent->height, 1), j); uint32_t w2 = align_u32(anv_minify(extent->width, 2), i); + /* The QPitch equation is found in the Broadwell PRM >> Volume 5: Memory + * Views >> Common Surface Formats >> Surface Layout >> 2D Surfaces >> + * Surface Arrays >> For All Surface Other Than Separate Stencil Buffer: + */ qpitch = h0 + h1 + 11 * j; mt_width = MAX(w0, w1 + w2); mt_height = array_size * qpitch; -- cgit v1.2.3 From 2e346c882ddbea68db6dc0b6a05a9e9186624c04 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 26 Aug 2015 03:41:37 -0700 Subject: vk: Make vk_error a little more helpful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Print out file and line number and translate the error code to the symbolic name. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_private.h | 12 +++++------- src/vulkan/anv_util.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a27b2e5ed92..b662af6cf5f 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -120,16 +120,14 @@ struct anv_common { * propagating errors. Might be useful to plug in a stack trace here. */ -static inline VkResult -vk_error(VkResult error) -{ +VkResult __vk_error(VkResult error, const char *file, int line); + #ifdef DEBUG - fprintf(stderr, "vk_error: %x\n", error); +#define vk_error(error) __vk_error(error, __FILE__, __LINE__); +#else +#define vk_error(error) error #endif - return error; -} - void __anv_finishme(const char *file, int line, const char *format, ...) anv_printflike(3, 4); void anv_loge(const char *format, ...) anv_printflike(1, 2); diff --git a/src/vulkan/anv_util.c b/src/vulkan/anv_util.c index 0311fbcd84f..a78847acec6 100644 --- a/src/vulkan/anv_util.c +++ b/src/vulkan/anv_util.c @@ -82,6 +82,53 @@ anv_abortfv(const char *format, va_list va) abort(); } +VkResult +__vk_error(VkResult error, const char *file, int line) +{ + static const char *error_names[] = { + "VK_ERROR_UNKNOWN", + "VK_ERROR_UNAVAILABLE", + "VK_ERROR_INITIALIZATION_FAILED", + "VK_ERROR_OUT_OF_HOST_MEMORY", + "VK_ERROR_OUT_OF_DEVICE_MEMORY", + "VK_ERROR_DEVICE_ALREADY_CREATED", + "VK_ERROR_DEVICE_LOST", + "VK_ERROR_INVALID_POINTER", + "VK_ERROR_INVALID_VALUE", + "VK_ERROR_INVALID_HANDLE", + "VK_ERROR_INVALID_ORDINAL", + "VK_ERROR_INVALID_MEMORY_SIZE", + "VK_ERROR_INVALID_EXTENSION", + "VK_ERROR_INVALID_FLAGS", + "VK_ERROR_INVALID_ALIGNMENT", + "VK_ERROR_INVALID_FORMAT", + "VK_ERROR_INVALID_IMAGE", + "VK_ERROR_INVALID_DESCRIPTOR_SET_DATA", + "VK_ERROR_INVALID_QUEUE_TYPE", + "VK_ERROR_UNSUPPORTED_SHADER_IL_VERSION", + "VK_ERROR_BAD_SHADER_CODE", + 
"VK_ERROR_BAD_PIPELINE_DATA", + "VK_ERROR_NOT_MAPPABLE", + "VK_ERROR_MEMORY_MAP_FAILED", + "VK_ERROR_MEMORY_UNMAP_FAILED", + "VK_ERROR_INCOMPATIBLE_DEVICE", + "VK_ERROR_INCOMPATIBLE_DRIVER", + "VK_ERROR_INCOMPLETE_COMMAND_BUFFER", + "VK_ERROR_BUILDING_COMMAND_BUFFER", + "VK_ERROR_MEMORY_NOT_BOUND", + "VK_ERROR_INCOMPATIBLE_QUEUE", + "VK_ERROR_INVALID_LAYER", + }; + + if (error <= VK_ERROR_UNKNOWN && error >= VK_ERROR_INVALID_LAYER) + fprintf(stderr, "%s:%d: %s\n", + file, line, error_names[-error - VK_ERROR_UNKNOWN]); + else + fprintf(stderr, "%s:%d: vk error %d\n", file, line, error); + + return error; +} + int anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) { -- cgit v1.2.3 From c4b30e7885f1d47e57753db49d232669d87a88e5 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 26 Aug 2015 04:03:38 -0700 Subject: vk: Add new vk_errorf that takes a format string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to annotate error cases in debug builds. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_device.c | 43 +++++++++++++++++++++++++++++-------------- src/vulkan/anv_image.c | 3 +-- src/vulkan/anv_private.h | 5 +++-- src/vulkan/anv_query.c | 2 +- src/vulkan/anv_util.c | 22 ++++++++++++++++------ src/vulkan/anv_x11.c | 4 ++-- 6 files changed, 52 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 19f3c1ed3cc..37cf7d3cea9 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -36,38 +36,53 @@ anv_physical_device_init(struct anv_physical_device *device, struct anv_instance *instance, const char *path) { + VkResult result; int fd; fd = open(path, O_RDWR | O_CLOEXEC); if (fd < 0) - return vk_error(VK_ERROR_UNAVAILABLE); + return vk_errorf(VK_ERROR_UNAVAILABLE, "failed to open %s: %m", path); device->instance = instance; device->path = path; device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); - if (!device->chipset_id) + if (!device->chipset_id) { + result = vk_errorf(VK_ERROR_UNAVAILABLE, "failed to get chipset id: %m"); goto fail; + } device->name = brw_get_device_name(device->chipset_id); device->info = brw_get_device_info(device->chipset_id, -1); - if (!device->info) + if (!device->info) { + result = vk_errorf(VK_ERROR_UNAVAILABLE, "failed to get device info"); goto fail; + } - if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) + if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) { + result = vk_errorf(VK_ERROR_UNAVAILABLE, "failed to get aperture size: %m"); goto fail; + } - if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) + if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) { + result = vk_errorf(VK_ERROR_UNAVAILABLE, "kernel missing gem wait"); goto fail; + } - if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) { + result = vk_errorf(VK_ERROR_UNAVAILABLE, "kernel missing execbuf2"); goto fail; + } - if 
(!anv_gem_get_param(fd, I915_PARAM_HAS_LLC)) + if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC)) { + result = vk_errorf(VK_ERROR_UNAVAILABLE, "non-llc gpu"); goto fail; + } - if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS)) + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS)) { + result = vk_errorf(VK_ERROR_UNAVAILABLE, "kernel missing exec constants"); goto fail; + } close(fd); @@ -75,7 +90,7 @@ anv_physical_device_init(struct anv_physical_device *device, fail: close(fd); - return vk_error(VK_ERROR_UNAVAILABLE); + return result; } static void *default_alloc( @@ -729,12 +744,12 @@ VkResult anv_QueueSubmit( ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); if (ret != 0) - return vk_error(VK_ERROR_UNKNOWN); + return vk_errorf(VK_ERROR_UNKNOWN, "execbuf2 failed: %m"); if (fence) { ret = anv_gem_execbuffer(device, &fence->execbuf); if (ret != 0) - return vk_error(VK_ERROR_UNKNOWN); + return vk_errorf(VK_ERROR_UNKNOWN, "execbuf2 failed: %m"); } for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) @@ -797,14 +812,14 @@ VkResult anv_DeviceWaitIdle( ret = anv_gem_execbuffer(device, &execbuf); if (ret != 0) { - result = vk_error(VK_ERROR_UNKNOWN); + result = vk_errorf(VK_ERROR_UNKNOWN, "execbuf2 failed: %m"); goto fail; } timeout = INT64_MAX; ret = anv_gem_wait(device, bo->gem_handle, &timeout); if (ret != 0) { - result = vk_error(VK_ERROR_UNKNOWN); + result = vk_errorf(VK_ERROR_UNKNOWN, "execbuf2 failed: %m"); goto fail; } @@ -1211,7 +1226,7 @@ VkResult anv_WaitForFences( if (ret == -1 && errno == ETIME) return VK_TIMEOUT; else if (ret == -1) - return vk_error(VK_ERROR_UNKNOWN); + return vk_errorf(VK_ERROR_UNKNOWN, "gem wait failed: %m"); } return VK_SUCCESS; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 7a37497d0cc..fbcd435a1a0 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -259,8 +259,7 @@ anv_image_create(VkDevice _device, extent->height > limits->height || extent->depth > limits->depth) { 
/* TODO(chadv): What is the correct error? */ - anv_loge("image extent is too large"); - return vk_error(VK_ERROR_INVALID_MEMORY_SIZE); + return vk_errorf(VK_ERROR_INVALID_MEMORY_SIZE, "image extent is too large"); } image = anv_device_alloc(device, sizeof(*image), 8, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index b662af6cf5f..ca62dc487d7 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -120,10 +120,11 @@ struct anv_common { * propagating errors. Might be useful to plug in a stack trace here. */ -VkResult __vk_error(VkResult error, const char *file, int line); +VkResult __vk_errorf(VkResult error, const char *file, int line, const char *format, ...); #ifdef DEBUG -#define vk_error(error) __vk_error(error, __FILE__, __LINE__); +#define vk_error(error) __vk_errorf(error, __FILE__, __LINE__, NULL); +#define vk_errorf(error, format, ...) __vk_errorf(error, __FILE__, __LINE__, format, ## __VA_ARGS__); #else #define vk_error(error) error #endif diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index 3785560dc3b..4ef9d8c4b0c 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -123,7 +123,7 @@ VkResult anv_GetQueryPoolResults( if (flags & VK_QUERY_RESULT_WAIT_BIT) { ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); if (ret == -1) - return vk_error(VK_ERROR_UNKNOWN); + return vk_errorf(VK_ERROR_UNKNOWN, "gem_wait failed %m"); } for (uint32_t i = 0; i < queryCount; i++) { diff --git a/src/vulkan/anv_util.c b/src/vulkan/anv_util.c index a78847acec6..94f14f98f8f 100644 --- a/src/vulkan/anv_util.c +++ b/src/vulkan/anv_util.c @@ -83,8 +83,11 @@ anv_abortfv(const char *format, va_list va) } VkResult -__vk_error(VkResult error, const char *file, int line) +__vk_errorf(VkResult error, const char *file, int line, const char *format, ...) 
{ + va_list ap; + char buffer[256]; + static const char *error_names[] = { "VK_ERROR_UNKNOWN", "VK_ERROR_UNAVAILABLE", @@ -120,11 +123,18 @@ __vk_error(VkResult error, const char *file, int line) "VK_ERROR_INVALID_LAYER", }; - if (error <= VK_ERROR_UNKNOWN && error >= VK_ERROR_INVALID_LAYER) - fprintf(stderr, "%s:%d: %s\n", - file, line, error_names[-error - VK_ERROR_UNKNOWN]); - else - fprintf(stderr, "%s:%d: vk error %d\n", file, line, error); + assert(error <= VK_ERROR_UNKNOWN && error >= VK_ERROR_INVALID_LAYER); + + if (format) { + va_start(ap, format); + vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: %s (%s)\n", file, line, + buffer, error_names[-error - 1]); + } else { + fprintf(stderr, "%s:%d: %s\n", file, line, error_names[-error - 1]); + } return error; } diff --git a/src/vulkan/anv_x11.c b/src/vulkan/anv_x11.c index f65a86487cb..3c6d41aa79e 100644 --- a/src/vulkan/anv_x11.c +++ b/src/vulkan/anv_x11.c @@ -163,13 +163,13 @@ VkResult anv_CreateSwapChainWSI( ret = anv_gem_set_tiling(device, memory->bo.gem_handle, surface->stride, I915_TILING_X); if (ret) { - result = vk_error(VK_ERROR_UNKNOWN); + result = vk_errorf(VK_ERROR_UNKNOWN, "set_tiling failed: %m"); goto fail; } int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); if (fd == -1) { - result = vk_error(VK_ERROR_UNKNOWN); + result = vk_errorf(VK_ERROR_UNKNOWN, "handle_to_fd failed: %m"); goto fail; } -- cgit v1.2.3 From 82396a551493f3bbaa4a80e903616912b0092d3e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 26 Aug 2015 04:04:36 -0700 Subject: vk: Drop check for I915_PARAM_HAS_EXEC_CONSTANTS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't use this kernel feature. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_device.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 37cf7d3cea9..557404fa746 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -78,11 +78,6 @@ anv_physical_device_init(struct anv_physical_device *device, result = vk_errorf(VK_ERROR_UNAVAILABLE, "non-llc gpu"); goto fail; } - - if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS)) { - result = vk_errorf(VK_ERROR_UNAVAILABLE, "kernel missing exec constants"); - goto fail; - } close(fd); -- cgit v1.2.3 From b5e90f3f48f6fe866f3c2f94745df82c959d09b5 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 26 Aug 2015 04:08:51 -0700 Subject: vk: Use vk* entrypoints in meta, not driver_layer pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll change the dispatch mechanism again in a later commit. Stop using the driver_layer function pointers and just use the public entry points. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_device.c | 33 --------------------------------- src/vulkan/anv_meta.c | 18 +++++++++--------- 2 files changed, 9 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 557404fa746..25c60488a80 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1380,15 +1380,6 @@ anv_buffer_view_create( return VK_SUCCESS; } - -VkResult anv_CreateBufferView( - VkDevice _device, - const VkBufferViewCreateInfo* pCreateInfo, - VkBufferView* pView) -{ - return driver_layer->CreateBufferView(_device, pCreateInfo, pView); -} - VkResult anv_DestroyBufferView( VkDevice _device, VkBufferView _bview) @@ -1402,14 +1393,6 @@ VkResult anv_DestroyBufferView( return VK_SUCCESS; } -VkResult anv_CreateSampler( - VkDevice _device, - const VkSamplerCreateInfo* pCreateInfo, - VkSampler* pSampler) -{ - return driver_layer->CreateSampler(_device, pCreateInfo, pSampler); -} - VkResult anv_DestroySampler( VkDevice _device, VkSampler _sampler) @@ -1880,14 +1863,6 @@ VkResult anv_DestroyDynamicViewportState( return VK_SUCCESS; } -VkResult anv_CreateDynamicRasterState( - VkDevice _device, - const VkDynamicRasterStateCreateInfo* pCreateInfo, - VkDynamicRasterState* pState) -{ - return driver_layer->CreateDynamicRasterState(_device, pCreateInfo, pState); -} - VkResult anv_DestroyDynamicRasterState( VkDevice _device, VkDynamicRasterState _rs_state) @@ -1941,14 +1916,6 @@ VkResult anv_DestroyDynamicColorBlendState( return VK_SUCCESS; } -VkResult anv_CreateDynamicDepthStencilState( - VkDevice _device, - const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, - VkDynamicDepthStencilState* pState) -{ - return driver_layer->CreateDynamicDepthStencilState(_device, pCreateInfo, pState); -} - VkResult anv_DestroyDynamicDepthStencilState( VkDevice _device, VkDynamicDepthStencilState _ds_state) diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 
d01001911ef..6ce963d8374 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -318,7 +318,7 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.cb_state); - driver_layer->CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, num_instances); + vkCmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, num_instances); } void @@ -734,7 +734,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .dependencyCount = 0, }, &pass); - driver_layer->CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), + vkCmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderPass = pass, @@ -755,9 +755,9 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, device->meta_state.blit.pipeline_layout, 0, 1, &set, 0, NULL); - driver_layer->CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, 1); + vkCmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, 1); - driver_layer->CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); + vkCmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. 
@@ -1383,7 +1383,7 @@ void anv_CmdClearColorImage( .dependencyCount = 0, }, &pass); - driver_layer->CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), + vkCmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderArea = { @@ -1411,7 +1411,7 @@ void anv_CmdClearColorImage( meta_emit_clear(cmd_buffer, 1, &instance_data, (VkClearDepthStencilValue) {0}); - driver_layer->CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); + vkCmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); } } } @@ -1473,19 +1473,19 @@ anv_device_init_meta(struct anv_device *device) anv_device_init_meta_clear_state(device); anv_device_init_meta_blit_state(device); - anv_CreateDynamicRasterState(anv_device_to_handle(device), + vkCreateDynamicRasterState(anv_device_to_handle(device), &(VkDynamicRasterStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO, }, &device->meta_state.shared.rs_state); - anv_CreateDynamicColorBlendState(anv_device_to_handle(device), + vkCreateDynamicColorBlendState(anv_device_to_handle(device), &(VkDynamicColorBlendStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO }, &device->meta_state.shared.cb_state); - anv_CreateDynamicDepthStencilState(anv_device_to_handle(device), + vkCreateDynamicDepthStencilState(anv_device_to_handle(device), &(VkDynamicDepthStencilStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO }, -- cgit v1.2.3 From c4dbff58d80c842794175f7b16cc0085ed2db940 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 27 Aug 2015 12:04:41 -0700 Subject: vk: Drop redundant gen7_CreateGraphicsPipelines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is handled by anv_CreateGraphicsPipelines(). 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/gen7_pipeline.c | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 0ed4727c37f..599432ed68a 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -561,31 +561,6 @@ gen7_graphics_pipeline_create( return VK_SUCCESS; } -VkResult gen7_CreateGraphicsPipelines( - VkDevice _device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkGraphicsPipelineCreateInfo* pCreateInfos, - VkPipeline* pPipelines) -{ - VkResult result = VK_SUCCESS; - - unsigned i = 0; - for (; i < count; i++) { - result = gen7_graphics_pipeline_create(_device, &pCreateInfos[i], - NULL, &pPipelines[i]); - if (result != VK_SUCCESS) { - for (unsigned j = 0; j < i; j++) { - anv_DestroyPipeline(_device, pPipelines[j]); - } - - return result; - } - } - - return VK_SUCCESS; -} - VkResult gen7_compute_pipeline_create( VkDevice _device, const VkComputePipelineCreateInfo* pCreateInfo, -- cgit v1.2.3 From 1d787781ff4834015d7b3008336f4765c5c709e5 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 26 Aug 2015 04:10:58 -0700 Subject: vk: Fall back to previous gens in entry point resolver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to always just do a one-level fallback from genX_* to anv_* entry points. That worked for gen7 and gen8 where all entry points were either different or could be made anv_* entry points (eg anv_CreateDynamicViewportState). We're about to add gen9 and now need to be able to fall back to gen8 entry points for most things. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_device.c | 9 +-------- src/vulkan/anv_entrypoints_gen.py | 30 ++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 25c60488a80..70c0c9d490a 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -542,14 +542,7 @@ VkResult anv_CreateDevice( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); - switch (physical_device->info->gen) { - case 7: - driver_layer = &gen7_layer; - break; - case 8: - driver_layer = &gen8_layer; - break; - } + anv_set_dispatch_gen(physical_device->info->gen); device = anv_instance_alloc(instance, sizeof(*device), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py index 21f87f181e9..149f34c9842 100644 --- a/src/vulkan/anv_entrypoints_gen.py +++ b/src/vulkan/anv_entrypoints_gen.py @@ -91,9 +91,7 @@ if opt_header: print " };\n" print "};\n" - print "extern const struct anv_layer gen7_layer;\n" - print "extern const struct anv_layer gen8_layer;\n" - print "extern const struct anv_layer *driver_layer;\n" + print "void anv_set_dispatch_gen(uint32_t gen);\n" for type, name, args, num, h in entrypoints: print "%s anv_%s%s;" % (type, name, args) @@ -195,7 +193,13 @@ determine_validate(void) enable_validate = atoi(s); } -const struct anv_layer *driver_layer = &anv_layer; +static uint32_t dispatch_gen; + +void +anv_set_dispatch_gen(uint32_t gen) +{ + dispatch_gen = gen; +} static void * __attribute__ ((noinline)) resolve_entrypoint(uint32_t index) @@ -203,10 +207,20 @@ resolve_entrypoint(uint32_t index) if (enable_validate && validate_layer.entrypoints[index]) return validate_layer.entrypoints[index]; - if (driver_layer && driver_layer->entrypoints[index]) - return driver_layer->entrypoints[index]; - - return anv_layer.entrypoints[index]; + switch (dispatch_gen) { + case 8: + if 
(gen8_layer.entrypoints[index]) + return gen8_layer.entrypoints[index]; + /* fall through */ + case 7: + if (gen7_layer.entrypoints[index]) + return gen7_layer.entrypoints[index]; + /* fall through */ + case 0: + return anv_layer.entrypoints[index]; + default: + unreachable("unsupported gen\\n"); + } } """ -- cgit v1.2.3 From 6e35a1f166e677a55f465ec420d9d546e10f3fd7 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Sep 2015 12:04:03 -0700 Subject: vk: Remove various hacks/scaffolding code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we switched away from calling brwCreateContext() there's a bit of hacky support we can now delete. This reduces our diff to upstream master. Signed-off-by: Kristian Høgsberg Kristensen --- configure.ac | 4 -- src/mesa/drivers/dri/common/dri_test.c | 9 ++-- src/mesa/drivers/dri/i965/brw_context.c | 4 -- src/mesa/drivers/dri/i965/brw_context.h | 6 --- src/mesa/drivers/dri/i965/brw_state_cache.c | 3 -- src/mesa/drivers/dri/i965/intel_debug.c | 2 +- src/mesa/drivers/dri/i965/intel_screen.c | 72 ----------------------------- 7 files changed, 4 insertions(+), 96 deletions(-) (limited to 'src') diff --git a/configure.ac b/configure.ac index 5828885e92a..e8919caa74b 100644 --- a/configure.ac +++ b/configure.ac @@ -1162,10 +1162,6 @@ AC_ARG_ENABLE([driglx-direct], [driglx_direct="$enableval"], [driglx_direct="yes"]) -# Check for libcaca -PKG_CHECK_EXISTS([caca], [have_libcaca=yes], [have_libcaca=no]) -AM_CONDITIONAL([HAVE_LIBCACA], [test x$have_libcaca = xyes]) - dnl dnl libGL configuration per driver dnl diff --git a/src/mesa/drivers/dri/common/dri_test.c b/src/mesa/drivers/dri/common/dri_test.c index 310e7617e2f..57bfa5b9394 100644 --- a/src/mesa/drivers/dri/common/dri_test.c +++ b/src/mesa/drivers/dri/common/dri_test.c @@ -1,4 +1,3 @@ -#include #include "main/glheader.h" #include "main/compiler.h" #include "glapi/glapi.h" @@ -34,14 +33,12 @@ 
_glapi_check_multithread(void) PUBLIC void _glapi_set_context(void *context) -{ - _glapi_Context = context; -} +{} PUBLIC void * _glapi_get_context(void) { - return _glapi_Context; + return 0; } PUBLIC void @@ -87,7 +84,7 @@ _glapi_set_nop_handler(_glapi_nop_handler_proc func) PUBLIC struct _glapi_table * _glapi_new_nop_table(unsigned num_entries) { - return malloc(16); + return NULL; } #ifndef NO_MAIN diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 0ee5ab2bdee..020df05b2a8 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -815,7 +815,6 @@ brwCreateContext(gl_api api, intel_batchbuffer_init(brw); -#if 0 if (brw->gen >= 6) { /* Create a new hardware context. Using a hardware context means that * our GPU state will be saved/restored on context switch, allowing us @@ -840,7 +839,6 @@ brwCreateContext(gl_api api, } brw_init_state(brw); -#endif intelInitExtensions(ctx); @@ -908,10 +906,8 @@ brwCreateContext(gl_api api, _mesa_compute_version(ctx); -#if 0 _mesa_initialize_dispatch_tables(ctx); _mesa_initialize_vbo_vtxfmt(ctx); -#endif if (ctx->Extensions.AMD_performance_monitor) { brw_init_performance_monitors(brw); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1267a6f5a97..54813c51d50 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -2075,12 +2075,6 @@ gen6_upload_push_constants(struct brw_context *brw, struct brw_stage_state *stage_state, enum aub_state_struct_type type); -struct intel_screen *intel_screen_create(int fd); -void intel_screen_destroy(struct intel_screen *screen); - -struct brw_context *intel_context_create(struct intel_screen *screen); -void intel_context_destroy(struct brw_context *brw); - void brw_initialize_context_constants(struct brw_context *brw); diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c 
b/src/mesa/drivers/dri/i965/brw_state_cache.c index e817ecfb8cc..fbc041920f4 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -416,9 +416,6 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) DBG("%s\n", __func__); - if (cache->bo == NULL) - return; - if (brw->has_llc) drm_intel_bo_unmap(cache->bo); drm_intel_bo_unreference(cache->bo); diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index 6bd55d395b2..b3b3c21f491 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -60,7 +60,7 @@ static const struct dri_debug_control debug_control[] = { { "urb", DEBUG_URB }, { "vs", DEBUG_VS }, { "clip", DEBUG_CLIP }, - { "foob", DEBUG_AUB }, /* disable aub dumbing in the dri driver */ + { "aub", DEBUG_AUB }, { "shader_time", DEBUG_SHADER_TIME }, { "no16", DEBUG_NO16 }, { "blorp", DEBUG_BLORP }, diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 5911b444454..85863a0827e 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1486,78 +1486,6 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) return (const __DRIconfig**) intel_screen_make_configs(psp); } -struct intel_screen * -intel_screen_create(int fd) -{ - __DRIscreen *psp; - __DRIconfig **configs; - int i; - - psp = malloc(sizeof(*psp)); - if (psp == NULL) - return NULL; - - psp->image.loader = (void *) 1; /* Don't complain about this being NULL */ - psp->fd = fd; - psp->dri2.useInvalidate = (void *) 1; - - configs = (__DRIconfig **) intelInitScreen2(psp); - for (i = 0; configs[i]; i++) - free(configs[i]); - free(configs); - - return psp->driverPrivate; -} - -void -intel_screen_destroy(struct intel_screen *screen) -{ - __DRIscreen *psp; - - psp = screen->driScrnPriv; - intelDestroyScreen(screen->driScrnPriv); - free(psp); -} - - -struct brw_context * 
-intel_context_create(struct intel_screen *screen) -{ - __DRIcontext *driContextPriv; - struct brw_context *brw; - unsigned error; - - driContextPriv = malloc(sizeof(*driContextPriv)); - if (driContextPriv == NULL) - return NULL; - - driContextPriv->driScreenPriv = screen->driScrnPriv; - - brwCreateContext(API_OPENGL_CORE, - NULL, /* visual */ - driContextPriv, - 3, 0, - 0, /* flags */ - false, /* notify_reset */ - &error, - NULL); - - brw = driContextPriv->driverPrivate; - brw->ctx.FirstTimeCurrent = false; - - return driContextPriv->driverPrivate; -} - -void -intel_context_destroy(struct brw_context *brw) -{ - __DRIcontext *driContextPriv; - - driContextPriv = brw->driContext; - intelDestroyContext(driContextPriv); - free(driContextPriv); -} - struct intel_buffer { __DRIbuffer base; drm_intel_bo *bo; -- cgit v1.2.3 From 316c8ac53bd1cf654d77057a0fb4b35e4994d523 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Sep 2015 12:27:28 -0700 Subject: vk: Assert that the SPIR-V module has the magic number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_compiler.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 1f961eb043b..070fd1e1121 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -974,6 +974,7 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, struct anv_shader *shader = pipeline->shaders[stage]; struct gl_shader *mesa_shader; int name = 0; + uint32_t *spirv; mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); fail_if(mesa_shader == NULL, @@ -1012,13 +1013,15 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, mesa_shader->Type = stage_info[stage].token; mesa_shader->Stage = stage_info[stage].stage; - assert(shader->module->size % 4 == 0); - struct 
gl_shader_compiler_options *glsl_options = &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage]; + spirv = (uint32_t *) shader->module->data; + assert(spirv[0] == SPIR_V_MAGIC_NUMBER); + assert(shader->module->size % 4 == 0); + mesa_shader->Program->nir = - spirv_to_nir((uint32_t *)shader->module->data, shader->module->size / 4, + spirv_to_nir(spirv, shader->module->size / 4, stage_info[stage].stage, glsl_options->NirOptions); nir_validate_shader(mesa_shader->Program->nir); -- cgit v1.2.3 From 7c1d20dc489097f4c5586b41f50b010c3c0e0a40 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Sep 2015 14:02:11 -0700 Subject: vk: Drop GLSL code from anv_compiler.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_compiler.cpp | 97 +++------------------------------------------ 1 file changed, 5 insertions(+), 92 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 070fd1e1121..ab6d64897a5 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -646,21 +646,6 @@ brw_cs_populate_key(struct brw_context *brw, key->program_string_id = bcp->id; } -static void -fail_on_compile_error(int status, const char *msg) -{ - int source, line, column; - char error[256]; - - if (status) - return; - - if (sscanf(msg, "%d:%d(%d): error: %255[^\n]", &source, &line, &column, error) == 4) - fail_if(!status, "%d:%s\n", line, error); - else - fail_if(!status, "%s\n", msg); -} - struct anv_compiler { struct anv_device *device; struct intel_screen *screen; @@ -726,10 +711,6 @@ anv_compiler_create(struct anv_device *device) _mesa_init_constants(&ctx->Const, API_OPENGL_CORE); - brw_initialize_context_constants(compiler->brw); - - intelInitExtensions(ctx); - /* Set dd::NewShader */ brwInitFragProgFuncs(&ctx->Driver); @@ -898,47 +879,6 @@ struct spirv_header{ uint32_t gen_magic; }; 
-static const char * -src_as_glsl(const char *data) -{ - const struct spirv_header *as_spirv = (const struct spirv_header *)data; - - /* Check alignment */ - if ((intptr_t)data & 0x3) { - return data; - } - - if (as_spirv->magic == SPIR_V_MAGIC_NUMBER) { - /* LunarG back-door */ - if (as_spirv->version == 0) - return data + 12; - else - return NULL; - } else { - return data; - } -} - -static void -anv_compile_shader_glsl(struct anv_compiler *compiler, - struct gl_shader_program *program, - struct anv_pipeline *pipeline, uint32_t stage) -{ - struct brw_context *brw = compiler->brw; - struct gl_shader *shader; - int name = 0; - - shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); - fail_if(shader == NULL, "failed to create %s shader\n", stage_info[stage].name); - - shader->Source = strdup(src_as_glsl(pipeline->shaders[stage]->module->data)); - _mesa_glsl_compile_shader(&brw->ctx, shader, false, false); - fail_on_compile_error(shader->CompileStatus, shader->InfoLog); - - program->Shaders[program->NumShaders] = shader; - program->NumShaders++; -} - static void setup_nir_io(struct gl_shader *mesa_shader, nir_shader *shader) @@ -1085,41 +1025,14 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) fail_if(program == NULL || program->Shaders == NULL, "failed to create program\n"); - bool all_spirv = true; for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { - if (pipeline->shaders[i] == NULL) - continue; - - /* You need at least this much for "void main() { }" anyway */ - assert(pipeline->shaders[i]->module->size >= 12); - - if (src_as_glsl(pipeline->shaders[i]->module->data)) { - all_spirv = false; - break; - } - - assert(pipeline->shaders[i]->module->size % 4 == 0); + if (pipeline->shaders[i]) + anv_compile_shader_spirv(compiler, program, pipeline, i); } - if (all_spirv) { - for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { - if (pipeline->shaders[i]) - anv_compile_shader_spirv(compiler, program, pipeline, i); - } - - for 
(unsigned i = 0; i < program->NumShaders; i++) { - struct gl_shader *shader = program->Shaders[i]; - program->_LinkedShaders[shader->Stage] = shader; - } - } else { - for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { - if (pipeline->shaders[i]) - anv_compile_shader_glsl(compiler, program, pipeline, i); - } - - _mesa_glsl_link_shader(&brw->ctx, program); - fail_on_compile_error(program->LinkStatus, - program->InfoLog); + for (unsigned i = 0; i < program->NumShaders; i++) { + struct gl_shader *shader = program->Shaders[i]; + program->_LinkedShaders[shader->Stage] = shader; } bool success; -- cgit v1.2.3 From 8af3624651345e68ab833dcf98ca00da5a6afd4e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Sep 2015 11:46:43 -0700 Subject: vk: Further reduce diff to master MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we don't compile GLSL, we can roll back a few more hacks and unexport some things from the backend compiler. 
Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_context.c | 5 ++--- src/mesa/drivers/dri/i965/brw_context.h | 3 --- src/mesa/drivers/dri/i965/intel_extensions.c | 20 ++++++-------------- src/vulkan/anv_compiler.cpp | 10 +--------- 4 files changed, 9 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 020df05b2a8..907b2a07353 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -311,7 +311,7 @@ brw_init_driver_functions(struct brw_context *brw, functions->GetSamplePosition = gen6_get_sample_position; } -void +static void brw_initialize_context_constants(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; @@ -390,8 +390,7 @@ brw_initialize_context_constants(struct brw_context *brw) int max_samples; const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen); const int clamp_max_samples = - brw->optionCache.info != NULL ? 
- driQueryOptioni(&brw->optionCache, "clamp_max_samples") : -1; + driQueryOptioni(&brw->optionCache, "clamp_max_samples"); if (clamp_max_samples < 0) { max_samples = msaa_modes[0]; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 54813c51d50..49ff428b13a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -2075,9 +2075,6 @@ gen6_upload_push_constants(struct brw_context *brw, struct brw_stage_state *stage_state, enum aub_state_struct_type type); -void -brw_initialize_context_constants(struct brw_context *brw); - bool gen9_use_linear_1d_layout(const struct brw_context *brw, const struct intel_mipmap_tree *mt); diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 1a246d3ea3a..4365b719801 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -275,17 +275,14 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130; ctx->Extensions.EXT_timer_query = true; - if (brw->bufmgr) { - if (brw->gen == 5 || can_write_oacontrol(brw)) { - ctx->Extensions.AMD_performance_monitor = true; - ctx->Extensions.INTEL_performance_query = true; - } + if (brw->gen == 5 || can_write_oacontrol(brw)) { + ctx->Extensions.AMD_performance_monitor = true; + ctx->Extensions.INTEL_performance_query = true; } } if (brw->gen >= 6) { ctx->Extensions.ARB_blend_func_extended = - brw->optionCache.info == NULL || !driQueryOptionb(&brw->optionCache, "disable_blend_func_extended"); ctx->Extensions.ARB_conditional_render_inverted = true; ctx->Extensions.ARB_draw_buffers_blend = true; @@ -308,9 +305,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.EXT_transform_feedback = true; ctx->Extensions.OES_depth_texture_cube_map = true; - /* Test if the kernel has the ioctl. 
*/ - if (brw->intelScreen->hw_has_timestamp) - ctx->Extensions.ARB_timer_query = true; + ctx->Extensions.ARB_timer_query = brw->intelScreen->hw_has_timestamp; /* Only enable this in core profile because other parts of Mesa behave * slightly differently when the extension is enabled. @@ -335,8 +330,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_texture_compression_bptc = true; ctx->Extensions.ARB_texture_view = true; - if (brw->bufmgr && - can_do_pipelined_register_writes(brw)) { + if (can_do_pipelined_register_writes(brw)) { ctx->Extensions.ARB_draw_indirect = true; ctx->Extensions.ARB_transform_feedback2 = true; ctx->Extensions.ARB_transform_feedback3 = true; @@ -365,9 +359,7 @@ intelInitExtensions(struct gl_context *ctx) if (ctx->API != API_OPENGL_CORE) ctx->Extensions.ARB_color_buffer_float = true; - if (ctx->Mesa_DXTn || - (brw->optionCache.info != NULL && - driQueryOptionb(&brw->optionCache, "force_s3tc_enable"))) + if (ctx->Mesa_DXTn || driQueryOptionb(&brw->optionCache, "force_s3tc_enable")) ctx->Extensions.EXT_texture_compression_s3tc = true; ctx->Extensions.ANGLE_texture_compression_dxt = true; diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index ab6d64897a5..5f189c7ce8e 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -676,8 +676,6 @@ anv_compiler_create(struct anv_device *device) compiler->device = device; - compiler->brw->optionCache.info = NULL; - compiler->brw->bufmgr = NULL; compiler->brw->gen = devinfo->gen; compiler->brw->is_g4x = devinfo->is_g4x; compiler->brw->is_baytrail = devinfo->is_baytrail; @@ -709,15 +707,9 @@ anv_compiler_create(struct anv_device *device) ctx = &compiler->brw->ctx; _mesa_init_shader_object_functions(&ctx->Driver); - _mesa_init_constants(&ctx->Const, API_OPENGL_CORE); - - /* Set dd::NewShader */ - brwInitFragProgFuncs(&ctx->Driver); - + /* brw_select_clip_planes() needs this for bogus reasons. 
*/ ctx->_Shader = &compiler->pipeline; - compiler->brw->precompile = false; - return compiler; fail: -- cgit v1.2.3 From 9a7600c9b58b0fc62033bb993016d3d7d4b8810a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Sep 2015 16:44:42 -0700 Subject: vk/device: Use an array for device extensions --- src/vulkan/anv_device.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 70c0c9d490a..25fedf908b8 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -657,6 +657,9 @@ VkResult anv_GetGlobalExtensionProperties( return VK_SUCCESS; } +static const VkExtensionProperties device_extensions[] = { +}; + VkResult anv_GetPhysicalDeviceExtensionProperties( VkPhysicalDevice physicalDevice, const char* pLayerName, @@ -664,12 +667,16 @@ VkResult anv_GetPhysicalDeviceExtensionProperties( VkExtensionProperties* pProperties) { if (pProperties == NULL) { - *pCount = 0; + *pCount = ARRAY_SIZE(device_extensions); return VK_SUCCESS; } - /* None supported at this time */ - return vk_error(VK_ERROR_INVALID_EXTENSION); + assert(*pCount < ARRAY_SIZE(device_extensions)); + + *pCount = ARRAY_SIZE(device_extensions); + memcpy(pProperties, device_extensions, sizeof(device_extensions)); + + return VK_SUCCESS; } VkResult anv_GetGlobalLayerProperties( -- cgit v1.2.3 From beb466ff5b22b816d1c3033e0a6f1f1b2fa8bb12 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Sep 2015 16:46:20 -0700 Subject: vk: Move anv_x11.c to anv_wsi_x11.c --- src/vulkan/Makefile.am | 2 +- src/vulkan/anv_wsi_x11.c | 302 +++++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_x11.c | 302 ----------------------------------------------- 3 files changed, 303 insertions(+), 303 deletions(-) create mode 100644 src/vulkan/anv_wsi_x11.c delete mode 100644 src/vulkan/anv_x11.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 13897090287..a2a9c430869 100644 --- 
a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -72,7 +72,7 @@ VULKAN_SOURCES = \ anv_private.h \ anv_query.c \ anv_util.c \ - anv_x11.c \ + anv_wsi_x11.c \ gen8_state.c \ gen8_cmd_buffer.c \ gen8_pipeline.c \ diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c new file mode 100644 index 00000000000..3c6d41aa79e --- /dev/null +++ b/src/vulkan/anv_wsi_x11.c @@ -0,0 +1,302 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_private.h" + +#include +#include +#include + +static const VkFormat formats[] = { + VK_FORMAT_B5G6R5_UNORM, + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SRGB, +}; + +VkResult anv_GetDisplayInfoWSI( + VkDisplayWSI display, + VkDisplayInfoTypeWSI infoType, + size_t* pDataSize, + void* pData) +{ + VkDisplayFormatPropertiesWSI *properties = pData; + size_t size; + + if (pDataSize == NULL) + return VK_ERROR_INVALID_POINTER; + + switch (infoType) { + case VK_DISPLAY_INFO_TYPE_FORMAT_PROPERTIES_WSI: + size = sizeof(properties[0]) * ARRAY_SIZE(formats); + + if (pData == NULL) { + *pDataSize = size; + return VK_SUCCESS; + } + + if (*pDataSize < size) + return vk_error(VK_ERROR_INVALID_VALUE); + + *pDataSize = size; + + for (uint32_t i = 0; i < ARRAY_SIZE(formats); i++) + properties[i].swapChainFormat = formats[i]; + + return VK_SUCCESS; + + default: + return VK_UNSUPPORTED; + } +} + +struct anv_swap_chain { + struct anv_device * device; + xcb_connection_t * conn; + xcb_window_t window; + xcb_gc_t gc; + VkExtent2D extent; + uint32_t count; + struct { + struct anv_image * image; + struct anv_device_memory * memory; + xcb_pixmap_t pixmap; + } images[0]; +}; + +VkResult anv_CreateSwapChainWSI( + VkDevice _device, + const VkSwapChainCreateInfoWSI* pCreateInfo, + VkSwapChainWSI* pSwapChain) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + struct anv_swap_chain *chain; + xcb_void_cookie_t cookie; + VkResult result; + size_t size; + int ret; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI); + + size = sizeof(*chain) + pCreateInfo->imageCount * sizeof(chain->images[0]); + chain = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->device = device; + chain->conn = (xcb_connection_t *) pCreateInfo->pNativeWindowSystemHandle; + chain->window = (xcb_window_t) (uintptr_t) pCreateInfo->pNativeWindowHandle; + chain->count = 
pCreateInfo->imageCount; + chain->extent = pCreateInfo->imageExtent; + + for (uint32_t i = 0; i < chain->count; i++) { + VkDeviceMemory memory_h; + VkImage image_h; + struct anv_image *image; + struct anv_surface *surface; + struct anv_device_memory *memory; + + anv_image_create(_device, + &(struct anv_image_create_info) { + .force_tile_mode = true, + .tile_mode = XMAJOR, + .stride = 0, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->imageFormat, + .extent = { + .width = pCreateInfo->imageExtent.width, + .height = pCreateInfo->imageExtent.height, + .depth = 1 + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + &image_h); + + image = anv_image_from_handle(image_h); + assert(anv_format_is_color(image->format)); + + surface = &image->color_surface; + + anv_AllocMemory(_device, + &(VkMemoryAllocInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + .allocationSize = image->size, + .memoryTypeIndex = 0, + }, + &memory_h); + + memory = anv_device_memory_from_handle(memory_h); + + anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), + memory_h, 0); + + ret = anv_gem_set_tiling(device, memory->bo.gem_handle, + surface->stride, I915_TILING_X); + if (ret) { + result = vk_errorf(VK_ERROR_UNKNOWN, "set_tiling failed: %m"); + goto fail; + } + + int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); + if (fd == -1) { + result = vk_errorf(VK_ERROR_UNKNOWN, "handle_to_fd failed: %m"); + goto fail; + } + + uint32_t bpp = 32; + uint32_t depth = 24; + xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); + + cookie = + xcb_dri3_pixmap_from_buffer_checked(chain->conn, + pixmap, + chain->window, + image->size, + pCreateInfo->imageExtent.width, + pCreateInfo->imageExtent.height, + surface->stride, + depth, 
bpp, fd); + + chain->images[i].image = image; + chain->images[i].memory = memory; + chain->images[i].pixmap = pixmap; + image->swap_chain = chain; + + xcb_discard_reply(chain->conn, cookie.sequence); + } + + chain->gc = xcb_generate_id(chain->conn); + if (!chain->gc) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + cookie = xcb_create_gc(chain->conn, + chain->gc, + chain->window, + XCB_GC_GRAPHICS_EXPOSURES, + (uint32_t []) { 0 }); + xcb_discard_reply(chain->conn, cookie.sequence); + + *pSwapChain = anv_swap_chain_to_handle(chain); + + return VK_SUCCESS; + + fail: + return result; +} + +VkResult anv_DestroySwapChainWSI( + VkSwapChainWSI _chain) +{ + ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); + + anv_device_free(chain->device, chain); + + return VK_SUCCESS; +} + +VkResult anv_GetSwapChainInfoWSI( + VkSwapChainWSI _chain, + VkSwapChainInfoTypeWSI infoType, + size_t* pDataSize, + void* pData) +{ + ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); + + VkSwapChainImageInfoWSI *images; + size_t size; + + switch (infoType) { + case VK_SWAP_CHAIN_INFO_TYPE_PERSISTENT_IMAGES_WSI: + size = sizeof(*images) * chain->count; + if (pData && *pDataSize < size) + return VK_ERROR_INVALID_VALUE; + + *pDataSize = size; + if (!pData) + return VK_SUCCESS; + + images = pData; + for (uint32_t i = 0; i < chain->count; i++) { + images[i].image = anv_image_to_handle(chain->images[i].image); + images[i].memory = anv_device_memory_to_handle(chain->images[i].memory); + } + + return VK_SUCCESS; + + default: + return VK_UNSUPPORTED; + } +} + +VkResult anv_QueuePresentWSI( + VkQueue queue_, + const VkPresentInfoWSI* pPresentInfo) +{ + ANV_FROM_HANDLE(anv_image, image, pPresentInfo->image); + + struct anv_swap_chain *chain = image->swap_chain; + xcb_void_cookie_t cookie; + xcb_pixmap_t pixmap; + + assert(pPresentInfo->sType == VK_STRUCTURE_TYPE_PRESENT_INFO_WSI); + + if (chain == NULL) + return vk_error(VK_ERROR_INVALID_VALUE); + + pixmap = XCB_NONE; + for (uint32_t i = 0; i < 
chain->count; i++) { + if (image == chain->images[i].image) { + pixmap = chain->images[i].pixmap; + break; + } + } + + if (pixmap == XCB_NONE) + return vk_error(VK_ERROR_INVALID_VALUE); + + cookie = xcb_copy_area(chain->conn, + pixmap, + chain->window, + chain->gc, + 0, 0, + 0, 0, + chain->extent.width, + chain->extent.height); + xcb_discard_reply(chain->conn, cookie.sequence); + + xcb_flush(chain->conn); + + return VK_SUCCESS; +} diff --git a/src/vulkan/anv_x11.c b/src/vulkan/anv_x11.c deleted file mode 100644 index 3c6d41aa79e..00000000000 --- a/src/vulkan/anv_x11.c +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "anv_private.h" - -#include -#include -#include - -static const VkFormat formats[] = { - VK_FORMAT_B5G6R5_UNORM, - VK_FORMAT_B8G8R8A8_UNORM, - VK_FORMAT_B8G8R8A8_SRGB, -}; - -VkResult anv_GetDisplayInfoWSI( - VkDisplayWSI display, - VkDisplayInfoTypeWSI infoType, - size_t* pDataSize, - void* pData) -{ - VkDisplayFormatPropertiesWSI *properties = pData; - size_t size; - - if (pDataSize == NULL) - return VK_ERROR_INVALID_POINTER; - - switch (infoType) { - case VK_DISPLAY_INFO_TYPE_FORMAT_PROPERTIES_WSI: - size = sizeof(properties[0]) * ARRAY_SIZE(formats); - - if (pData == NULL) { - *pDataSize = size; - return VK_SUCCESS; - } - - if (*pDataSize < size) - return vk_error(VK_ERROR_INVALID_VALUE); - - *pDataSize = size; - - for (uint32_t i = 0; i < ARRAY_SIZE(formats); i++) - properties[i].swapChainFormat = formats[i]; - - return VK_SUCCESS; - - default: - return VK_UNSUPPORTED; - } -} - -struct anv_swap_chain { - struct anv_device * device; - xcb_connection_t * conn; - xcb_window_t window; - xcb_gc_t gc; - VkExtent2D extent; - uint32_t count; - struct { - struct anv_image * image; - struct anv_device_memory * memory; - xcb_pixmap_t pixmap; - } images[0]; -}; - -VkResult anv_CreateSwapChainWSI( - VkDevice _device, - const VkSwapChainCreateInfoWSI* pCreateInfo, - VkSwapChainWSI* pSwapChain) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - struct anv_swap_chain *chain; - xcb_void_cookie_t cookie; - VkResult result; - size_t size; - int ret; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI); - - size = sizeof(*chain) + pCreateInfo->imageCount * sizeof(chain->images[0]); - chain = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (chain == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - chain->device = device; - chain->conn = (xcb_connection_t *) pCreateInfo->pNativeWindowSystemHandle; - chain->window = (xcb_window_t) (uintptr_t) pCreateInfo->pNativeWindowHandle; - chain->count = 
pCreateInfo->imageCount; - chain->extent = pCreateInfo->imageExtent; - - for (uint32_t i = 0; i < chain->count; i++) { - VkDeviceMemory memory_h; - VkImage image_h; - struct anv_image *image; - struct anv_surface *surface; - struct anv_device_memory *memory; - - anv_image_create(_device, - &(struct anv_image_create_info) { - .force_tile_mode = true, - .tile_mode = XMAJOR, - .stride = 0, - .vk_info = - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = pCreateInfo->imageFormat, - .extent = { - .width = pCreateInfo->imageExtent.width, - .height = pCreateInfo->imageExtent.height, - .depth = 1 - }, - .mipLevels = 1, - .arraySize = 1, - .samples = 1, - /* FIXME: Need a way to use X tiling to allow scanout */ - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }}, - &image_h); - - image = anv_image_from_handle(image_h); - assert(anv_format_is_color(image->format)); - - surface = &image->color_surface; - - anv_AllocMemory(_device, - &(VkMemoryAllocInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, - .allocationSize = image->size, - .memoryTypeIndex = 0, - }, - &memory_h); - - memory = anv_device_memory_from_handle(memory_h); - - anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), - memory_h, 0); - - ret = anv_gem_set_tiling(device, memory->bo.gem_handle, - surface->stride, I915_TILING_X); - if (ret) { - result = vk_errorf(VK_ERROR_UNKNOWN, "set_tiling failed: %m"); - goto fail; - } - - int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); - if (fd == -1) { - result = vk_errorf(VK_ERROR_UNKNOWN, "handle_to_fd failed: %m"); - goto fail; - } - - uint32_t bpp = 32; - uint32_t depth = 24; - xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); - - cookie = - xcb_dri3_pixmap_from_buffer_checked(chain->conn, - pixmap, - chain->window, - image->size, - pCreateInfo->imageExtent.width, - pCreateInfo->imageExtent.height, - surface->stride, - depth, 
bpp, fd); - - chain->images[i].image = image; - chain->images[i].memory = memory; - chain->images[i].pixmap = pixmap; - image->swap_chain = chain; - - xcb_discard_reply(chain->conn, cookie.sequence); - } - - chain->gc = xcb_generate_id(chain->conn); - if (!chain->gc) { - result = vk_error(VK_ERROR_UNKNOWN); - goto fail; - } - - cookie = xcb_create_gc(chain->conn, - chain->gc, - chain->window, - XCB_GC_GRAPHICS_EXPOSURES, - (uint32_t []) { 0 }); - xcb_discard_reply(chain->conn, cookie.sequence); - - *pSwapChain = anv_swap_chain_to_handle(chain); - - return VK_SUCCESS; - - fail: - return result; -} - -VkResult anv_DestroySwapChainWSI( - VkSwapChainWSI _chain) -{ - ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); - - anv_device_free(chain->device, chain); - - return VK_SUCCESS; -} - -VkResult anv_GetSwapChainInfoWSI( - VkSwapChainWSI _chain, - VkSwapChainInfoTypeWSI infoType, - size_t* pDataSize, - void* pData) -{ - ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); - - VkSwapChainImageInfoWSI *images; - size_t size; - - switch (infoType) { - case VK_SWAP_CHAIN_INFO_TYPE_PERSISTENT_IMAGES_WSI: - size = sizeof(*images) * chain->count; - if (pData && *pDataSize < size) - return VK_ERROR_INVALID_VALUE; - - *pDataSize = size; - if (!pData) - return VK_SUCCESS; - - images = pData; - for (uint32_t i = 0; i < chain->count; i++) { - images[i].image = anv_image_to_handle(chain->images[i].image); - images[i].memory = anv_device_memory_to_handle(chain->images[i].memory); - } - - return VK_SUCCESS; - - default: - return VK_UNSUPPORTED; - } -} - -VkResult anv_QueuePresentWSI( - VkQueue queue_, - const VkPresentInfoWSI* pPresentInfo) -{ - ANV_FROM_HANDLE(anv_image, image, pPresentInfo->image); - - struct anv_swap_chain *chain = image->swap_chain; - xcb_void_cookie_t cookie; - xcb_pixmap_t pixmap; - - assert(pPresentInfo->sType == VK_STRUCTURE_TYPE_PRESENT_INFO_WSI); - - if (chain == NULL) - return vk_error(VK_ERROR_INVALID_VALUE); - - pixmap = XCB_NONE; - for (uint32_t i = 0; i < 
chain->count; i++) { - if (image == chain->images[i].image) { - pixmap = chain->images[i].pixmap; - break; - } - } - - if (pixmap == XCB_NONE) - return vk_error(VK_ERROR_INVALID_VALUE); - - cookie = xcb_copy_area(chain->conn, - pixmap, - chain->window, - chain->gc, - 0, 0, - 0, 0, - chain->extent.width, - chain->extent.height); - xcb_discard_reply(chain->conn, cookie.sequence); - - xcb_flush(chain->conn); - - return VK_SUCCESS; -} -- cgit v1.2.3 From 3d9fbb6575697744642c28464fb098552576ac97 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Sep 2015 17:00:10 -0700 Subject: vk: Add initial support for VK_WSI_swapchain --- src/vulkan/Makefile.am | 2 ++ src/vulkan/anv_device.c | 4 ++++ src/vulkan/anv_private.h | 1 + src/vulkan/anv_wsi.c | 41 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+) create mode 100644 src/vulkan/anv_wsi.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index a2a9c430869..9d52dad7bb1 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -27,6 +27,7 @@ vulkan_include_HEADERS = \ $(top_srcdir)/include/vulkan/vk_platform.h \ $(top_srcdir)/include/vulkan/vulkan.h \ $(top_srcdir)/include/vulkan/vulkan_intel.h \ + $(top_srcdir)/include/vulkan/vk_wsi_swapchain.h \ $(top_srcdir)/include/vulkan/vk_wsi_lunarg.h lib_LTLIBRARIES = libvulkan.la @@ -72,6 +73,7 @@ VULKAN_SOURCES = \ anv_private.h \ anv_query.c \ anv_util.c \ + anv_wsi.c \ anv_wsi_x11.c \ gen8_state.c \ gen8_cmd_buffer.c \ diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 25fedf908b8..e83bd4bf272 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -633,6 +633,10 @@ VkResult anv_DestroyDevice( } static const VkExtensionProperties global_extensions[] = { + { + .extName = "VK_WSI_swapchain", + .specVersion = 12 + }, { .extName = "VK_WSI_LunarG", .specVersion = 3 diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index ca62dc487d7..c09d4ad2f0d 100644 --- 
a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -47,6 +47,7 @@ #include #include #include +#include #include "anv_entrypoints.h" diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c new file mode 100644 index 00000000000..5ae9b85e540 --- /dev/null +++ b/src/vulkan/anv_wsi.c @@ -0,0 +1,41 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_private.h" + +VkResult +anv_GetPhysicalDeviceSurfaceSupportWSI( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + const VkSurfaceDescriptionWSI* pSurfaceDescription, + VkBool32* pSupported) +{ + assert(pSurfaceDescription->sType == + VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI); + + VkSurfaceDescriptionWindowWSI *window = (void *)pSurfaceDescription; + + *pSupported = window->platform == VK_PLATFORM_XCB_WSI; + + return VK_SUCCESS; +} -- cgit v1.2.3 From ca3cfbf6f1e009c0208ffaa483c8a7e80394639d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Sep 2015 18:59:06 -0700 Subject: vk: Add an initial implementation of the actual Khronos WSI extension Unfortunately, this is a very large commit and removes the old LunarG WSI extension. This is because there are a couple of entrypoints that have the same name between the two extensions so implementing them both is impractiacl. Support is still incomplete, but this is enough to get vkcube up and going again. --- include/vulkan/vk_wsi_lunarg.h | 212 ----------------------------------------- src/vulkan/Makefile.am | 2 +- src/vulkan/anv_device.c | 8 +- src/vulkan/anv_private.h | 5 +- src/vulkan/anv_wsi.c | 146 +++++++++++++++++++++++++++- src/vulkan/anv_wsi.h | 59 ++++++++++++ src/vulkan/anv_wsi_x11.c | 201 +++++++++++++++++++++----------------- 7 files changed, 324 insertions(+), 309 deletions(-) delete mode 100644 include/vulkan/vk_wsi_lunarg.h create mode 100644 src/vulkan/anv_wsi.h (limited to 'src') diff --git a/include/vulkan/vk_wsi_lunarg.h b/include/vulkan/vk_wsi_lunarg.h deleted file mode 100644 index 9587952d067..00000000000 --- a/include/vulkan/vk_wsi_lunarg.h +++ /dev/null @@ -1,212 +0,0 @@ -// -// File: vk_wsi_display.h -// -/* -** Copyright (c) 2014 The Khronos Group Inc. 
-** -** Permission is hereby granted, free of charge, to any person obtaining a -** copy of this software and/or associated documentation files (the -** "Materials"), to deal in the Materials without restriction, including -** without limitation the rights to use, copy, modify, merge, publish, -** distribute, sublicense, and/or sell copies of the Materials, and to -** permit persons to whom the Materials are furnished to do so, subject to -** the following conditions: -** -** The above copyright notice and this permission notice shall be included -** in all copies or substantial portions of the Materials. -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. -*/ - -#ifndef __VK_WSI_LUNARG_H__ -#define __VK_WSI_LUNARG_H__ - -#include "vulkan.h" - -#define VK_WSI_LUNARG_REVISION 3 -#define VK_WSI_LUNARG_EXTENSION_NUMBER 1 - -#ifdef __cplusplus -extern "C" -{ -#endif // __cplusplus - -// This macro defines INT_MAX in enumerations to force compilers to use 32 bits -// to represent them. This may or may not be necessary on some compilers. The -// option to compile it out may allow compilers that warn about missing enumerants -// in switch statements to be silenced. -// Using this macro is not needed for flag bit enums because those aren't used -// as storage type anywhere. -#define VK_MAX_ENUM(Prefix) VK_##Prefix##_MAX_ENUM = 0x7FFFFFFF - -// This macro defines the BEGIN_RANGE, END_RANGE, NUM, and MAX_ENUM constants for -// the enumerations. 
-#define VK_ENUM_RANGE(Prefix, First, Last) \ - VK_##Prefix##_BEGIN_RANGE = VK_##Prefix##_##First, \ - VK_##Prefix##_END_RANGE = VK_##Prefix##_##Last, \ - VK_NUM_##Prefix = (VK_##Prefix##_END_RANGE - VK_##Prefix##_BEGIN_RANGE + 1), \ - VK_MAX_ENUM(Prefix) - -// This is a helper macro to define the value of flag bit enum values. -#define VK_BIT(bit) (1 << (bit)) - -// ------------------------------------------------------------------------------------------------ -// Objects - -VK_DEFINE_HANDLE(VkDisplayWSI) -VK_DEFINE_HANDLE(VkSwapChainWSI) - -// ------------------------------------------------------------------------------------------------ -// Enumeration constants - -#define VK_WSI_LUNARG_ENUM(type,id) ((type)(VK_WSI_LUNARG_EXTENSION_NUMBER * -1000 + (id))) - -// Extend VkPhysicalDeviceInfoType enum with extension specific constants -#define VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI VK_WSI_LUNARG_ENUM(VkPhysicalDeviceInfoType, 0) -#define VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI VK_WSI_LUNARG_ENUM(VkPhysicalDeviceInfoType, 1) - -// Extend VkStructureType enum with extension specific constants -#define VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI VK_WSI_LUNARG_ENUM(VkStructureType, 0) -#define VK_STRUCTURE_TYPE_PRESENT_INFO_WSI VK_WSI_LUNARG_ENUM(VkStructureType, 1) - -// Extend VkImageLayout enum with extension specific constants -#define VK_IMAGE_LAYOUT_PRESENT_SOURCE_WSI VK_WSI_LUNARG_ENUM(VkImageLayout, 0) - -// ------------------------------------------------------------------------------------------------ -// Enumerations - -typedef enum VkDisplayInfoTypeWSI_ -{ - // Info type for vkGetDisplayInfo() - VK_DISPLAY_INFO_TYPE_FORMAT_PROPERTIES_WSI = 0x00000003, // Return the VkFormat(s) supported for swap chains with the display - - VK_ENUM_RANGE(DISPLAY_INFO_TYPE, FORMAT_PROPERTIES_WSI, FORMAT_PROPERTIES_WSI) -} VkDisplayInfoTypeWSI; - -typedef enum VkSwapChainInfoTypeWSI_ -{ - // Info type for vkGetSwapChainInfo() - 
VK_SWAP_CHAIN_INFO_TYPE_PERSISTENT_IMAGES_WSI = 0x00000000, // Return information about the persistent images of the swapchain - - VK_ENUM_RANGE(SWAP_CHAIN_INFO_TYPE, PERSISTENT_IMAGES_WSI, PERSISTENT_IMAGES_WSI) -} VkSwapChainInfoTypeWSI; - -// ------------------------------------------------------------------------------------------------ -// Flags - -typedef VkFlags VkSwapModeFlagsWSI; -typedef enum VkSwapModeFlagBitsWSI_ -{ - VK_SWAP_MODE_FLIP_BIT_WSI = VK_BIT(0), - VK_SWAP_MODE_BLIT_BIT_WSI = VK_BIT(1), -} VkSwapModeFlagBitsWSI; - -// ------------------------------------------------------------------------------------------------ -// Structures - -typedef struct VkDisplayPropertiesWSI_ -{ - VkDisplayWSI display; // Handle of the display object - VkExtent2D physicalResolution; // Max resolution for CRT? -} VkDisplayPropertiesWSI; - -typedef struct VkDisplayFormatPropertiesWSI_ -{ - VkFormat swapChainFormat; // Format of the images of the swap chain -} VkDisplayFormatPropertiesWSI; - -typedef struct VkSwapChainCreateInfoWSI_ -{ - VkStructureType sType; // Must be VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI - const void* pNext; // Pointer to next structure - - // TBD: It is not yet clear what the use will be for the following two - // values. It seems to be needed for more-global window-system handles - // (e.g. X11 display). If not needed for the SDK, we will drop it from - // this extension, and from a future version of this header. 
- const void* pNativeWindowSystemHandle; // Pointer to native window system handle - const void* pNativeWindowHandle; // Pointer to native window handle - - uint32_t displayCount; // Number of displays the swap chain is created for - const VkDisplayWSI* pDisplays; // displayCount number of display objects the swap chain is created for - - uint32_t imageCount; // Number of images in the swap chain - - VkFormat imageFormat; // Format of the images of the swap chain - VkExtent2D imageExtent; // Width and height of the images of the swap chain - uint32_t imageArraySize; // Number of layers of the images of the swap chain (needed for multi-view rendering) - VkFlags imageUsageFlags; // Usage flags for the images of the swap chain (see VkImageUsageFlags) - - VkFlags swapModeFlags; // Allowed swap modes (see VkSwapModeFlagsWSI) -} VkSwapChainCreateInfoWSI; - -typedef struct VkSwapChainImageInfoWSI_ -{ - VkImage image; // Persistent swap chain image handle - VkDeviceMemory memory; // Persistent swap chain image's memory handle -} VkSwapChainImageInfoWSI; - -typedef struct VkPhysicalDeviceQueuePresentPropertiesWSI_ -{ - VkBool32 supportsPresent; // Tells whether the queue supports presenting -} VkPhysicalDeviceQueuePresentPropertiesWSI; - -typedef struct VkPresentInfoWSI_ -{ - VkStructureType sType; // Must be VK_STRUCTURE_TYPE_PRESENT_INFO_WSI - const void* pNext; // Pointer to next structure - VkImage image; // Image to present - uint32_t flipInterval; // Flip interval -} VkPresentInfoWSI; - -// ------------------------------------------------------------------------------------------------ -// Function types - -typedef VkResult (VKAPI *PFN_vkGetDisplayInfoWSI)(VkDisplayWSI display, VkDisplayInfoTypeWSI infoType, size_t* pDataSize, void* pData); -typedef VkResult (VKAPI *PFN_vkCreateSwapChainWSI)(VkDevice device, const VkSwapChainCreateInfoWSI* pCreateInfo, VkSwapChainWSI* pSwapChain); -typedef VkResult (VKAPI *PFN_vkDestroySwapChainWSI)(VkSwapChainWSI swapChain); -typedef 
VkResult (VKAPI *PFN_vkGetSwapChainInfoWSI)(VkSwapChainWSI swapChain, VkSwapChainInfoTypeWSI infoType, size_t* pDataSize, void* pData); -typedef VkResult (VKAPI *PFN_vkQueuePresentWSI)(VkQueue queue, const VkPresentInfoWSI* pPresentInfo); - -// ------------------------------------------------------------------------------------------------ -// Function prototypes - -#ifdef VK_PROTOTYPES - -VkResult VKAPI vkGetDisplayInfoWSI( - VkDisplayWSI display, - VkDisplayInfoTypeWSI infoType, - size_t* pDataSize, - void* pData); - -VkResult VKAPI vkCreateSwapChainWSI( - VkDevice device, - const VkSwapChainCreateInfoWSI* pCreateInfo, - VkSwapChainWSI* pSwapChain); - -VkResult VKAPI vkDestroySwapChainWSI( - VkSwapChainWSI swapChain); - -VkResult VKAPI vkGetSwapChainInfoWSI( - VkSwapChainWSI swapChain, - VkSwapChainInfoTypeWSI infoType, - size_t* pDataSize, - void* pData); - -VkResult VKAPI vkQueuePresentWSI( - VkQueue queue, - const VkPresentInfoWSI* pPresentInfo); - -#endif // VK_PROTOTYPES - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // __VK_WSI_LUNARG_H__ diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 9d52dad7bb1..24e45cf873b 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -28,7 +28,7 @@ vulkan_include_HEADERS = \ $(top_srcdir)/include/vulkan/vulkan.h \ $(top_srcdir)/include/vulkan/vulkan_intel.h \ $(top_srcdir)/include/vulkan/vk_wsi_swapchain.h \ - $(top_srcdir)/include/vulkan/vk_wsi_lunarg.h + $(top_srcdir)/include/vulkan/vk_wsi_device_swapchain.h lib_LTLIBRARIES = libvulkan.la diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index e83bd4bf272..7ae11d6560b 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -637,10 +637,6 @@ static const VkExtensionProperties global_extensions[] = { .extName = "VK_WSI_swapchain", .specVersion = 12 }, - { - .extName = "VK_WSI_LunarG", - .specVersion = 3 - } }; VkResult anv_GetGlobalExtensionProperties( @@ -662,6 +658,10 @@ VkResult 
anv_GetGlobalExtensionProperties( } static const VkExtensionProperties device_extensions[] = { + { + .extName = "VK_WSI_device_swapchain", + .specVersion = 12 + }, }; VkResult anv_GetPhysicalDeviceExtensionProperties( diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index c09d4ad2f0d..06ef4e123f3 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -46,8 +46,8 @@ #define VK_PROTOTYPES #include #include -#include #include +#include #include "anv_entrypoints.h" @@ -1060,8 +1060,6 @@ struct anv_image { struct anv_bo *bo; VkDeviceSize offset; - struct anv_swap_chain *swap_chain; - /** RENDER_SURFACE_STATE.SurfaceType */ uint8_t surf_type; @@ -1287,7 +1285,6 @@ ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) -ANV_DEFINE_HANDLE_CASTS(anv_swap_chain, VkSwapChainWSI); ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCmdPool) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_attachment_view, VkAttachmentView) diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c index 5ae9b85e540..5c2dbb5fbca 100644 --- a/src/vulkan/anv_wsi.c +++ b/src/vulkan/anv_wsi.c @@ -21,7 +21,7 @@ * IN THE SOFTWARE. 
*/ -#include "anv_private.h" +#include "anv_wsi.h" VkResult anv_GetPhysicalDeviceSurfaceSupportWSI( @@ -39,3 +39,147 @@ anv_GetPhysicalDeviceSurfaceSupportWSI( return VK_SUCCESS; } + +VkResult +anv_GetSurfaceInfoWSI( + VkDevice _device, + const VkSurfaceDescriptionWSI* pSurfaceDescription, + VkSurfaceInfoTypeWSI infoType, + size_t* pDataSize, + void* pData) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + assert(pSurfaceDescription->sType == + VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI); + VkSurfaceDescriptionWindowWSI *window = + (VkSurfaceDescriptionWindowWSI *)pSurfaceDescription; + + switch (window->platform) { + case VK_PLATFORM_XCB_WSI: + return anv_x11_get_surface_info(device, window, infoType, + pDataSize, pData); + default: + return vk_error(VK_ERROR_INVALID_VALUE); + } +} + +VkResult +anv_CreateSwapChainWSI( + VkDevice _device, + const VkSwapChainCreateInfoWSI* pCreateInfo, + VkSwapChainWSI* pSwapChain) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_swap_chain *swap_chain; + VkResult result; + + assert(pCreateInfo->pSurfaceDescription->sType == + VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI); + VkSurfaceDescriptionWindowWSI *window = + (VkSurfaceDescriptionWindowWSI *)pCreateInfo->pSurfaceDescription; + + switch (window->platform) { + case VK_PLATFORM_XCB_WSI: + result = anv_x11_create_swap_chain(device, pCreateInfo, + (void *)&swap_chain); + break; + default: + return vk_error(VK_ERROR_INVALID_VALUE); + } + + if (result == VK_SUCCESS) + *pSwapChain = anv_swap_chain_to_handle(swap_chain); + + return result; +} + +VkResult +anv_DestroySwapChainWSI( + VkDevice device, + VkSwapChainWSI swapChain) +{ + ANV_FROM_HANDLE(anv_swap_chain, swap_chain, swapChain); + + assert(swap_chain->device == anv_device_from_handle(device)); + + switch (swap_chain->type) { + case ANV_SWAP_CHAIN_TYPE_X11: + return anv_x11_destroy_swap_chain((void *)swap_chain); + default: + return vk_error(VK_ERROR_INVALID_VALUE); + } +} + +VkResult 
+anv_GetSwapChainInfoWSI( + VkDevice device, + VkSwapChainWSI swapChain, + VkSwapChainInfoTypeWSI infoType, + size_t* pDataSize, + void* pData) +{ + ANV_FROM_HANDLE(anv_swap_chain, swap_chain, swapChain); + + assert(swap_chain->device == anv_device_from_handle(device)); + + switch (swap_chain->type) { + case ANV_SWAP_CHAIN_TYPE_X11: + return anv_x11_get_swap_chain_info((void *)swap_chain, + infoType, pDataSize, pData); + default: + return vk_error(VK_ERROR_INVALID_VALUE); + } +} + +VkResult +anv_AcquireNextImageWSI( + VkDevice device, + VkSwapChainWSI swapChain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t* pImageIndex) +{ + ANV_FROM_HANDLE(anv_swap_chain, swap_chain, swapChain); + + assert(swap_chain->device == anv_device_from_handle(device)); + + switch (swap_chain->type) { + case ANV_SWAP_CHAIN_TYPE_X11: + return anv_x11_acquire_next_image((void *)swap_chain, + timeout, semaphore, pImageIndex); + default: + return vk_error(VK_ERROR_INVALID_VALUE); + } +} + +VkResult +anv_QueuePresentWSI( + VkQueue _queue, + VkPresentInfoWSI* pPresentInfo) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + VkResult result; + + for (uint32_t i = 0; i < pPresentInfo->swapChainCount; i++) { + ANV_FROM_HANDLE(anv_swap_chain, swap_chain, pPresentInfo->swapChains[i]); + + assert(swap_chain->device == queue->device); + + switch (swap_chain->type) { + case ANV_SWAP_CHAIN_TYPE_X11: + result = anv_x11_queue_present(queue, (void *)swap_chain, + pPresentInfo->imageIndices[i]); + /* TODO: What if one of them returns OUT_OF_DATE? 
*/ + if (result != VK_SUCCESS) + return result; + else + continue; + + default: + return vk_error(VK_ERROR_INVALID_VALUE); + } + } + + return VK_SUCCESS; +} diff --git a/src/vulkan/anv_wsi.h b/src/vulkan/anv_wsi.h new file mode 100644 index 00000000000..1c1870f5453 --- /dev/null +++ b/src/vulkan/anv_wsi.h @@ -0,0 +1,59 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "anv_private.h" + +enum anv_swap_chain_type { + ANV_SWAP_CHAIN_TYPE_X11 = 11, +}; + +struct anv_swap_chain { + enum anv_swap_chain_type type; + + struct anv_device * device; +}; + +struct anv_x11_swap_chain; + +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swap_chain, VkSwapChainWSI) + +VkResult anv_x11_get_surface_info(struct anv_device *device, + VkSurfaceDescriptionWindowWSI *window, + VkSurfaceInfoTypeWSI infoType, + size_t* pDataSize, void* pData); +VkResult anv_x11_create_swap_chain(struct anv_device *device, + const VkSwapChainCreateInfoWSI *pCreateInfo, + struct anv_x11_swap_chain **swap_chain); +VkResult anv_x11_destroy_swap_chain(struct anv_x11_swap_chain *swap_chain); +VkResult anv_x11_get_swap_chain_info(struct anv_x11_swap_chain *swap_chain, + VkSwapChainInfoTypeWSI infoType, + size_t* pDataSize, void* pData); +VkResult anv_x11_acquire_next_image(struct anv_x11_swap_chain *swap_chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index); +VkResult anv_x11_queue_present(struct anv_queue *queue, + struct anv_x11_swap_chain *swap_chain, + uint32_t image_index); diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 3c6d41aa79e..403949b3410 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -21,61 +21,86 @@ * IN THE SOFTWARE. 
*/ -#include "anv_private.h" - #include #include #include +#include "anv_wsi.h" + static const VkFormat formats[] = { VK_FORMAT_B5G6R5_UNORM, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB, }; -VkResult anv_GetDisplayInfoWSI( - VkDisplayWSI display, - VkDisplayInfoTypeWSI infoType, - size_t* pDataSize, - void* pData) -{ - VkDisplayFormatPropertiesWSI *properties = pData; - size_t size; +static const VkSurfacePresentModePropertiesWSI present_modes[] = { + { VK_PRESENT_MODE_MAILBOX_WSI }, +}; +VkResult +anv_x11_get_surface_info(struct anv_device *device, + VkSurfaceDescriptionWindowWSI *window, + VkSurfaceInfoTypeWSI infoType, + size_t* pDataSize, void* pData) +{ if (pDataSize == NULL) - return VK_ERROR_INVALID_POINTER; + return vk_error(VK_ERROR_INVALID_POINTER); switch (infoType) { - case VK_DISPLAY_INFO_TYPE_FORMAT_PROPERTIES_WSI: - size = sizeof(properties[0]) * ARRAY_SIZE(formats); + case VK_SURFACE_INFO_TYPE_PROPERTIES_WSI: { + assert(*pDataSize >= sizeof(VkSurfacePropertiesWSI)); + VkSurfacePropertiesWSI *props = pData; + + props->minImageCount = 2; + props->maxImageCount = 4; + props->currentExtent = (VkExtent2D) { -1, -1 }; + props->minImageExtent = (VkExtent2D) { 1, 1 }; + props->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + props->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_WSI; + props->currentTransform = VK_SURFACE_TRANSFORM_NONE_WSI; + props->maxImageArraySize = 1; + props->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + return VK_SUCCESS; + } + + case VK_SURFACE_INFO_TYPE_FORMATS_WSI: if (pData == NULL) { - *pDataSize = size; + *pDataSize = sizeof(formats); return VK_SUCCESS; } - if (*pDataSize < size) - return vk_error(VK_ERROR_INVALID_VALUE); + assert(*pDataSize >= sizeof(formats)); + memcpy(pData, formats, *pDataSize); - *pDataSize = size; + return VK_SUCCESS; - for (uint32_t i = 0; i < ARRAY_SIZE(formats); i++) - properties[i].swapChainFormat = formats[i]; + case 
VK_SURFACE_INFO_TYPE_PRESENT_MODES_WSI: + if (pData == NULL) { + *pDataSize = sizeof(present_modes); + return VK_SUCCESS; + } - return VK_SUCCESS; + assert(*pDataSize >= sizeof(present_modes)); + memcpy(pData, present_modes, *pDataSize); + return VK_SUCCESS; default: - return VK_UNSUPPORTED; + return vk_error(VK_ERROR_INVALID_VALUE); } } -struct anv_swap_chain { - struct anv_device * device; +struct anv_x11_swap_chain { + struct anv_swap_chain base; + xcb_connection_t * conn; xcb_window_t window; xcb_gc_t gc; VkExtent2D extent; - uint32_t count; + uint32_t image_count; + uint32_t next_image; struct { struct anv_image * image; struct anv_device_memory * memory; @@ -83,41 +108,50 @@ struct anv_swap_chain { } images[0]; }; -VkResult anv_CreateSwapChainWSI( - VkDevice _device, - const VkSwapChainCreateInfoWSI* pCreateInfo, - VkSwapChainWSI* pSwapChain) +VkResult +anv_x11_create_swap_chain(struct anv_device *device, + const VkSwapChainCreateInfoWSI *pCreateInfo, + struct anv_x11_swap_chain **swap_chain_out) { - ANV_FROM_HANDLE(anv_device, device, _device); - - struct anv_swap_chain *chain; + struct anv_x11_swap_chain *chain; xcb_void_cookie_t cookie; VkResult result; - size_t size; - int ret; + + assert(pCreateInfo->pSurfaceDescription->sType == + VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI); + VkSurfaceDescriptionWindowWSI *vk_window = + (VkSurfaceDescriptionWindowWSI *)pCreateInfo->pSurfaceDescription; + assert(vk_window->platform == VK_PLATFORM_XCB_WSI); + + int num_images = pCreateInfo->minImageCount; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI); - size = sizeof(*chain) + pCreateInfo->imageCount * sizeof(chain->images[0]); + size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); chain = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (chain == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - chain->device = device; - chain->conn = (xcb_connection_t *) 
pCreateInfo->pNativeWindowSystemHandle; - chain->window = (xcb_window_t) (uintptr_t) pCreateInfo->pNativeWindowHandle; - chain->count = pCreateInfo->imageCount; + chain->base.type = ANV_SWAP_CHAIN_TYPE_X11; + chain->base.device = device; + + VkPlatformHandleXcbWSI *vk_xcb_handle = vk_window->pPlatformHandle; + + chain->conn = (xcb_connection_t *) vk_xcb_handle->connection; + chain->window = (xcb_window_t) (uintptr_t)vk_window->pPlatformWindow; chain->extent = pCreateInfo->imageExtent; + chain->image_count = num_images; + chain->next_image = 0; - for (uint32_t i = 0; i < chain->count; i++) { + for (uint32_t i = 0; i < chain->image_count; i++) { VkDeviceMemory memory_h; VkImage image_h; struct anv_image *image; struct anv_surface *surface; struct anv_device_memory *memory; - anv_image_create(_device, + anv_image_create(anv_device_to_handle(device), &(struct anv_image_create_info) { .force_tile_mode = true, .tile_mode = XMAJOR, @@ -147,7 +181,7 @@ VkResult anv_CreateSwapChainWSI( surface = &image->color_surface; - anv_AllocMemory(_device, + anv_AllocMemory(anv_device_to_handle(device), &(VkMemoryAllocInfo) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, .allocationSize = image->size, @@ -160,8 +194,8 @@ VkResult anv_CreateSwapChainWSI( anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), memory_h, 0); - ret = anv_gem_set_tiling(device, memory->bo.gem_handle, - surface->stride, I915_TILING_X); + int ret = anv_gem_set_tiling(device, memory->bo.gem_handle, + surface->stride, I915_TILING_X); if (ret) { result = vk_errorf(VK_ERROR_UNKNOWN, "set_tiling failed: %m"); goto fail; @@ -190,7 +224,6 @@ VkResult anv_CreateSwapChainWSI( chain->images[i].image = image; chain->images[i].memory = memory; chain->images[i].pixmap = pixmap; - image->swap_chain = chain; xcb_discard_reply(chain->conn, cookie.sequence); } @@ -208,7 +241,7 @@ VkResult anv_CreateSwapChainWSI( (uint32_t []) { 0 }); xcb_discard_reply(chain->conn, cookie.sequence); - *pSwapChain = 
anv_swap_chain_to_handle(chain); + *swap_chain_out = chain; return VK_SUCCESS; @@ -216,72 +249,66 @@ VkResult anv_CreateSwapChainWSI( return result; } -VkResult anv_DestroySwapChainWSI( - VkSwapChainWSI _chain) +VkResult +anv_x11_destroy_swap_chain(struct anv_x11_swap_chain *chain) { - ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); - - anv_device_free(chain->device, chain); + anv_device_free(chain->base.device, chain); return VK_SUCCESS; } -VkResult anv_GetSwapChainInfoWSI( - VkSwapChainWSI _chain, - VkSwapChainInfoTypeWSI infoType, - size_t* pDataSize, - void* pData) +VkResult +anv_x11_get_swap_chain_info(struct anv_x11_swap_chain *chain, + VkSwapChainInfoTypeWSI infoType, + size_t* pDataSize, void* pData) { - ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); - - VkSwapChainImageInfoWSI *images; size_t size; switch (infoType) { - case VK_SWAP_CHAIN_INFO_TYPE_PERSISTENT_IMAGES_WSI: - size = sizeof(*images) * chain->count; - if (pData && *pDataSize < size) - return VK_ERROR_INVALID_VALUE; + case VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI: { + VkSwapChainImagePropertiesWSI *images = pData; - *pDataSize = size; - if (!pData) + size = chain->image_count * sizeof(*images); + + if (pData == NULL) { + *pDataSize = size; return VK_SUCCESS; + } - images = pData; - for (uint32_t i = 0; i < chain->count; i++) { + assert(size <= *pDataSize); + for (uint32_t i = 0; i < chain->image_count; i++) images[i].image = anv_image_to_handle(chain->images[i].image); - images[i].memory = anv_device_memory_to_handle(chain->images[i].memory); - } + + *pDataSize = size; return VK_SUCCESS; + } default: - return VK_UNSUPPORTED; + return vk_error(VK_ERROR_INVALID_VALUE); } } -VkResult anv_QueuePresentWSI( - VkQueue queue_, - const VkPresentInfoWSI* pPresentInfo) +VkResult +anv_x11_acquire_next_image(struct anv_x11_swap_chain *chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index) { - ANV_FROM_HANDLE(anv_image, image, pPresentInfo->image); + anv_finishme("Implement real blocking 
AcquireNextImage"); + *image_index = chain->next_image; + chain->next_image = (chain->next_image + 1) % chain->image_count; + return VK_SUCCESS; +} - struct anv_swap_chain *chain = image->swap_chain; +VkResult +anv_x11_queue_present(struct anv_queue *queue, + struct anv_x11_swap_chain *chain, + uint32_t image_index) +{ xcb_void_cookie_t cookie; - xcb_pixmap_t pixmap; - - assert(pPresentInfo->sType == VK_STRUCTURE_TYPE_PRESENT_INFO_WSI); - if (chain == NULL) - return vk_error(VK_ERROR_INVALID_VALUE); - - pixmap = XCB_NONE; - for (uint32_t i = 0; i < chain->count; i++) { - if (image == chain->images[i].image) { - pixmap = chain->images[i].pixmap; - break; - } - } + xcb_pixmap_t pixmap = chain->images[image_index].pixmap; if (pixmap == XCB_NONE) return vk_error(VK_ERROR_INVALID_VALUE); -- cgit v1.2.3 From c0b97577e8900b46349c2b831784bf63b94eeee1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Sep 2015 10:28:26 -0700 Subject: vk/WSI: Use a callback mechanism instead of explicit switching --- src/vulkan/anv_wsi.c | 43 +++--------- src/vulkan/anv_wsi.h | 32 ++++----- src/vulkan/anv_wsi_x11.c | 176 +++++++++++++++++++++++++---------------------- 3 files changed, 114 insertions(+), 137 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c index 5c2dbb5fbca..404c905d419 100644 --- a/src/vulkan/anv_wsi.c +++ b/src/vulkan/anv_wsi.c @@ -103,12 +103,7 @@ anv_DestroySwapChainWSI( assert(swap_chain->device == anv_device_from_handle(device)); - switch (swap_chain->type) { - case ANV_SWAP_CHAIN_TYPE_X11: - return anv_x11_destroy_swap_chain((void *)swap_chain); - default: - return vk_error(VK_ERROR_INVALID_VALUE); - } + return swap_chain->destroy(swap_chain); } VkResult @@ -123,13 +118,8 @@ anv_GetSwapChainInfoWSI( assert(swap_chain->device == anv_device_from_handle(device)); - switch (swap_chain->type) { - case ANV_SWAP_CHAIN_TYPE_X11: - return anv_x11_get_swap_chain_info((void *)swap_chain, - infoType, pDataSize, pData); - default: - 
return vk_error(VK_ERROR_INVALID_VALUE); - } + return swap_chain->get_swap_chain_info(swap_chain, infoType, + pDataSize, pData); } VkResult @@ -144,13 +134,8 @@ anv_AcquireNextImageWSI( assert(swap_chain->device == anv_device_from_handle(device)); - switch (swap_chain->type) { - case ANV_SWAP_CHAIN_TYPE_X11: - return anv_x11_acquire_next_image((void *)swap_chain, - timeout, semaphore, pImageIndex); - default: - return vk_error(VK_ERROR_INVALID_VALUE); - } + return swap_chain->acquire_next_image(swap_chain, + timeout, semaphore, pImageIndex); } VkResult @@ -166,19 +151,11 @@ anv_QueuePresentWSI( assert(swap_chain->device == queue->device); - switch (swap_chain->type) { - case ANV_SWAP_CHAIN_TYPE_X11: - result = anv_x11_queue_present(queue, (void *)swap_chain, - pPresentInfo->imageIndices[i]); - /* TODO: What if one of them returns OUT_OF_DATE? */ - if (result != VK_SUCCESS) - return result; - else - continue; - - default: - return vk_error(VK_ERROR_INVALID_VALUE); - } + result = swap_chain->queue_present(swap_chain, queue, + pPresentInfo->imageIndices[i]); + /* TODO: What if one of them returns OUT_OF_DATE? 
*/ + if (result != VK_SUCCESS) + return result; } return VK_SUCCESS; diff --git a/src/vulkan/anv_wsi.h b/src/vulkan/anv_wsi.h index 1c1870f5453..3a5fba13a56 100644 --- a/src/vulkan/anv_wsi.h +++ b/src/vulkan/anv_wsi.h @@ -25,17 +25,20 @@ #include "anv_private.h" -enum anv_swap_chain_type { - ANV_SWAP_CHAIN_TYPE_X11 = 11, -}; - struct anv_swap_chain { - enum anv_swap_chain_type type; - struct anv_device * device; -}; -struct anv_x11_swap_chain; + VkResult (*destroy)(struct anv_swap_chain *swap_chain); + VkResult (*get_swap_chain_info)(struct anv_swap_chain *swap_chain, + VkSwapChainInfoTypeWSI infoType, + size_t *pDataSize, void *pData); + VkResult (*acquire_next_image)(struct anv_swap_chain *swap_chain, + uint64_t timeout, VkSemaphore semaphore, + uint32_t *image_index); + VkResult (*queue_present)(struct anv_swap_chain *swap_chain, + struct anv_queue *queue, + uint32_t image_index); +}; ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swap_chain, VkSwapChainWSI) @@ -45,15 +48,4 @@ VkResult anv_x11_get_surface_info(struct anv_device *device, size_t* pDataSize, void* pData); VkResult anv_x11_create_swap_chain(struct anv_device *device, const VkSwapChainCreateInfoWSI *pCreateInfo, - struct anv_x11_swap_chain **swap_chain); -VkResult anv_x11_destroy_swap_chain(struct anv_x11_swap_chain *swap_chain); -VkResult anv_x11_get_swap_chain_info(struct anv_x11_swap_chain *swap_chain, - VkSwapChainInfoTypeWSI infoType, - size_t* pDataSize, void* pData); -VkResult anv_x11_acquire_next_image(struct anv_x11_swap_chain *swap_chain, - uint64_t timeout, - VkSemaphore semaphore, - uint32_t *image_index); -VkResult anv_x11_queue_present(struct anv_queue *queue, - struct anv_x11_swap_chain *swap_chain, - uint32_t image_index); + struct anv_swap_chain **swap_chain); diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 403949b3410..54260f56ad9 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -92,7 +92,7 @@ anv_x11_get_surface_info(struct anv_device *device, } } 
-struct anv_x11_swap_chain { +struct x11_swap_chain { struct anv_swap_chain base; xcb_connection_t * conn; @@ -108,12 +108,96 @@ struct anv_x11_swap_chain { } images[0]; }; +static VkResult +x11_get_swap_chain_info(struct anv_swap_chain *anv_chain, + VkSwapChainInfoTypeWSI infoType, + size_t* pDataSize, void* pData) +{ + struct x11_swap_chain *chain = (struct x11_swap_chain *)anv_chain; + size_t size; + + switch (infoType) { + case VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI: { + VkSwapChainImagePropertiesWSI *images = pData; + + size = chain->image_count * sizeof(*images); + + if (pData == NULL) { + *pDataSize = size; + return VK_SUCCESS; + } + + assert(size <= *pDataSize); + for (uint32_t i = 0; i < chain->image_count; i++) + images[i].image = anv_image_to_handle(chain->images[i].image); + + *pDataSize = size; + + return VK_SUCCESS; + } + + default: + return vk_error(VK_ERROR_INVALID_VALUE); + } +} + +static VkResult +x11_acquire_next_image(struct anv_swap_chain *anv_chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index) +{ + struct x11_swap_chain *chain = (struct x11_swap_chain *)anv_chain; + + anv_finishme("Implement real blocking AcquireNextImage"); + *image_index = chain->next_image; + chain->next_image = (chain->next_image + 1) % chain->image_count; + return VK_SUCCESS; +} + +static VkResult +x11_queue_present(struct anv_swap_chain *anv_chain, + struct anv_queue *queue, + uint32_t image_index) +{ + struct x11_swap_chain *chain = (struct x11_swap_chain *)anv_chain; + + xcb_void_cookie_t cookie; + + xcb_pixmap_t pixmap = chain->images[image_index].pixmap; + + if (pixmap == XCB_NONE) + return vk_error(VK_ERROR_INVALID_VALUE); + + cookie = xcb_copy_area(chain->conn, + pixmap, + chain->window, + chain->gc, + 0, 0, + 0, 0, + chain->extent.width, + chain->extent.height); + xcb_discard_reply(chain->conn, cookie.sequence); + + xcb_flush(chain->conn); + + return VK_SUCCESS; +} + +static VkResult +x11_destroy_swap_chain(struct anv_swap_chain *chain) +{ + 
anv_device_free(chain->device, chain); + + return VK_SUCCESS; +} + VkResult anv_x11_create_swap_chain(struct anv_device *device, const VkSwapChainCreateInfoWSI *pCreateInfo, - struct anv_x11_swap_chain **swap_chain_out) + struct anv_swap_chain **swap_chain_out) { - struct anv_x11_swap_chain *chain; + struct x11_swap_chain *chain; xcb_void_cookie_t cookie; VkResult result; @@ -133,8 +217,11 @@ anv_x11_create_swap_chain(struct anv_device *device, if (chain == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - chain->base.type = ANV_SWAP_CHAIN_TYPE_X11; chain->base.device = device; + chain->base.destroy = x11_destroy_swap_chain; + chain->base.get_swap_chain_info = x11_get_swap_chain_info; + chain->base.acquire_next_image = x11_acquire_next_image; + chain->base.queue_present = x11_queue_present; VkPlatformHandleXcbWSI *vk_xcb_handle = vk_window->pPlatformHandle; @@ -241,89 +328,10 @@ anv_x11_create_swap_chain(struct anv_device *device, (uint32_t []) { 0 }); xcb_discard_reply(chain->conn, cookie.sequence); - *swap_chain_out = chain; + *swap_chain_out = &chain->base; return VK_SUCCESS; fail: return result; } - -VkResult -anv_x11_destroy_swap_chain(struct anv_x11_swap_chain *chain) -{ - anv_device_free(chain->base.device, chain); - - return VK_SUCCESS; -} - -VkResult -anv_x11_get_swap_chain_info(struct anv_x11_swap_chain *chain, - VkSwapChainInfoTypeWSI infoType, - size_t* pDataSize, void* pData) -{ - size_t size; - - switch (infoType) { - case VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI: { - VkSwapChainImagePropertiesWSI *images = pData; - - size = chain->image_count * sizeof(*images); - - if (pData == NULL) { - *pDataSize = size; - return VK_SUCCESS; - } - - assert(size <= *pDataSize); - for (uint32_t i = 0; i < chain->image_count; i++) - images[i].image = anv_image_to_handle(chain->images[i].image); - - *pDataSize = size; - - return VK_SUCCESS; - } - - default: - return vk_error(VK_ERROR_INVALID_VALUE); - } -} - -VkResult -anv_x11_acquire_next_image(struct anv_x11_swap_chain 
*chain, - uint64_t timeout, - VkSemaphore semaphore, - uint32_t *image_index) -{ - anv_finishme("Implement real blocking AcquireNextImage"); - *image_index = chain->next_image; - chain->next_image = (chain->next_image + 1) % chain->image_count; - return VK_SUCCESS; -} - -VkResult -anv_x11_queue_present(struct anv_queue *queue, - struct anv_x11_swap_chain *chain, - uint32_t image_index) -{ - xcb_void_cookie_t cookie; - - xcb_pixmap_t pixmap = chain->images[image_index].pixmap; - - if (pixmap == XCB_NONE) - return vk_error(VK_ERROR_INVALID_VALUE); - - cookie = xcb_copy_area(chain->conn, - pixmap, - chain->window, - chain->gc, - 0, 0, - 0, 0, - chain->extent.width, - chain->extent.height); - xcb_discard_reply(chain->conn, cookie.sequence); - - xcb_flush(chain->conn); - - return VK_SUCCESS; -} -- cgit v1.2.3 From 06d8fd58818c4574ba233edb4566509577d79dd0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Sep 2015 11:14:03 -0700 Subject: vk/instance: Expose anv_instance_alloc/free --- src/vulkan/anv_device.c | 4 ++-- src/vulkan/anv_private.h | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 7ae11d6560b..0df1bbeda3c 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -158,7 +158,7 @@ VkResult anv_DestroyInstance( return VK_SUCCESS; } -static void * +void * anv_instance_alloc(struct anv_instance *instance, size_t size, size_t alignment, VkSystemAllocType allocType) { @@ -171,7 +171,7 @@ anv_instance_alloc(struct anv_instance *instance, size_t size, return mem; } -static void +void anv_instance_free(struct anv_instance *instance, void *mem) { if (mem == NULL) diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 06ef4e123f3..8be2aa2bee1 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -411,6 +411,16 @@ struct anv_device { pthread_mutex_t mutex; }; +void * +anv_instance_alloc(struct anv_instance * instance, 
+ size_t size, + size_t alignment, + VkSystemAllocType allocType); + +void +anv_instance_free(struct anv_instance * instance, + void * mem); + void * anv_device_alloc(struct anv_device * device, size_t size, -- cgit v1.2.3 From 348cb29a20e5076b84d0e6d01ffa7b6128b80ea8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Sep 2015 11:14:45 -0700 Subject: vk/wsi: Move to a callback system for the entire WSI implementation We do this for two reasons: First, because it allows us to simplify WSI and compiling in/out support for a particular platform is as simple as calling or not calling the platform-specific init function. Second, the implementation gives us a place for a given chunk of the WSI to stash stuff in the instance. --- src/vulkan/anv_device.c | 4 ++++ src/vulkan/anv_private.h | 5 +++++ src/vulkan/anv_wsi.c | 54 ++++++++++++++++++++++++++++++++--------------- src/vulkan/anv_wsi.h | 25 ++++++++++++++++------ src/vulkan/anv_wsi_x11.c | 55 ++++++++++++++++++++++++++++++++++++++++-------- 5 files changed, 110 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 0df1bbeda3c..c1758de537c 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -139,6 +139,8 @@ VkResult anv_CreateInstance( VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); + anv_init_wsi(instance); + *pInstance = anv_instance_to_handle(instance); return VK_SUCCESS; @@ -149,6 +151,8 @@ VkResult anv_DestroyInstance( { ANV_FROM_HANDLE(anv_instance, instance, _instance); + anv_finish_wsi(instance); + VG(VALGRIND_DESTROY_MEMPOOL(instance)); _mesa_locale_fini(); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8be2aa2bee1..0c7d5e8a536 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -343,8 +343,13 @@ struct anv_instance { uint32_t apiVersion; uint32_t physicalDeviceCount; struct anv_physical_device physicalDevice; + + struct anv_wsi_implementation * wsi_impl[VK_PLATFORM_NUM_WSI]; 
}; +VkResult anv_init_wsi(struct anv_instance *instance); +void anv_finish_wsi(struct anv_instance *instance); + struct anv_meta_state { struct { VkPipeline pipeline; diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c index 404c905d419..24379337a5e 100644 --- a/src/vulkan/anv_wsi.c +++ b/src/vulkan/anv_wsi.c @@ -23,6 +23,19 @@ #include "anv_wsi.h" +VkResult +anv_init_wsi(struct anv_instance *instance) +{ + memset(instance->wsi_impl, 0, sizeof(instance->wsi_impl)); + return anv_x11_init_wsi(instance); +} + +void +anv_finish_wsi(struct anv_instance *instance) +{ + anv_x11_finish_wsi(instance); +} + VkResult anv_GetPhysicalDeviceSurfaceSupportWSI( VkPhysicalDevice physicalDevice, @@ -30,14 +43,23 @@ anv_GetPhysicalDeviceSurfaceSupportWSI( const VkSurfaceDescriptionWSI* pSurfaceDescription, VkBool32* pSupported) { + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + assert(pSurfaceDescription->sType == VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI); VkSurfaceDescriptionWindowWSI *window = (void *)pSurfaceDescription; - *pSupported = window->platform == VK_PLATFORM_XCB_WSI; + struct anv_wsi_implementation *impl = + physical_device->instance->wsi_impl[window->platform]; - return VK_SUCCESS; + if (impl) { + return impl->get_window_supported(impl, physical_device, + window, pSupported); + } else { + *pSupported = false; + return VK_SUCCESS; + } } VkResult @@ -55,13 +77,13 @@ anv_GetSurfaceInfoWSI( VkSurfaceDescriptionWindowWSI *window = (VkSurfaceDescriptionWindowWSI *)pSurfaceDescription; - switch (window->platform) { - case VK_PLATFORM_XCB_WSI: - return anv_x11_get_surface_info(device, window, infoType, - pDataSize, pData); - default: - return vk_error(VK_ERROR_INVALID_VALUE); - } + struct anv_wsi_implementation *impl = + device->instance->wsi_impl[window->platform]; + + assert(impl); + + return impl->get_surface_info(impl, device, window, infoType, + pDataSize, pData); } VkResult @@ -79,14 +101,12 @@ anv_CreateSwapChainWSI( 
VkSurfaceDescriptionWindowWSI *window = (VkSurfaceDescriptionWindowWSI *)pCreateInfo->pSurfaceDescription; - switch (window->platform) { - case VK_PLATFORM_XCB_WSI: - result = anv_x11_create_swap_chain(device, pCreateInfo, - (void *)&swap_chain); - break; - default: - return vk_error(VK_ERROR_INVALID_VALUE); - } + struct anv_wsi_implementation *impl = + device->instance->wsi_impl[window->platform]; + + assert(impl); + + result = impl->create_swap_chain(impl, device, pCreateInfo, &swap_chain); if (result == VK_SUCCESS) *pSwapChain = anv_swap_chain_to_handle(swap_chain); diff --git a/src/vulkan/anv_wsi.h b/src/vulkan/anv_wsi.h index 3a5fba13a56..3ee3fcee56d 100644 --- a/src/vulkan/anv_wsi.h +++ b/src/vulkan/anv_wsi.h @@ -42,10 +42,21 @@ struct anv_swap_chain { ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swap_chain, VkSwapChainWSI) -VkResult anv_x11_get_surface_info(struct anv_device *device, - VkSurfaceDescriptionWindowWSI *window, - VkSurfaceInfoTypeWSI infoType, - size_t* pDataSize, void* pData); -VkResult anv_x11_create_swap_chain(struct anv_device *device, - const VkSwapChainCreateInfoWSI *pCreateInfo, - struct anv_swap_chain **swap_chain); +struct anv_wsi_implementation { + VkResult (*get_window_supported)(struct anv_wsi_implementation *impl, + struct anv_physical_device *physical_device, + const VkSurfaceDescriptionWindowWSI *window, + VkBool32 *pSupported); + VkResult (*get_surface_info)(struct anv_wsi_implementation *impl, + struct anv_device *device, + VkSurfaceDescriptionWindowWSI *window, + VkSurfaceInfoTypeWSI infoType, + size_t* pDataSize, void* pData); + VkResult (*create_swap_chain)(struct anv_wsi_implementation *impl, + struct anv_device *device, + const VkSwapChainCreateInfoWSI *pCreateInfo, + struct anv_swap_chain **swap_chain); +}; + +VkResult anv_x11_init_wsi(struct anv_instance *instance); +void anv_x11_finish_wsi(struct anv_instance *instance); diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 54260f56ad9..212c01be0b9 100644 --- 
a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -37,11 +37,22 @@ static const VkSurfacePresentModePropertiesWSI present_modes[] = { { VK_PRESENT_MODE_MAILBOX_WSI }, }; -VkResult -anv_x11_get_surface_info(struct anv_device *device, - VkSurfaceDescriptionWindowWSI *window, - VkSurfaceInfoTypeWSI infoType, - size_t* pDataSize, void* pData) +static VkResult +x11_get_window_supported(struct anv_wsi_implementation *impl, + struct anv_physical_device *physical_device, + const VkSurfaceDescriptionWindowWSI *window, + VkBool32 *pSupported) +{ + *pSupported = true; + stub_return(VK_SUCCESS); +} + +static VkResult +x11_get_surface_info(struct anv_wsi_implementation *impl, + struct anv_device *device, + VkSurfaceDescriptionWindowWSI *window, + VkSurfaceInfoTypeWSI infoType, + size_t* pDataSize, void* pData) { if (pDataSize == NULL) return vk_error(VK_ERROR_INVALID_POINTER); @@ -192,10 +203,11 @@ x11_destroy_swap_chain(struct anv_swap_chain *chain) return VK_SUCCESS; } -VkResult -anv_x11_create_swap_chain(struct anv_device *device, - const VkSwapChainCreateInfoWSI *pCreateInfo, - struct anv_swap_chain **swap_chain_out) +static VkResult +x11_create_swap_chain(struct anv_wsi_implementation *impl, + struct anv_device *device, + const VkSwapChainCreateInfoWSI *pCreateInfo, + struct anv_swap_chain **swap_chain_out) { struct x11_swap_chain *chain; xcb_void_cookie_t cookie; @@ -335,3 +347,28 @@ anv_x11_create_swap_chain(struct anv_device *device, fail: return result; } + +VkResult +anv_x11_init_wsi(struct anv_instance *instance) +{ + struct anv_wsi_implementation *impl; + + impl = anv_instance_alloc(instance, sizeof(*impl), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (!impl) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + impl->get_window_supported = x11_get_window_supported; + impl->get_surface_info = x11_get_surface_info; + impl->create_swap_chain = x11_create_swap_chain; + + instance->wsi_impl[VK_PLATFORM_XCB_WSI] = impl; + + return VK_SUCCESS; +} + +void 
+anv_x11_finish_wsi(struct anv_instance *instance) +{ + anv_instance_free(instance, instance->wsi_impl[VK_PLATFORM_XCB_WSI]); +} -- cgit v1.2.3 From 48e87c01630b8559e4e5ee669b77b8b1d10fd623 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Sep 2015 17:50:32 -0700 Subject: vk/wsi: Add Wayland WSI support --- src/vulkan/Makefile.am | 2 + src/vulkan/anv_wsi.c | 16 +- src/vulkan/anv_wsi.h | 2 + src/vulkan/anv_wsi_wayland.c | 799 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 818 insertions(+), 1 deletion(-) create mode 100644 src/vulkan/anv_wsi_wayland.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 24e45cf873b..014183c0f71 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -42,6 +42,7 @@ AM_CPPFLAGS = \ $(DEFINES) \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/egl/wayland/wayland-drm \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ @@ -74,6 +75,7 @@ VULKAN_SOURCES = \ anv_query.c \ anv_util.c \ anv_wsi.c \ + anv_wsi_wayland.c \ anv_wsi_x11.c \ gen8_state.c \ gen8_cmd_buffer.c \ diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c index 24379337a5e..3778a1b20e2 100644 --- a/src/vulkan/anv_wsi.c +++ b/src/vulkan/anv_wsi.c @@ -26,13 +26,27 @@ VkResult anv_init_wsi(struct anv_instance *instance) { + VkResult result; + memset(instance->wsi_impl, 0, sizeof(instance->wsi_impl)); - return anv_x11_init_wsi(instance); + + result = anv_x11_init_wsi(instance); + if (result != VK_SUCCESS) + return result; + + result = anv_wl_init_wsi(instance); + if (result != VK_SUCCESS) { + anv_x11_finish_wsi(instance); + return result; + } + + return VK_SUCCESS; } void anv_finish_wsi(struct anv_instance *instance) { + anv_wl_finish_wsi(instance); anv_x11_finish_wsi(instance); } diff --git a/src/vulkan/anv_wsi.h b/src/vulkan/anv_wsi.h index 3ee3fcee56d..cbff854c2fc 100644 --- a/src/vulkan/anv_wsi.h +++ b/src/vulkan/anv_wsi.h @@ -60,3 
+60,5 @@ struct anv_wsi_implementation { VkResult anv_x11_init_wsi(struct anv_instance *instance); void anv_x11_finish_wsi(struct anv_instance *instance); +VkResult anv_wl_init_wsi(struct anv_instance *instance); +void anv_wl_finish_wsi(struct anv_instance *instance); diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c new file mode 100644 index 00000000000..e23f5933fe4 --- /dev/null +++ b/src/vulkan/anv_wsi_wayland.c @@ -0,0 +1,799 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include + +#include "anv_wsi.h" + +#include + +#define MIN_NUM_IMAGES 2 + +struct wsi_wl_display { + struct wl_display * display; + struct wl_drm * drm; + + /* Vector of VkFormats supported */ + struct anv_vector formats; + + uint32_t capabilities; +}; + +struct wsi_wayland { + struct anv_wsi_implementation base; + + struct anv_instance * instance; + + pthread_mutex_t mutex; + /* Hash table of wl_display -> wsi_wl_display mappings */ + struct hash_table * displays; +}; + +static void +wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format) +{ + /* Don't add a format that's already in the list */ + VkFormat *f; + anv_vector_foreach(f, &display->formats) + if (*f == format) + return; + + /* Don't add formats which aren't supported by the driver */ + if (anv_format_for_vk_format(format)->cpp == 0) + return; + + f = anv_vector_add(&display->formats); + if (f) + *f = format; +} + +static void +drm_handle_device(void *data, struct wl_drm *drm, const char *name) +{ + fprintf(stderr, "wl_drm.device(%s)\n", name); +} + +static uint32_t +wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha) +{ + switch (vk_format) { + case VK_FORMAT_R4G4B4A4_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR4444 : WL_DRM_FORMAT_XBGR4444; + case VK_FORMAT_R5G6B5_UNORM: + return WL_DRM_FORMAT_BGR565; + case VK_FORMAT_R5G5B5A1_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR1555 : WL_DRM_FORMAT_XBGR1555; + case VK_FORMAT_R8G8B8_UNORM: + return WL_DRM_FORMAT_XBGR8888; + case VK_FORMAT_R8G8B8A8_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR8888 : WL_DRM_FORMAT_XBGR8888; + case VK_FORMAT_R10G10B10A2_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR2101010 : WL_DRM_FORMAT_XBGR2101010; + case VK_FORMAT_B4G4R4A4_UNORM: + return alpha ? WL_DRM_FORMAT_ARGB4444 : WL_DRM_FORMAT_XRGB4444; + case VK_FORMAT_B5G6R5_UNORM: + return WL_DRM_FORMAT_RGB565; + case VK_FORMAT_B5G5R5A1_UNORM: + return alpha ? 
WL_DRM_FORMAT_XRGB1555 : WL_DRM_FORMAT_XRGB1555; + case VK_FORMAT_B8G8R8_UNORM: + return WL_DRM_FORMAT_BGRX8888; + case VK_FORMAT_B8G8R8A8_UNORM: + return alpha ? WL_DRM_FORMAT_ARGB8888 : WL_DRM_FORMAT_XRGB8888; + case VK_FORMAT_B10G10R10A2_UNORM: + return alpha ? WL_DRM_FORMAT_ARGB2101010 : WL_DRM_FORMAT_XRGB2101010; + + default: + assert("!Unsupported Vulkan format"); + return 0; + } +} + +static void +drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format) +{ + struct wsi_wl_display *display = data; + + switch (wl_format) { + case WL_DRM_FORMAT_ABGR4444: + case WL_DRM_FORMAT_XBGR4444: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R4G4B4A4_UNORM); + break; + case WL_DRM_FORMAT_BGR565: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G6B5_UNORM); + break; + case WL_DRM_FORMAT_ABGR1555: + case WL_DRM_FORMAT_XBGR1555: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G5B5A1_UNORM); + break; + case WL_DRM_FORMAT_XBGR8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8_UNORM); + /* fallthrough */ + case WL_DRM_FORMAT_ABGR8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8A8_UNORM); + break; + case WL_DRM_FORMAT_ABGR2101010: + case WL_DRM_FORMAT_XBGR2101010: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R10G10B10A2_UNORM); + break; + case WL_DRM_FORMAT_ARGB4444: + case WL_DRM_FORMAT_XRGB4444: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B4G4R4A4_UNORM); + break; + case WL_DRM_FORMAT_RGB565: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G6R5_UNORM); + break; + case WL_DRM_FORMAT_ARGB1555: + case WL_DRM_FORMAT_XRGB1555: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G5R5A1_UNORM); + break; + case WL_DRM_FORMAT_XRGB8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_UNORM); + /* fallthrough */ + case WL_DRM_FORMAT_ARGB8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_UNORM); + break; + case WL_DRM_FORMAT_ARGB2101010: + case WL_DRM_FORMAT_XRGB2101010: + 
wsi_wl_display_add_vk_format(display, VK_FORMAT_B10G10R10A2_UNORM); + break; + } +} + +static void +drm_handle_authenticated(void *data, struct wl_drm *drm) +{ +} + +static void +drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t capabilities) +{ + struct wsi_wl_display *display = data; + + display->capabilities = capabilities; +} + +static const struct wl_drm_listener drm_listener = { + drm_handle_device, + drm_handle_format, + drm_handle_authenticated, + drm_handle_capabilities, +}; + +static void +registry_handle_global(void *data, struct wl_registry *registry, + uint32_t name, const char *interface, uint32_t version) +{ + struct wsi_wl_display *display = data; + + if (strcmp(interface, "wl_drm") == 0) { + assert(display->drm == NULL); + + assert(version >= 2); + display->drm = wl_registry_bind(registry, name, &wl_drm_interface, 2); + + if (display->drm) + wl_drm_add_listener(display->drm, &drm_listener, display); + } +} + +static void +registry_handle_global_remove(void *data, struct wl_registry *registry, + uint32_t name) +{ /* No-op */ } + +static const struct wl_registry_listener registry_listener = { + registry_handle_global, + registry_handle_global_remove +}; + +static void +wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display) +{ + anv_vector_finish(&display->formats); + if (display->drm) + wl_drm_destroy(display->drm); + anv_instance_free(wsi->instance, display); +} + +static struct wsi_wl_display * +wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display) +{ + struct wsi_wl_display *display = + anv_instance_alloc(wsi->instance, sizeof(*display), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (!display) + return NULL; + + memset(display, 0, sizeof(*display)); + + display->display = wl_display; + + if (!anv_vector_init(&display->formats, sizeof(VkFormat), 8)) + goto fail; + + struct wl_registry *registry = wl_display_get_registry(wl_display); + if (!registry) + return NULL; + + 
wl_registry_add_listener(registry, ®istry_listener, display); + + /* Round-rip to get the wl_drm global */ + wl_display_roundtrip(wl_display); + + if (!display->drm) + goto fail; + + /* Round-rip to get wl_drm formats and capabilities */ + wl_display_roundtrip(wl_display); + + /* We need prime support */ + if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME)) + goto fail; + + /* We don't need this anymore */ + wl_registry_destroy(registry); + + return display; + +fail: + if (registry) + wl_registry_destroy(registry); + + wsi_wl_display_destroy(wsi, display); + return NULL; +} + +static struct wsi_wl_display * +wsi_wl_get_display(struct wsi_wayland *wsi, struct wl_display *wl_display) +{ + pthread_mutex_lock(&wsi->mutex); + + struct hash_entry *entry = _mesa_hash_table_search(wsi->displays, + wl_display); + if (!entry) { + /* We're about to make a bunch of blocking calls. Let's drop the + * mutex for now so we don't block up too badly. + */ + pthread_mutex_unlock(&wsi->mutex); + + struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display); + + pthread_mutex_lock(&wsi->mutex); + + entry = _mesa_hash_table_search(wsi->displays, wl_display); + if (entry) { + /* Oops, someone raced us to it */ + wsi_wl_display_destroy(wsi, display); + } else { + entry = _mesa_hash_table_insert(wsi->displays, wl_display, display); + } + } + + pthread_mutex_unlock(&wsi->mutex); + + return entry->data; +} + +static VkResult +wsi_wl_get_window_supported(struct anv_wsi_implementation *impl, + struct anv_physical_device *physical_device, + const VkSurfaceDescriptionWindowWSI *window, + VkBool32 *pSupported) +{ + struct wsi_wayland *wsi = (struct wsi_wayland *)impl; + + *pSupported = wsi_wl_get_display(wsi, window->pPlatformHandle) != NULL; + + return VK_SUCCESS; +} + +static const VkSurfacePresentModePropertiesWSI present_modes[] = { + { VK_PRESENT_MODE_MAILBOX_WSI }, + { VK_PRESENT_MODE_FIFO_WSI }, +}; + +static VkResult +wsi_wl_get_surface_info(struct anv_wsi_implementation 
*impl, + struct anv_device *device, + VkSurfaceDescriptionWindowWSI *window, + VkSurfaceInfoTypeWSI infoType, + size_t* pDataSize, void* pData) +{ + struct wsi_wayland *wsi = (struct wsi_wayland *)impl; + + if (pDataSize == NULL) + return vk_error(VK_ERROR_INVALID_POINTER); + + switch (infoType) { + case VK_SURFACE_INFO_TYPE_PROPERTIES_WSI: { + assert(*pDataSize >= sizeof(VkSurfacePropertiesWSI)); + VkSurfacePropertiesWSI *props = pData; + + props->minImageCount = MIN_NUM_IMAGES; + props->maxImageCount = 4; + props->currentExtent = (VkExtent2D) { -1, -1 }; + props->minImageExtent = (VkExtent2D) { 1, 1 }; + props->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + props->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_WSI; + props->currentTransform = VK_SURFACE_TRANSFORM_NONE_WSI; + props->maxImageArraySize = 1; + props->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return VK_SUCCESS; + } + + case VK_SURFACE_INFO_TYPE_FORMATS_WSI: { + VkSurfaceFormatPropertiesWSI *formats = pData; + + struct wsi_wl_display *display = + wsi_wl_get_display(wsi, window->pPlatformHandle); + + uint32_t size = anv_vector_length(&display->formats) * sizeof(*formats); + + if (pData == NULL) { + *pDataSize = size; + return VK_SUCCESS; + } + + assert(*pDataSize >= size); + *pDataSize = size; + + VkFormat *f; + anv_vector_foreach(f, &display->formats) + (formats++)->format = *f; + + return VK_SUCCESS; + } + + case VK_SURFACE_INFO_TYPE_PRESENT_MODES_WSI: + if (pData == NULL) { + *pDataSize = sizeof(present_modes); + return VK_SUCCESS; + } + + assert(*pDataSize >= sizeof(present_modes)); + memcpy(pData, present_modes, *pDataSize); + + return VK_SUCCESS; + default: + return vk_error(VK_ERROR_INVALID_VALUE); + } +} + +struct wsi_wl_image { + struct anv_image * image; + struct anv_device_memory * memory; + struct wl_buffer * buffer; + bool busy; +}; + +struct wsi_wl_swap_chain { + struct anv_swap_chain base; + + struct 
wsi_wl_display * display; + struct wl_event_queue * queue; + struct wl_surface * surface; + + VkExtent2D extent; + VkFormat vk_format; + uint32_t drm_format; + + VkPresentModeWSI present_mode; + bool fifo_ready; + + uint32_t image_count; + struct wsi_wl_image images[0]; +}; + +static VkResult +wsi_wl_get_swap_chain_info(struct anv_swap_chain *anv_chain, + VkSwapChainInfoTypeWSI infoType, + size_t* pDataSize, void* pData) +{ + struct wsi_wl_swap_chain *chain = (struct wsi_wl_swap_chain *)anv_chain; + size_t size; + + switch (infoType) { + case VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI: { + VkSwapChainImagePropertiesWSI *images = pData; + + size = chain->image_count * sizeof(*images); + + if (pData == NULL) { + *pDataSize = size; + return VK_SUCCESS; + } + + assert(size <= *pDataSize); + for (uint32_t i = 0; i < chain->image_count; i++) + images[i].image = anv_image_to_handle(chain->images[i].image); + + *pDataSize = size; + + return VK_SUCCESS; + } + + default: + return vk_error(VK_ERROR_INVALID_VALUE); + } +} + +static VkResult +wsi_wl_acquire_next_image(struct anv_swap_chain *anv_chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index) +{ + struct wsi_wl_swap_chain *chain = (struct wsi_wl_swap_chain *)anv_chain; + + int ret = wl_display_dispatch_queue_pending(chain->display->display, + chain->queue); + /* XXX: I'm not sure if out-of-date is the right error here. If + * wl_display_dispatch_queue_pending fails it most likely means we got + * kicked by the server so this seems more-or-less correct. + */ + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_WSI); + + while (1) { + for (uint32_t i = 0; i < chain->image_count; i++) { + if (!chain->images[i].busy) { + /* We found a non-busy image */ + *image_index = i; + return VK_SUCCESS; + } + } + + /* This time we do a blocking dispatch because we can't go + * anywhere until we get an event. 
+ */ + int ret = wl_display_dispatch_queue(chain->display->display, + chain->queue); + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_WSI); + } +} + +static void +frame_handle_done(void *data, struct wl_callback *callback, uint32_t serial) +{ + struct wsi_wl_swap_chain *chain = data; + + chain->fifo_ready = true; + + wl_callback_destroy(callback); +} + +static const struct wl_callback_listener frame_listener = { + frame_handle_done, +}; + +static VkResult +wsi_wl_queue_present(struct anv_swap_chain *anv_chain, + struct anv_queue *queue, + uint32_t image_index) +{ + struct wsi_wl_swap_chain *chain = (struct wsi_wl_swap_chain *)anv_chain; + + if (chain->present_mode == VK_PRESENT_MODE_FIFO_WSI) { + while (!chain->fifo_ready) { + int ret = wl_display_dispatch_queue(chain->display->display, + chain->queue); + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_WSI); + } + } + + assert(image_index < chain->image_count); + wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0); + wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX); + wl_surface_commit(chain->surface); + + if (chain->present_mode == VK_PRESENT_MODE_FIFO_WSI) { + struct wl_callback *frame = wl_surface_frame(chain->surface); + wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue); + wl_callback_add_listener(frame, &frame_listener, chain); + } + + return VK_SUCCESS; +} + +static void +wsi_wl_image_finish(struct wsi_wl_swap_chain *chain, struct wsi_wl_image *image) +{ + VkDevice vk_device = anv_device_to_handle(chain->base.device); + anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory)); + anv_DestroyImage(vk_device, anv_image_to_handle(image->image)); +} + +static void +buffer_handle_release(void *data, struct wl_buffer *buffer) +{ + struct wsi_wl_image *image = data; + + assert(image->buffer == buffer); + + image->busy = false; +} + +static const struct wl_buffer_listener buffer_listener = { + buffer_handle_release, +}; + +static VkResult 
+wsi_wl_image_init(struct wsi_wl_swap_chain *chain, struct wsi_wl_image *image) +{ + VkDevice vk_device = anv_device_to_handle(chain->base.device); + VkResult result; + + VkImage vk_image; + result = anv_image_create(vk_device, + &(struct anv_image_create_info) { + .force_tile_mode = true, + .tile_mode = XMAJOR, + .stride = 0, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = chain->vk_format, + .extent = { + .width = chain->extent.width, + .height = chain->extent.height, + .depth = 1 + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + &vk_image); + + if (result != VK_SUCCESS) + return result; + + image->image = anv_image_from_handle(vk_image); + assert(anv_format_is_color(image->image->format)); + + struct anv_surface *surface = &image->image->color_surface; + + VkDeviceMemory vk_memory; + result = anv_AllocMemory(vk_device, + &(VkMemoryAllocInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + .allocationSize = image->image->size, + .memoryTypeIndex = 0, + }, + &vk_memory); + + if (result != VK_SUCCESS) + goto fail_image; + + image->memory = anv_device_memory_from_handle(vk_memory); + + result = anv_BindImageMemory(vk_device, vk_image, vk_memory, 0); + + if (result != VK_SUCCESS) + goto fail_mem; + + int ret = anv_gem_set_tiling(chain->base.device, + image->memory->bo.gem_handle, + surface->stride, I915_TILING_X); + if (ret) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail_mem; + } + + int fd = anv_gem_handle_to_fd(chain->base.device, + image->memory->bo.gem_handle); + if (fd == -1) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail_mem; + } + + image->buffer = wl_drm_create_prime_buffer(chain->display->drm, + fd, /* name */ + chain->extent.width, + chain->extent.height, + chain->drm_format, + 
surface->offset, + surface->stride, + 0, 0, 0, 0 /* unused */); + wl_display_roundtrip(chain->display->display); + close(fd); + + wl_proxy_set_queue((struct wl_proxy *)image->buffer, chain->queue); + wl_buffer_add_listener(image->buffer, &buffer_listener, image); + + return VK_SUCCESS; + +fail_mem: + anv_FreeMemory(vk_device, vk_memory); +fail_image: + anv_DestroyImage(vk_device, vk_image); + + return result; +} + +static VkResult +wsi_wl_destroy_swap_chain(struct anv_swap_chain *anv_chain) +{ + struct wsi_wl_swap_chain *chain = (struct wsi_wl_swap_chain *)anv_chain; + + for (uint32_t i = 0; i < chain->image_count; i++) { + if (chain->images[i].buffer) + wsi_wl_image_finish(chain, &chain->images[i]); + } + + anv_device_free(chain->base.device, chain); + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_create_swap_chain(struct anv_wsi_implementation *impl, + struct anv_device *device, + const VkSwapChainCreateInfoWSI *pCreateInfo, + struct anv_swap_chain **swap_chain_out) +{ + struct wsi_wayland *wsi = (struct wsi_wayland *)impl; + struct wsi_wl_swap_chain *chain; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI); + + assert(pCreateInfo->pSurfaceDescription->sType == + VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI); + VkSurfaceDescriptionWindowWSI *vk_window = + (VkSurfaceDescriptionWindowWSI *)pCreateInfo->pSurfaceDescription; + assert(vk_window->platform == VK_PLATFORM_WAYLAND_WSI); + + int num_images = pCreateInfo->minImageCount; + + assert(num_images >= MIN_NUM_IMAGES); + + /* For true mailbox mode, we need at least 4 images: + * 1) One to scan out from + * 2) One to have queued for scan-out + * 3) One to be currently held by the Wayland compositor + * 4) One to render to + */ + if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_WSI) + num_images = MAX2(num_images, 4); + + size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); + chain = anv_device_alloc(device, size, 8, + 
VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->base.device = device; + chain->base.destroy = wsi_wl_destroy_swap_chain; + chain->base.get_swap_chain_info = wsi_wl_get_swap_chain_info; + chain->base.acquire_next_image = wsi_wl_acquire_next_image; + chain->base.queue_present = wsi_wl_queue_present; + + chain->surface = vk_window->pPlatformWindow; + chain->extent = pCreateInfo->imageExtent; + chain->vk_format = pCreateInfo->imageFormat; + chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, false); + + chain->present_mode = pCreateInfo->presentMode; + chain->fifo_ready = true; + + chain->image_count = num_images; + + /* Mark a bunch of stuff as NULL. This way we can just call + * destroy_swapchain for cleanup. + */ + for (uint32_t i = 0; i < chain->image_count; i++) + chain->images[i].buffer = NULL; + chain->queue = NULL; + + chain->display = wsi_wl_get_display(wsi, vk_window->pPlatformHandle); + if (!chain->display) + goto fail; + + chain->queue = wl_display_create_queue(chain->display->display); + if (!chain->queue) + goto fail; + + for (uint32_t i = 0; i < chain->image_count; i++) { + result = wsi_wl_image_init(chain, &chain->images[i]); + if (result != VK_SUCCESS) + goto fail; + chain->images[i].busy = false; + } + + *swap_chain_out = &chain->base; + + return VK_SUCCESS; + +fail: + wsi_wl_destroy_swap_chain(&chain->base); + + return result; +} + +VkResult +anv_wl_init_wsi(struct anv_instance *instance) +{ + struct wsi_wayland *wsi; + VkResult result; + + wsi = anv_instance_alloc(instance, sizeof(*wsi), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (!wsi) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + wsi->base.get_window_supported = wsi_wl_get_window_supported; + wsi->base.get_surface_info = wsi_wl_get_surface_info; + wsi->base.create_swap_chain = wsi_wl_create_swap_chain; + + wsi->instance = instance; + + int ret = pthread_mutex_init(&wsi->mutex, NULL); + if (ret != 0) { + result = 
(ret == ENOMEM) ? VK_ERROR_OUT_OF_HOST_MEMORY : + VK_ERROR_UNKNOWN; + goto fail_alloc; + } + + wsi->displays = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + if (!wsi->displays) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail_mutex; + } + + instance->wsi_impl[VK_PLATFORM_WAYLAND_WSI] = &wsi->base; + + return VK_SUCCESS; + +fail_mutex: + pthread_mutex_destroy(&wsi->mutex); + +fail_alloc: + anv_instance_free(instance, wsi); + + return result; +} + +void +anv_wl_finish_wsi(struct anv_instance *instance) +{ + struct wsi_wayland *wsi = + (struct wsi_wayland *)instance->wsi_impl[VK_PLATFORM_WAYLAND_WSI]; + + _mesa_hash_table_destroy(wsi->displays, NULL); + + pthread_mutex_destroy(&wsi->mutex); + + anv_instance_free(instance, wsi); +} -- cgit v1.2.3 From f3bdb93a8eb5115ec46a43c45c44db57b897ca94 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Sep 2015 18:26:05 -0700 Subject: nir/types: Allow single-column matrices This can sometimes be a convenient way to build vectors. 
--- src/glsl/nir/nir_types.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 809a7cff79c..69cfac18587 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -252,7 +252,7 @@ glsl_vector_type(enum glsl_base_type base_type, unsigned components) const glsl_type * glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns) { - assert(rows > 1 && rows <= 4 && columns > 1 && columns <= 4); + assert(rows > 1 && rows <= 4 && columns >= 1 && columns <= 4); return glsl_type::get_instance(base_type, rows, columns); } -- cgit v1.2.3 From 9e2c13350e01534adf046af3786b78f91bec3ed3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Sep 2015 18:26:32 -0700 Subject: nir/spirv: Add support for SpvDecorationColMajor --- src/glsl/nir/spirv_to_nir.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index c3a16986fc1..9495fba0652 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -365,6 +365,8 @@ struct_member_decoration_cb(struct vtn_builder *b, case SpvDecorationOffset: ctx->type->offsets[member] = dec->literals[0]; break; + case SpvDecorationColMajor: + break; /* Nothing to do here. Column-major is the default. 
*/ default: unreachable("Unhandled member decoration"); } -- cgit v1.2.3 From f32d16a9f060225d24eed5c248da4639c126b88d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Sep 2015 18:39:57 -0700 Subject: nir/spirv: Use the actual GLSL 450 extension header from Khronos --- src/glsl/nir/spirv_glsl450.h | 130 ++++++++++++++++++ src/glsl/nir/spirv_glsl450_to_nir.c | 259 +++++++++++------------------------- 2 files changed, 206 insertions(+), 183 deletions(-) create mode 100644 src/glsl/nir/spirv_glsl450.h (limited to 'src') diff --git a/src/glsl/nir/spirv_glsl450.h b/src/glsl/nir/spirv_glsl450.h new file mode 100644 index 00000000000..d828b152f43 --- /dev/null +++ b/src/glsl/nir/spirv_glsl450.h @@ -0,0 +1,130 @@ +/* +** Copyright (c) 2014-2015 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +const int GLSLstd450Version = 99; +const int GLSLstd450Revision = 2; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450Mix = 46, + GLSLstd450Step = 47, + GLSLstd450SmoothStep = 48, + + GLSLstd450Fma = 49, + GLSLstd450Frexp = 50, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 51, // no OpVariable operand + GLSLstd450Ldexp = 52, + + GLSLstd450PackSnorm4x8 = 53, + GLSLstd450PackUnorm4x8 = 54, + GLSLstd450PackSnorm2x16 = 55, + GLSLstd450PackUnorm2x16 = 56, + 
GLSLstd450PackHalf2x16 = 57, + GLSLstd450PackDouble2x32 = 58, + GLSLstd450UnpackSnorm2x16 = 59, + GLSLstd450UnpackUnorm2x16 = 60, + GLSLstd450UnpackHalf2x16 = 61, + GLSLstd450UnpackSnorm4x8 = 62, + GLSLstd450UnpackUnorm4x8 = 63, + GLSLstd450UnpackDouble2x32 = 64, + + GLSLstd450Length = 65, + GLSLstd450Distance = 66, + GLSLstd450Cross = 67, + GLSLstd450Normalize = 68, + GLSLstd450FaceForward = 69, + GLSLstd450Reflect = 70, + GLSLstd450Refract = 71, + + GLSLstd450FindILSB = 72, + GLSLstd450FindSMSB = 73, + GLSLstd450FindUMSB = 74, + + GLSLstd450InterpolateAtCentroid = 75, + GLSLstd450InterpolateAtSample = 76, + GLSLstd450InterpolateAtOffset = 77, + + GLSLstd450AddCarry = 78, // These three should move to the core instruction set + GLSLstd450SubBorrow = 79, + GLSLstd450MulExtended = 80, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c index 52b048820f3..8bfa16e6d2f 100644 --- a/src/glsl/nir/spirv_glsl450_to_nir.c +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -26,100 +26,7 @@ */ #include "spirv_to_nir_private.h" - -enum GLSL450Entrypoint { - Round = 0, - RoundEven = 1, - Trunc = 2, - Abs = 3, - Sign = 4, - Floor = 5, - Ceil = 6, - Fract = 7, - - Radians = 8, - Degrees = 9, - Sin = 10, - Cos = 11, - Tan = 12, - Asin = 13, - Acos = 14, - Atan = 15, - Sinh = 16, - Cosh = 17, - Tanh = 18, - Asinh = 19, - Acosh = 20, - Atanh = 21, - Atan2 = 22, - - Pow = 23, - Exp = 24, - Log = 25, - Exp2 = 26, - Log2 = 27, - Sqrt = 28, - InverseSqrt = 29, - - Determinant = 30, - MatrixInverse = 31, - - Modf = 32, // second argument needs the OpVariable = , not an OpLoad - Min = 33, - Max = 34, - Clamp = 35, - Mix = 36, - Step = 37, - SmoothStep = 38, - - FloatBitsToInt = 39, - FloatBitsToUint = 40, - IntBitsToFloat = 41, - UintBitsToFloat = 42, - - Fma = 43, - Frexp = 44, - Ldexp = 45, - - PackSnorm4x8 = 46, - PackUnorm4x8 = 47, - PackSnorm2x16 = 48, - PackUnorm2x16 = 49, - PackHalf2x16 = 50, - 
PackDouble2x32 = 51, - UnpackSnorm2x16 = 52, - UnpackUnorm2x16 = 53, - UnpackHalf2x16 = 54, - UnpackSnorm4x8 = 55, - UnpackUnorm4x8 = 56, - UnpackDouble2x32 = 57, - - Length = 58, - Distance = 59, - Cross = 60, - Normalize = 61, - Ftransform = 62, - FaceForward = 63, - Reflect = 64, - Refract = 65, - - UaddCarry = 66, - UsubBorrow = 67, - UmulExtended = 68, - ImulExtended = 69, - BitfieldExtract = 70, - BitfieldInsert = 71, - BitfieldReverse = 72, - BitCount = 73, - FindLSB = 74, - FindMSB = 75, - - InterpolateAtCentroid = 76, - InterpolateAtSample = 77, - InterpolateAtOffset = 78, - - Count -}; +#include "spirv_glsl450.h" static nir_ssa_def* build_length(nir_builder *b, nir_ssa_def *vec) @@ -135,7 +42,7 @@ build_length(nir_builder *b, nir_ssa_def *vec) } static void -handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, +handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, const uint32_t *w, unsigned count) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); @@ -150,106 +57,92 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, nir_op op; switch (entrypoint) { - case Round: op = nir_op_fround_even; break; /* TODO */ - case RoundEven: op = nir_op_fround_even; break; - case Trunc: op = nir_op_ftrunc; break; - case Abs: op = nir_op_fabs; break; - case Sign: op = nir_op_fsign; break; - case Floor: op = nir_op_ffloor; break; - case Ceil: op = nir_op_fceil; break; - case Fract: op = nir_op_ffract; break; - case Radians: + case GLSLstd450Round: op = nir_op_fround_even; break; /* TODO */ + case GLSLstd450RoundEven: op = nir_op_fround_even; break; + case GLSLstd450Trunc: op = nir_op_ftrunc; break; + case GLSLstd450FAbs: op = nir_op_fabs; break; + case GLSLstd450FSign: op = nir_op_fsign; break; + case GLSLstd450Floor: op = nir_op_ffloor; break; + case GLSLstd450Ceil: op = nir_op_fceil; break; + case GLSLstd450Fract: op = nir_op_ffract; break; + case GLSLstd450Radians: val->ssa->def = 
nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); return; - case Degrees: + case GLSLstd450Degrees: val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); return; - case Sin: op = nir_op_fsin; break; - case Cos: op = nir_op_fcos; break; - case Tan: + case GLSLstd450Sin: op = nir_op_fsin; break; + case GLSLstd450Cos: op = nir_op_fcos; break; + case GLSLstd450Tan: val->ssa->def = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), nir_fcos(&b->nb, src[0])); return; - case Pow: op = nir_op_fpow; break; - case Exp2: op = nir_op_fexp2; break; - case Log2: op = nir_op_flog2; break; - case Sqrt: op = nir_op_fsqrt; break; - case InverseSqrt: op = nir_op_frsq; break; - - case Modf: op = nir_op_fmod; break; - case Min: op = nir_op_fmin; break; - case Max: op = nir_op_fmax; break; - case Mix: op = nir_op_flrp; break; - case Step: + case GLSLstd450Pow: op = nir_op_fpow; break; + case GLSLstd450Exp2: op = nir_op_fexp2; break; + case GLSLstd450Log2: op = nir_op_flog2; break; + case GLSLstd450Sqrt: op = nir_op_fsqrt; break; + case GLSLstd450InverseSqrt: op = nir_op_frsq; break; + + case GLSLstd450Modf: op = nir_op_fmod; break; + case GLSLstd450FMin: op = nir_op_fmin; break; + case GLSLstd450FMax: op = nir_op_fmax; break; + case GLSLstd450Mix: op = nir_op_flrp; break; + case GLSLstd450Step: val->ssa->def = nir_sge(&b->nb, src[1], src[0]); return; - case FloatBitsToInt: - case FloatBitsToUint: - case IntBitsToFloat: - case UintBitsToFloat: - /* Probably going to be removed from the final version of the spec. 
*/ - val->ssa->def = src[0]; - return; - - case Fma: op = nir_op_ffma; break; - case Ldexp: op = nir_op_ldexp; break; + case GLSLstd450Fma: op = nir_op_ffma; break; + case GLSLstd450Ldexp: op = nir_op_ldexp; break; /* Packing/Unpacking functions */ - case PackSnorm4x8: op = nir_op_pack_snorm_4x8; break; - case PackUnorm4x8: op = nir_op_pack_unorm_4x8; break; - case PackSnorm2x16: op = nir_op_pack_snorm_2x16; break; - case PackUnorm2x16: op = nir_op_pack_unorm_2x16; break; - case PackHalf2x16: op = nir_op_pack_half_2x16; break; - case UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break; - case UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break; - case UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break; - case UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break; - case UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; - - case Length: + case GLSLstd450PackSnorm4x8: op = nir_op_pack_snorm_4x8; break; + case GLSLstd450PackUnorm4x8: op = nir_op_pack_unorm_4x8; break; + case GLSLstd450PackSnorm2x16: op = nir_op_pack_snorm_2x16; break; + case GLSLstd450PackUnorm2x16: op = nir_op_pack_unorm_2x16; break; + case GLSLstd450PackHalf2x16: op = nir_op_pack_half_2x16; break; + case GLSLstd450UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break; + case GLSLstd450UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break; + case GLSLstd450UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break; + case GLSLstd450UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break; + case GLSLstd450UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; + + case GLSLstd450Length: val->ssa->def = build_length(&b->nb, src[0]); return; - case Distance: + case GLSLstd450Distance: val->ssa->def = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); return; - case Normalize: + case GLSLstd450Normalize: val->ssa->def = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); return; - case UaddCarry: op = nir_op_uadd_carry; break; - case UsubBorrow: op = nir_op_usub_borrow; break; - case BitfieldExtract: op 
= nir_op_ubitfield_extract; break; /* TODO */ - case BitfieldInsert: op = nir_op_bitfield_insert; break; - case BitfieldReverse: op = nir_op_bitfield_reverse; break; - case BitCount: op = nir_op_bit_count; break; - case FindLSB: op = nir_op_find_lsb; break; - case FindMSB: op = nir_op_ufind_msb; break; /* TODO */ - - case Exp: - case Log: - case Clamp: - case Asin: - case Acos: - case Atan: - case Atan2: - case Sinh: - case Cosh: - case Tanh: - case Asinh: - case Acosh: - case Atanh: - case SmoothStep: - case Frexp: - case PackDouble2x32: - case UnpackDouble2x32: - case Cross: - case Ftransform: - case FaceForward: - case Reflect: - case Refract: - case UmulExtended: - case ImulExtended: + case GLSLstd450AddCarry: op = nir_op_uadd_carry; break; + case GLSLstd450SubBorrow: op = nir_op_usub_borrow; break; + + case GLSLstd450Exp: + case GLSLstd450Log: + case GLSLstd450FClamp: + case GLSLstd450UClamp: + case GLSLstd450SClamp: + case GLSLstd450Asin: + case GLSLstd450Acos: + case GLSLstd450Atan: + case GLSLstd450Atan2: + case GLSLstd450Sinh: + case GLSLstd450Cosh: + case GLSLstd450Tanh: + case GLSLstd450Asinh: + case GLSLstd450Acosh: + case GLSLstd450Atanh: + case GLSLstd450SmoothStep: + case GLSLstd450Frexp: + case GLSLstd450PackDouble2x32: + case GLSLstd450UnpackDouble2x32: + case GLSLstd450Cross: + case GLSLstd450FaceForward: + case GLSLstd450Reflect: + case GLSLstd450Refract: + case GLSLstd450MulExtended: default: unreachable("Unhandled opcode"); } @@ -269,16 +162,16 @@ bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, const uint32_t *words, unsigned count) { - switch ((enum GLSL450Entrypoint)ext_opcode) { - case Determinant: - case MatrixInverse: - case InterpolateAtCentroid: - case InterpolateAtSample: - case InterpolateAtOffset: + switch ((enum GLSLstd450)ext_opcode) { + case GLSLstd450Determinant: + case GLSLstd450MatrixInverse: + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case 
GLSLstd450InterpolateAtOffset: unreachable("Unhandled opcode"); default: - handle_glsl450_alu(b, (enum GLSL450Entrypoint)ext_opcode, words, count); + handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, words, count); } return true; -- cgit v1.2.3 From 7174d155e9b2d3ecc1d60f71e202838fa9359f1d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Sep 2015 18:47:42 -0700 Subject: nir: Add a lower_fdiv option and use it in i965 --- src/glsl/nir/nir.h | 1 + src/glsl/nir/nir_opt_algebraic.py | 1 + src/mesa/drivers/dri/i965/brw_shader.cpp | 1 + 3 files changed, 3 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 4c60dbd645a..415dda7712d 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1433,6 +1433,7 @@ typedef struct nir_function { typedef struct nir_shader_compiler_options { bool lower_ffma; + bool lower_fdiv; bool lower_flrp; bool lower_fpow; bool lower_fsat; diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 226e0a8d85c..880408bc367 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -66,6 +66,7 @@ optimizations = [ (('imul', a, 1), a), (('fmul', a, -1.0), ('fneg', a)), (('imul', a, -1), ('ineg', a)), + (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'), (('ffma', 0.0, a, b), b), (('ffma', a, 0.0, b), b), (('ffma', a, b, 0.0), ('fmul', a, b)), diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 0007e5c07a5..4ef2777559f 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -96,6 +96,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) */ nir_options->lower_ffma = true; nir_options->lower_sub = true; + nir_options->lower_fdiv = true; /* We want the GLSL compiler to emit code that uses condition codes */ for (int i = 0; i < MESA_SHADER_STAGES; i++) { -- cgit v1.2.3 From 
6d5dafd779550391c2313e82e6630d7a93cf9530 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Sep 2015 18:47:56 -0700 Subject: nir/spirv/glsl450: Use the correct write mask --- src/glsl/nir/spirv_glsl450_to_nir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c index 8bfa16e6d2f..60bd597820c 100644 --- a/src/glsl/nir/spirv_glsl450_to_nir.c +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -150,6 +150,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, glsl_get_vector_elements(val->ssa->type), val->name); + instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1; val->ssa->def = &instr->dest.dest.ssa; for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) -- cgit v1.2.3 From b3c037f329beb5d7a63a7e755ca3b15acdfce972 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Sep 2015 19:05:51 -0700 Subject: vk: Fix size return value handling in a couple places --- src/vulkan/anv_device.c | 4 ++-- src/vulkan/anv_wsi_wayland.c | 8 +++++++- src/vulkan/anv_wsi_x11.c | 8 +++++++- 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index c1758de537c..cf93cd1a6eb 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -653,7 +653,7 @@ VkResult anv_GetGlobalExtensionProperties( return VK_SUCCESS; } - assert(*pCount <= ARRAY_SIZE(global_extensions)); + assert(*pCount >= ARRAY_SIZE(global_extensions)); *pCount = ARRAY_SIZE(global_extensions); memcpy(pProperties, global_extensions, sizeof(global_extensions)); @@ -679,7 +679,7 @@ VkResult anv_GetPhysicalDeviceExtensionProperties( return VK_SUCCESS; } - assert(*pCount < ARRAY_SIZE(device_extensions)); + assert(*pCount >= ARRAY_SIZE(device_extensions)); *pCount = ARRAY_SIZE(device_extensions); 
memcpy(pProperties, device_extensions, sizeof(device_extensions)); diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index e23f5933fe4..11f2dae9759 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -327,9 +327,15 @@ wsi_wl_get_surface_info(struct anv_wsi_implementation *impl, switch (infoType) { case VK_SURFACE_INFO_TYPE_PROPERTIES_WSI: { - assert(*pDataSize >= sizeof(VkSurfacePropertiesWSI)); VkSurfacePropertiesWSI *props = pData; + if (pData == NULL) { + *pDataSize = sizeof(*props); + return VK_SUCCESS; + } + + assert(*pDataSize >= sizeof(*props)); + props->minImageCount = MIN_NUM_IMAGES; props->maxImageCount = 4; props->currentExtent = (VkExtent2D) { -1, -1 }; diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 212c01be0b9..b412a2bab32 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -59,9 +59,15 @@ x11_get_surface_info(struct anv_wsi_implementation *impl, switch (infoType) { case VK_SURFACE_INFO_TYPE_PROPERTIES_WSI: { - assert(*pDataSize >= sizeof(VkSurfacePropertiesWSI)); VkSurfacePropertiesWSI *props = pData; + if (pData == NULL) { + *pDataSize = sizeof(*props); + return VK_SUCCESS; + } + + assert(*pDataSize >= sizeof(*props)); + props->minImageCount = 2; props->maxImageCount = 4; props->currentExtent = (VkExtent2D) { -1, -1 }; -- cgit v1.2.3 From 2c4ae00db693b066c479150998ca618265bf9d5c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Sep 2015 19:18:50 -0700 Subject: vk: Conditionally compile Wayland support Pulling in libwayland causes undefined symbols in applications that are linked against vulkan alone. Ideally, we would like to dlopen a platform support library or something like that. For now, this works and should get crucible running again. 
--- src/vulkan/Makefile.am | 6 +++++- src/vulkan/anv_wsi.c | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 014183c0f71..b2eb4fb1cb1 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -75,7 +75,6 @@ VULKAN_SOURCES = \ anv_query.c \ anv_util.c \ anv_wsi.c \ - anv_wsi_wayland.c \ anv_wsi_x11.c \ gen8_state.c \ gen8_cmd_buffer.c \ @@ -84,6 +83,11 @@ VULKAN_SOURCES = \ gen7_cmd_buffer.c \ gen7_pipeline.c +if HAVE_EGL_PLATFORM_WAYLAND +VULKAN_SOURCES += anv_wsi_wayland.c +libvulkan_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM +endif + libvulkan_la_SOURCES = \ $(VULKAN_SOURCES) \ anv_gem.c diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c index 3778a1b20e2..21e01fc61f2 100644 --- a/src/vulkan/anv_wsi.c +++ b/src/vulkan/anv_wsi.c @@ -34,11 +34,13 @@ anv_init_wsi(struct anv_instance *instance) if (result != VK_SUCCESS) return result; +#ifdef HAVE_WAYLAND_PLATFORM result = anv_wl_init_wsi(instance); if (result != VK_SUCCESS) { anv_x11_finish_wsi(instance); return result; } +#endif return VK_SUCCESS; } @@ -46,7 +48,9 @@ anv_init_wsi(struct anv_instance *instance) void anv_finish_wsi(struct anv_instance *instance) { +#ifdef HAVE_WAYLAND_PLATFORM anv_wl_finish_wsi(instance); +#endif anv_x11_finish_wsi(instance); } -- cgit v1.2.3 From 01924a03d483d44c34e285e81a8cb875887c5805 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Sep 2015 20:02:36 -0700 Subject: vk: Actually link in wayland libraries Turns out this was why I had accidentally broken the universe. Oops... 
--- src/vulkan/Makefile.am | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index b2eb4fb1cb1..8b17af22eb4 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -42,7 +42,6 @@ AM_CPPFLAGS = \ $(DEFINES) \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/egl/wayland/wayland-drm \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ @@ -83,8 +82,26 @@ VULKAN_SOURCES = \ gen7_cmd_buffer.c \ gen7_pipeline.c +BUILT_SOURCES = \ + anv_entrypoints.h \ + anv_entrypoints.c \ + anv_meta_spirv_autogen.h + if HAVE_EGL_PLATFORM_WAYLAND -VULKAN_SOURCES += anv_wsi_wayland.c +BUILT_SOURCES += \ + wayland-drm-protocol.c \ + wayland-drm-client-protocol.h + +%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@ + +%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@ + +AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm +VULKAN_SOURCES += \ + wayland-drm-protocol.c \ + anv_wsi_wayland.c libvulkan_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM endif @@ -92,11 +109,6 @@ libvulkan_la_SOURCES = \ $(VULKAN_SOURCES) \ anv_gem.c -BUILT_SOURCES = \ - anv_entrypoints.h \ - anv_entrypoints.c \ - anv_meta_spirv_autogen.h - anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< header > $@ @@ -108,7 +120,7 @@ anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) CLEANFILES = $(BUILT_SOURCES) -libvulkan_la_LIBADD = -lxcb -lxcb-dri3 \ +libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la # Libvulkan with dummy gem. Used for unit tests. 
-- cgit v1.2.3 From 9be43ef99cba08aab37b97be09b1f40f23288dcb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 5 Sep 2015 09:47:11 -0700 Subject: nir/spirv: Handle the MatrixStride member decoration --- src/glsl/nir/spirv_to_nir.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 9495fba0652..8c2b31cf3ec 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -365,6 +365,9 @@ struct_member_decoration_cb(struct vtn_builder *b, case SpvDecorationOffset: ctx->type->offsets[member] = dec->literals[0]; break; + case SpvDecorationMatrixStride: + ctx->type->members[member]->stride = dec->literals[0]; + break; case SpvDecorationColMajor: break; /* Nothing to do here. Column-major is the default. */ default: -- cgit v1.2.3 From 35fcd37fcf27ac570e96c76807bf3c18d10d0729 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 5 Sep 2015 13:16:49 -0700 Subject: nir/spirv: Handle decorations after assigning variable locations --- src/glsl/nir/spirv_to_nir.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 8c2b31cf3ec..30104f0d09a 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1282,8 +1282,6 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, val->deref = nir_deref_var_create(b, var); val->deref_type = type; - vtn_foreach_decoration(b, val, var_decoration_cb, var); - if (b->execution_model == SpvExecutionModelFragment && var->data.mode == nir_var_shader_out) { var->data.location += FRAG_RESULT_DATA0; @@ -1295,6 +1293,12 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, var->data.location += VARYING_SLOT_VAR0; } + /* We handle decorations last because decorations might cause us to + * over-write other things such as the variable's location and we want + * those changes to stick. 
+ */ + vtn_foreach_decoration(b, val, var_decoration_cb, var); + /* If this was a uniform block, then we're not going to actually use the * variable (we're only going to use it to compute offsets), so don't * declare it in the shader. -- cgit v1.2.3 From 612b13aeae99feac2d6c4da05a013ec77c048bc6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 5 Sep 2015 14:09:15 -0700 Subject: nir/spirv: Add support for most of the rest of texturing Assuming this all works, about the only thing left should be some corner-cases for tg4 --- src/glsl/nir/spirv_to_nir.c | 74 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 30104f0d09a..e6fe74de9e8 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1547,12 +1547,22 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, break; } - nir_texop texop; + /* These all have an explicit depth value as their next source */ switch (opcode) { - case SpvOpImageSampleImplicitLod: - texop = nir_texop_tex; + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor); break; + default: + break; + } + /* Figure out the base texture operation */ + nir_texop texop; + switch (opcode) { + case SpvOpImageSampleImplicitLod: case SpvOpImageSampleExplicitLod: case SpvOpImageSampleDrefImplicitLod: case SpvOpImageSampleDrefExplicitLod: @@ -1560,24 +1570,74 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, case SpvOpImageSampleProjExplicitLod: case SpvOpImageSampleProjDrefImplicitLod: case SpvOpImageSampleProjDrefExplicitLod: + texop = nir_texop_tex; + break; + case SpvOpImageFetch: + texop = nir_texop_txf; + break; + case SpvOpImageGather: case SpvOpImageDrefGather: + texop = nir_texop_tg4; + break; + case 
SpvOpImageQuerySizeLod: case SpvOpImageQuerySize: + texop = nir_texop_txs; + break; + case SpvOpImageQueryLod: + texop = nir_texop_lod; + break; + case SpvOpImageQueryLevels: + texop = nir_texop_query_levels; + break; + case SpvOpImageQuerySamples: default: unreachable("Unhandled opcode"); } - /* From now on, the remaining sources are "Optional Image Operands." */ + /* Now we need to handle some number of optional arguments */ if (idx < count) { - /* XXX handle these (bias, lod, etc.) */ - assert(0); - } + uint32_t operands = w[idx++]; + + if (operands & SpvImageOperandsBiasMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txb; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias); + } + + if (operands & SpvImageOperandsLodMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txl; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); + } + + if (operands & SpvImageOperandsGradMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txd; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx); + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy); + } + if (operands & SpvImageOperandsOffsetMask || + operands & SpvImageOperandsConstOffsetMask) + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset); + + if (operands & SpvImageOperandsConstOffsetsMask) + assert(!"Constant offsets to texture gather not yet implemented"); + + if (operands & SpvImageOperandsSampleMask) { + assert(texop == nir_texop_txf); + texop = nir_texop_txf_ms; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); + } + } + /* We should have now consumed exactly all of the arguments */ + assert(idx == count); nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); -- cgit v1.2.3 From 4d73ca3c582cf6fa8f9d36f5d752eb44efe71982 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 5 Sep 2015 14:11:40 -0700 Subject: nir/spirv.h: Remove some cruft missed while merging There were merge conflicts in spirv.h that got missed because they were in a comment and so it still 
compiled. This gets rid of them and we should be on-par with upstream spirv->nir. --- src/glsl/nir/spirv.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv.h b/src/glsl/nir/spirv.h index 8472caf30e5..d289c687c76 100644 --- a/src/glsl/nir/spirv.h +++ b/src/glsl/nir/spirv.h @@ -1,26 +1,12 @@ /* ** Copyright (c) 2014-2015 The Khronos Group Inc. -<<<<<<< HEAD -** -======= ** ->>>>>>> fdo-personal/nir-spirv ** Permission is hereby granted, free of charge, to any person obtaining a copy ** of this software and/or associated documentation files (the "Materials"), ** to deal in the Materials without restriction, including without limitation ** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** and/or sell copies of the Materials, and to permit persons to whom the ** Materials are furnished to do so, subject to the following conditions: -<<<<<<< HEAD -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -======= ** ** The above copyright notice and this permission notice shall be included in ** all copies or substantial portions of the Materials. @@ -29,7 +15,6 @@ ** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND ** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ ** ->>>>>>> fdo-personal/nir-spirv ** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -48,21 +33,13 @@ ** Specification revision 31. 
** Enumeration tokens for SPIR-V, in various styles: ** C, C++, C++11, JSON, Lua, Python -<<<<<<< HEAD -** -======= ** ->>>>>>> fdo-personal/nir-spirv ** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL ** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL ** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL ** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL ** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] -<<<<<<< HEAD -** -======= ** ->>>>>>> fdo-personal/nir-spirv ** Some tokens act like mask values, which can be OR'd together, ** while others are mutually exclusive. The mask-like ones have ** "Mask" in their name, and a parallel enum that has the shift -- cgit v1.2.3 From 060720f0c9fc5333ba481315e63ba697c868f44c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Sep 2015 11:51:47 -0700 Subject: vk/wsi/x11: Actually block on X so we don't re-use busy buffers --- src/vulkan/anv_wsi_x11.c | 80 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index b412a2bab32..be4677ae7ed 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -50,7 +50,7 @@ x11_get_window_supported(struct anv_wsi_implementation *impl, static VkResult x11_get_surface_info(struct anv_wsi_implementation *impl, struct anv_device *device, - VkSurfaceDescriptionWindowWSI *window, + VkSurfaceDescriptionWindowWSI *vk_window, VkSurfaceInfoTypeWSI infoType, size_t* pDataSize, void* pData) { @@ -68,11 +68,21 @@ x11_get_surface_info(struct anv_wsi_implementation *impl, assert(*pDataSize >= sizeof(*props)); + VkPlatformHandleXcbWSI *vk_xcb_handle = vk_window->pPlatformHandle; + xcb_connection_t *conn = vk_xcb_handle->connection; + xcb_window_t win = (xcb_window_t)(uintptr_t)vk_window->pPlatformWindow; + + xcb_get_geometry_cookie_t cookie = 
xcb_get_geometry(conn, win); + xcb_get_geometry_reply_t *geom = xcb_get_geometry_reply(conn, cookie, + NULL); + VkExtent2D extent = { geom->width, geom->height }; + free(geom); + props->minImageCount = 2; props->maxImageCount = 4; - props->currentExtent = (VkExtent2D) { -1, -1 }; - props->minImageExtent = (VkExtent2D) { 1, 1 }; - props->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + props->currentExtent = extent; + props->minImageExtent = extent; + props->maxImageExtent = extent; props->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_WSI; props->currentTransform = VK_SURFACE_TRANSFORM_NONE_WSI; props->maxImageArraySize = 1; @@ -109,6 +119,14 @@ x11_get_surface_info(struct anv_wsi_implementation *impl, } } +struct x11_image { + struct anv_image * image; + struct anv_device_memory * memory; + xcb_pixmap_t pixmap; + xcb_get_geometry_cookie_t geom_cookie; + bool busy; +}; + struct x11_swap_chain { struct anv_swap_chain base; @@ -118,11 +136,7 @@ struct x11_swap_chain { VkExtent2D extent; uint32_t image_count; uint32_t next_image; - struct { - struct anv_image * image; - struct anv_device_memory * memory; - xcb_pixmap_t pixmap; - } images[0]; + struct x11_image images[0]; }; static VkResult @@ -165,8 +179,21 @@ x11_acquire_next_image(struct anv_swap_chain *anv_chain, uint32_t *image_index) { struct x11_swap_chain *chain = (struct x11_swap_chain *)anv_chain; + struct x11_image *image = &chain->images[chain->next_image]; + + if (image->busy) { + xcb_get_geometry_reply_t *geom = + xcb_get_geometry_reply(chain->conn, image->geom_cookie, NULL); + image->busy = false; + + if (geom->width != chain->extent.width || + geom->height != chain->extent.height) { + free(geom); + return VK_ERROR_OUT_OF_DATE_WSI; + } + free(geom); + } - anv_finishme("Implement real blocking AcquireNextImage"); *image_index = chain->next_image; chain->next_image = (chain->next_image + 1) % chain->image_count; return VK_SUCCESS; @@ -178,16 +205,14 @@ x11_queue_present(struct anv_swap_chain 
*anv_chain, uint32_t image_index) { struct x11_swap_chain *chain = (struct x11_swap_chain *)anv_chain; + struct x11_image *image = &chain->images[image_index]; - xcb_void_cookie_t cookie; - - xcb_pixmap_t pixmap = chain->images[image_index].pixmap; + assert(image_index < chain->image_count); - if (pixmap == XCB_NONE) - return vk_error(VK_ERROR_INVALID_VALUE); + xcb_void_cookie_t cookie; cookie = xcb_copy_area(chain->conn, - pixmap, + image->pixmap, chain->window, chain->gc, 0, 0, @@ -196,15 +221,33 @@ x11_queue_present(struct anv_swap_chain *anv_chain, chain->extent.height); xcb_discard_reply(chain->conn, cookie.sequence); + image->geom_cookie = xcb_get_geometry(chain->conn, chain->window); + image->busy = true; + xcb_flush(chain->conn); return VK_SUCCESS; } static VkResult -x11_destroy_swap_chain(struct anv_swap_chain *chain) +x11_destroy_swap_chain(struct anv_swap_chain *anv_chain) { - anv_device_free(chain->device, chain); + struct x11_swap_chain *chain = (struct x11_swap_chain *)anv_chain; + xcb_void_cookie_t cookie; + + for (uint32_t i = 0; i < chain->image_count; i++) { + struct x11_image *image = &chain->images[i]; + + if (image->busy) + xcb_discard_reply(chain->conn, image->geom_cookie.sequence); + + cookie = xcb_free_pixmap(chain->conn, image->pixmap); + xcb_discard_reply(chain->conn, cookie.sequence); + + /* TODO: Delete images and free memory */ + } + + anv_device_free(chain->base.device, chain); return VK_SUCCESS; } @@ -329,6 +372,7 @@ x11_create_swap_chain(struct anv_wsi_implementation *impl, chain->images[i].image = image; chain->images[i].memory = memory; chain->images[i].pixmap = pixmap; + chain->images[i].busy = false; xcb_discard_reply(chain->conn, cookie.sequence); } -- cgit v1.2.3 From 8040dc4ca5e05a24c30ad3a428498cf82ded6bf6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Sep 2015 12:13:07 -0700 Subject: vk/error: Handle ERROR_OUT_OF_DATE_WSI --- src/vulkan/anv_util.c | 84 ++++++++++++++++++++++++++---------------------- 
src/vulkan/anv_wsi_x11.c | 2 +- 2 files changed, 46 insertions(+), 40 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_util.c b/src/vulkan/anv_util.c index 94f14f98f8f..4c8fadcc805 100644 --- a/src/vulkan/anv_util.c +++ b/src/vulkan/anv_util.c @@ -88,52 +88,58 @@ __vk_errorf(VkResult error, const char *file, int line, const char *format, ...) va_list ap; char buffer[256]; - static const char *error_names[] = { - "VK_ERROR_UNKNOWN", - "VK_ERROR_UNAVAILABLE", - "VK_ERROR_INITIALIZATION_FAILED", - "VK_ERROR_OUT_OF_HOST_MEMORY", - "VK_ERROR_OUT_OF_DEVICE_MEMORY", - "VK_ERROR_DEVICE_ALREADY_CREATED", - "VK_ERROR_DEVICE_LOST", - "VK_ERROR_INVALID_POINTER", - "VK_ERROR_INVALID_VALUE", - "VK_ERROR_INVALID_HANDLE", - "VK_ERROR_INVALID_ORDINAL", - "VK_ERROR_INVALID_MEMORY_SIZE", - "VK_ERROR_INVALID_EXTENSION", - "VK_ERROR_INVALID_FLAGS", - "VK_ERROR_INVALID_ALIGNMENT", - "VK_ERROR_INVALID_FORMAT", - "VK_ERROR_INVALID_IMAGE", - "VK_ERROR_INVALID_DESCRIPTOR_SET_DATA", - "VK_ERROR_INVALID_QUEUE_TYPE", - "VK_ERROR_UNSUPPORTED_SHADER_IL_VERSION", - "VK_ERROR_BAD_SHADER_CODE", - "VK_ERROR_BAD_PIPELINE_DATA", - "VK_ERROR_NOT_MAPPABLE", - "VK_ERROR_MEMORY_MAP_FAILED", - "VK_ERROR_MEMORY_UNMAP_FAILED", - "VK_ERROR_INCOMPATIBLE_DEVICE", - "VK_ERROR_INCOMPATIBLE_DRIVER", - "VK_ERROR_INCOMPLETE_COMMAND_BUFFER", - "VK_ERROR_BUILDING_COMMAND_BUFFER", - "VK_ERROR_MEMORY_NOT_BOUND", - "VK_ERROR_INCOMPATIBLE_QUEUE", - "VK_ERROR_INVALID_LAYER", - }; - - assert(error <= VK_ERROR_UNKNOWN && error >= VK_ERROR_INVALID_LAYER); +#define ERROR_CASE(error) case error: error_str = #error; break; + + const char *error_str; + switch ((int32_t)error) { + ERROR_CASE(VK_ERROR_UNKNOWN) + ERROR_CASE(VK_ERROR_UNAVAILABLE) + ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED) + ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY) + ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY) + ERROR_CASE(VK_ERROR_DEVICE_ALREADY_CREATED) + ERROR_CASE(VK_ERROR_DEVICE_LOST) + ERROR_CASE(VK_ERROR_INVALID_POINTER) + 
ERROR_CASE(VK_ERROR_INVALID_VALUE) + ERROR_CASE(VK_ERROR_INVALID_HANDLE) + ERROR_CASE(VK_ERROR_INVALID_ORDINAL) + ERROR_CASE(VK_ERROR_INVALID_MEMORY_SIZE) + ERROR_CASE(VK_ERROR_INVALID_EXTENSION) + ERROR_CASE(VK_ERROR_INVALID_FLAGS) + ERROR_CASE(VK_ERROR_INVALID_ALIGNMENT) + ERROR_CASE(VK_ERROR_INVALID_FORMAT) + ERROR_CASE(VK_ERROR_INVALID_IMAGE) + ERROR_CASE(VK_ERROR_INVALID_DESCRIPTOR_SET_DATA) + ERROR_CASE(VK_ERROR_INVALID_QUEUE_TYPE) + ERROR_CASE(VK_ERROR_UNSUPPORTED_SHADER_IL_VERSION) + ERROR_CASE(VK_ERROR_BAD_SHADER_CODE) + ERROR_CASE(VK_ERROR_BAD_PIPELINE_DATA) + ERROR_CASE(VK_ERROR_NOT_MAPPABLE) + ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED) + ERROR_CASE(VK_ERROR_MEMORY_UNMAP_FAILED) + ERROR_CASE(VK_ERROR_INCOMPATIBLE_DEVICE) + ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER) + ERROR_CASE(VK_ERROR_INCOMPLETE_COMMAND_BUFFER) + ERROR_CASE(VK_ERROR_BUILDING_COMMAND_BUFFER) + ERROR_CASE(VK_ERROR_MEMORY_NOT_BOUND) + ERROR_CASE(VK_ERROR_INCOMPATIBLE_QUEUE) + ERROR_CASE(VK_ERROR_INVALID_LAYER) + ERROR_CASE(VK_ERROR_OUT_OF_DATE_WSI) + default: + assert(!"Unknown error"); + error_str = "unknown error"; + } + +#undef ERROR_CASE if (format) { va_start(ap, format); vsnprintf(buffer, sizeof(buffer), format, ap); va_end(ap); - fprintf(stderr, "%s:%d: %s (%s)\n", file, line, - buffer, error_names[-error - 1]); + fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str); } else { - fprintf(stderr, "%s:%d: %s\n", file, line, error_names[-error - 1]); + fprintf(stderr, "%s:%d: %s\n", file, line, error_str); } return error; diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index be4677ae7ed..f481a017001 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -189,7 +189,7 @@ x11_acquire_next_image(struct anv_swap_chain *anv_chain, if (geom->width != chain->extent.width || geom->height != chain->extent.height) { free(geom); - return VK_ERROR_OUT_OF_DATE_WSI; + return vk_error(VK_ERROR_OUT_OF_DATE_WSI); } free(geom); } -- cgit v1.2.3 From 
fd21f0681a36d5f9fe7d2180eae1f83dae799d99 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Sep 2015 09:29:40 -0700 Subject: Add the wayland protocol files to .gitignire --- src/vulkan/.gitignore | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore index 8bc1c2eda93..3ccb52c3d2d 100644 --- a/src/vulkan/.gitignore +++ b/src/vulkan/.gitignore @@ -2,3 +2,5 @@ /*_spirv_autogen.h /anv_entrypoints.c /anv_entrypoints.h +/wayland-drm-protocol.c +/wayland-drm-client-protocol.h -- cgit v1.2.3 From b908c67816709d0bdcdec02b7926c80531508405 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Sep 2015 10:25:21 -0700 Subject: vk: Rework the push constants data structure Previously, we simply had a big blob of stuff for "driver constants". Now, we have a very specific data structure that contains the driver constants that we care about. --- src/vulkan/anv_cmd_buffer.c | 46 ++++++++++++++++++++++++++++++++++++--------- src/vulkan/anv_compiler.cpp | 6 +----- src/vulkan/anv_private.h | 32 +++++++++++++++++++++++-------- 3 files changed, 62 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index f8a630bece5..17857d9759a 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -61,6 +61,39 @@ anv_cmd_state_init(struct anv_cmd_state *state) state->gen7.index_buffer = NULL; } +static VkResult +anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, + VkShaderStage stage, uint32_t size) +{ + struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; + + if (*ptr == NULL) { + *ptr = anv_device_alloc(cmd_buffer->device, size, 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (*ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + (*ptr)->size = size; + } else if ((*ptr)->size < size) { + void *new_data = anv_device_alloc(cmd_buffer->device, size, 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); 
+ if (new_data == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + memcpy(new_data, *ptr, (*ptr)->size); + anv_device_free(cmd_buffer->device, *ptr); + + *ptr = new_data; + (*ptr)->size = size; + } + + return VK_SUCCESS; +} + +#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ + anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ + (offsetof(struct anv_push_constants, field) + \ + sizeof(cmd_buffer->state.push_constants[0]->field))) + VkResult anv_CreateCommandBuffer( VkDevice _device, const VkCmdBufferCreateInfo* pCreateInfo, @@ -665,8 +698,8 @@ struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, VkShaderStage stage) { - struct anv_push_constant_data *data = - cmd_buffer->state.push_constants[stage].data; + struct anv_push_constants *data = + cmd_buffer->state.push_constants[stage]; struct brw_stage_prog_data *prog_data = cmd_buffer->state.pipeline->prog_data[stage]; @@ -701,14 +734,9 @@ void anv_CmdPushConstants( uint32_t stage; for_each_bit(stage, stageFlags) { - if (cmd_buffer->state.push_constants[stage].data == NULL) { - cmd_buffer->state.push_constants[stage].data = - anv_device_alloc(cmd_buffer->device, - sizeof(struct anv_push_constant_data), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - } + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); - memcpy(cmd_buffer->state.push_constants[stage].data->client_data + start, + memcpy(cmd_buffer->state.push_constants[stage]->client_data + start, values, length); } diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 5f189c7ce8e..06ae61aea5c 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -149,14 +149,10 @@ create_params_array(struct anv_device *device, * actually dereference any of the gl_constant_value pointers in the * params array, it doesn't really matter what we put here. 
*/ - struct anv_push_constant_data *null_data = NULL; + struct anv_push_constants *null_data = NULL; for (unsigned i = 0; i < num_client_params; i++) prog_data->param[i] = (const gl_constant_value *)&null_data->client_data[i * sizeof(float)]; - - for (unsigned i = 0; i < num_driver_params; i++) - prog_data->param[num_client_params + i] = - (const gl_constant_value *)&null_data->driver_data[i * sizeof(float)]; } static void diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 0c7d5e8a536..53eb0b30526 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -658,6 +658,8 @@ anv_descriptor_set_destroy(struct anv_device *device, #define MAX_SETS 8 #define MAX_RTS 8 #define MAX_PUSH_CONSTANTS_SIZE 128 +#define MAX_DYNAMIC_BUFFERS 16 +#define MAX_IMAGES 8 struct anv_pipeline_layout { struct { @@ -700,14 +702,28 @@ struct anv_descriptor_set_binding { uint32_t dynamic_offsets[128]; }; -struct anv_push_constant_data { - uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; - uint8_t driver_data[0]; -}; - struct anv_push_constants { - uint32_t driver_data_size; - struct anv_push_constant_data *data; + /* Current allocated size of this push constants data structure. + * Because a decent chunk of it may not be used (images on SKL, for + * instance), we won't actually allocate the entire structure up-front. + */ + uint32_t size; + + /* Push constant data provided by the client through vkPushConstants */ + uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; + + /* Our hardware only provides zero-based vertex and instance id so, in + * order to satisfy the vulkan requirements, we may have to push one or + * both of these into the shader. 
+ */ + uint32_t base_vertex; + uint32_t base_instance; + + /* Offsets for dynamically bound buffers */ + uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS]; + + /* Image data for image_load_store on pre-SKL */ + struct brw_image_param images[MAX_IMAGES]; }; /** State required while building cmd buffer */ @@ -731,7 +747,7 @@ struct anv_cmd_state { uint32_t state_vf[GEN8_3DSTATE_VF_length]; struct anv_vertex_binding vertex_bindings[MAX_VBS]; struct anv_descriptor_set_binding descriptors[MAX_SETS]; - struct anv_push_constants push_constants[VK_SHADER_STAGE_NUM]; + struct anv_push_constants * push_constants[VK_SHADER_STAGE_NUM]; struct { struct anv_buffer * index_buffer; -- cgit v1.2.3 From de5220c7ce97d6aa72b84bf60f65886139e48eaf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Sep 2015 10:41:57 -0700 Subject: vk/pipeline_layout: Move surface/sampler start from SoA to AoS This makes more sense to me and it's more consistent with anv_descriptor_set_layout. --- src/vulkan/anv_cmd_buffer.c | 4 ++-- src/vulkan/anv_pipeline.c | 4 ++-- src/vulkan/anv_private.h | 6 ++++-- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 17857d9759a..c2024e4dd60 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -449,7 +449,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_descriptor_slot *surface_slots = set_layout->stage[stage].surface_start; - uint32_t start = bias + layout->set[set].surface_start[stage]; + uint32_t start = bias + layout->set[set].stage[stage].surface_start; for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) { struct anv_surface_view *view = @@ -515,7 +515,7 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, struct anv_descriptor_slot *sampler_slots = set_layout->stage[stage].sampler_start; - uint32_t start = layout->set[set].sampler_start[stage]; + uint32_t start = 
layout->set[set].stage[stage].sampler_start; for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) { struct anv_sampler *sampler = diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 9372fb318df..dba2a5e7b46 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -395,9 +395,9 @@ VkResult anv_CreatePipelineLayout( layout->set[i].layout = set_layout; for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { - layout->set[i].surface_start[s] = surface_start[s]; + layout->set[i].stage[s].surface_start = surface_start[s]; surface_start[s] += set_layout->stage[s].surface_count; - layout->set[i].sampler_start[s] = sampler_start[s]; + layout->set[i].stage[s].sampler_start = sampler_start[s]; sampler_start[s] += set_layout->stage[s].sampler_count; layout->stage[s].surface_count += set_layout->stage[s].surface_count; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 53eb0b30526..f556161679f 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -664,8 +664,10 @@ anv_descriptor_set_destroy(struct anv_device *device, struct anv_pipeline_layout { struct { struct anv_descriptor_set_layout *layout; - uint32_t surface_start[VK_SHADER_STAGE_NUM]; - uint32_t sampler_start[VK_SHADER_STAGE_NUM]; + struct { + uint32_t surface_start; + uint32_t sampler_start; + } stage[VK_SHADER_STAGE_NUM]; } set[MAX_SETS]; uint32_t num_sets; -- cgit v1.2.3 From 7487371056175406a5408d0c2c1621b64c5bef92 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Sep 2015 15:52:43 -0700 Subject: vk/pipeline_layout: Add dynamic_offset_start and has_dynamic_offsets fields --- src/vulkan/anv_pipeline.c | 7 +++++++ src/vulkan/anv_private.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index dba2a5e7b46..833957a32e2 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -385,16 +385,23 @@ VkResult anv_CreatePipelineLayout( 
uint32_t sampler_start[VK_SHADER_STAGE_NUM] = { 0, }; for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { + layout->stage[s].has_dynamic_offsets = false; layout->stage[s].surface_count = 0; layout->stage[s].sampler_count = 0; } + uint32_t num_dynamic_offsets = 0; for (uint32_t i = 0; i < pCreateInfo->descriptorSetCount; i++) { ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[i]); layout->set[i].layout = set_layout; + layout->set[i].dynamic_offset_start = num_dynamic_offsets; + num_dynamic_offsets += set_layout->num_dynamic_buffers; for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { + if (set_layout->num_dynamic_buffers > 0) + layout->stage[s].has_dynamic_offsets = true; + layout->set[i].stage[s].surface_start = surface_start[s]; surface_start[s] += set_layout->stage[s].surface_count; layout->set[i].stage[s].sampler_start = sampler_start[s]; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index f556161679f..b290d60cc08 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -664,6 +664,7 @@ anv_descriptor_set_destroy(struct anv_device *device, struct anv_pipeline_layout { struct { struct anv_descriptor_set_layout *layout; + uint32_t dynamic_offset_start; struct { uint32_t surface_start; uint32_t sampler_start; @@ -673,6 +674,7 @@ struct anv_pipeline_layout { uint32_t num_sets; struct { + bool has_dynamic_offsets; uint32_t surface_count; uint32_t sampler_count; } stage[VK_SHADER_STAGE_NUM]; -- cgit v1.2.3 From c3086c54a832b4c8426cd9f7de6bcf627f26e9b4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Sep 2015 15:53:53 -0700 Subject: vk/compiler: Add a NIR pass for pushing dynamic buffer offset This commit just adds the NIR pass but does none of the uniform setup --- src/vulkan/Makefile.am | 1 + src/vulkan/anv_nir.h | 46 +++++++++ src/vulkan/anv_nir_apply_dynamic_offsets.c | 150 +++++++++++++++++++++++++++++ 3 files changed, 197 insertions(+) create mode 100644 src/vulkan/anv_nir.h create mode 
100644 src/vulkan/anv_nir_apply_dynamic_offsets.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 8b17af22eb4..aada6314b2a 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -69,6 +69,7 @@ VULKAN_SOURCES = \ anv_image.c \ anv_intel.c \ anv_meta.c \ + anv_nir_apply_dynamic_offsets.c \ anv_pipeline.c \ anv_private.h \ anv_query.c \ diff --git a/src/vulkan/anv_nir.h b/src/vulkan/anv_nir.h new file mode 100644 index 00000000000..1fd3484e059 --- /dev/null +++ b/src/vulkan/anv_nir.h @@ -0,0 +1,46 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "glsl/nir/nir.h" +#include "anv_private.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline VkShaderStage +anv_vk_shader_stage_for_mesa_stage(gl_shader_stage stage) +{ + /* The two enums happen to line up. 
*/ + return (VkShaderStage)(int)stage; +} + +void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data); + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c new file mode 100644 index 00000000000..367c4f82d64 --- /dev/null +++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c @@ -0,0 +1,150 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_nir.h" +#include "glsl/nir/nir_builder.h" + +struct apply_dynamic_offsets_state { + nir_shader *shader; + nir_builder builder; + + VkShaderStage stage; + struct anv_pipeline_layout *layout; + + uint32_t indices_start; +}; + +static bool +apply_dynamic_offsets_block(nir_block *block, void *void_state) +{ + struct apply_dynamic_offsets_state *state = void_state; + struct anv_descriptor_set_layout *set_layout; + const struct anv_descriptor_slot *slot; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + bool has_indirect = false; + uint32_t set, binding; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo_indirect: + has_indirect = true; + /* fallthrough */ + case nir_intrinsic_load_ubo: { + set = intrin->const_index[0]; + + nir_const_value *const_binding = nir_src_as_const_value(intrin->src[0]); + if (const_binding) { + binding = const_binding->u[0]; + } else { + assert(0 && "need more info from the ir for this."); + } + break; + } + default: + continue; /* the loop */ + } + + set_layout = state->layout->set[set].layout; + slot = &set_layout->stage[state->stage].surface_start[binding]; + if (slot->dynamic_slot < 0) + continue; + + uint32_t dynamic_index = state->layout->set[set].dynamic_offset_start + + slot->dynamic_slot; + + state->builder.cursor = nir_before_instr(&intrin->instr); + + nir_intrinsic_instr *offset_load = + nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform); + offset_load->num_components = 1; + offset_load->const_index[0] = state->indices_start + dynamic_index; + offset_load->const_index[1] = 0; + nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 1, NULL); + nir_builder_instr_insert(&state->builder, &offset_load->instr); + + nir_ssa_def *offset = &offset_load->dest.ssa; + if (has_indirect) { + assert(intrin->src[1].is_ssa); + offset = nir_iadd(&state->builder, 
intrin->src[1].ssa, offset); + } + + assert(intrin->dest.is_ssa); + + nir_intrinsic_instr *new_load = + nir_intrinsic_instr_create(state->shader, + nir_intrinsic_load_ubo_indirect); + new_load->num_components = intrin->num_components; + new_load->const_index[0] = intrin->const_index[0]; + new_load->const_index[1] = intrin->const_index[1]; + nir_src_copy(&new_load->src[0], &intrin->src[0], &new_load->instr); + new_load->src[1] = nir_src_for_ssa(offset); + nir_ssa_dest_init(&new_load->instr, &new_load->dest, + intrin->dest.ssa.num_components, + intrin->dest.ssa.name); + nir_builder_instr_insert(&state->builder, &new_load->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&new_load->dest.ssa), + state->shader); + + nir_instr_remove(&intrin->instr); + } + + return true; +} + +void +anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + struct apply_dynamic_offsets_state state = { + .shader = shader, + .stage = anv_vk_shader_stage_for_mesa_stage(shader->stage), + .layout = pipeline->layout, + .indices_start = shader->num_uniforms, + }; + + if (!state.layout || !state.layout->stage[state.stage].has_dynamic_offsets) + return; + + nir_foreach_overload(shader, overload) { + if (overload->impl) { + nir_builder_init(&state.builder, overload->impl); + nir_foreach_block(overload->impl, apply_dynamic_offsets_block, &state); + nir_metadata_preserve(overload->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + struct anv_push_constants *null_data = NULL; + for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) + prog_data->param[i + shader->num_uniforms] = + (const gl_constant_value *)&null_data->dynamic_offsets[i]; + + shader->num_uniforms += MAX_DYNAMIC_BUFFERS; +} -- cgit v1.2.3 From 2b4a2eb592fab5a61abeaa9b60c14bdc9d565fce Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Sep 2015 15:55:54 -0700 Subject: vk/compiler: Rework create_params_array --- 
src/vulkan/anv_compiler.cpp | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 06ae61aea5c..e19486ac95d 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -116,32 +116,26 @@ upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size) } static void -create_params_array(struct anv_device *device, +create_params_array(struct anv_pipeline *pipeline, struct gl_shader *shader, struct brw_stage_prog_data *prog_data) { - unsigned num_client_params; + VkShaderStage stage = anv_vk_shader_stage_for_mesa_stage(shader->Stage); + unsigned num_params = 0; + if (shader->num_uniform_components) { /* If the shader uses any push constants at all, we'll just give * them the maximum possible number */ - num_client_params = MAX_PUSH_CONSTANTS_SIZE / sizeof(float); - } else { - num_client_params = 0; + num_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); } - /* We'll need to add space here for images, texture rectangle, uniform - * offsets, etc. - */ - unsigned num_driver_params = 0; - - unsigned num_total_params = num_client_params + num_driver_params; - - if (num_total_params == 0) + if (num_params == 0) return; prog_data->param = (const gl_constant_value **) - anv_device_alloc(device, num_total_params * sizeof(gl_constant_value *), + anv_device_alloc(pipeline->device, + num_params * sizeof(gl_constant_value *), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER); /* We now set the param values to be offsets into a @@ -150,7 +144,7 @@ create_params_array(struct anv_device *device, * params array, it doesn't really matter what we put here. 
*/ struct anv_push_constants *null_data = NULL; - for (unsigned i = 0; i < num_client_params; i++) + for (unsigned i = 0; i < num_params; i++) prog_data->param[i] = (const gl_constant_value *)&null_data->client_data[i * sizeof(float)]; } @@ -207,7 +201,6 @@ really_do_vs_prog(struct brw_context *brw, GLuint program_size; const GLuint *program; struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; - struct brw_stage_prog_data *stage_prog_data = &prog_data->base.base; void *mem_ctx; struct gl_shader *vs = NULL; @@ -218,7 +211,7 @@ really_do_vs_prog(struct brw_context *brw, mem_ctx = ralloc_context(NULL); - create_params_array(pipeline->device, vs, stage_prog_data); + create_params_array(pipeline, vs, &prog_data->base.base); GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; prog_data->inputs_read = vp->program.Base.InputsRead; @@ -507,7 +500,7 @@ really_do_wm_prog(struct brw_context *brw, prog_data->computed_depth_mode = computed_depth_mode(&fp->program); - create_params_array(pipeline->device, fs, &prog_data->base); + create_params_array(pipeline, fs, &prog_data->base); prog_data->barycentric_interp_modes = brw_compute_barycentric_interp_modes(brw, key->flat_shade, @@ -613,7 +606,7 @@ brw_codegen_cs_prog(struct brw_context *brw, set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE); - create_params_array(pipeline->device, cs, &prog_data->base); + create_params_array(pipeline, cs, &prog_data->base); program = brw_cs_emit(brw, mem_ctx, key, prog_data, &cp->program, prog, &program_size); -- cgit v1.2.3 From 8c8ad6dddf127e7a4facd40ec378a93a5719e5b1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Sep 2015 15:56:19 -0700 Subject: vk: Use push constants for dynamic buffers --- src/vulkan/anv_cmd_buffer.c | 44 +++++++++++++++++++++----------------------- src/vulkan/anv_compiler.cpp | 8 ++++++++ 2 files changed, 29 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c 
b/src/vulkan/anv_cmd_buffer.c index c2024e4dd60..cca5cfae3ef 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -326,17 +326,28 @@ void anv_CmdBindDescriptorSets( ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); set_layout = layout->set[firstSet + i].layout; - cmd_buffer->state.descriptors[firstSet + i].set = set; + if (cmd_buffer->state.descriptors[firstSet + i].set != set) { + cmd_buffer->state.descriptors[firstSet + i].set = set; + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; + } + + if (set_layout->num_dynamic_buffers > 0) { + uint32_t s; + for_each_bit(s, set_layout->shader_stages) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, + dynamic_offsets); + uint32_t *offsets = + cmd_buffer->state.push_constants[s]->dynamic_offsets + + layout->set[firstSet + i].dynamic_offset_start; - assert(set_layout->num_dynamic_buffers < - ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets)); - memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets, - pDynamicOffsets + dynamic_slot, - set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); + memcpy(offsets, pDynamicOffsets + dynamic_slot, + set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); - cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; + } + cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; - dynamic_slot += set_layout->num_dynamic_buffers; + dynamic_slot += set_layout->num_dynamic_buffers; + } } } @@ -464,21 +475,8 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (state.map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - uint32_t offset; - if (surface_slots[b].dynamic_slot >= 0) { - uint32_t dynamic_offset = - d->dynamic_offsets[surface_slots[b].dynamic_slot]; - - offset = view->offset + dynamic_offset; - anv_fill_buffer_surface_state(cmd_buffer->device, - state.map, view->format, offset, - view->range - dynamic_offset); - } else { - offset = 
view->offset; - memcpy(state.map, view->surface_state.map, 64); - } - - add_surface_state_reloc(cmd_buffer, state, view->bo, offset); + memcpy(state.map, view->surface_state.map, 64); + add_surface_state_reloc(cmd_buffer, state, view->bo, view->offset); bt_map[start + b] = state.offset; } diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index e19486ac95d..dcd5581f957 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -26,6 +26,7 @@ #include #include "anv_private.h" +#include "anv_nir.h" #include #include /* brw_new_shader_program is here */ @@ -130,6 +131,9 @@ create_params_array(struct anv_pipeline *pipeline, num_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); } + if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) + num_params += MAX_DYNAMIC_BUFFERS; + if (num_params == 0) return; @@ -212,6 +216,8 @@ really_do_vs_prog(struct brw_context *brw, mem_ctx = ralloc_context(NULL); create_params_array(pipeline, vs, &prog_data->base.base); + anv_nir_apply_dynamic_offsets(pipeline, vs->Program->nir, + &prog_data->base.base); GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; prog_data->inputs_read = vp->program.Base.InputsRead; @@ -501,6 +507,7 @@ really_do_wm_prog(struct brw_context *brw, prog_data->computed_depth_mode = computed_depth_mode(&fp->program); create_params_array(pipeline, fs, &prog_data->base); + anv_nir_apply_dynamic_offsets(pipeline, fs->Program->nir, &prog_data->base); prog_data->barycentric_interp_modes = brw_compute_barycentric_interp_modes(brw, key->flat_shade, @@ -607,6 +614,7 @@ brw_codegen_cs_prog(struct brw_context *brw, set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE); create_params_array(pipeline, cs, &prog_data->base); + anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base); program = brw_cs_emit(brw, mem_ctx, key, prog_data, &cp->program, prog, &program_size); -- cgit v1.2.3 From 
e01d5a0471bdf7e679eb576da2706e5436695fe9 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Sep 2015 10:58:43 -0700 Subject: vk: Refactor anv_image_make_surface() Move the code that calculates the layout of 2D surfaces into a switch case. --- src/vulkan/anv_image.c | 56 +++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index fbcd435a1a0..0db87753e28 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -164,29 +164,43 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, const uint32_t i = 4; /* FINISHME: Stop hardcoding subimage alignment */ const uint32_t j = 4; /* FINISHME: Stop hardcoding subimage alignment */ - const uint32_t w0 = align_u32(extent->width, i); - const uint32_t h0 = align_u32(extent->height, j); - uint16_t qpitch; - uint32_t mt_width; - uint32_t mt_height; + uint16_t qpitch = min_qpitch; + uint32_t mt_width = 0; + uint32_t mt_height = 0; - if (levels == 1 && array_size == 1) { - qpitch = min_qpitch; - mt_width = w0; - mt_height = h0; - } else { - uint32_t w1 = align_u32(anv_minify(extent->width, 1), i); - uint32_t h1 = align_u32(anv_minify(extent->height, 1), j); - uint32_t w2 = align_u32(anv_minify(extent->width, 2), i); - - /* The QPitch equation is found in the Broadwell PRM >> Volume 5: Memory - * Views >> Common Surface Formats >> Surface Layout >> 2D Surfaces >> - * Surface Arrays >> For All Surface Other Than Separate Stencil Buffer: - */ - qpitch = h0 + h1 + 11 * j; - mt_width = MAX(w0, w1 + w2); - mt_height = array_size * qpitch; + switch (create_info->vk_info->imageType) { + case VK_IMAGE_TYPE_1D: + anv_finishme("VK_IMAGE_TYPE_1D"); + break; + case VK_IMAGE_TYPE_2D: { + const uint32_t w0 = align_u32(extent->width, i); + const uint32_t h0 = align_u32(extent->height, j); + + if (levels == 1 && array_size == 1) { + qpitch = min_qpitch; + mt_width = w0; + mt_height = 
h0; + } else { + uint32_t w1 = align_u32(anv_minify(extent->width, 1), i); + uint32_t h1 = align_u32(anv_minify(extent->height, 1), j); + uint32_t w2 = align_u32(anv_minify(extent->width, 2), i); + + /* The QPitch equation is found in the Broadwell PRM >> Volume 5: Memory + * Views >> Common Surface Formats >> Surface Layout >> 2D Surfaces >> + * Surface Arrays >> For All Surface Other Than Separate Stencil Buffer: + */ + qpitch = h0 + h1 + 11 * j; + mt_width = MAX(w0, w1 + w2); + mt_height = array_size * qpitch; + } + break; + } + case VK_IMAGE_TYPE_3D: + anv_finishme("VK_IMAGE_TYPE_3D"); + break; + default: + unreachable(!"bad VkImageType"); } assert(qpitch >= min_qpitch); -- cgit v1.2.3 From eed74e3a02e21867f47501ca9ce0be21e76f7510 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Sep 2015 11:04:08 -0700 Subject: vk: Teach vkCreateImage about layout of 3D surfaces Calling vkCreateImage() with VK_IMAGE_TYPE_3D now succeeds and computes the surface layout correctly. However, 3D images do not yet work for many other Vulkan entrypoints. --- src/vulkan/anv_image.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 0db87753e28..f0577a50a94 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -197,7 +197,21 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, break; } case VK_IMAGE_TYPE_3D: - anv_finishme("VK_IMAGE_TYPE_3D"); + /* The layout of 3D surfaces is described by the Broadwell PRM >> + * Volume 5: Memory Views >> Common Surface Formats >> Surface Layout >> + * 3D Surfaces. 
+ */ + for (uint32_t l = 0; l < levels; ++l) { + const uint32_t w_l = align_u32(anv_minify(extent->width, l), i); + const uint32_t h_l = align_u32(anv_minify(extent->height, l), j); + const uint32_t d_l = anv_minify(extent->depth, l); + + const uint32_t max_layers_horiz = MIN(d_l, 1u << l); + const uint32_t max_layers_vert = align_u32(d_l, 1u << l) / (1u << l); + + mt_width = MAX(mt_width, w_l * max_layers_horiz); + mt_height += h_l * max_layers_vert; + } break; default: unreachable(!"bad VkImageType"); @@ -254,7 +268,8 @@ anv_image_create(VkDevice _device, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); /* XXX: We don't handle any of these */ - anv_assert(pCreateInfo->imageType == VK_IMAGE_TYPE_2D); + anv_assert(pCreateInfo->imageType == VK_IMAGE_TYPE_2D || + pCreateInfo->imageType == VK_IMAGE_TYPE_3D); anv_assert(pCreateInfo->mipLevels > 0); anv_assert(pCreateInfo->arraySize > 0); anv_assert(pCreateInfo->samples == 1); -- cgit v1.2.3 From ffa61e157242ee2494485d25c2a9cc56f9022613 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Sep 2015 11:14:21 -0700 Subject: vk/gen8: Refactor setting of SURFACE_STATE::Depth The field's meaning depends on SURFACE_STATE::SurfaceType. Make that correlation explicit by switching on VkImageType. For good measure, add some PRM quotes too. 
--- src/vulkan/gen8_state.c | 50 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 5e79a37f402..6bfe577dd98 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -152,6 +152,8 @@ gen8_image_view_init(struct anv_image_view *iview, struct anv_surface *surface = anv_image_get_surface_for_aspect(image, range->aspect); + uint32_t depth = 1; /* RENDER_SURFACE_STATE::Depth */ + const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); @@ -171,11 +173,28 @@ gen8_image_view_init(struct anv_image_view *iview, .depth = anv_minify(image->extent.depth, range->baseMipLevel), }; - uint32_t depth = 1; - if (range->arraySize > 1) { + switch (image->type) { + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced + * by one for each increase from zero of Minimum Array Element. For + * example, if Minimum Array Element is set to 1024 on a 2D surface, + * the range of this field is reduced to [0,1023]. + */ depth = range->arraySize; - } else if (image->extent.depth > 1) { + break; + case VK_IMAGE_TYPE_3D: + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * If the volume texture is MIP-mapped, this field specifies the + * depth of the base MIP level. 
+ */ depth = image->extent.depth; + break; + default: + unreachable(!"bad VkImageType"); } static const uint32_t vk_to_gen_swizzle[] = { @@ -260,6 +279,8 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); + uint32_t depth = 1; /* RENDER_SURFACE_STATE::Depth */ + aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; anv_assert(pCreateInfo->arraySize > 0); @@ -276,11 +297,28 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), }; - uint32_t depth = 1; - if (pCreateInfo->arraySize > 1) { + switch (image->type) { + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced + * by one for each increase from zero of Minimum Array Element. For + * example, if Minimum Array Element is set to 1024 on a 2D surface, + * the range of this field is reduced to [0,1023]. + */ depth = pCreateInfo->arraySize; - } else if (image->extent.depth > 1) { + break; + case VK_IMAGE_TYPE_3D: + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * If the volume texture is MIP-mapped, this field specifies the + * depth of the base MIP level. 
+ */ depth = image->extent.depth; + break; + default: + unreachable(!"bad VkImageType"); } if (cmd_buffer) { -- cgit v1.2.3 From b659a066e993b7b107ec2698cb8fbb137076068e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Sep 2015 11:20:25 -0700 Subject: vk/gen8: Set RENDER_SURFACE_STATE::RenderTargetViewExtent --- src/vulkan/gen8_state.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 6bfe577dd98..26509f1d817 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -153,6 +153,7 @@ gen8_image_view_init(struct anv_image_view *iview, anv_image_get_surface_for_aspect(image, range->aspect); uint32_t depth = 1; /* RENDER_SURFACE_STATE::Depth */ + uint32_t rt_view_extent = 1; /* RENDER_SURFACE_STATE::RenderTargetViewExtent */ const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); @@ -184,6 +185,13 @@ gen8_image_view_init(struct anv_image_view *iview, * the range of this field is reduced to [0,1023]. */ depth = range->arraySize; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 1D and 2D Surfaces: + * This field must be set to the same value as the Depth field. + */ + rt_view_extent = depth; break; case VK_IMAGE_TYPE_3D: /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: @@ -192,6 +200,14 @@ gen8_image_view_init(struct anv_image_view *iview, * depth of the base MIP level. */ depth = image->extent.depth; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 3D Surfaces: This field + * indicates the extent of the accessible 'R' coordinates minus 1 on + * the LOD currently being rendered to. 
+ */ + rt_view_extent = iview->extent.depth; break; default: unreachable(!"bad VkImageType"); @@ -230,6 +246,7 @@ gen8_image_view_init(struct anv_image_view *iview, .Width = image->extent.width - 1, .Depth = depth - 1, .SurfacePitch = surface->stride - 1, + .RenderTargetViewExtent = rt_view_extent - 1, .MinimumArrayElement = range->baseArraySlice, .NumberofMultisamples = MULTISAMPLECOUNT_1, .XOffset = 0, @@ -280,6 +297,7 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, anv_format_for_vk_format(pCreateInfo->format); uint32_t depth = 1; /* RENDER_SURFACE_STATE::Depth */ + uint32_t rt_view_extent = 1; /* RENDER_SURFACE_STATE::RenderTargetViewExtent */ aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; @@ -308,6 +326,13 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, * the range of this field is reduced to [0,1023]. */ depth = pCreateInfo->arraySize; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 1D and 2D Surfaces: + * This field must be set to the same value as the Depth field. + */ + rt_view_extent = depth; break; case VK_IMAGE_TYPE_3D: /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: @@ -316,6 +341,14 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, * depth of the base MIP level. */ depth = image->extent.depth; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 3D Surfaces: This field + * indicates the extent of the accessible 'R' coordinates minus 1 on + * the LOD currently being rendered to. 
+ */ + rt_view_extent = aview->base.extent.depth; break; default: unreachable(!"bad VkImageType"); @@ -353,6 +386,7 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, .Width = image->extent.width - 1, .Depth = depth - 1, .SurfacePitch = surface->stride - 1, + .RenderTargetViewExtent = rt_view_extent - 1, .MinimumArrayElement = pCreateInfo->baseArraySlice, .NumberofMultisamples = MULTISAMPLECOUNT_1, .XOffset = 0, -- cgit v1.2.3 From 0ecafe028533ceb48619a453bc3c592363231a0a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Sep 2015 11:42:46 -0700 Subject: vk/meta: Rename meta_emit_blit() params Rename src -> src_view and dest -> dest_view. This reduces noise in the next patch's diff, which adds new params to the function. --- src/vulkan/anv_meta.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 6ce963d8374..607054b92ab 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -585,10 +585,10 @@ struct blit_region { static void meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_image_view *src, + struct anv_image_view *src_view, VkOffset3D src_offset, VkExtent3D src_extent, - struct anv_color_attachment_view *dest, + struct anv_color_attachment_view *dest_view, VkOffset3D dest_offset, VkExtent3D dest_extent) { @@ -613,8 +613,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)(src_offset.x + src_extent.width) / (float)src->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src->extent.height, + (float)(src_offset.x + src_extent.width) / (float)src_view->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_view->extent.height, }, }; @@ -624,8 +624,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)src_offset.x / (float)src->extent.width, 
- (float)(src_offset.y + src_extent.height) / (float)src->extent.height, + (float)src_offset.x / (float)src_view->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_view->extent.height, }, }; @@ -635,8 +635,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y, }, .tex_coord = { - (float)src_offset.x / (float)src->extent.width, - (float)src_offset.y / (float)src->extent.height, + (float)src_offset.x / (float)src_view->extent.width, + (float)src_offset.y / (float)src_view->extent.height, }, }; @@ -674,7 +674,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, .pDescriptors = (VkDescriptorInfo[]) { { - .imageView = anv_image_view_to_handle(src), + .imageView = anv_image_view_to_handle(src_view), .imageLayout = VK_IMAGE_LAYOUT_GENERAL }, } @@ -688,12 +688,12 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .attachmentCount = 1, .pAttachments = (VkAttachmentBindInfo[]) { { - .view = anv_attachment_view_to_handle(&dest->base), + .view = anv_attachment_view_to_handle(&dest_view->base), .layout = VK_IMAGE_LAYOUT_GENERAL } }, - .width = dest->base.extent.width, - .height = dest->base.extent.height, + .width = dest_view->base.extent.width, + .height = dest_view->base.extent.height, .layers = 1 }, &fb); @@ -704,7 +704,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, - .format = dest->view.format->vk_format, + .format = dest_view->view.format->vk_format, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_GENERAL, -- cgit v1.2.3 From 6221593ff81a19129eee53a1cbded0d009425a38 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Sep 2015 11:45:35 -0700 Subject: vk/meta: Partially implement vkCmdCopy*, vkCmdBlit* for 3D images Partially implement the below functions for 3D images: 
vkCmdCopyBufferToImage vkCmdCopyImageToBuffer vkCmdCopyImage vkCmdBlitImage Not all features work, and there is much for performance improvement. Beware that vkCmdCopyImage and vkCmdBlitImage are untested. Crucible proves that vkCmdCopyBufferToImage and vkCmdCopyImageToBuffer works, though. Supported: - copy regions with z offset Unsupported: - copy regions with extent.depth > 1 Crucible test results on master@d452d2b are: pass: func.miptree.r8g8b8a8-unorm.*.view-3d.* pass: func.miptree.d32-sfloat.*.view-3d.* fail: func.miptree.s8-uint.*.view-3d.* --- src/vulkan/anv_meta.c | 304 +++++++++++++++++++++++++++++++++++------------ src/vulkan/anv_private.h | 7 +- src/vulkan/gen8_state.c | 2 +- 3 files changed, 235 insertions(+), 78 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 607054b92ab..aa48b978427 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -385,6 +385,43 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_restore(cmd_buffer, &saved_state); } +static VkImageViewType +meta_blit_get_src_image_view_type(const struct anv_image *src_image) +{ + switch (src_image->type) { + case VK_IMAGE_TYPE_1D: + return VK_IMAGE_VIEW_TYPE_1D; + case VK_IMAGE_TYPE_2D: + return VK_IMAGE_VIEW_TYPE_2D; + case VK_IMAGE_TYPE_3D: + return VK_IMAGE_VIEW_TYPE_3D; + default: + assert(!"bad VkImageType"); + return 0; + } +} + +static uint32_t +meta_blit_get_dest_view_base_array_slice(const struct anv_image *dest_image, + const VkImageSubresource *dest_subresource, + const VkOffset3D *dest_offset) +{ + switch (dest_image->type) { + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + return dest_subresource->arraySlice; + case VK_IMAGE_TYPE_3D: + /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer, + * but meta does it anyway. When doing so, we translate the + * destination's z offset into an array offset. 
+ */ + return dest_offset->z; + default: + assert(!"bad VkImageType"); + return 0; + } +} + static void anv_device_init_meta_blit_state(struct anv_device *device) { @@ -404,7 +441,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) } ); - VkShaderModule fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, + VkShaderModule fsm_2d = GLSL_VK_SHADER_MODULE(device, FRAGMENT, out vec4 f_color; in vec4 v_tex_coord; layout(set = 0, binding = 0) uniform sampler2D u_tex; @@ -414,6 +451,16 @@ anv_device_init_meta_blit_state(struct anv_device *device) } ); + VkShaderModule fsm_3d = GLSL_VK_SHADER_MODULE(device, FRAGMENT, + out vec4 f_color; + in vec4 v_tex_coord; + layout(set = 0, binding = 0) uniform sampler3D u_tex; + void main() + { + f_color = texture(u_tex, v_tex_coord.xyz); + } + ); + VkShader vs; anv_CreateShader(anv_device_to_handle(device), &(VkShaderCreateInfo) { @@ -422,13 +469,21 @@ anv_device_init_meta_blit_state(struct anv_device *device) .pName = "main", }, &vs); - VkShader fs; + VkShader fs_2d; anv_CreateShader(anv_device_to_handle(device), &(VkShaderCreateInfo) { .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = fsm, + .module = fsm_2d, .pName = "main", - }, &fs); + }, &fs_2d); + + VkShader fs_3d; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = fsm_3d, + .pName = "main", + }, &fs_3d); VkPipelineVertexInputStateCreateInfo vi_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -441,7 +496,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, { .binding = 1, - .strideInBytes = 16, + .strideInBytes = 5 * sizeof(float), .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX }, }, @@ -465,7 +520,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) /* Texture Coordinate */ .location = 2, .binding = 1, - .format = VK_FORMAT_R32G32_SFLOAT, + .format = VK_FORMAT_R32G32B32_SFLOAT, .offsetInBytes = 8 } } @@ -494,61 +549,74 
@@ anv_device_init_meta_blit_state(struct anv_device *device) }, &device->meta_state.blit.pipeline_layout); - anv_graphics_pipeline_create(anv_device_to_handle(device), - &(VkGraphicsPipelineCreateInfo) { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = 2, - .pStages = (VkPipelineShaderStageCreateInfo[]) { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX, - .shader = vs, - .pSpecializationInfo = NULL - }, { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT, - .shader = fs, - .pSpecializationInfo = NULL - }, - }, - .pVertexInputState = &vi_create_info, - .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }, - .pRasterState = &(VkPipelineRasterStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, - .depthClipEnable = true, - .rasterizerDiscardEnable = false, - .fillMode = VK_FILL_MODE_SOLID, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CCW - }, - .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { .channelWriteMask = VK_CHANNEL_A_BIT | - VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, - } - }, - .flags = 0, - .layout = device->meta_state.blit.pipeline_layout, + VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX, + .shader = vs, + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = {0}, /* TEMPLATE VALUE! FILL ME IN! 
*/ + .pSpecializationInfo = NULL }, - &(struct anv_graphics_pipeline_create_info) { - .use_repclear = false, - .disable_viewport = true, - .disable_scissor = true, - .disable_vs = true, - .use_rectlist = true + }; + + const VkGraphicsPipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(pipeline_shader_stages), + .pStages = pipeline_shader_stages, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, }, - &device->meta_state.blit.pipeline); + .pRasterState = &(VkPipelineRasterStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + }; + + const struct anv_graphics_pipeline_create_info anv_pipeline_info = { + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }; + + pipeline_shader_stages[1].shader = fs_2d; + anv_graphics_pipeline_create(anv_device_to_handle(device), + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.blit.pipeline_2d_src); + + pipeline_shader_stages[1].shader = fs_3d; + anv_graphics_pipeline_create(anv_device_to_handle(device), + &vk_pipeline_info, &anv_pipeline_info, + 
&device->meta_state.blit.pipeline_3d_src); anv_DestroyShaderModule(anv_device_to_handle(device), vsm); anv_DestroyShader(anv_device_to_handle(device), vs); - anv_DestroyShaderModule(anv_device_to_handle(device), fsm); - anv_DestroyShader(anv_device_to_handle(device), fs); + anv_DestroyShaderModule(anv_device_to_handle(device), fsm_2d); + anv_DestroyShader(anv_device_to_handle(device), fs_2d); + anv_DestroyShaderModule(anv_device_to_handle(device), fsm_3d); + anv_DestroyShader(anv_device_to_handle(device), fs_3d); } static void @@ -559,11 +627,6 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_save(cmd_buffer, saved_state); - if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.blit.pipeline)) - anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit.pipeline); - /* We don't need anything here, only set if not already set. */ if (cmd_buffer->state.rs_state == NULL) anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), @@ -585,9 +648,11 @@ struct blit_region { static void meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *src_image, struct anv_image_view *src_view, VkOffset3D src_offset, VkExtent3D src_extent, + struct anv_image *dest_image, struct anv_color_attachment_view *dest_view, VkOffset3D dest_offset, VkExtent3D dest_extent) @@ -597,7 +662,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct blit_vb_data { float pos[2]; - float tex_coord[2]; + float tex_coord[3]; } *vb_data; unsigned vb_size = sizeof(struct vue_header) + 3 * sizeof(*vb_data); @@ -615,6 +680,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .tex_coord = { (float)(src_offset.x + src_extent.width) / (float)src_view->extent.width, (float)(src_offset.y + src_extent.height) / (float)src_view->extent.height, + (float)(src_offset.z + src_extent.depth) / (float)src_view->extent.depth, }, }; @@ -626,6 +692,7 @@ meta_emit_blit(struct anv_cmd_buffer 
*cmd_buffer, .tex_coord = { (float)src_offset.x / (float)src_view->extent.width, (float)(src_offset.y + src_extent.height) / (float)src_view->extent.height, + (float)(src_offset.z + src_extent.depth) / (float)src_view->extent.depth, }, }; @@ -637,6 +704,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .tex_coord = { (float)src_offset.x / (float)src_view->extent.width, (float)src_offset.y / (float)src_view->extent.height, + (float)src_offset.z / (float)src_view->extent.depth, }, }; @@ -747,6 +815,28 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .pAttachmentClearValues = NULL, }, VK_RENDER_PASS_CONTENTS_INLINE); + VkPipeline pipeline; + + switch (src_image->type) { + case VK_IMAGE_TYPE_1D: + anv_finishme("VK_IMAGE_TYPE_1D"); + pipeline = device->meta_state.blit.pipeline_2d_src; + break; + case VK_IMAGE_TYPE_2D: + pipeline = device->meta_state.blit.pipeline_2d_src; + break; + case VK_IMAGE_TYPE_3D: + pipeline = device->meta_state.blit.pipeline_3d_src; + break; + default: + unreachable(!"bad VkImageType"); + } + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + anv_CmdBindDynamicViewportState(anv_cmd_buffer_to_handle(cmd_buffer), anv_framebuffer_from_handle(fb)->vp_state); @@ -864,9 +954,11 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, cmd_buffer); meta_emit_blit(cmd_buffer, + anv_image_from_handle(src_image), &src_view, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }, + anv_image_from_handle(dest_image), &dest_view, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }); @@ -966,6 +1058,9 @@ void anv_CmdCopyImage( ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dest_image, destImage); + const VkImageViewType src_view_type = + meta_blit_get_src_image_view_type(src_image); + struct anv_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); @@ -976,7 +1071,7 
@@ void anv_CmdCopyImage( &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, + .viewType = src_view_type, .format = src_image->format->vk_format, .channels = { VK_CHANNEL_SWIZZLE_R, @@ -994,6 +1089,20 @@ void anv_CmdCopyImage( }, cmd_buffer); + const VkOffset3D dest_offset = { + .x = pRegions[r].destOffset.x, + .y = pRegions[r].destOffset.y, + .z = 0, + }; + + const uint32_t dest_array_slice = + meta_blit_get_dest_view_base_array_slice(dest_image, + &pRegions[r].destSubresource, + &pRegions[r].destOffset); + + if (pRegions[r].extent.depth > 1) + anv_finishme("FINISHME: copy multiple depth layers"); + struct anv_color_attachment_view dest_view; anv_color_attachment_view_init(&dest_view, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { @@ -1001,17 +1110,17 @@ void anv_CmdCopyImage( .image = destImage, .format = dest_image->format->vk_format, .mipLevel = pRegions[r].destSubresource.mipLevel, - .baseArraySlice = pRegions[r].destSubresource.arraySlice, + .baseArraySlice = dest_array_slice, .arraySize = 1, }, cmd_buffer); meta_emit_blit(cmd_buffer, - &src_view, + src_image, &src_view, pRegions[r].srcOffset, pRegions[r].extent, - &dest_view, - pRegions[r].destOffset, + dest_image, &dest_view, + dest_offset, pRegions[r].extent); } @@ -1033,6 +1142,9 @@ void anv_CmdBlitImage( ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dest_image, destImage); + const VkImageViewType src_view_type = + meta_blit_get_src_image_view_type(src_image); + struct anv_saved_state saved_state; anv_finishme("respect VkTexFilter"); @@ -1045,7 +1157,7 @@ void anv_CmdBlitImage( &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, + .viewType = src_view_type, .format = src_image->format->vk_format, .channels = { VK_CHANNEL_SWIZZLE_R, @@ -1063,6 +1175,20 @@ void anv_CmdBlitImage( }, cmd_buffer); + const 
VkOffset3D dest_offset = { + .x = pRegions[r].destOffset.x, + .y = pRegions[r].destOffset.y, + .z = 0, + }; + + const uint32_t dest_array_slice = + meta_blit_get_dest_view_base_array_slice(dest_image, + &pRegions[r].destSubresource, + &pRegions[r].destOffset); + + if (pRegions[r].destExtent.depth > 1) + anv_finishme("FINISHME: copy multiple depth layers"); + struct anv_color_attachment_view dest_view; anv_color_attachment_view_init(&dest_view, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { @@ -1070,17 +1196,17 @@ void anv_CmdBlitImage( .image = destImage, .format = dest_image->format->vk_format, .mipLevel = pRegions[r].destSubresource.mipLevel, - .baseArraySlice = pRegions[r].destSubresource.arraySlice, + .baseArraySlice = dest_array_slice, .arraySize = 1, }, cmd_buffer); meta_emit_blit(cmd_buffer, - &src_view, + src_image, &src_view, pRegions[r].srcOffset, pRegions[r].srcExtent, - &dest_view, - pRegions[r].destOffset, + dest_image, &dest_view, + dest_offset, pRegions[r].destExtent); } @@ -1181,6 +1307,20 @@ void anv_CmdCopyBufferToImage( }, cmd_buffer); + const VkOffset3D dest_offset = { + .x = pRegions[r].imageOffset.x, + .y = pRegions[r].imageOffset.y, + .z = 0, + }; + + const uint32_t dest_array_slice = + meta_blit_get_dest_view_base_array_slice(dest_image, + &pRegions[r].imageSubresource, + &pRegions[r].imageOffset); + + if (pRegions[r].imageExtent.depth > 1) + anv_finishme("FINISHME: copy multiple depth layers"); + struct anv_color_attachment_view dest_view; anv_color_attachment_view_init(&dest_view, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { @@ -1188,17 +1328,19 @@ void anv_CmdCopyBufferToImage( .image = anv_image_to_handle(dest_image), .format = proxy_format, .mipLevel = pRegions[r].imageSubresource.mipLevel, - .baseArraySlice = pRegions[r].imageSubresource.arraySlice, + .baseArraySlice = dest_array_slice, .arraySize = 1, }, cmd_buffer); meta_emit_blit(cmd_buffer, + anv_image_from_handle(srcImage), &src_view, (VkOffset3D) { 0, 0, 0 }, 
pRegions[r].imageExtent, + dest_image, &dest_view, - pRegions[r].imageOffset, + dest_offset, pRegions[r].imageExtent); anv_DestroyImage(vk_device, srcImage); @@ -1220,15 +1362,21 @@ void anv_CmdCopyImageToBuffer( VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); struct anv_saved_state saved_state; + const VkImageViewType src_view_type = + meta_blit_get_src_image_view_type(src_image); + meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { + if (pRegions[r].imageExtent.depth > 1) + anv_finishme("FINISHME: copy multiple depth layers"); + struct anv_image_view src_view; anv_image_view_init(&src_view, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, + .viewType = src_view_type, .format = src_image->format->vk_format, .channels = { VK_CHANNEL_SWIZZLE_R, @@ -1268,9 +1416,11 @@ void anv_CmdCopyImageToBuffer( cmd_buffer); meta_emit_blit(cmd_buffer, + anv_image_from_handle(srcImage), &src_view, pRegions[r].imageOffset, pRegions[r].imageExtent, + anv_image_from_handle(destImage), &dest_view, (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent); @@ -1501,7 +1651,9 @@ anv_device_finish_meta(struct anv_device *device) /* Blit */ anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline); + device->meta_state.blit.pipeline_2d_src); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_3d_src); anv_DestroyPipelineLayout(anv_device_to_handle(device), device->meta_state.blit.pipeline_layout); anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index b290d60cc08..9115393ad4b 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -356,7 +356,12 @@ struct anv_meta_state { } clear; struct { - VkPipeline pipeline; + /** Pipeline that blits from a 2D image. 
*/ + VkPipeline pipeline_2d_src; + + /** Pipeline that blits from a 3D image. */ + VkPipeline pipeline_3d_src; + VkPipelineLayout pipeline_layout; VkDescriptorSetLayout ds_layout; } blit; diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 26509f1d817..4c9161c0c9c 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -363,7 +363,7 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, } struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = SURFTYPE_2D, + .SurfaceType = image->type, .SurfaceArray = image->array_size > 1, .SurfaceFormat = format_info->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], -- cgit v1.2.3 From 622a317e4c6163190508fecec82111520a84015e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Sep 2015 14:37:09 -0700 Subject: vk/image: Teach vkCreateImage about layout of 1D surfaces Calling vkCreateImage() with VK_IMAGE_TYPE_1D now succeeds and computes the surface layout correctly. --- src/vulkan/anv_image.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index f0577a50a94..dce4208ed5f 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -72,7 +72,7 @@ static const struct anv_surf_type_limits { int32_t height; int32_t depth; } anv_surf_type_limits[] = { - [SURFTYPE_1D] = {16384, 0, 2048}, + [SURFTYPE_1D] = {16384, 1, 2048}, [SURFTYPE_2D] = {16384, 16384, 2048}, [SURFTYPE_3D] = {2048, 2048, 2048}, [SURFTYPE_CUBE] = {16384, 16384, 340}, @@ -171,8 +171,13 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, switch (create_info->vk_info->imageType) { case VK_IMAGE_TYPE_1D: - anv_finishme("VK_IMAGE_TYPE_1D"); - break; + /* From the Broadwell PRM >> Memory Views >> Common Surface Formats >> + * Surface Layout >> 1D Surfaces: + * + * One-dimensional surfaces are identical to 2D surfaces with height of one. 
+ * + * So fallthrough... + */ case VK_IMAGE_TYPE_2D: { const uint32_t w0 = align_u32(extent->width, i); const uint32_t h0 = align_u32(extent->height, j); @@ -267,9 +272,6 @@ anv_image_create(VkDevice _device, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); - /* XXX: We don't handle any of these */ - anv_assert(pCreateInfo->imageType == VK_IMAGE_TYPE_2D || - pCreateInfo->imageType == VK_IMAGE_TYPE_3D); anv_assert(pCreateInfo->mipLevels > 0); anv_assert(pCreateInfo->arraySize > 0); anv_assert(pCreateInfo->samples == 1); -- cgit v1.2.3 From 85520aa0703126438b48e5ef42f8cded6cb4ab8b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Sep 2015 15:16:57 -0700 Subject: vk/image: Remove stale FINISHME for non-2D image views gen8_image_view_init() now supports 1D, 2D, and 3D image views. --- src/vulkan/gen8_state.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 4c9161c0c9c..5646637e4a0 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -161,9 +161,6 @@ gen8_image_view_init(struct anv_image_view *iview, const struct anv_image_view_info *view_type_info = anv_image_view_info_for_vk_image_view_type(pCreateInfo->viewType); - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) - anv_finishme("non-2D image views"); - view->bo = image->bo; view->offset = image->offset + surface->offset; view->format = format_info; -- cgit v1.2.3 From 222ddac810fae965507b16702eb4e4c6eee97e16 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Sep 2015 15:59:23 -0700 Subject: anv: Document the index and offset parameters of anv_bo --- src/vulkan/anv_private.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 9115393ad4b..5e4fa35e208 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -206,8 +206,19 @@ anv_vector_finish(struct anv_vector *queue) struct anv_bo { 
int gem_handle; + + /* Index into the current validation list. This is used by the + * validation list building alrogithm to track which buffers are already + * in the validation list so that we can ensure uniqueness. + */ uint32_t index; + + /* Last known offset. This value is provided by the kernel when we + * execbuf and is used as the presumed offset for the next bunch of + * relocations. + */ uint64_t offset; + uint64_t size; void *map; }; -- cgit v1.2.3 From 74bf7aa07c0e87cafb1a9fb085a2fe99a548c8de Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Sep 2015 14:52:26 -0700 Subject: anv/allocator: Take the device mutex when growing a block pool We don't have any locking issues yet because we use the pool size itself as a mutex in block_pool_alloc to guarantee that only one thread is resizing at a time. However, we are about to add support for growing the block pool at both ends. This introduces two potential races: 1) You could have two block_pool_alloc() calls that both try to grow the block pool, one from each end. 2) The relocation handling code will now have to think about not only the bo that we use for the block pool but also the offset from the start of that bo to the center of the block pool. It's possible that the block pool growing code could race with the relocation handling code and get a bo and offset out of sync. Grabbing the device mutex solves both of these problems. Thanks to (2), we can't really do anything more granular. 
--- src/vulkan/anv_allocator.c | 18 +++++++++++++----- src/vulkan/tests/block_pool_no_free.c | 2 ++ src/vulkan/tests/state_pool.c | 4 ++++ src/vulkan/tests/state_pool_free_list_only.c | 2 ++ src/vulkan/tests/state_pool_no_free.c | 2 ++ 5 files changed, 23 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 121ce039250..201cc931cbb 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -287,6 +287,8 @@ anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) int gem_handle; struct anv_mmap_cleanup *cleanup; + pthread_mutex_lock(&pool->device->mutex); + if (old_size == 0) { size = 32 * pool->block_size; } else { @@ -295,17 +297,17 @@ anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) cleanup = anv_vector_add(&pool->mmap_cleanups); if (!cleanup) - return 0; + goto fail; *cleanup = ANV_MMAP_CLEANUP_INIT; if (old_size == 0) pool->fd = memfd_create("block pool", MFD_CLOEXEC); if (pool->fd == -1) - return 0; + goto fail; if (ftruncate(pool->fd, size) == -1) - return 0; + goto fail; /* First try to see if mremap can grow the map in place. 
*/ map = MAP_FAILED; @@ -324,11 +326,11 @@ anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) cleanup->size = size; } if (map == MAP_FAILED) - return 0; + goto fail; gem_handle = anv_gem_userptr(pool->device, map, size); if (gem_handle == 0) - return 0; + goto fail; cleanup->gem_handle = gem_handle; /* Now that we successfull allocated everything, we can write the new @@ -339,7 +341,13 @@ anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) pool->bo.map = map; pool->bo.index = 0; + pthread_mutex_unlock(&pool->device->mutex); + return size; + +fail: + pthread_mutex_unlock(&pool->device->mutex); + return 0; } uint32_t diff --git a/src/vulkan/tests/block_pool_no_free.c b/src/vulkan/tests/block_pool_no_free.c index 898a82b0909..d40504c4a87 100644 --- a/src/vulkan/tests/block_pool_no_free.c +++ b/src/vulkan/tests/block_pool_no_free.c @@ -51,6 +51,7 @@ static void run_test() struct anv_device device; struct anv_block_pool pool; + pthread_mutex_init(&device.mutex, NULL); anv_block_pool_init(&pool, &device, 16); for (unsigned i = 0; i < NUM_THREADS; i++) { @@ -95,6 +96,7 @@ static void run_test() } anv_block_pool_finish(&pool); + pthread_mutex_destroy(&device.mutex); } int main(int argc, char **argv) diff --git a/src/vulkan/tests/state_pool.c b/src/vulkan/tests/state_pool.c index e235ee9b394..878ec19a595 100644 --- a/src/vulkan/tests/state_pool.c +++ b/src/vulkan/tests/state_pool.c @@ -38,6 +38,8 @@ int main(int argc, char **argv) struct anv_block_pool block_pool; struct anv_state_pool state_pool; + pthread_mutex_init(&device.mutex, NULL); + for (unsigned i = 0; i < NUM_RUNS; i++) { anv_block_pool_init(&block_pool, &device, 256); anv_state_pool_init(&state_pool, &block_pool); @@ -50,4 +52,6 @@ int main(int argc, char **argv) anv_state_pool_finish(&state_pool); anv_block_pool_finish(&block_pool); } + + pthread_mutex_destroy(&device.mutex); } diff --git a/src/vulkan/tests/state_pool_free_list_only.c 
b/src/vulkan/tests/state_pool_free_list_only.c index 9e89cf6425f..2f4eb47fe45 100644 --- a/src/vulkan/tests/state_pool_free_list_only.c +++ b/src/vulkan/tests/state_pool_free_list_only.c @@ -37,6 +37,7 @@ int main(int argc, char **argv) struct anv_block_pool block_pool; struct anv_state_pool state_pool; + pthread_mutex_init(&device.mutex, NULL); anv_block_pool_init(&block_pool, &device, 4096); anv_state_pool_init(&state_pool, &block_pool); @@ -61,4 +62,5 @@ int main(int argc, char **argv) anv_state_pool_finish(&state_pool); anv_block_pool_finish(&block_pool); + pthread_mutex_destroy(&device.mutex); } diff --git a/src/vulkan/tests/state_pool_no_free.c b/src/vulkan/tests/state_pool_no_free.c index 4b3ca78974f..4b248c2ee66 100644 --- a/src/vulkan/tests/state_pool_no_free.c +++ b/src/vulkan/tests/state_pool_no_free.c @@ -58,6 +58,7 @@ static void run_test() struct anv_block_pool block_pool; struct anv_state_pool state_pool; + pthread_mutex_init(&device.mutex, NULL); anv_block_pool_init(&block_pool, &device, 64); anv_state_pool_init(&state_pool, &block_pool); @@ -106,6 +107,7 @@ static void run_test() anv_state_pool_finish(&state_pool); anv_block_pool_finish(&block_pool); + pthread_mutex_destroy(&device.mutex); } int main(int argc, char **argv) -- cgit v1.2.3 From 8c6bc1e85d4b2eebf90a5ac862d650c9973bb126 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Sep 2015 16:54:56 -0700 Subject: anv/allocator: Create 2GB memfd up-front for the block pool --- src/vulkan/anv_allocator.c | 21 ++++++++++++--------- src/vulkan/anv_private.h | 3 +++ 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 201cc931cbb..1b116a3a49a 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -254,6 +254,18 @@ anv_block_pool_init(struct anv_block_pool *pool, pool->bo.offset = 0; pool->block_size = block_size; pool->free_list = ANV_FREE_LIST_EMPTY; + + pool->fd = 
memfd_create("block pool", MFD_CLOEXEC); + if (pool->fd == -1) + return; + + /* Just make it 2GB up-front. The Linux kernel won't actually back it + * with pages until we either map and fault on one of them or we use + * userptr and send a chunk of it off to the GPU. + */ + if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) + return; + anv_vector_init(&pool->mmap_cleanups, round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); @@ -300,15 +312,6 @@ anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) goto fail; *cleanup = ANV_MMAP_CLEANUP_INIT; - if (old_size == 0) - pool->fd = memfd_create("block pool", MFD_CLOEXEC); - - if (pool->fd == -1) - goto fail; - - if (ftruncate(pool->fd, size) == -1) - goto fail; - /* First try to see if mremap can grow the map in place. */ map = MAP_FAILED; if (old_size > 0) diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 5e4fa35e208..5931e0af98d 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -268,6 +268,9 @@ struct anv_block_pool { struct anv_block_state state; }; +/* Block pools are backed by a fixed-size 2GB memfd */ +#define BLOCK_POOL_MEMFD_SIZE (1ull << 32) + static inline uint32_t anv_block_pool_size(struct anv_block_pool *pool) { -- cgit v1.2.3 From c55fa89251a1188b312aa09ba260cba7a411a282 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Sep 2015 15:00:43 -0700 Subject: anv/allocator: Use a signed 32-bit offset for the free list This has the unfortunate side-effect of making it so that we can't have a block pool bigger than 1GB. However, that's unlikely to happen and, for the sake of bi-directional block pools, we need to negative offsets. 
--- src/vulkan/anv_allocator.c | 23 ++++++++++++++++------- src/vulkan/anv_private.h | 2 +- 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 1b116a3a49a..6c7c85d5e74 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -151,7 +151,7 @@ round_to_power_of_two(uint32_t value) } static bool -anv_free_list_pop(union anv_free_list *list, void **map, uint32_t *offset) +anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset) { union anv_free_list current, new, old; @@ -164,7 +164,7 @@ anv_free_list_pop(union anv_free_list *list, void **map, uint32_t *offset) */ __sync_synchronize(); - uint32_t *next_ptr = *map + current.offset; + int32_t *next_ptr = *map + current.offset; new.offset = VG_NOACCESS_READ(next_ptr); new.count = current.count + 1; old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); @@ -179,10 +179,10 @@ anv_free_list_pop(union anv_free_list *list, void **map, uint32_t *offset) } static void -anv_free_list_push(union anv_free_list *list, void *map, uint32_t offset) +anv_free_list_push(union anv_free_list *list, void *map, int32_t offset) { union anv_free_list current, old, new; - uint32_t *next_ptr = map + offset; + int32_t *next_ptr = map + offset; old = *list; do { @@ -307,6 +307,12 @@ anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) size = old_size * 2; } + /* We can't have a block pool bigger than 1GB because we use signed + * 32-bit offsets in the free list and we don't want overflow. We + * should never need a block pool bigger than 1GB anyway. + */ + assert(size <= (1u << 31)); + cleanup = anv_vector_add(&pool->mmap_cleanups); if (!cleanup) goto fail; @@ -356,11 +362,12 @@ fail: uint32_t anv_block_pool_alloc(struct anv_block_pool *pool) { - uint32_t offset; + int32_t offset; struct anv_block_state state, old, new; /* Try free list first. 
*/ if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { + assert(offset >= 0); assert(pool->map); return offset; } @@ -411,12 +418,14 @@ static uint32_t anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, struct anv_block_pool *block_pool) { - uint32_t offset; + int32_t offset; struct anv_block_state block, old, new; /* Try free list first. */ - if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) + if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) { + assert(offset >= 0); return offset; + } /* If free list was empty (or somebody raced us and took the items) we * allocate a new item from the end of the block */ diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 5931e0af98d..ef7c7083bb0 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -229,7 +229,7 @@ struct anv_bo { */ union anv_free_list { struct { - uint32_t offset; + int32_t offset; /* A simple count that is incremented every time the head changes. 
*/ uint32_t count; -- cgit v1.2.3 From 55daed947d3a0a7802733443a5f922dcc28a5770 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Sep 2015 17:43:47 -0700 Subject: vk/allocator: Split block_pool_alloc into two functions --- src/vulkan/anv_allocator.c | 53 +++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 6c7c85d5e74..6393233f0c4 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -359,11 +359,40 @@ fail: return 0; } +static uint32_t +anv_block_pool_alloc_new(struct anv_block_pool *pool, + struct anv_block_state *pool_state) +{ + struct anv_block_state state, old, new; + + while (1) { + state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size); + if (state.next < state.end) { + assert(pool->map); + return state.next; + } else if (state.next == state.end) { + /* We allocated the first block outside the pool, we have to grow it. + * pool->next_block acts a mutex: threads who try to allocate now will + * get block indexes above the current limit and hit futex_wait + * below. */ + new.next = state.next + pool->block_size; + new.end = anv_block_pool_grow(pool, state.end); + assert(new.end > 0); + old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64); + if (old.next != state.next) + futex_wake(&pool_state->end, INT_MAX); + return state.next; + } else { + futex_wait(&pool_state->end, state.end); + continue; + } + } +} + uint32_t anv_block_pool_alloc(struct anv_block_pool *pool) { int32_t offset; - struct anv_block_state state, old, new; /* Try free list first. 
*/ if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { @@ -372,27 +401,7 @@ anv_block_pool_alloc(struct anv_block_pool *pool) return offset; } - restart: - state.u64 = __sync_fetch_and_add(&pool->state.u64, pool->block_size); - if (state.next < state.end) { - assert(pool->map); - return state.next; - } else if (state.next == state.end) { - /* We allocated the first block outside the pool, we have to grow it. - * pool->next_block acts a mutex: threads who try to allocate now will - * get block indexes above the current limit and hit futex_wait - * below. */ - new.next = state.next + pool->block_size; - new.end = anv_block_pool_grow(pool, state.end); - assert(new.end > 0); - old.u64 = __sync_lock_test_and_set(&pool->state.u64, new.u64); - if (old.next != state.next) - futex_wake(&pool->state.end, INT_MAX); - return state.next; - } else { - futex_wait(&pool->state.end, state.end); - goto restart; - } + return anv_block_pool_alloc_new(pool, &pool->state); } void -- cgit v1.2.3 From 15624fcf55bff9d16f3eaa461e4a3010bbe0e4ba Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Sep 2015 17:57:40 -0700 Subject: anv/tests: Refactor the block_pool_no_free test This simply breaks the monotonicity check out into its own function --- src/vulkan/tests/block_pool_no_free.c | 49 +++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/vulkan/tests/block_pool_no_free.c b/src/vulkan/tests/block_pool_no_free.c index d40504c4a87..71eb90103ef 100644 --- a/src/vulkan/tests/block_pool_no_free.c +++ b/src/vulkan/tests/block_pool_no_free.c @@ -46,23 +46,8 @@ static void *alloc_blocks(void *_job) return NULL; } -static void run_test() +static void validate_monotonic(uint32_t **blocks) { - struct anv_device device; - struct anv_block_pool pool; - - pthread_mutex_init(&device.mutex, NULL); - anv_block_pool_init(&pool, &device, 16); - - for (unsigned i = 0; i < NUM_THREADS; i++) { - jobs[i].pool = &pool; - 
jobs[i].id = i; - pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]); - } - - for (unsigned i = 0; i < NUM_THREADS; i++) - pthread_join(jobs[i].thread, NULL); - /* A list of indices, one per thread */ unsigned next[NUM_THREADS]; memset(next, 0, sizeof(next)); @@ -76,8 +61,8 @@ static void run_test() if (next[i] >= BLOCKS_PER_THREAD) continue; - if (thread_max < jobs[i].blocks[next[i]]) { - thread_max = jobs[i].blocks[next[i]]; + if (thread_max < blocks[i][next[i]]) { + thread_max = blocks[i][next[i]]; max_thread_idx = i; } } @@ -89,11 +74,35 @@ static void run_test() break; /* That next element had better be higher than the previous highest */ - assert(jobs[max_thread_idx].blocks[next[max_thread_idx]] > highest); + assert(blocks[max_thread_idx][next[max_thread_idx]] > highest); - highest = jobs[max_thread_idx].blocks[next[max_thread_idx]]; + highest = blocks[max_thread_idx][next[max_thread_idx]]; next[max_thread_idx]++; } +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&pool, &device, 16); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + uint32_t *block_ptrs[NUM_THREADS]; + for (unsigned i = 0; i < NUM_THREADS; i++) + block_ptrs[i] = jobs[i].blocks; + + validate_monotonic(block_ptrs); anv_block_pool_finish(&pool); pthread_mutex_destroy(&device.mutex); -- cgit v1.2.3 From 5f57ff7e18c1c545aafcdc267bc22594cef81d3c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Sep 2015 17:46:09 -0700 Subject: anv/allocator: Make the block pool double-ended This allows us to allocate from either side of the block pool in a consistent way. If you use the previous block_pool_alloc function, you will get offsets from the start of the pool as normal. 
If you use the new block_pool_alloc_back function, you will get a negative index that corresponds to something in the "back" of the pool. --- src/vulkan/anv_allocator.c | 171 +++++++++++++++++++++++++++++++--- src/vulkan/anv_private.h | 34 ++++++- src/vulkan/tests/block_pool_no_free.c | 11 ++- 3 files changed, 197 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 6393233f0c4..57b42eb4202 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -241,7 +241,7 @@ anv_ptr_free_list_push(void **list, void *elem) } static uint32_t -anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size); +anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state); void anv_block_pool_init(struct anv_block_pool *pool, @@ -252,8 +252,10 @@ anv_block_pool_init(struct anv_block_pool *pool, pool->device = device; pool->bo.gem_handle = 0; pool->bo.offset = 0; + pool->bo.size = 0; pool->block_size = block_size; pool->free_list = ANV_FREE_LIST_EMPTY; + pool->back_free_list = ANV_FREE_LIST_EMPTY; pool->fd = memfd_create("block pool", MFD_CLOEXEC); if (pool->fd == -1) @@ -269,9 +271,13 @@ anv_block_pool_init(struct anv_block_pool *pool, anv_vector_init(&pool->mmap_cleanups, round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); - /* Immediately grow the pool so we'll have a backing bo. */ pool->state.next = 0; - pool->state.end = anv_block_pool_grow(pool, 0); + pool->state.end = 0; + pool->back_state.next = 0; + pool->back_state.end = 0; + + /* Immediately grow the pool so we'll have a backing bo. */ + pool->state.end = anv_block_pool_grow(pool, &pool->state); } void @@ -291,8 +297,34 @@ anv_block_pool_finish(struct anv_block_pool *pool) close(pool->fd); } +#define PAGE_SIZE 4096 + +/** Grows and re-centers the block pool. 
+ * + * We grow the block pool in one or both directions in such a way that the + * following conditions are met: + * + * 1) The size of the entire pool is always a power of two. + * + * 2) The pool only grows on both ends. Neither end can get + * shortened. + * + * 3) At the end of the allocation, we have about twice as much space + * allocated for each end as we have used. This way the pool doesn't + * grow too far in one direction or the other. + * + * 4) If the _alloc_back() has never been called, then the back portion of + * the pool retains a size of zero. (This makes it easier for users of + * the block pool that only want a one-sided pool.) + * + * 5) We have enough space allocated for at least one more block in + * whichever side `state` points to. + * + * 6) The center of the pool is always aligned to both the block_size of + * the pool and a 4K CPU page. + */ static uint32_t -anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) +anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) { size_t size; void *map; @@ -301,8 +333,39 @@ anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) pthread_mutex_lock(&pool->device->mutex); + assert(state == &pool->state || state == &pool->back_state); + + /* Gather a little usage information on the pool. Since we may have + * threadsd waiting in queue to get some storage while we resize, it's + * actually possible that total_used will be larger than old_size. In + * particular, block_pool_alloc() increments state->next prior to + * calling block_pool_grow, so this ensures that we get enough space for + * which ever side tries to grow the pool. + * + * We align to a page size because it makes it easier to do our + * calculations later in such a way that we state page-aigned. 
+ */ + uint32_t back_used = align_u32(pool->back_state.next, PAGE_SIZE); + uint32_t front_used = align_u32(pool->state.next, PAGE_SIZE); + uint32_t total_used = front_used + back_used; + + assert(state == &pool->state || back_used > 0); + + size_t old_size = pool->bo.size; + + if (old_size != 0 && + back_used * 2 <= pool->center_bo_offset && + front_used * 2 <= (old_size - pool->center_bo_offset)) { + /* If we're in this case then this isn't the firsta allocation and we + * already have enough space on both sides to hold double what we + * have allocated. There's nothing for us to do. + */ + goto done; + } + if (old_size == 0) { - size = 32 * pool->block_size; + /* This is the first allocation */ + size = MAX2(32 * pool->block_size, PAGE_SIZE); } else { size = old_size * 2; } @@ -313,6 +376,35 @@ anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) */ assert(size <= (1u << 31)); + /* We compute a new center_bo_offset such that, when we double the size + * of the pool, we maintain the ratio of how much is used by each side. + * This way things should remain more-or-less balanced. + */ + uint32_t center_bo_offset; + if (back_used == 0) { + /* If we're in this case then we have never called alloc_back(). In + * this case, we want keep the offset at 0 to make things as simple + * as possible for users that don't care about back allocations. 
+ */ + center_bo_offset = 0; + } else { + center_bo_offset = ((uint64_t)size * back_used) / total_used; + + /* Align down to a multiple of both the block size and page size */ + uint32_t granularity = MAX2(pool->block_size, PAGE_SIZE); + assert(util_is_power_of_two(granularity)); + center_bo_offset &= ~(granularity - 1); + + assert(center_bo_offset >= back_used); + } + + assert(center_bo_offset % pool->block_size == 0); + assert(center_bo_offset % PAGE_SIZE == 0); + + /* Assert that we only ever grow the pool */ + assert(center_bo_offset >= pool->back_state.end); + assert(size - center_bo_offset >= pool->back_state.end); + cleanup = anv_vector_add(&pool->mmap_cleanups); if (!cleanup) goto fail; @@ -320,7 +412,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) /* First try to see if mremap can grow the map in place. */ map = MAP_FAILED; - if (old_size > 0) + if (old_size > 0 && center_bo_offset == 0) map = mremap(pool->map, old_size, size, 0); if (map == MAP_FAILED) { /* Just leak the old map until we destroy the pool. We can't munmap it @@ -330,7 +422,8 @@ anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) * should try to get some numbers. */ map = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, pool->fd, 0); + MAP_SHARED | MAP_POPULATE, pool->fd, + BLOCK_POOL_MEMFD_CENTER - center_bo_offset); cleanup->map = map; cleanup->size = size; } @@ -344,18 +437,30 @@ anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) /* Now that we successfull allocated everything, we can write the new * values back into pool. */ - pool->map = map; + pool->map = map + center_bo_offset; + pool->center_bo_offset = center_bo_offset; pool->bo.gem_handle = gem_handle; pool->bo.size = size; pool->bo.map = map; pool->bo.index = 0; +done: pthread_mutex_unlock(&pool->device->mutex); - return size; + /* Return the appropreate new size. This function never actually + * updates state->next. 
Instead, we let the caller do that because it + * needs to do so in order to maintain its concurrency model. + */ + if (state == &pool->state) { + return pool->bo.size - pool->center_bo_offset; + } else { + assert(pool->center_bo_offset > 0); + return pool->center_bo_offset; + } fail: pthread_mutex_unlock(&pool->device->mutex); + return 0; } @@ -372,12 +477,12 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool, return state.next; } else if (state.next == state.end) { /* We allocated the first block outside the pool, we have to grow it. - * pool->next_block acts a mutex: threads who try to allocate now will + * pool_state->next acts a mutex: threads who try to allocate now will * get block indexes above the current limit and hit futex_wait * below. */ new.next = state.next + pool->block_size; - new.end = anv_block_pool_grow(pool, state.end); - assert(new.end > 0); + new.end = anv_block_pool_grow(pool, pool_state); + assert(new.end >= new.next && new.end % pool->block_size == 0); old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64); if (old.next != state.next) futex_wake(&pool_state->end, INT_MAX); @@ -389,7 +494,7 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool, } } -uint32_t +int32_t anv_block_pool_alloc(struct anv_block_pool *pool) { int32_t offset; @@ -404,10 +509,46 @@ anv_block_pool_alloc(struct anv_block_pool *pool) return anv_block_pool_alloc_new(pool, &pool->state); } +/* Allocates a block out of the back of the block pool. + * + * This will allocated a block earlier than the "start" of the block pool. + * The offsets returned from this function will be negative but will still + * be correct relative to the block pool's map pointer. + * + * If you ever use anv_block_pool_alloc_back, then you will have to do + * gymnastics with the block pool's BO when doing relocations. + */ +int32_t +anv_block_pool_alloc_back(struct anv_block_pool *pool) +{ + int32_t offset; + + /* Try free list first. 
*/ + if (anv_free_list_pop(&pool->back_free_list, &pool->map, &offset)) { + assert(offset < 0); + assert(pool->map); + return offset; + } + + offset = anv_block_pool_alloc_new(pool, &pool->back_state); + + /* The offset we get out of anv_block_pool_alloc_new() is actually the + * number of bytes downwards from the middle to the end of the block. + * We need to turn it into a (negative) offset from the middle to the + * start of the block. + */ + assert(offset >= 0); + return -(offset + pool->block_size); +} + void -anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset) +anv_block_pool_free(struct anv_block_pool *pool, int32_t offset) { - anv_free_list_push(&pool->free_list, pool->map, offset); + if (offset < 0) { + anv_free_list_push(&pool->back_free_list, pool->map, offset); + } else { + anv_free_list_push(&pool->free_list, pool->map, offset); + } } static void diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index ef7c7083bb0..667f9ddbf3c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -253,6 +253,25 @@ struct anv_block_pool { struct anv_device *device; struct anv_bo bo; + + /* Offset from the start of the memfd to the "center" of the block pool. */ + uint32_t center_fd_offset; + + /* The offset from the start of the bo to the "center" of the block + * pool. Pointers to allocated blocks are given by + * bo.map + center_bo_offset + offsets. + */ + uint32_t center_bo_offset; + + /* Current memory map of the block pool. This pointer may or may not + * point to the actual beginning of the block pool memory. If + * anv_block_pool_alloc_back has ever been called, then this pointer + * will point to the "center" position of the buffer and all offsets + * (negative or positive) given out by the block pool alloc functions + * will be valid relative to this pointer. 
+ * + * In particular, map == bo.map + center_offset + */ void *map; int fd; @@ -266,15 +285,23 @@ struct anv_block_pool { union anv_free_list free_list; struct anv_block_state state; + + union anv_free_list back_free_list; + struct anv_block_state back_state; }; /* Block pools are backed by a fixed-size 2GB memfd */ #define BLOCK_POOL_MEMFD_SIZE (1ull << 32) +/* The center of the block pool is also the middle of the memfd. This may + * change in the future if we decide differently for some reason. + */ +#define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2) + static inline uint32_t anv_block_pool_size(struct anv_block_pool *pool) { - return pool->state.end; + return pool->state.end + pool->back_state.end; } struct anv_state { @@ -309,8 +336,9 @@ struct anv_state_stream { void anv_block_pool_init(struct anv_block_pool *pool, struct anv_device *device, uint32_t block_size); void anv_block_pool_finish(struct anv_block_pool *pool); -uint32_t anv_block_pool_alloc(struct anv_block_pool *pool); -void anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset); +int32_t anv_block_pool_alloc(struct anv_block_pool *pool); +int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool); +void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset); void anv_state_pool_init(struct anv_state_pool *pool, struct anv_block_pool *block_pool); void anv_state_pool_finish(struct anv_state_pool *pool); diff --git a/src/vulkan/tests/block_pool_no_free.c b/src/vulkan/tests/block_pool_no_free.c index 71eb90103ef..01c23e21b2d 100644 --- a/src/vulkan/tests/block_pool_no_free.c +++ b/src/vulkan/tests/block_pool_no_free.c @@ -34,14 +34,18 @@ struct job { unsigned id; struct anv_block_pool *pool; uint32_t blocks[BLOCKS_PER_THREAD]; + uint32_t back_blocks[BLOCKS_PER_THREAD]; } jobs[NUM_THREADS]; + static void *alloc_blocks(void *_job) { struct job *job = _job; - for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) + for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { 
job->blocks[i] = anv_block_pool_alloc(job->pool); + job->back_blocks[i] = -anv_block_pool_alloc_back(job->pool); + } return NULL; } @@ -98,10 +102,15 @@ static void run_test() for (unsigned i = 0; i < NUM_THREADS; i++) pthread_join(jobs[i].thread, NULL); + /* Validate that the block allocations were monotonic */ uint32_t *block_ptrs[NUM_THREADS]; for (unsigned i = 0; i < NUM_THREADS; i++) block_ptrs[i] = jobs[i].blocks; + validate_monotonic(block_ptrs); + /* Validate that the back block allocations were monotonic */ + for (unsigned i = 0; i < NUM_THREADS; i++) + block_ptrs[i] = jobs[i].back_blocks; validate_monotonic(block_ptrs); anv_block_pool_finish(&pool); -- cgit v1.2.3 From dcf424c98cf32597e83cbc87eb5d62909a5bf481 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Sep 2015 21:37:01 -0700 Subject: anv/tests: Add some asserts for data integrity in block_pool_no_free --- src/vulkan/tests/block_pool_no_free.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/tests/block_pool_no_free.c b/src/vulkan/tests/block_pool_no_free.c index 01c23e21b2d..86d1a76151f 100644 --- a/src/vulkan/tests/block_pool_no_free.c +++ b/src/vulkan/tests/block_pool_no_free.c @@ -41,10 +41,30 @@ struct job { static void *alloc_blocks(void *_job) { struct job *job = _job; + int32_t block, *data; for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { - job->blocks[i] = anv_block_pool_alloc(job->pool); - job->back_blocks[i] = -anv_block_pool_alloc_back(job->pool); + block = anv_block_pool_alloc(job->pool); + data = job->pool->map + block; + *data = block; + assert(block >= 0); + job->blocks[i] = block; + + block = anv_block_pool_alloc_back(job->pool); + data = job->pool->map + block; + *data = block; + assert(block < 0); + job->back_blocks[i] = -block; + } + + for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { + block = job->blocks[i]; + data = job->pool->map + block; + assert(*data == block); + + block = 
-job->back_blocks[i]; + data = job->pool->map + block; + assert(*data == block); } return NULL; -- cgit v1.2.3 From b5f6889648488d735e920a630917ffa17ff3691f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Sep 2015 11:19:16 -0700 Subject: vk/device: Don't allow device or instance creation with invalid extensions --- src/vulkan/anv_device.c | 55 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index cf93cd1a6eb..a1c12e0dd17 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -110,6 +110,21 @@ static const VkAllocCallbacks default_alloc_callbacks = { .pfnFree = default_free }; +static const VkExtensionProperties global_extensions[] = { + { + .extName = "VK_WSI_swapchain", + .specVersion = 12 + }, +}; + +static const VkExtensionProperties device_extensions[] = { + { + .extName = "VK_WSI_device_swapchain", + .specVersion = 12 + }, +}; + + VkResult anv_CreateInstance( const VkInstanceCreateInfo* pCreateInfo, VkInstance* pInstance) @@ -120,6 +135,19 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); + for (uint32_t i = 0; i < pCreateInfo->extensionCount; i++) { + bool found = false; + for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + global_extensions[j].extName) == 0) { + found = true; + break; + } + } + if (!found) + return vk_error(VK_ERROR_INVALID_EXTENSION); + } + if (pCreateInfo->pAllocCb) { alloc_callbacks = pCreateInfo->pAllocCb; user_data = pCreateInfo->pAllocCb->pUserData; @@ -546,6 +574,19 @@ VkResult anv_CreateDevice( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); + for (uint32_t i = 0; i < pCreateInfo->extensionCount; i++) { + bool found = false; + for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) { + if 
(strcmp(pCreateInfo->ppEnabledExtensionNames[i], + device_extensions[j].extName) == 0) { + found = true; + break; + } + } + if (!found) + return vk_error(VK_ERROR_INVALID_EXTENSION); + } + anv_set_dispatch_gen(physical_device->info->gen); device = anv_instance_alloc(instance, sizeof(*device), 8, @@ -636,13 +677,6 @@ VkResult anv_DestroyDevice( return VK_SUCCESS; } -static const VkExtensionProperties global_extensions[] = { - { - .extName = "VK_WSI_swapchain", - .specVersion = 12 - }, -}; - VkResult anv_GetGlobalExtensionProperties( const char* pLayerName, uint32_t* pCount, @@ -661,13 +695,6 @@ VkResult anv_GetGlobalExtensionProperties( return VK_SUCCESS; } -static const VkExtensionProperties device_extensions[] = { - { - .extName = "VK_WSI_device_swapchain", - .specVersion = 12 - }, -}; - VkResult anv_GetPhysicalDeviceExtensionProperties( VkPhysicalDevice physicalDevice, const char* pLayerName, -- cgit v1.2.3 From 3b8aa26b8eedc415d93bc4f2099d39736e5249b2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Sep 2015 12:36:23 -0700 Subject: anv/formats: Properly report depth-stencil formats --- src/vulkan/anv_formats.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index f5d00a0f8ff..516281f2274 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -265,25 +265,30 @@ VkResult anv_GetPhysicalDeviceFormatProperties( if (format->surface_format == UNSUPPORTED) goto unsupported; - info = &surface_formats[format->surface_format]; - if (!info->exists) - goto unsupported; - uint32_t linear = 0, tiled = 0; - if (info->sampling <= gen) { - linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - } - if (info->render_target <= gen) { - linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; - tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; - } - if (info->alpha_blend <= gen) { - linear 
|= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; - tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; - } - if (info->input_vb <= gen) { - linear |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + if (anv_format_is_depth_or_stencil(format)) { + tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + } else { + /* The surface_formats table only contains color formats */ + info = &surface_formats[format->surface_format]; + if (!info->exists) + goto unsupported; + + if (info->sampling <= gen) { + linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + } + if (info->render_target <= gen) { + linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + } + if (info->alpha_blend <= gen) { + linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + } + if (info->input_vb <= gen) { + linear |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + } } pFormatProperties->linearTilingFeatures = linear; -- cgit v1.2.3 From d616493953da7e349967a78c6fc671d35c1b788e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Sep 2015 16:28:42 -0700 Subject: anv/meta: Pass the depth through the clear vertex shader It shouldn't matter since we shut off the VS but it's at least clearer. --- src/vulkan/anv_meta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index aa48b978427..4f70e573008 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -39,13 +39,13 @@ anv_device_init_meta_clear_state(struct anv_device *device) * does not dead-code our inputs. 
*/ VkShaderModule vsm = GLSL_VK_SHADER_MODULE(device, VERTEX, - in vec2 a_pos; + in vec3 a_pos; in vec4 a_color; flat out vec4 v_color; void main() { v_color = a_color; - gl_Position = vec4(a_pos, 0, 1); + gl_Position = vec4(a_pos, 1); } ); -- cgit v1.2.3 From 595e6cacf1ca953dfa41e497255a4c3c21b0e14d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Sep 2015 14:24:50 -0700 Subject: meta: Initial support for packing parameters Probably incomplete but it should do for now --- src/vulkan/anv_meta.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 4f70e573008..b850305913a 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1219,10 +1219,12 @@ make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, { ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); - if (copy->bufferRowLength != 0) - anv_finishme("bufferRowLength not supported in CopyBufferToImage"); - if (copy->bufferImageHeight != 0) - anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); + VkExtent3D extent = copy->imageExtent; + if (copy->bufferRowLength) + extent.width = copy->bufferRowLength; + if (copy->bufferImageHeight) + extent.height = copy->bufferImageHeight; + extent.depth = 1; VkImage vk_image; VkResult result = anv_CreateImage(vk_device, @@ -1230,11 +1232,7 @@ make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, .format = format, - .extent = { - .width = copy->imageExtent.width, - .height = copy->imageExtent.height, - .depth = 1, - }, + .extent = extent, .mipLevels = 1, .arraySize = 1, .samples = 1, -- cgit v1.2.3 From a788e7c659e86dec5430310e634401819b7d44f9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Sep 2015 18:23:21 -0700 Subject: anv/device: Move mutex initialization to befor block pools --- src/vulkan/anv_device.c | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index a1c12e0dd17..9d3b5dbfa0a 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -605,6 +605,8 @@ VkResult anv_CreateDevice( if (device->context_id == -1) goto fail_fd; + pthread_mutex_init(&device->mutex, NULL); + anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048); @@ -624,8 +626,6 @@ VkResult anv_CreateDevice( device->compiler = anv_compiler_create(device); - pthread_mutex_init(&device->mutex, NULL); - anv_queue_init(device, &device->queue); anv_device_init_meta(device); -- cgit v1.2.3 From 616db92b01deb93674dff24eceb254ef780fbae5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Sep 2015 19:54:18 -0700 Subject: nir/spirv: Add better location handling Previously, our location handling was focussed on either no location (usually implicit 0) or a builting. Unfortunately, if you gave it a location, it would blow it away and just not care. This worked fine with crucible and our meta shaders but didn't work with the CTS. The new code uses the "data.explicit_location" field to denote that it has a "final" location (usually from a builtin) and, otherwise, the location is considered to be relative to the base for that shader stage. 
--- src/glsl/nir/spirv_to_nir.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index e6fe74de9e8..60e7a338ee1 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -760,7 +760,6 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, var->data.read_only = true; break; case SpvDecorationLocation: - var->data.explicit_location = true; var->data.location = dec->literals[0]; break; case SpvDecorationComponent: @@ -781,6 +780,7 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, nir_variable_mode mode; vtn_get_builtin_location(dec->literals[0], &var->data.location, &mode); + var->data.explicit_location = true; var->data.mode = mode; if (mode == nir_var_shader_in || mode == nir_var_system_value) var->data.read_only = true; @@ -830,6 +830,7 @@ get_builtin_variable(struct vtn_builder *b, nir_variable_mode mode; vtn_get_builtin_location(builtin, &var->data.location, &mode); + var->data.explicit_location = true; var->data.mode = mode; var->name = ralloc_strdup(var, "builtin"); @@ -1282,23 +1283,27 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, val->deref = nir_deref_var_create(b, var); val->deref_type = type; - if (b->execution_model == SpvExecutionModelFragment && - var->data.mode == nir_var_shader_out) { - var->data.location += FRAG_RESULT_DATA0; - } else if (b->execution_model == SpvExecutionModelVertex && - var->data.mode == nir_var_shader_in) { - var->data.location += VERT_ATTRIB_GENERIC0; - } else if (var->data.mode == nir_var_shader_in || - var->data.mode == nir_var_shader_out) { - var->data.location += VARYING_SLOT_VAR0; - } - - /* We handle decorations last because decorations might cause us to - * over-write other things such as the variable's location and we want - * those changes to stick. 
+ /* We handle decorations first because decorations might give us + * location information. We use the data.explicit_location field to + * note that the location provided is the "final" location. If + * data.explicit_location == false, this means that it's relative to + * whatever the base location is. */ vtn_foreach_decoration(b, val, var_decoration_cb, var); + if (!var->data.explicit_location) { + if (b->execution_model == SpvExecutionModelFragment && + var->data.mode == nir_var_shader_out) { + var->data.location += FRAG_RESULT_DATA0; + } else if (b->execution_model == SpvExecutionModelVertex && + var->data.mode == nir_var_shader_in) { + var->data.location += VERT_ATTRIB_GENERIC0; + } else if (var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out) { + var->data.location += VARYING_SLOT_VAR0; + } + } + /* If this was a uniform block, then we're not going to actually use the * variable (we're only going to use it to compute offsets), so don't * declare it in the shader. -- cgit v1.2.3 From 041f5ea0896c9f0f7dd44d36749c4558693d9412 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 21 Sep 2015 16:21:56 -0700 Subject: anv/meta: Add location specifiers to meta shaders --- src/vulkan/anv_meta.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index b850305913a..3ab1ceabb92 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -39,9 +39,9 @@ anv_device_init_meta_clear_state(struct anv_device *device) * does not dead-code our inputs. 
*/ VkShaderModule vsm = GLSL_VK_SHADER_MODULE(device, VERTEX, - in vec3 a_pos; - in vec4 a_color; - flat out vec4 v_color; + layout(location = 0) in vec3 a_pos; + layout(location = 1) in vec4 a_color; + layout(location = 0) flat out vec4 v_color; void main() { v_color = a_color; @@ -50,8 +50,8 @@ anv_device_init_meta_clear_state(struct anv_device *device) ); VkShaderModule fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, - out vec4 f_color; - flat in vec4 v_color; + layout(location = 0) out vec4 f_color; + layout(location = 0) flat in vec4 v_color; void main() { f_color = v_color; @@ -431,9 +431,9 @@ anv_device_init_meta_blit_state(struct anv_device *device) * does not dead-code our inputs. */ VkShaderModule vsm = GLSL_VK_SHADER_MODULE(device, VERTEX, - in vec2 a_pos; - in vec2 a_tex_coord; - out vec4 v_tex_coord; + layout(location = 0) in vec2 a_pos; + layout(location = 1) in vec2 a_tex_coord; + layout(location = 0) out vec4 v_tex_coord; void main() { v_tex_coord = vec4(a_tex_coord, 0, 1); @@ -442,8 +442,8 @@ anv_device_init_meta_blit_state(struct anv_device *device) ); VkShaderModule fsm_2d = GLSL_VK_SHADER_MODULE(device, FRAGMENT, - out vec4 f_color; - in vec4 v_tex_coord; + layout(location = 0) out vec4 f_color; + layout(location = 0) in vec4 v_tex_coord; layout(set = 0, binding = 0) uniform sampler2D u_tex; void main() { @@ -452,8 +452,8 @@ anv_device_init_meta_blit_state(struct anv_device *device) ); VkShaderModule fsm_3d = GLSL_VK_SHADER_MODULE(device, FRAGMENT, - out vec4 f_color; - in vec4 v_tex_coord; + layout(location = 0) out vec4 f_color; + layout(location = 0) in vec4 v_tex_coord; layout(set = 0, binding = 0) uniform sampler3D u_tex; void main() { -- cgit v1.2.3 From 76be58efce858614d10709ac2952329495d1ed8f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Sep 2015 17:16:40 -0700 Subject: anv/batch_chain: Clean up the reloc list swapping code --- src/vulkan/anv_batch_chain.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) 
(limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 77a5bd6f9ec..b8f08b9cd8a 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -895,21 +895,19 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) */ if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { uint32_t idx = first_batch_bo->bo.index; + uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1; struct drm_i915_gem_exec_object2 tmp_obj = cmd_buffer->execbuf2.objects[idx]; assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); - cmd_buffer->execbuf2.objects[idx] = - cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1]; - cmd_buffer->execbuf2.bos[idx] = - cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1]; + cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx]; + cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx]; cmd_buffer->execbuf2.bos[idx]->index = idx; - cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1] = tmp_obj; - cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1] = - &first_batch_bo->bo; - first_batch_bo->bo.index = cmd_buffer->execbuf2.bo_count - 1; + cmd_buffer->execbuf2.objects[last_idx] = tmp_obj; + cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo; + first_batch_bo->bo.index = last_idx; } /* Now we go through and fixup all of the relocation lists to point to -- cgit v1.2.3 From 429665823d6f9ed401d9a74cd77877619d0ff782 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Sep 2015 14:20:04 -0700 Subject: anv/allocator: Do a better job of centering bi-directional block pools --- src/vulkan/anv_allocator.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 57b42eb4202..cbd2b6d0f51 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -388,6 +388,9 @@ 
anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) */ center_bo_offset = 0; } else { + /* Try to "center" the allocation based on how much is currently in + * use on each side of the center line. + */ center_bo_offset = ((uint64_t)size * back_used) / total_used; /* Align down to a multiple of both the block size and page size */ @@ -396,6 +399,14 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) center_bo_offset &= ~(granularity - 1); assert(center_bo_offset >= back_used); + + /* Make sure we don't shrink the back end of the pool */ + if (center_bo_offset < pool->back_state.end) + center_bo_offset = pool->back_state.end; + + /* Make sure that we don't shrink the front end of the pool */ + if (size - center_bo_offset < pool->state.end) + center_bo_offset = size - pool->state.end; } assert(center_bo_offset % pool->block_size == 0); @@ -403,7 +414,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) /* Assert that we only ever grow the pool */ assert(center_bo_offset >= pool->back_state.end); - assert(size - center_bo_offset >= pool->back_state.end); + assert(size - center_bo_offset >= pool->state.end); cleanup = anv_vector_add(&pool->mmap_cleanups); if (!cleanup) -- cgit v1.2.3 From 99e62f5ce892367ec50a66d946c21ba6d709a5d4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Sep 2015 14:21:16 -0700 Subject: anv/allocator: Delete the unused center_fd_offset from anv_block_pool --- src/vulkan/anv_private.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 667f9ddbf3c..fa910064945 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -254,9 +254,6 @@ struct anv_block_pool { struct anv_bo bo; - /* Offset from the start of the memfd to the "center" of the block pool. */ - uint32_t center_fd_offset; - /* The offset from the start of the bo to the "center" of the block * pool. 
Pointers to allocated blocks are given by * bo.map + center_bo_offset + offsets. -- cgit v1.2.3 From e1a7c721d34158f94f6b6dee94a211d32e9f5752 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Sep 2015 14:20:23 -0700 Subject: anv/allocator: Don't ever call mremap This has always been a bit sketchy and neither Kristian nor I have ever really liked it. --- src/vulkan/anv_allocator.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index cbd2b6d0f51..05126305b51 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -421,23 +421,18 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) goto fail; *cleanup = ANV_MMAP_CLEANUP_INIT; - /* First try to see if mremap can grow the map in place. */ - map = MAP_FAILED; - if (old_size > 0 && center_bo_offset == 0) - map = mremap(pool->map, old_size, size, 0); - if (map == MAP_FAILED) { - /* Just leak the old map until we destroy the pool. We can't munmap it - * without races or imposing locking on the block allocate fast path. On - * the whole the leaked maps adds up to less than the size of the - * current map. MAP_POPULATE seems like the right thing to do, but we - * should try to get some numbers. - */ - map = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, pool->fd, - BLOCK_POOL_MEMFD_CENTER - center_bo_offset); - cleanup->map = map; - cleanup->size = size; - } + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps adds up to less than the size of the + * current map. MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. 
+ */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, pool->fd, + BLOCK_POOL_MEMFD_CENTER - center_bo_offset); + cleanup->map = map; + cleanup->size = size; + if (map == MAP_FAILED) goto fail; -- cgit v1.2.3 From bc17f9c9d76dbf2cdc657cfb6861b887dbca3046 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 22 Sep 2015 10:58:34 -0700 Subject: anv/cmd_buffer: Add a helper for getting the surface state base address --- src/vulkan/anv_batch_chain.c | 9 +++++++++ src/vulkan/anv_private.h | 2 ++ src/vulkan/gen7_cmd_buffer.c | 2 +- src/vulkan/gen8_cmd_buffer.c | 2 +- 4 files changed, 13 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index b8f08b9cd8a..6c37ce0aca3 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -395,6 +395,15 @@ anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs; } +struct anv_address +anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + return (struct anv_address) { + .bo = anv_cmd_buffer_current_surface_bo(cmd_buffer), + .offset = 0, + }; +} + static void emit_batch_buffer_start(struct anv_batch *batch, struct anv_bo *bo, uint32_t offset) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index fa910064945..2e8bfa2c1d6 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -897,6 +897,8 @@ struct anv_bo * anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer); struct anv_reloc_list * anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer); +struct anv_address +anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer); struct anv_state anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 
b149673f405..95d1e28698f 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -48,7 +48,7 @@ gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) .GeneralStateAccessUpperBound = { scratch_bo, scratch_bo->size }, .GeneralStateAccessUpperBoundModifyEnable = true, - .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, + .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer), .SurfaceStateMemoryObjectControlState = GEN7_MOCS, .SurfaceStateBaseAddressModifyEnable = true, diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 5737879ffe7..5018b6775d9 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -824,7 +824,7 @@ gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) .GeneralStateBufferSize = 0xfffff, .GeneralStateBufferSizeModifyEnable = true, - .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, + .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer), .SurfaceStateMemoryObjectControlState = GEN8_MOCS, .SurfaceStateBaseAddressModifyEnable = true, -- cgit v1.2.3 From 913a9b76f78fd6fbef3658667acf2d737d656eb2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 24 Sep 2015 08:46:39 -0700 Subject: anv/batch_chain: Remove the current_surface_bo helper It's no longer used outside anv_batch_chain so we certainly don't need to be exporting. Inside anv_batch_chain, it's only used twice and it can be replaced by a single line so there's really no point. 
--- src/vulkan/anv_batch_chain.c | 10 ++-------- src/vulkan/anv_private.h | 2 -- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 6c37ce0aca3..aaf65c33f84 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -383,12 +383,6 @@ anv_cmd_buffer_current_surface_bbo(struct anv_cmd_buffer *cmd_buffer) return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->surface_bos.prev, link); } -struct anv_bo * -anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer) -{ - return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo; -} - struct anv_reloc_list * anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) { @@ -399,7 +393,7 @@ struct anv_address anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) { return (struct anv_address) { - .bo = anv_cmd_buffer_current_surface_bo(cmd_buffer), + .bo = &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo, .offset = 0, }; } @@ -478,7 +472,7 @@ anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment) { struct anv_bo *surface_bo = - anv_cmd_buffer_current_surface_bo(cmd_buffer); + &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo; struct anv_state state; state.offset = align_u32(cmd_buffer->surface_next, alignment); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 2e8bfa2c1d6..f710cefe36c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -893,8 +893,6 @@ struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); -struct anv_bo * -anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer); struct anv_reloc_list * anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer); struct anv_address -- cgit v1.2.3 From 
f5e72695e0e75aa787d42646962228b04965d2d9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 24 Sep 2015 13:51:54 -0700 Subject: anv/entrypoints: Rename anv_layer to anv_dispatch_table --- src/vulkan/anv_entrypoints_gen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py index 149f34c9842..50c98456846 100644 --- a/src/vulkan/anv_entrypoints_gen.py +++ b/src/vulkan/anv_entrypoints_gen.py @@ -80,7 +80,7 @@ for line in fileinput.input(): if opt_header: print "/* This file generated from vk_gen.py, don't edit directly. */\n" - print "struct anv_layer {" + print "struct anv_dispatch_table {" print " union {" print " void *entrypoints[%d];" % len(entrypoints) print " struct {" @@ -166,7 +166,7 @@ print "};\n" for layer in [ "anv", "validate", "gen7", "gen8" ]: for type, name, args, num, h in entrypoints: print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) - print "\nconst struct anv_layer %s_layer = {" % layer + print "\nconst struct anv_dispatch_table %s_layer = {" % layer for type, name, args, num, h in entrypoints: print " .%s = %s_%s," % (name, layer, name) print "};\n" -- cgit v1.2.3 From 00d18a661f7685fd3742337674deca372ad7cbc4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 24 Sep 2015 14:20:07 -0700 Subject: anv/entrypoints: Expose the anv_resolve_entrypoint function --- src/vulkan/anv_entrypoints_gen.py | 8 ++++---- src/vulkan/anv_private.h | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py index 50c98456846..0fa677bbe02 100644 --- a/src/vulkan/anv_entrypoints_gen.py +++ b/src/vulkan/anv_entrypoints_gen.py @@ -201,8 +201,8 @@ anv_set_dispatch_gen(uint32_t gen) dispatch_gen = gen; } -static void * __attribute__ ((noinline)) -resolve_entrypoint(uint32_t index) +void * __attribute__ ((noinline)) 
+anv_resolve_entrypoint(uint32_t index) { if (enable_validate && validate_layer.entrypoints[index]) return validate_layer.entrypoints[index]; @@ -229,7 +229,7 @@ resolve_entrypoint(uint32_t index) # lets the resolver look it up in the table. for type, name, args, num, h in entrypoints: - print "static void *resolve_%s(void) { return resolve_entrypoint(%d); }" % (name, num) + print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num) print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name) @@ -305,6 +305,6 @@ anv_lookup_entrypoint(const char *name) if (strcmp(name, strings + e->name) != 0) return NULL; - return resolve_entrypoint(i); + return anv_resolve_entrypoint(i); } """ % (prime_factor, prime_step, hash_mask) diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index f710cefe36c..7e5982db645 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -366,6 +366,10 @@ void anv_bo_pool_finish(struct anv_bo_pool *pool); VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); + +void *anv_resolve_entrypoint(uint32_t index); + + struct anv_physical_device { struct anv_instance * instance; uint32_t chipset_id; -- cgit v1.2.3 From a95f51c1d79848941dae2965dc0d2db4d1fd3d18 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 24 Sep 2015 14:20:35 -0700 Subject: anv: Add a global dispatch table for use in meta operations --- src/vulkan/anv_device.c | 4 +++- src/vulkan/anv_meta.c | 19 ++++++++++--------- src/vulkan/anv_private.h | 10 ++++++++++ src/vulkan/glsl_scraper.py | 11 ++++++----- 4 files changed, 29 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 9d3b5dbfa0a..75a889f134d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -31,6 +31,8 @@ #include "mesa/main/git_sha1.h" 
#include "util/strtod.h" +struct anv_dispatch_table dtable; + static VkResult anv_physical_device_init(struct anv_physical_device *device, struct anv_instance *instance, @@ -794,7 +796,7 @@ VkResult anv_QueueWaitIdle( { ANV_FROM_HANDLE(anv_queue, queue, _queue); - return vkDeviceWaitIdle(anv_device_to_handle(queue->device)); + return ANV_CALL(DeviceWaitIdle)(anv_device_to_handle(queue->device)); } VkResult anv_DeviceWaitIdle( diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 3ab1ceabb92..30da2fee90f 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -318,7 +318,8 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.shared.cb_state); - vkCmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, num_instances); + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), + 0, 3, 0, num_instances); } void @@ -802,7 +803,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .dependencyCount = 0, }, &pass); - vkCmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderPass = pass, @@ -845,9 +846,9 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, device->meta_state.blit.pipeline_layout, 0, 1, &set, 0, NULL); - vkCmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, 1); + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, 1); - vkCmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. 
@@ -1531,7 +1532,7 @@ void anv_CmdClearColorImage( .dependencyCount = 0, }, &pass); - vkCmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderArea = { @@ -1559,7 +1560,7 @@ void anv_CmdClearColorImage( meta_emit_clear(cmd_buffer, 1, &instance_data, (VkClearDepthStencilValue) {0}); - vkCmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); } } } @@ -1621,19 +1622,19 @@ anv_device_init_meta(struct anv_device *device) anv_device_init_meta_clear_state(device); anv_device_init_meta_blit_state(device); - vkCreateDynamicRasterState(anv_device_to_handle(device), + ANV_CALL(CreateDynamicRasterState)(anv_device_to_handle(device), &(VkDynamicRasterStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO, }, &device->meta_state.shared.rs_state); - vkCreateDynamicColorBlendState(anv_device_to_handle(device), + ANV_CALL(CreateDynamicColorBlendState)(anv_device_to_handle(device), &(VkDynamicColorBlendStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO }, &device->meta_state.shared.cb_state); - vkCreateDynamicDepthStencilState(anv_device_to_handle(device), + ANV_CALL(CreateDynamicDepthStencilState)(anv_device_to_handle(device), &(VkDynamicDepthStencilStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO }, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 7e5982db645..07a1d97c65b 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -369,6 +369,16 @@ void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); void *anv_resolve_entrypoint(uint32_t index); +extern struct anv_dispatch_table dtable; + +#define ANV_CALL(func) ({ \ + if (dtable.func == NULL) { \ + size_t idx = offsetof(struct anv_dispatch_table, func) / 
sizeof(void *); \ + dtable.entrypoints[idx] = anv_resolve_entrypoint(idx); \ + } \ + dtable.func; \ +}) + struct anv_physical_device { struct anv_instance * instance; diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py index 4963742ea36..244cb4888d5 100644 --- a/src/vulkan/glsl_scraper.py +++ b/src/vulkan/glsl_scraper.py @@ -240,11 +240,12 @@ with open_file(outfname, 'w') as outfile: #define _ANV_SPIRV_MODULE_INFO2(_line) _anv_glsl_helpers_shader ## _line ## _info #define _ANV_SPIRV_MODULE_INFO(_line) _ANV_SPIRV_MODULE_INFO2(_line) - #define GLSL_VK_SHADER_MODULE(device, stage, ...) ({ \\ - VkShaderModule __module; \\ - vkCreateShaderModule(anv_device_to_handle(device), \\ - &_ANV_SPIRV_MODULE_INFO(__LINE__), &__module); \\ - __module; \\ + #define GLSL_VK_SHADER_MODULE(device, stage, ...) ({ \\ + VkShaderModule __module; \\ + ANV_CALL(CreateShaderModule)(anv_device_to_handle(device), \\ + &_ANV_SPIRV_MODULE_INFO(__LINE__), \\ + &__module); \\ + __module; \\ }) """)) -- cgit v1.2.3 From 39cd3783a42926c0c4570d6e5576e2e88baece4f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 24 Sep 2015 13:51:40 -0700 Subject: anv: Add support for the ICD loader --- src/vulkan/anv_cmd_buffer.c | 1 + src/vulkan/anv_device.c | 4 ++++ src/vulkan/anv_private.h | 17 +++++++++++++++++ 3 files changed, 22 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index cca5cfae3ef..38d032b3661 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -109,6 +109,7 @@ VkResult anv_CreateCommandBuffer( if (cmd_buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; cmd_buffer->device = device; result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 75a889f134d..d79ff8a779c 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -45,6 +45,7 @@ 
anv_physical_device_init(struct anv_physical_device *device, if (fd < 0) return vk_errorf(VK_ERROR_UNAVAILABLE, "failed to open %s: %m", path); + device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; device->instance = instance; device->path = path; @@ -159,6 +160,7 @@ VkResult anv_CreateInstance( if (!instance) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; instance->pAllocUserData = alloc_callbacks->pUserData; instance->pfnAlloc = alloc_callbacks->pfnAlloc; instance->pfnFree = alloc_callbacks->pfnFree; @@ -523,6 +525,7 @@ PFN_vkVoidFunction anv_GetDeviceProcAddr( static VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue) { + queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; queue->device = device; queue->pool = &device->surface_state_pool; @@ -596,6 +599,7 @@ VkResult anv_CreateDevice( if (!device) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; device->instance = physical_device->instance; /* XXX(chadv): Can we dup() physicalDevice->fd here? 
*/ diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 07a1d97c65b..4917f9ad768 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -57,6 +57,13 @@ extern "C" { #endif +#define ICD_LOADER_MAGIC 0x01CDC0DE + +typedef union _VK_LOADER_DATA { + uintptr_t loaderMagic; + void *loaderData; +} VK_LOADER_DATA; + #define anv_noreturn __attribute__((__noreturn__)) #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) @@ -381,6 +388,8 @@ extern struct anv_dispatch_table dtable; struct anv_physical_device { + VK_LOADER_DATA _loader_data; + struct anv_instance * instance; uint32_t chipset_id; const char * path; @@ -390,6 +399,8 @@ struct anv_physical_device { }; struct anv_instance { + VK_LOADER_DATA _loader_data; + void * pAllocUserData; PFN_vkAllocFunction pfnAlloc; PFN_vkFreeFunction pfnFree; @@ -427,6 +438,8 @@ struct anv_meta_state { }; struct anv_queue { + VK_LOADER_DATA _loader_data; + struct anv_device * device; struct anv_state_pool * pool; @@ -447,6 +460,8 @@ struct anv_queue { }; struct anv_device { + VK_LOADER_DATA _loader_data; + struct anv_instance * instance; uint32_t chipset_id; struct brw_device_info info; @@ -832,6 +847,8 @@ enum anv_cmd_buffer_exec_mode { }; struct anv_cmd_buffer { + VK_LOADER_DATA _loader_data; + struct anv_device * device; struct list_head pool_link; -- cgit v1.2.3 From e9dff5bb99af647e19ad823d27b5413be0f18b19 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 24 Sep 2015 11:47:19 -0700 Subject: vk: Add an ICD declaration file --- configure.ac | 1 + src/vulkan/anv_icd.json.in | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 src/vulkan/anv_icd.json.in (limited to 'src') diff --git a/configure.ac b/configure.ac index e8919caa74b..4669a67dc04 100644 --- a/configure.ac +++ b/configure.ac @@ -2415,6 +2415,7 @@ AC_CONFIG_FILES([Makefile src/mesa/drivers/x11/Makefile src/mesa/main/tests/Makefile src/vulkan/Makefile + src/vulkan/anv_icd.json src/vulkan/tests/Makefile 
src/util/Makefile src/util/tests/hash_table/Makefile]) diff --git a/src/vulkan/anv_icd.json.in b/src/vulkan/anv_icd.json.in new file mode 100644 index 00000000000..8520dd59063 --- /dev/null +++ b/src/vulkan/anv_icd.json.in @@ -0,0 +1,8 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "@abs_top_builddir@/lib/libvulkan.so.0.0.0", + "abi_versions": "0.138.2" + } +} + -- cgit v1.2.3 From cf24211d55b304991a87c47a42caae31cbcb13fc Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 25 Sep 2015 10:42:07 -0700 Subject: vk: Roll back GLSL parser support for vulkan In the interest of reducing our delta to mesa master, let's undo these changes now that we only support SPIR-V. --- src/glsl/ast.h | 8 -------- src/glsl/ast_to_hir.cpp | 19 +------------------ src/glsl/ast_type.cpp | 5 ----- src/glsl/glsl_parser.yy | 10 ++++------ src/glsl/ir.h | 14 +------------- src/glsl/link_uniform_block_active_visitor.cpp | 5 ----- src/glsl/link_uniform_block_active_visitor.h | 2 -- src/glsl/link_uniform_blocks.cpp | 4 ---- src/glsl/nir/glsl_to_nir.cpp | 17 ++++------------- 9 files changed, 10 insertions(+), 74 deletions(-) (limited to 'src') diff --git a/src/glsl/ast.h b/src/glsl/ast.h index eb6d8461671..d8c6cea7832 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -527,9 +527,6 @@ struct ast_type_qualifier { unsigned explicit_stream:1; /**< stream value assigned explicitly by shader code */ /** \} */ - /** \name Vulkan qualifiers */ - unsigned vk_set:1; - /** \name Layout qualifiers for GL_ARB_tessellation_shader */ /** \{ */ /* tess eval input layout */ @@ -642,11 +639,6 @@ struct ast_type_qualifier { */ glsl_base_type image_base_type; - /** - * Vulkan descriptor set - */ - int set; - /** * Return true if and only if an interpolation qualifier is present. 
*/ diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 981438de597..81b44bd6786 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -2777,16 +2777,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; } - if (qual->flags.q.vk_set) { - if (!qual->flags.q.explicit_binding) - _mesa_glsl_error(loc, state, - "Vulkan descriptor set layout requires both set " - "and binding qualifiers"); - - var->data.vk_set = true; - var->data.set = qual->set; - var->data.binding = qual->binding; - } else if (qual->flags.q.explicit_location) { + if (qual->flags.q.explicit_location) { validate_explicit_location(qual, var, state, loc); } else if (qual->flags.q.explicit_index) { _mesa_glsl_error(loc, state, "explicit index requires explicit location"); @@ -6280,10 +6271,6 @@ ast_interface_block::hir(exec_list *instructions, var->data.explicit_binding = this->layout.flags.q.explicit_binding; var->data.binding = this->layout.binding; - var->data.vk_set = this->layout.flags.q.vk_set; - var->data.set = this->layout.set; - var->data.binding = this->layout.binding; - state->symbols->add_variable(var); instructions->push_tail(var); } @@ -6357,10 +6344,6 @@ ast_interface_block::hir(exec_list *instructions, var->data.explicit_binding = this->layout.flags.q.explicit_binding; var->data.binding = this->layout.binding; - var->data.vk_set = this->layout.flags.q.vk_set; - var->data.set = this->layout.set; - var->data.binding = this->layout.binding; - state->symbols->add_variable(var); instructions->push_tail(var); } diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp index 892122af03d..a4671e203e2 100644 --- a/src/glsl/ast_type.cpp +++ b/src/glsl/ast_type.cpp @@ -297,11 +297,6 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, this->image_base_type = q.image_base_type; } - if (q.flags.q.vk_set) { - this->set = q.set; - this->binding = q.binding; - } - return true; } diff --git 
a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index e1b390844d3..59e4527b238 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -1468,16 +1468,14 @@ layout_qualifier_id: } } - if (match_layout_qualifier("binding", $1, state) == 0) { + if ((state->has_420pack() || + state->has_atomic_counters() || + state->has_shader_storage_buffer_objects()) && + match_layout_qualifier("binding", $1, state) == 0) { $$.flags.q.explicit_binding = 1; $$.binding = $3; } - if (match_layout_qualifier("set", $1, state) == 0) { - $$.flags.q.vk_set = 1; - $$.set = $3; - } - if (state->has_atomic_counters() && match_layout_qualifier("offset", $1, state) == 0) { $$.flags.q.explicit_offset = 1; diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 7aac9af9001..ede8caa6e47 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -688,11 +688,6 @@ public: unsigned explicit_location:1; unsigned explicit_index:1; - /** - * Do we have a Vulkan (group, index) qualifier for this variable? - */ - unsigned vk_set:1; - /** * Was an initial binding explicitly set in the shader? * @@ -762,10 +757,8 @@ public: * \note * The GLSL spec only allows the values 0 or 1 for the index in \b dual * source blending. - * - * This is now also used for the Vulkan descriptor set index. */ - int16_t index; + unsigned index:1; /** * \brief Layout qualifier for gl_FragDepth. @@ -813,11 +806,6 @@ public: */ int16_t binding; - /** - * Vulkan descriptor set for the resource. 
- */ - int16_t set; - /** * Storage location of the base of this variable * diff --git a/src/glsl/link_uniform_block_active_visitor.cpp b/src/glsl/link_uniform_block_active_visitor.cpp index 981c1f75571..510294783a0 100644 --- a/src/glsl/link_uniform_block_active_visitor.cpp +++ b/src/glsl/link_uniform_block_active_visitor.cpp @@ -54,11 +54,6 @@ process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var) b->binding = 0; } - if (var->data.vk_set) { - b->set = var->data.set; - b->index = var->data.index; - } - _mesa_hash_table_insert(ht, var->get_interface_type()->name, (void *) b); return b; } else { diff --git a/src/glsl/link_uniform_block_active_visitor.h b/src/glsl/link_uniform_block_active_visitor.h index d8aefd69991..b663a884db4 100644 --- a/src/glsl/link_uniform_block_active_visitor.h +++ b/src/glsl/link_uniform_block_active_visitor.h @@ -35,8 +35,6 @@ struct link_uniform_block_active { unsigned num_array_elements; unsigned binding; - unsigned set; - unsigned index; bool has_instance_name; bool has_binding; diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp index b80e5736f6b..4df39e200d5 100644 --- a/src/glsl/link_uniform_blocks.cpp +++ b/src/glsl/link_uniform_blocks.cpp @@ -293,8 +293,6 @@ link_uniform_blocks(void *mem_ctx, blocks[i].NumUniforms = (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); - blocks[i].Set = b->set; - blocks[i].Binding = b->binding; blocks[i].IsShaderStorage = b->is_shader_storage; i++; @@ -315,8 +313,6 @@ link_uniform_blocks(void *mem_ctx, blocks[i].NumUniforms = (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); - blocks[i].Set = b->set; - blocks[i].Binding = b->binding; blocks[i].IsShaderStorage = b->is_shader_storage; i++; diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index af97da9cc21..e3597e57e73 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -327,7 +327,7 @@ nir_visitor::visit(ir_variable *ir) 
} var->data.index = ir->data.index; - var->data.descriptor_set = ir->data.set; + var->data.descriptor_set = 0; var->data.binding = ir->data.binding; /* XXX Get rid of buffer_index */ var->data.atomic.buffer_index = ir->data.binding; @@ -1003,20 +1003,11 @@ nir_visitor::visit(ir_expression *ir) op = nir_intrinsic_load_ubo_indirect; } - ir_constant *const_block = ir->operands[0]->as_constant(); - assert(const_block && "can't figure out descriptor set index"); - unsigned index = const_block->value.u[0]; - unsigned set = sh->UniformBlocks[index].Set; - unsigned binding = sh->UniformBlocks[index].Binding; - nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op); load->num_components = ir->type->vector_elements; - load->const_index[0] = set; - load->const_index[1] = const_index ? const_index->value.u[0] : 0; /* base offset */ - nir_load_const_instr *load_binding = nir_load_const_instr_create(shader, 1); - load_binding->value.u[0] = binding; - nir_instr_insert_after_cf_list(this->cf_node_list, &load_binding->instr); - load->src[0] = nir_src_for_ssa(&load_binding->def); + load->const_index[0] = const_index ? 
const_index->value.u[0] : 0; /* base offset */ + load->const_index[1] = 1; /* number of vec4's */ + load->src[0] = evaluate_rvalue(ir->operands[0]); if (!const_index) load->src[1] = evaluate_rvalue(ir->operands[1]); add_instr(&load->instr, ir->type->vector_elements); -- cgit v1.2.3 From 850cfcad3e0d63205822ddef8670c0c1d9b5a681 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 25 Sep 2015 15:15:37 -0700 Subject: vk: Also define vk_errorf in non-debug builds --- src/vulkan/anv_private.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 4917f9ad768..99135424956 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -135,6 +135,7 @@ VkResult __vk_errorf(VkResult error, const char *file, int line, const char *for #define vk_errorf(error, format, ...) __vk_errorf(error, __FILE__, __LINE__, format, ## __VA_ARGS__); #else #define vk_error(error) error +#define vk_errorf(error, format, ...) error #endif void __anv_finishme(const char *file, int line, const char *format, ...) -- cgit v1.2.3 From 164f08c2553f7b88b7ead183cc24656d70316954 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 25 Sep 2015 15:16:56 -0700 Subject: vk: Add anv_icd.json to .gitignore --- src/vulkan/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore index 3ccb52c3d2d..6b9074d9f03 100644 --- a/src/vulkan/.gitignore +++ b/src/vulkan/.gitignore @@ -4,3 +4,4 @@ /anv_entrypoints.h /wayland-drm-protocol.c /wayland-drm-client-protocol.h +/anv_icd.json \ No newline at end of file -- cgit v1.2.3 From 97636345dafd100dc80a796e53e0a5439c36d421 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 28 Sep 2015 10:16:03 -0700 Subject: vk: Fix vkGetPhysicalDeviceSparseImageFormatProperties() The driver does not yet support sparse images, so return zero properties for all formats. 
--- src/vulkan/anv_formats.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 516281f2274..3ec2c7774aa 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -325,5 +325,8 @@ VkResult anv_GetPhysicalDeviceSparseImageFormatProperties( uint32_t* pNumProperties, VkSparseImageFormatProperties* pProperties) { - stub_return(VK_UNSUPPORTED); + /* Sparse images are not yet supported. */ + *pNumProperties = 0; + + return VK_SUCCESS; } -- cgit v1.2.3 From 4e48f94469b37efc333dbc3271fe075bed3ba625 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Sep 2015 11:08:50 -0700 Subject: anv/device: Wrap a couple valgrind calls in the VG macro This fixes the build for systems that don't have valgrind devel packages installed. --- src/vulkan/anv_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index d79ff8a779c..3c10545e8bc 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -201,8 +201,8 @@ anv_instance_alloc(struct anv_instance *instance, size_t size, void *mem = instance->pfnAlloc(instance->pAllocUserData, size, alignment, allocType); if (mem) { - VALGRIND_MEMPOOL_ALLOC(instance, mem, size); - VALGRIND_MAKE_MEM_UNDEFINED(mem, size); + VG(VALGRIND_MEMPOOL_ALLOC(instance, mem, size)); + VG(VALGRIND_MAKE_MEM_UNDEFINED(mem, size)); } return mem; } @@ -213,7 +213,7 @@ anv_instance_free(struct anv_instance *instance, void *mem) if (mem == NULL) return; - VALGRIND_MEMPOOL_FREE(instance, mem); + VG(VALGRIND_MEMPOOL_FREE(instance, mem)); instance->pfnFree(instance->pAllocUserData, mem); } -- cgit v1.2.3 From c15ce5c8341251e02794d2cae7ac95e561398e9a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 28 Sep 2015 10:52:11 -0700 Subject: vk: Advertise that depthstencil formats support sampling Let vkGetPhysicalDeviceFormatProperties() set 
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT for tiled depthstencil images. --- src/vulkan/anv_formats.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 3ec2c7774aa..2c3487c18cc 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -267,6 +267,7 @@ VkResult anv_GetPhysicalDeviceFormatProperties( uint32_t linear = 0, tiled = 0; if (anv_format_is_depth_or_stencil(format)) { + tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; } else { /* The surface_formats table only contains color formats */ -- cgit v1.2.3 From 9f3122db0e8e8c6ec1569633d3f6c673ba82d2b4 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 28 Sep 2015 11:02:53 -0700 Subject: vk: Refactor anv_GetPhysicalDeviceFormatProperties() Move the bulk of the function body to a new function anv_physical_device_get_format_properties(). This allows us to reuse the function when implementing anv_GetPhysicalDeviceImageFormatProperties() without calling into the public entry point. 
--- src/vulkan/anv_formats.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 2c3487c18cc..937ef73c964 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -245,18 +245,16 @@ VkResult anv_validate_GetPhysicalDeviceFormatProperties( return anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); } -VkResult anv_GetPhysicalDeviceFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat _format, - VkFormatProperties* pFormatProperties) +static VkResult +anv_physical_device_get_format_properties(struct anv_physical_device *physical_device, + const struct anv_format *format, + VkFormatProperties *out_properties) { - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); const struct surface_format_info *info; int gen; - const struct anv_format *format = anv_format_for_vk_format(_format); if (format == NULL) - return vk_error(VK_ERROR_INVALID_VALUE); + return VK_ERROR_INVALID_VALUE; gen = physical_device->info->gen * 10; if (physical_device->info->is_haswell) @@ -292,14 +290,33 @@ VkResult anv_GetPhysicalDeviceFormatProperties( } } - pFormatProperties->linearTilingFeatures = linear; - pFormatProperties->optimalTilingFeatures = tiled; + out_properties->linearTilingFeatures = linear; + out_properties->optimalTilingFeatures = tiled; return VK_SUCCESS; unsupported: - pFormatProperties->linearTilingFeatures = 0; - pFormatProperties->optimalTilingFeatures = 0; + out_properties->linearTilingFeatures = 0; + out_properties->optimalTilingFeatures = 0; + + return VK_SUCCESS; +} + + +VkResult anv_GetPhysicalDeviceFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties* pFormatProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkResult result; + + result = anv_physical_device_get_format_properties( + 
physical_device, + anv_format_for_vk_format(format), + pFormatProperties); + if (result != VK_SUCCESS) + return vk_error(result); return VK_SUCCESS; } -- cgit v1.2.3 From ddcedb979a143ca25a8c16b9ce39956311b3b324 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 28 Sep 2015 11:46:38 -0700 Subject: vk: Implement vkGetPhysicalDeviceImageFormatProperties() The implementation is incomplete because we lie about VkImageFormatProperties::maxResourceSize, hardcoding it to UINT32_MAX for all supported cases. --- src/vulkan/anv_formats.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 937ef73c964..6173ee7872a 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -323,14 +323,105 @@ VkResult anv_GetPhysicalDeviceFormatProperties( VkResult anv_GetPhysicalDeviceImageFormatProperties( VkPhysicalDevice physicalDevice, - VkFormat format, + VkFormat _format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageFormatProperties* pImageFormatProperties) { - /* TODO: We should do something here. Chad? */ - stub_return(VK_UNSUPPORTED); + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + const struct anv_format *format = anv_format_for_vk_format(_format); + VkFormatProperties format_props; + VkFormatFeatureFlags format_feature_flags; + VkResult result; + + result = anv_physical_device_get_format_properties(physical_device, format, + &format_props); + if (result != VK_SUCCESS) + return vk_error(result); + + /* Extract the VkFormatFeatureFlags that are relevant for the queried + * tiling. 
+ */ + if (tiling == VK_IMAGE_TILING_LINEAR) { + format_feature_flags = format_props.linearTilingFeatures; + } else if (tiling == VK_IMAGE_TILING_OPTIMAL) { + format_feature_flags = format_props.optimalTilingFeatures; + } else { + unreachable("bad VkImageTiling"); + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_SOURCE_BIT) { + /* Meta implements transfers by sampling from the source image. */ + if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT) { + if (format->has_stencil) { + /* Not yet implemented because copying to a W-tiled surface is crazy + * hard. + */ + anv_finishme("support VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT for " + "stencil format"); + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_STORAGE_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) { + /* Nothing to check. */ + } + + if (usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) { + /* Ignore this flag because it was removed from the + * provisional_I_20150910 header. + */ + } + + *pImageFormatProperties = (VkImageFormatProperties) { + /* FINISHME: Support multisampling */ + .maxSamples = 1, + + /* FINISHME: Accurately calculate + * VkImageFormatProperties::maxResourceSize. 
+ */ + .maxResourceSize = UINT32_MAX, + }; + + return VK_SUCCESS; + +unsupported: + *pImageFormatProperties = (VkImageFormatProperties) { + .maxSamples = 0, + .maxResourceSize = 0, + }; + + return VK_SUCCESS; } VkResult anv_GetPhysicalDeviceSparseImageFormatProperties( -- cgit v1.2.3 From 9ac3dde3a08d2914d4a95c42493b2b91d0f05244 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Sep 2015 15:56:14 -0700 Subject: anv/wsi_wayland: Fix FIFO mode Previously, there were a number of things we were doing wrong: 1) We weren't flushing the wl_display so dead-looping clients weren't guaranteed to work. 2) We were sending the frame event after calling wl_surface.commit() so it wasn't getting assigned to the correct frame 3) We weren't actually setting fifo_ready to false. Unfortunately, we never noticed because (3) was hiding the other two. This commit fixes all three and clients that use FIFO mode are now properly refresh-rate limited. --- src/vulkan/anv_wsi_wayland.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index 11f2dae9759..a601ad1851f 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -516,14 +516,17 @@ wsi_wl_queue_present(struct anv_swap_chain *anv_chain, assert(image_index < chain->image_count); wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0); wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX); - wl_surface_commit(chain->surface); if (chain->present_mode == VK_PRESENT_MODE_FIFO_WSI) { struct wl_callback *frame = wl_surface_frame(chain->surface); wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue); wl_callback_add_listener(frame, &frame_listener, chain); + chain->fifo_ready = false; } + wl_surface_commit(chain->surface); + wl_display_flush(chain->display->display); + return VK_SUCCESS; } -- cgit v1.2.3 From d517de6126a1a79adaf5717f2b58ac0a15a87230 Mon Sep 17 00:00:00 2001 From: 
Jason Ekstrand Date: Mon, 21 Sep 2015 16:59:21 -0700 Subject: anv: Make anv_state.offset an int32_t Binding tables will have a negative offset and we need a way to express that. Besides, the chances of a state offset being larger than 2 GB is so remote it's not worth thinking about. --- src/vulkan/anv_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 99135424956..8839e875656 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -310,7 +310,7 @@ anv_block_pool_size(struct anv_block_pool *pool) } struct anv_state { - uint32_t offset; + int32_t offset; uint32_t alloc_size; void *map; }; -- cgit v1.2.3 From 95487668df65185c20fb20379d1b5ba0a744c3ea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 21 Sep 2015 17:05:51 -0700 Subject: anv/batch_chain: Add a _alloc_binding_table function --- src/vulkan/anv_batch_chain.c | 7 +++++++ src/vulkan/anv_cmd_buffer.c | 6 +++--- src/vulkan/anv_private.h | 3 +++ 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index aaf65c33f84..51ba7ef4a31 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -488,6 +488,13 @@ anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, return state; } +struct anv_state +anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, + uint32_t entries) +{ + return anv_cmd_buffer_alloc_surface_state(cmd_buffer, entries * 4, 32); +} + struct anv_state anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment) diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 38d032b3661..5dc338782b3 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -395,7 +395,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_framebuffer *fb 
= cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; struct anv_pipeline_layout *layout; - uint32_t attachments, bias, size; + uint32_t attachments, bias; if (stage == VK_SHADER_STAGE_COMPUTE) layout = cmd_buffer->state.compute_pipeline->layout; @@ -418,8 +418,8 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (attachments + surface_count == 0) return VK_SUCCESS; - size = (bias + surface_count) * sizeof(uint32_t); - *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); + *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, + bias + surface_count); uint32_t *bt_map = bt_state->map; if (bt_state->map == NULL) diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8839e875656..6b9b12fd8ec 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -933,6 +933,9 @@ struct anv_state anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment); struct anv_state +anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, + uint32_t entries); +struct anv_state anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment); -- cgit v1.2.3 From 219a1929f7a9c3fcf0fa1cdecac592cc01dc4cf3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 21 Sep 2015 17:12:58 -0700 Subject: anv/util: Add helpers for getting the first and last elements of a vector --- src/vulkan/anv_private.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 6b9b12fd8ec..915460dadf4 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -200,6 +200,21 @@ anv_vector_length(struct anv_vector *queue) return (queue->head - queue->tail) / queue->element_size; } +static inline void * +anv_vector_head(struct anv_vector *vector) +{ + assert(vector->tail < vector->head); + return (void *)((char 
*)vector->data + + ((vector->head - vector->element_size) & + (vector->size - 1))); +} + +static inline void * +anv_vector_tail(struct anv_vector *vector) +{ + return (void *)((char *)vector->data + (vector->tail & (vector->size - 1))); +} + static inline void anv_vector_finish(struct anv_vector *queue) { -- cgit v1.2.3 From 737e89bc8d05aeb2e76b3b812ca1a3dbc16a9c8c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 22 Sep 2015 14:10:33 -0700 Subject: anv/meta: Use the dynamic state stream for temporary buffers --- src/vulkan/anv_meta.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 30da2fee90f..fb18033c44d 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -272,7 +272,7 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, }; size = sizeof(vertex_data) + num_instances * sizeof(*instance_data); - state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, size, 16); + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 16); /* Copy in the vertex and instance data */ memcpy(state.map, vertex_data, sizeof(vertex_data)); @@ -282,7 +282,7 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, struct anv_buffer vertex_buffer = { .device = cmd_buffer->device, .size = size, - .bo = &device->surface_state_block_pool.bo, + .bo = &device->dynamic_state_block_pool.bo, .offset = state.offset }; @@ -669,7 +669,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, unsigned vb_size = sizeof(struct vue_header) + 3 * sizeof(*vb_data); struct anv_state vb_state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, vb_size, 16); + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); memset(vb_state.map, 0, sizeof(struct vue_header)); vb_data = vb_state.map + sizeof(struct vue_header); @@ -712,7 +712,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_buffer vertex_buffer = { .device = device, .size = vb_size, - .bo = 
&device->surface_state_block_pool.bo, + .bo = &device->dynamic_state_block_pool.bo, .offset = vb_state.offset, }; -- cgit v1.2.3 From 0e94446b25632523e5d9901a2e5e61c9c6f28ed9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 22 Sep 2015 16:36:00 -0700 Subject: anv/device: Use a 4K block size for surface state blocks We want to start using the surface state block pool for binding tables. In order to do this, we need to be able to set surface state base address to the address of a block and surface state base address has a 4K alignment requirement. --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 3c10545e8bc..b6f083b4c83 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -621,7 +621,7 @@ VkResult anv_CreateDevice( &device->dynamic_state_block_pool); anv_block_pool_init(&device->instruction_block_pool, device, 2048); - anv_block_pool_init(&device->surface_state_block_pool, device, 2048); + anv_block_pool_init(&device->surface_state_block_pool, device, 4096); anv_state_pool_init(&device->surface_state_pool, &device->surface_state_block_pool); -- cgit v1.2.3 From 8c00f9ab56050a1a33f19025e27abf9323cbe0d0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Sep 2015 15:29:28 -0700 Subject: anv/gen8: Do a render cache flush prior to changing state base address --- src/vulkan/gen8_cmd_buffer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 5018b6775d9..0e830b4427d 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -817,6 +817,16 @@ gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.scratch_size > 0) scratch_bo = &device->scratch_block_pool.bo; + /* Emit a render target cache flush. + * + * This isn't documented anywhere in the PRM. 
However, it seems to be + * necessary prior to changing the surface state base adress. Without + * this, we get GPU hangs when using multi-level command buffers which + * clear depth, reset state base address, and then go render stuff. + */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .RenderTargetCacheFlushEnable = true); + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, .GeneralStateBaseAddress = { scratch_bo, 0 }, .GeneralStateMemoryObjectControlState = GEN8_MOCS, -- cgit v1.2.3 From d93f6385a7a41d6ea13809005520a44283a40405 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Sep 2015 12:39:34 -0700 Subject: anv/batch_chain: Add helpers for fixing up block_pool relocations --- src/vulkan/anv_batch_chain.c | 62 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 51ba7ef4a31..e8581a93095 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -880,6 +880,68 @@ anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, } } +static void +adjust_relocations_from_block_pool(struct anv_block_pool *pool, + struct anv_reloc_list *relocs) +{ + for (size_t i = 0; i < relocs->num_relocs; i++) { + /* In general, we don't know how stale the relocated value is. It + * may have been used last time or it may not. Since we don't want + * to stomp it while the GPU may be accessing it, we haven't updated + * it anywhere else in the code. Instead, we just set the presumed + * offset to what it is now based on the delta and the data in the + * block pool. Then the kernel will update it for us if needed. + */ + uint32_t *reloc_data = pool->map + relocs->relocs[i].offset; + relocs->relocs[i].presumed_offset = *reloc_data - relocs->relocs[i].delta; + + /* All of the relocations from this block pool to other BO's should + * have been emitted relative to the surface block pool center. 
We + * need to add the center offset to make them relative to the + * beginning of the actual GEM bo. + */ + relocs->relocs[i].offset += pool->center_bo_offset; + } +} + +static void +adjust_relocations_to_block_pool(struct anv_block_pool *pool, + struct anv_bo *from_bo, + struct anv_reloc_list *relocs, + uint32_t *last_pool_center_bo_offset) +{ + assert(*last_pool_center_bo_offset <= pool->center_bo_offset); + uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset; + + /* When we initially emit relocations into a block pool, we don't + * actually know what the final center_bo_offset will be so we just emit + * it as if center_bo_offset == 0. Now that we know what the center + * offset is, we need to walk the list of relocations and adjust any + * relocations that point to the pool bo with the correct offset. + */ + for (size_t i = 0; i < relocs->num_relocs; i++) { + if (relocs->reloc_bos[i] == &pool->bo) { + /* Adjust the delta value in the relocation to correctly + * correspond to the new delta. Initially, this value may have + * been negative (if treated as unsigned), but we trust in + * uint32_t roll-over to fix that for us at this point. + */ + relocs->relocs[i].delta += delta; + + /* Since the delta has changed, we need to update the actual + * relocated value with the new presumed value. This function + * should only be called on batch buffers, so we know it isn't in + * use by the GPU at the moment. 
+ */ + uint32_t *reloc_data = from_bo->map + relocs->relocs[i].offset; + *reloc_data = relocs->relocs[i].presumed_offset + + relocs->relocs[i].delta; + } + } + + *last_pool_center_bo_offset = pool->center_bo_offset; +} + void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) { -- cgit v1.2.3 From f06bc45b0c487227649e4083195361ed55525b91 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Sep 2015 12:40:17 -0700 Subject: anv/batch_chain: Use the surface state pool for binding tables --- src/vulkan/anv_batch_chain.c | 161 ++++++++++++++++++++----------------------- src/vulkan/anv_cmd_buffer.c | 39 ++++------- src/vulkan/anv_private.h | 23 ++++--- 3 files changed, 99 insertions(+), 124 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index e8581a93095..c621c0745b7 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -281,6 +281,8 @@ anv_batch_bo_clone(struct anv_device *device, bbo->length = other_bbo->length; memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); + bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset; + *bbo_out = bbo; return VK_SUCCESS; @@ -300,6 +302,7 @@ anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, batch->next = batch->start = bbo->bo.map; batch->end = bbo->bo.map + bbo->bo.size - batch_padding; batch->relocs = &bbo->relocs; + bbo->last_ss_pool_bo_offset = 0; bbo->relocs.num_relocs = 0; } @@ -377,24 +380,12 @@ anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); } -static inline struct anv_batch_bo * -anv_cmd_buffer_current_surface_bbo(struct anv_cmd_buffer *cmd_buffer) -{ - return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->surface_bos.prev, link); -} - -struct anv_reloc_list * -anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) -{ - return 
&anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs; -} - struct anv_address anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) { return (struct anv_address) { - .bo = &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo, - .offset = 0, + .bo = &cmd_buffer->device->surface_state_block_pool.bo, + .offset = *(int32_t *)anv_vector_head(&cmd_buffer->bt_blocks), }; } @@ -468,31 +459,28 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) } struct anv_state -anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment) +anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, + uint32_t entries, uint32_t *state_offset) { - struct anv_bo *surface_bo = - &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo; + struct anv_block_pool *block_pool = + &cmd_buffer->device->surface_state_block_pool; + int32_t *bt_block = anv_vector_head(&cmd_buffer->bt_blocks); struct anv_state state; - state.offset = align_u32(cmd_buffer->surface_next, alignment); - if (state.offset + size > surface_bo->size) + state.alloc_size = align_u32(entries * 4, 32); + + if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size) return (struct anv_state) { 0 }; - state.map = surface_bo->map + state.offset; - state.alloc_size = size; - cmd_buffer->surface_next = state.offset + size; + state.offset = cmd_buffer->bt_next; + state.map = block_pool->map + *bt_block + state.offset; - assert(state.offset + size <= surface_bo->size); + cmd_buffer->bt_next += state.alloc_size; - return state; -} + assert(*bt_block < 0); + *state_offset = -(*bt_block); -struct anv_state -anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, - uint32_t entries) -{ - return anv_cmd_buffer_alloc_surface_state(cmd_buffer, entries * 4, 32); + return state; } struct anv_state @@ -504,28 +492,17 @@ anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, } VkResult 
-anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) +anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer) { - struct anv_batch_bo *new_bbo, *old_bbo = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); - - /* Finish off the old buffer */ - old_bbo->length = cmd_buffer->surface_next; - - VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); - if (result != VK_SUCCESS) - return result; + struct anv_block_pool *block_pool = + &cmd_buffer->device->surface_state_block_pool; - struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); - if (seen_bbo == NULL) { - anv_batch_bo_destroy(new_bbo, cmd_buffer->device); + int32_t *offset = anv_vector_add(&cmd_buffer->bt_blocks); + if (offset == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - *seen_bbo = new_bbo; - - cmd_buffer->surface_next = 1; - list_addtail(&new_bbo->link, &cmd_buffer->surface_bos); + *offset = anv_block_pool_alloc_back(block_pool); + cmd_buffer->bt_next = 0; return VK_SUCCESS; } @@ -533,12 +510,11 @@ anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) { - struct anv_batch_bo *batch_bo, *surface_bbo; + struct anv_batch_bo *batch_bo; struct anv_device *device = cmd_buffer->device; VkResult result; list_inithead(&cmd_buffer->batch_bos); - list_inithead(&cmd_buffer->surface_bos); result = anv_batch_bo_create(device, &batch_bo); if (result != VK_SUCCESS) @@ -553,23 +529,25 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) anv_batch_bo_start(batch_bo, &cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START_length * 4); - result = anv_batch_bo_create(device, &surface_bbo); - if (result != VK_SUCCESS) - goto fail_batch_bo; - - list_addtail(&surface_bbo->link, &cmd_buffer->surface_bos); - int success = anv_vector_init(&cmd_buffer->seen_bbos, sizeof(struct anv_bo *), 8 * sizeof(struct anv_bo *)); if (!success) - goto 
fail_surface_bo; + goto fail_batch_bo; *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = surface_bbo; - /* Start surface_next at 1 so surface offset 0 is invalid. */ - cmd_buffer->surface_next = 1; + success = anv_vector_init(&cmd_buffer->bt_blocks, sizeof(int32_t), + 8 * sizeof(int32_t)); + if (!success) + goto fail_seen_bbos; + + result = anv_reloc_list_init(&cmd_buffer->surface_relocs, + cmd_buffer->device); + if (result != VK_SUCCESS) + goto fail_bt_blocks; + + anv_cmd_buffer_new_binding_table_block(cmd_buffer); cmd_buffer->execbuf2.objects = NULL; cmd_buffer->execbuf2.bos = NULL; @@ -577,8 +555,10 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; - fail_surface_bo: - anv_batch_bo_destroy(surface_bbo, device); + fail_bt_blocks: + anv_vector_finish(&cmd_buffer->bt_blocks); + fail_seen_bbos: + anv_vector_finish(&cmd_buffer->seen_bbos); fail_batch_bo: anv_batch_bo_destroy(batch_bo, device); @@ -590,6 +570,15 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; + int32_t *bt_block; + anv_vector_foreach(bt_block, &cmd_buffer->bt_blocks) { + anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, + *bt_block); + } + anv_vector_finish(&cmd_buffer->bt_blocks); + + anv_reloc_list_finish(&cmd_buffer->surface_relocs, cmd_buffer->device); + anv_vector_finish(&cmd_buffer->seen_bbos); /* Destroy all of the batch buffers */ @@ -598,12 +587,6 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) anv_batch_bo_destroy(bbo, device); } - /* Destroy all of the surface state buffers */ - list_for_each_entry_safe(struct anv_batch_bo, bbo, - &cmd_buffer->surface_bos, link) { - anv_batch_bo_destroy(bbo, device); - } - anv_device_free(device, cmd_buffer->execbuf2.objects); anv_device_free(device, cmd_buffer->execbuf2.bos); } @@ -626,18 +609,15 @@ 
anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) &cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START_length * 4); - /* Delete all but the first batch bo */ - assert(!list_empty(&cmd_buffer->batch_bos)); - while (cmd_buffer->surface_bos.next != cmd_buffer->surface_bos.prev) { - struct anv_batch_bo *bbo = anv_cmd_buffer_current_surface_bbo(cmd_buffer); - list_del(&bbo->link); - anv_batch_bo_destroy(bbo, device); + while (anv_vector_length(&cmd_buffer->bt_blocks) > 1) { + int32_t *bt_block = anv_vector_remove(&cmd_buffer->bt_blocks); + anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, + *bt_block); } - assert(!list_empty(&cmd_buffer->batch_bos)); - - anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs.num_relocs = 0; + assert(anv_vector_length(&cmd_buffer->bt_blocks) == 1); + cmd_buffer->bt_next = 0; - cmd_buffer->surface_next = 1; + cmd_buffer->surface_relocs.num_relocs = 0; /* Reset the list of seen buffers */ cmd_buffer->seen_bbos.head = 0; @@ -645,16 +625,12 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = anv_cmd_buffer_current_batch_bo(cmd_buffer); - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); } void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) { struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - struct anv_batch_bo *surface_bbo = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END); @@ -668,8 +644,6 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); - surface_bbo->length = cmd_buffer->surface_next; - if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { /* If this is a secondary command buffer, we need to determine the * mode in 
which it will be executed with vkExecuteCommands. We @@ -777,8 +751,8 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, assert(!"Invalid execution mode"); } - /* Mark the surface buffer from the secondary as seen */ - anv_cmd_buffer_add_seen_bbos(primary, &secondary->surface_bos); + anv_reloc_list_append(&primary->surface_relocs, primary->device, + &secondary->surface_relocs, 0); } static VkResult @@ -946,16 +920,25 @@ void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) { struct anv_batch *batch = &cmd_buffer->batch; + struct anv_block_pool *ss_pool = + &cmd_buffer->device->surface_state_block_pool; cmd_buffer->execbuf2.bo_count = 0; cmd_buffer->execbuf2.need_reloc = false; + adjust_relocations_from_block_pool(ss_pool, &cmd_buffer->surface_relocs); + anv_cmd_buffer_add_bo(cmd_buffer, &ss_pool->bo, &cmd_buffer->surface_relocs); + /* First, we walk over all of the bos we've seen and add them and their * relocations to the validate list. */ struct anv_batch_bo **bbo; - anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + adjust_relocations_to_block_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs, + &(*bbo)->last_ss_pool_bo_offset); + anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); + } struct anv_batch_bo *first_batch_bo = list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); @@ -989,6 +972,8 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); + anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); + cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, .buffer_count = cmd_buffer->execbuf2.bo_count, diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 5dc338782b3..c74d6d0eb17 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ 
b/src/vulkan/anv_cmd_buffer.c @@ -383,9 +383,8 @@ add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 1 : 8; - *(uint32_t *)(state.map + dword * 4) = - anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), - cmd_buffer->device, state.offset + dword * 4, bo, offset); + anv_reloc_list_add(&cmd_buffer->surface_relocs, cmd_buffer->device, + state.offset + dword * 4, bo, offset); } VkResult @@ -395,7 +394,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; struct anv_pipeline_layout *layout; - uint32_t attachments, bias; + uint32_t attachments, bias, state_offset; if (stage == VK_SHADER_STAGE_COMPUTE) layout = cmd_buffer->state.compute_pipeline->layout; @@ -419,7 +418,8 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, return VK_SUCCESS; *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, - bias + surface_count); + bias + surface_count, + &state_offset); uint32_t *bt_map = bt_state->map; if (bt_state->map == NULL) @@ -439,17 +439,9 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, const struct anv_color_attachment_view *view = (const struct anv_color_attachment_view *)attachment; - struct anv_state state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - - if (state.map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - memcpy(state.map, view->view.surface_state.map, 64); - - add_surface_state_reloc(cmd_buffer, state, view->view.bo, view->view.offset); - - bt_map[a] = state.offset; + bt_map[a] = view->view.surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, view->view.surface_state, + view->view.bo, view->view.offset); } if (layout == NULL) @@ -470,16 +462,9 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (!view) continue; - struct anv_state 
state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - - if (state.map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - memcpy(state.map, view->surface_state.map, 64); - add_surface_state_reloc(cmd_buffer, state, view->bo, view->offset); - - bt_map[start + b] = state.offset; + bt_map[start + b] = view->surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, view->surface_state, + view->bo, view->offset); } } @@ -595,7 +580,7 @@ anv_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) if (result != VK_SUCCESS) { assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); - result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); + result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); assert(result == VK_SUCCESS); /* Re-emit state base addresses so we get the new surface state base diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 915460dadf4..eb02f09e07f 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -572,6 +572,9 @@ struct anv_batch_bo { /* Bytes actually consumed in this batch BO */ size_t length; + /* Last seen surface state block pool bo offset */ + uint32_t last_ss_pool_bo_offset; + struct anv_reloc_list relocs; }; @@ -876,8 +879,6 @@ struct anv_cmd_buffer { * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). */ struct list_head batch_bos; - struct list_head surface_bos; - uint32_t surface_next; enum anv_cmd_buffer_exec_mode exec_mode; /* A vector of anv_batch_bo pointers for every batch or surface buffer @@ -887,6 +888,14 @@ struct anv_cmd_buffer { */ struct anv_vector seen_bbos; + /* A vector of int32_t's for every block of binding tables. + * + * initialized by anv_cmd_buffer_init_batch_bo_chain() + */ + struct anv_vector bt_blocks; + uint32_t bt_next; + struct anv_reloc_list surface_relocs; + /* Information needed for execbuf * * These fields are generated by anv_cmd_buffer_prepare_execbuf(). 
@@ -940,21 +949,17 @@ struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); -struct anv_reloc_list * -anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer); struct anv_address anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer); struct anv_state -anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment); -struct anv_state anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, - uint32_t entries); + uint32_t entries, uint32_t *state_offset); struct anv_state anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment); -VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer); +VkResult +anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -- cgit v1.2.3 From 337caee91078e3d83ff01b929a44a74600cde6a7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Sep 2015 20:17:00 -0700 Subject: anv/wsi_x11: Properly report BadDrawable errors to the client --- src/vulkan/anv_wsi_x11.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index f481a017001..e5d0a2572aa 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -73,8 +73,18 @@ x11_get_surface_info(struct anv_wsi_implementation *impl, xcb_window_t win = (xcb_window_t)(uintptr_t)vk_window->pPlatformWindow; xcb_get_geometry_cookie_t cookie = xcb_get_geometry(conn, win); + xcb_generic_error_t *err; xcb_get_geometry_reply_t *geom = xcb_get_geometry_reply(conn, cookie, - NULL); + &err); + if (!geom) { + if (err->error_code == 
XCB_DRAWABLE) { + return vk_error(VK_ERROR_INVALID_HANDLE); + } else { + return vk_error(VK_ERROR_UNKNOWN); + } + free(err); + } + VkExtent2D extent = { geom->width, geom->height }; free(geom); -- cgit v1.2.3 From c1553653a2ed4f06301a7818a1878da46f785882 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 2 Oct 2015 13:44:13 -0700 Subject: vk/wsi/x11: Send OUT_OF_DATE if the X drawable goes away --- src/vulkan/anv_wsi_x11.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index e5d0a2572aa..03aef4cbf23 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -192,9 +192,18 @@ x11_acquire_next_image(struct anv_swap_chain *anv_chain, struct x11_image *image = &chain->images[chain->next_image]; if (image->busy) { + xcb_generic_error_t *err; xcb_get_geometry_reply_t *geom = - xcb_get_geometry_reply(chain->conn, image->geom_cookie, NULL); - image->busy = false; + xcb_get_geometry_reply(chain->conn, image->geom_cookie, &err); + if (!geom) { + if (err->error_code == XCB_DRAWABLE) { + /* Probably the best thing to do if our drawable goes away */ + return vk_error(VK_ERROR_OUT_OF_DATE_WSI); + } else { + return vk_error(VK_ERROR_UNKNOWN); + } + free(err); + } if (geom->width != chain->extent.width || geom->height != chain->extent.height) { @@ -202,6 +211,8 @@ x11_acquire_next_image(struct anv_swap_chain *anv_chain, return vk_error(VK_ERROR_OUT_OF_DATE_WSI); } free(geom); + + image->busy = false; } *image_index = chain->next_image; -- cgit v1.2.3 From b68805f83c515916d4256f3b2f1ec67a662ca953 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 2 Oct 2015 16:15:51 -0700 Subject: anv: Add some NIR builder helpers These should all eventually be up-streamed. However, since they currently have no upstream users, they would just bitrot there. We'll keep them local for the time being. 
--- src/vulkan/anv_nir_builder.h | 102 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 src/vulkan/anv_nir_builder.h (limited to 'src') diff --git a/src/vulkan/anv_nir_builder.h b/src/vulkan/anv_nir_builder.h new file mode 100644 index 00000000000..299c8c1aad0 --- /dev/null +++ b/src/vulkan/anv_nir_builder.h @@ -0,0 +1,102 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_nir.h" +#include "glsl/nir/nir_builder.h" +#include "util/ralloc.h" + +/* This file includes NIR helpers used by meta shaders in the Vulkan + * driver. Eventually, these will all be merged into nir_builder. + * However, for now, keeping them in their own file helps to prevent merge + * conflicts. 
+ */ + +static inline void +nir_builder_init_simple_shader(nir_builder *b, gl_shader_stage stage) +{ + b->shader = nir_shader_create(NULL, stage, NULL); + + nir_function *func = nir_function_create(b->shader, + ralloc_strdup(b->shader, "main")); + nir_function_overload *overload = nir_function_overload_create(func); + overload->num_params = 0; + + b->impl = nir_function_impl_create(overload); + b->cursor = nir_after_cf_list(&b->impl->body); +} + +static inline void +nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src) +{ + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_var_create(copy, dest); + copy->variables[1] = nir_deref_var_create(copy, src); + nir_builder_instr_insert(build, ©->instr); +} + +static inline nir_variable * +nir_variable_create(nir_shader *shader, const char *name, + const struct glsl_type *type, nir_variable_mode mode) +{ + nir_variable *var = rzalloc(shader, nir_variable); + var->name = ralloc_strdup(var, name); + var->type = type; + var->data.mode = mode; + + if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) || + (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT)) + var->data.interpolation = INTERP_QUALIFIER_SMOOTH; + + switch (var->data.mode) { + case nir_var_local: + assert(!"nir_variable_create cannot be used for local variables"); + break; + + case nir_var_global: + exec_list_push_tail(&shader->globals, &var->node); + break; + + case nir_var_shader_in: + exec_list_push_tail(&shader->inputs, &var->node); + break; + + case nir_var_shader_out: + exec_list_push_tail(&shader->outputs, &var->node); + break; + + case nir_var_uniform: + case nir_var_shader_storage: + exec_list_push_tail(&shader->uniforms, &var->node); + break; + + case nir_var_system_value: + exec_list_push_tail(&shader->system_values, &var->node); + break; + + default: + unreachable("not reached"); + } + + return var; +} -- cgit v1.2.3 From 
add99c4beb3fc08e21bae4cd2e288deb8a2c26b9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 2 Oct 2015 16:16:57 -0700 Subject: anv: Add a back-door for passing NIR shaders directly into the pipeline This will allow us to use NIR directly for meta operations rather than having to go through SPIR-V. --- src/vulkan/anv_compiler.cpp | 21 ++++++++++++++------- src/vulkan/anv_pipeline.c | 1 + src/vulkan/anv_private.h | 4 ++++ 3 files changed, 19 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 8acfc068e90..b06775d05ca 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -895,7 +895,6 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, struct anv_shader *shader = pipeline->shaders[stage]; struct gl_shader *mesa_shader; int name = 0; - uint32_t *spirv; mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); fail_if(mesa_shader == NULL, @@ -937,13 +936,21 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, struct gl_shader_compiler_options *glsl_options = &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage]; - spirv = (uint32_t *) shader->module->data; - assert(spirv[0] == SPIR_V_MAGIC_NUMBER); - assert(shader->module->size % 4 == 0); + if (shader->module->nir) { + /* Some things such as our meta clear/blit code will give us a NIR + * shader directly. 
In that case, we just ignore the SPIR-V entirely + * and just use the NIR shader */ + mesa_shader->Program->nir = shader->module->nir; + mesa_shader->Program->nir->options = glsl_options->NirOptions; + } else { + uint32_t *spirv = (uint32_t *) shader->module->data; + assert(spirv[0] == SPIR_V_MAGIC_NUMBER); + assert(shader->module->size % 4 == 0); - mesa_shader->Program->nir = - spirv_to_nir(spirv, shader->module->size / 4, - stage_info[stage].stage, glsl_options->NirOptions); + mesa_shader->Program->nir = + spirv_to_nir(spirv, shader->module->size / 4, + stage_info[stage].stage, glsl_options->NirOptions); + } nir_validate_shader(mesa_shader->Program->nir); brw_process_nir(mesa_shader->Program->nir, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 833957a32e2..daf520f9714 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -47,6 +47,7 @@ VkResult anv_CreateShaderModule( if (module == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + module->nir = NULL; module->size = pCreateInfo->codeSize; memcpy(module->data, pCreateInfo->pCode, module->size); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index eb02f09e07f..f0d288342d6 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -994,7 +994,11 @@ struct anv_fence { bool ready; }; +struct nir_shader; + struct anv_shader_module { + struct nir_shader * nir; + uint32_t size; char data[0]; }; -- cgit v1.2.3 From 7851a4392a7a6b04779538f989fb614811f618c4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 2 Oct 2015 16:18:32 -0700 Subject: anv/meta: Use NIR directly for clear shaders --- src/vulkan/anv_meta.c | 97 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 70 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index fb18033c44d..0c0f889ed14 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -29,40 +29,83 @@ #include "anv_private.h" #include 
"anv_meta_spirv_autogen.h" +#include "anv_nir_builder.h" + +static nir_shader * +build_nir_vertex_shader(bool attr_flat) +{ + nir_builder b; + + const struct glsl_type *vertex_type = glsl_vec4_type(); + + nir_builder_init_simple_shader(&b, MESA_SHADER_VERTEX); + + nir_variable *pos_in = nir_variable_create(b.shader, "a_pos", + vertex_type, + nir_var_shader_in); + pos_in->data.location = VERT_ATTRIB_GENERIC0; + nir_variable *pos_out = nir_variable_create(b.shader, "gl_Position", + vertex_type, + nir_var_shader_out); + pos_in->data.location = VARYING_SLOT_POS; + nir_copy_var(&b, pos_out, pos_in); + + /* Add one more pass-through attribute. For clear shaders, this is used + * to store the color and for blit shaders it's the texture coordinate. + */ + const struct glsl_type *attr_type = glsl_vec4_type(); + nir_variable *attr_in = nir_variable_create(b.shader, "a_attr", attr_type, + nir_var_shader_in); + attr_in->data.location = VERT_ATTRIB_GENERIC1; + nir_variable *attr_out = nir_variable_create(b.shader, "v_attr", attr_type, + nir_var_shader_out); + attr_out->data.location = VARYING_SLOT_VAR0; + attr_out->data.interpolation = attr_flat ? 
INTERP_QUALIFIER_FLAT : + INTERP_QUALIFIER_SMOOTH; + nir_copy_var(&b, attr_out, attr_in); + + return b.shader; +} + +static nir_shader * +build_nir_clear_fragment_shader() +{ + nir_builder b; + + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_builder_init_simple_shader(&b, MESA_SHADER_FRAGMENT); + + nir_variable *color_in = nir_variable_create(b.shader, "v_attr", + color_type, + nir_var_shader_in); + color_in->data.location = VARYING_SLOT_VAR0; + color_in->data.interpolation = INTERP_QUALIFIER_FLAT; + nir_variable *color_out = nir_variable_create(b.shader, "f_color", + color_type, + nir_var_shader_out); + color_out->data.location = FRAG_RESULT_DATA0; + nir_copy_var(&b, color_out, color_in); + + return b.shader; +} static void anv_device_init_meta_clear_state(struct anv_device *device) { - /* We don't use a vertex shader for clearing, but instead build and pass - * the VUEs directly to the rasterization backend. However, we do need - * to provide GLSL source for the vertex shader so that the compiler - * does not dead-code our inputs. 
- */ - VkShaderModule vsm = GLSL_VK_SHADER_MODULE(device, VERTEX, - layout(location = 0) in vec3 a_pos; - layout(location = 1) in vec4 a_color; - layout(location = 0) flat out vec4 v_color; - void main() - { - v_color = a_color; - gl_Position = vec4(a_pos, 1); - } - ); + struct anv_shader_module vsm = { + .nir = build_nir_vertex_shader(true), + }; - VkShaderModule fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, - layout(location = 0) out vec4 f_color; - layout(location = 0) flat in vec4 v_color; - void main() - { - f_color = v_color; - } - ); + struct anv_shader_module fsm = { + .nir = build_nir_clear_fragment_shader(), + }; VkShader vs; anv_CreateShader(anv_device_to_handle(device), &(VkShaderCreateInfo) { .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = vsm, + .module = anv_shader_module_to_handle(&vsm), .pName = "main", }, &vs); @@ -70,7 +113,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) anv_CreateShader(anv_device_to_handle(device), &(VkShaderCreateInfo) { .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = fsm, + .module = anv_shader_module_to_handle(&fsm), .pName = "main", }, &fs); @@ -187,10 +230,10 @@ anv_device_init_meta_clear_state(struct anv_device *device) }, &device->meta_state.clear.pipeline); - anv_DestroyShaderModule(anv_device_to_handle(device), vsm); - anv_DestroyShaderModule(anv_device_to_handle(device), fsm); anv_DestroyShader(anv_device_to_handle(device), vs); anv_DestroyShader(anv_device_to_handle(device), fs); + ralloc_free(vsm.nir); + ralloc_free(fsm.nir); } #define NUM_VB_USED 2 -- cgit v1.2.3 From f5ffb0e0cbdc2da7c2b03258fde2a0f27c563a7a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 2 Oct 2015 16:18:44 -0700 Subject: anv/meta: Use NIR directly for blit shaders --- src/vulkan/anv_meta.c | 102 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 
0c0f889ed14..d7efdf69b62 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -90,6 +90,59 @@ build_nir_clear_fragment_shader() return b.shader; } +static nir_shader * +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) +{ + nir_builder b; + + nir_builder_init_simple_shader(&b, MESA_SHADER_FRAGMENT); + + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, "v_attr", + glsl_vec4_type(), + nir_var_shader_in); + tex_pos_in->data.location = VARYING_SLOT_VAR0; + + const struct glsl_type *sampler_type = + glsl_sampler_type(tex_dim, false, false, glsl_get_base_type(color_type)); + nir_variable *sampler = nir_variable_create(b.shader, "s_tex", sampler_type, + nir_var_uniform); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + tex->sampler_dim = tex_dim; + tex->op = nir_texop_tex; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(nir_load_var(&b, tex_pos_in)); + tex->dest_type = nir_type_float; /* TODO */ + + switch (tex_dim) { + case GLSL_SAMPLER_DIM_2D: + tex->coord_components = 2; + break; + case GLSL_SAMPLER_DIM_3D: + tex->coord_components = 3; + break; + default: + assert(!"Unsupported texture dimension"); + } + + tex->sampler = nir_deref_var_create(tex, sampler); + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + nir_variable *color_out = nir_variable_create(b.shader, "f_color", + color_type, + nir_var_shader_out); + color_out->data.location = FRAG_RESULT_DATA0; + nir_store_var(&b, color_out, &tex->dest.ssa); + + return b.shader; +} + static void anv_device_init_meta_clear_state(struct anv_device *device) { @@ -474,42 +527,23 @@ anv_device_init_meta_blit_state(struct anv_device *device) * to provide GLSL source for the vertex shader so that the compiler * does not dead-code our inputs. 
*/ - VkShaderModule vsm = GLSL_VK_SHADER_MODULE(device, VERTEX, - layout(location = 0) in vec2 a_pos; - layout(location = 1) in vec2 a_tex_coord; - layout(location = 0) out vec4 v_tex_coord; - void main() - { - v_tex_coord = vec4(a_tex_coord, 0, 1); - gl_Position = vec4(a_pos, 0, 1); - } - ); + struct anv_shader_module vsm = { + .nir = build_nir_vertex_shader(false), + }; - VkShaderModule fsm_2d = GLSL_VK_SHADER_MODULE(device, FRAGMENT, - layout(location = 0) out vec4 f_color; - layout(location = 0) in vec4 v_tex_coord; - layout(set = 0, binding = 0) uniform sampler2D u_tex; - void main() - { - f_color = texture(u_tex, v_tex_coord.xy); - } - ); + struct anv_shader_module fsm_2d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), + }; - VkShaderModule fsm_3d = GLSL_VK_SHADER_MODULE(device, FRAGMENT, - layout(location = 0) out vec4 f_color; - layout(location = 0) in vec4 v_tex_coord; - layout(set = 0, binding = 0) uniform sampler3D u_tex; - void main() - { - f_color = texture(u_tex, v_tex_coord.xyz); - } - ); + struct anv_shader_module fsm_3d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), + }; VkShader vs; anv_CreateShader(anv_device_to_handle(device), &(VkShaderCreateInfo) { .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = vsm, + .module = anv_shader_module_to_handle(&vsm), .pName = "main", }, &vs); @@ -517,7 +551,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) anv_CreateShader(anv_device_to_handle(device), &(VkShaderCreateInfo) { .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = fsm_2d, + .module = anv_shader_module_to_handle(&fsm_2d), .pName = "main", }, &fs_2d); @@ -525,7 +559,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) anv_CreateShader(anv_device_to_handle(device), &(VkShaderCreateInfo) { .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = fsm_3d, + .module = anv_shader_module_to_handle(&fsm_3d), .pName = "main", }, &fs_3d); @@ -655,12 +689,12 @@ 
anv_device_init_meta_blit_state(struct anv_device *device) &vk_pipeline_info, &anv_pipeline_info, &device->meta_state.blit.pipeline_3d_src); - anv_DestroyShaderModule(anv_device_to_handle(device), vsm); anv_DestroyShader(anv_device_to_handle(device), vs); - anv_DestroyShaderModule(anv_device_to_handle(device), fsm_2d); anv_DestroyShader(anv_device_to_handle(device), fs_2d); - anv_DestroyShaderModule(anv_device_to_handle(device), fsm_3d); anv_DestroyShader(anv_device_to_handle(device), fs_3d); + ralloc_free(vsm.nir); + ralloc_free(fsm_2d.nir); + ralloc_free(fsm_3d.nir); } static void -- cgit v1.2.3 From 002e7b0cc3c34c626fede219903b8b086a14ae5a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 2 Oct 2015 16:20:02 -0700 Subject: anv: Remove the GLSL -> SPIR-V scraper/converter This was very useful to get us up-and-going. However, now that we can use NIR directly for meta shaders, we don't need this anymore and we might as well drop the glslc dependency. --- configure.ac | 4 - src/vulkan/Makefile.am | 3 - src/vulkan/glsl_scraper.py | 253 --------------------------------------------- 3 files changed, 260 deletions(-) delete mode 100644 src/vulkan/glsl_scraper.py (limited to 'src') diff --git a/configure.ac b/configure.ac index fa64dab4e48..6ff50abda3d 100644 --- a/configure.ac +++ b/configure.ac @@ -1552,10 +1552,6 @@ AC_SUBST([GBM_PC_LIB_PRIV]) AM_CONDITIONAL(HAVE_VULKAN, true) -AC_ARG_VAR([GLSLC], [Path to the glslc executable]) -AC_CHECK_PROGS([GLSLC], [glslc]) -AC_SUBST([GLSLC]) - dnl dnl EGL configuration dnl diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index aada6314b2a..765d2cef03d 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -116,9 +116,6 @@ anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< code > $@ -%_spirv_autogen.h: %.c glsl_scraper.py - $(AM_V_GEN) $(PYTHON3) 
$(srcdir)/glsl_scraper.py --with-glslc=$(GLSLC) -o $@ $< - CLEANFILES = $(BUILT_SOURCES) libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py deleted file mode 100644 index 244cb4888d5..00000000000 --- a/src/vulkan/glsl_scraper.py +++ /dev/null @@ -1,253 +0,0 @@ -#! /usr/bin/env python3 - -import argparse -import io -import os -import re -import shutil -import struct -import subprocess -import sys -import tempfile -from textwrap import dedent - -class ShaderCompileError(RuntimeError): - def __init__(self): - super(ShaderCompileError, self).__init__('Compile error') - -class Shader: - def __init__(self, stage): - self.stream = io.StringIO() - self.stage = stage - self.dwords = None - - def add_text(self, s): - self.stream.write(s) - - def finish_text(self, line): - self.line = line - - def glsl_source(self): - return dedent(self.stream.getvalue()) - - def __run_glslc(self, extra_args=[]): - stage_flag = '-fshader-stage=' - if self.stage == 'VERTEX': - stage_flag += 'vertex' - elif self.stage == 'TESS_CONTROL': - stage_flag += 'tesscontrol' - elif self.stage == 'TESS_EVALUATION': - stage_flag += 'tesseval' - elif self.stage == 'GEOMETRY': - stage_flag += 'geometry' - elif self.stage == 'FRAGMENT': - stage_flag += 'fragment' - elif self.stage == 'COMPUTE': - stage_flag += 'compute' - else: - assert False - - with subprocess.Popen([glslc] + extra_args + - [stage_flag, '-std=430core', '-o', '-', '-'], - stdout = subprocess.PIPE, - stdin = subprocess.PIPE) as proc: - - proc.stdin.write(self.glsl_source().encode('utf-8')) - out, err = proc.communicate(timeout=30) - - if proc.returncode != 0: - raise ShaderCompileError() - - return out - - def compile(self): - def dwords(f): - while True: - dword_str = f.read(4) - if not dword_str: - return - assert len(dword_str) == 4 - yield struct.unpack('I', dword_str)[0] - - spirv = self.__run_glslc() - self.dwords = list(dwords(io.BytesIO(spirv))) - 
self.assembly = str(self.__run_glslc(['-S']), 'utf-8') - - def dump_c_code(self, f): - f.write('\n\n') - prefix = '_anv_glsl_helpers_shader{0}'.format(self.line) - - f.write('/* GLSL Source code:\n') - for line in self.glsl_source().splitlines(): - f.write(' * ' + line + '\n') - - f.write(' *\n') - - f.write(' * SPIR-V Assembly:\n') - f.write(' *\n') - for line in self.assembly.splitlines(): - f.write(' * ' + line + '\n') - f.write(' */\n') - - f.write('static const uint32_t {0}_spirv_code[] = {{'.format(prefix)) - line_start = 0 - while line_start < len(self.dwords): - f.write('\n ') - for i in range(line_start, min(line_start + 6, len(self.dwords))): - f.write(' 0x{:08x},'.format(self.dwords[i])) - line_start += 6 - f.write('\n};\n') - - f.write(dedent("""\ - static const VkShaderModuleCreateInfo {0}_info = {{ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .codeSize = sizeof({0}_spirv_code), - .pCode = {0}_spirv_code, - }}; - """.format(prefix))) - -token_exp = re.compile(r'(GLSL_VK_SHADER_MODULE|\(|\)|,)') - -class Parser: - def __init__(self, f): - self.infile = f - self.paren_depth = 0 - self.shader = None - self.line_number = 1 - self.shaders = [] - - def tokenize(f): - leftover = '' - for line in f: - pos = 0 - while True: - m = token_exp.search(line, pos) - if m: - if m.start() > pos: - leftover += line[pos:m.start()] - pos = m.end() - - if leftover: - yield leftover - leftover = '' - - yield m.group(0) - - else: - leftover += line[pos:] - break - - self.line_number += 1 - - if leftover: - yield leftover - - self.token_iter = tokenize(self.infile) - - def handle_shader_src(self): - paren_depth = 1 - for t in self.token_iter: - if t == '(': - paren_depth += 1 - elif t == ')': - paren_depth -= 1 - if paren_depth == 0: - return - - self.current_shader.add_text(t) - - def handle_macro(self, macro): - t = next(self.token_iter) - assert t == '(' - - # Throw away the device parameter - t = next(self.token_iter) - t = next(self.token_iter) - assert t == 
',' - - stage = next(self.token_iter).strip() - - t = next(self.token_iter) - assert t == ',' - - self.current_shader = Shader(stage) - self.handle_shader_src() - self.current_shader.finish_text(self.line_number) - - self.shaders.append(self.current_shader) - self.current_shader = None - - def run(self): - for t in self.token_iter: - if t == 'GLSL_VK_SHADER_MODULE': - self.handle_macro(t) - -def open_file(name, mode): - if name == '-': - if mode == 'w': - return sys.stdout - elif mode == 'r': - return sys.stdin - else: - assert False - else: - return open(name, mode) - -def parse_args(): - description = dedent("""\ - This program scrapes a C file for any instance of the - qoShaderCreateInfoGLSL and qoCreateShaderGLSL macaros, grabs the - GLSL source code, compiles it to SPIR-V. The resulting SPIR-V code - is written to another C file as an array of 32-bit words. - - If '-' is passed as the input file or output file, stdin or stdout - will be used instead of a file on disc.""") - - p = argparse.ArgumentParser( - description=description, - formatter_class=argparse.RawDescriptionHelpFormatter) - p.add_argument('-o', '--outfile', default='-', - help='Output to the given file (default: stdout).') - p.add_argument('--with-glslc', metavar='PATH', - default='glslc', - dest='glslc', - help='Full path to the glslc shader compiler.') - p.add_argument('infile', metavar='INFILE') - - return p.parse_args() - - -args = parse_args() -infname = args.infile -outfname = args.outfile -glslc = args.glslc - -with open_file(infname, 'r') as infile: - parser = Parser(infile) - parser.run() - -for shader in parser.shaders: - shader.compile() - -with open_file(outfname, 'w') as outfile: - outfile.write(dedent("""\ - /* =========================== DO NOT EDIT! =========================== - * This file is autogenerated by glsl_scraper.py. 
- */ - - #include - - #define _ANV_SPIRV_MODULE_INFO2(_line) _anv_glsl_helpers_shader ## _line ## _info - #define _ANV_SPIRV_MODULE_INFO(_line) _ANV_SPIRV_MODULE_INFO2(_line) - - #define GLSL_VK_SHADER_MODULE(device, stage, ...) ({ \\ - VkShaderModule __module; \\ - ANV_CALL(CreateShaderModule)(anv_device_to_handle(device), \\ - &_ANV_SPIRV_MODULE_INFO(__LINE__), \\ - &__module); \\ - __module; \\ - }) - """)) - - for shader in parser.shaders: - shader.dump_c_code(outfile) -- cgit v1.2.3 From 10f97718c353e101c64fa60fcde91e1550e39957 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 2 Oct 2015 16:24:42 -0700 Subject: anv/allocator: Add a sanity assertion in state stream finish. We assert that the block offset we got while walking the list of blocks is actually a multiple of the block size. If something goes wrong and the GPU decides to stomp on the surface state buffer we can end up getting corruptions in our list of blocks. This assertion makes such corruptions a crash with a meaningful message rather than an infinite loop. 
--- src/vulkan/anv_allocator.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 05126305b51..db04a2c236c 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -697,6 +697,7 @@ anv_state_stream_finish(struct anv_state_stream *stream) block = stream->current_block; while (block != NULL_BLOCK) { + assert(block % stream->block_pool->block_size == 0); sb = stream->block_pool->map + block; next_block = VG_NOACCESS_READ(&sb->next); VG(VALGRIND_MEMPOOL_FREE(stream, VG_NOACCESS_READ(&sb->_vg_ptr))); -- cgit v1.2.3 From 8cb2e27c62ab958f028eb2887b84a17e95ffe8ec Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Sun, 4 Oct 2015 09:26:25 -0700 Subject: vk/0.170.2: Update VkRenderPassBeginInfo Rename members: attachmentCount -> clearValueCount pAttachmentClearValues -> pClearValues --- include/vulkan/vulkan.h | 4 ++-- src/vulkan/anv_meta.c | 11 +++++++---- src/vulkan/gen7_cmd_buffer.c | 2 +- src/vulkan/gen8_cmd_buffer.c | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 6b8ca6022ee..16ed3f8dd67 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2020,8 +2020,8 @@ typedef struct { VkRenderPass renderPass; VkFramebuffer framebuffer; VkRect2D renderArea; - uint32_t attachmentCount; - const VkClearValue* pAttachmentClearValues; + uint32_t clearValueCount; + const VkClearValue* pClearValues; } VkRenderPassBeginInfo; typedef struct { diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index d7efdf69b62..11b8557805c 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -428,6 +428,9 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, if (pass->has_stencil_clear_attachment) anv_finishme("stencil clear"); + /* FINISHME: Rethink how we count clear attachments in light of + * 0.138.2 -> 0.170.2 diff. 
+ */ if (pass->num_color_clear_attachments == 0 && !pass->has_depth_clear_attachment) return; @@ -889,8 +892,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .offset = { dest_offset.x, dest_offset.y }, .extent = { dest_extent.width, dest_extent.height }, }, - .attachmentCount = 1, - .pAttachmentClearValues = NULL, + .clearValueCount = 0, + .pClearValues = NULL, }, VK_RENDER_PASS_CONTENTS_INLINE); VkPipeline pipeline; @@ -1621,8 +1624,8 @@ void anv_CmdClearColorImage( }, .renderPass = pass, .framebuffer = fb, - .attachmentCount = 1, - .pAttachmentClearValues = NULL, + .clearValueCount = 1, + .pClearValues = NULL, }, VK_RENDER_PASS_CONTENTS_INLINE); struct clear_instance_data instance_data = { diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 95d1e28698f..323022b13f9 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -641,7 +641,7 @@ begin_render_pass(struct anv_cmd_buffer *cmd_buffer, .DrawingRectangleOriginX = 0); anv_cmd_buffer_clear_attachments(cmd_buffer, pass, - pRenderPassBegin->pAttachmentClearValues); + pRenderPassBegin->pClearValues); } void gen7_CmdBeginRenderPass( diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 0e830b4427d..d82a409d886 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -580,7 +580,7 @@ void gen8_CmdBeginRenderPass( .DrawingRectangleOriginX = 0); anv_cmd_buffer_clear_attachments(cmd_buffer, pass, - pRenderPassBegin->pAttachmentClearValues); + pRenderPassBegin->pClearValues); gen8_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); } -- cgit v1.2.3 From dd04be491d286dfc26ac8630ad3c9dbc839815cb Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Sun, 4 Oct 2015 09:41:22 -0700 Subject: vk/0.170.2: Update Vk VkPipelineDepthStencilStateCreateInfo Rename member depthBoundsEnable -> depthBoundsTestEnable. 
--- include/vulkan/vulkan.h | 2 +- src/vulkan/anv_meta.c | 2 +- src/vulkan/gen8_pipeline.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 16ed3f8dd67..1f35b01ae9b 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1652,7 +1652,7 @@ typedef struct { VkBool32 depthTestEnable; VkBool32 depthWriteEnable; VkCompareOp depthCompareOp; - VkBool32 depthBoundsEnable; + VkBool32 depthBoundsTestEnable; VkBool32 stencilTestEnable; VkStencilOpState front; VkStencilOpState back; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 11b8557805c..e43d11b06ee 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -254,7 +254,7 @@ anv_device_init_meta_clear_state(struct anv_device *device) .depthTestEnable = true, .depthWriteEnable = true, .depthCompareOp = VK_COMPARE_OP_ALWAYS, - .depthBoundsEnable = false, + .depthBoundsTestEnable = false, .stencilTestEnable = true, .front = (VkStencilOpState) { .stencilPassOp = VK_STENCIL_OP_REPLACE, diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index fae09f3fbbe..4900715a47a 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -269,7 +269,7 @@ emit_ds_state(struct anv_pipeline *pipeline, return; } - /* VkBool32 depthBoundsEnable; // optional (depth_bounds_test) */ + /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */ struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { .DepthTestEnable = info->depthTestEnable, -- cgit v1.2.3 From 7a089bd1a60a38f4048cedc49f141b8917536ae3 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 06:48:14 -0700 Subject: vk/0.170.2: Update VkImageSubresourceRange Replace 'aspect' with 'aspectMask'. 
--- include/vulkan/vulkan.h | 2 +- src/vulkan/anv_image.c | 58 +++++++++++++++++++++++++++++------------------- src/vulkan/anv_meta.c | 10 ++++----- src/vulkan/anv_private.h | 3 ++- src/vulkan/gen7_state.c | 2 +- src/vulkan/gen8_state.c | 2 +- 6 files changed, 45 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 923a3b69ea3..fc6a1c638c6 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1505,7 +1505,7 @@ typedef struct { } VkChannelMapping; typedef struct { - VkImageAspect aspect; + VkImageAspectFlags aspectMask; uint32_t baseMipLevel; uint32_t mipLevels; uint32_t baseArraySlice; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index dce4208ed5f..2045aa27219 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -415,8 +415,7 @@ anv_validate_CreateImageView(VkDevice _device, assert(pCreateInfo->channels.a <= VK_CHANNEL_SWIZZLE_END_RANGE); /* Validate subresource. */ - assert(subresource->aspect >= VK_IMAGE_ASPECT_BEGIN_RANGE); - assert(subresource->aspect <= VK_IMAGE_ASPECT_END_RANGE); + assert(subresource->aspectMask != 0); assert(subresource->mipLevels > 0); assert(subresource->arraySize > 0); assert(subresource->baseMipLevel < image->levels); @@ -430,28 +429,33 @@ anv_validate_CreateImageView(VkDevice _device, assert(subresource->arraySize % 6 == 0); } + const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT + | VK_IMAGE_ASPECT_STENCIL_BIT; + /* Validate format. 
*/ - switch (subresource->aspect) { - case VK_IMAGE_ASPECT_COLOR: + if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); assert(!image->format->depth_format); assert(!image->format->has_stencil); assert(!view_format_info->depth_format); assert(!view_format_info->has_stencil); assert(view_format_info->cpp == image->format->cpp); - break; - case VK_IMAGE_ASPECT_DEPTH: - assert(image->format->depth_format); - assert(view_format_info->depth_format); - assert(view_format_info->cpp == image->format->cpp); - break; - case VK_IMAGE_ASPECT_STENCIL: - /* FINISHME: Is it legal to have an R8 view of S8? */ - assert(image->format->has_stencil); - assert(view_format_info->has_stencil); - break; - default: - assert(!"bad VkImageAspect"); - break; + } else if (subresource->aspectMask & ds_flags) { + assert((subresource->aspectMask & ~ds_flags) == 0); + + if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { + assert(image->format->depth_format); + assert(view_format_info->depth_format); + assert(view_format_info->cpp == image->format->cpp); + } + + if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL) { + /* FINISHME: Is it legal to have an R8 view of S8? 
*/ + assert(image->format->has_stencil); + assert(view_format_info->has_stencil); + } + } else { + assert(!"bad VkImageSubresourceRange::aspectFlags"); } return anv_CreateImageView(_device, pCreateInfo, pView); @@ -528,19 +532,27 @@ anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, } struct anv_surface * -anv_image_get_surface_for_aspect(struct anv_image *image, VkImageAspect aspect) +anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlags aspect_mask) { - switch (aspect) { - case VK_IMAGE_ASPECT_COLOR: + switch (aspect_mask) { + case VK_IMAGE_ASPECT_COLOR_BIT: assert(anv_format_is_color(image->format)); return &image->color_surface; - case VK_IMAGE_ASPECT_DEPTH: + case VK_IMAGE_ASPECT_DEPTH_BIT: assert(image->format->depth_format); return &image->depth_surface; - case VK_IMAGE_ASPECT_STENCIL: + case VK_IMAGE_ASPECT_STENCIL_BIT: assert(image->format->has_stencil); anv_finishme("stencil image views"); return &image->stencil_surface; + case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: + /* FINISHME: Support combined depthstencil aspect. Does the Vulkan spec + * allow is to reject it? Until we support it, filter out the stencil + * aspect and use only the depth aspect. 
+ */ + anv_finishme("combined depthstencil aspect"); + assert(image->format->depth_format); + return &image->depth_surface; default: unreachable("image does not have aspect"); return NULL; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index e43d11b06ee..dd8f3a958a6 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1013,7 +1013,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, VK_CHANNEL_SWIZZLE_A }, .subresourceRange = { - .aspect = VK_IMAGE_ASPECT_COLOR, + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, .mipLevels = 1, .baseArraySlice = 0, @@ -1161,7 +1161,7 @@ void anv_CmdCopyImage( VK_CHANNEL_SWIZZLE_A }, .subresourceRange = { - .aspect = pRegions[r].srcSubresource.aspect, + .aspectMask = 1 << pRegions[r].srcSubresource.aspect, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, .mipLevels = 1, .baseArraySlice = pRegions[r].srcSubresource.arraySlice, @@ -1247,7 +1247,7 @@ void anv_CmdBlitImage( VK_CHANNEL_SWIZZLE_A }, .subresourceRange = { - .aspect = pRegions[r].srcSubresource.aspect, + .aspectMask = 1 << pRegions[r].srcSubresource.aspect, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, .mipLevels = 1, .baseArraySlice = pRegions[r].srcSubresource.arraySlice, @@ -1377,7 +1377,7 @@ void anv_CmdCopyBufferToImage( VK_CHANNEL_SWIZZLE_A }, .subresourceRange = { - .aspect = proxy_aspect, + .aspectMask = 1 << proxy_aspect, .baseMipLevel = 0, .mipLevels = 1, .baseArraySlice = 0, @@ -1464,7 +1464,7 @@ void anv_CmdCopyImageToBuffer( VK_CHANNEL_SWIZZLE_A }, .subresourceRange = { - .aspect = pRegions[r].imageSubresource.aspect, + .aspectMask = 1 << pRegions[r].imageSubresource.aspect, .baseMipLevel = pRegions[r].imageSubresource.mipLevel, .mipLevels = 1, .baseArraySlice = pRegions[r].imageSubresource.arraySlice, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index f0d288342d6..a2211bb76b3 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1273,7 +1273,8 @@ VkResult 
anv_image_create(VkDevice _device, VkImage *pImage); struct anv_surface * -anv_image_get_surface_for_aspect(struct anv_image *image, VkImageAspect aspect); +anv_image_get_surface_for_aspect_mask(struct anv_image *image, + VkImageAspectFlags aspect_mask); struct anv_surface * anv_image_get_surface_for_color_attachment(struct anv_image *image); diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index faf99a5aef8..f93349da0b0 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -274,7 +274,7 @@ gen7_image_view_init(struct anv_image_view *iview, const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; struct anv_surface_view *view = &iview->view; struct anv_surface *surface = - anv_image_get_surface_for_aspect(image, range->aspect); + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 5646637e4a0..3709a50005d 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -150,7 +150,7 @@ gen8_image_view_init(struct anv_image_view *iview, const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; struct anv_surface_view *view = &iview->view; struct anv_surface *surface = - anv_image_get_surface_for_aspect(image, range->aspect); + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); uint32_t depth = 1; /* RENDER_SURFACE_STATE::Depth */ uint32_t rt_view_extent = 1; /* RENDER_SURFACE_STATE::RenderTargetViewExtent */ -- cgit v1.2.3 From 9c93aa9141505601d862a7c85fea0679e7c63e86 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 08:52:42 -0700 Subject: vk: Better types for VkShaderStage, VkShaderStageFlags vars In most places, the variable type was the uninformative uint32_t. 
--- src/vulkan/anv_cmd_buffer.c | 15 ++++++++------- src/vulkan/anv_device.c | 2 +- src/vulkan/anv_private.h | 8 ++++---- src/vulkan/gen8_cmd_buffer.c | 5 ++--- 4 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index c74d6d0eb17..419405204d5 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -333,7 +333,7 @@ void anv_CmdBindDescriptorSets( } if (set_layout->num_dynamic_buffers > 0) { - uint32_t s; + VkShaderStage s; for_each_bit(s, set_layout->shader_stages) { anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic_offsets); @@ -389,7 +389,7 @@ add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *bt_state) + VkShaderStage stage, struct anv_state *bt_state) { struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; @@ -473,7 +473,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *state) + VkShaderStage stage, struct anv_state *state) { struct anv_pipeline_layout *layout; uint32_t sampler_count; @@ -517,7 +517,7 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, } static VkResult -flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) +flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, VkShaderStage stage) { struct anv_state surfaces = { 0, }, samplers = { 0, }; VkResult result; @@ -567,8 +567,9 @@ flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) void anv_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { - uint32_t s, dirty = cmd_buffer->state.descriptors_dirty & - cmd_buffer->state.pipeline->active_stages; + VkShaderStage s; + VkShaderStageFlags dirty = 
cmd_buffer->state.descriptors_dirty & + cmd_buffer->state.pipeline->active_stages; VkResult result = VK_SUCCESS; for_each_bit(s, dirty) { @@ -715,7 +716,7 @@ void anv_CmdPushConstants( const void* values) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - uint32_t stage; + VkShaderStage stage; for_each_bit(stage, stageFlags) { anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index b6f083b4c83..dd9b08ce075 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1462,7 +1462,7 @@ VkResult anv_CreateDescriptorSetLayout( uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, }; uint32_t num_dynamic_buffers = 0; uint32_t count = 0; - uint32_t stages = 0; + VkShaderStageFlags stages = 0; uint32_t s; for (uint32_t i = 0; i < pCreateInfo->count; i++) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a2211bb76b3..29a04cab154 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -724,7 +724,7 @@ struct anv_descriptor_set_layout { uint32_t count; uint32_t num_dynamic_buffers; - uint32_t shader_stages; + VkShaderStageFlags shader_stages; struct anv_descriptor_slot entries[0]; }; @@ -828,8 +828,8 @@ struct anv_cmd_state { uint32_t vb_dirty; uint32_t dirty; uint32_t compute_dirty; - uint32_t descriptors_dirty; - uint32_t push_constants_dirty; + VkShaderStageFlags descriptors_dirty; + VkShaderStageFlags push_constants_dirty; uint32_t scratch_size; struct anv_pipeline * pipeline; struct anv_pipeline * compute_pipeline; @@ -1034,7 +1034,7 @@ struct anv_pipeline { uint32_t nr_gs_entries; } urb; - uint32_t active_stages; + VkShaderStageFlags active_stages; struct anv_state_stream program_stream; struct anv_state blend_state; uint32_t vs_simd8; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index d82a409d886..a294e3ad50c 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -32,8 +32,6 
@@ static void gen8_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) { - uint32_t stage; - static const uint32_t push_constant_opcodes[] = { [VK_SHADER_STAGE_VERTEX] = 21, [VK_SHADER_STAGE_TESS_CONTROL] = 25, /* HS */ @@ -43,7 +41,8 @@ gen8_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) [VK_SHADER_STAGE_COMPUTE] = 0, }; - uint32_t flushed = 0; + VkShaderStage stage; + VkShaderStageFlags flushed = 0; for_each_bit(stage, cmd_buffer->state.push_constants_dirty) { struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); -- cgit v1.2.3 From 35302240632aadd95ef97169a6013f088e9e1f17 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 08:58:35 -0700 Subject: vk: Annotate anv_cmd_state::gen7::index_type It's the value of 3DSTATE_INDEX_BUFFER.IndexFormat. --- src/vulkan/anv_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 29a04cab154..15e5124d857 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -847,7 +847,7 @@ struct anv_cmd_state { struct { struct anv_buffer * index_buffer; - uint32_t index_type; + uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */ uint32_t index_offset; } gen7; }; -- cgit v1.2.3 From 4ffb4549e0aa139c908afd3c1b05e9a3ca6acf40 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 13:17:39 -0700 Subject: vk/image: Document a Vulkan spec requirement for depthstencil The Vulkan spec (git a511ba2) requires support for some combined depth stencil formats. 
--- src/vulkan/anv_image.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 2045aa27219..43245297f0e 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -546,9 +546,11 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag anv_finishme("stencil image views"); return &image->stencil_surface; case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: - /* FINISHME: Support combined depthstencil aspect. Does the Vulkan spec - * allow is to reject it? Until we support it, filter out the stencil - * aspect and use only the depth aspect. + /* FINISHME: The Vulkan spec (git a511ba2) requires support for combined + * depth stencil formats. Specifically, it states: + * + * At least one of ename:VK_FORMAT_D24_UNORM_S8_UINT or + * ename:VK_FORMAT_D32_SFLOAT_S8_UINT must be supported. */ anv_finishme("combined depthstencil aspect"); assert(image->format->depth_format); -- cgit v1.2.3 From 8bf021cf3dfdb50b709b7bba65dec8fd9495c74f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 13:22:44 -0700 Subject: vk: Return anv_image_view_info by value The struct is only 2 bytes. Returning it on the stack is better than returning a reference into the ELF .data segment. 
--- src/vulkan/anv_image.c | 4 ++-- src/vulkan/anv_private.h | 2 +- src/vulkan/gen7_state.c | 4 ++-- src/vulkan/gen8_state.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 43245297f0e..248df3d9b42 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -61,10 +61,10 @@ anv_image_view_info_table[] = { #undef INFO }; -const struct anv_image_view_info * +struct anv_image_view_info anv_image_view_info_for_vk_image_view_type(VkImageViewType type) { - return &anv_image_view_info_table[type]; + return anv_image_view_info_table[type]; } static const struct anv_surf_type_limits { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 15e5124d857..bc747fcd27c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1154,7 +1154,7 @@ struct anv_image_view_info { bool is_cube:1; /**< RENDER_SURFACE_STATE.CubeFaceEnable* */ }; -const struct anv_image_view_info * +struct anv_image_view_info anv_image_view_info_for_vk_image_view_type(VkImageViewType type); /** diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index f93349da0b0..7445bf02042 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -279,7 +279,7 @@ gen7_image_view_init(struct anv_image_view *iview, const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); - const struct anv_image_view_info *view_type_info = + const struct anv_image_view_info view_type_info = anv_image_view_info_for_vk_image_view_type(pCreateInfo->viewType); if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) @@ -303,7 +303,7 @@ gen7_image_view_init(struct anv_image_view *iview, } struct GEN7_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = view_type_info->surface_type, + .SurfaceType = view_type_info.surface_type, .SurfaceArray = image->array_size > 1, .SurfaceFormat = format->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], diff 
--git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 3709a50005d..0ef44d03c44 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -158,7 +158,7 @@ gen8_image_view_init(struct anv_image_view *iview, const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); - const struct anv_image_view_info *view_type_info = + const struct anv_image_view_info view_type_info = anv_image_view_info_for_vk_image_view_type(pCreateInfo->viewType); view->bo = image->bo; @@ -220,7 +220,7 @@ gen8_image_view_init(struct anv_image_view *iview, }; struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = view_type_info->surface_type, + .SurfaceType = view_type_info.surface_type, .SurfaceArray = image->array_size > 1, .SurfaceFormat = format_info->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], -- cgit v1.2.3 From 2fc8122f66331e76ac436ea77ad5b57a011b188a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 17:02:42 -0700 Subject: vk: Drop dependency on no longer extant header anv_meta no longer uses GLSL shaders, and the build system no longer converts them to SPIR-V. So remove anv_meta_spirv_autogen.h from Makefile.am. 
--- src/vulkan/Makefile.am | 3 +-- src/vulkan/anv_meta.c | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 765d2cef03d..eebe54d9475 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -85,8 +85,7 @@ VULKAN_SOURCES = \ BUILT_SOURCES = \ anv_entrypoints.h \ - anv_entrypoints.c \ - anv_meta_spirv_autogen.h + anv_entrypoints.c if HAVE_EGL_PLATFORM_WAYLAND BUILT_SOURCES += \ diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index d7efdf69b62..91b5024f1a4 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -28,7 +28,6 @@ #include #include "anv_private.h" -#include "anv_meta_spirv_autogen.h" #include "anv_nir_builder.h" static nir_shader * -- cgit v1.2.3 From 63439953d7f51700e1834e3003e209b387772b5e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 17:02:42 -0700 Subject: vk: Drop dependency on no longer extant header anv_meta no longer uses GLSL shaders, and the build system no longer converts them to SPIR-V. So remove anv_meta_spirv_autogen.h from Makefile.am. 
(cherry picked from commit 2fc8122f66331e76ac436ea77ad5b57a011b188a) --- src/vulkan/Makefile.am | 3 +-- src/vulkan/anv_meta.c | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 765d2cef03d..eebe54d9475 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -85,8 +85,7 @@ VULKAN_SOURCES = \ BUILT_SOURCES = \ anv_entrypoints.h \ - anv_entrypoints.c \ - anv_meta_spirv_autogen.h + anv_entrypoints.c if HAVE_EGL_PLATFORM_WAYLAND BUILT_SOURCES += \ diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index dd8f3a958a6..e1bec386fe4 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -28,7 +28,6 @@ #include #include "anv_private.h" -#include "anv_meta_spirv_autogen.h" #include "anv_nir_builder.h" static nir_shader * -- cgit v1.2.3 From ffd051830d837705f4da6d16e59953b02066c91e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 14:43:23 -0700 Subject: vk: Unionize anv_desciptor For a given struct anv_descriptor, all members are NULL (in which case the descriptor is empty) or exactly one member is non-NULL. To make struct anv_descriptor better reflect its set of valid states, convert the struct into a tagged union. 
--- src/vulkan/anv_cmd_buffer.c | 16 ++++++++++------ src/vulkan/anv_device.c | 21 +++++++++++++++++---- src/vulkan/anv_private.h | 14 ++++++++++++-- 3 files changed, 39 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 419405204d5..5a00ce24bd2 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -456,12 +456,14 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, uint32_t start = bias + layout->set[set].stage[stage].surface_start; for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) { - struct anv_surface_view *view = - d->set->descriptors[surface_slots[b].index].view; + struct anv_descriptor *desc = + &d->set->descriptors[surface_slots[b].index]; - if (!view) + if (desc->type != ANV_DESCRIPTOR_TYPE_SURFACE_VIEW) continue; + struct anv_surface_view *view = desc->surface_view; + bt_map[start + b] = view->surface_state.offset + state_offset; add_surface_state_reloc(cmd_buffer, view->surface_state, view->bo, view->offset); @@ -502,12 +504,14 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, uint32_t start = layout->set[set].stage[stage].sampler_start; for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) { - struct anv_sampler *sampler = - d->set->descriptors[sampler_slots[b].index].sampler; + struct anv_descriptor *desc = + &d->set->descriptors[sampler_slots[b].index]; - if (!sampler) + if (desc->type != ANV_DESCRIPTOR_TYPE_SAMPLER) continue; + struct anv_sampler *sampler = desc->sampler; + memcpy(state->map + (start + b) * 16, sampler->state, sizeof(sampler->state)); } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index dd9b08ce075..7889c61ba5b 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1733,8 +1733,13 @@ VkResult anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_SAMPLER: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: for (uint32_t j = 0; j < 
write->count; j++) { - set->descriptors[write->destBinding + j].sampler = - anv_sampler_from_handle(write->pDescriptors[j].sampler); + ANV_FROM_HANDLE(anv_sampler, sampler, + write->pDescriptors[j].sampler); + + set->descriptors[write->destBinding + j] = (struct anv_descriptor) { + .type = ANV_DESCRIPTOR_TYPE_SAMPLER, + .sampler = sampler, + }; } if (write->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) @@ -1747,7 +1752,11 @@ VkResult anv_UpdateDescriptorSets( for (uint32_t j = 0; j < write->count; j++) { ANV_FROM_HANDLE(anv_image_view, iview, write->pDescriptors[j].imageView); - set->descriptors[write->destBinding + j].view = &iview->view; + + set->descriptors[write->destBinding + j] = (struct anv_descriptor) { + .type = ANV_DESCRIPTOR_TYPE_SURFACE_VIEW, + .surface_view = &iview->view, + }; } break; @@ -1767,7 +1776,11 @@ VkResult anv_UpdateDescriptorSets( for (uint32_t j = 0; j < write->count; j++) { ANV_FROM_HANDLE(anv_buffer_view, bview, write->pDescriptors[j].bufferView); - set->descriptors[write->destBinding + j].view = &bview->view; + + set->descriptors[write->destBinding + j] = (struct anv_descriptor) { + .type = ANV_DESCRIPTOR_TYPE_SURFACE_VIEW, + .surface_view = &bview->view, + }; } default: diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index bc747fcd27c..6ce5db209cb 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -728,9 +728,19 @@ struct anv_descriptor_set_layout { struct anv_descriptor_slot entries[0]; }; +enum anv_descriptor_type { + ANV_DESCRIPTOR_TYPE_EMPTY = 0, + ANV_DESCRIPTOR_TYPE_SAMPLER, + ANV_DESCRIPTOR_TYPE_SURFACE_VIEW, +}; + struct anv_descriptor { - struct anv_sampler *sampler; - struct anv_surface_view *view; + union { + struct anv_sampler *sampler; + struct anv_surface_view *surface_view; + }; + + enum anv_descriptor_type type; }; struct anv_descriptor_set { -- cgit v1.2.3 From 74193a880f475da40e8c03ff7e772f3a288317a8 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 15:49:10 -0700 
Subject: vk: Use consistent names for anv_*_view variables Rename all anv_*_view variables to follow this convention: - sview -> anv_surface_view - bview -> anv_buffer_view - iview -> anv_image_view - aview -> anv_attachment_view - cview -> anv_color_attachment_view - ds_view -> anv_depth_stencil_attachment_view This clarifies existing code. And it will reduce noise in the upcoming commits that merge VkAttachmentView into VkImageView. --- src/vulkan/anv_cmd_buffer.c | 25 +++++---- src/vulkan/anv_device.c | 24 ++++----- src/vulkan/anv_image.c | 57 ++++++++++---------- src/vulkan/anv_meta.c | 125 ++++++++++++++++++++++--------------------- src/vulkan/anv_private.h | 20 +++---- src/vulkan/gen7_cmd_buffer.c | 12 ++--- src/vulkan/gen7_state.c | 54 ++++++++++--------- src/vulkan/gen8_cmd_buffer.c | 12 ++--- src/vulkan/gen8_state.c | 54 ++++++++++--------- 9 files changed, 200 insertions(+), 183 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 5a00ce24bd2..50f8304f9b3 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -432,16 +432,19 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, * put the color attachments into the binding table. 
*/ for (uint32_t a = 0; a < attachments; a++) { - const struct anv_attachment_view *attachment = + const struct anv_attachment_view *aview = fb->attachments[subpass->color_attachments[a]]; - assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); - const struct anv_color_attachment_view *view = - (const struct anv_color_attachment_view *)attachment; + assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); - bt_map[a] = view->view.surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, view->view.surface_state, - view->view.bo, view->view.offset); + const struct anv_color_attachment_view *cview = + (const struct anv_color_attachment_view *) aview; + + const struct anv_surface_view *sview = &cview->surface_view; + + bt_map[a] = sview->surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, sview->surface_state, + sview->bo, sview->offset); } if (layout == NULL) @@ -462,11 +465,11 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (desc->type != ANV_DESCRIPTOR_TYPE_SURFACE_VIEW) continue; - struct anv_surface_view *view = desc->surface_view; + const struct anv_surface_view *sview = desc->surface_view; - bt_map[start + b] = view->surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, view->surface_state, - view->bo, view->offset); + bt_map[start + b] = sview->surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, sview->surface_state, + sview->bo, sview->offset); } } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 7889c61ba5b..b2c8027fcf9 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1396,19 +1396,19 @@ VkResult anv_buffer_view_create( struct anv_device * device, const VkBufferViewCreateInfo* pCreateInfo, - struct anv_buffer_view ** view_out) + struct anv_buffer_view ** bview_out) { ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); - struct anv_buffer_view *view; + struct anv_buffer_view 
*bview; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); - view = anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) + bview = anv_device_alloc(device, sizeof(*bview), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (bview == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - view->view = (struct anv_surface_view) { + bview->surface_view = (struct anv_surface_view) { .bo = buffer->bo, .offset = buffer->offset + pCreateInfo->offset, .surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64), @@ -1416,7 +1416,7 @@ anv_buffer_view_create( .range = pCreateInfo->range, }; - *view_out = view; + *bview_out = bview; return VK_SUCCESS; } @@ -1428,7 +1428,7 @@ VkResult anv_DestroyBufferView( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_buffer_view, bview, _bview); - anv_surface_view_fini(device, &bview->view); + anv_surface_view_fini(device, &bview->surface_view); anv_device_free(device, bview); return VK_SUCCESS; @@ -1755,7 +1755,7 @@ VkResult anv_UpdateDescriptorSets( set->descriptors[write->destBinding + j] = (struct anv_descriptor) { .type = ANV_DESCRIPTOR_TYPE_SURFACE_VIEW, - .surface_view = &iview->view, + .surface_view = &iview->surface_view, }; } break; @@ -1779,7 +1779,7 @@ VkResult anv_UpdateDescriptorSets( set->descriptors[write->destBinding + j] = (struct anv_descriptor) { .type = ANV_DESCRIPTOR_TYPE_SURFACE_VIEW, - .surface_view = &bview->view, + .surface_view = &bview->surface_view, }; } @@ -2001,10 +2001,10 @@ VkResult anv_CreateFramebuffer( framebuffer->attachment_count = pCreateInfo->attachmentCount; for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - ANV_FROM_HANDLE(anv_attachment_view, view, + ANV_FROM_HANDLE(anv_attachment_view, aview, pCreateInfo->pAttachments[i].view); - framebuffer->attachments[i] = view; + framebuffer->attachments[i] = aview; } framebuffer->width = pCreateInfo->width; diff --git a/src/vulkan/anv_image.c 
b/src/vulkan/anv_image.c index 248df3d9b42..19004320298 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -374,9 +374,9 @@ VkResult anv_GetImageSubresourceLayout( void anv_surface_view_fini(struct anv_device *device, - struct anv_surface_view *view) + struct anv_surface_view *sview) { - anv_state_pool_free(&device->surface_state_pool, view->surface_state); + anv_state_pool_free(&device->surface_state_pool, sview->surface_state); } VkResult @@ -505,30 +505,31 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview) ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_image_view, iview, _iview); - anv_surface_view_fini(device, &iview->view); + anv_surface_view_fini(device, &iview->surface_view); anv_device_free(device, iview); return VK_SUCCESS; } static void -anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, +anv_depth_stencil_view_init(struct anv_depth_stencil_view *ds_view, const VkAttachmentViewCreateInfo *pCreateInfo) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - view->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL; + ds_view->attachment_view.attachment_type = + ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL; /* XXX: We don't handle any of these */ anv_assert(pCreateInfo->mipLevel == 0); anv_assert(pCreateInfo->baseArraySlice == 0); anv_assert(pCreateInfo->arraySize == 1); - view->image = image; - view->format = anv_format_for_vk_format(pCreateInfo->format); + ds_view->image = image; + ds_view->format = anv_format_for_vk_format(pCreateInfo->format); assert(anv_format_is_depth_or_stencil(image->format)); - assert(anv_format_is_depth_or_stencil(view->format)); + assert(anv_format_is_depth_or_stencil(ds_view->format)); } struct anv_surface * @@ -578,17 +579,17 @@ anv_image_get_surface_for_color_attachment(struct anv_image *image) } void -anv_color_attachment_view_init(struct anv_color_attachment_view *aview, +anv_color_attachment_view_init(struct anv_color_attachment_view *cview, struct 
anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { switch (device->info.gen) { case 7: - gen7_color_attachment_view_init(aview, device, pCreateInfo, cmd_buffer); + gen7_color_attachment_view_init(cview, device, pCreateInfo, cmd_buffer); break; case 8: - gen8_color_attachment_view_init(aview, device, pCreateInfo, cmd_buffer); + gen8_color_attachment_view_init(cview, device, pCreateInfo, cmd_buffer); break; default: unreachable("unsupported gen\n"); @@ -608,44 +609,44 @@ anv_CreateAttachmentView(VkDevice _device, anv_format_for_vk_format(pCreateInfo->format); if (anv_format_is_depth_or_stencil(format)) { - struct anv_depth_stencil_view *view = - anv_device_alloc(device, sizeof(*view), 8, + struct anv_depth_stencil_view *ds_view = + anv_device_alloc(device, sizeof(*ds_view), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) + if (ds_view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - anv_depth_stencil_view_init(view, pCreateInfo); + anv_depth_stencil_view_init(ds_view, pCreateInfo); - *pView = anv_attachment_view_to_handle(&view->base); + *pView = anv_attachment_view_to_handle(&ds_view->attachment_view); } else { - struct anv_color_attachment_view *view = - anv_device_alloc(device, sizeof(*view), 8, + struct anv_color_attachment_view *cview = + anv_device_alloc(device, sizeof(*cview), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) + if (cview == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - anv_color_attachment_view_init(view, device, pCreateInfo, NULL); + anv_color_attachment_view_init(cview, device, pCreateInfo, NULL); - *pView = anv_attachment_view_to_handle(&view->base); + *pView = anv_attachment_view_to_handle(&cview->attachment_view); } return VK_SUCCESS; } VkResult -anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _view) +anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _aview) { ANV_FROM_HANDLE(anv_device, device, _device); - 
ANV_FROM_HANDLE(anv_attachment_view, view, _view); + ANV_FROM_HANDLE(anv_attachment_view, aview, _aview); - if (view->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR) { - struct anv_color_attachment_view *aview = - (struct anv_color_attachment_view *)view; + if (aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR) { + struct anv_color_attachment_view *cview = + (struct anv_color_attachment_view *) aview; - anv_surface_view_fini(device, &aview->view); + anv_surface_view_fini(device, &cview->surface_view); } - anv_device_free(device, view); + anv_device_free(device, aview); return VK_SUCCESS; } diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index e1bec386fe4..fe235e23fb0 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -729,15 +729,17 @@ struct blit_region { static void meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, - struct anv_image_view *src_view, + struct anv_image_view *src_iview, VkOffset3D src_offset, VkExtent3D src_extent, struct anv_image *dest_image, - struct anv_color_attachment_view *dest_view, + struct anv_color_attachment_view *dest_cview, VkOffset3D dest_offset, VkExtent3D dest_extent) { struct anv_device *device = cmd_buffer->device; + struct anv_attachment_view *dest_aview = &dest_cview->attachment_view; + struct anv_surface_view *dest_sview = &dest_cview->surface_view; VkDescriptorPool dummy_desc_pool = { .handle = 1 }; struct blit_vb_data { @@ -758,9 +760,9 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)(src_offset.x + src_extent.width) / (float)src_view->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src_view->extent.height, - (float)(src_offset.z + src_extent.depth) / (float)src_view->extent.depth, + (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)(src_offset.z + 
src_extent.depth) / (float)src_iview->extent.depth, }, }; @@ -770,9 +772,9 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)src_offset.x / (float)src_view->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src_view->extent.height, - (float)(src_offset.z + src_extent.depth) / (float)src_view->extent.depth, + (float)src_offset.x / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)(src_offset.z + src_extent.depth) / (float)src_iview->extent.depth, }, }; @@ -782,9 +784,9 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y, }, .tex_coord = { - (float)src_offset.x / (float)src_view->extent.width, - (float)src_offset.y / (float)src_view->extent.height, - (float)src_offset.z / (float)src_view->extent.depth, + (float)src_offset.x / (float)src_iview->extent.width, + (float)src_offset.y / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, }, }; @@ -822,7 +824,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, .pDescriptors = (VkDescriptorInfo[]) { { - .imageView = anv_image_view_to_handle(src_view), + .imageView = anv_image_view_to_handle(src_iview), .imageLayout = VK_IMAGE_LAYOUT_GENERAL }, } @@ -836,12 +838,12 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .attachmentCount = 1, .pAttachments = (VkAttachmentBindInfo[]) { { - .view = anv_attachment_view_to_handle(&dest_view->base), + .view = anv_attachment_view_to_handle(dest_aview), .layout = VK_IMAGE_LAYOUT_GENERAL } }, - .width = dest_view->base.extent.width, - .height = dest_view->base.extent.height, + .width = dest_aview->extent.width, + .height = dest_aview->extent.height, .layers = 1 }, &fb); @@ -852,7 +854,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .sType = 
VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, - .format = dest_view->view.format->vk_format, + .format = dest_sview->format->vk_format, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_GENERAL, @@ -998,8 +1000,8 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, anv_image_from_handle(dest_image)->bo = dest; anv_image_from_handle(dest_image)->offset = dest_offset; - struct anv_image_view src_view; - anv_image_view_init(&src_view, cmd_buffer->device, + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = src_image, @@ -1021,8 +1023,8 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, }, cmd_buffer); - struct anv_color_attachment_view dest_view; - anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + struct anv_color_attachment_view dest_cview; + anv_color_attachment_view_init(&dest_cview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = dest_image, @@ -1035,11 +1037,11 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, meta_emit_blit(cmd_buffer, anv_image_from_handle(src_image), - &src_view, + &src_iview, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }, anv_image_from_handle(dest_image), - &dest_view, + &dest_cview, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }); @@ -1138,7 +1140,7 @@ void anv_CmdCopyImage( ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dest_image, destImage); - const VkImageViewType src_view_type = + const VkImageViewType src_iview_type = meta_blit_get_src_image_view_type(src_image); struct anv_saved_state saved_state; @@ -1146,12 +1148,12 @@ void anv_CmdCopyImage( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - struct anv_image_view src_view; - anv_image_view_init(&src_view, 
cmd_buffer->device, + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, - .viewType = src_view_type, + .viewType = src_iview_type, .format = src_image->format->vk_format, .channels = { VK_CHANNEL_SWIZZLE_R, @@ -1183,8 +1185,8 @@ void anv_CmdCopyImage( if (pRegions[r].extent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); - struct anv_color_attachment_view dest_view; - anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + struct anv_color_attachment_view dest_cview; + anv_color_attachment_view_init(&dest_cview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, @@ -1196,10 +1198,10 @@ void anv_CmdCopyImage( cmd_buffer); meta_emit_blit(cmd_buffer, - src_image, &src_view, + src_image, &src_iview, pRegions[r].srcOffset, pRegions[r].extent, - dest_image, &dest_view, + dest_image, &dest_cview, dest_offset, pRegions[r].extent); } @@ -1222,7 +1224,7 @@ void anv_CmdBlitImage( ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dest_image, destImage); - const VkImageViewType src_view_type = + const VkImageViewType src_iview_type = meta_blit_get_src_image_view_type(src_image); struct anv_saved_state saved_state; @@ -1232,12 +1234,12 @@ void anv_CmdBlitImage( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - struct anv_image_view src_view; - anv_image_view_init(&src_view, cmd_buffer->device, + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, - .viewType = src_view_type, + .viewType = src_iview_type, .format = src_image->format->vk_format, .channels = { VK_CHANNEL_SWIZZLE_R, @@ -1269,8 +1271,8 @@ void anv_CmdBlitImage( if 
(pRegions[r].destExtent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); - struct anv_color_attachment_view dest_view; - anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + struct anv_color_attachment_view dest_cview; + anv_color_attachment_view_init(&dest_cview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, @@ -1282,10 +1284,10 @@ void anv_CmdBlitImage( cmd_buffer); meta_emit_blit(cmd_buffer, - src_image, &src_view, + src_image, &src_iview, pRegions[r].srcOffset, pRegions[r].srcExtent, - dest_image, &dest_view, + dest_image, &dest_cview, dest_offset, pRegions[r].destExtent); } @@ -1362,8 +1364,8 @@ void anv_CmdCopyBufferToImage( proxy_format, &pRegions[r]); - struct anv_image_view src_view; - anv_image_view_init(&src_view, cmd_buffer->device, + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, @@ -1399,8 +1401,8 @@ void anv_CmdCopyBufferToImage( if (pRegions[r].imageExtent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); - struct anv_color_attachment_view dest_view; - anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + struct anv_color_attachment_view dest_cview; + anv_color_attachment_view_init(&dest_cview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = anv_image_to_handle(dest_image), @@ -1413,11 +1415,11 @@ void anv_CmdCopyBufferToImage( meta_emit_blit(cmd_buffer, anv_image_from_handle(srcImage), - &src_view, + &src_iview, (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent, dest_image, - &dest_view, + &dest_cview, dest_offset, pRegions[r].imageExtent); @@ -1440,7 +1442,7 @@ void anv_CmdCopyImageToBuffer( VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); struct anv_saved_state saved_state; - const VkImageViewType 
src_view_type = + const VkImageViewType src_iview_type = meta_blit_get_src_image_view_type(src_image); meta_prepare_blit(cmd_buffer, &saved_state); @@ -1449,12 +1451,12 @@ void anv_CmdCopyImageToBuffer( if (pRegions[r].imageExtent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); - struct anv_image_view src_view; - anv_image_view_init(&src_view, cmd_buffer->device, + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, - .viewType = src_view_type, + .viewType = src_iview_type, .format = src_image->format->vk_format, .channels = { VK_CHANNEL_SWIZZLE_R, @@ -1481,8 +1483,8 @@ void anv_CmdCopyImageToBuffer( dest_format, &pRegions[r]); - struct anv_color_attachment_view dest_view; - anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + struct anv_color_attachment_view dest_cview; + anv_color_attachment_view_init(&dest_cview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, @@ -1495,11 +1497,11 @@ void anv_CmdCopyImageToBuffer( meta_emit_blit(cmd_buffer, anv_image_from_handle(srcImage), - &src_view, + &src_iview, pRegions[r].imageOffset, pRegions[r].imageExtent, anv_image_from_handle(destImage), - &dest_view, + &dest_cview, (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent); @@ -1546,8 +1548,8 @@ void anv_CmdClearColorImage( for (uint32_t r = 0; r < rangeCount; r++) { for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { - struct anv_color_attachment_view view; - anv_color_attachment_view_init(&view, cmd_buffer->device, + struct anv_color_attachment_view cview; + anv_color_attachment_view_init(&cview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = _image, @@ -1558,6 +1560,9 @@ void anv_CmdClearColorImage( }, 
cmd_buffer); + struct anv_attachment_view *aview = &cview.attachment_view; + struct anv_surface_view *sview = &cview.surface_view; + VkFramebuffer fb; anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), &(VkFramebufferCreateInfo) { @@ -1565,12 +1570,12 @@ void anv_CmdClearColorImage( .attachmentCount = 1, .pAttachments = (VkAttachmentBindInfo[]) { { - .view = anv_attachment_view_to_handle(&view.base), + .view = anv_attachment_view_to_handle(aview), .layout = VK_IMAGE_LAYOUT_GENERAL } }, - .width = view.base.extent.width, - .height = view.base.extent.height, + .width = aview->extent.width, + .height = aview->extent.height, .layers = 1 }, &fb); @@ -1581,7 +1586,7 @@ void anv_CmdClearColorImage( .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, - .format = view.view.format->vk_format, + .format = sview->format->vk_format, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_GENERAL, @@ -1617,8 +1622,8 @@ void anv_CmdClearColorImage( .renderArea = { .offset = { 0, 0, }, .extent = { - .width = view.base.extent.width, - .height = view.base.extent.height, + .width = aview->extent.width, + .height = aview->extent.height, }, }, .renderPass = pass, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 6ce5db209cb..09890730fd6 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1242,11 +1242,11 @@ struct anv_surface_view { }; struct anv_buffer_view { - struct anv_surface_view view; + struct anv_surface_view surface_view; }; struct anv_image_view { - struct anv_surface_view view; + struct anv_surface_view surface_view; VkExtent3D extent; }; @@ -1261,12 +1261,12 @@ struct anv_attachment_view { }; struct anv_color_attachment_view { - struct anv_attachment_view base; - struct anv_surface_view view; + struct anv_attachment_view attachment_view; + struct anv_surface_view surface_view; }; struct 
anv_depth_stencil_view { - struct anv_attachment_view base; + struct anv_attachment_view attachment_view; const struct anv_image *image; /**< VkAttachmentViewCreateInfo::image */ const struct anv_format *format; /**< VkAttachmentViewCreateInfo::format */ }; @@ -1306,24 +1306,24 @@ gen8_image_view_init(struct anv_image_view *iview, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void anv_color_attachment_view_init(struct anv_color_attachment_view *view, +void anv_color_attachment_view_init(struct anv_color_attachment_view *cview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void gen7_color_attachment_view_init(struct anv_color_attachment_view *aview, +void gen7_color_attachment_view_init(struct anv_color_attachment_view *cview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, +void gen8_color_attachment_view_init(struct anv_color_attachment_view *cview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); VkResult anv_buffer_view_create(struct anv_device *device, const VkBufferViewCreateInfo *pCreateInfo, - struct anv_buffer_view **view_out); + struct anv_buffer_view **bview_out); void anv_fill_buffer_surface_state(struct anv_device *device, void *state, const struct anv_format *format, @@ -1335,7 +1335,7 @@ void gen8_fill_buffer_surface_state(void *state, const struct anv_format *format uint32_t offset, uint32_t range); void anv_surface_view_fini(struct anv_device *device, - struct anv_surface_view *view); + struct anv_surface_view *sview); struct anv_sampler { uint32_t state[4]; diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 323022b13f9..b264013d62e 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -529,20 +529,20 
@@ static void gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_depth_stencil_view *view = + const struct anv_depth_stencil_view *ds_view = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - const struct anv_image *image = view ? view->image : NULL; - const bool has_depth = view && view->format->depth_format; - const bool has_stencil = view && view->format->has_stencil; + const struct anv_image *image = ds_view ? ds_view->image : NULL; + const bool has_depth = ds_view && ds_view->format->depth_format; + const bool has_stencil = ds_view && ds_view->format->has_stencil; /* Emit 3DSTATE_DEPTH_BUFFER */ if (has_depth) { anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER, .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = view->format->depth_format, + .DepthWriteEnable = ds_view->format->depth_format, .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = view->format->depth_format, + .SurfaceFormat = ds_view->format->depth_format, .SurfacePitch = image->depth_surface.stride - 1, .SurfaceBaseAddress = { .bo = image->bo, diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 7445bf02042..a782690718b 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -65,20 +65,21 @@ VkResult gen7_CreateBufferView( VkBufferView* pView) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_buffer_view *view; + struct anv_buffer_view *bview; VkResult result; - result = anv_buffer_view_create(device, pCreateInfo, &view); + result = anv_buffer_view_create(device, pCreateInfo, &bview); if (result != VK_SUCCESS) return result; const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); - gen7_fill_buffer_surface_state(view->view.surface_state.map, format, - view->view.offset, pCreateInfo->range); + gen7_fill_buffer_surface_state(bview->surface_view.surface_state.map, + format, 
bview->surface_view.offset, + pCreateInfo->range); - *pView = anv_buffer_view_to_handle(view); + *pView = anv_buffer_view_to_handle(bview); return VK_SUCCESS; } @@ -272,7 +273,7 @@ gen7_image_view_init(struct anv_image_view *iview, ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - struct anv_surface_view *view = &iview->view; + struct anv_surface_view *sview = &iview->surface_view; struct anv_surface *surface = anv_image_get_surface_for_aspect_mask(image, range->aspectMask); @@ -285,9 +286,9 @@ gen7_image_view_init(struct anv_image_view *iview, if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) anv_finishme("non-2D image views"); - view->bo = image->bo; - view->offset = image->offset + surface->offset; - view->format = anv_format_for_vk_format(pCreateInfo->format); + sview->bo = image->bo; + sview->offset = image->offset + surface->offset; + sview->format = anv_format_for_vk_format(pCreateInfo->format); iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, range->baseMipLevel), @@ -345,42 +346,44 @@ gen7_image_view_init(struct anv_image_view *iview, .BlueClearColor = 0, .AlphaClearColor = 0, .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, view->offset }, + .SurfaceBaseAddress = { NULL, sview->offset }, }; if (cmd_buffer) { - view->surface_state = + sview->surface_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); } else { - view->surface_state = + sview->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); } - GEN7_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); + GEN7_RENDER_SURFACE_STATE_pack(NULL, sview->surface_state.map, + &surface_state); } void -gen7_color_attachment_view_init(struct anv_color_attachment_view *aview, +gen7_color_attachment_view_init(struct anv_color_attachment_view *cview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer 
*cmd_buffer) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_surface_view *view = &aview->view; + struct anv_attachment_view *aview = &cview->attachment_view; + struct anv_surface_view *sview = &cview->surface_view; struct anv_surface *surface = anv_image_get_surface_for_color_attachment(image); - aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; + aview->attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; anv_assert(pCreateInfo->arraySize > 0); anv_assert(pCreateInfo->mipLevel < image->levels); anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); - view->bo = image->bo; - view->offset = image->offset + surface->offset; - view->format = anv_format_for_vk_format(pCreateInfo->format); + sview->bo = image->bo; + sview->offset = image->offset + surface->offset; + sview->format = anv_format_for_vk_format(pCreateInfo->format); - aview->base.extent = (VkExtent3D) { + aview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), @@ -394,17 +397,17 @@ gen7_color_attachment_view_init(struct anv_color_attachment_view *aview, } if (cmd_buffer) { - view->surface_state = + sview->surface_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); } else { - view->surface_state = + sview->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); } struct GEN7_RENDER_SURFACE_STATE surface_state = { .SurfaceType = SURFTYPE_2D, .SurfaceArray = image->array_size > 1, - .SurfaceFormat = view->format->surface_format, + .SurfaceFormat = sview->format->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], .SurfaceHorizontalAlignment = anv_halign[surface->h_align], @@ -444,9 +447,10 @@ gen7_color_attachment_view_init(struct anv_color_attachment_view *aview, .BlueClearColor = 0, .AlphaClearColor = 0, 
.ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, view->offset }, + .SurfaceBaseAddress = { NULL, sview->offset }, }; - GEN7_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); + GEN7_RENDER_SURFACE_STATE_pack(NULL, sview->surface_state.map, + &surface_state); } diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index a294e3ad50c..30573639986 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -458,11 +458,11 @@ static void gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_depth_stencil_view *view = + const struct anv_depth_stencil_view *ds_view = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - const struct anv_image *image = view ? view->image : NULL; - const bool has_depth = view && view->format->depth_format; - const bool has_stencil = view && view->format->has_stencil; + const struct anv_image *image = ds_view ? 
ds_view->image : NULL; + const bool has_depth = ds_view && ds_view->format->depth_format; + const bool has_stencil = ds_view && ds_view->format->has_stencil; /* FIXME: Implement the PMA stall W/A */ /* FIXME: Width and Height are wrong */ @@ -471,10 +471,10 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) if (has_depth) { anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = view->format->depth_format, + .DepthWriteEnable = ds_view->format->depth_format, .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = view->format->depth_format, + .SurfaceFormat = ds_view->format->depth_format, .SurfacePitch = image->depth_surface.stride - 1, .SurfaceBaseAddress = { .bo = image->bo, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 0ef44d03c44..c47d317d2a3 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -109,20 +109,21 @@ VkResult gen8_CreateBufferView( VkBufferView* pView) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_buffer_view *view; + struct anv_buffer_view *bview; VkResult result; - result = anv_buffer_view_create(device, pCreateInfo, &view); + result = anv_buffer_view_create(device, pCreateInfo, &bview); if (result != VK_SUCCESS) return result; const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); - gen8_fill_buffer_surface_state(view->view.surface_state.map, format, - view->view.offset, pCreateInfo->range); + gen8_fill_buffer_surface_state(bview->surface_view.surface_state.map, + format, bview->surface_view.offset, + pCreateInfo->range); - *pView = anv_buffer_view_to_handle(view); + *pView = anv_buffer_view_to_handle(bview); return VK_SUCCESS; } @@ -148,7 +149,7 @@ gen8_image_view_init(struct anv_image_view *iview, ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - struct anv_surface_view 
*view = &iview->view; + struct anv_surface_view *sview = &iview->surface_view; struct anv_surface *surface = anv_image_get_surface_for_aspect_mask(image, range->aspectMask); @@ -161,9 +162,9 @@ gen8_image_view_init(struct anv_image_view *iview, const struct anv_image_view_info view_type_info = anv_image_view_info_for_vk_image_view_type(pCreateInfo->viewType); - view->bo = image->bo; - view->offset = image->offset + surface->offset; - view->format = format_info; + sview->bo = image->bo; + sview->offset = image->offset + surface->offset; + sview->format = format_info; iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, range->baseMipLevel), @@ -266,28 +267,30 @@ gen8_image_view_init(struct anv_image_view *iview, .ShaderChannelSelectBlue = vk_to_gen_swizzle[pCreateInfo->channels.b], .ShaderChannelSelectAlpha = vk_to_gen_swizzle[pCreateInfo->channels.a], .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, view->offset }, + .SurfaceBaseAddress = { NULL, sview->offset }, }; if (cmd_buffer) { - view->surface_state = + sview->surface_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); } else { - view->surface_state = + sview->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); } - GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); + GEN8_RENDER_SURFACE_STATE_pack(NULL, sview->surface_state.map, + &surface_state); } void -gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, +gen8_color_attachment_view_init(struct anv_color_attachment_view *cview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_surface_view *view = &aview->view; + struct anv_attachment_view *aview = &cview->attachment_view; + struct anv_surface_view *sview = &cview->surface_view; struct anv_surface *surface = anv_image_get_surface_for_color_attachment(image); const 
struct anv_format *format_info = @@ -296,17 +299,17 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, uint32_t depth = 1; /* RENDER_SURFACE_STATE::Depth */ uint32_t rt_view_extent = 1; /* RENDER_SURFACE_STATE::RenderTargetViewExtent */ - aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; + aview->attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; anv_assert(pCreateInfo->arraySize > 0); anv_assert(pCreateInfo->mipLevel < image->levels); anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); - view->bo = image->bo; - view->offset = image->offset + surface->offset; - view->format = anv_format_for_vk_format(pCreateInfo->format); + sview->bo = image->bo; + sview->offset = image->offset + surface->offset; + sview->format = anv_format_for_vk_format(pCreateInfo->format); - aview->base.extent = (VkExtent3D) { + aview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), @@ -345,17 +348,17 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, * indicates the extent of the accessible 'R' coordinates minus 1 on * the LOD currently being rendered to. 
*/ - rt_view_extent = aview->base.extent.depth; + rt_view_extent = aview->extent.depth; break; default: unreachable(!"bad VkImageType"); } if (cmd_buffer) { - view->surface_state = + sview->surface_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); } else { - view->surface_state = + sview->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); } @@ -408,10 +411,11 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, .ShaderChannelSelectBlue = SCS_BLUE, .ShaderChannelSelectAlpha = SCS_ALPHA, .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, view->offset }, + .SurfaceBaseAddress = { NULL, sview->offset }, }; - GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); + GEN8_RENDER_SURFACE_STATE_pack(NULL, sview->surface_state.map, + &surface_state); } VkResult gen8_CreateSampler( -- cgit v1.2.3 From f0f4dfa9cc1473c5e12eeeb9403f721d5611e905 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 16:24:53 -0700 Subject: vk: Drop anv_surface_view Push the members of struct anv_surface_view into anv_image_view and anv_buffer_view, then remove struct anv_surface_view. Observe that anv_surface_view::range is not needed for anv_image_view, and so was dropped there. This prepares for the merge of VkAttachmentView into VkImageView. Remove the common parent of anv_buffer_view and anv_image_view (that is, anv_surface_view) will make the merge easier. 
--- src/vulkan/anv_cmd_buffer.c | 34 ++++++++++++++++++++++++---------- src/vulkan/anv_device.c | 12 ++++++------ src/vulkan/anv_image.c | 18 +++++++++--------- src/vulkan/anv_meta.c | 8 ++++---- src/vulkan/anv_private.h | 28 +++++++++++++--------------- src/vulkan/gen7_state.c | 38 ++++++++++++++++++-------------------- src/vulkan/gen8_state.c | 36 +++++++++++++++++------------------- 7 files changed, 91 insertions(+), 83 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 50f8304f9b3..008eeed40d6 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -440,11 +440,11 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, const struct anv_color_attachment_view *cview = (const struct anv_color_attachment_view *) aview; - const struct anv_surface_view *sview = &cview->surface_view; + const struct anv_image_view *iview = &cview->image_view; - bt_map[a] = sview->surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, sview->surface_state, - sview->bo, sview->offset); + bt_map[a] = iview->surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, iview->surface_state, + iview->bo, iview->offset); } if (layout == NULL) @@ -462,14 +462,28 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_descriptor *desc = &d->set->descriptors[surface_slots[b].index]; - if (desc->type != ANV_DESCRIPTOR_TYPE_SURFACE_VIEW) - continue; + const struct anv_state *surface_state; + struct anv_bo *bo; + uint32_t bo_offset; - const struct anv_surface_view *sview = desc->surface_view; + switch (desc->type) { + case ANV_DESCRIPTOR_TYPE_EMPTY: + case ANV_DESCRIPTOR_TYPE_SAMPLER: + continue; + case ANV_DESCRIPTOR_TYPE_BUFFER_VIEW: + surface_state = &desc->buffer_view->surface_state; + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; + break; + case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW: + surface_state = 
&desc->image_view->surface_state; + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + break; + } - bt_map[start + b] = sview->surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, sview->surface_state, - sview->bo, sview->offset); + bt_map[start + b] = surface_state->offset + state_offset; + add_surface_state_reloc(cmd_buffer, *surface_state, bo, bo_offset); } } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index b2c8027fcf9..5302ee5cd73 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1408,7 +1408,7 @@ anv_buffer_view_create( if (bview == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - bview->surface_view = (struct anv_surface_view) { + *bview = (struct anv_buffer_view) { .bo = buffer->bo, .offset = buffer->offset + pCreateInfo->offset, .surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64), @@ -1428,7 +1428,7 @@ VkResult anv_DestroyBufferView( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_buffer_view, bview, _bview); - anv_surface_view_fini(device, &bview->surface_view); + anv_state_pool_free(&device->surface_state_pool, bview->surface_state); anv_device_free(device, bview); return VK_SUCCESS; @@ -1754,8 +1754,8 @@ VkResult anv_UpdateDescriptorSets( write->pDescriptors[j].imageView); set->descriptors[write->destBinding + j] = (struct anv_descriptor) { - .type = ANV_DESCRIPTOR_TYPE_SURFACE_VIEW, - .surface_view = &iview->surface_view, + .type = ANV_DESCRIPTOR_TYPE_IMAGE_VIEW, + .image_view = iview, }; } break; @@ -1778,8 +1778,8 @@ VkResult anv_UpdateDescriptorSets( write->pDescriptors[j].bufferView); set->descriptors[write->destBinding + j] = (struct anv_descriptor) { - .type = ANV_DESCRIPTOR_TYPE_SURFACE_VIEW, - .surface_view = &bview->surface_view, + .type = ANV_DESCRIPTOR_TYPE_BUFFER_VIEW, + .buffer_view = bview, }; } diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 19004320298..75233d1fe61 100644 --- 
a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -372,13 +372,6 @@ VkResult anv_GetImageSubresourceLayout( stub_return(VK_UNSUPPORTED); } -void -anv_surface_view_fini(struct anv_device *device, - struct anv_surface_view *sview) -{ - anv_state_pool_free(&device->surface_state_pool, sview->surface_state); -} - VkResult anv_validate_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, @@ -499,13 +492,20 @@ anv_CreateImageView(VkDevice _device, return VK_SUCCESS; } +static void +anv_image_view_fini(struct anv_device *device, + struct anv_image_view *iview) +{ + anv_state_pool_free(&device->surface_state_pool, iview->surface_state); +} + VkResult anv_DestroyImageView(VkDevice _device, VkImageView _iview) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_image_view, iview, _iview); - anv_surface_view_fini(device, &iview->surface_view); + anv_image_view_fini(device, iview); anv_device_free(device, iview); return VK_SUCCESS; @@ -643,7 +643,7 @@ anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _aview) struct anv_color_attachment_view *cview = (struct anv_color_attachment_view *) aview; - anv_surface_view_fini(device, &cview->surface_view); + anv_image_view_fini(device, &cview->image_view); } anv_device_free(device, aview); diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index fe235e23fb0..1574830e0b8 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -739,7 +739,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, { struct anv_device *device = cmd_buffer->device; struct anv_attachment_view *dest_aview = &dest_cview->attachment_view; - struct anv_surface_view *dest_sview = &dest_cview->surface_view; + struct anv_image_view *dest_iview = &dest_cview->image_view; VkDescriptorPool dummy_desc_pool = { .handle = 1 }; struct blit_vb_data { @@ -854,7 +854,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .sType = 
VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, - .format = dest_sview->format->vk_format, + .format = dest_iview->format->vk_format, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_GENERAL, @@ -1561,7 +1561,7 @@ void anv_CmdClearColorImage( cmd_buffer); struct anv_attachment_view *aview = &cview.attachment_view; - struct anv_surface_view *sview = &cview.surface_view; + struct anv_image_view *iview = &cview.image_view; VkFramebuffer fb; anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), @@ -1586,7 +1586,7 @@ void anv_CmdClearColorImage( .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, - .format = sview->format->vk_format, + .format = iview->format->vk_format, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_GENERAL, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 09890730fd6..8c4c9efdc6f 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -730,14 +730,16 @@ struct anv_descriptor_set_layout { enum anv_descriptor_type { ANV_DESCRIPTOR_TYPE_EMPTY = 0, + ANV_DESCRIPTOR_TYPE_BUFFER_VIEW, + ANV_DESCRIPTOR_TYPE_IMAGE_VIEW, ANV_DESCRIPTOR_TYPE_SAMPLER, - ANV_DESCRIPTOR_TYPE_SURFACE_VIEW, }; struct anv_descriptor { union { + struct anv_buffer_view *buffer_view; + struct anv_image_view *image_view; struct anv_sampler *sampler; - struct anv_surface_view *surface_view; }; enum anv_descriptor_type type; @@ -1233,20 +1235,19 @@ struct anv_image { }; }; -struct anv_surface_view { +struct anv_buffer_view { struct anv_state surface_state; /**< RENDER_SURFACE_STATE */ struct anv_bo *bo; - uint32_t offset; /**< VkBufferCreateInfo::offset */ - uint32_t range; /**< VkBufferCreateInfo::range */ - const struct anv_format *format; /**< VkBufferCreateInfo::format */ -}; - -struct anv_buffer_view { - struct anv_surface_view surface_view; + uint32_t 
offset; /**< Offset into bo. */ + uint32_t range; /**< VkBufferViewCreateInfo::range */ + const struct anv_format *format; /**< VkBufferViewCreateInfo::format */ }; struct anv_image_view { - struct anv_surface_view surface_view; + struct anv_state surface_state; /**< RENDER_SURFACE_STATE */ + struct anv_bo *bo; + uint32_t offset; /**< Offset into bo. */ + const struct anv_format *format; /**< VkImageViewCreateInfo::format */ VkExtent3D extent; }; @@ -1262,7 +1263,7 @@ struct anv_attachment_view { struct anv_color_attachment_view { struct anv_attachment_view attachment_view; - struct anv_surface_view surface_view; + struct anv_image_view image_view; }; struct anv_depth_stencil_view { @@ -1334,9 +1335,6 @@ void gen7_fill_buffer_surface_state(void *state, const struct anv_format *format void gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, uint32_t offset, uint32_t range); -void anv_surface_view_fini(struct anv_device *device, - struct anv_surface_view *sview); - struct anv_sampler { uint32_t state[4]; }; diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index a782690718b..5031db5edea 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -75,9 +75,8 @@ VkResult gen7_CreateBufferView( const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); - gen7_fill_buffer_surface_state(bview->surface_view.surface_state.map, - format, bview->surface_view.offset, - pCreateInfo->range); + gen7_fill_buffer_surface_state(bview->surface_state.map, format, + bview->offset, pCreateInfo->range); *pView = anv_buffer_view_to_handle(bview); @@ -273,7 +272,6 @@ gen7_image_view_init(struct anv_image_view *iview, ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - struct anv_surface_view *sview = &iview->surface_view; struct anv_surface *surface = anv_image_get_surface_for_aspect_mask(image, range->aspectMask); @@ -286,9 +284,9 @@ 
gen7_image_view_init(struct anv_image_view *iview, if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) anv_finishme("non-2D image views"); - sview->bo = image->bo; - sview->offset = image->offset + surface->offset; - sview->format = anv_format_for_vk_format(pCreateInfo->format); + iview->bo = image->bo; + iview->offset = image->offset + surface->offset; + iview->format = anv_format_for_vk_format(pCreateInfo->format); iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, range->baseMipLevel), @@ -346,18 +344,18 @@ gen7_image_view_init(struct anv_image_view *iview, .BlueClearColor = 0, .AlphaClearColor = 0, .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, sview->offset }, + .SurfaceBaseAddress = { NULL, iview->offset }, }; if (cmd_buffer) { - sview->surface_state = + iview->surface_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); } else { - sview->surface_state = + iview->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); } - GEN7_RENDER_SURFACE_STATE_pack(NULL, sview->surface_state.map, + GEN7_RENDER_SURFACE_STATE_pack(NULL, iview->surface_state.map, &surface_state); } @@ -369,7 +367,7 @@ gen7_color_attachment_view_init(struct anv_color_attachment_view *cview, { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); struct anv_attachment_view *aview = &cview->attachment_view; - struct anv_surface_view *sview = &cview->surface_view; + struct anv_image_view *iview = &cview->image_view; struct anv_surface *surface = anv_image_get_surface_for_color_attachment(image); @@ -379,9 +377,9 @@ gen7_color_attachment_view_init(struct anv_color_attachment_view *cview, anv_assert(pCreateInfo->mipLevel < image->levels); anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); - sview->bo = image->bo; - sview->offset = image->offset + surface->offset; - sview->format = anv_format_for_vk_format(pCreateInfo->format); + iview->bo = image->bo; + iview->offset = image->offset + 
surface->offset; + iview->format = anv_format_for_vk_format(pCreateInfo->format); aview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), @@ -397,17 +395,17 @@ gen7_color_attachment_view_init(struct anv_color_attachment_view *cview, } if (cmd_buffer) { - sview->surface_state = + iview->surface_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); } else { - sview->surface_state = + iview->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); } struct GEN7_RENDER_SURFACE_STATE surface_state = { .SurfaceType = SURFTYPE_2D, .SurfaceArray = image->array_size > 1, - .SurfaceFormat = sview->format->surface_format, + .SurfaceFormat = iview->format->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], .SurfaceHorizontalAlignment = anv_halign[surface->h_align], @@ -447,10 +445,10 @@ gen7_color_attachment_view_init(struct anv_color_attachment_view *cview, .BlueClearColor = 0, .AlphaClearColor = 0, .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, sview->offset }, + .SurfaceBaseAddress = { NULL, iview->offset }, }; - GEN7_RENDER_SURFACE_STATE_pack(NULL, sview->surface_state.map, + GEN7_RENDER_SURFACE_STATE_pack(NULL, iview->surface_state.map, &surface_state); } diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index c47d317d2a3..2b820a3177d 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -119,9 +119,8 @@ VkResult gen8_CreateBufferView( const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); - gen8_fill_buffer_surface_state(bview->surface_view.surface_state.map, - format, bview->surface_view.offset, - pCreateInfo->range); + gen8_fill_buffer_surface_state(bview->surface_state.map, format, + bview->offset, pCreateInfo->range); *pView = anv_buffer_view_to_handle(bview); @@ -149,7 +148,6 @@ gen8_image_view_init(struct anv_image_view *iview, ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const 
VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - struct anv_surface_view *sview = &iview->surface_view; struct anv_surface *surface = anv_image_get_surface_for_aspect_mask(image, range->aspectMask); @@ -162,9 +160,9 @@ gen8_image_view_init(struct anv_image_view *iview, const struct anv_image_view_info view_type_info = anv_image_view_info_for_vk_image_view_type(pCreateInfo->viewType); - sview->bo = image->bo; - sview->offset = image->offset + surface->offset; - sview->format = format_info; + iview->bo = image->bo; + iview->offset = image->offset + surface->offset; + iview->format = format_info; iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, range->baseMipLevel), @@ -267,18 +265,18 @@ gen8_image_view_init(struct anv_image_view *iview, .ShaderChannelSelectBlue = vk_to_gen_swizzle[pCreateInfo->channels.b], .ShaderChannelSelectAlpha = vk_to_gen_swizzle[pCreateInfo->channels.a], .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, sview->offset }, + .SurfaceBaseAddress = { NULL, iview->offset }, }; if (cmd_buffer) { - sview->surface_state = + iview->surface_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); } else { - sview->surface_state = + iview->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); } - GEN8_RENDER_SURFACE_STATE_pack(NULL, sview->surface_state.map, + GEN8_RENDER_SURFACE_STATE_pack(NULL, iview->surface_state.map, &surface_state); } @@ -290,7 +288,7 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *cview, { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); struct anv_attachment_view *aview = &cview->attachment_view; - struct anv_surface_view *sview = &cview->surface_view; + struct anv_image_view *iview = &cview->image_view; struct anv_surface *surface = anv_image_get_surface_for_color_attachment(image); const struct anv_format *format_info = @@ -305,9 +303,9 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *cview, 
anv_assert(pCreateInfo->mipLevel < image->levels); anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); - sview->bo = image->bo; - sview->offset = image->offset + surface->offset; - sview->format = anv_format_for_vk_format(pCreateInfo->format); + iview->bo = image->bo; + iview->offset = image->offset + surface->offset; + iview->format = anv_format_for_vk_format(pCreateInfo->format); aview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), @@ -355,10 +353,10 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *cview, } if (cmd_buffer) { - sview->surface_state = + iview->surface_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); } else { - sview->surface_state = + iview->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); } @@ -411,10 +409,10 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *cview, .ShaderChannelSelectBlue = SCS_BLUE, .ShaderChannelSelectAlpha = SCS_ALPHA, .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, sview->offset }, + .SurfaceBaseAddress = { NULL, iview->offset }, }; - GEN8_RENDER_SURFACE_STATE_pack(NULL, sview->surface_state.map, + GEN8_RENDER_SURFACE_STATE_pack(NULL, iview->surface_state.map, &surface_state); } -- cgit v1.2.3 From ae30535602d8b2f10a31feb133331c0c985b8b41 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 16:58:25 -0700 Subject: vk: Drop anv_attachment_view::extent It's duplicated by anv_attachment_view::image_view::extent. 
--- src/vulkan/anv_meta.c | 12 ++++++------ src/vulkan/anv_private.h | 1 - src/vulkan/gen7_state.c | 2 +- src/vulkan/gen8_state.c | 4 ++-- 4 files changed, 9 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 1574830e0b8..085acc7a318 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -842,8 +842,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .layout = VK_IMAGE_LAYOUT_GENERAL } }, - .width = dest_aview->extent.width, - .height = dest_aview->extent.height, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, .layers = 1 }, &fb); @@ -1574,8 +1574,8 @@ void anv_CmdClearColorImage( .layout = VK_IMAGE_LAYOUT_GENERAL } }, - .width = aview->extent.width, - .height = aview->extent.height, + .width = iview->extent.width, + .height = iview->extent.height, .layers = 1 }, &fb); @@ -1622,8 +1622,8 @@ void anv_CmdClearColorImage( .renderArea = { .offset = { 0, 0, }, .extent = { - .width = aview->extent.width, - .height = aview->extent.height, + .width = iview->extent.width, + .height = iview->extent.height, }, }, .renderPass = pass, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8c4c9efdc6f..b290fcc75b7 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1258,7 +1258,6 @@ enum anv_attachment_view_type { struct anv_attachment_view { enum anv_attachment_view_type attachment_type; - VkExtent3D extent; }; struct anv_color_attachment_view { diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 5031db5edea..d65c1a373f0 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -381,7 +381,7 @@ gen7_color_attachment_view_init(struct anv_color_attachment_view *cview, iview->offset = image->offset + surface->offset; iview->format = anv_format_for_vk_format(pCreateInfo->format); - aview->extent = (VkExtent3D) { + iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), 
.height = anv_minify(image->extent.height, pCreateInfo->mipLevel), .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 2b820a3177d..8e9b43cab05 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -307,7 +307,7 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *cview, iview->offset = image->offset + surface->offset; iview->format = anv_format_for_vk_format(pCreateInfo->format); - aview->extent = (VkExtent3D) { + iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), @@ -346,7 +346,7 @@ gen8_color_attachment_view_init(struct anv_color_attachment_view *cview, * indicates the extent of the accessible 'R' coordinates minus 1 on * the LOD currently being rendered to. */ - rt_view_extent = aview->extent.depth; + rt_view_extent = iview->extent.depth; break; default: unreachable(!"bad VkImageType"); -- cgit v1.2.3 From 935706234855e19e17402ee3b3cc19bc032b4932 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 17:27:38 -0700 Subject: vk: Merge anv_*_attachment_view into anv_attachment_view Remove anv_color_attachment_view and anv_depth_stencil_view, merging them into anv_attachment_view. This prepares for merging VkAttachmentView into VkImageView. 
--- src/vulkan/anv_cmd_buffer.c | 10 +++------ src/vulkan/anv_image.c | 50 +++++++++++++++++--------------------------- src/vulkan/anv_meta.c | 44 +++++++++++++++++++------------------- src/vulkan/anv_private.h | 21 ++++++------------- src/vulkan/gen7_cmd_buffer.c | 13 ++++++------ src/vulkan/gen7_state.c | 5 ++--- src/vulkan/gen8_cmd_buffer.c | 13 ++++++------ src/vulkan/gen8_state.c | 5 ++--- 8 files changed, 67 insertions(+), 94 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 008eeed40d6..cab916c1383 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -434,14 +434,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, for (uint32_t a = 0; a < attachments; a++) { const struct anv_attachment_view *aview = fb->attachments[subpass->color_attachments[a]]; + const struct anv_image_view *iview = &aview->image_view; assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); - const struct anv_color_attachment_view *cview = - (const struct anv_color_attachment_view *) aview; - - const struct anv_image_view *iview = &cview->image_view; - bt_map[a] = iview->surface_state.offset + state_offset; add_surface_state_reloc(cmd_buffer, iview->surface_state, iview->bo, iview->offset); @@ -821,7 +817,7 @@ VkResult anv_ResetCommandPool( /** * Return NULL if the current subpass has no depthstencil attachment. 
*/ -const struct anv_depth_stencil_view * +const struct anv_attachment_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) { const struct anv_subpass *subpass = cmd_buffer->state.subpass; @@ -835,5 +831,5 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); - return (const struct anv_depth_stencil_view *) aview; + return aview; } diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 75233d1fe61..f926cc2b1c0 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -512,24 +512,23 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview) } static void -anv_depth_stencil_view_init(struct anv_depth_stencil_view *ds_view, +anv_depth_stencil_view_init(struct anv_attachment_view *aview, const VkAttachmentViewCreateInfo *pCreateInfo) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - ds_view->attachment_view.attachment_type = - ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL; + aview->attachment_type = ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL; /* XXX: We don't handle any of these */ anv_assert(pCreateInfo->mipLevel == 0); anv_assert(pCreateInfo->baseArraySlice == 0); anv_assert(pCreateInfo->arraySize == 1); - ds_view->image = image; - ds_view->format = anv_format_for_vk_format(pCreateInfo->format); + aview->image_view.image = image; + aview->image_view.format = anv_format_for_vk_format(pCreateInfo->format); assert(anv_format_is_depth_or_stencil(image->format)); - assert(anv_format_is_depth_or_stencil(ds_view->format)); + assert(anv_format_is_depth_or_stencil(aview->image_view.format)); } struct anv_surface * @@ -579,17 +578,17 @@ anv_image_get_surface_for_color_attachment(struct anv_image *image) } void -anv_color_attachment_view_init(struct anv_color_attachment_view *cview, +anv_color_attachment_view_init(struct anv_attachment_view *aview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, 
struct anv_cmd_buffer *cmd_buffer) { switch (device->info.gen) { case 7: - gen7_color_attachment_view_init(cview, device, pCreateInfo, cmd_buffer); + gen7_color_attachment_view_init(aview, device, pCreateInfo, cmd_buffer); break; case 8: - gen8_color_attachment_view_init(cview, device, pCreateInfo, cmd_buffer); + gen8_color_attachment_view_init(aview, device, pCreateInfo, cmd_buffer); break; default: unreachable("unsupported gen\n"); @@ -602,34 +601,26 @@ anv_CreateAttachmentView(VkDevice _device, VkAttachmentView *pView) { ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_attachment_view *aview; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO); + aview = anv_device_alloc(device, sizeof(*aview), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (aview == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); if (anv_format_is_depth_or_stencil(format)) { - struct anv_depth_stencil_view *ds_view = - anv_device_alloc(device, sizeof(*ds_view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (ds_view == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_depth_stencil_view_init(ds_view, pCreateInfo); - - *pView = anv_attachment_view_to_handle(&ds_view->attachment_view); + anv_depth_stencil_view_init(aview, pCreateInfo); } else { - struct anv_color_attachment_view *cview = - anv_device_alloc(device, sizeof(*cview), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (cview == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_color_attachment_view_init(cview, device, pCreateInfo, NULL); - - *pView = anv_attachment_view_to_handle(&cview->attachment_view); + anv_color_attachment_view_init(aview, device, pCreateInfo, NULL); } + *pView = anv_attachment_view_to_handle(aview); + return VK_SUCCESS; } @@ -640,10 +631,7 @@ anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _aview) ANV_FROM_HANDLE(anv_attachment_view, aview, _aview); if 
(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR) { - struct anv_color_attachment_view *cview = - (struct anv_color_attachment_view *) aview; - - anv_image_view_fini(device, &cview->image_view); + anv_image_view_fini(device, &aview->image_view); } anv_device_free(device, aview); diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 085acc7a318..f6134338fdd 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -733,13 +733,12 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VkOffset3D src_offset, VkExtent3D src_extent, struct anv_image *dest_image, - struct anv_color_attachment_view *dest_cview, + struct anv_attachment_view *dest_aview, VkOffset3D dest_offset, VkExtent3D dest_extent) { struct anv_device *device = cmd_buffer->device; - struct anv_attachment_view *dest_aview = &dest_cview->attachment_view; - struct anv_image_view *dest_iview = &dest_cview->image_view; + struct anv_image_view *dest_iview = &dest_aview->image_view; VkDescriptorPool dummy_desc_pool = { .handle = 1 }; struct blit_vb_data { @@ -1023,8 +1022,8 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, }, cmd_buffer); - struct anv_color_attachment_view dest_cview; - anv_color_attachment_view_init(&dest_cview, cmd_buffer->device, + struct anv_attachment_view dest_aview; + anv_color_attachment_view_init(&dest_aview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = dest_image, @@ -1041,7 +1040,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }, anv_image_from_handle(dest_image), - &dest_cview, + &dest_aview, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }); @@ -1185,8 +1184,8 @@ void anv_CmdCopyImage( if (pRegions[r].extent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); - struct anv_color_attachment_view dest_cview; - anv_color_attachment_view_init(&dest_cview, cmd_buffer->device, + struct 
anv_attachment_view dest_aview; + anv_color_attachment_view_init(&dest_aview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, @@ -1201,7 +1200,7 @@ void anv_CmdCopyImage( src_image, &src_iview, pRegions[r].srcOffset, pRegions[r].extent, - dest_image, &dest_cview, + dest_image, &dest_aview, dest_offset, pRegions[r].extent); } @@ -1271,8 +1270,8 @@ void anv_CmdBlitImage( if (pRegions[r].destExtent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); - struct anv_color_attachment_view dest_cview; - anv_color_attachment_view_init(&dest_cview, cmd_buffer->device, + struct anv_attachment_view dest_aview; + anv_color_attachment_view_init(&dest_aview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, @@ -1287,7 +1286,7 @@ void anv_CmdBlitImage( src_image, &src_iview, pRegions[r].srcOffset, pRegions[r].srcExtent, - dest_image, &dest_cview, + dest_image, &dest_aview, dest_offset, pRegions[r].destExtent); } @@ -1401,8 +1400,8 @@ void anv_CmdCopyBufferToImage( if (pRegions[r].imageExtent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); - struct anv_color_attachment_view dest_cview; - anv_color_attachment_view_init(&dest_cview, cmd_buffer->device, + struct anv_attachment_view dest_aview; + anv_color_attachment_view_init(&dest_aview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = anv_image_to_handle(dest_image), @@ -1419,7 +1418,7 @@ void anv_CmdCopyBufferToImage( (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent, dest_image, - &dest_cview, + &dest_aview, dest_offset, pRegions[r].imageExtent); @@ -1483,8 +1482,8 @@ void anv_CmdCopyImageToBuffer( dest_format, &pRegions[r]); - struct anv_color_attachment_view dest_cview; - anv_color_attachment_view_init(&dest_cview, cmd_buffer->device, + struct anv_attachment_view dest_aview; + 
anv_color_attachment_view_init(&dest_aview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, @@ -1501,7 +1500,7 @@ void anv_CmdCopyImageToBuffer( pRegions[r].imageOffset, pRegions[r].imageExtent, anv_image_from_handle(destImage), - &dest_cview, + &dest_aview, (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent); @@ -1548,8 +1547,8 @@ void anv_CmdClearColorImage( for (uint32_t r = 0; r < rangeCount; r++) { for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { - struct anv_color_attachment_view cview; - anv_color_attachment_view_init(&cview, cmd_buffer->device, + struct anv_attachment_view aview; + anv_color_attachment_view_init(&aview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = _image, @@ -1560,8 +1559,7 @@ void anv_CmdClearColorImage( }, cmd_buffer); - struct anv_attachment_view *aview = &cview.attachment_view; - struct anv_image_view *iview = &cview.image_view; + struct anv_image_view *iview = &aview.image_view; VkFramebuffer fb; anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), @@ -1570,7 +1568,7 @@ void anv_CmdClearColorImage( .attachmentCount = 1, .pAttachments = (VkAttachmentBindInfo[]) { { - .view = anv_attachment_view_to_handle(aview), + .view = anv_attachment_view_to_handle(&aview), .layout = VK_IMAGE_LAYOUT_GENERAL } }, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index b290fcc75b7..a0d4f8c2867 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -994,7 +994,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, struct anv_render_pass *pass, const VkClearValue *clear_values); -const struct anv_depth_stencil_view * +const struct anv_attachment_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer 
*cmd_buffer); void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); @@ -1244,10 +1244,11 @@ struct anv_buffer_view { }; struct anv_image_view { + const struct anv_image *image; /**< VkAttachmentViewCreateInfo::image */ + const struct anv_format *format; /**< VkAttachmentViewCreateInfo::format */ struct anv_state surface_state; /**< RENDER_SURFACE_STATE */ struct anv_bo *bo; uint32_t offset; /**< Offset into bo. */ - const struct anv_format *format; /**< VkImageViewCreateInfo::format */ VkExtent3D extent; }; @@ -1258,19 +1259,9 @@ enum anv_attachment_view_type { struct anv_attachment_view { enum anv_attachment_view_type attachment_type; -}; - -struct anv_color_attachment_view { - struct anv_attachment_view attachment_view; struct anv_image_view image_view; }; -struct anv_depth_stencil_view { - struct anv_attachment_view attachment_view; - const struct anv_image *image; /**< VkAttachmentViewCreateInfo::image */ - const struct anv_format *format; /**< VkAttachmentViewCreateInfo::format */ -}; - struct anv_image_create_info { const VkImageCreateInfo *vk_info; bool force_tile_mode; @@ -1306,17 +1297,17 @@ gen8_image_view_init(struct anv_image_view *iview, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void anv_color_attachment_view_init(struct anv_color_attachment_view *cview, +void anv_color_attachment_view_init(struct anv_attachment_view *aview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void gen7_color_attachment_view_init(struct anv_color_attachment_view *cview, +void gen7_color_attachment_view_init(struct anv_attachment_view *aview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void gen8_color_attachment_view_init(struct anv_color_attachment_view *cview, +void gen8_color_attachment_view_init(struct anv_attachment_view *aview, struct anv_device *device, const VkAttachmentViewCreateInfo* 
pCreateInfo, struct anv_cmd_buffer *cmd_buffer); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index b264013d62e..5803569502f 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -529,20 +529,21 @@ static void gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_depth_stencil_view *ds_view = + const struct anv_attachment_view *aview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - const struct anv_image *image = ds_view ? ds_view->image : NULL; - const bool has_depth = ds_view && ds_view->format->depth_format; - const bool has_stencil = ds_view && ds_view->format->has_stencil; + const struct anv_image_view *iview = aview ? &aview->image_view : NULL; + const struct anv_image *image = iview ? iview->image : NULL; + const bool has_depth = iview && iview->format->depth_format; + const bool has_stencil = iview && iview->format->has_stencil; /* Emit 3DSTATE_DEPTH_BUFFER */ if (has_depth) { anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER, .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = ds_view->format->depth_format, + .DepthWriteEnable = iview->format->depth_format, .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = ds_view->format->depth_format, + .SurfaceFormat = iview->format->depth_format, .SurfacePitch = image->depth_surface.stride - 1, .SurfaceBaseAddress = { .bo = image->bo, diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index d65c1a373f0..778c9096461 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -360,14 +360,13 @@ gen7_image_view_init(struct anv_image_view *iview, } void -gen7_color_attachment_view_init(struct anv_color_attachment_view *cview, +gen7_color_attachment_view_init(struct anv_attachment_view *aview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer 
*cmd_buffer) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_attachment_view *aview = &cview->attachment_view; - struct anv_image_view *iview = &cview->image_view; + struct anv_image_view *iview = &aview->image_view; struct anv_surface *surface = anv_image_get_surface_for_color_attachment(image); diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 30573639986..3ed32e8de3d 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -458,11 +458,12 @@ static void gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_depth_stencil_view *ds_view = + const struct anv_attachment_view *aview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - const struct anv_image *image = ds_view ? ds_view->image : NULL; - const bool has_depth = ds_view && ds_view->format->depth_format; - const bool has_stencil = ds_view && ds_view->format->has_stencil; + const struct anv_image_view *iview = aview ? &aview->image_view : NULL; + const struct anv_image *image = iview ? 
iview->image : NULL; + const bool has_depth = iview && iview->format->depth_format; + const bool has_stencil = iview && iview->format->has_stencil; /* FIXME: Implement the PMA stall W/A */ /* FIXME: Width and Height are wrong */ @@ -471,10 +472,10 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) if (has_depth) { anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = ds_view->format->depth_format, + .DepthWriteEnable = iview->format->depth_format, .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = ds_view->format->depth_format, + .SurfaceFormat = iview->format->depth_format, .SurfacePitch = image->depth_surface.stride - 1, .SurfaceBaseAddress = { .bo = image->bo, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 8e9b43cab05..450bc52b579 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -281,14 +281,13 @@ gen8_image_view_init(struct anv_image_view *iview, } void -gen8_color_attachment_view_init(struct anv_color_attachment_view *cview, +gen8_color_attachment_view_init(struct anv_attachment_view *aview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_attachment_view *aview = &cview->attachment_view; - struct anv_image_view *iview = &cview->image_view; + struct anv_image_view *iview = &aview->image_view; struct anv_surface *surface = anv_image_get_surface_for_color_attachment(image); const struct anv_format *format_info = -- cgit v1.2.3 From f7c3519aaf12d2e3878becd965433c9970383d84 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 5 Oct 2015 20:15:06 -0700 Subject: vk/0.170.2: Rename VkTexAddress to VkTexAddressMode --- include/vulkan/vulkan.h | 24 ++++++++++++------------ src/vulkan/gen7_state.c | 16 ++++++++-------- src/vulkan/gen8_state.c | 16 ++++++++-------- 3 files 
changed, 28 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 97902309020..a13ff990020 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -688,16 +688,16 @@ typedef enum { } VkTexMipmapMode; typedef enum { - VK_TEX_ADDRESS_WRAP = 0, - VK_TEX_ADDRESS_MIRROR = 1, - VK_TEX_ADDRESS_CLAMP = 2, - VK_TEX_ADDRESS_MIRROR_ONCE = 3, - VK_TEX_ADDRESS_CLAMP_BORDER = 4, - VK_TEX_ADDRESS_BEGIN_RANGE = VK_TEX_ADDRESS_WRAP, - VK_TEX_ADDRESS_END_RANGE = VK_TEX_ADDRESS_CLAMP_BORDER, - VK_TEX_ADDRESS_NUM = (VK_TEX_ADDRESS_CLAMP_BORDER - VK_TEX_ADDRESS_WRAP + 1), + VK_TEX_ADDRESS_MODE_WRAP = 0, + VK_TEX_ADDRESS_MODE_MIRROR = 1, + VK_TEX_ADDRESS_MODE_CLAMP = 2, + VK_TEX_ADDRESS_MODE_MIRROR_ONCE = 3, + VK_TEX_ADDRESS_MODE_CLAMP_BORDER = 4, + VK_TEX_ADDRESS_BEGIN_RANGE = VK_TEX_ADDRESS_MODE_WRAP, + VK_TEX_ADDRESS_END_RANGE = VK_TEX_ADDRESS_MODE_CLAMP_BORDER, + VK_TEX_ADDRESS_NUM = (VK_TEX_ADDRESS_MODE_CLAMP_BORDER - VK_TEX_ADDRESS_MODE_WRAP + 1), VK_TEX_ADDRESS_MAX_ENUM = 0x7FFFFFFF -} VkTexAddress; +} VkTexAddressMode; typedef enum { VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK = 0, @@ -1717,9 +1717,9 @@ typedef struct { VkTexFilter magFilter; VkTexFilter minFilter; VkTexMipmapMode mipMode; - VkTexAddress addressU; - VkTexAddress addressV; - VkTexAddress addressW; + VkTexAddressMode addressModeU; + VkTexAddressMode addressModeV; + VkTexAddressMode addressModeW; float mipLodBias; float maxAnisotropy; VkBool32 compareEnable; diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 778c9096461..a06e76015d3 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -95,11 +95,11 @@ static const uint32_t vk_to_gen_mipmap_mode[] = { }; static const uint32_t vk_to_gen_tex_address[] = { - [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, - [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, - [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, - [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, - [VK_TEX_ADDRESS_CLAMP_BORDER] = 
TCM_CLAMP_BORDER, + [VK_TEX_ADDRESS_MODE_WRAP] = TCM_WRAP, + [VK_TEX_ADDRESS_MODE_MIRROR] = TCM_MIRROR, + [VK_TEX_ADDRESS_MODE_CLAMP] = TCM_CLAMP, + [VK_TEX_ADDRESS_MODE_MIRROR_ONCE] = TCM_MIRROR_ONCE, + [VK_TEX_ADDRESS_MODE_CLAMP_BORDER] = TCM_CLAMP_BORDER, }; static const uint32_t vk_to_gen_compare_op[] = { @@ -169,9 +169,9 @@ VkResult gen7_CreateSampler( .UAddressMagFilterRoundingEnable = 0, .TrilinearFilterQuality = 0, .NonnormalizedCoordinateEnable = 0, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU], - .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW], + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], }; GEN7_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 450bc52b579..60135feaebe 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -443,11 +443,11 @@ VkResult gen8_CreateSampler( }; static const uint32_t vk_to_gen_tex_address[] = { - [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, - [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, - [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, - [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, - [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, + [VK_TEX_ADDRESS_MODE_WRAP] = TCM_WRAP, + [VK_TEX_ADDRESS_MODE_MIRROR] = TCM_MIRROR, + [VK_TEX_ADDRESS_MODE_CLAMP] = TCM_CLAMP, + [VK_TEX_ADDRESS_MODE_MIRROR_ONCE] = TCM_MIRROR_ONCE, + [VK_TEX_ADDRESS_MODE_CLAMP_BORDER] = TCM_CLAMP_BORDER, }; static const uint32_t vk_to_gen_compare_op[] = { @@ -503,9 +503,9 @@ VkResult gen8_CreateSampler( .UAddressMagFilterRoundingEnable = 0, .TrilinearFilterQuality = 0, .NonnormalizedCoordinateEnable = 0, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU], - 
.TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW], + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], }; GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); -- cgit v1.2.3 From 57f500324b8d7b6de1e3d5e0c0b18622b97ecca9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 5 Oct 2015 20:17:24 -0700 Subject: vk/0.170.2: Add unnormalizedCoordinates to VkSamplerCreateInfo --- include/vulkan/vulkan.h | 1 + src/vulkan/gen7_state.c | 2 +- src/vulkan/gen8_state.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index a13ff990020..2afe53b6c9d 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1727,6 +1727,7 @@ typedef struct { float minLod; float maxLod; VkBorderColor borderColor; + VkBool32 unnormalizedCoordinates; } VkSamplerCreateInfo; typedef struct { diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index a06e76015d3..ed9d9f8c568 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -168,7 +168,7 @@ VkResult gen7_CreateSampler( .UAddressMinFilterRoundingEnable = 0, .UAddressMagFilterRoundingEnable = 0, .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = 0, + .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 60135feaebe..4990becf60a 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -502,7 +502,7 @@ VkResult 
gen8_CreateSampler( .UAddressMinFilterRoundingEnable = 0, .UAddressMagFilterRoundingEnable = 0, .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = 0, + .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], -- cgit v1.2.3 From 757166592e5e26dc226849f94d1460db28a35170 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 5 Oct 2015 20:26:21 -0700 Subject: vk/0.170.2: Rename pointer parameters of VkSubpassDescription --- include/vulkan/vulkan.h | 8 ++++---- src/vulkan/anv_device.c | 8 ++++---- src/vulkan/anv_meta.c | 12 ++++++------ 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 2afe53b6c9d..25d0f829f77 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1885,13 +1885,13 @@ typedef struct { VkPipelineBindPoint pipelineBindPoint; VkSubpassDescriptionFlags flags; uint32_t inputCount; - const VkAttachmentReference* inputAttachments; + const VkAttachmentReference* pInputAttachments; uint32_t colorCount; - const VkAttachmentReference* colorAttachments; - const VkAttachmentReference* resolveAttachments; + const VkAttachmentReference* pColorAttachments; + const VkAttachmentReference* pResolveAttachments; VkAttachmentReference depthStencilAttachment; uint32_t preserveCount; - const VkAttachmentReference* preserveAttachments; + const VkAttachmentReference* pPreserveAttachments; } VkSubpassDescription; typedef struct { diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 5302ee5cd73..fdc6f8e1034 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -2117,7 +2117,7 @@ VkResult anv_CreateRenderPass( for (uint32_t j = 0; j < desc->inputCount; j++) { subpass->input_attachments[j] - = 
desc->inputAttachments[j].attachment; + = desc->pInputAttachments[j].attachment; } } @@ -2128,18 +2128,18 @@ VkResult anv_CreateRenderPass( for (uint32_t j = 0; j < desc->colorCount; j++) { subpass->color_attachments[j] - = desc->colorAttachments[j].attachment; + = desc->pColorAttachments[j].attachment; } } - if (desc->resolveAttachments) { + if (desc->pResolveAttachments) { subpass->resolve_attachments = anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); for (uint32_t j = 0; j < desc->colorCount; j++) { subpass->resolve_attachments[j] - = desc->resolveAttachments[j].attachment; + = desc->pResolveAttachments[j].attachment; } } diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index f6134338fdd..9201167bd98 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -865,17 +865,17 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .inputCount = 0, .colorCount = 1, - .colorAttachments = &(VkAttachmentReference) { + .pColorAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_GENERAL, }, - .resolveAttachments = NULL, + .pResolveAttachments = NULL, .depthStencilAttachment = (VkAttachmentReference) { .attachment = VK_ATTACHMENT_UNUSED, .layout = VK_IMAGE_LAYOUT_GENERAL, }, .preserveCount = 1, - .preserveAttachments = &(VkAttachmentReference) { + .pPreserveAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_GENERAL, }, @@ -1596,17 +1596,17 @@ void anv_CmdClearColorImage( .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .inputCount = 0, .colorCount = 1, - .colorAttachments = &(VkAttachmentReference) { + .pColorAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_GENERAL, }, - .resolveAttachments = NULL, + .pResolveAttachments = NULL, .depthStencilAttachment = (VkAttachmentReference) { .attachment = VK_ATTACHMENT_UNUSED, .layout = VK_IMAGE_LAYOUT_GENERAL, }, 
.preserveCount = 1, - .preserveAttachments = &(VkAttachmentReference) { + .pPreserveAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_GENERAL, }, -- cgit v1.2.3 From 8e1ef639b6af17cfde2234fb2bd60c9c45cf265b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 5 Oct 2015 20:30:53 -0700 Subject: vk/0.170.2: Add the subpass field to VkCmdBufferBeginInfo --- include/vulkan/vulkan.h | 1 + src/vulkan/anv_cmd_buffer.c | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 25d0f829f77..48c4d195b2d 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1937,6 +1937,7 @@ typedef struct { const void* pNext; VkCmdBufferOptimizeFlags flags; VkRenderPass renderPass; + uint32_t subpass; VkFramebuffer framebuffer; } VkCmdBufferBeginInfo; diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index cab916c1383..eb70abfeb76 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -202,9 +202,10 @@ VkResult anv_BeginCommandBuffer( cmd_buffer->state.pass = anv_render_pass_from_handle(pBeginInfo->renderPass); - /* FIXME: We shouldn't be starting on the first subpass */ - anv_cmd_buffer_begin_subpass(cmd_buffer, - &cmd_buffer->state.pass->subpasses[0]); + struct anv_subpass *subpass = + &cmd_buffer->state.pass->subpasses[pBeginInfo->subpass]; + + anv_cmd_buffer_begin_subpass(cmd_buffer, subpass); } anv_cmd_buffer_emit_state_base_address(cmd_buffer); -- cgit v1.2.3 From 460676122fffddb26ef324293fcb5efb566796db Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 5 Oct 2015 20:35:08 -0700 Subject: vk/0.170.2: Rename VkClearValue.ds to depthStencil --- include/vulkan/vulkan.h | 2 +- src/vulkan/anv_meta.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 48c4d195b2d..5deb161274f 100644 --- 
a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1999,7 +1999,7 @@ typedef struct { typedef union { VkClearColorValue color; - VkClearDepthStencilValue ds; + VkClearDepthStencilValue depthStencil; } VkClearValue; typedef struct { diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 9201167bd98..949e5a16601 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -458,7 +458,7 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, } else if (att->format->depth_format) { assert(ds_attachment == VK_ATTACHMENT_UNUSED); ds_attachment = i; - ds_clear_value= clear_values[ds_attachment].ds; + ds_clear_value = clear_values[ds_attachment].depthStencil; } } else if (att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { assert(att->format->has_stencil); -- cgit v1.2.3 From 05a26a60c831f1dafc60a9f88b0d5cb2ca303d01 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 5 Oct 2015 20:50:51 -0700 Subject: vk/0.170.2: Make destructors return void --- include/vulkan/vulkan.h | 116 ++++++++++++++++++++++---------------------- src/vulkan/anv_cmd_buffer.c | 8 +-- src/vulkan/anv_device.c | 71 ++++++++------------------- src/vulkan/anv_image.c | 12 ++--- src/vulkan/anv_pipeline.c | 20 ++------ src/vulkan/anv_query.c | 4 +- 6 files changed, 89 insertions(+), 142 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 5deb161274f..9962cc17179 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2067,7 +2067,7 @@ typedef struct { typedef VkResult (VKAPI *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCreateInfo, VkInstance* pInstance); -typedef VkResult (VKAPI *PFN_vkDestroyInstance)(VkInstance instance); +typedef void (VKAPI *PFN_vkDestroyInstance)(VkInstance instance); typedef VkResult (VKAPI *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); typedef VkResult (VKAPI 
*PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); @@ -2080,7 +2080,7 @@ typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevi typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); -typedef VkResult (VKAPI *PFN_vkDestroyDevice)(VkDevice device); +typedef void (VKAPI *PFN_vkDestroyDevice)(VkDevice device); typedef VkResult (VKAPI *PFN_vkGetGlobalExtensionProperties)(const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); typedef VkResult (VKAPI *PFN_vkGetGlobalLayerProperties)(uint32_t* pCount, VkLayerProperties* pProperties); @@ -2090,9 +2090,9 @@ typedef VkResult (VKAPI *PFN_vkQueueSubmit)(VkQueue queue, uint32_t cmdBufferCou typedef VkResult (VKAPI *PFN_vkQueueWaitIdle)(VkQueue queue); typedef VkResult (VKAPI *PFN_vkDeviceWaitIdle)(VkDevice device); typedef VkResult (VKAPI *PFN_vkAllocMemory)(VkDevice device, const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem); -typedef VkResult (VKAPI *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory mem); +typedef void (VKAPI *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory mem); typedef VkResult (VKAPI *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData); -typedef VkResult (VKAPI *PFN_vkUnmapMemory)(VkDevice device, 
VkDeviceMemory mem); +typedef void (VKAPI *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem); typedef VkResult (VKAPI *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); typedef VkResult (VKAPI *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); typedef VkResult (VKAPI *PFN_vkGetDeviceMemoryCommitment)(VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes); @@ -2106,75 +2106,75 @@ typedef VkResult (VKAPI *PFN_vkQueueBindSparseBufferMemory)(VkQueue queue, VkBuf typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageOpaqueMemory)(VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageMemory)(VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseImageMemoryBindInfo* pBindInfo); typedef VkResult (VKAPI *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); -typedef VkResult (VKAPI *PFN_vkDestroyFence)(VkDevice device, VkFence fence); +typedef void (VKAPI *PFN_vkDestroyFence)(VkDevice device, VkFence fence); typedef VkResult (VKAPI *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); typedef VkResult (VKAPI *PFN_vkGetFenceStatus)(VkDevice device, VkFence fence); typedef VkResult (VKAPI *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); typedef VkResult (VKAPI *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore); -typedef VkResult (VKAPI *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore semaphore); +typedef void (VKAPI *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore semaphore); typedef VkResult (VKAPI *PFN_vkQueueSignalSemaphore)(VkQueue queue, VkSemaphore semaphore); typedef VkResult (VKAPI 
*PFN_vkQueueWaitSemaphore)(VkQueue queue, VkSemaphore semaphore); typedef VkResult (VKAPI *PFN_vkCreateEvent)(VkDevice device, const VkEventCreateInfo* pCreateInfo, VkEvent* pEvent); -typedef VkResult (VKAPI *PFN_vkDestroyEvent)(VkDevice device, VkEvent event); +typedef void (VKAPI *PFN_vkDestroyEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI *PFN_vkGetEventStatus)(VkDevice device, VkEvent event); typedef VkResult (VKAPI *PFN_vkSetEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI *PFN_vkResetEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, VkQueryPool* pQueryPool); -typedef VkResult (VKAPI *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool); +typedef void (VKAPI *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool); typedef VkResult (VKAPI *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, size_t* pDataSize, void* pData, VkQueryResultFlags flags); typedef VkResult (VKAPI *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); -typedef VkResult (VKAPI *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer); +typedef void (VKAPI *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer); typedef VkResult (VKAPI *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); -typedef VkResult (VKAPI *PFN_vkDestroyBufferView)(VkDevice device, VkBufferView bufferView); +typedef void (VKAPI *PFN_vkDestroyBufferView)(VkDevice device, VkBufferView bufferView); typedef VkResult (VKAPI *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, VkImage* pImage); -typedef VkResult (VKAPI *PFN_vkDestroyImage)(VkDevice device, VkImage image); +typedef void (VKAPI *PFN_vkDestroyImage)(VkDevice device, VkImage image); typedef VkResult (VKAPI 
*PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); typedef VkResult (VKAPI *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); -typedef VkResult (VKAPI *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView); +typedef void (VKAPI *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView); typedef VkResult (VKAPI *PFN_vkCreateAttachmentView)(VkDevice device, const VkAttachmentViewCreateInfo* pCreateInfo, VkAttachmentView* pView); -typedef VkResult (VKAPI *PFN_vkDestroyAttachmentView)(VkDevice device, VkAttachmentView attachmentView); +typedef void (VKAPI *PFN_vkDestroyAttachmentView)(VkDevice device, VkAttachmentView attachmentView); typedef VkResult (VKAPI *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); -typedef VkResult (VKAPI *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule); +typedef void (VKAPI *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule); typedef VkResult (VKAPI *PFN_vkCreateShader)(VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); -typedef VkResult (VKAPI *PFN_vkDestroyShader)(VkDevice device, VkShader shader); +typedef void (VKAPI *PFN_vkDestroyShader)(VkDevice device, VkShader shader); typedef VkResult (VKAPI *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); -typedef VkResult (VKAPI *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache); +typedef void (VKAPI *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache); typedef size_t (VKAPI *PFN_vkGetPipelineCacheSize)(VkDevice device, VkPipelineCache pipelineCache); typedef VkResult (VKAPI *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, void* pData); typedef VkResult 
(VKAPI *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache destCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); typedef VkResult (VKAPI *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); typedef VkResult (VKAPI *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); -typedef VkResult (VKAPI *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline); +typedef void (VKAPI *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline); typedef VkResult (VKAPI *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout); -typedef VkResult (VKAPI *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelineLayout pipelineLayout); +typedef void (VKAPI *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelineLayout pipelineLayout); typedef VkResult (VKAPI *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); -typedef VkResult (VKAPI *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler); +typedef void (VKAPI *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler); typedef VkResult (VKAPI *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); -typedef VkResult (VKAPI *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout); +typedef void (VKAPI *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout); typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, VkDescriptorPoolUsage poolUsage, uint32_t maxSets, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); -typedef VkResult (VKAPI 
*PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); +typedef void (VKAPI *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets, uint32_t* pCount); typedef VkResult (VKAPI *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t count, const VkDescriptorSet* pDescriptorSets); typedef VkResult (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); typedef VkResult (VKAPI *PFN_vkCreateDynamicViewportState)(VkDevice device, const VkDynamicViewportStateCreateInfo* pCreateInfo, VkDynamicViewportState* pState); -typedef VkResult (VKAPI *PFN_vkDestroyDynamicViewportState)(VkDevice device, VkDynamicViewportState dynamicViewportState); +typedef void (VKAPI *PFN_vkDestroyDynamicViewportState)(VkDevice device, VkDynamicViewportState dynamicViewportState); typedef VkResult (VKAPI *PFN_vkCreateDynamicRasterState)(VkDevice device, const VkDynamicRasterStateCreateInfo* pCreateInfo, VkDynamicRasterState* pState); -typedef VkResult (VKAPI *PFN_vkDestroyDynamicRasterState)(VkDevice device, VkDynamicRasterState dynamicRasterState); +typedef void (VKAPI *PFN_vkDestroyDynamicRasterState)(VkDevice device, VkDynamicRasterState dynamicRasterState); typedef VkResult (VKAPI *PFN_vkCreateDynamicColorBlendState)(VkDevice device, const VkDynamicColorBlendStateCreateInfo* pCreateInfo, VkDynamicColorBlendState* pState); -typedef VkResult (VKAPI *PFN_vkDestroyDynamicColorBlendState)(VkDevice device, VkDynamicColorBlendState dynamicColorBlendState); +typedef void 
(VKAPI *PFN_vkDestroyDynamicColorBlendState)(VkDevice device, VkDynamicColorBlendState dynamicColorBlendState); typedef VkResult (VKAPI *PFN_vkCreateDynamicDepthStencilState)(VkDevice device, const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, VkDynamicDepthStencilState* pState); -typedef VkResult (VKAPI *PFN_vkDestroyDynamicDepthStencilState)(VkDevice device, VkDynamicDepthStencilState dynamicDepthStencilState); +typedef void (VKAPI *PFN_vkDestroyDynamicDepthStencilState)(VkDevice device, VkDynamicDepthStencilState dynamicDepthStencilState); typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); -typedef VkResult (VKAPI *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer); +typedef void (VKAPI *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer); typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); -typedef VkResult (VKAPI *PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass); +typedef void (VKAPI *PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass); typedef VkResult (VKAPI *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); typedef VkResult (VKAPI *PFN_vkCreateCommandPool)(VkDevice device, const VkCmdPoolCreateInfo* pCreateInfo, VkCmdPool* pCmdPool); -typedef VkResult (VKAPI *PFN_vkDestroyCommandPool)(VkDevice device, VkCmdPool cmdPool); +typedef void (VKAPI *PFN_vkDestroyCommandPool)(VkDevice device, VkCmdPool cmdPool); typedef VkResult (VKAPI *PFN_vkResetCommandPool)(VkDevice device, VkCmdPool cmdPool, VkCmdPoolResetFlags flags); typedef VkResult (VKAPI *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCmdBufferCreateInfo* pCreateInfo, VkCmdBuffer* pCmdBuffer); -typedef VkResult (VKAPI *PFN_vkDestroyCommandBuffer)(VkDevice device, VkCmdBuffer commandBuffer); +typedef void 
(VKAPI *PFN_vkDestroyCommandBuffer)(VkDevice device, VkCmdBuffer commandBuffer); typedef VkResult (VKAPI *PFN_vkBeginCommandBuffer)(VkCmdBuffer cmdBuffer, const VkCmdBufferBeginInfo* pBeginInfo); typedef VkResult (VKAPI *PFN_vkEndCommandBuffer)(VkCmdBuffer cmdBuffer); typedef VkResult (VKAPI *PFN_vkResetCommandBuffer)(VkCmdBuffer cmdBuffer, VkCmdBufferResetFlags flags); @@ -2224,7 +2224,7 @@ VkResult VKAPI vkCreateInstance( const VkInstanceCreateInfo* pCreateInfo, VkInstance* pInstance); -VkResult VKAPI vkDestroyInstance( +void VKAPI vkDestroyInstance( VkInstance instance); VkResult VKAPI vkEnumeratePhysicalDevices( @@ -2283,7 +2283,7 @@ VkResult VKAPI vkCreateDevice( const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); -VkResult VKAPI vkDestroyDevice( +void VKAPI vkDestroyDevice( VkDevice device); VkResult VKAPI vkGetGlobalExtensionProperties( @@ -2329,7 +2329,7 @@ VkResult VKAPI vkAllocMemory( const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem); -VkResult VKAPI vkFreeMemory( +void VKAPI vkFreeMemory( VkDevice device, VkDeviceMemory mem); @@ -2341,7 +2341,7 @@ VkResult VKAPI vkMapMemory( VkMemoryMapFlags flags, void** ppData); -VkResult VKAPI vkUnmapMemory( +void VKAPI vkUnmapMemory( VkDevice device, VkDeviceMemory mem); @@ -2421,7 +2421,7 @@ VkResult VKAPI vkCreateFence( const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); -VkResult VKAPI vkDestroyFence( +void VKAPI vkDestroyFence( VkDevice device, VkFence fence); @@ -2446,7 +2446,7 @@ VkResult VKAPI vkCreateSemaphore( const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore); -VkResult VKAPI vkDestroySemaphore( +void VKAPI vkDestroySemaphore( VkDevice device, VkSemaphore semaphore); @@ -2463,7 +2463,7 @@ VkResult VKAPI vkCreateEvent( const VkEventCreateInfo* pCreateInfo, VkEvent* pEvent); -VkResult VKAPI vkDestroyEvent( +void VKAPI vkDestroyEvent( VkDevice device, VkEvent event); @@ -2484,7 +2484,7 @@ VkResult VKAPI vkCreateQueryPool( const VkQueryPoolCreateInfo* pCreateInfo, 
VkQueryPool* pQueryPool); -VkResult VKAPI vkDestroyQueryPool( +void VKAPI vkDestroyQueryPool( VkDevice device, VkQueryPool queryPool); @@ -2502,7 +2502,7 @@ VkResult VKAPI vkCreateBuffer( const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); -VkResult VKAPI vkDestroyBuffer( +void VKAPI vkDestroyBuffer( VkDevice device, VkBuffer buffer); @@ -2511,7 +2511,7 @@ VkResult VKAPI vkCreateBufferView( const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); -VkResult VKAPI vkDestroyBufferView( +void VKAPI vkDestroyBufferView( VkDevice device, VkBufferView bufferView); @@ -2520,7 +2520,7 @@ VkResult VKAPI vkCreateImage( const VkImageCreateInfo* pCreateInfo, VkImage* pImage); -VkResult VKAPI vkDestroyImage( +void VKAPI vkDestroyImage( VkDevice device, VkImage image); @@ -2535,7 +2535,7 @@ VkResult VKAPI vkCreateImageView( const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); -VkResult VKAPI vkDestroyImageView( +void VKAPI vkDestroyImageView( VkDevice device, VkImageView imageView); @@ -2544,7 +2544,7 @@ VkResult VKAPI vkCreateAttachmentView( const VkAttachmentViewCreateInfo* pCreateInfo, VkAttachmentView* pView); -VkResult VKAPI vkDestroyAttachmentView( +void VKAPI vkDestroyAttachmentView( VkDevice device, VkAttachmentView attachmentView); @@ -2553,7 +2553,7 @@ VkResult VKAPI vkCreateShaderModule( const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); -VkResult VKAPI vkDestroyShaderModule( +void VKAPI vkDestroyShaderModule( VkDevice device, VkShaderModule shaderModule); @@ -2562,7 +2562,7 @@ VkResult VKAPI vkCreateShader( const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); -VkResult VKAPI vkDestroyShader( +void VKAPI vkDestroyShader( VkDevice device, VkShader shader); @@ -2571,7 +2571,7 @@ VkResult VKAPI vkCreatePipelineCache( const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); -VkResult VKAPI vkDestroyPipelineCache( +void VKAPI vkDestroyPipelineCache( VkDevice device, VkPipelineCache 
pipelineCache); @@ -2604,7 +2604,7 @@ VkResult VKAPI vkCreateComputePipelines( const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); -VkResult VKAPI vkDestroyPipeline( +void VKAPI vkDestroyPipeline( VkDevice device, VkPipeline pipeline); @@ -2613,7 +2613,7 @@ VkResult VKAPI vkCreatePipelineLayout( const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout); -VkResult VKAPI vkDestroyPipelineLayout( +void VKAPI vkDestroyPipelineLayout( VkDevice device, VkPipelineLayout pipelineLayout); @@ -2622,7 +2622,7 @@ VkResult VKAPI vkCreateSampler( const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); -VkResult VKAPI vkDestroySampler( +void VKAPI vkDestroySampler( VkDevice device, VkSampler sampler); @@ -2631,7 +2631,7 @@ VkResult VKAPI vkCreateDescriptorSetLayout( const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); -VkResult VKAPI vkDestroyDescriptorSetLayout( +void VKAPI vkDestroyDescriptorSetLayout( VkDevice device, VkDescriptorSetLayout descriptorSetLayout); @@ -2642,7 +2642,7 @@ VkResult VKAPI vkCreateDescriptorPool( const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); -VkResult VKAPI vkDestroyDescriptorPool( +void VKAPI vkDestroyDescriptorPool( VkDevice device, VkDescriptorPool descriptorPool); @@ -2677,7 +2677,7 @@ VkResult VKAPI vkCreateDynamicViewportState( const VkDynamicViewportStateCreateInfo* pCreateInfo, VkDynamicViewportState* pState); -VkResult VKAPI vkDestroyDynamicViewportState( +void VKAPI vkDestroyDynamicViewportState( VkDevice device, VkDynamicViewportState dynamicViewportState); @@ -2686,7 +2686,7 @@ VkResult VKAPI vkCreateDynamicRasterState( const VkDynamicRasterStateCreateInfo* pCreateInfo, VkDynamicRasterState* pState); -VkResult VKAPI vkDestroyDynamicRasterState( +void VKAPI vkDestroyDynamicRasterState( VkDevice device, VkDynamicRasterState dynamicRasterState); @@ -2695,7 +2695,7 @@ VkResult VKAPI vkCreateDynamicColorBlendState( const 
VkDynamicColorBlendStateCreateInfo* pCreateInfo, VkDynamicColorBlendState* pState); -VkResult VKAPI vkDestroyDynamicColorBlendState( +void VKAPI vkDestroyDynamicColorBlendState( VkDevice device, VkDynamicColorBlendState dynamicColorBlendState); @@ -2704,7 +2704,7 @@ VkResult VKAPI vkCreateDynamicDepthStencilState( const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, VkDynamicDepthStencilState* pState); -VkResult VKAPI vkDestroyDynamicDepthStencilState( +void VKAPI vkDestroyDynamicDepthStencilState( VkDevice device, VkDynamicDepthStencilState dynamicDepthStencilState); @@ -2713,7 +2713,7 @@ VkResult VKAPI vkCreateFramebuffer( const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); -VkResult VKAPI vkDestroyFramebuffer( +void VKAPI vkDestroyFramebuffer( VkDevice device, VkFramebuffer framebuffer); @@ -2722,7 +2722,7 @@ VkResult VKAPI vkCreateRenderPass( const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); -VkResult VKAPI vkDestroyRenderPass( +void VKAPI vkDestroyRenderPass( VkDevice device, VkRenderPass renderPass); @@ -2736,7 +2736,7 @@ VkResult VKAPI vkCreateCommandPool( const VkCmdPoolCreateInfo* pCreateInfo, VkCmdPool* pCmdPool); -VkResult VKAPI vkDestroyCommandPool( +void VKAPI vkDestroyCommandPool( VkDevice device, VkCmdPool cmdPool); @@ -2750,7 +2750,7 @@ VkResult VKAPI vkCreateCommandBuffer( const VkCmdBufferCreateInfo* pCreateInfo, VkCmdBuffer* pCmdBuffer); -VkResult VKAPI vkDestroyCommandBuffer( +void VKAPI vkDestroyCommandBuffer( VkDevice device, VkCmdBuffer commandBuffer); diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index eb70abfeb76..64f10ffe74e 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -144,7 +144,7 @@ VkResult anv_CreateCommandBuffer( return result; } -VkResult anv_DestroyCommandBuffer( +void anv_DestroyCommandBuffer( VkDevice _device, VkCmdBuffer _cmd_buffer) { @@ -158,8 +158,6 @@ VkResult anv_DestroyCommandBuffer( 
anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); anv_device_free(device, cmd_buffer); - - return VK_SUCCESS; } VkResult anv_ResetCommandBuffer( @@ -786,7 +784,7 @@ VkResult anv_CreateCommandPool( return VK_SUCCESS; } -VkResult anv_DestroyCommandPool( +void anv_DestroyCommandPool( VkDevice _device, VkCmdPool cmdPool) { @@ -796,8 +794,6 @@ VkResult anv_DestroyCommandPool( anv_ResetCommandPool(_device, cmdPool, 0); anv_device_free(device, pool); - - return VK_SUCCESS; } VkResult anv_ResetCommandPool( diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index fdc6f8e1034..dd8e3ffa0af 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -178,7 +178,7 @@ VkResult anv_CreateInstance( return VK_SUCCESS; } -VkResult anv_DestroyInstance( +void anv_DestroyInstance( VkInstance _instance) { ANV_FROM_HANDLE(anv_instance, instance, _instance); @@ -190,8 +190,6 @@ VkResult anv_DestroyInstance( _mesa_locale_fini(); instance->pfnFree(instance->pAllocUserData, instance); - - return VK_SUCCESS; } void * @@ -650,7 +648,7 @@ VkResult anv_CreateDevice( return vk_error(VK_ERROR_UNAVAILABLE); } -VkResult anv_DestroyDevice( +void anv_DestroyDevice( VkDevice _device) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -679,8 +677,6 @@ VkResult anv_DestroyDevice( close(device->fd); anv_instance_free(device->instance, device); - - return VK_SUCCESS; } VkResult anv_GetGlobalExtensionProperties( @@ -937,7 +933,7 @@ VkResult anv_AllocMemory( return result; } -VkResult anv_FreeMemory( +void anv_FreeMemory( VkDevice _device, VkDeviceMemory _mem) { @@ -951,8 +947,6 @@ VkResult anv_FreeMemory( anv_gem_close(device, mem->bo.gem_handle); anv_device_free(device, mem); - - return VK_SUCCESS; } VkResult anv_MapMemory( @@ -980,15 +974,13 @@ VkResult anv_MapMemory( return VK_SUCCESS; } -VkResult anv_UnmapMemory( +void anv_UnmapMemory( VkDevice _device, VkDeviceMemory _mem) { ANV_FROM_HANDLE(anv_device_memory, 
mem, _mem); anv_gem_munmap(mem->map, mem->map_size); - - return VK_SUCCESS; } VkResult anv_FlushMappedMemoryRanges( @@ -1195,7 +1187,7 @@ VkResult anv_CreateFence( return result; } -VkResult anv_DestroyFence( +void anv_DestroyFence( VkDevice _device, VkFence _fence) { @@ -1205,8 +1197,6 @@ VkResult anv_DestroyFence( anv_gem_munmap(fence->bo.map, fence->bo.size); anv_gem_close(device, fence->bo.gem_handle); anv_device_free(device, fence); - - return VK_SUCCESS; } VkResult anv_ResetFences( @@ -1278,11 +1268,11 @@ VkResult anv_CreateSemaphore( stub_return(VK_UNSUPPORTED); } -VkResult anv_DestroySemaphore( +void anv_DestroySemaphore( VkDevice device, VkSemaphore semaphore) { - stub_return(VK_UNSUPPORTED); + stub(); } VkResult anv_QueueSignalSemaphore( @@ -1309,11 +1299,11 @@ VkResult anv_CreateEvent( stub_return(VK_UNSUPPORTED); } -VkResult anv_DestroyEvent( +void anv_DestroyEvent( VkDevice device, VkEvent event) { - stub_return(VK_UNSUPPORTED); + stub(); } VkResult anv_GetEventStatus( @@ -1363,7 +1353,7 @@ VkResult anv_CreateBuffer( return VK_SUCCESS; } -VkResult anv_DestroyBuffer( +void anv_DestroyBuffer( VkDevice _device, VkBuffer _buffer) { @@ -1371,8 +1361,6 @@ VkResult anv_DestroyBuffer( ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); anv_device_free(device, buffer); - - return VK_SUCCESS; } void @@ -1421,7 +1409,7 @@ anv_buffer_view_create( return VK_SUCCESS; } -VkResult anv_DestroyBufferView( +void anv_DestroyBufferView( VkDevice _device, VkBufferView _bview) { @@ -1430,11 +1418,9 @@ VkResult anv_DestroyBufferView( anv_state_pool_free(&device->surface_state_pool, bview->surface_state); anv_device_free(device, bview); - - return VK_SUCCESS; } -VkResult anv_DestroySampler( +void anv_DestroySampler( VkDevice _device, VkSampler _sampler) { @@ -1442,8 +1428,6 @@ VkResult anv_DestroySampler( ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); anv_device_free(device, sampler); - - return VK_SUCCESS; } // Descriptor set functions @@ -1601,7 +1585,7 @@ VkResult 
anv_CreateDescriptorSetLayout( return VK_SUCCESS; } -VkResult anv_DestroyDescriptorSetLayout( +void anv_DestroyDescriptorSetLayout( VkDevice _device, VkDescriptorSetLayout _set_layout) { @@ -1609,8 +1593,6 @@ VkResult anv_DestroyDescriptorSetLayout( ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); anv_device_free(device, set_layout); - - return VK_SUCCESS; } VkResult anv_CreateDescriptorPool( @@ -1625,12 +1607,11 @@ VkResult anv_CreateDescriptorPool( return VK_SUCCESS; } -VkResult anv_DestroyDescriptorPool( +void anv_DestroyDescriptorPool( VkDevice _device, VkDescriptorPool _pool) { anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); - return VK_SUCCESS; } VkResult anv_ResetDescriptorPool( @@ -1901,7 +1882,7 @@ VkResult anv_CreateDynamicViewportState( return VK_SUCCESS; } -VkResult anv_DestroyDynamicViewportState( +void anv_DestroyDynamicViewportState( VkDevice _device, VkDynamicViewportState _vp_state) { @@ -1913,11 +1894,9 @@ VkResult anv_DestroyDynamicViewportState( anv_state_pool_free(&device->dynamic_state_pool, vp_state->scissor); anv_device_free(device, vp_state); - - return VK_SUCCESS; } -VkResult anv_DestroyDynamicRasterState( +void anv_DestroyDynamicRasterState( VkDevice _device, VkDynamicRasterState _rs_state) { @@ -1925,8 +1904,6 @@ VkResult anv_DestroyDynamicRasterState( ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, _rs_state); anv_device_free(device, rs_state); - - return VK_SUCCESS; } VkResult anv_CreateDynamicColorBlendState( @@ -1958,7 +1935,7 @@ VkResult anv_CreateDynamicColorBlendState( return VK_SUCCESS; } -VkResult anv_DestroyDynamicColorBlendState( +void anv_DestroyDynamicColorBlendState( VkDevice _device, VkDynamicColorBlendState _cb_state) { @@ -1966,11 +1943,9 @@ VkResult anv_DestroyDynamicColorBlendState( ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, _cb_state); anv_device_free(device, cb_state); - - return VK_SUCCESS; } -VkResult anv_DestroyDynamicDepthStencilState( +void 
anv_DestroyDynamicDepthStencilState( VkDevice _device, VkDynamicDepthStencilState _ds_state) { @@ -1978,8 +1953,6 @@ VkResult anv_DestroyDynamicDepthStencilState( ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, _ds_state); anv_device_free(device, ds_state); - - return VK_SUCCESS; } VkResult anv_CreateFramebuffer( @@ -2037,7 +2010,7 @@ VkResult anv_CreateFramebuffer( return VK_SUCCESS; } -VkResult anv_DestroyFramebuffer( +void anv_DestroyFramebuffer( VkDevice _device, VkFramebuffer _fb) { @@ -2047,8 +2020,6 @@ VkResult anv_DestroyFramebuffer( anv_DestroyDynamicViewportState(anv_device_to_handle(device), fb->vp_state); anv_device_free(device, fb); - - return VK_SUCCESS; } VkResult anv_CreateRenderPass( @@ -2151,7 +2122,7 @@ VkResult anv_CreateRenderPass( return VK_SUCCESS; } -VkResult anv_DestroyRenderPass( +void anv_DestroyRenderPass( VkDevice _device, VkRenderPass _pass) { @@ -2170,8 +2141,6 @@ VkResult anv_DestroyRenderPass( } anv_device_free(device, pass); - - return VK_SUCCESS; } VkResult anv_GetRenderAreaGranularity( diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index f926cc2b1c0..f0d099e0f25 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -353,14 +353,12 @@ anv_CreateImage(VkDevice device, pImage); } -VkResult +void anv_DestroyImage(VkDevice _device, VkImage _image) { ANV_FROM_HANDLE(anv_device, device, _device); anv_device_free(device, anv_image_from_handle(_image)); - - return VK_SUCCESS; } VkResult anv_GetImageSubresourceLayout( @@ -499,7 +497,7 @@ anv_image_view_fini(struct anv_device *device, anv_state_pool_free(&device->surface_state_pool, iview->surface_state); } -VkResult +void anv_DestroyImageView(VkDevice _device, VkImageView _iview) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -507,8 +505,6 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview) anv_image_view_fini(device, iview); anv_device_free(device, iview); - - return VK_SUCCESS; } static void @@ -624,7 +620,7 @@ anv_CreateAttachmentView(VkDevice 
_device, return VK_SUCCESS; } -VkResult +void anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _aview) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -635,6 +631,4 @@ anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _aview) } anv_device_free(device, aview); - - return VK_SUCCESS; } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index daf520f9714..4af4b663c87 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -56,7 +56,7 @@ VkResult anv_CreateShaderModule( return VK_SUCCESS; } -VkResult anv_DestroyShaderModule( +void anv_DestroyShaderModule( VkDevice _device, VkShaderModule _module) { @@ -64,8 +64,6 @@ VkResult anv_DestroyShaderModule( ANV_FROM_HANDLE(anv_shader_module, module, _module); anv_device_free(device, module); - - return VK_SUCCESS; } VkResult anv_CreateShader( @@ -100,7 +98,7 @@ VkResult anv_CreateShader( return VK_SUCCESS; } -VkResult anv_DestroyShader( +void anv_DestroyShader( VkDevice _device, VkShader _shader) { @@ -108,8 +106,6 @@ VkResult anv_DestroyShader( ANV_FROM_HANDLE(anv_shader, shader, _shader); anv_device_free(device, shader); - - return VK_SUCCESS; } @@ -123,12 +119,10 @@ VkResult anv_CreatePipelineCache( stub_return(VK_SUCCESS); } -VkResult anv_DestroyPipelineCache( +void anv_DestroyPipelineCache( VkDevice _device, VkPipelineCache _cache) { - /* VkPipelineCache is a dummy object. 
*/ - return VK_SUCCESS; } size_t anv_GetPipelineCacheSize( @@ -155,7 +149,7 @@ VkResult anv_MergePipelineCaches( stub_return(VK_UNSUPPORTED); } -VkResult anv_DestroyPipeline( +void anv_DestroyPipeline( VkDevice _device, VkPipeline _pipeline) { @@ -167,8 +161,6 @@ VkResult anv_DestroyPipeline( anv_state_stream_finish(&pipeline->program_stream); anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); anv_device_free(pipeline->device, pipeline); - - return VK_SUCCESS; } static const uint32_t vk_to_gen_primitive_type[] = { @@ -418,7 +410,7 @@ VkResult anv_CreatePipelineLayout( return VK_SUCCESS; } -VkResult anv_DestroyPipelineLayout( +void anv_DestroyPipelineLayout( VkDevice _device, VkPipelineLayout _pipelineLayout) { @@ -426,6 +418,4 @@ VkResult anv_DestroyPipelineLayout( ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); anv_device_free(device, pipeline_layout); - - return VK_SUCCESS; } diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index 4ef9d8c4b0c..9464531b8c5 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -72,7 +72,7 @@ VkResult anv_CreateQueryPool( return result; } -VkResult anv_DestroyQueryPool( +void anv_DestroyQueryPool( VkDevice _device, VkQueryPool _pool) { @@ -82,8 +82,6 @@ VkResult anv_DestroyQueryPool( anv_gem_munmap(pool->bo.map, pool->bo.size); anv_gem_close(device, pool->bo.gem_handle); anv_device_free(device, pool); - - return VK_SUCCESS; } VkResult anv_GetQueryPoolResults( -- cgit v1.2.3 From 65964cd49b0fd7cbc90805c8c18b4fd39f975a9c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 5 Oct 2015 21:10:20 -0700 Subject: vk/0.170.2: Re-arrange parameters of vkCmdDraw[Indexed] --- include/vulkan/vulkan.h | 16 ++++++++-------- src/vulkan/anv_meta.c | 4 ++-- src/vulkan/gen7_cmd_buffer.c | 12 ++++++------ src/vulkan/gen8_cmd_buffer.c | 12 ++++++------ 4 files changed, 22 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h 
b/include/vulkan/vulkan.h index 9962cc17179..9cee1c545b6 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2186,8 +2186,8 @@ typedef void (VKAPI *PFN_vkCmdBindDynamicDepthStencilState)(VkCmdBuffer cmdBuffe typedef void (VKAPI *PFN_vkCmdBindDescriptorSets)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); typedef void (VKAPI *PFN_vkCmdBindIndexBuffer)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); typedef void (VKAPI *PFN_vkCmdBindVertexBuffers)(VkCmdBuffer cmdBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); -typedef void (VKAPI *PFN_vkCmdDraw)(VkCmdBuffer cmdBuffer, uint32_t firstVertex, uint32_t vertexCount, uint32_t firstInstance, uint32_t instanceCount); -typedef void (VKAPI *PFN_vkCmdDrawIndexed)(VkCmdBuffer cmdBuffer, uint32_t firstIndex, uint32_t indexCount, int32_t vertexOffset, uint32_t firstInstance, uint32_t instanceCount); +typedef void (VKAPI *PFN_vkCmdDraw)(VkCmdBuffer cmdBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); +typedef void (VKAPI *PFN_vkCmdDrawIndexed)(VkCmdBuffer cmdBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); typedef void (VKAPI *PFN_vkCmdDrawIndirect)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); typedef void (VKAPI *PFN_vkCmdDrawIndexedIndirect)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); typedef void (VKAPI *PFN_vkCmdDispatch)(VkCmdBuffer cmdBuffer, uint32_t x, uint32_t y, uint32_t z); @@ -2811,18 +2811,18 @@ void VKAPI vkCmdBindVertexBuffers( void VKAPI vkCmdDraw( VkCmdBuffer cmdBuffer, - uint32_t firstVertex, uint32_t 
vertexCount, - uint32_t firstInstance, - uint32_t instanceCount); + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance); void VKAPI vkCmdDrawIndexed( VkCmdBuffer cmdBuffer, - uint32_t firstIndex, uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, int32_t vertexOffset, - uint32_t firstInstance, - uint32_t instanceCount); + uint32_t firstInstance); void VKAPI vkCmdDrawIndirect( VkCmdBuffer cmdBuffer, diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 949e5a16601..77c47dac247 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -414,7 +414,7 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, device->meta_state.shared.cb_state); ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), - 0, 3, 0, num_instances); + 3, num_instances, 0, 0); } void @@ -926,7 +926,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, device->meta_state.blit.pipeline_layout, 0, 1, &set, 0, NULL); - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, 1); + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 5803569502f..2b9ed5772e1 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -332,10 +332,10 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) void gen7_CmdDraw( VkCmdBuffer cmdBuffer, - uint32_t firstVertex, uint32_t vertexCount, - uint32_t firstInstance, - uint32_t instanceCount) + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; @@ -354,11 +354,11 @@ void gen7_CmdDraw( void gen7_CmdDrawIndexed( VkCmdBuffer cmdBuffer, - uint32_t firstIndex, uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, int32_t vertexOffset, - uint32_t firstInstance, - 
uint32_t instanceCount) + uint32_t firstInstance) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 3ed32e8de3d..f48519e51b9 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -184,10 +184,10 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) void gen8_CmdDraw( VkCmdBuffer cmdBuffer, - uint32_t firstVertex, uint32_t vertexCount, - uint32_t firstInstance, - uint32_t instanceCount) + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); @@ -204,11 +204,11 @@ void gen8_CmdDraw( void gen8_CmdDrawIndexed( VkCmdBuffer cmdBuffer, - uint32_t firstIndex, uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, int32_t vertexOffset, - uint32_t firstInstance, - uint32_t instanceCount) + uint32_t firstInstance) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); -- cgit v1.2.3 From a6eba403e2a2274c8aca62a59a206fc583134b4d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 5 Oct 2015 21:17:12 -0700 Subject: vk/0.170.2: Update to the new queue family properties query --- include/vulkan/vulkan.h | 15 +++++---------- src/vulkan/anv_device.c | 21 ++++++++------------- 2 files changed, 13 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 9cee1c545b6..69decf94ce6 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1294,7 +1294,7 @@ typedef struct { VkQueueFlags queueFlags; uint32_t queueCount; VkBool32 supportsTimestamps; -} VkPhysicalDeviceQueueProperties; +} VkQueueFamilyProperties; typedef struct { VkMemoryPropertyFlags propertyFlags; @@ -2074,8 +2074,7 @@ typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevi typedef VkResult (VKAPI 
*PFN_vkGetPhysicalDeviceImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageFormatProperties* pImageFormatProperties); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceLimits)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceLimits* pLimits); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueCount)(VkPhysicalDevice physicalDevice, uint32_t* pCount); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueProperties)(VkPhysicalDevice physicalDevice, uint32_t count, VkPhysicalDeviceQueueProperties* pQueueProperties); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueFamilyProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); @@ -2257,14 +2256,10 @@ VkResult VKAPI vkGetPhysicalDeviceProperties( VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); -VkResult VKAPI vkGetPhysicalDeviceQueueCount( +VkResult VKAPI vkGetPhysicalDeviceQueueFamilyProperties( VkPhysicalDevice physicalDevice, - uint32_t* pCount); - -VkResult VKAPI vkGetPhysicalDeviceQueueProperties( - VkPhysicalDevice physicalDevice, - uint32_t count, - VkPhysicalDeviceQueueProperties* pQueueProperties); + uint32_t* pCount, + VkQueueFamilyProperties* pQueueFamilyProperties); VkResult VKAPI vkGetPhysicalDeviceMemoryProperties( VkPhysicalDevice physicalDevice, diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index dd8e3ffa0af..31b7565b984 100644 
--- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -451,23 +451,18 @@ VkResult anv_GetPhysicalDeviceProperties( return VK_SUCCESS; } -VkResult anv_GetPhysicalDeviceQueueCount( +VkResult anv_GetPhysicalDeviceQueueFamilyProperties( VkPhysicalDevice physicalDevice, - uint32_t* pCount) + uint32_t* pCount, + VkQueueFamilyProperties* pQueueFamilyProperties) { - *pCount = 1; - - return VK_SUCCESS; -} + if (pQueueFamilyProperties == NULL) { + *pCount = 1; + } -VkResult anv_GetPhysicalDeviceQueueProperties( - VkPhysicalDevice physicalDevice, - uint32_t count, - VkPhysicalDeviceQueueProperties* pQueueProperties) -{ - assert(count == 1); + assert(*pCount >= 1); - *pQueueProperties = (VkPhysicalDeviceQueueProperties) { + *pQueueFamilyProperties = (VkQueueFamilyProperties) { .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_DMA_BIT, -- cgit v1.2.3 From 8ba684cbad6963be54f27aeeb42708069f8d9bd3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 6 Oct 2015 09:25:03 -0700 Subject: vk/0.170.2: Rename extension and layer query functions --- include/vulkan/vulkan.h | 16 ++++++++-------- src/vulkan/anv_device.c | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 69decf94ce6..26fc48b5acb 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2080,10 +2080,10 @@ typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instanc typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); typedef void (VKAPI *PFN_vkDestroyDevice)(VkDevice device); -typedef VkResult (VKAPI *PFN_vkGetGlobalExtensionProperties)(const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); -typedef VkResult (VKAPI 
*PFN_vkGetPhysicalDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkGetGlobalLayerProperties)(uint32_t* pCount, VkLayerProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceLayerProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkLayerProperties* pProperties); +typedef VkResult (VKAPI *PFN_vkEnumerateInstanceExtensionProperties)(const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); +typedef VkResult (VKAPI *PFN_vkEnumerateDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); +typedef VkResult (VKAPI *PFN_vkEnumerateInstanceLayerProperties)(uint32_t* pCount, VkLayerProperties* pProperties); +typedef VkResult (VKAPI *PFN_vkEnumerateDeviceLayerProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkLayerProperties* pProperties); typedef VkResult (VKAPI *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue); typedef VkResult (VKAPI *PFN_vkQueueSubmit)(VkQueue queue, uint32_t cmdBufferCount, const VkCmdBuffer* pCmdBuffers, VkFence fence); typedef VkResult (VKAPI *PFN_vkQueueWaitIdle)(VkQueue queue); @@ -2281,22 +2281,22 @@ VkResult VKAPI vkCreateDevice( void VKAPI vkDestroyDevice( VkDevice device); -VkResult VKAPI vkGetGlobalExtensionProperties( +VkResult VKAPI vkEnumerateInstanceExtensionProperties( const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); -VkResult VKAPI vkGetPhysicalDeviceExtensionProperties( +VkResult VKAPI vkEnumerateDeviceExtensionProperties( VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); -VkResult VKAPI vkGetGlobalLayerProperties( +VkResult VKAPI vkEnumerateInstanceLayerProperties( uint32_t* pCount, VkLayerProperties* pProperties); -VkResult 
VKAPI vkGetPhysicalDeviceLayerProperties( +VkResult VKAPI vkEnumerateDeviceLayerProperties( VkPhysicalDevice physicalDevice, uint32_t* pCount, VkLayerProperties* pProperties); diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 31b7565b984..f92b5fbf21b 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -674,7 +674,7 @@ void anv_DestroyDevice( anv_instance_free(device->instance, device); } -VkResult anv_GetGlobalExtensionProperties( +VkResult anv_EnumerateInstanceExtensionProperties( const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties) @@ -692,7 +692,7 @@ VkResult anv_GetGlobalExtensionProperties( return VK_SUCCESS; } -VkResult anv_GetPhysicalDeviceExtensionProperties( +VkResult anv_EnumerateDeviceExtensionProperties( VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pCount, @@ -711,7 +711,7 @@ VkResult anv_GetPhysicalDeviceExtensionProperties( return VK_SUCCESS; } -VkResult anv_GetGlobalLayerProperties( +VkResult anv_EnumerateInstanceLayerProperties( uint32_t* pCount, VkLayerProperties* pProperties) { @@ -724,7 +724,7 @@ VkResult anv_GetGlobalLayerProperties( return vk_error(VK_ERROR_INVALID_LAYER); } -VkResult anv_GetPhysicalDeviceLayerProperties( +VkResult anv_EnumerateDeviceLayerProperties( VkPhysicalDevice physicalDevice, uint32_t* pCount, VkLayerProperties* pProperties) -- cgit v1.2.3 From a145acd8121bb563fa01e059b890974dae757c5b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 6 Oct 2015 09:32:01 -0700 Subject: vk/0.170.2: Remove the pCount parameter from AllocDescriptorSets --- include/vulkan/vulkan.h | 5 ++--- src/vulkan/anv_device.c | 19 +++++++++---------- src/vulkan/anv_meta.c | 3 +-- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 26fc48b5acb..470e2f4de2c 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2153,7 +2153,7 @@ typedef void (VKAPI 
*PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescri typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, VkDescriptorPoolUsage poolUsage, uint32_t maxSets, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); typedef void (VKAPI *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); -typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets, uint32_t* pCount); +typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets); typedef VkResult (VKAPI *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t count, const VkDescriptorSet* pDescriptorSets); typedef VkResult (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); typedef VkResult (VKAPI *PFN_vkCreateDynamicViewportState)(VkDevice device, const VkDynamicViewportStateCreateInfo* pCreateInfo, VkDynamicViewportState* pState); @@ -2651,8 +2651,7 @@ VkResult VKAPI vkAllocDescriptorSets( VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, - VkDescriptorSet* pDescriptorSets, - uint32_t* pCount); + VkDescriptorSet* pDescriptorSets); VkResult VKAPI vkFreeDescriptorSets( VkDevice device, diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index f92b5fbf21b..3ab91726024 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1652,29 +1652,28 @@ VkResult anv_AllocDescriptorSets( 
VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, - VkDescriptorSet* pDescriptorSets, - uint32_t* pCount) + VkDescriptorSet* pDescriptorSets) { ANV_FROM_HANDLE(anv_device, device, _device); - VkResult result; + VkResult result = VK_SUCCESS; struct anv_descriptor_set *set; + uint32_t i; - for (uint32_t i = 0; i < count; i++) { + for (i = 0; i < count; i++) { ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pSetLayouts[i]); result = anv_descriptor_set_create(device, layout, &set); - if (result != VK_SUCCESS) { - *pCount = i; - return result; - } + if (result != VK_SUCCESS) + break; pDescriptorSets[i] = anv_descriptor_set_to_handle(set); } - *pCount = count; + if (result != VK_SUCCESS) + anv_FreeDescriptorSets(_device, descriptorPool, i, pDescriptorSets); - return VK_SUCCESS; + return result; } VkResult anv_FreeDescriptorSets( diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 77c47dac247..0fb4c50adba 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -806,11 +806,10 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, sizeof(struct vue_header), }); - uint32_t count; VkDescriptorSet set; anv_AllocDescriptorSets(anv_device_to_handle(device), dummy_desc_pool, VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, - 1, &device->meta_state.blit.ds_layout, &set, &count); + 1, &device->meta_state.blit.ds_layout, &set); anv_UpdateDescriptorSets(anv_device_to_handle(device), 1, /* writeCount */ (VkWriteDescriptorSet[]) { -- cgit v1.2.3 From 02a9be31d617ff83f02ff4ce7d3c4288b72ce31b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 6 Oct 2015 09:37:21 -0700 Subject: vk/0.170.2: Add the flags parameter to GetPhysicalDeviceImageFormatProperties --- include/vulkan/vulkan.h | 3 ++- src/vulkan/anv_formats.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 470e2f4de2c..408e4045ad5 100644 --- a/include/vulkan/vulkan.h +++ 
b/include/vulkan/vulkan.h @@ -2071,7 +2071,7 @@ typedef void (VKAPI *PFN_vkDestroyInstance)(VkInstance instance); typedef VkResult (VKAPI *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageFormatProperties* pImageFormatProperties); +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceLimits)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceLimits* pLimits); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueFamilyProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties); @@ -2246,6 +2246,7 @@ VkResult VKAPI vkGetPhysicalDeviceImageFormatProperties( VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, + VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties); VkResult VKAPI vkGetPhysicalDeviceLimits( diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 6173ee7872a..cae575bb04b 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -327,6 +327,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( VkImageType type, 
VkImageTiling tiling, VkImageUsageFlags usage, + VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties) { ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); -- cgit v1.2.3 From d1908d2c332a4f2d18940bbe14f1e584b1a11ffc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 6 Oct 2015 09:40:39 -0700 Subject: vk/0.170.2: Rework parameters to CmdClearDepthStencil functions --- include/vulkan/vulkan.h | 12 +++++------- src/vulkan/anv_meta.c | 8 +++----- 2 files changed, 8 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 408e4045ad5..b7a5d6f9f01 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2199,9 +2199,9 @@ typedef void (VKAPI *PFN_vkCmdCopyImageToBuffer)(VkCmdBuffer cmdBuffer, VkImage typedef void (VKAPI *PFN_vkCmdUpdateBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); typedef void (VKAPI *PFN_vkCmdFillBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); typedef void (VKAPI *PFN_vkCmdClearColorImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -typedef void (VKAPI *PFN_vkCmdClearDepthStencilImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, float depth, uint32_t stencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); +typedef void (VKAPI *PFN_vkCmdClearDepthStencilImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); typedef void (VKAPI *PFN_vkCmdClearColorAttachment)(VkCmdBuffer cmdBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rectCount, const VkRect3D* pRects); 
-typedef void (VKAPI *PFN_vkCmdClearDepthStencilAttachment)(VkCmdBuffer cmdBuffer, VkImageAspectFlags imageAspectMask, VkImageLayout imageLayout, float depth, uint32_t stencil, uint32_t rectCount, const VkRect3D* pRects); +typedef void (VKAPI *PFN_vkCmdClearDepthStencilAttachment)(VkCmdBuffer cmdBuffer, VkImageAspectFlags aspectMask, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rectCount, const VkRect3D* pRects); typedef void (VKAPI *PFN_vkCmdResolveImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); typedef void (VKAPI *PFN_vkCmdSetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipelineStageFlags stageMask); typedef void (VKAPI *PFN_vkCmdResetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipelineStageFlags stageMask); @@ -2912,8 +2912,7 @@ void VKAPI vkCmdClearDepthStencilImage( VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, - float depth, - uint32_t stencil, + const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); @@ -2927,10 +2926,9 @@ void VKAPI vkCmdClearColorAttachment( void VKAPI vkCmdClearDepthStencilAttachment( VkCmdBuffer cmdBuffer, - VkImageAspectFlags imageAspectMask, + VkImageAspectFlags aspectMask, VkImageLayout imageLayout, - float depth, - uint32_t stencil, + const VkClearDepthStencilValue* pDepthStencil, uint32_t rectCount, const VkRect3D* pRects); diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 0fb4c50adba..4e6ddf49f76 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1654,8 +1654,7 @@ void anv_CmdClearDepthStencilImage( VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, - float depth, - uint32_t stencil, + const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { @@ -1675,10 +1674,9 @@ void 
anv_CmdClearColorAttachment( void anv_CmdClearDepthStencilAttachment( VkCmdBuffer cmdBuffer, - VkImageAspectFlags imageAspectMask, + VkImageAspectFlags aspectMask, VkImageLayout imageLayout, - float depth, - uint32_t stencil, + const VkClearDepthStencilValue* pDepthStencil, uint32_t rectCount, const VkRect3D* pRects) { -- cgit v1.2.3 From 81c7fa877274b5767657497699f1d46981784fc9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 6 Oct 2015 10:04:04 -0700 Subject: vk/0.170.2: Rework blits to use ImageSubresourceCopy --- include/vulkan/vulkan.h | 17 ++++++++++++----- src/vulkan/anv_meta.c | 19 ++++++++++++++----- 2 files changed, 26 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index c7b7f47c3c3..cec176f3bf9 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1949,18 +1949,25 @@ typedef struct { } VkBufferCopy; typedef struct { - VkImageSubresource srcSubresource; + VkImageAspect aspect; + uint32_t mipLevel; + uint32_t arrayLayer; + uint32_t arraySize; +} VkImageSubresourceCopy; + +typedef struct { + VkImageSubresourceCopy srcSubresource; VkOffset3D srcOffset; - VkImageSubresource destSubresource; + VkImageSubresourceCopy destSubresource; VkOffset3D destOffset; VkExtent3D extent; } VkImageCopy; typedef struct { - VkImageSubresource srcSubresource; + VkImageSubresourceCopy srcSubresource; VkOffset3D srcOffset; VkExtent3D srcExtent; - VkImageSubresource destSubresource; + VkImageSubresourceCopy destSubresource; VkOffset3D destOffset; VkExtent3D destExtent; } VkImageBlit; @@ -1969,7 +1976,7 @@ typedef struct { VkDeviceSize bufferOffset; uint32_t bufferRowLength; uint32_t bufferImageHeight; - VkImageSubresource imageSubresource; + VkImageSubresourceCopy imageSubresource; VkOffset3D imageOffset; VkExtent3D imageExtent; } VkBufferImageCopy; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 4e6ddf49f76..ad65403b100 100644 --- a/src/vulkan/anv_meta.c +++ 
b/src/vulkan/anv_meta.c @@ -502,13 +502,13 @@ meta_blit_get_src_image_view_type(const struct anv_image *src_image) static uint32_t meta_blit_get_dest_view_base_array_slice(const struct anv_image *dest_image, - const VkImageSubresource *dest_subresource, + const VkImageSubresourceCopy *dest_subresource, const VkOffset3D *dest_offset) { switch (dest_image->type) { case VK_IMAGE_TYPE_1D: case VK_IMAGE_TYPE_2D: - return dest_subresource->arraySlice; + return dest_subresource->arrayLayer; case VK_IMAGE_TYPE_3D: /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer, * but meta does it anyway. When doing so, we translate the @@ -1163,7 +1163,7 @@ void anv_CmdCopyImage( .aspectMask = 1 << pRegions[r].srcSubresource.aspect, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, .mipLevels = 1, - .baseArraySlice = pRegions[r].srcSubresource.arraySlice, + .baseArraySlice = pRegions[r].srcSubresource.arrayLayer, .arraySize = 1 }, }, @@ -1180,6 +1180,9 @@ void anv_CmdCopyImage( &pRegions[r].destSubresource, &pRegions[r].destOffset); + if (pRegions[r].srcSubresource.arraySize > 1) + anv_finishme("FINISHME: copy multiple array layers"); + if (pRegions[r].extent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); @@ -1249,7 +1252,7 @@ void anv_CmdBlitImage( .aspectMask = 1 << pRegions[r].srcSubresource.aspect, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, .mipLevels = 1, - .baseArraySlice = pRegions[r].srcSubresource.arraySlice, + .baseArraySlice = pRegions[r].srcSubresource.arrayLayer, .arraySize = 1 }, }, @@ -1266,6 +1269,9 @@ void anv_CmdBlitImage( &pRegions[r].destSubresource, &pRegions[r].destOffset); + if (pRegions[r].srcSubresource.arraySize > 1) + anv_finishme("FINISHME: copy multiple array layers"); + if (pRegions[r].destExtent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); @@ -1446,6 +1452,9 @@ void anv_CmdCopyImageToBuffer( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { + if 
(pRegions[r].imageSubresource.arraySize > 1) + anv_finishme("FINISHME: copy multiple array layers"); + if (pRegions[r].imageExtent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); @@ -1466,7 +1475,7 @@ void anv_CmdCopyImageToBuffer( .aspectMask = 1 << pRegions[r].imageSubresource.aspect, .baseMipLevel = pRegions[r].imageSubresource.mipLevel, .mipLevels = 1, - .baseArraySlice = pRegions[r].imageSubresource.arraySlice, + .baseArraySlice = pRegions[r].imageSubresource.arrayLayer, .arraySize = 1 }, }, -- cgit v1.2.3 From bd4cde708a2ad0bea04eb318d7c54447ccbbc7af Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 6 Oct 2015 10:07:47 -0700 Subject: vk/0.170.2: Rename fields in VkClearColorValue --- include/vulkan/vulkan.h | 6 +++--- src/vulkan/anv_device.c | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index cec176f3bf9..2c448b54f38 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1982,9 +1982,9 @@ typedef struct { } VkBufferImageCopy; typedef union { - float f32[4]; - int32_t s32[4]; - uint32_t u32[4]; + float float32[4]; + int32_t int32[4]; + uint32_t uint32[4]; } VkClearColorValue; typedef struct { diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 3ab91726024..a956c3cdf44 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -547,12 +547,12 @@ static void anv_device_init_border_colors(struct anv_device *device) { static const VkClearColorValue border_colors[] = { - [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 0.0 } }, - [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 1.0 } }, - [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .f32 = { 1.0, 1.0, 1.0, 1.0 } }, - [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .u32 = { 0, 0, 0, 0 } }, - [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .u32 = { 0, 0, 0, 1 } }, - [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .u32 = { 1, 1, 1, 1 } }, 
+ [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } }, + [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } }, + [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } }, + [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } }, }; device->border_colors = -- cgit v1.2.3 From cc389b1482f2860b83d277cebca0c2058c403410 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 6 Oct 2015 10:11:50 -0700 Subject: vk/0.170.2: Rename cs to stage in ComputePipelineCreateInfo --- include/vulkan/vulkan.h | 2 +- src/vulkan/gen8_pipeline.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index bdd44a01073..62fbe97c616 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1714,7 +1714,7 @@ typedef struct { typedef struct { VkStructureType sType; const void* pNext; - VkPipelineShaderStageCreateInfo cs; + VkPipelineShaderStageCreateInfo stage; VkPipelineCreateFlags flags; VkPipelineLayout layout; VkPipeline basePipelineHandle; diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 4900715a47a..0e2526fce20 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -571,7 +571,7 @@ VkResult gen8_compute_pipeline_create( memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); pipeline->shaders[VK_SHADER_STAGE_COMPUTE] = - anv_shader_from_handle(pCreateInfo->cs.shader); + anv_shader_from_handle(pCreateInfo->stage.shader); pipeline->use_repclear = false; -- cgit v1.2.3 From 6b5ce5daf5145a1e63ca28c537689583056ac28d Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 5 Oct 2015 18:13:48 -0700 Subject: vk: Update comments for anv_image_view - Document the extent member. It's the extent of the view's base level. 
- s/VkAttachmentView/VkImageView/ --- src/vulkan/anv_private.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a0d4f8c2867..b0657cfbd14 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1244,12 +1244,12 @@ struct anv_buffer_view { }; struct anv_image_view { - const struct anv_image *image; /**< VkAttachmentViewCreateInfo::image */ - const struct anv_format *format; /**< VkAttachmentViewCreateInfo::format */ + const struct anv_image *image; /**< VkImageViewCreateInfo::image */ + const struct anv_format *format; /**< VkImageViewCreateInfo::format */ struct anv_state surface_state; /**< RENDER_SURFACE_STATE */ struct anv_bo *bo; uint32_t offset; /**< Offset into bo. */ - VkExtent3D extent; + VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ }; enum anv_attachment_view_type { -- cgit v1.2.3 From d4446a7e58e4c57da68432ed108320936a910e5d Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 6 Oct 2015 11:42:43 -0700 Subject: vk: Merge anv_attachment_view into anv_image_view This prepares for merging VkAttachmentView into VkImageView. --- src/vulkan/anv_cmd_buffer.c | 13 ++++------ src/vulkan/anv_device.c | 9 +++---- src/vulkan/anv_image.c | 49 +++++++++++++++++-------------------- src/vulkan/anv_meta.c | 58 +++++++++++++++++++++++--------------------- src/vulkan/anv_private.h | 21 ++++------------ src/vulkan/gen7_cmd_buffer.c | 3 +-- src/vulkan/gen7_state.c | 5 +--- src/vulkan/gen8_cmd_buffer.c | 3 +-- src/vulkan/gen8_state.c | 5 +--- 9 files changed, 71 insertions(+), 95 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 64f10ffe74e..470ea11df6f 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -431,11 +431,8 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, * put the color attachments into the binding table. 
*/ for (uint32_t a = 0; a < attachments; a++) { - const struct anv_attachment_view *aview = + const struct anv_image_view *iview = fb->attachments[subpass->color_attachments[a]]; - const struct anv_image_view *iview = &aview->image_view; - - assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); bt_map[a] = iview->surface_state.offset + state_offset; add_surface_state_reloc(cmd_buffer, iview->surface_state, @@ -814,7 +811,7 @@ VkResult anv_ResetCommandPool( /** * Return NULL if the current subpass has no depthstencil attachment. */ -const struct anv_attachment_view * +const struct anv_image_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) { const struct anv_subpass *subpass = cmd_buffer->state.subpass; @@ -823,10 +820,10 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED) return NULL; - const struct anv_attachment_view *aview = + const struct anv_image_view *iview = fb->attachments[subpass->depth_stencil_attachment]; - assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); + assert(anv_format_is_depth_or_stencil(iview->format)); - return aview; + return iview; } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index a956c3cdf44..a7430b858b5 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1960,7 +1960,7 @@ VkResult anv_CreateFramebuffer( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); size_t size = sizeof(*framebuffer) + - sizeof(struct anv_attachment_view *) * pCreateInfo->attachmentCount; + sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount; framebuffer = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (framebuffer == NULL) @@ -1968,10 +1968,9 @@ VkResult anv_CreateFramebuffer( framebuffer->attachment_count = pCreateInfo->attachmentCount; for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - 
ANV_FROM_HANDLE(anv_attachment_view, aview, - pCreateInfo->pAttachments[i].view); - - framebuffer->attachments[i] = aview; + VkAttachmentView _aview = pCreateInfo->pAttachments[i].view; + VkImageView _iview = { _aview.handle }; + framebuffer->attachments[i] = anv_image_view_from_handle(_iview); } framebuffer->width = pCreateInfo->width; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index f0d099e0f25..8eb5a603c2b 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -490,41 +490,32 @@ anv_CreateImageView(VkDevice _device, return VK_SUCCESS; } -static void -anv_image_view_fini(struct anv_device *device, - struct anv_image_view *iview) -{ - anv_state_pool_free(&device->surface_state_pool, iview->surface_state); -} - void anv_DestroyImageView(VkDevice _device, VkImageView _iview) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_image_view, iview, _iview); - anv_image_view_fini(device, iview); + anv_state_pool_free(&device->surface_state_pool, iview->surface_state); anv_device_free(device, iview); } static void -anv_depth_stencil_view_init(struct anv_attachment_view *aview, +anv_depth_stencil_view_init(struct anv_image_view *iview, const VkAttachmentViewCreateInfo *pCreateInfo) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - aview->attachment_type = ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL; - /* XXX: We don't handle any of these */ anv_assert(pCreateInfo->mipLevel == 0); anv_assert(pCreateInfo->baseArraySlice == 0); anv_assert(pCreateInfo->arraySize == 1); - aview->image_view.image = image; - aview->image_view.format = anv_format_for_vk_format(pCreateInfo->format); + iview->image = image; + iview->format = anv_format_for_vk_format(pCreateInfo->format); assert(anv_format_is_depth_or_stencil(image->format)); - assert(anv_format_is_depth_or_stencil(aview->image_view.format)); + assert(anv_format_is_depth_or_stencil(iview->format)); } struct anv_surface * @@ -574,17 +565,17 @@ 
anv_image_get_surface_for_color_attachment(struct anv_image *image) } void -anv_color_attachment_view_init(struct anv_attachment_view *aview, +anv_color_attachment_view_init(struct anv_image_view *iview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { switch (device->info.gen) { case 7: - gen7_color_attachment_view_init(aview, device, pCreateInfo, cmd_buffer); + gen7_color_attachment_view_init(iview, device, pCreateInfo, cmd_buffer); break; case 8: - gen8_color_attachment_view_init(aview, device, pCreateInfo, cmd_buffer); + gen8_color_attachment_view_init(iview, device, pCreateInfo, cmd_buffer); break; default: unreachable("unsupported gen\n"); @@ -597,25 +588,25 @@ anv_CreateAttachmentView(VkDevice _device, VkAttachmentView *pView) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_attachment_view *aview; + struct anv_image_view *iview; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO); - aview = anv_device_alloc(device, sizeof(*aview), 8, + iview = anv_device_alloc(device, sizeof(*iview), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (aview == NULL) + if (iview == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); if (anv_format_is_depth_or_stencil(format)) { - anv_depth_stencil_view_init(aview, pCreateInfo); + anv_depth_stencil_view_init(iview, pCreateInfo); } else { - anv_color_attachment_view_init(aview, device, pCreateInfo, NULL); + anv_color_attachment_view_init(iview, device, pCreateInfo, NULL); } - *pView = anv_attachment_view_to_handle(aview); + pView->handle = anv_image_view_to_handle(iview).handle; return VK_SUCCESS; } @@ -624,11 +615,15 @@ void anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _aview) { ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_attachment_view, aview, _aview); + VkImageView _iview = { .handle = _aview.handle }; + 
ANV_FROM_HANDLE(anv_image_view, iview, _iview); - if (aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR) { - anv_image_view_fini(device, &aview->image_view); + /* Depth and stencil render targets have no RENDER_SURFACE_STATE. Instead, + * they use 3DSTATE_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. + */ + if (!anv_format_is_depth_or_stencil(iview->format)) { + anv_state_pool_free(&device->surface_state_pool, iview->surface_state); } - anv_device_free(device, aview); + anv_device_free(device, iview); } diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index ad65403b100..c214dc30a4c 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -733,12 +733,15 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VkOffset3D src_offset, VkExtent3D src_extent, struct anv_image *dest_image, - struct anv_attachment_view *dest_aview, + struct anv_image_view *dest_iview, VkOffset3D dest_offset, VkExtent3D dest_extent) { struct anv_device *device = cmd_buffer->device; - struct anv_image_view *dest_iview = &dest_aview->image_view; + + VkImageView dest_iview_h = anv_image_view_to_handle(dest_iview); + VkAttachmentView dest_aview_h = { .handle = dest_iview_h.handle }; + VkDescriptorPool dummy_desc_pool = { .handle = 1 }; struct blit_vb_data { @@ -836,7 +839,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .attachmentCount = 1, .pAttachments = (VkAttachmentBindInfo[]) { { - .view = anv_attachment_view_to_handle(dest_aview), + .view = dest_aview_h, .layout = VK_IMAGE_LAYOUT_GENERAL } }, @@ -1021,8 +1024,8 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, }, cmd_buffer); - struct anv_attachment_view dest_aview; - anv_color_attachment_view_init(&dest_aview, cmd_buffer->device, + struct anv_image_view dest_iview; + anv_color_attachment_view_init(&dest_iview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = dest_image, @@ -1039,7 +1042,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, 
(VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }, anv_image_from_handle(dest_image), - &dest_aview, + &dest_iview, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }); @@ -1186,8 +1189,8 @@ void anv_CmdCopyImage( if (pRegions[r].extent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); - struct anv_attachment_view dest_aview; - anv_color_attachment_view_init(&dest_aview, cmd_buffer->device, + struct anv_image_view dest_iview; + anv_color_attachment_view_init(&dest_iview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, @@ -1202,7 +1205,7 @@ void anv_CmdCopyImage( src_image, &src_iview, pRegions[r].srcOffset, pRegions[r].extent, - dest_image, &dest_aview, + dest_image, &dest_iview, dest_offset, pRegions[r].extent); } @@ -1275,8 +1278,8 @@ void anv_CmdBlitImage( if (pRegions[r].destExtent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); - struct anv_attachment_view dest_aview; - anv_color_attachment_view_init(&dest_aview, cmd_buffer->device, + struct anv_image_view dest_iview; + anv_color_attachment_view_init(&dest_iview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, @@ -1291,7 +1294,7 @@ void anv_CmdBlitImage( src_image, &src_iview, pRegions[r].srcOffset, pRegions[r].srcExtent, - dest_image, &dest_aview, + dest_image, &dest_iview, dest_offset, pRegions[r].destExtent); } @@ -1405,8 +1408,8 @@ void anv_CmdCopyBufferToImage( if (pRegions[r].imageExtent.depth > 1) anv_finishme("FINISHME: copy multiple depth layers"); - struct anv_attachment_view dest_aview; - anv_color_attachment_view_init(&dest_aview, cmd_buffer->device, + struct anv_image_view dest_iview; + anv_color_attachment_view_init(&dest_iview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = anv_image_to_handle(dest_image), @@ -1423,7 
+1426,7 @@ void anv_CmdCopyBufferToImage( (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent, dest_image, - &dest_aview, + &dest_iview, dest_offset, pRegions[r].imageExtent); @@ -1490,8 +1493,8 @@ void anv_CmdCopyImageToBuffer( dest_format, &pRegions[r]); - struct anv_attachment_view dest_aview; - anv_color_attachment_view_init(&dest_aview, cmd_buffer->device, + struct anv_image_view dest_iview; + anv_color_attachment_view_init(&dest_iview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = destImage, @@ -1508,7 +1511,7 @@ void anv_CmdCopyImageToBuffer( pRegions[r].imageOffset, pRegions[r].imageExtent, anv_image_from_handle(destImage), - &dest_aview, + &dest_iview, (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent); @@ -1555,8 +1558,8 @@ void anv_CmdClearColorImage( for (uint32_t r = 0; r < rangeCount; r++) { for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { - struct anv_attachment_view aview; - anv_color_attachment_view_init(&aview, cmd_buffer->device, + struct anv_image_view iview; + anv_color_attachment_view_init(&iview, cmd_buffer->device, &(VkAttachmentViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, .image = _image, @@ -1567,7 +1570,8 @@ void anv_CmdClearColorImage( }, cmd_buffer); - struct anv_image_view *iview = &aview.image_view; + VkImageView iview_h = anv_image_view_to_handle(&iview); + VkAttachmentView aview_h = { .handle = iview_h.handle }; VkFramebuffer fb; anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), @@ -1576,12 +1580,12 @@ void anv_CmdClearColorImage( .attachmentCount = 1, .pAttachments = (VkAttachmentBindInfo[]) { { - .view = anv_attachment_view_to_handle(&aview), + .view = aview_h, .layout = VK_IMAGE_LAYOUT_GENERAL } }, - .width = iview->extent.width, - .height = iview->extent.height, + .width = iview.extent.width, + .height = iview.extent.height, .layers = 1 }, &fb); @@ -1592,7 
+1596,7 @@ void anv_CmdClearColorImage( .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, - .format = iview->format->vk_format, + .format = iview.format->vk_format, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_GENERAL, @@ -1628,8 +1632,8 @@ void anv_CmdClearColorImage( .renderArea = { .offset = { 0, 0, }, .extent = { - .width = iview->extent.width, - .height = iview->extent.height, + .width = iview.extent.width, + .height = iview.extent.height, }, }, .renderPass = pass, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index b0657cfbd14..3326625ebb3 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -994,7 +994,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, struct anv_render_pass *pass, const VkClearValue *clear_values); -const struct anv_attachment_view * +const struct anv_image_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); @@ -1252,16 +1252,6 @@ struct anv_image_view { VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. 
*/ }; -enum anv_attachment_view_type { - ANV_ATTACHMENT_VIEW_TYPE_COLOR, - ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL, -}; - -struct anv_attachment_view { - enum anv_attachment_view_type attachment_type; - struct anv_image_view image_view; -}; - struct anv_image_create_info { const VkImageCreateInfo *vk_info; bool force_tile_mode; @@ -1297,17 +1287,17 @@ gen8_image_view_init(struct anv_image_view *iview, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void anv_color_attachment_view_init(struct anv_attachment_view *aview, +void anv_color_attachment_view_init(struct anv_image_view *iview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void gen7_color_attachment_view_init(struct anv_attachment_view *aview, +void gen7_color_attachment_view_init(struct anv_image_view *iview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void gen8_color_attachment_view_init(struct anv_attachment_view *aview, +void gen8_color_attachment_view_init(struct anv_image_view *iview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); @@ -1338,7 +1328,7 @@ struct anv_framebuffer { VkDynamicViewportState vp_state; uint32_t attachment_count; - const struct anv_attachment_view * attachments[0]; + const struct anv_image_view * attachments[0]; }; struct anv_subpass { @@ -1424,7 +1414,6 @@ ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCmdPool) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_attachment_view, VkAttachmentView) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView); ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 
2b9ed5772e1..709e82e78a0 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -529,9 +529,8 @@ static void gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_attachment_view *aview = + const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - const struct anv_image_view *iview = aview ? &aview->image_view : NULL; const struct anv_image *image = iview ? iview->image : NULL; const bool has_depth = iview && iview->format->depth_format; const bool has_stencil = iview && iview->format->has_stencil; diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index ed9d9f8c568..1959b8b5a49 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -360,18 +360,15 @@ gen7_image_view_init(struct anv_image_view *iview, } void -gen7_color_attachment_view_init(struct anv_attachment_view *aview, +gen7_color_attachment_view_init(struct anv_image_view *iview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_image_view *iview = &aview->image_view; struct anv_surface *surface = anv_image_get_surface_for_color_attachment(image); - aview->attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; - anv_assert(pCreateInfo->arraySize > 0); anv_assert(pCreateInfo->mipLevel < image->levels); anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index f48519e51b9..b516193458c 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -458,9 +458,8 @@ static void gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_attachment_view *aview = + const struct anv_image_view 
*iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - const struct anv_image_view *iview = aview ? &aview->image_view : NULL; const struct anv_image *image = iview ? iview->image : NULL; const bool has_depth = iview && iview->format->depth_format; const bool has_stencil = iview && iview->format->has_stencil; diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 4990becf60a..d2a403b2f56 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -281,13 +281,12 @@ gen8_image_view_init(struct anv_image_view *iview, } void -gen8_color_attachment_view_init(struct anv_attachment_view *aview, +gen8_color_attachment_view_init(struct anv_image_view *iview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_image_view *iview = &aview->image_view; struct anv_surface *surface = anv_image_get_surface_for_color_attachment(image); const struct anv_format *format_info = @@ -296,8 +295,6 @@ gen8_color_attachment_view_init(struct anv_attachment_view *aview, uint32_t depth = 1; /* RENDER_SURFACE_STATE::Depth */ uint32_t rt_view_extent = 1; /* RENDER_SURFACE_STATE::RenderTargetViewExtent */ - aview->attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; - anv_assert(pCreateInfo->arraySize > 0); anv_assert(pCreateInfo->mipLevel < image->levels); anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); -- cgit v1.2.3 From 1e4263b7d2e3d6f88527510eaaf6ca6df2ce4058 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 6 Oct 2015 10:27:50 -0700 Subject: vk/0.170.2: s/baseArraySlice/baseArrayLayer/ --- include/vulkan/vulkan.h | 4 ++-- src/vulkan/anv_image.c | 6 +++--- src/vulkan/anv_meta.c | 12 ++++++------ src/vulkan/gen7_state.c | 2 +- src/vulkan/gen8_state.c | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 
3498b46ab62..90df1cb5a5d 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1397,7 +1397,7 @@ typedef struct { typedef struct { VkImageAspect aspect; uint32_t mipLevel; - uint32_t arraySlice; + uint32_t arrayLayer; } VkImageSubresource; typedef struct { @@ -1496,7 +1496,7 @@ typedef struct { VkImageAspectFlags aspectMask; uint32_t baseMipLevel; uint32_t mipLevels; - uint32_t baseArraySlice; + uint32_t baseArrayLayer; uint32_t arraySize; } VkImageSubresourceRange; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 8eb5a603c2b..f0620e4d67e 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -411,12 +411,12 @@ anv_validate_CreateImageView(VkDevice _device, assert(subresource->arraySize > 0); assert(subresource->baseMipLevel < image->levels); assert(subresource->baseMipLevel + subresource->mipLevels <= image->levels); - assert(subresource->baseArraySlice < image->array_size); - assert(subresource->baseArraySlice + subresource->arraySize <= image->array_size); + assert(subresource->baseArrayLayer < image->array_size); + assert(subresource->baseArrayLayer + subresource->arraySize <= image->array_size); assert(pView); if (view_info->is_cube) { - assert(subresource->baseArraySlice % 6 == 0); + assert(subresource->baseArrayLayer % 6 == 0); assert(subresource->arraySize % 6 == 0); } diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index c214dc30a4c..4abd8d38213 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1018,7 +1018,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, .mipLevels = 1, - .baseArraySlice = 0, + .baseArrayLayer = 0, .arraySize = 1 }, }, @@ -1166,7 +1166,7 @@ void anv_CmdCopyImage( .aspectMask = 1 << pRegions[r].srcSubresource.aspect, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, .mipLevels = 1, - .baseArraySlice = pRegions[r].srcSubresource.arrayLayer, + .baseArrayLayer = pRegions[r].srcSubresource.arrayLayer, 
.arraySize = 1 }, }, @@ -1255,7 +1255,7 @@ void anv_CmdBlitImage( .aspectMask = 1 << pRegions[r].srcSubresource.aspect, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, .mipLevels = 1, - .baseArraySlice = pRegions[r].srcSubresource.arrayLayer, + .baseArrayLayer = pRegions[r].srcSubresource.arrayLayer, .arraySize = 1 }, }, @@ -1388,7 +1388,7 @@ void anv_CmdCopyBufferToImage( .aspectMask = 1 << proxy_aspect, .baseMipLevel = 0, .mipLevels = 1, - .baseArraySlice = 0, + .baseArrayLayer = 0, .arraySize = 1 }, }, @@ -1478,7 +1478,7 @@ void anv_CmdCopyImageToBuffer( .aspectMask = 1 << pRegions[r].imageSubresource.aspect, .baseMipLevel = pRegions[r].imageSubresource.mipLevel, .mipLevels = 1, - .baseArraySlice = pRegions[r].imageSubresource.arrayLayer, + .baseArrayLayer = pRegions[r].imageSubresource.arrayLayer, .arraySize = 1 }, }, @@ -1565,7 +1565,7 @@ void anv_CmdClearColorImage( .image = _image, .format = image->format->vk_format, .mipLevel = pRanges[r].baseMipLevel + l, - .baseArraySlice = pRanges[r].baseArraySlice + s, + .baseArraySlice = pRanges[r].baseArrayLayer + s, .arraySize = 1, }, cmd_buffer); diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 1959b8b5a49..d7a4b10c6e8 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -322,7 +322,7 @@ gen7_image_view_init(struct anv_image_view *iview, .Width = image->extent.width - 1, .Depth = depth - 1, .SurfacePitch = surface->stride - 1, - .MinimumArrayElement = range->baseArraySlice, + .MinimumArrayElement = range->baseArrayLayer, .NumberofMultisamples = MULTISAMPLECOUNT_1, .XOffset = 0, .YOffset = 0, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index d2a403b2f56..e836613db42 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -243,7 +243,7 @@ gen8_image_view_init(struct anv_image_view *iview, .Depth = depth - 1, .SurfacePitch = surface->stride - 1, .RenderTargetViewExtent = rt_view_extent - 1, - .MinimumArrayElement = range->baseArraySlice, + 
.MinimumArrayElement = range->baseArrayLayer, .NumberofMultisamples = MULTISAMPLECOUNT_1, .XOffset = 0, .YOffset = 0, -- cgit v1.2.3 From 89eebd889cbf1be38648a3812f513051b777aa57 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 6 Oct 2015 10:34:59 -0700 Subject: vk/0.170.2: Fairly trivial enum shuffling --- include/vulkan/vulkan.h | 20 +++++++++----------- src/vulkan/anv_device.c | 2 +- src/vulkan/gen8_cmd_buffer.c | 3 +-- 3 files changed, 11 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 90df1cb5a5d..ce1d2d88e9f 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -857,12 +857,11 @@ typedef enum { typedef VkFlags VkImageUsageFlags; typedef enum { - VK_IMAGE_CREATE_SPARSE_BIT = 0x00000001, + VK_IMAGE_CREATE_SPARSE_BINDING_BIT = 0x00000001, VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, VK_IMAGE_CREATE_SPARSE_ALIASED_BIT = 0x00000004, - VK_IMAGE_CREATE_INVARIANT_DATA_BIT = 0x00000008, - VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000010, - VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000020, + VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000008, + VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000010, } VkImageCreateFlagBits; typedef VkFlags VkImageCreateFlags; @@ -886,7 +885,7 @@ typedef enum { typedef VkFlags VkMemoryPropertyFlags; typedef enum { - VK_MEMORY_HEAP_HOST_LOCAL = 0x00000001, + VK_MEMORY_HEAP_HOST_LOCAL_BIT = 0x00000001, } VkMemoryHeapFlagBits; typedef VkFlags VkMemoryHeapFlags; typedef VkFlags VkMemoryMapFlags; @@ -948,7 +947,7 @@ typedef enum { typedef VkFlags VkBufferUsageFlags; typedef enum { - VK_BUFFER_CREATE_SPARSE_BIT = 0x00000001, + VK_BUFFER_CREATE_SPARSE_BINDING_BIT = 0x00000001, VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004, } VkBufferCreateFlagBits; @@ -1015,10 +1014,9 @@ typedef enum { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT = 0x00000400, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT = 0x00000800, 
VK_PIPELINE_STAGE_TRANSFER_BIT = 0x00001000, - VK_PIPELINE_STAGE_TRANSITION_BIT = 0x00002000, - VK_PIPELINE_STAGE_HOST_BIT = 0x00004000, + VK_PIPELINE_STAGE_HOST_BIT = 0x00002000, VK_PIPELINE_STAGE_ALL_GRAPHICS = 0x000007FF, - VK_PIPELINE_STAGE_ALL_GPU_COMMANDS = 0x00003FFF, + VK_PIPELINE_STAGE_ALL_GPU_COMMANDS = 0x00001FFF, } VkPipelineStageFlagBits; typedef VkFlags VkPipelineStageFlags; @@ -1052,7 +1050,7 @@ typedef enum { typedef VkFlags VkCmdPoolCreateFlags; typedef enum { - VK_CMD_POOL_RESET_RELEASE_RESOURCES = 0x00000001, + VK_CMD_POOL_RESET_RELEASE_RESOURCES_BIT = 0x00000001, } VkCmdPoolResetFlagBits; typedef VkFlags VkCmdPoolResetFlags; typedef VkFlags VkCmdBufferCreateFlags; @@ -1067,7 +1065,7 @@ typedef enum { typedef VkFlags VkCmdBufferOptimizeFlags; typedef enum { - VK_CMD_BUFFER_RESET_RELEASE_RESOURCES = 0x00000001, + VK_CMD_BUFFER_RESET_RELEASE_RESOURCES_BIT = 0x00000001, } VkCmdBufferResetFlagBits; typedef VkFlags VkCmdBufferResetFlags; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index a7430b858b5..e7f74266e68 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -495,7 +495,7 @@ VkResult anv_GetPhysicalDeviceMemoryProperties( pMemoryProperties->memoryHeapCount = 1; pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { .size = heap_size, - .flags = VK_MEMORY_HEAP_HOST_LOCAL, + .flags = VK_MEMORY_HEAP_HOST_LOCAL_BIT, }; return VK_SUCCESS; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index b516193458c..1cd4a8561e6 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -935,8 +935,7 @@ void gen8_CmdPipelineBarrier( if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT | - VK_PIPELINE_STAGE_TRANSITION_BIT)) { + VK_PIPELINE_STAGE_TRANSFER_BIT)) { cmd.CommandStreamerStallEnable = true; } -- cgit v1.2.3 From ccea9cc332bf3260f065a879e1732e52eacbf94c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 6 Oct 2015 
14:44:38 -0700 Subject: nir/spirv: Update to SPIR-V Rev. 32 --- src/glsl/nir/spirv.h | 119 +++++++++++++++++++++++++----------- src/glsl/nir/spirv_glsl450.h | 83 ++++++++++++------------- src/glsl/nir/spirv_glsl450_to_nir.c | 7 +-- src/glsl/nir/spirv_to_nir.c | 11 ++-- 4 files changed, 129 insertions(+), 91 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv.h b/src/glsl/nir/spirv.h index d289c687c76..e9e53973801 100644 --- a/src/glsl/nir/spirv.h +++ b/src/glsl/nir/spirv.h @@ -1,20 +1,20 @@ /* ** Copyright (c) 2014-2015 The Khronos Group Inc. -** +** ** Permission is hereby granted, free of charge, to any person obtaining a copy ** of this software and/or associated documentation files (the "Materials"), ** to deal in the Materials without restriction, including without limitation ** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** and/or sell copies of the Materials, and to permit persons to whom the ** Materials are furnished to do so, subject to the following conditions: -** +** ** The above copyright notice and this permission notice shall be included in ** all copies or substantial portions of the Materials. -** +** ** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS ** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** ** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -30,16 +30,16 @@ */ /* -** Specification revision 31. +** Specification revision 33. 
** Enumeration tokens for SPIR-V, in various styles: ** C, C++, C++11, JSON, Lua, Python -** +** ** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL ** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL ** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL ** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL ** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] -** +** ** Some tokens act like mask values, which can be OR'd together, ** while others are mutually exclusive. The mask-like ones have ** "Mask" in their name, and a parallel enum that has the shift @@ -53,7 +53,7 @@ typedef unsigned int SpvId; static const unsigned int SpvMagicNumber = 0x07230203; static const unsigned int SpvVersion = 99; -static const unsigned int SpvRevision = 31; +static const unsigned int SpvRevision = 33; static const unsigned int SpvOpCodeMask = 0xffff; static const unsigned int SpvWordCountShift = 16; @@ -100,7 +100,6 @@ typedef enum SpvExecutionMode_ { SpvExecutionModePointMode = 10, SpvExecutionModeXfb = 11, SpvExecutionModeDepthReplacing = 12, - SpvExecutionModeDepthAny = 13, SpvExecutionModeDepthGreater = 14, SpvExecutionModeDepthLess = 15, SpvExecutionModeDepthUnchanged = 16, @@ -119,6 +118,7 @@ typedef enum SpvExecutionMode_ { SpvExecutionModeOutputTriangleStrip = 29, SpvExecutionModeVecTypeHint = 30, SpvExecutionModeContractionOff = 31, + SpvExecutionModeIndependentForwardProgress = 32, } SpvExecutionMode; typedef enum SpvStorageClass_ { @@ -131,6 +131,7 @@ typedef enum SpvStorageClass_ { SpvStorageClassPrivateGlobal = 6, SpvStorageClassFunction = 7, SpvStorageClassGeneric = 8, + SpvStorageClassPushConstant = 9, SpvStorageClassAtomicCounter = 10, SpvStorageClassImage = 11, } SpvStorageClass; @@ -142,6 +143,7 @@ typedef enum SpvDim_ { SpvDimCube = 3, SpvDimRect = 4, SpvDimBuffer = 5, + SpvDimInputTarget = 6, } SpvDim; typedef enum SpvSamplerAddressingMode_ { @@ -249,6 +251,7 
@@ typedef enum SpvImageOperandsShift_ { SpvImageOperandsOffsetShift = 4, SpvImageOperandsConstOffsetsShift = 5, SpvImageOperandsSampleShift = 6, + SpvImageOperandsMinLodShift = 7, } SpvImageOperandsShift; typedef enum SpvImageOperandsMask_ { @@ -260,6 +263,7 @@ typedef enum SpvImageOperandsMask_ { SpvImageOperandsOffsetMask = 0x00000010, SpvImageOperandsConstOffsetsMask = 0x00000020, SpvImageOperandsSampleMask = 0x00000040, + SpvImageOperandsMinLodMask = 0x00000080, } SpvImageOperandsMask; typedef enum SpvFPFastMathModeShift_ { @@ -322,7 +326,7 @@ typedef enum SpvDecoration_ { SpvDecorationCPacked = 10, SpvDecorationBuiltIn = 11, SpvDecorationSmooth = 12, - SpvDecorationNoperspective = 13, + SpvDecorationNoPerspective = 13, SpvDecorationFlat = 14, SpvDecorationPatch = 15, SpvDecorationCentroid = 16, @@ -333,10 +337,9 @@ typedef enum SpvDecoration_ { SpvDecorationVolatile = 21, SpvDecorationConstant = 22, SpvDecorationCoherent = 23, - SpvDecorationNonwritable = 24, - SpvDecorationNonreadable = 25, + SpvDecorationNonWritable = 24, + SpvDecorationNonReadable = 25, SpvDecorationUniform = 26, - SpvDecorationNoStaticUse = 27, SpvDecorationSaturatedConversion = 28, SpvDecorationStream = 29, SpvDecorationLocation = 30, @@ -351,6 +354,9 @@ typedef enum SpvDecoration_ { SpvDecorationFPRoundingMode = 39, SpvDecorationFPFastMathMode = 40, SpvDecorationLinkageAttributes = 41, + SpvDecorationNoContraction = 42, + SpvDecorationInputTargetIndex = 43, + SpvDecorationAlignment = 44, } SpvDecoration; typedef enum SpvBuiltIn_ { @@ -395,6 +401,8 @@ typedef enum SpvBuiltIn_ { SpvBuiltInNumEnqueuedSubgroups = 39, SpvBuiltInSubgroupId = 40, SpvBuiltInSubgroupLocalInvocationId = 41, + SpvBuiltInVertexIndex = 42, + SpvBuiltInInstanceIndex = 43, } SpvBuiltIn; typedef enum SpvSelectionControlShift_ { @@ -435,41 +443,43 @@ typedef enum SpvFunctionControlMask_ { } SpvFunctionControlMask; typedef enum SpvMemorySemanticsShift_ { - SpvMemorySemanticsRelaxedShift = 0, - 
SpvMemorySemanticsSequentiallyConsistentShift = 1, - SpvMemorySemanticsAcquireShift = 2, - SpvMemorySemanticsReleaseShift = 3, - SpvMemorySemanticsUniformMemoryShift = 4, - SpvMemorySemanticsSubgroupMemoryShift = 5, - SpvMemorySemanticsWorkgroupLocalMemoryShift = 6, - SpvMemorySemanticsWorkgroupGlobalMemoryShift = 7, - SpvMemorySemanticsAtomicCounterMemoryShift = 8, - SpvMemorySemanticsImageMemoryShift = 9, + SpvMemorySemanticsAcquireShift = 1, + SpvMemorySemanticsReleaseShift = 2, + SpvMemorySemanticsAcquireReleaseShift = 3, + SpvMemorySemanticsSequentiallyConsistentShift = 4, + SpvMemorySemanticsUniformMemoryShift = 6, + SpvMemorySemanticsSubgroupMemoryShift = 7, + SpvMemorySemanticsWorkgroupLocalMemoryShift = 8, + SpvMemorySemanticsWorkgroupGlobalMemoryShift = 9, + SpvMemorySemanticsAtomicCounterMemoryShift = 10, + SpvMemorySemanticsImageMemoryShift = 11, } SpvMemorySemanticsShift; typedef enum SpvMemorySemanticsMask_ { SpvMemorySemanticsMaskNone = 0, - SpvMemorySemanticsRelaxedMask = 0x00000001, - SpvMemorySemanticsSequentiallyConsistentMask = 0x00000002, - SpvMemorySemanticsAcquireMask = 0x00000004, - SpvMemorySemanticsReleaseMask = 0x00000008, - SpvMemorySemanticsUniformMemoryMask = 0x00000010, - SpvMemorySemanticsSubgroupMemoryMask = 0x00000020, - SpvMemorySemanticsWorkgroupLocalMemoryMask = 0x00000040, - SpvMemorySemanticsWorkgroupGlobalMemoryMask = 0x00000080, - SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000100, - SpvMemorySemanticsImageMemoryMask = 0x00000200, + SpvMemorySemanticsAcquireMask = 0x00000002, + SpvMemorySemanticsReleaseMask = 0x00000004, + SpvMemorySemanticsAcquireReleaseMask = 0x00000008, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, + SpvMemorySemanticsUniformMemoryMask = 0x00000040, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, + SpvMemorySemanticsWorkgroupLocalMemoryMask = 0x00000100, + SpvMemorySemanticsWorkgroupGlobalMemoryMask = 0x00000200, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, + 
SpvMemorySemanticsImageMemoryMask = 0x00000800, } SpvMemorySemanticsMask; typedef enum SpvMemoryAccessShift_ { SpvMemoryAccessVolatileShift = 0, SpvMemoryAccessAlignedShift = 1, + SpvMemoryAccessNontemporalShift = 2, } SpvMemoryAccessShift; typedef enum SpvMemoryAccessMask_ { SpvMemoryAccessMaskNone = 0, SpvMemoryAccessVolatileMask = 0x00000001, SpvMemoryAccessAlignedMask = 0x00000002, + SpvMemoryAccessNontemporalMask = 0x00000004, } SpvMemoryAccessMask; typedef enum SpvScope_ { @@ -538,11 +548,30 @@ typedef enum SpvCapability_ { SpvCapabilityCullDistance = 33, SpvCapabilityImageCubeArray = 34, SpvCapabilitySampleRateShading = 35, + SpvCapabilityImageRect = 36, + SpvCapabilitySampledRect = 37, + SpvCapabilityGenericPointer = 38, + SpvCapabilityInt8 = 39, + SpvCapabilityInputTarget = 40, + SpvCapabilitySparseResidency = 41, + SpvCapabilityMinLod = 42, + SpvCapabilitySampled1D = 43, + SpvCapabilityImage1D = 44, + SpvCapabilitySampledCubeArray = 45, + SpvCapabilitySampledBuffer = 46, + SpvCapabilityImageBuffer = 47, + SpvCapabilityImageMSArray = 48, + SpvCapabilityAdvancedFormats = 49, + SpvCapabilityImageQuery = 50, + SpvCapabilityDerivativeControl = 51, + SpvCapabilityInterpolationFunction = 52, + SpvCapabilityTransformFeedback = 53, } SpvCapability; typedef enum SpvOp_ { SpvOpNop = 0, SpvOpUndef = 1, + SpvOpSourceContinued = 2, SpvOpSource = 3, SpvOpSourceExtension = 4, SpvOpName = 5, @@ -576,6 +605,7 @@ typedef enum SpvOp_ { SpvOpTypeReserveId = 36, SpvOpTypeQueue = 37, SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, SpvOpConstantTrue = 41, SpvOpConstantFalse = 42, SpvOpConstant = 43, @@ -602,6 +632,7 @@ typedef enum SpvOp_ { SpvOpPtrAccessChain = 67, SpvOpArrayLength = 68, SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, SpvOpDecorate = 71, SpvOpMemberDecorate = 72, SpvOpDecorationGroup = 73, @@ -629,7 +660,6 @@ typedef enum SpvOp_ { SpvOpImageDrefGather = 97, SpvOpImageRead = 98, SpvOpImageWrite = 99, - SpvOpImageQueryDim = 100, 
SpvOpImageQueryFormat = 101, SpvOpImageQueryOrder = 102, SpvOpImageQuerySizeLod = 103, @@ -678,7 +708,8 @@ typedef enum SpvOp_ { SpvOpDot = 148, SpvOpIAddCarry = 149, SpvOpISubBorrow = 150, - SpvOpIMulExtended = 151, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, SpvOpAny = 154, SpvOpAll = 155, SpvOpIsNan = 156, @@ -815,6 +846,22 @@ typedef enum SpvOp_ { SpvOpCaptureEventProfilingInfo = 302, SpvOpGetDefaultQueue = 303, SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, + SpvOpAtomicFlagClear = 319, } SpvOp; #endif // #ifndef spirv_H + diff --git a/src/glsl/nir/spirv_glsl450.h b/src/glsl/nir/spirv_glsl450.h index d828b152f43..d1c9b5c1d44 100644 --- a/src/glsl/nir/spirv_glsl450.h +++ b/src/glsl/nir/spirv_glsl450.h @@ -13,7 +13,7 @@ ** ** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS ** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ ** ** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -28,7 +28,7 @@ #define GLSLstd450_H const int GLSLstd450Version = 99; -const int GLSLstd450Revision = 2; +const int GLSLstd450Revision = 3; enum GLSLstd450 { GLSLstd450Bad = 0, // Don't use @@ -82,47 +82,44 @@ enum GLSLstd450 { GLSLstd450FClamp = 43, GLSLstd450UClamp = 44, GLSLstd450SClamp = 45, - GLSLstd450Mix = 46, - GLSLstd450Step = 47, - GLSLstd450SmoothStep = 48, - - GLSLstd450Fma = 49, - GLSLstd450Frexp = 50, // second operand needs an OpVariable to write to - GLSLstd450FrexpStruct = 51, // no OpVariable operand - GLSLstd450Ldexp = 52, - - GLSLstd450PackSnorm4x8 = 53, - GLSLstd450PackUnorm4x8 = 54, - GLSLstd450PackSnorm2x16 = 55, - GLSLstd450PackUnorm2x16 = 56, - GLSLstd450PackHalf2x16 = 57, - GLSLstd450PackDouble2x32 = 58, - GLSLstd450UnpackSnorm2x16 = 59, - GLSLstd450UnpackUnorm2x16 = 60, - GLSLstd450UnpackHalf2x16 = 61, - GLSLstd450UnpackSnorm4x8 = 62, - GLSLstd450UnpackUnorm4x8 = 63, - GLSLstd450UnpackDouble2x32 = 64, - - GLSLstd450Length = 65, - GLSLstd450Distance = 66, - GLSLstd450Cross = 67, - GLSLstd450Normalize = 68, - GLSLstd450FaceForward = 69, - GLSLstd450Reflect = 70, - GLSLstd450Refract = 71, - - GLSLstd450FindILSB = 72, - GLSLstd450FindSMSB = 73, - GLSLstd450FindUMSB = 74, - - GLSLstd450InterpolateAtCentroid = 75, - GLSLstd450InterpolateAtSample = 76, - GLSLstd450InterpolateAtOffset = 77, - - GLSLstd450AddCarry = 78, // These three should move to the core instruction set - GLSLstd450SubBorrow = 79, - GLSLstd450MulExtended = 80, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second 
operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, GLSLstd450Count }; diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c index 60bd597820c..1056c0f1ddf 100644 --- a/src/glsl/nir/spirv_glsl450_to_nir.c +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -86,7 +86,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Modf: op = nir_op_fmod; break; case GLSLstd450FMin: op = nir_op_fmin; break; case GLSLstd450FMax: op = nir_op_fmax; break; - case GLSLstd450Mix: op = nir_op_flrp; break; + case GLSLstd450FMix: op = nir_op_flrp; break; case GLSLstd450Step: val->ssa->def = nir_sge(&b->nb, src[1], src[0]); return; @@ -116,9 +116,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, val->ssa->def = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); return; - case GLSLstd450AddCarry: op = nir_op_uadd_carry; break; - case GLSLstd450SubBorrow: op = nir_op_usub_borrow; break; - case GLSLstd450Exp: case GLSLstd450Log: case GLSLstd450FClamp: @@ -142,7 +139,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450FaceForward: 
case GLSLstd450Reflect: case GLSLstd450Refract: - case GLSLstd450MulExtended: + case GLSLstd450IMix: default: unreachable("Unhandled opcode"); } diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 45053a6c937..e4035f222a6 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -340,7 +340,7 @@ struct_member_decoration_cb(struct vtn_builder *b, case SpvDecorationSmooth: ctx->fields[member].interpolation = INTERP_QUALIFIER_SMOOTH; break; - case SpvDecorationNoperspective: + case SpvDecorationNoPerspective: ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; break; case SpvDecorationFlat: @@ -740,7 +740,7 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, case SpvDecorationSmooth: var->data.interpolation = INTERP_QUALIFIER_SMOOTH; break; - case SpvDecorationNoperspective: + case SpvDecorationNoPerspective: var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; break; case SpvDecorationFlat: @@ -759,7 +759,7 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, assert(var->constant_initializer != NULL); var->data.read_only = true; break; - case SpvDecorationNonwritable: + case SpvDecorationNonWritable: var->data.read_only = true; break; case SpvDecorationLocation: @@ -790,9 +790,6 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, b->builtins[dec->literals[0]] = var; break; } - case SpvDecorationNoStaticUse: - /* This can safely be ignored */ - break; case SpvDecorationRowMajor: case SpvDecorationColMajor: case SpvDecorationGLSLShared: @@ -801,7 +798,7 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, case SpvDecorationAliased: case SpvDecorationVolatile: case SpvDecorationCoherent: - case SpvDecorationNonreadable: + case SpvDecorationNonReadable: case SpvDecorationUniform: /* This is really nice but we have no use for it right now. 
*/ case SpvDecorationCPacked: -- cgit v1.2.3 From c272bb58f5cded827b8a4c94a419504f8ff4cc9e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 6 Oct 2015 15:10:45 -0700 Subject: nir/spirv: Better texture handling --- src/glsl/nir/spirv_to_nir.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index e4035f222a6..d0fb6a255a5 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1508,7 +1508,7 @@ static nir_tex_src vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) { nir_tex_src src; - src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa->def); + src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def); src.src_type = type; return src; } @@ -1568,16 +1568,19 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, nir_texop texop; switch (opcode) { case SpvOpImageSampleImplicitLod: - case SpvOpImageSampleExplicitLod: case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleDrefExplicitLod: case SpvOpImageSampleProjImplicitLod: - case SpvOpImageSampleProjExplicitLod: case SpvOpImageSampleProjDrefImplicitLod: - case SpvOpImageSampleProjDrefExplicitLod: texop = nir_texop_tex; break; + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + texop = nir_texop_txl; + break; + case SpvOpImageFetch: texop = nir_texop_txf; break; @@ -1616,8 +1619,8 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, } if (operands & SpvImageOperandsLodMask) { - assert(texop == nir_texop_tex); - texop = nir_texop_txl; + assert(texop == nir_texop_txl || texop == nir_texop_txf || + texop == nir_texop_txs); (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); } -- cgit v1.2.3 From 1d7ef82f4b49a2afcd575dab7b9e3ca80df90df5 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 2 Oct 2015 
14:38:10 -0700 Subject: i965: Delete brw_cs.cpp which was deleted in master --- src/mesa/drivers/dri/i965/brw_cs.cpp | 411 ----------------------------------- 1 file changed, 411 deletions(-) delete mode 100644 src/mesa/drivers/dri/i965/brw_cs.cpp (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp deleted file mode 100644 index 6ce5779137e..00000000000 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Copyright (c) 2014 - 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - - -#include "util/ralloc.h" -#include "brw_context.h" -#include "brw_cs.h" -#include "brw_fs.h" -#include "brw_eu.h" -#include "brw_wm.h" -#include "intel_mipmap_tree.h" -#include "brw_state.h" -#include "intel_batchbuffer.h" - -extern "C" -bool -brw_cs_prog_data_compare(const void *in_a, const void *in_b) -{ - const struct brw_cs_prog_data *a = - (const struct brw_cs_prog_data *)in_a; - const struct brw_cs_prog_data *b = - (const struct brw_cs_prog_data *)in_b; - - /* Compare the base structure. */ - if (!brw_stage_prog_data_compare(&a->base, &b->base)) - return false; - - /* Compare the rest of the structure. */ - const unsigned offset = sizeof(struct brw_stage_prog_data); - if (memcmp(((char *) a) + offset, ((char *) b) + offset, - sizeof(struct brw_cs_prog_data) - offset)) - return false; - - return true; -} - - -const unsigned * -brw_cs_emit(struct brw_context *brw, - void *mem_ctx, - const struct brw_cs_prog_key *key, - struct brw_cs_prog_data *prog_data, - struct gl_compute_program *cp, - struct gl_shader_program *prog, - unsigned *final_assembly_size) -{ - bool start_busy = false; - double start_time = 0; - - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && - drm_intel_bo_busy(brw->batch.last_bo)); - start_time = get_time(); - } - - struct brw_shader *shader = - (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE]; - - if (unlikely(INTEL_DEBUG & DEBUG_CS)) - brw_dump_ir("compute", prog, &shader->base, &cp->Base); - - prog_data->local_size[0] = cp->LocalSize[0]; - prog_data->local_size[1] = cp->LocalSize[1]; - prog_data->local_size[2] = cp->LocalSize[2]; - unsigned local_workgroup_size = - cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2]; - - cfg_t *cfg = NULL; - const char *fail_msg = NULL; - - int st_index = -1; - if (INTEL_DEBUG & DEBUG_SHADER_TIME) - st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS); - - /* Now the main event: Visit the shader IR and generate our CS IR for it. 
- */ - fs_visitor v8(brw->intelScreen->compiler, brw, - mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, - &cp->Base, 8, st_index); - if (!v8.run_cs()) { - fail_msg = v8.fail_msg; - } else if (local_workgroup_size <= 8 * brw->max_cs_threads) { - cfg = v8.cfg; - prog_data->simd_size = 8; - } - - fs_visitor v16(brw->intelScreen->compiler, brw, - mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, - &cp->Base, 16, st_index); - if (likely(!(INTEL_DEBUG & DEBUG_NO16)) && - !fail_msg && !v8.simd16_unsupported && - local_workgroup_size <= 16 * brw->max_cs_threads) { - /* Try a SIMD16 compile */ - v16.import_uniforms(&v8); - if (!v16.run_cs()) { - perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg); - if (!cfg) { - fail_msg = - "Couldn't generate SIMD16 program and not " - "enough threads for SIMD8"; - } - } else { - cfg = v16.cfg; - prog_data->simd_size = 16; - } - } - - if (unlikely(cfg == NULL)) { - assert(fail_msg); - prog->LinkStatus = false; - ralloc_strcat(&prog->InfoLog, fail_msg); - _mesa_problem(NULL, "Failed to compile compute shader: %s\n", - fail_msg); - return NULL; - } - - fs_generator g(brw->intelScreen->compiler, brw, - mem_ctx, (void*) key, &prog_data->base, &cp->Base, - v8.promoted_constants, v8.runtime_check_aads_emit, "CS"); - if (INTEL_DEBUG & DEBUG_CS) { - char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d", - prog->Label ? 
prog->Label : "unnamed", - prog->Name); - g.enable_debug(name); - } - - g.generate_code(cfg, prog_data->simd_size); - - if (unlikely(brw->perf_debug) && shader) { - if (shader->compiled_once) { - _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles"); - } - shader->compiled_once = true; - - if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { - perf_debug("CS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - } - - return g.get_assembly(final_assembly_size); -} - -static bool -brw_codegen_cs_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_compute_program *cp, - struct brw_cs_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - const GLuint *program; - void *mem_ctx = ralloc_context(NULL); - GLuint program_size; - struct brw_cs_prog_data prog_data; - - struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; - assert (cs); - - memset(&prog_data, 0, sizeof(prog_data)); - - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - */ - int param_count = cs->num_uniform_components + - cs->NumImages * BRW_IMAGE_PARAM_SIZE; - - /* The backend also sometimes adds params for texture size. 
*/ - param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; - prog_data.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.image_param = - rzalloc_array(NULL, struct brw_image_param, cs->NumImages); - prog_data.base.nr_params = param_count; - prog_data.base.nr_image_params = cs->NumImages; - - program = brw_cs_emit(brw, mem_ctx, key, &prog_data, - &cp->program, prog, &program_size); - if (program == NULL) { - ralloc_free(mem_ctx); - return false; - } - - if (prog_data.base.total_scratch) { - brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo, - prog_data.base.total_scratch * brw->max_cs_threads); - } - - if (unlikely(INTEL_DEBUG & DEBUG_CS)) - fprintf(stderr, "\n"); - - brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG, - key, sizeof(*key), - program, program_size, - &prog_data, sizeof(prog_data), - &brw->cs.base.prog_offset, &brw->cs.prog_data); - ralloc_free(mem_ctx); - - return true; -} - - -static void -brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key) -{ - /* BRW_NEW_COMPUTE_PROGRAM */ - const struct brw_compute_program *cp = - (struct brw_compute_program *) brw->compute_program; - - memset(key, 0, sizeof(*key)); - - /* The unique compute program ID */ - key->program_string_id = cp->id; -} - - -extern "C" -void -brw_upload_cs_prog(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_cs_prog_key key; - struct brw_compute_program *cp = (struct brw_compute_program *) - brw->compute_program; - - if (!cp) - return; - - if (!brw_state_dirty(brw, 0, BRW_NEW_COMPUTE_PROGRAM)) - return; - - brw_cs_populate_key(brw, &key); - - if (!brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG, - &key, sizeof(key), - &brw->cs.base.prog_offset, &brw->cs.prog_data)) { - bool success = - brw_codegen_cs_prog(brw, - ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE], - cp, &key); - (void) 
success; - assert(success); - } - brw->cs.base.prog_data = &brw->cs.prog_data->base; -} - - -extern "C" bool -brw_cs_precompile(struct gl_context *ctx, - struct gl_shader_program *shader_prog, - struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_cs_prog_key key; - - struct gl_compute_program *cp = (struct gl_compute_program *) prog; - struct brw_compute_program *bcp = brw_compute_program(cp); - - memset(&key, 0, sizeof(key)); - key.program_string_id = bcp->id; - - brw_setup_tex_for_precompile(brw, &key.tex, prog); - - uint32_t old_prog_offset = brw->cs.base.prog_offset; - struct brw_cs_prog_data *old_prog_data = brw->cs.prog_data; - - bool success = brw_codegen_cs_prog(brw, shader_prog, bcp, &key); - - brw->cs.base.prog_offset = old_prog_offset; - brw->cs.prog_data = old_prog_data; - - return success; -} - - -static unsigned -get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data) -{ - const unsigned simd_size = cs_prog_data->simd_size; - unsigned group_size = cs_prog_data->local_size[0] * - cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; - - return (group_size + simd_size - 1) / simd_size; -} - - -static void -brw_upload_cs_state(struct brw_context *brw) -{ - if (!brw->cs.prog_data) - return; - - uint32_t offset; - uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - 8 * 4, 64, &offset); - struct brw_stage_state *stage_state = &brw->cs.base; - struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data; - struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - - if (INTEL_DEBUG & DEBUG_SHADER_TIME) { - brw->vtbl.emit_buffer_surface_state( - brw, &stage_state->surf_offset[ - prog_data->binding_table.shader_time_start], - brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW, - brw->shader_time.bo->size, 1, true); - } - - uint32_t *bind = (uint32_t*) brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, - prog_data->binding_table.size_bytes, - 32, &stage_state->bind_bo_offset); - - unsigned threads 
= get_cs_thread_count(cs_prog_data); - - uint32_t dwords = brw->gen < 8 ? 8 : 9; - BEGIN_BATCH(dwords); - OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2)); - - if (prog_data->total_scratch) { - if (brw->gen >= 8) - OUT_RELOC64(stage_state->scratch_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->total_scratch) - 11); - else - OUT_RELOC(stage_state->scratch_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->total_scratch) - 11); - } else { - OUT_BATCH(0); - if (brw->gen >= 8) - OUT_BATCH(0); - } - - const uint32_t vfe_num_urb_entries = brw->gen >= 8 ? 2 : 0; - const uint32_t vfe_gpgpu_mode = - brw->gen == 7 ? SET_FIELD(1, GEN7_MEDIA_VFE_STATE_GPGPU_MODE) : 0; - OUT_BATCH(SET_FIELD(brw->max_cs_threads - 1, MEDIA_VFE_STATE_MAX_THREADS) | - SET_FIELD(vfe_num_urb_entries, MEDIA_VFE_STATE_URB_ENTRIES) | - SET_FIELD(1, MEDIA_VFE_STATE_RESET_GTW_TIMER) | - SET_FIELD(1, MEDIA_VFE_STATE_BYPASS_GTW) | - vfe_gpgpu_mode); - - OUT_BATCH(0); - const uint32_t vfe_urb_allocation = brw->gen >= 8 ? 2 : 0; - OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */ - memcpy(bind, stage_state->surf_offset, - prog_data->binding_table.size_bytes); - - memset(desc, 0, 8 * 4); - - int dw = 0; - desc[dw++] = brw->cs.base.prog_offset; - if (brw->gen >= 8) - desc[dw++] = 0; /* Kernel Start Pointer High */ - desc[dw++] = 0; - desc[dw++] = 0; - desc[dw++] = stage_state->bind_bo_offset; - desc[dw++] = 0; - const uint32_t media_threads = - brw->gen >= 8 ? 
- SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) : - SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT); - assert(threads <= brw->max_cs_threads); - desc[dw++] = media_threads; - - BEGIN_BATCH(4); - OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(8 * 4); - OUT_BATCH(offset); - ADVANCE_BATCH(); -} - - -extern "C" -const struct brw_tracked_state brw_cs_state = { - /* explicit initialisers aren't valid C++, comment - * them for documentation purposes */ - /* .dirty = */{ - /* .mesa = */ 0, - /* .brw = */ BRW_NEW_CS_PROG_DATA, - }, - /* .emit = */ brw_upload_cs_state -}; -- cgit v1.2.3 From d00718104f956783291fbfc28f9d0a2e9bee4003 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 6 Oct 2015 18:18:47 -0700 Subject: vk/image: Remove stale anv_asserts for depthstencil attachments We don't fully handle mipmapped, array depthstencil attachments. But we handle the well enough for Crucible's miptree tests. --- src/vulkan/anv_image.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index f0620e4d67e..29a442daa43 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -506,11 +506,6 @@ anv_depth_stencil_view_init(struct anv_image_view *iview, { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - /* XXX: We don't handle any of these */ - anv_assert(pCreateInfo->mipLevel == 0); - anv_assert(pCreateInfo->baseArraySlice == 0); - anv_assert(pCreateInfo->arraySize == 1); - iview->image = image; iview->format = anv_format_for_vk_format(pCreateInfo->format); -- cgit v1.2.3 From cf603714cb0e2e5c5dfe7309b8938c910933aafa Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 6 Oct 2015 18:34:37 -0700 Subject: vk/meta: Fix usage flags for image-wrapped-buffers In make_image_for_buffer(), use VK_IMAGE_USAGE_SAMPLED_BIT when transferring from the buffer and use VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT when transferring to the buffer. 
--- src/vulkan/anv_meta.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 4abd8d38213..7646071276a 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1304,6 +1304,7 @@ void anv_CmdBlitImage( static VkImage make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, + VkImageUsageFlags usage, const VkBufferImageCopy *copy) { ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); @@ -1326,7 +1327,7 @@ make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, .arraySize = 1, .samples = 1, .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .usage = usage, .flags = 0, }, &vk_image); assert(result == VK_SUCCESS); @@ -1368,8 +1369,7 @@ void anv_CmdCopyBufferToImage( } VkImage srcImage = make_image_for_buffer(vk_device, srcBuffer, - proxy_format, - &pRegions[r]); + proxy_format, VK_IMAGE_USAGE_SAMPLED_BIT, &pRegions[r]); struct anv_image_view src_iview; anv_image_view_init(&src_iview, cmd_buffer->device, @@ -1490,8 +1490,7 @@ void anv_CmdCopyImageToBuffer( } VkImage destImage = make_image_for_buffer(vk_device, destBuffer, - dest_format, - &pRegions[r]); + dest_format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, &pRegions[r]); struct anv_image_view dest_iview; anv_color_attachment_view_init(&dest_iview, cmd_buffer->device, -- cgit v1.2.3 From 44143a1f4658143b8acc4aaf515603f9507442f1 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 6 Oct 2015 18:17:09 -0700 Subject: vk: Add anv_image::usage It's a copy of VkImageCreateInfo::usage. Will be used for the VkAttachmentView/VkImageView merge. 
--- src/vulkan/anv_image.c | 24 ++++++++++++++++++++++++ src/vulkan/anv_private.h | 1 + src/vulkan/gen7_state.c | 1 + src/vulkan/gen8_state.c | 1 + 4 files changed, 27 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 29a442daa43..b95cadca9e7 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -304,8 +304,21 @@ anv_image_create(VkDevice _device, image->format = anv_format_for_vk_format(pCreateInfo->format); image->levels = pCreateInfo->mipLevels; image->array_size = pCreateInfo->arraySize; + image->usage = pCreateInfo->usage; image->surf_type = surf_type; + if (pCreateInfo->usage & VK_IMAGE_USAGE_TRANSFER_SOURCE_BIT) { + /* Meta will transfer from the image by binding it as a texture. */ + image->usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (pCreateInfo->usage & VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT) { + /* Meta will transfer to the image by binding it as a color attachment, + * even if the image format is not a color format. 
+ */ + image->usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + if (likely(anv_format_is_color(image->format))) { r = anv_image_make_surface(create_info, image->format, &image->size, &image->alignment, @@ -458,6 +471,11 @@ anv_image_view_init(struct anv_image_view *iview, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + + assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT)); + switch (device->info.gen) { case 7: gen7_image_view_init(iview, device, pCreateInfo, cmd_buffer); @@ -506,6 +524,8 @@ anv_depth_stencil_view_init(struct anv_image_view *iview, { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + assert(image->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_BIT); + iview->image = image; iview->format = anv_format_for_vk_format(pCreateInfo->format); @@ -565,6 +585,10 @@ anv_color_attachment_view_init(struct anv_image_view *iview, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer) { + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + + assert(image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + switch (device->info.gen) { case 7: gen7_color_attachment_view_init(iview, device, pCreateInfo, cmd_buffer); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 3326625ebb3..06ab1dc65a6 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1202,6 +1202,7 @@ struct anv_image { VkExtent3D extent; uint32_t levels; uint32_t array_size; + VkImageUsageFlags usage; /**< VkImageCreateInfo::usage */ VkDeviceSize size; uint32_t alignment; diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index d7a4b10c6e8..5829d03d3e3 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -369,6 +369,7 @@ gen7_color_attachment_view_init(struct anv_image_view *iview, struct anv_surface *surface = anv_image_get_surface_for_color_attachment(image); + assert(image->usage & 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); anv_assert(pCreateInfo->arraySize > 0); anv_assert(pCreateInfo->mipLevel < image->levels); anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index e836613db42..3c479e1c23e 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -295,6 +295,7 @@ gen8_color_attachment_view_init(struct anv_image_view *iview, uint32_t depth = 1; /* RENDER_SURFACE_STATE::Depth */ uint32_t rt_view_extent = 1; /* RENDER_SURFACE_STATE::RenderTargetViewExtent */ + assert(image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); anv_assert(pCreateInfo->arraySize > 0); anv_assert(pCreateInfo->mipLevel < image->levels); anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); -- cgit v1.2.3 From 3fc2b1f32540ee8c3fa994814b69568c64c74d81 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 6 Oct 2015 18:53:29 -0700 Subject: vk: Remove stale finishme for stencil image views They don't work completely. But they work well enough to satisfy Crucible. 
--- src/vulkan/anv_image.c | 1 - src/vulkan/anv_meta.c | 8 ++++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index b95cadca9e7..2b3c444428f 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -545,7 +545,6 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag return &image->depth_surface; case VK_IMAGE_ASPECT_STENCIL_BIT: assert(image->format->has_stencil); - anv_finishme("stencil image views"); return &image->stencil_surface; case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: /* FINISHME: The Vulkan spec (git a511ba2) requires support for combined diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 7646071276a..d69863b8375 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -985,12 +985,16 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .arraySize = 1, .samples = 1, .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .usage = 0, .flags = 0, }; - VkImage src_image, dest_image; + VkImage src_image; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; anv_CreateImage(vk_device, &image_info, &src_image); + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; anv_CreateImage(vk_device, &image_info, &dest_image); /* We could use a vk call to bind memory, but that would require -- cgit v1.2.3 From 37bf120930aa9e3ccf455efc633f2edbdec9dfc1 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 6 Oct 2015 18:55:16 -0700 Subject: vk/pipeline: Emit MSAA finishme only if samples > 1 If samples == 1, then there's nothing for Mesa to do, and the finishme message is only noise. 
--- src/vulkan/anv_pipeline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 4af4b663c87..192a4b17ae0 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -209,7 +209,8 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); if (pCreateInfo->pViewportState) anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO"); - if (pCreateInfo->pMultisampleState) + if (pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->rasterSamples > 1) anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); pipeline->use_repclear = extra && extra->use_repclear; -- cgit v1.2.3 From 24de3d49eab3c8fd8aad3f4aeb8aac0154690374 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 6 Oct 2015 19:11:58 -0700 Subject: vk: Embed two surface states in anv_image_view This prepares for merging VkAttachmentView into VkImageView. The two surface states are: anv_image_view::color_rt_surface_state: RENDER_SURFACE_STATE when using image as a color render target. anv_image_view::nonrt_surface_state; RENDER_SURFACE_STATE when using image as a non render target. No Crucible regressions. 
--- src/vulkan/anv_cmd_buffer.c | 6 +- src/vulkan/anv_image.c | 199 +++++++++++++++++++++++++------------------- src/vulkan/anv_meta.c | 138 ++++++++++++++++++++++-------- src/vulkan/anv_private.h | 33 +++----- src/vulkan/gen7_state.c | 140 +++++++++---------------------- src/vulkan/gen8_state.c | 179 ++++++++------------------------------- 6 files changed, 306 insertions(+), 389 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 470ea11df6f..7587f2ed103 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -434,8 +434,8 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, const struct anv_image_view *iview = fb->attachments[subpass->color_attachments[a]]; - bt_map[a] = iview->surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, iview->surface_state, + bt_map[a] = iview->color_rt_surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, iview->bo, iview->offset); } @@ -468,7 +468,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, bo_offset = desc->buffer_view->offset; break; case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW: - surface_state = &desc->image_view->surface_state; + surface_state = &desc->image_view->nonrt_surface_state; bo = desc->image_view->bo; bo_offset = desc->image_view->offset; break; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 2b3c444428f..3f4d9b15c92 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -45,7 +45,6 @@ static const uint8_t anv_surf_type_from_image_type[] = { [VK_IMAGE_TYPE_1D] = SURFTYPE_1D, [VK_IMAGE_TYPE_2D] = SURFTYPE_2D, [VK_IMAGE_TYPE_3D] = SURFTYPE_3D, - }; static const struct anv_image_view_info @@ -259,6 +258,26 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, return VK_SUCCESS; } +static VkImageUsageFlags +anv_image_get_full_usage(const VkImageCreateInfo *info) +{ + 
VkImageUsageFlags usage = info->usage; + + if (usage & VK_IMAGE_USAGE_TRANSFER_SOURCE_BIT) { + /* Meta will transfer from the image by binding it as a texture. */ + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT) { + /* Meta will transfer to the image by binding it as a color attachment, + * even if the image format is not a color format. + */ + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + + return usage; +} + VkResult anv_image_create(VkDevice _device, const struct anv_image_create_info *create_info, @@ -304,19 +323,16 @@ anv_image_create(VkDevice _device, image->format = anv_format_for_vk_format(pCreateInfo->format); image->levels = pCreateInfo->mipLevels; image->array_size = pCreateInfo->arraySize; - image->usage = pCreateInfo->usage; - image->surf_type = surf_type; + image->usage = anv_image_get_full_usage(pCreateInfo); + image->surface_type = surf_type; - if (pCreateInfo->usage & VK_IMAGE_USAGE_TRANSFER_SOURCE_BIT) { - /* Meta will transfer from the image by binding it as a texture. */ - image->usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + if (image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT)) { + image->needs_nonrt_surface_state = true; } - if (pCreateInfo->usage & VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT) { - /* Meta will transfer to the image by binding it as a color attachment, - * even if the image format is not a color format. 
- */ - image->usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + image->needs_color_rt_surface_state = true; } if (likely(anv_format_is_color(image->format))) { @@ -472,9 +488,27 @@ anv_image_view_init(struct anv_image_view *iview, struct anv_cmd_buffer *cmd_buffer) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + assert(range->arraySize > 0); + assert(range->baseMipLevel < image->levels); assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT)); + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_BIT)); + + switch (image->type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + assert(range->baseArrayLayer + range->arraySize - 1 <= image->array_size); + break; + case VK_IMAGE_TYPE_3D: + assert(range->baseArrayLayer + range->arraySize - 1 + <= anv_minify(image->extent.depth, range->baseMipLevel)); + break; + } switch (device->info.gen) { case 7: @@ -508,29 +542,30 @@ anv_CreateImageView(VkDevice _device, return VK_SUCCESS; } -void -anv_DestroyImageView(VkDevice _device, VkImageView _iview) +static void +anv_image_view_destroy(struct anv_device *device, + struct anv_image_view *iview) { - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_image_view, iview, _iview); + if (iview->image->needs_color_rt_surface_state) { + anv_state_pool_free(&device->surface_state_pool, + iview->color_rt_surface_state); + } + + if (iview->image->needs_nonrt_surface_state) { + anv_state_pool_free(&device->surface_state_pool, + iview->nonrt_surface_state); + } - anv_state_pool_free(&device->surface_state_pool, iview->surface_state); anv_device_free(device, iview); } -static void -anv_depth_stencil_view_init(struct anv_image_view *iview, - const VkAttachmentViewCreateInfo *pCreateInfo) +void 
+anv_DestroyImageView(VkDevice _device, VkImageView _iview) { - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - - assert(image->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_BIT); - - iview->image = image; - iview->format = anv_format_for_vk_format(pCreateInfo->format); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image_view, iview, _iview); - assert(anv_format_is_depth_or_stencil(image->format)); - assert(anv_format_is_depth_or_stencil(iview->format)); + anv_image_view_destroy(device, iview); } struct anv_surface * @@ -538,8 +573,22 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag { switch (aspect_mask) { case VK_IMAGE_ASPECT_COLOR_BIT: - assert(anv_format_is_color(image->format)); - return &image->color_surface; + /* Dragons will eat you. + * + * Meta attaches all destination surfaces as color render targets. Guess + * what surface the Meta Dragons really want. + */ + if (image->format->depth_format && image->format->has_stencil) { + anv_finishme("combined depth stencil formats"); + return &image->depth_surface; + } else if (image->format->depth_format) { + return &image->depth_surface; + } else if (image->format->has_stencil) { + return &image->stencil_surface; + } else { + return &image->color_surface; + } + break; case VK_IMAGE_ASPECT_DEPTH_BIT: assert(image->format->depth_format); return &image->depth_surface; @@ -562,67 +611,52 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag } } -/** The attachment may be a color view into a non-color image. 
*/ -struct anv_surface * -anv_image_get_surface_for_color_attachment(struct anv_image *image) -{ - if (anv_format_is_color(image->format)) { - return &image->color_surface; - } else if (image->format->depth_format) { - return &image->depth_surface; - } else if (image->format->has_stencil) { - return &image->stencil_surface; - } else { - unreachable("image has bad format"); - return NULL; - } -} - -void -anv_color_attachment_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkAttachmentViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - - assert(image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - - switch (device->info.gen) { - case 7: - gen7_color_attachment_view_init(iview, device, pCreateInfo, cmd_buffer); - break; - case 8: - gen8_color_attachment_view_init(iview, device, pCreateInfo, cmd_buffer); - break; - default: - unreachable("unsupported gen\n"); - } -} - VkResult anv_CreateAttachmentView(VkDevice _device, - const VkAttachmentViewCreateInfo *pCreateInfo, + const VkAttachmentViewCreateInfo *info, VkAttachmentView *pView) { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_image_view *iview; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO); + assert(info->sType == VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO); iview = anv_device_alloc(device, sizeof(*iview), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (iview == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - const struct anv_format *format = - anv_format_for_vk_format(pCreateInfo->format); - - if (anv_format_is_depth_or_stencil(format)) { - anv_depth_stencil_view_init(iview, pCreateInfo); - } else { - anv_color_attachment_view_init(iview, device, pCreateInfo, NULL); - } + const struct anv_format *format = anv_format_for_vk_format(info->format); + + VkImageAspectFlags aspect_mask = 0; + if (format->depth_format) + aspect_mask |= VK_IMAGE_ASPECT_DEPTH_BIT; + if 
(format->has_stencil) + aspect_mask |= VK_IMAGE_ASPECT_STENCIL_BIT; + if (!aspect_mask) + aspect_mask |= VK_IMAGE_ASPECT_COLOR_BIT; + + anv_image_view_init(iview, device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = info->image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = info->format, + .channels = { + .r = VK_CHANNEL_SWIZZLE_R, + .g = VK_CHANNEL_SWIZZLE_G, + .b = VK_CHANNEL_SWIZZLE_B, + .a = VK_CHANNEL_SWIZZLE_A, + }, + .subresourceRange = { + .aspectMask = aspect_mask, + .baseMipLevel = info->mipLevel, + .mipLevels = 1, + .baseArrayLayer = info->baseArraySlice, + .arraySize = info->arraySize, + }, + }, + NULL); pView->handle = anv_image_view_to_handle(iview).handle; @@ -636,12 +670,5 @@ anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _aview) VkImageView _iview = { .handle = _aview.handle }; ANV_FROM_HANDLE(anv_image_view, iview, _iview); - /* Depth and stencil render targets have no RENDER_SURFACE_STATE. Instead, - * they use 3DSTATE_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. 
- */ - if (!anv_format_is_depth_or_stencil(iview->format)) { - anv_state_pool_free(&device->surface_state_pool, iview->surface_state); - } - - anv_device_free(device, iview); + anv_image_view_destroy(device, iview); } diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index d69863b8375..c7c50ef87a9 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1029,14 +1029,25 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, cmd_buffer); struct anv_image_view dest_iview; - anv_color_attachment_view_init(&dest_iview, cmd_buffer->device, - &(VkAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = dest_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = copy_format, - .mipLevel = 0, - .baseArraySlice = 0, - .arraySize = 1, + .channels = { + .r = VK_CHANNEL_SWIZZLE_R, + .g = VK_CHANNEL_SWIZZLE_G, + .b = VK_CHANNEL_SWIZZLE_B, + .a = VK_CHANNEL_SWIZZLE_A, + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArrayLayer = 0, + .arraySize = 1, + }, }, cmd_buffer); @@ -1194,14 +1205,25 @@ void anv_CmdCopyImage( anv_finishme("FINISHME: copy multiple depth layers"); struct anv_image_view dest_iview; - anv_color_attachment_view_init(&dest_iview, cmd_buffer->device, - &(VkAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = destImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = dest_image->format->vk_format, - .mipLevel = pRegions[r].destSubresource.mipLevel, - .baseArraySlice = dest_array_slice, - .arraySize = 1, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + 
.subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].destSubresource.mipLevel, + .mipLevels = 1, + .baseArrayLayer = dest_array_slice, + .arraySize = 1 + }, }, cmd_buffer); @@ -1283,14 +1305,25 @@ void anv_CmdBlitImage( anv_finishme("FINISHME: copy multiple depth layers"); struct anv_image_view dest_iview; - anv_color_attachment_view_init(&dest_iview, cmd_buffer->device, - &(VkAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = destImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = dest_image->format->vk_format, - .mipLevel = pRegions[r].destSubresource.mipLevel, - .baseArraySlice = dest_array_slice, - .arraySize = 1, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].destSubresource.mipLevel, + .mipLevels = 1, + .baseArrayLayer = dest_array_slice, + .arraySize = 1 + }, }, cmd_buffer); @@ -1413,14 +1446,25 @@ void anv_CmdCopyBufferToImage( anv_finishme("FINISHME: copy multiple depth layers"); struct anv_image_view dest_iview; - anv_color_attachment_view_init(&dest_iview, cmd_buffer->device, - &(VkAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = anv_image_to_handle(dest_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = proxy_format, - .mipLevel = pRegions[r].imageSubresource.mipLevel, - .baseArraySlice = dest_array_slice, - .arraySize = 1, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = 
VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .mipLevels = 1, + .baseArrayLayer = dest_array_slice, + .arraySize = 1 + }, }, cmd_buffer); @@ -1497,14 +1541,25 @@ void anv_CmdCopyImageToBuffer( dest_format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, &pRegions[r]); struct anv_image_view dest_iview; - anv_color_attachment_view_init(&dest_iview, cmd_buffer->device, - &(VkAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = destImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = dest_format, - .mipLevel = 0, - .baseArraySlice = 0, - .arraySize = 1, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArrayLayer = 0, + .arraySize = 1 + }, }, cmd_buffer); @@ -1562,14 +1617,25 @@ void anv_CmdClearColorImage( for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { struct anv_image_view iview; - anv_color_attachment_view_init(&iview, cmd_buffer->device, - &(VkAttachmentViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + anv_image_view_init(&iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = _image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = image->format->vk_format, - .mipLevel = pRanges[r].baseMipLevel + l, - .baseArraySlice = pRanges[r].baseArrayLayer + s, - .arraySize = 1, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRanges[r].baseMipLevel + l, + .mipLevels = 1, + .baseArrayLayer = 
pRanges[r].baseArrayLayer + s, + .arraySize = 1 + }, }, cmd_buffer); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 06ab1dc65a6..e9735706a67 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1202,7 +1202,7 @@ struct anv_image { VkExtent3D extent; uint32_t levels; uint32_t array_size; - VkImageUsageFlags usage; /**< VkImageCreateInfo::usage */ + VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ VkDeviceSize size; uint32_t alignment; @@ -1211,8 +1211,10 @@ struct anv_image { struct anv_bo *bo; VkDeviceSize offset; - /** RENDER_SURFACE_STATE.SurfaceType */ - uint8_t surf_type; + uint8_t surface_type; /**< RENDER_SURFACE_STATE.SurfaceType */ + + bool needs_nonrt_surface_state:1; + bool needs_color_rt_surface_state:1; /** * Image subsurfaces @@ -1247,10 +1249,15 @@ struct anv_buffer_view { struct anv_image_view { const struct anv_image *image; /**< VkImageViewCreateInfo::image */ const struct anv_format *format; /**< VkImageViewCreateInfo::format */ - struct anv_state surface_state; /**< RENDER_SURFACE_STATE */ struct anv_bo *bo; uint32_t offset; /**< Offset into bo. */ VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ + + /** RENDER_SURFACE_STATE when using image as a color render target. */ + struct anv_state color_rt_surface_state; + + /** RENDER_SURFACE_STATE when using image as a non render target. 
*/ + struct anv_state nonrt_surface_state; }; struct anv_image_create_info { @@ -1268,9 +1275,6 @@ struct anv_surface * anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlags aspect_mask); -struct anv_surface * -anv_image_get_surface_for_color_attachment(struct anv_image *image); - void anv_image_view_init(struct anv_image_view *view, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, @@ -1288,21 +1292,6 @@ gen8_image_view_init(struct anv_image_view *iview, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -void anv_color_attachment_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkAttachmentViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer); - -void gen7_color_attachment_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkAttachmentViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer); - -void gen8_color_attachment_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkAttachmentViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer); - VkResult anv_buffer_view_create(struct anv_device *device, const VkBufferViewCreateInfo *pCreateInfo, struct anv_buffer_view **bview_out); diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 5829d03d3e3..2497e39490d 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -113,6 +113,18 @@ static const uint32_t vk_to_gen_compare_op[] = { [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, }; +static struct anv_state +gen7_alloc_surface_state(struct anv_device *device, + struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer) { + return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, + 64, 64); + } else { + return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } +} + VkResult gen7_CreateSampler( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, @@ -272,18 +284,17 @@ gen7_image_view_init(struct 
anv_image_view *iview, ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + struct anv_surface *surface = anv_image_get_surface_for_aspect_mask(image, range->aspectMask); const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); - const struct anv_image_view_info view_type_info = - anv_image_view_info_for_vk_image_view_type(pCreateInfo->viewType); - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) anv_finishme("non-2D image views"); + iview->image = image; iview->bo = image->bo; iview->offset = image->offset + surface->offset; iview->format = anv_format_for_vk_format(pCreateInfo->format); @@ -302,7 +313,7 @@ gen7_image_view_init(struct anv_image_view *iview, } struct GEN7_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = view_type_info.surface_type, + .SurfaceType = image->surface_type, .SurfaceArray = image->array_size > 1, .SurfaceFormat = format->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], @@ -316,7 +327,8 @@ gen7_image_view_init(struct anv_image_view *iview, .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, - .RenderCacheReadWriteMode = false, + + .RenderCacheReadWriteMode = 0, /* TEMPLATE */ .Height = image->extent.height - 1, .Width = image->extent.width - 1, @@ -329,14 +341,8 @@ gen7_image_view_init(struct anv_image_view *iview, .SurfaceObjectControlState = GEN7_MOCS, - /* For render target surfaces, the hardware interprets field MIPCount/LOD as - * LOD. The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. 
- */ - .MIPCountLOD = range->mipLevels - 1, - .SurfaceMinLOD = range->baseMipLevel, + .MIPCountLOD = 0, /* TEMPLATE */ + .SurfaceMinLOD = 0, /* TEMPLATE */ .MCSEnable = false, .RedClearColor = 0, @@ -347,85 +353,28 @@ gen7_image_view_init(struct anv_image_view *iview, .SurfaceBaseAddress = { NULL, iview->offset }, }; - if (cmd_buffer) { - iview->surface_state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); - } else { - iview->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } - - GEN7_RENDER_SURFACE_STATE_pack(NULL, iview->surface_state.map, - &surface_state); -} - -void -gen7_color_attachment_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkAttachmentViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_surface *surface = - anv_image_get_surface_for_color_attachment(image); + if (image->needs_nonrt_surface_state) { + iview->nonrt_surface_state = + gen7_alloc_surface_state(device, cmd_buffer); - assert(image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - anv_assert(pCreateInfo->arraySize > 0); - anv_assert(pCreateInfo->mipLevel < image->levels); - anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); + surface_state.RenderCacheReadWriteMode = false; - iview->bo = image->bo; - iview->offset = image->offset + surface->offset; - iview->format = anv_format_for_vk_format(pCreateInfo->format); - - iview->extent = (VkExtent3D) { - .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), - .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), - .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), - }; - - uint32_t depth = 1; - if (pCreateInfo->arraySize > 1) { - depth = pCreateInfo->arraySize; - } else if (image->extent.depth > 1) { - depth = image->extent.depth; - } - - if (cmd_buffer) { - iview->surface_state = - 
anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); - } else { - iview->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } - - struct GEN7_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = SURFTYPE_2D, - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = iview->format->surface_format, - .SurfaceVerticalAlignment = anv_valign[surface->v_align], - .SurfaceHorizontalAlignment = anv_halign[surface->h_align], - - /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if - * Tiled Surface is False." + /* For non render target surfaces, the hardware interprets field + * MIPCount/LOD as MIPCount. The range of levels accessible by the + * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. */ - .TiledSurface = surface->tile_mode > LINEAR, - .TileWalk = surface->tile_mode == YMAJOR ? TILEWALK_YMAJOR : TILEWALK_XMAJOR, + surface_state.SurfaceMinLOD = range->baseMipLevel; + surface_state.MIPCountLOD = range->mipLevels - 1; - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .RenderCacheReadWriteMode = WriteOnlyCache, + GEN7_RENDER_SURFACE_STATE_pack(NULL, iview->nonrt_surface_state.map, + &surface_state); + } - .Height = image->extent.height - 1, - .Width = image->extent.width - 1, - .Depth = depth - 1, - .SurfacePitch = surface->stride - 1, - .MinimumArrayElement = pCreateInfo->baseArraySlice, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, + if (image->needs_color_rt_surface_state) { + iview->color_rt_surface_state = + gen7_alloc_surface_state(device, cmd_buffer); - .SurfaceObjectControlState = GEN7_MOCS, + surface_state.RenderCacheReadWriteMode = WriteOnlyCache; /* For render target surfaces, the hardware interprets field MIPCount/LOD as * LOD. The Broadwell PRM says: @@ -433,19 +382,10 @@ gen7_color_attachment_view_init(struct anv_image_view *iview, * MIPCountLOD defines the LOD that will be rendered into. * SurfaceMinLOD is ignored. 
*/ - .SurfaceMinLOD = 0, - .MIPCountLOD = pCreateInfo->mipLevel, - - .MCSEnable = false, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, iview->offset }, - - }; + surface_state.MIPCountLOD = range->baseMipLevel; + surface_state.SurfaceMinLOD = 0; - GEN7_RENDER_SURFACE_STATE_pack(NULL, iview->surface_state.map, - &surface_state); + GEN7_RENDER_SURFACE_STATE_pack(NULL, iview->color_rt_surface_state.map, + &surface_state); + } } diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 3c479e1c23e..5095ce060bf 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -139,6 +139,18 @@ static const uint8_t anv_valign[] = { [16] = VALIGN16, }; +static struct anv_state +gen8_alloc_surface_state(struct anv_device *device, + struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer) { + return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, + 64, 64); + } else { + return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } +} + void gen8_image_view_init(struct anv_image_view *iview, struct anv_device *device, @@ -148,6 +160,7 @@ gen8_image_view_init(struct anv_image_view *iview, ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + struct anv_surface *surface = anv_image_get_surface_for_aspect_mask(image, range->aspectMask); @@ -157,9 +170,7 @@ gen8_image_view_init(struct anv_image_view *iview, const struct anv_format *format_info = anv_format_for_vk_format(pCreateInfo->format); - const struct anv_image_view_info view_type_info = - anv_image_view_info_for_vk_image_view_type(pCreateInfo->viewType); - + iview->image = image; iview->bo = image->bo; iview->offset = image->offset + surface->offset; iview->format = format_info; @@ -219,7 +230,7 @@ gen8_image_view_init(struct anv_image_view *iview, }; struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = 
view_type_info.surface_type, + .SurfaceType = image->surface_type, .SurfaceArray = image->array_size > 1, .SurfaceFormat = format_info->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], @@ -248,12 +259,8 @@ gen8_image_view_init(struct anv_image_view *iview, .XOffset = 0, .YOffset = 0, - /* For sampler surfaces, the hardware interprets field MIPCount/LOD as - * MIPCount. The range of levels accessible by the sampler engine is - * [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. - */ - .MIPCountLOD = range->mipLevels - 1, - .SurfaceMinLOD = range->baseMipLevel, + .MIPCountLOD = 0, /* TEMPLATE */ + .SurfaceMinLOD = 0, /* TEMPLATE */ .AuxiliarySurfaceMode = AUX_NONE, .RedClearColor = 0, @@ -268,149 +275,37 @@ gen8_image_view_init(struct anv_image_view *iview, .SurfaceBaseAddress = { NULL, iview->offset }, }; - if (cmd_buffer) { - iview->surface_state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); - } else { - iview->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } - - GEN8_RENDER_SURFACE_STATE_pack(NULL, iview->surface_state.map, - &surface_state); -} - -void -gen8_color_attachment_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkAttachmentViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - struct anv_surface *surface = - anv_image_get_surface_for_color_attachment(image); - const struct anv_format *format_info = - anv_format_for_vk_format(pCreateInfo->format); - - uint32_t depth = 1; /* RENDER_SURFACE_STATE::Depth */ - uint32_t rt_view_extent = 1; /* RENDER_SURFACE_STATE::RenderTargetViewExtent */ - - assert(image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - anv_assert(pCreateInfo->arraySize > 0); - anv_assert(pCreateInfo->mipLevel < image->levels); - anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); - - iview->bo = image->bo; - iview->offset = 
image->offset + surface->offset; - iview->format = anv_format_for_vk_format(pCreateInfo->format); - - iview->extent = (VkExtent3D) { - .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), - .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), - .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), - }; + if (image->needs_nonrt_surface_state) { + iview->nonrt_surface_state = + gen8_alloc_surface_state(device, cmd_buffer); - switch (image->type) { - case VK_IMAGE_TYPE_1D: - case VK_IMAGE_TYPE_2D: - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: - * - * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced - * by one for each increase from zero of Minimum Array Element. For - * example, if Minimum Array Element is set to 1024 on a 2D surface, - * the range of this field is reduced to [0,1023]. - */ - depth = pCreateInfo->arraySize; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: - * - * For Render Target and Typed Dataport 1D and 2D Surfaces: - * This field must be set to the same value as the Depth field. + /* For non render target surfaces, the hardware interprets field + * MIPCount/LOD as MIPCount. The range of levels accessible by the + * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. */ - rt_view_extent = depth; - break; - case VK_IMAGE_TYPE_3D: - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: - * - * If the volume texture is MIP-mapped, this field specifies the - * depth of the base MIP level. - */ - depth = image->extent.depth; + surface_state.SurfaceMinLOD = range->baseMipLevel; + surface_state.MIPCountLOD = range->mipLevels - 1; - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: - * - * For Render Target and Typed Dataport 3D Surfaces: This field - * indicates the extent of the accessible 'R' coordinates minus 1 on - * the LOD currently being rendered to. 
- */ - rt_view_extent = iview->extent.depth; - break; - default: - unreachable(!"bad VkImageType"); - } - - if (cmd_buffer) { - iview->surface_state = - anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); - } else { - iview->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + GEN8_RENDER_SURFACE_STATE_pack(NULL, iview->nonrt_surface_state.map, + &surface_state); } - struct GEN8_RENDER_SURFACE_STATE surface_state = { - .SurfaceType = image->type, - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = format_info->surface_format, - .SurfaceVerticalAlignment = anv_valign[surface->v_align], - .SurfaceHorizontalAlignment = anv_halign[surface->h_align], - .TileMode = surface->tile_mode, - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GEN8_MOCS, - - /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in - * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have - * both Base Mip Level fields nonzero". - */ - .BaseMipLevel = 0.0, - - .SurfaceQPitch = surface->qpitch >> 2, - .Height = image->extent.height - 1, - .Width = image->extent.width - 1, - .Depth = depth - 1, - .SurfacePitch = surface->stride - 1, - .RenderTargetViewExtent = rt_view_extent - 1, - .MinimumArrayElement = pCreateInfo->baseArraySlice, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, + if (image->needs_color_rt_surface_state) { + iview->color_rt_surface_state = + gen8_alloc_surface_state(device, cmd_buffer); - /* For render target surfaces, the hardware interprets field MIPCount/LOD as - * LOD. The Broadwell PRM says: + /* For render target surfaces, the hardware interprets field + * MIPCount/LOD as LOD. The Broadwell PRM says: * * MIPCountLOD defines the LOD that will be rendered into. * SurfaceMinLOD is ignored. 
*/ - .SurfaceMinLOD = 0, - .MIPCountLOD = pCreateInfo->mipLevel, - - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, iview->offset }, - }; + surface_state.MIPCountLOD = range->baseMipLevel; + surface_state.SurfaceMinLOD = 0; - GEN8_RENDER_SURFACE_STATE_pack(NULL, iview->surface_state.map, - &surface_state); + GEN8_RENDER_SURFACE_STATE_pack(NULL, iview->color_rt_surface_state.map, + &surface_state); + } } VkResult gen8_CreateSampler( -- cgit v1.2.3 From 85ff3cfde32fe1614d93071f6a866bf797b4d12f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 07:28:46 -0700 Subject: vk: Drop -Wextra Eliminates lots of warnings due to anv_meta.c's inclusion of nir.h. I like the extra warnings, and they should probably get fixed. However, git-grep reveals that no other Mesa directory uses -Wextra. Building Vulkan produces a lot of compiler warnings from core Mesa headers that no other Mesa developer sees, and hence no other Mesa developer will fix. 
--- src/vulkan/Makefile.am | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index eebe54d9475..0d0abd27300 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -51,11 +51,11 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src/vulkan libvulkan_la_CFLAGS = \ - -Wall -Wextra -Wno-unused-parameter -fvisibility=hidden -O0 -g \ + -Wall -Wno-unused-parameter -fvisibility=hidden -O0 -g \ -Wstrict-prototypes -Wmissing-prototypes -Wno-override-init libvulkan_la_CXXFLAGS = \ - -Wall -Wextra -Wno-unused-parameter -fvisibility=hidden -O0 -g + -Wall -Wno-unused-parameter -fvisibility=hidden -O0 -g VULKAN_SOURCES = \ anv_allocator.c \ -- cgit v1.2.3 From 03dd72279f871c242a47bc4d03aef128bd5ae792 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 09:03:47 -0700 Subject: vk/image: Fix retrieval of anv_surface for depthstencil aspect If anv_image_get_surface_for_aspect_mask() is given a combined depthstencil aspect mask, and the image has a stencil surface but no depth surface, then return the stencil surface. Hacks on hacks. --- src/vulkan/anv_image.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 3f4d9b15c92..5973be1391b 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -596,15 +596,21 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag assert(image->format->has_stencil); return &image->stencil_surface; case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: - /* FINISHME: The Vulkan spec (git a511ba2) requires support for combined - * depth stencil formats. Specifically, it states: - * - * At least one of ename:VK_FORMAT_D24_UNORM_S8_UINT or - * ename:VK_FORMAT_D32_SFLOAT_S8_UINT must be supported. 
- */ - anv_finishme("combined depthstencil aspect"); - assert(image->format->depth_format); - return &image->depth_surface; + if (image->format->depth_format && image->format->has_stencil) { + /* FINISHME: The Vulkan spec (git a511ba2) requires support for combined + * depth stencil formats. Specifically, it states: + * + * At least one of ename:VK_FORMAT_D24_UNORM_S8_UINT or + * ename:VK_FORMAT_D32_SFLOAT_S8_UINT must be supported. + */ + anv_finishme("combined depthstencil aspect"); + return &image->depth_surface; + } else if (image->format->depth_format) { + return &image->depth_surface; + } else if (image->format->has_stencil) { + return &image->stencil_surface; + } + /* fallthrough */ default: unreachable("image does not have aspect"); return NULL; -- cgit v1.2.3 From 6dea1a9ba1acd7a957feb9ad43504e7accc4f8f8 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 07:30:52 -0700 Subject: vk/0.170.2: Merge VkAttachmentView into VkImageView --- include/vulkan/vulkan.h | 39 +++++---------------------------------- src/vulkan/anv_device.c | 3 +-- src/vulkan/anv_image.c | 34 ++-------------------------------- src/vulkan/anv_meta.c | 21 ++++----------------- 4 files changed, 12 insertions(+), 85 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index ce1d2d88e9f..4fe9e67fabc 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -91,7 +91,6 @@ VK_DEFINE_NONDISP_HANDLE(VkEvent) VK_DEFINE_NONDISP_HANDLE(VkQueryPool) VK_DEFINE_NONDISP_HANDLE(VkBufferView) VK_DEFINE_NONDISP_HANDLE(VkImageView) -VK_DEFINE_NONDISP_HANDLE(VkAttachmentView) VK_DEFINE_NONDISP_HANDLE(VkShaderModule) VK_DEFINE_NONDISP_HANDLE(VkShader) VK_DEFINE_NONDISP_HANDLE(VkPipelineCache) @@ -962,10 +961,10 @@ typedef enum { typedef VkFlags VkImageAspectFlags; typedef enum { - VK_ATTACHMENT_VIEW_CREATE_READ_ONLY_DEPTH_BIT = 0x00000001, - VK_ATTACHMENT_VIEW_CREATE_READ_ONLY_STENCIL_BIT = 0x00000002, -} VkAttachmentViewCreateFlagBits; 
-typedef VkFlags VkAttachmentViewCreateFlags; + VK_IMAGE_VIEW_CREATE_READ_ONLY_DEPTH_BIT = 0x00000001, + VK_IMAGE_VIEW_CREATE_READ_ONLY_STENCIL_BIT = 0x00000002, +} VkImageViewCreateFlagBits; +typedef VkFlags VkImageViewCreateFlags; typedef VkFlags VkShaderModuleCreateFlags; typedef VkFlags VkShaderCreateFlags; @@ -1508,17 +1507,6 @@ typedef struct { VkImageSubresourceRange subresourceRange; } VkImageViewCreateInfo; -typedef struct { - VkStructureType sType; - const void* pNext; - VkImage image; - VkFormat format; - uint32_t mipLevel; - uint32_t baseArraySlice; - uint32_t arraySize; - VkAttachmentViewCreateFlags flags; -} VkAttachmentViewCreateInfo; - typedef struct { VkStructureType sType; const void* pNext; @@ -1784,7 +1772,6 @@ typedef struct { VkBufferView bufferView; VkSampler sampler; VkImageView imageView; - VkAttachmentView attachmentView; VkImageLayout imageLayout; } VkDescriptorInfo; @@ -1845,17 +1832,12 @@ typedef struct { uint32_t stencilBackRef; } VkDynamicDepthStencilStateCreateInfo; -typedef struct { - VkAttachmentView view; - VkImageLayout layout; -} VkAttachmentBindInfo; - typedef struct { VkStructureType sType; const void* pNext; VkRenderPass renderPass; uint32_t attachmentCount; - const VkAttachmentBindInfo* pAttachments; + const VkImageView* pAttachments; uint32_t width; uint32_t height; uint32_t layers; @@ -2137,8 +2119,6 @@ typedef void (VKAPI *PFN_vkDestroyImage)(VkDevice device, VkImage image); typedef VkResult (VKAPI *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); typedef VkResult (VKAPI *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); typedef void (VKAPI *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView); -typedef VkResult (VKAPI *PFN_vkCreateAttachmentView)(VkDevice device, const VkAttachmentViewCreateInfo* pCreateInfo, VkAttachmentView* pView); -typedef void (VKAPI 
*PFN_vkDestroyAttachmentView)(VkDevice device, VkAttachmentView attachmentView); typedef VkResult (VKAPI *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); typedef void (VKAPI *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule); typedef VkResult (VKAPI *PFN_vkCreateShader)(VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); @@ -2542,15 +2522,6 @@ void VKAPI vkDestroyImageView( VkDevice device, VkImageView imageView); -VkResult VKAPI vkCreateAttachmentView( - VkDevice device, - const VkAttachmentViewCreateInfo* pCreateInfo, - VkAttachmentView* pView); - -void VKAPI vkDestroyAttachmentView( - VkDevice device, - VkAttachmentView attachmentView); - VkResult VKAPI vkCreateShaderModule( VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index e7f74266e68..b10cc4512b4 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1968,8 +1968,7 @@ VkResult anv_CreateFramebuffer( framebuffer->attachment_count = pCreateInfo->attachmentCount; for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - VkAttachmentView _aview = pCreateInfo->pAttachments[i].view; - VkImageView _iview = { _aview.handle }; + VkImageView _iview = pCreateInfo->pAttachments[i]; framebuffer->attachments[i] = anv_image_view_from_handle(_iview); } diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 5973be1391b..8522d0e8318 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -617,23 +617,7 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag } } -VkResult -anv_CreateAttachmentView(VkDevice _device, - const VkAttachmentViewCreateInfo *info, - VkAttachmentView *pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_image_view *iview; - - assert(info->sType == VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO); - - iview = 
anv_device_alloc(device, sizeof(*iview), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (iview == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - const struct anv_format *format = anv_format_for_vk_format(info->format); - +#if 0 VkImageAspectFlags aspect_mask = 0; if (format->depth_format) aspect_mask |= VK_IMAGE_ASPECT_DEPTH_BIT; @@ -663,18 +647,4 @@ anv_CreateAttachmentView(VkDevice _device, }, }, NULL); - - pView->handle = anv_image_view_to_handle(iview).handle; - - return VK_SUCCESS; -} - -void -anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _aview) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - VkImageView _iview = { .handle = _aview.handle }; - ANV_FROM_HANDLE(anv_image_view, iview, _iview); - - anv_image_view_destroy(device, iview); -} +#endif diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index c7c50ef87a9..a0b7070ebfe 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -738,10 +738,6 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VkExtent3D dest_extent) { struct anv_device *device = cmd_buffer->device; - - VkImageView dest_iview_h = anv_image_view_to_handle(dest_iview); - VkAttachmentView dest_aview_h = { .handle = dest_iview_h.handle }; - VkDescriptorPool dummy_desc_pool = { .handle = 1 }; struct blit_vb_data { @@ -837,11 +833,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, &(VkFramebufferCreateInfo) { .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .attachmentCount = 1, - .pAttachments = (VkAttachmentBindInfo[]) { - { - .view = dest_aview_h, - .layout = VK_IMAGE_LAYOUT_GENERAL - } + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(dest_iview), }, .width = dest_iview->extent.width, .height = dest_iview->extent.height, @@ -1639,19 +1632,13 @@ void anv_CmdClearColorImage( }, cmd_buffer); - VkImageView iview_h = anv_image_view_to_handle(&iview); - VkAttachmentView aview_h = { .handle = iview_h.handle }; - VkFramebuffer fb; 
anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), &(VkFramebufferCreateInfo) { .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .attachmentCount = 1, - .pAttachments = (VkAttachmentBindInfo[]) { - { - .view = aview_h, - .layout = VK_IMAGE_LAYOUT_GENERAL - } + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&iview), }, .width = iview.extent.width, .height = iview.extent.height, -- cgit v1.2.3 From b1c024a9321cba865d152eb5f64ac74cff47125e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 09:09:37 -0700 Subject: vk/meta: Fix -Wstrict-prototypes In C, functions with no arguments require a void argument. build_nir_clear_fragment_shader() lacked that. Fixes: anv_meta.c:70:1: warning: function declaration isn't a prototype [-Wstrict-prototypes] --- src/vulkan/anv_meta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index a0b7070ebfe..c0e4112f363 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -67,7 +67,7 @@ build_nir_vertex_shader(bool attr_flat) } static nir_shader * -build_nir_clear_fragment_shader() +build_nir_clear_fragment_shader(void) { nir_builder b; -- cgit v1.2.3 From 941a1059541197dfb356b45f1e003745533c4b0e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 7 Oct 2015 09:27:31 -0700 Subject: anv/private: Add a typed_memcpy macro This is amazingly helpful when copying arrays of things around. 
--- src/vulkan/anv_private.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index e9735706a67..ac9422f6b90 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -115,6 +115,11 @@ anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) (b) = __builtin_ffs(__dword) - 1, __dword; \ __dword &= ~(1 << (b))) +#define typed_memcpy(dest, src, count) ({ \ + static_assert(sizeof(*src) == sizeof(*dest), ""); \ + memcpy((dest), (src), (count) * sizeof(*(src))); \ +}) + /* Define no kernel as 1, since that's an illegal offset for a kernel */ #define NO_KERNEL 1 -- cgit v1.2.3 From 55fcca306b349a959feecae8f84a6b62f532114f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 7 Oct 2015 09:31:53 -0700 Subject: anv: Add a dynamic state data structure and basic helpers --- src/vulkan/anv_cmd_buffer.c | 72 +++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 56 +++++++++++++++++++++++++++++++++-- 2 files changed, 125 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 7587f2ed103..78b17ea15ee 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -38,6 +38,78 @@ * is concerned, most of anv_cmd_buffer is magic. */ +/* TODO: These are taken from GLES. 
We should check the Vulkan spec */ +const struct anv_dynamic_state default_dynamic_state = { + .viewport = { + .count = 0, + }, + .scissor = { + .count = 0, + }, + .line_width = 1.0f, + .depth_bias = { + .bias = 0.0f, + .clamp = 0.0f, + .slope_scaled = 0.0f, + }, + .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, + .depth_bounds = { + .min = 0.0f, + .max = 1.0f, + }, + .stencil_compare_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_write_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_reference = { + .front = 0u, + .back = 0u, + }, +}; + +void +anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask) +{ + if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + dest->viewport.count = src->viewport.count; + typed_memcpy(dest->viewport.viewports, src->viewport.viewports, + src->viewport.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + dest->scissor.count = src->scissor.count; + typed_memcpy(dest->scissor.scissors, src->scissor.scissors, + src->scissor.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) + dest->line_width = src->line_width; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) + dest->depth_bias = src->depth_bias; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) + typed_memcpy(dest->blend_constants, src->blend_constants, 4); + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) + dest->depth_bounds = src->depth_bounds; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) + dest->stencil_compare_mask = src->stencil_compare_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) + dest->stencil_write_mask = src->stencil_write_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) + dest->stencil_reference = src->stencil_reference; +} + static void anv_cmd_state_init(struct anv_cmd_state *state) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index ac9422f6b90..047c1cf4355 
100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -763,9 +763,11 @@ void anv_descriptor_set_destroy(struct anv_device *device, struct anv_descriptor_set *set); -#define MAX_VBS 32 -#define MAX_SETS 8 -#define MAX_RTS 8 +#define MAX_VBS 32 +#define MAX_SETS 8 +#define MAX_RTS 8 +#define MAX_VIEWPORTS 16 +#define MAX_SCISSORS 16 #define MAX_PUSH_CONSTANTS_SIZE 128 #define MAX_DYNAMIC_BUFFERS 16 #define MAX_IMAGES 8 @@ -839,6 +841,54 @@ struct anv_push_constants { struct brw_image_param images[MAX_IMAGES]; }; +struct anv_dynamic_state { + struct { + uint32_t count; + VkViewport viewports[MAX_VIEWPORTS]; + } viewport; + + struct { + uint32_t count; + VkRect2D scissors[MAX_SCISSORS]; + } scissor; + + float line_width; + + struct { + float bias; + float clamp; + float slope_scaled; + } depth_bias; + + float blend_constants[4]; + + struct { + float min; + float max; + } depth_bounds; + + struct { + uint32_t front; + uint32_t back; + } stencil_compare_mask; + + struct { + uint32_t front; + uint32_t back; + } stencil_write_mask; + + struct { + uint32_t front; + uint32_t back; + } stencil_reference; +}; + +extern const struct anv_dynamic_state default_dynamic_state; + +void anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask); + /** State required while building cmd buffer */ struct anv_cmd_state { uint32_t current_pipeline; -- cgit v1.2.3 From daf68a9465050c798012125e7150017c09576e0f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 6 Oct 2015 17:21:44 -0700 Subject: vk/0.170.2: Switch to the new dynamic state model --- include/vulkan/vulkan.h | 185 +++++++++++++++-------------- src/vulkan/anv_cmd_buffer.c | 272 ++++++++++++++++++++++++++++++++++++++----- src/vulkan/anv_device.c | 29 +---- src/vulkan/anv_meta.c | 102 +++++----------- src/vulkan/anv_private.h | 73 +++--------- src/vulkan/gen7_cmd_buffer.c | 121 +++++++++++-------- src/vulkan/gen7_state.c | 72 ------------ 
src/vulkan/gen8_cmd_buffer.c | 116 +++++++++++------- src/vulkan/gen8_state.c | 83 ------------- 9 files changed, 534 insertions(+), 519 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 4fe9e67fabc..abf87d81e83 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -101,10 +101,6 @@ VK_DEFINE_NONDISP_HANDLE(VkDescriptorSetLayout) VK_DEFINE_NONDISP_HANDLE(VkSampler) VK_DEFINE_NONDISP_HANDLE(VkDescriptorPool) VK_DEFINE_NONDISP_HANDLE(VkDescriptorSet) -VK_DEFINE_NONDISP_HANDLE(VkDynamicViewportState) -VK_DEFINE_NONDISP_HANDLE(VkDynamicRasterState) -VK_DEFINE_NONDISP_HANDLE(VkDynamicColorBlendState) -VK_DEFINE_NONDISP_HANDLE(VkDynamicDepthStencilState) VK_DEFINE_NONDISP_HANDLE(VkFramebuffer) VK_DEFINE_NONDISP_HANDLE(VkCmdPool) @@ -683,6 +679,22 @@ typedef enum { VK_BLEND_OP_MAX_ENUM = 0x7FFFFFFF } VkBlendOp; +typedef enum { + VK_DYNAMIC_STATE_VIEWPORT = 0, + VK_DYNAMIC_STATE_SCISSOR = 1, + VK_DYNAMIC_STATE_LINE_WIDTH = 2, + VK_DYNAMIC_STATE_DEPTH_BIAS = 3, + VK_DYNAMIC_STATE_BLEND_CONSTANTS = 4, + VK_DYNAMIC_STATE_DEPTH_BOUNDS = 5, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK = 6, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK = 7, + VK_DYNAMIC_STATE_STENCIL_REFERENCE = 8, + VK_DYNAMIC_STATE_BEGIN_RANGE = VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_END_RANGE = VK_DYNAMIC_STATE_STENCIL_REFERENCE, + VK_DYNAMIC_STATE_NUM = (VK_DYNAMIC_STATE_STENCIL_REFERENCE - VK_DYNAMIC_STATE_VIEWPORT + 1), + VK_DYNAMIC_STATE_MAX_ENUM = 0x7FFFFFFF +} VkDynamicState; + typedef enum { VK_TEX_FILTER_NEAREST = 0, VK_TEX_FILTER_LINEAR = 1, @@ -1068,6 +1080,13 @@ typedef enum { } VkCmdBufferResetFlagBits; typedef VkFlags VkCmdBufferResetFlags; +typedef enum { + VK_STENCIL_FACE_NONE = 0, + VK_STENCIL_FACE_FRONT_BIT = 0x00000001, + VK_STENCIL_FACE_BACK_BIT = 0x00000002, +} VkStencilFaceFlagBits; +typedef VkFlags VkStencilFaceFlags; + typedef enum { VK_QUERY_CONTROL_CONSERVATIVE_BIT = 0x00000001, } VkQueryControlFlagBits; @@ -1616,6 +1635,9 
@@ typedef struct { VkStructureType sType; const void* pNext; uint32_t viewportCount; + const VkViewport* pViewports; + uint32_t scissorCount; + const VkRect2D* pScissors; } VkPipelineViewportStateCreateInfo; typedef struct { @@ -1626,6 +1648,11 @@ typedef struct { VkFillMode fillMode; VkCullMode cullMode; VkFrontFace frontFace; + VkBool32 depthBiasEnable; + float depthBias; + float depthBiasClamp; + float slopeScaledDepthBias; + float lineWidth; } VkPipelineRasterStateCreateInfo; typedef struct { @@ -1642,6 +1669,9 @@ typedef struct { VkStencilOp stencilPassOp; VkStencilOp stencilDepthFailOp; VkCompareOp stencilCompareOp; + uint32_t stencilCompareMask; + uint32_t stencilWriteMask; + uint32_t stencilReference; } VkStencilOpState; typedef struct { @@ -1654,6 +1684,8 @@ typedef struct { VkBool32 stencilTestEnable; VkStencilOpState front; VkStencilOpState back; + float minDepthBounds; + float maxDepthBounds; } VkPipelineDepthStencilStateCreateInfo; typedef struct { @@ -1671,12 +1703,21 @@ typedef struct { VkStructureType sType; const void* pNext; VkBool32 alphaToCoverageEnable; + VkBool32 alphaToOneEnable; VkBool32 logicOpEnable; VkLogicOp logicOp; uint32_t attachmentCount; const VkPipelineColorBlendAttachmentState* pAttachments; + float blendConst[4]; } VkPipelineColorBlendStateCreateInfo; +typedef struct { + VkStructureType sType; + const void* pNext; + uint32_t dynamicStateCount; + const VkDynamicState* pDynamicStates; +} VkPipelineDynamicStateCreateInfo; + typedef struct { VkStructureType sType; const void* pNext; @@ -1690,6 +1731,7 @@ typedef struct { const VkPipelineMultisampleStateCreateInfo* pMultisampleState; const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState; const VkPipelineColorBlendStateCreateInfo* pColorBlendState; + const VkPipelineDynamicStateCreateInfo* pDynamicState; VkPipelineCreateFlags flags; VkPipelineLayout layout; VkRenderPass renderPass; @@ -1798,40 +1840,6 @@ typedef struct { uint32_t count; } VkCopyDescriptorSet; -typedef struct 
{ - VkStructureType sType; - const void* pNext; - uint32_t viewportAndScissorCount; - const VkViewport* pViewports; - const VkRect2D* pScissors; -} VkDynamicViewportStateCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - float depthBias; - float depthBiasClamp; - float slopeScaledDepthBias; - float lineWidth; -} VkDynamicRasterStateCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - float blendConst[4]; -} VkDynamicColorBlendStateCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - float minDepthBounds; - float maxDepthBounds; - uint32_t stencilReadMask; - uint32_t stencilWriteMask; - uint32_t stencilFrontRef; - uint32_t stencilBackRef; -} VkDynamicDepthStencilStateCreateInfo; - typedef struct { VkStructureType sType; const void* pNext; @@ -2143,14 +2151,6 @@ typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescripto typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets); typedef VkResult (VKAPI *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t count, const VkDescriptorSet* pDescriptorSets); typedef VkResult (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); -typedef VkResult (VKAPI *PFN_vkCreateDynamicViewportState)(VkDevice device, const VkDynamicViewportStateCreateInfo* pCreateInfo, VkDynamicViewportState* pState); -typedef void (VKAPI *PFN_vkDestroyDynamicViewportState)(VkDevice device, VkDynamicViewportState dynamicViewportState); -typedef VkResult (VKAPI *PFN_vkCreateDynamicRasterState)(VkDevice device, const VkDynamicRasterStateCreateInfo* pCreateInfo, VkDynamicRasterState* pState); -typedef void (VKAPI 
*PFN_vkDestroyDynamicRasterState)(VkDevice device, VkDynamicRasterState dynamicRasterState); -typedef VkResult (VKAPI *PFN_vkCreateDynamicColorBlendState)(VkDevice device, const VkDynamicColorBlendStateCreateInfo* pCreateInfo, VkDynamicColorBlendState* pState); -typedef void (VKAPI *PFN_vkDestroyDynamicColorBlendState)(VkDevice device, VkDynamicColorBlendState dynamicColorBlendState); -typedef VkResult (VKAPI *PFN_vkCreateDynamicDepthStencilState)(VkDevice device, const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, VkDynamicDepthStencilState* pState); -typedef void (VKAPI *PFN_vkDestroyDynamicDepthStencilState)(VkDevice device, VkDynamicDepthStencilState dynamicDepthStencilState); typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); typedef void (VKAPI *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer); typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); @@ -2165,10 +2165,15 @@ typedef VkResult (VKAPI *PFN_vkBeginCommandBuffer)(VkCmdBuffer cmdBuffer, const typedef VkResult (VKAPI *PFN_vkEndCommandBuffer)(VkCmdBuffer cmdBuffer); typedef VkResult (VKAPI *PFN_vkResetCommandBuffer)(VkCmdBuffer cmdBuffer, VkCmdBufferResetFlags flags); typedef void (VKAPI *PFN_vkCmdBindPipeline)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); -typedef void (VKAPI *PFN_vkCmdBindDynamicViewportState)(VkCmdBuffer cmdBuffer, VkDynamicViewportState dynamicViewportState); -typedef void (VKAPI *PFN_vkCmdBindDynamicRasterState)(VkCmdBuffer cmdBuffer, VkDynamicRasterState dynamicRasterState); -typedef void (VKAPI *PFN_vkCmdBindDynamicColorBlendState)(VkCmdBuffer cmdBuffer, VkDynamicColorBlendState dynamicColorBlendState); -typedef void (VKAPI *PFN_vkCmdBindDynamicDepthStencilState)(VkCmdBuffer cmdBuffer, VkDynamicDepthStencilState dynamicDepthStencilState); +typedef 
void (VKAPI *PFN_vkCmdSetViewport)(VkCmdBuffer cmdBuffer, uint32_t viewportCount, const VkViewport* pViewports); +typedef void (VKAPI *PFN_vkCmdSetScissor)(VkCmdBuffer cmdBuffer, uint32_t scissorCount, const VkRect2D* pScissors); +typedef void (VKAPI *PFN_vkCmdSetLineWidth)(VkCmdBuffer cmdBuffer, float lineWidth); +typedef void (VKAPI *PFN_vkCmdSetDepthBias)(VkCmdBuffer cmdBuffer, float depthBias, float depthBiasClamp, float slopeScaledDepthBias); +typedef void (VKAPI *PFN_vkCmdSetBlendConstants)(VkCmdBuffer cmdBuffer, const float blendConst[4]); +typedef void (VKAPI *PFN_vkCmdSetDepthBounds)(VkCmdBuffer cmdBuffer, float minDepthBounds, float maxDepthBounds); +typedef void (VKAPI *PFN_vkCmdSetStencilCompareMask)(VkCmdBuffer cmdBuffer, VkStencilFaceFlags faceMask, uint32_t stencilCompareMask); +typedef void (VKAPI *PFN_vkCmdSetStencilWriteMask)(VkCmdBuffer cmdBuffer, VkStencilFaceFlags faceMask, uint32_t stencilWriteMask); +typedef void (VKAPI *PFN_vkCmdSetStencilReference)(VkCmdBuffer cmdBuffer, VkStencilFaceFlags faceMask, uint32_t stencilReference); typedef void (VKAPI *PFN_vkCmdBindDescriptorSets)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); typedef void (VKAPI *PFN_vkCmdBindIndexBuffer)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); typedef void (VKAPI *PFN_vkCmdBindVertexBuffers)(VkCmdBuffer cmdBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); @@ -2645,42 +2650,6 @@ VkResult VKAPI vkUpdateDescriptorSets( uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); -VkResult VKAPI vkCreateDynamicViewportState( - VkDevice device, - const VkDynamicViewportStateCreateInfo* pCreateInfo, - VkDynamicViewportState* pState); - -void VKAPI vkDestroyDynamicViewportState( - VkDevice 
device, - VkDynamicViewportState dynamicViewportState); - -VkResult VKAPI vkCreateDynamicRasterState( - VkDevice device, - const VkDynamicRasterStateCreateInfo* pCreateInfo, - VkDynamicRasterState* pState); - -void VKAPI vkDestroyDynamicRasterState( - VkDevice device, - VkDynamicRasterState dynamicRasterState); - -VkResult VKAPI vkCreateDynamicColorBlendState( - VkDevice device, - const VkDynamicColorBlendStateCreateInfo* pCreateInfo, - VkDynamicColorBlendState* pState); - -void VKAPI vkDestroyDynamicColorBlendState( - VkDevice device, - VkDynamicColorBlendState dynamicColorBlendState); - -VkResult VKAPI vkCreateDynamicDepthStencilState( - VkDevice device, - const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, - VkDynamicDepthStencilState* pState); - -void VKAPI vkDestroyDynamicDepthStencilState( - VkDevice device, - VkDynamicDepthStencilState dynamicDepthStencilState); - VkResult VKAPI vkCreateFramebuffer( VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, @@ -2743,21 +2712,49 @@ void VKAPI vkCmdBindPipeline( VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); -void VKAPI vkCmdBindDynamicViewportState( +void VKAPI vkCmdSetViewport( + VkCmdBuffer cmdBuffer, + uint32_t viewportCount, + const VkViewport* pViewports); + +void VKAPI vkCmdSetScissor( + VkCmdBuffer cmdBuffer, + uint32_t scissorCount, + const VkRect2D* pScissors); + +void VKAPI vkCmdSetLineWidth( + VkCmdBuffer cmdBuffer, + float lineWidth); + +void VKAPI vkCmdSetDepthBias( + VkCmdBuffer cmdBuffer, + float depthBias, + float depthBiasClamp, + float slopeScaledDepthBias); + +void VKAPI vkCmdSetBlendConstants( + VkCmdBuffer cmdBuffer, + const float blendConst[4]); + +void VKAPI vkCmdSetDepthBounds( VkCmdBuffer cmdBuffer, - VkDynamicViewportState dynamicViewportState); + float minDepthBounds, + float maxDepthBounds); -void VKAPI vkCmdBindDynamicRasterState( +void VKAPI vkCmdSetStencilCompareMask( VkCmdBuffer cmdBuffer, - VkDynamicRasterState dynamicRasterState); + VkStencilFaceFlags 
faceMask, + uint32_t stencilCompareMask); -void VKAPI vkCmdBindDynamicColorBlendState( +void VKAPI vkCmdSetStencilWriteMask( VkCmdBuffer cmdBuffer, - VkDynamicColorBlendState dynamicColorBlendState); + VkStencilFaceFlags faceMask, + uint32_t stencilWriteMask); -void VKAPI vkCmdBindDynamicDepthStencilState( +void VKAPI vkCmdSetStencilReference( VkCmdBuffer cmdBuffer, - VkDynamicDepthStencilState dynamicDepthStencilState); + VkStencilFaceFlags faceMask, + uint32_t stencilReference); void VKAPI vkCmdBindDescriptorSets( VkCmdBuffer cmdBuffer, diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 78b17ea15ee..8486bd05a2c 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -113,22 +113,16 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest, static void anv_cmd_state_init(struct anv_cmd_state *state) { - state->rs_state = NULL; - state->vp_state = NULL; - state->cb_state = NULL; - state->ds_state = NULL; memset(&state->state_vf, 0, sizeof(state->state_vf)); memset(&state->descriptors, 0, sizeof(state->descriptors)); memset(&state->push_constants, 0, sizeof(state->push_constants)); - state->dirty = 0; + state->dirty = ~0; state->vb_dirty = 0; state->descriptors_dirty = 0; state->push_constants_dirty = 0; state->pipeline = NULL; - state->vp_state = NULL; - state->rs_state = NULL; - state->ds_state = NULL; + state->dynamic = default_dynamic_state; state->gen7.index_buffer = NULL; } @@ -333,48 +327,128 @@ void anv_CmdBindPipeline( } } -void anv_CmdBindDynamicViewportState( +void anv_CmdSetViewport( VkCmdBuffer cmdBuffer, - VkDynamicViewportState dynamicViewportState) + uint32_t viewportCount, + const VkViewport* pViewports) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState); - cmd_buffer->state.vp_state = vp_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY; + cmd_buffer->state.dynamic.viewport.count = viewportCount; + 
memcpy(cmd_buffer->state.dynamic.viewport.viewports, + pViewports, viewportCount * sizeof(*pViewports)); + + cmd_buffer->state.dirty |= ANV_DYNAMIC_VIEWPORT_DIRTY; +} + +void anv_CmdSetScissor( + VkCmdBuffer cmdBuffer, + uint32_t scissorCount, + const VkRect2D* pScissors) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + cmd_buffer->state.dynamic.scissor.count = scissorCount; + memcpy(cmd_buffer->state.dynamic.scissor.scissors, + pScissors, scissorCount * sizeof(*pScissors)); + + cmd_buffer->state.dirty |= ANV_DYNAMIC_SCISSOR_DIRTY; +} + +void anv_CmdSetLineWidth( + VkCmdBuffer cmdBuffer, + float lineWidth) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + cmd_buffer->state.dynamic.line_width = lineWidth; + + cmd_buffer->state.dirty |= ANV_DYNAMIC_LINE_WIDTH_DIRTY; +} + +void anv_CmdSetDepthBias( + VkCmdBuffer cmdBuffer, + float depthBias, + float depthBiasClamp, + float slopeScaledDepthBias) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + cmd_buffer->state.dynamic.depth_bias.bias = depthBias; + cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; + cmd_buffer->state.dynamic.depth_bias.slope_scaled = slopeScaledDepthBias; + + cmd_buffer->state.dirty |= ANV_DYNAMIC_DEPTH_BIAS_DIRTY; +} + +void anv_CmdSetBlendConstants( + VkCmdBuffer cmdBuffer, + const float blendConst[4]) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + memcpy(cmd_buffer->state.dynamic.blend_constants, + blendConst, sizeof(float) * 4); + + cmd_buffer->state.dirty |= ANV_DYNAMIC_BLEND_CONSTANTS_DIRTY; +} + +void anv_CmdSetDepthBounds( + VkCmdBuffer cmdBuffer, + float minDepthBounds, + float maxDepthBounds) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; + cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; + + cmd_buffer->state.dirty |= ANV_DYNAMIC_DEPTH_BOUNDS_DIRTY; } -void anv_CmdBindDynamicRasterState( +void anv_CmdSetStencilCompareMask( 
VkCmdBuffer cmdBuffer, - VkDynamicRasterState dynamicRasterState) + VkStencilFaceFlags faceMask, + uint32_t stencilCompareMask) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState); - cmd_buffer->state.rs_state = rs_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY; + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_compare_mask.front = stencilCompareMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_compare_mask.back = stencilCompareMask; + + cmd_buffer->state.dirty |= ANV_DYNAMIC_STENCIL_COMPARE_MASK_DIRTY; } -void anv_CmdBindDynamicColorBlendState( +void anv_CmdSetStencilWriteMask( VkCmdBuffer cmdBuffer, - VkDynamicColorBlendState dynamicColorBlendState) + VkStencilFaceFlags faceMask, + uint32_t stencilWriteMask) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState); - cmd_buffer->state.cb_state = cb_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY; + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_write_mask.front = stencilWriteMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_write_mask.back = stencilWriteMask; + + cmd_buffer->state.dirty |= ANV_DYNAMIC_STENCIL_WRITE_MASK_DIRTY; } -void anv_CmdBindDynamicDepthStencilState( +void anv_CmdSetStencilReference( VkCmdBuffer cmdBuffer, - VkDynamicDepthStencilState dynamicDepthStencilState) + VkStencilFaceFlags faceMask, + uint32_t stencilReference) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState); - cmd_buffer->state.ds_state = ds_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY; + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_reference.front = stencilReference; + if (faceMask & 
VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_reference.back = stencilReference; + + cmd_buffer->state.dirty |= ANV_DYNAMIC_STENCIL_REFERENCE_DIRTY; } void anv_CmdBindDescriptorSets( @@ -736,6 +810,148 @@ anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, } } +static void +emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkViewport *viewports) +{ + struct anv_state sf_clip_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + + /* The gen7 state struct has just the matrix and guardband fields, the + * gen8 struct adds the min/max viewport fields. */ + struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, + .ViewportMatrixElementm30 = vp->originX + vp->width / 2, + .ViewportMatrixElementm31 = vp->originY + vp->height / 2, + .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->originX, + .XMaxViewPort = vp->originX + vp->width - 1, + .YMinViewPort = vp->originY, + .YMaxViewPort = vp->originY + vp->height - 1, + }; + + struct GEN7_CC_VIEWPORT cc_viewport = { + .MinimumDepth = vp->minDepth, + .MaximumDepth = vp->maxDepth + }; + + GEN8_SF_CLIP_VIEWPORT_pack(NULL, sf_clip_state.map + i * 64, + &sf_clip_viewport); + GEN7_CC_VIEWPORT_pack(NULL, cc_state.map + i * 32, &cc_viewport); + } + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = cc_state.offset); + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = 
sf_clip_state.offset); +} + +void +anv_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.viewport.count > 0) { + emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count, + cmd_buffer->state.dynamic.viewport.viewports); + } else { + /* If viewport count is 0, this is taken to mean "use the default" */ + emit_viewport_state(cmd_buffer, 1, + &(VkViewport) { + .originX = 0.0f, + .originY = 0.0f, + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + } +} + +static inline int64_t +clamp_int64(int64_t x, int64_t min, int64_t max) +{ + if (x < min) + return min; + else if (x < max) + return x; + else + return max; +} + +static void +emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkRect2D *scissors) +{ + struct anv_state scissor_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 32, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkRect2D *s = &scissors[i]; + + /* Since xmax and ymax are inclusive, we have to have xmax < xmin or + * ymax < ymin for empty clips. In case clip x, y, width height are all + * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't + * what we want. Just special case empty clips and produce a canonical + * empty clip. */ + static const struct GEN7_SCISSOR_RECT empty_scissor = { + .ScissorRectangleYMin = 1, + .ScissorRectangleXMin = 1, + .ScissorRectangleYMax = 0, + .ScissorRectangleXMax = 0 + }; + + const int max = 0xffff; + struct GEN7_SCISSOR_RECT scissor = { + /* Do this math using int64_t so overflow gets clamped correctly. 
*/ + .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), + .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), + .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), + .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) + }; + + if (s->extent.width <= 0 || s->extent.height <= 0) { + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 32, + &empty_scissor); + } else { + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 32, &scissor); + } + } + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = scissor_state.offset); +} + +void +anv_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.scissor.count > 0) { + emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, + cmd_buffer->state.dynamic.scissor.scissors); + } else { + /* Emit a default scissor based on the currently bound framebuffer */ + emit_scissor_state(cmd_buffer, 1, + &(VkRect2D) { + .offset = { .x = 0, .y = 0, }, + .extent = { + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + }, + }); + } +} + void anv_CmdSetEvent( VkCmdBuffer cmdBuffer, VkEvent event, diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index b10cc4512b4..aec900065e6 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -389,7 +389,7 @@ VkResult anv_GetPhysicalDeviceLimits( .primitiveRestartForPatches = UINT32_MAX, .maxSamplerLodBias = 16, .maxSamplerAnisotropy = 16, - .maxViewports = 16, + .maxViewports = MAX_VIEWPORTS, .maxDynamicViewportStates = UINT32_MAX, .maxViewportDimensions = { (1 << 14), (1 << 14) }, .viewportBoundsRange = { -1.0, 1.0 }, /* FIXME */ @@ -1778,6 +1778,8 @@ VkResult anv_UpdateDescriptorSets( // State object functions +#if 0 + static inline int64_t clamp_int64(int64_t x, int64_t min, int64_t max) { @@ -1949,6 +1951,8 @@ void 
anv_DestroyDynamicDepthStencilState( anv_device_free(device, ds_state); } +#endif + VkResult anv_CreateFramebuffer( VkDevice _device, const VkFramebufferCreateInfo* pCreateInfo, @@ -1976,27 +1980,6 @@ VkResult anv_CreateFramebuffer( framebuffer->height = pCreateInfo->height; framebuffer->layers = pCreateInfo->layers; - anv_CreateDynamicViewportState(anv_device_to_handle(device), - &(VkDynamicViewportStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO, - .viewportAndScissorCount = 1, - .pViewports = (VkViewport[]) { - { - .originX = 0, - .originY = 0, - .width = pCreateInfo->width, - .height = pCreateInfo->height, - .minDepth = 0, - .maxDepth = 1 - }, - }, - .pScissors = (VkRect2D[]) { - { { 0, 0 }, - { pCreateInfo->width, pCreateInfo->height } }, - } - }, - &framebuffer->vp_state); - *pFramebuffer = anv_framebuffer_to_handle(framebuffer); return VK_SUCCESS; @@ -2009,8 +1992,6 @@ void anv_DestroyFramebuffer( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); - anv_DestroyDynamicViewportState(anv_device_to_handle(device), - fb->vp_state); anv_device_free(device, fb); } diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index c0e4112f363..565575ab219 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -293,20 +293,22 @@ struct anv_saved_state { struct anv_vertex_binding old_vertex_bindings[NUM_VB_USED]; struct anv_descriptor_set *old_descriptor_set0; struct anv_pipeline *old_pipeline; - struct anv_dynamic_ds_state *old_ds_state; - struct anv_dynamic_cb_state *old_cb_state; + uint32_t dynamic_flags; + struct anv_dynamic_state dynamic; }; static void anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, - struct anv_saved_state *state) + struct anv_saved_state *state, + uint32_t dynamic_state) { state->old_pipeline = cmd_buffer->state.pipeline; state->old_descriptor_set0 = cmd_buffer->state.descriptors[0].set; memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, 
sizeof(state->old_vertex_bindings)); - state->old_ds_state = cmd_buffer->state.ds_state; - state->old_cb_state = cmd_buffer->state.cb_state; + state->dynamic_flags = dynamic_state; + anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic, + dynamic_state); } static void @@ -322,15 +324,9 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; - if (cmd_buffer->state.ds_state != state->old_ds_state) { - cmd_buffer->state.ds_state = state->old_ds_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY; - } - - if (cmd_buffer->state.cb_state != state->old_cb_state) { - cmd_buffer->state.cb_state = state->old_cb_state; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY; - } + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, + state->dynamic_flags); + cmd_buffer->state.dirty |= state->dynamic_flags; } struct vue_header { @@ -396,23 +392,6 @@ meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.clear.pipeline); - /* We don't need anything here, only set if not already set. 
*/ - if (cmd_buffer->state.rs_state == NULL) - anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.shared.rs_state); - - if (cmd_buffer->state.vp_state == NULL) - anv_CmdBindDynamicViewportState(anv_cmd_buffer_to_handle(cmd_buffer), - cmd_buffer->state.framebuffer->vp_state); - - if (cmd_buffer->state.ds_state == NULL) - anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.shared.ds_state); - - if (cmd_buffer->state.cb_state == NULL) - anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.shared.cb_state); - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, num_instances, 0, 0); } @@ -466,7 +445,9 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, } } - anv_cmd_buffer_save(cmd_buffer, &saved_state); + anv_cmd_buffer_save(cmd_buffer, &saved_state, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); + cmd_buffer->state.dynamic.viewport.count = 0; struct anv_subpass subpass = { .input_count = 0, @@ -703,20 +684,8 @@ static void meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_saved_state *saved_state) { - struct anv_device *device = cmd_buffer->device; - - anv_cmd_buffer_save(cmd_buffer, saved_state); - - /* We don't need anything here, only set if not already set. 
*/ - if (cmd_buffer->state.rs_state == NULL) - anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.shared.rs_state); - if (cmd_buffer->state.ds_state == NULL) - anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.shared.ds_state); - - anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.shared.cb_state); + anv_cmd_buffer_save(cmd_buffer, saved_state, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); } struct blit_region { @@ -913,8 +882,15 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } - anv_CmdBindDynamicViewportState(anv_cmd_buffer_to_handle(cmd_buffer), - anv_framebuffer_from_handle(fb)->vp_state); + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 1, + &(VkViewport) { + .originX = 0.0f, + .originY = 0.0f, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, @@ -1604,7 +1580,9 @@ void anv_CmdClearColorImage( ANV_FROM_HANDLE(anv_image, image, _image); struct anv_saved_state saved_state; - anv_cmd_buffer_save(cmd_buffer, &saved_state); + anv_cmd_buffer_save(cmd_buffer, &saved_state, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); + cmd_buffer->state.dynamic.viewport.count = 0; for (uint32_t r = 0; r < rangeCount; r++) { for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { @@ -1769,24 +1747,6 @@ anv_device_init_meta(struct anv_device *device) { anv_device_init_meta_clear_state(device); anv_device_init_meta_blit_state(device); - - ANV_CALL(CreateDynamicRasterState)(anv_device_to_handle(device), - &(VkDynamicRasterStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO, - }, - &device->meta_state.shared.rs_state); - - ANV_CALL(CreateDynamicColorBlendState)(anv_device_to_handle(device), - &(VkDynamicColorBlendStateCreateInfo) 
{ - .sType = VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO - }, - &device->meta_state.shared.cb_state); - - ANV_CALL(CreateDynamicDepthStencilState)(anv_device_to_handle(device), - &(VkDynamicDepthStencilStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO - }, - &device->meta_state.shared.ds_state); } void @@ -1805,12 +1765,4 @@ anv_device_finish_meta(struct anv_device *device) device->meta_state.blit.pipeline_layout); anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), device->meta_state.blit.ds_layout); - - /* Shared */ - anv_DestroyDynamicRasterState(anv_device_to_handle(device), - device->meta_state.shared.rs_state); - anv_DestroyDynamicColorBlendState(anv_device_to_handle(device), - device->meta_state.shared.cb_state); - anv_DestroyDynamicDepthStencilState(anv_device_to_handle(device), - device->meta_state.shared.ds_state); } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 047c1cf4355..3d47739aae7 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -450,12 +450,6 @@ struct anv_meta_state { VkPipelineLayout pipeline_layout; VkDescriptorSetLayout ds_layout; } blit; - - struct { - VkDynamicRasterState rs_state; - VkDynamicColorBlendState cb_state; - VkDynamicDepthStencilState ds_state; - } shared; }; struct anv_queue { @@ -680,40 +674,6 @@ struct anv_device_memory { void * map; }; -struct anv_dynamic_vp_state { - struct anv_state sf_clip_vp; - struct anv_state cc_vp; - struct anv_state scissor; -}; - -struct anv_dynamic_rs_state { - struct { - uint32_t sf[GEN7_3DSTATE_SF_length]; - } gen7; - - struct { - uint32_t sf[GEN8_3DSTATE_SF_length]; - uint32_t raster[GEN8_3DSTATE_RASTER_length]; - } gen8; -}; - -struct anv_dynamic_ds_state { - struct { - uint32_t depth_stencil_state[GEN7_DEPTH_STENCIL_STATE_length]; - uint32_t color_calc_state[GEN8_COLOR_CALC_STATE_length]; - } gen7; - - struct { - uint32_t wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; - uint32_t 
color_calc_state[GEN8_COLOR_CALC_STATE_length]; - } gen8; -}; - -struct anv_dynamic_cb_state { - uint32_t color_calc_state[GEN8_COLOR_CALC_STATE_length]; - -}; - struct anv_descriptor_slot { int8_t dynamic_slot; uint8_t index; @@ -800,12 +760,18 @@ struct anv_buffer { VkDeviceSize offset; }; -#define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 0) -#define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) -#define ANV_CMD_BUFFER_DS_DIRTY (1 << 3) -#define ANV_CMD_BUFFER_CB_DIRTY (1 << 4) -#define ANV_CMD_BUFFER_VP_DIRTY (1 << 5) -#define ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY (1 << 6) +/* The first 9 correspond to 1 << VK_DYNAMIC_STATE_FOO */ +#define ANV_DYNAMIC_VIEWPORT_DIRTY (1 << 0) +#define ANV_DYNAMIC_SCISSOR_DIRTY (1 << 1) +#define ANV_DYNAMIC_LINE_WIDTH_DIRTY (1 << 2) +#define ANV_DYNAMIC_DEPTH_BIAS_DIRTY (1 << 3) +#define ANV_DYNAMIC_BLEND_CONSTANTS_DIRTY (1 << 4) +#define ANV_DYNAMIC_DEPTH_BOUNDS_DIRTY (1 << 5) +#define ANV_DYNAMIC_STENCIL_COMPARE_MASK_DIRTY (1 << 6) +#define ANV_DYNAMIC_STENCIL_WRITE_MASK_DIRTY (1 << 7) +#define ANV_DYNAMIC_STENCIL_REFERENCE_DIRTY (1 << 8) +#define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 9) +#define ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY (1 << 10) struct anv_vertex_binding { struct anv_buffer * buffer; @@ -903,14 +869,11 @@ struct anv_cmd_state { struct anv_framebuffer * framebuffer; struct anv_render_pass * pass; struct anv_subpass * subpass; - struct anv_dynamic_rs_state * rs_state; - struct anv_dynamic_ds_state * ds_state; - struct anv_dynamic_vp_state * vp_state; - struct anv_dynamic_cb_state * cb_state; uint32_t state_vf[GEN8_3DSTATE_VF_length]; struct anv_vertex_binding vertex_bindings[MAX_VBS]; struct anv_descriptor_set_binding descriptors[MAX_SETS]; struct anv_push_constants * push_constants[VK_SHADER_STAGE_NUM]; + struct anv_dynamic_state dynamic; struct { struct anv_buffer * index_buffer; @@ -1028,6 +991,9 @@ anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, VkResult anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer 
*cmd_buffer); +void anv_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); + void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); @@ -1369,9 +1335,6 @@ struct anv_framebuffer { uint32_t height; uint32_t layers; - /* Viewport for clears */ - VkDynamicViewportState vp_state; - uint32_t attachment_count; const struct anv_image_view * attachments[0]; }; @@ -1464,10 +1427,6 @@ ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView); ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_cb_state, VkDynamicColorBlendState) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_ds_state, VkDynamicDepthStencilState) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_rs_state, VkDynamicRasterState) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_vp_state, VkDynamicViewportState) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 709e82e78a0..0106aa74aa6 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -256,60 +256,87 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.descriptors_dirty) anv_flush_descriptor_sets(cmd_buffer); - if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { - struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS, - .ScissorRectPointer = vp_state->scissor.offset); - anv_batch_emit(&cmd_buffer->batch, 
GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC, - .CCViewportPointer = vp_state->cc_vp.offset); - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, - .SFClipViewportPointer = vp_state->sf_clip_vp.offset); + if (cmd_buffer->state.dirty & ANV_DYNAMIC_VIEWPORT_DIRTY) + anv_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_DYNAMIC_SCISSOR_DIRTY) + anv_cmd_buffer_emit_scissor(cmd_buffer); + + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_DYNAMIC_LINE_WIDTH_DIRTY | + ANV_DYNAMIC_DEPTH_BIAS_DIRTY)) { + + bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || + cmd_buffer->state.dynamic.depth_bias.slope_scaled != 0.0f; + + uint32_t sf_dw[GEN8_3DSTATE_SF_length]; + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + .LineWidth = cmd_buffer->state.dynamic.line_width, + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope_scaled, + .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp + }; + GEN7_3DSTATE_SF_pack(NULL, sf_dw, &sf); + + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf); } - if (cmd_buffer->state.dirty & - (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.rs_state->gen7.sf, - pipeline->gen7.sf); - } + if (cmd_buffer->state.dirty & (ANV_DYNAMIC_BLEND_CONSTANTS_DIRTY | + ANV_DYNAMIC_STENCIL_REFERENCE_DIRTY)) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN7_COLOR_CALC_STATE_length, 64); + struct GEN7_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = 
cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + .StencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.front, + .BackFaceStencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.back, + }; + GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); - if (cmd_buffer->state.dirty & - (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) { - struct anv_state state; - - if (cmd_buffer->state.ds_state == NULL) - state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - pipeline->gen7.depth_stencil_state, - GEN7_COLOR_CALC_STATE_length, 64); - else - state = anv_cmd_buffer_merge_dynamic(cmd_buffer, - cmd_buffer->state.ds_state->gen7.depth_stencil_state, - pipeline->gen7.depth_stencil_state, - GEN7_DEPTH_STENCIL_STATE_length, 64); - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, - .PointertoDEPTH_STENCIL_STATE = state.offset); + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset); } - if (cmd_buffer->state.dirty & - (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) { - struct anv_state state; - if (cmd_buffer->state.ds_state == NULL) - state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->state.cb_state->color_calc_state, - GEN7_COLOR_CALC_STATE_length, 64); - else if (cmd_buffer->state.cb_state == NULL) - state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->state.ds_state->gen7.color_calc_state, - GEN7_COLOR_CALC_STATE_length, 64); - else - state = anv_cmd_buffer_merge_dynamic(cmd_buffer, - cmd_buffer->state.ds_state->gen7.color_calc_state, - cmd_buffer->state.cb_state->color_calc_state, - GEN7_COLOR_CALC_STATE_length, 64); + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_DYNAMIC_STENCIL_COMPARE_MASK_DIRTY | + ANV_DYNAMIC_STENCIL_WRITE_MASK_DIRTY)) { + uint32_t depth_stencil_dw[GEN7_DEPTH_STENCIL_STATE_length]; + + struct 
GEN7_DEPTH_STENCIL_STATE depth_stencil = { + /* Is this what we need to do? */ + .StencilBufferWriteEnable = + cmd_buffer->state.dynamic.stencil_write_mask.front != 0, + + .StencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, + .StencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + }; + GEN7_DEPTH_STENCIL_STATE_pack(NULL, depth_stencil_dw, &depth_stencil); + + struct anv_state ds_state = + anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw, + pipeline->gen7.depth_stencil_state, + GEN7_DEPTH_STENCIL_STATE_length, 64); anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_CC_STATE_POINTERS, - .ColorCalcStatePointer = state.offset); + GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, + .PointertoDEPTH_STENCIL_STATE = ds_state.offset); } if (cmd_buffer->state.gen7.index_buffer && diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 2497e39490d..d317fa4ec16 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -192,78 +192,6 @@ VkResult gen7_CreateSampler( return VK_SUCCESS; } -VkResult gen7_CreateDynamicRasterState( - VkDevice _device, - const VkDynamicRasterStateCreateInfo* pCreateInfo, - VkDynamicRasterState* pState) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_rs_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - bool enable_bias = pCreateInfo->depthBias != 0.0f || - pCreateInfo->slopeScaledDepthBias != 0.0f; - - struct GEN7_3DSTATE_SF sf = { - GEN7_3DSTATE_SF_header, - .LineWidth = pCreateInfo->lineWidth, - .GlobalDepthOffsetEnableSolid = enable_bias, - 
.GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, - .GlobalDepthOffsetConstant = pCreateInfo->depthBias, - .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias, - .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp - }; - - GEN7_3DSTATE_SF_pack(NULL, state->gen7.sf, &sf); - - *pState = anv_dynamic_rs_state_to_handle(state); - - return VK_SUCCESS; -} - -VkResult gen7_CreateDynamicDepthStencilState( - VkDevice _device, - const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, - VkDynamicDepthStencilState* pState) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_ds_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GEN7_DEPTH_STENCIL_STATE depth_stencil_state = { - .StencilTestMask = pCreateInfo->stencilReadMask & 0xff, - .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, - .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff, - .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, - }; - - GEN7_DEPTH_STENCIL_STATE_pack(NULL, state->gen7.depth_stencil_state, - &depth_stencil_state); - - struct GEN7_COLOR_CALC_STATE color_calc_state = { - .StencilReferenceValue = pCreateInfo->stencilFrontRef, - .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef - }; - - GEN7_COLOR_CALC_STATE_pack(NULL, state->gen7.color_calc_state, &color_calc_state); - - *pState = anv_dynamic_ds_state_to_handle(state); - - return VK_SUCCESS; -} static const uint8_t anv_halign[] = { [4] = HALIGN_4, diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 1cd4a8561e6..a1db0170c09 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -121,57 +121,95 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if 
(cmd_buffer->state.push_constants_dirty) gen8_cmd_buffer_flush_push_constants(cmd_buffer); - if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { - struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, - .ScissorRectPointer = vp_state->scissor.offset); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, - .CCViewportPointer = vp_state->cc_vp.offset); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, - .SFClipViewportPointer = vp_state->sf_clip_vp.offset); - } + if (cmd_buffer->state.dirty & ANV_DYNAMIC_VIEWPORT_DIRTY) + anv_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_DYNAMIC_SCISSOR_DIRTY) + anv_cmd_buffer_emit_scissor(cmd_buffer); if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_CMD_BUFFER_RS_DIRTY)) { - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.rs_state->gen8.sf, - pipeline->gen8.sf); - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.rs_state->gen8.raster, - pipeline->gen8.raster); + ANV_DYNAMIC_LINE_WIDTH_DIRTY)) { + uint32_t sf_dw[GEN8_3DSTATE_SF_length]; + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .LineWidth = cmd_buffer->state.dynamic.line_width, + }; + GEN8_3DSTATE_SF_pack(NULL, sf_dw, &sf); + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen8.sf); } - if (cmd_buffer->state.ds_state && - (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_CMD_BUFFER_DS_DIRTY))) { - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.ds_state->gen8.wm_depth_stencil, - pipeline->gen8.wm_depth_stencil); + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_DYNAMIC_DEPTH_BIAS_DIRTY)) { + bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || + cmd_buffer->state.dynamic.depth_bias.slope_scaled != 0.0f; + + uint32_t raster_dw[GEN8_3DSTATE_RASTER_length]; + struct 
GEN8_3DSTATE_RASTER raster = { + GEN8_3DSTATE_RASTER_header, + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope_scaled, + .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp + }; + GEN8_3DSTATE_RASTER_pack(NULL, raster_dw, &raster); + anv_batch_emit_merge(&cmd_buffer->batch, raster_dw, + pipeline->gen8.raster); } - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY | - ANV_CMD_BUFFER_DS_DIRTY)) { - struct anv_state state; - if (cmd_buffer->state.ds_state == NULL) - state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->state.cb_state->color_calc_state, - GEN8_COLOR_CALC_STATE_length, 64); - else if (cmd_buffer->state.cb_state == NULL) - state = anv_cmd_buffer_emit_dynamic(cmd_buffer, - cmd_buffer->state.ds_state->gen8.color_calc_state, - GEN8_COLOR_CALC_STATE_length, 64); - else - state = anv_cmd_buffer_merge_dynamic(cmd_buffer, - cmd_buffer->state.ds_state->gen8.color_calc_state, - cmd_buffer->state.cb_state->color_calc_state, - GEN8_COLOR_CALC_STATE_length, 64); + if (cmd_buffer->state.dirty & (ANV_DYNAMIC_BLEND_CONSTANTS_DIRTY | + ANV_DYNAMIC_STENCIL_REFERENCE_DIRTY)) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN8_COLOR_CALC_STATE_length, 64); + struct GEN8_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + .StencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.front, + .BackFaceStencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.back, + }; + 
GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CC_STATE_POINTERS, - .ColorCalcStatePointer = state.offset, + .ColorCalcStatePointer = cc_state.offset, .ColorCalcStatePointerValid = true); } + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_DYNAMIC_STENCIL_COMPARE_MASK_DIRTY | + ANV_DYNAMIC_STENCIL_WRITE_MASK_DIRTY)) { + uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + + /* Is this what we need to do? */ + .StencilBufferWriteEnable = + cmd_buffer->state.dynamic.stencil_write_mask.front != 0, + + .StencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, + .StencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + }; + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw, + &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, + pipeline->gen8.wm_depth_stencil); + } + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { anv_batch_emit_merge(&cmd_buffer->batch, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 5095ce060bf..9be3bf46021 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -29,46 +29,6 @@ #include "anv_private.h" -VkResult gen8_CreateDynamicRasterState( - VkDevice _device, - const VkDynamicRasterStateCreateInfo* pCreateInfo, - VkDynamicRasterState* pState) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_rs_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - 
VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GEN8_3DSTATE_SF sf = { - GEN8_3DSTATE_SF_header, - .LineWidth = pCreateInfo->lineWidth, - }; - - GEN8_3DSTATE_SF_pack(NULL, state->gen8.sf, &sf); - - bool enable_bias = pCreateInfo->depthBias != 0.0f || - pCreateInfo->slopeScaledDepthBias != 0.0f; - struct GEN8_3DSTATE_RASTER raster = { - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, - .GlobalDepthOffsetConstant = pCreateInfo->depthBias, - .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias, - .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp - }; - - GEN8_3DSTATE_RASTER_pack(NULL, state->gen8.raster, &raster); - - *pState = anv_dynamic_rs_state_to_handle(state); - - return VK_SUCCESS; -} - void gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, uint32_t offset, uint32_t range) @@ -407,46 +367,3 @@ VkResult gen8_CreateSampler( return VK_SUCCESS; } - -VkResult gen8_CreateDynamicDepthStencilState( - VkDevice _device, - const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, - VkDynamicDepthStencilState* pState) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_ds_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - GEN8_3DSTATE_WM_DEPTH_STENCIL_header, - - /* Is this what we need to do? 
*/ - .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0, - - .StencilTestMask = pCreateInfo->stencilReadMask & 0xff, - .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, - - .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff, - .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, - }; - - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->gen8.wm_depth_stencil, - &wm_depth_stencil); - - struct GEN8_COLOR_CALC_STATE color_calc_state = { - .StencilReferenceValue = pCreateInfo->stencilFrontRef, - .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef - }; - - GEN8_COLOR_CALC_STATE_pack(NULL, state->gen8.color_calc_state, &color_calc_state); - - *pState = anv_dynamic_ds_state_to_handle(state); - - return VK_SUCCESS; -} -- cgit v1.2.3 From 1a52bc30390d56493ea2d0a950d8b9f01519ed24 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 7 Oct 2015 09:28:21 -0700 Subject: anv/pipeline: Add support for dynamic state in pipelines --- src/vulkan/anv_cmd_buffer.c | 6 +++ src/vulkan/anv_pipeline.c | 94 +++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 3 ++ 3 files changed, 103 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 8486bd05a2c..28d9dd9d694 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -319,6 +319,12 @@ void anv_CmdBindPipeline( cmd_buffer->state.vb_dirty |= pipeline->vb_used; cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; + + /* Apply the dynamic state from the pipeline */ + cmd_buffer->state.dirty |= pipeline->dynamic_state_mask; + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, + &pipeline->dynamic_state, + pipeline->dynamic_state_mask); break; default: diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 192a4b17ae0..75f640154cc 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ 
-177,6 +177,98 @@ static const uint32_t vk_to_gen_primitive_type[] = { [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 }; +static void +anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *pCreateInfo) +{ + pipeline->dynamic_state_mask = 0; + + if (pCreateInfo->pDynamicState == NULL) + return; + + uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; + struct anv_dynamic_state *dynamic = &pipeline->dynamic_state; + + for (uint32_t s = 0; s < count; s++) { + VkDynamicState state = pCreateInfo->pDynamicState->pDynamicStates[s]; + + assert(state < 32); + pipeline->dynamic_state_mask |= (1u << state); + + switch (state) { + case VK_DYNAMIC_STATE_VIEWPORT: + assert(pCreateInfo->pViewportState); + dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; + typed_memcpy(dynamic->viewport.viewports, + pCreateInfo->pViewportState->pViewports, + pCreateInfo->pViewportState->viewportCount); + break; + + case VK_DYNAMIC_STATE_SCISSOR: + assert(pCreateInfo->pViewportState); + dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; + typed_memcpy(dynamic->scissor.scissors, + pCreateInfo->pViewportState->pScissors, + pCreateInfo->pViewportState->scissorCount); + break; + + case VK_DYNAMIC_STATE_LINE_WIDTH: + assert(pCreateInfo->pRasterState); + dynamic->line_width = pCreateInfo->pRasterState->lineWidth; + break; + + case VK_DYNAMIC_STATE_DEPTH_BIAS: + assert(pCreateInfo->pRasterState); + dynamic->depth_bias.bias = pCreateInfo->pRasterState->depthBias; + dynamic->depth_bias.clamp = pCreateInfo->pRasterState->depthBiasClamp; + dynamic->depth_bias.slope_scaled = + pCreateInfo->pRasterState->slopeScaledDepthBias; + break; + + case VK_DYNAMIC_STATE_BLEND_CONSTANTS: + assert(pCreateInfo->pColorBlendState); + typed_memcpy(dynamic->blend_constants, + pCreateInfo->pColorBlendState->blendConst, 4); + break; + + case VK_DYNAMIC_STATE_DEPTH_BOUNDS: + assert(pCreateInfo->pDepthStencilState); + 
dynamic->depth_bounds.min = + pCreateInfo->pDepthStencilState->minDepthBounds; + dynamic->depth_bounds.max = + pCreateInfo->pDepthStencilState->maxDepthBounds; + break; + + case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_compare_mask.front = + pCreateInfo->pDepthStencilState->front.stencilCompareMask; + dynamic->stencil_compare_mask.back = + pCreateInfo->pDepthStencilState->back.stencilCompareMask; + break; + + case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_write_mask.front = + pCreateInfo->pDepthStencilState->front.stencilWriteMask; + dynamic->stencil_write_mask.back = + pCreateInfo->pDepthStencilState->back.stencilWriteMask; + break; + + case VK_DYNAMIC_STATE_STENCIL_REFERENCE: + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_reference.front = + pCreateInfo->pDepthStencilState->front.stencilReference; + dynamic->stencil_reference.back = + pCreateInfo->pDepthStencilState->back.stencilReference; + break; + + default: + assert(!"Invalid dynamic state"); + } + } +} + VkResult anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo, @@ -205,6 +297,8 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, anv_shader_from_handle(pCreateInfo->pStages[i].shader); } + anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); + if (pCreateInfo->pTessellationState) anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); if (pCreateInfo->pViewportState) diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 3d47739aae7..0d1998d659c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1046,6 +1046,9 @@ struct anv_pipeline { struct anv_batch batch; uint32_t batch_data[256]; struct anv_reloc_list batch_relocs; + uint32_t dynamic_state_mask; + struct anv_dynamic_state dynamic_state; + struct anv_shader * 
shaders[VK_SHADER_STAGE_NUM]; struct anv_pipeline_layout * layout; bool use_repclear; -- cgit v1.2.3 From 010c6efd6510ebe319eceda73957b910024a210e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 7 Oct 2015 09:44:51 -0700 Subject: vk/0.170.2: Make vkUpdateDescriptorSets return void --- include/vulkan/vulkan.h | 4 ++-- src/vulkan/anv_device.c | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index abf87d81e83..f453553ae5c 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2150,7 +2150,7 @@ typedef void (VKAPI *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorP typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets); typedef VkResult (VKAPI *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t count, const VkDescriptorSet* pDescriptorSets); -typedef VkResult (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); +typedef void (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); typedef void (VKAPI *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer); typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); @@ -2643,7 +2643,7 @@ VkResult VKAPI 
vkFreeDescriptorSets( uint32_t count, const VkDescriptorSet* pDescriptorSets); -VkResult VKAPI vkUpdateDescriptorSets( +void VKAPI vkUpdateDescriptorSets( VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index aec900065e6..75c4c2c88d8 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1693,7 +1693,7 @@ VkResult anv_FreeDescriptorSets( return VK_SUCCESS; } -VkResult anv_UpdateDescriptorSets( +void anv_UpdateDescriptorSets( VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, @@ -1772,8 +1772,6 @@ VkResult anv_UpdateDescriptorSets( src->descriptors[copy->srcBinding + j]; } } - - return VK_SUCCESS; } // State object functions -- cgit v1.2.3 From 982466aeffc89b29b5a2ccabe61dfd9b9a9085aa Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 7 Oct 2015 09:45:47 -0700 Subject: anv/device: Remove some #ifdef'd out code This was a left-over from the dynamic state update. 
--- src/vulkan/anv_device.c | 177 ------------------------------------------------ 1 file changed, 177 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 75c4c2c88d8..be744bb9b55 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1774,183 +1774,6 @@ void anv_UpdateDescriptorSets( } } -// State object functions - -#if 0 - -static inline int64_t -clamp_int64(int64_t x, int64_t min, int64_t max) -{ - if (x < min) - return min; - else if (x < max) - return x; - else - return max; -} - -VkResult anv_CreateDynamicViewportState( - VkDevice _device, - const VkDynamicViewportStateCreateInfo* pCreateInfo, - VkDynamicViewportState* pState) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_vp_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - unsigned count = pCreateInfo->viewportAndScissorCount; - state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool, - count * 64, 64); - state->cc_vp = anv_state_pool_alloc(&device->dynamic_state_pool, - count * 8, 32); - state->scissor = anv_state_pool_alloc(&device->dynamic_state_pool, - count * 32, 32); - - for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) { - const VkViewport *vp = &pCreateInfo->pViewports[i]; - const VkRect2D *s = &pCreateInfo->pScissors[i]; - - /* The gen7 state struct has just the matrix and guardband fields, the - * gen8 struct adds the min/max viewport fields. 
*/ - struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = { - .ViewportMatrixElementm00 = vp->width / 2, - .ViewportMatrixElementm11 = vp->height / 2, - .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, - .ViewportMatrixElementm30 = vp->originX + vp->width / 2, - .ViewportMatrixElementm31 = vp->originY + vp->height / 2, - .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2, - .XMinClipGuardband = -1.0f, - .XMaxClipGuardband = 1.0f, - .YMinClipGuardband = -1.0f, - .YMaxClipGuardband = 1.0f, - .XMinViewPort = vp->originX, - .XMaxViewPort = vp->originX + vp->width - 1, - .YMinViewPort = vp->originY, - .YMaxViewPort = vp->originY + vp->height - 1, - }; - - struct GEN7_CC_VIEWPORT cc_viewport = { - .MinimumDepth = vp->minDepth, - .MaximumDepth = vp->maxDepth - }; - - /* Since xmax and ymax are inclusive, we have to have xmax < xmin or - * ymax < ymin for empty clips. In case clip x, y, width height are all - * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't - * what we want. Just special case empty clips and produce a canonical - * empty clip. */ - static const struct GEN7_SCISSOR_RECT empty_scissor = { - .ScissorRectangleYMin = 1, - .ScissorRectangleXMin = 1, - .ScissorRectangleYMax = 0, - .ScissorRectangleXMax = 0 - }; - - const int max = 0xffff; - struct GEN7_SCISSOR_RECT scissor = { - /* Do this math using int64_t so overflow gets clamped correctly. 
*/ - .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), - .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), - .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), - .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) - }; - - GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport); - GEN7_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 32, &cc_viewport); - - if (s->extent.width <= 0 || s->extent.height <= 0) { - GEN7_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor); - } else { - GEN7_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor); - } - } - - *pState = anv_dynamic_vp_state_to_handle(state); - - return VK_SUCCESS; -} - -void anv_DestroyDynamicViewportState( - VkDevice _device, - VkDynamicViewportState _vp_state) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, _vp_state); - - anv_state_pool_free(&device->dynamic_state_pool, vp_state->sf_clip_vp); - anv_state_pool_free(&device->dynamic_state_pool, vp_state->cc_vp); - anv_state_pool_free(&device->dynamic_state_pool, vp_state->scissor); - - anv_device_free(device, vp_state); -} - -void anv_DestroyDynamicRasterState( - VkDevice _device, - VkDynamicRasterState _rs_state) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, _rs_state); - - anv_device_free(device, rs_state); -} - -VkResult anv_CreateDynamicColorBlendState( - VkDevice _device, - const VkDynamicColorBlendStateCreateInfo* pCreateInfo, - VkDynamicColorBlendState* pState) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_dynamic_cb_state *state; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO); - - state = anv_device_alloc(device, sizeof(*state), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (state == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - 
struct GEN7_COLOR_CALC_STATE color_calc_state = { - .BlendConstantColorRed = pCreateInfo->blendConst[0], - .BlendConstantColorGreen = pCreateInfo->blendConst[1], - .BlendConstantColorBlue = pCreateInfo->blendConst[2], - .BlendConstantColorAlpha = pCreateInfo->blendConst[3] - }; - - GEN7_COLOR_CALC_STATE_pack(NULL, state->color_calc_state, &color_calc_state); - - *pState = anv_dynamic_cb_state_to_handle(state); - - return VK_SUCCESS; -} - -void anv_DestroyDynamicColorBlendState( - VkDevice _device, - VkDynamicColorBlendState _cb_state) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, _cb_state); - - anv_device_free(device, cb_state); -} - -void anv_DestroyDynamicDepthStencilState( - VkDevice _device, - VkDynamicDepthStencilState _ds_state) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, _ds_state); - - anv_device_free(device, ds_state); -} - -#endif - VkResult anv_CreateFramebuffer( VkDevice _device, const VkFramebufferCreateInfo* pCreateInfo, -- cgit v1.2.3 From 033a37f5913ec453a9e007075913fecc4868dad5 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 09:57:51 -0700 Subject: vk/0.170.2: Update VkPhysicalDeviceLimits --- include/vulkan/vulkan.h | 17 ++++++++++++++++- src/vulkan/anv_device.c | 9 ++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index f453553ae5c..b2cdf3c66ab 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -876,6 +876,17 @@ typedef enum { } VkImageCreateFlagBits; typedef VkFlags VkImageCreateFlags; +typedef enum { + VK_SAMPLE_COUNT_1_BIT = 0x00000001, + VK_SAMPLE_COUNT_2_BIT = 0x00000002, + VK_SAMPLE_COUNT_4_BIT = 0x00000004, + VK_SAMPLE_COUNT_8_BIT = 0x00000008, + VK_SAMPLE_COUNT_16_BIT = 0x00000010, + VK_SAMPLE_COUNT_32_BIT = 0x00000020, + VK_SAMPLE_COUNT_64_BIT = 0x00000040, +} VkSampleCountFlagBits; +typedef VkFlags 
VkSampleCountFlags; + typedef enum { VK_QUEUE_GRAPHICS_BIT = 0x00000001, VK_QUEUE_COMPUTE_BIT = 0x00000002, @@ -1214,12 +1225,14 @@ typedef struct { uint32_t maxImageDimension3D; uint32_t maxImageDimensionCube; uint32_t maxImageArrayLayers; + VkSampleCountFlags sampleCounts; uint32_t maxTexelBufferSize; uint32_t maxUniformBufferSize; uint32_t maxStorageBufferSize; uint32_t maxPushConstantsSize; uint32_t maxMemoryAllocationCount; VkDeviceSize bufferImageGranularity; + VkDeviceSize sparseAddressSpaceSize; uint32_t maxBoundDescriptorSets; uint32_t maxDescriptorSets; uint32_t maxPerStageDescriptorSamplers; @@ -1229,10 +1242,13 @@ typedef struct { uint32_t maxPerStageDescriptorStorageImages; uint32_t maxDescriptorSetSamplers; uint32_t maxDescriptorSetUniformBuffers; + uint32_t maxDescriptorSetUniformBuffersDynamic; uint32_t maxDescriptorSetStorageBuffers; + uint32_t maxDescriptorSetStorageBuffersDynamic; uint32_t maxDescriptorSetSampledImages; uint32_t maxDescriptorSetStorageImages; uint32_t maxVertexInputAttributes; + uint32_t maxVertexInputBindings; uint32_t maxVertexInputAttributeOffset; uint32_t maxVertexInputBindingStride; uint32_t maxVertexOutputComponents; @@ -1266,7 +1282,6 @@ typedef struct { float maxSamplerLodBias; float maxSamplerAnisotropy; uint32_t maxViewports; - uint32_t maxDynamicViewportStates; uint32_t maxViewportDimensions[2]; float viewportBoundsRange[2]; uint32_t viewportSubPixelBits; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index be744bb9b55..71ec45089db 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -330,12 +330,17 @@ VkResult anv_GetPhysicalDeviceLimits( .maxImageDimension3D = (1 << 10), .maxImageDimensionCube = (1 << 14), .maxImageArrayLayers = (1 << 10), + + /* Broadwell supports 1, 2, 4, and 8 samples. 
*/ + .sampleCounts = 4, + .maxTexelBufferSize = (1 << 14), .maxUniformBufferSize = UINT32_MAX, .maxStorageBufferSize = UINT32_MAX, .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, .maxMemoryAllocationCount = UINT32_MAX, .bufferImageGranularity = 64, /* A cache line */ + .sparseAddressSpaceSize = 0, .maxBoundDescriptorSets = MAX_SETS, .maxDescriptorSets = UINT32_MAX, .maxPerStageDescriptorSamplers = 64, @@ -345,10 +350,13 @@ VkResult anv_GetPhysicalDeviceLimits( .maxPerStageDescriptorStorageImages = 64, .maxDescriptorSetSamplers = 256, .maxDescriptorSetUniformBuffers = 256, + .maxDescriptorSetUniformBuffersDynamic = 256, .maxDescriptorSetStorageBuffers = 256, + .maxDescriptorSetStorageBuffersDynamic = 256, .maxDescriptorSetSampledImages = 256, .maxDescriptorSetStorageImages = 256, .maxVertexInputAttributes = 32, + .maxVertexInputBindings = 32, .maxVertexInputAttributeOffset = 256, .maxVertexInputBindingStride = 256, .maxVertexOutputComponents = 32, @@ -390,7 +398,6 @@ VkResult anv_GetPhysicalDeviceLimits( .maxSamplerLodBias = 16, .maxSamplerAnisotropy = 16, .maxViewports = MAX_VIEWPORTS, - .maxDynamicViewportStates = UINT32_MAX, .maxViewportDimensions = { (1 << 14), (1 << 14) }, .viewportBoundsRange = { -1.0, 1.0 }, /* FIXME */ .viewportSubPixelBits = 13, /* We take a float? 
*/ -- cgit v1.2.3 From 545f5cc6e1202ebf1777f63d25741e64e3699a97 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 10:05:02 -0700 Subject: vk/0.170.2: Update VkPhysicalDeviceFeatures --- include/vulkan/vulkan.h | 17 +++++------------ src/vulkan/anv_device.c | 9 ++++----- 2 files changed, 9 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index b2cdf3c66ab..467a20619dd 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1152,7 +1152,7 @@ typedef struct { VkBool32 sampleRateShading; VkBool32 dualSourceBlend; VkBool32 logicOp; - VkBool32 instancedDrawIndirect; + VkBool32 multiDrawIndirect; VkBool32 depthClip; VkBool32 depthBiasClamp; VkBool32 fillModeNonSolid; @@ -1162,6 +1162,7 @@ typedef struct { VkBool32 textureCompressionETC2; VkBool32 textureCompressionASTC_LDR; VkBool32 textureCompressionBC; + VkBool32 occlusionQueryNonConservative; VkBool32 pipelineStatisticsQuery; VkBool32 vertexSideEffects; VkBool32 tessellationSideEffects; @@ -1169,11 +1170,9 @@ typedef struct { VkBool32 fragmentSideEffects; VkBool32 shaderTessellationPointSize; VkBool32 shaderGeometryPointSize; - VkBool32 shaderTextureGatherExtended; + VkBool32 shaderImageGatherExtended; VkBool32 shaderStorageImageExtendedFormats; VkBool32 shaderStorageImageMultisample; - VkBool32 shaderStorageBufferArrayConstantIndexing; - VkBool32 shaderStorageImageArrayConstantIndexing; VkBool32 shaderUniformBufferArrayDynamicIndexing; VkBool32 shaderSampledImageArrayDynamicIndexing; VkBool32 shaderStorageBufferArrayDynamicIndexing; @@ -1182,11 +1181,11 @@ typedef struct { VkBool32 shaderCullDistance; VkBool32 shaderFloat64; VkBool32 shaderInt64; - VkBool32 shaderFloat16; VkBool32 shaderInt16; VkBool32 shaderResourceResidency; VkBool32 shaderResourceMinLOD; - VkBool32 sparse; + VkBool32 alphaToOne; + VkBool32 sparseBinding; VkBool32 sparseResidencyBuffer; VkBool32 sparseResidencyImage2D; VkBool32 sparseResidencyImage3D; @@ 
-1194,12 +1193,6 @@ typedef struct { VkBool32 sparseResidency4Samples; VkBool32 sparseResidency8Samples; VkBool32 sparseResidency16Samples; - VkBool32 sparseResidencyStandard2DBlockShape; - VkBool32 sparseResidencyStandard2DMSBlockShape; - VkBool32 sparseResidencyStandard3DBlockShape; - VkBool32 sparseResidencyAlignedMipSize; - VkBool32 sparseResidencyNonResident; - VkBool32 sparseResidencyNonResidentStrict; VkBool32 sparseResidencyAliased; } VkPhysicalDeviceFeatures; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 71ec45089db..6473765cebc 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -278,7 +278,7 @@ VkResult anv_GetPhysicalDeviceFeatures( .sampleRateShading = false, .dualSourceBlend = true, .logicOp = true, - .instancedDrawIndirect = true, + .multiDrawIndirect = true, .depthClip = false, .depthBiasClamp = false, .fillModeNonSolid = true, @@ -288,6 +288,7 @@ VkResult anv_GetPhysicalDeviceFeatures( .textureCompressionETC2 = true, .textureCompressionASTC_LDR = true, .textureCompressionBC = true, + .occlusionQueryNonConservative = false, /* FINISHME */ .pipelineStatisticsQuery = true, .vertexSideEffects = false, .tessellationSideEffects = false, @@ -295,11 +296,9 @@ VkResult anv_GetPhysicalDeviceFeatures( .fragmentSideEffects = false, .shaderTessellationPointSize = false, .shaderGeometryPointSize = true, - .shaderTextureGatherExtended = true, + .shaderImageGatherExtended = true, .shaderStorageImageExtendedFormats = false, .shaderStorageImageMultisample = false, - .shaderStorageBufferArrayConstantIndexing = false, - .shaderStorageImageArrayConstantIndexing = false, .shaderUniformBufferArrayDynamicIndexing = true, .shaderSampledImageArrayDynamicIndexing = false, .shaderStorageBufferArrayDynamicIndexing = false, @@ -308,8 +307,8 @@ VkResult anv_GetPhysicalDeviceFeatures( .shaderCullDistance = false, .shaderFloat64 = false, .shaderInt64 = false, - .shaderFloat16 = false, .shaderInt16 = false, + .alphaToOne = true, }; return 
VK_SUCCESS; -- cgit v1.2.3 From 98c2bb69172944e909922f00422bd235b8e543cc Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 10:15:59 -0700 Subject: vk/0.170.2: Update VkFormatProperties --- include/vulkan/vulkan.h | 4 +++- src/vulkan/anv_formats.c | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 467a20619dd..1bcc31c59d9 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -851,7 +851,8 @@ typedef enum { VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT = 0x00000080, VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT = 0x00000100, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000200, - VK_FORMAT_FEATURE_CONVERSION_BIT = 0x00000400, + VK_FORMAT_FEATURE_BLIT_SOURCE_BIT = 0x00000400, + VK_FORMAT_FEATURE_BLIT_DESTINATION_BIT = 0x00000800, } VkFormatFeatureFlagBits; typedef VkFlags VkFormatFeatureFlags; @@ -1199,6 +1200,7 @@ typedef struct { typedef struct { VkFormatFeatureFlags linearTilingFeatures; VkFormatFeatureFlags optimalTilingFeatures; + VkFormatFeatureFlags bufferFeatures; } VkFormatProperties; typedef struct { diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index cae575bb04b..8f36bc9a7ce 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -252,6 +252,7 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d { const struct surface_format_info *info; int gen; + VkFormatFeatureFlags flags; if (format == NULL) return VK_ERROR_INVALID_VALUE; @@ -267,6 +268,10 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d if (anv_format_is_depth_or_stencil(format)) { tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + tiled |= VK_FORMAT_FEATURE_BLIT_SOURCE_BIT; + if (format->depth_format) { + tiled |= VK_FORMAT_FEATURE_BLIT_DESTINATION_BIT; + } } else { /* The surface_formats table only contains 
color formats */ info = &surface_formats[format->surface_format]; @@ -274,12 +279,16 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d goto unsupported; if (info->sampling <= gen) { - linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_BLIT_SOURCE_BIT; + linear |= flags; + tiled |= flags; } if (info->render_target <= gen) { - linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; - tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + flags = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_BLIT_DESTINATION_BIT; + linear |= flags; + tiled |= flags; } if (info->alpha_blend <= gen) { linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; @@ -292,6 +301,7 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d out_properties->linearTilingFeatures = linear; out_properties->optimalTilingFeatures = tiled; + out_properties->bufferFeatures = 0; /* FINISHME */ return VK_SUCCESS; -- cgit v1.2.3 From 81e1dcc42c33d306d490b886b74df62e48eead99 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 10:28:30 -0700 Subject: vk/0.170.2: Update VkImageFormatProperties --- include/vulkan/vulkan.h | 7 +++++-- src/vulkan/anv_formats.c | 43 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 1bcc31c59d9..26827b6847f 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1210,8 +1210,11 @@ typedef struct { } VkExtent3D; typedef struct { - uint64_t maxResourceSize; - uint32_t maxSamples; + VkExtent3D maxExtent; + uint32_t maxMipLevels; + uint32_t maxArraySize; + VkSampleCountFlags sampleCounts; + VkDeviceSize maxResourceSize; } VkImageFormatProperties; typedef struct { diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 
8f36bc9a7ce..f5bae0ae0aa 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -344,6 +344,9 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( const struct anv_format *format = anv_format_for_vk_format(_format); VkFormatProperties format_props; VkFormatFeatureFlags format_feature_flags; + VkExtent3D maxExtent; + uint32_t maxMipLevels; + uint32_t maxArraySize; VkResult result; result = anv_physical_device_get_format_properties(physical_device, format, @@ -362,6 +365,35 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( unreachable("bad VkImageTiling"); } + switch (type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + maxExtent.width = 16384; + maxExtent.height = 1; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + break; + case VK_IMAGE_TYPE_2D: + /* FINISHME: Does this really differ for cube maps? The documentation + * for RENDER_SURFACE_STATE suggests so. + */ + maxExtent.width = 16384; + maxExtent.height = 16384; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + break; + case VK_IMAGE_TYPE_3D: + maxExtent.width = 2048; + maxExtent.height = 2048; + maxExtent.depth = 2048; + maxMipLevels = 12; /* log2(maxWidth) + 1 */ + maxArraySize = 1; + break; + } + if (usage & VK_IMAGE_USAGE_TRANSFER_SOURCE_BIT) { /* Meta implements transfers by sampling from the source image. */ if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { @@ -415,8 +447,12 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( } *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = maxExtent, + .maxMipLevels = maxMipLevels, + .maxArraySize = maxArraySize, + /* FINISHME: Support multisampling */ - .maxSamples = 1, + .sampleCounts = VK_SAMPLE_COUNT_1_BIT, /* FINISHME: Accurately calculate * VkImageFormatProperties::maxResourceSize. 
@@ -428,7 +464,10 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( unsupported: *pImageFormatProperties = (VkImageFormatProperties) { - .maxSamples = 0, + .maxExtent = { 0, 0, 0 }, + .maxMipLevels = 0, + .maxArraySize = 0, + .sampleCounts = 0, .maxResourceSize = 0, }; -- cgit v1.2.3 From d48e71ce55cd63735bedb41c9754b30571a06e8f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 10:36:46 -0700 Subject: vk/0.170.2: Update VkPhysicalDeviceProperties --- include/vulkan/vulkan.h | 16 +++++++++++----- src/vulkan/anv_device.c | 23 ++++++++--------------- 2 files changed, 19 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 26827b6847f..c4ed1ec0cd1 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1316,6 +1316,15 @@ typedef struct { float lineWidthGranularity; } VkPhysicalDeviceLimits; +typedef struct { + VkBool32 residencyStandard2DBlockShape; + VkBool32 residencyStandard2DMSBlockShape; + VkBool32 residencyStandard3DBlockShape; + VkBool32 residencyAlignedMipSize; + VkBool32 residencyNonResident; + VkBool32 residencyNonResidentStrict; +} VkPhysicalDeviceSparseProperties; + typedef struct { uint32_t apiVersion; uint32_t driverVersion; @@ -1324,6 +1333,8 @@ typedef struct { VkPhysicalDeviceType deviceType; char deviceName[VK_MAX_PHYSICAL_DEVICE_NAME]; uint8_t pipelineCacheUUID[VK_UUID_LENGTH]; + VkPhysicalDeviceLimits limits; + VkPhysicalDeviceSparseProperties sparseProperties; } VkPhysicalDeviceProperties; typedef struct { @@ -2082,7 +2093,6 @@ typedef VkResult (VKAPI *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, ui typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); typedef VkResult (VKAPI 
*PFN_vkGetPhysicalDeviceImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceLimits)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceLimits* pLimits); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueFamilyProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties); typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); @@ -2254,10 +2264,6 @@ VkResult VKAPI vkGetPhysicalDeviceImageFormatProperties( VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties); -VkResult VKAPI vkGetPhysicalDeviceLimits( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceLimits* pLimits); - VkResult VKAPI vkGetPhysicalDeviceProperties( VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 6473765cebc..310a5b91131 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -314,16 +314,16 @@ VkResult anv_GetPhysicalDeviceFeatures( return VK_SUCCESS; } -VkResult anv_GetPhysicalDeviceLimits( +VkResult anv_GetPhysicalDeviceProperties( VkPhysicalDevice physicalDevice, - VkPhysicalDeviceLimits* pLimits) + VkPhysicalDeviceProperties* pProperties) { - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - const struct brw_device_info *devinfo = physical_device->info; + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + const struct brw_device_info *devinfo = pdevice->info; - anv_finishme("Get correct values for PhysicalDeviceLimits"); + anv_finishme("Get 
correct values for VkPhysicalDeviceLimits"); - *pLimits = (VkPhysicalDeviceLimits) { + VkPhysicalDeviceLimits limits = { .maxImageDimension1D = (1 << 14), .maxImageDimension2D = (1 << 14), .maxImageDimension3D = (1 << 10), @@ -433,21 +433,14 @@ VkResult anv_GetPhysicalDeviceLimits( .lineWidthGranularity = (1.0 / 128.0), }; - return VK_SUCCESS; -} - -VkResult anv_GetPhysicalDeviceProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties* pProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - *pProperties = (VkPhysicalDeviceProperties) { .apiVersion = VK_MAKE_VERSION(0, 138, 1), .driverVersion = 1, .vendorId = 0x8086, .deviceId = pdevice->chipset_id, .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + .limits = limits, + .sparseProperties = {0}, /* Broadwell doesn't do sparse. */ }; strcpy(pProperties->deviceName, pdevice->name); -- cgit v1.2.3 From 92e7bd361065ff3dad863736456f3729952bf5a2 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 10:45:41 -0700 Subject: vk/0.170.2: Update vkCreateDescriptorPool Nothing to do. In Mesa the pool is a stub. 
--- include/vulkan/vulkan-0.170.2.h | 1 + include/vulkan/vulkan.h | 6 +++--- src/vulkan/anv_device.c | 2 -- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan-0.170.2.h b/include/vulkan/vulkan-0.170.2.h index 6fdd806509c..03bcefea50a 100644 --- a/include/vulkan/vulkan-0.170.2.h +++ b/include/vulkan/vulkan-0.170.2.h @@ -1512,6 +1512,7 @@ typedef struct { VkSharingMode sharingMode; uint32_t queueFamilyCount; const uint32_t* pQueueFamilyIndices; + VkImageLayout initialLayout; } VkImageCreateInfo; typedef struct { diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 00dbf3dd69c..89963aea55c 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1837,6 +1837,8 @@ typedef struct { typedef struct { VkStructureType sType; const void* pNext; + VkDescriptorPoolUsage poolUsage; + uint32_t maxSets; uint32_t count; const VkDescriptorTypeCount* pTypeCount; } VkDescriptorPoolCreateInfo; @@ -2176,7 +2178,7 @@ typedef VkResult (VKAPI *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCr typedef void (VKAPI *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler); typedef VkResult (VKAPI *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); typedef void (VKAPI *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout); -typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, VkDescriptorPoolUsage poolUsage, uint32_t maxSets, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); +typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); typedef void (VKAPI *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); 
typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets); @@ -2643,8 +2645,6 @@ void VKAPI vkDestroyDescriptorSetLayout( VkResult VKAPI vkCreateDescriptorPool( VkDevice device, - VkDescriptorPoolUsage poolUsage, - uint32_t maxSets, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 310a5b91131..dd0aac48303 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1591,8 +1591,6 @@ void anv_DestroyDescriptorSetLayout( VkResult anv_CreateDescriptorPool( VkDevice device, - VkDescriptorPoolUsage poolUsage, - uint32_t maxSets, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool) { -- cgit v1.2.3 From 8dee32e71f0ccbdf2b9404fe553a83da8bea79dc Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 10:58:55 -0700 Subject: vk/0.170: Update VkDescriptorInfo Ignore the new bufferInfo field with a anv_finishme. 
--- include/vulkan/vulkan.h | 7 +++++++ src/vulkan/anv_device.c | 10 ++++++++++ 2 files changed, 17 insertions(+) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 89963aea55c..dcba29cb426 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1843,11 +1843,18 @@ typedef struct { const VkDescriptorTypeCount* pTypeCount; } VkDescriptorPoolCreateInfo; +typedef struct { + VkBuffer buffer; + VkDeviceSize offset; + VkDeviceSize range; +} VkDescriptorBufferInfo; + typedef struct { VkBufferView bufferView; VkSampler sampler; VkImageView imageView; VkImageLayout imageLayout; + VkDescriptorBufferInfo bufferInfo; } VkDescriptorInfo; typedef struct { diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index dd0aac48303..b12fabc2007 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1701,6 +1701,16 @@ void anv_UpdateDescriptorSets( const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet); + for (uint32_t j = 0; j < write->count; ++j) { + const VkDescriptorBufferInfo *binfo + = &write->pDescriptors[j].bufferInfo; + + if (binfo->buffer.handle || binfo->offset || binfo->range) { + anv_finishme("VkWriteDesciptorSet::bufferInfo"); + break; + } + } + switch (write->descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: -- cgit v1.2.3 From f9c948ed00787c56bac265dc934049ed67a1cd61 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 11:36:51 -0700 Subject: vk/0.170.2: Update VkResult Version 0.170.2 removes most of the error enums. In many cases, I had to replace an error with a less accurate (or even incorrect) one. In other cases, the error path is replaced with an assertion. 
--- include/vulkan/vulkan.h | 42 ++++++--------------------- src/vulkan/anv_device.c | 68 +++++++++++++++++++++++++++----------------- src/vulkan/anv_formats.c | 3 +- src/vulkan/anv_image.c | 27 ++++++------------ src/vulkan/anv_query.c | 7 +++-- src/vulkan/anv_util.c | 35 ++++++----------------- src/vulkan/anv_wsi_wayland.c | 27 +++++++++++------- src/vulkan/anv_wsi_x11.c | 40 +++++++++++--------------- src/vulkan/gen7_pipeline.c | 3 +- 9 files changed, 107 insertions(+), 145 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index dcba29cb426..1a572136340 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -128,41 +128,17 @@ typedef enum { VK_EVENT_SET = 4, VK_EVENT_RESET = 5, VK_INCOMPLETE = 6, - VK_ERROR_UNKNOWN = -1, - VK_ERROR_UNAVAILABLE = -2, + VK_ERROR_OUT_OF_HOST_MEMORY = -1, + VK_ERROR_OUT_OF_DEVICE_MEMORY = -2, VK_ERROR_INITIALIZATION_FAILED = -3, - VK_ERROR_OUT_OF_HOST_MEMORY = -4, - VK_ERROR_OUT_OF_DEVICE_MEMORY = -5, - VK_ERROR_DEVICE_ALREADY_CREATED = -6, - VK_ERROR_DEVICE_LOST = -7, - VK_ERROR_INVALID_POINTER = -8, - VK_ERROR_INVALID_VALUE = -9, - VK_ERROR_INVALID_HANDLE = -10, - VK_ERROR_INVALID_ORDINAL = -11, - VK_ERROR_INVALID_MEMORY_SIZE = -12, - VK_ERROR_INVALID_EXTENSION = -13, - VK_ERROR_INVALID_FLAGS = -14, - VK_ERROR_INVALID_ALIGNMENT = -15, - VK_ERROR_INVALID_FORMAT = -16, - VK_ERROR_INVALID_IMAGE = -17, - VK_ERROR_INVALID_DESCRIPTOR_SET_DATA = -18, - VK_ERROR_INVALID_QUEUE_TYPE = -19, - VK_ERROR_UNSUPPORTED_SHADER_IL_VERSION = -20, - VK_ERROR_BAD_SHADER_CODE = -21, - VK_ERROR_BAD_PIPELINE_DATA = -22, - VK_ERROR_NOT_MAPPABLE = -23, - VK_ERROR_MEMORY_MAP_FAILED = -24, - VK_ERROR_MEMORY_UNMAP_FAILED = -25, - VK_ERROR_INCOMPATIBLE_DEVICE = -26, - VK_ERROR_INCOMPATIBLE_DRIVER = -27, - VK_ERROR_INCOMPLETE_COMMAND_BUFFER = -28, - VK_ERROR_BUILDING_COMMAND_BUFFER = -29, - VK_ERROR_MEMORY_NOT_BOUND = -30, - VK_ERROR_INCOMPATIBLE_QUEUE = -31, - VK_ERROR_INVALID_LAYER = -32, - 
VK_RESULT_BEGIN_RANGE = VK_ERROR_INVALID_LAYER, + VK_ERROR_DEVICE_LOST = -4, + VK_ERROR_MEMORY_MAP_FAILED = -5, + VK_ERROR_LAYER_NOT_PRESENT = -6, + VK_ERROR_EXTENSION_NOT_PRESENT = -7, + VK_ERROR_INCOMPATIBLE_DRIVER = -8, + VK_RESULT_BEGIN_RANGE = VK_ERROR_INCOMPATIBLE_DRIVER, VK_RESULT_END_RANGE = VK_INCOMPLETE, - VK_RESULT_NUM = (VK_INCOMPLETE - VK_ERROR_INVALID_LAYER + 1), + VK_RESULT_NUM = (VK_INCOMPLETE - VK_ERROR_INCOMPATIBLE_DRIVER + 1), VK_RESULT_MAX_ENUM = 0x7FFFFFFF } VkResult; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index b12fabc2007..4e677054afe 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -43,42 +43,49 @@ anv_physical_device_init(struct anv_physical_device *device, fd = open(path, O_RDWR | O_CLOEXEC); if (fd < 0) - return vk_errorf(VK_ERROR_UNAVAILABLE, "failed to open %s: %m", path); + return vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to open %s: %m", path); device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; device->instance = instance; device->path = path; - + device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); if (!device->chipset_id) { - result = vk_errorf(VK_ERROR_UNAVAILABLE, "failed to get chipset id: %m"); + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get chipset id: %m"); goto fail; } device->name = brw_get_device_name(device->chipset_id); device->info = brw_get_device_info(device->chipset_id, -1); if (!device->info) { - result = vk_errorf(VK_ERROR_UNAVAILABLE, "failed to get device info"); + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get device info"); goto fail; } if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) { - result = vk_errorf(VK_ERROR_UNAVAILABLE, "failed to get aperture size: %m"); + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get aperture size: %m"); goto fail; } if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) { - result = vk_errorf(VK_ERROR_UNAVAILABLE, "kernel missing gem 
wait"); + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing gem wait"); goto fail; } if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) { - result = vk_errorf(VK_ERROR_UNAVAILABLE, "kernel missing execbuf2"); + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing execbuf2"); goto fail; } if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC)) { - result = vk_errorf(VK_ERROR_UNAVAILABLE, "non-llc gpu"); + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "non-llc gpu"); goto fail; } @@ -148,7 +155,7 @@ VkResult anv_CreateInstance( } } if (!found) - return vk_error(VK_ERROR_INVALID_EXTENSION); + return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); } if (pCreateInfo->pAllocCb) { @@ -581,7 +588,7 @@ VkResult anv_CreateDevice( } } if (!found) - return vk_error(VK_ERROR_INVALID_EXTENSION); + return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); } anv_set_dispatch_gen(physical_device->info->gen); @@ -639,7 +646,7 @@ VkResult anv_CreateDevice( fail_device: anv_device_free(device, device); - return vk_error(VK_ERROR_UNAVAILABLE); + return vk_error(VK_ERROR_INITIALIZATION_FAILED); } void anv_DestroyDevice( @@ -720,7 +727,7 @@ VkResult anv_EnumerateInstanceLayerProperties( } /* None supported at this time */ - return vk_error(VK_ERROR_INVALID_LAYER); + return vk_error(VK_ERROR_LAYER_NOT_PRESENT); } VkResult anv_EnumerateDeviceLayerProperties( @@ -734,7 +741,7 @@ VkResult anv_EnumerateDeviceLayerProperties( } /* None supported at this time */ - return vk_error(VK_ERROR_INVALID_LAYER); + return vk_error(VK_ERROR_LAYER_NOT_PRESENT); } VkResult anv_GetDeviceQueue( @@ -769,13 +776,19 @@ VkResult anv_QueueSubmit( assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); - if (ret != 0) - return vk_errorf(VK_ERROR_UNKNOWN, "execbuf2 failed: %m"); + if (ret != 0) { + /* We don't know the real error. 
*/ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "execbuf2 failed: %m"); + } if (fence) { ret = anv_gem_execbuffer(device, &fence->execbuf); - if (ret != 0) - return vk_errorf(VK_ERROR_UNKNOWN, "execbuf2 failed: %m"); + if (ret != 0) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "execbuf2 failed: %m"); + } } for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) @@ -838,14 +851,16 @@ VkResult anv_DeviceWaitIdle( ret = anv_gem_execbuffer(device, &execbuf); if (ret != 0) { - result = vk_errorf(VK_ERROR_UNKNOWN, "execbuf2 failed: %m"); + /* We don't know the real error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); goto fail; } timeout = INT64_MAX; ret = anv_gem_wait(device, bo->gem_handle, &timeout); if (ret != 0) { - result = vk_errorf(VK_ERROR_UNKNOWN, "execbuf2 failed: %m"); + /* We don't know the real error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); goto fail; } @@ -901,10 +916,8 @@ VkResult anv_AllocMemory( assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO); - if (pAllocInfo->memoryTypeIndex != 0) { - /* We support exactly one memory heap. */ - return vk_error(VK_ERROR_INVALID_VALUE); - } + /* We support exactly one memory heap. */ + assert(pAllocInfo->memoryTypeIndex == 0); /* FINISHME: Fail if allocation request exceeds heap size. */ @@ -1243,10 +1256,13 @@ VkResult anv_WaitForFences( for (uint32_t i = 0; i < fenceCount; i++) { ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); ret = anv_gem_wait(device, fence->bo.gem_handle, &t); - if (ret == -1 && errno == ETIME) + if (ret == -1 && errno == ETIME) { return VK_TIMEOUT; - else if (ret == -1) - return vk_errorf(VK_ERROR_UNKNOWN, "gem wait failed: %m"); + } else if (ret == -1) { + /* We don't know the real error. 
*/ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "gem wait failed: %m"); + } } return VK_SUCCESS; diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index f5bae0ae0aa..944e05f1476 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -254,8 +254,7 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d int gen; VkFormatFeatureFlags flags; - if (format == NULL) - return VK_ERROR_INVALID_VALUE; + assert(format != NULL); gen = physical_device->info->gen * 10; if (physical_device->info->is_haswell) diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 8522d0e8318..014a9b9b342 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -102,10 +102,7 @@ static const struct anv_tile_info { [WMAJOR] = { 128, 32, 4096 }, }; -/** - * Return -1 on failure. - */ -static int8_t +static uint8_t anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) { if (anv_info->force_tile_mode) @@ -117,11 +114,8 @@ anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) switch (anv_info->vk_info->tiling) { case VK_IMAGE_TILING_LINEAR: - if (unlikely(anv_info->vk_info->format == VK_FORMAT_S8_UINT)) { - return -1; - } else { - return LINEAR; - } + assert(anv_info->vk_info->format != VK_FORMAT_S8_UINT); + return LINEAR; case VK_IMAGE_TILING_OPTIMAL: if (unlikely(anv_info->vk_info->format == VK_FORMAT_S8_UINT)) { return WMAJOR; @@ -153,10 +147,7 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, const VkExtent3D *restrict extent = &create_info->vk_info->extent; const uint32_t levels = create_info->vk_info->mipLevels; const uint32_t array_size = create_info->vk_info->arraySize; - - const int8_t tile_mode = anv_image_choose_tile_mode(create_info); - if (tile_mode == -1) - return vk_error(VK_ERROR_INVALID_IMAGE); + const uint8_t tile_mode = anv_image_choose_tile_mode(create_info); const struct anv_tile_info *tile_info = &anv_tile_info_table[tile_mode]; @@ 
-305,12 +296,10 @@ anv_image_create(VkDevice _device, const struct anv_surf_type_limits *limits = &anv_surf_type_limits[surf_type]; - if (extent->width > limits->width || - extent->height > limits->height || - extent->depth > limits->depth) { - /* TODO(chadv): What is the correct error? */ - return vk_errorf(VK_ERROR_INVALID_MEMORY_SIZE, "image extent is too large"); - } + /* Errors should be caught by VkImageFormatProperties. */ + assert(extent->width <= limits->width); + assert(extent->height <= limits->height); + assert(extent->depth <= limits->depth); image = anv_device_alloc(device, sizeof(*image), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index 9464531b8c5..68535b40cac 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -120,8 +120,11 @@ VkResult anv_GetQueryPoolResults( if (flags & VK_QUERY_RESULT_WAIT_BIT) { ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); - if (ret == -1) - return vk_errorf(VK_ERROR_UNKNOWN, "gem_wait failed %m"); + if (ret == -1) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "gem_wait failed %m"); + } } for (uint32_t i = 0; i < queryCount; i++) { diff --git a/src/vulkan/anv_util.c b/src/vulkan/anv_util.c index 4c8fadcc805..628e399cb3e 100644 --- a/src/vulkan/anv_util.c +++ b/src/vulkan/anv_util.c @@ -92,39 +92,20 @@ __vk_errorf(VkResult error, const char *file, int line, const char *format, ...) 
const char *error_str; switch ((int32_t)error) { - ERROR_CASE(VK_ERROR_UNKNOWN) - ERROR_CASE(VK_ERROR_UNAVAILABLE) - ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED) + + /* Core errors */ ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY) ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY) - ERROR_CASE(VK_ERROR_DEVICE_ALREADY_CREATED) + ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED) ERROR_CASE(VK_ERROR_DEVICE_LOST) - ERROR_CASE(VK_ERROR_INVALID_POINTER) - ERROR_CASE(VK_ERROR_INVALID_VALUE) - ERROR_CASE(VK_ERROR_INVALID_HANDLE) - ERROR_CASE(VK_ERROR_INVALID_ORDINAL) - ERROR_CASE(VK_ERROR_INVALID_MEMORY_SIZE) - ERROR_CASE(VK_ERROR_INVALID_EXTENSION) - ERROR_CASE(VK_ERROR_INVALID_FLAGS) - ERROR_CASE(VK_ERROR_INVALID_ALIGNMENT) - ERROR_CASE(VK_ERROR_INVALID_FORMAT) - ERROR_CASE(VK_ERROR_INVALID_IMAGE) - ERROR_CASE(VK_ERROR_INVALID_DESCRIPTOR_SET_DATA) - ERROR_CASE(VK_ERROR_INVALID_QUEUE_TYPE) - ERROR_CASE(VK_ERROR_UNSUPPORTED_SHADER_IL_VERSION) - ERROR_CASE(VK_ERROR_BAD_SHADER_CODE) - ERROR_CASE(VK_ERROR_BAD_PIPELINE_DATA) - ERROR_CASE(VK_ERROR_NOT_MAPPABLE) ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED) - ERROR_CASE(VK_ERROR_MEMORY_UNMAP_FAILED) - ERROR_CASE(VK_ERROR_INCOMPATIBLE_DEVICE) + ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT) + ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT) ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER) - ERROR_CASE(VK_ERROR_INCOMPLETE_COMMAND_BUFFER) - ERROR_CASE(VK_ERROR_BUILDING_COMMAND_BUFFER) - ERROR_CASE(VK_ERROR_MEMORY_NOT_BOUND) - ERROR_CASE(VK_ERROR_INCOMPATIBLE_QUEUE) - ERROR_CASE(VK_ERROR_INVALID_LAYER) + + /* Extension errors */ ERROR_CASE(VK_ERROR_OUT_OF_DATE_WSI) + default: assert(!"Unknown error"); error_str = "unknown error"; diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index a601ad1851f..ba3ce8a2c65 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -322,10 +322,11 @@ wsi_wl_get_surface_info(struct anv_wsi_implementation *impl, { struct wsi_wayland *wsi = (struct wsi_wayland *)impl; - if (pDataSize == NULL) - return 
vk_error(VK_ERROR_INVALID_POINTER); + assert(pDataSize != NULL); switch (infoType) { + default: + unreachable("bad VkSurfaceInfoTypeWSI"); case VK_SURFACE_INFO_TYPE_PROPERTIES_WSI: { VkSurfacePropertiesWSI *props = pData; @@ -384,8 +385,6 @@ wsi_wl_get_surface_info(struct anv_wsi_implementation *impl, memcpy(pData, present_modes, *pDataSize); return VK_SUCCESS; - default: - return vk_error(VK_ERROR_INVALID_VALUE); } } @@ -423,6 +422,8 @@ wsi_wl_get_swap_chain_info(struct anv_swap_chain *anv_chain, size_t size; switch (infoType) { + default: + unreachable("bad VkSwapChainInfoTypeWSI"); case VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI: { VkSwapChainImagePropertiesWSI *images = pData; @@ -441,9 +442,6 @@ wsi_wl_get_swap_chain_info(struct anv_swap_chain *anv_chain, return VK_SUCCESS; } - - default: - return vk_error(VK_ERROR_INVALID_VALUE); } } @@ -615,14 +613,16 @@ wsi_wl_image_init(struct wsi_wl_swap_chain *chain, struct wsi_wl_image *image) image->memory->bo.gem_handle, surface->stride, I915_TILING_X); if (ret) { - result = vk_error(VK_ERROR_UNKNOWN); + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); goto fail_mem; } int fd = anv_gem_handle_to_fd(chain->base.device, image->memory->bo.gem_handle); if (fd == -1) { - result = vk_error(VK_ERROR_UNKNOWN); + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); goto fail_mem; } @@ -769,8 +769,13 @@ anv_wl_init_wsi(struct anv_instance *instance) int ret = pthread_mutex_init(&wsi->mutex, NULL); if (ret != 0) { - result = (ret == ENOMEM) ? VK_ERROR_OUT_OF_HOST_MEMORY : - VK_ERROR_UNKNOWN; + if (ret == ENOMEM) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } else { + /* FINISHME: Choose a better error. 
*/ + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + goto fail_alloc; } diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 03aef4cbf23..d226caf0eea 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -54,10 +54,11 @@ x11_get_surface_info(struct anv_wsi_implementation *impl, VkSurfaceInfoTypeWSI infoType, size_t* pDataSize, void* pData) { - if (pDataSize == NULL) - return vk_error(VK_ERROR_INVALID_POINTER); + assert(pDataSize != NULL); switch (infoType) { + default: + unreachable("bad VkSurfaceInfoTypeWSI"); case VK_SURFACE_INFO_TYPE_PROPERTIES_WSI: { VkSurfacePropertiesWSI *props = pData; @@ -77,12 +78,9 @@ x11_get_surface_info(struct anv_wsi_implementation *impl, xcb_get_geometry_reply_t *geom = xcb_get_geometry_reply(conn, cookie, &err); if (!geom) { - if (err->error_code == XCB_DRAWABLE) { - return vk_error(VK_ERROR_INVALID_HANDLE); - } else { - return vk_error(VK_ERROR_UNKNOWN); - } + /* FINISHME: Choose a more accurate error. */ free(err); + return VK_ERROR_OUT_OF_DATE_WSI; } VkExtent2D extent = { geom->width, geom->height }; @@ -122,10 +120,7 @@ x11_get_surface_info(struct anv_wsi_implementation *impl, assert(*pDataSize >= sizeof(present_modes)); memcpy(pData, present_modes, *pDataSize); - return VK_SUCCESS; - default: - return vk_error(VK_ERROR_INVALID_VALUE); } } @@ -158,6 +153,8 @@ x11_get_swap_chain_info(struct anv_swap_chain *anv_chain, size_t size; switch (infoType) { + default: + unreachable("bad VkSwapChainInfoType"); case VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI: { VkSwapChainImagePropertiesWSI *images = pData; @@ -173,12 +170,8 @@ x11_get_swap_chain_info(struct anv_swap_chain *anv_chain, images[i].image = anv_image_to_handle(chain->images[i].image); *pDataSize = size; - return VK_SUCCESS; } - - default: - return vk_error(VK_ERROR_INVALID_VALUE); } } @@ -196,13 +189,9 @@ x11_acquire_next_image(struct anv_swap_chain *anv_chain, xcb_get_geometry_reply_t *geom = xcb_get_geometry_reply(chain->conn, image->geom_cookie, &err); 
if (!geom) { - if (err->error_code == XCB_DRAWABLE) { - /* Probably the best thing to do if our drawable goes away */ - return vk_error(VK_ERROR_OUT_OF_DATE_WSI); - } else { - return vk_error(VK_ERROR_UNKNOWN); - } + /* Probably the best thing to do if our drawable goes away */ free(err); + return vk_error(VK_ERROR_OUT_OF_DATE_WSI); } if (geom->width != chain->extent.width || @@ -366,13 +355,17 @@ x11_create_swap_chain(struct anv_wsi_implementation *impl, int ret = anv_gem_set_tiling(device, memory->bo.gem_handle, surface->stride, I915_TILING_X); if (ret) { - result = vk_errorf(VK_ERROR_UNKNOWN, "set_tiling failed: %m"); + /* FINISHME: Choose a better error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "set_tiling failed: %m"); goto fail; } int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); if (fd == -1) { - result = vk_errorf(VK_ERROR_UNKNOWN, "handle_to_fd failed: %m"); + /* FINISHME: Choose a better error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "handle_to_fd failed: %m"); goto fail; } @@ -400,7 +393,8 @@ x11_create_swap_chain(struct anv_wsi_implementation *impl, chain->gc = xcb_generate_id(chain->conn); if (!chain->gc) { - result = vk_error(VK_ERROR_UNKNOWN); + /* FINISHME: Choose a better error. 
*/ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); goto fail; } diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 599432ed68a..affe04c526f 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -567,6 +567,5 @@ VkResult gen7_compute_pipeline_create( VkPipeline* pPipeline) { anv_finishme("primitive_id needs sbe swizzling setup"); - - return vk_error(VK_ERROR_UNAVAILABLE); + abort(); } -- cgit v1.2.3 From 0ca3c8480ddc280bc7faa8aec2db2c3cd299aade Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 7 Oct 2015 11:39:49 -0700 Subject: vk/0.170.2: Update remaining enums --- include/vulkan/vulkan.h | 122 +++++++++++++++++++++++++---------------------- src/vulkan/anv_formats.c | 4 +- src/vulkan/anv_image.c | 2 +- 3 files changed, 67 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 1a572136340..62305a60116 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -44,6 +44,13 @@ extern "C" { #define VK_API_VERSION VK_MAKE_VERSION(0, 138, 2) +#if defined(__cplusplus) && (_MSC_VER >= 1800 || __cplusplus >= 201103L) + #define VK_NULL_HANDLE nullptr +#else + #define VK_NULL_HANDLE 0 +#endif + + #define VK_DEFINE_HANDLE(obj) typedef struct obj##_T* obj; @@ -104,14 +111,15 @@ VK_DEFINE_NONDISP_HANDLE(VkDescriptorSet) VK_DEFINE_NONDISP_HANDLE(VkFramebuffer) VK_DEFINE_NONDISP_HANDLE(VkCmdPool) -#define VK_LOD_CLAMP_NONE MAX_FLOAT -#define VK_LAST_MIP_LEVEL UINT32_MAX -#define VK_LAST_ARRAY_SLICE UINT32_MAX -#define VK_WHOLE_SIZE UINT64_MAX -#define VK_ATTACHMENT_UNUSED UINT32_MAX +#define VK_LOD_CLAMP_NONE 1000.0f +#define VK_REMAINING_MIP_LEVELS (~0U) +#define VK_REMAINING_ARRAY_LAYERS (~0U) +#define VK_WHOLE_SIZE (~0ULL) +#define VK_ATTACHMENT_UNUSED (~0U) #define VK_TRUE 1 #define VK_FALSE 0 -#define VK_NULL_HANDLE 0 +#define VK_QUEUE_FAMILY_IGNORED (~0U) +#define VK_SUBPASS_EXTERNAL (~0U) #define VK_MAX_PHYSICAL_DEVICE_NAME 256 #define 
VK_UUID_LENGTH 16 #define VK_MAX_MEMORY_TYPES 32 @@ -147,55 +155,52 @@ typedef enum { VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO = 1, VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO = 2, VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO = 3, - VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO = 4, - VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 5, - VK_STRUCTURE_TYPE_SHADER_CREATE_INFO = 6, - VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 7, - VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 8, - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 9, - VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO = 10, - VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO = 11, - VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO = 12, - VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO = 13, - VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO = 14, - VK_STRUCTURE_TYPE_EVENT_CREATE_INFO = 15, - VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 16, - VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 17, - VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO = 18, - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 19, - VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 20, - VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 21, - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO = 22, - VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO = 23, - VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO = 24, - VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO = 25, - VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO = 26, - VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO = 27, - VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO = 28, - VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO = 29, - VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 30, - VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO = 31, - VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 32, - VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO = 33, - VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 34, - VK_STRUCTURE_TYPE_MEMORY_BARRIER = 35, - 
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 36, - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 37, - VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO = 38, - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET = 39, - VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET = 40, - VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 41, - VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 42, - VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE = 43, - VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO = 44, - VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION = 45, - VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION = 46, - VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY = 47, - VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 48, - VK_STRUCTURE_TYPE_CMD_POOL_CREATE_INFO = 49, + VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 4, + VK_STRUCTURE_TYPE_SHADER_CREATE_INFO = 5, + VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 6, + VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 7, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 8, + VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO = 9, + VK_STRUCTURE_TYPE_EVENT_CREATE_INFO = 10, + VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 11, + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 12, + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO = 13, + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 14, + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 15, + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 16, + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO = 17, + VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO = 18, + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO = 19, + VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO = 20, + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO = 21, + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO = 22, + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO = 23, + VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO = 24, + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 25, + VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO = 26, + VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 27, + 
VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO = 28, + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 29, + VK_STRUCTURE_TYPE_MEMORY_BARRIER = 30, + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 31, + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 32, + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO = 33, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET = 34, + VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET = 35, + VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 36, + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 37, + VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE = 38, + VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO = 39, + VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION = 40, + VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION = 41, + VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY = 42, + VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 43, + VK_STRUCTURE_TYPE_CMD_POOL_CREATE_INFO = 44, + VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO = 45, + VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO = 46, VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, - VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_CMD_POOL_CREATE_INFO, - VK_STRUCTURE_TYPE_NUM = (VK_STRUCTURE_TYPE_CMD_POOL_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), + VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + VK_STRUCTURE_TYPE_NUM = (VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF } VkStructureType; @@ -302,7 +307,7 @@ typedef enum { VK_FORMAT_R11G11B10_UFLOAT = 87, VK_FORMAT_R9G9B9E5_UFLOAT = 88, VK_FORMAT_D16_UNORM = 89, - VK_FORMAT_D24_UNORM = 90, + VK_FORMAT_D24_UNORM_X8 = 90, VK_FORMAT_D32_SFLOAT = 91, VK_FORMAT_S8_UINT = 92, VK_FORMAT_D16_UNORM_S8_UINT = 93, @@ -461,9 +466,10 @@ typedef enum { VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL = 5, VK_IMAGE_LAYOUT_TRANSFER_SOURCE_OPTIMAL = 6, VK_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL = 7, + VK_IMAGE_LAYOUT_PREINITIALIZED = 8, VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED, - 
VK_IMAGE_LAYOUT_END_RANGE = VK_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL, - VK_IMAGE_LAYOUT_NUM = (VK_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL - VK_IMAGE_LAYOUT_UNDEFINED + 1), + VK_IMAGE_LAYOUT_END_RANGE = VK_IMAGE_LAYOUT_PREINITIALIZED, + VK_IMAGE_LAYOUT_NUM = (VK_IMAGE_LAYOUT_PREINITIALIZED - VK_IMAGE_LAYOUT_UNDEFINED + 1), VK_IMAGE_LAYOUT_MAX_ENUM = 0x7FFFFFFF } VkImageLayout; @@ -838,7 +844,7 @@ typedef enum { VK_IMAGE_USAGE_SAMPLED_BIT = 0x00000004, VK_IMAGE_USAGE_STORAGE_BIT = 0x00000008, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT = 0x00000010, - VK_IMAGE_USAGE_DEPTH_STENCIL_BIT = 0x00000020, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000020, VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT = 0x00000040, VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT = 0x00000080, } VkImageUsageFlagBits; diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 944e05f1476..2690ff24692 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -120,7 +120,7 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_R9G9B9E5_UFLOAT, R9G9B9E5_SHAREDEXP, .cpp = 4, .num_channels = 3), fmt(VK_FORMAT_D16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1, .depth_format = D16_UNORM), - fmt(VK_FORMAT_D24_UNORM, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), + fmt(VK_FORMAT_D24_UNORM_X8, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), fmt(VK_FORMAT_D32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1, .depth_format = D32_FLOAT), fmt(VK_FORMAT_S8_UINT, R8_UINT, .cpp = 1, .num_channels = 1, .has_stencil = true), fmt(VK_FORMAT_D16_UNORM_S8_UINT, R16_UNORM, .cpp = 2, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), @@ -429,7 +429,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( } } - if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_BIT) { + if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { 
goto unsupported; } diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 014a9b9b342..e72c592905f 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -484,7 +484,7 @@ anv_image_view_init(struct anv_image_view *iview, assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_DEPTH_STENCIL_BIT)); + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); switch (image->type) { default: -- cgit v1.2.3 From 89845598921afbef35b0adefee6e2eb9446ef327 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 7 Oct 2015 14:13:55 -0700 Subject: vk/0.170.2: Update to the new VK_EXT_KHR_swapchain extensions --- include/vulkan/vk_ext_khr_device_swapchain.h | 210 ++++++++++++++++++++++ include/vulkan/vk_ext_khr_swapchain.h | 153 ++++++++++++++++ include/vulkan/vk_wsi_device_swapchain.h | 249 --------------------------- include/vulkan/vk_wsi_swapchain.h | 133 -------------- src/vulkan/Makefile.am | 4 +- src/vulkan/anv_device.c | 8 +- src/vulkan/anv_private.h | 6 +- src/vulkan/anv_util.c | 2 +- src/vulkan/anv_wsi.c | 119 +++++++------ src/vulkan/anv_wsi.h | 42 +++-- src/vulkan/anv_wsi_wayland.c | 228 ++++++++++++------------ src/vulkan/anv_wsi_x11.c | 226 ++++++++++++------------ 12 files changed, 681 insertions(+), 699 deletions(-) create mode 100644 include/vulkan/vk_ext_khr_device_swapchain.h create mode 100644 include/vulkan/vk_ext_khr_swapchain.h delete mode 100644 include/vulkan/vk_wsi_device_swapchain.h delete mode 100644 include/vulkan/vk_wsi_swapchain.h (limited to 'src') diff --git a/include/vulkan/vk_ext_khr_device_swapchain.h b/include/vulkan/vk_ext_khr_device_swapchain.h new file mode 100644 index 00000000000..3bf73c84a96 --- /dev/null +++ b/include/vulkan/vk_ext_khr_device_swapchain.h @@ -0,0 +1,210 @@ +// +// File: vk_ext_khr_device_swapchain.h +// +/* +** Copyright (c) 2015 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+*/ + +#ifndef __VK_EXT_KHR_DEVICE_SWAPCHAIN_H__ +#define __VK_EXT_KHR_DEVICE_SWAPCHAIN_H__ + +#include "vulkan.h" + +#define VK_EXT_KHR_DEVICE_SWAPCHAIN_REVISION 53 +#define VK_EXT_KHR_DEVICE_SWAPCHAIN_EXTENSION_NUMBER 2 +#define VK_EXT_KHR_DEVICE_SWAPCHAIN_EXTENSION_NAME "VK_EXT_KHR_device_swapchain" + +#ifdef __cplusplus +extern "C" +{ +#endif // __cplusplus + +// ------------------------------------------------------------------------------------------------ +// Objects + +VK_DEFINE_NONDISP_HANDLE(VkSwapchainKHR); + +// ------------------------------------------------------------------------------------------------ +// Enumeration constants + +#define VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM(type,id) ((type)((int)0xc0000000 - VK_EXT_KHR_DEVICE_SWAPCHAIN_EXTENSION_NUMBER * -1024 + (id))) +#define VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM_POSITIVE(type,id) ((type)((int)0x40000000 + (VK_EXT_KHR_DEVICE_SWAPCHAIN_EXTENSION_NUMBER - 1) * 1024 + (id))) + +// Extend VkStructureType enum with extension specific constants +#define VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM(VkStructureType, 0) +#define VK_STRUCTURE_TYPE_PRESENT_INFO_KHR VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM(VkStructureType, 1) + +// Extend VkImageLayout enum with extension specific constants +#define VK_IMAGE_LAYOUT_PRESENT_SOURCE_KHR VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM(VkImageLayout, 2) + +// Extend VkResult enum with extension specific constants +// Return codes for successful operation execution +#define VK_SUBOPTIMAL_KHR VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM_POSITIVE(VkResult, 3) +// Error codes +#define VK_ERROR_OUT_OF_DATE_KHR VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM(VkResult, 4) + +// ------------------------------------------------------------------------------------------------ +// Enumerations + +typedef enum { + VK_PRESENT_MODE_IMMEDIATE_KHR = 0, + VK_PRESENT_MODE_MAILBOX_KHR = 1, + VK_PRESENT_MODE_FIFO_KHR = 2, + VK_PRESENT_MODE_BEGIN_RANGE_KHR = VK_PRESENT_MODE_IMMEDIATE_KHR, + 
VK_PRESENT_MODE_END_RANGE_KHR = VK_PRESENT_MODE_FIFO_KHR, + VK_PRESENT_MODE_NUM = (VK_PRESENT_MODE_FIFO_KHR - VK_PRESENT_MODE_IMMEDIATE_KHR + 1), + VK_PRESENT_MODE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkPresentModeKHR; + +typedef enum { + VK_COLORSPACE_SRGB_NONLINEAR_KHR = 0x00000000, + VK_COLORSPACE_NUM = (VK_COLORSPACE_SRGB_NONLINEAR_KHR - VK_COLORSPACE_SRGB_NONLINEAR_KHR + 1), + VK_COLORSPACE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkColorSpaceKHR; + +// ------------------------------------------------------------------------------------------------ +// Flags + +// ------------------------------------------------------------------------------------------------ +// Structures + +typedef struct { + uint32_t minImageCount; // Supported minimum number of images for the surface + uint32_t maxImageCount; // Supported maximum number of images for the surface, 0 for unlimited + + VkExtent2D currentExtent; // Current image width and height for the surface, (-1, -1) if undefined + VkExtent2D minImageExtent; // Supported minimum image width and height for the surface + VkExtent2D maxImageExtent; // Supported maximum image width and height for the surface + + VkSurfaceTransformFlagsKHR supportedTransforms;// 1 or more bits representing the transforms supported + VkSurfaceTransformKHR currentTransform; // The surface's current transform relative to the device's natural orientation + + uint32_t maxImageArraySize; // Supported maximum number of image layers for the surface + + VkImageUsageFlags supportedUsageFlags;// Supported image usage flags for the surface +} VkSurfacePropertiesKHR; + +typedef struct { + VkFormat format; // Supported pair of rendering format + VkColorSpaceKHR colorSpace; // and colorspace for the surface +} VkSurfaceFormatKHR; + +typedef struct { + VkStructureType sType; // Must be VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR + const void* pNext; // Pointer to next structure + + const VkSurfaceDescriptionKHR* pSurfaceDescription;// describes the swap chain's target surface + 
+ uint32_t minImageCount; // Minimum number of presentation images the application needs + VkFormat imageFormat; // Format of the presentation images + VkColorSpaceKHR imageColorSpace; // Colorspace of the presentation images + VkExtent2D imageExtent; // Dimensions of the presentation images + VkImageUsageFlags imageUsageFlags; // Bits indicating how the presentation images will be used + VkSurfaceTransformKHR preTransform; // The transform, relative to the device's natural orientation, applied to the image content prior to presentation + uint32_t imageArraySize; // Determines the number of views for multiview/stereo presentation + + VkSharingMode sharingMode; // Sharing mode used for the presentation images + uint32_t queueFamilyCount; // Number of queue families having access to the images in case of concurrent sharing mode + const uint32_t* pQueueFamilyIndices; // Array of queue family indices having access to the images in case of concurrent sharing mode + + VkPresentModeKHR presentMode; // Which presentation mode to use for presents on this swap chain + + VkSwapchainKHR oldSwapchain; // Existing swap chain to replace, if any + + VkBool32 clipped; // Specifies whether presentable images may be affected by window clip regions +} VkSwapchainCreateInfoKHR; + +typedef struct { + VkStructureType sType; // Must be VK_STRUCTURE_TYPE_PRESENT_INFO_KHR + const void* pNext; // Pointer to next structure + uint32_t swapchainCount; // Number of swap chains to present in this call + const VkSwapchainKHR* swapchains; // Swap chains to present an image from + const uint32_t* imageIndices; // Indices of which swapchain images to present +} VkPresentInfoKHR; + +// ------------------------------------------------------------------------------------------------ +// Function types + +typedef VkResult (VKAPI *PFN_vkGetSurfacePropertiesKHR)(VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, VkSurfacePropertiesKHR* pSurfaceProperties); +typedef VkResult (VKAPI 
*PFN_vkGetSurfaceFormatsKHR)(VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, uint32_t* pCount, VkSurfaceFormatKHR* pSurfaceFormats); +typedef VkResult (VKAPI *PFN_vkGetSurfacePresentModesKHR)(VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, uint32_t* pCount, VkPresentModeKHR* pPresentModes); +typedef VkResult (VKAPI *PFN_vkCreateSwapchainKHR)(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, VkSwapchainKHR* pSwapchain); +typedef VkResult (VKAPI *PFN_vkDestroySwapchainKHR)(VkDevice device, VkSwapchainKHR swapchain); +typedef VkResult (VKAPI *PFN_vkGetSwapchainImagesKHR)(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pCount, VkImage* pSwapchainImages); +typedef VkResult (VKAPI *PFN_vkAcquireNextImageKHR)(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, uint32_t* pImageIndex); +typedef VkResult (VKAPI *PFN_vkQueuePresentKHR)(VkQueue queue, VkPresentInfoKHR* pPresentInfo); + +// ------------------------------------------------------------------------------------------------ +// Function prototypes + +#ifdef VK_PROTOTYPES + +VkResult VKAPI vkGetSurfacePropertiesKHR( + VkDevice device, + const VkSurfaceDescriptionKHR* pSurfaceDescription, + VkSurfacePropertiesKHR* pSurfaceProperties); + +VkResult VKAPI vkGetSurfaceFormatsKHR( + VkDevice device, + const VkSurfaceDescriptionKHR* pSurfaceDescription, + uint32_t* pCount, + VkSurfaceFormatKHR* pSurfaceFormats); + +VkResult VKAPI vkGetSurfacePresentModesKHR( + VkDevice device, + const VkSurfaceDescriptionKHR* pSurfaceDescription, + uint32_t* pCount, + VkPresentModeKHR* pPresentModes); + +VkResult VKAPI vkCreateSwapchainKHR( + VkDevice device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + VkSwapchainKHR* pSwapchain); + +VkResult VKAPI vkDestroySwapchainKHR( + VkDevice device, + VkSwapchainKHR swapchain); + +VkResult VKAPI vkGetSwapchainImagesKHR( + VkDevice device, + VkSwapchainKHR swapchain, + uint32_t* pCount, + VkImage* 
pSwapchainImages); + +VkResult VKAPI vkAcquireNextImageKHR( + VkDevice device, + VkSwapchainKHR swapchain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t* pImageIndex); + +VkResult VKAPI vkQueuePresentKHR( + VkQueue queue, + VkPresentInfoKHR* pPresentInfo); + +#endif // VK_PROTOTYPES + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // __VK_EXT_KHR_SWAPCHAIN_H__ diff --git a/include/vulkan/vk_ext_khr_swapchain.h b/include/vulkan/vk_ext_khr_swapchain.h new file mode 100644 index 00000000000..862b4d5e741 --- /dev/null +++ b/include/vulkan/vk_ext_khr_swapchain.h @@ -0,0 +1,153 @@ +// +// File: vk_ext_khr_swapchain.h +// +/* +** Copyright (c) 2015 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+*/ + +#ifndef __VK_EXT_KHR_SWAPCHAIN_H__ +#define __VK_EXT_KHR_SWAPCHAIN_H__ + +#include "vulkan.h" + +#define VK_EXT_KHR_SWAPCHAIN_REVISION 17 +#define VK_EXT_KHR_SWAPCHAIN_EXTENSION_NUMBER 1 +#define VK_EXT_KHR_SWAPCHAIN_EXTENSION_NAME "VK_EXT_KHR_swapchain" + +#ifdef __cplusplus +extern "C" +{ +#endif // __cplusplus + +// ------------------------------------------------------------------------------------------------ +// Objects + +// ------------------------------------------------------------------------------------------------ +// Enumeration constants + +#define VK_EXT_KHR_SWAPCHAIN_ENUM(type,id) ((type)((int)0xc0000000 - VK_EXT_KHR_SWAPCHAIN_EXTENSION_NUMBER * -1024 + (id))) +#define VK_EXT_KHR_SWAPCHAIN_ENUM_POSITIVE(type,id) ((type)((int)0x40000000 + (VK_EXT_KHR_SWAPCHAIN_EXTENSION_NUMBER - 1) * 1024 + (id))) + +// Extend VkStructureType enum with extension specific constants +#define VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR VK_EXT_KHR_SWAPCHAIN_ENUM(VkStructureType, 0) + +// ------------------------------------------------------------------------------------------------ +// Enumerations + +typedef enum { + VK_SURFACE_TRANSFORM_NONE_KHR = 0, + VK_SURFACE_TRANSFORM_ROT90_KHR = 1, + VK_SURFACE_TRANSFORM_ROT180_KHR = 2, + VK_SURFACE_TRANSFORM_ROT270_KHR = 3, + VK_SURFACE_TRANSFORM_HMIRROR_KHR = 4, + VK_SURFACE_TRANSFORM_HMIRROR_ROT90_KHR = 5, + VK_SURFACE_TRANSFORM_HMIRROR_ROT180_KHR = 6, + VK_SURFACE_TRANSFORM_HMIRROR_ROT270_KHR = 7, + VK_SURFACE_TRANSFORM_INHERIT_KHR = 8, +} VkSurfaceTransformKHR; + +typedef enum { + VK_SURFACE_TRANSFORM_NONE_BIT_KHR = 0x00000001, + VK_SURFACE_TRANSFORM_ROT90_BIT_KHR = 0x00000002, + VK_SURFACE_TRANSFORM_ROT180_BIT_KHR = 0x00000004, + VK_SURFACE_TRANSFORM_ROT270_BIT_KHR = 0x00000008, + VK_SURFACE_TRANSFORM_HMIRROR_BIT_KHR = 0x00000010, + VK_SURFACE_TRANSFORM_HMIRROR_ROT90_BIT_KHR = 0x00000020, + VK_SURFACE_TRANSFORM_HMIRROR_ROT180_BIT_KHR = 0x00000040, + VK_SURFACE_TRANSFORM_HMIRROR_ROT270_BIT_KHR = 0x00000080, + 
VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR = 0x00000100, +} VkSurfaceTransformFlagBitsKHR; +typedef VkFlags VkSurfaceTransformFlagsKHR; + +typedef enum { + VK_PLATFORM_WIN32_KHR = 0, + VK_PLATFORM_X11_KHR = 1, + VK_PLATFORM_XCB_KHR = 2, + VK_PLATFORM_ANDROID_KHR = 3, + VK_PLATFORM_WAYLAND_KHR = 4, + VK_PLATFORM_MIR_KHR = 5, + VK_PLATFORM_BEGIN_RANGE_KHR = VK_PLATFORM_WIN32_KHR, + VK_PLATFORM_END_RANGE_KHR = VK_PLATFORM_MIR_KHR, + VK_PLATFORM_NUM_KHR = (VK_PLATFORM_MIR_KHR - VK_PLATFORM_WIN32_KHR + 1), + VK_PLATFORM_MAX_ENUM_KHR = 0x7FFFFFFF +} VkPlatformKHR; + +// ------------------------------------------------------------------------------------------------ +// Flags + +// ------------------------------------------------------------------------------------------------ +// Structures + +// Placeholder structure header for the different types of surface description structures +typedef struct { + VkStructureType sType; // Can be any of the VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_XXX_KHR constants + const void* pNext; // Pointer to next structure +} VkSurfaceDescriptionKHR; + +// Surface description structure for a native platform window surface +typedef struct { + VkStructureType sType; // Must be VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR + const void* pNext; // Pointer to next structure + VkPlatformKHR platform; // e.g. 
VK_PLATFORM_*_KHR + void* pPlatformHandle; + void* pPlatformWindow; +} VkSurfaceDescriptionWindowKHR; + +// pPlatformHandle points to this struct when platform is VK_PLATFORM_X11_KHR +#ifdef _X11_XLIB_H_ +typedef struct { + Display* dpy; // Display connection to an X server + Window root; // To identify the X screen +} VkPlatformHandleX11KHR; +#endif /* _X11_XLIB_H_ */ + +// pPlatformHandle points to this struct when platform is VK_PLATFORM_XCB_KHR +#ifdef __XCB_H__ +typedef struct { + xcb_connection_t* connection; // XCB connection to an X server + xcb_window_t root; // To identify the X screen +} VkPlatformHandleXcbKHR; +#endif /* __XCB_H__ */ + +// ------------------------------------------------------------------------------------------------ +// Function types + +typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceSurfaceSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, const VkSurfaceDescriptionKHR* pSurfaceDescription, VkBool32* pSupported); + +// ------------------------------------------------------------------------------------------------ +// Function prototypes + +#ifdef VK_PROTOTYPES + +VkResult VKAPI vkGetPhysicalDeviceSurfaceSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + const VkSurfaceDescriptionKHR* pSurfaceDescription, + VkBool32* pSupported); + +#endif // VK_PROTOTYPES + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // __VK_EXT_KHR_SWAPCHAIN_H__ diff --git a/include/vulkan/vk_wsi_device_swapchain.h b/include/vulkan/vk_wsi_device_swapchain.h deleted file mode 100644 index c6e4db27815..00000000000 --- a/include/vulkan/vk_wsi_device_swapchain.h +++ /dev/null @@ -1,249 +0,0 @@ -// -// File: vk_wsi_device_swapchain.h -// -/* -** Copyright (c) 2014 The Khronos Group Inc. 
-** -** Permission is hereby granted, free of charge, to any person obtaining a -** copy of this software and/or associated documentation files (the -** "Materials"), to deal in the Materials without restriction, including -** without limitation the rights to use, copy, modify, merge, publish, -** distribute, sublicense, and/or sell copies of the Materials, and to -** permit persons to whom the Materials are furnished to do so, subject to -** the following conditions: -** -** The above copyright notice and this permission notice shall be included -** in all copies or substantial portions of the Materials. -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
-*/ - -#ifndef __VK_WSI_DEVICE_SWAPCHAIN_H__ -#define __VK_WSI_DEVICE_SWAPCHAIN_H__ - -#include "vulkan.h" - -#define VK_WSI_DEVICE_SWAPCHAIN_REVISION 40 -#define VK_WSI_DEVICE_SWAPCHAIN_EXTENSION_NUMBER 2 -#define VK_WSI_DEVICE_SWAPCHAIN_EXTENSION_NAME "VK_WSI_device_swapchain" - -#ifdef __cplusplus -extern "C" -{ -#endif // __cplusplus - -// ------------------------------------------------------------------------------------------------ -// Objects - -VK_DEFINE_NONDISP_HANDLE(VkSwapChainWSI); - -// ------------------------------------------------------------------------------------------------ -// Enumeration constants - -#define VK_WSI_DEVICE_SWAPCHAIN_ENUM(type,id) ((type)((int)0xc0000000 - VK_WSI_DEVICE_SWAPCHAIN_EXTENSION_NUMBER * -1024 + (id))) -#define VK_WSI_DEVICE_SWAPCHAIN_ENUM_POSITIVE(type,id) ((type)((int)0x40000000 + (VK_WSI_DEVICE_SWAPCHAIN_EXTENSION_NUMBER - 1) * 1024 + (id))) - -// Extend VkStructureType enum with extension specific constants -#define VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI VK_WSI_DEVICE_SWAPCHAIN_ENUM(VkStructureType, 0) -#define VK_STRUCTURE_TYPE_QUEUE_PRESENT_INFO_WSI VK_WSI_DEVICE_SWAPCHAIN_ENUM(VkStructureType, 1) - -// Extend VkImageLayout enum with extension specific constants -#define VK_IMAGE_LAYOUT_PRESENT_SOURCE_WSI VK_WSI_DEVICE_SWAPCHAIN_ENUM(VkImageLayout, 2) - -// Extend VkResult enum with extension specific constants -// Return codes for successful operation execution -#define VK_SUBOPTIMAL_WSI VK_WSI_DEVICE_SWAPCHAIN_ENUM_POSITIVE(VkResult, 3) -// Error codes -#define VK_ERROR_OUT_OF_DATE_WSI VK_WSI_DEVICE_SWAPCHAIN_ENUM(VkResult, 4) - -// ------------------------------------------------------------------------------------------------ -// Enumerations - -typedef enum VkSurfaceTransformWSI_ -{ - VK_SURFACE_TRANSFORM_NONE_WSI = 0, - VK_SURFACE_TRANSFORM_ROT90_WSI = 1, - VK_SURFACE_TRANSFORM_ROT180_WSI = 2, - VK_SURFACE_TRANSFORM_ROT270_WSI = 3, - VK_SURFACE_TRANSFORM_HMIRROR_WSI = 4, - 
VK_SURFACE_TRANSFORM_HMIRROR_ROT90_WSI = 5, - VK_SURFACE_TRANSFORM_HMIRROR_ROT180_WSI = 6, - VK_SURFACE_TRANSFORM_HMIRROR_ROT270_WSI = 7, - VK_SURFACE_TRANSFORM_INHERIT_WSI = 8, -} VkSurfaceTransformWSI; - -typedef enum VkSurfaceTransformFlagBitsWSI_ -{ - VK_SURFACE_TRANSFORM_NONE_BIT_WSI = 0x00000001, - VK_SURFACE_TRANSFORM_ROT90_BIT_WSI = 0x00000002, - VK_SURFACE_TRANSFORM_ROT180_BIT_WSI = 0x00000004, - VK_SURFACE_TRANSFORM_ROT270_BIT_WSI = 0x00000008, - VK_SURFACE_TRANSFORM_HMIRROR_BIT_WSI = 0x00000010, - VK_SURFACE_TRANSFORM_HMIRROR_ROT90_BIT_WSI = 0x00000020, - VK_SURFACE_TRANSFORM_HMIRROR_ROT180_BIT_WSI = 0x00000040, - VK_SURFACE_TRANSFORM_HMIRROR_ROT270_BIT_WSI = 0x00000080, - VK_SURFACE_TRANSFORM_INHERIT_BIT_WSI = 0x00000100, -} VkSurfaceTransformFlagBitsWSI; -typedef VkFlags VkSurfaceTransformFlagsWSI; - -typedef enum VkSurfaceInfoTypeWSI_ -{ - VK_SURFACE_INFO_TYPE_PROPERTIES_WSI = 0, - VK_SURFACE_INFO_TYPE_FORMATS_WSI = 1, - VK_SURFACE_INFO_TYPE_PRESENT_MODES_WSI = 2, - VK_SURFACE_INFO_TYPE_BEGIN_RANGE_WSI = VK_SURFACE_INFO_TYPE_PROPERTIES_WSI, - VK_SURFACE_INFO_TYPE_END_RANGE_WSI = VK_SURFACE_INFO_TYPE_PRESENT_MODES_WSI, - VK_SURFACE_INFO_TYPE_NUM_WSI = (VK_SURFACE_INFO_TYPE_PRESENT_MODES_WSI - VK_SURFACE_INFO_TYPE_PROPERTIES_WSI + 1), - VK_SURFACE_INFO_TYPE_MAX_ENUM_WSI = 0x7FFFFFFF -} VkSurfaceInfoTypeWSI; - -typedef enum VkSwapChainInfoTypeWSI_ -{ - VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI = 0, - VK_SWAP_CHAIN_INFO_TYPE_BEGIN_RANGE_WSI = VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI, - VK_SWAP_CHAIN_INFO_TYPE_END_RANGE_WSI = VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI, - VK_SWAP_CHAIN_INFO_TYPE_NUM_WSI = (VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI - VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI + 1), - VK_SWAP_CHAIN_INFO_TYPE_MAX_ENUM_WSI = 0x7FFFFFFF -} VkSwapChainInfoTypeWSI; - -typedef enum VkPresentModeWSI_ -{ - VK_PRESENT_MODE_IMMEDIATE_WSI = 0, - VK_PRESENT_MODE_MAILBOX_WSI = 1, - VK_PRESENT_MODE_FIFO_WSI = 2, - VK_PRESENT_MODE_BEGIN_RANGE_WSI = VK_PRESENT_MODE_IMMEDIATE_WSI, - 
VK_PRESENT_MODE_END_RANGE_WSI = VK_PRESENT_MODE_FIFO_WSI, - VK_PRESENT_MODE_NUM = (VK_PRESENT_MODE_FIFO_WSI - VK_PRESENT_MODE_IMMEDIATE_WSI + 1), - VK_PRESENT_MODE_MAX_ENUM_WSI = 0x7FFFFFFF -} VkPresentModeWSI; - -// ------------------------------------------------------------------------------------------------ -// Flags - -// ------------------------------------------------------------------------------------------------ -// Structures - -typedef struct VkSurfacePropertiesWSI_ -{ - uint32_t minImageCount; // Supported minimum number of images for the surface - uint32_t maxImageCount; // Supported maximum number of images for the surface, 0 for unlimited - - VkExtent2D currentExtent; // Current image width and height for the surface, (-1, -1) if undefined. - VkExtent2D minImageExtent; // Supported minimum image width and height for the surface - VkExtent2D maxImageExtent; // Supported maximum image width and height for the surface - - VkSurfaceTransformFlagsWSI supportedTransforms;// 1 or more bits representing the transforms supported - VkSurfaceTransformWSI currentTransform; // The surface's current transform relative to the device's natural orientation. 
- - uint32_t maxImageArraySize; // Supported maximum number of image layers for the surface - - VkImageUsageFlags supportedUsageFlags;// Supported image usage flags for the surface -} VkSurfacePropertiesWSI; - -typedef struct VkSurfaceFormatPropertiesWSI_ -{ - VkFormat format; // Supported rendering format for the surface -} VkSurfaceFormatPropertiesWSI; - -typedef struct VkSurfacePresentModePropertiesWSI_ -{ - VkPresentModeWSI presentMode; // Supported presention mode for the surface -} VkSurfacePresentModePropertiesWSI; - -typedef struct VkSwapChainCreateInfoWSI_ -{ - VkStructureType sType; // Must be VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI - const void* pNext; // Pointer to next structure - - const VkSurfaceDescriptionWSI* pSurfaceDescription;// describes the swap chain's target surface - - uint32_t minImageCount; // Minimum number of presentation images the application needs - VkFormat imageFormat; // Format of the presentation images - VkExtent2D imageExtent; // Dimensions of the presentation images - VkImageUsageFlags imageUsageFlags; // Bits indicating how the presentation images will be used - VkSurfaceTransformWSI preTransform; // The transform, relative to the device's natural orientation, applied to the image content prior to presentation - uint32_t imageArraySize; // Determines the number of views for multiview/stereo presentation - - VkPresentModeWSI presentMode; // Which presentation mode to use for presents on this swap chain. - - VkSwapChainWSI oldSwapChain; // Existing swap chain to replace, if any. - - VkBool32 clipped; // Specifies whether presentable images may be affected by window clip regions. 
-} VkSwapChainCreateInfoWSI; - -typedef struct VkSwapChainImagePropertiesWSI_ -{ - VkImage image; // Persistent swap chain image handle -} VkSwapChainImagePropertiesWSI; - -typedef struct VkPresentInfoWSI_ -{ - VkStructureType sType; // Must be VK_STRUCTURE_TYPE_QUEUE_PRESENT_INFO_WSI - const void* pNext; // Pointer to next structure - uint32_t swapChainCount; // Number of swap chains to present in this call - const VkSwapChainWSI* swapChains; // Swap chains to present an image from. - const uint32_t* imageIndices; // Indices of which swapChain images to present -} VkPresentInfoWSI; - -// ------------------------------------------------------------------------------------------------ -// Function types - -typedef VkResult (VKAPI *PFN_vkGetSurfaceInfoWSI)(VkDevice device, const VkSurfaceDescriptionWSI* pSurfaceDescription, VkSurfaceInfoTypeWSI infoType, size_t* pDataSize, void* pData); -typedef VkResult (VKAPI *PFN_vkCreateSwapChainWSI)(VkDevice device, const VkSwapChainCreateInfoWSI* pCreateInfo, VkSwapChainWSI* pSwapChain); -typedef VkResult (VKAPI *PFN_vkDestroySwapChainWSI)(VkDevice device, VkSwapChainWSI swapChain); -typedef VkResult (VKAPI *PFN_vkGetSwapChainInfoWSI)(VkDevice device, VkSwapChainWSI swapChain, VkSwapChainInfoTypeWSI infoType, size_t* pDataSize, void* pData); -typedef VkResult (VKAPI *PFN_vkAcquireNextImageWSI)(VkDevice device, VkSwapChainWSI swapChain, uint64_t timeout, VkSemaphore semaphore, uint32_t* pImageIndex); -typedef VkResult (VKAPI *PFN_vkQueuePresentWSI)(VkQueue queue, VkPresentInfoWSI* pPresentInfo); - -// ------------------------------------------------------------------------------------------------ -// Function prototypes - -#ifdef VK_PROTOTYPES - -VkResult VKAPI vkGetSurfaceInfoWSI( - VkDevice device, - const VkSurfaceDescriptionWSI* pSurfaceDescription, - VkSurfaceInfoTypeWSI infoType, - size_t* pDataSize, - void* pData); - -VkResult VKAPI vkCreateSwapChainWSI( - VkDevice device, - const VkSwapChainCreateInfoWSI* pCreateInfo, - 
VkSwapChainWSI* pSwapChain); - -VkResult VKAPI vkDestroySwapChainWSI( - VkDevice device, - VkSwapChainWSI swapChain); - -VkResult VKAPI vkGetSwapChainInfoWSI( - VkDevice device, - VkSwapChainWSI swapChain, - VkSwapChainInfoTypeWSI infoType, - size_t* pDataSize, - void* pData); - -VkResult VKAPI vkAcquireNextImageWSI( - VkDevice device, - VkSwapChainWSI swapChain, - uint64_t timeout, - VkSemaphore semaphore, - uint32_t* pImageIndex); - -VkResult VKAPI vkQueuePresentWSI( - VkQueue queue, - VkPresentInfoWSI* pPresentInfo); - -#endif // VK_PROTOTYPES - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // __VK_WSI_SWAPCHAIN_H__ diff --git a/include/vulkan/vk_wsi_swapchain.h b/include/vulkan/vk_wsi_swapchain.h deleted file mode 100644 index 64704d47318..00000000000 --- a/include/vulkan/vk_wsi_swapchain.h +++ /dev/null @@ -1,133 +0,0 @@ -// -// File: vk_wsi_swapchain.h -// -/* -** Copyright (c) 2014 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a -** copy of this software and/or associated documentation files (the -** "Materials"), to deal in the Materials without restriction, including -** without limitation the rights to use, copy, modify, merge, publish, -** distribute, sublicense, and/or sell copies of the Materials, and to -** permit persons to whom the Materials are furnished to do so, subject to -** the following conditions: -** -** The above copyright notice and this permission notice shall be included -** in all copies or substantial portions of the Materials. -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. -*/ - -#ifndef __VK_WSI_SWAPCHAIN_H__ -#define __VK_WSI_SWAPCHAIN_H__ - -#include "vulkan.h" - -#define VK_WSI_SWAPCHAIN_REVISION 12 -#define VK_WSI_SWAPCHAIN_EXTENSION_NUMBER 1 -#define VK_WSI_SWAPCHAIN_EXTENSION_NAME "VK_WSI_swapchain" - -#ifdef __cplusplus -extern "C" -{ -#endif // __cplusplus - -// ------------------------------------------------------------------------------------------------ -// Objects - -// ------------------------------------------------------------------------------------------------ -// Enumeration constants - -#define VK_WSI_SWAPCHAIN_ENUM(type,id) ((type)((int)0xc0000000 - VK_WSI_SWAPCHAIN_EXTENSION_NUMBER * -1024 + (id))) -#define VK_WSI_SWAPCHAIN_ENUM_POSITIVE(type,id) ((type)((int)0x40000000 + (VK_WSI_SWAPCHAIN_EXTENSION_NUMBER - 1) * 1024 + (id))) - -// Extend VkStructureType enum with extension specific constants -#define VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI VK_WSI_SWAPCHAIN_ENUM(VkStructureType, 0) - -// ------------------------------------------------------------------------------------------------ -// Enumerations - -typedef enum VkPlatformWSI_ -{ - VK_PLATFORM_WIN32_WSI = 0, - VK_PLATFORM_X11_WSI = 1, - VK_PLATFORM_XCB_WSI = 2, - VK_PLATFORM_ANDROID_WSI = 3, - VK_PLATFORM_WAYLAND_WSI = 4, - VK_PLATFORM_MIR_WSI = 5, - VK_PLATFORM_BEGIN_RANGE_WSI = VK_PLATFORM_WIN32_WSI, - VK_PLATFORM_END_RANGE_WSI = VK_PLATFORM_MIR_WSI, - VK_PLATFORM_NUM_WSI = (VK_PLATFORM_MIR_WSI - VK_PLATFORM_WIN32_WSI + 1), - VK_PLATFORM_MAX_ENUM_WSI = 0x7FFFFFFF -} VkPlatformWSI; - -// ------------------------------------------------------------------------------------------------ -// Flags - -// 
------------------------------------------------------------------------------------------------ -// Structures - -// pPlatformHandle points to this struct when platform is VK_PLATFORM_X11_WSI -#ifdef _X11_XLIB_H_ -typedef struct VkPlatformHandleX11WSI_ -{ - Display* dpy; // Display connection to an X server - Window root; // To identify the X screen -} VkPlatformHandleX11WSI; -#endif /* _X11_XLIB_H_ */ - -// pPlatformHandle points to this struct when platform is VK_PLATFORM_XCB_WSI -#ifdef __XCB_H__ -typedef struct VkPlatformHandleXcbWSI_ -{ - xcb_connection_t* connection; // XCB connection to an X server - xcb_window_t root; // To identify the X screen -} VkPlatformHandleXcbWSI; -#endif /* __XCB_H__ */ - -// Placeholder structure header for the different types of surface description structures -typedef struct VkSurfaceDescriptionWSI_ -{ - VkStructureType sType; // Can be any of the VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_XXX_WSI constants - const void* pNext; // Pointer to next structure -} VkSurfaceDescriptionWSI; - -// Surface description structure for a native platform window surface -typedef struct VkSurfaceDescriptionWindowWSI_ -{ - VkStructureType sType; // Must be VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI - const void* pNext; // Pointer to next structure - VkPlatformWSI platform; // e.g. 
VK_PLATFORM_*_WSI - void* pPlatformHandle; - void* pPlatformWindow; -} VkSurfaceDescriptionWindowWSI; - -// ------------------------------------------------------------------------------------------------ -// Function types - -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceSurfaceSupportWSI)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, const VkSurfaceDescriptionWSI* pSurfaceDescription, VkBool32* pSupported); - -// ------------------------------------------------------------------------------------------------ -// Function prototypes - -#ifdef VK_PROTOTYPES - -VkResult VKAPI vkGetPhysicalDeviceSurfaceSupportWSI( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - const VkSurfaceDescriptionWSI* pSurfaceDescription, - VkBool32* pSupported); - -#endif // VK_PROTOTYPES - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // __VK_WSI_SWAPCHAIN_H__ diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 0d0abd27300..c5a076d2ba3 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -27,8 +27,8 @@ vulkan_include_HEADERS = \ $(top_srcdir)/include/vulkan/vk_platform.h \ $(top_srcdir)/include/vulkan/vulkan.h \ $(top_srcdir)/include/vulkan/vulkan_intel.h \ - $(top_srcdir)/include/vulkan/vk_wsi_swapchain.h \ - $(top_srcdir)/include/vulkan/vk_wsi_device_swapchain.h + $(top_srcdir)/include/vulkan/vk_ext_khr_swapchain.h \ + $(top_srcdir)/include/vulkan/vk_ext_khr_device_swapchain.h lib_LTLIBRARIES = libvulkan.la diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 4e677054afe..295aea9e434 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -122,15 +122,15 @@ static const VkAllocCallbacks default_alloc_callbacks = { static const VkExtensionProperties global_extensions[] = { { - .extName = "VK_WSI_swapchain", - .specVersion = 12 + .extName = VK_EXT_KHR_SWAPCHAIN_EXTENSION_NAME, + .specVersion = 17, }, }; static const VkExtensionProperties device_extensions[] = { { - .extName = 
"VK_WSI_device_swapchain", - .specVersion = 12 + .extName = VK_EXT_KHR_DEVICE_SWAPCHAIN_EXTENSION_NAME, + .specVersion = 53, }, }; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 0d1998d659c..dc5a2350e86 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -46,8 +46,8 @@ #define VK_PROTOTYPES #include #include -#include -#include +#include +#include #include "anv_entrypoints.h" @@ -429,7 +429,7 @@ struct anv_instance { uint32_t physicalDeviceCount; struct anv_physical_device physicalDevice; - struct anv_wsi_implementation * wsi_impl[VK_PLATFORM_NUM_WSI]; + struct anv_wsi_implementation * wsi_impl[VK_PLATFORM_NUM_KHR]; }; VkResult anv_init_wsi(struct anv_instance *instance); diff --git a/src/vulkan/anv_util.c b/src/vulkan/anv_util.c index 628e399cb3e..22fd01c9495 100644 --- a/src/vulkan/anv_util.c +++ b/src/vulkan/anv_util.c @@ -104,7 +104,7 @@ __vk_errorf(VkResult error, const char *file, int line, const char *format, ...) ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER) /* Extension errors */ - ERROR_CASE(VK_ERROR_OUT_OF_DATE_WSI) + ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR) default: assert(!"Unknown error"); diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c index 21e01fc61f2..241481b9895 100644 --- a/src/vulkan/anv_wsi.c +++ b/src/vulkan/anv_wsi.c @@ -55,18 +55,18 @@ anv_finish_wsi(struct anv_instance *instance) } VkResult -anv_GetPhysicalDeviceSurfaceSupportWSI( +anv_GetPhysicalDeviceSurfaceSupportKHR( VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, - const VkSurfaceDescriptionWSI* pSurfaceDescription, + const VkSurfaceDescriptionKHR* pSurfaceDescription, VkBool32* pSupported) { ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); assert(pSurfaceDescription->sType == - VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI); + VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); - VkSurfaceDescriptionWindowWSI *window = (void *)pSurfaceDescription; + VkSurfaceDescriptionWindowKHR *window = (void 
*)pSurfaceDescription; struct anv_wsi_implementation *impl = physical_device->instance->wsi_impl[window->platform]; @@ -81,116 +81,135 @@ anv_GetPhysicalDeviceSurfaceSupportWSI( } VkResult -anv_GetSurfaceInfoWSI( +anv_GetSurfacePropertiesKHR( VkDevice _device, - const VkSurfaceDescriptionWSI* pSurfaceDescription, - VkSurfaceInfoTypeWSI infoType, - size_t* pDataSize, - void* pData) + const VkSurfaceDescriptionKHR* pSurfaceDescription, + VkSurfacePropertiesKHR* pSurfaceProperties) { ANV_FROM_HANDLE(anv_device, device, _device); assert(pSurfaceDescription->sType == - VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI); - VkSurfaceDescriptionWindowWSI *window = - (VkSurfaceDescriptionWindowWSI *)pSurfaceDescription; + VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); + VkSurfaceDescriptionWindowKHR *window = + (VkSurfaceDescriptionWindowKHR *)pSurfaceDescription; struct anv_wsi_implementation *impl = device->instance->wsi_impl[window->platform]; assert(impl); - return impl->get_surface_info(impl, device, window, infoType, - pDataSize, pData); + return impl->get_surface_properties(impl, device, window, + pSurfaceProperties); } VkResult -anv_CreateSwapChainWSI( +anv_GetSurfaceFormatsKHR( VkDevice _device, - const VkSwapChainCreateInfoWSI* pCreateInfo, - VkSwapChainWSI* pSwapChain) + const VkSurfaceDescriptionKHR* pSurfaceDescription, + uint32_t* pCount, + VkSurfaceFormatKHR* pSurfaceFormats) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_swap_chain *swap_chain; + + assert(pSurfaceDescription->sType == + VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); + VkSurfaceDescriptionWindowKHR *window = + (VkSurfaceDescriptionWindowKHR *)pSurfaceDescription; + + struct anv_wsi_implementation *impl = + device->instance->wsi_impl[window->platform]; + + assert(impl); + + return impl->get_surface_formats(impl, device, window, + pCount, pSurfaceFormats); +} + +VkResult +anv_CreateSwapchainKHR( + VkDevice _device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + 
VkSwapchainKHR* pSwapchain) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_swapchain *swapchain; VkResult result; assert(pCreateInfo->pSurfaceDescription->sType == - VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI); - VkSurfaceDescriptionWindowWSI *window = - (VkSurfaceDescriptionWindowWSI *)pCreateInfo->pSurfaceDescription; + VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); + VkSurfaceDescriptionWindowKHR *window = + (VkSurfaceDescriptionWindowKHR *)pCreateInfo->pSurfaceDescription; struct anv_wsi_implementation *impl = device->instance->wsi_impl[window->platform]; assert(impl); - result = impl->create_swap_chain(impl, device, pCreateInfo, &swap_chain); + result = impl->create_swapchain(impl, device, pCreateInfo, &swapchain); if (result == VK_SUCCESS) - *pSwapChain = anv_swap_chain_to_handle(swap_chain); + *pSwapchain = anv_swapchain_to_handle(swapchain); return result; } VkResult -anv_DestroySwapChainWSI( +anv_DestroySwapchainKHR( VkDevice device, - VkSwapChainWSI swapChain) + VkSwapchainKHR swapChain) { - ANV_FROM_HANDLE(anv_swap_chain, swap_chain, swapChain); + ANV_FROM_HANDLE(anv_swapchain, swapchain, swapChain); - assert(swap_chain->device == anv_device_from_handle(device)); + assert(swapchain->device == anv_device_from_handle(device)); - return swap_chain->destroy(swap_chain); + return swapchain->destroy(swapchain); } VkResult -anv_GetSwapChainInfoWSI( +anv_GetSwapchainImagesKHR( VkDevice device, - VkSwapChainWSI swapChain, - VkSwapChainInfoTypeWSI infoType, - size_t* pDataSize, - void* pData) + VkSwapchainKHR _swapchain, + uint32_t* pCount, + VkImage* pSwapchainImages) { - ANV_FROM_HANDLE(anv_swap_chain, swap_chain, swapChain); + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); - assert(swap_chain->device == anv_device_from_handle(device)); + assert(swapchain->device == anv_device_from_handle(device)); - return swap_chain->get_swap_chain_info(swap_chain, infoType, - pDataSize, pData); + return swapchain->get_images(swapchain, 
pCount, pSwapchainImages); } VkResult -anv_AcquireNextImageWSI( +anv_AcquireNextImageKHR( VkDevice device, - VkSwapChainWSI swapChain, + VkSwapchainKHR _swapchain, uint64_t timeout, VkSemaphore semaphore, uint32_t* pImageIndex) { - ANV_FROM_HANDLE(anv_swap_chain, swap_chain, swapChain); + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); - assert(swap_chain->device == anv_device_from_handle(device)); + assert(swapchain->device == anv_device_from_handle(device)); - return swap_chain->acquire_next_image(swap_chain, - timeout, semaphore, pImageIndex); + return swapchain->acquire_next_image(swapchain, + timeout, semaphore, pImageIndex); } VkResult -anv_QueuePresentWSI( +anv_QueuePresentKHR( VkQueue _queue, - VkPresentInfoWSI* pPresentInfo) + VkPresentInfoKHR* pPresentInfo) { ANV_FROM_HANDLE(anv_queue, queue, _queue); VkResult result; - for (uint32_t i = 0; i < pPresentInfo->swapChainCount; i++) { - ANV_FROM_HANDLE(anv_swap_chain, swap_chain, pPresentInfo->swapChains[i]); + for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { + ANV_FROM_HANDLE(anv_swapchain, swapchain, pPresentInfo->swapchains[i]); - assert(swap_chain->device == queue->device); + assert(swapchain->device == queue->device); - result = swap_chain->queue_present(swap_chain, queue, - pPresentInfo->imageIndices[i]); + result = swapchain->queue_present(swapchain, queue, + pPresentInfo->imageIndices[i]); /* TODO: What if one of them returns OUT_OF_DATE? 
*/ if (result != VK_SUCCESS) return result; diff --git a/src/vulkan/anv_wsi.h b/src/vulkan/anv_wsi.h index cbff854c2fc..280049b0e86 100644 --- a/src/vulkan/anv_wsi.h +++ b/src/vulkan/anv_wsi.h @@ -25,37 +25,45 @@ #include "anv_private.h" -struct anv_swap_chain { +struct anv_swapchain { struct anv_device * device; - VkResult (*destroy)(struct anv_swap_chain *swap_chain); - VkResult (*get_swap_chain_info)(struct anv_swap_chain *swap_chain, - VkSwapChainInfoTypeWSI infoType, - size_t *pDataSize, void *pData); - VkResult (*acquire_next_image)(struct anv_swap_chain *swap_chain, + VkResult (*destroy)(struct anv_swapchain *swapchain); + VkResult (*get_images)(struct anv_swapchain *swapchain, + uint32_t *pCount, VkImage *pSwapchainImages); + VkResult (*acquire_next_image)(struct anv_swapchain *swap_chain, uint64_t timeout, VkSemaphore semaphore, uint32_t *image_index); - VkResult (*queue_present)(struct anv_swap_chain *swap_chain, + VkResult (*queue_present)(struct anv_swapchain *swap_chain, struct anv_queue *queue, uint32_t image_index); }; -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swap_chain, VkSwapChainWSI) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swapchain, VkSwapchainKHR) struct anv_wsi_implementation { VkResult (*get_window_supported)(struct anv_wsi_implementation *impl, struct anv_physical_device *physical_device, - const VkSurfaceDescriptionWindowWSI *window, + const VkSurfaceDescriptionWindowKHR *window, VkBool32 *pSupported); - VkResult (*get_surface_info)(struct anv_wsi_implementation *impl, + VkResult (*get_surface_properties)(struct anv_wsi_implementation *impl, + struct anv_device *device, + const VkSurfaceDescriptionWindowKHR *window, + VkSurfacePropertiesKHR *properties); + VkResult (*get_surface_formats)(struct anv_wsi_implementation *impl, + struct anv_device *device, + const VkSurfaceDescriptionWindowKHR *window, + uint32_t *pCount, + VkSurfaceFormatKHR *pSurfaceFormats); + VkResult (*get_surface_present_modes)(struct anv_wsi_implementation *impl, + struct 
anv_device *device, + const VkSurfaceDescriptionWindowKHR *window, + uint32_t *pCount, + VkPresentModeKHR *pPresentModes); + VkResult (*create_swapchain)(struct anv_wsi_implementation *impl, struct anv_device *device, - VkSurfaceDescriptionWindowWSI *window, - VkSurfaceInfoTypeWSI infoType, - size_t* pDataSize, void* pData); - VkResult (*create_swap_chain)(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSwapChainCreateInfoWSI *pCreateInfo, - struct anv_swap_chain **swap_chain); + const VkSwapchainCreateInfoKHR *pCreateInfo, + struct anv_swapchain **swapchain); }; VkResult anv_x11_init_wsi(struct anv_instance *instance); diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index ba3ce8a2c65..f87f3ffef19 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -298,7 +298,7 @@ wsi_wl_get_display(struct wsi_wayland *wsi, struct wl_display *wl_display) static VkResult wsi_wl_get_window_supported(struct anv_wsi_implementation *impl, struct anv_physical_device *physical_device, - const VkSurfaceDescriptionWindowWSI *window, + const VkSurfaceDescriptionWindowKHR *window, VkBool32 *pSupported) { struct wsi_wayland *wsi = (struct wsi_wayland *)impl; @@ -308,84 +308,82 @@ wsi_wl_get_window_supported(struct anv_wsi_implementation *impl, return VK_SUCCESS; } -static const VkSurfacePresentModePropertiesWSI present_modes[] = { - { VK_PRESENT_MODE_MAILBOX_WSI }, - { VK_PRESENT_MODE_FIFO_WSI }, +static const VkPresentModeKHR present_modes[] = { + VK_PRESENT_MODE_MAILBOX_KHR, + VK_PRESENT_MODE_FIFO_KHR, }; static VkResult -wsi_wl_get_surface_info(struct anv_wsi_implementation *impl, - struct anv_device *device, - VkSurfaceDescriptionWindowWSI *window, - VkSurfaceInfoTypeWSI infoType, - size_t* pDataSize, void* pData) +wsi_wl_get_surface_properties(struct anv_wsi_implementation *impl, + struct anv_device *device, + const VkSurfaceDescriptionWindowKHR *window, + VkSurfacePropertiesKHR *props) { - struct 
wsi_wayland *wsi = (struct wsi_wayland *)impl; - - assert(pDataSize != NULL); + props->minImageCount = MIN_NUM_IMAGES; + props->maxImageCount = 4; + props->currentExtent = (VkExtent2D) { -1, -1 }; + props->minImageExtent = (VkExtent2D) { 1, 1 }; + props->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + props->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; + props->currentTransform = VK_SURFACE_TRANSFORM_NONE_KHR; + props->maxImageArraySize = 1; + props->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - switch (infoType) { - default: - unreachable("bad VkSurfaceInfoTypeWSI"); - case VK_SURFACE_INFO_TYPE_PROPERTIES_WSI: { - VkSurfacePropertiesWSI *props = pData; - - if (pData == NULL) { - *pDataSize = sizeof(*props); - return VK_SUCCESS; - } + return VK_SUCCESS; +} - assert(*pDataSize >= sizeof(*props)); +static VkResult +wsi_wl_get_surface_formats(struct anv_wsi_implementation *impl, + struct anv_device *device, + const VkSurfaceDescriptionWindowKHR *window, + uint32_t *pCount, + VkSurfaceFormatKHR *pSurfaceFormats) +{ + struct wsi_wayland *wsi = (struct wsi_wayland *)impl; + struct wsi_wl_display *display = + wsi_wl_get_display(wsi, window->pPlatformHandle); - props->minImageCount = MIN_NUM_IMAGES; - props->maxImageCount = 4; - props->currentExtent = (VkExtent2D) { -1, -1 }; - props->minImageExtent = (VkExtent2D) { 1, 1 }; - props->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; - props->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_WSI; - props->currentTransform = VK_SURFACE_TRANSFORM_NONE_WSI; - props->maxImageArraySize = 1; - props->supportedUsageFlags = - VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + uint32_t count = anv_vector_length(&display->formats); + if (pSurfaceFormats == NULL) { + *pCount = count; return VK_SUCCESS; } - case VK_SURFACE_INFO_TYPE_FORMATS_WSI: { - VkSurfaceFormatPropertiesWSI *formats = pData; - - struct 
wsi_wl_display *display = - wsi_wl_get_display(wsi, window->pPlatformHandle); - - uint32_t size = anv_vector_length(&display->formats) * sizeof(*formats); - - if (pData == NULL) { - *pDataSize = size; - return VK_SUCCESS; - } + assert(*pCount >= count); + *pCount = count; - assert(*pDataSize >= size); - *pDataSize = size; + VkFormat *f; + anv_vector_foreach(f, &display->formats) { + *(pSurfaceFormats++) = (VkSurfaceFormatKHR) { + .format = *f, + /* TODO: We should get this from the compositor somehow */ + .colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR, + }; + } - VkFormat *f; - anv_vector_foreach(f, &display->formats) - (formats++)->format = *f; + return VK_SUCCESS; +} +static VkResult +wsi_wl_get_surface_present_modes(struct anv_wsi_implementation *impl, + struct anv_device *device, + const VkSurfaceDescriptionWindowKHR *window, + uint32_t *pCount, + VkPresentModeKHR *pPresentModes) +{ + if (pPresentModes == NULL) { + *pCount = ARRAY_SIZE(present_modes); return VK_SUCCESS; } - case VK_SURFACE_INFO_TYPE_PRESENT_MODES_WSI: - if (pData == NULL) { - *pDataSize = sizeof(present_modes); - return VK_SUCCESS; - } + assert(*pCount >= ARRAY_SIZE(present_modes)); + typed_memcpy(pPresentModes, present_modes, *pCount); + *pCount = ARRAY_SIZE(present_modes); - assert(*pDataSize >= sizeof(present_modes)); - memcpy(pData, present_modes, *pDataSize); - - return VK_SUCCESS; - } + return VK_SUCCESS; } struct wsi_wl_image { @@ -395,8 +393,8 @@ struct wsi_wl_image { bool busy; }; -struct wsi_wl_swap_chain { - struct anv_swap_chain base; +struct wsi_wl_swapchain { + struct anv_swapchain base; struct wsi_wl_display * display; struct wl_event_queue * queue; @@ -406,7 +404,7 @@ struct wsi_wl_swap_chain { VkFormat vk_format; uint32_t drm_format; - VkPresentModeWSI present_mode; + VkPresentModeKHR present_mode; bool fifo_ready; uint32_t image_count; @@ -414,44 +412,32 @@ struct wsi_wl_swap_chain { }; static VkResult -wsi_wl_get_swap_chain_info(struct anv_swap_chain *anv_chain, - 
VkSwapChainInfoTypeWSI infoType, - size_t* pDataSize, void* pData) +wsi_wl_get_images(struct anv_swapchain *anv_chain, + uint32_t *pCount, VkImage *pSwapchainImages) { - struct wsi_wl_swap_chain *chain = (struct wsi_wl_swap_chain *)anv_chain; - size_t size; - - switch (infoType) { - default: - unreachable("bad VkSwapChainInfoTypeWSI"); - case VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI: { - VkSwapChainImagePropertiesWSI *images = pData; + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; - size = chain->image_count * sizeof(*images); - - if (pData == NULL) { - *pDataSize = size; - return VK_SUCCESS; - } + if (pSwapchainImages == NULL) { + *pCount = chain->image_count; + return VK_SUCCESS; + } - assert(size <= *pDataSize); - for (uint32_t i = 0; i < chain->image_count; i++) - images[i].image = anv_image_to_handle(chain->images[i].image); + assert(chain->image_count <= *pCount); + for (uint32_t i = 0; i < chain->image_count; i++) + pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); - *pDataSize = size; + *pCount = chain->image_count; - return VK_SUCCESS; - } - } + return VK_SUCCESS; } static VkResult -wsi_wl_acquire_next_image(struct anv_swap_chain *anv_chain, +wsi_wl_acquire_next_image(struct anv_swapchain *anv_chain, uint64_t timeout, VkSemaphore semaphore, uint32_t *image_index) { - struct wsi_wl_swap_chain *chain = (struct wsi_wl_swap_chain *)anv_chain; + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; int ret = wl_display_dispatch_queue_pending(chain->display->display, chain->queue); @@ -460,7 +446,7 @@ wsi_wl_acquire_next_image(struct anv_swap_chain *anv_chain, * kicked by the server so this seems more-or-less correct. 
*/ if (ret < 0) - return vk_error(VK_ERROR_OUT_OF_DATE_WSI); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); while (1) { for (uint32_t i = 0; i < chain->image_count; i++) { @@ -477,14 +463,14 @@ wsi_wl_acquire_next_image(struct anv_swap_chain *anv_chain, int ret = wl_display_dispatch_queue(chain->display->display, chain->queue); if (ret < 0) - return vk_error(VK_ERROR_OUT_OF_DATE_WSI); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); } } static void frame_handle_done(void *data, struct wl_callback *callback, uint32_t serial) { - struct wsi_wl_swap_chain *chain = data; + struct wsi_wl_swapchain *chain = data; chain->fifo_ready = true; @@ -496,18 +482,18 @@ static const struct wl_callback_listener frame_listener = { }; static VkResult -wsi_wl_queue_present(struct anv_swap_chain *anv_chain, +wsi_wl_queue_present(struct anv_swapchain *anv_chain, struct anv_queue *queue, uint32_t image_index) { - struct wsi_wl_swap_chain *chain = (struct wsi_wl_swap_chain *)anv_chain; + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; - if (chain->present_mode == VK_PRESENT_MODE_FIFO_WSI) { + if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { while (!chain->fifo_ready) { int ret = wl_display_dispatch_queue(chain->display->display, chain->queue); if (ret < 0) - return vk_error(VK_ERROR_OUT_OF_DATE_WSI); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); } } @@ -515,7 +501,7 @@ wsi_wl_queue_present(struct anv_swap_chain *anv_chain, wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0); wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX); - if (chain->present_mode == VK_PRESENT_MODE_FIFO_WSI) { + if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { struct wl_callback *frame = wl_surface_frame(chain->surface); wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue); wl_callback_add_listener(frame, &frame_listener, chain); @@ -529,7 +515,7 @@ wsi_wl_queue_present(struct anv_swap_chain *anv_chain, } static void -wsi_wl_image_finish(struct 
wsi_wl_swap_chain *chain, struct wsi_wl_image *image) +wsi_wl_image_finish(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image) { VkDevice vk_device = anv_device_to_handle(chain->base.device); anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory)); @@ -551,7 +537,7 @@ static const struct wl_buffer_listener buffer_listener = { }; static VkResult -wsi_wl_image_init(struct wsi_wl_swap_chain *chain, struct wsi_wl_image *image) +wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image) { VkDevice vk_device = anv_device_to_handle(chain->base.device); VkResult result; @@ -651,9 +637,9 @@ fail_image: } static VkResult -wsi_wl_destroy_swap_chain(struct anv_swap_chain *anv_chain) +wsi_wl_destroy_swapchain(struct anv_swapchain *anv_chain) { - struct wsi_wl_swap_chain *chain = (struct wsi_wl_swap_chain *)anv_chain; + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; for (uint32_t i = 0; i < chain->image_count; i++) { if (chain->images[i].buffer) @@ -666,22 +652,22 @@ wsi_wl_destroy_swap_chain(struct anv_swap_chain *anv_chain) } static VkResult -wsi_wl_create_swap_chain(struct anv_wsi_implementation *impl, +wsi_wl_create_swapchain(struct anv_wsi_implementation *impl, struct anv_device *device, - const VkSwapChainCreateInfoWSI *pCreateInfo, - struct anv_swap_chain **swap_chain_out) + const VkSwapchainCreateInfoKHR *pCreateInfo, + struct anv_swapchain **swapchain_out) { struct wsi_wayland *wsi = (struct wsi_wayland *)impl; - struct wsi_wl_swap_chain *chain; + struct wsi_wl_swapchain *chain; VkResult result; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); assert(pCreateInfo->pSurfaceDescription->sType == - VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI); - VkSurfaceDescriptionWindowWSI *vk_window = - (VkSurfaceDescriptionWindowWSI *)pCreateInfo->pSurfaceDescription; - assert(vk_window->platform == 
VK_PLATFORM_WAYLAND_WSI); + VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); + VkSurfaceDescriptionWindowKHR *vk_window = + (VkSurfaceDescriptionWindowKHR *)pCreateInfo->pSurfaceDescription; + assert(vk_window->platform == VK_PLATFORM_WAYLAND_KHR); int num_images = pCreateInfo->minImageCount; @@ -693,7 +679,7 @@ wsi_wl_create_swap_chain(struct anv_wsi_implementation *impl, * 3) One to be currently held by the Wayland compositor * 4) One to render to */ - if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_WSI) + if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) num_images = MAX2(num_images, 4); size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); @@ -703,8 +689,8 @@ wsi_wl_create_swap_chain(struct anv_wsi_implementation *impl, return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); chain->base.device = device; - chain->base.destroy = wsi_wl_destroy_swap_chain; - chain->base.get_swap_chain_info = wsi_wl_get_swap_chain_info; + chain->base.destroy = wsi_wl_destroy_swapchain; + chain->base.get_images = wsi_wl_get_images; chain->base.acquire_next_image = wsi_wl_acquire_next_image; chain->base.queue_present = wsi_wl_queue_present; @@ -740,12 +726,12 @@ wsi_wl_create_swap_chain(struct anv_wsi_implementation *impl, chain->images[i].busy = false; } - *swap_chain_out = &chain->base; + *swapchain_out = &chain->base; return VK_SUCCESS; fail: - wsi_wl_destroy_swap_chain(&chain->base); + wsi_wl_destroy_swapchain(&chain->base); return result; } @@ -762,8 +748,10 @@ anv_wl_init_wsi(struct anv_instance *instance) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); wsi->base.get_window_supported = wsi_wl_get_window_supported; - wsi->base.get_surface_info = wsi_wl_get_surface_info; - wsi->base.create_swap_chain = wsi_wl_create_swap_chain; + wsi->base.get_surface_properties = wsi_wl_get_surface_properties; + wsi->base.get_surface_formats = wsi_wl_get_surface_formats; + wsi->base.get_surface_present_modes = wsi_wl_get_surface_present_modes; + 
wsi->base.create_swapchain = wsi_wl_create_swapchain; wsi->instance = instance; @@ -786,7 +774,7 @@ anv_wl_init_wsi(struct anv_instance *instance) goto fail_mutex; } - instance->wsi_impl[VK_PLATFORM_WAYLAND_WSI] = &wsi->base; + instance->wsi_impl[VK_PLATFORM_WAYLAND_KHR] = &wsi->base; return VK_SUCCESS; @@ -803,7 +791,7 @@ void anv_wl_finish_wsi(struct anv_instance *instance) { struct wsi_wayland *wsi = - (struct wsi_wayland *)instance->wsi_impl[VK_PLATFORM_WAYLAND_WSI]; + (struct wsi_wayland *)instance->wsi_impl[VK_PLATFORM_WAYLAND_KHR]; _mesa_hash_table_destroy(wsi->displays, NULL); diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index d226caf0eea..54e40918489 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -27,20 +27,20 @@ #include "anv_wsi.h" -static const VkFormat formats[] = { - VK_FORMAT_B5G6R5_UNORM, - VK_FORMAT_B8G8R8A8_UNORM, - VK_FORMAT_B8G8R8A8_SRGB, +static const VkSurfaceFormatKHR formats[] = { + { .format = VK_FORMAT_B5G6R5_UNORM, }, + { .format = VK_FORMAT_B8G8R8A8_UNORM, }, + { .format = VK_FORMAT_B8G8R8A8_SRGB, }, }; -static const VkSurfacePresentModePropertiesWSI present_modes[] = { - { VK_PRESENT_MODE_MAILBOX_WSI }, +static const VkPresentModeKHR present_modes[] = { + VK_PRESENT_MODE_MAILBOX_KHR, }; static VkResult x11_get_window_supported(struct anv_wsi_implementation *impl, struct anv_physical_device *physical_device, - const VkSurfaceDescriptionWindowWSI *window, + const VkSurfaceDescriptionWindowKHR *window, VkBool32 *pSupported) { *pSupported = true; @@ -48,80 +48,76 @@ x11_get_window_supported(struct anv_wsi_implementation *impl, } static VkResult -x11_get_surface_info(struct anv_wsi_implementation *impl, - struct anv_device *device, - VkSurfaceDescriptionWindowWSI *vk_window, - VkSurfaceInfoTypeWSI infoType, - size_t* pDataSize, void* pData) +x11_get_surface_properties(struct anv_wsi_implementation *impl, + struct anv_device *device, + const VkSurfaceDescriptionWindowKHR *vk_window, + 
VkSurfacePropertiesKHR *props) { - assert(pDataSize != NULL); - - switch (infoType) { - default: - unreachable("bad VkSurfaceInfoTypeWSI"); - case VK_SURFACE_INFO_TYPE_PROPERTIES_WSI: { - VkSurfacePropertiesWSI *props = pData; - - if (pData == NULL) { - *pDataSize = sizeof(*props); - return VK_SUCCESS; - } - - assert(*pDataSize >= sizeof(*props)); - - VkPlatformHandleXcbWSI *vk_xcb_handle = vk_window->pPlatformHandle; - xcb_connection_t *conn = vk_xcb_handle->connection; - xcb_window_t win = (xcb_window_t)(uintptr_t)vk_window->pPlatformWindow; - - xcb_get_geometry_cookie_t cookie = xcb_get_geometry(conn, win); - xcb_generic_error_t *err; - xcb_get_geometry_reply_t *geom = xcb_get_geometry_reply(conn, cookie, - &err); - if (!geom) { - /* FINISHME: Choose a more accurate error. */ - free(err); - return VK_ERROR_OUT_OF_DATE_WSI; - } + VkPlatformHandleXcbKHR *vk_xcb_handle = vk_window->pPlatformHandle; + xcb_connection_t *conn = vk_xcb_handle->connection; + xcb_window_t win = (xcb_window_t)(uintptr_t)vk_window->pPlatformWindow; + + xcb_get_geometry_cookie_t cookie = xcb_get_geometry(conn, win); + xcb_generic_error_t *err; + xcb_get_geometry_reply_t *geom = xcb_get_geometry_reply(conn, cookie, + &err); + if (!geom) { + free(err); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } - VkExtent2D extent = { geom->width, geom->height }; - free(geom); + VkExtent2D extent = { geom->width, geom->height }; + free(geom); + + props->minImageCount = 2; + props->maxImageCount = 4; + props->currentExtent = extent; + props->minImageExtent = extent; + props->maxImageExtent = extent; + props->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; + props->currentTransform = VK_SURFACE_TRANSFORM_NONE_KHR; + props->maxImageArraySize = 1; + props->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - props->minImageCount = 2; - props->maxImageCount = 4; - props->currentExtent = extent; - props->minImageExtent = extent; - 
props->maxImageExtent = extent; - props->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_WSI; - props->currentTransform = VK_SURFACE_TRANSFORM_NONE_WSI; - props->maxImageArraySize = 1; - props->supportedUsageFlags = - VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + return VK_SUCCESS; +} +static VkResult +x11_get_surface_formats(struct anv_wsi_implementation *impl, + struct anv_device *device, + const VkSurfaceDescriptionWindowKHR *vk_window, + uint32_t *pCount, VkSurfaceFormatKHR *pSurfaceFormats) +{ + if (pSurfaceFormats == NULL) { + *pCount = ARRAY_SIZE(formats); return VK_SUCCESS; } - case VK_SURFACE_INFO_TYPE_FORMATS_WSI: - if (pData == NULL) { - *pDataSize = sizeof(formats); - return VK_SUCCESS; - } + assert(*pCount >= ARRAY_SIZE(formats)); + typed_memcpy(pSurfaceFormats, formats, *pCount); + *pCount = ARRAY_SIZE(formats); - assert(*pDataSize >= sizeof(formats)); - memcpy(pData, formats, *pDataSize); + return VK_SUCCESS; +} +static VkResult +x11_get_surface_present_modes(struct anv_wsi_implementation *impl, + struct anv_device *device, + const VkSurfaceDescriptionWindowKHR *vk_window, + uint32_t *pCount, VkPresentModeKHR *pPresentModes) +{ + if (pPresentModes == NULL) { + *pCount = ARRAY_SIZE(present_modes); return VK_SUCCESS; + } - case VK_SURFACE_INFO_TYPE_PRESENT_MODES_WSI: - if (pData == NULL) { - *pDataSize = sizeof(present_modes); - return VK_SUCCESS; - } + assert(*pCount >= ARRAY_SIZE(present_modes)); + typed_memcpy(pPresentModes, present_modes, *pCount); + *pCount = ARRAY_SIZE(present_modes); - assert(*pDataSize >= sizeof(present_modes)); - memcpy(pData, present_modes, *pDataSize); - return VK_SUCCESS; - } + return VK_SUCCESS; } struct x11_image { @@ -132,8 +128,8 @@ struct x11_image { bool busy; }; -struct x11_swap_chain { - struct anv_swap_chain base; +struct x11_swapchain { + struct anv_swapchain base; xcb_connection_t * conn; xcb_window_t window; @@ -145,43 +141,32 @@ struct x11_swap_chain { }; static VkResult 
-x11_get_swap_chain_info(struct anv_swap_chain *anv_chain, - VkSwapChainInfoTypeWSI infoType, - size_t* pDataSize, void* pData) +x11_get_images(struct anv_swapchain *anv_chain, + uint32_t* pCount, VkImage *pSwapchainImages) { - struct x11_swap_chain *chain = (struct x11_swap_chain *)anv_chain; - size_t size; + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - switch (infoType) { - default: - unreachable("bad VkSwapChainInfoType"); - case VK_SWAP_CHAIN_INFO_TYPE_IMAGES_WSI: { - VkSwapChainImagePropertiesWSI *images = pData; - - size = chain->image_count * sizeof(*images); + if (pSwapchainImages == NULL) { + *pCount = chain->image_count; + return VK_SUCCESS; + } - if (pData == NULL) { - *pDataSize = size; - return VK_SUCCESS; - } + assert(chain->image_count <= *pCount); + for (uint32_t i = 0; i < chain->image_count; i++) + pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); - assert(size <= *pDataSize); - for (uint32_t i = 0; i < chain->image_count; i++) - images[i].image = anv_image_to_handle(chain->images[i].image); + *pCount = chain->image_count; - *pDataSize = size; - return VK_SUCCESS; - } - } + return VK_SUCCESS; } static VkResult -x11_acquire_next_image(struct anv_swap_chain *anv_chain, +x11_acquire_next_image(struct anv_swapchain *anv_chain, uint64_t timeout, VkSemaphore semaphore, uint32_t *image_index) { - struct x11_swap_chain *chain = (struct x11_swap_chain *)anv_chain; + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; struct x11_image *image = &chain->images[chain->next_image]; if (image->busy) { @@ -189,15 +174,14 @@ x11_acquire_next_image(struct anv_swap_chain *anv_chain, xcb_get_geometry_reply_t *geom = xcb_get_geometry_reply(chain->conn, image->geom_cookie, &err); if (!geom) { - /* Probably the best thing to do if our drawable goes away */ free(err); - return vk_error(VK_ERROR_OUT_OF_DATE_WSI); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); } if (geom->width != chain->extent.width || geom->height != 
chain->extent.height) { free(geom); - return vk_error(VK_ERROR_OUT_OF_DATE_WSI); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); } free(geom); @@ -210,11 +194,11 @@ x11_acquire_next_image(struct anv_swap_chain *anv_chain, } static VkResult -x11_queue_present(struct anv_swap_chain *anv_chain, +x11_queue_present(struct anv_swapchain *anv_chain, struct anv_queue *queue, uint32_t image_index) { - struct x11_swap_chain *chain = (struct x11_swap_chain *)anv_chain; + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; struct x11_image *image = &chain->images[image_index]; assert(image_index < chain->image_count); @@ -240,9 +224,9 @@ x11_queue_present(struct anv_swap_chain *anv_chain, } static VkResult -x11_destroy_swap_chain(struct anv_swap_chain *anv_chain) +x11_destroy_swapchain(struct anv_swapchain *anv_chain) { - struct x11_swap_chain *chain = (struct x11_swap_chain *)anv_chain; + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; xcb_void_cookie_t cookie; for (uint32_t i = 0; i < chain->image_count; i++) { @@ -263,24 +247,24 @@ x11_destroy_swap_chain(struct anv_swap_chain *anv_chain) } static VkResult -x11_create_swap_chain(struct anv_wsi_implementation *impl, +x11_create_swapchain(struct anv_wsi_implementation *impl, struct anv_device *device, - const VkSwapChainCreateInfoWSI *pCreateInfo, - struct anv_swap_chain **swap_chain_out) + const VkSwapchainCreateInfoKHR *pCreateInfo, + struct anv_swapchain **swapchain_out) { - struct x11_swap_chain *chain; + struct x11_swapchain *chain; xcb_void_cookie_t cookie; VkResult result; assert(pCreateInfo->pSurfaceDescription->sType == - VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_WSI); - VkSurfaceDescriptionWindowWSI *vk_window = - (VkSurfaceDescriptionWindowWSI *)pCreateInfo->pSurfaceDescription; - assert(vk_window->platform == VK_PLATFORM_XCB_WSI); + VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); + VkSurfaceDescriptionWindowKHR *vk_window = + (VkSurfaceDescriptionWindowKHR 
*)pCreateInfo->pSurfaceDescription; + assert(vk_window->platform == VK_PLATFORM_XCB_KHR); int num_images = pCreateInfo->minImageCount; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); chain = anv_device_alloc(device, size, 8, @@ -289,12 +273,12 @@ x11_create_swap_chain(struct anv_wsi_implementation *impl, return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); chain->base.device = device; - chain->base.destroy = x11_destroy_swap_chain; - chain->base.get_swap_chain_info = x11_get_swap_chain_info; + chain->base.destroy = x11_destroy_swapchain; + chain->base.get_images = x11_get_images; chain->base.acquire_next_image = x11_acquire_next_image; chain->base.queue_present = x11_queue_present; - VkPlatformHandleXcbWSI *vk_xcb_handle = vk_window->pPlatformHandle; + VkPlatformHandleXcbKHR *vk_xcb_handle = vk_window->pPlatformHandle; chain->conn = (xcb_connection_t *) vk_xcb_handle->connection; chain->window = (xcb_window_t) (uintptr_t)vk_window->pPlatformWindow; @@ -405,7 +389,7 @@ x11_create_swap_chain(struct anv_wsi_implementation *impl, (uint32_t []) { 0 }); xcb_discard_reply(chain->conn, cookie.sequence); - *swap_chain_out = &chain->base; + *swapchain_out = &chain->base; return VK_SUCCESS; @@ -424,10 +408,12 @@ anv_x11_init_wsi(struct anv_instance *instance) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); impl->get_window_supported = x11_get_window_supported; - impl->get_surface_info = x11_get_surface_info; - impl->create_swap_chain = x11_create_swap_chain; + impl->get_surface_properties = x11_get_surface_properties; + impl->get_surface_formats = x11_get_surface_formats; + impl->get_surface_present_modes = x11_get_surface_present_modes; + impl->create_swapchain = x11_create_swapchain; - instance->wsi_impl[VK_PLATFORM_XCB_WSI] = impl; + instance->wsi_impl[VK_PLATFORM_XCB_KHR] = impl; return VK_SUCCESS; } @@ 
-435,5 +421,5 @@ anv_x11_init_wsi(struct anv_instance *instance) void anv_x11_finish_wsi(struct anv_instance *instance) { - anv_instance_free(instance, instance->wsi_impl[VK_PLATFORM_XCB_WSI]); + anv_instance_free(instance, instance->wsi_impl[VK_PLATFORM_XCB_KHR]); } -- cgit v1.2.3 From 48a87f4ba062e572a66f546997366d8896a4ecd1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Oct 2015 12:15:58 -0700 Subject: anv/queue: Get rid of the serial This was a remnant of the object tagging implementation we had at one point. We haven't used it for a long time so there's no good reason to keep it around. --- src/vulkan/anv_device.c | 13 ------------- src/vulkan/anv_private.h | 14 -------------- 2 files changed, 27 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 295aea9e434..c56704ea9f1 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -528,25 +528,12 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue) queue->device = device; queue->pool = &device->surface_state_pool; - queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4); - if (queue->completed_serial.map == NULL) - return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - - *(uint32_t *)queue->completed_serial.map = 0; - queue->next_serial = 1; - return VK_SUCCESS; } static void anv_queue_finish(struct anv_queue *queue) { -#ifdef HAVE_VALGRIND - /* This gets torn down with the device so we only need to do this if - * valgrind is present. - */ - anv_state_pool_free(queue->pool, queue->completed_serial); -#endif } static void diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index dc5a2350e86..6bc781fa072 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -458,20 +458,6 @@ struct anv_queue { struct anv_device * device; struct anv_state_pool * pool; - - /** - * Serial number of the most recently completed batch executed on the - * engine. 
- */ - struct anv_state completed_serial; - - /** - * The next batch submitted to the engine will be assigned this serial - * number. - */ - uint32_t next_serial; - - uint32_t last_collected_serial; }; struct anv_device { -- cgit v1.2.3 From 0689a0f0f36dc69fec91a0248f1f219b2e600c98 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 10 Oct 2015 09:31:03 -0700 Subject: anv/device: Return VK_SUCCESS after setting pCount in QueueFamilyProperties --- src/vulkan/anv_device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index c56704ea9f1..89cd184a98e 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -464,6 +464,7 @@ VkResult anv_GetPhysicalDeviceQueueFamilyProperties( { if (pQueueFamilyProperties == NULL) { *pCount = 1; + return VK_SUCCESS; } assert(*pCount >= 1); -- cgit v1.2.3 From e21ecb841c7a2c5653ec86922b4e08006645ae8d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 12 Oct 2015 18:25:19 -0700 Subject: anv: Declare/validate the correct API version --- src/vulkan/anv_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 89cd184a98e..9625cc61252 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -145,6 +145,9 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); + if (pCreateInfo->pAppInfo->apiVersion != VK_MAKE_VERSION(0, 170, 2)) + return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); + for (uint32_t i = 0; i < pCreateInfo->extensionCount; i++) { bool found = false; for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { @@ -441,7 +444,7 @@ VkResult anv_GetPhysicalDeviceProperties( }; *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = VK_MAKE_VERSION(0, 138, 1), + .apiVersion = VK_MAKE_VERSION(0, 170, 2), .driverVersion = 1, .vendorId = 0x8086, .deviceId = pdevice->chipset_id, -- cgit 
v1.2.3 From c31f926726c64239b1fce3c685f37954e0d8984b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 13 Oct 2015 11:45:58 -0700 Subject: anv/wsi: Add the GetSurfacePresentModesKHR stub Support has existed in the X11 and Wayland backends for a while but, somehow, the entrypoint got missed in the API shuffle. --- src/vulkan/anv_wsi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c index 241481b9895..f5c2d3716a5 100644 --- a/src/vulkan/anv_wsi.c +++ b/src/vulkan/anv_wsi.c @@ -125,6 +125,30 @@ anv_GetSurfaceFormatsKHR( pCount, pSurfaceFormats); } +VkResult +anv_GetSurfacePresentModesKHR( + VkDevice _device, + const VkSurfaceDescriptionKHR* pSurfaceDescription, + uint32_t* pCount, + VkPresentModeKHR* pPresentModes) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + assert(pSurfaceDescription->sType == + VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); + VkSurfaceDescriptionWindowKHR *window = + (VkSurfaceDescriptionWindowKHR *)pSurfaceDescription; + + struct anv_wsi_implementation *impl = + device->instance->wsi_impl[window->platform]; + + assert(impl); + + return impl->get_surface_present_modes(impl, device, window, + pCount, pPresentModes); +} + + VkResult anv_CreateSwapchainKHR( VkDevice _device, -- cgit v1.2.3 From fd2ec1c8ada5df8899c59847c6711303119ae56c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 13 Oct 2015 15:00:35 -0700 Subject: anv/x11: Do something sensible if get_geometry fails in GetSurfaceProperties --- src/vulkan/anv_wsi_x11.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 54e40918489..8834230f9cd 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -61,19 +61,25 @@ x11_get_surface_properties(struct anv_wsi_implementation *impl, xcb_generic_error_t *err; xcb_get_geometry_reply_t *geom = 
xcb_get_geometry_reply(conn, cookie, &err); - if (!geom) { + if (geom) { free(err); - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + VkExtent2D extent = { geom->width, geom->height }; + props->currentExtent = extent; + props->minImageExtent = extent; + props->maxImageExtent = extent; + } else { + /* This can happen if the client didn't wait for the configure event + * to come back from the compositor. In that case, we don't know the + * size of the window so we just return valid "I don't know" stuff. + */ + free(geom); + props->currentExtent = (VkExtent2D) { -1, -1 }; + props->minImageExtent = (VkExtent2D) { 1, 1 }; + props->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; } - VkExtent2D extent = { geom->width, geom->height }; - free(geom); - props->minImageCount = 2; props->maxImageCount = 4; - props->currentExtent = extent; - props->minImageExtent = extent; - props->maxImageExtent = extent; props->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; props->currentTransform = VK_SURFACE_TRANSFORM_NONE_KHR; props->maxImageArraySize = 1; -- cgit v1.2.3 From accbf178eb0741698d6837499f382d26a7fba5ae Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 13 Oct 2015 15:19:58 -0700 Subject: i965/surface_formats: Pull the surface_format_info struct into a header --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_surface_formats.c | 19 ++---------- src/mesa/drivers/dri/i965/brw_surface_formats.h | 40 +++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 16 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_surface_formats.h (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 81ef6283fa1..b242ab55aae 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -53,6 +53,7 @@ i965_compiler_FILES = \ brw_shader.cpp \ brw_shader.h \ brw_surface_formats.c \ + brw_surface_formats.h \ brw_util.c \ 
brw_util.h \ brw_vec4_builder.h \ diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 97fff60f3e5..0d49ab7b431 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -27,20 +27,7 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_wm.h" - -struct surface_format_info { - bool exists; - int sampling; - int filtering; - int shadow_compare; - int chroma_key; - int render_target; - int alpha_blend; - int input_vb; - int streamed_output_vb; - int color_processing; - const char *name; -}; +#include "brw_surface_formats.h" /* This macro allows us to write the table almost as it appears in the PRM, * while restructuring it to turn it into the C code we want. @@ -85,7 +72,7 @@ struct surface_format_info { * - VOL4_Part1 section 2.12.2.1.2 Sampler Output Channel Mapping. * - VOL4_Part1 section 3.9.11 Render Target Write. */ -const struct surface_format_info surface_formats[] = { +const struct brw_surface_format_info surface_formats[] = { /* smpl filt shad CK RT AB VB SO color */ SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32B32A32_FLOAT) SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_SINT) @@ -616,7 +603,7 @@ brw_init_surface_formats(struct brw_context *brw) for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; format++) { uint32_t texture, render; - const struct surface_format_info *rinfo, *tinfo; + const struct brw_surface_format_info *rinfo, *tinfo; bool is_integer = _mesa_is_format_integer_color(format); render = texture = brw_format_for_mesa_format(format); diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.h b/src/mesa/drivers/dri/i965/brw_surface_formats.h new file mode 100644 index 00000000000..5c7b60e680b --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.h @@ -0,0 +1,40 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of 
this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +struct brw_surface_format_info { + bool exists; + int sampling; + int filtering; + int shadow_compare; + int chroma_key; + int render_target; + int alpha_blend; + int input_vb; + int streamed_output_vb; + int color_processing; + const char *name; +}; + +extern const struct brw_surface_format_info surface_formats[]; -- cgit v1.2.3 From 28ed02588a6204fc3272de84dab009b5bb19e07d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 13 Oct 2015 15:20:28 -0700 Subject: anv/formats: Use the surface_format_info struct from brw_surface_formats.h The surface_format_info struct changed in mesa but the copied-and-pasted version didn't get updated on the last mesa master merge. This both fixes the bug and should prevent this in the future. 
--- src/vulkan/anv_formats.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 2690ff24692..4efc537676c 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -22,6 +22,7 @@ */ #include "anv_private.h" +#include "brw_surface_formats.h" #define UNSUPPORTED 0xffff @@ -220,21 +221,6 @@ anv_format_for_vk_format(VkFormat format) // Format capabilities -struct surface_format_info { - bool exists; - int sampling; - int filtering; - int shadow_compare; - int chroma_key; - int render_target; - int alpha_blend; - int input_vb; - int streamed_output_vb; - int color_processing; -}; - -extern const struct surface_format_info surface_formats[]; - VkResult anv_validate_GetPhysicalDeviceFormatProperties( VkPhysicalDevice physicalDevice, VkFormat _format, @@ -250,7 +236,7 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d const struct anv_format *format, VkFormatProperties *out_properties) { - const struct surface_format_info *info; + const struct brw_surface_format_info *info; int gen; VkFormatFeatureFlags flags; -- cgit v1.2.3 From db5a5fcd1886529d194e5635160526e95ad8d045 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 13 Oct 2015 15:50:02 -0700 Subject: anv/image: Add a basic implementation of GetImageSubresourceLayout --- src/vulkan/anv_image.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index e72c592905f..e3991e22234 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -379,13 +379,55 @@ anv_DestroyImage(VkDevice _device, VkImage _image) anv_device_free(device, anv_image_from_handle(_image)); } +static void +anv_surface_get_subresource_layout(struct anv_image *image, + struct anv_surface *surface, + const VkImageSubresource *subresource, + VkSubresourceLayout 
*layout) +{ + /* If we are on a non-zero mip level or array slice, we need to + * calculate a real offset. + */ + anv_assert(subresource->mipLevel == 0); + anv_assert(subresource->arrayLayer == 0); + + layout->offset = surface->offset; + layout->rowPitch = surface->stride; + layout->depthPitch = surface->qpitch; + + /* FINISHME: We really shouldn't be doing this calculation here */ + if (image->array_size > 1) + layout->size = surface->qpitch * image->array_size; + else + layout->size = surface->stride * image->extent.height; +} + VkResult anv_GetImageSubresourceLayout( VkDevice device, - VkImage image, + VkImage _image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout) { - stub_return(VK_UNSUPPORTED); + ANV_FROM_HANDLE(anv_image, image, _image); + + switch (pSubresource->aspect) { + case VK_IMAGE_ASPECT_COLOR: + anv_surface_get_subresource_layout(image, &image->color_surface, + pSubresource, pLayout); + break; + case VK_IMAGE_ASPECT_DEPTH: + anv_surface_get_subresource_layout(image, &image->depth_surface, + pSubresource, pLayout); + break; + case VK_IMAGE_ASPECT_STENCIL: + anv_surface_get_subresource_layout(image, &image->stencil_surface, + pSubresource, pLayout); + break; + default: + return vk_error(VK_UNSUPPORTED); + } + + return VK_SUCCESS; } VkResult -- cgit v1.2.3 From d2d8945eb8618a12ac5457bab54e76d317114775 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 13 Oct 2015 20:00:05 -0700 Subject: nir/spirv: Fix a bug in indirect OpAccessChain handling --- src/glsl/nir/spirv_to_nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index d0fb6a255a5..6503b2590d7 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1381,10 +1381,10 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, deref_arr->base_offset = idx; } else { assert(idx_val->value_type == vtn_value_type_ssa); + 
assert(glsl_type_is_scalar(idx_val->ssa->type)); deref_arr->deref_array_type = nir_deref_array_type_indirect; deref_arr->base_offset = 0; - deref_arr->indirect = - nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); + deref_arr->indirect = nir_src_for_ssa(idx_val->ssa->def); } tail->child = &deref_arr->deref; break; -- cgit v1.2.3 From b37c38c1cada402a258079c6b4e70d5783fb96c5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Oct 2015 15:18:49 -0700 Subject: anv: Completely rework descriptor set layouts This patch reworks a bunch of stuff in the way we do descriptor set layouts. Our previous approach had a couple of problems. First, it was based on a misunderstanding of arrays in descriptor sets. Second, it didn't properly handle descriptor sets where some bindings were missing stages. The new approach should be correct and also makes some operations, particularly those on the hot-path, a bit easier. We use the descriptor set layout for four things: 1) To determine the map from bindings to the actual flattened descriptor set in vkUpdateDescriptorSets(). 2) To determine the descriptor <-> binding table entry mapping to use in anv_cmd_buffer_flush_descriptor_sets(). 3) To determine the mappings of dynamic indices. 4) To determine the (set, binding, array index) -> binding table entry mapping inside of shaders. The new approach is directly tailored towards these operations. 
--- src/vulkan/anv_cmd_buffer.c | 98 +++++++++----------- src/vulkan/anv_compiler.cpp | 19 ++-- src/vulkan/anv_device.c | 143 ++++++++--------------------- src/vulkan/anv_nir_apply_dynamic_offsets.c | 10 +- src/vulkan/anv_pipeline.c | 115 ++++++++++++++++------- src/vulkan/anv_private.h | 52 ++++++++--- 6 files changed, 222 insertions(+), 215 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 28d9dd9d694..718499d60c8 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -483,7 +483,7 @@ void anv_CmdBindDescriptorSets( cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; } - if (set_layout->num_dynamic_buffers > 0) { + if (set_layout->dynamic_offset_count > 0) { VkShaderStage s; for_each_bit(s, set_layout->shader_stages) { anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, @@ -492,13 +492,13 @@ void anv_CmdBindDescriptorSets( cmd_buffer->state.push_constants[s]->dynamic_offsets + layout->set[firstSet + i].dynamic_offset_start; - memcpy(offsets, pDynamicOffsets + dynamic_slot, - set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); + typed_memcpy(offsets, pDynamicOffsets + dynamic_slot, + set_layout->dynamic_offset_count); } cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; - dynamic_slot += set_layout->num_dynamic_buffers; + dynamic_slot += set_layout->dynamic_offset_count; } } } @@ -594,41 +594,35 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (layout == NULL) return VK_SUCCESS; - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - struct anv_descriptor_slot *surface_slots = - set_layout->stage[stage].surface_start; - - uint32_t start = bias + layout->set[set].stage[stage].surface_start; - - for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; 
b++) { - struct anv_descriptor *desc = - &d->set->descriptors[surface_slots[b].index]; - - const struct anv_state *surface_state; - struct anv_bo *bo; - uint32_t bo_offset; - - switch (desc->type) { - case ANV_DESCRIPTOR_TYPE_EMPTY: - case ANV_DESCRIPTOR_TYPE_SAMPLER: - continue; - case ANV_DESCRIPTOR_TYPE_BUFFER_VIEW: - surface_state = &desc->buffer_view->surface_state; - bo = desc->buffer_view->bo; - bo_offset = desc->buffer_view->offset; - break; - case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW: - surface_state = &desc->image_view->nonrt_surface_state; - bo = desc->image_view->bo; - bo_offset = desc->image_view->offset; - break; - } - - bt_map[start + b] = surface_state->offset + state_offset; - add_surface_state_reloc(cmd_buffer, *surface_state, bo, bo_offset); + for (uint32_t s = 0; s < layout->stage[stage].surface_count; s++) { + struct anv_pipeline_binding *binding = + &layout->stage[stage].surface_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set].set; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + + const struct anv_state *surface_state; + struct anv_bo *bo; + uint32_t bo_offset; + + switch (desc->type) { + case ANV_DESCRIPTOR_TYPE_EMPTY: + case ANV_DESCRIPTOR_TYPE_SAMPLER: + continue; + case ANV_DESCRIPTOR_TYPE_BUFFER_VIEW: + surface_state = &desc->buffer_view->surface_state; + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; + break; + case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW: + surface_state = &desc->image_view->nonrt_surface_state; + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + break; } + + bt_map[bias + s] = surface_state->offset + state_offset; + add_surface_state_reloc(cmd_buffer, *surface_state, bo, bo_offset); } return VK_SUCCESS; @@ -656,26 +650,20 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, if (state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct 
anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - struct anv_descriptor_slot *sampler_slots = - set_layout->stage[stage].sampler_start; - - uint32_t start = layout->set[set].stage[stage].sampler_start; + for (uint32_t s = 0; s < layout->stage[stage].sampler_count; s++) { + struct anv_pipeline_binding *binding = + &layout->stage[stage].sampler_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set].set; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; - for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) { - struct anv_descriptor *desc = - &d->set->descriptors[sampler_slots[b].index]; + if (desc->type != ANV_DESCRIPTOR_TYPE_SAMPLER) + continue; - if (desc->type != ANV_DESCRIPTOR_TYPE_SAMPLER) - continue; + struct anv_sampler *sampler = desc->sampler; - struct anv_sampler *sampler = desc->sampler; - - memcpy(state->map + (start + b) * 16, - sampler->state, sizeof(sampler->state)); - } + memcpy(state->map + (s * 16), + sampler->state, sizeof(sampler->state)); } return VK_SUCCESS; diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 759ec7ae4d9..bf931fe886f 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -92,13 +92,20 @@ set_binding_table_layout(struct brw_stage_prog_data *prog_data, k = bias; map = prog_data->map_entries; - for (uint32_t i = 0; i < layout->num_sets; i++) { - prog_data->bind_map[i].index = map; - for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++) - *map++ = k++; + for (uint32_t set = 0; set < layout->num_sets; set++) { + prog_data->bind_map[set].index = map; + unsigned index_count = 0; + for (uint32_t b = 0; b < layout->set[set].layout->binding_count; b++) { + if (layout->set[set].layout->binding[b].stage[stage].surface_index < 0) + continue; + + unsigned array_size = 
layout->set[set].layout->binding[b].array_size; + for (uint32_t i = 0; i < array_size; i++) + *map++ = k++; + index_count += array_size; + } - prog_data->bind_map[i].index_count = - layout->set[i].layout->stage[stage].surface_count; + prog_data->bind_map[set].index_count = index_count; } return VK_SUCCESS; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 9625cc61252..98c1b2334e6 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1440,28 +1440,48 @@ VkResult anv_CreateDescriptorSetLayout( { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_descriptor_set_layout *set_layout; + uint32_t s; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + size_t size = sizeof(struct anv_descriptor_set_layout) + + pCreateInfo->count * sizeof(set_layout->binding[0]); + + set_layout = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!set_layout) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + set_layout->binding_count = pCreateInfo->count; + set_layout->shader_stages = 0; + set_layout->size = 0; + + /* Initialize all binding_layout entries to -1 */ + memset(set_layout->binding, -1, + pCreateInfo->count * sizeof(set_layout->binding[0])); + uint32_t sampler_count[VK_SHADER_STAGE_NUM] = { 0, }; uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, }; - uint32_t num_dynamic_buffers = 0; - uint32_t count = 0; - VkShaderStageFlags stages = 0; - uint32_t s; + uint32_t dynamic_offset_count = 0; + + for (uint32_t b = 0; b < pCreateInfo->count; b++) { + uint32_t array_size = MAX2(1, pCreateInfo->pBinding[b].arraySize); + set_layout->binding[b].array_size = array_size; + set_layout->size += array_size; - for (uint32_t i = 0; i < pCreateInfo->count; i++) { - switch (pCreateInfo->pBinding[i].descriptorType) { + switch (pCreateInfo->pBinding[b].descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for_each_bit(s, 
pCreateInfo->pBinding[i].stageFlags) - sampler_count[s] += pCreateInfo->pBinding[i].arraySize; + for_each_bit(s, pCreateInfo->pBinding[b].stageFlags) { + set_layout->binding[b].stage[s].sampler_index = sampler_count[s]; + sampler_count[s] += array_size; + } break; default: break; } - switch (pCreateInfo->pBinding[i].descriptorType) { + switch (pCreateInfo->pBinding[b].descriptorType) { case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: @@ -1472,114 +1492,29 @@ VkResult anv_CreateDescriptorSetLayout( case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - surface_count[s] += pCreateInfo->pBinding[i].arraySize; + for_each_bit(s, pCreateInfo->pBinding[b].stageFlags) { + set_layout->binding[b].stage[s].surface_index = surface_count[s]; + surface_count[s] += array_size; + } break; default: break; } - switch (pCreateInfo->pBinding[i].descriptorType) { + switch (pCreateInfo->pBinding[b].descriptorType) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - num_dynamic_buffers += pCreateInfo->pBinding[i].arraySize; + set_layout->binding[b].dynamic_offset_index = dynamic_offset_count; + dynamic_offset_count += array_size; break; default: break; } - stages |= pCreateInfo->pBinding[i].stageFlags; - count += pCreateInfo->pBinding[i].arraySize; + set_layout->shader_stages |= pCreateInfo->pBinding[b].stageFlags; } - uint32_t sampler_total = 0; - uint32_t surface_total = 0; - for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { - sampler_total += sampler_count[s]; - surface_total += surface_count[s]; - } - - size_t size = sizeof(*set_layout) + - (sampler_total + surface_total) * sizeof(set_layout->entries[0]); - set_layout = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!set_layout) - return 
vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - set_layout->num_dynamic_buffers = num_dynamic_buffers; - set_layout->count = count; - set_layout->shader_stages = stages; - - struct anv_descriptor_slot *p = set_layout->entries; - struct anv_descriptor_slot *sampler[VK_SHADER_STAGE_NUM]; - struct anv_descriptor_slot *surface[VK_SHADER_STAGE_NUM]; - for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { - set_layout->stage[s].surface_count = surface_count[s]; - set_layout->stage[s].surface_start = surface[s] = p; - p += surface_count[s]; - set_layout->stage[s].sampler_count = sampler_count[s]; - set_layout->stage[s].sampler_start = sampler[s] = p; - p += sampler_count[s]; - } - - uint32_t descriptor = 0; - int8_t dynamic_slot = 0; - bool is_dynamic; - for (uint32_t i = 0; i < pCreateInfo->count; i++) { - switch (pCreateInfo->pBinding[i].descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) { - sampler[s]->index = descriptor + j; - sampler[s]->dynamic_slot = -1; - sampler[s]++; - } - break; - default: - break; - } - - switch (pCreateInfo->pBinding[i].descriptorType) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - is_dynamic = true; - break; - default: - is_dynamic = false; - break; - } - - switch (pCreateInfo->pBinding[i].descriptorType) { - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) - for 
(uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) { - surface[s]->index = descriptor + j; - if (is_dynamic) - surface[s]->dynamic_slot = dynamic_slot + j; - else - surface[s]->dynamic_slot = -1; - surface[s]++; - } - break; - default: - break; - } - - if (is_dynamic) - dynamic_slot += pCreateInfo->pBinding[i].arraySize; - - descriptor += pCreateInfo->pBinding[i].arraySize; - } + set_layout->dynamic_offset_count = dynamic_offset_count; *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); @@ -1627,7 +1562,7 @@ anv_descriptor_set_create(struct anv_device *device, struct anv_descriptor_set **out_set) { struct anv_descriptor_set *set; - size_t size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]); + size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]); set = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (!set) diff --git a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c index 1f6c64a9e02..9d29c3c23ea 100644 --- a/src/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c @@ -39,7 +39,6 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state) { struct apply_dynamic_offsets_state *state = void_state; struct anv_descriptor_set_layout *set_layout; - const struct anv_descriptor_slot *slot; nir_foreach_instr_safe(block, instr) { if (instr->type != nir_instr_type_intrinsic) @@ -69,19 +68,18 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state) } set_layout = state->layout->set[set].layout; - slot = &set_layout->stage[state->stage].surface_start[binding]; - if (slot->dynamic_slot < 0) + if (set_layout->binding[binding].dynamic_offset_index < 0) continue; - uint32_t dynamic_index = state->layout->set[set].dynamic_offset_start + - slot->dynamic_slot; + uint32_t index = state->layout->set[set].dynamic_offset_start + + set_layout->binding[binding].dynamic_offset_index; state->builder.cursor = 
nir_before_instr(&intrin->instr); nir_intrinsic_instr *offset_load = nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform); offset_load->num_components = 1; - offset_load->const_index[0] = state->indices_start + dynamic_index; + offset_load->const_index[0] = state->indices_start + index; offset_load->const_index[1] = 0; nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 1, NULL); nir_builder_instr_insert(&state->builder, &offset_load->instr); diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 75f640154cc..84e023c781a 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -458,48 +458,101 @@ VkResult anv_CreatePipelineLayout( VkPipelineLayout* pPipelineLayout) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline_layout *layout; + struct anv_pipeline_layout l, *layout; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); - layout = anv_device_alloc(device, sizeof(*layout), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (layout == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + l.num_sets = pCreateInfo->descriptorSetCount; + + unsigned dynamic_offset_count = 0; + + memset(l.stage, 0, sizeof(l.stage)); + for (uint32_t set = 0; set < pCreateInfo->descriptorSetCount; set++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, + pCreateInfo->pSetLayouts[set]); + l.set[set].layout = set_layout; + + l.set[set].dynamic_offset_start = dynamic_offset_count; + for (uint32_t b = 0; b < set_layout->binding_count; b++) { + if (set_layout->binding[b].dynamic_offset_index >= 0) + dynamic_offset_count += set_layout->binding[b].array_size; + } + + for (VkShaderStage s = 0; s < VK_SHADER_STAGE_NUM; s++) { + l.set[set].stage[s].surface_start = l.stage[s].surface_count; + l.set[set].stage[s].sampler_start = l.stage[s].sampler_count; + + for (uint32_t b = 0; b < set_layout->binding_count; b++) { + unsigned array_size = set_layout->binding[b].array_size; - layout->num_sets = 
pCreateInfo->descriptorSetCount; + if (set_layout->binding[b].stage[s].surface_index >= 0) { + l.stage[s].surface_count += array_size; - uint32_t surface_start[VK_SHADER_STAGE_NUM] = { 0, }; - uint32_t sampler_start[VK_SHADER_STAGE_NUM] = { 0, }; + if (set_layout->binding[b].dynamic_offset_index >= 0) + l.stage[s].has_dynamic_offsets = true; + } - for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { - layout->stage[s].has_dynamic_offsets = false; - layout->stage[s].surface_count = 0; - layout->stage[s].sampler_count = 0; + if (set_layout->binding[b].stage[s].sampler_index >= 0) + l.stage[s].sampler_count += array_size; + } + } } - uint32_t num_dynamic_offsets = 0; - for (uint32_t i = 0; i < pCreateInfo->descriptorSetCount; i++) { - ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, - pCreateInfo->pSetLayouts[i]); - - layout->set[i].layout = set_layout; - layout->set[i].dynamic_offset_start = num_dynamic_offsets; - num_dynamic_offsets += set_layout->num_dynamic_buffers; - for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { - if (set_layout->num_dynamic_buffers > 0) - layout->stage[s].has_dynamic_offsets = true; - - layout->set[i].stage[s].surface_start = surface_start[s]; - surface_start[s] += set_layout->stage[s].surface_count; - layout->set[i].stage[s].sampler_start = sampler_start[s]; - sampler_start[s] += set_layout->stage[s].sampler_count; - - layout->stage[s].surface_count += set_layout->stage[s].surface_count; - layout->stage[s].sampler_count += set_layout->stage[s].sampler_count; + unsigned num_bindings = 0; + for (VkShaderStage s = 0; s < VK_SHADER_STAGE_NUM; s++) + num_bindings += l.stage[s].surface_count + l.stage[s].sampler_count; + + size_t size = sizeof(*layout) + num_bindings * sizeof(layout->entries[0]); + + layout = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (layout == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Now we can actually build our surface and sampler maps */ + struct anv_pipeline_binding 
*entry = layout->entries; + for (VkShaderStage s = 0; s < VK_SHADER_STAGE_NUM; s++) { + l.stage[s].surface_to_descriptor = entry; + entry += l.stage[s].surface_count; + l.stage[s].sampler_to_descriptor = entry; + entry += l.stage[s].sampler_count; + + int surface = 0; + int sampler = 0; + for (uint32_t set = 0; set < pCreateInfo->descriptorSetCount; set++) { + struct anv_descriptor_set_layout *set_layout = l.set[set].layout; + + unsigned set_offset = 0; + for (uint32_t b = 0; b < set_layout->binding_count; b++) { + unsigned array_size = set_layout->binding[b].array_size; + + if (set_layout->binding[b].stage[s].surface_index >= 0) { + assert(surface == l.set[set].stage[s].surface_start + + set_layout->binding[b].stage[s].surface_index); + for (unsigned i = 0; i < array_size; i++) { + l.stage[s].surface_to_descriptor[surface + i].set = set; + l.stage[s].surface_to_descriptor[surface + i].offset = set_offset + i; + } + surface += array_size; + } + + if (set_layout->binding[b].stage[s].sampler_index >= 0) { + assert(sampler == l.set[set].stage[s].sampler_start + + set_layout->binding[b].stage[s].sampler_index); + for (unsigned i = 0; i < array_size; i++) { + l.stage[s].sampler_to_descriptor[sampler + i].set = set; + l.stage[s].sampler_to_descriptor[sampler + i].offset = set_offset + i; + } + sampler += array_size; + } + + set_offset += array_size; + } } } + /* Finally, we're done setting it up, copy into the allocated version */ + *layout = l; + *pPipelineLayout = anv_pipeline_layout_to_handle(layout); return VK_SUCCESS; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 6bc781fa072..523e6dfa747 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -660,23 +660,37 @@ struct anv_device_memory { void * map; }; -struct anv_descriptor_slot { - int8_t dynamic_slot; - uint8_t index; -}; +struct anv_descriptor_set_binding_layout { + /* Number of array elements in this binding */ + uint16_t array_size; + + /* Index into the dynamic state 
array for a dynamic buffer */ + int16_t dynamic_offset_index; -struct anv_descriptor_set_layout { struct { - uint32_t surface_count; - struct anv_descriptor_slot *surface_start; - uint32_t sampler_count; - struct anv_descriptor_slot *sampler_start; + /* Index into the binding table for the associated surface */ + int16_t surface_index; + + /* Index into the sampler table for the associated sampler */ + int16_t sampler_index; } stage[VK_SHADER_STAGE_NUM]; +}; + +struct anv_descriptor_set_layout { + /* Number of bindings in this descriptor set */ + uint16_t binding_count; + + /* Total size of the descriptor set with room for all array entries */ + uint16_t size; + + /* Shader stages affected by this descriptor set */ + uint16_t shader_stages; + + /* Number of dynamic offsets used by this descriptor set */ + uint16_t dynamic_offset_count; - uint32_t count; - uint32_t num_dynamic_buffers; - VkShaderStageFlags shader_stages; - struct anv_descriptor_slot entries[0]; + /* Don't use this directly */ + struct anv_descriptor_set_binding_layout binding[0]; }; enum anv_descriptor_type { @@ -718,6 +732,14 @@ anv_descriptor_set_destroy(struct anv_device *device, #define MAX_DYNAMIC_BUFFERS 16 #define MAX_IMAGES 8 +struct anv_pipeline_binding { + /* The descriptor set this surface corresponds to */ + uint16_t set; + + /* Offset into the descriptor set */ + uint16_t offset; +}; + struct anv_pipeline_layout { struct { struct anv_descriptor_set_layout *layout; @@ -733,8 +755,12 @@ struct anv_pipeline_layout { struct { bool has_dynamic_offsets; uint32_t surface_count; + struct anv_pipeline_binding *surface_to_descriptor; uint32_t sampler_count; + struct anv_pipeline_binding *sampler_to_descriptor; } stage[VK_SHADER_STAGE_NUM]; + + struct anv_pipeline_binding entries[0]; }; struct anv_buffer { -- cgit v1.2.3 From 5eccd0b4b947d806c0725899a827e622e605f2cc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Oct 2015 16:37:35 -0700 Subject: nir/intrinsic: Allow up to four indices 
--- src/glsl/nir/nir.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index dbd73f15b22..f2794cb7afb 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -874,7 +874,7 @@ typedef struct { */ uint8_t num_components; - int const_index[3]; + int const_index[4]; nir_deref_var *variables[2]; -- cgit v1.2.3 From 24bcc89c8fa326b838e9fea002065a40d4d04314 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Oct 2015 16:37:59 -0700 Subject: nir/intrinsics: Add new Vulkan load/store intrinsics --- src/glsl/nir/nir_intrinsics.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 1fd13cfc510..b5a0d715aa3 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -227,11 +227,14 @@ SYSTEM_VALUE(num_work_groups, 3, 0) true, 0, 0, indices, flags) LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) +LOAD(ubo_vk, 1, 3, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(ssbo_vk, 1, 3, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + /* * Stores work the same way as loads, except now the first register input is * the value or array to store and the optional second input is the indirect @@ -250,4 +253,6 @@ LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) STORE(output, 0, 0, 0, 0) STORE(ssbo, 1, 1, 1, 0) -LAST_INTRINSIC(store_ssbo_indirect) +STORE(ssbo_vk, 1, 1, 3, 0) + +LAST_INTRINSIC(store_ssbo_vk_indirect) -- cgit v1.2.3 From de608153fb5358bad38ae372f6c66ad3d6708f00 Mon 
Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Oct 2015 16:49:34 -0700 Subject: nir/spirv: Use the Vulkan ubo intrinsics --- src/glsl/nir/spirv_to_nir.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 6503b2590d7..6dfe530905d 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1019,7 +1019,7 @@ _vtn_variable_store(struct vtn_builder *b, struct vtn_type *dest_type, static struct vtn_ssa_value * _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, - unsigned set, nir_ssa_def *binding, + unsigned set, unsigned binding, nir_ssa_def *index, unsigned offset, nir_ssa_def *indirect, struct vtn_type *type) { @@ -1030,8 +1030,9 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); load->num_components = glsl_get_vector_elements(type->type); load->const_index[0] = set; - load->src[0] = nir_src_for_ssa(binding); - load->const_index[1] = offset; + load->const_index[1] = binding; + load->src[0] = nir_src_for_ssa(index); + load->const_index[2] = offset; if (indirect) load->src[1] = nir_src_for_ssa(indirect); nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); @@ -1042,13 +1043,13 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); if (glsl_type_is_struct(type->type)) { for (unsigned i = 0; i < elems; i++) { - val->elems[i] = _vtn_block_load(b, op, set, binding, + val->elems[i] = _vtn_block_load(b, op, set, binding, index, offset + type->offsets[i], indirect, type->members[i]); } } else { for (unsigned i = 0; i < elems; i++) { - val->elems[i] = _vtn_block_load(b, op, set, binding, + val->elems[i] = _vtn_block_load(b, op, set, binding, index, offset + i * type->stride, indirect, type->array_element); } @@ -1063,24 +1064,21 @@ 
vtn_block_load(struct vtn_builder *b, nir_deref_var *src, struct vtn_type *type, nir_deref *src_tail) { unsigned set = src->var->data.descriptor_set; + unsigned binding = src->var->data.binding; - nir_ssa_def *binding = nir_imm_int(&b->nb, src->var->data.binding); nir_deref *deref = &src->deref; - /* The block variable may be an array, in which case the array index adds - * an offset to the binding. Figure out that index now. - */ - + nir_ssa_def *index; if (deref->child->deref_type == nir_deref_type_array) { deref = deref->child; type = type->array_element; nir_deref_array *deref_array = nir_deref_as_array(deref); - if (deref_array->deref_array_type == nir_deref_array_type_direct) { - binding = nir_imm_int(&b->nb, src->var->data.binding + - deref_array->base_offset); - } else { - binding = nir_iadd(&b->nb, binding, deref_array->indirect.ssa); - } + index = nir_imm_int(&b->nb, deref_array->base_offset); + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) + index = nir_iadd(&b->nb, index, deref_array->indirect.ssa); + } else { + index = nir_imm_int(&b->nb, 0); } unsigned offset = 0; @@ -1114,10 +1112,10 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src, } /* TODO SSBO's */ - nir_intrinsic_op op = indirect ? nir_intrinsic_load_ubo_indirect - : nir_intrinsic_load_ubo; + nir_intrinsic_op op = indirect ? nir_intrinsic_load_ubo_vk_indirect + : nir_intrinsic_load_ubo_vk; - return _vtn_block_load(b, op, set, binding, offset, indirect, type); + return _vtn_block_load(b, op, set, binding, index, offset, indirect, type); } /* -- cgit v1.2.3 From 9c9b7d79c8a54b4f61e52f64aa504b0d01529dd7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Oct 2015 16:39:58 -0700 Subject: anv/nir: Add a pass for applying a pipeline layout to a shader This new pass lowers the _vk intrinsics which take a (set, binding, index) triple to the single-index non-vk intrinsics based on the pipeline layout. 
--- src/glsl/nir/nir.h | 1 - src/glsl/nir/nir_lower_samplers.c | 70 ---------- src/mesa/drivers/dri/i965/brw_nir.c | 4 +- src/vulkan/Makefile.am | 1 + src/vulkan/anv_compiler.cpp | 3 + src/vulkan/anv_nir.h | 2 + src/vulkan/anv_nir_apply_pipeline_layout.c | 217 +++++++++++++++++++++++++++++ 7 files changed, 224 insertions(+), 74 deletions(-) create mode 100644 src/vulkan/anv_nir_apply_pipeline_layout.c (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index f2794cb7afb..f7b9483d74a 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1916,7 +1916,6 @@ void nir_lower_phis_to_scalar(nir_shader *shader); void nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program); -void nir_lower_samplers_for_vk(nir_shader *shader); bool nir_lower_system_values(nir_shader *shader); diff --git a/src/glsl/nir/nir_lower_samplers.c b/src/glsl/nir/nir_lower_samplers.c index aaeff2c223e..5df79a69a06 100644 --- a/src/glsl/nir/nir_lower_samplers.c +++ b/src/glsl/nir/nir_lower_samplers.c @@ -34,30 +34,6 @@ #include "program/prog_parameter.h" #include "program/program.h" -static void -add_indirect_to_tex(nir_tex_instr *instr, nir_src indirect) -{ - /* First, we have to resize the array of texture sources */ - nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, - instr->num_srcs + 1); - - for (unsigned i = 0; i < instr->num_srcs; i++) { - new_srcs[i].src_type = instr->src[i].src_type; - nir_instr_move_src(&instr->instr, &new_srcs[i].src, &instr->src[i].src); - } - - ralloc_free(instr->src); - instr->src = new_srcs; - - /* Now we can go ahead and move the source over to being a - * first-class texture source. - */ - instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; - instr->num_srcs++; - nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src, - indirect); -} - /* Calculate the sampler index based on array indicies and also * calculate the base uniform location for struct members. 
*/ @@ -210,49 +186,3 @@ nir_lower_samplers(nir_shader *shader, lower_impl(overload->impl, shader_program, shader->stage); } } - -static bool -lower_samplers_for_vk_block(nir_block *block, void *data) -{ - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_tex) - continue; - - nir_tex_instr *tex = nir_instr_as_tex(instr); - - assert(tex->sampler); - - tex->sampler_set = tex->sampler->var->data.descriptor_set; - tex->sampler_index = tex->sampler->var->data.binding; - - if (tex->sampler->deref.child) { - assert(tex->sampler->deref.child->deref_type == nir_deref_type_array); - nir_deref_array *arr = nir_deref_as_array(tex->sampler->deref.child); - - /* Only one-level arrays are allowed in vulkan */ - assert(arr->deref.child == NULL); - - tex->sampler_index += arr->base_offset; - if (arr->deref_array_type == nir_deref_array_type_indirect) { - add_indirect_to_tex(tex, arr->indirect); - nir_instr_rewrite_src(instr, &arr->indirect, NIR_SRC_INIT); - - tex->sampler_array_size = glsl_get_length(tex->sampler->deref.type); - } - } - - tex->sampler = NULL; - } - - return true; -} - -void -nir_lower_samplers_for_vk(nir_shader *shader) -{ - nir_foreach_overload(shader, overload) { - if (overload->impl) { - nir_foreach_block(overload->impl, lower_samplers_for_vk_block, NULL); - } - } -} diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 19206600e64..0a9c09f1075 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -181,10 +181,8 @@ brw_process_nir(nir_shader *nir, if (shader_prog) { nir_lower_samplers(nir, shader_prog); - } else { - nir_lower_samplers_for_vk(nir); + nir_validate_shader(nir); } - nir_validate_shader(nir); nir_lower_system_values(nir); nir_validate_shader(nir); diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index a691329065c..aeed78ae840 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -70,6 +70,7 @@ VULKAN_SOURCES = \ anv_intel.c \ 
anv_meta.c \ anv_nir_apply_dynamic_offsets.c \ + anv_nir_apply_pipeline_layout.c \ anv_pipeline.c \ anv_private.h \ anv_query.c \ diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index bf931fe886f..34023a7369f 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -285,6 +285,7 @@ really_do_vs_prog(struct brw_context *brw, create_params_array(pipeline, vs, &prog_data->base.base); anv_nir_apply_dynamic_offsets(pipeline, vs->Program->nir, &prog_data->base.base); + anv_nir_apply_pipeline_layout(vs->Program->nir, pipeline->layout); GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; prog_data->inputs_read = vp->program.Base.InputsRead; @@ -571,6 +572,7 @@ really_do_wm_prog(struct brw_context *brw, create_params_array(pipeline, fs, &prog_data->base); anv_nir_apply_dynamic_offsets(pipeline, fs->Program->nir, &prog_data->base); + anv_nir_apply_pipeline_layout(fs->Program->nir, pipeline->layout); prog_data->barycentric_interp_modes = brw_compute_barycentric_interp_modes(brw->intelScreen->devinfo, @@ -888,6 +890,7 @@ brw_codegen_cs_prog(struct brw_context *brw, create_params_array(pipeline, cs, &prog_data->base); anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base); + anv_nir_apply_pipeline_layout(cs->Program->nir, pipeline->layout); program = brw_cs_emit(brw, mem_ctx, key, prog_data, &cp->program, prog, -1, &program_size); diff --git a/src/vulkan/anv_nir.h b/src/vulkan/anv_nir.h index 1fd3484e059..af95e3a8849 100644 --- a/src/vulkan/anv_nir.h +++ b/src/vulkan/anv_nir.h @@ -40,6 +40,8 @@ anv_vk_shader_stage_for_mesa_stage(gl_shader_stage stage) void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, nir_shader *shader, struct brw_stage_prog_data *prog_data); +bool anv_nir_apply_pipeline_layout(nir_shader *shader, + const struct anv_pipeline_layout *layout); #ifdef __cplusplus } diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c new file 
mode 100644 index 00000000000..dea2dee3b32 --- /dev/null +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@ -0,0 +1,217 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_nir.h" +#include "glsl/nir/nir_builder.h" + +struct apply_pipeline_layout_state { + nir_shader *shader; + nir_builder builder; + + VkShaderStage stage; + const struct anv_pipeline_layout *layout; + + bool progress; +}; + +static nir_intrinsic_op +lowered_op(nir_intrinsic_op op) +{ + switch (op) { + case nir_intrinsic_load_ubo_vk: + return nir_intrinsic_load_ubo; + case nir_intrinsic_load_ubo_vk_indirect: + return nir_intrinsic_load_ubo_indirect; + case nir_intrinsic_load_ssbo_vk: + return nir_intrinsic_load_ssbo; + case nir_intrinsic_load_ssbo_vk_indirect: + return nir_intrinsic_load_ssbo_indirect; + case nir_intrinsic_store_ssbo_vk: + return nir_intrinsic_store_ssbo; + case nir_intrinsic_store_ssbo_vk_indirect: + return nir_intrinsic_store_ssbo_indirect; + default: + unreachable("Invalid intrinsic for lowering"); + } +} + +static uint32_t +get_surface_index(unsigned set, unsigned binding, + struct apply_pipeline_layout_state *state) +{ + assert(set < state->layout->num_sets); + struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + + assert(binding < set_layout->binding_count); + + assert(set_layout->binding[binding].stage[state->stage].surface_index >= 0); + + uint32_t surface_index = + state->layout->set[set].stage[state->stage].surface_start + + set_layout->binding[binding].stage[state->stage].surface_index; + + assert(surface_index < state->layout->stage[state->stage].surface_count); + + return surface_index; +} + +static bool +try_lower_intrinsic(nir_intrinsic_instr *intrin, + struct apply_pipeline_layout_state *state) +{ + nir_builder *b = &state->builder; + + int block_idx_src; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo_vk: + case nir_intrinsic_load_ubo_vk_indirect: + case nir_intrinsic_load_ssbo_vk: + case nir_intrinsic_load_ssbo_vk_indirect: + block_idx_src = 0; + break; + case nir_intrinsic_store_ssbo_vk: + case nir_intrinsic_store_ssbo_vk_indirect: + block_idx_src = 1; + break; + 
default: + return false; + } + + b->cursor = nir_before_instr(&intrin->instr); + + uint32_t set = intrin->const_index[0]; + uint32_t binding = intrin->const_index[1]; + + uint32_t surface_index = get_surface_index(set, binding, state); + + nir_const_value *const_block_idx = + nir_src_as_const_value(intrin->src[block_idx_src]); + + nir_ssa_def *block_index; + if (const_block_idx) { + block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]); + } else { + block_index = nir_iadd(b, nir_imm_int(b, surface_index), + nir_ssa_for_src(b, intrin->src[block_idx_src], 1)); + } + + nir_instr_rewrite_src(&intrin->instr, &intrin->src[block_idx_src], + nir_src_for_ssa(block_index)); + + intrin->intrinsic = lowered_op(intrin->intrinsic); + /* Shift the offset indices down */ + intrin->const_index[0] = intrin->const_index[2]; + intrin->const_index[1] = intrin->const_index[3]; + + return true; +} + +static void +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +{ + /* No one should have come by and lowered it already */ + assert(tex->sampler); + + unsigned set = tex->sampler->var->data.descriptor_set; + unsigned binding = tex->sampler->var->data.binding; + + tex->sampler_index = get_surface_index(set, binding, state); + + if (tex->sampler->deref.child) { + assert(tex->sampler->deref.child->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = + nir_deref_as_array(tex->sampler->deref.child); + + tex->sampler_index += deref_array->base_offset; + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, + tex->num_srcs + 1); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + new_srcs[i].src_type = tex->src[i].src_type; + nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src); + } + + ralloc_free(tex->src); + tex->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. 
+ */ + tex->src[tex->num_srcs].src_type = nir_tex_src_sampler_offset; + tex->num_srcs++; + nir_instr_move_src(&tex->instr, &tex->src[tex->num_srcs - 1].src, + &deref_array->indirect); + } + } + + tex->sampler = NULL; +} + +static bool +apply_pipeline_layout_block(nir_block *block, void *void_state) +{ + struct apply_pipeline_layout_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: + if (try_lower_intrinsic(nir_instr_as_intrinsic(instr), state)) + state->progress = true; + break; + case nir_instr_type_tex: + lower_tex(nir_instr_as_tex(instr), state); + /* All texture instructions need lowering */ + state->progress = true; + break; + default: + continue; + } + } + + return true; +} + +bool +anv_nir_apply_pipeline_layout(nir_shader *shader, + const struct anv_pipeline_layout *layout) +{ + struct apply_pipeline_layout_state state = { + .shader = shader, + .stage = anv_vk_shader_stage_for_mesa_stage(shader->stage), + .layout = layout, + }; + + nir_foreach_overload(shader, overload) { + if (overload->impl) { + nir_builder_init(&state.builder, overload->impl); + nir_foreach_block(overload->impl, apply_pipeline_layout_block, &state); + nir_metadata_preserve(overload->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + return state.progress; +} -- cgit v1.2.3 From da994f4b7ecf699ae27cb2236387db4cb099f42d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Oct 2015 17:46:24 -0700 Subject: anv/nir: Rewrite apply_dynamic_offsets to handle the new vk intrinsics --- src/vulkan/anv_nir_apply_dynamic_offsets.c | 145 +++++++++++++++++++++-------- 1 file changed, 104 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c index 9d29c3c23ea..afb3313ccb4 100644 --- a/src/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c @@ -40,75 +40,138 @@ 
apply_dynamic_offsets_block(nir_block *block, void *void_state) struct apply_dynamic_offsets_state *state = void_state; struct anv_descriptor_set_layout *set_layout; + nir_builder *b = &state->builder; + nir_foreach_instr_safe(block, instr) { if (instr->type != nir_instr_type_intrinsic) continue; nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - bool has_indirect = false; - uint32_t set, binding; + unsigned block_idx_src; switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo_indirect: - has_indirect = true; - /* fallthrough */ - case nir_intrinsic_load_ubo: { - set = intrin->const_index[0]; - - nir_const_value *const_binding = nir_src_as_const_value(intrin->src[0]); - if (const_binding) { - binding = const_binding->u[0]; - } else { - assert(0 && "need more info from the ir for this."); - } + case nir_intrinsic_load_ubo_vk: + case nir_intrinsic_load_ubo_vk_indirect: + case nir_intrinsic_load_ssbo_vk: + case nir_intrinsic_load_ssbo_vk_indirect: + block_idx_src = 0; + break; + case nir_intrinsic_store_ssbo_vk: + case nir_intrinsic_store_ssbo_vk_indirect: + block_idx_src = 1; break; - } default: continue; /* the loop */ } + unsigned set = intrin->const_index[0]; + unsigned binding = intrin->const_index[1]; + set_layout = state->layout->set[set].layout; if (set_layout->binding[binding].dynamic_offset_index < 0) continue; + b->cursor = nir_before_instr(&intrin->instr); + + int indirect_src; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo_vk_indirect: + case nir_intrinsic_load_ssbo_vk_indirect: + indirect_src = 1; + break; + case nir_intrinsic_store_ssbo_vk_indirect: + indirect_src = 2; + break; + default: + indirect_src = -1; + break; + } + + /* First, we need to generate the uniform load for the buffer offset */ uint32_t index = state->layout->set[set].dynamic_offset_start + set_layout->binding[binding].dynamic_offset_index; - state->builder.cursor = nir_before_instr(&intrin->instr); + nir_const_value *const_arr_idx = + 
nir_src_as_const_value(intrin->src[block_idx_src]); + + nir_intrinsic_op offset_load_op; + if (const_arr_idx) + offset_load_op = nir_intrinsic_load_uniform; + else + offset_load_op = nir_intrinsic_load_uniform_indirect; nir_intrinsic_instr *offset_load = - nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform); + nir_intrinsic_instr_create(state->shader, offset_load_op); offset_load->num_components = 1; offset_load->const_index[0] = state->indices_start + index; - offset_load->const_index[1] = 0; + + if (const_arr_idx) { + offset_load->const_index[1] = const_arr_idx->u[0]; + } else { + offset_load->const_index[1] = 0; + nir_src_copy(&offset_load->src[0], &intrin->src[0], &intrin->instr); + } + nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 1, NULL); - nir_builder_instr_insert(&state->builder, &offset_load->instr); + nir_builder_instr_insert(b, &offset_load->instr); nir_ssa_def *offset = &offset_load->dest.ssa; - if (has_indirect) { - assert(intrin->src[1].is_ssa); - offset = nir_iadd(&state->builder, intrin->src[1].ssa, offset); + if (indirect_src >= 0) { + assert(intrin->src[indirect_src].is_ssa); + offset = nir_iadd(b, intrin->src[indirect_src].ssa, offset); } - assert(intrin->dest.is_ssa); - - nir_intrinsic_instr *new_load = - nir_intrinsic_instr_create(state->shader, - nir_intrinsic_load_ubo_indirect); - new_load->num_components = intrin->num_components; - new_load->const_index[0] = intrin->const_index[0]; - new_load->const_index[1] = intrin->const_index[1]; - nir_src_copy(&new_load->src[0], &intrin->src[0], &new_load->instr); - new_load->src[1] = nir_src_for_ssa(offset); - nir_ssa_dest_init(&new_load->instr, &new_load->dest, - intrin->dest.ssa.num_components, - intrin->dest.ssa.name); - nir_builder_instr_insert(&state->builder, &new_load->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&new_load->dest.ssa)); - - nir_instr_remove(&intrin->instr); + /* Now we can modify the load/store intrinsic */ + + if 
(indirect_src < 0) { + /* The original intrinsic is not an indirect variant. We need to + * create a new one and copy the old data over first. + */ + + nir_intrinsic_op indirect_op; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo_vk: + indirect_op = nir_intrinsic_load_ubo_vk_indirect; + break; + case nir_intrinsic_load_ssbo_vk: + indirect_op = nir_intrinsic_load_ssbo_vk_indirect; + break; + case nir_intrinsic_store_ssbo_vk: + indirect_op = nir_intrinsic_store_ssbo_vk_indirect; + break; + default: + unreachable("Invalid direct load/store intrinsic"); + } + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(state->shader, indirect_op); + copy->num_components = intrin->num_components; + + for (unsigned i = 0; i < 4; i++) + copy->const_index[i] = intrin->const_index[i]; + + /* The indirect is always the last source */ + indirect_src = nir_intrinsic_infos[intrin->intrinsic].num_srcs; + + for (unsigned i = 0; i < (unsigned)indirect_src; i++) + nir_src_copy(©->src[i], &intrin->src[i], ©->instr); + + copy->src[indirect_src] = nir_src_for_ssa(offset); + nir_ssa_dest_init(©->instr, ©->dest, + intrin->dest.ssa.num_components, + intrin->dest.ssa.name); + nir_builder_instr_insert(b, ©->instr); + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(©->dest.ssa)); + + nir_instr_remove(&intrin->instr); + } else { + /* It's already indirect, so we can just rewrite the one source */ + nir_instr_rewrite_src(&intrin->instr, &intrin->src[indirect_src], + nir_src_for_ssa(offset)); + } } return true; -- cgit v1.2.3 From 42683e37570bac2b0cb36896cf9a558ae0a24eb4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Oct 2015 17:02:55 -0700 Subject: anv: Get rid of backend compiler hacks for descriptor sets Now that we have anv_nir_apply_pipeline_layout, we can hand the backend compiler intrinsics and texture instructions that use a flat buffer index just like it wants. There's no longer any reason for any of these hacks. 
--- src/mesa/drivers/dri/i965/brw_context.h | 6 ----- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 25 +++++---------------- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 8 +++---- src/vulkan/anv_compiler.cpp | 36 +++++------------------------- 4 files changed, 13 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a66c61e58f3..aa1284db3ce 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -358,12 +358,6 @@ struct brw_stage_prog_data { /** @} */ } binding_table; - uint32_t *map_entries; - struct { - uint32_t index_count; - uint32_t *index; - } bind_map[8]; /* MAX_SETS from vulkan/private.h */ - GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; unsigned nr_image_params; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 883b8cbf3e7..45c3f4ef3b4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1419,22 +1419,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr has_indirect = true; /* fallthrough */ case nir_intrinsic_load_ubo: { - uint32_t set = instr->const_index[0]; nir_const_value *const_index = nir_src_as_const_value(instr->src[0]); fs_reg surf_index; if (const_index) { - uint32_t binding = const_index->u[0]; - - /* FIXME: We should probably assert here, but dota2 seems to hit - * it and we'd like to keep going. - */ - if (binding >= stage_prog_data->bind_map[set].index_count) - binding = 0; - - surf_index = fs_reg(stage_prog_data->bind_map[set].index[binding]); + surf_index = fs_reg(stage_prog_data->binding_table.ubo_start + + const_index->u[0]); } else { - assert(0 && "need more info from the ir for this."); /* The block index is not a constant. 
Evaluate the index expression * per-channel and add the base UBO index; we have to select a value * from any live channel. @@ -1459,7 +1450,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr BRW_REGISTER_TYPE_D), fs_reg(2)); - unsigned vec4_offset = instr->const_index[1] / 4; + unsigned vec4_offset = instr->const_index[0] / 4; for (int i = 0; i < instr->num_components; i++) VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index, base_offset, vec4_offset + i); @@ -1467,7 +1458,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg packed_consts = vgrf(glsl_type::float_type); packed_consts.type = dest.type; - fs_reg const_offset_reg((unsigned) instr->const_index[1] & ~15); + fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15); bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, surf_index, const_offset_reg); @@ -1921,13 +1912,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, void fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { - uint32_t set = instr->sampler_set; - uint32_t binding = instr->sampler_index; - - assert(binding < stage_prog_data->bind_map[set].index_count); - assert(stage_prog_data->bind_map[set].index[binding] < 1000); - - unsigned sampler = stage_prog_data->bind_map[set].index[binding]; + unsigned sampler = instr->sampler_index; fs_reg sampler_reg(sampler); int gather_component = instr->component; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 37a74df6d71..41bd80df377 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -738,19 +738,17 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) has_indirect = true; /* fallthrough */ case nir_intrinsic_load_ubo: { - const uint32_t set = instr->const_index[0]; nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]); src_reg 
surf_index; dest = get_nir_dest(instr->dest); if (const_block_index) { - uint32_t binding = const_block_index->u[0]; - /* The block index is a constant, so just emit the binding table entry * as an immediate. */ - surf_index = src_reg(stage_prog_data->bind_map[set].index[binding]); + surf_index = src_reg(prog_data->base.binding_table.ubo_start + + const_block_index->u[0]); } else { /* The block index is not a constant. Evaluate the index expression * per-channel and add the base UBO index; we have to select a value @@ -770,7 +768,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir->info.num_ssbos - 1); } - unsigned const_offset = instr->const_index[1]; + unsigned const_offset = instr->const_index[0]; src_reg offset; if (!has_indirect) { diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 34023a7369f..a3b8d1cc80c 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -72,41 +72,16 @@ static VkResult set_binding_table_layout(struct brw_stage_prog_data *prog_data, struct anv_pipeline *pipeline, uint32_t stage) { - uint32_t bias, count, k, *map; - struct anv_pipeline_layout *layout = pipeline->layout; - - /* No layout is valid for shaders that don't bind any resources. 
*/ - if (pipeline->layout == NULL) - return VK_SUCCESS; - + unsigned bias; if (stage == VK_SHADER_STAGE_FRAGMENT) bias = MAX_RTS; else bias = 0; - count = layout->stage[stage].surface_count; - prog_data->map_entries = - (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0])); - if (prog_data->map_entries == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - k = bias; - map = prog_data->map_entries; - for (uint32_t set = 0; set < layout->num_sets; set++) { - prog_data->bind_map[set].index = map; - unsigned index_count = 0; - for (uint32_t b = 0; b < layout->set[set].layout->binding_count; b++) { - if (layout->set[set].layout->binding[b].stage[stage].surface_index < 0) - continue; - - unsigned array_size = layout->set[set].layout->binding[b].array_size; - for (uint32_t i = 0; i < array_size; i++) - *map++ = k++; - index_count += array_size; - } - - prog_data->bind_map[set].index_count = index_count; - } + prog_data->binding_table.size_bytes = 0; + prog_data->binding_table.texture_start = bias; + prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.image_start = bias; return VK_SUCCESS; } @@ -1400,7 +1375,6 @@ anv_compiler_free(struct anv_pipeline *pipeline) { for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) { if (pipeline->prog_data[stage]) { - free(pipeline->prog_data[stage]->map_entries); /* We only ever set up the params array because we don't do * non-UBO pull constants */ -- cgit v1.2.3 From 896c1c65d67089417709f567c03e6930fc7af958 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Oct 2015 19:00:17 -0700 Subject: anv: Get rid of the descriptor_set_binding struct We no longer need it as we have a better way to deal with dynamic offsets. 
--- src/vulkan/anv_cmd_buffer.c | 8 ++++---- src/vulkan/anv_meta.c | 4 ++-- src/vulkan/anv_private.h | 7 +------ 3 files changed, 7 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 718499d60c8..df076d0cac6 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -478,8 +478,8 @@ void anv_CmdBindDescriptorSets( ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); set_layout = layout->set[firstSet + i].layout; - if (cmd_buffer->state.descriptors[firstSet + i].set != set) { - cmd_buffer->state.descriptors[firstSet + i].set = set; + if (cmd_buffer->state.descriptors[firstSet + i] != set) { + cmd_buffer->state.descriptors[firstSet + i] = set; cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; } @@ -598,7 +598,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_pipeline_binding *binding = &layout->stage[stage].surface_to_descriptor[s]; struct anv_descriptor_set *set = - cmd_buffer->state.descriptors[binding->set].set; + cmd_buffer->state.descriptors[binding->set]; struct anv_descriptor *desc = &set->descriptors[binding->offset]; const struct anv_state *surface_state; @@ -654,7 +654,7 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, struct anv_pipeline_binding *binding = &layout->stage[stage].sampler_to_descriptor[s]; struct anv_descriptor_set *set = - cmd_buffer->state.descriptors[binding->set].set; + cmd_buffer->state.descriptors[binding->set]; struct anv_descriptor *desc = &set->descriptors[binding->offset]; if (desc->type != ANV_DESCRIPTOR_TYPE_SAMPLER) diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 565575ab219..56a0d4b9d18 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -303,7 +303,7 @@ anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, uint32_t dynamic_state) { state->old_pipeline = cmd_buffer->state.pipeline; - state->old_descriptor_set0 = 
cmd_buffer->state.descriptors[0].set; + state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, sizeof(state->old_vertex_bindings)); state->dynamic_flags = dynamic_state; @@ -316,7 +316,7 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, const struct anv_saved_state *state) { cmd_buffer->state.pipeline = state->old_pipeline; - cmd_buffer->state.descriptors[0].set = state->old_descriptor_set0; + cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, sizeof(state->old_vertex_bindings)); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 523e6dfa747..757a149289f 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -790,11 +790,6 @@ struct anv_vertex_binding { VkDeviceSize offset; }; -struct anv_descriptor_set_binding { - struct anv_descriptor_set * set; - uint32_t dynamic_offsets[128]; -}; - struct anv_push_constants { /* Current allocated size of this push constants data structure. 
* Because a decent chunk of it may not be used (images on SKL, for @@ -883,7 +878,7 @@ struct anv_cmd_state { struct anv_subpass * subpass; uint32_t state_vf[GEN8_3DSTATE_VF_length]; struct anv_vertex_binding vertex_bindings[MAX_VBS]; - struct anv_descriptor_set_binding descriptors[MAX_SETS]; + struct anv_descriptor_set * descriptors[MAX_SETS]; struct anv_push_constants * push_constants[VK_SHADER_STAGE_NUM]; struct anv_dynamic_state dynamic; -- cgit v1.2.3 From 6dc4cad994ca03b71f99b79856d8d8ff5cf3ed83 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 15 Oct 2015 13:45:07 -0700 Subject: anv/cmd_buffer: Add an alloc_surface_state helper --- src/vulkan/anv_batch_chain.c | 6 ++++++ src/vulkan/anv_private.h | 2 ++ src/vulkan/gen7_state.c | 3 +-- src/vulkan/gen8_state.c | 3 +-- 4 files changed, 10 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index c621c0745b7..77fbd6635c0 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -483,6 +483,12 @@ anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, return state; } +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer) +{ + return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); +} + struct anv_state anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment) diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 757a149289f..bcfb6fada50 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -992,6 +992,8 @@ struct anv_state anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, uint32_t entries, uint32_t *state_offset); struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer); +struct anv_state anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, uint32_t size, uint32_t alignment); diff --git 
a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index d317fa4ec16..aef97838e17 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -118,8 +118,7 @@ gen7_alloc_surface_state(struct anv_device *device, struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer) { - return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, - 64, 64); + return anv_cmd_buffer_alloc_surface_state(cmd_buffer); } else { return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); } diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 9be3bf46021..d9438127f2a 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -104,8 +104,7 @@ gen8_alloc_surface_state(struct anv_device *device, struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer) { - return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, - 64, 64); + return anv_cmd_buffer_alloc_surface_state(cmd_buffer); } else { return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); } -- cgit v1.2.3 From bed7d1e03cfb03148e65566d299430063e90a47a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 15 Oct 2015 13:45:53 -0700 Subject: anv: Add support for BufferInfo in descriptor sets --- src/vulkan/anv_cmd_buffer.c | 17 ++++++++++++++++- src/vulkan/anv_device.c | 29 ++++++++++++++++++++++------- src/vulkan/anv_private.h | 22 +++++++++++++++++----- 3 files changed, 55 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index df076d0cac6..2801f730478 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -614,11 +614,25 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, bo = desc->buffer_view->bo; bo_offset = desc->buffer_view->offset; break; + case ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET: { + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer); + anv_fill_buffer_surface_state(cmd_buffer->device, state.map, + 
anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT), + desc->offset, desc->range); + surface_state = &state; + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; + break; + } case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW: surface_state = &desc->image_view->nonrt_surface_state; bo = desc->image_view->bo; bo_offset = desc->image_view->offset; break; + case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER: + /* Nothing for us to do here */ + break; } bt_map[bias + s] = surface_state->offset + state_offset; @@ -657,7 +671,8 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.descriptors[binding->set]; struct anv_descriptor *desc = &set->descriptors[binding->offset]; - if (desc->type != ANV_DESCRIPTOR_TYPE_SAMPLER) + if (desc->type != ANV_DESCRIPTOR_TYPE_SAMPLER && + desc->type != ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER) continue; struct anv_sampler *sampler = desc->sampler; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 98c1b2334e6..cc9ead86e73 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1698,13 +1698,28 @@ void anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: for (uint32_t j = 0; j < write->count; j++) { - ANV_FROM_HANDLE(anv_buffer_view, bview, - write->pDescriptors[j].bufferView); - - set->descriptors[write->destBinding + j] = (struct anv_descriptor) { - .type = ANV_DESCRIPTOR_TYPE_BUFFER_VIEW, - .buffer_view = bview, - }; + if (write->pDescriptors[j].bufferView.handle) { + ANV_FROM_HANDLE(anv_buffer_view, bview, + write->pDescriptors[j].bufferView); + + set->descriptors[write->destBinding + j] = + (struct anv_descriptor) { + .type = ANV_DESCRIPTOR_TYPE_BUFFER_VIEW, + .buffer_view = bview, + }; + } else { + ANV_FROM_HANDLE(anv_buffer, buffer, + write->pDescriptors[j].bufferInfo.buffer); + assert(buffer); + + set->descriptors[write->destBinding + j] = + (struct anv_descriptor) { + .type = 
ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET, + .buffer = buffer, + .offset = write->pDescriptors[j].bufferInfo.offset, + .range = write->pDescriptors[j].bufferInfo.range, + }; + } } default: diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index bcfb6fada50..6bbf7c0291b 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -696,18 +696,30 @@ struct anv_descriptor_set_layout { enum anv_descriptor_type { ANV_DESCRIPTOR_TYPE_EMPTY = 0, ANV_DESCRIPTOR_TYPE_BUFFER_VIEW, + ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET, ANV_DESCRIPTOR_TYPE_IMAGE_VIEW, ANV_DESCRIPTOR_TYPE_SAMPLER, + ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER, }; struct anv_descriptor { + enum anv_descriptor_type type; + union { - struct anv_buffer_view *buffer_view; - struct anv_image_view *image_view; - struct anv_sampler *sampler; - }; + struct { + union { + struct anv_buffer_view *buffer_view; + struct anv_image_view *image_view; + }; + struct anv_sampler *sampler; + }; - enum anv_descriptor_type type; + struct { + struct anv_buffer *buffer; + uint64_t offset; + uint64_t range; + }; + }; }; struct anv_descriptor_set { -- cgit v1.2.3 From ba205696261055aa5298a1852117c0a24fae10c4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 15 Oct 2015 14:34:07 -0700 Subject: anv/device: Make the CreateSemaphore stub return success --- src/vulkan/anv_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index cc9ead86e73..bd1c0f2908d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1266,7 +1266,8 @@ VkResult anv_CreateSemaphore( const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore) { - stub_return(VK_UNSUPPORTED); + pSemaphore->handle = 1; + stub_return(VK_SUCCESS); } void anv_DestroySemaphore( -- cgit v1.2.3 From b459b3d82cd4a77f1004907ad9b2487111390a84 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 15 Oct 2015 15:17:07 -0700 Subject: anv/device: Remove 
some unneeded anv_finishmes --- src/vulkan/anv_device.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index bd1c0f2908d..d9065b05390 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1644,16 +1644,6 @@ void anv_UpdateDescriptorSets( const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet); - for (uint32_t j = 0; j < write->count; ++j) { - const VkDescriptorBufferInfo *binfo - = &write->pDescriptors[j].bufferInfo; - - if (binfo->buffer.handle || binfo->offset || binfo->range) { - anv_finishme("VkWriteDesciptorSet::bufferInfo"); - break; - } - } - switch (write->descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: -- cgit v1.2.3 From 03952b1513b7621bb0bb151d0947e8bada64ca04 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 15 Oct 2015 15:17:27 -0700 Subject: anv/device: Add support for combined image and sampler descriptors --- src/vulkan/anv_cmd_buffer.c | 9 ++++++--- src/vulkan/anv_device.c | 18 ++++++++++++++---- 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 2801f730478..19ad31167bf 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -608,6 +608,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, switch (desc->type) { case ANV_DESCRIPTOR_TYPE_EMPTY: case ANV_DESCRIPTOR_TYPE_SAMPLER: + /* Nothing for us to do here */ continue; case ANV_DESCRIPTOR_TYPE_BUFFER_VIEW: surface_state = &desc->buffer_view->surface_state; @@ -626,13 +627,11 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, break; } case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW: + case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER: surface_state = &desc->image_view->nonrt_surface_state; bo = desc->image_view->bo; bo_offset = 
desc->image_view->offset; break; - case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER: - /* Nothing for us to do here */ - break; } bt_map[bias + s] = surface_state->offset + state_offset; @@ -677,6 +676,10 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, struct anv_sampler *sampler = desc->sampler; + /* FIXME: We shouldn't have to do this */ + if (sampler == NULL) + continue; + memcpy(state->map + (s * 16), sampler->state, sizeof(sampler->state)); } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index d9065b05390..2ae39741fe6 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1646,7 +1646,6 @@ void anv_UpdateDescriptorSets( switch (write->descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: for (uint32_t j = 0; j < write->count; j++) { ANV_FROM_HANDLE(anv_sampler, sampler, write->pDescriptors[j].sampler); @@ -1656,11 +1655,22 @@ void anv_UpdateDescriptorSets( .sampler = sampler, }; } + break; - if (write->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) - break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for (uint32_t j = 0; j < write->count; j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pDescriptors[j].imageView); + ANV_FROM_HANDLE(anv_sampler, sampler, + write->pDescriptors[j].sampler); - /* fallthrough */ + set->descriptors[write->destBinding + j] = (struct anv_descriptor) { + .type = ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER, + .image_view = iview, + .sampler = sampler, + }; + } + break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: -- cgit v1.2.3 From f5eec407ead90b6aaeddb0d5416c129d1265ef01 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 15 Oct 2015 15:38:20 -0700 Subject: anv/x11: Treat the pPlatformWindow as a xcb_window_t* instead of xcb_window_t --- src/vulkan/anv_wsi_x11.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_x11.c 
b/src/vulkan/anv_wsi_x11.c index 8834230f9cd..3512238e245 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -55,7 +55,7 @@ x11_get_surface_properties(struct anv_wsi_implementation *impl, { VkPlatformHandleXcbKHR *vk_xcb_handle = vk_window->pPlatformHandle; xcb_connection_t *conn = vk_xcb_handle->connection; - xcb_window_t win = (xcb_window_t)(uintptr_t)vk_window->pPlatformWindow; + xcb_window_t win = *(xcb_window_t *)vk_window->pPlatformWindow; xcb_get_geometry_cookie_t cookie = xcb_get_geometry(conn, win); xcb_generic_error_t *err; @@ -287,7 +287,7 @@ x11_create_swapchain(struct anv_wsi_implementation *impl, VkPlatformHandleXcbKHR *vk_xcb_handle = vk_window->pPlatformHandle; chain->conn = (xcb_connection_t *) vk_xcb_handle->connection; - chain->window = (xcb_window_t) (uintptr_t)vk_window->pPlatformWindow; + chain->window = *(xcb_window_t *)vk_window->pPlatformWindow; chain->extent = pCreateInfo->imageExtent; chain->image_count = num_images; chain->next_image = 0; -- cgit v1.2.3 From 3130851add25d8008d40ca332cfd3b2b4527c0f9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 15 Oct 2015 16:16:15 -0700 Subject: anv/x11: Only advertise VK_FORMAT_B8R8G8A8_UNORM The others don't work at the moment so we shouldn't be advertising them. 
--- src/vulkan/anv_wsi_x11.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 3512238e245..38a5e15d1a6 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -28,9 +28,7 @@ #include "anv_wsi.h" static const VkSurfaceFormatKHR formats[] = { - { .format = VK_FORMAT_B5G6R5_UNORM, }, { .format = VK_FORMAT_B8G8R8A8_UNORM, }, - { .format = VK_FORMAT_B8G8R8A8_SRGB, }, }; static const VkPresentModeKHR present_modes[] = { -- cgit v1.2.3 From 298d0316427315ce78ab6aaaaec7e6dd31a31292 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 15 Oct 2015 17:24:32 -0700 Subject: anv/batch_chain: Add some sanity-check asserts for relocations --- src/vulkan/anv_batch_chain.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 77fbd6635c0..239149709df 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -830,8 +830,11 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, obj->relocation_count = relocs->num_relocs; obj->relocs_ptr = (uintptr_t) relocs->relocs; - for (size_t i = 0; i < relocs->num_relocs; i++) + for (size_t i = 0; i < relocs->num_relocs; i++) { + /* A quick sanity check on relocations */ + assert(relocs->relocs[i].offset < bo->size); anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); + } } return VK_SUCCESS; @@ -872,6 +875,7 @@ adjust_relocations_from_block_pool(struct anv_block_pool *pool, * offset to what it is now based on the delta and the data in the * block pool. Then the kernel will update it for us if needed. 
*/ + assert(relocs->relocs[i].offset < pool->state.end); uint32_t *reloc_data = pool->map + relocs->relocs[i].offset; relocs->relocs[i].presumed_offset = *reloc_data - relocs->relocs[i].delta; @@ -913,6 +917,7 @@ adjust_relocations_to_block_pool(struct anv_block_pool *pool, * should only be called on batch buffers, so we know it isn't in * use by the GPU at the moment. */ + assert(relocs->relocs[i].offset < from_bo->size); uint32_t *reloc_data = from_bo->map + relocs->relocs[i].offset; *reloc_data = relocs->relocs[i].presumed_offset + relocs->relocs[i].delta; -- cgit v1.2.3 From 2552df41a1678785fb52529b80623b3b07bfa1c4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 15 Oct 2015 18:28:00 -0700 Subject: anv/cmd_buffer: Reset the command buffer in BeginCommandBuffer --- src/vulkan/anv_cmd_buffer.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 19ad31167bf..b8783f9e212 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -258,6 +258,8 @@ VkResult anv_BeginCommandBuffer( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); + cmd_buffer->opt_flags = pBeginInfo->flags; if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { -- cgit v1.2.3 From 8ed23654c9f7fed77bc706fbfd1d25edf0d7020e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 15 Oct 2015 21:16:41 -0700 Subject: nir/spirv: Fix handling of vector component selects via OpAccessChain When we get to the end of the _vtn_load/store_varaible recursion, we may have one link left in the deref chain if there is a vector component select on the end. In this case, we need to truncate the deref chain early so that, when we make the copy for the load, we don't get the extra deref. The final deref will be handled by the vector extract/insert that comes later. 
--- src/glsl/nir/spirv_to_nir.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 6dfe530905d..a62ec6661b0 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -912,6 +912,11 @@ _vtn_variable_load(struct vtn_builder *b, nir_deref *old_child = src_deref_tail->child; if (glsl_type_is_vector_or_scalar(val->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. + */ + src_deref_tail->child = NULL; + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); load->variables[0] = @@ -979,6 +984,11 @@ _vtn_variable_store(struct vtn_builder *b, struct vtn_type *dest_type, nir_deref *old_child = dest_deref_tail->child; if (glsl_type_is_vector_or_scalar(src->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. + */ + dest_deref_tail->child = NULL; + nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); store->variables[0] = -- cgit v1.2.3 From 368e703a01c631f04f2a2a1f5f62f61c06838fae Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 16 Oct 2015 12:04:13 -0700 Subject: anv/pipeline: Rework dynamic state handling Aparently, we had the dynamic state array in the pipeline backwards. Instead of enabling the bits in the pipeline, it disables them and marks them as "dynamic". 
--- src/vulkan/anv_meta.c | 40 ++++++++++++++ src/vulkan/anv_pipeline.c | 137 ++++++++++++++++++++++------------------------ src/vulkan/anv_private.h | 1 + 3 files changed, 105 insertions(+), 73 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 56a0d4b9d18..8f6bc421194 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -240,6 +240,11 @@ anv_device_init_meta_clear_state(struct anv_device *device) .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, .primitiveRestartEnable = false, }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, .pRasterState = &(VkPipelineRasterStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, .depthClipEnable = true, @@ -272,6 +277,21 @@ anv_device_init_meta_clear_state(struct anv_device *device) VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, } }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, .flags = 0, }, &(struct anv_graphics_pipeline_create_info) { @@ -634,6 +654,11 @@ anv_device_init_meta_blit_state(struct anv_device *device) .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, .primitiveRestartEnable = false, }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, .pRasterState = &(VkPipelineRasterStateCreateInfo) { .sType = 
VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, .depthClipEnable = true, @@ -650,6 +675,21 @@ anv_device_init_meta_blit_state(struct anv_device *device) VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, } }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, .flags = 0, .layout = device->meta_state.blit.pipeline_layout, }; diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 84e023c781a..c7f0e6b1b4b 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -181,92 +181,83 @@ static void anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo) { - pipeline->dynamic_state_mask = 0; + uint32_t states = ANV_DYNAMIC_STATE_DIRTY_MASK; - if (pCreateInfo->pDynamicState == NULL) - return; + if (pCreateInfo->pDynamicState) { + /* Remove all of the states that are marked as dynamic */ + uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; + for (uint32_t s = 0; s < count; s++) + states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]); + } - uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; struct anv_dynamic_state *dynamic = &pipeline->dynamic_state; - for (uint32_t s = 0; s < count; s++) { - VkDynamicState state = pCreateInfo->pDynamicState->pDynamicStates[s]; - - assert(state < 32); - pipeline->dynamic_state_mask |= (1u << state); - - switch (state) { - case VK_DYNAMIC_STATE_VIEWPORT: - assert(pCreateInfo->pViewportState); - dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; 
- typed_memcpy(dynamic->viewport.viewports, - pCreateInfo->pViewportState->pViewports, - pCreateInfo->pViewportState->viewportCount); - break; - - case VK_DYNAMIC_STATE_SCISSOR: - assert(pCreateInfo->pViewportState); - dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; - typed_memcpy(dynamic->scissor.scissors, - pCreateInfo->pViewportState->pScissors, - pCreateInfo->pViewportState->scissorCount); - break; + dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; + if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + typed_memcpy(dynamic->viewport.viewports, + pCreateInfo->pViewportState->pViewports, + pCreateInfo->pViewportState->viewportCount); + } - case VK_DYNAMIC_STATE_LINE_WIDTH: - assert(pCreateInfo->pRasterState); - dynamic->line_width = pCreateInfo->pRasterState->lineWidth; - break; + dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; + if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + typed_memcpy(dynamic->scissor.scissors, + pCreateInfo->pViewportState->pScissors, + pCreateInfo->pViewportState->scissorCount); + } - case VK_DYNAMIC_STATE_DEPTH_BIAS: - assert(pCreateInfo->pRasterState); - dynamic->depth_bias.bias = pCreateInfo->pRasterState->depthBias; - dynamic->depth_bias.clamp = pCreateInfo->pRasterState->depthBiasClamp; - dynamic->depth_bias.slope_scaled = - pCreateInfo->pRasterState->slopeScaledDepthBias; - break; + if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { + assert(pCreateInfo->pRasterState); + dynamic->line_width = pCreateInfo->pRasterState->lineWidth; + } - case VK_DYNAMIC_STATE_BLEND_CONSTANTS: - assert(pCreateInfo->pColorBlendState); - typed_memcpy(dynamic->blend_constants, - pCreateInfo->pColorBlendState->blendConst, 4); - break; + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { + assert(pCreateInfo->pRasterState); + dynamic->depth_bias.bias = pCreateInfo->pRasterState->depthBias; + dynamic->depth_bias.clamp = pCreateInfo->pRasterState->depthBiasClamp; + dynamic->depth_bias.slope_scaled = 
+ pCreateInfo->pRasterState->slopeScaledDepthBias; + } - case VK_DYNAMIC_STATE_DEPTH_BOUNDS: - assert(pCreateInfo->pDepthStencilState); - dynamic->depth_bounds.min = - pCreateInfo->pDepthStencilState->minDepthBounds; - dynamic->depth_bounds.max = - pCreateInfo->pDepthStencilState->maxDepthBounds; - break; + if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { + assert(pCreateInfo->pColorBlendState); + typed_memcpy(dynamic->blend_constants, + pCreateInfo->pColorBlendState->blendConst, 4); + } - case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: - assert(pCreateInfo->pDepthStencilState); - dynamic->stencil_compare_mask.front = - pCreateInfo->pDepthStencilState->front.stencilCompareMask; - dynamic->stencil_compare_mask.back = - pCreateInfo->pDepthStencilState->back.stencilCompareMask; - break; + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->depth_bounds.min = + pCreateInfo->pDepthStencilState->minDepthBounds; + dynamic->depth_bounds.max = + pCreateInfo->pDepthStencilState->maxDepthBounds; + } - case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: - assert(pCreateInfo->pDepthStencilState); - dynamic->stencil_write_mask.front = - pCreateInfo->pDepthStencilState->front.stencilWriteMask; - dynamic->stencil_write_mask.back = - pCreateInfo->pDepthStencilState->back.stencilWriteMask; - break; + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_compare_mask.front = + pCreateInfo->pDepthStencilState->front.stencilCompareMask; + dynamic->stencil_compare_mask.back = + pCreateInfo->pDepthStencilState->back.stencilCompareMask; + } - case VK_DYNAMIC_STATE_STENCIL_REFERENCE: - assert(pCreateInfo->pDepthStencilState); - dynamic->stencil_reference.front = - pCreateInfo->pDepthStencilState->front.stencilReference; - dynamic->stencil_reference.back = - pCreateInfo->pDepthStencilState->back.stencilReference; - break; + if (states & (1 << 
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_write_mask.front = + pCreateInfo->pDepthStencilState->front.stencilWriteMask; + dynamic->stencil_write_mask.back = + pCreateInfo->pDepthStencilState->back.stencilWriteMask; + } - default: - assert(!"Invalid dynamic state"); - } + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_reference.front = + pCreateInfo->pDepthStencilState->front.stencilReference; + dynamic->stencil_reference.back = + pCreateInfo->pDepthStencilState->back.stencilReference; } + + pipeline->dynamic_state_mask = states; } VkResult diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 6bbf7c0291b..d33ca89429c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -794,6 +794,7 @@ struct anv_buffer { #define ANV_DYNAMIC_STENCIL_COMPARE_MASK_DIRTY (1 << 6) #define ANV_DYNAMIC_STENCIL_WRITE_MASK_DIRTY (1 << 7) #define ANV_DYNAMIC_STENCIL_REFERENCE_DIRTY (1 << 8) +#define ANV_DYNAMIC_STATE_DIRTY_MASK ((1 << 9) - 1) #define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 9) #define ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY (1 << 10) -- cgit v1.2.3 From 7010fe61c8c72ce9bc8df98d730d99652333d460 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 16 Oct 2015 20:01:45 -0700 Subject: anv: Add facilities for dumping an image to a file The ability to dump an arbitrary miplevel or array slice of an anv_image to a file is very useful for debugging. Nothing inside of the driver calls this right now, but it's very useful to call from GDB. 
--- src/vulkan/Makefile.am | 1 + src/vulkan/anv_dump.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 4 + 3 files changed, 215 insertions(+) create mode 100644 src/vulkan/anv_dump.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index aeed78ae840..985864a87fe 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -63,6 +63,7 @@ VULKAN_SOURCES = \ anv_batch_chain.c \ anv_compiler.cpp \ anv_device.c \ + anv_dump.c \ anv_entrypoints.c \ anv_entrypoints.h \ anv_formats.c \ diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c new file mode 100644 index 00000000000..3878941896c --- /dev/null +++ b/src/vulkan/anv_dump.c @@ -0,0 +1,210 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +/* This file contains utility functions for help debugging. 
They can be + * called from GDB or similar to help inspect images and buffers. + */ + +void +anv_dump_image_to_ppm(struct anv_device *device, + struct anv_image *image, unsigned miplevel, + unsigned array_layer, const char *filename) +{ + VkDevice vk_device = anv_device_to_handle(device); + VkResult result; + + VkExtent2D extent = { image->extent.width, image->extent.height }; + for (unsigned i = 0; i < miplevel; i++) { + extent.width = MAX2(1, extent.width / 2); + extent.height = MAX2(1, extent.height / 2); + } + + VkImage copy_image; + result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .extent = (VkExtent3D) { extent.width, extent.height, 1 }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT, + .flags = 0, + }, ©_image); + assert(result == VK_SUCCESS); + + VkMemoryRequirements reqs; + result = anv_GetImageMemoryRequirements(vk_device, copy_image, &reqs); + + VkDeviceMemory memory; + result = anv_AllocMemory(vk_device, + &(VkMemoryAllocInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + .allocationSize = reqs.size, + .memoryTypeIndex = 0, + }, &memory); + assert(result == VK_SUCCESS); + + result = anv_BindImageMemory(vk_device, copy_image, memory, 0); + assert(result == VK_SUCCESS); + + VkCmdPool cmdPool; + result = anv_CreateCommandPool(vk_device, + &(VkCmdPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_CMD_POOL_CREATE_INFO, + .queueFamilyIndex = 0, + .flags = 0, + }, &cmdPool); + assert(result == VK_SUCCESS); + + VkCmdBuffer cmd; + result = anv_CreateCommandBuffer(vk_device, + &(VkCmdBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO, + .cmdPool = cmdPool, + .level = VK_CMD_BUFFER_LEVEL_PRIMARY, + .flags = 0, + }, &cmd); + assert(result == VK_SUCCESS); + + result = anv_BeginCommandBuffer(cmd, + &(VkCmdBufferBeginInfo) { 
+ .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO, + .flags = VK_CMD_BUFFER_OPTIMIZE_ONE_TIME_SUBMIT_BIT, + }); + assert(result == VK_SUCCESS); + + anv_CmdBlitImage(cmd, + anv_image_to_handle(image), VK_IMAGE_LAYOUT_GENERAL, + copy_image, VK_IMAGE_LAYOUT_GENERAL, 1, + &(VkImageBlit) { + .srcSubresource = { + .aspect = VK_IMAGE_ASPECT_COLOR, + .mipLevel = miplevel, + .arrayLayer = array_layer, + .arraySize = 1, + }, + .srcOffset = (VkOffset3D) { 0, 0, 0 }, + .srcExtent = (VkExtent3D) { + extent.width, + extent.height, + 1 + }, + .destSubresource = { + .aspect = VK_IMAGE_ASPECT_COLOR, + .mipLevel = 0, + .arrayLayer = 0, + .arraySize = 1, + }, + .destOffset = (VkOffset3D) { 0, 0, 0 }, + .destExtent = (VkExtent3D) { + extent.width, + extent.height, + 1 + }, + }, VK_TEX_FILTER_NEAREST); + + ANV_CALL(CmdPipelineBarrier)(cmd, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + true, 1, + (const void * []) { &(VkImageMemoryBarrier) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .outputMask = VK_MEMORY_OUTPUT_TRANSFER_BIT, + .inputMask = VK_MEMORY_INPUT_HOST_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = 0, + .destQueueFamilyIndex = 0, + .image = copy_image, + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArrayLayer = 0, + .arraySize = 1, + }, + }}); + + result = anv_EndCommandBuffer(cmd); + assert(result == VK_SUCCESS); + + VkFence fence; + result = anv_CreateFence(vk_device, + &(VkFenceCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = 0, + }, &fence); + assert(result == VK_SUCCESS); + + result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), + 1, &cmd, fence); + assert(result == VK_SUCCESS); + + result = anv_WaitForFences(vk_device, 1, &fence, true, UINT64_MAX); + assert(result == VK_SUCCESS); + + anv_DestroyFence(vk_device, fence); + 
anv_DestroyCommandPool(vk_device, cmdPool); + + uint8_t *map; + result = anv_MapMemory(vk_device, memory, 0, reqs.size, 0, (void **)&map); + assert(result == VK_SUCCESS); + + VkSubresourceLayout layout; + result = anv_GetImageSubresourceLayout(vk_device, copy_image, + &(VkImageSubresource) { + .aspect = VK_IMAGE_ASPECT_COLOR, + .mipLevel = 0, + .arrayLayer = 0, + }, &layout); + assert(result == VK_SUCCESS); + + map += layout.offset; + + /* Now we can finally write the PPM file */ + FILE *file = fopen(filename, "wb"); + assert(file); + + fprintf(file, "P6\n%d %d\n255\n", extent.width, extent.height); + for (unsigned y = 0; y < extent.height; y++) { + uint8_t row[extent.width * 3]; + for (unsigned x = 0; x < extent.width; x++) { + row[x * 3 + 0] = map[x * 4 + 0]; + row[x * 3 + 1] = map[x * 4 + 1]; + row[x * 3 + 2] = map[x * 4 + 2]; + } + fwrite(row, 3, extent.width, file); + + map += layout.rowPitch; + } + fclose(file); + + anv_UnmapMemory(vk_device, memory); + anv_DestroyImage(vk_device, copy_image); + anv_FreeMemory(vk_device, memory); +} diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index d33ca89429c..34bd53cf20b 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1409,6 +1409,10 @@ void anv_device_finish_meta(struct anv_device *device); void *anv_lookup_entrypoint(const char *name); +void anv_dump_image_to_ppm(struct anv_device *device, + struct anv_image *image, unsigned miplevel, + unsigned array_layer, const char *filename); + #define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \ \ static inline struct __anv_type * \ -- cgit v1.2.3 From 3e47e340366b9b739a1a94a6f0b55f0ee1db1b08 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 17 Oct 2015 08:17:00 -0700 Subject: anv: Add support for immutable descriptors --- src/vulkan/anv_device.c | 52 ++++++++++++++++++++++++++++++++++++++++++------ src/vulkan/anv_private.h | 5 ++++- 2 files changed, 50 insertions(+), 7 deletions(-) (limited to 'src') diff --git 
a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 2ae39741fe6..75fbe691c5f 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1445,14 +1445,25 @@ VkResult anv_CreateDescriptorSetLayout( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + uint32_t immutable_sampler_count = 0; + for (uint32_t b = 0; b < pCreateInfo->count; b++) { + if (pCreateInfo->pBinding[b].pImmutableSamplers) + immutable_sampler_count += pCreateInfo->pBinding[b].arraySize; + } + size_t size = sizeof(struct anv_descriptor_set_layout) + - pCreateInfo->count * sizeof(set_layout->binding[0]); + pCreateInfo->count * sizeof(set_layout->binding[0]) + + immutable_sampler_count * sizeof(struct anv_sampler *); set_layout = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (!set_layout) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + /* We just allocate all the samplers at the end of the struct */ + struct anv_sampler **samplers = + (struct anv_sampler **)&set_layout->binding[pCreateInfo->count]; + set_layout->binding_count = pCreateInfo->count; set_layout->shader_stages = 0; set_layout->size = 0; @@ -1461,6 +1472,9 @@ VkResult anv_CreateDescriptorSetLayout( memset(set_layout->binding, -1, pCreateInfo->count * sizeof(set_layout->binding[0])); + /* Initialize all samplers to 0 */ + memset(samplers, 0, immutable_sampler_count * sizeof(*samplers)); + uint32_t sampler_count[VK_SHADER_STAGE_NUM] = { 0, }; uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, }; uint32_t dynamic_offset_count = 0; @@ -1512,6 +1526,17 @@ VkResult anv_CreateDescriptorSetLayout( break; } + if (pCreateInfo->pBinding[b].pImmutableSamplers) { + set_layout->binding[b].immutable_samplers = samplers; + samplers += array_size; + + for (uint32_t i = 0; i < array_size; i++) + set_layout->binding[b].immutable_samplers[i] = + anv_sampler_from_handle(pCreateInfo->pBinding[b].pImmutableSamplers[i]); + } else { + set_layout->binding[b].immutable_samplers = NULL; + } + 
set_layout->shader_stages |= pCreateInfo->pBinding[b].stageFlags; } @@ -1574,6 +1599,16 @@ anv_descriptor_set_create(struct anv_device *device, */ memset(set, 0, size); + /* Go through and fill out immutable samplers if we have any */ + struct anv_descriptor *desc = set->descriptors; + for (uint32_t b = 0; b < layout->binding_count; b++) { + if (layout->binding[b].immutable_samplers) { + for (uint32_t i = 0; i < layout->binding[b].array_size; i++) + desc[i].sampler = layout->binding[b].immutable_samplers[i]; + } + desc += layout->binding[b].array_size; + } + *out_set = set; return VK_SUCCESS; @@ -1659,16 +1694,21 @@ void anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: for (uint32_t j = 0; j < write->count; j++) { + struct anv_descriptor *desc = + &set->descriptors[write->destBinding + j]; ANV_FROM_HANDLE(anv_image_view, iview, write->pDescriptors[j].imageView); ANV_FROM_HANDLE(anv_sampler, sampler, write->pDescriptors[j].sampler); - set->descriptors[write->destBinding + j] = (struct anv_descriptor) { - .type = ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER, - .image_view = iview, - .sampler = sampler, - }; + desc->type = ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER; + desc->image_view = iview; + + /* If this descriptor has an immutable sampler, we don't want + * to stomp on it. 
+ */ + if (sampler) + desc->sampler = sampler; } break; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 34bd53cf20b..5f9a3ce5c12 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -674,6 +674,9 @@ struct anv_descriptor_set_binding_layout { /* Index into the sampler table for the associated sampler */ int16_t sampler_index; } stage[VK_SHADER_STAGE_NUM]; + + /* Immutable samplers (or NULL if no immutable samplers) */ + struct anv_sampler **immutable_samplers; }; struct anv_descriptor_set_layout { @@ -689,7 +692,7 @@ struct anv_descriptor_set_layout { /* Number of dynamic offsets used by this descriptor set */ uint16_t dynamic_offset_count; - /* Don't use this directly */ + /* Bindings in this descriptor set */ struct anv_descriptor_set_binding_layout binding[0]; }; -- cgit v1.2.3 From 995d9c4ac7fb046e01196cec308ebe10002a28da Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 17 Oct 2015 10:35:27 -0700 Subject: anv/pipeline: Remove the ViewportState finishme We should be doing everything we need to with the viewport state --- src/vulkan/anv_pipeline.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index c7f0e6b1b4b..7fd8c100568 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -292,8 +292,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, if (pCreateInfo->pTessellationState) anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); - if (pCreateInfo->pViewportState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO"); if (pCreateInfo->pMultisampleState && pCreateInfo->pMultisampleState->rasterSamples > 1) anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); -- cgit v1.2.3 From 7e6959402de45a668ec4efa3cdac042bcfa2b349 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 15:50:45 -0700 Subject: nir/spirv: Handle 
builtins in OpAccessChain Previously, we were trying to handle them later when loading. However, at that point, you've already lost information and it's harder to handle certain corner-cases. In particular, if you have a shader that does gl_PerVertex.gl_Position.x = foo we have trouble because we see the .x and we don't know that we're in gl_Position. If we, instead, handle it in OpAccessChain, we have all the information we need and we can silently re-direct it to the appropreate variable. This also lets us delete some code which is a nice side-effect. --- src/glsl/nir/spirv_to_nir.c | 61 +++++++++------------------------------------ 1 file changed, 12 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a62ec6661b0..2ba8216c0b4 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -854,44 +854,6 @@ get_builtin_variable(struct vtn_builder *b, return var; } -static void -vtn_builtin_load(struct vtn_builder *b, - struct vtn_ssa_value *val, - SpvBuiltIn builtin) -{ - assert(glsl_type_is_vector_or_scalar(val->type)); - - nir_variable *var = get_builtin_variable(b, val->type, builtin); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); - nir_ssa_dest_init(&load->instr, &load->dest, - glsl_get_vector_elements(val->type), NULL); - - load->variables[0] = nir_deref_var_create(load, var); - load->num_components = glsl_get_vector_elements(val->type); - nir_builder_instr_insert(&b->nb, &load->instr); - val->def = &load->dest.ssa; -} - -static void -vtn_builtin_store(struct vtn_builder *b, - struct vtn_ssa_value *val, - SpvBuiltIn builtin) -{ - assert(glsl_type_is_vector_or_scalar(val->type)); - - nir_variable *var = get_builtin_variable(b, val->type, builtin); - - nir_intrinsic_instr *store = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); - - store->variables[0] = nir_deref_var_create(store, var); - 
store->num_components = glsl_get_vector_elements(val->type); - store->src[0] = nir_src_for_ssa(val->def); - nir_builder_instr_insert(&b->nb, &store->instr); -} - static struct vtn_ssa_value * _vtn_variable_load(struct vtn_builder *b, nir_deref_var *src_deref, struct vtn_type *src_type, @@ -900,11 +862,6 @@ _vtn_variable_load(struct vtn_builder *b, struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); val->type = src_deref_tail->type; - if (src_type->is_builtin) { - vtn_builtin_load(b, val, src_type->builtin); - return val; - } - /* The deref tail may contain a deref to select a component of a vector (in * other words, it might not be an actual tail) so we have to save it away * here since we overwrite it later. @@ -976,11 +933,6 @@ _vtn_variable_store(struct vtn_builder *b, struct vtn_type *dest_type, nir_deref_var *dest_deref, nir_deref *dest_deref_tail, struct vtn_ssa_value *src) { - if (dest_type->is_builtin) { - vtn_builtin_store(b, src, dest_type->builtin); - return; - } - nir_deref *old_child = dest_deref_tail->child; if (glsl_type_is_vector_or_scalar(src->type)) { @@ -1410,7 +1362,18 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, default: unreachable("Invalid type for deref"); } - tail = tail->child; + + if (deref_type->is_builtin) { + /* If we encounter a builtin, we throw away the ress of the + * access chain, jump to the builtin, and keep building. 
+ */ + nir_variable *builtin = get_builtin_variable(b, deref_type->type, + deref_type->builtin); + val->deref = nir_deref_var_create(b, builtin); + tail = &val->deref->deref; + } else { + tail = tail->child; + } } /* For uniform blocks, we don't resolve the access chain until we -- cgit v1.2.3 From 12c30c94987f2dd2e7bad1c84d2bd7df2c7b01d8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 16:08:23 -0700 Subject: nir/spirv: Use the new nir_variable helpers --- src/glsl/nir/spirv_to_nir.c | 50 ++++++++++----------------------------------- 1 file changed, 11 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 2ba8216c0b4..819109385ee 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -825,28 +825,14 @@ get_builtin_variable(struct vtn_builder *b, nir_variable *var = b->builtins[builtin]; if (!var) { - var = ralloc(b->shader, nir_variable); - var->type = type; - + int location; nir_variable_mode mode; - vtn_get_builtin_location(builtin, &var->data.location, &mode); - var->data.explicit_location = true; - var->data.mode = mode; - var->name = ralloc_strdup(var, "builtin"); + vtn_get_builtin_location(builtin, &location, &mode); - switch (mode) { - case nir_var_shader_in: - exec_list_push_tail(&b->shader->inputs, &var->node); - break; - case nir_var_shader_out: - exec_list_push_tail(&b->shader->outputs, &var->node); - break; - case nir_var_system_value: - exec_list_push_tail(&b->shader->system_values, &var->node); - break; - default: - unreachable("bad builtin mode"); - } + var = nir_variable_create(b->shader, mode, type, "builtin"); + + var->data.location = location; + var->data.explicit_location = true; b->builtins[builtin] = var; } @@ -1277,26 +1263,12 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, if (builtin_block) break; - switch (var->data.mode) { - case nir_var_shader_in: - exec_list_push_tail(&b->shader->inputs, &var->node); - break; 
- case nir_var_shader_out: - exec_list_push_tail(&b->shader->outputs, &var->node); - break; - case nir_var_global: - exec_list_push_tail(&b->shader->globals, &var->node); - break; - case nir_var_local: - exec_list_push_tail(&b->impl->locals, &var->node); - break; - case nir_var_uniform: - exec_list_push_tail(&b->shader->uniforms, &var->node); - break; - case nir_var_system_value: - exec_list_push_tail(&b->shader->system_values, &var->node); - break; + if (var->data.mode == nir_var_local) { + nir_function_impl_add_variable(b->impl, var); + } else { + nir_shader_add_variable(b->shader, var); } + break; } -- cgit v1.2.3 From fba55b711ec3933f2f98de62c3ab8c741ade0cd7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 16:17:38 -0700 Subject: anv/compiler: Remove unneeded wm prog data setup As of upstream mesa changes, brw_compile_fs does this for us so there's no need to have the code in the Vulkan driver anymore. --- src/vulkan/anv_compiler.cpp | 97 --------------------------------------------- 1 file changed, 97 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 2b8e7cee9aa..af193069c6c 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -137,71 +137,6 @@ create_params_array(struct anv_pipeline *pipeline, (const gl_constant_value *)&null_data->client_data[i * sizeof(float)]; } -/** - * Return a bitfield where bit n is set if barycentric interpolation mode n - * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader. 
- */ -unsigned -brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo, - bool shade_model_flat, - bool persample_shading, - nir_shader *shader) -{ - unsigned barycentric_interp_modes = 0; - - nir_foreach_variable(var, &shader->inputs) { - enum glsl_interp_qualifier interp_qualifier = - (enum glsl_interp_qualifier) var->data.interpolation; - bool is_centroid = var->data.centroid && !persample_shading; - bool is_sample = var->data.sample || persample_shading; - bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) || - (var->data.location == VARYING_SLOT_COL1); - - /* Ignore WPOS and FACE, because they don't require interpolation. */ - if (var->data.location == VARYING_SLOT_POS || - var->data.location == VARYING_SLOT_FACE) - continue; - - /* Determine the set (or sets) of barycentric coordinates needed to - * interpolate this variable. Note that when - * brw->needs_unlit_centroid_workaround is set, centroid interpolation - * uses PIXEL interpolation for unlit pixels and CENTROID interpolation - * for lit pixels, so we need both sets of barycentric coordinates. 
- */ - if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) { - if (is_centroid) { - barycentric_interp_modes |= - 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC; - } else if (is_sample) { - barycentric_interp_modes |= - 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC; - } - if ((!is_centroid && !is_sample) || - devinfo->needs_unlit_centroid_workaround) { - barycentric_interp_modes |= - 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC; - } - } else if (interp_qualifier == INTERP_QUALIFIER_SMOOTH || - (!(shade_model_flat && is_gl_Color) && - interp_qualifier == INTERP_QUALIFIER_NONE)) { - if (is_centroid) { - barycentric_interp_modes |= - 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC; - } else if (is_sample) { - barycentric_interp_modes |= - 1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC; - } - if ((!is_centroid && !is_sample) || - devinfo->needs_unlit_centroid_workaround) { - barycentric_interp_modes |= - 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; - } - } - } - - return barycentric_interp_modes; -} - static void brw_vs_populate_key(struct brw_context *brw, struct brw_vertex_program *vp, @@ -504,25 +439,6 @@ void brw_wm_populate_key(struct brw_context *brw, ctx->DrawBuffer = NULL; } -static uint8_t -computed_depth_mode(struct gl_fragment_program *fp) -{ - if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - switch (fp->FragDepthLayout) { - case FRAG_DEPTH_LAYOUT_NONE: - case FRAG_DEPTH_LAYOUT_ANY: - return BRW_PSCDEPTH_ON; - case FRAG_DEPTH_LAYOUT_GREATER: - return BRW_PSCDEPTH_ON_GE; - case FRAG_DEPTH_LAYOUT_LESS: - return BRW_PSCDEPTH_ON_LE; - case FRAG_DEPTH_LAYOUT_UNCHANGED: - return BRW_PSCDEPTH_OFF; - } - } - return BRW_PSCDEPTH_OFF; -} - static bool really_do_wm_prog(struct brw_context *brw, struct gl_shader_program *prog, @@ -540,23 +456,10 @@ really_do_wm_prog(struct brw_context *brw, memset(prog_data, 0, sizeof(*prog_data)); - /* key->alpha_test_func means simulating alpha testing via discards, - * so the shader definitely kills pixels. 
- */ - prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func; - - prog_data->computed_depth_mode = computed_depth_mode(&fp->program); - create_params_array(pipeline, fs, &prog_data->base); anv_nir_apply_dynamic_offsets(pipeline, fs->Program->nir, &prog_data->base); anv_nir_apply_pipeline_layout(fs->Program->nir, pipeline->layout); - prog_data->barycentric_interp_modes = - brw_compute_barycentric_interp_modes(brw->intelScreen->devinfo, - key->flat_shade, - key->persample_shading, - fp->program.Base.nir); - set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_FRAGMENT); /* This needs to come after shader time and pull constant entries, but we -- cgit v1.2.3 From 661d0db0778f6bcb01ab2d59f7867aefd5dd3e18 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 16:26:16 -0700 Subject: anv/compiler: Delete legacy clipping code This is a Vulkan driver. We don't need legacy clipping stuff and, even if we did, we don't plan on supporting pre-Sandybridge anyway. --- src/vulkan/anv_compiler.cpp | 37 ++----------------------------------- 1 file changed, 2 insertions(+), 35 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index af193069c6c..3624563c879 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -198,44 +198,11 @@ really_do_vs_prog(struct brw_context *brw, &prog_data->base.base); anv_nir_apply_pipeline_layout(vs->Program->nir, pipeline->layout); - GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; prog_data->inputs_read = vp->program.Base.InputsRead; - if (key->copy_edgeflag) { - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); - prog_data->inputs_read |= VERT_BIT_EDGEFLAG; - } - - if (brw->gen < 6) { - /* Put dummy slots into the VUE for the SF to put the replaced - * point sprite coords in. 
We shouldn't need these dummy slots, - * which take up precious URB space, but it would mean that the SF - * doesn't get nice aligned pairs of input coords into output - * coords, which would be a pain to handle. - */ - for (int i = 0; i < 8; i++) { - if (key->point_coord_replace & (1 << i)) - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); - } - - /* if back colors are written, allocate slots for front colors too */ - if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); - if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); - } - - /* In order for legacy clipping to work, we need to populate the clip - * distance varying slots whenever clipping is enabled, even if the vertex - * shader doesn't write to gl_ClipDistance. - */ - if (key->nr_userclip_plane_consts) { - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); - } - brw_compute_vue_map(brw->intelScreen->devinfo, - &prog_data->base.vue_map, outputs_written, + &prog_data->base.vue_map, + vp->program.Base.OutputsWritten, prog ? prog->SeparateShader : false); set_binding_table_layout(&prog_data->base.base, pipeline, -- cgit v1.2.3 From 27ca9ca4e1ca2e98e073df303e4dbfa3a527b206 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 16:45:11 -0700 Subject: anv/compiler: Get rid of legacy shader key setup Most of the shader key setup we did was for pre-Sandybridge and the stuff for SNB+ wasn't in the key setup. That stuff still isn't there but at least we've left ourselves notes for now. 
--- src/vulkan/anv_compiler.cpp | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 3624563c879..87aae64942b 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -142,36 +142,11 @@ brw_vs_populate_key(struct brw_context *brw, struct brw_vertex_program *vp, struct brw_vs_prog_key *key) { - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_VERTEX_PROGRAM */ - struct gl_program *prog = (struct gl_program *) vp; - memset(key, 0, sizeof(*key)); - /* Just upload the program verbatim for now. Always send it all - * the inputs it asks for, whether they are varying or not. - */ - key->program_string_id = vp->id; - - /* _NEW_POLYGON */ - if (brw->gen < 6) { - key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); - } - - if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 | - VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) { - /* _NEW_LIGHT | _NEW_BUFFERS */ - key->clamp_vertex_color = ctx->Light._ClampVertexColor; - } + /* XXX: Handle vertex input work-arounds */ - /* _NEW_POINT */ - if (brw->gen < 6 && ctx->Point.PointSprite) { - for (int i = 0; i < 8; i++) { - if (ctx->Point.CoordReplace[i]) - key->point_coord_replace |= (1 << i); - } - } + /* XXX: Handle sampler_prog_key */ } static bool -- cgit v1.2.3 From 60e8439237e7f29baa297e0ebb073dc868226e65 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 16:59:29 -0700 Subject: anv/compiler: Remove irrelevant wm key setup Most of this applies to Iron Lake and prior only. While we're at it, we get rid of the legacy GL shading model code. 
--- src/vulkan/anv_compiler.cpp | 64 --------------------------------------------- 1 file changed, 64 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 87aae64942b..09c2edb3091 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -212,8 +212,6 @@ void brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key) { struct gl_context *ctx = &brw->ctx; - GLuint lookup = 0; - GLuint line_aa; bool program_uses_dfdy = fp->program.UsesDFdy; struct gl_framebuffer draw_buffer; bool multisample_fbo; @@ -237,72 +235,10 @@ void brw_wm_populate_key(struct brw_context *brw, multisample_fbo = ctx->DrawBuffer->Visual.samples > 1; - /* Build the index for table lookup - */ - if (brw->gen < 6) { - /* _NEW_COLOR */ - if (fp->program.UsesKill || ctx->Color.AlphaEnabled) - lookup |= IZ_PS_KILL_ALPHATEST_BIT; - - if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) - lookup |= IZ_PS_COMPUTES_DEPTH_BIT; - - /* _NEW_DEPTH */ - if (ctx->Depth.Test) - lookup |= IZ_DEPTH_TEST_ENABLE_BIT; - - if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? 
*/ - lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; - - /* _NEW_STENCIL | _NEW_BUFFERS */ - if (ctx->Stencil._Enabled) { - lookup |= IZ_STENCIL_TEST_ENABLE_BIT; - - if (ctx->Stencil.WriteMask[0] || - ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) - lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; - } - key->iz_lookup = lookup; - } - - line_aa = AA_NEVER; - - /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ - if (ctx->Line.SmoothFlag) { - if (brw->reduced_primitive == GL_LINES) { - line_aa = AA_ALWAYS; - } - else if (brw->reduced_primitive == GL_TRIANGLES) { - if (ctx->Polygon.FrontMode == GL_LINE) { - line_aa = AA_SOMETIMES; - - if (ctx->Polygon.BackMode == GL_LINE || - (ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_BACK)) - line_aa = AA_ALWAYS; - } - else if (ctx->Polygon.BackMode == GL_LINE) { - line_aa = AA_SOMETIMES; - - if ((ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_FRONT)) - line_aa = AA_ALWAYS; - } - } - } - - key->line_aa = line_aa; - /* _NEW_HINT */ key->high_quality_derivatives = ctx->Hint.FragmentShaderDerivative == GL_NICEST; - if (brw->gen < 6) - key->stats_wm = brw->stats_wm; - - /* _NEW_LIGHT */ - key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); - /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ key->clamp_fragment_color = ctx->Color._ClampFragmentColor; -- cgit v1.2.3 From 0d84a0d58b977639395e848ced05bb0c959bd1a2 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 19 Oct 2015 08:37:03 -0700 Subject: vk/meta: Add required multisample state to pipeline The Vulkan spec (20 Oct 2015, git-aa308cb) requires that VkGraphicsPipelineCreateInfo::pMultisampleState not be NULL. 
--- src/vulkan/anv_meta.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 76b8c4173e6..c280ea0ef42 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -247,6 +247,12 @@ anv_device_init_meta_clear_state(struct anv_device *device) .cullMode = VK_CULL_MODE_NONE, .frontFace = VK_FRONT_FACE_CCW }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + }, .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, .depthTestEnable = true, @@ -661,6 +667,12 @@ anv_device_init_meta_blit_state(struct anv_device *device) .cullMode = VK_CULL_MODE_NONE, .frontFace = VK_FRONT_FACE_CCW }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + }, .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 1, -- cgit v1.2.3 From 81f8b82fc866aeadcbedce12395605918c105317 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 19 Oct 2015 08:44:38 -0700 Subject: vk/meta: Add required renderpass to pipeline The Vulkan spec (20 Oct 2015, git-aa308cb) requires that VkGraphicsPipelineCreateInfo::renderPass be a valid handle. To satisfy that, define a static dummy render pass used for all meta operations. 
--- src/vulkan/anv_meta.c | 6 ++++++ src/vulkan/anv_private.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index c280ea0ef42..09d035b0b07 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -30,6 +30,8 @@ #include "anv_private.h" #include "anv_nir_builder.h" +struct anv_render_pass anv_meta_dummy_renderpass = {0}; + static nir_shader * build_nir_vertex_shader(bool attr_flat) { @@ -293,6 +295,8 @@ anv_device_init_meta_clear_state(struct anv_device *device) }, }, .flags = 0, + .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass), + .subpass = 0, }, &(struct anv_graphics_pipeline_create_info) { .use_repclear = true, @@ -698,6 +702,8 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, .flags = 0, .layout = device->meta_state.blit.pipeline_layout, + .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass), + .subpass = 0, }; const struct anv_graphics_pipeline_create_info anv_pipeline_info = { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 5f9a3ce5c12..68598a9dd49 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1395,6 +1395,8 @@ struct anv_render_pass { struct anv_subpass subpasses[0]; }; +extern struct anv_render_pass anv_meta_dummy_renderpass; + struct anv_query_pool_slot { uint64_t begin; uint64_t end; -- cgit v1.2.3 From 855180b3d91d1749ca5d6bbf90534ed4e3bf47d1 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 20 Oct 2015 10:52:57 -0700 Subject: anv: Define anv_validate macro If a block of code is annotated with anv_validate, then the block runs only in debug builds. 
--- src/vulkan/anv_private.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 68598a9dd49..f03620f92ec 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -164,6 +164,16 @@ void anv_loge_v(const char *format, va_list va); #define anv_assert(x) #endif +/** + * If a block of code is annotated with anv_validate, then the block runs only + * in debug builds. + */ +#ifdef DEBUG +#define anv_validate if (1) +#else +#define anv_validate if (0) +#endif + void anv_abortf(const char *format, ...) anv_noreturn anv_printflike(1, 2); void anv_abortfv(const char *format, va_list va) anv_noreturn; -- cgit v1.2.3 From b51468b519f447c8e0afd492ce09d7c9485e222b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 16 Oct 2015 20:31:39 -0700 Subject: anv/pipeline: Validate VkGraphicsPipelineCreateInfo The Vulkan spec (20 Oct 2015, git-aa308cb) states that some fields of VkGraphicsPipelineCreateInfo are required under certain conditions. Add a new function, anv_pipeline_validate_create_info() that asserts the requirements hold. The assertions helped me discover bugs in Crucible and anv_meta.c. --- src/vulkan/anv_pipeline.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 7fd8c100568..28cccaa3d25 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -260,6 +260,51 @@ anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, pipeline->dynamic_state_mask = states; } +static void +anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) +{ + struct anv_render_pass *renderpass = NULL; + struct anv_subpass *subpass = NULL; + + /* Assert that all required members of VkGraphicsPipelineCreateInfo are + * present, as explained by the Vulkan (20 Oct 2015, git-aa308cb), Section + * 4.2 Graphics Pipeline. 
+ */ + assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + renderpass = anv_render_pass_from_handle(info->renderPass); + assert(renderpass); + + if (renderpass != &anv_meta_dummy_renderpass) { + assert(info->subpass < renderpass->subpass_count); + subpass = &renderpass->subpasses[info->subpass]; + } + + assert(info->stageCount >= 1); + assert(info->pVertexInputState); + assert(info->pInputAssemblyState); + assert(info->pViewportState); + assert(info->pRasterState); + assert(info->pMultisampleState); + + if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) + assert(info->pDepthStencilState); + + if (subpass && subpass->color_count > 0) + assert(info->pColorBlendState); + + for (uint32_t i = 0; i < info->stageCount; ++i) { + switch (info->pStages[i].stage) { + case VK_SHADER_STAGE_TESS_CONTROL: + case VK_SHADER_STAGE_TESS_EVALUATION: + assert(info->pTessellationState); + break; + default: + break; + } + } +} + VkResult anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo, @@ -267,6 +312,10 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, { VkResult result; + anv_validate { + anv_pipeline_validate_create_info(pCreateInfo); + } + pipeline->device = device; pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); -- cgit v1.2.3 From 2484d1a01fff6127b45280ee9bfbd1bbfaa425db Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 19 Oct 2015 11:39:30 -0700 Subject: anv/pipeline: Fix requirement for depthstencil state The Vulkan spec allows VkGraphicsPipelineCreateInfo::pDepthStencilState to be NULL when the pipeline's subpass contains no depthstencil attachment (see spec quote below). anv_pipeline_init_dynamic_state() required it unconditionally. 
This path fixes anv_pipeline_init_dynamic_state() to access pDepthStencilState only when there is a depthstencil attachment. From the Vulkan spec (20 Oct 2015, git-aa308cb) pDepthStencilState [...] may only be NULL if renderPass and subpass specify a subpass that has no depth/stencil attachment. --- src/vulkan/anv_pipeline.c | 71 +++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 28cccaa3d25..41a4d06403b 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -182,6 +182,10 @@ anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo) { uint32_t states = ANV_DYNAMIC_STATE_DIRTY_MASK; + ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); + struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; + + pipeline->dynamic_state = default_dynamic_state; if (pCreateInfo->pDynamicState) { /* Remove all of the states that are marked as dynamic */ @@ -225,36 +229,49 @@ anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, pCreateInfo->pColorBlendState->blendConst, 4); } - if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { - assert(pCreateInfo->pDepthStencilState); - dynamic->depth_bounds.min = - pCreateInfo->pDepthStencilState->minDepthBounds; - dynamic->depth_bounds.max = - pCreateInfo->pDepthStencilState->maxDepthBounds; - } + /* If there is no depthstencil attachment, then don't read + * pDepthStencilState. The Vulkan spec states that pDepthStencilState may + * be NULL in this case. Even if pDepthStencilState is non-NULL, there is + * no need to override the depthstencil defaults in + * anv_pipeline::dynamic_state when there is no depthstencil attachment. + * + * From the Vulkan spec (20 Oct 2015, git-aa308cb): + * + * pDepthStencilState [...] 
may only be NULL if renderPass and subpass + * specify a subpass that has no depth/stencil attachment. + */ + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->depth_bounds.min = + pCreateInfo->pDepthStencilState->minDepthBounds; + dynamic->depth_bounds.max = + pCreateInfo->pDepthStencilState->maxDepthBounds; + } - if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { - assert(pCreateInfo->pDepthStencilState); - dynamic->stencil_compare_mask.front = - pCreateInfo->pDepthStencilState->front.stencilCompareMask; - dynamic->stencil_compare_mask.back = - pCreateInfo->pDepthStencilState->back.stencilCompareMask; - } + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_compare_mask.front = + pCreateInfo->pDepthStencilState->front.stencilCompareMask; + dynamic->stencil_compare_mask.back = + pCreateInfo->pDepthStencilState->back.stencilCompareMask; + } - if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { - assert(pCreateInfo->pDepthStencilState); - dynamic->stencil_write_mask.front = - pCreateInfo->pDepthStencilState->front.stencilWriteMask; - dynamic->stencil_write_mask.back = - pCreateInfo->pDepthStencilState->back.stencilWriteMask; - } + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_write_mask.front = + pCreateInfo->pDepthStencilState->front.stencilWriteMask; + dynamic->stencil_write_mask.back = + pCreateInfo->pDepthStencilState->back.stencilWriteMask; + } - if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { - assert(pCreateInfo->pDepthStencilState); - dynamic->stencil_reference.front = - pCreateInfo->pDepthStencilState->front.stencilReference; - dynamic->stencil_reference.back = - pCreateInfo->pDepthStencilState->back.stencilReference; + if (states & (1 << 
VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_reference.front = + pCreateInfo->pDepthStencilState->front.stencilReference; + dynamic->stencil_reference.back = + pCreateInfo->pDepthStencilState->back.stencilReference; + } } pipeline->dynamic_state_mask = states; -- cgit v1.2.3 From 4d4e559b6a26d2942c2a24cc81588adc2482f415 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 16 Oct 2015 20:03:46 -0700 Subject: vk: Use consistent names for anv_cmd_state dirty bits Prefix all anv_cmd_state dirty bit tokens with ANV_CMD_DIRTY. For example: old -> new ANV_DYNAMIC_VIEWPORT_DIRTY -> ANV_CMD_DIRTY_DYNAMIC_VIEWPORT ANV_CMD_BUFFER_PIPELINE_DIRTY -> ANV_CMD_DIRTY_PIPELINE Change type of anv_cmd_state::dirty and ::compute_dirty from uint32_t to the self-documenting type anv_cmd_dirty_mask_t. --- src/vulkan/anv_cmd_buffer.c | 23 +++++++++++------------ src/vulkan/anv_meta.c | 2 +- src/vulkan/anv_pipeline.c | 2 +- src/vulkan/anv_private.h | 32 +++++++++++++++++--------------- src/vulkan/gen7_cmd_buffer.c | 32 ++++++++++++++++---------------- src/vulkan/gen8_cmd_buffer.c | 34 +++++++++++++++++----------------- 6 files changed, 63 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index b8783f9e212..78c7635ef6d 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -312,14 +312,14 @@ void anv_CmdBindPipeline( switch (pipelineBindPoint) { case VK_PIPELINE_BIND_POINT_COMPUTE: cmd_buffer->state.compute_pipeline = pipeline; - cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE; cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; break; case VK_PIPELINE_BIND_POINT_GRAPHICS: cmd_buffer->state.pipeline = pipeline; cmd_buffer->state.vb_dirty |= pipeline->vb_used; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->state.dirty |= 
ANV_CMD_DIRTY_PIPELINE; cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; /* Apply the dynamic state from the pipeline */ @@ -346,7 +346,7 @@ void anv_CmdSetViewport( memcpy(cmd_buffer->state.dynamic.viewport.viewports, pViewports, viewportCount * sizeof(*pViewports)); - cmd_buffer->state.dirty |= ANV_DYNAMIC_VIEWPORT_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; } void anv_CmdSetScissor( @@ -360,7 +360,7 @@ void anv_CmdSetScissor( memcpy(cmd_buffer->state.dynamic.scissor.scissors, pScissors, scissorCount * sizeof(*pScissors)); - cmd_buffer->state.dirty |= ANV_DYNAMIC_SCISSOR_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; } void anv_CmdSetLineWidth( @@ -370,8 +370,7 @@ void anv_CmdSetLineWidth( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); cmd_buffer->state.dynamic.line_width = lineWidth; - - cmd_buffer->state.dirty |= ANV_DYNAMIC_LINE_WIDTH_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; } void anv_CmdSetDepthBias( @@ -386,7 +385,7 @@ void anv_CmdSetDepthBias( cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; cmd_buffer->state.dynamic.depth_bias.slope_scaled = slopeScaledDepthBias; - cmd_buffer->state.dirty |= ANV_DYNAMIC_DEPTH_BIAS_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; } void anv_CmdSetBlendConstants( @@ -398,7 +397,7 @@ void anv_CmdSetBlendConstants( memcpy(cmd_buffer->state.dynamic.blend_constants, blendConst, sizeof(float) * 4); - cmd_buffer->state.dirty |= ANV_DYNAMIC_BLEND_CONSTANTS_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; } void anv_CmdSetDepthBounds( @@ -411,7 +410,7 @@ void anv_CmdSetDepthBounds( cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; - cmd_buffer->state.dirty |= ANV_DYNAMIC_DEPTH_BOUNDS_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; } void anv_CmdSetStencilCompareMask( @@ -426,7 
+425,7 @@ void anv_CmdSetStencilCompareMask( if (faceMask & VK_STENCIL_FACE_BACK_BIT) cmd_buffer->state.dynamic.stencil_compare_mask.back = stencilCompareMask; - cmd_buffer->state.dirty |= ANV_DYNAMIC_STENCIL_COMPARE_MASK_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; } void anv_CmdSetStencilWriteMask( @@ -441,7 +440,7 @@ void anv_CmdSetStencilWriteMask( if (faceMask & VK_STENCIL_FACE_BACK_BIT) cmd_buffer->state.dynamic.stencil_write_mask.back = stencilWriteMask; - cmd_buffer->state.dirty |= ANV_DYNAMIC_STENCIL_WRITE_MASK_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; } void anv_CmdSetStencilReference( @@ -456,7 +455,7 @@ void anv_CmdSetStencilReference( if (faceMask & VK_STENCIL_FACE_BACK_BIT) cmd_buffer->state.dynamic.stencil_reference.back = stencilReference; - cmd_buffer->state.dirty |= ANV_DYNAMIC_STENCIL_REFERENCE_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; } void anv_CmdBindDescriptorSets( diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 09d035b0b07..591a89928a6 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -345,7 +345,7 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, sizeof(state->old_vertex_bindings)); cmd_buffer->state.vb_dirty |= (1 << NUM_VB_USED) - 1; - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 41a4d06403b..fda382eee19 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -181,7 +181,7 @@ static void anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo) { - uint32_t states = ANV_DYNAMIC_STATE_DIRTY_MASK; + anv_cmd_dirty_mask_t states = 
ANV_CMD_DIRTY_DYNAMIC_ALL; ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index f03620f92ec..aadedb8b53a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -797,19 +797,21 @@ struct anv_buffer { VkDeviceSize offset; }; -/* The first 9 correspond to 1 << VK_DYNAMIC_STATE_FOO */ -#define ANV_DYNAMIC_VIEWPORT_DIRTY (1 << 0) -#define ANV_DYNAMIC_SCISSOR_DIRTY (1 << 1) -#define ANV_DYNAMIC_LINE_WIDTH_DIRTY (1 << 2) -#define ANV_DYNAMIC_DEPTH_BIAS_DIRTY (1 << 3) -#define ANV_DYNAMIC_BLEND_CONSTANTS_DIRTY (1 << 4) -#define ANV_DYNAMIC_DEPTH_BOUNDS_DIRTY (1 << 5) -#define ANV_DYNAMIC_STENCIL_COMPARE_MASK_DIRTY (1 << 6) -#define ANV_DYNAMIC_STENCIL_WRITE_MASK_DIRTY (1 << 7) -#define ANV_DYNAMIC_STENCIL_REFERENCE_DIRTY (1 << 8) -#define ANV_DYNAMIC_STATE_DIRTY_MASK ((1 << 9) - 1) -#define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 9) -#define ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY (1 << 10) +enum anv_cmd_dirty_bits { + ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */ + ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */ + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */ + ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */ + ANV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1, + ANV_CMD_DIRTY_PIPELINE = 1 << 9, + ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10, +}; +typedef uint32_t 
anv_cmd_dirty_mask_t; struct anv_vertex_binding { struct anv_buffer * buffer; @@ -892,8 +894,8 @@ void anv_dynamic_state_copy(struct anv_dynamic_state *dest, struct anv_cmd_state { uint32_t current_pipeline; uint32_t vb_dirty; - uint32_t dirty; - uint32_t compute_dirty; + anv_cmd_dirty_mask_t dirty; + anv_cmd_dirty_mask_t compute_dirty; VkShaderStageFlags descriptors_dirty; VkShaderStageFlags push_constants_dirty; uint32_t scratch_size; diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 0106aa74aa6..a99881f2eb9 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -128,7 +128,7 @@ void gen7_CmdBindIndexBuffer( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; cmd_buffer->state.gen7.index_buffer = buffer; cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType]; cmd_buffer->state.gen7.index_offset = offset; @@ -185,11 +185,11 @@ gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.current_pipeline = GPGPU; } - if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { + (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { /* FIXME: figure out descriptors for gen7 */ result = gen7_flush_compute_descriptor_set(cmd_buffer); assert(result == VK_SUCCESS); @@ -242,7 +242,7 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) } } - if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { /* If somebody compiled a pipeline after starting a 
command buffer the * scratch bo may have grown since we started this cmd buffer (and * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, @@ -256,15 +256,15 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.descriptors_dirty) anv_flush_descriptor_sets(cmd_buffer); - if (cmd_buffer->state.dirty & ANV_DYNAMIC_VIEWPORT_DIRTY) + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) anv_cmd_buffer_emit_viewport(cmd_buffer); - if (cmd_buffer->state.dirty & ANV_DYNAMIC_SCISSOR_DIRTY) + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) anv_cmd_buffer_emit_scissor(cmd_buffer); - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_DYNAMIC_LINE_WIDTH_DIRTY | - ANV_DYNAMIC_DEPTH_BIAS_DIRTY)) { + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) { bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || cmd_buffer->state.dynamic.depth_bias.slope_scaled != 0.0f; @@ -285,8 +285,8 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf); } - if (cmd_buffer->state.dirty & (ANV_DYNAMIC_BLEND_CONSTANTS_DIRTY | - ANV_DYNAMIC_STENCIL_REFERENCE_DIRTY)) { + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, GEN7_COLOR_CALC_STATE_length, 64); @@ -307,9 +307,9 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) .ColorCalcStatePointer = cc_state.offset); } - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_DYNAMIC_STENCIL_COMPARE_MASK_DIRTY | - ANV_DYNAMIC_STENCIL_WRITE_MASK_DIRTY)) { + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { uint32_t 
depth_stencil_dw[GEN7_DEPTH_STENCIL_STATE_length]; struct GEN7_DEPTH_STENCIL_STATE depth_stencil = { @@ -340,8 +340,8 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) } if (cmd_buffer->state.gen7.index_buffer && - cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { + cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_INDEX_BUFFER)) { struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer; uint32_t offset = cmd_buffer->state.gen7.index_offset; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index a1db0170c09..f626cad2831 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -104,7 +104,7 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) } } - if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { /* If somebody compiled a pipeline after starting a command buffer the * scratch bo may have grown since we started this cmd buffer (and * emitted STATE_BASE_ADDRESS). 
If we're binding that pipeline now, @@ -121,14 +121,14 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.push_constants_dirty) gen8_cmd_buffer_flush_push_constants(cmd_buffer); - if (cmd_buffer->state.dirty & ANV_DYNAMIC_VIEWPORT_DIRTY) + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) anv_cmd_buffer_emit_viewport(cmd_buffer); - if (cmd_buffer->state.dirty & ANV_DYNAMIC_SCISSOR_DIRTY) + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) anv_cmd_buffer_emit_scissor(cmd_buffer); - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_DYNAMIC_LINE_WIDTH_DIRTY)) { + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { uint32_t sf_dw[GEN8_3DSTATE_SF_length]; struct GEN8_3DSTATE_SF sf = { GEN8_3DSTATE_SF_header, @@ -138,8 +138,8 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen8.sf); } - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_DYNAMIC_DEPTH_BIAS_DIRTY)) { + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){ bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || cmd_buffer->state.dynamic.depth_bias.slope_scaled != 0.0f; @@ -158,8 +158,8 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) pipeline->gen8.raster); } - if (cmd_buffer->state.dirty & (ANV_DYNAMIC_BLEND_CONSTANTS_DIRTY | - ANV_DYNAMIC_STENCIL_REFERENCE_DIRTY)) { + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, GEN8_COLOR_CALC_STATE_length, 64); @@ -181,9 +181,9 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) .ColorCalcStatePointerValid = true); } - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_DYNAMIC_STENCIL_COMPARE_MASK_DIRTY | - 
ANV_DYNAMIC_STENCIL_WRITE_MASK_DIRTY)) { + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { @@ -210,8 +210,8 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) pipeline->gen8.wm_depth_stencil); } - if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | - ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_INDEX_BUFFER)) { anv_batch_emit_merge(&cmd_buffer->batch, cmd_buffer->state.state_vf, pipeline->gen8.vf); } @@ -331,7 +331,7 @@ void gen8_CmdBindIndexBuffer( }; GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); - cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, .IndexFormat = vk_to_gen_index_type[indexType], @@ -394,11 +394,11 @@ gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.current_pipeline = GPGPU; } - if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { + (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { result = gen8_flush_compute_descriptor_set(cmd_buffer); assert(result == VK_SUCCESS); cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; -- cgit v1.2.3 From 5f5224f2566b091835989759f8d9f1277a192021 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Oct 2015 13:01:15 -0700 Subject: anv/meta: Use the actual render pass for creating blit pipelines --- 
src/vulkan/anv_meta.c | 80 ++++++++++++++++++++++++------------------------ src/vulkan/anv_private.h | 2 ++ 2 files changed, 42 insertions(+), 40 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 591a89928a6..8bfab1f8323 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -529,6 +529,42 @@ meta_blit_get_dest_view_base_array_slice(const struct anv_image *dest_image, static void anv_device_init_meta_blit_state(struct anv_device *device) { + anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputCount = 0, + .colorCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .depthStencilAttachment = (VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveCount = 1, + .pPreserveAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + .dependencyCount = 0, + }, &device->meta_state.blit.render_pass); + /* We don't use a vertex shader for clearing, but instead build and pass * the VUEs directly to the rasterization backend. 
However, we do need * to provide GLSL source for the vertex shader so that the compiler @@ -702,7 +738,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, .flags = 0, .layout = device->meta_state.blit.pipeline_layout, - .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass), + .renderPass = device->meta_state.blit.render_pass, .subpass = 0, }; @@ -862,47 +898,10 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .layers = 1 }, &fb); - VkRenderPass pass; - anv_CreateRenderPass(anv_device_to_handle(device), - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, - .format = dest_iview->format->vk_format, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputCount = 0, - .colorCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .pResolveAttachments = NULL, - .depthStencilAttachment = (VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveCount = 1, - .pPreserveAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - }, - .dependencyCount = 0, - }, &pass); - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = pass, + .renderPass = device->meta_state.blit.render_pass, .framebuffer = fb, .renderArea = { .offset = { dest_offset.x, dest_offset.y }, @@ -958,7 +957,6 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, 
*/ anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); anv_DestroyFramebuffer(anv_device_to_handle(device), fb); - anv_DestroyRenderPass(anv_device_to_handle(device), pass); } static void @@ -1809,6 +1807,8 @@ anv_device_finish_meta(struct anv_device *device) device->meta_state.clear.pipeline); /* Blit */ + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass); anv_DestroyPipeline(anv_device_to_handle(device), device->meta_state.blit.pipeline_2d_src); anv_DestroyPipeline(anv_device_to_handle(device), diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index aadedb8b53a..c0d9373b643 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -451,6 +451,8 @@ struct anv_meta_state { } clear; struct { + VkRenderPass render_pass; + /** Pipeline that blits from a 2D image. */ VkPipeline pipeline_2d_src; -- cgit v1.2.3 From b3a344db3035aa68361c934d08054eadfeab4e1f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 17:05:40 -0700 Subject: anv/compiler: Get rid of GS support. The geometry shader support is currently completely untested. As I go through and re-factor the compiler, I'd rather not refactor dead code that I don't have a way to know if I broke. Let's just remove it for now. We can put it back in easily enough later and then we'll do it properly. 
--- src/vulkan/anv_compiler.cpp | 279 +------------------------------------------- 1 file changed, 3 insertions(+), 276 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 09c2edb3091..0fbc2d1ecba 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -371,265 +371,6 @@ really_do_wm_prog(struct brw_context *brw, return true; } -bool -anv_codegen_gs_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_geometry_program *gp, - struct brw_gs_prog_key *key, - struct anv_pipeline *pipeline) -{ - struct brw_gs_compile c; - - memset(&c, 0, sizeof(c)); - c.key = *key; - c.gp = gp; - - c.prog_data.include_primitive_id = - (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0; - - c.prog_data.invocations = gp->program.Invocations; - - set_binding_table_layout(&c.prog_data.base.base, - pipeline, VK_SHADER_STAGE_GEOMETRY); - - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - * - * Note: param_count needs to be num_uniform_components * 4, since we add - * padding around uniform values below vec4 size, so the worst case is that - * every uniform is a float which gets padded to the size of a vec4. 
- */ - struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; - int param_count = gp->program.Base.nir->num_uniforms * 4; - - c.prog_data.base.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - c.prog_data.base.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - c.prog_data.base.base.image_param = - rzalloc_array(NULL, struct brw_image_param, gs->NumImages); - c.prog_data.base.base.nr_params = param_count; - c.prog_data.base.base.nr_image_params = gs->NumImages; - - brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base, - &c.prog_data.base.base, false); - - if (brw->gen >= 8) { - c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 : - nir_gs_count_vertices(gp->program.Base.nir); - } - - if (brw->gen >= 7) { - if (gp->program.OutputType == GL_POINTS) { - /* When the output type is points, the geometry shader may output data - * to multiple streams, and EndPrimitive() has no effect. So we - * configure the hardware to interpret the control data as stream ID. - */ - c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID; - - /* We only have to emit control bits if we are using streams */ - if (prog->Geom.UsesStreams) - c.control_data_bits_per_vertex = 2; - else - c.control_data_bits_per_vertex = 0; - } else { - /* When the output type is triangle_strip or line_strip, EndPrimitive() - * may be used to terminate the current strip and start a new one - * (similar to primitive restart), and outputting data to multiple - * streams is not supported. So we configure the hardware to interpret - * the control data as EndPrimitive information (a.k.a. "cut bits"). - */ - c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT; - - /* We only need to output control data if the shader actually calls - * EndPrimitive(). - */ - c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 
1 : 0; - } - } else { - /* There are no control data bits in gen6. */ - c.control_data_bits_per_vertex = 0; - - /* If it is using transform feedback, enable it */ - if (prog->TransformFeedback.NumVarying) - c.prog_data.gen6_xfb_enabled = true; - else - c.prog_data.gen6_xfb_enabled = false; - } - c.control_data_header_size_bits = - gp->program.VerticesOut * c.control_data_bits_per_vertex; - - /* 1 HWORD = 32 bytes = 256 bits */ - c.prog_data.control_data_header_size_hwords = - ALIGN(c.control_data_header_size_bits, 256) / 256; - - GLbitfield64 outputs_written = gp->program.Base.OutputsWritten; - - brw_compute_vue_map(brw->intelScreen->devinfo, - &c.prog_data.base.vue_map, outputs_written, - prog ? prog->SeparateShader : false); - - /* Compute the output vertex size. - * - * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex - * Size (p168): - * - * [0,62] indicating [1,63] 16B units - * - * Specifies the size of each vertex stored in the GS output entry - * (following any Control Header data) as a number of 128-bit units - * (minus one). - * - * Programming Restrictions: The vertex size must be programmed as a - * multiple of 32B units with the following exception: Rendering is - * disabled (as per SOL stage state) and the vertex size output by the - * GS thread is 16B. - * - * If rendering is enabled (as per SOL state) the vertex size must be - * programmed as a multiple of 32B units. In other words, the only time - * software can program a vertex size with an odd number of 16B units - * is when rendering is disabled. - * - * Note: B=bytes in the above text. - * - * It doesn't seem worth the extra trouble to optimize the case where the - * vertex size is 16B (especially since this would require special-casing - * the GEN assembly that writes to the URB). So we just set the vertex - * size to a multiple of 32B (2 vec4's) in all cases. - * - * The maximum output vertex size is 62*16 = 992 bytes (31 hwords). 
We - * budget that as follows: - * - * 512 bytes for varyings (a varying component is 4 bytes and - * gl_MaxGeometryOutputComponents = 128) - * 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16 - * bytes) - * 16 bytes overhead for gl_Position (we allocate it a slot in the VUE - * even if it's not used) - * 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots - * whenever clip planes are enabled, even if the shader doesn't - * write to gl_ClipDistance) - * 16 bytes overhead since the VUE size must be a multiple of 32 bytes - * (see below)--this causes up to 1 VUE slot to be wasted - * 400 bytes available for varying packing overhead - * - * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes) - * per interpolation type, so this is plenty. - * - */ - unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16; - assert(brw->gen == 6 || - output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES); - c.prog_data.output_vertex_size_hwords = - ALIGN(output_vertex_size_bytes, 32) / 32; - - /* Compute URB entry size. The maximum allowed URB entry size is 32k. 
- * That divides up as follows: - * - * 64 bytes for the control data header (cut indices or StreamID bits) - * 4096 bytes for varyings (a varying component is 4 bytes and - * gl_MaxGeometryTotalOutputComponents = 1024) - * 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16 - * bytes/vertex and gl_MaxGeometryOutputVertices is 256) - * 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE - * even if it's not used) - * 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots - * whenever clip planes are enabled, even if the shader doesn't - * write to gl_ClipDistance) - * 4096 bytes overhead since the VUE size must be a multiple of 32 - * bytes (see above)--this causes up to 1 VUE slot to be wasted - * 8128 bytes available for varying packing overhead - * - * Worst-case varying packing overhead is 3/4 of a varying slot per - * interpolation type, which works out to 3072 bytes, so this would allow - * us to accommodate 2 interpolation types without any danger of running - * out of URB space. - * - * In practice, the risk of running out of URB space is very small, since - * the above figures are all worst-case, and most of them scale with the - * number of output vertices. So we'll just calculate the amount of space - * we need, and if it's too large, fail to compile. - * - * The above is for gen7+ where we have a single URB entry that will hold - * all the output. In gen6, we will have to allocate URB entries for every - * vertex we emit, so our URB entries only need to be large enough to hold - * a single vertex. Also, gen6 does not have a control data header. 
- */ - unsigned output_size_bytes; - if (brw->gen >= 7) { - output_size_bytes = - c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut; - output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords; - } else { - output_size_bytes = c.prog_data.output_vertex_size_hwords * 32; - } - - /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output, - * which comes before the control header. - */ - if (brw->gen >= 8) - output_size_bytes += 32; - - assert(output_size_bytes >= 1); - int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; - if (brw->gen == 6) - max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; - if (output_size_bytes > max_output_size_bytes) - return false; - - - /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and - * a multiple of 128 bytes in gen6. - */ - if (brw->gen >= 7) - c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; - else - c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128; - - /* FIXME: Need to pull this from nir shader. */ - c.prog_data.output_topology = _3DPRIM_TRISTRIP; - - /* The GLSL linker will have already matched up GS inputs and the outputs - * of prior stages. The driver does extend VS outputs in some cases, but - * only for legacy OpenGL or Gen4-5 hardware, neither of which offer - * geometry shader support. So we can safely ignore that. - * - * For SSO pipelines, we use a fixed VUE map layout based on variable - * locations, so we can rely on rendezvous-by-location making this work. - * - * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not - * written by previous stages and shows up via payload magic. 
- */ - GLbitfield64 inputs_read = - gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID; - brw_compute_vue_map(brw->intelScreen->devinfo, - &c.input_vue_map, inputs_read, - prog->SeparateShader); - - /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we - * need to program a URB read length of ceiling(num_slots / 2). - */ - c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2; - - void *mem_ctx = ralloc_context(NULL); - unsigned program_size; - const unsigned *program = - brw_compile_gs(brw->intelScreen->compiler, brw, &c, gp->program.Base.nir, - prog, mem_ctx, -1, &program_size, NULL); - if (program == NULL) { - ralloc_free(mem_ctx); - return false; - } - - pipeline->gs_vec4 = upload_kernel(pipeline, program, program_size); - pipeline->gs_vertex_count = gp->program.VerticesIn; - - ralloc_free(mem_ctx); - - return true; -} - static bool brw_codegen_cs_prog(struct brw_context *brw, struct gl_shader_program *prog, @@ -1103,23 +844,9 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) pipeline->vs_vec4 = NO_KERNEL; } - - if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) { - struct brw_gs_prog_key gs_key; - struct gl_geometry_program *gp = (struct gl_geometry_program *) - program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program; - struct brw_geometry_program *bgp = brw_geometry_program(gp); - - success = anv_codegen_gs_prog(brw, program, bgp, &gs_key, pipeline); - fail_if(!success, "do_gs_prog failed\n"); - add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, - &pipeline->gs_prog_data.base.base); - - if (gp->Base.OutputsWritten & VARYING_SLOT_PSIZ) - pipeline->writes_point_size = true; - } else { - pipeline->gs_vec4 = NO_KERNEL; - } + /* Geometry shaders not yet supported */ + anv_assert(pipeline->shaders[VK_SHADER_STAGE_GEOMETRY] == NULL); + pipeline->gs_vec4 = NO_KERNEL; if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) { struct brw_wm_prog_key wm_key; -- cgit v1.2.3 From 
611ace68617dd12f91f8ab67e199c0cf9ba4d41b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 18:15:45 -0700 Subject: anv/compiler: Remove more pre-SNB shader key setup --- src/vulkan/anv_compiler.cpp | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 0fbc2d1ecba..14d3fad23e7 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -294,23 +294,6 @@ void brw_wm_populate_key(struct brw_context *brw, ctx->Multisample.Enabled && (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID); - /* BRW_NEW_VUE_MAP_GEOM_OUT */ - if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & - BRW_FS_VARYING_INPUT_MASK) > 16) - key->input_slots_valid = brw->vue_map_geom_out.slots_valid; - - - /* _NEW_COLOR | _NEW_BUFFERS */ - /* Pre-gen6, the hardware alpha test always used each render - * target's alpha to do alpha test, as opposed to render target 0's alpha - * like GL requires. Fix that by building the alpha test into the - * shader, and we'll skip enabling the fixed function alpha test. 
- */ - if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) { - key->alpha_test_func = ctx->Color.AlphaFunc; - key->alpha_test_ref = ctx->Color.AlphaRef; - } - /* The unique fragment program ID */ key->program_string_id = fp->id; -- cgit v1.2.3 From bf6407079bfc221cbac559dcddd10c6f69037982 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 18:46:29 -0700 Subject: i965: Split process_nir into two haves; pre- and post- --- src/mesa/drivers/dri/i965/brw_nir.c | 39 +++++++++++++++++++++++-------------- src/mesa/drivers/dri/i965/brw_nir.h | 11 +++++++---- src/vulkan/anv_compiler.cpp | 8 +++++--- 3 files changed, 36 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index dc497770914..9a33188cb5c 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -200,7 +200,14 @@ brw_create_nir(struct brw_context *brw, } nir_validate_shader(nir); - brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage, is_scalar); + brw_preprocess_nir(nir, brw->intelScreen->devinfo, is_scalar); + + if (shader_prog) { + nir_lower_samplers(nir, shader_prog); + nir_validate_shader(nir); + } + + brw_postprocess_nir(nir, brw->intelScreen->devinfo, is_scalar); static GLuint msg_id = 0; _mesa_gl_debug(&brw->ctx, &msg_id, @@ -208,23 +215,21 @@ brw_create_nir(struct brw_context *brw, MESA_DEBUG_TYPE_OTHER, MESA_DEBUG_SEVERITY_NOTIFICATION, "%s NIR shader:\n", - _mesa_shader_stage_to_abbrev(stage)); + _mesa_shader_stage_to_abbrev(nir->stage)); return nir; } void -brw_process_nir(nir_shader *nir, - const struct brw_device_info *devinfo, - const struct gl_shader_program *shader_prog, - gl_shader_stage stage, bool is_scalar) +brw_preprocess_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + bool is_scalar) { - bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); static const 
nir_lower_tex_options tex_options = { .lower_txp = ~0, }; - if (stage == MESA_SHADER_GEOMETRY) { + if (nir->stage == MESA_SHADER_GEOMETRY) { nir_lower_gs_intrinsics(nir); nir_validate_shader(nir); } @@ -249,6 +254,15 @@ brw_process_nir(nir_shader *nir, /* Get rid of split copies */ nir_optimize(nir, is_scalar); +} + +void +brw_postprocess_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + bool is_scalar) +{ + bool debug_enabled = + (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage)); brw_nir_lower_inputs(nir, is_scalar); brw_nir_lower_outputs(nir, is_scalar); @@ -261,11 +275,6 @@ brw_process_nir(nir_shader *nir, nir_remove_dead_variables(nir); nir_validate_shader(nir); - if (shader_prog) { - nir_lower_samplers(nir, shader_prog); - nir_validate_shader(nir); - } - nir_lower_system_values(nir); nir_validate_shader(nir); @@ -301,7 +310,7 @@ brw_process_nir(nir_shader *nir, } fprintf(stderr, "NIR (SSA form) for %s shader:\n", - _mesa_shader_stage_to_string(stage)); + _mesa_shader_stage_to_string(nir->stage)); nir_print_shader(nir, stderr); } @@ -328,7 +337,7 @@ brw_process_nir(nir_shader *nir, if (unlikely(debug_enabled)) { fprintf(stderr, "NIR (final form) for %s shader:\n", - _mesa_shader_stage_to_string(stage)); + _mesa_shader_stage_to_string(nir->stage)); nir_print_shader(nir, stderr); } } diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 19e55527545..a6d6768795a 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -86,10 +86,13 @@ enum brw_reg_type brw_type_for_nir_type(nir_alu_type type); enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type); void -brw_process_nir(nir_shader *nir, - const struct brw_device_info *devinfo, - const struct gl_shader_program *shader_prog, - gl_shader_stage stage, bool is_scalar); +brw_preprocess_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + bool is_scalar); +void +brw_postprocess_nir(nir_shader 
*nir, + const struct brw_device_info *devinfo, + bool is_scalar); void brw_nir_setup_glsl_uniforms(nir_shader *shader, struct gl_shader_program *shader_prog, diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 14d3fad23e7..5d74f0f131e 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -729,11 +729,13 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, } nir_validate_shader(mesa_shader->Program->nir); + brw_preprocess_nir(mesa_shader->Program->nir, + compiler->screen->devinfo, is_scalar); + setup_nir_io(mesa_shader, mesa_shader->Program->nir); - brw_process_nir(mesa_shader->Program->nir, - compiler->screen->devinfo, - NULL, mesa_shader->Stage, is_scalar); + brw_postprocess_nir(mesa_shader->Program->nir, + compiler->screen->devinfo, is_scalar); mesa_shader->num_uniform_components = mesa_shader->Program->nir->num_uniforms; -- cgit v1.2.3 From 9e3615cc7de8323075b907ae95c5e65c1c3e2fd7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 20:19:09 -0700 Subject: i965: Move brw_compiler_create to brw_compiler.h --- src/mesa/drivers/dri/i965/brw_compiler.h | 3 +++ src/mesa/drivers/dri/i965/brw_shader.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 11c485d2f08..f6d5ab87be9 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -93,6 +93,9 @@ struct brw_compiler { struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; }; +struct brw_compiler * +brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo); + /** * Program key structures. 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index b33b08f40d7..2e47690d403 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -233,9 +233,6 @@ bool opt_predicated_break(struct backend_shader *s); extern "C" { #endif -struct brw_compiler * -brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo); - void brw_assign_common_binding_table_offsets(gl_shader_stage stage, const struct brw_device_info *devinfo, -- cgit v1.2.3 From 6fb4469588ba37ace4794b354c9fd30d18b5c9ff Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 20:21:45 -0700 Subject: anv: Move the brw_compiler from anv_compiler to physical_device --- src/vulkan/anv_compiler.cpp | 2 +- src/vulkan/anv_device.c | 13 +++++++++++++ src/vulkan/anv_private.h | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 5d74f0f131e..1ecd88a0331 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -460,7 +460,7 @@ anv_compiler_create(struct anv_device *device) brw_process_intel_debug_variable(); - compiler->screen->compiler = brw_compiler_create(compiler, &device->info); + compiler->screen->compiler = device->instance->physicalDevice.compiler; ctx = &compiler->brw->ctx; _mesa_init_shader_object_functions(&ctx->Driver); diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 75fbe691c5f..e0bb7f6e4bc 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -91,6 +91,12 @@ anv_physical_device_init(struct anv_physical_device *device, close(fd); + device->compiler = brw_compiler_create(NULL, device->info); + if (device->compiler == NULL) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + return VK_SUCCESS; fail: @@ -98,6 +104,12 @@ fail: return result; } +static void +anv_physical_device_finish(struct anv_physical_device *device) +{ + 
ralloc_free(device->compiler); +} + static void *default_alloc( void* pUserData, size_t size, @@ -193,6 +205,7 @@ void anv_DestroyInstance( { ANV_FROM_HANDLE(anv_instance, instance, _instance); + anv_physical_device_finish(&instance->physicalDevice); anv_finish_wsi(instance); VG(VALGRIND_DESTROY_MEMPOOL(instance)); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index c0d9373b643..8c48a9c2b0a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -427,6 +427,7 @@ struct anv_physical_device { const char * name; const struct brw_device_info * info; uint64_t aperture_size; + struct brw_compiler * compiler; }; struct anv_instance { -- cgit v1.2.3 From 2d9e899e3576120f1a671c6cc38835b41269e607 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 22:05:36 -0700 Subject: nir: Add a pass to gather info from the shader This pass fills out a bunch of the fields in nir_shader_info by inspecting the shader. --- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir.h | 2 + src/glsl/nir/nir_gather_info.c | 101 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+) create mode 100644 src/glsl/nir/nir_gather_info.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 47dc628101d..05e76049764 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -32,6 +32,7 @@ NIR_FILES = \ nir/nir_control_flow_private.h \ nir/nir_dominance.c \ nir/nir_from_ssa.c \ + nir/nir_gather_info.c \ nir/nir_gs_count_vertices.c \ nir/nir_intrinsics.c \ nir/nir_intrinsics.h \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 825c34805c4..9939b9e91a2 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1939,6 +1939,8 @@ void nir_lower_outputs_to_temporaries(nir_shader *shader); void nir_lower_outputs_to_temporaries(nir_shader *shader); +void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); + void nir_assign_var_locations(struct exec_list 
*var_list, unsigned *size, int (*type_size)(const struct glsl_type *)); diff --git a/src/glsl/nir/nir_gather_info.c b/src/glsl/nir/nir_gather_info.c new file mode 100644 index 00000000000..4893945c18b --- /dev/null +++ b/src/glsl/nir/nir_gather_info.c @@ -0,0 +1,101 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "nir.h" + +static void +gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) +{ + switch (instr->intrinsic) { + case nir_intrinsic_discard: + assert(shader->stage == MESA_SHADER_FRAGMENT); + shader->info.fs.uses_discard = true; + break; + + case nir_intrinsic_load_front_face: + case nir_intrinsic_load_vertex_id: + case nir_intrinsic_load_vertex_id_zero_base: + case nir_intrinsic_load_base_vertex: + case nir_intrinsic_load_instance_id: + case nir_intrinsic_load_sample_id: + case nir_intrinsic_load_sample_pos: + case nir_intrinsic_load_sample_mask_in: + case nir_intrinsic_load_primitive_id: + case nir_intrinsic_load_invocation_id: + case nir_intrinsic_load_local_invocation_id: + case nir_intrinsic_load_work_group_id: + case nir_intrinsic_load_num_work_groups: + shader->info.system_values_read |= + (1 << nir_system_value_from_intrinsic(instr->intrinsic)); + break; + default: + break; + } +} + +static void +gather_tex_info(nir_tex_instr *instr, nir_shader *shader) +{ + if (instr->op == nir_texop_tg4) + shader->info.uses_texture_gather = true; +} + +static bool +gather_info_block(nir_block *block, void *shader) +{ + nir_foreach_instr(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: + gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader); + break; + case nir_instr_type_tex: + gather_tex_info(nir_instr_as_tex(instr), shader); + break; + case nir_instr_type_call: + assert(!"nir_shader_gather_info only works if functions are inlined"); + break; + default: + break; + } + } + + return true; +} + +void +nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) +{ + shader->info.inputs_read = 0; + foreach_list_typed(nir_variable, var, node, &shader->inputs) + shader->info.inputs_read |= (1ull << var->data.location); + + shader->info.outputs_written = 0; + foreach_list_typed(nir_variable, var, node, &shader->outputs) + shader->info.outputs_written |= (1ull << var->data.location); + + 
shader->info.system_values_read = 0; + foreach_list_typed(nir_variable, var, node, &shader->system_values) + shader->info.system_values_read |= (1ull << var->data.location); + + nir_foreach_block(entrypoint, gather_info_block, shader); +} -- cgit v1.2.3 From a71e614d33e8d869bbaced8948349a7180783ab7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 22:06:59 -0700 Subject: anv: Completely rework shader compilation Now that we have a decent interface in upstream mesa, we can get rid of all our hacks. As of this commit, we no longer use any fake GL state objects and all of shader compilation is moved into anv_pipeline.c. This should make way for actually implementing a shader cache one of these days. As a nice side-benifit, this commit also gains us an extra 300 passing CTS tests because we're actually filling out the texture swizzle information for vertex shaders. --- src/vulkan/Makefile.am | 7 +- src/vulkan/anv_compiler.cpp | 891 -------------------------------------------- src/vulkan/anv_device.c | 25 +- src/vulkan/anv_pipeline.c | 646 ++++++++++++++++++++++++++++++-- src/vulkan/anv_private.h | 12 +- src/vulkan/gen8_pipeline.c | 22 +- 6 files changed, 661 insertions(+), 942 deletions(-) delete mode 100644 src/vulkan/anv_compiler.cpp (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 5abbd379b54..8538046e567 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -55,14 +55,10 @@ libvulkan_la_CFLAGS = \ -Wall -Wno-unused-parameter -fvisibility=hidden -O0 -g \ -Wstrict-prototypes -Wmissing-prototypes -Wno-override-init -libvulkan_la_CXXFLAGS = \ - -Wall -Wno-unused-parameter -fvisibility=hidden -O0 -g - VULKAN_SOURCES = \ anv_allocator.c \ anv_cmd_buffer.c \ anv_batch_chain.c \ - anv_compiler.cpp \ anv_device.c \ anv_dump.c \ anv_entrypoints.c \ @@ -124,7 +120,7 @@ libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ ../mesa/libmesa.la 
\ ../mesa/drivers/dri/common/libdri_test_stubs.la \ - -lpthread -ldl + -lpthread -ldl -lstdc++ # Libvulkan with dummy gem. Used for unit tests. @@ -133,7 +129,6 @@ libvulkan_test_la_SOURCES = \ anv_gem_stubs.c libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) -libvulkan_test_la_CXXFLAGS = $(libvulkan_la_CXXFLAGS) libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) include $(top_srcdir)/install-lib-links.mk diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp deleted file mode 100644 index 1ecd88a0331..00000000000 --- a/src/vulkan/anv_compiler.cpp +++ /dev/null @@ -1,891 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include -#include -#include - -#include "anv_private.h" -#include "anv_nir.h" - -#include -#include /* brw_new_shader_program is here */ -#include - -#include -#include -#include -#include "brw_vec4_gs_visitor.h" -#include - -#include -#include -#include -#include -#include - -/* XXX: We need this to keep symbols in nir.h from conflicting with the - * generated GEN command packing headers. We need to fix *both* to not - * define something as generic as LOAD. - */ -#undef LOAD - -#include - -#define SPIR_V_MAGIC_NUMBER 0x07230203 - -static void -fail_if(int cond, const char *format, ...) -{ - va_list args; - - if (!cond) - return; - - va_start(args, format); - vfprintf(stderr, format, args); - va_end(args); - - exit(1); -} - -static VkResult -set_binding_table_layout(struct brw_stage_prog_data *prog_data, - struct anv_pipeline *pipeline, uint32_t stage) -{ - unsigned bias; - if (stage == VK_SHADER_STAGE_FRAGMENT) - bias = MAX_RTS; - else - bias = 0; - - prog_data->binding_table.size_bytes = 0; - prog_data->binding_table.texture_start = bias; - prog_data->binding_table.ubo_start = bias; - prog_data->binding_table.image_start = bias; - - return VK_SUCCESS; -} - -static uint32_t -upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size) -{ - struct anv_state state = - anv_state_stream_alloc(&pipeline->program_stream, size, 64); - - assert(size < pipeline->program_stream.block_pool->block_size); - - memcpy(state.map, data, size); - - return state.offset; -} - -static void -create_params_array(struct anv_pipeline *pipeline, - struct gl_shader *shader, - struct brw_stage_prog_data *prog_data) -{ - VkShaderStage stage = anv_vk_shader_stage_for_mesa_stage(shader->Stage); - unsigned num_params = 0; - - if (shader->num_uniform_components) { - /* If the shader uses any push constants at all, we'll just give - * them the maximum possible number - */ - num_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); - } - - if (pipeline->layout && 
pipeline->layout->stage[stage].has_dynamic_offsets) - num_params += MAX_DYNAMIC_BUFFERS; - - if (num_params == 0) - return; - - prog_data->param = (const gl_constant_value **) - anv_device_alloc(pipeline->device, - num_params * sizeof(gl_constant_value *), - 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER); - - /* We now set the param values to be offsets into a - * anv_push_constant_data structure. Since the compiler doesn't - * actually dereference any of the gl_constant_value pointers in the - * params array, it doesn't really matter what we put here. - */ - struct anv_push_constants *null_data = NULL; - for (unsigned i = 0; i < num_params; i++) - prog_data->param[i] = - (const gl_constant_value *)&null_data->client_data[i * sizeof(float)]; -} - -static void -brw_vs_populate_key(struct brw_context *brw, - struct brw_vertex_program *vp, - struct brw_vs_prog_key *key) -{ - memset(key, 0, sizeof(*key)); - - /* XXX: Handle vertex input work-arounds */ - - /* XXX: Handle sampler_prog_key */ -} - -static bool -really_do_vs_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_vertex_program *vp, - struct brw_vs_prog_key *key, struct anv_pipeline *pipeline) -{ - GLuint program_size; - const GLuint *program; - struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; - void *mem_ctx; - struct gl_shader *vs = NULL; - - if (prog) - vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; - - memset(prog_data, 0, sizeof(*prog_data)); - - mem_ctx = ralloc_context(NULL); - - create_params_array(pipeline, vs, &prog_data->base.base); - anv_nir_apply_dynamic_offsets(pipeline, vs->Program->nir, - &prog_data->base.base); - anv_nir_apply_pipeline_layout(vs->Program->nir, pipeline->layout); - - prog_data->inputs_read = vp->program.Base.InputsRead; - - brw_compute_vue_map(brw->intelScreen->devinfo, - &prog_data->base.vue_map, - vp->program.Base.OutputsWritten, - prog ? 
prog->SeparateShader : false); - - set_binding_table_layout(&prog_data->base.base, pipeline, - VK_SHADER_STAGE_VERTEX); - - /* Emit GEN4 code. - */ - program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx, - key, prog_data, vs->Program->nir, NULL, false, -1, - &program_size, NULL); - if (program == NULL) { - ralloc_free(mem_ctx); - return false; - } - - const uint32_t offset = upload_kernel(pipeline, program, program_size); - if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { - pipeline->vs_simd8 = offset; - pipeline->vs_vec4 = NO_KERNEL; - } else { - pipeline->vs_simd8 = NO_KERNEL; - pipeline->vs_vec4 = offset; - } - - ralloc_free(mem_ctx); - - return true; -} - -void brw_wm_populate_key(struct brw_context *brw, - struct brw_fragment_program *fp, - struct brw_wm_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - bool program_uses_dfdy = fp->program.UsesDFdy; - struct gl_framebuffer draw_buffer; - bool multisample_fbo; - - memset(key, 0, sizeof(*key)); - - for (int i = 0; i < MAX_SAMPLERS; i++) { - /* Assume color sampler, no swizzling. */ - key->tex.swizzles[i] = SWIZZLE_XYZW; - } - - /* A non-zero framebuffer name indicates that the framebuffer was created by - * the user rather than the window system. 
*/ - draw_buffer.Name = 1; - draw_buffer.Visual.samples = 1; - draw_buffer._NumColorDrawBuffers = 1; - draw_buffer._NumColorDrawBuffers = 1; - draw_buffer.Width = 400; - draw_buffer.Height = 400; - ctx->DrawBuffer = &draw_buffer; - - multisample_fbo = ctx->DrawBuffer->Visual.samples > 1; - - /* _NEW_HINT */ - key->high_quality_derivatives = - ctx->Hint.FragmentShaderDerivative == GL_NICEST; - - /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ - key->clamp_fragment_color = ctx->Color._ClampFragmentColor; - - /* _NEW_BUFFERS */ - /* - * Include the draw buffer origin and height so that we can calculate - * fragment position values relative to the bottom left of the drawable, - * from the incoming screen origin relative position we get as part of our - * payload. - * - * This is only needed for the WM_WPOSXY opcode when the fragment program - * uses the gl_FragCoord input. - * - * We could avoid recompiling by including this as a constant referenced by - * our program, but if we were to do that it would also be nice to handle - * getting that constant updated at batchbuffer submit time (when we - * hold the lock and know where the buffer really is) rather than at emit - * time when we don't hold the lock and are just guessing. We could also - * just avoid using this as key data if the program doesn't use - * fragment.position. - * - * For DRI2 the origin_x/y will always be (0,0) but we still need the - * drawable height in order to invert the Y axis. 
- */ - if (fp->program.Base.InputsRead & VARYING_BIT_POS) { - key->drawable_height = ctx->DrawBuffer->Height; - } - - if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { - key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); - } - - /* _NEW_BUFFERS */ - key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; - - /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */ - key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 && - (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled); - - /* _NEW_BUFFERS _NEW_MULTISAMPLE */ - /* Ignore sample qualifier while computing this flag. */ - key->persample_shading = - _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1; - if (key->persample_shading) - key->persample_2x = ctx->DrawBuffer->Visual.samples == 2; - - key->compute_pos_offset = - _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 && - fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS; - - key->compute_sample_id = - multisample_fbo && - ctx->Multisample.Enabled && - (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID); - - /* The unique fragment program ID */ - key->program_string_id = fp->id; - - ctx->DrawBuffer = NULL; -} - -static bool -really_do_wm_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_fragment_program *fp, - struct brw_wm_prog_key *key, struct anv_pipeline *pipeline) -{ - void *mem_ctx = ralloc_context(NULL); - struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; - struct gl_shader *fs = NULL; - unsigned int program_size; - const uint32_t *program; - - if (prog) - fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - - memset(prog_data, 0, sizeof(*prog_data)); - - create_params_array(pipeline, fs, &prog_data->base); - anv_nir_apply_dynamic_offsets(pipeline, fs->Program->nir, &prog_data->base); - anv_nir_apply_pipeline_layout(fs->Program->nir, pipeline->layout); - - set_binding_table_layout(&prog_data->base, pipeline, - 
VK_SHADER_STAGE_FRAGMENT); - /* This needs to come after shader time and pull constant entries, but we - * don't have those set up now, so just put it after the layout entries. - */ - prog_data->binding_table.render_target_start = 0; - - program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx, key, - prog_data, fp->program.Base.nir, fs->Program, - -1, -1, brw->use_rep_send, &program_size, NULL); - if (program == NULL) { - ralloc_free(mem_ctx); - return false; - } - - uint32_t offset = upload_kernel(pipeline, program, program_size); - - if (prog_data->no_8) - pipeline->ps_simd8 = NO_KERNEL; - else - pipeline->ps_simd8 = offset; - - if (prog_data->no_8 || prog_data->prog_offset_16) { - pipeline->ps_simd16 = offset + prog_data->prog_offset_16; - } else { - pipeline->ps_simd16 = NO_KERNEL; - } - - ralloc_free(mem_ctx); - - return true; -} - -static bool -brw_codegen_cs_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_compute_program *cp, - struct brw_cs_prog_key *key, struct anv_pipeline *pipeline) -{ - const GLuint *program; - void *mem_ctx = ralloc_context(NULL); - GLuint program_size; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - - struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; - assert (cs); - - memset(prog_data, 0, sizeof(*prog_data)); - - set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE); - - create_params_array(pipeline, cs, &prog_data->base); - anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base); - anv_nir_apply_pipeline_layout(cs->Program->nir, pipeline->layout); - - program = brw_compile_cs(brw->intelScreen->compiler, brw, mem_ctx, key, - prog_data, cs->Program->nir, -1, - &program_size, NULL); - if (program == NULL) { - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(INTEL_DEBUG & DEBUG_CS)) - fprintf(stderr, "\n"); - - pipeline->cs_simd = upload_kernel(pipeline, program, program_size); - - ralloc_free(mem_ctx); - - 
return true; -} - -static void -brw_cs_populate_key(struct brw_context *brw, - struct brw_compute_program *bcp, struct brw_cs_prog_key *key) -{ - memset(key, 0, sizeof(*key)); - - /* The unique compute program ID */ - key->program_string_id = bcp->id; -} - -struct anv_compiler { - struct anv_device *device; - struct intel_screen *screen; - struct brw_context *brw; - struct gl_pipeline_object pipeline; -}; - -extern "C" { - -struct anv_compiler * -anv_compiler_create(struct anv_device *device) -{ - const struct brw_device_info *devinfo = &device->info; - struct anv_compiler *compiler; - struct gl_context *ctx; - - compiler = rzalloc(NULL, struct anv_compiler); - if (compiler == NULL) - return NULL; - - compiler->screen = rzalloc(compiler, struct intel_screen); - if (compiler->screen == NULL) - goto fail; - - compiler->brw = rzalloc(compiler, struct brw_context); - if (compiler->brw == NULL) - goto fail; - - compiler->device = device; - - compiler->brw->gen = devinfo->gen; - compiler->brw->is_g4x = devinfo->is_g4x; - compiler->brw->is_baytrail = devinfo->is_baytrail; - compiler->brw->is_haswell = devinfo->is_haswell; - compiler->brw->is_cherryview = devinfo->is_cherryview; - - /* We need this at least for CS, which will check brw->max_cs_threads - * against the work group size. 
*/ - compiler->brw->max_vs_threads = devinfo->max_vs_threads; - compiler->brw->max_hs_threads = devinfo->max_hs_threads; - compiler->brw->max_ds_threads = devinfo->max_ds_threads; - compiler->brw->max_gs_threads = devinfo->max_gs_threads; - compiler->brw->max_wm_threads = devinfo->max_wm_threads; - compiler->brw->max_cs_threads = devinfo->max_cs_threads; - compiler->brw->urb.size = devinfo->urb.size; - compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries; - compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries; - compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries; - compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries; - compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries; - - compiler->brw->intelScreen = compiler->screen; - compiler->screen->devinfo = &device->info; - - brw_process_intel_debug_variable(); - - compiler->screen->compiler = device->instance->physicalDevice.compiler; - - ctx = &compiler->brw->ctx; - _mesa_init_shader_object_functions(&ctx->Driver); - - /* brw_select_clip_planes() needs this for bogus reasons. */ - ctx->_Shader = &compiler->pipeline; - - return compiler; - - fail: - ralloc_free(compiler); - return NULL; -} - -void -anv_compiler_destroy(struct anv_compiler *compiler) -{ - _mesa_free_errors_data(&compiler->brw->ctx); - ralloc_free(compiler); -} - -/* From gen7_urb.c */ - -/* FIXME: Add to struct intel_device_info */ - -static const int gen8_push_size = 32 * 1024; - -static void -gen7_compute_urb_partition(struct anv_pipeline *pipeline) -{ - const struct brw_device_info *devinfo = &pipeline->device->info; - bool vs_present = pipeline->vs_simd8 != NO_KERNEL; - unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; - unsigned vs_entry_size_bytes = vs_size * 64; - bool gs_present = pipeline->gs_vec4 != NO_KERNEL; - unsigned gs_size = gs_present ? 
pipeline->gs_prog_data.base.urb_entry_size : 1; - unsigned gs_entry_size_bytes = gs_size * 64; - - /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): - * - * VS Number of URB Entries must be divisible by 8 if the VS URB Entry - * Allocation Size is less than 9 512-bit URB entries. - * - * Similar text exists for GS. - */ - unsigned vs_granularity = (vs_size < 9) ? 8 : 1; - unsigned gs_granularity = (gs_size < 9) ? 8 : 1; - - /* URB allocations must be done in 8k chunks. */ - unsigned chunk_size_bytes = 8192; - - /* Determine the size of the URB in chunks. */ - unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; - - /* Reserve space for push constants */ - unsigned push_constant_bytes = gen8_push_size; - unsigned push_constant_chunks = - push_constant_bytes / chunk_size_bytes; - - /* Initially, assign each stage the minimum amount of URB space it needs, - * and make a note of how much additional space it "wants" (the amount of - * additional space it could actually make use of). - */ - - /* VS has a lower limit on the number of URB entries */ - unsigned vs_chunks = - ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes; - unsigned vs_wants = - ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes - vs_chunks; - - unsigned gs_chunks = 0; - unsigned gs_wants = 0; - if (gs_present) { - /* There are two constraints on the minimum amount of URB space we can - * allocate: - * - * (1) We need room for at least 2 URB entries, since we always operate - * the GS in DUAL_OBJECT mode. - * - * (2) We can't allocate less than nr_gs_entries_granularity. 
- */ - gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes; - gs_wants = - ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes - gs_chunks; - } - - /* There should always be enough URB space to satisfy the minimum - * requirements of each stage. - */ - unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; - assert(total_needs <= urb_chunks); - - /* Mete out remaining space (if any) in proportion to "wants". */ - unsigned total_wants = vs_wants + gs_wants; - unsigned remaining_space = urb_chunks - total_needs; - if (remaining_space > total_wants) - remaining_space = total_wants; - if (remaining_space > 0) { - unsigned vs_additional = (unsigned) - round(vs_wants * (((double) remaining_space) / total_wants)); - vs_chunks += vs_additional; - remaining_space -= vs_additional; - gs_chunks += remaining_space; - } - - /* Sanity check that we haven't over-allocated. */ - assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); - - /* Finally, compute the number of entries that can fit in the space - * allocated to each stage. - */ - unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; - unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; - - /* Since we rounded up when computing *_wants, this may be slightly more - * than the maximum allowed amount, so correct for that. - */ - nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); - nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); - - /* Ensure that we program a multiple of the granularity. */ - nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); - nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); - - /* Finally, sanity check to make sure we have at least the minimum number - * of entries needed for each stage. 
- */ - assert(nr_vs_entries >= devinfo->urb.min_vs_entries); - if (gs_present) - assert(nr_gs_entries >= 2); - - /* Lay out the URB in the following order: - * - push constants - * - VS - * - GS - */ - pipeline->urb.vs_start = push_constant_chunks; - pipeline->urb.vs_size = vs_size; - pipeline->urb.nr_vs_entries = nr_vs_entries; - - pipeline->urb.gs_start = push_constant_chunks + vs_chunks; - pipeline->urb.gs_size = gs_size; - pipeline->urb.nr_gs_entries = nr_gs_entries; -} - -static const struct { - uint32_t token; - gl_shader_stage stage; - const char *name; -} stage_info[] = { - { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" }, - { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1,"tess control" }, - { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" }, - { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" }, - { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" }, - { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" }, -}; - -struct spirv_header{ - uint32_t magic; - uint32_t version; - uint32_t gen_magic; -}; - -static void -setup_nir_io(struct gl_shader *mesa_shader, - nir_shader *shader) -{ - struct gl_program *prog = mesa_shader->Program; - foreach_list_typed(nir_variable, var, node, &shader->inputs) { - prog->InputsRead |= BITFIELD64_BIT(var->data.location); - if (shader->stage == MESA_SHADER_FRAGMENT) { - struct gl_fragment_program *fprog = (struct gl_fragment_program *)prog; - - fprog->InterpQualifier[var->data.location] = - (glsl_interp_qualifier)var->data.interpolation; - if (var->data.centroid) - fprog->IsCentroid |= BITFIELD64_BIT(var->data.location); - if (var->data.sample) - fprog->IsSample |= BITFIELD64_BIT(var->data.location); - } - } - - foreach_list_typed(nir_variable, var, node, &shader->outputs) { - prog->OutputsWritten |= BITFIELD64_BIT(var->data.location); - } - - shader->info.system_values_read = 0; - foreach_list_typed(nir_variable, var, node, &shader->system_values) { - shader->info.system_values_read |= 
BITFIELD64_BIT(var->data.location); - } - - shader->info.inputs_read = prog->InputsRead; - shader->info.outputs_written = prog->OutputsWritten; -} - -static void -anv_compile_shader_spirv(struct anv_compiler *compiler, - struct gl_shader_program *program, - struct anv_pipeline *pipeline, uint32_t stage) -{ - struct brw_context *brw = compiler->brw; - struct anv_shader *shader = pipeline->shaders[stage]; - struct gl_shader *mesa_shader; - int name = 0; - - mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); - fail_if(mesa_shader == NULL, - "failed to create %s shader\n", stage_info[stage].name); - -#define CREATE_PROGRAM(stage) \ - &ralloc(mesa_shader, struct brw_##stage##_program)->program.Base - - bool is_scalar; - struct gl_program *prog; - switch (stage) { - case VK_SHADER_STAGE_VERTEX: - prog = CREATE_PROGRAM(vertex); - is_scalar = compiler->screen->compiler->scalar_vs; - break; - case VK_SHADER_STAGE_GEOMETRY: - prog = CREATE_PROGRAM(geometry); - is_scalar = false; - break; - case VK_SHADER_STAGE_FRAGMENT: - prog = CREATE_PROGRAM(fragment); - is_scalar = true; - break; - case VK_SHADER_STAGE_COMPUTE: - prog = CREATE_PROGRAM(compute); - is_scalar = true; - break; - default: - unreachable("Unsupported shader stage"); - } - _mesa_init_gl_program(prog, 0, 0); - _mesa_reference_program(&brw->ctx, &mesa_shader->Program, prog); - - mesa_shader->Program->Parameters = - rzalloc(mesa_shader, struct gl_program_parameter_list); - - mesa_shader->Type = stage_info[stage].token; - mesa_shader->Stage = stage_info[stage].stage; - - struct gl_shader_compiler_options *glsl_options = - &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage]; - - if (shader->module->nir) { - /* Some things such as our meta clear/blit code will give us a NIR - * shader directly. 
In that case, we just ignore the SPIR-V entirely - * and just use the NIR shader */ - mesa_shader->Program->nir = shader->module->nir; - mesa_shader->Program->nir->options = glsl_options->NirOptions; - } else { - uint32_t *spirv = (uint32_t *) shader->module->data; - assert(spirv[0] == SPIR_V_MAGIC_NUMBER); - assert(shader->module->size % 4 == 0); - - mesa_shader->Program->nir = - spirv_to_nir(spirv, shader->module->size / 4, - stage_info[stage].stage, glsl_options->NirOptions); - } - nir_validate_shader(mesa_shader->Program->nir); - - brw_preprocess_nir(mesa_shader->Program->nir, - compiler->screen->devinfo, is_scalar); - - setup_nir_io(mesa_shader, mesa_shader->Program->nir); - - brw_postprocess_nir(mesa_shader->Program->nir, - compiler->screen->devinfo, is_scalar); - - mesa_shader->num_uniform_components = - mesa_shader->Program->nir->num_uniforms; - - fail_if(mesa_shader->Program->nir == NULL, - "failed to translate SPIR-V to NIR\n"); - - _mesa_reference_shader(&brw->ctx, &program->Shaders[program->NumShaders], - mesa_shader); - program->NumShaders++; -} - -static void -add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage, - struct brw_stage_prog_data *prog_data) -{ - struct brw_device_info *devinfo = &pipeline->device->info; - uint32_t max_threads[] = { - [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads, - [VK_SHADER_STAGE_TESS_CONTROL] = 0, - [VK_SHADER_STAGE_TESS_EVALUATION] = 0, - [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads, - [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads, - [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads, - }; - - pipeline->prog_data[stage] = prog_data; - pipeline->active_stages |= 1 << stage; - pipeline->scratch_start[stage] = pipeline->total_scratch; - pipeline->total_scratch = - align_u32(pipeline->total_scratch, 1024) + - prog_data->total_scratch * max_threads[stage]; -} - -int -anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) -{ - struct gl_shader_program *program; - int 
name = 0; - struct brw_context *brw = compiler->brw; - - pipeline->writes_point_size = false; - - /* When we free the pipeline, we detect stages based on the NULL status - * of various prog_data pointers. Make them NULL by default. - */ - memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); - - brw->use_rep_send = pipeline->use_repclear; - brw->no_simd8 = pipeline->use_repclear; - - program = _mesa_new_shader_program(name); - program->Shaders = (struct gl_shader **) - calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *)); - fail_if(program == NULL || program->Shaders == NULL, - "failed to create program\n"); - - for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { - if (pipeline->shaders[i]) - anv_compile_shader_spirv(compiler, program, pipeline, i); - } - - for (unsigned i = 0; i < program->NumShaders; i++) { - struct gl_shader *shader = program->Shaders[i]; - program->_LinkedShaders[shader->Stage] = shader; - } - - bool success; - pipeline->active_stages = 0; - pipeline->total_scratch = 0; - - if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) { - struct brw_vs_prog_key vs_key; - struct gl_vertex_program *vp = (struct gl_vertex_program *) - program->_LinkedShaders[MESA_SHADER_VERTEX]->Program; - struct brw_vertex_program *bvp = brw_vertex_program(vp); - - brw_vs_populate_key(brw, bvp, &vs_key); - - success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline); - fail_if(!success, "do_wm_prog failed\n"); - add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX, - &pipeline->vs_prog_data.base.base); - - if (vp->Base.OutputsWritten & VARYING_SLOT_PSIZ) - pipeline->writes_point_size = true; - } else { - memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); - pipeline->vs_simd8 = NO_KERNEL; - pipeline->vs_vec4 = NO_KERNEL; - } - - /* Geometry shaders not yet supported */ - anv_assert(pipeline->shaders[VK_SHADER_STAGE_GEOMETRY] == NULL); - pipeline->gs_vec4 = NO_KERNEL; - - if 
(pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) { - struct brw_wm_prog_key wm_key; - struct gl_fragment_program *fp = (struct gl_fragment_program *) - program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; - struct brw_fragment_program *bfp = brw_fragment_program(fp); - - brw_wm_populate_key(brw, bfp, &wm_key); - - success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); - fail_if(!success, "do_wm_prog failed\n"); - add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT, - &pipeline->wm_prog_data.base); - } - - if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) { - struct brw_cs_prog_key cs_key; - struct gl_compute_program *cp = (struct gl_compute_program *) - program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program; - struct brw_compute_program *bcp = brw_compute_program(cp); - - brw_cs_populate_key(brw, bcp, &cs_key); - - success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline); - fail_if(!success, "brw_codegen_cs_prog failed\n"); - add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE, - &pipeline->cs_prog_data.base); - } - - _mesa_delete_shader_program(&brw->ctx, program); - - struct anv_device *device = compiler->device; - while (device->scratch_block_pool.bo.size < pipeline->total_scratch) - anv_block_pool_alloc(&device->scratch_block_pool); - - gen7_compute_urb_partition(pipeline); - - return 0; -} - -/* This badly named function frees the struct anv_pipeline data that the compiler - * allocates. Currently just the prog_data structs. 
- */ -void -anv_compiler_free(struct anv_pipeline *pipeline) -{ - for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) { - if (pipeline->prog_data[stage]) { - /* We only ever set up the params array because we don't do - * non-UBO pull constants - */ - anv_device_free(pipeline->device, pipeline->prog_data[stage]->param); - } - } -} - -} diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index e0bb7f6e4bc..fd87c85b0ce 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -33,6 +33,22 @@ struct anv_dispatch_table dtable; +static void +compiler_debug_log(void *data, const char *fmt, ...) +{ } + +static void +compiler_perf_log(void *data, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) + vfprintf(stderr, fmt, args); + + va_end(args); +} + static VkResult anv_physical_device_init(struct anv_physical_device *device, struct anv_instance *instance, @@ -91,11 +107,15 @@ anv_physical_device_init(struct anv_physical_device *device, close(fd); + brw_process_intel_debug_variable(); + device->compiler = brw_compiler_create(NULL, device->info); if (device->compiler == NULL) { result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); goto fail; } + device->compiler->shader_debug_log = compiler_debug_log; + device->compiler->shader_perf_log = compiler_perf_log; return VK_SUCCESS; @@ -146,7 +166,6 @@ static const VkExtensionProperties device_extensions[] = { }, }; - VkResult anv_CreateInstance( const VkInstanceCreateInfo* pCreateInfo, VkInstance* pInstance) @@ -633,8 +652,6 @@ VkResult anv_CreateDevice( device->info = *physical_device->info; - device->compiler = anv_compiler_create(device); - anv_queue_init(device, &device->queue); anv_device_init_meta(device); @@ -658,8 +675,6 @@ void anv_DestroyDevice( { ANV_FROM_HANDLE(anv_device, device, _device); - anv_compiler_destroy(device->compiler); - anv_queue_finish(&device->queue); anv_device_finish_meta(device); diff --git 
a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index fda382eee19..a923017310a 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -28,6 +28,12 @@ #include #include "anv_private.h" +#include "brw_nir.h" +#include "anv_nir.h" +#include "glsl/nir/nir_spirv.h" + +/* Needed for SWIZZLE macros */ +#include "program/prog_instruction.h" // Shader functions @@ -81,16 +87,12 @@ VkResult anv_CreateShader( const char *name = pCreateInfo->pName ? pCreateInfo->pName : "main"; size_t name_len = strlen(name); - if (strcmp(name, "main") != 0) { - anv_finishme("Multiple shaders per module not really supported"); - } - shader = anv_device_alloc(device, sizeof(*shader) + name_len + 1, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (shader == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - shader->module = module; + shader->module = module, memcpy(shader->entrypoint, name, name_len + 1); *pShader = anv_shader_to_handle(shader); @@ -108,6 +110,86 @@ void anv_DestroyShader( anv_device_free(device, shader); } +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +static const gl_shader_stage vk_shader_stage_to_mesa_stage[] = { + [VK_SHADER_STAGE_VERTEX] = MESA_SHADER_VERTEX, + [VK_SHADER_STAGE_TESS_CONTROL] = -1, + [VK_SHADER_STAGE_TESS_EVALUATION] = -1, + [VK_SHADER_STAGE_GEOMETRY] = MESA_SHADER_GEOMETRY, + [VK_SHADER_STAGE_FRAGMENT] = MESA_SHADER_FRAGMENT, + [VK_SHADER_STAGE_COMPUTE] = MESA_SHADER_COMPUTE, +}; + +static bool +is_scalar_shader_stage(const struct brw_compiler *compiler, VkShaderStage stage) +{ + switch (stage) { + case VK_SHADER_STAGE_VERTEX: + return compiler->scalar_vs; + case VK_SHADER_STAGE_GEOMETRY: + return false; + case VK_SHADER_STAGE_FRAGMENT: + case VK_SHADER_STAGE_COMPUTE: + return true; + default: + unreachable("Unsupported shader stage"); + } +} + +/* Eventually, this will become part of anv_CreateShader. Unfortunately, + * we can't do that yet because we don't have the ability to copy nir. 
+ */ +static nir_shader * +anv_shader_compile_to_nir(struct anv_device *device, + struct anv_shader *shader, VkShaderStage vk_stage) +{ + if (strcmp(shader->entrypoint, "main") != 0) { + anv_finishme("Multiple shaders per module not really supported"); + } + + gl_shader_stage stage = vk_shader_stage_to_mesa_stage[vk_stage]; + const struct brw_compiler *compiler = + device->instance->physicalDevice.compiler; + const nir_shader_compiler_options *nir_options = + compiler->glsl_compiler_options[stage].NirOptions; + + nir_shader *nir; + if (shader->module->nir) { + /* Some things such as our meta clear/blit code will give us a NIR + * shader directly. In that case, we just ignore the SPIR-V entirely + * and just use the NIR shader */ + nir = shader->module->nir; + nir->options = nir_options; + } else { + uint32_t *spirv = (uint32_t *) shader->module->data; + assert(spirv[0] == SPIR_V_MAGIC_NUMBER); + assert(shader->module->size % 4 == 0); + + nir = spirv_to_nir(spirv, shader->module->size / 4, stage, nir_options); + } + nir_validate_shader(nir); + + /* Make sure the provided shader has exactly one entrypoint and that the + * name matches the name that came in from the VkShader. 
+ */ + nir_function_impl *entrypoint = NULL; + nir_foreach_overload(nir, overload) { + if (strcmp(shader->entrypoint, overload->function->name) == 0 && + overload->impl) { + assert(entrypoint == NULL); + entrypoint = overload->impl; + } + } + assert(entrypoint != NULL); + + brw_preprocess_nir(nir, &device->info, + is_scalar_shader_stage(compiler, vk_stage)); + + nir_shader_gather_info(nir, entrypoint); + + return nir; +} VkResult anv_CreatePipelineCache( VkDevice device, @@ -156,7 +238,6 @@ void anv_DestroyPipeline( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - anv_compiler_free(pipeline); anv_reloc_list_finish(&pipeline->batch_relocs, pipeline->device); anv_state_stream_finish(&pipeline->program_stream); anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); @@ -177,6 +258,506 @@ static const uint32_t vk_to_gen_primitive_type[] = { [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 }; +static void +populate_sampler_prog_key(const struct brw_device_info *devinfo, + struct brw_sampler_prog_key_data *key) +{ + /* XXX: Handle texture swizzle on HSW- */ + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. 
(Works for BDW+) */ + key->swizzles[i] = SWIZZLE_XYZW; + } +} + +static void +populate_vs_prog_key(const struct brw_device_info *devinfo, + struct brw_vs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* XXX: Handle vertex input work-arounds */ + + /* XXX: Handle sampler_prog_key */ +} + +static void +populate_wm_prog_key(const struct brw_device_info *devinfo, + const VkGraphicsPipelineCreateInfo *info, + struct brw_wm_prog_key *key) +{ + ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass); + + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* Vulkan doesn't specify a default */ + key->high_quality_derivatives = false; + + /* XXX Vulkan doesn't appear to specify */ + key->clamp_fragment_color = false; + + /* XXX: These are needed for flipping the coordinates. Do we need to do + * this in Vulkan? + */ + key->drawable_height = 0; + key->render_to_fbo = true; /* XXX really? */ + + key->nr_color_regions = render_pass->subpasses[info->subpass].color_count; + + key->replicate_alpha = key->nr_color_regions > 1 && + info->pColorBlendState->alphaToCoverageEnable; + + if (info->pMultisampleState && info->pMultisampleState->rasterSamples > 1) { + /* We should probably pull this out of the shader, but it's fairly + * harmless to compute it and then let dead-code take care of it. 
+ */ + key->compute_sample_id = true; + key->persample_shading = info->pMultisampleState->sampleShadingEnable; + if (key->persample_shading) + key->persample_2x = info->pMultisampleState->rasterSamples == 2; + + key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable; + key->compute_sample_id = info->pMultisampleState->sampleShadingEnable; + } +} + +static void +populate_cs_prog_key(const struct brw_device_info *devinfo, + struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static nir_shader * +anv_pipeline_compile(struct anv_pipeline *pipeline, + struct anv_shader *shader, + VkShaderStage stage, + struct brw_stage_prog_data *prog_data) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + + nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, shader, stage); + if (nir == NULL) + return NULL; + + bool have_push_constants = false; + nir_foreach_variable(var, &nir->uniforms) { + if (!glsl_type_is_sampler(var->type)) { + have_push_constants = true; + break; + } + } + + /* Figure out the number of parameters */ + prog_data->nr_params = 0; + + if (have_push_constants) { + /* If the shader uses any push constants at all, we'll just give + * them the maximum possible number + */ + prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); + } + + if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) + prog_data->nr_params += MAX_DYNAMIC_BUFFERS; + + if (prog_data->nr_params > 0) { + prog_data->param = (const gl_constant_value **) + anv_device_alloc(pipeline->device, + prog_data->nr_params * sizeof(gl_constant_value *), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER); + + /* We now set the param values to be offsets into a + * anv_push_constant_data structure. 
Since the compiler doesn't + * actually dereference any of the gl_constant_value pointers in the + * params array, it doesn't really matter what we put here. + */ + struct anv_push_constants *null_data = NULL; + if (have_push_constants) { + /* Fill out the push constants section of the param array */ + for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) + prog_data->param[i] = (const gl_constant_value *) + &null_data->client_data[i * sizeof(float)]; + } + } + + /* Set up dynamic offsets */ + anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); + + /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ + anv_nir_apply_pipeline_layout(nir, pipeline->layout); + + /* All binding table offsets provided by apply_pipeline_layout() are + * relative to the start of the bindint table (plus MAX_RTS for VS). + */ + unsigned bias = stage == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0; + prog_data->binding_table.size_bytes = 0; + prog_data->binding_table.texture_start = bias; + prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.image_start = bias; + + /* Finish the optimization and compilation process */ + brw_postprocess_nir(nir, &pipeline->device->info, + is_scalar_shader_stage(compiler, stage)); + + /* nir_lower_io will only handle the push constants; we need to set this + * to the full number of possible uniforms. 
+ */ + nir->num_uniforms = prog_data->nr_params; + + return nir; +} + +static uint32_t +anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, + const void *data, size_t size) +{ + struct anv_state state = + anv_state_stream_alloc(&pipeline->program_stream, size, 64); + + assert(size < pipeline->program_stream.block_pool->block_size); + + memcpy(state.map, data, size); + + return state.offset; +} +static void +anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, + VkShaderStage stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads, + [VK_SHADER_STAGE_TESS_CONTROL] = 0, + [VK_SHADER_STAGE_TESS_EVALUATION] = 0, + [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads, + [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads, + [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= 1 << stage; + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + align_u32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + +static VkResult +anv_pipeline_compile_vs(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader *shader) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + struct brw_vs_prog_key key; + + populate_vs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, shader, + VK_SHADER_STAGE_VERTEX, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (shader->module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + 
prog_data->inputs_read = nir->info.inputs_read; + pipeline->writes_point_size = nir->info.outputs_written & VARYING_SLOT_PSIZ; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + false /* XXX: Do SSO? */); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, false, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + const uint32_t offset = + anv_pipeline_upload_kernel(pipeline, shader_code, code_size); + if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + pipeline->vs_simd8 = offset; + pipeline->vs_vec4 = NO_KERNEL; + } else { + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = offset; + } + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_fs(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader *shader) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct brw_wm_prog_key key; + + populate_wm_prog_key(&pipeline->device->info, info, &key); + + if (pipeline->use_repclear) + key.nr_color_regions = 1; + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.render_target_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, shader, + VK_SHADER_STAGE_FRAGMENT, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (shader->module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, -1, 
pipeline->use_repclear, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + uint32_t offset = anv_pipeline_upload_kernel(pipeline, + shader_code, code_size); + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = offset; + + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = offset + prog_data->prog_offset_16; + } else { + pipeline->ps_simd16 = NO_KERNEL; + } + + pipeline->ps_ksp2 = 0; + pipeline->ps_grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd8; + pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp2 = pipeline->ps_simd16; + pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd16; + pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16; + } + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT, + &prog_data->base); + + return VK_SUCCESS; +} + +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + const VkComputePipelineCreateInfo *info, + struct anv_shader *shader) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct brw_cs_prog_key key; + + populate_cs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, shader, + VK_SHADER_STAGE_COMPUTE, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (shader->module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + 
brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, + -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + pipeline->cs_simd = anv_pipeline_upload_kernel(pipeline, + shader_code, code_size); + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE, + &prog_data->base); + + return VK_SUCCESS; +} + +static const int gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. */ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). 
+ */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. 
+ */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. + */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + static void anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo) @@ -335,7 +916,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, pipeline->device = device; pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); result = anv_reloc_list_init(&pipeline->batch_relocs, device); if (result != VK_SUCCESS) { @@ -349,11 +929,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, anv_state_stream_init(&pipeline->program_stream, &device->instruction_block_pool); - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - pipeline->shaders[pCreateInfo->pStages[i].stage] = - 
anv_shader_from_handle(pCreateInfo->pStages[i].shader); - } - anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); if (pCreateInfo->pTessellationState) @@ -363,27 +938,44 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); pipeline->use_repclear = extra && extra->use_repclear; + pipeline->writes_point_size = false; - anv_compiler_run(device->compiler, pipeline); + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. + */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_vec4 = NO_KERNEL; - pipeline->ps_ksp2 = 0; - pipeline->ps_grf_start2 = 0; - if (pipeline->ps_simd8 != NO_KERNEL) { - pipeline->ps_ksp0 = pipeline->ps_simd8; - pipeline->ps_grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; - if (pipeline->ps_simd16 != NO_KERNEL) { - pipeline->ps_ksp2 = pipeline->ps_simd16; - pipeline->ps_grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + ANV_FROM_HANDLE(anv_shader, shader, pCreateInfo->pStages[i].shader); + + switch (pCreateInfo->pStages[i].stage) { + case VK_SHADER_STAGE_VERTEX: + anv_pipeline_compile_vs(pipeline, pCreateInfo, shader); + break; + case VK_SHADER_STAGE_FRAGMENT: + anv_pipeline_compile_fs(pipeline, pCreateInfo, shader); + break; + default: + anv_finishme("Unsupported shader stage"); } - } else if (pipeline->ps_simd16 != NO_KERNEL) { - pipeline->ps_ksp0 = pipeline->ps_simd16; - pipeline->ps_grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; - } else { - unreachable("no ps shader"); } + if 
(!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { + /* Vertex is only optional if disable_vs is set */ + assert(extra->disable_vs); + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); + } + + gen7_compute_urb_partition(pipeline); + const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; pipeline->vb_used = 0; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8c48a9c2b0a..c1a7b01e8b0 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -499,7 +499,6 @@ struct anv_device { struct anv_block_pool scratch_block_pool; - struct anv_compiler * compiler; pthread_mutex_t mutex; }; @@ -1089,7 +1088,6 @@ struct anv_pipeline { uint32_t dynamic_state_mask; struct anv_dynamic_state dynamic_state; - struct anv_shader * shaders[VK_SHADER_STAGE_NUM]; struct anv_pipeline_layout * layout; bool use_repclear; @@ -1160,6 +1158,11 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra); +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + const VkComputePipelineCreateInfo *info, + struct anv_shader *shader); + VkResult anv_graphics_pipeline_create(VkDevice device, const VkGraphicsPipelineCreateInfo *pCreateInfo, @@ -1187,11 +1190,6 @@ gen8_compute_pipeline_create(VkDevice _device, const VkComputePipelineCreateInfo *pCreateInfo, VkPipeline *pPipeline); -struct anv_compiler *anv_compiler_create(struct anv_device *device); -void anv_compiler_destroy(struct anv_compiler *compiler); -int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline); -void anv_compiler_free(struct anv_pipeline *pipeline); - struct anv_format { const VkFormat vk_format; const char *name; diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 0e2526fce20..f3f378b181a 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ 
-568,19 +568,29 @@ VkResult gen8_compute_pipeline_create( anv_state_stream_init(&pipeline->program_stream, &device->instruction_block_pool); - memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. + */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); - pipeline->shaders[VK_SHADER_STAGE_COMPUTE] = - anv_shader_from_handle(pCreateInfo->stage.shader); + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_vec4 = NO_KERNEL; - pipeline->use_repclear = false; + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE); + ANV_FROM_HANDLE(anv_shader, shader, pCreateInfo->stage.shader); + anv_pipeline_compile_cs(pipeline, pCreateInfo, shader); - anv_compiler_run(device->compiler, pipeline); + pipeline->use_repclear = false; const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE, - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_COMPUTE], .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), .ScratchSpaceBasePointerHigh = 0, .StackSize = 0, -- cgit v1.2.3 From 44b22ca441e2c9adf9edc718a81d57352cfbb057 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Oct 2015 15:23:56 -0700 Subject: nir/spirv: Handle SpvExecutionMode --- src/glsl/nir/spirv_to_nir.c | 86 +++++++++++++++++++++++++++++++++++-- src/glsl/nir/spirv_to_nir_private.h | 1 + 2 files changed, 84 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 819109385ee..52bc335637f 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c 
@@ -780,13 +780,18 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, var->data.descriptor_set = dec->literals[0]; break; case SpvDecorationBuiltIn: { + SpvBuiltIn builtin = dec->literals[0]; + nir_variable_mode mode; - vtn_get_builtin_location(dec->literals[0], &var->data.location, - &mode); + vtn_get_builtin_location(builtin, &var->data.location, &mode); var->data.explicit_location = true; var->data.mode = mode; if (mode == nir_var_shader_in || mode == nir_var_system_value) var->data.read_only = true; + + if (builtin == SpvBuiltInPosition || builtin == SpvBuiltInSamplePosition) + var->data.origin_upper_left = b->origin_upper_left; + b->builtins[dec->literals[0]] = var; break; } @@ -834,6 +839,9 @@ get_builtin_variable(struct vtn_builder *b, var->data.location = location; var->data.explicit_location = true; + if (builtin == SpvBuiltInPosition || builtin == SpvBuiltInSamplePosition) + var->data.origin_upper_left = b->origin_upper_left; + b->builtins[builtin] = var; } @@ -2403,7 +2411,79 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpExecutionMode: - /* TODO */ + assert(b->entry_point == &b->values[w[1]]); + + switch((SpvExecutionMode)w[2]) { + case SpvExecutionModeOriginUpperLeft: + case SpvExecutionModeOriginLowerLeft: + b->origin_upper_left = (w[2] == SpvExecutionModeOriginUpperLeft); + break; + + case SpvExecutionModeEarlyFragmentTests: + b->shader->info.fs.early_fragment_tests = true; + break; + + case SpvExecutionModeInvocations: + b->shader->info.gs.invocations = w[3]; + break; + + case SpvExecutionModeDepthReplacing: + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; + break; + case SpvExecutionModeDepthGreater: + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; + break; + case SpvExecutionModeDepthLess: + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; + break; + case SpvExecutionModeDepthUnchanged: + b->shader->info.fs.depth_layout = 
FRAG_DEPTH_LAYOUT_UNCHANGED; + break; + + case SpvExecutionModeLocalSize: + b->shader->info.cs.local_size[0] = w[3]; + b->shader->info.cs.local_size[1] = w[4]; + b->shader->info.cs.local_size[2] = w[5]; + break; + case SpvExecutionModeLocalSizeHint: + break; /* Nothing do do with this */ + + case SpvExecutionModeOutputVertices: + b->shader->info.gs.vertices_out = w[3]; + break; + + case SpvExecutionModeInputPoints: + case SpvExecutionModeInputLines: + case SpvExecutionModeInputLinesAdjacency: + case SpvExecutionModeInputTriangles: + case SpvExecutionModeInputTrianglesAdjacency: + case SpvExecutionModeInputQuads: + case SpvExecutionModeInputIsolines: + case SpvExecutionModeOutputPoints: + case SpvExecutionModeOutputLineStrip: + case SpvExecutionModeOutputTriangleStrip: + assert(!"TODO: Add geometry metadata"); + break; + + case SpvExecutionModeSpacingEqual: + case SpvExecutionModeSpacingFractionalEven: + case SpvExecutionModeSpacingFractionalOdd: + case SpvExecutionModeVertexOrderCw: + case SpvExecutionModeVertexOrderCcw: + case SpvExecutionModePointMode: + assert(!"TODO: Add tessellation metadata"); + break; + + case SpvExecutionModePixelCenterInteger: + case SpvExecutionModeXfb: + assert(!"Unhandled execution mode"); + break; + + case SpvExecutionModeVecTypeHint: + case SpvExecutionModeContractionOff: + case SpvExecutionModeIndependentForwardProgress: + break; /* OpenCL */ + } break; case SpvOpString: diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index decceff65a6..96044b05aac 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -178,6 +178,7 @@ struct vtn_builder { struct vtn_value *values; SpvExecutionModel execution_model; + bool origin_upper_left; struct vtn_value *entry_point; struct vtn_function *func; -- cgit v1.2.3 From 59bae36ffbe196a279e3044b2604a05886728d99 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Oct 2015 15:35:13 -0700 Subject: nir/spirv: Fix a typo 
--- src/glsl/nir/spirv_to_nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 52bc335637f..748c0dd7f04 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -789,7 +789,7 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, if (mode == nir_var_shader_in || mode == nir_var_system_value) var->data.read_only = true; - if (builtin == SpvBuiltInPosition || builtin == SpvBuiltInSamplePosition) + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) var->data.origin_upper_left = b->origin_upper_left; b->builtins[dec->literals[0]] = var; @@ -839,7 +839,7 @@ get_builtin_variable(struct vtn_builder *b, var->data.location = location; var->data.explicit_location = true; - if (builtin == SpvBuiltInPosition || builtin == SpvBuiltInSamplePosition) + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) var->data.origin_upper_left = b->origin_upper_left; b->builtins[builtin] = var; -- cgit v1.2.3 From 27d868500aba43e88ef86f9f68aa729289569e75 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Oct 2015 15:37:14 -0700 Subject: anv/pipeline: Set key->render_to_fbo to false for fragment shaaders Vulkan uses the upper-left convention. This is the same as DX one and what our hardware does. We had it flipped around. --- src/vulkan/anv_pipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index a923017310a..ed0be39cb7b 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -303,7 +303,7 @@ populate_wm_prog_key(const struct brw_device_info *devinfo, * this in Vulkan? */ key->drawable_height = 0; - key->render_to_fbo = true; /* XXX really? 
*/ + key->render_to_fbo = false; key->nr_color_regions = render_pass->subpasses[info->subpass].color_count; -- cgit v1.2.3 From 72d99f8a40b4c00983952265323b96e61ce45c6b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Oct 2015 16:00:55 -0700 Subject: anv/pipeline: Update a comment --- src/vulkan/anv_pipeline.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index ed0be39cb7b..1fce94b7f01 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -299,9 +299,7 @@ populate_wm_prog_key(const struct brw_device_info *devinfo, /* XXX Vulkan doesn't appear to specify */ key->clamp_fragment_color = false; - /* XXX: These are needed for flipping the coordinates. Do we need to do - * this in Vulkan? - */ + /* Vulkan always specifies upper-left coordinates */ key->drawable_height = 0; key->render_to_fbo = false; -- cgit v1.2.3 From c8572d0f9c36c2485893e5f734ba094b8e9cdf74 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Oct 2015 16:02:00 -0700 Subject: anv/pipeline: Remove a redundant line We set compute_sample_id based on multisample state two lines below. --- src/vulkan/anv_pipeline.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 1fce94b7f01..6e6b71c785a 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -312,7 +312,6 @@ populate_wm_prog_key(const struct brw_device_info *devinfo, /* We should probably pull this out of the shader, but it's fairly * harmless to compute it and then let dead-code take care of it. 
*/ - key->compute_sample_id = true; key->persample_shading = info->pMultisampleState->sampleShadingEnable; if (key->persample_shading) key->persample_2x = info->pMultisampleState->rasterSamples == 2; -- cgit v1.2.3 From 0ab926dfbf56ad6482b875d980ae95c533b765f9 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 21 Oct 2015 11:36:39 -0700 Subject: anv: Don't teardown uninitialized anv_physical_device If the user called vkDestroyDevice but never called vkEnumeratePhysicalDevices, then the driver tried to ralloc_free() an unitialized anv_physical_device. Fixes test 'dEQP-VK.api.device_init.create_instance_name_version'. --- src/vulkan/anv_device.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index fd87c85b0ce..d450b2b4e87 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -224,7 +224,12 @@ void anv_DestroyInstance( { ANV_FROM_HANDLE(anv_instance, instance, _instance); - anv_physical_device_finish(&instance->physicalDevice); + if (instance->physicalDeviceCount > 0) { + /* We support at most one physical device. 
*/ + assert(instance->physicalDeviceCount == 1); + anv_physical_device_finish(&instance->physicalDevice); + } + anv_finish_wsi(instance); VG(VALGRIND_DESTROY_MEMPOOL(instance)); -- cgit v1.2.3 From a8ffd6e72c0bf8d115212ce6d7b35f6acb56a0b6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 17:42:47 -0700 Subject: nir/gather_info: Add more info for geometry shaders --- src/glsl/nir/nir_gather_info.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_gather_info.c b/src/glsl/nir/nir_gather_info.c index 4893945c18b..c2413e3bbe2 100644 --- a/src/glsl/nir/nir_gather_info.c +++ b/src/glsl/nir/nir_gather_info.c @@ -48,6 +48,12 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) shader->info.system_values_read |= (1 << nir_system_value_from_intrinsic(instr->intrinsic)); break; + + case nir_intrinsic_end_primitive: + assert(shader->stage == MESA_SHADER_GEOMETRY); + shader->info.gs.uses_end_primitive = 1; + break; + default: break; } @@ -89,6 +95,7 @@ nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) foreach_list_typed(nir_variable, var, node, &shader->inputs) shader->info.inputs_read |= (1ull << var->data.location); + /* TODO: Some day we may need to add stream support to NIR */ shader->info.outputs_written = 0; foreach_list_typed(nir_variable, var, node, &shader->outputs) shader->info.outputs_written |= (1ull << var->data.location); -- cgit v1.2.3 From 27393c86307e19a15166c27c44fb0b1d76b87795 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 17:58:34 -0700 Subject: nir/spirv: Add support for GS metadata --- src/glsl/nir/spirv_to_nir.c | 69 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 748c0dd7f04..74de2e855d1 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2375,6 +2375,55 @@ 
vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, return true; } +static unsigned +gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + case SpvExecutionModeOutputPoints: + return 0; /* GL_POINTS */ + case SpvExecutionModeInputLines: + return 1; /* GL_LINES */ + case SpvExecutionModeInputLinesAdjacency: + return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */ + case SpvExecutionModeInputTriangles: + return 4; /* GL_TRIANGLES */ + case SpvExecutionModeInputTrianglesAdjacency: + return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */ + case SpvExecutionModeInputQuads: + return 7; /* GL_QUADS */ + case SpvExecutionModeInputIsolines: + return 0x8E7A; /* GL_ISOLINES */ + case SpvExecutionModeOutputLineStrip: + return 3; /* GL_LINE_STRIP */ + case SpvExecutionModeOutputTriangleStrip: + return 5; /* GL_TRIANGLE_STRIP */ + default: + assert(!"Invalid primitive type"); + return 4; + } +} + +static unsigned +vertices_in_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + return 1; + case SpvExecutionModeInputLines: + return 2; + case SpvExecutionModeInputLinesAdjacency: + return 4; + case SpvExecutionModeInputTriangles: + return 3; + case SpvExecutionModeInputTrianglesAdjacency: + return 6; + default: + assert(!"Invalid GS input mode"); + return 0; + } +} + static bool vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -2413,10 +2462,11 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpExecutionMode: assert(b->entry_point == &b->values[w[1]]); - switch((SpvExecutionMode)w[2]) { + SpvExecutionMode mode = w[2]; + switch(mode) { case SpvExecutionModeOriginUpperLeft: case SpvExecutionModeOriginLowerLeft: - b->origin_upper_left = (w[2] == SpvExecutionModeOriginUpperLeft); + b->origin_upper_left = (mode == SpvExecutionModeOriginUpperLeft); break; case 
SpvExecutionModeEarlyFragmentTests: @@ -2424,6 +2474,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvExecutionModeInvocations: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); b->shader->info.gs.invocations = w[3]; break; @@ -2441,6 +2492,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvExecutionModeLocalSize: + assert(b->shader->stage == MESA_SHADER_COMPUTE); b->shader->info.cs.local_size[0] = w[3]; b->shader->info.cs.local_size[1] = w[4]; b->shader->info.cs.local_size[2] = w[5]; @@ -2449,6 +2501,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, break; /* Nothing do do with this */ case SpvExecutionModeOutputVertices: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); b->shader->info.gs.vertices_out = w[3]; break; @@ -2459,10 +2512,20 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvExecutionModeInputTrianglesAdjacency: case SpvExecutionModeInputQuads: case SpvExecutionModeInputIsolines: + if (b->shader->stage == MESA_SHADER_GEOMETRY) { + b->shader.info.gs.vertices_in = + vertices_in_from_spv_execution_mode(mode); + } else { + assert(!"Tesselation shaders not yet supported"); + } + break; + case SpvExecutionModeOutputPoints: case SpvExecutionModeOutputLineStrip: case SpvExecutionModeOutputTriangleStrip: - assert(!"TODO: Add geometry metadata"); + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader.info.gs.output_primitive = + gl_primitive_from_spv_execution_mode(mode); break; case SpvExecutionModeSpacingEqual: -- cgit v1.2.3 From 55a7ee730c206a42cc530cf3e249866398bae686 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 18:00:05 -0700 Subject: spirv/nir: Add more stage asserts --- src/glsl/nir/spirv_to_nir.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 74de2e855d1..c9133f741c6 100644 --- 
a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2470,6 +2470,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvExecutionModeEarlyFragmentTests: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); b->shader->info.fs.early_fragment_tests = true; break; @@ -2479,15 +2480,19 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvExecutionModeDepthReplacing: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; break; case SpvExecutionModeDepthGreater: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; break; case SpvExecutionModeDepthLess: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; break; case SpvExecutionModeDepthUnchanged: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; break; -- cgit v1.2.3 From 3d35e4361f6b2979813103ddeb7a79bd042263a6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 18:16:50 -0700 Subject: Fix a couple of dereferences --- src/glsl/nir/spirv_to_nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index c9133f741c6..b201019bab1 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2518,7 +2518,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvExecutionModeInputQuads: case SpvExecutionModeInputIsolines: if (b->shader->stage == MESA_SHADER_GEOMETRY) { - b->shader.info.gs.vertices_in = + b->shader->info.gs.vertices_in = vertices_in_from_spv_execution_mode(mode); } else { assert(!"Tesselation shaders not yet supported"); @@ -2529,7 +2529,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case 
SpvExecutionModeOutputLineStrip: case SpvExecutionModeOutputTriangleStrip: assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader.info.gs.output_primitive = + b->shader->info.gs.output_primitive = gl_primitive_from_spv_execution_mode(mode); break; -- cgit v1.2.3 From 5790ee2bbbf039e0886ad02d0c9f0e929ad7ec87 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 18:17:11 -0700 Subject: nir/spirv: Add support for various barrier type instructions --- src/glsl/nir/spirv_to_nir.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index b201019bab1..63f58bc2b2a 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2268,6 +2268,37 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, } } +static void +vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + nir_intrinsic_op intrinsic_op; + switch (opcode) { + case SpvOpEmitVertex: + case SpvOpEmitStreamVertex: + intrinsic_op = nir_intrinsic_emit_vertex; + break; + case SpvOpEndPrimitive: + case SpvOpEndStreamPrimitive: + intrinsic_op = nir_intrinsic_end_primitive; + break; + case SpvOpMemoryBarrier: + intrinsic_op = nir_intrinsic_memory_barrier; + break; + case SpvOpControlBarrier: + default: + unreachable("unknown barrier instruction"); + } + + nir_intrinsic_instr *intrin = + nir_intrinsic_instr_create(b->shader, intrinsic_op); + + if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) + intrin->const_index[0] = w[1]; + + nir_builder_instr_insert(&b->nb, &intrin->instr); +} + static void vtn_phi_node_init(struct vtn_builder *b, struct vtn_ssa_value *val) { @@ -2892,6 +2923,15 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_phi_first_pass(b, w); break; + case SpvOpEmitVertex: + case SpvOpEndPrimitive: + case SpvOpEmitStreamVertex: + case SpvOpEndStreamPrimitive: + 
case SpvOpControlBarrier: + case SpvOpMemoryBarrier: + vtn_handle_barrier(b, opcode, w, count); + break; + default: unreachable("Unhandled opcode"); } -- cgit v1.2.3 From 164abff0c017b542604f945fb6c7de77e3403698 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 18:39:06 -0700 Subject: nir/spirv: Add support for more CS system values --- src/glsl/nir/spirv_to_nir.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 63f58bc2b2a..a3d38831267 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -708,18 +708,24 @@ vtn_get_builtin_location(SpvBuiltIn builtin, int *location, *location = FRAG_RESULT_DEPTH; *mode = nir_var_shader_out; break; - case SpvBuiltInHelperInvocation: - unreachable("unsupported builtin"); /* XXX */ - break; case SpvBuiltInNumWorkgroups: case SpvBuiltInWorkgroupSize: /* these are constants, need to be handled specially */ unreachable("unsupported builtin"); - case SpvBuiltInWorkgroupId: - case SpvBuiltInLocalInvocationId: + break; case SpvBuiltInGlobalInvocationId: case SpvBuiltInLocalInvocationIndex: - unreachable("no compute shader support"); + /* these are computed values, need to be handled specially */ + unreachable("unsupported builtin"); + case SpvBuiltInWorkgroupId: + *location = SYSTEM_VALUE_WORK_GROUP_ID; + *mode = nir_var_system_value; + break; + case SpvBuiltInLocalInvocationId: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; + *mode = nir_var_system_value; + break; + case SpvBuiltInHelperInvocation: default: unreachable("unsupported builtin"); } -- cgit v1.2.3 From d538fe849df0c3f80fea2765ff36dd2db5eab72b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 18:45:48 -0700 Subject: anv/pipeline: Add back basic geometry shader support Now that we've done the refactoring upstream, it's much easier to to get hooked up. 
We haven't tested things well enough to know that we're setting up the GPU state correctly for them yet but at least we can compile them now. --- src/vulkan/anv_pipeline.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 6e6b71c785a..80f8a4ff76f 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -282,6 +282,15 @@ populate_vs_prog_key(const struct brw_device_info *devinfo, /* XXX: Handle sampler_prog_key */ } +static void +populate_gs_prog_key(const struct brw_device_info *devinfo, + struct brw_gs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + static void populate_wm_prog_key(const struct brw_device_info *devinfo, const VkGraphicsPipelineCreateInfo *info, @@ -509,6 +518,59 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, return VK_SUCCESS; } +static VkResult +anv_pipeline_compile_gs(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader *shader) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; + struct brw_gs_prog_key key; + + populate_gs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, shader, + VK_SHADER_STAGE_GEOMETRY, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (shader->module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + false /* XXX: Do SSO? 
*/); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + /* TODO: SIMD8 GS */ + pipeline->gs_vec4 = + anv_pipeline_upload_kernel(pipeline, shader_code, code_size); + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, + &prog_data->base.base); + + return VK_SUCCESS; +} + static VkResult anv_pipeline_compile_fs(struct anv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *info, @@ -957,6 +1019,9 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, case VK_SHADER_STAGE_VERTEX: anv_pipeline_compile_vs(pipeline, pCreateInfo, shader); break; + case VK_SHADER_STAGE_GEOMETRY: + anv_pipeline_compile_gs(pipeline, pCreateInfo, shader); + break; case VK_SHADER_STAGE_FRAGMENT: anv_pipeline_compile_fs(pipeline, pCreateInfo, shader); break; -- cgit v1.2.3 From ea23cb3543e793796bd9de9a3c5cf9ebbed0d8de Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 20:36:25 -0700 Subject: nir/spirv: Add capabilities and decorations for basic geometry shaders --- src/glsl/nir/spirv_to_nir.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a3d38831267..ce191514763 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -400,6 +400,10 @@ type_decoration_cb(struct vtn_builder *b, /* Ignore these, since we get explicit offsets anyways */ break; + case SpvDecorationStream: + assert(dec->literals[0] == 0); + break; + default: unreachable("Unhandled type decoration"); } @@ -2473,12 +2477,17 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpCapability: - /* - * TODO properly handle these and give a real error if asking for too 
- * much. - */ - assert(w[1] == SpvCapabilityMatrix || - w[1] == SpvCapabilityShader); + switch ((SpvCapability)w[1]) { + case SpvCapabilityMatrix: + case SpvCapabilityShader: + /* All shaders support these */ + break; + case SpvCapabilityGeometry: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + break; + default: + assert(!"Unsupported capability"); + } break; case SpvOpExtInstImport: -- cgit v1.2.3 From 5f29dacda2f7d5dc74856c72ca9954af20c83863 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 20:40:28 -0700 Subject: i965: Move get_hw_prim_for_gl_prim to brw_util.c --- src/mesa/drivers/dri/i965/brw_draw.c | 29 ----------------------------- src/mesa/drivers/dri/i965/brw_util.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 39a26b05201..61683c81b79 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -55,23 +55,6 @@ #define FILE_DEBUG_FLAG DEBUG_PRIMS -static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = { - [GL_POINTS] =_3DPRIM_POINTLIST, - [GL_LINES] = _3DPRIM_LINELIST, - [GL_LINE_LOOP] = _3DPRIM_LINELOOP, - [GL_LINE_STRIP] = _3DPRIM_LINESTRIP, - [GL_TRIANGLES] = _3DPRIM_TRILIST, - [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [GL_QUADS] = _3DPRIM_QUADLIST, - [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP, - [GL_POLYGON] = _3DPRIM_POLYGON, - [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, - [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, - [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, - [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, -}; - static const GLenum reduced_prim[GL_POLYGON+1] = { [GL_POINTS] = GL_POINTS, @@ -86,18 +69,6 @@ static const GLenum reduced_prim[GL_POLYGON+1] = { [GL_POLYGON] = GL_TRIANGLES }; -uint32_t -get_hw_prim_for_gl_prim(int mode) -{ - if (mode >= BRW_PRIM_OFFSET) - return 
mode - BRW_PRIM_OFFSET; - else { - assert(mode < ARRAY_SIZE(prim_to_hw_prim)); - return prim_to_hw_prim[mode]; - } -} - - /* When the primitive changes, set a state bit and re-validate. Not * the nicest and would rather deal with this by having all the * programs be immune to the active primitive (ie. cope with all diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c index f801dc06628..e4533f34f1f 100644 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ b/src/mesa/drivers/dri/i965/brw_util.c @@ -102,3 +102,31 @@ GLuint brw_translate_blend_factor( GLenum factor ) unreachable("not reached"); } } + +static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = { + [GL_POINTS] =_3DPRIM_POINTLIST, + [GL_LINES] = _3DPRIM_LINELIST, + [GL_LINE_LOOP] = _3DPRIM_LINELOOP, + [GL_LINE_STRIP] = _3DPRIM_LINESTRIP, + [GL_TRIANGLES] = _3DPRIM_TRILIST, + [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [GL_QUADS] = _3DPRIM_QUADLIST, + [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP, + [GL_POLYGON] = _3DPRIM_POLYGON, + [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, +}; + +uint32_t +get_hw_prim_for_gl_prim(int mode) +{ + if (mode >= BRW_PRIM_OFFSET) + return mode - BRW_PRIM_OFFSET; + else { + assert(mode < ARRAY_SIZE(prim_to_hw_prim)); + return prim_to_hw_prim[mode]; + } +} -- cgit v1.2.3 From 403254988524e52a468f3cb370cc0014e3b54b38 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 20:42:23 -0700 Subject: i965/vec4: Handle returns at the end of functions --- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index ea1e3e7bbcf..e79a9f3b5b9 100644 --- 
a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1521,7 +1521,13 @@ vec4_visitor::nir_emit_jump(nir_jump_instr *instr) break; case nir_jump_return: - /* fall through */ + /* This has to be the last block in the shader. We don't handle + * early returns. + */ + assert(nir_cf_node_next(&instr->instr.block->cf_node) == NULL && + instr->instr.block->cf_node.parent->type == nir_cf_node_function); + break; + default: unreachable("unknown jump"); } -- cgit v1.2.3 From 0329a252bd7418d778027c361e6a2bee7d69caab Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 21:45:12 -0700 Subject: nir/spirv: Add defaults for GS input/output primitive types These are supposed to be specified in the SPIR-V source as SpvExecutionMode enums but glslang isn't giving them to us. A bug has been filed: https://github.com/KhronosGroup/glslang/issues/84 --- src/glsl/nir/spirv_to_nir.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index ce191514763..3d23f7ca1fd 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -3160,6 +3160,12 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->values = rzalloc_array(b, struct vtn_value, value_id_bound); exec_list_make_empty(&b->functions); + /* XXX: We shouldn't need these defaults */ + if (b->shader->stage == MESA_SHADER_GEOMETRY) { + b->shader->info.gs.vertices_in = 3; + b->shader->info.gs.output_primitive = 4; /* GL_TRIANGLES */ + } + /* Handle all the preamble instructions */ words = vtn_foreach_instruction(b, words, word_end, vtn_handle_preamble_instruction); -- cgit v1.2.3 From 8af2a099569f123776a9556affde23ae66a10ef5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 21:45:49 -0700 Subject: anv/pipeline: Make the has_push_constants computation more accurate The computation used to only look for uniforms that weren't samplers. 
Now it also filters out arrays of samplers. --- src/vulkan/anv_pipeline.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 80f8a4ff76f..f035f5b8f5e 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -354,7 +354,11 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, bool have_push_constants = false; nir_foreach_variable(var, &nir->uniforms) { - if (!glsl_type_is_sampler(var->type)) { + const struct glsl_type *type = var->type; + if (glsl_type_is_array(type)) + type = glsl_get_array_element(type); + + if (!glsl_type_is_sampler(type)) { have_push_constants = true; break; } -- cgit v1.2.3 From d0e8c78407c9b259da55c8b203b96deadab6187e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 21:50:45 -0700 Subject: anv/pipeline: set the gs_vertex_count in compile_gs This was missed in the initial enabling commit. --- src/vulkan/anv_pipeline.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index f035f5b8f5e..173c9787d91 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -566,6 +566,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, /* TODO: SIMD8 GS */ pipeline->gs_vec4 = anv_pipeline_upload_kernel(pipeline, shader_code, code_size); + pipeline->gs_vertex_count = nir->info.gs.vertices_in; ralloc_free(mem_ctx); -- cgit v1.2.3 From 82c579e31418c31238437c72e462b71a88ff01af Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Oct 2015 21:51:16 -0700 Subject: anv/gen8: Set the correct maximum number of GS threads This equation was pulled from mesa gen8_gs_state.c --- src/vulkan/gen8_pipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index f3f378b181a..6d0edf0ec87 100644 --- a/src/vulkan/gen8_pipeline.c +++ 
b/src/vulkan/gen8_pipeline.c @@ -426,7 +426,7 @@ gen8_graphics_pipeline_create( .DispatchGRFStartRegisterForURBData = gs_prog_data->base.base.dispatch_grf_start_reg, - .MaximumNumberofThreads = device->info.max_gs_threads, + .MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1, .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, //pipeline->gs_prog_data.dispatch_mode | .StatisticsEnable = true, -- cgit v1.2.3 From f23d951083f51ac4ca3dbffd31359a4be2337281 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 22 Oct 2015 16:53:01 -0700 Subject: nir/validate: Add better validation of load/store types --- src/glsl/nir/nir_validate.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c index c6fedf9b1ad..a42e830fd72 100644 --- a/src/glsl/nir/nir_validate.c +++ b/src/glsl/nir/nir_validate.c @@ -398,15 +398,27 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) } switch (instr->intrinsic) { - case nir_intrinsic_load_var: + case nir_intrinsic_load_var: { + const struct glsl_type *type = + nir_deref_tail(&instr->variables[0]->deref)->type; + assert(glsl_type_is_vector_or_scalar(type)); + assert(instr->num_components == glsl_get_vector_elements(type)); assert(instr->variables[0]->var->data.mode != nir_var_shader_out); break; - case nir_intrinsic_store_var: + } + case nir_intrinsic_store_var: { + const struct glsl_type *type = + nir_deref_tail(&instr->variables[0]->deref)->type; + assert(glsl_type_is_vector_or_scalar(type)); + assert(instr->num_components == glsl_get_vector_elements(type)); assert(instr->variables[0]->var->data.mode != nir_var_shader_in && instr->variables[0]->var->data.mode != nir_var_uniform && instr->variables[0]->var->data.mode != nir_var_shader_storage); break; + } case nir_intrinsic_copy_var: + assert(nir_deref_tail(&instr->variables[0]->deref)->type == + 
nir_deref_tail(&instr->variables[1]->deref)->type); assert(instr->variables[0]->var->data.mode != nir_var_shader_in && instr->variables[0]->var->data.mode != nir_var_uniform && instr->variables[0]->var->data.mode != nir_var_shader_storage); -- cgit v1.2.3 From 2ce6636c7501fc8ad740ce8be50a59cb2f665257 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 22 Oct 2015 16:56:38 -0700 Subject: nir/spirv: Remove the vtn_type argument from _vtn_variable_load/store Now that builtins are handled in deref chains, we don't really need this anymore. --- src/glsl/nir/spirv_to_nir.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 3d23f7ca1fd..52b82e0d674 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -860,8 +860,7 @@ get_builtin_variable(struct vtn_builder *b, static struct vtn_ssa_value * _vtn_variable_load(struct vtn_builder *b, - nir_deref_var *src_deref, struct vtn_type *src_type, - nir_deref *src_deref_tail) + nir_deref_var *src_deref, nir_deref *src_deref_tail) { struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); val->type = src_deref_tail->type; @@ -907,9 +906,7 @@ _vtn_variable_load(struct vtn_builder *b, src_deref_tail->child = &deref->deref; for (unsigned i = 0; i < elems; i++) { deref->base_offset = i; - val->elems[i] = _vtn_variable_load(b, src_deref, - src_type->array_element, - &deref->deref); + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); } } else { assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); @@ -921,9 +918,7 @@ _vtn_variable_load(struct vtn_builder *b, for (unsigned i = 0; i < elems; i++) { deref->index = i; deref->deref.type = glsl_get_struct_field(val->type, i); - val->elems[i] = _vtn_variable_load(b, src_deref, - src_type->members[i], - &deref->deref); + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); } } @@ -933,7 +928,7 @@ 
_vtn_variable_load(struct vtn_builder *b, } static void -_vtn_variable_store(struct vtn_builder *b, struct vtn_type *dest_type, +_vtn_variable_store(struct vtn_builder *b, nir_deref_var *dest_deref, nir_deref *dest_deref_tail, struct vtn_ssa_value *src) { @@ -963,8 +958,7 @@ _vtn_variable_store(struct vtn_builder *b, struct vtn_type *dest_type, dest_deref_tail->child = &deref->deref; for (unsigned i = 0; i < elems; i++) { deref->base_offset = i; - _vtn_variable_store(b, dest_type->array_element, dest_deref, - &deref->deref, src->elems[i]); + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); } } else { assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); @@ -975,8 +969,7 @@ _vtn_variable_store(struct vtn_builder *b, struct vtn_type *dest_type, for (unsigned i = 0; i < elems; i++) { deref->index = i; deref->deref.type = glsl_get_struct_field(src->type, i); - _vtn_variable_store(b, dest_type->members[i], dest_deref, - &deref->deref, src->elems[i]); + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); } } @@ -1117,7 +1110,7 @@ vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, if (src->var->interface_type && src->var->data.mode == nir_var_uniform) val = vtn_block_load(b, src, src_type, src_tail); else - val = _vtn_variable_load(b, src, src_type, src_tail); + val = _vtn_variable_load(b, src, src_tail); if (src_tail->child) { nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); @@ -1147,8 +1140,7 @@ vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, { nir_deref *dest_tail = get_deref_tail(dest); if (dest_tail->child) { - struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_type, - dest_tail); + struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); nir_deref_array *deref = nir_deref_as_array(dest_tail->child); assert(deref->deref.child == NULL); if (deref->deref_array_type == nir_deref_array_type_direct) @@ -1157,9 +1149,9 @@ vtn_variable_store(struct vtn_builder *b, struct 
vtn_ssa_value *src, else val->def = vtn_vector_insert_dynamic(b, val->def, src->def, deref->indirect.ssa); - _vtn_variable_store(b, dest_type, dest, dest_tail, val); + _vtn_variable_store(b, dest, dest_tail, val); } else { - _vtn_variable_store(b, dest_type, dest, dest_tail, src); + _vtn_variable_store(b, dest, dest_tail, src); } } -- cgit v1.2.3 From 9abef3e81748672a662ebf19ed52b263514c228f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 22 Oct 2015 17:28:25 -0700 Subject: nir/spirv: Make get_builtin_variable take a nir_variable_mode We'll want this in a moment for validation but, for now, it just gets stompped by get_builtin_variable. --- src/glsl/nir/spirv_to_nir.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 52b82e0d674..c8594085d5e 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -834,6 +834,7 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, static nir_variable * get_builtin_variable(struct vtn_builder *b, + nir_variable_mode mode, const struct glsl_type *type, SpvBuiltIn builtin) { @@ -841,7 +842,6 @@ get_builtin_variable(struct vtn_builder *b, if (!var) { int location; - nir_variable_mode mode; vtn_get_builtin_location(builtin, &location, &mode); var = nir_variable_create(b->shader, mode, type, "builtin"); @@ -1349,7 +1349,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, /* If we encounter a builtin, we throw away the ress of the * access chain, jump to the builtin, and keep building. 
*/ - nir_variable *builtin = get_builtin_variable(b, deref_type->type, + nir_variable *builtin = get_builtin_variable(b, + base->var->data.mode, + deref_type->type, deref_type->builtin); val->deref = nir_deref_var_create(b, builtin); tail = &val->deref->deref; -- cgit v1.2.3 From d11ea761682716c71d2cf31ff124ed25fa4a0a2a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 22 Oct 2015 17:45:41 -0700 Subject: nir/spirv: Make vtn_get_builtin_location smarter Instead of just stomping on the mode, it now validates asserts that the previously set mode is correct and only changes it if needed. We need to do this because, in geometry shaders, there are some builtins that can be either an input or an output depending on context. We can get that information from the SPIR-V source but we can't throw it away. --- src/glsl/nir/spirv_to_nir.c | 68 ++++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index c8594085d5e..973ff7c6777 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -633,21 +633,44 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, } static void -vtn_get_builtin_location(SpvBuiltIn builtin, int *location, +set_mode_system_value(nir_variable_mode *mode) +{ + assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); + *mode = nir_var_system_value; +} + +static void +validate_per_vertex_mode(struct vtn_builder *b, nir_variable_mode mode) +{ + switch (b->shader->stage) { + case MESA_SHADER_VERTEX: + assert(mode == nir_var_shader_out); + break; + case MESA_SHADER_GEOMETRY: + assert(mode == nir_var_shader_out || mode == nir_var_shader_in); + break; + default: + assert(!"Invalid shader stage"); + } +} + +static void +vtn_get_builtin_location(struct vtn_builder *b, + SpvBuiltIn builtin, int *location, nir_variable_mode *mode) { switch (builtin) { case SpvBuiltInPosition: *location = 
VARYING_SLOT_POS; - *mode = nir_var_shader_out; + validate_per_vertex_mode(b, *mode); break; case SpvBuiltInPointSize: *location = VARYING_SLOT_PSIZ; - *mode = nir_var_shader_out; + validate_per_vertex_mode(b, *mode); break; case SpvBuiltInClipDistance: *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */ - *mode = nir_var_shader_in; + validate_per_vertex_mode(b, *mode); break; case SpvBuiltInCullDistance: /* XXX figure this out */ @@ -657,11 +680,11 @@ vtn_get_builtin_location(SpvBuiltIn builtin, int *location, * builtin keyword VertexIndex to indicate the non-zero-based value. */ *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; - *mode = nir_var_system_value; + set_mode_system_value(mode); break; case SpvBuiltInInstanceId: *location = SYSTEM_VALUE_INSTANCE_ID; - *mode = nir_var_system_value; + set_mode_system_value(mode); break; case SpvBuiltInPrimitiveId: *location = VARYING_SLOT_PRIMITIVE_ID; @@ -669,7 +692,7 @@ vtn_get_builtin_location(SpvBuiltIn builtin, int *location, break; case SpvBuiltInInvocationId: *location = SYSTEM_VALUE_INVOCATION_ID; - *mode = nir_var_system_value; + set_mode_system_value(mode); break; case SpvBuiltInLayer: *location = VARYING_SLOT_LAYER; @@ -682,35 +705,40 @@ vtn_get_builtin_location(SpvBuiltIn builtin, int *location, unreachable("no tessellation support"); case SpvBuiltInFragCoord: *location = VARYING_SLOT_POS; - *mode = nir_var_shader_in; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_in); break; case SpvBuiltInPointCoord: *location = VARYING_SLOT_PNTC; - *mode = nir_var_shader_out; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_in); break; case SpvBuiltInFrontFacing: *location = VARYING_SLOT_FACE; - *mode = nir_var_shader_out; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_in); break; case SpvBuiltInSampleId: *location = SYSTEM_VALUE_SAMPLE_ID; - *mode = nir_var_shader_in; + set_mode_system_value(mode); break; 
case SpvBuiltInSamplePosition: *location = SYSTEM_VALUE_SAMPLE_POS; - *mode = nir_var_shader_in; + set_mode_system_value(mode); break; case SpvBuiltInSampleMask: *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */ - *mode = nir_var_shader_in; + set_mode_system_value(mode); break; case SpvBuiltInFragColor: *location = FRAG_RESULT_COLOR; - *mode = nir_var_shader_out; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_out); break; case SpvBuiltInFragDepth: *location = FRAG_RESULT_DEPTH; - *mode = nir_var_shader_out; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_out); break; case SpvBuiltInNumWorkgroups: case SpvBuiltInWorkgroupSize: @@ -723,11 +751,11 @@ vtn_get_builtin_location(SpvBuiltIn builtin, int *location, unreachable("unsupported builtin"); case SpvBuiltInWorkgroupId: *location = SYSTEM_VALUE_WORK_GROUP_ID; - *mode = nir_var_system_value; + set_mode_system_value(mode); break; case SpvBuiltInLocalInvocationId: *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; - *mode = nir_var_system_value; + set_mode_system_value(mode); break; case SpvBuiltInHelperInvocation: default: @@ -792,8 +820,8 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, case SpvDecorationBuiltIn: { SpvBuiltIn builtin = dec->literals[0]; - nir_variable_mode mode; - vtn_get_builtin_location(builtin, &var->data.location, &mode); + nir_variable_mode mode = var->data.mode; + vtn_get_builtin_location(b, builtin, &var->data.location, &mode); var->data.explicit_location = true; var->data.mode = mode; if (mode == nir_var_shader_in || mode == nir_var_system_value) @@ -842,7 +870,7 @@ get_builtin_variable(struct vtn_builder *b, if (!var) { int location; - vtn_get_builtin_location(builtin, &location, &mode); + vtn_get_builtin_location(b, builtin, &location, &mode); var = nir_variable_create(b->shader, mode, type, "builtin"); -- cgit v1.2.3 From 9fe907ec7999323035827483e8a8ea1db19863c7 Mon Sep 17 00:00:00 
2001 From: Jason Ekstrand Date: Thu, 22 Oct 2015 17:54:24 -0700 Subject: nir/spirv: Make the builtins array distinguish between in and out --- src/glsl/nir/spirv_to_nir.c | 16 +++++++++++++--- src/glsl/nir/spirv_to_nir_private.h | 5 ++++- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 973ff7c6777..245ee7424ad 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -830,7 +830,10 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) var->data.origin_upper_left = b->origin_upper_left; - b->builtins[dec->literals[0]] = var; + if (mode == nir_var_shader_out) + b->builtins[dec->literals[0]].out = var; + else + b->builtins[dec->literals[0]].in = var; break; } case SpvDecorationRowMajor: @@ -866,7 +869,11 @@ get_builtin_variable(struct vtn_builder *b, const struct glsl_type *type, SpvBuiltIn builtin) { - nir_variable *var = b->builtins[builtin]; + nir_variable *var; + if (mode == nir_var_shader_out) + var = b->builtins[builtin].out; + else + var = b->builtins[builtin].in; if (!var) { int location; @@ -880,7 +887,10 @@ get_builtin_variable(struct vtn_builder *b, if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) var->data.origin_upper_left = b->origin_upper_left; - b->builtins[builtin] = var; + if (mode == nir_var_shader_out) + b->builtins[builtin].out = var; + else + b->builtins[builtin].in = var; } return var; diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index 96044b05aac..45111f816bc 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -172,7 +172,10 @@ struct vtn_builder { /* * NIR variable for each SPIR-V builtin. 
*/ - nir_variable *builtins[42]; /* XXX need symbolic constant from SPIR-V header */ + struct { + nir_variable *in; + nir_variable *out; + } builtins[42]; /* XXX need symbolic constant from SPIR-V header */ unsigned value_id_bound; struct vtn_value *values; -- cgit v1.2.3 From ee8c67abe8fac06bb6efc45db6dd4c0238791bdf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 22 Oct 2015 17:58:20 -0700 Subject: nir/spirv: Add support for builtins in arrays --- src/glsl/nir/spirv_to_nir.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 245ee7424ad..ffdb5ea7fe0 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1387,12 +1387,41 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, /* If we encounter a builtin, we throw away the ress of the * access chain, jump to the builtin, and keep building. */ + const struct glsl_type *builtin_type = deref_type->type; + + nir_deref_array *per_vertex_deref = NULL; + if (glsl_type_is_array(base->var->type)) { + /* This builtin is a per-vertex builtin */ + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + assert(base->var->data.mode == nir_var_shader_in); + builtin_type = glsl_array_type(builtin_type, + b->shader->info.gs.vertices_in); + + /* The first non-var deref should be an array deref. */ + assert(val->deref->deref.child->deref_type == + nir_deref_type_array); + per_vertex_deref = nir_deref_as_array(val->deref->deref.child); + } + nir_variable *builtin = get_builtin_variable(b, base->var->data.mode, - deref_type->type, + builtin_type, deref_type->builtin); val->deref = nir_deref_var_create(b, builtin); - tail = &val->deref->deref; + + if (per_vertex_deref) { + /* Since deref chains start at the variable, we can just + * steal that link and use it. 
+ */ + val->deref->deref.child = &per_vertex_deref->deref; + per_vertex_deref->deref.child = NULL; + per_vertex_deref->deref.type = + glsl_get_array_element(builtin_type); + + tail = &per_vertex_deref->deref; + } else { + tail = &val->deref->deref; + } } else { tail = tail->child; } -- cgit v1.2.3 From 760c4b894db983c42e0aca60af252594af808b2a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 23 Oct 2015 10:48:52 -0700 Subject: anv/pipeline: Pull separate_shader from NIR for vue map setup --- src/vulkan/anv_pipeline.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 173c9787d91..da492d9ee7a 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -493,7 +493,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, brw_compute_vue_map(&pipeline->device->info, &prog_data->base.vue_map, nir->info.outputs_written, - false /* XXX: Do SSO? */); + nir->info.separate_shader); unsigned code_size; const unsigned *shader_code = @@ -552,7 +552,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, brw_compute_vue_map(&pipeline->device->info, &prog_data->base.vue_map, nir->info.outputs_written, - false /* XXX: Do SSO? 
*/); + nir->info.separate_shader); unsigned code_size; const unsigned *shader_code = -- cgit v1.2.3 From 8aba8cf513a0856415e2389b729b76417cd89584 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 23 Oct 2015 10:53:00 -0700 Subject: anv/pipeline: Use separate-shader --- src/vulkan/anv_pipeline.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index da492d9ee7a..c6d2395cdc4 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -170,6 +170,9 @@ anv_shader_compile_to_nir(struct anv_device *device, } nir_validate_shader(nir); + /* Vulkan uses the separate-shader linking model */ + nir->info.separate_shader = true; + /* Make sure the provided shader has exactly one entrypoint and that the * name matches the name that came in from the VkShader. */ -- cgit v1.2.3 From 4c59ee808f79b0429afcd9a6f307efb9c36637d5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 23 Oct 2015 16:49:26 -0700 Subject: anv/gen8_pipeline: Various 3DSTATE_GS fixes --- src/vulkan/gen8_pipeline.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 6d0edf0ec87..3468ce02154 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -412,7 +412,7 @@ gen8_graphics_pipeline_create( anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, .SingleProgramFlow = false, .KernelStartPointer = pipeline->gs_vec4, - .VectorMaskEnable = Vmask, + .VectorMaskEnable = Dmask, .SamplerCount = 0, .BindingTableEntryCount = 0, .ExpectedVertexCount = pipeline->gs_vertex_count, @@ -428,7 +428,7 @@ gen8_graphics_pipeline_create( .MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1, .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, - //pipeline->gs_prog_data.dispatch_mode | + .DispatchMode = gs_prog_data->base.dispatch_mode, .StatisticsEnable = true, .IncludePrimitiveID = 
gs_prog_data->include_primitive_id, .ReorderMode = TRAILING, @@ -436,6 +436,11 @@ gen8_graphics_pipeline_create( .ControlDataFormat = gs_prog_data->control_data_format, + .StaticOutput = gs_prog_data->static_vertex_count >= 0, + .StaticOutputVertexCount = + gs_prog_data->static_vertex_count >= 0 ? + gs_prog_data->static_vertex_count : 0, + /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) * UserClipDistanceCullTestEnableBitmask(v) -- cgit v1.2.3 From 9006e555ce5ca539289a71d5234f876752a6c00d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 23 Oct 2015 16:50:28 -0700 Subject: anv/pipeline: Bump the size of the pipeline batch to accomodate GS The 1k batch size wasn't big enough for a full pipeline setup including geometry shaders. Some day we should make it dynamic. --- src/vulkan/anv_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index c1a7b01e8b0..ac3d18fdcbe 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1083,7 +1083,7 @@ struct anv_shader { struct anv_pipeline { struct anv_device * device; struct anv_batch batch; - uint32_t batch_data[256]; + uint32_t batch_data[512]; struct anv_reloc_list batch_relocs; uint32_t dynamic_state_mask; struct anv_dynamic_state dynamic_state; -- cgit v1.2.3 From ab6ed2e1ac98745c62a10a82e414eb17e4e4ba44 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 23 Oct 2015 21:30:21 -0700 Subject: anv/gen8_pipeline: Emit a real 3DSTATE_SBE_SWIZ packet --- src/vulkan/gen8_pipeline.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 3468ce02154..a51cf4924ae 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -351,7 +351,6 @@ gen8_graphics_pipeline_create( 
anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM_CHROMAKEY, .ChromaKeyKillEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE_SWIZ); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, @@ -496,13 +495,51 @@ gen8_graphics_pipeline_create( const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + /* TODO: We should clean this up. Among other things, this is mostly + * shared with other gens. + */ + const struct brw_vue_map *fs_input_map; + if (pipeline->gs_vec4 == NO_KERNEL) + fs_input_map = &vue_prog_data->vue_map; + else + fs_input_map = &gs_prog_data->base.vue_map; + + struct GEN8_3DSTATE_SBE_SWIZ swiz = { + GEN8_3DSTATE_SBE_SWIZ_header, + }; + + int max_source_attr = 0; + for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { + int input_index = wm_prog_data->urb_setup[attr]; + + if (input_index < 0) + continue; + + /* We have to subtract two slots to accout for the URB entry output + * read offset in the VS and GS stages. 
+ */ + int source_attr = fs_input_map->varying_to_slot[attr] - 2; + max_source_attr = MAX2(max_source_attr, source_attr); + + if (input_index >= 16) + continue; + + swiz.Attribute[input_index].SourceAttribute = source_attr; + } + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, + .AttributeSwizzleEnable = true, .ForceVertexURBEntryReadLength = false, .ForceVertexURBEntryReadOffset = false, + .VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2), .PointSpriteTextureCoordinateOrigin = UPPERLEFT, .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs); + uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, + GEN8_3DSTATE_SBE_SWIZ_length); + GEN8_3DSTATE_SBE_SWIZ_pack(&pipeline->batch, dw, &swiz); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, .KernelStartPointer0 = pipeline->ps_ksp0, -- cgit v1.2.3 From 37b6afb3d96a7bc68129a312c77dfaf94121a70f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 23 Oct 2015 21:30:38 -0700 Subject: Add a todo comment about intput_slots_valid in the FS shader key --- src/vulkan/anv_pipeline.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index c6d2395cdc4..59f304f55df 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -305,6 +305,8 @@ populate_wm_prog_key(const struct brw_device_info *devinfo, populate_sampler_prog_key(devinfo, &key->tex); + /* TODO: Fill out key->input_slots_valid */ + /* Vulkan doesn't specify a default */ key->high_quality_derivatives = false; -- cgit v1.2.3 From 800a9706f01e0e32240b416290b98af3dd5f0702 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 27 Oct 2015 11:45:15 -0700 Subject: nir: Add a vulkan_resource_index intrinsic --- src/glsl/nir/nir_intrinsics.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 68a18b9c11a..9ef8f5bc6eb 100644 --- 
a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -156,6 +156,25 @@ INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* + * Vulkan descriptor set intrinsic + * + * The Vulkan API uses a different binding model from GL. In the Vulkan + * API, all external resources are represented by a tripple: + * + * (descriptor set, binding, array index) + * + * where the array index is the only thing allowed to be indirect. The + * vulkan_surface_index intrinsic takes the descriptor set and binding as + * its first two indices and the array index as its source. The third + * index is a nir_variable_mode in case that's useful to the backend. + * + * The intended usage is that the shader will call vulkan_surface_index to + * get an index and then pass that as the buffer index ubo/ssbo calls. + */ +INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 3, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + /* * SSBO atomic intrinsics * -- cgit v1.2.3 From 3d44b3aaa68e1dcf24ddb1b3c0370623f2ccfa50 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 27 Oct 2015 13:41:54 -0700 Subject: nir/spirv: Use the new vulkan_resource_index intrinsic This is instead of using the _vk versions of UBO/SSBO load/store intrinsics --- src/glsl/nir/spirv_to_nir.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index ffdb5ea7fe0..1eca267a047 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1014,22 +1014,43 @@ _vtn_variable_store(struct vtn_builder *b, dest_deref_tail->child = old_child; } +static nir_ssa_def * +nir_vulkan_resource_index(nir_builder *b, unsigned set, unsigned binding, + nir_variable_mode mode, nir_ssa_def *array_index) +{ + if (array_index == NULL) + array_index = nir_imm_int(b, 
0); + + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_vulkan_resource_index); + instr->src[0] = nir_src_for_ssa(array_index); + instr->const_index[0] = set; + instr->const_index[1] = binding; + instr->const_index[2] = mode; + + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(b, &instr->instr); + + return &instr->dest.ssa; +} + static struct vtn_ssa_value * _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, - unsigned set, unsigned binding, nir_ssa_def *index, - unsigned offset, nir_ssa_def *indirect, + unsigned set, unsigned binding, nir_variable_mode mode, + nir_ssa_def *index, unsigned offset, nir_ssa_def *indirect, struct vtn_type *type) { struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); val->type = type->type; val->transposed = NULL; if (glsl_type_is_vector_or_scalar(type->type)) { + nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, set, binding, + mode, index); nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); load->num_components = glsl_get_vector_elements(type->type); - load->const_index[0] = set; - load->const_index[1] = binding; - load->src[0] = nir_src_for_ssa(index); - load->const_index[2] = offset; + load->src[0] = nir_src_for_ssa(res_index); + load->const_index[0] = offset; if (indirect) load->src[1] = nir_src_for_ssa(indirect); nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); @@ -1040,13 +1061,13 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); if (glsl_type_is_struct(type->type)) { for (unsigned i = 0; i < elems; i++) { - val->elems[i] = _vtn_block_load(b, op, set, binding, index, + val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, offset + type->offsets[i], indirect, type->members[i]); } } else { for (unsigned i = 0; i < elems; i++) { - val->elems[i] = _vtn_block_load(b, op, set, binding, index, + val->elems[i] 
= _vtn_block_load(b, op, set, binding, mode, index, offset + i * type->stride, indirect, type->array_element); } @@ -1062,6 +1083,7 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src, { unsigned set = src->var->data.descriptor_set; unsigned binding = src->var->data.binding; + nir_variable_mode mode = src->var->data.mode; nir_deref *deref = &src->deref; @@ -1109,10 +1131,11 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src, } /* TODO SSBO's */ - nir_intrinsic_op op = indirect ? nir_intrinsic_load_ubo_vk_indirect - : nir_intrinsic_load_ubo_vk; + nir_intrinsic_op op = indirect ? nir_intrinsic_load_ubo_indirect + : nir_intrinsic_load_ubo; - return _vtn_block_load(b, op, set, binding, index, offset, indirect, type); + return _vtn_block_load(b, op, set, binding, mode, index, + offset, indirect, type); } /* -- cgit v1.2.3 From a6be53223ee8624b48e480158326ec32cf09dfe5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 27 Oct 2015 13:42:51 -0700 Subject: anv/nir: Work with the new vulkan_resource_index intrinsic --- src/vulkan/anv_nir_apply_dynamic_offsets.c | 44 +++++++++++--------- src/vulkan/anv_nir_apply_pipeline_layout.c | 67 +++++++----------------------- 2 files changed, 39 insertions(+), 72 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c index afb3313ccb4..e14644cd222 100644 --- a/src/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c @@ -50,22 +50,27 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state) unsigned block_idx_src; switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo_vk: - case nir_intrinsic_load_ubo_vk_indirect: - case nir_intrinsic_load_ssbo_vk: - case nir_intrinsic_load_ssbo_vk_indirect: + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ubo_indirect: + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_ssbo_indirect: block_idx_src = 0; break; - case 
nir_intrinsic_store_ssbo_vk: - case nir_intrinsic_store_ssbo_vk_indirect: + case nir_intrinsic_store_ssbo: + case nir_intrinsic_store_ssbo_indirect: block_idx_src = 1; break; default: continue; /* the loop */ } - unsigned set = intrin->const_index[0]; - unsigned binding = intrin->const_index[1]; + nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr; + assert(res_instr->type == nir_instr_type_intrinsic); + nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr); + assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); + + unsigned set = res_intrin->const_index[0]; + unsigned binding = res_intrin->const_index[1]; set_layout = state->layout->set[set].layout; if (set_layout->binding[binding].dynamic_offset_index < 0) @@ -75,11 +80,11 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state) int indirect_src; switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo_vk_indirect: - case nir_intrinsic_load_ssbo_vk_indirect: + case nir_intrinsic_load_ubo_indirect: + case nir_intrinsic_load_ssbo_indirect: indirect_src = 1; break; - case nir_intrinsic_store_ssbo_vk_indirect: + case nir_intrinsic_store_ssbo_indirect: indirect_src = 2; break; default: @@ -92,7 +97,7 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state) set_layout->binding[binding].dynamic_offset_index; nir_const_value *const_arr_idx = - nir_src_as_const_value(intrin->src[block_idx_src]); + nir_src_as_const_value(res_intrin->src[0]); nir_intrinsic_op offset_load_op; if (const_arr_idx) @@ -109,7 +114,8 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state) offset_load->const_index[1] = const_arr_idx->u[0]; } else { offset_load->const_index[1] = 0; - nir_src_copy(&offset_load->src[0], &intrin->src[0], &intrin->instr); + nir_src_copy(&offset_load->src[0], &res_intrin->src[0], + &intrin->instr); } nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 1, NULL); @@ -130,14 +136,14 @@ apply_dynamic_offsets_block(nir_block *block, void 
*void_state) nir_intrinsic_op indirect_op; switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo_vk: - indirect_op = nir_intrinsic_load_ubo_vk_indirect; + case nir_intrinsic_load_ubo: + indirect_op = nir_intrinsic_load_ubo_indirect; break; - case nir_intrinsic_load_ssbo_vk: - indirect_op = nir_intrinsic_load_ssbo_vk_indirect; + case nir_intrinsic_load_ssbo: + indirect_op = nir_intrinsic_load_ssbo_indirect; break; - case nir_intrinsic_store_ssbo_vk: - indirect_op = nir_intrinsic_store_ssbo_vk_indirect; + case nir_intrinsic_store_ssbo: + indirect_op = nir_intrinsic_store_ssbo_indirect; break; default: unreachable("Invalid direct load/store intrinsic"); diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c index dea2dee3b32..8270dc13981 100644 --- a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@ -34,27 +34,6 @@ struct apply_pipeline_layout_state { bool progress; }; -static nir_intrinsic_op -lowered_op(nir_intrinsic_op op) -{ - switch (op) { - case nir_intrinsic_load_ubo_vk: - return nir_intrinsic_load_ubo; - case nir_intrinsic_load_ubo_vk_indirect: - return nir_intrinsic_load_ubo_indirect; - case nir_intrinsic_load_ssbo_vk: - return nir_intrinsic_load_ssbo; - case nir_intrinsic_load_ssbo_vk_indirect: - return nir_intrinsic_load_ssbo_indirect; - case nir_intrinsic_store_ssbo_vk: - return nir_intrinsic_store_ssbo; - case nir_intrinsic_store_ssbo_vk_indirect: - return nir_intrinsic_store_ssbo_indirect; - default: - unreachable("Invalid intrinsic for lowering"); - } -} - static uint32_t get_surface_index(unsigned set, unsigned binding, struct apply_pipeline_layout_state *state) @@ -76,28 +55,12 @@ get_surface_index(unsigned set, unsigned binding, return surface_index; } -static bool -try_lower_intrinsic(nir_intrinsic_instr *intrin, - struct apply_pipeline_layout_state *state) +static void +lower_res_index_intrinsic(nir_intrinsic_instr *intrin, + struct 
apply_pipeline_layout_state *state) { nir_builder *b = &state->builder; - int block_idx_src; - switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo_vk: - case nir_intrinsic_load_ubo_vk_indirect: - case nir_intrinsic_load_ssbo_vk: - case nir_intrinsic_load_ssbo_vk_indirect: - block_idx_src = 0; - break; - case nir_intrinsic_store_ssbo_vk: - case nir_intrinsic_store_ssbo_vk_indirect: - block_idx_src = 1; - break; - default: - return false; - } - b->cursor = nir_before_instr(&intrin->instr); uint32_t set = intrin->const_index[0]; @@ -106,25 +69,19 @@ try_lower_intrinsic(nir_intrinsic_instr *intrin, uint32_t surface_index = get_surface_index(set, binding, state); nir_const_value *const_block_idx = - nir_src_as_const_value(intrin->src[block_idx_src]); + nir_src_as_const_value(intrin->src[0]); nir_ssa_def *block_index; if (const_block_idx) { block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]); } else { block_index = nir_iadd(b, nir_imm_int(b, surface_index), - nir_ssa_for_src(b, intrin->src[block_idx_src], 1)); + nir_ssa_for_src(b, intrin->src[0], 1)); } - nir_instr_rewrite_src(&intrin->instr, &intrin->src[block_idx_src], - nir_src_for_ssa(block_index)); - - intrin->intrinsic = lowered_op(intrin->intrinsic); - /* Shift the offset indices down */ - intrin->const_index[0] = intrin->const_index[2]; - intrin->const_index[1] = intrin->const_index[3]; - - return true; + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index)); + nir_instr_remove(&intrin->instr); } static void @@ -177,10 +134,14 @@ apply_pipeline_layout_block(nir_block *block, void *void_state) nir_foreach_instr_safe(block, instr) { switch (instr->type) { - case nir_instr_type_intrinsic: - if (try_lower_intrinsic(nir_instr_as_intrinsic(instr), state)) + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) { + 
lower_res_index_intrinsic(intrin, state); state->progress = true; + } break; + } case nir_instr_type_tex: lower_tex(nir_instr_as_tex(instr), state); /* All texture instructions need lowering */ -- cgit v1.2.3 From 423e7a55cce647811dc2d7b5c03bc35e9e7fac45 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 27 Oct 2015 13:43:25 -0700 Subject: Revert "nir/intrinsics: Add new Vulkan load/store intrinsics" This reverts commit 24bcc89c8fa326b838e9fea002065a40d4d04314. Now that we have the new vulkan_resource_index intrinsic, these variants of the classic UBO/SSBO intrinsics aren't needed. --- src/glsl/nir/nir_intrinsics.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 9ef8f5bc6eb..e13951081e9 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -248,14 +248,11 @@ SYSTEM_VALUE(num_work_groups, 3, 0) true, 0, 0, indices, flags) LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) -LOAD(ubo_vk, 1, 3, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ssbo_vk, 1, 3, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) - /* * Stores work the same way as loads, except now the first register input is * the value or array to store and the optional second input is the indirect @@ -274,6 +271,4 @@ LOAD(ssbo_vk, 1, 3, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) STORE(output, 0, 0, 0, 0) STORE(ssbo, 1, 1, 1, 0) -STORE(ssbo_vk, 1, 1, 3, 0) - -LAST_INTRINSIC(store_ssbo_vk_indirect) +LAST_INTRINSIC(store_ssbo_indirect) -- cgit v1.2.3 From 
12feda0c095b7c4d598b8d4e4a0359833354b0b1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 27 Oct 2015 13:44:14 -0700 Subject: Revert "nir/intrinsic: Allow up to four indices" This reverts commit 5eccd0b4b947d806c0725899a827e622e605f2cc. This was only needed for the store_ssbo_vk_indirect intrinsic --- src/glsl/nir/nir.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index d0304bebbb0..ac418dfca90 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -874,7 +874,7 @@ typedef struct { */ uint8_t num_components; - int const_index[4]; + int const_index[3]; nir_deref_var *variables[2]; -- cgit v1.2.3 From c284c39b135821a9417b95319fa6726e5892bef9 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 29 Oct 2015 10:59:55 -0700 Subject: anv: Fix parsing of load ops in VkAttachmentDescription My original understanding of VkAttachmentDescription::loadOp, stencilLoadOp was incorrect. Below are all possible combinations: VkFormat | loadOp=clear stencilLoadOp=clear ---------------+--------------------------- color | clear-color ignored depth-only | clear-depth ignored stencil-only | ignored clear-stencil depth-stencil | clear-depth clear-stencil --- src/vulkan/anv_device.c | 18 ++++++++++++------ src/vulkan/anv_meta.c | 17 +++++++++++------ 2 files changed, 23 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index d450b2b4e87..05e723fe60d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1894,15 +1894,21 @@ VkResult anv_CreateRenderPass( // att->store_op = pCreateInfo->pAttachments[i].storeOp; // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - if (anv_format_is_color(att->format)) { + if (anv_format_is_color(att->format)) { + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { ++pass->num_color_clear_attachments; - } else if 
(att->format->depth_format) { + } + } else { + if (att->format->depth_format && + att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { pass->has_depth_clear_attachment = true; } - } else if (att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - assert(att->format->has_stencil); - pass->has_stencil_clear_attachment = true; + + if (att->format->has_stencil && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + assert(att->format->has_stencil); + pass->has_stencil_clear_attachment = true; + } } } diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 8bfab1f8323..cc605197f9b 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -446,8 +446,8 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, for (uint32_t i = 0; i < pass->attachment_count; i++) { const struct anv_render_pass_attachment *att = &pass->attachments[i]; - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - if (anv_format_is_color(att->format)) { + if (anv_format_is_color(att->format)) { + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { instance_data[layer] = (struct clear_instance_data) { .vue_header = { .RTAIndex = i, @@ -458,14 +458,19 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, }; color_attachments[layer] = i; layer++; - } else if (att->format->depth_format) { + } + } else { + if (att->format->depth_format && + att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { assert(ds_attachment == VK_ATTACHMENT_UNUSED); ds_attachment = i; ds_clear_value = clear_values[ds_attachment].depthStencil; } - } else if (att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - assert(att->format->has_stencil); - anv_finishme("stencil clear"); + + if (att->format->has_stencil && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + anv_finishme("stencil clear"); + } } } -- cgit v1.2.3 From 1e981774390353618156aa1bc07657708be4fb17 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 29 Oct 2015 11:08:23 -0700 Subject: anv/pass: Move VkRenderPass 
code to new file Move it from anv_device.c to new file anv_pass.c. Because it will soon grow bigger. --- src/vulkan/Makefile.am | 1 + src/vulkan/anv_device.c | 137 ---------------------------------------- src/vulkan/anv_pass.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 162 insertions(+), 137 deletions(-) create mode 100644 src/vulkan/anv_pass.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 8538046e567..475fa4f2ad2 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -69,6 +69,7 @@ VULKAN_SOURCES = \ anv_meta.c \ anv_nir_apply_dynamic_offsets.c \ anv_nir_apply_pipeline_layout.c \ + anv_pass.c \ anv_pipeline.c \ anv_private.h \ anv_query.c \ diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 05e723fe60d..9def97801c2 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1854,143 +1854,6 @@ void anv_DestroyFramebuffer( anv_device_free(device, fb); } -VkResult anv_CreateRenderPass( - VkDevice _device, - const VkRenderPassCreateInfo* pCreateInfo, - VkRenderPass* pRenderPass) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_render_pass *pass; - size_t size; - size_t attachments_offset; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); - - size = sizeof(*pass); - size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); - attachments_offset = size; - size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); - - pass = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (pass == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* Clear the subpasses along with the parent pass. This required because - * each array member of anv_subpass must be a valid pointer if not NULL. 
- */ - memset(pass, 0, size); - pass->attachment_count = pCreateInfo->attachmentCount; - pass->subpass_count = pCreateInfo->subpassCount; - pass->attachments = (void *) pass + attachments_offset; - - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - struct anv_render_pass_attachment *att = &pass->attachments[i]; - - att->format = anv_format_for_vk_format(pCreateInfo->pAttachments[i].format); - att->samples = pCreateInfo->pAttachments[i].samples; - att->load_op = pCreateInfo->pAttachments[i].loadOp; - att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; - // att->store_op = pCreateInfo->pAttachments[i].storeOp; - // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; - - if (anv_format_is_color(att->format)) { - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - ++pass->num_color_clear_attachments; - } - } else { - if (att->format->depth_format && - att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - pass->has_depth_clear_attachment = true; - } - - if (att->format->has_stencil && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - assert(att->format->has_stencil); - pass->has_stencil_clear_attachment = true; - } - } - } - - for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { - const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; - struct anv_subpass *subpass = &pass->subpasses[i]; - - subpass->input_count = desc->inputCount; - subpass->color_count = desc->colorCount; - - if (desc->inputCount > 0) { - subpass->input_attachments = - anv_device_alloc(device, desc->inputCount * sizeof(uint32_t), - 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - - for (uint32_t j = 0; j < desc->inputCount; j++) { - subpass->input_attachments[j] - = desc->pInputAttachments[j].attachment; - } - } - - if (desc->colorCount > 0) { - subpass->color_attachments = - anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), - 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - - for (uint32_t j = 0; j < desc->colorCount; j++) { - 
subpass->color_attachments[j] - = desc->pColorAttachments[j].attachment; - } - } - - if (desc->pResolveAttachments) { - subpass->resolve_attachments = - anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), - 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - - for (uint32_t j = 0; j < desc->colorCount; j++) { - subpass->resolve_attachments[j] - = desc->pResolveAttachments[j].attachment; - } - } - - subpass->depth_stencil_attachment = desc->depthStencilAttachment.attachment; - } - - *pRenderPass = anv_render_pass_to_handle(pass); - - return VK_SUCCESS; -} - -void anv_DestroyRenderPass( - VkDevice _device, - VkRenderPass _pass) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_render_pass, pass, _pass); - - for (uint32_t i = 0; i < pass->subpass_count; i++) { - /* In VkSubpassCreateInfo, each of the attachment arrays may be null. - * Don't free the null arrays. - */ - struct anv_subpass *subpass = &pass->subpasses[i]; - - anv_device_free(device, subpass->input_attachments); - anv_device_free(device, subpass->color_attachments); - anv_device_free(device, subpass->resolve_attachments); - } - - anv_device_free(device, pass); -} - -VkResult anv_GetRenderAreaGranularity( - VkDevice device, - VkRenderPass renderPass, - VkExtent2D* pGranularity) -{ - *pGranularity = (VkExtent2D) { 1, 1 }; - - return VK_SUCCESS; -} - void vkCmdDbgMarkerBegin( VkCmdBuffer cmdBuffer, const char* pMarker) diff --git a/src/vulkan/anv_pass.c b/src/vulkan/anv_pass.c new file mode 100644 index 00000000000..3eab1eb9181 --- /dev/null +++ b/src/vulkan/anv_pass.c @@ -0,0 +1,161 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to 
permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +VkResult anv_CreateRenderPass( + VkDevice _device, + const VkRenderPassCreateInfo* pCreateInfo, + VkRenderPass* pRenderPass) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_render_pass *pass; + size_t size; + size_t attachments_offset; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + + size = sizeof(*pass); + size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); + attachments_offset = size; + size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); + + pass = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pass == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Clear the subpasses along with the parent pass. This required because + * each array member of anv_subpass must be a valid pointer if not NULL. 
+ */ + memset(pass, 0, size); + pass->attachment_count = pCreateInfo->attachmentCount; + pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = (void *) pass + attachments_offset; + + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + struct anv_render_pass_attachment *att = &pass->attachments[i]; + + att->format = anv_format_for_vk_format(pCreateInfo->pAttachments[i].format); + att->samples = pCreateInfo->pAttachments[i].samples; + att->load_op = pCreateInfo->pAttachments[i].loadOp; + att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + // att->store_op = pCreateInfo->pAttachments[i].storeOp; + // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + + if (anv_format_is_color(att->format)) { + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + ++pass->num_color_clear_attachments; + } + } else { + if (att->format->depth_format && + att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + pass->has_depth_clear_attachment = true; + } + + if (att->format->has_stencil && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + assert(att->format->has_stencil); + pass->has_stencil_clear_attachment = true; + } + } + } + + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + struct anv_subpass *subpass = &pass->subpasses[i]; + + subpass->input_count = desc->inputCount; + subpass->color_count = desc->colorCount; + + if (desc->inputCount > 0) { + subpass->input_attachments = + anv_device_alloc(device, desc->inputCount * sizeof(uint32_t), + 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + + for (uint32_t j = 0; j < desc->inputCount; j++) { + subpass->input_attachments[j] + = desc->pInputAttachments[j].attachment; + } + } + + if (desc->colorCount > 0) { + subpass->color_attachments = + anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), + 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + + for (uint32_t j = 0; j < desc->colorCount; j++) { + 
subpass->color_attachments[j] + = desc->pColorAttachments[j].attachment; + } + } + + if (desc->pResolveAttachments) { + subpass->resolve_attachments = + anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), + 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + + for (uint32_t j = 0; j < desc->colorCount; j++) { + subpass->resolve_attachments[j] + = desc->pResolveAttachments[j].attachment; + } + } + + subpass->depth_stencil_attachment = desc->depthStencilAttachment.attachment; + } + + *pRenderPass = anv_render_pass_to_handle(pass); + + return VK_SUCCESS; +} + +void anv_DestroyRenderPass( + VkDevice _device, + VkRenderPass _pass) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_render_pass, pass, _pass); + + for (uint32_t i = 0; i < pass->subpass_count; i++) { + /* In VkSubpassCreateInfo, each of the attachment arrays may be null. + * Don't free the null arrays. + */ + struct anv_subpass *subpass = &pass->subpasses[i]; + + anv_device_free(device, subpass->input_attachments); + anv_device_free(device, subpass->color_attachments); + anv_device_free(device, subpass->resolve_attachments); + } + + anv_device_free(device, pass); +} + +VkResult anv_GetRenderAreaGranularity( + VkDevice device, + VkRenderPass renderPass, + VkExtent2D* pGranularity) +{ + *pGranularity = (VkExtent2D) { 1, 1 }; + + return VK_SUCCESS; +} -- cgit v1.2.3 From 4073219cf186ca87cd1bffaabc1b2b12a9c0d563 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 29 Oct 2015 11:47:39 -0700 Subject: anv/pass: Remove redundant assert Trivial fix. 
--- src/vulkan/anv_pass.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_pass.c b/src/vulkan/anv_pass.c index 3eab1eb9181..c3060239906 100644 --- a/src/vulkan/anv_pass.c +++ b/src/vulkan/anv_pass.c @@ -75,7 +75,6 @@ VkResult anv_CreateRenderPass( if (att->format->has_stencil && att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - assert(att->format->has_stencil); pass->has_stencil_clear_attachment = true; } } -- cgit v1.2.3 From f2a8c9db2435c19415bf3a0cea498779bfa1f9c9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 29 Oct 2015 10:22:09 -0700 Subject: nir/spirv: Rework the way we handle interface types --- src/glsl/nir/spirv_to_nir.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 1eca267a047..0aa1bee3bac 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1246,29 +1246,35 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, vtn_value(b, w[1], vtn_value_type_type)->type; struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); - nir_variable *var = ralloc(b->shader, nir_variable); + nir_variable *var = rzalloc(b->shader, nir_variable); var->type = type->type; var->name = ralloc_strdup(var, val->name); - bool builtin_block = false; + struct vtn_type *interface_type; if (type->block) { - var->interface_type = type->type; - builtin_block = type->builtin_block; + interface_type = type; } else if (glsl_type_is_array(type->type) && (type->array_element->block || type->array_element->buffer_block)) { - var->interface_type = type->array_element->type; - builtin_block = type->array_element->builtin_block; + interface_type = type->array_element; } else { - var->interface_type = NULL; + interface_type = NULL; } + if (interface_type) + var->interface_type = interface_type->type; + switch ((SpvStorageClass)w[3]) { case SpvStorageClassUniform: 
case SpvStorageClassUniformConstant: - var->data.mode = nir_var_uniform; - var->data.read_only = true; + if (interface_type && interface_type->buffer_block) { + var->data.mode = nir_var_shader_storage; + } else { + /* UBO's and samplers */ + var->data.mode = nir_var_uniform; + var->data.read_only = true; + } break; case SpvStorageClassInput: var->data.mode = nir_var_shader_in; @@ -1321,17 +1327,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } } - /* If this was a uniform block, then we're not going to actually use the - * variable (we're only going to use it to compute offsets), so don't - * declare it in the shader. - */ - if (var->data.mode == nir_var_uniform && var->interface_type) - break; - - /* Builtin blocks are lowered to individual variables during SPIR-V -> - * NIR, so don't declare them either. - */ - if (builtin_block) + /* Interface variables aren't actually going to be referenced by the + * generated NIR, so we don't put them in the list */ + if (interface_type) break; if (var->data.mode == nir_var_local) { -- cgit v1.2.3 From a2283508b0426df95968c46909d6ff16fa29a554 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 29 Oct 2015 21:35:27 -0700 Subject: nir/intrinsics: Add a load_push_constant intrinsic --- src/glsl/nir/nir_intrinsics.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index e13951081e9..b2ceff566cf 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -252,6 +252,7 @@ LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) +LOAD(push_constant, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* * Stores work the same way as loads, except now the first register input is -- 
cgit v1.2.3 From 1f2624e6dd75860156a0385c3ccfb351a9206cec Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 29 Oct 2015 14:33:05 -0700 Subject: nir/spirv: Add support for push constants --- src/glsl/nir/spirv_to_nir.c | 111 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 104 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 0aa1bee3bac..7c9567dd8b7 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1045,14 +1045,34 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, val->type = type->type; val->transposed = NULL; if (glsl_type_is_vector_or_scalar(type->type)) { - nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, set, binding, - mode, index); nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); load->num_components = glsl_get_vector_elements(type->type); - load->src[0] = nir_src_for_ssa(res_index); load->const_index[0] = offset; - if (indirect) + + switch (op) { + case nir_intrinsic_load_ubo_indirect: + case nir_intrinsic_load_ssbo_indirect: load->src[1] = nir_src_for_ssa(indirect); + /* fall through */ + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: { + nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, + set, binding, + mode, index); + load->src[0] = nir_src_for_ssa(res_index); + break; + } + + case nir_intrinsic_load_push_constant: + break; /* Nothing to do */ + case nir_intrinsic_load_push_constant_indirect: + load->src[0] = nir_src_for_ssa(indirect); + break; + + default: + unreachable("Invalid block load intrinsic"); + } + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); nir_builder_instr_insert(&b->nb, &load->instr); val->def = &load->dest.ssa; @@ -1130,9 +1150,27 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src, } } - /* TODO SSBO's */ - nir_intrinsic_op op = indirect ? 
nir_intrinsic_load_ubo_indirect - : nir_intrinsic_load_ubo; + nir_intrinsic_op op; + if (src->var->data.mode == nir_var_uniform) { + if (src->var->data.descriptor_set >= 0) { + /* UBO load */ + assert(src->var->data.binding >= 0); + + op = indirect ? nir_intrinsic_load_ubo_indirect + : nir_intrinsic_load_ubo; + } else { + /* Push constant load */ + assert(src->var->data.descriptor_set == -1 && + src->var->data.binding == -1); + + op = indirect ? nir_intrinsic_load_push_constant_indirect + : nir_intrinsic_load_push_constant; + } + } else { + assert(src->var->data.mode == nir_var_shader_storage); + op = indirect ? nir_intrinsic_load_ssbo_indirect + : nir_intrinsic_load_ssbo; + } return _vtn_block_load(b, op, set, binding, mode, index, offset, indirect, type); @@ -1236,6 +1274,54 @@ vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, } } +/* Tries to compute the size of an interface block based on the strides and + * offsets that are provided to us in the SPIR-V source. + */ +static unsigned +vtn_type_block_size(struct vtn_type *type) +{ + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: { + unsigned cols = type->row_major ? 
glsl_get_vector_elements(type->type) : + glsl_get_matrix_columns(type->type); + if (cols > 1) { + assert(type->stride > 0); + return type->stride * cols; + } else if (base_type == GLSL_TYPE_DOUBLE) { + return glsl_get_vector_elements(type->type) * 8; + } else { + return glsl_get_vector_elements(type->type) * 4; + } + } + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + unsigned num_fields = glsl_get_length(type->type); + for (unsigned f = 0; f < num_fields; f++) { + unsigned field_end = type->offsets[f] + + vtn_type_block_size(type->members[f]); + size = MAX2(size, field_end); + } + return size; + } + + case GLSL_TYPE_ARRAY: + assert(type->stride > 0); + assert(glsl_get_length(type->type) > 0); + return type->stride * glsl_get_length(type->type); + + default: + assert(!"Invalid block type"); + return 0; + } +} + static void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -1276,6 +1362,17 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, var->data.read_only = true; } break; + case SpvStorageClassPushConstant: + assert(interface_type && interface_type->block); + var->data.mode = nir_var_uniform; + var->data.read_only = true; + var->data.descriptor_set = -1; + var->data.binding = -1; + + /* We have exactly one push constant block */ + assert(b->shader->num_uniforms == 0); + b->shader->num_uniforms = vtn_type_block_size(type); + break; case SpvStorageClassInput: var->data.mode = nir_var_shader_in; var->data.read_only = true; -- cgit v1.2.3 From 3883728730f293b763a5641560375b18d4f97782 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 29 Oct 2015 22:24:54 -0700 Subject: anv: Add better push constant support What we had before was kind of a hack where we made certain untrue assumptions about the incoming data. This new support, while it still doesn't support indirects properly (that will come), at least pulls the offsets and strides from SPIR-V like it's supposed to. 
--- src/vulkan/Makefile.am | 1 + src/vulkan/anv_nir.h | 2 + src/vulkan/anv_nir_lower_push_constants.c | 107 ++++++++++++++++++++++++++++++ src/vulkan/anv_pipeline.c | 16 +---- 4 files changed, 113 insertions(+), 13 deletions(-) create mode 100644 src/vulkan/anv_nir_lower_push_constants.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 475fa4f2ad2..0d6c9df6b67 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -69,6 +69,7 @@ VULKAN_SOURCES = \ anv_meta.c \ anv_nir_apply_dynamic_offsets.c \ anv_nir_apply_pipeline_layout.c \ + anv_nir_lower_push_constants.c \ anv_pass.c \ anv_pipeline.c \ anv_private.h \ diff --git a/src/vulkan/anv_nir.h b/src/vulkan/anv_nir.h index af95e3a8849..b164ae581e1 100644 --- a/src/vulkan/anv_nir.h +++ b/src/vulkan/anv_nir.h @@ -37,6 +37,8 @@ anv_vk_shader_stage_for_mesa_stage(gl_shader_stage stage) return (VkShaderStage)(int)stage; } +void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar); + void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, nir_shader *shader, struct brw_stage_prog_data *prog_data); diff --git a/src/vulkan/anv_nir_lower_push_constants.c b/src/vulkan/anv_nir_lower_push_constants.c new file mode 100644 index 00000000000..af48470522a --- /dev/null +++ b/src/vulkan/anv_nir_lower_push_constants.c @@ -0,0 +1,107 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or 
substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_nir.h" + +struct lower_push_constants_state { + nir_shader *shader; + bool is_scalar; +}; + +static bool +lower_push_constants_block(nir_block *block, void *void_state) +{ + struct lower_push_constants_state *state = void_state; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + /* TODO: Handle indirect push constants */ + if (intrin->intrinsic != nir_intrinsic_load_push_constant) + continue; + + assert(intrin->const_index[0] % 4 == 0); + unsigned dword_offset = intrin->const_index[0] / 4; + + /* We just turn them into uniform loads with the appropreate offset */ + intrin->intrinsic = nir_intrinsic_load_uniform; + intrin->const_index[0] = 0; + if (state->is_scalar) { + intrin->const_index[1] = dword_offset; + } else { + unsigned shift = dword_offset % 4; + /* Can't cross the vec4 boundary */ + assert(shift + intrin->num_components <= 4); + + /* vec4 shifts are in units of vec4's */ + intrin->const_index[1] = dword_offset / 4; + + if (shift) { + /* If there's a non-zero shift then we need to load a whole vec4 + * and use a move to swizzle it into place. 
+ */ + assert(intrin->dest.is_ssa); + nir_alu_instr *mov = nir_alu_instr_create(state->shader, + nir_op_imov); + mov->src[0].src = nir_src_for_ssa(&intrin->dest.ssa); + for (unsigned i = 0; i < intrin->num_components; i++) + mov->src[0].swizzle[i] = i + shift; + mov->dest.write_mask = (1 << intrin->num_components) - 1; + nir_ssa_dest_init(&intrin->instr, &intrin->dest, + intrin->num_components, NULL); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&mov->dest.dest.ssa)); + nir_instr_insert_after(&intrin->instr, &mov->instr); + + /* Stomp the number of components to 4 */ + intrin->num_components = 4; + } + } + } + + return true; +} + +void +anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar) +{ + struct lower_push_constants_state state = { + .shader = shader, + .is_scalar = is_scalar, + }; + + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_foreach_block(overload->impl, lower_push_constants_block, &state); + } + + assert(shader->num_uniforms % 4 == 0); + if (is_scalar) + shader->num_uniforms /= 4; + else + shader->num_uniforms = DIV_ROUND_UP(shader->num_uniforms, 16); +} diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 59f304f55df..9fb5ddba20b 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -357,22 +357,12 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, if (nir == NULL) return NULL; - bool have_push_constants = false; - nir_foreach_variable(var, &nir->uniforms) { - const struct glsl_type *type = var->type; - if (glsl_type_is_array(type)) - type = glsl_get_array_element(type); - - if (!glsl_type_is_sampler(type)) { - have_push_constants = true; - break; - } - } + anv_nir_lower_push_constants(nir, is_scalar_shader_stage(compiler, stage)); /* Figure out the number of parameters */ prog_data->nr_params = 0; - if (have_push_constants) { + if (nir->num_uniforms > 0) { /* If the shader uses any push constants at all, we'll just give * them the maximum possible 
number */ @@ -394,7 +384,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, * params array, it doesn't really matter what we put here. */ struct anv_push_constants *null_data = NULL; - if (have_push_constants) { + if (nir->num_uniforms > 0) { /* Fill out the push constants section of the param array */ for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) prog_data->param[i] = (const gl_constant_value *) -- cgit v1.2.3 From 584f9d444238baaaacc138c81c46c88af98438f9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 2 Nov 2015 12:14:37 -0800 Subject: anv: Report 0 physical devices when not on Broadwell or Ivy Bridge Right now, Broadweel and Ivy Bridge are the only supported platforms. Hopefully, this reduces the chances that someone will try the driver on unsupported hardware and be confused that it doesn't work. --- src/vulkan/anv_device.c | 26 ++++++++++++++++++++------ src/vulkan/anv_private.h | 2 +- 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 9def97801c2..3ab2a245de4 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -80,7 +80,18 @@ anv_physical_device_init(struct anv_physical_device *device, "failed to get device info"); goto fail; } - + + if (device->info->gen == 7 && + !device->info->is_haswell && !device->info->is_baytrail) { + fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete"); + } else if (device->info->gen == 8 && !device->info->is_cherryview) { + /* Briadwell is as fully supported as anything */ + } else { + result = vk_errorf(VK_UNSUPPORTED, + "Vulkan not yet supported on %s", device->name); + goto fail; + } + if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) { result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, "failed to get aperture size: %m"); @@ -206,7 +217,7 @@ VkResult anv_CreateInstance( instance->pfnAlloc = alloc_callbacks->pfnAlloc; instance->pfnFree = alloc_callbacks->pfnFree; 
instance->apiVersion = pCreateInfo->pAppInfo->apiVersion; - instance->physicalDeviceCount = 0; + instance->physicalDeviceCount = -1; _mesa_locale_init(); @@ -271,13 +282,16 @@ VkResult anv_EnumeratePhysicalDevices( ANV_FROM_HANDLE(anv_instance, instance, _instance); VkResult result; - if (instance->physicalDeviceCount == 0) { + if (instance->physicalDeviceCount < 0) { result = anv_physical_device_init(&instance->physicalDevice, instance, "/dev/dri/renderD128"); - if (result != VK_SUCCESS) + if (result == VK_UNSUPPORTED) { + instance->physicalDeviceCount = 0; + } else if (result == VK_SUCCESS) { + instance->physicalDeviceCount = 1; + } else { return result; - - instance->physicalDeviceCount = 1; + } } /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index ac3d18fdcbe..8e921afb5fb 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -437,7 +437,7 @@ struct anv_instance { PFN_vkAllocFunction pfnAlloc; PFN_vkFreeFunction pfnFree; uint32_t apiVersion; - uint32_t physicalDeviceCount; + int physicalDeviceCount; struct anv_physical_device physicalDevice; struct anv_wsi_implementation * wsi_impl[VK_PLATFORM_NUM_KHR]; -- cgit v1.2.3 From 4d1c76485ba8868d494b268675d26e76a40c088c Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 3 Nov 2015 13:42:28 -0800 Subject: anv: Drop stale comment in anv_cmd_buffer_emit_binding_table() When emitting the binding table for the fragment shader stage, we no longer "walk all of the attachments, [inserting only] the color attachments into the binding table". Instead, we iterate only over the subpass's color attachments, which is the minimal possible iteration. While killing the comment, also rename the variable 'attachments' to 'color_count', as it's no longer a count of all framebuffer attachments but only the subpass's color attachment count. 
--- src/vulkan/anv_cmd_buffer.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 78c7635ef6d..99f10981f69 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -546,7 +546,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; struct anv_pipeline_layout *layout; - uint32_t attachments, bias, state_offset; + uint32_t color_count, bias, state_offset; if (stage == VK_SHADER_STAGE_COMPUTE) layout = cmd_buffer->state.compute_pipeline->layout; @@ -555,10 +555,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (stage == VK_SHADER_STAGE_FRAGMENT) { bias = MAX_RTS; - attachments = subpass->color_count; + color_count = subpass->color_count; } else { bias = 0; - attachments = 0; + color_count = 0; } /* This is a little awkward: layout can be NULL but we still have to @@ -566,7 +566,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, * targets. */ uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0; - if (attachments + surface_count == 0) + if (color_count + surface_count == 0) return VK_SUCCESS; *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, @@ -577,13 +577,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (bt_state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - /* This is highly annoying. The Vulkan spec puts the depth-stencil - * attachments in with the color attachments. Unfortunately, thanks to - * other aspects of the API, we cana't really saparate them before this - * point. Therefore, we have to walk all of the attachments but only - * put the color attachments into the binding table. 
- */ - for (uint32_t a = 0; a < attachments; a++) { + for (uint32_t a = 0; a < color_count; a++) { const struct anv_image_view *iview = fb->attachments[subpass->color_attachments[a]]; -- cgit v1.2.3 From a1e7b8701a4687f29b013364a852aa773c80f960 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 3 Nov 2015 14:57:40 -0800 Subject: nir: remove sampler_set from nir_tex_instr Now that descriptor sets are handled in a lowering pass, this is no longer needed. --- src/glsl/nir/nir.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index ac418dfca90..229d534bf3d 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1002,9 +1002,6 @@ typedef struct { /* gather component selector */ unsigned component : 2; - /* The descriptor set containing this texture */ - unsigned sampler_set; - /** The sampler index * * If this texture instruction has a nir_tex_src_sampler_offset source, -- cgit v1.2.3 From c56727037a968c3ff433468827eff25a40f26a71 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 3 Nov 2015 14:08:44 -0800 Subject: anv: Move struct anv_vue_header to anv_private.h Move it from anv_meta.c to the common header anv_private.h. This allows us to split the meta blit and meta clear code into separate files. 
--- src/vulkan/anv_meta.c | 17 +++++------------ src/vulkan/anv_private.h | 10 ++++++++++ 2 files changed, 15 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index cc605197f9b..f6b37c3f8b2 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -353,15 +353,8 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.dirty |= state->dynamic_flags; } -struct vue_header { - uint32_t Reserved; - uint32_t RTAIndex; - uint32_t ViewportIndex; - float PointWidth; -}; - struct clear_instance_data { - struct vue_header vue_header; + struct anv_vue_header vue_header; VkClearColorValue color; }; @@ -807,12 +800,12 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, float tex_coord[3]; } *vb_data; - unsigned vb_size = sizeof(struct vue_header) + 3 * sizeof(*vb_data); + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); struct anv_state vb_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); - memset(vb_state.map, 0, sizeof(struct vue_header)); - vb_data = vb_state.map + sizeof(struct vue_header); + memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); vb_data[0] = (struct blit_vb_data) { .pos = { @@ -864,7 +857,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, }, (VkDeviceSize[]) { 0, - sizeof(struct vue_header), + sizeof(struct anv_vue_header), }); VkDescriptorSet set; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8e921afb5fb..089bf0b79ed 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -672,6 +672,16 @@ struct anv_device_memory { void * map; }; +/** + * Header for Vertex URB Entry (VUE) + */ +struct anv_vue_header { + uint32_t Reserved; + uint32_t RTAIndex; /* RenderTargetArrayIndex */ + uint32_t ViewportIndex; + float PointWidth; +}; + struct anv_descriptor_set_binding_layout { /* Number of array elements in this binding */ uint16_t 
array_size; -- cgit v1.2.3 From 16b2a489dbd4db56fc9231f399a81685330e7e7c Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 3 Nov 2015 14:19:45 -0800 Subject: anv: Move meta clear code to new file anv_meta_clear.c anv_meta.c currently handles blits, copies, clears, and resolves. The clear code is about to grow, and anv_meta.c is already busting at the seams. --- src/vulkan/Makefile.am | 1 + src/vulkan/anv_meta.c | 526 ++--------------------------------------- src/vulkan/anv_meta.h | 53 +++++ src/vulkan/anv_meta_clear.c | 552 ++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_meta_clear.h | 36 +++ 5 files changed, 656 insertions(+), 512 deletions(-) create mode 100644 src/vulkan/anv_meta.h create mode 100644 src/vulkan/anv_meta_clear.c create mode 100644 src/vulkan/anv_meta_clear.h (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 0d6c9df6b67..4a05657df71 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -67,6 +67,7 @@ VULKAN_SOURCES = \ anv_image.c \ anv_intel.c \ anv_meta.c \ + anv_meta_clear.c \ anv_nir_apply_dynamic_offsets.c \ anv_nir_apply_pipeline_layout.c \ anv_nir_lower_push_constants.c \ diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index f6b37c3f8b2..880af14208d 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -27,6 +27,8 @@ #include #include +#include "anv_meta.h" +#include "anv_meta_clear.h" #include "anv_private.h" #include "anv_nir_builder.h" @@ -66,27 +68,6 @@ build_nir_vertex_shader(bool attr_flat) return b.shader; } -static nir_shader * -build_nir_clear_fragment_shader(void) -{ - nir_builder b; - - const struct glsl_type *color_type = glsl_vec4_type(); - - nir_builder_init_simple_shader(&b, MESA_SHADER_FRAGMENT); - - nir_variable *color_in = nir_variable_create(b.shader, nir_var_shader_in, - color_type, "v_attr"); - color_in->data.location = VARYING_SLOT_VAR0; - color_in->data.interpolation = INTERP_QUALIFIER_FLAT; - nir_variable *color_out = 
nir_variable_create(b.shader, nir_var_shader_out, - color_type, "f_color"); - color_out->data.location = FRAG_RESULT_DATA0; - nir_copy_var(&b, color_out, color_in); - - return b.shader; -} - static nir_shader * build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) { @@ -138,192 +119,9 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) return b.shader; } -static void -anv_device_init_meta_clear_state(struct anv_device *device) -{ - struct anv_shader_module vsm = { - .nir = build_nir_vertex_shader(true), - }; - - struct anv_shader_module fsm = { - .nir = build_nir_clear_fragment_shader(), - }; - - VkShader vs; - anv_CreateShader(anv_device_to_handle(device), - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = anv_shader_module_to_handle(&vsm), - .pName = "main", - }, &vs); - - VkShader fs; - anv_CreateShader(anv_device_to_handle(device), - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = anv_shader_module_to_handle(&fsm), - .pName = "main", - }, &fs); - - /* We use instanced rendering to clear multiple render targets. We have two - * vertex buffers: the first vertex buffer holds per-vertex data and - * provides the vertices for the clear rectangle. The second one holds - * per-instance data, which consists of the VUE header (which selects the - * layer) and the color (Vulkan supports per-RT clear colors). 
- */ - VkPipelineVertexInputStateCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .bindingCount = 2, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .strideInBytes = 12, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX - }, - { - .binding = 1, - .strideInBytes = 32, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_INSTANCE - }, - }, - .attributeCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 1, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offsetInBytes = 0 - }, - { - /* Position */ - .location = 1, - .binding = 0, - .format = VK_FORMAT_R32G32B32_SFLOAT, - .offsetInBytes = 0 - }, - { - /* Color */ - .location = 2, - .binding = 1, - .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offsetInBytes = 16 - } - } - }; - - anv_graphics_pipeline_create(anv_device_to_handle(device), - &(VkGraphicsPipelineCreateInfo) { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - - .stageCount = 2, - .pStages = (VkPipelineShaderStageCreateInfo[]) { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX, - .shader = vs, - .pSpecializationInfo = NULL - }, { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT, - .shader = fs, - .pSpecializationInfo = NULL, - } - }, - .pVertexInputState = &vi_create_info, - .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }, - .pViewportState = &(VkPipelineViewportStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, - }, - .pRasterState = &(VkPipelineRasterStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, 
- .depthClipEnable = true, - .rasterizerDiscardEnable = false, - .fillMode = VK_FILL_MODE_SOLID, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CCW - }, - .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterSamples = 1, - .sampleShadingEnable = false, - .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, - }, - .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .depthTestEnable = true, - .depthWriteEnable = true, - .depthCompareOp = VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = false, - .stencilTestEnable = true, - .front = (VkStencilOpState) { - .stencilPassOp = VK_STENCIL_OP_REPLACE, - .stencilCompareOp = VK_COMPARE_OP_ALWAYS, - }, - .back = (VkStencilOpState) { - .stencilPassOp = VK_STENCIL_OP_REPLACE, - .stencilCompareOp = VK_COMPARE_OP_ALWAYS, - }, - }, - .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { .channelWriteMask = VK_CHANNEL_A_BIT | - VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, - } - }, - .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 9, - .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_LINE_WIDTH, - VK_DYNAMIC_STATE_DEPTH_BIAS, - VK_DYNAMIC_STATE_BLEND_CONSTANTS, - VK_DYNAMIC_STATE_DEPTH_BOUNDS, - VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, - VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }, - }, - .flags = 0, - .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass), - .subpass = 0, - }, - &(struct anv_graphics_pipeline_create_info) { - .use_repclear = true, - .disable_viewport = true, 
- .disable_vs = true, - .use_rectlist = true - }, - &device->meta_state.clear.pipeline); - - anv_DestroyShader(anv_device_to_handle(device), vs); - anv_DestroyShader(anv_device_to_handle(device), fs); - ralloc_free(vsm.nir); - ralloc_free(fsm.nir); -} - -#define NUM_VB_USED 2 -struct anv_saved_state { - struct anv_vertex_binding old_vertex_bindings[NUM_VB_USED]; - struct anv_descriptor_set *old_descriptor_set0; - struct anv_pipeline *old_pipeline; - uint32_t dynamic_flags; - struct anv_dynamic_state dynamic; -}; - -static void +void anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, - struct anv_saved_state *state, + struct anv_meta_saved_state *state, uint32_t dynamic_state) { state->old_pipeline = cmd_buffer->state.pipeline; @@ -335,16 +133,16 @@ anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, dynamic_state); } -static void +void anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, - const struct anv_saved_state *state) + const struct anv_meta_saved_state *state) { cmd_buffer->state.pipeline = state->old_pipeline; cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, sizeof(state->old_vertex_bindings)); - cmd_buffer->state.vb_dirty |= (1 << NUM_VB_USED) - 1; + cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1; cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; @@ -353,140 +151,6 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.dirty |= state->dynamic_flags; } -struct clear_instance_data { - struct anv_vue_header vue_header; - VkClearColorValue color; -}; - -static void -meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, - int num_instances, - struct clear_instance_data *instance_data, - VkClearDepthStencilValue ds_clear_value) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct 
anv_state state; - uint32_t size; - - const float vertex_data[] = { - /* Rect-list coordinates */ - 0.0, 0.0, ds_clear_value.depth, - fb->width, 0.0, ds_clear_value.depth, - fb->width, fb->height, ds_clear_value.depth, - - /* Align to 16 bytes */ - 0.0, 0.0, 0.0, - }; - - size = sizeof(vertex_data) + num_instances * sizeof(*instance_data); - state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 16); - - /* Copy in the vertex and instance data */ - memcpy(state.map, vertex_data, sizeof(vertex_data)); - memcpy(state.map + sizeof(vertex_data), instance_data, - num_instances * sizeof(*instance_data)); - - struct anv_buffer vertex_buffer = { - .device = cmd_buffer->device, - .size = size, - .bo = &device->dynamic_state_block_pool.bo, - .offset = state.offset - }; - - anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, - (VkBuffer[]) { - anv_buffer_to_handle(&vertex_buffer), - anv_buffer_to_handle(&vertex_buffer) - }, - (VkDeviceSize[]) { - 0, - sizeof(vertex_data) - }); - - if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.clear.pipeline)) - anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.clear.pipeline); - - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), - 3, num_instances, 0, 0); -} - -void -anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, - struct anv_render_pass *pass, - const VkClearValue *clear_values) -{ - struct anv_saved_state saved_state; - - if (pass->has_stencil_clear_attachment) - anv_finishme("stencil clear"); - - /* FINISHME: Rethink how we count clear attachments in light of - * 0.138.2 -> 0.170.2 diff. 
- */ - if (pass->num_color_clear_attachments == 0 && - !pass->has_depth_clear_attachment) - return; - - struct clear_instance_data instance_data[pass->num_color_clear_attachments]; - uint32_t color_attachments[pass->num_color_clear_attachments]; - uint32_t ds_attachment = VK_ATTACHMENT_UNUSED; - VkClearDepthStencilValue ds_clear_value = {0}; - - int layer = 0; - for (uint32_t i = 0; i < pass->attachment_count; i++) { - const struct anv_render_pass_attachment *att = &pass->attachments[i]; - - if (anv_format_is_color(att->format)) { - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - instance_data[layer] = (struct clear_instance_data) { - .vue_header = { - .RTAIndex = i, - .ViewportIndex = 0, - .PointWidth = 0.0 - }, - .color = clear_values[i].color, - }; - color_attachments[layer] = i; - layer++; - } - } else { - if (att->format->depth_format && - att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - assert(ds_attachment == VK_ATTACHMENT_UNUSED); - ds_attachment = i; - ds_clear_value = clear_values[ds_attachment].depthStencil; - } - - if (att->format->has_stencil && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - anv_finishme("stencil clear"); - } - } - } - - anv_cmd_buffer_save(cmd_buffer, &saved_state, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); - cmd_buffer->state.dynamic.viewport.count = 0; - - struct anv_subpass subpass = { - .input_count = 0, - .color_count = pass->num_color_clear_attachments, - .color_attachments = color_attachments, - .depth_stencil_attachment = ds_attachment, - }; - - anv_cmd_buffer_begin_subpass(cmd_buffer, &subpass); - - meta_emit_clear(cmd_buffer, pass->num_color_clear_attachments, - instance_data, ds_clear_value); - - /* Restore API state */ - anv_cmd_buffer_restore(cmd_buffer, &saved_state); -} - static VkImageViewType meta_blit_get_src_image_view_type(const struct anv_image *src_image) { @@ -768,7 +432,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) static void meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, - 
struct anv_saved_state *saved_state) + struct anv_meta_saved_state *saved_state) { anv_cmd_buffer_save(cmd_buffer, saved_state, (1 << VK_DYNAMIC_STATE_VIEWPORT)); @@ -959,7 +623,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, static void meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, - const struct anv_saved_state *saved_state) + const struct anv_meta_saved_state *saved_state) { anv_cmd_buffer_restore(cmd_buffer, saved_state); } @@ -1093,7 +757,7 @@ void anv_CmdCopyBuffer( ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); - struct anv_saved_state saved_state; + struct anv_meta_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); @@ -1176,7 +840,7 @@ void anv_CmdCopyImage( const VkImageViewType src_iview_type = meta_blit_get_src_image_view_type(src_image); - struct anv_saved_state saved_state; + struct anv_meta_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); @@ -1274,7 +938,7 @@ void anv_CmdBlitImage( const VkImageViewType src_iview_type = meta_blit_get_src_image_view_type(src_image); - struct anv_saved_state saved_state; + struct anv_meta_saved_state saved_state; anv_finishme("respect VkTexFilter"); @@ -1409,7 +1073,7 @@ void anv_CmdCopyBufferToImage( ANV_FROM_HANDLE(anv_image, dest_image, destImage); VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); const VkFormat orig_format = dest_image->format->vk_format; - struct anv_saved_state saved_state; + struct anv_meta_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); @@ -1512,7 +1176,7 @@ void anv_CmdCopyImageToBuffer( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); ANV_FROM_HANDLE(anv_image, src_image, srcImage); VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_saved_state saved_state; + struct anv_meta_saved_state saved_state; const VkImageViewType src_iview_type = meta_blit_get_src_image_view_type(src_image); @@ -1616,168 +1280,6 @@ void 
anv_CmdFillBuffer( stub(); } -void anv_CmdClearColorImage( - VkCmdBuffer cmdBuffer, - VkImage _image, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_image, image, _image); - struct anv_saved_state saved_state; - - anv_cmd_buffer_save(cmd_buffer, &saved_state, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); - cmd_buffer->state.dynamic.viewport.count = 0; - - for (uint32_t r = 0; r < rangeCount; r++) { - for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { - for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { - struct anv_image_view iview; - anv_image_view_init(&iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = _image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = image->format->vk_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRanges[r].baseMipLevel + l, - .mipLevels = 1, - .baseArrayLayer = pRanges[r].baseArrayLayer + s, - .arraySize = 1 - }, - }, - cmd_buffer); - - VkFramebuffer fb; - anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(&iview), - }, - .width = iview.extent.width, - .height = iview.extent.height, - .layers = 1 - }, &fb); - - VkRenderPass pass; - anv_CreateRenderPass(anv_device_to_handle(cmd_buffer->device), - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, - .format = iview.format->vk_format, - .loadOp = 
VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputCount = 0, - .colorCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .pResolveAttachments = NULL, - .depthStencilAttachment = (VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveCount = 1, - .pPreserveAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - }, - .dependencyCount = 0, - }, &pass); - - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderArea = { - .offset = { 0, 0, }, - .extent = { - .width = iview.extent.width, - .height = iview.extent.height, - }, - }, - .renderPass = pass, - .framebuffer = fb, - .clearValueCount = 1, - .pClearValues = NULL, - }, VK_RENDER_PASS_CONTENTS_INLINE); - - struct clear_instance_data instance_data = { - .vue_header = { - .RTAIndex = 0, - .ViewportIndex = 0, - .PointWidth = 0.0 - }, - .color = *pColor, - }; - - meta_emit_clear(cmd_buffer, 1, &instance_data, - (VkClearDepthStencilValue) {0}); - - ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); - } - } - } - - /* Restore API state */ - anv_cmd_buffer_restore(cmd_buffer, &saved_state); -} - -void anv_CmdClearDepthStencilImage( - VkCmdBuffer cmdBuffer, - VkImage image, - VkImageLayout imageLayout, - const VkClearDepthStencilValue* pDepthStencil, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) -{ - stub(); -} - -void anv_CmdClearColorAttachment( - VkCmdBuffer cmdBuffer, - uint32_t colorAttachment, - VkImageLayout imageLayout, 
- const VkClearColorValue* pColor, - uint32_t rectCount, - const VkRect3D* pRects) -{ - stub(); -} - -void anv_CmdClearDepthStencilAttachment( - VkCmdBuffer cmdBuffer, - VkImageAspectFlags aspectMask, - VkImageLayout imageLayout, - const VkClearDepthStencilValue* pDepthStencil, - uint32_t rectCount, - const VkRect3D* pRects) -{ - stub(); -} - void anv_CmdResolveImage( VkCmdBuffer cmdBuffer, VkImage srcImage, diff --git a/src/vulkan/anv_meta.h b/src/vulkan/anv_meta.h new file mode 100644 index 00000000000..ac28d897f0b --- /dev/null +++ b/src/vulkan/anv_meta.h @@ -0,0 +1,53 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "anv_private.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ANV_META_VERTEX_BINDING_COUNT 2 + +struct anv_meta_saved_state { + struct anv_vertex_binding old_vertex_bindings[ANV_META_VERTEX_BINDING_COUNT]; + struct anv_descriptor_set *old_descriptor_set0; + struct anv_pipeline *old_pipeline; + uint32_t dynamic_flags; + struct anv_dynamic_state dynamic; +}; + +void +anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *state, + uint32_t dynamic_state); + +void +anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, + const struct anv_meta_saved_state *state); + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c new file mode 100644 index 00000000000..3a6a369bfc1 --- /dev/null +++ b/src/vulkan/anv_meta_clear.c @@ -0,0 +1,552 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_meta.h" +#include "anv_meta_clear.h" +#include "anv_nir_builder.h" +#include "anv_private.h" + +struct clear_instance_data { + struct anv_vue_header vue_header; + VkClearColorValue color; +}; + +static void +meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, + int num_instances, + struct clear_instance_data *instance_data, + VkClearDepthStencilValue ds_clear_value) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_state state; + uint32_t size; + + const float vertex_data[] = { + /* Rect-list coordinates */ + 0.0, 0.0, ds_clear_value.depth, + fb->width, 0.0, ds_clear_value.depth, + fb->width, fb->height, ds_clear_value.depth, + + /* Align to 16 bytes */ + 0.0, 0.0, 0.0, + }; + + size = sizeof(vertex_data) + num_instances * sizeof(*instance_data); + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 16); + + /* Copy in the vertex and instance data */ + memcpy(state.map, vertex_data, sizeof(vertex_data)); + memcpy(state.map + sizeof(vertex_data), instance_data, + num_instances * sizeof(*instance_data)); + + struct anv_buffer vertex_buffer = { + .device = cmd_buffer->device, + .size = size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(vertex_data) + }); + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.clear.pipeline)) + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + 
device->meta_state.clear.pipeline); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), + 3, num_instances, 0, 0); +} + +void +anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass, + const VkClearValue *clear_values) +{ + struct anv_meta_saved_state saved_state; + + if (pass->has_stencil_clear_attachment) + anv_finishme("stencil clear"); + + /* FINISHME: Rethink how we count clear attachments in light of + * 0.138.2 -> 0.170.2 diff. + */ + if (pass->num_color_clear_attachments == 0 && + !pass->has_depth_clear_attachment) + return; + + struct clear_instance_data instance_data[pass->num_color_clear_attachments]; + uint32_t color_attachments[pass->num_color_clear_attachments]; + uint32_t ds_attachment = VK_ATTACHMENT_UNUSED; + VkClearDepthStencilValue ds_clear_value = {0}; + + int layer = 0; + for (uint32_t i = 0; i < pass->attachment_count; i++) { + const struct anv_render_pass_attachment *att = &pass->attachments[i]; + + if (anv_format_is_color(att->format)) { + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + instance_data[layer] = (struct clear_instance_data) { + .vue_header = { + .RTAIndex = i, + .ViewportIndex = 0, + .PointWidth = 0.0 + }, + .color = clear_values[i].color, + }; + color_attachments[layer] = i; + layer++; + } + } else { + if (att->format->depth_format && + att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + assert(ds_attachment == VK_ATTACHMENT_UNUSED); + ds_attachment = i; + ds_clear_value = clear_values[ds_attachment].depthStencil; + } + + if (att->format->has_stencil && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + anv_finishme("stencil clear"); + } + } + } + + anv_cmd_buffer_save(cmd_buffer, &saved_state, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); + cmd_buffer->state.dynamic.viewport.count = 0; + + struct anv_subpass subpass = { + .input_count = 0, + .color_count = pass->num_color_clear_attachments, + .color_attachments = color_attachments, + .depth_stencil_attachment = 
ds_attachment, + }; + + anv_cmd_buffer_begin_subpass(cmd_buffer, &subpass); + + meta_emit_clear(cmd_buffer, pass->num_color_clear_attachments, + instance_data, ds_clear_value); + + /* Restore API state */ + anv_cmd_buffer_restore(cmd_buffer, &saved_state); +} + +static nir_shader * +build_nir_vertex_shader(bool attr_flat) +{ + nir_builder b; + + const struct glsl_type *vertex_type = glsl_vec4_type(); + + nir_builder_init_simple_shader(&b, MESA_SHADER_VERTEX); + + nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vertex_type, "a_pos"); + pos_in->data.location = VERT_ATTRIB_GENERIC0; + nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vertex_type, "gl_Position"); + pos_in->data.location = VARYING_SLOT_POS; + nir_copy_var(&b, pos_out, pos_in); + + /* Add one more pass-through attribute. For clear shaders, this is used + * to store the color and for blit shaders it's the texture coordinate. + */ + const struct glsl_type *attr_type = glsl_vec4_type(); + nir_variable *attr_in = nir_variable_create(b.shader, nir_var_shader_in, + attr_type, "a_attr"); + attr_in->data.location = VERT_ATTRIB_GENERIC1; + nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out, + attr_type, "v_attr"); + attr_out->data.location = VARYING_SLOT_VAR0; + attr_out->data.interpolation = attr_flat ? 
INTERP_QUALIFIER_FLAT : + INTERP_QUALIFIER_SMOOTH; + nir_copy_var(&b, attr_out, attr_in); + + return b.shader; +} + +static nir_shader * +build_nir_clear_fragment_shader(void) +{ + nir_builder b; + + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_builder_init_simple_shader(&b, MESA_SHADER_FRAGMENT); + + nir_variable *color_in = nir_variable_create(b.shader, nir_var_shader_in, + color_type, "v_attr"); + color_in->data.location = VARYING_SLOT_VAR0; + color_in->data.interpolation = INTERP_QUALIFIER_FLAT; + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + color_type, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + nir_copy_var(&b, color_out, color_in); + + return b.shader; +} + +void +anv_device_init_meta_clear_state(struct anv_device *device) +{ + struct anv_shader_module vsm = { + .nir = build_nir_vertex_shader(true), + }; + + struct anv_shader_module fsm = { + .nir = build_nir_clear_fragment_shader(), + }; + + VkShader vs; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = anv_shader_module_to_handle(&vsm), + .pName = "main", + }, &vs); + + VkShader fs; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = anv_shader_module_to_handle(&fsm), + .pName = "main", + }, &fs); + + /* We use instanced rendering to clear multiple render targets. We have two + * vertex buffers: the first vertex buffer holds per-vertex data and + * provides the vertices for the clear rectangle. The second one holds + * per-instance data, which consists of the VUE header (which selects the + * layer) and the color (Vulkan supports per-RT clear colors). 
+ */ + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .bindingCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = 12, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + { + .binding = 1, + .strideInBytes = 32, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_INSTANCE + }, + }, + .attributeCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 1, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offsetInBytes = 0 + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offsetInBytes = 0 + }, + { + /* Color */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offsetInBytes = 16 + } + } + }; + + anv_graphics_pipeline_create(anv_device_to_handle(device), + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX, + .shader = vs, + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .pSpecializationInfo = NULL, + } + }, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterState = &(VkPipelineRasterStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, 
+ .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + }, + .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = true, + .depthWriteEnable = true, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = false, + .stencilTestEnable = true, + .front = (VkStencilOpState) { + .stencilPassOp = VK_STENCIL_OP_REPLACE, + .stencilCompareOp = VK_COMPARE_OP_ALWAYS, + }, + .back = (VkStencilOpState) { + .stencilPassOp = VK_STENCIL_OP_REPLACE, + .stencilCompareOp = VK_COMPARE_OP_ALWAYS, + }, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass), + .subpass = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .use_repclear = true, + .disable_viewport = true, 
+ .disable_vs = true, + .use_rectlist = true + }, + &device->meta_state.clear.pipeline); + + anv_DestroyShader(anv_device_to_handle(device), vs); + anv_DestroyShader(anv_device_to_handle(device), fs); + ralloc_free(vsm.nir); + ralloc_free(fsm.nir); +} + +void anv_CmdClearColorImage( + VkCmdBuffer cmdBuffer, + VkImage _image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, image, _image); + struct anv_meta_saved_state saved_state; + + anv_cmd_buffer_save(cmd_buffer, &saved_state, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); + cmd_buffer->state.dynamic.viewport.count = 0; + + for (uint32_t r = 0; r < rangeCount; r++) { + for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { + for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { + struct anv_image_view iview; + anv_image_view_init(&iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = _image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image->format->vk_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRanges[r].baseMipLevel + l, + .mipLevels = 1, + .baseArrayLayer = pRanges[r].baseArrayLayer + s, + .arraySize = 1 + }, + }, + cmd_buffer); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&iview), + }, + .width = iview.extent.width, + .height = iview.extent.height, + .layers = 1 + }, &fb); + + VkRenderPass pass; + anv_CreateRenderPass(anv_device_to_handle(cmd_buffer->device), + &(VkRenderPassCreateInfo) { + .sType = 
VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, + .format = iview.format->vk_format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputCount = 0, + .colorCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .depthStencilAttachment = (VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveCount = 1, + .pPreserveAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + .dependencyCount = 0, + }, &pass); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderArea = { + .offset = { 0, 0, }, + .extent = { + .width = iview.extent.width, + .height = iview.extent.height, + }, + }, + .renderPass = pass, + .framebuffer = fb, + .clearValueCount = 1, + .pClearValues = NULL, + }, VK_RENDER_PASS_CONTENTS_INLINE); + + struct clear_instance_data instance_data = { + .vue_header = { + .RTAIndex = 0, + .ViewportIndex = 0, + .PointWidth = 0.0 + }, + .color = *pColor, + }; + + meta_emit_clear(cmd_buffer, 1, &instance_data, + (VkClearDepthStencilValue) {0}); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + } + } + } + + /* Restore API state */ + anv_cmd_buffer_restore(cmd_buffer, &saved_state); +} + +void anv_CmdClearDepthStencilImage( + VkCmdBuffer cmdBuffer, + VkImage image, + VkImageLayout imageLayout, + const 
VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + stub(); +} + +void anv_CmdClearColorAttachment( + VkCmdBuffer cmdBuffer, + uint32_t colorAttachment, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rectCount, + const VkRect3D* pRects) +{ + stub(); +} + +void anv_CmdClearDepthStencilAttachment( + VkCmdBuffer cmdBuffer, + VkImageAspectFlags aspectMask, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rectCount, + const VkRect3D* pRects) +{ + stub(); +} diff --git a/src/vulkan/anv_meta_clear.h b/src/vulkan/anv_meta_clear.h new file mode 100644 index 00000000000..4ac71f183f7 --- /dev/null +++ b/src/vulkan/anv_meta_clear.h @@ -0,0 +1,36 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct anv_device; + +void anv_device_init_meta_clear_state(struct anv_device *device); + +#ifdef __cplusplus +} +#endif -- cgit v1.2.3 From 2bdb9e2ed9eab016220f1e365dd135f023f1920f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 3 Nov 2015 14:31:19 -0800 Subject: anv/meta: Rename anv_cmd_buffer_save/restore As the functions are now exposed in anv_meta.h, let's rename them to clarify that they are meta functions. anv_cmd_buffer_save -> anv_meta_save anv_cmd_buffer_restore -> anv_meta_restore --- src/vulkan/anv_meta.c | 16 ++++++++-------- src/vulkan/anv_meta.h | 10 +++++----- src/vulkan/anv_meta_clear.c | 14 ++++++-------- 3 files changed, 19 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 880af14208d..976d028127e 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -120,9 +120,9 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) } void -anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_saved_state *state, - uint32_t dynamic_state) +anv_meta_save(struct anv_meta_saved_state *state, + const struct anv_cmd_buffer *cmd_buffer, + uint32_t dynamic_state) { state->old_pipeline = cmd_buffer->state.pipeline; state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; @@ -134,8 +134,8 @@ anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, } void -anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, - const struct anv_meta_saved_state *state) +anv_meta_restore(const struct anv_meta_saved_state *state, + struct anv_cmd_buffer *cmd_buffer) { cmd_buffer->state.pipeline = state->old_pipeline; cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; @@ -434,8 +434,8 @@ static void meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *saved_state) { - anv_cmd_buffer_save(cmd_buffer, saved_state, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); + 
anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); } struct blit_region { @@ -625,7 +625,7 @@ static void meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, const struct anv_meta_saved_state *saved_state) { - anv_cmd_buffer_restore(cmd_buffer, saved_state); + anv_meta_restore(saved_state, cmd_buffer); } static VkFormat diff --git a/src/vulkan/anv_meta.h b/src/vulkan/anv_meta.h index ac28d897f0b..2f26d944516 100644 --- a/src/vulkan/anv_meta.h +++ b/src/vulkan/anv_meta.h @@ -40,13 +40,13 @@ struct anv_meta_saved_state { }; void -anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_saved_state *state, - uint32_t dynamic_state); +anv_meta_save(struct anv_meta_saved_state *state, + const struct anv_cmd_buffer *cmd_buffer, + uint32_t dynamic_state); void -anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, - const struct anv_meta_saved_state *state); +anv_meta_restore(const struct anv_meta_saved_state *state, + struct anv_cmd_buffer *cmd_buffer); #ifdef __cplusplus } diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 3a6a369bfc1..8f217105b4f 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -140,8 +140,8 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, } } - anv_cmd_buffer_save(cmd_buffer, &saved_state, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); + anv_meta_save(&saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); cmd_buffer->state.dynamic.viewport.count = 0; struct anv_subpass subpass = { @@ -156,8 +156,7 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, meta_emit_clear(cmd_buffer, pass->num_color_clear_attachments, instance_data, ds_clear_value); - /* Restore API state */ - anv_cmd_buffer_restore(cmd_buffer, &saved_state); + anv_meta_restore(&saved_state, cmd_buffer); } static nir_shader * @@ -401,8 +400,8 @@ void anv_CmdClearColorImage( ANV_FROM_HANDLE(anv_image, image, _image); struct anv_meta_saved_state saved_state; 
- anv_cmd_buffer_save(cmd_buffer, &saved_state, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); + anv_meta_save(&saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); cmd_buffer->state.dynamic.viewport.count = 0; for (uint32_t r = 0; r < rangeCount; r++) { @@ -514,8 +513,7 @@ void anv_CmdClearColorImage( } } - /* Restore API state */ - anv_cmd_buffer_restore(cmd_buffer, &saved_state); + anv_meta_restore(&saved_state, cmd_buffer); } void anv_CmdClearDepthStencilImage( -- cgit v1.2.3 From 7f82cc718f0faffc23a790914710f0b97db377d1 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 4 Nov 2015 13:48:31 -0800 Subject: anv/meta: Use consistent naming for dynamic state mask Consistently rename bitmasks of Vulkan dynamic state to 'dynamic_mask'. anv_meta_saved_state::dynamic_flags -> dynamic_mask anv_meta_save(dynamic_state) -> dynamic_mask --- src/vulkan/anv_meta.c | 11 ++++++----- src/vulkan/anv_meta.h | 9 +++++++-- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 976d028127e..ba03dff9d3a 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -122,15 +122,16 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) void anv_meta_save(struct anv_meta_saved_state *state, const struct anv_cmd_buffer *cmd_buffer, - uint32_t dynamic_state) + uint32_t dynamic_mask) { state->old_pipeline = cmd_buffer->state.pipeline; state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, sizeof(state->old_vertex_bindings)); - state->dynamic_flags = dynamic_state; + + state->dynamic_mask = dynamic_mask; anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic, - dynamic_state); + dynamic_mask); } void @@ -147,8 +148,8 @@ anv_meta_restore(const struct anv_meta_saved_state *state, cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; anv_dynamic_state_copy(&cmd_buffer->state.dynamic, 
&state->dynamic, - state->dynamic_flags); - cmd_buffer->state.dirty |= state->dynamic_flags; + state->dynamic_mask); + cmd_buffer->state.dirty |= state->dynamic_mask; } static VkImageViewType diff --git a/src/vulkan/anv_meta.h b/src/vulkan/anv_meta.h index 2f26d944516..d798c6e909f 100644 --- a/src/vulkan/anv_meta.h +++ b/src/vulkan/anv_meta.h @@ -35,14 +35,19 @@ struct anv_meta_saved_state { struct anv_vertex_binding old_vertex_bindings[ANV_META_VERTEX_BINDING_COUNT]; struct anv_descriptor_set *old_descriptor_set0; struct anv_pipeline *old_pipeline; - uint32_t dynamic_flags; + + /** + * Bitmask of (1 << VK_DYNAMIC_STATE_*). Defines the set of saved dynamic + * state. + */ + uint32_t dynamic_mask; struct anv_dynamic_state dynamic; }; void anv_meta_save(struct anv_meta_saved_state *state, const struct anv_cmd_buffer *cmd_buffer, - uint32_t dynamic_state); + uint32_t dynamic_mask); void anv_meta_restore(const struct anv_meta_saved_state *state, -- cgit v1.2.3 From 49c96a14c512b7b56d19c47d384ddcc67941633e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 3 Nov 2015 14:53:52 -0800 Subject: anv/meta: Clear color attribute is always flat No behavioral change. This patch just removes an unneeded function parameter. --- src/vulkan/anv_meta_clear.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 8f217105b4f..ade8b76f83a 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -160,7 +160,7 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, } static nir_shader * -build_nir_vertex_shader(bool attr_flat) +build_nir_vertex_shader(void) { nir_builder b; @@ -186,8 +186,7 @@ build_nir_vertex_shader(bool attr_flat) nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out, attr_type, "v_attr"); attr_out->data.location = VARYING_SLOT_VAR0; - attr_out->data.interpolation = attr_flat ? 
INTERP_QUALIFIER_FLAT : - INTERP_QUALIFIER_SMOOTH; + attr_out->data.interpolation = INTERP_QUALIFIER_FLAT; nir_copy_var(&b, attr_out, attr_in); return b.shader; @@ -218,7 +217,7 @@ void anv_device_init_meta_clear_state(struct anv_device *device) { struct anv_shader_module vsm = { - .nir = build_nir_vertex_shader(true), + .nir = build_nir_vertex_shader(), }; struct anv_shader_module fsm = { -- cgit v1.2.3 From a9a3071fc4638f5f86ba846dd114e3383bf1bc17 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 3 Nov 2015 14:55:58 -0800 Subject: anv/meta: Rewrite clear code Fixes Crucible test "func.clear.load-clear.attachments-8". The old clear code, when clearing attachments for VK_ATTACHMENT_LOAD_OP_CLEAR, suffered from some fundamental bugs. The bugs were not fixable with the old code's approach. - It assumed that a VkRenderPass contained at most one depthstencil attachment. - It tried to clear all attachments (color and the sole depthstencil) with a single instanced draw call, using the VUE header's RenderTargetArrayIndex to specify the instance's target color attachment. But the RenderTargetArrayIndex does not select entries in the binding table; it only selects an array index of a singled layered surface. - If at least one attachment of VkRenderPass had VK_ATTACHMENT_LOAD_OP_CLEAR, then the old code cleared *all* attachments. This was a consequence of using a single draw call and single pipeline for the clear. The new clear code fixes those bugs by making a separate draw call for each attachment, and using one pipeline when clearing color attachments and a different pipeline for depth attachments. The new code, like the old code, does not clear stencil attachments. It is left as a FINISHME. 
--- src/vulkan/anv_meta.c | 4 +- src/vulkan/anv_meta_clear.c | 770 +++++++++++++++++++++++++++----------------- src/vulkan/anv_meta_clear.h | 1 + src/vulkan/anv_private.h | 3 +- 4 files changed, 486 insertions(+), 292 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index ba03dff9d3a..0f670393112 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1303,9 +1303,7 @@ anv_device_init_meta(struct anv_device *device) void anv_device_finish_meta(struct anv_device *device) { - /* Clear */ - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.clear.pipeline); + anv_device_finish_meta_clear_state(device); /* Blit */ anv_DestroyRenderPass(anv_device_to_handle(device), diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index ade8b76f83a..04640c04b7b 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -26,286 +26,140 @@ #include "anv_nir_builder.h" #include "anv_private.h" -struct clear_instance_data { +/** Vertex attributes for color clears. 
*/ +struct color_clear_vattrs { struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ VkClearColorValue color; }; -static void -meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, - int num_instances, - struct clear_instance_data *instance_data, - VkClearDepthStencilValue ds_clear_value) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_state state; - uint32_t size; - - const float vertex_data[] = { - /* Rect-list coordinates */ - 0.0, 0.0, ds_clear_value.depth, - fb->width, 0.0, ds_clear_value.depth, - fb->width, fb->height, ds_clear_value.depth, - - /* Align to 16 bytes */ - 0.0, 0.0, 0.0, - }; - - size = sizeof(vertex_data) + num_instances * sizeof(*instance_data); - state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 16); - - /* Copy in the vertex and instance data */ - memcpy(state.map, vertex_data, sizeof(vertex_data)); - memcpy(state.map + sizeof(vertex_data), instance_data, - num_instances * sizeof(*instance_data)); - - struct anv_buffer vertex_buffer = { - .device = cmd_buffer->device, - .size = size, - .bo = &device->dynamic_state_block_pool.bo, - .offset = state.offset - }; - - anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, - (VkBuffer[]) { - anv_buffer_to_handle(&vertex_buffer), - anv_buffer_to_handle(&vertex_buffer) - }, - (VkDeviceSize[]) { - 0, - sizeof(vertex_data) - }); - - if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.clear.pipeline)) - anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.clear.pipeline); - - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), - 3, num_instances, 0, 0); -} +/** Vertex attributes for depth clears. 
*/ +struct depth_clear_vattrs { + struct anv_vue_header vue_header; + float position[2]; /*<< 3DPRIM_RECTLIST */ +}; -void -anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, - struct anv_render_pass *pass, - const VkClearValue *clear_values) +static void +meta_clear_begin(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) { - struct anv_meta_saved_state saved_state; - - if (pass->has_stencil_clear_attachment) - anv_finishme("stencil clear"); - - /* FINISHME: Rethink how we count clear attachments in light of - * 0.138.2 -> 0.170.2 diff. - */ - if (pass->num_color_clear_attachments == 0 && - !pass->has_depth_clear_attachment) - return; - - struct clear_instance_data instance_data[pass->num_color_clear_attachments]; - uint32_t color_attachments[pass->num_color_clear_attachments]; - uint32_t ds_attachment = VK_ATTACHMENT_UNUSED; - VkClearDepthStencilValue ds_clear_value = {0}; - - int layer = 0; - for (uint32_t i = 0; i < pass->attachment_count; i++) { - const struct anv_render_pass_attachment *att = &pass->attachments[i]; - - if (anv_format_is_color(att->format)) { - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - instance_data[layer] = (struct clear_instance_data) { - .vue_header = { - .RTAIndex = i, - .ViewportIndex = 0, - .PointWidth = 0.0 - }, - .color = clear_values[i].color, - }; - color_attachments[layer] = i; - layer++; - } - } else { - if (att->format->depth_format && - att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - assert(ds_attachment == VK_ATTACHMENT_UNUSED); - ds_attachment = i; - ds_clear_value = clear_values[ds_attachment].depthStencil; - } - - if (att->format->has_stencil && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - anv_finishme("stencil clear"); - } - } - } + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT) | + (1 << VK_DYNAMIC_STATE_SCISSOR)); - anv_meta_save(&saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); 
cmd_buffer->state.dynamic.viewport.count = 0; - - struct anv_subpass subpass = { - .input_count = 0, - .color_count = pass->num_color_clear_attachments, - .color_attachments = color_attachments, - .depth_stencil_attachment = ds_attachment, - }; - - anv_cmd_buffer_begin_subpass(cmd_buffer, &subpass); - - meta_emit_clear(cmd_buffer, pass->num_color_clear_attachments, - instance_data, ds_clear_value); - - anv_meta_restore(&saved_state, cmd_buffer); + cmd_buffer->state.dynamic.scissor.count = 0; } -static nir_shader * -build_nir_vertex_shader(void) +static void +meta_clear_end(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) { - nir_builder b; - - const struct glsl_type *vertex_type = glsl_vec4_type(); - - nir_builder_init_simple_shader(&b, MESA_SHADER_VERTEX); - - nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vertex_type, "a_pos"); - pos_in->data.location = VERT_ATTRIB_GENERIC0; - nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, - vertex_type, "gl_Position"); - pos_in->data.location = VARYING_SLOT_POS; - nir_copy_var(&b, pos_out, pos_in); - - /* Add one more pass-through attribute. For clear shaders, this is used - * to store the color and for blit shaders it's the texture coordinate. 
- */ - const struct glsl_type *attr_type = glsl_vec4_type(); - nir_variable *attr_in = nir_variable_create(b.shader, nir_var_shader_in, - attr_type, "a_attr"); - attr_in->data.location = VERT_ATTRIB_GENERIC1; - nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out, - attr_type, "v_attr"); - attr_out->data.location = VARYING_SLOT_VAR0; - attr_out->data.interpolation = INTERP_QUALIFIER_FLAT; - nir_copy_var(&b, attr_out, attr_in); - - return b.shader; + anv_meta_restore(saved_state, cmd_buffer); } -static nir_shader * -build_nir_clear_fragment_shader(void) +static void +build_color_shaders(struct nir_shader **out_vs, + struct nir_shader **out_fs) { - nir_builder b; - - const struct glsl_type *color_type = glsl_vec4_type(); + nir_builder vs_b; + nir_builder fs_b; - nir_builder_init_simple_shader(&b, MESA_SHADER_FRAGMENT); + nir_builder_init_simple_shader(&vs_b, MESA_SHADER_VERTEX); + nir_builder_init_simple_shader(&fs_b, MESA_SHADER_FRAGMENT); - nir_variable *color_in = nir_variable_create(b.shader, nir_var_shader_in, - color_type, "v_attr"); - color_in->data.location = VARYING_SLOT_VAR0; - color_in->data.interpolation = INTERP_QUALIFIER_FLAT; - nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, - color_type, "f_color"); - color_out->data.location = FRAG_RESULT_DATA0; - nir_copy_var(&b, color_out, color_in); + const struct glsl_type *position_type = glsl_vec4_type(); + const struct glsl_type *color_type = glsl_vec4_type(); - return b.shader; + nir_variable *vs_in_pos = + nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, + "a_position"); + vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *vs_out_pos = + nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, + "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_variable *vs_in_color = + nir_variable_create(vs_b.shader, nir_var_shader_in, color_type, + "a_color"); + vs_in_color->data.location = 
VERT_ATTRIB_GENERIC1; + + nir_variable *vs_out_color = + nir_variable_create(vs_b.shader, nir_var_shader_out, color_type, + "v_color"); + vs_out_color->data.location = VARYING_SLOT_VAR0; + vs_out_color->data.interpolation = INTERP_QUALIFIER_FLAT; + + nir_variable *fs_in_color = + nir_variable_create(fs_b.shader, nir_var_shader_in, color_type, + "v_color"); + fs_in_color->data.location = vs_out_color->data.location; + fs_in_color->data.interpolation = vs_out_color->data.interpolation; + + nir_variable *fs_out_color = + nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, + "f_color"); + fs_out_color->data.location = FRAG_RESULT_DATA0; + + nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); + nir_copy_var(&vs_b, vs_out_color, vs_in_color); + nir_copy_var(&fs_b, fs_out_color, fs_in_color); + + *out_vs = vs_b.shader; + *out_fs = fs_b.shader; } -void -anv_device_init_meta_clear_state(struct anv_device *device) +static struct anv_pipeline * +create_pipeline(struct anv_device *device, + struct nir_shader *vs_nir, + struct nir_shader *fs_nir, + const VkPipelineVertexInputStateCreateInfo *vi_state, + const VkPipelineDepthStencilStateCreateInfo *ds_state, + const VkPipelineColorBlendStateCreateInfo *cb_state) { - struct anv_shader_module vsm = { - .nir = build_nir_vertex_shader(), - }; + VkDevice device_h = anv_device_to_handle(device); - struct anv_shader_module fsm = { - .nir = build_nir_clear_fragment_shader(), - }; + struct anv_shader_module vs_m = { .nir = vs_nir }; + struct anv_shader_module fs_m = { .nir = fs_nir }; - VkShader vs; - anv_CreateShader(anv_device_to_handle(device), + VkShader vs_h; + ANV_CALL(CreateShader)(device_h, &(VkShaderCreateInfo) { .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = anv_shader_module_to_handle(&vsm), + .module = anv_shader_module_to_handle(&vs_m), .pName = "main", - }, &vs); + }, + &vs_h); - VkShader fs; - anv_CreateShader(anv_device_to_handle(device), + VkShader fs_h; + ANV_CALL(CreateShader)(device_h, 
&(VkShaderCreateInfo) { .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = anv_shader_module_to_handle(&fsm), + .module = anv_shader_module_to_handle(&fs_m), .pName = "main", - }, &fs); - - /* We use instanced rendering to clear multiple render targets. We have two - * vertex buffers: the first vertex buffer holds per-vertex data and - * provides the vertices for the clear rectangle. The second one holds - * per-instance data, which consists of the VUE header (which selects the - * layer) and the color (Vulkan supports per-RT clear colors). - */ - VkPipelineVertexInputStateCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .bindingCount = 2, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .strideInBytes = 12, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX - }, - { - .binding = 1, - .strideInBytes = 32, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_INSTANCE - }, }, - .attributeCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 1, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offsetInBytes = 0 - }, - { - /* Position */ - .location = 1, - .binding = 0, - .format = VK_FORMAT_R32G32B32_SFLOAT, - .offsetInBytes = 0 - }, - { - /* Color */ - .location = 2, - .binding = 1, - .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offsetInBytes = 16 - } - } - }; + &fs_h); - anv_graphics_pipeline_create(anv_device_to_handle(device), + VkPipeline pipeline_h; + anv_graphics_pipeline_create(device_h, &(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = 2, .pStages = (VkPipelineShaderStageCreateInfo[]) { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_VERTEX, - .shader = vs, - .pSpecializationInfo = NULL - }, { + .shader = vs_h, + }, + { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = 
VK_SHADER_STAGE_FRAGMENT, - .shader = fs, - .pSpecializationInfo = NULL, - } + .shader = fs_h, + }, }, - .pVertexInputState = &vi_create_info, + .pVertexInputState = vi_state, .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, @@ -314,47 +168,34 @@ anv_device_init_meta_clear_state(struct anv_device *device) .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, .viewportCount = 1, + .pViewports = NULL, /* dynamic */ .scissorCount = 1, + .pScissors = NULL, /* dynamic */ }, .pRasterState = &(VkPipelineRasterStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, - .depthClipEnable = true, + .depthClipEnable = false, .rasterizerDiscardEnable = false, .fillMode = VK_FILL_MODE_SOLID, .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CCW + .frontFace = VK_FRONT_FACE_CCW, + .depthBiasEnable = false, + .depthClipEnable = false, }, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterSamples = 1, + .rasterSamples = 1, /* FINISHME: Multisampling */ .sampleShadingEnable = false, .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, }, - .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .depthTestEnable = true, - .depthWriteEnable = true, - .depthCompareOp = VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = false, - .stencilTestEnable = true, - .front = (VkStencilOpState) { - .stencilPassOp = VK_STENCIL_OP_REPLACE, - .stencilCompareOp = VK_COMPARE_OP_ALWAYS, - }, - .back = (VkStencilOpState) { - .stencilPassOp = VK_STENCIL_OP_REPLACE, - .stencilCompareOp = VK_COMPARE_OP_ALWAYS, - }, - }, - .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { - .sType = 
VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { .channelWriteMask = VK_CHANNEL_A_BIT | - VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, - } - }, + .pDepthStencilState = ds_state, + .pColorBlendState = cb_state, .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + /* The meta clear pipeline declares all state as dynamic. + * As a consequence, vkCmdBindPipeline writes no dynamic state + * to the cmd buffer. Therefore, at the end of the meta clear, + * we need only restore dynamic state was vkCmdSet. + */ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .dynamicStateCount = 9, .pDynamicStates = (VkDynamicState[]) { @@ -379,12 +220,377 @@ anv_device_init_meta_clear_state(struct anv_device *device) .disable_vs = true, .use_rectlist = true }, - &device->meta_state.clear.pipeline); + &pipeline_h); + + ANV_CALL(DestroyShader)(device_h, vs_h); + ANV_CALL(DestroyShader)(device_h, fs_h); - anv_DestroyShader(anv_device_to_handle(device), vs); - anv_DestroyShader(anv_device_to_handle(device), fs); - ralloc_free(vsm.nir); - ralloc_free(fsm.nir); + ralloc_free(vs_nir); + ralloc_free(fs_nir); + + return anv_pipeline_from_handle(pipeline_h); +} + +static void +init_color_pipeline(struct anv_device *device) +{ + struct nir_shader *vs_nir; + struct nir_shader *fs_nir; + build_color_shaders(&vs_nir, &fs_nir); + + const VkPipelineVertexInputStateCreateInfo vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .bindingCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = sizeof(struct color_clear_vattrs), + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + }, + .attributeCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + 
.offsetInBytes = offsetof(struct color_clear_vattrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offsetInBytes = offsetof(struct color_clear_vattrs, position), + }, + { + /* Color */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offsetInBytes = offsetof(struct color_clear_vattrs, color), + }, + }, + }; + + const VkPipelineDepthStencilStateCreateInfo ds_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = false, + .depthWriteEnable = false, + .depthBoundsTestEnable = false, + .stencilTestEnable = false, + }; + + const VkPipelineColorBlendStateCreateInfo cb_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { + .blendEnable = false, + .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | + VK_CHANNEL_G_BIT | + VK_CHANNEL_B_BIT, + }, + }, + }; + + device->meta_state.clear.color_pipeline = + create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, + &cb_state); +} + +static void +emit_load_color_clear(struct anv_cmd_buffer *cmd_buffer, + uint32_t attachment, + VkClearColorValue clear_value) +{ + struct anv_device *device = cmd_buffer->device; + VkCmdBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + VkPipeline pipeline_h = + anv_pipeline_to_handle(device->meta_state.clear.color_pipeline); + + const struct color_clear_vattrs vertex_data[3] = { + { + .vue_header = { 0 }, + .position = { 0.0, 0.0 }, + .color = clear_value, + }, + { + .vue_header = { 0 }, + .position = { fb->width, 0.0 }, + .color = clear_value, + }, + { + .vue_header = { 0 }, + .position = { fb->width, fb->height }, + .color = clear_value, + }, + }; + + struct anv_state 
state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, sizeof(vertex_data), 16); + memcpy(state.map, vertex_data, sizeof(vertex_data)); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset, + }; + + anv_cmd_buffer_begin_subpass(cmd_buffer, + &(struct anv_subpass) { + .color_count = 1, + .color_attachments = (uint32_t[]) { attachment }, + .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, + }); + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 1, + (VkViewport[]) { + { + .originX = 0, + .originY = 0, + .width = fb->width, + .height = fb->height, + .minDepth = 0.0, + .maxDepth = 1.0, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + } + }); + + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, + (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, + (VkDeviceSize[]) { 0 }); + + if (cmd_buffer->state.pipeline != device->meta_state.clear.color_pipeline) { + ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + } + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); +} + + +static void +build_depthstencil_shaders(struct nir_shader **out_vs, + struct nir_shader **out_fs) +{ + nir_builder vs_b; + nir_builder fs_b; + + nir_builder_init_simple_shader(&vs_b, MESA_SHADER_VERTEX); + nir_builder_init_simple_shader(&fs_b, MESA_SHADER_FRAGMENT); + + const struct glsl_type *position_type = glsl_vec4_type(); + + nir_variable *vs_in_pos = + nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, + "a_position"); + vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *vs_out_pos = + nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, + "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); + + *out_vs = vs_b.shader; + *out_fs = fs_b.shader; +} + +static void 
+init_depth_pipeline(struct anv_device *device) +{ + struct nir_shader *vs_nir; + struct nir_shader *fs_nir; + build_depthstencil_shaders(&vs_nir, &fs_nir); + + const VkPipelineVertexInputStateCreateInfo vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .bindingCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = sizeof(struct depth_clear_vattrs), + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + }, + .attributeCount = 2, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offsetInBytes = offsetof(struct depth_clear_vattrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offsetInBytes = offsetof(struct depth_clear_vattrs, position), + }, + }, + }; + + const VkPipelineDepthStencilStateCreateInfo ds_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = true, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthWriteEnable = true, + .depthBoundsTestEnable = false, + .stencilTestEnable = false, + }; + + const VkPipelineColorBlendStateCreateInfo cb_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + .logicOpEnable = false, + .attachmentCount = 0, + .pAttachments = NULL, + }; + + device->meta_state.clear.depth_pipeline = + create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, + &cb_state); +} + +static void +emit_load_depth_clear(struct anv_cmd_buffer *cmd_buffer, + uint32_t attachment, float clear_value) +{ + struct anv_device *device = cmd_buffer->device; + VkCmdBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + + const struct depth_clear_vattrs vertex_data[3] = 
{ + { + .vue_header = { 0 }, + .position = { 0.0, 0.0 }, + }, + { + .vue_header = { 0 }, + .position = { fb->width, 0.0 }, + }, + { + .vue_header = { 0 }, + .position = { fb->width, fb->height }, + }, + }; + + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, sizeof(vertex_data), 16); + memcpy(state.map, vertex_data, sizeof(vertex_data)); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset, + }; + + anv_cmd_buffer_begin_subpass(cmd_buffer, + &(struct anv_subpass) { + .color_count = 0, + .depth_stencil_attachment = attachment, + }); + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 1, + (VkViewport[]) { + { + .originX = 0, + .originY = 0, + .width = fb->width, + .height = fb->height, + .minDepth = clear_value, + .maxDepth = clear_value, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + } + }); + + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, + (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, + (VkDeviceSize[]) { 0 }); + + if (cmd_buffer->state.pipeline != device->meta_state.clear.depth_pipeline) { + VkPipeline pipeline_h = + anv_pipeline_to_handle(device->meta_state.clear.depth_pipeline); + ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + } + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); +} + +void +anv_device_init_meta_clear_state(struct anv_device *device) +{ + init_color_pipeline(device); + init_depth_pipeline(device); +} + +void +anv_device_finish_meta_clear_state(struct anv_device *device) +{ + ANV_CALL(DestroyPipeline)(anv_device_to_handle(device), + anv_pipeline_to_handle(device->meta_state.clear.color_pipeline)); + ANV_CALL(DestroyPipeline)(anv_device_to_handle(device), + anv_pipeline_to_handle(device->meta_state.clear.depth_pipeline)); +} + +void +anv_cmd_buffer_clear_attachments(struct 
anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass, + const VkClearValue *clear_values) +{ + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + for (uint32_t a = 0; a < pass->attachment_count; ++a) { + struct anv_render_pass_attachment *att = &pass->attachments[a]; + + if (anv_format_is_color(att->format)) { + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + emit_load_color_clear(cmd_buffer, a, clear_values[a].color); + } + } else { + if (att->format->depth_format && + att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + emit_load_depth_clear(cmd_buffer, a, clear_values[a].depthStencil.depth); + } + + if (att->format->has_stencil && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + anv_finishme("stencil load clear"); + } + } + } + + meta_clear_end(&saved_state, cmd_buffer); } void anv_CmdClearColorImage( @@ -399,9 +605,7 @@ void anv_CmdClearColorImage( ANV_FROM_HANDLE(anv_image, image, _image); struct anv_meta_saved_state saved_state; - anv_meta_save(&saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); - cmd_buffer->state.dynamic.viewport.count = 0; + meta_clear_begin(&saved_state, cmd_buffer); for (uint32_t r = 0; r < rangeCount; r++) { for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { @@ -492,27 +696,17 @@ void anv_CmdClearColorImage( .renderPass = pass, .framebuffer = fb, .clearValueCount = 1, - .pClearValues = NULL, + .pClearValues = (VkClearValue[]) { + { .color = *pColor }, + }, }, VK_RENDER_PASS_CONTENTS_INLINE); - struct clear_instance_data instance_data = { - .vue_header = { - .RTAIndex = 0, - .ViewportIndex = 0, - .PointWidth = 0.0 - }, - .color = *pColor, - }; - - meta_emit_clear(cmd_buffer, 1, &instance_data, - (VkClearDepthStencilValue) {0}); - ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); } } } - anv_meta_restore(&saved_state, cmd_buffer); + meta_clear_end(&saved_state, cmd_buffer); } void anv_CmdClearDepthStencilImage( diff --git a/src/vulkan/anv_meta_clear.h 
b/src/vulkan/anv_meta_clear.h index 4ac71f183f7..e53bd979763 100644 --- a/src/vulkan/anv_meta_clear.h +++ b/src/vulkan/anv_meta_clear.h @@ -30,6 +30,7 @@ extern "C" { struct anv_device; void anv_device_init_meta_clear_state(struct anv_device *device); +void anv_device_finish_meta_clear_state(struct anv_device *device); #ifdef __cplusplus } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 089bf0b79ed..be87bb59bf6 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -448,7 +448,8 @@ void anv_finish_wsi(struct anv_instance *instance); struct anv_meta_state { struct { - VkPipeline pipeline; + struct anv_pipeline *color_pipeline; + struct anv_pipeline *depth_pipeline; } clear; struct { -- cgit v1.2.3 From d259af3fbb179229ac7fb86bf910ff5e4391584b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 4 Nov 2015 15:54:38 -0800 Subject: anv: Remove unused anv_render_pass members Remove members num_color_clear_attachments has_depth_clear_attachment has_stencil_clear_attachment The new clear code in anv_meta_clear.c does not use them. 
--- src/vulkan/anv_pass.c | 16 ---------------- src/vulkan/anv_private.h | 5 ----- 2 files changed, 21 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pass.c b/src/vulkan/anv_pass.c index c3060239906..bf0f830f3c1 100644 --- a/src/vulkan/anv_pass.c +++ b/src/vulkan/anv_pass.c @@ -62,22 +62,6 @@ VkResult anv_CreateRenderPass( att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; // att->store_op = pCreateInfo->pAttachments[i].storeOp; // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; - - if (anv_format_is_color(att->format)) { - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - ++pass->num_color_clear_attachments; - } - } else { - if (att->format->depth_format && - att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - pass->has_depth_clear_attachment = true; - } - - if (att->format->has_stencil && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - pass->has_stencil_clear_attachment = true; - } - } } for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index be87bb59bf6..396d4138dc1 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1410,11 +1410,6 @@ struct anv_render_pass_attachment { struct anv_render_pass { uint32_t attachment_count; uint32_t subpass_count; - - uint32_t num_color_clear_attachments; - bool has_depth_clear_attachment; - bool has_stencil_clear_attachment; - struct anv_render_pass_attachment * attachments; struct anv_subpass subpasses[0]; }; -- cgit v1.2.3 From 1b68120760460c3d23425dadf46df75274b6ddb0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 4 Nov 2015 19:48:59 -0800 Subject: anv/cmd_buffer: Don't use an anv_state pointer in emit_binding_table The anv_state is supposed to be a flyweight so we're not really saving anything by using a pointer. Also, we were creating one, setting a pointer to it, and then having it go out-of-scope which is bad. 
--- src/vulkan/anv_cmd_buffer.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 99f10981f69..57cc8a6456b 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -596,7 +596,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.descriptors[binding->set]; struct anv_descriptor *desc = &set->descriptors[binding->offset]; - const struct anv_state *surface_state; + struct anv_state surface_state; struct anv_bo *bo; uint32_t bo_offset; @@ -606,31 +606,30 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, /* Nothing for us to do here */ continue; case ANV_DESCRIPTOR_TYPE_BUFFER_VIEW: - surface_state = &desc->buffer_view->surface_state; + surface_state = desc->buffer_view->surface_state; bo = desc->buffer_view->bo; bo_offset = desc->buffer_view->offset; break; case ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET: { - struct anv_state state = + surface_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer); - anv_fill_buffer_surface_state(cmd_buffer->device, state.map, + anv_fill_buffer_surface_state(cmd_buffer->device, surface_state.map, anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT), desc->offset, desc->range); - surface_state = &state; bo = desc->buffer_view->bo; bo_offset = desc->buffer_view->offset; break; } case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW: case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER: - surface_state = &desc->image_view->nonrt_surface_state; + surface_state = desc->image_view->nonrt_surface_state; bo = desc->image_view->bo; bo_offset = desc->image_view->offset; break; } - bt_map[bias + s] = surface_state->offset + state_offset; - add_surface_state_reloc(cmd_buffer, *surface_state, bo, bo_offset); + bt_map[bias + s] = surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); } return VK_SUCCESS; -- cgit v1.2.3 From 
a40f682c712f8d57b3d8fda5e3c2b0a785811d79 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 4 Nov 2015 19:51:46 -0800 Subject: anv/cmd_buffer: Fix SURFACE_STATE for non-view buffer bindings We were treating it as if it's a BufferView and weren't taking the offset into account properly. --- src/vulkan/anv_cmd_buffer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 57cc8a6456b..5ce57f1d98e 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -611,13 +611,14 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, bo_offset = desc->buffer_view->offset; break; case ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET: { + bo = desc->buffer->bo; + bo_offset = desc->buffer->offset + desc->offset; + surface_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer); anv_fill_buffer_surface_state(cmd_buffer->device, surface_state.map, anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT), - desc->offset, desc->range); - bo = desc->buffer_view->bo; - bo_offset = desc->buffer_view->offset; + bo_offset, desc->range); break; } case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW: -- cgit v1.2.3 From 16119ad884f472ed8e59811fca60bccc1dd8580a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 4 Nov 2015 17:00:01 -0800 Subject: anv/meta: Finish load clears for stencil attachments Tested by Crucible "func.depthstencil.stencil_triangles.*" in commit c194292d5eadb84e9d7489fc01ce0b653cdd4ca5 (HEAD -> master) Author: Chad Versace Date: Wed Nov 4 16:19:24 2015 -0800 Subject: func.depthstencil: Remove stencil clear workaround for Mesa --- src/vulkan/anv_meta_clear.c | 114 +++++++++++++++++++++++++++++++++----------- src/vulkan/anv_private.h | 4 +- 2 files changed, 88 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 04640c04b7b..7eb4c5587cc 100644 --- a/src/vulkan/anv_meta_clear.c +++ 
b/src/vulkan/anv_meta_clear.c @@ -33,8 +33,8 @@ struct color_clear_vattrs { VkClearColorValue color; }; -/** Vertex attributes for depth clears. */ -struct depth_clear_vattrs { +/** Vertex attributes for depthstencil clears. */ +struct depthstencil_clear_vattrs { struct anv_vue_header vue_header; float position[2]; /*<< 3DPRIM_RECTLIST */ }; @@ -45,7 +45,8 @@ meta_clear_begin(struct anv_meta_saved_state *saved_state, { anv_meta_save(saved_state, cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT) | - (1 << VK_DYNAMIC_STATE_SCISSOR)); + (1 << VK_DYNAMIC_STATE_SCISSOR) | + (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)); cmd_buffer->state.dynamic.viewport.count = 0; cmd_buffer->state.dynamic.scissor.count = 0; @@ -412,11 +413,13 @@ build_depthstencil_shaders(struct nir_shader **out_vs, *out_fs = fs_b.shader; } -static void -init_depth_pipeline(struct anv_device *device) +static struct anv_pipeline * +create_depthstencil_pipeline(struct anv_device *device, + VkImageAspectFlags aspects) { struct nir_shader *vs_nir; struct nir_shader *fs_nir; + build_depthstencil_shaders(&vs_nir, &fs_nir); const VkPipelineVertexInputStateCreateInfo vi_state = { @@ -425,7 +428,7 @@ init_depth_pipeline(struct anv_device *device) .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { { .binding = 0, - .strideInBytes = sizeof(struct depth_clear_vattrs), + .strideInBytes = sizeof(struct depthstencil_clear_vattrs), .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX }, }, @@ -436,25 +439,32 @@ init_depth_pipeline(struct anv_device *device) .location = 0, .binding = 0, .format = VK_FORMAT_R32G32B32A32_UINT, - .offsetInBytes = offsetof(struct depth_clear_vattrs, vue_header), + .offsetInBytes = offsetof(struct depthstencil_clear_vattrs, vue_header), }, { /* Position */ .location = 1, .binding = 0, .format = VK_FORMAT_R32G32_SFLOAT, - .offsetInBytes = offsetof(struct depth_clear_vattrs, position), + .offsetInBytes = offsetof(struct depthstencil_clear_vattrs, position), }, }, }; const 
VkPipelineDepthStencilStateCreateInfo ds_state = { .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .depthTestEnable = true, + .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), .depthCompareOp = VK_COMPARE_OP_ALWAYS, - .depthWriteEnable = true, + .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), .depthBoundsTestEnable = false, - .stencilTestEnable = false, + .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT), + .front = { + .stencilPassOp = VK_STENCIL_OP_REPLACE, + .stencilCompareOp = VK_COMPARE_OP_ALWAYS, + .stencilWriteMask = UINT32_MAX, + .stencilReference = 0, /* dynamic */ + }, + .back = { 0 /* dont care */ }, }; const VkPipelineColorBlendStateCreateInfo cb_state = { @@ -466,20 +476,21 @@ init_depth_pipeline(struct anv_device *device) .pAttachments = NULL, }; - device->meta_state.clear.depth_pipeline = - create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, - &cb_state); + return create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, + &cb_state); } static void -emit_load_depth_clear(struct anv_cmd_buffer *cmd_buffer, - uint32_t attachment, float clear_value) +emit_load_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, + uint32_t attachment, + VkImageAspectFlags aspects, + VkClearDepthStencilValue clear_value) { struct anv_device *device = cmd_buffer->device; VkCmdBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct depth_clear_vattrs vertex_data[3] = { + const struct depthstencil_clear_vattrs vertex_data[3] = { { .vue_header = { 0 }, .position = { 0.0, 0.0 }, @@ -518,8 +529,10 @@ emit_load_depth_clear(struct anv_cmd_buffer *cmd_buffer, .originY = 0, .width = fb->width, .height = fb->height, - .minDepth = clear_value, - .maxDepth = clear_value, + + /* Ignored when clearing only stencil. 
*/ + .minDepth = clear_value.depth, + .maxDepth = clear_value.depth, }, }); @@ -531,34 +544,72 @@ emit_load_depth_clear(struct anv_cmd_buffer *cmd_buffer, } }); + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + ANV_CALL(CmdSetStencilReference)(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, + clear_value.stencil); + } + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, (VkDeviceSize[]) { 0 }); - if (cmd_buffer->state.pipeline != device->meta_state.clear.depth_pipeline) { - VkPipeline pipeline_h = - anv_pipeline_to_handle(device->meta_state.clear.depth_pipeline); + struct anv_pipeline *pipeline; + switch (aspects) { + case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: + pipeline = device->meta_state.clear.depthstencil_pipeline; + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + pipeline = device->meta_state.clear.depth_only_pipeline; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + pipeline = device->meta_state.clear.stencil_only_pipeline; + break; + default: + unreachable("expected depth or stencil aspect"); + } + + if (cmd_buffer->state.pipeline != pipeline) { ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_h); + anv_pipeline_to_handle(pipeline)); } ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); } +static void +init_depthstencil_pipelines(struct anv_device *device) +{ + device->meta_state.clear.depth_only_pipeline = + create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT); + + device->meta_state.clear.stencil_only_pipeline = + create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT); + + device->meta_state.clear.depthstencil_pipeline = + create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT); +} + void anv_device_init_meta_clear_state(struct anv_device *device) { init_color_pipeline(device); - init_depth_pipeline(device); + init_depthstencil_pipelines(device); } void anv_device_finish_meta_clear_state(struct anv_device 
*device) { - ANV_CALL(DestroyPipeline)(anv_device_to_handle(device), + VkDevice device_h = anv_device_to_handle(device); + + ANV_CALL(DestroyPipeline)(device_h, anv_pipeline_to_handle(device->meta_state.clear.color_pipeline)); - ANV_CALL(DestroyPipeline)(anv_device_to_handle(device), - anv_pipeline_to_handle(device->meta_state.clear.depth_pipeline)); + ANV_CALL(DestroyPipeline)(device_h, + anv_pipeline_to_handle(device->meta_state.clear.depth_only_pipeline)); + ANV_CALL(DestroyPipeline)(device_h, + anv_pipeline_to_handle(device->meta_state.clear.stencil_only_pipeline)); + ANV_CALL(DestroyPipeline)(device_h, + anv_pipeline_to_handle(device->meta_state.clear.depthstencil_pipeline)); } void @@ -578,15 +629,20 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, emit_load_color_clear(cmd_buffer, a, clear_values[a].color); } } else { + VkImageAspectFlags aspects = 0; + if (att->format->depth_format && att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - emit_load_depth_clear(cmd_buffer, a, clear_values[a].depthStencil.depth); + aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; } if (att->format->has_stencil && att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - anv_finishme("stencil load clear"); + aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; } + + emit_load_depthstencil_clear(cmd_buffer, a, aspects, + clear_values[a].depthStencil); } } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 396d4138dc1..858063da45c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -449,7 +449,9 @@ void anv_finish_wsi(struct anv_instance *instance); struct anv_meta_state { struct { struct anv_pipeline *color_pipeline; - struct anv_pipeline *depth_pipeline; + struct anv_pipeline *depth_only_pipeline; + struct anv_pipeline *stencil_only_pipeline; + struct anv_pipeline *depthstencil_pipeline; } clear; struct { -- cgit v1.2.3 From f029e0ce13b1fd6182c98a76df3920431a867589 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 11:56:48 -0800 
Subject: anv: Add a layout to anv_descriptor_set --- src/vulkan/anv_device.c | 2 ++ src/vulkan/anv_private.h | 1 + 2 files changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 26d0fe57a42..156a9d2b1b6 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1646,6 +1646,8 @@ anv_descriptor_set_create(struct anv_device *device, */ memset(set, 0, size); + set->layout = layout; + /* Go through and fill out immutable samplers if we have any */ struct anv_descriptor *desc = set->descriptors; for (uint32_t b = 0; b < layout->binding_count; b++) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 858063da45c..a208162bbb6 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -751,6 +751,7 @@ struct anv_descriptor { }; struct anv_descriptor_set { + const struct anv_descriptor_set_layout *layout; struct anv_descriptor descriptors[0]; }; -- cgit v1.2.3 From 45b1bbe80197f646e72362502fa047df372bdd4e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 12:07:36 -0800 Subject: anv: Add a descriptor_index to anv_descriptor_set_binding_layout --- src/vulkan/anv_device.c | 1 + src/vulkan/anv_pipeline.c | 4 +--- src/vulkan/anv_private.h | 3 +++ 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 156a9d2b1b6..e3c3bdeaa7c 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1529,6 +1529,7 @@ VkResult anv_CreateDescriptorSetLayout( for (uint32_t b = 0; b < pCreateInfo->count; b++) { uint32_t array_size = MAX2(1, pCreateInfo->pBinding[b].arraySize); set_layout->binding[b].array_size = array_size; + set_layout->binding[b].descriptor_index = set_layout->size; set_layout->size += array_size; switch (pCreateInfo->pBinding[b].descriptorType) { diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 9fb5ddba20b..1555b23fc69 100644 --- 
a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -1230,9 +1230,9 @@ VkResult anv_CreatePipelineLayout( for (uint32_t set = 0; set < pCreateInfo->descriptorSetCount; set++) { struct anv_descriptor_set_layout *set_layout = l.set[set].layout; - unsigned set_offset = 0; for (uint32_t b = 0; b < set_layout->binding_count; b++) { unsigned array_size = set_layout->binding[b].array_size; + unsigned set_offset = set_layout->binding[b].descriptor_index; if (set_layout->binding[b].stage[s].surface_index >= 0) { assert(surface == l.set[set].stage[s].surface_start + @@ -1253,8 +1253,6 @@ VkResult anv_CreatePipelineLayout( } sampler += array_size; } - - set_offset += array_size; } } } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a208162bbb6..8000e9f5396 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -689,6 +689,9 @@ struct anv_descriptor_set_binding_layout { /* Number of array elements in this binding */ uint16_t array_size; + /* Index into the flattend descriptor set */ + uint16_t descriptor_index; + /* Index into the dynamic state array for a dynamic buffer */ int16_t dynamic_offset_index; -- cgit v1.2.3 From 3aa2fc82dd60133b61635983939494fb4be541db Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 12:28:01 -0800 Subject: anv: Rework UpdateDescriptorSets Previously, UpdateDescriptorSets was wrong because it assumed that the binding was the offset into the descriptor set. 
--- src/vulkan/anv_device.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index e3c3bdeaa7c..eb38adfa426 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1728,6 +1728,10 @@ void anv_UpdateDescriptorSets( for (uint32_t i = 0; i < writeCount; i++) { const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet); + const struct anv_descriptor_set_binding_layout *bind_layout = + &set->layout->binding[write->destBinding]; + struct anv_descriptor *desc = + &set->descriptors[bind_layout->descriptor_index]; switch (write->descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: @@ -1735,7 +1739,7 @@ void anv_UpdateDescriptorSets( ANV_FROM_HANDLE(anv_sampler, sampler, write->pDescriptors[j].sampler); - set->descriptors[write->destBinding + j] = (struct anv_descriptor) { + desc[j] = (struct anv_descriptor) { .type = ANV_DESCRIPTOR_TYPE_SAMPLER, .sampler = sampler, }; @@ -1744,15 +1748,13 @@ void anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: for (uint32_t j = 0; j < write->count; j++) { - struct anv_descriptor *desc = - &set->descriptors[write->destBinding + j]; ANV_FROM_HANDLE(anv_image_view, iview, write->pDescriptors[j].imageView); ANV_FROM_HANDLE(anv_sampler, sampler, write->pDescriptors[j].sampler); - desc->type = ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER; - desc->image_view = iview; + desc[j].type = ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER; + desc[j].image_view = iview; /* If this descriptor has an immutable sampler, we don't want * to stomp on it. 
@@ -1768,7 +1770,7 @@ void anv_UpdateDescriptorSets( ANV_FROM_HANDLE(anv_image_view, iview, write->pDescriptors[j].imageView); - set->descriptors[write->destBinding + j] = (struct anv_descriptor) { + desc[j] = (struct anv_descriptor) { .type = ANV_DESCRIPTOR_TYPE_IMAGE_VIEW, .image_view = iview, }; @@ -1793,23 +1795,21 @@ void anv_UpdateDescriptorSets( ANV_FROM_HANDLE(anv_buffer_view, bview, write->pDescriptors[j].bufferView); - set->descriptors[write->destBinding + j] = - (struct anv_descriptor) { - .type = ANV_DESCRIPTOR_TYPE_BUFFER_VIEW, - .buffer_view = bview, - }; + desc[j] = (struct anv_descriptor) { + .type = ANV_DESCRIPTOR_TYPE_BUFFER_VIEW, + .buffer_view = bview, + }; } else { ANV_FROM_HANDLE(anv_buffer, buffer, write->pDescriptors[j].bufferInfo.buffer); assert(buffer); - set->descriptors[write->destBinding + j] = - (struct anv_descriptor) { - .type = ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET, - .buffer = buffer, - .offset = write->pDescriptors[j].bufferInfo.offset, - .range = write->pDescriptors[j].bufferInfo.range, - }; + desc[j] = (struct anv_descriptor) { + .type = ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET, + .buffer = buffer, + .offset = write->pDescriptors[j].bufferInfo.offset, + .range = write->pDescriptors[j].bufferInfo.range, + }; } } -- cgit v1.2.3 From 0360c3608b880f65921a497c8d17a74165d2970d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 12:52:12 -0800 Subject: anv/device: Only support binding UBOs through BufferInfo --- src/vulkan/anv_device.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index eb38adfa426..af5f8d25de1 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1791,26 +1791,17 @@ void anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: for (uint32_t j = 0; j < write->count; j++) { - if 
(write->pDescriptors[j].bufferView.handle) { - ANV_FROM_HANDLE(anv_buffer_view, bview, - write->pDescriptors[j].bufferView); - - desc[j] = (struct anv_descriptor) { - .type = ANV_DESCRIPTOR_TYPE_BUFFER_VIEW, - .buffer_view = bview, - }; - } else { - ANV_FROM_HANDLE(anv_buffer, buffer, - write->pDescriptors[j].bufferInfo.buffer); - assert(buffer); - - desc[j] = (struct anv_descriptor) { - .type = ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET, - .buffer = buffer, - .offset = write->pDescriptors[j].bufferInfo.offset, - .range = write->pDescriptors[j].bufferInfo.range, - }; - } + assert(write->pDescriptors[j].bufferInfo.buffer.handle); + ANV_FROM_HANDLE(anv_buffer, buffer, + write->pDescriptors[j].bufferInfo.buffer); + assert(buffer); + + desc[j] = (struct anv_descriptor) { + .type = ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET, + .buffer = buffer, + .offset = write->pDescriptors[j].bufferInfo.offset, + .range = write->pDescriptors[j].bufferInfo.range, + }; } default: -- cgit v1.2.3 From d7cc9929bb5c82ded45f3a6345c4141348514c85 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 13:16:15 -0800 Subject: anv: Remove all support for BufferViews We never *actually* supported them, we just used them for binding UBOs. Now that we have BufferInfo and we aren't supporting texture buffers yet, we should get rid of them until we can do them properly. 
--- src/vulkan/anv_cmd_buffer.c | 5 ----- src/vulkan/anv_device.c | 37 ++++++------------------------------- src/vulkan/anv_private.h | 15 --------------- src/vulkan/gen7_state.c | 24 ------------------------ src/vulkan/gen8_state.c | 24 ------------------------ 5 files changed, 6 insertions(+), 99 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 5ce57f1d98e..be8a537f656 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -605,11 +605,6 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case ANV_DESCRIPTOR_TYPE_SAMPLER: /* Nothing for us to do here */ continue; - case ANV_DESCRIPTOR_TYPE_BUFFER_VIEW: - surface_state = desc->buffer_view->surface_state; - bo = desc->buffer_view->bo; - bo_offset = desc->buffer_view->offset; - break; case ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET: { bo = desc->buffer->bo; bo_offset = desc->buffer->offset + desc->offset; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index af5f8d25de1..2e330e6bbad 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1429,44 +1429,19 @@ anv_fill_buffer_surface_state(struct anv_device *device, void *state, } } -VkResult -anv_buffer_view_create( - struct anv_device * device, - const VkBufferViewCreateInfo* pCreateInfo, - struct anv_buffer_view ** bview_out) +VkResult anv_CreateBufferView( + VkDevice _device, + const VkBufferViewCreateInfo* pCreateInfo, + VkBufferView* pView) { - ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); - struct anv_buffer_view *bview; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); - - bview = anv_device_alloc(device, sizeof(*bview), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (bview == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - *bview = (struct anv_buffer_view) { - .bo = buffer->bo, - .offset = buffer->offset + pCreateInfo->offset, - .surface_state = 
anv_state_pool_alloc(&device->surface_state_pool, 64, 64), - .format = anv_format_for_vk_format(pCreateInfo->format), - .range = pCreateInfo->range, - }; - - *bview_out = bview; - - return VK_SUCCESS; + stub_return(VK_UNSUPPORTED); } void anv_DestroyBufferView( VkDevice _device, VkBufferView _bview) { - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_buffer_view, bview, _bview); - - anv_state_pool_free(&device->surface_state_pool, bview->surface_state); - anv_device_free(device, bview); + stub(); } void anv_DestroySampler( diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8000e9f5396..25f88fc8b84 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -726,7 +726,6 @@ struct anv_descriptor_set_layout { enum anv_descriptor_type { ANV_DESCRIPTOR_TYPE_EMPTY = 0, - ANV_DESCRIPTOR_TYPE_BUFFER_VIEW, ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET, ANV_DESCRIPTOR_TYPE_IMAGE_VIEW, ANV_DESCRIPTOR_TYPE_SAMPLER, @@ -739,7 +738,6 @@ struct anv_descriptor { union { struct { union { - struct anv_buffer_view *buffer_view; struct anv_image_view *image_view; }; struct anv_sampler *sampler; @@ -1317,14 +1315,6 @@ struct anv_image { }; }; -struct anv_buffer_view { - struct anv_state surface_state; /**< RENDER_SURFACE_STATE */ - struct anv_bo *bo; - uint32_t offset; /**< Offset into bo. 
*/ - uint32_t range; /**< VkBufferViewCreateInfo::range */ - const struct anv_format *format; /**< VkBufferViewCreateInfo::format */ -}; - struct anv_image_view { const struct anv_image *image; /**< VkImageViewCreateInfo::image */ const struct anv_format *format; /**< VkImageViewCreateInfo::format */ @@ -1371,10 +1361,6 @@ gen8_image_view_init(struct anv_image_view *iview, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); -VkResult anv_buffer_view_create(struct anv_device *device, - const VkBufferViewCreateInfo *pCreateInfo, - struct anv_buffer_view **bview_out); - void anv_fill_buffer_surface_state(struct anv_device *device, void *state, const struct anv_format *format, uint32_t offset, uint32_t range); @@ -1482,7 +1468,6 @@ ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCmdPool) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView); ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index aef97838e17..0d67be4a99b 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -59,30 +59,6 @@ gen7_fill_buffer_surface_state(void *state, const struct anv_format *format, GEN7_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); } -VkResult gen7_CreateBufferView( - VkDevice _device, - const VkBufferViewCreateInfo* pCreateInfo, - VkBufferView* pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_buffer_view *bview; - VkResult result; - - result = anv_buffer_view_create(device, pCreateInfo, &bview); - if (result != VK_SUCCESS) - return result; - - const struct anv_format *format = - anv_format_for_vk_format(pCreateInfo->format); - - 
gen7_fill_buffer_surface_state(bview->surface_state.map, format, - bview->offset, pCreateInfo->range); - - *pView = anv_buffer_view_to_handle(bview); - - return VK_SUCCESS; -} - static const uint32_t vk_to_gen_tex_filter[] = { [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST, [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index d9438127f2a..61aede99ad4 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -63,30 +63,6 @@ gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); } -VkResult gen8_CreateBufferView( - VkDevice _device, - const VkBufferViewCreateInfo* pCreateInfo, - VkBufferView* pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_buffer_view *bview; - VkResult result; - - result = anv_buffer_view_create(device, pCreateInfo, &bview); - if (result != VK_SUCCESS) - return result; - - const struct anv_format *format = - anv_format_for_vk_format(pCreateInfo->format); - - gen8_fill_buffer_surface_state(bview->surface_state.map, format, - bview->offset, pCreateInfo->range); - - *pView = anv_buffer_view_to_handle(bview); - - return VK_SUCCESS; -} - static const uint8_t anv_halign[] = { [4] = HALIGN4, [8] = HALIGN8, -- cgit v1.2.3 From f8052351ac1cce196b99a78114d513695fbca4d6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 13:29:47 -0800 Subject: anv/device: Increase the block size for instructions --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 2e330e6bbad..c5bea7253ed 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -661,7 +661,7 @@ VkResult anv_CreateDevice( anv_state_pool_init(&device->dynamic_state_pool, &device->dynamic_state_block_pool); - anv_block_pool_init(&device->instruction_block_pool, device, 2048); + 
anv_block_pool_init(&device->instruction_block_pool, device, 4096); anv_block_pool_init(&device->surface_state_block_pool, device, 4096); anv_state_pool_init(&device->surface_state_pool, -- cgit v1.2.3 From 612e35b2c65c99773b73e53d0e6fd112b1a7431f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 13:32:52 -0800 Subject: anv: Do range-checking in the shader for dynamic buffers --- src/vulkan/anv_cmd_buffer.c | 31 ++++--- src/vulkan/anv_device.c | 7 ++ src/vulkan/anv_nir_apply_dynamic_offsets.c | 144 +++++++++++++++++------------ src/vulkan/anv_pipeline.c | 2 +- src/vulkan/anv_private.h | 7 +- 5 files changed, 118 insertions(+), 73 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index be8a537f656..c0e28bdf047 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -487,19 +487,28 @@ void anv_CmdBindDescriptorSets( if (set_layout->dynamic_offset_count > 0) { VkShaderStage s; for_each_bit(s, set_layout->shader_stages) { - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, - dynamic_offsets); - uint32_t *offsets = - cmd_buffer->state.push_constants[s]->dynamic_offsets + - layout->set[firstSet + i].dynamic_offset_start; - - typed_memcpy(offsets, pDynamicOffsets + dynamic_slot, - set_layout->dynamic_offset_count); - + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic); + + struct anv_push_constants *push = + cmd_buffer->state.push_constants[s]; + + unsigned d = layout->set[firstSet + i].dynamic_offset_start; + const uint32_t *offsets = pDynamicOffsets + dynamic_slot; + struct anv_descriptor *desc = set->descriptors; + + for (unsigned b = 0; b < set_layout->binding_count; b++) { + if (set_layout->binding[b].dynamic_offset_index < 0) + continue; + + unsigned array_size = set_layout->binding[b].array_size; + for (unsigned j = 0; j < array_size; j++) { + push->dynamic[d].offset = *(offsets++); + push->dynamic[d].range = (desc++)->range; + d++; + } + } } 
cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; - - dynamic_slot += set_layout->dynamic_offset_count; } } } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index c5bea7253ed..9483816b53f 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1777,6 +1777,13 @@ void anv_UpdateDescriptorSets( .offset = write->pDescriptors[j].bufferInfo.offset, .range = write->pDescriptors[j].bufferInfo.range, }; + + /* For buffers with dynamic offsets, we use the full possible + * range in the surface state and do the actual range-checking + * in the shader. + */ + if (bind_layout->dynamic_offset_index >= 0) + desc[j].range = buffer->size - desc[j].offset; } default: diff --git a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c index e14644cd222..d6c09474da7 100644 --- a/src/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c @@ -107,77 +107,100 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state) nir_intrinsic_instr *offset_load = nir_intrinsic_instr_create(state->shader, offset_load_op); - offset_load->num_components = 1; - offset_load->const_index[0] = state->indices_start + index; + offset_load->num_components = 2; + offset_load->const_index[0] = state->indices_start + index * 2; if (const_arr_idx) { - offset_load->const_index[1] = const_arr_idx->u[0]; + offset_load->const_index[1] = const_arr_idx->u[0] * 2; } else { offset_load->const_index[1] = 0; - nir_src_copy(&offset_load->src[0], &res_intrin->src[0], - &intrin->instr); + offset_load->src[0] = nir_src_for_ssa( + nir_imul(b, nir_ssa_for_src(b, res_intrin->src[0], 1), + nir_imm_int(b, 2))); } - nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 1, NULL); + nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL); nir_builder_instr_insert(b, &offset_load->instr); - nir_ssa_def *offset = &offset_load->dest.ssa; + /* We calculate the full offset and don't bother with the 
base + * offset. We need the full offset for the predicate anyway. + */ + nir_ssa_def *rel_offset = nir_imm_int(b, intrin->const_index[0]); if (indirect_src >= 0) { assert(intrin->src[indirect_src].is_ssa); - offset = nir_iadd(b, intrin->src[indirect_src].ssa, offset); + rel_offset = nir_iadd(b, intrin->src[indirect_src].ssa, rel_offset); } + nir_ssa_def *global_offset = nir_iadd(b, rel_offset, + &offset_load->dest.ssa); - /* Now we can modify the load/store intrinsic */ - - if (indirect_src < 0) { - /* The original intrinsic is not an indirect variant. We need to - * create a new one and copy the old data over first. - */ - - nir_intrinsic_op indirect_op; - switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo: - indirect_op = nir_intrinsic_load_ubo_indirect; - break; - case nir_intrinsic_load_ssbo: - indirect_op = nir_intrinsic_load_ssbo_indirect; - break; - case nir_intrinsic_store_ssbo: - indirect_op = nir_intrinsic_store_ssbo_indirect; - break; - default: - unreachable("Invalid direct load/store intrinsic"); - } - - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(state->shader, indirect_op); - copy->num_components = intrin->num_components; - - for (unsigned i = 0; i < 4; i++) - copy->const_index[i] = intrin->const_index[i]; - - /* The indirect is always the last source */ - indirect_src = nir_intrinsic_infos[intrin->intrinsic].num_srcs; - - for (unsigned i = 0; i < (unsigned)indirect_src; i++) - nir_src_copy(©->src[i], &intrin->src[i], ©->instr); - - copy->src[indirect_src] = nir_src_for_ssa(offset); - nir_ssa_dest_init(©->instr, ©->dest, - intrin->dest.ssa.num_components, - intrin->dest.ssa.name); - nir_builder_instr_insert(b, ©->instr); + /* Now we replace the load/store intrinsic */ + + nir_intrinsic_op indirect_op; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo: + indirect_op = nir_intrinsic_load_ubo_indirect; + break; + case nir_intrinsic_load_ssbo: + indirect_op = nir_intrinsic_load_ssbo_indirect; + break; + case 
nir_intrinsic_store_ssbo: + indirect_op = nir_intrinsic_store_ssbo_indirect; + break; + default: + unreachable("Invalid direct load/store intrinsic"); + } + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(state->shader, indirect_op); + copy->num_components = intrin->num_components; + + /* The indirect is always the last source */ + indirect_src = nir_intrinsic_infos[indirect_op].num_srcs - 1; + + for (unsigned i = 0; i < (unsigned)indirect_src; i++) + nir_src_copy(©->src[i], &intrin->src[i], ©->instr); + + copy->src[indirect_src] = nir_src_for_ssa(global_offset); + nir_ssa_dest_init(©->instr, ©->dest, + intrin->dest.ssa.num_components, + intrin->dest.ssa.name); + + /* In order to avoid out-of-bounds access, we predicate */ + nir_ssa_def *pred = nir_fge(b, nir_channel(b, &offset_load->dest.ssa, 1), + rel_offset); + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(pred); + nir_cf_node_insert(b->cursor, &if_stmt->cf_node); + + nir_instr_insert_after_cf_list(&if_stmt->then_list, ©->instr); + + if (indirect_op != nir_intrinsic_store_ssbo) { + /* It's a load, we need a phi node */ + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + intrin->num_components, NULL); + + nir_phi_src *src1 = ralloc(phi, nir_phi_src); + struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list); + src1->pred = exec_node_data(nir_block, tnode, cf_node.node); + src1->src = nir_src_for_ssa(©->dest.ssa); + exec_list_push_tail(&phi->srcs, &src1->node); + + b->cursor = nir_after_cf_list(&if_stmt->else_list); + nir_phi_src *src2 = ralloc(phi, nir_phi_src); + struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list); + src2->pred = exec_node_data(nir_block, enode, cf_node.node); + src2->src = nir_src_for_ssa(nir_imm_int(b, 0)); + exec_list_push_tail(&phi->srcs, &src2->node); + + nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr); assert(intrin->dest.is_ssa); 
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(©->dest.ssa)); - - nir_instr_remove(&intrin->instr); - } else { - /* It's already indirect, so we can just rewrite the one source */ - nir_instr_rewrite_src(&intrin->instr, &intrin->src[indirect_src], - nir_src_for_ssa(offset)); + nir_src_for_ssa(&phi->dest.ssa)); } + + nir_instr_remove(&intrin->instr); } return true; @@ -208,9 +231,12 @@ anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, } struct anv_push_constants *null_data = NULL; - for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) - prog_data->param[i + shader->num_uniforms] = - (const gl_constant_value *)&null_data->dynamic_offsets[i]; + for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) { + prog_data->param[i * 2 + shader->num_uniforms] = + (const gl_constant_value *)&null_data->dynamic[i].offset; + prog_data->param[i * 2 + 1 + shader->num_uniforms] = + (const gl_constant_value *)&null_data->dynamic[i].range; + } - shader->num_uniforms += MAX_DYNAMIC_BUFFERS; + shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 2; } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 1555b23fc69..e4f68350b81 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -370,7 +370,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, } if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) - prog_data->nr_params += MAX_DYNAMIC_BUFFERS; + prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; if (prog_data->nr_params > 0) { prog_data->param = (const gl_constant_value **) diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 25f88fc8b84..821e16164eb 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -852,8 +852,11 @@ struct anv_push_constants { uint32_t base_vertex; uint32_t base_instance; - /* Offsets for dynamically bound buffers */ - uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS]; + /* Offsets and ranges for dynamically bound buffers */ + struct { + uint32_t offset; + 
uint32_t range; + } dynamic[MAX_DYNAMIC_BUFFERS]; /* Image data for image_load_store on pre-SKL */ struct brw_image_param images[MAX_IMAGES]; -- cgit v1.2.3 From 220261a0c9d4f08e060f02d9cc61219360c2d3f9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 14:09:52 -0800 Subject: anv: Use VkDescriptorType instead of anv_descriptor_type --- src/vulkan/anv_cmd_buffer.c | 29 +++++++++++++++++++++-------- src/vulkan/anv_device.c | 8 ++++---- src/vulkan/anv_private.h | 10 +--------- 3 files changed, 26 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index c0e28bdf047..77471941071 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -610,11 +610,14 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, uint32_t bo_offset; switch (desc->type) { - case ANV_DESCRIPTOR_TYPE_EMPTY: - case ANV_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLER: /* Nothing for us to do here */ continue; - case ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET: { + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { bo = desc->buffer->bo; bo_offset = desc->buffer->offset + desc->offset; @@ -625,12 +628,20 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, bo_offset, desc->range); break; } - case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW: - case ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER: + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: surface_state = desc->image_view->nonrt_surface_state; bo = desc->image_view->bo; bo_offset = desc->image_view->offset; break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + assert(!"Unsupported descriptor type"); + 
break; } bt_map[bias + s] = surface_state.offset + state_offset; @@ -669,13 +680,15 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.descriptors[binding->set]; struct anv_descriptor *desc = &set->descriptors[binding->offset]; - if (desc->type != ANV_DESCRIPTOR_TYPE_SAMPLER && - desc->type != ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER) + if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER && + desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) continue; struct anv_sampler *sampler = desc->sampler; - /* FIXME: We shouldn't have to do this */ + /* This can happen if we have an unfilled slot since TYPE_SAMPLER + * happens to be zero. + */ if (sampler == NULL) continue; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 9483816b53f..dcb3ef20115 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1715,7 +1715,7 @@ void anv_UpdateDescriptorSets( write->pDescriptors[j].sampler); desc[j] = (struct anv_descriptor) { - .type = ANV_DESCRIPTOR_TYPE_SAMPLER, + .type = VK_DESCRIPTOR_TYPE_SAMPLER, .sampler = sampler, }; } @@ -1728,7 +1728,7 @@ void anv_UpdateDescriptorSets( ANV_FROM_HANDLE(anv_sampler, sampler, write->pDescriptors[j].sampler); - desc[j].type = ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER; + desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; desc[j].image_view = iview; /* If this descriptor has an immutable sampler, we don't want @@ -1746,7 +1746,7 @@ void anv_UpdateDescriptorSets( write->pDescriptors[j].imageView); desc[j] = (struct anv_descriptor) { - .type = ANV_DESCRIPTOR_TYPE_IMAGE_VIEW, + .type = write->descriptorType, .image_view = iview, }; } @@ -1772,7 +1772,7 @@ void anv_UpdateDescriptorSets( assert(buffer); desc[j] = (struct anv_descriptor) { - .type = ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET, + .type = write->descriptorType, .buffer = buffer, .offset = write->pDescriptors[j].bufferInfo.offset, .range = write->pDescriptors[j].bufferInfo.range, diff --git a/src/vulkan/anv_private.h 
b/src/vulkan/anv_private.h index 821e16164eb..6ba2e460a8d 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -724,16 +724,8 @@ struct anv_descriptor_set_layout { struct anv_descriptor_set_binding_layout binding[0]; }; -enum anv_descriptor_type { - ANV_DESCRIPTOR_TYPE_EMPTY = 0, - ANV_DESCRIPTOR_TYPE_BUFFER_AND_OFFSET, - ANV_DESCRIPTOR_TYPE_IMAGE_VIEW, - ANV_DESCRIPTOR_TYPE_SAMPLER, - ANV_DESCRIPTOR_TYPE_IMAGE_VIEW_AND_SAMPLER, -}; - struct anv_descriptor { - enum anv_descriptor_type type; + VkDescriptorType type; union { struct { -- cgit v1.2.3 From 5ba281e794fbc215adfedd44b73a628cdf39a513 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 14:49:15 -0800 Subject: nir/spirv: Add a helper for determining if a block is externally visable --- src/glsl/nir/spirv_to_nir.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 7c9567dd8b7..fc85c19a7a7 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1199,6 +1199,14 @@ static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *index); +static bool +variable_is_external_block(nir_variable *var) +{ + return var->interface_type && + (var->data.mode == nir_var_uniform || + var->data.mode == nir_var_shader_storage); +} + static struct vtn_ssa_value * vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, struct vtn_type *src_type) @@ -1206,7 +1214,7 @@ vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, nir_deref *src_tail = get_deref_tail(src); struct vtn_ssa_value *val; - if (src->var->interface_type && src->var->data.mode == nir_var_uniform) + if (variable_is_external_block(src->var)) val = vtn_block_load(b, src, src_type, src_tail); else val = _vtn_variable_load(b, src, src_tail); @@ -1549,7 +1557,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, * actually access the variable, so 
we need to keep around the original * type of the variable. */ - if (base->var->interface_type && base->var->data.mode == nir_var_uniform) + + if (variable_is_external_block(base->var)) val->deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; else val->deref_type = deref_type; -- cgit v1.2.3 From 1b5c7e7ecd43d3526c0a6831ff6c1edfab4529af Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 15:12:33 -0800 Subject: anv/pipeline: Expose is_scalar_shader_stage --- src/vulkan/anv_pipeline.c | 11 ++++++----- src/vulkan/anv_private.h | 3 +++ 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index e4f68350b81..67579c14e87 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -121,8 +121,9 @@ static const gl_shader_stage vk_shader_stage_to_mesa_stage[] = { [VK_SHADER_STAGE_COMPUTE] = MESA_SHADER_COMPUTE, }; -static bool -is_scalar_shader_stage(const struct brw_compiler *compiler, VkShaderStage stage) +bool +anv_is_scalar_shader_stage(const struct brw_compiler *compiler, + VkShaderStage stage) { switch (stage) { case VK_SHADER_STAGE_VERTEX: @@ -187,7 +188,7 @@ anv_shader_compile_to_nir(struct anv_device *device, assert(entrypoint != NULL); brw_preprocess_nir(nir, &device->info, - is_scalar_shader_stage(compiler, vk_stage)); + anv_is_scalar_shader_stage(compiler, vk_stage)); nir_shader_gather_info(nir, entrypoint); @@ -357,7 +358,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, if (nir == NULL) return NULL; - anv_nir_lower_push_constants(nir, is_scalar_shader_stage(compiler, stage)); + anv_nir_lower_push_constants(nir, anv_is_scalar_shader_stage(compiler, stage)); /* Figure out the number of parameters */ prog_data->nr_params = 0; @@ -409,7 +410,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, /* Finish the optimization and compilation process */ brw_postprocess_nir(nir, &pipeline->device->info, - 
is_scalar_shader_stage(compiler, stage)); + anv_is_scalar_shader_stage(compiler, stage)); /* nir_lower_io will only handle the push constants; we need to set this * to the full number of possible uniforms. diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 6ba2e460a8d..631b92cf267 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -430,6 +430,9 @@ struct anv_physical_device { struct brw_compiler * compiler; }; +bool anv_is_scalar_shader_stage(const struct brw_compiler *compiler, + VkShaderStage stage); + struct anv_instance { VK_LOADER_DATA _loader_data; -- cgit v1.2.3 From 399d5314f679921f0e383171bb01a3d259b04754 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 15:14:10 -0800 Subject: anv/cmd_buffer: Rework the way we emit UBO surface state The new mechanism should be able to handle SSBOs as well as properly handle emitting surface state on gen7 where we need different strides depending on shader stage. --- src/vulkan/anv_cmd_buffer.c | 43 ++++++++++++++++++++++++++++++++++++++++--- src/vulkan/anv_device.c | 6 +++--- src/vulkan/anv_private.h | 9 ++++++--- src/vulkan/gen7_state.c | 8 +------- src/vulkan/gen8_state.c | 4 +--- 5 files changed, 51 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 77471941071..6763278c5ef 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -548,6 +548,41 @@ add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, state.offset + dword * 4, bo, offset); } +static void +fill_descriptor_buffer_surface_state(struct anv_device *device, void *state, + VkShaderStage stage, VkDescriptorType type, + uint32_t offset, uint32_t range) +{ + VkFormat format; + uint32_t stride; + + switch (type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + if (anv_is_scalar_shader_stage(device->instance->physicalDevice.compiler, + stage)) { + stride = 4; + 
} else { + stride = 16; + } + format = VK_FORMAT_R32G32B32A32_SFLOAT; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + stride = 1; + format = VK_FORMAT_UNDEFINED; + break; + + default: + unreachable("Invalid descriptor type"); + } + + anv_fill_buffer_surface_state(device, state, + anv_format_for_vk_format(format), + offset, range, stride); +} + VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, VkShaderStage stage, struct anv_state *bt_state) @@ -623,9 +658,11 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, surface_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer); - anv_fill_buffer_surface_state(cmd_buffer->device, surface_state.map, - anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT), - bo_offset, desc->range); + + fill_descriptor_buffer_surface_state(cmd_buffer->device, + surface_state.map, + stage, desc->type, + bo_offset, desc->range); break; } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index dcb3ef20115..a4b58caab13 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1415,14 +1415,14 @@ void anv_DestroyBuffer( void anv_fill_buffer_surface_state(struct anv_device *device, void *state, const struct anv_format *format, - uint32_t offset, uint32_t range) + uint32_t offset, uint32_t range, uint32_t stride) { switch (device->info.gen) { case 7: - gen7_fill_buffer_surface_state(state, format, offset, range); + gen7_fill_buffer_surface_state(state, format, offset, range, stride); break; case 8: - gen8_fill_buffer_surface_state(state, format, offset, range); + gen8_fill_buffer_surface_state(state, format, offset, range, stride); break; default: unreachable("unsupported gen\n"); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 631b92cf267..500904713a6 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1361,12 +1361,15 @@ gen8_image_view_init(struct anv_image_view *iview, 
void anv_fill_buffer_surface_state(struct anv_device *device, void *state, const struct anv_format *format, - uint32_t offset, uint32_t range); + uint32_t offset, uint32_t range, + uint32_t stride); void gen7_fill_buffer_surface_state(void *state, const struct anv_format *format, - uint32_t offset, uint32_t range); + uint32_t offset, uint32_t range, + uint32_t stride); void gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, - uint32_t offset, uint32_t range); + uint32_t offset, uint32_t range, + uint32_t stride); struct anv_sampler { uint32_t state[4]; diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 0d67be4a99b..6f1cb8553e9 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -31,14 +31,8 @@ void gen7_fill_buffer_surface_state(void *state, const struct anv_format *format, - uint32_t offset, uint32_t range) + uint32_t offset, uint32_t range, uint32_t stride) { - /* This assumes RGBA float format. */ - - uint32_t stride = 16; /* Depends on whether accessing shader is simd8 or - * vec4. Will need one of each for buffers that are - * used in both vec4 and simd8. */ - uint32_t num_elements = range / stride; struct GEN7_RENDER_SURFACE_STATE surface_state = { diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 61aede99ad4..ce69377fd77 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -31,10 +31,8 @@ void gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, - uint32_t offset, uint32_t range) + uint32_t offset, uint32_t range, uint32_t stride) { - /* This assumes RGBA float format. 
*/ - uint32_t stride = 4; uint32_t num_elements = range / stride; struct GEN8_RENDER_SURFACE_STATE surface_state = { -- cgit v1.2.3 From 104525c33b154ed31cfedea2100b275abbc7a739 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 15:57:51 -0800 Subject: anv/pipeline: Set the right SSBO binding table start index for FS --- src/vulkan/anv_pipeline.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 67579c14e87..124140fa224 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -406,6 +406,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, prog_data->binding_table.size_bytes = 0; prog_data->binding_table.texture_start = bias; prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.ssbo_start = bias; prog_data->binding_table.image_start = bias; /* Finish the optimization and compilation process */ -- cgit v1.2.3 From 046563167ca9a608474a1c582087fdaf54bd9083 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 16:49:24 -0800 Subject: anv/apply_dynamic_offsets: Use the right sized immediate zero --- src/vulkan/anv_nir_apply_dynamic_offsets.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c index d6c09474da7..dd4f5dfe545 100644 --- a/src/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c @@ -187,10 +187,13 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state) exec_list_push_tail(&phi->srcs, &src1->node); b->cursor = nir_after_cf_list(&if_stmt->else_list); + nir_ssa_def *zero = nir_build_imm(b, intrin->num_components, + (nir_const_value) { .u = { 0, 0, 0, 0 } }); + nir_phi_src *src2 = ralloc(phi, nir_phi_src); struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list); src2->pred = exec_node_data(nir_block, enode, cf_node.node); - src2->src = 
nir_src_for_ssa(nir_imm_int(b, 0)); + src2->src = nir_src_for_ssa(zero); exec_list_push_tail(&phi->srcs, &src2->node); nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr); -- cgit v1.2.3 From a10d59c09abdaad19e2d6bbf70d9b2d4b36925db Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Nov 2015 16:53:27 -0800 Subject: nir/spirv: Increment num_ubos/ssbos when creating variables --- src/glsl/nir/spirv_to_nir.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index fc85c19a7a7..a06cffcefdc 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1364,10 +1364,13 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvStorageClassUniformConstant: if (interface_type && interface_type->buffer_block) { var->data.mode = nir_var_shader_storage; + b->shader->info.num_ssbos++; } else { /* UBO's and samplers */ var->data.mode = nir_var_uniform; var->data.read_only = true; + if (interface_type) + b->shader->info.num_ubos++; } break; case SpvStorageClassPushConstant: -- cgit v1.2.3 From 17fa3d3572d7cafa95cfe31c365bbc15b3c789e3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 7 Nov 2015 08:03:25 -0800 Subject: nir/spirv: Give both block and buffer_block types an interface type --- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a06cffcefdc..3f89bb2e779 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1346,7 +1346,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, var->name = ralloc_strdup(var, val->name); struct vtn_type *interface_type; - if (type->block) { + if (type->block || type->buffer_block) { interface_type = type; } else if (glsl_type_is_array(type->type) && (type->array_element->block || -- cgit v1.2.3 From 3ee923f1c2ef0832ebd6da445548e0a651aaae80 Mon Sep 17 00:00:00 
2001 From: Nanley Chery Date: Mon, 28 Sep 2015 17:54:59 -0700 Subject: anv: Rename cpp variable to "bs" cpp (chars-per-pixel) is an integer that fails to give useful data about most compressed formats. Instead, rename it to "bs" which stands for block size (in bytes). v2: Rename vk_format_for_bs to vk_format_for_size (Chad) Use "block size" instead of "bs" in error message (Chad) Reviewed-by: Chad Versace --- src/vulkan/anv_formats.c | 194 +++++++++++++++++++++---------------------- src/vulkan/anv_image.c | 6 +- src/vulkan/anv_meta.c | 30 +++---- src/vulkan/anv_private.h | 2 +- src/vulkan/anv_wsi_wayland.c | 2 +- 5 files changed, 117 insertions(+), 117 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 4efc537676c..28212341448 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -30,7 +30,7 @@ [__vk_fmt] = { .vk_format = __vk_fmt, .name = #__vk_fmt, __VA_ARGS__ } static const struct anv_format anv_formats[] = { - fmt(VK_FORMAT_UNDEFINED, RAW, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_UNDEFINED, RAW, .bs = 1, .num_channels = 1), fmt(VK_FORMAT_R4G4_UNORM, UNSUPPORTED), fmt(VK_FORMAT_R4G4_USCALED, UNSUPPORTED), fmt(VK_FORMAT_R4G4B4A4_UNORM, UNSUPPORTED), @@ -39,94 +39,94 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_R5G6B5_USCALED, UNSUPPORTED), fmt(VK_FORMAT_R5G5B5A1_UNORM, UNSUPPORTED), fmt(VK_FORMAT_R5G5B5A1_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_R8_UNORM, R8_UNORM, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SNORM, R8_SNORM, .cpp = 1, .num_channels = 1,), - fmt(VK_FORMAT_R8_USCALED, R8_USCALED, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SSCALED, R8_SSCALED, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_UINT, R8_UINT, .cpp = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SINT, R8_SINT, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_UNORM, R8_UNORM, .bs = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SNORM, R8_SNORM, .bs = 1, .num_channels = 1,), + 
fmt(VK_FORMAT_R8_USCALED, R8_USCALED, .bs = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SSCALED, R8_SSCALED, .bs = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_UINT, R8_UINT, .bs = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SINT, R8_SINT, .bs = 1, .num_channels = 1), fmt(VK_FORMAT_R8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_R8G8_UNORM, R8G8_UNORM, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SNORM, R8G8_SNORM, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_USCALED, R8G8_USCALED, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SSCALED, R8G8_SSCALED, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_UINT, R8G8_UINT, .cpp = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SINT, R8G8_SINT, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_UNORM, R8G8_UNORM, .bs = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SNORM, R8G8_SNORM, .bs = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_USCALED, R8G8_USCALED, .bs = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SSCALED, R8G8_SSCALED, .bs = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_UINT, R8G8_UINT, .bs = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SINT, R8G8_SINT, .bs = 2, .num_channels = 2), fmt(VK_FORMAT_R8G8_SRGB, UNSUPPORTED), /* L8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8_UNORM, R8G8B8X8_UNORM, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SNORM, R8G8B8_SNORM, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_USCALED, R8G8B8_USCALED, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SSCALED, R8G8B8_SSCALED, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_UINT, R8G8B8_UINT, .cpp = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SINT, R8G8B8_SINT, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_UNORM, R8G8B8X8_UNORM, .bs = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SNORM, R8G8B8_SNORM, .bs = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_USCALED, R8G8B8_USCALED, .bs = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SSCALED, R8G8B8_SSCALED, .bs = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_UINT, R8G8B8_UINT, .bs 
= 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SINT, R8G8B8_SINT, .bs = 3, .num_channels = 3), fmt(VK_FORMAT_R8G8B8_SRGB, UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8A8_UNORM, R8G8B8A8_UNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SNORM, R8G8B8A8_SNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_USCALED, R8G8B8A8_USCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SSCALED, R8G8B8A8_SSCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_UINT, R8G8B8A8_UINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SINT, R8G8B8A8_SINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SRGB, R8G8B8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UNORM, R10G10B10A2_UNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SNORM, R10G10B10A2_SNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_USCALED, R10G10B10A2_USCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SSCALED, R10G10B10A2_SSCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UINT, R10G10B10A2_UINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SINT, R10G10B10A2_SINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_R16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SNORM, R16_SNORM, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_USCALED, R16_USCALED, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SSCALED, R16_SSCALED, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_UINT, R16_UINT, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SINT, R16_SINT, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SFLOAT, R16_FLOAT, .cpp = 2, .num_channels = 1), - fmt(VK_FORMAT_R16G16_UNORM, R16G16_UNORM, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SNORM, R16G16_SNORM, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_USCALED, R16G16_USCALED, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SSCALED, R16G16_SSCALED, .cpp = 4, 
.num_channels = 2), - fmt(VK_FORMAT_R16G16_UINT, R16G16_UINT, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SINT, R16G16_SINT, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SFLOAT, R16G16_FLOAT, .cpp = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16B16_UNORM, R16G16B16_UNORM, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SNORM, R16G16B16_SNORM, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_USCALED, R16G16B16_USCALED, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SSCALED, R16G16B16_SSCALED, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_UINT, R16G16B16_UINT, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SINT, R16G16B16_SINT, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SFLOAT, R16G16B16_FLOAT, .cpp = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16A16_UNORM, R16G16B16A16_UNORM, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SNORM, R16G16B16A16_SNORM, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_USCALED, R16G16B16A16_USCALED, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SSCALED, R16G16B16A16_SSCALED, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_UINT, R16G16B16A16_UINT, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SINT, R16G16B16A16_SINT, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SFLOAT, R16G16B16A16_FLOAT, .cpp = 8, .num_channels = 4), - fmt(VK_FORMAT_R32_UINT, R32_UINT, .cpp = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32_SINT, R32_SINT, .cpp = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32G32_UINT, R32G32_UINT, .cpp = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32_SINT, R32G32_SINT, .cpp = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32_SFLOAT, R32G32_FLOAT, .cpp = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32B32_UINT, R32G32B32_UINT, .cpp = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32_SINT, R32G32B32_SINT, .cpp = 12, .num_channels = 3,), - 
fmt(VK_FORMAT_R32G32B32_SFLOAT, R32G32B32_FLOAT, .cpp = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32A32_UINT, R32G32B32A32_UINT, .cpp = 16, .num_channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SINT, R32G32B32A32_SINT, .cpp = 16, .num_channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SFLOAT, R32G32B32A32_FLOAT, .cpp = 16, .num_channels = 4,), - fmt(VK_FORMAT_R64_SFLOAT, R64_FLOAT, .cpp = 8, .num_channels = 1), - fmt(VK_FORMAT_R64G64_SFLOAT, R64G64_FLOAT, .cpp = 16, .num_channels = 2), - fmt(VK_FORMAT_R64G64B64_SFLOAT, R64G64B64_FLOAT, .cpp = 24, .num_channels = 3), - fmt(VK_FORMAT_R64G64B64A64_SFLOAT, R64G64B64A64_FLOAT, .cpp = 32, .num_channels = 4), - fmt(VK_FORMAT_R11G11B10_UFLOAT, R11G11B10_FLOAT, .cpp = 4, .num_channels = 3), - fmt(VK_FORMAT_R9G9B9E5_UFLOAT, R9G9B9E5_SHAREDEXP, .cpp = 4, .num_channels = 3), - - fmt(VK_FORMAT_D16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1, .depth_format = D16_UNORM), - fmt(VK_FORMAT_D24_UNORM_X8, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), - fmt(VK_FORMAT_D32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1, .depth_format = D32_FLOAT), - fmt(VK_FORMAT_S8_UINT, R8_UINT, .cpp = 1, .num_channels = 1, .has_stencil = true), - fmt(VK_FORMAT_D16_UNORM_S8_UINT, R16_UNORM, .cpp = 2, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), - fmt(VK_FORMAT_D24_UNORM_S8_UINT, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), - fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, R32_FLOAT, .cpp = 4, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), + fmt(VK_FORMAT_R8G8B8A8_UNORM, R8G8B8A8_UNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SNORM, R8G8B8A8_SNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_USCALED, R8G8B8A8_USCALED, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SSCALED, R8G8B8A8_SSCALED, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_UINT, R8G8B8A8_UINT, .bs = 4, .num_channels = 
4), + fmt(VK_FORMAT_R8G8B8A8_SINT, R8G8B8A8_SINT, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SRGB, R8G8B8A8_UNORM_SRGB, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UNORM, R10G10B10A2_UNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SNORM, R10G10B10A2_SNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_USCALED, R10G10B10A2_USCALED, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SSCALED, R10G10B10A2_SSCALED, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UINT, R10G10B10A2_UINT, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SINT, R10G10B10A2_SINT, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R16_UNORM, R16_UNORM, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SNORM, R16_SNORM, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_USCALED, R16_USCALED, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SSCALED, R16_SSCALED, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_UINT, R16_UINT, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SINT, R16_SINT, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SFLOAT, R16_FLOAT, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16G16_UNORM, R16G16_UNORM, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SNORM, R16G16_SNORM, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_USCALED, R16G16_USCALED, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SSCALED, R16G16_SSCALED, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_UINT, R16G16_UINT, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SINT, R16G16_SINT, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SFLOAT, R16G16_FLOAT, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16B16_UNORM, R16G16B16_UNORM, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SNORM, R16G16B16_SNORM, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_USCALED, R16G16B16_USCALED, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SSCALED, R16G16B16_SSCALED, .bs = 6, .num_channels = 3), + 
fmt(VK_FORMAT_R16G16B16_UINT, R16G16B16_UINT, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SINT, R16G16B16_SINT, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SFLOAT, R16G16B16_FLOAT, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16A16_UNORM, R16G16B16A16_UNORM, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SNORM, R16G16B16A16_SNORM, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_USCALED, R16G16B16A16_USCALED, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SSCALED, R16G16B16A16_SSCALED, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_UINT, R16G16B16A16_UINT, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SINT, R16G16B16A16_SINT, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, R16G16B16A16_FLOAT, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R32_UINT, R32_UINT, .bs = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32_SINT, R32_SINT, .bs = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32_SFLOAT, R32_FLOAT, .bs = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32G32_UINT, R32G32_UINT, .bs = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SINT, R32G32_SINT, .bs = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SFLOAT, R32G32_FLOAT, .bs = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32B32_UINT, R32G32B32_UINT, .bs = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SINT, R32G32B32_SINT, .bs = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, R32G32B32_FLOAT, .bs = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32A32_UINT, R32G32B32A32_UINT, .bs = 16, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SINT, R32G32B32A32_SINT, .bs = 16, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, R32G32B32A32_FLOAT, .bs = 16, .num_channels = 4,), + fmt(VK_FORMAT_R64_SFLOAT, R64_FLOAT, .bs = 8, .num_channels = 1), + fmt(VK_FORMAT_R64G64_SFLOAT, R64G64_FLOAT, .bs = 16, .num_channels = 2), + fmt(VK_FORMAT_R64G64B64_SFLOAT, R64G64B64_FLOAT, .bs = 24, .num_channels = 3), + 
fmt(VK_FORMAT_R64G64B64A64_SFLOAT, R64G64B64A64_FLOAT, .bs = 32, .num_channels = 4), + fmt(VK_FORMAT_R11G11B10_UFLOAT, R11G11B10_FLOAT, .bs = 4, .num_channels = 3), + fmt(VK_FORMAT_R9G9B9E5_UFLOAT, R9G9B9E5_SHAREDEXP, .bs = 4, .num_channels = 3), + + fmt(VK_FORMAT_D16_UNORM, R16_UNORM, .bs = 2, .num_channels = 1, .depth_format = D16_UNORM), + fmt(VK_FORMAT_D24_UNORM_X8, R24_UNORM_X8_TYPELESS, .bs = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), + fmt(VK_FORMAT_D32_SFLOAT, R32_FLOAT, .bs = 4, .num_channels = 1, .depth_format = D32_FLOAT), + fmt(VK_FORMAT_S8_UINT, R8_UINT, .bs = 1, .num_channels = 1, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, R16_UNORM, .bs = 2, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, R24_UNORM_X8_TYPELESS, .bs = 4, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, R32_FLOAT, .bs = 4, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), fmt(VK_FORMAT_BC1_RGB_UNORM, UNSUPPORTED), fmt(VK_FORMAT_BC1_RGB_SRGB, UNSUPPORTED), @@ -182,9 +182,9 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_ASTC_12x10_SRGB, UNSUPPORTED), fmt(VK_FORMAT_ASTC_12x12_UNORM, UNSUPPORTED), fmt(VK_FORMAT_ASTC_12x12_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_B4G4R4A4_UNORM, B4G4R4A4_UNORM, .cpp = 2, .num_channels = 4), - fmt(VK_FORMAT_B5G5R5A1_UNORM, B5G5R5A1_UNORM, .cpp = 2, .num_channels = 4), - fmt(VK_FORMAT_B5G6R5_UNORM, B5G6R5_UNORM, .cpp = 2, .num_channels = 3), + fmt(VK_FORMAT_B4G4R4A4_UNORM, B4G4R4A4_UNORM, .bs = 2, .num_channels = 4), + fmt(VK_FORMAT_B5G5R5A1_UNORM, B5G5R5A1_UNORM, .bs = 2, .num_channels = 4), + fmt(VK_FORMAT_B5G6R5_UNORM, B5G6R5_UNORM, .bs = 2, .num_channels = 3), fmt(VK_FORMAT_B5G6R5_USCALED, UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_UNORM, UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_SNORM, UNSUPPORTED), @@ -193,19 +193,19 @@ static const struct anv_format anv_formats[] = { 
fmt(VK_FORMAT_B8G8R8_UINT, UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_SINT, UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UNORM, B8G8R8A8_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_B8G8R8A8_UNORM, B8G8R8A8_UNORM, .bs = 4, .num_channels = 4), fmt(VK_FORMAT_B8G8R8A8_SNORM, UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_USCALED, UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_SSCALED, UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_UINT, UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_SINT, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SRGB, B8G8R8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UNORM, B10G10R10A2_UNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SNORM, B10G10R10A2_SNORM, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_USCALED, B10G10R10A2_USCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SSCALED, B10G10R10A2_SSCALED, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UINT, B10G10R10A2_UINT, .cpp = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SINT, B10G10R10A2_SINT, .cpp = 4, .num_channels = 4) + fmt(VK_FORMAT_B8G8R8A8_SRGB, B8G8R8A8_UNORM_SRGB, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UNORM, B10G10R10A2_UNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SNORM, B10G10R10A2_SNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_USCALED, B10G10R10A2_USCALED, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SSCALED, B10G10R10A2_SSCALED, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UINT, B10G10R10A2_UINT, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SINT, B10G10R10A2_SINT, .bs = 4, .num_channels = 4) }; #undef fmt diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index e3991e22234..8aa74c2e191 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -225,7 +225,7 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, */ assert(anv_is_aligned(qpitch, j)); - uint32_t stride = 
align_u32(mt_width * format->cpp, tile_info->width); + uint32_t stride = align_u32(mt_width * format->bs, tile_info->width); if (create_info->stride > 0) stride = create_info->stride; @@ -490,14 +490,14 @@ anv_validate_CreateImageView(VkDevice _device, assert(!image->format->has_stencil); assert(!view_format_info->depth_format); assert(!view_format_info->has_stencil); - assert(view_format_info->cpp == image->format->cpp); + assert(view_format_info->bs == image->format->bs); } else if (subresource->aspectMask & ds_flags) { assert((subresource->aspectMask & ~ds_flags) == 0); if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { assert(image->format->depth_format); assert(view_format_info->depth_format); - assert(view_format_info->cpp == image->format->cpp); + assert(view_format_info->bs == image->format->bs); } if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL) { diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 0f670393112..b0f042f17ed 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -630,9 +630,9 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, } static VkFormat -vk_format_for_cpp(int cpp) +vk_format_for_size(int bs) { - switch (cpp) { + switch (bs) { case 1: return VK_FORMAT_R8_UINT; case 2: return VK_FORMAT_R8G8_UINT; case 3: return VK_FORMAT_R8G8B8_UINT; @@ -642,7 +642,7 @@ vk_format_for_cpp(int cpp) case 12: return VK_FORMAT_R32G32B32_UINT; case 16: return VK_FORMAT_R32G32B32A32_UINT; default: - unreachable("Invalid format cpp"); + unreachable("Invalid format block size"); } } @@ -770,30 +770,30 @@ void anv_CmdCopyBuffer( /* First, we compute the biggest format that can be used with the * given offsets and size. 
*/ - int cpp = 16; + int bs = 16; int fs = ffs(src_offset) - 1; if (fs != -1) - cpp = MIN2(cpp, 1 << fs); - assert(src_offset % cpp == 0); + bs = MIN2(bs, 1 << fs); + assert(src_offset % bs == 0); fs = ffs(dest_offset) - 1; if (fs != -1) - cpp = MIN2(cpp, 1 << fs); - assert(dest_offset % cpp == 0); + bs = MIN2(bs, 1 << fs); + assert(dest_offset % bs == 0); fs = ffs(pRegions[r].copySize) - 1; if (fs != -1) - cpp = MIN2(cpp, 1 << fs); - assert(pRegions[r].copySize % cpp == 0); + bs = MIN2(bs, 1 << fs); + assert(pRegions[r].copySize % bs == 0); - VkFormat copy_format = vk_format_for_cpp(cpp); + VkFormat copy_format = vk_format_for_size(bs); /* This is maximum possible width/height our HW can handle */ uint64_t max_surface_dim = 1 << 14; /* First, we make a bunch of max-sized copies */ - uint64_t max_copy_size = max_surface_dim * max_surface_dim * cpp; + uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; while (copy_size > max_copy_size) { do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, dest_buffer->bo, dest_offset, @@ -803,10 +803,10 @@ void anv_CmdCopyBuffer( dest_offset += max_copy_size; } - uint64_t height = copy_size / (max_surface_dim * cpp); + uint64_t height = copy_size / (max_surface_dim * bs); assert(height < max_surface_dim); if (height != 0) { - uint64_t rect_copy_size = height * max_surface_dim * cpp; + uint64_t rect_copy_size = height * max_surface_dim * bs; do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, dest_buffer->bo, dest_offset, max_surface_dim, height, copy_format); @@ -818,7 +818,7 @@ void anv_CmdCopyBuffer( if (copy_size != 0) { do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, dest_buffer->bo, dest_offset, - copy_size / cpp, 1, copy_format); + copy_size / bs, 1, copy_format); } } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 500904713a6..03d033ff0c5 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1207,7 +1207,7 @@ struct anv_format { const VkFormat vk_format; 
const char *name; uint16_t surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ - uint8_t cpp; /**< Bytes-per-pixel of anv_format::surface_format. */ + uint8_t bs; /**< Block size (in bytes) of anv_format::surface_format. */ uint8_t num_channels; uint16_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ bool has_stencil; diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index f87f3ffef19..1828b090335 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -60,7 +60,7 @@ wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format) return; /* Don't add formats which aren't supported by the driver */ - if (anv_format_for_vk_format(format)->cpp == 0) + if (anv_format_for_vk_format(format)->bs == 0) return; f = anv_vector_add(&display->formats); -- cgit v1.2.3 From a6c7d1e016545c05b64bacf4dd87a354a6c9cd58 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 29 Sep 2015 14:53:28 -0700 Subject: anv/formats: Add surface_format initializer v2: Rename __brw_fmt to __hw_fmt (Chad) Suggested-by: Jason Ekstrand Reviewed-by: Chad Versace chad.versace@intel.com --- src/vulkan/anv_formats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 28212341448..5631bea10e3 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -26,8 +26,8 @@ #define UNSUPPORTED 0xffff -#define fmt(__vk_fmt, ...) \ - [__vk_fmt] = { .vk_format = __vk_fmt, .name = #__vk_fmt, __VA_ARGS__ } +#define fmt(__vk_fmt, __hw_fmt, ...) 
\ + [__vk_fmt] = { .vk_format = __vk_fmt, .name = #__vk_fmt, .surface_format = __hw_fmt, __VA_ARGS__ } static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_UNDEFINED, RAW, .bs = 1, .num_channels = 1), -- cgit v1.2.3 From 7b4244dea02c38c194def5ff05636e09124cd6b7 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 28 Sep 2015 17:42:39 -0700 Subject: anv/formats: Add fields for block dimensions A non-compressed texture is a 1x1x1 block. Compressed textures could have values which vary in different dimensions WxHxD. Reviewed-by: Chad Versace --- src/vulkan/anv_formats.c | 2 +- src/vulkan/anv_private.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 5631bea10e3..b588465e00e 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -27,7 +27,7 @@ #define UNSUPPORTED 0xffff #define fmt(__vk_fmt, __hw_fmt, ...) \ - [__vk_fmt] = { .vk_format = __vk_fmt, .name = #__vk_fmt, .surface_format = __hw_fmt, __VA_ARGS__ } + [__vk_fmt] = { .vk_format = __vk_fmt, .name = #__vk_fmt, .surface_format = __hw_fmt, .bw = 1, .bh = 1, .bd = 1, __VA_ARGS__ } static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_UNDEFINED, RAW, .bs = 1, .num_channels = 1), diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 03d033ff0c5..8a8fe8d04a4 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1208,6 +1208,9 @@ struct anv_format { const char *name; uint16_t surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ uint8_t bs; /**< Block size (in bytes) of anv_format::surface_format. */ + uint8_t bw; /**< Block width of anv_format::surface_format. */ + uint8_t bh; /**< Block height of anv_format::surface_format. */ + uint8_t bd; /**< Block depth of anv_format::surface_format. 
*/ uint8_t num_channels; uint16_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ bool has_stencil; -- cgit v1.2.3 From 300f7c2be34f9d20497b4127020d520de1c09ba5 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 28 Sep 2015 17:51:30 -0700 Subject: anv/image: Handle compressed format stride and size These formulas did not take compressed formats into account. Reviewed-by: Chad Versace --- src/vulkan/anv_image.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 8aa74c2e191..7051ac746f2 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -225,11 +225,13 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, */ assert(anv_is_aligned(qpitch, j)); - uint32_t stride = align_u32(mt_width * format->bs, tile_info->width); + uint32_t stride = align_u32(mt_width * format->bs / format->bw, + tile_info->width); if (create_info->stride > 0) stride = create_info->stride; - const uint32_t size = stride * align_u32(mt_height, tile_info->height); + const uint32_t size = stride * align_u32(mt_height / format->bh, + tile_info->height); const uint32_t offset = align_u32(*inout_image_size, tile_info->surface_alignment); -- cgit v1.2.3 From 381f602c6bca3d2eeef8bbeb6a96260bf93327a9 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 5 Oct 2015 17:27:32 -0700 Subject: anv/image: Handle compressed format qpitch and padding Reviewed-by: Chad Versace --- src/vulkan/anv_image.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 7051ac746f2..affabd1aebf 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -185,7 +185,8 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, * Views >> Common Surface Formats >> Surface Layout >> 2D Surfaces >> * Surface Arrays >> For All Surface Other Than Separate Stencil Buffer: */ - 
qpitch = h0 + h1 + 11 * j; + assert(format->bh == 1 || format->bh == 4); + qpitch = (h0 + h1 + 11 * j) / format->bh; mt_width = MAX(w0, w1 + w2); mt_height = array_size * qpitch; } @@ -230,8 +231,12 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, if (create_info->stride > 0) stride = create_info->stride; - const uint32_t size = stride * align_u32(mt_height / format->bh, - tile_info->height); + /* The padding requirement is found in the Broadwell PRM >> Volume 5: Memory + * Views >> Common Surface Formats >> Surface Padding Requirements >> + * Sampling Engine Surfaces >> Buffer Padding Requirements: + */ + const uint32_t mem_rows = align_u32(mt_height / format->bh, 2 * format->bh); + const uint32_t size = stride * align_u32(mem_rows, tile_info->height); const uint32_t offset = align_u32(*inout_image_size, tile_info->surface_alignment); -- cgit v1.2.3 From 41cf35d1d86e2075b736db7701764d66ea894b66 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 2 Oct 2015 16:11:24 -0700 Subject: anv/image: Determine the alignment units for compressed formats Alignment units, i and j, match the compressed format block width and height respectively. 
v2: Don't assert against HALIGN* and VALIGN* enums (Chad) Reviewed-by: Chad Versace --- src/vulkan/anv_image.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index affabd1aebf..247a75dd8f4 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -152,8 +152,10 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, const struct anv_tile_info *tile_info = &anv_tile_info_table[tile_mode]; - const uint32_t i = 4; /* FINISHME: Stop hardcoding subimage alignment */ - const uint32_t j = 4; /* FINISHME: Stop hardcoding subimage alignment */ + const uint32_t i = MAX(4, format->bw); /* FINISHME: Stop hardcoding subimage alignment */ + const uint32_t j = MAX(4, format->bh); /* FINISHME: Stop hardcoding subimage alignment */ + assert(i == 4 || i == 8 || i == 16); + assert(j == 4 || j == 8 || j == 16); uint16_t qpitch = min_qpitch; uint32_t mt_width = 0; -- cgit v1.2.3 From 9c2b37a9c3cb9586365ae7e7eb532b85eae98f3a Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 29 Sep 2015 15:47:39 -0700 Subject: anv/formats: Define ETC2 formats Reviewed-by: Chad Versace --- src/vulkan/anv_formats.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index b588465e00e..56f4ebda59a 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -144,16 +144,16 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_BC6H_SFLOAT, UNSUPPORTED), fmt(VK_FORMAT_BC7_UNORM, UNSUPPORTED), fmt(VK_FORMAT_BC7_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_EAC_R11_UNORM, 
UNSUPPORTED), - fmt(VK_FORMAT_EAC_R11_SNORM, UNSUPPORTED), - fmt(VK_FORMAT_EAC_R11G11_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_EAC_R11G11_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, ETC2_RGB8 , .bs = 8, .bw = 4, .bh = 4), + fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, ETC2_SRGB8 , .bs = 8, .bw = 4, .bh = 4), + fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, ETC2_RGB8_PTA , .bs = 8, .bw = 4, .bh = 4), + fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, ETC2_SRGB8_PTA , .bs = 8, .bw = 4, .bh = 4), + fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, ETC2_EAC_RGBA8 , .bs = 16, .bw = 4, .bh = 4), + fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, ETC2_EAC_SRGB8_A8, .bs = 16, .bw = 4, .bh = 4), + fmt(VK_FORMAT_EAC_R11_UNORM, EAC_R11 , .bs = 8, .bw = 4, .bh = 4), + fmt(VK_FORMAT_EAC_R11_SNORM, EAC_SIGNED_R11 , .bs = 8, .bw = 4, .bh = 4), + fmt(VK_FORMAT_EAC_R11G11_UNORM, EAC_RG11 , .bs = 16, .bw = 4, .bh = 4), + fmt(VK_FORMAT_EAC_R11G11_SNORM, EAC_SIGNED_RG11 , .bs = 16, .bw = 4, .bh = 4), fmt(VK_FORMAT_ASTC_4x4_UNORM, UNSUPPORTED), fmt(VK_FORMAT_ASTC_4x4_SRGB, UNSUPPORTED), fmt(VK_FORMAT_ASTC_5x4_UNORM, UNSUPPORTED), -- cgit v1.2.3 From 862da6a891ecf570ab02ce9f07d1d22fff04b7ef Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 9 Nov 2015 12:18:12 -0800 Subject: anv/device: Add a newline to the end of a comment --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index a4b58caab13..c6366a1cccb 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -83,7 +83,7 @@ anv_physical_device_init(struct anv_physical_device *device, if (device->info->gen == 7 && !device->info->is_haswell && !device->info->is_baytrail) { - fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete"); + fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); } else if (device->info->gen == 8 && !device->info->is_cherryview) { /* Briadwell is as fully supported as anything */ } else { -- cgit v1.2.3 From 
e8c2a52a704d1c5cefc55b59a28a62e59f70ce6a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 9 Nov 2015 15:58:19 -0800 Subject: anv/gen7: Properly handle missing color-blend state --- src/vulkan/gen7_pipeline.c | 96 ++++++++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index affe04c526f..3622071ead2 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -253,54 +253,66 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, { struct anv_device *device = pipeline->device; - /* FIXME-GEN7: All render targets share blend state settings on gen7, we - * can't implement this. - */ - const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; - uint32_t num_dwords = GEN7_BLEND_STATE_length; pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); - struct GEN7_BLEND_STATE blend_state = { - .ColorBufferBlendEnable = a->blendEnable, - .IndependentAlphaBlendEnable = true, /* FIXME: yes? 
*/ - .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], - - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], - - .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], - .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], - .DestinationBlendFactor = vk_to_gen_blend[a->destBlendColor], - .AlphaToCoverageEnable = info->alphaToCoverageEnable, - -#if 0 - bool AlphaToOneEnable; - bool AlphaToCoverageDitherEnable; -#endif - - .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), - .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), - .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), - .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), - - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + if (info->pAttachments == NULL) { + struct GEN7_BLEND_STATE blend_state = { + .ColorBufferBlendEnable = false, + .WriteDisableAlpha = false, + .WriteDisableRed = false, + .WriteDisableGreen = false, + .WriteDisableBlue = false, + }; -#if 0 - bool AlphaTestEnable; - uint32_t AlphaTestFunction; - bool ColorDitherEnable; - uint32_t XDitherOffset; - uint32_t YDitherOffset; - uint32_t ColorClampRange; - bool PreBlendColorClampEnable; - bool PostBlendColorClampEnable; -#endif - }; + GEN7_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); + } else { + /* FIXME-GEN7: All render targets share blend state settings on gen7, we + * can't implement this. + */ + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; + + struct GEN7_BLEND_STATE blend_state = { + .ColorBufferBlendEnable = a->blendEnable, + .IndependentAlphaBlendEnable = true, /* FIXME: yes? 
*/ + .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], + + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], + + .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], + .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], + .DestinationBlendFactor = vk_to_gen_blend[a->destBlendColor], + .AlphaToCoverageEnable = info->alphaToCoverageEnable, + +# if 0 + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; +# endif + + .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), + .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), + .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), + .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), + + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + +# if 0 + bool AlphaTestEnable; + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; +# endif + }; - GEN7_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); + GEN7_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); + } anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS, .BlendStatePointer = pipeline->blend_state.offset); -- cgit v1.2.3 From abede04314ac1d5c4e81929a984b46f0ca7901df Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 9 Nov 2015 16:03:49 -0800 Subject: anv/gen7: Fix the length of 3DSTATE_SF --- src/vulkan/gen7_cmd_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index a99881f2eb9..af76126c3a9 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -269,7 +269,7 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) bool enable_bias = 
cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || cmd_buffer->state.dynamic.depth_bias.slope_scaled != 0.0f; - uint32_t sf_dw[GEN8_3DSTATE_SF_length]; + uint32_t sf_dw[GEN7_3DSTATE_SF_length]; struct GEN7_3DSTATE_SF sf = { GEN7_3DSTATE_SF_header, .LineWidth = cmd_buffer->state.dynamic.line_width, -- cgit v1.2.3 From 06f466a770fcb84a5a3671d27bffb456ded90739 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 9 Nov 2015 16:29:05 -0800 Subject: anv/nir: Fix codegen in lower_push_constants --- src/vulkan/anv_nir_lower_push_constants.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_nir_lower_push_constants.c b/src/vulkan/anv_nir_lower_push_constants.c index af48470522a..4917bdc5954 100644 --- a/src/vulkan/anv_nir_lower_push_constants.c +++ b/src/vulkan/anv_nir_lower_push_constants.c @@ -70,7 +70,7 @@ lower_push_constants_block(nir_block *block, void *void_state) for (unsigned i = 0; i < intrin->num_components; i++) mov->src[0].swizzle[i] = i + shift; mov->dest.write_mask = (1 << intrin->num_components) - 1; - nir_ssa_dest_init(&intrin->instr, &intrin->dest, + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, intrin->num_components, NULL); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, @@ -79,6 +79,7 @@ lower_push_constants_block(nir_block *block, void *void_state) /* Stomp the number of components to 4 */ intrin->num_components = 4; + intrin->dest.ssa.num_components = 4; } } } -- cgit v1.2.3 From aafc87402d1d12a00e1969be52a2c3b6d18c5652 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 10 Nov 2015 11:24:08 -0800 Subject: anv/device: Work around the i915 kernel driver timeout bug There is a bug in some versions of the i915 kernel driver where it will return immediately if the timeout is negative (it's supposed to wait indefinitely). We've worked around this in mesa for a few months but never implemented the work-around in the Vulkan driver. 
I rediscovered this bug again while working on Ivy Bridge because the drive in my Ivy Bridge currently has Fedora 21 installed which has one of the offending kernels. --- src/vulkan/anv_device.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index c6366a1cccb..37d58816f5d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1286,14 +1286,22 @@ VkResult anv_WaitForFences( uint64_t timeout) { ANV_FROM_HANDLE(anv_device, device, _device); - int64_t t = timeout; - int ret; + + /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed + * to block indefinitely timeouts <= 0. Unfortunately, this was broken + * for a couple of kernel releases. Since there's no way to know + * whether or not the kernel we're using is one of the broken ones, the + * best we can do is to clamp the timeout to INT64_MAX. This limits the + * maximum timeout from 584 years to 292 years - likely not a big deal. 
+ */ + if (timeout > INT64_MAX) + timeout = INT64_MAX; /* FIXME: handle !waitAll */ for (uint32_t i = 0; i < fenceCount; i++) { ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + int ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout); if (ret == -1 && errno == ETIME) { return VK_TIMEOUT; } else if (ret == -1) { -- cgit v1.2.3 From b461744c52c60bb855d6119e9adc5f93b75480d5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 10 Nov 2015 11:31:31 -0800 Subject: anv/gen7: Properly handle VS with VertexID but no vertices --- src/vulkan/gen7_pipeline.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 3622071ead2..269d9d46ac5 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -39,10 +39,11 @@ gen7_emit_vertex_input(struct anv_pipeline *pipeline, const uint32_t num_dwords = 1 + element_count * 2; uint32_t *p; - if (info->attributeCount > 0) { - p = anv_batch_emitn(&pipeline->batch, num_dwords, - GEN7_3DSTATE_VERTEX_ELEMENTS); - } + if (info->attributeCount == 0 && !sgvs) + return; + + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GEN7_3DSTATE_VERTEX_ELEMENTS); for (uint32_t i = 0; i < info->attributeCount; i++) { const VkVertexInputAttributeDescription *desc = -- cgit v1.2.3 From d9079648d0f1c380929dea0f3a447ddfdf5dcd27 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 10 Nov 2015 14:43:18 -0800 Subject: anv/meta: Create a sampler in meta_emit_blit --- src/vulkan/anv_meta.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index b0f042f17ed..c96a9deb798 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -315,7 +315,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) .count = 1, .pBinding = (VkDescriptorSetLayoutBinding[]) { 
{ - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .arraySize = 1, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, .pImmutableSamplers = NULL @@ -455,7 +455,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *dest_image, struct anv_image_view *dest_iview, VkOffset3D dest_offset, - VkExtent3D dest_extent) + VkExtent3D dest_extent, + VkTexFilter blit_filter) { struct anv_device *device = cmd_buffer->device; VkDescriptorPool dummy_desc_pool = { .handle = 1 }; @@ -525,6 +526,14 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, sizeof(struct anv_vue_header), }); + VkSampler sampler; + ANV_CALL(CreateSampler)(anv_device_to_handle(device), + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = blit_filter, + .minFilter = blit_filter, + }, &sampler); + VkDescriptorSet set; anv_AllocDescriptorSets(anv_device_to_handle(device), dummy_desc_pool, VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, @@ -538,11 +547,12 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .destBinding = 0, .destArrayElement = 0, .count = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .pDescriptors = (VkDescriptorInfo[]) { { .imageView = anv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + .sampler = sampler, }, } } @@ -619,6 +629,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, * descriptor sets, etc. has been used. We are free to delete it. 
*/ anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_DestroySampler(anv_device_to_handle(device), sampler); anv_DestroyFramebuffer(anv_device_to_handle(device), fb); } @@ -741,7 +752,8 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, anv_image_from_handle(dest_image), &dest_iview, (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }); + (VkExtent3D) { width, height, 1 }, + VK_TEX_FILTER_NEAREST); anv_DestroyImage(vk_device, src_image); anv_DestroyImage(vk_device, dest_image); @@ -915,7 +927,8 @@ void anv_CmdCopyImage( pRegions[r].extent, dest_image, &dest_iview, dest_offset, - pRegions[r].extent); + pRegions[r].extent, + VK_TEX_FILTER_NEAREST); } meta_finish_blit(cmd_buffer, &saved_state); @@ -1015,7 +1028,8 @@ void anv_CmdBlitImage( pRegions[r].srcExtent, dest_image, &dest_iview, dest_offset, - pRegions[r].destExtent); + pRegions[r].destExtent, + filter); } meta_finish_blit(cmd_buffer, &saved_state); @@ -1158,7 +1172,8 @@ void anv_CmdCopyBufferToImage( dest_image, &dest_iview, dest_offset, - pRegions[r].imageExtent); + pRegions[r].imageExtent, + VK_TEX_FILTER_NEAREST); anv_DestroyImage(vk_device, srcImage); } @@ -1253,7 +1268,8 @@ void anv_CmdCopyImageToBuffer( anv_image_from_handle(destImage), &dest_iview, (VkOffset3D) { 0, 0, 0 }, - pRegions[r].imageExtent); + pRegions[r].imageExtent, + VK_TEX_FILTER_NEAREST); anv_DestroyImage(vk_device, destImage); } -- cgit v1.2.3 From 427978d9334d887715e00c7609a36aedaac1b4cf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 10 Nov 2015 15:02:52 -0800 Subject: anv/device: Use an actual int64_t in WaitForFences --- src/vulkan/anv_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 37d58816f5d..3c6760b832d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1297,11 +1297,13 @@ VkResult anv_WaitForFences( if (timeout > INT64_MAX) timeout = INT64_MAX; + int64_t t = 
timeout; + /* FIXME: handle !waitAll */ for (uint32_t i = 0; i < fenceCount; i++) { ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - int ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout); + int ret = anv_gem_wait(device, fence->bo.gem_handle, &t); if (ret == -1 && errno == ETIME) { return VK_TIMEOUT; } else if (ret == -1) { -- cgit v1.2.3 From 9d18555c8d24f9037f3e4094c6aa5b80604b9324 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 10 Nov 2015 15:14:11 -0800 Subject: anv/gen7: Add push constant support --- src/vulkan/gen7_cmd_buffer.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index af76126c3a9..b3619df2c2e 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -29,6 +29,40 @@ #include "anv_private.h" +static void +gen7_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t push_constant_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 21, + [VK_SHADER_STAGE_TESS_CONTROL] = 25, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 26, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 22, + [VK_SHADER_STAGE_FRAGMENT] = 23, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + VkShaderStage stage; + VkShaderStageFlags flushed = 0; + + for_each_bit(stage, cmd_buffer->state.push_constants_dirty) { + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CONSTANT_VS, + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer0 = { .offset = state.offset }, + .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + + flushed |= 1 << stage; + } + + cmd_buffer->state.push_constants_dirty &= ~flushed; +} + void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) @@ -256,6 +290,9 @@ gen7_cmd_buffer_flush_state(struct 
anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.descriptors_dirty) anv_flush_descriptor_sets(cmd_buffer); + if (cmd_buffer->state.push_constants_dirty) + gen7_cmd_buffer_flush_push_constants(cmd_buffer); + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) anv_cmd_buffer_emit_viewport(cmd_buffer); -- cgit v1.2.3 From 750b8f9e983832c8725ac2e7a040470959a3d8f2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 10 Nov 2015 16:41:23 -0800 Subject: anv/gen7: Properly handle a GS with zero invocations --- src/vulkan/gen7_pipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 269d9d46ac5..6eed60dbd3d 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -502,7 +502,7 @@ gen7_graphics_pipeline_create( /* This in the next dword on HSW. */ .ControlDataFormat = gs_prog_data->control_data_format, .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, - .InstanceControl = gs_prog_data->invocations - 1, + .InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1, .DispatchMode = gs_prog_data->base.dispatch_mode, .GSStatisticsEnable = true, .IncludePrimitiveID = gs_prog_data->include_primitive_id, -- cgit v1.2.3 From 3a3d79b38e27babab7c5b2d79032e0879d6a7c44 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 10 Nov 2015 16:42:34 -0800 Subject: anv/gen7: Implement the VS state depth-stall workaround --- src/vulkan/anv_device.c | 5 +++++ src/vulkan/anv_private.h | 2 ++ src/vulkan/gen7_cmd_buffer.c | 18 ++++++++++++++++++ src/vulkan/gen7_pipeline.c | 13 +++++++++++++ 4 files changed, 38 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 3c6760b832d..1328516c48e 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -667,6 +667,8 @@ VkResult anv_CreateDevice( anv_state_pool_init(&device->surface_state_pool, &device->surface_state_block_pool); + 
anv_bo_init_new(&device->workaround_bo, device, 1024); + anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); device->info = *physical_device->info; @@ -705,6 +707,9 @@ void anv_DestroyDevice( anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); #endif + anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size); + anv_gem_close(device, device->workaround_bo.gem_handle); + anv_bo_pool_finish(&device->batch_bo_pool); anv_state_pool_finish(&device->dynamic_state_pool); anv_block_pool_finish(&device->dynamic_state_block_pool); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8a8fe8d04a4..a60c679cede 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -497,6 +497,8 @@ struct anv_device { struct anv_block_pool surface_state_block_pool; struct anv_state_pool surface_state_pool; + struct anv_bo workaround_bo; + struct anv_meta_state meta_state; struct anv_state border_colors; diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index b3619df2c2e..5ebf129a802 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -287,6 +287,24 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); } + if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || + cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) { + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth + * stall needs to be sent just prior to any 3DSTATE_VS, + * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, + * 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one + * PIPE_CONTROL needs to be sent before any combination of VS + * associated 3DSTATE." 
+ */ + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &cmd_buffer->device->workaround_bo, 0 }); + } + if (cmd_buffer->state.descriptors_dirty) anv_flush_descriptor_sets(cmd_buffer); diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 6eed60dbd3d..1fed33a53d1 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -380,6 +380,19 @@ gen7_graphics_pipeline_create( anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_DS, .DSFunctionEnable = false); anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_STREAMOUT, .SOFunctionEnable = false); + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall + * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, + * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL + * needs to be sent before any combination of VS associated 3DSTATE." 
+ */ + anv_batch_emit(&pipeline->batch, GEN7_PIPE_CONTROL, + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &device->workaround_bo, 0 }); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS, .ConstantBufferOffset = 0, .ConstantBufferSize = 4); -- cgit v1.2.3 From 67362698a9411a0d3b4189af2422420c221ef4fc Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 9 Nov 2015 08:53:01 -0800 Subject: isl: Add enum isl_format --- src/vulkan/isl.h | 270 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 src/vulkan/isl.h (limited to 'src') diff --git a/src/vulkan/isl.h b/src/vulkan/isl.h new file mode 100644 index 00000000000..eed4db41f10 --- /dev/null +++ b/src/vulkan/isl.h @@ -0,0 +1,270 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +/** + * @file + * @brief Intel Surface Layout + */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Hardware enumeration SURFACE_FORMAT. + * + * For the official list, see Broadwell PRM: Volume 2b: Command Reference: + * Enumerations: SURFACE_FORMAT. + */ +enum isl_format { + ISL_FORMAT_R32G32B32A32_FLOAT = 0, + ISL_FORMAT_R32G32B32A32_SINT = 1, + ISL_FORMAT_R32G32B32A32_UINT = 2, + ISL_FORMAT_R32G32B32A32_UNORM = 3, + ISL_FORMAT_R32G32B32A32_SNORM = 4, + ISL_FORMAT_R64G64_FLOAT = 5, + ISL_FORMAT_R32G32B32X32_FLOAT = 6, + ISL_FORMAT_R32G32B32A32_SSCALED = 7, + ISL_FORMAT_R32G32B32A32_USCALED = 8, + ISL_FORMAT_R32G32B32A32_SFIXED = 32, + ISL_FORMAT_R64G64_PASSTHRU = 33, + ISL_FORMAT_R32G32B32_FLOAT = 64, + ISL_FORMAT_R32G32B32_SINT = 65, + ISL_FORMAT_R32G32B32_UINT = 66, + ISL_FORMAT_R32G32B32_UNORM = 67, + ISL_FORMAT_R32G32B32_SNORM = 68, + ISL_FORMAT_R32G32B32_SSCALED = 69, + ISL_FORMAT_R32G32B32_USCALED = 70, + ISL_FORMAT_R32G32B32_SFIXED = 80, + ISL_FORMAT_R16G16B16A16_UNORM = 128, + ISL_FORMAT_R16G16B16A16_SNORM = 129, + ISL_FORMAT_R16G16B16A16_SINT = 130, + ISL_FORMAT_R16G16B16A16_UINT = 131, + ISL_FORMAT_R16G16B16A16_FLOAT = 132, + ISL_FORMAT_R32G32_FLOAT = 133, + ISL_FORMAT_R32G32_SINT = 134, + ISL_FORMAT_R32G32_UINT = 135, + ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS = 136, + ISL_FORMAT_X32_TYPELESS_G8X24_UINT = 137, + ISL_FORMAT_L32A32_FLOAT = 138, + ISL_FORMAT_R32G32_UNORM = 139, + ISL_FORMAT_R32G32_SNORM = 140, + ISL_FORMAT_R64_FLOAT = 141, + ISL_FORMAT_R16G16B16X16_UNORM = 142, + ISL_FORMAT_R16G16B16X16_FLOAT = 143, + ISL_FORMAT_A32X32_FLOAT = 144, + ISL_FORMAT_L32X32_FLOAT = 145, + ISL_FORMAT_I32X32_FLOAT = 146, + ISL_FORMAT_R16G16B16A16_SSCALED = 147, + ISL_FORMAT_R16G16B16A16_USCALED = 148, + ISL_FORMAT_R32G32_SSCALED = 149, + ISL_FORMAT_R32G32_USCALED = 150, + ISL_FORMAT_R32G32_SFIXED = 160, + ISL_FORMAT_R64_PASSTHRU = 161, + ISL_FORMAT_B8G8R8A8_UNORM = 192, + ISL_FORMAT_B8G8R8A8_UNORM_SRGB = 193, + 
ISL_FORMAT_R10G10B10A2_UNORM = 194, + ISL_FORMAT_R10G10B10A2_UNORM_SRGB = 195, + ISL_FORMAT_R10G10B10A2_UINT = 196, + ISL_FORMAT_R10G10B10_SNORM_A2_UNORM = 197, + ISL_FORMAT_R8G8B8A8_UNORM = 199, + ISL_FORMAT_R8G8B8A8_UNORM_SRGB = 200, + ISL_FORMAT_R8G8B8A8_SNORM = 201, + ISL_FORMAT_R8G8B8A8_SINT = 202, + ISL_FORMAT_R8G8B8A8_UINT = 203, + ISL_FORMAT_R16G16_UNORM = 204, + ISL_FORMAT_R16G16_SNORM = 205, + ISL_FORMAT_R16G16_SINT = 206, + ISL_FORMAT_R16G16_UINT = 207, + ISL_FORMAT_R16G16_FLOAT = 208, + ISL_FORMAT_B10G10R10A2_UNORM = 209, + ISL_FORMAT_B10G10R10A2_UNORM_SRGB = 210, + ISL_FORMAT_R11G11B10_FLOAT = 211, + ISL_FORMAT_R32_SINT = 214, + ISL_FORMAT_R32_UINT = 215, + ISL_FORMAT_R32_FLOAT = 216, + ISL_FORMAT_R24_UNORM_X8_TYPELESS = 217, + ISL_FORMAT_X24_TYPELESS_G8_UINT = 218, + ISL_FORMAT_L32_UNORM = 221, + ISL_FORMAT_A32_UNORM = 222, + ISL_FORMAT_L16A16_UNORM = 223, + ISL_FORMAT_I24X8_UNORM = 224, + ISL_FORMAT_L24X8_UNORM = 225, + ISL_FORMAT_A24X8_UNORM = 226, + ISL_FORMAT_I32_FLOAT = 227, + ISL_FORMAT_L32_FLOAT = 228, + ISL_FORMAT_A32_FLOAT = 229, + ISL_FORMAT_X8B8_UNORM_G8R8_SNORM = 230, + ISL_FORMAT_A8X8_UNORM_G8R8_SNORM = 231, + ISL_FORMAT_B8X8_UNORM_G8R8_SNORM = 232, + ISL_FORMAT_B8G8R8X8_UNORM = 233, + ISL_FORMAT_B8G8R8X8_UNORM_SRGB = 234, + ISL_FORMAT_R8G8B8X8_UNORM = 235, + ISL_FORMAT_R8G8B8X8_UNORM_SRGB = 236, + ISL_FORMAT_R9G9B9E5_SHAREDEXP = 237, + ISL_FORMAT_B10G10R10X2_UNORM = 238, + ISL_FORMAT_L16A16_FLOAT = 240, + ISL_FORMAT_R32_UNORM = 241, + ISL_FORMAT_R32_SNORM = 242, + ISL_FORMAT_R10G10B10X2_USCALED = 243, + ISL_FORMAT_R8G8B8A8_SSCALED = 244, + ISL_FORMAT_R8G8B8A8_USCALED = 245, + ISL_FORMAT_R16G16_SSCALED = 246, + ISL_FORMAT_R16G16_USCALED = 247, + ISL_FORMAT_R32_SSCALED = 248, + ISL_FORMAT_R32_USCALED = 249, + ISL_FORMAT_B5G6R5_UNORM = 256, + ISL_FORMAT_B5G6R5_UNORM_SRGB = 257, + ISL_FORMAT_B5G5R5A1_UNORM = 258, + ISL_FORMAT_B5G5R5A1_UNORM_SRGB = 259, + ISL_FORMAT_B4G4R4A4_UNORM = 260, + ISL_FORMAT_B4G4R4A4_UNORM_SRGB = 261, + 
ISL_FORMAT_R8G8_UNORM = 262, + ISL_FORMAT_R8G8_SNORM = 263, + ISL_FORMAT_R8G8_SINT = 264, + ISL_FORMAT_R8G8_UINT = 265, + ISL_FORMAT_R16_UNORM = 266, + ISL_FORMAT_R16_SNORM = 267, + ISL_FORMAT_R16_SINT = 268, + ISL_FORMAT_R16_UINT = 269, + ISL_FORMAT_R16_FLOAT = 270, + ISL_FORMAT_A8P8_UNORM_PALETTE0 = 271, + ISL_FORMAT_A8P8_UNORM_PALETTE1 = 272, + ISL_FORMAT_I16_UNORM = 273, + ISL_FORMAT_L16_UNORM = 274, + ISL_FORMAT_A16_UNORM = 275, + ISL_FORMAT_L8A8_UNORM = 276, + ISL_FORMAT_I16_FLOAT = 277, + ISL_FORMAT_L16_FLOAT = 278, + ISL_FORMAT_A16_FLOAT = 279, + ISL_FORMAT_L8A8_UNORM_SRGB = 280, + ISL_FORMAT_R5G5_SNORM_B6_UNORM = 281, + ISL_FORMAT_B5G5R5X1_UNORM = 282, + ISL_FORMAT_B5G5R5X1_UNORM_SRGB = 283, + ISL_FORMAT_R8G8_SSCALED = 284, + ISL_FORMAT_R8G8_USCALED = 285, + ISL_FORMAT_R16_SSCALED = 286, + ISL_FORMAT_R16_USCALED = 287, + ISL_FORMAT_P8A8_UNORM_PALETTE0 = 290, + ISL_FORMAT_P8A8_UNORM_PALETTE1 = 291, + ISL_FORMAT_A1B5G5R5_UNORM = 292, + ISL_FORMAT_A4B4G4R4_UNORM = 293, + ISL_FORMAT_L8A8_UINT = 294, + ISL_FORMAT_L8A8_SINT = 295, + ISL_FORMAT_R8_UNORM = 320, + ISL_FORMAT_R8_SNORM = 321, + ISL_FORMAT_R8_SINT = 322, + ISL_FORMAT_R8_UINT = 323, + ISL_FORMAT_A8_UNORM = 324, + ISL_FORMAT_I8_UNORM = 325, + ISL_FORMAT_L8_UNORM = 326, + ISL_FORMAT_P4A4_UNORM_PALETTE0 = 327, + ISL_FORMAT_A4P4_UNORM_PALETTE0 = 328, + ISL_FORMAT_R8_SSCALED = 329, + ISL_FORMAT_R8_USCALED = 330, + ISL_FORMAT_P8_UNORM_PALETTE0 = 331, + ISL_FORMAT_L8_UNORM_SRGB = 332, + ISL_FORMAT_P8_UNORM_PALETTE1 = 333, + ISL_FORMAT_P4A4_UNORM_PALETTE1 = 334, + ISL_FORMAT_A4P4_UNORM_PALETTE1 = 335, + ISL_FORMAT_Y8_UNORM = 336, + ISL_FORMAT_L8_UINT = 338, + ISL_FORMAT_L8_SINT = 339, + ISL_FORMAT_I8_UINT = 340, + ISL_FORMAT_I8_SINT = 341, + ISL_FORMAT_DXT1_RGB_SRGB = 384, + ISL_FORMAT_R1_UNORM = 385, + ISL_FORMAT_YCRCB_NORMAL = 386, + ISL_FORMAT_YCRCB_SWAPUVY = 387, + ISL_FORMAT_P2_UNORM_PALETTE0 = 388, + ISL_FORMAT_P2_UNORM_PALETTE1 = 389, + ISL_FORMAT_BC1_UNORM = 390, + ISL_FORMAT_BC2_UNORM = 391, + 
ISL_FORMAT_BC3_UNORM = 392, + ISL_FORMAT_BC4_UNORM = 393, + ISL_FORMAT_BC5_UNORM = 394, + ISL_FORMAT_BC1_UNORM_SRGB = 395, + ISL_FORMAT_BC2_UNORM_SRGB = 396, + ISL_FORMAT_BC3_UNORM_SRGB = 397, + ISL_FORMAT_MONO8 = 398, + ISL_FORMAT_YCRCB_SWAPUV = 399, + ISL_FORMAT_YCRCB_SWAPY = 400, + ISL_FORMAT_DXT1_RGB = 401, + ISL_FORMAT_FXT1 = 402, + ISL_FORMAT_R8G8B8_UNORM = 403, + ISL_FORMAT_R8G8B8_SNORM = 404, + ISL_FORMAT_R8G8B8_SSCALED = 405, + ISL_FORMAT_R8G8B8_USCALED = 406, + ISL_FORMAT_R64G64B64A64_FLOAT = 407, + ISL_FORMAT_R64G64B64_FLOAT = 408, + ISL_FORMAT_BC4_SNORM = 409, + ISL_FORMAT_BC5_SNORM = 410, + ISL_FORMAT_R16G16B16_FLOAT = 411, + ISL_FORMAT_R16G16B16_UNORM = 412, + ISL_FORMAT_R16G16B16_SNORM = 413, + ISL_FORMAT_R16G16B16_SSCALED = 414, + ISL_FORMAT_R16G16B16_USCALED = 415, + ISL_FORMAT_BC6H_SF16 = 417, + ISL_FORMAT_BC7_UNORM = 418, + ISL_FORMAT_BC7_UNORM_SRGB = 419, + ISL_FORMAT_BC6H_UF16 = 420, + ISL_FORMAT_PLANAR_420_8 = 421, + ISL_FORMAT_R8G8B8_UNORM_SRGB = 424, + ISL_FORMAT_ETC1_RGB8 = 425, + ISL_FORMAT_ETC2_RGB8 = 426, + ISL_FORMAT_EAC_R11 = 427, + ISL_FORMAT_EAC_RG11 = 428, + ISL_FORMAT_EAC_SIGNED_R11 = 429, + ISL_FORMAT_EAC_SIGNED_RG11 = 430, + ISL_FORMAT_ETC2_SRGB8 = 431, + ISL_FORMAT_R16G16B16_UINT = 432, + ISL_FORMAT_R16G16B16_SINT = 433, + ISL_FORMAT_R32_SFIXED = 434, + ISL_FORMAT_R10G10B10A2_SNORM = 435, + ISL_FORMAT_R10G10B10A2_USCALED = 436, + ISL_FORMAT_R10G10B10A2_SSCALED = 437, + ISL_FORMAT_R10G10B10A2_SINT = 438, + ISL_FORMAT_B10G10R10A2_SNORM = 439, + ISL_FORMAT_B10G10R10A2_USCALED = 440, + ISL_FORMAT_B10G10R10A2_SSCALED = 441, + ISL_FORMAT_B10G10R10A2_UINT = 442, + ISL_FORMAT_B10G10R10A2_SINT = 443, + ISL_FORMAT_R64G64B64A64_PASSTHRU = 444, + ISL_FORMAT_R64G64B64_PASSTHRU = 445, + ISL_FORMAT_ETC2_RGB8_PTA = 448, + ISL_FORMAT_ETC2_SRGB8_PTA = 449, + ISL_FORMAT_ETC2_EAC_RGBA8 = 450, + ISL_FORMAT_ETC2_EAC_SRGB8_A8 = 451, + ISL_FORMAT_R8G8B8_UINT = 456, + ISL_FORMAT_R8G8B8_SINT = 457, + ISL_FORMAT_RAW = 511, + + /* Hardware doesn't 
understand this out-of-band value */ + ISL_FORMAT_UNSUPPORTED = UINT16_MAX, +}; + +#ifdef __cplusplus +} +#endif -- cgit v1.2.3 From 7986efc644d3e05491fc31f969c2e3039b2c5ade Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 11 Nov 2015 10:35:14 -0800 Subject: isl: Add CSV of format layouts Add file isl_format_layout.csv, which describes the block layout, channel layout, and colorspace of all hardware surface formats. --- src/vulkan/isl_format_layout.csv | 287 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 287 insertions(+) create mode 100644 src/vulkan/isl_format_layout.csv (limited to 'src') diff --git a/src/vulkan/isl_format_layout.csv b/src/vulkan/isl_format_layout.csv new file mode 100644 index 00000000000..2a302b002ef --- /dev/null +++ b/src/vulkan/isl_format_layout.csv @@ -0,0 +1,287 @@ +# Copyright 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +# +# @file +# @brief Layout of all hardware surface formats +# +# For the official list, see Broadwell PRM: Volume 2b: Command Reference: +# Enumerations: SURFACE_FORMAT. +# + + +# Columns: +# name: format name in PRM +# bpb: bits per block +# bw: block width, in pixels +# bh: block height, in pixels +# bd: block depth, in pixels +# r: red channel, data type and bitwidth +# g: green channel +# b: blue channel +# a: alpha channel +# l: luminance channel +# i: intensity channel +# p: palette channel +# space: colorspace +# txc: texture compression +# +# Data Types: +# x: void +# r: raw +# un: unorm +# sn: snorm +# uf: ufloat +# sf: sfloat +# ux: ufixed +# sx: sfixed +# ui: uint +# si: sint +# us: uscaled +# ss: sscaled + + +# Table is aligned with the Vim commands below, using the Align plugin: +# :AlignCtrl lr+ p8000000000000P1 +# /^# name/,$ Align, + +# name , bpb, bw, bh, bd, r, g, b, a, l, i, p, space, txc +R32G32B32A32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, sf32, , , , linear, +R32G32B32A32_SINT , 128, 1, 1, 1, si32, si32, si32, si32, , , , linear, +R32G32B32A32_UINT , 128, 1, 1, 1, ui32, ui32, ui32, ui32, , , , linear, +R32G32B32A32_UNORM , 128, 1, 1, 1, un32, un32, un32, un32, , , , linear, +R32G32B32A32_SNORM , 128, 1, 1, 1, sn32, sn32, sn32, sn32, , , , linear, +R64G64_FLOAT , 128, 1, 1, 1, sf64, sf64, , , , , , linear, +R32G32B32X32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, x32, , , , linear, +R32G32B32A32_SSCALED , 128, 1, 1, 1, ss32, ss32, ss32, ss32, , , , linear, +R32G32B32A32_USCALED , 128, 1, 1, 1, us32, us32, us32, us32, , , , linear, +R32G32B32A32_SFIXED , 128, 1, 1, 1, sx32, sx32, sx32, sx32, , , , linear, +R64G64_PASSTHRU , 128, 1, 1, 1, r64, r64, , , , , , , +R32G32B32_FLOAT , 96, 1, 1, 1, sf32, sf32, sf32, , , , , linear, +R32G32B32_SINT , 96, 1, 1, 1, si32, si32, si32, , , , , linear, +R32G32B32_UINT , 96, 1, 1, 1, ui32, ui32, ui32, , , , , linear, +R32G32B32_UNORM , 96, 1, 1, 1, un32, un32, un32, , , , , linear, +R32G32B32_SNORM , 96, 1, 1, 
1, sn32, sn32, sn32, , , , , linear, +R32G32B32_SSCALED , 96, 1, 1, 1, ss32, ss32, ss32, , , , , linear, +R32G32B32_USCALED , 96, 1, 1, 1, us32, us32, us32, , , , , linear, +R32G32B32_SFIXED , 96, 1, 1, 1, sx32, sx32, sx32, , , , , linear, +R16G16B16A16_UNORM , 64, 1, 1, 1, un16, un16, un16, un16, , , , linear, +R16G16B16A16_SNORM , 64, 1, 1, 1, sn16, sn16, sn16, sn16, , , , linear, +R16G16B16A16_SINT , 64, 1, 1, 1, si16, si16, si16, si16, , , , linear, +R16G16B16A16_UINT , 64, 1, 1, 1, ui16, ui16, ui16, ui16, , , , linear, +R16G16B16A16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, sf16, , , , linear, +R32G32_FLOAT , 64, 1, 1, 1, sf32, sf32, , , , , , linear, +R32G32_SINT , 64, 1, 1, 1, si32, si32, , , , , , linear, +R32G32_UINT , 64, 1, 1, 1, ui32, ui32, , , , , , linear, +R32_FLOAT_X8X24_TYPELESS , 64, 1, 1, 1, sf32, x8, x24, , , , , linear, +X32_TYPELESS_G8X24_UINT , 64, 1, 1, 1, x32, ui8, x24, , , , , linear, +L32A32_FLOAT , 64, 1, 1, 1, , , , sf32, sf32, , , linear, +R32G32_UNORM , 64, 1, 1, 1, un32, un32, , , , , , linear, +R32G32_SNORM , 64, 1, 1, 1, sn32, sn32, , , , , , linear, +R64_FLOAT , 64, 1, 1, 1, sf64, , , , , , , linear, +R16G16B16X16_UNORM , 64, 1, 1, 1, un16, un16, un16, x16, , , , linear, +R16G16B16X16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, x16, , , , linear, +A32X32_FLOAT , 64, 1, 1, 1, , , , sf32, x32, , , alpha, +L32X32_FLOAT , 64, 1, 1, 1, , , , x32, sf32, , , linear, +I32X32_FLOAT , 64, 1, 1, 1, , , , x32, , sf32, , linear, +R16G16B16A16_SSCALED , 64, 1, 1, 1, ss16, ss16, ss16, ss16, , , , linear, +R16G16B16A16_USCALED , 64, 1, 1, 1, us16, us16, us16, us16, , , , linear, +R32G32_SSCALED , 64, 1, 1, 1, ss32, ss32, , , , , , linear, +R32G32_USCALED , 64, 1, 1, 1, us32, us32, , , , , , linear, +R32G32_SFIXED , 64, 1, 1, 1, sx32, sx32, , , , , , linear, +R64_PASSTHRU , 64, 1, 1, 1, r64, , , , , , , , +B8G8R8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear, +B8G8R8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb, +R10G10B10A2_UNORM 
, 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, +R10G10B10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, +R10G10B10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear, +R10G10B10_SNORM_A2_UNORM , 32, 1, 1, 1, sn10, sn10, sn10, un2, , , , linear, +R8G8B8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear, +R8G8B8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb, +R8G8B8A8_SNORM , 32, 1, 1, 1, sn8, sn8, sn8, sn8, , , , linear, +R8G8B8A8_SINT , 32, 1, 1, 1, si8, si8, si8, si8, , , , linear, +R8G8B8A8_UINT , 32, 1, 1, 1, ui8, ui8, ui8, ui8, , , , linear, +R16G16_UNORM , 32, 1, 1, 1, un16, un16, , , , , , linear, +R16G16_SNORM , 32, 1, 1, 1, sn16, sn16, , , , , , linear, +R16G16_SINT , 32, 1, 1, 1, si16, si16, , , , , , linear, +R16G16_UINT , 32, 1, 1, 1, ui16, ui16, , , , , , linear, +R16G16_FLOAT , 32, 1, 1, 1, sf16, sf16, , , , , , linear, +B10G10R10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, +B10G10R10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, +R11G11B10_FLOAT , 32, 1, 1, 1, sf11, sf11, sf10, , , , , linear, +R32_SINT , 32, 1, 1, 1, si32, , , , , , , linear, +R32_UINT , 32, 1, 1, 1, ui32, , , , , , , linear, +R32_FLOAT , 32, 1, 1, 1, sf32, , , , , , , linear, +R24_UNORM_X8_TYPELESS , 32, 1, 1, 1, un24, x8, , , , , , linear, +X24_TYPELESS_G8_UINT , 32, 1, 1, 1, x24, ui8, , , , , , linear, +L32_UNORM , 32, 1, 1, 1, , , , , un32, , , linear, +A32_UNORM , 32, 1, 1, 1, , , , un32, , , , alpha, +L16A16_UNORM , 32, 1, 1, 1, , , , un16, un16, , , linear, +I24X8_UNORM , 32, 1, 1, 1, , , , x8, , un24, , linear, +L24X8_UNORM , 32, 1, 1, 1, , , , x8, un24, , , linear, +A24X8_UNORM , 32, 1, 1, 1, , , , un24, x8, , , alpha, +I32_FLOAT , 32, 1, 1, 1, , , , , , sf32, , linear, +L32_FLOAT , 32, 1, 1, 1, , , , , sf32, , , linear, +A32_FLOAT , 32, 1, 1, 1, , , , sf32, , , , alpha, +X8B8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear, +A8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, x8, 
un8, , , , linear, +B8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear, +B8G8R8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear, +B8G8R8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb, +R8G8B8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear, +R8G8B8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb, +R9G9B9E5_SHAREDEXP , 32, 1, 1, 1, ui9, ui9, ui9, , , , , linear, +B10G10R10X2_UNORM , 32, 1, 1, 1, un10, un10, un10, x2, , , , linear, +L16A16_FLOAT , 32, 1, 1, 1, , , , sf16, sf16, , , linear, +R32_UNORM , 32, 1, 1, 1, un32, , , , , , , linear, +R32_SNORM , 32, 1, 1, 1, sn32, , , , , , , linear, +R10G10B10X2_USCALED , 32, 1, 1, 1, us10, us10, us10, x2, , , , linear, +R8G8B8A8_SSCALED , 32, 1, 1, 1, ss8, ss8, ss8, ss8, , , , linear, +R8G8B8A8_USCALED , 32, 1, 1, 1, us8, us8, us8, us8, , , , linear, +R16G16_SSCALED , 32, 1, 1, 1, ss16, ss6, , , , , , linear, +R16G16_USCALED , 32, 1, 1, 1, us16, us16, , , , , , linear, +R32_SSCALED , 32, 1, 1, 1, ss32, , , , , , , linear, +R32_USCALED , 32, 1, 1, 1, us32, , , , , , , linear, +B5G6R5_UNORM , 16, 1, 1, 1, un5, un6, un5, , , , , linear, +B5G6R5_UNORM_SRGB , 16, 1, 1, 1, un5, un6, un5, , , , , srgb, +B5G5R5A1_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear, +B5G5R5A1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, un1, , , , srgb, +B4G4R4A4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear, +B4G4R4A4_UNORM_SRGB , 16, 1, 1, 1, un4, un4, un4, un4, , , , srgb, +R8G8_UNORM , 16, 1, 1, 1, un8, un8, , , , , , linear, +R8G8_SNORM , 16, 1, 1, 1, sn8, sn8, , , , , , linear, +R8G8_SINT , 16, 1, 1, 1, si8, si8, , , , , , linear, +R8G8_UINT , 16, 1, 1, 1, ui8, ui8, , , , , , linear, +R16_UNORM , 16, 1, 1, 1, un16, , , , , , , linear, +R16_SNORM , 16, 1, 1, 1, sn16, , , , , , , linear, +R16_SINT , 16, 1, 1, 1, si16, , , , , , , linear, +R16_UINT , 16, 1, 1, 1, ui16, , , , , , , linear, +R16_FLOAT , 16, 1, 1, 1, sf16, , , , , , , linear, +A8P8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , 
, un8, linear, +A8P8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear, +I16_UNORM , 16, 1, 1, 1, , , , , , un16, , linear, +L16_UNORM , 16, 1, 1, 1, , , , , un16, , , linear, +A16_UNORM , 16, 1, 1, 1, , , , un16, , , , alpha, +L8A8_UNORM , 16, 1, 1, 1, , , , un8, un8, , , linear, +I16_FLOAT , 16, 1, 1, 1, , , , , , sf16, , linear, +L16_FLOAT , 16, 1, 1, 1, , , , , sf16, , , linear, +A16_FLOAT , 16, 1, 1, 1, , , , sf16, , , , alpha, +L8A8_UNORM_SRGB , 16, 1, 1, 1, , , , un8, un8, , , srgb, +R5G5_SNORM_B6_UNORM , 16, 1, 1, 1, sn5, sn5, un6, , , , , linear, +B5G5R5X1_UNORM , 16, 1, 1, 1, un5, un5, un5, x1, , , , linear, +B5G5R5X1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, x1, , , , srgb, +R8G8_SSCALED , 16, 1, 1, 1, ss8, ss8, , , , , , linear, +R8G8_USCALED , 16, 1, 1, 1, us8, us8, , , , , , linear, +R16_SSCALED , 16, 1, 1, 1, ss16, , , , , , , linear, +R16_USCALED , 16, 1, 1, 1, us16, , , , , , , linear, +P8A8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear, +P8A8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear, +A1B5G5R5_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear, +A4B4G4R4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear, +L8A8_UINT , 16, 1, 1, 1, , , , ui8, ui8, , , linear, +L8A8_SINT , 16, 1, 1, 1, , , , si8, si8, , , linear, +R8_UNORM , 8, 1, 1, 1, un8, , , , , , , linear, +R8_SNORM , 8, 1, 1, 1, sn8, , , , , , , linear, +R8_SINT , 8, 1, 1, 1, si8, , , , , , , linear, +R8_UINT , 8, 1, 1, 1, ui8, , , , , , , linear, +A8_UNORM , 8, 1, 1, 1, , , , un8, , , , alpha, +I8_UNORM , 8, 1, 1, 1, , , , , , un8, , linear, +L8_UNORM , 8, 1, 1, 1, , , , , un8, , , linear, +P4A4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear, +A4P4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear, +R8_SSCALED , 8, 1, 1, 1, ss8, , , , , , , linear, +R8_USCALED , 8, 1, 1, 1, us8, , , , , , , linear, +P8_UNORM_PALETTE0 , 8, 1, 1, 1, , , , , , , un8, linear, +L8_UNORM_SRGB , 8, 1, 1, 1, , , , , un8, , , linear, +P8_UNORM_PALETTE1 , 8, 1, 1, 
1, , , , , , , un8, linear, +P4A4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear, +A4P4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear, +Y8_UNORM , 0, 0, 0, 0, , , , , , , , yuv, +L8_UINT , 8, 1, 1, 1, , , , , ui8, , , linear, +L8_SINT , 8, 1, 1, 1, , , , , si8, , , linear, +I8_UINT , 8, 1, 1, 1, , , , , , ui8, , linear, +I8_SINT , 8, 1, 1, 1, , , , , , si8, , linear, +DXT1_RGB_SRGB , 64, 4, 4, 1, un4, un4, un4, , , , , srgb, dxt1 +R1_UNORM , 1, 1, 1, 1, un1, , , , , , , linear, +YCRCB_NORMAL , 0, 0, 0, 0, , , , , , , , yuv, +YCRCB_SWAPUVY , 0, 0, 0, 0, , , , , , , , yuv, +P2_UNORM_PALETTE0 , 2, 1, 1, 1, , , , , , , un2, linear, +P2_UNORM_PALETTE1 , 2, 1, 1, 1, , , , , , , un2, linear, +BC1_UNORM , 64, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt1 +BC2_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt3 +BC3_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt5 +BC4_UNORM , 64, 4, 4, 1, un8, , , , , , , linear, rgtc1 +BC5_UNORM , 128, 4, 4, 1, un8, un8, , , , , , linear, rgtc2 +BC1_UNORM_SRGB , 64, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt1 +BC2_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt3 +BC3_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt5 +MONO8 , 1, 1, 1, 1, , , , , , , , , +YCRCB_SWAPUV , 0, 0, 0, 0, , , , , , , , yuv, +YCRCB_SWAPY , 0, 0, 0, 0, , , , , , , , yuv, +DXT1_RGB , 64, 4, 4, 1, un4, un4, un4, , , , , linear, dxt1 +FXT1 , 128, 8, 4, 1, un4, un4, un4, , , , , linear, fxt1 +R8G8B8_UNORM , 24, 1, 1, 1, un8, un8, un8, , , , , linear, +R8G8B8_SNORM , 24, 1, 1, 1, sn8, sn8, sn8, , , , , linear, +R8G8B8_SSCALED , 24, 1, 1, 1, ss8, ss8, ss8, , , , , linear, +R8G8B8_USCALED , 24, 1, 1, 1, us8, us8, us8, , , , , linear, +R64G64B64A64_FLOAT , 256, 1, 1, 1, sf64, sf64, sf64, sf64, , , , linear, +R64G64B64_FLOAT , 196, 1, 1, 1, sf64, sf64, sf64, , , , , linear, +BC4_SNORM , 64, 4, 4, 1, sn8, , , , , , , linear, rgtc1 +BC5_SNORM , 128, 4, 4, 1, sn8, sn8, , , , , , linear, rgtc2 
+R16G16B16_FLOAT , 48, 1, 1, 1, sf16, sf16, sf16, , , , , linear, +R16G16B16_UNORM , 48, 1, 1, 1, un16, un16, un16, , , , , linear, +R16G16B16_SNORM , 48, 1, 1, 1, sn16, sn16, sn16, , , , , linear, +R16G16B16_SSCALED , 48, 1, 1, 1, ss16, ss16, ss16, , , , , linear, +R16G16B16_USCALED , 48, 1, 1, 1, us16, us16, us16, , , , , linear, +BC6H_SF16 , 128, 4, 4, 1, sf16, sf16, sf16, , , , , linear, bptc +BC7_UNORM , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, bptc +BC7_UNORM_SRGB , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, bptc +BC6H_UF16 , 128, 4, 4, 1, uf16, uf16, uf16, , , , , linear, bptc +PLANAR_420_8 , 0, 0, 0, 0, , , , , , , , yuv, +R8G8B8_UNORM_SRGB , 24, 1, 1, 1, un8, un8, un8, , , , , srgb, +ETC1_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc1 +ETC2_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc2 +EAC_R11 , 64, 4, 4, 1, un11, , , , , , , linear, etc2 +EAC_RG11 , 128, 4, 4, 1, un11, un11, , , , , , linear, etc2 +EAC_SIGNED_R11 , 64, 4, 4, 1, sn11, , , , , , , linear, etc2 +EAC_SIGNED_RG11 , 128, 4, 4, 1, sn11, sn11, , , , , , linear, etc2 +ETC2_SRGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , srgb, etc2 +R16G16B16_UINT , 48, 1, 1, 1, ui16, ui16, ui16, , , , , linear, +R16G16B16_SINT , 48, 1, 1, 1, si16, si16, si16, , , , , linear, +R32_SFIXED , 32, 1, 1, 1, sx16, , , , , , , linear, +R10G10B10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear, +R10G10B10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear, +R10G10B10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear, +R10G10B10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear, +B10G10R10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear, +B10G10R10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear, +B10G10R10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear, +B10G10R10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear, +B10G10R10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear, +R64G64B64A64_PASSTHRU , 256, 
1, 1, 1, r64, r64, r64, r64, , , , , +R64G64B64_PASSTHRU , 192, 1, 1, 1, r64, r64, r64, , , , , , +ETC2_RGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , linear, etc2 +ETC2_SRGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , srgb, etc2 +ETC2_EAC_RGBA8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, etc2 +ETC2_EAC_SRGB8_A8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, etc2 +R8G8B8_UINT , 24, 1, 1, 1, ui8, ui8, ui8, , , , , linear, +R8G8B8_SINT , 24, 1, 1, 1, si8, si8, si8, , , , , linear, +RAW , 0, 0, 0, 0, , , , , , , , , -- cgit v1.2.3 From bfb022a23552becfaa04509a93bbcf280657c5ee Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 12 Nov 2015 10:46:12 -0800 Subject: isl: Generate isl_format_layout.c Generate an array of struct isl_format_layout, using isl_format_layout.csv as input. Each entry follows the patten: [ISL_FORMAT_R32G32B32A32_FLOAT] = { ISL_FORMAT_R32G32B32A32_FLOAT, .bs = 16, .bpb = 128, .bw = 1, .bh = 1, .bd = 1, .channels = { .r = { ISL_SFLOAT, 32 }, .g = { ISL_SFLOAT, 32 }, .b = { ISL_SFLOAT, 32 }, .a = { ISL_SFLOAT, 32 }, .l = {}, .i = {}, .p = {}, }, .colorspace = ISL_COLORSPACE_LINEAR, .txc = ISL_TXC_NONE, }, --- src/vulkan/.gitignore | 3 +- src/vulkan/Makefile.am | 11 ++- src/vulkan/isl.h | 70 +++++++++++++++++++ src/vulkan/isl_format_layout_gen.bash | 128 ++++++++++++++++++++++++++++++++++ 4 files changed, 209 insertions(+), 3 deletions(-) create mode 100755 src/vulkan/isl_format_layout_gen.bash (limited to 'src') diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore index 6b9074d9f03..316d24a9b02 100644 --- a/src/vulkan/.gitignore +++ b/src/vulkan/.gitignore @@ -4,4 +4,5 @@ /anv_entrypoints.h /wayland-drm-protocol.c /wayland-drm-client-protocol.h -/anv_icd.json \ No newline at end of file +/anv_icd.json +/isl_format_layout.c diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 4a05657df71..9f8c5e34063 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -83,11 +83,13 @@ VULKAN_SOURCES = \ 
gen8_pipeline.c \ gen7_state.c \ gen7_cmd_buffer.c \ - gen7_pipeline.c + gen7_pipeline.c \ + isl_format_layout.c BUILT_SOURCES = \ anv_entrypoints.h \ - anv_entrypoints.c + anv_entrypoints.c \ + isl_format_layout.c if HAVE_EGL_PLATFORM_WAYLAND BUILT_SOURCES += \ @@ -117,6 +119,11 @@ anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< code > $@ +isl_format_layout.c: isl_format_layout_gen.bash \ + isl_format_layout.csv + $(AM_V_GEN)$(srcdir)/isl_format_layout_gen.bash \ + <$(srcdir)/isl_format_layout.csv >$@ + CLEANFILES = $(BUILT_SOURCES) libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ diff --git a/src/vulkan/isl.h b/src/vulkan/isl.h index eed4db41f10..b170c215b35 100644 --- a/src/vulkan/isl.h +++ b/src/vulkan/isl.h @@ -28,6 +28,8 @@ #pragma once +#include + #ifdef __cplusplus extern "C" { #endif @@ -265,6 +267,74 @@ enum isl_format { ISL_FORMAT_UNSUPPORTED = UINT16_MAX, }; +enum isl_base_type { + ISL_VOID, + ISL_RAW, + ISL_UNORM, + ISL_SNORM, + ISL_UFLOAT, + ISL_SFLOAT, + ISL_UFIXED, + ISL_SFIXED, + ISL_UINT, + ISL_SINT, + ISL_USCALED, + ISL_SSCALED, +}; + +enum isl_colorspace { + ISL_COLORSPACE_NONE = 0, + ISL_COLORSPACE_LINEAR, + ISL_COLORSPACE_SRGB, + ISL_COLORSPACE_YUV, +}; + +/** + * Texture compression mode + */ +enum isl_txc { + ISL_TXC_NONE = 0, + ISL_TXC_DXT1, + ISL_TXC_DXT3, + ISL_TXC_DXT5, + ISL_TXC_FXT1, + ISL_TXC_RGTC1, + ISL_TXC_RGTC2, + ISL_TXC_BPTC, + ISL_TXC_ETC1, + ISL_TXC_ETC2, +}; + +struct isl_channel_layout { + enum isl_base_type type; + uint8_t bits; /**< Size in bits */ +}; + +struct isl_format_layout { + enum isl_format format; + + uint16_t bpb; /**< Bits per block */ + uint8_t bs; /**< Block size, in bytes, rounded towards 0 */ + uint8_t bw; /**< Block width, in pixels */ + uint8_t bh; /**< Block height, in pixels */ + uint8_t bd; /**< Block depth, in pixels */ + + struct { + struct 
isl_channel_layout r; /**< Red channel */ + struct isl_channel_layout g; /**< Green channel */ + struct isl_channel_layout b; /**< Blue channel */ + struct isl_channel_layout a; /**< Alpha channel */ + struct isl_channel_layout l; /**< Luminance channel */ + struct isl_channel_layout i; /**< Intensity channel */ + struct isl_channel_layout p; /**< Palette channel */ + } channels; + + enum isl_colorspace colorspace; + enum isl_txc txc; +}; + +extern const struct isl_format_layout isl_format_layouts[]; + #ifdef __cplusplus } #endif diff --git a/src/vulkan/isl_format_layout_gen.bash b/src/vulkan/isl_format_layout_gen.bash new file mode 100755 index 00000000000..2511f299a7e --- /dev/null +++ b/src/vulkan/isl_format_layout_gen.bash @@ -0,0 +1,128 @@ +#!/usr/bin/env bash +# +# Copyright 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +set -eu +set -o pipefail + +cat <<'EOF' +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl.h" + +const struct isl_format_layout +isl_format_layouts[] = { +EOF + +sed -r ' +# Delete comment lines and empty lines +/^[[:space:]]*#/d +/^[[:space:]]*$/d + +# Delete spaces +s/[[:space:]]//g + +# Translate formats +s/^([A-Za-z0-9_]+),*/ISL_FORMAT_\1,/ + +# Translate data type of channels +s/\/ISL_COLORSPACE_\1/ +s/\// + +# Translate texture compression +s/\<(dxt|fxt|rgtc|bptc|etc)([0-9]*)\>/ISL_TXC_\1\2/ +' | +tr 'a-z' 'A-Z' | # Convert to uppersace +while IFS=, read -r format bpb bw bh bd \ + red green blue alpha \ + luminance intensity palette \ + colorspace txc +do + : ${colorspace:=ISL_COLORSPACE_NONE} + : ${txc:=ISL_TXC_NONE} + + cat < Date: Thu, 12 Nov 2015 11:31:02 -0800 Subject: anv: Use enum isl_format in anv_format This patch begins using isl.h in Anvil. More refactors will follow. Change type of anv_format::surface_format from uint16_t -> enum isl_format. --- src/vulkan/anv_formats.c | 356 +++++++++++++++++++++++------------------------ src/vulkan/anv_private.h | 4 +- 2 files changed, 179 insertions(+), 181 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 56f4ebda59a..cc3df577201 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -24,188 +24,186 @@ #include "anv_private.h" #include "brw_surface_formats.h" -#define UNSUPPORTED 0xffff - #define fmt(__vk_fmt, __hw_fmt, ...) 
\ [__vk_fmt] = { .vk_format = __vk_fmt, .name = #__vk_fmt, .surface_format = __hw_fmt, .bw = 1, .bh = 1, .bd = 1, __VA_ARGS__ } static const struct anv_format anv_formats[] = { - fmt(VK_FORMAT_UNDEFINED, RAW, .bs = 1, .num_channels = 1), - fmt(VK_FORMAT_R4G4_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_R4G4_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_R4G4B4A4_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_R4G4B4A4_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_R5G6B5_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_R5G6B5_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_R5G5B5A1_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_R5G5B5A1_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_R8_UNORM, R8_UNORM, .bs = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SNORM, R8_SNORM, .bs = 1, .num_channels = 1,), - fmt(VK_FORMAT_R8_USCALED, R8_USCALED, .bs = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SSCALED, R8_SSCALED, .bs = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_UINT, R8_UINT, .bs = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SINT, R8_SINT, .bs = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_R8G8_UNORM, R8G8_UNORM, .bs = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SNORM, R8G8_SNORM, .bs = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_USCALED, R8G8_USCALED, .bs = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SSCALED, R8G8_SSCALED, .bs = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_UINT, R8G8_UINT, .bs = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SINT, R8G8_SINT, .bs = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SRGB, UNSUPPORTED), /* L8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8_UNORM, R8G8B8X8_UNORM, .bs = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SNORM, R8G8B8_SNORM, .bs = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_USCALED, R8G8B8_USCALED, .bs = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SSCALED, R8G8B8_SSCALED, .bs = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_UINT, R8G8B8_UINT, .bs = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SINT, R8G8B8_SINT, .bs = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SRGB, 
UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8A8_UNORM, R8G8B8A8_UNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SNORM, R8G8B8A8_SNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_USCALED, R8G8B8A8_USCALED, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SSCALED, R8G8B8A8_SSCALED, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_UINT, R8G8B8A8_UINT, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SINT, R8G8B8A8_SINT, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SRGB, R8G8B8A8_UNORM_SRGB, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UNORM, R10G10B10A2_UNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SNORM, R10G10B10A2_SNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_USCALED, R10G10B10A2_USCALED, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SSCALED, R10G10B10A2_SSCALED, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UINT, R10G10B10A2_UINT, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SINT, R10G10B10A2_SINT, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R16_UNORM, R16_UNORM, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SNORM, R16_SNORM, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_USCALED, R16_USCALED, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SSCALED, R16_SSCALED, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_UINT, R16_UINT, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SINT, R16_SINT, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SFLOAT, R16_FLOAT, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16G16_UNORM, R16G16_UNORM, .bs = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SNORM, R16G16_SNORM, .bs = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_USCALED, R16G16_USCALED, .bs = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SSCALED, R16G16_SSCALED, .bs = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_UINT, R16G16_UINT, .bs = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SINT, R16G16_SINT, .bs = 4, .num_channels 
= 2), - fmt(VK_FORMAT_R16G16_SFLOAT, R16G16_FLOAT, .bs = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16B16_UNORM, R16G16B16_UNORM, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SNORM, R16G16B16_SNORM, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_USCALED, R16G16B16_USCALED, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SSCALED, R16G16B16_SSCALED, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_UINT, R16G16B16_UINT, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SINT, R16G16B16_SINT, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SFLOAT, R16G16B16_FLOAT, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16A16_UNORM, R16G16B16A16_UNORM, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SNORM, R16G16B16A16_SNORM, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_USCALED, R16G16B16A16_USCALED, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SSCALED, R16G16B16A16_SSCALED, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_UINT, R16G16B16A16_UINT, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SINT, R16G16B16A16_SINT, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SFLOAT, R16G16B16A16_FLOAT, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R32_UINT, R32_UINT, .bs = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32_SINT, R32_SINT, .bs = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32_SFLOAT, R32_FLOAT, .bs = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32G32_UINT, R32G32_UINT, .bs = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32_SINT, R32G32_SINT, .bs = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32_SFLOAT, R32G32_FLOAT, .bs = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32B32_UINT, R32G32B32_UINT, .bs = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32_SINT, R32G32B32_SINT, .bs = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32_SFLOAT, R32G32B32_FLOAT, .bs = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32A32_UINT, R32G32B32A32_UINT, .bs = 16, .num_channels = 4,), - 
fmt(VK_FORMAT_R32G32B32A32_SINT, R32G32B32A32_SINT, .bs = 16, .num_channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SFLOAT, R32G32B32A32_FLOAT, .bs = 16, .num_channels = 4,), - fmt(VK_FORMAT_R64_SFLOAT, R64_FLOAT, .bs = 8, .num_channels = 1), - fmt(VK_FORMAT_R64G64_SFLOAT, R64G64_FLOAT, .bs = 16, .num_channels = 2), - fmt(VK_FORMAT_R64G64B64_SFLOAT, R64G64B64_FLOAT, .bs = 24, .num_channels = 3), - fmt(VK_FORMAT_R64G64B64A64_SFLOAT, R64G64B64A64_FLOAT, .bs = 32, .num_channels = 4), - fmt(VK_FORMAT_R11G11B10_UFLOAT, R11G11B10_FLOAT, .bs = 4, .num_channels = 3), - fmt(VK_FORMAT_R9G9B9E5_UFLOAT, R9G9B9E5_SHAREDEXP, .bs = 4, .num_channels = 3), - - fmt(VK_FORMAT_D16_UNORM, R16_UNORM, .bs = 2, .num_channels = 1, .depth_format = D16_UNORM), - fmt(VK_FORMAT_D24_UNORM_X8, R24_UNORM_X8_TYPELESS, .bs = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), - fmt(VK_FORMAT_D32_SFLOAT, R32_FLOAT, .bs = 4, .num_channels = 1, .depth_format = D32_FLOAT), - fmt(VK_FORMAT_S8_UINT, R8_UINT, .bs = 1, .num_channels = 1, .has_stencil = true), - fmt(VK_FORMAT_D16_UNORM_S8_UINT, R16_UNORM, .bs = 2, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), - fmt(VK_FORMAT_D24_UNORM_S8_UINT, R24_UNORM_X8_TYPELESS, .bs = 4, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), - fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, R32_FLOAT, .bs = 4, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), - - fmt(VK_FORMAT_BC1_RGB_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGB_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGBA_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGBA_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_BC2_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC2_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_BC3_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC3_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_BC4_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC4_SNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC5_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC5_SNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC6H_UFLOAT, UNSUPPORTED), - 
fmt(VK_FORMAT_BC6H_SFLOAT, UNSUPPORTED), - fmt(VK_FORMAT_BC7_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_BC7_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, ETC2_RGB8 , .bs = 8, .bw = 4, .bh = 4), - fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, ETC2_SRGB8 , .bs = 8, .bw = 4, .bh = 4), - fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, ETC2_RGB8_PTA , .bs = 8, .bw = 4, .bh = 4), - fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, ETC2_SRGB8_PTA , .bs = 8, .bw = 4, .bh = 4), - fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, ETC2_EAC_RGBA8 , .bs = 16, .bw = 4, .bh = 4), - fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, ETC2_EAC_SRGB8_A8, .bs = 16, .bw = 4, .bh = 4), - fmt(VK_FORMAT_EAC_R11_UNORM, EAC_R11 , .bs = 8, .bw = 4, .bh = 4), - fmt(VK_FORMAT_EAC_R11_SNORM, EAC_SIGNED_R11 , .bs = 8, .bw = 4, .bh = 4), - fmt(VK_FORMAT_EAC_R11G11_UNORM, EAC_RG11 , .bs = 16, .bw = 4, .bh = 4), - fmt(VK_FORMAT_EAC_R11G11_SNORM, EAC_SIGNED_RG11 , .bs = 16, .bw = 4, .bh = 4), - fmt(VK_FORMAT_ASTC_4x4_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_4x4_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x4_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x4_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x5_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x5_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x5_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x5_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x6_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x6_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x5_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x5_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x6_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x6_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x8_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x5_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x5_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x6_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x6_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x8_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x10_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x10_SRGB, 
UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x10_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x10_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x12_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x12_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_B4G4R4A4_UNORM, B4G4R4A4_UNORM, .bs = 2, .num_channels = 4), - fmt(VK_FORMAT_B5G5R5A1_UNORM, B5G5R5A1_UNORM, .bs = 2, .num_channels = 4), - fmt(VK_FORMAT_B5G6R5_UNORM, B5G6R5_UNORM, .bs = 2, .num_channels = 3), - fmt(VK_FORMAT_B5G6R5_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_UNORM, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SNORM, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SSCALED, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_UINT, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SINT, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SRGB, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UNORM, B8G8R8A8_UNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B8G8R8A8_SNORM, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_USCALED, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SSCALED, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UINT, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SINT, UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SRGB, B8G8R8A8_UNORM_SRGB, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UNORM, B10G10R10A2_UNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SNORM, B10G10R10A2_SNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_USCALED, B10G10R10A2_USCALED, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SSCALED, B10G10R10A2_SSCALED, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UINT, B10G10R10A2_UINT, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SINT, B10G10R10A2_SINT, .bs = 4, .num_channels = 4) + fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW, .bs = 1, .num_channels = 1), + fmt(VK_FORMAT_R4G4_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R4G4_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_USCALED, ISL_FORMAT_UNSUPPORTED), + 
fmt(VK_FORMAT_R5G6B5_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R5G6B5_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R5G5B5A1_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R5G5B5A1_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM, .bs = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM, .bs = 1, .num_channels = 1,), + fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED, .bs = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED, .bs = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT, .bs = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SINT, ISL_FORMAT_R8_SINT, .bs = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R8G8_UNORM, ISL_FORMAT_R8G8_UNORM, .bs = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SNORM, ISL_FORMAT_R8G8_SNORM, .bs = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_USCALED, ISL_FORMAT_R8G8_USCALED, .bs = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SSCALED, ISL_FORMAT_R8G8_SSCALED, .bs = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT, .bs = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SINT, ISL_FORMAT_R8G8_SINT, .bs = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SRGB, ISL_FORMAT_UNSUPPORTED), /* L8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8X8_UNORM, .bs = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM, .bs = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED, .bs = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED, .bs = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_UINT, ISL_FORMAT_R8G8B8_UINT, .bs = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SINT, ISL_FORMAT_R8G8B8_SINT, .bs = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SRGB, ISL_FORMAT_UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8A8_UNORM, ISL_FORMAT_R8G8B8A8_UNORM, .bs = 4, .num_channels = 4), + 
fmt(VK_FORMAT_R8G8B8A8_SNORM, ISL_FORMAT_R8G8B8A8_SNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_USCALED, ISL_FORMAT_R8G8B8A8_USCALED, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SSCALED, ISL_FORMAT_R8G8B8A8_SSCALED, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UNORM, ISL_FORMAT_R10G10B10A2_UNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SNORM, ISL_FORMAT_R10G10B10A2_SNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_USCALED, ISL_FORMAT_R10G10B10A2_USCALED, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SSCALED, ISL_FORMAT_R10G10B10A2_SSCALED, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UINT, ISL_FORMAT_R10G10B10A2_UINT, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SINT, ISL_FORMAT_R10G10B10A2_SINT, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SSCALED, ISL_FORMAT_R16_SSCALED, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_UINT, ISL_FORMAT_R16_UINT, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SINT, ISL_FORMAT_R16_SINT, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SFLOAT, ISL_FORMAT_R16_FLOAT, .bs = 2, .num_channels = 1), + fmt(VK_FORMAT_R16G16_UNORM, ISL_FORMAT_R16G16_UNORM, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SNORM, ISL_FORMAT_R16G16_SNORM, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_USCALED, ISL_FORMAT_R16G16_USCALED, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SSCALED, ISL_FORMAT_R16G16_SSCALED, .bs = 4, .num_channels = 2), + 
fmt(VK_FORMAT_R16G16_UINT, ISL_FORMAT_R16G16_UINT, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SINT, ISL_FORMAT_R16G16_SINT, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SFLOAT, ISL_FORMAT_R16G16_FLOAT, .bs = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16B16_UNORM, ISL_FORMAT_R16G16B16_UNORM, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SNORM, ISL_FORMAT_R16G16B16_SNORM, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_USCALED, ISL_FORMAT_R16G16B16_USCALED, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SSCALED, ISL_FORMAT_R16G16B16_SSCALED, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_UINT, ISL_FORMAT_R16G16B16_UINT, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SINT, ISL_FORMAT_R16G16B16_SINT, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SFLOAT, ISL_FORMAT_R16G16B16_FLOAT, .bs = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16A16_UNORM, ISL_FORMAT_R16G16B16A16_UNORM, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SNORM, ISL_FORMAT_R16G16B16A16_SNORM, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_USCALED, ISL_FORMAT_R16G16B16A16_USCALED, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SSCALED, ISL_FORMAT_R16G16B16A16_SSCALED, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_UINT, ISL_FORMAT_R16G16B16A16_UINT, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SINT, ISL_FORMAT_R16G16B16A16_SINT, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, ISL_FORMAT_R16G16B16A16_FLOAT, .bs = 8, .num_channels = 4), + fmt(VK_FORMAT_R32_UINT, ISL_FORMAT_R32_UINT, .bs = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32_SINT, ISL_FORMAT_R32_SINT, .bs = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32_SFLOAT, ISL_FORMAT_R32_FLOAT, .bs = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32G32_UINT, ISL_FORMAT_R32G32_UINT, .bs = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SINT, ISL_FORMAT_R32G32_SINT, .bs = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SFLOAT, 
ISL_FORMAT_R32G32_FLOAT, .bs = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32B32_UINT, ISL_FORMAT_R32G32B32_UINT, .bs = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SINT, ISL_FORMAT_R32G32B32_SINT, .bs = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, ISL_FORMAT_R32G32B32_FLOAT, .bs = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT, .bs = 16, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT, .bs = 16, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT, .bs = 16, .num_channels = 4,), + fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT, .bs = 8, .num_channels = 1), + fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT, .bs = 16, .num_channels = 2), + fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT, .bs = 24, .num_channels = 3), + fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT, .bs = 32, .num_channels = 4), + fmt(VK_FORMAT_R11G11B10_UFLOAT, ISL_FORMAT_R11G11B10_FLOAT, .bs = 4, .num_channels = 3), + fmt(VK_FORMAT_R9G9B9E5_UFLOAT, ISL_FORMAT_R9G9B9E5_SHAREDEXP, .bs = 4, .num_channels = 3), + + fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, .bs = 2, .num_channels = 1, .depth_format = D16_UNORM), + fmt(VK_FORMAT_D24_UNORM_X8, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .bs = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), + fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .bs = 4, .num_channels = 1, .depth_format = D32_FLOAT), + fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .bs = 1, .num_channels = 1, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .bs = 2, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .bs = 4, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .bs = 4, .num_channels = 2, .depth_format = D32_FLOAT, 
.has_stencil = true), + + fmt(VK_FORMAT_BC1_RGB_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGB_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGBA_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGBA_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC2_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC2_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC3_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC3_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC4_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC4_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC5_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC5_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC6H_UFLOAT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC6H_SFLOAT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC7_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC7_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, ISL_FORMAT_ETC2_RGB8 , .bs = 8, .bw = 4, .bh = 4), + fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, ISL_FORMAT_ETC2_SRGB8 , .bs = 8, .bw = 4, .bh = 4), + fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, ISL_FORMAT_ETC2_RGB8_PTA , .bs = 8, .bw = 4, .bh = 4), + fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, ISL_FORMAT_ETC2_SRGB8_PTA , .bs = 8, .bw = 4, .bh = 4), + fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, ISL_FORMAT_ETC2_EAC_RGBA8 , .bs = 16, .bw = 4, .bh = 4), + fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, ISL_FORMAT_ETC2_EAC_SRGB8_A8, .bs = 16, .bw = 4, .bh = 4), + fmt(VK_FORMAT_EAC_R11_UNORM, ISL_FORMAT_EAC_R11 , .bs = 8, .bw = 4, .bh = 4), + fmt(VK_FORMAT_EAC_R11_SNORM, ISL_FORMAT_EAC_SIGNED_R11 , .bs = 8, .bw = 4, .bh = 4), + fmt(VK_FORMAT_EAC_R11G11_UNORM, ISL_FORMAT_EAC_RG11 , .bs = 16, .bw = 4, .bh = 4), + fmt(VK_FORMAT_EAC_R11G11_SNORM, ISL_FORMAT_EAC_SIGNED_RG11 , .bs = 16, .bw = 4, .bh = 4), + fmt(VK_FORMAT_ASTC_4x4_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_SRGB, ISL_FORMAT_UNSUPPORTED), + 
fmt(VK_FORMAT_ASTC_5x5_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B4G4R4A4_UNORM, ISL_FORMAT_B4G4R4A4_UNORM, .bs = 2, .num_channels = 4), + fmt(VK_FORMAT_B5G5R5A1_UNORM, ISL_FORMAT_B5G5R5A1_UNORM, .bs = 2, .num_channels = 4), + fmt(VK_FORMAT_B5G6R5_UNORM, ISL_FORMAT_B5G6R5_UNORM, .bs = 2, .num_channels = 3), + fmt(VK_FORMAT_B5G6R5_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SSCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SINT, 
ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B8G8R8A8_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SSCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UNORM, ISL_FORMAT_B10G10R10A2_UNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SNORM, ISL_FORMAT_B10G10R10A2_SNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_USCALED, ISL_FORMAT_B10G10R10A2_USCALED, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SSCALED, ISL_FORMAT_B10G10R10A2_SSCALED, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UINT, ISL_FORMAT_B10G10R10A2_UINT, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SINT, ISL_FORMAT_B10G10R10A2_SINT, .bs = 4, .num_channels = 4) }; #undef fmt @@ -246,7 +244,7 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d if (physical_device->info->is_haswell) gen += 5; - if (format->surface_format == UNSUPPORTED) + if (format->surface_format == ISL_FORMAT_UNSUPPORTED) goto unsupported; uint32_t linear = 0, tiled = 0; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a60c679cede..4495499eda1 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -50,8 +50,8 @@ #include #include "anv_entrypoints.h" - #include "brw_context.h" +#include "isl.h" #ifdef __cplusplus extern "C" { @@ -1208,7 +1208,7 @@ gen8_compute_pipeline_create(VkDevice _device, struct anv_format { const VkFormat vk_format; const char *name; - uint16_t surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ + enum isl_format surface_format; /**< 
RENDER_SURFACE_STATE.SurfaceFormat */ uint8_t bs; /**< Block size (in bytes) of anv_format::surface_format. */ uint8_t bw; /**< Block width of anv_format::surface_format. */ uint8_t bh; /**< Block height of anv_format::surface_format. */ -- cgit v1.2.3 From cbc31f453d08bca5a38d7cc3d849b4279f9b9665 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 12 Nov 2015 12:00:15 -0800 Subject: anv/formats: Re-indent the fmt() macro Use one line per struct member. --- src/vulkan/anv_formats.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index cc3df577201..3e7a6f6e5a8 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -25,7 +25,13 @@ #include "brw_surface_formats.h" #define fmt(__vk_fmt, __hw_fmt, ...) \ - [__vk_fmt] = { .vk_format = __vk_fmt, .name = #__vk_fmt, .surface_format = __hw_fmt, .bw = 1, .bh = 1, .bd = 1, __VA_ARGS__ } + [__vk_fmt] = { \ + .vk_format = __vk_fmt, \ + .name = #__vk_fmt, \ + .surface_format = __hw_fmt, \ + .bw = 1, .bh = 1, .bd = 1, \ + __VA_ARGS__ \ + } static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW, .bs = 1, .num_channels = 1), -- cgit v1.2.3 From addc2a9d021cddfebf01db37add9de84ba8def81 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 12 Nov 2015 12:14:43 -0800 Subject: anv: Remove redundant fields anv_format::bs,bw,bh,bd Instead, use the equivalent fields in anv_format::isl_layout. 
--- src/vulkan/anv_formats.c | 218 +++++++++++++++++++++---------------------- src/vulkan/anv_image.c | 23 +++-- src/vulkan/anv_private.h | 5 +- src/vulkan/anv_wsi_wayland.c | 4 +- 4 files changed, 127 insertions(+), 123 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 3e7a6f6e5a8..942e7cfb484 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -29,12 +29,12 @@ .vk_format = __vk_fmt, \ .name = #__vk_fmt, \ .surface_format = __hw_fmt, \ - .bw = 1, .bh = 1, .bd = 1, \ + .isl_layout = &isl_format_layouts[__hw_fmt], \ __VA_ARGS__ \ } static const struct anv_format anv_formats[] = { - fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW, .bs = 1, .num_channels = 1), + fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW, .num_channels = 1), fmt(VK_FORMAT_R4G4_UNORM, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_R4G4_USCALED, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_R4G4B4A4_UNORM, ISL_FORMAT_UNSUPPORTED), @@ -43,94 +43,94 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_R5G6B5_USCALED, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_R5G5B5A1_UNORM, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_R5G5B5A1_USCALED, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM, .bs = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM, .bs = 1, .num_channels = 1,), - fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED, .bs = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED, .bs = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT, .bs = 1, .num_channels = 1), - fmt(VK_FORMAT_R8_SINT, ISL_FORMAT_R8_SINT, .bs = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM, .num_channels = 1), + fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM, .num_channels = 1,), + fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED, .num_channels = 1), + fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED, .num_channels = 1), + fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT, .num_channels = 1), + 
fmt(VK_FORMAT_R8_SINT, ISL_FORMAT_R8_SINT, .num_channels = 1), fmt(VK_FORMAT_R8_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R8G8_UNORM, ISL_FORMAT_R8G8_UNORM, .bs = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SNORM, ISL_FORMAT_R8G8_SNORM, .bs = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_USCALED, ISL_FORMAT_R8G8_USCALED, .bs = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SSCALED, ISL_FORMAT_R8G8_SSCALED, .bs = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT, .bs = 2, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SINT, ISL_FORMAT_R8G8_SINT, .bs = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_UNORM, ISL_FORMAT_R8G8_UNORM, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SNORM, ISL_FORMAT_R8G8_SNORM, .num_channels = 2), + fmt(VK_FORMAT_R8G8_USCALED, ISL_FORMAT_R8G8_USCALED, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SSCALED, ISL_FORMAT_R8G8_SSCALED, .num_channels = 2), + fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SINT, ISL_FORMAT_R8G8_SINT, .num_channels = 2), fmt(VK_FORMAT_R8G8_SRGB, ISL_FORMAT_UNSUPPORTED), /* L8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8X8_UNORM, .bs = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM, .bs = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED, .bs = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED, .bs = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_UINT, ISL_FORMAT_R8G8B8_UINT, .bs = 3, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SINT, ISL_FORMAT_R8G8B8_SINT, .bs = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8X8_UNORM, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_UINT, ISL_FORMAT_R8G8B8_UINT, .num_channels = 3), + 
fmt(VK_FORMAT_R8G8B8_SINT, ISL_FORMAT_R8G8B8_SINT, .num_channels = 3), fmt(VK_FORMAT_R8G8B8_SRGB, ISL_FORMAT_UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8A8_UNORM, ISL_FORMAT_R8G8B8A8_UNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SNORM, ISL_FORMAT_R8G8B8A8_SNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_USCALED, ISL_FORMAT_R8G8B8A8_USCALED, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SSCALED, ISL_FORMAT_R8G8B8A8_SSCALED, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UNORM, ISL_FORMAT_R10G10B10A2_UNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SNORM, ISL_FORMAT_R10G10B10A2_SNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_USCALED, ISL_FORMAT_R10G10B10A2_USCALED, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SSCALED, ISL_FORMAT_R10G10B10A2_SSCALED, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UINT, ISL_FORMAT_R10G10B10A2_UINT, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SINT, ISL_FORMAT_R10G10B10A2_SINT, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SSCALED, ISL_FORMAT_R16_SSCALED, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_UINT, ISL_FORMAT_R16_UINT, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SINT, ISL_FORMAT_R16_SINT, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16_SFLOAT, ISL_FORMAT_R16_FLOAT, .bs = 2, .num_channels = 1), - fmt(VK_FORMAT_R16G16_UNORM, ISL_FORMAT_R16G16_UNORM, .bs = 4, .num_channels = 2), - 
fmt(VK_FORMAT_R16G16_SNORM, ISL_FORMAT_R16G16_SNORM, .bs = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_USCALED, ISL_FORMAT_R16G16_USCALED, .bs = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SSCALED, ISL_FORMAT_R16G16_SSCALED, .bs = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_UINT, ISL_FORMAT_R16G16_UINT, .bs = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SINT, ISL_FORMAT_R16G16_SINT, .bs = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SFLOAT, ISL_FORMAT_R16G16_FLOAT, .bs = 4, .num_channels = 2), - fmt(VK_FORMAT_R16G16B16_UNORM, ISL_FORMAT_R16G16B16_UNORM, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SNORM, ISL_FORMAT_R16G16B16_SNORM, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_USCALED, ISL_FORMAT_R16G16B16_USCALED, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SSCALED, ISL_FORMAT_R16G16B16_SSCALED, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_UINT, ISL_FORMAT_R16G16B16_UINT, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SINT, ISL_FORMAT_R16G16B16_SINT, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SFLOAT, ISL_FORMAT_R16G16B16_FLOAT, .bs = 6, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16A16_UNORM, ISL_FORMAT_R16G16B16A16_UNORM, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SNORM, ISL_FORMAT_R16G16B16A16_SNORM, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_USCALED, ISL_FORMAT_R16G16B16A16_USCALED, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SSCALED, ISL_FORMAT_R16G16B16A16_SSCALED, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_UINT, ISL_FORMAT_R16G16B16A16_UINT, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SINT, ISL_FORMAT_R16G16B16A16_SINT, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SFLOAT, ISL_FORMAT_R16G16B16A16_FLOAT, .bs = 8, .num_channels = 4), - fmt(VK_FORMAT_R32_UINT, ISL_FORMAT_R32_UINT, .bs = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32_SINT, ISL_FORMAT_R32_SINT, .bs = 4, .num_channels = 1,), - 
fmt(VK_FORMAT_R32_SFLOAT, ISL_FORMAT_R32_FLOAT, .bs = 4, .num_channels = 1,), - fmt(VK_FORMAT_R32G32_UINT, ISL_FORMAT_R32G32_UINT, .bs = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32_SINT, ISL_FORMAT_R32G32_SINT, .bs = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32_SFLOAT, ISL_FORMAT_R32G32_FLOAT, .bs = 8, .num_channels = 2,), - fmt(VK_FORMAT_R32G32B32_UINT, ISL_FORMAT_R32G32B32_UINT, .bs = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32_SINT, ISL_FORMAT_R32G32B32_SINT, .bs = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32_SFLOAT, ISL_FORMAT_R32G32B32_FLOAT, .bs = 12, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT, .bs = 16, .num_channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT, .bs = 16, .num_channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT, .bs = 16, .num_channels = 4,), - fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT, .bs = 8, .num_channels = 1), - fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT, .bs = 16, .num_channels = 2), - fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT, .bs = 24, .num_channels = 3), - fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT, .bs = 32, .num_channels = 4), - fmt(VK_FORMAT_R11G11B10_UFLOAT, ISL_FORMAT_R11G11B10_FLOAT, .bs = 4, .num_channels = 3), - fmt(VK_FORMAT_R9G9B9E5_UFLOAT, ISL_FORMAT_R9G9B9E5_SHAREDEXP, .bs = 4, .num_channels = 3), - - fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, .bs = 2, .num_channels = 1, .depth_format = D16_UNORM), - fmt(VK_FORMAT_D24_UNORM_X8, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .bs = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), - fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .bs = 4, .num_channels = 1, .depth_format = D32_FLOAT), - fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .bs = 1, .num_channels = 1, .has_stencil = true), - fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .bs = 2, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = 
true), - fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .bs = 4, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), - fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .bs = 4, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), + fmt(VK_FORMAT_R8G8B8A8_UNORM, ISL_FORMAT_R8G8B8A8_UNORM, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SNORM, ISL_FORMAT_R8G8B8A8_SNORM, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_USCALED, ISL_FORMAT_R8G8B8A8_USCALED, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SSCALED, ISL_FORMAT_R8G8B8A8_SSCALED, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UNORM, ISL_FORMAT_R10G10B10A2_UNORM, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SNORM, ISL_FORMAT_R10G10B10A2_SNORM, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_USCALED, ISL_FORMAT_R10G10B10A2_USCALED, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SSCALED, ISL_FORMAT_R10G10B10A2_SSCALED, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UINT, ISL_FORMAT_R10G10B10A2_UINT, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SINT, ISL_FORMAT_R10G10B10A2_SINT, .num_channels = 4), + fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM, .num_channels = 1), + fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM, .num_channels = 1), + fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED, .num_channels = 1), + fmt(VK_FORMAT_R16_SSCALED, ISL_FORMAT_R16_SSCALED, .num_channels = 1), + fmt(VK_FORMAT_R16_UINT, ISL_FORMAT_R16_UINT, .num_channels = 1), + fmt(VK_FORMAT_R16_SINT, ISL_FORMAT_R16_SINT, .num_channels = 1), + fmt(VK_FORMAT_R16_SFLOAT, ISL_FORMAT_R16_FLOAT, .num_channels = 1), + fmt(VK_FORMAT_R16G16_UNORM, ISL_FORMAT_R16G16_UNORM, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SNORM, ISL_FORMAT_R16G16_SNORM, 
.num_channels = 2), + fmt(VK_FORMAT_R16G16_USCALED, ISL_FORMAT_R16G16_USCALED, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SSCALED, ISL_FORMAT_R16G16_SSCALED, .num_channels = 2), + fmt(VK_FORMAT_R16G16_UINT, ISL_FORMAT_R16G16_UINT, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SINT, ISL_FORMAT_R16G16_SINT, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SFLOAT, ISL_FORMAT_R16G16_FLOAT, .num_channels = 2), + fmt(VK_FORMAT_R16G16B16_UNORM, ISL_FORMAT_R16G16B16_UNORM, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SNORM, ISL_FORMAT_R16G16B16_SNORM, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_USCALED, ISL_FORMAT_R16G16B16_USCALED, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SSCALED, ISL_FORMAT_R16G16B16_SSCALED, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_UINT, ISL_FORMAT_R16G16B16_UINT, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SINT, ISL_FORMAT_R16G16B16_SINT, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SFLOAT, ISL_FORMAT_R16G16B16_FLOAT, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16A16_UNORM, ISL_FORMAT_R16G16B16A16_UNORM, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SNORM, ISL_FORMAT_R16G16B16A16_SNORM, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_USCALED, ISL_FORMAT_R16G16B16A16_USCALED, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SSCALED, ISL_FORMAT_R16G16B16A16_SSCALED, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_UINT, ISL_FORMAT_R16G16B16A16_UINT, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SINT, ISL_FORMAT_R16G16B16A16_SINT, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, ISL_FORMAT_R16G16B16A16_FLOAT, .num_channels = 4), + fmt(VK_FORMAT_R32_UINT, ISL_FORMAT_R32_UINT, .num_channels = 1,), + fmt(VK_FORMAT_R32_SINT, ISL_FORMAT_R32_SINT, .num_channels = 1,), + fmt(VK_FORMAT_R32_SFLOAT, ISL_FORMAT_R32_FLOAT, .num_channels = 1,), + fmt(VK_FORMAT_R32G32_UINT, ISL_FORMAT_R32G32_UINT, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SINT, ISL_FORMAT_R32G32_SINT, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SFLOAT, ISL_FORMAT_R32G32_FLOAT, 
.num_channels = 2,), + fmt(VK_FORMAT_R32G32B32_UINT, ISL_FORMAT_R32G32B32_UINT, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SINT, ISL_FORMAT_R32G32B32_SINT, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, ISL_FORMAT_R32G32B32_FLOAT, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT, .num_channels = 4,), + fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT, .num_channels = 1), + fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT, .num_channels = 2), + fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT, .num_channels = 3), + fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT, .num_channels = 4), + fmt(VK_FORMAT_R11G11B10_UFLOAT, ISL_FORMAT_R11G11B10_FLOAT, .num_channels = 3), + fmt(VK_FORMAT_R9G9B9E5_UFLOAT, ISL_FORMAT_R9G9B9E5_SHAREDEXP, .num_channels = 3), + + fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, .num_channels = 1, .depth_format = D16_UNORM), + fmt(VK_FORMAT_D24_UNORM_X8, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), + fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .num_channels = 1, .depth_format = D32_FLOAT), + fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .num_channels = 1, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), fmt(VK_FORMAT_BC1_RGB_UNORM, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_BC1_RGB_SRGB, ISL_FORMAT_UNSUPPORTED), @@ -148,16 +148,16 @@ static const struct anv_format anv_formats[] = { 
fmt(VK_FORMAT_BC6H_SFLOAT, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_BC7_UNORM, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_BC7_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, ISL_FORMAT_ETC2_RGB8 , .bs = 8, .bw = 4, .bh = 4), - fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, ISL_FORMAT_ETC2_SRGB8 , .bs = 8, .bw = 4, .bh = 4), - fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, ISL_FORMAT_ETC2_RGB8_PTA , .bs = 8, .bw = 4, .bh = 4), - fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, ISL_FORMAT_ETC2_SRGB8_PTA , .bs = 8, .bw = 4, .bh = 4), - fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, ISL_FORMAT_ETC2_EAC_RGBA8 , .bs = 16, .bw = 4, .bh = 4), - fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, ISL_FORMAT_ETC2_EAC_SRGB8_A8, .bs = 16, .bw = 4, .bh = 4), - fmt(VK_FORMAT_EAC_R11_UNORM, ISL_FORMAT_EAC_R11 , .bs = 8, .bw = 4, .bh = 4), - fmt(VK_FORMAT_EAC_R11_SNORM, ISL_FORMAT_EAC_SIGNED_R11 , .bs = 8, .bw = 4, .bh = 4), - fmt(VK_FORMAT_EAC_R11G11_UNORM, ISL_FORMAT_EAC_RG11 , .bs = 16, .bw = 4, .bh = 4), - fmt(VK_FORMAT_EAC_R11G11_SNORM, ISL_FORMAT_EAC_SIGNED_RG11 , .bs = 16, .bw = 4, .bh = 4), + fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, ISL_FORMAT_ETC2_RGB8), + fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, ISL_FORMAT_ETC2_SRGB8), + fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, ISL_FORMAT_ETC2_RGB8_PTA), + fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, ISL_FORMAT_ETC2_SRGB8_PTA), + fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, ISL_FORMAT_ETC2_EAC_RGBA8), + fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, ISL_FORMAT_ETC2_EAC_SRGB8_A8), + fmt(VK_FORMAT_EAC_R11_UNORM, ISL_FORMAT_EAC_R11), + fmt(VK_FORMAT_EAC_R11_SNORM, ISL_FORMAT_EAC_SIGNED_R11), + fmt(VK_FORMAT_EAC_R11G11_UNORM, ISL_FORMAT_EAC_RG11), + fmt(VK_FORMAT_EAC_R11G11_SNORM, ISL_FORMAT_EAC_SIGNED_RG11), fmt(VK_FORMAT_ASTC_4x4_UNORM, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_ASTC_4x4_SRGB, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_ASTC_5x4_UNORM, ISL_FORMAT_UNSUPPORTED), @@ -186,9 +186,9 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_ASTC_12x10_SRGB, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_ASTC_12x12_UNORM, ISL_FORMAT_UNSUPPORTED), 
fmt(VK_FORMAT_ASTC_12x12_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B4G4R4A4_UNORM, ISL_FORMAT_B4G4R4A4_UNORM, .bs = 2, .num_channels = 4), - fmt(VK_FORMAT_B5G5R5A1_UNORM, ISL_FORMAT_B5G5R5A1_UNORM, .bs = 2, .num_channels = 4), - fmt(VK_FORMAT_B5G6R5_UNORM, ISL_FORMAT_B5G6R5_UNORM, .bs = 2, .num_channels = 3), + fmt(VK_FORMAT_B4G4R4A4_UNORM, ISL_FORMAT_B4G4R4A4_UNORM, .num_channels = 4), + fmt(VK_FORMAT_B5G5R5A1_UNORM, ISL_FORMAT_B5G5R5A1_UNORM, .num_channels = 4), + fmt(VK_FORMAT_B5G6R5_UNORM, ISL_FORMAT_B5G6R5_UNORM, .num_channels = 3), fmt(VK_FORMAT_B5G6R5_USCALED, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_UNORM, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_SNORM, ISL_FORMAT_UNSUPPORTED), @@ -197,19 +197,19 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_B8G8R8_UINT, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_SINT, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM, .bs = 4, .num_channels = 4), + fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM, .num_channels = 4), fmt(VK_FORMAT_B8G8R8A8_SNORM, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_USCALED, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_SSCALED, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_UINT, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UNORM, ISL_FORMAT_B10G10R10A2_UNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SNORM, ISL_FORMAT_B10G10R10A2_SNORM, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_USCALED, ISL_FORMAT_B10G10R10A2_USCALED, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SSCALED, ISL_FORMAT_B10G10R10A2_SSCALED, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UINT, ISL_FORMAT_B10G10R10A2_UINT, .bs = 4, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SINT, 
ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM, .num_channels = 4), + fmt(VK_FORMAT_B8G8R8A8_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SSCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UNORM, ISL_FORMAT_B10G10R10A2_UNORM, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SNORM, ISL_FORMAT_B10G10R10A2_SNORM, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_USCALED, ISL_FORMAT_B10G10R10A2_USCALED, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SSCALED, ISL_FORMAT_B10G10R10A2_SSCALED, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_UINT, ISL_FORMAT_B10G10R10A2_UINT, .num_channels = 4), + fmt(VK_FORMAT_B10G10R10A2_SINT, ISL_FORMAT_B10G10R10A2_SINT, .num_channels = 4) }; #undef fmt @@ -250,7 +250,7 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d if (physical_device->info->is_haswell) gen += 5; - if (format->surface_format == ISL_FORMAT_UNSUPPORTED) + if (format->surface_format == ISL_FORMAT_UNSUPPORTED) goto unsupported; uint32_t linear = 0, tiled = 0; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 247a75dd8f4..ecd34e3ab5b 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -152,8 +152,12 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, const struct anv_tile_info *tile_info = &anv_tile_info_table[tile_mode]; - const uint32_t i = MAX(4, format->bw); /* FINISHME: Stop hardcoding subimage alignment */ - const uint32_t j = MAX(4, format->bh); /* FINISHME: Stop hardcoding subimage alignment */ + const uint32_t bs = format->isl_layout->bs; + const uint32_t bw = format->isl_layout->bw; + const uint32_t bh = format->isl_layout->bh; + + const uint32_t i = MAX(4, bw); /* FINISHME: Stop hardcoding subimage alignment */ + const uint32_t j = MAX(4, bh); /* FINISHME: Stop hardcoding subimage alignment */ assert(i == 4 || i == 8 || i == 16); assert(j == 4 || j == 8 || j == 16); @@ -187,8 +191,8 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, * Views >> Common Surface Formats >>
Surface Layout >> 2D Surfaces >> * Surface Arrays >> For All Surface Other Than Separate Stencil Buffer: */ - assert(format->bh == 1 || format->bh == 4); - qpitch = (h0 + h1 + 11 * j) / format->bh; + assert(bh == 1 || bh == 4); + qpitch = (h0 + h1 + 11 * j) / bh; mt_width = MAX(w0, w1 + w2); mt_height = array_size * qpitch; } @@ -228,8 +232,7 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, */ assert(anv_is_aligned(qpitch, j)); - uint32_t stride = align_u32(mt_width * format->bs / format->bw, - tile_info->width); + uint32_t stride = align_u32(mt_width * bs / bw, tile_info->width); if (create_info->stride > 0) stride = create_info->stride; @@ -237,7 +240,7 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, * Views >> Common Surface Formats >> Surface Padding Requirements >> * Sampling Engine Surfaces >> Buffer Padding Requirements: */ - const uint32_t mem_rows = align_u32(mt_height / format->bh, 2 * format->bh); + const uint32_t mem_rows = align_u32(mt_height / bh, 2 * bh); const uint32_t size = stride * align_u32(mem_rows, tile_info->height); const uint32_t offset = align_u32(*inout_image_size, tile_info->surface_alignment); @@ -499,14 +502,16 @@ anv_validate_CreateImageView(VkDevice _device, assert(!image->format->has_stencil); assert(!view_format_info->depth_format); assert(!view_format_info->has_stencil); - assert(view_format_info->bs == image->format->bs); + assert(view_format_info->isl_layout->bs == + image->format->isl_layout->bs); } else if (subresource->aspectMask & ds_flags) { assert((subresource->aspectMask & ~ds_flags) == 0); if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { assert(image->format->depth_format); assert(view_format_info->depth_format); - assert(view_format_info->bs == image->format->bs); + assert(view_format_info->isl_layout->bs == + image->format->isl_layout->bs); } if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL) { diff --git a/src/vulkan/anv_private.h 
b/src/vulkan/anv_private.h index 4495499eda1..205338adc26 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1209,10 +1209,7 @@ struct anv_format { const VkFormat vk_format; const char *name; enum isl_format surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ - uint8_t bs; /**< Block size (in bytes) of anv_format::surface_format. */ - uint8_t bw; /**< Block width of anv_format::surface_format. */ - uint8_t bh; /**< Block height of anv_format::surface_format. */ - uint8_t bd; /**< Block depth of anv_format::surface_format. */ + const struct isl_format_layout *isl_layout; uint8_t num_channels; uint16_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ bool has_stencil; diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index 1828b090335..d3d73b8ae2d 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -60,8 +60,10 @@ wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format) return; /* Don't add formats which aren't supported by the driver */ - if (anv_format_for_vk_format(format)->bs == 0) + if (anv_format_for_vk_format(format)->surface_format == + ISL_FORMAT_UNSUPPORTED) { return; + } f = anv_vector_add(&display->formats); if (f) -- cgit v1.2.3 From c6493dff79bad0ea8d91869b1e1d31bc8c85570c Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 12 Nov 2015 12:24:01 -0800 Subject: anv: Strip trailing space in anv_private.h --- src/vulkan/anv_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 205338adc26..cf6f415141a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -808,7 +808,7 @@ struct anv_buffer { /* Set when bound */ struct anv_bo * bo; - VkDeviceSize offset; + VkDeviceSize offset; }; enum anv_cmd_dirty_bits { -- cgit v1.2.3 From 477383e9ac2fd43f8b2176c13da7116f5d20b959 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 13 Nov 2015 
10:12:18 -0800 Subject: anv: Strip trailing whitespace from anv_device.c --- src/vulkan/anv_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 1328516c48e..af27711d9a4 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -115,7 +115,7 @@ anv_physical_device_init(struct anv_physical_device *device, "non-llc gpu"); goto fail; } - + close(fd); brw_process_intel_debug_variable(); @@ -129,7 +129,7 @@ anv_physical_device_init(struct anv_physical_device *device, device->compiler->shader_perf_log = compiler_perf_log; return VK_SUCCESS; - + fail: close(fd); return result; @@ -647,7 +647,7 @@ VkResult anv_CreateDevice( device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); if (device->fd == -1) goto fail_device; - + device->context_id = anv_gem_create_context(device); if (device->context_id == -1) goto fail_fd; @@ -1020,7 +1020,7 @@ VkResult anv_MapMemory( mem->map_size = size; *ppData = mem->map; - + return VK_SUCCESS; } -- cgit v1.2.3 From b1bb270590ae969b86a3f838d83e06466f345723 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 13 Nov 2015 09:27:06 -0800 Subject: isl: Add struct isl_device The struct is incomplete (it contains only the gen). And it's nowhere used yet. It will be used later for surface layout calculations. 
--- src/vulkan/Makefile.am | 1 + src/vulkan/isl.c | 33 +++++++++++++++++++++++++++++++++ src/vulkan/isl.h | 14 ++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 src/vulkan/isl.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 9f8c5e34063..6e4de53b2d0 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -84,6 +84,7 @@ VULKAN_SOURCES = \ gen7_state.c \ gen7_cmd_buffer.c \ gen7_pipeline.c \ + isl.c \ isl_format_layout.c BUILT_SOURCES = \ diff --git a/src/vulkan/isl.c b/src/vulkan/isl.c new file mode 100644 index 00000000000..b4de5653677 --- /dev/null +++ b/src/vulkan/isl.c @@ -0,0 +1,33 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include + +#include "isl.h" + +void +isl_device_init(struct isl_device *dev, uint8_t gen10x) +{ + assert(gen10x % 5 == 0); + dev->gen = gen10x; +} diff --git a/src/vulkan/isl.h b/src/vulkan/isl.h index b170c215b35..7680fa9ef9a 100644 --- a/src/vulkan/isl.h +++ b/src/vulkan/isl.h @@ -28,6 +28,7 @@ #pragma once +#include #include #ifdef __cplusplus @@ -305,6 +306,16 @@ enum isl_txc { ISL_TXC_ETC2, }; +struct isl_device { + /** + * @brief Hardware generation, 10x. + * + * For example, gen is 70 for Ivybridge and Baytrail; gen is 75 for + * Haswell. + */ + uint8_t gen; +}; + struct isl_channel_layout { enum isl_base_type type; uint8_t bits; /**< Size in bits */ @@ -333,6 +344,9 @@ struct isl_format_layout { enum isl_txc txc; }; +void +isl_device_init(struct isl_device *dev, uint8_t gen10x); + extern const struct isl_format_layout isl_format_layouts[]; #ifdef __cplusplus -- cgit v1.2.3 From 652727b0295d40d181c6a6420e19bdf0db005784 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 13 Nov 2015 09:29:31 -0800 Subject: isl: Add structs isl_extent2d, isl_extent3d They are nowhere used yet. --- src/vulkan/isl.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/vulkan/isl.h b/src/vulkan/isl.h index 7680fa9ef9a..fde16d3bbf0 100644 --- a/src/vulkan/isl.h +++ b/src/vulkan/isl.h @@ -316,6 +316,17 @@ struct isl_device { uint8_t gen; }; +struct isl_extent2d { + uint32_t width; + uint32_t height; +}; + +struct isl_extent3d { + uint32_t width; + uint32_t height; + uint32_t depth; +}; + struct isl_channel_layout { enum isl_base_type type; uint8_t bits; /**< Size in bits */ -- cgit v1.2.3 From a4a2ea3f79153627b631a2b1a7f3c4ac38430817 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 13 Nov 2015 09:45:55 -0800 Subject: isl: Add enum isl_tiling and a query func The query func is isl_tiling_get_extent. 
--- src/vulkan/isl.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/isl.h | 19 +++++++++++++++++++ 2 files changed, 77 insertions(+) (limited to 'src') diff --git a/src/vulkan/isl.c b/src/vulkan/isl.c index b4de5653677..ecfa96973dd 100644 --- a/src/vulkan/isl.c +++ b/src/vulkan/isl.c @@ -23,11 +23,69 @@ #include +#include "mesa/main/imports.h" + #include "isl.h" +/** + * Log base 2, rounding towards zero. + */ +static inline uint32_t +isl_log2u(uint32_t n) +{ + assert(n != 0); + return 31 - __builtin_clz(n); +} + void isl_device_init(struct isl_device *dev, uint8_t gen10x) { assert(gen10x % 5 == 0); dev->gen = gen10x; } + +/** + * The returned extent's units are (width=bytes, height=rows). + */ +void +isl_tiling_get_extent(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t cpp, + struct isl_extent2d *e) +{ + static const struct isl_extent2d legacy_extents[] = { + [ISL_TILING_LINEAR] = { 1, 1 }, + [ISL_TILING_X] = { 512, 8 }, + [ISL_TILING_Y] = { 128, 32 }, + [ISL_TILING_W] = { 128, 32 }, + }; + + static const struct isl_extent2d yf_extents[] = { + /*cpp*/ + /* 1*/ [0] = { 64, 64 }, + /* 2*/ [1] = { 128, 32 }, + /* 4*/ [2] = { 128, 32 }, + /* 8*/ [3] = { 256, 16 }, + /*16*/ [4] = { 256, 16 }, + }; + + assert(cpp > 0); + + switch (tiling) { + case ISL_TILING_LINEAR: + case ISL_TILING_X: + case ISL_TILING_Y: + case ISL_TILING_W: + *e = legacy_extents[tiling]; + return; + case ISL_TILING_Yf: + case ISL_TILING_Ys: + assert(_mesa_is_pow_two(cpp)); + *e = yf_extents[isl_log2u(cpp)]; + if (tiling == ISL_TILING_Ys) { + e->width *= 4; + e->height *= 4; + } + return; + } +} diff --git a/src/vulkan/isl.h b/src/vulkan/isl.h index fde16d3bbf0..4c38170fc72 100644 --- a/src/vulkan/isl.h +++ b/src/vulkan/isl.h @@ -35,6 +35,19 @@ extern "C" { #endif +/** + * WARNING: These values differ from the hardware enum values, which are + * unstable across hardware generations. 
+ */ +enum isl_tiling { + ISL_TILING_LINEAR, + ISL_TILING_W, + ISL_TILING_X, + ISL_TILING_Y, /**< Legacy Y tiling */ + ISL_TILING_Yf, + ISL_TILING_Ys, +}; + /** * Hardware enumeration SURFACE_FORMAT. * @@ -358,6 +371,12 @@ struct isl_format_layout { void isl_device_init(struct isl_device *dev, uint8_t gen10x); +void +isl_tiling_get_extent(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t cpp, + struct isl_extent2d *e); + extern const struct isl_format_layout isl_format_layouts[]; #ifdef __cplusplus -- cgit v1.2.3 From af392916ff3856c8e606212914c1623674c49aff Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 13 Nov 2015 10:12:51 -0800 Subject: anv/device: Embed isl_device Embed struct isl_device into anv_physical_device and anv_device. It will later be used for surface layout calculations. --- src/vulkan/anv_device.c | 8 ++++++++ src/vulkan/anv_private.h | 2 ++ 2 files changed, 10 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index af27711d9a4..5d53deeb599 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -56,6 +56,7 @@ anv_physical_device_init(struct anv_physical_device *device, { VkResult result; int fd; + uint32_t gen10x; fd = open(path, O_RDWR | O_CLOEXEC); if (fd < 0) @@ -81,6 +82,10 @@ anv_physical_device_init(struct anv_physical_device *device, goto fail; } + gen10x = 10 * device->info->gen; + if (device->info->is_haswell) + gen10x += 5; + if (device->info->gen == 7 && !device->info->is_haswell && !device->info->is_baytrail) { fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); @@ -128,6 +133,8 @@ anv_physical_device_init(struct anv_physical_device *device, device->compiler->shader_debug_log = compiler_debug_log; device->compiler->shader_perf_log = compiler_perf_log; + isl_device_init(&device->isl_dev, gen10x); + return VK_SUCCESS; fail: @@ -672,6 +679,7 @@ VkResult anv_CreateDevice( anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); 
device->info = *physical_device->info; + device->isl_dev = physical_device->isl_dev; anv_queue_init(device, &device->queue); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index cf6f415141a..8269c7e8509 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -428,6 +428,7 @@ struct anv_physical_device { const struct brw_device_info * info; uint64_t aperture_size; struct brw_compiler * compiler; + struct isl_device isl_dev; }; bool anv_is_scalar_shader_stage(const struct brw_compiler *compiler, @@ -485,6 +486,7 @@ struct anv_device { struct anv_instance * instance; uint32_t chipset_id; struct brw_device_info info; + struct isl_device isl_dev; int context_id; int fd; -- cgit v1.2.3 From ba467467f49577381be651643dd7ff184b998590 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 13 Nov 2015 10:24:57 -0800 Subject: anv: Use enum isl_tiling everywhere In anv_surface and anv_image_create_info, replace member 'uint8_t tile_mode' with 'enum isl_tiling'. As a nice side-effect, this patch also reduces bug potential because the hardware enum values for tile modes are unstable across hardware generations. --- src/vulkan/anv_image.c | 51 +++++++++++++++++++++++--------------------- src/vulkan/anv_intel.c | 4 ++-- src/vulkan/anv_private.h | 6 +++--- src/vulkan/anv_wsi_wayland.c | 4 ++-- src/vulkan/anv_wsi_x11.c | 4 ++-- src/vulkan/gen7_state.c | 5 +++-- src/vulkan/gen8_state.c | 11 +++++++++- 7 files changed, 49 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index ecd34e3ab5b..e90257e8faa 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -80,9 +80,6 @@ static const struct anv_surf_type_limits { }; static const struct anv_tile_info { - uint32_t width; - uint32_t height; - /** * Alignment for RENDER_SURFACE_STATE.SurfaceBaseAddress. 
* @@ -96,17 +93,19 @@ static const struct anv_tile_info { */ uint32_t surface_alignment; } anv_tile_info_table[] = { - [LINEAR] = { 1, 1, 64 }, - [XMAJOR] = { 512, 8, 4096 }, - [YMAJOR] = { 128, 32, 4096 }, - [WMAJOR] = { 128, 32, 4096 }, + [ISL_TILING_LINEAR] = { 64 }, + [ISL_TILING_X] = { 4096 }, + [ISL_TILING_Y] = { 4096 }, + [ISL_TILING_Yf] = { 4096 }, + [ISL_TILING_Ys] = { 4096 }, + [ISL_TILING_W] = { 4096 }, }; -static uint8_t -anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) +static enum isl_tiling +anv_image_choose_tiling(const struct anv_image_create_info *anv_info) { - if (anv_info->force_tile_mode) - return anv_info->tile_mode; + if (anv_info->force_tiling) + return anv_info->tiling; /* The Sandybridge PRM says that the stencil buffer "is supported * only in Tile W memory". @@ -115,16 +114,16 @@ anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) switch (anv_info->vk_info->tiling) { case VK_IMAGE_TILING_LINEAR: assert(anv_info->vk_info->format != VK_FORMAT_S8_UINT); - return LINEAR; + return ISL_TILING_LINEAR; case VK_IMAGE_TILING_OPTIMAL: if (unlikely(anv_info->vk_info->format == VK_FORMAT_S8_UINT)) { - return WMAJOR; + return ISL_TILING_W; } else { - return YMAJOR; + return ISL_TILING_Y; } default: assert(!"bad VKImageTiling"); - return LINEAR; + return ISL_TILING_LINEAR; } } @@ -134,7 +133,8 @@ anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) * struct anv_image_create_info. 
*/ static VkResult -anv_image_make_surface(const struct anv_image_create_info *create_info, +anv_image_make_surface(const struct anv_device *dev, + const struct anv_image_create_info *create_info, const struct anv_format *format, uint64_t *inout_image_size, uint32_t *inout_image_alignment, @@ -147,15 +147,18 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, const VkExtent3D *restrict extent = &create_info->vk_info->extent; const uint32_t levels = create_info->vk_info->mipLevels; const uint32_t array_size = create_info->vk_info->arraySize; - const uint8_t tile_mode = anv_image_choose_tile_mode(create_info); + const enum isl_tiling tiling = anv_image_choose_tiling(create_info); const struct anv_tile_info *tile_info = - &anv_tile_info_table[tile_mode]; + &anv_tile_info_table[tiling]; const uint32_t bs = format->isl_layout->bs; const uint32_t bw = format->isl_layout->bw; const uint32_t bh = format->isl_layout->bh; + struct isl_extent2d tile_extent; + isl_tiling_get_extent(&dev->isl_dev, tiling, bs, &tile_extent); + const uint32_t i = MAX(4, bw); /* FINISHME: Stop hardcoding subimage alignment */ const uint32_t j = MAX(4, bh); /* FINISHME: Stop hardcoding subimage alignment */ assert(i == 4 || i == 8 || i == 16); @@ -232,7 +235,7 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, */ assert(anv_is_aligned(qpitch, j)); - uint32_t stride = align_u32(mt_width * bs / bw, tile_info->width); + uint32_t stride = align_u32(mt_width * bs / bw, tile_extent.width); if (create_info->stride > 0) stride = create_info->stride; @@ -241,7 +244,7 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, * Sampling Engine Surfaces >> Buffer Padding Requirements: */ const uint32_t mem_rows = align_u32(mt_height / bh, 2 * bh); - const uint32_t size = stride * align_u32(mem_rows, tile_info->height); + const uint32_t size = stride * align_u32(mem_rows, tile_extent.height); const uint32_t offset = align_u32(*inout_image_size, 
tile_info->surface_alignment); @@ -252,7 +255,7 @@ anv_image_make_surface(const struct anv_image_create_info *create_info, *out_surface = (struct anv_surface) { .offset = offset, .stride = stride, - .tile_mode = tile_mode, + .tiling = tiling, .qpitch = qpitch, .h_align = i, .v_align = j, @@ -337,14 +340,14 @@ anv_image_create(VkDevice _device, } if (likely(anv_format_is_color(image->format))) { - r = anv_image_make_surface(create_info, image->format, + r = anv_image_make_surface(device, create_info, image->format, &image->size, &image->alignment, &image->color_surface); if (r != VK_SUCCESS) goto fail; } else { if (image->format->depth_format) { - r = anv_image_make_surface(create_info, image->format, + r = anv_image_make_surface(device, create_info, image->format, &image->size, &image->alignment, &image->depth_surface); if (r != VK_SUCCESS) @@ -352,7 +355,7 @@ anv_image_create(VkDevice _device, } if (image->format->has_stencil) { - r = anv_image_make_surface(create_info, anv_format_s8_uint, + r = anv_image_make_surface(device, create_info, anv_format_s8_uint, &image->size, &image->alignment, &image->stencil_surface); if (r != VK_SUCCESS) diff --git a/src/vulkan/anv_intel.c b/src/vulkan/anv_intel.c index f64e2dcb1e4..48ac183ad79 100644 --- a/src/vulkan/anv_intel.c +++ b/src/vulkan/anv_intel.c @@ -68,8 +68,8 @@ VkResult anv_CreateDmaBufImageINTEL( anv_image_create(_device, &(struct anv_image_create_info) { - .force_tile_mode = true, - .tile_mode = XMAJOR, + .force_tiling = true, + .tiling = ISL_TILING_X, .stride = pCreateInfo->strideInBytes, .vk_info = &(VkImageCreateInfo) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8269c7e8509..a6728392130 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1272,7 +1272,7 @@ struct anv_surface { uint8_t v_align; /**< RENDER_SURFACE_STATE.SurfaceVerticalAlignment */ /** \} */ - uint8_t tile_mode; /**< RENDER_SURFACE_STATE.TileMode */ + enum isl_tiling tiling; }; struct anv_image { @@ 
-1333,8 +1333,8 @@ struct anv_image_view { struct anv_image_create_info { const VkImageCreateInfo *vk_info; - bool force_tile_mode; - uint8_t tile_mode; + bool force_tiling; + enum isl_tiling tiling; uint32_t stride; }; diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index d3d73b8ae2d..f32fda20c09 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -547,8 +547,8 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image) VkImage vk_image; result = anv_image_create(vk_device, &(struct anv_image_create_info) { - .force_tile_mode = true, - .tile_mode = XMAJOR, + .force_tiling = true, + .tiling = ISL_TILING_X, .stride = 0, .vk_info = &(VkImageCreateInfo) { diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 38a5e15d1a6..9a5e41d025a 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -299,8 +299,8 @@ x11_create_swapchain(struct anv_wsi_implementation *impl, anv_image_create(anv_device_to_handle(device), &(struct anv_image_create_info) { - .force_tile_mode = true, - .tile_mode = XMAJOR, + .force_tiling = true, + .tiling = ISL_TILING_X, .stride = 0, .vk_info = &(VkImageCreateInfo) { diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 6f1cb8553e9..a53b6e3faf3 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -219,8 +219,9 @@ gen7_image_view_init(struct anv_image_view *iview, /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if * Tiled Surface is False." */ - .TiledSurface = surface->tile_mode > LINEAR, - .TileWalk = surface->tile_mode == YMAJOR ? TILEWALK_YMAJOR : TILEWALK_XMAJOR, + .TiledSurface = surface->tiling != ISL_TILING_LINEAR, + .TileWalk = surface->tiling == ISL_TILING_Y ? 
+ TILEWALK_YMAJOR : TILEWALK_XMAJOR, .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index ce69377fd77..22fe21db1a8 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -162,13 +162,22 @@ gen8_image_view_init(struct anv_image_view *iview, [VK_CHANNEL_SWIZZLE_A] = SCS_ALPHA }; + static const uint8_t isl_to_gen_tiling[] = { + [ISL_TILING_LINEAR] = LINEAR, + [ISL_TILING_X] = XMAJOR, + [ISL_TILING_Y] = YMAJOR, + [ISL_TILING_Yf] = YMAJOR, + [ISL_TILING_Ys] = YMAJOR, + [ISL_TILING_W] = WMAJOR, + }; + struct GEN8_RENDER_SURFACE_STATE surface_state = { .SurfaceType = image->surface_type, .SurfaceArray = image->array_size > 1, .SurfaceFormat = format_info->surface_format, .SurfaceVerticalAlignment = anv_valign[surface->v_align], .SurfaceHorizontalAlignment = anv_halign[surface->h_align], - .TileMode = surface->tile_mode, + .TileMode = isl_to_gen_tiling[surface->tiling], .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, .SamplerL2BypassModeDisable = true, -- cgit v1.2.3 From 738eaa8acff2bc4a351a62303ac43d1195160ec4 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 13 Nov 2015 11:12:46 -0800 Subject: isl: Embed brw_device_info in isl_device Suggested-by: Jason Ekstrand --- src/vulkan/anv_device.c | 7 +------ src/vulkan/isl.c | 6 +++--- src/vulkan/isl.h | 13 +++++-------- 3 files changed, 9 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 5d53deeb599..fb608592c1f 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -56,7 +56,6 @@ anv_physical_device_init(struct anv_physical_device *device, { VkResult result; int fd; - uint32_t gen10x; fd = open(path, O_RDWR | O_CLOEXEC); if (fd < 0) @@ -82,10 +81,6 @@ anv_physical_device_init(struct anv_physical_device *device, goto fail; } - gen10x = 10 * device->info->gen; - if (device->info->is_haswell) - gen10x += 5; - if (device->info->gen == 7 && 
!device->info->is_haswell && !device->info->is_baytrail) { fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); @@ -133,7 +128,7 @@ anv_physical_device_init(struct anv_physical_device *device, device->compiler->shader_debug_log = compiler_debug_log; device->compiler->shader_perf_log = compiler_perf_log; - isl_device_init(&device->isl_dev, gen10x); + isl_device_init(&device->isl_dev, device->info); return VK_SUCCESS; diff --git a/src/vulkan/isl.c b/src/vulkan/isl.c index ecfa96973dd..ebfd2bfb104 100644 --- a/src/vulkan/isl.c +++ b/src/vulkan/isl.c @@ -38,10 +38,10 @@ isl_log2u(uint32_t n) } void -isl_device_init(struct isl_device *dev, uint8_t gen10x) +isl_device_init(struct isl_device *dev, + const struct brw_device_info *info) { - assert(gen10x % 5 == 0); - dev->gen = gen10x; + dev->info = info; } /** diff --git a/src/vulkan/isl.h b/src/vulkan/isl.h index 4c38170fc72..fb0b6f4a75c 100644 --- a/src/vulkan/isl.h +++ b/src/vulkan/isl.h @@ -35,6 +35,8 @@ extern "C" { #endif +struct brw_device_info; + /** * WARNING: These values differ from the hardware enum values, which are * unstable across hardware generations. @@ -320,13 +322,7 @@ enum isl_txc { }; struct isl_device { - /** - * @brief Hardware generation, 10x. - * - * For example, gen is 70 for Ivybridge and Baytrail; gen is 75 for - * Haswell. 
- */ - uint8_t gen; + const struct brw_device_info *info; }; struct isl_extent2d { @@ -369,7 +365,8 @@ struct isl_format_layout { }; void -isl_device_init(struct isl_device *dev, uint8_t gen10x); +isl_device_init(struct isl_device *dev, + const struct brw_device_info *info); void isl_tiling_get_extent(const struct isl_device *dev, -- cgit v1.2.3 From d5ba7a26d9487e382ba96183bcf1154b99a40779 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 13 Nov 2015 15:48:48 -0800 Subject: glsl/types: Add a get_image_instance helper --- src/glsl/nir/glsl_types.cpp | 87 +++++++++++++++++++++++++++++++++++++++++++++ src/glsl/nir/glsl_types.h | 2 ++ 2 files changed, 89 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp index 309f9dca61e..80ab3591462 100644 --- a/src/glsl/nir/glsl_types.cpp +++ b/src/glsl/nir/glsl_types.cpp @@ -712,6 +712,93 @@ glsl_type::get_sampler_instance(enum glsl_sampler_dim dim, unreachable("switch statement above should be complete"); } +const glsl_type * +glsl_type::get_image_instance(enum glsl_sampler_dim dim, + bool array, glsl_base_type type) +{ + switch (type) { + case GLSL_TYPE_FLOAT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? image1DArray_type : image1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? image2DArray_type : image2D_type); + case GLSL_SAMPLER_DIM_3D: + return image3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? imageCubeArray_type : imageCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + else + return image2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + else + return imageBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? image2DMSArray_type : image2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + case GLSL_TYPE_INT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? iimage1DArray_type : iimage1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? 
iimage2DArray_type : iimage2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return iimage3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? iimageCubeArray_type : iimageCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return iimage2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return iimageBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? iimage2DMSArray_type : iimage2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + case GLSL_TYPE_UINT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? uimage1DArray_type : uimage1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? uimage2DArray_type : uimage2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return uimage3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? uimageCubeArray_type : uimageCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return uimage2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return uimageBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? 
uimage2DMSArray_type : uimage2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + default: + return error_type; + } + + unreachable("switch statement above should be complete"); +} + const glsl_type * glsl_type::get_array_instance(const glsl_type *base, unsigned array_size) { diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h index a8eade5f9e1..a85a9e6a6a8 100644 --- a/src/glsl/nir/glsl_types.h +++ b/src/glsl/nir/glsl_types.h @@ -244,6 +244,8 @@ struct glsl_type { bool array, glsl_base_type type); + static const glsl_type *get_image_instance(enum glsl_sampler_dim dim, + bool array, glsl_base_type type); /** * Get the instance of an array type -- cgit v1.2.3 From 0572444a0ec0da1309247c732a0212b3944d8f2b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 13 Nov 2015 15:49:13 -0800 Subject: nir/types: Add image type helpers --- src/glsl/nir/nir_types.cpp | 19 ++++++++++++++++--- src/glsl/nir/nir_types.h | 4 ++++ 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 4a1250e546c..27db5793a60 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -133,14 +133,14 @@ glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index) glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type) { - assert(glsl_type_is_sampler(type)); + assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); return (glsl_sampler_dim)type->sampler_dimensionality; } glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type) { - assert(glsl_type_is_sampler(type)); + assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); return (glsl_base_type)type->sampler_type; } @@ -199,6 +199,12 @@ glsl_type_is_sampler(const struct glsl_type *type) return type->is_sampler(); } +bool +glsl_type_is_image(const struct glsl_type *type) +{ + return type->is_image(); +} + bool glsl_sampler_type_is_shadow(const struct 
glsl_type *type) { @@ -209,7 +215,7 @@ glsl_sampler_type_is_shadow(const struct glsl_type *type) bool glsl_sampler_type_is_array(const struct glsl_type *type) { - assert(glsl_type_is_sampler(type)); + assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); return type->sampler_array; } @@ -289,6 +295,13 @@ glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array, return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type); } +const struct glsl_type * +glsl_image_type(enum glsl_sampler_dim dim, bool is_array, + enum glsl_base_type base_type) +{ + return glsl_type::get_image_instance(dim, is_array, base_type); +} + const glsl_type * glsl_function_type(const glsl_type *return_type, const glsl_function_param *params, unsigned num_params) diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index a61af6cba75..9cc71e899d7 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -84,6 +84,7 @@ bool glsl_type_is_matrix(const struct glsl_type *type); bool glsl_type_is_array(const struct glsl_type *type); bool glsl_type_is_struct(const struct glsl_type *type); bool glsl_type_is_sampler(const struct glsl_type *type); +bool glsl_type_is_image(const struct glsl_type *type); bool glsl_sampler_type_is_shadow(const struct glsl_type *type); bool glsl_sampler_type_is_array(const struct glsl_type *type); @@ -106,6 +107,9 @@ const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields, const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array, enum glsl_base_type base_type); +const struct glsl_type *glsl_image_type(enum glsl_sampler_dim dim, + bool is_array, + enum glsl_base_type base_type); const struct glsl_type * glsl_function_type(const struct glsl_type *return_type, const struct glsl_function_param *params, unsigned num_params); -- cgit v1.2.3 From 453239f6a5f76df3c09496ca752928d2360fd78e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 13 Nov 
2015 15:52:52 -0800 Subject: nir/spirv: Add support for image types --- src/glsl/nir/spirv_to_nir.c | 75 ++++++++++++++++++++++++++++++++++--- src/glsl/nir/spirv_to_nir_private.h | 3 ++ 2 files changed, 72 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 3f89bb2e779..6cfb784005e 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -409,6 +409,56 @@ type_decoration_cb(struct vtn_builder *b, } } +static unsigned +translate_image_format(SpvImageFormat format) +{ + switch (format) { + case SpvImageFormatUnknown: return 0; /* GL_NONE */ + case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */ + case SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */ + case SpvImageFormatR32f: return 0x822E; /* GL_R32F */ + case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */ + case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */ + case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */ + case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */ + case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */ + case SpvImageFormatR16f: return 0x822D; /* GL_R16F */ + case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */ + case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */ + case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */ + case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */ + case SpvImageFormatR16: return 0x822A; /* GL_R16 */ + case SpvImageFormatR8: return 0x8229; /* GL_R8 */ + case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */ + case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */ + case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */ + case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */ + case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */ + case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */ + case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */ + case 
SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */ + case SpvImageFormatR32i: return 0x8235; /* GL_R32I */ + case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */ + case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */ + case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */ + case SpvImageFormatR16i: return 0x8233; /* GL_R16I */ + case SpvImageFormatR8i: return 0x8231; /* GL_R8I */ + case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */ + case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */ + case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */ + case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */ + case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */ + case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */ + case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */ + case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */ + case SpvImageFormatR16ui: return 0x823A; /* GL_RG16UI */ + case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */ + default: + assert(!"Invalid image format"); + return 0; + } +} + static void vtn_handle_type(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -542,12 +592,25 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, bool is_shadow = w[4]; bool is_array = w[5]; - - assert(w[6] == 0 && "FIXME: Handl multi-sampled textures"); - assert(w[7] == 1 && "FIXME: Add support for non-sampled images"); - - val->type->type = glsl_sampler_type(dim, is_shadow, is_array, - glsl_get_base_type(sampled_type)); + bool multisampled = w[6]; + unsigned sampled = w[7]; + SpvImageFormat format = w[8]; + + assert(!multisampled && "FIXME: Handl multi-sampled textures"); + + val->type->image_format = translate_image_format(format); + + if (sampled == 1) { + val->type->type = glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); + } else if (sampled == 2) { + assert(format); + assert(!is_shadow); + val->type->type = glsl_image_type(dim, is_array, + 
glsl_get_base_type(sampled_type)); + } else { + assert(!"We need to know if the image will be sampled"); + } break; } diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index 45111f816bc..8cbf76e1ddd 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -102,6 +102,9 @@ struct vtn_type { */ bool builtin_block; + /* Image format for image_load_store type images */ + unsigned image_format; + /* for arrays and matrices, the array stride */ unsigned stride; -- cgit v1.2.3 From ffbc31d13b63363a57eeab1a74b36ba95a765dba Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 13 Nov 2015 15:53:08 -0800 Subject: nir/spirv: Add support for creating image variables --- src/glsl/nir/spirv_to_nir.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 6cfb784005e..49f7c7a7602 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1266,6 +1266,7 @@ static bool variable_is_external_block(nir_variable *var) { return var->interface_type && + glsl_type_is_struct(var->interface_type) && (var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage); } @@ -1393,6 +1394,14 @@ vtn_type_block_size(struct vtn_type *type) } } +static bool +is_interface_type(struct vtn_type *type) +{ + return type->block || type->buffer_block || + glsl_type_is_sampler(type->type) || + glsl_type_is_image(type->type); +} + static void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -1409,11 +1418,10 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, var->name = ralloc_strdup(var, val->name); struct vtn_type *interface_type; - if (type->block || type->buffer_block) { + if (is_interface_type(type)) { interface_type = type; } else if (glsl_type_is_array(type->type) && - (type->array_element->block || - 
type->array_element->buffer_block)) { + is_interface_type(type->array_element)) { interface_type = type->array_element; } else { interface_type = NULL; @@ -1432,8 +1440,17 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, /* UBO's and samplers */ var->data.mode = nir_var_uniform; var->data.read_only = true; - if (interface_type) - b->shader->info.num_ubos++; + if (interface_type) { + if (glsl_type_is_image(interface_type->type)) { + b->shader->info.num_images++; + var->data.image.format = interface_type->image_format; + } else if (glsl_type_is_sampler(interface_type->type)) { + b->shader->info.num_textures++; + } else { + assert(glsl_type_is_struct(interface_type->type)); + b->shader->info.num_ubos++; + } + } } break; case SpvStorageClassPushConstant: @@ -1498,9 +1515,10 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } } - /* Interface variables aren't actually going to be referenced by the - * generated NIR, so we don't put them in the list */ - if (interface_type) + /* Interface block variables aren't actually going to be referenced + * by the generated NIR, so we don't put them in the list + */ + if (interface_type && glsl_type_is_struct(interface_type->type)) break; if (var->data.mode == nir_var_local) { @@ -1648,7 +1666,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, struct vtn_type *src_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; - if (glsl_get_base_type(src_type->type) == GLSL_TYPE_SAMPLER) { + if (src->var->interface_type && + (glsl_type_is_sampler(src->var->interface_type) || + glsl_type_is_image(src->var->interface_type))) { vtn_push_value(b, w[2], vtn_value_type_deref)->deref = src; return; } -- cgit v1.2.3 From 164b3ca164fc5063013c16d07ddff0c705f5f49f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 13 Nov 2015 16:25:24 -0800 Subject: nir/builder: Add a nir_ssa_undef helper --- src/glsl/nir/nir_builder.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src') diff --git 
a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index bf45d0373c0..205aa067b0b 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -58,6 +58,20 @@ nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf) nir_cf_node_insert(build->cursor, cf); } +static inline nir_ssa_def * +nir_ssa_undef(nir_builder *build, unsigned num_components) +{ + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(build->shader, num_components); + if (!undef) + return NULL; + + nir_instr_insert(nir_before_block(nir_start_block(build->impl)), + &undef->instr); + + return &undef->def; +} + static inline nir_ssa_def * nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value) { -- cgit v1.2.3 From 99494b96f07f55f264aa9a33990cc2eff83ba8d1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 13 Nov 2015 17:26:22 -0800 Subject: nir/spirv: Add support for image_load_store --- src/glsl/nir/spirv_to_nir.c | 187 +++++++++++++++++++++++++++++++++++- src/glsl/nir/spirv_to_nir_private.h | 8 ++ 2 files changed, 193 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 49f7c7a7602..db5823c9da1 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1689,7 +1689,6 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvOpCopyMemorySized: case SpvOpArrayLength: - case SpvOpImageTexelPointer: default: unreachable("Unhandled opcode"); } @@ -1911,6 +1910,163 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, nir_builder_instr_insert(&b->nb, &instr->instr); } +static nir_ssa_def * +get_image_coord(struct vtn_builder *b, uint32_t value) +{ + struct vtn_ssa_value *coord = vtn_ssa_value(b, value); + + /* The image_load_store intrinsics assume a 4-dim coordinate */ + unsigned dim = glsl_get_vector_elements(coord->type); + unsigned swizzle[4]; + for (unsigned i = 0; i < 4; i++) + swizzle[i] = MIN2(i, dim - 1); + + return 
nir_swizzle(&b->nb, coord->def, swizzle, 4, false); +} + +static void +vtn_handle_image(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + /* Just get this one out of the way */ + if (opcode == SpvOpImageTexelPointer) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_image_pointer); + val->image = ralloc(b, struct vtn_image_pointer); + + val->image->deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; + val->image->coord = get_image_coord(b, w[4]); + val->image->sample = vtn_ssa_value(b, w[5])->def; + return; + } + + struct vtn_image_pointer image; + + switch (opcode) { + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; + break; + + case SpvOpImageRead: + image.deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; + image.coord = get_image_coord(b, w[4]); + + if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) { + assert(w[5] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[6])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1); + } + break; + + case SpvOpImageWrite: + image.deref = vtn_value(b, w[1], vtn_value_type_deref)->deref; + image.coord = get_image_coord(b, w[2]); + + /* texel = w[3] */ + + if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) { + assert(w[4] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[5])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1); + } + + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_op op; + switch (opcode) { +#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break; + OP(ImageRead, load) + 
OP(ImageWrite, store) + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_min) + OP(AtomicUMin, atomic_min) + OP(AtomicSMax, atomic_max) + OP(AtomicUMax, atomic_max) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); + intrin->variables[0] = + nir_deref_as_var(nir_copy_deref(&intrin->instr, &image.deref->deref)); + intrin->src[0] = nir_src_for_ssa(image.coord); + intrin->src[1] = nir_src_for_ssa(image.sample); + + switch (opcode) { + case SpvOpImageRead: + break; + case SpvOpImageWrite: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); + break; + case SpvOpAtomicIIncrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); + break; + case SpvOpAtomicIDecrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); + break; + + case SpvOpAtomicExchange: + case SpvOpAtomicIAdd: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicCompareExchange: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); + intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicISub: + intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); + break; + + default: + unreachable("Invalid image opcode"); + } + + if (opcode != SpvOpImageWrite) { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_ssa_dest_init(&intrin->instr, &intrin->dest, + 
glsl_get_vector_elements(type->type), NULL); + val->ssa = vtn_create_ssa_value(b, type->type); + val->ssa->def = &intrin->dest.ssa; + } + + nir_builder_instr_insert(&b->nb, &intrin->instr); +} static nir_alu_instr * create_vec(void *mem_ctx, unsigned num_components) @@ -3076,7 +3232,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpAccessChain: case SpvOpInBoundsAccessChain: case SpvOpArrayLength: - case SpvOpImageTexelPointer: vtn_handle_variables(b, opcode, w, count); break; @@ -3103,6 +3258,34 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_texture(b, opcode, w, count); break; + case SpvOpImageRead: + case SpvOpImageWrite: + case SpvOpImageTexelPointer: + vtn_handle_image(b, opcode, w, count); + break; + + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: { + struct vtn_value *pointer = vtn_untyped_value(b, w[3]); + if (pointer->value_type == vtn_value_type_image_pointer) { + vtn_handle_image(b, opcode, w, count); + } else { + assert(!"Atomic buffers not yet implemented"); + } + } + case SpvOpSNegate: case SpvOpFNegate: case SpvOpNot: diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index 8cbf76e1ddd..f7be166da16 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -45,6 +45,7 @@ enum vtn_value_type { vtn_value_type_block, vtn_value_type_ssa, vtn_value_type_extension, + vtn_value_type_image_pointer, }; struct vtn_block { @@ -120,6 +121,12 @@ struct vtn_type { SpvBuiltIn builtin; }; +struct vtn_image_pointer { + nir_deref_var *deref; + nir_ssa_def *coord; + nir_ssa_def *sample; +}; + struct vtn_value { enum 
vtn_value_type value_type; const char *name; @@ -136,6 +143,7 @@ struct vtn_value { nir_deref_var *deref; struct vtn_type *deref_type; }; + struct vtn_image_pointer *image; struct vtn_function *func; struct vtn_block *block; struct vtn_ssa_value *ssa; -- cgit v1.2.3 From c68e28d7663e8cf94c3587d3043b958386be6fec Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 13 Nov 2015 21:31:58 -0800 Subject: nir/spirv: Refactor vtn_block_load We pull the offset calculations out into their own function so we can re-use it for stores. --- src/glsl/nir/spirv_to_nir.c | 54 ++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index db5823c9da1..3f4b1202c8f 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1160,51 +1160,48 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, return val; } -static struct vtn_ssa_value * -vtn_block_load(struct vtn_builder *b, nir_deref_var *src, - struct vtn_type *type, nir_deref *src_tail) +static void +vtn_block_get_offset(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type **type, nir_deref *src_tail, + nir_ssa_def **index, + unsigned *offset, nir_ssa_def **indirect) { - unsigned set = src->var->data.descriptor_set; - unsigned binding = src->var->data.binding; - nir_variable_mode mode = src->var->data.mode; - nir_deref *deref = &src->deref; - nir_ssa_def *index; if (deref->child->deref_type == nir_deref_type_array) { deref = deref->child; - type = type->array_element; + *type = (*type)->array_element; nir_deref_array *deref_array = nir_deref_as_array(deref); - index = nir_imm_int(&b->nb, deref_array->base_offset); + *index = nir_imm_int(&b->nb, deref_array->base_offset); if (deref_array->deref_array_type == nir_deref_array_type_indirect) - index = nir_iadd(&b->nb, index, deref_array->indirect.ssa); + *index = nir_iadd(&b->nb, *index, deref_array->indirect.ssa); } 
else { - index = nir_imm_int(&b->nb, 0); + *index = nir_imm_int(&b->nb, 0); } - unsigned offset = 0; - nir_ssa_def *indirect = NULL; + *offset = 0; + *indirect = NULL; while (deref != src_tail) { deref = deref->child; switch (deref->deref_type) { case nir_deref_type_array: { nir_deref_array *deref_array = nir_deref_as_array(deref); if (deref_array->deref_array_type == nir_deref_array_type_direct) { - offset += type->stride * deref_array->base_offset; + *offset += (*type)->stride * deref_array->base_offset; } else { - nir_ssa_def *offset = nir_imul(&b->nb, deref_array->indirect.ssa, - nir_imm_int(&b->nb, type->stride)); - indirect = indirect ? nir_iadd(&b->nb, indirect, offset) : offset; + nir_ssa_def *off = nir_imul(&b->nb, deref_array->indirect.ssa, + nir_imm_int(&b->nb, (*type)->stride)); + *indirect = *indirect ? nir_iadd(&b->nb, *indirect, off) : off; } - type = type->array_element; + *type = (*type)->array_element; break; } case nir_deref_type_struct: { nir_deref_struct *deref_struct = nir_deref_as_struct(deref); - offset += type->offsets[deref_struct->index]; - type = type->members[deref_struct->index]; + *offset += (*type)->offsets[deref_struct->index]; + *type = (*type)->members[deref_struct->index]; break; } @@ -1212,6 +1209,16 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src, unreachable("unknown deref type"); } } +} + +static struct vtn_ssa_value * +vtn_block_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *type, nir_deref *src_tail) +{ + nir_ssa_def *index; + unsigned offset; + nir_ssa_def *indirect; + vtn_block_get_offset(b, src, &type, src_tail, &index, &offset, &indirect); nir_intrinsic_op op; if (src->var->data.mode == nir_var_uniform) { @@ -1235,8 +1242,9 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src, : nir_intrinsic_load_ssbo; } - return _vtn_block_load(b, op, set, binding, mode, index, - offset, indirect, type); + return _vtn_block_load(b, op, src->var->data.descriptor_set, + src->var->data.binding, 
src->var->data.mode, + index, offset, indirect, type); } /* -- cgit v1.2.3 From c1733886a6ba28d781da14fa2c6bc4ddf5ef675d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 13 Nov 2015 21:33:16 -0800 Subject: nir/spirv: Add support for SSBO stores This only handles vector stores, not component-of-a-vector stores. --- src/glsl/nir/spirv_to_nir.c | 88 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 3f4b1202c8f..45964e65b7e 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1305,6 +1305,65 @@ vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, return val; } +static void +_vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op, + struct vtn_ssa_value *src, unsigned set, unsigned binding, + nir_variable_mode mode, nir_ssa_def *index, unsigned offset, + nir_ssa_def *indirect, struct vtn_type *type) +{ + assert(src->type == type->type); + if (glsl_type_is_vector_or_scalar(type->type)) { + nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); + store->num_components = glsl_get_vector_elements(type->type); + store->const_index[0] = offset; + store->const_index[1] = (1 << store->num_components) - 1; + store->src[0] = nir_src_for_ssa(src->def); + + nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, + set, binding, + mode, index); + store->src[1] = nir_src_for_ssa(res_index); + + if (op == nir_intrinsic_store_ssbo_indirect) + store->src[2] = nir_src_for_ssa(indirect); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else { + unsigned elems = glsl_get_length(type->type); + if (glsl_type_is_struct(type->type)) { + for (unsigned i = 0; i < elems; i++) { + _vtn_block_store(b, op, src->elems[i], set, binding, mode, + index, offset + type->offsets[i], indirect, + type->members[i]); + } + } else { + for (unsigned i = 0; i < elems; i++) { + _vtn_block_store(b, op, 
src->elems[i], set, binding, mode, + index, offset + i * type->stride, indirect, + type->array_element); + } + } + } +} + +static void +vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest, struct vtn_type *type, + nir_deref *dest_tail) +{ + nir_ssa_def *index; + unsigned offset; + nir_ssa_def *indirect; + vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset, &indirect); + + nir_intrinsic_op op = indirect ? nir_intrinsic_store_ssbo_indirect + : nir_intrinsic_store_ssbo; + + return _vtn_block_store(b, op, src, dest->var->data.descriptor_set, + dest->var->data.binding, dest->var->data.mode, + index, offset, indirect, type); +} + static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, unsigned index); @@ -1318,19 +1377,24 @@ vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, nir_deref_var *dest, struct vtn_type *dest_type) { nir_deref *dest_tail = get_deref_tail(dest); - if (dest_tail->child) { - struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); - nir_deref_array *deref = nir_deref_as_array(dest_tail->child); - assert(deref->deref.child == NULL); - if (deref->deref_array_type == nir_deref_array_type_direct) - val->def = vtn_vector_insert(b, val->def, src->def, - deref->base_offset); - else - val->def = vtn_vector_insert_dynamic(b, val->def, src->def, - deref->indirect.ssa); - _vtn_variable_store(b, dest, dest_tail, val); + if (variable_is_external_block(dest->var)) { + assert(dest->var->data.mode == nir_var_shader_storage); + vtn_block_store(b, src, dest, dest_type, dest_tail); } else { - _vtn_variable_store(b, dest, dest_tail, src); + if (dest_tail->child) { + struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, 
+ deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_variable_store(b, dest, dest_tail, val); + } else { + _vtn_variable_store(b, dest, dest_tail, src); + } } } -- cgit v1.2.3 From 91bc4e7cec4098000f3d9a265e7db7907a686e5a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 13 Nov 2015 21:49:39 -0800 Subject: anv/pipeline: Don't free blend states that don't exist Compute pipelines don't need a blend state so we shouldn't be unconditionally freeing it. --- src/vulkan/anv_pipeline.c | 3 ++- src/vulkan/gen8_pipeline.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 124140fa224..a9cf16f79c2 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -244,7 +244,8 @@ void anv_DestroyPipeline( anv_reloc_list_finish(&pipeline->batch_relocs, pipeline->device); anv_state_stream_finish(&pipeline->program_stream); - anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + if (pipeline->blend_state.map) + anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); anv_device_free(pipeline->device, pipeline); } diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index a51cf4924ae..e98045248a4 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -598,6 +598,8 @@ VkResult gen8_compute_pipeline_create( pipeline->device = device; pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + pipeline->blend_state.map = NULL; + result = anv_reloc_list_init(&pipeline->batch_relocs, device); if (result != VK_SUCCESS) { anv_device_free(device, pipeline); -- cgit v1.2.3 From e8f51fe4deb5082fece5f8cb167b89b0f03eb244 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 13 Nov 2015 22:50:52 -0800 Subject: anv/gen8: Subtract 1 from num_elements when setting up buffer surface state --- src/vulkan/gen8_state.c | 6 +++--- 1 
file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 22fe21db1a8..6eb65e6ec4e 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -45,9 +45,9 @@ gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, .MemoryObjectControlState = GEN8_MOCS, - .Height = (num_elements >> 7) & 0x3fff, - .Width = num_elements & 0x7f, - .Depth = (num_elements >> 21) & 0x3f, + .Height = ((num_elements - 1) >> 7) & 0x3fff, + .Width = (num_elements - 1) & 0x7f, + .Depth = ((num_elements - 1) >> 21) & 0x3f, .SurfacePitch = stride - 1, .NumberofMultisamples = MULTISAMPLECOUNT_1, .ShaderChannelSelectRed = SCS_RED, -- cgit v1.2.3 From b169bb902a5e8dfbae9c49faf599f7771dbbf10a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 2 Nov 2015 17:58:29 -0800 Subject: nir: Separate texture from sampler in nir_tex_instr This commit adds the capability to NIR to support separate textures and samplers. As it currently stands, glsl_to_nir only sets the sampler and leaves the texture alone as it did before and nir_lower_samplers assumes this. However, backends can, if they wish, assume that they are separate because nir_lower_samplers sets both texture and sampler index (they are the same in this case). 
--- src/glsl/nir/nir.c | 8 +++++++- src/glsl/nir/nir.h | 28 ++++++++++++++++++++++++---- src/glsl/nir/nir_instr_set.c | 13 ++++++++----- src/glsl/nir/nir_lower_samplers.c | 15 +++++++++++++-- src/glsl/nir/nir_print.c | 14 +++++++++++--- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 2 +- src/mesa/program/prog_to_nir.c | 1 + 8 files changed, 66 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index bb7a5fa5835..3157ff82d99 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -488,8 +488,10 @@ nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) for (unsigned i = 0; i < num_srcs; i++) src_init(&instr->src[i].src); + instr->texture_index = 0; + instr->texture_array_size = 0; + instr->texture = NULL; instr->sampler_index = 0; - instr->sampler_array_size = 0; instr->sampler = NULL; return instr; @@ -1007,6 +1009,10 @@ visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state) if (!visit_src(&instr->src[i].src, cb, state)) return false; + if (instr->texture != NULL) + if (!visit_deref_src(instr->texture, cb, state)) + return false; + if (instr->sampler != NULL) if (!visit_deref_src(instr->sampler, cb, state)) return false; diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index beabcafef4e..ca65e0566f5 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -942,6 +942,7 @@ typedef enum { nir_tex_src_ms_index, /* MSAA sample index */ nir_tex_src_ddx, nir_tex_src_ddy, + nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */ nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ nir_num_tex_src_types } nir_tex_src_type; @@ -989,6 +990,24 @@ typedef struct { /* gather component selector */ unsigned component : 2; + /** The texture index + * + * If this texture instruction has a nir_tex_src_texture_offset source, + * then the texture index is given by texture_index + texture_offset. 
+ */ + unsigned texture_index; + + /** The size of the texture array or 0 if it's not an array */ + unsigned texture_array_size; + + /** The texture deref + * + * If both this and `sampler` are both NULL, use texture_index instead. + * If `texture` is NULL, but `sampler` is non-NULL, then the texture is + * implied from the sampler. + */ + nir_deref_var *texture; + /** The sampler index * * If this texture instruction has a nir_tex_src_sampler_offset source, @@ -996,10 +1015,11 @@ typedef struct { */ unsigned sampler_index; - /** The size of the sampler array or 0 if it's not an array */ - unsigned sampler_array_size; - - nir_deref_var *sampler; /* if this is NULL, use sampler_index instead */ + /** The sampler deref + * + * If this is null, use sampler_index instead. + */ + nir_deref_var *sampler; } nir_tex_instr; static inline unsigned diff --git a/src/glsl/nir/nir_instr_set.c b/src/glsl/nir/nir_instr_set.c index d3f939fe805..eb021326097 100644 --- a/src/glsl/nir/nir_instr_set.c +++ b/src/glsl/nir/nir_instr_set.c @@ -155,8 +155,9 @@ hash_tex(uint32_t hash, const nir_tex_instr *instr) hash = HASH(hash, instr->const_offset); unsigned component = instr->component; hash = HASH(hash, component); + hash = HASH(hash, instr->texture_index); + hash = HASH(hash, instr->texture_array_size); hash = HASH(hash, instr->sampler_index); - hash = HASH(hash, instr->sampler_array_size); assert(!instr->sampler); @@ -305,13 +306,15 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) memcmp(tex1->const_offset, tex2->const_offset, sizeof(tex1->const_offset)) != 0 || tex1->component != tex2->component || - tex1->sampler_index != tex2->sampler_index || - tex1->sampler_array_size != tex2->sampler_array_size) { + tex1->texture_index != tex2->texture_index || + tex1->texture_array_size != tex2->texture_array_size || + tex1->sampler_index != tex2->sampler_index) { return false; } /* Don't support un-lowered sampler derefs currently. 
*/ - assert(!tex1->sampler && !tex2->sampler); + assert(!tex1->texture && !tex1->sampler && + !tex2->texture && !tex2->sampler); return true; } @@ -422,7 +425,7 @@ instr_can_rewrite(nir_instr *instr) nir_tex_instr *tex = nir_instr_as_tex(instr); /* Don't support un-lowered sampler derefs currently. */ - if (tex->sampler) + if (tex->texture || tex->sampler) return false; return true; diff --git a/src/glsl/nir/nir_lower_samplers.c b/src/glsl/nir/nir_lower_samplers.c index 5df79a69a06..19deafab37a 100644 --- a/src/glsl/nir/nir_lower_samplers.c +++ b/src/glsl/nir/nir_lower_samplers.c @@ -95,6 +95,9 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr if (instr->sampler == NULL) return; + /* GLSL only has combined textures/samplers */ + assert(instr->texture == NULL); + instr->sampler_index = 0; unsigned location = instr->sampler->var->data.location; unsigned array_elements = 1; @@ -107,7 +110,7 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr if (indirect) { /* First, we have to resize the array of texture sources */ nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, - instr->num_srcs + 1); + instr->num_srcs + 2); for (unsigned i = 0; i < instr->num_srcs; i++) { new_srcs[i].src_type = instr->src[i].src_type; @@ -121,13 +124,19 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr /* Now we can go ahead and move the source over to being a * first-class texture source. 
*/ + instr->src[instr->num_srcs].src_type = nir_tex_src_texture_offset; + instr->num_srcs++; + nir_instr_rewrite_src(&instr->instr, + &instr->src[instr->num_srcs - 1].src, + nir_src_for_ssa(indirect)); + instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; instr->num_srcs++; nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src, nir_src_for_ssa(indirect)); - instr->sampler_array_size = array_elements; + instr->texture_array_size = array_elements; } if (location > shader_program->NumUniformStorage - 1 || @@ -140,6 +149,8 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr shader_program->UniformStorage[location].opaque[stage].index; instr->sampler = NULL; + + instr->texture_index = instr->sampler_index; } typedef struct { diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index f7f5fdf3181..2db209d434d 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -551,6 +551,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) case nir_tex_src_ddy: fprintf(fp, "(ddy)"); break; + case nir_tex_src_texture_offset: + fprintf(fp, "(texture_offset)"); + break; case nir_tex_src_sampler_offset: fprintf(fp, "(sampler_offset)"); break; @@ -581,13 +584,18 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) fprintf(fp, "%u (gather_component), ", instr->component); } + if (instr->texture) { + assert(instr->sampler); + fprintf(fp, " (texture)"); + } if (instr->sampler) { print_deref(instr->sampler, state); + fprintf(fp, " (sampler)"); } else { - fprintf(fp, "%u", instr->sampler_index); + assert(instr->texture == NULL); + fprintf(fp, "%u (texture) %u (sampler)", + instr->texture_index, instr->sampler_index); } - - fprintf(fp, " (sampler)"); } static void diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index c282f835cae..19084fe5bcd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2592,7 +2592,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) case nir_tex_src_sampler_offset: { /* Figure out the highest possible sampler index and mark it as used */ - uint32_t max_used = sampler + instr->sampler_array_size - 1; + uint32_t max_used = sampler + instr->texture_array_size - 1; if (instr->op == nir_texop_tg4 && devinfo->gen < 8) { max_used += stage_prog_data->binding_table.gather_texture_start; } else { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 27933d7d61c..e86eb1403fa 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1673,7 +1673,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) * the last element of the array. Mark it here, because the generator * doesn't have enough information to determine the bound. */ - uint32_t array_size = instr->sampler_array_size; + uint32_t array_size = instr->texture_array_size; uint32_t max_used = sampler + array_size - 1; if (instr->op == nir_texop_tg4) { max_used += prog_data->base.binding_table.gather_texture_start; diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 539e3c05312..d5386ee70e8 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -609,6 +609,7 @@ ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src, instr->op = op; instr->dest_type = nir_type_float; instr->is_shadow = prog_inst->TexShadow; + instr->texture_index = prog_inst->TexSrcUnit; instr->sampler_index = prog_inst->TexSrcUnit; switch (prog_inst->TexSrcTarget) { -- cgit v1.2.3 From c2a373ec85a628bdc6f372c573c5065f285f3731 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 2 Nov 2015 15:24:05 -0800 Subject: i965/fs: Separate the sampler from the surface in generate_tex --- src/mesa/drivers/dri/i965/brw_fs.h | 1 + src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 20 
++++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index f40e58b8ca0..372b85ef359 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -462,6 +462,7 @@ private: void generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, + struct brw_reg surface_index, struct brw_reg sampler_index); void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst, struct brw_reg src, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 139cda3ca59..656c50969c4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -679,6 +679,7 @@ fs_generator::generate_get_buffer_size(fs_inst *inst, void fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, + struct brw_reg surface_index, struct brw_reg sampler_index) { int msg_type = -1; @@ -915,14 +916,16 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src ? 
prog_data->binding_table.gather_texture_start : prog_data->binding_table.texture_start; - if (sampler_index.file == BRW_IMMEDIATE_VALUE) { + if (surface_index.file == BRW_IMMEDIATE_VALUE && + sampler_index.file == BRW_IMMEDIATE_VALUE) { + uint32_t surface = surface_index.ud; uint32_t sampler = sampler_index.ud; brw_SAMPLE(p, retype(dst, BRW_REGISTER_TYPE_UW), inst->base_mrf, src, - sampler + base_binding_table_index, + surface + base_binding_table_index, sampler % 16, msg_type, rlen, @@ -931,19 +934,24 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src simd_mode, return_format); - brw_mark_surface_used(prog_data, sampler + base_binding_table_index); + brw_mark_surface_used(prog_data, surface + base_binding_table_index); } else { /* Non-const sampler index */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); + struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD)); struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ - brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + if (memcmp(&surface_reg, &sampler_reg, sizeof(surface_reg)) == 0) { + brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + } else { + brw_SHL(p, addr, sampler_reg, brw_imm_ud(8)); + brw_OR(p, addr, addr, surface_reg); + } if (base_binding_table_index) brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); @@ -2052,7 +2060,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(inst, dst, src[0], src[1]); + generate_tex(inst, dst, src[0], src[1], src[1]); break; case FS_OPCODE_DDX_COARSE: case FS_OPCODE_DDX_FINE: -- cgit v1.2.3 From 
c09e140b650f26d153c5310f987ce0dcb51c04a8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 2 Nov 2015 16:04:29 -0800 Subject: i965/fs: Plumb separate surfaces and samplers through from NIR --- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 47 +++++++++++++++---------- src/mesa/drivers/dri/i965/brw_fs.h | 4 ++- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 25 +++++++++---- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 16 +++++---- 6 files changed, 60 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index 5308d175416..7fa4ce87f18 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -85,7 +85,7 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct brw_reg &dst, unsigned msg_length) { fs_inst *inst = new (mem_ctx) fs_inst(op, 16, dst, brw_message_reg(base_mrf), - fs_reg(0u)); + fs_reg(0u), fs_reg(0u)); inst->base_mrf = base_mrf; inst->mlen = msg_length; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 80b8c8e1207..09cb4c3620f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -734,15 +734,15 @@ fs_inst::components_read(unsigned i) const case SHADER_OPCODE_LOD_LOGICAL: case SHADER_OPCODE_TG4_LOGICAL: case SHADER_OPCODE_TG4_OFFSET_LOGICAL: - assert(src[8].file == IMM && src[9].file == IMM); + assert(src[9].file == IMM && src[10].file == IMM); /* Texture coordinates. */ if (i == 0) - return src[8].ud; + return src[9].ud; /* Texture derivatives. */ else if ((i == 2 || i == 3) && opcode == SHADER_OPCODE_TXD_LOGICAL) - return src[9].ud; + return src[10].ud; /* Texture offset. 
*/ - else if (i == 7) + else if (i == 8) return 2; /* MCS */ else if (i == 5 && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL) @@ -3618,6 +3618,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &coordinate, const fs_reg &shadow_c, const fs_reg &lod, const fs_reg &lod2, + const fs_reg &surface, const fs_reg &sampler, unsigned coord_components, unsigned grad_components) @@ -3710,8 +3711,9 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op, inst->opcode = op; inst->src[0] = reg_undef; - inst->src[1] = sampler; - inst->resize_sources(2); + inst->src[1] = surface; + inst->src[2] = sampler; + inst->resize_sources(3); inst->base_mrf = msg_begin.nr; inst->mlen = msg_end.nr - msg_begin.nr; inst->header_size = 1; @@ -3723,6 +3725,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &shadow_c, fs_reg lod, fs_reg lod2, const fs_reg &sample_index, + const fs_reg &surface, const fs_reg &sampler, const fs_reg &offset_value, unsigned coord_components, @@ -3805,8 +3808,9 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op, inst->opcode = op; inst->src[0] = reg_undef; - inst->src[1] = sampler; - inst->resize_sources(2); + inst->src[1] = surface; + inst->src[2] = sampler; + inst->resize_sources(3); inst->base_mrf = message.nr; inst->mlen = msg_end.nr - message.nr; inst->header_size = header_size; @@ -3830,7 +3834,9 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &shadow_c, fs_reg lod, fs_reg lod2, const fs_reg &sample_index, - const fs_reg &mcs, const fs_reg &sampler, + const fs_reg &mcs, + const fs_reg &surface, + const fs_reg &sampler, fs_reg offset_value, unsigned coord_components, unsigned grad_components) @@ -4033,8 +4039,9 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, /* Generate the SEND. 
*/ inst->opcode = op; inst->src[0] = src_payload; - inst->src[1] = sampler; - inst->resize_sources(2); + inst->src[1] = surface; + inst->src[2] = sampler; + inst->resize_sources(3); inst->base_mrf = -1; inst->mlen = mlen; inst->header_size = header_size; @@ -4053,25 +4060,27 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op) const fs_reg &lod2 = inst->src[3]; const fs_reg &sample_index = inst->src[4]; const fs_reg &mcs = inst->src[5]; - const fs_reg &sampler = inst->src[6]; - const fs_reg &offset_value = inst->src[7]; - assert(inst->src[8].file == IMM && inst->src[9].file == IMM); - const unsigned coord_components = inst->src[8].ud; - const unsigned grad_components = inst->src[9].ud; + const fs_reg &surface = inst->src[6]; + const fs_reg &sampler = inst->src[7]; + const fs_reg &offset_value = inst->src[8]; + assert(inst->src[9].file == IMM && inst->src[10].file == IMM); + const unsigned coord_components = inst->src[9].ud; + const unsigned grad_components = inst->src[10].ud; if (devinfo->gen >= 7) { lower_sampler_logical_send_gen7(bld, inst, op, coordinate, shadow_c, lod, lod2, sample_index, - mcs, sampler, offset_value, + mcs, surface, sampler, offset_value, coord_components, grad_components); } else if (devinfo->gen >= 5) { lower_sampler_logical_send_gen5(bld, inst, op, coordinate, shadow_c, lod, lod2, sample_index, - sampler, offset_value, + surface, sampler, offset_value, coord_components, grad_components); } else { lower_sampler_logical_send_gen4(bld, inst, op, coordinate, - shadow_c, lod, lod2, sampler, + shadow_c, lod, lod2, + surface, sampler, coord_components, grad_components); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 372b85ef359..9b56afd292f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -116,7 +116,7 @@ public: void setup_uniform_clipplane_values(gl_clip_plane *clip_planes); void compute_clip_distance(gl_clip_plane *clip_planes); 
- uint32_t gather_channel(int orig_chan, uint32_t sampler); + uint32_t gather_channel(int orig_chan, uint32_t surface, uint32_t sampler); void swizzle_result(ir_texture_opcode op, int dest_components, fs_reg orig_val, uint32_t sampler); @@ -231,6 +231,8 @@ public: int gather_component, bool is_cube_array, bool is_rect, + uint32_t surface, + fs_reg surface_reg, uint32_t sampler, fs_reg sampler_reg); fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 656c50969c4..9d7fb94c397 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -2060,7 +2060,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(inst, dst, src[0], src[1], src[1]); + generate_tex(inst, dst, src[0], src[1], src[2]); break; case FS_OPCODE_DDX_COARSE: case FS_OPCODE_DDX_FINE: diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 19084fe5bcd..33bcdbbf674 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2522,7 +2522,9 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, void fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { + unsigned texture = instr->texture_index; unsigned sampler = instr->sampler_index; + fs_reg texture_reg(texture); fs_reg sampler_reg(sampler); int gather_component = instr->component; @@ -2590,9 +2592,9 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) case nir_tex_src_projector: unreachable("should be lowered"); - case nir_tex_src_sampler_offset: { - /* Figure out the highest possible sampler index and mark it as used */ - uint32_t max_used = sampler + instr->texture_array_size - 1; + case nir_tex_src_texture_offset: 
{ + /* Figure out the highest possible texture index and mark it as used */ + uint32_t max_used = texture + instr->texture_array_size - 1; if (instr->op == nir_texop_tg4 && devinfo->gen < 8) { max_used += stage_prog_data->binding_table.gather_texture_start; } else { @@ -2600,6 +2602,14 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) } brw_mark_surface_used(prog_data, max_used); + /* Emit code to evaluate the actual indexing expression */ + texture_reg = vgrf(glsl_type::uint_type); + bld.ADD(texture_reg, src, fs_reg(texture)); + texture_reg = bld.emit_uniformize(texture_reg); + break; + } + + case nir_tex_src_sampler_offset: { /* Emit code to evaluate the actual indexing expression */ sampler_reg = vgrf(glsl_type::uint_type); bld.ADD(sampler_reg, src, fs_reg(sampler)); @@ -2614,8 +2624,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) if (instr->op == nir_texop_txf_ms) { if (devinfo->gen >= 7 && - key_tex->compressed_multisample_layout_mask & (1 << sampler)) { - mcs = emit_mcs_fetch(coordinate, instr->coord_components, sampler_reg); + key_tex->compressed_multisample_layout_mask & (1 << texture)) { + mcs = emit_mcs_fetch(coordinate, instr->coord_components, texture_reg); } else { mcs = fs_reg(0u); } @@ -2652,7 +2662,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D); fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, dst, bld.vgrf(BRW_REGISTER_TYPE_D, 1), - sampler_reg); + texture_reg, texture_reg); inst->mlen = 1; inst->header_size = 1; inst->base_mrf = -1; @@ -2665,7 +2675,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) emit_texture(op, dest_type, coordinate, instr->coord_components, shadow_comparitor, lod, lod2, lod_components, sample_index, tex_offset, mcs, gather_component, - is_cube_array, is_rect, sampler, sampler_reg); + is_cube_array, is_rect, + texture, texture_reg, sampler, 
sampler_reg); fs_reg dest = get_nir_dest(instr->dest); dest.type = this->result.type; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index a7bd9cea7af..2647a40c730 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -198,12 +198,12 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, /* Sample from the MCS surface attached to this multisample texture. */ fs_reg fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components, - const fs_reg &sampler) + const fs_reg &texture) { const fs_reg dest = vgrf(glsl_type::uvec4_type); const fs_reg srcs[] = { coordinate, fs_reg(), fs_reg(), fs_reg(), fs_reg(), fs_reg(), - sampler, fs_reg(), fs_reg(components), fs_reg(0) + texture, texture, fs_reg(), fs_reg(components), fs_reg(0) }; fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs, ARRAY_SIZE(srcs)); @@ -228,6 +228,8 @@ fs_visitor::emit_texture(ir_texture_opcode op, int gather_component, bool is_cube_array, bool is_rect, + uint32_t surface, + fs_reg surface_reg, uint32_t sampler, fs_reg sampler_reg) { @@ -273,7 +275,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 4, 1)); const fs_reg srcs[] = { coordinate, shadow_c, lod, lod2, - sample_index, mcs, sampler_reg, offset_value, + sample_index, mcs, surface_reg, sampler_reg, offset_value, fs_reg(coord_components), fs_reg(grad_components) }; enum opcode opcode; @@ -326,10 +328,10 @@ fs_visitor::emit_texture(ir_texture_opcode op, if (op == ir_tg4) { inst->offset |= - gather_channel(gather_component, sampler) << 16; /* M0.2:16-17 */ + gather_channel(gather_component, surface, sampler) << 16; /* M0.2:16-17 */ if (devinfo->gen == 6) - emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], dst); + emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], dst); } /* fixup #layers for cube map arrays */ @@ -387,7 
+389,7 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst) * Set up the gather channel based on the swizzle, for gather4. */ uint32_t -fs_visitor::gather_channel(int orig_chan, uint32_t sampler) +fs_visitor::gather_channel(int orig_chan, uint32_t surface, uint32_t sampler) { int swiz = GET_SWZ(key_tex->swizzles[sampler], orig_chan); switch (swiz) { @@ -396,7 +398,7 @@ fs_visitor::gather_channel(int orig_chan, uint32_t sampler) /* gather4 sampler is broken for green channel on RG32F -- * we must ask for blue instead. */ - if (key_tex->gather_channel_quirk_mask & (1 << sampler)) + if (key_tex->gather_channel_quirk_mask & (1 << surface)) return 2; return 1; case SWIZZLE_Z: return 2; -- cgit v1.2.3 From 3dd84822df7dbf48f24611eb20728b4041ffe315 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 2 Nov 2015 18:28:49 -0800 Subject: i965/vec4: Separate the sampler from the surface in generate_tex --- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 20107ac2054..432ccb77cc3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -108,6 +108,7 @@ generate_tex(struct brw_codegen *p, vec4_instruction *inst, struct brw_reg dst, struct brw_reg src, + struct brw_reg surface_index, struct brw_reg sampler_index) { const struct brw_device_info *devinfo = p->devinfo; @@ -263,14 +264,16 @@ generate_tex(struct brw_codegen *p, ? 
prog_data->base.binding_table.gather_texture_start : prog_data->base.binding_table.texture_start; - if (sampler_index.file == BRW_IMMEDIATE_VALUE) { + if (surface_index.file == BRW_IMMEDIATE_VALUE && + sampler_index.file == BRW_IMMEDIATE_VALUE) { + uint32_t surface = surface_index.ud; uint32_t sampler = sampler_index.ud; brw_SAMPLE(p, dst, inst->base_mrf, src, - sampler + base_binding_table_index, + surface + base_binding_table_index, sampler % 16, msg_type, 1, /* response length */ @@ -284,14 +287,19 @@ generate_tex(struct brw_codegen *p, /* Non-constant sampler index. */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); + struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD)); struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ - brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + if (memcmp(&surface_reg, &sampler_reg, sizeof(surface_reg)) == 0) { + brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + } else { + brw_SHL(p, addr, sampler_reg, brw_imm_ud(8)); + brw_OR(p, addr, addr, surface_reg); + } if (base_binding_table_index) brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); @@ -1318,7 +1326,7 @@ generate_code(struct brw_codegen *p, case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(p, prog_data, inst, dst, src[0], src[1]); + generate_tex(p, prog_data, inst, dst, src[0], src[1], src[1]); break; case VS_OPCODE_URB_WRITE: -- cgit v1.2.3 From c7d504ad937b084a345fd6252784435d451ab03e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 2 Nov 2015 18:39:17 -0800 Subject: i965/vec4: Plumb separate surfaces and samplers through from NIR --- src/mesa/drivers/dri/i965/brw_vec4.h | 4 +++- 
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 27 ++++++++++++++++++-------- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 12 ++++++++---- 3 files changed, 30 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index ec8abf49cd8..52d68c5a33d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -273,9 +273,11 @@ public: src_reg offset_value, src_reg mcs, bool is_cube_array, + uint32_t surface, src_reg surface_reg, uint32_t sampler, src_reg sampler_reg); - uint32_t gather_channel(unsigned gather_component, uint32_t sampler); + uint32_t gather_channel(unsigned gather_component, + uint32_t surface, uint32_t sampler); src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate, src_reg sampler); void emit_gen6_gather_wa(uint8_t wa, dst_reg dst); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index e86eb1403fa..242bcf83d2d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1574,7 +1574,9 @@ glsl_type_for_nir_alu_type(nir_alu_type alu_type, void vec4_visitor::nir_emit_texture(nir_tex_instr *instr) { + unsigned texture = instr->texture_index; unsigned sampler = instr->sampler_index; + src_reg texture_reg = src_reg(texture); src_reg sampler_reg = src_reg(sampler); src_reg coordinate; const glsl_type *coord_type = NULL; @@ -1655,8 +1657,8 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1); assert(coord_type != NULL); if (devinfo->gen >= 7 && - key_tex->compressed_multisample_layout_mask & (1 << sampler)) { - mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg); + key_tex->compressed_multisample_layout_mask & (1 << texture)) { + mcs = emit_mcs_fetch(coord_type, coordinate, texture_reg); } else { mcs = src_reg(0u); } @@ -1668,13 +1670,12 @@ 
vec4_visitor::nir_emit_texture(nir_tex_instr *instr) offset_value = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2); break; - case nir_tex_src_sampler_offset: { - /* The highest sampler which may be used by this operation is + case nir_tex_src_texture_offset: { + /* The highest texture which may be used by this operation is * the last element of the array. Mark it here, because the generator * doesn't have enough information to determine the bound. */ - uint32_t array_size = instr->texture_array_size; - uint32_t max_used = sampler + array_size - 1; + uint32_t max_used = texture + instr->texture_array_size - 1; if (instr->op == nir_texop_tg4) { max_used += prog_data->base.binding_table.gather_texture_start; } else { @@ -1683,6 +1684,15 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) brw_mark_surface_used(&prog_data->base, max_used); + /* Emit code to evaluate the actual indexing expression */ + src_reg src = get_nir_src(instr->src[i].src, 1); + src_reg temp(this, glsl_type::uint_type); + emit(ADD(dst_reg(temp), src, src_reg(texture))); + texture_reg = emit_uniformize(temp); + break; + } + + case nir_tex_src_sampler_offset: { /* Emit code to evaluate the actual indexing expression */ src_reg src = get_nir_src(instr->src[i].src, 1); src_reg temp(this, glsl_type::uint_type); @@ -1712,7 +1722,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) /* Stuff the channel select bits in the top of the texture offset */ if (instr->op == nir_texop_tg4) - constant_offset |= gather_channel(instr->component, sampler) << 16; + constant_offset |= gather_channel(instr->component, texture, sampler) << 16; ir_texture_opcode op = ir_texture_opcode_for_nir_texop(instr->op); @@ -1725,7 +1735,8 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) shadow_comparitor, lod, lod2, sample_index, constant_offset, offset_value, - mcs, is_cube_array, sampler, sampler_reg); + mcs, is_cube_array, + texture, texture_reg, sampler, sampler_reg); } void diff --git 
a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index fda3d7c4427..858d76fb5c6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -877,6 +877,8 @@ vec4_visitor::emit_texture(ir_texture_opcode op, src_reg offset_value, src_reg mcs, bool is_cube_array, + uint32_t surface, + src_reg surface_reg, uint32_t sampler, src_reg sampler_reg) { @@ -936,7 +938,8 @@ vec4_visitor::emit_texture(ir_texture_opcode op, inst->dst.writemask = WRITEMASK_XYZW; inst->shadow_compare = shadow_comparitor.file != BAD_FILE; - inst->src[1] = sampler_reg; + inst->src[1] = surface_reg; + inst->src[2] = sampler_reg; /* MRF for the first parameter */ int param_base = inst->base_mrf + inst->header_size; @@ -1062,7 +1065,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op, } if (devinfo->gen == 6 && op == ir_tg4) { - emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst); + emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst); } swizzle_result(op, dest, @@ -1100,7 +1103,8 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst) * Set up the gather channel based on the swizzle, for gather4. */ uint32_t -vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler) +vec4_visitor::gather_channel(unsigned gather_component, + uint32_t surface, uint32_t sampler) { int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component); switch (swiz) { @@ -1109,7 +1113,7 @@ vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler) /* gather4 sampler is broken for green channel on RG32F -- * we must ask for blue instead. 
*/ - if (key_tex->gather_channel_quirk_mask & (1 << sampler)) + if (key_tex->gather_channel_quirk_mask & (1 << surface)) return 2; return 1; case SWIZZLE_Z: return 2; -- cgit v1.2.3 From e9dba8043026250b57d707975d3bcd0c43b1db0f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 14 Nov 2015 09:00:35 -0800 Subject: anv/apply_pipeline_layout: Handle separate samplers and textures --- src/vulkan/anv_nir_apply_pipeline_layout.c | 90 ++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c index 8270dc13981..fe1702dfda7 100644 --- a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@ -55,6 +55,30 @@ get_surface_index(unsigned set, unsigned binding, return surface_index; } +static uint32_t +get_sampler_index(unsigned set, unsigned binding, nir_texop tex_op, + struct apply_pipeline_layout_state *state) +{ + assert(set < state->layout->num_sets); + struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + + assert(binding < set_layout->binding_count); + + if (set_layout->binding[binding].stage[state->stage].sampler_index < 0) { + assert(tex_op == nir_texop_txf); + return 0; + } + + uint32_t sampler_index = + state->layout->set[set].stage[state->stage].sampler_start + + set_layout->binding[binding].stage[state->stage].sampler_index; + + assert(sampler_index < state->layout->stage[state->stage].sampler_count); + + return sampler_index; +} + static void lower_res_index_intrinsic(nir_intrinsic_instr *intrin, struct apply_pipeline_layout_state *state) @@ -85,22 +109,15 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin, } static void -lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref, + unsigned *const_index, nir_tex_src_type src_type, + struct apply_pipeline_layout_state *state) 
{ - /* No one should have come by and lowered it already */ - assert(tex->sampler); + if (deref->deref.child) { + assert(deref->deref.child->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); - unsigned set = tex->sampler->var->data.descriptor_set; - unsigned binding = tex->sampler->var->data.binding; - - tex->sampler_index = get_surface_index(set, binding, state); - - if (tex->sampler->deref.child) { - assert(tex->sampler->deref.child->deref_type == nir_deref_type_array); - nir_deref_array *deref_array = - nir_deref_as_array(tex->sampler->deref.child); - - tex->sampler_index += deref_array->base_offset; + *const_index += deref_array->base_offset; if (deref_array->deref_array_type == nir_deref_array_type_indirect) { nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, @@ -117,13 +134,52 @@ lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) /* Now we can go ahead and move the source over to being a * first-class texture source. */ - tex->src[tex->num_srcs].src_type = nir_tex_src_sampler_offset; + tex->src[tex->num_srcs].src_type = src_type; tex->num_srcs++; - nir_instr_move_src(&tex->instr, &tex->src[tex->num_srcs - 1].src, - &deref_array->indirect); + assert(deref_array->indirect.is_ssa); + nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src, + deref_array->indirect); } } +} + +static void +cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref) +{ + if (deref->deref.child == NULL) + return; + + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + if (deref_array->deref_array_type != nir_deref_array_type_indirect) + return; + + nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT); +} + +static void +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +{ + /* No one should have come by and lowered it already */ + assert(tex->sampler); + nir_deref_var *tex_deref = tex->texture ? 
tex->texture : tex->sampler; + tex->texture_index = + get_surface_index(tex_deref->var->data.descriptor_set, + tex_deref->var->data.binding, state); + lower_tex_deref(tex, tex_deref, &tex->texture_index, + nir_tex_src_texture_offset, state); + + tex->sampler_index = + get_sampler_index(tex->sampler->var->data.descriptor_set, + tex->sampler->var->data.binding, tex->op, state); + lower_tex_deref(tex, tex->sampler, &tex->sampler_index, + nir_tex_src_sampler_offset, state); + + if (tex->texture) + cleanup_tex_deref(tex, tex->texture); + cleanup_tex_deref(tex, tex->sampler); + tex->texture = NULL; tex->sampler = NULL; } -- cgit v1.2.3 From 002db3ee15c6e4e9137a2cedd7661e540a8b6a34 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 14 Nov 2015 09:16:53 -0800 Subject: anv/cmd_buffer: Add a default descriptor type case This silences a bunch of compiler warnings. --- src/vulkan/anv_cmd_buffer.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 6763278c5ef..0f404e29b4a 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -679,6 +679,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: assert(!"Unsupported descriptor type"); break; + + default: + assert(!"Invalid descriptor type"); + continue; } bt_map[bias + s] = surface_state.offset + state_offset; -- cgit v1.2.3 From 22d024e031eef7c4e208b0f0410e51084e229be0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 14 Nov 2015 22:32:54 -0800 Subject: nir/spirv: Add support for separate samplers and textures This gets tricky in a few places because we have to pass vtn_sampled_image values through OpAccessChain, but it works ok. At some point, it probably needs to be cleaned up but it doesn't occur to me exactly how to do that at the moment. We'll see how this approach goes. 
--- src/glsl/nir/spirv_to_nir.c | 96 ++++++++++++++++++++++++++++++------- src/glsl/nir/spirv_to_nir_private.h | 7 +++ 2 files changed, 87 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 740479f4d20..024988e06ef 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -617,6 +617,19 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type = vtn_value(b, w[2], vtn_value_type_type)->type; break; + case SpvOpTypeSampler: + /* The actual sampler type here doesn't really matter. It gets + * thrown away the moment you combine it with an image. What really + * matters is that it's a sampler type as opposed to an integer type + * so the backend knows what to do. + * + * TODO: Eventually we should consider adding a "bare sampler" type + * to glsl_types. + */ + val->type->type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, + GLSL_TYPE_FLOAT); + break; + case SpvOpTypeRuntimeArray: case SpvOpTypeOpaque: case SpvOpTypeEvent: @@ -1603,12 +1616,26 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvOpAccessChain: case SpvOpInBoundsAccessChain: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); - nir_deref_var *base = vtn_value(b, w[3], vtn_value_type_deref)->deref; - val->deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); + nir_deref_var *base; + struct vtn_value *base_val = vtn_untyped_value(b, w[3]); + if (base_val->value_type == vtn_value_type_sampled_image) { + /* This is rather insane. SPIR-V allows you to use OpSampledImage + * to combine an array of images with a single sampler to get an + * array of sampled images that all share the same sampler. + * Fortunately, this means that we can more-or-less ignore the + * sampler when crawling the access chain, but it does leave us + * with this rather awkward little special-case. 
+ */ + base = base_val->sampled_image->image; + } else { + assert(base_val->value_type == vtn_value_type_deref); + base = base_val->deref; + } + + nir_deref_var *deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); struct vtn_type *deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; - nir_deref *tail = &val->deref->deref; + nir_deref *tail = &deref->deref; while (tail->child) tail = tail->child; @@ -1679,29 +1706,29 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, b->shader->info.gs.vertices_in); /* The first non-var deref should be an array deref. */ - assert(val->deref->deref.child->deref_type == + assert(deref->deref.child->deref_type == nir_deref_type_array); - per_vertex_deref = nir_deref_as_array(val->deref->deref.child); + per_vertex_deref = nir_deref_as_array(deref->deref.child); } nir_variable *builtin = get_builtin_variable(b, base->var->data.mode, builtin_type, deref_type->builtin); - val->deref = nir_deref_var_create(b, builtin); + deref = nir_deref_var_create(b, builtin); if (per_vertex_deref) { /* Since deref chains start at the variable, we can just * steal that link and use it. */ - val->deref->deref.child = &per_vertex_deref->deref; + deref->deref.child = &per_vertex_deref->deref; per_vertex_deref->deref.child = NULL; per_vertex_deref->deref.type = glsl_get_array_element(builtin_type); tail = &per_vertex_deref->deref; } else { - tail = &val->deref->deref; + tail = &deref->deref; } } else { tail = tail->child; @@ -1712,12 +1739,20 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, * actually access the variable, so we need to keep around the original * type of the variable. 
*/ - if (variable_is_external_block(base->var)) - val->deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; - else + deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + + if (base_val->value_type == vtn_value_type_sampled_image) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = deref; + val->sampled_image->sampler = base_val->sampled_image->sampler; + } else { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + val->deref = deref; val->deref_type = deref_type; - + } break; } @@ -1822,8 +1857,28 @@ static void vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { + if (opcode == SpvOpSampledImage) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = + vtn_value(b, w[3], vtn_value_type_deref)->deref; + val->sampled_image->sampler = + vtn_value(b, w[4], vtn_value_type_deref)->deref; + return; + } + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - nir_deref_var *sampler = vtn_value(b, w[3], vtn_value_type_deref)->deref; + + struct vtn_sampled_image sampled; + struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); + if (sampled_val->value_type == vtn_value_type_sampled_image) { + sampled = *sampled_val->sampled_image; + } else { + assert(sampled_val->value_type == vtn_value_type_deref); + sampled.image = NULL; + sampled.sampler = sampled_val->deref; + } nir_tex_src srcs[8]; /* 8 should be enough */ nir_tex_src *p = srcs; @@ -1954,7 +2009,8 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); - const struct glsl_type *sampler_type = nir_deref_tail(&sampler->deref)->type; + const struct glsl_type *sampler_type = + 
nir_deref_tail(&sampled.sampler->deref)->type; instr->sampler_dim = glsl_get_sampler_dim(sampler_type); switch (glsl_get_sampler_result_type(sampler_type)) { @@ -1972,7 +2028,14 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, instr->is_array = glsl_sampler_type_is_array(sampler_type); instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); - instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); + instr->sampler = + nir_deref_as_var(nir_copy_deref(instr, &sampled.sampler->deref)); + if (sampled.image) { + instr->texture = + nir_deref_as_var(nir_copy_deref(instr, &sampled.image->deref)); + } else { + instr->texture = NULL; + } nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); val->ssa = vtn_create_ssa_value(b, glsl_vector_type(GLSL_TYPE_FLOAT, 4)); @@ -3310,6 +3373,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_function_call(b, opcode, w, count); break; + case SpvOpSampledImage: case SpvOpImageSampleImplicitLod: case SpvOpImageSampleExplicitLod: case SpvOpImageSampleDrefImplicitLod: diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index f7be166da16..40f0c78ae78 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -46,6 +46,7 @@ enum vtn_value_type { vtn_value_type_ssa, vtn_value_type_extension, vtn_value_type_image_pointer, + vtn_value_type_sampled_image, }; struct vtn_block { @@ -127,6 +128,11 @@ struct vtn_image_pointer { nir_ssa_def *sample; }; +struct vtn_sampled_image { + nir_deref_var *image; /* Image or array of images */ + nir_deref_var *sampler; /* Sampler */ +}; + struct vtn_value { enum vtn_value_type value_type; const char *name; @@ -144,6 +150,7 @@ struct vtn_value { struct vtn_type *deref_type; }; struct vtn_image_pointer *image; + struct vtn_sampled_image *sampled_image; struct vtn_function *func; struct vtn_block *block; struct vtn_ssa_value *ssa; -- cgit v1.2.3 From 
cb9e2305f87e7f3bad5a1b619f5679b335052d46 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 16 Nov 2015 12:10:11 -0800 Subject: anv/cmd_buffer: Move gen-specific stuff into the appropreate files --- src/vulkan/anv_cmd_buffer.c | 227 ------------------------------------------- src/vulkan/anv_private.h | 6 +- src/vulkan/gen7_cmd_buffer.c | 168 +++++++++++++++++++++++++++++++- src/vulkan/gen8_cmd_buffer.c | 75 +++++++++++++- 4 files changed, 240 insertions(+), 236 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 0f404e29b4a..c0de376da25 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -740,91 +740,6 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, return VK_SUCCESS; } -static VkResult -flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, VkShaderStage stage) -{ - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = anv_cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); - if (result != VK_SUCCESS) - return result; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); - if (result != VK_SUCCESS) - return result; - - static const uint32_t sampler_state_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 43, - [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ - [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ - [VK_SHADER_STAGE_GEOMETRY] = 46, - [VK_SHADER_STAGE_FRAGMENT] = 47, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; - - static const uint32_t binding_table_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 38, - [VK_SHADER_STAGE_TESS_CONTROL] = 39, - [VK_SHADER_STAGE_TESS_EVALUATION] = 40, - [VK_SHADER_STAGE_GEOMETRY] = 41, - [VK_SHADER_STAGE_FRAGMENT] = 42, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; - - if (samplers.alloc_size > 0) { - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, - ._3DCommandSubOpcode = sampler_state_opcodes[stage], - .PointertoVSSamplerState = samplers.offset); - } - - if 
(surfaces.alloc_size > 0) { - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, - ._3DCommandSubOpcode = binding_table_opcodes[stage], - .PointertoVSBindingTable = surfaces.offset); - } - - return VK_SUCCESS; -} - -void -anv_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) -{ - VkShaderStage s; - VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & - cmd_buffer->state.pipeline->active_stages; - - VkResult result = VK_SUCCESS; - for_each_bit(s, dirty) { - result = flush_descriptor_set(cmd_buffer, s); - if (result != VK_SUCCESS) - break; - } - - if (result != VK_SUCCESS) { - assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); - - result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); - assert(result == VK_SUCCESS); - - /* Re-emit state base addresses so we get the new surface state base - * address before we start emitting binding tables etc. - */ - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - /* Re-emit all active binding tables */ - for_each_bit(s, cmd_buffer->state.pipeline->active_stages) { - result = flush_descriptor_set(cmd_buffer, s); - - /* It had better succeed this time */ - assert(result == VK_SUCCESS); - } - } - - cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages; -} - struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, uint32_t *a, uint32_t dwords, uint32_t alignment) @@ -875,148 +790,6 @@ anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, } } -static void -emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t count, const VkViewport *viewports) -{ - struct anv_state sf_clip_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); - - for (uint32_t i = 0; i < count; i++) { - const VkViewport *vp = &viewports[i]; - - /* The gen7 state struct has just the matrix and guardband fields, the - * gen8 struct 
adds the min/max viewport fields. */ - struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = { - .ViewportMatrixElementm00 = vp->width / 2, - .ViewportMatrixElementm11 = vp->height / 2, - .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, - .ViewportMatrixElementm30 = vp->originX + vp->width / 2, - .ViewportMatrixElementm31 = vp->originY + vp->height / 2, - .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2, - .XMinClipGuardband = -1.0f, - .XMaxClipGuardband = 1.0f, - .YMinClipGuardband = -1.0f, - .YMaxClipGuardband = 1.0f, - .XMinViewPort = vp->originX, - .XMaxViewPort = vp->originX + vp->width - 1, - .YMinViewPort = vp->originY, - .YMaxViewPort = vp->originY + vp->height - 1, - }; - - struct GEN7_CC_VIEWPORT cc_viewport = { - .MinimumDepth = vp->minDepth, - .MaximumDepth = vp->maxDepth - }; - - GEN8_SF_CLIP_VIEWPORT_pack(NULL, sf_clip_state.map + i * 64, - &sf_clip_viewport); - GEN7_CC_VIEWPORT_pack(NULL, cc_state.map + i * 32, &cc_viewport); - } - - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, - .CCViewportPointer = cc_state.offset); - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, - .SFClipViewportPointer = sf_clip_state.offset); -} - -void -anv_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.dynamic.viewport.count > 0) { - emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count, - cmd_buffer->state.dynamic.viewport.viewports); - } else { - /* If viewport count is 0, this is taken to mean "use the default" */ - emit_viewport_state(cmd_buffer, 1, - &(VkViewport) { - .originX = 0.0f, - .originY = 0.0f, - .width = cmd_buffer->state.framebuffer->width, - .height = cmd_buffer->state.framebuffer->height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - } -} - -static inline int64_t -clamp_int64(int64_t x, int64_t min, int64_t max) -{ - if (x < min) - return min; - else if (x < max) - return x; - else - return max; -} - -static 
void -emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t count, const VkRect2D *scissors) -{ - struct anv_state scissor_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 32, 32); - - for (uint32_t i = 0; i < count; i++) { - const VkRect2D *s = &scissors[i]; - - /* Since xmax and ymax are inclusive, we have to have xmax < xmin or - * ymax < ymin for empty clips. In case clip x, y, width height are all - * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't - * what we want. Just special case empty clips and produce a canonical - * empty clip. */ - static const struct GEN7_SCISSOR_RECT empty_scissor = { - .ScissorRectangleYMin = 1, - .ScissorRectangleXMin = 1, - .ScissorRectangleYMax = 0, - .ScissorRectangleXMax = 0 - }; - - const int max = 0xffff; - struct GEN7_SCISSOR_RECT scissor = { - /* Do this math using int64_t so overflow gets clamped correctly. */ - .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), - .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), - .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), - .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) - }; - - if (s->extent.width <= 0 || s->extent.height <= 0) { - GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 32, - &empty_scissor); - } else { - GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 32, &scissor); - } - } - - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, - .ScissorRectPointer = scissor_state.offset); -} - -void -anv_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.dynamic.scissor.count > 0) { - emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, - cmd_buffer->state.dynamic.scissor.scissors); - } else { - /* Emit a default scissor based on the currently bound framebuffer */ - emit_scissor_state(cmd_buffer, 1, - &(VkRect2D) { - .offset = { .x = 0, .y = 0, }, - 
.extent = { - .width = cmd_buffer->state.framebuffer->width, - .height = cmd_buffer->state.framebuffer->height, - }, - }); - } -} - void anv_CmdSetEvent( VkCmdBuffer cmdBuffer, VkEvent event, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a6728392130..5afd42b4d13 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1022,7 +1022,7 @@ VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, unsigned stage, struct anv_state *bt_state); VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage, struct anv_state *state); -void anv_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer); struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, uint32_t *a, uint32_t dwords, @@ -1047,8 +1047,8 @@ anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, VkResult anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 5ebf129a802..ee40a0de09c 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -148,6 +148,164 @@ gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) .TextureCacheInvalidationEnable = true); } +static VkResult +flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, VkShaderStage stage) +{ + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + 
result = anv_cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); + if (result != VK_SUCCESS) + return result; + + static const uint32_t sampler_state_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 43, + [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 46, + [VK_SHADER_STAGE_FRAGMENT] = 47, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + static const uint32_t binding_table_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + if (samplers.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[stage], + .PointertoVSSamplerState = samplers.offset); + } + + if (surfaces.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[stage], + .PointertoVSBindingTable = surfaces.offset); + } + + return VK_SUCCESS; +} + +void +gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) +{ + VkShaderStage s; + VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & + cmd_buffer->state.pipeline->active_stages; + + VkResult result = VK_SUCCESS; + for_each_bit(s, dirty) { + result = flush_descriptor_set(cmd_buffer, s); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) { + assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); + + result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); + assert(result == VK_SUCCESS); + + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. 
+ */ + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + /* Re-emit all active binding tables */ + for_each_bit(s, cmd_buffer->state.pipeline->active_stages) { + result = flush_descriptor_set(cmd_buffer, s); + + /* It had better succeed this time */ + assert(result == VK_SUCCESS); + } + } + + cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages; +} + +static inline int64_t +clamp_int64(int64_t x, int64_t min, int64_t max) +{ + if (x < min) + return min; + else if (x < max) + return x; + else + return max; +} + +static void +emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkRect2D *scissors) +{ + struct anv_state scissor_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 32, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkRect2D *s = &scissors[i]; + + /* Since xmax and ymax are inclusive, we have to have xmax < xmin or + * ymax < ymin for empty clips. In case clip x, y, width height are all + * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't + * what we want. Just special case empty clips and produce a canonical + * empty clip. */ + static const struct GEN7_SCISSOR_RECT empty_scissor = { + .ScissorRectangleYMin = 1, + .ScissorRectangleXMin = 1, + .ScissorRectangleYMax = 0, + .ScissorRectangleXMax = 0 + }; + + const int max = 0xffff; + struct GEN7_SCISSOR_RECT scissor = { + /* Do this math using int64_t so overflow gets clamped correctly. 
*/ + .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), + .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), + .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), + .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) + }; + + if (s->extent.width <= 0 || s->extent.height <= 0) { + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 32, + &empty_scissor); + } else { + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 32, &scissor); + } + } + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = scissor_state.offset); +} + +void +gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.scissor.count > 0) { + emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, + cmd_buffer->state.dynamic.scissor.scissors); + } else { + /* Emit a default scissor based on the currently bound framebuffer */ + emit_scissor_state(cmd_buffer, 1, + &(VkRect2D) { + .offset = { .x = 0, .y = 0, }, + .extent = { + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + }, + }); + } +} + static const uint32_t vk_to_gen_index_type[] = { [VK_INDEX_TYPE_UINT16] = INDEX_WORD, [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, @@ -306,16 +464,20 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) } if (cmd_buffer->state.descriptors_dirty) - anv_flush_descriptor_sets(cmd_buffer); + gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); if (cmd_buffer->state.push_constants_dirty) gen7_cmd_buffer_flush_push_constants(cmd_buffer); + /* We use the gen8 state here because it only contains the additional + * min/max fields and, since they occur at the end of the packet and + * don't change the stride, they work on gen7 too. 
+ */ if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) - anv_cmd_buffer_emit_viewport(cmd_buffer); + gen8_cmd_buffer_emit_viewport(cmd_buffer); if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) - anv_cmd_buffer_emit_scissor(cmd_buffer); + gen7_cmd_buffer_emit_scissor(cmd_buffer); if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index f626cad2831..2c76e31936f 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -63,6 +63,75 @@ gen8_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.push_constants_dirty &= ~flushed; } +static void +emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkViewport *viewports) +{ + struct anv_state sf_clip_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + + /* The gen7 state struct has just the matrix and guardband fields, the + * gen8 struct adds the min/max viewport fields. 
*/ + struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, + .ViewportMatrixElementm30 = vp->originX + vp->width / 2, + .ViewportMatrixElementm31 = vp->originY + vp->height / 2, + .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->originX, + .XMaxViewPort = vp->originX + vp->width - 1, + .YMinViewPort = vp->originY, + .YMaxViewPort = vp->originY + vp->height - 1, + }; + + struct GEN7_CC_VIEWPORT cc_viewport = { + .MinimumDepth = vp->minDepth, + .MaximumDepth = vp->maxDepth + }; + + GEN8_SF_CLIP_VIEWPORT_pack(NULL, sf_clip_state.map + i * 64, + &sf_clip_viewport); + GEN7_CC_VIEWPORT_pack(NULL, cc_state.map + i * 32, &cc_viewport); + } + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = cc_state.offset); + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = sf_clip_state.offset); +} + +void +gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.viewport.count > 0) { + emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count, + cmd_buffer->state.dynamic.viewport.viewports); + } else { + /* If viewport count is 0, this is taken to mean "use the default" */ + emit_viewport_state(cmd_buffer, 1, + &(VkViewport) { + .originX = 0.0f, + .originY = 0.0f, + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + } +} + static void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { @@ -116,16 +185,16 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) } if (cmd_buffer->state.descriptors_dirty) 
- anv_flush_descriptor_sets(cmd_buffer); + gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); if (cmd_buffer->state.push_constants_dirty) gen8_cmd_buffer_flush_push_constants(cmd_buffer); if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) - anv_cmd_buffer_emit_viewport(cmd_buffer); + gen8_cmd_buffer_emit_viewport(cmd_buffer); if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) - anv_cmd_buffer_emit_scissor(cmd_buffer); + gen7_cmd_buffer_emit_scissor(cmd_buffer); if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { -- cgit v1.2.3 From de54b4b18fea9358cc6f0e7dc9f64256be00be06 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 16 Nov 2015 12:29:07 -0800 Subject: anv: Only include the pack headers where needed Previously, we were including gen7_pack.h, gen75_pack.h, and gen8_pack.h in anv_private.h. As we add more gens, this is going to become untenable. This commit moves things around so that we only use the pack headers when and if we need them. 
--- src/vulkan/anv_batch_chain.c | 3 +++ src/vulkan/anv_device.c | 2 ++ src/vulkan/anv_formats.c | 2 ++ src/vulkan/anv_image.c | 5 +++++ src/vulkan/anv_pipeline.c | 2 +- src/vulkan/anv_private.h | 29 ++++++++++++----------------- src/vulkan/gen7_cmd_buffer.c | 4 +++- src/vulkan/gen7_pipeline.c | 12 +++++++----- src/vulkan/gen7_state.c | 4 +++- src/vulkan/gen8_cmd_buffer.c | 6 ++++-- src/vulkan/gen8_pipeline.c | 2 ++ src/vulkan/gen8_state.c | 2 ++ 12 files changed, 46 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 239149709df..9d35da8024b 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -29,6 +29,9 @@ #include "anv_private.h" +#include "gen7_pack.h" +#include "gen8_pack.h" + /** \file anv_batch_chain.c * * This file contains functions related to anv_cmd_buffer as a data diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index fb608592c1f..6cdfd4e5c46 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -31,6 +31,8 @@ #include "mesa/main/git_sha1.h" #include "util/strtod.h" +#include "gen7_pack.h" + struct anv_dispatch_table dtable; static void diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 942e7cfb484..a231967a865 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -24,6 +24,8 @@ #include "anv_private.h" #include "brw_surface_formats.h" +#include "gen7_pack.h" + #define fmt(__vk_fmt, __hw_fmt, ...) \ [__vk_fmt] = { \ .vk_format = __vk_fmt, \ diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index e90257e8faa..7808454262e 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -29,6 +29,11 @@ #include "anv_private.h" +/* FIXME: We shouldn't be using the actual hardware enum values here. They + * change across gens. Once we get that fixed, this include needs to go. 
+ */ +#include "gen8_pack.h" + static const uint8_t anv_halign[] = { [4] = HALIGN4, [8] = HALIGN8, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index a9cf16f79c2..6c9deaddda7 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -260,7 +260,7 @@ static const uint32_t vk_to_gen_primitive_type[] = { [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 +/* [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 */ }; static void diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 5afd42b4d13..a8ed5e8a7e2 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -627,11 +627,6 @@ __gen_combine_address(struct anv_batch *batch, void *location, } } -#include "gen7_pack.h" -#include "gen75_pack.h" -#undef GEN8_3DSTATE_MULTISAMPLE -#include "gen8_pack.h" - #define anv_batch_emit(batch, cmd, ...) 
do { \ void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \ struct cmd __template = { \ @@ -664,11 +659,11 @@ __gen_combine_address(struct anv_batch *batch, void *location, VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ } while (0) -static const struct GEN7_MEMORY_OBJECT_CONTROL_STATE GEN7_MOCS = { - .GraphicsDataTypeGFDT = 0, - .LLCCacheabilityControlLLCCC = 0, - .L3CacheabilityControlL3CC = 1 -}; +#define GEN7_MOCS (struct GEN7_MEMORY_OBJECT_CONTROL_STATE) { \ + .GraphicsDataTypeGFDT = 0, \ + .LLCCacheabilityControlLLCCC = 0, \ + .L3CacheabilityControlL3CC = 1, \ +} #define GEN8_MOCS { \ .MemoryTypeLLCeLLCCacheabilityControl = WB, \ @@ -923,7 +918,7 @@ struct anv_cmd_state { struct anv_framebuffer * framebuffer; struct anv_render_pass * pass; struct anv_subpass * subpass; - uint32_t state_vf[GEN8_3DSTATE_VF_length]; + uint32_t state_vf[2]; struct anv_vertex_binding vertex_bindings[MAX_VBS]; struct anv_descriptor_set * descriptors[MAX_SETS]; struct anv_push_constants * push_constants[VK_SHADER_STAGE_NUM]; @@ -1150,15 +1145,15 @@ struct anv_pipeline { uint32_t cs_right_mask; struct { - uint32_t sf[GEN7_3DSTATE_SF_length]; - uint32_t depth_stencil_state[GEN7_DEPTH_STENCIL_STATE_length]; + uint32_t sf[7]; + uint32_t depth_stencil_state[3]; } gen7; struct { - uint32_t sf[GEN8_3DSTATE_SF_length]; - uint32_t vf[GEN8_3DSTATE_VF_length]; - uint32_t raster[GEN8_3DSTATE_RASTER_length]; - uint32_t wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + uint32_t sf[4]; + uint32_t vf[2]; + uint32_t raster[5]; + uint32_t wm_depth_stencil[3]; } gen8; }; diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index ee40a0de09c..4f5d50fe167 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -29,6 +29,8 @@ #include "anv_private.h" +#include "gen7_pack.h" + static void gen7_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) { @@ -283,7 +285,7 @@ emit_scissor_state(struct anv_cmd_buffer 
*cmd_buffer, } } - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS, .ScissorRectPointer = scissor_state.offset); } diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 1fed33a53d1..d53489c2db1 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -29,6 +29,8 @@ #include "anv_private.h" +#include "gen7_pack.h" + static void gen7_emit_vertex_input(struct anv_pipeline *pipeline, const VkPipelineVertexInputStateCreateInfo *info) @@ -92,8 +94,8 @@ static const uint32_t vk_to_gen_fillmode[] = { }; static const uint32_t vk_to_gen_front_face[] = { - [VK_FRONT_FACE_CCW] = CounterClockwise, - [VK_FRONT_FACE_CW] = Clockwise + [VK_FRONT_FACE_CCW] = 1, + [VK_FRONT_FACE_CW] = 0 }; static void @@ -575,9 +577,9 @@ gen7_graphics_pipeline_create( anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, .StatisticsEnable = true, .ThreadDispatchEnable = true, - .LineEndCapAntialiasingRegionWidth = _05pixels, - .LineAntialiasingRegionWidth = _10pixels, - .EarlyDepthStencilControl = NORMAL, + .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ + .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ + .EarlyDepthStencilControl = EDSC_NORMAL, .PointRasterizationRule = RASTRULE_UPPER_RIGHT, .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes); diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index a53b6e3faf3..596e232ab6f 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -29,6 +29,8 @@ #include "anv_private.h" +#include "gen7_pack.h" + void gen7_fill_buffer_surface_state(void *state, const struct anv_format *format, uint32_t offset, uint32_t range, uint32_t stride) @@ -272,7 +274,7 @@ gen7_image_view_init(struct anv_image_view *iview, iview->color_rt_surface_state = gen7_alloc_surface_state(device, cmd_buffer); - 
surface_state.RenderCacheReadWriteMode = WriteOnlyCache; + surface_state.RenderCacheReadWriteMode = 0; /* Write only */ /* For render target surfaces, the hardware interprets field MIPCount/LOD as * LOD. The Broadwell PRM says: diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 2c76e31936f..2e4a618bad1 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -29,6 +29,8 @@ #include "anv_private.h" +#include "gen8_pack.h" + static void gen8_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) { @@ -94,14 +96,14 @@ emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, .YMaxViewPort = vp->originY + vp->height - 1, }; - struct GEN7_CC_VIEWPORT cc_viewport = { + struct GEN8_CC_VIEWPORT cc_viewport = { .MinimumDepth = vp->minDepth, .MaximumDepth = vp->maxDepth }; GEN8_SF_CLIP_VIEWPORT_pack(NULL, sf_clip_state.map + i * 64, &sf_clip_viewport); - GEN7_CC_VIEWPORT_pack(NULL, cc_state.map + i * 32, &cc_viewport); + GEN8_CC_VIEWPORT_pack(NULL, cc_state.map + i * 32, &cc_viewport); } anv_batch_emit(&cmd_buffer->batch, diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index e98045248a4..81bc254b3f7 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -29,6 +29,8 @@ #include "anv_private.h" +#include "gen8_pack.h" + static void emit_vertex_input(struct anv_pipeline *pipeline, const VkPipelineVertexInputStateCreateInfo *info) diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 6eb65e6ec4e..94972d20490 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -29,6 +29,8 @@ #include "anv_private.h" +#include "gen8_pack.h" + void gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, uint32_t offset, uint32_t range, uint32_t stride) -- cgit v1.2.3 From 34d55d69cfe592068a72ed7a5ca9adc1ee080976 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 17 Nov 2015 01:32:55 -0800 Subject: anv/formats: Don't advertise stencil 
texture/blit prior to Broadwell --- src/vulkan/anv_formats.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index a231967a865..f1c8da00b3c 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -257,9 +257,11 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d uint32_t linear = 0, tiled = 0; if (anv_format_is_depth_or_stencil(format)) { - tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; - tiled |= VK_FORMAT_FEATURE_BLIT_SOURCE_BIT; + if (physical_device->info->gen >= 8) { + tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + tiled |= VK_FORMAT_FEATURE_BLIT_SOURCE_BIT; + } if (format->depth_format) { tiled |= VK_FORMAT_FEATURE_BLIT_DESTINATION_BIT; } -- cgit v1.2.3 From 0508046dc839a2ebd2800f169b45f2d3243dcda6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 16 Nov 2015 16:29:33 -0800 Subject: anv/cmd_buffer: Pack the 3DSTATE_VF packet on-demand --- src/vulkan/anv_cmd_buffer.c | 2 +- src/vulkan/anv_private.h | 3 +-- src/vulkan/gen8_cmd_buffer.c | 17 ++++++++++------- src/vulkan/gen8_pipeline.c | 6 ------ 4 files changed, 12 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index c0de376da25..1a37d8124d9 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -113,7 +113,6 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest, static void anv_cmd_state_init(struct anv_cmd_state *state) { - memset(&state->state_vf, 0, sizeof(state->state_vf)); memset(&state->descriptors, 0, sizeof(state->descriptors)); memset(&state->push_constants, 0, sizeof(state->push_constants)); @@ -122,6 +121,7 @@ anv_cmd_state_init(struct anv_cmd_state *state) state->descriptors_dirty = 0; state->push_constants_dirty = 0; state->pipeline = NULL; + state->restart_index = UINT32_MAX; state->dynamic = 
default_dynamic_state; state->gen7.index_buffer = NULL; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a8ed5e8a7e2..55f562d06b8 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -918,7 +918,7 @@ struct anv_cmd_state { struct anv_framebuffer * framebuffer; struct anv_render_pass * pass; struct anv_subpass * subpass; - uint32_t state_vf[2]; + uint32_t restart_index; struct anv_vertex_binding vertex_bindings[MAX_VBS]; struct anv_descriptor_set * descriptors[MAX_SETS]; struct anv_push_constants * push_constants[VK_SHADER_STAGE_NUM]; @@ -1151,7 +1151,6 @@ struct anv_pipeline { struct { uint32_t sf[4]; - uint32_t vf[2]; uint32_t raster[5]; uint32_t wm_depth_stencil[3]; } gen8; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 2e4a618bad1..9a3e3b5e061 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -283,8 +283,10 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_INDEX_BUFFER)) { - anv_batch_emit_merge(&cmd_buffer->batch, - cmd_buffer->state.state_vf, pipeline->gen8.vf); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VF, + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index, + ); } cmd_buffer->state.vb_dirty &= ~vb_emit; @@ -396,19 +398,20 @@ void gen8_CmdBindIndexBuffer( [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, }; - struct GEN8_3DSTATE_VF vf = { - GEN8_3DSTATE_VF_header, - .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? 
UINT16_MAX : UINT32_MAX, + static const uint32_t restart_index_for_type[] = { + [VK_INDEX_TYPE_UINT16] = UINT16_MAX, + [VK_INDEX_TYPE_UINT32] = UINT32_MAX, }; - GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; + cmd_buffer->state.restart_index = restart_index_for_type[indexType]; anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, .IndexFormat = vk_to_gen_index_type[indexType], .MemoryObjectControlState = GEN8_MOCS, .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, .BufferSize = buffer->size - offset); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; } static VkResult diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 81bc254b3f7..9d4ee9927cf 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -83,12 +83,6 @@ emit_ia_state(struct anv_pipeline *pipeline, const VkPipelineInputAssemblyStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { - struct GEN8_3DSTATE_VF vf = { - GEN8_3DSTATE_VF_header, - .IndexedDrawCutIndexEnable = pipeline->primitive_restart - }; - GEN8_3DSTATE_VF_pack(NULL, pipeline->gen8.vf, &vf); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY, .PrimitiveTopologyType = pipeline->topology); } -- cgit v1.2.3 From aa3002bd42c0fcd70dbeff7a3c2b049e048c0009 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 17 Nov 2015 06:30:02 -0800 Subject: anv/entrypoints: Use devinfo instead of a gen number --- src/vulkan/anv_device.c | 2 +- src/vulkan/anv_entrypoints_gen.py | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 6cdfd4e5c46..0643944fa23 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -637,7 +637,7 @@ VkResult anv_CreateDevice( return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); } - anv_set_dispatch_gen(physical_device->info->gen); + 
anv_set_dispatch_devinfo(physical_device->info); device = anv_instance_alloc(instance, sizeof(*device), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py index 0fa677bbe02..bb250602b25 100644 --- a/src/vulkan/anv_entrypoints_gen.py +++ b/src/vulkan/anv_entrypoints_gen.py @@ -91,7 +91,7 @@ if opt_header: print " };\n" print "};\n" - print "void anv_set_dispatch_gen(uint32_t gen);\n" + print "void anv_set_dispatch_devinfo(const struct brw_device_info *info);\n" for type, name, args, num, h in entrypoints: print "%s anv_%s%s;" % (type, name, args) @@ -193,12 +193,12 @@ determine_validate(void) enable_validate = atoi(s); } -static uint32_t dispatch_gen; +static const struct brw_device_info *dispatch_devinfo; void -anv_set_dispatch_gen(uint32_t gen) +anv_set_dispatch_devinfo(const struct brw_device_info *devinfo) { - dispatch_gen = gen; + dispatch_devinfo = devinfo; } void * __attribute__ ((noinline)) @@ -207,7 +207,12 @@ anv_resolve_entrypoint(uint32_t index) if (enable_validate && validate_layer.entrypoints[index]) return validate_layer.entrypoints[index]; - switch (dispatch_gen) { + if (dispatch_devinfo == NULL) { + assert(anv_layer.entrypoints[index]); + return anv_layer.entrypoints[index]; + } + + switch (dispatch_devinfo->gen) { case 8: if (gen8_layer.entrypoints[index]) return gen8_layer.entrypoints[index]; -- cgit v1.2.3 From 92d164b1c39ce52920d6bef6b348f244ba06f704 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 17 Nov 2015 06:30:47 -0800 Subject: anv/entrypoints: Add dispatch support for haswell --- src/vulkan/anv_entrypoints_gen.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py index bb250602b25..e0a521e8756 100644 --- a/src/vulkan/anv_entrypoints_gen.py +++ b/src/vulkan/anv_entrypoints_gen.py @@ -96,6 +96,7 @@ if opt_header: for type, name, args, num, h in entrypoints: 
print "%s anv_%s%s;" % (type, name, args) print "%s gen7_%s%s;" % (type, name, args) + print "%s gen75_%s%s;" % (type, name, args) print "%s gen8_%s%s;" % (type, name, args) print "%s anv_validate_%s%s;" % (type, name, args) exit() @@ -163,7 +164,7 @@ for type, name, args, num, h in entrypoints: print " { %5d, 0x%08x }," % (offsets[num], h) print "};\n" -for layer in [ "anv", "validate", "gen7", "gen8" ]: +for layer in [ "anv", "validate", "gen7", "gen75", "gen8" ]: for type, name, args, num, h in entrypoints: print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) print "\nconst struct anv_dispatch_table %s_layer = {" % layer @@ -218,6 +219,9 @@ anv_resolve_entrypoint(uint32_t index) return gen8_layer.entrypoints[index]; /* fall through */ case 7: + if (dispatch_devinfo->is_haswell && gen75_layer.entrypoints[index]) + return gen75_layer.entrypoints[index]; + if (gen7_layer.entrypoints[index]) return gen7_layer.entrypoints[index]; /* fall through */ -- cgit v1.2.3 From 45320f677bb7d60fd301273d182ad0823d633137 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 17 Nov 2015 06:31:25 -0800 Subject: anv: Add macros for doing per-gen compilation --- src/vulkan/anv_gen_macros.h | 146 ++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 34 +++++++---- 2 files changed, 169 insertions(+), 11 deletions(-) create mode 100644 src/vulkan/anv_gen_macros.h (limited to 'src') diff --git a/src/vulkan/anv_gen_macros.h b/src/vulkan/anv_gen_macros.h new file mode 100644 index 00000000000..ef2ecd55a9b --- /dev/null +++ b/src/vulkan/anv_gen_macros.h @@ -0,0 +1,146 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the 
Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +/* Macros for handling per-gen compilation. + * + * The prefixing macros GENX() and genX() automatically prefix whatever you + * give them by GENX_ or genX_ where X is the gen number. + * + * You can declare a function to be used on some range of gens like this: + * + * GENX_FUNC(GEN7, GEN75) void + * genX(my_function_name)(args...) + * { + * // Do stuff + * } + * + * If the file is compiled for any set of gens containing gen7 and gen75, + * the function will effectively only get compiled twice as + * gen7_my_function_nmae and gen75_my_function_name. The function has to + * be compilable on all gens, but it will become a static inline that gets + * discarded by the compiler on all gens not in range. + * + * You can do pseudo-runtime checks in your function such as + * + * if (ANV_GEN > 8 || ANV_IS_HASWELL) { + * // Do something + * } + * + * The contents of the if statement must be valid regardless of gen, but + * the if will get compiled away on everything except haswell. 
+ * + * For places where you really do have a compile-time conflict, you can + * use preprocessor logic: + * + * #if (ANV_GEN > 8 || ANV_IS_HASWELL) + * // Do something + * #endif + * + * However, it is strongly recommended that the former be used whenever + * possible. + */ + +/* Base macro defined on the command line. If we don't have this, we can't + * do anything. + */ +#ifdef ANV_GENx10 + +/* Gen checking macros */ +#define ANV_GEN ((ANV_GENx10) / 10) +#define ANV_IS_HASWELL ((ANV_GENx10) == 75) + +/* Prefixing macros */ +#if (ANV_GENx10 == 70) +# define GENX(X) GEN7_##X +# define genX(x) gen7_##x +#elif (ANV_GENx10 == 75) +# define GENX(X) GEN75_##X +# define genX(x) gen75_##x +#elif (ANV_GENx10 == 80) +# define GENX(X) GEN8_##X +# define genX(x) gen8_##x +#elif (ANV_GENx10 == 90) +# define GENX(X) GEN9_##X +# define genX(x) gen9_##x +#else +# error "Need to add prefixing macros for your gen" +#endif + +/* Macros for comparing gens */ +#if (ANV_GENx10 >= 70) +#define __ANV_GEN_GE_GEN7(T, F) T +#else +#define __ANV_GEN_GE_GEN7(T, F) F +#endif + +#if (ANV_GENx10 <= 70) +#define __ANV_GEN_LE_GEN7(T, F) T +#else +#define __ANV_GEN_LE_GEN7(T, F) F +#endif + +#if (ANV_GENx10 >= 75) +#define __ANV_GEN_GE_GEN75(T, F) T +#else +#define __ANV_GEN_GE_GEN75(T, F) F +#endif + +#if (ANV_GENx10 <= 75) +#define __ANV_GEN_LE_GEN75(T, F) T +#else +#define __ANV_GEN_LE_GEN75(T, F) F +#endif + +#if (ANV_GENx10 >= 80) +#define __ANV_GEN_GE_GEN8(T, F) T +#else +#define __ANV_GEN_GE_GEN8(T, F) F +#endif + +#if (ANV_GENx10 <= 80) +#define __ANV_GEN_LE_GEN8(T, F) T +#else +#define __ANV_GEN_LE_GEN8(T, F) F +#endif + +#if (ANV_GENx10 >= 90) +#define __ANV_GEN_GE_GEN9(T, F) T +#else +#define __ANV_GEN_GE_GEN9(T, F) F +#endif + +#if (ANV_GENx10 <= 90) +#define __ANV_GEN_LE_GEN9(T, F) T +#else +#define __ANV_GEN_LE_GEN9(T, F) F +#endif + +#define __ANV_GEN_IN_RANGE(start, end, T, F) \ + __ANV_GEN_GE_##start(__ANV_GEN_LE_##end(T, F), F) + +/* Declares a function as static inlind if it's 
not in range */ +#define GENX_FUNC(start, end) __ANV_GEN_IN_RANGE(start, end, , static inline) + +#endif /* ANV_GENx10 */ diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 55f562d06b8..a074ecda751 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -50,6 +50,7 @@ #include #include "anv_entrypoints.h" +#include "anv_gen_macros.h" #include "brw_context.h" #include "isl.h" @@ -627,24 +628,35 @@ __gen_combine_address(struct anv_batch *batch, void *location, } } -#define anv_batch_emit(batch, cmd, ...) do { \ - void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \ - struct cmd __template = { \ - cmd ## _header, \ - __VA_ARGS__ \ - }; \ - cmd ## _pack(batch, __dst, &__template); \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, cmd ## _length * 4)); \ +/* Wrapper macros needed to work around preprocessor argument issues. In + * particular, arguments don't get pre-evaluated if they are concatenated. + * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the + * GENX macro won't get evaluated if the emit macro contains "cmd ## foo". + * We can work around this easily enough with these helpers. + */ +#define __anv_cmd_length(cmd) cmd ## _length +#define __anv_cmd_length_bias(cmd) cmd ## _length_bias +#define __anv_cmd_header(cmd) cmd ## _header +#define __anv_cmd_pack(cmd) cmd ## _pack + +#define anv_batch_emit(batch, cmd, ...) do { \ + void *__dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ + struct cmd __template = { \ + __anv_cmd_header(cmd), \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(cmd)(batch, __dst, &__template); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, __anv_cmd_length(cmd) * 4)); \ } while (0) #define anv_batch_emitn(batch, n, cmd, ...) 
({ \ void *__dst = anv_batch_emit_dwords(batch, n); \ struct cmd __template = { \ - cmd ## _header, \ - .DwordLength = n - cmd ## _length_bias, \ + __anv_cmd_header(cmd), \ + .DwordLength = n - __anv_cmd_length_bias(cmd), \ __VA_ARGS__ \ }; \ - cmd ## _pack(batch, __dst, &__template); \ + __anv_cmd_pack(cmd)(batch, __dst, &__template); \ __dst; \ }) -- cgit v1.2.3 From f0390bcad6b5e1f1e94bbf762ccc98bf499c022b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 17 Nov 2015 07:07:02 -0800 Subject: anv: Add initial Haswell support --- src/vulkan/Makefile.am | 24 ++++++++++--- src/vulkan/anv_device.c | 10 ++++-- src/vulkan/anv_image.c | 5 ++- src/vulkan/anv_pipeline.c | 10 ++++-- src/vulkan/anv_private.h | 24 +++++++++++++ src/vulkan/gen7_cmd_buffer.c | 83 +++++++++++++++++++++++++------------------- src/vulkan/gen7_pipeline.c | 22 +++++++----- src/vulkan/gen7_state.c | 70 ++++++++++++++++++++++++------------- 8 files changed, 171 insertions(+), 77 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 6e4de53b2d0..c484f2d92d6 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -34,6 +34,12 @@ lib_LTLIBRARIES = libvulkan.la check_LTLIBRARIES = libvulkan-test.la +PER_GEN_LIBS = \ + libanv-gen7.la \ + libanv-gen75.la + +noinst_LTLIBRARIES = $(PER_GEN_LIBS) + # The gallium includes are for the util/u_math.h include from main/macros.h AM_CPPFLAGS = \ @@ -81,9 +87,6 @@ VULKAN_SOURCES = \ gen8_state.c \ gen8_cmd_buffer.c \ gen8_pipeline.c \ - gen7_state.c \ - gen7_cmd_buffer.c \ - gen7_pipeline.c \ isl.c \ isl_format_layout.c @@ -92,6 +95,18 @@ BUILT_SOURCES = \ anv_entrypoints.c \ isl_format_layout.c +libanv_gen7_la_SOURCES = \ + gen7_cmd_buffer.c \ + gen7_pipeline.c \ + gen7_state.c +libanv_gen7_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=70 + +libanv_gen75_la_SOURCES = \ + gen7_cmd_buffer.c \ + gen7_pipeline.c \ + gen7_state.c +libanv_gen75_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=75 + if 
HAVE_EGL_PLATFORM_WAYLAND BUILT_SOURCES += \ wayland-drm-protocol.c \ @@ -131,7 +146,8 @@ libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ ../mesa/libmesa.la \ ../mesa/drivers/dri/common/libdri_test_stubs.la \ - -lpthread -ldl -lstdc++ + -lpthread -ldl -lstdc++ \ + $(PER_GEN_LIBS) # Libvulkan with dummy gem. Used for unit tests. diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 0643944fa23..406792af95e 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -83,8 +83,9 @@ anv_physical_device_init(struct anv_physical_device *device, goto fail; } - if (device->info->gen == 7 && - !device->info->is_haswell && !device->info->is_baytrail) { + if (device->info->is_haswell) { + fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n"); + } else if (device->info->gen == 7 && !device->info->is_baytrail) { fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); } else if (device->info->gen == 8 && !device->info->is_cherryview) { /* Briadwell is as fully supported as anything */ @@ -1439,7 +1440,10 @@ anv_fill_buffer_surface_state(struct anv_device *device, void *state, { switch (device->info.gen) { case 7: - gen7_fill_buffer_surface_state(state, format, offset, range, stride); + if (device->info.is_haswell) + gen75_fill_buffer_surface_state(state, format, offset, range, stride); + else + gen7_fill_buffer_surface_state(state, format, offset, range, stride); break; case 8: gen8_fill_buffer_surface_state(state, format, offset, range, stride); diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 7808454262e..60d23a17f5f 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -565,7 +565,10 @@ anv_image_view_init(struct anv_image_view *iview, switch (device->info.gen) { case 7: - gen7_image_view_init(iview, device, pCreateInfo, cmd_buffer); + if (device->info.is_haswell) + gen75_image_view_init(iview, device, pCreateInfo, 
cmd_buffer); + else + gen7_image_view_init(iview, device, pCreateInfo, cmd_buffer); break; case 8: gen8_image_view_init(iview, device, pCreateInfo, cmd_buffer); diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 6c9deaddda7..597188c5f24 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -1087,7 +1087,10 @@ anv_graphics_pipeline_create( switch (device->info.gen) { case 7: - return gen7_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); + if (device->info.is_haswell) + return gen75_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); + else + return gen7_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); case 8: return gen8_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); default: @@ -1129,7 +1132,10 @@ static VkResult anv_compute_pipeline_create( switch (device->info.gen) { case 7: - return gen7_compute_pipeline_create(_device, pCreateInfo, pPipeline); + if (device->info.is_haswell) + return gen75_compute_pipeline_create(_device, pCreateInfo, pPipeline); + else + return gen7_compute_pipeline_create(_device, pCreateInfo, pPipeline); case 8: return gen8_compute_pipeline_create(_device, pCreateInfo, pPipeline); default: diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a074ecda751..03e05fcb238 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -677,6 +677,11 @@ __gen_combine_address(struct anv_batch *batch, void *location, .L3CacheabilityControlL3CC = 1, \ } +#define GEN75_MOCS (struct GEN75_MEMORY_OBJECT_CONTROL_STATE) { \ + .LLCeLLCCacheabilityControlLLCCC = 0, \ + .L3CacheabilityControlL3CC = 1, \ +} + #define GEN8_MOCS { \ .MemoryTypeLLCeLLCCacheabilityControl = WB, \ .TargetCache = L3DefertoPATforLLCeLLCselection, \ @@ -1198,6 +1203,12 @@ gen7_graphics_pipeline_create(VkDevice _device, const struct anv_graphics_pipeline_create_info *extra, VkPipeline *pPipeline); +VkResult +gen75_graphics_pipeline_create(VkDevice 
_device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + VkPipeline *pPipeline); + VkResult gen8_graphics_pipeline_create(VkDevice _device, const VkGraphicsPipelineCreateInfo *pCreateInfo, @@ -1207,6 +1218,10 @@ VkResult gen7_compute_pipeline_create(VkDevice _device, const VkComputePipelineCreateInfo *pCreateInfo, VkPipeline *pPipeline); +VkResult +gen75_compute_pipeline_create(VkDevice _device, + const VkComputePipelineCreateInfo *pCreateInfo, + VkPipeline *pPipeline); VkResult gen8_compute_pipeline_create(VkDevice _device, @@ -1363,6 +1378,12 @@ gen7_image_view_init(struct anv_image_view *iview, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); +void +gen75_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + void gen8_image_view_init(struct anv_image_view *iview, struct anv_device *device, @@ -1377,6 +1398,9 @@ void anv_fill_buffer_surface_state(struct anv_device *device, void *state, void gen7_fill_buffer_surface_state(void *state, const struct anv_format *format, uint32_t offset, uint32_t range, uint32_t stride); +void gen75_fill_buffer_surface_state(void *state, const struct anv_format *format, + uint32_t offset, uint32_t range, + uint32_t stride); void gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, uint32_t offset, uint32_t range, uint32_t stride); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 4f5d50fe167..61592111fca 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -30,9 +30,10 @@ #include "anv_private.h" #include "gen7_pack.h" +#include "gen75_pack.h" static void -gen7_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) { static const uint32_t push_constant_opcodes[] = { 
[VK_SHADER_STAGE_VERTEX] = 21, @@ -65,9 +66,8 @@ gen7_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.push_constants_dirty &= ~flushed; } - -void -gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; struct anv_bo *scratch_bo = NULL; @@ -198,8 +198,8 @@ flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, VkShaderStage stage) return VK_SUCCESS; } -void -gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) { VkShaderStage s; VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & @@ -289,8 +289,8 @@ emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, .ScissorRectPointer = scissor_state.offset); } -void -gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_emit_scissor)(struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer->state.dynamic.scissor.count > 0) { emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, @@ -313,7 +313,12 @@ static const uint32_t vk_to_gen_index_type[] = { [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, }; -void gen7_CmdBindIndexBuffer( +static const uint32_t restart_index_for_type[] = { + [VK_INDEX_TYPE_UINT16] = UINT16_MAX, + [VK_INDEX_TYPE_UINT32] = UINT32_MAX, +}; + +void genX(CmdBindIndexBuffer)( VkCmdBuffer cmdBuffer, VkBuffer _buffer, VkDeviceSize offset, @@ -323,13 +328,15 @@ void gen7_CmdBindIndexBuffer( ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; + if (ANV_IS_HASWELL) + cmd_buffer->state.restart_index = restart_index_for_type[indexType]; cmd_buffer->state.gen7.index_buffer = buffer; cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType]; 
cmd_buffer->state.gen7.index_offset = offset; } static VkResult -gen7_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; @@ -366,7 +373,7 @@ gen7_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) } static void -gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; VkResult result; @@ -385,7 +392,7 @@ gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { /* FIXME: figure out descriptors for gen7 */ - result = gen7_flush_compute_descriptor_set(cmd_buffer); + result = flush_compute_descriptor_set(cmd_buffer); assert(result == VK_SUCCESS); cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; } @@ -394,7 +401,7 @@ gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) } static void -gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; uint32_t *p; @@ -469,7 +476,7 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); if (cmd_buffer->state.push_constants_dirty) - gen7_cmd_buffer_flush_push_constants(cmd_buffer); + cmd_buffer_flush_push_constants(cmd_buffer); /* We use the gen8 state here because it only contains the additional * min/max fields and, since they occur at the end of the packet and @@ -564,6 +571,12 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer; 
uint32_t offset = cmd_buffer->state.gen7.index_offset; + if (ANV_IS_HASWELL) { + anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF, + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index); + } + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_INDEX_BUFFER, .CutIndexEnable = pipeline->primitive_restart, .IndexFormat = cmd_buffer->state.gen7.index_type, @@ -576,7 +589,7 @@ gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty = 0; } -void gen7_CmdDraw( +void genX(CmdDraw)( VkCmdBuffer cmdBuffer, uint32_t vertexCount, uint32_t instanceCount, @@ -586,7 +599,7 @@ void gen7_CmdDraw( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - gen7_cmd_buffer_flush_state(cmd_buffer); + cmd_buffer_flush_state(cmd_buffer); anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, .VertexAccessType = SEQUENTIAL, @@ -598,7 +611,7 @@ void gen7_CmdDraw( .BaseVertexLocation = 0); } -void gen7_CmdDrawIndexed( +void genX(CmdDrawIndexed)( VkCmdBuffer cmdBuffer, uint32_t indexCount, uint32_t instanceCount, @@ -609,7 +622,7 @@ void gen7_CmdDrawIndexed( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - gen7_cmd_buffer_flush_state(cmd_buffer); + cmd_buffer_flush_state(cmd_buffer); anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, .VertexAccessType = RANDOM, @@ -646,7 +659,7 @@ gen7_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) #define GEN7_3DPRIM_START_INSTANCE 0x243C #define GEN7_3DPRIM_BASE_VERTEX 0x2440 -void gen7_CmdDrawIndirect( +void genX(CmdDrawIndirect)( VkCmdBuffer cmdBuffer, VkBuffer _buffer, VkDeviceSize offset, @@ -659,7 +672,7 @@ void gen7_CmdDrawIndirect( struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; - gen7_cmd_buffer_flush_state(cmd_buffer); + cmd_buffer_flush_state(cmd_buffer); gen7_batch_lrm(&cmd_buffer->batch, 
GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); @@ -673,7 +686,7 @@ void gen7_CmdDrawIndirect( .PrimitiveTopologyType = pipeline->topology); } -void gen7_CmdDrawIndexedIndirect( +void genX(CmdDrawIndexedIndirect)( VkCmdBuffer cmdBuffer, VkBuffer _buffer, VkDeviceSize offset, @@ -686,7 +699,7 @@ void gen7_CmdDrawIndexedIndirect( struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; - gen7_cmd_buffer_flush_state(cmd_buffer); + cmd_buffer_flush_state(cmd_buffer); gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); @@ -700,7 +713,7 @@ void gen7_CmdDrawIndexedIndirect( .PrimitiveTopologyType = pipeline->topology); } -void gen7_CmdDispatch( +void genX(CmdDispatch)( VkCmdBuffer cmdBuffer, uint32_t x, uint32_t y, @@ -710,7 +723,7 @@ void gen7_CmdDispatch( struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - gen7_cmd_buffer_flush_compute_state(cmd_buffer); + cmd_buffer_flush_compute_state(cmd_buffer); anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, .SIMDSize = prog_data->simd_size / 16, @@ -730,7 +743,7 @@ void gen7_CmdDispatch( #define GPGPU_DISPATCHDIMY 0x2504 #define GPGPU_DISPATCHDIMZ 0x2508 -void gen7_CmdDispatchIndirect( +void genX(CmdDispatchIndirect)( VkCmdBuffer cmdBuffer, VkBuffer _buffer, VkDeviceSize offset) @@ -742,7 +755,7 @@ void gen7_CmdDispatchIndirect( struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; - gen7_cmd_buffer_flush_compute_state(cmd_buffer); + cmd_buffer_flush_compute_state(cmd_buffer); gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); @@ -760,7 +773,7 @@ void gen7_CmdDispatchIndirect( anv_batch_emit(&cmd_buffer->batch, 
GEN7_MEDIA_STATE_FLUSH); } -void gen7_CmdPipelineBarrier( +void genX(CmdPipelineBarrier)( VkCmdBuffer cmdBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, @@ -772,7 +785,7 @@ void gen7_CmdPipelineBarrier( } static void -gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_image_view *iview = @@ -854,14 +867,14 @@ gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CLEAR_PARAMS); } -void -gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_begin_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) { cmd_buffer->state.subpass = subpass; cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - gen7_cmd_buffer_emit_depth_stencil(cmd_buffer); + cmd_buffer_emit_depth_stencil(cmd_buffer); } static void @@ -890,7 +903,7 @@ begin_render_pass(struct anv_cmd_buffer *cmd_buffer, pRenderPassBegin->pClearValues); } -void gen7_CmdBeginRenderPass( +void genX(CmdBeginRenderPass)( VkCmdBuffer cmdBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkRenderPassContents contents) @@ -903,7 +916,7 @@ void gen7_CmdBeginRenderPass( gen7_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); } -void gen7_CmdNextSubpass( +void genX(CmdNextSubpass)( VkCmdBuffer cmdBuffer, VkRenderPassContents contents) { @@ -914,7 +927,7 @@ void gen7_CmdNextSubpass( gen7_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); } -void gen7_CmdEndRenderPass( +void genX(CmdEndRenderPass)( VkCmdBuffer cmdBuffer) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index d53489c2db1..5e29df642da 100644 --- a/src/vulkan/gen7_pipeline.c +++ 
b/src/vulkan/gen7_pipeline.c @@ -30,6 +30,7 @@ #include "anv_private.h" #include "gen7_pack.h" +#include "gen75_pack.h" static void gen7_emit_vertex_input(struct anv_pipeline *pipeline, @@ -341,8 +342,8 @@ scratch_space(const struct brw_stage_prog_data *prog_data) return ffs(prog_data->total_scratch / 1024); } -VkResult -gen7_graphics_pipeline_create( +GENX_FUNC(GEN7, GEN75) VkResult +genX(graphics_pipeline_create)( VkDevice _device, const VkGraphicsPipelineCreateInfo* pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, @@ -478,9 +479,9 @@ gen7_graphics_pipeline_create( #endif if (pipeline->vs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VS, .VSFunctionEnable = false); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .VSFunctionEnable = false); else - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .KernelStartPointer = pipeline->vs_vec4, .ScratchSpaceBaseOffset = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), @@ -497,12 +498,12 @@ gen7_graphics_pipeline_create( const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; if (pipeline->gs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) { - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_GS, .GSEnable = false); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); } else { urb_offset = 1; urb_length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset; - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_GS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .KernelStartPointer = pipeline->gs_vec4, .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), @@ -521,7 +522,11 @@ gen7_graphics_pipeline_create( .DispatchMode = gs_prog_data->base.dispatch_mode, .GSStatisticsEnable = true, .IncludePrimitiveID = 
gs_prog_data->include_primitive_id, +# if (ANV_IS_HASWELL) + .ReorderMode = REORDER_TRAILING, +# else .ReorderEnable = true, +# endif .GSEnable = true); } @@ -539,7 +544,7 @@ gen7_graphics_pipeline_create( .VertexURBEntryReadOffset = urb_offset, .PointSpriteTextureCoordinateOrigin = UPPERLEFT); - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), .KernelStartPointer0 = pipeline->ps_ksp0, .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), @@ -589,7 +594,8 @@ gen7_graphics_pipeline_create( return VK_SUCCESS; } -VkResult gen7_compute_pipeline_create( +GENX_FUNC(GEN7, GEN75) VkResult +genX(compute_pipeline_create)( VkDevice _device, const VkComputePipelineCreateInfo* pCreateInfo, VkPipeline* pPipeline) diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 596e232ab6f..7adb4485b90 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -30,29 +30,37 @@ #include "anv_private.h" #include "gen7_pack.h" +#include "gen75_pack.h" -void -gen7_fill_buffer_surface_state(void *state, const struct anv_format *format, - uint32_t offset, uint32_t range, uint32_t stride) +GENX_FUNC(GEN7, GEN75) void +genX(fill_buffer_surface_state)(void *state, const struct anv_format *format, + uint32_t offset, uint32_t range, + uint32_t stride) { uint32_t num_elements = range / stride; - struct GEN7_RENDER_SURFACE_STATE surface_state = { + struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = SURFTYPE_BUFFER, .SurfaceFormat = format->surface_format, .SurfaceVerticalAlignment = VALIGN_4, .SurfaceHorizontalAlignment = HALIGN_4, .TiledSurface = false, .RenderCacheReadWriteMode = false, - .SurfaceObjectControlState = GEN7_MOCS, + .SurfaceObjectControlState = GENX(MOCS), .Height = (num_elements >> 7) & 0x3fff, .Width = num_elements & 0x7f, .Depth = (num_elements >> 21) & 0x3f, .SurfacePitch = stride - 1, +# if 
(ANV_IS_HASWELL) + .ShaderChannelSelectR = SCS_RED, + .ShaderChannelSelectG = SCS_GREEN, + .ShaderChannelSelectB = SCS_BLUE, + .ShaderChannelSelectA = SCS_ALPHA, +# endif .SurfaceBaseAddress = { NULL, offset }, }; - GEN7_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); + GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); } static const uint32_t vk_to_gen_tex_filter[] = { @@ -86,8 +94,8 @@ static const uint32_t vk_to_gen_compare_op[] = { }; static struct anv_state -gen7_alloc_surface_state(struct anv_device *device, - struct anv_cmd_buffer *cmd_buffer) +alloc_surface_state(struct anv_device *device, + struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer) { return anv_cmd_buffer_alloc_surface_state(cmd_buffer); @@ -96,7 +104,7 @@ gen7_alloc_surface_state(struct anv_device *device, } } -VkResult gen7_CreateSampler( +VkResult genX(CreateSampler)( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler) @@ -174,11 +182,20 @@ static const uint8_t anv_valign[] = { [4] = VALIGN_4, }; -void -gen7_image_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) +static const uint32_t vk_to_gen_swizzle[] = { + [VK_CHANNEL_SWIZZLE_ZERO] = SCS_ZERO, + [VK_CHANNEL_SWIZZLE_ONE] = SCS_ONE, + [VK_CHANNEL_SWIZZLE_R] = SCS_RED, + [VK_CHANNEL_SWIZZLE_G] = SCS_GREEN, + [VK_CHANNEL_SWIZZLE_B] = SCS_BLUE, + [VK_CHANNEL_SWIZZLE_A] = SCS_ALPHA +}; + +GENX_FUNC(GEN7, GEN75) void +genX(image_view_init)(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); @@ -211,7 +228,7 @@ gen7_image_view_init(struct anv_image_view *iview, depth = image->extent.depth; } - struct GEN7_RENDER_SURFACE_STATE surface_state = { + struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = image->surface_type, .SurfaceArray = 
image->array_size > 1, .SurfaceFormat = format->surface_format, @@ -239,23 +256,29 @@ gen7_image_view_init(struct anv_image_view *iview, .XOffset = 0, .YOffset = 0, - .SurfaceObjectControlState = GEN7_MOCS, + .SurfaceObjectControlState = GENX(MOCS), .MIPCountLOD = 0, /* TEMPLATE */ .SurfaceMinLOD = 0, /* TEMPLATE */ .MCSEnable = false, +# if (ANV_IS_HASWELL) + .ShaderChannelSelectR = vk_to_gen_swizzle[pCreateInfo->channels.r], + .ShaderChannelSelectG = vk_to_gen_swizzle[pCreateInfo->channels.g], + .ShaderChannelSelectB = vk_to_gen_swizzle[pCreateInfo->channels.b], + .ShaderChannelSelectA = vk_to_gen_swizzle[pCreateInfo->channels.a], +# else /* XXX: Seriously? */ .RedClearColor = 0, .GreenClearColor = 0, .BlueClearColor = 0, .AlphaClearColor = 0, +# endif .ResourceMinLOD = 0.0, .SurfaceBaseAddress = { NULL, iview->offset }, }; if (image->needs_nonrt_surface_state) { - iview->nonrt_surface_state = - gen7_alloc_surface_state(device, cmd_buffer); + iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); surface_state.RenderCacheReadWriteMode = false; @@ -266,13 +289,12 @@ gen7_image_view_init(struct anv_image_view *iview, surface_state.SurfaceMinLOD = range->baseMipLevel; surface_state.MIPCountLOD = range->mipLevels - 1; - GEN7_RENDER_SURFACE_STATE_pack(NULL, iview->nonrt_surface_state.map, - &surface_state); + GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, + &surface_state); } if (image->needs_color_rt_surface_state) { - iview->color_rt_surface_state = - gen7_alloc_surface_state(device, cmd_buffer); + iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); surface_state.RenderCacheReadWriteMode = 0; /* Write only */ @@ -285,7 +307,7 @@ gen7_image_view_init(struct anv_image_view *iview, surface_state.MIPCountLOD = range->baseMipLevel; surface_state.SurfaceMinLOD = 0; - GEN7_RENDER_SURFACE_STATE_pack(NULL, iview->color_rt_surface_state.map, - &surface_state); + GENX(RENDER_SURFACE_STATE_pack)(NULL, 
iview->color_rt_surface_state.map, + &surface_state); } } -- cgit v1.2.3 From b707e90b6e43f311109101a6816ceb7bb34cceee Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 17 Nov 2015 17:08:42 -0800 Subject: anv/gen7: Don't use the upper bound on dynamic state base address It doesn't do much for us and, if we have to resize the dynamic state block pool for any reason, it becomes out-of-date. --- src/vulkan/gen7_cmd_buffer.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 61592111fca..d1a583d0bdf 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -91,9 +91,6 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, .DynamicStateMemoryObjectControlState = GEN7_MOCS, .DynamicStateBaseAddressModifyEnable = true, - .DynamicStateAccessUpperBound = { &device->dynamic_state_block_pool.bo, - device->dynamic_state_block_pool.bo.size }, - .DynamicStateAccessUpperBoundModifyEnable = true, .IndirectObjectBaseAddress = { NULL, 0 }, .IndirectObjectMemoryObjectControlState = GEN7_MOCS, -- cgit v1.2.3 From 9e39bdabad15b82bf6425497adc4ee43669101e6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 17 Nov 2015 17:09:27 -0800 Subject: anv/gen7: Implement CmdPipelineBarrier --- src/vulkan/gen7_cmd_buffer.c | 129 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index d1a583d0bdf..db420cdaa22 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -770,6 +770,7 @@ void genX(CmdDispatchIndirect)( anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH); } +/* XXX: This is exactly identical to the gen8 version. 
*/ void genX(CmdPipelineBarrier)( VkCmdBuffer cmdBuffer, VkPipelineStageFlags srcStageMask, @@ -778,7 +779,133 @@ void genX(CmdPipelineBarrier)( uint32_t memBarrierCount, const void* const* ppMemBarriers) { - stub(); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + uint32_t b, *dw; + + struct GENX(PIPE_CONTROL) cmd = { + GENX(PIPE_CONTROL_header), + .PostSyncOperation = NoWrite, + }; + + /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */ + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { + /* This is just what PIPE_CONTROL does */ + } + + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { + cmd.StallAtPixelScoreboard = true; + } + + + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT)) { + cmd.CommandStreamerStallEnable = true; + } + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { + anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); + } + + /* On our hardware, all stages will wait for execution as needed. */ + (void)destStageMask; + + /* We checked all known VkPipeEventFlags. */ + anv_assert(srcStageMask == 0); + + /* XXX: Right now, we're really dumb and just flush whatever categories + * the app asks for. One of these days we may make this a bit better + * but right now that's all the hardware allows for in most areas. 
+ */ + VkMemoryOutputFlags out_flags = 0; + VkMemoryInputFlags in_flags = 0; + + for (uint32_t i = 0; i < memBarrierCount; i++) { + const struct anv_common *common = ppMemBarriers[i]; + switch (common->sType) { + case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + default: + unreachable("Invalid memory barrier type"); + } + } + + for_each_bit(b, out_flags) { + switch ((VkMemoryOutputFlags)(1 << b)) { + case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: + cmd.DCFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: + cmd.RenderTargetCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + cmd.DepthCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_TRANSFER_BIT: + cmd.RenderTargetCacheFlushEnable = true; + cmd.DepthCacheFlushEnable = true; + break; + default: + unreachable("Invalid memory output flag"); + } + } + + for_each_bit(b, out_flags) { + switch ((VkMemoryInputFlags)(1 << b)) { + case VK_MEMORY_INPUT_HOST_READ_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: + case VK_MEMORY_INPUT_INDEX_FETCH_BIT: + case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: + cmd.VFCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_UNIFORM_READ_BIT: + cmd.ConstantCacheInvalidationEnable = true; + /* fallthrough */ + case VK_MEMORY_INPUT_SHADER_READ_BIT: + cmd.DCFlushEnable = true; + 
cmd.TextureCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: + case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + break; /* XXX: Hunh? */ + case VK_MEMORY_INPUT_TRANSFER_BIT: + cmd.TextureCacheInvalidationEnable = true; + break; + } + } + + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); + GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &cmd); } static void -- cgit v1.2.3 From e9d634f4adeb0343d255dcd46ea7eb0d79f0416c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 17 Nov 2015 19:10:31 -0800 Subject: gen7/pipeline: Re-arrange stencil parameters to match gen8 --- src/vulkan/gen7_pipeline.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 5e29df642da..bcfa986769e 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -227,27 +227,26 @@ gen7_emit_ds_state(struct anv_pipeline *pipeline, bool has_stencil = false; /* enable if subpass has stencil? */ struct GEN7_DEPTH_STENCIL_STATE state = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + /* Is this what we need to do? 
*/ .StencilBufferWriteEnable = has_stencil, .StencilTestEnable = info->stencilTestEnable, - .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], - .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], - .DoubleSidedStencilEnable = true, - - .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], - .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.stencilDepthFailOp], .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], - - .DepthTestEnable = info->depthTestEnable, - .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], - .DepthBufferWriteEnable = info->depthWriteEnable, + .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.stencilDepthFailOp], + .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], }; - + GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state); } -- cgit v1.2.3 From fb8b2f5f9e029fdaaf78cac4b7f72084c4ae4ea2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 18 Nov 2015 11:43:48 -0800 Subject: anv/gen7: A bunch of depth-stencil fixes There are various bits which move around between Haswell and Ivy Bridge that we weren't taking into account. This also makes us actually set the StencilWriteEnable in a sane way. 
--- src/vulkan/anv_private.h | 1 + src/vulkan/gen7_cmd_buffer.c | 22 ++++++++++++++-------- src/vulkan/gen7_pipeline.c | 5 ----- 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 03e05fcb238..fa6d48f7481 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -838,6 +838,7 @@ enum anv_cmd_dirty_bits { ANV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1, ANV_CMD_DIRTY_PIPELINE = 1 << 9, ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10, + ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11, }; typedef uint32_t anv_cmd_dirty_mask_t; diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index db420cdaa22..9b10f080850 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -531,14 +531,16 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) } if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_RENDER_TARGETS | ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { uint32_t depth_stencil_dw[GEN7_DEPTH_STENCIL_STATE_length]; + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + struct GEN7_DEPTH_STENCIL_STATE depth_stencil = { - /* Is this what we need to do? 
*/ - .StencilBufferWriteEnable = - cmd_buffer->state.dynamic.stencil_write_mask.front != 0, + .StencilBufferWriteEnable = iview && iview->format->has_stencil, .StencilTestMask = cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, @@ -920,7 +922,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) /* Emit 3DSTATE_DEPTH_BUFFER */ if (has_depth) { - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), .SurfaceType = SURFTYPE_2D, .DepthWriteEnable = iview->format->depth_format, .StencilWriteEnable = has_stencil, @@ -936,7 +938,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .LOD = 0, .Depth = 1 - 1, .MinimumArrayElement = 0, - .DepthBufferObjectControlState = GEN7_MOCS, + .DepthBufferObjectControlState = GENX(MOCS), .RenderTargetViewExtent = 1 - 1); } else { /* Even when no depth buffer is present, the hardware requires that @@ -956,7 +958,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) * actual framebuffer's width and height, even when neither depth buffer * nor stencil buffer is present. */ - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), .SurfaceType = SURFTYPE_2D, .SurfaceFormat = D16_UNORM, .Width = fb->width - 1, @@ -966,8 +968,11 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) /* Emit 3DSTATE_STENCIL_BUFFER */ if (has_stencil) { - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER, - .StencilBufferObjectControlState = GEN7_MOCS, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), +# if (ANV_IS_HASWELL) + .StencilBufferEnable = true, +# endif + .StencilBufferObjectControlState = GENX(MOCS), /* Stencil buffers have strange pitch. 
The PRM says: * @@ -997,6 +1002,7 @@ genX(cmd_buffer_begin_subpass)(struct anv_cmd_buffer *cmd_buffer, { cmd_buffer->state.subpass = subpass; cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS; cmd_buffer_emit_depth_stencil(cmd_buffer); } diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index bcfa986769e..7d44c72b1a2 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -224,17 +224,12 @@ gen7_emit_ds_state(struct anv_pipeline *pipeline, return; } - bool has_stencil = false; /* enable if subpass has stencil? */ - struct GEN7_DEPTH_STENCIL_STATE state = { .DepthTestEnable = info->depthTestEnable, .DepthBufferWriteEnable = info->depthWriteEnable, .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], .DoubleSidedStencilEnable = true, - /* Is this what we need to do? */ - .StencilBufferWriteEnable = has_stencil, - .StencilTestEnable = info->stencilTestEnable, .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], -- cgit v1.2.3 From 6f613abc2bf8fc3cf70c51a1d569bc4eb9dd18af Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 18 Nov 2015 12:25:11 -0800 Subject: anv/cmd_buffer: Add a new genX_cmd_buffer file for shared code This file contains code that can be shared across gens modulo recompiling. In particular, we can share STATE_BASE_ADDRESS setup and handling of the vkPipelineBarrier call. Not sharing STATE_BASE_ADDRESS setup has already been a source of bugs and the gen7 and gen8 implementations of PipelineBarrier were line-for-line identical. Incidentally, this should fix MOCS settings for dynamic and surface state on Haswell. 
--- src/vulkan/Makefile.am | 9 +- src/vulkan/anv_cmd_buffer.c | 5 +- src/vulkan/anv_private.h | 1 + src/vulkan/gen7_cmd_buffer.c | 219 ---------------------------------- src/vulkan/gen8_cmd_buffer.c | 228 ------------------------------------ src/vulkan/genX_cmd_buffer.c | 273 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 286 insertions(+), 449 deletions(-) create mode 100644 src/vulkan/genX_cmd_buffer.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index c484f2d92d6..e3825d711d1 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -36,7 +36,8 @@ check_LTLIBRARIES = libvulkan-test.la PER_GEN_LIBS = \ libanv-gen7.la \ - libanv-gen75.la + libanv-gen75.la \ + libanv-gen8.la noinst_LTLIBRARIES = $(PER_GEN_LIBS) @@ -96,17 +97,23 @@ BUILT_SOURCES = \ isl_format_layout.c libanv_gen7_la_SOURCES = \ + genX_cmd_buffer.c \ gen7_cmd_buffer.c \ gen7_pipeline.c \ gen7_state.c libanv_gen7_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=70 libanv_gen75_la_SOURCES = \ + genX_cmd_buffer.c \ gen7_cmd_buffer.c \ gen7_pipeline.c \ gen7_state.c libanv_gen75_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=75 +libanv_gen8_la_SOURCES = \ + genX_cmd_buffer.c +libanv_gen8_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=80 + if HAVE_EGL_PLATFORM_WAYLAND BUILT_SOURCES += \ wayland-drm-protocol.c \ diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 1a37d8124d9..6dedc3f335f 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -244,7 +244,10 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { switch (cmd_buffer->device->info.gen) { case 7: - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + if (cmd_buffer->device->info.is_haswell) + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + else + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); case 8: return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); default: diff --git 
a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index fa6d48f7481..9f76363c764 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1064,6 +1064,7 @@ void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 9b10f080850..4b3922d8278 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -66,87 +66,6 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.push_constants_dirty &= ~flushed; } -GENX_FUNC(GEN7, GEN7) void -genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_bo *scratch_bo = NULL; - - cmd_buffer->state.scratch_size = - anv_block_pool_size(&device->scratch_block_pool); - if (cmd_buffer->state.scratch_size > 0) - scratch_bo = &device->scratch_block_pool.bo; - - anv_batch_emit(&cmd_buffer->batch, GEN7_STATE_BASE_ADDRESS, - .GeneralStateBaseAddress = { scratch_bo, 0 }, - .GeneralStateMemoryObjectControlState = GEN7_MOCS, - .GeneralStateBaseAddressModifyEnable = true, - .GeneralStateAccessUpperBound = { scratch_bo, scratch_bo->size }, - .GeneralStateAccessUpperBoundModifyEnable = true, - - .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer), - .SurfaceStateMemoryObjectControlState = GEN7_MOCS, - .SurfaceStateBaseAddressModifyEnable = true, - - .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, - .DynamicStateMemoryObjectControlState = GEN7_MOCS, - 
.DynamicStateBaseAddressModifyEnable = true, - - .IndirectObjectBaseAddress = { NULL, 0 }, - .IndirectObjectMemoryObjectControlState = GEN7_MOCS, - .IndirectObjectBaseAddressModifyEnable = true, - - .IndirectObjectAccessUpperBound = { NULL, 0xffffffff }, - .IndirectObjectAccessUpperBoundModifyEnable = true, - - .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, - .InstructionMemoryObjectControlState = GEN7_MOCS, - .InstructionBaseAddressModifyEnable = true, - .InstructionAccessUpperBound = { &device->instruction_block_pool.bo, - device->instruction_block_pool.bo.size }, - .InstructionAccessUpperBoundModifyEnable = true); - - /* After re-setting the surface state base address, we have to do some - * cache flusing so that the sampler engine will pick up the new - * SURFACE_STATE objects and binding tables. From the Broadwell PRM, - * Shared Function > 3D Sampler > State > State Caching (page 96): - * - * Coherency with system memory in the state cache, like the texture - * cache is handled partially by software. It is expected that the - * command stream or shader will issue Cache Flush operation or - * Cache_Flush sampler message to ensure that the L1 cache remains - * coherent with system memory. - * - * [...] - * - * Whenever the value of the Dynamic_State_Base_Addr, - * Surface_State_Base_Addr are altered, the L1 state cache must be - * invalidated to ensure the new surface or sampler state is fetched - * from system memory. - * - * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit - * which, according the PIPE_CONTROL instruction documentation in the - * Broadwell PRM: - * - * Setting this bit is independent of any other bit in this packet. - * This bit controls the invalidation of the L1 and L2 state caches - * at the top of the pipe i.e. at the parsing time. 
- * - * Unfortunately, experimentation seems to indicate that state cache - * invalidation through a PIPE_CONTROL does nothing whatsoever in - * regards to surface state and binding tables. In stead, it seems that - * invalidating the texture cache is what is actually needed. - * - * XXX: As far as we have been able to determine through - * experimentation, shows that flush the texture cache appears to be - * sufficient. The theory here is that all of the sampling/rendering - * units cache the binding table in the texture cache. However, we have - * yet to be able to actually confirm this. - */ - anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, - .TextureCacheInvalidationEnable = true); -} - static VkResult flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, VkShaderStage stage) { @@ -772,144 +691,6 @@ void genX(CmdDispatchIndirect)( anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH); } -/* XXX: This is exactly identical to the gen8 version. */ -void genX(CmdPipelineBarrier)( - VkCmdBuffer cmdBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - VkBool32 byRegion, - uint32_t memBarrierCount, - const void* const* ppMemBarriers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - uint32_t b, *dw; - - struct GENX(PIPE_CONTROL) cmd = { - GENX(PIPE_CONTROL_header), - .PostSyncOperation = NoWrite, - }; - - /* XXX: I think waitEvent is a no-op on our HW. We should verify that. 
*/ - - if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { - /* This is just what PIPE_CONTROL does */ - } - - if (anv_clear_mask(&srcStageMask, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { - cmd.StallAtPixelScoreboard = true; - } - - - if (anv_clear_mask(&srcStageMask, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT)) { - cmd.CommandStreamerStallEnable = true; - } - - if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { - anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); - } - - /* On our hardware, all stages will wait for execution as needed. */ - (void)destStageMask; - - /* We checked all known VkPipeEventFlags. */ - anv_assert(srcStageMask == 0); - - /* XXX: Right now, we're really dumb and just flush whatever categories - * the app asks for. One of these days we may make this a bit better - * but right now that's all the hardware allows for in most areas. 
- */ - VkMemoryOutputFlags out_flags = 0; - VkMemoryInputFlags in_flags = 0; - - for (uint32_t i = 0; i < memBarrierCount; i++) { - const struct anv_common *common = ppMemBarriers[i]; - switch (common->sType) { - case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - default: - unreachable("Invalid memory barrier type"); - } - } - - for_each_bit(b, out_flags) { - switch ((VkMemoryOutputFlags)(1 << b)) { - case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: - break; /* FIXME: Little-core systems */ - case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: - cmd.DCFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: - cmd.RenderTargetCacheFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: - cmd.DepthCacheFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_TRANSFER_BIT: - cmd.RenderTargetCacheFlushEnable = true; - cmd.DepthCacheFlushEnable = true; - break; - default: - unreachable("Invalid memory output flag"); - } - } - - for_each_bit(b, out_flags) { - switch ((VkMemoryInputFlags)(1 << b)) { - case VK_MEMORY_INPUT_HOST_READ_BIT: - break; /* FIXME: Little-core systems */ - case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: - case VK_MEMORY_INPUT_INDEX_FETCH_BIT: - case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: - cmd.VFCacheInvalidationEnable = true; - break; - case VK_MEMORY_INPUT_UNIFORM_READ_BIT: - cmd.ConstantCacheInvalidationEnable = true; - /* fallthrough */ - case VK_MEMORY_INPUT_SHADER_READ_BIT: - cmd.DCFlushEnable = true; - 
cmd.TextureCacheInvalidationEnable = true; - break; - case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: - case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: - break; /* XXX: Hunh? */ - case VK_MEMORY_INPUT_TRANSFER_BIT: - cmd.TextureCacheInvalidationEnable = true; - break; - } - } - - dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); - GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &cmd); -} - static void cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 9a3e3b5e061..a02d7bb2321 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -916,231 +916,3 @@ void gen8_CmdCopyQueryPoolResults( dst_offset += destStride; } } - -void -gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_bo *scratch_bo = NULL; - - cmd_buffer->state.scratch_size = - anv_block_pool_size(&device->scratch_block_pool); - if (cmd_buffer->state.scratch_size > 0) - scratch_bo = &device->scratch_block_pool.bo; - - /* Emit a render target cache flush. - * - * This isn't documented anywhere in the PRM. However, it seems to be - * necessary prior to changing the surface state base adress. Without - * this, we get GPU hangs when using multi-level command buffers which - * clear depth, reset state base address, and then go render stuff. 
- */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .RenderTargetCacheFlushEnable = true); - - anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, - .GeneralStateBaseAddress = { scratch_bo, 0 }, - .GeneralStateMemoryObjectControlState = GEN8_MOCS, - .GeneralStateBaseAddressModifyEnable = true, - .GeneralStateBufferSize = 0xfffff, - .GeneralStateBufferSizeModifyEnable = true, - - .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer), - .SurfaceStateMemoryObjectControlState = GEN8_MOCS, - .SurfaceStateBaseAddressModifyEnable = true, - - .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, - .DynamicStateMemoryObjectControlState = GEN8_MOCS, - .DynamicStateBaseAddressModifyEnable = true, - .DynamicStateBufferSize = 0xfffff, - .DynamicStateBufferSizeModifyEnable = true, - - .IndirectObjectBaseAddress = { NULL, 0 }, - .IndirectObjectMemoryObjectControlState = GEN8_MOCS, - .IndirectObjectBaseAddressModifyEnable = true, - .IndirectObjectBufferSize = 0xfffff, - .IndirectObjectBufferSizeModifyEnable = true, - - .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, - .InstructionMemoryObjectControlState = GEN8_MOCS, - .InstructionBaseAddressModifyEnable = true, - .InstructionBufferSize = 0xfffff, - .InstructionBuffersizeModifyEnable = true); - - /* After re-setting the surface state base address, we have to do some - * cache flusing so that the sampler engine will pick up the new - * SURFACE_STATE objects and binding tables. From the Broadwell PRM, - * Shared Function > 3D Sampler > State > State Caching (page 96): - * - * Coherency with system memory in the state cache, like the texture - * cache is handled partially by software. It is expected that the - * command stream or shader will issue Cache Flush operation or - * Cache_Flush sampler message to ensure that the L1 cache remains - * coherent with system memory. - * - * [...] 
- * - * Whenever the value of the Dynamic_State_Base_Addr, - * Surface_State_Base_Addr are altered, the L1 state cache must be - * invalidated to ensure the new surface or sampler state is fetched - * from system memory. - * - * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit - * which, according the PIPE_CONTROL instruction documentation in the - * Broadwell PRM: - * - * Setting this bit is independent of any other bit in this packet. - * This bit controls the invalidation of the L1 and L2 state caches - * at the top of the pipe i.e. at the parsing time. - * - * Unfortunately, experimentation seems to indicate that state cache - * invalidation through a PIPE_CONTROL does nothing whatsoever in - * regards to surface state and binding tables. In stead, it seems that - * invalidating the texture cache is what is actually needed. - * - * XXX: As far as we have been able to determine through - * experimentation, shows that flush the texture cache appears to be - * sufficient. The theory here is that all of the sampling/rendering - * units cache the binding table in the texture cache. However, we have - * yet to be able to actually confirm this. - */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .TextureCacheInvalidationEnable = true); -} - -void gen8_CmdPipelineBarrier( - VkCmdBuffer cmdBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - VkBool32 byRegion, - uint32_t memBarrierCount, - const void* const* ppMemBarriers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - uint32_t b, *dw; - - struct GEN8_PIPE_CONTROL cmd = { - GEN8_PIPE_CONTROL_header, - .PostSyncOperation = NoWrite, - }; - - /* XXX: I think waitEvent is a no-op on our HW. We should verify that. 
*/ - - if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { - /* This is just what PIPE_CONTROL does */ - } - - if (anv_clear_mask(&srcStageMask, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { - cmd.StallAtPixelScoreboard = true; - } - - - if (anv_clear_mask(&srcStageMask, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT)) { - cmd.CommandStreamerStallEnable = true; - } - - if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { - anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); - } - - /* On our hardware, all stages will wait for execution as needed. */ - (void)destStageMask; - - /* We checked all known VkPipeEventFlags. */ - anv_assert(srcStageMask == 0); - - /* XXX: Right now, we're really dumb and just flush whatever categories - * the app asks for. One of these days we may make this a bit better - * but right now that's all the hardware allows for in most areas. 
- */ - VkMemoryOutputFlags out_flags = 0; - VkMemoryInputFlags in_flags = 0; - - for (uint32_t i = 0; i < memBarrierCount; i++) { - const struct anv_common *common = ppMemBarriers[i]; - switch (common->sType) { - case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; - break; - } - default: - unreachable("Invalid memory barrier type"); - } - } - - for_each_bit(b, out_flags) { - switch ((VkMemoryOutputFlags)(1 << b)) { - case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: - break; /* FIXME: Little-core systems */ - case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: - cmd.DCFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: - cmd.RenderTargetCacheFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: - cmd.DepthCacheFlushEnable = true; - break; - case VK_MEMORY_OUTPUT_TRANSFER_BIT: - cmd.RenderTargetCacheFlushEnable = true; - cmd.DepthCacheFlushEnable = true; - break; - default: - unreachable("Invalid memory output flag"); - } - } - - for_each_bit(b, out_flags) { - switch ((VkMemoryInputFlags)(1 << b)) { - case VK_MEMORY_INPUT_HOST_READ_BIT: - break; /* FIXME: Little-core systems */ - case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: - case VK_MEMORY_INPUT_INDEX_FETCH_BIT: - case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: - cmd.VFCacheInvalidationEnable = true; - break; - case VK_MEMORY_INPUT_UNIFORM_READ_BIT: - cmd.ConstantCacheInvalidationEnable = true; - /* fallthrough */ - case VK_MEMORY_INPUT_SHADER_READ_BIT: - cmd.DCFlushEnable = true; - 
cmd.TextureCacheInvalidationEnable = true; - break; - case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: - case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: - break; /* XXX: Hunh? */ - case VK_MEMORY_INPUT_TRANSFER_BIT: - cmd.TextureCacheInvalidationEnable = true; - break; - } - } - - dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length); - GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd); -} diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c new file mode 100644 index 00000000000..66f7480a5e5 --- /dev/null +++ b/src/vulkan/genX_cmd_buffer.c @@ -0,0 +1,273 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include + +#include "anv_private.h" + +#if (ANV_GEN == 8) +# include "gen8_pack.h" +#elif (ANV_IS_HASWELL) +# include "gen75_pack.h" +#elif (ANV_GEN == 7) +# include "gen7_pack.h" +#endif + +void +genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->state.scratch_size = + anv_block_pool_size(&device->scratch_block_pool); + if (cmd_buffer->state.scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + +/* XXX: Do we need this on more than just BDW? */ +#if (ANV_GEN == 8) + /* Emit a render target cache flush. + * + * This isn't documented anywhere in the PRM. However, it seems to be + * necessary prior to changing the surface state base adress. Without + * this, we get GPU hangs when using multi-level command buffers which + * clear depth, reset state base address, and then go render stuff. + */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .RenderTargetCacheFlushEnable = true); +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), + .GeneralStateBaseAddress = { scratch_bo, 0 }, + .GeneralStateMemoryObjectControlState = GENX(MOCS), + .GeneralStateBaseAddressModifyEnable = true, + + .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer), + .SurfaceStateMemoryObjectControlState = GENX(MOCS), + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GENX(MOCS), + .DynamicStateBaseAddressModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GENX(MOCS), + .IndirectObjectBaseAddressModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GENX(MOCS), + .InstructionBaseAddressModifyEnable = true, + +# if (ANV_GEN >= 8) + /* 
Broadwell requires that we specify a buffer size for a bunch of + * these fields. However, since we will be growing the BO's live, we + * just set them all to the maximum. + */ + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + .IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true, +# endif + ); + + /* After re-setting the surface state base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. 
+ * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true); +} + +void genX(CmdPipelineBarrier)( + VkCmdBuffer cmdBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + uint32_t b, *dw; + + struct GENX(PIPE_CONTROL) cmd = { + GENX(PIPE_CONTROL_header), + .PostSyncOperation = NoWrite, + }; + + /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */ + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { + /* This is just what PIPE_CONTROL does */ + } + + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { + cmd.StallAtPixelScoreboard = true; + } + + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT)) { + cmd.CommandStreamerStallEnable = true; + } + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { + anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); + } + + /* On our hardware, all stages will wait for execution as needed. */ + (void)destStageMask; + + /* We checked all known VkPipeEventFlags. 
*/ + anv_assert(srcStageMask == 0); + + /* XXX: Right now, we're really dumb and just flush whatever categories + * the app asks for. One of these days we may make this a bit better + * but right now that's all the hardware allows for in most areas. + */ + VkMemoryOutputFlags out_flags = 0; + VkMemoryInputFlags in_flags = 0; + + for (uint32_t i = 0; i < memBarrierCount; i++) { + const struct anv_common *common = ppMemBarriers[i]; + switch (common->sType) { + case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + default: + unreachable("Invalid memory barrier type"); + } + } + + for_each_bit(b, out_flags) { + switch ((VkMemoryOutputFlags)(1 << b)) { + case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: + cmd.DCFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: + cmd.RenderTargetCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + cmd.DepthCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_TRANSFER_BIT: + cmd.RenderTargetCacheFlushEnable = true; + cmd.DepthCacheFlushEnable = true; + break; + default: + unreachable("Invalid memory output flag"); + } + } + + for_each_bit(b, out_flags) { + switch ((VkMemoryInputFlags)(1 << b)) { + case VK_MEMORY_INPUT_HOST_READ_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: + case VK_MEMORY_INPUT_INDEX_FETCH_BIT: + case 
VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: + cmd.VFCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_UNIFORM_READ_BIT: + cmd.ConstantCacheInvalidationEnable = true; + /* fallthrough */ + case VK_MEMORY_INPUT_SHADER_READ_BIT: + cmd.DCFlushEnable = true; + cmd.TextureCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: + case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + break; /* XXX: Hunh? */ + case VK_MEMORY_INPUT_TRANSFER_BIT: + cmd.TextureCacheInvalidationEnable = true; + break; + } + } + + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); + GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &cmd); +} -- cgit v1.2.3 From 828b1a6eb65fca4e61136a2297db4135c281d8fa Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 18 Nov 2015 14:48:28 -0800 Subject: anv/device: Update the right sampler in UpdateDescriptorSets --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 406792af95e..913bcdc9959 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1759,7 +1759,7 @@ void anv_UpdateDescriptorSets( * to stomp on it. */ if (sampler) - desc->sampler = sampler; + desc[j].sampler = sampler; } break; -- cgit v1.2.3 From fa8db0dfcc542bbe31de5e65e25fe2946cb9552f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 18 Nov 2015 14:58:40 -0800 Subject: anv: Put all of the descriptor set stuff together in one file The stuff to take descriptor sets and turn them into binding tables and sampler tables is still in anv_cmd_buffer.c. We may want to consider putting it in anv_descriptor_set.c eventually. 
--- src/vulkan/Makefile.am | 1 + src/vulkan/anv_descriptor_set.c | 503 ++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_device.c | 348 --------------------------- src/vulkan/anv_pipeline.c | 116 --------- 4 files changed, 504 insertions(+), 464 deletions(-) create mode 100644 src/vulkan/anv_descriptor_set.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index e3825d711d1..63574f4b150 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -66,6 +66,7 @@ VULKAN_SOURCES = \ anv_allocator.c \ anv_cmd_buffer.c \ anv_batch_chain.c \ + anv_descriptor_set.c \ anv_device.c \ anv_dump.c \ anv_entrypoints.c \ diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c new file mode 100644 index 00000000000..d518dfa34ba --- /dev/null +++ b/src/vulkan/anv_descriptor_set.c @@ -0,0 +1,503 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/* + * Descriptor set layouts. + */ + +VkResult anv_CreateDescriptorSetLayout( + VkDevice _device, + const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + VkDescriptorSetLayout* pSetLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_descriptor_set_layout *set_layout; + uint32_t s; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + uint32_t immutable_sampler_count = 0; + for (uint32_t b = 0; b < pCreateInfo->count; b++) { + if (pCreateInfo->pBinding[b].pImmutableSamplers) + immutable_sampler_count += pCreateInfo->pBinding[b].arraySize; + } + + size_t size = sizeof(struct anv_descriptor_set_layout) + + pCreateInfo->count * sizeof(set_layout->binding[0]) + + immutable_sampler_count * sizeof(struct anv_sampler *); + + set_layout = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!set_layout) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* We just allocate all the samplers at the end of the struct */ + struct anv_sampler **samplers = + (struct anv_sampler **)&set_layout->binding[pCreateInfo->count]; + + set_layout->binding_count = pCreateInfo->count; + set_layout->shader_stages = 0; + set_layout->size = 0; + + /* Initialize all binding_layout entries to -1 */ + memset(set_layout->binding, -1, + pCreateInfo->count * sizeof(set_layout->binding[0])); + + /* Initialize all samplers to 0 */ + memset(samplers, 0, immutable_sampler_count * sizeof(*samplers)); + + uint32_t sampler_count[VK_SHADER_STAGE_NUM] = { 0, }; + uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, }; + uint32_t dynamic_offset_count = 0; + + for (uint32_t b 
= 0; b < pCreateInfo->count; b++) { + uint32_t array_size = MAX2(1, pCreateInfo->pBinding[b].arraySize); + set_layout->binding[b].array_size = array_size; + set_layout->binding[b].descriptor_index = set_layout->size; + set_layout->size += array_size; + + switch (pCreateInfo->pBinding[b].descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for_each_bit(s, pCreateInfo->pBinding[b].stageFlags) { + set_layout->binding[b].stage[s].sampler_index = sampler_count[s]; + sampler_count[s] += array_size; + } + break; + default: + break; + } + + switch (pCreateInfo->pBinding[b].descriptorType) { + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + for_each_bit(s, pCreateInfo->pBinding[b].stageFlags) { + set_layout->binding[b].stage[s].surface_index = surface_count[s]; + surface_count[s] += array_size; + } + break; + default: + break; + } + + switch (pCreateInfo->pBinding[b].descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + set_layout->binding[b].dynamic_offset_index = dynamic_offset_count; + dynamic_offset_count += array_size; + break; + default: + break; + } + + if (pCreateInfo->pBinding[b].pImmutableSamplers) { + set_layout->binding[b].immutable_samplers = samplers; + samplers += array_size; + + for (uint32_t i = 0; i < array_size; i++) + set_layout->binding[b].immutable_samplers[i] = + anv_sampler_from_handle(pCreateInfo->pBinding[b].pImmutableSamplers[i]); + } else { + set_layout->binding[b].immutable_samplers = NULL; + } + + set_layout->shader_stages |= 
pCreateInfo->pBinding[b].stageFlags; + } + + set_layout->dynamic_offset_count = dynamic_offset_count; + + *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); + + return VK_SUCCESS; +} + +void anv_DestroyDescriptorSetLayout( + VkDevice _device, + VkDescriptorSetLayout _set_layout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); + + anv_device_free(device, set_layout); +} + +/* + * Pipeline layouts. These have nothing to do with the pipeline. They are + * just muttiple descriptor set layouts pasted together + */ + +VkResult anv_CreatePipelineLayout( + VkDevice _device, + const VkPipelineLayoutCreateInfo* pCreateInfo, + VkPipelineLayout* pPipelineLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline_layout l, *layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); + + l.num_sets = pCreateInfo->descriptorSetCount; + + unsigned dynamic_offset_count = 0; + + memset(l.stage, 0, sizeof(l.stage)); + for (uint32_t set = 0; set < pCreateInfo->descriptorSetCount; set++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, + pCreateInfo->pSetLayouts[set]); + l.set[set].layout = set_layout; + + l.set[set].dynamic_offset_start = dynamic_offset_count; + for (uint32_t b = 0; b < set_layout->binding_count; b++) { + if (set_layout->binding[b].dynamic_offset_index >= 0) + dynamic_offset_count += set_layout->binding[b].array_size; + } + + for (VkShaderStage s = 0; s < VK_SHADER_STAGE_NUM; s++) { + l.set[set].stage[s].surface_start = l.stage[s].surface_count; + l.set[set].stage[s].sampler_start = l.stage[s].sampler_count; + + for (uint32_t b = 0; b < set_layout->binding_count; b++) { + unsigned array_size = set_layout->binding[b].array_size; + + if (set_layout->binding[b].stage[s].surface_index >= 0) { + l.stage[s].surface_count += array_size; + + if (set_layout->binding[b].dynamic_offset_index >= 0) + l.stage[s].has_dynamic_offsets = 
true; + } + + if (set_layout->binding[b].stage[s].sampler_index >= 0) + l.stage[s].sampler_count += array_size; + } + } + } + + unsigned num_bindings = 0; + for (VkShaderStage s = 0; s < VK_SHADER_STAGE_NUM; s++) + num_bindings += l.stage[s].surface_count + l.stage[s].sampler_count; + + size_t size = sizeof(*layout) + num_bindings * sizeof(layout->entries[0]); + + layout = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (layout == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Now we can actually build our surface and sampler maps */ + struct anv_pipeline_binding *entry = layout->entries; + for (VkShaderStage s = 0; s < VK_SHADER_STAGE_NUM; s++) { + l.stage[s].surface_to_descriptor = entry; + entry += l.stage[s].surface_count; + l.stage[s].sampler_to_descriptor = entry; + entry += l.stage[s].sampler_count; + + int surface = 0; + int sampler = 0; + for (uint32_t set = 0; set < pCreateInfo->descriptorSetCount; set++) { + struct anv_descriptor_set_layout *set_layout = l.set[set].layout; + + for (uint32_t b = 0; b < set_layout->binding_count; b++) { + unsigned array_size = set_layout->binding[b].array_size; + unsigned set_offset = set_layout->binding[b].descriptor_index; + + if (set_layout->binding[b].stage[s].surface_index >= 0) { + assert(surface == l.set[set].stage[s].surface_start + + set_layout->binding[b].stage[s].surface_index); + for (unsigned i = 0; i < array_size; i++) { + l.stage[s].surface_to_descriptor[surface + i].set = set; + l.stage[s].surface_to_descriptor[surface + i].offset = set_offset + i; + } + surface += array_size; + } + + if (set_layout->binding[b].stage[s].sampler_index >= 0) { + assert(sampler == l.set[set].stage[s].sampler_start + + set_layout->binding[b].stage[s].sampler_index); + for (unsigned i = 0; i < array_size; i++) { + l.stage[s].sampler_to_descriptor[sampler + i].set = set; + l.stage[s].sampler_to_descriptor[sampler + i].offset = set_offset + i; + } + sampler += array_size; + } + } + } + } + + 
/* Finally, we're done setting it up, copy into the allocated version */ + *layout = l; + + *pPipelineLayout = anv_pipeline_layout_to_handle(layout); + + return VK_SUCCESS; +} + +void anv_DestroyPipelineLayout( + VkDevice _device, + VkPipelineLayout _pipelineLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); + + anv_device_free(device, pipeline_layout); +} + +/* + * Descriptor pools. These are a no-op for now. + */ + +VkResult anv_CreateDescriptorPool( + VkDevice device, + const VkDescriptorPoolCreateInfo* pCreateInfo, + VkDescriptorPool* pDescriptorPool) +{ + anv_finishme("VkDescriptorPool is a stub"); + pDescriptorPool->handle = 1; + return VK_SUCCESS; +} + +void anv_DestroyDescriptorPool( + VkDevice _device, + VkDescriptorPool _pool) +{ + anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); +} + +VkResult anv_ResetDescriptorPool( + VkDevice device, + VkDescriptorPool descriptorPool) +{ + anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); + return VK_SUCCESS; +} + +VkResult +anv_descriptor_set_create(struct anv_device *device, + const struct anv_descriptor_set_layout *layout, + struct anv_descriptor_set **out_set) +{ + struct anv_descriptor_set *set; + size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]); + + set = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!set) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* A descriptor set may not be 100% filled. Clear the set so we can can + * later detect holes in it. 
+ */ + memset(set, 0, size); + + set->layout = layout; + + /* Go through and fill out immutable samplers if we have any */ + struct anv_descriptor *desc = set->descriptors; + for (uint32_t b = 0; b < layout->binding_count; b++) { + if (layout->binding[b].immutable_samplers) { + for (uint32_t i = 0; i < layout->binding[b].array_size; i++) + desc[i].sampler = layout->binding[b].immutable_samplers[i]; + } + desc += layout->binding[b].array_size; + } + + *out_set = set; + + return VK_SUCCESS; +} + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set) +{ + anv_device_free(device, set); +} + +VkResult anv_AllocDescriptorSets( + VkDevice _device, + VkDescriptorPool descriptorPool, + VkDescriptorSetUsage setUsage, + uint32_t count, + const VkDescriptorSetLayout* pSetLayouts, + VkDescriptorSet* pDescriptorSets) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + VkResult result = VK_SUCCESS; + struct anv_descriptor_set *set; + uint32_t i; + + for (i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pSetLayouts[i]); + + result = anv_descriptor_set_create(device, layout, &set); + if (result != VK_SUCCESS) + break; + + pDescriptorSets[i] = anv_descriptor_set_to_handle(set); + } + + if (result != VK_SUCCESS) + anv_FreeDescriptorSets(_device, descriptorPool, i, pDescriptorSets); + + return result; +} + +VkResult anv_FreeDescriptorSets( + VkDevice _device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet* pDescriptorSets) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + + anv_descriptor_set_destroy(device, set); + } + + return VK_SUCCESS; +} + +void anv_UpdateDescriptorSets( + VkDevice device, + uint32_t writeCount, + const VkWriteDescriptorSet* pDescriptorWrites, + uint32_t copyCount, + const VkCopyDescriptorSet* pDescriptorCopies) +{ + for (uint32_t i = 0; i < writeCount; 
i++) { + const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; + ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet); + const struct anv_descriptor_set_binding_layout *bind_layout = + &set->layout->binding[write->destBinding]; + struct anv_descriptor *desc = + &set->descriptors[bind_layout->descriptor_index]; + + switch (write->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + for (uint32_t j = 0; j < write->count; j++) { + ANV_FROM_HANDLE(anv_sampler, sampler, + write->pDescriptors[j].sampler); + + desc[j] = (struct anv_descriptor) { + .type = VK_DESCRIPTOR_TYPE_SAMPLER, + .sampler = sampler, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for (uint32_t j = 0; j < write->count; j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pDescriptors[j].imageView); + ANV_FROM_HANDLE(anv_sampler, sampler, + write->pDescriptors[j].sampler); + + desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + desc[j].image_view = iview; + + /* If this descriptor has an immutable sampler, we don't want + * to stomp on it. 
+ */ + if (sampler) + desc[j].sampler = sampler; + } + break; + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (uint32_t j = 0; j < write->count; j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pDescriptors[j].imageView); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .image_view = iview, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + anv_finishme("texel buffers not implemented"); + break; + + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + anv_finishme("input attachments not implemented"); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < write->count; j++) { + assert(write->pDescriptors[j].bufferInfo.buffer.handle); + ANV_FROM_HANDLE(anv_buffer, buffer, + write->pDescriptors[j].bufferInfo.buffer); + assert(buffer); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .buffer = buffer, + .offset = write->pDescriptors[j].bufferInfo.offset, + .range = write->pDescriptors[j].bufferInfo.range, + }; + + /* For buffers with dynamic offsets, we use the full possible + * range in the surface state and do the actual range-checking + * in the shader. 
+ */ + if (bind_layout->dynamic_offset_index >= 0) + desc[j].range = buffer->size - desc[j].offset; + } + + default: + break; + } + } + + for (uint32_t i = 0; i < copyCount; i++) { + const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; + ANV_FROM_HANDLE(anv_descriptor_set, src, copy->destSet); + ANV_FROM_HANDLE(anv_descriptor_set, dest, copy->destSet); + for (uint32_t j = 0; j < copy->count; j++) { + dest->descriptors[copy->destBinding + j] = + src->descriptors[copy->srcBinding + j]; + } + } +} diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 913bcdc9959..e2b00c97c25 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1478,354 +1478,6 @@ void anv_DestroySampler( anv_device_free(device, sampler); } -// Descriptor set functions - -VkResult anv_CreateDescriptorSetLayout( - VkDevice _device, - const VkDescriptorSetLayoutCreateInfo* pCreateInfo, - VkDescriptorSetLayout* pSetLayout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_descriptor_set_layout *set_layout; - uint32_t s; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); - - uint32_t immutable_sampler_count = 0; - for (uint32_t b = 0; b < pCreateInfo->count; b++) { - if (pCreateInfo->pBinding[b].pImmutableSamplers) - immutable_sampler_count += pCreateInfo->pBinding[b].arraySize; - } - - size_t size = sizeof(struct anv_descriptor_set_layout) + - pCreateInfo->count * sizeof(set_layout->binding[0]) + - immutable_sampler_count * sizeof(struct anv_sampler *); - - set_layout = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!set_layout) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* We just allocate all the samplers at the end of the struct */ - struct anv_sampler **samplers = - (struct anv_sampler **)&set_layout->binding[pCreateInfo->count]; - - set_layout->binding_count = pCreateInfo->count; - set_layout->shader_stages = 0; - set_layout->size = 0; - - /* Initialize all binding_layout 
entries to -1 */ - memset(set_layout->binding, -1, - pCreateInfo->count * sizeof(set_layout->binding[0])); - - /* Initialize all samplers to 0 */ - memset(samplers, 0, immutable_sampler_count * sizeof(*samplers)); - - uint32_t sampler_count[VK_SHADER_STAGE_NUM] = { 0, }; - uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, }; - uint32_t dynamic_offset_count = 0; - - for (uint32_t b = 0; b < pCreateInfo->count; b++) { - uint32_t array_size = MAX2(1, pCreateInfo->pBinding[b].arraySize); - set_layout->binding[b].array_size = array_size; - set_layout->binding[b].descriptor_index = set_layout->size; - set_layout->size += array_size; - - switch (pCreateInfo->pBinding[b].descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for_each_bit(s, pCreateInfo->pBinding[b].stageFlags) { - set_layout->binding[b].stage[s].sampler_index = sampler_count[s]; - sampler_count[s] += array_size; - } - break; - default: - break; - } - - switch (pCreateInfo->pBinding[b].descriptorType) { - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - for_each_bit(s, pCreateInfo->pBinding[b].stageFlags) { - set_layout->binding[b].stage[s].surface_index = surface_count[s]; - surface_count[s] += array_size; - } - break; - default: - break; - } - - switch (pCreateInfo->pBinding[b].descriptorType) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - set_layout->binding[b].dynamic_offset_index = dynamic_offset_count; - dynamic_offset_count += array_size; - break; - default: - break; - } - - if 
(pCreateInfo->pBinding[b].pImmutableSamplers) { - set_layout->binding[b].immutable_samplers = samplers; - samplers += array_size; - - for (uint32_t i = 0; i < array_size; i++) - set_layout->binding[b].immutable_samplers[i] = - anv_sampler_from_handle(pCreateInfo->pBinding[b].pImmutableSamplers[i]); - } else { - set_layout->binding[b].immutable_samplers = NULL; - } - - set_layout->shader_stages |= pCreateInfo->pBinding[b].stageFlags; - } - - set_layout->dynamic_offset_count = dynamic_offset_count; - - *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); - - return VK_SUCCESS; -} - -void anv_DestroyDescriptorSetLayout( - VkDevice _device, - VkDescriptorSetLayout _set_layout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); - - anv_device_free(device, set_layout); -} - -VkResult anv_CreateDescriptorPool( - VkDevice device, - const VkDescriptorPoolCreateInfo* pCreateInfo, - VkDescriptorPool* pDescriptorPool) -{ - anv_finishme("VkDescriptorPool is a stub"); - pDescriptorPool->handle = 1; - return VK_SUCCESS; -} - -void anv_DestroyDescriptorPool( - VkDevice _device, - VkDescriptorPool _pool) -{ - anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); -} - -VkResult anv_ResetDescriptorPool( - VkDevice device, - VkDescriptorPool descriptorPool) -{ - anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); - return VK_SUCCESS; -} - -VkResult -anv_descriptor_set_create(struct anv_device *device, - const struct anv_descriptor_set_layout *layout, - struct anv_descriptor_set **out_set) -{ - struct anv_descriptor_set *set; - size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]); - - set = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (!set) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* A descriptor set may not be 100% filled. Clear the set so we can can - * later detect holes in it. 
- */ - memset(set, 0, size); - - set->layout = layout; - - /* Go through and fill out immutable samplers if we have any */ - struct anv_descriptor *desc = set->descriptors; - for (uint32_t b = 0; b < layout->binding_count; b++) { - if (layout->binding[b].immutable_samplers) { - for (uint32_t i = 0; i < layout->binding[b].array_size; i++) - desc[i].sampler = layout->binding[b].immutable_samplers[i]; - } - desc += layout->binding[b].array_size; - } - - *out_set = set; - - return VK_SUCCESS; -} - -void -anv_descriptor_set_destroy(struct anv_device *device, - struct anv_descriptor_set *set) -{ - anv_device_free(device, set); -} - -VkResult anv_AllocDescriptorSets( - VkDevice _device, - VkDescriptorPool descriptorPool, - VkDescriptorSetUsage setUsage, - uint32_t count, - const VkDescriptorSetLayout* pSetLayouts, - VkDescriptorSet* pDescriptorSets) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - VkResult result = VK_SUCCESS; - struct anv_descriptor_set *set; - uint32_t i; - - for (i = 0; i < count; i++) { - ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pSetLayouts[i]); - - result = anv_descriptor_set_create(device, layout, &set); - if (result != VK_SUCCESS) - break; - - pDescriptorSets[i] = anv_descriptor_set_to_handle(set); - } - - if (result != VK_SUCCESS) - anv_FreeDescriptorSets(_device, descriptorPool, i, pDescriptorSets); - - return result; -} - -VkResult anv_FreeDescriptorSets( - VkDevice _device, - VkDescriptorPool descriptorPool, - uint32_t count, - const VkDescriptorSet* pDescriptorSets) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - for (uint32_t i = 0; i < count; i++) { - ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); - - anv_descriptor_set_destroy(device, set); - } - - return VK_SUCCESS; -} - -void anv_UpdateDescriptorSets( - VkDevice device, - uint32_t writeCount, - const VkWriteDescriptorSet* pDescriptorWrites, - uint32_t copyCount, - const VkCopyDescriptorSet* pDescriptorCopies) -{ - for (uint32_t i = 0; i < writeCount; 
i++) { - const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; - ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet); - const struct anv_descriptor_set_binding_layout *bind_layout = - &set->layout->binding[write->destBinding]; - struct anv_descriptor *desc = - &set->descriptors[bind_layout->descriptor_index]; - - switch (write->descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - for (uint32_t j = 0; j < write->count; j++) { - ANV_FROM_HANDLE(anv_sampler, sampler, - write->pDescriptors[j].sampler); - - desc[j] = (struct anv_descriptor) { - .type = VK_DESCRIPTOR_TYPE_SAMPLER, - .sampler = sampler, - }; - } - break; - - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for (uint32_t j = 0; j < write->count; j++) { - ANV_FROM_HANDLE(anv_image_view, iview, - write->pDescriptors[j].imageView); - ANV_FROM_HANDLE(anv_sampler, sampler, - write->pDescriptors[j].sampler); - - desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - desc[j].image_view = iview; - - /* If this descriptor has an immutable sampler, we don't want - * to stomp on it. 
- */ - if (sampler) - desc[j].sampler = sampler; - } - break; - - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - for (uint32_t j = 0; j < write->count; j++) { - ANV_FROM_HANDLE(anv_image_view, iview, - write->pDescriptors[j].imageView); - - desc[j] = (struct anv_descriptor) { - .type = write->descriptorType, - .image_view = iview, - }; - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - anv_finishme("texel buffers not implemented"); - break; - - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - anv_finishme("input attachments not implemented"); - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - for (uint32_t j = 0; j < write->count; j++) { - assert(write->pDescriptors[j].bufferInfo.buffer.handle); - ANV_FROM_HANDLE(anv_buffer, buffer, - write->pDescriptors[j].bufferInfo.buffer); - assert(buffer); - - desc[j] = (struct anv_descriptor) { - .type = write->descriptorType, - .buffer = buffer, - .offset = write->pDescriptors[j].bufferInfo.offset, - .range = write->pDescriptors[j].bufferInfo.range, - }; - - /* For buffers with dynamic offsets, we use the full possible - * range in the surface state and do the actual range-checking - * in the shader. 
- */ - if (bind_layout->dynamic_offset_index >= 0) - desc[j].range = buffer->size - desc[j].offset; - } - - default: - break; - } - } - - for (uint32_t i = 0; i < copyCount; i++) { - const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; - ANV_FROM_HANDLE(anv_descriptor_set, src, copy->destSet); - ANV_FROM_HANDLE(anv_descriptor_set, dest, copy->destSet); - for (uint32_t j = 0; j < copy->count; j++) { - dest->descriptors[copy->destBinding + j] = - src->descriptors[copy->srcBinding + j]; - } - } -} - VkResult anv_CreateFramebuffer( VkDevice _device, const VkFramebufferCreateInfo* pCreateInfo, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 597188c5f24..1193d1e7a5d 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -1167,119 +1167,3 @@ VkResult anv_CreateComputePipelines( return VK_SUCCESS; } - -// Pipeline layout functions - -VkResult anv_CreatePipelineLayout( - VkDevice _device, - const VkPipelineLayoutCreateInfo* pCreateInfo, - VkPipelineLayout* pPipelineLayout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline_layout l, *layout; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); - - l.num_sets = pCreateInfo->descriptorSetCount; - - unsigned dynamic_offset_count = 0; - - memset(l.stage, 0, sizeof(l.stage)); - for (uint32_t set = 0; set < pCreateInfo->descriptorSetCount; set++) { - ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, - pCreateInfo->pSetLayouts[set]); - l.set[set].layout = set_layout; - - l.set[set].dynamic_offset_start = dynamic_offset_count; - for (uint32_t b = 0; b < set_layout->binding_count; b++) { - if (set_layout->binding[b].dynamic_offset_index >= 0) - dynamic_offset_count += set_layout->binding[b].array_size; - } - - for (VkShaderStage s = 0; s < VK_SHADER_STAGE_NUM; s++) { - l.set[set].stage[s].surface_start = l.stage[s].surface_count; - l.set[set].stage[s].sampler_start = l.stage[s].sampler_count; - - for (uint32_t b = 0; b < 
set_layout->binding_count; b++) { - unsigned array_size = set_layout->binding[b].array_size; - - if (set_layout->binding[b].stage[s].surface_index >= 0) { - l.stage[s].surface_count += array_size; - - if (set_layout->binding[b].dynamic_offset_index >= 0) - l.stage[s].has_dynamic_offsets = true; - } - - if (set_layout->binding[b].stage[s].sampler_index >= 0) - l.stage[s].sampler_count += array_size; - } - } - } - - unsigned num_bindings = 0; - for (VkShaderStage s = 0; s < VK_SHADER_STAGE_NUM; s++) - num_bindings += l.stage[s].surface_count + l.stage[s].sampler_count; - - size_t size = sizeof(*layout) + num_bindings * sizeof(layout->entries[0]); - - layout = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (layout == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* Now we can actually build our surface and sampler maps */ - struct anv_pipeline_binding *entry = layout->entries; - for (VkShaderStage s = 0; s < VK_SHADER_STAGE_NUM; s++) { - l.stage[s].surface_to_descriptor = entry; - entry += l.stage[s].surface_count; - l.stage[s].sampler_to_descriptor = entry; - entry += l.stage[s].sampler_count; - - int surface = 0; - int sampler = 0; - for (uint32_t set = 0; set < pCreateInfo->descriptorSetCount; set++) { - struct anv_descriptor_set_layout *set_layout = l.set[set].layout; - - for (uint32_t b = 0; b < set_layout->binding_count; b++) { - unsigned array_size = set_layout->binding[b].array_size; - unsigned set_offset = set_layout->binding[b].descriptor_index; - - if (set_layout->binding[b].stage[s].surface_index >= 0) { - assert(surface == l.set[set].stage[s].surface_start + - set_layout->binding[b].stage[s].surface_index); - for (unsigned i = 0; i < array_size; i++) { - l.stage[s].surface_to_descriptor[surface + i].set = set; - l.stage[s].surface_to_descriptor[surface + i].offset = set_offset + i; - } - surface += array_size; - } - - if (set_layout->binding[b].stage[s].sampler_index >= 0) { - assert(sampler == 
l.set[set].stage[s].sampler_start + - set_layout->binding[b].stage[s].sampler_index); - for (unsigned i = 0; i < array_size; i++) { - l.stage[s].sampler_to_descriptor[sampler + i].set = set; - l.stage[s].sampler_to_descriptor[sampler + i].offset = set_offset + i; - } - sampler += array_size; - } - } - } - } - - /* Finally, we're done setting it up, copy into the allocated version */ - *layout = l; - - *pPipelineLayout = anv_pipeline_layout_to_handle(layout); - - return VK_SUCCESS; -} - -void anv_DestroyPipelineLayout( - VkDevice _device, - VkPipelineLayout _pipelineLayout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); - - anv_device_free(device, pipeline_layout); -} -- cgit v1.2.3 From e69db9159b6f4d405d152012daa9da72f4f321e8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 20 Nov 2015 11:52:28 -0800 Subject: gen8/pipeline: Minor blending fixes This makes various fields match upstream mesa --- src/vulkan/gen8_pipeline.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 9d4ee9927cf..2baa5c5f31d 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -198,6 +198,7 @@ emit_cb_state(struct anv_pipeline *pipeline, struct GEN8_BLEND_STATE blend_state = { .AlphaToCoverageEnable = info->alphaToCoverageEnable, + .AlphaToOneEnable = info->alphaToOneEnable, }; for (uint32_t i = 0; i < info->attachmentCount; i++) { @@ -208,8 +209,9 @@ emit_cb_state(struct anv_pipeline *pipeline, .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], .ColorBufferBlendEnable = a->blendEnable, .PreBlendSourceOnlyClampEnable = false, - .PreBlendColorClampEnable = false, - .PostBlendColorClampEnable = false, + .ColorClampRange = COLORCLAMP_RTFORMAT, + .PreBlendColorClampEnable = true, + .PostBlendColorClampEnable = true, .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], 
.DestinationBlendFactor = vk_to_gen_blend[a->destBlendColor], .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], -- cgit v1.2.3 From b43ce6768dcaa143f143bda59beb7794cc6bd7b3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 20 Nov 2015 11:52:54 -0800 Subject: gen8/pipeline: Set IndependentAlphaBlendEnable properly --- src/vulkan/gen8_pipeline.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 2baa5c5f31d..618774eda6e 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -204,6 +204,12 @@ emit_cb_state(struct anv_pipeline *pipeline, for (uint32_t i = 0; i < info->attachmentCount; i++) { const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; + if (a->srcBlendColor != a->srcBlendAlpha || + a->destBlendColor != a->destBlendAlpha || + a->blendOpColor != a->blendOpAlpha) { + blend_state.IndependentAlphaBlendEnable = true; + } + blend_state.Entry[i] = (struct GEN8_BLEND_STATE_ENTRY) { .LogicOpEnable = info->logicOpEnable, .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], -- cgit v1.2.3 From 55d16c090eaf7d5a8a9d7fca870b051f651573cc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 20 Nov 2015 11:53:10 -0800 Subject: gen8/pipeline: Properly handle MIN/MAX blend ops --- src/vulkan/gen8_pipeline.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 618774eda6e..6e2e65d6344 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -229,6 +229,23 @@ emit_cb_state(struct anv_pipeline *pipeline, .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), }; + + /* Our hardware applies the blend factor prior to the blend function + * regardless of what function is used. Technically, this means the + * hardware can do MORE than GL or Vulkan specify. 
However, it also + * means that, for MIN and MAX, we have to stomp the blend factor to + * ONE to make it a no-op. + */ + if (a->blendOpColor == VK_BLEND_OP_MIN || + a->blendOpColor == VK_BLEND_OP_MAX) { + blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE; + blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE; + } + if (a->blendOpAlpha == VK_BLEND_OP_MIN || + a->blendOpAlpha == VK_BLEND_OP_MAX) { + blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE; + blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE; + } } GEN8_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); -- cgit v1.2.3 From 48228c114ef127126dfcb55d242e9125ac116a06 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 20 Nov 2015 12:49:20 -0800 Subject: nir/spirv: Add support for runtime arrays --- src/glsl/nir/spirv_to_nir.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 024988e06ef..db023f614a8 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -507,10 +507,15 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, break; } + case SpvOpTypeRuntimeArray: case SpvOpTypeArray: { struct vtn_type *array_element = vtn_value(b, w[2], vtn_value_type_type)->type; - val->type->type = glsl_array_type(array_element->type, w[3]); + + /* A length of 0 is used to denote unsized arrays */ + unsigned length = (opcode == SpvOpTypeArray) ? 
w[3] : 0; + + val->type->type = glsl_array_type(array_element->type, length); val->type->array_element = array_element; val->type->stride = 0; break; @@ -630,7 +635,6 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, GLSL_TYPE_FLOAT); break; - case SpvOpTypeRuntimeArray: case SpvOpTypeOpaque: case SpvOpTypeEvent: case SpvOpTypeDeviceEvent: -- cgit v1.2.3 From 1d42f773d3f236656f84a2ee6d1c726b9668104f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 20 Nov 2015 14:45:44 -0800 Subject: gen8_state: Clamp sampler values to HW limitations --- src/vulkan/anv_private.h | 13 +++++++++++++ src/vulkan/gen8_state.c | 6 +++--- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 9f76363c764..a3e63e4c0c0 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -100,6 +100,19 @@ anv_minify(uint32_t n, uint32_t levels) return MAX(n >> levels, 1); } +static inline float +anv_clamp_f(float f, float min, float max) +{ + assert(min < max); + + if (f > max) + return max; + else if (f < min) + return min; + else + return f; +} + static inline bool anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) { diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 94972d20490..4e441797523 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -316,10 +316,10 @@ VkResult gen8_CreateSampler( .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], .MagModeFilter = mag_filter, .MinModeFilter = min_filter, - .TextureLODBias = pCreateInfo->mipLodBias * 256, + .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = pCreateInfo->minLod, - .MaxLOD = pCreateInfo->maxLod, + .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), + .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14), .ChromaKeyEnable = 0, .ChromaKeyIndex = 0, .ChromaKeyMode = 0, -- cgit v1.2.3 From 
cff74d6fb876c0ca4a6c5c5041c02770241beeb2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 20 Nov 2015 15:02:45 -0800 Subject: nir/spirv: Handle OpNop --- src/glsl/nir/spirv_to_nir.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index db023f614a8..ba49b04a54e 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -141,6 +141,11 @@ vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, unsigned count = w[0] >> SpvWordCountShift; assert(count >= 1 && w + count <= end); + if (opcode == SpvOpNop) { + w++; + continue; + } + if (!handler(b, opcode, w, count)) return w; -- cgit v1.2.3 From 1157b0360d9978028685dd78fa77871d5172c628 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 20 Nov 2015 15:15:38 -0800 Subject: nir/spirv: Rework decoration iteration The old code didn't work correctly if you had member decorations after non-member decorations. Since glslang never gave us any of those, it wasn't properly tested. 
--- src/glsl/nir/spirv_to_nir.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index ba49b04a54e..70610ca0f66 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -186,24 +186,25 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, static void _foreach_decoration_helper(struct vtn_builder *b, struct vtn_value *base_value, - int member, + int parent_member, struct vtn_value *value, vtn_decoration_foreach_cb cb, void *data) { - int new_member = member; - for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { - if (dec->member >= 0) { - assert(member == -1); - new_member = dec->member; + int member; + if (dec->member < 0) { + member = parent_member; + } else { + assert(parent_member == -1); + member = dec->member; } if (dec->group) { assert(dec->group->value_type == vtn_value_type_decoration_group); - _foreach_decoration_helper(b, base_value, new_member, dec->group, + _foreach_decoration_helper(b, base_value, member, dec->group, cb, data); } else { - cb(b, base_value, new_member, dec, data); + cb(b, base_value, member, dec, data); } } } -- cgit v1.2.3 From e3ec964e44cce65a62deb22d80b4f865e298b501 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 20 Nov 2015 15:48:03 -0800 Subject: anv/meta: Keep z coordinate flat while blitting --- src/vulkan/anv_meta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index c96a9deb798..7cf60768158 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -481,7 +481,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .tex_coord = { (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, - (float)(src_offset.z + src_extent.depth) / (float)src_iview->extent.depth, + 
(float)src_offset.z / (float)src_iview->extent.depth, }, }; @@ -493,7 +493,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .tex_coord = { (float)src_offset.x / (float)src_iview->extent.width, (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, - (float)(src_offset.z + src_extent.depth) / (float)src_iview->extent.depth, + (float)src_offset.z / (float)src_iview->extent.depth, }, }; -- cgit v1.2.3 From d6a7c659c75c5b1ae16652ad9d86245000702484 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 20 Nov 2015 16:00:32 -0800 Subject: anv/meta: Use array textures for 2D This a total of 1 extra instruction in the shader and gives us a lot more flexibility in how we do blits. --- src/vulkan/anv_meta.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 7cf60768158..5df33c0a8d1 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -95,16 +95,9 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) tex->src[0].src = nir_src_for_ssa(nir_load_var(&b, tex_pos_in)); tex->dest_type = nir_type_float; /* TODO */ - switch (tex_dim) { - case GLSL_SAMPLER_DIM_2D: - tex->coord_components = 2; - break; - case GLSL_SAMPLER_DIM_3D: - tex->coord_components = 3; - break; - default: - assert(!"Unsupported texture dimension"); - } + if (tex_dim == GLSL_SAMPLER_DIM_2D) + tex->is_array = true; + tex->coord_components = 3; tex->sampler = nir_deref_var_create(tex, sampler); -- cgit v1.2.3 From 438eaa3ae7f7c79c4362c8900195a1ddeceb1ab0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 20 Nov 2015 17:39:06 -0800 Subject: anv/meta: Add initial support for multi-slice array and 3-D copies We still need to fix up a few bits once we have real CPP values, but this should get us a long ways. 
--- src/vulkan/anv_meta.c | 348 +++++++++++++++++++++++++++++--------------------- 1 file changed, 200 insertions(+), 148 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 5df33c0a8d1..c9e46a9286e 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -869,7 +869,7 @@ void anv_CmdCopyImage( .baseMipLevel = pRegions[r].srcSubresource.mipLevel, .mipLevels = 1, .baseArrayLayer = pRegions[r].srcSubresource.arrayLayer, - .arraySize = 1 + .arraySize = pRegions[r].destSubresource.arraySize, }, }, cmd_buffer); @@ -880,48 +880,59 @@ void anv_CmdCopyImage( .z = 0, }; - const uint32_t dest_array_slice = + unsigned num_slices; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].srcSubresource.arraySize == 1 && + pRegions[r].destSubresource.arraySize == 1); + num_slices = pRegions[r].extent.depth; + } else { + assert(pRegions[r].srcSubresource.arraySize == + pRegions[r].destSubresource.arraySize); + assert(pRegions[r].extent.depth == 1); + num_slices = pRegions[r].destSubresource.arraySize; + } + + const uint32_t dest_base_array_slice = meta_blit_get_dest_view_base_array_slice(dest_image, &pRegions[r].destSubresource, &pRegions[r].destOffset); - if (pRegions[r].srcSubresource.arraySize > 1) - anv_finishme("FINISHME: copy multiple array layers"); - - if (pRegions[r].extent.depth > 1) - anv_finishme("FINISHME: copy multiple depth layers"); - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = destImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = dest_image->format->vk_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].destSubresource.mipLevel, - .mipLevels = 1, - .baseArrayLayer = dest_array_slice, - 
.arraySize = 1 + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].srcOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->format->vk_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].destSubresource.mipLevel, + .mipLevels = 1, + .baseArrayLayer = dest_base_array_slice + slice, + .arraySize = 1 + }, }, - }, - cmd_buffer); - - meta_emit_blit(cmd_buffer, - src_image, &src_iview, - pRegions[r].srcOffset, - pRegions[r].extent, - dest_image, &dest_iview, - dest_offset, - pRegions[r].extent, - VK_TEX_FILTER_NEAREST); + cmd_buffer); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + src_offset, + pRegions[r].extent, + dest_image, &dest_iview, + dest_offset, + pRegions[r].extent, + VK_TEX_FILTER_NEAREST); + } } meta_finish_blit(cmd_buffer, &saved_state); @@ -1028,9 +1039,10 @@ void anv_CmdBlitImage( meta_finish_blit(cmd_buffer, &saved_state); } -static VkImage +static struct anv_image * make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, VkImageUsageFlags usage, + VkImageType image_type, const VkBufferImageCopy *copy) { ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); @@ -1066,7 +1078,7 @@ make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, image->bo = buffer->bo; image->offset = buffer->offset + copy->bufferOffset; - return anv_image_to_handle(image); + return image; } void anv_CmdCopyBufferToImage( @@ -1094,81 +1106,102 @@ void anv_CmdCopyBufferToImage( proxy_aspect = VK_IMAGE_ASPECT_COLOR; } - VkImage srcImage = make_image_for_buffer(vk_device, srcBuffer, - 
proxy_format, VK_IMAGE_USAGE_SAMPLED_BIT, &pRegions[r]); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = proxy_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspectMask = 1 << proxy_aspect, - .baseMipLevel = 0, - .mipLevels = 1, - .baseArrayLayer = 0, - .arraySize = 1 - }, - }, - cmd_buffer); + struct anv_image *src_image = + make_image_for_buffer(vk_device, srcBuffer, proxy_format, + VK_IMAGE_USAGE_SAMPLED_BIT, + dest_image->type, &pRegions[r]); - const VkOffset3D dest_offset = { - .x = pRegions[r].imageOffset.x, - .y = pRegions[r].imageOffset.y, - .z = 0, - }; - - const uint32_t dest_array_slice = + const uint32_t dest_base_array_slice = meta_blit_get_dest_view_base_array_slice(dest_image, &pRegions[r].imageSubresource, &pRegions[r].imageOffset); - if (pRegions[r].imageExtent.depth > 1) - anv_finishme("FINISHME: copy multiple depth layers"); + unsigned num_slices; + if (dest_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].imageSubresource.arraySize == 1); + num_slices = pRegions[r].imageExtent.depth; + } else { + assert(pRegions[r].imageExtent.depth == 1); + num_slices = pRegions[r].imageSubresource.arraySize; + } - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(dest_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = proxy_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A + for (unsigned slice = 0; slice < num_slices; slice++) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + 
&(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(src_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = proxy_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = 1 << proxy_aspect, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArrayLayer = 0, + .arraySize = 1, + }, }, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .mipLevels = 1, - .baseArrayLayer = dest_array_slice, - .arraySize = 1 + cmd_buffer); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = proxy_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .mipLevels = 1, + .baseArrayLayer = dest_base_array_slice + slice, + .arraySize = 1 + }, }, - }, - cmd_buffer); - - meta_emit_blit(cmd_buffer, - anv_image_from_handle(srcImage), - &src_iview, - (VkOffset3D) { 0, 0, 0 }, - pRegions[r].imageExtent, - dest_image, - &dest_iview, - dest_offset, - pRegions[r].imageExtent, - VK_TEX_FILTER_NEAREST); + cmd_buffer); + + VkOffset3D src_offset = { 0, 0, slice }; + + const VkOffset3D dest_offset = { + .x = pRegions[r].imageOffset.x, + .y = pRegions[r].imageOffset.y, + .z = 0, + }; + + meta_emit_blit(cmd_buffer, + src_image, + &src_iview, + src_offset, + pRegions[r].imageExtent, + dest_image, + &dest_iview, + dest_offset, + pRegions[r].imageExtent, + VK_TEX_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded 
in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. + */ + /* XXX: Insert a real CPP */ + src_image->offset += src_image->extent.width * + src_image->extent.height * 4; + } - anv_DestroyImage(vk_device, srcImage); + anv_DestroyImage(vk_device, anv_image_to_handle(src_image)); } meta_finish_blit(cmd_buffer, &saved_state); @@ -1193,12 +1226,6 @@ void anv_CmdCopyImageToBuffer( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - if (pRegions[r].imageSubresource.arraySize > 1) - anv_finishme("FINISHME: copy multiple array layers"); - - if (pRegions[r].imageExtent.depth > 1) - anv_finishme("FINISHME: copy multiple depth layers"); - struct anv_image_view src_iview; anv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { @@ -1217,7 +1244,7 @@ void anv_CmdCopyImageToBuffer( .baseMipLevel = pRegions[r].imageSubresource.mipLevel, .mipLevels = 1, .baseArrayLayer = pRegions[r].imageSubresource.arrayLayer, - .arraySize = 1 + .arraySize = pRegions[r].imageSubresource.arraySize, }, }, cmd_buffer); @@ -1227,44 +1254,69 @@ void anv_CmdCopyImageToBuffer( dest_format = VK_FORMAT_R8_UINT; } - VkImage destImage = make_image_for_buffer(vk_device, destBuffer, - dest_format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, &pRegions[r]); + struct anv_image *dest_image = + make_image_for_buffer(vk_device, destBuffer, dest_format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + src_image->type, &pRegions[r]); + + unsigned num_slices; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].imageSubresource.arraySize == 1); + num_slices = pRegions[r].imageExtent.depth; + } else { + assert(pRegions[r].imageExtent.depth == 1); + num_slices = pRegions[r].imageSubresource.arraySize; + } - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = 
VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = destImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = dest_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .mipLevels = 1, - .baseArrayLayer = 0, - .arraySize = 1 + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].imageOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArrayLayer = 0, + .arraySize = 1 + }, }, - }, - cmd_buffer); + cmd_buffer); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(srcImage), + &src_iview, + src_offset, + pRegions[r].imageExtent, + dest_image, + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent, + VK_TEX_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. 
+ */ + /* XXX: Insert a real CPP */ + dest_image->offset += dest_image->extent.width * + dest_image->extent.height * 4; + } - meta_emit_blit(cmd_buffer, - anv_image_from_handle(srcImage), - &src_iview, - pRegions[r].imageOffset, - pRegions[r].imageExtent, - anv_image_from_handle(destImage), - &dest_iview, - (VkOffset3D) { 0, 0, 0 }, - pRegions[r].imageExtent, - VK_TEX_FILTER_NEAREST); - - anv_DestroyImage(vk_device, destImage); + anv_DestroyImage(vk_device, anv_image_to_handle(dest_image)); } meta_finish_blit(cmd_buffer, &saved_state); -- cgit v1.2.3 From 83c305f8efc019151f8cabaca0114bfcace096ef Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 21 Nov 2015 00:04:57 -0800 Subject: anv/meta_clear: Don't try to clear depth-stencil without LOAD_OP_CLEAR --- src/vulkan/anv_meta_clear.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 7eb4c5587cc..16e15c67cc9 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -629,20 +629,22 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, emit_load_color_clear(cmd_buffer, a, clear_values[a].color); } } else { - VkImageAspectFlags aspects = 0; + VkImageAspectFlags clear_aspects = 0; if (att->format->depth_format && att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; + clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; } if (att->format->has_stencil && att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; } - emit_load_depthstencil_clear(cmd_buffer, a, aspects, - clear_values[a].depthStencil); + if (clear_aspects) { + emit_load_depthstencil_clear(cmd_buffer, a, clear_aspects, + clear_values[a].depthStencil); + } } } -- cgit v1.2.3 From e14b2c76b40398a61f45f5d058079641661a66cb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 21 Nov 2015 11:39:12 
-0800 Subject: anv/meta_clear: Don't trash state if no clears are needed --- src/vulkan/anv_meta_clear.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 16e15c67cc9..6645e37d124 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -619,6 +619,32 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, { struct anv_meta_saved_state saved_state; + /* Figure out whether or not we actually need to clear anything to avoid + * trashing state when clearing is a no-op. + */ + bool needs_clear = false; + for (uint32_t a = 0; a < pass->attachment_count; ++a) { + struct anv_render_pass_attachment *att = &pass->attachments[a]; + + if (anv_format_is_color(att->format)) { + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + needs_clear = true; + break; + } + } else { + if ((att->format->depth_format && + att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) || + (att->format->has_stencil && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)) { + needs_clear = true; + break; + } + } + } + + if (!needs_clear) + return; + meta_clear_begin(&saved_state, cmd_buffer); for (uint32_t a = 0; a < pass->attachment_count; ++a) { -- cgit v1.2.3 From 0c59cb42b5ffb3288a498093673482bb5420aadd Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 25 Nov 2015 14:13:53 -0800 Subject: vk: Move all gen8 files to gen8 lib --- src/vulkan/Makefile.am | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 63574f4b150..64b5e90ae33 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -86,9 +86,6 @@ VULKAN_SOURCES = \ anv_util.c \ anv_wsi.c \ anv_wsi_x11.c \ - gen8_state.c \ - gen8_cmd_buffer.c \ - gen8_pipeline.c \ isl.c \ isl_format_layout.c @@ -111,8 +108,11 @@ libanv_gen75_la_SOURCES = \ gen7_state.c libanv_gen75_la_CFLAGS = 
$(libvulkan_la_CFLAGS) -DANV_GENx10=75 -libanv_gen8_la_SOURCES = \ - genX_cmd_buffer.c +libanv_gen8_la_SOURCES = \ + genX_cmd_buffer.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen8_state.c libanv_gen8_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=80 if HAVE_EGL_PLATFORM_WAYLAND -- cgit v1.2.3 From 0e02a88ad479e2c0fd06df995554e035e044d405 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 15 Sep 2015 12:26:24 -0700 Subject: vk: Add GEN9 pack header --- src/vulkan/gen9_pack.h | 9767 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 9767 insertions(+) create mode 100644 src/vulkan/gen9_pack.h (limited to 'src') diff --git a/src/vulkan/gen9_pack.h b/src/vulkan/gen9_pack.h new file mode 100644 index 00000000000..b9dc30ad01b --- /dev/null +++ b/src/vulkan/gen9_pack.h @@ -0,0 +1,9767 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +/* Instructions, enums and structures for SKL. + * + * This file has been generated, do not hand edit. + */ + +#pragma once + +#include +#include + +#ifndef __gen_validate_value +#define __gen_validate_value(x) +#endif + +#ifndef __gen_field_functions +#define __gen_field_functions + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ul >> (64 - (end - start + 1))) << start; +} + +static inline uint64_t +__gen_field(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + if (end - start + 1 < 64) + assert(v < 1ul << (end - start + 1)); +#endif + + return v << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline uint32_t +__gen_float(float v) +{ + __gen_validate_value(v); + return ((union __gen_value) { .f = (v) }).dw; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#endif + +#define GEN9_3DSTATE_URB_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_URB_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 48, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_URB_VS_length 0x00000002 + +struct GEN9_3DSTATE_URB_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t 
VSURBStartingAddress; + uint32_t VSURBEntryAllocationSize; + uint32_t VSNumberofURBEntries; +}; + +static inline void +GEN9_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_URB_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VSURBStartingAddress, 25, 31) | + __gen_field(values->VSURBEntryAllocationSize, 16, 24) | + __gen_field(values->VSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN9_3DSTATE_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 16, \ + .DwordLength = 7 + +#define GEN9_3DSTATE_VS_length 0x00000009 + +#define __gen_prefix(name) GEN9_ ## name + +struct __gen_prefix(3DSTATE_VS) { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleVertexDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool AccessesUAV; + bool SoftwareExceptionEnable; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + 
bool StatisticsEnable; + bool SIMD8DispatchEnable; + bool VertexCacheDisable; + bool FunctionEnable; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN9_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleVertexDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->AccessesUAV, 12, 12) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 23, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->SIMD8DispatchEnable, 2, 2) | + __gen_field(values->VertexCacheDisable, 1, 1) | + __gen_field(values->FunctionEnable, 0, 0) | + 0; + + dw[8] = + 
__gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN9_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 +#define GEN9_GPGPU_CSR_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +#define GEN9_GPGPU_CSR_BASE_ADDRESS_length 0x00000003 + +struct GEN9_GPGPU_CSR_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GPGPUCSRBaseAddress; +}; + +static inline void +GEN9_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_GPGPU_CSR_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN9_MI_ATOMIC_length_bias 0x00000002 +#define GEN9_MI_ATOMIC_header \ + .CommandType = 0, \ + .MICommandOpcode = 47 + +#define GEN9_MI_ATOMIC_length 0x00000003 + +struct __gen_prefix(MI_ATOMIC) { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t MemoryType; + uint32_t PostSyncOperation; +#define DWORD 0 +#define QWORD 1 +#define OCTWORD 2 +#define RESERVED 3 + uint32_t DataSize; + uint32_t InlineData; + uint32_t CSSTALL; + uint32_t ReturnDataControl; + uint32_t ATOMICOPCODE; + uint32_t 
DwordLength; + __gen_address_type MemoryAddress; + uint32_t Operand1DataDword0; + uint32_t Operand2DataDword0; + uint32_t Operand1DataDword1; + uint32_t Operand2DataDword1; + uint32_t Operand1DataDword2; + uint32_t Operand2DataDword2; + uint32_t Operand1DataDword3; + uint32_t Operand2DataDword3; +}; + +static inline void +GEN9_MI_ATOMIC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_ATOMIC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->MemoryType, 22, 22) | + __gen_field(values->PostSyncOperation, 21, 21) | + __gen_field(values->DataSize, 19, 20) | + __gen_field(values->InlineData, 18, 18) | + __gen_field(values->CSSTALL, 17, 17) | + __gen_field(values->ReturnDataControl, 16, 16) | + __gen_field(values->ATOMICOPCODE, 8, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->MemoryAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->Operand1DataDword0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->Operand2DataDword0, 0, 31) | + 0; + + dw[5] = + __gen_field(values->Operand1DataDword1, 0, 31) | + 0; + + dw[6] = + __gen_field(values->Operand2DataDword1, 0, 31) | + 0; + + dw[7] = + __gen_field(values->Operand1DataDword2, 0, 31) | + 0; + + dw[8] = + __gen_field(values->Operand2DataDword2, 0, 31) | + 0; + + dw[9] = + __gen_field(values->Operand1DataDword3, 0, 31) | + 0; + + dw[10] = + __gen_field(values->Operand2DataDword3, 0, 31) | + 0; + +} + +#define GEN9_MI_BATCH_BUFFER_START_length_bias 0x00000002 +#define GEN9_MI_BATCH_BUFFER_START_header \ + .CommandType = 0, \ + .MICommandOpcode = 49, \ + .DwordLength = 1 + +#define GEN9_MI_BATCH_BUFFER_START_length 0x00000003 + +struct GEN9_MI_BATCH_BUFFER_START { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define Firstlevelbatch 
0 +#define Secondlevelbatch 1 + uint32_t SecondLevelBatchBuffer; + bool AddOffsetEnable; + uint32_t PredicationEnable; + bool ResourceStreamerEnable; +#define ASI_GGTT 0 +#define ASI_PPGTT 1 + uint32_t AddressSpaceIndicator; + uint32_t DwordLength; + __gen_address_type BatchBufferStartAddress; +}; + +static inline void +GEN9_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_BATCH_BUFFER_START * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SecondLevelBatchBuffer, 22, 22) | + __gen_field(values->AddOffsetEnable, 16, 16) | + __gen_field(values->PredicationEnable, 15, 15) | + __gen_field(values->ResourceStreamerEnable, 10, 10) | + __gen_field(values->AddressSpaceIndicator, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN9_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 +#define GEN9_MI_CONDITIONAL_BATCH_BUFFER_END_header\ + .CommandType = 0, \ + .MICommandOpcode = 54, \ + .UseGlobalGTT = 0, \ + .CompareSemaphore = 0, \ + .DwordLength = 2 + +#define GEN9_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000004 + +struct GEN9_MI_CONDITIONAL_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t CompareSemaphore; +#define CompareMaskModeDisabled 0 +#define CompareMaskModeEnabled 1 + uint32_t CompareMaskMode; + uint32_t DwordLength; + uint32_t CompareDataDword; + __gen_address_type CompareAddress; +}; + +static inline void +GEN9_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->CompareSemaphore, 21, 21) | + __gen_field(values->CompareMaskMode, 19, 19) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CompareDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN9_MI_FORCE_WAKEUP_length_bias 0x00000002 +#define GEN9_MI_FORCE_WAKEUP_header \ + .CommandType = 0, \ + .MICommandOpcode = 29, \ + .DwordLength = 0 + +#define GEN9_MI_FORCE_WAKEUP_length 0x00000002 + +struct GEN9_MI_FORCE_WAKEUP { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t MaskBits; + uint32_t ForceRenderAwake; + uint32_t ForceMediaAwake; +}; + +static inline void +GEN9_MI_FORCE_WAKEUP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_FORCE_WAKEUP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MaskBits, 16, 31) | + __gen_field(values->ForceRenderAwake, 1, 1) | + __gen_field(values->ForceMediaAwake, 0, 0) | + 0; + +} + +#define GEN9_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 +#define GEN9_MI_LOAD_REGISTER_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 34, \ + .DwordLength = 1 + +#define GEN9_MI_LOAD_REGISTER_IMM_length 0x00000003 + +struct GEN9_MI_LOAD_REGISTER_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ByteWriteDisables; + uint32_t DwordLength; + uint32_t RegisterOffset; + uint32_t DataDWord; +}; + +static inline void +GEN9_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_LOAD_REGISTER_IMM * restrict values) 
+{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ByteWriteDisables, 8, 11) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterOffset, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DataDWord, 0, 31) | + 0; + +} + +#define GEN9_MI_LOAD_REGISTER_REG_length_bias 0x00000002 +#define GEN9_MI_LOAD_REGISTER_REG_header \ + .CommandType = 0, \ + .MICommandOpcode = 42, \ + .DwordLength = 1 + +#define GEN9_MI_LOAD_REGISTER_REG_length 0x00000003 + +struct GEN9_MI_LOAD_REGISTER_REG { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t SourceRegisterAddress; + uint32_t DestinationRegisterAddress; +}; + +static inline void +GEN9_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_LOAD_REGISTER_REG * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SourceRegisterAddress, 2, 22) | + 0; + + dw[2] = + __gen_offset(values->DestinationRegisterAddress, 2, 22) | + 0; + +} + +#define GEN9_MI_SEMAPHORE_SIGNAL_length_bias 0x00000002 +#define GEN9_MI_SEMAPHORE_SIGNAL_header \ + .CommandType = 0, \ + .MICommandOpcode = 27, \ + .DwordLength = 0 + +#define GEN9_MI_SEMAPHORE_SIGNAL_length 0x00000002 + +struct GEN9_MI_SEMAPHORE_SIGNAL { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t PostSyncOperation; +#define RCS 0 +#define VCS0 1 +#define BCS 2 +#define VECS 3 +#define VCS1 4 + uint32_t TargetEngineSelect; + uint32_t DwordLength; + uint32_t TargetContextID; +}; + +static inline void +GEN9_MI_SEMAPHORE_SIGNAL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_SEMAPHORE_SIGNAL * restrict values) +{ + uint32_t *dw = 
(uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PostSyncOperation, 21, 21) | + __gen_field(values->TargetEngineSelect, 15, 17) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->TargetContextID, 0, 31) | + 0; + +} + +#define GEN9_MI_SEMAPHORE_WAIT_length_bias 0x00000002 +#define GEN9_MI_SEMAPHORE_WAIT_header \ + .CommandType = 0, \ + .MICommandOpcode = 28, \ + .DwordLength = 2 + +#define GEN9_MI_SEMAPHORE_WAIT_length 0x00000004 + +struct GEN9_MI_SEMAPHORE_WAIT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t MemoryType; +#define PollingMode 1 +#define SignalMode 0 + uint32_t WaitMode; +#define SAD_GREATER_THAN_SDD 0 +#define SAD_GREATER_THAN_OR_EQUAL_SDD 1 +#define SAD_LESS_THAN_SDD 2 +#define SAD_LESS_THAN_OR_EQUAL_SDD 3 +#define SAD_EQUAL_SDD 4 +#define SAD_NOT_EQUAL_SDD 5 + uint32_t CompareOperation; + uint32_t DwordLength; + uint32_t SemaphoreDataDword; + __gen_address_type SemaphoreAddress; +}; + +static inline void +GEN9_MI_SEMAPHORE_WAIT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_SEMAPHORE_WAIT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->MemoryType, 22, 22) | + __gen_field(values->WaitMode, 15, 15) | + __gen_field(values->CompareOperation, 12, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SemaphoreDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SemaphoreAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN9_MI_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN9_MI_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 32, \ 
+ .DwordLength = 2 + +#define GEN9_MI_STORE_DATA_IMM_length 0x00000004 + +struct GEN9_MI_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + bool StoreQword; + uint32_t DwordLength; + __gen_address_type Address; + uint32_t CoreModeEnable; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN9_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->StoreQword, 21, 21) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->Address, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN9_MI_STORE_REGISTER_MEM_length_bias 0x00000002 +#define GEN9_MI_STORE_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 36, \ + .DwordLength = 2 + +#define GEN9_MI_STORE_REGISTER_MEM_length 0x00000004 + +struct GEN9_MI_STORE_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN9_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_STORE_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->PredicateEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 
0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN9_PIPELINE_SELECT_length_bias 0x00000001 +#define GEN9_PIPELINE_SELECT_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4 + +#define GEN9_PIPELINE_SELECT_length 0x00000001 + +struct GEN9_PIPELINE_SELECT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t MaskBits; + uint32_t ForceMediaAwake; + uint32_t MediaSamplerDOPClockGateEnable; +#define _3D 0 +#define Media 1 +#define GPGPU 2 + uint32_t PipelineSelection; +}; + +static inline void +GEN9_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_PIPELINE_SELECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->MaskBits, 8, 15) | + __gen_field(values->ForceMediaAwake, 5, 5) | + __gen_field(values->MediaSamplerDOPClockGateEnable, 4, 4) | + __gen_field(values->PipelineSelection, 0, 1) | + 0; + +} + +#define GEN9_STATE_BASE_ADDRESS_length_bias 0x00000002 +#define GEN9_STATE_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 1, \ + .DwordLength = 17 + +#define GEN9_STATE_BASE_ADDRESS_length 0x00000013 + +#define GEN9_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + +struct GEN9_MEMORY_OBJECT_CONTROL_STATE { + uint32_t IndextoMOCSTables; +}; + +static inline void +GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + 
uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->IndextoMOCSTables, 1, 6) | + 0; + +} + +struct GEN9_STATE_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GeneralStateBaseAddress; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; + bool GeneralStateBaseAddressModifyEnable; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; + __gen_address_type SurfaceStateBaseAddress; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; + bool SurfaceStateBaseAddressModifyEnable; + __gen_address_type DynamicStateBaseAddress; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; + bool DynamicStateBaseAddressModifyEnable; + __gen_address_type IndirectObjectBaseAddress; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; + bool IndirectObjectBaseAddressModifyEnable; + __gen_address_type InstructionBaseAddress; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; + bool InstructionBaseAddressModifyEnable; + uint32_t GeneralStateBufferSize; + bool GeneralStateBufferSizeModifyEnable; + uint32_t DynamicStateBufferSize; + bool DynamicStateBufferSizeModifyEnable; + uint32_t IndirectObjectBufferSize; + bool IndirectObjectBufferSizeModifyEnable; + uint32_t InstructionBufferSize; + bool InstructionBuffersizeModifyEnable; + __gen_address_type BindlessSurfaceStateBaseAddress; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE BindlessSurfaceStateMemoryObjectControlState; + bool BindlessSurfaceStateBaseAddressModifyEnable; + uint32_t BindlessSurfaceStateSize; +}; + +static inline void +GEN9_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_STATE_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_GeneralStateMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_GeneralStateMemoryObjectControlState, 4, 10) | + __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); + dw[3] = + __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 16, 22) | + 0; + + uint32_t dw_SurfaceStateMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); + uint32_t dw4 = + __gen_field(dw_SurfaceStateMemoryObjectControlState, 4, 10) | + __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw4 = + __gen_combine_address(data, &dw[4], values->SurfaceStateBaseAddress, dw4); + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + uint32_t dw_DynamicStateMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); + uint32_t dw6 = + __gen_field(dw_DynamicStateMemoryObjectControlState, 4, 10) | + __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw6 = + __gen_combine_address(data, &dw[6], values->DynamicStateBaseAddress, dw6); + + dw[6] = qw6; + dw[7] = qw6 >> 32; + + uint32_t 
dw_IndirectObjectMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); + uint32_t dw8 = + __gen_field(dw_IndirectObjectMemoryObjectControlState, 4, 10) | + __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw8 = + __gen_combine_address(data, &dw[8], values->IndirectObjectBaseAddress, dw8); + + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint32_t dw_InstructionMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); + uint32_t dw10 = + __gen_field(dw_InstructionMemoryObjectControlState, 4, 10) | + __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw10 = + __gen_combine_address(data, &dw[10], values->InstructionBaseAddress, dw10); + + dw[10] = qw10; + dw[11] = qw10 >> 32; + + dw[12] = + __gen_field(values->GeneralStateBufferSize, 12, 31) | + __gen_field(values->GeneralStateBufferSizeModifyEnable, 0, 0) | + 0; + + dw[13] = + __gen_field(values->DynamicStateBufferSize, 12, 31) | + __gen_field(values->DynamicStateBufferSizeModifyEnable, 0, 0) | + 0; + + dw[14] = + __gen_field(values->IndirectObjectBufferSize, 12, 31) | + __gen_field(values->IndirectObjectBufferSizeModifyEnable, 0, 0) | + 0; + + dw[15] = + __gen_field(values->InstructionBufferSize, 12, 31) | + __gen_field(values->InstructionBuffersizeModifyEnable, 0, 0) | + 0; + + uint32_t dw_BindlessSurfaceStateMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_BindlessSurfaceStateMemoryObjectControlState, &values->BindlessSurfaceStateMemoryObjectControlState); + uint32_t dw16 = + __gen_field(dw_BindlessSurfaceStateMemoryObjectControlState, 4, 10) | + __gen_field(values->BindlessSurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw16 = + __gen_combine_address(data, &dw[16], 
values->BindlessSurfaceStateBaseAddress, dw16); + + dw[16] = qw16; + dw[17] = qw16 >> 32; + + dw[18] = + __gen_field(values->BindlessSurfaceStateSize, 12, 31) | + 0; + +} + +#define GEN9_STATE_PREFETCH_length_bias 0x00000002 +#define GEN9_STATE_PREFETCH_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN9_STATE_PREFETCH_length 0x00000002 + +struct GEN9_STATE_PREFETCH { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type PrefetchPointer; + uint32_t PrefetchCount; +}; + +static inline void +GEN9_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_STATE_PREFETCH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->PrefetchCount, 0, 2) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); + +} + +#define GEN9_STATE_SIP_length_bias 0x00000002 +#define GEN9_STATE_SIP_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2, \ + .DwordLength = 1 + +#define GEN9_STATE_SIP_length 0x00000003 + +struct GEN9_STATE_SIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t SystemInstructionPointer; +}; + +static inline void +GEN9_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_STATE_SIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + 
__gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->SystemInstructionPointer, 4, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN9_3DPRIMITIVE_length_bias 0x00000002 +#define GEN9_3DPRIMITIVE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 3, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 5 + +#define GEN9_3DPRIMITIVE_length 0x00000007 + +struct GEN9_3DPRIMITIVE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool IndirectParameterEnable; + uint32_t UAVCoherencyRequired; + bool PredicateEnable; + uint32_t DwordLength; + bool EndOffsetEnable; +#define SEQUENTIAL 0 +#define RANDOM 1 + uint32_t VertexAccessType; + uint32_t PrimitiveTopologyType; + uint32_t VertexCountPerInstance; + uint32_t StartVertexLocation; + uint32_t InstanceCount; + uint32_t StartInstanceLocation; + uint32_t BaseVertexLocation; +}; + +static inline void +GEN9_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DPRIMITIVE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->UAVCoherencyRequired, 9, 9) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->EndOffsetEnable, 9, 9) | + __gen_field(values->VertexAccessType, 8, 8) | + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + + dw[2] = + __gen_field(values->VertexCountPerInstance, 0, 31) | + 0; + + dw[3] = + 
__gen_field(values->StartVertexLocation, 0, 31) | + 0; + + dw[4] = + __gen_field(values->InstanceCount, 0, 31) | + 0; + + dw[5] = + __gen_field(values->StartInstanceLocation, 0, 31) | + 0; + + dw[6] = + __gen_field(values->BaseVertexLocation, 0, 31) | + 0; + +} + +#define GEN9_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 +#define GEN9_3DSTATE_AA_LINE_PARAMETERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +#define GEN9_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 + +struct GEN9_3DSTATE_AA_LINE_PARAMETERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float AAPointCoverageBias; + float AACoverageBias; + float AAPointCoverageSlope; + float AACoverageSlope; + float AAPointCoverageEndCapBias; + float AACoverageEndCapBias; + float AAPointCoverageEndCapSlope; + float AACoverageEndCapSlope; +}; + +static inline void +GEN9_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_AA_LINE_PARAMETERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AAPointCoverageBias * (1 << 8), 24, 31) | + __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | + __gen_field(values->AAPointCoverageSlope * (1 << 8), 8, 15) | + __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | + 0; + + dw[2] = + __gen_field(values->AAPointCoverageEndCapBias * (1 << 8), 24, 31) | + __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | + __gen_field(values->AAPointCoverageEndCapSlope * (1 << 8), 8, 15) | + __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 
7) | + 0; + +} + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 70 + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_DS_length 0x00000000 + +#define GEN9_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 + +struct GEN9_BINDING_TABLE_EDIT_ENTRY { + uint32_t BindingTableIndex; + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN9_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_BINDING_TABLE_EDIT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BindingTableIndex, 16, 23) | + __gen_offset(values->SurfaceStatePointer, 0, 15) | + 0; + +} + +struct GEN9_3DSTATE_BINDING_TABLE_EDIT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, 
\ + ._3DCommandSubOpcode = 68 + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_GS_length 0x00000000 + +struct GEN9_3DSTATE_BINDING_TABLE_EDIT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 69 + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_HS_length 0x00000000 + +struct GEN9_3DSTATE_BINDING_TABLE_EDIT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 
29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 71 + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_PS_length 0x00000000 + +struct GEN9_3DSTATE_BINDING_TABLE_EDIT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 67 + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_VS_length 0x00000000 + +struct GEN9_3DSTATE_BINDING_TABLE_EDIT_VS { + 
uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 40, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 + +struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSBindingTable; +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 
__gen_offset(values->PointertoDSBindingTable, 5, 15) | + 0; + +} + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 41, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 + +struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSBindingTable; +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSBindingTable, 5, 15) | + 0; + +} + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 39, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 + +struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSBindingTable; +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSBindingTable, 5, 15) | + 0; + +} + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 42, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 + +struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSBindingTable; +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSBindingTable, 5, 15) | + 0; + +} + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 38, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 + +struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSBindingTable; +}; + +static inline 
void +GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSBindingTable, 5, 15) | + 0; + +} + +#define GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000004 + +struct GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type BindingTablePoolBaseAddress; + uint32_t BindingTablePoolEnable; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; +#define NoValidData 0 + uint32_t BindingTablePoolBufferSize; +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SurfaceObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); + uint32_t dw1 = + __gen_field(values->BindingTablePoolEnable, 11, 11) | + 
__gen_field(dw_SurfaceObjectControlState, 0, 6) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->BindingTablePoolBufferSize, 12, 31) | + 0; + +} + +#define GEN9_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 +#define GEN9_3DSTATE_BLEND_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 36, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 + +struct GEN9_3DSTATE_BLEND_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BlendStatePointer; + bool BlendStatePointerValid; +}; + +static inline void +GEN9_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BLEND_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->BlendStatePointer, 6, 31) | + __gen_field(values->BlendStatePointerValid, 0, 0) | + 0; + +} + +#define GEN9_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 +#define GEN9_3DSTATE_CC_STATE_POINTERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_CC_STATE_POINTERS_length 0x00000002 + +struct GEN9_3DSTATE_CC_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ColorCalcStatePointer; + bool ColorCalcStatePointerValid; +}; + +static inline 
void +GEN9_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CC_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ColorCalcStatePointer, 6, 31) | + __gen_field(values->ColorCalcStatePointerValid, 0, 0) | + 0; + +} + +#define GEN9_3DSTATE_CHROMA_KEY_length_bias 0x00000002 +#define GEN9_3DSTATE_CHROMA_KEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_CHROMA_KEY_length 0x00000004 + +struct GEN9_3DSTATE_CHROMA_KEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyTableIndex; + uint32_t ChromaKeyLowValue; + uint32_t ChromaKeyHighValue; +}; + +static inline void +GEN9_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CHROMA_KEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyTableIndex, 30, 31) | + 0; + + dw[2] = + __gen_field(values->ChromaKeyLowValue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyHighValue, 0, 31) | + 0; + +} + +#define GEN9_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 +#define GEN9_3DSTATE_CLEAR_PARAMS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + 
._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +#define GEN9_3DSTATE_CLEAR_PARAMS_length 0x00000003 + +struct GEN9_3DSTATE_CLEAR_PARAMS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float DepthClearValue; + bool DepthClearValueValid; +}; + +static inline void +GEN9_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CLEAR_PARAMS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_float(values->DepthClearValue) | + 0; + + dw[2] = + __gen_field(values->DepthClearValueValid, 0, 0) | + 0; + +} + +#define GEN9_3DSTATE_CLIP_length_bias 0x00000002 +#define GEN9_3DSTATE_CLIP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_CLIP_length 0x00000004 + +struct GEN9_3DSTATE_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define Normal 0 +#define Force 1 + bool ForceUserClipDistanceCullTestEnableBitmask; +#define _8Bit 0 +#define _4Bit 1 + uint32_t VertexSubPixelPrecisionSelect; + bool EarlyCullEnable; +#define Normal 0 +#define Force 1 + bool ForceUserClipDistanceClipTestEnableBitmask; +#define Normal 0 +#define Force 1 + bool ForceClipMode; + bool ClipperStatisticsEnable; + uint32_t UserClipDistanceCullTestEnableBitmask; + bool ClipEnable; +#define API_OGL 0 + uint32_t APIMode; + bool ViewportXYClipTestEnable; + bool GuardbandClipTestEnable; + uint32_t UserClipDistanceClipTestEnableBitmask; +#define NORMAL 0 +#define REJECT_ALL 3 +#define ACCEPT_ALL 4 + 
uint32_t ClipMode; + bool PerspectiveDivideDisable; + bool NonPerspectiveBarycentricEnable; + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; + uint32_t TriangleFanProvokingVertexSelect; + float MinimumPointWidth; + float MaximumPointWidth; + bool ForceZeroRTAIndexEnable; + uint32_t MaximumVPIndex; +}; + +static inline void +GEN9_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ForceUserClipDistanceCullTestEnableBitmask, 20, 20) | + __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | + __gen_field(values->EarlyCullEnable, 18, 18) | + __gen_field(values->ForceUserClipDistanceClipTestEnableBitmask, 17, 17) | + __gen_field(values->ForceClipMode, 16, 16) | + __gen_field(values->ClipperStatisticsEnable, 10, 10) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->ClipEnable, 31, 31) | + __gen_field(values->APIMode, 30, 30) | + __gen_field(values->ViewportXYClipTestEnable, 28, 28) | + __gen_field(values->GuardbandClipTestEnable, 26, 26) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | + __gen_field(values->ClipMode, 13, 15) | + __gen_field(values->PerspectiveDivideDisable, 9, 9) | + __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | + __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | + __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | + 0; + + dw[3] = + __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | + 
__gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | + __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | + __gen_field(values->MaximumVPIndex, 0, 3) | + 0; + +} + +#define GEN9_3DSTATE_CONSTANT_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_CONSTANT_DS_length 0x0000000b + +#define GEN9_3DSTATE_CONSTANT_BODY_length 0x0000000a + +struct GEN9_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN9_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + uint32_t dw4 = + 0; + + uint64_t qw4 = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer1, dw4); + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + uint32_t dw6 = + 0; + + uint64_t qw6 = + __gen_combine_address(data, &dw[6], values->PointerToConstantBuffer2, dw6); + + dw[6] = qw6; + dw[7] = qw6 >> 32; + + uint32_t dw8 = + 0; + + uint64_t qw8 = + __gen_combine_address(data, &dw[8], values->PointerToConstantBuffer3, dw8); + + dw[8] = qw8; + dw[9] = qw8 >> 32; 
+ +} + +struct GEN9_3DSTATE_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN9_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN9_3DSTATE_CONSTANT_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_CONSTANT_GS_length 0x0000000b + +struct GEN9_3DSTATE_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN9_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, 
&values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN9_3DSTATE_CONSTANT_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_CONSTANT_HS_length 0x0000000b + +struct GEN9_3DSTATE_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN9_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN9_3DSTATE_CONSTANT_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 23, \ + 
.DwordLength = 9 + +#define GEN9_3DSTATE_CONSTANT_PS_length 0x0000000b + +struct GEN9_3DSTATE_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN9_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN9_3DSTATE_CONSTANT_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_CONSTANT_VS_length 0x0000000b + +struct GEN9_3DSTATE_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN9_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + 
GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN9_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN9_3DSTATE_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 5, \ + .DwordLength = 6 + +#define GEN9_3DSTATE_DEPTH_BUFFER_length 0x00000008 + +struct GEN9_3DSTATE_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SURFTYPE_2D 1 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool DepthWriteEnable; + bool StencilWriteEnable; + bool HierarchicalDepthBufferEnable; +#define D32_FLOAT 1 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t SurfaceFormat; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t LOD; + uint32_t Depth; + uint32_t MinimumArrayElement; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; +#define NONE 0 +#define TILEYF 1 +#define TILEYS 2 + uint32_t TiledResourceMode; + uint32_t MipTailStartLOD; + uint32_t RenderTargetViewExtent; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN9_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->DepthWriteEnable, 28, 28) | + __gen_field(values->StencilWriteEnable, 27, 27) | + __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | + __gen_field(values->SurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->Height, 18, 31) | + __gen_field(values->Width, 4, 17) | + __gen_field(values->LOD, 0, 3) | + 0; + + uint32_t dw_DepthBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); + dw[5] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->MinimumArrayElement, 10, 20) | + __gen_field(dw_DepthBufferObjectControlState, 0, 6) | + 0; + + dw[6] = + __gen_field(values->TiledResourceMode, 30, 31) | + __gen_field(values->MipTailStartLOD, 26, 29) | + 0; + + dw[7] = + __gen_field(values->RenderTargetViewExtent, 21, 31) | + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN9_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 +#define GEN9_3DSTATE_DRAWING_RECTANGLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 + +struct GEN9_3DSTATE_DRAWING_RECTANGLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define Legacy 0 +#define Core0Enabled 1 +#define Core1Enabled 2 + uint32_t CoreModeSelect; + uint32_t DwordLength; + uint32_t ClippedDrawingRectangleYMin; + uint32_t ClippedDrawingRectangleXMin; + uint32_t 
ClippedDrawingRectangleYMax; + uint32_t ClippedDrawingRectangleXMax; + uint32_t DrawingRectangleOriginY; + uint32_t DrawingRectangleOriginX; +}; + +static inline void +GEN9_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_DRAWING_RECTANGLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->CoreModeSelect, 14, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | + 0; + + dw[2] = + __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | + 0; + + dw[3] = + __gen_field(values->DrawingRectangleOriginY, 16, 31) | + __gen_field(values->DrawingRectangleOriginX, 0, 15) | + 0; + +} + +#define GEN9_3DSTATE_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_DS_length 0x0000000b + +struct GEN9_3DSTATE_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool AccessesUAV; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + 
uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t PatchURBEntryReadLength; + uint32_t PatchURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; +#define SIMD4X2 0 +#define SIMD8_SINGLE_PATCH 1 +#define SIMD8_SINGLE_OR_DUAL_PATCH 2 + uint32_t DispatchMode; + bool ComputeWCoordinateEnable; + bool CacheDisable; + bool FunctionEnable; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; + uint64_t DUAL_PATCHKernelStartPointer; +}; + +static inline void +GEN9_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->AccessesUAV, 14, 14) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->PatchURBEntryReadLength, 11, 17) | + 
__gen_field(values->PatchURBEntryReadOffset, 4, 9) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 21, 29) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->DispatchMode, 3, 4) | + __gen_field(values->ComputeWCoordinateEnable, 2, 2) | + __gen_field(values->CacheDisable, 1, 1) | + __gen_field(values->FunctionEnable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + uint64_t qw9 = + __gen_offset(values->DUAL_PATCHKernelStartPointer, 6, 63) | + 0; + + dw[9] = qw9; + dw[10] = qw9 >> 32; + +} + +#define GEN9_3DSTATE_GATHER_CONSTANT_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_GATHER_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 55 + +#define GEN9_3DSTATE_GATHER_CONSTANT_DS_length 0x00000000 + +#define GEN9_GATHER_CONSTANT_ENTRY_length 0x00000001 + +struct GEN9_GATHER_CONSTANT_ENTRY { + uint32_t ConstantBufferOffset; + uint32_t ChannelMask; + uint32_t BindingTableIndexOffset; +}; + +static inline void +GEN9_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_GATHER_CONSTANT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->ConstantBufferOffset, 8, 15) | + __gen_field(values->ChannelMask, 4, 7) | + __gen_field(values->BindingTableIndexOffset, 0, 3) | + 0; + +} + +struct GEN9_3DSTATE_GATHER_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; +#define CommitGather 0 +#define NonCommitGather 1 + uint32_t UpdateGatherTableOnly; + uint32_t 
GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; +#define Load 0 +#define Read 1 + uint32_t OnDieTable; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GATHER_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + __gen_field(values->UpdateGatherTableOnly, 1, 1) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->OnDieTable, 3, 3) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_GATHER_CONSTANT_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_GATHER_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 53 + +#define GEN9_3DSTATE_GATHER_CONSTANT_GS_length 0x00000000 + +struct GEN9_3DSTATE_GATHER_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; +#define CommitGather 0 +#define NonCommitGather 1 + uint32_t UpdateGatherTableOnly; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; +#define Load 0 +#define Read 1 + uint32_t OnDieTable; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GATHER_CONSTANT_GS * restrict values) +{ + 
uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + __gen_field(values->UpdateGatherTableOnly, 1, 1) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->OnDieTable, 3, 3) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_GATHER_CONSTANT_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_GATHER_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 54 + +#define GEN9_3DSTATE_GATHER_CONSTANT_HS_length 0x00000000 + +struct GEN9_3DSTATE_GATHER_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; +#define CommitGather 0 +#define NonCommitGather 1 + uint32_t UpdateGatherTableOnly; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; +#define Load 0 +#define Read 1 + uint32_t OnDieTable; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GATHER_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 
__gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + __gen_field(values->UpdateGatherTableOnly, 1, 1) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->OnDieTable, 3, 3) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_GATHER_CONSTANT_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_GATHER_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 56 + +#define GEN9_3DSTATE_GATHER_CONSTANT_PS_length 0x00000000 + +struct GEN9_3DSTATE_GATHER_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; +#define CommitGather 0 +#define NonCommitGather 1 + uint32_t UpdateGatherTableOnly; + bool DX9OnDieRegisterReadEnable; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9Enable; +#define Load 0 +#define Read 1 + uint32_t OnDieTable; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GATHER_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + __gen_field(values->UpdateGatherTableOnly, 1, 1) | + __gen_field(values->DX9OnDieRegisterReadEnable, 0, 0) | + 0; + + dw[2] = + 
__gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + __gen_field(values->OnDieTable, 3, 3) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_GATHER_CONSTANT_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_GATHER_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 52 + +#define GEN9_3DSTATE_GATHER_CONSTANT_VS_length 0x00000000 + +struct GEN9_3DSTATE_GATHER_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; +#define CommitGather 0 +#define NonCommitGather 1 + uint32_t UpdateGatherTableOnly; + bool DX9OnDieRegisterReadEnable; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9Enable; +#define Load 0 +#define Read 1 + uint32_t OnDieTable; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GATHER_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + __gen_field(values->UpdateGatherTableOnly, 1, 1) | + __gen_field(values->DX9OnDieRegisterReadEnable, 0, 0) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 
__gen_field(values->ConstantBufferDx9Enable, 4, 4) | + __gen_field(values->OnDieTable, 3, 3) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 +#define GEN9_3DSTATE_GATHER_POOL_ALLOC_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_GATHER_POOL_ALLOC_length 0x00000004 + +struct GEN9_3DSTATE_GATHER_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GatherPoolBaseAddress; + bool GatherPoolEnable; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + uint32_t GatherPoolBufferSize; +}; + +static inline void +GEN9_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GATHER_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + uint32_t dw1 = + __gen_field(values->GatherPoolEnable, 11, 11) | + __gen_field(dw_MemoryObjectControlState, 0, 6) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->GatherPoolBufferSize, 12, 31) | + 0; + +} + +#define GEN9_3DSTATE_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 8 + +#define GEN9_3DSTATE_GS_length 
0x0000000a + +struct GEN9_3DSTATE_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool AccessesUAV; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ExpectedVertexCount; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData54; + uint32_t OutputVertexSize; + uint32_t OutputTopology; + uint32_t VertexURBEntryReadLength; + bool IncludeVertexHandles; + uint32_t VertexURBEntryReadOffset; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t ControlDataHeaderSize; + uint32_t InstanceControl; + uint32_t DefaultStreamId; +#define DispatchModeSingle 0 +#define DispatchModeDualInstance 1 +#define DispatchModeDualObject 2 +#define DispatchModeSIMD8 3 + uint32_t DispatchMode; + bool StatisticsEnable; + uint32_t InvocationsIncrementValue; + bool IncludePrimitiveID; + uint32_t Hint; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + bool DiscardAdjacency; + bool Enable; +#define CUT 0 +#define SID 1 + uint32_t ControlDataFormat; + bool StaticOutput; + uint32_t StaticOutputVertexCount; + uint32_t MaximumNumberofThreads; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN9_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN9_3DSTATE_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleProgramFlow, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->AccessesUAV, 12, 12) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + __gen_field(values->ExpectedVertexCount, 0, 5) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData54, 29, 30) | + __gen_field(values->OutputVertexSize, 23, 28) | + __gen_field(values->OutputTopology, 17, 22) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->IncludeVertexHandles, 10, 10) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 0, 3) | + 0; + + dw[7] = + __gen_field(values->ControlDataHeaderSize, 20, 23) | + __gen_field(values->InstanceControl, 15, 19) | + __gen_field(values->DefaultStreamId, 13, 14) | + __gen_field(values->DispatchMode, 11, 12) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->InvocationsIncrementValue, 5, 9) | + 
__gen_field(values->IncludePrimitiveID, 4, 4) | + __gen_field(values->Hint, 3, 3) | + __gen_field(values->ReorderMode, 2, 2) | + __gen_field(values->DiscardAdjacency, 1, 1) | + __gen_field(values->Enable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->ControlDataFormat, 31, 31) | + __gen_field(values->StaticOutput, 30, 30) | + __gen_field(values->StaticOutputVertexCount, 16, 26) | + __gen_field(values->MaximumNumberofThreads, 0, 8) | + 0; + + dw[9] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN9_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN9_3DSTATE_HIER_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000005 + +struct GEN9_3DSTATE_HIER_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN9_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_HIER_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_HierarchicalDepthBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, 
&dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); + dw[1] = + __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 31) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN9_3DSTATE_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 27, \ + .DwordLength = 7 + +#define GEN9_3DSTATE_HS_length 0x00000009 + +struct GEN9_3DSTATE_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + bool Enable; + bool StatisticsEnable; + uint32_t MaximumNumberofThreads; + uint32_t InstanceCount; + uint64_t KernelStartPointer; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData5; + bool SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; + bool AccessesUAV; + bool IncludeVertexHandles; + uint32_t DispatchGRFStartRegisterForURBData; +#define SINGLE_PATCH 0 +#define DUAL_PATCH 1 +#define _8_PATCH 2 + uint32_t DispatchMode; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + bool IncludePrimitiveID; +}; + +static inline void +GEN9_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN9_3DSTATE_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 12, 12) | + 0; + + dw[2] = + __gen_field(values->Enable, 31, 31) | + __gen_field(values->StatisticsEnable, 29, 29) | + __gen_field(values->MaximumNumberofThreads, 8, 16) | + __gen_field(values->InstanceCount, 0, 3) | + 0; + + uint64_t qw3 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[3] = qw3; + dw[4] = qw3 >> 32; + + uint64_t qw5 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[5] = qw5; + dw[6] = qw5 >> 32; + + dw[7] = + __gen_field(values->DispatchGRFStartRegisterForURBData5, 28, 28) | + __gen_field(values->SingleProgramFlow, 27, 27) | + __gen_field(values->VectorMaskEnable, 26, 26) | + __gen_field(values->AccessesUAV, 25, 25) | + __gen_field(values->IncludeVertexHandles, 24, 24) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | + __gen_field(values->DispatchMode, 17, 18) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->IncludePrimitiveID, 0, 0) | + 0; + + dw[8] = + 0; + +} + +#define GEN9_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 +#define GEN9_3DSTATE_INDEX_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 3 + 
+#define GEN9_3DSTATE_INDEX_BUFFER_length 0x00000005 + +struct GEN9_3DSTATE_INDEX_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 2 + uint32_t IndexFormat; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + __gen_address_type BufferStartingAddress; + uint32_t BufferSize; +}; + +static inline void +GEN9_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_INDEX_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[1] = + __gen_field(values->IndexFormat, 8, 9) | + __gen_field(dw_MemoryObjectControlState, 0, 6) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->BufferStartingAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->BufferSize, 0, 31) | + 0; + +} + +#define GEN9_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 +#define GEN9_3DSTATE_LINE_STIPPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 8, \ + .DwordLength = 1 + +#define GEN9_3DSTATE_LINE_STIPPLE_length 0x00000003 + +struct GEN9_3DSTATE_LINE_STIPPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + uint32_t CurrentRepeatCounter; + uint32_t CurrentStippleIndex; + uint32_t 
LineStipplePattern; + float LineStippleInverseRepeatCount; + uint32_t LineStippleRepeatCount; +}; + +static inline void +GEN9_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_LINE_STIPPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | + __gen_field(values->CurrentRepeatCounter, 21, 29) | + __gen_field(values->CurrentStippleIndex, 16, 19) | + __gen_field(values->LineStipplePattern, 0, 15) | + 0; + + dw[2] = + __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | + __gen_field(values->LineStippleRepeatCount, 0, 8) | + 0; + +} + +#define GEN9_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 +#define GEN9_3DSTATE_MONOFILTER_SIZE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_MONOFILTER_SIZE_length 0x00000002 + +struct GEN9_3DSTATE_MONOFILTER_SIZE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t MonochromeFilterWidth; + uint32_t MonochromeFilterHeight; +}; + +static inline void +GEN9_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_MONOFILTER_SIZE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 
__gen_field(values->MonochromeFilterWidth, 3, 5) | + __gen_field(values->MonochromeFilterHeight, 0, 2) | + 0; + +} + +#define GEN9_3DSTATE_MULTISAMPLE_length_bias 0x00000002 +#define GEN9_3DSTATE_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 13, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_MULTISAMPLE_length 0x00000002 + +struct GEN9_3DSTATE_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PixelPositionOffsetEnable; +#define CENTER 0 +#define UL_CORNER 1 + uint32_t PixelLocation; + uint32_t NumberofMultisamples; +}; + +static inline void +GEN9_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelPositionOffsetEnable, 5, 5) | + __gen_field(values->PixelLocation, 4, 4) | + __gen_field(values->NumberofMultisamples, 1, 3) | + 0; + +} + +#define GEN9_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 +#define GEN9_3DSTATE_POLY_STIPPLE_OFFSET_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 + +struct GEN9_3DSTATE_POLY_STIPPLE_OFFSET { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PolygonStippleXOffset; + uint32_t PolygonStippleYOffset; +}; + +static inline void +GEN9_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict 
dst, + const struct GEN9_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PolygonStippleXOffset, 8, 12) | + __gen_field(values->PolygonStippleYOffset, 0, 4) | + 0; + +} + +#define GEN9_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 +#define GEN9_3DSTATE_POLY_STIPPLE_PATTERN_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 31 + +#define GEN9_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 + +struct GEN9_3DSTATE_POLY_STIPPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PatternRow[32]; +}; + +static inline void +GEN9_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { + dw[j] = + __gen_field(values->PatternRow[i + 0], 0, 31) | + 0; + } + +} + +#define GEN9_3DSTATE_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 32, \ + .DwordLength = 10 + +#define GEN9_3DSTATE_PS_length 0x0000000c + +struct GEN9_3DSTATE_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + 
uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer0; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; +#define FlushedtoZero 0 +#define Retained 1 + uint32_t SinglePrecisionDenormalMode; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreadsPerPSD; + bool PushConstantEnable; + bool RenderTargetFastClearEnable; +#define RESOLVE_DISABLED 0 +#define RESOLVE_PARTIAL 1 +#define RESOLVE_FULL 3 + uint32_t RenderTargetResolveType; +#define POSOFFSET_NONE 0 +#define POSOFFSET_CENTROID 2 +#define POSOFFSET_SAMPLE 3 + uint32_t PositionXYOffsetSelect; + bool _32PixelDispatchEnable; + bool _16PixelDispatchEnable; + bool _8PixelDispatchEnable; + uint32_t DispatchGRFStartRegisterForConstantSetupData0; + uint32_t DispatchGRFStartRegisterForConstantSetupData1; + uint32_t DispatchGRFStartRegisterForConstantSetupData2; + uint64_t KernelStartPointer1; + uint64_t KernelStartPointer2; +}; + +static inline void +GEN9_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t 
qw1 = + __gen_offset(values->KernelStartPointer0, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleProgramFlow, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->SinglePrecisionDenormalMode, 26, 26) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->RoundingMode, 14, 15) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->MaximumNumberofThreadsPerPSD, 23, 31) | + __gen_field(values->PushConstantEnable, 11, 11) | + __gen_field(values->RenderTargetFastClearEnable, 8, 8) | + __gen_field(values->RenderTargetResolveType, 6, 7) | + __gen_field(values->PositionXYOffsetSelect, 3, 4) | + __gen_field(values->_32PixelDispatchEnable, 2, 2) | + __gen_field(values->_16PixelDispatchEnable, 1, 1) | + __gen_field(values->_8PixelDispatchEnable, 0, 0) | + 0; + + dw[7] = + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData0, 16, 22) | + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData1, 8, 14) | + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData2, 0, 6) | + 0; + + uint64_t qw8 = + __gen_offset(values->KernelStartPointer1, 6, 63) | + 0; + + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint64_t qw10 = + __gen_offset(values->KernelStartPointer2, 6, 63) | + 0; + + dw[10] = qw10; + dw[11] = qw10 >> 32; + +} + +#define GEN9_3DSTATE_PS_BLEND_length_bias 0x00000002 +#define GEN9_3DSTATE_PS_BLEND_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 
0, \ + ._3DCommandSubOpcode = 77, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PS_BLEND_length 0x00000002 + +struct GEN9_3DSTATE_PS_BLEND { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool AlphaToCoverageEnable; + bool HasWriteableRT; + bool ColorBufferBlendEnable; + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + bool AlphaTestEnable; + bool IndependentAlphaBlendEnable; +}; + +static inline void +GEN9_3DSTATE_PS_BLEND_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PS_BLEND * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->HasWriteableRT, 30, 30) | + __gen_field(values->ColorBufferBlendEnable, 29, 29) | + __gen_field(values->SourceAlphaBlendFactor, 24, 28) | + __gen_field(values->DestinationAlphaBlendFactor, 19, 23) | + __gen_field(values->SourceBlendFactor, 14, 18) | + __gen_field(values->DestinationBlendFactor, 9, 13) | + __gen_field(values->AlphaTestEnable, 8, 8) | + __gen_field(values->IndependentAlphaBlendEnable, 7, 7) | + 0; + +} + +#define GEN9_3DSTATE_PS_EXTRA_length_bias 0x00000002 +#define GEN9_3DSTATE_PS_EXTRA_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 79, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PS_EXTRA_length 0x00000002 + +struct GEN9_3DSTATE_PS_EXTRA { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool PixelShaderValid; + bool 
PixelShaderDoesnotwritetoRT; + bool oMaskPresenttoRenderTarget; + bool PixelShaderKillsPixel; +#define PSCDEPTH_OFF 0 +#define PSCDEPTH_ON 1 +#define PSCDEPTH_ON_GE 2 +#define PSCDEPTH_ON_LE 3 + uint32_t PixelShaderComputedDepthMode; + bool ForceComputedDepth; + bool PixelShaderUsesSourceDepth; + bool PixelShaderUsesSourceW; + uint32_t Removed; + bool AttributeEnable; + bool PixelShaderDisablesAlphaToCoverage; + bool PixelShaderIsPerSample; + bool PixelShaderComputesStencil; + bool PixelShaderPullsBary; + bool PixelShaderHasUAV; +#define ICMS_NONE 0 +#define ICMS_NORMAL 1 +#define ICMS_INNER_CONSERVATIVE 2 +#define ICMS_DEPTH_COVERAGE 3 + uint32_t InputCoverageMaskState; +}; + +static inline void +GEN9_3DSTATE_PS_EXTRA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PS_EXTRA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelShaderValid, 31, 31) | + __gen_field(values->PixelShaderDoesnotwritetoRT, 30, 30) | + __gen_field(values->oMaskPresenttoRenderTarget, 29, 29) | + __gen_field(values->PixelShaderKillsPixel, 28, 28) | + __gen_field(values->PixelShaderComputedDepthMode, 26, 27) | + __gen_field(values->ForceComputedDepth, 25, 25) | + __gen_field(values->PixelShaderUsesSourceDepth, 24, 24) | + __gen_field(values->PixelShaderUsesSourceW, 23, 23) | + __gen_field(values->Removed, 17, 17) | + __gen_field(values->AttributeEnable, 8, 8) | + __gen_field(values->PixelShaderDisablesAlphaToCoverage, 7, 7) | + __gen_field(values->PixelShaderIsPerSample, 6, 6) | + __gen_field(values->PixelShaderComputesStencil, 5, 5) | + __gen_field(values->PixelShaderPullsBary, 3, 3) | + __gen_field(values->PixelShaderHasUAV, 2, 2) | + 
__gen_field(values->InputCoverageMaskState, 0, 1) | + 0; + +} + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 + +struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 + +struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) +{ + uint32_t *dw = 
(uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 + +struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 + +struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS { + uint32_t CommandType; + uint32_t CommandSubType; + 
uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 + +struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_RASTER_length_bias 0x00000002 +#define 
GEN9_3DSTATE_RASTER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 80, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_RASTER_length 0x00000005 + +struct GEN9_3DSTATE_RASTER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ViewportZFarClipTestEnable; + bool ConservativeRasterizationEnable; +#define DX9OGL 0 +#define DX100 1 +#define DX101 2 + uint32_t APIMode; +#define Clockwise 0 +#define CounterClockwise 1 + uint32_t FrontWinding; +#define FSC_NUMRASTSAMPLES_0 0 +#define FSC_NUMRASTSAMPLES_1 1 +#define FSC_NUMRASTSAMPLES_2 2 +#define FSC_NUMRASTSAMPLES_4 3 +#define FSC_NUMRASTSAMPLES_8 4 +#define FSC_NUMRASTSAMPLES_16 5 + uint32_t ForcedSampleCount; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; +#define Normal 0 +#define Force 1 + uint32_t ForceMultisampling; + bool SmoothPointEnable; + bool DXMultisampleRasterizationEnable; +#define MSRASTMODE_OFF_PIXEL 0 +#define MSRASTMODE_OFF_PATTERN 1 +#define MSRASTMODE_ON_PIXEL 2 +#define MSRASTMODE_ON_PATTERN 3 + uint32_t DXMultisampleRasterizationMode; + bool GlobalDepthOffsetEnableSolid; + bool GlobalDepthOffsetEnableWireframe; + bool GlobalDepthOffsetEnablePoint; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t FrontFaceFillMode; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t BackFaceFillMode; + bool AntialiasingEnable; + bool ScissorRectangleEnable; + bool ViewportZNearClipTestEnable; + float GlobalDepthOffsetConstant; + float GlobalDepthOffsetScale; + float GlobalDepthOffsetClamp; +}; + +static inline void +GEN9_3DSTATE_RASTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_RASTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ViewportZFarClipTestEnable, 26, 26) | + __gen_field(values->ConservativeRasterizationEnable, 24, 24) | + __gen_field(values->APIMode, 22, 23) | + __gen_field(values->FrontWinding, 21, 21) | + __gen_field(values->ForcedSampleCount, 18, 20) | + __gen_field(values->CullMode, 16, 17) | + __gen_field(values->ForceMultisampling, 14, 14) | + __gen_field(values->SmoothPointEnable, 13, 13) | + __gen_field(values->DXMultisampleRasterizationEnable, 12, 12) | + __gen_field(values->DXMultisampleRasterizationMode, 10, 11) | + __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | + __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | + __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | + __gen_field(values->FrontFaceFillMode, 5, 6) | + __gen_field(values->BackFaceFillMode, 3, 4) | + __gen_field(values->AntialiasingEnable, 2, 2) | + __gen_field(values->ScissorRectangleEnable, 1, 1) | + __gen_field(values->ViewportZNearClipTestEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->GlobalDepthOffsetConstant) | + 0; + + dw[3] = + __gen_float(values->GlobalDepthOffsetScale) | + 0; + + dw[4] = + __gen_float(values->GlobalDepthOffsetClamp) | + 0; + +} + +#define GEN9_3DSTATE_RS_CONSTANT_POINTER_length_bias 0x00000002 +#define GEN9_3DSTATE_RS_CONSTANT_POINTER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 84, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_RS_CONSTANT_POINTER_length 0x00000004 + +struct GEN9_3DSTATE_RS_CONSTANT_POINTER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define VS 0 +#define PS 4 + uint32_t ShaderSelect; +#define RS_STORE 
0 +#define RS_LOAD 1 + uint32_t OperationLoadorStore; + __gen_address_type GlobalConstantBufferAddress; + __gen_address_type GlobalConstantBufferAddressHigh; +}; + +static inline void +GEN9_3DSTATE_RS_CONSTANT_POINTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_RS_CONSTANT_POINTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ShaderSelect, 28, 30) | + __gen_field(values->OperationLoadorStore, 12, 12) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->GlobalConstantBufferAddress, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->GlobalConstantBufferAddressHigh, dw3); + +} + +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2 + +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 + +#define GEN9_PALETTE_ENTRY_length 0x00000001 + +struct GEN9_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN9_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + +struct GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable 
length fields follow */ +}; + +static inline void +GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 12 + +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 + +struct GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 45, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 + +struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS { + uint32_t CommandType; + uint32_t 
CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSSamplerState; +}; + +static inline void +GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSSamplerState, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 46, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 + +struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSSamplerState; +}; + +static inline void +GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSSamplerState, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + 
._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 44, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 + +struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSSamplerState; +}; + +static inline void +GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSSamplerState, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 47, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 + +struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSSamplerState; +}; + +static inline void +GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 
__gen_offset(values->PointertoPSSamplerState, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 43, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 + +struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSSamplerState; +}; + +static inline void +GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSSamplerState, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLE_MASK_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SAMPLE_MASK_length 0x00000002 + +struct GEN9_3DSTATE_SAMPLE_MASK { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SampleMask; +}; + +static inline void +GEN9_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLE_MASK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SampleMask, 0, 15) | + 0; + +} + +#define GEN9_3DSTATE_SAMPLE_PATTERN_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLE_PATTERN_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 7 + +#define GEN9_3DSTATE_SAMPLE_PATTERN_length 0x00000009 + +struct GEN9_3DSTATE_SAMPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float _16xSample3XOffset; + float _16xSample3YOffset; + float _16xSample2XOffset; + float _16xSample2YOffset; + float _16xSample1XOffset; + float _16xSample1YOffset; + float _16xSample0XOffset; + float _16xSample0YOffset; + float _16xSample7XOffset; + float _16xSample7YOffset; + float _16xSample6XOffset; + float _16xSample6YOffset; + float _16xSample5XOffset; + float _16xSample5YOffset; + float _16xSample4XOffset; + float _16xSample4YOffset; + float _16xSample11XOffset; + float _16xSample11YOffset; + float _16xSample10XOffset; + float _16xSample10YOffset; + float _16xSample9XOffset; + float _16xSample9YOffset; + float _16xSample8XOffset; + float _16xSample8YOffset; + float _16xSample15XOffset; + float _16xSample15YOffset; + float _16xSample14XOffset; + float _16xSample14YOffset; + float _16xSample13XOffset; + float _16xSample13YOffset; + float _16xSample12XOffset; + float _16xSample12YOffset; + float _8xSample7XOffset; + float _8xSample7YOffset; + float _8xSample6XOffset; + float _8xSample6YOffset; + float _8xSample5XOffset; + float _8xSample5YOffset; + float _8xSample4XOffset; + float _8xSample4YOffset; + float _8xSample3XOffset; + float _8xSample3YOffset; + float _8xSample2XOffset; + float _8xSample2YOffset; + float _8xSample1XOffset; + float _8xSample1YOffset; + float _8xSample0XOffset; + float 
_8xSample0YOffset; + float _4xSample3XOffset; + float _4xSample3YOffset; + float _4xSample2XOffset; + float _4xSample2YOffset; + float _4xSample1XOffset; + float _4xSample1YOffset; + float _4xSample0XOffset; + float _4xSample0YOffset; + float _1xSample0XOffset; + float _1xSample0YOffset; + float _2xSample1XOffset; + float _2xSample1YOffset; + float _2xSample0XOffset; + float _2xSample0YOffset; +}; + +static inline void +GEN9_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->_16xSample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->_16xSample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->_16xSample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->_16xSample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->_16xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_16xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_16xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_16xSample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[2] = + __gen_field(values->_16xSample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->_16xSample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->_16xSample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->_16xSample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->_16xSample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->_16xSample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->_16xSample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->_16xSample4YOffset * (1 << 4), 0, 3) | + 0; + + dw[3] = + __gen_field(values->_16xSample11XOffset * (1 << 4), 28, 31) | + 
__gen_field(values->_16xSample11YOffset * (1 << 4), 24, 27) | + __gen_field(values->_16xSample10XOffset * (1 << 4), 20, 23) | + __gen_field(values->_16xSample10YOffset * (1 << 4), 16, 19) | + __gen_field(values->_16xSample9XOffset * (1 << 4), 12, 15) | + __gen_field(values->_16xSample9YOffset * (1 << 4), 8, 11) | + __gen_field(values->_16xSample8XOffset * (1 << 4), 4, 7) | + __gen_field(values->_16xSample8YOffset * (1 << 4), 0, 3) | + 0; + + dw[4] = + __gen_field(values->_16xSample15XOffset * (1 << 4), 28, 31) | + __gen_field(values->_16xSample15YOffset * (1 << 4), 24, 27) | + __gen_field(values->_16xSample14XOffset * (1 << 4), 20, 23) | + __gen_field(values->_16xSample14YOffset * (1 << 4), 16, 19) | + __gen_field(values->_16xSample13XOffset * (1 << 4), 12, 15) | + __gen_field(values->_16xSample13YOffset * (1 << 4), 8, 11) | + __gen_field(values->_16xSample12XOffset * (1 << 4), 4, 7) | + __gen_field(values->_16xSample12YOffset * (1 << 4), 0, 3) | + 0; + + dw[5] = + __gen_field(values->_8xSample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->_8xSample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->_8xSample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->_8xSample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->_8xSample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->_8xSample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->_8xSample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->_8xSample4YOffset * (1 << 4), 0, 3) | + 0; + + dw[6] = + __gen_field(values->_8xSample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->_8xSample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->_8xSample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->_8xSample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->_8xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_8xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_8xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_8xSample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[7] = + 
__gen_field(values->_4xSample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->_4xSample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->_4xSample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->_4xSample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->_4xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_4xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_4xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_4xSample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[8] = + __gen_field(values->_1xSample0XOffset * (1 << 4), 20, 23) | + __gen_field(values->_1xSample0YOffset * (1 << 4), 16, 19) | + __gen_field(values->_2xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_2xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_2xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_2xSample0YOffset * (1 << 4), 0, 3) | + 0; + +} + +#define GEN9_3DSTATE_SBE_length_bias 0x00000002 +#define GEN9_3DSTATE_SBE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 31, \ + .DwordLength = 4 + +#define GEN9_3DSTATE_SBE_length 0x00000006 + +struct GEN9_3DSTATE_SBE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ForceVertexURBEntryReadLength; + bool ForceVertexURBEntryReadOffset; + uint32_t NumberofSFOutputAttributes; + bool AttributeSwizzleEnable; +#define UPPERLEFT 0 +#define LOWERLEFT 1 + uint32_t PointSpriteTextureCoordinateOrigin; + bool PrimitiveIDOverrideComponentW; + bool PrimitiveIDOverrideComponentZ; + bool PrimitiveIDOverrideComponentY; + bool PrimitiveIDOverrideComponentX; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t PrimitiveIDOverrideAttributeSelect; + uint32_t PointSpriteTextureCoordinateEnable; + uint32_t ConstantInterpolationEnable; + uint32_t Attribute15ActiveComponentFormat; + uint32_t Attribute14ActiveComponentFormat; + uint32_t 
Attribute13ActiveComponentFormat; + uint32_t Attribute12ActiveComponentFormat; + uint32_t Attribute11ActiveComponentFormat; + uint32_t Attribute10ActiveComponentFormat; + uint32_t Attribute9ActiveComponentFormat; + uint32_t Attribute8ActiveComponentFormat; + uint32_t Attribute7ActiveComponentFormat; + uint32_t Attribute6ActiveComponentFormat; + uint32_t Attribute5ActiveComponentFormat; + uint32_t Attribute4ActiveComponentFormat; + uint32_t Attribute3ActiveComponentFormat; + uint32_t Attribute2ActiveComponentFormat; + uint32_t Attribute1ActiveComponentFormat; + uint32_t Attribute0ActiveComponentFormat; + uint32_t Attribute31ActiveComponentFormat; + uint32_t Attribute30ActiveComponentFormat; + uint32_t Attribute29ActiveComponentFormat; + uint32_t Attribute28ActiveComponentFormat; + uint32_t Attribute27ActiveComponentFormat; + uint32_t Attribute26ActiveComponentFormat; + uint32_t Attribute25ActiveComponentFormat; + uint32_t Attribute24ActiveComponentFormat; + uint32_t Attribute23ActiveComponentFormat; + uint32_t Attribute22ActiveComponentFormat; + uint32_t Attribute21ActiveComponentFormat; + uint32_t Attribute20ActiveComponentFormat; + uint32_t Attribute19ActiveComponentFormat; + uint32_t Attribute18ActiveComponentFormat; + uint32_t Attribute17ActiveComponentFormat; + uint32_t Attribute16ActiveComponentFormat; +}; + +static inline void +GEN9_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SBE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ForceVertexURBEntryReadLength, 29, 29) | + __gen_field(values->ForceVertexURBEntryReadOffset, 28, 28) | + __gen_field(values->NumberofSFOutputAttributes, 22, 27) | + 
__gen_field(values->AttributeSwizzleEnable, 21, 21) | + __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | + __gen_field(values->PrimitiveIDOverrideComponentW, 19, 19) | + __gen_field(values->PrimitiveIDOverrideComponentZ, 18, 18) | + __gen_field(values->PrimitiveIDOverrideComponentY, 17, 17) | + __gen_field(values->PrimitiveIDOverrideComponentX, 16, 16) | + __gen_field(values->VertexURBEntryReadLength, 11, 15) | + __gen_field(values->VertexURBEntryReadOffset, 5, 10) | + __gen_field(values->PrimitiveIDOverrideAttributeSelect, 0, 4) | + 0; + + dw[2] = + __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ConstantInterpolationEnable, 0, 31) | + 0; + + dw[4] = + __gen_field(values->Attribute15ActiveComponentFormat, 30, 31) | + __gen_field(values->Attribute14ActiveComponentFormat, 28, 29) | + __gen_field(values->Attribute13ActiveComponentFormat, 26, 27) | + __gen_field(values->Attribute12ActiveComponentFormat, 24, 25) | + __gen_field(values->Attribute11ActiveComponentFormat, 22, 23) | + __gen_field(values->Attribute10ActiveComponentFormat, 20, 21) | + __gen_field(values->Attribute9ActiveComponentFormat, 18, 19) | + __gen_field(values->Attribute8ActiveComponentFormat, 16, 17) | + __gen_field(values->Attribute7ActiveComponentFormat, 14, 15) | + __gen_field(values->Attribute6ActiveComponentFormat, 12, 13) | + __gen_field(values->Attribute5ActiveComponentFormat, 10, 11) | + __gen_field(values->Attribute4ActiveComponentFormat, 8, 9) | + __gen_field(values->Attribute3ActiveComponentFormat, 6, 7) | + __gen_field(values->Attribute2ActiveComponentFormat, 4, 5) | + __gen_field(values->Attribute1ActiveComponentFormat, 2, 3) | + __gen_field(values->Attribute0ActiveComponentFormat, 0, 1) | + 0; + + dw[5] = + __gen_field(values->Attribute31ActiveComponentFormat, 30, 31) | + __gen_field(values->Attribute30ActiveComponentFormat, 28, 29) | + __gen_field(values->Attribute29ActiveComponentFormat, 26, 27) | + 
__gen_field(values->Attribute28ActiveComponentFormat, 24, 25) | + __gen_field(values->Attribute27ActiveComponentFormat, 22, 23) | + __gen_field(values->Attribute26ActiveComponentFormat, 20, 21) | + __gen_field(values->Attribute25ActiveComponentFormat, 18, 19) | + __gen_field(values->Attribute24ActiveComponentFormat, 16, 17) | + __gen_field(values->Attribute23ActiveComponentFormat, 14, 15) | + __gen_field(values->Attribute22ActiveComponentFormat, 12, 13) | + __gen_field(values->Attribute21ActiveComponentFormat, 10, 11) | + __gen_field(values->Attribute20ActiveComponentFormat, 8, 9) | + __gen_field(values->Attribute19ActiveComponentFormat, 6, 7) | + __gen_field(values->Attribute18ActiveComponentFormat, 4, 5) | + __gen_field(values->Attribute17ActiveComponentFormat, 2, 3) | + __gen_field(values->Attribute16ActiveComponentFormat, 0, 1) | + 0; + +} + +#define GEN9_3DSTATE_SBE_SWIZ_length_bias 0x00000002 +#define GEN9_3DSTATE_SBE_SWIZ_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 81, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_SBE_SWIZ_length 0x0000000b + +#define GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL_length 0x00000001 + +struct GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL { + bool ComponentOverrideW; + bool ComponentOverrideZ; + bool ComponentOverrideY; + bool ComponentOverrideX; + uint32_t SwizzleControlMode; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t ConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t SwizzleSelect; + uint32_t SourceAttribute; +}; + +static inline void +GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ComponentOverrideW, 15, 15) | + __gen_field(values->ComponentOverrideZ, 14, 14) | + 
__gen_field(values->ComponentOverrideY, 13, 13) | + __gen_field(values->ComponentOverrideX, 12, 12) | + __gen_field(values->SwizzleControlMode, 11, 11) | + __gen_field(values->ConstantSource, 9, 10) | + __gen_field(values->SwizzleSelect, 6, 7) | + __gen_field(values->SourceAttribute, 0, 4) | + 0; + +} + +struct GEN9_3DSTATE_SBE_SWIZ { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL Attribute[16]; + uint32_t AttributeWrapShortestEnables[16]; +}; + +static inline void +GEN9_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SBE_SWIZ * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 16; i += 2, j++) { + uint32_t dw_Attribute0; + GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute0, &values->Attribute[i + 0]); + uint32_t dw_Attribute1; + GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute1, &values->Attribute[i + 1]); + dw[j] = + __gen_field(dw_Attribute0, 0, 15) | + __gen_field(dw_Attribute1, 16, 31) | + 0; + } + + for (uint32_t i = 0, j = 9; i < 16; i += 8, j++) { + dw[j] = + __gen_field(values->AttributeWrapShortestEnables[i + 0], 0, 3) | + __gen_field(values->AttributeWrapShortestEnables[i + 1], 4, 7) | + __gen_field(values->AttributeWrapShortestEnables[i + 2], 8, 11) | + __gen_field(values->AttributeWrapShortestEnables[i + 3], 12, 15) | + __gen_field(values->AttributeWrapShortestEnables[i + 4], 16, 19) | + __gen_field(values->AttributeWrapShortestEnables[i + 5], 20, 23) | + __gen_field(values->AttributeWrapShortestEnables[i + 6], 24, 27) | + 
__gen_field(values->AttributeWrapShortestEnables[i + 7], 28, 31) | + 0; + } + +} + +#define GEN9_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 +#define GEN9_3DSTATE_SCISSOR_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 15, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 + +struct GEN9_3DSTATE_SCISSOR_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ScissorRectPointer; +}; + +static inline void +GEN9_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ScissorRectPointer, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_SF_length_bias 0x00000002 +#define GEN9_3DSTATE_SF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_SF_length 0x00000004 + +struct GEN9_3DSTATE_SF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float LineWidth; + bool LegacyGlobalDepthBiasEnable; + bool StatisticsEnable; + bool ViewportTransformEnable; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineEndCapAntialiasingRegionWidth; + bool LastPixelEnable; + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; + uint32_t TriangleFanProvokingVertexSelect; 
+#define AALINEDISTANCE_TRUE 1 + uint32_t AALineDistanceMode; + bool SmoothPointEnable; + uint32_t VertexSubPixelPrecisionSelect; +#define Vertex 0 +#define State 1 + uint32_t PointWidthSource; + float PointWidth; +}; + +static inline void +GEN9_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->LineWidth * (1 << 7), 12, 29) | + __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->ViewportTransformEnable, 1, 1) | + 0; + + dw[2] = + __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | + 0; + + dw[3] = + __gen_field(values->LastPixelEnable, 31, 31) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | + __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | + __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | + __gen_field(values->AALineDistanceMode, 14, 14) | + __gen_field(values->SmoothPointEnable, 13, 13) | + __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | + __gen_field(values->PointWidthSource, 11, 11) | + __gen_field(values->PointWidth * (1 << 3), 0, 10) | + 0; + +} + +#define GEN9_3DSTATE_SO_BUFFER_length_bias 0x00000002 +#define GEN9_3DSTATE_SO_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 6 + +#define GEN9_3DSTATE_SO_BUFFER_length 0x00000008 + +struct GEN9_3DSTATE_SO_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool SOBufferEnable; + 
uint32_t SOBufferIndex; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; + bool StreamOffsetWriteEnable; + bool StreamOutputBufferOffsetAddressEnable; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceSize; + __gen_address_type StreamOutputBufferOffsetAddress; + uint32_t StreamOffset; +}; + +static inline void +GEN9_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SO_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SOBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); + dw[1] = + __gen_field(values->SOBufferEnable, 31, 31) | + __gen_field(values->SOBufferIndex, 29, 30) | + __gen_field(dw_SOBufferObjectControlState, 22, 28) | + __gen_field(values->StreamOffsetWriteEnable, 21, 21) | + __gen_field(values->StreamOutputBufferOffsetAddressEnable, 20, 20) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->SurfaceSize, 0, 29) | + 0; + + uint32_t dw5 = + 0; + + uint64_t qw5 = + __gen_combine_address(data, &dw[5], values->StreamOutputBufferOffsetAddress, dw5); + + dw[5] = qw5; + dw[6] = qw5 >> 32; + + dw[7] = + __gen_field(values->StreamOffset, 0, 31) | + 0; + +} + +#define GEN9_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 +#define GEN9_3DSTATE_SO_DECL_LIST_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 23 + +#define GEN9_3DSTATE_SO_DECL_LIST_length 0x00000000 + +#define GEN9_SO_DECL_ENTRY_length 0x00000002 + +#define 
GEN9_SO_DECL_length 0x00000001 + +struct GEN9_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN9_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN9_SO_DECL_ENTRY { + struct GEN9_SO_DECL Stream3Decl; + struct GEN9_SO_DECL Stream2Decl; + struct GEN9_SO_DECL Stream1Decl; + struct GEN9_SO_DECL Stream0Decl; +}; + +static inline void +GEN9_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_Stream3Decl; + GEN9_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); + uint32_t dw_Stream2Decl; + GEN9_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); + uint32_t dw_Stream1Decl; + GEN9_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); + uint32_t dw_Stream0Decl; + GEN9_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); + uint64_t qw0 = + __gen_field(dw_Stream3Decl, 48, 63) | + __gen_field(dw_Stream2Decl, 32, 47) | + __gen_field(dw_Stream1Decl, 16, 31) | + __gen_field(dw_Stream0Decl, 0, 15) | + 0; + + dw[0] = qw0; + dw[1] = qw0 >> 32; + +} + +struct GEN9_3DSTATE_SO_DECL_LIST { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StreamtoBufferSelects3; + uint32_t StreamtoBufferSelects2; + uint32_t StreamtoBufferSelects1; + uint32_t StreamtoBufferSelects0; + uint32_t NumEntries3; + uint32_t NumEntries2; + uint32_t NumEntries1; + uint32_t NumEntries0; + /* variable length fields follow */ +}; + +static inline void 
+GEN9_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SO_DECL_LIST * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->StreamtoBufferSelects3, 12, 15) | + __gen_field(values->StreamtoBufferSelects2, 8, 11) | + __gen_field(values->StreamtoBufferSelects1, 4, 7) | + __gen_field(values->StreamtoBufferSelects0, 0, 3) | + 0; + + dw[2] = + __gen_field(values->NumEntries3, 24, 31) | + __gen_field(values->NumEntries2, 16, 23) | + __gen_field(values->NumEntries1, 8, 15) | + __gen_field(values->NumEntries0, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 +#define GEN9_3DSTATE_STENCIL_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_STENCIL_BUFFER_length 0x00000005 + +struct GEN9_3DSTATE_STENCIL_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilBufferEnable; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN9_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_STENCIL_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + 
__gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_StencilBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); + dw[1] = + __gen_field(values->StencilBufferEnable, 31, 31) | + __gen_field(dw_StencilBufferObjectControlState, 22, 28) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN9_3DSTATE_STREAMOUT_length_bias 0x00000002 +#define GEN9_3DSTATE_STREAMOUT_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 30, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_STREAMOUT_length 0x00000005 + +struct GEN9_3DSTATE_STREAMOUT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOFunctionEnable; + uint32_t APIRenderingDisable; + uint32_t RenderStreamSelect; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + bool SOStatisticsEnable; +#define Normal 0 +#define Resreved 1 +#define Force_Off 2 +#define Force_on 3 + uint32_t ForceRendering; + uint32_t Stream3VertexReadOffset; + uint32_t Stream3VertexReadLength; + uint32_t Stream2VertexReadOffset; + uint32_t Stream2VertexReadLength; + uint32_t Stream1VertexReadOffset; + uint32_t Stream1VertexReadLength; + uint32_t Stream0VertexReadOffset; + uint32_t Stream0VertexReadLength; + uint32_t Buffer1SurfacePitch; + uint32_t Buffer0SurfacePitch; + uint32_t Buffer3SurfacePitch; + uint32_t Buffer2SurfacePitch; +}; + +static inline void +GEN9_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_STREAMOUT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOFunctionEnable, 31, 31) | + __gen_field(values->APIRenderingDisable, 30, 30) | + __gen_field(values->RenderStreamSelect, 27, 28) | + __gen_field(values->ReorderMode, 26, 26) | + __gen_field(values->SOStatisticsEnable, 25, 25) | + __gen_field(values->ForceRendering, 23, 24) | + 0; + + dw[2] = + __gen_field(values->Stream3VertexReadOffset, 29, 29) | + __gen_field(values->Stream3VertexReadLength, 24, 28) | + __gen_field(values->Stream2VertexReadOffset, 21, 21) | + __gen_field(values->Stream2VertexReadLength, 16, 20) | + __gen_field(values->Stream1VertexReadOffset, 13, 13) | + __gen_field(values->Stream1VertexReadLength, 8, 12) | + __gen_field(values->Stream0VertexReadOffset, 5, 5) | + __gen_field(values->Stream0VertexReadLength, 0, 4) | + 0; + + dw[3] = + __gen_field(values->Buffer1SurfacePitch, 16, 27) | + __gen_field(values->Buffer0SurfacePitch, 0, 11) | + 0; + + dw[4] = + __gen_field(values->Buffer3SurfacePitch, 16, 27) | + __gen_field(values->Buffer2SurfacePitch, 0, 11) | + 0; + +} + +#define GEN9_3DSTATE_TE_length_bias 0x00000002 +#define GEN9_3DSTATE_TE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_TE_length 0x00000004 + +struct GEN9_3DSTATE_TE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INTEGER 0 +#define ODD_FRACTIONAL 1 +#define EVEN_FRACTIONAL 2 + uint32_t Partitioning; +#define POINT 0 +#define OUTPUT_LINE 1 +#define OUTPUT_TRI_CW 2 +#define OUTPUT_TRI_CCW 3 + uint32_t OutputTopology; +#define QUAD 0 +#define TRI 1 +#define ISOLINE 2 + uint32_t TEDomain; +#define HW_TESS 0 + 
uint32_t TEMode; + bool TEEnable; + float MaximumTessellationFactorOdd; + float MaximumTessellationFactorNotOdd; +}; + +static inline void +GEN9_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_TE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Partitioning, 12, 13) | + __gen_field(values->OutputTopology, 8, 9) | + __gen_field(values->TEDomain, 4, 5) | + __gen_field(values->TEMode, 1, 2) | + __gen_field(values->TEEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->MaximumTessellationFactorOdd) | + 0; + + dw[3] = + __gen_float(values->MaximumTessellationFactorNotOdd) | + 0; + +} + +#define GEN9_3DSTATE_URB_CLEAR_length_bias 0x00000002 +#define GEN9_3DSTATE_URB_CLEAR_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_URB_CLEAR_length 0x00000002 + +struct GEN9_3DSTATE_URB_CLEAR { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t URBClearLength; + uint32_t URBAddress; +}; + +static inline void +GEN9_3DSTATE_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_URB_CLEAR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBClearLength, 16, 29) | + __gen_offset(values->URBAddress, 0, 14) | + 0; + 
+} + +#define GEN9_3DSTATE_URB_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_URB_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 50, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_URB_DS_length 0x00000002 + +struct GEN9_3DSTATE_URB_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DSURBStartingAddress; + uint32_t DSURBEntryAllocationSize; + uint32_t DSNumberofURBEntries; +}; + +static inline void +GEN9_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_URB_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DSURBStartingAddress, 25, 31) | + __gen_field(values->DSURBEntryAllocationSize, 16, 24) | + __gen_field(values->DSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN9_3DSTATE_URB_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_URB_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 51, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_URB_GS_length 0x00000002 + +struct GEN9_3DSTATE_URB_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t GSURBStartingAddress; + uint32_t GSURBEntryAllocationSize; + uint32_t GSNumberofURBEntries; +}; + +static inline void +GEN9_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_URB_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + 
__gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->GSURBStartingAddress, 25, 31) | + __gen_field(values->GSURBEntryAllocationSize, 16, 24) | + __gen_field(values->GSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN9_3DSTATE_URB_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_URB_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 49, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_URB_HS_length 0x00000002 + +struct GEN9_3DSTATE_URB_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HSURBStartingAddress; + uint32_t HSURBEntryAllocationSize; + uint32_t HSNumberofURBEntries; +}; + +static inline void +GEN9_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_URB_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->HSURBStartingAddress, 25, 31) | + __gen_field(values->HSURBEntryAllocationSize, 16, 24) | + __gen_field(values->HSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN9_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 +#define GEN9_3DSTATE_VERTEX_BUFFERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 8 + +#define GEN9_3DSTATE_VERTEX_BUFFERS_length 0x00000000 + +#define GEN9_VERTEX_BUFFER_STATE_length 0x00000004 + +struct GEN9_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE 
MemoryObjectControlState; + uint32_t AddressModifyEnable; + bool NullVertexBuffer; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + uint32_t BufferSize; +}; + +static inline void +GEN9_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_MemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(dw_MemoryObjectControlState, 16, 22) | + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->BufferSize, 0, 31) | + 0; + +} + +struct GEN9_3DSTATE_VERTEX_BUFFERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VERTEX_BUFFERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 +#define GEN9_3DSTATE_VERTEX_ELEMENTS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 9 + +#define 
GEN9_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 + +#define GEN9_VERTEX_ELEMENT_STATE_length 0x00000002 + +struct GEN9_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + bool Valid; + uint32_t SourceElementFormat; + bool EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline void +GEN9_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + +struct GEN9_3DSTATE_VERTEX_ELEMENTS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VERTEX_ELEMENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_VF_length_bias 0x00000002 +#define GEN9_3DSTATE_VF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 
12, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_VF_length 0x00000002 + +struct GEN9_3DSTATE_VF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool SequentialDrawCutIndexEnable; + bool ComponentPackingEnable; + bool IndexedDrawCutIndexEnable; + uint32_t DwordLength; + uint32_t CutIndex; +}; + +static inline void +GEN9_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->SequentialDrawCutIndexEnable, 10, 10) | + __gen_field(values->ComponentPackingEnable, 9, 9) | + __gen_field(values->IndexedDrawCutIndexEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CutIndex, 0, 31) | + 0; + +} + +#define GEN9_3DSTATE_VF_COMPONENT_PACKING_length_bias 0x00000002 +#define GEN9_3DSTATE_VF_COMPONENT_PACKING_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 85, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_VF_COMPONENT_PACKING_length 0x00000005 + +struct GEN9_3DSTATE_VF_COMPONENT_PACKING { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VertexElement07Enables; + uint32_t VertexElement06Enables; + uint32_t VertexElement05Enables; + uint32_t VertexElement04Enables; + uint32_t VertexElement03Enables; + uint32_t VertexElement02Enables; + uint32_t VertexElement01Enables; + uint32_t VertexElement00Enables; + uint32_t VertexElement15Enables; + uint32_t VertexElement14Enables; + uint32_t VertexElement13Enables; + uint32_t VertexElement12Enables; + uint32_t VertexElement11Enables; + uint32_t 
VertexElement10Enables; + uint32_t VertexElement09Enables; + uint32_t VertexElement08Enables; + uint32_t VertexElement23Enables; + uint32_t VertexElement22Enables; + uint32_t VertexElement21Enables; + uint32_t VertexElement20Enables; + uint32_t VertexElement19Enables; + uint32_t VertexElement18Enables; + uint32_t VertexElement17Enables; + uint32_t VertexElement16Enables; + uint32_t VertexElement31Enables; + uint32_t VertexElement30Enables; + uint32_t VertexElement29Enables; + uint32_t VertexElement28Enables; + uint32_t VertexElement27Enables; + uint32_t VertexElement26Enables; + uint32_t VertexElement25Enables; + uint32_t VertexElement24Enables; +}; + +static inline void +GEN9_3DSTATE_VF_COMPONENT_PACKING_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VF_COMPONENT_PACKING * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VertexElement07Enables, 28, 31) | + __gen_field(values->VertexElement06Enables, 24, 27) | + __gen_field(values->VertexElement05Enables, 20, 23) | + __gen_field(values->VertexElement04Enables, 16, 19) | + __gen_field(values->VertexElement03Enables, 12, 15) | + __gen_field(values->VertexElement02Enables, 8, 11) | + __gen_field(values->VertexElement01Enables, 4, 7) | + __gen_field(values->VertexElement00Enables, 0, 3) | + 0; + + dw[2] = + __gen_field(values->VertexElement15Enables, 28, 31) | + __gen_field(values->VertexElement14Enables, 24, 27) | + __gen_field(values->VertexElement13Enables, 20, 23) | + __gen_field(values->VertexElement12Enables, 16, 19) | + __gen_field(values->VertexElement11Enables, 12, 15) | + __gen_field(values->VertexElement10Enables, 8, 11) | + __gen_field(values->VertexElement09Enables, 4, 7) | + 
__gen_field(values->VertexElement08Enables, 0, 3) | + 0; + + dw[3] = + __gen_field(values->VertexElement23Enables, 28, 31) | + __gen_field(values->VertexElement22Enables, 24, 27) | + __gen_field(values->VertexElement21Enables, 20, 23) | + __gen_field(values->VertexElement20Enables, 16, 19) | + __gen_field(values->VertexElement19Enables, 12, 15) | + __gen_field(values->VertexElement18Enables, 8, 11) | + __gen_field(values->VertexElement17Enables, 4, 7) | + __gen_field(values->VertexElement16Enables, 0, 3) | + 0; + + dw[4] = + __gen_field(values->VertexElement31Enables, 28, 31) | + __gen_field(values->VertexElement30Enables, 24, 27) | + __gen_field(values->VertexElement29Enables, 20, 23) | + __gen_field(values->VertexElement28Enables, 16, 19) | + __gen_field(values->VertexElement27Enables, 12, 15) | + __gen_field(values->VertexElement26Enables, 8, 11) | + __gen_field(values->VertexElement25Enables, 4, 7) | + __gen_field(values->VertexElement24Enables, 0, 3) | + 0; + +} + +#define GEN9_3DSTATE_VF_INSTANCING_length_bias 0x00000002 +#define GEN9_3DSTATE_VF_INSTANCING_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 73, \ + .DwordLength = 1 + +#define GEN9_3DSTATE_VF_INSTANCING_length 0x00000003 + +struct GEN9_3DSTATE_VF_INSTANCING { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool InstancingEnable; + uint32_t VertexElementIndex; + uint32_t InstanceDataStepRate; +}; + +static inline void +GEN9_3DSTATE_VF_INSTANCING_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VF_INSTANCING * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; 
+ + dw[1] = + __gen_field(values->InstancingEnable, 8, 8) | + __gen_field(values->VertexElementIndex, 0, 5) | + 0; + + dw[2] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + +#define GEN9_3DSTATE_VF_SGVS_length_bias 0x00000002 +#define GEN9_3DSTATE_VF_SGVS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 74, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_VF_SGVS_length 0x00000002 + +struct GEN9_3DSTATE_VF_SGVS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool InstanceIDEnable; +#define COMP_0 0 +#define COMP_1 1 +#define COMP_2 2 +#define COMP_3 3 + uint32_t InstanceIDComponentNumber; + uint32_t InstanceIDElementOffset; + bool VertexIDEnable; +#define COMP_0 0 +#define COMP_1 1 +#define COMP_2 2 +#define COMP_3 3 + uint32_t VertexIDComponentNumber; + uint32_t VertexIDElementOffset; +}; + +static inline void +GEN9_3DSTATE_VF_SGVS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VF_SGVS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InstanceIDEnable, 31, 31) | + __gen_field(values->InstanceIDComponentNumber, 29, 30) | + __gen_field(values->InstanceIDElementOffset, 16, 21) | + __gen_field(values->VertexIDEnable, 15, 15) | + __gen_field(values->VertexIDComponentNumber, 13, 14) | + __gen_field(values->VertexIDElementOffset, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_VF_STATISTICS_length_bias 0x00000001 +#define GEN9_3DSTATE_VF_STATISTICS_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 11 + +#define 
GEN9_3DSTATE_VF_STATISTICS_length 0x00000001 + +struct GEN9_3DSTATE_VF_STATISTICS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool StatisticsEnable; +}; + +static inline void +GEN9_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VF_STATISTICS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->StatisticsEnable, 0, 0) | + 0; + +} + +#define GEN9_3DSTATE_VF_TOPOLOGY_length_bias 0x00000002 +#define GEN9_3DSTATE_VF_TOPOLOGY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 75, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_VF_TOPOLOGY_length 0x00000002 + +struct GEN9_3DSTATE_VF_TOPOLOGY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PrimitiveTopologyType; +}; + +static inline void +GEN9_3DSTATE_VF_TOPOLOGY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VF_TOPOLOGY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 35, \ + .DwordLength = 0 + +#define 
GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 + +struct GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t CCViewportPointer; +}; + +static inline void +GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->CCViewportPointer, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 33, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 + +struct GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SFClipViewportPointer; +}; + +static inline void +GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SFClipViewportPointer, 6, 31) | + 0; + +} + +#define 
GEN9_3DSTATE_WM_length_bias 0x00000002 +#define GEN9_3DSTATE_WM_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_WM_length 0x00000002 + +struct GEN9_3DSTATE_WM { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool StatisticsEnable; + bool LegacyDepthBufferClearEnable; + bool LegacyDepthBufferResolveEnable; + bool LegacyHierarchicalDepthBufferResolveEnable; + bool LegacyDiamondLineRasterization; +#define NORMAL 0 +#define PSEXEC 1 +#define PREPS 2 + uint32_t EarlyDepthStencilControl; +#define Normal 0 +#define ForceOff 1 +#define ForceON 2 + uint32_t ForceThreadDispatchEnable; +#define INTERP_PIXEL 0 +#define INTERP_CENTROID 2 +#define INTERP_SAMPLE 3 + uint32_t PositionZWInterpolationMode; + uint32_t BarycentricInterpolationMode; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineEndCapAntialiasingRegionWidth; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineAntialiasingRegionWidth; + bool PolygonStippleEnable; + bool LineStippleEnable; +#define RASTRULE_UPPER_LEFT 0 +#define RASTRULE_UPPER_RIGHT 1 + uint32_t PointRasterizationRule; +#define Normal 0 +#define ForceOff 1 +#define ForceON 2 + uint32_t ForceKillPixelEnable; +}; + +static inline void +GEN9_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_WM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StatisticsEnable, 31, 31) | + __gen_field(values->LegacyDepthBufferClearEnable, 30, 
30) | + __gen_field(values->LegacyDepthBufferResolveEnable, 28, 28) | + __gen_field(values->LegacyHierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | + __gen_field(values->EarlyDepthStencilControl, 21, 22) | + __gen_field(values->ForceThreadDispatchEnable, 19, 20) | + __gen_field(values->PositionZWInterpolationMode, 17, 18) | + __gen_field(values->BarycentricInterpolationMode, 11, 16) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | + __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | + __gen_field(values->PolygonStippleEnable, 4, 4) | + __gen_field(values->LineStippleEnable, 3, 3) | + __gen_field(values->PointRasterizationRule, 2, 2) | + __gen_field(values->ForceKillPixelEnable, 0, 1) | + 0; + +} + +#define GEN9_3DSTATE_WM_CHROMAKEY_length_bias 0x00000002 +#define GEN9_3DSTATE_WM_CHROMAKEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 76, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_WM_CHROMAKEY_length 0x00000002 + +struct GEN9_3DSTATE_WM_CHROMAKEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ChromaKeyKillEnable; +}; + +static inline void +GEN9_3DSTATE_WM_CHROMAKEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_WM_CHROMAKEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyKillEnable, 31, 31) | + 0; + +} + +#define GEN9_3DSTATE_WM_DEPTH_STENCIL_length_bias 0x00000002 +#define GEN9_3DSTATE_WM_DEPTH_STENCIL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, 
\ + ._3DCommandSubOpcode = 78, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_WM_DEPTH_STENCIL_length 0x00000004 + +struct GEN9_3DSTATE_WM_DEPTH_STENCIL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilFailOp; + uint32_t StencilPassDepthFailOp; + uint32_t StencilPassDepthPassOp; + uint32_t BackfaceStencilTestFunction; + uint32_t BackfaceStencilFailOp; + uint32_t BackfaceStencilPassDepthFailOp; + uint32_t BackfaceStencilPassDepthPassOp; + uint32_t StencilTestFunction; + uint32_t DepthTestFunction; + bool DoubleSidedStencilEnable; + bool StencilTestEnable; + bool StencilBufferWriteEnable; + bool DepthTestEnable; + bool DepthBufferWriteEnable; + uint32_t StencilTestMask; + uint32_t StencilWriteMask; + uint32_t BackfaceStencilTestMask; + uint32_t BackfaceStencilWriteMask; + uint32_t StencilReferenceValue; + uint32_t BackfaceStencilReferenceValue; +}; + +static inline void +GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_WM_DEPTH_STENCIL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StencilFailOp, 29, 31) | + __gen_field(values->StencilPassDepthFailOp, 26, 28) | + __gen_field(values->StencilPassDepthPassOp, 23, 25) | + __gen_field(values->BackfaceStencilTestFunction, 20, 22) | + __gen_field(values->BackfaceStencilFailOp, 17, 19) | + __gen_field(values->BackfaceStencilPassDepthFailOp, 14, 16) | + __gen_field(values->BackfaceStencilPassDepthPassOp, 11, 13) | + __gen_field(values->StencilTestFunction, 8, 10) | + __gen_field(values->DepthTestFunction, 5, 7) | + __gen_field(values->DoubleSidedStencilEnable, 4, 
4) | + __gen_field(values->StencilTestEnable, 3, 3) | + __gen_field(values->StencilBufferWriteEnable, 2, 2) | + __gen_field(values->DepthTestEnable, 1, 1) | + __gen_field(values->DepthBufferWriteEnable, 0, 0) | + 0; + + dw[2] = + __gen_field(values->StencilTestMask, 24, 31) | + __gen_field(values->StencilWriteMask, 16, 23) | + __gen_field(values->BackfaceStencilTestMask, 8, 15) | + __gen_field(values->BackfaceStencilWriteMask, 0, 7) | + 0; + + dw[3] = + __gen_field(values->StencilReferenceValue, 8, 15) | + __gen_field(values->BackfaceStencilReferenceValue, 0, 7) | + 0; + +} + +#define GEN9_3DSTATE_WM_HZ_OP_length_bias 0x00000002 +#define GEN9_3DSTATE_WM_HZ_OP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 82, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_WM_HZ_OP_length 0x00000005 + +struct GEN9_3DSTATE_WM_HZ_OP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool StencilBufferClearEnable; + bool DepthBufferClearEnable; + bool ScissorRectangleEnable; + bool DepthBufferResolveEnable; + bool HierarchicalDepthBufferResolveEnable; + uint32_t PixelPositionOffsetEnable; + bool FullSurfaceDepthClear; + uint32_t StencilClearValue; + uint32_t NumberofMultisamples; + uint32_t ClearRectangleYMin; + uint32_t ClearRectangleXMin; + uint32_t ClearRectangleYMax; + uint32_t ClearRectangleXMax; + uint32_t SampleMask; +}; + +static inline void +GEN9_3DSTATE_WM_HZ_OP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_WM_HZ_OP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StencilBufferClearEnable, 31, 31) | 
+ __gen_field(values->DepthBufferClearEnable, 30, 30) | + __gen_field(values->ScissorRectangleEnable, 29, 29) | + __gen_field(values->DepthBufferResolveEnable, 28, 28) | + __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->PixelPositionOffsetEnable, 26, 26) | + __gen_field(values->FullSurfaceDepthClear, 25, 25) | + __gen_field(values->StencilClearValue, 16, 23) | + __gen_field(values->NumberofMultisamples, 13, 15) | + 0; + + dw[2] = + __gen_field(values->ClearRectangleYMin, 16, 31) | + __gen_field(values->ClearRectangleXMin, 0, 15) | + 0; + + dw[3] = + __gen_field(values->ClearRectangleYMax, 16, 31) | + __gen_field(values->ClearRectangleXMax, 0, 15) | + 0; + + dw[4] = + __gen_field(values->SampleMask, 0, 15) | + 0; + +} + +#define GEN9_GPGPU_WALKER_length_bias 0x00000002 +#define GEN9_GPGPU_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 5, \ + .DwordLength = 13 + +#define GEN9_GPGPU_WALKER_length 0x0000000f + +struct GEN9_GPGPU_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + bool IndirectParameterEnable; + bool PredicateEnable; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; +#define SIMD8 0 +#define SIMD16 1 +#define SIMD32 2 + uint32_t SIMDSize; + uint32_t ThreadDepthCounterMaximum; + uint32_t ThreadHeightCounterMaximum; + uint32_t ThreadWidthCounterMaximum; + uint32_t ThreadGroupIDStartingX; + uint32_t ThreadGroupIDXDimension; + uint32_t ThreadGroupIDStartingY; + uint32_t ThreadGroupIDYDimension; + uint32_t ThreadGroupIDStartingResumeZ; + uint32_t ThreadGroupIDZDimension; + uint32_t RightExecutionMask; + uint32_t BottomExecutionMask; +}; + +static inline void +GEN9_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_GPGPU_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + 
dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 6, 31) | + 0; + + dw[4] = + __gen_field(values->SIMDSize, 30, 31) | + __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | + __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | + __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDStartingX, 0, 31) | + 0; + + dw[6] = + 0; + + dw[7] = + __gen_field(values->ThreadGroupIDXDimension, 0, 31) | + 0; + + dw[8] = + __gen_field(values->ThreadGroupIDStartingY, 0, 31) | + 0; + + dw[9] = + 0; + + dw[10] = + __gen_field(values->ThreadGroupIDYDimension, 0, 31) | + 0; + + dw[11] = + __gen_field(values->ThreadGroupIDStartingResumeZ, 0, 31) | + 0; + + dw[12] = + __gen_field(values->ThreadGroupIDZDimension, 0, 31) | + 0; + + dw[13] = + __gen_field(values->RightExecutionMask, 0, 31) | + 0; + + dw[14] = + __gen_field(values->BottomExecutionMask, 0, 31) | + 0; + +} + +#define GEN9_MEDIA_CURBE_LOAD_length_bias 0x00000002 +#define GEN9_MEDIA_CURBE_LOAD_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 1, \ + .DwordLength = 2 + +#define GEN9_MEDIA_CURBE_LOAD_length 0x00000004 + +struct GEN9_MEDIA_CURBE_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t CURBETotalDataLength; + uint32_t CURBEDataStartAddress; +}; + +static inline void +GEN9_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, + const 
struct GEN9_MEDIA_CURBE_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->CURBETotalDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->CURBEDataStartAddress, 0, 31) | + 0; + +} + +#define GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 +#define GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 2, \ + .DwordLength = 2 + +#define GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 + +struct GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorTotalLength; + uint32_t InterfaceDescriptorDataStartAddress; +}; + +static inline void +GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | + 0; + +} + +#define GEN9_MEDIA_OBJECT_length_bias 0x00000002 +#define GEN9_MEDIA_OBJECT_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 0 + +#define GEN9_MEDIA_OBJECT_length 0x00000000 + +struct GEN9_MEDIA_OBJECT 
{ + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; + uint32_t SliceDestinationSelectMSBs; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; + uint32_t ForceDestination; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define Slice2 2 + uint32_t SliceDestinationSelect; +#define Subslice3 3 +#define SubSlice2 2 +#define SubSlice1 1 +#define SubSlice0 0 + uint32_t SubSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + bool ScoreboardMask; + /* variable length fields follow */ +}; + +static inline void +GEN9_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->SliceDestinationSelectMSBs, 25, 26) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->ForceDestination, 22, 22) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 20) | + __gen_field(values->SubSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], 
values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_MEDIA_OBJECT_GRPID_length_bias 0x00000002 +#define GEN9_MEDIA_OBJECT_GRPID_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 6 + +#define GEN9_MEDIA_OBJECT_GRPID_length 0x00000000 + +struct GEN9_MEDIA_OBJECT_GRPID { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t SliceDestinationSelectMSB; + uint32_t EndofThreadGroup; + uint32_t ForceDestination; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define Slice2 2 + uint32_t SliceDestinationSelect; +#define Subslice3 3 +#define SubSlice2 2 +#define SubSlice1 1 +#define SubSlice0 0 + uint32_t SubSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoreboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + bool ScoreboardMask; + uint32_t GroupID; + /* variable length fields follow */ +}; + +static inline void +GEN9_MEDIA_OBJECT_GRPID_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_OBJECT_GRPID * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + 
__gen_field(values->SliceDestinationSelectMSB, 24, 24) | + __gen_field(values->EndofThreadGroup, 23, 23) | + __gen_field(values->ForceDestination, 22, 22) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 20) | + __gen_field(values->SubSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoreboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->GroupID, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_MEDIA_OBJECT_PRT_length_bias 0x00000002 +#define GEN9_MEDIA_OBJECT_PRT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 2, \ + .DwordLength = 14 + +#define GEN9_MEDIA_OBJECT_PRT_length 0x00000010 + +struct GEN9_MEDIA_OBJECT_PRT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; + bool PRT_FenceNeeded; +#define Rootthreadqueue 0 +#define VFEstateflush 1 + uint32_t PRT_FenceType; + uint32_t InlineData[12]; +}; + +static inline void +GEN9_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_OBJECT_PRT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + 
__gen_field(values->PRT_FenceNeeded, 23, 23) | + __gen_field(values->PRT_FenceType, 22, 22) | + 0; + + dw[3] = + 0; + + for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + __gen_field(values->InlineData[i + 0], 0, 31) | + 0; + } + +} + +#define GEN9_MEDIA_OBJECT_WALKER_length_bias 0x00000002 +#define GEN9_MEDIA_OBJECT_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 3 + +#define GEN9_MEDIA_OBJECT_WALKER_length 0x00000000 + +struct GEN9_MEDIA_OBJECT_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; + uint32_t MaskedDispatch; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t GroupIDLoopSelect; + bool ScoreboardMask; + uint32_t ColorCountMinusOne; + uint32_t MiddleLoopExtraSteps; + uint32_t LocalMidLoopUnitY; + uint32_t MidLoopUnitX; + uint32_t GlobalLoopExecCount; + uint32_t LocalLoopExecCount; + uint32_t BlockResolutionY; + uint32_t BlockResolutionX; + uint32_t LocalStartY; + uint32_t LocalStartX; + uint32_t LocalOuterLoopStrideY; + uint32_t LocalOuterLoopStrideX; + uint32_t LocalInnerLoopUnitY; + uint32_t LocalInnerLoopUnitX; + uint32_t GlobalResolutionY; + uint32_t GlobalResolutionX; + uint32_t GlobalStartY; + uint32_t GlobalStartX; + uint32_t GlobalOuterLoopStrideY; + uint32_t GlobalOuterLoopStrideX; + uint32_t GlobalInnerLoopUnitY; + uint32_t GlobalInnerLoopUnitX; + /* variable length fields follow */ +}; + +static inline void +GEN9_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_OBJECT_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->MaskedDispatch, 22, 23) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + 0; + + dw[5] = + __gen_field(values->GroupIDLoopSelect, 8, 31) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->ColorCountMinusOne, 24, 27) | + __gen_field(values->MiddleLoopExtraSteps, 16, 20) | + __gen_field(values->LocalMidLoopUnitY, 12, 13) | + __gen_field(values->MidLoopUnitX, 8, 9) | + 0; + + dw[7] = + __gen_field(values->GlobalLoopExecCount, 16, 27) | + __gen_field(values->LocalLoopExecCount, 0, 11) | + 0; + + dw[8] = + __gen_field(values->BlockResolutionY, 16, 26) | + __gen_field(values->BlockResolutionX, 0, 10) | + 0; + + dw[9] = + __gen_field(values->LocalStartY, 16, 26) | + __gen_field(values->LocalStartX, 0, 10) | + 0; + + dw[10] = + 0; + + dw[11] = + __gen_field(values->LocalOuterLoopStrideY, 16, 27) | + __gen_field(values->LocalOuterLoopStrideX, 0, 11) | + 0; + + dw[12] = + __gen_field(values->LocalInnerLoopUnitY, 16, 27) | + __gen_field(values->LocalInnerLoopUnitX, 0, 11) | + 0; + + dw[13] = + __gen_field(values->GlobalResolutionY, 16, 26) | + __gen_field(values->GlobalResolutionX, 0, 10) | + 0; + + dw[14] = + __gen_field(values->GlobalStartY, 16, 27) | + __gen_field(values->GlobalStartX, 0, 11) | + 0; + + dw[15] = + __gen_field(values->GlobalOuterLoopStrideY, 16, 27) | + __gen_field(values->GlobalOuterLoopStrideX, 0, 11) | + 0; + + dw[16] = + __gen_field(values->GlobalInnerLoopUnitY, 16, 27) | + 
__gen_field(values->GlobalInnerLoopUnitX, 0, 11) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_MEDIA_STATE_FLUSH_length_bias 0x00000002 +#define GEN9_MEDIA_STATE_FLUSH_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 4, \ + .DwordLength = 0 + +#define GEN9_MEDIA_STATE_FLUSH_length 0x00000002 + +struct GEN9_MEDIA_STATE_FLUSH { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + bool FlushtoGO; + uint32_t WatermarkRequired; + uint32_t InterfaceDescriptorOffset; +}; + +static inline void +GEN9_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_STATE_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->FlushtoGO, 7, 7) | + __gen_field(values->WatermarkRequired, 6, 6) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + +} + +#define GEN9_MEDIA_VFE_STATE_length_bias 0x00000002 +#define GEN9_MEDIA_VFE_STATE_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 0, \ + .DwordLength = 7 + +#define GEN9_MEDIA_VFE_STATE_length 0x00000009 + +struct GEN9_MEDIA_VFE_STATE { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t ScratchSpaceBasePointer; + uint32_t StackSize; + uint32_t PerThreadScratchSpace; + uint32_t ScratchSpaceBasePointerHigh; + uint32_t MaximumNumberofThreads; + uint32_t NumberofURBEntries; +#define Maintainingtheexistingtimestampstate 0 +#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 + uint32_t ResetGatewayTimer; + uint32_t SliceDisable; + uint32_t 
URBEntryAllocationSize; + uint32_t CURBEAllocationSize; +#define Scoreboarddisabled 0 +#define Scoreboardenabled 1 + uint32_t ScoreboardEnable; +#define StallingScoreboard 0 +#define NonStallingScoreboard 1 + uint32_t ScoreboardType; + uint32_t ScoreboardMask; + uint32_t Scoreboard3DeltaY; + uint32_t Scoreboard3DeltaX; + uint32_t Scoreboard2DeltaY; + uint32_t Scoreboard2DeltaX; + uint32_t Scoreboard1DeltaY; + uint32_t Scoreboard1DeltaX; + uint32_t Scoreboard0DeltaY; + uint32_t Scoreboard0DeltaX; + uint32_t Scoreboard7DeltaY; + uint32_t Scoreboard7DeltaX; + uint32_t Scoreboard6DeltaY; + uint32_t Scoreboard6DeltaX; + uint32_t Scoreboard5DeltaY; + uint32_t Scoreboard5DeltaX; + uint32_t Scoreboard4DeltaY; + uint32_t Scoreboard4DeltaX; +}; + +static inline void +GEN9_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_VFE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->StackSize, 4, 7) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[2] = + __gen_offset(values->ScratchSpaceBasePointerHigh, 0, 15) | + 0; + + dw[3] = + __gen_field(values->MaximumNumberofThreads, 16, 31) | + __gen_field(values->NumberofURBEntries, 8, 15) | + __gen_field(values->ResetGatewayTimer, 7, 7) | + 0; + + dw[4] = + __gen_field(values->SliceDisable, 0, 1) | + 0; + + dw[5] = + __gen_field(values->URBEntryAllocationSize, 16, 31) | + __gen_field(values->CURBEAllocationSize, 0, 15) | + 0; + + dw[6] = + __gen_field(values->ScoreboardEnable, 31, 31) | + __gen_field(values->ScoreboardType, 30, 30) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[7] = + 
__gen_field(values->Scoreboard3DeltaY, 28, 31) | + __gen_field(values->Scoreboard3DeltaX, 24, 27) | + __gen_field(values->Scoreboard2DeltaY, 20, 23) | + __gen_field(values->Scoreboard2DeltaX, 16, 19) | + __gen_field(values->Scoreboard1DeltaY, 12, 15) | + __gen_field(values->Scoreboard1DeltaX, 8, 11) | + __gen_field(values->Scoreboard0DeltaY, 4, 7) | + __gen_field(values->Scoreboard0DeltaX, 0, 3) | + 0; + + dw[8] = + __gen_field(values->Scoreboard7DeltaY, 28, 31) | + __gen_field(values->Scoreboard7DeltaX, 24, 27) | + __gen_field(values->Scoreboard6DeltaY, 20, 23) | + __gen_field(values->Scoreboard6DeltaX, 16, 19) | + __gen_field(values->Scoreboard5DeltaY, 12, 15) | + __gen_field(values->Scoreboard5DeltaX, 8, 11) | + __gen_field(values->Scoreboard4DeltaY, 4, 7) | + __gen_field(values->Scoreboard4DeltaX, 0, 3) | + 0; + +} + +#define GEN9_MI_ARB_CHECK_length_bias 0x00000001 +#define GEN9_MI_ARB_CHECK_header \ + .CommandType = 0, \ + .MICommandOpcode = 5 + +#define GEN9_MI_ARB_CHECK_length 0x00000001 + +struct GEN9_MI_ARB_CHECK { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN9_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_ARB_CHECK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN9_MI_BATCH_BUFFER_END_length_bias 0x00000001 +#define GEN9_MI_BATCH_BUFFER_END_header \ + .CommandType = 0, \ + .MICommandOpcode = 10 + +#define GEN9_MI_BATCH_BUFFER_END_length 0x00000001 + +struct GEN9_MI_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN9_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 
28) | + 0; + +} + +#define GEN9_MI_CLFLUSH_length_bias 0x00000002 +#define GEN9_MI_CLFLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 39 + +#define GEN9_MI_CLFLUSH_length 0x00000000 + +struct GEN9_MI_CLFLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type PageBaseAddress; + uint32_t StartingCachelineOffset; + /* variable length fields follow */ +}; + +static inline void +GEN9_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_CLFLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->StartingCachelineOffset, 6, 11) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + /* variable length fields follow */ +} + +#define GEN9_MI_COPY_MEM_MEM_length_bias 0x00000002 +#define GEN9_MI_COPY_MEM_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 46, \ + .DwordLength = 3 + +#define GEN9_MI_COPY_MEM_MEM_length 0x00000005 + +struct GEN9_MI_COPY_MEM_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTTSource; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTTDestination; + uint32_t DwordLength; + __gen_address_type DestinationMemoryAddress; + __gen_address_type SourceMemoryAddress; +}; + +static inline void +GEN9_MI_COPY_MEM_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_COPY_MEM_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTTSource, 22, 22) | + __gen_field(values->UseGlobalGTTDestination, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->DestinationMemoryAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + uint32_t dw3 = + 0; + + uint64_t qw3 = + __gen_combine_address(data, &dw[3], values->SourceMemoryAddress, dw3); + + dw[3] = qw3; + dw[4] = qw3 >> 32; + +} + +#define GEN9_MI_DISPLAY_FLIP_length_bias 0x00000002 +#define GEN9_MI_DISPLAY_FLIP_header \ + .CommandType = 0, \ + .MICommandOpcode = 20 + +#define GEN9_MI_DISPLAY_FLIP_length 0x00000003 + +struct GEN9_MI_DISPLAY_FLIP { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool AsyncFlipIndicator; +#define DisplayPlane1 0 +#define DisplayPlane2 1 +#define DisplayPlane3 2 +#define DisplayPlane4 4 +#define DisplayPlane5 5 +#define DisplayPlane6 6 +#define DisplayPlane7 7 +#define DisplayPlane8 8 +#define DisplayPlane9 9 +#define DisplayPlane10 10 +#define DisplayPlane11 11 +#define DisplayPlane12 12 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + bool Stereoscopic3DMode; + uint32_t DisplayBufferPitch; +#define Linear 0 +#define TiledX 1 +#define TiledYLegacyYB 4 +#define TiledYF 5 + bool TileParameter; + __gen_address_type DisplayBufferBaseAddress; +#define SyncFlip 0 +#define AsyncFlip 1 +#define Stereo3DFlip 2 + uint32_t FlipType; + __gen_address_type LeftEyeDisplayBufferBaseAddress; +}; + +static inline void +GEN9_MI_DISPLAY_FLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_DISPLAY_FLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->AsyncFlipIndicator, 22, 22) | + __gen_field(values->DisplayPlaneSelect, 8, 12) | + 
__gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Stereoscopic3DMode, 31, 31) | + __gen_field(values->DisplayBufferPitch, 6, 15) | + __gen_field(values->TileParameter, 0, 2) | + 0; + + uint32_t dw2 = + __gen_field(values->FlipType, 0, 1) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->DisplayBufferBaseAddress, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->LeftEyeDisplayBufferBaseAddress, dw3); + +} + +#define GEN9_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 +#define GEN9_MI_LOAD_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 41, \ + .DwordLength = 2 + +#define GEN9_MI_LOAD_REGISTER_MEM_length 0x00000004 + +struct GEN9_MI_LOAD_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t AsyncModeEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN9_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_LOAD_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->AsyncModeEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN9_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 +#define GEN9_MI_LOAD_SCAN_LINES_EXCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 19, \ + .DwordLength = 0 + +#define GEN9_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 + +struct GEN9_MI_LOAD_SCAN_LINES_EXCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 
1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN9_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_LOAD_SCAN_LINES_EXCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN9_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 +#define GEN9_MI_LOAD_SCAN_LINES_INCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 18, \ + .DwordLength = 0 + +#define GEN9_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 + +struct GEN9_MI_LOAD_SCAN_LINES_INCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlane1A 0 +#define DisplayPlane1B 1 +#define DisplayPlane1C 4 + uint32_t DisplayPlaneSelect; +#define NeverForward 0 +#define AlwaysForward 1 +#define ConditionallyForward 2 + bool ScanLineEventDoneForward; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN9_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_LOAD_SCAN_LINES_INCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->ScanLineEventDoneForward, 17, 18) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN9_MI_LOAD_URB_MEM_length_bias 0x00000002 +#define 
GEN9_MI_LOAD_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 44, \ + .DwordLength = 2 + +#define GEN9_MI_LOAD_URB_MEM_length 0x00000004 + +struct GEN9_MI_LOAD_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN9_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_LOAD_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN9_MI_MATH_length_bias 0x00000002 +#define GEN9_MI_MATH_header \ + .CommandType = 0, \ + .MICommandOpcode = 26 + +#define GEN9_MI_MATH_length 0x00000000 + +struct GEN9_MI_MATH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t ALUINSTRUCTION1; + uint32_t ALUINSTRUCTION2; + /* variable length fields follow */ +}; + +static inline void +GEN9_MI_MATH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_MATH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ALUINSTRUCTION1, 0, 31) | + 0; + + dw[2] = + __gen_field(values->ALUINSTRUCTION2, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_MI_NOOP_length_bias 0x00000001 +#define GEN9_MI_NOOP_header \ + .CommandType = 0, \ + .MICommandOpcode = 0 + +#define GEN9_MI_NOOP_length 0x00000001 + +struct GEN9_MI_NOOP { + uint32_t CommandType; + uint32_t MICommandOpcode; + 
bool IdentificationNumberRegisterWriteEnable; + uint32_t IdentificationNumber; +}; + +static inline void +GEN9_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_NOOP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | + __gen_field(values->IdentificationNumber, 0, 21) | + 0; + +} + +#define GEN9_MI_PREDICATE_length_bias 0x00000001 +#define GEN9_MI_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 12 + +#define GEN9_MI_PREDICATE_length 0x00000001 + +struct GEN9_MI_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define LOAD_KEEP 0 +#define LOAD_LOAD 2 +#define LOAD_LOADINV 3 + uint32_t LoadOperation; +#define COMBINE_SET 0 +#define COMBINE_AND 1 +#define COMBINE_OR 2 +#define COMBINE_XOR 3 + uint32_t CombineOperation; +#define COMPARE_SRCS_EQUAL 2 +#define COMPARE_DELTAS_EQUAL 3 + uint32_t CompareOperation; +}; + +static inline void +GEN9_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->LoadOperation, 6, 7) | + __gen_field(values->CombineOperation, 3, 4) | + __gen_field(values->CompareOperation, 0, 1) | + 0; + +} + +#define GEN9_MI_REPORT_HEAD_length_bias 0x00000001 +#define GEN9_MI_REPORT_HEAD_header \ + .CommandType = 0, \ + .MICommandOpcode = 7 + +#define GEN9_MI_REPORT_HEAD_length 0x00000001 + +struct GEN9_MI_REPORT_HEAD { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN9_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_REPORT_HEAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = 
+ __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN9_MI_RS_CONTEXT_length_bias 0x00000001 +#define GEN9_MI_RS_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 15 + +#define GEN9_MI_RS_CONTEXT_length 0x00000001 + +struct GEN9_MI_RS_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RS_RESTORE 0 +#define RS_SAVE 1 + uint32_t ResourceStreamerSave; +}; + +static inline void +GEN9_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_RS_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerSave, 0, 0) | + 0; + +} + +#define GEN9_MI_RS_CONTROL_length_bias 0x00000001 +#define GEN9_MI_RS_CONTROL_header \ + .CommandType = 0, \ + .MICommandOpcode = 6 + +#define GEN9_MI_RS_CONTROL_length 0x00000001 + +struct GEN9_MI_RS_CONTROL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RS_STOP 0 +#define RS_START 1 + uint32_t ResourceStreamerControl; +}; + +static inline void +GEN9_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_RS_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerControl, 0, 0) | + 0; + +} + +#define GEN9_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN9_MI_RS_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 43, \ + .DwordLength = 2 + +#define GEN9_MI_RS_STORE_DATA_IMM_length 0x00000004 + +struct GEN9_MI_RS_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type DestinationAddress; + uint32_t CoreModeEnable; + uint32_t DataDWord0; +}; + +static inline void 
+GEN9_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_RS_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->DestinationAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + +} + +#define GEN9_MI_SET_CONTEXT_length_bias 0x00000002 +#define GEN9_MI_SET_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 24, \ + .DwordLength = 0 + +#define GEN9_MI_SET_CONTEXT_length 0x00000002 + +struct GEN9_MI_SET_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type LogicalContextAddress; + uint32_t ReservedMustbe1; + bool CoreModeEnable; + bool ResourceStreamerStateSaveEnable; + bool ResourceStreamerStateRestoreEnable; + uint32_t ForceRestore; + uint32_t RestoreInhibit; +}; + +static inline void +GEN9_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_SET_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->ReservedMustbe1, 8, 8) | + __gen_field(values->CoreModeEnable, 4, 4) | + __gen_field(values->ResourceStreamerStateSaveEnable, 3, 3) | + __gen_field(values->ResourceStreamerStateRestoreEnable, 2, 2) | + __gen_field(values->ForceRestore, 1, 1) | + __gen_field(values->RestoreInhibit, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); + +} + +#define GEN9_MI_SET_PREDICATE_length_bias 0x00000001 +#define 
GEN9_MI_SET_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 1 + +#define GEN9_MI_SET_PREDICATE_length 0x00000001 + +struct GEN9_MI_SET_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define NOOPNever 0 +#define NOOPonResult2clear 1 +#define NOOPonResult2set 2 +#define NOOPonResultclear 3 +#define NOOPonResultset 4 +#define Executewhenonesliceenabled 5 +#define Executewhentwoslicesareenabled 6 +#define Executewhenthreeslicesareenabled 7 +#define NOOPAlways 15 + uint32_t PREDICATEENABLE; +}; + +static inline void +GEN9_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_SET_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PREDICATEENABLE, 0, 3) | + 0; + +} + +#define GEN9_MI_STORE_DATA_INDEX_length_bias 0x00000002 +#define GEN9_MI_STORE_DATA_INDEX_header \ + .CommandType = 0, \ + .MICommandOpcode = 33, \ + .DwordLength = 1 + +#define GEN9_MI_STORE_DATA_INDEX_length 0x00000003 + +struct GEN9_MI_STORE_DATA_INDEX { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UsePerProcessHardwareStatusPage; + uint32_t DwordLength; + uint32_t Offset; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN9_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_STORE_DATA_INDEX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UsePerProcessHardwareStatusPage, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Offset, 2, 11) | + 0; + + dw[2] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[3] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN9_MI_STORE_URB_MEM_length_bias 0x00000002 
+#define GEN9_MI_STORE_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 45, \ + .DwordLength = 2 + +#define GEN9_MI_STORE_URB_MEM_length 0x00000004 + +struct GEN9_MI_STORE_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN9_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_STORE_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN9_MI_SUSPEND_FLUSH_length_bias 0x00000001 +#define GEN9_MI_SUSPEND_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 11 + +#define GEN9_MI_SUSPEND_FLUSH_length 0x00000001 + +struct GEN9_MI_SUSPEND_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool SuspendFlush; +}; + +static inline void +GEN9_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_SUSPEND_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SuspendFlush, 0, 0) | + 0; + +} + +#define GEN9_MI_TOPOLOGY_FILTER_length_bias 0x00000001 +#define GEN9_MI_TOPOLOGY_FILTER_header \ + .CommandType = 0, \ + .MICommandOpcode = 13 + +#define GEN9_MI_TOPOLOGY_FILTER_length 0x00000001 + +struct GEN9_MI_TOPOLOGY_FILTER { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t TopologyFilterValue; +}; + +static inline void +GEN9_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN9_MI_TOPOLOGY_FILTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->TopologyFilterValue, 0, 5) | + 0; + +} + +#define GEN9_MI_UPDATE_GTT_length_bias 0x00000002 +#define GEN9_MI_UPDATE_GTT_header \ + .CommandType = 0, \ + .MICommandOpcode = 35 + +#define GEN9_MI_UPDATE_GTT_length 0x00000000 + +struct GEN9_MI_UPDATE_GTT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type EntryAddress; + /* variable length fields follow */ +}; + +static inline void +GEN9_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_UPDATE_GTT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); + + /* variable length fields follow */ +} + +#define GEN9_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 +#define GEN9_MI_URB_ATOMIC_ALLOC_header \ + .CommandType = 0, \ + .MICommandOpcode = 9 + +#define GEN9_MI_URB_ATOMIC_ALLOC_length 0x00000001 + +struct GEN9_MI_URB_ATOMIC_ALLOC { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t URBAtomicStorageOffset; + uint32_t URBAtomicStorageSize; +}; + +static inline void +GEN9_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_URB_ATOMIC_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->URBAtomicStorageOffset, 12, 19) | + __gen_field(values->URBAtomicStorageSize, 0, 8) | + 0; + +} + +#define GEN9_MI_USER_INTERRUPT_length_bias 0x00000001 +#define GEN9_MI_USER_INTERRUPT_header 
\ + .CommandType = 0, \ + .MICommandOpcode = 2 + +#define GEN9_MI_USER_INTERRUPT_length 0x00000001 + +struct GEN9_MI_USER_INTERRUPT { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN9_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_USER_INTERRUPT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN9_MI_WAIT_FOR_EVENT_length_bias 0x00000001 +#define GEN9_MI_WAIT_FOR_EVENT_header \ + .CommandType = 0, \ + .MICommandOpcode = 3 + +#define GEN9_MI_WAIT_FOR_EVENT_length 0x00000001 + +struct GEN9_MI_WAIT_FOR_EVENT { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool DisplayPlane1CVerticalBlankWaitEnable; + bool DisplayPlane6FlipPendingWaitEnable; + bool DisplayPlane12FlipPendingWaitEnable; + bool DisplayPlane11FlipPendingWaitEnable; + bool DisplayPlane10FlipPendingWaitEnable; + bool DisplayPlane9FlipPendingWaitEnable; + bool DisplayPlane3FlipPendingWaitEnable; + bool DisplayPlane1CScanLineWaitEnable; + bool DisplayPlane1BVerticalBlankWaitEnable; + bool DisplayPlane5FlipPendingWaitEnable; + bool DisplayPlane2FlipPendingWaitEnable; + bool DisplayPlane1BScanLineWaitEnable; + bool DisplayPlane8FlipPendingWaitEnable; + bool DisplayPlane7FlipPendingWaitEnable; + bool DisplayPlane1AVerticalBlankWaitEnable; + bool DisplayPlane4FlipPendingWaitEnable; + bool DisplayPlane1FlipPendingWaitEnable; + bool DisplayPlnae1AScanLineWaitEnable; +}; + +static inline void +GEN9_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_WAIT_FOR_EVENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlane1CVerticalBlankWaitEnable, 21, 21) | + 
__gen_field(values->DisplayPlane6FlipPendingWaitEnable, 20, 20) | + __gen_field(values->DisplayPlane12FlipPendingWaitEnable, 19, 19) | + __gen_field(values->DisplayPlane11FlipPendingWaitEnable, 18, 18) | + __gen_field(values->DisplayPlane10FlipPendingWaitEnable, 17, 17) | + __gen_field(values->DisplayPlane9FlipPendingWaitEnable, 16, 16) | + __gen_field(values->DisplayPlane3FlipPendingWaitEnable, 15, 15) | + __gen_field(values->DisplayPlane1CScanLineWaitEnable, 14, 14) | + __gen_field(values->DisplayPlane1BVerticalBlankWaitEnable, 11, 11) | + __gen_field(values->DisplayPlane5FlipPendingWaitEnable, 10, 10) | + __gen_field(values->DisplayPlane2FlipPendingWaitEnable, 9, 9) | + __gen_field(values->DisplayPlane1BScanLineWaitEnable, 8, 8) | + __gen_field(values->DisplayPlane8FlipPendingWaitEnable, 7, 7) | + __gen_field(values->DisplayPlane7FlipPendingWaitEnable, 6, 6) | + __gen_field(values->DisplayPlane1AVerticalBlankWaitEnable, 3, 3) | + __gen_field(values->DisplayPlane4FlipPendingWaitEnable, 2, 2) | + __gen_field(values->DisplayPlane1FlipPendingWaitEnable, 1, 1) | + __gen_field(values->DisplayPlnae1AScanLineWaitEnable, 0, 0) | + 0; + +} + +#define GEN9_PIPE_CONTROL_length_bias 0x00000002 +#define GEN9_PIPE_CONTROL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 2, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 4 + +#define GEN9_PIPE_CONTROL_length 0x00000006 + +struct GEN9_PIPE_CONTROL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool FlushLLC; +#define DAT_PPGTT 0 +#define DAT_GGTT 1 + uint32_t DestinationAddressType; +#define NoLRIOperation 0 +#define MMIOWriteImmediateData 1 + uint32_t LRIPostSyncOperation; + uint32_t StoreDataIndex; + uint32_t CommandStreamerStallEnable; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; + uint32_t TLBInvalidate; + bool GenericMediaStateClear; +#define NoWrite 0 +#define 
WriteImmediateData 1 +#define WritePSDepthCount 2 +#define WriteTimestamp 3 + uint32_t PostSyncOperation; + bool DepthStallEnable; +#define DisableFlush 0 +#define EnableFlush 1 + bool RenderTargetCacheFlushEnable; + bool InstructionCacheInvalidateEnable; + bool TextureCacheInvalidationEnable; + bool IndirectStatePointersDisable; + bool NotifyEnable; + bool PipeControlFlushEnable; + bool DCFlushEnable; + bool VFCacheInvalidationEnable; + bool ConstantCacheInvalidationEnable; + bool StateCacheInvalidationEnable; + bool StallAtPixelScoreboard; +#define FlushDisabled 0 +#define FlushEnabled 1 + bool DepthCacheFlushEnable; + __gen_address_type Address; + uint64_t ImmediateData; +}; + +static inline void +GEN9_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_PIPE_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->FlushLLC, 26, 26) | + __gen_field(values->DestinationAddressType, 24, 24) | + __gen_field(values->LRIPostSyncOperation, 23, 23) | + __gen_field(values->StoreDataIndex, 21, 21) | + __gen_field(values->CommandStreamerStallEnable, 20, 20) | + __gen_field(values->GlobalSnapshotCountReset, 19, 19) | + __gen_field(values->TLBInvalidate, 18, 18) | + __gen_field(values->GenericMediaStateClear, 16, 16) | + __gen_field(values->PostSyncOperation, 14, 15) | + __gen_field(values->DepthStallEnable, 13, 13) | + __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | + __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | + __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | + __gen_field(values->IndirectStatePointersDisable, 9, 9) | + __gen_field(values->NotifyEnable, 8, 8) | + 
__gen_field(values->PipeControlFlushEnable, 7, 7) | + __gen_field(values->DCFlushEnable, 5, 5) | + __gen_field(values->VFCacheInvalidationEnable, 4, 4) | + __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | + __gen_field(values->StateCacheInvalidationEnable, 2, 2) | + __gen_field(values->StallAtPixelScoreboard, 1, 1) | + __gen_field(values->DepthCacheFlushEnable, 0, 0) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->Address, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + uint64_t qw4 = + __gen_field(values->ImmediateData, 0, 63) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + +} + +#define GEN9_SCISSOR_RECT_length 0x00000002 + +struct GEN9_SCISSOR_RECT { + uint32_t ScissorRectangleYMin; + uint32_t ScissorRectangleXMin; + uint32_t ScissorRectangleYMax; + uint32_t ScissorRectangleXMax; +}; + +static inline void +GEN9_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SCISSOR_RECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ScissorRectangleYMin, 16, 31) | + __gen_field(values->ScissorRectangleXMin, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ScissorRectangleYMax, 16, 31) | + __gen_field(values->ScissorRectangleXMax, 0, 15) | + 0; + +} + +#define GEN9_SF_CLIP_VIEWPORT_length 0x00000010 + +struct GEN9_SF_CLIP_VIEWPORT { + float ViewportMatrixElementm00; + float ViewportMatrixElementm11; + float ViewportMatrixElementm22; + float ViewportMatrixElementm30; + float ViewportMatrixElementm31; + float ViewportMatrixElementm32; + float XMinClipGuardband; + float XMaxClipGuardband; + float YMinClipGuardband; + float YMaxClipGuardband; + float XMinViewPort; + float XMaxViewPort; + float YMinViewPort; + float YMaxViewPort; +}; + +static inline void +GEN9_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SF_CLIP_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_float(values->ViewportMatrixElementm00) | + 0; + + dw[1] = + __gen_float(values->ViewportMatrixElementm11) | + 0; + + dw[2] = + __gen_float(values->ViewportMatrixElementm22) | + 0; + + dw[3] = + __gen_float(values->ViewportMatrixElementm30) | + 0; + + dw[4] = + __gen_float(values->ViewportMatrixElementm31) | + 0; + + dw[5] = + __gen_float(values->ViewportMatrixElementm32) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + + dw[8] = + __gen_float(values->XMinClipGuardband) | + 0; + + dw[9] = + __gen_float(values->XMaxClipGuardband) | + 0; + + dw[10] = + __gen_float(values->YMinClipGuardband) | + 0; + + dw[11] = + __gen_float(values->YMaxClipGuardband) | + 0; + + dw[12] = + __gen_float(values->XMinViewPort) | + 0; + + dw[13] = + __gen_float(values->XMaxViewPort) | + 0; + + dw[14] = + __gen_float(values->YMinViewPort) | + 0; + + dw[15] = + __gen_float(values->YMaxViewPort) | + 0; + +} + +#define GEN9_BLEND_STATE_length 0x00000011 + +#define GEN9_BLEND_STATE_ENTRY_length 0x00000002 + +struct GEN9_BLEND_STATE_ENTRY { + bool LogicOpEnable; + uint32_t LogicOpFunction; + uint32_t PreBlendSourceOnlyClampEnable; +#define COLORCLAMP_UNORM 0 +#define COLORCLAMP_SNORM 1 +#define COLORCLAMP_RTFORMAT 2 + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; + bool ColorBufferBlendEnable; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + uint32_t ColorBlendFunction; + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; + uint32_t AlphaBlendFunction; + bool WriteDisableAlpha; + bool WriteDisableRed; + bool WriteDisableGreen; + bool WriteDisableBlue; +}; + +static inline void +GEN9_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_BLEND_STATE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint64_t qw0 = + __gen_field(values->LogicOpEnable, 63, 63) | + __gen_field(values->LogicOpFunction, 59, 62) | + 
__gen_field(values->PreBlendSourceOnlyClampEnable, 36, 36) | + __gen_field(values->ColorClampRange, 34, 35) | + __gen_field(values->PreBlendColorClampEnable, 33, 33) | + __gen_field(values->PostBlendColorClampEnable, 32, 32) | + __gen_field(values->ColorBufferBlendEnable, 31, 31) | + __gen_field(values->SourceBlendFactor, 26, 30) | + __gen_field(values->DestinationBlendFactor, 21, 25) | + __gen_field(values->ColorBlendFunction, 18, 20) | + __gen_field(values->SourceAlphaBlendFactor, 13, 17) | + __gen_field(values->DestinationAlphaBlendFactor, 8, 12) | + __gen_field(values->AlphaBlendFunction, 5, 7) | + __gen_field(values->WriteDisableAlpha, 3, 3) | + __gen_field(values->WriteDisableRed, 2, 2) | + __gen_field(values->WriteDisableGreen, 1, 1) | + __gen_field(values->WriteDisableBlue, 0, 0) | + 0; + + dw[0] = qw0; + dw[1] = qw0 >> 32; + +} + +struct GEN9_BLEND_STATE { + bool AlphaToCoverageEnable; + bool IndependentAlphaBlendEnable; + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; + bool AlphaTestEnable; + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + struct GEN9_BLEND_STATE_ENTRY Entry[8]; +}; + +static inline void +GEN9_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaToOneEnable, 29, 29) | + __gen_field(values->AlphaToCoverageDitherEnable, 28, 28) | + __gen_field(values->AlphaTestEnable, 27, 27) | + __gen_field(values->AlphaTestFunction, 24, 26) | + __gen_field(values->ColorDitherEnable, 23, 23) | + __gen_field(values->XDitherOffset, 21, 22) | + __gen_field(values->YDitherOffset, 19, 20) | + 0; + + for (uint32_t i = 0, j = 1; i < 8; i++, j += 2) + GEN9_BLEND_STATE_ENTRY_pack(data, &dw[j], &values->Entry[i]); +} + +#define 
GEN9_CC_VIEWPORT_length 0x00000002 + +struct GEN9_CC_VIEWPORT { + float MinimumDepth; + float MaximumDepth; +}; + +static inline void +GEN9_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_CC_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->MinimumDepth) | + 0; + + dw[1] = + __gen_float(values->MaximumDepth) | + 0; + +} + +#define GEN9_COLOR_CALC_STATE_length 0x00000006 + +struct GEN9_COLOR_CALC_STATE { +#define Cancelled 0 +#define NotCancelled 1 + uint32_t RoundDisableFunctionDisable; +#define ALPHATEST_UNORM8 0 +#define ALPHATEST_FLOAT32 1 + uint32_t AlphaTestFormat; + uint32_t AlphaReferenceValueAsUNORM8; + float AlphaReferenceValueAsFLOAT32; + float BlendConstantColorRed; + float BlendConstantColorGreen; + float BlendConstantColorBlue; + float BlendConstantColorAlpha; +}; + +static inline void +GEN9_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_COLOR_CALC_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->RoundDisableFunctionDisable, 15, 15) | + __gen_field(values->AlphaTestFormat, 0, 0) | + 0; + + dw[1] = + __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | + __gen_float(values->AlphaReferenceValueAsFLOAT32) | + 0; + + dw[2] = + __gen_float(values->BlendConstantColorRed) | + 0; + + dw[3] = + __gen_float(values->BlendConstantColorGreen) | + 0; + + dw[4] = + __gen_float(values->BlendConstantColorBlue) | + 0; + + dw[5] = + __gen_float(values->BlendConstantColorAlpha) | + 0; + +} + +#define GEN9_EXECUTION_UNIT_EXTENDED_MESSAGE_DESCRIPTOR_length 0x00000001 + +struct GEN9_EXECUTION_UNIT_EXTENDED_MESSAGE_DESCRIPTOR { + uint32_t ExtendedMessageLength; +#define NoTermination 0 +#define EOT 1 + uint32_t EndOfThread; + uint32_t TargetFunctionID; +}; + +static inline void +GEN9_EXECUTION_UNIT_EXTENDED_MESSAGE_DESCRIPTOR_pack(__gen_user_data *data, void * restrict dst, + const 
struct GEN9_EXECUTION_UNIT_EXTENDED_MESSAGE_DESCRIPTOR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ExtendedMessageLength, 6, 9) | + __gen_field(values->EndOfThread, 5, 5) | + __gen_field(values->TargetFunctionID, 0, 3) | + 0; + +} + +#define GEN9_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 + +struct GEN9_INTERFACE_DESCRIPTOR_DATA { + uint32_t KernelStartPointer; + uint32_t KernelStartPointerHigh; +#define Ftz 0 +#define SetByKernel 1 + uint32_t DenormMode; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t SamplerStatePointer; +#define Nosamplersused 0 +#define Between1and4samplersused 1 +#define Between5and8samplersused 2 +#define Between9and12samplersused 3 +#define Between13and16samplersused 4 + uint32_t SamplerCount; + uint32_t BindingTablePointer; + uint32_t BindingTableEntryCount; + uint32_t ConstantIndirectURBEntryReadLength; + uint32_t ConstantURBEntryReadOffset; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool BarrierEnable; +#define Encodes0K 0 +#define Encodes1K 1 +#define Encodes2K 2 +#define Encodes4K 3 +#define Encodes8K 4 +#define Encodes16K 5 +#define Encodes32K 6 +#define Encodes64K 7 + uint32_t SharedLocalMemorySize; + bool GlobalBarrierEnable; + uint32_t NumberofThreadsinGPGPUThreadGroup; + uint32_t CrossThreadConstantDataReadLength; +}; + +static inline void +GEN9_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_INTERFACE_DESCRIPTOR_DATA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointerHigh, 0, 
15) | + 0; + + dw[2] = + __gen_field(values->DenormMode, 19, 19) | + __gen_field(values->SingleProgramFlow, 18, 18) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->SamplerStatePointer, 5, 31) | + __gen_field(values->SamplerCount, 2, 4) | + 0; + + dw[4] = + __gen_offset(values->BindingTablePointer, 5, 15) | + __gen_field(values->BindingTableEntryCount, 0, 4) | + 0; + + dw[5] = + __gen_field(values->ConstantIndirectURBEntryReadLength, 16, 31) | + __gen_field(values->ConstantURBEntryReadOffset, 0, 15) | + 0; + + dw[6] = + __gen_field(values->RoundingMode, 22, 23) | + __gen_field(values->BarrierEnable, 21, 21) | + __gen_field(values->SharedLocalMemorySize, 16, 20) | + __gen_field(values->GlobalBarrierEnable, 15, 15) | + __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 9) | + 0; + + dw[7] = + __gen_field(values->CrossThreadConstantDataReadLength, 0, 7) | + 0; + +} + +#define GEN9_ROUNDINGPRECISIONTABLE_3_BITS_length 0x00000001 + +struct GEN9_ROUNDINGPRECISIONTABLE_3_BITS { +#define _116 0 +#define _216 1 +#define _316 2 +#define _416 3 +#define _516 4 +#define _616 5 +#define _716 6 +#define _816 7 + uint32_t RoundingPrecision; +}; + +static inline void +GEN9_ROUNDINGPRECISIONTABLE_3_BITS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_ROUNDINGPRECISIONTABLE_3_BITS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->RoundingPrecision, 0, 2) | + 0; + +} + +#define GEN9_BINDING_TABLE_STATE_length 0x00000001 + +struct GEN9_BINDING_TABLE_STATE { + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN9_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_BINDING_TABLE_STATE * restrict values) 
+{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->SurfaceStatePointer, 6, 31) | + 0; + +} + +#define GEN9_RENDER_SURFACE_STATE_length 0x00000010 + +struct GEN9_RENDER_SURFACE_STATE { +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_BUFFER 4 +#define SURFTYPE_STRBUF 5 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool SurfaceArray; + bool ASTC_Enable; + uint32_t SurfaceFormat; +#define VALIGN4 1 +#define VALIGN8 2 +#define VALIGN16 3 + uint32_t SurfaceVerticalAlignment; +#define HALIGN4 1 +#define HALIGN8 2 +#define HALIGN16 3 + uint32_t SurfaceHorizontalAlignment; +#define LINEAR 0 +#define WMAJOR 1 +#define XMAJOR 2 +#define YMAJOR 3 + uint32_t TileMode; + uint32_t VerticalLineStride; + uint32_t VerticalLineStrideOffset; + bool SamplerL2BypassModeDisable; +#define WriteOnlyCache 0 +#define ReadWriteCache 1 + uint32_t RenderCacheReadWriteMode; +#define NORMAL_MODE 0 +#define PROGRESSIVE_FRAME 2 +#define INTERLACED_FRAME 3 + uint32_t MediaBoundaryPixelMode; + bool CubeFaceEnablePositiveZ; + bool CubeFaceEnableNegativeZ; + bool CubeFaceEnablePositiveY; + bool CubeFaceEnableNegativeY; + bool CubeFaceEnablePositiveX; + bool CubeFaceEnableNegativeX; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + float BaseMipLevel; + uint32_t SurfaceQPitch; + uint32_t Height; + uint32_t Width; + uint32_t Depth; + uint32_t SurfacePitch; +#define _0DEG 0 +#define _90DEG 1 +#define _180DEG 2 +#define _270DEG 3 + uint32_t RenderTargetAndSampleUnormRotation; + uint32_t MinimumArrayElement; + uint32_t RenderTargetViewExtent; +#define MSS 0 +#define DEPTH_STENCIL 1 + uint32_t MultisampledSurfaceStorageFormat; +#define MULTISAMPLECOUNT_1 0 +#define MULTISAMPLECOUNT_2 1 +#define MULTISAMPLECOUNT_4 2 +#define MULTISAMPLECOUNT_8 3 +#define MULTISAMPLECOUNT_16 4 + uint32_t NumberofMultisamples; + uint32_t MultisamplePositionPaletteIndex; + uint32_t XOffset; + uint32_t YOffset; 
+ bool EWADisableForCube; +#define NONE 0 +#define _4KB 1 +#define _64KB 2 +#define TILEYF 1 +#define TILEYS 2 + uint32_t TiledResourceMode; +#define GPUcoherent 0 +#define IAcoherent 1 + uint32_t CoherencyType; + uint32_t MipTailStartLOD; + uint32_t SurfaceMinLOD; + uint32_t MIPCountLOD; + uint32_t AuxiliarySurfaceQPitch; + uint32_t AuxiliarySurfacePitch; +#define AUX_NONE 0 +#define AUX_CCS_D 1 +#define AUX_APPEND 2 +#define AUX_HIZ 3 +#define AUX_CCS_E 5 + uint32_t AuxiliarySurfaceMode; + bool SeparateUVPlaneEnable; + uint32_t XOffsetforUorUVPlane; + uint32_t YOffsetforUorUVPlane; +#define Horizontal 0 +#define Vertical 1 + uint32_t MemoryCompressionMode; + bool MemoryCompressionEnable; + uint32_t ShaderChannelSelectRed; + uint32_t ShaderChannelSelectGreen; + uint32_t ShaderChannelSelectBlue; + uint32_t ShaderChannelSelectAlpha; + float ResourceMinLOD; + __gen_address_type SurfaceBaseAddress; + uint32_t XOffsetforVPlane; + uint32_t YOffsetforVPlane; + uint32_t AuxiliaryTableIndexforMediaCompressedSurface; + __gen_address_type AuxiliarySurfaceBaseAddress; + uint32_t QuiltHeight; + uint32_t QuiltWidth; + float HierarchicalDepthClearValue; + uint32_t RedClearColor; + uint32_t GreenClearColor; + uint32_t BlueClearColor; + uint32_t AlphaClearColor; +}; + +static inline void +GEN9_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_RENDER_SURFACE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->SurfaceArray, 28, 28) | + __gen_field(values->ASTC_Enable, 27, 27) | + __gen_field(values->SurfaceFormat, 18, 26) | + __gen_field(values->SurfaceVerticalAlignment, 16, 17) | + __gen_field(values->SurfaceHorizontalAlignment, 14, 15) | + __gen_field(values->TileMode, 12, 13) | + __gen_field(values->VerticalLineStride, 11, 11) | + __gen_field(values->VerticalLineStrideOffset, 10, 10) | + __gen_field(values->SamplerL2BypassModeDisable, 9, 9) | 
+ __gen_field(values->RenderCacheReadWriteMode, 8, 8) | + __gen_field(values->MediaBoundaryPixelMode, 6, 7) | + __gen_field(values->CubeFaceEnablePositiveZ, 0, 0) | + __gen_field(values->CubeFaceEnableNegativeZ, 1, 1) | + __gen_field(values->CubeFaceEnablePositiveY, 2, 2) | + __gen_field(values->CubeFaceEnableNegativeY, 3, 3) | + __gen_field(values->CubeFaceEnablePositiveX, 4, 4) | + __gen_field(values->CubeFaceEnableNegativeX, 5, 5) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[1] = + __gen_field(dw_MemoryObjectControlState, 24, 30) | + __gen_field(values->BaseMipLevel * (1 << 1), 19, 23) | + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + + dw[2] = + __gen_field(values->Height, 16, 29) | + __gen_field(values->Width, 0, 13) | + 0; + + dw[3] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + dw[4] = + __gen_field(values->RenderTargetAndSampleUnormRotation, 29, 30) | + __gen_field(values->MinimumArrayElement, 18, 28) | + __gen_field(values->RenderTargetViewExtent, 7, 17) | + __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | + __gen_field(values->NumberofMultisamples, 3, 5) | + __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | + 0; + + dw[5] = + __gen_offset(values->XOffset, 25, 31) | + __gen_offset(values->YOffset, 21, 23) | + __gen_field(values->EWADisableForCube, 20, 20) | + __gen_field(values->TiledResourceMode, 18, 19) | + __gen_field(values->CoherencyType, 14, 14) | + __gen_field(values->MipTailStartLOD, 8, 11) | + __gen_field(values->SurfaceMinLOD, 4, 7) | + __gen_field(values->MIPCountLOD, 0, 3) | + 0; + + dw[6] = + __gen_field(values->AuxiliarySurfaceQPitch, 16, 30) | + __gen_field(values->AuxiliarySurfacePitch, 3, 11) | + __gen_field(values->AuxiliarySurfaceMode, 0, 2) | + __gen_field(values->SeparateUVPlaneEnable, 31, 31) | + __gen_field(values->XOffsetforUorUVPlane, 
16, 29) | + __gen_field(values->YOffsetforUorUVPlane, 0, 13) | + 0; + + dw[7] = + __gen_field(values->MemoryCompressionMode, 31, 31) | + __gen_field(values->MemoryCompressionEnable, 30, 30) | + __gen_field(values->ShaderChannelSelectRed, 25, 27) | + __gen_field(values->ShaderChannelSelectGreen, 22, 24) | + __gen_field(values->ShaderChannelSelectBlue, 19, 21) | + __gen_field(values->ShaderChannelSelectAlpha, 16, 18) | + __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | + 0; + + uint32_t dw8 = + 0; + + uint64_t qw8 = + __gen_combine_address(data, &dw[8], values->SurfaceBaseAddress, dw8); + + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint32_t dw10 = + __gen_field(values->XOffsetforVPlane, 48, 61) | + __gen_field(values->YOffsetforVPlane, 32, 45) | + __gen_field(values->AuxiliaryTableIndexforMediaCompressedSurface, 21, 31) | + __gen_field(values->QuiltHeight, 5, 9) | + __gen_field(values->QuiltWidth, 0, 4) | + 0; + + uint64_t qw10 = + __gen_combine_address(data, &dw[10], values->AuxiliarySurfaceBaseAddress, dw10); + + dw[10] = qw10; + dw[11] = qw10 >> 32; + + dw[12] = + __gen_float(values->HierarchicalDepthClearValue) | + __gen_field(values->RedClearColor, 0, 31) | + 0; + + dw[13] = + __gen_field(values->GreenClearColor, 0, 31) | + 0; + + dw[14] = + __gen_field(values->BlueClearColor, 0, 31) | + 0; + + dw[15] = + __gen_field(values->AlphaClearColor, 0, 31) | + 0; + +} + +#define GEN9_FILTER_COEFFICIENT_length 0x00000001 + +struct GEN9_FILTER_COEFFICIENT { + uint32_t FilterCoefficient; +}; + +static inline void +GEN9_FILTER_COEFFICIENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_FILTER_COEFFICIENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->FilterCoefficient, 0, 7) | + 0; + +} + +#define GEN9_SAMPLER_STATE_length 0x00000004 + +struct GEN9_SAMPLER_STATE { + bool SamplerDisable; +#define DX10OGL 0 +#define DX9 1 + uint32_t TextureBorderColorMode; +#define CLAMP_NONE 0 +#define CLAMP_OGL 2 + 
uint32_t LODPreClampMode; + uint32_t CoarseLODQualityMode; +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + uint32_t MipModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MagModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MinModeFilter; + uint32_t TextureLODBias; +#define LEGACY 0 +#define EWAApproximation 1 + uint32_t AnisotropicAlgorithm; + float MinLOD; + float MaxLOD; + bool ChromaKeyEnable; + uint32_t ChromaKeyIndex; +#define KEYFILTER_KILL_ON_ANY_MATCH 0 +#define KEYFILTER_REPLACE_BLACK 1 + uint32_t ChromaKeyMode; +#define PREFILTEROPALWAYS 0 +#define PREFILTEROPNEVER 1 +#define PREFILTEROPLESS 2 +#define PREFILTEROPEQUAL 3 +#define PREFILTEROPLEQUAL 4 +#define PREFILTEROPGREATER 5 +#define PREFILTEROPNOTEQUAL 6 +#define PREFILTEROPGEQUAL 7 + uint32_t ShadowFunction; +#define PROGRAMMED 0 +#define OVERRIDE 1 + uint32_t CubeSurfaceControlMode; + uint32_t IndirectStatePointer; +#define MIPNONE 0 +#define MIPFILTER 1 + uint32_t LODClampMagnificationMode; +#define STD_FILTER 0 +#define COMPARISON 1 +#define MINIMUM 2 +#define MAXIMUM 3 + uint32_t ReductionType; +#define RATIO21 0 +#define RATIO41 1 +#define RATIO61 2 +#define RATIO81 3 +#define RATIO101 4 +#define RATIO121 5 +#define RATIO141 6 +#define RATIO161 7 + uint32_t MaximumAnisotropy; + bool RAddressMinFilterRoundingEnable; + bool RAddressMagFilterRoundingEnable; + bool VAddressMinFilterRoundingEnable; + bool VAddressMagFilterRoundingEnable; + bool UAddressMinFilterRoundingEnable; + bool UAddressMagFilterRoundingEnable; +#define FULL 0 +#define HIGH 1 +#define MED 2 +#define LOW 3 + uint32_t TrilinearFilterQuality; + bool NonnormalizedCoordinateEnable; + bool ReductionTypeEnable; + uint32_t TCXAddressControlMode; + uint32_t TCYAddressControlMode; + uint32_t TCZAddressControlMode; +}; + +static 
inline void +GEN9_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SAMPLER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SamplerDisable, 31, 31) | + __gen_field(values->TextureBorderColorMode, 29, 29) | + __gen_field(values->LODPreClampMode, 27, 28) | + __gen_field(values->CoarseLODQualityMode, 22, 26) | + __gen_field(values->MipModeFilter, 20, 21) | + __gen_field(values->MagModeFilter, 17, 19) | + __gen_field(values->MinModeFilter, 14, 16) | + __gen_field(values->TextureLODBias, 1, 13) | + __gen_field(values->AnisotropicAlgorithm, 0, 0) | + 0; + + dw[1] = + __gen_field(values->MinLOD * (1 << 8), 20, 31) | + __gen_field(values->MaxLOD * (1 << 8), 8, 19) | + __gen_field(values->ChromaKeyEnable, 7, 7) | + __gen_field(values->ChromaKeyIndex, 5, 6) | + __gen_field(values->ChromaKeyMode, 4, 4) | + __gen_field(values->ShadowFunction, 1, 3) | + __gen_field(values->CubeSurfaceControlMode, 0, 0) | + 0; + + dw[2] = + __gen_field(values->IndirectStatePointer, 6, 23) | + __gen_field(values->LODClampMagnificationMode, 0, 0) | + 0; + + dw[3] = + __gen_field(values->ReductionType, 22, 23) | + __gen_field(values->MaximumAnisotropy, 19, 21) | + __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | + __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | + __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | + __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | + __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | + __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | + __gen_field(values->TrilinearFilterQuality, 11, 12) | + __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | + __gen_field(values->ReductionTypeEnable, 9, 9) | + __gen_field(values->TCXAddressControlMode, 6, 8) | + __gen_field(values->TCYAddressControlMode, 3, 5) | + __gen_field(values->TCZAddressControlMode, 0, 2) | + 0; + +} + +#define 
GEN9_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_length 0x00000008 + +struct GEN9_SAMPLER_STATE_8X8_AVS_COEFFICIENTS { + uint32_t Table0YFilterCoefficientn1; + uint32_t Table0XFilterCoefficientn1; + uint32_t Table0YFilterCoefficientn0; + uint32_t Table0XFilterCoefficientn0; + uint32_t Table0YFilterCoefficientn3; + uint32_t Table0XFilterCoefficientn3; + uint32_t Table0YFilterCoefficientn2; + uint32_t Table0XFilterCoefficientn2; + uint32_t Table0YFilterCoefficientn5; + uint32_t Table0XFilterCoefficientn5; + uint32_t Table0YFilterCoefficientn4; + uint32_t Table0XFilterCoefficientn4; + uint32_t Table0YFilterCoefficientn7; + uint32_t Table0XFilterCoefficientn7; + uint32_t Table0YFilterCoefficientn6; + uint32_t Table0XFilterCoefficientn6; + uint32_t Table1XFilterCoefficientn3; + uint32_t Table1XFilterCoefficientn2; + uint32_t Table1XFilterCoefficientn5; + uint32_t Table1XFilterCoefficientn4; + uint32_t Table1YFilterCoefficientn3; + uint32_t Table1YFilterCoefficientn2; + uint32_t Table1YFilterCoefficientn5; + uint32_t Table1YFilterCoefficientn4; +}; + +static inline void +GEN9_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SAMPLER_STATE_8X8_AVS_COEFFICIENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Table0YFilterCoefficientn1, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn1, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn0, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn0, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Table0YFilterCoefficientn3, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn3, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn2, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn2, 0, 7) | + 0; + + dw[2] = + __gen_field(values->Table0YFilterCoefficientn5, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn5, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn4, 8, 15) | + 
__gen_field(values->Table0XFilterCoefficientn4, 0, 7) | + 0; + + dw[3] = + __gen_field(values->Table0YFilterCoefficientn7, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn7, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn6, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn6, 0, 7) | + 0; + + dw[4] = + __gen_field(values->Table1XFilterCoefficientn3, 24, 31) | + __gen_field(values->Table1XFilterCoefficientn2, 16, 23) | + 0; + + dw[5] = + __gen_field(values->Table1XFilterCoefficientn5, 8, 15) | + __gen_field(values->Table1XFilterCoefficientn4, 0, 7) | + 0; + + dw[6] = + __gen_field(values->Table1YFilterCoefficientn3, 24, 31) | + __gen_field(values->Table1YFilterCoefficientn2, 16, 23) | + 0; + + dw[7] = + __gen_field(values->Table1YFilterCoefficientn5, 8, 15) | + __gen_field(values->Table1YFilterCoefficientn4, 0, 7) | + 0; + +} + +/* Enum 3D_Prim_Topo_Type */ +#define _3DPRIM_POINTLIST 1 +#define _3DPRIM_LINELIST 2 +#define _3DPRIM_LINESTRIP 3 +#define _3DPRIM_TRILIST 4 +#define _3DPRIM_TRISTRIP 5 +#define _3DPRIM_TRIFAN 6 +#define _3DPRIM_QUADLIST 7 +#define _3DPRIM_QUADSTRIP 8 +#define _3DPRIM_LINELIST_ADJ 9 +#define _3DPRIM_LINESTRIP_ADJ 10 +#define _3DPRIM_TRILIST_ADJ 11 +#define _3DPRIM_TRISTRIP_ADJ 12 +#define _3DPRIM_TRISTRIP_REVERSE 13 +#define _3DPRIM_POLYGON 14 +#define _3DPRIM_RECTLIST 15 +#define _3DPRIM_LINELOOP 16 +#define _3DPRIM_POINTLIST_BF 17 +#define _3DPRIM_LINESTRIP_CONT 18 +#define _3DPRIM_LINESTRIP_BF 19 +#define _3DPRIM_LINESTRIP_CONT_BF 20 +#define _3DPRIM_TRIFAN_NOSTIPPLE 22 +#define _3DPRIM_PATCHLIST_1 32 +#define _3DPRIM_PATCHLIST_2 33 +#define _3DPRIM_PATCHLIST_3 34 +#define _3DPRIM_PATCHLIST_4 35 +#define _3DPRIM_PATCHLIST_5 36 +#define _3DPRIM_PATCHLIST_6 37 +#define _3DPRIM_PATCHLIST_7 38 +#define _3DPRIM_PATCHLIST_8 39 +#define _3DPRIM_PATCHLIST_9 40 +#define _3DPRIM_PATCHLIST_10 41 +#define _3DPRIM_PATCHLIST_11 42 +#define _3DPRIM_PATCHLIST_12 43 +#define _3DPRIM_PATCHLIST_13 44 +#define _3DPRIM_PATCHLIST_14 
45 +#define _3DPRIM_PATCHLIST_15 46 +#define _3DPRIM_PATCHLIST_16 47 +#define _3DPRIM_PATCHLIST_17 48 +#define _3DPRIM_PATCHLIST_18 49 +#define _3DPRIM_PATCHLIST_19 50 +#define _3DPRIM_PATCHLIST_20 51 +#define _3DPRIM_PATCHLIST_21 52 +#define _3DPRIM_PATCHLIST_22 53 +#define _3DPRIM_PATCHLIST_23 54 +#define _3DPRIM_PATCHLIST_24 55 +#define _3DPRIM_PATCHLIST_25 56 +#define _3DPRIM_PATCHLIST_26 57 +#define _3DPRIM_PATCHLIST_27 58 +#define _3DPRIM_PATCHLIST_28 59 +#define _3DPRIM_PATCHLIST_29 60 +#define _3DPRIM_PATCHLIST_30 61 +#define _3DPRIM_PATCHLIST_31 62 +#define _3DPRIM_PATCHLIST_32 63 + +/* Enum 3D_Vertex_Component_Control */ +#define VFCOMP_NOSTORE 0 +#define VFCOMP_STORE_SRC 1 +#define VFCOMP_STORE_0 2 +#define VFCOMP_STORE_1_FP 3 +#define VFCOMP_STORE_1_INT 4 +#define VFCOMP_STORE_PID 7 + +/* Enum COMPONENT_ENABLES */ +#define CE_NONE 0 +#define CE_X 1 +#define CE_Y 2 +#define CE_XY 3 +#define CE_Z 4 +#define CE_XZ 5 +#define CE_YZ 6 +#define CE_XYZ 7 +#define CE_W 8 +#define CE_XW 9 +#define CE_YW 10 +#define CE_XYW 11 +#define CE_ZW 12 +#define CE_XZW 13 +#define CE_YZW 14 +#define CE_XYZW 15 + +/* Enum Attribute_Component_Format */ +#define ACF_DISABLED 0 +#define ACF_XY 1 +#define ACF_XYZ 2 +#define ACF_XYZW 3 + +/* Enum WRAP_SHORTEST_ENABLE */ +#define WSE_X 1 +#define WSE_Y 2 +#define WSE_XY 3 +#define WSE_Z 4 +#define WSE_XZ 5 +#define WSE_YZ 6 +#define WSE_XYZ 7 +#define WSE_W 8 +#define WSE_XW 9 +#define WSE_YW 10 +#define WSE_XYW 11 +#define WSE_ZW 12 +#define WSE_XZW 13 +#define WSE_YZW 14 +#define WSE_XYZW 15 + +/* Enum 3D_Stencil_Operation */ +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + +/* Enum 3D_Color_Buffer_Blend_Factor */ +#define BLENDFACTOR_ONE 1 +#define BLENDFACTOR_SRC_COLOR 2 +#define BLENDFACTOR_SRC_ALPHA 3 +#define BLENDFACTOR_DST_ALPHA 4 +#define 
BLENDFACTOR_DST_COLOR 5 +#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 +#define BLENDFACTOR_CONST_COLOR 7 +#define BLENDFACTOR_CONST_ALPHA 8 +#define BLENDFACTOR_SRC1_COLOR 9 +#define BLENDFACTOR_SRC1_ALPHA 10 +#define BLENDFACTOR_ZERO 17 +#define BLENDFACTOR_INV_SRC_COLOR 18 +#define BLENDFACTOR_INV_SRC_ALPHA 19 +#define BLENDFACTOR_INV_DST_ALPHA 20 +#define BLENDFACTOR_INV_DST_COLOR 21 +#define BLENDFACTOR_INV_CONST_COLOR 23 +#define BLENDFACTOR_INV_CONST_ALPHA 24 +#define BLENDFACTOR_INV_SRC1_COLOR 25 +#define BLENDFACTOR_INV_SRC1_ALPHA 26 + +/* Enum 3D_Color_Buffer_Blend_Function */ +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + +/* Enum 3D_Compare_Function */ +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + +/* Enum 3D_Logic_Op_Function */ +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 1 +#define LOGICOP_AND_INVERTED 2 +#define LOGICOP_COPY_INVERTED 3 +#define LOGICOP_AND_REVERSE 4 +#define LOGICOP_INVERT 5 +#define LOGICOP_XOR 6 +#define LOGICOP_NAND 7 +#define LOGICOP_AND 8 +#define LOGICOP_EQUIV 9 +#define LOGICOP_NOOP 10 +#define LOGICOP_OR_INVERTED 11 +#define LOGICOP_COPY 12 +#define LOGICOP_OR_REVERSE 13 +#define LOGICOP_OR 14 +#define LOGICOP_SET 15 + +/* Enum SURFACE_FORMAT */ +#define R32G32B32A32_FLOAT 0 +#define R32G32B32A32_SINT 1 +#define R32G32B32A32_UINT 2 +#define R32G32B32A32_UNORM 3 +#define R32G32B32A32_SNORM 4 +#define R64G64_FLOAT 5 +#define R32G32B32X32_FLOAT 6 +#define R32G32B32A32_SSCALED 7 +#define R32G32B32A32_USCALED 8 +#define R32G32B32A32_SFIXED 32 +#define R64G64_PASSTHRU 33 +#define R32G32B32_FLOAT 64 +#define R32G32B32_SINT 65 +#define R32G32B32_UINT 66 +#define R32G32B32_UNORM 67 +#define 
R32G32B32_SNORM 68 +#define R32G32B32_SSCALED 69 +#define R32G32B32_USCALED 70 +#define R32G32B32_SFIXED 80 +#define R16G16B16A16_UNORM 128 +#define R16G16B16A16_SNORM 129 +#define R16G16B16A16_SINT 130 +#define R16G16B16A16_UINT 131 +#define R16G16B16A16_FLOAT 132 +#define R32G32_FLOAT 133 +#define R32G32_SINT 134 +#define R32G32_UINT 135 +#define R32_FLOAT_X8X24_TYPELESS 136 +#define X32_TYPELESS_G8X24_UINT 137 +#define L32A32_FLOAT 138 +#define R32G32_UNORM 139 +#define R32G32_SNORM 140 +#define R64_FLOAT 141 +#define R16G16B16X16_UNORM 142 +#define R16G16B16X16_FLOAT 143 +#define A32X32_FLOAT 144 +#define L32X32_FLOAT 145 +#define I32X32_FLOAT 146 +#define R16G16B16A16_SSCALED 147 +#define R16G16B16A16_USCALED 148 +#define R32G32_SSCALED 149 +#define R32G32_USCALED 150 +#define R32G32_SFIXED 160 +#define R64_PASSTHRU 161 +#define B8G8R8A8_UNORM 192 +#define B8G8R8A8_UNORM_SRGB 193 +#define R10G10B10A2_UNORM 194 +#define R10G10B10A2_UNORM_SRGB 195 +#define R10G10B10A2_UINT 196 +#define R10G10B10_SNORM_A2_UNORM 197 +#define R8G8B8A8_UNORM 199 +#define R8G8B8A8_UNORM_SRGB 200 +#define R8G8B8A8_SNORM 201 +#define R8G8B8A8_SINT 202 +#define R8G8B8A8_UINT 203 +#define R16G16_UNORM 204 +#define R16G16_SNORM 205 +#define R16G16_SINT 206 +#define R16G16_UINT 207 +#define R16G16_FLOAT 208 +#define B10G10R10A2_UNORM 209 +#define B10G10R10A2_UNORM_SRGB 210 +#define R11G11B10_FLOAT 211 +#define R32_SINT 214 +#define R32_UINT 215 +#define R32_FLOAT 216 +#define R24_UNORM_X8_TYPELESS 217 +#define X24_TYPELESS_G8_UINT 218 +#define L32_UNORM 221 +#define A32_UNORM 222 +#define L16A16_UNORM 223 +#define I24X8_UNORM 224 +#define L24X8_UNORM 225 +#define A24X8_UNORM 226 +#define I32_FLOAT 227 +#define L32_FLOAT 228 +#define A32_FLOAT 229 +#define X8B8_UNORM_G8R8_SNORM 230 +#define A8X8_UNORM_G8R8_SNORM 231 +#define B8X8_UNORM_G8R8_SNORM 232 +#define B8G8R8X8_UNORM 233 +#define B8G8R8X8_UNORM_SRGB 234 +#define R8G8B8X8_UNORM 235 +#define R8G8B8X8_UNORM_SRGB 236 +#define 
R9G9B9E5_SHAREDEXP 237 +#define B10G10R10X2_UNORM 238 +#define L16A16_FLOAT 240 +#define R32_UNORM 241 +#define R32_SNORM 242 +#define R10G10B10X2_USCALED 243 +#define R8G8B8A8_SSCALED 244 +#define R8G8B8A8_USCALED 245 +#define R16G16_SSCALED 246 +#define R16G16_USCALED 247 +#define R32_SSCALED 248 +#define R32_USCALED 249 +#define B5G6R5_UNORM 256 +#define B5G6R5_UNORM_SRGB 257 +#define B5G5R5A1_UNORM 258 +#define B5G5R5A1_UNORM_SRGB 259 +#define B4G4R4A4_UNORM 260 +#define B4G4R4A4_UNORM_SRGB 261 +#define R8G8_UNORM 262 +#define R8G8_SNORM 263 +#define R8G8_SINT 264 +#define R8G8_UINT 265 +#define R16_UNORM 266 +#define R16_SNORM 267 +#define R16_SINT 268 +#define R16_UINT 269 +#define R16_FLOAT 270 +#define A8P8_UNORM_PALETTE0 271 +#define A8P8_UNORM_PALETTE1 272 +#define I16_UNORM 273 +#define L16_UNORM 274 +#define A16_UNORM 275 +#define L8A8_UNORM 276 +#define I16_FLOAT 277 +#define L16_FLOAT 278 +#define A16_FLOAT 279 +#define L8A8_UNORM_SRGB 280 +#define R5G5_SNORM_B6_UNORM 281 +#define B5G5R5X1_UNORM 282 +#define B5G5R5X1_UNORM_SRGB 283 +#define R8G8_SSCALED 284 +#define R8G8_USCALED 285 +#define R16_SSCALED 286 +#define R16_USCALED 287 +#define P8A8_UNORM_PALETTE0 290 +#define P8A8_UNORM_PALETTE1 291 +#define A1B5G5R5_UNORM 292 +#define A4B4G4R4_UNORM 293 +#define L8A8_UINT 294 +#define L8A8_SINT 295 +#define R8_UNORM 320 +#define R8_SNORM 321 +#define R8_SINT 322 +#define R8_UINT 323 +#define A8_UNORM 324 +#define I8_UNORM 325 +#define L8_UNORM 326 +#define P4A4_UNORM_PALETTE0 327 +#define A4P4_UNORM_PALETTE0 328 +#define R8_SSCALED 329 +#define R8_USCALED 330 +#define P8_UNORM_PALETTE0 331 +#define L8_UNORM_SRGB 332 +#define P8_UNORM_PALETTE1 333 +#define P4A4_UNORM_PALETTE1 334 +#define A4P4_UNORM_PALETTE1 335 +#define Y8_UNORM 336 +#define L8_UINT 338 +#define L8_SINT 339 +#define I8_UINT 340 +#define I8_SINT 341 +#define DXT1_RGB_SRGB 384 +#define R1_UNORM 385 +#define YCRCB_NORMAL 386 +#define YCRCB_SWAPUVY 387 +#define P2_UNORM_PALETTE0 388 
+#define P2_UNORM_PALETTE1 389 +#define BC1_UNORM 390 +#define BC2_UNORM 391 +#define BC3_UNORM 392 +#define BC4_UNORM 393 +#define BC5_UNORM 394 +#define BC1_UNORM_SRGB 395 +#define BC2_UNORM_SRGB 396 +#define BC3_UNORM_SRGB 397 +#define MONO8 398 +#define YCRCB_SWAPUV 399 +#define YCRCB_SWAPY 400 +#define DXT1_RGB 401 +#define FXT1 402 +#define R8G8B8_UNORM 403 +#define R8G8B8_SNORM 404 +#define R8G8B8_SSCALED 405 +#define R8G8B8_USCALED 406 +#define R64G64B64A64_FLOAT 407 +#define R64G64B64_FLOAT 408 +#define BC4_SNORM 409 +#define BC5_SNORM 410 +#define R16G16B16_FLOAT 411 +#define R16G16B16_UNORM 412 +#define R16G16B16_SNORM 413 +#define R16G16B16_SSCALED 414 +#define R16G16B16_USCALED 415 +#define BC6H_SF16 417 +#define BC7_UNORM 418 +#define BC7_UNORM_SRGB 419 +#define BC6H_UF16 420 +#define PLANAR_420_8 421 +#define R8G8B8_UNORM_SRGB 424 +#define ETC1_RGB8 425 +#define ETC2_RGB8 426 +#define EAC_R11 427 +#define EAC_RG11 428 +#define EAC_SIGNED_R11 429 +#define EAC_SIGNED_RG11 430 +#define ETC2_SRGB8 431 +#define R16G16B16_UINT 432 +#define R16G16B16_SINT 433 +#define R32_SFIXED 434 +#define R10G10B10A2_SNORM 435 +#define R10G10B10A2_USCALED 436 +#define R10G10B10A2_SSCALED 437 +#define R10G10B10A2_SINT 438 +#define B10G10R10A2_SNORM 439 +#define B10G10R10A2_USCALED 440 +#define B10G10R10A2_SSCALED 441 +#define B10G10R10A2_UINT 442 +#define B10G10R10A2_SINT 443 +#define R64G64B64A64_PASSTHRU 444 +#define R64G64B64_PASSTHRU 445 +#define ETC2_RGB8_PTA 448 +#define ETC2_SRGB8_PTA 449 +#define ETC2_EAC_RGBA8 450 +#define ETC2_EAC_SRGB8_A8 451 +#define R8G8B8_UINT 456 +#define R8G8B8_SINT 457 +#define RAW 511 + +/* Enum Shader Channel Select */ +#define SCS_ZERO 0 +#define SCS_ONE 1 +#define SCS_RED 4 +#define SCS_GREEN 5 +#define SCS_BLUE 6 +#define SCS_ALPHA 7 + +/* Enum Texture Coordinate Mode */ +#define TCM_WRAP 0 +#define TCM_MIRROR 1 +#define TCM_CLAMP 2 +#define TCM_CUBE 3 +#define TCM_CLAMP_BORDER 4 +#define TCM_MIRROR_ONCE 5 +#define TCM_HALF_BORDER 6 
+ -- cgit v1.2.3 From c445fa2f774909f9c2a1e60c89e2f030c6ccfdf2 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 15 Sep 2015 12:37:21 -0700 Subject: vk: Make entrypoint generator output gen9 entry points MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_entrypoints_gen.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py index e0a521e8756..2e56274eb50 100644 --- a/src/vulkan/anv_entrypoints_gen.py +++ b/src/vulkan/anv_entrypoints_gen.py @@ -98,6 +98,7 @@ if opt_header: print "%s gen7_%s%s;" % (type, name, args) print "%s gen75_%s%s;" % (type, name, args) print "%s gen8_%s%s;" % (type, name, args) + print "%s gen9_%s%s;" % (type, name, args) print "%s anv_validate_%s%s;" % (type, name, args) exit() @@ -164,7 +165,7 @@ for type, name, args, num, h in entrypoints: print " { %5d, 0x%08x }," % (offsets[num], h) print "};\n" -for layer in [ "anv", "validate", "gen7", "gen75", "gen8" ]: +for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]: for type, name, args, num, h in entrypoints: print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) print "\nconst struct anv_dispatch_table %s_layer = {" % layer @@ -214,6 +215,10 @@ anv_resolve_entrypoint(uint32_t index) } switch (dispatch_devinfo->gen) { + case 9: + if (gen9_layer.entrypoints[index]) + return gen9_layer.entrypoints[index]; + /* fall through */ case 8: if (gen8_layer.entrypoints[index]) return gen8_layer.entrypoints[index]; -- cgit v1.2.3 From cd4721c062a300739b107925f801c9b9ced5f9fa Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 25 Nov 2015 22:27:01 -0800 Subject: vk: Add SKL support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kristian Høgsberg Kristensen --- 
src/vulkan/Makefile.am | 10 +- src/vulkan/anv_cmd_buffer.c | 5 + src/vulkan/anv_device.c | 7 +- src/vulkan/anv_image.c | 3 + src/vulkan/anv_pipeline.c | 4 + src/vulkan/anv_private.h | 37 ++++++- src/vulkan/gen8_cmd_buffer.c | 257 +++++++++++++++++++++++++++---------------- src/vulkan/gen8_pipeline.c | 164 ++++++++++++++++++--------- src/vulkan/gen8_state.c | 47 ++++---- src/vulkan/genX_cmd_buffer.c | 4 +- 10 files changed, 366 insertions(+), 172 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 64b5e90ae33..3f6d4a3e2e9 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -37,7 +37,8 @@ check_LTLIBRARIES = libvulkan-test.la PER_GEN_LIBS = \ libanv-gen7.la \ libanv-gen75.la \ - libanv-gen8.la + libanv-gen8.la \ + libanv-gen9.la noinst_LTLIBRARIES = $(PER_GEN_LIBS) @@ -115,6 +116,13 @@ libanv_gen8_la_SOURCES = \ gen8_state.c libanv_gen8_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=80 +libanv_gen9_la_SOURCES = \ + genX_cmd_buffer.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen8_state.c +libanv_gen9_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=90 + if HAVE_EGL_PLATFORM_WAYLAND BUILT_SOURCES += \ wayland-drm-protocol.c \ diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 6dedc3f335f..66b2f65e9f7 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -250,6 +250,8 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); case 8: return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); + case 9: + return gen9_cmd_buffer_emit_state_base_address(cmd_buffer); default: unreachable("unsupported gen\n"); } @@ -788,6 +790,9 @@ anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, case 8: gen8_cmd_buffer_begin_subpass(cmd_buffer, subpass); break; + case 9: + gen9_cmd_buffer_begin_subpass(cmd_buffer, subpass); + break; default: unreachable("unsupported gen\n"); } diff 
--git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index e2b00c97c25..94bc4f11f27 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -87,8 +87,10 @@ anv_physical_device_init(struct anv_physical_device *device, fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n"); } else if (device->info->gen == 7 && !device->info->is_baytrail) { fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); + } else if (device->info->gen == 9) { + fprintf(stderr, "WARNING: Skylake Vulkan support is incomplete\n"); } else if (device->info->gen == 8 && !device->info->is_cherryview) { - /* Briadwell is as fully supported as anything */ + /* Broadwell is as fully supported as anything */ } else { result = vk_errorf(VK_UNSUPPORTED, "Vulkan not yet supported on %s", device->name); @@ -1448,6 +1450,9 @@ anv_fill_buffer_surface_state(struct anv_device *device, void *state, case 8: gen8_fill_buffer_surface_state(state, format, offset, range, stride); break; + case 9: + gen9_fill_buffer_surface_state(state, format, offset, range, stride); + break; default: unreachable("unsupported gen\n"); } diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 60d23a17f5f..d344fcc2bea 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -573,6 +573,9 @@ anv_image_view_init(struct anv_image_view *iview, case 8: gen8_image_view_init(iview, device, pCreateInfo, cmd_buffer); break; + case 9: + gen9_image_view_init(iview, device, pCreateInfo, cmd_buffer); + break; default: unreachable("unsupported gen\n"); } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 3d9e0705626..cf4bf9f87a0 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -1082,6 +1082,8 @@ anv_graphics_pipeline_create( return gen7_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); case 8: return gen8_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); + case 9: + return 
gen9_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); default: unreachable("unsupported gen\n"); } @@ -1127,6 +1129,8 @@ static VkResult anv_compute_pipeline_create( return gen7_compute_pipeline_create(_device, pCreateInfo, pPipeline); case 8: return gen8_compute_pipeline_create(_device, pCreateInfo, pPipeline); + case 9: + return gen9_compute_pipeline_create(_device, pCreateInfo, pPipeline); default: unreachable("unsupported gen\n"); } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a3e63e4c0c0..36cee88602d 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -701,6 +701,20 @@ __gen_combine_address(struct anv_batch *batch, void *location, .AgeforQUADLRU = 0 \ } +/* Skylake: MOCS is now an index into an array of 62 different caching + * configurations programmed by the kernel. + */ + +#define GEN9_MOCS { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 2 \ + } + +#define GEN9_MOCS_PTE { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 1 \ + } + struct anv_device_memory { struct anv_bo bo; VkDeviceSize map_size; @@ -1079,6 +1093,7 @@ void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void gen75_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); @@ -1087,6 +1102,8 @@ void gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, void gen8_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); +void gen9_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, 
struct anv_subpass *subpass); @@ -1184,7 +1201,7 @@ struct anv_pipeline { struct { uint32_t sf[4]; uint32_t raster[5]; - uint32_t wm_depth_stencil[3]; + uint32_t wm_depth_stencil[4]; } gen8; }; @@ -1230,6 +1247,11 @@ gen8_graphics_pipeline_create(VkDevice _device, const struct anv_graphics_pipeline_create_info *extra, VkPipeline *pPipeline); VkResult +gen9_graphics_pipeline_create(VkDevice _device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + VkPipeline *pPipeline); +VkResult gen7_compute_pipeline_create(VkDevice _device, const VkComputePipelineCreateInfo *pCreateInfo, VkPipeline *pPipeline); @@ -1242,6 +1264,10 @@ VkResult gen8_compute_pipeline_create(VkDevice _device, const VkComputePipelineCreateInfo *pCreateInfo, VkPipeline *pPipeline); +VkResult +gen9_compute_pipeline_create(VkDevice _device, + const VkComputePipelineCreateInfo *pCreateInfo, + VkPipeline *pPipeline); struct anv_format { const VkFormat vk_format; @@ -1405,6 +1431,12 @@ gen8_image_view_init(struct anv_image_view *iview, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); +void +gen9_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + void anv_fill_buffer_surface_state(struct anv_device *device, void *state, const struct anv_format *format, uint32_t offset, uint32_t range, @@ -1419,6 +1451,9 @@ void gen75_fill_buffer_surface_state(void *state, const struct anv_format *forma void gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, uint32_t offset, uint32_t range, uint32_t stride); +void gen9_fill_buffer_surface_state(void *state, const struct anv_format *format, + uint32_t offset, uint32_t range, + uint32_t stride); struct anv_sampler { uint32_t state[4]; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index a02d7bb2321..09315319001 100644 --- 
a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -30,9 +30,10 @@ #include "anv_private.h" #include "gen8_pack.h" +#include "gen9_pack.h" static void -gen8_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) { static const uint32_t push_constant_opcodes[] = { [VK_SHADER_STAGE_VERTEX] = 21, @@ -52,7 +53,7 @@ gen8_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) if (state.offset == 0) continue; - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CONSTANT_VS, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), ._3DCommandSubOpcode = push_constant_opcodes[stage], .ConstantBody = { .PointerToConstantBuffer0 = { .offset = state.offset }, @@ -65,6 +66,7 @@ gen8_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.push_constants_dirty &= ~flushed; } +#if ANV_GEN == 8 static void emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, uint32_t count, const VkViewport *viewports) @@ -79,7 +81,7 @@ emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, /* The gen7 state struct has just the matrix and guardband fields, the * gen8 struct adds the min/max viewport fields. 
*/ - struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = { + struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = { .ViewportMatrixElementm00 = vp->width / 2, .ViewportMatrixElementm11 = vp->height / 2, .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, @@ -96,21 +98,21 @@ emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, .YMaxViewPort = vp->originY + vp->height - 1, }; - struct GEN8_CC_VIEWPORT cc_viewport = { + struct GENX(CC_VIEWPORT) cc_viewport = { .MinimumDepth = vp->minDepth, .MaximumDepth = vp->maxDepth }; - GEN8_SF_CLIP_VIEWPORT_pack(NULL, sf_clip_state.map + i * 64, + GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sf_clip_viewport); - GEN8_CC_VIEWPORT_pack(NULL, cc_state.map + i * 32, &cc_viewport); + GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 32, &cc_viewport); } anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), .CCViewportPointer = cc_state.offset); anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), .SFClipViewportPointer = sf_clip_state.offset); } @@ -133,9 +135,10 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) }); } } +#endif static void -gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; uint32_t *p; @@ -145,7 +148,10 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); if (cmd_buffer->state.current_pipeline != _3D) { - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif .PipelineSelection = _3D); cmd_buffer->state.current_pipeline = _3D; } @@ -155,22 +161,22 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) const 
uint32_t num_dwords = 1 + num_buffers * 4; p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GEN8_3DSTATE_VERTEX_BUFFERS); + GENX(3DSTATE_VERTEX_BUFFERS)); uint32_t vb, i = 0; for_each_bit(vb, vb_emit) { struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; - struct GEN8_VERTEX_BUFFER_STATE state = { + struct GENX(VERTEX_BUFFER_STATE) state = { .VertexBufferIndex = vb, - .MemoryObjectControlState = GEN8_MOCS, + .MemoryObjectControlState = GENX(MOCS), .AddressModifyEnable = true, .BufferPitch = pipeline->binding_stride[vb], .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, .BufferSize = buffer->size - offset }; - GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); i++; } } @@ -186,11 +192,23 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); } - if (cmd_buffer->state.descriptors_dirty) - gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); +#if ANV_GEN >= 9 + /* On SKL+ the new constants don't take effect until the next corresponding + * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure + * that is sent. As it is, we re-emit binding tables but we could hold on + * to the offset of the most recent binding table and only re-emit the + * 3DSTATE_BINDING_TABLE_POINTER_* command. 
+ */ + cmd_buffer->state.descriptors_dirty |= + cmd_buffer->state.push_constants_dirty & + cmd_buffer->state.pipeline->active_stages; +#endif if (cmd_buffer->state.push_constants_dirty) - gen8_cmd_buffer_flush_push_constants(cmd_buffer); + cmd_buffer_flush_push_constants(cmd_buffer); + + if (cmd_buffer->state.descriptors_dirty) + gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) gen8_cmd_buffer_emit_viewport(cmd_buffer); @@ -200,12 +218,13 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { - uint32_t sf_dw[GEN8_3DSTATE_SF_length]; - struct GEN8_3DSTATE_SF sf = { - GEN8_3DSTATE_SF_header, + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), .LineWidth = cmd_buffer->state.dynamic.line_width, }; - GEN8_3DSTATE_SF_pack(NULL, sf_dw, &sf); + GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); + /* FIXME: gen9.fs */ anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen8.sf); } @@ -214,9 +233,9 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || cmd_buffer->state.dynamic.depth_bias.slope_scaled != 0.0f; - uint32_t raster_dw[GEN8_3DSTATE_RASTER_length]; - struct GEN8_3DSTATE_RASTER raster = { - GEN8_3DSTATE_RASTER_header, + uint32_t raster_dw[GENX(3DSTATE_RASTER_length)]; + struct GENX(3DSTATE_RASTER) raster = { + GENX(3DSTATE_RASTER_header), .GlobalDepthOffsetEnableSolid = enable_bias, .GlobalDepthOffsetEnableWireframe = enable_bias, .GlobalDepthOffsetEnablePoint = enable_bias, @@ -224,11 +243,17 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope_scaled, .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp }; - GEN8_3DSTATE_RASTER_pack(NULL, raster_dw, &raster); + 
GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster); anv_batch_emit_merge(&cmd_buffer->batch, raster_dw, pipeline->gen8.raster); } + /* Stencil reference values were moved from COLOR_CALC_STATE in gen8 to + * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits get split + * across different state packets for gen8 and gen9. We handle that by + * using a big old #if switch here. + */ +#if ANV_GEN == 8 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { struct anv_state cc_state = @@ -280,10 +305,55 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, pipeline->gen8.wm_depth_stencil); } +#else + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN9_COLOR_CALC_STATE_length, 64); + struct GEN9_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + }; + GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + + anv_batch_emit(&cmd_buffer->batch, + GEN9_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length]; + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN9_3DSTATE_WM_DEPTH_STENCIL_header, + + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0, + + .StencilTestMask = 
d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, + + .StencilReferenceValue = d->stencil_reference.front, + .BackfaceStencilReferenceValue = d->stencil_reference.back + }; + GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, dwords, + pipeline->gen8.wm_depth_stencil); + } +#endif if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_INDEX_BUFFER)) { - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VF, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), .IndexedDrawCutIndexEnable = pipeline->primitive_restart, .CutIndex = cmd_buffer->state.restart_index, ); @@ -293,7 +363,7 @@ gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty = 0; } -void gen8_CmdDraw( +void genX(CmdDraw)( VkCmdBuffer cmdBuffer, uint32_t vertexCount, uint32_t instanceCount, @@ -302,9 +372,9 @@ void gen8_CmdDraw( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - gen8_cmd_buffer_flush_state(cmd_buffer); + cmd_buffer_flush_state(cmd_buffer); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), .VertexAccessType = SEQUENTIAL, .VertexCountPerInstance = vertexCount, .StartVertexLocation = firstVertex, @@ -313,7 +383,7 @@ void gen8_CmdDraw( .BaseVertexLocation = 0); } -void gen8_CmdDrawIndexed( +void genX(CmdDrawIndexed)( VkCmdBuffer cmdBuffer, uint32_t indexCount, uint32_t instanceCount, @@ -323,9 +393,9 @@ void gen8_CmdDrawIndexed( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - gen8_cmd_buffer_flush_state(cmd_buffer); + cmd_buffer_flush_state(cmd_buffer); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), .VertexAccessType = RANDOM, .VertexCountPerInstance = indexCount, 
.StartVertexLocation = firstIndex, @@ -338,7 +408,7 @@ static void emit_lrm(struct anv_batch *batch, uint32_t reg, struct anv_bo *bo, uint32_t offset) { - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), .RegisterAddress = reg, .MemoryAddress = { bo, offset }); } @@ -346,7 +416,7 @@ emit_lrm(struct anv_batch *batch, static void emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) { - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), .RegisterOffset = reg, .DataDWord = imm); } @@ -359,7 +429,7 @@ emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) #define GEN7_3DPRIM_START_INSTANCE 0x243C #define GEN7_3DPRIM_BASE_VERTEX 0x2440 -void gen8_CmdDrawIndirect( +void genX(CmdDrawIndirect)( VkCmdBuffer cmdBuffer, VkBuffer _buffer, VkDeviceSize offset, @@ -371,7 +441,7 @@ void gen8_CmdDrawIndirect( struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; - gen8_cmd_buffer_flush_state(cmd_buffer); + cmd_buffer_flush_state(cmd_buffer); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); @@ -379,12 +449,12 @@ void gen8_CmdDrawIndirect( emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), .IndirectParameterEnable = true, .VertexAccessType = SEQUENTIAL); } -void gen8_CmdBindIndexBuffer( +void genX(CmdBindIndexBuffer)( VkCmdBuffer cmdBuffer, VkBuffer _buffer, VkDeviceSize offset, @@ -405,9 +475,9 @@ void gen8_CmdBindIndexBuffer( cmd_buffer->state.restart_index = restart_index_for_type[indexType]; - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), .IndexFormat = 
vk_to_gen_index_type[indexType], - .MemoryObjectControlState = GEN8_MOCS, + .MemoryObjectControlState = GENX(MOCS), .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, .BufferSize = buffer->size - offset); @@ -415,7 +485,7 @@ void gen8_CmdBindIndexBuffer( } static VkResult -gen8_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; @@ -431,7 +501,7 @@ gen8_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) if (result != VK_SUCCESS) return result; - struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = { + struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { .KernelStartPointer = pipeline->cs_simd, .KernelStartPointerHigh = 0, .BindingTablePointer = surfaces.offset, @@ -441,13 +511,13 @@ gen8_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? 
*/ }; - uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); struct anv_state state = anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); - GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); + GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, state.map, &desc); - anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), .InterfaceDescriptorTotalLength = size, .InterfaceDescriptorDataStartAddress = state.offset); @@ -455,7 +525,7 @@ gen8_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) } static void -gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; VkResult result; @@ -463,7 +533,10 @@ gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); if (cmd_buffer->state.current_pipeline != GPGPU) { - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif .PipelineSelection = GPGPU); cmd_buffer->state.current_pipeline = GPGPU; } @@ -473,7 +546,7 @@ gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { - result = gen8_flush_compute_descriptor_set(cmd_buffer); + result = flush_compute_descriptor_set(cmd_buffer); assert(result == VK_SUCCESS); cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; } @@ -481,7 +554,7 @@ gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.compute_dirty = 0; } -void gen8_CmdDrawIndexedIndirect( +void 
genX(CmdDrawIndexedIndirect)( VkCmdBuffer cmdBuffer, VkBuffer _buffer, VkDeviceSize offset, @@ -493,7 +566,7 @@ void gen8_CmdDrawIndexedIndirect( struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; - gen8_cmd_buffer_flush_state(cmd_buffer); + cmd_buffer_flush_state(cmd_buffer); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); @@ -501,12 +574,12 @@ void gen8_CmdDrawIndexedIndirect( emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); - anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), .IndirectParameterEnable = true, .VertexAccessType = RANDOM); } -void gen8_CmdDispatch( +void genX(CmdDispatch)( VkCmdBuffer cmdBuffer, uint32_t x, uint32_t y, @@ -516,9 +589,9 @@ void gen8_CmdDispatch( struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - gen8_cmd_buffer_flush_compute_state(cmd_buffer); + cmd_buffer_flush_compute_state(cmd_buffer); - anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), .SIMDSize = prog_data->simd_size / 16, .ThreadDepthCounterMaximum = 0, .ThreadHeightCounterMaximum = 0, @@ -529,14 +602,14 @@ void gen8_CmdDispatch( .RightExecutionMask = pipeline->cs_right_mask, .BottomExecutionMask = 0xffffffff); - anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); } #define GPGPU_DISPATCHDIMX 0x2500 #define GPGPU_DISPATCHDIMY 0x2504 #define GPGPU_DISPATCHDIMZ 0x2508 -void gen8_CmdDispatchIndirect( +void genX(CmdDispatchIndirect)( VkCmdBuffer cmdBuffer, VkBuffer _buffer, VkDeviceSize offset) @@ -548,13 +621,13 @@ void gen8_CmdDispatchIndirect( struct anv_bo *bo = buffer->bo; uint32_t 
bo_offset = buffer->offset + offset; - gen8_cmd_buffer_flush_compute_state(cmd_buffer); + cmd_buffer_flush_compute_state(cmd_buffer); emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), .IndirectParameterEnable = true, .SIMDSize = prog_data->simd_size / 16, .ThreadDepthCounterMaximum = 0, @@ -563,11 +636,11 @@ void gen8_CmdDispatchIndirect( .RightExecutionMask = pipeline->cs_right_mask, .BottomExecutionMask = 0xffffffff); - anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); } static void -gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_image_view *iview = @@ -581,7 +654,7 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) /* Emit 3DSTATE_DEPTH_BUFFER */ if (has_depth) { - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), .SurfaceType = SURFTYPE_2D, .DepthWriteEnable = iview->format->depth_format, .StencilWriteEnable = has_stencil, @@ -597,7 +670,7 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .LOD = 0, .Depth = 1 - 1, .MinimumArrayElement = 0, - .DepthBufferObjectControlState = GEN8_MOCS, + .DepthBufferObjectControlState = GENX(MOCS), .RenderTargetViewExtent = 1 - 1, .SurfaceQPitch = image->depth_surface.qpitch >> 2); } else { @@ -618,7 +691,7 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) * actual framebuffer's width and height, even when neither depth buffer * nor stencil buffer is present. 
*/ - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), .SurfaceType = SURFTYPE_2D, .SurfaceFormat = D16_UNORM, .Width = fb->width - 1, @@ -628,9 +701,9 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) /* Emit 3DSTATE_STENCIL_BUFFER */ if (has_stencil) { - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), .StencilBufferEnable = true, - .StencilBufferObjectControlState = GEN8_MOCS, + .StencilBufferObjectControlState = GENX(MOCS), /* Stencil buffers have strange pitch. The PRM says: * @@ -645,28 +718,28 @@ gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) }, .SurfaceQPitch = image->stencil_surface.stride >> 2); } else { - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER); + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER)); } /* Disable hierarchial depth buffers. */ - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER); + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER)); /* Clear the clear params. 
*/ - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS); + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS)); } void -gen8_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) +genX(cmd_buffer_begin_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) { cmd_buffer->state.subpass = subpass; cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - gen8_cmd_buffer_emit_depth_stencil(cmd_buffer); + cmd_buffer_emit_depth_stencil(cmd_buffer); } -void gen8_CmdBeginRenderPass( +void genX(CmdBeginRenderPass)( VkCmdBuffer cmdBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkRenderPassContents contents) @@ -680,7 +753,7 @@ void gen8_CmdBeginRenderPass( const VkRect2D *render_area = &pRenderPassBegin->renderArea; - anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), .ClippedDrawingRectangleYMin = render_area->offset.y, .ClippedDrawingRectangleXMin = render_area->offset.x, .ClippedDrawingRectangleYMax = @@ -693,10 +766,10 @@ void gen8_CmdBeginRenderPass( anv_cmd_buffer_clear_attachments(cmd_buffer, pass, pRenderPassBegin->pClearValues); - gen8_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); + genX(cmd_buffer_begin_subpass)(cmd_buffer, pass->subpasses); } -void gen8_CmdNextSubpass( +void genX(CmdNextSubpass)( VkCmdBuffer cmdBuffer, VkRenderPassContents contents) { @@ -704,10 +777,10 @@ void gen8_CmdNextSubpass( assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); - gen8_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); + genX(cmd_buffer_begin_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); } -void gen8_CmdEndRenderPass( +void genX(CmdEndRenderPass)( VkCmdBuffer cmdBuffer) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); @@ -717,7 +790,7 @@ void gen8_CmdEndRenderPass( * Eventually, we should do flushing based on image format transitions * or 
something of that nature. */ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .PostSyncOperation = NoWrite, .RenderTargetCacheFlushEnable = true, .InstructionCacheInvalidateEnable = true, @@ -731,13 +804,13 @@ static void emit_ps_depth_count(struct anv_batch *batch, struct anv_bo *bo, uint32_t offset) { - anv_batch_emit(batch, GEN8_PIPE_CONTROL, + anv_batch_emit(batch, GENX(PIPE_CONTROL), .DestinationAddressType = DAT_PPGTT, .PostSyncOperation = WritePSDepthCount, .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ } -void gen8_CmdBeginQuery( +void genX(CmdBeginQuery)( VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot, @@ -758,7 +831,7 @@ void gen8_CmdBeginQuery( } } -void gen8_CmdEndQuery( +void genX(CmdEndQuery)( VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot) @@ -780,7 +853,7 @@ void gen8_CmdEndQuery( #define TIMESTAMP 0x2358 -void gen8_CmdWriteTimestamp( +void genX(CmdWriteTimestamp)( VkCmdBuffer cmdBuffer, VkTimestampType timestampType, VkBuffer destBuffer, @@ -792,16 +865,16 @@ void gen8_CmdWriteTimestamp( switch (timestampType) { case VK_TIMESTAMP_TYPE_TOP: - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), .RegisterAddress = TIMESTAMP, .MemoryAddress = { bo, buffer->offset + destOffset }); - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), .RegisterAddress = TIMESTAMP + 4, .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); break; case VK_TIMESTAMP_TYPE_BOTTOM: - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .DestinationAddressType = DAT_PPGTT, .PostSyncOperation = WriteTimestamp, .Address = /* FIXME: This is only lower 32 bits */ @@ -849,15 +922,15 @@ static void emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, 
struct anv_bo *bo, uint32_t offset) { - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), .RegisterAddress = reg, .MemoryAddress = { bo, offset }); - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), .RegisterAddress = reg + 4, .MemoryAddress = { bo, offset + 4 }); } -void gen8_CmdCopyQueryPoolResults( +void genX(CmdCopyQueryPoolResults)( VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, @@ -882,7 +955,7 @@ void gen8_CmdCopyQueryPoolResults( /* FIXME: If we're not waiting, should we just do this on the CPU? */ if (flags & VK_QUERY_RESULT_WAIT_BIT) - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .CommandStreamerStallEnable = true, .StallAtPixelScoreboard = true); @@ -896,19 +969,19 @@ void gen8_CmdCopyQueryPoolResults( /* FIXME: We need to clamp the result for 32 bit. */ - uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); dw[3] = alu(OPCODE_SUB, 0, 0); dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), .RegisterAddress = CS_GPR(2), /* FIXME: This is only lower 32 bits */ .MemoryAddress = { buffer->bo, dst_offset }); if (flags & VK_QUERY_RESULT_64_BIT) - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), .RegisterAddress = CS_GPR(2) + 4, /* FIXME: This is only lower 32 bits */ .MemoryAddress = { buffer->bo, dst_offset + 4 }); diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 6e2e65d6344..0038bca01b4 100644 --- a/src/vulkan/gen8_pipeline.c +++ 
b/src/vulkan/gen8_pipeline.c @@ -30,6 +30,7 @@ #include "anv_private.h" #include "gen8_pack.h" +#include "gen9_pack.h" static void emit_vertex_input(struct anv_pipeline *pipeline, @@ -38,9 +39,11 @@ emit_vertex_input(struct anv_pipeline *pipeline, const uint32_t num_dwords = 1 + info->attributeCount * 2; uint32_t *p; + static_assert(ANV_GEN >= 8, "should not be compiling this for gen < 8"); + if (info->attributeCount > 0) { p = anv_batch_emitn(&pipeline->batch, num_dwords, - GEN8_3DSTATE_VERTEX_ELEMENTS); + GENX(3DSTATE_VERTEX_ELEMENTS)); } for (uint32_t i = 0; i < info->attributeCount; i++) { @@ -48,7 +51,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, &info->pVertexAttributeDescriptions[i]; const struct anv_format *format = anv_format_for_vk_format(desc->format); - struct GEN8_VERTEX_ELEMENT_STATE element = { + struct GENX(VERTEX_ELEMENT_STATE) element = { .VertexBufferIndex = desc->binding, .Valid = true, .SourceElementFormat = format->surface_format, @@ -59,9 +62,9 @@ emit_vertex_input(struct anv_pipeline *pipeline, .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, .Component3Control = format->num_channels >= 4 ? 
VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP }; - GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + i * 2], &element); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_INSTANCING, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), .InstancingEnable = pipeline->instancing_enable[desc->binding], .VertexElementIndex = i, /* Vulkan so far doesn't have an instance divisor, so @@ -69,7 +72,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, .InstanceDataStepRate = 1); } - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_SGVS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, .VertexIDComponentNumber = 2, .VertexIDElementOffset = info->bindingCount, @@ -83,7 +86,7 @@ emit_ia_state(struct anv_pipeline *pipeline, const VkPipelineInputAssemblyStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), .PrimitiveTopologyType = pipeline->topology); } @@ -110,8 +113,8 @@ emit_rs_state(struct anv_pipeline *pipeline, [VK_FRONT_FACE_CW] = Clockwise }; - struct GEN8_3DSTATE_SF sf = { - GEN8_3DSTATE_SF_header, + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), .ViewportTransformEnable = !(extra && extra->disable_viewport), .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, @@ -122,19 +125,25 @@ emit_rs_state(struct anv_pipeline *pipeline, /* FINISHME: VkBool32 rasterizerDiscardEnable; */ - GEN8_3DSTATE_SF_pack(NULL, pipeline->gen8.sf, &sf); + GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf); - struct GEN8_3DSTATE_RASTER raster = { - GEN8_3DSTATE_RASTER_header, + struct GENX(3DSTATE_RASTER) raster = { + GENX(3DSTATE_RASTER_header), .FrontWinding = vk_to_gen_front_face[info->frontFace], .CullMode = vk_to_gen_cullmode[info->cullMode], .FrontFaceFillMode = 
vk_to_gen_fillmode[info->fillMode], .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], .ScissorRectangleEnable = !(extra && extra->disable_scissor), +#if ANV_GEN == 8 .ViewportZClipTestEnable = info->depthClipEnable +#else + /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ + .ViewportZFarClipTestEnable = info->depthClipEnable, + .ViewportZNearClipTestEnable = info->depthClipEnable, +#endif }; - GEN8_3DSTATE_RASTER_pack(NULL, pipeline->gen8.raster, &raster); + GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster); } static void @@ -192,11 +201,11 @@ emit_cb_state(struct anv_pipeline *pipeline, [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, }; - uint32_t num_dwords = GEN8_BLEND_STATE_length; + uint32_t num_dwords = GENX(BLEND_STATE_length); pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); - struct GEN8_BLEND_STATE blend_state = { + struct GENX(BLEND_STATE) blend_state = { .AlphaToCoverageEnable = info->alphaToCoverageEnable, .AlphaToOneEnable = info->alphaToOneEnable, }; @@ -210,7 +219,7 @@ emit_cb_state(struct anv_pipeline *pipeline, blend_state.IndependentAlphaBlendEnable = true; } - blend_state.Entry[i] = (struct GEN8_BLEND_STATE_ENTRY) { + blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) { .LogicOpEnable = info->logicOpEnable, .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], .ColorBufferBlendEnable = a->blendEnable, @@ -248,9 +257,9 @@ emit_cb_state(struct anv_pipeline *pipeline, } } - GEN8_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); + GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_BLEND_STATE_POINTERS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), .BlendStatePointer = pipeline->blend_state.offset, .BlendStatePointerValid = true); } @@ -285,6 +294,7 @@ emit_ds_state(struct anv_pipeline *pipeline, /* We're going to OR this together with the dynamic state. 
We need * to make sure it's initialized to something useful. */ + /* FIXME: gen9 wm_depth_stencil */ memset(pipeline->gen8.wm_depth_stencil, 0, sizeof(pipeline->gen8.wm_depth_stencil)); return; @@ -292,7 +302,7 @@ emit_ds_state(struct anv_pipeline *pipeline, /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */ - struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = { .DepthTestEnable = info->depthTestEnable, .DepthBufferWriteEnable = info->depthWriteEnable, .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], @@ -309,11 +319,11 @@ emit_ds_state(struct anv_pipeline *pipeline, .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], }; - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->gen8.wm_depth_stencil, &wm_depth_stencil); + GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, pipeline->gen8.wm_depth_stencil, &wm_depth_stencil); } VkResult -gen8_graphics_pipeline_create( +genX(graphics_pipeline_create)( VkDevice _device, const VkGraphicsPipelineCreateInfo* pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, @@ -353,34 +363,34 @@ gen8_graphics_pipeline_create( emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); emit_cb_state(pipeline, pCreateInfo->pColorBlendState); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_STATISTICS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_STATISTICS), .StatisticsEnable = true); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_HS, .Enable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_TE, .TEEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_DS, .FunctionEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), .Enable = false); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), .TEEnable = false); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), .FunctionEnable = 
false); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), .ConstantBufferOffset = 0, .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), .ConstantBufferOffset = 4, .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), .ConstantBufferOffset = 8, .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM_CHROMAKEY, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM_CHROMAKEY), .ChromaKeyKillEnable = false); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), .ClipEnable = true, .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), .MinimumPointWidth = 0.125, .MaximumPointWidth = 255.875); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), .StatisticsEnable = true, .LineEndCapAntialiasingRegionWidth = _05pixels, .LineAntialiasingRegionWidth = _10pixels, @@ -394,30 +404,30 @@ gen8_graphics_pipeline_create( uint32_t log2_samples = __builtin_ffs(samples) - 1; bool enable_sampling = samples > 1 ? 
true : false; - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_MULTISAMPLE, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), .PixelPositionOffsetEnable = enable_sampling, .PixelLocation = CENTER, .NumberofMultisamples = log2_samples); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SAMPLE_MASK, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), .SampleMask = 0xffff); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_VS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), .VSURBStartingAddress = pipeline->urb.vs_start, .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_GS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_GS), .GSURBStartingAddress = pipeline->urb.gs_start, .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_HS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_HS), .HSURBStartingAddress = pipeline->urb.vs_start, .HSURBEntryAllocationSize = 0, .HSNumberofURBEntries = 0); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_DS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_DS), .DSURBStartingAddress = pipeline->urb.vs_start, .DSURBEntryAllocationSize = 0, .DSNumberofURBEntries = 0); @@ -427,9 +437,9 @@ gen8_graphics_pipeline_create( length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; if (pipeline->gs_vec4 == NO_KERNEL) - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, .Enable = false); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false); else - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .SingleProgramFlow = false, .KernelStartPointer = pipeline->gs_vec4, .VectorMaskEnable = Dmask, @@ -475,14 +485,14 @@ gen8_graphics_pipeline_create( length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; if 
(pipeline->vs_simd8 == NO_KERNEL || (extra && extra->disable_vs)) - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .FunctionEnable = false, /* Even if VS is disabled, SBE still gets the amount of * vertex data to read from this field. */ .VertexURBEntryOutputReadOffset = offset, .VertexURBEntryOutputLength = length); else - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .KernelStartPointer = pipeline->vs_simd8, .SingleVertexDispatch = Multiple, .VectorMaskEnable = Dmask, @@ -525,8 +535,8 @@ gen8_graphics_pipeline_create( else fs_input_map = &gs_prog_data->base.vue_map; - struct GEN8_3DSTATE_SBE_SWIZ swiz = { - GEN8_3DSTATE_SBE_SWIZ_header, + struct GENX(3DSTATE_SBE_SWIZ) swiz = { + GENX(3DSTATE_SBE_SWIZ_header), }; int max_source_attr = 0; @@ -548,20 +558,59 @@ gen8_graphics_pipeline_create( swiz.Attribute[input_index].SourceAttribute = source_attr; } - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), .AttributeSwizzleEnable = true, .ForceVertexURBEntryReadLength = false, .ForceVertexURBEntryReadOffset = false, .VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2), .PointSpriteTextureCoordinateOrigin = UPPERLEFT, .NumberofSFOutputAttributes = - wm_prog_data->num_varying_inputs); + wm_prog_data->num_varying_inputs, + +#if ANV_GEN >= 9 + .Attribute0ActiveComponentFormat = ACF_XYZW, + .Attribute1ActiveComponentFormat = ACF_XYZW, + .Attribute2ActiveComponentFormat = ACF_XYZW, + .Attribute3ActiveComponentFormat = ACF_XYZW, + .Attribute4ActiveComponentFormat = ACF_XYZW, + .Attribute5ActiveComponentFormat = ACF_XYZW, + .Attribute6ActiveComponentFormat = ACF_XYZW, + .Attribute7ActiveComponentFormat = ACF_XYZW, + .Attribute8ActiveComponentFormat = ACF_XYZW, + .Attribute9ActiveComponentFormat = ACF_XYZW, + .Attribute10ActiveComponentFormat = ACF_XYZW, + .Attribute11ActiveComponentFormat = ACF_XYZW, 
+ .Attribute12ActiveComponentFormat = ACF_XYZW, + .Attribute13ActiveComponentFormat = ACF_XYZW, + .Attribute14ActiveComponentFormat = ACF_XYZW, + .Attribute15ActiveComponentFormat = ACF_XYZW, + /* wow, much field, very attribute */ + .Attribute16ActiveComponentFormat = ACF_XYZW, + .Attribute17ActiveComponentFormat = ACF_XYZW, + .Attribute18ActiveComponentFormat = ACF_XYZW, + .Attribute19ActiveComponentFormat = ACF_XYZW, + .Attribute20ActiveComponentFormat = ACF_XYZW, + .Attribute21ActiveComponentFormat = ACF_XYZW, + .Attribute22ActiveComponentFormat = ACF_XYZW, + .Attribute23ActiveComponentFormat = ACF_XYZW, + .Attribute24ActiveComponentFormat = ACF_XYZW, + .Attribute25ActiveComponentFormat = ACF_XYZW, + .Attribute26ActiveComponentFormat = ACF_XYZW, + .Attribute27ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute30ActiveComponentFormat = ACF_XYZW, +#endif + ); uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, - GEN8_3DSTATE_SBE_SWIZ_length); - GEN8_3DSTATE_SBE_SWIZ_pack(&pipeline->batch, dw, &swiz); + GENX(3DSTATE_SBE_SWIZ_length)); + GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, + const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), .KernelStartPointer0 = pipeline->ps_ksp0, .SingleProgramFlow = false, @@ -571,7 +620,7 @@ gen8_graphics_pipeline_create( .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), - .MaximumNumberofThreadsPerPSD = 64 - 2, + .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
POSOFFSET_SAMPLE: POSOFFSET_NONE, .PushConstantEnable = wm_prog_data->base.nr_params > 0, @@ -587,20 +636,25 @@ gen8_graphics_pipeline_create( .KernelStartPointer2 = pipeline->ps_ksp2); bool per_sample_ps = false; - anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), .PixelShaderValid = true, .PixelShaderKillsPixel = wm_prog_data->uses_kill, .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, .AttributeEnable = wm_prog_data->num_varying_inputs > 0, .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, - .PixelShaderIsPerSample = per_sample_ps); + .PixelShaderIsPerSample = per_sample_ps, +#if ANV_GEN >= 9 + .PixelShaderPullsBary = wm_prog_data->pulls_bary, + .InputCoverageMaskState = ICMS_NONE +#endif + ); *pPipeline = anv_pipeline_to_handle(pipeline); return VK_SUCCESS; } -VkResult gen8_compute_pipeline_create( +VkResult genX(compute_pipeline_create)( VkDevice _device, const VkComputePipelineCreateInfo* pCreateInfo, VkPipeline* pPipeline) @@ -654,7 +708,7 @@ VkResult gen8_compute_pipeline_create( const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE, + anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_COMPUTE], .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), .ScratchSpaceBasePointerHigh = 0, @@ -663,7 +717,9 @@ VkResult gen8_compute_pipeline_create( .MaximumNumberofThreads = device->info.max_cs_threads - 1, .NumberofURBEntries = 2, .ResetGatewayTimer = true, +#if ANV_GEN == 8 .BypassGatewayControl = true, +#endif .URBEntryAllocationSize = 2, .CURBEAllocationSize = 0); diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 4e441797523..c7dd6b3f836 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -30,14 +30,15 @@ #include "anv_private.h" #include "gen8_pack.h" +#include "gen9_pack.h" void 
-gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, - uint32_t offset, uint32_t range, uint32_t stride) +genX(fill_buffer_surface_state)(void *state, const struct anv_format *format, + uint32_t offset, uint32_t range, uint32_t stride) { uint32_t num_elements = range / stride; - struct GEN8_RENDER_SURFACE_STATE surface_state = { + struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = SURFTYPE_BUFFER, .SurfaceArray = false, .SurfaceFormat = format->surface_format, @@ -46,7 +47,7 @@ gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, .TileMode = LINEAR, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GEN8_MOCS, + .MemoryObjectControlState = GENX(MOCS), .Height = ((num_elements - 1) >> 7) & 0x3fff, .Width = (num_elements - 1) & 0x7f, .Depth = ((num_elements - 1) >> 21) & 0x3f, @@ -60,7 +61,7 @@ gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, .SurfaceBaseAddress = { NULL, offset }, }; - GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); + GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); } static const uint8_t anv_halign[] = { @@ -76,8 +77,8 @@ static const uint8_t anv_valign[] = { }; static struct anv_state -gen8_alloc_surface_state(struct anv_device *device, - struct anv_cmd_buffer *cmd_buffer) +alloc_surface_state(struct anv_device *device, + struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer) { return anv_cmd_buffer_alloc_surface_state(cmd_buffer); @@ -87,10 +88,10 @@ gen8_alloc_surface_state(struct anv_device *device, } void -gen8_image_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) +genX(image_view_init)(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) { ANV_FROM_HANDLE(anv_image, image, 
pCreateInfo->image); @@ -173,7 +174,7 @@ gen8_image_view_init(struct anv_image_view *iview, [ISL_TILING_W] = WMAJOR, }; - struct GEN8_RENDER_SURFACE_STATE surface_state = { + struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = image->surface_type, .SurfaceArray = image->array_size > 1, .SurfaceFormat = format_info->surface_format, @@ -184,7 +185,7 @@ gen8_image_view_init(struct anv_image_view *iview, .VerticalLineStrideOffset = 0, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GEN8_MOCS, + .MemoryObjectControlState = GENX(MOCS), /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have @@ -221,7 +222,7 @@ gen8_image_view_init(struct anv_image_view *iview, if (image->needs_nonrt_surface_state) { iview->nonrt_surface_state = - gen8_alloc_surface_state(device, cmd_buffer); + alloc_surface_state(device, cmd_buffer); /* For non render target surfaces, the hardware interprets field * MIPCount/LOD as MIPCount. The range of levels accessible by the @@ -230,13 +231,13 @@ gen8_image_view_init(struct anv_image_view *iview, surface_state.SurfaceMinLOD = range->baseMipLevel; surface_state.MIPCountLOD = range->mipLevels - 1; - GEN8_RENDER_SURFACE_STATE_pack(NULL, iview->nonrt_surface_state.map, - &surface_state); + GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, + &surface_state); } if (image->needs_color_rt_surface_state) { iview->color_rt_surface_state = - gen8_alloc_surface_state(device, cmd_buffer); + alloc_surface_state(device, cmd_buffer); /* For render target surfaces, the hardware interprets field * MIPCount/LOD as LOD. 
The Broadwell PRM says: @@ -247,12 +248,12 @@ gen8_image_view_init(struct anv_image_view *iview, surface_state.MIPCountLOD = range->baseMipLevel; surface_state.SurfaceMinLOD = 0; - GEN8_RENDER_SURFACE_STATE_pack(NULL, iview->color_rt_surface_state.map, - &surface_state); + GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->color_rt_surface_state.map, + &surface_state); } } -VkResult gen8_CreateSampler( +VkResult genX(CreateSampler)( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler) @@ -308,11 +309,13 @@ VkResult gen8_CreateSampler( max_anisotropy = RATIO21; } - struct GEN8_SAMPLER_STATE sampler_state = { + struct GENX(SAMPLER_STATE) sampler_state = { .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, .LODPreClampMode = 0, +#if ANV_GEN == 8 .BaseMipLevel = 0.0, +#endif .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], .MagModeFilter = mag_filter, .MinModeFilter = min_filter, @@ -345,7 +348,7 @@ VkResult gen8_CreateSampler( .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], }; - GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); *pSampler = anv_sampler_to_handle(sampler); diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index 66f7480a5e5..cba0515161a 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -26,7 +26,9 @@ #include "anv_private.h" -#if (ANV_GEN == 8) +#if (ANV_GEN == 9) +# include "gen9_pack.h" +#elif (ANV_GEN == 8) # include "gen8_pack.h" #elif (ANV_IS_HASWELL) # include "gen75_pack.h" -- cgit v1.2.3 From d6d82f1ab33b7885f11dd720e93d9890df27ce36 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 26 Nov 2015 10:11:52 -0800 Subject: vk: Fix 3DSTATE_WM_DEPTH_STENCIL for gen8 This packet is a different size on gen8 and we hit an assertion when we try to merge a gen9 size dword array from the pipeline with the gen8 sized array we create from 
dynamic state. Use a static assert in the merge macro and fix this issue by using different wm_depth_stencil arrays on gen8 and gen9. --- src/vulkan/anv_private.h | 8 ++++++-- src/vulkan/gen8_cmd_buffer.c | 4 ++-- src/vulkan/gen8_pipeline.c | 8 ++++++-- 3 files changed, 14 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 36cee88602d..d208b2d74a0 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -677,7 +677,7 @@ __gen_combine_address(struct anv_batch *batch, void *location, do { \ uint32_t *dw; \ \ - assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \ + static_assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1), "mismatch merge"); \ dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \ for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \ dw[i] = (dwords0)[i] | (dwords1)[i]; \ @@ -1201,8 +1201,12 @@ struct anv_pipeline { struct { uint32_t sf[4]; uint32_t raster[5]; - uint32_t wm_depth_stencil[4]; + uint32_t wm_depth_stencil[3]; } gen8; + + struct { + uint32_t wm_depth_stencil[4]; + } gen9; }; struct anv_graphics_pipeline_create_info { diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 09315319001..1d1433817d9 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -248,7 +248,7 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) pipeline->gen8.raster); } - /* Stencil reference values were moves from COLOR_CALC_STATE in gen8 to + /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits gets split * across different state packets for gen8 and gen9. We handle that by * using a big old #if switch here. 
@@ -347,7 +347,7 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil); anv_batch_emit_merge(&cmd_buffer->batch, dwords, - pipeline->gen8.wm_depth_stencil); + pipeline->gen9.wm_depth_stencil); } #endif diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 0038bca01b4..b62bc44e710 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -290,13 +290,17 @@ static void emit_ds_state(struct anv_pipeline *pipeline, const VkPipelineDepthStencilStateCreateInfo *info) { + uint32_t *dw = ANV_GEN == 8 ? + pipeline->gen8.wm_depth_stencil : pipeline->gen9.wm_depth_stencil; + if (info == NULL) { /* We're going to OR this together with the dynamic state. We need * to make sure it's initialized to something useful. */ - /* FIXME: gen9 wm_depth_stencil */ memset(pipeline->gen8.wm_depth_stencil, 0, sizeof(pipeline->gen8.wm_depth_stencil)); + memset(pipeline->gen9.wm_depth_stencil, 0, + sizeof(pipeline->gen9.wm_depth_stencil)); return; } @@ -319,7 +323,7 @@ emit_ds_state(struct anv_pipeline *pipeline, .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], }; - GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, pipeline->gen8.wm_depth_stencil, &wm_depth_stencil); + GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil); } VkResult -- cgit v1.2.3 From 3db43e8f3e60f8dc746eb4ab2e86f6b1b32d248a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 10:31:44 -0800 Subject: vk/0.210.0: Switch to the new-style handle declarations --- include/vulkan/vk_ext_khr_device_swapchain.h | 2 +- include/vulkan/vulkan.h | 87 ++++++++++------------------ src/vulkan/anv_descriptor_set.c | 4 +- src/vulkan/anv_device.c | 2 +- src/vulkan/anv_meta.c | 4 +- src/vulkan/anv_pipeline.c | 2 +- src/vulkan/anv_private.h | 4 +- 7 files changed, 38 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/include/vulkan/vk_ext_khr_device_swapchain.h 
b/include/vulkan/vk_ext_khr_device_swapchain.h index 3bf73c84a96..af95ff0cdfb 100644 --- a/include/vulkan/vk_ext_khr_device_swapchain.h +++ b/include/vulkan/vk_ext_khr_device_swapchain.h @@ -41,7 +41,7 @@ extern "C" // ------------------------------------------------------------------------------------------------ // Objects -VK_DEFINE_NONDISP_HANDLE(VkSwapchainKHR); +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSwapchainKHR); // ------------------------------------------------------------------------------------------------ // Enumeration constants diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 03bcefea50a..fd898736fc5 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -44,46 +44,17 @@ extern "C" { #define VK_API_VERSION VK_MAKE_VERSION(0, 170, 2) -#if defined(__cplusplus) && (_MSC_VER >= 1800 || __cplusplus >= 201103L) - #define VK_NULL_HANDLE nullptr -#else - #define VK_NULL_HANDLE 0 -#endif +#define VK_NULL_HANDLE 0 + + + +#define VK_DEFINE_HANDLE(object) typedef struct object##_T* object; -#define VK_DEFINE_HANDLE(obj) typedef struct obj##_T* obj; - - -#if defined(__cplusplus) - #if ((defined(_MSC_VER) && _MSC_VER >= 1800) || __cplusplus >= 201103L) - // The bool operator only works if there are no implicit conversions from an obj to - // a bool-compatible type, which can then be used to unintentionally violate type safety. - // C++11 and above supports the "explicit" keyword on conversion operators to stop this - // from happening. 
Otherwise users of C++ below C++11 won't get direct access to evaluating - // the object handle as a bool in expressions like: - // if (obj) vkDestroy(obj); - #define VK_NONDISP_HANDLE_OPERATOR_BOOL() explicit operator bool() const { return handle != 0; } - #define VK_NONDISP_HANDLE_CONSTRUCTOR_FROM_UINT64(obj) \ - explicit obj(uint64_t x) : handle(x) { } \ - obj(decltype(nullptr)) : handle(0) { } - #else - #define VK_NONDISP_HANDLE_OPERATOR_BOOL() - #define VK_NONDISP_HANDLE_CONSTRUCTOR_FROM_UINT64(obj) \ - obj(uint64_t x) : handle(x) { } - #endif - #define VK_DEFINE_NONDISP_HANDLE(obj) \ - struct obj { \ - obj() : handle(0) { } \ - VK_NONDISP_HANDLE_CONSTRUCTOR_FROM_UINT64(obj) \ - obj& operator =(uint64_t x) { handle = x; return *this; } \ - bool operator==(const obj& other) const { return handle == other.handle; } \ - bool operator!=(const obj& other) const { return handle != other.handle; } \ - bool operator!() const { return !handle; } \ - VK_NONDISP_HANDLE_OPERATOR_BOOL() \ - uint64_t handle; \ - }; +#if defined(__LP64__) || defined(_WIN64) || defined(__x86_64__) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) + #define VK_DEFINE_NON_DISPATCHABLE_HANDLE(object) typedef struct object##_T *object; #else - #define VK_DEFINE_NONDISP_HANDLE(obj) typedef struct obj##_T { uint64_t handle; } obj; + #define VK_DEFINE_NON_DISPATCHABLE_HANDLE(object) typedef uint64_t object; #endif @@ -98,27 +69,27 @@ VK_DEFINE_HANDLE(VkPhysicalDevice) VK_DEFINE_HANDLE(VkDevice) VK_DEFINE_HANDLE(VkQueue) VK_DEFINE_HANDLE(VkCmdBuffer) -VK_DEFINE_NONDISP_HANDLE(VkFence) -VK_DEFINE_NONDISP_HANDLE(VkDeviceMemory) -VK_DEFINE_NONDISP_HANDLE(VkBuffer) -VK_DEFINE_NONDISP_HANDLE(VkImage) -VK_DEFINE_NONDISP_HANDLE(VkSemaphore) -VK_DEFINE_NONDISP_HANDLE(VkEvent) -VK_DEFINE_NONDISP_HANDLE(VkQueryPool) -VK_DEFINE_NONDISP_HANDLE(VkBufferView) -VK_DEFINE_NONDISP_HANDLE(VkImageView) -VK_DEFINE_NONDISP_HANDLE(VkShaderModule) 
-VK_DEFINE_NONDISP_HANDLE(VkShader) -VK_DEFINE_NONDISP_HANDLE(VkPipelineCache) -VK_DEFINE_NONDISP_HANDLE(VkPipelineLayout) -VK_DEFINE_NONDISP_HANDLE(VkRenderPass) -VK_DEFINE_NONDISP_HANDLE(VkPipeline) -VK_DEFINE_NONDISP_HANDLE(VkDescriptorSetLayout) -VK_DEFINE_NONDISP_HANDLE(VkSampler) -VK_DEFINE_NONDISP_HANDLE(VkDescriptorPool) -VK_DEFINE_NONDISP_HANDLE(VkDescriptorSet) -VK_DEFINE_NONDISP_HANDLE(VkFramebuffer) -VK_DEFINE_NONDISP_HANDLE(VkCmdPool) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFence) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDeviceMemory) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBuffer) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkImage) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSemaphore) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkEvent) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkQueryPool) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBufferView) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkImageView) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkShaderModule) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkShader) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineCache) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineLayout) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkRenderPass) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipeline) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorSetLayout) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSampler) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorPool) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorSet) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFramebuffer) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkCmdPool) #define VK_LOD_CLAMP_NONE 1000.0f #define VK_REMAINING_MIP_LEVELS (~0U) diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index d518dfa34ba..22041b540d6 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -286,7 +286,7 @@ VkResult anv_CreateDescriptorPool( VkDescriptorPool* pDescriptorPool) { anv_finishme("VkDescriptorPool is a stub"); - pDescriptorPool->handle = 1; + *pDescriptorPool = (VkDescriptorPool)1; return VK_SUCCESS; } @@ -466,7 +466,7 @@ void 
anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: for (uint32_t j = 0; j < write->count; j++) { - assert(write->pDescriptors[j].bufferInfo.buffer.handle); + assert(write->pDescriptors[j].bufferInfo.buffer); ANV_FROM_HANDLE(anv_buffer, buffer, write->pDescriptors[j].bufferInfo.buffer); assert(buffer); diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 94bc4f11f27..aca082ac1b5 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1336,7 +1336,7 @@ VkResult anv_CreateSemaphore( const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore) { - pSemaphore->handle = 1; + *pSemaphore = (VkSemaphore)1; stub_return(VK_SUCCESS); } diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index c9e46a9286e..96792e9d6da 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -335,7 +335,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_FRAGMENT, - .shader = {0}, /* TEMPLATE VALUE! FILL ME IN! */ + .shader = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! 
*/ .pSpecializationInfo = NULL }, }; @@ -452,7 +452,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VkTexFilter blit_filter) { struct anv_device *device = cmd_buffer->device; - VkDescriptorPool dummy_desc_pool = { .handle = 1 }; + VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; struct blit_vb_data { float pos[2]; diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index cf4bf9f87a0..a0606adaea6 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -189,7 +189,7 @@ VkResult anv_CreatePipelineCache( const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache) { - pPipelineCache->handle = 1; + *pPipelineCache = (VkPipelineCache)1; stub_return(VK_SUCCESS); } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index d208b2d74a0..c99d4e0a59e 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1537,13 +1537,13 @@ void anv_dump_image_to_ppm(struct anv_device *device, static inline struct __anv_type * \ __anv_type ## _from_handle(__VkType _handle) \ { \ - return (struct __anv_type *) _handle.handle; \ + return (struct __anv_type *)(uintptr_t) _handle; \ } \ \ static inline __VkType \ __anv_type ## _to_handle(struct __anv_type *_obj) \ { \ - return (__VkType) { .handle = (uint64_t) _obj }; \ + return (__VkType)(uintptr_t) _obj; \ } #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ -- cgit v1.2.3 From 6a8a542610243f32ee20989778af06d66d7b5b1a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 11:12:44 -0800 Subject: vk/0.210.0: A pile of minor enum updates --- include/vulkan/vulkan.h | 172 +++++++++++++++++++++---------------------- src/vulkan/anv_device.c | 2 +- src/vulkan/anv_dump.c | 2 +- src/vulkan/anv_formats.c | 14 ++-- src/vulkan/anv_image.c | 4 +- src/vulkan/anv_pipeline.c | 22 +++--- src/vulkan/anv_wsi_wayland.c | 2 +- src/vulkan/anv_wsi_x11.c | 2 +- src/vulkan/gen7_pipeline.c | 14 ---- src/vulkan/genX_cmd_buffer.c | 4 +- 10 files changed, 112 
insertions(+), 126 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index fd898736fc5..b90181a53d4 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -45,7 +45,7 @@ extern "C" { #define VK_NULL_HANDLE 0 - + #define VK_DEFINE_HANDLE(object) typedef struct object##_T* object; @@ -59,8 +59,8 @@ extern "C" { -typedef uint32_t VkBool32; typedef uint32_t VkFlags; +typedef uint32_t VkBool32; typedef uint64_t VkDeviceSize; typedef uint32_t VkSampleMask; @@ -377,26 +377,26 @@ typedef enum { VK_FORMAT_MAX_ENUM = 0x7FFFFFFF } VkFormat; -typedef enum { +typedef enum VkImageType { VK_IMAGE_TYPE_1D = 0, VK_IMAGE_TYPE_2D = 1, VK_IMAGE_TYPE_3D = 2, VK_IMAGE_TYPE_BEGIN_RANGE = VK_IMAGE_TYPE_1D, VK_IMAGE_TYPE_END_RANGE = VK_IMAGE_TYPE_3D, - VK_IMAGE_TYPE_NUM = (VK_IMAGE_TYPE_3D - VK_IMAGE_TYPE_1D + 1), + VK_IMAGE_TYPE_RANGE_SIZE = (VK_IMAGE_TYPE_3D - VK_IMAGE_TYPE_1D + 1), VK_IMAGE_TYPE_MAX_ENUM = 0x7FFFFFFF } VkImageType; -typedef enum { - VK_IMAGE_TILING_LINEAR = 0, - VK_IMAGE_TILING_OPTIMAL = 1, - VK_IMAGE_TILING_BEGIN_RANGE = VK_IMAGE_TILING_LINEAR, - VK_IMAGE_TILING_END_RANGE = VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_TILING_NUM = (VK_IMAGE_TILING_OPTIMAL - VK_IMAGE_TILING_LINEAR + 1), +typedef enum VkImageTiling { + VK_IMAGE_TILING_OPTIMAL = 0, + VK_IMAGE_TILING_LINEAR = 1, + VK_IMAGE_TILING_BEGIN_RANGE = VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_TILING_END_RANGE = VK_IMAGE_TILING_LINEAR, + VK_IMAGE_TILING_RANGE_SIZE = (VK_IMAGE_TILING_LINEAR - VK_IMAGE_TILING_OPTIMAL + 1), VK_IMAGE_TILING_MAX_ENUM = 0x7FFFFFFF } VkImageTiling; -typedef enum { +typedef enum VkPhysicalDeviceType { VK_PHYSICAL_DEVICE_TYPE_OTHER = 0, VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU = 1, VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU = 2, @@ -404,7 +404,7 @@ typedef enum { VK_PHYSICAL_DEVICE_TYPE_CPU = 4, VK_PHYSICAL_DEVICE_TYPE_BEGIN_RANGE = VK_PHYSICAL_DEVICE_TYPE_OTHER, VK_PHYSICAL_DEVICE_TYPE_END_RANGE = VK_PHYSICAL_DEVICE_TYPE_CPU, - 
VK_PHYSICAL_DEVICE_TYPE_NUM = (VK_PHYSICAL_DEVICE_TYPE_CPU - VK_PHYSICAL_DEVICE_TYPE_OTHER + 1), + VK_PHYSICAL_DEVICE_TYPE_RANGE_SIZE = (VK_PHYSICAL_DEVICE_TYPE_CPU - VK_PHYSICAL_DEVICE_TYPE_OTHER + 1), VK_PHYSICAL_DEVICE_TYPE_MAX_ENUM = 0x7FFFFFFF } VkPhysicalDeviceType; @@ -419,41 +419,41 @@ typedef enum { VK_IMAGE_ASPECT_MAX_ENUM = 0x7FFFFFFF } VkImageAspect; -typedef enum { +typedef enum VkQueryType { VK_QUERY_TYPE_OCCLUSION = 0, VK_QUERY_TYPE_PIPELINE_STATISTICS = 1, VK_QUERY_TYPE_BEGIN_RANGE = VK_QUERY_TYPE_OCCLUSION, VK_QUERY_TYPE_END_RANGE = VK_QUERY_TYPE_PIPELINE_STATISTICS, - VK_QUERY_TYPE_NUM = (VK_QUERY_TYPE_PIPELINE_STATISTICS - VK_QUERY_TYPE_OCCLUSION + 1), + VK_QUERY_TYPE_RANGE_SIZE = (VK_QUERY_TYPE_PIPELINE_STATISTICS - VK_QUERY_TYPE_OCCLUSION + 1), VK_QUERY_TYPE_MAX_ENUM = 0x7FFFFFFF } VkQueryType; -typedef enum { +typedef enum VkSharingMode { VK_SHARING_MODE_EXCLUSIVE = 0, VK_SHARING_MODE_CONCURRENT = 1, VK_SHARING_MODE_BEGIN_RANGE = VK_SHARING_MODE_EXCLUSIVE, VK_SHARING_MODE_END_RANGE = VK_SHARING_MODE_CONCURRENT, - VK_SHARING_MODE_NUM = (VK_SHARING_MODE_CONCURRENT - VK_SHARING_MODE_EXCLUSIVE + 1), + VK_SHARING_MODE_RANGE_SIZE = (VK_SHARING_MODE_CONCURRENT - VK_SHARING_MODE_EXCLUSIVE + 1), VK_SHARING_MODE_MAX_ENUM = 0x7FFFFFFF } VkSharingMode; -typedef enum { +typedef enum VkImageLayout { VK_IMAGE_LAYOUT_UNDEFINED = 0, VK_IMAGE_LAYOUT_GENERAL = 1, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL = 2, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL = 3, VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL = 4, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL = 5, - VK_IMAGE_LAYOUT_TRANSFER_SOURCE_OPTIMAL = 6, - VK_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL = 7, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL = 6, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL = 7, VK_IMAGE_LAYOUT_PREINITIALIZED = 8, VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_END_RANGE = VK_IMAGE_LAYOUT_PREINITIALIZED, - VK_IMAGE_LAYOUT_NUM = (VK_IMAGE_LAYOUT_PREINITIALIZED - 
VK_IMAGE_LAYOUT_UNDEFINED + 1), + VK_IMAGE_LAYOUT_RANGE_SIZE = (VK_IMAGE_LAYOUT_PREINITIALIZED - VK_IMAGE_LAYOUT_UNDEFINED + 1), VK_IMAGE_LAYOUT_MAX_ENUM = 0x7FFFFFFF } VkImageLayout; -typedef enum { +typedef enum VkImageViewType { VK_IMAGE_VIEW_TYPE_1D = 0, VK_IMAGE_VIEW_TYPE_2D = 1, VK_IMAGE_VIEW_TYPE_3D = 2, @@ -463,7 +463,7 @@ typedef enum { VK_IMAGE_VIEW_TYPE_CUBE_ARRAY = 6, VK_IMAGE_VIEW_TYPE_BEGIN_RANGE = VK_IMAGE_VIEW_TYPE_1D, VK_IMAGE_VIEW_TYPE_END_RANGE = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY, - VK_IMAGE_VIEW_TYPE_NUM = (VK_IMAGE_VIEW_TYPE_CUBE_ARRAY - VK_IMAGE_VIEW_TYPE_1D + 1), + VK_IMAGE_VIEW_TYPE_RANGE_SIZE = (VK_IMAGE_VIEW_TYPE_CUBE_ARRAY - VK_IMAGE_VIEW_TYPE_1D + 1), VK_IMAGE_VIEW_TYPE_MAX_ENUM = 0x7FFFFFFF } VkImageViewType; @@ -502,21 +502,21 @@ typedef enum { VK_VERTEX_INPUT_STEP_RATE_MAX_ENUM = 0x7FFFFFFF } VkVertexInputStepRate; -typedef enum { +typedef enum VkPrimitiveTopology { VK_PRIMITIVE_TOPOLOGY_POINT_LIST = 0, VK_PRIMITIVE_TOPOLOGY_LINE_LIST = 1, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP = 2, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST = 3, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP = 4, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN = 5, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ = 6, - VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ = 7, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ = 8, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ = 9, - VK_PRIMITIVE_TOPOLOGY_PATCH = 10, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY = 6, + VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY = 7, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY = 8, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY = 9, + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST = 10, VK_PRIMITIVE_TOPOLOGY_BEGIN_RANGE = VK_PRIMITIVE_TOPOLOGY_POINT_LIST, - VK_PRIMITIVE_TOPOLOGY_END_RANGE = VK_PRIMITIVE_TOPOLOGY_PATCH, - VK_PRIMITIVE_TOPOLOGY_NUM = (VK_PRIMITIVE_TOPOLOGY_PATCH - VK_PRIMITIVE_TOPOLOGY_POINT_LIST + 1), + VK_PRIMITIVE_TOPOLOGY_END_RANGE = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + VK_PRIMITIVE_TOPOLOGY_RANGE_SIZE = 
(VK_PRIMITIVE_TOPOLOGY_PATCH_LIST - VK_PRIMITIVE_TOPOLOGY_POINT_LIST + 1), VK_PRIMITIVE_TOPOLOGY_MAX_ENUM = 0x7FFFFFFF } VkPrimitiveTopology; @@ -550,7 +550,7 @@ typedef enum { VK_FRONT_FACE_MAX_ENUM = 0x7FFFFFFF } VkFrontFace; -typedef enum { +typedef enum VkCompareOp { VK_COMPARE_OP_NEVER = 0, VK_COMPARE_OP_LESS = 1, VK_COMPARE_OP_EQUAL = 2, @@ -561,11 +561,11 @@ typedef enum { VK_COMPARE_OP_ALWAYS = 7, VK_COMPARE_OP_BEGIN_RANGE = VK_COMPARE_OP_NEVER, VK_COMPARE_OP_END_RANGE = VK_COMPARE_OP_ALWAYS, - VK_COMPARE_OP_NUM = (VK_COMPARE_OP_ALWAYS - VK_COMPARE_OP_NEVER + 1), + VK_COMPARE_OP_RANGE_SIZE = (VK_COMPARE_OP_ALWAYS - VK_COMPARE_OP_NEVER + 1), VK_COMPARE_OP_MAX_ENUM = 0x7FFFFFFF } VkCompareOp; -typedef enum { +typedef enum VkStencilOp { VK_STENCIL_OP_KEEP = 0, VK_STENCIL_OP_ZERO = 1, VK_STENCIL_OP_REPLACE = 2, @@ -576,11 +576,11 @@ typedef enum { VK_STENCIL_OP_DEC_WRAP = 7, VK_STENCIL_OP_BEGIN_RANGE = VK_STENCIL_OP_KEEP, VK_STENCIL_OP_END_RANGE = VK_STENCIL_OP_DEC_WRAP, - VK_STENCIL_OP_NUM = (VK_STENCIL_OP_DEC_WRAP - VK_STENCIL_OP_KEEP + 1), + VK_STENCIL_OP_RANGE_SIZE = (VK_STENCIL_OP_DEC_WRAP - VK_STENCIL_OP_KEEP + 1), VK_STENCIL_OP_MAX_ENUM = 0x7FFFFFFF } VkStencilOp; -typedef enum { +typedef enum VkLogicOp { VK_LOGIC_OP_CLEAR = 0, VK_LOGIC_OP_AND = 1, VK_LOGIC_OP_AND_REVERSE = 2, @@ -599,11 +599,11 @@ typedef enum { VK_LOGIC_OP_SET = 15, VK_LOGIC_OP_BEGIN_RANGE = VK_LOGIC_OP_CLEAR, VK_LOGIC_OP_END_RANGE = VK_LOGIC_OP_SET, - VK_LOGIC_OP_NUM = (VK_LOGIC_OP_SET - VK_LOGIC_OP_CLEAR + 1), + VK_LOGIC_OP_RANGE_SIZE = (VK_LOGIC_OP_SET - VK_LOGIC_OP_CLEAR + 1), VK_LOGIC_OP_MAX_ENUM = 0x7FFFFFFF } VkLogicOp; -typedef enum { +typedef enum VkBlendFactor { VK_BLEND_ZERO = 0, VK_BLEND_ONE = 1, VK_BLEND_SRC_COLOR = 2, @@ -625,11 +625,11 @@ typedef enum { VK_BLEND_ONE_MINUS_SRC1_ALPHA = 18, VK_BLEND_BEGIN_RANGE = VK_BLEND_ZERO, VK_BLEND_END_RANGE = VK_BLEND_ONE_MINUS_SRC1_ALPHA, - VK_BLEND_NUM = (VK_BLEND_ONE_MINUS_SRC1_ALPHA - VK_BLEND_ZERO + 1), + VK_BLEND_RANGE_SIZE = 
(VK_BLEND_ONE_MINUS_SRC1_ALPHA - VK_BLEND_ZERO + 1), VK_BLEND_MAX_ENUM = 0x7FFFFFFF } VkBlend; -typedef enum { +typedef enum VkBlendOp { VK_BLEND_OP_ADD = 0, VK_BLEND_OP_SUBTRACT = 1, VK_BLEND_OP_REVERSE_SUBTRACT = 2, @@ -637,11 +637,11 @@ typedef enum { VK_BLEND_OP_MAX = 4, VK_BLEND_OP_BEGIN_RANGE = VK_BLEND_OP_ADD, VK_BLEND_OP_END_RANGE = VK_BLEND_OP_MAX, - VK_BLEND_OP_NUM = (VK_BLEND_OP_MAX - VK_BLEND_OP_ADD + 1), + VK_BLEND_OP_RANGE_SIZE = (VK_BLEND_OP_MAX - VK_BLEND_OP_ADD + 1), VK_BLEND_OP_MAX_ENUM = 0x7FFFFFFF } VkBlendOp; -typedef enum { +typedef enum VkDynamicState { VK_DYNAMIC_STATE_VIEWPORT = 0, VK_DYNAMIC_STATE_SCISSOR = 1, VK_DYNAMIC_STATE_LINE_WIDTH = 2, @@ -653,7 +653,7 @@ typedef enum { VK_DYNAMIC_STATE_STENCIL_REFERENCE = 8, VK_DYNAMIC_STATE_BEGIN_RANGE = VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_END_RANGE = VK_DYNAMIC_STATE_STENCIL_REFERENCE, - VK_DYNAMIC_STATE_NUM = (VK_DYNAMIC_STATE_STENCIL_REFERENCE - VK_DYNAMIC_STATE_VIEWPORT + 1), + VK_DYNAMIC_STATE_RANGE_SIZE = (VK_DYNAMIC_STATE_STENCIL_REFERENCE - VK_DYNAMIC_STATE_VIEWPORT + 1), VK_DYNAMIC_STATE_MAX_ENUM = 0x7FFFFFFF } VkDynamicState; @@ -688,7 +688,7 @@ typedef enum { VK_TEX_ADDRESS_MAX_ENUM = 0x7FFFFFFF } VkTexAddressMode; -typedef enum { +typedef enum VkBorderColor { VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK = 0, VK_BORDER_COLOR_INT_TRANSPARENT_BLACK = 1, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK = 2, @@ -697,11 +697,11 @@ typedef enum { VK_BORDER_COLOR_INT_OPAQUE_WHITE = 5, VK_BORDER_COLOR_BEGIN_RANGE = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, VK_BORDER_COLOR_END_RANGE = VK_BORDER_COLOR_INT_OPAQUE_WHITE, - VK_BORDER_COLOR_NUM = (VK_BORDER_COLOR_INT_OPAQUE_WHITE - VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK + 1), + VK_BORDER_COLOR_RANGE_SIZE = (VK_BORDER_COLOR_INT_OPAQUE_WHITE - VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK + 1), VK_BORDER_COLOR_MAX_ENUM = 0x7FFFFFFF } VkBorderColor; -typedef enum { +typedef enum VkDescriptorType { VK_DESCRIPTOR_TYPE_SAMPLER = 0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER 
= 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE = 2, @@ -715,7 +715,7 @@ typedef enum { VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = 10, VK_DESCRIPTOR_TYPE_BEGIN_RANGE = VK_DESCRIPTOR_TYPE_SAMPLER, VK_DESCRIPTOR_TYPE_END_RANGE = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, - VK_DESCRIPTOR_TYPE_NUM = (VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT - VK_DESCRIPTOR_TYPE_SAMPLER + 1), + VK_DESCRIPTOR_TYPE_RANGE_SIZE = (VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT - VK_DESCRIPTOR_TYPE_SAMPLER + 1), VK_DESCRIPTOR_TYPE_MAX_ENUM = 0x7FFFFFFF } VkDescriptorType; @@ -737,31 +737,31 @@ typedef enum { VK_DESCRIPTOR_SET_USAGE_MAX_ENUM = 0x7FFFFFFF } VkDescriptorSetUsage; -typedef enum { +typedef enum VkAttachmentLoadOp { VK_ATTACHMENT_LOAD_OP_LOAD = 0, VK_ATTACHMENT_LOAD_OP_CLEAR = 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE = 2, VK_ATTACHMENT_LOAD_OP_BEGIN_RANGE = VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_LOAD_OP_END_RANGE = VK_ATTACHMENT_LOAD_OP_DONT_CARE, - VK_ATTACHMENT_LOAD_OP_NUM = (VK_ATTACHMENT_LOAD_OP_DONT_CARE - VK_ATTACHMENT_LOAD_OP_LOAD + 1), + VK_ATTACHMENT_LOAD_OP_RANGE_SIZE = (VK_ATTACHMENT_LOAD_OP_DONT_CARE - VK_ATTACHMENT_LOAD_OP_LOAD + 1), VK_ATTACHMENT_LOAD_OP_MAX_ENUM = 0x7FFFFFFF } VkAttachmentLoadOp; -typedef enum { +typedef enum VkAttachmentStoreOp { VK_ATTACHMENT_STORE_OP_STORE = 0, VK_ATTACHMENT_STORE_OP_DONT_CARE = 1, VK_ATTACHMENT_STORE_OP_BEGIN_RANGE = VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_STORE_OP_END_RANGE = VK_ATTACHMENT_STORE_OP_DONT_CARE, - VK_ATTACHMENT_STORE_OP_NUM = (VK_ATTACHMENT_STORE_OP_DONT_CARE - VK_ATTACHMENT_STORE_OP_STORE + 1), + VK_ATTACHMENT_STORE_OP_RANGE_SIZE = (VK_ATTACHMENT_STORE_OP_DONT_CARE - VK_ATTACHMENT_STORE_OP_STORE + 1), VK_ATTACHMENT_STORE_OP_MAX_ENUM = 0x7FFFFFFF } VkAttachmentStoreOp; -typedef enum { - VK_PIPELINE_BIND_POINT_COMPUTE = 0, - VK_PIPELINE_BIND_POINT_GRAPHICS = 1, - VK_PIPELINE_BIND_POINT_BEGIN_RANGE = VK_PIPELINE_BIND_POINT_COMPUTE, - VK_PIPELINE_BIND_POINT_END_RANGE = VK_PIPELINE_BIND_POINT_GRAPHICS, - VK_PIPELINE_BIND_POINT_NUM = 
(VK_PIPELINE_BIND_POINT_GRAPHICS - VK_PIPELINE_BIND_POINT_COMPUTE + 1), +typedef enum VkPipelineBindPoint { + VK_PIPELINE_BIND_POINT_GRAPHICS = 0, + VK_PIPELINE_BIND_POINT_COMPUTE = 1, + VK_PIPELINE_BIND_POINT_BEGIN_RANGE = VK_PIPELINE_BIND_POINT_GRAPHICS, + VK_PIPELINE_BIND_POINT_END_RANGE = VK_PIPELINE_BIND_POINT_COMPUTE, + VK_PIPELINE_BIND_POINT_RANGE_SIZE = (VK_PIPELINE_BIND_POINT_COMPUTE - VK_PIPELINE_BIND_POINT_GRAPHICS + 1), VK_PIPELINE_BIND_POINT_MAX_ENUM = 0x7FFFFFFF } VkPipelineBindPoint; @@ -774,12 +774,12 @@ typedef enum { VK_CMD_BUFFER_LEVEL_MAX_ENUM = 0x7FFFFFFF } VkCmdBufferLevel; -typedef enum { +typedef enum VkIndexType { VK_INDEX_TYPE_UINT16 = 0, VK_INDEX_TYPE_UINT32 = 1, VK_INDEX_TYPE_BEGIN_RANGE = VK_INDEX_TYPE_UINT16, VK_INDEX_TYPE_END_RANGE = VK_INDEX_TYPE_UINT32, - VK_INDEX_TYPE_NUM = (VK_INDEX_TYPE_UINT32 - VK_INDEX_TYPE_UINT16 + 1), + VK_INDEX_TYPE_RANGE_SIZE = (VK_INDEX_TYPE_UINT32 - VK_INDEX_TYPE_UINT16 + 1), VK_INDEX_TYPE_MAX_ENUM = 0x7FFFFFFF } VkIndexType; @@ -802,7 +802,7 @@ typedef enum { } VkRenderPassContents; -typedef enum { +typedef enum VkFormatFeatureFlagBits { VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT = 0x00000001, VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT = 0x00000002, VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT = 0x00000004, @@ -813,14 +813,14 @@ typedef enum { VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT = 0x00000080, VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT = 0x00000100, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000200, - VK_FORMAT_FEATURE_BLIT_SOURCE_BIT = 0x00000400, - VK_FORMAT_FEATURE_BLIT_DESTINATION_BIT = 0x00000800, + VK_FORMAT_FEATURE_BLIT_SRC_BIT = 0x00000400, + VK_FORMAT_FEATURE_BLIT_DST_BIT = 0x00000800, } VkFormatFeatureFlagBits; typedef VkFlags VkFormatFeatureFlags; -typedef enum { - VK_IMAGE_USAGE_TRANSFER_SOURCE_BIT = 0x00000001, - VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT = 0x00000002, +typedef enum VkImageUsageFlagBits { + VK_IMAGE_USAGE_TRANSFER_SRC_BIT = 0x00000001, + VK_IMAGE_USAGE_TRANSFER_DST_BIT = 
0x00000002, VK_IMAGE_USAGE_SAMPLED_BIT = 0x00000004, VK_IMAGE_USAGE_STORAGE_BIT = 0x00000008, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT = 0x00000010, @@ -830,7 +830,7 @@ typedef enum { } VkImageUsageFlagBits; typedef VkFlags VkImageUsageFlags; -typedef enum { +typedef enum VkImageCreateFlagBits { VK_IMAGE_CREATE_SPARSE_BINDING_BIT = 0x00000001, VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, VK_IMAGE_CREATE_SPARSE_ALIASED_BIT = 0x00000004, @@ -839,7 +839,7 @@ typedef enum { } VkImageCreateFlagBits; typedef VkFlags VkImageCreateFlags; -typedef enum { +typedef enum VkSampleCountFlagBits { VK_SAMPLE_COUNT_1_BIT = 0x00000001, VK_SAMPLE_COUNT_2_BIT = 0x00000002, VK_SAMPLE_COUNT_4_BIT = 0x00000004, @@ -850,12 +850,11 @@ typedef enum { } VkSampleCountFlagBits; typedef VkFlags VkSampleCountFlags; -typedef enum { +typedef enum VkQueueFlagBits { VK_QUEUE_GRAPHICS_BIT = 0x00000001, VK_QUEUE_COMPUTE_BIT = 0x00000002, - VK_QUEUE_DMA_BIT = 0x00000004, - VK_QUEUE_SPARSE_MEMMGR_BIT = 0x00000008, - VK_QUEUE_EXTENDED_BIT = 0x40000000, + VK_QUEUE_TRANSFER_BIT = 0x00000004, + VK_QUEUE_SPARSE_BINDING_BIT = 0x00000008, } VkQueueFlagBits; typedef VkFlags VkQueueFlags; @@ -882,19 +881,19 @@ typedef enum { } VkSparseImageFormatFlagBits; typedef VkFlags VkSparseImageFormatFlags; -typedef enum { - VK_SPARSE_MEMORY_BIND_REPLICATE_64KIB_BLOCK_BIT = 0x00000001, +typedef enum VkSparseMemoryBindFlagBits { + VK_SPARSE_MEMORY_BIND_METADATA_BIT = 0x00000001, } VkSparseMemoryBindFlagBits; typedef VkFlags VkSparseMemoryBindFlags; -typedef enum { +typedef enum VkFenceCreateFlagBits { VK_FENCE_CREATE_SIGNALED_BIT = 0x00000001, } VkFenceCreateFlagBits; typedef VkFlags VkFenceCreateFlags; typedef VkFlags VkSemaphoreCreateFlags; typedef VkFlags VkEventCreateFlags; -typedef enum { +typedef enum VkQueryPipelineStatisticFlagBits { VK_QUERY_PIPELINE_STATISTIC_IA_VERTICES_BIT = 0x00000001, VK_QUERY_PIPELINE_STATISTIC_IA_PRIMITIVES_BIT = 0x00000002, VK_QUERY_PIPELINE_STATISTIC_VS_INVOCATIONS_BIT = 0x00000004, @@ 
-909,7 +908,7 @@ typedef enum { } VkQueryPipelineStatisticFlagBits; typedef VkFlags VkQueryPipelineStatisticFlags; -typedef enum { +typedef enum VkQueryResultFlagBits { VK_QUERY_RESULT_DEFAULT = 0, VK_QUERY_RESULT_64_BIT = 0x00000001, VK_QUERY_RESULT_WAIT_BIT = 0x00000002, @@ -918,9 +917,9 @@ typedef enum { } VkQueryResultFlagBits; typedef VkFlags VkQueryResultFlags; -typedef enum { - VK_BUFFER_USAGE_TRANSFER_SOURCE_BIT = 0x00000001, - VK_BUFFER_USAGE_TRANSFER_DESTINATION_BIT = 0x00000002, +typedef enum VkBufferUsageFlagBits { + VK_BUFFER_USAGE_TRANSFER_SRC_BIT = 0x00000001, + VK_BUFFER_USAGE_TRANSFER_DST_BIT = 0x00000002, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT = 0x00000004, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT = 0x00000008, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT = 0x00000010, @@ -962,25 +961,26 @@ typedef enum { } VkChannelFlagBits; typedef VkFlags VkChannelFlags; -typedef enum { +typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT = 0x00000001, VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT = 0x00000002, VK_PIPELINE_CREATE_DERIVATIVE_BIT = 0x00000004, } VkPipelineCreateFlagBits; typedef VkFlags VkPipelineCreateFlags; -typedef enum { +typedef enum VkShaderStageFlagBits { VK_SHADER_STAGE_VERTEX_BIT = 0x00000001, - VK_SHADER_STAGE_TESS_CONTROL_BIT = 0x00000002, - VK_SHADER_STAGE_TESS_EVALUATION_BIT = 0x00000004, + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT = 0x00000002, + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT = 0x00000004, VK_SHADER_STAGE_GEOMETRY_BIT = 0x00000008, VK_SHADER_STAGE_FRAGMENT_BIT = 0x00000010, VK_SHADER_STAGE_COMPUTE_BIT = 0x00000020, + VK_SHADER_STAGE_ALL_GRAPHICS = 0x1F, VK_SHADER_STAGE_ALL = 0x7FFFFFFF, } VkShaderStageFlagBits; typedef VkFlags VkShaderStageFlags; -typedef enum { +typedef enum VkAttachmentDescriptionFlagBits { VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT = 0x00000001, } VkAttachmentDescriptionFlagBits; typedef VkFlags VkAttachmentDescriptionFlags; @@ -990,13 +990,13 @@ typedef enum { } 
VkSubpassDescriptionFlagBits; typedef VkFlags VkSubpassDescriptionFlags; -typedef enum { +typedef enum VkPipelineStageFlagBits { VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT = 0x00000001, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT = 0x00000002, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT = 0x00000004, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT = 0x00000008, - VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT = 0x00000010, - VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT = 0x00000020, + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT = 0x00000010, + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT = 0x00000020, VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT = 0x00000040, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT = 0x00000080, VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT = 0x00000100, diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index aca082ac1b5..ebb86c09001 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -533,7 +533,7 @@ VkResult anv_GetPhysicalDeviceQueueFamilyProperties( *pQueueFamilyProperties = (VkQueueFamilyProperties) { .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_DMA_BIT, + VK_QUEUE_TRANSFER_BIT, .queueCount = 1, .supportsTimestamps = true, }; diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 3878941896c..3634ae68732 100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -52,7 +52,7 @@ anv_dump_image_to_ppm(struct anv_device *device, .arraySize = 1, .samples = 1, .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, .flags = 0, }, ©_image); assert(result == VK_SUCCESS); diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index f1c8da00b3c..8d5eae4c12e 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -260,10 +260,10 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; if (physical_device->info->gen >= 8) { tiled |= 
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - tiled |= VK_FORMAT_FEATURE_BLIT_SOURCE_BIT; + tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; } if (format->depth_format) { - tiled |= VK_FORMAT_FEATURE_BLIT_DESTINATION_BIT; + tiled |= VK_FORMAT_FEATURE_BLIT_DST_BIT; } } else { /* The surface_formats table only contains color formats */ @@ -273,13 +273,13 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d if (info->sampling <= gen) { flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | - VK_FORMAT_FEATURE_BLIT_SOURCE_BIT; + VK_FORMAT_FEATURE_BLIT_SRC_BIT; linear |= flags; tiled |= flags; } if (info->render_target <= gen) { flags = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | - VK_FORMAT_FEATURE_BLIT_DESTINATION_BIT; + VK_FORMAT_FEATURE_BLIT_DST_BIT; linear |= flags; tiled |= flags; } @@ -387,19 +387,19 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( break; } - if (usage & VK_IMAGE_USAGE_TRANSFER_SOURCE_BIT) { + if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { /* Meta implements transfers by sampling from the source image. */ if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { goto unsupported; } } - if (usage & VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT) { + if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { if (format->has_stencil) { /* Not yet implemented because copying to a W-tiled surface is crazy * hard. */ - anv_finishme("support VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT for " + anv_finishme("support VK_IMAGE_USAGE_TRANSFER_DST_BIT for " "stencil format"); goto unsupported; } diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index d344fcc2bea..973e8bd609a 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -274,12 +274,12 @@ anv_image_get_full_usage(const VkImageCreateInfo *info) { VkImageUsageFlags usage = info->usage; - if (usage & VK_IMAGE_USAGE_TRANSFER_SOURCE_BIT) { + if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { /* Meta will transfer from the image by binding it as a texture. 
*/ usage |= VK_IMAGE_USAGE_SAMPLED_BIT; } - if (usage & VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT) { + if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { /* Meta will transfer to the image by binding it as a color attachment, * even if the image format is not a color format. */ diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index a0606adaea6..a61f0c16757 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -239,17 +239,17 @@ void anv_DestroyPipeline( } static const uint32_t vk_to_gen_primitive_type[] = { - [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, -/* [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 */ + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, +/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = _3DPRIM_PATCHLIST_1 */ }; static void diff --git a/src/vulkan/anv_wsi_wayland.c 
b/src/vulkan/anv_wsi_wayland.c index f32fda20c09..d226c8d06c1 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -330,7 +330,7 @@ wsi_wl_get_surface_properties(struct anv_wsi_implementation *impl, props->currentTransform = VK_SURFACE_TRANSFORM_NONE_KHR; props->maxImageArraySize = 1; props->supportedUsageFlags = - VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; return VK_SUCCESS; diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 9a5e41d025a..4beffce3ce5 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -82,7 +82,7 @@ x11_get_surface_properties(struct anv_wsi_implementation *impl, props->currentTransform = VK_SURFACE_TRANSFORM_NONE_KHR; props->maxImageArraySize = 1; props->supportedUsageFlags = - VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; return VK_SUCCESS; diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 7d44c72b1a2..2d26a0380c5 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -316,20 +316,6 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, .BlendStatePointer = pipeline->blend_state.offset); } -static const uint32_t vk_to_gen_primitive_type[] = { - [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 
-}; - static inline uint32_t scratch_space(const struct brw_stage_prog_data *prog_data) { diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index cba0515161a..31dcdcd8dd5 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -164,8 +164,8 @@ void genX(CmdPipelineBarrier)( VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | -- cgit v1.2.3 From a89a485e79ad40793a85979d86d45760362be21a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 11:48:08 -0800 Subject: vk/0.210.0: Rename CmdBuffer to CommandBuffer --- include/vulkan/vulkan.h | 338 +++++++++++++++++++++---------------------- src/vulkan/anv_batch_chain.c | 4 +- src/vulkan/anv_cmd_buffer.c | 118 +++++++-------- src/vulkan/anv_device.c | 18 +-- src/vulkan/anv_dump.c | 24 +-- src/vulkan/anv_meta.c | 28 ++-- src/vulkan/anv_meta_clear.c | 16 +- src/vulkan/anv_private.h | 8 +- src/vulkan/anv_query.c | 2 +- src/vulkan/gen7_cmd_buffer.c | 46 +++--- src/vulkan/gen8_cmd_buffer.c | 62 ++++---- src/vulkan/genX_cmd_buffer.c | 4 +- 12 files changed, 334 insertions(+), 334 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index b90181a53d4..c09cefc39ef 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -68,7 +68,7 @@ VK_DEFINE_HANDLE(VkInstance) VK_DEFINE_HANDLE(VkPhysicalDevice) VK_DEFINE_HANDLE(VkDevice) VK_DEFINE_HANDLE(VkQueue) -VK_DEFINE_HANDLE(VkCmdBuffer) +VK_DEFINE_HANDLE(VkCommandBuffer) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFence) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDeviceMemory) 
VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBuffer) @@ -89,7 +89,7 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSampler) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorPool) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorSet) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFramebuffer) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkCmdPool) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkCommandPool) #define VK_LOD_CLAMP_NONE 1000.0f #define VK_REMAINING_MIP_LEVELS (~0U) @@ -140,7 +140,7 @@ typedef enum { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 6, VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 7, VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 8, - VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO = 9, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_CREATE_INFO = 9, VK_STRUCTURE_TYPE_EVENT_CREATE_INFO = 10, VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 11, VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 12, @@ -159,7 +159,7 @@ typedef enum { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 25, VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO = 26, VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 27, - VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO = 28, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO = 28, VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 29, VK_STRUCTURE_TYPE_MEMORY_BARRIER = 30, VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 31, @@ -175,7 +175,7 @@ typedef enum { VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION = 41, VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY = 42, VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 43, - VK_STRUCTURE_TYPE_CMD_POOL_CREATE_INFO = 44, + VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO = 44, VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO = 45, VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO = 46, VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, @@ -765,14 +765,14 @@ typedef enum VkPipelineBindPoint { VK_PIPELINE_BIND_POINT_MAX_ENUM = 0x7FFFFFFF } VkPipelineBindPoint; -typedef enum { - VK_CMD_BUFFER_LEVEL_PRIMARY = 0, - VK_CMD_BUFFER_LEVEL_SECONDARY = 1, - VK_CMD_BUFFER_LEVEL_BEGIN_RANGE = VK_CMD_BUFFER_LEVEL_PRIMARY, - 
VK_CMD_BUFFER_LEVEL_END_RANGE = VK_CMD_BUFFER_LEVEL_SECONDARY, - VK_CMD_BUFFER_LEVEL_NUM = (VK_CMD_BUFFER_LEVEL_SECONDARY - VK_CMD_BUFFER_LEVEL_PRIMARY + 1), - VK_CMD_BUFFER_LEVEL_MAX_ENUM = 0x7FFFFFFF -} VkCmdBufferLevel; +typedef enum VkCommandBufferLevel { + VK_COMMAND_BUFFER_LEVEL_PRIMARY = 0, + VK_COMMAND_BUFFER_LEVEL_SECONDARY = 1, + VK_COMMAND_BUFFER_LEVEL_BEGIN_RANGE = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + VK_COMMAND_BUFFER_LEVEL_END_RANGE = VK_COMMAND_BUFFER_LEVEL_SECONDARY, + VK_COMMAND_BUFFER_LEVEL_RANGE_SIZE = (VK_COMMAND_BUFFER_LEVEL_SECONDARY - VK_COMMAND_BUFFER_LEVEL_PRIMARY + 1), + VK_COMMAND_BUFFER_LEVEL_MAX_ENUM = 0x7FFFFFFF +} VkCommandBufferLevel; typedef enum VkIndexType { VK_INDEX_TYPE_UINT16 = 0, @@ -792,14 +792,14 @@ typedef enum { VK_TIMESTAMP_TYPE_MAX_ENUM = 0x7FFFFFFF } VkTimestampType; -typedef enum { - VK_RENDER_PASS_CONTENTS_INLINE = 0, - VK_RENDER_PASS_CONTENTS_SECONDARY_CMD_BUFFERS = 1, - VK_RENDER_PASS_CONTENTS_BEGIN_RANGE = VK_RENDER_PASS_CONTENTS_INLINE, - VK_RENDER_PASS_CONTENTS_END_RANGE = VK_RENDER_PASS_CONTENTS_SECONDARY_CMD_BUFFERS, - VK_RENDER_PASS_CONTENTS_NUM = (VK_RENDER_PASS_CONTENTS_SECONDARY_CMD_BUFFERS - VK_RENDER_PASS_CONTENTS_INLINE + 1), - VK_RENDER_PASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF -} VkRenderPassContents; +typedef enum VkSubpassContents { + VK_SUBPASS_CONTENTS_INLINE = 0, + VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS = 1, + VK_SUBPASS_CONTENTS_BEGIN_RANGE = VK_SUBPASS_CONTENTS_INLINE, + VK_SUBPASS_CONTENTS_END_RANGE = VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS, + VK_SUBPASS_CONTENTS_RANGE_SIZE = (VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS - VK_SUBPASS_CONTENTS_INLINE + 1), + VK_SUBPASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF +} VkSubpassContents; typedef enum VkFormatFeatureFlagBits { @@ -1033,17 +1033,17 @@ typedef enum { } VkMemoryInputFlagBits; typedef VkFlags VkMemoryInputFlags; -typedef enum { - VK_CMD_POOL_CREATE_TRANSIENT_BIT = 0x00000001, - VK_CMD_POOL_CREATE_RESET_COMMAND_BUFFER_BIT = 0x00000002, -} 
VkCmdPoolCreateFlagBits; -typedef VkFlags VkCmdPoolCreateFlags; +typedef enum VkCommandPoolCreateFlagBits { + VK_COMMAND_POOL_CREATE_TRANSIENT_BIT = 0x00000001, + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT = 0x00000002, +} VkCommandPoolCreateFlagBits; +typedef VkFlags VkCommandPoolCreateFlags; -typedef enum { - VK_CMD_POOL_RESET_RELEASE_RESOURCES_BIT = 0x00000001, -} VkCmdPoolResetFlagBits; -typedef VkFlags VkCmdPoolResetFlags; -typedef VkFlags VkCmdBufferCreateFlags; +typedef enum VkCommandPoolResetFlagBits { + VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT = 0x00000001, +} VkCommandPoolResetFlagBits; +typedef VkFlags VkCommandPoolResetFlags; +typedef VkFlags VkCommandBufferCreateFlags; typedef enum { VK_CMD_BUFFER_OPTIMIZE_SMALL_BATCH_BIT = 0x00000001, @@ -1051,13 +1051,13 @@ typedef enum { VK_CMD_BUFFER_OPTIMIZE_ONE_TIME_SUBMIT_BIT = 0x00000004, VK_CMD_BUFFER_OPTIMIZE_DESCRIPTOR_SET_SWITCH_BIT = 0x00000008, VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT = 0x00000010, -} VkCmdBufferOptimizeFlagBits; -typedef VkFlags VkCmdBufferOptimizeFlags; +} VkCommandBufferOptimizeFlagBits; +typedef VkFlags VkCommandBufferOptimizeFlags; -typedef enum { - VK_CMD_BUFFER_RESET_RELEASE_RESOURCES_BIT = 0x00000001, -} VkCmdBufferResetFlagBits; -typedef VkFlags VkCmdBufferResetFlags; +typedef enum VkCommandBufferResetFlagBits { + VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT = 0x00000001, +} VkCommandBufferResetFlagBits; +typedef VkFlags VkCommandBufferResetFlags; typedef enum { VK_STENCIL_FACE_NONE = 0, @@ -1914,25 +1914,25 @@ typedef struct { VkStructureType sType; const void* pNext; uint32_t queueFamilyIndex; - VkCmdPoolCreateFlags flags; -} VkCmdPoolCreateInfo; + VkCommandPoolCreateFlags flags; +} VkCommandPoolCreateInfo; typedef struct { VkStructureType sType; const void* pNext; - VkCmdPool cmdPool; - VkCmdBufferLevel level; - VkCmdBufferCreateFlags flags; -} VkCmdBufferCreateInfo; + VkCommandPool commandPool; + VkCommandBufferLevel level; + VkCommandBufferCreateFlags flags; 
+} VkCommandBufferCreateInfo; typedef struct { VkStructureType sType; const void* pNext; - VkCmdBufferOptimizeFlags flags; + VkCommandBufferOptimizeFlags flags; VkRenderPass renderPass; uint32_t subpass; VkFramebuffer framebuffer; -} VkCmdBufferBeginInfo; +} VkCommandBufferBeginInfo; typedef struct { VkDeviceSize srcOffset; @@ -2024,26 +2024,26 @@ typedef struct { VkDeviceSize size; } VkBufferMemoryBarrier; -typedef struct { +typedef struct VkDispatchIndirectCommand { uint32_t x; uint32_t y; uint32_t z; -} VkDispatchIndirectCmd; +} VkDispatchIndirectCommand; -typedef struct { +typedef struct VkDrawIndexedIndirectCommand { uint32_t indexCount; uint32_t instanceCount; uint32_t firstIndex; int32_t vertexOffset; uint32_t firstInstance; -} VkDrawIndexedIndirectCmd; +} VkDrawIndexedIndirectCommand; -typedef struct { +typedef struct VkDrawIndirectCommand { uint32_t vertexCount; uint32_t instanceCount; uint32_t firstVertex; uint32_t firstInstance; -} VkDrawIndirectCmd; +} VkDrawIndirectCommand; typedef struct { VkStructureType sType; @@ -2084,7 +2084,7 @@ typedef VkResult (VKAPI *PFN_vkEnumerateDeviceExtensionProperties)(VkPhysicalDev typedef VkResult (VKAPI *PFN_vkEnumerateInstanceLayerProperties)(uint32_t* pCount, VkLayerProperties* pProperties); typedef VkResult (VKAPI *PFN_vkEnumerateDeviceLayerProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkLayerProperties* pProperties); typedef VkResult (VKAPI *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue); -typedef VkResult (VKAPI *PFN_vkQueueSubmit)(VkQueue queue, uint32_t cmdBufferCount, const VkCmdBuffer* pCmdBuffers, VkFence fence); +typedef VkResult (VKAPI *PFN_vkQueueSubmit)(VkQueue queue, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers, VkFence fence); typedef VkResult (VKAPI *PFN_vkQueueWaitIdle)(VkQueue queue); typedef VkResult (VKAPI *PFN_vkDeviceWaitIdle)(VkDevice device); typedef VkResult (VKAPI *PFN_vkAllocMemory)(VkDevice 
device, const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem); @@ -2158,59 +2158,59 @@ typedef void (VKAPI *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer fr typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); typedef void (VKAPI *PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass); typedef VkResult (VKAPI *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); -typedef VkResult (VKAPI *PFN_vkCreateCommandPool)(VkDevice device, const VkCmdPoolCreateInfo* pCreateInfo, VkCmdPool* pCmdPool); -typedef void (VKAPI *PFN_vkDestroyCommandPool)(VkDevice device, VkCmdPool cmdPool); -typedef VkResult (VKAPI *PFN_vkResetCommandPool)(VkDevice device, VkCmdPool cmdPool, VkCmdPoolResetFlags flags); -typedef VkResult (VKAPI *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCmdBufferCreateInfo* pCreateInfo, VkCmdBuffer* pCmdBuffer); -typedef void (VKAPI *PFN_vkDestroyCommandBuffer)(VkDevice device, VkCmdBuffer commandBuffer); -typedef VkResult (VKAPI *PFN_vkBeginCommandBuffer)(VkCmdBuffer cmdBuffer, const VkCmdBufferBeginInfo* pBeginInfo); -typedef VkResult (VKAPI *PFN_vkEndCommandBuffer)(VkCmdBuffer cmdBuffer); -typedef VkResult (VKAPI *PFN_vkResetCommandBuffer)(VkCmdBuffer cmdBuffer, VkCmdBufferResetFlags flags); -typedef void (VKAPI *PFN_vkCmdBindPipeline)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); -typedef void (VKAPI *PFN_vkCmdSetViewport)(VkCmdBuffer cmdBuffer, uint32_t viewportCount, const VkViewport* pViewports); -typedef void (VKAPI *PFN_vkCmdSetScissor)(VkCmdBuffer cmdBuffer, uint32_t scissorCount, const VkRect2D* pScissors); -typedef void (VKAPI *PFN_vkCmdSetLineWidth)(VkCmdBuffer cmdBuffer, float lineWidth); -typedef void (VKAPI *PFN_vkCmdSetDepthBias)(VkCmdBuffer cmdBuffer, float depthBias, float depthBiasClamp, float slopeScaledDepthBias); -typedef void (VKAPI 
*PFN_vkCmdSetBlendConstants)(VkCmdBuffer cmdBuffer, const float blendConst[4]); -typedef void (VKAPI *PFN_vkCmdSetDepthBounds)(VkCmdBuffer cmdBuffer, float minDepthBounds, float maxDepthBounds); -typedef void (VKAPI *PFN_vkCmdSetStencilCompareMask)(VkCmdBuffer cmdBuffer, VkStencilFaceFlags faceMask, uint32_t stencilCompareMask); -typedef void (VKAPI *PFN_vkCmdSetStencilWriteMask)(VkCmdBuffer cmdBuffer, VkStencilFaceFlags faceMask, uint32_t stencilWriteMask); -typedef void (VKAPI *PFN_vkCmdSetStencilReference)(VkCmdBuffer cmdBuffer, VkStencilFaceFlags faceMask, uint32_t stencilReference); -typedef void (VKAPI *PFN_vkCmdBindDescriptorSets)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); -typedef void (VKAPI *PFN_vkCmdBindIndexBuffer)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); -typedef void (VKAPI *PFN_vkCmdBindVertexBuffers)(VkCmdBuffer cmdBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); -typedef void (VKAPI *PFN_vkCmdDraw)(VkCmdBuffer cmdBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); -typedef void (VKAPI *PFN_vkCmdDrawIndexed)(VkCmdBuffer cmdBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); -typedef void (VKAPI *PFN_vkCmdDrawIndirect)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); -typedef void (VKAPI *PFN_vkCmdDrawIndexedIndirect)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); -typedef void (VKAPI *PFN_vkCmdDispatch)(VkCmdBuffer cmdBuffer, uint32_t x, uint32_t y, uint32_t z); -typedef void (VKAPI *PFN_vkCmdDispatchIndirect)(VkCmdBuffer cmdBuffer, VkBuffer buffer, 
VkDeviceSize offset); -typedef void (VKAPI *PFN_vkCmdCopyBuffer)(VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, VkBuffer destBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdCopyImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdBlitImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkTexFilter filter); -typedef void (VKAPI *PFN_vkCmdCopyBufferToImage)(VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdCopyImageToBuffer)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer destBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdUpdateBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); -typedef void (VKAPI *PFN_vkCmdFillBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); -typedef void (VKAPI *PFN_vkCmdClearColorImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -typedef void (VKAPI *PFN_vkCmdClearDepthStencilImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -typedef void (VKAPI *PFN_vkCmdClearColorAttachment)(VkCmdBuffer cmdBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rectCount, const VkRect3D* pRects); -typedef 
void (VKAPI *PFN_vkCmdClearDepthStencilAttachment)(VkCmdBuffer cmdBuffer, VkImageAspectFlags aspectMask, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rectCount, const VkRect3D* pRects); -typedef void (VKAPI *PFN_vkCmdResolveImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); -typedef void (VKAPI *PFN_vkCmdSetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipelineStageFlags stageMask); -typedef void (VKAPI *PFN_vkCmdResetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipelineStageFlags stageMask); -typedef void (VKAPI *PFN_vkCmdWaitEvents)(VkCmdBuffer cmdBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); -typedef void (VKAPI *PFN_vkCmdPipelineBarrier)(VkCmdBuffer cmdBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, VkBool32 byRegion, uint32_t memBarrierCount, const void* const* ppMemBarriers); -typedef void (VKAPI *PFN_vkCmdBeginQuery)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot, VkQueryControlFlags flags); -typedef void (VKAPI *PFN_vkCmdEndQuery)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot); -typedef void (VKAPI *PFN_vkCmdResetQueryPool)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); -typedef void (VKAPI *PFN_vkCmdWriteTimestamp)(VkCmdBuffer cmdBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset); -typedef void (VKAPI *PFN_vkCmdCopyQueryPoolResults)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize destStride, VkQueryResultFlags flags); -typedef void (VKAPI *PFN_vkCmdPushConstants)(VkCmdBuffer cmdBuffer, VkPipelineLayout layout, 
VkShaderStageFlags stageFlags, uint32_t start, uint32_t length, const void* values); -typedef void (VKAPI *PFN_vkCmdBeginRenderPass)(VkCmdBuffer cmdBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkRenderPassContents contents); -typedef void (VKAPI *PFN_vkCmdNextSubpass)(VkCmdBuffer cmdBuffer, VkRenderPassContents contents); -typedef void (VKAPI *PFN_vkCmdEndRenderPass)(VkCmdBuffer cmdBuffer); -typedef void (VKAPI *PFN_vkCmdExecuteCommands)(VkCmdBuffer cmdBuffer, uint32_t cmdBuffersCount, const VkCmdBuffer* pCmdBuffers); +typedef VkResult (VKAPI *PFN_vkCreateCommandPool)(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, VkCommandPool* pCommandPool); +typedef void (VKAPI *PFN_vkDestroyCommandPool)(VkDevice device, VkCommandPool commandPool); +typedef VkResult (VKAPI *PFN_vkResetCommandPool)(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags); +typedef VkResult (VKAPI *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCommandBufferCreateInfo* pCreateInfo, VkCommandBuffer* pCommandBuffer); +typedef void (VKAPI *PFN_vkDestroyCommandBuffer)(VkDevice device, VkCommandBuffer commandBuffer); +typedef VkResult (VKAPI *PFN_vkBeginCommandBuffer)(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo* pBeginInfo); +typedef VkResult (VKAPI *PFN_vkEndCommandBuffer)(VkCommandBuffer commandBuffer); +typedef VkResult (VKAPI *PFN_vkResetCommandBuffer)(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags); +typedef void (VKAPI *PFN_vkCmdBindPipeline)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); +typedef void (VKAPI *PFN_vkCmdSetViewport)(VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport* pViewports); +typedef void (VKAPI *PFN_vkCmdSetScissor)(VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D* pScissors); +typedef void (VKAPI *PFN_vkCmdSetLineWidth)(VkCommandBuffer commandBuffer, float lineWidth); +typedef void (VKAPI 
*PFN_vkCmdSetDepthBias)(VkCommandBuffer commandBuffer, float depthBias, float depthBiasClamp, float slopeScaledDepthBias); +typedef void (VKAPI *PFN_vkCmdSetBlendConstants)(VkCommandBuffer commandBuffer, const float blendConst[4]); +typedef void (VKAPI *PFN_vkCmdSetDepthBounds)(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds); +typedef void (VKAPI *PFN_vkCmdSetStencilCompareMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilCompareMask); +typedef void (VKAPI *PFN_vkCmdSetStencilWriteMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilWriteMask); +typedef void (VKAPI *PFN_vkCmdSetStencilReference)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilReference); +typedef void (VKAPI *PFN_vkCmdBindDescriptorSets)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); +typedef void (VKAPI *PFN_vkCmdBindIndexBuffer)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); +typedef void (VKAPI *PFN_vkCmdBindVertexBuffers)(VkCommandBuffer commandBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); +typedef void (VKAPI *PFN_vkCmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); +typedef void (VKAPI *PFN_vkCmdDrawIndexed)(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); +typedef void (VKAPI *PFN_vkCmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); +typedef void (VKAPI *PFN_vkCmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, 
VkDeviceSize offset, uint32_t count, uint32_t stride); +typedef void (VKAPI *PFN_vkCmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z); +typedef void (VKAPI *PFN_vkCmdDispatchIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset); +typedef void (VKAPI *PFN_vkCmdCopyBuffer)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer destBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); +typedef void (VKAPI *PFN_vkCmdCopyImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); +typedef void (VKAPI *PFN_vkCmdBlitImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkTexFilter filter); +typedef void (VKAPI *PFN_vkCmdCopyBufferToImage)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); +typedef void (VKAPI *PFN_vkCmdCopyImageToBuffer)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer destBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); +typedef void (VKAPI *PFN_vkCmdUpdateBuffer)(VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); +typedef void (VKAPI *PFN_vkCmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); +typedef void (VKAPI *PFN_vkCmdClearColorImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); +typedef void (VKAPI *PFN_vkCmdClearDepthStencilImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout 
imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); +typedef void (VKAPI *PFN_vkCmdClearColorAttachment)(VkCommandBuffer commandBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rectCount, const VkRect3D* pRects); +typedef void (VKAPI *PFN_vkCmdClearDepthStencilAttachment)(VkCommandBuffer commandBuffer, VkImageAspectFlags aspectMask, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rectCount, const VkRect3D* pRects); +typedef void (VKAPI *PFN_vkCmdResolveImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); +typedef void (VKAPI *PFN_vkCmdSetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); +typedef void (VKAPI *PFN_vkCmdResetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); +typedef void (VKAPI *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); +typedef void (VKAPI *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, VkBool32 byRegion, uint32_t memBarrierCount, const void* const* ppMemBarriers); +typedef void (VKAPI *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot, VkQueryControlFlags flags); +typedef void (VKAPI *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot); +typedef void (VKAPI *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); +typedef void (VKAPI *PFN_vkCmdWriteTimestamp)(VkCommandBuffer 
commandBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset); +typedef void (VKAPI *PFN_vkCmdCopyQueryPoolResults)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize destStride, VkQueryResultFlags flags); +typedef void (VKAPI *PFN_vkCmdPushConstants)(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t start, uint32_t length, const void* values); +typedef void (VKAPI *PFN_vkCmdBeginRenderPass)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkSubpassContents contents); +typedef void (VKAPI *PFN_vkCmdNextSubpass)(VkCommandBuffer commandBuffer, VkSubpassContents contents); +typedef void (VKAPI *PFN_vkCmdEndRenderPass)(VkCommandBuffer commandBuffer); +typedef void (VKAPI *PFN_vkCmdExecuteCommands)(VkCommandBuffer commandBuffer, uint32_t commandBuffersCount, const VkCommandBuffer* pCommandBuffers); #ifdef VK_PROTOTYPES VkResult VKAPI vkCreateInstance( @@ -2300,8 +2300,8 @@ VkResult VKAPI vkGetDeviceQueue( VkResult VKAPI vkQueueSubmit( VkQueue queue, - uint32_t cmdBufferCount, - const VkCmdBuffer* pCmdBuffers, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers, VkFence fence); VkResult VKAPI vkQueueWaitIdle( @@ -2671,89 +2671,89 @@ VkResult VKAPI vkGetRenderAreaGranularity( VkResult VKAPI vkCreateCommandPool( VkDevice device, - const VkCmdPoolCreateInfo* pCreateInfo, - VkCmdPool* pCmdPool); + const VkCommandPoolCreateInfo* pCreateInfo, + VkCommandPool* pCommandPool); void VKAPI vkDestroyCommandPool( VkDevice device, - VkCmdPool cmdPool); + VkCommandPool commandPool); VkResult VKAPI vkResetCommandPool( VkDevice device, - VkCmdPool cmdPool, - VkCmdPoolResetFlags flags); + VkCommandPool commandPool, + VkCommandPoolResetFlags flags); VkResult VKAPI vkCreateCommandBuffer( VkDevice device, - const VkCmdBufferCreateInfo* pCreateInfo, - VkCmdBuffer* 
pCmdBuffer); + const VkCommandBufferCreateInfo* pCreateInfo, + VkCommandBuffer* pCommandBuffer); void VKAPI vkDestroyCommandBuffer( VkDevice device, - VkCmdBuffer commandBuffer); + VkCommandBuffer commandBuffer); VkResult VKAPI vkBeginCommandBuffer( - VkCmdBuffer cmdBuffer, - const VkCmdBufferBeginInfo* pBeginInfo); + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo); VkResult VKAPI vkEndCommandBuffer( - VkCmdBuffer cmdBuffer); + VkCommandBuffer commandBuffer); VkResult VKAPI vkResetCommandBuffer( - VkCmdBuffer cmdBuffer, - VkCmdBufferResetFlags flags); + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags); void VKAPI vkCmdBindPipeline( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); void VKAPI vkCmdSetViewport( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport* pViewports); void VKAPI vkCmdSetScissor( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D* pScissors); void VKAPI vkCmdSetLineWidth( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, float lineWidth); void VKAPI vkCmdSetDepthBias( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, float depthBias, float depthBiasClamp, float slopeScaledDepthBias); void VKAPI vkCmdSetBlendConstants( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, const float blendConst[4]); void VKAPI vkCmdSetDepthBounds( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds); void VKAPI vkCmdSetStencilCompareMask( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilCompareMask); void VKAPI vkCmdSetStencilWriteMask( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilWriteMask); void VKAPI vkCmdSetStencilReference( - VkCmdBuffer cmdBuffer, + VkCommandBuffer 
commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilReference); void VKAPI vkCmdBindDescriptorSets( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, @@ -2763,27 +2763,27 @@ void VKAPI vkCmdBindDescriptorSets( const uint32_t* pDynamicOffsets); void VKAPI vkCmdBindIndexBuffer( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); void VKAPI vkCmdBindVertexBuffers( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); void VKAPI vkCmdDraw( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); void VKAPI vkCmdDrawIndexed( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, @@ -2791,39 +2791,39 @@ void VKAPI vkCmdDrawIndexed( uint32_t firstInstance); void VKAPI vkCmdDrawIndirect( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); void VKAPI vkCmdDrawIndexedIndirect( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); void VKAPI vkCmdDispatch( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z); void VKAPI vkCmdDispatchIndirect( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset); void VKAPI vkCmdCopyBuffer( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer destBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); void VKAPI vkCmdCopyImage( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage 
destImage, @@ -2832,7 +2832,7 @@ void VKAPI vkCmdCopyImage( const VkImageCopy* pRegions); void VKAPI vkCmdBlitImage( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, @@ -2842,7 +2842,7 @@ void VKAPI vkCmdBlitImage( VkTexFilter filter); void VKAPI vkCmdCopyBufferToImage( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage destImage, VkImageLayout destImageLayout, @@ -2850,7 +2850,7 @@ void VKAPI vkCmdCopyBufferToImage( const VkBufferImageCopy* pRegions); void VKAPI vkCmdCopyImageToBuffer( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer destBuffer, @@ -2858,21 +2858,21 @@ void VKAPI vkCmdCopyImageToBuffer( const VkBufferImageCopy* pRegions); void VKAPI vkCmdUpdateBuffer( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); void VKAPI vkCmdFillBuffer( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); void VKAPI vkCmdClearColorImage( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, @@ -2880,7 +2880,7 @@ void VKAPI vkCmdClearColorImage( const VkImageSubresourceRange* pRanges); void VKAPI vkCmdClearDepthStencilImage( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, @@ -2888,7 +2888,7 @@ void VKAPI vkCmdClearDepthStencilImage( const VkImageSubresourceRange* pRanges); void VKAPI vkCmdClearColorAttachment( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, const VkClearColorValue* pColor, @@ -2896,7 +2896,7 @@ void VKAPI vkCmdClearColorAttachment( const VkRect3D* pRects); 
void VKAPI vkCmdClearDepthStencilAttachment( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImageAspectFlags aspectMask, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, @@ -2904,7 +2904,7 @@ void VKAPI vkCmdClearDepthStencilAttachment( const VkRect3D* pRects); void VKAPI vkCmdResolveImage( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, @@ -2913,17 +2913,17 @@ void VKAPI vkCmdResolveImage( const VkImageResolve* pRegions); void VKAPI vkCmdSetEvent( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); void VKAPI vkCmdResetEvent( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); void VKAPI vkCmdWaitEvents( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, @@ -2932,7 +2932,7 @@ void VKAPI vkCmdWaitEvents( const void* const* ppMemBarriers); void VKAPI vkCmdPipelineBarrier( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, VkBool32 byRegion, @@ -2940,30 +2940,30 @@ void VKAPI vkCmdPipelineBarrier( const void* const* ppMemBarriers); void VKAPI vkCmdBeginQuery( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot, VkQueryControlFlags flags); void VKAPI vkCmdEndQuery( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot); void VKAPI vkCmdResetQueryPool( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); void VKAPI vkCmdWriteTimestamp( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset); void VKAPI vkCmdCopyQueryPoolResults( - VkCmdBuffer cmdBuffer, + 
VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, @@ -2973,7 +2973,7 @@ void VKAPI vkCmdCopyQueryPoolResults( VkQueryResultFlags flags); void VKAPI vkCmdPushConstants( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t start, @@ -2981,21 +2981,21 @@ void VKAPI vkCmdPushConstants( const void* values); void VKAPI vkCmdBeginRenderPass( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, - VkRenderPassContents contents); + VkSubpassContents contents); void VKAPI vkCmdNextSubpass( - VkCmdBuffer cmdBuffer, - VkRenderPassContents contents); + VkCommandBuffer commandBuffer, + VkSubpassContents contents); void VKAPI vkCmdEndRenderPass( - VkCmdBuffer cmdBuffer); + VkCommandBuffer commandBuffer); void VKAPI vkCmdExecuteCommands( - VkCmdBuffer cmdBuffer, - uint32_t cmdBuffersCount, - const VkCmdBuffer* pCmdBuffers); + VkCommandBuffer commandBuffer, + uint32_t commandBuffersCount, + const VkCommandBuffer* pCommandBuffers); #endif #ifdef __cplusplus diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 9d35da8024b..62189afec2f 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -641,7 +641,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) { struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END); /* Round batch up to an even number of dwords. 
*/ @@ -653,7 +653,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); - if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { /* If this is a secondary command buffer, we need to determine the * mode in which it will be executed with vkExecuteCommands. We * determine this statically here so that this stays in sync with the diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 66b2f65e9f7..f42f6fd5183 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -162,11 +162,11 @@ anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, VkResult anv_CreateCommandBuffer( VkDevice _device, - const VkCmdBufferCreateInfo* pCreateInfo, - VkCmdBuffer* pCmdBuffer) + const VkCommandBufferCreateInfo* pCreateInfo, + VkCommandBuffer* pCommandBuffer) { ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool); + ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->commandPool); struct anv_cmd_buffer *cmd_buffer; VkResult result; @@ -201,7 +201,7 @@ VkResult anv_CreateCommandBuffer( list_inithead(&cmd_buffer->pool_link); } - *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); + *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer); return VK_SUCCESS; @@ -212,7 +212,7 @@ VkResult anv_CreateCommandBuffer( void anv_DestroyCommandBuffer( VkDevice _device, - VkCmdBuffer _cmd_buffer) + VkCommandBuffer _cmd_buffer) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); @@ -227,10 +227,10 @@ void anv_DestroyCommandBuffer( } VkResult anv_ResetCommandBuffer( - VkCmdBuffer cmdBuffer, - VkCmdBufferResetFlags flags) + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 
anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); @@ -258,16 +258,16 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) } VkResult anv_BeginCommandBuffer( - VkCmdBuffer cmdBuffer, - const VkCmdBufferBeginInfo* pBeginInfo) + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); cmd_buffer->opt_flags = pBeginInfo->flags; - if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { cmd_buffer->state.framebuffer = anv_framebuffer_from_handle(pBeginInfo->framebuffer); cmd_buffer->state.pass = @@ -286,14 +286,14 @@ VkResult anv_BeginCommandBuffer( } VkResult anv_EndCommandBuffer( - VkCmdBuffer cmdBuffer) + VkCommandBuffer commandBuffer) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_device *device = cmd_buffer->device; anv_cmd_buffer_end_batch_buffer(cmd_buffer); - if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { /* The algorithm used to compute the validate list is not threadsafe as * it uses the bo->index field. We have to lock the device around it. * Fortunately, the chances for contention here are probably very low. 
@@ -307,11 +307,11 @@ VkResult anv_EndCommandBuffer( } void anv_CmdBindPipeline( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); switch (pipelineBindPoint) { @@ -341,11 +341,11 @@ void anv_CmdBindPipeline( } void anv_CmdSetViewport( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport* pViewports) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); cmd_buffer->state.dynamic.viewport.count = viewportCount; memcpy(cmd_buffer->state.dynamic.viewport.viewports, @@ -355,11 +355,11 @@ void anv_CmdSetViewport( } void anv_CmdSetScissor( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D* pScissors) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); cmd_buffer->state.dynamic.scissor.count = scissorCount; memcpy(cmd_buffer->state.dynamic.scissor.scissors, @@ -369,22 +369,22 @@ void anv_CmdSetScissor( } void anv_CmdSetLineWidth( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, float lineWidth) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); cmd_buffer->state.dynamic.line_width = lineWidth; cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; } void anv_CmdSetDepthBias( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, float depthBias, float depthBiasClamp, float slopeScaledDepthBias) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); cmd_buffer->state.dynamic.depth_bias.bias = depthBias; cmd_buffer->state.dynamic.depth_bias.clamp = 
depthBiasClamp; @@ -394,10 +394,10 @@ void anv_CmdSetDepthBias( } void anv_CmdSetBlendConstants( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, const float blendConst[4]) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); memcpy(cmd_buffer->state.dynamic.blend_constants, blendConst, sizeof(float) * 4); @@ -406,11 +406,11 @@ void anv_CmdSetBlendConstants( } void anv_CmdSetDepthBounds( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; @@ -419,11 +419,11 @@ void anv_CmdSetDepthBounds( } void anv_CmdSetStencilCompareMask( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilCompareMask) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); if (faceMask & VK_STENCIL_FACE_FRONT_BIT) cmd_buffer->state.dynamic.stencil_compare_mask.front = stencilCompareMask; @@ -434,11 +434,11 @@ void anv_CmdSetStencilCompareMask( } void anv_CmdSetStencilWriteMask( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilWriteMask) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); if (faceMask & VK_STENCIL_FACE_FRONT_BIT) cmd_buffer->state.dynamic.stencil_write_mask.front = stencilWriteMask; @@ -449,11 +449,11 @@ void anv_CmdSetStencilWriteMask( } void anv_CmdSetStencilReference( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilReference) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, 
cmd_buffer, commandBuffer); if (faceMask & VK_STENCIL_FACE_FRONT_BIT) cmd_buffer->state.dynamic.stencil_reference.front = stencilReference; @@ -464,7 +464,7 @@ void anv_CmdSetStencilReference( } void anv_CmdBindDescriptorSets( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout, uint32_t firstSet, @@ -473,7 +473,7 @@ void anv_CmdBindDescriptorSets( uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); struct anv_descriptor_set_layout *set_layout; @@ -519,13 +519,13 @@ void anv_CmdBindDescriptorSets( } void anv_CmdBindVertexBuffers( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; /* We have to defer setting up vertex buffer since we need the buffer @@ -799,7 +799,7 @@ anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, } void anv_CmdSetEvent( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask) { @@ -807,7 +807,7 @@ void anv_CmdSetEvent( } void anv_CmdResetEvent( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask) { @@ -815,7 +815,7 @@ void anv_CmdResetEvent( } void anv_CmdWaitEvents( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, @@ -855,14 +855,14 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, } void anv_CmdPushConstants( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, 
VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t start, uint32_t length, const void* values) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); VkShaderStage stage; for_each_bit(stage, stageFlags) { @@ -876,20 +876,20 @@ void anv_CmdPushConstants( } void anv_CmdExecuteCommands( - VkCmdBuffer cmdBuffer, - uint32_t cmdBuffersCount, - const VkCmdBuffer* pCmdBuffers) + VkCommandBuffer commandBuffer, + uint32_t commandBuffersCount, + const VkCommandBuffer* pCmdBuffers) { - ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); - assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]); - for (uint32_t i = 0; i < cmdBuffersCount; i++) { + for (uint32_t i = 0; i < commandBuffersCount; i++) { ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); - assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY); + assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); anv_cmd_buffer_add_secondary(primary, secondary); } @@ -897,8 +897,8 @@ void anv_CmdExecuteCommands( VkResult anv_CreateCommandPool( VkDevice _device, - const VkCmdPoolCreateInfo* pCreateInfo, - VkCmdPool* pCmdPool) + const VkCommandPoolCreateInfo* pCreateInfo, + VkCommandPool* pCmdPool) { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_cmd_pool *pool; @@ -917,22 +917,22 @@ VkResult anv_CreateCommandPool( void anv_DestroyCommandPool( VkDevice _device, - VkCmdPool cmdPool) + VkCommandPool commandPool) { ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); - anv_ResetCommandPool(_device, cmdPool, 0); + anv_ResetCommandPool(_device, commandPool, 0); anv_device_free(device, pool); } VkResult anv_ResetCommandPool( VkDevice device, - 
VkCmdPool cmdPool, - VkCmdPoolResetFlags flags) + VkCommandPool commandPool, + VkCommandPoolResetFlags flags) { - ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link) { diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index ebb86c09001..15ad2107627 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -812,8 +812,8 @@ VkResult anv_GetDeviceQueue( VkResult anv_QueueSubmit( VkQueue _queue, - uint32_t cmdBufferCount, - const VkCmdBuffer* pCmdBuffers, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers, VkFence _fence) { ANV_FROM_HANDLE(anv_queue, queue, _queue); @@ -821,10 +821,10 @@ VkResult anv_QueueSubmit( struct anv_device *device = queue->device; int ret; - for (uint32_t i = 0; i < cmdBufferCount; i++) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCmdBuffers[i]); + for (uint32_t i = 0; i < commandBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); - assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); if (ret != 0) { @@ -1526,21 +1526,21 @@ void anv_DestroyFramebuffer( } void vkCmdDbgMarkerBegin( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, const char* pMarker) __attribute__ ((visibility ("default"))); void vkCmdDbgMarkerEnd( - VkCmdBuffer cmdBuffer) + VkCommandBuffer commandBuffer) __attribute__ ((visibility ("default"))); void vkCmdDbgMarkerBegin( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, const char* pMarker) { } void vkCmdDbgMarkerEnd( - VkCmdBuffer cmdBuffer) + VkCommandBuffer commandBuffer) { } diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 3634ae68732..1c5cd4b56f9 100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -72,28 +72,28 @@ 
anv_dump_image_to_ppm(struct anv_device *device, result = anv_BindImageMemory(vk_device, copy_image, memory, 0); assert(result == VK_SUCCESS); - VkCmdPool cmdPool; + VkCommandPool commandPool; result = anv_CreateCommandPool(vk_device, - &(VkCmdPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_CMD_POOL_CREATE_INFO, + &(VkCommandPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .queueFamilyIndex = 0, .flags = 0, - }, &cmdPool); + }, &commandPool); assert(result == VK_SUCCESS); - VkCmdBuffer cmd; + VkCommandBuffer cmd; result = anv_CreateCommandBuffer(vk_device, - &(VkCmdBufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO, - .cmdPool = cmdPool, - .level = VK_CMD_BUFFER_LEVEL_PRIMARY, + &(VkCommandBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_CREATE_INFO, + .commandPool = commandPool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .flags = 0, }, &cmd); assert(result == VK_SUCCESS); result = anv_BeginCommandBuffer(cmd, - &(VkCmdBufferBeginInfo) { - .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO, + &(VkCommandBufferBeginInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_CMD_BUFFER_OPTIMIZE_ONE_TIME_SUBMIT_BIT, }); assert(result == VK_SUCCESS); @@ -169,7 +169,7 @@ anv_dump_image_to_ppm(struct anv_device *device, assert(result == VK_SUCCESS); anv_DestroyFence(vk_device, fence); - anv_DestroyCommandPool(vk_device, cmdPool); + anv_DestroyCommandPool(vk_device, commandPool); uint8_t *map; result = anv_MapMemory(vk_device, memory, 0, reqs.size, 0, (void **)&map); diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 96792e9d6da..143d637c55a 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -575,7 +575,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, }, .clearValueCount = 0, .pClearValues = NULL, - }, VK_RENDER_PASS_CONTENTS_INLINE); + }, VK_SUBPASS_CONTENTS_INLINE); VkPipeline pipeline; @@ -753,13 +753,13 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, } void 
anv_CmdCopyBuffer( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer destBuffer, uint32_t regionCount, const VkBufferCopy* pRegions) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); @@ -831,7 +831,7 @@ void anv_CmdCopyBuffer( } void anv_CmdCopyImage( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, @@ -839,7 +839,7 @@ void anv_CmdCopyImage( uint32_t regionCount, const VkImageCopy* pRegions) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dest_image, destImage); @@ -939,7 +939,7 @@ void anv_CmdCopyImage( } void anv_CmdBlitImage( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, @@ -949,7 +949,7 @@ void anv_CmdBlitImage( VkTexFilter filter) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dest_image, destImage); @@ -1082,14 +1082,14 @@ make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, } void anv_CmdCopyBufferToImage( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, dest_image, destImage); VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); const VkFormat orig_format = 
dest_image->format->vk_format; @@ -1208,14 +1208,14 @@ void anv_CmdCopyBufferToImage( } void anv_CmdCopyImageToBuffer( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer destBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, src_image, srcImage); VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); struct anv_meta_saved_state saved_state; @@ -1323,7 +1323,7 @@ void anv_CmdCopyImageToBuffer( } void anv_CmdUpdateBuffer( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, @@ -1333,7 +1333,7 @@ void anv_CmdUpdateBuffer( } void anv_CmdFillBuffer( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, @@ -1343,7 +1343,7 @@ void anv_CmdFillBuffer( } void anv_CmdResolveImage( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 6645e37d124..0709c41db46 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -311,7 +311,7 @@ emit_load_color_clear(struct anv_cmd_buffer *cmd_buffer, VkClearColorValue clear_value) { struct anv_device *device = cmd_buffer->device; - VkCmdBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; VkPipeline pipeline_h = anv_pipeline_to_handle(device->meta_state.clear.color_pipeline); @@ -487,7 +487,7 @@ emit_load_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, VkClearDepthStencilValue clear_value) { struct anv_device *device = cmd_buffer->device; - VkCmdBuffer 
cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct depthstencil_clear_vattrs vertex_data[3] = { @@ -678,14 +678,14 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, } void anv_CmdClearColorImage( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImage _image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, image, _image); struct anv_meta_saved_state saved_state; @@ -783,7 +783,7 @@ void anv_CmdClearColorImage( .pClearValues = (VkClearValue[]) { { .color = *pColor }, }, - }, VK_RENDER_PASS_CONTENTS_INLINE); + }, VK_SUBPASS_CONTENTS_INLINE); ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); } @@ -794,7 +794,7 @@ void anv_CmdClearColorImage( } void anv_CmdClearDepthStencilImage( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, @@ -805,7 +805,7 @@ void anv_CmdClearDepthStencilImage( } void anv_CmdClearColorAttachment( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, const VkClearColorValue* pColor, @@ -816,7 +816,7 @@ void anv_CmdClearColorAttachment( } void anv_CmdClearDepthStencilAttachment( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkImageAspectFlags aspectMask, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index c99d4e0a59e..0e148b36b1c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1044,8 +1044,8 @@ struct anv_cmd_buffer { struct anv_state_stream 
surface_state_stream; struct anv_state_stream dynamic_state_stream; - VkCmdBufferOptimizeFlags opt_flags; - VkCmdBufferLevel level; + VkCommandBufferOptimizeFlags opt_flags; + VkCommandBufferLevel level; struct anv_cmd_state state; }; @@ -1549,13 +1549,13 @@ void anv_dump_image_to_ppm(struct anv_device *device, #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ struct __anv_type *__name = __anv_type ## _from_handle(__handle) -ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCmdBuffer) +ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCommandBuffer) ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCmdPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCommandPool) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index 68535b40cac..8891aa02d76 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -142,7 +142,7 @@ VkResult anv_GetQueryPoolResults( } void anv_CmdResetQueryPool( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount) diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 4b3922d8278..b69982d6e52 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -235,12 +235,12 @@ static const uint32_t restart_index_for_type[] = { }; void genX(CmdBindIndexBuffer)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, VkIndexType indexType) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, 
buffer, _buffer); cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; @@ -508,13 +508,13 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) } void genX(CmdDraw)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; cmd_buffer_flush_state(cmd_buffer); @@ -530,14 +530,14 @@ void genX(CmdDraw)( } void genX(CmdDrawIndexed)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; cmd_buffer_flush_state(cmd_buffer); @@ -578,13 +578,13 @@ gen7_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) #define GEN7_3DPRIM_BASE_VERTEX 0x2440 void genX(CmdDrawIndirect)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, uint32_t count, uint32_t stride) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; struct anv_bo *bo = buffer->bo; @@ -605,13 +605,13 @@ void genX(CmdDrawIndirect)( } void genX(CmdDrawIndexedIndirect)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, uint32_t count, uint32_t stride) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; 
struct anv_bo *bo = buffer->bo; @@ -632,12 +632,12 @@ void genX(CmdDrawIndexedIndirect)( } void genX(CmdDispatch)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; @@ -662,11 +662,11 @@ void genX(CmdDispatch)( #define GPGPU_DISPATCHDIMZ 0x2508 void genX(CmdDispatchIndirect)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; @@ -815,11 +815,11 @@ begin_render_pass(struct anv_cmd_buffer *cmd_buffer, } void genX(CmdBeginRenderPass)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, - VkRenderPassContents contents) + VkSubpassContents contents) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); begin_render_pass(cmd_buffer, pRenderPassBegin); @@ -828,20 +828,20 @@ void genX(CmdBeginRenderPass)( } void genX(CmdNextSubpass)( - VkCmdBuffer cmdBuffer, - VkRenderPassContents contents) + VkCommandBuffer commandBuffer, + VkSubpassContents contents) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); gen7_cmd_buffer_begin_subpass(cmd_buffer, 
cmd_buffer->state.subpass + 1); } void genX(CmdEndRenderPass)( - VkCmdBuffer cmdBuffer) + VkCommandBuffer commandBuffer) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); /* Emit a flushing pipe control at the end of a pass. This is kind of a * hack but it ensures that render targets always actually get written. diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 1d1433817d9..88062a6f61b 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -364,13 +364,13 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) } void genX(CmdDraw)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); cmd_buffer_flush_state(cmd_buffer); @@ -384,14 +384,14 @@ void genX(CmdDraw)( } void genX(CmdDrawIndexed)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); cmd_buffer_flush_state(cmd_buffer); @@ -430,13 +430,13 @@ emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) #define GEN7_3DPRIM_BASE_VERTEX 0x2440 void genX(CmdDrawIndirect)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, uint32_t count, uint32_t stride) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; @@ -455,12 +455,12 @@ void genX(CmdDrawIndirect)( } void genX(CmdBindIndexBuffer)( - VkCmdBuffer 
cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, VkIndexType indexType) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); static const uint32_t vk_to_gen_index_type[] = { @@ -555,13 +555,13 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) } void genX(CmdDrawIndexedIndirect)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, uint32_t count, uint32_t stride) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; @@ -580,12 +580,12 @@ void genX(CmdDrawIndexedIndirect)( } void genX(CmdDispatch)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; @@ -610,11 +610,11 @@ void genX(CmdDispatch)( #define GPGPU_DISPATCHDIMZ 0x2508 void genX(CmdDispatchIndirect)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; @@ -740,11 +740,11 @@ genX(cmd_buffer_begin_subpass)(struct anv_cmd_buffer *cmd_buffer, } void genX(CmdBeginRenderPass)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, - 
VkRenderPassContents contents) + VkSubpassContents contents) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); @@ -770,20 +770,20 @@ void genX(CmdBeginRenderPass)( } void genX(CmdNextSubpass)( - VkCmdBuffer cmdBuffer, - VkRenderPassContents contents) + VkCommandBuffer commandBuffer, + VkSubpassContents contents) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); genX(cmd_buffer_begin_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); } void genX(CmdEndRenderPass)( - VkCmdBuffer cmdBuffer) + VkCommandBuffer commandBuffer) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); /* Emit a flushing pipe control at the end of a pass. This is kind of a * hack but it ensures that render targets always actually get written. 
@@ -811,12 +811,12 @@ emit_ps_depth_count(struct anv_batch *batch, } void genX(CmdBeginQuery)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot, VkQueryControlFlags flags) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); switch (pool->type) { @@ -832,11 +832,11 @@ void genX(CmdBeginQuery)( } void genX(CmdEndQuery)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); switch (pool->type) { @@ -854,12 +854,12 @@ void genX(CmdEndQuery)( #define TIMESTAMP 0x2358 void genX(CmdWriteTimestamp)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); struct anv_bo *bo = buffer->bo; @@ -931,7 +931,7 @@ emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, } void genX(CmdCopyQueryPoolResults)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, @@ -940,7 +940,7 @@ void genX(CmdCopyQueryPoolResults)( VkDeviceSize destStride, VkQueryResultFlags flags) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); uint32_t slot_offset, dst_offset; diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index 31dcdcd8dd5..166e335ae53 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c 
@@ -139,14 +139,14 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) } void genX(CmdPipelineBarrier)( - VkCmdBuffer cmdBuffer, + VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, VkBool32 byRegion, uint32_t memBarrierCount, const void* const* ppMemBarriers) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); uint32_t b, *dw; struct GENX(PIPE_CONTROL) cmd = { -- cgit v1.2.3 From f1a7c7841f4484d81d4f6c0242e04fdaea5f0933 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 12:21:19 -0800 Subject: vk/0.210.0: Switch to the new VKAPI function decorations While we're at it, we do a bunch of the VkResult -> void updates --- include/vulkan/vk_ext_khr_device_swapchain.h | 32 +- include/vulkan/vk_ext_khr_swapchain.h | 4 +- include/vulkan/vk_platform.h | 39 +- include/vulkan/vulkan.h | 586 +++++++++++++-------------- include/vulkan/vulkan_intel.h | 4 +- src/vulkan/anv_device.c | 37 +- src/vulkan/anv_dump.c | 5 +- src/vulkan/anv_entrypoints_gen.py | 2 +- src/vulkan/anv_formats.c | 32 +- src/vulkan/anv_image.c | 6 +- src/vulkan/anv_pass.c | 4 +- 11 files changed, 372 insertions(+), 379 deletions(-) (limited to 'src') diff --git a/include/vulkan/vk_ext_khr_device_swapchain.h b/include/vulkan/vk_ext_khr_device_swapchain.h index af95ff0cdfb..bb243b32dd2 100644 --- a/include/vulkan/vk_ext_khr_device_swapchain.h +++ b/include/vulkan/vk_ext_khr_device_swapchain.h @@ -144,60 +144,60 @@ typedef struct { // ------------------------------------------------------------------------------------------------ // Function types -typedef VkResult (VKAPI *PFN_vkGetSurfacePropertiesKHR)(VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, VkSurfacePropertiesKHR* pSurfaceProperties); -typedef VkResult (VKAPI *PFN_vkGetSurfaceFormatsKHR)(VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, uint32_t* 
pCount, VkSurfaceFormatKHR* pSurfaceFormats); -typedef VkResult (VKAPI *PFN_vkGetSurfacePresentModesKHR)(VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, uint32_t* pCount, VkPresentModeKHR* pPresentModes); -typedef VkResult (VKAPI *PFN_vkCreateSwapchainKHR)(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, VkSwapchainKHR* pSwapchain); -typedef VkResult (VKAPI *PFN_vkDestroySwapchainKHR)(VkDevice device, VkSwapchainKHR swapchain); -typedef VkResult (VKAPI *PFN_vkGetSwapchainImagesKHR)(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pCount, VkImage* pSwapchainImages); -typedef VkResult (VKAPI *PFN_vkAcquireNextImageKHR)(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, uint32_t* pImageIndex); -typedef VkResult (VKAPI *PFN_vkQueuePresentKHR)(VkQueue queue, VkPresentInfoKHR* pPresentInfo); +typedef VkResult (VKAPI_PTR *PFN_vkGetSurfacePropertiesKHR)(VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, VkSurfacePropertiesKHR* pSurfaceProperties); +typedef VkResult (VKAPI_PTR *PFN_vkGetSurfaceFormatsKHR)(VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, uint32_t* pCount, VkSurfaceFormatKHR* pSurfaceFormats); +typedef VkResult (VKAPI_PTR *PFN_vkGetSurfacePresentModesKHR)(VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, uint32_t* pCount, VkPresentModeKHR* pPresentModes); +typedef VkResult (VKAPI_PTR *PFN_vkCreateSwapchainKHR)(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, VkSwapchainKHR* pSwapchain); +typedef VkResult (VKAPI_PTR *PFN_vkDestroySwapchainKHR)(VkDevice device, VkSwapchainKHR swapchain); +typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainImagesKHR)(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pCount, VkImage* pSwapchainImages); +typedef VkResult (VKAPI_PTR *PFN_vkAcquireNextImageKHR)(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, uint32_t* pImageIndex); +typedef VkResult (VKAPI_PTR 
*PFN_vkQueuePresentKHR)(VkQueue queue, VkPresentInfoKHR* pPresentInfo); // ------------------------------------------------------------------------------------------------ // Function prototypes #ifdef VK_PROTOTYPES -VkResult VKAPI vkGetSurfacePropertiesKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkGetSurfacePropertiesKHR( VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, VkSurfacePropertiesKHR* pSurfaceProperties); -VkResult VKAPI vkGetSurfaceFormatsKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkGetSurfaceFormatsKHR( VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, uint32_t* pCount, VkSurfaceFormatKHR* pSurfaceFormats); -VkResult VKAPI vkGetSurfacePresentModesKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkGetSurfacePresentModesKHR( VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, uint32_t* pCount, VkPresentModeKHR* pPresentModes); -VkResult VKAPI vkCreateSwapchainKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR( VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, VkSwapchainKHR* pSwapchain); -VkResult VKAPI vkDestroySwapchainKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkDestroySwapchainKHR( VkDevice device, VkSwapchainKHR swapchain); -VkResult VKAPI vkGetSwapchainImagesKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainImagesKHR( VkDevice device, VkSwapchainKHR swapchain, uint32_t* pCount, VkImage* pSwapchainImages); -VkResult VKAPI vkAcquireNextImageKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR( VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, uint32_t* pImageIndex); -VkResult VKAPI vkQueuePresentKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR( VkQueue queue, VkPresentInfoKHR* pPresentInfo); diff --git a/include/vulkan/vk_ext_khr_swapchain.h b/include/vulkan/vk_ext_khr_swapchain.h index 862b4d5e741..4c4f8a58a01 100644 --- a/include/vulkan/vk_ext_khr_swapchain.h +++ b/include/vulkan/vk_ext_khr_swapchain.h @@ -131,14 +131,14 @@ typedef struct { // 
------------------------------------------------------------------------------------------------ // Function types -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceSurfaceSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, const VkSurfaceDescriptionKHR* pSurfaceDescription, VkBool32* pSupported); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, const VkSurfaceDescriptionKHR* pSurfaceDescription, VkBool32* pSupported); // ------------------------------------------------------------------------------------------------ // Function prototypes #ifdef VK_PROTOTYPES -VkResult VKAPI vkGetPhysicalDeviceSurfaceSupportKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR( VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, const VkSurfaceDescriptionKHR* pSurfaceDescription, diff --git a/include/vulkan/vk_platform.h b/include/vulkan/vk_platform.h index 7ba8d77b875..0f15167f742 100644 --- a/include/vulkan/vk_platform.h +++ b/include/vulkan/vk_platform.h @@ -39,15 +39,40 @@ extern "C" *************************************************************************************************** */ +/* Platform-specific calling convention macros. + * + * Platforms should define these so that Vulkan clients call Vulkan commands + * with the same calling conventions that the Vulkan implementation expects. + * + * VKAPI_ATTR - Placed before the return type in function declarations. + * Useful for C++11 and GCC/Clang-style function attribute syntax. + * VKAPI_CALL - Placed after the return type in function declarations. + * Useful for MSVC-style calling convention syntax. + * VKAPI_PTR - Placed between the '(' and '*' in function pointer types. 
+ * + * Function declaration: VKAPI_ATTR void VKAPI_CALL vkCommand(void); + * Function pointer type: typedef void (VKAPI_PTR *PFN_vkCommand)(void); + */ #if defined(_WIN32) - // On Windows, VKAPI should equate to the __stdcall convention - #define VKAPI __stdcall -#elif defined(__GNUC__) - // On other platforms using GCC, VKAPI stays undefined - #define VKAPI + // On Windows, Vulkan commands use the stdcall convention + #define VKAPI_ATTR + #define VKAPI_CALL __stdcall + #define VKAPI_PTR VKAPI_CALL +#elif defined(__ANDROID__) && defined(__ARM_EABI__) && !defined(__ARM_ARCH_7A__) + // Android does not support Vulkan in native code using the "armeabi" ABI. + #error "Vulkan requires the 'armeabi-v7a' or 'armeabi-v7a-hard' ABI on 32-bit ARM CPUs" +#elif defined(__ANDROID__) && defined(__ARM_ARCH_7A__) + // On Android/ARMv7a, Vulkan functions use the armeabi-v7a-hard calling + // convention, even if the application's native code is compiled with the + // armeabi-v7a calling convention. + #define VKAPI_ATTR __attribute__((pcs("aapcs-vfp"))) + #define VKAPI_CALL + #define VKAPI_PTR VKAPI_ATTR #else - // Unsupported Platform! - #error "Unsupported OS Platform detected!" 
+ // On other platforms, use the default calling convention + #define VKAPI_ATTR + #define VKAPI_CALL + #define VKAPI_PTR #endif #include diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index c09cefc39ef..39cbb71c2d7 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1071,17 +1071,17 @@ typedef enum { } VkQueryControlFlagBits; typedef VkFlags VkQueryControlFlags; -typedef void* (VKAPI *PFN_vkAllocFunction)( +typedef void* (VKAPI_PTR *PFN_vkAllocFunction)( void* pUserData, size_t size, size_t alignment, VkSystemAllocType allocType); -typedef void (VKAPI *PFN_vkFreeFunction)( +typedef void (VKAPI_PTR *PFN_vkFreeFunction)( void* pUserData, void* pMem); -typedef void (VKAPI *PFN_vkVoidFunction)(void); +typedef void (VKAPI_PTR *PFN_vkVoidFunction)(void); typedef struct { VkStructureType sType; @@ -2066,175 +2066,175 @@ typedef struct { } VkMemoryBarrier; -typedef VkResult (VKAPI *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCreateInfo, VkInstance* pInstance); -typedef void (VKAPI *PFN_vkDestroyInstance)(VkInstance instance); -typedef VkResult (VKAPI *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueFamilyProperties)(VkPhysicalDevice 
physicalDevice, uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); -typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); -typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); -typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); -typedef void (VKAPI *PFN_vkDestroyDevice)(VkDevice device); -typedef VkResult (VKAPI *PFN_vkEnumerateInstanceExtensionProperties)(const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkEnumerateDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkEnumerateInstanceLayerProperties)(uint32_t* pCount, VkLayerProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkEnumerateDeviceLayerProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkLayerProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue); -typedef VkResult (VKAPI *PFN_vkQueueSubmit)(VkQueue queue, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers, VkFence fence); -typedef VkResult (VKAPI *PFN_vkQueueWaitIdle)(VkQueue queue); -typedef VkResult (VKAPI *PFN_vkDeviceWaitIdle)(VkDevice device); -typedef VkResult (VKAPI *PFN_vkAllocMemory)(VkDevice device, const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem); -typedef void (VKAPI *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory mem); -typedef VkResult (VKAPI *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData); 
-typedef void (VKAPI *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem); -typedef VkResult (VKAPI *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); -typedef VkResult (VKAPI *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); -typedef VkResult (VKAPI *PFN_vkGetDeviceMemoryCommitment)(VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes); -typedef VkResult (VKAPI *PFN_vkBindBufferMemory)(VkDevice device, VkBuffer buffer, VkDeviceMemory mem, VkDeviceSize memOffset); -typedef VkResult (VKAPI *PFN_vkBindImageMemory)(VkDevice device, VkImage image, VkDeviceMemory mem, VkDeviceSize memOffset); -typedef VkResult (VKAPI *PFN_vkGetBufferMemoryRequirements)(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); -typedef VkResult (VKAPI *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); -typedef VkResult (VKAPI *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t* pNumRequirements, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceSparseImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, uint32_t samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pNumProperties, VkSparseImageFormatProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkQueueBindSparseBufferMemory)(VkQueue queue, VkBuffer buffer, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); -typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageOpaqueMemory)(VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); -typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageMemory)(VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseImageMemoryBindInfo* pBindInfo); -typedef VkResult (VKAPI 
*PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); -typedef void (VKAPI *PFN_vkDestroyFence)(VkDevice device, VkFence fence); -typedef VkResult (VKAPI *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); -typedef VkResult (VKAPI *PFN_vkGetFenceStatus)(VkDevice device, VkFence fence); -typedef VkResult (VKAPI *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); -typedef VkResult (VKAPI *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore); -typedef void (VKAPI *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore semaphore); -typedef VkResult (VKAPI *PFN_vkQueueSignalSemaphore)(VkQueue queue, VkSemaphore semaphore); -typedef VkResult (VKAPI *PFN_vkQueueWaitSemaphore)(VkQueue queue, VkSemaphore semaphore); -typedef VkResult (VKAPI *PFN_vkCreateEvent)(VkDevice device, const VkEventCreateInfo* pCreateInfo, VkEvent* pEvent); -typedef void (VKAPI *PFN_vkDestroyEvent)(VkDevice device, VkEvent event); -typedef VkResult (VKAPI *PFN_vkGetEventStatus)(VkDevice device, VkEvent event); -typedef VkResult (VKAPI *PFN_vkSetEvent)(VkDevice device, VkEvent event); -typedef VkResult (VKAPI *PFN_vkResetEvent)(VkDevice device, VkEvent event); -typedef VkResult (VKAPI *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, VkQueryPool* pQueryPool); -typedef void (VKAPI *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool); -typedef VkResult (VKAPI *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, size_t* pDataSize, void* pData, VkQueryResultFlags flags); -typedef VkResult (VKAPI *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); -typedef void (VKAPI *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer); -typedef VkResult (VKAPI 
*PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); -typedef void (VKAPI *PFN_vkDestroyBufferView)(VkDevice device, VkBufferView bufferView); -typedef VkResult (VKAPI *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, VkImage* pImage); -typedef void (VKAPI *PFN_vkDestroyImage)(VkDevice device, VkImage image); -typedef VkResult (VKAPI *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); -typedef VkResult (VKAPI *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); -typedef void (VKAPI *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView); -typedef VkResult (VKAPI *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); -typedef void (VKAPI *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule); -typedef VkResult (VKAPI *PFN_vkCreateShader)(VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); -typedef void (VKAPI *PFN_vkDestroyShader)(VkDevice device, VkShader shader); -typedef VkResult (VKAPI *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); -typedef void (VKAPI *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache); -typedef size_t (VKAPI *PFN_vkGetPipelineCacheSize)(VkDevice device, VkPipelineCache pipelineCache); -typedef VkResult (VKAPI *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, void* pData); -typedef VkResult (VKAPI *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache destCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); -typedef VkResult (VKAPI *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo* 
pCreateInfos, VkPipeline* pPipelines); -typedef VkResult (VKAPI *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); -typedef void (VKAPI *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline); -typedef VkResult (VKAPI *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout); -typedef void (VKAPI *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelineLayout pipelineLayout); -typedef VkResult (VKAPI *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); -typedef void (VKAPI *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler); -typedef VkResult (VKAPI *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); -typedef void (VKAPI *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout); -typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); -typedef void (VKAPI *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); -typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); -typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets); -typedef VkResult (VKAPI *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t count, const VkDescriptorSet* pDescriptorSets); -typedef void (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* 
pDescriptorCopies); -typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); -typedef void (VKAPI *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer); -typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); -typedef void (VKAPI *PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass); -typedef VkResult (VKAPI *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); -typedef VkResult (VKAPI *PFN_vkCreateCommandPool)(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, VkCommandPool* pCommandPool); -typedef void (VKAPI *PFN_vkDestroyCommandPool)(VkDevice device, VkCommandPool commandPool); -typedef VkResult (VKAPI *PFN_vkResetCommandPool)(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags); -typedef VkResult (VKAPI *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCommandBufferCreateInfo* pCreateInfo, VkCommandBuffer* pCommandBuffer); -typedef void (VKAPI *PFN_vkDestroyCommandBuffer)(VkDevice device, VkCommandBuffer commandBuffer); -typedef VkResult (VKAPI *PFN_vkBeginCommandBuffer)(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo* pBeginInfo); -typedef VkResult (VKAPI *PFN_vkEndCommandBuffer)(VkCommandBuffer commandBuffer); -typedef VkResult (VKAPI *PFN_vkResetCommandBuffer)(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags); -typedef void (VKAPI *PFN_vkCmdBindPipeline)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); -typedef void (VKAPI *PFN_vkCmdSetViewport)(VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport* pViewports); -typedef void (VKAPI *PFN_vkCmdSetScissor)(VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D* pScissors); -typedef void (VKAPI *PFN_vkCmdSetLineWidth)(VkCommandBuffer 
commandBuffer, float lineWidth); -typedef void (VKAPI *PFN_vkCmdSetDepthBias)(VkCommandBuffer commandBuffer, float depthBias, float depthBiasClamp, float slopeScaledDepthBias); -typedef void (VKAPI *PFN_vkCmdSetBlendConstants)(VkCommandBuffer commandBuffer, const float blendConst[4]); -typedef void (VKAPI *PFN_vkCmdSetDepthBounds)(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds); -typedef void (VKAPI *PFN_vkCmdSetStencilCompareMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilCompareMask); -typedef void (VKAPI *PFN_vkCmdSetStencilWriteMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilWriteMask); -typedef void (VKAPI *PFN_vkCmdSetStencilReference)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilReference); -typedef void (VKAPI *PFN_vkCmdBindDescriptorSets)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); -typedef void (VKAPI *PFN_vkCmdBindIndexBuffer)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); -typedef void (VKAPI *PFN_vkCmdBindVertexBuffers)(VkCommandBuffer commandBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); -typedef void (VKAPI *PFN_vkCmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); -typedef void (VKAPI *PFN_vkCmdDrawIndexed)(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); -typedef void (VKAPI *PFN_vkCmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); -typedef void (VKAPI 
*PFN_vkCmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); -typedef void (VKAPI *PFN_vkCmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z); -typedef void (VKAPI *PFN_vkCmdDispatchIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset); -typedef void (VKAPI *PFN_vkCmdCopyBuffer)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer destBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdCopyImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdBlitImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkTexFilter filter); -typedef void (VKAPI *PFN_vkCmdCopyBufferToImage)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdCopyImageToBuffer)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer destBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdUpdateBuffer)(VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); -typedef void (VKAPI *PFN_vkCmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); -typedef void (VKAPI *PFN_vkCmdClearColorImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -typedef void (VKAPI 
*PFN_vkCmdClearDepthStencilImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -typedef void (VKAPI *PFN_vkCmdClearColorAttachment)(VkCommandBuffer commandBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rectCount, const VkRect3D* pRects); -typedef void (VKAPI *PFN_vkCmdClearDepthStencilAttachment)(VkCommandBuffer commandBuffer, VkImageAspectFlags aspectMask, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rectCount, const VkRect3D* pRects); -typedef void (VKAPI *PFN_vkCmdResolveImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); -typedef void (VKAPI *PFN_vkCmdSetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); -typedef void (VKAPI *PFN_vkCmdResetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); -typedef void (VKAPI *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); -typedef void (VKAPI *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, VkBool32 byRegion, uint32_t memBarrierCount, const void* const* ppMemBarriers); -typedef void (VKAPI *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot, VkQueryControlFlags flags); -typedef void (VKAPI *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot); -typedef void (VKAPI *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, 
uint32_t queryCount); -typedef void (VKAPI *PFN_vkCmdWriteTimestamp)(VkCommandBuffer commandBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset); -typedef void (VKAPI *PFN_vkCmdCopyQueryPoolResults)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize destStride, VkQueryResultFlags flags); -typedef void (VKAPI *PFN_vkCmdPushConstants)(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t start, uint32_t length, const void* values); -typedef void (VKAPI *PFN_vkCmdBeginRenderPass)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkSubpassContents contents); -typedef void (VKAPI *PFN_vkCmdNextSubpass)(VkCommandBuffer commandBuffer, VkSubpassContents contents); -typedef void (VKAPI *PFN_vkCmdEndRenderPass)(VkCommandBuffer commandBuffer); -typedef void (VKAPI *PFN_vkCmdExecuteCommands)(VkCommandBuffer commandBuffer, uint32_t commandBuffersCount, const VkCommandBuffer* pCommandBuffers); +typedef VkResult (VKAPI_PTR *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCreateInfo, VkInstance* pInstance); +typedef void (VKAPI_PTR *PFN_vkDestroyInstance)(VkInstance instance); +typedef VkResult (VKAPI_PTR *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties); 
+typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceQueueFamilyProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); +typedef PFN_vkVoidFunction (VKAPI_PTR *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); +typedef PFN_vkVoidFunction (VKAPI_PTR *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); +typedef void (VKAPI_PTR *PFN_vkDestroyDevice)(VkDevice device); +typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceExtensionProperties)(const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceLayerProperties)(uint32_t* pCount, VkLayerProperties* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceLayerProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkLayerProperties* pProperties); +typedef void (VKAPI_PTR *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue); +typedef VkResult (VKAPI_PTR *PFN_vkQueueSubmit)(VkQueue queue, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers, VkFence fence); +typedef VkResult (VKAPI_PTR *PFN_vkQueueWaitIdle)(VkQueue queue); +typedef VkResult (VKAPI_PTR *PFN_vkDeviceWaitIdle)(VkDevice device); +typedef VkResult (VKAPI_PTR *PFN_vkAllocMemory)(VkDevice device, const VkMemoryAllocInfo* 
pAllocInfo, VkDeviceMemory* pMem); +typedef void (VKAPI_PTR *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory mem); +typedef VkResult (VKAPI_PTR *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData); +typedef void (VKAPI_PTR *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem); +typedef VkResult (VKAPI_PTR *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); +typedef VkResult (VKAPI_PTR *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); +typedef void (VKAPI_PTR *PFN_vkGetDeviceMemoryCommitment)(VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes); +typedef VkResult (VKAPI_PTR *PFN_vkBindBufferMemory)(VkDevice device, VkBuffer buffer, VkDeviceMemory mem, VkDeviceSize memOffset); +typedef VkResult (VKAPI_PTR *PFN_vkBindImageMemory)(VkDevice device, VkImage image, VkDeviceMemory mem, VkDeviceSize memOffset); +typedef void (VKAPI_PTR *PFN_vkGetBufferMemoryRequirements)(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); +typedef void (VKAPI_PTR *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); +typedef void (VKAPI_PTR *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t* pNumRequirements, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceSparseImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, uint32_t samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pNumProperties, VkSparseImageFormatProperties* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkQueueBindSparseBufferMemory)(VkQueue queue, VkBuffer buffer, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); +typedef VkResult (VKAPI_PTR 
*PFN_vkQueueBindSparseImageOpaqueMemory)(VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); +typedef VkResult (VKAPI_PTR *PFN_vkQueueBindSparseImageMemory)(VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseImageMemoryBindInfo* pBindInfo); +typedef VkResult (VKAPI_PTR *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); +typedef void (VKAPI_PTR *PFN_vkDestroyFence)(VkDevice device, VkFence fence); +typedef VkResult (VKAPI_PTR *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); +typedef VkResult (VKAPI_PTR *PFN_vkGetFenceStatus)(VkDevice device, VkFence fence); +typedef VkResult (VKAPI_PTR *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); +typedef VkResult (VKAPI_PTR *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore); +typedef void (VKAPI_PTR *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore semaphore); +typedef VkResult (VKAPI_PTR *PFN_vkQueueSignalSemaphore)(VkQueue queue, VkSemaphore semaphore); +typedef VkResult (VKAPI_PTR *PFN_vkQueueWaitSemaphore)(VkQueue queue, VkSemaphore semaphore); +typedef VkResult (VKAPI_PTR *PFN_vkCreateEvent)(VkDevice device, const VkEventCreateInfo* pCreateInfo, VkEvent* pEvent); +typedef void (VKAPI_PTR *PFN_vkDestroyEvent)(VkDevice device, VkEvent event); +typedef VkResult (VKAPI_PTR *PFN_vkGetEventStatus)(VkDevice device, VkEvent event); +typedef VkResult (VKAPI_PTR *PFN_vkSetEvent)(VkDevice device, VkEvent event); +typedef VkResult (VKAPI_PTR *PFN_vkResetEvent)(VkDevice device, VkEvent event); +typedef VkResult (VKAPI_PTR *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, VkQueryPool* pQueryPool); +typedef void (VKAPI_PTR *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool); +typedef VkResult (VKAPI_PTR 
*PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, size_t* pDataSize, void* pData, VkQueryResultFlags flags); +typedef VkResult (VKAPI_PTR *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); +typedef void (VKAPI_PTR *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer); +typedef VkResult (VKAPI_PTR *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); +typedef void (VKAPI_PTR *PFN_vkDestroyBufferView)(VkDevice device, VkBufferView bufferView); +typedef VkResult (VKAPI_PTR *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, VkImage* pImage); +typedef void (VKAPI_PTR *PFN_vkDestroyImage)(VkDevice device, VkImage image); +typedef void (VKAPI_PTR *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); +typedef VkResult (VKAPI_PTR *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); +typedef void (VKAPI_PTR *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView); +typedef VkResult (VKAPI_PTR *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); +typedef void (VKAPI_PTR *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule); +typedef VkResult (VKAPI_PTR *PFN_vkCreateShader)(VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); +typedef void (VKAPI_PTR *PFN_vkDestroyShader)(VkDevice device, VkShader shader); +typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); +typedef void (VKAPI_PTR *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache); +typedef size_t (VKAPI_PTR *PFN_vkGetPipelineCacheSize)(VkDevice device, VkPipelineCache 
pipelineCache); +typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, void* pData); +typedef VkResult (VKAPI_PTR *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache destCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); +typedef VkResult (VKAPI_PTR *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); +typedef VkResult (VKAPI_PTR *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); +typedef void (VKAPI_PTR *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline); +typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout); +typedef void (VKAPI_PTR *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelineLayout pipelineLayout); +typedef VkResult (VKAPI_PTR *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); +typedef void (VKAPI_PTR *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); +typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDescriptorPool)(VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); +typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); +typedef VkResult (VKAPI_PTR *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); +typedef VkResult (VKAPI_PTR *PFN_vkAllocDescriptorSets)(VkDevice device, 
VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets); +typedef VkResult (VKAPI_PTR *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t count, const VkDescriptorSet* pDescriptorSets); +typedef void (VKAPI_PTR *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); +typedef VkResult (VKAPI_PTR *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); +typedef void (VKAPI_PTR *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer); +typedef VkResult (VKAPI_PTR *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); +typedef void (VKAPI_PTR *PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass); +typedef void (VKAPI_PTR *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); +typedef VkResult (VKAPI_PTR *PFN_vkCreateCommandPool)(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, VkCommandPool* pCommandPool); +typedef void (VKAPI_PTR *PFN_vkDestroyCommandPool)(VkDevice device, VkCommandPool commandPool); +typedef VkResult (VKAPI_PTR *PFN_vkResetCommandPool)(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags); +typedef VkResult (VKAPI_PTR *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCommandBufferCreateInfo* pCreateInfo, VkCommandBuffer* pCommandBuffer); +typedef void (VKAPI_PTR *PFN_vkDestroyCommandBuffer)(VkDevice device, VkCommandBuffer commandBuffer); +typedef VkResult (VKAPI_PTR *PFN_vkBeginCommandBuffer)(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo* pBeginInfo); +typedef VkResult (VKAPI_PTR *PFN_vkEndCommandBuffer)(VkCommandBuffer commandBuffer); +typedef VkResult 
(VKAPI_PTR *PFN_vkResetCommandBuffer)(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags); +typedef void (VKAPI_PTR *PFN_vkCmdBindPipeline)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); +typedef void (VKAPI_PTR *PFN_vkCmdSetViewport)(VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport* pViewports); +typedef void (VKAPI_PTR *PFN_vkCmdSetScissor)(VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D* pScissors); +typedef void (VKAPI_PTR *PFN_vkCmdSetLineWidth)(VkCommandBuffer commandBuffer, float lineWidth); +typedef void (VKAPI_PTR *PFN_vkCmdSetDepthBias)(VkCommandBuffer commandBuffer, float depthBias, float depthBiasClamp, float slopeScaledDepthBias); +typedef void (VKAPI_PTR *PFN_vkCmdSetBlendConstants)(VkCommandBuffer commandBuffer, const float blendConst[4]); +typedef void (VKAPI_PTR *PFN_vkCmdSetDepthBounds)(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds); +typedef void (VKAPI_PTR *PFN_vkCmdSetStencilCompareMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilCompareMask); +typedef void (VKAPI_PTR *PFN_vkCmdSetStencilWriteMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilWriteMask); +typedef void (VKAPI_PTR *PFN_vkCmdSetStencilReference)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilReference); +typedef void (VKAPI_PTR *PFN_vkCmdBindDescriptorSets)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); +typedef void (VKAPI_PTR *PFN_vkCmdBindIndexBuffer)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); +typedef void (VKAPI_PTR *PFN_vkCmdBindVertexBuffers)(VkCommandBuffer commandBuffer, uint32_t startBinding, uint32_t bindingCount, 
const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); +typedef void (VKAPI_PTR *PFN_vkCmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); +typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexed)(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); +typedef void (VKAPI_PTR *PFN_vkCmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); +typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); +typedef void (VKAPI_PTR *PFN_vkCmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z); +typedef void (VKAPI_PTR *PFN_vkCmdDispatchIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset); +typedef void (VKAPI_PTR *PFN_vkCmdCopyBuffer)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer destBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdCopyImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdBlitImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkTexFilter filter); +typedef void (VKAPI_PTR *PFN_vkCmdCopyBufferToImage)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdCopyImageToBuffer)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer destBuffer, uint32_t regionCount, 
const VkBufferImageCopy* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdUpdateBuffer)(VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); +typedef void (VKAPI_PTR *PFN_vkCmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); +typedef void (VKAPI_PTR *PFN_vkCmdClearColorImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); +typedef void (VKAPI_PTR *PFN_vkCmdClearDepthStencilImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); +typedef void (VKAPI_PTR *PFN_vkCmdClearColorAttachment)(VkCommandBuffer commandBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rectCount, const VkRect3D* pRects); +typedef void (VKAPI_PTR *PFN_vkCmdClearDepthStencilAttachment)(VkCommandBuffer commandBuffer, VkImageAspectFlags aspectMask, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rectCount, const VkRect3D* pRects); +typedef void (VKAPI_PTR *PFN_vkCmdResolveImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdSetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); +typedef void (VKAPI_PTR *PFN_vkCmdResetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); +typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, uint32_t memBarrierCount, 
const void* const* ppMemBarriers); +typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, VkBool32 byRegion, uint32_t memBarrierCount, const void* const* ppMemBarriers); +typedef void (VKAPI_PTR *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot, VkQueryControlFlags flags); +typedef void (VKAPI_PTR *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot); +typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); +typedef void (VKAPI_PTR *PFN_vkCmdWriteTimestamp)(VkCommandBuffer commandBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset); +typedef void (VKAPI_PTR *PFN_vkCmdCopyQueryPoolResults)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize destStride, VkQueryResultFlags flags); +typedef void (VKAPI_PTR *PFN_vkCmdPushConstants)(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t start, uint32_t length, const void* values); +typedef void (VKAPI_PTR *PFN_vkCmdBeginRenderPass)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkSubpassContents contents); +typedef void (VKAPI_PTR *PFN_vkCmdNextSubpass)(VkCommandBuffer commandBuffer, VkSubpassContents contents); +typedef void (VKAPI_PTR *PFN_vkCmdEndRenderPass)(VkCommandBuffer commandBuffer); +typedef void (VKAPI_PTR *PFN_vkCmdExecuteCommands)(VkCommandBuffer commandBuffer, uint32_t commandBuffersCount, const VkCommandBuffer* pCommandBuffers); #ifdef VK_PROTOTYPES -VkResult VKAPI vkCreateInstance( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateInstance( const VkInstanceCreateInfo* pCreateInfo, VkInstance* pInstance); -void VKAPI vkDestroyInstance( +VKAPI_ATTR void 
VKAPI_CALL vkDestroyInstance( VkInstance instance); -VkResult VKAPI vkEnumeratePhysicalDevices( +VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDevices( VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); -VkResult VKAPI vkGetPhysicalDeviceFeatures( +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFeatures( VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); -VkResult VKAPI vkGetPhysicalDeviceFormatProperties( +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFormatProperties( VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); -VkResult VKAPI vkGetPhysicalDeviceImageFormatProperties( +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceImageFormatProperties( VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, @@ -2243,83 +2243,83 @@ VkResult VKAPI vkGetPhysicalDeviceImageFormatProperties( VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties); -VkResult VKAPI vkGetPhysicalDeviceProperties( +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceProperties( VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); -VkResult VKAPI vkGetPhysicalDeviceQueueFamilyProperties( +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyProperties( VkPhysicalDevice physicalDevice, uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties); -VkResult VKAPI vkGetPhysicalDeviceMemoryProperties( +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceMemoryProperties( VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); -PFN_vkVoidFunction VKAPI vkGetInstanceProcAddr( +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr( VkInstance instance, const char* pName); -PFN_vkVoidFunction VKAPI vkGetDeviceProcAddr( +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr( VkDevice device, const char* pName); -VkResult VKAPI vkCreateDevice( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice( VkPhysicalDevice 
physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); -void VKAPI vkDestroyDevice( +VKAPI_ATTR void VKAPI_CALL vkDestroyDevice( VkDevice device); -VkResult VKAPI vkEnumerateInstanceExtensionProperties( +VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceExtensionProperties( const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); -VkResult VKAPI vkEnumerateDeviceExtensionProperties( +VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceExtensionProperties( VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); -VkResult VKAPI vkEnumerateInstanceLayerProperties( +VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceLayerProperties( uint32_t* pCount, VkLayerProperties* pProperties); -VkResult VKAPI vkEnumerateDeviceLayerProperties( +VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceLayerProperties( VkPhysicalDevice physicalDevice, uint32_t* pCount, VkLayerProperties* pProperties); -VkResult VKAPI vkGetDeviceQueue( +VKAPI_ATTR void VKAPI_CALL vkGetDeviceQueue( VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue); -VkResult VKAPI vkQueueSubmit( +VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( VkQueue queue, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers, VkFence fence); -VkResult VKAPI vkQueueWaitIdle( +VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitIdle( VkQueue queue); -VkResult VKAPI vkDeviceWaitIdle( +VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle( VkDevice device); -VkResult VKAPI vkAllocMemory( +VKAPI_ATTR VkResult VKAPI_CALL vkAllocMemory( VkDevice device, const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem); -void VKAPI vkFreeMemory( +VKAPI_ATTR void VKAPI_CALL vkFreeMemory( VkDevice device, VkDeviceMemory mem); -VkResult VKAPI vkMapMemory( +VKAPI_ATTR VkResult VKAPI_CALL vkMapMemory( VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, @@ -2327,54 +2327,54 @@ VkResult VKAPI vkMapMemory( VkMemoryMapFlags flags, void** 
ppData); -void VKAPI vkUnmapMemory( +VKAPI_ATTR void VKAPI_CALL vkUnmapMemory( VkDevice device, VkDeviceMemory mem); -VkResult VKAPI vkFlushMappedMemoryRanges( +VKAPI_ATTR VkResult VKAPI_CALL vkFlushMappedMemoryRanges( VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); -VkResult VKAPI vkInvalidateMappedMemoryRanges( +VKAPI_ATTR VkResult VKAPI_CALL vkInvalidateMappedMemoryRanges( VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); -VkResult VKAPI vkGetDeviceMemoryCommitment( +VKAPI_ATTR void VKAPI_CALL vkGetDeviceMemoryCommitment( VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes); -VkResult VKAPI vkBindBufferMemory( +VKAPI_ATTR VkResult VKAPI_CALL vkBindBufferMemory( VkDevice device, VkBuffer buffer, VkDeviceMemory mem, VkDeviceSize memOffset); -VkResult VKAPI vkBindImageMemory( +VKAPI_ATTR VkResult VKAPI_CALL vkBindImageMemory( VkDevice device, VkImage image, VkDeviceMemory mem, VkDeviceSize memOffset); -VkResult VKAPI vkGetBufferMemoryRequirements( +VKAPI_ATTR void VKAPI_CALL vkGetBufferMemoryRequirements( VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); -VkResult VKAPI vkGetImageMemoryRequirements( +VKAPI_ATTR void VKAPI_CALL vkGetImageMemoryRequirements( VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); -VkResult VKAPI vkGetImageSparseMemoryRequirements( +VKAPI_ATTR void VKAPI_CALL vkGetImageSparseMemoryRequirements( VkDevice device, VkImage image, uint32_t* pNumRequirements, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); -VkResult VKAPI vkGetPhysicalDeviceSparseImageFormatProperties( +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceSparseImageFormatProperties( VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, @@ -2384,97 +2384,97 @@ VkResult VKAPI vkGetPhysicalDeviceSparseImageFormatProperties( uint32_t* pNumProperties, VkSparseImageFormatProperties* pProperties); -VkResult VKAPI 
vkQueueBindSparseBufferMemory( +VKAPI_ATTR VkResult VKAPI_CALL vkQueueBindSparseBufferMemory( VkQueue queue, VkBuffer buffer, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); -VkResult VKAPI vkQueueBindSparseImageOpaqueMemory( +VKAPI_ATTR VkResult VKAPI_CALL vkQueueBindSparseImageOpaqueMemory( VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); -VkResult VKAPI vkQueueBindSparseImageMemory( +VKAPI_ATTR VkResult VKAPI_CALL vkQueueBindSparseImageMemory( VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseImageMemoryBindInfo* pBindInfo); -VkResult VKAPI vkCreateFence( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateFence( VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); -void VKAPI vkDestroyFence( +VKAPI_ATTR void VKAPI_CALL vkDestroyFence( VkDevice device, VkFence fence); -VkResult VKAPI vkResetFences( +VKAPI_ATTR VkResult VKAPI_CALL vkResetFences( VkDevice device, uint32_t fenceCount, const VkFence* pFences); -VkResult VKAPI vkGetFenceStatus( +VKAPI_ATTR VkResult VKAPI_CALL vkGetFenceStatus( VkDevice device, VkFence fence); -VkResult VKAPI vkWaitForFences( +VKAPI_ATTR VkResult VKAPI_CALL vkWaitForFences( VkDevice device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); -VkResult VKAPI vkCreateSemaphore( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore( VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore); -void VKAPI vkDestroySemaphore( +VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore( VkDevice device, VkSemaphore semaphore); -VkResult VKAPI vkQueueSignalSemaphore( +VKAPI_ATTR VkResult VKAPI_CALL vkQueueSignalSemaphore( VkQueue queue, VkSemaphore semaphore); -VkResult VKAPI vkQueueWaitSemaphore( +VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitSemaphore( VkQueue queue, VkSemaphore semaphore); -VkResult VKAPI vkCreateEvent( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateEvent( VkDevice device, const VkEventCreateInfo* pCreateInfo, 
VkEvent* pEvent); -void VKAPI vkDestroyEvent( +VKAPI_ATTR void VKAPI_CALL vkDestroyEvent( VkDevice device, VkEvent event); -VkResult VKAPI vkGetEventStatus( +VKAPI_ATTR VkResult VKAPI_CALL vkGetEventStatus( VkDevice device, VkEvent event); -VkResult VKAPI vkSetEvent( +VKAPI_ATTR VkResult VKAPI_CALL vkSetEvent( VkDevice device, VkEvent event); -VkResult VKAPI vkResetEvent( +VKAPI_ATTR VkResult VKAPI_CALL vkResetEvent( VkDevice device, VkEvent event); -VkResult VKAPI vkCreateQueryPool( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateQueryPool( VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, VkQueryPool* pQueryPool); -void VKAPI vkDestroyQueryPool( +VKAPI_ATTR void VKAPI_CALL vkDestroyQueryPool( VkDevice device, VkQueryPool queryPool); -VkResult VKAPI vkGetQueryPoolResults( +VKAPI_ATTR VkResult VKAPI_CALL vkGetQueryPoolResults( VkDevice device, VkQueryPool queryPool, uint32_t startQuery, @@ -2483,149 +2483,149 @@ VkResult VKAPI vkGetQueryPoolResults( void* pData, VkQueryResultFlags flags); -VkResult VKAPI vkCreateBuffer( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateBuffer( VkDevice device, const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); -void VKAPI vkDestroyBuffer( +VKAPI_ATTR void VKAPI_CALL vkDestroyBuffer( VkDevice device, VkBuffer buffer); -VkResult VKAPI vkCreateBufferView( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateBufferView( VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); -void VKAPI vkDestroyBufferView( +VKAPI_ATTR void VKAPI_CALL vkDestroyBufferView( VkDevice device, VkBufferView bufferView); -VkResult VKAPI vkCreateImage( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateImage( VkDevice device, const VkImageCreateInfo* pCreateInfo, VkImage* pImage); -void VKAPI vkDestroyImage( +VKAPI_ATTR void VKAPI_CALL vkDestroyImage( VkDevice device, VkImage image); -VkResult VKAPI vkGetImageSubresourceLayout( +VKAPI_ATTR void VKAPI_CALL vkGetImageSubresourceLayout( VkDevice device, VkImage image, const VkImageSubresource* pSubresource, 
VkSubresourceLayout* pLayout); -VkResult VKAPI vkCreateImageView( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateImageView( VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); -void VKAPI vkDestroyImageView( +VKAPI_ATTR void VKAPI_CALL vkDestroyImageView( VkDevice device, VkImageView imageView); -VkResult VKAPI vkCreateShaderModule( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateShaderModule( VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); -void VKAPI vkDestroyShaderModule( +VKAPI_ATTR void VKAPI_CALL vkDestroyShaderModule( VkDevice device, VkShaderModule shaderModule); -VkResult VKAPI vkCreateShader( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateShader( VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); -void VKAPI vkDestroyShader( +VKAPI_ATTR void VKAPI_CALL vkDestroyShader( VkDevice device, VkShader shader); -VkResult VKAPI vkCreatePipelineCache( +VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineCache( VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); -void VKAPI vkDestroyPipelineCache( +VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineCache( VkDevice device, VkPipelineCache pipelineCache); -size_t VKAPI vkGetPipelineCacheSize( +VKAPI_ATTR size_t VKAPI_CALL vkGetPipelineCacheSize( VkDevice device, VkPipelineCache pipelineCache); -VkResult VKAPI vkGetPipelineCacheData( +VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineCacheData( VkDevice device, VkPipelineCache pipelineCache, void* pData); -VkResult VKAPI vkMergePipelineCaches( +VKAPI_ATTR VkResult VKAPI_CALL vkMergePipelineCaches( VkDevice device, VkPipelineCache destCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); -VkResult VKAPI vkCreateGraphicsPipelines( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines( VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); -VkResult VKAPI 
vkCreateComputePipelines( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines( VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); -void VKAPI vkDestroyPipeline( +VKAPI_ATTR void VKAPI_CALL vkDestroyPipeline( VkDevice device, VkPipeline pipeline); -VkResult VKAPI vkCreatePipelineLayout( +VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineLayout( VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout); -void VKAPI vkDestroyPipelineLayout( +VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineLayout( VkDevice device, VkPipelineLayout pipelineLayout); -VkResult VKAPI vkCreateSampler( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSampler( VkDevice device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); -void VKAPI vkDestroySampler( +VKAPI_ATTR void VKAPI_CALL vkDestroySampler( VkDevice device, VkSampler sampler); -VkResult VKAPI vkCreateDescriptorSetLayout( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorSetLayout( VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); -void VKAPI vkDestroyDescriptorSetLayout( +VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorSetLayout( VkDevice device, VkDescriptorSetLayout descriptorSetLayout); -VkResult VKAPI vkCreateDescriptorPool( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorPool( VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); -void VKAPI vkDestroyDescriptorPool( +VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorPool( VkDevice device, VkDescriptorPool descriptorPool); -VkResult VKAPI vkResetDescriptorPool( +VKAPI_ATTR VkResult VKAPI_CALL vkResetDescriptorPool( VkDevice device, VkDescriptorPool descriptorPool); -VkResult VKAPI vkAllocDescriptorSets( +VKAPI_ATTR VkResult VKAPI_CALL vkAllocDescriptorSets( VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, @@ -2633,126 
+2633,126 @@ VkResult VKAPI vkAllocDescriptorSets( const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets); -VkResult VKAPI vkFreeDescriptorSets( +VKAPI_ATTR VkResult VKAPI_CALL vkFreeDescriptorSets( VkDevice device, VkDescriptorPool descriptorPool, uint32_t count, const VkDescriptorSet* pDescriptorSets); -void VKAPI vkUpdateDescriptorSets( +VKAPI_ATTR void VKAPI_CALL vkUpdateDescriptorSets( VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); -VkResult VKAPI vkCreateFramebuffer( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateFramebuffer( VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); -void VKAPI vkDestroyFramebuffer( +VKAPI_ATTR void VKAPI_CALL vkDestroyFramebuffer( VkDevice device, VkFramebuffer framebuffer); -VkResult VKAPI vkCreateRenderPass( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateRenderPass( VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); -void VKAPI vkDestroyRenderPass( +VKAPI_ATTR void VKAPI_CALL vkDestroyRenderPass( VkDevice device, VkRenderPass renderPass); -VkResult VKAPI vkGetRenderAreaGranularity( +VKAPI_ATTR void VKAPI_CALL vkGetRenderAreaGranularity( VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); -VkResult VKAPI vkCreateCommandPool( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool( VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, VkCommandPool* pCommandPool); -void VKAPI vkDestroyCommandPool( +VKAPI_ATTR void VKAPI_CALL vkDestroyCommandPool( VkDevice device, VkCommandPool commandPool); -VkResult VKAPI vkResetCommandPool( +VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandPool( VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags); -VkResult VKAPI vkCreateCommandBuffer( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandBuffer( VkDevice device, const VkCommandBufferCreateInfo* pCreateInfo, VkCommandBuffer* 
pCommandBuffer); -void VKAPI vkDestroyCommandBuffer( +VKAPI_ATTR void VKAPI_CALL vkDestroyCommandBuffer( VkDevice device, VkCommandBuffer commandBuffer); -VkResult VKAPI vkBeginCommandBuffer( +VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer( VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo* pBeginInfo); -VkResult VKAPI vkEndCommandBuffer( +VKAPI_ATTR VkResult VKAPI_CALL vkEndCommandBuffer( VkCommandBuffer commandBuffer); -VkResult VKAPI vkResetCommandBuffer( +VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandBuffer( VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags); -void VKAPI vkCmdBindPipeline( +VKAPI_ATTR void VKAPI_CALL vkCmdBindPipeline( VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); -void VKAPI vkCmdSetViewport( +VKAPI_ATTR void VKAPI_CALL vkCmdSetViewport( VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport* pViewports); -void VKAPI vkCmdSetScissor( +VKAPI_ATTR void VKAPI_CALL vkCmdSetScissor( VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D* pScissors); -void VKAPI vkCmdSetLineWidth( +VKAPI_ATTR void VKAPI_CALL vkCmdSetLineWidth( VkCommandBuffer commandBuffer, float lineWidth); -void VKAPI vkCmdSetDepthBias( +VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBias( VkCommandBuffer commandBuffer, float depthBias, float depthBiasClamp, float slopeScaledDepthBias); -void VKAPI vkCmdSetBlendConstants( +VKAPI_ATTR void VKAPI_CALL vkCmdSetBlendConstants( VkCommandBuffer commandBuffer, const float blendConst[4]); -void VKAPI vkCmdSetDepthBounds( +VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBounds( VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds); -void VKAPI vkCmdSetStencilCompareMask( +VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilCompareMask( VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilCompareMask); -void VKAPI vkCmdSetStencilWriteMask( +VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilWriteMask( VkCommandBuffer 
commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilWriteMask); -void VKAPI vkCmdSetStencilReference( +VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilReference( VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilReference); -void VKAPI vkCmdBindDescriptorSets( +VKAPI_ATTR void VKAPI_CALL vkCmdBindDescriptorSets( VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, @@ -2762,27 +2762,27 @@ void VKAPI vkCmdBindDescriptorSets( uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); -void VKAPI vkCmdBindIndexBuffer( +VKAPI_ATTR void VKAPI_CALL vkCmdBindIndexBuffer( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); -void VKAPI vkCmdBindVertexBuffers( +VKAPI_ATTR void VKAPI_CALL vkCmdBindVertexBuffers( VkCommandBuffer commandBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); -void VKAPI vkCmdDraw( +VKAPI_ATTR void VKAPI_CALL vkCmdDraw( VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); -void VKAPI vkCmdDrawIndexed( +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexed( VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, @@ -2790,39 +2790,39 @@ void VKAPI vkCmdDrawIndexed( int32_t vertexOffset, uint32_t firstInstance); -void VKAPI vkCmdDrawIndirect( +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirect( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); -void VKAPI vkCmdDrawIndexedIndirect( +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirect( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); -void VKAPI vkCmdDispatch( +VKAPI_ATTR void VKAPI_CALL vkCmdDispatch( VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z); -void VKAPI vkCmdDispatchIndirect( +VKAPI_ATTR void VKAPI_CALL vkCmdDispatchIndirect( 
VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset); -void VKAPI vkCmdCopyBuffer( +VKAPI_ATTR void VKAPI_CALL vkCmdCopyBuffer( VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer destBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); -void VKAPI vkCmdCopyImage( +VKAPI_ATTR void VKAPI_CALL vkCmdCopyImage( VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, @@ -2831,7 +2831,7 @@ void VKAPI vkCmdCopyImage( uint32_t regionCount, const VkImageCopy* pRegions); -void VKAPI vkCmdBlitImage( +VKAPI_ATTR void VKAPI_CALL vkCmdBlitImage( VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, @@ -2841,7 +2841,7 @@ void VKAPI vkCmdBlitImage( const VkImageBlit* pRegions, VkTexFilter filter); -void VKAPI vkCmdCopyBufferToImage( +VKAPI_ATTR void VKAPI_CALL vkCmdCopyBufferToImage( VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage destImage, @@ -2849,7 +2849,7 @@ void VKAPI vkCmdCopyBufferToImage( uint32_t regionCount, const VkBufferImageCopy* pRegions); -void VKAPI vkCmdCopyImageToBuffer( +VKAPI_ATTR void VKAPI_CALL vkCmdCopyImageToBuffer( VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, @@ -2857,21 +2857,21 @@ void VKAPI vkCmdCopyImageToBuffer( uint32_t regionCount, const VkBufferImageCopy* pRegions); -void VKAPI vkCmdUpdateBuffer( +VKAPI_ATTR void VKAPI_CALL vkCmdUpdateBuffer( VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); -void VKAPI vkCmdFillBuffer( +VKAPI_ATTR void VKAPI_CALL vkCmdFillBuffer( VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); -void VKAPI vkCmdClearColorImage( +VKAPI_ATTR void VKAPI_CALL vkCmdClearColorImage( VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, @@ -2879,7 +2879,7 @@ void VKAPI vkCmdClearColorImage( uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -void 
VKAPI vkCmdClearDepthStencilImage( +VKAPI_ATTR void VKAPI_CALL vkCmdClearDepthStencilImage( VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, @@ -2887,7 +2887,7 @@ void VKAPI vkCmdClearDepthStencilImage( uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -void VKAPI vkCmdClearColorAttachment( +VKAPI_ATTR void VKAPI_CALL vkCmdClearColorAttachment( VkCommandBuffer commandBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, @@ -2895,7 +2895,7 @@ void VKAPI vkCmdClearColorAttachment( uint32_t rectCount, const VkRect3D* pRects); -void VKAPI vkCmdClearDepthStencilAttachment( +VKAPI_ATTR void VKAPI_CALL vkCmdClearDepthStencilAttachment( VkCommandBuffer commandBuffer, VkImageAspectFlags aspectMask, VkImageLayout imageLayout, @@ -2903,7 +2903,7 @@ void VKAPI vkCmdClearDepthStencilAttachment( uint32_t rectCount, const VkRect3D* pRects); -void VKAPI vkCmdResolveImage( +VKAPI_ATTR void VKAPI_CALL vkCmdResolveImage( VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, @@ -2912,17 +2912,17 @@ void VKAPI vkCmdResolveImage( uint32_t regionCount, const VkImageResolve* pRegions); -void VKAPI vkCmdSetEvent( +VKAPI_ATTR void VKAPI_CALL vkCmdSetEvent( VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); -void VKAPI vkCmdResetEvent( +VKAPI_ATTR void VKAPI_CALL vkCmdResetEvent( VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); -void VKAPI vkCmdWaitEvents( +VKAPI_ATTR void VKAPI_CALL vkCmdWaitEvents( VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, @@ -2931,7 +2931,7 @@ void VKAPI vkCmdWaitEvents( uint32_t memBarrierCount, const void* const* ppMemBarriers); -void VKAPI vkCmdPipelineBarrier( +VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, @@ -2939,30 +2939,30 @@ void VKAPI vkCmdPipelineBarrier( uint32_t memBarrierCount, const void* const* 
ppMemBarriers); -void VKAPI vkCmdBeginQuery( +VKAPI_ATTR void VKAPI_CALL vkCmdBeginQuery( VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot, VkQueryControlFlags flags); -void VKAPI vkCmdEndQuery( +VKAPI_ATTR void VKAPI_CALL vkCmdEndQuery( VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot); -void VKAPI vkCmdResetQueryPool( +VKAPI_ATTR void VKAPI_CALL vkCmdResetQueryPool( VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); -void VKAPI vkCmdWriteTimestamp( +VKAPI_ATTR void VKAPI_CALL vkCmdWriteTimestamp( VkCommandBuffer commandBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset); -void VKAPI vkCmdCopyQueryPoolResults( +VKAPI_ATTR void VKAPI_CALL vkCmdCopyQueryPoolResults( VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, @@ -2972,7 +2972,7 @@ void VKAPI vkCmdCopyQueryPoolResults( VkDeviceSize destStride, VkQueryResultFlags flags); -void VKAPI vkCmdPushConstants( +VKAPI_ATTR void VKAPI_CALL vkCmdPushConstants( VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, @@ -2980,19 +2980,19 @@ void VKAPI vkCmdPushConstants( uint32_t length, const void* values); -void VKAPI vkCmdBeginRenderPass( +VKAPI_ATTR void VKAPI_CALL vkCmdBeginRenderPass( VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkSubpassContents contents); -void VKAPI vkCmdNextSubpass( +VKAPI_ATTR void VKAPI_CALL vkCmdNextSubpass( VkCommandBuffer commandBuffer, VkSubpassContents contents); -void VKAPI vkCmdEndRenderPass( +VKAPI_ATTR void VKAPI_CALL vkCmdEndRenderPass( VkCommandBuffer commandBuffer); -void VKAPI vkCmdExecuteCommands( +VKAPI_ATTR void VKAPI_CALL vkCmdExecuteCommands( VkCommandBuffer commandBuffer, uint32_t commandBuffersCount, const VkCommandBuffer* pCommandBuffers); diff --git a/include/vulkan/vulkan_intel.h b/include/vulkan/vulkan_intel.h index 631e63657bf..2c963e7d51d 100644 --- 
a/include/vulkan/vulkan_intel.h +++ b/include/vulkan/vulkan_intel.h @@ -42,11 +42,11 @@ typedef struct VkDmaBufImageCreateInfo_ uint32_t strideInBytes; } VkDmaBufImageCreateInfo; -typedef VkResult (VKAPI *PFN_vkCreateDmaBufImageINTEL)(VkDevice device, const VkDmaBufImageCreateInfo* pCreateInfo, VkDeviceMemory* pMem, VkImage* pImage); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDmaBufImageINTEL)(VkDevice device, const VkDmaBufImageCreateInfo* pCreateInfo, VkDeviceMemory* pMem, VkImage* pImage); #ifdef VK_PROTOTYPES -VkResult VKAPI vkCreateDmaBufImageINTEL( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDmaBufImageINTEL( VkDevice _device, const VkDmaBufImageCreateInfo* pCreateInfo, VkDeviceMemory* pMem, diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 15ad2107627..49d74bbfc78 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -330,7 +330,7 @@ VkResult anv_EnumeratePhysicalDevices( return VK_SUCCESS; } -VkResult anv_GetPhysicalDeviceFeatures( +void anv_GetPhysicalDeviceFeatures( VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures) { @@ -378,11 +378,9 @@ VkResult anv_GetPhysicalDeviceFeatures( .shaderInt16 = false, .alphaToOne = true, }; - - return VK_SUCCESS; } -VkResult anv_GetPhysicalDeviceProperties( +void anv_GetPhysicalDeviceProperties( VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties) { @@ -514,18 +512,16 @@ VkResult anv_GetPhysicalDeviceProperties( strcpy(pProperties->deviceName, pdevice->name); snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_LENGTH, "anv-%s", MESA_GIT_SHA1 + 4); - - return VK_SUCCESS; } -VkResult anv_GetPhysicalDeviceQueueFamilyProperties( +void anv_GetPhysicalDeviceQueueFamilyProperties( VkPhysicalDevice physicalDevice, uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties) { if (pQueueFamilyProperties == NULL) { *pCount = 1; - return VK_SUCCESS; + return; } assert(*pCount >= 1); @@ -537,11 +533,9 @@ VkResult anv_GetPhysicalDeviceQueueFamilyProperties( 
.queueCount = 1, .supportsTimestamps = true, }; - - return VK_SUCCESS; } -VkResult anv_GetPhysicalDeviceMemoryProperties( +void anv_GetPhysicalDeviceMemoryProperties( VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties) { @@ -565,8 +559,6 @@ VkResult anv_GetPhysicalDeviceMemoryProperties( .size = heap_size, .flags = VK_MEMORY_HEAP_HOST_LOCAL_BIT, }; - - return VK_SUCCESS; } PFN_vkVoidFunction anv_GetInstanceProcAddr( @@ -795,7 +787,7 @@ VkResult anv_EnumerateDeviceLayerProperties( return vk_error(VK_ERROR_LAYER_NOT_PRESENT); } -VkResult anv_GetDeviceQueue( +void anv_GetDeviceQueue( VkDevice _device, uint32_t queueNodeIndex, uint32_t queueIndex, @@ -806,8 +798,6 @@ VkResult anv_GetDeviceQueue( assert(queueIndex == 0); *pQueue = anv_queue_to_handle(&device->queue); - - return VK_SUCCESS; } VkResult anv_QueueSubmit( @@ -1059,7 +1049,7 @@ VkResult anv_InvalidateMappedMemoryRanges( return anv_FlushMappedMemoryRanges(device, memRangeCount, pMemRanges); } -VkResult anv_GetBufferMemoryRequirements( +void anv_GetBufferMemoryRequirements( VkDevice device, VkBuffer _buffer, VkMemoryRequirements* pMemoryRequirements) @@ -1079,11 +1069,9 @@ VkResult anv_GetBufferMemoryRequirements( pMemoryRequirements->size = buffer->size; pMemoryRequirements->alignment = 16; - - return VK_SUCCESS; } -VkResult anv_GetImageMemoryRequirements( +void anv_GetImageMemoryRequirements( VkDevice device, VkImage _image, VkMemoryRequirements* pMemoryRequirements) @@ -1103,26 +1091,23 @@ VkResult anv_GetImageMemoryRequirements( pMemoryRequirements->size = image->size; pMemoryRequirements->alignment = image->alignment; - - return VK_SUCCESS; } -VkResult anv_GetImageSparseMemoryRequirements( +void anv_GetImageSparseMemoryRequirements( VkDevice device, VkImage image, uint32_t* pNumRequirements, VkSparseImageMemoryRequirements* pSparseMemoryRequirements) { - return vk_error(VK_UNSUPPORTED); + stub(); } -VkResult anv_GetDeviceMemoryCommitment( +void 
anv_GetDeviceMemoryCommitment( VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes) { *pCommittedMemoryInBytes = 0; - stub_return(VK_SUCCESS); } VkResult anv_BindBufferMemory( diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 1c5cd4b56f9..9d0c4d700e5 100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -58,7 +58,7 @@ anv_dump_image_to_ppm(struct anv_device *device, assert(result == VK_SUCCESS); VkMemoryRequirements reqs; - result = anv_GetImageMemoryRequirements(vk_device, copy_image, &reqs); + anv_GetImageMemoryRequirements(vk_device, copy_image, &reqs); VkDeviceMemory memory; result = anv_AllocMemory(vk_device, @@ -176,13 +176,12 @@ anv_dump_image_to_ppm(struct anv_device *device, assert(result == VK_SUCCESS); VkSubresourceLayout layout; - result = anv_GetImageSubresourceLayout(vk_device, copy_image, + anv_GetImageSubresourceLayout(vk_device, copy_image, &(VkImageSubresource) { .aspect = VK_IMAGE_ASPECT_COLOR, .mipLevel = 0, .arrayLayer = 0, }, &layout); - assert(result == VK_SUCCESS); map += layout.offset; diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py index 2e56274eb50..406f1421054 100644 --- a/src/vulkan/anv_entrypoints_gen.py +++ b/src/vulkan/anv_entrypoints_gen.py @@ -27,7 +27,7 @@ import fileinput, re, sys # Each function typedef in the vulkan.h header is all on one line and matches # this regepx. We hope that won't change. 
-p = re.compile('typedef ([^ ]*) *\(VKAPI \*PFN_vk([^(]*)\)(.*);') +p = re.compile('typedef ([^ ]*) *\(VKAPI_PTR \*PFN_vk([^(]*)\)(.*);') entrypoints = [] diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 8d5eae4c12e..8afef443696 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -227,17 +227,17 @@ anv_format_for_vk_format(VkFormat format) // Format capabilities -VkResult anv_validate_GetPhysicalDeviceFormatProperties( +void anv_validate_GetPhysicalDeviceFormatProperties( VkPhysicalDevice physicalDevice, VkFormat _format, VkFormatProperties* pFormatProperties) { const struct anv_format *format = anv_format_for_vk_format(_format); fprintf(stderr, "vkGetFormatProperties(%s)\n", format->name); - return anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); + anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); } -static VkResult +static void anv_physical_device_get_format_properties(struct anv_physical_device *physical_device, const struct anv_format *format, VkFormatProperties *out_properties) @@ -296,32 +296,25 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d out_properties->optimalTilingFeatures = tiled; out_properties->bufferFeatures = 0; /* FINISHME */ - return VK_SUCCESS; + return; unsupported: out_properties->linearTilingFeatures = 0; out_properties->optimalTilingFeatures = 0; - - return VK_SUCCESS; } -VkResult anv_GetPhysicalDeviceFormatProperties( +void anv_GetPhysicalDeviceFormatProperties( VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties) { ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - VkResult result; - result = anv_physical_device_get_format_properties( + anv_physical_device_get_format_properties( physical_device, anv_format_for_vk_format(format), pFormatProperties); - if (result != VK_SUCCESS) - return vk_error(result); - - return VK_SUCCESS; } VkResult 
anv_GetPhysicalDeviceImageFormatProperties( @@ -340,12 +333,9 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( VkExtent3D maxExtent; uint32_t maxMipLevels; uint32_t maxArraySize; - VkResult result; - result = anv_physical_device_get_format_properties(physical_device, format, - &format_props); - if (result != VK_SUCCESS) - return vk_error(result); + anv_physical_device_get_format_properties(physical_device, format, + &format_props); /* Extract the VkFormatFeatureFlags that are relevant for the queried * tiling. @@ -453,7 +443,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( .maxResourceSize = UINT32_MAX, }; - return VK_SUCCESS; + return VK_SUCCESS; unsupported: *pImageFormatProperties = (VkImageFormatProperties) { @@ -467,7 +457,7 @@ unsupported: return VK_SUCCESS; } -VkResult anv_GetPhysicalDeviceSparseImageFormatProperties( +void anv_GetPhysicalDeviceSparseImageFormatProperties( VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, @@ -479,6 +469,4 @@ VkResult anv_GetPhysicalDeviceSparseImageFormatProperties( { /* Sparse images are not yet supported. 
*/ *pNumProperties = 0; - - return VK_SUCCESS; } diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 973e8bd609a..1a500d2b4e4 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -422,7 +422,7 @@ anv_surface_get_subresource_layout(struct anv_image *image, layout->size = surface->stride * image->extent.height; } -VkResult anv_GetImageSubresourceLayout( +void anv_GetImageSubresourceLayout( VkDevice device, VkImage _image, const VkImageSubresource* pSubresource, @@ -444,10 +444,8 @@ VkResult anv_GetImageSubresourceLayout( pSubresource, pLayout); break; default: - return vk_error(VK_UNSUPPORTED); + assert(!"Invalid image aspect"); } - - return VK_SUCCESS; } VkResult diff --git a/src/vulkan/anv_pass.c b/src/vulkan/anv_pass.c index bf0f830f3c1..a89c494f7e0 100644 --- a/src/vulkan/anv_pass.c +++ b/src/vulkan/anv_pass.c @@ -133,12 +133,10 @@ void anv_DestroyRenderPass( anv_device_free(device, pass); } -VkResult anv_GetRenderAreaGranularity( +void anv_GetRenderAreaGranularity( VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity) { *pGranularity = (VkExtent2D) { 1, 1 }; - - return VK_SUCCESS; } -- cgit v1.2.3 From a53f23d93f7e7ee65cb4937178ea8c2c28484756 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 13:06:12 -0800 Subject: vk/0.210.0: Rework texture view component mapping --- include/vulkan/vulkan.h | 39 +++++++++++++++-------------- src/vulkan/anv_image.c | 16 ++++++------ src/vulkan/anv_meta.c | 60 --------------------------------------------- src/vulkan/anv_meta_clear.c | 6 ----- src/vulkan/gen7_state.c | 35 +++++++++++++++++--------- src/vulkan/gen8_state.c | 39 +++++++++++++++++++---------- 6 files changed, 78 insertions(+), 117 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 39cbb71c2d7..2f2dc11c3c2 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -467,18 +467,19 @@ typedef enum VkImageViewType { VK_IMAGE_VIEW_TYPE_MAX_ENUM = 
0x7FFFFFFF } VkImageViewType; -typedef enum { - VK_CHANNEL_SWIZZLE_ZERO = 0, - VK_CHANNEL_SWIZZLE_ONE = 1, - VK_CHANNEL_SWIZZLE_R = 2, - VK_CHANNEL_SWIZZLE_G = 3, - VK_CHANNEL_SWIZZLE_B = 4, - VK_CHANNEL_SWIZZLE_A = 5, - VK_CHANNEL_SWIZZLE_BEGIN_RANGE = VK_CHANNEL_SWIZZLE_ZERO, - VK_CHANNEL_SWIZZLE_END_RANGE = VK_CHANNEL_SWIZZLE_A, - VK_CHANNEL_SWIZZLE_NUM = (VK_CHANNEL_SWIZZLE_A - VK_CHANNEL_SWIZZLE_ZERO + 1), - VK_CHANNEL_SWIZZLE_MAX_ENUM = 0x7FFFFFFF -} VkChannelSwizzle; +typedef enum VkComponentSwizzle { + VK_COMPONENT_SWIZZLE_IDENTITY = 0, + VK_COMPONENT_SWIZZLE_ZERO = 1, + VK_COMPONENT_SWIZZLE_ONE = 2, + VK_COMPONENT_SWIZZLE_R = 3, + VK_COMPONENT_SWIZZLE_G = 4, + VK_COMPONENT_SWIZZLE_B = 5, + VK_COMPONENT_SWIZZLE_A = 6, + VK_COMPONENT_SWIZZLE_BEGIN_RANGE = VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_END_RANGE = VK_COMPONENT_SWIZZLE_A, + VK_COMPONENT_SWIZZLE_RANGE_SIZE = (VK_COMPONENT_SWIZZLE_A - VK_COMPONENT_SWIZZLE_IDENTITY + 1), + VK_COMPONENT_SWIZZLE_MAX_ENUM = 0x7FFFFFFF +} VkComponentSwizzle; typedef enum { VK_SHADER_STAGE_VERTEX = 0, @@ -1493,12 +1494,12 @@ typedef struct { VkDeviceSize depthPitch; } VkSubresourceLayout; -typedef struct { - VkChannelSwizzle r; - VkChannelSwizzle g; - VkChannelSwizzle b; - VkChannelSwizzle a; -} VkChannelMapping; +typedef struct VkComponentMapping { + VkComponentSwizzle r; + VkComponentSwizzle g; + VkComponentSwizzle b; + VkComponentSwizzle a; +} VkComponentMapping; typedef struct { VkImageAspectFlags aspectMask; @@ -1514,7 +1515,7 @@ typedef struct { VkImage image; VkImageViewType viewType; VkFormat format; - VkChannelMapping channels; + VkComponentMapping components; VkImageSubresourceRange subresourceRange; VkImageViewCreateFlags flags; } VkImageViewCreateInfo; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 1a500d2b4e4..0fe2df10a7a 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -474,14 +474,14 @@ anv_validate_CreateImageView(VkDevice _device, view_format_info = 
anv_format_for_vk_format(pCreateInfo->format); /* Validate channel swizzles. */ - assert(pCreateInfo->channels.r >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->channels.r <= VK_CHANNEL_SWIZZLE_END_RANGE); - assert(pCreateInfo->channels.g >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->channels.g <= VK_CHANNEL_SWIZZLE_END_RANGE); - assert(pCreateInfo->channels.b >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->channels.b <= VK_CHANNEL_SWIZZLE_END_RANGE); - assert(pCreateInfo->channels.a >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->channels.a <= VK_CHANNEL_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE); /* Validate subresource. 
*/ assert(subresource->aspectMask != 0); diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 143d637c55a..130557c1aa7 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -698,12 +698,6 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .image = src_image, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = copy_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, @@ -721,12 +715,6 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .image = dest_image, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = copy_format, - .channels = { - .r = VK_CHANNEL_SWIZZLE_R, - .g = VK_CHANNEL_SWIZZLE_G, - .b = VK_CHANNEL_SWIZZLE_B, - .a = VK_CHANNEL_SWIZZLE_A, - }, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, @@ -858,12 +846,6 @@ void anv_CmdCopyImage( .image = srcImage, .viewType = src_iview_type, .format = src_image->format->vk_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, .subresourceRange = { .aspectMask = 1 << pRegions[r].srcSubresource.aspect, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, @@ -908,12 +890,6 @@ void anv_CmdCopyImage( .image = destImage, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = dest_image->format->vk_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].destSubresource.mipLevel, @@ -970,12 +946,6 @@ void anv_CmdBlitImage( .image = srcImage, .viewType = src_iview_type, .format = src_image->format->vk_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, .subresourceRange = { .aspectMask = 1 << pRegions[r].srcSubresource.aspect, .baseMipLevel = 
pRegions[r].srcSubresource.mipLevel, @@ -1010,12 +980,6 @@ void anv_CmdBlitImage( .image = destImage, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = dest_image->format->vk_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].destSubresource.mipLevel, @@ -1133,12 +1097,6 @@ void anv_CmdCopyBufferToImage( .image = anv_image_to_handle(src_image), .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = proxy_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, .subresourceRange = { .aspectMask = 1 << proxy_aspect, .baseMipLevel = 0, @@ -1156,12 +1114,6 @@ void anv_CmdCopyBufferToImage( .image = anv_image_to_handle(dest_image), .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = proxy_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].imageSubresource.mipLevel, @@ -1233,12 +1185,6 @@ void anv_CmdCopyImageToBuffer( .image = srcImage, .viewType = src_iview_type, .format = src_image->format->vk_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, .subresourceRange = { .aspectMask = 1 << pRegions[r].imageSubresource.aspect, .baseMipLevel = pRegions[r].imageSubresource.mipLevel, @@ -1279,12 +1225,6 @@ void anv_CmdCopyImageToBuffer( .image = anv_image_to_handle(dest_image), .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = dest_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 0709c41db46..ff8183a736a 100644 --- 
a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -701,12 +701,6 @@ void anv_CmdClearColorImage( .image = _image, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = image->format->vk_format, - .channels = { - VK_CHANNEL_SWIZZLE_R, - VK_CHANNEL_SWIZZLE_G, - VK_CHANNEL_SWIZZLE_B, - VK_CHANNEL_SWIZZLE_A - }, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRanges[r].baseMipLevel + l, diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 7adb4485b90..924b7303da0 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -182,15 +182,24 @@ static const uint8_t anv_valign[] = { [4] = VALIGN_4, }; -static const uint32_t vk_to_gen_swizzle[] = { - [VK_CHANNEL_SWIZZLE_ZERO] = SCS_ZERO, - [VK_CHANNEL_SWIZZLE_ONE] = SCS_ONE, - [VK_CHANNEL_SWIZZLE_R] = SCS_RED, - [VK_CHANNEL_SWIZZLE_G] = SCS_GREEN, - [VK_CHANNEL_SWIZZLE_B] = SCS_BLUE, - [VK_CHANNEL_SWIZZLE_A] = SCS_ALPHA +static const uint32_t vk_to_gen_swizzle_map[] = { + [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, + [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, + [VK_COMPONENT_SWIZZLE_R] = SCS_RED, + [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, + [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, + [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA }; +static uint32_t +vk_to_gen_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) +{ + if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) + return vk_to_gen_swizzle_map[component]; + else + return vk_to_gen_swizzle_map[swizzle]; +} + GENX_FUNC(GEN7, GEN75) void genX(image_view_init)(struct anv_image_view *iview, struct anv_device *device, @@ -263,10 +272,14 @@ genX(image_view_init)(struct anv_image_view *iview, .MCSEnable = false, # if (ANV_IS_HASWELL) - .ShaderChannelSelectR = vk_to_gen_swizzle[pCreateInfo->channels.r], - .ShaderChannelSelectG = vk_to_gen_swizzle[pCreateInfo->channels.g], - .ShaderChannelSelectB = vk_to_gen_swizzle[pCreateInfo->channels.b], - .ShaderChannelSelectA = vk_to_gen_swizzle[pCreateInfo->channels.a], + .ShaderChannelSelectR 
= vk_to_gen_swizzle(pCreateInfo->components.r, + VK_COMPONENT_SWIZZLE_R), + .ShaderChannelSelectG = vk_to_gen_swizzle(pCreateInfo->components.g, + VK_COMPONENT_SWIZZLE_G), + .ShaderChannelSelectB = vk_to_gen_swizzle(pCreateInfo->components.b, + VK_COMPONENT_SWIZZLE_B), + .ShaderChannelSelectA = vk_to_gen_swizzle(pCreateInfo->components.a, + VK_COMPONENT_SWIZZLE_A), # else /* XXX: Seriously? */ .RedClearColor = 0, .GreenClearColor = 0, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index c7dd6b3f836..12a30534bfb 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -87,6 +87,24 @@ alloc_surface_state(struct anv_device *device, } } +static const uint32_t vk_to_gen_swizzle_map[] = { + [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, + [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, + [VK_COMPONENT_SWIZZLE_R] = SCS_RED, + [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, + [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, + [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA +}; + +static uint32_t +vk_to_gen_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) +{ + if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) + return vk_to_gen_swizzle_map[component]; + else + return vk_to_gen_swizzle_map[swizzle]; +} + void genX(image_view_init)(struct anv_image_view *iview, struct anv_device *device, @@ -156,15 +174,6 @@ genX(image_view_init)(struct anv_image_view *iview, unreachable(!"bad VkImageType"); } - static const uint32_t vk_to_gen_swizzle[] = { - [VK_CHANNEL_SWIZZLE_ZERO] = SCS_ZERO, - [VK_CHANNEL_SWIZZLE_ONE] = SCS_ONE, - [VK_CHANNEL_SWIZZLE_R] = SCS_RED, - [VK_CHANNEL_SWIZZLE_G] = SCS_GREEN, - [VK_CHANNEL_SWIZZLE_B] = SCS_BLUE, - [VK_CHANNEL_SWIZZLE_A] = SCS_ALPHA - }; - static const uint8_t isl_to_gen_tiling[] = { [ISL_TILING_LINEAR] = LINEAR, [ISL_TILING_X] = XMAJOR, @@ -212,10 +221,14 @@ genX(image_view_init)(struct anv_image_view *iview, .GreenClearColor = 0, .BlueClearColor = 0, .AlphaClearColor = 0, - .ShaderChannelSelectRed = vk_to_gen_swizzle[pCreateInfo->channels.r], - 
.ShaderChannelSelectGreen = vk_to_gen_swizzle[pCreateInfo->channels.g], - .ShaderChannelSelectBlue = vk_to_gen_swizzle[pCreateInfo->channels.b], - .ShaderChannelSelectAlpha = vk_to_gen_swizzle[pCreateInfo->channels.a], + .ShaderChannelSelectRed = vk_to_gen_swizzle(pCreateInfo->components.r, + VK_COMPONENT_SWIZZLE_R), + .ShaderChannelSelectGreen = vk_to_gen_swizzle(pCreateInfo->components.g, + VK_COMPONENT_SWIZZLE_G), + .ShaderChannelSelectBlue = vk_to_gen_swizzle(pCreateInfo->components.b, + VK_COMPONENT_SWIZZLE_B), + .ShaderChannelSelectAlpha = vk_to_gen_swizzle(pCreateInfo->components.a, + VK_COMPONENT_SWIZZLE_A), .ResourceMinLOD = 0.0, .SurfaceBaseAddress = { NULL, iview->offset }, }; -- cgit v1.2.3 From 9b1cb8fdbc7e3c2fb9b950a9844ef653761b7480 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 13:28:09 -0800 Subject: vk/0.210.0: Rework a few raster/input enums --- include/vulkan/vulkan.h | 73 ++++++++++++++++++++++----------------------- src/vulkan/anv_meta.c | 8 ++--- src/vulkan/anv_meta_clear.c | 8 ++--- src/vulkan/anv_pipeline.c | 6 ++-- src/vulkan/gen7_pipeline.c | 18 +++++------ src/vulkan/gen8_pipeline.c | 18 +++++------ 6 files changed, 64 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 2f2dc11c3c2..1520d993d04 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -494,14 +494,14 @@ typedef enum { VK_SHADER_STAGE_MAX_ENUM = 0x7FFFFFFF } VkShaderStage; -typedef enum { - VK_VERTEX_INPUT_STEP_RATE_VERTEX = 0, - VK_VERTEX_INPUT_STEP_RATE_INSTANCE = 1, - VK_VERTEX_INPUT_STEP_RATE_BEGIN_RANGE = VK_VERTEX_INPUT_STEP_RATE_VERTEX, - VK_VERTEX_INPUT_STEP_RATE_END_RANGE = VK_VERTEX_INPUT_STEP_RATE_INSTANCE, - VK_VERTEX_INPUT_STEP_RATE_NUM = (VK_VERTEX_INPUT_STEP_RATE_INSTANCE - VK_VERTEX_INPUT_STEP_RATE_VERTEX + 1), - VK_VERTEX_INPUT_STEP_RATE_MAX_ENUM = 0x7FFFFFFF -} VkVertexInputStepRate; +typedef enum VkVertexInputRate { + VK_VERTEX_INPUT_RATE_VERTEX = 0, + 
VK_VERTEX_INPUT_RATE_INSTANCE = 1, + VK_VERTEX_INPUT_RATE_BEGIN_RANGE = VK_VERTEX_INPUT_RATE_VERTEX, + VK_VERTEX_INPUT_RATE_END_RANGE = VK_VERTEX_INPUT_RATE_INSTANCE, + VK_VERTEX_INPUT_RATE_RANGE_SIZE = (VK_VERTEX_INPUT_RATE_INSTANCE - VK_VERTEX_INPUT_RATE_VERTEX + 1), + VK_VERTEX_INPUT_RATE_MAX_ENUM = 0x7FFFFFFF +} VkVertexInputRate; typedef enum VkPrimitiveTopology { VK_PRIMITIVE_TOPOLOGY_POINT_LIST = 0, @@ -521,33 +521,22 @@ typedef enum VkPrimitiveTopology { VK_PRIMITIVE_TOPOLOGY_MAX_ENUM = 0x7FFFFFFF } VkPrimitiveTopology; -typedef enum { - VK_FILL_MODE_POINTS = 0, - VK_FILL_MODE_WIREFRAME = 1, - VK_FILL_MODE_SOLID = 2, - VK_FILL_MODE_BEGIN_RANGE = VK_FILL_MODE_POINTS, - VK_FILL_MODE_END_RANGE = VK_FILL_MODE_SOLID, - VK_FILL_MODE_NUM = (VK_FILL_MODE_SOLID - VK_FILL_MODE_POINTS + 1), - VK_FILL_MODE_MAX_ENUM = 0x7FFFFFFF -} VkFillMode; - -typedef enum { - VK_CULL_MODE_NONE = 0, - VK_CULL_MODE_FRONT = 1, - VK_CULL_MODE_BACK = 2, - VK_CULL_MODE_FRONT_AND_BACK = 3, - VK_CULL_MODE_BEGIN_RANGE = VK_CULL_MODE_NONE, - VK_CULL_MODE_END_RANGE = VK_CULL_MODE_FRONT_AND_BACK, - VK_CULL_MODE_NUM = (VK_CULL_MODE_FRONT_AND_BACK - VK_CULL_MODE_NONE + 1), - VK_CULL_MODE_MAX_ENUM = 0x7FFFFFFF -} VkCullMode; - -typedef enum { - VK_FRONT_FACE_CCW = 0, - VK_FRONT_FACE_CW = 1, - VK_FRONT_FACE_BEGIN_RANGE = VK_FRONT_FACE_CCW, - VK_FRONT_FACE_END_RANGE = VK_FRONT_FACE_CW, - VK_FRONT_FACE_NUM = (VK_FRONT_FACE_CW - VK_FRONT_FACE_CCW + 1), +typedef enum VkPolygonMode { + VK_POLYGON_MODE_FILL = 0, + VK_POLYGON_MODE_LINE = 1, + VK_POLYGON_MODE_POINT = 2, + VK_POLYGON_MODE_BEGIN_RANGE = VK_POLYGON_MODE_FILL, + VK_POLYGON_MODE_END_RANGE = VK_POLYGON_MODE_POINT, + VK_POLYGON_MODE_RANGE_SIZE = (VK_POLYGON_MODE_POINT - VK_POLYGON_MODE_FILL + 1), + VK_POLYGON_MODE_MAX_ENUM = 0x7FFFFFFF +} VkPolygonMode; + +typedef enum VkFrontFace { + VK_FRONT_FACE_COUNTER_CLOCKWISE = 0, + VK_FRONT_FACE_CLOCKWISE = 1, + VK_FRONT_FACE_BEGIN_RANGE = VK_FRONT_FACE_COUNTER_CLOCKWISE, + VK_FRONT_FACE_END_RANGE = 
VK_FRONT_FACE_CLOCKWISE, + VK_FRONT_FACE_RANGE_SIZE = (VK_FRONT_FACE_CLOCKWISE - VK_FRONT_FACE_COUNTER_CLOCKWISE + 1), VK_FRONT_FACE_MAX_ENUM = 0x7FFFFFFF } VkFrontFace; @@ -979,6 +968,14 @@ typedef enum VkShaderStageFlagBits { VK_SHADER_STAGE_ALL_GRAPHICS = 0x1F, VK_SHADER_STAGE_ALL = 0x7FFFFFFF, } VkShaderStageFlagBits; + +typedef enum VkCullModeFlagBits { + VK_CULL_MODE_NONE = 0, + VK_CULL_MODE_FRONT_BIT = 0x00000001, + VK_CULL_MODE_BACK_BIT = 0x00000002, + VK_CULL_MODE_FRONT_AND_BACK = 0x3, +} VkCullModeFlagBits; +typedef VkFlags VkCullModeFlags; typedef VkFlags VkShaderStageFlags; typedef enum VkAttachmentDescriptionFlagBits { @@ -1569,7 +1566,7 @@ typedef struct { typedef struct { uint32_t binding; uint32_t strideInBytes; - VkVertexInputStepRate stepRate; + VkVertexInputRate inputRate; } VkVertexInputBindingDescription; typedef struct { @@ -1639,8 +1636,8 @@ typedef struct { const void* pNext; VkBool32 depthClipEnable; VkBool32 rasterizerDiscardEnable; - VkFillMode fillMode; - VkCullMode cullMode; + VkPolygonMode polygonMode; + VkCullModeFlags cullMode; VkFrontFace frontFace; VkBool32 depthBiasEnable; float depthBias; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 130557c1aa7..eabe207389e 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -269,12 +269,12 @@ anv_device_init_meta_blit_state(struct anv_device *device) { .binding = 0, .strideInBytes = 0, - .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX }, { .binding = 1, .strideInBytes = 5 * sizeof(float), - .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX }, }, .attributeCount = 3, @@ -359,9 +359,9 @@ anv_device_init_meta_blit_state(struct anv_device *device) .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, .depthClipEnable = true, .rasterizerDiscardEnable = false, - .fillMode = VK_FILL_MODE_SOLID, + .polygonMode = VK_POLYGON_MODE_FILL, .cullMode = VK_CULL_MODE_NONE, - .frontFace = 
VK_FRONT_FACE_CCW + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE }, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index ff8183a736a..bcdc9e4d5b4 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -177,9 +177,9 @@ create_pipeline(struct anv_device *device, .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, .depthClipEnable = false, .rasterizerDiscardEnable = false, - .fillMode = VK_FILL_MODE_SOLID, + .polygonMode = VK_POLYGON_MODE_FILL, .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CCW, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, .depthBiasEnable = false, .depthClipEnable = false, }, @@ -246,7 +246,7 @@ init_color_pipeline(struct anv_device *device) { .binding = 0, .strideInBytes = sizeof(struct color_clear_vattrs), - .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX }, }, .attributeCount = 3, @@ -429,7 +429,7 @@ create_depthstencil_pipeline(struct anv_device *device, { .binding = 0, .strideInBytes = sizeof(struct depthstencil_clear_vattrs), - .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX }, }, .attributeCount = 2, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index a61f0c16757..16d054665f5 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -1043,12 +1043,12 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, /* Step rate is programmed per vertex element (attribute), not * binding. Set up a map of which bindings step per instance, for * reference by vertex element setup. 
*/ - switch (desc->stepRate) { + switch (desc->inputRate) { default: - case VK_VERTEX_INPUT_STEP_RATE_VERTEX: + case VK_VERTEX_INPUT_RATE_VERTEX: pipeline->instancing_enable[desc->binding] = false; break; - case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: + case VK_VERTEX_INPUT_RATE_INSTANCE: pipeline->instancing_enable[desc->binding] = true; break; } diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 2d26a0380c5..d3ec1001ba8 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -83,20 +83,20 @@ gen7_emit_vertex_input(struct anv_pipeline *pipeline, static const uint32_t vk_to_gen_cullmode[] = { [VK_CULL_MODE_NONE] = CULLMODE_NONE, - [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, - [VK_CULL_MODE_BACK] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH }; static const uint32_t vk_to_gen_fillmode[] = { - [VK_FILL_MODE_POINTS] = RASTER_POINT, - [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, - [VK_FILL_MODE_SOLID] = RASTER_SOLID + [VK_POLYGON_MODE_FILL] = RASTER_SOLID, + [VK_POLYGON_MODE_LINE] = RASTER_WIREFRAME, + [VK_POLYGON_MODE_POINT] = RASTER_POINT, }; static const uint32_t vk_to_gen_front_face[] = { - [VK_FRONT_FACE_CCW] = 1, - [VK_FRONT_FACE_CW] = 0 + [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, + [VK_FRONT_FACE_CLOCKWISE] = 0 }; static void @@ -113,8 +113,8 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, /* LegacyGlobalDepthBiasEnable */ .StatisticsEnable = true, - .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], - .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], .ViewTransformEnable = !(extra && extra->disable_viewport), .FrontWinding = vk_to_gen_front_face[info->frontFace], /* bool AntiAliasingEnable; */ diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 
b62bc44e710..8f106571a87 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -97,20 +97,20 @@ emit_rs_state(struct anv_pipeline *pipeline, { static const uint32_t vk_to_gen_cullmode[] = { [VK_CULL_MODE_NONE] = CULLMODE_NONE, - [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, - [VK_CULL_MODE_BACK] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH }; static const uint32_t vk_to_gen_fillmode[] = { - [VK_FILL_MODE_POINTS] = RASTER_POINT, - [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, - [VK_FILL_MODE_SOLID] = RASTER_SOLID + [VK_POLYGON_MODE_FILL] = RASTER_SOLID, + [VK_POLYGON_MODE_LINE] = RASTER_WIREFRAME, + [VK_POLYGON_MODE_POINT] = RASTER_POINT, }; static const uint32_t vk_to_gen_front_face[] = { - [VK_FRONT_FACE_CCW] = CounterClockwise, - [VK_FRONT_FACE_CW] = Clockwise + [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, + [VK_FRONT_FACE_CLOCKWISE] = 0 }; struct GENX(3DSTATE_SF) sf = { @@ -131,8 +131,8 @@ emit_rs_state(struct anv_pipeline *pipeline, GENX(3DSTATE_RASTER_header), .FrontWinding = vk_to_gen_front_face[info->frontFace], .CullMode = vk_to_gen_cullmode[info->cullMode], - .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], - .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], .ScissorRectangleEnable = !(extra && extra->disable_scissor), #if ANV_GEN == 8 .ViewportZClipTestEnable = info->depthClipEnable -- cgit v1.2.3 From 2c77b0cd018747904f57071bd973a065105fc7d1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 13:29:49 -0800 Subject: gen7/8/cmd_buffer: Inline vk_to_gen_swizzle It's currently unused on IVB so we get compiler warnings. 
--- src/vulkan/gen7_state.c | 2 +- src/vulkan/gen8_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 924b7303da0..e6c544053de 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -191,7 +191,7 @@ static const uint32_t vk_to_gen_swizzle_map[] = { [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA }; -static uint32_t +static inline uint32_t vk_to_gen_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) { if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 12a30534bfb..d6db3644b39 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -96,7 +96,7 @@ static const uint32_t vk_to_gen_swizzle_map[] = { [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA }; -static uint32_t +static inline uint32_t vk_to_gen_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) { if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) -- cgit v1.2.3 From 73ef7d47d22442c089abbdd8bca028e4ea407824 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 13:49:28 -0800 Subject: vk/0.210.0: Rework color blending enums --- include/vulkan/vulkan.h | 80 ++++++++++++++++++++-------------------- src/vulkan/gen7_pipeline.c | 66 ++++++++++++++++----------------- src/vulkan/gen7_state.c | 4 +- src/vulkan/gen8_pipeline.c | 92 +++++++++++++++++++++++----------------------- src/vulkan/gen8_state.c | 4 +- 5 files changed, 123 insertions(+), 123 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 1520d993d04..c7a4ad1c387 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -544,10 +544,10 @@ typedef enum VkCompareOp { VK_COMPARE_OP_NEVER = 0, VK_COMPARE_OP_LESS = 1, VK_COMPARE_OP_EQUAL = 2, - VK_COMPARE_OP_LESS_EQUAL = 3, + VK_COMPARE_OP_LESS_OR_EQUAL = 3, VK_COMPARE_OP_GREATER = 4, VK_COMPARE_OP_NOT_EQUAL = 5, - VK_COMPARE_OP_GREATER_EQUAL = 6, + 
VK_COMPARE_OP_GREATER_OR_EQUAL = 6, VK_COMPARE_OP_ALWAYS = 7, VK_COMPARE_OP_BEGIN_RANGE = VK_COMPARE_OP_NEVER, VK_COMPARE_OP_END_RANGE = VK_COMPARE_OP_ALWAYS, @@ -559,14 +559,14 @@ typedef enum VkStencilOp { VK_STENCIL_OP_KEEP = 0, VK_STENCIL_OP_ZERO = 1, VK_STENCIL_OP_REPLACE = 2, - VK_STENCIL_OP_INC_CLAMP = 3, - VK_STENCIL_OP_DEC_CLAMP = 4, + VK_STENCIL_OP_INCREMENT_AND_CLAMP = 3, + VK_STENCIL_OP_DECREMENT_AND_CLAMP = 4, VK_STENCIL_OP_INVERT = 5, - VK_STENCIL_OP_INC_WRAP = 6, - VK_STENCIL_OP_DEC_WRAP = 7, + VK_STENCIL_OP_INCREMENT_AND_WRAP = 6, + VK_STENCIL_OP_DECREMENT_AND_WRAP = 7, VK_STENCIL_OP_BEGIN_RANGE = VK_STENCIL_OP_KEEP, - VK_STENCIL_OP_END_RANGE = VK_STENCIL_OP_DEC_WRAP, - VK_STENCIL_OP_RANGE_SIZE = (VK_STENCIL_OP_DEC_WRAP - VK_STENCIL_OP_KEEP + 1), + VK_STENCIL_OP_END_RANGE = VK_STENCIL_OP_DECREMENT_AND_WRAP, + VK_STENCIL_OP_RANGE_SIZE = (VK_STENCIL_OP_DECREMENT_AND_WRAP - VK_STENCIL_OP_KEEP + 1), VK_STENCIL_OP_MAX_ENUM = 0x7FFFFFFF } VkStencilOp; @@ -576,11 +576,11 @@ typedef enum VkLogicOp { VK_LOGIC_OP_AND_REVERSE = 2, VK_LOGIC_OP_COPY = 3, VK_LOGIC_OP_AND_INVERTED = 4, - VK_LOGIC_OP_NOOP = 5, + VK_LOGIC_OP_NO_OP = 5, VK_LOGIC_OP_XOR = 6, VK_LOGIC_OP_OR = 7, VK_LOGIC_OP_NOR = 8, - VK_LOGIC_OP_EQUIV = 9, + VK_LOGIC_OP_EQUIVALENT = 9, VK_LOGIC_OP_INVERT = 10, VK_LOGIC_OP_OR_REVERSE = 11, VK_LOGIC_OP_COPY_INVERTED = 12, @@ -594,30 +594,30 @@ typedef enum VkLogicOp { } VkLogicOp; typedef enum VkBlendFactor { - VK_BLEND_ZERO = 0, - VK_BLEND_ONE = 1, - VK_BLEND_SRC_COLOR = 2, - VK_BLEND_ONE_MINUS_SRC_COLOR = 3, - VK_BLEND_DEST_COLOR = 4, - VK_BLEND_ONE_MINUS_DEST_COLOR = 5, - VK_BLEND_SRC_ALPHA = 6, - VK_BLEND_ONE_MINUS_SRC_ALPHA = 7, - VK_BLEND_DEST_ALPHA = 8, - VK_BLEND_ONE_MINUS_DEST_ALPHA = 9, - VK_BLEND_CONSTANT_COLOR = 10, - VK_BLEND_ONE_MINUS_CONSTANT_COLOR = 11, - VK_BLEND_CONSTANT_ALPHA = 12, - VK_BLEND_ONE_MINUS_CONSTANT_ALPHA = 13, - VK_BLEND_SRC_ALPHA_SATURATE = 14, - VK_BLEND_SRC1_COLOR = 15, - VK_BLEND_ONE_MINUS_SRC1_COLOR = 16, - 
VK_BLEND_SRC1_ALPHA = 17, - VK_BLEND_ONE_MINUS_SRC1_ALPHA = 18, - VK_BLEND_BEGIN_RANGE = VK_BLEND_ZERO, - VK_BLEND_END_RANGE = VK_BLEND_ONE_MINUS_SRC1_ALPHA, - VK_BLEND_RANGE_SIZE = (VK_BLEND_ONE_MINUS_SRC1_ALPHA - VK_BLEND_ZERO + 1), - VK_BLEND_MAX_ENUM = 0x7FFFFFFF -} VkBlend; + VK_BLEND_FACTOR_ZERO = 0, + VK_BLEND_FACTOR_ONE = 1, + VK_BLEND_FACTOR_SRC_COLOR = 2, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR = 3, + VK_BLEND_FACTOR_DST_COLOR = 4, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR = 5, + VK_BLEND_FACTOR_SRC_ALPHA = 6, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA = 7, + VK_BLEND_FACTOR_DST_ALPHA = 8, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA = 9, + VK_BLEND_FACTOR_CONSTANT_COLOR = 10, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR = 11, + VK_BLEND_FACTOR_CONSTANT_ALPHA = 12, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA = 13, + VK_BLEND_FACTOR_SRC_ALPHA_SATURATE = 14, + VK_BLEND_FACTOR_SRC1_COLOR = 15, + VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR = 16, + VK_BLEND_FACTOR_SRC1_ALPHA = 17, + VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA = 18, + VK_BLEND_FACTOR_BEGIN_RANGE = VK_BLEND_FACTOR_ZERO, + VK_BLEND_FACTOR_END_RANGE = VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, + VK_BLEND_FACTOR_RANGE_SIZE = (VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA - VK_BLEND_FACTOR_ZERO + 1), + VK_BLEND_FACTOR_MAX_ENUM = 0x7FFFFFFF +} VkBlendFactor; typedef enum VkBlendOp { VK_BLEND_OP_ADD = 0, @@ -1681,12 +1681,12 @@ typedef struct { typedef struct { VkBool32 blendEnable; - VkBlend srcBlendColor; - VkBlend destBlendColor; - VkBlendOp blendOpColor; - VkBlend srcBlendAlpha; - VkBlend destBlendAlpha; - VkBlendOp blendOpAlpha; + VkBlendFactor srcColorBlendFactor; + VkBlendFactor dstColorBlendFactor; + VkBlendOp colorBlendOp; + VkBlendFactor srcAlphaBlendFactor; + VkBlendFactor dstAlphaBlendFactor; + VkBlendOp alphaBlendOp; VkChannelFlags channelWriteMask; } VkPipelineColorBlendAttachmentState; diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index d3ec1001ba8..1ae4d26bcdd 100644 --- a/src/vulkan/gen7_pipeline.c +++ 
b/src/vulkan/gen7_pipeline.c @@ -144,10 +144,10 @@ static const uint32_t vk_to_gen_compare_op[] = { [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, }; @@ -155,11 +155,11 @@ static const uint32_t vk_to_gen_stencil_op[] = { [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, - [VK_STENCIL_OP_INC_CLAMP] = STENCILOP_INCRSAT, - [VK_STENCIL_OP_DEC_CLAMP] = STENCILOP_DECRSAT, + [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, + [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, - [VK_STENCIL_OP_INC_WRAP] = STENCILOP_INCR, - [VK_STENCIL_OP_DEC_WRAP] = STENCILOP_DECR, + [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, + [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, }; static const uint32_t vk_to_gen_blend_op[] = { @@ -176,11 +176,11 @@ static const uint32_t vk_to_gen_logic_op[] = { [VK_LOGIC_OP_AND] = LOGICOP_AND, [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, - [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP, + [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, [VK_LOGIC_OP_XOR] = LOGICOP_XOR, [VK_LOGIC_OP_OR] = LOGICOP_OR, [VK_LOGIC_OP_NOR] = LOGICOP_NOR, - [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV, + [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, @@ -190,25 +190,25 @@ static const uint32_t vk_to_gen_logic_op[] = { }; static const uint32_t vk_to_gen_blend[] = { - 
[VK_BLEND_ZERO] = BLENDFACTOR_ZERO, - [VK_BLEND_ONE] = BLENDFACTOR_ONE, - [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, - [VK_BLEND_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, - [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR, - [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR, - [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, - [VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, - [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA, - [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, - [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, - [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, - [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, - [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, - [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, - [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, - [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, - [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, - [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, + [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + 
[VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, }; static void @@ -274,14 +274,14 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, struct GEN7_BLEND_STATE blend_state = { .ColorBufferBlendEnable = a->blendEnable, .IndependentAlphaBlendEnable = true, /* FIXME: yes? */ - .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], - .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], - .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], - .DestinationBlendFactor = vk_to_gen_blend[a->destBlendColor], + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], .AlphaToCoverageEnable = info->alphaToCoverageEnable, # if 0 diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index e6c544053de..8fff649bfea 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -86,10 +86,10 @@ static const uint32_t vk_to_gen_compare_op[] = { [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, [VK_COMPARE_OP_ALWAYS] = 
PREFILTEROPALWAYS, }; diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 8f106571a87..fb0373c1f7e 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -158,11 +158,11 @@ emit_cb_state(struct anv_pipeline *pipeline, [VK_LOGIC_OP_AND] = LOGICOP_AND, [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, - [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP, + [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, [VK_LOGIC_OP_XOR] = LOGICOP_XOR, [VK_LOGIC_OP_OR] = LOGICOP_OR, [VK_LOGIC_OP_NOR] = LOGICOP_NOR, - [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV, + [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, @@ -172,25 +172,25 @@ emit_cb_state(struct anv_pipeline *pipeline, }; static const uint32_t vk_to_gen_blend[] = { - [VK_BLEND_ZERO] = BLENDFACTOR_ZERO, - [VK_BLEND_ONE] = BLENDFACTOR_ONE, - [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, - [VK_BLEND_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, - [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR, - [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR, - [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, - [VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, - [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA, - [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, - [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, - [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, - [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, - [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, - [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, - [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, - [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, - [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, - [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, + [VK_BLEND_FACTOR_ZERO] = 
BLENDFACTOR_ZERO, + [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, }; static const uint32_t vk_to_gen_blend_op[] = { @@ -213,9 +213,9 @@ emit_cb_state(struct anv_pipeline *pipeline, for (uint32_t i = 0; i < info->attachmentCount; i++) { const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; - if (a->srcBlendColor != a->srcBlendAlpha || - a->destBlendColor != a->destBlendAlpha || - a->blendOpColor != a->blendOpAlpha) { + if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || + a->dstColorBlendFactor != a->dstAlphaBlendFactor || + a->colorBlendOp != a->alphaBlendOp) { blend_state.IndependentAlphaBlendEnable = true; } @@ -227,12 +227,12 @@ emit_cb_state(struct anv_pipeline *pipeline, .ColorClampRange = COLORCLAMP_RTFORMAT, .PreBlendColorClampEnable = true, .PostBlendColorClampEnable = true, - .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], - .DestinationBlendFactor = 
vk_to_gen_blend[a->destBlendColor], - .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], - .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), @@ -245,13 +245,13 @@ emit_cb_state(struct anv_pipeline *pipeline, * means that, for MIN and MAX, we have to stomp the blend factor to * ONE to make it a no-op. 
*/ - if (a->blendOpColor == VK_BLEND_OP_MIN || - a->blendOpColor == VK_BLEND_OP_MAX) { + if (a->colorBlendOp == VK_BLEND_OP_MIN || + a->colorBlendOp == VK_BLEND_OP_MAX) { blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE; blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE; } - if (a->blendOpAlpha == VK_BLEND_OP_MIN || - a->blendOpAlpha == VK_BLEND_OP_MAX) { + if (a->alphaBlendOp == VK_BLEND_OP_MIN || + a->alphaBlendOp == VK_BLEND_OP_MAX) { blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE; blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE; } @@ -265,25 +265,25 @@ emit_cb_state(struct anv_pipeline *pipeline, } static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER, - [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS, - [VK_COMPARE_OP_EQUAL] = COMPAREFUNCTION_EQUAL, - [VK_COMPARE_OP_LESS_EQUAL] = COMPAREFUNCTION_LEQUAL, - [VK_COMPARE_OP_GREATER] = COMPAREFUNCTION_GREATER, - [VK_COMPARE_OP_NOT_EQUAL] = COMPAREFUNCTION_NOTEQUAL, - [VK_COMPARE_OP_GREATER_EQUAL] = COMPAREFUNCTION_GEQUAL, - [VK_COMPARE_OP_ALWAYS] = COMPAREFUNCTION_ALWAYS, + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, }; static const uint32_t vk_to_gen_stencil_op[] = { [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, - [VK_STENCIL_OP_INC_CLAMP] = STENCILOP_INCRSAT, - [VK_STENCIL_OP_DEC_CLAMP] = STENCILOP_DECRSAT, + [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, + [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, - [VK_STENCIL_OP_INC_WRAP] = STENCILOP_INCR, - 
[VK_STENCIL_OP_DEC_WRAP] = STENCILOP_DECR, + [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, + [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, }; static void diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index d6db3644b39..3fc985d52b7 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -305,10 +305,10 @@ VkResult genX(CreateSampler)( [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, }; -- cgit v1.2.3 From 4ab9391fbb35e627f2033494f0440536bb3341a8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 14:19:41 -0800 Subject: vk/0.210.0: Rework dynamic states --- include/vulkan/vulkan.h | 44 ++++++++++++++++++++++---------------------- src/vulkan/anv_cmd_buffer.c | 32 ++++++++++++++++---------------- src/vulkan/anv_meta_clear.c | 8 ++++---- src/vulkan/anv_pipeline.c | 21 +++++++++++---------- src/vulkan/anv_private.h | 2 +- src/vulkan/gen7_cmd_buffer.c | 4 ++-- src/vulkan/gen7_pipeline.c | 18 +++++++++--------- src/vulkan/gen8_cmd_buffer.c | 4 ++-- src/vulkan/gen8_pipeline.c | 16 ++++++++-------- 9 files changed, 75 insertions(+), 74 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index c7a4ad1c387..d378598b0d3 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1640,9 +1640,9 @@ typedef struct { VkCullModeFlags cullMode; VkFrontFace frontFace; VkBool32 depthBiasEnable; - float depthBias; + float depthBiasConstantFactor; float depthBiasClamp; - float slopeScaledDepthBias; + float depthBiasSlopeFactor; float lineWidth; } 
VkPipelineRasterStateCreateInfo; @@ -1655,14 +1655,14 @@ typedef struct { const VkSampleMask* pSampleMask; } VkPipelineMultisampleStateCreateInfo; -typedef struct { - VkStencilOp stencilFailOp; - VkStencilOp stencilPassOp; - VkStencilOp stencilDepthFailOp; - VkCompareOp stencilCompareOp; - uint32_t stencilCompareMask; - uint32_t stencilWriteMask; - uint32_t stencilReference; +typedef struct VkStencilOpState { + VkStencilOp failOp; + VkStencilOp passOp; + VkStencilOp depthFailOp; + VkCompareOp compareOp; + uint32_t compareMask; + uint32_t writeMask; + uint32_t reference; } VkStencilOpState; typedef struct { @@ -1699,7 +1699,7 @@ typedef struct { VkLogicOp logicOp; uint32_t attachmentCount; const VkPipelineColorBlendAttachmentState* pAttachments; - float blendConst[4]; + float blendConstants[4]; } VkPipelineColorBlendStateCreateInfo; typedef struct { @@ -2168,12 +2168,12 @@ typedef void (VKAPI_PTR *PFN_vkCmdBindPipeline)(VkCommandBuffer commandBuffer, V typedef void (VKAPI_PTR *PFN_vkCmdSetViewport)(VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport* pViewports); typedef void (VKAPI_PTR *PFN_vkCmdSetScissor)(VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D* pScissors); typedef void (VKAPI_PTR *PFN_vkCmdSetLineWidth)(VkCommandBuffer commandBuffer, float lineWidth); -typedef void (VKAPI_PTR *PFN_vkCmdSetDepthBias)(VkCommandBuffer commandBuffer, float depthBias, float depthBiasClamp, float slopeScaledDepthBias); -typedef void (VKAPI_PTR *PFN_vkCmdSetBlendConstants)(VkCommandBuffer commandBuffer, const float blendConst[4]); +typedef void (VKAPI_PTR *PFN_vkCmdSetDepthBias)(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp, float depthBiasSlopeFactor); +typedef void (VKAPI_PTR *PFN_vkCmdSetBlendConstants)(VkCommandBuffer commandBuffer, const float blendConstants[4]); typedef void (VKAPI_PTR *PFN_vkCmdSetDepthBounds)(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds); -typedef 
void (VKAPI_PTR *PFN_vkCmdSetStencilCompareMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilCompareMask); -typedef void (VKAPI_PTR *PFN_vkCmdSetStencilWriteMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilWriteMask); -typedef void (VKAPI_PTR *PFN_vkCmdSetStencilReference)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t stencilReference); +typedef void (VKAPI_PTR *PFN_vkCmdSetStencilCompareMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask); +typedef void (VKAPI_PTR *PFN_vkCmdSetStencilWriteMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask); +typedef void (VKAPI_PTR *PFN_vkCmdSetStencilReference)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference); typedef void (VKAPI_PTR *PFN_vkCmdBindDescriptorSets)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); typedef void (VKAPI_PTR *PFN_vkCmdBindIndexBuffer)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); typedef void (VKAPI_PTR *PFN_vkCmdBindVertexBuffers)(VkCommandBuffer commandBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); @@ -2722,13 +2722,13 @@ VKAPI_ATTR void VKAPI_CALL vkCmdSetLineWidth( VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBias( VkCommandBuffer commandBuffer, - float depthBias, + float depthBiasConstantFactor, float depthBiasClamp, - float slopeScaledDepthBias); + float depthBiasSlopeFactor); VKAPI_ATTR void VKAPI_CALL vkCmdSetBlendConstants( VkCommandBuffer commandBuffer, - const float blendConst[4]); + const float blendConstants[4]); VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBounds( VkCommandBuffer commandBuffer, @@ -2738,17 +2738,17 @@ 
VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBounds( VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilCompareMask( VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - uint32_t stencilCompareMask); + uint32_t compareMask); VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilWriteMask( VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - uint32_t stencilWriteMask); + uint32_t writeMask); VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilReference( VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - uint32_t stencilReference); + uint32_t reference); VKAPI_ATTR void VKAPI_CALL vkCmdBindDescriptorSets( VkCommandBuffer commandBuffer, diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index f42f6fd5183..401fd5681f6 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -50,7 +50,7 @@ const struct anv_dynamic_state default_dynamic_state = { .depth_bias = { .bias = 0.0f, .clamp = 0.0f, - .slope_scaled = 0.0f, + .slope = 0.0f, }, .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, .depth_bounds = { @@ -380,27 +380,27 @@ void anv_CmdSetLineWidth( void anv_CmdSetDepthBias( VkCommandBuffer commandBuffer, - float depthBias, + float depthBiasConstantFactor, float depthBiasClamp, - float slopeScaledDepthBias) + float depthBiasSlopeFactor) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->state.dynamic.depth_bias.bias = depthBias; + cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor; cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; - cmd_buffer->state.dynamic.depth_bias.slope_scaled = slopeScaledDepthBias; + cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor; cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; } void anv_CmdSetBlendConstants( VkCommandBuffer commandBuffer, - const float blendConst[4]) + const float blendConstants[4]) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); memcpy(cmd_buffer->state.dynamic.blend_constants, - blendConst, 
sizeof(float) * 4); + blendConstants, sizeof(float) * 4); cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; } @@ -421,14 +421,14 @@ void anv_CmdSetDepthBounds( void anv_CmdSetStencilCompareMask( VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - uint32_t stencilCompareMask) + uint32_t compareMask) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_compare_mask.front = stencilCompareMask; + cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask; if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_compare_mask.back = stencilCompareMask; + cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask; cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; } @@ -436,14 +436,14 @@ void anv_CmdSetStencilCompareMask( void anv_CmdSetStencilWriteMask( VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - uint32_t stencilWriteMask) + uint32_t writeMask) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_write_mask.front = stencilWriteMask; + cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask; if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_write_mask.back = stencilWriteMask; + cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask; cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; } @@ -451,14 +451,14 @@ void anv_CmdSetStencilWriteMask( void anv_CmdSetStencilReference( VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - uint32_t stencilReference) + uint32_t reference) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_reference.front = stencilReference; + cmd_buffer->state.dynamic.stencil_reference.front = reference; if (faceMask & 
VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_reference.back = stencilReference; + cmd_buffer->state.dynamic.stencil_reference.back = reference; cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; } diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index bcdc9e4d5b4..a9bcbc3be9e 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -459,10 +459,10 @@ create_depthstencil_pipeline(struct anv_device *device, .depthBoundsTestEnable = false, .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT), .front = { - .stencilPassOp = VK_STENCIL_OP_REPLACE, - .stencilCompareOp = VK_COMPARE_OP_ALWAYS, - .stencilWriteMask = UINT32_MAX, - .stencilReference = 0, /* dynamic */ + .passOp = VK_STENCIL_OP_REPLACE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .writeMask = UINT32_MAX, + .reference = 0, /* dynamic */ }, .back = { 0 /* dont care */ }, }; diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 16d054665f5..e4209bd13c9 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -849,16 +849,17 @@ anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { assert(pCreateInfo->pRasterState); - dynamic->depth_bias.bias = pCreateInfo->pRasterState->depthBias; + dynamic->depth_bias.bias = + pCreateInfo->pRasterState->depthBiasConstantFactor; dynamic->depth_bias.clamp = pCreateInfo->pRasterState->depthBiasClamp; - dynamic->depth_bias.slope_scaled = - pCreateInfo->pRasterState->slopeScaledDepthBias; + dynamic->depth_bias.slope = + pCreateInfo->pRasterState->depthBiasSlopeFactor; } if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { assert(pCreateInfo->pColorBlendState); typed_memcpy(dynamic->blend_constants, - pCreateInfo->pColorBlendState->blendConst, 4); + pCreateInfo->pColorBlendState->blendConstants, 4); } /* If there is no depthstencil attachment, then don't read @@ -884,25 +885,25 @@ 
anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { assert(pCreateInfo->pDepthStencilState); dynamic->stencil_compare_mask.front = - pCreateInfo->pDepthStencilState->front.stencilCompareMask; + pCreateInfo->pDepthStencilState->front.compareMask; dynamic->stencil_compare_mask.back = - pCreateInfo->pDepthStencilState->back.stencilCompareMask; + pCreateInfo->pDepthStencilState->back.compareMask; } if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { assert(pCreateInfo->pDepthStencilState); dynamic->stencil_write_mask.front = - pCreateInfo->pDepthStencilState->front.stencilWriteMask; + pCreateInfo->pDepthStencilState->front.writeMask; dynamic->stencil_write_mask.back = - pCreateInfo->pDepthStencilState->back.stencilWriteMask; + pCreateInfo->pDepthStencilState->back.writeMask; } if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { assert(pCreateInfo->pDepthStencilState); dynamic->stencil_reference.front = - pCreateInfo->pDepthStencilState->front.stencilReference; + pCreateInfo->pDepthStencilState->front.reference; dynamic->stencil_reference.back = - pCreateInfo->pDepthStencilState->back.stencilReference; + pCreateInfo->pDepthStencilState->back.reference; } } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 0e148b36b1c..c86003f5823 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -917,7 +917,7 @@ struct anv_dynamic_state { struct { float bias; float clamp; - float slope_scaled; + float slope; } depth_bias; float blend_constants[4]; diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index b69982d6e52..9a3d4799d62 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -409,7 +409,7 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) { bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || - cmd_buffer->state.dynamic.depth_bias.slope_scaled 
!= 0.0f; + cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; uint32_t sf_dw[GEN7_3DSTATE_SF_length]; struct GEN7_3DSTATE_SF sf = { @@ -419,7 +419,7 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) .GlobalDepthOffsetEnableWireframe = enable_bias, .GlobalDepthOffsetEnablePoint = enable_bias, .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, - .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope_scaled, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp }; GEN7_3DSTATE_SF_pack(NULL, sf_dw, &sf); diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 1ae4d26bcdd..2f51b4bfc73 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -231,15 +231,15 @@ gen7_emit_ds_state(struct anv_pipeline *pipeline, .DoubleSidedStencilEnable = true, .StencilTestEnable = info->stencilTestEnable, - .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], - .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], - .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], - .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], - - .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], - .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], - .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.stencilDepthFailOp], - .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], + .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], + + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], + .BackfaceStencilPassDepthPassOp = 
vk_to_gen_stencil_op[info->back.passOp], + .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.depthFailOp], + .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], }; GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state); diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 88062a6f61b..b6523b5ecf5 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -231,7 +231,7 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){ bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || - cmd_buffer->state.dynamic.depth_bias.slope_scaled != 0.0f; + cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; uint32_t raster_dw[GENX(3DSTATE_RASTER_length)]; struct GENX(3DSTATE_RASTER) raster = { @@ -240,7 +240,7 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) .GlobalDepthOffsetEnableWireframe = enable_bias, .GlobalDepthOffsetEnablePoint = enable_bias, .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, - .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope_scaled, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp }; GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster); diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index fb0373c1f7e..16143300cf1 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -313,14 +313,14 @@ emit_ds_state(struct anv_pipeline *pipeline, .DoubleSidedStencilEnable = true, .StencilTestEnable = info->stencilTestEnable, - .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], - .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], - .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], - 
.StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], - .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], - .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], - .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.stencilDepthFailOp], - .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], + .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], + .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.depthFailOp], + .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], }; GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil); -- cgit v1.2.3 From 569f70be56908b3d74222bbb73cc0145f5f39735 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 14:52:38 -0800 Subject: vk/0.210.0: Rework copy/clear/blit API --- include/vulkan/vulkan.h | 127 +++++++++++++++++++++----------------------- src/vulkan/anv_dump.c | 18 +++---- src/vulkan/anv_meta.c | 78 +++++++++++++-------------- src/vulkan/anv_meta_clear.c | 20 ++----- 4 files changed, 114 insertions(+), 129 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index d378598b0d3..3ff673f6fe4 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1932,74 +1932,81 @@ typedef struct { VkFramebuffer framebuffer; } VkCommandBufferBeginInfo; -typedef struct { +typedef struct VkBufferCopy { VkDeviceSize srcOffset; - VkDeviceSize destOffset; - VkDeviceSize copySize; + VkDeviceSize dstOffset; + VkDeviceSize size; } VkBufferCopy; -typedef struct { - 
VkImageAspect aspect; +typedef struct VkImageSubresourceLayers { + VkImageAspectFlags aspectMask; uint32_t mipLevel; - uint32_t arrayLayer; - uint32_t arraySize; -} VkImageSubresourceCopy; + uint32_t baseArrayLayer; + uint32_t layerCount; +} VkImageSubresourceLayers; -typedef struct { - VkImageSubresourceCopy srcSubresource; +typedef struct VkImageCopy { + VkImageSubresourceLayers srcSubresource; VkOffset3D srcOffset; - VkImageSubresourceCopy destSubresource; - VkOffset3D destOffset; + VkImageSubresourceLayers dstSubresource; + VkOffset3D dstOffset; VkExtent3D extent; } VkImageCopy; -typedef struct { - VkImageSubresourceCopy srcSubresource; +typedef struct VkImageBlit { + VkImageSubresourceLayers srcSubresource; VkOffset3D srcOffset; VkExtent3D srcExtent; - VkImageSubresourceCopy destSubresource; - VkOffset3D destOffset; - VkExtent3D destExtent; + VkImageSubresourceLayers dstSubresource; + VkOffset3D dstOffset; + VkExtent3D dstExtent; } VkImageBlit; -typedef struct { +typedef struct VkBufferImageCopy { VkDeviceSize bufferOffset; uint32_t bufferRowLength; uint32_t bufferImageHeight; - VkImageSubresourceCopy imageSubresource; + VkImageSubresourceLayers imageSubresource; VkOffset3D imageOffset; VkExtent3D imageExtent; } VkBufferImageCopy; -typedef union { +typedef union VkClearColorValue { float float32[4]; int32_t int32[4]; uint32_t uint32[4]; } VkClearColorValue; -typedef struct { +typedef struct VkClearDepthStencilValue { float depth; uint32_t stencil; } VkClearDepthStencilValue; -typedef struct { - VkOffset3D offset; - VkExtent3D extent; -} VkRect3D; +typedef union VkClearValue { + VkClearColorValue color; + VkClearDepthStencilValue depthStencil; +} VkClearValue; -typedef struct { - VkImageSubresourceCopy srcSubresource; +typedef struct VkClearAttachment { + VkImageAspectFlags aspectMask; + uint32_t colorAttachment; + VkClearValue clearValue; +} VkClearAttachment; + +typedef struct VkClearRect { + VkRect2D rect; + uint32_t baseArrayLayer; + uint32_t layerCount; +} 
VkClearRect; + +typedef struct VkImageResolve { + VkImageSubresourceLayers srcSubresource; VkOffset3D srcOffset; - VkImageSubresourceCopy destSubresource; - VkOffset3D destOffset; + VkImageSubresourceLayers dstSubresource; + VkOffset3D dstOffset; VkExtent3D extent; } VkImageResolve; -typedef union { - VkClearColorValue color; - VkClearDepthStencilValue depthStencil; -} VkClearValue; - typedef struct { VkStructureType sType; const void* pNext; @@ -2192,9 +2199,8 @@ typedef void (VKAPI_PTR *PFN_vkCmdUpdateBuffer)(VkCommandBuffer commandBuffer, V typedef void (VKAPI_PTR *PFN_vkCmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); typedef void (VKAPI_PTR *PFN_vkCmdClearColorImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); typedef void (VKAPI_PTR *PFN_vkCmdClearDepthStencilImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -typedef void (VKAPI_PTR *PFN_vkCmdClearColorAttachment)(VkCommandBuffer commandBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rectCount, const VkRect3D* pRects); -typedef void (VKAPI_PTR *PFN_vkCmdClearDepthStencilAttachment)(VkCommandBuffer commandBuffer, VkImageAspectFlags aspectMask, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rectCount, const VkRect3D* pRects); -typedef void (VKAPI_PTR *PFN_vkCmdResolveImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdClearAttachments)(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const 
VkClearAttachment* pAttachments, uint32_t rectCount, const VkClearRect* pRects); +typedef void (VKAPI_PTR *PFN_vkCmdResolveImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); typedef void (VKAPI_PTR *PFN_vkCmdSetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); typedef void (VKAPI_PTR *PFN_vkCmdResetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); @@ -2816,7 +2822,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDispatchIndirect( VKAPI_ATTR void VKAPI_CALL vkCmdCopyBuffer( VkCommandBuffer commandBuffer, VkBuffer srcBuffer, - VkBuffer destBuffer, + VkBuffer dstBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); @@ -2824,8 +2830,8 @@ VKAPI_ATTR void VKAPI_CALL vkCmdCopyImage( VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); @@ -2833,8 +2839,8 @@ VKAPI_ATTR void VKAPI_CALL vkCmdBlitImage( VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkTexFilter filter); @@ -2842,8 +2848,8 @@ VKAPI_ATTR void VKAPI_CALL vkCmdBlitImage( VKAPI_ATTR void VKAPI_CALL vkCmdCopyBufferToImage( VkCommandBuffer commandBuffer, VkBuffer srcBuffer, - VkImage destImage, - VkImageLayout destImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, uint32_t regionCount, 
const VkBufferImageCopy* pRegions); @@ -2851,22 +2857,22 @@ VKAPI_ATTR void VKAPI_CALL vkCmdCopyImageToBuffer( VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, - VkBuffer destBuffer, + VkBuffer dstBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); VKAPI_ATTR void VKAPI_CALL vkCmdUpdateBuffer( VkCommandBuffer commandBuffer, - VkBuffer destBuffer, - VkDeviceSize destOffset, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, VkDeviceSize dataSize, const uint32_t* pData); VKAPI_ATTR void VKAPI_CALL vkCmdFillBuffer( VkCommandBuffer commandBuffer, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize fillSize, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, uint32_t data); VKAPI_ATTR void VKAPI_CALL vkCmdClearColorImage( @@ -2885,28 +2891,19 @@ VKAPI_ATTR void VKAPI_CALL vkCmdClearDepthStencilImage( uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -VKAPI_ATTR void VKAPI_CALL vkCmdClearColorAttachment( - VkCommandBuffer commandBuffer, - uint32_t colorAttachment, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rectCount, - const VkRect3D* pRects); - -VKAPI_ATTR void VKAPI_CALL vkCmdClearDepthStencilAttachment( +VKAPI_ATTR void VKAPI_CALL vkCmdClearAttachments( VkCommandBuffer commandBuffer, - VkImageAspectFlags aspectMask, - VkImageLayout imageLayout, - const VkClearDepthStencilValue* pDepthStencil, + uint32_t attachmentCount, + const VkClearAttachment* pAttachments, uint32_t rectCount, - const VkRect3D* pRects); + const VkClearRect* pRects); VKAPI_ATTR void VKAPI_CALL vkCmdResolveImage( VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 9d0c4d700e5..52b9ca17b43 100644 --- a/src/vulkan/anv_dump.c +++ 
b/src/vulkan/anv_dump.c @@ -103,10 +103,10 @@ anv_dump_image_to_ppm(struct anv_device *device, copy_image, VK_IMAGE_LAYOUT_GENERAL, 1, &(VkImageBlit) { .srcSubresource = { - .aspect = VK_IMAGE_ASPECT_COLOR, + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = miplevel, - .arrayLayer = array_layer, - .arraySize = 1, + .baseArrayLayer = array_layer, + .layerCount = 1, }, .srcOffset = (VkOffset3D) { 0, 0, 0 }, .srcExtent = (VkExtent3D) { @@ -114,14 +114,14 @@ anv_dump_image_to_ppm(struct anv_device *device, extent.height, 1 }, - .destSubresource = { - .aspect = VK_IMAGE_ASPECT_COLOR, + .dstSubresource = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 0, - .arrayLayer = 0, - .arraySize = 1, + .baseArrayLayer = 0, + .layerCount = 1, }, - .destOffset = (VkOffset3D) { 0, 0, 0 }, - .destExtent = (VkExtent3D) { + .dstOffset = (VkOffset3D) { 0, 0, 0 }, + .dstExtent = (VkExtent3D) { extent.width, extent.height, 1 diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index eabe207389e..3b4c8890b13 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -163,13 +163,13 @@ meta_blit_get_src_image_view_type(const struct anv_image *src_image) static uint32_t meta_blit_get_dest_view_base_array_slice(const struct anv_image *dest_image, - const VkImageSubresourceCopy *dest_subresource, + const VkImageSubresourceLayers *dest_subresource, const VkOffset3D *dest_offset) { switch (dest_image->type) { case VK_IMAGE_TYPE_1D: case VK_IMAGE_TYPE_2D: - return dest_subresource->arrayLayer; + return dest_subresource->baseArrayLayer; case VK_IMAGE_TYPE_3D: /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer, * but meta does it anyway. 
When doing so, we translate the @@ -757,8 +757,8 @@ void anv_CmdCopyBuffer( for (unsigned r = 0; r < regionCount; r++) { uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; - uint64_t dest_offset = dest_buffer->offset + pRegions[r].destOffset; - uint64_t copy_size = pRegions[r].copySize; + uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset; + uint64_t copy_size = pRegions[r].size; /* First, we compute the biggest format that can be used with the * given offsets and size. @@ -775,10 +775,10 @@ void anv_CmdCopyBuffer( bs = MIN2(bs, 1 << fs); assert(dest_offset % bs == 0); - fs = ffs(pRegions[r].copySize) - 1; + fs = ffs(pRegions[r].size) - 1; if (fs != -1) bs = MIN2(bs, 1 << fs); - assert(pRegions[r].copySize % bs == 0); + assert(pRegions[r].size % bs == 0); VkFormat copy_format = vk_format_for_size(bs); @@ -847,37 +847,37 @@ void anv_CmdCopyImage( .viewType = src_iview_type, .format = src_image->format->vk_format, .subresourceRange = { - .aspectMask = 1 << pRegions[r].srcSubresource.aspect, + .aspectMask = pRegions[r].srcSubresource.aspectMask, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, .mipLevels = 1, - .baseArrayLayer = pRegions[r].srcSubresource.arrayLayer, - .arraySize = pRegions[r].destSubresource.arraySize, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .arraySize = pRegions[r].dstSubresource.layerCount, }, }, cmd_buffer); const VkOffset3D dest_offset = { - .x = pRegions[r].destOffset.x, - .y = pRegions[r].destOffset.y, + .x = pRegions[r].dstOffset.x, + .y = pRegions[r].dstOffset.y, .z = 0, }; unsigned num_slices; if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(pRegions[r].srcSubresource.arraySize == 1 && - pRegions[r].destSubresource.arraySize == 1); + assert(pRegions[r].srcSubresource.layerCount == 1 && + pRegions[r].dstSubresource.layerCount == 1); num_slices = pRegions[r].extent.depth; } else { - assert(pRegions[r].srcSubresource.arraySize == - pRegions[r].destSubresource.arraySize); + 
assert(pRegions[r].srcSubresource.layerCount == + pRegions[r].dstSubresource.layerCount); assert(pRegions[r].extent.depth == 1); - num_slices = pRegions[r].destSubresource.arraySize; + num_slices = pRegions[r].dstSubresource.layerCount; } const uint32_t dest_base_array_slice = meta_blit_get_dest_view_base_array_slice(dest_image, - &pRegions[r].destSubresource, - &pRegions[r].destOffset); + &pRegions[r].dstSubresource, + &pRegions[r].dstOffset); for (unsigned slice = 0; slice < num_slices; slice++) { VkOffset3D src_offset = pRegions[r].srcOffset; @@ -892,7 +892,7 @@ void anv_CmdCopyImage( .format = dest_image->format->vk_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].destSubresource.mipLevel, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, .mipLevels = 1, .baseArrayLayer = dest_base_array_slice + slice, .arraySize = 1 @@ -947,30 +947,30 @@ void anv_CmdBlitImage( .viewType = src_iview_type, .format = src_image->format->vk_format, .subresourceRange = { - .aspectMask = 1 << pRegions[r].srcSubresource.aspect, + .aspectMask = pRegions[r].srcSubresource.aspectMask, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, .mipLevels = 1, - .baseArrayLayer = pRegions[r].srcSubresource.arrayLayer, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, .arraySize = 1 }, }, cmd_buffer); const VkOffset3D dest_offset = { - .x = pRegions[r].destOffset.x, - .y = pRegions[r].destOffset.y, + .x = pRegions[r].dstOffset.x, + .y = pRegions[r].dstOffset.y, .z = 0, }; const uint32_t dest_array_slice = meta_blit_get_dest_view_base_array_slice(dest_image, - &pRegions[r].destSubresource, - &pRegions[r].destOffset); + &pRegions[r].dstSubresource, + &pRegions[r].dstOffset); - if (pRegions[r].srcSubresource.arraySize > 1) + if (pRegions[r].srcSubresource.layerCount > 1) anv_finishme("FINISHME: copy multiple array layers"); - if (pRegions[r].destExtent.depth > 1) + if (pRegions[r].dstExtent.depth > 1) anv_finishme("FINISHME: copy 
multiple depth layers"); struct anv_image_view dest_iview; @@ -982,7 +982,7 @@ void anv_CmdBlitImage( .format = dest_image->format->vk_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].destSubresource.mipLevel, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, .mipLevels = 1, .baseArrayLayer = dest_array_slice, .arraySize = 1 @@ -996,7 +996,7 @@ void anv_CmdBlitImage( pRegions[r].srcExtent, dest_image, &dest_iview, dest_offset, - pRegions[r].destExtent, + pRegions[r].dstExtent, filter); } @@ -1063,11 +1063,11 @@ void anv_CmdCopyBufferToImage( for (unsigned r = 0; r < regionCount; r++) { VkFormat proxy_format = orig_format; - VkImageAspect proxy_aspect = pRegions[r].imageSubresource.aspect; + VkImageAspectFlags proxy_aspect = pRegions[r].imageSubresource.aspectMask; if (orig_format == VK_FORMAT_S8_UINT) { proxy_format = VK_FORMAT_R8_UINT; - proxy_aspect = VK_IMAGE_ASPECT_COLOR; + proxy_aspect = VK_IMAGE_ASPECT_COLOR_BIT; } struct anv_image *src_image = @@ -1082,11 +1082,11 @@ void anv_CmdCopyBufferToImage( unsigned num_slices; if (dest_image->type == VK_IMAGE_TYPE_3D) { - assert(pRegions[r].imageSubresource.arraySize == 1); + assert(pRegions[r].imageSubresource.layerCount == 1); num_slices = pRegions[r].imageExtent.depth; } else { assert(pRegions[r].imageExtent.depth == 1); - num_slices = pRegions[r].imageSubresource.arraySize; + num_slices = pRegions[r].imageSubresource.layerCount; } for (unsigned slice = 0; slice < num_slices; slice++) { @@ -1098,7 +1098,7 @@ void anv_CmdCopyBufferToImage( .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = proxy_format, .subresourceRange = { - .aspectMask = 1 << proxy_aspect, + .aspectMask = proxy_aspect, .baseMipLevel = 0, .mipLevels = 1, .baseArrayLayer = 0, @@ -1186,11 +1186,11 @@ void anv_CmdCopyImageToBuffer( .viewType = src_iview_type, .format = src_image->format->vk_format, .subresourceRange = { - .aspectMask = 1 << pRegions[r].imageSubresource.aspect, + .aspectMask = 
pRegions[r].imageSubresource.aspectMask, .baseMipLevel = pRegions[r].imageSubresource.mipLevel, .mipLevels = 1, - .baseArrayLayer = pRegions[r].imageSubresource.arrayLayer, - .arraySize = pRegions[r].imageSubresource.arraySize, + .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, + .arraySize = pRegions[r].imageSubresource.layerCount, }, }, cmd_buffer); @@ -1207,11 +1207,11 @@ void anv_CmdCopyImageToBuffer( unsigned num_slices; if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(pRegions[r].imageSubresource.arraySize == 1); + assert(pRegions[r].imageSubresource.layerCount == 1); num_slices = pRegions[r].imageExtent.depth; } else { assert(pRegions[r].imageExtent.depth == 1); - num_slices = pRegions[r].imageSubresource.arraySize; + num_slices = pRegions[r].imageSubresource.layerCount; } for (unsigned slice = 0; slice < num_slices; slice++) { diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index a9bcbc3be9e..d1f1d95a3aa 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -798,24 +798,12 @@ void anv_CmdClearDepthStencilImage( stub(); } -void anv_CmdClearColorAttachment( +void anv_CmdClearAttachments( VkCommandBuffer commandBuffer, - uint32_t colorAttachment, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rectCount, - const VkRect3D* pRects) -{ - stub(); -} - -void anv_CmdClearDepthStencilAttachment( - VkCommandBuffer commandBuffer, - VkImageAspectFlags aspectMask, - VkImageLayout imageLayout, - const VkClearDepthStencilValue* pDepthStencil, + uint32_t attachmentCount, + const VkClearAttachment* pAttachments, uint32_t rectCount, - const VkRect3D* pRects) + const VkClearRect* pRects) { stub(); } -- cgit v1.2.3 From 2e10ca574867f3185755ce0ea9eb70260326bc9a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 16:10:02 -0800 Subject: vk/0.210.0: Misc. 
function argument renames --- include/vulkan/vulkan.h | 48 ++++++++++++++++++++++---------------------- src/vulkan/anv_cmd_buffer.c | 16 +++++++-------- src/vulkan/gen7_cmd_buffer.c | 4 ++-- src/vulkan/gen8_cmd_buffer.c | 12 +++++------ 4 files changed, 40 insertions(+), 40 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 3ff673f6fe4..79133dd7628 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2186,17 +2186,17 @@ typedef void (VKAPI_PTR *PFN_vkCmdBindIndexBuffer)(VkCommandBuffer commandBuffer typedef void (VKAPI_PTR *PFN_vkCmdBindVertexBuffers)(VkCommandBuffer commandBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); typedef void (VKAPI_PTR *PFN_vkCmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexed)(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); -typedef void (VKAPI_PTR *PFN_vkCmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); -typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); +typedef void (VKAPI_PTR *PFN_vkCmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride); +typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride); typedef void (VKAPI_PTR *PFN_vkCmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z); typedef void (VKAPI_PTR *PFN_vkCmdDispatchIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset); -typedef void 
(VKAPI_PTR *PFN_vkCmdCopyBuffer)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer destBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); -typedef void (VKAPI_PTR *PFN_vkCmdCopyImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); -typedef void (VKAPI_PTR *PFN_vkCmdBlitImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkTexFilter filter); -typedef void (VKAPI_PTR *PFN_vkCmdCopyBufferToImage)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); -typedef void (VKAPI_PTR *PFN_vkCmdCopyImageToBuffer)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer destBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); -typedef void (VKAPI_PTR *PFN_vkCmdUpdateBuffer)(VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); -typedef void (VKAPI_PTR *PFN_vkCmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); +typedef void (VKAPI_PTR *PFN_vkCmdCopyBuffer)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdCopyImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdBlitImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const 
VkImageBlit* pRegions, VkTexFilter filter); +typedef void (VKAPI_PTR *PFN_vkCmdCopyBufferToImage)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdCopyImageToBuffer)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdUpdateBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const uint32_t* pData); +typedef void (VKAPI_PTR *PFN_vkCmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize size, uint32_t data); typedef void (VKAPI_PTR *PFN_vkCmdClearColorImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); typedef void (VKAPI_PTR *PFN_vkCmdClearDepthStencilImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); typedef void (VKAPI_PTR *PFN_vkCmdClearAttachments)(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment* pAttachments, uint32_t rectCount, const VkClearRect* pRects); @@ -2205,12 +2205,12 @@ typedef void (VKAPI_PTR *PFN_vkCmdSetEvent)(VkCommandBuffer commandBuffer, VkEve typedef void (VKAPI_PTR *PFN_vkCmdResetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier)(VkCommandBuffer 
commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, VkBool32 byRegion, uint32_t memBarrierCount, const void* const* ppMemBarriers); -typedef void (VKAPI_PTR *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot, VkQueryControlFlags flags); -typedef void (VKAPI_PTR *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t slot); +typedef void (VKAPI_PTR *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t entry, VkQueryControlFlags flags); +typedef void (VKAPI_PTR *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t entry); typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); typedef void (VKAPI_PTR *PFN_vkCmdWriteTimestamp)(VkCommandBuffer commandBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset); -typedef void (VKAPI_PTR *PFN_vkCmdCopyQueryPoolResults)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize destStride, VkQueryResultFlags flags); -typedef void (VKAPI_PTR *PFN_vkCmdPushConstants)(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t start, uint32_t length, const void* values); +typedef void (VKAPI_PTR *PFN_vkCmdCopyQueryPoolResults)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags); +typedef void (VKAPI_PTR *PFN_vkCmdPushConstants)(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, const void* pValues); typedef void (VKAPI_PTR *PFN_vkCmdBeginRenderPass)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, 
VkSubpassContents contents); typedef void (VKAPI_PTR *PFN_vkCmdNextSubpass)(VkCommandBuffer commandBuffer, VkSubpassContents contents); typedef void (VKAPI_PTR *PFN_vkCmdEndRenderPass)(VkCommandBuffer commandBuffer); @@ -2761,7 +2761,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdBindDescriptorSets( VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, - uint32_t setCount, + uint32_t descriptorSetCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); @@ -2798,14 +2798,14 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirect( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, - uint32_t count, + uint32_t drawCount, uint32_t stride); VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirect( VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, - uint32_t count, + uint32_t drawCount, uint32_t stride); VKAPI_ATTR void VKAPI_CALL vkCmdDispatch( @@ -2937,13 +2937,13 @@ VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( VKAPI_ATTR void VKAPI_CALL vkCmdBeginQuery( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t slot, + uint32_t entry, VkQueryControlFlags flags); VKAPI_ATTR void VKAPI_CALL vkCmdEndQuery( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t slot); + uint32_t entry); VKAPI_ATTR void VKAPI_CALL vkCmdResetQueryPool( VkCommandBuffer commandBuffer, @@ -2962,18 +2962,18 @@ VKAPI_ATTR void VKAPI_CALL vkCmdCopyQueryPoolResults( VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize destStride, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize stride, VkQueryResultFlags flags); VKAPI_ATTR void VKAPI_CALL vkCmdPushConstants( VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, - uint32_t start, - uint32_t length, - const void* values); + uint32_t offset, + uint32_t size, + const void* pValues); VKAPI_ATTR void VKAPI_CALL 
vkCmdBeginRenderPass( VkCommandBuffer commandBuffer, diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 401fd5681f6..7c00f9d46ba 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -468,7 +468,7 @@ void anv_CmdBindDescriptorSets( VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout, uint32_t firstSet, - uint32_t setCount, + uint32_t descriptorSetCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets) @@ -477,10 +477,10 @@ void anv_CmdBindDescriptorSets( ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); struct anv_descriptor_set_layout *set_layout; - assert(firstSet + setCount < MAX_SETS); + assert(firstSet + descriptorSetCount < MAX_SETS); uint32_t dynamic_slot = 0; - for (uint32_t i = 0; i < setCount; i++) { + for (uint32_t i = 0; i < descriptorSetCount; i++) { ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); set_layout = layout->set[firstSet + i].layout; @@ -858,9 +858,9 @@ void anv_CmdPushConstants( VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, - uint32_t start, - uint32_t length, - const void* values) + uint32_t offset, + uint32_t size, + const void* pValues) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); VkShaderStage stage; @@ -868,8 +868,8 @@ void anv_CmdPushConstants( for_each_bit(stage, stageFlags) { anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); - memcpy(cmd_buffer->state.push_constants[stage]->client_data + start, - values, length); + memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, + pValues, size); } cmd_buffer->state.push_constants_dirty |= stageFlags; diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 9a3d4799d62..5f215c6c312 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -581,7 +581,7 @@ void genX(CmdDrawIndirect)( VkCommandBuffer commandBuffer, VkBuffer 
_buffer, VkDeviceSize offset, - uint32_t count, + uint32_t drawCount, uint32_t stride) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); @@ -608,7 +608,7 @@ void genX(CmdDrawIndexedIndirect)( VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, - uint32_t count, + uint32_t drawCount, uint32_t stride) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index b6523b5ecf5..c4ac28373e5 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -433,7 +433,7 @@ void genX(CmdDrawIndirect)( VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, - uint32_t count, + uint32_t drawCount, uint32_t stride) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); @@ -558,7 +558,7 @@ void genX(CmdDrawIndexedIndirect)( VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, - uint32_t count, + uint32_t drawCount, uint32_t stride) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); @@ -813,7 +813,7 @@ emit_ps_depth_count(struct anv_batch *batch, void genX(CmdBeginQuery)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t slot, + uint32_t entry, VkQueryControlFlags flags) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); @@ -822,7 +822,7 @@ void genX(CmdBeginQuery)( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - slot * sizeof(struct anv_query_pool_slot)); + entry * sizeof(struct anv_query_pool_slot)); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: @@ -834,7 +834,7 @@ void genX(CmdBeginQuery)( void genX(CmdEndQuery)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t slot) + uint32_t entry) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); @@ -842,7 +842,7 @@ void genX(CmdEndQuery)( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: 
emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - slot * sizeof(struct anv_query_pool_slot) + 8); + entry * sizeof(struct anv_query_pool_slot) + 8); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: -- cgit v1.2.3 From e8f2294cd27fa17abf2c83ec5cf987affd662964 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 16:21:38 -0800 Subject: vk/0.210.0: Rework sampler filtering and mode enums --- include/vulkan/vulkan.h | 76 ++++++++++++++++++++++++------------------------- src/vulkan/anv_dump.c | 2 +- src/vulkan/anv_meta.c | 14 ++++----- src/vulkan/gen7_state.c | 22 +++++++------- src/vulkan/gen8_state.c | 22 +++++++------- 5 files changed, 68 insertions(+), 68 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 79133dd7628..986b86b4f43 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -647,36 +647,36 @@ typedef enum VkDynamicState { VK_DYNAMIC_STATE_MAX_ENUM = 0x7FFFFFFF } VkDynamicState; -typedef enum { - VK_TEX_FILTER_NEAREST = 0, - VK_TEX_FILTER_LINEAR = 1, - VK_TEX_FILTER_BEGIN_RANGE = VK_TEX_FILTER_NEAREST, - VK_TEX_FILTER_END_RANGE = VK_TEX_FILTER_LINEAR, - VK_TEX_FILTER_NUM = (VK_TEX_FILTER_LINEAR - VK_TEX_FILTER_NEAREST + 1), - VK_TEX_FILTER_MAX_ENUM = 0x7FFFFFFF -} VkTexFilter; - -typedef enum { - VK_TEX_MIPMAP_MODE_BASE = 0, - VK_TEX_MIPMAP_MODE_NEAREST = 1, - VK_TEX_MIPMAP_MODE_LINEAR = 2, - VK_TEX_MIPMAP_MODE_BEGIN_RANGE = VK_TEX_MIPMAP_MODE_BASE, - VK_TEX_MIPMAP_MODE_END_RANGE = VK_TEX_MIPMAP_MODE_LINEAR, - VK_TEX_MIPMAP_MODE_NUM = (VK_TEX_MIPMAP_MODE_LINEAR - VK_TEX_MIPMAP_MODE_BASE + 1), - VK_TEX_MIPMAP_MODE_MAX_ENUM = 0x7FFFFFFF -} VkTexMipmapMode; - -typedef enum { - VK_TEX_ADDRESS_MODE_WRAP = 0, - VK_TEX_ADDRESS_MODE_MIRROR = 1, - VK_TEX_ADDRESS_MODE_CLAMP = 2, - VK_TEX_ADDRESS_MODE_MIRROR_ONCE = 3, - VK_TEX_ADDRESS_MODE_CLAMP_BORDER = 4, - VK_TEX_ADDRESS_BEGIN_RANGE = VK_TEX_ADDRESS_MODE_WRAP, - VK_TEX_ADDRESS_END_RANGE = VK_TEX_ADDRESS_MODE_CLAMP_BORDER, - 
VK_TEX_ADDRESS_NUM = (VK_TEX_ADDRESS_MODE_CLAMP_BORDER - VK_TEX_ADDRESS_MODE_WRAP + 1), - VK_TEX_ADDRESS_MAX_ENUM = 0x7FFFFFFF -} VkTexAddressMode; +typedef enum VkFilter { + VK_FILTER_NEAREST = 0, + VK_FILTER_LINEAR = 1, + VK_FILTER_BEGIN_RANGE = VK_FILTER_NEAREST, + VK_FILTER_END_RANGE = VK_FILTER_LINEAR, + VK_FILTER_RANGE_SIZE = (VK_FILTER_LINEAR - VK_FILTER_NEAREST + 1), + VK_FILTER_MAX_ENUM = 0x7FFFFFFF +} VkFilter; + +typedef enum VkSamplerMipmapMode { + VK_SAMPLER_MIPMAP_MODE_BASE = 0, + VK_SAMPLER_MIPMAP_MODE_NEAREST = 1, + VK_SAMPLER_MIPMAP_MODE_LINEAR = 2, + VK_SAMPLER_MIPMAP_MODE_BEGIN_RANGE = VK_SAMPLER_MIPMAP_MODE_BASE, + VK_SAMPLER_MIPMAP_MODE_END_RANGE = VK_SAMPLER_MIPMAP_MODE_LINEAR, + VK_SAMPLER_MIPMAP_MODE_RANGE_SIZE = (VK_SAMPLER_MIPMAP_MODE_LINEAR - VK_SAMPLER_MIPMAP_MODE_BASE + 1), + VK_SAMPLER_MIPMAP_MODE_MAX_ENUM = 0x7FFFFFFF +} VkSamplerMipmapMode; + +typedef enum VkSamplerAddressMode { + VK_SAMPLER_ADDRESS_MODE_REPEAT = 0, + VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT = 1, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE = 2, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER = 3, + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE = 4, + VK_SAMPLER_ADDRESS_MODE_BEGIN_RANGE = VK_SAMPLER_ADDRESS_MODE_REPEAT, + VK_SAMPLER_ADDRESS_MODE_END_RANGE = VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, + VK_SAMPLER_ADDRESS_MODE_RANGE_SIZE = (VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE - VK_SAMPLER_ADDRESS_MODE_REPEAT + 1), + VK_SAMPLER_ADDRESS_MODE_MAX_ENUM = 0x7FFFFFFF +} VkSamplerAddressMode; typedef enum VkBorderColor { VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK = 0, @@ -1759,12 +1759,12 @@ typedef struct { typedef struct { VkStructureType sType; const void* pNext; - VkTexFilter magFilter; - VkTexFilter minFilter; - VkTexMipmapMode mipMode; - VkTexAddressMode addressModeU; - VkTexAddressMode addressModeV; - VkTexAddressMode addressModeW; + VkFilter magFilter; + VkFilter minFilter; + VkSamplerMipmapMode mipmapMode; + VkSamplerAddressMode addressModeU; + VkSamplerAddressMode 
addressModeV; + VkSamplerAddressMode addressModeW; float mipLodBias; float maxAnisotropy; VkBool32 compareEnable; @@ -2192,7 +2192,7 @@ typedef void (VKAPI_PTR *PFN_vkCmdDispatch)(VkCommandBuffer commandBuffer, uint3 typedef void (VKAPI_PTR *PFN_vkCmdDispatchIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset); typedef void (VKAPI_PTR *PFN_vkCmdCopyBuffer)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); typedef void (VKAPI_PTR *PFN_vkCmdCopyImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); -typedef void (VKAPI_PTR *PFN_vkCmdBlitImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkTexFilter filter); +typedef void (VKAPI_PTR *PFN_vkCmdBlitImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkFilter filter); typedef void (VKAPI_PTR *PFN_vkCmdCopyBufferToImage)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); typedef void (VKAPI_PTR *PFN_vkCmdCopyImageToBuffer)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); typedef void (VKAPI_PTR *PFN_vkCmdUpdateBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const uint32_t* pData); @@ -2843,7 +2843,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdBlitImage( VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, - VkTexFilter filter); + VkFilter filter); 
VKAPI_ATTR void VKAPI_CALL vkCmdCopyBufferToImage( VkCommandBuffer commandBuffer, diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 52b9ca17b43..6192361e491 100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -126,7 +126,7 @@ anv_dump_image_to_ppm(struct anv_device *device, extent.height, 1 }, - }, VK_TEX_FILTER_NEAREST); + }, VK_FILTER_NEAREST); ANV_CALL(CmdPipelineBarrier)(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 3b4c8890b13..3f93e1d0b44 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -449,7 +449,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image_view *dest_iview, VkOffset3D dest_offset, VkExtent3D dest_extent, - VkTexFilter blit_filter) + VkFilter blit_filter) { struct anv_device *device = cmd_buffer->device; VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; @@ -734,7 +734,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, &dest_iview, (VkOffset3D) { 0, 0, 0 }, (VkExtent3D) { width, height, 1 }, - VK_TEX_FILTER_NEAREST); + VK_FILTER_NEAREST); anv_DestroyImage(vk_device, src_image); anv_DestroyImage(vk_device, dest_image); @@ -907,7 +907,7 @@ void anv_CmdCopyImage( dest_image, &dest_iview, dest_offset, pRegions[r].extent, - VK_TEX_FILTER_NEAREST); + VK_FILTER_NEAREST); } } @@ -922,7 +922,7 @@ void anv_CmdBlitImage( VkImageLayout destImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, - VkTexFilter filter) + VkFilter filter) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); @@ -934,7 +934,7 @@ void anv_CmdBlitImage( struct anv_meta_saved_state saved_state; - anv_finishme("respect VkTexFilter"); + anv_finishme("respect VkFilter"); meta_prepare_blit(cmd_buffer, &saved_state); @@ -1141,7 +1141,7 @@ void anv_CmdCopyBufferToImage( &dest_iview, dest_offset, pRegions[r].imageExtent, - VK_TEX_FILTER_NEAREST); + VK_FILTER_NEAREST); /* Once we've done the blit, all of the actual information about * the image is 
embedded in the command buffer so we can just @@ -1244,7 +1244,7 @@ void anv_CmdCopyImageToBuffer( &dest_iview, (VkOffset3D) { 0, 0, 0 }, pRegions[r].imageExtent, - VK_TEX_FILTER_NEAREST); + VK_FILTER_NEAREST); /* Once we've done the blit, all of the actual information about * the image is embedded in the command buffer so we can just diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 8fff649bfea..a30de348732 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -64,22 +64,22 @@ genX(fill_buffer_surface_state)(void *state, const struct anv_format *format, } static const uint32_t vk_to_gen_tex_filter[] = { - [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST, - [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR + [VK_FILTER_NEAREST] = MAPFILTER_NEAREST, + [VK_FILTER_LINEAR] = MAPFILTER_LINEAR }; static const uint32_t vk_to_gen_mipmap_mode[] = { - [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, - [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, - [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR + [VK_SAMPLER_MIPMAP_MODE_BASE] = MIPFILTER_NONE, + [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR }; static const uint32_t vk_to_gen_tex_address[] = { - [VK_TEX_ADDRESS_MODE_WRAP] = TCM_WRAP, - [VK_TEX_ADDRESS_MODE_MIRROR] = TCM_MIRROR, - [VK_TEX_ADDRESS_MODE_CLAMP] = TCM_CLAMP, - [VK_TEX_ADDRESS_MODE_MIRROR_ONCE] = TCM_MIRROR_ONCE, - [VK_TEX_ADDRESS_MODE_CLAMP_BORDER] = TCM_CLAMP_BORDER, + [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, + [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, + [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, }; static const uint32_t vk_to_gen_compare_op[] = { @@ -134,7 +134,7 @@ VkResult genX(CreateSampler)( .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, .BaseMipLevel = 0.0, - .MipModeFilter = 
vk_to_gen_mipmap_mode[pCreateInfo->mipMode], + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], .MagModeFilter = mag_filter, .MinModeFilter = min_filter, .TextureLODBias = pCreateInfo->mipLodBias * 256, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 3fc985d52b7..092e1ae7822 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -283,22 +283,22 @@ VkResult genX(CreateSampler)( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); static const uint32_t vk_to_gen_tex_filter[] = { - [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST, - [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR + [VK_FILTER_NEAREST] = MAPFILTER_NEAREST, + [VK_FILTER_LINEAR] = MAPFILTER_LINEAR }; static const uint32_t vk_to_gen_mipmap_mode[] = { - [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, - [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, - [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR + [VK_SAMPLER_MIPMAP_MODE_BASE] = MIPFILTER_NONE, + [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR }; static const uint32_t vk_to_gen_tex_address[] = { - [VK_TEX_ADDRESS_MODE_WRAP] = TCM_WRAP, - [VK_TEX_ADDRESS_MODE_MIRROR] = TCM_MIRROR, - [VK_TEX_ADDRESS_MODE_CLAMP] = TCM_CLAMP, - [VK_TEX_ADDRESS_MODE_MIRROR_ONCE] = TCM_MIRROR_ONCE, - [VK_TEX_ADDRESS_MODE_CLAMP_BORDER] = TCM_CLAMP_BORDER, + [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, + [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, + [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, }; static const uint32_t vk_to_gen_compare_op[] = { @@ -329,7 +329,7 @@ VkResult genX(CreateSampler)( #if ANV_GEN == 8 .BaseMipLevel = 0.0, #endif - .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], .MagModeFilter = mag_filter, .MinModeFilter = min_filter, .TextureLODBias = 
anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), -- cgit v1.2.3 From fe644721aa122e46733f23ba1c902321fc1f64e8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 16:28:36 -0800 Subject: vk/0.210.0: Rename property pCount parameters --- include/vulkan/vulkan.h | 20 ++++++++++---------- src/vulkan/anv_device.c | 24 ++++++++++++------------ 2 files changed, 22 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 986b86b4f43..3d141124d5a 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2078,16 +2078,16 @@ typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physi typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties); typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); -typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceQueueFamilyProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceQueueFamilyProperties)(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties); typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); typedef PFN_vkVoidFunction (VKAPI_PTR *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); typedef PFN_vkVoidFunction (VKAPI_PTR *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); typedef VkResult 
(VKAPI_PTR *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); typedef void (VKAPI_PTR *PFN_vkDestroyDevice)(VkDevice device); -typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceExtensionProperties)(const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); -typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); -typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceLayerProperties)(uint32_t* pCount, VkLayerProperties* pProperties); -typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceLayerProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkLayerProperties* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceExtensionProperties)(const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceLayerProperties)(uint32_t* pPropertyCount, VkLayerProperties* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceLayerProperties)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkLayerProperties* pProperties); typedef void (VKAPI_PTR *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue); typedef VkResult (VKAPI_PTR *PFN_vkQueueSubmit)(VkQueue queue, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers, VkFence fence); typedef VkResult (VKAPI_PTR *PFN_vkQueueWaitIdle)(VkQueue queue); @@ -2253,7 +2253,7 @@ VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceProperties( VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyProperties( VkPhysicalDevice physicalDevice, - uint32_t* pCount, + uint32_t* 
pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties); VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceMemoryProperties( @@ -2278,22 +2278,22 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyDevice( VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceExtensionProperties( const char* pLayerName, - uint32_t* pCount, + uint32_t* pPropertyCount, VkExtensionProperties* pProperties); VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceExtensionProperties( VkPhysicalDevice physicalDevice, const char* pLayerName, - uint32_t* pCount, + uint32_t* pPropertyCount, VkExtensionProperties* pProperties); VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceLayerProperties( - uint32_t* pCount, + uint32_t* pPropertyCount, VkLayerProperties* pProperties); VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceLayerProperties( VkPhysicalDevice physicalDevice, - uint32_t* pCount, + uint32_t* pPropertyCount, VkLayerProperties* pProperties); VKAPI_ATTR void VKAPI_CALL vkGetDeviceQueue( diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 49d74bbfc78..dc5cbd6344d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -725,17 +725,17 @@ void anv_DestroyDevice( VkResult anv_EnumerateInstanceExtensionProperties( const char* pLayerName, - uint32_t* pCount, + uint32_t* pPropertyCount, VkExtensionProperties* pProperties) { if (pProperties == NULL) { - *pCount = ARRAY_SIZE(global_extensions); + *pPropertyCount = ARRAY_SIZE(global_extensions); return VK_SUCCESS; } - assert(*pCount >= ARRAY_SIZE(global_extensions)); + assert(*pPropertyCount >= ARRAY_SIZE(global_extensions)); - *pCount = ARRAY_SIZE(global_extensions); + *pPropertyCount = ARRAY_SIZE(global_extensions); memcpy(pProperties, global_extensions, sizeof(global_extensions)); return VK_SUCCESS; @@ -744,28 +744,28 @@ VkResult anv_EnumerateInstanceExtensionProperties( VkResult anv_EnumerateDeviceExtensionProperties( VkPhysicalDevice physicalDevice, const char* pLayerName, - uint32_t* pCount, + uint32_t* 
pPropertyCount, VkExtensionProperties* pProperties) { if (pProperties == NULL) { - *pCount = ARRAY_SIZE(device_extensions); + *pPropertyCount = ARRAY_SIZE(device_extensions); return VK_SUCCESS; } - assert(*pCount >= ARRAY_SIZE(device_extensions)); + assert(*pPropertyCount >= ARRAY_SIZE(device_extensions)); - *pCount = ARRAY_SIZE(device_extensions); + *pPropertyCount = ARRAY_SIZE(device_extensions); memcpy(pProperties, device_extensions, sizeof(device_extensions)); return VK_SUCCESS; } VkResult anv_EnumerateInstanceLayerProperties( - uint32_t* pCount, + uint32_t* pPropertyCount, VkLayerProperties* pProperties) { if (pProperties == NULL) { - *pCount = 0; + *pPropertyCount = 0; return VK_SUCCESS; } @@ -775,11 +775,11 @@ VkResult anv_EnumerateInstanceLayerProperties( VkResult anv_EnumerateDeviceLayerProperties( VkPhysicalDevice physicalDevice, - uint32_t* pCount, + uint32_t* pPropertyCount, VkLayerProperties* pProperties) { if (pProperties == NULL) { - *pCount = 0; + *pPropertyCount = 0; return VK_SUCCESS; } -- cgit v1.2.3 From fd53603e4259adee7ce976050412309b5a0550cb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 16:42:12 -0800 Subject: vk/0.210.0: Misc. no-op structure changes The only non-trivial change is to sparse resources that we don't handle anyway. 
--- include/vulkan/vulkan.h | 219 ++++++++++++++++++++++++++---------------------- src/vulkan/anv_device.c | 26 +----- 2 files changed, 123 insertions(+), 122 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 3d141124d5a..3a428ca54e6 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1302,37 +1302,37 @@ typedef struct { VkPhysicalDeviceSparseProperties sparseProperties; } VkPhysicalDeviceProperties; -typedef struct { +typedef struct VkQueueFamilyProperties { VkQueueFlags queueFlags; uint32_t queueCount; VkBool32 supportsTimestamps; } VkQueueFamilyProperties; -typedef struct { +typedef struct VkMemoryType { VkMemoryPropertyFlags propertyFlags; uint32_t heapIndex; } VkMemoryType; -typedef struct { +typedef struct VkMemoryHeap { VkDeviceSize size; VkMemoryHeapFlags flags; } VkMemoryHeap; -typedef struct { +typedef struct VkPhysicalDeviceMemoryProperties { uint32_t memoryTypeCount; VkMemoryType memoryTypes[VK_MAX_MEMORY_TYPES]; uint32_t memoryHeapCount; VkMemoryHeap memoryHeaps[VK_MAX_MEMORY_HEAPS]; } VkPhysicalDeviceMemoryProperties; -typedef struct { +typedef struct VkDeviceQueueCreateInfo { VkStructureType sType; const void* pNext; uint32_t queueFamilyIndex; uint32_t queueCount; } VkDeviceQueueCreateInfo; -typedef struct { +typedef struct VkDeviceCreateInfo { VkStructureType sType; const void* pNext; uint32_t queueRecordCount; @@ -1344,12 +1344,12 @@ typedef struct { const VkPhysicalDeviceFeatures* pEnabledFeatures; } VkDeviceCreateInfo; -typedef struct { +typedef struct VkExtensionProperties { char extName[VK_MAX_EXTENSION_NAME]; uint32_t specVersion; } VkExtensionProperties; -typedef struct { +typedef struct VkLayerProperties { char layerName[VK_MAX_EXTENSION_NAME]; uint32_t specVersion; uint32_t implVersion; @@ -1363,7 +1363,7 @@ typedef struct { uint32_t memoryTypeIndex; } VkMemoryAllocInfo; -typedef struct { +typedef struct VkMappedMemoryRange { VkStructureType sType; const void* pNext; 
VkDeviceMemory mem; @@ -1371,74 +1371,107 @@ typedef struct { VkDeviceSize size; } VkMappedMemoryRange; -typedef struct { +typedef struct VkMemoryRequirements { VkDeviceSize size; VkDeviceSize alignment; uint32_t memoryTypeBits; } VkMemoryRequirements; -typedef struct { - VkImageAspect aspect; +typedef struct VkSparseImageFormatProperties { + VkImageAspectFlags aspectMask; VkExtent3D imageGranularity; VkSparseImageFormatFlags flags; } VkSparseImageFormatProperties; -typedef struct { - VkSparseImageFormatProperties formatProps; - uint32_t imageMipTailStartLOD; +typedef struct VkSparseImageMemoryRequirements { + VkSparseImageFormatProperties formatProperties; + uint32_t imageMipTailStartLod; VkDeviceSize imageMipTailSize; VkDeviceSize imageMipTailOffset; VkDeviceSize imageMipTailStride; } VkSparseImageMemoryRequirements; -typedef struct { - VkDeviceSize rangeOffset; - VkDeviceSize rangeSize; - VkDeviceSize memOffset; - VkDeviceMemory mem; +typedef struct VkSparseMemoryBind { + VkDeviceSize resourceOffset; + VkDeviceSize size; + VkDeviceMemory memory; + VkDeviceSize memoryOffset; VkSparseMemoryBindFlags flags; -} VkSparseMemoryBindInfo; +} VkSparseMemoryBind; -typedef struct { +typedef struct VkSparseBufferMemoryBindInfo { + VkBuffer buffer; + uint32_t bindCount; + const VkSparseMemoryBind* pBinds; +} VkSparseBufferMemoryBindInfo; + +typedef struct VkSparseImageOpaqueMemoryBindInfo { + VkImage image; + uint32_t bindCount; + const VkSparseMemoryBind* pBinds; +} VkSparseImageOpaqueMemoryBindInfo; + +typedef struct VkImageSubresource { VkImageAspect aspect; uint32_t mipLevel; uint32_t arrayLayer; } VkImageSubresource; -typedef struct { +typedef struct VkOffset3D { int32_t x; int32_t y; int32_t z; } VkOffset3D; -typedef struct { +typedef struct VkSparseImageMemoryBind { VkImageSubresource subresource; VkOffset3D offset; VkExtent3D extent; - VkDeviceSize memOffset; - VkDeviceMemory mem; + VkDeviceMemory memory; + VkDeviceSize memoryOffset; VkSparseMemoryBindFlags flags; +} 
VkSparseImageMemoryBind; + +typedef struct VkSparseImageMemoryBindInfo { + VkImage image; + uint32_t bindCount; + const VkSparseImageMemoryBind* pBinds; } VkSparseImageMemoryBindInfo; -typedef struct { +typedef struct VkBindSparseInfo { + VkStructureType sType; + const void* pNext; + uint32_t waitSemaphoreCount; + const VkSemaphore* pWaitSemaphores; + uint32_t bufferBindCount; + const VkSparseBufferMemoryBindInfo* pBufferBinds; + uint32_t imageOpaqueBindCount; + const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds; + uint32_t imageBindCount; + const VkSparseImageMemoryBindInfo* pImageBinds; + uint32_t signalSemaphoreCount; + const VkSemaphore* pSignalSemaphores; +} VkBindSparseInfo; + +typedef struct VkFenceCreateInfo { VkStructureType sType; const void* pNext; VkFenceCreateFlags flags; } VkFenceCreateInfo; -typedef struct { +typedef struct VkSemaphoreCreateInfo { VkStructureType sType; const void* pNext; VkSemaphoreCreateFlags flags; } VkSemaphoreCreateInfo; -typedef struct { +typedef struct VkEventCreateInfo { VkStructureType sType; const void* pNext; VkEventCreateFlags flags; } VkEventCreateInfo; -typedef struct { +typedef struct VkQueryPoolCreateInfo { VkStructureType sType; const void* pNext; VkQueryType queryType; @@ -1446,7 +1479,7 @@ typedef struct { VkQueryPipelineStatisticFlags pipelineStatistics; } VkQueryPoolCreateInfo; -typedef struct { +typedef struct VkBufferCreateInfo { VkStructureType sType; const void* pNext; VkDeviceSize size; @@ -1457,7 +1490,7 @@ typedef struct { const uint32_t* pQueueFamilyIndices; } VkBufferCreateInfo; -typedef struct { +typedef struct VkBufferViewCreateInfo { VkStructureType sType; const void* pNext; VkBuffer buffer; @@ -1466,7 +1499,7 @@ typedef struct { VkDeviceSize range; } VkBufferViewCreateInfo; -typedef struct { +typedef struct VkImageCreateInfo { VkStructureType sType; const void* pNext; VkImageType imageType; @@ -1484,7 +1517,7 @@ typedef struct { VkImageLayout initialLayout; } VkImageCreateInfo; -typedef 
struct { +typedef struct VkSubresourceLayout { VkDeviceSize offset; VkDeviceSize size; VkDeviceSize rowPitch; @@ -1498,7 +1531,7 @@ typedef struct VkComponentMapping { VkComponentSwizzle a; } VkComponentMapping; -typedef struct { +typedef struct VkImageSubresourceRange { VkImageAspectFlags aspectMask; uint32_t baseMipLevel; uint32_t mipLevels; @@ -1506,7 +1539,7 @@ typedef struct { uint32_t arraySize; } VkImageSubresourceRange; -typedef struct { +typedef struct VkImageViewCreateInfo { VkStructureType sType; const void* pNext; VkImage image; @@ -1517,7 +1550,7 @@ typedef struct { VkImageViewCreateFlags flags; } VkImageViewCreateInfo; -typedef struct { +typedef struct VkShaderModuleCreateInfo { VkStructureType sType; const void* pNext; size_t codeSize; @@ -1534,7 +1567,7 @@ typedef struct { VkShaderStage stage; } VkShaderCreateInfo; -typedef struct { +typedef struct VkPipelineCacheCreateInfo { VkStructureType sType; const void* pNext; size_t initialSize; @@ -1542,20 +1575,20 @@ typedef struct { size_t maxSize; } VkPipelineCacheCreateInfo; -typedef struct { +typedef struct VkSpecializationMapEntry { uint32_t constantId; size_t size; uint32_t offset; } VkSpecializationMapEntry; -typedef struct { +typedef struct VkSpecializationInfo { uint32_t mapEntryCount; - const VkSpecializationMapEntry* pMap; + const VkSpecializationMapEntry* pMapEntries; size_t dataSize; const void* pData; } VkSpecializationInfo; -typedef struct { +typedef struct VkPipelineShaderStageCreateInfo { VkStructureType sType; const void* pNext; VkShaderStage stage; @@ -1563,20 +1596,20 @@ typedef struct { const VkSpecializationInfo* pSpecializationInfo; } VkPipelineShaderStageCreateInfo; -typedef struct { +typedef struct VkVertexInputBindingDescription { uint32_t binding; uint32_t strideInBytes; VkVertexInputRate inputRate; } VkVertexInputBindingDescription; -typedef struct { +typedef struct VkVertexInputAttributeDescription { uint32_t location; uint32_t binding; VkFormat format; uint32_t offsetInBytes; 
} VkVertexInputAttributeDescription; -typedef struct { +typedef struct VkPipelineVertexInputStateCreateInfo { VkStructureType sType; const void* pNext; uint32_t bindingCount; @@ -1585,20 +1618,20 @@ typedef struct { const VkVertexInputAttributeDescription* pVertexAttributeDescriptions; } VkPipelineVertexInputStateCreateInfo; -typedef struct { +typedef struct VkPipelineInputAssemblyStateCreateInfo { VkStructureType sType; const void* pNext; VkPrimitiveTopology topology; VkBool32 primitiveRestartEnable; } VkPipelineInputAssemblyStateCreateInfo; -typedef struct { +typedef struct VkPipelineTessellationStateCreateInfo { VkStructureType sType; const void* pNext; uint32_t patchControlPoints; } VkPipelineTessellationStateCreateInfo; -typedef struct { +typedef struct VkViewport { float originX; float originY; float width; @@ -1607,22 +1640,22 @@ typedef struct { float maxDepth; } VkViewport; -typedef struct { +typedef struct VkOffset2D { int32_t x; int32_t y; } VkOffset2D; -typedef struct { +typedef struct VkExtent2D { int32_t width; int32_t height; } VkExtent2D; -typedef struct { +typedef struct VkRect2D { VkOffset2D offset; VkExtent2D extent; } VkRect2D; -typedef struct { +typedef struct VkPipelineViewportStateCreateInfo { VkStructureType sType; const void* pNext; uint32_t viewportCount; @@ -1646,7 +1679,7 @@ typedef struct { float lineWidth; } VkPipelineRasterStateCreateInfo; -typedef struct { +typedef struct VkPipelineMultisampleStateCreateInfo { VkStructureType sType; const void* pNext; uint32_t rasterSamples; @@ -1665,7 +1698,7 @@ typedef struct VkStencilOpState { uint32_t reference; } VkStencilOpState; -typedef struct { +typedef struct VkPipelineDepthStencilStateCreateInfo { VkStructureType sType; const void* pNext; VkBool32 depthTestEnable; @@ -1679,7 +1712,7 @@ typedef struct { float maxDepthBounds; } VkPipelineDepthStencilStateCreateInfo; -typedef struct { +typedef struct VkPipelineColorBlendAttachmentState { VkBool32 blendEnable; VkBlendFactor 
srcColorBlendFactor; VkBlendFactor dstColorBlendFactor; @@ -1690,7 +1723,7 @@ typedef struct { VkChannelFlags channelWriteMask; } VkPipelineColorBlendAttachmentState; -typedef struct { +typedef struct VkPipelineColorBlendStateCreateInfo { VkStructureType sType; const void* pNext; VkBool32 alphaToCoverageEnable; @@ -1702,14 +1735,14 @@ typedef struct { float blendConstants[4]; } VkPipelineColorBlendStateCreateInfo; -typedef struct { +typedef struct VkPipelineDynamicStateCreateInfo { VkStructureType sType; const void* pNext; uint32_t dynamicStateCount; const VkDynamicState* pDynamicStates; } VkPipelineDynamicStateCreateInfo; -typedef struct { +typedef struct VkGraphicsPipelineCreateInfo { VkStructureType sType; const void* pNext; uint32_t stageCount; @@ -1731,7 +1764,7 @@ typedef struct { int32_t basePipelineIndex; } VkGraphicsPipelineCreateInfo; -typedef struct { +typedef struct VkComputePipelineCreateInfo { VkStructureType sType; const void* pNext; VkPipelineShaderStageCreateInfo stage; @@ -1741,13 +1774,13 @@ typedef struct { int32_t basePipelineIndex; } VkComputePipelineCreateInfo; -typedef struct { +typedef struct VkPushConstantRange { VkShaderStageFlags stageFlags; - uint32_t start; - uint32_t length; + uint32_t offset; + uint32_t size; } VkPushConstantRange; -typedef struct { +typedef struct VkPipelineLayoutCreateInfo { VkStructureType sType; const void* pNext; uint32_t descriptorSetCount; @@ -1756,7 +1789,7 @@ typedef struct { const VkPushConstantRange* pPushConstantRanges; } VkPipelineLayoutCreateInfo; -typedef struct { +typedef struct VkSamplerCreateInfo { VkStructureType sType; const void* pNext; VkFilter magFilter; @@ -1775,14 +1808,14 @@ typedef struct { VkBool32 unnormalizedCoordinates; } VkSamplerCreateInfo; -typedef struct { +typedef struct VkDescriptorSetLayoutBinding { VkDescriptorType descriptorType; uint32_t arraySize; VkShaderStageFlags stageFlags; const VkSampler* pImmutableSamplers; } VkDescriptorSetLayoutBinding; -typedef struct { +typedef 
struct VkDescriptorSetLayoutCreateInfo { VkStructureType sType; const void* pNext; uint32_t count; @@ -1794,7 +1827,7 @@ typedef struct { uint32_t count; } VkDescriptorTypeCount; -typedef struct { +typedef struct VkDescriptorPoolCreateInfo { VkStructureType sType; const void* pNext; VkDescriptorPoolUsage poolUsage; @@ -1817,7 +1850,7 @@ typedef struct { VkDescriptorBufferInfo bufferInfo; } VkDescriptorInfo; -typedef struct { +typedef struct VkWriteDescriptorSet { VkStructureType sType; const void* pNext; VkDescriptorSet destSet; @@ -1828,7 +1861,7 @@ typedef struct { const VkDescriptorInfo* pDescriptors; } VkWriteDescriptorSet; -typedef struct { +typedef struct VkCopyDescriptorSet { VkStructureType sType; const void* pNext; VkDescriptorSet srcSet; @@ -1840,7 +1873,7 @@ typedef struct { uint32_t count; } VkCopyDescriptorSet; -typedef struct { +typedef struct VkFramebufferCreateInfo { VkStructureType sType; const void* pNext; VkRenderPass renderPass; @@ -1851,7 +1884,7 @@ typedef struct { uint32_t layers; } VkFramebufferCreateInfo; -typedef struct { +typedef struct VkAttachmentDescription { VkStructureType sType; const void* pNext; VkFormat format; @@ -1865,12 +1898,12 @@ typedef struct { VkAttachmentDescriptionFlags flags; } VkAttachmentDescription; -typedef struct { +typedef struct VkAttachmentReference { uint32_t attachment; VkImageLayout layout; } VkAttachmentReference; -typedef struct { +typedef struct VkSubpassDescription { VkStructureType sType; const void* pNext; VkPipelineBindPoint pipelineBindPoint; @@ -1885,7 +1918,7 @@ typedef struct { const VkAttachmentReference* pPreserveAttachments; } VkSubpassDescription; -typedef struct { +typedef struct VkSubpassDependency { VkStructureType sType; const void* pNext; uint32_t srcSubpass; @@ -1897,7 +1930,7 @@ typedef struct { VkBool32 byRegion; } VkSubpassDependency; -typedef struct { +typedef struct VkRenderPassCreateInfo { VkStructureType sType; const void* pNext; uint32_t attachmentCount; @@ -1908,7 +1941,7 @@ 
typedef struct { const VkSubpassDependency* pDependencies; } VkRenderPassCreateInfo; -typedef struct { +typedef struct VkCommandPoolCreateInfo { VkStructureType sType; const void* pNext; uint32_t queueFamilyIndex; @@ -1923,7 +1956,7 @@ typedef struct { VkCommandBufferCreateFlags flags; } VkCommandBufferCreateInfo; -typedef struct { +typedef struct VkCommandBufferBeginInfo { VkStructureType sType; const void* pNext; VkCommandBufferOptimizeFlags flags; @@ -2007,7 +2040,7 @@ typedef struct VkImageResolve { VkExtent3D extent; } VkImageResolve; -typedef struct { +typedef struct VkRenderPassBeginInfo { VkStructureType sType; const void* pNext; VkRenderPass renderPass; @@ -2017,7 +2050,7 @@ typedef struct { const VkClearValue* pClearValues; } VkRenderPassBeginInfo; -typedef struct { +typedef struct VkBufferMemoryBarrier { VkStructureType sType; const void* pNext; VkMemoryOutputFlags outputMask; @@ -2050,7 +2083,7 @@ typedef struct VkDrawIndirectCommand { uint32_t firstInstance; } VkDrawIndirectCommand; -typedef struct { +typedef struct VkImageMemoryBarrier { VkStructureType sType; const void* pNext; VkMemoryOutputFlags outputMask; @@ -2063,7 +2096,7 @@ typedef struct { VkImageSubresourceRange subresourceRange; } VkImageMemoryBarrier; -typedef struct { +typedef struct VkMemoryBarrier { VkStructureType sType; const void* pNext; VkMemoryOutputFlags outputMask; @@ -2105,9 +2138,7 @@ typedef void (VKAPI_PTR *PFN_vkGetBufferMemoryRequirements)(VkDevice device, VkB typedef void (VKAPI_PTR *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); typedef void (VKAPI_PTR *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t* pNumRequirements, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceSparseImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, uint32_t samples, VkImageUsageFlags usage, VkImageTiling 
tiling, uint32_t* pNumProperties, VkSparseImageFormatProperties* pProperties); -typedef VkResult (VKAPI_PTR *PFN_vkQueueBindSparseBufferMemory)(VkQueue queue, VkBuffer buffer, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); -typedef VkResult (VKAPI_PTR *PFN_vkQueueBindSparseImageOpaqueMemory)(VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); -typedef VkResult (VKAPI_PTR *PFN_vkQueueBindSparseImageMemory)(VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseImageMemoryBindInfo* pBindInfo); +typedef VkResult (VKAPI_PTR *PFN_vkQueueBindSparse)(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo, VkFence fence); typedef VkResult (VKAPI_PTR *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); typedef void (VKAPI_PTR *PFN_vkDestroyFence)(VkDevice device, VkFence fence); typedef VkResult (VKAPI_PTR *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); @@ -2388,23 +2419,11 @@ VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceSparseImageFormatProperties( uint32_t* pNumProperties, VkSparseImageFormatProperties* pProperties); -VKAPI_ATTR VkResult VKAPI_CALL vkQueueBindSparseBufferMemory( - VkQueue queue, - VkBuffer buffer, - uint32_t numBindings, - const VkSparseMemoryBindInfo* pBindInfo); - -VKAPI_ATTR VkResult VKAPI_CALL vkQueueBindSparseImageOpaqueMemory( +VKAPI_ATTR VkResult VKAPI_CALL vkQueueBindSparse( VkQueue queue, - VkImage image, - uint32_t numBindings, - const VkSparseMemoryBindInfo* pBindInfo); - -VKAPI_ATTR VkResult VKAPI_CALL vkQueueBindSparseImageMemory( - VkQueue queue, - VkImage image, - uint32_t numBindings, - const VkSparseImageMemoryBindInfo* pBindInfo); + uint32_t bindInfoCount, + const VkBindSparseInfo* pBindInfo, + VkFence fence); VKAPI_ATTR VkResult VKAPI_CALL vkCreateFence( VkDevice device, diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index dc5cbd6344d..f0e26433eed 100644 --- 
a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1140,29 +1140,11 @@ VkResult anv_BindImageMemory( return VK_SUCCESS; } -VkResult anv_QueueBindSparseBufferMemory( +VkResult anv_QueueBindSparse( VkQueue queue, - VkBuffer buffer, - uint32_t numBindings, - const VkSparseMemoryBindInfo* pBindInfo) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_QueueBindSparseImageOpaqueMemory( - VkQueue queue, - VkImage image, - uint32_t numBindings, - const VkSparseMemoryBindInfo* pBindInfo) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_QueueBindSparseImageMemory( - VkQueue queue, - VkImage image, - uint32_t numBindings, - const VkSparseImageMemoryBindInfo* pBindInfo) + uint32_t bindInfoCount, + const VkBindSparseInfo* pBindInfo, + VkFence fence) { stub_return(VK_UNSUPPORTED); } -- cgit v1.2.3 From e673d642090e6a6fe339570726e52b9191b89d3e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 17:00:30 -0800 Subject: vk/0.210.0: Change field names in vertex input structs --- include/vulkan/vulkan.h | 8 ++++---- src/vulkan/anv_meta.c | 14 +++++++------- src/vulkan/anv_meta_clear.c | 22 +++++++++++----------- src/vulkan/anv_pipeline.c | 4 ++-- src/vulkan/gen7_pipeline.c | 11 ++++++----- src/vulkan/gen8_pipeline.c | 14 +++++++------- 6 files changed, 37 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 3a428ca54e6..93c4047da9e 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1598,7 +1598,7 @@ typedef struct VkPipelineShaderStageCreateInfo { typedef struct VkVertexInputBindingDescription { uint32_t binding; - uint32_t strideInBytes; + uint32_t stride; VkVertexInputRate inputRate; } VkVertexInputBindingDescription; @@ -1606,15 +1606,15 @@ typedef struct VkVertexInputAttributeDescription { uint32_t location; uint32_t binding; VkFormat format; - uint32_t offsetInBytes; + uint32_t offset; } VkVertexInputAttributeDescription; typedef struct 
VkPipelineVertexInputStateCreateInfo { VkStructureType sType; const void* pNext; - uint32_t bindingCount; + uint32_t vertexBindingDescriptionCount; const VkVertexInputBindingDescription* pVertexBindingDescriptions; - uint32_t attributeCount; + uint32_t vertexAttributeDescriptionCount; const VkVertexInputAttributeDescription* pVertexAttributeDescriptions; } VkPipelineVertexInputStateCreateInfo; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 3f93e1d0b44..f28775e0f7a 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -264,41 +264,41 @@ anv_device_init_meta_blit_state(struct anv_device *device) VkPipelineVertexInputStateCreateInfo vi_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .bindingCount = 2, + .vertexBindingDescriptionCount = 2, .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { { .binding = 0, - .strideInBytes = 0, + .stride = 0, .inputRate = VK_VERTEX_INPUT_RATE_VERTEX }, { .binding = 1, - .strideInBytes = 5 * sizeof(float), + .stride = 5 * sizeof(float), .inputRate = VK_VERTEX_INPUT_RATE_VERTEX }, }, - .attributeCount = 3, + .vertexAttributeDescriptionCount = 3, .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { { /* VUE Header */ .location = 0, .binding = 0, .format = VK_FORMAT_R32G32B32A32_UINT, - .offsetInBytes = 0 + .offset = 0 }, { /* Position */ .location = 1, .binding = 1, .format = VK_FORMAT_R32G32_SFLOAT, - .offsetInBytes = 0 + .offset = 0 }, { /* Texture Coordinate */ .location = 2, .binding = 1, .format = VK_FORMAT_R32G32B32_SFLOAT, - .offsetInBytes = 8 + .offset = 8 } } }; diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index d1f1d95a3aa..9f3085005f4 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -241,36 +241,36 @@ init_color_pipeline(struct anv_device *device) const VkPipelineVertexInputStateCreateInfo vi_state = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - 
.bindingCount = 1, + .vertexBindingDescriptionCount = 1, .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { { .binding = 0, - .strideInBytes = sizeof(struct color_clear_vattrs), + .stride = sizeof(struct color_clear_vattrs), .inputRate = VK_VERTEX_INPUT_RATE_VERTEX }, }, - .attributeCount = 3, + .vertexAttributeDescriptionCount = 3, .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { { /* VUE Header */ .location = 0, .binding = 0, .format = VK_FORMAT_R32G32B32A32_UINT, - .offsetInBytes = offsetof(struct color_clear_vattrs, vue_header), + .offset = offsetof(struct color_clear_vattrs, vue_header), }, { /* Position */ .location = 1, .binding = 0, .format = VK_FORMAT_R32G32_SFLOAT, - .offsetInBytes = offsetof(struct color_clear_vattrs, position), + .offset = offsetof(struct color_clear_vattrs, position), }, { /* Color */ .location = 2, .binding = 0, .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offsetInBytes = offsetof(struct color_clear_vattrs, color), + .offset = offsetof(struct color_clear_vattrs, color), }, }, }; @@ -424,29 +424,29 @@ create_depthstencil_pipeline(struct anv_device *device, const VkPipelineVertexInputStateCreateInfo vi_state = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .bindingCount = 1, + .vertexBindingDescriptionCount = 1, .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { { .binding = 0, - .strideInBytes = sizeof(struct depthstencil_clear_vattrs), + .stride = sizeof(struct depthstencil_clear_vattrs), .inputRate = VK_VERTEX_INPUT_RATE_VERTEX }, }, - .attributeCount = 2, + .vertexAttributeDescriptionCount = 2, .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { { /* VUE Header */ .location = 0, .binding = 0, .format = VK_FORMAT_R32G32B32A32_UINT, - .offsetInBytes = offsetof(struct depthstencil_clear_vattrs, vue_header), + .offset = offsetof(struct depthstencil_clear_vattrs, vue_header), }, { /* Position */ .location = 1, .binding = 0, .format = 
VK_FORMAT_R32G32_SFLOAT, - .offsetInBytes = offsetof(struct depthstencil_clear_vattrs, position), + .offset = offsetof(struct depthstencil_clear_vattrs, position), }, }, }; diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index e4209bd13c9..41a26c70d85 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -1034,12 +1034,12 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; pipeline->vb_used = 0; - for (uint32_t i = 0; i < vi_info->bindingCount; i++) { + for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { const VkVertexInputBindingDescription *desc = &vi_info->pVertexBindingDescriptions[i]; pipeline->vb_used |= 1 << desc->binding; - pipeline->binding_stride[desc->binding] = desc->strideInBytes; + pipeline->binding_stride[desc->binding] = desc->stride; /* Step rate is programmed per vertex element (attribute), not * binding. Set up a map of which bindings step per instance, for diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 2f51b4bfc73..f63ca4dcb4f 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -38,17 +38,18 @@ gen7_emit_vertex_input(struct anv_pipeline *pipeline, { const bool sgvs = pipeline->vs_prog_data.uses_vertexid || pipeline->vs_prog_data.uses_instanceid; - const uint32_t element_count = info->attributeCount + (sgvs ? 1 : 0); + const uint32_t element_count = + info->vertexAttributeDescriptionCount + (sgvs ? 
1 : 0); const uint32_t num_dwords = 1 + element_count * 2; uint32_t *p; - if (info->attributeCount == 0 && !sgvs) + if (info->vertexAttributeDescriptionCount == 0 && !sgvs) return; p = anv_batch_emitn(&pipeline->batch, num_dwords, GEN7_3DSTATE_VERTEX_ELEMENTS); - for (uint32_t i = 0; i < info->attributeCount; i++) { + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { const VkVertexInputAttributeDescription *desc = &info->pVertexAttributeDescriptions[i]; const struct anv_format *format = anv_format_for_vk_format(desc->format); @@ -58,7 +59,7 @@ gen7_emit_vertex_input(struct anv_pipeline *pipeline, .Valid = true, .SourceElementFormat = format->surface_format, .EdgeFlagEnable = false, - .SourceElementOffset = desc->offsetInBytes, + .SourceElementOffset = desc->offset, .Component0Control = VFCOMP_STORE_SRC, .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, @@ -77,7 +78,7 @@ gen7_emit_vertex_input(struct anv_pipeline *pipeline, .Component2Control = VFCOMP_STORE_VID, .Component3Control = VFCOMP_STORE_IID }; - GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + info->attributeCount * 2], &element); + GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + info->vertexAttributeDescriptionCount * 2], &element); } } diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 16143300cf1..9bf7d2d555e 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -36,17 +36,17 @@ static void emit_vertex_input(struct anv_pipeline *pipeline, const VkPipelineVertexInputStateCreateInfo *info) { - const uint32_t num_dwords = 1 + info->attributeCount * 2; + const uint32_t num_dwords = 1 + info->vertexAttributeDescriptionCount * 2; uint32_t *p; static_assert(ANV_GEN >= 8, "should be compiling this for gen < 8"); - if (info->attributeCount > 0) { + if (info->vertexAttributeDescriptionCount > 0) { p = anv_batch_emitn(&pipeline->batch, 
num_dwords, GENX(3DSTATE_VERTEX_ELEMENTS)); } - for (uint32_t i = 0; i < info->attributeCount; i++) { + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { const VkVertexInputAttributeDescription *desc = &info->pVertexAttributeDescriptions[i]; const struct anv_format *format = anv_format_for_vk_format(desc->format); @@ -56,7 +56,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, .Valid = true, .SourceElementFormat = format->surface_format, .EdgeFlagEnable = false, - .SourceElementOffset = desc->offsetInBytes, + .SourceElementOffset = desc->offset, .Component0Control = VFCOMP_STORE_SRC, .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, @@ -75,10 +75,10 @@ emit_vertex_input(struct anv_pipeline *pipeline, anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, .VertexIDComponentNumber = 2, - .VertexIDElementOffset = info->bindingCount, + .VertexIDElementOffset = info->vertexBindingDescriptionCount, .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, .InstanceIDComponentNumber = 3, - .InstanceIDElementOffset = info->bindingCount); + .InstanceIDElementOffset = info->vertexBindingDescriptionCount); } static void @@ -355,7 +355,7 @@ genX(graphics_pipeline_create)( * inputs. 
*/ if (pipeline->vs_simd8 == NO_KERNEL) { pipeline->wm_prog_data.num_varying_inputs = - pCreateInfo->pVertexInputState->attributeCount - 2; + pCreateInfo->pVertexInputState->vertexAttributeDescriptionCount - 2; } assert(pCreateInfo->pVertexInputState); -- cgit v1.2.3 From 9fa6e328ebac4616148a23a7a4fbe7ed25ec315f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 17:20:49 -0800 Subject: vk/0.210.0: Move alphaToOne and alphaToCoverate to multisample state --- include/vulkan/vulkan.h | 4 ++-- src/vulkan/anv_meta_clear.c | 6 ++---- src/vulkan/anv_pipeline.c | 3 ++- src/vulkan/gen7_pipeline.c | 8 +++++--- src/vulkan/gen8_pipeline.c | 10 ++++++---- 5 files changed, 17 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 6930dc7d9ed..34470999bbf 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1703,6 +1703,8 @@ typedef struct VkPipelineMultisampleStateCreateInfo { VkBool32 sampleShadingEnable; float minSampleShading; const VkSampleMask* pSampleMask; + VkBool32 alphaToCoverageEnable; + VkBool32 alphaToOneEnable; } VkPipelineMultisampleStateCreateInfo; typedef struct VkStencilOpState { @@ -1745,8 +1747,6 @@ typedef struct VkPipelineColorBlendStateCreateInfo { VkStructureType sType; const void* pNext; VkPipelineColorBlendStateCreateFlags flags; - VkBool32 alphaToCoverageEnable; - VkBool32 alphaToOneEnable; VkBool32 logicOpEnable; VkLogicOp logicOp; uint32_t attachmentCount; diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 9f3085005f4..1403d7d0c3d 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -188,6 +188,8 @@ create_pipeline(struct anv_device *device, .rasterSamples = 1, /* FINISHME: Multisampling */ .sampleShadingEnable = false, .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, }, .pDepthStencilState = ds_state, .pColorBlendState = cb_state, @@ -285,8 
+287,6 @@ init_color_pipeline(struct anv_device *device) const VkPipelineColorBlendStateCreateInfo cb_state = { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .alphaToCoverageEnable = false, - .alphaToOneEnable = false, .logicOpEnable = false, .attachmentCount = 1, .pAttachments = (VkPipelineColorBlendAttachmentState []) { @@ -469,8 +469,6 @@ create_depthstencil_pipeline(struct anv_device *device, const VkPipelineColorBlendStateCreateInfo cb_state = { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .alphaToCoverageEnable = false, - .alphaToOneEnable = false, .logicOpEnable = false, .attachmentCount = 0, .pAttachments = NULL, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 41a26c70d85..47c62754a8a 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -311,7 +311,8 @@ populate_wm_prog_key(const struct brw_device_info *devinfo, key->nr_color_regions = render_pass->subpasses[info->subpass].color_count; key->replicate_alpha = key->nr_color_regions > 1 && - info->pColorBlendState->alphaToCoverageEnable; + info->pMultisampleState && + info->pMultisampleState->alphaToCoverageEnable; if (info->pMultisampleState && info->pMultisampleState->rasterSamples > 1) { /* We should probably pull this out of the shader, but it's fairly diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index f63ca4dcb4f..872025c7367 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -248,7 +248,8 @@ gen7_emit_ds_state(struct anv_pipeline *pipeline, static void gen7_emit_cb_state(struct anv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *info) + const VkPipelineColorBlendStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info) { struct anv_device *device = pipeline->device; @@ -283,7 +284,7 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], .SourceBlendFactor = 
vk_to_gen_blend[a->srcColorBlendFactor], .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], - .AlphaToCoverageEnable = info->alphaToCoverageEnable, + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, # if 0 bool AlphaToOneEnable; @@ -355,7 +356,8 @@ genX(graphics_pipeline_create)( gen7_emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); - gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState); + gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VF_STATISTICS, .StatisticsEnable = true); diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 9bf7d2d555e..8abd29c71e8 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -148,7 +148,8 @@ emit_rs_state(struct anv_pipeline *pipeline, static void emit_cb_state(struct anv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *info) + const VkPipelineColorBlendStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info) { struct anv_device *device = pipeline->device; @@ -206,8 +207,8 @@ emit_cb_state(struct anv_pipeline *pipeline, anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); struct GENX(BLEND_STATE) blend_state = { - .AlphaToCoverageEnable = info->alphaToCoverageEnable, - .AlphaToOneEnable = info->alphaToOneEnable, + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, }; for (uint32_t i = 0; i < info->attachmentCount; i++) { @@ -365,7 +366,8 @@ genX(graphics_pipeline_create)( assert(pCreateInfo->pRasterState); emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); - emit_cb_state(pipeline, pCreateInfo->pColorBlendState); + emit_cb_state(pipeline, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); anv_batch_emit(&pipeline->batch, 
GENX(3DSTATE_VF_STATISTICS), .StatisticsEnable = true); -- cgit v1.2.3 From 5f348bd0e59e545d10dc1cab8495ddc0d3e72fea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 17:26:32 -0800 Subject: vk/0.210.0: Rename origin fields of VkViewport --- include/vulkan/vulkan.h | 4 ++-- src/vulkan/anv_meta.c | 4 ++-- src/vulkan/anv_meta_clear.c | 8 ++++---- src/vulkan/gen8_cmd_buffer.c | 16 ++++++++-------- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 34470999bbf..49241de1863 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1646,8 +1646,8 @@ typedef struct VkPipelineTessellationStateCreateInfo { } VkPipelineTessellationStateCreateInfo; typedef struct VkViewport { - float originX; - float originY; + float x; + float y; float width; float height; float minDepth; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index f28775e0f7a..7a9d631e876 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -601,8 +601,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 1, &(VkViewport) { - .originX = 0.0f, - .originY = 0.0f, + .x = 0.0f, + .y = 0.0f, .width = dest_iview->extent.width, .height = dest_iview->extent.height, .minDepth = 0.0f, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 1403d7d0c3d..091f7632e97 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -355,8 +355,8 @@ emit_load_color_clear(struct anv_cmd_buffer *cmd_buffer, ANV_CALL(CmdSetViewport)(cmd_buffer_h, 1, (VkViewport[]) { { - .originX = 0, - .originY = 0, + .x = 0, + .y = 0, .width = fb->width, .height = fb->height, .minDepth = 0.0, @@ -523,8 +523,8 @@ emit_load_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, ANV_CALL(CmdSetViewport)(cmd_buffer_h, 1, (VkViewport[]) { { - .originX = 0, - .originY = 0, + .x = 0, + .y = 0, .width = fb->width, .height = 
fb->height, diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index c4ac28373e5..bdccee8a7b7 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -85,17 +85,17 @@ emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, .ViewportMatrixElementm00 = vp->width / 2, .ViewportMatrixElementm11 = vp->height / 2, .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, - .ViewportMatrixElementm30 = vp->originX + vp->width / 2, - .ViewportMatrixElementm31 = vp->originY + vp->height / 2, + .ViewportMatrixElementm30 = vp->x + vp->width / 2, + .ViewportMatrixElementm31 = vp->y + vp->height / 2, .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2, .XMinClipGuardband = -1.0f, .XMaxClipGuardband = 1.0f, .YMinClipGuardband = -1.0f, .YMaxClipGuardband = 1.0f, - .XMinViewPort = vp->originX, - .XMaxViewPort = vp->originX + vp->width - 1, - .YMinViewPort = vp->originY, - .YMaxViewPort = vp->originY + vp->height - 1, + .XMinViewPort = vp->x, + .XMaxViewPort = vp->x + vp->width - 1, + .YMinViewPort = vp->y, + .YMaxViewPort = vp->y + vp->height - 1, }; struct GENX(CC_VIEWPORT) cc_viewport = { @@ -126,8 +126,8 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) /* If viewport count is 0, this is taken to mean "use the default" */ emit_viewport_state(cmd_buffer, 1, &(VkViewport) { - .originX = 0.0f, - .originY = 0.0f, + .x = 0.0f, + .y = 0.0f, .width = cmd_buffer->state.framebuffer->width, .height = cmd_buffer->state.framebuffer->height, .minDepth = 0.0f, -- cgit v1.2.3 From 17486b86644688c4f295d189b354a6ea1d6a1471 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 17:46:38 -0800 Subject: vk/0.210.0: More fun with flags fields --- include/vulkan/vulkan.h | 75 +++++++++++++++++++++++++++++--------------- src/vulkan/anv_batch_chain.c | 4 +-- src/vulkan/anv_cmd_buffer.c | 4 +-- src/vulkan/anv_dump.c | 2 +- src/vulkan/anv_private.h | 2 +- 5 files changed, 55 insertions(+), 32 deletions(-) (limited 
to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index afe54d9814a..28e9b3283ce 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -791,6 +791,7 @@ typedef enum VkSubpassContents { VK_SUBPASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF } VkSubpassContents; +typedef VkFlags VkInstanceCreateFlags; typedef enum VkFormatFeatureFlagBits { VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT = 0x00000001, @@ -858,10 +859,12 @@ typedef enum { } VkMemoryPropertyFlagBits; typedef VkFlags VkMemoryPropertyFlags; -typedef enum { +typedef enum VkMemoryHeapFlagBits { VK_MEMORY_HEAP_HOST_LOCAL_BIT = 0x00000001, } VkMemoryHeapFlagBits; typedef VkFlags VkMemoryHeapFlags; +typedef VkFlags VkDeviceCreateFlags; +typedef VkFlags VkDeviceQueueCreateFlags; typedef VkFlags VkMemoryMapFlags; typedef enum { @@ -882,6 +885,7 @@ typedef enum VkFenceCreateFlagBits { typedef VkFlags VkFenceCreateFlags; typedef VkFlags VkSemaphoreCreateFlags; typedef VkFlags VkEventCreateFlags; +typedef VkFlags VkQueryPoolCreateFlags; typedef enum VkQueryPipelineStatisticFlagBits { VK_QUERY_PIPELINE_STATISTIC_IA_VERTICES_BIT = 0x00000001, @@ -907,6 +911,13 @@ typedef enum VkQueryResultFlagBits { } VkQueryResultFlagBits; typedef VkFlags VkQueryResultFlags; +typedef enum VkBufferCreateFlagBits { + VK_BUFFER_CREATE_SPARSE_BINDING_BIT = 0x00000001, + VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, + VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004, +} VkBufferCreateFlagBits; +typedef VkFlags VkBufferCreateFlags; + typedef enum VkBufferUsageFlagBits { VK_BUFFER_USAGE_TRANSFER_SRC_BIT = 0x00000001, VK_BUFFER_USAGE_TRANSFER_DST_BIT = 0x00000002, @@ -919,13 +930,7 @@ typedef enum VkBufferUsageFlagBits { VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT = 0x00000100, } VkBufferUsageFlagBits; typedef VkFlags VkBufferUsageFlags; - -typedef enum { - VK_BUFFER_CREATE_SPARSE_BINDING_BIT = 0x00000001, - VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, - VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004, -} 
VkBufferCreateFlagBits; -typedef VkFlags VkBufferCreateFlags; +typedef VkFlags VkBufferViewCreateFlags; typedef enum { VK_IMAGE_ASPECT_COLOR_BIT = 0x00000001, @@ -941,6 +946,7 @@ typedef enum { } VkImageViewCreateFlagBits; typedef VkFlags VkImageViewCreateFlags; typedef VkFlags VkShaderModuleCreateFlags; +typedef VkFlags VkPipelineCacheCreateFlags; typedef VkFlags VkShaderCreateFlags; typedef enum { @@ -986,16 +992,23 @@ typedef VkFlags VkPipelineMultisampleStateCreateFlags; typedef VkFlags VkPipelineDepthStencilStateCreateFlags; typedef VkFlags VkPipelineColorBlendStateCreateFlags; typedef VkFlags VkPipelineDynamicStateCreateFlags; +typedef VkFlags VkPipelineLayoutCreateFlags; typedef VkFlags VkShaderStageFlags; +typedef VkFlags VkSamplerCreateFlags; +typedef VkFlags VkDescriptorSetLayoutCreateFlags; + +typedef enum VkDescriptorPoolCreateFlagBits { + VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT = 0x00000001, +} VkDescriptorPoolCreateFlagBits; +typedef VkFlags VkDescriptorPoolCreateFlags; +typedef VkFlags VkDescriptorPoolResetFlags; +typedef VkFlags VkFramebufferCreateFlags; +typedef VkFlags VkRenderPassCreateFlags; typedef enum VkAttachmentDescriptionFlagBits { VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT = 0x00000001, } VkAttachmentDescriptionFlagBits; typedef VkFlags VkAttachmentDescriptionFlags; - -typedef enum { - VK_SUBPASS_DESCRIPTION_NO_OVERDRAW_BIT = 0x00000001, -} VkSubpassDescriptionFlagBits; typedef VkFlags VkSubpassDescriptionFlags; typedef enum VkPipelineStageFlagBits { @@ -1053,14 +1066,12 @@ typedef enum VkCommandPoolResetFlagBits { typedef VkFlags VkCommandPoolResetFlags; typedef VkFlags VkCommandBufferCreateFlags; -typedef enum { - VK_CMD_BUFFER_OPTIMIZE_SMALL_BATCH_BIT = 0x00000001, - VK_CMD_BUFFER_OPTIMIZE_PIPELINE_SWITCH_BIT = 0x00000002, - VK_CMD_BUFFER_OPTIMIZE_ONE_TIME_SUBMIT_BIT = 0x00000004, - VK_CMD_BUFFER_OPTIMIZE_DESCRIPTOR_SET_SWITCH_BIT = 0x00000008, - VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT = 0x00000010, -} 
VkCommandBufferOptimizeFlagBits; -typedef VkFlags VkCommandBufferOptimizeFlags; +typedef enum VkCommandBufferUsageFlagBits { + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT = 0x00000001, + VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT = 0x00000002, + VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT = 0x00000004, +} VkCommandBufferUsageFlagBits; +typedef VkFlags VkCommandBufferUsageFlags; typedef enum VkCommandBufferResetFlagBits { VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT = 0x00000001, @@ -1110,6 +1121,7 @@ typedef struct { typedef struct { VkStructureType sType; const void* pNext; + VkInstanceCreateFlags flags; const VkApplicationInfo* pAppInfo; const VkAllocCallbacks* pAllocCb; uint32_t layerCount; @@ -1338,6 +1350,7 @@ typedef struct VkPhysicalDeviceMemoryProperties { typedef struct VkDeviceQueueCreateInfo { VkStructureType sType; const void* pNext; + VkDeviceQueueCreateFlags flags; uint32_t queueFamilyIndex; uint32_t queueCount; } VkDeviceQueueCreateInfo; @@ -1345,6 +1358,7 @@ typedef struct VkDeviceQueueCreateInfo { typedef struct VkDeviceCreateInfo { VkStructureType sType; const void* pNext; + VkDeviceCreateFlags flags; uint32_t queueRecordCount; const VkDeviceQueueCreateInfo* pRequestedQueues; uint32_t layerCount; @@ -1484,6 +1498,7 @@ typedef struct VkEventCreateInfo { typedef struct VkQueryPoolCreateInfo { VkStructureType sType; const void* pNext; + VkQueryPoolCreateFlags flags; VkQueryType queryType; uint32_t slots; VkQueryPipelineStatisticFlags pipelineStatistics; @@ -1492,9 +1507,9 @@ typedef struct VkQueryPoolCreateInfo { typedef struct VkBufferCreateInfo { VkStructureType sType; const void* pNext; + VkBufferCreateFlags flags; VkDeviceSize size; VkBufferUsageFlags usage; - VkBufferCreateFlags flags; VkSharingMode sharingMode; uint32_t queueFamilyCount; const uint32_t* pQueueFamilyIndices; @@ -1503,6 +1518,7 @@ typedef struct VkBufferCreateInfo { typedef struct VkBufferViewCreateInfo { VkStructureType sType; const void* pNext; + VkBufferViewCreateFlags 
flags; VkBuffer buffer; VkFormat format; VkDeviceSize offset; @@ -1512,6 +1528,7 @@ typedef struct VkBufferViewCreateInfo { typedef struct VkImageCreateInfo { VkStructureType sType; const void* pNext; + VkImageCreateFlags flags; VkImageType imageType; VkFormat format; VkExtent3D extent; @@ -1520,7 +1537,6 @@ typedef struct VkImageCreateInfo { uint32_t samples; VkImageTiling tiling; VkImageUsageFlags usage; - VkImageCreateFlags flags; VkSharingMode sharingMode; uint32_t queueFamilyCount; const uint32_t* pQueueFamilyIndices; @@ -1552,12 +1568,12 @@ typedef struct VkImageSubresourceRange { typedef struct VkImageViewCreateInfo { VkStructureType sType; const void* pNext; + VkImageViewCreateFlags flags; VkImage image; VkImageViewType viewType; VkFormat format; VkComponentMapping components; VkImageSubresourceRange subresourceRange; - VkImageViewCreateFlags flags; } VkImageViewCreateInfo; typedef struct VkShaderModuleCreateInfo { @@ -1580,6 +1596,7 @@ typedef struct { typedef struct VkPipelineCacheCreateInfo { VkStructureType sType; const void* pNext; + VkPipelineCacheCreateFlags flags; size_t initialSize; const void* initialData; size_t maxSize; @@ -1803,6 +1820,7 @@ typedef struct VkPushConstantRange { typedef struct VkPipelineLayoutCreateInfo { VkStructureType sType; const void* pNext; + VkPipelineLayoutCreateFlags flags; uint32_t descriptorSetCount; const VkDescriptorSetLayout* pSetLayouts; uint32_t pushConstantRangeCount; @@ -1812,6 +1830,7 @@ typedef struct VkPipelineLayoutCreateInfo { typedef struct VkSamplerCreateInfo { VkStructureType sType; const void* pNext; + VkSamplerCreateFlags flags; VkFilter magFilter; VkFilter minFilter; VkSamplerMipmapMode mipmapMode; @@ -1838,6 +1857,7 @@ typedef struct VkDescriptorSetLayoutBinding { typedef struct VkDescriptorSetLayoutCreateInfo { VkStructureType sType; const void* pNext; + VkDescriptorSetLayoutCreateFlags flags; uint32_t count; const VkDescriptorSetLayoutBinding* pBinding; } VkDescriptorSetLayoutCreateInfo; @@ -1850,6 
+1870,7 @@ typedef struct { typedef struct VkDescriptorPoolCreateInfo { VkStructureType sType; const void* pNext; + VkDescriptorPoolCreateFlags flags; VkDescriptorPoolUsage poolUsage; uint32_t maxSets; uint32_t count; @@ -1896,6 +1917,7 @@ typedef struct VkCopyDescriptorSet { typedef struct VkFramebufferCreateInfo { VkStructureType sType; const void* pNext; + VkFramebufferCreateFlags flags; VkRenderPass renderPass; uint32_t attachmentCount; const VkImageView* pAttachments; @@ -1907,6 +1929,7 @@ typedef struct VkFramebufferCreateInfo { typedef struct VkAttachmentDescription { VkStructureType sType; const void* pNext; + VkAttachmentDescriptionFlags flags; VkFormat format; uint32_t samples; VkAttachmentLoadOp loadOp; @@ -1915,7 +1938,6 @@ typedef struct VkAttachmentDescription { VkAttachmentStoreOp stencilStoreOp; VkImageLayout initialLayout; VkImageLayout finalLayout; - VkAttachmentDescriptionFlags flags; } VkAttachmentDescription; typedef struct VkAttachmentReference { @@ -1926,8 +1948,8 @@ typedef struct VkAttachmentReference { typedef struct VkSubpassDescription { VkStructureType sType; const void* pNext; - VkPipelineBindPoint pipelineBindPoint; VkSubpassDescriptionFlags flags; + VkPipelineBindPoint pipelineBindPoint; uint32_t inputCount; const VkAttachmentReference* pInputAttachments; uint32_t colorCount; @@ -1953,6 +1975,7 @@ typedef struct VkSubpassDependency { typedef struct VkRenderPassCreateInfo { VkStructureType sType; const void* pNext; + VkRenderPassCreateFlags flags; uint32_t attachmentCount; const VkAttachmentDescription* pAttachments; uint32_t subpassCount; @@ -1979,7 +2002,7 @@ typedef struct { typedef struct VkCommandBufferBeginInfo { VkStructureType sType; const void* pNext; - VkCommandBufferOptimizeFlags flags; + VkCommandBufferUsageFlags flags; VkRenderPass renderPass; uint32_t subpass; VkFramebuffer framebuffer; diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 62189afec2f..34d7268631d 100644 --- 
a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -667,8 +667,8 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) * probably better of simply copying it into our batch. */ cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; - } else if (cmd_buffer->opt_flags & - VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT) { + } else if (!(cmd_buffer->usage_flags & + VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) { cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; /* When we chain, we need to add an MI_BATCH_BUFFER_START command diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 7c00f9d46ba..adba2072c51 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -188,7 +188,7 @@ VkResult anv_CreateCommandBuffer( &device->dynamic_state_block_pool); cmd_buffer->level = pCreateInfo->level; - cmd_buffer->opt_flags = 0; + cmd_buffer->usage_flags = 0; anv_cmd_state_init(&cmd_buffer->state); @@ -265,7 +265,7 @@ VkResult anv_BeginCommandBuffer( anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); - cmd_buffer->opt_flags = pBeginInfo->flags; + cmd_buffer->usage_flags = pBeginInfo->flags; if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { cmd_buffer->state.framebuffer = diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 6192361e491..b2d0b385d0a 100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -94,7 +94,7 @@ anv_dump_image_to_ppm(struct anv_device *device, result = anv_BeginCommandBuffer(cmd, &(VkCommandBufferBeginInfo) { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_CMD_BUFFER_OPTIMIZE_ONE_TIME_SUBMIT_BIT, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, }); assert(result == VK_SUCCESS); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index c86003f5823..6130cb97b75 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1044,7 +1044,7 @@ struct anv_cmd_buffer { struct anv_state_stream 
surface_state_stream; struct anv_state_stream dynamic_state_stream; - VkCommandBufferOptimizeFlags opt_flags; + VkCommandBufferUsageFlags usage_flags; VkCommandBufferLevel level; struct anv_cmd_state state; -- cgit v1.2.3 From 1ab9f843bc40c54daeb94676131e8323d4f2359f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 17:54:51 -0800 Subject: vk/0.210.0: Don't allow chaining of description structs --- include/vulkan/vulkan.h | 6 ------ src/vulkan/anv_meta.c | 2 -- src/vulkan/anv_meta_clear.c | 2 -- 3 files changed, 10 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 28e9b3283ce..2c0c5a53c49 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1927,8 +1927,6 @@ typedef struct VkFramebufferCreateInfo { } VkFramebufferCreateInfo; typedef struct VkAttachmentDescription { - VkStructureType sType; - const void* pNext; VkAttachmentDescriptionFlags flags; VkFormat format; uint32_t samples; @@ -1946,8 +1944,6 @@ typedef struct VkAttachmentReference { } VkAttachmentReference; typedef struct VkSubpassDescription { - VkStructureType sType; - const void* pNext; VkSubpassDescriptionFlags flags; VkPipelineBindPoint pipelineBindPoint; uint32_t inputCount; @@ -1961,8 +1957,6 @@ typedef struct VkSubpassDescription { } VkSubpassDescription; typedef struct VkSubpassDependency { - VkStructureType sType; - const void* pNext; uint32_t srcSubpass; uint32_t destSubpass; VkPipelineStageFlags srcStageMask; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 7a9d631e876..877143cd7a0 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -190,7 +190,6 @@ anv_device_init_meta_blit_state(struct anv_device *device) .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp 
= VK_ATTACHMENT_STORE_OP_STORE, @@ -199,7 +198,6 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, .subpassCount = 1, .pSubpasses = &(VkSubpassDescription) { - .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .inputCount = 0, .colorCount = 1, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 091f7632e97..99004fd64ca 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -728,7 +728,6 @@ void anv_CmdClearColorImage( .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, .format = iview.format->vk_format, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, @@ -737,7 +736,6 @@ void anv_CmdClearColorImage( }, .subpassCount = 1, .pSubpasses = &(VkSubpassDescription) { - .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .inputCount = 0, .colorCount = 1, -- cgit v1.2.3 From 7f2284063df569df40f68074833f1518005d348d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 18:05:00 -0800 Subject: vk/0.210.0: s/raster/rasterization/ --- include/vulkan/vulkan.h | 10 +++++----- src/vulkan/anv_meta.c | 6 +++--- src/vulkan/anv_meta_clear.c | 6 +++--- src/vulkan/anv_pipeline.c | 21 +++++++++++---------- src/vulkan/gen7_pipeline.c | 9 +++++---- src/vulkan/gen8_pipeline.c | 6 +++--- 6 files changed, 30 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 2c0c5a53c49..93c9bb488e7 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -151,7 +151,7 @@ typedef enum { VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO = 17, VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO = 18, VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO = 19, - 
VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO = 20, + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO = 20, VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO = 21, VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO = 22, VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO = 23, @@ -1696,7 +1696,7 @@ typedef struct VkPipelineViewportStateCreateInfo { const VkRect2D* pScissors; } VkPipelineViewportStateCreateInfo; -typedef struct { +typedef struct VkPipelineRasterizationStateCreateInfo { VkStructureType sType; const void* pNext; VkPipelineRasterizationStateCreateFlags flags; @@ -1710,13 +1710,13 @@ typedef struct { float depthBiasClamp; float depthBiasSlopeFactor; float lineWidth; -} VkPipelineRasterStateCreateInfo; +} VkPipelineRasterizationStateCreateInfo; typedef struct VkPipelineMultisampleStateCreateInfo { VkStructureType sType; const void* pNext; VkPipelineMultisampleStateCreateFlags flags; - uint32_t rasterSamples; + VkSampleCountFlagBits rasterizationSamples; VkBool32 sampleShadingEnable; float minSampleShading; const VkSampleMask* pSampleMask; @@ -1789,7 +1789,7 @@ typedef struct VkGraphicsPipelineCreateInfo { const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState; const VkPipelineTessellationStateCreateInfo* pTessellationState; const VkPipelineViewportStateCreateInfo* pViewportState; - const VkPipelineRasterStateCreateInfo* pRasterState; + const VkPipelineRasterizationStateCreateInfo* pRasterizationState; const VkPipelineMultisampleStateCreateInfo* pMultisampleState; const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState; const VkPipelineColorBlendStateCreateInfo* pColorBlendState; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 877143cd7a0..f9b40004b23 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -353,8 +353,8 @@ anv_device_init_meta_blit_state(struct anv_device *device) .viewportCount = 1, .scissorCount = 1, }, - .pRasterState = 
&(VkPipelineRasterStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .depthClipEnable = true, .rasterizerDiscardEnable = false, .polygonMode = VK_POLYGON_MODE_FILL, @@ -363,7 +363,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterSamples = 1, + .rasterizationSamples = 1, .sampleShadingEnable = false, .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, }, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 99004fd64ca..010323180e8 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -173,8 +173,8 @@ create_pipeline(struct anv_device *device, .scissorCount = 1, .pScissors = NULL, /* dynamic */ }, - .pRasterState = &(VkPipelineRasterStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .depthClipEnable = false, .rasterizerDiscardEnable = false, .polygonMode = VK_POLYGON_MODE_FILL, @@ -185,7 +185,7 @@ create_pipeline(struct anv_device *device, }, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterSamples = 1, /* FINISHME: Multisampling */ + .rasterizationSamples = 1, /* FINISHME: Multisampling */ .sampleShadingEnable = false, .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, .alphaToCoverageEnable = false, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 47c62754a8a..25188a86486 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -314,13 +314,13 @@ populate_wm_prog_key(const struct brw_device_info *devinfo, 
info->pMultisampleState && info->pMultisampleState->alphaToCoverageEnable; - if (info->pMultisampleState && info->pMultisampleState->rasterSamples > 1) { + if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) { /* We should probably pull this out of the shader, but it's fairly * harmless to compute it and then let dead-code take care of it. */ key->persample_shading = info->pMultisampleState->sampleShadingEnable; if (key->persample_shading) - key->persample_2x = info->pMultisampleState->rasterSamples == 2; + key->persample_2x = info->pMultisampleState->rasterizationSamples == 2; key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable; key->compute_sample_id = info->pMultisampleState->sampleShadingEnable; @@ -844,17 +844,18 @@ anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, } if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { - assert(pCreateInfo->pRasterState); - dynamic->line_width = pCreateInfo->pRasterState->lineWidth; + assert(pCreateInfo->pRasterizationState); + dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth; } if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { - assert(pCreateInfo->pRasterState); + assert(pCreateInfo->pRasterizationState); dynamic->depth_bias.bias = - pCreateInfo->pRasterState->depthBiasConstantFactor; - dynamic->depth_bias.clamp = pCreateInfo->pRasterState->depthBiasClamp; + pCreateInfo->pRasterizationState->depthBiasConstantFactor; + dynamic->depth_bias.clamp = + pCreateInfo->pRasterizationState->depthBiasClamp; dynamic->depth_bias.slope = - pCreateInfo->pRasterState->depthBiasSlopeFactor; + pCreateInfo->pRasterizationState->depthBiasSlopeFactor; } if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { @@ -935,7 +936,7 @@ anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) assert(info->pVertexInputState); assert(info->pInputAssemblyState); assert(info->pViewportState); - assert(info->pRasterState); + assert(info->pRasterizationState); 
assert(info->pMultisampleState); if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) @@ -987,7 +988,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, if (pCreateInfo->pTessellationState) anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); if (pCreateInfo->pMultisampleState && - pCreateInfo->pMultisampleState->rasterSamples > 1) + pCreateInfo->pMultisampleState->rasterizationSamples > 1) anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); pipeline->use_repclear = extra && extra->use_repclear; diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 872025c7367..9126eb9b363 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -102,7 +102,7 @@ static const uint32_t vk_to_gen_front_face[] = { static void gen7_emit_rs_state(struct anv_pipeline *pipeline, - const VkPipelineRasterStateCreateInfo *info, + const VkPipelineRasterizationStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { struct GEN7_3DSTATE_SF sf = { @@ -351,8 +351,8 @@ genX(graphics_pipeline_create)( assert(pCreateInfo->pVertexInputState); gen7_emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); - assert(pCreateInfo->pRasterState); - gen7_emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); + assert(pCreateInfo->pRasterizationState); + gen7_emit_rs_state(pipeline, pCreateInfo->pRasterizationState, extra); gen7_emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); @@ -391,7 +391,8 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); - const VkPipelineRasterStateCreateInfo *rs_info = pCreateInfo->pRasterState; + const VkPipelineRasterizationStateCreateInfo *rs_info = + pCreateInfo->pRasterizationState; anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_CLIP, .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c 
index 8abd29c71e8..0adb0c39c03 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -92,7 +92,7 @@ emit_ia_state(struct anv_pipeline *pipeline, static void emit_rs_state(struct anv_pipeline *pipeline, - const VkPipelineRasterStateCreateInfo *info, + const VkPipelineRasterizationStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { static const uint32_t vk_to_gen_cullmode[] = { @@ -363,8 +363,8 @@ genX(graphics_pipeline_create)( emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); assert(pCreateInfo->pInputAssemblyState); emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); - assert(pCreateInfo->pRasterState); - emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); + assert(pCreateInfo->pRasterizationState); + emit_rs_state(pipeline, pCreateInfo->pRasterizationState, extra); emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); emit_cb_state(pipeline, pCreateInfo->pColorBlendState, pCreateInfo->pMultisampleState); -- cgit v1.2.3 From 4cf0b57bbfd5526e63c63d3a6fa6b4cadd128a98 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 18:12:55 -0800 Subject: vk/0.210.0: Rename ChannelFlags to ColorComponentFlags --- include/vulkan/vulkan.h | 18 +++++++++--------- src/vulkan/anv_meta.c | 7 +++++-- src/vulkan/anv_meta_clear.c | 8 ++++---- src/vulkan/gen7_pipeline.c | 8 ++++---- src/vulkan/gen8_pipeline.c | 8 ++++---- 5 files changed, 26 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 93c9bb488e7..921c3bca830 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -949,14 +949,6 @@ typedef VkFlags VkShaderModuleCreateFlags; typedef VkFlags VkPipelineCacheCreateFlags; typedef VkFlags VkShaderCreateFlags; -typedef enum { - VK_CHANNEL_R_BIT = 0x00000001, - VK_CHANNEL_G_BIT = 0x00000002, - VK_CHANNEL_B_BIT = 0x00000004, - VK_CHANNEL_A_BIT = 0x00000008, -} VkChannelFlagBits; -typedef VkFlags 
VkChannelFlags; - typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT = 0x00000001, VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT = 0x00000002, @@ -991,6 +983,14 @@ typedef VkFlags VkCullModeFlags; typedef VkFlags VkPipelineMultisampleStateCreateFlags; typedef VkFlags VkPipelineDepthStencilStateCreateFlags; typedef VkFlags VkPipelineColorBlendStateCreateFlags; + +typedef enum VkColorComponentFlagBits { + VK_COLOR_COMPONENT_R_BIT = 0x00000001, + VK_COLOR_COMPONENT_G_BIT = 0x00000002, + VK_COLOR_COMPONENT_B_BIT = 0x00000004, + VK_COLOR_COMPONENT_A_BIT = 0x00000008, +} VkColorComponentFlagBits; +typedef VkFlags VkColorComponentFlags; typedef VkFlags VkPipelineDynamicStateCreateFlags; typedef VkFlags VkPipelineLayoutCreateFlags; typedef VkFlags VkShaderStageFlags; @@ -1757,7 +1757,7 @@ typedef struct VkPipelineColorBlendAttachmentState { VkBlendFactor srcAlphaBlendFactor; VkBlendFactor dstAlphaBlendFactor; VkBlendOp alphaBlendOp; - VkChannelFlags channelWriteMask; + VkColorComponentFlags colorWriteMask; } VkPipelineColorBlendAttachmentState; typedef struct VkPipelineColorBlendStateCreateInfo { diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index f9b40004b23..a3f5733e122 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -371,8 +371,11 @@ anv_device_init_meta_blit_state(struct anv_device *device) .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 1, .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { .channelWriteMask = VK_CHANNEL_A_BIT | - VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + { .colorWriteMask = + VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT }, } }, .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 010323180e8..27c6a291967 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ 
-292,10 +292,10 @@ init_color_pipeline(struct anv_device *device) .pAttachments = (VkPipelineColorBlendAttachmentState []) { { .blendEnable = false, - .channelWriteMask = VK_CHANNEL_A_BIT | - VK_CHANNEL_R_BIT | - VK_CHANNEL_G_BIT | - VK_CHANNEL_B_BIT, + .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT, }, }, }; diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 9126eb9b363..80c5f1a99a1 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -291,10 +291,10 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, bool AlphaToCoverageDitherEnable; # endif - .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), - .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), - .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), - .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), .LogicOpEnable = info->logicOpEnable, .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 0adb0c39c03..c2bbdb72829 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -234,10 +234,10 @@ emit_cb_state(struct anv_pipeline *pipeline, .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], - .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), - .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), - .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), - .WriteDisableBlue = !(a->channelWriteMask & 
VK_CHANNEL_B_BIT), + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), }; /* Our hardware applies the blend factor prior to the blend function -- cgit v1.2.3 From aadb7dce9b5dfc16878638580dc80759a45ca773 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 21:10:14 -0800 Subject: vk/0.210.0: Update to the new instance/device create structs --- include/vulkan/vulkan.h | 37 +++++++++++++++++++------------------ src/vulkan/anv_device.c | 16 ++++++++-------- 2 files changed, 27 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index e110be63ced..860df312735 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -100,12 +100,12 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkCommandPool) #define VK_FALSE 0 #define VK_QUEUE_FAMILY_IGNORED (~0U) #define VK_SUBPASS_EXTERNAL (~0U) -#define VK_MAX_PHYSICAL_DEVICE_NAME 256 -#define VK_UUID_LENGTH 16 +#define VK_MAX_PHYSICAL_DEVICE_NAME_SIZE 256 +#define VK_UUID_SIZE 16 #define VK_MAX_MEMORY_TYPES 32 #define VK_MAX_MEMORY_HEAPS 16 -#define VK_MAX_EXTENSION_NAME 256 -#define VK_MAX_DESCRIPTION 256 +#define VK_MAX_EXTENSION_NAME_SIZE 256 +#define VK_MAX_DESCRIPTION_SIZE 256 typedef enum VkResult { @@ -1315,11 +1315,11 @@ typedef struct VkPhysicalDeviceSparseProperties { typedef struct VkPhysicalDeviceProperties { uint32_t apiVersion; uint32_t driverVersion; - uint32_t vendorId; - uint32_t deviceId; + uint32_t vendorID; + uint32_t deviceID; VkPhysicalDeviceType deviceType; - char deviceName[VK_MAX_PHYSICAL_DEVICE_NAME]; - uint8_t pipelineCacheUUID[VK_UUID_LENGTH]; + char deviceName[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; + uint8_t pipelineCacheUUID[VK_UUID_SIZE]; VkPhysicalDeviceLimits limits; 
VkPhysicalDeviceSparseProperties sparseProperties; } VkPhysicalDeviceProperties; @@ -1353,31 +1353,32 @@ typedef struct VkDeviceQueueCreateInfo { VkDeviceQueueCreateFlags flags; uint32_t queueFamilyIndex; uint32_t queueCount; + const float* pQueuePriorities; } VkDeviceQueueCreateInfo; typedef struct VkDeviceCreateInfo { VkStructureType sType; const void* pNext; VkDeviceCreateFlags flags; - uint32_t queueRecordCount; - const VkDeviceQueueCreateInfo* pRequestedQueues; - uint32_t layerCount; - const char*const* ppEnabledLayerNames; - uint32_t extensionCount; - const char*const* ppEnabledExtensionNames; + uint32_t queueCreateInfoCount; + const VkDeviceQueueCreateInfo* pQueueCreateInfos; + uint32_t enabledLayerNameCount; + const char* const* ppEnabledLayerNames; + uint32_t enabledExtensionNameCount; + const char* const* ppEnabledExtensionNames; const VkPhysicalDeviceFeatures* pEnabledFeatures; } VkDeviceCreateInfo; typedef struct VkExtensionProperties { - char extName[VK_MAX_EXTENSION_NAME]; + char extensionName[VK_MAX_EXTENSION_NAME_SIZE]; uint32_t specVersion; } VkExtensionProperties; typedef struct VkLayerProperties { - char layerName[VK_MAX_EXTENSION_NAME]; + char layerName[VK_MAX_EXTENSION_NAME_SIZE]; uint32_t specVersion; - uint32_t implVersion; - char description[VK_MAX_DESCRIPTION]; + uint32_t implementationVersion; + char description[VK_MAX_DESCRIPTION_SIZE]; } VkLayerProperties; typedef struct { diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index f0e26433eed..e0a6ed62ebd 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -172,14 +172,14 @@ static const VkAllocCallbacks default_alloc_callbacks = { static const VkExtensionProperties global_extensions[] = { { - .extName = VK_EXT_KHR_SWAPCHAIN_EXTENSION_NAME, + .extensionName = VK_EXT_KHR_SWAPCHAIN_EXTENSION_NAME, .specVersion = 17, }, }; static const VkExtensionProperties device_extensions[] = { { - .extName = VK_EXT_KHR_DEVICE_SWAPCHAIN_EXTENSION_NAME, + .extensionName = 
VK_EXT_KHR_DEVICE_SWAPCHAIN_EXTENSION_NAME, .specVersion = 53, }, }; @@ -201,7 +201,7 @@ VkResult anv_CreateInstance( bool found = false; for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], - global_extensions[j].extName) == 0) { + global_extensions[j].extensionName) == 0) { found = true; break; } @@ -502,15 +502,15 @@ void anv_GetPhysicalDeviceProperties( *pProperties = (VkPhysicalDeviceProperties) { .apiVersion = VK_MAKE_VERSION(0, 170, 2), .driverVersion = 1, - .vendorId = 0x8086, - .deviceId = pdevice->chipset_id, + .vendorID = 0x8086, + .deviceID = pdevice->chipset_id, .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, .limits = limits, .sparseProperties = {0}, /* Broadwell doesn't do sparse. */ }; strcpy(pProperties->deviceName, pdevice->name); - snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_LENGTH, + snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4); } @@ -619,11 +619,11 @@ VkResult anv_CreateDevice( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); - for (uint32_t i = 0; i < pCreateInfo->extensionCount; i++) { + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionNameCount; i++) { bool found = false; for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) { if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], - device_extensions[j].extName) == 0) { + device_extensions[j].extensionName) == 0) { found = true; break; } -- cgit v1.2.3 From 6a6da54ccb4b401e3a6675b15001854e10d94dbd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 30 Nov 2015 21:18:12 -0800 Subject: vk/0.210.0: Rename parameters to memory binding/mapping functions --- include/vulkan/vulkan.h | 32 ++++++++++++++++---------------- src/vulkan/anv_device.c | 34 +++++++++++++++++----------------- 2 files changed, 33 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 
860df312735..92eaaaa0b9e 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2165,13 +2165,13 @@ typedef VkResult (VKAPI_PTR *PFN_vkQueueWaitIdle)(VkQueue queue); typedef VkResult (VKAPI_PTR *PFN_vkDeviceWaitIdle)(VkDevice device); typedef VkResult (VKAPI_PTR *PFN_vkAllocMemory)(VkDevice device, const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem); typedef void (VKAPI_PTR *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory mem); -typedef VkResult (VKAPI_PTR *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData); -typedef void (VKAPI_PTR *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem); -typedef VkResult (VKAPI_PTR *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); -typedef VkResult (VKAPI_PTR *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); +typedef VkResult (VKAPI_PTR *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory memory, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData); +typedef void (VKAPI_PTR *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory memory); +typedef VkResult (VKAPI_PTR *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges); +typedef VkResult (VKAPI_PTR *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges); typedef void (VKAPI_PTR *PFN_vkGetDeviceMemoryCommitment)(VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes); -typedef VkResult (VKAPI_PTR *PFN_vkBindBufferMemory)(VkDevice device, VkBuffer buffer, VkDeviceMemory mem, VkDeviceSize memOffset); -typedef VkResult (VKAPI_PTR *PFN_vkBindImageMemory)(VkDevice device, VkImage image, VkDeviceMemory mem, VkDeviceSize memOffset); +typedef VkResult (VKAPI_PTR 
*PFN_vkBindBufferMemory)(VkDevice device, VkBuffer buffer, VkDeviceMemory memory, VkDeviceSize memoryOffset); +typedef VkResult (VKAPI_PTR *PFN_vkBindImageMemory)(VkDevice device, VkImage image, VkDeviceMemory memory, VkDeviceSize memoryOffset); typedef void (VKAPI_PTR *PFN_vkGetBufferMemoryRequirements)(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); typedef void (VKAPI_PTR *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); typedef void (VKAPI_PTR *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t* pNumRequirements, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); @@ -2394,7 +2394,7 @@ VKAPI_ATTR void VKAPI_CALL vkFreeMemory( VKAPI_ATTR VkResult VKAPI_CALL vkMapMemory( VkDevice device, - VkDeviceMemory mem, + VkDeviceMemory memory, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, @@ -2402,17 +2402,17 @@ VKAPI_ATTR VkResult VKAPI_CALL vkMapMemory( VKAPI_ATTR void VKAPI_CALL vkUnmapMemory( VkDevice device, - VkDeviceMemory mem); + VkDeviceMemory memory); VKAPI_ATTR VkResult VKAPI_CALL vkFlushMappedMemoryRanges( VkDevice device, - uint32_t memRangeCount, - const VkMappedMemoryRange* pMemRanges); + uint32_t memoryRangeCount, + const VkMappedMemoryRange* pMemoryRanges); VKAPI_ATTR VkResult VKAPI_CALL vkInvalidateMappedMemoryRanges( VkDevice device, - uint32_t memRangeCount, - const VkMappedMemoryRange* pMemRanges); + uint32_t memoryRangeCount, + const VkMappedMemoryRange* pMemoryRanges); VKAPI_ATTR void VKAPI_CALL vkGetDeviceMemoryCommitment( VkDevice device, @@ -2422,14 +2422,14 @@ VKAPI_ATTR void VKAPI_CALL vkGetDeviceMemoryCommitment( VKAPI_ATTR VkResult VKAPI_CALL vkBindBufferMemory( VkDevice device, VkBuffer buffer, - VkDeviceMemory mem, - VkDeviceSize memOffset); + VkDeviceMemory memory, + VkDeviceSize memoryOffset); VKAPI_ATTR VkResult VKAPI_CALL vkBindImageMemory( VkDevice device, VkImage image, - VkDeviceMemory 
mem, - VkDeviceSize memOffset); + VkDeviceMemory memory, + VkDeviceSize memoryOffset); VKAPI_ATTR void VKAPI_CALL vkGetBufferMemoryRequirements( VkDevice device, diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index e0a6ed62ebd..c3dcd9f56cc 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -999,14 +999,14 @@ void anv_FreeMemory( VkResult anv_MapMemory( VkDevice _device, - VkDeviceMemory _mem, + VkDeviceMemory _memory, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData) { ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only * takes a VkDeviceMemory pointer, it seems like only one map of the memory @@ -1024,17 +1024,17 @@ VkResult anv_MapMemory( void anv_UnmapMemory( VkDevice _device, - VkDeviceMemory _mem) + VkDeviceMemory _memory) { - ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); anv_gem_munmap(mem->map, mem->map_size); } VkResult anv_FlushMappedMemoryRanges( VkDevice device, - uint32_t memRangeCount, - const VkMappedMemoryRange* pMemRanges) + uint32_t memoryRangeCount, + const VkMappedMemoryRange* pMemoryRanges) { /* clflush here for !llc platforms */ @@ -1043,10 +1043,10 @@ VkResult anv_FlushMappedMemoryRanges( VkResult anv_InvalidateMappedMemoryRanges( VkDevice device, - uint32_t memRangeCount, - const VkMappedMemoryRange* pMemRanges) + uint32_t memoryRangeCount, + const VkMappedMemoryRange* pMemoryRanges) { - return anv_FlushMappedMemoryRanges(device, memRangeCount, pMemRanges); + return anv_FlushMappedMemoryRanges(device, memoryRangeCount, pMemoryRanges); } void anv_GetBufferMemoryRequirements( @@ -1113,14 +1113,14 @@ void anv_GetDeviceMemoryCommitment( VkResult anv_BindBufferMemory( VkDevice device, VkBuffer _buffer, - VkDeviceMemory _mem, - VkDeviceSize memOffset) + VkDeviceMemory 
_memory, + VkDeviceSize memoryOffset) { - ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); buffer->bo = &mem->bo; - buffer->offset = memOffset; + buffer->offset = memoryOffset; return VK_SUCCESS; } @@ -1128,14 +1128,14 @@ VkResult anv_BindBufferMemory( VkResult anv_BindImageMemory( VkDevice device, VkImage _image, - VkDeviceMemory _mem, - VkDeviceSize memOffset) + VkDeviceMemory _memory, + VkDeviceSize memoryOffset) { - ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); ANV_FROM_HANDLE(anv_image, image, _image); image->bo = &mem->bo; - image->offset = memOffset; + image->offset = memoryOffset; return VK_SUCCESS; } -- cgit v1.2.3 From 3f6abd0161c4f0c5f4716d1b97a2378219469b7a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Dec 2015 12:05:21 -0800 Subject: vk/0.210.0: Rework descriptor sets --- include/vulkan/vulkan.h | 105 ++++++++++++++++------------------ src/vulkan/anv_descriptor_set.c | 122 +++++++++++++++++++++------------------- src/vulkan/anv_meta.c | 29 ++++++---- 3 files changed, 127 insertions(+), 129 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 92eaaaa0b9e..0a28ade4a25 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -178,6 +178,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO = 44, VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO = 45, VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO = 46, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO = 47, VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, VK_STRUCTURE_TYPE_NUM = (VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), @@ -709,24 +710,6 @@ typedef enum VkDescriptorType { VK_DESCRIPTOR_TYPE_MAX_ENUM = 
0x7FFFFFFF } VkDescriptorType; -typedef enum { - VK_DESCRIPTOR_POOL_USAGE_ONE_SHOT = 0, - VK_DESCRIPTOR_POOL_USAGE_DYNAMIC = 1, - VK_DESCRIPTOR_POOL_USAGE_BEGIN_RANGE = VK_DESCRIPTOR_POOL_USAGE_ONE_SHOT, - VK_DESCRIPTOR_POOL_USAGE_END_RANGE = VK_DESCRIPTOR_POOL_USAGE_DYNAMIC, - VK_DESCRIPTOR_POOL_USAGE_NUM = (VK_DESCRIPTOR_POOL_USAGE_DYNAMIC - VK_DESCRIPTOR_POOL_USAGE_ONE_SHOT + 1), - VK_DESCRIPTOR_POOL_USAGE_MAX_ENUM = 0x7FFFFFFF -} VkDescriptorPoolUsage; - -typedef enum { - VK_DESCRIPTOR_SET_USAGE_ONE_SHOT = 0, - VK_DESCRIPTOR_SET_USAGE_STATIC = 1, - VK_DESCRIPTOR_SET_USAGE_BEGIN_RANGE = VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, - VK_DESCRIPTOR_SET_USAGE_END_RANGE = VK_DESCRIPTOR_SET_USAGE_STATIC, - VK_DESCRIPTOR_SET_USAGE_NUM = (VK_DESCRIPTOR_SET_USAGE_STATIC - VK_DESCRIPTOR_SET_USAGE_ONE_SHOT + 1), - VK_DESCRIPTOR_SET_USAGE_MAX_ENUM = 0x7FFFFFFF -} VkDescriptorSetUsage; - typedef enum VkAttachmentLoadOp { VK_ATTACHMENT_LOAD_OP_LOAD = 0, VK_ATTACHMENT_LOAD_OP_CLEAR = 1, @@ -1822,7 +1805,7 @@ typedef struct VkPipelineLayoutCreateInfo { VkStructureType sType; const void* pNext; VkPipelineLayoutCreateFlags flags; - uint32_t descriptorSetCount; + uint32_t setLayoutCount; const VkDescriptorSetLayout* pSetLayouts; uint32_t pushConstantRangeCount; const VkPushConstantRange* pPushConstantRanges; @@ -1849,8 +1832,9 @@ typedef struct VkSamplerCreateInfo { } VkSamplerCreateInfo; typedef struct VkDescriptorSetLayoutBinding { + uint32_t binding; VkDescriptorType descriptorType; - uint32_t arraySize; + uint32_t descriptorCount; VkShaderStageFlags stageFlags; const VkSampler* pImmutableSamplers; } VkDescriptorSetLayoutBinding; @@ -1859,48 +1843,55 @@ typedef struct VkDescriptorSetLayoutCreateInfo { VkStructureType sType; const void* pNext; VkDescriptorSetLayoutCreateFlags flags; - uint32_t count; + uint32_t bindingCount; const VkDescriptorSetLayoutBinding* pBinding; } VkDescriptorSetLayoutCreateInfo; -typedef struct { +typedef struct VkDescriptorPoolSize { VkDescriptorType type; - 
uint32_t count; -} VkDescriptorTypeCount; + uint32_t descriptorCount; +} VkDescriptorPoolSize; typedef struct VkDescriptorPoolCreateInfo { VkStructureType sType; const void* pNext; VkDescriptorPoolCreateFlags flags; - VkDescriptorPoolUsage poolUsage; uint32_t maxSets; - uint32_t count; - const VkDescriptorTypeCount* pTypeCount; + uint32_t poolSizeCount; + const VkDescriptorPoolSize* pPoolSizes; } VkDescriptorPoolCreateInfo; -typedef struct { - VkBuffer buffer; - VkDeviceSize offset; - VkDeviceSize range; -} VkDescriptorBufferInfo; +typedef struct VkDescriptorSetAllocateInfo { + VkStructureType sType; + const void* pNext; + VkDescriptorPool descriptorPool; + uint32_t setLayoutCount; + const VkDescriptorSetLayout* pSetLayouts; +} VkDescriptorSetAllocateInfo; -typedef struct { - VkBufferView bufferView; +typedef struct VkDescriptorImageInfo { VkSampler sampler; VkImageView imageView; VkImageLayout imageLayout; - VkDescriptorBufferInfo bufferInfo; -} VkDescriptorInfo; +} VkDescriptorImageInfo; + +typedef struct VkDescriptorBufferInfo { + VkBuffer buffer; + VkDeviceSize offset; + VkDeviceSize range; +} VkDescriptorBufferInfo; typedef struct VkWriteDescriptorSet { VkStructureType sType; const void* pNext; - VkDescriptorSet destSet; - uint32_t destBinding; - uint32_t destArrayElement; - uint32_t count; + VkDescriptorSet dstSet; + uint32_t dstBinding; + uint32_t dstArrayElement; + uint32_t descriptorCount; VkDescriptorType descriptorType; - const VkDescriptorInfo* pDescriptors; + const VkDescriptorImageInfo* pImageInfo; + const VkDescriptorBufferInfo* pBufferInfo; + const VkBufferView* pTexelBufferView; } VkWriteDescriptorSet; typedef struct VkCopyDescriptorSet { @@ -1909,10 +1900,10 @@ typedef struct VkCopyDescriptorSet { VkDescriptorSet srcSet; uint32_t srcBinding; uint32_t srcArrayElement; - VkDescriptorSet destSet; - uint32_t destBinding; - uint32_t destArrayElement; - uint32_t count; + VkDescriptorSet dstSet; + uint32_t dstBinding; + uint32_t dstArrayElement; + 
uint32_t descriptorCount; } VkCopyDescriptorSet; typedef struct VkFramebufferCreateInfo { @@ -2223,10 +2214,10 @@ typedef VkResult (VKAPI_PTR *PFN_vkCreateDescriptorSetLayout)(VkDevice device, c typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout); typedef VkResult (VKAPI_PTR *PFN_vkCreateDescriptorPool)(VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); -typedef VkResult (VKAPI_PTR *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); -typedef VkResult (VKAPI_PTR *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets); -typedef VkResult (VKAPI_PTR *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t count, const VkDescriptorSet* pDescriptorSets); -typedef void (VKAPI_PTR *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); +typedef VkResult (VKAPI_PTR *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorPoolResetFlags flags); +typedef VkResult (VKAPI_PTR *PFN_vkAllocateDescriptorSets)(VkDevice device, const VkDescriptorSetAllocateInfo* pAllocateInfo, VkDescriptorSet* pDescriptorSets); +typedef VkResult (VKAPI_PTR *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t descriptorSetCount, const VkDescriptorSet* pDescriptorSets); +typedef void (VKAPI_PTR *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies); typedef 
VkResult (VKAPI_PTR *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); typedef void (VKAPI_PTR *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer); typedef VkResult (VKAPI_PTR *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); @@ -2684,27 +2675,25 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorPool( VKAPI_ATTR VkResult VKAPI_CALL vkResetDescriptorPool( VkDevice device, - VkDescriptorPool descriptorPool); + VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags); -VKAPI_ATTR VkResult VKAPI_CALL vkAllocDescriptorSets( +VKAPI_ATTR VkResult VKAPI_CALL vkAllocateDescriptorSets( VkDevice device, - VkDescriptorPool descriptorPool, - VkDescriptorSetUsage setUsage, - uint32_t count, - const VkDescriptorSetLayout* pSetLayouts, + const VkDescriptorSetAllocateInfo* pAllocateInfo, VkDescriptorSet* pDescriptorSets); VKAPI_ATTR VkResult VKAPI_CALL vkFreeDescriptorSets( VkDevice device, VkDescriptorPool descriptorPool, - uint32_t count, + uint32_t descriptorSetCount, const VkDescriptorSet* pDescriptorSets); VKAPI_ATTR void VKAPI_CALL vkUpdateDescriptorSets( VkDevice device, - uint32_t writeCount, + uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites, - uint32_t copyCount, + uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies); VKAPI_ATTR VkResult VKAPI_CALL vkCreateFramebuffer( diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 22041b540d6..186e9f92a5a 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -44,14 +44,16 @@ VkResult anv_CreateDescriptorSetLayout( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + uint32_t max_binding = 0; uint32_t immutable_sampler_count = 0; - for (uint32_t b = 0; b < pCreateInfo->count; b++) { - if (pCreateInfo->pBinding[b].pImmutableSamplers) 
- immutable_sampler_count += pCreateInfo->pBinding[b].arraySize; + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + max_binding = MAX2(max_binding, pCreateInfo->pBinding[j].binding); + if (pCreateInfo->pBinding[j].pImmutableSamplers) + immutable_sampler_count += pCreateInfo->pBinding[j].descriptorCount; } size_t size = sizeof(struct anv_descriptor_set_layout) + - pCreateInfo->count * sizeof(set_layout->binding[0]) + + (max_binding + 1) * sizeof(set_layout->binding[0]) + immutable_sampler_count * sizeof(struct anv_sampler *); set_layout = anv_device_alloc(device, size, 8, @@ -61,15 +63,15 @@ VkResult anv_CreateDescriptorSetLayout( /* We just allocate all the samplers at the end of the struct */ struct anv_sampler **samplers = - (struct anv_sampler **)&set_layout->binding[pCreateInfo->count]; + (struct anv_sampler **)&set_layout->binding[max_binding + 1]; - set_layout->binding_count = pCreateInfo->count; + set_layout->binding_count = max_binding + 1; set_layout->shader_stages = 0; set_layout->size = 0; /* Initialize all binding_layout entries to -1 */ memset(set_layout->binding, -1, - pCreateInfo->count * sizeof(set_layout->binding[0])); + (max_binding + 1) * sizeof(set_layout->binding[0])); /* Initialize all samplers to 0 */ memset(samplers, 0, immutable_sampler_count * sizeof(*samplers)); @@ -78,25 +80,28 @@ VkResult anv_CreateDescriptorSetLayout( uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, }; uint32_t dynamic_offset_count = 0; - for (uint32_t b = 0; b < pCreateInfo->count; b++) { - uint32_t array_size = MAX2(1, pCreateInfo->pBinding[b].arraySize); - set_layout->binding[b].array_size = array_size; + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBinding[j]; + uint32_t b = binding->binding; + + assert(binding->descriptorCount > 0); + set_layout->binding[b].array_size = binding->descriptorCount; set_layout->binding[b].descriptor_index = set_layout->size; - set_layout->size 
+= array_size; + set_layout->size += binding->descriptorCount; - switch (pCreateInfo->pBinding[b].descriptorType) { + switch (binding->descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for_each_bit(s, pCreateInfo->pBinding[b].stageFlags) { + for_each_bit(s, binding->stageFlags) { set_layout->binding[b].stage[s].sampler_index = sampler_count[s]; - sampler_count[s] += array_size; + sampler_count[s] += binding->descriptorCount; } break; default: break; } - switch (pCreateInfo->pBinding[b].descriptorType) { + switch (binding->descriptorType) { case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: @@ -107,37 +112,37 @@ VkResult anv_CreateDescriptorSetLayout( case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - for_each_bit(s, pCreateInfo->pBinding[b].stageFlags) { + for_each_bit(s, binding->stageFlags) { set_layout->binding[b].stage[s].surface_index = surface_count[s]; - surface_count[s] += array_size; + surface_count[s] += binding->descriptorCount; } break; default: break; } - switch (pCreateInfo->pBinding[b].descriptorType) { + switch (binding->descriptorType) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: set_layout->binding[b].dynamic_offset_index = dynamic_offset_count; - dynamic_offset_count += array_size; + dynamic_offset_count += binding->descriptorCount; break; default: break; } - if (pCreateInfo->pBinding[b].pImmutableSamplers) { + if (binding->pImmutableSamplers) { set_layout->binding[b].immutable_samplers = samplers; - samplers += array_size; + samplers += binding->descriptorCount; - for (uint32_t i = 0; i < array_size; i++) + for (uint32_t i = 0; i < binding->descriptorCount; i++) set_layout->binding[b].immutable_samplers[i] = - anv_sampler_from_handle(pCreateInfo->pBinding[b].pImmutableSamplers[i]); 
+ anv_sampler_from_handle(binding->pImmutableSamplers[i]); } else { set_layout->binding[b].immutable_samplers = NULL; } - set_layout->shader_stages |= pCreateInfo->pBinding[b].stageFlags; + set_layout->shader_stages |= binding->stageFlags; } set_layout->dynamic_offset_count = dynamic_offset_count; @@ -172,12 +177,12 @@ VkResult anv_CreatePipelineLayout( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); - l.num_sets = pCreateInfo->descriptorSetCount; + l.num_sets = pCreateInfo->setLayoutCount; unsigned dynamic_offset_count = 0; memset(l.stage, 0, sizeof(l.stage)); - for (uint32_t set = 0; set < pCreateInfo->descriptorSetCount; set++) { + for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[set]); l.set[set].layout = set_layout; @@ -228,7 +233,7 @@ VkResult anv_CreatePipelineLayout( int surface = 0; int sampler = 0; - for (uint32_t set = 0; set < pCreateInfo->descriptorSetCount; set++) { + for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { struct anv_descriptor_set_layout *set_layout = l.set[set].layout; for (uint32_t b = 0; b < set_layout->binding_count; b++) { @@ -299,7 +304,8 @@ void anv_DestroyDescriptorPool( VkResult anv_ResetDescriptorPool( VkDevice device, - VkDescriptorPool descriptorPool) + VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags) { anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); return VK_SUCCESS; @@ -346,12 +352,9 @@ anv_descriptor_set_destroy(struct anv_device *device, anv_device_free(device, set); } -VkResult anv_AllocDescriptorSets( +VkResult anv_AllocateDescriptorSets( VkDevice _device, - VkDescriptorPool descriptorPool, - VkDescriptorSetUsage setUsage, - uint32_t count, - const VkDescriptorSetLayout* pSetLayouts, + const VkDescriptorSetAllocateInfo* pAllocateInfo, VkDescriptorSet* pDescriptorSets) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -360,8 +363,9 
@@ VkResult anv_AllocDescriptorSets( struct anv_descriptor_set *set; uint32_t i; - for (i = 0; i < count; i++) { - ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pSetLayouts[i]); + for (i = 0; i < pAllocateInfo->setLayoutCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, + pAllocateInfo->pSetLayouts[i]); result = anv_descriptor_set_create(device, layout, &set); if (result != VK_SUCCESS) @@ -371,7 +375,8 @@ VkResult anv_AllocDescriptorSets( } if (result != VK_SUCCESS) - anv_FreeDescriptorSets(_device, descriptorPool, i, pDescriptorSets); + anv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, + i, pDescriptorSets); return result; } @@ -395,24 +400,24 @@ VkResult anv_FreeDescriptorSets( void anv_UpdateDescriptorSets( VkDevice device, - uint32_t writeCount, + uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites, - uint32_t copyCount, + uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies) { - for (uint32_t i = 0; i < writeCount; i++) { + for (uint32_t i = 0; i < descriptorWriteCount; i++) { const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; - ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet); + ANV_FROM_HANDLE(anv_descriptor_set, set, write->dstSet); const struct anv_descriptor_set_binding_layout *bind_layout = - &set->layout->binding[write->destBinding]; + &set->layout->binding[write->dstBinding]; struct anv_descriptor *desc = &set->descriptors[bind_layout->descriptor_index]; switch (write->descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: - for (uint32_t j = 0; j < write->count; j++) { + for (uint32_t j = 0; j < write->descriptorCount; j++) { ANV_FROM_HANDLE(anv_sampler, sampler, - write->pDescriptors[j].sampler); + write->pImageInfo[j].sampler); desc[j] = (struct anv_descriptor) { .type = VK_DESCRIPTOR_TYPE_SAMPLER, @@ -422,11 +427,11 @@ void anv_UpdateDescriptorSets( break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for (uint32_t j = 0; j < write->count; j++) { + for 
(uint32_t j = 0; j < write->descriptorCount; j++) { ANV_FROM_HANDLE(anv_image_view, iview, - write->pDescriptors[j].imageView); + write->pImageInfo[j].imageView); ANV_FROM_HANDLE(anv_sampler, sampler, - write->pDescriptors[j].sampler); + write->pImageInfo[j].sampler); desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; desc[j].image_view = iview; @@ -441,9 +446,9 @@ void anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - for (uint32_t j = 0; j < write->count; j++) { + for (uint32_t j = 0; j < write->descriptorCount; j++) { ANV_FROM_HANDLE(anv_image_view, iview, - write->pDescriptors[j].imageView); + write->pImageInfo[j].imageView); desc[j] = (struct anv_descriptor) { .type = write->descriptorType, @@ -465,17 +470,16 @@ void anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - for (uint32_t j = 0; j < write->count; j++) { - assert(write->pDescriptors[j].bufferInfo.buffer); - ANV_FROM_HANDLE(anv_buffer, buffer, - write->pDescriptors[j].bufferInfo.buffer); + for (uint32_t j = 0; j < write->descriptorCount; j++) { + assert(write->pBufferInfo[j].buffer); + ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer); assert(buffer); desc[j] = (struct anv_descriptor) { .type = write->descriptorType, .buffer = buffer, - .offset = write->pDescriptors[j].bufferInfo.offset, - .range = write->pDescriptors[j].bufferInfo.range, + .offset = write->pBufferInfo[j].offset, + .range = write->pBufferInfo[j].range, }; /* For buffers with dynamic offsets, we use the full possible @@ -491,12 +495,12 @@ void anv_UpdateDescriptorSets( } } - for (uint32_t i = 0; i < copyCount; i++) { + for (uint32_t i = 0; i < descriptorCopyCount; i++) { const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; - ANV_FROM_HANDLE(anv_descriptor_set, src, copy->destSet); - ANV_FROM_HANDLE(anv_descriptor_set, dest, copy->destSet); - for 
(uint32_t j = 0; j < copy->count; j++) { - dest->descriptors[copy->destBinding + j] = + ANV_FROM_HANDLE(anv_descriptor_set, src, copy->dstSet); + ANV_FROM_HANDLE(anv_descriptor_set, dest, copy->dstSet); + for (uint32_t j = 0; j < copy->descriptorCount; j++) { + dest->descriptors[copy->dstBinding + j] = src->descriptors[copy->srcBinding + j]; } } diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index a3f5733e122..a2626a0f63e 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -303,11 +303,12 @@ anv_device_init_meta_blit_state(struct anv_device *device) VkDescriptorSetLayoutCreateInfo ds_layout_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .count = 1, + .bindingCount = 1, .pBinding = (VkDescriptorSetLayoutBinding[]) { { + .binding = 0, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .arraySize = 1, + .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, .pImmutableSamplers = NULL }, @@ -319,7 +320,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) anv_CreatePipelineLayout(anv_device_to_handle(device), &(VkPipelineLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .descriptorSetCount = 1, + .setLayoutCount = 1, .pSetLayouts = &device->meta_state.blit.ds_layout, }, &device->meta_state.blit.pipeline_layout); @@ -529,24 +530,28 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, }, &sampler); VkDescriptorSet set; - anv_AllocDescriptorSets(anv_device_to_handle(device), dummy_desc_pool, - VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, - 1, &device->meta_state.blit.ds_layout, &set); + anv_AllocateDescriptorSets(anv_device_to_handle(device), + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = dummy_desc_pool, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout + }, &set); anv_UpdateDescriptorSets(anv_device_to_handle(device), 1, /* writeCount */ (VkWriteDescriptorSet[]) { { .sType = 
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .destSet = set, - .destBinding = 0, - .destArrayElement = 0, - .count = 1, + .dstSet = set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pDescriptors = (VkDescriptorInfo[]) { + .pImageInfo = (VkDescriptorImageInfo[]) { { + .sampler = sampler, .imageView = anv_image_view_to_handle(src_iview), .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - .sampler = sampler, }, } } -- cgit v1.2.3 From 407b8cc5e0e0e0382b7a350480a7654fcb2a164f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Dec 2015 12:19:11 -0800 Subject: vk/0.210.0: Get rid of VkImageAspect --- include/vulkan/vulkan.h | 13 +------------ src/vulkan/anv_dump.c | 2 +- src/vulkan/anv_image.c | 12 +++++++----- 3 files changed, 9 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 0a28ade4a25..cae02065ad1 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -409,17 +409,6 @@ typedef enum VkPhysicalDeviceType { VK_PHYSICAL_DEVICE_TYPE_MAX_ENUM = 0x7FFFFFFF } VkPhysicalDeviceType; -typedef enum { - VK_IMAGE_ASPECT_COLOR = 0, - VK_IMAGE_ASPECT_DEPTH = 1, - VK_IMAGE_ASPECT_STENCIL = 2, - VK_IMAGE_ASPECT_METADATA = 3, - VK_IMAGE_ASPECT_BEGIN_RANGE = VK_IMAGE_ASPECT_COLOR, - VK_IMAGE_ASPECT_END_RANGE = VK_IMAGE_ASPECT_METADATA, - VK_IMAGE_ASPECT_NUM = (VK_IMAGE_ASPECT_METADATA - VK_IMAGE_ASPECT_COLOR + 1), - VK_IMAGE_ASPECT_MAX_ENUM = 0x7FFFFFFF -} VkImageAspect; - typedef enum VkQueryType { VK_QUERY_TYPE_OCCLUSION = 0, VK_QUERY_TYPE_PIPELINE_STATISTICS = 1, @@ -1420,7 +1409,7 @@ typedef struct VkSparseImageOpaqueMemoryBindInfo { } VkSparseImageOpaqueMemoryBindInfo; typedef struct VkImageSubresource { - VkImageAspect aspect; + VkImageAspectFlags aspectMask; uint32_t mipLevel; uint32_t arrayLayer; } VkImageSubresource; diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index b2d0b385d0a..b232f6b206b 100644 
--- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -178,7 +178,7 @@ anv_dump_image_to_ppm(struct anv_device *device, VkSubresourceLayout layout; anv_GetImageSubresourceLayout(vk_device, copy_image, &(VkImageSubresource) { - .aspect = VK_IMAGE_ASPECT_COLOR, + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 0, .arrayLayer = 0, }, &layout); diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 0fe2df10a7a..6c39b355958 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -430,16 +430,18 @@ void anv_GetImageSubresourceLayout( { ANV_FROM_HANDLE(anv_image, image, _image); - switch (pSubresource->aspect) { - case VK_IMAGE_ASPECT_COLOR: + assert(__builtin_popcount(pSubresource->aspectMask) == 1); + + switch (pSubresource->aspectMask) { + case VK_IMAGE_ASPECT_COLOR_BIT: anv_surface_get_subresource_layout(image, &image->color_surface, pSubresource, pLayout); break; - case VK_IMAGE_ASPECT_DEPTH: + case VK_IMAGE_ASPECT_DEPTH_BIT: anv_surface_get_subresource_layout(image, &image->depth_surface, pSubresource, pLayout); break; - case VK_IMAGE_ASPECT_STENCIL: + case VK_IMAGE_ASPECT_STENCIL_BIT: anv_surface_get_subresource_layout(image, &image->stencil_surface, pSubresource, pLayout); break; @@ -520,7 +522,7 @@ anv_validate_CreateImageView(VkDevice _device, image->format->isl_layout->bs); } - if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL) { + if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { /* FINISHME: Is it legal to have an R8 view of S8? 
*/ assert(image->format->has_stencil); assert(view_format_info->has_stencil); -- cgit v1.2.3 From 299f8f1511cfa2bfab77038ba28a90bcfc136a5d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Dec 2015 12:52:56 -0800 Subject: vk/0.210.0: More structure field renaming --- include/vulkan/vulkan.h | 23 +++++++++++------------ src/vulkan/anv_dump.c | 6 +++--- src/vulkan/anv_formats.c | 4 ++-- src/vulkan/anv_image.c | 22 +++++++++++----------- src/vulkan/anv_intel.c | 2 +- src/vulkan/anv_meta.c | 44 ++++++++++++++++++++++---------------------- src/vulkan/anv_meta_clear.c | 8 ++++---- src/vulkan/anv_query.c | 2 +- src/vulkan/anv_wsi_wayland.c | 2 +- src/vulkan/anv_wsi_x11.c | 2 +- src/vulkan/gen7_state.c | 6 +++--- src/vulkan/gen8_state.c | 4 ++-- 12 files changed, 62 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index cae02065ad1..cdf491cfba9 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1172,7 +1172,7 @@ typedef struct VkExtent3D { typedef struct VkImageFormatProperties { VkExtent3D maxExtent; uint32_t maxMipLevels; - uint32_t maxArraySize; + uint32_t maxArrayLayers; VkSampleCountFlags sampleCounts; VkDeviceSize maxResourceSize; } VkImageFormatProperties; @@ -1363,7 +1363,7 @@ typedef struct { typedef struct VkMappedMemoryRange { VkStructureType sType; const void* pNext; - VkDeviceMemory mem; + VkDeviceMemory memory; VkDeviceSize offset; VkDeviceSize size; } VkMappedMemoryRange; @@ -1473,7 +1473,7 @@ typedef struct VkQueryPoolCreateInfo { const void* pNext; VkQueryPoolCreateFlags flags; VkQueryType queryType; - uint32_t slots; + uint32_t entryCount; VkQueryPipelineStatisticFlags pipelineStatistics; } VkQueryPoolCreateInfo; @@ -1484,7 +1484,7 @@ typedef struct VkBufferCreateInfo { VkDeviceSize size; VkBufferUsageFlags usage; VkSharingMode sharingMode; - uint32_t queueFamilyCount; + uint32_t queueFamilyIndexCount; const uint32_t* pQueueFamilyIndices; } VkBufferCreateInfo; @@ 
-1506,12 +1506,12 @@ typedef struct VkImageCreateInfo { VkFormat format; VkExtent3D extent; uint32_t mipLevels; - uint32_t arraySize; + uint32_t arrayLayers; uint32_t samples; VkImageTiling tiling; VkImageUsageFlags usage; VkSharingMode sharingMode; - uint32_t queueFamilyCount; + uint32_t queueFamilyIndexCount; const uint32_t* pQueueFamilyIndices; VkImageLayout initialLayout; } VkImageCreateInfo; @@ -1533,9 +1533,9 @@ typedef struct VkComponentMapping { typedef struct VkImageSubresourceRange { VkImageAspectFlags aspectMask; uint32_t baseMipLevel; - uint32_t mipLevels; + uint32_t levelCount; uint32_t baseArrayLayer; - uint32_t arraySize; + uint32_t layerCount; } VkImageSubresourceRange; typedef struct VkImageViewCreateInfo { @@ -1570,9 +1570,8 @@ typedef struct VkPipelineCacheCreateInfo { VkStructureType sType; const void* pNext; VkPipelineCacheCreateFlags flags; - size_t initialSize; - const void* initialData; - size_t maxSize; + size_t initialDataSize; + const void* pInitialData; } VkPipelineCacheCreateInfo; typedef struct VkSpecializationMapEntry { @@ -1962,8 +1961,8 @@ typedef struct VkRenderPassCreateInfo { typedef struct VkCommandPoolCreateInfo { VkStructureType sType; const void* pNext; - uint32_t queueFamilyIndex; VkCommandPoolCreateFlags flags; + uint32_t queueFamilyIndex; } VkCommandPoolCreateInfo; typedef struct { diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index b232f6b206b..80d9fcec88f 100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -49,7 +49,7 @@ anv_dump_image_to_ppm(struct anv_device *device, .format = VK_FORMAT_R8G8B8A8_UNORM, .extent = (VkExtent3D) { extent.width, extent.height, 1 }, .mipLevels = 1, - .arraySize = 1, + .arrayLayers = 1, .samples = 1, .tiling = VK_IMAGE_TILING_LINEAR, .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, @@ -144,9 +144,9 @@ anv_dump_image_to_ppm(struct anv_device *device, .subresourceRange = (VkImageSubresourceRange) { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, - .mipLevels = 1, 
+ .levelCount = 1, .baseArrayLayer = 0, - .arraySize = 1, + .layerCount = 1, }, }}); diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 8afef443696..e79a7105d7d 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -432,7 +432,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( *pImageFormatProperties = (VkImageFormatProperties) { .maxExtent = maxExtent, .maxMipLevels = maxMipLevels, - .maxArraySize = maxArraySize, + .maxArrayLayers = maxArraySize, /* FINISHME: Support multisampling */ .sampleCounts = VK_SAMPLE_COUNT_1_BIT, @@ -449,7 +449,7 @@ unsupported: *pImageFormatProperties = (VkImageFormatProperties) { .maxExtent = { 0, 0, 0 }, .maxMipLevels = 0, - .maxArraySize = 0, + .maxArrayLayers = 0, .sampleCounts = 0, .maxResourceSize = 0, }; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 6c39b355958..f52bf456d1e 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -151,7 +151,7 @@ anv_image_make_surface(const struct anv_device *dev, const VkExtent3D *restrict extent = &create_info->vk_info->extent; const uint32_t levels = create_info->vk_info->mipLevels; - const uint32_t array_size = create_info->vk_info->arraySize; + const uint32_t array_size = create_info->vk_info->arrayLayers; const enum isl_tiling tiling = anv_image_choose_tiling(create_info); const struct anv_tile_info *tile_info = @@ -303,7 +303,7 @@ anv_image_create(VkDevice _device, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); anv_assert(pCreateInfo->mipLevels > 0); - anv_assert(pCreateInfo->arraySize > 0); + anv_assert(pCreateInfo->arrayLayers > 0); anv_assert(pCreateInfo->samples == 1); anv_assert(pCreateInfo->extent.width > 0); anv_assert(pCreateInfo->extent.height > 0); @@ -331,7 +331,7 @@ anv_image_create(VkDevice _device, image->extent = pCreateInfo->extent; image->format = anv_format_for_vk_format(pCreateInfo->format); image->levels = pCreateInfo->mipLevels; - image->array_size = pCreateInfo->arraySize; 
+ image->array_size = pCreateInfo->arrayLayers; image->usage = anv_image_get_full_usage(pCreateInfo); image->surface_type = surf_type; @@ -487,17 +487,17 @@ anv_validate_CreateImageView(VkDevice _device, /* Validate subresource. */ assert(subresource->aspectMask != 0); - assert(subresource->mipLevels > 0); - assert(subresource->arraySize > 0); + assert(subresource->levelCount > 0); + assert(subresource->layerCount > 0); assert(subresource->baseMipLevel < image->levels); - assert(subresource->baseMipLevel + subresource->mipLevels <= image->levels); + assert(subresource->baseMipLevel + subresource->levelCount <= image->levels); assert(subresource->baseArrayLayer < image->array_size); - assert(subresource->baseArrayLayer + subresource->arraySize <= image->array_size); + assert(subresource->baseArrayLayer + subresource->layerCount <= image->array_size); assert(pView); if (view_info->is_cube) { assert(subresource->baseArrayLayer % 6 == 0); - assert(subresource->arraySize % 6 == 0); + assert(subresource->layerCount % 6 == 0); } const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT @@ -543,7 +543,7 @@ anv_image_view_init(struct anv_image_view *iview, ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - assert(range->arraySize > 0); + assert(range->layerCount > 0); assert(range->baseMipLevel < image->levels); assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | @@ -555,10 +555,10 @@ anv_image_view_init(struct anv_image_view *iview, unreachable("bad VkImageType"); case VK_IMAGE_TYPE_1D: case VK_IMAGE_TYPE_2D: - assert(range->baseArrayLayer + range->arraySize - 1 <= image->array_size); + assert(range->baseArrayLayer + range->layerCount - 1 <= image->array_size); break; case VK_IMAGE_TYPE_3D: - assert(range->baseArrayLayer + range->arraySize - 1 + assert(range->baseArrayLayer + range->layerCount - 1 <= anv_minify(image->extent.depth, range->baseMipLevel)); break; } 
diff --git a/src/vulkan/anv_intel.c b/src/vulkan/anv_intel.c index 48ac183ad79..5507bf2d5cb 100644 --- a/src/vulkan/anv_intel.c +++ b/src/vulkan/anv_intel.c @@ -78,7 +78,7 @@ VkResult anv_CreateDmaBufImageINTEL( .format = pCreateInfo->format, .extent = pCreateInfo->extent, .mipLevels = 1, - .arraySize = 1, + .arrayLayers = 1, .samples = 1, /* FIXME: Need a way to use X tiling to allow scanout */ .tiling = VK_IMAGE_TILING_OPTIMAL, diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index a2626a0f63e..419667a9f30 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -674,7 +674,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .depth = 1, }, .mipLevels = 1, - .arraySize = 1, + .arrayLayers = 1, .samples = 1, .tiling = VK_IMAGE_TILING_LINEAR, .usage = 0, @@ -707,9 +707,9 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, - .mipLevels = 1, + .levelCount = 1, .baseArrayLayer = 0, - .arraySize = 1 + .layerCount = 1 }, }, cmd_buffer); @@ -724,9 +724,9 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, - .mipLevels = 1, + .levelCount = 1, .baseArrayLayer = 0, - .arraySize = 1, + .layerCount = 1, }, }, cmd_buffer); @@ -855,9 +855,9 @@ void anv_CmdCopyImage( .subresourceRange = { .aspectMask = pRegions[r].srcSubresource.aspectMask, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .mipLevels = 1, + .levelCount = 1, .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, - .arraySize = pRegions[r].dstSubresource.layerCount, + .layerCount = pRegions[r].dstSubresource.layerCount, }, }, cmd_buffer); @@ -899,9 +899,9 @@ void anv_CmdCopyImage( .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].dstSubresource.mipLevel, - .mipLevels = 1, + .levelCount = 1, .baseArrayLayer = dest_base_array_slice + slice, - .arraySize = 1 + .layerCount = 1 }, }, 
cmd_buffer); @@ -955,9 +955,9 @@ void anv_CmdBlitImage( .subresourceRange = { .aspectMask = pRegions[r].srcSubresource.aspectMask, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .mipLevels = 1, + .levelCount = 1, .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, - .arraySize = 1 + .layerCount = 1 }, }, cmd_buffer); @@ -989,9 +989,9 @@ void anv_CmdBlitImage( .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].dstSubresource.mipLevel, - .mipLevels = 1, + .levelCount = 1, .baseArrayLayer = dest_array_slice, - .arraySize = 1 + .layerCount = 1 }, }, cmd_buffer); @@ -1032,7 +1032,7 @@ make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, .format = format, .extent = extent, .mipLevels = 1, - .arraySize = 1, + .arrayLayers = 1, .samples = 1, .tiling = VK_IMAGE_TILING_LINEAR, .usage = usage, @@ -1106,9 +1106,9 @@ void anv_CmdCopyBufferToImage( .subresourceRange = { .aspectMask = proxy_aspect, .baseMipLevel = 0, - .mipLevels = 1, + .levelCount = 1, .baseArrayLayer = 0, - .arraySize = 1, + .layerCount = 1, }, }, cmd_buffer); @@ -1123,9 +1123,9 @@ void anv_CmdCopyBufferToImage( .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .mipLevels = 1, + .levelCount = 1, .baseArrayLayer = dest_base_array_slice + slice, - .arraySize = 1 + .layerCount = 1 }, }, cmd_buffer); @@ -1194,9 +1194,9 @@ void anv_CmdCopyImageToBuffer( .subresourceRange = { .aspectMask = pRegions[r].imageSubresource.aspectMask, .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .mipLevels = 1, + .levelCount = 1, .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, - .arraySize = pRegions[r].imageSubresource.layerCount, + .layerCount = pRegions[r].imageSubresource.layerCount, }, }, cmd_buffer); @@ -1234,9 +1234,9 @@ void anv_CmdCopyImageToBuffer( .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, - .mipLevels = 1, + 
.levelCount = 1, .baseArrayLayer = 0, - .arraySize = 1 + .layerCount = 1 }, }, cmd_buffer); diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 27c6a291967..0531eccad9b 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -690,8 +690,8 @@ void anv_CmdClearColorImage( meta_clear_begin(&saved_state, cmd_buffer); for (uint32_t r = 0; r < rangeCount; r++) { - for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { - for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { + for (uint32_t l = 0; l < pRanges[r].levelCount; l++) { + for (uint32_t s = 0; s < pRanges[r].layerCount; s++) { struct anv_image_view iview; anv_image_view_init(&iview, cmd_buffer->device, &(VkImageViewCreateInfo) { @@ -702,9 +702,9 @@ void anv_CmdClearColorImage( .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRanges[r].baseMipLevel + l, - .mipLevels = 1, + .levelCount = 1, .baseArrayLayer = pRanges[r].baseArrayLayer + s, - .arraySize = 1 + .layerCount = 1 }, }, cmd_buffer); diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index 8891aa02d76..7aa54363aee 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -55,7 +55,7 @@ VkResult anv_CreateQueryPool( if (pool == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - size = pCreateInfo->slots * sizeof(struct anv_query_pool_slot); + size = pCreateInfo->entryCount * sizeof(struct anv_query_pool_slot); result = anv_bo_init_new(&pool->bo, device, size); if (result != VK_SUCCESS) goto fail; diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index d226c8d06c1..581c8a0e90c 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -561,7 +561,7 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image) .depth = 1 }, .mipLevels = 1, - .arraySize = 1, + .arrayLayers = 1, .samples = 1, /* FIXME: Need a way to use X tiling to allow scanout */ .tiling = VK_IMAGE_TILING_OPTIMAL, diff --git 
a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 4beffce3ce5..1ab31c52573 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -313,7 +313,7 @@ x11_create_swapchain(struct anv_wsi_implementation *impl, .depth = 1 }, .mipLevels = 1, - .arraySize = 1, + .arrayLayers = 1, .samples = 1, /* FIXME: Need a way to use X tiling to allow scanout */ .tiling = VK_IMAGE_TILING_OPTIMAL, diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index a30de348732..e4cf552f90d 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -231,8 +231,8 @@ genX(image_view_init)(struct anv_image_view *iview, }; uint32_t depth = 1; - if (range->arraySize > 1) { - depth = range->arraySize; + if (range->layerCount > 1) { + depth = range->layerCount; } else if (image->extent.depth > 1) { depth = image->extent.depth; } @@ -300,7 +300,7 @@ genX(image_view_init)(struct anv_image_view *iview, * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. */ surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = range->mipLevels - 1; + surface_state.MIPCountLOD = range->levelCount - 1; GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, &surface_state); diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 092e1ae7822..02394dc6f61 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -145,7 +145,7 @@ genX(image_view_init)(struct anv_image_view *iview, * example, if Minimum Array Element is set to 1024 on a 2D surface, * the range of this field is reduced to [0,1023]. */ - depth = range->arraySize; + depth = range->layerCount; /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: * @@ -242,7 +242,7 @@ genX(image_view_init)(struct anv_image_view *iview, * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. 
*/ surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = range->mipLevels - 1; + surface_state.MIPCountLOD = range->levelCount - 1; GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, &surface_state); -- cgit v1.2.3 From 43f3e92348939ba6e0c09d219739001f195a69fd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Dec 2015 13:09:22 -0800 Subject: vk/0.210.0: Rework render pass description structures --- include/vulkan/vulkan.h | 12 ++++++------ src/vulkan/anv_meta.c | 8 ++++---- src/vulkan/anv_meta_clear.c | 8 ++++---- src/vulkan/anv_pass.c | 30 +++++++++++++++++++----------- 4 files changed, 33 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index cdf491cfba9..484ce8fbcd6 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1926,21 +1926,21 @@ typedef struct VkAttachmentReference { typedef struct VkSubpassDescription { VkSubpassDescriptionFlags flags; VkPipelineBindPoint pipelineBindPoint; - uint32_t inputCount; + uint32_t inputAttachmentCount; const VkAttachmentReference* pInputAttachments; - uint32_t colorCount; + uint32_t colorAttachmentCount; const VkAttachmentReference* pColorAttachments; const VkAttachmentReference* pResolveAttachments; - VkAttachmentReference depthStencilAttachment; - uint32_t preserveCount; + const VkAttachmentReference* pDepthStencilAttachment; + uint32_t preserveAttachmentCount; const VkAttachmentReference* pPreserveAttachments; } VkSubpassDescription; typedef struct VkSubpassDependency { uint32_t srcSubpass; - uint32_t destSubpass; + uint32_t dstSubpass; VkPipelineStageFlags srcStageMask; - VkPipelineStageFlags destStageMask; + VkPipelineStageFlags dstStageMask; VkMemoryOutputFlags outputMask; VkMemoryInputFlags inputMask; VkBool32 byRegion; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 419667a9f30..de5a037b5a8 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -199,18 
+199,18 @@ anv_device_init_meta_blit_state(struct anv_device *device) .subpassCount = 1, .pSubpasses = &(VkSubpassDescription) { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputCount = 0, - .colorCount = 1, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, .pColorAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_GENERAL, }, .pResolveAttachments = NULL, - .depthStencilAttachment = (VkAttachmentReference) { + .pDepthStencilAttachment = &(VkAttachmentReference) { .attachment = VK_ATTACHMENT_UNUSED, .layout = VK_IMAGE_LAYOUT_GENERAL, }, - .preserveCount = 1, + .preserveAttachmentCount = 1, .pPreserveAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_GENERAL, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 0531eccad9b..76ca1b6df99 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -737,18 +737,18 @@ void anv_CmdClearColorImage( .subpassCount = 1, .pSubpasses = &(VkSubpassDescription) { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputCount = 0, - .colorCount = 1, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, .pColorAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_GENERAL, }, .pResolveAttachments = NULL, - .depthStencilAttachment = (VkAttachmentReference) { + .pDepthStencilAttachment = &(VkAttachmentReference) { .attachment = VK_ATTACHMENT_UNUSED, .layout = VK_IMAGE_LAYOUT_GENERAL, }, - .preserveCount = 1, + .preserveAttachmentCount = 1, .pPreserveAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_GENERAL, diff --git a/src/vulkan/anv_pass.c b/src/vulkan/anv_pass.c index a89c494f7e0..4990b6a6c40 100644 --- a/src/vulkan/anv_pass.c +++ b/src/vulkan/anv_pass.c @@ -68,26 +68,28 @@ VkResult anv_CreateRenderPass( const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; struct anv_subpass *subpass = &pass->subpasses[i]; - 
subpass->input_count = desc->inputCount; - subpass->color_count = desc->colorCount; + subpass->input_count = desc->inputAttachmentCount; + subpass->color_count = desc->colorAttachmentCount; - if (desc->inputCount > 0) { + if (desc->inputAttachmentCount > 0) { subpass->input_attachments = - anv_device_alloc(device, desc->inputCount * sizeof(uint32_t), + anv_device_alloc(device, + desc->inputAttachmentCount * sizeof(uint32_t), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - for (uint32_t j = 0; j < desc->inputCount; j++) { + for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { subpass->input_attachments[j] = desc->pInputAttachments[j].attachment; } } - if (desc->colorCount > 0) { + if (desc->colorAttachmentCount > 0) { subpass->color_attachments = - anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), + anv_device_alloc(device, + desc->colorAttachmentCount * sizeof(uint32_t), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - for (uint32_t j = 0; j < desc->colorCount; j++) { + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { subpass->color_attachments[j] = desc->pColorAttachments[j].attachment; } @@ -95,16 +97,22 @@ VkResult anv_CreateRenderPass( if (desc->pResolveAttachments) { subpass->resolve_attachments = - anv_device_alloc(device, desc->colorCount * sizeof(uint32_t), + anv_device_alloc(device, + desc->colorAttachmentCount * sizeof(uint32_t), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - for (uint32_t j = 0; j < desc->colorCount; j++) { + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { subpass->resolve_attachments[j] = desc->pResolveAttachments[j].attachment; } } - subpass->depth_stencil_attachment = desc->depthStencilAttachment.attachment; + if (desc->pDepthStencilAttachment) { + subpass->depth_stencil_attachment = + desc->pDepthStencilAttachment->attachment; + } else { + subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED; + } } *pRenderPass = anv_render_pass_to_handle(pass); -- cgit v1.2.3 From b1cd025b88e3651fac8dd2f7861516f653422ee4 Mon Sep 
17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Dec 2015 13:39:28 -0800 Subject: vk/0.210.0: Replace MemoryInput/OutputFlags with AccessFlags --- include/vulkan/vulkan.h | 65 +++++++++++++++++++++++--------------------- src/vulkan/anv_dump.c | 4 +-- src/vulkan/genX_cmd_buffer.c | 63 +++++++++++++++++++++++------------------- 3 files changed, 71 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 484ce8fbcd6..5a13260a86e 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1003,28 +1003,31 @@ typedef enum VkPipelineStageFlagBits { } VkPipelineStageFlagBits; typedef VkFlags VkPipelineStageFlags; -typedef enum { - VK_MEMORY_OUTPUT_HOST_WRITE_BIT = 0x00000001, - VK_MEMORY_OUTPUT_SHADER_WRITE_BIT = 0x00000002, - VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT = 0x00000004, - VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000008, - VK_MEMORY_OUTPUT_TRANSFER_BIT = 0x00000010, -} VkMemoryOutputFlagBits; -typedef VkFlags VkMemoryOutputFlags; - -typedef enum { - VK_MEMORY_INPUT_HOST_READ_BIT = 0x00000001, - VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT = 0x00000002, - VK_MEMORY_INPUT_INDEX_FETCH_BIT = 0x00000004, - VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT = 0x00000008, - VK_MEMORY_INPUT_UNIFORM_READ_BIT = 0x00000010, - VK_MEMORY_INPUT_SHADER_READ_BIT = 0x00000020, - VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT = 0x00000040, - VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000080, - VK_MEMORY_INPUT_INPUT_ATTACHMENT_BIT = 0x00000100, - VK_MEMORY_INPUT_TRANSFER_BIT = 0x00000200, -} VkMemoryInputFlagBits; -typedef VkFlags VkMemoryInputFlags; +typedef enum VkAccessFlagBits { + VK_ACCESS_INDIRECT_COMMAND_READ_BIT = 0x00000001, + VK_ACCESS_INDEX_READ_BIT = 0x00000002, + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT = 0x00000004, + VK_ACCESS_UNIFORM_READ_BIT = 0x00000008, + VK_ACCESS_INPUT_ATTACHMENT_READ_BIT = 0x00000010, + VK_ACCESS_SHADER_READ_BIT = 0x00000020, + VK_ACCESS_SHADER_WRITE_BIT = 0x00000040, + 
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT = 0x00000080, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT = 0x00000100, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT = 0x00000200, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT = 0x00000400, + VK_ACCESS_TRANSFER_READ_BIT = 0x00000800, + VK_ACCESS_TRANSFER_WRITE_BIT = 0x00001000, + VK_ACCESS_HOST_READ_BIT = 0x00002000, + VK_ACCESS_HOST_WRITE_BIT = 0x00004000, + VK_ACCESS_MEMORY_READ_BIT = 0x00008000, + VK_ACCESS_MEMORY_WRITE_BIT = 0x00010000, +} VkAccessFlagBits; +typedef VkFlags VkAccessFlags; + +typedef enum VkDependencyFlagBits { + VK_DEPENDENCY_BY_REGION_BIT = 0x00000001, +} VkDependencyFlagBits; +typedef VkFlags VkDependencyFlags; typedef enum VkCommandPoolCreateFlagBits { VK_COMMAND_POOL_CREATE_TRANSIENT_BIT = 0x00000001, @@ -1941,9 +1944,9 @@ typedef struct VkSubpassDependency { uint32_t dstSubpass; VkPipelineStageFlags srcStageMask; VkPipelineStageFlags dstStageMask; - VkMemoryOutputFlags outputMask; - VkMemoryInputFlags inputMask; - VkBool32 byRegion; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; + VkDependencyFlags dependencyFlags; } VkSubpassDependency; typedef struct VkRenderPassCreateInfo { @@ -2070,8 +2073,8 @@ typedef struct VkRenderPassBeginInfo { typedef struct VkBufferMemoryBarrier { VkStructureType sType; const void* pNext; - VkMemoryOutputFlags outputMask; - VkMemoryInputFlags inputMask; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; uint32_t srcQueueFamilyIndex; uint32_t destQueueFamilyIndex; VkBuffer buffer; @@ -2103,8 +2106,8 @@ typedef struct VkDrawIndirectCommand { typedef struct VkImageMemoryBarrier { VkStructureType sType; const void* pNext; - VkMemoryOutputFlags outputMask; - VkMemoryInputFlags inputMask; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; VkImageLayout oldLayout; VkImageLayout newLayout; uint32_t srcQueueFamilyIndex; @@ -2116,8 +2119,8 @@ typedef struct VkImageMemoryBarrier { typedef struct VkMemoryBarrier { VkStructureType sType; const void* 
pNext; - VkMemoryOutputFlags outputMask; - VkMemoryInputFlags inputMask; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; } VkMemoryBarrier; diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 80d9fcec88f..9d323d754ff 100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -134,8 +134,8 @@ anv_dump_image_to_ppm(struct anv_device *device, true, 1, (const void * []) { &(VkImageMemoryBarrier) { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .outputMask = VK_MEMORY_OUTPUT_TRANSFER_BIT, - .inputMask = VK_MEMORY_INPUT_HOST_READ_BIT, + .srcAccessMask = VK_ACCESS_HOST_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .oldLayout = VK_IMAGE_LAYOUT_GENERAL, .newLayout = VK_IMAGE_LAYOUT_GENERAL, .srcQueueFamilyIndex = 0, diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index 166e335ae53..e404d5efa9e 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -194,28 +194,28 @@ void genX(CmdPipelineBarrier)( * the app asks for. One of these days we may make this a bit better * but right now that's all the hardware allows for in most areas. 
*/ - VkMemoryOutputFlags out_flags = 0; - VkMemoryInputFlags in_flags = 0; + VkAccessFlags src_flags = 0; + VkAccessFlags dst_flags = 0; for (uint32_t i = 0; i < memBarrierCount; i++) { const struct anv_common *common = ppMemBarriers[i]; switch (common->sType) { case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; + src_flags |= barrier->srcAccessMask; + dst_flags |= barrier->dstAccessMask; break; } case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; + src_flags |= barrier->srcAccessMask; + dst_flags |= barrier->dstAccessMask; break; } case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); - out_flags |= barrier->outputMask; - in_flags |= barrier->inputMask; + src_flags |= barrier->srcAccessMask; + dst_flags |= barrier->dstAccessMask; break; } default: @@ -223,50 +223,57 @@ void genX(CmdPipelineBarrier)( } } - for_each_bit(b, out_flags) { - switch ((VkMemoryOutputFlags)(1 << b)) { - case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: + for_each_bit(b, dst_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_HOST_WRITE_BIT: break; /* FIXME: Little-core systems */ - case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: + case VK_ACCESS_SHADER_WRITE_BIT: cmd.DCFlushEnable = true; break; - case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: + case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: cmd.RenderTargetCacheFlushEnable = true; break; - case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: cmd.DepthCacheFlushEnable = true; break; - case VK_MEMORY_OUTPUT_TRANSFER_BIT: + case VK_ACCESS_TRANSFER_WRITE_BIT: cmd.RenderTargetCacheFlushEnable = true; cmd.DepthCacheFlushEnable = true; break; default: - unreachable("Invalid memory output flag"); + assert(!"Not a 
write bit"); } } - for_each_bit(b, out_flags) { - switch ((VkMemoryInputFlags)(1 << b)) { - case VK_MEMORY_INPUT_HOST_READ_BIT: + for_each_bit(b, src_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_HOST_READ_BIT: break; /* FIXME: Little-core systems */ - case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: - case VK_MEMORY_INPUT_INDEX_FETCH_BIT: - case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: + case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: + case VK_ACCESS_INDEX_READ_BIT: + case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: cmd.VFCacheInvalidationEnable = true; break; - case VK_MEMORY_INPUT_UNIFORM_READ_BIT: + case VK_ACCESS_UNIFORM_READ_BIT: cmd.ConstantCacheInvalidationEnable = true; /* fallthrough */ - case VK_MEMORY_INPUT_SHADER_READ_BIT: + case VK_ACCESS_SHADER_READ_BIT: cmd.DCFlushEnable = true; cmd.TextureCacheInvalidationEnable = true; break; - case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: - case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: - break; /* XXX: Hunh? */ - case VK_MEMORY_INPUT_TRANSFER_BIT: + case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: cmd.TextureCacheInvalidationEnable = true; break; + case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT: + cmd.DCFlushEnable = true; + break; + case VK_ACCESS_TRANSFER_READ_BIT: + cmd.TextureCacheInvalidationEnable = true; + break; + case VK_ACCESS_MEMORY_READ_BIT: + break; /* XXX: What is this? 
*/ + default: + assert(!"Not a read bit"); } } -- cgit v1.2.3 From c30a0218202d65f4ceaa8d9c0d2afc8b2d0c0621 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Dec 2015 13:50:09 -0800 Subject: vk/0.210.0: More function argument renaming --- include/vulkan/vulkan.h | 29 +++++++++++++++-------------- src/vulkan/anv_dump.c | 2 +- src/vulkan/anv_pipeline.c | 1 + 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 5a13260a86e..08559da94c2 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2076,7 +2076,7 @@ typedef struct VkBufferMemoryBarrier { VkAccessFlags srcAccessMask; VkAccessFlags dstAccessMask; uint32_t srcQueueFamilyIndex; - uint32_t destQueueFamilyIndex; + uint32_t dstQueueFamilyIndex; VkBuffer buffer; VkDeviceSize offset; VkDeviceSize size; @@ -2111,7 +2111,7 @@ typedef struct VkImageMemoryBarrier { VkImageLayout oldLayout; VkImageLayout newLayout; uint32_t srcQueueFamilyIndex; - uint32_t destQueueFamilyIndex; + uint32_t dstQueueFamilyIndex; VkImage image; VkImageSubresourceRange subresourceRange; } VkImageMemoryBarrier; @@ -2192,8 +2192,8 @@ typedef void (VKAPI_PTR *PFN_vkDestroyShader)(VkDevice device, VkShader shader); typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); typedef void (VKAPI_PTR *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache); typedef size_t (VKAPI_PTR *PFN_vkGetPipelineCacheSize)(VkDevice device, VkPipelineCache pipelineCache); -typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, void* pData); -typedef VkResult (VKAPI_PTR *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache destCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); +typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache 
pipelineCache, size_t* pDataSize, void* pData); +typedef VkResult (VKAPI_PTR *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache dstCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); typedef VkResult (VKAPI_PTR *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); typedef VkResult (VKAPI_PTR *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); typedef void (VKAPI_PTR *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline); @@ -2254,8 +2254,8 @@ typedef void (VKAPI_PTR *PFN_vkCmdClearAttachments)(VkCommandBuffer commandBuffe typedef void (VKAPI_PTR *PFN_vkCmdResolveImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); typedef void (VKAPI_PTR *PFN_vkCmdSetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); typedef void (VKAPI_PTR *PFN_vkCmdResetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); -typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); -typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, VkBool32 byRegion, uint32_t memBarrierCount, const void* const* ppMemBarriers); +typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, uint32_t memoryBarrierCount, const void* 
const* ppMemoryBarriers); +typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags, uint32_t memoryBarrierCount, const void* const* ppMemoryBarriers); typedef void (VKAPI_PTR *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t entry, VkQueryControlFlags flags); typedef void (VKAPI_PTR *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t entry); typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); @@ -2602,11 +2602,12 @@ VKAPI_ATTR size_t VKAPI_CALL vkGetPipelineCacheSize( VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineCacheData( VkDevice device, VkPipelineCache pipelineCache, + size_t* pDataSize, void* pData); VKAPI_ATTR VkResult VKAPI_CALL vkMergePipelineCaches( VkDevice device, - VkPipelineCache destCache, + VkPipelineCache dstCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); @@ -2959,17 +2960,17 @@ VKAPI_ATTR void VKAPI_CALL vkCmdWaitEvents( uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - uint32_t memBarrierCount, - const void* const* ppMemBarriers); + VkPipelineStageFlags dstStageMask, + uint32_t memoryBarrierCount, + const void* const* ppMemoryBarriers); VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - VkBool32 byRegion, - uint32_t memBarrierCount, - const void* const* ppMemBarriers); + VkPipelineStageFlags dstStageMask, + VkDependencyFlags dependencyFlags, + uint32_t memoryBarrierCount, + const void* const* ppMemoryBarriers); VKAPI_ATTR void VKAPI_CALL vkCmdBeginQuery( VkCommandBuffer commandBuffer, diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 9d323d754ff..34a338339de 
100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -139,7 +139,7 @@ anv_dump_image_to_ppm(struct anv_device *device, .oldLayout = VK_IMAGE_LAYOUT_GENERAL, .newLayout = VK_IMAGE_LAYOUT_GENERAL, .srcQueueFamilyIndex = 0, - .destQueueFamilyIndex = 0, + .dstQueueFamilyIndex = 0, .image = copy_image, .subresourceRange = (VkImageSubresourceRange) { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 25188a86486..7ed933f4da5 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -210,6 +210,7 @@ size_t anv_GetPipelineCacheSize( VkResult anv_GetPipelineCacheData( VkDevice device, VkPipelineCache pipelineCache, + size_t* pDataSize, void* pData) { stub_return(VK_UNSUPPORTED); -- cgit v1.2.3 From 9349625d60ff8a2476e062ba4038cd81ee2394e2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Dec 2015 13:58:25 -0800 Subject: vk/0.210.0: Rework VkInstanceCreateInfo --- include/vulkan/vulkan.h | 16 ++++++++-------- src/vulkan/anv_device.c | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 08559da94c2..b10c751ad4a 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1078,11 +1078,11 @@ typedef void (VKAPI_PTR *PFN_vkFreeFunction)( typedef void (VKAPI_PTR *PFN_vkVoidFunction)(void); -typedef struct { +typedef struct VkApplicationInfo { VkStructureType sType; const void* pNext; - const char* pAppName; - uint32_t appVersion; + const char* pApplicationName; + uint32_t applicationVersion; const char* pEngineName; uint32_t engineVersion; uint32_t apiVersion; @@ -1098,12 +1098,12 @@ typedef struct VkInstanceCreateInfo { VkStructureType sType; const void* pNext; VkInstanceCreateFlags flags; - const VkApplicationInfo* pAppInfo; + const VkApplicationInfo* pApplicationInfo; const VkAllocCallbacks* pAllocCb; - uint32_t layerCount; - const char*const* ppEnabledLayerNames; - 
uint32_t extensionCount; - const char*const* ppEnabledExtensionNames; + uint32_t enabledLayerNameCount; + const char* const* ppEnabledLayerNames; + uint32_t enabledExtensionNameCount; + const char* const* ppEnabledExtensionNames; } VkInstanceCreateInfo; typedef struct VkPhysicalDeviceFeatures { diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index c3dcd9f56cc..852792db413 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -194,10 +194,10 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - if (pCreateInfo->pAppInfo->apiVersion != VK_MAKE_VERSION(0, 170, 2)) + if (pCreateInfo->pApplicationInfo->apiVersion != VK_MAKE_VERSION(0, 170, 2)) return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); - for (uint32_t i = 0; i < pCreateInfo->extensionCount; i++) { + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionNameCount; i++) { bool found = false; for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], @@ -223,7 +223,7 @@ VkResult anv_CreateInstance( instance->pAllocUserData = alloc_callbacks->pUserData; instance->pfnAlloc = alloc_callbacks->pfnAlloc; instance->pfnFree = alloc_callbacks->pfnFree; - instance->apiVersion = pCreateInfo->pAppInfo->apiVersion; + instance->apiVersion = pCreateInfo->pApplicationInfo->apiVersion; instance->physicalDeviceCount = -1; _mesa_locale_init(); -- cgit v1.2.3 From d3547e733443f406ff505e5f9f9790de0ae16907 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Dec 2015 14:09:17 -0800 Subject: vk/0.210.0: Use VkSampleCountFlagBits for sample counts --- include/vulkan/vulkan.h | 10 +++++----- src/vulkan/anv_image.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index b10c751ad4a..39b3c01fc11 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1510,7 +1510,7 @@ typedef struct VkImageCreateInfo { 
VkExtent3D extent; uint32_t mipLevels; uint32_t arrayLayers; - uint32_t samples; + VkSampleCountFlagBits samples; VkImageTiling tiling; VkImageUsageFlags usage; VkSharingMode sharingMode; @@ -1912,7 +1912,7 @@ typedef struct VkFramebufferCreateInfo { typedef struct VkAttachmentDescription { VkAttachmentDescriptionFlags flags; VkFormat format; - uint32_t samples; + VkSampleCountFlagBits samples; VkAttachmentLoadOp loadOp; VkAttachmentStoreOp storeOp; VkAttachmentLoadOp stencilLoadOp; @@ -2157,7 +2157,7 @@ typedef VkResult (VKAPI_PTR *PFN_vkBindImageMemory)(VkDevice device, VkImage ima typedef void (VKAPI_PTR *PFN_vkGetBufferMemoryRequirements)(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); typedef void (VKAPI_PTR *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); typedef void (VKAPI_PTR *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t* pNumRequirements, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); -typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceSparseImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, uint32_t samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pNumProperties, VkSparseImageFormatProperties* pProperties); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceSparseImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pPropertyCount, VkSparseImageFormatProperties* pProperties); typedef VkResult (VKAPI_PTR *PFN_vkQueueBindSparse)(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo, VkFence fence); typedef VkResult (VKAPI_PTR *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); typedef void (VKAPI_PTR *PFN_vkDestroyFence)(VkDevice device, VkFence fence); @@ -2433,10 +2433,10 @@ 
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceSparseImageFormatProperties( VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, - uint32_t samples, + VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling, - uint32_t* pNumProperties, + uint32_t* pPropertyCount, VkSparseImageFormatProperties* pProperties); VKAPI_ATTR VkResult VKAPI_CALL vkQueueBindSparse( diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index f52bf456d1e..db5207859a0 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -304,7 +304,7 @@ anv_image_create(VkDevice _device, anv_assert(pCreateInfo->mipLevels > 0); anv_assert(pCreateInfo->arrayLayers > 0); - anv_assert(pCreateInfo->samples == 1); + anv_assert(pCreateInfo->samples == VK_SAMPLE_COUNT_1_BIT); anv_assert(pCreateInfo->extent.width > 0); anv_assert(pCreateInfo->extent.height > 0); anv_assert(pCreateInfo->extent.depth > 0); -- cgit v1.2.3 From fcfb404a5860c67129e3672768cc557c0e4a1f12 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 03:28:27 -0800 Subject: vk/0.210.0: Rework allocation to use the new pAllocator's --- include/vulkan/vulkan.h | 266 +++++++++++++++++++++++++--------------- include/vulkan/vulkan_intel.h | 5 +- src/vulkan/anv_batch_chain.c | 156 ++++++++++++----------- src/vulkan/anv_cmd_buffer.c | 47 +++---- src/vulkan/anv_descriptor_set.c | 28 +++-- src/vulkan/anv_device.c | 204 ++++++++++++++---------------- src/vulkan/anv_dump.c | 22 ++-- src/vulkan/anv_image.c | 42 +++---- src/vulkan/anv_intel.c | 18 +-- src/vulkan/anv_meta.c | 55 +++++---- src/vulkan/anv_meta_clear.c | 26 ++-- src/vulkan/anv_pass.c | 34 ++--- src/vulkan/anv_pipeline.c | 79 +++++++----- src/vulkan/anv_private.h | 94 +++++++++----- src/vulkan/anv_query.c | 12 +- src/vulkan/anv_wsi_wayland.c | 36 +++--- src/vulkan/anv_wsi_x11.c | 28 +++-- src/vulkan/gen7_pipeline.c | 10 +- src/vulkan/gen7_state.c | 5 +- src/vulkan/gen8_pipeline.c | 17 +-- src/vulkan/gen8_state.c | 5 +- 21 files 
changed, 669 insertions(+), 520 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 39b3c01fc11..cb4ed6f613f 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -133,7 +133,7 @@ typedef enum VkResult { typedef enum VkStructureType { VK_STRUCTURE_TYPE_APPLICATION_INFO = 0, VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO = 1, - VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO = 2, + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO = 2, VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO = 3, VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 4, VK_STRUCTURE_TYPE_SHADER_CREATE_INFO = 5, @@ -185,17 +185,25 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF } VkStructureType; -typedef enum { - VK_SYSTEM_ALLOC_TYPE_API_OBJECT = 0, - VK_SYSTEM_ALLOC_TYPE_INTERNAL = 1, - VK_SYSTEM_ALLOC_TYPE_INTERNAL_TEMP = 2, - VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER = 3, - VK_SYSTEM_ALLOC_TYPE_DEBUG = 4, - VK_SYSTEM_ALLOC_TYPE_BEGIN_RANGE = VK_SYSTEM_ALLOC_TYPE_API_OBJECT, - VK_SYSTEM_ALLOC_TYPE_END_RANGE = VK_SYSTEM_ALLOC_TYPE_DEBUG, - VK_SYSTEM_ALLOC_TYPE_NUM = (VK_SYSTEM_ALLOC_TYPE_DEBUG - VK_SYSTEM_ALLOC_TYPE_API_OBJECT + 1), - VK_SYSTEM_ALLOC_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkSystemAllocType; +typedef enum VkSystemAllocationScope { + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND = 0, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT = 1, + VK_SYSTEM_ALLOCATION_SCOPE_CACHE = 2, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE = 3, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE = 4, + VK_SYSTEM_ALLOCATION_SCOPE_BEGIN_RANGE = VK_SYSTEM_ALLOCATION_SCOPE_COMMAND, + VK_SYSTEM_ALLOCATION_SCOPE_END_RANGE = VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE, + VK_SYSTEM_ALLOCATION_SCOPE_RANGE_SIZE = (VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND + 1), + VK_SYSTEM_ALLOCATION_SCOPE_MAX_ENUM = 0x7FFFFFFF +} VkSystemAllocationScope; + +typedef enum VkInternalAllocationType { + VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE = 0, + VK_INTERNAL_ALLOCATION_TYPE_BEGIN_RANGE = 
VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE, + VK_INTERNAL_ALLOCATION_TYPE_END_RANGE = VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE, + VK_INTERNAL_ALLOCATION_TYPE_RANGE_SIZE = (VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE - VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE + 1), + VK_INTERNAL_ALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkInternalAllocationType; typedef enum VkFormat { VK_FORMAT_UNDEFINED = 0, @@ -1066,15 +1074,34 @@ typedef enum { } VkQueryControlFlagBits; typedef VkFlags VkQueryControlFlags; -typedef void* (VKAPI_PTR *PFN_vkAllocFunction)( - void* pUserData, - size_t size, - size_t alignment, - VkSystemAllocType allocType); +typedef void* (VKAPI_PTR *PFN_vkAllocationFunction)( + void* pUserData, + size_t size, + size_t alignment, + VkSystemAllocationScope allocationScope); + +typedef void* (VKAPI_PTR *PFN_vkReallocationFunction)( + void* pUserData, + void* pOriginal, + size_t size, + size_t alignment, + VkSystemAllocationScope allocationScope); typedef void (VKAPI_PTR *PFN_vkFreeFunction)( - void* pUserData, - void* pMem); + void* pUserData, + void* pMemory); + +typedef void (VKAPI_PTR *PFN_vkInternalAllocationNotification)( + void* pUserData, + size_t size, + VkInternalAllocationType allocationType, + VkSystemAllocationScope allocationScope); + +typedef void (VKAPI_PTR *PFN_vkInternalFreeNotification)( + void* pUserData, + size_t size, + VkInternalAllocationType allocationType, + VkSystemAllocationScope allocationScope); typedef void (VKAPI_PTR *PFN_vkVoidFunction)(void); @@ -1088,24 +1115,26 @@ typedef struct VkApplicationInfo { uint32_t apiVersion; } VkApplicationInfo; -typedef struct { - void* pUserData; - PFN_vkAllocFunction pfnAlloc; - PFN_vkFreeFunction pfnFree; -} VkAllocCallbacks; - typedef struct VkInstanceCreateInfo { VkStructureType sType; const void* pNext; VkInstanceCreateFlags flags; const VkApplicationInfo* pApplicationInfo; - const VkAllocCallbacks* pAllocCb; uint32_t enabledLayerNameCount; const char* const* ppEnabledLayerNames; uint32_t 
enabledExtensionNameCount; const char* const* ppEnabledExtensionNames; } VkInstanceCreateInfo; +typedef struct VkAllocationCallbacks { + void* pUserData; + PFN_vkAllocationFunction pfnAllocation; + PFN_vkReallocationFunction pfnReallocation; + PFN_vkFreeFunction pfnFree; + PFN_vkInternalAllocationNotification pfnInternalAllocation; + PFN_vkInternalFreeNotification pfnInternalFree; +} VkAllocationCallbacks; + typedef struct VkPhysicalDeviceFeatures { VkBool32 robustBufferAccess; VkBool32 fullDrawIndexUint32; @@ -1356,12 +1385,12 @@ typedef struct VkLayerProperties { char description[VK_MAX_DESCRIPTION_SIZE]; } VkLayerProperties; -typedef struct { +typedef struct VkMemoryAllocateInfo { VkStructureType sType; const void* pNext; VkDeviceSize allocationSize; uint32_t memoryTypeIndex; -} VkMemoryAllocInfo; +} VkMemoryAllocateInfo; typedef struct VkMappedMemoryRange { VkStructureType sType; @@ -2124,8 +2153,8 @@ typedef struct VkMemoryBarrier { } VkMemoryBarrier; -typedef VkResult (VKAPI_PTR *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCreateInfo, VkInstance* pInstance); -typedef void (VKAPI_PTR *PFN_vkDestroyInstance)(VkInstance instance); +typedef VkResult (VKAPI_PTR *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance); +typedef void (VKAPI_PTR *PFN_vkDestroyInstance)(VkInstance instance, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); @@ -2135,8 +2164,8 @@ typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceQueueFamilyProperties)(VkPhysica typedef void (VKAPI_PTR 
*PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); typedef PFN_vkVoidFunction (VKAPI_PTR *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); typedef PFN_vkVoidFunction (VKAPI_PTR *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); -typedef VkResult (VKAPI_PTR *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); -typedef void (VKAPI_PTR *PFN_vkDestroyDevice)(VkDevice device); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice); +typedef void (VKAPI_PTR *PFN_vkDestroyDevice)(VkDevice device, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceExtensionProperties)(const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties); typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties); typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceLayerProperties)(uint32_t* pPropertyCount, VkLayerProperties* pProperties); @@ -2145,8 +2174,8 @@ typedef void (VKAPI_PTR *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueFa typedef VkResult (VKAPI_PTR *PFN_vkQueueSubmit)(VkQueue queue, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers, VkFence fence); typedef VkResult (VKAPI_PTR *PFN_vkQueueWaitIdle)(VkQueue queue); typedef VkResult (VKAPI_PTR *PFN_vkDeviceWaitIdle)(VkDevice device); -typedef VkResult (VKAPI_PTR *PFN_vkAllocMemory)(VkDevice device, const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem); -typedef void (VKAPI_PTR *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory mem); +typedef VkResult (VKAPI_PTR *PFN_vkAllocateMemory)(VkDevice device, const 
VkMemoryAllocateInfo* pAllocateInfo, const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory); +typedef void (VKAPI_PTR *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory memory, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory memory, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData); typedef void (VKAPI_PTR *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory memory); typedef VkResult (VKAPI_PTR *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges); @@ -2159,63 +2188,63 @@ typedef void (VKAPI_PTR *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkIm typedef void (VKAPI_PTR *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t* pNumRequirements, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceSparseImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pPropertyCount, VkSparseImageFormatProperties* pProperties); typedef VkResult (VKAPI_PTR *PFN_vkQueueBindSparse)(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo, VkFence fence); -typedef VkResult (VKAPI_PTR *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); -typedef void (VKAPI_PTR *PFN_vkDestroyFence)(VkDevice device, VkFence fence); +typedef VkResult (VKAPI_PTR *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkFence* pFence); +typedef void (VKAPI_PTR *PFN_vkDestroyFence)(VkDevice device, VkFence fence, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); typedef VkResult (VKAPI_PTR 
*PFN_vkGetFenceStatus)(VkDevice device, VkFence fence); typedef VkResult (VKAPI_PTR *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); -typedef VkResult (VKAPI_PTR *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore); -typedef void (VKAPI_PTR *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore semaphore); +typedef VkResult (VKAPI_PTR *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSemaphore* pSemaphore); +typedef void (VKAPI_PTR *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore semaphore, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkQueueSignalSemaphore)(VkQueue queue, VkSemaphore semaphore); typedef VkResult (VKAPI_PTR *PFN_vkQueueWaitSemaphore)(VkQueue queue, VkSemaphore semaphore); -typedef VkResult (VKAPI_PTR *PFN_vkCreateEvent)(VkDevice device, const VkEventCreateInfo* pCreateInfo, VkEvent* pEvent); -typedef void (VKAPI_PTR *PFN_vkDestroyEvent)(VkDevice device, VkEvent event); +typedef VkResult (VKAPI_PTR *PFN_vkCreateEvent)(VkDevice device, const VkEventCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkEvent* pEvent); +typedef void (VKAPI_PTR *PFN_vkDestroyEvent)(VkDevice device, VkEvent event, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkGetEventStatus)(VkDevice device, VkEvent event); typedef VkResult (VKAPI_PTR *PFN_vkSetEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI_PTR *PFN_vkResetEvent)(VkDevice device, VkEvent event); -typedef VkResult (VKAPI_PTR *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, VkQueryPool* pQueryPool); -typedef void (VKAPI_PTR *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool); +typedef VkResult (VKAPI_PTR *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo* 
pCreateInfo, const VkAllocationCallbacks* pAllocator, VkQueryPool* pQueryPool); +typedef void (VKAPI_PTR *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, size_t* pDataSize, void* pData, VkQueryResultFlags flags); -typedef VkResult (VKAPI_PTR *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); -typedef void (VKAPI_PTR *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer); -typedef VkResult (VKAPI_PTR *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); -typedef void (VKAPI_PTR *PFN_vkDestroyBufferView)(VkDevice device, VkBufferView bufferView); -typedef VkResult (VKAPI_PTR *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, VkImage* pImage); -typedef void (VKAPI_PTR *PFN_vkDestroyImage)(VkDevice device, VkImage image); +typedef VkResult (VKAPI_PTR *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkBuffer* pBuffer); +typedef void (VKAPI_PTR *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkBufferView* pView); +typedef void (VKAPI_PTR *PFN_vkDestroyBufferView)(VkDevice device, VkBufferView bufferView, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkImage* pImage); +typedef void (VKAPI_PTR *PFN_vkDestroyImage)(VkDevice device, VkImage image, const VkAllocationCallbacks* pAllocator); typedef void (VKAPI_PTR *PFN_vkGetImageSubresourceLayout)(VkDevice 
device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); -typedef VkResult (VKAPI_PTR *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); -typedef void (VKAPI_PTR *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView); -typedef VkResult (VKAPI_PTR *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); -typedef void (VKAPI_PTR *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule); +typedef VkResult (VKAPI_PTR *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkImageView* pView); +typedef void (VKAPI_PTR *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkShaderModule* pShaderModule); +typedef void (VKAPI_PTR *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkCreateShader)(VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); typedef void (VKAPI_PTR *PFN_vkDestroyShader)(VkDevice device, VkShader shader); -typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); -typedef void (VKAPI_PTR *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache); +typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipelineCache* pPipelineCache); +typedef void (VKAPI_PTR *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache, const VkAllocationCallbacks* 
pAllocator); typedef size_t (VKAPI_PTR *PFN_vkGetPipelineCacheSize)(VkDevice device, VkPipelineCache pipelineCache); typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, size_t* pDataSize, void* pData); typedef VkResult (VKAPI_PTR *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache dstCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); -typedef VkResult (VKAPI_PTR *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); -typedef VkResult (VKAPI_PTR *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); -typedef void (VKAPI_PTR *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline); -typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, VkPipelineLayout* pPipelineLayout); -typedef void (VKAPI_PTR *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelineLayout pipelineLayout); -typedef VkResult (VKAPI_PTR *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); -typedef void (VKAPI_PTR *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler); -typedef VkResult (VKAPI_PTR *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); -typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout); -typedef VkResult (VKAPI_PTR *PFN_vkCreateDescriptorPool)(VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); -typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); +typedef VkResult (VKAPI_PTR 
*PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkGraphicsPipelineCreateInfo* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); +typedef VkResult (VKAPI_PTR *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkComputePipelineCreateInfo* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); +typedef void (VKAPI_PTR *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipelineLayout* pPipelineLayout); +typedef void (VKAPI_PTR *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelineLayout pipelineLayout, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSampler* pSampler); +typedef void (VKAPI_PTR *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorSetLayout* pSetLayout); +typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDescriptorPool)(VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorPool* pDescriptorPool); +typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR 
*PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorPoolResetFlags flags); typedef VkResult (VKAPI_PTR *PFN_vkAllocateDescriptorSets)(VkDevice device, const VkDescriptorSetAllocateInfo* pAllocateInfo, VkDescriptorSet* pDescriptorSets); typedef VkResult (VKAPI_PTR *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t descriptorSetCount, const VkDescriptorSet* pDescriptorSets); typedef void (VKAPI_PTR *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies); -typedef VkResult (VKAPI_PTR *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); -typedef void (VKAPI_PTR *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer); -typedef VkResult (VKAPI_PTR *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); -typedef void (VKAPI_PTR *PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass); +typedef VkResult (VKAPI_PTR *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkFramebuffer* pFramebuffer); +typedef void (VKAPI_PTR *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass); +typedef void (VKAPI_PTR *PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass, const VkAllocationCallbacks* pAllocator); typedef void (VKAPI_PTR *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); -typedef VkResult (VKAPI_PTR *PFN_vkCreateCommandPool)(VkDevice device, const 
VkCommandPoolCreateInfo* pCreateInfo, VkCommandPool* pCommandPool); -typedef void (VKAPI_PTR *PFN_vkDestroyCommandPool)(VkDevice device, VkCommandPool commandPool); +typedef VkResult (VKAPI_PTR *PFN_vkCreateCommandPool)(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkCommandPool* pCommandPool); +typedef void (VKAPI_PTR *PFN_vkDestroyCommandPool)(VkDevice device, VkCommandPool commandPool, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkResetCommandPool)(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags); typedef VkResult (VKAPI_PTR *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCommandBufferCreateInfo* pCreateInfo, VkCommandBuffer* pCommandBuffer); typedef void (VKAPI_PTR *PFN_vkDestroyCommandBuffer)(VkDevice device, VkCommandBuffer commandBuffer); @@ -2270,10 +2299,12 @@ typedef void (VKAPI_PTR *PFN_vkCmdExecuteCommands)(VkCommandBuffer commandBuffer #ifdef VK_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateInstance( const VkInstanceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkInstance* pInstance); VKAPI_ATTR void VKAPI_CALL vkDestroyInstance( - VkInstance instance); + VkInstance instance, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDevices( VkInstance instance, @@ -2322,10 +2353,12 @@ VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr( VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice( VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkDevice* pDevice); VKAPI_ATTR void VKAPI_CALL vkDestroyDevice( - VkDevice device); + VkDevice device, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceExtensionProperties( const char* pLayerName, @@ -2365,14 +2398,16 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitIdle( VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle( VkDevice 
device); -VKAPI_ATTR VkResult VKAPI_CALL vkAllocMemory( +VKAPI_ATTR VkResult VKAPI_CALL vkAllocateMemory( VkDevice device, - const VkMemoryAllocInfo* pAllocInfo, - VkDeviceMemory* pMem); + const VkMemoryAllocateInfo* pAllocateInfo, + const VkAllocationCallbacks* pAllocator, + VkDeviceMemory* pMemory); VKAPI_ATTR void VKAPI_CALL vkFreeMemory( VkDevice device, - VkDeviceMemory mem); + VkDeviceMemory memory, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkMapMemory( VkDevice device, @@ -2448,11 +2483,13 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueBindSparse( VKAPI_ATTR VkResult VKAPI_CALL vkCreateFence( VkDevice device, const VkFenceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkFence* pFence); VKAPI_ATTR void VKAPI_CALL vkDestroyFence( VkDevice device, - VkFence fence); + VkFence fence, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkResetFences( VkDevice device, @@ -2473,11 +2510,13 @@ VKAPI_ATTR VkResult VKAPI_CALL vkWaitForFences( VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore( VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkSemaphore* pSemaphore); VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore( VkDevice device, - VkSemaphore semaphore); + VkSemaphore semaphore, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkQueueSignalSemaphore( VkQueue queue, @@ -2490,11 +2529,13 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitSemaphore( VKAPI_ATTR VkResult VKAPI_CALL vkCreateEvent( VkDevice device, const VkEventCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkEvent* pEvent); VKAPI_ATTR void VKAPI_CALL vkDestroyEvent( VkDevice device, - VkEvent event); + VkEvent event, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkGetEventStatus( VkDevice device, @@ -2511,11 +2552,13 @@ VKAPI_ATTR VkResult VKAPI_CALL vkResetEvent( VKAPI_ATTR VkResult VKAPI_CALL vkCreateQueryPool( 
VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkQueryPool* pQueryPool); VKAPI_ATTR void VKAPI_CALL vkDestroyQueryPool( VkDevice device, - VkQueryPool queryPool); + VkQueryPool queryPool, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkGetQueryPoolResults( VkDevice device, @@ -2529,29 +2572,35 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetQueryPoolResults( VKAPI_ATTR VkResult VKAPI_CALL vkCreateBuffer( VkDevice device, const VkBufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkBuffer* pBuffer); VKAPI_ATTR void VKAPI_CALL vkDestroyBuffer( VkDevice device, - VkBuffer buffer); + VkBuffer buffer, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkCreateBufferView( VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkBufferView* pView); VKAPI_ATTR void VKAPI_CALL vkDestroyBufferView( VkDevice device, - VkBufferView bufferView); + VkBufferView bufferView, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkCreateImage( VkDevice device, const VkImageCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkImage* pImage); VKAPI_ATTR void VKAPI_CALL vkDestroyImage( VkDevice device, - VkImage image); + VkImage image, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR void VKAPI_CALL vkGetImageSubresourceLayout( VkDevice device, @@ -2562,20 +2611,24 @@ VKAPI_ATTR void VKAPI_CALL vkGetImageSubresourceLayout( VKAPI_ATTR VkResult VKAPI_CALL vkCreateImageView( VkDevice device, const VkImageViewCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkImageView* pView); VKAPI_ATTR void VKAPI_CALL vkDestroyImageView( VkDevice device, - VkImageView imageView); + VkImageView imageView, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkCreateShaderModule( VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, + const 
VkAllocationCallbacks* pAllocator, VkShaderModule* pShaderModule); VKAPI_ATTR void VKAPI_CALL vkDestroyShaderModule( VkDevice device, - VkShaderModule shaderModule); + VkShaderModule shaderModule, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkCreateShader( VkDevice device, @@ -2589,11 +2642,13 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyShader( VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineCache( VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkPipelineCache* pPipelineCache); VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineCache( VkDevice device, - VkPipelineCache pipelineCache); + VkPipelineCache pipelineCache, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR size_t VKAPI_CALL vkGetPipelineCacheSize( VkDevice device, @@ -2614,56 +2669,67 @@ VKAPI_ATTR VkResult VKAPI_CALL vkMergePipelineCaches( VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines( VkDevice device, VkPipelineCache pipelineCache, - uint32_t count, + uint32_t createInfoCount, const VkGraphicsPipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines( VkDevice device, VkPipelineCache pipelineCache, - uint32_t count, + uint32_t createInfoCount, const VkComputePipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); VKAPI_ATTR void VKAPI_CALL vkDestroyPipeline( VkDevice device, - VkPipeline pipeline); + VkPipeline pipeline, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineLayout( VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkPipelineLayout* pPipelineLayout); VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineLayout( VkDevice device, - VkPipelineLayout pipelineLayout); + VkPipelineLayout pipelineLayout, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL 
vkCreateSampler( VkDevice device, const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkSampler* pSampler); VKAPI_ATTR void VKAPI_CALL vkDestroySampler( VkDevice device, - VkSampler sampler); + VkSampler sampler, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorSetLayout( VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkDescriptorSetLayout* pSetLayout); VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorSetLayout( VkDevice device, - VkDescriptorSetLayout descriptorSetLayout); + VkDescriptorSetLayout descriptorSetLayout, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorPool( VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkDescriptorPool* pDescriptorPool); VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorPool( VkDevice device, - VkDescriptorPool descriptorPool); + VkDescriptorPool descriptorPool, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkResetDescriptorPool( VkDevice device, @@ -2691,20 +2757,24 @@ VKAPI_ATTR void VKAPI_CALL vkUpdateDescriptorSets( VKAPI_ATTR VkResult VKAPI_CALL vkCreateFramebuffer( VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkFramebuffer* pFramebuffer); VKAPI_ATTR void VKAPI_CALL vkDestroyFramebuffer( VkDevice device, - VkFramebuffer framebuffer); + VkFramebuffer framebuffer, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkCreateRenderPass( VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass); VKAPI_ATTR void VKAPI_CALL vkDestroyRenderPass( VkDevice device, - VkRenderPass renderPass); + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR void VKAPI_CALL vkGetRenderAreaGranularity( VkDevice 
device, @@ -2714,11 +2784,13 @@ VKAPI_ATTR void VKAPI_CALL vkGetRenderAreaGranularity( VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool( VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkCommandPool* pCommandPool); VKAPI_ATTR void VKAPI_CALL vkDestroyCommandPool( VkDevice device, - VkCommandPool commandPool); + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandPool( VkDevice device, diff --git a/include/vulkan/vulkan_intel.h b/include/vulkan/vulkan_intel.h index 2c963e7d51d..1f77128961c 100644 --- a/include/vulkan/vulkan_intel.h +++ b/include/vulkan/vulkan_intel.h @@ -42,13 +42,14 @@ typedef struct VkDmaBufImageCreateInfo_ uint32_t strideInBytes; } VkDmaBufImageCreateInfo; -typedef VkResult (VKAPI_PTR *PFN_vkCreateDmaBufImageINTEL)(VkDevice device, const VkDmaBufImageCreateInfo* pCreateInfo, VkDeviceMemory* pMem, VkImage* pImage); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDmaBufImageINTEL)(VkDevice device, const VkDmaBufImageCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMem, VkImage* pImage); #ifdef VK_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateDmaBufImageINTEL( VkDevice _device, - const VkDmaBufImageCreateInfo* pCreateInfo, + const VkDmaBufImageCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMem, VkImage* pImage); diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 34d7268631d..6a5faea5777 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -47,7 +47,7 @@ static VkResult anv_reloc_list_init_clone(struct anv_reloc_list *list, - struct anv_device *device, + const VkAllocationCallbacks *alloc, const struct anv_reloc_list *other_list) { if (other_list) { @@ -59,18 +59,18 @@ anv_reloc_list_init_clone(struct anv_reloc_list *list, } list->relocs = - anv_device_alloc(device, list->array_length * 
sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); + anv_alloc(alloc, list->array_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (list->relocs == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); list->reloc_bos = - anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); + anv_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (list->reloc_bos == NULL) { - anv_device_free(device, list->relocs); + anv_free(alloc, list->relocs); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } @@ -85,20 +85,23 @@ anv_reloc_list_init_clone(struct anv_reloc_list *list, } VkResult -anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) +anv_reloc_list_init(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc) { - return anv_reloc_list_init_clone(list, device, NULL); + return anv_reloc_list_init_clone(list, alloc, NULL); } void -anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device) +anv_reloc_list_finish(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc) { - anv_device_free(device, list->relocs); - anv_device_free(device, list->reloc_bos); + anv_free(alloc, list->relocs); + anv_free(alloc, list->reloc_bos); } static VkResult -anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, +anv_reloc_list_grow(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, size_t num_additional_relocs) { if (list->num_relocs + num_additional_relocs <= list->array_length) @@ -109,16 +112,16 @@ anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, new_length *= 2; struct drm_i915_gem_relocation_entry *new_relocs = - anv_device_alloc(device, new_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); + anv_alloc(alloc, new_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (new_relocs == NULL) return 
vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); struct anv_bo **new_reloc_bos = - anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); + anv_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (new_relocs == NULL) { - anv_device_free(device, new_relocs); + anv_free(alloc, new_relocs); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } @@ -126,8 +129,8 @@ anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, memcpy(new_reloc_bos, list->reloc_bos, list->num_relocs * sizeof(*list->reloc_bos)); - anv_device_free(device, list->relocs); - anv_device_free(device, list->reloc_bos); + anv_free(alloc, list->relocs); + anv_free(alloc, list->reloc_bos); list->array_length = new_length; list->relocs = new_relocs; @@ -137,13 +140,14 @@ anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, } uint64_t -anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device, +anv_reloc_list_add(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, uint32_t offset, struct anv_bo *target_bo, uint32_t delta) { struct drm_i915_gem_relocation_entry *entry; int index; - anv_reloc_list_grow(list, device, 1); + anv_reloc_list_grow(list, alloc, 1); /* TODO: Handle failure */ /* XXX: Can we use I915_EXEC_HANDLE_LUT? 
*/ @@ -161,10 +165,11 @@ anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device, } static void -anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device, +anv_reloc_list_append(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, struct anv_reloc_list *other, uint32_t offset) { - anv_reloc_list_grow(list, device, other->num_relocs); + anv_reloc_list_grow(list, alloc, other->num_relocs); /* TODO: Handle failure */ memcpy(&list->relocs[list->num_relocs], &other->relocs[0], @@ -200,7 +205,7 @@ uint64_t anv_batch_emit_reloc(struct anv_batch *batch, void *location, struct anv_bo *bo, uint32_t delta) { - return anv_reloc_list_add(batch->relocs, batch->device, + return anv_reloc_list_add(batch->relocs, batch->alloc, location - batch->start, bo, delta); } @@ -221,7 +226,7 @@ anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) memcpy(batch->next, other->start, size); offset = batch->next - batch->start; - anv_reloc_list_append(batch->relocs, batch->device, + anv_reloc_list_append(batch->relocs, batch->alloc, other->relocs, offset); batch->next += size; @@ -232,20 +237,21 @@ anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) *-----------------------------------------------------------------------*/ static VkResult -anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) +anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo **bbo_out) { VkResult result; - struct anv_batch_bo *bbo = - anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (bbo == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); if (result != VK_SUCCESS) goto fail_alloc; - result = 
anv_reloc_list_init(&bbo->relocs, device); + result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc); if (result != VK_SUCCESS) goto fail_bo_alloc; @@ -254,30 +260,31 @@ anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) return VK_SUCCESS; fail_bo_alloc: - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); fail_alloc: - anv_device_free(device, bbo); + anv_free(&cmd_buffer->pool->alloc, bbo); return result; } static VkResult -anv_batch_bo_clone(struct anv_device *device, +anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer, const struct anv_batch_bo *other_bbo, struct anv_batch_bo **bbo_out) { VkResult result; - struct anv_batch_bo *bbo = - anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (bbo == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); if (result != VK_SUCCESS) goto fail_alloc; - result = anv_reloc_list_init_clone(&bbo->relocs, device, &other_bbo->relocs); + result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc, + &other_bbo->relocs); if (result != VK_SUCCESS) goto fail_bo_alloc; @@ -291,9 +298,9 @@ anv_batch_bo_clone(struct anv_device *device, return VK_SUCCESS; fail_bo_alloc: - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); fail_alloc: - anv_device_free(device, bbo); + anv_free(&cmd_buffer->pool->alloc, bbo); return result; } @@ -328,15 +335,17 @@ anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) } static void -anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) +anv_batch_bo_destroy(struct anv_batch_bo *bbo, + struct anv_cmd_buffer 
*cmd_buffer) { - anv_reloc_list_finish(&bbo->relocs, device); - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); - anv_device_free(device, bbo); + anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc); + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + anv_free(&cmd_buffer->pool->alloc, bbo); } static VkResult -anv_batch_bo_list_clone(const struct list_head *list, struct anv_device *device, +anv_batch_bo_list_clone(const struct list_head *list, + struct anv_cmd_buffer *cmd_buffer, struct list_head *new_list) { VkResult result = VK_SUCCESS; @@ -346,7 +355,7 @@ anv_batch_bo_list_clone(const struct list_head *list, struct anv_device *device, struct anv_batch_bo *prev_bbo = NULL; list_for_each_entry(struct anv_batch_bo, bbo, list, link) { struct anv_batch_bo *new_bbo; - result = anv_batch_bo_clone(device, bbo, &new_bbo); + result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo); if (result != VK_SUCCESS) break; list_addtail(&new_bbo->link, new_list); @@ -367,7 +376,7 @@ anv_batch_bo_list_clone(const struct list_head *list, struct anv_device *device, if (result != VK_SUCCESS) { list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) - anv_batch_bo_destroy(bbo, device); + anv_batch_bo_destroy(bbo, cmd_buffer); } return result; @@ -393,7 +402,8 @@ anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) } static void -emit_batch_buffer_start(struct anv_batch *batch, struct anv_bo *bo, uint32_t offset) +emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) { /* In gen8+ the address field grew to two dwords to accomodate 48 bit * offsets. 
The high 16 bits are in the last dword, so we can use the gen8 @@ -408,8 +418,9 @@ emit_batch_buffer_start(struct anv_batch *batch, struct anv_bo *bo, uint32_t off const uint32_t gen8_length = GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias; - anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, - .DwordLength = batch->device->info.gen < 8 ? gen7_length : gen8_length, + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, + .DwordLength = cmd_buffer->device->info.gen < 8 ? + gen7_length : gen8_length, ._2ndLevelBatchBuffer = _1stlevelbatch, .AddressSpaceIndicator = ASI_PPGTT, .BatchBufferStartAddress = { bo, offset }); @@ -430,7 +441,7 @@ cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); - emit_batch_buffer_start(batch, &bbo->bo, 0); + emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0); anv_batch_bo_finish(current_bbo, batch); } @@ -441,13 +452,13 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) struct anv_cmd_buffer *cmd_buffer = _data; struct anv_batch_bo *new_bbo; - VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo); if (result != VK_SUCCESS) return result; struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); if (seen_bbo == NULL) { - anv_batch_bo_destroy(new_bbo, cmd_buffer->device); + anv_batch_bo_destroy(new_bbo, cmd_buffer); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } *seen_bbo = new_bbo; @@ -520,18 +531,17 @@ VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) { struct anv_batch_bo *batch_bo; - struct anv_device *device = cmd_buffer->device; VkResult result; list_inithead(&cmd_buffer->batch_bos); - result = anv_batch_bo_create(device, &batch_bo); + result = anv_batch_bo_create(cmd_buffer, &batch_bo); if (result != VK_SUCCESS) return result; 
list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); - cmd_buffer->batch.device = device; + cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc; cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; cmd_buffer->batch.user_data = cmd_buffer; @@ -552,7 +562,7 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) goto fail_seen_bbos; result = anv_reloc_list_init(&cmd_buffer->surface_relocs, - cmd_buffer->device); + &cmd_buffer->pool->alloc); if (result != VK_SUCCESS) goto fail_bt_blocks; @@ -569,7 +579,7 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) fail_seen_bbos: anv_vector_finish(&cmd_buffer->seen_bbos); fail_batch_bo: - anv_batch_bo_destroy(batch_bo, device); + anv_batch_bo_destroy(batch_bo, cmd_buffer); return result; } @@ -577,8 +587,6 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) { - struct anv_device *device = cmd_buffer->device; - int32_t *bt_block; anv_vector_foreach(bt_block, &cmd_buffer->bt_blocks) { anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, @@ -586,31 +594,29 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) } anv_vector_finish(&cmd_buffer->bt_blocks); - anv_reloc_list_finish(&cmd_buffer->surface_relocs, cmd_buffer->device); + anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc); anv_vector_finish(&cmd_buffer->seen_bbos); /* Destroy all of the batch buffers */ list_for_each_entry_safe(struct anv_batch_bo, bbo, &cmd_buffer->batch_bos, link) { - anv_batch_bo_destroy(bbo, device); + anv_batch_bo_destroy(bbo, cmd_buffer); } - anv_device_free(device, cmd_buffer->execbuf2.objects); - anv_device_free(device, cmd_buffer->execbuf2.bos); + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.objects); + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.bos); } void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) 
{ - struct anv_device *device = cmd_buffer->device; - /* Delete all but the first batch bo */ assert(!list_empty(&cmd_buffer->batch_bos)); while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); list_del(&bbo->link); - anv_batch_bo_destroy(bbo, device); + anv_batch_bo_destroy(bbo, cmd_buffer); } assert(!list_empty(&cmd_buffer->batch_bos)); @@ -713,7 +719,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, struct anv_batch_bo *last_bbo = list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); - emit_batch_buffer_start(&primary->batch, &first_bbo->bo, 0); + emit_batch_buffer_start(primary, &first_bbo->bo, 0); struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); assert(primary->batch.start == this_bbo->bo.map); @@ -727,14 +733,14 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, */ last_bbo->relocs.num_relocs--; secondary->batch.next -= GEN8_MI_BATCH_BUFFER_START_length * 4; - emit_batch_buffer_start(&secondary->batch, &this_bbo->bo, offset); + emit_batch_buffer_start(secondary, &this_bbo->bo, offset); anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); break; } case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { struct list_head copy_list; VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, - secondary->device, + secondary, ©_list); if (result != VK_SUCCESS) return; /* FIXME */ @@ -760,7 +766,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, assert(!"Invalid execution mode"); } - anv_reloc_list_append(&primary->surface_relocs, primary->device, + anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc, &secondary->surface_relocs, 0); } @@ -784,16 +790,16 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->execbuf2.array_length * 2 : 64; struct drm_i915_gem_exec_object2 *new_objects = - anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects), - 8, 
VK_SYSTEM_ALLOC_TYPE_INTERNAL); + anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_objects), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (new_objects == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); struct anv_bo **new_bos = - anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos), - 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_bos), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (new_objects == NULL) { - anv_device_free(cmd_buffer->device, new_objects); + anv_free(&cmd_buffer->pool->alloc, new_objects); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index adba2072c51..48711b6032b 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -134,23 +134,17 @@ anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; if (*ptr == NULL) { - *ptr = anv_device_alloc(cmd_buffer->device, size, 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); + *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (*ptr == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - (*ptr)->size = size; } else if ((*ptr)->size < size) { - void *new_data = anv_device_alloc(cmd_buffer->device, size, 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_data == NULL) + *ptr = anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (*ptr == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - memcpy(new_data, *ptr, (*ptr)->size); - anv_device_free(cmd_buffer->device, *ptr); - - *ptr = new_data; - (*ptr)->size = size; } + (*ptr)->size = size; return VK_SUCCESS; } @@ -170,13 +164,14 @@ VkResult anv_CreateCommandBuffer( struct anv_cmd_buffer *cmd_buffer; VkResult result; - cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + cmd_buffer = 
anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (cmd_buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; cmd_buffer->device = device; + cmd_buffer->pool = pool; result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); if (result != VK_SUCCESS) @@ -205,7 +200,8 @@ VkResult anv_CreateCommandBuffer( return VK_SUCCESS; - fail: anv_device_free(device, cmd_buffer); + fail: + anv_free(&cmd_buffer->pool->alloc, cmd_buffer); return result; } @@ -214,7 +210,6 @@ void anv_DestroyCommandBuffer( VkDevice _device, VkCommandBuffer _cmd_buffer) { - ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); list_del(&cmd_buffer->pool_link); @@ -223,7 +218,8 @@ void anv_DestroyCommandBuffer( anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - anv_device_free(device, cmd_buffer); + + anv_free(&cmd_buffer->pool->alloc, cmd_buffer); } VkResult anv_ResetCommandBuffer( @@ -549,7 +545,7 @@ add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 
1 : 8; - anv_reloc_list_add(&cmd_buffer->surface_relocs, cmd_buffer->device, + anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc, state.offset + dword * 4, bo, offset); } @@ -898,16 +894,22 @@ void anv_CmdExecuteCommands( VkResult anv_CreateCommandPool( VkDevice _device, const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkCommandPool* pCmdPool) { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_cmd_pool *pool; - pool = anv_device_alloc(device, sizeof(*pool), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pool == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + if (pAllocator) + pool->alloc = *pAllocator; + else + pool->alloc = device->alloc; + list_inithead(&pool->cmd_buffers); *pCmdPool = anv_cmd_pool_to_handle(pool); @@ -917,14 +919,15 @@ VkResult anv_CreateCommandPool( void anv_DestroyCommandPool( VkDevice _device, - VkCommandPool commandPool) + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); anv_ResetCommandPool(_device, commandPool, 0); - anv_device_free(device, pool); + anv_free2(&device->alloc, pAllocator, pool); } VkResult anv_ResetCommandPool( diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 186e9f92a5a..081d1e85ae1 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -36,6 +36,7 @@ VkResult anv_CreateDescriptorSetLayout( VkDevice _device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkDescriptorSetLayout* pSetLayout) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -56,8 +57,8 @@ VkResult anv_CreateDescriptorSetLayout( (max_binding + 1) * sizeof(set_layout->binding[0]) + immutable_sampler_count * sizeof(struct anv_sampler *); - set_layout = 
anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + set_layout = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!set_layout) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -154,12 +155,13 @@ VkResult anv_CreateDescriptorSetLayout( void anv_DestroyDescriptorSetLayout( VkDevice _device, - VkDescriptorSetLayout _set_layout) + VkDescriptorSetLayout _set_layout, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); - anv_device_free(device, set_layout); + anv_free2(&device->alloc, pAllocator, set_layout); } /* @@ -170,6 +172,7 @@ void anv_DestroyDescriptorSetLayout( VkResult anv_CreatePipelineLayout( VkDevice _device, const VkPipelineLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkPipelineLayout* pPipelineLayout) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -219,7 +222,8 @@ VkResult anv_CreatePipelineLayout( size_t size = sizeof(*layout) + num_bindings * sizeof(layout->entries[0]); - layout = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + layout = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (layout == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -273,12 +277,13 @@ VkResult anv_CreatePipelineLayout( void anv_DestroyPipelineLayout( VkDevice _device, - VkPipelineLayout _pipelineLayout) + VkPipelineLayout _pipelineLayout, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); - anv_device_free(device, pipeline_layout); + anv_free2(&device->alloc, pAllocator, pipeline_layout); } /* @@ -288,6 +293,7 @@ void anv_DestroyPipelineLayout( VkResult anv_CreateDescriptorPool( VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkDescriptorPool* 
pDescriptorPool) { anv_finishme("VkDescriptorPool is a stub"); @@ -297,7 +303,8 @@ VkResult anv_CreateDescriptorPool( void anv_DestroyDescriptorPool( VkDevice _device, - VkDescriptorPool _pool) + VkDescriptorPool _pool, + const VkAllocationCallbacks* pAllocator) { anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); } @@ -319,7 +326,8 @@ anv_descriptor_set_create(struct anv_device *device, struct anv_descriptor_set *set; size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]); - set = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + set = anv_alloc(&device->alloc /* XXX: Use the pool */, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!set) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -349,7 +357,7 @@ void anv_descriptor_set_destroy(struct anv_device *device, struct anv_descriptor_set *set) { - anv_device_free(device, set); + anv_free(&device->alloc /* XXX: Use the pool */, set); } VkResult anv_AllocateDescriptorSets( diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 852792db413..3bbcdf04e35 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -148,28 +148,6 @@ anv_physical_device_finish(struct anv_physical_device *device) ralloc_free(device->compiler); } -static void *default_alloc( - void* pUserData, - size_t size, - size_t alignment, - VkSystemAllocType allocType) -{ - return malloc(size); -} - -static void default_free( - void* pUserData, - void* pMem) -{ - free(pMem); -} - -static const VkAllocCallbacks default_alloc_callbacks = { - .pUserData = NULL, - .pfnAlloc = default_alloc, - .pfnFree = default_free -}; - static const VkExtensionProperties global_extensions[] = { { .extensionName = VK_EXT_KHR_SWAPCHAIN_EXTENSION_NAME, @@ -184,13 +162,39 @@ static const VkExtensionProperties device_extensions[] = { }, }; +static void * +default_alloc_func(void *pUserData, size_t size, size_t align, + VkSystemAllocationScope allocationScope) +{ + return malloc(size); +} 
+ +static void * +default_realloc_func(void *pUserData, void *pOriginal, size_t size, + size_t align, VkSystemAllocationScope allocationScope) +{ + return realloc(pOriginal, size); +} + +static void +default_free_func(void *pUserData, void *pMemory) +{ + free(pMemory); +} + +static const VkAllocationCallbacks default_alloc = { + .pUserData = NULL, + .pfnAllocation = default_alloc_func, + .pfnReallocation = default_realloc_func, + .pfnFree = default_free_func, +}; + VkResult anv_CreateInstance( const VkInstanceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkInstance* pInstance) { struct anv_instance *instance; - const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks; - void *user_data = NULL; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); @@ -210,19 +214,18 @@ VkResult anv_CreateInstance( return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); } - if (pCreateInfo->pAllocCb) { - alloc_callbacks = pCreateInfo->pAllocCb; - user_data = pCreateInfo->pAllocCb->pUserData; - } - instance = alloc_callbacks->pfnAlloc(user_data, sizeof(*instance), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + instance = anv_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!instance) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - instance->pAllocUserData = alloc_callbacks->pUserData; - instance->pfnAlloc = alloc_callbacks->pfnAlloc; - instance->pfnFree = alloc_callbacks->pfnFree; + + if (pAllocator) + instance->alloc = *pAllocator; + else + instance->alloc = default_alloc; + instance->apiVersion = pCreateInfo->pApplicationInfo->apiVersion; instance->physicalDeviceCount = -1; @@ -238,7 +241,8 @@ VkResult anv_CreateInstance( } void anv_DestroyInstance( - VkInstance _instance) + VkInstance _instance, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_instance, instance, _instance); @@ -254,31 +258,7 @@ void anv_DestroyInstance( 
_mesa_locale_fini(); - instance->pfnFree(instance->pAllocUserData, instance); -} - -void * -anv_instance_alloc(struct anv_instance *instance, size_t size, - size_t alignment, VkSystemAllocType allocType) -{ - void *mem = instance->pfnAlloc(instance->pAllocUserData, - size, alignment, allocType); - if (mem) { - VG(VALGRIND_MEMPOOL_ALLOC(instance, mem, size)); - VG(VALGRIND_MAKE_MEM_UNDEFINED(mem, size)); - } - return mem; -} - -void -anv_instance_free(struct anv_instance *instance, void *mem) -{ - if (mem == NULL) - return; - - VG(VALGRIND_MEMPOOL_FREE(instance, mem)); - - instance->pfnFree(instance->pAllocUserData, mem); + anv_free(&instance->alloc, instance); } VkResult anv_EnumeratePhysicalDevices( @@ -611,10 +591,10 @@ anv_device_init_border_colors(struct anv_device *device) VkResult anv_CreateDevice( VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) { ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - struct anv_instance *instance = physical_device->instance; struct anv_device *device; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); @@ -634,14 +614,20 @@ VkResult anv_CreateDevice( anv_set_dispatch_devinfo(physical_device->info); - device = anv_instance_alloc(instance, sizeof(*device), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + device = anv_alloc2(&physical_device->instance->alloc, pAllocator, + sizeof(*device), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!device) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; device->instance = physical_device->instance; + if (pAllocator) + device->alloc = *pAllocator; + else + device->alloc = physical_device->instance->alloc; + /* XXX(chadv): Can we dup() physicalDevice->fd here? 
*/ device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); if (device->fd == -1) @@ -686,13 +672,14 @@ VkResult anv_CreateDevice( fail_fd: close(device->fd); fail_device: - anv_device_free(device, device); + anv_free(&device->alloc, device); return vk_error(VK_ERROR_INITIALIZATION_FAILED); } void anv_DestroyDevice( - VkDevice _device) + VkDevice _device, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -720,7 +707,7 @@ void anv_DestroyDevice( close(device->fd); - anv_instance_free(device->instance, device); + anv_free(&device->alloc, device); } VkResult anv_EnumerateInstanceExtensionProperties( @@ -915,22 +902,6 @@ VkResult anv_DeviceWaitIdle( return result; } -void * -anv_device_alloc(struct anv_device * device, - size_t size, - size_t alignment, - VkSystemAllocType allocType) -{ - return anv_instance_alloc(device->instance, size, alignment, allocType); -} - -void -anv_device_free(struct anv_device * device, - void * mem) -{ - anv_instance_free(device->instance, mem); -} - VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) { @@ -946,28 +917,29 @@ anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) return VK_SUCCESS; } -VkResult anv_AllocMemory( +VkResult anv_AllocateMemory( VkDevice _device, - const VkMemoryAllocInfo* pAllocInfo, + const VkMemoryAllocateInfo* pAllocateInfo, + const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMem) { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_device_memory *mem; VkResult result; - assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO); + assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); /* We support exactly one memory heap. */ - assert(pAllocInfo->memoryTypeIndex == 0); + assert(pAllocateInfo->memoryTypeIndex == 0); /* FINISHME: Fail if allocation request exceeds heap size. 
*/ - mem = anv_device_alloc(device, sizeof(*mem), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (mem == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_bo_init_new(&mem->bo, device, pAllocInfo->allocationSize); + result = anv_bo_init_new(&mem->bo, device, pAllocateInfo->allocationSize); if (result != VK_SUCCESS) goto fail; @@ -976,14 +948,15 @@ VkResult anv_AllocMemory( return VK_SUCCESS; fail: - anv_device_free(device, mem); + anv_free2(&device->alloc, pAllocator, mem); return result; } void anv_FreeMemory( VkDevice _device, - VkDeviceMemory _mem) + VkDeviceMemory _mem, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_device_memory, mem, _mem); @@ -994,7 +967,7 @@ void anv_FreeMemory( if (mem->bo.gem_handle != 0) anv_gem_close(device, mem->bo.gem_handle); - anv_device_free(device, mem); + anv_free2(&device->alloc, pAllocator, mem); } VkResult anv_MapMemory( @@ -1152,6 +1125,7 @@ VkResult anv_QueueBindSparse( VkResult anv_CreateFence( VkDevice _device, const VkFenceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkFence* pFence) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -1163,8 +1137,8 @@ VkResult anv_CreateFence( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); - fence = anv_device_alloc(device, sizeof(*fence), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (fence == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1207,21 +1181,22 @@ VkResult anv_CreateFence( return VK_SUCCESS; fail: - anv_device_free(device, fence); + anv_free2(&device->alloc, pAllocator, fence); return result; } void anv_DestroyFence( VkDevice _device, - VkFence _fence) + VkFence _fence, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, 
_device); ANV_FROM_HANDLE(anv_fence, fence, _fence); anv_gem_munmap(fence->bo.map, fence->bo.size); anv_gem_close(device, fence->bo.gem_handle); - anv_device_free(device, fence); + anv_free2(&device->alloc, pAllocator, fence); } VkResult anv_ResetFences( @@ -1301,6 +1276,7 @@ VkResult anv_WaitForFences( VkResult anv_CreateSemaphore( VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkSemaphore* pSemaphore) { *pSemaphore = (VkSemaphore)1; @@ -1309,7 +1285,8 @@ VkResult anv_CreateSemaphore( void anv_DestroySemaphore( VkDevice device, - VkSemaphore semaphore) + VkSemaphore semaphore, + const VkAllocationCallbacks* pAllocator) { stub(); } @@ -1333,6 +1310,7 @@ VkResult anv_QueueWaitSemaphore( VkResult anv_CreateEvent( VkDevice device, const VkEventCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkEvent* pEvent) { stub_return(VK_UNSUPPORTED); @@ -1340,7 +1318,8 @@ VkResult anv_CreateEvent( void anv_DestroyEvent( VkDevice device, - VkEvent event) + VkEvent event, + const VkAllocationCallbacks* pAllocator) { stub(); } @@ -1371,6 +1350,7 @@ VkResult anv_ResetEvent( VkResult anv_CreateBuffer( VkDevice _device, const VkBufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkBuffer* pBuffer) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -1378,8 +1358,8 @@ VkResult anv_CreateBuffer( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); - buffer = anv_device_alloc(device, sizeof(*buffer), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + buffer = anv_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1394,12 +1374,13 @@ VkResult anv_CreateBuffer( void anv_DestroyBuffer( VkDevice _device, - VkBuffer _buffer) + VkBuffer _buffer, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - 
anv_device_free(device, buffer); + anv_free2(&device->alloc, pAllocator, buffer); } void @@ -1428,6 +1409,7 @@ anv_fill_buffer_surface_state(struct anv_device *device, void *state, VkResult anv_CreateBufferView( VkDevice _device, const VkBufferViewCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkBufferView* pView) { stub_return(VK_UNSUPPORTED); @@ -1435,24 +1417,27 @@ VkResult anv_CreateBufferView( void anv_DestroyBufferView( VkDevice _device, - VkBufferView _bview) + VkBufferView _bview, + const VkAllocationCallbacks* pAllocator) { stub(); } void anv_DestroySampler( VkDevice _device, - VkSampler _sampler) + VkSampler _sampler, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); - anv_device_free(device, sampler); + anv_free2(&device->alloc, pAllocator, sampler); } VkResult anv_CreateFramebuffer( VkDevice _device, const VkFramebufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkFramebuffer* pFramebuffer) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -1462,8 +1447,8 @@ VkResult anv_CreateFramebuffer( size_t size = sizeof(*framebuffer) + sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount; - framebuffer = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + framebuffer = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (framebuffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1484,12 +1469,13 @@ VkResult anv_CreateFramebuffer( void anv_DestroyFramebuffer( VkDevice _device, - VkFramebuffer _fb) + VkFramebuffer _fb, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); - anv_device_free(device, fb); + anv_free2(&device->alloc, pAllocator, fb); } void vkCmdDbgMarkerBegin( diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 34a338339de..62ed4fb4861 100644 --- 
a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -54,19 +54,19 @@ anv_dump_image_to_ppm(struct anv_device *device, .tiling = VK_IMAGE_TILING_LINEAR, .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, .flags = 0, - }, ©_image); + }, NULL, ©_image); assert(result == VK_SUCCESS); VkMemoryRequirements reqs; anv_GetImageMemoryRequirements(vk_device, copy_image, &reqs); VkDeviceMemory memory; - result = anv_AllocMemory(vk_device, - &(VkMemoryAllocInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + result = anv_AllocateMemory(vk_device, + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = reqs.size, .memoryTypeIndex = 0, - }, &memory); + }, NULL, &memory); assert(result == VK_SUCCESS); result = anv_BindImageMemory(vk_device, copy_image, memory, 0); @@ -78,7 +78,7 @@ anv_dump_image_to_ppm(struct anv_device *device, .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .queueFamilyIndex = 0, .flags = 0, - }, &commandPool); + }, NULL, &commandPool); assert(result == VK_SUCCESS); VkCommandBuffer cmd; @@ -158,7 +158,7 @@ anv_dump_image_to_ppm(struct anv_device *device, &(VkFenceCreateInfo) { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .flags = 0, - }, &fence); + }, NULL, &fence); assert(result == VK_SUCCESS); result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), @@ -168,8 +168,8 @@ anv_dump_image_to_ppm(struct anv_device *device, result = anv_WaitForFences(vk_device, 1, &fence, true, UINT64_MAX); assert(result == VK_SUCCESS); - anv_DestroyFence(vk_device, fence); - anv_DestroyCommandPool(vk_device, commandPool); + anv_DestroyFence(vk_device, fence, NULL); + anv_DestroyCommandPool(vk_device, commandPool, NULL); uint8_t *map; result = anv_MapMemory(vk_device, memory, 0, reqs.size, 0, (void **)&map); @@ -204,6 +204,6 @@ anv_dump_image_to_ppm(struct anv_device *device, fclose(file); anv_UnmapMemory(vk_device, memory); - anv_DestroyImage(vk_device, copy_image); - anv_FreeMemory(vk_device, memory); + anv_DestroyImage(vk_device, 
copy_image, NULL); + anv_FreeMemory(vk_device, memory, NULL); } diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index db5207859a0..c831d756407 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -292,6 +292,7 @@ anv_image_get_full_usage(const VkImageCreateInfo *info) VkResult anv_image_create(VkDevice _device, const struct anv_image_create_info *create_info, + const VkAllocationCallbacks* alloc, VkImage *pImage) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -321,8 +322,8 @@ anv_image_create(VkDevice _device, assert(extent->height <= limits->height); assert(extent->depth <= limits->depth); - image = anv_device_alloc(device, sizeof(*image), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + image = anv_alloc2(&device->alloc, alloc, sizeof(*image), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!image) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -374,7 +375,7 @@ anv_image_create(VkDevice _device, fail: if (image) - anv_device_free(device, image); + anv_free2(&device->alloc, alloc, image); return r; } @@ -382,21 +383,24 @@ fail: VkResult anv_CreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkImage *pImage) { return anv_image_create(device, &(struct anv_image_create_info) { .vk_info = pCreateInfo, }, + pAllocator, pImage); } void -anv_DestroyImage(VkDevice _device, VkImage _image) +anv_DestroyImage(VkDevice _device, VkImage _image, + const VkAllocationCallbacks *pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); - anv_device_free(device, anv_image_from_handle(_image)); + anv_free2(&device->alloc, pAllocator, anv_image_from_handle(_image)); } static void @@ -453,6 +457,7 @@ void anv_GetImageSubresourceLayout( VkResult anv_validate_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkImageView *pView) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); @@ -531,7 +536,7 @@ 
anv_validate_CreateImageView(VkDevice _device, assert(!"bad VkImageSubresourceRange::aspectFlags"); } - return anv_CreateImageView(_device, pCreateInfo, pView); + return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView); } void @@ -584,13 +589,14 @@ anv_image_view_init(struct anv_image_view *iview, VkResult anv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkImageView *pView) { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_image_view *view; - view = anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -601,10 +607,13 @@ anv_CreateImageView(VkDevice _device, return VK_SUCCESS; } -static void -anv_image_view_destroy(struct anv_device *device, - struct anv_image_view *iview) +void +anv_DestroyImageView(VkDevice _device, VkImageView _iview, + const VkAllocationCallbacks *pAllocator) { + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image_view, iview, _iview); + if (iview->image->needs_color_rt_surface_state) { anv_state_pool_free(&device->surface_state_pool, iview->color_rt_surface_state); @@ -615,16 +624,7 @@ anv_image_view_destroy(struct anv_device *device, iview->nonrt_surface_state); } - anv_device_free(device, iview); -} - -void -anv_DestroyImageView(VkDevice _device, VkImageView _iview) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_image_view, iview, _iview); - - anv_image_view_destroy(device, iview); + anv_free2(&device->alloc, pAllocator, iview); } struct anv_surface * diff --git a/src/vulkan/anv_intel.c b/src/vulkan/anv_intel.c index 5507bf2d5cb..95665b5aa41 100644 --- a/src/vulkan/anv_intel.c +++ b/src/vulkan/anv_intel.c @@ -32,6 +32,7 @@ VkResult anv_CreateDmaBufImageINTEL( VkDevice _device, const VkDmaBufImageCreateInfo* 
pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMem, VkImage* pImage) { @@ -43,8 +44,8 @@ VkResult anv_CreateDmaBufImageINTEL( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); - mem = anv_device_alloc(device, sizeof(*mem), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (mem == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -59,13 +60,6 @@ VkResult anv_CreateDmaBufImageINTEL( mem->bo.offset = 0; mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; - image = anv_device_alloc(device, sizeof(*image), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (image == NULL) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_mem; - } - anv_image_create(_device, &(struct anv_image_create_info) { .force_tiling = true, @@ -85,7 +79,7 @@ VkResult anv_CreateDmaBufImageINTEL( .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, .flags = 0, }}, - &image_h); + pAllocator, &image_h); image = anv_image_from_handle(image_h); image->bo = &mem->bo; @@ -100,10 +94,8 @@ VkResult anv_CreateDmaBufImageINTEL( return VK_SUCCESS; - fail_mem: - anv_gem_close(device, mem->bo.gem_handle); fail: - anv_device_free(device, mem); + anv_free2(&device->alloc, pAllocator, mem); return result; } diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index de5a037b5a8..a5fb782eab8 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -217,7 +217,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, }, .dependencyCount = 0, - }, &device->meta_state.blit.render_pass); + }, NULL, &device->meta_state.blit.render_pass); /* We don't use a vertex shader for clearing, but instead build and pass * the VUEs directly to the rasterization backend. 
However, we do need @@ -315,7 +315,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) } }; anv_CreateDescriptorSetLayout(anv_device_to_handle(device), &ds_layout_info, - &device->meta_state.blit.ds_layout); + NULL, &device->meta_state.blit.ds_layout); anv_CreatePipelineLayout(anv_device_to_handle(device), &(VkPipelineLayoutCreateInfo) { @@ -323,7 +323,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) .setLayoutCount = 1, .pSetLayouts = &device->meta_state.blit.ds_layout, }, - &device->meta_state.blit.pipeline_layout); + NULL, &device->meta_state.blit.pipeline_layout); VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { { @@ -411,12 +411,12 @@ anv_device_init_meta_blit_state(struct anv_device *device) pipeline_shader_stages[1].shader = fs_2d; anv_graphics_pipeline_create(anv_device_to_handle(device), &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.blit.pipeline_2d_src); + NULL, &device->meta_state.blit.pipeline_2d_src); pipeline_shader_stages[1].shader = fs_3d; anv_graphics_pipeline_create(anv_device_to_handle(device), &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.blit.pipeline_3d_src); + NULL, &device->meta_state.blit.pipeline_3d_src); anv_DestroyShader(anv_device_to_handle(device), vs); anv_DestroyShader(anv_device_to_handle(device), fs_2d); @@ -527,7 +527,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .magFilter = blit_filter, .minFilter = blit_filter, - }, &sampler); + }, &cmd_buffer->pool->alloc, &sampler); VkDescriptorSet set; anv_AllocateDescriptorSets(anv_device_to_handle(device), @@ -568,7 +568,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .width = dest_iview->extent.width, .height = dest_iview->extent.height, .layers = 1 - }, &fb); + }, &cmd_buffer->pool->alloc, &fb); ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { @@ -628,8 +628,10 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, 
* descriptor sets, etc. has been used. We are free to delete it. */ anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); - anv_DestroySampler(anv_device_to_handle(device), sampler); - anv_DestroyFramebuffer(anv_device_to_handle(device), fb); + anv_DestroySampler(anv_device_to_handle(device), sampler, + &cmd_buffer->pool->alloc); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb, + &cmd_buffer->pool->alloc); } static void @@ -683,11 +685,13 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, VkImage src_image; image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; - anv_CreateImage(vk_device, &image_info, &src_image); + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &src_image); VkImage dest_image; image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - anv_CreateImage(vk_device, &image_info, &dest_image); + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); /* We could use a vk call to bind memory, but that would require * creating a dummy memory object etc. so there's really no point. 
@@ -742,8 +746,8 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, (VkExtent3D) { width, height, 1 }, VK_FILTER_NEAREST); - anv_DestroyImage(vk_device, src_image); - anv_DestroyImage(vk_device, dest_image); + anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc); } void anv_CmdCopyBuffer( @@ -1013,6 +1017,7 @@ static struct anv_image * make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, VkImageUsageFlags usage, VkImageType image_type, + const VkAllocationCallbacks *alloc, const VkBufferImageCopy *copy) { ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); @@ -1037,7 +1042,7 @@ make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, .tiling = VK_IMAGE_TILING_LINEAR, .usage = usage, .flags = 0, - }, &vk_image); + }, alloc, &vk_image); assert(result == VK_SUCCESS); ANV_FROM_HANDLE(anv_image, image, vk_image); @@ -1079,7 +1084,8 @@ void anv_CmdCopyBufferToImage( struct anv_image *src_image = make_image_for_buffer(vk_device, srcBuffer, proxy_format, VK_IMAGE_USAGE_SAMPLED_BIT, - dest_image->type, &pRegions[r]); + dest_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); const uint32_t dest_base_array_slice = meta_blit_get_dest_view_base_array_slice(dest_image, @@ -1159,7 +1165,8 @@ void anv_CmdCopyBufferToImage( src_image->extent.height * 4; } - anv_DestroyImage(vk_device, anv_image_to_handle(src_image)); + anv_DestroyImage(vk_device, anv_image_to_handle(src_image), + &cmd_buffer->pool->alloc); } meta_finish_blit(cmd_buffer, &saved_state); @@ -1209,7 +1216,8 @@ void anv_CmdCopyImageToBuffer( struct anv_image *dest_image = make_image_for_buffer(vk_device, destBuffer, dest_format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - src_image->type, &pRegions[r]); + src_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); unsigned num_slices; if (src_image->type == VK_IMAGE_TYPE_3D) { @@ -1262,7 +1270,8 @@ void anv_CmdCopyImageToBuffer( 
dest_image->extent.height * 4; } - anv_DestroyImage(vk_device, anv_image_to_handle(dest_image)); + anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), + &cmd_buffer->pool->alloc); } meta_finish_blit(cmd_buffer, &saved_state); @@ -1314,13 +1323,13 @@ anv_device_finish_meta(struct anv_device *device) /* Blit */ anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass); + device->meta_state.blit.render_pass, NULL); anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src); + device->meta_state.blit.pipeline_2d_src, NULL); anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_3d_src); + device->meta_state.blit.pipeline_3d_src, NULL); anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout); + device->meta_state.blit.pipeline_layout, NULL); anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout); + device->meta_state.blit.ds_layout, NULL); } diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 76ca1b6df99..d448b661dcf 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -118,7 +118,8 @@ create_pipeline(struct anv_device *device, struct nir_shader *fs_nir, const VkPipelineVertexInputStateCreateInfo *vi_state, const VkPipelineDepthStencilStateCreateInfo *ds_state, - const VkPipelineColorBlendStateCreateInfo *cb_state) + const VkPipelineColorBlendStateCreateInfo *cb_state, + const VkAllocationCallbacks *alloc) { VkDevice device_h = anv_device_to_handle(device); @@ -223,6 +224,7 @@ create_pipeline(struct anv_device *device, .disable_vs = true, .use_rectlist = true }, + alloc, &pipeline_h); ANV_CALL(DestroyShader)(device_h, vs_h); @@ -302,7 +304,7 @@ init_color_pipeline(struct anv_device *device) device->meta_state.clear.color_pipeline = create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, - &cb_state); + &cb_state, NULL); } 
static void @@ -475,7 +477,7 @@ create_depthstencil_pipeline(struct anv_device *device, }; return create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, - &cb_state); + &cb_state, NULL); } static void @@ -601,13 +603,17 @@ anv_device_finish_meta_clear_state(struct anv_device *device) VkDevice device_h = anv_device_to_handle(device); ANV_CALL(DestroyPipeline)(device_h, - anv_pipeline_to_handle(device->meta_state.clear.color_pipeline)); + anv_pipeline_to_handle(device->meta_state.clear.color_pipeline), + NULL); ANV_CALL(DestroyPipeline)(device_h, - anv_pipeline_to_handle(device->meta_state.clear.depth_only_pipeline)); + anv_pipeline_to_handle(device->meta_state.clear.depth_only_pipeline), + NULL); ANV_CALL(DestroyPipeline)(device_h, - anv_pipeline_to_handle(device->meta_state.clear.stencil_only_pipeline)); + anv_pipeline_to_handle(device->meta_state.clear.stencil_only_pipeline), + NULL); ANV_CALL(DestroyPipeline)(device_h, - anv_pipeline_to_handle(device->meta_state.clear.depthstencil_pipeline)); + anv_pipeline_to_handle(device->meta_state.clear.depthstencil_pipeline), + NULL); } void @@ -720,7 +726,7 @@ void anv_CmdClearColorImage( .width = iview.extent.width, .height = iview.extent.height, .layers = 1 - }, &fb); + }, &cmd_buffer->pool->alloc, &fb); VkRenderPass pass; anv_CreateRenderPass(anv_device_to_handle(cmd_buffer->device), @@ -755,7 +761,7 @@ void anv_CmdClearColorImage( }, }, .dependencyCount = 0, - }, &pass); + }, &cmd_buffer->pool->alloc, &pass); ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { @@ -776,6 +782,8 @@ void anv_CmdClearColorImage( }, VK_SUBPASS_CONTENTS_INLINE); ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* XXX: We're leaking the render pass and framebuffer */ } } } diff --git a/src/vulkan/anv_pass.c b/src/vulkan/anv_pass.c index 4990b6a6c40..6742274c72a 100644 --- a/src/vulkan/anv_pass.c +++ b/src/vulkan/anv_pass.c @@ -26,6 +26,7 @@ VkResult anv_CreateRenderPass( 
VkDevice _device, const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -40,8 +41,8 @@ VkResult anv_CreateRenderPass( attachments_offset = size; size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); - pass = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + pass = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pass == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -73,9 +74,9 @@ VkResult anv_CreateRenderPass( if (desc->inputAttachmentCount > 0) { subpass->input_attachments = - anv_device_alloc(device, - desc->inputAttachmentCount * sizeof(uint32_t), - 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + anv_alloc2(&device->alloc, pAllocator, + desc->inputAttachmentCount * sizeof(uint32_t), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { subpass->input_attachments[j] @@ -85,9 +86,9 @@ VkResult anv_CreateRenderPass( if (desc->colorAttachmentCount > 0) { subpass->color_attachments = - anv_device_alloc(device, - desc->colorAttachmentCount * sizeof(uint32_t), - 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + anv_alloc2(&device->alloc, pAllocator, + desc->colorAttachmentCount * sizeof(uint32_t), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { subpass->color_attachments[j] @@ -97,9 +98,9 @@ VkResult anv_CreateRenderPass( if (desc->pResolveAttachments) { subpass->resolve_attachments = - anv_device_alloc(device, - desc->colorAttachmentCount * sizeof(uint32_t), - 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + anv_alloc2(&device->alloc, pAllocator, + desc->colorAttachmentCount * sizeof(uint32_t), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { subpass->resolve_attachments[j] @@ -122,7 +123,8 @@ VkResult anv_CreateRenderPass( void anv_DestroyRenderPass( 
VkDevice _device, - VkRenderPass _pass) + VkRenderPass _pass, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_render_pass, pass, _pass); @@ -133,12 +135,12 @@ void anv_DestroyRenderPass( */ struct anv_subpass *subpass = &pass->subpasses[i]; - anv_device_free(device, subpass->input_attachments); - anv_device_free(device, subpass->color_attachments); - anv_device_free(device, subpass->resolve_attachments); + anv_free2(&device->alloc, pAllocator, subpass->input_attachments); + anv_free2(&device->alloc, pAllocator, subpass->color_attachments); + anv_free2(&device->alloc, pAllocator, subpass->resolve_attachments); } - anv_device_free(device, pass); + anv_free2(&device->alloc, pAllocator, pass); } void anv_GetRenderAreaGranularity( diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 7ed933f4da5..2d695ee39ab 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -40,6 +40,7 @@ VkResult anv_CreateShaderModule( VkDevice _device, const VkShaderModuleCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkShaderModule* pShaderModule) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -48,8 +49,9 @@ VkResult anv_CreateShaderModule( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); assert(pCreateInfo->flags == 0); - module = anv_device_alloc(device, sizeof(*module) + pCreateInfo->codeSize, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + module = anv_alloc2(&device->alloc, pAllocator, + sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (module == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -64,12 +66,13 @@ VkResult anv_CreateShaderModule( void anv_DestroyShaderModule( VkDevice _device, - VkShaderModule _module) + VkShaderModule _module, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_shader_module, module, _module); - 
anv_device_free(device, module); + anv_free2(&device->alloc, pAllocator, module); } VkResult anv_CreateShader( @@ -87,8 +90,8 @@ VkResult anv_CreateShader( const char *name = pCreateInfo->pName ? pCreateInfo->pName : "main"; size_t name_len = strlen(name); - shader = anv_device_alloc(device, sizeof(*shader) + name_len + 1, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + shader = anv_alloc(&device->alloc, sizeof(*shader) + name_len + 1, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (shader == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -107,7 +110,7 @@ void anv_DestroyShader( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_shader, shader, _shader); - anv_device_free(device, shader); + anv_free(&device->alloc, shader); } #define SPIR_V_MAGIC_NUMBER 0x07230203 @@ -187,6 +190,7 @@ anv_shader_compile_to_nir(struct anv_device *device, VkResult anv_CreatePipelineCache( VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkPipelineCache* pPipelineCache) { *pPipelineCache = (VkPipelineCache)1; @@ -196,7 +200,8 @@ VkResult anv_CreatePipelineCache( void anv_DestroyPipelineCache( VkDevice _device, - VkPipelineCache _cache) + VkPipelineCache _cache, + const VkAllocationCallbacks* pAllocator) { } @@ -227,16 +232,18 @@ VkResult anv_MergePipelineCaches( void anv_DestroyPipeline( VkDevice _device, - VkPipeline _pipeline) + VkPipeline _pipeline, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - anv_reloc_list_finish(&pipeline->batch_relocs, pipeline->device); + anv_reloc_list_finish(&pipeline->batch_relocs, + pAllocator ? 
pAllocator : &device->alloc); anv_state_stream_finish(&pipeline->program_stream); if (pipeline->blend_state.map) anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); - anv_device_free(pipeline->device, pipeline); + anv_free2(&device->alloc, pAllocator, pipeline); } static const uint32_t vk_to_gen_primitive_type[] = { @@ -366,10 +373,9 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; if (prog_data->nr_params > 0) { + /* XXX: I think we're leaking this */ prog_data->param = (const gl_constant_value **) - anv_device_alloc(pipeline->device, - prog_data->nr_params * sizeof(gl_constant_value *), - 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER); + malloc(prog_data->nr_params * sizeof(gl_constant_value *)); /* We now set the param values to be offsets into a * anv_push_constant_data structure. Since the compiler doesn't @@ -961,22 +967,23 @@ anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) VkResult anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra) + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc) { - VkResult result; - anv_validate { anv_pipeline_validate_create_info(pCreateInfo); } + if (alloc == NULL) + alloc = &device->alloc; + pipeline->device = device; pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - result = anv_reloc_list_init(&pipeline->batch_relocs, device); - if (result != VK_SUCCESS) { - anv_device_free(device, pipeline); - return result; - } + anv_reloc_list_init(&pipeline->batch_relocs, alloc); + /* TODO: Handle allocation fail */ + + pipeline->batch.alloc = alloc; pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); pipeline->batch.relocs = &pipeline->batch_relocs; @@ -1074,6 
+1081,7 @@ anv_graphics_pipeline_create( VkDevice _device, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -1081,13 +1089,13 @@ anv_graphics_pipeline_create( switch (device->info.gen) { case 7: if (device->info.is_haswell) - return gen75_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); + return gen75_graphics_pipeline_create(_device, pCreateInfo, extra, pAllocator, pPipeline); else - return gen7_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); + return gen7_graphics_pipeline_create(_device, pCreateInfo, extra, pAllocator, pPipeline); case 8: - return gen8_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); + return gen8_graphics_pipeline_create(_device, pCreateInfo, extra, pAllocator, pPipeline); case 9: - return gen9_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); + return gen9_graphics_pipeline_create(_device, pCreateInfo, extra, pAllocator, pPipeline); default: unreachable("unsupported gen\n"); } @@ -1098,6 +1106,7 @@ VkResult anv_CreateGraphicsPipelines( VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines) { VkResult result = VK_SUCCESS; @@ -1105,10 +1114,10 @@ VkResult anv_CreateGraphicsPipelines( unsigned i = 0; for (; i < count; i++) { result = anv_graphics_pipeline_create(_device, &pCreateInfos[i], - NULL, &pPipelines[i]); + NULL, pAllocator, &pPipelines[i]); if (result != VK_SUCCESS) { for (unsigned j = 0; j < i; j++) { - anv_DestroyPipeline(_device, pPipelines[j]); + anv_DestroyPipeline(_device, pPipelines[j], pAllocator); } return result; @@ -1121,6 +1130,7 @@ VkResult anv_CreateGraphicsPipelines( static VkResult anv_compute_pipeline_create( VkDevice _device, const VkComputePipelineCreateInfo* pCreateInfo, + 
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -1128,13 +1138,13 @@ static VkResult anv_compute_pipeline_create( switch (device->info.gen) { case 7: if (device->info.is_haswell) - return gen75_compute_pipeline_create(_device, pCreateInfo, pPipeline); + return gen75_compute_pipeline_create(_device, pCreateInfo, pAllocator, pPipeline); else - return gen7_compute_pipeline_create(_device, pCreateInfo, pPipeline); + return gen7_compute_pipeline_create(_device, pCreateInfo, pAllocator, pPipeline); case 8: - return gen8_compute_pipeline_create(_device, pCreateInfo, pPipeline); + return gen8_compute_pipeline_create(_device, pCreateInfo, pAllocator, pPipeline); case 9: - return gen9_compute_pipeline_create(_device, pCreateInfo, pPipeline); + return gen9_compute_pipeline_create(_device, pCreateInfo, pAllocator, pPipeline); default: unreachable("unsupported gen\n"); } @@ -1145,6 +1155,7 @@ VkResult anv_CreateComputePipelines( VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines) { VkResult result = VK_SUCCESS; @@ -1152,10 +1163,10 @@ VkResult anv_CreateComputePipelines( unsigned i = 0; for (; i < count; i++) { result = anv_compute_pipeline_create(_device, &pCreateInfos[i], - &pPipelines[i]); + pAllocator, &pPipelines[i]); if (result != VK_SUCCESS) { for (unsigned j = 0; j < i; j++) { - anv_DestroyPipeline(_device, pPipelines[j]); + anv_DestroyPipeline(_device, pPipelines[j], pAllocator); } return result; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 6130cb97b75..964e6d8cf57 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -431,6 +431,50 @@ extern struct anv_dispatch_table dtable; dtable.func; \ }) +static inline void * +anv_alloc(const VkAllocationCallbacks *alloc, + size_t size, size_t align, + VkSystemAllocationScope scope) +{ + return 
alloc->pfnAllocation(alloc->pUserData, size, align, scope); +} + +static inline void * +anv_realloc(const VkAllocationCallbacks *alloc, + void *ptr, size_t size, size_t align, + VkSystemAllocationScope scope) +{ + return alloc->pfnReallocation(alloc->pUserData, ptr, size, align, scope); +} + +static inline void +anv_free(const VkAllocationCallbacks *alloc, void *data) +{ + alloc->pfnFree(alloc->pUserData, data); +} + +static inline void * +anv_alloc2(const VkAllocationCallbacks *parent_alloc, + const VkAllocationCallbacks *alloc, + size_t size, size_t align, + VkSystemAllocationScope scope) +{ + if (alloc) + return anv_alloc(alloc, size, align, scope); + else + return anv_alloc(parent_alloc, size, align, scope); +} + +static inline void +anv_free2(const VkAllocationCallbacks *parent_alloc, + const VkAllocationCallbacks *alloc, + void *data) +{ + if (alloc) + anv_free(alloc, data); + else + anv_free(parent_alloc, data); +} struct anv_physical_device { VK_LOADER_DATA _loader_data; @@ -451,9 +495,8 @@ bool anv_is_scalar_shader_stage(const struct brw_compiler *compiler, struct anv_instance { VK_LOADER_DATA _loader_data; - void * pAllocUserData; - PFN_vkAllocFunction pfnAlloc; - PFN_vkFreeFunction pfnFree; + VkAllocationCallbacks alloc; + uint32_t apiVersion; int physicalDeviceCount; struct anv_physical_device physicalDevice; @@ -497,6 +540,8 @@ struct anv_queue { struct anv_device { VK_LOADER_DATA _loader_data; + VkAllocationCallbacks alloc; + struct anv_instance * instance; uint32_t chipset_id; struct brw_device_info info; @@ -526,26 +571,6 @@ struct anv_device { pthread_mutex_t mutex; }; -void * -anv_instance_alloc(struct anv_instance * instance, - size_t size, - size_t alignment, - VkSystemAllocType allocType); - -void -anv_instance_free(struct anv_instance * instance, - void * mem); - -void * -anv_device_alloc(struct anv_device * device, - size_t size, - size_t alignment, - VkSystemAllocType allocType); - -void -anv_device_free(struct anv_device * device, - void * 
mem); - void* anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, uint64_t offset, uint64_t size); void anv_gem_munmap(void *p, uint64_t size); @@ -575,12 +600,12 @@ struct anv_reloc_list { }; VkResult anv_reloc_list_init(struct anv_reloc_list *list, - struct anv_device *device); + const VkAllocationCallbacks *alloc); void anv_reloc_list_finish(struct anv_reloc_list *list, - struct anv_device *device); + const VkAllocationCallbacks *alloc); uint64_t anv_reloc_list_add(struct anv_reloc_list *list, - struct anv_device *device, + const VkAllocationCallbacks *alloc, uint32_t offset, struct anv_bo *target_bo, uint32_t delta); @@ -600,7 +625,7 @@ struct anv_batch_bo { }; struct anv_batch { - struct anv_device * device; + const VkAllocationCallbacks * alloc; void * start; void * end; @@ -977,6 +1002,7 @@ struct anv_cmd_state { }; struct anv_cmd_pool { + VkAllocationCallbacks alloc; struct list_head cmd_buffers; }; @@ -994,6 +1020,7 @@ struct anv_cmd_buffer { struct anv_device * device; + struct anv_cmd_pool * pool; struct list_head pool_link; struct anv_batch batch; @@ -1220,7 +1247,8 @@ struct anv_graphics_pipeline_create_info { VkResult anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra); + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc); VkResult anv_pipeline_compile_cs(struct anv_pipeline *pipeline, @@ -1231,46 +1259,55 @@ VkResult anv_graphics_pipeline_create(VkDevice device, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult gen7_graphics_pipeline_create(VkDevice _device, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult 
gen75_graphics_pipeline_create(VkDevice _device, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult gen8_graphics_pipeline_create(VkDevice _device, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult gen9_graphics_pipeline_create(VkDevice _device, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult gen7_compute_pipeline_create(VkDevice _device, const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult gen75_compute_pipeline_create(VkDevice _device, const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult gen8_compute_pipeline_create(VkDevice _device, const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult gen9_compute_pipeline_create(VkDevice _device, const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); struct anv_format { @@ -1406,6 +1443,7 @@ struct anv_image_create_info { VkResult anv_image_create(VkDevice _device, const struct anv_image_create_info *info, + const VkAllocationCallbacks* alloc, VkImage *pImage); struct anv_surface * diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index 7aa54363aee..cbceacf7f8e 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -32,6 +32,7 @@ VkResult anv_CreateQueryPool( VkDevice _device, const VkQueryPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkQueryPool* pQueryPool) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -50,8 +51,8 @@ VkResult anv_CreateQueryPool( 
unreachable(""); } - pool = anv_device_alloc(device, sizeof(*pool), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pool == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -67,21 +68,22 @@ VkResult anv_CreateQueryPool( return VK_SUCCESS; fail: - anv_device_free(device, pool); + anv_free2(&device->alloc, pAllocator, pool); return result; } void anv_DestroyQueryPool( VkDevice _device, - VkQueryPool _pool) + VkQueryPool _pool, + const VkAllocationCallbacks* pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_query_pool, pool, _pool); anv_gem_munmap(pool->bo.map, pool->bo.size); anv_gem_close(device, pool->bo.gem_handle); - anv_device_free(device, pool); + anv_free2(&device->alloc, pAllocator, pool); } VkResult anv_GetQueryPoolResults( diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index 581c8a0e90c..412501445dd 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -215,15 +215,15 @@ wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display) anv_vector_finish(&display->formats); if (display->drm) wl_drm_destroy(display->drm); - anv_instance_free(wsi->instance, display); + anv_free(&wsi->instance->alloc, display); } static struct wsi_wl_display * wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display) { struct wsi_wl_display *display = - anv_instance_alloc(wsi->instance, sizeof(*display), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); + anv_alloc(&wsi->instance->alloc, sizeof(*display), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!display) return NULL; @@ -520,8 +520,8 @@ static void wsi_wl_image_finish(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image) { VkDevice vk_device = anv_device_to_handle(chain->base.device); - anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory)); - anv_DestroyImage(vk_device, 
anv_image_to_handle(image->image)); + anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory), NULL); + anv_DestroyImage(vk_device, anv_image_to_handle(image->image), NULL); } static void @@ -568,6 +568,7 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image) .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, .flags = 0, }}, + NULL, &vk_image); if (result != VK_SUCCESS) @@ -579,12 +580,13 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image) struct anv_surface *surface = &image->image->color_surface; VkDeviceMemory vk_memory; - result = anv_AllocMemory(vk_device, - &(VkMemoryAllocInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + result = anv_AllocateMemory(vk_device, + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = image->image->size, .memoryTypeIndex = 0, }, + NULL, &vk_memory); if (result != VK_SUCCESS) @@ -631,9 +633,9 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image) return VK_SUCCESS; fail_mem: - anv_FreeMemory(vk_device, vk_memory); + anv_FreeMemory(vk_device, vk_memory, NULL); fail_image: - anv_DestroyImage(vk_device, vk_image); + anv_DestroyImage(vk_device, vk_image, NULL); return result; } @@ -648,7 +650,7 @@ wsi_wl_destroy_swapchain(struct anv_swapchain *anv_chain) wsi_wl_image_finish(chain, &chain->images[i]); } - anv_device_free(chain->base.device, chain); + anv_free(&chain->base.device->alloc, chain); return VK_SUCCESS; } @@ -685,8 +687,8 @@ wsi_wl_create_swapchain(struct anv_wsi_implementation *impl, num_images = MAX2(num_images, 4); size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); - chain = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + chain = anv_alloc(&device->alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (chain == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -744,8 +746,8 @@ anv_wl_init_wsi(struct anv_instance *instance) struct 
wsi_wayland *wsi; VkResult result; - wsi = anv_instance_alloc(instance, sizeof(*wsi), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); + wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!wsi) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -784,7 +786,7 @@ fail_mutex: pthread_mutex_destroy(&wsi->mutex); fail_alloc: - anv_instance_free(instance, wsi); + anv_free(&instance->alloc, wsi); return result; } @@ -799,5 +801,5 @@ anv_wl_finish_wsi(struct anv_instance *instance) pthread_mutex_destroy(&wsi->mutex); - anv_instance_free(instance, wsi); + anv_free(&instance->alloc, wsi); } diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 1ab31c52573..f45442d522a 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -245,7 +245,7 @@ x11_destroy_swapchain(struct anv_swapchain *anv_chain) /* TODO: Delete images and free memory */ } - anv_device_free(chain->base.device, chain); + anv_free(NULL /* XXX: pAllocator */, chain); return VK_SUCCESS; } @@ -271,8 +271,8 @@ x11_create_swapchain(struct anv_wsi_implementation *impl, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); - chain = anv_device_alloc(device, size, 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + chain = anv_alloc(&device->alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (chain == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -320,6 +320,7 @@ x11_create_swapchain(struct anv_wsi_implementation *impl, .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, .flags = 0, }}, + NULL, &image_h); image = anv_image_from_handle(image_h); @@ -327,13 +328,14 @@ x11_create_swapchain(struct anv_wsi_implementation *impl, surface = &image->color_surface; - anv_AllocMemory(anv_device_to_handle(device), - &(VkMemoryAllocInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, - .allocationSize = image->size, - .memoryTypeIndex = 0, - }, - &memory_h); + 
anv_AllocateMemory(anv_device_to_handle(device), + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = image->size, + .memoryTypeIndex = 0, + }, + NULL /* XXX: pAllocator */, + &memory_h); memory = anv_device_memory_from_handle(memory_h); @@ -406,8 +408,8 @@ anv_x11_init_wsi(struct anv_instance *instance) { struct anv_wsi_implementation *impl; - impl = anv_instance_alloc(instance, sizeof(*impl), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); + impl = anv_alloc(&instance->alloc, sizeof(*impl), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!impl) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -425,5 +427,5 @@ anv_x11_init_wsi(struct anv_instance *instance) void anv_x11_finish_wsi(struct anv_instance *instance) { - anv_instance_free(instance, instance->wsi_impl[VK_PLATFORM_XCB_KHR]); + anv_free(&instance->alloc, instance->wsi_impl[VK_PLATFORM_XCB_KHR]); } diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 80c5f1a99a1..95c7bd53591 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -329,6 +329,7 @@ genX(graphics_pipeline_create)( VkDevice _device, const VkGraphicsPipelineCreateInfo* pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -337,14 +338,14 @@ genX(graphics_pipeline_create)( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); - pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pipeline == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_pipeline_init(pipeline, device, pCreateInfo, extra); + result = anv_pipeline_init(pipeline, device, pCreateInfo, extra, pAllocator); if (result != VK_SUCCESS) { - anv_device_free(device, pipeline); + 
anv_free2(&device->alloc, pAllocator, pipeline); return result; } @@ -582,6 +583,7 @@ GENX_FUNC(GEN7, GEN75) VkResult genX(compute_pipeline_create)( VkDevice _device, const VkComputePipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline) { anv_finishme("primitive_id needs sbe swizzling setup"); diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index e4cf552f90d..a3cb95dbb52 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -107,6 +107,7 @@ alloc_surface_state(struct anv_device *device, VkResult genX(CreateSampler)( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkSampler* pSampler) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -115,8 +116,8 @@ VkResult genX(CreateSampler)( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - sampler = anv_device_alloc(device, sizeof(*sampler), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!sampler) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index c2bbdb72829..f2777c02d4a 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -332,6 +332,7 @@ genX(graphics_pipeline_create)( VkDevice _device, const VkGraphicsPipelineCreateInfo* pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -341,12 +342,12 @@ genX(graphics_pipeline_create)( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); - pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pipeline == NULL) return 
vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_pipeline_init(pipeline, device, pCreateInfo, extra); + result = anv_pipeline_init(pipeline, device, pCreateInfo, extra, pAllocator); if (result != VK_SUCCESS) return result; @@ -663,6 +664,7 @@ genX(graphics_pipeline_create)( VkResult genX(compute_pipeline_create)( VkDevice _device, const VkComputePipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -671,8 +673,8 @@ VkResult genX(compute_pipeline_create)( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); - pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pipeline == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -681,9 +683,10 @@ VkResult genX(compute_pipeline_create)( pipeline->blend_state.map = NULL; - result = anv_reloc_list_init(&pipeline->batch_relocs, device); + result = anv_reloc_list_init(&pipeline->batch_relocs, + pAllocator ? 
pAllocator : &device->alloc); if (result != VK_SUCCESS) { - anv_device_free(device, pipeline); + anv_free2(&device->alloc, pAllocator, pipeline); return result; } pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 02394dc6f61..c1e0504a15b 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -269,6 +269,7 @@ genX(image_view_init)(struct anv_image_view *iview, VkResult genX(CreateSampler)( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkSampler* pSampler) { ANV_FROM_HANDLE(anv_device, device, _device); @@ -277,8 +278,8 @@ VkResult genX(CreateSampler)( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - sampler = anv_device_alloc(device, sizeof(*sampler), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!sampler) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); -- cgit v1.2.3 From a9fc0ce0e3293b45fc0a57e886e9d98eb79b7594 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 03:33:20 -0800 Subject: vk/0.210.0: Delete three no longer existant entrypoints --- include/vulkan/vulkan.h | 15 --------------- src/vulkan/anv_device.c | 14 -------------- src/vulkan/anv_pipeline.c | 7 ------- 3 files changed, 36 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index cb4ed6f613f..e29f9adf19e 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2195,8 +2195,6 @@ typedef VkResult (VKAPI_PTR *PFN_vkGetFenceStatus)(VkDevice device, VkFence fenc typedef VkResult (VKAPI_PTR *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); typedef VkResult (VKAPI_PTR *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, const VkAllocationCallbacks* 
pAllocator, VkSemaphore* pSemaphore); typedef void (VKAPI_PTR *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore semaphore, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkQueueSignalSemaphore)(VkQueue queue, VkSemaphore semaphore); -typedef VkResult (VKAPI_PTR *PFN_vkQueueWaitSemaphore)(VkQueue queue, VkSemaphore semaphore); typedef VkResult (VKAPI_PTR *PFN_vkCreateEvent)(VkDevice device, const VkEventCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkEvent* pEvent); typedef void (VKAPI_PTR *PFN_vkDestroyEvent)(VkDevice device, VkEvent event, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkGetEventStatus)(VkDevice device, VkEvent event); @@ -2220,7 +2218,6 @@ typedef VkResult (VKAPI_PTR *PFN_vkCreateShader)(VkDevice device, const VkShader typedef void (VKAPI_PTR *PFN_vkDestroyShader)(VkDevice device, VkShader shader); typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipelineCache* pPipelineCache); typedef void (VKAPI_PTR *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache, const VkAllocationCallbacks* pAllocator); -typedef size_t (VKAPI_PTR *PFN_vkGetPipelineCacheSize)(VkDevice device, VkPipelineCache pipelineCache); typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, size_t* pDataSize, void* pData); typedef VkResult (VKAPI_PTR *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache dstCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); typedef VkResult (VKAPI_PTR *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkGraphicsPipelineCreateInfo* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); @@ -2518,14 +2515,6 @@ VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore( VkSemaphore semaphore, const 
VkAllocationCallbacks* pAllocator); -VKAPI_ATTR VkResult VKAPI_CALL vkQueueSignalSemaphore( - VkQueue queue, - VkSemaphore semaphore); - -VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitSemaphore( - VkQueue queue, - VkSemaphore semaphore); - VKAPI_ATTR VkResult VKAPI_CALL vkCreateEvent( VkDevice device, const VkEventCreateInfo* pCreateInfo, @@ -2650,10 +2639,6 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineCache( VkPipelineCache pipelineCache, const VkAllocationCallbacks* pAllocator); -VKAPI_ATTR size_t VKAPI_CALL vkGetPipelineCacheSize( - VkDevice device, - VkPipelineCache pipelineCache); - VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineCacheData( VkDevice device, VkPipelineCache pipelineCache, diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 3bbcdf04e35..1c313ff9eea 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1291,20 +1291,6 @@ void anv_DestroySemaphore( stub(); } -VkResult anv_QueueSignalSemaphore( - VkQueue queue, - VkSemaphore semaphore) -{ - stub_return(VK_UNSUPPORTED); -} - -VkResult anv_QueueWaitSemaphore( - VkQueue queue, - VkSemaphore semaphore) -{ - stub_return(VK_UNSUPPORTED); -} - // Event functions VkResult anv_CreateEvent( diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 2d695ee39ab..f4344a018b8 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -205,13 +205,6 @@ void anv_DestroyPipelineCache( { } -size_t anv_GetPipelineCacheSize( - VkDevice device, - VkPipelineCache pipelineCache) -{ - stub_return(0); -} - VkResult anv_GetPipelineCacheData( VkDevice device, VkPipelineCache pipelineCache, -- cgit v1.2.3 From 5a024417894420d2647f53de8de8dd72e773fc74 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 03:34:43 -0800 Subject: vk/0.210.0: Rename a parameter to GetImageSparseMemoryRequirements --- include/vulkan/vulkan.h | 4 ++-- src/vulkan/anv_device.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git 
a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index e29f9adf19e..7f62b391972 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2185,7 +2185,7 @@ typedef VkResult (VKAPI_PTR *PFN_vkBindBufferMemory)(VkDevice device, VkBuffer b typedef VkResult (VKAPI_PTR *PFN_vkBindImageMemory)(VkDevice device, VkImage image, VkDeviceMemory memory, VkDeviceSize memoryOffset); typedef void (VKAPI_PTR *PFN_vkGetBufferMemoryRequirements)(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); typedef void (VKAPI_PTR *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); -typedef void (VKAPI_PTR *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t* pNumRequirements, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); +typedef void (VKAPI_PTR *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceSparseImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pPropertyCount, VkSparseImageFormatProperties* pProperties); typedef VkResult (VKAPI_PTR *PFN_vkQueueBindSparse)(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo, VkFence fence); typedef VkResult (VKAPI_PTR *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkFence* pFence); @@ -2458,7 +2458,7 @@ VKAPI_ATTR void VKAPI_CALL vkGetImageMemoryRequirements( VKAPI_ATTR void VKAPI_CALL vkGetImageSparseMemoryRequirements( VkDevice device, VkImage image, - uint32_t* pNumRequirements, + uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); VKAPI_ATTR void VKAPI_CALL 
vkGetPhysicalDeviceSparseImageFormatProperties( diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 1c313ff9eea..6fa7c9952e0 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1069,7 +1069,7 @@ void anv_GetImageMemoryRequirements( void anv_GetImageSparseMemoryRequirements( VkDevice device, VkImage image, - uint32_t* pNumRequirements, + uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements* pSparseMemoryRequirements) { stub(); -- cgit v1.2.3 From 938a2939c855d7e2e25b9cafc3344f79e0ba203e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 03:48:58 -0800 Subject: vk/0.210.0: We now allocate command buffers; not create them --- include/vulkan/vulkan.h | 24 +++++++++++--------- src/vulkan/anv_cmd_buffer.c | 54 ++++++++++++++++++++++++++++++++++++--------- src/vulkan/anv_dump.c | 8 +++---- 3 files changed, 61 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 7f62b391972..9e73888e6b3 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -140,7 +140,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 6, VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 7, VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 8, - VK_STRUCTURE_TYPE_COMMAND_BUFFER_CREATE_INFO = 9, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO = 9, VK_STRUCTURE_TYPE_EVENT_CREATE_INFO = 10, VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 11, VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 12, @@ -1997,13 +1997,13 @@ typedef struct VkCommandPoolCreateInfo { uint32_t queueFamilyIndex; } VkCommandPoolCreateInfo; -typedef struct { +typedef struct VkCommandBufferAllocateInfo { VkStructureType sType; const void* pNext; VkCommandPool commandPool; VkCommandBufferLevel level; - VkCommandBufferCreateFlags flags; -} VkCommandBufferCreateInfo; + uint32_t bufferCount; +} VkCommandBufferAllocateInfo; typedef struct VkCommandBufferBeginInfo { VkStructureType 
sType; @@ -2243,8 +2243,8 @@ typedef void (VKAPI_PTR *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRend typedef VkResult (VKAPI_PTR *PFN_vkCreateCommandPool)(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkCommandPool* pCommandPool); typedef void (VKAPI_PTR *PFN_vkDestroyCommandPool)(VkDevice device, VkCommandPool commandPool, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkResetCommandPool)(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags); -typedef VkResult (VKAPI_PTR *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCommandBufferCreateInfo* pCreateInfo, VkCommandBuffer* pCommandBuffer); -typedef void (VKAPI_PTR *PFN_vkDestroyCommandBuffer)(VkDevice device, VkCommandBuffer commandBuffer); +typedef VkResult (VKAPI_PTR *PFN_vkAllocateCommandBuffers)(VkDevice device, const VkCommandBufferAllocateInfo* pAllocateInfo, VkCommandBuffer* pCommandBuffers); +typedef void (VKAPI_PTR *PFN_vkFreeCommandBuffers)(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers); typedef VkResult (VKAPI_PTR *PFN_vkBeginCommandBuffer)(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo* pBeginInfo); typedef VkResult (VKAPI_PTR *PFN_vkEndCommandBuffer)(VkCommandBuffer commandBuffer); typedef VkResult (VKAPI_PTR *PFN_vkResetCommandBuffer)(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags); @@ -2782,14 +2782,16 @@ VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandPool( VkCommandPool commandPool, VkCommandPoolResetFlags flags); -VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandBuffer( +VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers( VkDevice device, - const VkCommandBufferCreateInfo* pCreateInfo, - VkCommandBuffer* pCommandBuffer); + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers); -VKAPI_ATTR void VKAPI_CALL vkDestroyCommandBuffer( +VKAPI_ATTR void VKAPI_CALL 
vkFreeCommandBuffers( VkDevice device, - VkCommandBuffer commandBuffer); + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer( VkCommandBuffer commandBuffer, diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 48711b6032b..125b078413b 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -154,13 +154,12 @@ anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, (offsetof(struct anv_push_constants, field) + \ sizeof(cmd_buffer->state.push_constants[0]->field))) -VkResult anv_CreateCommandBuffer( - VkDevice _device, - const VkCommandBufferCreateInfo* pCreateInfo, +static VkResult anv_create_cmd_buffer( + struct anv_device * device, + struct anv_cmd_pool * pool, + VkCommandBufferLevel level, VkCommandBuffer* pCommandBuffer) { - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->commandPool); struct anv_cmd_buffer *cmd_buffer; VkResult result; @@ -182,7 +181,7 @@ VkResult anv_CreateCommandBuffer( anv_state_stream_init(&cmd_buffer->dynamic_state_stream, &device->dynamic_state_block_pool); - cmd_buffer->level = pCreateInfo->level; + cmd_buffer->level = level; cmd_buffer->usage_flags = 0; anv_cmd_state_init(&cmd_buffer->state); @@ -206,12 +205,34 @@ VkResult anv_CreateCommandBuffer( return result; } -void anv_DestroyCommandBuffer( +VkResult anv_AllocateCommandBuffers( VkDevice _device, - VkCommandBuffer _cmd_buffer) + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, pAllocateInfo->commandPool); + VkResult result = VK_SUCCESS; + uint32_t i; + + for (i = 0; i < pAllocateInfo->bufferCount; i++) { + result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level, + 
&pCommandBuffers[i]); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) + anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, + i, pCommandBuffers); + + return result; +} + +static void +anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) +{ list_del(&cmd_buffer->pool_link); anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); @@ -222,6 +243,19 @@ void anv_DestroyCommandBuffer( anv_free(&cmd_buffer->pool->alloc, cmd_buffer); } +void anv_FreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers) +{ + for (uint32_t i = 0; i < commandBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); + + anv_cmd_buffer_destroy(cmd_buffer); + } +} + VkResult anv_ResetCommandBuffer( VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags) @@ -939,7 +973,7 @@ VkResult anv_ResetCommandPool( list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link) { - anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer)); + anv_cmd_buffer_destroy(cmd_buffer); } return VK_SUCCESS; diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 62ed4fb4861..4db7c2539e1 100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -82,12 +82,12 @@ anv_dump_image_to_ppm(struct anv_device *device, assert(result == VK_SUCCESS); VkCommandBuffer cmd; - result = anv_CreateCommandBuffer(vk_device, - &(VkCommandBufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_CREATE_INFO, + result = anv_AllocateCommandBuffers(vk_device, + &(VkCommandBufferAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandPool = commandPool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .flags = 0, + .bufferCount = 1, }, &cmd); assert(result == VK_SUCCESS); -- cgit v1.2.3 From e6ab06ae7f084ba0df4c4274e5de010a3a751cc2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 10:39:15 -0800 
Subject: vk/0.210.0: Rework memory property flags --- include/vulkan/vulkan.h | 11 +++++------ src/vulkan/anv_device.c | 7 +++++-- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 22cdeab98a1..9b67a9952be 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -830,17 +830,16 @@ typedef enum VkQueueFlagBits { typedef VkFlags VkQueueFlags; typedef enum VkMemoryPropertyFlagBits { - VK_MEMORY_PROPERTY_DEVICE_ONLY = 0, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT = 0x00000001, - VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT = 0x00000002, - VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT = 0x00000004, - VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT = 0x00000008, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT = 0x00000001, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT = 0x00000002, + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT = 0x00000004, + VK_MEMORY_PROPERTY_HOST_CACHED_BIT = 0x00000008, VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT = 0x00000010, } VkMemoryPropertyFlagBits; typedef VkFlags VkMemoryPropertyFlags; typedef enum VkMemoryHeapFlagBits { - VK_MEMORY_HEAP_HOST_LOCAL_BIT = 0x00000001, + VK_MEMORY_HEAP_DEVICE_LOCAL_BIT = 0x00000001, } VkMemoryHeapFlagBits; typedef VkFlags VkMemoryHeapFlags; typedef VkFlags VkDeviceCreateFlags; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 6fa7c9952e0..9cf40e6d858 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -530,14 +530,17 @@ void anv_GetPhysicalDeviceMemoryProperties( /* The property flags below are valid only for llc platforms. 
*/ pMemoryProperties->memoryTypeCount = 1; pMemoryProperties->memoryTypes[0] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, .heapIndex = 1, }; pMemoryProperties->memoryHeapCount = 1; pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { .size = heap_size, - .flags = VK_MEMORY_HEAP_HOST_LOCAL_BIT, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, }; } -- cgit v1.2.3 From e10dc002e9544500a2247e49a132e18f994f34ee Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 14:35:07 -0800 Subject: vk/0.210.0: Remove VkShader --- include/vulkan/vulkan.h | 26 +----------- src/vulkan/anv_meta.c | 49 ++++++---------------- src/vulkan/anv_meta_clear.c | 27 ++----------- src/vulkan/anv_pipeline.c | 99 ++++++++++++++++----------------------------- src/vulkan/anv_private.h | 9 +---- src/vulkan/gen8_pipeline.c | 5 ++- 6 files changed, 58 insertions(+), 157 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 9b67a9952be..f7bde4cc7c7 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -79,7 +79,6 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkQueryPool) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBufferView) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkImageView) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkShaderModule) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkShader) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineCache) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineLayout) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkRenderPass) @@ -136,7 +135,6 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO = 2, VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO = 3, VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 4, - VK_STRUCTURE_TYPE_SHADER_CREATE_INFO = 5, VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 6, VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 7, 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 8, @@ -921,7 +919,6 @@ typedef VkFlags VkBufferViewCreateFlags; typedef VkFlags VkImageViewCreateFlags; typedef VkFlags VkShaderModuleCreateFlags; typedef VkFlags VkPipelineCacheCreateFlags; -typedef VkFlags VkShaderCreateFlags; typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT = 0x00000001, @@ -1582,15 +1579,6 @@ typedef struct VkShaderModuleCreateInfo { const uint32_t* pCode; } VkShaderModuleCreateInfo; -typedef struct { - VkStructureType sType; - const void* pNext; - VkShaderModule module; - const char* pName; - VkShaderCreateFlags flags; - VkShaderStage stage; -} VkShaderCreateInfo; - typedef struct VkPipelineCacheCreateInfo { VkStructureType sType; const void* pNext; @@ -1617,7 +1605,8 @@ typedef struct VkPipelineShaderStageCreateInfo { const void* pNext; VkPipelineShaderStageCreateFlags flags; VkShaderStage stage; - VkShader shader; + VkShaderModule module; + const char* pName; const VkSpecializationInfo* pSpecializationInfo; } VkPipelineShaderStageCreateInfo; @@ -2207,8 +2196,6 @@ typedef VkResult (VKAPI_PTR *PFN_vkCreateImageView)(VkDevice device, const VkIma typedef void (VKAPI_PTR *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkShaderModule* pShaderModule); typedef void (VKAPI_PTR *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkCreateShader)(VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); -typedef void (VKAPI_PTR *PFN_vkDestroyShader)(VkDevice device, VkShader shader); typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, const VkAllocationCallbacks* 
pAllocator, VkPipelineCache* pPipelineCache); typedef void (VKAPI_PTR *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, size_t* pDataSize, void* pData); @@ -2612,15 +2599,6 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyShaderModule( VkShaderModule shaderModule, const VkAllocationCallbacks* pAllocator); -VKAPI_ATTR VkResult VKAPI_CALL vkCreateShader( - VkDevice device, - const VkShaderCreateInfo* pCreateInfo, - VkShader* pShader); - -VKAPI_ATTR void VKAPI_CALL vkDestroyShader( - VkDevice device, - VkShader shader); - VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineCache( VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index a5fb782eab8..84f828eb4cf 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -224,42 +224,18 @@ anv_device_init_meta_blit_state(struct anv_device *device) * to provide GLSL source for the vertex shader so that the compiler * does not dead-code our inputs. 
*/ - struct anv_shader_module vsm = { + struct anv_shader_module vs = { .nir = build_nir_vertex_shader(false), }; - struct anv_shader_module fsm_2d = { + struct anv_shader_module fs_2d = { .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), }; - struct anv_shader_module fsm_3d = { + struct anv_shader_module fs_3d = { .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), }; - VkShader vs; - anv_CreateShader(anv_device_to_handle(device), - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = anv_shader_module_to_handle(&vsm), - .pName = "main", - }, &vs); - - VkShader fs_2d; - anv_CreateShader(anv_device_to_handle(device), - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = anv_shader_module_to_handle(&fsm_2d), - .pName = "main", - }, &fs_2d); - - VkShader fs_3d; - anv_CreateShader(anv_device_to_handle(device), - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = anv_shader_module_to_handle(&fsm_3d), - .pName = "main", - }, &fs_3d); - VkPipelineVertexInputStateCreateInfo vi_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .vertexBindingDescriptionCount = 2, @@ -329,12 +305,14 @@ anv_device_init_meta_blit_state(struct anv_device *device) { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_VERTEX, - .shader = vs, + .module = anv_shader_module_to_handle(&vs), + .pName = "main", .pSpecializationInfo = NULL }, { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_FRAGMENT, - .shader = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */ + .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! 
*/ + .pName = "main", .pSpecializationInfo = NULL }, }; @@ -408,22 +386,19 @@ anv_device_init_meta_blit_state(struct anv_device *device) .use_rectlist = true }; - pipeline_shader_stages[1].shader = fs_2d; + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); anv_graphics_pipeline_create(anv_device_to_handle(device), &vk_pipeline_info, &anv_pipeline_info, NULL, &device->meta_state.blit.pipeline_2d_src); - pipeline_shader_stages[1].shader = fs_3d; + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); anv_graphics_pipeline_create(anv_device_to_handle(device), &vk_pipeline_info, &anv_pipeline_info, NULL, &device->meta_state.blit.pipeline_3d_src); - anv_DestroyShader(anv_device_to_handle(device), vs); - anv_DestroyShader(anv_device_to_handle(device), fs_2d); - anv_DestroyShader(anv_device_to_handle(device), fs_3d); - ralloc_free(vsm.nir); - ralloc_free(fsm_2d.nir); - ralloc_free(fsm_3d.nir); + ralloc_free(vs.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); } static void diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index d448b661dcf..cac34b3d7d3 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -126,24 +126,6 @@ create_pipeline(struct anv_device *device, struct anv_shader_module vs_m = { .nir = vs_nir }; struct anv_shader_module fs_m = { .nir = fs_nir }; - VkShader vs_h; - ANV_CALL(CreateShader)(device_h, - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = anv_shader_module_to_handle(&vs_m), - .pName = "main", - }, - &vs_h); - - VkShader fs_h; - ANV_CALL(CreateShader)(device_h, - &(VkShaderCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, - .module = anv_shader_module_to_handle(&fs_m), - .pName = "main", - }, - &fs_h); - VkPipeline pipeline_h; anv_graphics_pipeline_create(device_h, &(VkGraphicsPipelineCreateInfo) { @@ -153,12 +135,14 @@ create_pipeline(struct anv_device *device, { .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_VERTEX, - .shader = vs_h, + .module = anv_shader_module_to_handle(&vs_m), + .pName = "main", }, { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_FRAGMENT, - .shader = fs_h, + .module = anv_shader_module_to_handle(&fs_m), + .pName = "main", }, }, .pVertexInputState = vi_state, @@ -227,9 +211,6 @@ create_pipeline(struct anv_device *device, alloc, &pipeline_h); - ANV_CALL(DestroyShader)(device_h, vs_h); - ANV_CALL(DestroyShader)(device_h, fs_h); - ralloc_free(vs_nir); ralloc_free(fs_nir); diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index f4344a018b8..8b95bce8823 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -75,44 +75,6 @@ void anv_DestroyShaderModule( anv_free2(&device->alloc, pAllocator, module); } -VkResult anv_CreateShader( - VkDevice _device, - const VkShaderCreateInfo* pCreateInfo, - VkShader* pShader) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->module); - struct anv_shader *shader; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_CREATE_INFO); - assert(pCreateInfo->flags == 0); - - const char *name = pCreateInfo->pName ? 
pCreateInfo->pName : "main"; - size_t name_len = strlen(name); - - shader = anv_alloc(&device->alloc, sizeof(*shader) + name_len + 1, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (shader == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - shader->module = module, - memcpy(shader->entrypoint, name, name_len + 1); - - *pShader = anv_shader_to_handle(shader); - - return VK_SUCCESS; -} - -void anv_DestroyShader( - VkDevice _device, - VkShader _shader) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_shader, shader, _shader); - - anv_free(&device->alloc, shader); -} - #define SPIR_V_MAGIC_NUMBER 0x07230203 static const gl_shader_stage vk_shader_stage_to_mesa_stage[] = { @@ -136,9 +98,10 @@ anv_is_scalar_shader_stage(const struct brw_compiler *compiler, */ static nir_shader * anv_shader_compile_to_nir(struct anv_device *device, - struct anv_shader *shader, VkShaderStage vk_stage) + struct anv_shader_module *module, + const char *entrypoint_name, VkShaderStage vk_stage) { - if (strcmp(shader->entrypoint, "main") != 0) { + if (strcmp(entrypoint_name, "main") != 0) { anv_finishme("Multiple shaders per module not really supported"); } @@ -149,18 +112,18 @@ anv_shader_compile_to_nir(struct anv_device *device, compiler->glsl_compiler_options[stage].NirOptions; nir_shader *nir; - if (shader->module->nir) { + if (module->nir) { /* Some things such as our meta clear/blit code will give us a NIR * shader directly. 
In that case, we just ignore the SPIR-V entirely * and just use the NIR shader */ - nir = shader->module->nir; + nir = module->nir; nir->options = nir_options; } else { - uint32_t *spirv = (uint32_t *) shader->module->data; + uint32_t *spirv = (uint32_t *) module->data; assert(spirv[0] == SPIR_V_MAGIC_NUMBER); - assert(shader->module->size % 4 == 0); + assert(module->size % 4 == 0); - nir = spirv_to_nir(spirv, shader->module->size / 4, stage, nir_options); + nir = spirv_to_nir(spirv, module->size / 4, stage, nir_options); } nir_validate_shader(nir); @@ -172,7 +135,7 @@ anv_shader_compile_to_nir(struct anv_device *device, */ nir_function_impl *entrypoint = NULL; nir_foreach_overload(nir, overload) { - if (strcmp(shader->entrypoint, overload->function->name) == 0 && + if (strcmp(entrypoint_name, overload->function->name) == 0 && overload->impl) { assert(entrypoint == NULL); entrypoint = overload->impl; @@ -339,14 +302,16 @@ populate_cs_prog_key(const struct brw_device_info *devinfo, static nir_shader * anv_pipeline_compile(struct anv_pipeline *pipeline, - struct anv_shader *shader, + struct anv_shader_module *module, + const char *entrypoint, VkShaderStage stage, struct brw_stage_prog_data *prog_data) { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, shader, stage); + nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, + module, entrypoint, stage); if (nir == NULL) return NULL; @@ -451,7 +416,8 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, static VkResult anv_pipeline_compile_vs(struct anv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *info, - struct anv_shader *shader) + struct anv_shader_module *module, + const char *entrypoint) { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; @@ -464,7 +430,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, memset(prog_data, 0, 
sizeof(*prog_data)); - nir_shader *nir = anv_pipeline_compile(pipeline, shader, + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, VK_SHADER_STAGE_VERTEX, &prog_data->base.base); if (nir == NULL) @@ -472,7 +438,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, void *mem_ctx = ralloc_context(NULL); - if (shader->module->nir == NULL) + if (module->nir == NULL) ralloc_steal(mem_ctx, nir); prog_data->inputs_read = nir->info.inputs_read; @@ -513,7 +479,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, static VkResult anv_pipeline_compile_gs(struct anv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *info, - struct anv_shader *shader) + struct anv_shader_module *module, + const char *entrypoint) { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; @@ -526,7 +493,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, memset(prog_data, 0, sizeof(*prog_data)); - nir_shader *nir = anv_pipeline_compile(pipeline, shader, + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, VK_SHADER_STAGE_GEOMETRY, &prog_data->base.base); if (nir == NULL) @@ -534,7 +501,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, void *mem_ctx = ralloc_context(NULL); - if (shader->module->nir == NULL) + if (module->nir == NULL) ralloc_steal(mem_ctx, nir); brw_compute_vue_map(&pipeline->device->info, @@ -567,7 +534,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, static VkResult anv_pipeline_compile_fs(struct anv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *info, - struct anv_shader *shader) + struct anv_shader_module *module, + const char *entrypoint) { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; @@ -585,7 +553,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, prog_data->binding_table.render_target_start = 0; - nir_shader *nir = anv_pipeline_compile(pipeline, shader, + nir_shader *nir = 
anv_pipeline_compile(pipeline, module, entrypoint, VK_SHADER_STAGE_FRAGMENT, &prog_data->base); if (nir == NULL) @@ -593,7 +561,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, void *mem_ctx = ralloc_context(NULL); - if (shader->module->nir == NULL) + if (module->nir == NULL) ralloc_steal(mem_ctx, nir); unsigned code_size; @@ -643,7 +611,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, VkResult anv_pipeline_compile_cs(struct anv_pipeline *pipeline, const VkComputePipelineCreateInfo *info, - struct anv_shader *shader) + struct anv_shader_module *module, + const char *entrypoint) { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; @@ -656,7 +625,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, memset(prog_data, 0, sizeof(*prog_data)); - nir_shader *nir = anv_pipeline_compile(pipeline, shader, + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, VK_SHADER_STAGE_COMPUTE, &prog_data->base); if (nir == NULL) @@ -664,7 +633,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, void *mem_ctx = ralloc_context(NULL); - if (shader->module->nir == NULL) + if (module->nir == NULL) ralloc_steal(mem_ctx, nir); unsigned code_size; @@ -1009,17 +978,19 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, pipeline->total_scratch = 0; for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - ANV_FROM_HANDLE(anv_shader, shader, pCreateInfo->pStages[i].shader); + ANV_FROM_HANDLE(anv_shader_module, module, + pCreateInfo->pStages[i].module); + const char *entrypoint = pCreateInfo->pStages[i].pName; switch (pCreateInfo->pStages[i].stage) { case VK_SHADER_STAGE_VERTEX: - anv_pipeline_compile_vs(pipeline, pCreateInfo, shader); + anv_pipeline_compile_vs(pipeline, pCreateInfo, module, entrypoint); break; case VK_SHADER_STAGE_GEOMETRY: - anv_pipeline_compile_gs(pipeline, pCreateInfo, shader); + anv_pipeline_compile_gs(pipeline, pCreateInfo, module, entrypoint); break; 
case VK_SHADER_STAGE_FRAGMENT: - anv_pipeline_compile_fs(pipeline, pCreateInfo, shader); + anv_pipeline_compile_fs(pipeline, pCreateInfo, module, entrypoint); break; default: anv_finishme("Unsupported shader stage"); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 964e6d8cf57..940d1662c1f 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1163,11 +1163,6 @@ struct anv_shader_module { char data[0]; }; -struct anv_shader { - struct anv_shader_module * module; - char entrypoint[0]; -}; - struct anv_pipeline { struct anv_device * device; struct anv_batch batch; @@ -1253,7 +1248,8 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, VkResult anv_pipeline_compile_cs(struct anv_pipeline *pipeline, const VkComputePipelineCreateInfo *info, - struct anv_shader *shader); + struct anv_shader_module *module, + const char *entrypoint_name); VkResult anv_graphics_pipeline_create(VkDevice device, @@ -1607,7 +1603,6 @@ ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader, VkShader) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) #define ANV_DEFINE_STRUCT_CASTS(__anv_type, __VkType) \ diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index f2777c02d4a..106f82d36fa 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -710,8 +710,9 @@ VkResult genX(compute_pipeline_create)( pipeline->total_scratch = 0; assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE); - ANV_FROM_HANDLE(anv_shader, shader, pCreateInfo->stage.shader); - anv_pipeline_compile_cs(pipeline, pCreateInfo, shader); + ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); + anv_pipeline_compile_cs(pipeline, pCreateInfo, module, + 
pCreateInfo->stage.pName); pipeline->use_repclear = false; -- cgit v1.2.3 From a5f19f64c3317c98b984010f144416ce768a3c0b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 16:08:13 -0800 Subject: vk/0.210.0: Remove the VkShaderStage enum This made for an unfortunately large amount of work since we were using it fairly heavily internally. However, gl_shader_stage does basically the same things, so it's not too bad. --- include/vulkan/vulkan.h | 15 +------ src/vulkan/anv_cmd_buffer.c | 27 ++++++------ src/vulkan/anv_descriptor_set.c | 10 ++--- src/vulkan/anv_meta.c | 4 +- src/vulkan/anv_meta_clear.c | 4 +- src/vulkan/anv_nir.h | 7 --- src/vulkan/anv_nir_apply_dynamic_offsets.c | 4 +- src/vulkan/anv_nir_apply_pipeline_layout.c | 22 +++++----- src/vulkan/anv_pipeline.c | 70 ++++++++++++------------------ src/vulkan/anv_private.h | 35 ++++++++++----- src/vulkan/gen7_cmd_buffer.c | 54 +++++++++++------------ src/vulkan/gen7_pipeline.c | 6 +-- src/vulkan/gen8_cmd_buffer.c | 23 +++++----- src/vulkan/gen8_pipeline.c | 10 ++--- 14 files changed, 133 insertions(+), 158 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index f7bde4cc7c7..7affe394992 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -477,19 +477,6 @@ typedef enum VkComponentSwizzle { VK_COMPONENT_SWIZZLE_MAX_ENUM = 0x7FFFFFFF } VkComponentSwizzle; -typedef enum { - VK_SHADER_STAGE_VERTEX = 0, - VK_SHADER_STAGE_TESS_CONTROL = 1, - VK_SHADER_STAGE_TESS_EVALUATION = 2, - VK_SHADER_STAGE_GEOMETRY = 3, - VK_SHADER_STAGE_FRAGMENT = 4, - VK_SHADER_STAGE_COMPUTE = 5, - VK_SHADER_STAGE_BEGIN_RANGE = VK_SHADER_STAGE_VERTEX, - VK_SHADER_STAGE_END_RANGE = VK_SHADER_STAGE_COMPUTE, - VK_SHADER_STAGE_NUM = (VK_SHADER_STAGE_COMPUTE - VK_SHADER_STAGE_VERTEX + 1), - VK_SHADER_STAGE_MAX_ENUM = 0x7FFFFFFF -} VkShaderStage; - typedef enum VkVertexInputRate { VK_VERTEX_INPUT_RATE_VERTEX = 0, VK_VERTEX_INPUT_RATE_INSTANCE = 1, @@ -1604,7 +1591,7 @@ 
typedef struct VkPipelineShaderStageCreateInfo { VkStructureType sType; const void* pNext; VkPipelineShaderStageCreateFlags flags; - VkShaderStage stage; + VkShaderStageFlagBits stage; VkShaderModule module; const char* pName; const VkSpecializationInfo* pSpecializationInfo; diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 125b078413b..19d4be90274 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -129,7 +129,7 @@ anv_cmd_state_init(struct anv_cmd_state *state) static VkResult anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, - VkShaderStage stage, uint32_t size) + gl_shader_stage stage, uint32_t size) { struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; @@ -520,8 +520,7 @@ void anv_CmdBindDescriptorSets( } if (set_layout->dynamic_offset_count > 0) { - VkShaderStage s; - for_each_bit(s, set_layout->shader_stages) { + anv_foreach_stage(s, set_layout->shader_stages) { anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic); struct anv_push_constants *push = @@ -585,7 +584,8 @@ add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, static void fill_descriptor_buffer_surface_state(struct anv_device *device, void *state, - VkShaderStage stage, VkDescriptorType type, + gl_shader_stage stage, + VkDescriptorType type, uint32_t offset, uint32_t range) { VkFormat format; @@ -594,8 +594,7 @@ fill_descriptor_buffer_surface_state(struct anv_device *device, void *state, switch (type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - if (anv_is_scalar_shader_stage(device->instance->physicalDevice.compiler, - stage)) { + if (device->instance->physicalDevice.compiler->scalar_stage[stage]) { stride = 4; } else { stride = 16; @@ -620,19 +619,20 @@ fill_descriptor_buffer_surface_state(struct anv_device *device, void *state, VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - VkShaderStage stage, 
struct anv_state *bt_state) + gl_shader_stage stage, + struct anv_state *bt_state) { struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; struct anv_pipeline_layout *layout; uint32_t color_count, bias, state_offset; - if (stage == VK_SHADER_STAGE_COMPUTE) + if (stage == MESA_SHADER_COMPUTE) layout = cmd_buffer->state.compute_pipeline->layout; else layout = cmd_buffer->state.pipeline->layout; - if (stage == VK_SHADER_STAGE_FRAGMENT) { + if (stage == MESA_SHADER_FRAGMENT) { bias = MAX_RTS; color_count = subpass->color_count; } else { @@ -729,12 +729,12 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, - VkShaderStage stage, struct anv_state *state) + gl_shader_stage stage, struct anv_state *state) { struct anv_pipeline_layout *layout; uint32_t sampler_count; - if (stage == VK_SHADER_STAGE_COMPUTE) + if (stage == MESA_SHADER_COMPUTE) layout = cmd_buffer->state.compute_pipeline->layout; else layout = cmd_buffer->state.pipeline->layout; @@ -858,7 +858,7 @@ void anv_CmdWaitEvents( struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, - VkShaderStage stage) + gl_shader_stage stage) { struct anv_push_constants *data = cmd_buffer->state.push_constants[stage]; @@ -893,9 +893,8 @@ void anv_CmdPushConstants( const void* pValues) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - VkShaderStage stage; - for_each_bit(stage, stageFlags) { + anv_foreach_stage(stage, stageFlags) { anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 081d1e85ae1..e1cfd788b73 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -77,8 +77,8 @@ VkResult anv_CreateDescriptorSetLayout( /* Initialize all 
samplers to 0 */ memset(samplers, 0, immutable_sampler_count * sizeof(*samplers)); - uint32_t sampler_count[VK_SHADER_STAGE_NUM] = { 0, }; - uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, }; + uint32_t sampler_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t surface_count[MESA_SHADER_STAGES] = { 0, }; uint32_t dynamic_offset_count = 0; for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { @@ -196,7 +196,7 @@ VkResult anv_CreatePipelineLayout( dynamic_offset_count += set_layout->binding[b].array_size; } - for (VkShaderStage s = 0; s < VK_SHADER_STAGE_NUM; s++) { + for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { l.set[set].stage[s].surface_start = l.stage[s].surface_count; l.set[set].stage[s].sampler_start = l.stage[s].sampler_count; @@ -217,7 +217,7 @@ VkResult anv_CreatePipelineLayout( } unsigned num_bindings = 0; - for (VkShaderStage s = 0; s < VK_SHADER_STAGE_NUM; s++) + for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) num_bindings += l.stage[s].surface_count + l.stage[s].sampler_count; size_t size = sizeof(*layout) + num_bindings * sizeof(layout->entries[0]); @@ -229,7 +229,7 @@ VkResult anv_CreatePipelineLayout( /* Now we can actually build our surface and sampler maps */ struct anv_pipeline_binding *entry = layout->entries; - for (VkShaderStage s = 0; s < VK_SHADER_STAGE_NUM; s++) { + for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { l.stage[s].surface_to_descriptor = entry; entry += l.stage[s].surface_count; l.stage[s].sampler_to_descriptor = entry; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 84f828eb4cf..67651c542b7 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -304,13 +304,13 @@ anv_device_init_meta_blit_state(struct anv_device *device) VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX, + .stage = VK_SHADER_STAGE_VERTEX_BIT, .module = anv_shader_module_to_handle(&vs), .pName 
= "main", .pSpecializationInfo = NULL }, { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */ .pName = "main", .pSpecializationInfo = NULL diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index cac34b3d7d3..a2667c7bb6e 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -134,13 +134,13 @@ create_pipeline(struct anv_device *device, .pStages = (VkPipelineShaderStageCreateInfo[]) { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX, + .stage = VK_SHADER_STAGE_VERTEX_BIT, .module = anv_shader_module_to_handle(&vs_m), .pName = "main", }, { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, .module = anv_shader_module_to_handle(&fs_m), .pName = "main", }, diff --git a/src/vulkan/anv_nir.h b/src/vulkan/anv_nir.h index b164ae581e1..666b127451a 100644 --- a/src/vulkan/anv_nir.h +++ b/src/vulkan/anv_nir.h @@ -30,13 +30,6 @@ extern "C" { #endif -static inline VkShaderStage -anv_vk_shader_stage_for_mesa_stage(gl_shader_stage stage) -{ - /* The two enums happen to line up. 
*/ - return (VkShaderStage)(int)stage; -} - void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar); void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, diff --git a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c index dd4f5dfe545..c500ab3e03c 100644 --- a/src/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c @@ -28,7 +28,6 @@ struct apply_dynamic_offsets_state { nir_shader *shader; nir_builder builder; - VkShaderStage stage; struct anv_pipeline_layout *layout; uint32_t indices_start; @@ -216,12 +215,11 @@ anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, { struct apply_dynamic_offsets_state state = { .shader = shader, - .stage = anv_vk_shader_stage_for_mesa_stage(shader->stage), .layout = pipeline->layout, .indices_start = shader->num_uniforms, }; - if (!state.layout || !state.layout->stage[state.stage].has_dynamic_offsets) + if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets) return; nir_foreach_overload(shader, overload) { diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c index fe1702dfda7..1b196cd62b7 100644 --- a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@ -28,7 +28,6 @@ struct apply_pipeline_layout_state { nir_shader *shader; nir_builder builder; - VkShaderStage stage; const struct anv_pipeline_layout *layout; bool progress; @@ -42,15 +41,17 @@ get_surface_index(unsigned set, unsigned binding, struct anv_descriptor_set_layout *set_layout = state->layout->set[set].layout; + gl_shader_stage stage = state->shader->stage; + assert(binding < set_layout->binding_count); - assert(set_layout->binding[binding].stage[state->stage].surface_index >= 0); + assert(set_layout->binding[binding].stage[stage].surface_index >= 0); uint32_t surface_index = - state->layout->set[set].stage[state->stage].surface_start + - 
set_layout->binding[binding].stage[state->stage].surface_index; + state->layout->set[set].stage[stage].surface_start + + set_layout->binding[binding].stage[stage].surface_index; - assert(surface_index < state->layout->stage[state->stage].surface_count); + assert(surface_index < state->layout->stage[stage].surface_count); return surface_index; } @@ -65,16 +66,18 @@ get_sampler_index(unsigned set, unsigned binding, nir_texop tex_op, assert(binding < set_layout->binding_count); - if (set_layout->binding[binding].stage[state->stage].sampler_index < 0) { + gl_shader_stage stage = state->shader->stage; + + if (set_layout->binding[binding].stage[stage].sampler_index < 0) { assert(tex_op == nir_texop_txf); return 0; } uint32_t sampler_index = - state->layout->set[set].stage[state->stage].sampler_start + - set_layout->binding[binding].stage[state->stage].sampler_index; + state->layout->set[set].stage[stage].sampler_start + + set_layout->binding[binding].stage[stage].sampler_index; - assert(sampler_index < state->layout->stage[state->stage].sampler_count); + assert(sampler_index < state->layout->stage[stage].sampler_count); return sampler_index; } @@ -217,7 +220,6 @@ anv_nir_apply_pipeline_layout(nir_shader *shader, { struct apply_pipeline_layout_state state = { .shader = shader, - .stage = anv_vk_shader_stage_for_mesa_stage(shader->stage), .layout = layout, }; diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 8b95bce8823..e910298921e 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -77,35 +77,19 @@ void anv_DestroyShaderModule( #define SPIR_V_MAGIC_NUMBER 0x07230203 -static const gl_shader_stage vk_shader_stage_to_mesa_stage[] = { - [VK_SHADER_STAGE_VERTEX] = MESA_SHADER_VERTEX, - [VK_SHADER_STAGE_TESS_CONTROL] = -1, - [VK_SHADER_STAGE_TESS_EVALUATION] = -1, - [VK_SHADER_STAGE_GEOMETRY] = MESA_SHADER_GEOMETRY, - [VK_SHADER_STAGE_FRAGMENT] = MESA_SHADER_FRAGMENT, - [VK_SHADER_STAGE_COMPUTE] = MESA_SHADER_COMPUTE, -}; - -bool 
-anv_is_scalar_shader_stage(const struct brw_compiler *compiler, - VkShaderStage stage) -{ - return compiler->scalar_stage[vk_shader_stage_to_mesa_stage[stage]]; -} - /* Eventually, this will become part of anv_CreateShader. Unfortunately, * we can't do that yet because we don't have the ability to copy nir. */ static nir_shader * anv_shader_compile_to_nir(struct anv_device *device, struct anv_shader_module *module, - const char *entrypoint_name, VkShaderStage vk_stage) + const char *entrypoint_name, + gl_shader_stage stage) { if (strcmp(entrypoint_name, "main") != 0) { anv_finishme("Multiple shaders per module not really supported"); } - gl_shader_stage stage = vk_shader_stage_to_mesa_stage[vk_stage]; const struct brw_compiler *compiler = device->instance->physicalDevice.compiler; const nir_shader_compiler_options *nir_options = @@ -304,7 +288,7 @@ static nir_shader * anv_pipeline_compile(struct anv_pipeline *pipeline, struct anv_shader_module *module, const char *entrypoint, - VkShaderStage stage, + gl_shader_stage stage, struct brw_stage_prog_data *prog_data) { const struct brw_compiler *compiler = @@ -315,7 +299,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, if (nir == NULL) return NULL; - anv_nir_lower_push_constants(nir, anv_is_scalar_shader_stage(compiler, stage)); + anv_nir_lower_push_constants(nir, compiler->scalar_stage[stage]); /* Figure out the number of parameters */ prog_data->nr_params = 0; @@ -358,7 +342,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, /* All binding table offsets provided by apply_pipeline_layout() are * relative to the start of the bindint table (plus MAX_RTS for VS). */ - unsigned bias = stage == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0; + unsigned bias = stage == MESA_SHADER_FRAGMENT ? 
MAX_RTS : 0; prog_data->binding_table.size_bytes = 0; prog_data->binding_table.texture_start = bias; prog_data->binding_table.ubo_start = bias; @@ -367,7 +351,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, /* Finish the optimization and compilation process */ nir = brw_lower_nir(nir, &pipeline->device->info, NULL, - anv_is_scalar_shader_stage(compiler, stage)); + compiler->scalar_stage[stage]); /* nir_lower_io will only handle the push constants; we need to set this * to the full number of possible uniforms. @@ -392,21 +376,21 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, } static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, - VkShaderStage stage, + gl_shader_stage stage, struct brw_stage_prog_data *prog_data) { struct brw_device_info *devinfo = &pipeline->device->info; uint32_t max_threads[] = { - [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads, - [VK_SHADER_STAGE_TESS_CONTROL] = 0, - [VK_SHADER_STAGE_TESS_EVALUATION] = 0, - [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads, - [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads, - [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads, + [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, + [MESA_SHADER_TESS_CTRL] = 0, + [MESA_SHADER_TESS_EVAL] = 0, + [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, + [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, + [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, }; pipeline->prog_data[stage] = prog_data; - pipeline->active_stages |= 1 << stage; + pipeline->active_stages |= mesa_to_vk_shader_stage(stage); pipeline->scratch_start[stage] = pipeline->total_scratch; pipeline->total_scratch = align_u32(pipeline->total_scratch, 1024) + @@ -431,7 +415,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, memset(prog_data, 0, sizeof(*prog_data)); nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - VK_SHADER_STAGE_VERTEX, + MESA_SHADER_VERTEX, &prog_data->base.base); if (nir == NULL) return 
vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -470,7 +454,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, ralloc_free(mem_ctx); - anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX, + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, &prog_data->base.base); return VK_SUCCESS; @@ -494,7 +478,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, memset(prog_data, 0, sizeof(*prog_data)); nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - VK_SHADER_STAGE_GEOMETRY, + MESA_SHADER_GEOMETRY, &prog_data->base.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -525,7 +509,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, ralloc_free(mem_ctx); - anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, &prog_data->base.base); return VK_SUCCESS; @@ -554,7 +538,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, prog_data->binding_table.render_target_start = 0; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - VK_SHADER_STAGE_FRAGMENT, + MESA_SHADER_FRAGMENT, &prog_data->base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -602,7 +586,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, ralloc_free(mem_ctx); - anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT, + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, &prog_data->base); return VK_SUCCESS; @@ -626,7 +610,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, memset(prog_data, 0, sizeof(*prog_data)); nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - VK_SHADER_STAGE_COMPUTE, + MESA_SHADER_COMPUTE, &prog_data->base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -649,7 +633,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, shader_code, code_size); ralloc_free(mem_ctx); - anv_pipeline_add_compiled_stage(pipeline, 
VK_SHADER_STAGE_COMPUTE, + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, &prog_data->base); return VK_SUCCESS; @@ -916,8 +900,8 @@ anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) for (uint32_t i = 0; i < info->stageCount; ++i) { switch (info->pStages[i].stage) { - case VK_SHADER_STAGE_TESS_CONTROL: - case VK_SHADER_STAGE_TESS_EVALUATION: + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: assert(info->pTessellationState); break; default: @@ -983,13 +967,13 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, const char *entrypoint = pCreateInfo->pStages[i].pName; switch (pCreateInfo->pStages[i].stage) { - case VK_SHADER_STAGE_VERTEX: + case VK_SHADER_STAGE_VERTEX_BIT: anv_pipeline_compile_vs(pipeline, pCreateInfo, module, entrypoint); break; - case VK_SHADER_STAGE_GEOMETRY: + case VK_SHADER_STAGE_GEOMETRY_BIT: anv_pipeline_compile_gs(pipeline, pCreateInfo, module, entrypoint); break; - case VK_SHADER_STAGE_FRAGMENT: + case VK_SHADER_STAGE_FRAGMENT_BIT: anv_pipeline_compile_fs(pipeline, pCreateInfo, module, entrypoint); break; default: diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 940d1662c1f..9c1e6b2f955 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -489,9 +489,6 @@ struct anv_physical_device { struct isl_device isl_dev; }; -bool anv_is_scalar_shader_stage(const struct brw_compiler *compiler, - VkShaderStage stage); - struct anv_instance { VK_LOADER_DATA _loader_data; @@ -772,7 +769,7 @@ struct anv_descriptor_set_binding_layout { /* Index into the sampler table for the associated sampler */ int16_t sampler_index; - } stage[VK_SHADER_STAGE_NUM]; + } stage[MESA_SHADER_STAGES]; /* Immutable samplers (or NULL if no immutable samplers) */ struct anv_sampler **immutable_samplers; @@ -852,7 +849,7 @@ struct anv_pipeline_layout { struct { uint32_t surface_start; uint32_t sampler_start; - } 
stage[VK_SHADER_STAGE_NUM]; + } stage[MESA_SHADER_STAGES]; } set[MAX_SETS]; uint32_t num_sets; @@ -863,7 +860,7 @@ struct anv_pipeline_layout { struct anv_pipeline_binding *surface_to_descriptor; uint32_t sampler_count; struct anv_pipeline_binding *sampler_to_descriptor; - } stage[VK_SHADER_STAGE_NUM]; + } stage[MESA_SHADER_STAGES]; struct anv_pipeline_binding entries[0]; }; @@ -991,7 +988,7 @@ struct anv_cmd_state { uint32_t restart_index; struct anv_vertex_binding vertex_bindings[MAX_VBS]; struct anv_descriptor_set * descriptors[MAX_SETS]; - struct anv_push_constants * push_constants[VK_SHADER_STAGE_NUM]; + struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; struct anv_dynamic_state dynamic; struct { @@ -1137,7 +1134,7 @@ void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, - VkShaderStage stage); + gl_shader_stage stage); void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, struct anv_render_pass *pass, @@ -1163,6 +1160,24 @@ struct anv_shader_module { char data[0]; }; +static inline gl_shader_stage +vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) +{ + assert(__builtin_popcount(vk_stage) == 1); + return ffs(vk_stage) - 1; +} + +static inline VkShaderStageFlagBits +mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) +{ + return (1 << mesa_stage); +} + +#define anv_foreach_stage(stage, stage_bits) \ + for (gl_shader_stage stage, __tmp = (gl_shader_stage)(stage_bits);\ + stage = __builtin_ffs(__tmp) - 1, __tmp; \ + __tmp &= ~(1 << (stage))) + struct anv_pipeline { struct anv_device * device; struct anv_batch batch; @@ -1179,8 +1194,8 @@ struct anv_pipeline { struct brw_gs_prog_data gs_prog_data; struct brw_cs_prog_data cs_prog_data; bool writes_point_size; - struct brw_stage_prog_data * prog_data[VK_SHADER_STAGE_NUM]; - uint32_t scratch_start[VK_SHADER_STAGE_NUM]; + struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; + 
uint32_t scratch_start[MESA_SHADER_STAGES]; uint32_t total_scratch; struct { uint32_t vs_start; diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 5f215c6c312..dd80144270b 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -36,18 +36,17 @@ static void cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) { static const uint32_t push_constant_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 21, - [VK_SHADER_STAGE_TESS_CONTROL] = 25, /* HS */ - [VK_SHADER_STAGE_TESS_EVALUATION] = 26, /* DS */ - [VK_SHADER_STAGE_GEOMETRY] = 22, - [VK_SHADER_STAGE_FRAGMENT] = 23, - [VK_SHADER_STAGE_COMPUTE] = 0, + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, }; - VkShaderStage stage; VkShaderStageFlags flushed = 0; - for_each_bit(stage, cmd_buffer->state.push_constants_dirty) { + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); if (state.offset == 0) @@ -60,14 +59,14 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), }); - flushed |= 1 << stage; + flushed |= mesa_to_vk_shader_stage(stage); } cmd_buffer->state.push_constants_dirty &= ~flushed; } static VkResult -flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, VkShaderStage stage) +flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage) { struct anv_state surfaces = { 0, }, samplers = { 0, }; VkResult result; @@ -80,21 +79,21 @@ flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, VkShaderStage stage) return result; static const uint32_t sampler_state_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 43, - [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ - [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ - 
[VK_SHADER_STAGE_GEOMETRY] = 46, - [VK_SHADER_STAGE_FRAGMENT] = 47, - [VK_SHADER_STAGE_COMPUTE] = 0, + [MESA_SHADER_VERTEX] = 43, + [MESA_SHADER_TESS_CTRL] = 44, /* HS */ + [MESA_SHADER_TESS_EVAL] = 45, /* DS */ + [MESA_SHADER_GEOMETRY] = 46, + [MESA_SHADER_FRAGMENT] = 47, + [MESA_SHADER_COMPUTE] = 0, }; static const uint32_t binding_table_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 38, - [VK_SHADER_STAGE_TESS_CONTROL] = 39, - [VK_SHADER_STAGE_TESS_EVALUATION] = 40, - [VK_SHADER_STAGE_GEOMETRY] = 41, - [VK_SHADER_STAGE_FRAGMENT] = 42, - [VK_SHADER_STAGE_COMPUTE] = 0, + [MESA_SHADER_VERTEX] = 38, + [MESA_SHADER_TESS_CTRL] = 39, + [MESA_SHADER_TESS_EVAL] = 40, + [MESA_SHADER_GEOMETRY] = 41, + [MESA_SHADER_FRAGMENT] = 42, + [MESA_SHADER_COMPUTE] = 0, }; if (samplers.alloc_size > 0) { @@ -117,12 +116,11 @@ flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, VkShaderStage stage) GENX_FUNC(GEN7, GEN7) void genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) { - VkShaderStage s; VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & cmd_buffer->state.pipeline->active_stages; VkResult result = VK_SUCCESS; - for_each_bit(s, dirty) { + anv_foreach_stage(s, dirty) { result = flush_descriptor_set(cmd_buffer, s); if (result != VK_SUCCESS) break; @@ -140,7 +138,7 @@ genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_emit_state_base_address(cmd_buffer); /* Re-emit all active binding tables */ - for_each_bit(s, cmd_buffer->state.pipeline->active_stages) { + anv_foreach_stage(s, cmd_buffer->state.pipeline->active_stages) { result = flush_descriptor_set(cmd_buffer, s); /* It had better succeed this time */ @@ -260,11 +258,11 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) VkResult result; result = anv_cmd_buffer_emit_samplers(cmd_buffer, - VK_SHADER_STAGE_COMPUTE, &samplers); + MESA_SHADER_COMPUTE, &samplers); if (result != VK_SUCCESS) return result; result = 
anv_cmd_buffer_emit_binding_table(cmd_buffer, - VK_SHADER_STAGE_COMPUTE, &surfaces); + MESA_SHADER_COMPUTE, &surfaces); if (result != VK_SUCCESS) return result; @@ -310,7 +308,7 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) /* FIXME: figure out descriptors for gen7 */ result = flush_compute_descriptor_set(cmd_buffer); assert(result == VK_SUCCESS); - cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; } cmd_buffer->state.compute_dirty = 0; diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 95c7bd53591..400b9ae997d 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -468,7 +468,7 @@ genX(graphics_pipeline_create)( else anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .KernelStartPointer = pipeline->vs_vec4, - .ScratchSpaceBaseOffset = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], + .ScratchSpaceBaseOffset = pipeline->scratch_start[MESA_SHADER_VERTEX], .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), .DispatchGRFStartRegisterforURBData = @@ -490,7 +490,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .KernelStartPointer = pipeline->gs_vec4, - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, @@ -531,7 +531,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), .KernelStartPointer0 = pipeline->ps_ksp0, - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), .MaximumNumberofThreads = device->info.max_wm_threads - 1, diff --git 
a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index bdccee8a7b7..23dc9ad5748 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -36,18 +36,17 @@ static void cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) { static const uint32_t push_constant_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 21, - [VK_SHADER_STAGE_TESS_CONTROL] = 25, /* HS */ - [VK_SHADER_STAGE_TESS_EVALUATION] = 26, /* DS */ - [VK_SHADER_STAGE_GEOMETRY] = 22, - [VK_SHADER_STAGE_FRAGMENT] = 23, - [VK_SHADER_STAGE_COMPUTE] = 0, + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, }; - VkShaderStage stage; VkShaderStageFlags flushed = 0; - for_each_bit(stage, cmd_buffer->state.push_constants_dirty) { + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); if (state.offset == 0) @@ -60,7 +59,7 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), }); - flushed |= 1 << stage; + flushed |= mesa_to_vk_shader_stage(stage); } cmd_buffer->state.push_constants_dirty &= ~flushed; @@ -493,11 +492,11 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) VkResult result; result = anv_cmd_buffer_emit_samplers(cmd_buffer, - VK_SHADER_STAGE_COMPUTE, &samplers); + MESA_SHADER_COMPUTE, &samplers); if (result != VK_SUCCESS) return result; result = anv_cmd_buffer_emit_binding_table(cmd_buffer, - VK_SHADER_STAGE_COMPUTE, &surfaces); + MESA_SHADER_COMPUTE, &surfaces); if (result != VK_SUCCESS) return result; @@ -548,7 +547,7 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { result = flush_compute_descriptor_set(cmd_buffer); assert(result == VK_SUCCESS); - 
cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; } cmd_buffer->state.compute_dirty = 0; diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 106f82d36fa..82a63d3bfb3 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -454,7 +454,7 @@ genX(graphics_pipeline_create)( .BindingTableEntryCount = 0, .ExpectedVertexCount = pipeline->gs_vertex_count, - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048), .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, @@ -512,7 +512,7 @@ genX(graphics_pipeline_create)( .AccessesUAV = false, .SoftwareExceptionEnable = false, - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX], .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048), .DispatchGRFStartRegisterForURBData = @@ -624,7 +624,7 @@ genX(graphics_pipeline_create)( .VectorMaskEnable = true, .SamplerCount = 1, - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, @@ -709,7 +709,7 @@ VkResult genX(compute_pipeline_create)( pipeline->active_stages = 0; pipeline->total_scratch = 0; - assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE); + assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); anv_pipeline_compile_cs(pipeline, pCreateInfo, module, pCreateInfo->stage.pName); @@ -719,7 +719,7 @@ VkResult genX(compute_pipeline_create)( const 
struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), - .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_COMPUTE], + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), .ScratchSpaceBasePointerHigh = 0, .StackSize = 0, -- cgit v1.2.3 From fed3586f34ce701b0f6464c54456122e096c0bee Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 16:14:58 -0800 Subject: vk/0.210.0: Rework result and structure type enums By and large, this is just moving enum values around. However, it also removed VK_UNSUPPORTED which we were returning a number of places. Those places now return VK_ERROR_INCOMPATABLE_DRIVER. --- include/vulkan/vulkan.h | 113 ++++++++++++++++++++++++---------------------- src/vulkan/anv_device.c | 16 +++---- src/vulkan/anv_pipeline.c | 5 +- src/vulkan/anv_query.c | 4 +- 4 files changed, 71 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 7affe394992..2d8f0202326 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -109,12 +109,11 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkCommandPool) typedef enum VkResult { VK_SUCCESS = 0, - VK_UNSUPPORTED = 1, - VK_NOT_READY = 2, - VK_TIMEOUT = 3, - VK_EVENT_SET = 4, - VK_EVENT_RESET = 5, - VK_INCOMPLETE = 6, + VK_NOT_READY = 1, + VK_TIMEOUT = 2, + VK_EVENT_SET = 3, + VK_EVENT_RESET = 4, + VK_INCOMPLETE = 5, VK_ERROR_OUT_OF_HOST_MEMORY = -1, VK_ERROR_OUT_OF_DEVICE_MEMORY = -2, VK_ERROR_INITIALIZATION_FAILED = -3, @@ -122,64 +121,68 @@ typedef enum VkResult { VK_ERROR_MEMORY_MAP_FAILED = -5, VK_ERROR_LAYER_NOT_PRESENT = -6, VK_ERROR_EXTENSION_NOT_PRESENT = -7, - VK_ERROR_INCOMPATIBLE_DRIVER = -8, - VK_RESULT_BEGIN_RANGE = VK_ERROR_INCOMPATIBLE_DRIVER, + VK_ERROR_FEATURE_NOT_PRESENT = -8, + VK_ERROR_INCOMPATIBLE_DRIVER = -9, + VK_ERROR_TOO_MANY_OBJECTS = -10, + 
VK_ERROR_FORMAT_NOT_SUPPORTED = -11, + VK_RESULT_BEGIN_RANGE = VK_ERROR_FORMAT_NOT_SUPPORTED, VK_RESULT_END_RANGE = VK_INCOMPLETE, - VK_RESULT_NUM = (VK_INCOMPLETE - VK_ERROR_INCOMPATIBLE_DRIVER + 1), + VK_RESULT_RANGE_SIZE = (VK_INCOMPLETE - VK_ERROR_FORMAT_NOT_SUPPORTED + 1), VK_RESULT_MAX_ENUM = 0x7FFFFFFF } VkResult; typedef enum VkStructureType { VK_STRUCTURE_TYPE_APPLICATION_INFO = 0, - VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO = 1, - VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO = 2, - VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO = 3, - VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 4, - VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 6, - VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 7, - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 8, - VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO = 9, + VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 1, + VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO = 2, + VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO = 3, + VK_STRUCTURE_TYPE_SUBMIT_INFO = 4, + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO = 5, + VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE = 6, + VK_STRUCTURE_TYPE_BIND_SPARSE_INFO = 7, + VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 8, + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 9, VK_STRUCTURE_TYPE_EVENT_CREATE_INFO = 10, - VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 11, - VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 12, - VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO = 13, - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 14, - VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 15, - VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 16, - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO = 17, - VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO = 18, - VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO = 19, - VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO = 20, - VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO = 21, - VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO = 22, - 
VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO = 23, - VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO = 24, - VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 25, - VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO = 26, - VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 27, - VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO = 28, - VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 29, - VK_STRUCTURE_TYPE_MEMORY_BARRIER = 30, - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 31, - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 32, + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO = 11, + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 12, + VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO = 13, + VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO = 14, + VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO = 15, + VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 16, + VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO = 17, + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 18, + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 19, + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO = 20, + VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO = 21, + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO = 22, + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO = 23, + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO = 24, + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO = 25, + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO = 26, + VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO = 27, + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 28, + VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 29, + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 30, + VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 31, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 32, VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO = 33, - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET = 34, - VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET = 35, - VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 36, - VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 37, - 
VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE = 38, - VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO = 39, - VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION = 40, - VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION = 41, - VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY = 42, - VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 43, - VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO = 44, - VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO = 45, - VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO = 46, - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO = 47, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO = 34, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET = 35, + VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET = 36, + VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 37, + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 38, + VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO = 39, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO = 40, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO = 41, + VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 42, + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 43, + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 44, + VK_STRUCTURE_TYPE_MEMORY_BARRIER = 45, + VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO = 46, + VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO = 47, VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, - VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - VK_STRUCTURE_TYPE_NUM = (VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), + VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO, + VK_STRUCTURE_TYPE_RANGE_SIZE = (VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF } VkStructureType; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 9cf40e6d858..3fba33960d5 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -92,7 +92,7 @@ anv_physical_device_init(struct anv_physical_device *device, } else if 
(device->info->gen == 8 && !device->info->is_cherryview) { /* Broadwell is as fully supported as anything */ } else { - result = vk_errorf(VK_UNSUPPORTED, + result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, "Vulkan not yet supported on %s", device->name); goto fail; } @@ -272,7 +272,7 @@ VkResult anv_EnumeratePhysicalDevices( if (instance->physicalDeviceCount < 0) { result = anv_physical_device_init(&instance->physicalDevice, instance, "/dev/dri/renderD128"); - if (result == VK_UNSUPPORTED) { + if (result == VK_ERROR_INCOMPATIBLE_DRIVER) { instance->physicalDeviceCount = 0; } else if (result == VK_SUCCESS) { instance->physicalDeviceCount = 1; @@ -1122,7 +1122,7 @@ VkResult anv_QueueBindSparse( const VkBindSparseInfo* pBindInfo, VkFence fence) { - stub_return(VK_UNSUPPORTED); + stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); } VkResult anv_CreateFence( @@ -1302,7 +1302,7 @@ VkResult anv_CreateEvent( const VkAllocationCallbacks* pAllocator, VkEvent* pEvent) { - stub_return(VK_UNSUPPORTED); + stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); } void anv_DestroyEvent( @@ -1317,21 +1317,21 @@ VkResult anv_GetEventStatus( VkDevice device, VkEvent event) { - stub_return(VK_UNSUPPORTED); + stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); } VkResult anv_SetEvent( VkDevice device, VkEvent event) { - stub_return(VK_UNSUPPORTED); + stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); } VkResult anv_ResetEvent( VkDevice device, VkEvent event) { - stub_return(VK_UNSUPPORTED); + stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); } // Buffer functions @@ -1401,7 +1401,7 @@ VkResult anv_CreateBufferView( const VkAllocationCallbacks* pAllocator, VkBufferView* pView) { - stub_return(VK_UNSUPPORTED); + stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); } void anv_DestroyBufferView( diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index e910298921e..367d5180bd3 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -158,7 +158,8 @@ VkResult anv_GetPipelineCacheData( size_t* pDataSize, void* 
pData) { - stub_return(VK_UNSUPPORTED); + *pDataSize = 0; + stub_return(VK_SUCCESS); } VkResult anv_MergePipelineCaches( @@ -167,7 +168,7 @@ VkResult anv_MergePipelineCaches( uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches) { - stub_return(VK_UNSUPPORTED); + stub_return(VK_SUCCESS); } void anv_DestroyPipeline( diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index cbceacf7f8e..320d42cb6fd 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -46,7 +46,7 @@ VkResult anv_CreateQueryPool( case VK_QUERY_TYPE_OCCLUSION: break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: - return VK_UNSUPPORTED; + return VK_ERROR_INCOMPATIBLE_DRIVER; default: unreachable(""); } @@ -107,7 +107,7 @@ VkResult anv_GetQueryPoolResults( if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { /* Where is the availabilty info supposed to go? */ anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); - return VK_UNSUPPORTED; + return VK_ERROR_INCOMPATIBLE_DRIVER; } assert(pool->type == VK_QUERY_TYPE_OCCLUSION); -- cgit v1.2.3 From 74c4c4acb60c9a9dc50bd573fbe3f5a3f7b10719 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 16:20:40 -0800 Subject: vk/0.210.0: Rework QueueFamilyProperties --- include/vulkan/vulkan.h | 3 ++- src/vulkan/anv_device.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 2d8f0202326..0a9499eadfe 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1311,7 +1311,8 @@ typedef struct VkPhysicalDeviceProperties { typedef struct VkQueueFamilyProperties { VkQueueFlags queueFlags; uint32_t queueCount; - VkBool32 supportsTimestamps; + uint32_t timestampValidBits; + VkExtent3D minImageTransferGranularity; } VkQueueFamilyProperties; typedef struct VkMemoryType { diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 3fba33960d5..11d97838c96 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -511,7 
+511,8 @@ void anv_GetPhysicalDeviceQueueFamilyProperties( VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, .queueCount = 1, - .supportsTimestamps = true, + .timestampValidBits = 0, /* XXX: Real value here */ + .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, }; } -- cgit v1.2.3 From d6897453037111d654b1e58dd6e74aac6aa21134 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 16:58:54 -0800 Subject: vk/0.210.0: Rework device features and limits --- include/vulkan/vulkan.h | 95 +++++++++++++++++++++++++++---------------------- src/vulkan/anv_device.c | 89 ++++++++++++++++++++++++++------------------- 2 files changed, 105 insertions(+), 79 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 0a9499eadfe..8036b6126aa 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1123,29 +1123,31 @@ typedef struct VkPhysicalDeviceFeatures { VkBool32 geometryShader; VkBool32 tessellationShader; VkBool32 sampleRateShading; - VkBool32 dualSourceBlend; + VkBool32 dualSrcBlend; VkBool32 logicOp; VkBool32 multiDrawIndirect; - VkBool32 depthClip; + VkBool32 depthClamp; VkBool32 depthBiasClamp; VkBool32 fillModeNonSolid; VkBool32 depthBounds; VkBool32 wideLines; VkBool32 largePoints; + VkBool32 alphaToOne; + VkBool32 multiViewport; + VkBool32 samplerAnisotropy; VkBool32 textureCompressionETC2; VkBool32 textureCompressionASTC_LDR; VkBool32 textureCompressionBC; - VkBool32 occlusionQueryNonConservative; + VkBool32 occlusionQueryPrecise; VkBool32 pipelineStatisticsQuery; - VkBool32 vertexSideEffects; - VkBool32 tessellationSideEffects; - VkBool32 geometrySideEffects; - VkBool32 fragmentSideEffects; - VkBool32 shaderTessellationPointSize; - VkBool32 shaderGeometryPointSize; + VkBool32 vertexPipelineStoresAndAtomics; + VkBool32 fragmentStoresAndAtomics; + VkBool32 shaderTessellationAndGeometryPointSize; VkBool32 shaderImageGatherExtended; VkBool32 shaderStorageImageExtendedFormats; VkBool32 
shaderStorageImageMultisample; + VkBool32 shaderStorageImageReadWithoutFormat; + VkBool32 shaderStorageImageWriteWithoutFormat; VkBool32 shaderUniformBufferArrayDynamicIndexing; VkBool32 shaderSampledImageArrayDynamicIndexing; VkBool32 shaderStorageBufferArrayDynamicIndexing; @@ -1156,8 +1158,7 @@ typedef struct VkPhysicalDeviceFeatures { VkBool32 shaderInt64; VkBool32 shaderInt16; VkBool32 shaderResourceResidency; - VkBool32 shaderResourceMinLOD; - VkBool32 alphaToOne; + VkBool32 shaderResourceMinLod; VkBool32 sparseBinding; VkBool32 sparseResidencyBuffer; VkBool32 sparseResidencyImage2D; @@ -1167,6 +1168,7 @@ typedef struct VkPhysicalDeviceFeatures { VkBool32 sparseResidency8Samples; VkBool32 sparseResidency16Samples; VkBool32 sparseResidencyAliased; + VkBool32 variableMultisampleRate; } VkPhysicalDeviceFeatures; typedef struct VkFormatProperties { @@ -1195,21 +1197,22 @@ typedef struct VkPhysicalDeviceLimits { uint32_t maxImageDimension3D; uint32_t maxImageDimensionCube; uint32_t maxImageArrayLayers; - VkSampleCountFlags sampleCounts; - uint32_t maxTexelBufferSize; - uint32_t maxUniformBufferSize; - uint32_t maxStorageBufferSize; + uint32_t maxTexelBufferElements; + uint32_t maxUniformBufferRange; + uint32_t maxStorageBufferRange; uint32_t maxPushConstantsSize; uint32_t maxMemoryAllocationCount; + uint32_t maxSamplerAllocationCount; VkDeviceSize bufferImageGranularity; VkDeviceSize sparseAddressSpaceSize; uint32_t maxBoundDescriptorSets; - uint32_t maxDescriptorSets; uint32_t maxPerStageDescriptorSamplers; uint32_t maxPerStageDescriptorUniformBuffers; uint32_t maxPerStageDescriptorStorageBuffers; uint32_t maxPerStageDescriptorSampledImages; uint32_t maxPerStageDescriptorStorageImages; + uint32_t maxPerStageDescriptorInputAttachments; + uint32_t maxPerStageResources; uint32_t maxDescriptorSetSamplers; uint32_t maxDescriptorSetUniformBuffers; uint32_t maxDescriptorSetUniformBuffersDynamic; @@ -1217,27 +1220,28 @@ typedef struct VkPhysicalDeviceLimits { uint32_t 
maxDescriptorSetStorageBuffersDynamic; uint32_t maxDescriptorSetSampledImages; uint32_t maxDescriptorSetStorageImages; + uint32_t maxDescriptorSetInputAttachments; uint32_t maxVertexInputAttributes; uint32_t maxVertexInputBindings; uint32_t maxVertexInputAttributeOffset; uint32_t maxVertexInputBindingStride; uint32_t maxVertexOutputComponents; - uint32_t maxTessGenLevel; - uint32_t maxTessPatchSize; - uint32_t maxTessControlPerVertexInputComponents; - uint32_t maxTessControlPerVertexOutputComponents; - uint32_t maxTessControlPerPatchOutputComponents; - uint32_t maxTessControlTotalOutputComponents; - uint32_t maxTessEvaluationInputComponents; - uint32_t maxTessEvaluationOutputComponents; + uint32_t maxTessellationGenerationLevel; + uint32_t maxTessellationPatchSize; + uint32_t maxTessellationControlPerVertexInputComponents; + uint32_t maxTessellationControlPerVertexOutputComponents; + uint32_t maxTessellationControlPerPatchOutputComponents; + uint32_t maxTessellationControlTotalOutputComponents; + uint32_t maxTessellationEvaluationInputComponents; + uint32_t maxTessellationEvaluationOutputComponents; uint32_t maxGeometryShaderInvocations; uint32_t maxGeometryInputComponents; uint32_t maxGeometryOutputComponents; uint32_t maxGeometryOutputVertices; uint32_t maxGeometryTotalOutputComponents; uint32_t maxFragmentInputComponents; - uint32_t maxFragmentOutputBuffers; - uint32_t maxFragmentDualSourceBuffers; + uint32_t maxFragmentOutputAttachments; + uint32_t maxFragmentDualSrcAttachments; uint32_t maxFragmentCombinedOutputResources; uint32_t maxComputeSharedMemorySize; uint32_t maxComputeWorkGroupCount[3]; @@ -1247,21 +1251,20 @@ typedef struct VkPhysicalDeviceLimits { uint32_t subTexelPrecisionBits; uint32_t mipmapPrecisionBits; uint32_t maxDrawIndexedIndexValue; - uint32_t maxDrawIndirectInstanceCount; - VkBool32 primitiveRestartForPatches; + uint32_t maxDrawIndirectCount; float maxSamplerLodBias; float maxSamplerAnisotropy; uint32_t maxViewports; uint32_t 
maxViewportDimensions[2]; float viewportBoundsRange[2]; uint32_t viewportSubPixelBits; - uint32_t minMemoryMapAlignment; - uint32_t minTexelBufferOffsetAlignment; - uint32_t minUniformBufferOffsetAlignment; - uint32_t minStorageBufferOffsetAlignment; - uint32_t minTexelOffset; + size_t minMemoryMapAlignment; + VkDeviceSize minTexelBufferOffsetAlignment; + VkDeviceSize minUniformBufferOffsetAlignment; + VkDeviceSize minStorageBufferOffsetAlignment; + int32_t minTexelOffset; uint32_t maxTexelOffset; - uint32_t minTexelGatherOffset; + int32_t minTexelGatherOffset; uint32_t maxTexelGatherOffset; float minInterpolationOffset; float maxInterpolationOffset; @@ -1269,23 +1272,31 @@ typedef struct VkPhysicalDeviceLimits { uint32_t maxFramebufferWidth; uint32_t maxFramebufferHeight; uint32_t maxFramebufferLayers; - uint32_t maxFramebufferColorSamples; - uint32_t maxFramebufferDepthSamples; - uint32_t maxFramebufferStencilSamples; + VkSampleCountFlags framebufferColorSampleCounts; + VkSampleCountFlags framebufferDepthSampleCounts; + VkSampleCountFlags framebufferStencilSampleCounts; + VkSampleCountFlags framebufferNoAttachmentsSampleCounts; uint32_t maxColorAttachments; - uint32_t maxSampledImageColorSamples; - uint32_t maxSampledImageDepthSamples; - uint32_t maxSampledImageIntegerSamples; - uint32_t maxStorageImageSamples; + VkSampleCountFlags sampledImageColorSampleCounts; + VkSampleCountFlags sampledImageIntegerSampleCounts; + VkSampleCountFlags sampledImageDepthSampleCounts; + VkSampleCountFlags sampledImageStencilSampleCounts; + VkSampleCountFlags storageImageSampleCounts; uint32_t maxSampleMaskWords; - uint64_t timestampFrequency; + float timestampPeriod; uint32_t maxClipDistances; uint32_t maxCullDistances; uint32_t maxCombinedClipAndCullDistances; + uint32_t discreteQueuePriorities; float pointSizeRange[2]; float lineWidthRange[2]; float pointSizeGranularity; float lineWidthGranularity; + VkBool32 strictLines; + VkBool32 standardSampleLocations; + VkDeviceSize 
optimalBufferCopyOffsetAlignment; + VkDeviceSize optimalBufferCopyRowPitchAlignment; + VkDeviceSize nonCoherentAtomSize; } VkPhysicalDeviceLimits; typedef struct VkPhysicalDeviceSparseProperties { diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 11d97838c96..5b300afc0a8 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -324,26 +324,26 @@ void anv_GetPhysicalDeviceFeatures( .geometryShader = true, .tessellationShader = false, .sampleRateShading = false, - .dualSourceBlend = true, + .dualSrcBlend = true, .logicOp = true, .multiDrawIndirect = true, - .depthClip = false, + .depthClamp = false, .depthBiasClamp = false, .fillModeNonSolid = true, .depthBounds = false, .wideLines = true, .largePoints = true, + .alphaToOne = true, + .multiViewport = true, + .samplerAnisotropy = false, /* FINISHME */ .textureCompressionETC2 = true, .textureCompressionASTC_LDR = true, .textureCompressionBC = true, - .occlusionQueryNonConservative = false, /* FINISHME */ + .occlusionQueryPrecise = false, /* FINISHME */ .pipelineStatisticsQuery = true, - .vertexSideEffects = false, - .tessellationSideEffects = false, - .geometrySideEffects = false, - .fragmentSideEffects = false, - .shaderTessellationPointSize = false, - .shaderGeometryPointSize = true, + .vertexPipelineStoresAndAtomics = false, + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = true, .shaderImageGatherExtended = true, .shaderStorageImageExtendedFormats = false, .shaderStorageImageMultisample = false, @@ -351,12 +351,15 @@ void anv_GetPhysicalDeviceFeatures( .shaderSampledImageArrayDynamicIndexing = false, .shaderStorageBufferArrayDynamicIndexing = false, .shaderStorageImageArrayDynamicIndexing = false, + .shaderStorageImageReadWithoutFormat = false, + .shaderStorageImageWriteWithoutFormat = true, .shaderClipDistance = false, .shaderCullDistance = false, .shaderFloat64 = false, .shaderInt64 = false, .shaderInt16 = false, .alphaToOne = true, + 
.variableMultisampleRate = false, }; } @@ -369,30 +372,34 @@ void anv_GetPhysicalDeviceProperties( anv_finishme("Get correct values for VkPhysicalDeviceLimits"); + VkSampleCountFlags sample_counts = + VK_SAMPLE_COUNT_1_BIT | + VK_SAMPLE_COUNT_2_BIT | + VK_SAMPLE_COUNT_4_BIT | + VK_SAMPLE_COUNT_8_BIT; + VkPhysicalDeviceLimits limits = { .maxImageDimension1D = (1 << 14), .maxImageDimension2D = (1 << 14), .maxImageDimension3D = (1 << 10), .maxImageDimensionCube = (1 << 14), .maxImageArrayLayers = (1 << 10), - - /* Broadwell supports 1, 2, 4, and 8 samples. */ - .sampleCounts = 4, - - .maxTexelBufferSize = (1 << 14), - .maxUniformBufferSize = UINT32_MAX, - .maxStorageBufferSize = UINT32_MAX, + .maxTexelBufferElements = (1 << 14), + .maxUniformBufferRange = UINT32_MAX, + .maxStorageBufferRange = UINT32_MAX, .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, .maxMemoryAllocationCount = UINT32_MAX, + .maxSamplerAllocationCount = UINT32_MAX, .bufferImageGranularity = 64, /* A cache line */ .sparseAddressSpaceSize = 0, .maxBoundDescriptorSets = MAX_SETS, - .maxDescriptorSets = UINT32_MAX, .maxPerStageDescriptorSamplers = 64, .maxPerStageDescriptorUniformBuffers = 64, .maxPerStageDescriptorStorageBuffers = 64, .maxPerStageDescriptorSampledImages = 64, .maxPerStageDescriptorStorageImages = 64, + .maxPerStageDescriptorInputAttachments = 64, + .maxPerStageResources = 128, .maxDescriptorSetSamplers = 256, .maxDescriptorSetUniformBuffers = 256, .maxDescriptorSetUniformBuffersDynamic = 256, @@ -400,27 +407,28 @@ void anv_GetPhysicalDeviceProperties( .maxDescriptorSetStorageBuffersDynamic = 256, .maxDescriptorSetSampledImages = 256, .maxDescriptorSetStorageImages = 256, + .maxDescriptorSetInputAttachments = 256, .maxVertexInputAttributes = 32, .maxVertexInputBindings = 32, .maxVertexInputAttributeOffset = 256, .maxVertexInputBindingStride = 256, .maxVertexOutputComponents = 32, - .maxTessGenLevel = 0, - .maxTessPatchSize = 0, - .maxTessControlPerVertexInputComponents = 0, - 
.maxTessControlPerVertexOutputComponents = 0, - .maxTessControlPerPatchOutputComponents = 0, - .maxTessControlTotalOutputComponents = 0, - .maxTessEvaluationInputComponents = 0, - .maxTessEvaluationOutputComponents = 0, + .maxTessellationGenerationLevel = 0, + .maxTessellationPatchSize = 0, + .maxTessellationControlPerVertexInputComponents = 0, + .maxTessellationControlPerVertexOutputComponents = 0, + .maxTessellationControlPerPatchOutputComponents = 0, + .maxTessellationControlTotalOutputComponents = 0, + .maxTessellationEvaluationInputComponents = 0, + .maxTessellationEvaluationOutputComponents = 0, .maxGeometryShaderInvocations = 6, .maxGeometryInputComponents = 16, .maxGeometryOutputComponents = 16, .maxGeometryOutputVertices = 16, .maxGeometryTotalOutputComponents = 16, .maxFragmentInputComponents = 16, - .maxFragmentOutputBuffers = 8, - .maxFragmentDualSourceBuffers = 2, + .maxFragmentOutputAttachments = 8, + .maxFragmentDualSrcAttachments = 2, .maxFragmentCombinedOutputResources = 8, .maxComputeSharedMemorySize = 1024, .maxComputeWorkGroupCount = { @@ -438,8 +446,7 @@ void anv_GetPhysicalDeviceProperties( .subTexelPrecisionBits = 4 /* FIXME */, .mipmapPrecisionBits = 4 /* FIXME */, .maxDrawIndexedIndexValue = UINT32_MAX, - .maxDrawIndirectInstanceCount = UINT32_MAX, - .primitiveRestartForPatches = UINT32_MAX, + .maxDrawIndirectCount = UINT32_MAX, .maxSamplerLodBias = 16, .maxSamplerAnisotropy = 16, .maxViewports = MAX_VIEWPORTS, @@ -460,23 +467,31 @@ void anv_GetPhysicalDeviceProperties( .maxFramebufferWidth = (1 << 14), .maxFramebufferHeight = (1 << 14), .maxFramebufferLayers = (1 << 10), - .maxFramebufferColorSamples = 8, - .maxFramebufferDepthSamples = 8, - .maxFramebufferStencilSamples = 8, + .framebufferColorSampleCounts = sample_counts, + .framebufferDepthSampleCounts = sample_counts, + .framebufferStencilSampleCounts = sample_counts, + .framebufferNoAttachmentsSampleCounts = sample_counts, .maxColorAttachments = MAX_RTS, - .maxSampledImageColorSamples 
= 8, - .maxSampledImageDepthSamples = 8, - .maxSampledImageIntegerSamples = 1, - .maxStorageImageSamples = 1, + .sampledImageColorSampleCounts = sample_counts, + .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .sampledImageDepthSampleCounts = sample_counts, + .sampledImageStencilSampleCounts = sample_counts, + .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, - .timestampFrequency = 1000 * 1000 * 1000 / 80, + .timestampPeriod = 80.0 / (1000 * 1000 * 1000), .maxClipDistances = 0 /* FIXME */, .maxCullDistances = 0 /* FIXME */, .maxCombinedClipAndCullDistances = 0 /* FIXME */, + .discreteQueuePriorities = 1, .pointSizeRange = { 0.125, 255.875 }, .lineWidthRange = { 0.0, 7.9921875 }, .pointSizeGranularity = (1.0 / 8.0), .lineWidthGranularity = (1.0 / 128.0), + .strictLines = false, /* FINISHME */ + .standardSampleLocations = true, /* FINISHME */ + .optimalBufferCopyOffsetAlignment = 128, + .optimalBufferCopyRowPitchAlignment = 128, + .nonCoherentAtomSize = 64, }; *pProperties = (VkPhysicalDeviceProperties) { -- cgit v1.2.3 From 5757ad2959a91f71dcc4a3661cce0e5ab065052b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 17:07:26 -0800 Subject: vk/0.210.0: Remove depth clip and add depth clamp --- include/vulkan/vulkan.h | 2 +- src/vulkan/anv_meta.c | 1 - src/vulkan/anv_meta_clear.c | 2 -- src/vulkan/gen8_pipeline.c | 6 +++--- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 8036b6126aa..67ab34ddf23 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1688,7 +1688,7 @@ typedef struct VkPipelineRasterizationStateCreateInfo { VkStructureType sType; const void* pNext; VkPipelineRasterizationStateCreateFlags flags; - VkBool32 depthClipEnable; + VkBool32 depthClampEnable; VkBool32 rasterizerDiscardEnable; VkPolygonMode polygonMode; VkCullModeFlags cullMode; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c 
index 67651c542b7..bed01980a0d 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -334,7 +334,6 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .depthClipEnable = true, .rasterizerDiscardEnable = false, .polygonMode = VK_POLYGON_MODE_FILL, .cullMode = VK_CULL_MODE_NONE, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index a2667c7bb6e..24ff1ea75e4 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -160,13 +160,11 @@ create_pipeline(struct anv_device *device, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .depthClipEnable = false, .rasterizerDiscardEnable = false, .polygonMode = VK_POLYGON_MODE_FILL, .cullMode = VK_CULL_MODE_NONE, .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, .depthBiasEnable = false, - .depthClipEnable = false, }, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 82a63d3bfb3..d0395741474 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -135,11 +135,11 @@ emit_rs_state(struct anv_pipeline *pipeline, .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], .ScissorRectangleEnable = !(extra && extra->disable_scissor), #if ANV_GEN == 8 - .ViewportZClipTestEnable = info->depthClipEnable + .ViewportZClipTestEnable = true, #else /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ - .ViewportZFarClipTestEnable = info->depthClipEnable, - .ViewportZNearClipTestEnable = info->depthClipEnable, + .ViewportZFarClipTestEnable = true, + .ViewportZNearClipTestEnable = true, #endif }; -- cgit v1.2.3 From 4e904a0310be8a5c311355473e70df4fd4d9caa1 Mon Sep 17 
00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 17:18:41 -0800 Subject: vk/0.210.0: Rework vkQueueSubmit --- include/vulkan/vulkan.h | 17 ++++++++++++++--- src/vulkan/anv_device.c | 38 ++++++++++++++++++++------------------ src/vulkan/anv_dump.c | 8 ++++++-- 3 files changed, 40 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 67ab34ddf23..a897a6efbc0 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1377,6 +1377,17 @@ typedef struct VkLayerProperties { char description[VK_MAX_DESCRIPTION_SIZE]; } VkLayerProperties; +typedef struct VkSubmitInfo { + VkStructureType sType; + const void* pNext; + uint32_t waitSemaphoreCount; + const VkSemaphore* pWaitSemaphores; + uint32_t commandBufferCount; + const VkCommandBuffer* pCommandBuffers; + uint32_t signalSemaphoreCount; + const VkSemaphore* pSignalSemaphores; +} VkSubmitInfo; + typedef struct VkMemoryAllocateInfo { VkStructureType sType; const void* pNext; @@ -2155,7 +2166,7 @@ typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceExtensionProperties)(VkPhysica typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceLayerProperties)(uint32_t* pPropertyCount, VkLayerProperties* pProperties); typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceLayerProperties)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkLayerProperties* pProperties); typedef void (VKAPI_PTR *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue); -typedef VkResult (VKAPI_PTR *PFN_vkQueueSubmit)(VkQueue queue, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers, VkFence fence); +typedef VkResult (VKAPI_PTR *PFN_vkQueueSubmit)(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence); typedef VkResult (VKAPI_PTR *PFN_vkQueueWaitIdle)(VkQueue queue); typedef VkResult (VKAPI_PTR *PFN_vkDeviceWaitIdle)(VkDevice device); typedef VkResult (VKAPI_PTR 
*PFN_vkAllocateMemory)(VkDevice device, const VkMemoryAllocateInfo* pAllocateInfo, const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory); @@ -2367,8 +2378,8 @@ VKAPI_ATTR void VKAPI_CALL vkGetDeviceQueue( VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( VkQueue queue, - uint32_t commandBufferCount, - const VkCommandBuffer* pCommandBuffers, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, VkFence fence); VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitIdle( diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 5b300afc0a8..e3b3541d251 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -808,8 +808,8 @@ void anv_GetDeviceQueue( VkResult anv_QueueSubmit( VkQueue _queue, - uint32_t commandBufferCount, - const VkCommandBuffer* pCommandBuffers, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, VkFence _fence) { ANV_FROM_HANDLE(anv_queue, queue, _queue); @@ -817,29 +817,31 @@ VkResult anv_QueueSubmit( struct anv_device *device = queue->device; int ret; - for (uint32_t i = 0; i < commandBufferCount; i++) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); + for (uint32_t i = 0; i < submitCount; i++) { + for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, + pSubmits[i].pCommandBuffers[j]); + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - - ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); - if (ret != 0) { - /* We don't know the real error. */ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "execbuf2 failed: %m"); - } - - if (fence) { - ret = anv_gem_execbuffer(device, &fence->execbuf); + ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); if (ret != 0) { /* We don't know the real error. 
*/ return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); } - } - for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) - cmd_buffer->execbuf2.bos[i]->offset = cmd_buffer->execbuf2.objects[i].offset; + if (fence) { + ret = anv_gem_execbuffer(device, &fence->execbuf); + if (ret != 0) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "execbuf2 failed: %m"); + } + } + + for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++) + cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset; + } } return VK_SUCCESS; diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 4db7c2539e1..3a1430d49a0 100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -161,8 +161,12 @@ anv_dump_image_to_ppm(struct anv_device *device, }, NULL, &fence); assert(result == VK_SUCCESS); - result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), - 1, &cmd, fence); + result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), 1, + &(VkSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &cmd, + }, fence); assert(result == VK_SUCCESS); result = anv_WaitForFences(vk_device, 1, &fence, true, UINT64_MAX); -- cgit v1.2.3 From fa2435de3c1d849571e4929291d808cd8b46c080 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 17:47:37 -0800 Subject: vk/0.210.0: Update the VkFormat enum --- include/vulkan/vulkan.h | 319 ++++++++++++++++++++++--------------------- src/vulkan/anv_formats.c | 160 +++++++++++----------- src/vulkan/anv_wsi_wayland.c | 11 ++ 3 files changed, 254 insertions(+), 236 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index a897a6efbc0..b382f548bf8 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -208,14 +208,14 @@ typedef enum VkInternalAllocationType { typedef enum VkFormat { VK_FORMAT_UNDEFINED = 0, - VK_FORMAT_R4G4_UNORM = 1, - 
VK_FORMAT_R4G4_USCALED = 2, - VK_FORMAT_R4G4B4A4_UNORM = 3, - VK_FORMAT_R4G4B4A4_USCALED = 4, - VK_FORMAT_R5G6B5_UNORM = 5, - VK_FORMAT_R5G6B5_USCALED = 6, - VK_FORMAT_R5G5B5A1_UNORM = 7, - VK_FORMAT_R5G5B5A1_USCALED = 8, + VK_FORMAT_R4G4_UNORM_PACK8 = 1, + VK_FORMAT_R4G4B4A4_UNORM_PACK16 = 2, + VK_FORMAT_B4G4R4A4_UNORM_PACK16 = 3, + VK_FORMAT_R5G6B5_UNORM_PACK16 = 4, + VK_FORMAT_B5G6R5_UNORM_PACK16 = 5, + VK_FORMAT_R5G5B5A1_UNORM_PACK16 = 6, + VK_FORMAT_B5G5R5A1_UNORM_PACK16 = 7, + VK_FORMAT_A1R5G5B5_UNORM_PACK16 = 8, VK_FORMAT_R8_UNORM = 9, VK_FORMAT_R8_SNORM = 10, VK_FORMAT_R8_USCALED = 11, @@ -237,153 +237,164 @@ typedef enum VkFormat { VK_FORMAT_R8G8B8_UINT = 27, VK_FORMAT_R8G8B8_SINT = 28, VK_FORMAT_R8G8B8_SRGB = 29, - VK_FORMAT_R8G8B8A8_UNORM = 30, - VK_FORMAT_R8G8B8A8_SNORM = 31, - VK_FORMAT_R8G8B8A8_USCALED = 32, - VK_FORMAT_R8G8B8A8_SSCALED = 33, - VK_FORMAT_R8G8B8A8_UINT = 34, - VK_FORMAT_R8G8B8A8_SINT = 35, - VK_FORMAT_R8G8B8A8_SRGB = 36, - VK_FORMAT_R10G10B10A2_UNORM = 37, - VK_FORMAT_R10G10B10A2_SNORM = 38, - VK_FORMAT_R10G10B10A2_USCALED = 39, - VK_FORMAT_R10G10B10A2_SSCALED = 40, - VK_FORMAT_R10G10B10A2_UINT = 41, - VK_FORMAT_R10G10B10A2_SINT = 42, - VK_FORMAT_R16_UNORM = 43, - VK_FORMAT_R16_SNORM = 44, - VK_FORMAT_R16_USCALED = 45, - VK_FORMAT_R16_SSCALED = 46, - VK_FORMAT_R16_UINT = 47, - VK_FORMAT_R16_SINT = 48, - VK_FORMAT_R16_SFLOAT = 49, - VK_FORMAT_R16G16_UNORM = 50, - VK_FORMAT_R16G16_SNORM = 51, - VK_FORMAT_R16G16_USCALED = 52, - VK_FORMAT_R16G16_SSCALED = 53, - VK_FORMAT_R16G16_UINT = 54, - VK_FORMAT_R16G16_SINT = 55, - VK_FORMAT_R16G16_SFLOAT = 56, - VK_FORMAT_R16G16B16_UNORM = 57, - VK_FORMAT_R16G16B16_SNORM = 58, - VK_FORMAT_R16G16B16_USCALED = 59, - VK_FORMAT_R16G16B16_SSCALED = 60, - VK_FORMAT_R16G16B16_UINT = 61, - VK_FORMAT_R16G16B16_SINT = 62, - VK_FORMAT_R16G16B16_SFLOAT = 63, - VK_FORMAT_R16G16B16A16_UNORM = 64, - VK_FORMAT_R16G16B16A16_SNORM = 65, - VK_FORMAT_R16G16B16A16_USCALED = 66, - VK_FORMAT_R16G16B16A16_SSCALED = 67, - 
VK_FORMAT_R16G16B16A16_UINT = 68, - VK_FORMAT_R16G16B16A16_SINT = 69, - VK_FORMAT_R16G16B16A16_SFLOAT = 70, - VK_FORMAT_R32_UINT = 71, - VK_FORMAT_R32_SINT = 72, - VK_FORMAT_R32_SFLOAT = 73, - VK_FORMAT_R32G32_UINT = 74, - VK_FORMAT_R32G32_SINT = 75, - VK_FORMAT_R32G32_SFLOAT = 76, - VK_FORMAT_R32G32B32_UINT = 77, - VK_FORMAT_R32G32B32_SINT = 78, - VK_FORMAT_R32G32B32_SFLOAT = 79, - VK_FORMAT_R32G32B32A32_UINT = 80, - VK_FORMAT_R32G32B32A32_SINT = 81, - VK_FORMAT_R32G32B32A32_SFLOAT = 82, - VK_FORMAT_R64_SFLOAT = 83, - VK_FORMAT_R64G64_SFLOAT = 84, - VK_FORMAT_R64G64B64_SFLOAT = 85, - VK_FORMAT_R64G64B64A64_SFLOAT = 86, - VK_FORMAT_R11G11B10_UFLOAT = 87, - VK_FORMAT_R9G9B9E5_UFLOAT = 88, - VK_FORMAT_D16_UNORM = 89, - VK_FORMAT_D24_UNORM_X8 = 90, - VK_FORMAT_D32_SFLOAT = 91, - VK_FORMAT_S8_UINT = 92, - VK_FORMAT_D16_UNORM_S8_UINT = 93, - VK_FORMAT_D24_UNORM_S8_UINT = 94, - VK_FORMAT_D32_SFLOAT_S8_UINT = 95, - VK_FORMAT_BC1_RGB_UNORM = 96, - VK_FORMAT_BC1_RGB_SRGB = 97, - VK_FORMAT_BC1_RGBA_UNORM = 98, - VK_FORMAT_BC1_RGBA_SRGB = 99, - VK_FORMAT_BC2_UNORM = 100, - VK_FORMAT_BC2_SRGB = 101, - VK_FORMAT_BC3_UNORM = 102, - VK_FORMAT_BC3_SRGB = 103, - VK_FORMAT_BC4_UNORM = 104, - VK_FORMAT_BC4_SNORM = 105, - VK_FORMAT_BC5_UNORM = 106, - VK_FORMAT_BC5_SNORM = 107, - VK_FORMAT_BC6H_UFLOAT = 108, - VK_FORMAT_BC6H_SFLOAT = 109, - VK_FORMAT_BC7_UNORM = 110, - VK_FORMAT_BC7_SRGB = 111, - VK_FORMAT_ETC2_R8G8B8_UNORM = 112, - VK_FORMAT_ETC2_R8G8B8_SRGB = 113, - VK_FORMAT_ETC2_R8G8B8A1_UNORM = 114, - VK_FORMAT_ETC2_R8G8B8A1_SRGB = 115, - VK_FORMAT_ETC2_R8G8B8A8_UNORM = 116, - VK_FORMAT_ETC2_R8G8B8A8_SRGB = 117, - VK_FORMAT_EAC_R11_UNORM = 118, - VK_FORMAT_EAC_R11_SNORM = 119, - VK_FORMAT_EAC_R11G11_UNORM = 120, - VK_FORMAT_EAC_R11G11_SNORM = 121, - VK_FORMAT_ASTC_4x4_UNORM = 122, - VK_FORMAT_ASTC_4x4_SRGB = 123, - VK_FORMAT_ASTC_5x4_UNORM = 124, - VK_FORMAT_ASTC_5x4_SRGB = 125, - VK_FORMAT_ASTC_5x5_UNORM = 126, - VK_FORMAT_ASTC_5x5_SRGB = 127, - VK_FORMAT_ASTC_6x5_UNORM = 128, - 
VK_FORMAT_ASTC_6x5_SRGB = 129, - VK_FORMAT_ASTC_6x6_UNORM = 130, - VK_FORMAT_ASTC_6x6_SRGB = 131, - VK_FORMAT_ASTC_8x5_UNORM = 132, - VK_FORMAT_ASTC_8x5_SRGB = 133, - VK_FORMAT_ASTC_8x6_UNORM = 134, - VK_FORMAT_ASTC_8x6_SRGB = 135, - VK_FORMAT_ASTC_8x8_UNORM = 136, - VK_FORMAT_ASTC_8x8_SRGB = 137, - VK_FORMAT_ASTC_10x5_UNORM = 138, - VK_FORMAT_ASTC_10x5_SRGB = 139, - VK_FORMAT_ASTC_10x6_UNORM = 140, - VK_FORMAT_ASTC_10x6_SRGB = 141, - VK_FORMAT_ASTC_10x8_UNORM = 142, - VK_FORMAT_ASTC_10x8_SRGB = 143, - VK_FORMAT_ASTC_10x10_UNORM = 144, - VK_FORMAT_ASTC_10x10_SRGB = 145, - VK_FORMAT_ASTC_12x10_UNORM = 146, - VK_FORMAT_ASTC_12x10_SRGB = 147, - VK_FORMAT_ASTC_12x12_UNORM = 148, - VK_FORMAT_ASTC_12x12_SRGB = 149, - VK_FORMAT_B4G4R4A4_UNORM = 150, - VK_FORMAT_B5G5R5A1_UNORM = 151, - VK_FORMAT_B5G6R5_UNORM = 152, - VK_FORMAT_B5G6R5_USCALED = 153, - VK_FORMAT_B8G8R8_UNORM = 154, - VK_FORMAT_B8G8R8_SNORM = 155, - VK_FORMAT_B8G8R8_USCALED = 156, - VK_FORMAT_B8G8R8_SSCALED = 157, - VK_FORMAT_B8G8R8_UINT = 158, - VK_FORMAT_B8G8R8_SINT = 159, - VK_FORMAT_B8G8R8_SRGB = 160, - VK_FORMAT_B8G8R8A8_UNORM = 161, - VK_FORMAT_B8G8R8A8_SNORM = 162, - VK_FORMAT_B8G8R8A8_USCALED = 163, - VK_FORMAT_B8G8R8A8_SSCALED = 164, - VK_FORMAT_B8G8R8A8_UINT = 165, - VK_FORMAT_B8G8R8A8_SINT = 166, - VK_FORMAT_B8G8R8A8_SRGB = 167, - VK_FORMAT_B10G10R10A2_UNORM = 168, - VK_FORMAT_B10G10R10A2_SNORM = 169, - VK_FORMAT_B10G10R10A2_USCALED = 170, - VK_FORMAT_B10G10R10A2_SSCALED = 171, - VK_FORMAT_B10G10R10A2_UINT = 172, - VK_FORMAT_B10G10R10A2_SINT = 173, + VK_FORMAT_B8G8R8_UNORM = 30, + VK_FORMAT_B8G8R8_SNORM = 31, + VK_FORMAT_B8G8R8_USCALED = 32, + VK_FORMAT_B8G8R8_SSCALED = 33, + VK_FORMAT_B8G8R8_UINT = 34, + VK_FORMAT_B8G8R8_SINT = 35, + VK_FORMAT_B8G8R8_SRGB = 36, + VK_FORMAT_R8G8B8A8_UNORM = 37, + VK_FORMAT_R8G8B8A8_SNORM = 38, + VK_FORMAT_R8G8B8A8_USCALED = 39, + VK_FORMAT_R8G8B8A8_SSCALED = 40, + VK_FORMAT_R8G8B8A8_UINT = 41, + VK_FORMAT_R8G8B8A8_SINT = 42, + VK_FORMAT_R8G8B8A8_SRGB = 43, + 
VK_FORMAT_B8G8R8A8_UNORM = 44, + VK_FORMAT_B8G8R8A8_SNORM = 45, + VK_FORMAT_B8G8R8A8_USCALED = 46, + VK_FORMAT_B8G8R8A8_SSCALED = 47, + VK_FORMAT_B8G8R8A8_UINT = 48, + VK_FORMAT_B8G8R8A8_SINT = 49, + VK_FORMAT_B8G8R8A8_SRGB = 50, + VK_FORMAT_A8B8G8R8_UNORM_PACK32 = 51, + VK_FORMAT_A8B8G8R8_SNORM_PACK32 = 52, + VK_FORMAT_A8B8G8R8_USCALED_PACK32 = 53, + VK_FORMAT_A8B8G8R8_SSCALED_PACK32 = 54, + VK_FORMAT_A8B8G8R8_UINT_PACK32 = 55, + VK_FORMAT_A8B8G8R8_SINT_PACK32 = 56, + VK_FORMAT_A8B8G8R8_SRGB_PACK32 = 57, + VK_FORMAT_A2R10G10B10_UNORM_PACK32 = 58, + VK_FORMAT_A2R10G10B10_SNORM_PACK32 = 59, + VK_FORMAT_A2R10G10B10_USCALED_PACK32 = 60, + VK_FORMAT_A2R10G10B10_SSCALED_PACK32 = 61, + VK_FORMAT_A2R10G10B10_UINT_PACK32 = 62, + VK_FORMAT_A2R10G10B10_SINT_PACK32 = 63, + VK_FORMAT_A2B10G10R10_UNORM_PACK32 = 64, + VK_FORMAT_A2B10G10R10_SNORM_PACK32 = 65, + VK_FORMAT_A2B10G10R10_USCALED_PACK32 = 66, + VK_FORMAT_A2B10G10R10_SSCALED_PACK32 = 67, + VK_FORMAT_A2B10G10R10_UINT_PACK32 = 68, + VK_FORMAT_A2B10G10R10_SINT_PACK32 = 69, + VK_FORMAT_R16_UNORM = 70, + VK_FORMAT_R16_SNORM = 71, + VK_FORMAT_R16_USCALED = 72, + VK_FORMAT_R16_SSCALED = 73, + VK_FORMAT_R16_UINT = 74, + VK_FORMAT_R16_SINT = 75, + VK_FORMAT_R16_SFLOAT = 76, + VK_FORMAT_R16G16_UNORM = 77, + VK_FORMAT_R16G16_SNORM = 78, + VK_FORMAT_R16G16_USCALED = 79, + VK_FORMAT_R16G16_SSCALED = 80, + VK_FORMAT_R16G16_UINT = 81, + VK_FORMAT_R16G16_SINT = 82, + VK_FORMAT_R16G16_SFLOAT = 83, + VK_FORMAT_R16G16B16_UNORM = 84, + VK_FORMAT_R16G16B16_SNORM = 85, + VK_FORMAT_R16G16B16_USCALED = 86, + VK_FORMAT_R16G16B16_SSCALED = 87, + VK_FORMAT_R16G16B16_UINT = 88, + VK_FORMAT_R16G16B16_SINT = 89, + VK_FORMAT_R16G16B16_SFLOAT = 90, + VK_FORMAT_R16G16B16A16_UNORM = 91, + VK_FORMAT_R16G16B16A16_SNORM = 92, + VK_FORMAT_R16G16B16A16_USCALED = 93, + VK_FORMAT_R16G16B16A16_SSCALED = 94, + VK_FORMAT_R16G16B16A16_UINT = 95, + VK_FORMAT_R16G16B16A16_SINT = 96, + VK_FORMAT_R16G16B16A16_SFLOAT = 97, + VK_FORMAT_R32_UINT = 98, + 
VK_FORMAT_R32_SINT = 99, + VK_FORMAT_R32_SFLOAT = 100, + VK_FORMAT_R32G32_UINT = 101, + VK_FORMAT_R32G32_SINT = 102, + VK_FORMAT_R32G32_SFLOAT = 103, + VK_FORMAT_R32G32B32_UINT = 104, + VK_FORMAT_R32G32B32_SINT = 105, + VK_FORMAT_R32G32B32_SFLOAT = 106, + VK_FORMAT_R32G32B32A32_UINT = 107, + VK_FORMAT_R32G32B32A32_SINT = 108, + VK_FORMAT_R32G32B32A32_SFLOAT = 109, + VK_FORMAT_R64_UINT = 110, + VK_FORMAT_R64_SINT = 111, + VK_FORMAT_R64_SFLOAT = 112, + VK_FORMAT_R64G64_UINT = 113, + VK_FORMAT_R64G64_SINT = 114, + VK_FORMAT_R64G64_SFLOAT = 115, + VK_FORMAT_R64G64B64_UINT = 116, + VK_FORMAT_R64G64B64_SINT = 117, + VK_FORMAT_R64G64B64_SFLOAT = 118, + VK_FORMAT_R64G64B64A64_UINT = 119, + VK_FORMAT_R64G64B64A64_SINT = 120, + VK_FORMAT_R64G64B64A64_SFLOAT = 121, + VK_FORMAT_B10G11R11_UFLOAT_PACK32 = 122, + VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 = 123, + VK_FORMAT_D16_UNORM = 124, + VK_FORMAT_X8_D24_UNORM_PACK32 = 125, + VK_FORMAT_D32_SFLOAT = 126, + VK_FORMAT_S8_UINT = 127, + VK_FORMAT_D16_UNORM_S8_UINT = 128, + VK_FORMAT_D24_UNORM_S8_UINT = 129, + VK_FORMAT_D32_SFLOAT_S8_UINT = 130, + VK_FORMAT_BC1_RGB_UNORM_BLOCK = 131, + VK_FORMAT_BC1_RGB_SRGB_BLOCK = 132, + VK_FORMAT_BC1_RGBA_UNORM_BLOCK = 133, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK = 134, + VK_FORMAT_BC2_UNORM_BLOCK = 135, + VK_FORMAT_BC2_SRGB_BLOCK = 136, + VK_FORMAT_BC3_UNORM_BLOCK = 137, + VK_FORMAT_BC3_SRGB_BLOCK = 138, + VK_FORMAT_BC4_UNORM_BLOCK = 139, + VK_FORMAT_BC4_SNORM_BLOCK = 140, + VK_FORMAT_BC5_UNORM_BLOCK = 141, + VK_FORMAT_BC5_SNORM_BLOCK = 142, + VK_FORMAT_BC6H_UFLOAT_BLOCK = 143, + VK_FORMAT_BC6H_SFLOAT_BLOCK = 144, + VK_FORMAT_BC7_UNORM_BLOCK = 145, + VK_FORMAT_BC7_SRGB_BLOCK = 146, + VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK = 147, + VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK = 148, + VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK = 149, + VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK = 150, + VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK = 151, + VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK = 152, + VK_FORMAT_EAC_R11_UNORM_BLOCK = 153, + VK_FORMAT_EAC_R11_SNORM_BLOCK = 
154, + VK_FORMAT_EAC_R11G11_UNORM_BLOCK = 155, + VK_FORMAT_EAC_R11G11_SNORM_BLOCK = 156, + VK_FORMAT_ASTC_4x4_UNORM_BLOCK = 157, + VK_FORMAT_ASTC_4x4_SRGB_BLOCK = 158, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK = 159, + VK_FORMAT_ASTC_5x4_SRGB_BLOCK = 160, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK = 161, + VK_FORMAT_ASTC_5x5_SRGB_BLOCK = 162, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK = 163, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK = 164, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK = 165, + VK_FORMAT_ASTC_6x6_SRGB_BLOCK = 166, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK = 167, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK = 168, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK = 169, + VK_FORMAT_ASTC_8x6_SRGB_BLOCK = 170, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK = 171, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK = 172, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK = 173, + VK_FORMAT_ASTC_10x5_SRGB_BLOCK = 174, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK = 175, + VK_FORMAT_ASTC_10x6_SRGB_BLOCK = 176, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK = 177, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK = 178, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK = 179, + VK_FORMAT_ASTC_10x10_SRGB_BLOCK = 180, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK = 181, + VK_FORMAT_ASTC_12x10_SRGB_BLOCK = 182, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK = 183, + VK_FORMAT_ASTC_12x12_SRGB_BLOCK = 184, VK_FORMAT_BEGIN_RANGE = VK_FORMAT_UNDEFINED, - VK_FORMAT_END_RANGE = VK_FORMAT_B10G10R10A2_SINT, - VK_FORMAT_NUM = (VK_FORMAT_B10G10R10A2_SINT - VK_FORMAT_UNDEFINED + 1), + VK_FORMAT_END_RANGE = VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + VK_FORMAT_RANGE_SIZE = (VK_FORMAT_ASTC_12x12_SRGB_BLOCK - VK_FORMAT_UNDEFINED + 1), VK_FORMAT_MAX_ENUM = 0x7FFFFFFF } VkFormat; diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index e79a7105d7d..abbf667a079 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -37,16 +37,16 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW, .num_channels = 1), - fmt(VK_FORMAT_R4G4_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R4G4_USCALED, ISL_FORMAT_UNSUPPORTED), - 
fmt(VK_FORMAT_R4G4B4A4_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R4G4B4A4_USCALED, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R5G6B5_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R5G6B5_USCALED, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R5G5B5A1_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R5G5B5A1_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R4G4_UNORM_PACK8, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R5G6B5_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B5G6R5_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B5G5R5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM, .num_channels = 1), - fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM, .num_channels = 1,), + fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM, .num_channels = 1), fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED, .num_channels = 1), fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED, .num_channels = 1), fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT, .num_channels = 1), @@ -73,12 +73,18 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT, .num_channels = 4), fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT, .num_channels = 4), fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UNORM, ISL_FORMAT_R10G10B10A2_UNORM, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SNORM, ISL_FORMAT_R10G10B10A2_SNORM, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_USCALED, ISL_FORMAT_R10G10B10A2_USCALED, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_SSCALED, ISL_FORMAT_R10G10B10A2_SSCALED, .num_channels = 4), - fmt(VK_FORMAT_R10G10B10A2_UINT, ISL_FORMAT_R10G10B10A2_UINT, .num_channels = 4), - 
fmt(VK_FORMAT_R10G10B10A2_SINT, ISL_FORMAT_R10G10B10A2_SINT, .num_channels = 4), + fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32, ISL_FORMAT_B10G10R10A2_UNORM, .num_channels = 4), + fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32, ISL_FORMAT_B10G10R10A2_SNORM, .num_channels = 4), + fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED, .num_channels = 4), + fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED, .num_channels = 4), + fmt(VK_FORMAT_A2R10G10B10_UINT_PACK32, ISL_FORMAT_B10G10R10A2_UINT, .num_channels = 4), + fmt(VK_FORMAT_A2R10G10B10_SINT_PACK32, ISL_FORMAT_B10G10R10A2_SINT, .num_channels = 4), + fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32, ISL_FORMAT_R10G10B10A2_UNORM, .num_channels = 4), + fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32, ISL_FORMAT_R10G10B10A2_SNORM, .num_channels = 4), + fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, ISL_FORMAT_R10G10B10A2_USCALED, .num_channels = 4), + fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED, .num_channels = 4), + fmt(VK_FORMAT_A2B10G10R10_UINT_PACK32, ISL_FORMAT_R10G10B10A2_UINT, .num_channels = 4), + fmt(VK_FORMAT_A2B10G10R10_SINT_PACK32, ISL_FORMAT_R10G10B10A2_SINT, .num_channels = 4), fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM, .num_channels = 1), fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM, .num_channels = 1), fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED, .num_channels = 1), @@ -123,75 +129,71 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT, .num_channels = 2), fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT, .num_channels = 3), fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT, .num_channels = 4), - fmt(VK_FORMAT_R11G11B10_UFLOAT, ISL_FORMAT_R11G11B10_FLOAT, .num_channels = 3), - fmt(VK_FORMAT_R9G9B9E5_UFLOAT, ISL_FORMAT_R9G9B9E5_SHAREDEXP, .num_channels = 3), + fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT, .num_channels = 3), + 
fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP, .num_channels = 3), fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, .num_channels = 1, .depth_format = D16_UNORM), - fmt(VK_FORMAT_D24_UNORM_X8, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), + fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .num_channels = 1, .depth_format = D32_FLOAT), fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .num_channels = 1, .has_stencil = true), fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), - fmt(VK_FORMAT_BC1_RGB_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGB_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGBA_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGBA_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC2_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC2_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC3_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC3_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC4_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC4_SNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC5_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC5_SNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC6H_UFLOAT, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC6H_SFLOAT, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC7_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC7_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, ISL_FORMAT_ETC2_RGB8), - fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, ISL_FORMAT_ETC2_SRGB8), - fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, 
ISL_FORMAT_ETC2_RGB8_PTA), - fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, ISL_FORMAT_ETC2_SRGB8_PTA), - fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, ISL_FORMAT_ETC2_EAC_RGBA8), - fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, ISL_FORMAT_ETC2_EAC_SRGB8_A8), - fmt(VK_FORMAT_EAC_R11_UNORM, ISL_FORMAT_EAC_R11), - fmt(VK_FORMAT_EAC_R11_SNORM, ISL_FORMAT_EAC_SIGNED_R11), - fmt(VK_FORMAT_EAC_R11G11_UNORM, ISL_FORMAT_EAC_RG11), - fmt(VK_FORMAT_EAC_R11G11_SNORM, ISL_FORMAT_EAC_SIGNED_RG11), - fmt(VK_FORMAT_ASTC_4x4_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_4x4_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x4_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x4_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x5_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x5_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x5_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x5_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x6_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x6_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x5_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x5_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x6_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x6_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x8_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x8_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x5_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x5_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x6_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x6_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x8_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x8_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x10_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x10_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x10_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x10_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x12_UNORM, ISL_FORMAT_UNSUPPORTED), - 
fmt(VK_FORMAT_ASTC_12x12_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B4G4R4A4_UNORM, ISL_FORMAT_B4G4R4A4_UNORM, .num_channels = 4), - fmt(VK_FORMAT_B5G5R5A1_UNORM, ISL_FORMAT_B5G5R5A1_UNORM, .num_channels = 4), - fmt(VK_FORMAT_B5G6R5_UNORM, ISL_FORMAT_B5G6R5_UNORM, .num_channels = 3), - fmt(VK_FORMAT_B5G6R5_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC2_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC2_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC3_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC3_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC4_SNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC5_SNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC6H_UFLOAT_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC6H_SFLOAT_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC7_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC7_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8), + fmt(VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8), + fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8_PTA), + fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8_PTA), + fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, ISL_FORMAT_ETC2_EAC_RGBA8), + fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, ISL_FORMAT_ETC2_EAC_SRGB8_A8), + fmt(VK_FORMAT_EAC_R11_UNORM_BLOCK, ISL_FORMAT_EAC_R11), + fmt(VK_FORMAT_EAC_R11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_R11), + fmt(VK_FORMAT_EAC_R11G11_UNORM_BLOCK, ISL_FORMAT_EAC_RG11), + fmt(VK_FORMAT_EAC_R11G11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_RG11), + fmt(VK_FORMAT_ASTC_4x4_UNORM_BLOCK, 
ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_UNORM, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_SNORM, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_USCALED, ISL_FORMAT_UNSUPPORTED), @@ -206,12 +208,6 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_B8G8R8A8_UINT, 
ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UNORM, ISL_FORMAT_B10G10R10A2_UNORM, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SNORM, ISL_FORMAT_B10G10R10A2_SNORM, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_USCALED, ISL_FORMAT_B10G10R10A2_USCALED, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SSCALED, ISL_FORMAT_B10G10R10A2_SSCALED, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_UINT, ISL_FORMAT_B10G10R10A2_UINT, .num_channels = 4), - fmt(VK_FORMAT_B10G10R10A2_SINT, ISL_FORMAT_B10G10R10A2_SINT, .num_channels = 4) }; #undef fmt diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index 412501445dd..d75919ea19a 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -80,6 +80,10 @@ static uint32_t wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha) { switch (vk_format) { + /* TODO: Figure out what all the formats mean and make this table + * correct. + */ +#if 0 case VK_FORMAT_R4G4B4A4_UNORM: return alpha ? WL_DRM_FORMAT_ABGR4444 : WL_DRM_FORMAT_XBGR4444; case VK_FORMAT_R5G6B5_UNORM: @@ -98,12 +102,15 @@ wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha) return WL_DRM_FORMAT_RGB565; case VK_FORMAT_B5G5R5A1_UNORM: return alpha ? WL_DRM_FORMAT_XRGB1555 : WL_DRM_FORMAT_XRGB1555; +#endif case VK_FORMAT_B8G8R8_UNORM: return WL_DRM_FORMAT_BGRX8888; case VK_FORMAT_B8G8R8A8_UNORM: return alpha ? WL_DRM_FORMAT_ARGB8888 : WL_DRM_FORMAT_XRGB8888; +#if 0 case VK_FORMAT_B10G10R10A2_UNORM: return alpha ? 
WL_DRM_FORMAT_ARGB2101010 : WL_DRM_FORMAT_XRGB2101010; +#endif default: assert("!Unsupported Vulkan format"); @@ -117,6 +124,7 @@ drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format) struct wsi_wl_display *display = data; switch (wl_format) { +#if 0 case WL_DRM_FORMAT_ABGR4444: case WL_DRM_FORMAT_XBGR4444: wsi_wl_display_add_vk_format(display, VK_FORMAT_R4G4B4A4_UNORM); @@ -149,16 +157,19 @@ drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format) case WL_DRM_FORMAT_XRGB1555: wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G5R5A1_UNORM); break; +#endif case WL_DRM_FORMAT_XRGB8888: wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_UNORM); /* fallthrough */ case WL_DRM_FORMAT_ARGB8888: wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_UNORM); break; +#if 0 case WL_DRM_FORMAT_ARGB2101010: case WL_DRM_FORMAT_XRGB2101010: wsi_wl_display_add_vk_format(display, VK_FORMAT_B10G10R10A2_UNORM); break; +#endif } } -- cgit v1.2.3 From 924fbfc9a1fc46a3698fa23649e6dccc3e3e6782 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 3 Dec 2015 11:05:52 -0800 Subject: vk/0.210.0: Fix how we handle access flags in barriers The initial implementation in the 0.210.0 API update was misguieded as to what the access flags meant. This should be more correct. --- src/vulkan/genX_cmd_buffer.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index e404d5efa9e..ac1ab822b49 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -223,10 +223,11 @@ void genX(CmdPipelineBarrier)( } } - for_each_bit(b, dst_flags) { + /* The src flags represent how things were used previously. This is + * what we use for doing flushes. 
+ */ + for_each_bit(b, src_flags) { switch ((VkAccessFlagBits)(1 << b)) { - case VK_ACCESS_HOST_WRITE_BIT: - break; /* FIXME: Little-core systems */ case VK_ACCESS_SHADER_WRITE_BIT: cmd.DCFlushEnable = true; break; @@ -241,14 +242,16 @@ void genX(CmdPipelineBarrier)( cmd.DepthCacheFlushEnable = true; break; default: - assert(!"Not a write bit"); + /* Doesn't require a flush */ + break; } } - for_each_bit(b, src_flags) { + /* The dst flags represent how things will be used in the fugure. This + * is what we use for doing cache invalidations. + */ + for_each_bit(b, dst_flags) { switch ((VkAccessFlagBits)(1 << b)) { - case VK_ACCESS_HOST_READ_BIT: - break; /* FIXME: Little-core systems */ case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: case VK_ACCESS_INDEX_READ_BIT: case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: @@ -258,22 +261,19 @@ void genX(CmdPipelineBarrier)( cmd.ConstantCacheInvalidationEnable = true; /* fallthrough */ case VK_ACCESS_SHADER_READ_BIT: - cmd.DCFlushEnable = true; cmd.TextureCacheInvalidationEnable = true; break; case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: cmd.TextureCacheInvalidationEnable = true; break; - case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT: - cmd.DCFlushEnable = true; - break; case VK_ACCESS_TRANSFER_READ_BIT: cmd.TextureCacheInvalidationEnable = true; break; case VK_ACCESS_MEMORY_READ_BIT: break; /* XXX: What is this? 
*/ default: - assert(!"Not a read bit"); + /* Doesn't require a flush */ + break; } } -- cgit v1.2.3 From 888744cabf8daaa463394d0cf10341ab00a5ecd5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 3 Dec 2015 13:25:12 -0800 Subject: vk/0.210.0: Update queries to the new API --- include/vulkan/vulkan.h | 62 +++++++++++++++++++++----------------------- src/vulkan/anv_query.c | 56 +++++++++++++++++++++++++++------------ src/vulkan/gen8_cmd_buffer.c | 27 ++++++++++--------- 3 files changed, 82 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index b382f548bf8..ab6b342c733 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -432,9 +432,10 @@ typedef enum VkPhysicalDeviceType { typedef enum VkQueryType { VK_QUERY_TYPE_OCCLUSION = 0, VK_QUERY_TYPE_PIPELINE_STATISTICS = 1, + VK_QUERY_TYPE_TIMESTAMP = 2, VK_QUERY_TYPE_BEGIN_RANGE = VK_QUERY_TYPE_OCCLUSION, - VK_QUERY_TYPE_END_RANGE = VK_QUERY_TYPE_PIPELINE_STATISTICS, - VK_QUERY_TYPE_RANGE_SIZE = (VK_QUERY_TYPE_PIPELINE_STATISTICS - VK_QUERY_TYPE_OCCLUSION + 1), + VK_QUERY_TYPE_END_RANGE = VK_QUERY_TYPE_TIMESTAMP, + VK_QUERY_TYPE_RANGE_SIZE = (VK_QUERY_TYPE_TIMESTAMP - VK_QUERY_TYPE_OCCLUSION + 1), VK_QUERY_TYPE_MAX_ENUM = 0x7FFFFFFF } VkQueryType; @@ -752,15 +753,6 @@ typedef enum VkIndexType { VK_INDEX_TYPE_MAX_ENUM = 0x7FFFFFFF } VkIndexType; -typedef enum { - VK_TIMESTAMP_TYPE_TOP = 0, - VK_TIMESTAMP_TYPE_BOTTOM = 1, - VK_TIMESTAMP_TYPE_BEGIN_RANGE = VK_TIMESTAMP_TYPE_TOP, - VK_TIMESTAMP_TYPE_END_RANGE = VK_TIMESTAMP_TYPE_BOTTOM, - VK_TIMESTAMP_TYPE_NUM = (VK_TIMESTAMP_TYPE_BOTTOM - VK_TIMESTAMP_TYPE_TOP + 1), - VK_TIMESTAMP_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkTimestampType; - typedef enum VkSubpassContents { VK_SUBPASS_CONTENTS_INLINE = 0, VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS = 1, @@ -874,17 +866,17 @@ typedef VkFlags VkEventCreateFlags; typedef VkFlags VkQueryPoolCreateFlags; typedef enum VkQueryPipelineStatisticFlagBits { - 
VK_QUERY_PIPELINE_STATISTIC_IA_VERTICES_BIT = 0x00000001, - VK_QUERY_PIPELINE_STATISTIC_IA_PRIMITIVES_BIT = 0x00000002, - VK_QUERY_PIPELINE_STATISTIC_VS_INVOCATIONS_BIT = 0x00000004, - VK_QUERY_PIPELINE_STATISTIC_GS_INVOCATIONS_BIT = 0x00000008, - VK_QUERY_PIPELINE_STATISTIC_GS_PRIMITIVES_BIT = 0x00000010, - VK_QUERY_PIPELINE_STATISTIC_C_INVOCATIONS_BIT = 0x00000020, - VK_QUERY_PIPELINE_STATISTIC_C_PRIMITIVES_BIT = 0x00000040, - VK_QUERY_PIPELINE_STATISTIC_FS_INVOCATIONS_BIT = 0x00000080, - VK_QUERY_PIPELINE_STATISTIC_TCS_PATCHES_BIT = 0x00000100, - VK_QUERY_PIPELINE_STATISTIC_TES_INVOCATIONS_BIT = 0x00000200, - VK_QUERY_PIPELINE_STATISTIC_CS_INVOCATIONS_BIT = 0x00000400, + VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT = 0x00000001, + VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT = 0x00000002, + VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT = 0x00000004, + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT = 0x00000008, + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT = 0x00000010, + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT = 0x00000020, + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT = 0x00000040, + VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT = 0x00000080, + VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT = 0x00000100, + VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT = 0x00000200, + VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT = 0x00000400, } VkQueryPipelineStatisticFlagBits; typedef VkFlags VkQueryPipelineStatisticFlags; @@ -1047,6 +1039,11 @@ typedef enum VkCommandBufferUsageFlagBits { } VkCommandBufferUsageFlagBits; typedef VkFlags VkCommandBufferUsageFlags; +typedef enum VkQueryControlFlagBits { + VK_QUERY_CONTROL_PRECISE_BIT = 0x00000001, +} VkQueryControlFlagBits; +typedef VkFlags VkQueryControlFlags; + typedef enum VkCommandBufferResetFlagBits { VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT = 0x00000001, } 
VkCommandBufferResetFlagBits; @@ -1060,11 +1057,6 @@ typedef enum VkStencilFaceFlagBits { } VkStencilFaceFlagBits; typedef VkFlags VkStencilFaceFlags; -typedef enum { - VK_QUERY_CONTROL_CONSERVATIVE_BIT = 0x00000001, -} VkQueryControlFlagBits; -typedef VkFlags VkQueryControlFlags; - typedef void* (VKAPI_PTR *PFN_vkAllocationFunction)( void* pUserData, size_t size, @@ -2018,6 +2010,9 @@ typedef struct VkCommandBufferBeginInfo { VkRenderPass renderPass; uint32_t subpass; VkFramebuffer framebuffer; + VkBool32 occlusionQueryEnable; + VkQueryControlFlags queryFlags; + VkQueryPipelineStatisticFlags pipelineStatistics; } VkCommandBufferBeginInfo; typedef struct VkBufferCopy { @@ -2208,7 +2203,7 @@ typedef VkResult (VKAPI_PTR *PFN_vkSetEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI_PTR *PFN_vkResetEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI_PTR *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkQueryPool* pQueryPool); typedef void (VKAPI_PTR *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, size_t* pDataSize, void* pData, VkQueryResultFlags flags); +typedef VkResult (VKAPI_PTR *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, size_t dataSize, void* pData, VkDeviceSize stride, VkQueryResultFlags flags); typedef VkResult (VKAPI_PTR *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkBuffer* pBuffer); typedef void (VKAPI_PTR *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, const 
VkAllocationCallbacks* pAllocator, VkBufferView* pView); @@ -2289,7 +2284,7 @@ typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer typedef void (VKAPI_PTR *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t entry, VkQueryControlFlags flags); typedef void (VKAPI_PTR *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t entry); typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); -typedef void (VKAPI_PTR *PFN_vkCmdWriteTimestamp)(VkCommandBuffer commandBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset); +typedef void (VKAPI_PTR *PFN_vkCmdWriteTimestamp)(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkQueryPool queryPool, uint32_t entry); typedef void (VKAPI_PTR *PFN_vkCmdCopyQueryPoolResults)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags); typedef void (VKAPI_PTR *PFN_vkCmdPushConstants)(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, const void* pValues); typedef void (VKAPI_PTR *PFN_vkCmdBeginRenderPass)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkSubpassContents contents); @@ -2558,8 +2553,9 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetQueryPoolResults( VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, - size_t* pDataSize, + size_t dataSize, void* pData, + VkDeviceSize stride, VkQueryResultFlags flags); VKAPI_ATTR VkResult VKAPI_CALL vkCreateBuffer( @@ -3045,9 +3041,9 @@ VKAPI_ATTR void VKAPI_CALL vkCmdResetQueryPool( VKAPI_ATTR void VKAPI_CALL vkCmdWriteTimestamp( VkCommandBuffer commandBuffer, - VkTimestampType timestampType, - VkBuffer destBuffer, - VkDeviceSize 
destOffset); + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t entry); VKAPI_ATTR void VKAPI_CALL vkCmdCopyQueryPoolResults( VkCommandBuffer commandBuffer, diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index 320d42cb6fd..911b9a41264 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -38,17 +38,22 @@ VkResult anv_CreateQueryPool( ANV_FROM_HANDLE(anv_device, device, _device); struct anv_query_pool *pool; VkResult result; - size_t size; + uint32_t slot_size; + uint64_t size; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); switch (pCreateInfo->queryType) { case VK_QUERY_TYPE_OCCLUSION: + slot_size = sizeof(struct anv_query_pool_slot); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: return VK_ERROR_INCOMPATIBLE_DRIVER; + case VK_QUERY_TYPE_TIMESTAMP: + slot_size = sizeof(uint64_t); + break; default: - unreachable(""); + assert(!"Invalid query type"); } pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, @@ -56,7 +61,10 @@ VkResult anv_CreateQueryPool( if (pool == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - size = pCreateInfo->entryCount * sizeof(struct anv_query_pool_slot); + pool->type = pCreateInfo->queryType; + pool->slots = pCreateInfo->entryCount; + + size = pCreateInfo->entryCount * slot_size; result = anv_bo_init_new(&pool->bo, device, size); if (result != VK_SUCCESS) goto fail; @@ -91,16 +99,14 @@ VkResult anv_GetQueryPoolResults( VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, - size_t* pDataSize, + size_t dataSize, void* pData, + VkDeviceSize stride, VkQueryResultFlags flags) { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - struct anv_query_pool_slot *slot = pool->bo.map; int64_t timeout = INT64_MAX; - uint32_t *dst32 = pData; - uint64_t *dst64 = pData; uint64_t result; int ret; @@ -110,12 +116,8 @@ VkResult anv_GetQueryPoolResults( return VK_ERROR_INCOMPATIBLE_DRIVER; } - assert(pool->type 
== VK_QUERY_TYPE_OCCLUSION); - - if (flags & VK_QUERY_RESULT_64_BIT) - *pDataSize = queryCount * sizeof(uint64_t); - else - *pDataSize = queryCount * sizeof(uint32_t); + assert(pool->type == VK_QUERY_TYPE_OCCLUSION || + pool->type == VK_QUERY_TYPE_TIMESTAMP); if (pData == NULL) return VK_SUCCESS; @@ -129,15 +131,37 @@ VkResult anv_GetQueryPoolResults( } } + void *data_end = pData + dataSize; + for (uint32_t i = 0; i < queryCount; i++) { - result = slot[startQuery + i].end - slot[startQuery + i].begin; + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: { + struct anv_query_pool_slot *slot = pool->bo.map; + result = slot[startQuery + i].end - slot[startQuery + i].begin; + break; + } + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + /* Not yet implemented */ + break; + case VK_QUERY_TYPE_TIMESTAMP: { + uint64_t *slot = pool->bo.map; + result = slot[startQuery + i]; + break; + } + default: + assert(!"Invalid query type"); + } + if (flags & VK_QUERY_RESULT_64_BIT) { - *dst64++ = result; + *(uint64_t *)pData = result; } else { if (result > UINT32_MAX) result = UINT32_MAX; - *dst32++ = result; + *(uint32_t *)pData = result; } + pData += stride; + if (pData >= data_end) + break; } return VK_SUCCESS; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 23dc9ad5748..6d0ac25b6d8 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -854,33 +854,32 @@ void genX(CmdEndQuery)( void genX(CmdWriteTimestamp)( VkCommandBuffer commandBuffer, - VkTimestampType timestampType, - VkBuffer destBuffer, - VkDeviceSize destOffset) + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t entry) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - struct anv_bo *bo = buffer->bo; + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - switch (timestampType) { - case VK_TIMESTAMP_TYPE_TOP: + assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); + + switch (pipelineStage) { 
+ case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), .RegisterAddress = TIMESTAMP, - .MemoryAddress = { bo, buffer->offset + destOffset }); + .MemoryAddress = { &pool->bo, entry * 8 }); anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), .RegisterAddress = TIMESTAMP + 4, - .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); + .MemoryAddress = { &pool->bo, entry * 8 + 4 }); break; - case VK_TIMESTAMP_TYPE_BOTTOM: + default: + /* Everything else is bottom-of-pipe */ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .DestinationAddressType = DAT_PPGTT, .PostSyncOperation = WriteTimestamp, .Address = /* FIXME: This is only lower 32 bits */ - { bo, buffer->offset + destOffset }); - break; - - default: + { &pool->bo, entry * 8 }); break; } } -- cgit v1.2.3 From 4c19243562cba8b33e8139c7ac20ba8bef132633 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 3 Dec 2015 13:27:00 -0800 Subject: vk/0.210.0: Advertise version 0.210.0 --- include/vulkan/vulkan-0.170.2.h | 3034 -------------------------------------- include/vulkan/vulkan-0.210.0.h | 3088 --------------------------------------- include/vulkan/vulkan.h | 2 +- src/vulkan/anv_device.c | 4 +- 4 files changed, 3 insertions(+), 6125 deletions(-) delete mode 100644 include/vulkan/vulkan-0.170.2.h delete mode 100644 include/vulkan/vulkan-0.210.0.h (limited to 'src') diff --git a/include/vulkan/vulkan-0.170.2.h b/include/vulkan/vulkan-0.170.2.h deleted file mode 100644 index 03bcefea50a..00000000000 --- a/include/vulkan/vulkan-0.170.2.h +++ /dev/null @@ -1,3034 +0,0 @@ -#ifndef __vulkan_h_ -#define __vulkan_h_ 1 - -#ifdef __cplusplus -extern "C" { -#endif - -/* -** Copyright (c) 2015 The Khronos Group Inc. 
-** -** Permission is hereby granted, free of charge, to any person obtaining a -** copy of this software and/or associated documentation files (the -** "Materials"), to deal in the Materials without restriction, including -** without limitation the rights to use, copy, modify, merge, publish, -** distribute, sublicense, and/or sell copies of the Materials, and to -** permit persons to whom the Materials are furnished to do so, subject to -** the following conditions: -** -** The above copyright notice and this permission notice shall be included -** in all copies or substantial portions of the Materials. -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. -*/ - -/* -** This header is generated from the Khronos Vulkan XML API Registry. -** -*/ - - -#define VK_VERSION_1_0 1 -#include "vk_platform.h" - -#define VK_MAKE_VERSION(major, minor, patch) \ - ((major << 22) | (minor << 12) | patch) - -// Vulkan API version supported by this file -#define VK_API_VERSION VK_MAKE_VERSION(0, 170, 2) - - -#if defined(__cplusplus) && (_MSC_VER >= 1800 || __cplusplus >= 201103L) - #define VK_NULL_HANDLE nullptr -#else - #define VK_NULL_HANDLE 0 -#endif - - -#define VK_DEFINE_HANDLE(obj) typedef struct obj##_T* obj; - - -#if defined(__cplusplus) - #if ((defined(_MSC_VER) && _MSC_VER >= 1800) || __cplusplus >= 201103L) - // The bool operator only works if there are no implicit conversions from an obj to - // a bool-compatible type, which can then be used to unintentionally violate type safety. 
- // C++11 and above supports the "explicit" keyword on conversion operators to stop this - // from happening. Otherwise users of C++ below C++11 won't get direct access to evaluating - // the object handle as a bool in expressions like: - // if (obj) vkDestroy(obj); - #define VK_NONDISP_HANDLE_OPERATOR_BOOL() explicit operator bool() const { return handle != 0; } - #define VK_NONDISP_HANDLE_CONSTRUCTOR_FROM_UINT64(obj) \ - explicit obj(uint64_t x) : handle(x) { } \ - obj(decltype(nullptr)) : handle(0) { } - #else - #define VK_NONDISP_HANDLE_OPERATOR_BOOL() - #define VK_NONDISP_HANDLE_CONSTRUCTOR_FROM_UINT64(obj) \ - obj(uint64_t x) : handle(x) { } - #endif - #define VK_DEFINE_NONDISP_HANDLE(obj) \ - struct obj { \ - obj() : handle(0) { } \ - VK_NONDISP_HANDLE_CONSTRUCTOR_FROM_UINT64(obj) \ - obj& operator =(uint64_t x) { handle = x; return *this; } \ - bool operator==(const obj& other) const { return handle == other.handle; } \ - bool operator!=(const obj& other) const { return handle != other.handle; } \ - bool operator!() const { return !handle; } \ - VK_NONDISP_HANDLE_OPERATOR_BOOL() \ - uint64_t handle; \ - }; -#else - #define VK_DEFINE_NONDISP_HANDLE(obj) typedef struct obj##_T { uint64_t handle; } obj; -#endif - - - -typedef uint32_t VkBool32; -typedef uint32_t VkFlags; -typedef uint64_t VkDeviceSize; -typedef uint32_t VkSampleMask; - -VK_DEFINE_HANDLE(VkInstance) -VK_DEFINE_HANDLE(VkPhysicalDevice) -VK_DEFINE_HANDLE(VkDevice) -VK_DEFINE_HANDLE(VkQueue) -VK_DEFINE_HANDLE(VkCmdBuffer) -VK_DEFINE_NONDISP_HANDLE(VkFence) -VK_DEFINE_NONDISP_HANDLE(VkDeviceMemory) -VK_DEFINE_NONDISP_HANDLE(VkBuffer) -VK_DEFINE_NONDISP_HANDLE(VkImage) -VK_DEFINE_NONDISP_HANDLE(VkSemaphore) -VK_DEFINE_NONDISP_HANDLE(VkEvent) -VK_DEFINE_NONDISP_HANDLE(VkQueryPool) -VK_DEFINE_NONDISP_HANDLE(VkBufferView) -VK_DEFINE_NONDISP_HANDLE(VkImageView) -VK_DEFINE_NONDISP_HANDLE(VkShaderModule) -VK_DEFINE_NONDISP_HANDLE(VkShader) -VK_DEFINE_NONDISP_HANDLE(VkPipelineCache) 
-VK_DEFINE_NONDISP_HANDLE(VkPipelineLayout) -VK_DEFINE_NONDISP_HANDLE(VkRenderPass) -VK_DEFINE_NONDISP_HANDLE(VkPipeline) -VK_DEFINE_NONDISP_HANDLE(VkDescriptorSetLayout) -VK_DEFINE_NONDISP_HANDLE(VkSampler) -VK_DEFINE_NONDISP_HANDLE(VkDescriptorPool) -VK_DEFINE_NONDISP_HANDLE(VkDescriptorSet) -VK_DEFINE_NONDISP_HANDLE(VkFramebuffer) -VK_DEFINE_NONDISP_HANDLE(VkCmdPool) - -#define VK_LOD_CLAMP_NONE 1000.0f -#define VK_REMAINING_MIP_LEVELS (~0U) -#define VK_REMAINING_ARRAY_LAYERS (~0U) -#define VK_WHOLE_SIZE (~0ULL) -#define VK_ATTACHMENT_UNUSED (~0U) -#define VK_TRUE 1 -#define VK_FALSE 0 -#define VK_QUEUE_FAMILY_IGNORED (~0U) -#define VK_SUBPASS_EXTERNAL (~0U) -#define VK_MAX_PHYSICAL_DEVICE_NAME 256 -#define VK_UUID_LENGTH 16 -#define VK_MAX_MEMORY_TYPES 32 -#define VK_MAX_MEMORY_HEAPS 16 -#define VK_MAX_EXTENSION_NAME 256 -#define VK_MAX_DESCRIPTION 256 - - -typedef enum { - VK_SUCCESS = 0, - VK_UNSUPPORTED = 1, - VK_NOT_READY = 2, - VK_TIMEOUT = 3, - VK_EVENT_SET = 4, - VK_EVENT_RESET = 5, - VK_INCOMPLETE = 6, - VK_ERROR_OUT_OF_HOST_MEMORY = -1, - VK_ERROR_OUT_OF_DEVICE_MEMORY = -2, - VK_ERROR_INITIALIZATION_FAILED = -3, - VK_ERROR_DEVICE_LOST = -4, - VK_ERROR_MEMORY_MAP_FAILED = -5, - VK_ERROR_LAYER_NOT_PRESENT = -6, - VK_ERROR_EXTENSION_NOT_PRESENT = -7, - VK_ERROR_INCOMPATIBLE_DRIVER = -8, - VK_RESULT_BEGIN_RANGE = VK_ERROR_INCOMPATIBLE_DRIVER, - VK_RESULT_END_RANGE = VK_INCOMPLETE, - VK_RESULT_NUM = (VK_INCOMPLETE - VK_ERROR_INCOMPATIBLE_DRIVER + 1), - VK_RESULT_MAX_ENUM = 0x7FFFFFFF -} VkResult; - -typedef enum { - VK_STRUCTURE_TYPE_APPLICATION_INFO = 0, - VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO = 1, - VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO = 2, - VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO = 3, - VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 4, - VK_STRUCTURE_TYPE_SHADER_CREATE_INFO = 5, - VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 6, - VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 7, - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 8, - 
VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO = 9, - VK_STRUCTURE_TYPE_EVENT_CREATE_INFO = 10, - VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 11, - VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 12, - VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO = 13, - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 14, - VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 15, - VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 16, - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO = 17, - VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO = 18, - VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO = 19, - VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO = 20, - VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO = 21, - VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO = 22, - VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO = 23, - VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO = 24, - VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 25, - VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO = 26, - VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 27, - VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO = 28, - VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 29, - VK_STRUCTURE_TYPE_MEMORY_BARRIER = 30, - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 31, - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 32, - VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO = 33, - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET = 34, - VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET = 35, - VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 36, - VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 37, - VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE = 38, - VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO = 39, - VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION = 40, - VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION = 41, - VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY = 42, - VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 43, - VK_STRUCTURE_TYPE_CMD_POOL_CREATE_INFO = 44, - VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO = 45, - VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO = 46, - 
VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, - VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - VK_STRUCTURE_TYPE_NUM = (VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), - VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkStructureType; - -typedef enum { - VK_SYSTEM_ALLOC_TYPE_API_OBJECT = 0, - VK_SYSTEM_ALLOC_TYPE_INTERNAL = 1, - VK_SYSTEM_ALLOC_TYPE_INTERNAL_TEMP = 2, - VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER = 3, - VK_SYSTEM_ALLOC_TYPE_DEBUG = 4, - VK_SYSTEM_ALLOC_TYPE_BEGIN_RANGE = VK_SYSTEM_ALLOC_TYPE_API_OBJECT, - VK_SYSTEM_ALLOC_TYPE_END_RANGE = VK_SYSTEM_ALLOC_TYPE_DEBUG, - VK_SYSTEM_ALLOC_TYPE_NUM = (VK_SYSTEM_ALLOC_TYPE_DEBUG - VK_SYSTEM_ALLOC_TYPE_API_OBJECT + 1), - VK_SYSTEM_ALLOC_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkSystemAllocType; - -typedef enum { - VK_FORMAT_UNDEFINED = 0, - VK_FORMAT_R4G4_UNORM = 1, - VK_FORMAT_R4G4_USCALED = 2, - VK_FORMAT_R4G4B4A4_UNORM = 3, - VK_FORMAT_R4G4B4A4_USCALED = 4, - VK_FORMAT_R5G6B5_UNORM = 5, - VK_FORMAT_R5G6B5_USCALED = 6, - VK_FORMAT_R5G5B5A1_UNORM = 7, - VK_FORMAT_R5G5B5A1_USCALED = 8, - VK_FORMAT_R8_UNORM = 9, - VK_FORMAT_R8_SNORM = 10, - VK_FORMAT_R8_USCALED = 11, - VK_FORMAT_R8_SSCALED = 12, - VK_FORMAT_R8_UINT = 13, - VK_FORMAT_R8_SINT = 14, - VK_FORMAT_R8_SRGB = 15, - VK_FORMAT_R8G8_UNORM = 16, - VK_FORMAT_R8G8_SNORM = 17, - VK_FORMAT_R8G8_USCALED = 18, - VK_FORMAT_R8G8_SSCALED = 19, - VK_FORMAT_R8G8_UINT = 20, - VK_FORMAT_R8G8_SINT = 21, - VK_FORMAT_R8G8_SRGB = 22, - VK_FORMAT_R8G8B8_UNORM = 23, - VK_FORMAT_R8G8B8_SNORM = 24, - VK_FORMAT_R8G8B8_USCALED = 25, - VK_FORMAT_R8G8B8_SSCALED = 26, - VK_FORMAT_R8G8B8_UINT = 27, - VK_FORMAT_R8G8B8_SINT = 28, - VK_FORMAT_R8G8B8_SRGB = 29, - VK_FORMAT_R8G8B8A8_UNORM = 30, - VK_FORMAT_R8G8B8A8_SNORM = 31, - VK_FORMAT_R8G8B8A8_USCALED = 32, - VK_FORMAT_R8G8B8A8_SSCALED = 33, - VK_FORMAT_R8G8B8A8_UINT = 34, - VK_FORMAT_R8G8B8A8_SINT = 35, - VK_FORMAT_R8G8B8A8_SRGB = 36, - 
VK_FORMAT_R10G10B10A2_UNORM = 37, - VK_FORMAT_R10G10B10A2_SNORM = 38, - VK_FORMAT_R10G10B10A2_USCALED = 39, - VK_FORMAT_R10G10B10A2_SSCALED = 40, - VK_FORMAT_R10G10B10A2_UINT = 41, - VK_FORMAT_R10G10B10A2_SINT = 42, - VK_FORMAT_R16_UNORM = 43, - VK_FORMAT_R16_SNORM = 44, - VK_FORMAT_R16_USCALED = 45, - VK_FORMAT_R16_SSCALED = 46, - VK_FORMAT_R16_UINT = 47, - VK_FORMAT_R16_SINT = 48, - VK_FORMAT_R16_SFLOAT = 49, - VK_FORMAT_R16G16_UNORM = 50, - VK_FORMAT_R16G16_SNORM = 51, - VK_FORMAT_R16G16_USCALED = 52, - VK_FORMAT_R16G16_SSCALED = 53, - VK_FORMAT_R16G16_UINT = 54, - VK_FORMAT_R16G16_SINT = 55, - VK_FORMAT_R16G16_SFLOAT = 56, - VK_FORMAT_R16G16B16_UNORM = 57, - VK_FORMAT_R16G16B16_SNORM = 58, - VK_FORMAT_R16G16B16_USCALED = 59, - VK_FORMAT_R16G16B16_SSCALED = 60, - VK_FORMAT_R16G16B16_UINT = 61, - VK_FORMAT_R16G16B16_SINT = 62, - VK_FORMAT_R16G16B16_SFLOAT = 63, - VK_FORMAT_R16G16B16A16_UNORM = 64, - VK_FORMAT_R16G16B16A16_SNORM = 65, - VK_FORMAT_R16G16B16A16_USCALED = 66, - VK_FORMAT_R16G16B16A16_SSCALED = 67, - VK_FORMAT_R16G16B16A16_UINT = 68, - VK_FORMAT_R16G16B16A16_SINT = 69, - VK_FORMAT_R16G16B16A16_SFLOAT = 70, - VK_FORMAT_R32_UINT = 71, - VK_FORMAT_R32_SINT = 72, - VK_FORMAT_R32_SFLOAT = 73, - VK_FORMAT_R32G32_UINT = 74, - VK_FORMAT_R32G32_SINT = 75, - VK_FORMAT_R32G32_SFLOAT = 76, - VK_FORMAT_R32G32B32_UINT = 77, - VK_FORMAT_R32G32B32_SINT = 78, - VK_FORMAT_R32G32B32_SFLOAT = 79, - VK_FORMAT_R32G32B32A32_UINT = 80, - VK_FORMAT_R32G32B32A32_SINT = 81, - VK_FORMAT_R32G32B32A32_SFLOAT = 82, - VK_FORMAT_R64_SFLOAT = 83, - VK_FORMAT_R64G64_SFLOAT = 84, - VK_FORMAT_R64G64B64_SFLOAT = 85, - VK_FORMAT_R64G64B64A64_SFLOAT = 86, - VK_FORMAT_R11G11B10_UFLOAT = 87, - VK_FORMAT_R9G9B9E5_UFLOAT = 88, - VK_FORMAT_D16_UNORM = 89, - VK_FORMAT_D24_UNORM_X8 = 90, - VK_FORMAT_D32_SFLOAT = 91, - VK_FORMAT_S8_UINT = 92, - VK_FORMAT_D16_UNORM_S8_UINT = 93, - VK_FORMAT_D24_UNORM_S8_UINT = 94, - VK_FORMAT_D32_SFLOAT_S8_UINT = 95, - VK_FORMAT_BC1_RGB_UNORM = 96, - 
VK_FORMAT_BC1_RGB_SRGB = 97, - VK_FORMAT_BC1_RGBA_UNORM = 98, - VK_FORMAT_BC1_RGBA_SRGB = 99, - VK_FORMAT_BC2_UNORM = 100, - VK_FORMAT_BC2_SRGB = 101, - VK_FORMAT_BC3_UNORM = 102, - VK_FORMAT_BC3_SRGB = 103, - VK_FORMAT_BC4_UNORM = 104, - VK_FORMAT_BC4_SNORM = 105, - VK_FORMAT_BC5_UNORM = 106, - VK_FORMAT_BC5_SNORM = 107, - VK_FORMAT_BC6H_UFLOAT = 108, - VK_FORMAT_BC6H_SFLOAT = 109, - VK_FORMAT_BC7_UNORM = 110, - VK_FORMAT_BC7_SRGB = 111, - VK_FORMAT_ETC2_R8G8B8_UNORM = 112, - VK_FORMAT_ETC2_R8G8B8_SRGB = 113, - VK_FORMAT_ETC2_R8G8B8A1_UNORM = 114, - VK_FORMAT_ETC2_R8G8B8A1_SRGB = 115, - VK_FORMAT_ETC2_R8G8B8A8_UNORM = 116, - VK_FORMAT_ETC2_R8G8B8A8_SRGB = 117, - VK_FORMAT_EAC_R11_UNORM = 118, - VK_FORMAT_EAC_R11_SNORM = 119, - VK_FORMAT_EAC_R11G11_UNORM = 120, - VK_FORMAT_EAC_R11G11_SNORM = 121, - VK_FORMAT_ASTC_4x4_UNORM = 122, - VK_FORMAT_ASTC_4x4_SRGB = 123, - VK_FORMAT_ASTC_5x4_UNORM = 124, - VK_FORMAT_ASTC_5x4_SRGB = 125, - VK_FORMAT_ASTC_5x5_UNORM = 126, - VK_FORMAT_ASTC_5x5_SRGB = 127, - VK_FORMAT_ASTC_6x5_UNORM = 128, - VK_FORMAT_ASTC_6x5_SRGB = 129, - VK_FORMAT_ASTC_6x6_UNORM = 130, - VK_FORMAT_ASTC_6x6_SRGB = 131, - VK_FORMAT_ASTC_8x5_UNORM = 132, - VK_FORMAT_ASTC_8x5_SRGB = 133, - VK_FORMAT_ASTC_8x6_UNORM = 134, - VK_FORMAT_ASTC_8x6_SRGB = 135, - VK_FORMAT_ASTC_8x8_UNORM = 136, - VK_FORMAT_ASTC_8x8_SRGB = 137, - VK_FORMAT_ASTC_10x5_UNORM = 138, - VK_FORMAT_ASTC_10x5_SRGB = 139, - VK_FORMAT_ASTC_10x6_UNORM = 140, - VK_FORMAT_ASTC_10x6_SRGB = 141, - VK_FORMAT_ASTC_10x8_UNORM = 142, - VK_FORMAT_ASTC_10x8_SRGB = 143, - VK_FORMAT_ASTC_10x10_UNORM = 144, - VK_FORMAT_ASTC_10x10_SRGB = 145, - VK_FORMAT_ASTC_12x10_UNORM = 146, - VK_FORMAT_ASTC_12x10_SRGB = 147, - VK_FORMAT_ASTC_12x12_UNORM = 148, - VK_FORMAT_ASTC_12x12_SRGB = 149, - VK_FORMAT_B4G4R4A4_UNORM = 150, - VK_FORMAT_B5G5R5A1_UNORM = 151, - VK_FORMAT_B5G6R5_UNORM = 152, - VK_FORMAT_B5G6R5_USCALED = 153, - VK_FORMAT_B8G8R8_UNORM = 154, - VK_FORMAT_B8G8R8_SNORM = 155, - VK_FORMAT_B8G8R8_USCALED = 156, - 
VK_FORMAT_B8G8R8_SSCALED = 157, - VK_FORMAT_B8G8R8_UINT = 158, - VK_FORMAT_B8G8R8_SINT = 159, - VK_FORMAT_B8G8R8_SRGB = 160, - VK_FORMAT_B8G8R8A8_UNORM = 161, - VK_FORMAT_B8G8R8A8_SNORM = 162, - VK_FORMAT_B8G8R8A8_USCALED = 163, - VK_FORMAT_B8G8R8A8_SSCALED = 164, - VK_FORMAT_B8G8R8A8_UINT = 165, - VK_FORMAT_B8G8R8A8_SINT = 166, - VK_FORMAT_B8G8R8A8_SRGB = 167, - VK_FORMAT_B10G10R10A2_UNORM = 168, - VK_FORMAT_B10G10R10A2_SNORM = 169, - VK_FORMAT_B10G10R10A2_USCALED = 170, - VK_FORMAT_B10G10R10A2_SSCALED = 171, - VK_FORMAT_B10G10R10A2_UINT = 172, - VK_FORMAT_B10G10R10A2_SINT = 173, - VK_FORMAT_BEGIN_RANGE = VK_FORMAT_UNDEFINED, - VK_FORMAT_END_RANGE = VK_FORMAT_B10G10R10A2_SINT, - VK_FORMAT_NUM = (VK_FORMAT_B10G10R10A2_SINT - VK_FORMAT_UNDEFINED + 1), - VK_FORMAT_MAX_ENUM = 0x7FFFFFFF -} VkFormat; - -typedef enum { - VK_IMAGE_TYPE_1D = 0, - VK_IMAGE_TYPE_2D = 1, - VK_IMAGE_TYPE_3D = 2, - VK_IMAGE_TYPE_BEGIN_RANGE = VK_IMAGE_TYPE_1D, - VK_IMAGE_TYPE_END_RANGE = VK_IMAGE_TYPE_3D, - VK_IMAGE_TYPE_NUM = (VK_IMAGE_TYPE_3D - VK_IMAGE_TYPE_1D + 1), - VK_IMAGE_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkImageType; - -typedef enum { - VK_IMAGE_TILING_LINEAR = 0, - VK_IMAGE_TILING_OPTIMAL = 1, - VK_IMAGE_TILING_BEGIN_RANGE = VK_IMAGE_TILING_LINEAR, - VK_IMAGE_TILING_END_RANGE = VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_TILING_NUM = (VK_IMAGE_TILING_OPTIMAL - VK_IMAGE_TILING_LINEAR + 1), - VK_IMAGE_TILING_MAX_ENUM = 0x7FFFFFFF -} VkImageTiling; - -typedef enum { - VK_PHYSICAL_DEVICE_TYPE_OTHER = 0, - VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU = 1, - VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU = 2, - VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU = 3, - VK_PHYSICAL_DEVICE_TYPE_CPU = 4, - VK_PHYSICAL_DEVICE_TYPE_BEGIN_RANGE = VK_PHYSICAL_DEVICE_TYPE_OTHER, - VK_PHYSICAL_DEVICE_TYPE_END_RANGE = VK_PHYSICAL_DEVICE_TYPE_CPU, - VK_PHYSICAL_DEVICE_TYPE_NUM = (VK_PHYSICAL_DEVICE_TYPE_CPU - VK_PHYSICAL_DEVICE_TYPE_OTHER + 1), - VK_PHYSICAL_DEVICE_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkPhysicalDeviceType; - -typedef enum { - 
VK_IMAGE_ASPECT_COLOR = 0, - VK_IMAGE_ASPECT_DEPTH = 1, - VK_IMAGE_ASPECT_STENCIL = 2, - VK_IMAGE_ASPECT_METADATA = 3, - VK_IMAGE_ASPECT_BEGIN_RANGE = VK_IMAGE_ASPECT_COLOR, - VK_IMAGE_ASPECT_END_RANGE = VK_IMAGE_ASPECT_METADATA, - VK_IMAGE_ASPECT_NUM = (VK_IMAGE_ASPECT_METADATA - VK_IMAGE_ASPECT_COLOR + 1), - VK_IMAGE_ASPECT_MAX_ENUM = 0x7FFFFFFF -} VkImageAspect; - -typedef enum { - VK_QUERY_TYPE_OCCLUSION = 0, - VK_QUERY_TYPE_PIPELINE_STATISTICS = 1, - VK_QUERY_TYPE_BEGIN_RANGE = VK_QUERY_TYPE_OCCLUSION, - VK_QUERY_TYPE_END_RANGE = VK_QUERY_TYPE_PIPELINE_STATISTICS, - VK_QUERY_TYPE_NUM = (VK_QUERY_TYPE_PIPELINE_STATISTICS - VK_QUERY_TYPE_OCCLUSION + 1), - VK_QUERY_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkQueryType; - -typedef enum { - VK_SHARING_MODE_EXCLUSIVE = 0, - VK_SHARING_MODE_CONCURRENT = 1, - VK_SHARING_MODE_BEGIN_RANGE = VK_SHARING_MODE_EXCLUSIVE, - VK_SHARING_MODE_END_RANGE = VK_SHARING_MODE_CONCURRENT, - VK_SHARING_MODE_NUM = (VK_SHARING_MODE_CONCURRENT - VK_SHARING_MODE_EXCLUSIVE + 1), - VK_SHARING_MODE_MAX_ENUM = 0x7FFFFFFF -} VkSharingMode; - -typedef enum { - VK_IMAGE_LAYOUT_UNDEFINED = 0, - VK_IMAGE_LAYOUT_GENERAL = 1, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL = 2, - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL = 3, - VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL = 4, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL = 5, - VK_IMAGE_LAYOUT_TRANSFER_SOURCE_OPTIMAL = 6, - VK_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL = 7, - VK_IMAGE_LAYOUT_PREINITIALIZED = 8, - VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_LAYOUT_END_RANGE = VK_IMAGE_LAYOUT_PREINITIALIZED, - VK_IMAGE_LAYOUT_NUM = (VK_IMAGE_LAYOUT_PREINITIALIZED - VK_IMAGE_LAYOUT_UNDEFINED + 1), - VK_IMAGE_LAYOUT_MAX_ENUM = 0x7FFFFFFF -} VkImageLayout; - -typedef enum { - VK_IMAGE_VIEW_TYPE_1D = 0, - VK_IMAGE_VIEW_TYPE_2D = 1, - VK_IMAGE_VIEW_TYPE_3D = 2, - VK_IMAGE_VIEW_TYPE_CUBE = 3, - VK_IMAGE_VIEW_TYPE_1D_ARRAY = 4, - VK_IMAGE_VIEW_TYPE_2D_ARRAY = 5, - VK_IMAGE_VIEW_TYPE_CUBE_ARRAY = 
6, - VK_IMAGE_VIEW_TYPE_BEGIN_RANGE = VK_IMAGE_VIEW_TYPE_1D, - VK_IMAGE_VIEW_TYPE_END_RANGE = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY, - VK_IMAGE_VIEW_TYPE_NUM = (VK_IMAGE_VIEW_TYPE_CUBE_ARRAY - VK_IMAGE_VIEW_TYPE_1D + 1), - VK_IMAGE_VIEW_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkImageViewType; - -typedef enum { - VK_CHANNEL_SWIZZLE_ZERO = 0, - VK_CHANNEL_SWIZZLE_ONE = 1, - VK_CHANNEL_SWIZZLE_R = 2, - VK_CHANNEL_SWIZZLE_G = 3, - VK_CHANNEL_SWIZZLE_B = 4, - VK_CHANNEL_SWIZZLE_A = 5, - VK_CHANNEL_SWIZZLE_BEGIN_RANGE = VK_CHANNEL_SWIZZLE_ZERO, - VK_CHANNEL_SWIZZLE_END_RANGE = VK_CHANNEL_SWIZZLE_A, - VK_CHANNEL_SWIZZLE_NUM = (VK_CHANNEL_SWIZZLE_A - VK_CHANNEL_SWIZZLE_ZERO + 1), - VK_CHANNEL_SWIZZLE_MAX_ENUM = 0x7FFFFFFF -} VkChannelSwizzle; - -typedef enum { - VK_SHADER_STAGE_VERTEX = 0, - VK_SHADER_STAGE_TESS_CONTROL = 1, - VK_SHADER_STAGE_TESS_EVALUATION = 2, - VK_SHADER_STAGE_GEOMETRY = 3, - VK_SHADER_STAGE_FRAGMENT = 4, - VK_SHADER_STAGE_COMPUTE = 5, - VK_SHADER_STAGE_BEGIN_RANGE = VK_SHADER_STAGE_VERTEX, - VK_SHADER_STAGE_END_RANGE = VK_SHADER_STAGE_COMPUTE, - VK_SHADER_STAGE_NUM = (VK_SHADER_STAGE_COMPUTE - VK_SHADER_STAGE_VERTEX + 1), - VK_SHADER_STAGE_MAX_ENUM = 0x7FFFFFFF -} VkShaderStage; - -typedef enum { - VK_VERTEX_INPUT_STEP_RATE_VERTEX = 0, - VK_VERTEX_INPUT_STEP_RATE_INSTANCE = 1, - VK_VERTEX_INPUT_STEP_RATE_BEGIN_RANGE = VK_VERTEX_INPUT_STEP_RATE_VERTEX, - VK_VERTEX_INPUT_STEP_RATE_END_RANGE = VK_VERTEX_INPUT_STEP_RATE_INSTANCE, - VK_VERTEX_INPUT_STEP_RATE_NUM = (VK_VERTEX_INPUT_STEP_RATE_INSTANCE - VK_VERTEX_INPUT_STEP_RATE_VERTEX + 1), - VK_VERTEX_INPUT_STEP_RATE_MAX_ENUM = 0x7FFFFFFF -} VkVertexInputStepRate; - -typedef enum { - VK_PRIMITIVE_TOPOLOGY_POINT_LIST = 0, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST = 1, - VK_PRIMITIVE_TOPOLOGY_LINE_STRIP = 2, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST = 3, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP = 4, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN = 5, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ = 6, - VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ = 7, - 
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ = 8, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ = 9, - VK_PRIMITIVE_TOPOLOGY_PATCH = 10, - VK_PRIMITIVE_TOPOLOGY_BEGIN_RANGE = VK_PRIMITIVE_TOPOLOGY_POINT_LIST, - VK_PRIMITIVE_TOPOLOGY_END_RANGE = VK_PRIMITIVE_TOPOLOGY_PATCH, - VK_PRIMITIVE_TOPOLOGY_NUM = (VK_PRIMITIVE_TOPOLOGY_PATCH - VK_PRIMITIVE_TOPOLOGY_POINT_LIST + 1), - VK_PRIMITIVE_TOPOLOGY_MAX_ENUM = 0x7FFFFFFF -} VkPrimitiveTopology; - -typedef enum { - VK_FILL_MODE_POINTS = 0, - VK_FILL_MODE_WIREFRAME = 1, - VK_FILL_MODE_SOLID = 2, - VK_FILL_MODE_BEGIN_RANGE = VK_FILL_MODE_POINTS, - VK_FILL_MODE_END_RANGE = VK_FILL_MODE_SOLID, - VK_FILL_MODE_NUM = (VK_FILL_MODE_SOLID - VK_FILL_MODE_POINTS + 1), - VK_FILL_MODE_MAX_ENUM = 0x7FFFFFFF -} VkFillMode; - -typedef enum { - VK_CULL_MODE_NONE = 0, - VK_CULL_MODE_FRONT = 1, - VK_CULL_MODE_BACK = 2, - VK_CULL_MODE_FRONT_AND_BACK = 3, - VK_CULL_MODE_BEGIN_RANGE = VK_CULL_MODE_NONE, - VK_CULL_MODE_END_RANGE = VK_CULL_MODE_FRONT_AND_BACK, - VK_CULL_MODE_NUM = (VK_CULL_MODE_FRONT_AND_BACK - VK_CULL_MODE_NONE + 1), - VK_CULL_MODE_MAX_ENUM = 0x7FFFFFFF -} VkCullMode; - -typedef enum { - VK_FRONT_FACE_CCW = 0, - VK_FRONT_FACE_CW = 1, - VK_FRONT_FACE_BEGIN_RANGE = VK_FRONT_FACE_CCW, - VK_FRONT_FACE_END_RANGE = VK_FRONT_FACE_CW, - VK_FRONT_FACE_NUM = (VK_FRONT_FACE_CW - VK_FRONT_FACE_CCW + 1), - VK_FRONT_FACE_MAX_ENUM = 0x7FFFFFFF -} VkFrontFace; - -typedef enum { - VK_COMPARE_OP_NEVER = 0, - VK_COMPARE_OP_LESS = 1, - VK_COMPARE_OP_EQUAL = 2, - VK_COMPARE_OP_LESS_EQUAL = 3, - VK_COMPARE_OP_GREATER = 4, - VK_COMPARE_OP_NOT_EQUAL = 5, - VK_COMPARE_OP_GREATER_EQUAL = 6, - VK_COMPARE_OP_ALWAYS = 7, - VK_COMPARE_OP_BEGIN_RANGE = VK_COMPARE_OP_NEVER, - VK_COMPARE_OP_END_RANGE = VK_COMPARE_OP_ALWAYS, - VK_COMPARE_OP_NUM = (VK_COMPARE_OP_ALWAYS - VK_COMPARE_OP_NEVER + 1), - VK_COMPARE_OP_MAX_ENUM = 0x7FFFFFFF -} VkCompareOp; - -typedef enum { - VK_STENCIL_OP_KEEP = 0, - VK_STENCIL_OP_ZERO = 1, - VK_STENCIL_OP_REPLACE = 2, - 
VK_STENCIL_OP_INC_CLAMP = 3, - VK_STENCIL_OP_DEC_CLAMP = 4, - VK_STENCIL_OP_INVERT = 5, - VK_STENCIL_OP_INC_WRAP = 6, - VK_STENCIL_OP_DEC_WRAP = 7, - VK_STENCIL_OP_BEGIN_RANGE = VK_STENCIL_OP_KEEP, - VK_STENCIL_OP_END_RANGE = VK_STENCIL_OP_DEC_WRAP, - VK_STENCIL_OP_NUM = (VK_STENCIL_OP_DEC_WRAP - VK_STENCIL_OP_KEEP + 1), - VK_STENCIL_OP_MAX_ENUM = 0x7FFFFFFF -} VkStencilOp; - -typedef enum { - VK_LOGIC_OP_CLEAR = 0, - VK_LOGIC_OP_AND = 1, - VK_LOGIC_OP_AND_REVERSE = 2, - VK_LOGIC_OP_COPY = 3, - VK_LOGIC_OP_AND_INVERTED = 4, - VK_LOGIC_OP_NOOP = 5, - VK_LOGIC_OP_XOR = 6, - VK_LOGIC_OP_OR = 7, - VK_LOGIC_OP_NOR = 8, - VK_LOGIC_OP_EQUIV = 9, - VK_LOGIC_OP_INVERT = 10, - VK_LOGIC_OP_OR_REVERSE = 11, - VK_LOGIC_OP_COPY_INVERTED = 12, - VK_LOGIC_OP_OR_INVERTED = 13, - VK_LOGIC_OP_NAND = 14, - VK_LOGIC_OP_SET = 15, - VK_LOGIC_OP_BEGIN_RANGE = VK_LOGIC_OP_CLEAR, - VK_LOGIC_OP_END_RANGE = VK_LOGIC_OP_SET, - VK_LOGIC_OP_NUM = (VK_LOGIC_OP_SET - VK_LOGIC_OP_CLEAR + 1), - VK_LOGIC_OP_MAX_ENUM = 0x7FFFFFFF -} VkLogicOp; - -typedef enum { - VK_BLEND_ZERO = 0, - VK_BLEND_ONE = 1, - VK_BLEND_SRC_COLOR = 2, - VK_BLEND_ONE_MINUS_SRC_COLOR = 3, - VK_BLEND_DEST_COLOR = 4, - VK_BLEND_ONE_MINUS_DEST_COLOR = 5, - VK_BLEND_SRC_ALPHA = 6, - VK_BLEND_ONE_MINUS_SRC_ALPHA = 7, - VK_BLEND_DEST_ALPHA = 8, - VK_BLEND_ONE_MINUS_DEST_ALPHA = 9, - VK_BLEND_CONSTANT_COLOR = 10, - VK_BLEND_ONE_MINUS_CONSTANT_COLOR = 11, - VK_BLEND_CONSTANT_ALPHA = 12, - VK_BLEND_ONE_MINUS_CONSTANT_ALPHA = 13, - VK_BLEND_SRC_ALPHA_SATURATE = 14, - VK_BLEND_SRC1_COLOR = 15, - VK_BLEND_ONE_MINUS_SRC1_COLOR = 16, - VK_BLEND_SRC1_ALPHA = 17, - VK_BLEND_ONE_MINUS_SRC1_ALPHA = 18, - VK_BLEND_BEGIN_RANGE = VK_BLEND_ZERO, - VK_BLEND_END_RANGE = VK_BLEND_ONE_MINUS_SRC1_ALPHA, - VK_BLEND_NUM = (VK_BLEND_ONE_MINUS_SRC1_ALPHA - VK_BLEND_ZERO + 1), - VK_BLEND_MAX_ENUM = 0x7FFFFFFF -} VkBlend; - -typedef enum { - VK_BLEND_OP_ADD = 0, - VK_BLEND_OP_SUBTRACT = 1, - VK_BLEND_OP_REVERSE_SUBTRACT = 2, - VK_BLEND_OP_MIN = 3, - 
VK_BLEND_OP_MAX = 4, - VK_BLEND_OP_BEGIN_RANGE = VK_BLEND_OP_ADD, - VK_BLEND_OP_END_RANGE = VK_BLEND_OP_MAX, - VK_BLEND_OP_NUM = (VK_BLEND_OP_MAX - VK_BLEND_OP_ADD + 1), - VK_BLEND_OP_MAX_ENUM = 0x7FFFFFFF -} VkBlendOp; - -typedef enum { - VK_DYNAMIC_STATE_VIEWPORT = 0, - VK_DYNAMIC_STATE_SCISSOR = 1, - VK_DYNAMIC_STATE_LINE_WIDTH = 2, - VK_DYNAMIC_STATE_DEPTH_BIAS = 3, - VK_DYNAMIC_STATE_BLEND_CONSTANTS = 4, - VK_DYNAMIC_STATE_DEPTH_BOUNDS = 5, - VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK = 6, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK = 7, - VK_DYNAMIC_STATE_STENCIL_REFERENCE = 8, - VK_DYNAMIC_STATE_BEGIN_RANGE = VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_END_RANGE = VK_DYNAMIC_STATE_STENCIL_REFERENCE, - VK_DYNAMIC_STATE_NUM = (VK_DYNAMIC_STATE_STENCIL_REFERENCE - VK_DYNAMIC_STATE_VIEWPORT + 1), - VK_DYNAMIC_STATE_MAX_ENUM = 0x7FFFFFFF -} VkDynamicState; - -typedef enum { - VK_TEX_FILTER_NEAREST = 0, - VK_TEX_FILTER_LINEAR = 1, - VK_TEX_FILTER_BEGIN_RANGE = VK_TEX_FILTER_NEAREST, - VK_TEX_FILTER_END_RANGE = VK_TEX_FILTER_LINEAR, - VK_TEX_FILTER_NUM = (VK_TEX_FILTER_LINEAR - VK_TEX_FILTER_NEAREST + 1), - VK_TEX_FILTER_MAX_ENUM = 0x7FFFFFFF -} VkTexFilter; - -typedef enum { - VK_TEX_MIPMAP_MODE_BASE = 0, - VK_TEX_MIPMAP_MODE_NEAREST = 1, - VK_TEX_MIPMAP_MODE_LINEAR = 2, - VK_TEX_MIPMAP_MODE_BEGIN_RANGE = VK_TEX_MIPMAP_MODE_BASE, - VK_TEX_MIPMAP_MODE_END_RANGE = VK_TEX_MIPMAP_MODE_LINEAR, - VK_TEX_MIPMAP_MODE_NUM = (VK_TEX_MIPMAP_MODE_LINEAR - VK_TEX_MIPMAP_MODE_BASE + 1), - VK_TEX_MIPMAP_MODE_MAX_ENUM = 0x7FFFFFFF -} VkTexMipmapMode; - -typedef enum { - VK_TEX_ADDRESS_MODE_WRAP = 0, - VK_TEX_ADDRESS_MODE_MIRROR = 1, - VK_TEX_ADDRESS_MODE_CLAMP = 2, - VK_TEX_ADDRESS_MODE_MIRROR_ONCE = 3, - VK_TEX_ADDRESS_MODE_CLAMP_BORDER = 4, - VK_TEX_ADDRESS_BEGIN_RANGE = VK_TEX_ADDRESS_MODE_WRAP, - VK_TEX_ADDRESS_END_RANGE = VK_TEX_ADDRESS_MODE_CLAMP_BORDER, - VK_TEX_ADDRESS_NUM = (VK_TEX_ADDRESS_MODE_CLAMP_BORDER - VK_TEX_ADDRESS_MODE_WRAP + 1), - VK_TEX_ADDRESS_MAX_ENUM = 0x7FFFFFFF 
-} VkTexAddressMode; - -typedef enum { - VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK = 0, - VK_BORDER_COLOR_INT_TRANSPARENT_BLACK = 1, - VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK = 2, - VK_BORDER_COLOR_INT_OPAQUE_BLACK = 3, - VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE = 4, - VK_BORDER_COLOR_INT_OPAQUE_WHITE = 5, - VK_BORDER_COLOR_BEGIN_RANGE = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, - VK_BORDER_COLOR_END_RANGE = VK_BORDER_COLOR_INT_OPAQUE_WHITE, - VK_BORDER_COLOR_NUM = (VK_BORDER_COLOR_INT_OPAQUE_WHITE - VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK + 1), - VK_BORDER_COLOR_MAX_ENUM = 0x7FFFFFFF -} VkBorderColor; - -typedef enum { - VK_DESCRIPTOR_TYPE_SAMPLER = 0, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER = 1, - VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE = 2, - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE = 3, - VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER = 4, - VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER = 5, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER = 6, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER = 7, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC = 8, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = 9, - VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = 10, - VK_DESCRIPTOR_TYPE_BEGIN_RANGE = VK_DESCRIPTOR_TYPE_SAMPLER, - VK_DESCRIPTOR_TYPE_END_RANGE = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, - VK_DESCRIPTOR_TYPE_NUM = (VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT - VK_DESCRIPTOR_TYPE_SAMPLER + 1), - VK_DESCRIPTOR_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkDescriptorType; - -typedef enum { - VK_DESCRIPTOR_POOL_USAGE_ONE_SHOT = 0, - VK_DESCRIPTOR_POOL_USAGE_DYNAMIC = 1, - VK_DESCRIPTOR_POOL_USAGE_BEGIN_RANGE = VK_DESCRIPTOR_POOL_USAGE_ONE_SHOT, - VK_DESCRIPTOR_POOL_USAGE_END_RANGE = VK_DESCRIPTOR_POOL_USAGE_DYNAMIC, - VK_DESCRIPTOR_POOL_USAGE_NUM = (VK_DESCRIPTOR_POOL_USAGE_DYNAMIC - VK_DESCRIPTOR_POOL_USAGE_ONE_SHOT + 1), - VK_DESCRIPTOR_POOL_USAGE_MAX_ENUM = 0x7FFFFFFF -} VkDescriptorPoolUsage; - -typedef enum { - VK_DESCRIPTOR_SET_USAGE_ONE_SHOT = 0, - VK_DESCRIPTOR_SET_USAGE_STATIC = 1, - VK_DESCRIPTOR_SET_USAGE_BEGIN_RANGE = VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, - 
VK_DESCRIPTOR_SET_USAGE_END_RANGE = VK_DESCRIPTOR_SET_USAGE_STATIC, - VK_DESCRIPTOR_SET_USAGE_NUM = (VK_DESCRIPTOR_SET_USAGE_STATIC - VK_DESCRIPTOR_SET_USAGE_ONE_SHOT + 1), - VK_DESCRIPTOR_SET_USAGE_MAX_ENUM = 0x7FFFFFFF -} VkDescriptorSetUsage; - -typedef enum { - VK_ATTACHMENT_LOAD_OP_LOAD = 0, - VK_ATTACHMENT_LOAD_OP_CLEAR = 1, - VK_ATTACHMENT_LOAD_OP_DONT_CARE = 2, - VK_ATTACHMENT_LOAD_OP_BEGIN_RANGE = VK_ATTACHMENT_LOAD_OP_LOAD, - VK_ATTACHMENT_LOAD_OP_END_RANGE = VK_ATTACHMENT_LOAD_OP_DONT_CARE, - VK_ATTACHMENT_LOAD_OP_NUM = (VK_ATTACHMENT_LOAD_OP_DONT_CARE - VK_ATTACHMENT_LOAD_OP_LOAD + 1), - VK_ATTACHMENT_LOAD_OP_MAX_ENUM = 0x7FFFFFFF -} VkAttachmentLoadOp; - -typedef enum { - VK_ATTACHMENT_STORE_OP_STORE = 0, - VK_ATTACHMENT_STORE_OP_DONT_CARE = 1, - VK_ATTACHMENT_STORE_OP_BEGIN_RANGE = VK_ATTACHMENT_STORE_OP_STORE, - VK_ATTACHMENT_STORE_OP_END_RANGE = VK_ATTACHMENT_STORE_OP_DONT_CARE, - VK_ATTACHMENT_STORE_OP_NUM = (VK_ATTACHMENT_STORE_OP_DONT_CARE - VK_ATTACHMENT_STORE_OP_STORE + 1), - VK_ATTACHMENT_STORE_OP_MAX_ENUM = 0x7FFFFFFF -} VkAttachmentStoreOp; - -typedef enum { - VK_PIPELINE_BIND_POINT_COMPUTE = 0, - VK_PIPELINE_BIND_POINT_GRAPHICS = 1, - VK_PIPELINE_BIND_POINT_BEGIN_RANGE = VK_PIPELINE_BIND_POINT_COMPUTE, - VK_PIPELINE_BIND_POINT_END_RANGE = VK_PIPELINE_BIND_POINT_GRAPHICS, - VK_PIPELINE_BIND_POINT_NUM = (VK_PIPELINE_BIND_POINT_GRAPHICS - VK_PIPELINE_BIND_POINT_COMPUTE + 1), - VK_PIPELINE_BIND_POINT_MAX_ENUM = 0x7FFFFFFF -} VkPipelineBindPoint; - -typedef enum { - VK_CMD_BUFFER_LEVEL_PRIMARY = 0, - VK_CMD_BUFFER_LEVEL_SECONDARY = 1, - VK_CMD_BUFFER_LEVEL_BEGIN_RANGE = VK_CMD_BUFFER_LEVEL_PRIMARY, - VK_CMD_BUFFER_LEVEL_END_RANGE = VK_CMD_BUFFER_LEVEL_SECONDARY, - VK_CMD_BUFFER_LEVEL_NUM = (VK_CMD_BUFFER_LEVEL_SECONDARY - VK_CMD_BUFFER_LEVEL_PRIMARY + 1), - VK_CMD_BUFFER_LEVEL_MAX_ENUM = 0x7FFFFFFF -} VkCmdBufferLevel; - -typedef enum { - VK_INDEX_TYPE_UINT16 = 0, - VK_INDEX_TYPE_UINT32 = 1, - VK_INDEX_TYPE_BEGIN_RANGE = VK_INDEX_TYPE_UINT16, - 
VK_INDEX_TYPE_END_RANGE = VK_INDEX_TYPE_UINT32, - VK_INDEX_TYPE_NUM = (VK_INDEX_TYPE_UINT32 - VK_INDEX_TYPE_UINT16 + 1), - VK_INDEX_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkIndexType; - -typedef enum { - VK_TIMESTAMP_TYPE_TOP = 0, - VK_TIMESTAMP_TYPE_BOTTOM = 1, - VK_TIMESTAMP_TYPE_BEGIN_RANGE = VK_TIMESTAMP_TYPE_TOP, - VK_TIMESTAMP_TYPE_END_RANGE = VK_TIMESTAMP_TYPE_BOTTOM, - VK_TIMESTAMP_TYPE_NUM = (VK_TIMESTAMP_TYPE_BOTTOM - VK_TIMESTAMP_TYPE_TOP + 1), - VK_TIMESTAMP_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkTimestampType; - -typedef enum { - VK_RENDER_PASS_CONTENTS_INLINE = 0, - VK_RENDER_PASS_CONTENTS_SECONDARY_CMD_BUFFERS = 1, - VK_RENDER_PASS_CONTENTS_BEGIN_RANGE = VK_RENDER_PASS_CONTENTS_INLINE, - VK_RENDER_PASS_CONTENTS_END_RANGE = VK_RENDER_PASS_CONTENTS_SECONDARY_CMD_BUFFERS, - VK_RENDER_PASS_CONTENTS_NUM = (VK_RENDER_PASS_CONTENTS_SECONDARY_CMD_BUFFERS - VK_RENDER_PASS_CONTENTS_INLINE + 1), - VK_RENDER_PASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF -} VkRenderPassContents; - - -typedef enum { - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT = 0x00000001, - VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT = 0x00000002, - VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT = 0x00000004, - VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT = 0x00000008, - VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT = 0x00000010, - VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT = 0x00000020, - VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT = 0x00000040, - VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT = 0x00000080, - VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT = 0x00000100, - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000200, - VK_FORMAT_FEATURE_BLIT_SOURCE_BIT = 0x00000400, - VK_FORMAT_FEATURE_BLIT_DESTINATION_BIT = 0x00000800, -} VkFormatFeatureFlagBits; -typedef VkFlags VkFormatFeatureFlags; - -typedef enum { - VK_IMAGE_USAGE_TRANSFER_SOURCE_BIT = 0x00000001, - VK_IMAGE_USAGE_TRANSFER_DESTINATION_BIT = 0x00000002, - VK_IMAGE_USAGE_SAMPLED_BIT = 0x00000004, - VK_IMAGE_USAGE_STORAGE_BIT = 0x00000008, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT = 
0x00000010, - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000020, - VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT = 0x00000040, - VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT = 0x00000080, -} VkImageUsageFlagBits; -typedef VkFlags VkImageUsageFlags; - -typedef enum { - VK_IMAGE_CREATE_SPARSE_BINDING_BIT = 0x00000001, - VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, - VK_IMAGE_CREATE_SPARSE_ALIASED_BIT = 0x00000004, - VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000008, - VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000010, -} VkImageCreateFlagBits; -typedef VkFlags VkImageCreateFlags; - -typedef enum { - VK_SAMPLE_COUNT_1_BIT = 0x00000001, - VK_SAMPLE_COUNT_2_BIT = 0x00000002, - VK_SAMPLE_COUNT_4_BIT = 0x00000004, - VK_SAMPLE_COUNT_8_BIT = 0x00000008, - VK_SAMPLE_COUNT_16_BIT = 0x00000010, - VK_SAMPLE_COUNT_32_BIT = 0x00000020, - VK_SAMPLE_COUNT_64_BIT = 0x00000040, -} VkSampleCountFlagBits; -typedef VkFlags VkSampleCountFlags; - -typedef enum { - VK_QUEUE_GRAPHICS_BIT = 0x00000001, - VK_QUEUE_COMPUTE_BIT = 0x00000002, - VK_QUEUE_DMA_BIT = 0x00000004, - VK_QUEUE_SPARSE_MEMMGR_BIT = 0x00000008, - VK_QUEUE_EXTENDED_BIT = 0x40000000, -} VkQueueFlagBits; -typedef VkFlags VkQueueFlags; - -typedef enum { - VK_MEMORY_PROPERTY_DEVICE_ONLY = 0, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT = 0x00000001, - VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT = 0x00000002, - VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT = 0x00000004, - VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT = 0x00000008, - VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT = 0x00000010, -} VkMemoryPropertyFlagBits; -typedef VkFlags VkMemoryPropertyFlags; - -typedef enum { - VK_MEMORY_HEAP_HOST_LOCAL_BIT = 0x00000001, -} VkMemoryHeapFlagBits; -typedef VkFlags VkMemoryHeapFlags; -typedef VkFlags VkMemoryMapFlags; - -typedef enum { - VK_SPARSE_IMAGE_FMT_SINGLE_MIPTAIL_BIT = 0x00000001, - VK_SPARSE_IMAGE_FMT_ALIGNED_MIP_SIZE_BIT = 0x00000002, - VK_SPARSE_IMAGE_FMT_NONSTD_BLOCK_SIZE_BIT = 0x00000004, -} VkSparseImageFormatFlagBits; -typedef VkFlags 
VkSparseImageFormatFlags; - -typedef enum { - VK_SPARSE_MEMORY_BIND_REPLICATE_64KIB_BLOCK_BIT = 0x00000001, -} VkSparseMemoryBindFlagBits; -typedef VkFlags VkSparseMemoryBindFlags; - -typedef enum { - VK_FENCE_CREATE_SIGNALED_BIT = 0x00000001, -} VkFenceCreateFlagBits; -typedef VkFlags VkFenceCreateFlags; -typedef VkFlags VkSemaphoreCreateFlags; -typedef VkFlags VkEventCreateFlags; - -typedef enum { - VK_QUERY_PIPELINE_STATISTIC_IA_VERTICES_BIT = 0x00000001, - VK_QUERY_PIPELINE_STATISTIC_IA_PRIMITIVES_BIT = 0x00000002, - VK_QUERY_PIPELINE_STATISTIC_VS_INVOCATIONS_BIT = 0x00000004, - VK_QUERY_PIPELINE_STATISTIC_GS_INVOCATIONS_BIT = 0x00000008, - VK_QUERY_PIPELINE_STATISTIC_GS_PRIMITIVES_BIT = 0x00000010, - VK_QUERY_PIPELINE_STATISTIC_C_INVOCATIONS_BIT = 0x00000020, - VK_QUERY_PIPELINE_STATISTIC_C_PRIMITIVES_BIT = 0x00000040, - VK_QUERY_PIPELINE_STATISTIC_FS_INVOCATIONS_BIT = 0x00000080, - VK_QUERY_PIPELINE_STATISTIC_TCS_PATCHES_BIT = 0x00000100, - VK_QUERY_PIPELINE_STATISTIC_TES_INVOCATIONS_BIT = 0x00000200, - VK_QUERY_PIPELINE_STATISTIC_CS_INVOCATIONS_BIT = 0x00000400, -} VkQueryPipelineStatisticFlagBits; -typedef VkFlags VkQueryPipelineStatisticFlags; - -typedef enum { - VK_QUERY_RESULT_DEFAULT = 0, - VK_QUERY_RESULT_64_BIT = 0x00000001, - VK_QUERY_RESULT_WAIT_BIT = 0x00000002, - VK_QUERY_RESULT_WITH_AVAILABILITY_BIT = 0x00000004, - VK_QUERY_RESULT_PARTIAL_BIT = 0x00000008, -} VkQueryResultFlagBits; -typedef VkFlags VkQueryResultFlags; - -typedef enum { - VK_BUFFER_USAGE_TRANSFER_SOURCE_BIT = 0x00000001, - VK_BUFFER_USAGE_TRANSFER_DESTINATION_BIT = 0x00000002, - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT = 0x00000004, - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT = 0x00000008, - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT = 0x00000010, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT = 0x00000020, - VK_BUFFER_USAGE_INDEX_BUFFER_BIT = 0x00000040, - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT = 0x00000080, - VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT = 0x00000100, -} VkBufferUsageFlagBits; -typedef VkFlags 
VkBufferUsageFlags; - -typedef enum { - VK_BUFFER_CREATE_SPARSE_BINDING_BIT = 0x00000001, - VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, - VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004, -} VkBufferCreateFlagBits; -typedef VkFlags VkBufferCreateFlags; - -typedef enum { - VK_IMAGE_ASPECT_COLOR_BIT = 0x00000001, - VK_IMAGE_ASPECT_DEPTH_BIT = 0x00000002, - VK_IMAGE_ASPECT_STENCIL_BIT = 0x00000004, - VK_IMAGE_ASPECT_METADATA_BIT = 0x00000008, -} VkImageAspectFlagBits; -typedef VkFlags VkImageAspectFlags; - -typedef enum { - VK_IMAGE_VIEW_CREATE_READ_ONLY_DEPTH_BIT = 0x00000001, - VK_IMAGE_VIEW_CREATE_READ_ONLY_STENCIL_BIT = 0x00000002, -} VkImageViewCreateFlagBits; -typedef VkFlags VkImageViewCreateFlags; -typedef VkFlags VkShaderModuleCreateFlags; -typedef VkFlags VkShaderCreateFlags; - -typedef enum { - VK_CHANNEL_R_BIT = 0x00000001, - VK_CHANNEL_G_BIT = 0x00000002, - VK_CHANNEL_B_BIT = 0x00000004, - VK_CHANNEL_A_BIT = 0x00000008, -} VkChannelFlagBits; -typedef VkFlags VkChannelFlags; - -typedef enum { - VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT = 0x00000001, - VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT = 0x00000002, - VK_PIPELINE_CREATE_DERIVATIVE_BIT = 0x00000004, -} VkPipelineCreateFlagBits; -typedef VkFlags VkPipelineCreateFlags; - -typedef enum { - VK_SHADER_STAGE_VERTEX_BIT = 0x00000001, - VK_SHADER_STAGE_TESS_CONTROL_BIT = 0x00000002, - VK_SHADER_STAGE_TESS_EVALUATION_BIT = 0x00000004, - VK_SHADER_STAGE_GEOMETRY_BIT = 0x00000008, - VK_SHADER_STAGE_FRAGMENT_BIT = 0x00000010, - VK_SHADER_STAGE_COMPUTE_BIT = 0x00000020, - VK_SHADER_STAGE_ALL = 0x7FFFFFFF, -} VkShaderStageFlagBits; -typedef VkFlags VkShaderStageFlags; - -typedef enum { - VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT = 0x00000001, -} VkAttachmentDescriptionFlagBits; -typedef VkFlags VkAttachmentDescriptionFlags; - -typedef enum { - VK_SUBPASS_DESCRIPTION_NO_OVERDRAW_BIT = 0x00000001, -} VkSubpassDescriptionFlagBits; -typedef VkFlags VkSubpassDescriptionFlags; - -typedef enum { - 
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT = 0x00000001, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT = 0x00000002, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT = 0x00000004, - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT = 0x00000008, - VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT = 0x00000010, - VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT = 0x00000020, - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT = 0x00000040, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT = 0x00000080, - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT = 0x00000100, - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT = 0x00000200, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT = 0x00000400, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT = 0x00000800, - VK_PIPELINE_STAGE_TRANSFER_BIT = 0x00001000, - VK_PIPELINE_STAGE_HOST_BIT = 0x00002000, - VK_PIPELINE_STAGE_ALL_GRAPHICS = 0x000007FF, - VK_PIPELINE_STAGE_ALL_GPU_COMMANDS = 0x00001FFF, -} VkPipelineStageFlagBits; -typedef VkFlags VkPipelineStageFlags; - -typedef enum { - VK_MEMORY_OUTPUT_HOST_WRITE_BIT = 0x00000001, - VK_MEMORY_OUTPUT_SHADER_WRITE_BIT = 0x00000002, - VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT = 0x00000004, - VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000008, - VK_MEMORY_OUTPUT_TRANSFER_BIT = 0x00000010, -} VkMemoryOutputFlagBits; -typedef VkFlags VkMemoryOutputFlags; - -typedef enum { - VK_MEMORY_INPUT_HOST_READ_BIT = 0x00000001, - VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT = 0x00000002, - VK_MEMORY_INPUT_INDEX_FETCH_BIT = 0x00000004, - VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT = 0x00000008, - VK_MEMORY_INPUT_UNIFORM_READ_BIT = 0x00000010, - VK_MEMORY_INPUT_SHADER_READ_BIT = 0x00000020, - VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT = 0x00000040, - VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000080, - VK_MEMORY_INPUT_INPUT_ATTACHMENT_BIT = 0x00000100, - VK_MEMORY_INPUT_TRANSFER_BIT = 0x00000200, -} VkMemoryInputFlagBits; -typedef VkFlags VkMemoryInputFlags; - -typedef enum { - VK_CMD_POOL_CREATE_TRANSIENT_BIT = 0x00000001, - VK_CMD_POOL_CREATE_RESET_COMMAND_BUFFER_BIT = 0x00000002, -} 
VkCmdPoolCreateFlagBits; -typedef VkFlags VkCmdPoolCreateFlags; - -typedef enum { - VK_CMD_POOL_RESET_RELEASE_RESOURCES_BIT = 0x00000001, -} VkCmdPoolResetFlagBits; -typedef VkFlags VkCmdPoolResetFlags; -typedef VkFlags VkCmdBufferCreateFlags; - -typedef enum { - VK_CMD_BUFFER_OPTIMIZE_SMALL_BATCH_BIT = 0x00000001, - VK_CMD_BUFFER_OPTIMIZE_PIPELINE_SWITCH_BIT = 0x00000002, - VK_CMD_BUFFER_OPTIMIZE_ONE_TIME_SUBMIT_BIT = 0x00000004, - VK_CMD_BUFFER_OPTIMIZE_DESCRIPTOR_SET_SWITCH_BIT = 0x00000008, - VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT = 0x00000010, -} VkCmdBufferOptimizeFlagBits; -typedef VkFlags VkCmdBufferOptimizeFlags; - -typedef enum { - VK_CMD_BUFFER_RESET_RELEASE_RESOURCES_BIT = 0x00000001, -} VkCmdBufferResetFlagBits; -typedef VkFlags VkCmdBufferResetFlags; - -typedef enum { - VK_STENCIL_FACE_NONE = 0, - VK_STENCIL_FACE_FRONT_BIT = 0x00000001, - VK_STENCIL_FACE_BACK_BIT = 0x00000002, -} VkStencilFaceFlagBits; -typedef VkFlags VkStencilFaceFlags; - -typedef enum { - VK_QUERY_CONTROL_CONSERVATIVE_BIT = 0x00000001, -} VkQueryControlFlagBits; -typedef VkFlags VkQueryControlFlags; - -typedef void* (VKAPI *PFN_vkAllocFunction)( - void* pUserData, - size_t size, - size_t alignment, - VkSystemAllocType allocType); - -typedef void (VKAPI *PFN_vkFreeFunction)( - void* pUserData, - void* pMem); - -typedef void (VKAPI *PFN_vkVoidFunction)(void); - -typedef struct { - VkStructureType sType; - const void* pNext; - const char* pAppName; - uint32_t appVersion; - const char* pEngineName; - uint32_t engineVersion; - uint32_t apiVersion; -} VkApplicationInfo; - -typedef struct { - void* pUserData; - PFN_vkAllocFunction pfnAlloc; - PFN_vkFreeFunction pfnFree; -} VkAllocCallbacks; - -typedef struct { - VkStructureType sType; - const void* pNext; - const VkApplicationInfo* pAppInfo; - const VkAllocCallbacks* pAllocCb; - uint32_t layerCount; - const char*const* ppEnabledLayerNames; - uint32_t extensionCount; - const char*const* ppEnabledExtensionNames; -} 
VkInstanceCreateInfo; - -typedef struct { - VkBool32 robustBufferAccess; - VkBool32 fullDrawIndexUint32; - VkBool32 imageCubeArray; - VkBool32 independentBlend; - VkBool32 geometryShader; - VkBool32 tessellationShader; - VkBool32 sampleRateShading; - VkBool32 dualSourceBlend; - VkBool32 logicOp; - VkBool32 multiDrawIndirect; - VkBool32 depthClip; - VkBool32 depthBiasClamp; - VkBool32 fillModeNonSolid; - VkBool32 depthBounds; - VkBool32 wideLines; - VkBool32 largePoints; - VkBool32 textureCompressionETC2; - VkBool32 textureCompressionASTC_LDR; - VkBool32 textureCompressionBC; - VkBool32 occlusionQueryNonConservative; - VkBool32 pipelineStatisticsQuery; - VkBool32 vertexSideEffects; - VkBool32 tessellationSideEffects; - VkBool32 geometrySideEffects; - VkBool32 fragmentSideEffects; - VkBool32 shaderTessellationPointSize; - VkBool32 shaderGeometryPointSize; - VkBool32 shaderImageGatherExtended; - VkBool32 shaderStorageImageExtendedFormats; - VkBool32 shaderStorageImageMultisample; - VkBool32 shaderUniformBufferArrayDynamicIndexing; - VkBool32 shaderSampledImageArrayDynamicIndexing; - VkBool32 shaderStorageBufferArrayDynamicIndexing; - VkBool32 shaderStorageImageArrayDynamicIndexing; - VkBool32 shaderClipDistance; - VkBool32 shaderCullDistance; - VkBool32 shaderFloat64; - VkBool32 shaderInt64; - VkBool32 shaderInt16; - VkBool32 shaderResourceResidency; - VkBool32 shaderResourceMinLOD; - VkBool32 alphaToOne; - VkBool32 sparseBinding; - VkBool32 sparseResidencyBuffer; - VkBool32 sparseResidencyImage2D; - VkBool32 sparseResidencyImage3D; - VkBool32 sparseResidency2Samples; - VkBool32 sparseResidency4Samples; - VkBool32 sparseResidency8Samples; - VkBool32 sparseResidency16Samples; - VkBool32 sparseResidencyAliased; -} VkPhysicalDeviceFeatures; - -typedef struct { - VkFormatFeatureFlags linearTilingFeatures; - VkFormatFeatureFlags optimalTilingFeatures; - VkFormatFeatureFlags bufferFeatures; -} VkFormatProperties; - -typedef struct { - int32_t width; - int32_t height; - 
int32_t depth; -} VkExtent3D; - -typedef struct { - VkExtent3D maxExtent; - uint32_t maxMipLevels; - uint32_t maxArraySize; - VkSampleCountFlags sampleCounts; - VkDeviceSize maxResourceSize; -} VkImageFormatProperties; - -typedef struct { - uint32_t maxImageDimension1D; - uint32_t maxImageDimension2D; - uint32_t maxImageDimension3D; - uint32_t maxImageDimensionCube; - uint32_t maxImageArrayLayers; - VkSampleCountFlags sampleCounts; - uint32_t maxTexelBufferSize; - uint32_t maxUniformBufferSize; - uint32_t maxStorageBufferSize; - uint32_t maxPushConstantsSize; - uint32_t maxMemoryAllocationCount; - VkDeviceSize bufferImageGranularity; - VkDeviceSize sparseAddressSpaceSize; - uint32_t maxBoundDescriptorSets; - uint32_t maxDescriptorSets; - uint32_t maxPerStageDescriptorSamplers; - uint32_t maxPerStageDescriptorUniformBuffers; - uint32_t maxPerStageDescriptorStorageBuffers; - uint32_t maxPerStageDescriptorSampledImages; - uint32_t maxPerStageDescriptorStorageImages; - uint32_t maxDescriptorSetSamplers; - uint32_t maxDescriptorSetUniformBuffers; - uint32_t maxDescriptorSetUniformBuffersDynamic; - uint32_t maxDescriptorSetStorageBuffers; - uint32_t maxDescriptorSetStorageBuffersDynamic; - uint32_t maxDescriptorSetSampledImages; - uint32_t maxDescriptorSetStorageImages; - uint32_t maxVertexInputAttributes; - uint32_t maxVertexInputBindings; - uint32_t maxVertexInputAttributeOffset; - uint32_t maxVertexInputBindingStride; - uint32_t maxVertexOutputComponents; - uint32_t maxTessGenLevel; - uint32_t maxTessPatchSize; - uint32_t maxTessControlPerVertexInputComponents; - uint32_t maxTessControlPerVertexOutputComponents; - uint32_t maxTessControlPerPatchOutputComponents; - uint32_t maxTessControlTotalOutputComponents; - uint32_t maxTessEvaluationInputComponents; - uint32_t maxTessEvaluationOutputComponents; - uint32_t maxGeometryShaderInvocations; - uint32_t maxGeometryInputComponents; - uint32_t maxGeometryOutputComponents; - uint32_t maxGeometryOutputVertices; - uint32_t 
maxGeometryTotalOutputComponents; - uint32_t maxFragmentInputComponents; - uint32_t maxFragmentOutputBuffers; - uint32_t maxFragmentDualSourceBuffers; - uint32_t maxFragmentCombinedOutputResources; - uint32_t maxComputeSharedMemorySize; - uint32_t maxComputeWorkGroupCount[3]; - uint32_t maxComputeWorkGroupInvocations; - uint32_t maxComputeWorkGroupSize[3]; - uint32_t subPixelPrecisionBits; - uint32_t subTexelPrecisionBits; - uint32_t mipmapPrecisionBits; - uint32_t maxDrawIndexedIndexValue; - uint32_t maxDrawIndirectInstanceCount; - VkBool32 primitiveRestartForPatches; - float maxSamplerLodBias; - float maxSamplerAnisotropy; - uint32_t maxViewports; - uint32_t maxViewportDimensions[2]; - float viewportBoundsRange[2]; - uint32_t viewportSubPixelBits; - uint32_t minMemoryMapAlignment; - uint32_t minTexelBufferOffsetAlignment; - uint32_t minUniformBufferOffsetAlignment; - uint32_t minStorageBufferOffsetAlignment; - uint32_t minTexelOffset; - uint32_t maxTexelOffset; - uint32_t minTexelGatherOffset; - uint32_t maxTexelGatherOffset; - float minInterpolationOffset; - float maxInterpolationOffset; - uint32_t subPixelInterpolationOffsetBits; - uint32_t maxFramebufferWidth; - uint32_t maxFramebufferHeight; - uint32_t maxFramebufferLayers; - uint32_t maxFramebufferColorSamples; - uint32_t maxFramebufferDepthSamples; - uint32_t maxFramebufferStencilSamples; - uint32_t maxColorAttachments; - uint32_t maxSampledImageColorSamples; - uint32_t maxSampledImageDepthSamples; - uint32_t maxSampledImageIntegerSamples; - uint32_t maxStorageImageSamples; - uint32_t maxSampleMaskWords; - uint64_t timestampFrequency; - uint32_t maxClipDistances; - uint32_t maxCullDistances; - uint32_t maxCombinedClipAndCullDistances; - float pointSizeRange[2]; - float lineWidthRange[2]; - float pointSizeGranularity; - float lineWidthGranularity; -} VkPhysicalDeviceLimits; - -typedef struct { - VkBool32 residencyStandard2DBlockShape; - VkBool32 residencyStandard2DMSBlockShape; - VkBool32 
residencyStandard3DBlockShape; - VkBool32 residencyAlignedMipSize; - VkBool32 residencyNonResident; - VkBool32 residencyNonResidentStrict; -} VkPhysicalDeviceSparseProperties; - -typedef struct { - uint32_t apiVersion; - uint32_t driverVersion; - uint32_t vendorId; - uint32_t deviceId; - VkPhysicalDeviceType deviceType; - char deviceName[VK_MAX_PHYSICAL_DEVICE_NAME]; - uint8_t pipelineCacheUUID[VK_UUID_LENGTH]; - VkPhysicalDeviceLimits limits; - VkPhysicalDeviceSparseProperties sparseProperties; -} VkPhysicalDeviceProperties; - -typedef struct { - VkQueueFlags queueFlags; - uint32_t queueCount; - VkBool32 supportsTimestamps; -} VkQueueFamilyProperties; - -typedef struct { - VkMemoryPropertyFlags propertyFlags; - uint32_t heapIndex; -} VkMemoryType; - -typedef struct { - VkDeviceSize size; - VkMemoryHeapFlags flags; -} VkMemoryHeap; - -typedef struct { - uint32_t memoryTypeCount; - VkMemoryType memoryTypes[VK_MAX_MEMORY_TYPES]; - uint32_t memoryHeapCount; - VkMemoryHeap memoryHeaps[VK_MAX_MEMORY_HEAPS]; -} VkPhysicalDeviceMemoryProperties; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t queueFamilyIndex; - uint32_t queueCount; -} VkDeviceQueueCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t queueRecordCount; - const VkDeviceQueueCreateInfo* pRequestedQueues; - uint32_t layerCount; - const char*const* ppEnabledLayerNames; - uint32_t extensionCount; - const char*const* ppEnabledExtensionNames; - const VkPhysicalDeviceFeatures* pEnabledFeatures; -} VkDeviceCreateInfo; - -typedef struct { - char extName[VK_MAX_EXTENSION_NAME]; - uint32_t specVersion; -} VkExtensionProperties; - -typedef struct { - char layerName[VK_MAX_EXTENSION_NAME]; - uint32_t specVersion; - uint32_t implVersion; - char description[VK_MAX_DESCRIPTION]; -} VkLayerProperties; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkDeviceSize allocationSize; - uint32_t memoryTypeIndex; -} VkMemoryAllocInfo; - -typedef 
struct { - VkStructureType sType; - const void* pNext; - VkDeviceMemory mem; - VkDeviceSize offset; - VkDeviceSize size; -} VkMappedMemoryRange; - -typedef struct { - VkDeviceSize size; - VkDeviceSize alignment; - uint32_t memoryTypeBits; -} VkMemoryRequirements; - -typedef struct { - VkImageAspect aspect; - VkExtent3D imageGranularity; - VkSparseImageFormatFlags flags; -} VkSparseImageFormatProperties; - -typedef struct { - VkSparseImageFormatProperties formatProps; - uint32_t imageMipTailStartLOD; - VkDeviceSize imageMipTailSize; - VkDeviceSize imageMipTailOffset; - VkDeviceSize imageMipTailStride; -} VkSparseImageMemoryRequirements; - -typedef struct { - VkDeviceSize rangeOffset; - VkDeviceSize rangeSize; - VkDeviceSize memOffset; - VkDeviceMemory mem; - VkSparseMemoryBindFlags flags; -} VkSparseMemoryBindInfo; - -typedef struct { - VkImageAspect aspect; - uint32_t mipLevel; - uint32_t arrayLayer; -} VkImageSubresource; - -typedef struct { - int32_t x; - int32_t y; - int32_t z; -} VkOffset3D; - -typedef struct { - VkImageSubresource subresource; - VkOffset3D offset; - VkExtent3D extent; - VkDeviceSize memOffset; - VkDeviceMemory mem; - VkSparseMemoryBindFlags flags; -} VkSparseImageMemoryBindInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkFenceCreateFlags flags; -} VkFenceCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkSemaphoreCreateFlags flags; -} VkSemaphoreCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkEventCreateFlags flags; -} VkEventCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkQueryType queryType; - uint32_t slots; - VkQueryPipelineStatisticFlags pipelineStatistics; -} VkQueryPoolCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkDeviceSize size; - VkBufferUsageFlags usage; - VkBufferCreateFlags flags; - VkSharingMode sharingMode; - uint32_t queueFamilyCount; - const uint32_t* 
pQueueFamilyIndices; -} VkBufferCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkBuffer buffer; - VkFormat format; - VkDeviceSize offset; - VkDeviceSize range; -} VkBufferViewCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkImageType imageType; - VkFormat format; - VkExtent3D extent; - uint32_t mipLevels; - uint32_t arraySize; - uint32_t samples; - VkImageTiling tiling; - VkImageUsageFlags usage; - VkImageCreateFlags flags; - VkSharingMode sharingMode; - uint32_t queueFamilyCount; - const uint32_t* pQueueFamilyIndices; - VkImageLayout initialLayout; -} VkImageCreateInfo; - -typedef struct { - VkDeviceSize offset; - VkDeviceSize size; - VkDeviceSize rowPitch; - VkDeviceSize depthPitch; -} VkSubresourceLayout; - -typedef struct { - VkChannelSwizzle r; - VkChannelSwizzle g; - VkChannelSwizzle b; - VkChannelSwizzle a; -} VkChannelMapping; - -typedef struct { - VkImageAspectFlags aspectMask; - uint32_t baseMipLevel; - uint32_t mipLevels; - uint32_t baseArrayLayer; - uint32_t arraySize; -} VkImageSubresourceRange; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkImage image; - VkImageViewType viewType; - VkFormat format; - VkChannelMapping channels; - VkImageSubresourceRange subresourceRange; - VkImageViewCreateFlags flags; -} VkImageViewCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - size_t codeSize; - const void* pCode; - VkShaderModuleCreateFlags flags; -} VkShaderModuleCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkShaderModule module; - const char* pName; - VkShaderCreateFlags flags; - VkShaderStage stage; -} VkShaderCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - size_t initialSize; - const void* initialData; - size_t maxSize; -} VkPipelineCacheCreateInfo; - -typedef struct { - uint32_t constantId; - size_t size; - uint32_t offset; -} VkSpecializationMapEntry; - -typedef struct { - 
uint32_t mapEntryCount; - const VkSpecializationMapEntry* pMap; - size_t dataSize; - const void* pData; -} VkSpecializationInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkShaderStage stage; - VkShader shader; - const VkSpecializationInfo* pSpecializationInfo; -} VkPipelineShaderStageCreateInfo; - -typedef struct { - uint32_t binding; - uint32_t strideInBytes; - VkVertexInputStepRate stepRate; -} VkVertexInputBindingDescription; - -typedef struct { - uint32_t location; - uint32_t binding; - VkFormat format; - uint32_t offsetInBytes; -} VkVertexInputAttributeDescription; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t bindingCount; - const VkVertexInputBindingDescription* pVertexBindingDescriptions; - uint32_t attributeCount; - const VkVertexInputAttributeDescription* pVertexAttributeDescriptions; -} VkPipelineVertexInputStateCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkPrimitiveTopology topology; - VkBool32 primitiveRestartEnable; -} VkPipelineInputAssemblyStateCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t patchControlPoints; -} VkPipelineTessellationStateCreateInfo; - -typedef struct { - float originX; - float originY; - float width; - float height; - float minDepth; - float maxDepth; -} VkViewport; - -typedef struct { - int32_t x; - int32_t y; -} VkOffset2D; - -typedef struct { - int32_t width; - int32_t height; -} VkExtent2D; - -typedef struct { - VkOffset2D offset; - VkExtent2D extent; -} VkRect2D; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t viewportCount; - const VkViewport* pViewports; - uint32_t scissorCount; - const VkRect2D* pScissors; -} VkPipelineViewportStateCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkBool32 depthClipEnable; - VkBool32 rasterizerDiscardEnable; - VkFillMode fillMode; - VkCullMode cullMode; - VkFrontFace frontFace; - VkBool32 
depthBiasEnable; - float depthBias; - float depthBiasClamp; - float slopeScaledDepthBias; - float lineWidth; -} VkPipelineRasterStateCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t rasterSamples; - VkBool32 sampleShadingEnable; - float minSampleShading; - const VkSampleMask* pSampleMask; -} VkPipelineMultisampleStateCreateInfo; - -typedef struct { - VkStencilOp stencilFailOp; - VkStencilOp stencilPassOp; - VkStencilOp stencilDepthFailOp; - VkCompareOp stencilCompareOp; - uint32_t stencilCompareMask; - uint32_t stencilWriteMask; - uint32_t stencilReference; -} VkStencilOpState; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkBool32 depthTestEnable; - VkBool32 depthWriteEnable; - VkCompareOp depthCompareOp; - VkBool32 depthBoundsTestEnable; - VkBool32 stencilTestEnable; - VkStencilOpState front; - VkStencilOpState back; - float minDepthBounds; - float maxDepthBounds; -} VkPipelineDepthStencilStateCreateInfo; - -typedef struct { - VkBool32 blendEnable; - VkBlend srcBlendColor; - VkBlend destBlendColor; - VkBlendOp blendOpColor; - VkBlend srcBlendAlpha; - VkBlend destBlendAlpha; - VkBlendOp blendOpAlpha; - VkChannelFlags channelWriteMask; -} VkPipelineColorBlendAttachmentState; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkBool32 alphaToCoverageEnable; - VkBool32 alphaToOneEnable; - VkBool32 logicOpEnable; - VkLogicOp logicOp; - uint32_t attachmentCount; - const VkPipelineColorBlendAttachmentState* pAttachments; - float blendConst[4]; -} VkPipelineColorBlendStateCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t dynamicStateCount; - const VkDynamicState* pDynamicStates; -} VkPipelineDynamicStateCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t stageCount; - const VkPipelineShaderStageCreateInfo* pStages; - const VkPipelineVertexInputStateCreateInfo* pVertexInputState; - const 
VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState; - const VkPipelineTessellationStateCreateInfo* pTessellationState; - const VkPipelineViewportStateCreateInfo* pViewportState; - const VkPipelineRasterStateCreateInfo* pRasterState; - const VkPipelineMultisampleStateCreateInfo* pMultisampleState; - const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState; - const VkPipelineColorBlendStateCreateInfo* pColorBlendState; - const VkPipelineDynamicStateCreateInfo* pDynamicState; - VkPipelineCreateFlags flags; - VkPipelineLayout layout; - VkRenderPass renderPass; - uint32_t subpass; - VkPipeline basePipelineHandle; - int32_t basePipelineIndex; -} VkGraphicsPipelineCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkPipelineShaderStageCreateInfo stage; - VkPipelineCreateFlags flags; - VkPipelineLayout layout; - VkPipeline basePipelineHandle; - int32_t basePipelineIndex; -} VkComputePipelineCreateInfo; - -typedef struct { - VkShaderStageFlags stageFlags; - uint32_t start; - uint32_t length; -} VkPushConstantRange; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t descriptorSetCount; - const VkDescriptorSetLayout* pSetLayouts; - uint32_t pushConstantRangeCount; - const VkPushConstantRange* pPushConstantRanges; -} VkPipelineLayoutCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkTexFilter magFilter; - VkTexFilter minFilter; - VkTexMipmapMode mipMode; - VkTexAddressMode addressModeU; - VkTexAddressMode addressModeV; - VkTexAddressMode addressModeW; - float mipLodBias; - float maxAnisotropy; - VkBool32 compareEnable; - VkCompareOp compareOp; - float minLod; - float maxLod; - VkBorderColor borderColor; - VkBool32 unnormalizedCoordinates; -} VkSamplerCreateInfo; - -typedef struct { - VkDescriptorType descriptorType; - uint32_t arraySize; - VkShaderStageFlags stageFlags; - const VkSampler* pImmutableSamplers; -} VkDescriptorSetLayoutBinding; - -typedef struct { - 
VkStructureType sType; - const void* pNext; - uint32_t count; - const VkDescriptorSetLayoutBinding* pBinding; -} VkDescriptorSetLayoutCreateInfo; - -typedef struct { - VkDescriptorType type; - uint32_t count; -} VkDescriptorTypeCount; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkDescriptorPoolUsage poolUsage; - uint32_t maxSets; - uint32_t count; - const VkDescriptorTypeCount* pTypeCount; -} VkDescriptorPoolCreateInfo; - -typedef struct { - VkBuffer buffer; - VkDeviceSize offset; - VkDeviceSize range; -} VkDescriptorBufferInfo; - -typedef struct { - VkBufferView bufferView; - VkSampler sampler; - VkImageView imageView; - VkImageLayout imageLayout; - VkDescriptorBufferInfo bufferInfo; -} VkDescriptorInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkDescriptorSet destSet; - uint32_t destBinding; - uint32_t destArrayElement; - uint32_t count; - VkDescriptorType descriptorType; - const VkDescriptorInfo* pDescriptors; -} VkWriteDescriptorSet; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkDescriptorSet srcSet; - uint32_t srcBinding; - uint32_t srcArrayElement; - VkDescriptorSet destSet; - uint32_t destBinding; - uint32_t destArrayElement; - uint32_t count; -} VkCopyDescriptorSet; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkRenderPass renderPass; - uint32_t attachmentCount; - const VkImageView* pAttachments; - uint32_t width; - uint32_t height; - uint32_t layers; -} VkFramebufferCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkFormat format; - uint32_t samples; - VkAttachmentLoadOp loadOp; - VkAttachmentStoreOp storeOp; - VkAttachmentLoadOp stencilLoadOp; - VkAttachmentStoreOp stencilStoreOp; - VkImageLayout initialLayout; - VkImageLayout finalLayout; - VkAttachmentDescriptionFlags flags; -} VkAttachmentDescription; - -typedef struct { - uint32_t attachment; - VkImageLayout layout; -} VkAttachmentReference; - -typedef struct { - 
VkStructureType sType; - const void* pNext; - VkPipelineBindPoint pipelineBindPoint; - VkSubpassDescriptionFlags flags; - uint32_t inputCount; - const VkAttachmentReference* pInputAttachments; - uint32_t colorCount; - const VkAttachmentReference* pColorAttachments; - const VkAttachmentReference* pResolveAttachments; - VkAttachmentReference depthStencilAttachment; - uint32_t preserveCount; - const VkAttachmentReference* pPreserveAttachments; -} VkSubpassDescription; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t srcSubpass; - uint32_t destSubpass; - VkPipelineStageFlags srcStageMask; - VkPipelineStageFlags destStageMask; - VkMemoryOutputFlags outputMask; - VkMemoryInputFlags inputMask; - VkBool32 byRegion; -} VkSubpassDependency; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t attachmentCount; - const VkAttachmentDescription* pAttachments; - uint32_t subpassCount; - const VkSubpassDescription* pSubpasses; - uint32_t dependencyCount; - const VkSubpassDependency* pDependencies; -} VkRenderPassCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - uint32_t queueFamilyIndex; - VkCmdPoolCreateFlags flags; -} VkCmdPoolCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkCmdPool cmdPool; - VkCmdBufferLevel level; - VkCmdBufferCreateFlags flags; -} VkCmdBufferCreateInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkCmdBufferOptimizeFlags flags; - VkRenderPass renderPass; - uint32_t subpass; - VkFramebuffer framebuffer; -} VkCmdBufferBeginInfo; - -typedef struct { - VkDeviceSize srcOffset; - VkDeviceSize destOffset; - VkDeviceSize copySize; -} VkBufferCopy; - -typedef struct { - VkImageAspect aspect; - uint32_t mipLevel; - uint32_t arrayLayer; - uint32_t arraySize; -} VkImageSubresourceCopy; - -typedef struct { - VkImageSubresourceCopy srcSubresource; - VkOffset3D srcOffset; - VkImageSubresourceCopy destSubresource; - VkOffset3D 
destOffset; - VkExtent3D extent; -} VkImageCopy; - -typedef struct { - VkImageSubresourceCopy srcSubresource; - VkOffset3D srcOffset; - VkExtent3D srcExtent; - VkImageSubresourceCopy destSubresource; - VkOffset3D destOffset; - VkExtent3D destExtent; -} VkImageBlit; - -typedef struct { - VkDeviceSize bufferOffset; - uint32_t bufferRowLength; - uint32_t bufferImageHeight; - VkImageSubresourceCopy imageSubresource; - VkOffset3D imageOffset; - VkExtent3D imageExtent; -} VkBufferImageCopy; - -typedef union { - float float32[4]; - int32_t int32[4]; - uint32_t uint32[4]; -} VkClearColorValue; - -typedef struct { - float depth; - uint32_t stencil; -} VkClearDepthStencilValue; - -typedef struct { - VkOffset3D offset; - VkExtent3D extent; -} VkRect3D; - -typedef struct { - VkImageSubresourceCopy srcSubresource; - VkOffset3D srcOffset; - VkImageSubresourceCopy destSubresource; - VkOffset3D destOffset; - VkExtent3D extent; -} VkImageResolve; - -typedef union { - VkClearColorValue color; - VkClearDepthStencilValue depthStencil; -} VkClearValue; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkRenderPass renderPass; - VkFramebuffer framebuffer; - VkRect2D renderArea; - uint32_t clearValueCount; - const VkClearValue* pClearValues; -} VkRenderPassBeginInfo; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkMemoryOutputFlags outputMask; - VkMemoryInputFlags inputMask; - uint32_t srcQueueFamilyIndex; - uint32_t destQueueFamilyIndex; - VkBuffer buffer; - VkDeviceSize offset; - VkDeviceSize size; -} VkBufferMemoryBarrier; - -typedef struct { - uint32_t x; - uint32_t y; - uint32_t z; -} VkDispatchIndirectCmd; - -typedef struct { - uint32_t indexCount; - uint32_t instanceCount; - uint32_t firstIndex; - int32_t vertexOffset; - uint32_t firstInstance; -} VkDrawIndexedIndirectCmd; - -typedef struct { - uint32_t vertexCount; - uint32_t instanceCount; - uint32_t firstVertex; - uint32_t firstInstance; -} VkDrawIndirectCmd; - -typedef struct { - 
VkStructureType sType; - const void* pNext; - VkMemoryOutputFlags outputMask; - VkMemoryInputFlags inputMask; - VkImageLayout oldLayout; - VkImageLayout newLayout; - uint32_t srcQueueFamilyIndex; - uint32_t destQueueFamilyIndex; - VkImage image; - VkImageSubresourceRange subresourceRange; -} VkImageMemoryBarrier; - -typedef struct { - VkStructureType sType; - const void* pNext; - VkMemoryOutputFlags outputMask; - VkMemoryInputFlags inputMask; -} VkMemoryBarrier; - - -typedef VkResult (VKAPI *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCreateInfo, VkInstance* pInstance); -typedef void (VKAPI *PFN_vkDestroyInstance)(VkInstance instance); -typedef VkResult (VKAPI *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceQueueFamilyProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); -typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); -typedef PFN_vkVoidFunction (VKAPI *PFN_vkGetDeviceProcAddr)(VkDevice device, const 
char* pName); -typedef VkResult (VKAPI *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice); -typedef void (VKAPI *PFN_vkDestroyDevice)(VkDevice device); -typedef VkResult (VKAPI *PFN_vkEnumerateInstanceExtensionProperties)(const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkEnumerateDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkEnumerateInstanceLayerProperties)(uint32_t* pCount, VkLayerProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkEnumerateDeviceLayerProperties)(VkPhysicalDevice physicalDevice, uint32_t* pCount, VkLayerProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue); -typedef VkResult (VKAPI *PFN_vkQueueSubmit)(VkQueue queue, uint32_t cmdBufferCount, const VkCmdBuffer* pCmdBuffers, VkFence fence); -typedef VkResult (VKAPI *PFN_vkQueueWaitIdle)(VkQueue queue); -typedef VkResult (VKAPI *PFN_vkDeviceWaitIdle)(VkDevice device); -typedef VkResult (VKAPI *PFN_vkAllocMemory)(VkDevice device, const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem); -typedef void (VKAPI *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory mem); -typedef VkResult (VKAPI *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData); -typedef void (VKAPI *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory mem); -typedef VkResult (VKAPI *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); -typedef VkResult (VKAPI *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); -typedef VkResult (VKAPI *PFN_vkGetDeviceMemoryCommitment)(VkDevice device, 
VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes); -typedef VkResult (VKAPI *PFN_vkBindBufferMemory)(VkDevice device, VkBuffer buffer, VkDeviceMemory mem, VkDeviceSize memOffset); -typedef VkResult (VKAPI *PFN_vkBindImageMemory)(VkDevice device, VkImage image, VkDeviceMemory mem, VkDeviceSize memOffset); -typedef VkResult (VKAPI *PFN_vkGetBufferMemoryRequirements)(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); -typedef VkResult (VKAPI *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); -typedef VkResult (VKAPI *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t* pNumRequirements, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); -typedef VkResult (VKAPI *PFN_vkGetPhysicalDeviceSparseImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, uint32_t samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pNumProperties, VkSparseImageFormatProperties* pProperties); -typedef VkResult (VKAPI *PFN_vkQueueBindSparseBufferMemory)(VkQueue queue, VkBuffer buffer, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); -typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageOpaqueMemory)(VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseMemoryBindInfo* pBindInfo); -typedef VkResult (VKAPI *PFN_vkQueueBindSparseImageMemory)(VkQueue queue, VkImage image, uint32_t numBindings, const VkSparseImageMemoryBindInfo* pBindInfo); -typedef VkResult (VKAPI *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, VkFence* pFence); -typedef void (VKAPI *PFN_vkDestroyFence)(VkDevice device, VkFence fence); -typedef VkResult (VKAPI *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); -typedef VkResult (VKAPI *PFN_vkGetFenceStatus)(VkDevice device, VkFence fence); -typedef VkResult (VKAPI *PFN_vkWaitForFences)(VkDevice device, uint32_t 
fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); -typedef VkResult (VKAPI *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, VkSemaphore* pSemaphore); -typedef void (VKAPI *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore semaphore); -typedef VkResult (VKAPI *PFN_vkQueueSignalSemaphore)(VkQueue queue, VkSemaphore semaphore); -typedef VkResult (VKAPI *PFN_vkQueueWaitSemaphore)(VkQueue queue, VkSemaphore semaphore); -typedef VkResult (VKAPI *PFN_vkCreateEvent)(VkDevice device, const VkEventCreateInfo* pCreateInfo, VkEvent* pEvent); -typedef void (VKAPI *PFN_vkDestroyEvent)(VkDevice device, VkEvent event); -typedef VkResult (VKAPI *PFN_vkGetEventStatus)(VkDevice device, VkEvent event); -typedef VkResult (VKAPI *PFN_vkSetEvent)(VkDevice device, VkEvent event); -typedef VkResult (VKAPI *PFN_vkResetEvent)(VkDevice device, VkEvent event); -typedef VkResult (VKAPI *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, VkQueryPool* pQueryPool); -typedef void (VKAPI *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool); -typedef VkResult (VKAPI *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, size_t* pDataSize, void* pData, VkQueryResultFlags flags); -typedef VkResult (VKAPI *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, VkBuffer* pBuffer); -typedef void (VKAPI *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer); -typedef VkResult (VKAPI *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, VkBufferView* pView); -typedef void (VKAPI *PFN_vkDestroyBufferView)(VkDevice device, VkBufferView bufferView); -typedef VkResult (VKAPI *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, VkImage* pImage); -typedef void (VKAPI *PFN_vkDestroyImage)(VkDevice device, VkImage image); -typedef VkResult (VKAPI 
*PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); -typedef VkResult (VKAPI *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, VkImageView* pView); -typedef void (VKAPI *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView); -typedef VkResult (VKAPI *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, VkShaderModule* pShaderModule); -typedef void (VKAPI *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule); -typedef VkResult (VKAPI *PFN_vkCreateShader)(VkDevice device, const VkShaderCreateInfo* pCreateInfo, VkShader* pShader); -typedef void (VKAPI *PFN_vkDestroyShader)(VkDevice device, VkShader shader); -typedef VkResult (VKAPI *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, VkPipelineCache* pPipelineCache); -typedef void (VKAPI *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache); -typedef size_t (VKAPI *PFN_vkGetPipelineCacheSize)(VkDevice device, VkPipelineCache pipelineCache); -typedef VkResult (VKAPI *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, void* pData); -typedef VkResult (VKAPI *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache destCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); -typedef VkResult (VKAPI *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); -typedef VkResult (VKAPI *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo* pCreateInfos, VkPipeline* pPipelines); -typedef void (VKAPI *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline); -typedef VkResult (VKAPI *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo* 
pCreateInfo, VkPipelineLayout* pPipelineLayout); -typedef void (VKAPI *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelineLayout pipelineLayout); -typedef VkResult (VKAPI *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, VkSampler* pSampler); -typedef void (VKAPI *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler); -typedef VkResult (VKAPI *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayout* pSetLayout); -typedef void (VKAPI *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout); -typedef VkResult (VKAPI *PFN_vkCreateDescriptorPool)(VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, VkDescriptorPool* pDescriptorPool); -typedef void (VKAPI *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); -typedef VkResult (VKAPI *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool); -typedef VkResult (VKAPI *PFN_vkAllocDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetUsage setUsage, uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets); -typedef VkResult (VKAPI *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t count, const VkDescriptorSet* pDescriptorSets); -typedef void (VKAPI *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t writeCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t copyCount, const VkCopyDescriptorSet* pDescriptorCopies); -typedef VkResult (VKAPI *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, VkFramebuffer* pFramebuffer); -typedef void (VKAPI *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer); -typedef VkResult (VKAPI *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, VkRenderPass* pRenderPass); -typedef void (VKAPI 
*PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass); -typedef VkResult (VKAPI *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); -typedef VkResult (VKAPI *PFN_vkCreateCommandPool)(VkDevice device, const VkCmdPoolCreateInfo* pCreateInfo, VkCmdPool* pCmdPool); -typedef void (VKAPI *PFN_vkDestroyCommandPool)(VkDevice device, VkCmdPool cmdPool); -typedef VkResult (VKAPI *PFN_vkResetCommandPool)(VkDevice device, VkCmdPool cmdPool, VkCmdPoolResetFlags flags); -typedef VkResult (VKAPI *PFN_vkCreateCommandBuffer)(VkDevice device, const VkCmdBufferCreateInfo* pCreateInfo, VkCmdBuffer* pCmdBuffer); -typedef void (VKAPI *PFN_vkDestroyCommandBuffer)(VkDevice device, VkCmdBuffer commandBuffer); -typedef VkResult (VKAPI *PFN_vkBeginCommandBuffer)(VkCmdBuffer cmdBuffer, const VkCmdBufferBeginInfo* pBeginInfo); -typedef VkResult (VKAPI *PFN_vkEndCommandBuffer)(VkCmdBuffer cmdBuffer); -typedef VkResult (VKAPI *PFN_vkResetCommandBuffer)(VkCmdBuffer cmdBuffer, VkCmdBufferResetFlags flags); -typedef void (VKAPI *PFN_vkCmdBindPipeline)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); -typedef void (VKAPI *PFN_vkCmdSetViewport)(VkCmdBuffer cmdBuffer, uint32_t viewportCount, const VkViewport* pViewports); -typedef void (VKAPI *PFN_vkCmdSetScissor)(VkCmdBuffer cmdBuffer, uint32_t scissorCount, const VkRect2D* pScissors); -typedef void (VKAPI *PFN_vkCmdSetLineWidth)(VkCmdBuffer cmdBuffer, float lineWidth); -typedef void (VKAPI *PFN_vkCmdSetDepthBias)(VkCmdBuffer cmdBuffer, float depthBias, float depthBiasClamp, float slopeScaledDepthBias); -typedef void (VKAPI *PFN_vkCmdSetBlendConstants)(VkCmdBuffer cmdBuffer, const float blendConst[4]); -typedef void (VKAPI *PFN_vkCmdSetDepthBounds)(VkCmdBuffer cmdBuffer, float minDepthBounds, float maxDepthBounds); -typedef void (VKAPI *PFN_vkCmdSetStencilCompareMask)(VkCmdBuffer cmdBuffer, VkStencilFaceFlags faceMask, uint32_t stencilCompareMask); 
-typedef void (VKAPI *PFN_vkCmdSetStencilWriteMask)(VkCmdBuffer cmdBuffer, VkStencilFaceFlags faceMask, uint32_t stencilWriteMask); -typedef void (VKAPI *PFN_vkCmdSetStencilReference)(VkCmdBuffer cmdBuffer, VkStencilFaceFlags faceMask, uint32_t stencilReference); -typedef void (VKAPI *PFN_vkCmdBindDescriptorSets)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); -typedef void (VKAPI *PFN_vkCmdBindIndexBuffer)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); -typedef void (VKAPI *PFN_vkCmdBindVertexBuffers)(VkCmdBuffer cmdBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); -typedef void (VKAPI *PFN_vkCmdDraw)(VkCmdBuffer cmdBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); -typedef void (VKAPI *PFN_vkCmdDrawIndexed)(VkCmdBuffer cmdBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); -typedef void (VKAPI *PFN_vkCmdDrawIndirect)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); -typedef void (VKAPI *PFN_vkCmdDrawIndexedIndirect)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride); -typedef void (VKAPI *PFN_vkCmdDispatch)(VkCmdBuffer cmdBuffer, uint32_t x, uint32_t y, uint32_t z); -typedef void (VKAPI *PFN_vkCmdDispatchIndirect)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset); -typedef void (VKAPI *PFN_vkCmdCopyBuffer)(VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, VkBuffer destBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdCopyImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout 
destImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdBlitImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkTexFilter filter); -typedef void (VKAPI *PFN_vkCmdCopyBufferToImage)(VkCmdBuffer cmdBuffer, VkBuffer srcBuffer, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdCopyImageToBuffer)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer destBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); -typedef void (VKAPI *PFN_vkCmdUpdateBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize dataSize, const uint32_t* pData); -typedef void (VKAPI *PFN_vkCmdFillBuffer)(VkCmdBuffer cmdBuffer, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize fillSize, uint32_t data); -typedef void (VKAPI *PFN_vkCmdClearColorImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -typedef void (VKAPI *PFN_vkCmdClearDepthStencilImage)(VkCmdBuffer cmdBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -typedef void (VKAPI *PFN_vkCmdClearColorAttachment)(VkCmdBuffer cmdBuffer, uint32_t colorAttachment, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rectCount, const VkRect3D* pRects); -typedef void (VKAPI *PFN_vkCmdClearDepthStencilAttachment)(VkCmdBuffer cmdBuffer, VkImageAspectFlags aspectMask, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rectCount, const VkRect3D* pRects); -typedef void (VKAPI *PFN_vkCmdResolveImage)(VkCmdBuffer cmdBuffer, VkImage srcImage, VkImageLayout 
srcImageLayout, VkImage destImage, VkImageLayout destImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); -typedef void (VKAPI *PFN_vkCmdSetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipelineStageFlags stageMask); -typedef void (VKAPI *PFN_vkCmdResetEvent)(VkCmdBuffer cmdBuffer, VkEvent event, VkPipelineStageFlags stageMask); -typedef void (VKAPI *PFN_vkCmdWaitEvents)(VkCmdBuffer cmdBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, uint32_t memBarrierCount, const void* const* ppMemBarriers); -typedef void (VKAPI *PFN_vkCmdPipelineBarrier)(VkCmdBuffer cmdBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, VkBool32 byRegion, uint32_t memBarrierCount, const void* const* ppMemBarriers); -typedef void (VKAPI *PFN_vkCmdBeginQuery)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot, VkQueryControlFlags flags); -typedef void (VKAPI *PFN_vkCmdEndQuery)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t slot); -typedef void (VKAPI *PFN_vkCmdResetQueryPool)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); -typedef void (VKAPI *PFN_vkCmdWriteTimestamp)(VkCmdBuffer cmdBuffer, VkTimestampType timestampType, VkBuffer destBuffer, VkDeviceSize destOffset); -typedef void (VKAPI *PFN_vkCmdCopyQueryPoolResults)(VkCmdBuffer cmdBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer destBuffer, VkDeviceSize destOffset, VkDeviceSize destStride, VkQueryResultFlags flags); -typedef void (VKAPI *PFN_vkCmdPushConstants)(VkCmdBuffer cmdBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t start, uint32_t length, const void* values); -typedef void (VKAPI *PFN_vkCmdBeginRenderPass)(VkCmdBuffer cmdBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkRenderPassContents contents); -typedef void (VKAPI *PFN_vkCmdNextSubpass)(VkCmdBuffer cmdBuffer, VkRenderPassContents contents); 
-typedef void (VKAPI *PFN_vkCmdEndRenderPass)(VkCmdBuffer cmdBuffer); -typedef void (VKAPI *PFN_vkCmdExecuteCommands)(VkCmdBuffer cmdBuffer, uint32_t cmdBuffersCount, const VkCmdBuffer* pCmdBuffers); - -#ifdef VK_PROTOTYPES -VkResult VKAPI vkCreateInstance( - const VkInstanceCreateInfo* pCreateInfo, - VkInstance* pInstance); - -void VKAPI vkDestroyInstance( - VkInstance instance); - -VkResult VKAPI vkEnumeratePhysicalDevices( - VkInstance instance, - uint32_t* pPhysicalDeviceCount, - VkPhysicalDevice* pPhysicalDevices); - -VkResult VKAPI vkGetPhysicalDeviceFeatures( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures* pFeatures); - -VkResult VKAPI vkGetPhysicalDeviceFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkFormatProperties* pFormatProperties); - -VkResult VKAPI vkGetPhysicalDeviceImageFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - VkImageTiling tiling, - VkImageUsageFlags usage, - VkImageCreateFlags flags, - VkImageFormatProperties* pImageFormatProperties); - -VkResult VKAPI vkGetPhysicalDeviceProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties* pProperties); - -VkResult VKAPI vkGetPhysicalDeviceQueueFamilyProperties( - VkPhysicalDevice physicalDevice, - uint32_t* pCount, - VkQueueFamilyProperties* pQueueFamilyProperties); - -VkResult VKAPI vkGetPhysicalDeviceMemoryProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties* pMemoryProperties); - -PFN_vkVoidFunction VKAPI vkGetInstanceProcAddr( - VkInstance instance, - const char* pName); - -PFN_vkVoidFunction VKAPI vkGetDeviceProcAddr( - VkDevice device, - const char* pName); - -VkResult VKAPI vkCreateDevice( - VkPhysicalDevice physicalDevice, - const VkDeviceCreateInfo* pCreateInfo, - VkDevice* pDevice); - -void VKAPI vkDestroyDevice( - VkDevice device); - -VkResult VKAPI vkEnumerateInstanceExtensionProperties( - const char* pLayerName, - uint32_t* pCount, - 
VkExtensionProperties* pProperties); - -VkResult VKAPI vkEnumerateDeviceExtensionProperties( - VkPhysicalDevice physicalDevice, - const char* pLayerName, - uint32_t* pCount, - VkExtensionProperties* pProperties); - -VkResult VKAPI vkEnumerateInstanceLayerProperties( - uint32_t* pCount, - VkLayerProperties* pProperties); - -VkResult VKAPI vkEnumerateDeviceLayerProperties( - VkPhysicalDevice physicalDevice, - uint32_t* pCount, - VkLayerProperties* pProperties); - -VkResult VKAPI vkGetDeviceQueue( - VkDevice device, - uint32_t queueFamilyIndex, - uint32_t queueIndex, - VkQueue* pQueue); - -VkResult VKAPI vkQueueSubmit( - VkQueue queue, - uint32_t cmdBufferCount, - const VkCmdBuffer* pCmdBuffers, - VkFence fence); - -VkResult VKAPI vkQueueWaitIdle( - VkQueue queue); - -VkResult VKAPI vkDeviceWaitIdle( - VkDevice device); - -VkResult VKAPI vkAllocMemory( - VkDevice device, - const VkMemoryAllocInfo* pAllocInfo, - VkDeviceMemory* pMem); - -void VKAPI vkFreeMemory( - VkDevice device, - VkDeviceMemory mem); - -VkResult VKAPI vkMapMemory( - VkDevice device, - VkDeviceMemory mem, - VkDeviceSize offset, - VkDeviceSize size, - VkMemoryMapFlags flags, - void** ppData); - -void VKAPI vkUnmapMemory( - VkDevice device, - VkDeviceMemory mem); - -VkResult VKAPI vkFlushMappedMemoryRanges( - VkDevice device, - uint32_t memRangeCount, - const VkMappedMemoryRange* pMemRanges); - -VkResult VKAPI vkInvalidateMappedMemoryRanges( - VkDevice device, - uint32_t memRangeCount, - const VkMappedMemoryRange* pMemRanges); - -VkResult VKAPI vkGetDeviceMemoryCommitment( - VkDevice device, - VkDeviceMemory memory, - VkDeviceSize* pCommittedMemoryInBytes); - -VkResult VKAPI vkBindBufferMemory( - VkDevice device, - VkBuffer buffer, - VkDeviceMemory mem, - VkDeviceSize memOffset); - -VkResult VKAPI vkBindImageMemory( - VkDevice device, - VkImage image, - VkDeviceMemory mem, - VkDeviceSize memOffset); - -VkResult VKAPI vkGetBufferMemoryRequirements( - VkDevice device, - VkBuffer buffer, - 
VkMemoryRequirements* pMemoryRequirements); - -VkResult VKAPI vkGetImageMemoryRequirements( - VkDevice device, - VkImage image, - VkMemoryRequirements* pMemoryRequirements); - -VkResult VKAPI vkGetImageSparseMemoryRequirements( - VkDevice device, - VkImage image, - uint32_t* pNumRequirements, - VkSparseImageMemoryRequirements* pSparseMemoryRequirements); - -VkResult VKAPI vkGetPhysicalDeviceSparseImageFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - uint32_t samples, - VkImageUsageFlags usage, - VkImageTiling tiling, - uint32_t* pNumProperties, - VkSparseImageFormatProperties* pProperties); - -VkResult VKAPI vkQueueBindSparseBufferMemory( - VkQueue queue, - VkBuffer buffer, - uint32_t numBindings, - const VkSparseMemoryBindInfo* pBindInfo); - -VkResult VKAPI vkQueueBindSparseImageOpaqueMemory( - VkQueue queue, - VkImage image, - uint32_t numBindings, - const VkSparseMemoryBindInfo* pBindInfo); - -VkResult VKAPI vkQueueBindSparseImageMemory( - VkQueue queue, - VkImage image, - uint32_t numBindings, - const VkSparseImageMemoryBindInfo* pBindInfo); - -VkResult VKAPI vkCreateFence( - VkDevice device, - const VkFenceCreateInfo* pCreateInfo, - VkFence* pFence); - -void VKAPI vkDestroyFence( - VkDevice device, - VkFence fence); - -VkResult VKAPI vkResetFences( - VkDevice device, - uint32_t fenceCount, - const VkFence* pFences); - -VkResult VKAPI vkGetFenceStatus( - VkDevice device, - VkFence fence); - -VkResult VKAPI vkWaitForFences( - VkDevice device, - uint32_t fenceCount, - const VkFence* pFences, - VkBool32 waitAll, - uint64_t timeout); - -VkResult VKAPI vkCreateSemaphore( - VkDevice device, - const VkSemaphoreCreateInfo* pCreateInfo, - VkSemaphore* pSemaphore); - -void VKAPI vkDestroySemaphore( - VkDevice device, - VkSemaphore semaphore); - -VkResult VKAPI vkQueueSignalSemaphore( - VkQueue queue, - VkSemaphore semaphore); - -VkResult VKAPI vkQueueWaitSemaphore( - VkQueue queue, - VkSemaphore semaphore); - -VkResult VKAPI 
vkCreateEvent( - VkDevice device, - const VkEventCreateInfo* pCreateInfo, - VkEvent* pEvent); - -void VKAPI vkDestroyEvent( - VkDevice device, - VkEvent event); - -VkResult VKAPI vkGetEventStatus( - VkDevice device, - VkEvent event); - -VkResult VKAPI vkSetEvent( - VkDevice device, - VkEvent event); - -VkResult VKAPI vkResetEvent( - VkDevice device, - VkEvent event); - -VkResult VKAPI vkCreateQueryPool( - VkDevice device, - const VkQueryPoolCreateInfo* pCreateInfo, - VkQueryPool* pQueryPool); - -void VKAPI vkDestroyQueryPool( - VkDevice device, - VkQueryPool queryPool); - -VkResult VKAPI vkGetQueryPoolResults( - VkDevice device, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount, - size_t* pDataSize, - void* pData, - VkQueryResultFlags flags); - -VkResult VKAPI vkCreateBuffer( - VkDevice device, - const VkBufferCreateInfo* pCreateInfo, - VkBuffer* pBuffer); - -void VKAPI vkDestroyBuffer( - VkDevice device, - VkBuffer buffer); - -VkResult VKAPI vkCreateBufferView( - VkDevice device, - const VkBufferViewCreateInfo* pCreateInfo, - VkBufferView* pView); - -void VKAPI vkDestroyBufferView( - VkDevice device, - VkBufferView bufferView); - -VkResult VKAPI vkCreateImage( - VkDevice device, - const VkImageCreateInfo* pCreateInfo, - VkImage* pImage); - -void VKAPI vkDestroyImage( - VkDevice device, - VkImage image); - -VkResult VKAPI vkGetImageSubresourceLayout( - VkDevice device, - VkImage image, - const VkImageSubresource* pSubresource, - VkSubresourceLayout* pLayout); - -VkResult VKAPI vkCreateImageView( - VkDevice device, - const VkImageViewCreateInfo* pCreateInfo, - VkImageView* pView); - -void VKAPI vkDestroyImageView( - VkDevice device, - VkImageView imageView); - -VkResult VKAPI vkCreateShaderModule( - VkDevice device, - const VkShaderModuleCreateInfo* pCreateInfo, - VkShaderModule* pShaderModule); - -void VKAPI vkDestroyShaderModule( - VkDevice device, - VkShaderModule shaderModule); - -VkResult VKAPI vkCreateShader( - VkDevice device, - const 
VkShaderCreateInfo* pCreateInfo, - VkShader* pShader); - -void VKAPI vkDestroyShader( - VkDevice device, - VkShader shader); - -VkResult VKAPI vkCreatePipelineCache( - VkDevice device, - const VkPipelineCacheCreateInfo* pCreateInfo, - VkPipelineCache* pPipelineCache); - -void VKAPI vkDestroyPipelineCache( - VkDevice device, - VkPipelineCache pipelineCache); - -size_t VKAPI vkGetPipelineCacheSize( - VkDevice device, - VkPipelineCache pipelineCache); - -VkResult VKAPI vkGetPipelineCacheData( - VkDevice device, - VkPipelineCache pipelineCache, - void* pData); - -VkResult VKAPI vkMergePipelineCaches( - VkDevice device, - VkPipelineCache destCache, - uint32_t srcCacheCount, - const VkPipelineCache* pSrcCaches); - -VkResult VKAPI vkCreateGraphicsPipelines( - VkDevice device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkGraphicsPipelineCreateInfo* pCreateInfos, - VkPipeline* pPipelines); - -VkResult VKAPI vkCreateComputePipelines( - VkDevice device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkComputePipelineCreateInfo* pCreateInfos, - VkPipeline* pPipelines); - -void VKAPI vkDestroyPipeline( - VkDevice device, - VkPipeline pipeline); - -VkResult VKAPI vkCreatePipelineLayout( - VkDevice device, - const VkPipelineLayoutCreateInfo* pCreateInfo, - VkPipelineLayout* pPipelineLayout); - -void VKAPI vkDestroyPipelineLayout( - VkDevice device, - VkPipelineLayout pipelineLayout); - -VkResult VKAPI vkCreateSampler( - VkDevice device, - const VkSamplerCreateInfo* pCreateInfo, - VkSampler* pSampler); - -void VKAPI vkDestroySampler( - VkDevice device, - VkSampler sampler); - -VkResult VKAPI vkCreateDescriptorSetLayout( - VkDevice device, - const VkDescriptorSetLayoutCreateInfo* pCreateInfo, - VkDescriptorSetLayout* pSetLayout); - -void VKAPI vkDestroyDescriptorSetLayout( - VkDevice device, - VkDescriptorSetLayout descriptorSetLayout); - -VkResult VKAPI vkCreateDescriptorPool( - VkDevice device, - const VkDescriptorPoolCreateInfo* pCreateInfo, - 
VkDescriptorPool* pDescriptorPool); - -void VKAPI vkDestroyDescriptorPool( - VkDevice device, - VkDescriptorPool descriptorPool); - -VkResult VKAPI vkResetDescriptorPool( - VkDevice device, - VkDescriptorPool descriptorPool); - -VkResult VKAPI vkAllocDescriptorSets( - VkDevice device, - VkDescriptorPool descriptorPool, - VkDescriptorSetUsage setUsage, - uint32_t count, - const VkDescriptorSetLayout* pSetLayouts, - VkDescriptorSet* pDescriptorSets); - -VkResult VKAPI vkFreeDescriptorSets( - VkDevice device, - VkDescriptorPool descriptorPool, - uint32_t count, - const VkDescriptorSet* pDescriptorSets); - -void VKAPI vkUpdateDescriptorSets( - VkDevice device, - uint32_t writeCount, - const VkWriteDescriptorSet* pDescriptorWrites, - uint32_t copyCount, - const VkCopyDescriptorSet* pDescriptorCopies); - -VkResult VKAPI vkCreateFramebuffer( - VkDevice device, - const VkFramebufferCreateInfo* pCreateInfo, - VkFramebuffer* pFramebuffer); - -void VKAPI vkDestroyFramebuffer( - VkDevice device, - VkFramebuffer framebuffer); - -VkResult VKAPI vkCreateRenderPass( - VkDevice device, - const VkRenderPassCreateInfo* pCreateInfo, - VkRenderPass* pRenderPass); - -void VKAPI vkDestroyRenderPass( - VkDevice device, - VkRenderPass renderPass); - -VkResult VKAPI vkGetRenderAreaGranularity( - VkDevice device, - VkRenderPass renderPass, - VkExtent2D* pGranularity); - -VkResult VKAPI vkCreateCommandPool( - VkDevice device, - const VkCmdPoolCreateInfo* pCreateInfo, - VkCmdPool* pCmdPool); - -void VKAPI vkDestroyCommandPool( - VkDevice device, - VkCmdPool cmdPool); - -VkResult VKAPI vkResetCommandPool( - VkDevice device, - VkCmdPool cmdPool, - VkCmdPoolResetFlags flags); - -VkResult VKAPI vkCreateCommandBuffer( - VkDevice device, - const VkCmdBufferCreateInfo* pCreateInfo, - VkCmdBuffer* pCmdBuffer); - -void VKAPI vkDestroyCommandBuffer( - VkDevice device, - VkCmdBuffer commandBuffer); - -VkResult VKAPI vkBeginCommandBuffer( - VkCmdBuffer cmdBuffer, - const VkCmdBufferBeginInfo* pBeginInfo); 
- -VkResult VKAPI vkEndCommandBuffer( - VkCmdBuffer cmdBuffer); - -VkResult VKAPI vkResetCommandBuffer( - VkCmdBuffer cmdBuffer, - VkCmdBufferResetFlags flags); - -void VKAPI vkCmdBindPipeline( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipeline pipeline); - -void VKAPI vkCmdSetViewport( - VkCmdBuffer cmdBuffer, - uint32_t viewportCount, - const VkViewport* pViewports); - -void VKAPI vkCmdSetScissor( - VkCmdBuffer cmdBuffer, - uint32_t scissorCount, - const VkRect2D* pScissors); - -void VKAPI vkCmdSetLineWidth( - VkCmdBuffer cmdBuffer, - float lineWidth); - -void VKAPI vkCmdSetDepthBias( - VkCmdBuffer cmdBuffer, - float depthBias, - float depthBiasClamp, - float slopeScaledDepthBias); - -void VKAPI vkCmdSetBlendConstants( - VkCmdBuffer cmdBuffer, - const float blendConst[4]); - -void VKAPI vkCmdSetDepthBounds( - VkCmdBuffer cmdBuffer, - float minDepthBounds, - float maxDepthBounds); - -void VKAPI vkCmdSetStencilCompareMask( - VkCmdBuffer cmdBuffer, - VkStencilFaceFlags faceMask, - uint32_t stencilCompareMask); - -void VKAPI vkCmdSetStencilWriteMask( - VkCmdBuffer cmdBuffer, - VkStencilFaceFlags faceMask, - uint32_t stencilWriteMask); - -void VKAPI vkCmdSetStencilReference( - VkCmdBuffer cmdBuffer, - VkStencilFaceFlags faceMask, - uint32_t stencilReference); - -void VKAPI vkCmdBindDescriptorSets( - VkCmdBuffer cmdBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout layout, - uint32_t firstSet, - uint32_t setCount, - const VkDescriptorSet* pDescriptorSets, - uint32_t dynamicOffsetCount, - const uint32_t* pDynamicOffsets); - -void VKAPI vkCmdBindIndexBuffer( - VkCmdBuffer cmdBuffer, - VkBuffer buffer, - VkDeviceSize offset, - VkIndexType indexType); - -void VKAPI vkCmdBindVertexBuffers( - VkCmdBuffer cmdBuffer, - uint32_t startBinding, - uint32_t bindingCount, - const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets); - -void VKAPI vkCmdDraw( - VkCmdBuffer cmdBuffer, - uint32_t vertexCount, - uint32_t instanceCount, - 
uint32_t firstVertex, - uint32_t firstInstance); - -void VKAPI vkCmdDrawIndexed( - VkCmdBuffer cmdBuffer, - uint32_t indexCount, - uint32_t instanceCount, - uint32_t firstIndex, - int32_t vertexOffset, - uint32_t firstInstance); - -void VKAPI vkCmdDrawIndirect( - VkCmdBuffer cmdBuffer, - VkBuffer buffer, - VkDeviceSize offset, - uint32_t count, - uint32_t stride); - -void VKAPI vkCmdDrawIndexedIndirect( - VkCmdBuffer cmdBuffer, - VkBuffer buffer, - VkDeviceSize offset, - uint32_t count, - uint32_t stride); - -void VKAPI vkCmdDispatch( - VkCmdBuffer cmdBuffer, - uint32_t x, - uint32_t y, - uint32_t z); - -void VKAPI vkCmdDispatchIndirect( - VkCmdBuffer cmdBuffer, - VkBuffer buffer, - VkDeviceSize offset); - -void VKAPI vkCmdCopyBuffer( - VkCmdBuffer cmdBuffer, - VkBuffer srcBuffer, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferCopy* pRegions); - -void VKAPI vkCmdCopyImage( - VkCmdBuffer cmdBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageCopy* pRegions); - -void VKAPI vkCmdBlitImage( - VkCmdBuffer cmdBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageBlit* pRegions, - VkTexFilter filter); - -void VKAPI vkCmdCopyBufferToImage( - VkCmdBuffer cmdBuffer, - VkBuffer srcBuffer, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkBufferImageCopy* pRegions); - -void VKAPI vkCmdCopyImageToBuffer( - VkCmdBuffer cmdBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferImageCopy* pRegions); - -void VKAPI vkCmdUpdateBuffer( - VkCmdBuffer cmdBuffer, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize dataSize, - const uint32_t* pData); - -void VKAPI vkCmdFillBuffer( - VkCmdBuffer cmdBuffer, - VkBuffer destBuffer, - VkDeviceSize destOffset, - 
VkDeviceSize fillSize, - uint32_t data); - -void VKAPI vkCmdClearColorImage( - VkCmdBuffer cmdBuffer, - VkImage image, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges); - -void VKAPI vkCmdClearDepthStencilImage( - VkCmdBuffer cmdBuffer, - VkImage image, - VkImageLayout imageLayout, - const VkClearDepthStencilValue* pDepthStencil, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges); - -void VKAPI vkCmdClearColorAttachment( - VkCmdBuffer cmdBuffer, - uint32_t colorAttachment, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rectCount, - const VkRect3D* pRects); - -void VKAPI vkCmdClearDepthStencilAttachment( - VkCmdBuffer cmdBuffer, - VkImageAspectFlags aspectMask, - VkImageLayout imageLayout, - const VkClearDepthStencilValue* pDepthStencil, - uint32_t rectCount, - const VkRect3D* pRects); - -void VKAPI vkCmdResolveImage( - VkCmdBuffer cmdBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageResolve* pRegions); - -void VKAPI vkCmdSetEvent( - VkCmdBuffer cmdBuffer, - VkEvent event, - VkPipelineStageFlags stageMask); - -void VKAPI vkCmdResetEvent( - VkCmdBuffer cmdBuffer, - VkEvent event, - VkPipelineStageFlags stageMask); - -void VKAPI vkCmdWaitEvents( - VkCmdBuffer cmdBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - uint32_t memBarrierCount, - const void* const* ppMemBarriers); - -void VKAPI vkCmdPipelineBarrier( - VkCmdBuffer cmdBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - VkBool32 byRegion, - uint32_t memBarrierCount, - const void* const* ppMemBarriers); - -void VKAPI vkCmdBeginQuery( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t slot, - VkQueryControlFlags flags); - -void VKAPI vkCmdEndQuery( - VkCmdBuffer 
cmdBuffer, - VkQueryPool queryPool, - uint32_t slot); - -void VKAPI vkCmdResetQueryPool( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount); - -void VKAPI vkCmdWriteTimestamp( - VkCmdBuffer cmdBuffer, - VkTimestampType timestampType, - VkBuffer destBuffer, - VkDeviceSize destOffset); - -void VKAPI vkCmdCopyQueryPoolResults( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize destStride, - VkQueryResultFlags flags); - -void VKAPI vkCmdPushConstants( - VkCmdBuffer cmdBuffer, - VkPipelineLayout layout, - VkShaderStageFlags stageFlags, - uint32_t start, - uint32_t length, - const void* values); - -void VKAPI vkCmdBeginRenderPass( - VkCmdBuffer cmdBuffer, - const VkRenderPassBeginInfo* pRenderPassBegin, - VkRenderPassContents contents); - -void VKAPI vkCmdNextSubpass( - VkCmdBuffer cmdBuffer, - VkRenderPassContents contents); - -void VKAPI vkCmdEndRenderPass( - VkCmdBuffer cmdBuffer); - -void VKAPI vkCmdExecuteCommands( - VkCmdBuffer cmdBuffer, - uint32_t cmdBuffersCount, - const VkCmdBuffer* pCmdBuffers); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/include/vulkan/vulkan-0.210.0.h b/include/vulkan/vulkan-0.210.0.h deleted file mode 100644 index 61ea4fb1f61..00000000000 --- a/include/vulkan/vulkan-0.210.0.h +++ /dev/null @@ -1,3088 +0,0 @@ -#ifndef __vulkan_h_ -#define __vulkan_h_ 1 - -#ifdef __cplusplus -extern "C" { -#endif - -/* -** Copyright (c) 2015 The Khronos Group Inc. 
-** -** Permission is hereby granted, free of charge, to any person obtaining a -** copy of this software and/or associated documentation files (the -** "Materials"), to deal in the Materials without restriction, including -** without limitation the rights to use, copy, modify, merge, publish, -** distribute, sublicense, and/or sell copies of the Materials, and to -** permit persons to whom the Materials are furnished to do so, subject to -** the following conditions: -** -** The above copyright notice and this permission notice shall be included -** in all copies or substantial portions of the Materials. -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. -*/ - -/* -** This header is generated from the Khronos Vulkan XML API Registry. 
-** -*/ - - -#define VK_VERSION_1_0 1 -#include "vk_platform.h" - -#define VK_MAKE_VERSION(major, minor, patch) \ - ((major << 22) | (minor << 12) | patch) - -// Vulkan API version supported by this file -#define VK_API_VERSION VK_MAKE_VERSION(0, 210, 0) - - -#define VK_NULL_HANDLE 0 - - - -#define VK_DEFINE_HANDLE(object) typedef struct object##_T* object; - - -#if defined(__LP64__) || defined(_WIN64) || defined(__x86_64__) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) - #define VK_DEFINE_NON_DISPATCHABLE_HANDLE(object) typedef struct object##_T *object; -#else - #define VK_DEFINE_NON_DISPATCHABLE_HANDLE(object) typedef uint64_t object; -#endif - - - -typedef uint32_t VkFlags; -typedef uint32_t VkBool32; -typedef uint64_t VkDeviceSize; -typedef uint32_t VkSampleMask; - -VK_DEFINE_HANDLE(VkInstance) -VK_DEFINE_HANDLE(VkPhysicalDevice) -VK_DEFINE_HANDLE(VkDevice) -VK_DEFINE_HANDLE(VkQueue) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSemaphore) -VK_DEFINE_HANDLE(VkCommandBuffer) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFence) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDeviceMemory) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBuffer) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkImage) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkEvent) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkQueryPool) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBufferView) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkImageView) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkShaderModule) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineCache) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineLayout) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkRenderPass) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipeline) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorSetLayout) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSampler) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorPool) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorSet) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFramebuffer) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkCommandPool) - -#define 
VK_LOD_CLAMP_NONE 1000.0f -#define VK_REMAINING_MIP_LEVELS (~0U) -#define VK_REMAINING_ARRAY_LAYERS (~0U) -#define VK_WHOLE_SIZE (~0ULL) -#define VK_ATTACHMENT_UNUSED (~0U) -#define VK_TRUE 1 -#define VK_FALSE 0 -#define VK_QUEUE_FAMILY_IGNORED (~0U) -#define VK_SUBPASS_EXTERNAL (~0U) -#define VK_MAX_PHYSICAL_DEVICE_NAME_SIZE 256 -#define VK_UUID_SIZE 16 -#define VK_MAX_MEMORY_TYPES 32 -#define VK_MAX_MEMORY_HEAPS 16 -#define VK_MAX_EXTENSION_NAME_SIZE 256 -#define VK_MAX_DESCRIPTION_SIZE 256 - - -typedef enum VkResult { - VK_SUCCESS = 0, - VK_NOT_READY = 1, - VK_TIMEOUT = 2, - VK_EVENT_SET = 3, - VK_EVENT_RESET = 4, - VK_INCOMPLETE = 5, - VK_ERROR_OUT_OF_HOST_MEMORY = -1, - VK_ERROR_OUT_OF_DEVICE_MEMORY = -2, - VK_ERROR_INITIALIZATION_FAILED = -3, - VK_ERROR_DEVICE_LOST = -4, - VK_ERROR_MEMORY_MAP_FAILED = -5, - VK_ERROR_LAYER_NOT_PRESENT = -6, - VK_ERROR_EXTENSION_NOT_PRESENT = -7, - VK_ERROR_FEATURE_NOT_PRESENT = -8, - VK_ERROR_INCOMPATIBLE_DRIVER = -9, - VK_ERROR_TOO_MANY_OBJECTS = -10, - VK_ERROR_FORMAT_NOT_SUPPORTED = -11, - VK_RESULT_BEGIN_RANGE = VK_ERROR_FORMAT_NOT_SUPPORTED, - VK_RESULT_END_RANGE = VK_INCOMPLETE, - VK_RESULT_RANGE_SIZE = (VK_INCOMPLETE - VK_ERROR_FORMAT_NOT_SUPPORTED + 1), - VK_RESULT_MAX_ENUM = 0x7FFFFFFF -} VkResult; - -typedef enum VkStructureType { - VK_STRUCTURE_TYPE_APPLICATION_INFO = 0, - VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 1, - VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO = 2, - VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO = 3, - VK_STRUCTURE_TYPE_SUBMIT_INFO = 4, - VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO = 5, - VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE = 6, - VK_STRUCTURE_TYPE_BIND_SPARSE_INFO = 7, - VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 8, - VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 9, - VK_STRUCTURE_TYPE_EVENT_CREATE_INFO = 10, - VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO = 11, - VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 12, - VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO = 13, - VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO = 14, - 
VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO = 15, - VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 16, - VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO = 17, - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 18, - VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 19, - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO = 20, - VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO = 21, - VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO = 22, - VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO = 23, - VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO = 24, - VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO = 25, - VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO = 26, - VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO = 27, - VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 28, - VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 29, - VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 30, - VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 31, - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 32, - VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO = 33, - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO = 34, - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET = 35, - VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET = 36, - VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 37, - VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 38, - VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO = 39, - VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO = 40, - VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO = 41, - VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 42, - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 43, - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 44, - VK_STRUCTURE_TYPE_MEMORY_BARRIER = 45, - VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO = 46, - VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO = 47, - VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, - VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO, - VK_STRUCTURE_TYPE_RANGE_SIZE 
= (VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), - VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkStructureType; - -typedef enum VkSystemAllocationScope { - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND = 0, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT = 1, - VK_SYSTEM_ALLOCATION_SCOPE_CACHE = 2, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE = 3, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE = 4, - VK_SYSTEM_ALLOCATION_SCOPE_BEGIN_RANGE = VK_SYSTEM_ALLOCATION_SCOPE_COMMAND, - VK_SYSTEM_ALLOCATION_SCOPE_END_RANGE = VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE, - VK_SYSTEM_ALLOCATION_SCOPE_RANGE_SIZE = (VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND + 1), - VK_SYSTEM_ALLOCATION_SCOPE_MAX_ENUM = 0x7FFFFFFF -} VkSystemAllocationScope; - -typedef enum VkInternalAllocationType { - VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE = 0, - VK_INTERNAL_ALLOCATION_TYPE_BEGIN_RANGE = VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE, - VK_INTERNAL_ALLOCATION_TYPE_END_RANGE = VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE, - VK_INTERNAL_ALLOCATION_TYPE_RANGE_SIZE = (VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE - VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE + 1), - VK_INTERNAL_ALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkInternalAllocationType; - -typedef enum VkFormat { - VK_FORMAT_UNDEFINED = 0, - VK_FORMAT_R4G4_UNORM_PACK8 = 1, - VK_FORMAT_R4G4B4A4_UNORM_PACK16 = 2, - VK_FORMAT_B4G4R4A4_UNORM_PACK16 = 3, - VK_FORMAT_R5G6B5_UNORM_PACK16 = 4, - VK_FORMAT_B5G6R5_UNORM_PACK16 = 5, - VK_FORMAT_R5G5B5A1_UNORM_PACK16 = 6, - VK_FORMAT_B5G5R5A1_UNORM_PACK16 = 7, - VK_FORMAT_A1R5G5B5_UNORM_PACK16 = 8, - VK_FORMAT_R8_UNORM = 9, - VK_FORMAT_R8_SNORM = 10, - VK_FORMAT_R8_USCALED = 11, - VK_FORMAT_R8_SSCALED = 12, - VK_FORMAT_R8_UINT = 13, - VK_FORMAT_R8_SINT = 14, - VK_FORMAT_R8_SRGB = 15, - VK_FORMAT_R8G8_UNORM = 16, - VK_FORMAT_R8G8_SNORM = 17, - VK_FORMAT_R8G8_USCALED = 18, - VK_FORMAT_R8G8_SSCALED = 19, - VK_FORMAT_R8G8_UINT = 20, - VK_FORMAT_R8G8_SINT = 21, - VK_FORMAT_R8G8_SRGB = 22, - VK_FORMAT_R8G8B8_UNORM 
= 23, - VK_FORMAT_R8G8B8_SNORM = 24, - VK_FORMAT_R8G8B8_USCALED = 25, - VK_FORMAT_R8G8B8_SSCALED = 26, - VK_FORMAT_R8G8B8_UINT = 27, - VK_FORMAT_R8G8B8_SINT = 28, - VK_FORMAT_R8G8B8_SRGB = 29, - VK_FORMAT_B8G8R8_UNORM = 30, - VK_FORMAT_B8G8R8_SNORM = 31, - VK_FORMAT_B8G8R8_USCALED = 32, - VK_FORMAT_B8G8R8_SSCALED = 33, - VK_FORMAT_B8G8R8_UINT = 34, - VK_FORMAT_B8G8R8_SINT = 35, - VK_FORMAT_B8G8R8_SRGB = 36, - VK_FORMAT_R8G8B8A8_UNORM = 37, - VK_FORMAT_R8G8B8A8_SNORM = 38, - VK_FORMAT_R8G8B8A8_USCALED = 39, - VK_FORMAT_R8G8B8A8_SSCALED = 40, - VK_FORMAT_R8G8B8A8_UINT = 41, - VK_FORMAT_R8G8B8A8_SINT = 42, - VK_FORMAT_R8G8B8A8_SRGB = 43, - VK_FORMAT_B8G8R8A8_UNORM = 44, - VK_FORMAT_B8G8R8A8_SNORM = 45, - VK_FORMAT_B8G8R8A8_USCALED = 46, - VK_FORMAT_B8G8R8A8_SSCALED = 47, - VK_FORMAT_B8G8R8A8_UINT = 48, - VK_FORMAT_B8G8R8A8_SINT = 49, - VK_FORMAT_B8G8R8A8_SRGB = 50, - VK_FORMAT_A8B8G8R8_UNORM_PACK32 = 51, - VK_FORMAT_A8B8G8R8_SNORM_PACK32 = 52, - VK_FORMAT_A8B8G8R8_USCALED_PACK32 = 53, - VK_FORMAT_A8B8G8R8_SSCALED_PACK32 = 54, - VK_FORMAT_A8B8G8R8_UINT_PACK32 = 55, - VK_FORMAT_A8B8G8R8_SINT_PACK32 = 56, - VK_FORMAT_A8B8G8R8_SRGB_PACK32 = 57, - VK_FORMAT_A2R10G10B10_UNORM_PACK32 = 58, - VK_FORMAT_A2R10G10B10_SNORM_PACK32 = 59, - VK_FORMAT_A2R10G10B10_USCALED_PACK32 = 60, - VK_FORMAT_A2R10G10B10_SSCALED_PACK32 = 61, - VK_FORMAT_A2R10G10B10_UINT_PACK32 = 62, - VK_FORMAT_A2R10G10B10_SINT_PACK32 = 63, - VK_FORMAT_A2B10G10R10_UNORM_PACK32 = 64, - VK_FORMAT_A2B10G10R10_SNORM_PACK32 = 65, - VK_FORMAT_A2B10G10R10_USCALED_PACK32 = 66, - VK_FORMAT_A2B10G10R10_SSCALED_PACK32 = 67, - VK_FORMAT_A2B10G10R10_UINT_PACK32 = 68, - VK_FORMAT_A2B10G10R10_SINT_PACK32 = 69, - VK_FORMAT_R16_UNORM = 70, - VK_FORMAT_R16_SNORM = 71, - VK_FORMAT_R16_USCALED = 72, - VK_FORMAT_R16_SSCALED = 73, - VK_FORMAT_R16_UINT = 74, - VK_FORMAT_R16_SINT = 75, - VK_FORMAT_R16_SFLOAT = 76, - VK_FORMAT_R16G16_UNORM = 77, - VK_FORMAT_R16G16_SNORM = 78, - VK_FORMAT_R16G16_USCALED = 79, - VK_FORMAT_R16G16_SSCALED = 
80, - VK_FORMAT_R16G16_UINT = 81, - VK_FORMAT_R16G16_SINT = 82, - VK_FORMAT_R16G16_SFLOAT = 83, - VK_FORMAT_R16G16B16_UNORM = 84, - VK_FORMAT_R16G16B16_SNORM = 85, - VK_FORMAT_R16G16B16_USCALED = 86, - VK_FORMAT_R16G16B16_SSCALED = 87, - VK_FORMAT_R16G16B16_UINT = 88, - VK_FORMAT_R16G16B16_SINT = 89, - VK_FORMAT_R16G16B16_SFLOAT = 90, - VK_FORMAT_R16G16B16A16_UNORM = 91, - VK_FORMAT_R16G16B16A16_SNORM = 92, - VK_FORMAT_R16G16B16A16_USCALED = 93, - VK_FORMAT_R16G16B16A16_SSCALED = 94, - VK_FORMAT_R16G16B16A16_UINT = 95, - VK_FORMAT_R16G16B16A16_SINT = 96, - VK_FORMAT_R16G16B16A16_SFLOAT = 97, - VK_FORMAT_R32_UINT = 98, - VK_FORMAT_R32_SINT = 99, - VK_FORMAT_R32_SFLOAT = 100, - VK_FORMAT_R32G32_UINT = 101, - VK_FORMAT_R32G32_SINT = 102, - VK_FORMAT_R32G32_SFLOAT = 103, - VK_FORMAT_R32G32B32_UINT = 104, - VK_FORMAT_R32G32B32_SINT = 105, - VK_FORMAT_R32G32B32_SFLOAT = 106, - VK_FORMAT_R32G32B32A32_UINT = 107, - VK_FORMAT_R32G32B32A32_SINT = 108, - VK_FORMAT_R32G32B32A32_SFLOAT = 109, - VK_FORMAT_R64_UINT = 110, - VK_FORMAT_R64_SINT = 111, - VK_FORMAT_R64_SFLOAT = 112, - VK_FORMAT_R64G64_UINT = 113, - VK_FORMAT_R64G64_SINT = 114, - VK_FORMAT_R64G64_SFLOAT = 115, - VK_FORMAT_R64G64B64_UINT = 116, - VK_FORMAT_R64G64B64_SINT = 117, - VK_FORMAT_R64G64B64_SFLOAT = 118, - VK_FORMAT_R64G64B64A64_UINT = 119, - VK_FORMAT_R64G64B64A64_SINT = 120, - VK_FORMAT_R64G64B64A64_SFLOAT = 121, - VK_FORMAT_B10G11R11_UFLOAT_PACK32 = 122, - VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 = 123, - VK_FORMAT_D16_UNORM = 124, - VK_FORMAT_X8_D24_UNORM_PACK32 = 125, - VK_FORMAT_D32_SFLOAT = 126, - VK_FORMAT_S8_UINT = 127, - VK_FORMAT_D16_UNORM_S8_UINT = 128, - VK_FORMAT_D24_UNORM_S8_UINT = 129, - VK_FORMAT_D32_SFLOAT_S8_UINT = 130, - VK_FORMAT_BC1_RGB_UNORM_BLOCK = 131, - VK_FORMAT_BC1_RGB_SRGB_BLOCK = 132, - VK_FORMAT_BC1_RGBA_UNORM_BLOCK = 133, - VK_FORMAT_BC1_RGBA_SRGB_BLOCK = 134, - VK_FORMAT_BC2_UNORM_BLOCK = 135, - VK_FORMAT_BC2_SRGB_BLOCK = 136, - VK_FORMAT_BC3_UNORM_BLOCK = 137, - 
VK_FORMAT_BC3_SRGB_BLOCK = 138, - VK_FORMAT_BC4_UNORM_BLOCK = 139, - VK_FORMAT_BC4_SNORM_BLOCK = 140, - VK_FORMAT_BC5_UNORM_BLOCK = 141, - VK_FORMAT_BC5_SNORM_BLOCK = 142, - VK_FORMAT_BC6H_UFLOAT_BLOCK = 143, - VK_FORMAT_BC6H_SFLOAT_BLOCK = 144, - VK_FORMAT_BC7_UNORM_BLOCK = 145, - VK_FORMAT_BC7_SRGB_BLOCK = 146, - VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK = 147, - VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK = 148, - VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK = 149, - VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK = 150, - VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK = 151, - VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK = 152, - VK_FORMAT_EAC_R11_UNORM_BLOCK = 153, - VK_FORMAT_EAC_R11_SNORM_BLOCK = 154, - VK_FORMAT_EAC_R11G11_UNORM_BLOCK = 155, - VK_FORMAT_EAC_R11G11_SNORM_BLOCK = 156, - VK_FORMAT_ASTC_4x4_UNORM_BLOCK = 157, - VK_FORMAT_ASTC_4x4_SRGB_BLOCK = 158, - VK_FORMAT_ASTC_5x4_UNORM_BLOCK = 159, - VK_FORMAT_ASTC_5x4_SRGB_BLOCK = 160, - VK_FORMAT_ASTC_5x5_UNORM_BLOCK = 161, - VK_FORMAT_ASTC_5x5_SRGB_BLOCK = 162, - VK_FORMAT_ASTC_6x5_UNORM_BLOCK = 163, - VK_FORMAT_ASTC_6x5_SRGB_BLOCK = 164, - VK_FORMAT_ASTC_6x6_UNORM_BLOCK = 165, - VK_FORMAT_ASTC_6x6_SRGB_BLOCK = 166, - VK_FORMAT_ASTC_8x5_UNORM_BLOCK = 167, - VK_FORMAT_ASTC_8x5_SRGB_BLOCK = 168, - VK_FORMAT_ASTC_8x6_UNORM_BLOCK = 169, - VK_FORMAT_ASTC_8x6_SRGB_BLOCK = 170, - VK_FORMAT_ASTC_8x8_UNORM_BLOCK = 171, - VK_FORMAT_ASTC_8x8_SRGB_BLOCK = 172, - VK_FORMAT_ASTC_10x5_UNORM_BLOCK = 173, - VK_FORMAT_ASTC_10x5_SRGB_BLOCK = 174, - VK_FORMAT_ASTC_10x6_UNORM_BLOCK = 175, - VK_FORMAT_ASTC_10x6_SRGB_BLOCK = 176, - VK_FORMAT_ASTC_10x8_UNORM_BLOCK = 177, - VK_FORMAT_ASTC_10x8_SRGB_BLOCK = 178, - VK_FORMAT_ASTC_10x10_UNORM_BLOCK = 179, - VK_FORMAT_ASTC_10x10_SRGB_BLOCK = 180, - VK_FORMAT_ASTC_12x10_UNORM_BLOCK = 181, - VK_FORMAT_ASTC_12x10_SRGB_BLOCK = 182, - VK_FORMAT_ASTC_12x12_UNORM_BLOCK = 183, - VK_FORMAT_ASTC_12x12_SRGB_BLOCK = 184, - VK_FORMAT_BEGIN_RANGE = VK_FORMAT_UNDEFINED, - VK_FORMAT_END_RANGE = VK_FORMAT_ASTC_12x12_SRGB_BLOCK, - VK_FORMAT_RANGE_SIZE = 
(VK_FORMAT_ASTC_12x12_SRGB_BLOCK - VK_FORMAT_UNDEFINED + 1), - VK_FORMAT_MAX_ENUM = 0x7FFFFFFF -} VkFormat; - -typedef enum VkImageType { - VK_IMAGE_TYPE_1D = 0, - VK_IMAGE_TYPE_2D = 1, - VK_IMAGE_TYPE_3D = 2, - VK_IMAGE_TYPE_BEGIN_RANGE = VK_IMAGE_TYPE_1D, - VK_IMAGE_TYPE_END_RANGE = VK_IMAGE_TYPE_3D, - VK_IMAGE_TYPE_RANGE_SIZE = (VK_IMAGE_TYPE_3D - VK_IMAGE_TYPE_1D + 1), - VK_IMAGE_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkImageType; - -typedef enum VkImageTiling { - VK_IMAGE_TILING_OPTIMAL = 0, - VK_IMAGE_TILING_LINEAR = 1, - VK_IMAGE_TILING_BEGIN_RANGE = VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_TILING_END_RANGE = VK_IMAGE_TILING_LINEAR, - VK_IMAGE_TILING_RANGE_SIZE = (VK_IMAGE_TILING_LINEAR - VK_IMAGE_TILING_OPTIMAL + 1), - VK_IMAGE_TILING_MAX_ENUM = 0x7FFFFFFF -} VkImageTiling; - -typedef enum VkPhysicalDeviceType { - VK_PHYSICAL_DEVICE_TYPE_OTHER = 0, - VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU = 1, - VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU = 2, - VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU = 3, - VK_PHYSICAL_DEVICE_TYPE_CPU = 4, - VK_PHYSICAL_DEVICE_TYPE_BEGIN_RANGE = VK_PHYSICAL_DEVICE_TYPE_OTHER, - VK_PHYSICAL_DEVICE_TYPE_END_RANGE = VK_PHYSICAL_DEVICE_TYPE_CPU, - VK_PHYSICAL_DEVICE_TYPE_RANGE_SIZE = (VK_PHYSICAL_DEVICE_TYPE_CPU - VK_PHYSICAL_DEVICE_TYPE_OTHER + 1), - VK_PHYSICAL_DEVICE_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkPhysicalDeviceType; - -typedef enum VkQueryType { - VK_QUERY_TYPE_OCCLUSION = 0, - VK_QUERY_TYPE_PIPELINE_STATISTICS = 1, - VK_QUERY_TYPE_TIMESTAMP = 2, - VK_QUERY_TYPE_BEGIN_RANGE = VK_QUERY_TYPE_OCCLUSION, - VK_QUERY_TYPE_END_RANGE = VK_QUERY_TYPE_TIMESTAMP, - VK_QUERY_TYPE_RANGE_SIZE = (VK_QUERY_TYPE_TIMESTAMP - VK_QUERY_TYPE_OCCLUSION + 1), - VK_QUERY_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkQueryType; - -typedef enum VkSharingMode { - VK_SHARING_MODE_EXCLUSIVE = 0, - VK_SHARING_MODE_CONCURRENT = 1, - VK_SHARING_MODE_BEGIN_RANGE = VK_SHARING_MODE_EXCLUSIVE, - VK_SHARING_MODE_END_RANGE = VK_SHARING_MODE_CONCURRENT, - VK_SHARING_MODE_RANGE_SIZE = (VK_SHARING_MODE_CONCURRENT - 
VK_SHARING_MODE_EXCLUSIVE + 1), - VK_SHARING_MODE_MAX_ENUM = 0x7FFFFFFF -} VkSharingMode; - -typedef enum VkImageLayout { - VK_IMAGE_LAYOUT_UNDEFINED = 0, - VK_IMAGE_LAYOUT_GENERAL = 1, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL = 2, - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL = 3, - VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL = 4, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL = 5, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL = 6, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL = 7, - VK_IMAGE_LAYOUT_PREINITIALIZED = 8, - VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_LAYOUT_END_RANGE = VK_IMAGE_LAYOUT_PREINITIALIZED, - VK_IMAGE_LAYOUT_RANGE_SIZE = (VK_IMAGE_LAYOUT_PREINITIALIZED - VK_IMAGE_LAYOUT_UNDEFINED + 1), - VK_IMAGE_LAYOUT_MAX_ENUM = 0x7FFFFFFF -} VkImageLayout; - -typedef enum VkImageViewType { - VK_IMAGE_VIEW_TYPE_1D = 0, - VK_IMAGE_VIEW_TYPE_2D = 1, - VK_IMAGE_VIEW_TYPE_3D = 2, - VK_IMAGE_VIEW_TYPE_CUBE = 3, - VK_IMAGE_VIEW_TYPE_1D_ARRAY = 4, - VK_IMAGE_VIEW_TYPE_2D_ARRAY = 5, - VK_IMAGE_VIEW_TYPE_CUBE_ARRAY = 6, - VK_IMAGE_VIEW_TYPE_BEGIN_RANGE = VK_IMAGE_VIEW_TYPE_1D, - VK_IMAGE_VIEW_TYPE_END_RANGE = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY, - VK_IMAGE_VIEW_TYPE_RANGE_SIZE = (VK_IMAGE_VIEW_TYPE_CUBE_ARRAY - VK_IMAGE_VIEW_TYPE_1D + 1), - VK_IMAGE_VIEW_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkImageViewType; - -typedef enum VkComponentSwizzle { - VK_COMPONENT_SWIZZLE_IDENTITY = 0, - VK_COMPONENT_SWIZZLE_ZERO = 1, - VK_COMPONENT_SWIZZLE_ONE = 2, - VK_COMPONENT_SWIZZLE_R = 3, - VK_COMPONENT_SWIZZLE_G = 4, - VK_COMPONENT_SWIZZLE_B = 5, - VK_COMPONENT_SWIZZLE_A = 6, - VK_COMPONENT_SWIZZLE_BEGIN_RANGE = VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_END_RANGE = VK_COMPONENT_SWIZZLE_A, - VK_COMPONENT_SWIZZLE_RANGE_SIZE = (VK_COMPONENT_SWIZZLE_A - VK_COMPONENT_SWIZZLE_IDENTITY + 1), - VK_COMPONENT_SWIZZLE_MAX_ENUM = 0x7FFFFFFF -} VkComponentSwizzle; - -typedef enum VkVertexInputRate { - VK_VERTEX_INPUT_RATE_VERTEX = 0, - VK_VERTEX_INPUT_RATE_INSTANCE = 1, - 
VK_VERTEX_INPUT_RATE_BEGIN_RANGE = VK_VERTEX_INPUT_RATE_VERTEX, - VK_VERTEX_INPUT_RATE_END_RANGE = VK_VERTEX_INPUT_RATE_INSTANCE, - VK_VERTEX_INPUT_RATE_RANGE_SIZE = (VK_VERTEX_INPUT_RATE_INSTANCE - VK_VERTEX_INPUT_RATE_VERTEX + 1), - VK_VERTEX_INPUT_RATE_MAX_ENUM = 0x7FFFFFFF -} VkVertexInputRate; - -typedef enum VkPrimitiveTopology { - VK_PRIMITIVE_TOPOLOGY_POINT_LIST = 0, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST = 1, - VK_PRIMITIVE_TOPOLOGY_LINE_STRIP = 2, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST = 3, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP = 4, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN = 5, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY = 6, - VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY = 7, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY = 8, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY = 9, - VK_PRIMITIVE_TOPOLOGY_PATCH_LIST = 10, - VK_PRIMITIVE_TOPOLOGY_BEGIN_RANGE = VK_PRIMITIVE_TOPOLOGY_POINT_LIST, - VK_PRIMITIVE_TOPOLOGY_END_RANGE = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, - VK_PRIMITIVE_TOPOLOGY_RANGE_SIZE = (VK_PRIMITIVE_TOPOLOGY_PATCH_LIST - VK_PRIMITIVE_TOPOLOGY_POINT_LIST + 1), - VK_PRIMITIVE_TOPOLOGY_MAX_ENUM = 0x7FFFFFFF -} VkPrimitiveTopology; - -typedef enum VkPolygonMode { - VK_POLYGON_MODE_FILL = 0, - VK_POLYGON_MODE_LINE = 1, - VK_POLYGON_MODE_POINT = 2, - VK_POLYGON_MODE_BEGIN_RANGE = VK_POLYGON_MODE_FILL, - VK_POLYGON_MODE_END_RANGE = VK_POLYGON_MODE_POINT, - VK_POLYGON_MODE_RANGE_SIZE = (VK_POLYGON_MODE_POINT - VK_POLYGON_MODE_FILL + 1), - VK_POLYGON_MODE_MAX_ENUM = 0x7FFFFFFF -} VkPolygonMode; - -typedef enum VkFrontFace { - VK_FRONT_FACE_COUNTER_CLOCKWISE = 0, - VK_FRONT_FACE_CLOCKWISE = 1, - VK_FRONT_FACE_BEGIN_RANGE = VK_FRONT_FACE_COUNTER_CLOCKWISE, - VK_FRONT_FACE_END_RANGE = VK_FRONT_FACE_CLOCKWISE, - VK_FRONT_FACE_RANGE_SIZE = (VK_FRONT_FACE_CLOCKWISE - VK_FRONT_FACE_COUNTER_CLOCKWISE + 1), - VK_FRONT_FACE_MAX_ENUM = 0x7FFFFFFF -} VkFrontFace; - -typedef enum VkCompareOp { - VK_COMPARE_OP_NEVER = 0, - VK_COMPARE_OP_LESS = 1, - 
VK_COMPARE_OP_EQUAL = 2, - VK_COMPARE_OP_LESS_OR_EQUAL = 3, - VK_COMPARE_OP_GREATER = 4, - VK_COMPARE_OP_NOT_EQUAL = 5, - VK_COMPARE_OP_GREATER_OR_EQUAL = 6, - VK_COMPARE_OP_ALWAYS = 7, - VK_COMPARE_OP_BEGIN_RANGE = VK_COMPARE_OP_NEVER, - VK_COMPARE_OP_END_RANGE = VK_COMPARE_OP_ALWAYS, - VK_COMPARE_OP_RANGE_SIZE = (VK_COMPARE_OP_ALWAYS - VK_COMPARE_OP_NEVER + 1), - VK_COMPARE_OP_MAX_ENUM = 0x7FFFFFFF -} VkCompareOp; - -typedef enum VkStencilOp { - VK_STENCIL_OP_KEEP = 0, - VK_STENCIL_OP_ZERO = 1, - VK_STENCIL_OP_REPLACE = 2, - VK_STENCIL_OP_INCREMENT_AND_CLAMP = 3, - VK_STENCIL_OP_DECREMENT_AND_CLAMP = 4, - VK_STENCIL_OP_INVERT = 5, - VK_STENCIL_OP_INCREMENT_AND_WRAP = 6, - VK_STENCIL_OP_DECREMENT_AND_WRAP = 7, - VK_STENCIL_OP_BEGIN_RANGE = VK_STENCIL_OP_KEEP, - VK_STENCIL_OP_END_RANGE = VK_STENCIL_OP_DECREMENT_AND_WRAP, - VK_STENCIL_OP_RANGE_SIZE = (VK_STENCIL_OP_DECREMENT_AND_WRAP - VK_STENCIL_OP_KEEP + 1), - VK_STENCIL_OP_MAX_ENUM = 0x7FFFFFFF -} VkStencilOp; - -typedef enum VkLogicOp { - VK_LOGIC_OP_CLEAR = 0, - VK_LOGIC_OP_AND = 1, - VK_LOGIC_OP_AND_REVERSE = 2, - VK_LOGIC_OP_COPY = 3, - VK_LOGIC_OP_AND_INVERTED = 4, - VK_LOGIC_OP_NO_OP = 5, - VK_LOGIC_OP_XOR = 6, - VK_LOGIC_OP_OR = 7, - VK_LOGIC_OP_NOR = 8, - VK_LOGIC_OP_EQUIVALENT = 9, - VK_LOGIC_OP_INVERT = 10, - VK_LOGIC_OP_OR_REVERSE = 11, - VK_LOGIC_OP_COPY_INVERTED = 12, - VK_LOGIC_OP_OR_INVERTED = 13, - VK_LOGIC_OP_NAND = 14, - VK_LOGIC_OP_SET = 15, - VK_LOGIC_OP_BEGIN_RANGE = VK_LOGIC_OP_CLEAR, - VK_LOGIC_OP_END_RANGE = VK_LOGIC_OP_SET, - VK_LOGIC_OP_RANGE_SIZE = (VK_LOGIC_OP_SET - VK_LOGIC_OP_CLEAR + 1), - VK_LOGIC_OP_MAX_ENUM = 0x7FFFFFFF -} VkLogicOp; - -typedef enum VkBlendFactor { - VK_BLEND_FACTOR_ZERO = 0, - VK_BLEND_FACTOR_ONE = 1, - VK_BLEND_FACTOR_SRC_COLOR = 2, - VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR = 3, - VK_BLEND_FACTOR_DST_COLOR = 4, - VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR = 5, - VK_BLEND_FACTOR_SRC_ALPHA = 6, - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA = 7, - VK_BLEND_FACTOR_DST_ALPHA = 8, - 
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA = 9, - VK_BLEND_FACTOR_CONSTANT_COLOR = 10, - VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR = 11, - VK_BLEND_FACTOR_CONSTANT_ALPHA = 12, - VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA = 13, - VK_BLEND_FACTOR_SRC_ALPHA_SATURATE = 14, - VK_BLEND_FACTOR_SRC1_COLOR = 15, - VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR = 16, - VK_BLEND_FACTOR_SRC1_ALPHA = 17, - VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA = 18, - VK_BLEND_FACTOR_BEGIN_RANGE = VK_BLEND_FACTOR_ZERO, - VK_BLEND_FACTOR_END_RANGE = VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, - VK_BLEND_FACTOR_RANGE_SIZE = (VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA - VK_BLEND_FACTOR_ZERO + 1), - VK_BLEND_FACTOR_MAX_ENUM = 0x7FFFFFFF -} VkBlendFactor; - -typedef enum VkBlendOp { - VK_BLEND_OP_ADD = 0, - VK_BLEND_OP_SUBTRACT = 1, - VK_BLEND_OP_REVERSE_SUBTRACT = 2, - VK_BLEND_OP_MIN = 3, - VK_BLEND_OP_MAX = 4, - VK_BLEND_OP_BEGIN_RANGE = VK_BLEND_OP_ADD, - VK_BLEND_OP_END_RANGE = VK_BLEND_OP_MAX, - VK_BLEND_OP_RANGE_SIZE = (VK_BLEND_OP_MAX - VK_BLEND_OP_ADD + 1), - VK_BLEND_OP_MAX_ENUM = 0x7FFFFFFF -} VkBlendOp; - -typedef enum VkDynamicState { - VK_DYNAMIC_STATE_VIEWPORT = 0, - VK_DYNAMIC_STATE_SCISSOR = 1, - VK_DYNAMIC_STATE_LINE_WIDTH = 2, - VK_DYNAMIC_STATE_DEPTH_BIAS = 3, - VK_DYNAMIC_STATE_BLEND_CONSTANTS = 4, - VK_DYNAMIC_STATE_DEPTH_BOUNDS = 5, - VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK = 6, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK = 7, - VK_DYNAMIC_STATE_STENCIL_REFERENCE = 8, - VK_DYNAMIC_STATE_BEGIN_RANGE = VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_END_RANGE = VK_DYNAMIC_STATE_STENCIL_REFERENCE, - VK_DYNAMIC_STATE_RANGE_SIZE = (VK_DYNAMIC_STATE_STENCIL_REFERENCE - VK_DYNAMIC_STATE_VIEWPORT + 1), - VK_DYNAMIC_STATE_MAX_ENUM = 0x7FFFFFFF -} VkDynamicState; - -typedef enum VkFilter { - VK_FILTER_NEAREST = 0, - VK_FILTER_LINEAR = 1, - VK_FILTER_BEGIN_RANGE = VK_FILTER_NEAREST, - VK_FILTER_END_RANGE = VK_FILTER_LINEAR, - VK_FILTER_RANGE_SIZE = (VK_FILTER_LINEAR - VK_FILTER_NEAREST + 1), - VK_FILTER_MAX_ENUM = 0x7FFFFFFF -} 
VkFilter; - -typedef enum VkSamplerMipmapMode { - VK_SAMPLER_MIPMAP_MODE_BASE = 0, - VK_SAMPLER_MIPMAP_MODE_NEAREST = 1, - VK_SAMPLER_MIPMAP_MODE_LINEAR = 2, - VK_SAMPLER_MIPMAP_MODE_BEGIN_RANGE = VK_SAMPLER_MIPMAP_MODE_BASE, - VK_SAMPLER_MIPMAP_MODE_END_RANGE = VK_SAMPLER_MIPMAP_MODE_LINEAR, - VK_SAMPLER_MIPMAP_MODE_RANGE_SIZE = (VK_SAMPLER_MIPMAP_MODE_LINEAR - VK_SAMPLER_MIPMAP_MODE_BASE + 1), - VK_SAMPLER_MIPMAP_MODE_MAX_ENUM = 0x7FFFFFFF -} VkSamplerMipmapMode; - -typedef enum VkSamplerAddressMode { - VK_SAMPLER_ADDRESS_MODE_REPEAT = 0, - VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT = 1, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE = 2, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER = 3, - VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE = 4, - VK_SAMPLER_ADDRESS_MODE_BEGIN_RANGE = VK_SAMPLER_ADDRESS_MODE_REPEAT, - VK_SAMPLER_ADDRESS_MODE_END_RANGE = VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, - VK_SAMPLER_ADDRESS_MODE_RANGE_SIZE = (VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE - VK_SAMPLER_ADDRESS_MODE_REPEAT + 1), - VK_SAMPLER_ADDRESS_MODE_MAX_ENUM = 0x7FFFFFFF -} VkSamplerAddressMode; - -typedef enum VkBorderColor { - VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK = 0, - VK_BORDER_COLOR_INT_TRANSPARENT_BLACK = 1, - VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK = 2, - VK_BORDER_COLOR_INT_OPAQUE_BLACK = 3, - VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE = 4, - VK_BORDER_COLOR_INT_OPAQUE_WHITE = 5, - VK_BORDER_COLOR_BEGIN_RANGE = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, - VK_BORDER_COLOR_END_RANGE = VK_BORDER_COLOR_INT_OPAQUE_WHITE, - VK_BORDER_COLOR_RANGE_SIZE = (VK_BORDER_COLOR_INT_OPAQUE_WHITE - VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK + 1), - VK_BORDER_COLOR_MAX_ENUM = 0x7FFFFFFF -} VkBorderColor; - -typedef enum VkDescriptorType { - VK_DESCRIPTOR_TYPE_SAMPLER = 0, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER = 1, - VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE = 2, - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE = 3, - VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER = 4, - VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER = 5, - 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER = 6, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER = 7, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC = 8, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = 9, - VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = 10, - VK_DESCRIPTOR_TYPE_BEGIN_RANGE = VK_DESCRIPTOR_TYPE_SAMPLER, - VK_DESCRIPTOR_TYPE_END_RANGE = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, - VK_DESCRIPTOR_TYPE_RANGE_SIZE = (VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT - VK_DESCRIPTOR_TYPE_SAMPLER + 1), - VK_DESCRIPTOR_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkDescriptorType; - -typedef enum VkAttachmentLoadOp { - VK_ATTACHMENT_LOAD_OP_LOAD = 0, - VK_ATTACHMENT_LOAD_OP_CLEAR = 1, - VK_ATTACHMENT_LOAD_OP_DONT_CARE = 2, - VK_ATTACHMENT_LOAD_OP_BEGIN_RANGE = VK_ATTACHMENT_LOAD_OP_LOAD, - VK_ATTACHMENT_LOAD_OP_END_RANGE = VK_ATTACHMENT_LOAD_OP_DONT_CARE, - VK_ATTACHMENT_LOAD_OP_RANGE_SIZE = (VK_ATTACHMENT_LOAD_OP_DONT_CARE - VK_ATTACHMENT_LOAD_OP_LOAD + 1), - VK_ATTACHMENT_LOAD_OP_MAX_ENUM = 0x7FFFFFFF -} VkAttachmentLoadOp; - -typedef enum VkAttachmentStoreOp { - VK_ATTACHMENT_STORE_OP_STORE = 0, - VK_ATTACHMENT_STORE_OP_DONT_CARE = 1, - VK_ATTACHMENT_STORE_OP_BEGIN_RANGE = VK_ATTACHMENT_STORE_OP_STORE, - VK_ATTACHMENT_STORE_OP_END_RANGE = VK_ATTACHMENT_STORE_OP_DONT_CARE, - VK_ATTACHMENT_STORE_OP_RANGE_SIZE = (VK_ATTACHMENT_STORE_OP_DONT_CARE - VK_ATTACHMENT_STORE_OP_STORE + 1), - VK_ATTACHMENT_STORE_OP_MAX_ENUM = 0x7FFFFFFF -} VkAttachmentStoreOp; - -typedef enum VkPipelineBindPoint { - VK_PIPELINE_BIND_POINT_GRAPHICS = 0, - VK_PIPELINE_BIND_POINT_COMPUTE = 1, - VK_PIPELINE_BIND_POINT_BEGIN_RANGE = VK_PIPELINE_BIND_POINT_GRAPHICS, - VK_PIPELINE_BIND_POINT_END_RANGE = VK_PIPELINE_BIND_POINT_COMPUTE, - VK_PIPELINE_BIND_POINT_RANGE_SIZE = (VK_PIPELINE_BIND_POINT_COMPUTE - VK_PIPELINE_BIND_POINT_GRAPHICS + 1), - VK_PIPELINE_BIND_POINT_MAX_ENUM = 0x7FFFFFFF -} VkPipelineBindPoint; - -typedef enum VkCommandBufferLevel { - VK_COMMAND_BUFFER_LEVEL_PRIMARY = 0, - VK_COMMAND_BUFFER_LEVEL_SECONDARY = 1, - 
VK_COMMAND_BUFFER_LEVEL_BEGIN_RANGE = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - VK_COMMAND_BUFFER_LEVEL_END_RANGE = VK_COMMAND_BUFFER_LEVEL_SECONDARY, - VK_COMMAND_BUFFER_LEVEL_RANGE_SIZE = (VK_COMMAND_BUFFER_LEVEL_SECONDARY - VK_COMMAND_BUFFER_LEVEL_PRIMARY + 1), - VK_COMMAND_BUFFER_LEVEL_MAX_ENUM = 0x7FFFFFFF -} VkCommandBufferLevel; - -typedef enum VkIndexType { - VK_INDEX_TYPE_UINT16 = 0, - VK_INDEX_TYPE_UINT32 = 1, - VK_INDEX_TYPE_BEGIN_RANGE = VK_INDEX_TYPE_UINT16, - VK_INDEX_TYPE_END_RANGE = VK_INDEX_TYPE_UINT32, - VK_INDEX_TYPE_RANGE_SIZE = (VK_INDEX_TYPE_UINT32 - VK_INDEX_TYPE_UINT16 + 1), - VK_INDEX_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkIndexType; - -typedef enum VkSubpassContents { - VK_SUBPASS_CONTENTS_INLINE = 0, - VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS = 1, - VK_SUBPASS_CONTENTS_BEGIN_RANGE = VK_SUBPASS_CONTENTS_INLINE, - VK_SUBPASS_CONTENTS_END_RANGE = VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS, - VK_SUBPASS_CONTENTS_RANGE_SIZE = (VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS - VK_SUBPASS_CONTENTS_INLINE + 1), - VK_SUBPASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF -} VkSubpassContents; - -typedef VkFlags VkInstanceCreateFlags; - -typedef enum VkFormatFeatureFlagBits { - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT = 0x00000001, - VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT = 0x00000002, - VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT = 0x00000004, - VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT = 0x00000008, - VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT = 0x00000010, - VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT = 0x00000020, - VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT = 0x00000040, - VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT = 0x00000080, - VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT = 0x00000100, - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000200, - VK_FORMAT_FEATURE_BLIT_SRC_BIT = 0x00000400, - VK_FORMAT_FEATURE_BLIT_DST_BIT = 0x00000800, -} VkFormatFeatureFlagBits; -typedef VkFlags VkFormatFeatureFlags; - -typedef enum VkImageUsageFlagBits { - VK_IMAGE_USAGE_TRANSFER_SRC_BIT 
= 0x00000001, - VK_IMAGE_USAGE_TRANSFER_DST_BIT = 0x00000002, - VK_IMAGE_USAGE_SAMPLED_BIT = 0x00000004, - VK_IMAGE_USAGE_STORAGE_BIT = 0x00000008, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT = 0x00000010, - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000020, - VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT = 0x00000040, - VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT = 0x00000080, -} VkImageUsageFlagBits; -typedef VkFlags VkImageUsageFlags; - -typedef enum VkImageCreateFlagBits { - VK_IMAGE_CREATE_SPARSE_BINDING_BIT = 0x00000001, - VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, - VK_IMAGE_CREATE_SPARSE_ALIASED_BIT = 0x00000004, - VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000008, - VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000010, -} VkImageCreateFlagBits; -typedef VkFlags VkImageCreateFlags; - -typedef enum VkSampleCountFlagBits { - VK_SAMPLE_COUNT_1_BIT = 0x00000001, - VK_SAMPLE_COUNT_2_BIT = 0x00000002, - VK_SAMPLE_COUNT_4_BIT = 0x00000004, - VK_SAMPLE_COUNT_8_BIT = 0x00000008, - VK_SAMPLE_COUNT_16_BIT = 0x00000010, - VK_SAMPLE_COUNT_32_BIT = 0x00000020, - VK_SAMPLE_COUNT_64_BIT = 0x00000040, -} VkSampleCountFlagBits; -typedef VkFlags VkSampleCountFlags; - -typedef enum VkQueueFlagBits { - VK_QUEUE_GRAPHICS_BIT = 0x00000001, - VK_QUEUE_COMPUTE_BIT = 0x00000002, - VK_QUEUE_TRANSFER_BIT = 0x00000004, - VK_QUEUE_SPARSE_BINDING_BIT = 0x00000008, -} VkQueueFlagBits; -typedef VkFlags VkQueueFlags; - -typedef enum VkMemoryPropertyFlagBits { - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT = 0x00000001, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT = 0x00000002, - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT = 0x00000004, - VK_MEMORY_PROPERTY_HOST_CACHED_BIT = 0x00000008, - VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT = 0x00000010, -} VkMemoryPropertyFlagBits; -typedef VkFlags VkMemoryPropertyFlags; - -typedef enum VkMemoryHeapFlagBits { - VK_MEMORY_HEAP_DEVICE_LOCAL_BIT = 0x00000001, -} VkMemoryHeapFlagBits; -typedef VkFlags VkMemoryHeapFlags; -typedef VkFlags VkDeviceCreateFlags; -typedef VkFlags 
VkDeviceQueueCreateFlags; -typedef VkFlags VkMemoryMapFlags; - -typedef enum VkImageAspectFlagBits { - VK_IMAGE_ASPECT_COLOR_BIT = 0x00000001, - VK_IMAGE_ASPECT_DEPTH_BIT = 0x00000002, - VK_IMAGE_ASPECT_STENCIL_BIT = 0x00000004, - VK_IMAGE_ASPECT_METADATA_BIT = 0x00000008, -} VkImageAspectFlagBits; -typedef VkFlags VkImageAspectFlags; - -typedef enum VkSparseImageFormatFlagBits { - VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT = 0x00000001, - VK_SPARSE_IMAGE_FORMAT_ALIGNED_MIP_SIZE_BIT = 0x00000002, - VK_SPARSE_IMAGE_FORMAT_NONSTANDARD_BLOCK_SIZE_BIT = 0x00000004, -} VkSparseImageFormatFlagBits; -typedef VkFlags VkSparseImageFormatFlags; - -typedef enum VkSparseMemoryBindFlagBits { - VK_SPARSE_MEMORY_BIND_METADATA_BIT = 0x00000001, -} VkSparseMemoryBindFlagBits; -typedef VkFlags VkSparseMemoryBindFlags; - -typedef enum VkFenceCreateFlagBits { - VK_FENCE_CREATE_SIGNALED_BIT = 0x00000001, -} VkFenceCreateFlagBits; -typedef VkFlags VkFenceCreateFlags; -typedef VkFlags VkSemaphoreCreateFlags; -typedef VkFlags VkEventCreateFlags; -typedef VkFlags VkQueryPoolCreateFlags; - -typedef enum VkQueryPipelineStatisticFlagBits { - VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT = 0x00000001, - VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT = 0x00000002, - VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT = 0x00000004, - VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT = 0x00000008, - VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT = 0x00000010, - VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT = 0x00000020, - VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT = 0x00000040, - VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT = 0x00000080, - VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT = 0x00000100, - VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT = 0x00000200, - VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT = 0x00000400, -} VkQueryPipelineStatisticFlagBits; 
-typedef VkFlags VkQueryPipelineStatisticFlags; - -typedef enum VkQueryResultFlagBits { - VK_QUERY_RESULT_DEFAULT = 0, - VK_QUERY_RESULT_64_BIT = 0x00000001, - VK_QUERY_RESULT_WAIT_BIT = 0x00000002, - VK_QUERY_RESULT_WITH_AVAILABILITY_BIT = 0x00000004, - VK_QUERY_RESULT_PARTIAL_BIT = 0x00000008, -} VkQueryResultFlagBits; -typedef VkFlags VkQueryResultFlags; - -typedef enum VkBufferCreateFlagBits { - VK_BUFFER_CREATE_SPARSE_BINDING_BIT = 0x00000001, - VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, - VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004, -} VkBufferCreateFlagBits; -typedef VkFlags VkBufferCreateFlags; - -typedef enum VkBufferUsageFlagBits { - VK_BUFFER_USAGE_TRANSFER_SRC_BIT = 0x00000001, - VK_BUFFER_USAGE_TRANSFER_DST_BIT = 0x00000002, - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT = 0x00000004, - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT = 0x00000008, - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT = 0x00000010, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT = 0x00000020, - VK_BUFFER_USAGE_INDEX_BUFFER_BIT = 0x00000040, - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT = 0x00000080, - VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT = 0x00000100, -} VkBufferUsageFlagBits; -typedef VkFlags VkBufferUsageFlags; -typedef VkFlags VkBufferViewCreateFlags; -typedef VkFlags VkImageViewCreateFlags; -typedef VkFlags VkShaderModuleCreateFlags; -typedef VkFlags VkPipelineCacheCreateFlags; - -typedef enum VkPipelineCreateFlagBits { - VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT = 0x00000001, - VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT = 0x00000002, - VK_PIPELINE_CREATE_DERIVATIVE_BIT = 0x00000004, -} VkPipelineCreateFlagBits; -typedef VkFlags VkPipelineCreateFlags; -typedef VkFlags VkPipelineShaderStageCreateFlags; - -typedef enum VkShaderStageFlagBits { - VK_SHADER_STAGE_VERTEX_BIT = 0x00000001, - VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT = 0x00000002, - VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT = 0x00000004, - VK_SHADER_STAGE_GEOMETRY_BIT = 0x00000008, - VK_SHADER_STAGE_FRAGMENT_BIT = 0x00000010, - 
VK_SHADER_STAGE_COMPUTE_BIT = 0x00000020, - VK_SHADER_STAGE_ALL_GRAPHICS = 0x1F, - VK_SHADER_STAGE_ALL = 0x7FFFFFFF, -} VkShaderStageFlagBits; -typedef VkFlags VkPipelineVertexInputStateCreateFlags; -typedef VkFlags VkPipelineInputAssemblyStateCreateFlags; -typedef VkFlags VkPipelineTesselationStateCreateFlags; -typedef VkFlags VkPipelineViewportStateCreateFlags; -typedef VkFlags VkPipelineRasterizationStateCreateFlags; - -typedef enum VkCullModeFlagBits { - VK_CULL_MODE_NONE = 0, - VK_CULL_MODE_FRONT_BIT = 0x00000001, - VK_CULL_MODE_BACK_BIT = 0x00000002, - VK_CULL_MODE_FRONT_AND_BACK = 0x3, -} VkCullModeFlagBits; -typedef VkFlags VkCullModeFlags; -typedef VkFlags VkPipelineMultisampleStateCreateFlags; -typedef VkFlags VkPipelineDepthStencilStateCreateFlags; -typedef VkFlags VkPipelineColorBlendStateCreateFlags; - -typedef enum VkColorComponentFlagBits { - VK_COLOR_COMPONENT_R_BIT = 0x00000001, - VK_COLOR_COMPONENT_G_BIT = 0x00000002, - VK_COLOR_COMPONENT_B_BIT = 0x00000004, - VK_COLOR_COMPONENT_A_BIT = 0x00000008, -} VkColorComponentFlagBits; -typedef VkFlags VkColorComponentFlags; -typedef VkFlags VkPipelineDynamicStateCreateFlags; -typedef VkFlags VkPipelineLayoutCreateFlags; -typedef VkFlags VkShaderStageFlags; -typedef VkFlags VkSamplerCreateFlags; -typedef VkFlags VkDescriptorSetLayoutCreateFlags; - -typedef enum VkDescriptorPoolCreateFlagBits { - VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT = 0x00000001, -} VkDescriptorPoolCreateFlagBits; -typedef VkFlags VkDescriptorPoolCreateFlags; -typedef VkFlags VkDescriptorPoolResetFlags; -typedef VkFlags VkFramebufferCreateFlags; -typedef VkFlags VkRenderPassCreateFlags; - -typedef enum VkAttachmentDescriptionFlagBits { - VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT = 0x00000001, -} VkAttachmentDescriptionFlagBits; -typedef VkFlags VkAttachmentDescriptionFlags; -typedef VkFlags VkSubpassDescriptionFlags; - -typedef enum VkPipelineStageFlagBits { - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT = 0x00000001, - 
VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT = 0x00000002, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT = 0x00000004, - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT = 0x00000008, - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT = 0x00000010, - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT = 0x00000020, - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT = 0x00000040, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT = 0x00000080, - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT = 0x00000100, - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT = 0x00000200, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT = 0x00000400, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT = 0x00000800, - VK_PIPELINE_STAGE_TRANSFER_BIT = 0x00001000, - VK_PIPELINE_STAGE_HOST_BIT = 0x00002000, - VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT = 0x00004000, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT = 0x00008000, -} VkPipelineStageFlagBits; -typedef VkFlags VkPipelineStageFlags; - -typedef enum VkAccessFlagBits { - VK_ACCESS_INDIRECT_COMMAND_READ_BIT = 0x00000001, - VK_ACCESS_INDEX_READ_BIT = 0x00000002, - VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT = 0x00000004, - VK_ACCESS_UNIFORM_READ_BIT = 0x00000008, - VK_ACCESS_INPUT_ATTACHMENT_READ_BIT = 0x00000010, - VK_ACCESS_SHADER_READ_BIT = 0x00000020, - VK_ACCESS_SHADER_WRITE_BIT = 0x00000040, - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT = 0x00000080, - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT = 0x00000100, - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT = 0x00000200, - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT = 0x00000400, - VK_ACCESS_TRANSFER_READ_BIT = 0x00000800, - VK_ACCESS_TRANSFER_WRITE_BIT = 0x00001000, - VK_ACCESS_HOST_READ_BIT = 0x00002000, - VK_ACCESS_HOST_WRITE_BIT = 0x00004000, - VK_ACCESS_MEMORY_READ_BIT = 0x00008000, - VK_ACCESS_MEMORY_WRITE_BIT = 0x00010000, -} VkAccessFlagBits; -typedef VkFlags VkAccessFlags; - -typedef enum VkDependencyFlagBits { - VK_DEPENDENCY_BY_REGION_BIT = 0x00000001, -} VkDependencyFlagBits; -typedef VkFlags VkDependencyFlags; - -typedef enum VkCommandPoolCreateFlagBits { - 
VK_COMMAND_POOL_CREATE_TRANSIENT_BIT = 0x00000001, - VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT = 0x00000002, -} VkCommandPoolCreateFlagBits; -typedef VkFlags VkCommandPoolCreateFlags; - -typedef enum VkCommandPoolResetFlagBits { - VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT = 0x00000001, -} VkCommandPoolResetFlagBits; -typedef VkFlags VkCommandPoolResetFlags; - -typedef enum VkCommandBufferUsageFlagBits { - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT = 0x00000001, - VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT = 0x00000002, - VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT = 0x00000004, -} VkCommandBufferUsageFlagBits; -typedef VkFlags VkCommandBufferUsageFlags; - -typedef enum VkQueryControlFlagBits { - VK_QUERY_CONTROL_PRECISE_BIT = 0x00000001, -} VkQueryControlFlagBits; -typedef VkFlags VkQueryControlFlags; - -typedef enum VkCommandBufferResetFlagBits { - VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT = 0x00000001, -} VkCommandBufferResetFlagBits; -typedef VkFlags VkCommandBufferResetFlags; - -typedef enum VkStencilFaceFlagBits { - VK_STENCIL_FACE_NONE = 0, - VK_STENCIL_FACE_FRONT_BIT = 0x00000001, - VK_STENCIL_FACE_BACK_BIT = 0x00000002, - VK_STENCIL_FRONT_AND_BACK = 0x3, -} VkStencilFaceFlagBits; -typedef VkFlags VkStencilFaceFlags; - -typedef void* (VKAPI_PTR *PFN_vkAllocationFunction)( - void* pUserData, - size_t size, - size_t alignment, - VkSystemAllocationScope allocationScope); - -typedef void* (VKAPI_PTR *PFN_vkReallocationFunction)( - void* pUserData, - void* pOriginal, - size_t size, - size_t alignment, - VkSystemAllocationScope allocationScope); - -typedef void (VKAPI_PTR *PFN_vkFreeFunction)( - void* pUserData, - void* pMemory); - -typedef void (VKAPI_PTR *PFN_vkInternalAllocationNotification)( - void* pUserData, - size_t size, - VkInternalAllocationType allocationType, - VkSystemAllocationScope allocationScope); - -typedef void (VKAPI_PTR *PFN_vkInternalFreeNotification)( - void* pUserData, - size_t size, - VkInternalAllocationType 
allocationType, - VkSystemAllocationScope allocationScope); - -typedef void (VKAPI_PTR *PFN_vkVoidFunction)(void); - -typedef struct VkApplicationInfo { - VkStructureType sType; - const void* pNext; - const char* pApplicationName; - uint32_t applicationVersion; - const char* pEngineName; - uint32_t engineVersion; - uint32_t apiVersion; -} VkApplicationInfo; - -typedef struct VkInstanceCreateInfo { - VkStructureType sType; - const void* pNext; - VkInstanceCreateFlags flags; - const VkApplicationInfo* pApplicationInfo; - uint32_t enabledLayerNameCount; - const char* const* ppEnabledLayerNames; - uint32_t enabledExtensionNameCount; - const char* const* ppEnabledExtensionNames; -} VkInstanceCreateInfo; - -typedef struct VkAllocationCallbacks { - void* pUserData; - PFN_vkAllocationFunction pfnAllocation; - PFN_vkReallocationFunction pfnReallocation; - PFN_vkFreeFunction pfnFree; - PFN_vkInternalAllocationNotification pfnInternalAllocation; - PFN_vkInternalFreeNotification pfnInternalFree; -} VkAllocationCallbacks; - -typedef struct VkPhysicalDeviceFeatures { - VkBool32 robustBufferAccess; - VkBool32 fullDrawIndexUint32; - VkBool32 imageCubeArray; - VkBool32 independentBlend; - VkBool32 geometryShader; - VkBool32 tessellationShader; - VkBool32 sampleRateShading; - VkBool32 dualSrcBlend; - VkBool32 logicOp; - VkBool32 multiDrawIndirect; - VkBool32 depthClamp; - VkBool32 depthBiasClamp; - VkBool32 fillModeNonSolid; - VkBool32 depthBounds; - VkBool32 wideLines; - VkBool32 largePoints; - VkBool32 alphaToOne; - VkBool32 multiViewport; - VkBool32 samplerAnisotropy; - VkBool32 textureCompressionETC2; - VkBool32 textureCompressionASTC_LDR; - VkBool32 textureCompressionBC; - VkBool32 occlusionQueryPrecise; - VkBool32 pipelineStatisticsQuery; - VkBool32 vertexPipelineStoresAndAtomics; - VkBool32 fragmentStoresAndAtomics; - VkBool32 shaderTessellationAndGeometryPointSize; - VkBool32 shaderImageGatherExtended; - VkBool32 shaderStorageImageExtendedFormats; - VkBool32 
shaderStorageImageMultisample; - VkBool32 shaderStorageImageReadWithoutFormat; - VkBool32 shaderStorageImageWriteWithoutFormat; - VkBool32 shaderUniformBufferArrayDynamicIndexing; - VkBool32 shaderSampledImageArrayDynamicIndexing; - VkBool32 shaderStorageBufferArrayDynamicIndexing; - VkBool32 shaderStorageImageArrayDynamicIndexing; - VkBool32 shaderClipDistance; - VkBool32 shaderCullDistance; - VkBool32 shaderFloat64; - VkBool32 shaderInt64; - VkBool32 shaderInt16; - VkBool32 shaderResourceResidency; - VkBool32 shaderResourceMinLod; - VkBool32 sparseBinding; - VkBool32 sparseResidencyBuffer; - VkBool32 sparseResidencyImage2D; - VkBool32 sparseResidencyImage3D; - VkBool32 sparseResidency2Samples; - VkBool32 sparseResidency4Samples; - VkBool32 sparseResidency8Samples; - VkBool32 sparseResidency16Samples; - VkBool32 sparseResidencyAliased; - VkBool32 variableMultisampleRate; -} VkPhysicalDeviceFeatures; - -typedef struct VkFormatProperties { - VkFormatFeatureFlags linearTilingFeatures; - VkFormatFeatureFlags optimalTilingFeatures; - VkFormatFeatureFlags bufferFeatures; -} VkFormatProperties; - -typedef struct VkExtent3D { - int32_t width; - int32_t height; - int32_t depth; -} VkExtent3D; - -typedef struct VkImageFormatProperties { - VkExtent3D maxExtent; - uint32_t maxMipLevels; - uint32_t maxArrayLayers; - VkSampleCountFlags sampleCounts; - VkDeviceSize maxResourceSize; -} VkImageFormatProperties; - -typedef struct VkPhysicalDeviceLimits { - uint32_t maxImageDimension1D; - uint32_t maxImageDimension2D; - uint32_t maxImageDimension3D; - uint32_t maxImageDimensionCube; - uint32_t maxImageArrayLayers; - uint32_t maxTexelBufferElements; - uint32_t maxUniformBufferRange; - uint32_t maxStorageBufferRange; - uint32_t maxPushConstantsSize; - uint32_t maxMemoryAllocationCount; - uint32_t maxSamplerAllocationCount; - VkDeviceSize bufferImageGranularity; - VkDeviceSize sparseAddressSpaceSize; - uint32_t maxBoundDescriptorSets; - uint32_t maxPerStageDescriptorSamplers; - 
uint32_t maxPerStageDescriptorUniformBuffers; - uint32_t maxPerStageDescriptorStorageBuffers; - uint32_t maxPerStageDescriptorSampledImages; - uint32_t maxPerStageDescriptorStorageImages; - uint32_t maxPerStageDescriptorInputAttachments; - uint32_t maxPerStageResources; - uint32_t maxDescriptorSetSamplers; - uint32_t maxDescriptorSetUniformBuffers; - uint32_t maxDescriptorSetUniformBuffersDynamic; - uint32_t maxDescriptorSetStorageBuffers; - uint32_t maxDescriptorSetStorageBuffersDynamic; - uint32_t maxDescriptorSetSampledImages; - uint32_t maxDescriptorSetStorageImages; - uint32_t maxDescriptorSetInputAttachments; - uint32_t maxVertexInputAttributes; - uint32_t maxVertexInputBindings; - uint32_t maxVertexInputAttributeOffset; - uint32_t maxVertexInputBindingStride; - uint32_t maxVertexOutputComponents; - uint32_t maxTessellationGenerationLevel; - uint32_t maxTessellationPatchSize; - uint32_t maxTessellationControlPerVertexInputComponents; - uint32_t maxTessellationControlPerVertexOutputComponents; - uint32_t maxTessellationControlPerPatchOutputComponents; - uint32_t maxTessellationControlTotalOutputComponents; - uint32_t maxTessellationEvaluationInputComponents; - uint32_t maxTessellationEvaluationOutputComponents; - uint32_t maxGeometryShaderInvocations; - uint32_t maxGeometryInputComponents; - uint32_t maxGeometryOutputComponents; - uint32_t maxGeometryOutputVertices; - uint32_t maxGeometryTotalOutputComponents; - uint32_t maxFragmentInputComponents; - uint32_t maxFragmentOutputAttachments; - uint32_t maxFragmentDualSrcAttachments; - uint32_t maxFragmentCombinedOutputResources; - uint32_t maxComputeSharedMemorySize; - uint32_t maxComputeWorkGroupCount[3]; - uint32_t maxComputeWorkGroupInvocations; - uint32_t maxComputeWorkGroupSize[3]; - uint32_t subPixelPrecisionBits; - uint32_t subTexelPrecisionBits; - uint32_t mipmapPrecisionBits; - uint32_t maxDrawIndexedIndexValue; - uint32_t maxDrawIndirectCount; - float maxSamplerLodBias; - float maxSamplerAnisotropy; - 
uint32_t maxViewports; - uint32_t maxViewportDimensions[2]; - float viewportBoundsRange[2]; - uint32_t viewportSubPixelBits; - size_t minMemoryMapAlignment; - VkDeviceSize minTexelBufferOffsetAlignment; - VkDeviceSize minUniformBufferOffsetAlignment; - VkDeviceSize minStorageBufferOffsetAlignment; - int32_t minTexelOffset; - uint32_t maxTexelOffset; - int32_t minTexelGatherOffset; - uint32_t maxTexelGatherOffset; - float minInterpolationOffset; - float maxInterpolationOffset; - uint32_t subPixelInterpolationOffsetBits; - uint32_t maxFramebufferWidth; - uint32_t maxFramebufferHeight; - uint32_t maxFramebufferLayers; - VkSampleCountFlags framebufferColorSampleCounts; - VkSampleCountFlags framebufferDepthSampleCounts; - VkSampleCountFlags framebufferStencilSampleCounts; - VkSampleCountFlags framebufferNoAttachmentsSampleCounts; - uint32_t maxColorAttachments; - VkSampleCountFlags sampledImageColorSampleCounts; - VkSampleCountFlags sampledImageIntegerSampleCounts; - VkSampleCountFlags sampledImageDepthSampleCounts; - VkSampleCountFlags sampledImageStencilSampleCounts; - VkSampleCountFlags storageImageSampleCounts; - uint32_t maxSampleMaskWords; - float timestampPeriod; - uint32_t maxClipDistances; - uint32_t maxCullDistances; - uint32_t maxCombinedClipAndCullDistances; - uint32_t discreteQueuePriorities; - float pointSizeRange[2]; - float lineWidthRange[2]; - float pointSizeGranularity; - float lineWidthGranularity; - VkBool32 strictLines; - VkBool32 standardSampleLocations; - VkDeviceSize optimalBufferCopyOffsetAlignment; - VkDeviceSize optimalBufferCopyRowPitchAlignment; - VkDeviceSize nonCoherentAtomSize; -} VkPhysicalDeviceLimits; - -typedef struct VkPhysicalDeviceSparseProperties { - VkBool32 residencyStandard2DBlockShape; - VkBool32 residencyStandard2DMultisampleBlockShape; - VkBool32 residencyStandard3DBlockShape; - VkBool32 residencyAlignedMipSize; - VkBool32 residencyNonResidentStrict; -} VkPhysicalDeviceSparseProperties; - -typedef struct 
VkPhysicalDeviceProperties { - uint32_t apiVersion; - uint32_t driverVersion; - uint32_t vendorID; - uint32_t deviceID; - VkPhysicalDeviceType deviceType; - char deviceName[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; - uint8_t pipelineCacheUUID[VK_UUID_SIZE]; - VkPhysicalDeviceLimits limits; - VkPhysicalDeviceSparseProperties sparseProperties; -} VkPhysicalDeviceProperties; - -typedef struct VkQueueFamilyProperties { - VkQueueFlags queueFlags; - uint32_t queueCount; - uint32_t timestampValidBits; - VkExtent3D minImageTransferGranularity; -} VkQueueFamilyProperties; - -typedef struct VkMemoryType { - VkMemoryPropertyFlags propertyFlags; - uint32_t heapIndex; -} VkMemoryType; - -typedef struct VkMemoryHeap { - VkDeviceSize size; - VkMemoryHeapFlags flags; -} VkMemoryHeap; - -typedef struct VkPhysicalDeviceMemoryProperties { - uint32_t memoryTypeCount; - VkMemoryType memoryTypes[VK_MAX_MEMORY_TYPES]; - uint32_t memoryHeapCount; - VkMemoryHeap memoryHeaps[VK_MAX_MEMORY_HEAPS]; -} VkPhysicalDeviceMemoryProperties; - -typedef struct VkDeviceQueueCreateInfo { - VkStructureType sType; - const void* pNext; - VkDeviceQueueCreateFlags flags; - uint32_t queueFamilyIndex; - uint32_t queueCount; - const float* pQueuePriorities; -} VkDeviceQueueCreateInfo; - -typedef struct VkDeviceCreateInfo { - VkStructureType sType; - const void* pNext; - VkDeviceCreateFlags flags; - uint32_t queueCreateInfoCount; - const VkDeviceQueueCreateInfo* pQueueCreateInfos; - uint32_t enabledLayerNameCount; - const char* const* ppEnabledLayerNames; - uint32_t enabledExtensionNameCount; - const char* const* ppEnabledExtensionNames; - const VkPhysicalDeviceFeatures* pEnabledFeatures; -} VkDeviceCreateInfo; - -typedef struct VkExtensionProperties { - char extensionName[VK_MAX_EXTENSION_NAME_SIZE]; - uint32_t specVersion; -} VkExtensionProperties; - -typedef struct VkLayerProperties { - char layerName[VK_MAX_EXTENSION_NAME_SIZE]; - uint32_t specVersion; - uint32_t implementationVersion; - char 
description[VK_MAX_DESCRIPTION_SIZE]; -} VkLayerProperties; - -typedef struct VkSubmitInfo { - VkStructureType sType; - const void* pNext; - uint32_t waitSemaphoreCount; - const VkSemaphore* pWaitSemaphores; - uint32_t commandBufferCount; - const VkCommandBuffer* pCommandBuffers; - uint32_t signalSemaphoreCount; - const VkSemaphore* pSignalSemaphores; -} VkSubmitInfo; - -typedef struct VkMemoryAllocateInfo { - VkStructureType sType; - const void* pNext; - VkDeviceSize allocationSize; - uint32_t memoryTypeIndex; -} VkMemoryAllocateInfo; - -typedef struct VkMappedMemoryRange { - VkStructureType sType; - const void* pNext; - VkDeviceMemory memory; - VkDeviceSize offset; - VkDeviceSize size; -} VkMappedMemoryRange; - -typedef struct VkMemoryRequirements { - VkDeviceSize size; - VkDeviceSize alignment; - uint32_t memoryTypeBits; -} VkMemoryRequirements; - -typedef struct VkSparseImageFormatProperties { - VkImageAspectFlags aspectMask; - VkExtent3D imageGranularity; - VkSparseImageFormatFlags flags; -} VkSparseImageFormatProperties; - -typedef struct VkSparseImageMemoryRequirements { - VkSparseImageFormatProperties formatProperties; - uint32_t imageMipTailStartLod; - VkDeviceSize imageMipTailSize; - VkDeviceSize imageMipTailOffset; - VkDeviceSize imageMipTailStride; -} VkSparseImageMemoryRequirements; - -typedef struct VkSparseMemoryBind { - VkDeviceSize resourceOffset; - VkDeviceSize size; - VkDeviceMemory memory; - VkDeviceSize memoryOffset; - VkSparseMemoryBindFlags flags; -} VkSparseMemoryBind; - -typedef struct VkSparseBufferMemoryBindInfo { - VkBuffer buffer; - uint32_t bindCount; - const VkSparseMemoryBind* pBinds; -} VkSparseBufferMemoryBindInfo; - -typedef struct VkSparseImageOpaqueMemoryBindInfo { - VkImage image; - uint32_t bindCount; - const VkSparseMemoryBind* pBinds; -} VkSparseImageOpaqueMemoryBindInfo; - -typedef struct VkImageSubresource { - VkImageAspectFlags aspectMask; - uint32_t mipLevel; - uint32_t arrayLayer; -} VkImageSubresource; - -typedef 
struct VkOffset3D { - int32_t x; - int32_t y; - int32_t z; -} VkOffset3D; - -typedef struct VkSparseImageMemoryBind { - VkImageSubresource subresource; - VkOffset3D offset; - VkExtent3D extent; - VkDeviceMemory memory; - VkDeviceSize memoryOffset; - VkSparseMemoryBindFlags flags; -} VkSparseImageMemoryBind; - -typedef struct VkSparseImageMemoryBindInfo { - VkImage image; - uint32_t bindCount; - const VkSparseImageMemoryBind* pBinds; -} VkSparseImageMemoryBindInfo; - -typedef struct VkBindSparseInfo { - VkStructureType sType; - const void* pNext; - uint32_t waitSemaphoreCount; - const VkSemaphore* pWaitSemaphores; - uint32_t bufferBindCount; - const VkSparseBufferMemoryBindInfo* pBufferBinds; - uint32_t imageOpaqueBindCount; - const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds; - uint32_t imageBindCount; - const VkSparseImageMemoryBindInfo* pImageBinds; - uint32_t signalSemaphoreCount; - const VkSemaphore* pSignalSemaphores; -} VkBindSparseInfo; - -typedef struct VkFenceCreateInfo { - VkStructureType sType; - const void* pNext; - VkFenceCreateFlags flags; -} VkFenceCreateInfo; - -typedef struct VkSemaphoreCreateInfo { - VkStructureType sType; - const void* pNext; - VkSemaphoreCreateFlags flags; -} VkSemaphoreCreateInfo; - -typedef struct VkEventCreateInfo { - VkStructureType sType; - const void* pNext; - VkEventCreateFlags flags; -} VkEventCreateInfo; - -typedef struct VkQueryPoolCreateInfo { - VkStructureType sType; - const void* pNext; - VkQueryPoolCreateFlags flags; - VkQueryType queryType; - uint32_t entryCount; - VkQueryPipelineStatisticFlags pipelineStatistics; -} VkQueryPoolCreateInfo; - -typedef struct VkBufferCreateInfo { - VkStructureType sType; - const void* pNext; - VkBufferCreateFlags flags; - VkDeviceSize size; - VkBufferUsageFlags usage; - VkSharingMode sharingMode; - uint32_t queueFamilyIndexCount; - const uint32_t* pQueueFamilyIndices; -} VkBufferCreateInfo; - -typedef struct VkBufferViewCreateInfo { - VkStructureType sType; - const void* 
pNext; - VkBufferViewCreateFlags flags; - VkBuffer buffer; - VkFormat format; - VkDeviceSize offset; - VkDeviceSize range; -} VkBufferViewCreateInfo; - -typedef struct VkImageCreateInfo { - VkStructureType sType; - const void* pNext; - VkImageCreateFlags flags; - VkImageType imageType; - VkFormat format; - VkExtent3D extent; - uint32_t mipLevels; - uint32_t arrayLayers; - VkSampleCountFlagBits samples; - VkImageTiling tiling; - VkImageUsageFlags usage; - VkSharingMode sharingMode; - uint32_t queueFamilyIndexCount; - const uint32_t* pQueueFamilyIndices; - VkImageLayout initialLayout; -} VkImageCreateInfo; - -typedef struct VkSubresourceLayout { - VkDeviceSize offset; - VkDeviceSize size; - VkDeviceSize rowPitch; - VkDeviceSize depthPitch; -} VkSubresourceLayout; - -typedef struct VkComponentMapping { - VkComponentSwizzle r; - VkComponentSwizzle g; - VkComponentSwizzle b; - VkComponentSwizzle a; -} VkComponentMapping; - -typedef struct VkImageSubresourceRange { - VkImageAspectFlags aspectMask; - uint32_t baseMipLevel; - uint32_t levelCount; - uint32_t baseArrayLayer; - uint32_t layerCount; -} VkImageSubresourceRange; - -typedef struct VkImageViewCreateInfo { - VkStructureType sType; - const void* pNext; - VkImageViewCreateFlags flags; - VkImage image; - VkImageViewType viewType; - VkFormat format; - VkComponentMapping components; - VkImageSubresourceRange subresourceRange; -} VkImageViewCreateInfo; - -typedef struct VkShaderModuleCreateInfo { - VkStructureType sType; - const void* pNext; - VkShaderModuleCreateFlags flags; - size_t codeSize; - const uint32_t* pCode; -} VkShaderModuleCreateInfo; - -typedef struct VkPipelineCacheCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineCacheCreateFlags flags; - size_t initialDataSize; - const void* pInitialData; -} VkPipelineCacheCreateInfo; - -typedef struct VkSpecializationMapEntry { - uint32_t constantID; - uint32_t offset; - size_t size; -} VkSpecializationMapEntry; - -typedef struct 
VkSpecializationInfo { - uint32_t mapEntryCount; - const VkSpecializationMapEntry* pMapEntries; - size_t dataSize; - const void* pData; -} VkSpecializationInfo; - -typedef struct VkPipelineShaderStageCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineShaderStageCreateFlags flags; - VkShaderStageFlagBits stage; - VkShaderModule module; - const char* pName; - const VkSpecializationInfo* pSpecializationInfo; -} VkPipelineShaderStageCreateInfo; - -typedef struct VkVertexInputBindingDescription { - uint32_t binding; - uint32_t stride; - VkVertexInputRate inputRate; -} VkVertexInputBindingDescription; - -typedef struct VkVertexInputAttributeDescription { - uint32_t location; - uint32_t binding; - VkFormat format; - uint32_t offset; -} VkVertexInputAttributeDescription; - -typedef struct VkPipelineVertexInputStateCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineVertexInputStateCreateFlags flags; - uint32_t vertexBindingDescriptionCount; - const VkVertexInputBindingDescription* pVertexBindingDescriptions; - uint32_t vertexAttributeDescriptionCount; - const VkVertexInputAttributeDescription* pVertexAttributeDescriptions; -} VkPipelineVertexInputStateCreateInfo; - -typedef struct VkPipelineInputAssemblyStateCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineInputAssemblyStateCreateFlags flags; - VkPrimitiveTopology topology; - VkBool32 primitiveRestartEnable; -} VkPipelineInputAssemblyStateCreateInfo; - -typedef struct VkPipelineTessellationStateCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineTesselationStateCreateFlags flags; - uint32_t patchControlPoints; -} VkPipelineTessellationStateCreateInfo; - -typedef struct VkViewport { - float x; - float y; - float width; - float height; - float minDepth; - float maxDepth; -} VkViewport; - -typedef struct VkOffset2D { - int32_t x; - int32_t y; -} VkOffset2D; - -typedef struct VkExtent2D { - int32_t width; - int32_t height; -} VkExtent2D; - -typedef 
struct VkRect2D { - VkOffset2D offset; - VkExtent2D extent; -} VkRect2D; - -typedef struct VkPipelineViewportStateCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineViewportStateCreateFlags flags; - uint32_t viewportCount; - const VkViewport* pViewports; - uint32_t scissorCount; - const VkRect2D* pScissors; -} VkPipelineViewportStateCreateInfo; - -typedef struct VkPipelineRasterizationStateCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineRasterizationStateCreateFlags flags; - VkBool32 depthClampEnable; - VkBool32 rasterizerDiscardEnable; - VkPolygonMode polygonMode; - VkCullModeFlags cullMode; - VkFrontFace frontFace; - VkBool32 depthBiasEnable; - float depthBiasConstantFactor; - float depthBiasClamp; - float depthBiasSlopeFactor; - float lineWidth; -} VkPipelineRasterizationStateCreateInfo; - -typedef struct VkPipelineMultisampleStateCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineMultisampleStateCreateFlags flags; - VkSampleCountFlagBits rasterizationSamples; - VkBool32 sampleShadingEnable; - float minSampleShading; - const VkSampleMask* pSampleMask; - VkBool32 alphaToCoverageEnable; - VkBool32 alphaToOneEnable; -} VkPipelineMultisampleStateCreateInfo; - -typedef struct VkStencilOpState { - VkStencilOp failOp; - VkStencilOp passOp; - VkStencilOp depthFailOp; - VkCompareOp compareOp; - uint32_t compareMask; - uint32_t writeMask; - uint32_t reference; -} VkStencilOpState; - -typedef struct VkPipelineDepthStencilStateCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineDepthStencilStateCreateFlags flags; - VkBool32 depthTestEnable; - VkBool32 depthWriteEnable; - VkCompareOp depthCompareOp; - VkBool32 depthBoundsTestEnable; - VkBool32 stencilTestEnable; - VkStencilOpState front; - VkStencilOpState back; - float minDepthBounds; - float maxDepthBounds; -} VkPipelineDepthStencilStateCreateInfo; - -typedef struct VkPipelineColorBlendAttachmentState { - VkBool32 blendEnable; - VkBlendFactor 
srcColorBlendFactor; - VkBlendFactor dstColorBlendFactor; - VkBlendOp colorBlendOp; - VkBlendFactor srcAlphaBlendFactor; - VkBlendFactor dstAlphaBlendFactor; - VkBlendOp alphaBlendOp; - VkColorComponentFlags colorWriteMask; -} VkPipelineColorBlendAttachmentState; - -typedef struct VkPipelineColorBlendStateCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineColorBlendStateCreateFlags flags; - VkBool32 logicOpEnable; - VkLogicOp logicOp; - uint32_t attachmentCount; - const VkPipelineColorBlendAttachmentState* pAttachments; - float blendConstants[4]; -} VkPipelineColorBlendStateCreateInfo; - -typedef struct VkPipelineDynamicStateCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineDynamicStateCreateFlags flags; - uint32_t dynamicStateCount; - const VkDynamicState* pDynamicStates; -} VkPipelineDynamicStateCreateInfo; - -typedef struct VkGraphicsPipelineCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineCreateFlags flags; - uint32_t stageCount; - const VkPipelineShaderStageCreateInfo* pStages; - const VkPipelineVertexInputStateCreateInfo* pVertexInputState; - const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState; - const VkPipelineTessellationStateCreateInfo* pTessellationState; - const VkPipelineViewportStateCreateInfo* pViewportState; - const VkPipelineRasterizationStateCreateInfo* pRasterizationState; - const VkPipelineMultisampleStateCreateInfo* pMultisampleState; - const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState; - const VkPipelineColorBlendStateCreateInfo* pColorBlendState; - const VkPipelineDynamicStateCreateInfo* pDynamicState; - VkPipelineLayout layout; - VkRenderPass renderPass; - uint32_t subpass; - VkPipeline basePipelineHandle; - int32_t basePipelineIndex; -} VkGraphicsPipelineCreateInfo; - -typedef struct VkComputePipelineCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineCreateFlags flags; - VkPipelineShaderStageCreateInfo stage; - VkPipelineLayout 
layout; - VkPipeline basePipelineHandle; - int32_t basePipelineIndex; -} VkComputePipelineCreateInfo; - -typedef struct VkPushConstantRange { - VkShaderStageFlags stageFlags; - uint32_t offset; - uint32_t size; -} VkPushConstantRange; - -typedef struct VkPipelineLayoutCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineLayoutCreateFlags flags; - uint32_t setLayoutCount; - const VkDescriptorSetLayout* pSetLayouts; - uint32_t pushConstantRangeCount; - const VkPushConstantRange* pPushConstantRanges; -} VkPipelineLayoutCreateInfo; - -typedef struct VkSamplerCreateInfo { - VkStructureType sType; - const void* pNext; - VkSamplerCreateFlags flags; - VkFilter magFilter; - VkFilter minFilter; - VkSamplerMipmapMode mipmapMode; - VkSamplerAddressMode addressModeU; - VkSamplerAddressMode addressModeV; - VkSamplerAddressMode addressModeW; - float mipLodBias; - float maxAnisotropy; - VkBool32 compareEnable; - VkCompareOp compareOp; - float minLod; - float maxLod; - VkBorderColor borderColor; - VkBool32 unnormalizedCoordinates; -} VkSamplerCreateInfo; - -typedef struct VkDescriptorSetLayoutBinding { - uint32_t binding; - VkDescriptorType descriptorType; - uint32_t descriptorCount; - VkShaderStageFlags stageFlags; - const VkSampler* pImmutableSamplers; -} VkDescriptorSetLayoutBinding; - -typedef struct VkDescriptorSetLayoutCreateInfo { - VkStructureType sType; - const void* pNext; - VkDescriptorSetLayoutCreateFlags flags; - uint32_t bindingCount; - const VkDescriptorSetLayoutBinding* pBinding; -} VkDescriptorSetLayoutCreateInfo; - -typedef struct VkDescriptorPoolSize { - VkDescriptorType type; - uint32_t descriptorCount; -} VkDescriptorPoolSize; - -typedef struct VkDescriptorPoolCreateInfo { - VkStructureType sType; - const void* pNext; - VkDescriptorPoolCreateFlags flags; - uint32_t maxSets; - uint32_t poolSizeCount; - const VkDescriptorPoolSize* pPoolSizes; -} VkDescriptorPoolCreateInfo; - -typedef struct VkDescriptorSetAllocateInfo { - VkStructureType sType; 
- const void* pNext; - VkDescriptorPool descriptorPool; - uint32_t setLayoutCount; - const VkDescriptorSetLayout* pSetLayouts; -} VkDescriptorSetAllocateInfo; - -typedef struct VkDescriptorImageInfo { - VkSampler sampler; - VkImageView imageView; - VkImageLayout imageLayout; -} VkDescriptorImageInfo; - -typedef struct VkDescriptorBufferInfo { - VkBuffer buffer; - VkDeviceSize offset; - VkDeviceSize range; -} VkDescriptorBufferInfo; - -typedef struct VkWriteDescriptorSet { - VkStructureType sType; - const void* pNext; - VkDescriptorSet dstSet; - uint32_t dstBinding; - uint32_t dstArrayElement; - uint32_t descriptorCount; - VkDescriptorType descriptorType; - const VkDescriptorImageInfo* pImageInfo; - const VkDescriptorBufferInfo* pBufferInfo; - const VkBufferView* pTexelBufferView; -} VkWriteDescriptorSet; - -typedef struct VkCopyDescriptorSet { - VkStructureType sType; - const void* pNext; - VkDescriptorSet srcSet; - uint32_t srcBinding; - uint32_t srcArrayElement; - VkDescriptorSet dstSet; - uint32_t dstBinding; - uint32_t dstArrayElement; - uint32_t descriptorCount; -} VkCopyDescriptorSet; - -typedef struct VkFramebufferCreateInfo { - VkStructureType sType; - const void* pNext; - VkFramebufferCreateFlags flags; - VkRenderPass renderPass; - uint32_t attachmentCount; - const VkImageView* pAttachments; - uint32_t width; - uint32_t height; - uint32_t layers; -} VkFramebufferCreateInfo; - -typedef struct VkAttachmentDescription { - VkAttachmentDescriptionFlags flags; - VkFormat format; - VkSampleCountFlagBits samples; - VkAttachmentLoadOp loadOp; - VkAttachmentStoreOp storeOp; - VkAttachmentLoadOp stencilLoadOp; - VkAttachmentStoreOp stencilStoreOp; - VkImageLayout initialLayout; - VkImageLayout finalLayout; -} VkAttachmentDescription; - -typedef struct VkAttachmentReference { - uint32_t attachment; - VkImageLayout layout; -} VkAttachmentReference; - -typedef struct VkSubpassDescription { - VkSubpassDescriptionFlags flags; - VkPipelineBindPoint pipelineBindPoint; - 
uint32_t inputAttachmentCount; - const VkAttachmentReference* pInputAttachments; - uint32_t colorAttachmentCount; - const VkAttachmentReference* pColorAttachments; - const VkAttachmentReference* pResolveAttachments; - const VkAttachmentReference* pDepthStencilAttachment; - uint32_t preserveAttachmentCount; - const VkAttachmentReference* pPreserveAttachments; -} VkSubpassDescription; - -typedef struct VkSubpassDependency { - uint32_t srcSubpass; - uint32_t dstSubpass; - VkPipelineStageFlags srcStageMask; - VkPipelineStageFlags dstStageMask; - VkAccessFlags srcAccessMask; - VkAccessFlags dstAccessMask; - VkDependencyFlags dependencyFlags; -} VkSubpassDependency; - -typedef struct VkRenderPassCreateInfo { - VkStructureType sType; - const void* pNext; - VkRenderPassCreateFlags flags; - uint32_t attachmentCount; - const VkAttachmentDescription* pAttachments; - uint32_t subpassCount; - const VkSubpassDescription* pSubpasses; - uint32_t dependencyCount; - const VkSubpassDependency* pDependencies; -} VkRenderPassCreateInfo; - -typedef struct VkCommandPoolCreateInfo { - VkStructureType sType; - const void* pNext; - VkCommandPoolCreateFlags flags; - uint32_t queueFamilyIndex; -} VkCommandPoolCreateInfo; - -typedef struct VkCommandBufferAllocateInfo { - VkStructureType sType; - const void* pNext; - VkCommandPool commandPool; - VkCommandBufferLevel level; - uint32_t bufferCount; -} VkCommandBufferAllocateInfo; - -typedef struct VkCommandBufferBeginInfo { - VkStructureType sType; - const void* pNext; - VkCommandBufferUsageFlags flags; - VkRenderPass renderPass; - uint32_t subpass; - VkFramebuffer framebuffer; - VkBool32 occlusionQueryEnable; - VkQueryControlFlags queryFlags; - VkQueryPipelineStatisticFlags pipelineStatistics; -} VkCommandBufferBeginInfo; - -typedef struct VkBufferCopy { - VkDeviceSize srcOffset; - VkDeviceSize dstOffset; - VkDeviceSize size; -} VkBufferCopy; - -typedef struct VkImageSubresourceLayers { - VkImageAspectFlags aspectMask; - uint32_t mipLevel; - 
uint32_t baseArrayLayer; - uint32_t layerCount; -} VkImageSubresourceLayers; - -typedef struct VkImageCopy { - VkImageSubresourceLayers srcSubresource; - VkOffset3D srcOffset; - VkImageSubresourceLayers dstSubresource; - VkOffset3D dstOffset; - VkExtent3D extent; -} VkImageCopy; - -typedef struct VkImageBlit { - VkImageSubresourceLayers srcSubresource; - VkOffset3D srcOffset; - VkExtent3D srcExtent; - VkImageSubresourceLayers dstSubresource; - VkOffset3D dstOffset; - VkExtent3D dstExtent; -} VkImageBlit; - -typedef struct VkBufferImageCopy { - VkDeviceSize bufferOffset; - uint32_t bufferRowLength; - uint32_t bufferImageHeight; - VkImageSubresourceLayers imageSubresource; - VkOffset3D imageOffset; - VkExtent3D imageExtent; -} VkBufferImageCopy; - -typedef union VkClearColorValue { - float float32[4]; - int32_t int32[4]; - uint32_t uint32[4]; -} VkClearColorValue; - -typedef struct VkClearDepthStencilValue { - float depth; - uint32_t stencil; -} VkClearDepthStencilValue; - -typedef union VkClearValue { - VkClearColorValue color; - VkClearDepthStencilValue depthStencil; -} VkClearValue; - -typedef struct VkClearAttachment { - VkImageAspectFlags aspectMask; - uint32_t colorAttachment; - VkClearValue clearValue; -} VkClearAttachment; - -typedef struct VkClearRect { - VkRect2D rect; - uint32_t baseArrayLayer; - uint32_t layerCount; -} VkClearRect; - -typedef struct VkImageResolve { - VkImageSubresourceLayers srcSubresource; - VkOffset3D srcOffset; - VkImageSubresourceLayers dstSubresource; - VkOffset3D dstOffset; - VkExtent3D extent; -} VkImageResolve; - -typedef struct VkRenderPassBeginInfo { - VkStructureType sType; - const void* pNext; - VkRenderPass renderPass; - VkFramebuffer framebuffer; - VkRect2D renderArea; - uint32_t clearValueCount; - const VkClearValue* pClearValues; -} VkRenderPassBeginInfo; - -typedef struct VkBufferMemoryBarrier { - VkStructureType sType; - const void* pNext; - VkAccessFlags srcAccessMask; - VkAccessFlags dstAccessMask; - uint32_t 
srcQueueFamilyIndex; - uint32_t dstQueueFamilyIndex; - VkBuffer buffer; - VkDeviceSize offset; - VkDeviceSize size; -} VkBufferMemoryBarrier; - -typedef struct VkDispatchIndirectCommand { - uint32_t x; - uint32_t y; - uint32_t z; -} VkDispatchIndirectCommand; - -typedef struct VkDrawIndexedIndirectCommand { - uint32_t indexCount; - uint32_t instanceCount; - uint32_t firstIndex; - int32_t vertexOffset; - uint32_t firstInstance; -} VkDrawIndexedIndirectCommand; - -typedef struct VkDrawIndirectCommand { - uint32_t vertexCount; - uint32_t instanceCount; - uint32_t firstVertex; - uint32_t firstInstance; -} VkDrawIndirectCommand; - -typedef struct VkImageMemoryBarrier { - VkStructureType sType; - const void* pNext; - VkAccessFlags srcAccessMask; - VkAccessFlags dstAccessMask; - VkImageLayout oldLayout; - VkImageLayout newLayout; - uint32_t srcQueueFamilyIndex; - uint32_t dstQueueFamilyIndex; - VkImage image; - VkImageSubresourceRange subresourceRange; -} VkImageMemoryBarrier; - -typedef struct VkMemoryBarrier { - VkStructureType sType; - const void* pNext; - VkAccessFlags srcAccessMask; - VkAccessFlags dstAccessMask; -} VkMemoryBarrier; - - -typedef VkResult (VKAPI_PTR *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance); -typedef void (VKAPI_PTR *PFN_vkDestroyInstance)(VkInstance instance, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); -typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures); -typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); -typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType 
type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties); -typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); -typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceQueueFamilyProperties)(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties); -typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); -typedef PFN_vkVoidFunction (VKAPI_PTR *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); -typedef PFN_vkVoidFunction (VKAPI_PTR *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); -typedef VkResult (VKAPI_PTR *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice); -typedef void (VKAPI_PTR *PFN_vkDestroyDevice)(VkDevice device, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceExtensionProperties)(const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties); -typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties); -typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceLayerProperties)(uint32_t* pPropertyCount, VkLayerProperties* pProperties); -typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceLayerProperties)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkLayerProperties* pProperties); -typedef void (VKAPI_PTR *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue); -typedef VkResult (VKAPI_PTR *PFN_vkQueueSubmit)(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, 
VkFence fence); -typedef VkResult (VKAPI_PTR *PFN_vkQueueWaitIdle)(VkQueue queue); -typedef VkResult (VKAPI_PTR *PFN_vkDeviceWaitIdle)(VkDevice device); -typedef VkResult (VKAPI_PTR *PFN_vkAllocateMemory)(VkDevice device, const VkMemoryAllocateInfo* pAllocateInfo, const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory); -typedef void (VKAPI_PTR *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory memory, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory memory, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData); -typedef void (VKAPI_PTR *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory memory); -typedef VkResult (VKAPI_PTR *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges); -typedef VkResult (VKAPI_PTR *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges); -typedef void (VKAPI_PTR *PFN_vkGetDeviceMemoryCommitment)(VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes); -typedef VkResult (VKAPI_PTR *PFN_vkBindBufferMemory)(VkDevice device, VkBuffer buffer, VkDeviceMemory memory, VkDeviceSize memoryOffset); -typedef VkResult (VKAPI_PTR *PFN_vkBindImageMemory)(VkDevice device, VkImage image, VkDeviceMemory memory, VkDeviceSize memoryOffset); -typedef void (VKAPI_PTR *PFN_vkGetBufferMemoryRequirements)(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); -typedef void (VKAPI_PTR *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkImage image, VkMemoryRequirements* pMemoryRequirements); -typedef void (VKAPI_PTR *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); -typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceSparseImageFormatProperties)(VkPhysicalDevice 
physicalDevice, VkFormat format, VkImageType type, VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pPropertyCount, VkSparseImageFormatProperties* pProperties); -typedef VkResult (VKAPI_PTR *PFN_vkQueueBindSparse)(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo, VkFence fence); -typedef VkResult (VKAPI_PTR *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkFence* pFence); -typedef void (VKAPI_PTR *PFN_vkDestroyFence)(VkDevice device, VkFence fence, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); -typedef VkResult (VKAPI_PTR *PFN_vkGetFenceStatus)(VkDevice device, VkFence fence); -typedef VkResult (VKAPI_PTR *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); -typedef VkResult (VKAPI_PTR *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSemaphore* pSemaphore); -typedef void (VKAPI_PTR *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore semaphore, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkCreateEvent)(VkDevice device, const VkEventCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkEvent* pEvent); -typedef void (VKAPI_PTR *PFN_vkDestroyEvent)(VkDevice device, VkEvent event, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkGetEventStatus)(VkDevice device, VkEvent event); -typedef VkResult (VKAPI_PTR *PFN_vkSetEvent)(VkDevice device, VkEvent event); -typedef VkResult (VKAPI_PTR *PFN_vkResetEvent)(VkDevice device, VkEvent event); -typedef VkResult (VKAPI_PTR *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkQueryPool* pQueryPool); -typedef 
void (VKAPI_PTR *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, size_t dataSize, void* pData, VkDeviceSize stride, VkQueryResultFlags flags); -typedef VkResult (VKAPI_PTR *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkBuffer* pBuffer); -typedef void (VKAPI_PTR *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkBufferView* pView); -typedef void (VKAPI_PTR *PFN_vkDestroyBufferView)(VkDevice device, VkBufferView bufferView, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkImage* pImage); -typedef void (VKAPI_PTR *PFN_vkDestroyImage)(VkDevice device, VkImage image, const VkAllocationCallbacks* pAllocator); -typedef void (VKAPI_PTR *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); -typedef VkResult (VKAPI_PTR *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkImageView* pView); -typedef void (VKAPI_PTR *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkShaderModule* pShaderModule); -typedef void (VKAPI_PTR *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule, const 
VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipelineCache* pPipelineCache); -typedef void (VKAPI_PTR *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, size_t* pDataSize, void* pData); -typedef VkResult (VKAPI_PTR *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache dstCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); -typedef VkResult (VKAPI_PTR *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkGraphicsPipelineCreateInfo* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); -typedef VkResult (VKAPI_PTR *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkComputePipelineCreateInfo* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); -typedef void (VKAPI_PTR *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipelineLayout* pPipelineLayout); -typedef void (VKAPI_PTR *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelineLayout pipelineLayout, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSampler* pSampler); -typedef void (VKAPI_PTR *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR 
*PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorSetLayout* pSetLayout); -typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkCreateDescriptorPool)(VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorPool* pDescriptorPool); -typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorPoolResetFlags flags); -typedef VkResult (VKAPI_PTR *PFN_vkAllocateDescriptorSets)(VkDevice device, const VkDescriptorSetAllocateInfo* pAllocateInfo, VkDescriptorSet* pDescriptorSets); -typedef VkResult (VKAPI_PTR *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t descriptorSetCount, const VkDescriptorSet* pDescriptorSets); -typedef void (VKAPI_PTR *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies); -typedef VkResult (VKAPI_PTR *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkFramebuffer* pFramebuffer); -typedef void (VKAPI_PTR *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass); -typedef void (VKAPI_PTR *PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass, 
const VkAllocationCallbacks* pAllocator); -typedef void (VKAPI_PTR *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); -typedef VkResult (VKAPI_PTR *PFN_vkCreateCommandPool)(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkCommandPool* pCommandPool); -typedef void (VKAPI_PTR *PFN_vkDestroyCommandPool)(VkDevice device, VkCommandPool commandPool, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkResetCommandPool)(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags); -typedef VkResult (VKAPI_PTR *PFN_vkAllocateCommandBuffers)(VkDevice device, const VkCommandBufferAllocateInfo* pAllocateInfo, VkCommandBuffer* pCommandBuffers); -typedef void (VKAPI_PTR *PFN_vkFreeCommandBuffers)(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers); -typedef VkResult (VKAPI_PTR *PFN_vkBeginCommandBuffer)(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo* pBeginInfo); -typedef VkResult (VKAPI_PTR *PFN_vkEndCommandBuffer)(VkCommandBuffer commandBuffer); -typedef VkResult (VKAPI_PTR *PFN_vkResetCommandBuffer)(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags); -typedef void (VKAPI_PTR *PFN_vkCmdBindPipeline)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); -typedef void (VKAPI_PTR *PFN_vkCmdSetViewport)(VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport* pViewports); -typedef void (VKAPI_PTR *PFN_vkCmdSetScissor)(VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D* pScissors); -typedef void (VKAPI_PTR *PFN_vkCmdSetLineWidth)(VkCommandBuffer commandBuffer, float lineWidth); -typedef void (VKAPI_PTR *PFN_vkCmdSetDepthBias)(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp, float depthBiasSlopeFactor); -typedef void (VKAPI_PTR 
*PFN_vkCmdSetBlendConstants)(VkCommandBuffer commandBuffer, const float blendConstants[4]); -typedef void (VKAPI_PTR *PFN_vkCmdSetDepthBounds)(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds); -typedef void (VKAPI_PTR *PFN_vkCmdSetStencilCompareMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask); -typedef void (VKAPI_PTR *PFN_vkCmdSetStencilWriteMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask); -typedef void (VKAPI_PTR *PFN_vkCmdSetStencilReference)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference); -typedef void (VKAPI_PTR *PFN_vkCmdBindDescriptorSets)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); -typedef void (VKAPI_PTR *PFN_vkCmdBindIndexBuffer)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); -typedef void (VKAPI_PTR *PFN_vkCmdBindVertexBuffers)(VkCommandBuffer commandBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); -typedef void (VKAPI_PTR *PFN_vkCmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); -typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexed)(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); -typedef void (VKAPI_PTR *PFN_vkCmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride); -typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride); -typedef void (VKAPI_PTR 
*PFN_vkCmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z); -typedef void (VKAPI_PTR *PFN_vkCmdDispatchIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset); -typedef void (VKAPI_PTR *PFN_vkCmdCopyBuffer)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); -typedef void (VKAPI_PTR *PFN_vkCmdCopyImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); -typedef void (VKAPI_PTR *PFN_vkCmdBlitImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkFilter filter); -typedef void (VKAPI_PTR *PFN_vkCmdCopyBufferToImage)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); -typedef void (VKAPI_PTR *PFN_vkCmdCopyImageToBuffer)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); -typedef void (VKAPI_PTR *PFN_vkCmdUpdateBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const uint32_t* pData); -typedef void (VKAPI_PTR *PFN_vkCmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize size, uint32_t data); -typedef void (VKAPI_PTR *PFN_vkCmdClearColorImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -typedef void (VKAPI_PTR *PFN_vkCmdClearDepthStencilImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, 
uint32_t rangeCount, const VkImageSubresourceRange* pRanges); -typedef void (VKAPI_PTR *PFN_vkCmdClearAttachments)(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment* pAttachments, uint32_t rectCount, const VkClearRect* pRects); -typedef void (VKAPI_PTR *PFN_vkCmdResolveImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); -typedef void (VKAPI_PTR *PFN_vkCmdSetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); -typedef void (VKAPI_PTR *PFN_vkCmdResetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); -typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, uint32_t memoryBarrierCount, const void* const* ppMemoryBarriers); -typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags, uint32_t memoryBarrierCount, const void* const* ppMemoryBarriers); -typedef void (VKAPI_PTR *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t entry, VkQueryControlFlags flags); -typedef void (VKAPI_PTR *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t entry); -typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); -typedef void (VKAPI_PTR *PFN_vkCmdWriteTimestamp)(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkQueryPool queryPool, uint32_t entry); -typedef void (VKAPI_PTR *PFN_vkCmdCopyQueryPoolResults)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer dstBuffer, 
VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags); -typedef void (VKAPI_PTR *PFN_vkCmdPushConstants)(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, const void* pValues); -typedef void (VKAPI_PTR *PFN_vkCmdBeginRenderPass)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkSubpassContents contents); -typedef void (VKAPI_PTR *PFN_vkCmdNextSubpass)(VkCommandBuffer commandBuffer, VkSubpassContents contents); -typedef void (VKAPI_PTR *PFN_vkCmdEndRenderPass)(VkCommandBuffer commandBuffer); -typedef void (VKAPI_PTR *PFN_vkCmdExecuteCommands)(VkCommandBuffer commandBuffer, uint32_t commandBuffersCount, const VkCommandBuffer* pCommandBuffers); - -#ifdef VK_PROTOTYPES -VKAPI_ATTR VkResult VKAPI_CALL vkCreateInstance( - const VkInstanceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkInstance* pInstance); - -VKAPI_ATTR void VKAPI_CALL vkDestroyInstance( - VkInstance instance, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDevices( - VkInstance instance, - uint32_t* pPhysicalDeviceCount, - VkPhysicalDevice* pPhysicalDevices); - -VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFeatures( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures* pFeatures); - -VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkFormatProperties* pFormatProperties); - -VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceImageFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - VkImageTiling tiling, - VkImageUsageFlags usage, - VkImageCreateFlags flags, - VkImageFormatProperties* pImageFormatProperties); - -VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties* pProperties); - -VKAPI_ATTR void VKAPI_CALL 
vkGetPhysicalDeviceQueueFamilyProperties( - VkPhysicalDevice physicalDevice, - uint32_t* pQueueFamilyPropertyCount, - VkQueueFamilyProperties* pQueueFamilyProperties); - -VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceMemoryProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties* pMemoryProperties); - -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr( - VkInstance instance, - const char* pName); - -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr( - VkDevice device, - const char* pName); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice( - VkPhysicalDevice physicalDevice, - const VkDeviceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDevice* pDevice); - -VKAPI_ATTR void VKAPI_CALL vkDestroyDevice( - VkDevice device, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceExtensionProperties( - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties); - -VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceExtensionProperties( - VkPhysicalDevice physicalDevice, - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties); - -VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceLayerProperties( - uint32_t* pPropertyCount, - VkLayerProperties* pProperties); - -VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceLayerProperties( - VkPhysicalDevice physicalDevice, - uint32_t* pPropertyCount, - VkLayerProperties* pProperties); - -VKAPI_ATTR void VKAPI_CALL vkGetDeviceQueue( - VkDevice device, - uint32_t queueFamilyIndex, - uint32_t queueIndex, - VkQueue* pQueue); - -VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( - VkQueue queue, - uint32_t submitCount, - const VkSubmitInfo* pSubmits, - VkFence fence); - -VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitIdle( - VkQueue queue); - -VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle( - VkDevice device); - -VKAPI_ATTR VkResult VKAPI_CALL vkAllocateMemory( - VkDevice 
device, - const VkMemoryAllocateInfo* pAllocateInfo, - const VkAllocationCallbacks* pAllocator, - VkDeviceMemory* pMemory); - -VKAPI_ATTR void VKAPI_CALL vkFreeMemory( - VkDevice device, - VkDeviceMemory memory, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkMapMemory( - VkDevice device, - VkDeviceMemory memory, - VkDeviceSize offset, - VkDeviceSize size, - VkMemoryMapFlags flags, - void** ppData); - -VKAPI_ATTR void VKAPI_CALL vkUnmapMemory( - VkDevice device, - VkDeviceMemory memory); - -VKAPI_ATTR VkResult VKAPI_CALL vkFlushMappedMemoryRanges( - VkDevice device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange* pMemoryRanges); - -VKAPI_ATTR VkResult VKAPI_CALL vkInvalidateMappedMemoryRanges( - VkDevice device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange* pMemoryRanges); - -VKAPI_ATTR void VKAPI_CALL vkGetDeviceMemoryCommitment( - VkDevice device, - VkDeviceMemory memory, - VkDeviceSize* pCommittedMemoryInBytes); - -VKAPI_ATTR VkResult VKAPI_CALL vkBindBufferMemory( - VkDevice device, - VkBuffer buffer, - VkDeviceMemory memory, - VkDeviceSize memoryOffset); - -VKAPI_ATTR VkResult VKAPI_CALL vkBindImageMemory( - VkDevice device, - VkImage image, - VkDeviceMemory memory, - VkDeviceSize memoryOffset); - -VKAPI_ATTR void VKAPI_CALL vkGetBufferMemoryRequirements( - VkDevice device, - VkBuffer buffer, - VkMemoryRequirements* pMemoryRequirements); - -VKAPI_ATTR void VKAPI_CALL vkGetImageMemoryRequirements( - VkDevice device, - VkImage image, - VkMemoryRequirements* pMemoryRequirements); - -VKAPI_ATTR void VKAPI_CALL vkGetImageSparseMemoryRequirements( - VkDevice device, - VkImage image, - uint32_t* pSparseMemoryRequirementCount, - VkSparseImageMemoryRequirements* pSparseMemoryRequirements); - -VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceSparseImageFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - VkSampleCountFlagBits samples, - VkImageUsageFlags usage, - VkImageTiling 
tiling, - uint32_t* pPropertyCount, - VkSparseImageFormatProperties* pProperties); - -VKAPI_ATTR VkResult VKAPI_CALL vkQueueBindSparse( - VkQueue queue, - uint32_t bindInfoCount, - const VkBindSparseInfo* pBindInfo, - VkFence fence); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateFence( - VkDevice device, - const VkFenceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkFence* pFence); - -VKAPI_ATTR void VKAPI_CALL vkDestroyFence( - VkDevice device, - VkFence fence, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkResetFences( - VkDevice device, - uint32_t fenceCount, - const VkFence* pFences); - -VKAPI_ATTR VkResult VKAPI_CALL vkGetFenceStatus( - VkDevice device, - VkFence fence); - -VKAPI_ATTR VkResult VKAPI_CALL vkWaitForFences( - VkDevice device, - uint32_t fenceCount, - const VkFence* pFences, - VkBool32 waitAll, - uint64_t timeout); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore( - VkDevice device, - const VkSemaphoreCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSemaphore* pSemaphore); - -VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore( - VkDevice device, - VkSemaphore semaphore, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateEvent( - VkDevice device, - const VkEventCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkEvent* pEvent); - -VKAPI_ATTR void VKAPI_CALL vkDestroyEvent( - VkDevice device, - VkEvent event, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkGetEventStatus( - VkDevice device, - VkEvent event); - -VKAPI_ATTR VkResult VKAPI_CALL vkSetEvent( - VkDevice device, - VkEvent event); - -VKAPI_ATTR VkResult VKAPI_CALL vkResetEvent( - VkDevice device, - VkEvent event); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateQueryPool( - VkDevice device, - const VkQueryPoolCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkQueryPool* pQueryPool); - -VKAPI_ATTR void VKAPI_CALL 
vkDestroyQueryPool( - VkDevice device, - VkQueryPool queryPool, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkGetQueryPoolResults( - VkDevice device, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount, - size_t dataSize, - void* pData, - VkDeviceSize stride, - VkQueryResultFlags flags); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateBuffer( - VkDevice device, - const VkBufferCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkBuffer* pBuffer); - -VKAPI_ATTR void VKAPI_CALL vkDestroyBuffer( - VkDevice device, - VkBuffer buffer, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateBufferView( - VkDevice device, - const VkBufferViewCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkBufferView* pView); - -VKAPI_ATTR void VKAPI_CALL vkDestroyBufferView( - VkDevice device, - VkBufferView bufferView, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateImage( - VkDevice device, - const VkImageCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkImage* pImage); - -VKAPI_ATTR void VKAPI_CALL vkDestroyImage( - VkDevice device, - VkImage image, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR void VKAPI_CALL vkGetImageSubresourceLayout( - VkDevice device, - VkImage image, - const VkImageSubresource* pSubresource, - VkSubresourceLayout* pLayout); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateImageView( - VkDevice device, - const VkImageViewCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkImageView* pView); - -VKAPI_ATTR void VKAPI_CALL vkDestroyImageView( - VkDevice device, - VkImageView imageView, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateShaderModule( - VkDevice device, - const VkShaderModuleCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkShaderModule* pShaderModule); - -VKAPI_ATTR void VKAPI_CALL 
vkDestroyShaderModule( - VkDevice device, - VkShaderModule shaderModule, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineCache( - VkDevice device, - const VkPipelineCacheCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipelineCache* pPipelineCache); - -VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineCache( - VkDevice device, - VkPipelineCache pipelineCache, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineCacheData( - VkDevice device, - VkPipelineCache pipelineCache, - size_t* pDataSize, - void* pData); - -VKAPI_ATTR VkResult VKAPI_CALL vkMergePipelineCaches( - VkDevice device, - VkPipelineCache dstCache, - uint32_t srcCacheCount, - const VkPipelineCache* pSrcCaches); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines( - VkDevice device, - VkPipelineCache pipelineCache, - uint32_t createInfoCount, - const VkGraphicsPipelineCreateInfo* pCreateInfos, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipelines); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines( - VkDevice device, - VkPipelineCache pipelineCache, - uint32_t createInfoCount, - const VkComputePipelineCreateInfo* pCreateInfos, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipelines); - -VKAPI_ATTR void VKAPI_CALL vkDestroyPipeline( - VkDevice device, - VkPipeline pipeline, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineLayout( - VkDevice device, - const VkPipelineLayoutCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipelineLayout* pPipelineLayout); - -VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineLayout( - VkDevice device, - VkPipelineLayout pipelineLayout, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateSampler( - VkDevice device, - const VkSamplerCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSampler* pSampler); - -VKAPI_ATTR 
void VKAPI_CALL vkDestroySampler( - VkDevice device, - VkSampler sampler, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorSetLayout( - VkDevice device, - const VkDescriptorSetLayoutCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDescriptorSetLayout* pSetLayout); - -VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorSetLayout( - VkDevice device, - VkDescriptorSetLayout descriptorSetLayout, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorPool( - VkDevice device, - const VkDescriptorPoolCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDescriptorPool* pDescriptorPool); - -VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorPool( - VkDevice device, - VkDescriptorPool descriptorPool, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkResetDescriptorPool( - VkDevice device, - VkDescriptorPool descriptorPool, - VkDescriptorPoolResetFlags flags); - -VKAPI_ATTR VkResult VKAPI_CALL vkAllocateDescriptorSets( - VkDevice device, - const VkDescriptorSetAllocateInfo* pAllocateInfo, - VkDescriptorSet* pDescriptorSets); - -VKAPI_ATTR VkResult VKAPI_CALL vkFreeDescriptorSets( - VkDevice device, - VkDescriptorPool descriptorPool, - uint32_t descriptorSetCount, - const VkDescriptorSet* pDescriptorSets); - -VKAPI_ATTR void VKAPI_CALL vkUpdateDescriptorSets( - VkDevice device, - uint32_t descriptorWriteCount, - const VkWriteDescriptorSet* pDescriptorWrites, - uint32_t descriptorCopyCount, - const VkCopyDescriptorSet* pDescriptorCopies); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateFramebuffer( - VkDevice device, - const VkFramebufferCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkFramebuffer* pFramebuffer); - -VKAPI_ATTR void VKAPI_CALL vkDestroyFramebuffer( - VkDevice device, - VkFramebuffer framebuffer, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateRenderPass( - 
VkDevice device, - const VkRenderPassCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkRenderPass* pRenderPass); - -VKAPI_ATTR void VKAPI_CALL vkDestroyRenderPass( - VkDevice device, - VkRenderPass renderPass, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR void VKAPI_CALL vkGetRenderAreaGranularity( - VkDevice device, - VkRenderPass renderPass, - VkExtent2D* pGranularity); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool( - VkDevice device, - const VkCommandPoolCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkCommandPool* pCommandPool); - -VKAPI_ATTR void VKAPI_CALL vkDestroyCommandPool( - VkDevice device, - VkCommandPool commandPool, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandPool( - VkDevice device, - VkCommandPool commandPool, - VkCommandPoolResetFlags flags); - -VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers( - VkDevice device, - const VkCommandBufferAllocateInfo* pAllocateInfo, - VkCommandBuffer* pCommandBuffers); - -VKAPI_ATTR void VKAPI_CALL vkFreeCommandBuffers( - VkDevice device, - VkCommandPool commandPool, - uint32_t commandBufferCount, - const VkCommandBuffer* pCommandBuffers); - -VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer( - VkCommandBuffer commandBuffer, - const VkCommandBufferBeginInfo* pBeginInfo); - -VKAPI_ATTR VkResult VKAPI_CALL vkEndCommandBuffer( - VkCommandBuffer commandBuffer); - -VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandBuffer( - VkCommandBuffer commandBuffer, - VkCommandBufferResetFlags flags); - -VKAPI_ATTR void VKAPI_CALL vkCmdBindPipeline( - VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipeline pipeline); - -VKAPI_ATTR void VKAPI_CALL vkCmdSetViewport( - VkCommandBuffer commandBuffer, - uint32_t viewportCount, - const VkViewport* pViewports); - -VKAPI_ATTR void VKAPI_CALL vkCmdSetScissor( - VkCommandBuffer commandBuffer, - uint32_t scissorCount, - const VkRect2D* pScissors); - 
-VKAPI_ATTR void VKAPI_CALL vkCmdSetLineWidth( - VkCommandBuffer commandBuffer, - float lineWidth); - -VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBias( - VkCommandBuffer commandBuffer, - float depthBiasConstantFactor, - float depthBiasClamp, - float depthBiasSlopeFactor); - -VKAPI_ATTR void VKAPI_CALL vkCmdSetBlendConstants( - VkCommandBuffer commandBuffer, - const float blendConstants[4]); - -VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBounds( - VkCommandBuffer commandBuffer, - float minDepthBounds, - float maxDepthBounds); - -VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilCompareMask( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t compareMask); - -VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilWriteMask( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t writeMask); - -VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilReference( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t reference); - -VKAPI_ATTR void VKAPI_CALL vkCmdBindDescriptorSets( - VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout layout, - uint32_t firstSet, - uint32_t descriptorSetCount, - const VkDescriptorSet* pDescriptorSets, - uint32_t dynamicOffsetCount, - const uint32_t* pDynamicOffsets); - -VKAPI_ATTR void VKAPI_CALL vkCmdBindIndexBuffer( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - VkIndexType indexType); - -VKAPI_ATTR void VKAPI_CALL vkCmdBindVertexBuffers( - VkCommandBuffer commandBuffer, - uint32_t startBinding, - uint32_t bindingCount, - const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets); - -VKAPI_ATTR void VKAPI_CALL vkCmdDraw( - VkCommandBuffer commandBuffer, - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t firstVertex, - uint32_t firstInstance); - -VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexed( - VkCommandBuffer commandBuffer, - uint32_t indexCount, - uint32_t instanceCount, - uint32_t firstIndex, - int32_t vertexOffset, - uint32_t 
firstInstance); - -VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirect( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride); - -VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirect( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride); - -VKAPI_ATTR void VKAPI_CALL vkCmdDispatch( - VkCommandBuffer commandBuffer, - uint32_t x, - uint32_t y, - uint32_t z); - -VKAPI_ATTR void VKAPI_CALL vkCmdDispatchIndirect( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset); - -VKAPI_ATTR void VKAPI_CALL vkCmdCopyBuffer( - VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkBuffer dstBuffer, - uint32_t regionCount, - const VkBufferCopy* pRegions); - -VKAPI_ATTR void VKAPI_CALL vkCmdCopyImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage dstImage, - VkImageLayout dstImageLayout, - uint32_t regionCount, - const VkImageCopy* pRegions); - -VKAPI_ATTR void VKAPI_CALL vkCmdBlitImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage dstImage, - VkImageLayout dstImageLayout, - uint32_t regionCount, - const VkImageBlit* pRegions, - VkFilter filter); - -VKAPI_ATTR void VKAPI_CALL vkCmdCopyBufferToImage( - VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkImage dstImage, - VkImageLayout dstImageLayout, - uint32_t regionCount, - const VkBufferImageCopy* pRegions); - -VKAPI_ATTR void VKAPI_CALL vkCmdCopyImageToBuffer( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkBuffer dstBuffer, - uint32_t regionCount, - const VkBufferImageCopy* pRegions); - -VKAPI_ATTR void VKAPI_CALL vkCmdUpdateBuffer( - VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize dataSize, - const uint32_t* pData); - -VKAPI_ATTR void VKAPI_CALL vkCmdFillBuffer( - VkCommandBuffer commandBuffer, - 
VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize size, - uint32_t data); - -VKAPI_ATTR void VKAPI_CALL vkCmdClearColorImage( - VkCommandBuffer commandBuffer, - VkImage image, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges); - -VKAPI_ATTR void VKAPI_CALL vkCmdClearDepthStencilImage( - VkCommandBuffer commandBuffer, - VkImage image, - VkImageLayout imageLayout, - const VkClearDepthStencilValue* pDepthStencil, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges); - -VKAPI_ATTR void VKAPI_CALL vkCmdClearAttachments( - VkCommandBuffer commandBuffer, - uint32_t attachmentCount, - const VkClearAttachment* pAttachments, - uint32_t rectCount, - const VkClearRect* pRects); - -VKAPI_ATTR void VKAPI_CALL vkCmdResolveImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage dstImage, - VkImageLayout dstImageLayout, - uint32_t regionCount, - const VkImageResolve* pRegions); - -VKAPI_ATTR void VKAPI_CALL vkCmdSetEvent( - VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask); - -VKAPI_ATTR void VKAPI_CALL vkCmdResetEvent( - VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask); - -VKAPI_ATTR void VKAPI_CALL vkCmdWaitEvents( - VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - uint32_t memoryBarrierCount, - const void* const* ppMemoryBarriers); - -VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( - VkCommandBuffer commandBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - VkDependencyFlags dependencyFlags, - uint32_t memoryBarrierCount, - const void* const* ppMemoryBarriers); - -VKAPI_ATTR void VKAPI_CALL vkCmdBeginQuery( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t entry, - VkQueryControlFlags flags); - -VKAPI_ATTR void 
VKAPI_CALL vkCmdEndQuery( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t entry); - -VKAPI_ATTR void VKAPI_CALL vkCmdResetQueryPool( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount); - -VKAPI_ATTR void VKAPI_CALL vkCmdWriteTimestamp( - VkCommandBuffer commandBuffer, - VkPipelineStageFlagBits pipelineStage, - VkQueryPool queryPool, - uint32_t entry); - -VKAPI_ATTR void VKAPI_CALL vkCmdCopyQueryPoolResults( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize stride, - VkQueryResultFlags flags); - -VKAPI_ATTR void VKAPI_CALL vkCmdPushConstants( - VkCommandBuffer commandBuffer, - VkPipelineLayout layout, - VkShaderStageFlags stageFlags, - uint32_t offset, - uint32_t size, - const void* pValues); - -VKAPI_ATTR void VKAPI_CALL vkCmdBeginRenderPass( - VkCommandBuffer commandBuffer, - const VkRenderPassBeginInfo* pRenderPassBegin, - VkSubpassContents contents); - -VKAPI_ATTR void VKAPI_CALL vkCmdNextSubpass( - VkCommandBuffer commandBuffer, - VkSubpassContents contents); - -VKAPI_ATTR void VKAPI_CALL vkCmdEndRenderPass( - VkCommandBuffer commandBuffer); - -VKAPI_ATTR void VKAPI_CALL vkCmdExecuteCommands( - VkCommandBuffer commandBuffer, - uint32_t commandBuffersCount, - const VkCommandBuffer* pCommandBuffers); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index ab6b342c733..61ea4fb1f61 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -41,7 +41,7 @@ extern "C" { ((major << 22) | (minor << 12) | patch) // Vulkan API version supported by this file -#define VK_API_VERSION VK_MAKE_VERSION(0, 170, 2) +#define VK_API_VERSION VK_MAKE_VERSION(0, 210, 0) #define VK_NULL_HANDLE 0 diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index e3b3541d251..37462a4a4a6 100644 --- a/src/vulkan/anv_device.c 
+++ b/src/vulkan/anv_device.c @@ -198,7 +198,7 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - if (pCreateInfo->pApplicationInfo->apiVersion != VK_MAKE_VERSION(0, 170, 2)) + if (pCreateInfo->pApplicationInfo->apiVersion != VK_MAKE_VERSION(0, 210, 0)) return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); for (uint32_t i = 0; i < pCreateInfo->enabledExtensionNameCount; i++) { @@ -495,7 +495,7 @@ void anv_GetPhysicalDeviceProperties( }; *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = VK_MAKE_VERSION(0, 170, 2), + .apiVersion = VK_MAKE_VERSION(0, 210, 0), .driverVersion = 1, .vendorID = 0x8086, .deviceID = pdevice->chipset_id, -- cgit v1.2.3 From fde60c168495ab6a8f1ec3f4c3a41db575cd5d1e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 22:06:22 -0800 Subject: anv/entrypoints: Run the headers through the preprocessor first This allows us to filter based on preprocessor directives. We could build a partial preprocessor into the generator, but we would likely get it wrong. This allows us to filter out, for instance, windows-specific WSI stuff. 
--- src/vulkan/Makefile.am | 10 ++++++++-- src/vulkan/anv_entrypoints_gen.py | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 3f6d4a3e2e9..2f249488270 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -30,6 +30,12 @@ vulkan_include_HEADERS = \ $(top_srcdir)/include/vulkan/vk_ext_khr_swapchain.h \ $(top_srcdir)/include/vulkan/vk_ext_khr_device_swapchain.h +# Used when generating entrypoints to filter out unwanted extensions +VULKAN_ENTRYPOINT_CPPFLAGS = \ + -I$(top_srcdir)/include/vulkan \ + -DVK_USE_PLATFORM_XCB_KHR \ + -DVK_USE_PLATFORM_WAYLAND_KHR + lib_LTLIBRARIES = libvulkan.la check_LTLIBRARIES = libvulkan-test.la @@ -146,10 +152,10 @@ libvulkan_la_SOURCES = \ anv_gem.c anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) - $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< header > $@ + $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< header > $@ anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) - $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< code > $@ + $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@ isl_format_layout.c: isl_format_layout_gen.bash \ isl_format_layout.csv diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py index 406f1421054..1e4cfcb1755 100644 --- a/src/vulkan/anv_entrypoints_gen.py +++ b/src/vulkan/anv_entrypoints_gen.py @@ -27,7 +27,7 @@ import fileinput, re, sys # Each function typedef in the vulkan.h header is all on one line and matches # this regepx. We hope that won't change. -p = re.compile('typedef ([^ ]*) *\(VKAPI_PTR \*PFN_vk([^(]*)\)(.*);') +p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? 
*\*PFN_vk([^(]*)\)(.*);') entrypoints = [] -- cgit v1.2.3 From d666487dc6c46ea6ad7ca71fd0b0b1e099e5da0f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Dec 2015 16:28:36 -0800 Subject: vk: Add new WSI support and bump the API to 0.210.1 --- include/vulkan/vk_ext_khr_device_swapchain.h | 210 ----------- include/vulkan/vk_ext_khr_swapchain.h | 153 -------- include/vulkan/vulkan.h | 498 ++++++++++++++++++++++++++- src/vulkan/Makefile.am | 4 +- src/vulkan/anv_device.c | 20 +- src/vulkan/anv_private.h | 15 +- src/vulkan/anv_wsi.c | 211 +++++------- src/vulkan/anv_wsi.h | 60 ++-- src/vulkan/anv_wsi_wayland.c | 247 +++++++------ src/vulkan/anv_wsi_x11.c | 196 ++++++----- 10 files changed, 900 insertions(+), 714 deletions(-) delete mode 100644 include/vulkan/vk_ext_khr_device_swapchain.h delete mode 100644 include/vulkan/vk_ext_khr_swapchain.h (limited to 'src') diff --git a/include/vulkan/vk_ext_khr_device_swapchain.h b/include/vulkan/vk_ext_khr_device_swapchain.h deleted file mode 100644 index bb243b32dd2..00000000000 --- a/include/vulkan/vk_ext_khr_device_swapchain.h +++ /dev/null @@ -1,210 +0,0 @@ -// -// File: vk_ext_khr_device_swapchain.h -// -/* -** Copyright (c) 2015 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a -** copy of this software and/or associated documentation files (the -** "Materials"), to deal in the Materials without restriction, including -** without limitation the rights to use, copy, modify, merge, publish, -** distribute, sublicense, and/or sell copies of the Materials, and to -** permit persons to whom the Materials are furnished to do so, subject to -** the following conditions: -** -** The above copyright notice and this permission notice shall be included -** in all copies or substantial portions of the Materials. 
-** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. -*/ - -#ifndef __VK_EXT_KHR_DEVICE_SWAPCHAIN_H__ -#define __VK_EXT_KHR_DEVICE_SWAPCHAIN_H__ - -#include "vulkan.h" - -#define VK_EXT_KHR_DEVICE_SWAPCHAIN_REVISION 53 -#define VK_EXT_KHR_DEVICE_SWAPCHAIN_EXTENSION_NUMBER 2 -#define VK_EXT_KHR_DEVICE_SWAPCHAIN_EXTENSION_NAME "VK_EXT_KHR_device_swapchain" - -#ifdef __cplusplus -extern "C" -{ -#endif // __cplusplus - -// ------------------------------------------------------------------------------------------------ -// Objects - -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSwapchainKHR); - -// ------------------------------------------------------------------------------------------------ -// Enumeration constants - -#define VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM(type,id) ((type)((int)0xc0000000 - VK_EXT_KHR_DEVICE_SWAPCHAIN_EXTENSION_NUMBER * -1024 + (id))) -#define VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM_POSITIVE(type,id) ((type)((int)0x40000000 + (VK_EXT_KHR_DEVICE_SWAPCHAIN_EXTENSION_NUMBER - 1) * 1024 + (id))) - -// Extend VkStructureType enum with extension specific constants -#define VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM(VkStructureType, 0) -#define VK_STRUCTURE_TYPE_PRESENT_INFO_KHR VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM(VkStructureType, 1) - -// Extend VkImageLayout enum with extension specific constants -#define VK_IMAGE_LAYOUT_PRESENT_SOURCE_KHR VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM(VkImageLayout, 2) - -// Extend VkResult enum with extension specific constants -// Return codes for successful operation execution 
-#define VK_SUBOPTIMAL_KHR VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM_POSITIVE(VkResult, 3) -// Error codes -#define VK_ERROR_OUT_OF_DATE_KHR VK_EXT_KHR_DEVICE_SWAPCHAIN_ENUM(VkResult, 4) - -// ------------------------------------------------------------------------------------------------ -// Enumerations - -typedef enum { - VK_PRESENT_MODE_IMMEDIATE_KHR = 0, - VK_PRESENT_MODE_MAILBOX_KHR = 1, - VK_PRESENT_MODE_FIFO_KHR = 2, - VK_PRESENT_MODE_BEGIN_RANGE_KHR = VK_PRESENT_MODE_IMMEDIATE_KHR, - VK_PRESENT_MODE_END_RANGE_KHR = VK_PRESENT_MODE_FIFO_KHR, - VK_PRESENT_MODE_NUM = (VK_PRESENT_MODE_FIFO_KHR - VK_PRESENT_MODE_IMMEDIATE_KHR + 1), - VK_PRESENT_MODE_MAX_ENUM_KHR = 0x7FFFFFFF -} VkPresentModeKHR; - -typedef enum { - VK_COLORSPACE_SRGB_NONLINEAR_KHR = 0x00000000, - VK_COLORSPACE_NUM = (VK_COLORSPACE_SRGB_NONLINEAR_KHR - VK_COLORSPACE_SRGB_NONLINEAR_KHR + 1), - VK_COLORSPACE_MAX_ENUM_KHR = 0x7FFFFFFF -} VkColorSpaceKHR; - -// ------------------------------------------------------------------------------------------------ -// Flags - -// ------------------------------------------------------------------------------------------------ -// Structures - -typedef struct { - uint32_t minImageCount; // Supported minimum number of images for the surface - uint32_t maxImageCount; // Supported maximum number of images for the surface, 0 for unlimited - - VkExtent2D currentExtent; // Current image width and height for the surface, (-1, -1) if undefined - VkExtent2D minImageExtent; // Supported minimum image width and height for the surface - VkExtent2D maxImageExtent; // Supported maximum image width and height for the surface - - VkSurfaceTransformFlagsKHR supportedTransforms;// 1 or more bits representing the transforms supported - VkSurfaceTransformKHR currentTransform; // The surface's current transform relative to the device's natural orientation - - uint32_t maxImageArraySize; // Supported maximum number of image layers for the surface - - VkImageUsageFlags 
supportedUsageFlags;// Supported image usage flags for the surface -} VkSurfacePropertiesKHR; - -typedef struct { - VkFormat format; // Supported pair of rendering format - VkColorSpaceKHR colorSpace; // and colorspace for the surface -} VkSurfaceFormatKHR; - -typedef struct { - VkStructureType sType; // Must be VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR - const void* pNext; // Pointer to next structure - - const VkSurfaceDescriptionKHR* pSurfaceDescription;// describes the swap chain's target surface - - uint32_t minImageCount; // Minimum number of presentation images the application needs - VkFormat imageFormat; // Format of the presentation images - VkColorSpaceKHR imageColorSpace; // Colorspace of the presentation images - VkExtent2D imageExtent; // Dimensions of the presentation images - VkImageUsageFlags imageUsageFlags; // Bits indicating how the presentation images will be used - VkSurfaceTransformKHR preTransform; // The transform, relative to the device's natural orientation, applied to the image content prior to presentation - uint32_t imageArraySize; // Determines the number of views for multiview/stereo presentation - - VkSharingMode sharingMode; // Sharing mode used for the presentation images - uint32_t queueFamilyCount; // Number of queue families having access to the images in case of concurrent sharing mode - const uint32_t* pQueueFamilyIndices; // Array of queue family indices having access to the images in case of concurrent sharing mode - - VkPresentModeKHR presentMode; // Which presentation mode to use for presents on this swap chain - - VkSwapchainKHR oldSwapchain; // Existing swap chain to replace, if any - - VkBool32 clipped; // Specifies whether presentable images may be affected by window clip regions -} VkSwapchainCreateInfoKHR; - -typedef struct { - VkStructureType sType; // Must be VK_STRUCTURE_TYPE_PRESENT_INFO_KHR - const void* pNext; // Pointer to next structure - uint32_t swapchainCount; // Number of swap chains to present in this 
call - const VkSwapchainKHR* swapchains; // Swap chains to present an image from - const uint32_t* imageIndices; // Indices of which swapchain images to present -} VkPresentInfoKHR; - -// ------------------------------------------------------------------------------------------------ -// Function types - -typedef VkResult (VKAPI_PTR *PFN_vkGetSurfacePropertiesKHR)(VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, VkSurfacePropertiesKHR* pSurfaceProperties); -typedef VkResult (VKAPI_PTR *PFN_vkGetSurfaceFormatsKHR)(VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, uint32_t* pCount, VkSurfaceFormatKHR* pSurfaceFormats); -typedef VkResult (VKAPI_PTR *PFN_vkGetSurfacePresentModesKHR)(VkDevice device, const VkSurfaceDescriptionKHR* pSurfaceDescription, uint32_t* pCount, VkPresentModeKHR* pPresentModes); -typedef VkResult (VKAPI_PTR *PFN_vkCreateSwapchainKHR)(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, VkSwapchainKHR* pSwapchain); -typedef VkResult (VKAPI_PTR *PFN_vkDestroySwapchainKHR)(VkDevice device, VkSwapchainKHR swapchain); -typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainImagesKHR)(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pCount, VkImage* pSwapchainImages); -typedef VkResult (VKAPI_PTR *PFN_vkAcquireNextImageKHR)(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, uint32_t* pImageIndex); -typedef VkResult (VKAPI_PTR *PFN_vkQueuePresentKHR)(VkQueue queue, VkPresentInfoKHR* pPresentInfo); - -// ------------------------------------------------------------------------------------------------ -// Function prototypes - -#ifdef VK_PROTOTYPES - -VKAPI_ATTR VkResult VKAPI_CALL vkGetSurfacePropertiesKHR( - VkDevice device, - const VkSurfaceDescriptionKHR* pSurfaceDescription, - VkSurfacePropertiesKHR* pSurfaceProperties); - -VKAPI_ATTR VkResult VKAPI_CALL vkGetSurfaceFormatsKHR( - VkDevice device, - const VkSurfaceDescriptionKHR* pSurfaceDescription, - uint32_t* pCount, - 
VkSurfaceFormatKHR* pSurfaceFormats); - -VKAPI_ATTR VkResult VKAPI_CALL vkGetSurfacePresentModesKHR( - VkDevice device, - const VkSurfaceDescriptionKHR* pSurfaceDescription, - uint32_t* pCount, - VkPresentModeKHR* pPresentModes); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR( - VkDevice device, - const VkSwapchainCreateInfoKHR* pCreateInfo, - VkSwapchainKHR* pSwapchain); - -VKAPI_ATTR VkResult VKAPI_CALL vkDestroySwapchainKHR( - VkDevice device, - VkSwapchainKHR swapchain); - -VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainImagesKHR( - VkDevice device, - VkSwapchainKHR swapchain, - uint32_t* pCount, - VkImage* pSwapchainImages); - -VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR( - VkDevice device, - VkSwapchainKHR swapchain, - uint64_t timeout, - VkSemaphore semaphore, - uint32_t* pImageIndex); - -VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR( - VkQueue queue, - VkPresentInfoKHR* pPresentInfo); - -#endif // VK_PROTOTYPES - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // __VK_EXT_KHR_SWAPCHAIN_H__ diff --git a/include/vulkan/vk_ext_khr_swapchain.h b/include/vulkan/vk_ext_khr_swapchain.h deleted file mode 100644 index 4c4f8a58a01..00000000000 --- a/include/vulkan/vk_ext_khr_swapchain.h +++ /dev/null @@ -1,153 +0,0 @@ -// -// File: vk_ext_khr_swapchain.h -// -/* -** Copyright (c) 2015 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a -** copy of this software and/or associated documentation files (the -** "Materials"), to deal in the Materials without restriction, including -** without limitation the rights to use, copy, modify, merge, publish, -** distribute, sublicense, and/or sell copies of the Materials, and to -** permit persons to whom the Materials are furnished to do so, subject to -** the following conditions: -** -** The above copyright notice and this permission notice shall be included -** in all copies or substantial portions of the Materials. 
-** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. -*/ - -#ifndef __VK_EXT_KHR_SWAPCHAIN_H__ -#define __VK_EXT_KHR_SWAPCHAIN_H__ - -#include "vulkan.h" - -#define VK_EXT_KHR_SWAPCHAIN_REVISION 17 -#define VK_EXT_KHR_SWAPCHAIN_EXTENSION_NUMBER 1 -#define VK_EXT_KHR_SWAPCHAIN_EXTENSION_NAME "VK_EXT_KHR_swapchain" - -#ifdef __cplusplus -extern "C" -{ -#endif // __cplusplus - -// ------------------------------------------------------------------------------------------------ -// Objects - -// ------------------------------------------------------------------------------------------------ -// Enumeration constants - -#define VK_EXT_KHR_SWAPCHAIN_ENUM(type,id) ((type)((int)0xc0000000 - VK_EXT_KHR_SWAPCHAIN_EXTENSION_NUMBER * -1024 + (id))) -#define VK_EXT_KHR_SWAPCHAIN_ENUM_POSITIVE(type,id) ((type)((int)0x40000000 + (VK_EXT_KHR_SWAPCHAIN_EXTENSION_NUMBER - 1) * 1024 + (id))) - -// Extend VkStructureType enum with extension specific constants -#define VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR VK_EXT_KHR_SWAPCHAIN_ENUM(VkStructureType, 0) - -// ------------------------------------------------------------------------------------------------ -// Enumerations - -typedef enum { - VK_SURFACE_TRANSFORM_NONE_KHR = 0, - VK_SURFACE_TRANSFORM_ROT90_KHR = 1, - VK_SURFACE_TRANSFORM_ROT180_KHR = 2, - VK_SURFACE_TRANSFORM_ROT270_KHR = 3, - VK_SURFACE_TRANSFORM_HMIRROR_KHR = 4, - VK_SURFACE_TRANSFORM_HMIRROR_ROT90_KHR = 5, - VK_SURFACE_TRANSFORM_HMIRROR_ROT180_KHR = 6, - VK_SURFACE_TRANSFORM_HMIRROR_ROT270_KHR = 7, - 
VK_SURFACE_TRANSFORM_INHERIT_KHR = 8, -} VkSurfaceTransformKHR; - -typedef enum { - VK_SURFACE_TRANSFORM_NONE_BIT_KHR = 0x00000001, - VK_SURFACE_TRANSFORM_ROT90_BIT_KHR = 0x00000002, - VK_SURFACE_TRANSFORM_ROT180_BIT_KHR = 0x00000004, - VK_SURFACE_TRANSFORM_ROT270_BIT_KHR = 0x00000008, - VK_SURFACE_TRANSFORM_HMIRROR_BIT_KHR = 0x00000010, - VK_SURFACE_TRANSFORM_HMIRROR_ROT90_BIT_KHR = 0x00000020, - VK_SURFACE_TRANSFORM_HMIRROR_ROT180_BIT_KHR = 0x00000040, - VK_SURFACE_TRANSFORM_HMIRROR_ROT270_BIT_KHR = 0x00000080, - VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR = 0x00000100, -} VkSurfaceTransformFlagBitsKHR; -typedef VkFlags VkSurfaceTransformFlagsKHR; - -typedef enum { - VK_PLATFORM_WIN32_KHR = 0, - VK_PLATFORM_X11_KHR = 1, - VK_PLATFORM_XCB_KHR = 2, - VK_PLATFORM_ANDROID_KHR = 3, - VK_PLATFORM_WAYLAND_KHR = 4, - VK_PLATFORM_MIR_KHR = 5, - VK_PLATFORM_BEGIN_RANGE_KHR = VK_PLATFORM_WIN32_KHR, - VK_PLATFORM_END_RANGE_KHR = VK_PLATFORM_MIR_KHR, - VK_PLATFORM_NUM_KHR = (VK_PLATFORM_MIR_KHR - VK_PLATFORM_WIN32_KHR + 1), - VK_PLATFORM_MAX_ENUM_KHR = 0x7FFFFFFF -} VkPlatformKHR; - -// ------------------------------------------------------------------------------------------------ -// Flags - -// ------------------------------------------------------------------------------------------------ -// Structures - -// Placeholder structure header for the different types of surface description structures -typedef struct { - VkStructureType sType; // Can be any of the VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_XXX_KHR constants - const void* pNext; // Pointer to next structure -} VkSurfaceDescriptionKHR; - -// Surface description structure for a native platform window surface -typedef struct { - VkStructureType sType; // Must be VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR - const void* pNext; // Pointer to next structure - VkPlatformKHR platform; // e.g. 
VK_PLATFORM_*_KHR - void* pPlatformHandle; - void* pPlatformWindow; -} VkSurfaceDescriptionWindowKHR; - -// pPlatformHandle points to this struct when platform is VK_PLATFORM_X11_KHR -#ifdef _X11_XLIB_H_ -typedef struct { - Display* dpy; // Display connection to an X server - Window root; // To identify the X screen -} VkPlatformHandleX11KHR; -#endif /* _X11_XLIB_H_ */ - -// pPlatformHandle points to this struct when platform is VK_PLATFORM_XCB_KHR -#ifdef __XCB_H__ -typedef struct { - xcb_connection_t* connection; // XCB connection to an X server - xcb_window_t root; // To identify the X screen -} VkPlatformHandleXcbKHR; -#endif /* __XCB_H__ */ - -// ------------------------------------------------------------------------------------------------ -// Function types - -typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, const VkSurfaceDescriptionKHR* pSurfaceDescription, VkBool32* pSupported); - -// ------------------------------------------------------------------------------------------------ -// Function prototypes - -#ifdef VK_PROTOTYPES - -VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - const VkSurfaceDescriptionKHR* pSurfaceDescription, - VkBool32* pSupported); - -#endif // VK_PROTOTYPES - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // __VK_EXT_KHR_SWAPCHAIN_H__ diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 61ea4fb1f61..fa58c593fb6 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -41,7 +41,7 @@ extern "C" { ((major << 22) | (minor << 12) | patch) // Vulkan API version supported by this file -#define VK_API_VERSION VK_MAKE_VERSION(0, 210, 0) +#define VK_API_VERSION VK_MAKE_VERSION(0, 210, 1) #define VK_NULL_HANDLE 0 @@ -125,6 +125,11 @@ typedef enum VkResult { VK_ERROR_INCOMPATIBLE_DRIVER = -9, VK_ERROR_TOO_MANY_OBJECTS = -10, 
VK_ERROR_FORMAT_NOT_SUPPORTED = -11, + VK_ERROR_SURFACE_LOST_KHR = -1000000000, + VK_SUBOPTIMAL_KHR = 1000001003, + VK_ERROR_OUT_OF_DATE_KHR = -1000001004, + VK_ERROR_INCOMPATIBLE_DISPLAY_KHR = -1000003001, + VK_ERROR_NATIVE_WINDOW_IN_USE_KHR = -1000008000, VK_RESULT_BEGIN_RANGE = VK_ERROR_FORMAT_NOT_SUPPORTED, VK_RESULT_END_RANGE = VK_INCOMPLETE, VK_RESULT_RANGE_SIZE = (VK_INCOMPLETE - VK_ERROR_FORMAT_NOT_SUPPORTED + 1), @@ -180,6 +185,11 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_MEMORY_BARRIER = 45, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO = 46, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO = 47, + VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR = 1000001000, + VK_STRUCTURE_TYPE_PRESENT_INFO_KHR = 1000001001, + VK_STRUCTURE_TYPE_DISPLAY_MODE_CREATE_INFO_KHR = 1000002000, + VK_STRUCTURE_TYPE_DISPLAY_SURFACE_CREATE_INFO_KHR = 1000002001, + VK_STRUCTURE_TYPE_DISPLAY_PRESENT_INFO_KHR = 1000003000, VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO, VK_STRUCTURE_TYPE_RANGE_SIZE = (VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), @@ -458,6 +468,7 @@ typedef enum VkImageLayout { VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL = 6, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL = 7, VK_IMAGE_LAYOUT_PREINITIALIZED = 8, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR = 1000001002, VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_END_RANGE = VK_IMAGE_LAYOUT_PREINITIALIZED, VK_IMAGE_LAYOUT_RANGE_SIZE = (VK_IMAGE_LAYOUT_PREINITIALIZED - VK_IMAGE_LAYOUT_UNDEFINED + 1), @@ -3081,6 +3092,491 @@ VKAPI_ATTR void VKAPI_CALL vkCmdExecuteCommands( const VkCommandBuffer* pCommandBuffers); #endif +#define VK_KHR_surface 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSurfaceKHR) + +#define VK_KHR_SURFACE_REVISION 24 +#define VK_KHR_SURFACE_EXTENSION_NUMBER 1 +#define VK_KHR_SURFACE_EXTENSION_NAME "VK_KHR_surface" + + +typedef enum VkColorSpaceKHR { + 
VK_COLORSPACE_SRGB_NONLINEAR_KHR = 0, + VK_COLORSPACE_BEGIN_RANGE = VK_COLORSPACE_SRGB_NONLINEAR_KHR, + VK_COLORSPACE_END_RANGE = VK_COLORSPACE_SRGB_NONLINEAR_KHR, + VK_COLORSPACE_RANGE_SIZE = (VK_COLORSPACE_SRGB_NONLINEAR_KHR - VK_COLORSPACE_SRGB_NONLINEAR_KHR + 1), + VK_COLORSPACE_MAX_ENUM = 0x7FFFFFFF +} VkColorSpaceKHR; + +typedef enum VkPresentModeKHR { + VK_PRESENT_MODE_IMMEDIATE_KHR = 0, + VK_PRESENT_MODE_MAILBOX_KHR = 1, + VK_PRESENT_MODE_FIFO_KHR = 2, + VK_PRESENT_MODE_FIFO_RELAXED_KHR = 3, + VK_PRESENT_MODE_BEGIN_RANGE = VK_PRESENT_MODE_IMMEDIATE_KHR, + VK_PRESENT_MODE_END_RANGE = VK_PRESENT_MODE_FIFO_RELAXED_KHR, + VK_PRESENT_MODE_RANGE_SIZE = (VK_PRESENT_MODE_FIFO_RELAXED_KHR - VK_PRESENT_MODE_IMMEDIATE_KHR + 1), + VK_PRESENT_MODE_MAX_ENUM = 0x7FFFFFFF +} VkPresentModeKHR; + + +typedef enum VkSurfaceTransformFlagBitsKHR { + VK_SURFACE_TRANSFORM_NONE_BIT_KHR = 0x00000001, + VK_SURFACE_TRANSFORM_ROTATE_90_BIT_KHR = 0x00000002, + VK_SURFACE_TRANSFORM_ROTATE_180_BIT_KHR = 0x00000004, + VK_SURFACE_TRANSFORM_ROTATE_270_BIT_KHR = 0x00000008, + VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_BIT_KHR = 0x00000010, + VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_90_BIT_KHR = 0x00000020, + VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_180_BIT_KHR = 0x00000040, + VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_270_BIT_KHR = 0x00000080, + VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR = 0x00000100, +} VkSurfaceTransformFlagBitsKHR; +typedef VkFlags VkSurfaceTransformFlagsKHR; + +typedef enum VkCompositeAlphaFlagBitsKHR { + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR = 0x00000001, + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR = 0x00000002, + VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR = 0x00000004, + VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR = 0x00000008, +} VkCompositeAlphaFlagBitsKHR; +typedef VkFlags VkCompositeAlphaFlagsKHR; + +typedef struct VkSurfaceCapabilitiesKHR { + uint32_t minImageCount; + uint32_t maxImageCount; + VkExtent2D currentExtent; + VkExtent2D minImageExtent; + VkExtent2D 
maxImageExtent; + uint32_t maxImageArrayLayers; + VkSurfaceTransformFlagsKHR supportedTransforms; + VkSurfaceTransformFlagBitsKHR currentTransform; + VkCompositeAlphaFlagsKHR supportedCompositeAlpha; + VkImageUsageFlags supportedUsageFlags; +} VkSurfaceCapabilitiesKHR; + +typedef struct VkSurfaceFormatKHR { + VkFormat format; + VkColorSpaceKHR colorSpace; +} VkSurfaceFormatKHR; + + +typedef void (VKAPI_PTR *PFN_vkDestroySurfaceKHR)(VkInstance instance, VkSurfaceKHR surface, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, VkSurfaceKHR surface, VkBool32* pSupported); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceFormatsKHR)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, uint32_t* pSurfaceFormatCount, VkSurfaceFormatKHR* pSurfaceFormats); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfacePresentModesKHR)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, uint32_t* pPresentModeCount, VkPresentModeKHR* pPresentModes); + +#ifdef VK_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkDestroySurfaceKHR( + VkInstance instance, + VkSurfaceKHR surface, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR surface, + VkBool32* pSupported); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormatsKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* 
pSurfaceFormats); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes); +#endif + +#define VK_KHR_swapchain 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSwapchainKHR) + +#define VK_KHR_SWAPCHAIN_REVISION 67 +#define VK_KHR_SWAPCHAIN_EXTENSION_NUMBER 2 +#define VK_KHR_SWAPCHAIN_EXTENSION_NAME "VK_KHR_swapchain" + +typedef VkFlags VkSwapchainCreateFlagsKHR; + +typedef struct VkSwapchainCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkSwapchainCreateFlagsKHR flags; + VkSurfaceKHR surface; + uint32_t minImageCount; + VkFormat imageFormat; + VkColorSpaceKHR imageColorSpace; + VkExtent2D imageExtent; + uint32_t imageArrayLayers; + VkImageUsageFlags imageUsage; + VkSharingMode imageSharingMode; + uint32_t queueFamilyIndexCount; + const uint32_t* pQueueFamilyIndices; + VkSurfaceTransformFlagBitsKHR preTransform; + VkCompositeAlphaFlagBitsKHR compositeAlpha; + VkPresentModeKHR presentMode; + VkBool32 clipped; + VkSwapchainKHR oldSwapchain; +} VkSwapchainCreateInfoKHR; + +typedef struct VkPresentInfoKHR { + VkStructureType sType; + const void* pNext; + uint32_t waitSemaphoreCount; + const VkSemaphore* pWaitSemaphores; + uint32_t swapchainCount; + const VkSwapchainKHR* pSwapchains; + const uint32_t* pImageIndices; + VkResult* pResults; +} VkPresentInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateSwapchainKHR)(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain); +typedef void (VKAPI_PTR *PFN_vkDestroySwapchainKHR)(VkDevice device, VkSwapchainKHR swapchain, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainImagesKHR)(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pSwapchainImageCount, VkImage* pSwapchainImages); +typedef VkResult (VKAPI_PTR *PFN_vkAcquireNextImageKHR)(VkDevice device, 
VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, VkFence fence, uint32_t* pImageIndex); +typedef VkResult (VKAPI_PTR *PFN_vkQueuePresentKHR)(VkQueue queue, const VkPresentInfoKHR* pPresentInfo); + +#ifdef VK_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR( + VkDevice device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSwapchainKHR* pSwapchain); + +VKAPI_ATTR void VKAPI_CALL vkDestroySwapchainKHR( + VkDevice device, + VkSwapchainKHR swapchain, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainImagesKHR( + VkDevice device, + VkSwapchainKHR swapchain, + uint32_t* pSwapchainImageCount, + VkImage* pSwapchainImages); + +VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR( + VkDevice device, + VkSwapchainKHR swapchain, + uint64_t timeout, + VkSemaphore semaphore, + VkFence fence, + uint32_t* pImageIndex); + +VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR( + VkQueue queue, + const VkPresentInfoKHR* pPresentInfo); +#endif + +#define VK_KHR_display 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayKHR) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayModeKHR) + +#define VK_KHR_DISPLAY_REVISION 21 +#define VK_KHR_DISPLAY_EXTENSION_NUMBER 3 +#define VK_KHR_DISPLAY_EXTENSION_NAME "VK_KHR_display" + + +typedef enum VkDisplayPlaneAlphaFlagBitsKHR { + VK_DISPLAY_PLANE_ALPHA_OPAQUE_BIT_KHR = 0x00000001, + VK_DISPLAY_PLANE_ALPHA_GLOBAL_BIT_KHR = 0x00000002, + VK_DISPLAY_PLANE_ALPHA_PER_PIXEL_BIT_KHR = 0x00000004, + VK_DISPLAY_PLANE_ALPHA_PER_PIXEL_PREMULTIPLIED_BIT_KHR = 0x00000008, +} VkDisplayPlaneAlphaFlagBitsKHR; +typedef VkFlags VkDisplayModeCreateFlagsKHR; +typedef VkFlags VkDisplayPlaneAlphaFlagsKHR; +typedef VkFlags VkDisplaySurfaceCreateFlagsKHR; + +typedef struct VkDisplayPropertiesKHR { + VkDisplayKHR display; + const char* displayName; + VkExtent2D physicalDimensions; + VkExtent2D physicalResolution; + VkSurfaceTransformFlagsKHR supportedTransforms; + 
VkBool32 planeReorderPossible; + VkBool32 persistentContent; +} VkDisplayPropertiesKHR; + +typedef struct VkDisplayModeParametersKHR { + VkExtent2D visibleRegion; + uint32_t refreshRate; +} VkDisplayModeParametersKHR; + +typedef struct VkDisplayModePropertiesKHR { + VkDisplayModeKHR displayMode; + VkDisplayModeParametersKHR parameters; +} VkDisplayModePropertiesKHR; + +typedef struct VkDisplayModeCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkDisplayModeCreateFlagsKHR flags; + VkDisplayModeParametersKHR parameters; +} VkDisplayModeCreateInfoKHR; + +typedef struct VkDisplayPlaneCapabilitiesKHR { + VkDisplayPlaneAlphaFlagsKHR supportedAlpha; + VkOffset2D minSrcPosition; + VkOffset2D maxSrcPosition; + VkExtent2D minSrcExtent; + VkExtent2D maxSrcExtent; + VkOffset2D minDstPosition; + VkOffset2D maxDstPosition; + VkExtent2D minDstExtent; + VkExtent2D maxDstExtent; +} VkDisplayPlaneCapabilitiesKHR; + +typedef struct VkDisplayPlanePropertiesKHR { + VkDisplayKHR currentDisplay; + uint32_t currentStackIndex; +} VkDisplayPlanePropertiesKHR; + +typedef struct VkDisplaySurfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkDisplaySurfaceCreateFlagsKHR flags; + VkDisplayModeKHR displayMode; + uint32_t planeIndex; + uint32_t planeStackIndex; + VkSurfaceTransformFlagBitsKHR transform; + float globalAlpha; + VkDisplayPlaneAlphaFlagBitsKHR alphaMode; + VkExtent2D imageExtent; +} VkDisplaySurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceDisplayPropertiesKHR)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPropertiesKHR* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceDisplayPlanePropertiesKHR)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPlanePropertiesKHR* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkGetDisplayPlaneSupportedDisplaysKHR)(VkPhysicalDevice physicalDevice, uint32_t* pDisplayCount, VkDisplayKHR* pDisplays); +typedef VkResult (VKAPI_PTR 
*PFN_vkGetDisplayModePropertiesKHR)(VkPhysicalDevice physicalDevice, VkDisplayKHR display, uint32_t* pPropertyCount, VkDisplayModePropertiesKHR* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDisplayModeKHR)(VkPhysicalDevice physicalDevice, VkDisplayKHR display, const VkDisplayModeCreateInfoKHR*pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDisplayModeKHR* pMode); +typedef VkResult (VKAPI_PTR *PFN_vkGetDisplayPlaneCapabilitiesKHR)(VkPhysicalDevice physicalDevice, VkDisplayModeKHR mode, uint32_t planeIndex, VkDisplayPlaneCapabilitiesKHR* pCapabilities); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDisplayPlaneSurfaceKHR)(VkInstance instance, const VkDisplaySurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); + +#ifdef VK_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceDisplayPropertiesKHR( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkDisplayPropertiesKHR* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceDisplayPlanePropertiesKHR( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkDisplayPlanePropertiesKHR* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDisplayPlaneSupportedDisplaysKHR( + VkPhysicalDevice physicalDevice, + uint32_t* pDisplayCount, + VkDisplayKHR* pDisplays); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDisplayModePropertiesKHR( + VkPhysicalDevice physicalDevice, + VkDisplayKHR display, + uint32_t* pPropertyCount, + VkDisplayModePropertiesKHR* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDisplayModeKHR( + VkPhysicalDevice physicalDevice, + VkDisplayKHR display, + const VkDisplayModeCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDisplayModeKHR* pMode); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDisplayPlaneCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkDisplayModeKHR mode, + uint32_t planeIndex, + VkDisplayPlaneCapabilitiesKHR* pCapabilities); + +VKAPI_ATTR VkResult VKAPI_CALL 
vkCreateDisplayPlaneSurfaceKHR( + VkInstance instance, + const VkDisplaySurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); +#endif + +#define VK_KHR_display_swapchain 1 +#define VK_KHR_DISPLAY_SWAPCHAIN_REVISION 9 +#define VK_KHR_DISPLAY_SWAPCHAIN_EXTENSION_NUMBER 4 +#define VK_KHR_DISPLAY_SWAPCHAIN_EXTENSION_NAME "VK_KHR_display_swapchain" + +typedef struct VkDisplayPresentInfoKHR { + VkStructureType sType; + const void* pNext; + VkRect2D srcRect; + VkRect2D dstRect; + VkBool32 persistent; +} VkDisplayPresentInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateSharedSwapchainsKHR)(VkDevice device, uint32_t swapchainCount, const VkSwapchainCreateInfoKHR* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchains); + +#ifdef VK_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSharedSwapchainsKHR( + VkDevice device, + uint32_t swapchainCount, + const VkSwapchainCreateInfoKHR* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkSwapchainKHR* pSwapchains); +#endif + +#ifdef VK_USE_PLATFORM_XLIB_KHR +#define VK_KHR_xlib_surface 1 +#include + +#define VK_KHR_XLIB_SURFACE_REVISION 5 +#define VK_KHR_XLIB_SURFACE_EXTENSION_NUMBER 5 +#define VK_KHR_XLIB_SURFACE_EXTENSION_NAME "VK_KHR_xlib_surface" + +typedef VkResult (VKAPI_PTR *PFN_vkCreateXlibSurfaceKHR)(VkInstance instance, Display* dpy, Window window, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceXlibPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, Display* dpy, VisualID visualID); + +#ifdef VK_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateXlibSurfaceKHR( + VkInstance instance, + Display* dpy, + Window window, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); + +VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceXlibPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, 
+ Display* dpy, + VisualID visualID); +#endif +#endif /* VK_USE_PLATFORM_XLIB_KHR */ + +#ifdef VK_USE_PLATFORM_XCB_KHR +#define VK_KHR_xcb_surface 1 +#include + +#define VK_KHR_XCB_SURFACE_REVISION 5 +#define VK_KHR_XCB_SURFACE_EXTENSION_NUMBER 6 +#define VK_KHR_XCB_SURFACE_EXTENSION_NAME "VK_KHR_xcb_surface" + +typedef VkResult (VKAPI_PTR *PFN_vkCreateXcbSurfaceKHR)(VkInstance instance, xcb_connection_t* connection, xcb_window_t window, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceXcbPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, xcb_connection_t* connection, xcb_visualid_t visual_id); + +#ifdef VK_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateXcbSurfaceKHR( + VkInstance instance, + xcb_connection_t* connection, + xcb_window_t window, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); + +VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceXcbPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + xcb_connection_t* connection, + xcb_visualid_t visual_id); +#endif +#endif /* VK_USE_PLATFORM_XCB_KHR */ + +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +#define VK_KHR_wayland_surface 1 +#include + +#define VK_KHR_WAYLAND_SURFACE_REVISION 4 +#define VK_KHR_WAYLAND_SURFACE_EXTENSION_NUMBER 7 +#define VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME "VK_KHR_wayland_surface" + +typedef VkResult (VKAPI_PTR *PFN_vkCreateWaylandSurfaceKHR)(VkInstance instance, struct wl_display* display, struct wl_surface* surface, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, struct wl_display* display); + +#ifdef VK_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateWaylandSurfaceKHR( + VkInstance instance, + struct wl_display* display, + struct wl_surface* surface, + const 
VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); + +VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceWaylandPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + struct wl_display* display); +#endif +#endif /* VK_USE_PLATFORM_WAYLAND_KHR */ + +#ifdef VK_USE_PLATFORM_MIR_KHR +#define VK_KHR_mir_surface 1 +#include + +#define VK_KHR_MIR_SURFACE_REVISION 4 +#define VK_KHR_MIR_SURFACE_EXTENSION_NUMBER 8 +#define VK_KHR_MIR_SURFACE_EXTENSION_NAME "VK_KHR_mir_surface" + +typedef VkResult (VKAPI_PTR *PFN_vkCreateMirSurfaceKHR)(VkInstance instance, MirConnection* connection, MirSurface* mirSurface, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceMirPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, MirConnection* connection); + +#ifdef VK_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateMirSurfaceKHR( + VkInstance instance, + MirConnection* connection, + MirSurface* mirSurface, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); + +VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceMirPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + MirConnection* connection); +#endif +#endif /* VK_USE_PLATFORM_MIR_KHR */ + +#ifdef VK_USE_PLATFORM_ANDROID_KHR +#define VK_KHR_android_surface 1 +#include + +#define VK_KHR_ANDROID_SURFACE_REVISION 4 +#define VK_KHR_ANDROID_SURFACE_EXTENSION_NUMBER 9 +#define VK_KHR_ANDROID_SURFACE_EXTENSION_NAME "VK_KHR_android_surface" + +typedef VkResult (VKAPI_PTR *PFN_vkCreateAndroidSurfaceKHR)(VkInstance instance, ANativeWindow* window, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); + +#ifdef VK_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateAndroidSurfaceKHR( + VkInstance instance, + ANativeWindow* window, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); +#endif +#endif /* VK_USE_PLATFORM_ANDROID_KHR 
*/ + +#ifdef VK_USE_PLATFORM_WIN32_KHR +#define VK_KHR_win32_surface 1 +#include + +#define VK_KHR_WIN32_SURFACE_REVISION 4 +#define VK_KHR_WIN32_SURFACE_EXTENSION_NUMBER 10 +#define VK_KHR_WIN32_SURFACE_EXTENSION_NAME "VK_KHR_win32_surface" + +typedef VkResult (VKAPI_PTR *PFN_vkCreateWin32SurfaceKHR)(VkInstance instance, HINSTANCE hinstance, HWND hwnd, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceWin32PresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex); + +#ifdef VK_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateWin32SurfaceKHR( + VkInstance instance, + HINSTANCE hinstance, + HWND hwnd, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); + +VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceWin32PresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex); +#endif +#endif /* VK_USE_PLATFORM_WIN32_KHR */ + #ifdef __cplusplus } #endif diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 2f249488270..24391eafc0b 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -26,9 +26,7 @@ vulkan_includedir = $(includedir)/vulkan vulkan_include_HEADERS = \ $(top_srcdir)/include/vulkan/vk_platform.h \ $(top_srcdir)/include/vulkan/vulkan.h \ - $(top_srcdir)/include/vulkan/vulkan_intel.h \ - $(top_srcdir)/include/vulkan/vk_ext_khr_swapchain.h \ - $(top_srcdir)/include/vulkan/vk_ext_khr_device_swapchain.h + $(top_srcdir)/include/vulkan/vulkan_intel.h # Used when generating entrypoints to filter out unwanted extensions VULKAN_ENTRYPOINT_CPPFLAGS = \ diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 37462a4a4a6..32f8dcf976f 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -150,15 +150,25 @@ anv_physical_device_finish(struct anv_physical_device *device) static const VkExtensionProperties global_extensions[] = { { - .extensionName = 
VK_EXT_KHR_SWAPCHAIN_EXTENSION_NAME, - .specVersion = 17, + .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, + .specVersion = 24, }, + { + .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, + .specVersion = 5, + }, +#ifdef HAVE_WAYLAND_PLATFORM + { + .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, + .specVersion = 4, + }, +#endif }; static const VkExtensionProperties device_extensions[] = { { - .extensionName = VK_EXT_KHR_DEVICE_SWAPCHAIN_EXTENSION_NAME, - .specVersion = 53, + .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME, + .specVersion = 67, }, }; @@ -198,7 +208,7 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - if (pCreateInfo->pApplicationInfo->apiVersion != VK_MAKE_VERSION(0, 210, 0)) + if (pCreateInfo->pApplicationInfo->apiVersion != VK_MAKE_VERSION(0, 210, 1)) return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); for (uint32_t i = 0; i < pCreateInfo->enabledExtensionNameCount; i++) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 9c1e6b2f955..43ebf4c11ae 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #ifdef HAVE_VALGRIND @@ -43,11 +44,19 @@ #include "util/macros.h" #include "util/list.h" +/* Pre-declarations needed for WSI entrypoints */ +struct wl_surface; +struct wl_display; +typedef struct xcb_connection_t xcb_connection_t; +typedef uint32_t xcb_visualid_t; +typedef uint32_t xcb_window_t; + +#define VK_USE_PLATFORM_XCB_KHR +#define VK_USE_PLATFORM_WAYLAND_KHR + #define VK_PROTOTYPES #include #include -#include -#include #include "anv_entrypoints.h" #include "anv_gen_macros.h" @@ -498,7 +507,7 @@ struct anv_instance { int physicalDeviceCount; struct anv_physical_device physicalDevice; - struct anv_wsi_implementation * wsi_impl[VK_PLATFORM_NUM_KHR]; + void * wayland_wsi; }; VkResult anv_init_wsi(struct anv_instance *instance); diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c index 
f5c2d3716a5..c181cd4d729 100644 --- a/src/vulkan/anv_wsi.c +++ b/src/vulkan/anv_wsi.c @@ -28,8 +28,6 @@ anv_init_wsi(struct anv_instance *instance) { VkResult result; - memset(instance->wsi_impl, 0, sizeof(instance->wsi_impl)); - result = anv_x11_init_wsi(instance); if (result != VK_SUCCESS) return result; @@ -54,186 +52,135 @@ anv_finish_wsi(struct anv_instance *instance) anv_x11_finish_wsi(instance); } -VkResult -anv_GetPhysicalDeviceSurfaceSupportKHR( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - const VkSurfaceDescriptionKHR* pSurfaceDescription, - VkBool32* pSupported) +void anv_DestroySurfaceKHR( + VkInstance instance, + VkSurfaceKHR _surface, + const VkAllocationCallbacks* pAllocator) { - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - - assert(pSurfaceDescription->sType == - VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); - - VkSurfaceDescriptionWindowKHR *window = (void *)pSurfaceDescription; - - struct anv_wsi_implementation *impl = - physical_device->instance->wsi_impl[window->platform]; + ANV_FROM_HANDLE(anv_wsi_surface, surface, _surface); - if (impl) { - return impl->get_window_supported(impl, physical_device, - window, pSupported); - } else { - *pSupported = false; - return VK_SUCCESS; - } + surface->destroy(surface, pAllocator); } -VkResult -anv_GetSurfacePropertiesKHR( - VkDevice _device, - const VkSurfaceDescriptionKHR* pSurfaceDescription, - VkSurfacePropertiesKHR* pSurfaceProperties) +VkResult anv_GetPhysicalDeviceSurfaceSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR _surface, + VkBool32* pSupported) { - ANV_FROM_HANDLE(anv_device, device, _device); - - assert(pSurfaceDescription->sType == - VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); - VkSurfaceDescriptionWindowKHR *window = - (VkSurfaceDescriptionWindowKHR *)pSurfaceDescription; - - struct anv_wsi_implementation *impl = - device->instance->wsi_impl[window->platform]; + 
ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(anv_wsi_surface, surface, _surface); - assert(impl); - - return impl->get_surface_properties(impl, device, window, - pSurfaceProperties); + return surface->get_support(surface, device, queueFamilyIndex, pSupported); } -VkResult -anv_GetSurfaceFormatsKHR( - VkDevice _device, - const VkSurfaceDescriptionKHR* pSurfaceDescription, - uint32_t* pCount, - VkSurfaceFormatKHR* pSurfaceFormats) +VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) { - ANV_FROM_HANDLE(anv_device, device, _device); - - assert(pSurfaceDescription->sType == - VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); - VkSurfaceDescriptionWindowKHR *window = - (VkSurfaceDescriptionWindowKHR *)pSurfaceDescription; - - struct anv_wsi_implementation *impl = - device->instance->wsi_impl[window->platform]; + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(anv_wsi_surface, surface, _surface); - assert(impl); - - return impl->get_surface_formats(impl, device, window, - pCount, pSurfaceFormats); + return surface->get_capabilities(surface, device, pSurfaceCapabilities); } -VkResult -anv_GetSurfacePresentModesKHR( - VkDevice _device, - const VkSurfaceDescriptionKHR* pSurfaceDescription, - uint32_t* pCount, - VkPresentModeKHR* pPresentModes) +VkResult anv_GetPhysicalDeviceSurfaceFormatsKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats) { - ANV_FROM_HANDLE(anv_device, device, _device); - - assert(pSurfaceDescription->sType == - VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); - VkSurfaceDescriptionWindowKHR *window = - (VkSurfaceDescriptionWindowKHR *)pSurfaceDescription; + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(anv_wsi_surface, surface, _surface); - struct 
anv_wsi_implementation *impl = - device->instance->wsi_impl[window->platform]; + return surface->get_formats(surface, device, pSurfaceFormatCount, + pSurfaceFormats); +} - assert(impl); +VkResult anv_GetPhysicalDeviceSurfacePresentModesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(anv_wsi_surface, surface, _surface); - return impl->get_surface_present_modes(impl, device, window, - pCount, pPresentModes); + return surface->get_present_modes(surface, device, pPresentModeCount, + pPresentModes); } - -VkResult -anv_CreateSwapchainKHR( - VkDevice _device, - const VkSwapchainCreateInfoKHR* pCreateInfo, - VkSwapchainKHR* pSwapchain) +VkResult anv_CreateSwapchainKHR( + VkDevice _device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSwapchainKHR* pSwapchain) { ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_wsi_surface, surface, pCreateInfo->surface); struct anv_swapchain *swapchain; - VkResult result; - assert(pCreateInfo->pSurfaceDescription->sType == - VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); - VkSurfaceDescriptionWindowKHR *window = - (VkSurfaceDescriptionWindowKHR *)pCreateInfo->pSurfaceDescription; - - struct anv_wsi_implementation *impl = - device->instance->wsi_impl[window->platform]; - - assert(impl); - - result = impl->create_swapchain(impl, device, pCreateInfo, &swapchain); + VkResult result = surface->create_swapchain(surface, device, pCreateInfo, + pAllocator, &swapchain); + if (result != VK_SUCCESS) + return result; - if (result == VK_SUCCESS) - *pSwapchain = anv_swapchain_to_handle(swapchain); + *pSwapchain = anv_swapchain_to_handle(swapchain); - return result; + return VK_SUCCESS; } -VkResult -anv_DestroySwapchainKHR( - VkDevice device, - VkSwapchainKHR swapChain) +void anv_DestroySwapchainKHR( + VkDevice device, + 
VkSwapchainKHR _swapchain, + const VkAllocationCallbacks* pAllocator) { - ANV_FROM_HANDLE(anv_swapchain, swapchain, swapChain); - - assert(swapchain->device == anv_device_from_handle(device)); + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); - return swapchain->destroy(swapchain); + swapchain->destroy(swapchain, pAllocator); } -VkResult -anv_GetSwapchainImagesKHR( - VkDevice device, - VkSwapchainKHR _swapchain, - uint32_t* pCount, - VkImage* pSwapchainImages) +VkResult anv_GetSwapchainImagesKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + uint32_t* pSwapchainImageCount, + VkImage* pSwapchainImages) { ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); - assert(swapchain->device == anv_device_from_handle(device)); - - return swapchain->get_images(swapchain, pCount, pSwapchainImages); + return swapchain->get_images(swapchain, pSwapchainImageCount, + pSwapchainImages); } -VkResult -anv_AcquireNextImageKHR( - VkDevice device, - VkSwapchainKHR _swapchain, - uint64_t timeout, - VkSemaphore semaphore, - uint32_t* pImageIndex) +VkResult anv_AcquireNextImageKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + uint64_t timeout, + VkSemaphore semaphore, + VkFence fence, + uint32_t* pImageIndex) { ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); - assert(swapchain->device == anv_device_from_handle(device)); - - return swapchain->acquire_next_image(swapchain, - timeout, semaphore, pImageIndex); + return swapchain->acquire_next_image(swapchain, timeout, semaphore, + pImageIndex); } -VkResult -anv_QueuePresentKHR( +VkResult anv_QueuePresentKHR( VkQueue _queue, - VkPresentInfoKHR* pPresentInfo) + const VkPresentInfoKHR* pPresentInfo) { ANV_FROM_HANDLE(anv_queue, queue, _queue); VkResult result; for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { - ANV_FROM_HANDLE(anv_swapchain, swapchain, pPresentInfo->swapchains[i]); + ANV_FROM_HANDLE(anv_swapchain, swapchain, pPresentInfo->pSwapchains[i]); assert(swapchain->device == queue->device); result = 
swapchain->queue_present(swapchain, queue, - pPresentInfo->imageIndices[i]); + pPresentInfo->pImageIndices[i]); /* TODO: What if one of them returns OUT_OF_DATE? */ if (result != VK_SUCCESS) return result; diff --git a/src/vulkan/anv_wsi.h b/src/vulkan/anv_wsi.h index 280049b0e86..15b3f862499 100644 --- a/src/vulkan/anv_wsi.h +++ b/src/vulkan/anv_wsi.h @@ -25,10 +25,40 @@ #include "anv_private.h" +struct anv_swapchain; + +struct anv_wsi_surface { + struct anv_instance *instance; + + void (*destroy)(struct anv_wsi_surface *surface, + const VkAllocationCallbacks *pAllocator); + VkResult (*get_support)(struct anv_wsi_surface *surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported); + VkResult (*get_capabilities)(struct anv_wsi_surface *surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); + VkResult (*get_formats)(struct anv_wsi_surface *surface, + struct anv_physical_device *device, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats); + VkResult (*get_present_modes)(struct anv_wsi_surface *surface, + struct anv_physical_device *device, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes); + VkResult (*create_swapchain)(struct anv_wsi_surface *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); +}; + struct anv_swapchain { - struct anv_device * device; + struct anv_device *device; - VkResult (*destroy)(struct anv_swapchain *swapchain); + VkResult (*destroy)(struct anv_swapchain *swapchain, + const VkAllocationCallbacks *pAllocator); VkResult (*get_images)(struct anv_swapchain *swapchain, uint32_t *pCount, VkImage *pSwapchainImages); VkResult (*acquire_next_image)(struct anv_swapchain *swap_chain, @@ -39,33 +69,9 @@ struct anv_swapchain { uint32_t image_index); }; +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_wsi_surface, VkSurfaceKHR) 
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swapchain, VkSwapchainKHR) -struct anv_wsi_implementation { - VkResult (*get_window_supported)(struct anv_wsi_implementation *impl, - struct anv_physical_device *physical_device, - const VkSurfaceDescriptionWindowKHR *window, - VkBool32 *pSupported); - VkResult (*get_surface_properties)(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSurfaceDescriptionWindowKHR *window, - VkSurfacePropertiesKHR *properties); - VkResult (*get_surface_formats)(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSurfaceDescriptionWindowKHR *window, - uint32_t *pCount, - VkSurfaceFormatKHR *pSurfaceFormats); - VkResult (*get_surface_present_modes)(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSurfaceDescriptionWindowKHR *window, - uint32_t *pCount, - VkPresentModeKHR *pPresentModes); - VkResult (*create_swapchain)(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSwapchainCreateInfoKHR *pCreateInfo, - struct anv_swapchain **swapchain); -}; - VkResult anv_x11_init_wsi(struct anv_instance *instance); void anv_x11_finish_wsi(struct anv_instance *instance); VkResult anv_wl_init_wsi(struct anv_instance *instance); diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index d75919ea19a..1dafcd996fe 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -30,6 +30,13 @@ #define MIN_NUM_IMAGES 2 +struct wsi_wl_surface { + struct anv_wsi_surface base; + + struct wl_display *display; + struct wl_surface *surface; +}; + struct wsi_wl_display { struct wl_display * display; struct wl_drm * drm; @@ -41,13 +48,11 @@ struct wsi_wl_display { }; struct wsi_wayland { - struct anv_wsi_implementation base; - struct anv_instance * instance; - pthread_mutex_t mutex; - /* Hash table of wl_display -> wsi_wl_display mappings */ - struct hash_table * displays; + pthread_mutex_t mutex; + /* Hash table of wl_display -> 
wsi_wl_display mappings */ + struct hash_table * displays; }; static void @@ -234,7 +239,7 @@ wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display) { struct wsi_wl_display *display = anv_alloc(&wsi->instance->alloc, sizeof(*display), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + VK_SYSTEM_ALLOCATION_SCOPE_CACHE); if (!display) return NULL; @@ -278,8 +283,10 @@ fail: } static struct wsi_wl_display * -wsi_wl_get_display(struct wsi_wayland *wsi, struct wl_display *wl_display) +wsi_wl_get_display(struct anv_instance *instance, struct wl_display *wl_display) { + struct wsi_wayland *wsi = instance->wayland_wsi; + pthread_mutex_lock(&wsi->mutex); struct hash_entry *entry = _mesa_hash_table_search(wsi->displays, @@ -308,15 +315,23 @@ wsi_wl_get_display(struct wsi_wayland *wsi, struct wl_display *wl_display) return entry->data; } -static VkResult -wsi_wl_get_window_supported(struct anv_wsi_implementation *impl, - struct anv_physical_device *physical_device, - const VkSurfaceDescriptionWindowKHR *window, - VkBool32 *pSupported) +VkBool32 anv_GetPhysicalDeviceWaylandPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + struct wl_display* display) { - struct wsi_wayland *wsi = (struct wsi_wayland *)impl; + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + + return wsi_wl_get_display(physical_device->instance, display) != NULL; +} - *pSupported = wsi_wl_get_display(wsi, window->pPlatformHandle) != NULL; +static VkResult +wsi_wl_surface_get_support(struct anv_wsi_surface *surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported) +{ + *pSupported = true; return VK_SUCCESS; } @@ -327,20 +342,24 @@ static const VkPresentModeKHR present_modes[] = { }; static VkResult -wsi_wl_get_surface_properties(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSurfaceDescriptionWindowKHR *window, - VkSurfacePropertiesKHR *props) 
+wsi_wl_surface_get_capabilities(struct anv_wsi_surface *surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR* caps) { - props->minImageCount = MIN_NUM_IMAGES; - props->maxImageCount = 4; - props->currentExtent = (VkExtent2D) { -1, -1 }; - props->minImageExtent = (VkExtent2D) { 1, 1 }; - props->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; - props->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; - props->currentTransform = VK_SURFACE_TRANSFORM_NONE_KHR; - props->maxImageArraySize = 1; - props->supportedUsageFlags = + caps->minImageCount = MIN_NUM_IMAGES; + caps->maxImageCount = 4; + caps->currentExtent = (VkExtent2D) { -1, -1 }; + caps->minImageExtent = (VkExtent2D) { 1, 1 }; + caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; + caps->currentTransform = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; + caps->maxImageArrayLayers = 1; + + caps->supportedCompositeAlpha = + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR | + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; + + caps->supportedUsageFlags = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; @@ -348,25 +367,24 @@ wsi_wl_get_surface_properties(struct anv_wsi_implementation *impl, } static VkResult -wsi_wl_get_surface_formats(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSurfaceDescriptionWindowKHR *window, - uint32_t *pCount, - VkSurfaceFormatKHR *pSurfaceFormats) +wsi_wl_surface_get_formats(struct anv_wsi_surface *wsi_surface, + struct anv_physical_device *device, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats) { - struct wsi_wayland *wsi = (struct wsi_wayland *)impl; + struct wsi_wl_surface *surface = (struct wsi_wl_surface *)wsi_surface; struct wsi_wl_display *display = - wsi_wl_get_display(wsi, window->pPlatformHandle); + wsi_wl_get_display(device->instance, surface->display); uint32_t count = anv_vector_length(&display->formats); if 
(pSurfaceFormats == NULL) { - *pCount = count; + *pSurfaceFormatCount = count; return VK_SUCCESS; } - assert(*pCount >= count); - *pCount = count; + assert(*pSurfaceFormatCount >= count); + *pSurfaceFormatCount = count; VkFormat *f; anv_vector_foreach(f, &display->formats) { @@ -381,20 +399,64 @@ wsi_wl_get_surface_formats(struct anv_wsi_implementation *impl, } static VkResult -wsi_wl_get_surface_present_modes(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSurfaceDescriptionWindowKHR *window, - uint32_t *pCount, - VkPresentModeKHR *pPresentModes) +wsi_wl_surface_get_present_modes(struct anv_wsi_surface *surface, + struct anv_physical_device *device, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes) { if (pPresentModes == NULL) { - *pCount = ARRAY_SIZE(present_modes); + *pPresentModeCount = ARRAY_SIZE(present_modes); return VK_SUCCESS; } - assert(*pCount >= ARRAY_SIZE(present_modes)); - typed_memcpy(pPresentModes, present_modes, *pCount); - *pCount = ARRAY_SIZE(present_modes); + assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); + typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); + *pPresentModeCount = ARRAY_SIZE(present_modes); + + return VK_SUCCESS; +} + +static void +wsi_wl_surface_destroy(struct anv_wsi_surface *surface, + const VkAllocationCallbacks *pAllocator) +{ + anv_free2(&surface->instance->alloc, pAllocator, surface); +} + +static VkResult +wsi_wl_surface_create_swapchain(struct anv_wsi_surface *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); + +VkResult anv_CreateWaylandSurfaceKHR( + VkInstance _instance, + struct wl_display* wl_display, + struct wl_surface* wl_surface, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + struct wsi_wl_surface *surface; + + surface = anv_alloc2(&instance->alloc, 
pAllocator, sizeof *surface, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (surface == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + surface->display = wl_display; + surface->surface = wl_surface; + + surface->base.instance = instance; + surface->base.destroy = wsi_wl_surface_destroy; + surface->base.get_support = wsi_wl_surface_get_support; + surface->base.get_capabilities = wsi_wl_surface_get_capabilities; + surface->base.get_formats = wsi_wl_surface_get_formats; + surface->base.get_present_modes = wsi_wl_surface_get_present_modes; + surface->base.create_swapchain = wsi_wl_surface_create_swapchain; + + *pSurface = anv_wsi_surface_to_handle(&surface->base); return VK_SUCCESS; } @@ -425,8 +487,8 @@ struct wsi_wl_swapchain { }; static VkResult -wsi_wl_get_images(struct anv_swapchain *anv_chain, - uint32_t *pCount, VkImage *pSwapchainImages) +wsi_wl_swapchain_get_images(struct anv_swapchain *anv_chain, + uint32_t *pCount, VkImage *pSwapchainImages) { struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; @@ -445,10 +507,10 @@ wsi_wl_get_images(struct anv_swapchain *anv_chain, } static VkResult -wsi_wl_acquire_next_image(struct anv_swapchain *anv_chain, - uint64_t timeout, - VkSemaphore semaphore, - uint32_t *image_index) +wsi_wl_swapchain_acquire_next_image(struct anv_swapchain *anv_chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index) { struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; @@ -495,9 +557,9 @@ static const struct wl_callback_listener frame_listener = { }; static VkResult -wsi_wl_queue_present(struct anv_swapchain *anv_chain, - struct anv_queue *queue, - uint32_t image_index) +wsi_wl_swapchain_queue_present(struct anv_swapchain *anv_chain, + struct anv_queue *queue, + uint32_t image_index) { struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; @@ -528,11 +590,14 @@ wsi_wl_queue_present(struct anv_swapchain *anv_chain, } static void -wsi_wl_image_finish(struct 
wsi_wl_swapchain *chain, struct wsi_wl_image *image) +wsi_wl_image_finish(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, + const VkAllocationCallbacks* pAllocator) { VkDevice vk_device = anv_device_to_handle(chain->base.device); - anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory), NULL); - anv_DestroyImage(vk_device, anv_image_to_handle(image->image), NULL); + anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory), + pAllocator); + anv_DestroyImage(vk_device, anv_image_to_handle(image->image), + pAllocator); } static void @@ -550,7 +615,8 @@ static const struct wl_buffer_listener buffer_listener = { }; static VkResult -wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image) +wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, + const VkAllocationCallbacks* pAllocator) { VkDevice vk_device = anv_device_to_handle(chain->base.device); VkResult result; @@ -579,7 +645,7 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image) .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, .flags = 0, }}, - NULL, + pAllocator, &vk_image); if (result != VK_SUCCESS) @@ -597,7 +663,7 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image) .allocationSize = image->image->size, .memoryTypeIndex = 0, }, - NULL, + pAllocator, &vk_memory); if (result != VK_SUCCESS) @@ -644,46 +710,42 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image) return VK_SUCCESS; fail_mem: - anv_FreeMemory(vk_device, vk_memory, NULL); + anv_FreeMemory(vk_device, vk_memory, pAllocator); fail_image: - anv_DestroyImage(vk_device, vk_image, NULL); + anv_DestroyImage(vk_device, vk_image, pAllocator); return result; } static VkResult -wsi_wl_destroy_swapchain(struct anv_swapchain *anv_chain) +wsi_wl_swapchain_destroy(struct anv_swapchain *anv_chain, + const VkAllocationCallbacks *pAllocator) { struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain 
*)anv_chain; for (uint32_t i = 0; i < chain->image_count; i++) { if (chain->images[i].buffer) - wsi_wl_image_finish(chain, &chain->images[i]); + wsi_wl_image_finish(chain, &chain->images[i], pAllocator); } - anv_free(&chain->base.device->alloc, chain); + anv_free2(&chain->base.device->alloc, pAllocator, chain); return VK_SUCCESS; } static VkResult -wsi_wl_create_swapchain(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSwapchainCreateInfoKHR *pCreateInfo, - struct anv_swapchain **swapchain_out) +wsi_wl_surface_create_swapchain(struct anv_wsi_surface *wsi_surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain_out) { - struct wsi_wayland *wsi = (struct wsi_wayland *)impl; + struct wsi_wl_surface *surface = (struct wsi_wl_surface *)wsi_surface; struct wsi_wl_swapchain *chain; VkResult result; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); - assert(pCreateInfo->pSurfaceDescription->sType == - VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); - VkSurfaceDescriptionWindowKHR *vk_window = - (VkSurfaceDescriptionWindowKHR *)pCreateInfo->pSurfaceDescription; - assert(vk_window->platform == VK_PLATFORM_WAYLAND_KHR); - int num_images = pCreateInfo->minImageCount; assert(num_images >= MIN_NUM_IMAGES); @@ -698,18 +760,18 @@ wsi_wl_create_swapchain(struct anv_wsi_implementation *impl, num_images = MAX2(num_images, 4); size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); - chain = anv_alloc(&device->alloc, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + chain = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (chain == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); chain->base.device = device; - chain->base.destroy = wsi_wl_destroy_swapchain; - chain->base.get_images = wsi_wl_get_images; - chain->base.acquire_next_image = wsi_wl_acquire_next_image; - 
chain->base.queue_present = wsi_wl_queue_present; + chain->base.destroy = wsi_wl_swapchain_destroy; + chain->base.get_images = wsi_wl_swapchain_get_images; + chain->base.acquire_next_image = wsi_wl_swapchain_acquire_next_image; + chain->base.queue_present = wsi_wl_swapchain_queue_present; - chain->surface = vk_window->pPlatformWindow; + chain->surface = surface->surface; chain->extent = pCreateInfo->imageExtent; chain->vk_format = pCreateInfo->imageFormat; chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, false); @@ -726,7 +788,7 @@ wsi_wl_create_swapchain(struct anv_wsi_implementation *impl, chain->images[i].buffer = NULL; chain->queue = NULL; - chain->display = wsi_wl_get_display(wsi, vk_window->pPlatformHandle); + chain->display = wsi_wl_get_display(device->instance, surface->display); if (!chain->display) goto fail; @@ -735,7 +797,7 @@ wsi_wl_create_swapchain(struct anv_wsi_implementation *impl, goto fail; for (uint32_t i = 0; i < chain->image_count; i++) { - result = wsi_wl_image_init(chain, &chain->images[i]); + result = wsi_wl_image_init(chain, &chain->images[i], pAllocator); if (result != VK_SUCCESS) goto fail; chain->images[i].busy = false; @@ -746,7 +808,7 @@ wsi_wl_create_swapchain(struct anv_wsi_implementation *impl, return VK_SUCCESS; fail: - wsi_wl_destroy_swapchain(&chain->base); + wsi_wl_swapchain_destroy(&chain->base, pAllocator); return result; } @@ -762,12 +824,6 @@ anv_wl_init_wsi(struct anv_instance *instance) if (!wsi) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - wsi->base.get_window_supported = wsi_wl_get_window_supported; - wsi->base.get_surface_properties = wsi_wl_get_surface_properties; - wsi->base.get_surface_formats = wsi_wl_get_surface_formats; - wsi->base.get_surface_present_modes = wsi_wl_get_surface_present_modes; - wsi->base.create_swapchain = wsi_wl_create_swapchain; - wsi->instance = instance; int ret = pthread_mutex_init(&wsi->mutex, NULL); @@ -789,7 +845,7 @@ anv_wl_init_wsi(struct anv_instance *instance) goto 
fail_mutex; } - instance->wsi_impl[VK_PLATFORM_WAYLAND_KHR] = &wsi->base; + instance->wayland_wsi = wsi; return VK_SUCCESS; @@ -805,8 +861,7 @@ fail_alloc: void anv_wl_finish_wsi(struct anv_instance *instance) { - struct wsi_wayland *wsi = - (struct wsi_wayland *)instance->wsi_impl[VK_PLATFORM_WAYLAND_KHR]; + struct wsi_wayland *wsi = instance->wayland_wsi; _mesa_hash_table_destroy(wsi->displays, NULL); diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index f45442d522a..d327f4316d3 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -27,6 +27,13 @@ #include "anv_wsi.h" +struct x11_surface { + struct anv_wsi_surface base; + + xcb_connection_t *connection; + xcb_window_t window; +}; + static const VkSurfaceFormatKHR formats[] = { { .format = VK_FORMAT_B8G8R8A8_UNORM, }, }; @@ -35,53 +42,52 @@ static const VkPresentModeKHR present_modes[] = { VK_PRESENT_MODE_MAILBOX_KHR, }; -static VkResult -x11_get_window_supported(struct anv_wsi_implementation *impl, - struct anv_physical_device *physical_device, - const VkSurfaceDescriptionWindowKHR *window, - VkBool32 *pSupported) +VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + xcb_connection_t* connection, + xcb_visualid_t visual_id) { - *pSupported = true; - stub_return(VK_SUCCESS); + anv_finishme("Check that we actually have DRI3"); + stub_return(true); } static VkResult -x11_get_surface_properties(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSurfaceDescriptionWindowKHR *vk_window, - VkSurfacePropertiesKHR *props) +x11_surface_get_capabilities(struct anv_wsi_surface *wsi_surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR *caps) { - VkPlatformHandleXcbKHR *vk_xcb_handle = vk_window->pPlatformHandle; - xcb_connection_t *conn = vk_xcb_handle->connection; - xcb_window_t win = *(xcb_window_t *)vk_window->pPlatformWindow; + struct x11_surface *surface = (struct 
x11_surface *)wsi_surface; - xcb_get_geometry_cookie_t cookie = xcb_get_geometry(conn, win); + xcb_get_geometry_cookie_t cookie = xcb_get_geometry(surface->connection, + surface->window); xcb_generic_error_t *err; - xcb_get_geometry_reply_t *geom = xcb_get_geometry_reply(conn, cookie, - &err); + xcb_get_geometry_reply_t *geom = xcb_get_geometry_reply(surface->connection, + cookie, &err); if (geom) { - free(err); VkExtent2D extent = { geom->width, geom->height }; - props->currentExtent = extent; - props->minImageExtent = extent; - props->maxImageExtent = extent; + caps->currentExtent = extent; + caps->minImageExtent = extent; + caps->maxImageExtent = extent; } else { /* This can happen if the client didn't wait for the configure event * to come back from the compositor. In that case, we don't know the * size of the window so we just return valid "I don't know" stuff. */ - free(geom); - props->currentExtent = (VkExtent2D) { -1, -1 }; - props->minImageExtent = (VkExtent2D) { 1, 1 }; - props->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + caps->currentExtent = (VkExtent2D) { -1, -1 }; + caps->minImageExtent = (VkExtent2D) { 1, 1 }; + caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; } - - props->minImageCount = 2; - props->maxImageCount = 4; - props->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; - props->currentTransform = VK_SURFACE_TRANSFORM_NONE_KHR; - props->maxImageArraySize = 1; - props->supportedUsageFlags = + free(err); + free(geom); + + caps->minImageCount = 2; + caps->maxImageCount = 4; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; + caps->currentTransform = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; + caps->maxImageArrayLayers = 1; + caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR; + caps->supportedUsageFlags = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; @@ -89,37 +95,81 @@ x11_get_surface_properties(struct anv_wsi_implementation *impl, } static VkResult 
-x11_get_surface_formats(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSurfaceDescriptionWindowKHR *vk_window, - uint32_t *pCount, VkSurfaceFormatKHR *pSurfaceFormats) +x11_surface_get_formats(struct anv_wsi_surface *surface, + struct anv_physical_device *device, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormatKHR *pSurfaceFormats) { if (pSurfaceFormats == NULL) { - *pCount = ARRAY_SIZE(formats); + *pSurfaceFormatCount = ARRAY_SIZE(formats); return VK_SUCCESS; } - assert(*pCount >= ARRAY_SIZE(formats)); - typed_memcpy(pSurfaceFormats, formats, *pCount); - *pCount = ARRAY_SIZE(formats); + assert(*pSurfaceFormatCount >= ARRAY_SIZE(formats)); + typed_memcpy(pSurfaceFormats, formats, *pSurfaceFormatCount); + *pSurfaceFormatCount = ARRAY_SIZE(formats); return VK_SUCCESS; } static VkResult -x11_get_surface_present_modes(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSurfaceDescriptionWindowKHR *vk_window, - uint32_t *pCount, VkPresentModeKHR *pPresentModes) +x11_surface_get_present_modes(struct anv_wsi_surface *surface, + struct anv_physical_device *device, + uint32_t *pPresentModeCount, + VkPresentModeKHR *pPresentModes) { if (pPresentModes == NULL) { - *pCount = ARRAY_SIZE(present_modes); + *pPresentModeCount = ARRAY_SIZE(present_modes); return VK_SUCCESS; } - assert(*pCount >= ARRAY_SIZE(present_modes)); - typed_memcpy(pPresentModes, present_modes, *pCount); - *pCount = ARRAY_SIZE(present_modes); + assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); + typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); + *pPresentModeCount = ARRAY_SIZE(present_modes); + + return VK_SUCCESS; +} + +static void +x11_surface_destroy(struct anv_wsi_surface *surface, + const VkAllocationCallbacks *pAllocator) +{ + anv_free2(&surface->instance->alloc, pAllocator, surface); +} + +static VkResult +x11_surface_create_swapchain(struct anv_wsi_surface *surface, + struct anv_device *device, + const 
VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); + +VkResult anv_CreateXcbSurfaceKHR( + VkInstance _instance, + xcb_connection_t* connection, + xcb_window_t window, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + struct x11_surface *surface; + + surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (surface == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + surface->connection = connection; + surface->window = window; + + surface->base.instance = instance; + surface->base.destroy = x11_surface_destroy; + surface->base.get_capabilities = x11_surface_get_capabilities; + surface->base.get_formats = x11_surface_get_formats; + surface->base.get_present_modes = x11_surface_get_present_modes; + surface->base.create_swapchain = x11_surface_create_swapchain; + + *pSurface = anv_wsi_surface_to_handle(&surface->base); return VK_SUCCESS; } @@ -228,7 +278,8 @@ x11_queue_present(struct anv_swapchain *anv_chain, } static VkResult -x11_destroy_swapchain(struct anv_swapchain *anv_chain) +x11_swapchain_destroy(struct anv_swapchain *anv_chain, + const VkAllocationCallbacks *pAllocator) { struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; xcb_void_cookie_t cookie; @@ -251,41 +302,35 @@ x11_destroy_swapchain(struct anv_swapchain *anv_chain) } static VkResult -x11_create_swapchain(struct anv_wsi_implementation *impl, - struct anv_device *device, - const VkSwapchainCreateInfoKHR *pCreateInfo, - struct anv_swapchain **swapchain_out) +x11_surface_create_swapchain(struct anv_wsi_surface *wsi_surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain_out) { + struct x11_surface *surface = (struct x11_surface *)wsi_surface; struct x11_swapchain *chain; 
xcb_void_cookie_t cookie; VkResult result; - assert(pCreateInfo->pSurfaceDescription->sType == - VK_STRUCTURE_TYPE_SURFACE_DESCRIPTION_WINDOW_KHR); - VkSurfaceDescriptionWindowKHR *vk_window = - (VkSurfaceDescriptionWindowKHR *)pCreateInfo->pSurfaceDescription; - assert(vk_window->platform == VK_PLATFORM_XCB_KHR); - int num_images = pCreateInfo->minImageCount; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); - chain = anv_alloc(&device->alloc, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + chain = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (chain == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); chain->base.device = device; - chain->base.destroy = x11_destroy_swapchain; + chain->base.destroy = x11_swapchain_destroy; chain->base.get_images = x11_get_images; chain->base.acquire_next_image = x11_acquire_next_image; chain->base.queue_present = x11_queue_present; - VkPlatformHandleXcbKHR *vk_xcb_handle = vk_window->pPlatformHandle; - - chain->conn = (xcb_connection_t *) vk_xcb_handle->connection; - chain->window = *(xcb_window_t *)vk_window->pPlatformWindow; + chain->conn = surface->connection; + chain->window = surface->window; chain->extent = pCreateInfo->imageExtent; chain->image_count = num_images; chain->next_image = 0; @@ -406,26 +451,9 @@ x11_create_swapchain(struct anv_wsi_implementation *impl, VkResult anv_x11_init_wsi(struct anv_instance *instance) { - struct anv_wsi_implementation *impl; - - impl = anv_alloc(&instance->alloc, sizeof(*impl), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!impl) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - impl->get_window_supported = x11_get_window_supported; - impl->get_surface_properties = x11_get_surface_properties; - impl->get_surface_formats = x11_get_surface_formats; - impl->get_surface_present_modes = x11_get_surface_present_modes; - impl->create_swapchain = 
x11_create_swapchain; - - instance->wsi_impl[VK_PLATFORM_XCB_KHR] = impl; - return VK_SUCCESS; } void anv_x11_finish_wsi(struct anv_instance *instance) -{ - anv_free(&instance->alloc, instance->wsi_impl[VK_PLATFORM_XCB_KHR]); -} +{ } -- cgit v1.2.3 From bfeaf67391ced69c18f8abfc1ce6cede3e0ffb27 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 3 Dec 2015 15:23:33 -0800 Subject: anv/device: Give a version of 0.210.1 in apiVersion --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 32f8dcf976f..c25ad34a613 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -505,7 +505,7 @@ void anv_GetPhysicalDeviceProperties( }; *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = VK_MAKE_VERSION(0, 210, 0), + .apiVersion = VK_MAKE_VERSION(0, 210, 1), .driverVersion = 1, .vendorID = 0x8086, .deviceID = pdevice->chipset_id, -- cgit v1.2.3 From 57941b61ab12a2c9f6cd97fd08fea5d1ae04414e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 2 Dec 2015 18:41:08 -0800 Subject: anv/image: Vulkan's depthPitch is in bytes, not rows Fix for VkGetImageSubresourceLayout. --- src/vulkan/anv_image.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index c831d756407..157357ea74d 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -417,7 +417,9 @@ anv_surface_get_subresource_layout(struct anv_image *image, layout->offset = surface->offset; layout->rowPitch = surface->stride; - layout->depthPitch = surface->qpitch; + + /* Anvil's qpitch is in units of rows. Vulkan's depthPitch is in bytes. 
*/ + layout->depthPitch = surface->qpitch * surface->stride; /* FINISHME: We really shouldn't be doing this calculation here */ if (image->array_size > 1) -- cgit v1.2.3 From bda43a0f59e5f5877c10b2ce6727fba99d486d37 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 2 Dec 2015 09:14:48 -0800 Subject: isl: Rename legacy Y tiling to ISL_TILING_Y0 Rename legacy Y tiling from ISL_TILING_Y to ISL_TILING_Y0 in order to clearly distinguish it from Yf and Ys. Using ISL_TILING_Y to denote legacy Y tiling would lead to confusion with i965, because i965 uses I915_TILE_Y to denote *any* Y tiling. --- src/vulkan/anv_image.c | 4 ++-- src/vulkan/gen7_state.c | 2 +- src/vulkan/gen8_state.c | 2 +- src/vulkan/isl.c | 4 ++-- src/vulkan/isl.h | 5 ++++- 5 files changed, 10 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 157357ea74d..7abd0db1460 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -100,7 +100,7 @@ static const struct anv_tile_info { } anv_tile_info_table[] = { [ISL_TILING_LINEAR] = { 64 }, [ISL_TILING_X] = { 4096 }, - [ISL_TILING_Y] = { 4096 }, + [ISL_TILING_Y0] = { 4096 }, [ISL_TILING_Yf] = { 4096 }, [ISL_TILING_Ys] = { 4096 }, [ISL_TILING_W] = { 4096 }, @@ -124,7 +124,7 @@ anv_image_choose_tiling(const struct anv_image_create_info *anv_info) if (unlikely(anv_info->vk_info->format == VK_FORMAT_S8_UINT)) { return ISL_TILING_W; } else { - return ISL_TILING_Y; + return ISL_TILING_Y0; } default: assert(!"bad VKImageTiling"); diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index a3cb95dbb52..5a626f75eeb 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -249,7 +249,7 @@ genX(image_view_init)(struct anv_image_view *iview, * Tiled Surface is False." */ .TiledSurface = surface->tiling != ISL_TILING_LINEAR, - .TileWalk = surface->tiling == ISL_TILING_Y ? + .TileWalk = surface->tiling == ISL_TILING_Y0 ? 
TILEWALK_YMAJOR : TILEWALK_XMAJOR, .VerticalLineStride = 0, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index c1e0504a15b..f46611c629d 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -177,7 +177,7 @@ genX(image_view_init)(struct anv_image_view *iview, static const uint8_t isl_to_gen_tiling[] = { [ISL_TILING_LINEAR] = LINEAR, [ISL_TILING_X] = XMAJOR, - [ISL_TILING_Y] = YMAJOR, + [ISL_TILING_Y0] = YMAJOR, [ISL_TILING_Yf] = YMAJOR, [ISL_TILING_Ys] = YMAJOR, [ISL_TILING_W] = WMAJOR, diff --git a/src/vulkan/isl.c b/src/vulkan/isl.c index ebfd2bfb104..65b696a1003 100644 --- a/src/vulkan/isl.c +++ b/src/vulkan/isl.c @@ -56,7 +56,7 @@ isl_tiling_get_extent(const struct isl_device *dev, static const struct isl_extent2d legacy_extents[] = { [ISL_TILING_LINEAR] = { 1, 1 }, [ISL_TILING_X] = { 512, 8 }, - [ISL_TILING_Y] = { 128, 32 }, + [ISL_TILING_Y0] = { 128, 32 }, [ISL_TILING_W] = { 128, 32 }, }; @@ -74,7 +74,7 @@ isl_tiling_get_extent(const struct isl_device *dev, switch (tiling) { case ISL_TILING_LINEAR: case ISL_TILING_X: - case ISL_TILING_Y: + case ISL_TILING_Y0: case ISL_TILING_W: *e = legacy_extents[tiling]; return; diff --git a/src/vulkan/isl.h b/src/vulkan/isl.h index fb0b6f4a75c..fd6b1c2b5ad 100644 --- a/src/vulkan/isl.h +++ b/src/vulkan/isl.h @@ -40,12 +40,15 @@ struct brw_device_info; /** * WARNING: These values differ from the hardware enum values, which are * unstable across hardware generations. + * + * Note that legacy Y tiling is ISL_TILING_Y0 instead of ISL_TILING_Y, to + * clearly distinguish it from Yf and Ys. */ enum isl_tiling { ISL_TILING_LINEAR, ISL_TILING_W, ISL_TILING_X, - ISL_TILING_Y, /**< Legacy Y tiling */ + ISL_TILING_Y0, /**< Legacy Y tiling */ ISL_TILING_Yf, ISL_TILING_Ys, }; -- cgit v1.2.3 From afdadec77f57b42d0c7f1f44c5ab87b636d38010 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 13 Nov 2015 16:01:35 -0800 Subject: isl: Implement isl_surf_init() for gen4-gen9 This is a big code push. 
The patch is about 3000 lines. Function isl_surf_init() calculates the physical layout of a surface. The implementation is "complete" (but untested) for all 1D, 2D, 3D, and cube surfaces for gen4 through gen9, except: * gen9 1D surfaces * gen9 Ys multisampled surfaces * auxiliary surfaces (such as hiz, mcs, ccs) --- src/vulkan/Makefile.am | 13 +- src/vulkan/isl.c | 1030 ++++++++++++++++++++++++++++++++++++++++++++++-- src/vulkan/isl.h | 568 +++++++++++++++++++++++++- src/vulkan/isl_gen4.c | 74 ++++ src/vulkan/isl_gen4.h | 47 +++ src/vulkan/isl_gen6.c | 160 ++++++++ src/vulkan/isl_gen6.h | 47 +++ src/vulkan/isl_gen7.c | 392 ++++++++++++++++++ src/vulkan/isl_gen7.h | 52 +++ src/vulkan/isl_gen8.c | 229 +++++++++++ src/vulkan/isl_gen8.h | 47 +++ src/vulkan/isl_gen9.c | 184 +++++++++ src/vulkan/isl_gen9.h | 41 ++ src/vulkan/isl_priv.h | 141 +++++++ 14 files changed, 2966 insertions(+), 59 deletions(-) create mode 100644 src/vulkan/isl_gen4.c create mode 100644 src/vulkan/isl_gen4.h create mode 100644 src/vulkan/isl_gen6.c create mode 100644 src/vulkan/isl_gen6.h create mode 100644 src/vulkan/isl_gen7.c create mode 100644 src/vulkan/isl_gen7.h create mode 100644 src/vulkan/isl_gen8.c create mode 100644 src/vulkan/isl_gen8.h create mode 100644 src/vulkan/isl_gen9.c create mode 100644 src/vulkan/isl_gen9.h create mode 100644 src/vulkan/isl_priv.h (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 24391eafc0b..fd74393be95 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -92,7 +92,18 @@ VULKAN_SOURCES = \ anv_wsi.c \ anv_wsi_x11.c \ isl.c \ - isl_format_layout.c + isl_format_layout.c \ + isl_gen4.c \ + isl_gen4.h \ + isl_gen6.c \ + isl_gen6.h \ + isl_gen7.c \ + isl_gen7.h \ + isl_gen8.c \ + isl_gen8.h \ + isl_gen9.c \ + isl_gen9.h \ + $(NULL) BUILT_SOURCES = \ anv_entrypoints.h \ diff --git a/src/vulkan/isl.c b/src/vulkan/isl.c index 65b696a1003..41b842dd79b 100644 --- a/src/vulkan/isl.c +++ b/src/vulkan/isl.c @@ -23,18 
+23,25 @@ #include -#include "mesa/main/imports.h" - #include "isl.h" +#include "isl_gen4.h" +#include "isl_gen6.h" +#include "isl_gen7.h" +#include "isl_gen8.h" +#include "isl_gen9.h" +#include "isl_priv.h" -/** - * Log base 2, rounding towards zero. - */ -static inline uint32_t -isl_log2u(uint32_t n) +void PRINTFLIKE(3, 4) UNUSED +__isl_finishme(const char *file, int line, const char *fmt, ...) { - assert(n != 0); - return 31 - __builtin_clz(n); + va_list ap; + char buf[512]; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf); } void @@ -42,50 +49,997 @@ isl_device_init(struct isl_device *dev, const struct brw_device_info *info) { dev->info = info; + dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6; + + /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some + * device properties at buildtime. Verify that the macros with the device + * properties chosen during runtime. + */ + assert(ISL_DEV_GEN(dev) == dev->info->gen); + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev) == dev->use_separate_stencil); + + /* Did we break hiz or stencil? */ + if (ISL_DEV_USE_SEPARATE_STENCIL(dev)) + assert(info->has_hiz_and_separate_stencil); + if (info->must_use_separate_stencil) + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); +} + +bool +isl_format_has_sint_channel(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->channels.r.type == ISL_SINT || + fmtl->channels.g.type == ISL_SINT || + fmtl->channels.b.type == ISL_SINT || + fmtl->channels.a.type == ISL_SINT || + fmtl->channels.l.type == ISL_SINT || + fmtl->channels.i.type == ISL_SINT || + fmtl->channels.p.type == ISL_SINT || + fmtl->channels.g.type == ISL_SINT; } /** - * The returned extent's units are (width=bytes, height=rows). 
+ * @param[out] info is written only on success */ +bool +isl_tiling_get_info(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_tile_info *tile_info) +{ + const uint32_t bs = format_block_size; + uint32_t width, height; + + assert(bs > 0); + + switch (tiling) { + case ISL_TILING_LINEAR: + width = 1; + height = 1; + break; + + case ISL_TILING_X: + width = 1 << 9; + height = 1 << 3; + break; + + case ISL_TILING_Y0: + width = 1 << 7; + height = 1 << 5; + break; + + case ISL_TILING_W: + /* XXX: Should W tile be same as Y? */ + width = 1 << 6; + height = 1 << 6; + break; + + case ISL_TILING_Yf: + case ISL_TILING_Ys: { + if (ISL_DEV_GEN(dev) < 9) + return false; + + if (!isl_is_pow2(bs)) + return false; + + bool is_Ys = tiling == ISL_TILING_Ys; + + width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys)); + height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys)); + break; + } + } /* end switch */ + + *tile_info = (struct isl_tile_info) { + .tiling = tiling, + .width = width, + .height = height, + .size = width * height, + }; + + return true; +} + void isl_tiling_get_extent(const struct isl_device *dev, enum isl_tiling tiling, - uint32_t cpp, + uint32_t format_block_size, struct isl_extent2d *e) { - static const struct isl_extent2d legacy_extents[] = { - [ISL_TILING_LINEAR] = { 1, 1 }, - [ISL_TILING_X] = { 512, 8 }, - [ISL_TILING_Y0] = { 128, 32 }, - [ISL_TILING_W] = { 128, 32 }, + struct isl_tile_info tile_info; + isl_tiling_get_info(dev, tiling, format_block_size, &tile_info); + *e = isl_extent2d(tile_info.width, tile_info.height); +} + +/** + * @param[out] tiling is set only on success + */ +bool +isl_surf_choose_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling *tiling) +{ + isl_tiling_flags_t tiling_flags = info->tiling_flags; + + if (ISL_DEV_GEN(dev) >= 7) { + gen7_filter_tiling(dev, info, &tiling_flags); + } else { + isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev)); + 
gen7_filter_tiling(dev, info, &tiling_flags); + } + + #define CHOOSE(__tiling) \ + do { \ + if (tiling_flags & (1u << (__tiling))) { \ + *tiling = (__tiling); \ + return true; \ + } \ + } while (0) + + /* Of the tiling modes remaining, choose the one that offers the best + * performance. + */ + CHOOSE(ISL_TILING_Ys); + CHOOSE(ISL_TILING_Yf); + CHOOSE(ISL_TILING_Y0); + CHOOSE(ISL_TILING_X); + CHOOSE(ISL_TILING_W); + CHOOSE(ISL_TILING_LINEAR); + + #undef CHOOSE + + /* No tiling mode accomodates the inputs. */ + return false; +} + +static bool +isl_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + if (ISL_DEV_GEN(dev) >= 8) { + return gen8_choose_msaa_layout(dev, info, tiling, msaa_layout); + } else if (ISL_DEV_GEN(dev) >= 7) { + return gen7_choose_msaa_layout(dev, info, tiling, msaa_layout); + } else if (ISL_DEV_GEN(dev) >= 6) { + return gen6_choose_msaa_layout(dev, info, tiling, msaa_layout); + } else { + return gen4_choose_msaa_layout(dev, info, tiling, msaa_layout); + } +} + +static void +isl_msaa_interleaved_scale_px_to_sa(uint32_t samples, + uint32_t *width, uint32_t *height) +{ + assert(isl_is_pow2(samples)); + + /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level + * Sizes (p133): + * + * If the surface is multisampled and it is a depth or stencil surface + * or Multisampled Surface StorageFormat in SURFACE_STATE is + * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before + * proceeding: [...] 
+ */ + *width = isl_align(*width, 2) << ((ffs(samples) - 0) / 2); + *height = isl_align(*height, 2) << ((ffs(samples) - 1) / 2); +} + +static enum isl_array_pitch_span +isl_choose_array_pitch_span(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + const struct isl_extent4d *phys_level0_sa) +{ + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + if (ISL_DEV_GEN(dev) >= 9) + isl_finishme("%s:%s: [SKL+] 1d surface layout", __FILE__, __func__); + /* fallthrough */ + + case ISL_DIM_LAYOUT_GEN4_2D: + if (ISL_DEV_GEN(dev) >= 8) { + /* QPitch becomes programmable in Broadwell. So choose the + * most compact QPitch possible in order to conserve memory. + * + * From the Broadwell PRM >> Volume 2d: Command Reference: Structures + * >> RENDER_SURFACE_STATE Surface QPitch (p325): + * + * - Software must ensure that this field is set to a value + * sufficiently large such that the array slices in the surface + * do not overlap. Refer to the Memory Data Formats section for + * information on how surfaces are stored in memory. + * + * - This field specifies the distance in rows between array + * slices. It is used only in the following cases: + * + * - Surface Array is enabled OR + * - Number of Mulitsamples is not NUMSAMPLES_1 and + * Multisampled Surface Storage Format set to MSFMT_MSS OR + * - Surface Type is SURFTYPE_CUBE + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } else if (ISL_DEV_GEN(dev) >= 7) { + /* Note that Ivybridge introduces + * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the + * driver more control over the QPitch. + */ + + if (phys_level0_sa->array_len == 1) { + /* The hardware will never use the QPitch. So choose the most + * compact QPitch possible in order to conserve memory. 
+ */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + if (isl_surf_usage_is_depth_or_stencil(info->usage)) { + /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >> + * Section 6.18.4.7: Surface Arrays (p112): + * + * If Surface Array Spacing is set to ARYSPC_FULL (note that + * the depth buffer and stencil buffer have an implied value of + * ARYSPC_FULL): + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + if (info->levels == 1) { + /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing + * to ARYSPC_LOD0. + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + return ISL_ARRAY_PITCH_SPAN_FULL; + } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && + ISL_DEV_USE_SEPARATE_STENCIL(dev) && + isl_surf_usage_is_stencil(info->usage)) { + /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * The separate stencil buffer does not support mip mapping, thus + * the storage for LODs other than LOD 0 is not needed. + */ + assert(info->levels == 1); + assert(phys_level0_sa->array_len == 1); + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } else { + if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && + ISL_DEV_USE_SEPARATE_STENCIL(dev) && + isl_surf_usage_is_stencil(info->usage)) { + /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * The separate stencil buffer does not support mip mapping, + * thus the storage for LODs other than LOD 0 is not needed. + */ + assert(info->levels == 1); + assert(phys_level0_sa->array_len == 1); + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + if (phys_level0_sa->array_len == 1) { + /* The hardware will never use the QPitch. So choose the most + * compact QPitch possible in order to conserve memory. + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + return ISL_ARRAY_PITCH_SPAN_FULL; + } + + case ISL_DIM_LAYOUT_GEN4_3D: + /* The hardware will never use the QPitch. 
So choose the most + * compact QPitch possible in order to conserve memory. + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + unreachable("bad isl_dim_layout"); + return ISL_ARRAY_PITCH_SPAN_FULL; +} + +static void +isl_choose_lod_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *lod_align_el) +{ + if (ISL_DEV_GEN(dev) >= 9) { + gen9_choose_lod_alignment_el(dev, info, tiling, msaa_layout, + lod_align_el); + } else if (ISL_DEV_GEN(dev) >= 8) { + gen8_choose_lod_alignment_el(dev, info, tiling, msaa_layout, + lod_align_el); + } else if (ISL_DEV_GEN(dev) >= 7) { + gen7_choose_lod_alignment_el(dev, info, tiling, msaa_layout, + lod_align_el); + } else if (ISL_DEV_GEN(dev) >= 6) { + gen6_choose_lod_alignment_el(dev, info, tiling, msaa_layout, + lod_align_el); + } else { + gen4_choose_lod_alignment_el(dev, info, tiling, msaa_layout, + lod_align_el); + } +} + +static enum isl_dim_layout +isl_surf_choose_dim_layout(const struct isl_device *dev, + enum isl_surf_dim logical_dim) +{ + if (ISL_DEV_GEN(dev) >= 9) { + switch (logical_dim) { + case ISL_SURF_DIM_1D: + return ISL_DIM_LAYOUT_GEN9_1D; + case ISL_SURF_DIM_2D: + case ISL_SURF_DIM_3D: + return ISL_DIM_LAYOUT_GEN4_2D; + } + } else { + switch (logical_dim) { + case ISL_SURF_DIM_1D: + case ISL_SURF_DIM_2D: + return ISL_DIM_LAYOUT_GEN4_2D; + case ISL_SURF_DIM_3D: + return ISL_DIM_LAYOUT_GEN4_3D; + } + } + + unreachable("bad isl_surf_dim"); + return ISL_DIM_LAYOUT_GEN4_2D; +} + +/** + * Calculate the physical extent of the surface's first level, in units of + * surface samples. 
+ */ +static void +isl_calc_phys_level0_extent_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent4d *phys_level0_sa) +{ + if (isl_format_is_yuv(info->format)) + isl_finishme("%s:%s: YUV format", __FILE__, __func__); + + switch (info->dim) { + case ISL_SURF_DIM_1D: + assert(info->height == 1); + assert(info->depth == 1); + assert(info->samples == 1); + + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN4_3D: + unreachable("bad isl_dim_layout"); + + case ISL_DIM_LAYOUT_GEN9_1D: + if (ISL_DEV_GEN(dev) >= 9) + isl_finishme("%s:%s: [SKL+] 1d surface layout", __FILE__, __func__); + /* fallthrough */ + + case ISL_DIM_LAYOUT_GEN4_2D: + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = 1, + .d = 1, + .a = info->array_len, + }; + break; + } + break; + + case ISL_SURF_DIM_2D: + assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D); + + if (tiling == ISL_TILING_Ys && info->samples > 1) + isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__); + + switch (msaa_layout) { + case ISL_MSAA_LAYOUT_NONE: + assert(info->depth == 1); + assert(info->samples == 1); + + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = 1, + .a = info->array_len, + }; + break; + + case ISL_MSAA_LAYOUT_ARRAY: + assert(info->depth == 1); + assert(info->array_len == 1); + assert(!isl_format_is_compressed(info->format)); + + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = 1, + .a = info->samples, + }; + break; + + case ISL_MSAA_LAYOUT_INTERLEAVED: + assert(info->depth == 1); + assert(info->array_len == 1); + assert(!isl_format_is_compressed(info->format)); + + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = 1, + .a = 1, + }; + + isl_msaa_interleaved_scale_px_to_sa(info->samples, + &phys_level0_sa->w, + 
&phys_level0_sa->h); + break; + } + break; + + case ISL_SURF_DIM_3D: + assert(info->array_len == 1); + assert(info->samples == 1); + + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + unreachable("bad isl_dim_layout"); + + case ISL_DIM_LAYOUT_GEN4_2D: + assert(ISL_DEV_GEN(dev) >= 9); + + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = 1, + .a = info->depth, + }; + break; + + case ISL_DIM_LAYOUT_GEN4_3D: + assert(ISL_DEV_GEN(dev) < 9); + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = info->depth, + .a = 1, + }; + break; + } + break; + } +} + +/** + * A variant of isl_calc_phys_slice0_extent_sa() specific to + * ISL_DIM_LAYOUT_GEN4_2D. + */ +static void +isl_calc_phys_slice0_extent_sa_gen4_2d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_msaa_layout msaa_layout, + const struct isl_extent3d *lod_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + assert(phys_level0_sa->depth == 1); + + uint32_t slice_top_w = 0; + uint32_t slice_bottom_w = 0; + uint32_t slice_left_h = 0; + uint32_t slice_right_h = 0; + + uint32_t W0 = phys_level0_sa->w; + uint32_t H0 = phys_level0_sa->h; + + for (uint32_t l = 0; l < info->levels; ++l) { + uint32_t W = isl_minify(W0, l); + uint32_t H = isl_minify(H0, l); + + if (msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { + /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level + * Sizes (p133): + * + * If the surface is multisampled and it is a depth or stencil + * surface or Multisampled Surface StorageFormat in + * SURFACE_STATE is MSFMT_DEPTH_STENCIL, W_L and H_L must be + * adjusted as follows before proceeding: [...] 
+ */ + isl_msaa_interleaved_scale_px_to_sa(info->samples, &W, &H); + } + + uint32_t w = isl_align_npot(W, lod_align_sa->w); + uint32_t h = isl_align_npot(H, lod_align_sa->h); + + if (l == 0) { + slice_top_w = w; + slice_left_h = h; + slice_right_h = h; + } else if (l == 1) { + slice_bottom_w = w; + slice_left_h += h; + } else if (l == 2) { + slice_bottom_w += w; + } else { + slice_right_h += h; + } + } + + *phys_slice0_sa = (struct isl_extent2d) { + .w = MAX(slice_top_w, slice_bottom_w), + .h = MAX(slice_left_h, slice_right_h), }; +} - static const struct isl_extent2d yf_extents[] = { - /*cpp*/ - /* 1*/ [0] = { 64, 64 }, - /* 2*/ [1] = { 128, 32 }, - /* 4*/ [2] = { 128, 32 }, - /* 8*/ [3] = { 256, 16 }, - /*16*/ [4] = { 256, 16 }, +/** + * A variant of isl_calc_phys_slice0_extent_sa() specific to + * ISL_DIM_LAYOUT_GEN4_3D. + */ +static void +isl_calc_phys_slice0_extent_sa_gen4_3d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_extent3d *lod_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + assert(info->samples == 1); + assert(phys_level0_sa->array_len == 1); + + uint32_t slice_w = 0; + uint32_t slice_h = 0; + + uint32_t W0 = phys_level0_sa->w; + uint32_t H0 = phys_level0_sa->h; + uint32_t D0 = phys_level0_sa->d; + + for (uint32_t l = 0; l < info->levels; ++l) { + uint32_t level_w = isl_align_npot(isl_minify(W0, l), lod_align_sa->w); + uint32_t level_h = isl_align_npot(isl_minify(H0, l), lod_align_sa->h); + uint32_t level_d = isl_align_npot(isl_minify(D0, l), lod_align_sa->d); + + uint32_t max_layers_horiz = MIN(level_d, 1u << l); + uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); + + slice_w = MAX(slice_w, level_w * max_layers_horiz); + slice_h += level_h * max_layers_vert; + } + + *phys_slice0_sa = (struct isl_extent2d) { + .w = slice_w, + .h = slice_h, }; +} + +/** + * Calculate the physical extent of the surface's first array slice, in units 
+ * of surface samples. The result is aligned to \a lod_align_sa. + */ +static void +isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + enum isl_msaa_layout msaa_layout, + const struct isl_extent3d *lod_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + if (ISL_DEV_GEN(dev) >= 9) + isl_finishme("%s:%s: [SKL+] physical layout of 1d surfaces", + __FILE__, __func__); + /*fallthrough*/ + case ISL_DIM_LAYOUT_GEN4_2D: + isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, + lod_align_sa, phys_level0_sa, + phys_slice0_sa); + return; + case ISL_DIM_LAYOUT_GEN4_3D: + isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, lod_align_sa, + phys_level0_sa, phys_slice0_sa); + return; + } +} + +/** + * Calculate the pitch between physical array slices, in units of rows of + * surface samples. The result is aligned to \a lod_align_sa. 
+ */ +static uint32_t +isl_calc_array_pitch_sa_rows(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + enum isl_array_pitch_span array_pitch_span, + const struct isl_extent3d *lod_align_sa, + const struct isl_extent4d *phys_level0_sa, + const struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - assert(cpp > 0); + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + if (ISL_DEV_GEN(dev) >= 9) + isl_finishme("%s:%s: [SKL+] physical layout of 1d surfaces", + __FILE__, __func__); + /*fallthrough*/ - switch (tiling) { + case ISL_DIM_LAYOUT_GEN4_2D: + switch (array_pitch_span) { + case ISL_ARRAY_PITCH_SPAN_COMPACT: + return isl_align_npot(phys_slice0_sa->h, lod_align_sa->h); + case ISL_ARRAY_PITCH_SPAN_FULL: { + /* The QPitch equation is found in the Broadwell PRM >> Volume 5: + * Memory Views >> Common Surface Formats >> Surface Layout >> 2D + * Surfaces >> Surface Arrays. + */ + uint32_t H0_sa = phys_level0_sa->h; + uint32_t H1_sa = isl_minify(H0_sa, 1); + + uint32_t h0_sa = isl_align_npot(H0_sa, lod_align_sa->h); + uint32_t h1_sa = isl_align_npot(H1_sa, lod_align_sa->h); + + uint32_t m; + if (ISL_DEV_GEN(dev) >= 7) { + /* The QPitch equation changed slightly in Ivybridge. */ + m = 12; + } else { + m = 11; + } + + uint32_t pitch_sa_rows = h0_sa + h1_sa + (m * lod_align_sa->h); + + if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && + (info->height % 4 == 1)) { + /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than + * the value calculated in the equation above , for every + * other odd Surface Height starting from 1 i.e. 1,5,9,13. + * + * XXX(chadv): Is the errata natural corollary of the physical + * layout of interleaved samples? 
+ */ + pitch_sa_rows += 4; + } + + pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); + + return pitch_sa_rows; + } /* end case */ + break; + } + break; + + case ISL_DIM_LAYOUT_GEN4_3D: + assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); + return isl_align_npot(phys_slice0_sa->h, lod_align_sa->h); + } + + unreachable("bad isl_dim_layout"); + return 0; +} + +/** + * Calculate the pitch of each surface row, in bytes. + */ +static uint32_t +isl_calc_row_pitch(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + const struct isl_extent3d *lod_align_sa, + const struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + uint32_t row_pitch = info->min_pitch; + + /* First, align the surface to a cache line boundary, as the PRM explains + * below. + * + * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Render Target and Media + * Surfaces: + * + * The data port accesses data (pixels) outside of the surface if they + * are contained in the same cache request as pixels that are within the + * surface. These pixels will not be returned by the requesting message, + * however if these pixels lie outside of defined pages in the GTT, + * a GTT error will result when the cache request is processed. In order + * to avoid these GTT errors, “padding” at the bottom of the surface is + * sometimes necessary. + * + * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: + * + * The sampling engine accesses texels outside of the surface if they + * are contained in the same cache line as texels that are within the + * surface. 
These texels will not participate in any calculation + * performed by the sampling engine and will not affect the result of + * any sampling engine operation, however if these texels lie outside of + * defined pages in the GTT, a GTT error will result when the cache line + * is accessed. In order to avoid these GTT errors, “padding” at the + * bottom and right side of a sampling engine surface is sometimes + * necessary. + * + * It is possible that a cache line will straddle a page boundary if the + * base address or pitch is not aligned. All pages included in the cache + * lines that are part of the surface must map to valid GTT entries to + * avoid errors. To determine the necessary padding on the bottom and + * right side of the surface, refer to the table in Alignment Unit Size + * section for the i and j parameters for the surface format in use. The + * surface must then be extended to the next multiple of the alignment + * unit size in each dimension, and all texels contained in this + * extended surface must have valid GTT entries. + * + * For example, suppose the surface size is 15 texels by 10 texels and + * the alignment parameters are i=4 and j=2. In this case, the extended + * surface would be 16 by 10. Note that these calculations are done in + * texels, and must be converted to bytes based on the surface format + * being used to determine whether additional pages need to be defined. 
+ */ + row_pitch = MAX(row_pitch, + fmtl->bs * isl_align_div_npot(phys_slice0_sa->w, fmtl->bw)); + + switch (tile_info->tiling) { case ISL_TILING_LINEAR: - case ISL_TILING_X: - case ISL_TILING_Y0: - case ISL_TILING_W: - *e = legacy_extents[tiling]; - return; - case ISL_TILING_Yf: - case ISL_TILING_Ys: - assert(_mesa_is_pow_two(cpp)); - *e = yf_extents[isl_log2u(cpp)]; - if (tiling == ISL_TILING_Ys) { - e->width *= 4; - e->height *= 4; + /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> + * RENDER_SURFACE_STATE Surface Pitch (p349): + * + * - For linear render target surfaces and surfaces accessed with the + * typed data port messages, the pitch must be a multiple of the + * element size for non-YUV surface formats. Pitch must be + * a multiple of 2 * element size for YUV surface formats. + * + * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we + * ignore because isl doesn't do buffers.] + * + * - For other linear surfaces, the pitch can be any multiple of + * bytes. + */ + if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { + if (isl_format_is_yuv(info->format)) { + row_pitch = isl_align(row_pitch, fmtl->bs); + } else { + row_pitch = isl_align(row_pitch, 2 * fmtl->bs); + } } - return; + break; + default: + /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> + * RENDER_SURFACE_STATE Surface Pitch (p349): + * + * - For tiled surfaces, the pitch must be a multiple of the tile + * width. + */ + row_pitch = isl_align(row_pitch, tile_info->width); + break; } + + return row_pitch; +} + +/** + * Calculate the surface's total height, including padding, in units of + * surface elements. 
+ */ +static uint32_t +isl_calc_total_height_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + uint32_t phys_array_len, + uint32_t row_pitch, + uint32_t array_pitch_el_rows) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + uint32_t total_h_el = phys_array_len * array_pitch_el_rows; + uint32_t pad_bytes = 0; + + /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Render Target and Media + * Surfaces: + * + * The data port accesses data (pixels) outside of the surface if they + * are contained in the same cache request as pixels that are within the + * surface. These pixels will not be returned by the requesting message, + * however if these pixels lie outside of defined pages in the GTT, + * a GTT error will result when the cache request is processed. In + * order to avoid these GTT errors, “padding” at the bottom of the + * surface is sometimes necessary. + * + * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: + * + * ... Lots of padding requirements, all listed separately below. + */ + + /* We can safely ignore the first padding requirement, quoted below, + * because isl doesn't do buffers. + * + * - [pre-BDW] For buffers, which have no inherent “height,” padding + * requirements are different. A buffer must be padded to the next + * multiple of 256 array elements, with an additional 16 bytes added + * beyond that to account for the L1 cache line. + */ + + /* + * - For compressed textures [...], padding at the bottom of the surface + * is to an even compressed row. + */ + if (isl_format_is_compressed(info->format)) + total_h_el = isl_align(total_h_el, 2); + + /* + * - For cube surfaces, an additional two rows of padding are required + * at the bottom of the surface. 
+ */ + if (info->usage & ISL_SURF_USAGE_CUBE_BIT) + total_h_el += 2; + + /* + * - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats, + * additional padding is required. These surfaces require an extra row + * plus 16 bytes of padding at the bottom in addition to the general + * padding requirements. + */ + if (isl_format_is_yuv(info->format) && + (fmtl->bs == 96 || fmtl->bs == 48|| fmtl->bs == 24)) { + total_h_el += 1; + pad_bytes += 16; + } + + /* + * - For linear surfaces, additional padding of 64 bytes is required at + * the bottom of the surface. This is in addition to the padding + * required above. + */ + if (tile_info->tiling == ISL_TILING_LINEAR) + pad_bytes += 64; + + /* The below text weakens, not strengthens, the padding requirements for + * linear surfaces. Therefore we can safely ignore it. + * + * - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array, + * non-MSAA, non-mip-mapped surfaces in linear memory, the only + * padding requirement is to the next aligned 64-byte boundary beyond + * the end of the surface. The rest of the padding requirements + * documented above do not apply to these surfaces. + */ + + /* + * - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and + * height % 4 != 0, the surface must be padded with + * 4-(height % 4)*Surface Pitch # of bytes. + */ + if (ISL_DEV_GEN(dev) >= 9 && + tile_info->tiling == ISL_TILING_LINEAR && + (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) { + total_h_el = isl_align(total_h_el, 4); + } + + /* + * - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded + * to 4 times the Surface Pitch # of bytes + */ + if (ISL_DEV_GEN(dev) >= 9 && + tile_info->tiling == ISL_TILING_LINEAR && + info->dim == ISL_SURF_DIM_1D) { + total_h_el += 4; + } + + /* Be sloppy. Align any leftover padding to a row boundary. 
*/ + total_h_el += isl_align_div_npot(pad_bytes, row_pitch); + + return total_h_el; +} + +bool +isl_surf_init_s(const struct isl_device *dev, + struct isl_surf *surf, + const struct isl_surf_init_info *restrict info) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + const struct isl_extent4d logical_level0_px = { + .w = info->width, + .h = info->height, + .d = info->depth, + .a = info->array_len, + }; + + enum isl_dim_layout dim_layout = + isl_surf_choose_dim_layout(dev, info->dim); + + enum isl_tiling tiling; + if (!isl_surf_choose_tiling(dev, info, &tiling)) + return false; + + struct isl_tile_info tile_info; + if (!isl_tiling_get_info(dev, tiling, fmtl->bs, &tile_info)) + return false; + + enum isl_msaa_layout msaa_layout; + if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout)) + return false; + + struct isl_extent3d lod_align_el; + isl_choose_lod_alignment_el(dev, info, tiling, msaa_layout, &lod_align_el); + + struct isl_extent3d lod_align_sa = + isl_extent3d_el_to_sa(info->format, lod_align_el); + + struct isl_extent4d phys_level0_sa; + isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, + &phys_level0_sa); + + enum isl_array_pitch_span array_pitch_span = + isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); + + struct isl_extent2d phys_slice0_sa; + isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout, + &lod_align_sa, &phys_level0_sa, + &phys_slice0_sa); + assert(phys_slice0_sa.w % fmtl->bw == 0); + assert(phys_slice0_sa.h % fmtl->bh == 0); + + const uint32_t row_pitch = isl_calc_row_pitch(dev, info, &tile_info, + &lod_align_sa, + &phys_slice0_sa); + + const uint32_t array_pitch_sa_rows = + isl_calc_array_pitch_sa_rows(dev, info, dim_layout, array_pitch_span, + &lod_align_sa, &phys_level0_sa, + &phys_slice0_sa); + assert(array_pitch_sa_rows % fmtl->bh == 0); + + const uint32_t array_pitch_el_rows = array_pitch_sa_rows / fmtl->bh; + + const uint32_t total_h_el = + 
isl_calc_total_height_el(dev, info, &tile_info, + phys_level0_sa.array_len, row_pitch, + array_pitch_el_rows); + + const uint32_t total_h_sa = total_h_el * fmtl->bh; + const uint32_t size = row_pitch * total_h_sa; + + /* Alignment of surface base address, in bytes */ + uint32_t base_alignment = info->min_alignment; + base_alignment = isl_align(base_alignment, tile_info.size); + + *surf = (struct isl_surf) { + .dim = info->dim, + .dim_layout = dim_layout, + .msaa_layout = msaa_layout, + .tiling = tiling, + .format = info->format, + + .levels = info->levels, + .samples = info->samples, + + .lod_alignment_el = lod_align_el, + .logical_level0_px = logical_level0_px, + .phys_level0_sa = phys_level0_sa, + + .size = size, + .alignment = base_alignment, + .row_pitch = row_pitch, + .array_pitch_el_rows = array_pitch_el_rows, + .array_pitch_span = array_pitch_span, + + .usage = info->usage, + }; + + return true; } diff --git a/src/vulkan/isl.h b/src/vulkan/isl.h index fd6b1c2b5ad..ef42e2d69df 100644 --- a/src/vulkan/isl.h +++ b/src/vulkan/isl.h @@ -24,34 +24,70 @@ /** * @file * @brief Intel Surface Layout + * + * Header Layout + * ============= + * + * The header is ordered as: + * - forward declarations + * - macros that may be overridden at compile-time for specific gens + * - enums and constants + * - structs and unions + * - functions + * + * + * Surface Units + * ============= + * + * Some symbol names have a unit suffix. + * + * - px: logical pixels + * - sa: physical surface samples + * - el: physical surface elements + * - sa_rows: rows of physical surface samples + * - el_rows: rows of physical surface elements + * + * The Broadwell PRM [1] defines a surface element as follows: + * + * An element is defined as a pixel in uncompresed surface formats, and as + * a compression block in compressed surface formats. For + * MSFMT_DEPTH_STENCIL type multisampled surfaces, an element is a sample. 
+ * + * [1]: Broadwell PRM >> Volume 2d: Command Reference: Structures >> + * RENDER_SURFACE_STATE Surface Vertical Alignment (p325) */ #pragma once +#include #include #include +#include "util/macros.h" + #ifdef __cplusplus extern "C" { #endif struct brw_device_info; +#ifndef ISL_DEV_GEN /** - * WARNING: These values differ from the hardware enum values, which are - * unstable across hardware generations. + * @brief Get the hardware generation of isl_device. * - * Note that legacy Y tiling is ISL_TILING_Y0 instead of ISL_TILING_Y, to - * clearly distinguish it from Yf and Ys. + * You can define this as a compile-time constant in the CFLAGS. For example, + * `gcc -DISL_DEV_GEN(dev)=9 ...`. */ -enum isl_tiling { - ISL_TILING_LINEAR, - ISL_TILING_W, - ISL_TILING_X, - ISL_TILING_Y0, /**< Legacy Y tiling */ - ISL_TILING_Yf, - ISL_TILING_Ys, -}; +#define ISL_DEV_GEN(__dev) ((__dev)->info->gen) +#endif + +#ifndef ISL_DEV_USE_SEPARATE_STENCIL +/** + * You can define this as a compile-time constant in the CFLAGS. For example, + * `gcc -DISL_DEV_USE_SEPARATE_STENCIL(dev)=1 ...`. + */ +#define ISL_DEV_USE_SEPARATE_STENCIL(__dev) ((__dev)->use_separate_stencil) +#endif /** * Hardware enumeration SURFACE_FORMAT. @@ -286,6 +322,9 @@ enum isl_format { ISL_FORMAT_UNSUPPORTED = UINT16_MAX, }; +/** + * Numerical base type for channels of isl_format. + */ enum isl_base_type { ISL_VOID, ISL_RAW, @@ -301,6 +340,9 @@ enum isl_base_type { ISL_SSCALED, }; +/** + * Colorspace of isl_format. + */ enum isl_colorspace { ISL_COLORSPACE_NONE = 0, ISL_COLORSPACE_LINEAR, @@ -309,7 +351,7 @@ enum isl_colorspace { }; /** - * Texture compression mode + * Texture compression mode of isl_format. */ enum isl_txc { ISL_TXC_NONE = 0, @@ -324,19 +366,203 @@ enum isl_txc { ISL_TXC_ETC2, }; +/** + * @brief Hardware tile mode + * + * WARNING: These values differ from the hardware enum values, which are + * unstable across hardware generations. 
+ * + * Note that legacy Y tiling is ISL_TILING_Y0 instead of ISL_TILING_Y, to + * clearly distinguish it from Yf and Ys. + */ +enum isl_tiling { + ISL_TILING_LINEAR = 0, + ISL_TILING_W, + ISL_TILING_X, + ISL_TILING_Y0, /**< Legacy Y tiling */ + ISL_TILING_Yf, + ISL_TILING_Ys, +}; + +/** + * @defgroup Tiling Flags + * @{ + */ +typedef uint32_t isl_tiling_flags_t; +#define ISL_TILING_LINEAR_BIT (1u << ISL_TILING_LINEAR) +#define ISL_TILING_W_BIT (1u << ISL_TILING_W) +#define ISL_TILING_X_BIT (1u << ISL_TILING_X) +#define ISL_TILING_Y0_BIT (1u << ISL_TILING_Y0) +#define ISL_TILING_Yf_BIT (1u << ISL_TILING_Yf) +#define ISL_TILING_Ys_BIT (1u << ISL_TILING_Ys) +#define ISL_TILING_ANY_MASK (~0u) +#define ISL_TILING_NON_LINEAR_MASK (~ISL_TILING_LINEAR_BIT) + +/** Any Y tiling, including legacy Y tiling. */ +#define ISL_TILING_ANY_Y_MASK (ISL_TILING_Y0_BIT | \ + ISL_TILING_Yf_BIT | \ + ISL_TILING_Ys_BIT) + +/** The Skylake BSpec refers to Yf and Ys as "standard tiling formats". */ +#define ISL_TILING_STD_Y_MASK (ISL_TILING_Yf_BIT | \ + ISL_TILING_Ys_BIT) +/** @} */ + +/** + * @brief Logical dimension of surface. + * + * Note: There is no dimension for cube map surfaces. ISL interprets cube maps + * as 2D array surfaces. + */ +enum isl_surf_dim { + ISL_SURF_DIM_1D, + ISL_SURF_DIM_2D, + ISL_SURF_DIM_3D, +}; + +/** + * @brief Physical layout of the surface's dimensions. + */ +enum isl_dim_layout { + /** + * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section + * 6.17.3: 2D Surfaces. + * + * On many gens, 1D surfaces share the same layout as 2D surfaces. From + * the G35 PRM >> Volume 1: Graphics Core >> Section 6.17.2: 1D Surfaces: + * + * One-dimensional surfaces are identical to 2D surfaces with height of + * one. + */ + ISL_DIM_LAYOUT_GEN4_2D, + + /** + * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section + * 6.17.5: 3D Surfaces. 
+ */ + ISL_DIM_LAYOUT_GEN4_3D, + + /** + * For details, see the Skylake BSpec >> Memory Views >> Common Surface + * Formats >> Surface Layout and Tiling >> » 1D Surfaces. + */ + ISL_DIM_LAYOUT_GEN9_1D, +}; + +/* TODO(chadv): Explain */ +enum isl_array_pitch_span { + ISL_ARRAY_PITCH_SPAN_FULL, + ISL_ARRAY_PITCH_SPAN_COMPACT, +}; + +/** + * @defgroup Surface Usage + * @{ + */ +typedef uint64_t isl_surf_usage_flags_t; +#define ISL_SURF_USAGE_RENDER_TARGET_BIT (1u << 0) +#define ISL_SURF_USAGE_DEPTH_BIT (1u << 1) +#define ISL_SURF_USAGE_STENCIL_BIT (1u << 2) +#define ISL_SURF_USAGE_TEXTURE_BIT (1u << 3) +#define ISL_SURF_USAGE_CUBE_BIT (1u << 4) +#define ISL_SURF_USAGE_DISABLE_AUX_BIT (1u << 5) +#define ISL_SURF_USAGE_DISPLAY_BIT (1u << 6) +#define ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT (1u << 7) +#define ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT (1u << 8) +#define ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT (1u << 9) +#define ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT (1u << 10) +#define ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT (1u << 11) +/** @} */ + +/** + * @brief Multisample Format + */ +enum isl_msaa_layout { + /** + * @brief Suface is single-sampled. + */ + ISL_MSAA_LAYOUT_NONE, + + /** + * @brief [SNB+] Interleaved Multisample Format + * + * In this format, multiple samples are interleaved into each cacheline. + * In other words, the sample index is swizzled into the low 6 bits of the + * surface's virtual address space. + * + * For example, suppose the surface is legacy Y tiled, is 4x multisampled, + * and its pixel format is 32bpp. Then the first cacheline is arranged + * thus: + * + * (0,0,0) (0,1,0) (0,0,1) (1,0,1) + * (1,0,0) (1,1,0) (0,1,1) (1,1,1) + * + * (0,0,2) (1,0,2) (0,0,3) (1,0,3) + * (0,1,2) (1,1,2) (0,1,3) (1,1,3) + * + * The hardware docs refer to this format with multiple terms. In + * Sandybridge, this is the only multisample format; so no term is used. + * The Ivybridge docs refer to surfaces in this format as IMS (Interleaved + * Multisample Surface). 
Later hardware docs additionally refer to this + * format as MSFMT_DEPTH_STENCIL (because the format is deprecated for + * color surfaces). + * + * See the Sandybridge PRM, Volume 4, Part 1, Section 2.7 "Multisampled + * Surface Behavior". + * + * See the Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.1 "Interleaved + * Multisampled Surfaces". + */ + ISL_MSAA_LAYOUT_INTERLEAVED, + + /** + * @brief [IVB+] Array Multisample Format + * + * In this format, the surface's physical layout resembles that of a + * 2D array surface. + * + * Suppose the multisample surface's logical extent is (w, h) and its + * sample count is N. Then surface's physical extent is the same as + * a singlesample 2D surface whose logical extent is (w, h) and array + * length is N. Array slice `i` contains the pixel values for sample + * index `i`. + * + * The Ivybridge docs refer to surfaces in this format as UMS + * (Uncompressed Multsample Layout) and CMS (Compressed Multisample + * Surface). The Broadwell docs additionally refer to this format as + * MSFMT_MSS (MSS=Multisample Surface Storage). + * + * See the Broadwell PRM, Volume 5 "Memory Views", Section "Uncompressed + * Multisample Surfaces". + * + * See the Broadwell PRM, Volume 5 "Memory Views", Section "Compressed + * Multisample Surfaces". 
+ */ + ISL_MSAA_LAYOUT_ARRAY, +}; + + struct isl_device { const struct brw_device_info *info; + bool use_separate_stencil; }; struct isl_extent2d { - uint32_t width; - uint32_t height; + union { uint32_t w, width; }; + union { uint32_t h, height; }; }; struct isl_extent3d { - uint32_t width; - uint32_t height; - uint32_t depth; + union { uint32_t w, width; }; + union { uint32_t h, height; }; + union { uint32_t d, depth; }; +}; + +struct isl_extent4d { + union { uint32_t w, width; }; + union { uint32_t h, height; }; + union { uint32_t d, depth; }; + union { uint32_t a, array_len; }; }; struct isl_channel_layout { @@ -367,17 +593,319 @@ struct isl_format_layout { enum isl_txc txc; }; +struct isl_tile_info { + enum isl_tiling tiling; + uint32_t width; /**< in bytes */ + uint32_t height; /**< in rows of memory */ + uint32_t size; /**< in bytes */ +}; + +/** + * @brief Input to surface initialization + * + * @invariant width >= 1 + * @invariant height >= 1 + * @invariant depth >= 1 + * @invariant levels >= 1 + * @invariant samples >= 1 + * @invariant array_len >= 1 + * + * @invariant if 1D then height == 1 and depth == 1 and samples == 1 + * @invariant if 2D then depth == 1 + * @invariant if 3D then array_len == 1 and samples == 1 + */ +struct isl_surf_init_info { + enum isl_surf_dim dim; + enum isl_format format; + + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t levels; + uint32_t array_len; + uint32_t samples; + + /** Lower bound for isl_surf::alignment, in bytes. */ + uint32_t min_alignment; + + /** Lower bound for isl_surf::pitch, in bytes. */ + uint32_t min_pitch; + + isl_surf_usage_flags_t usage; + + /** Flags that alter how ISL selects isl_surf::tiling. 
*/ + isl_tiling_flags_t tiling_flags; +}; + +struct isl_surf { + enum isl_surf_dim dim; + enum isl_dim_layout dim_layout; + enum isl_msaa_layout msaa_layout; + enum isl_tiling tiling; + enum isl_format format; + + /** + * Alignment of the upper-left sample of each LOD, in units of surface + * elements. + */ + struct isl_extent3d lod_alignment_el; + + /** + * Logical extent of the surface's base level, in units of pixels. This is + * identical to the extent defined in isl_surf_init_info. + */ + struct isl_extent4d logical_level0_px; + + /** + * Physical extent of the surface's base level, in units of pixels. + * + * Consider isl_dim_layout as an operator that transforms a logical surface + * layout to a physical surface layout. Then + * + * logical_layout := (isl_surf::dim, isl_surf::logical_level0_px) + * isl_surf::phys_level0_sa := isl_surf::dim_layout * logical_layout + */ + struct isl_extent4d phys_level0_sa; + + uint32_t levels; + uint32_t samples; + + /** Total size of the surface, in bytes. */ + uint32_t size; + + /** Required alignment for the surface's base address. */ + uint32_t alignment; + + /** + * Pitch between vertically adjacent samples, in bytes. + */ + uint32_t row_pitch; + + /** + * Pitch between physical array slices, in rows of surface elements. + */ + uint32_t array_pitch_el_rows; + + enum isl_array_pitch_span array_pitch_span; + + /** Copy of isl_surf_init_info::usage. 
*/ + isl_surf_usage_flags_t usage; +}; + +extern const struct isl_format_layout isl_format_layouts[]; + void isl_device_init(struct isl_device *dev, const struct brw_device_info *info); +static inline const struct isl_format_layout * ATTRIBUTE_CONST +isl_format_get_layout(enum isl_format fmt) +{ + return &isl_format_layouts[fmt]; +} + +bool +isl_format_has_sint_channel(enum isl_format fmt) ATTRIBUTE_CONST; + +static inline bool +isl_format_is_compressed(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->txc != ISL_TXC_NONE; +} + +static inline bool +isl_format_has_bc_compression(enum isl_format fmt) +{ + switch (isl_format_get_layout(fmt)->txc) { + case ISL_TXC_DXT1: + case ISL_TXC_DXT3: + case ISL_TXC_DXT5: + return true; + case ISL_TXC_NONE: + case ISL_TXC_FXT1: + case ISL_TXC_RGTC1: + case ISL_TXC_RGTC2: + case ISL_TXC_BPTC: + case ISL_TXC_ETC1: + case ISL_TXC_ETC2: + return false; + } + + unreachable("bad texture compression mode"); + return false; +} + +static inline bool +isl_format_is_yuv(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->colorspace == ISL_COLORSPACE_YUV; +} + +static inline bool +isl_format_block_is_1x1x1(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->bw == 1 && fmtl->bh == 1 && fmtl->bd == 1; +} + +static inline bool +isl_tiling_is_std_y(enum isl_tiling tiling) +{ + return (1u << tiling) & ISL_TILING_STD_Y_MASK; +} + +bool +isl_tiling_get_info(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_tile_info *info); + void isl_tiling_get_extent(const struct isl_device *dev, enum isl_tiling tiling, - uint32_t cpp, + uint32_t format_block_size, struct isl_extent2d *e); +bool +isl_surf_choose_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling *tiling); -extern const 
struct isl_format_layout isl_format_layouts[]; +static inline bool +isl_surf_usage_is_display(isl_surf_usage_flags_t usage) +{ + return usage & ISL_SURF_USAGE_DISPLAY_BIT; +} + +static inline bool +isl_surf_usage_is_depth(isl_surf_usage_flags_t usage) +{ + return usage & ISL_SURF_USAGE_DEPTH_BIT; +} + +static inline bool +isl_surf_usage_is_stencil(isl_surf_usage_flags_t usage) +{ + return usage & ISL_SURF_USAGE_STENCIL_BIT; +} + +static inline bool +isl_surf_usage_is_depth_and_stencil(isl_surf_usage_flags_t usage) +{ + return (usage & ISL_SURF_USAGE_DEPTH_BIT) && + (usage & ISL_SURF_USAGE_STENCIL_BIT); +} + +static inline bool +isl_surf_usage_is_depth_or_stencil(isl_surf_usage_flags_t usage) +{ + return usage & (ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT); +} + +static inline bool +isl_surf_info_is_z16(const struct isl_surf_init_info *info) +{ + return (info->usage & ISL_SURF_USAGE_DEPTH_BIT) && + (info->format == ISL_FORMAT_R16_UNORM); +} + +static inline bool +isl_surf_info_is_z32_float(const struct isl_surf_init_info *info) +{ + return (info->usage & ISL_SURF_USAGE_DEPTH_BIT) && + (info->format == ISL_FORMAT_R32_FLOAT); +} + +static inline struct isl_extent2d +isl_extent2d(uint32_t width, uint32_t height) +{ + return (struct isl_extent2d) { .w = width, .h = height }; +} + +static inline struct isl_extent3d +isl_extent3d(uint32_t width, uint32_t height, uint32_t depth) +{ + return (struct isl_extent3d) { .w = width, .h = height, .d = depth }; +} + +static inline struct isl_extent4d +isl_extent4d(uint32_t width, uint32_t height, uint32_t depth, + uint32_t array_len) +{ + return (struct isl_extent4d) { + .w = width, + .h = height, + .d = depth, + .a = array_len, + }; +} + +#define isl_surf_init(dev, surf, ...) 
\ + isl_surf_init_s((dev), (surf), \ + &(struct isl_surf_init_info) { __VA_ARGS__ }); + +bool +isl_surf_init_s(const struct isl_device *dev, + struct isl_surf *surf, + const struct isl_surf_init_info *restrict info); + +/** + * Alignment of the upper-left sample of each LOD, in units of surface + * elements. + */ +static inline struct isl_extent3d +isl_surf_get_lod_alignment_el(const struct isl_surf *surf) +{ + return surf->lod_alignment_el; +} + +/** + * Alignment of the upper-left sample of each LOD, in units of surface + * samples. + */ +static inline struct isl_extent3d +isl_surf_get_lod_alignment_sa(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + return (struct isl_extent3d) { + .w = fmtl->bw * surf->lod_alignment_el.w, + .h = fmtl->bh * surf->lod_alignment_el.h, + .d = fmtl->bd * surf->lod_alignment_el.d, + }; +} + +/** + * Pitch between physical array slices, in rows of surface elements. + */ +static inline uint32_t +isl_surf_get_array_pitch_el_rows(const struct isl_surf *surf) +{ + return surf->array_pitch_el_rows; +} + +/** + * Pitch between physical array slices, in rows of surface samples. + */ +static inline uint32_t +isl_surf_get_array_pitch_sa_rows(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + return fmtl->bh * isl_surf_get_array_pitch_el_rows(surf); +} + +/** + * Pitch between physical array slices, in bytes. 
+ */ +static inline uint32_t +isl_surf_get_array_pitch(const struct isl_surf *surf) +{ + return isl_surf_get_array_pitch_sa_rows(surf) * surf->row_pitch; +} #ifdef __cplusplus } diff --git a/src/vulkan/isl_gen4.c b/src/vulkan/isl_gen4.c new file mode 100644 index 00000000000..bf9bec16f7d --- /dev/null +++ b/src/vulkan/isl_gen4.c @@ -0,0 +1,74 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl_gen4.h" +#include "isl_priv.h" + +bool +gen4_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + /* Gen4 and Gen5 do not support MSAA */ + assert(info->samples >= 1); + + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return true; +} + +void +gen4_choose_lod_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *lod_align_el) +{ + assert(info->samples == 1); + assert(msaa_layout == ISL_MSAA_LAYOUT_NONE); + assert(!isl_tiling_is_std_y(tiling)); + + /* Note that neither the surface's horizontal nor vertical image alignment + * is programmable on gen4 nor gen5. + * + * From the G35 PRM (2008-01), Volume 1 Graphics Core, Section 6.17.3.4 + * Alignment Unit Size: + * + * Note that the compressed formats are padded to a full compression + * cell. + * + * +------------------------+--------+--------+ + * | format | halign | valign | + * +------------------------+--------+--------+ + * | YUV 4:2:2 formats | 4 | 2 | + * | uncompressed formats | 4 | 2 | + * +------------------------+--------+--------+ + */ + + if (isl_format_is_compressed(info->format)) { + *lod_align_el = isl_extent3d(1, 1, 1); + return; + } + + *lod_align_el = isl_extent3d(4, 2, 1); +} diff --git a/src/vulkan/isl_gen4.h b/src/vulkan/isl_gen4.h new file mode 100644 index 00000000000..913a7c68ba9 --- /dev/null +++ b/src/vulkan/isl_gen4.h @@ -0,0 +1,47 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom 
the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +gen4_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen4_choose_lod_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *lod_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/isl_gen6.c b/src/vulkan/isl_gen6.c new file mode 100644 index 00000000000..8d522c37c29 --- /dev/null +++ b/src/vulkan/isl_gen6.c @@ -0,0 +1,160 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including 
the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl_gen6.h" +#include "isl_priv.h" + +bool +gen6_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(ISL_DEV_GEN(dev) == 6); + assert(info->samples >= 1); + + if (info->samples == 1) { + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return false; + } + + /* From the Sandybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Surface + * Format: + * + * If Number of Multisamples is set to a value other than + * MULTISAMPLECOUNT_1, this field cannot be set to the following + * formats: + * + * - any format with greater than 64 bits per element + * - any compressed texture format (BC*) + * - any YCRCB* format + */ + if (fmtl->bs > 8) + return false; + if (isl_format_is_compressed(info->format)) + return false; + if (isl_format_is_yuv(info->format)) + return false; + + /* From the Sandybridge PRM, Volume 4 Part 1 p85, SURFACE_STATE, Number of + * Multisamples: + * + * If this field is any value other than MULTISAMPLECOUNT_1 the + * following restrictions apply: + * + * - the Surface Type must be SURFTYPE_2D + * - [...] 
+ */ + if (info->dim != ISL_SURF_DIM_2D) + return false; + + /* More obvious restrictions */ + if (isl_surf_usage_is_display(info->usage)) + return false; + if (tiling == ISL_TILING_LINEAR) + return false; + if (info->levels > 1) + return false; + + *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; + return true; +} + +void +gen6_choose_lod_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *lod_align_el) +{ + /* Note that the surface's horizontal image alignment is not programmable + * on Sandybridge. + * + * From the Sandybridge PRM (2011-05), Volume 1, Part 1, Section 7.18.3.4 + * Alignment Unit Size: + * + * Note that the compressed formats are padded to a full compression cell. + * + * +------------------------+--------+--------+ + * | format | halign | valign | + * +------------------------+--------+--------+ + * | YUV 4:2:2 formats | 4 | * | + * | uncompressed formats | 4 | * | + * +------------------------+--------+--------+ + * + * * For these formats, the vertical alignment factor “j” is determined + * as follows: + * - j = 4 for any depth buffer + * - j = 2 for separate stencil buffer + * - j = 4 for any render target surface is multisampled (4x) + * - j = 2 for all other render target surface + * + * From the Sandybridge PRM (2011-05), Volume 4, Part 1, Section 2.11.2 + * SURFACE_STATE, Surface Vertical Alignment: + * + * - This field must be set to VALIGN_2 if the Surface Format is 96 bits + * per element (BPE).
+ * + * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL + * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY + * (0x190) + */ + + if (isl_format_is_compressed(info->format)) { + *lod_align_el = isl_extent3d(1, 1, 1); + return; + } + + if (isl_format_is_yuv(info->format)) { + *lod_align_el = isl_extent3d(4, 2, 1); + return; + } + + if (info->samples > 1) { + *lod_align_el = isl_extent3d(4, 4, 1); + return; + } + + if (isl_surf_usage_is_depth_or_stencil(info->usage) && + !ISL_DEV_USE_SEPARATE_STENCIL(dev)) { + /* interleaved depthstencil buffer */ + *lod_align_el = isl_extent3d(4, 4, 1); + return; + } + + if (isl_surf_usage_is_depth(info->usage)) { + /* separate depth buffer */ + *lod_align_el = isl_extent3d(4, 4, 1); + return; + } + + if (isl_surf_usage_is_stencil(info->usage)) { + /* separate stencil buffer */ + *lod_align_el = isl_extent3d(4, 2, 1); + return; + } + + *lod_align_el = isl_extent3d(4, 2, 1); +} diff --git a/src/vulkan/isl_gen6.h b/src/vulkan/isl_gen6.h new file mode 100644 index 00000000000..56b7f2cb0b7 --- /dev/null +++ b/src/vulkan/isl_gen6.h @@ -0,0 +1,47 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +gen6_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen6_choose_lod_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *lod_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/isl_gen7.c b/src/vulkan/isl_gen7.c new file mode 100644 index 00000000000..2ac1852402e --- /dev/null +++ b/src/vulkan/isl_gen7.c @@ -0,0 +1,392 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl_gen7.h" +#include "isl_priv.h" + +bool +gen7_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + bool require_array = false; + bool require_interleaved = false; + + assert(ISL_DEV_GEN(dev) == 7); + assert(info->samples >= 1); + + if (info->samples == 1) { + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return true; + } + + /* From the Ivybridge PRM, Volume 4 Part 1 p63, SURFACE_STATE, Surface + * Format: + * + * If Number of Multisamples is set to a value other than + * MULTISAMPLECOUNT_1, this field cannot be set to the following + * formats: any format with greater than 64 bits per element, any + * compressed texture format (BC*), and any YCRCB* format. + */ + if (fmtl->bs > 8) + return false; + if (isl_format_is_compressed(info->format)) + return false; + if (isl_format_is_yuv(info->format)) + return false; + + /* From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of + * Multisamples: + * + * - If this field is any value other than MULTISAMPLECOUNT_1, the + * Surface Type must be SURFTYPE_2D. 
+ * + * - If this field is any value other than MULTISAMPLECOUNT_1, Surface + * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero + */ + if (info->dim != ISL_SURF_DIM_2D) + return false; + if (info->levels > 1) + return false; + + /* The Ivybridge PRM insists twice that signed integer formats cannot be + * multisampled. + * + * From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of + * Multisamples: + * + * - This field must be set to MULTISAMPLECOUNT_1 for SINT MSRTs when + * all RT channels are not written. + * + * And errata from the Ivybridge PRM, Volume 4 Part 1 p77, + * RENDER_SURFACE_STATE, MCS Enable: + * + * This field must be set to 0 [MULTISAMPLECOUNT_1] for all SINT MSRTs + * when all RT channels are not written. + * + * Note that the above SINT restrictions apply only to *MSRTs* (that is, + * *multisampled* render targets). The restrictions seem to permit an MCS + * if the render target is singlesampled. + */ + if (isl_format_has_sint_channel(info->format)) + return false; + + /* More obvious restrictions */ + if (isl_surf_usage_is_display(info->usage)) + return false; + if (tiling == ISL_TILING_LINEAR) + return false; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Surface Storage Format: + * + * +---------------------+----------------------------------------------------------------+ + * | MSFMT_MSS | Multisampled surface was/is rendered as a render target | + * | MSFMT_DEPTH_STENCIL | Multisampled surface was rendered as a depth or stencil buffer | + * +---------------------+----------------------------------------------------------------+ + * + * In the table above, MSFMT_MSS refers to ISL_MSAA_LAYOUT_ARRAY, and + * MSFMT_DEPTH_STENCIL refers to ISL_MSAA_LAYOUT_INTERLEAVED.
+ */ + if (isl_surf_usage_is_depth_or_stencil(info->usage)) + require_interleaved = true; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Surface Storage Format: + * + * If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, Width + * is >= 8192 (meaning the actual surface width is >= 8193 pixels), this + * field must be set to MSFMT_MSS. + */ + if (info->samples == 8 && info->width == 8192) + require_array = true; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Surface Storage Format: + * + * If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, + * ((Depth+1) * (Height+1)) is > 4,194,304, OR if the surface’s Number + * of Multisamples is MULTISAMPLECOUNT_4, ((Depth+1) * (Height+1)) is + * > 8,388,608, this field must be set to MSFMT_DEPTH_STENCIL. + */ + if ((info->samples == 8 && info->height > 4194304u) || + (info->samples == 4 && info->height > 8388608u)) + require_interleaved = true; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Surface Storage Format: + * + * This field must be set to MSFMT_DEPTH_STENCIL if Surface Format is + * one of the following: I24X8_UNORM, L24X8_UNORM, A24X8_UNORM, or + * R24_UNORM_X8_TYPELESS. + */ + if (info->format == ISL_FORMAT_I24X8_UNORM || + info->format == ISL_FORMAT_L24X8_UNORM || + info->format == ISL_FORMAT_A24X8_UNORM || + info->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) + require_interleaved = true; + + if (require_array && require_interleaved) + return false; + + if (require_interleaved) { + *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; + return true; + } + + /* Default to the array layout because it permits multisample + * compression.
+ */ + *msaa_layout = ISL_MSAA_LAYOUT_ARRAY; + return true; +} + +static bool +gen7_format_needs_valign2(const struct isl_device *dev, + enum isl_format format) +{ + /* This workaround applies only to gen7 */ + if (ISL_DEV_GEN(dev) > 7) + return false; + + /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, + * RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL + * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY + * (0x190) + * + * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT. + */ + return isl_format_is_yuv(format) || + format == ISL_FORMAT_R32G32B32_FLOAT; +} + +void +gen7_filter_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + isl_tiling_flags_t *flags) +{ + /* IVB+ requires separate stencil */ + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); + + /* Clear flags unsupported on this hardware */ + if (ISL_DEV_GEN(dev) < 9) { + *flags &= ~ISL_TILING_Yf_BIT; + *flags &= ~ISL_TILING_Ys_BIT; + } + + /* And... clear the Yf and Ys bits anyway because Anvil doesn't support + * them yet. + */ + *flags &= ~ISL_TILING_Yf_BIT; /* FINISHME[SKL]: Support Yf */ + *flags &= ~ISL_TILING_Ys_BIT; /* FINISHME[SKL]: Support Ys */ + + if (isl_surf_usage_is_depth(info->usage)) { + /* Depth requires Y. */ + *flags &= ISL_TILING_ANY_Y_MASK; + } + + /* Separate stencil requires W tiling, and W tiling requires separate + * stencil. 
+ */ + if (isl_surf_usage_is_stencil(info->usage)) { + *flags &= ISL_TILING_W_BIT; + } else { + *flags &= ~ISL_TILING_W_BIT; + } + + if (info->usage & (ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT | + ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT | + ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT)) { + assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT); + isl_finishme("%s:%s: handle rotated display surfaces", + __FILE__, __func__); + } + + if (info->usage & (ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT | + ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT)) { + assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT); + isl_finishme("%s:%s: handle flipped display surfaces", + __FILE__, __func__); + } + + if (info->usage & ISL_SURF_USAGE_DISPLAY_BIT) { + /* Before Skylake, the display engine does not accept Y */ + /* FINISHME[SKL]: Y tiling for display surfaces */ + *flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT); + } + + if (info->samples > 1) { + /* From the Sandybridge PRM, Volume 4 Part 1, SURFACE_STATE Tiled + * Surface: + * + * For multisample render targets, this field must be 1 (true). MSRTs + * can only be tiled. + * + * Multisample surfaces never require X tiling, and Y tiling generally + * performs better than X. So choose Y. (Unless it's stencil, then it + * must be W). + */ + *flags &= (ISL_TILING_ANY_Y_MASK | ISL_TILING_W_BIT); + } + + /* For 1D surfaces, use linear when possible. 1D surfaces (array and + * non-array) do not benefit from tiling. In fact, it leads to less + * efficient use of memory due to tile alignment. + */ + if (info->dim == ISL_SURF_DIM_1D && (*flags & ISL_TILING_LINEAR_BIT)) { + *flags = ISL_TILING_LINEAR_BIT; + } + + /* workaround */ + if (ISL_DEV_GEN(dev) == 7 && + gen7_format_needs_valign2(dev, info->format) && + (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) && + info->samples == 1) { + /* Y tiling is illegal. From the Ivybridge PRM, Vol4 Part1 2.12.2.1, + * SURFACE_STATE Surface Vertical Alignment: + * + * This field must be set to VALIGN_4 for all tiled Y Render Target + * surfaces. 
+ */ + *flags &= ~ISL_TILING_Y0_BIT; + } +} + +/** + * Choose horizontal LOD alignment, in units of surface elements. + */ +static uint32_t +gen7_choose_halign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info) +{ + if (isl_format_is_compressed(info->format)) + return 1; + + /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, + * RENDER_SURFACE_STATE Surface Horizontal Alignment: + * + * - This field is intended to be set to HALIGN_8 only if the surface + * was rendered as a depth buffer with Z16 format or a stencil buffer, + * since these surfaces support only alignment of 8. Use of HALIGN_8 + * for other surfaces is supported, but uses more memory. + */ + if (isl_surf_info_is_z16(info) || + isl_surf_usage_is_stencil(info->usage)) + return 8; + + return 4; +} + +/** + * Choose vertical LOD alignment, in units of surface elements. + */ +static uint32_t +gen7_choose_valign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling) +{ + bool require_valign2 = false; + bool require_valign4 = false; + + if (isl_format_is_compressed(info->format)) + return 1; + + if (gen7_format_needs_valign2(dev, info->format)) + require_valign2 = true; + + /* From the Ivybridge PRM, Volume 4, Part 1, Section 2.12.1: + * RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - This field is intended to be set to VALIGN_4 if the surface was + * rendered as a depth buffer, for a multisampled (4x) render target, + * or for a multisampled (8x) render target, since these surfaces + * support only alignment of 4. Use of VALIGN_4 for other surfaces is + * supported, but uses more memory. This field must be set to + * VALIGN_4 for all tiled Y Render Target surfaces.
+ * + */ + if (isl_surf_usage_is_depth(info->usage) || + info->samples > 1 || + tiling == ISL_TILING_Y0) { + require_valign4 = true; + } + + if (isl_surf_usage_is_stencil(info->usage)) { + /* The Ivybridge PRM states that the stencil buffer's vertical alignment + * is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment + * Unit Size]. However, valign=8 is outside the set of valid values of + * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2 + * (0x0) and VALIGN_4 (0x1). + * + * The PRM is generally confused about the width, height, and alignment + * of the stencil buffer; and this confusion appears elsewhere. For + * example, the following PRM text effectively converts the stencil + * buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM, + * Volume 1, Part 1, Section + * 6.18.4.2 Base Address and LOD Calculation]: + * + * For separate stencil buffer, the width must be mutiplied by 2 and + * height divided by 2 as follows: + * + * w_L = 2*i*ceil(W_L/i) + * h_L = 1/2*j*ceil(H_L/j) + * + * The root of the confusion is that, in W tiling, each pair of rows is + * interleaved into one. + * + * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API + * is more polished. + */ + require_valign4 = true; + } + + assert(!require_valign2 || !require_valign4); + + if (require_valign4) + return 4; + + /* Prefer VALIGN_2 because it conserves memory. */ + return 2; +} + +void +gen7_choose_lod_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *lod_align_el) +{ + /* IVB+ does not support combined depthstencil. 
*/ + assert(!isl_surf_usage_is_depth_and_stencil(info->usage)); + + *lod_align_el = (struct isl_extent3d) { + .w = gen7_choose_halign_el(dev, info), + .h = gen7_choose_valign_el(dev, info, tiling), + .d = 1, + }; +} diff --git a/src/vulkan/isl_gen7.h b/src/vulkan/isl_gen7.h new file mode 100644 index 00000000000..c39bd4005f1 --- /dev/null +++ b/src/vulkan/isl_gen7.h @@ -0,0 +1,52 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void +gen7_filter_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + isl_tiling_flags_t *flags); + +bool +gen7_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen7_choose_lod_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *lod_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/isl_gen8.c b/src/vulkan/isl_gen8.c new file mode 100644 index 00000000000..fe118cae000 --- /dev/null +++ b/src/vulkan/isl_gen8.c @@ -0,0 +1,229 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl_gen8.h" +#include "isl_priv.h" + +bool +gen8_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + bool require_array = false; + bool require_interleaved = false; + + assert(info->samples >= 1); + + if (info->samples == 1) { + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return true; + } + + /* From the Broadwell PRM >> Volume2d: Command Structures >> + * RENDER_SURFACE_STATE Tile Mode: + * + * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field + * must be YMAJOR. + * + * As usual, though, stencil is special. + */ + if (!isl_tiling_is_std_y(tiling) && !isl_surf_usage_is_stencil(info->usage)) + return false; + + /* From the Broadwell PRM >> Volume2d: Command Structures >> + * RENDER_SURFACE_STATE Multisampled Surface Storage Format: + * + * All multisampled render target surfaces must have this field set to + * MSFMT_MSS + */ + if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) + require_array = true; + + /* From the Broadwell PRM >> Volume2d: Command Structures >> + * RENDER_SURFACE_STATE Number of Multisamples: + * + * - If this field is any value other than MULTISAMPLECOUNT_1, the + * Surface Type must be SURFTYPE_2D This field must be set to + * MULTISAMPLECOUNT_1 unless the surface is a Sampling Engine surface + * or Render Target surface. + * + * - If this field is any value other than MULTISAMPLECOUNT_1, Surface + * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero. 
+ */ + if (info->dim != ISL_SURF_DIM_2D) + return false; + if (info->levels > 1) + return false; + + /* More obvious restrictions */ + if (isl_surf_usage_is_display(info->usage)) + return false; + if (isl_format_is_compressed(info->format)) + return false; + if (isl_format_is_yuv(info->format)) + return false; + + if (isl_surf_usage_is_depth_or_stencil(info->usage)) + require_interleaved = true; + + if (require_array && require_interleaved) + return false; + + if (require_interleaved) { + *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; + return true; + } + + *msaa_layout = ISL_MSAA_LAYOUT_ARRAY; + return true; +} + +/** + * Choose horizontal LOD alignment, in units of surface elements. + */ +static uint32_t +gen8_choose_halign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info) +{ + if (isl_format_is_compressed(info->format)) + return 1; + + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * - This field is intended to be set to HALIGN_8 only if the surface + * was rendered as a depth buffer with Z16 format or a stencil buffer. + * In this case it must be set to HALIGN_8 since these surfaces + * support only alignment of 8. [...] + */ + if (isl_surf_info_is_z16(info)) + return 8; + if (isl_surf_usage_is_stencil(info->usage)) + return 8; + + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * [...] For Z32 formats it must be set to HALIGN_4. + */ + if (isl_surf_usage_is_depth(info->usage)) + return 4; + + if (!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) { + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * - When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, + * HALIGN 16 must be used. 
+ * + * This case handles color surfaces that may own an auxiliary MCS, CCS_D, + * or CCS_E. Depth buffers, including those that own an auxiliary HiZ + * surface, are handled above and do not require HALIGN_16. + */ + assert(!isl_surf_usage_is_depth(info->usage)); + return 16; + } + + /* XXX(chadv): I believe the hardware requires each image to be + * cache-aligned. If that's true, then defaulting to halign=4 is wrong for + * many formats. Depending on the format's block size, we may need to + * increase halign to 8. + */ + return 4; +} + +/** + * Choose vertical LOD alignment, in units of surface elements. + */ +static uint32_t +gen8_choose_valign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info) +{ + /* From the Broadwell PRM > Volume 2d: Command Reference: Structures + * > RENDER_SURFACE_STATE Surface Vertical Alignment (p325): + * + * - For Sampling Engine and Render Target Surfaces: This field + * specifies the vertical alignment requirement in elements for the + * surface. [...] An element is defined as a pixel in uncompresed + * surface formats, and as a compression block in compressed surface + * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an + * element is a sample. + * + * - This field is intended to be set to VALIGN_4 if the surface was + * rendered as a depth buffer, for a multisampled (4x) render target, + * or for a multisampled (8x) render target, since these surfaces + * support only alignment of 4. Use of VALIGN_4 for other surfaces is + * supported, but increases memory usage. + * + * - This field is intended to be set to VALIGN_8 only if the surface + * was rendered as a stencil buffer, since stencil buffer surfaces + * support only alignment of 8. If set to VALIGN_8, Surface Format + * must be R8_UINT. 
+ */ + + if (isl_format_is_compressed(info->format)) + return 1; + + if (isl_surf_usage_is_stencil(info->usage)) + return 8; + + return 4; +} + +void +gen8_choose_lod_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *lod_align_el) +{ + assert(!isl_tiling_is_std_y(tiling)); + + /* The below text from the Broadwell PRM provides some insight into the + * hardware's requirements for LOD alignment. From the Broadwell PRM >> + * Volume 5: Memory Views >> Surface Layout >> 2D Surfaces: + * + * These [2D surfaces] must adhere to the following memory organization + * rules: + * + * - For non-compressed texture formats, each mipmap must start on an + * even row within the monolithic rectangular area. For + * 1-texel-high mipmaps, this may require a row of padding below + * the previous mipmap. This restriction does not apply to any + * compressed texture formats; each subsequent (lower-res) + * compressed mipmap is positioned directly below the previous + * mipmap. + * + * - Vertical alignment restrictions vary with memory tiling type: + * 1 DWord for linear, 16-byte (DQWord) for tiled. (Note that tiled + * mipmaps are not required to start at the left edge of a tile + * row.) 
+ */ + + *lod_align_el = (struct isl_extent3d) { + .w = gen8_choose_halign_el(dev, info), + .h = gen8_choose_valign_el(dev, info), + .d = 1, + }; +} diff --git a/src/vulkan/isl_gen8.h b/src/vulkan/isl_gen8.h new file mode 100644 index 00000000000..632d61936c1 --- /dev/null +++ b/src/vulkan/isl_gen8.h @@ -0,0 +1,47 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +gen8_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen8_choose_lod_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *lod_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/isl_gen9.c b/src/vulkan/isl_gen9.c new file mode 100644 index 00000000000..00634e4e54c --- /dev/null +++ b/src/vulkan/isl_gen9.c @@ -0,0 +1,184 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl_gen8.h" +#include "isl_gen9.h" +#include "isl_priv.h" + +/** + * Calculate the LOD alignment, in units of surface samples, for the standard + * tiling formats Yf and Ys. + */ +static void +gen9_calc_std_lod_alignment_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *align_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(isl_tiling_is_std_y(tiling)); + + const uint32_t bs = fmtl->bs; + const uint32_t is_Ys = tiling == ISL_TILING_Ys; + + switch (info->dim) { + case ISL_SURF_DIM_1D: + /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface + * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. + */ + *align_sa = (struct isl_extent3d) { + .w = 1 << (12 - (ffs(bs) - 1) + (4 * is_Ys)), + .h = 1, + .d = 1, + }; + return; + case ISL_SURF_DIM_2D: + /* See the Skylake BSpec > Memory Views > Common Surface Formats > + * Surface Layout and Tiling > 2D Surfaces > 2D/CUBE Alignment + * Requirements. + */ + *align_sa = (struct isl_extent3d) { + .w = 1 << (6 - ((ffs(bs) - 1) / 2) + (4 * is_Ys)), + .h = 1 << (6 - ((ffs(bs) - 0) / 2) + (4 * is_Ys)), + .d = 1, + }; + + if (is_Ys) { + /* FINISHME(chadv): I don't trust this code. Untested. */ + isl_finishme("%s:%s: [SKL+] multisample TileYs", __FILE__, __func__); + + switch (msaa_layout) { + case ISL_MSAA_LAYOUT_NONE: + case ISL_MSAA_LAYOUT_INTERLEAVED: + break; + case ISL_MSAA_LAYOUT_ARRAY: + align_sa->w >>= (ffs(info->samples) - 0) / 2; + align_sa->h >>= (ffs(info->samples) - 1) / 2; + break; + } + } + return; + + case ISL_SURF_DIM_3D: + /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface + * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. 
+ */ + *align_sa = (struct isl_extent3d) { + .w = 1 << (4 - ((ffs(bs) + 1) / 3) + (4 * is_Ys)), + .h = 1 << (4 - ((ffs(bs) - 1) / 3) + (2 * is_Ys)), + .d = 1 << (4 - ((ffs(bs) - 0) / 3) + (2 * is_Ys)), + }; + return; + } + + unreachable("bad isl_surface_type"); +} + +void +gen9_choose_lod_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *lod_align_el) +{ + /* This BSpec text provides some insight into the hardware's alignment + * requirements [Skylake BSpec > Memory Views > Common Surface Formats > + * Surface Layout and Tiling > 2D Surfaces]: + * + * An LOD must be aligned to a cache-line except for some special cases + * related to Planar YUV surfaces. In general, the cache-alignment + * restriction implies there is a minimum height for an LOD of 4 texels. + * So, LODs which are smaller than 4 high are padded. + * + * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - For Sampling Engine and Render Target Surfaces: This field + * specifies the vertical alignment requirement in elements for the + * surface. [...] An element is defined as a pixel in uncompresed + * surface formats, and as a compression block in compressed surface + * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an + * element is a sample. + * + * - This field is used for 2D, CUBE, and 3D surface alignment when Tiled + * Resource Mode is TRMODE_NONE (Tiled Resource Mode is disabled). + * This field is ignored for 1D surfaces and also when Tiled Resource + * Mode is not TRMODE_NONE (e.g. Tiled Resource Mode is enabled). + * + * See the appropriate Alignment table in the "Surface Layout and + * Tiling" section under Common Surface Formats for the table of + * alignment values for Tiled Resrouces. + * + * - For uncompressed surfaces, the units of "j" are rows of pixels on + * the physical surface. 
For compressed texture formats, the units of + * "j" are in compression blocks, thus each increment in "j" is equal + * to h pixels, where h is the height of the compression block in + * pixels. + * + * - Valid Values: VALIGN_4, VALIGN_8, VALIGN_16 + * + * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Horizontal + * Alignment: + * + * - For uncompressed surfaces, the units of "i" are pixels on the + * physical surface. For compressed texture formats, the units of "i" + * are in compression blocks, thus each increment in "i" is equal to + * w pixels, where w is the width of the compression block in pixels. + * + * - Valid Values: HALIGN_4, HALIGN_8, HALIGN_16 + */ + + if (isl_tiling_is_std_y(tiling)) { + struct isl_extent3d lod_align_sa; + gen9_calc_std_lod_alignment_sa(dev, info, tiling, msaa_layout, + &lod_align_sa); + + *lod_align_el = isl_extent3d_sa_to_el(info->format, lod_align_sa); + return; + } + + if (info->dim == ISL_SURF_DIM_1D) { + /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface + * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. + */ + *lod_align_el = isl_extent3d(64, 1, 1); + return; + } + + if (isl_format_is_compressed(info->format)) { + /* On Gen9, the meaning of RENDER_SURFACE_STATE's + * SurfaceHorizontalAlignment and SurfaceVerticalAlignment changed for + * compressed formats. They now indicate a multiple of the compression + * block. For example, if the compression mode is ETC2 then HALIGN_4 + * indicates a horizontal alignment of 16 pixels. + * + * To avoid wasting memory, choose the smallest alignment possible: + * HALIGN_4 and VALIGN_4. 
+ */ + *lod_align_el = isl_extent3d(4, 4, 1); + return; + } + + gen8_choose_lod_alignment_el(dev, info, tiling, msaa_layout, lod_align_el); +} diff --git a/src/vulkan/isl_gen9.h b/src/vulkan/isl_gen9.h new file mode 100644 index 00000000000..14252b6f70c --- /dev/null +++ b/src/vulkan/isl_gen9.h @@ -0,0 +1,41 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void +gen9_choose_lod_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *lod_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/isl_priv.h b/src/vulkan/isl_priv.h new file mode 100644 index 00000000000..1c9343a7d1f --- /dev/null +++ b/src/vulkan/isl_priv.h @@ -0,0 +1,141 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include + +#include "brw_device_info.h" +#include "mesa/main/imports.h" +#include "util/macros.h" + +#include "isl.h" + +#define isl_finishme(format, ...) 
\ + __isl_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__) + +void PRINTFLIKE(3, 4) UNUSED +__isl_finishme(const char *file, int line, const char *fmt, ...); + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static inline uint32_t +ffs(uint32_t n) { + return __builtin_ffs(n); +} + +static inline bool +isl_is_pow2(uintmax_t n) +{ + return !(n & (n - 1)); +} + +/** + * Alignment must be a power of 2. + */ +static inline bool +isl_is_aligned(uintmax_t n, uintmax_t a) +{ + assert(isl_is_pow2(a)); + return (n & (a - 1)) == 0; +} + +/** + * Alignment must be a power of 2. + */ +static inline uintmax_t +isl_align(uintmax_t n, uintmax_t a) +{ + assert(isl_is_pow2(a)); + return (n + a - 1) & ~(a - 1); +} + +static inline uintmax_t +isl_align_npot(uintmax_t n, uintmax_t a) +{ + assert(a > 0); + return ((n + a - 1) / a) * a; +} + +/** + * Alignment must be a power of 2. + */ +static inline uintmax_t +isl_align_div(uintmax_t n, uintmax_t a) +{ + return isl_align(n, a) / a; +} + +static inline uintmax_t +isl_align_div_npot(uintmax_t n, uintmax_t a) +{ + return isl_align_npot(n, a) / a; +} + +/** + * Log base 2, rounding towards zero. 
+ */ +static inline uint32_t +isl_log2u(uint32_t n) +{ + assert(n != 0); + return 31 - __builtin_clz(n); +} + +static inline uint32_t +isl_minify(uint32_t n, uint32_t levels) +{ + if (unlikely(n == 0)) + return 0; + else + return MAX(n >> levels, 1); +} + +static inline struct isl_extent3d +isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + assert(extent_sa.w % fmtl->bw == 0); + assert(extent_sa.h % fmtl->bh == 0); + assert(extent_sa.d % fmtl->bd == 0); + + return (struct isl_extent3d) { + .w = extent_sa.w / fmtl->bw, + .h = extent_sa.h / fmtl->bh, + .d = extent_sa.d / fmtl->bd, + }; +} + +static inline struct isl_extent3d +isl_extent3d_el_to_sa(enum isl_format fmt, struct isl_extent3d extent_el) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return (struct isl_extent3d) { + .w = extent_el.w * fmtl->bw, + .h = extent_el.h * fmtl->bh, + .d = extent_el.d * fmtl->bd, + }; +} -- cgit v1.2.3 From b36938964063a4072abfd779f5607743dbc3b6f1 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 2 Dec 2015 16:46:16 -0800 Subject: anv/image: Use isl to calculate surface layout Remove the surface layout calculations in anv_image_make_surface(). Let isl_surf_init() do the heavy lifting. Fixes 8 Crucible tests and regresses none. (hw=Broadwell and crucible@33d91ec). --- src/vulkan/anv_image.c | 239 ++++++++++++++++++------------------------------- 1 file changed, 85 insertions(+), 154 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 7abd0db1460..2bfe19f3609 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -84,188 +84,119 @@ static const struct anv_surf_type_limits { [SURFTYPE_STRBUF] = {128, 16384, 64}, }; -static const struct anv_tile_info { - /** - * Alignment for RENDER_SURFACE_STATE.SurfaceBaseAddress. 
- * - * To simplify calculations, the alignments defined in the table are - * sometimes larger than required. For example, Skylake requires that X and - * Y tiled buffers be aligned to 4K, but Broadwell permits smaller - * alignment. We choose 4K to accomodate both chipsets. The alignment of - * a linear buffer depends on its element type and usage. Linear depth - * buffers have the largest alignment, 64B, so we choose that for all linear - * buffers. - */ - uint32_t surface_alignment; -} anv_tile_info_table[] = { - [ISL_TILING_LINEAR] = { 64 }, - [ISL_TILING_X] = { 4096 }, - [ISL_TILING_Y0] = { 4096 }, - [ISL_TILING_Yf] = { 4096 }, - [ISL_TILING_Ys] = { 4096 }, - [ISL_TILING_W] = { 4096 }, -}; - -static enum isl_tiling -anv_image_choose_tiling(const struct anv_image_create_info *anv_info) +static isl_tiling_flags_t +choose_isl_tiling_flags(const struct anv_image_create_info *anv_info) { - if (anv_info->force_tiling) - return anv_info->tiling; - - /* The Sandybridge PRM says that the stencil buffer "is supported - * only in Tile W memory". - */ - - switch (anv_info->vk_info->tiling) { - case VK_IMAGE_TILING_LINEAR: - assert(anv_info->vk_info->format != VK_FORMAT_S8_UINT); - return ISL_TILING_LINEAR; - case VK_IMAGE_TILING_OPTIMAL: - if (unlikely(anv_info->vk_info->format == VK_FORMAT_S8_UINT)) { - return ISL_TILING_W; - } else { - return ISL_TILING_Y0; - } - default: - assert(!"bad VKImageTiling"); - return ISL_TILING_LINEAR; + const VkImageCreateInfo *vk_info = anv_info->vk_info; + + if (anv_info->force_tiling) { + return 1u << anv_info->tiling; + } else if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) { + return ISL_TILING_LINEAR_BIT; + } else if (vk_info->tiling == VK_IMAGE_TILING_OPTIMAL) { + return ISL_TILING_ANY_MASK; + } else { + unreachable("bad anv_image_create_info"); + return 0; } } - /** - * The \a format argument is required and overrides any format in - * struct anv_image_create_info. 
+ * The \a format argument is required and overrides any format found in struct + * anv_image_create_info. */ -static VkResult -anv_image_make_surface(const struct anv_device *dev, - const struct anv_image_create_info *create_info, - const struct anv_format *format, - uint64_t *inout_image_size, - uint32_t *inout_image_alignment, - struct anv_surface *out_surface) +static isl_surf_usage_flags_t +choose_isl_surf_usage(const struct anv_image_create_info *info, + const struct anv_format *format) { - /* See RENDER_SURFACE_STATE.SurfaceQPitch */ - static const uint16_t min_qpitch UNUSED = 0x4; - static const uint16_t max_qpitch UNUSED = 0x1ffc; - - const VkExtent3D *restrict extent = &create_info->vk_info->extent; - const uint32_t levels = create_info->vk_info->mipLevels; - const uint32_t array_size = create_info->vk_info->arrayLayers; - const enum isl_tiling tiling = anv_image_choose_tiling(create_info); + const VkImageCreateInfo *vk_info = info->vk_info; + isl_surf_usage_flags_t isl_flags = 0; - const struct anv_tile_info *tile_info = - &anv_tile_info_table[tiling]; + /* FINISHME: Support aux surfaces */ + isl_flags |= ISL_SURF_USAGE_DISABLE_AUX_BIT; - const uint32_t bs = format->isl_layout->bs; - const uint32_t bw = format->isl_layout->bw; - const uint32_t bh = format->isl_layout->bh; + if (vk_info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) + isl_flags |= ISL_SURF_USAGE_TEXTURE_BIT; - struct isl_extent2d tile_extent; - isl_tiling_get_extent(&dev->isl_dev, tiling, bs, &tile_extent); + if (vk_info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) + isl_flags |= ISL_SURF_USAGE_TEXTURE_BIT; - const uint32_t i = MAX(4, bw); /* FINISHME: Stop hardcoding subimage alignment */ - const uint32_t j = MAX(4, bh); /* FINISHME: Stop hardcoding subimage alignment */ - assert(i == 4 || i == 8 || i == 16); - assert(j == 4 || j == 8 || j == 16); + if (vk_info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + isl_flags |= ISL_SURF_USAGE_RENDER_TARGET_BIT; - uint16_t qpitch = min_qpitch; - uint32_t 
mt_width = 0; - uint32_t mt_height = 0; + if (vk_info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) + isl_flags |= ISL_SURF_USAGE_CUBE_BIT; - switch (create_info->vk_info->imageType) { - case VK_IMAGE_TYPE_1D: - /* From the Broadwell PRM >> Memory Views >> Common Surface Formats >> - * Surface Layout >> 1D Surfaces: - * - * One-dimensional surfaces are identical to 2D surfaces with height of one. - * - * So fallthrough... - */ - case VK_IMAGE_TYPE_2D: { - const uint32_t w0 = align_u32(extent->width, i); - const uint32_t h0 = align_u32(extent->height, j); - - if (levels == 1 && array_size == 1) { - qpitch = min_qpitch; - mt_width = w0; - mt_height = h0; - } else { - uint32_t w1 = align_u32(anv_minify(extent->width, 1), i); - uint32_t h1 = align_u32(anv_minify(extent->height, 1), j); - uint32_t w2 = align_u32(anv_minify(extent->width, 2), i); + if (vk_info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + assert((format->depth_format != 0) ^ format->has_stencil); - /* The QPitch equation is found in the Broadwell PRM >> Volume 5: Memory - * Views >> Common Surface Formats >> Surface Layout >> 2D Surfaces >> - * Surface Arrays >> For All Surface Other Than Separate Stencil Buffer: - */ - assert(bh ==1 || bh == 4); - qpitch = (h0 + h1 + 11 * j) / bh; - mt_width = MAX(w0, w1 + w2); - mt_height = array_size * qpitch; + if (format->depth_format) { + isl_flags |= ISL_SURF_USAGE_DEPTH_BIT; + } else if (format->has_stencil) { + isl_flags |= ISL_SURF_USAGE_STENCIL_BIT; } - break; } - case VK_IMAGE_TYPE_3D: - /* The layout of 3D surfaces is described by the Broadwell PRM >> - * Volume 5: Memory Views >> Common Surface Formats >> Surface Layout >> - * 3D Surfaces. 
- */ - for (uint32_t l = 0; l < levels; ++l) { - const uint32_t w_l = align_u32(anv_minify(extent->width, l), i); - const uint32_t h_l = align_u32(anv_minify(extent->height, l), j); - const uint32_t d_l = anv_minify(extent->depth, l); - - const uint32_t max_layers_horiz = MIN(d_l, 1u << l); - const uint32_t max_layers_vert = align_u32(d_l, 1u << l) / (1u << l); - mt_width = MAX(mt_width, w_l * max_layers_horiz); - mt_height += h_l * max_layers_vert; - } - break; - default: - unreachable(!"bad VkImageType"); + if (vk_info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta implements transfers by sampling from the source image. */ + isl_flags |= ISL_SURF_USAGE_TEXTURE_BIT; } - assert(qpitch >= min_qpitch); - if (qpitch > max_qpitch) { - anv_loge("image qpitch > 0x%x\n", max_qpitch); - return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + if (vk_info->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + /* Meta implements transfers by rendering into the destination image. */ + isl_flags |= ISL_SURF_USAGE_RENDER_TARGET_BIT; } - /* From the Broadwell PRM, RENDER_SURFACE_STATE.SurfaceQpitch: - * - * This field must be set an integer multiple of the Surface Vertical - * Alignment. - */ - assert(anv_is_aligned(qpitch, j)); + return isl_flags; +} - uint32_t stride = align_u32(mt_width * bs / bw, tile_extent.width); - if (create_info->stride > 0) - stride = create_info->stride; +/** + * The \a format argument is required and overrides any format in + * struct anv_image_create_info. 
+ */ +static VkResult +anv_image_make_surface(const struct anv_device *dev, + const struct anv_image_create_info *anv_info, + const struct anv_format *format, + uint64_t *inout_image_size, + uint32_t *inout_image_alignment, + struct anv_surface *out_surface) +{ + const VkImageCreateInfo *vk_info = anv_info->vk_info; - /* The padding requirement is found in the Broadwell PRM >> Volume 5: Memory - * Views >> Common Surface Formats >> Surface Padding Requirements >> - * Sampling Engine Surfaces >> Buffer Padding Requirements: - */ - const uint32_t mem_rows = align_u32(mt_height / bh, 2 * bh); - const uint32_t size = stride * align_u32(mem_rows, tile_extent.height); - const uint32_t offset = align_u32(*inout_image_size, - tile_info->surface_alignment); + static const enum isl_surf_dim vk_to_isl_surf_dim[] = { + [VK_IMAGE_TYPE_1D] = ISL_SURF_DIM_1D, + [VK_IMAGE_TYPE_2D] = ISL_SURF_DIM_2D, + [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D, + }; - *inout_image_size = offset + size; - *inout_image_alignment = MAX(*inout_image_alignment, - tile_info->surface_alignment); + struct isl_surf isl_surf; + isl_surf_init(&dev->isl_dev, &isl_surf, + .dim = vk_to_isl_surf_dim[vk_info->imageType], + .format = format->surface_format, + .width = vk_info->extent.width, + .height = vk_info->extent.height, + .depth = vk_info->extent.depth, + .levels = vk_info->mipLevels, + .array_len = vk_info->arrayLayers, + .samples = vk_info->samples, + .min_alignment = 0, + .min_pitch = 0, + .usage = choose_isl_surf_usage(anv_info, format), + .tiling_flags = choose_isl_tiling_flags(anv_info)); *out_surface = (struct anv_surface) { - .offset = offset, - .stride = stride, - .tiling = tiling, - .qpitch = qpitch, - .h_align = i, - .v_align = j, + .offset = align_u32(*inout_image_size, isl_surf.alignment), + .stride = isl_surf.row_pitch, + .tiling = isl_surf.tiling, + .qpitch = isl_surf_get_array_pitch_sa_rows(&isl_surf), + .h_align = isl_surf_get_lod_alignment_sa(&isl_surf).width, + .v_align = 
isl_surf_get_lod_alignment_sa(&isl_surf).height, }; + *inout_image_size = out_surface->offset + isl_surf.size; + *inout_image_alignment = MAX(*inout_image_alignment, isl_surf.alignment); + return VK_SUCCESS; } -- cgit v1.2.3 From 594e673fcc777ed8d8579db75a920aa35af048be Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 2 Dec 2015 18:27:35 -0800 Subject: anv/image: Drop assertions on SURFTYPE extent limits In anv_image_create(), stop asserting that VkImageCreateInfo::extent does not exceed the hardware limits for the given SURFTYPE. The assertions were incorrect because they did not take into account the hardware gen. Anyways, these types of assertions belong in isl, not anvil. --- src/vulkan/anv_image.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 2bfe19f3609..69103209022 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -71,19 +71,6 @@ anv_image_view_info_for_vk_image_view_type(VkImageViewType type) return anv_image_view_info_table[type]; } -static const struct anv_surf_type_limits { - int32_t width; - int32_t height; - int32_t depth; -} anv_surf_type_limits[] = { - [SURFTYPE_1D] = {16384, 1, 2048}, - [SURFTYPE_2D] = {16384, 16384, 2048}, - [SURFTYPE_3D] = {2048, 2048, 2048}, - [SURFTYPE_CUBE] = {16384, 16384, 340}, - [SURFTYPE_BUFFER] = {128, 16384, 64}, - [SURFTYPE_STRBUF] = {128, 16384, 64}, -}; - static isl_tiling_flags_t choose_isl_tiling_flags(const struct anv_image_create_info *anv_info) { @@ -228,7 +215,6 @@ anv_image_create(VkDevice _device, { ANV_FROM_HANDLE(anv_device, device, _device); const VkImageCreateInfo *pCreateInfo = create_info->vk_info; - const VkExtent3D *restrict extent = &pCreateInfo->extent; struct anv_image *image = NULL; VkResult r; @@ -245,14 +231,6 @@ anv_image_create(VkDevice _device, const uint8_t surf_type = anv_surf_type_from_image_type[pCreateInfo->imageType]; - const struct anv_surf_type_limits *limits = - 
&anv_surf_type_limits[surf_type]; - - /* Errors should be caught by VkImageFormatProperties. */ - assert(extent->width <= limits->width); - assert(extent->height <= limits->height); - assert(extent->depth <= limits->depth); - image = anv_alloc2(&device->alloc, alloc, sizeof(*image), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!image) -- cgit v1.2.3 From 981ef2f02d8c67addc0e643cc7f95baab7f4b1f6 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 3 Dec 2015 08:40:47 -0800 Subject: anv: Embed isl_surf into anv_surface This reduces struct anv_surface to just two members: an offset and the embedded isl_surf. --- src/vulkan/anv_image.c | 33 ++++++++++----------------------- src/vulkan/anv_private.h | 21 +++------------------ src/vulkan/anv_wsi_wayland.c | 4 ++-- src/vulkan/anv_wsi_x11.c | 4 ++-- src/vulkan/gen7_cmd_buffer.c | 4 ++-- src/vulkan/gen7_state.c | 13 ++++++++----- src/vulkan/gen8_cmd_buffer.c | 8 ++++---- src/vulkan/gen8_state.c | 13 ++++++++----- 8 files changed, 39 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 69103209022..f99155cad6a 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -147,7 +147,7 @@ anv_image_make_surface(const struct anv_device *dev, const struct anv_format *format, uint64_t *inout_image_size, uint32_t *inout_image_alignment, - struct anv_surface *out_surface) + struct anv_surface *out_anv_surf) { const VkImageCreateInfo *vk_info = anv_info->vk_info; @@ -157,8 +157,7 @@ anv_image_make_surface(const struct anv_device *dev, [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D, }; - struct isl_surf isl_surf; - isl_surf_init(&dev->isl_dev, &isl_surf, + isl_surf_init(&dev->isl_dev, &out_anv_surf->isl, .dim = vk_to_isl_surf_dim[vk_info->imageType], .format = format->surface_format, .width = vk_info->extent.width, @@ -172,17 +171,12 @@ anv_image_make_surface(const struct anv_device *dev, .usage = choose_isl_surf_usage(anv_info, format), .tiling_flags = 
choose_isl_tiling_flags(anv_info)); - *out_surface = (struct anv_surface) { - .offset = align_u32(*inout_image_size, isl_surf.alignment), - .stride = isl_surf.row_pitch, - .tiling = isl_surf.tiling, - .qpitch = isl_surf_get_array_pitch_sa_rows(&isl_surf), - .h_align = isl_surf_get_lod_alignment_sa(&isl_surf).width, - .v_align = isl_surf_get_lod_alignment_sa(&isl_surf).height, - }; + out_anv_surf->offset = align_u32(*inout_image_size, + out_anv_surf->isl.alignment); - *inout_image_size = out_surface->offset + isl_surf.size; - *inout_image_alignment = MAX(*inout_image_alignment, isl_surf.alignment); + *inout_image_size = out_anv_surf->offset + out_anv_surf->isl.size; + *inout_image_alignment = MAX(*inout_image_alignment, + out_anv_surf->isl.alignment); return VK_SUCCESS; } @@ -325,16 +319,9 @@ anv_surface_get_subresource_layout(struct anv_image *image, anv_assert(subresource->arrayLayer == 0); layout->offset = surface->offset; - layout->rowPitch = surface->stride; - - /* Anvil's qpitch is in units of rows. Vulkan's depthPitch is in bytes. */ - layout->depthPitch = surface->qpitch * surface->stride; - - /* FINISHME: We really shouldn't be doing this calculation here */ - if (image->array_size > 1) - layout->size = surface->qpitch * image->array_size; - else - layout->size = surface->stride * image->extent.height; + layout->rowPitch = surface->isl.row_pitch; + layout->depthPitch = isl_surf_get_array_pitch(&surface->isl); + layout->size = surface->isl.size; } void anv_GetImageSubresourceLayout( diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 43ebf4c11ae..3fc305ba15e 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1372,30 +1372,15 @@ struct anv_image_view_info anv_image_view_info_for_vk_image_view_type(VkImageViewType type); /** - * A proxy for the color surfaces, depth surfaces, and stencil surfaces. + * Subsurface of an anv_image. 
*/ struct anv_surface { + struct isl_surf isl; + /** * Offset from VkImage's base address, as bound by vkBindImageMemory(). */ uint32_t offset; - - uint32_t stride; /**< RENDER_SURFACE_STATE.SurfacePitch */ - uint16_t qpitch; /**< RENDER_SURFACE_STATE.QPitch */ - - /** - * \name Alignment of miptree images, in units of pixels. - * - * These fields contain the real alignment values, not the values to be - * given to the GPU. For example, if h_align is 4, then program the GPU - * with HALIGN_4. - * \{ - */ - uint8_t h_align; /**< RENDER_SURFACE_STATE.SurfaceHorizontalAlignment */ - uint8_t v_align; /**< RENDER_SURFACE_STATE.SurfaceVerticalAlignment */ - /** \} */ - - enum isl_tiling tiling; }; struct anv_image { diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index 1dafcd996fe..dd5baa452a0 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -678,7 +678,7 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, int ret = anv_gem_set_tiling(chain->base.device, image->memory->bo.gem_handle, - surface->stride, I915_TILING_X); + surface->isl.row_pitch, I915_TILING_X); if (ret) { /* FINISHME: Choose a better error. */ result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); @@ -699,7 +699,7 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, chain->extent.height, chain->drm_format, surface->offset, - surface->stride, + surface->isl.row_pitch, 0, 0, 0, 0 /* unused */); wl_display_roundtrip(chain->display->display); close(fd); diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index d327f4316d3..15ad98c3f8b 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -388,7 +388,7 @@ x11_surface_create_swapchain(struct anv_wsi_surface *wsi_surface, memory_h, 0); int ret = anv_gem_set_tiling(device, memory->bo.gem_handle, - surface->stride, I915_TILING_X); + surface->isl.row_pitch, I915_TILING_X); if (ret) { /* FINISHME: Choose a better error. 
*/ result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, @@ -415,7 +415,7 @@ x11_surface_create_swapchain(struct anv_wsi_surface *wsi_surface, image->size, pCreateInfo->imageExtent.width, pCreateInfo->imageExtent.height, - surface->stride, + surface->isl.row_pitch, depth, bpp, fd); chain->images[i].image = image; diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index dd80144270b..7101831080b 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -707,7 +707,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, .SurfaceFormat = iview->format->depth_format, - .SurfacePitch = image->depth_surface.stride - 1, + .SurfacePitch = image->depth_surface.isl.row_pitch - 1, .SurfaceBaseAddress = { .bo = image->bo, .offset = image->depth_surface.offset, @@ -758,7 +758,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) * The pitch must be set to 2x the value computed based on width, * as the stencil buffer is stored with two rows interleaved. 
*/ - .SurfacePitch = 2 * image->stencil_surface.stride - 1, + .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, .SurfaceBaseAddress = { .bo = image->bo, diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 5a626f75eeb..6dcb5bffdf1 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -238,18 +238,21 @@ genX(image_view_init)(struct anv_image_view *iview, depth = image->extent.depth; } + const struct isl_extent3d lod_align_sa = + isl_surf_get_lod_alignment_sa(&surface->isl); + struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = image->surface_type, .SurfaceArray = image->array_size > 1, .SurfaceFormat = format->surface_format, - .SurfaceVerticalAlignment = anv_valign[surface->v_align], - .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .SurfaceVerticalAlignment = anv_valign[lod_align_sa.height], + .SurfaceHorizontalAlignment = anv_halign[lod_align_sa.width], /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if * Tiled Surface is False." */ - .TiledSurface = surface->tiling != ISL_TILING_LINEAR, - .TileWalk = surface->tiling == ISL_TILING_Y0 ? + .TiledSurface = surface->isl.tiling != ISL_TILING_LINEAR, + .TileWalk = surface->isl.tiling == ISL_TILING_Y0 ? 
TILEWALK_YMAJOR : TILEWALK_XMAJOR, .VerticalLineStride = 0, @@ -260,7 +263,7 @@ genX(image_view_init)(struct anv_image_view *iview, .Height = image->extent.height - 1, .Width = image->extent.width - 1, .Depth = depth - 1, - .SurfacePitch = surface->stride - 1, + .SurfacePitch = surface->isl.row_pitch - 1, .MinimumArrayElement = range->baseArrayLayer, .NumberofMultisamples = MULTISAMPLECOUNT_1, .XOffset = 0, diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 6d0ac25b6d8..a23421102aa 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -659,7 +659,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, .SurfaceFormat = iview->format->depth_format, - .SurfacePitch = image->depth_surface.stride - 1, + .SurfacePitch = image->depth_surface.isl.row_pitch - 1, .SurfaceBaseAddress = { .bo = image->bo, .offset = image->depth_surface.offset, @@ -671,7 +671,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .MinimumArrayElement = 0, .DepthBufferObjectControlState = GENX(MOCS), .RenderTargetViewExtent = 1 - 1, - .SurfaceQPitch = image->depth_surface.qpitch >> 2); + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2); } else { /* Even when no depth buffer is present, the hardware requires that * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says: @@ -709,13 +709,13 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) * The pitch must be set to 2x the value computed based on width, * as the stencil buffer is stored with two rows interleaved. 
*/ - .SurfacePitch = 2 * image->stencil_surface.stride - 1, + .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, .SurfaceBaseAddress = { .bo = image->bo, .offset = image->offset + image->stencil_surface.offset, }, - .SurfaceQPitch = image->stencil_surface.stride >> 2); + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2); } else { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER)); } diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index f46611c629d..30478237bdd 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -183,13 +183,16 @@ genX(image_view_init)(struct anv_image_view *iview, [ISL_TILING_W] = WMAJOR, }; + const struct isl_extent3d lod_align_sa = + isl_surf_get_lod_alignment_sa(&surface->isl); + struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = image->surface_type, .SurfaceArray = image->array_size > 1, .SurfaceFormat = format_info->surface_format, - .SurfaceVerticalAlignment = anv_valign[surface->v_align], - .SurfaceHorizontalAlignment = anv_halign[surface->h_align], - .TileMode = isl_to_gen_tiling[surface->tiling], + .SurfaceVerticalAlignment = anv_valign[lod_align_sa.height], + .SurfaceHorizontalAlignment = anv_halign[lod_align_sa.width], + .TileMode = isl_to_gen_tiling[surface->isl.tiling], .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, .SamplerL2BypassModeDisable = true, @@ -202,11 +205,11 @@ genX(image_view_init)(struct anv_image_view *iview, */ .BaseMipLevel = 0.0, - .SurfaceQPitch = surface->qpitch >> 2, + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&surface->isl) >> 2, .Height = image->extent.height - 1, .Width = image->extent.width - 1, .Depth = depth - 1, - .SurfacePitch = surface->stride - 1, + .SurfacePitch = surface->isl.row_pitch - 1, .RenderTargetViewExtent = rt_view_extent - 1, .MinimumArrayElement = range->baseArrayLayer, .NumberofMultisamples = MULTISAMPLECOUNT_1, -- cgit v1.2.3 From 
371fc2bc20879fb4529e00023c7ac4e9be9a1cf7 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 3 Dec 2015 13:07:06 -0800 Subject: anv/gen9: Fix SURFACE_STATE halign and valign Pre-Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in units of surface samples. A surface sample is equivalent to a pixel in all surfaces except interleaved multisample surfaces. In Skylake, it is in units of surface elements. A surface element is equivalent to a surface sample except for compressed formats, in which case the element is a compression block. --- src/vulkan/gen8_state.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 30478237bdd..e7acbd02281 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -105,6 +105,37 @@ vk_to_gen_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) return vk_to_gen_swizzle_map[swizzle]; } +/** + * Get the values to pack into RENDER_SUFFACE_STATE.SurfaceHorizontalAlignment + * and SurfaceVerticalAlignment. + */ +static void +get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valign) +{ + #if ANV_GENx10 >= 90 + /* In Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in units + * of surface elements (not pixels nor samples). For compressed formats, + * a "surface element" is defined as a compression block. For example, + * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2 + * format (ETC2 has a block height of 4), then the vertical alignment is + * 4 compression blocks or, equivalently, 16 pixels. + */ + struct isl_extent3d lod_align_el = isl_surf_get_lod_alignment_el(surf); + *halign = anv_halign[lod_align_el.width]; + *valign = anv_valign[lod_align_el.height]; + #else + /* Pre-Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in + * units of surface samples. 
For example, if SurfaceVerticalAlignment + * is VALIGN_4 and the surface is singlesampled, then for any surface + * format (compressed or not) the vertical alignment is + * 4 pixels. + */ + struct isl_extent3d lod_align_sa = isl_surf_get_lod_alignment_sa(surf); + *halign = anv_halign[lod_align_sa.width]; + *valign = anv_valign[lod_align_sa.height]; + #endif +} + void genX(image_view_init)(struct anv_image_view *iview, struct anv_device *device, @@ -183,15 +214,15 @@ genX(image_view_init)(struct anv_image_view *iview, [ISL_TILING_W] = WMAJOR, }; - const struct isl_extent3d lod_align_sa = - isl_surf_get_lod_alignment_sa(&surface->isl); + uint32_t halign, valign; + get_halign_valign(&surface->isl, &halign, &valign); struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = image->surface_type, .SurfaceArray = image->array_size > 1, .SurfaceFormat = format_info->surface_format, - .SurfaceVerticalAlignment = anv_valign[lod_align_sa.height], - .SurfaceHorizontalAlignment = anv_halign[lod_align_sa.width], + .SurfaceVerticalAlignment = valign, + .SurfaceHorizontalAlignment = halign, .TileMode = isl_to_gen_tiling[surface->isl.tiling], .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, -- cgit v1.2.3 From cb2382882e8a02df109b8692ba93f4ef559e7f79 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 3 Dec 2015 18:07:58 -0800 Subject: nir/spirv: Update to SPIR-V version 1.0 --- src/glsl/nir/spirv.h | 55 +++++++++++++++++++------------------ src/glsl/nir/spirv_glsl450_to_nir.c | 4 +++ src/glsl/nir/spirv_to_nir.c | 34 ++++++++--------------- 3 files changed, 44 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv.h b/src/glsl/nir/spirv.h index e9e53973801..63bcb2f88dd 100644 --- a/src/glsl/nir/spirv.h +++ b/src/glsl/nir/spirv.h @@ -30,7 +30,6 @@ */ /* -** Specification revision 33. 
** Enumeration tokens for SPIR-V, in various styles: ** C, C++, C++11, JSON, Lua, Python ** @@ -51,9 +50,12 @@ typedef unsigned int SpvId; +#define SPV_VERSION 0x10000 +#define SPV_REVISION 2 + static const unsigned int SpvMagicNumber = 0x07230203; -static const unsigned int SpvVersion = 99; -static const unsigned int SpvRevision = 33; +static const unsigned int SpvVersion = 0x00010000; +static const unsigned int SpvRevision = 2; static const unsigned int SpvOpCodeMask = 0xffff; static const unsigned int SpvWordCountShift = 16; @@ -61,7 +63,8 @@ typedef enum SpvSourceLanguage_ { SpvSourceLanguageUnknown = 0, SpvSourceLanguageESSL = 1, SpvSourceLanguageGLSL = 2, - SpvSourceLanguageOpenCL = 3, + SpvSourceLanguageOpenCL_C = 3, + SpvSourceLanguageOpenCL_CPP = 4, } SpvSourceLanguage; typedef enum SpvExecutionModel_ { @@ -108,17 +111,16 @@ typedef enum SpvExecutionMode_ { SpvExecutionModeInputPoints = 19, SpvExecutionModeInputLines = 20, SpvExecutionModeInputLinesAdjacency = 21, - SpvExecutionModeInputTriangles = 22, + SpvExecutionModeTriangles = 22, SpvExecutionModeInputTrianglesAdjacency = 23, - SpvExecutionModeInputQuads = 24, - SpvExecutionModeInputIsolines = 25, + SpvExecutionModeQuads = 24, + SpvExecutionModeIsolines = 25, SpvExecutionModeOutputVertices = 26, SpvExecutionModeOutputPoints = 27, SpvExecutionModeOutputLineStrip = 28, SpvExecutionModeOutputTriangleStrip = 29, SpvExecutionModeVecTypeHint = 30, SpvExecutionModeContractionOff = 31, - SpvExecutionModeIndependentForwardProgress = 32, } SpvExecutionMode; typedef enum SpvStorageClass_ { @@ -126,9 +128,9 @@ typedef enum SpvStorageClass_ { SpvStorageClassInput = 1, SpvStorageClassUniform = 2, SpvStorageClassOutput = 3, - SpvStorageClassWorkgroupLocal = 4, - SpvStorageClassWorkgroupGlobal = 5, - SpvStorageClassPrivateGlobal = 6, + SpvStorageClassWorkgroup = 4, + SpvStorageClassCrossWorkgroup = 5, + SpvStorageClassPrivate = 6, SpvStorageClassFunction = 7, SpvStorageClassGeneric = 8, SpvStorageClassPushConstant = 
9, @@ -143,7 +145,7 @@ typedef enum SpvDim_ { SpvDimCube = 3, SpvDimRect = 4, SpvDimBuffer = 5, - SpvDimInputTarget = 6, + SpvDimSubpassData = 6, } SpvDim; typedef enum SpvSamplerAddressingMode_ { @@ -241,6 +243,7 @@ typedef enum SpvImageChannelDataType_ { SpvImageChannelDataTypeHalfFloat = 13, SpvImageChannelDataTypeFloat = 14, SpvImageChannelDataTypeUnormInt24 = 15, + SpvImageChannelDataTypeUnormInt101010_2 = 16, } SpvImageChannelDataType; typedef enum SpvImageOperandsShift_ { @@ -325,7 +328,6 @@ typedef enum SpvDecoration_ { SpvDecorationGLSLPacked = 9, SpvDecorationCPacked = 10, SpvDecorationBuiltIn = 11, - SpvDecorationSmooth = 12, SpvDecorationNoPerspective = 13, SpvDecorationFlat = 14, SpvDecorationPatch = 15, @@ -355,7 +357,7 @@ typedef enum SpvDecoration_ { SpvDecorationFPFastMathMode = 40, SpvDecorationLinkageAttributes = 41, SpvDecorationNoContraction = 42, - SpvDecorationInputTargetIndex = 43, + SpvDecorationInputAttachmentIndex = 43, SpvDecorationAlignment = 44, } SpvDecoration; @@ -380,7 +382,6 @@ typedef enum SpvBuiltIn_ { SpvBuiltInSampleId = 18, SpvBuiltInSamplePosition = 19, SpvBuiltInSampleMask = 20, - SpvBuiltInFragColor = 21, SpvBuiltInFragDepth = 22, SpvBuiltInHelperInvocation = 23, SpvBuiltInNumWorkgroups = 24, @@ -394,7 +395,6 @@ typedef enum SpvBuiltIn_ { SpvBuiltInEnqueuedWorkgroupSize = 32, SpvBuiltInGlobalOffset = 33, SpvBuiltInGlobalLinearId = 34, - SpvBuiltInWorkgroupLinearId = 35, SpvBuiltInSubgroupSize = 36, SpvBuiltInSubgroupMaxSize = 37, SpvBuiltInNumSubgroups = 38, @@ -449,8 +449,8 @@ typedef enum SpvMemorySemanticsShift_ { SpvMemorySemanticsSequentiallyConsistentShift = 4, SpvMemorySemanticsUniformMemoryShift = 6, SpvMemorySemanticsSubgroupMemoryShift = 7, - SpvMemorySemanticsWorkgroupLocalMemoryShift = 8, - SpvMemorySemanticsWorkgroupGlobalMemoryShift = 9, + SpvMemorySemanticsWorkgroupMemoryShift = 8, + SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, SpvMemorySemanticsAtomicCounterMemoryShift = 10, 
SpvMemorySemanticsImageMemoryShift = 11, } SpvMemorySemanticsShift; @@ -463,8 +463,8 @@ typedef enum SpvMemorySemanticsMask_ { SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, SpvMemorySemanticsUniformMemoryMask = 0x00000040, SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, - SpvMemorySemanticsWorkgroupLocalMemoryMask = 0x00000100, - SpvMemorySemanticsWorkgroupGlobalMemoryMask = 0x00000200, + SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, + SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, SpvMemorySemanticsImageMemoryMask = 0x00000800, } SpvMemorySemanticsMask; @@ -528,7 +528,6 @@ typedef enum SpvCapability_ { SpvCapabilityImageBasic = 13, SpvCapabilityImageReadWrite = 14, SpvCapabilityImageMipmap = 15, - SpvCapabilityImageSRGBWrite = 16, SpvCapabilityPipes = 17, SpvCapabilityGroups = 18, SpvCapabilityDeviceEnqueue = 19, @@ -538,7 +537,6 @@ typedef enum SpvCapability_ { SpvCapabilityTessellationPointSize = 23, SpvCapabilityGeometryPointSize = 24, SpvCapabilityImageGatherExtended = 25, - SpvCapabilityStorageImageExtendedFormats = 26, SpvCapabilityStorageImageMultisample = 27, SpvCapabilityUniformBufferArrayDynamicIndexing = 28, SpvCapabilitySampledImageArrayDynamicIndexing = 29, @@ -552,7 +550,7 @@ typedef enum SpvCapability_ { SpvCapabilitySampledRect = 37, SpvCapabilityGenericPointer = 38, SpvCapabilityInt8 = 39, - SpvCapabilityInputTarget = 40, + SpvCapabilityInputAttachment = 40, SpvCapabilitySparseResidency = 41, SpvCapabilityMinLod = 42, SpvCapabilitySampled1D = 43, @@ -561,11 +559,15 @@ typedef enum SpvCapability_ { SpvCapabilitySampledBuffer = 46, SpvCapabilityImageBuffer = 47, SpvCapabilityImageMSArray = 48, - SpvCapabilityAdvancedFormats = 49, + SpvCapabilityStorageImageExtendedFormats = 49, SpvCapabilityImageQuery = 50, SpvCapabilityDerivativeControl = 51, SpvCapabilityInterpolationFunction = 52, SpvCapabilityTransformFeedback = 53, + SpvCapabilityGeometryStreams = 54, + 
SpvCapabilityStorageImageReadWithoutFormat = 55, + SpvCapabilityStorageImageWriteWithoutFormat = 56, + SpvCapabilityMultiViewport = 57, } SpvCapability; typedef enum SpvOp_ { @@ -660,6 +662,7 @@ typedef enum SpvOp_ { SpvOpImageDrefGather = 97, SpvOpImageRead = 98, SpvOpImageWrite = 99, + SpvOpImage = 100, SpvOpImageQueryFormat = 101, SpvOpImageQueryOrder = 102, SpvOpImageQuerySizeLod = 103, @@ -804,8 +807,8 @@ typedef enum SpvOp_ { SpvOpUnreachable = 255, SpvOpLifetimeStart = 256, SpvOpLifetimeStop = 257, - SpvOpAsyncGroupCopy = 259, - SpvOpWaitGroupEvents = 260, + SpvOpGroupAsyncCopy = 259, + SpvOpGroupWaitEvents = 260, SpvOpGroupAll = 261, SpvOpGroupAny = 262, SpvOpGroupBroadcast = 263, diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c index 1056c0f1ddf..ee1fca34c31 100644 --- a/src/glsl/nir/spirv_glsl450_to_nir.c +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -85,7 +85,11 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Modf: op = nir_op_fmod; break; case GLSLstd450FMin: op = nir_op_fmin; break; + case GLSLstd450UMin: op = nir_op_umin; break; + case GLSLstd450SMin: op = nir_op_imin; break; case GLSLstd450FMax: op = nir_op_fmax; break; + case GLSLstd450UMax: op = nir_op_umax; break; + case GLSLstd450SMax: op = nir_op_imax; break; case GLSLstd450FMix: op = nir_op_flrp; break; case GLSLstd450Step: val->ssa->def = nir_sge(&b->nb, src[1], src[0]); diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 86282d25e0a..d014f3cd811 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -343,9 +343,6 @@ struct_member_decoration_cb(struct vtn_builder *b, switch (dec->decoration) { case SpvDecorationRelaxedPrecision: break; /* FIXME: Do nothing with this for now. 
*/ - case SpvDecorationSmooth: - ctx->fields[member].interpolation = INTERP_QUALIFIER_SMOOTH; - break; case SpvDecorationNoPerspective: ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; break; @@ -815,11 +812,6 @@ vtn_get_builtin_location(struct vtn_builder *b, *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */ set_mode_system_value(mode); break; - case SpvBuiltInFragColor: - *location = FRAG_RESULT_COLOR; - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - assert(*mode == nir_var_shader_out); - break; case SpvBuiltInFragDepth: *location = FRAG_RESULT_DEPTH; assert(b->shader->stage == MESA_SHADER_FRAGMENT); @@ -860,9 +852,6 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, switch (dec->decoration) { case SpvDecorationRelaxedPrecision: break; /* FIXME: Do nothing with this for now. */ - case SpvDecorationSmooth: - var->data.interpolation = INTERP_QUALIFIER_SMOOTH; - break; case SpvDecorationNoPerspective: var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; break; @@ -1565,14 +1554,14 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvStorageClassOutput: var->data.mode = nir_var_shader_out; break; - case SpvStorageClassPrivateGlobal: + case SpvStorageClassPrivate: var->data.mode = nir_var_global; break; case SpvStorageClassFunction: var->data.mode = nir_var_local; break; - case SpvStorageClassWorkgroupLocal: - case SpvStorageClassWorkgroupGlobal: + case SpvStorageClassWorkgroup: + case SpvStorageClassCrossWorkgroup: case SpvStorageClassGeneric: case SpvStorageClassAtomicCounter: default: @@ -3001,13 +2990,13 @@ gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) return 1; /* GL_LINES */ case SpvExecutionModeInputLinesAdjacency: return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */ - case SpvExecutionModeInputTriangles: + case SpvExecutionModeTriangles: return 4; /* GL_TRIANGLES */ case SpvExecutionModeInputTrianglesAdjacency: return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */ - case 
SpvExecutionModeInputQuads: + case SpvExecutionModeQuads: return 7; /* GL_QUADS */ - case SpvExecutionModeInputIsolines: + case SpvExecutionModeIsolines: return 0x8E7A; /* GL_ISOLINES */ case SpvExecutionModeOutputLineStrip: return 3; /* GL_LINE_STRIP */ @@ -3029,7 +3018,7 @@ vertices_in_from_spv_execution_mode(SpvExecutionMode mode) return 2; case SpvExecutionModeInputLinesAdjacency: return 4; - case SpvExecutionModeInputTriangles: + case SpvExecutionModeTriangles: return 3; case SpvExecutionModeInputTrianglesAdjacency: return 6; @@ -3133,10 +3122,10 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvExecutionModeInputPoints: case SpvExecutionModeInputLines: case SpvExecutionModeInputLinesAdjacency: - case SpvExecutionModeInputTriangles: + case SpvExecutionModeTriangles: case SpvExecutionModeInputTrianglesAdjacency: - case SpvExecutionModeInputQuads: - case SpvExecutionModeInputIsolines: + case SpvExecutionModeQuads: + case SpvExecutionModeIsolines: if (b->shader->stage == MESA_SHADER_GEOMETRY) { b->shader->info.gs.vertices_in = vertices_in_from_spv_execution_mode(mode); @@ -3169,7 +3158,6 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvExecutionModeVecTypeHint: case SpvExecutionModeContractionOff: - case SpvExecutionModeIndependentForwardProgress: break; /* OpenCL */ } break; @@ -3746,7 +3734,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, assert(word_count > 5); assert(words[0] == SpvMagicNumber); - assert(words[1] == 99); + assert(words[1] >= 0x10000); /* words[2] == generator magic */ unsigned value_id_bound = words[3]; assert(words[4] == 0); -- cgit v1.2.3 From e0b5f0308c78ff5e6f7459a0f7d05e3fb376a104 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 1 Dec 2015 15:25:07 -0800 Subject: vk: Implement vkFlushMappedMemoryRanges() We'll do a runtime switch on device->info.has_llc for now. 
--- src/vulkan/anv_device.c | 43 +++++++++++++++++++++++++++++++++++++++---- src/vulkan/anv_private.h | 3 +++ 2 files changed, 42 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index c25ad34a613..724c4120a06 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1035,22 +1035,57 @@ void anv_UnmapMemory( anv_gem_munmap(mem->map, mem->map_size); } +static void +clflush_mapped_ranges(struct anv_device *device, + uint32_t count, + const VkMappedMemoryRange *ranges) +{ + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory); + void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK); + void *end = mem->map + ranges[i].offset + ranges[i].size; + + while (p < end) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } + } +} + VkResult anv_FlushMappedMemoryRanges( - VkDevice device, + VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges) { - /* clflush here for !llc platforms */ + ANV_FROM_HANDLE(anv_device, device, _device); + + if (device->info.has_llc) + return VK_SUCCESS; + + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_sfence(); + + clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); return VK_SUCCESS; } VkResult anv_InvalidateMappedMemoryRanges( - VkDevice device, + VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges) { - return anv_FlushMappedMemoryRanges(device, memoryRangeCount, pMemoryRanges); + ANV_FROM_HANDLE(anv_device, device, _device); + + if (device->info.has_llc) + return VK_SUCCESS; + + clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); + + /* Make sure no reads get moved up above the invalidate. 
*/ + __builtin_ia32_lfence(); + + return VK_SUCCESS; } void anv_GetBufferMemoryRequirements( diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 3fc305ba15e..c21d9d7c856 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -391,6 +391,9 @@ struct anv_state_stream { uint32_t end; }; +#define CACHELINE_SIZE 64 +#define CACHELINE_MASK 63 + void anv_block_pool_init(struct anv_block_pool *pool, struct anv_device *device, uint32_t block_size); void anv_block_pool_finish(struct anv_block_pool *pool); -- cgit v1.2.3 From b431cf59a3f0412b7b7dd2660a793912a550bfc4 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Thu, 3 Dec 2015 12:09:33 -0800 Subject: vk: Set I915_CACHING_NONE for userptr BOs when !llc Regular objects are created I915_CACHING_CACHED on LLC platforms and I915_CACHING_NONE on non-LLC platforms. However, userptr objects are always created as I915_CACHING_CACHED, which on non-LLC means snooped. That can be useful but comes with a bit of overheard. Since we're eplicitly clflushing and don't want the overhead we need to turn it off. --- src/vulkan/anv_allocator.c | 12 ++++++++++++ src/vulkan/anv_gem.c | 26 ++++++++++++++++++++++++++ src/vulkan/anv_private.h | 3 +++ 3 files changed, 41 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index db04a2c236c..970d809e8a3 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -441,6 +441,18 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) goto fail; cleanup->gem_handle = gem_handle; + /* Regular objects are created I915_CACHING_CACHED on LLC platforms and + * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are + * always created as I915_CACHING_CACHED, which on non-LLC means + * snooped. That can be useful but comes with a bit of overheard. Since + * we're eplicitly clflushing and don't want the overhead we need to turn + * it off. 
*/ + if (!pool->device->info.has_llc) { + anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_NONE); + anv_gem_set_domain(pool->device, gem_handle, + I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); + } + /* Now that we successfull allocated everything, we can write the new * values back into pool. */ pool->map = map + center_bo_offset; diff --git a/src/vulkan/anv_gem.c b/src/vulkan/anv_gem.c index 01671d2ea50..57b8505e156 100644 --- a/src/vulkan/anv_gem.c +++ b/src/vulkan/anv_gem.c @@ -137,6 +137,32 @@ anv_gem_userptr(struct anv_device *device, void *mem, size_t size) return userptr.handle; } +int +anv_gem_set_caching(struct anv_device *device, int gem_handle, uint32_t caching) +{ + struct drm_i915_gem_caching gem_caching; + + VG_CLEAR(gem_caching); + gem_caching.handle = gem_handle; + gem_caching.caching = caching; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching); +} + +int +anv_gem_set_domain(struct anv_device *device, int gem_handle, + uint32_t read_domains, uint32_t write_domain) +{ + struct drm_i915_gem_set_domain gem_set_domain; + + VG_CLEAR(gem_set_domain); + gem_set_domain.handle = gem_handle; + gem_set_domain.read_domains = read_domains; + gem_set_domain.write_domain = write_domain; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &gem_set_domain); +} + /** * On error, \a timeout_ns holds the remaining time. 
*/ diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index c21d9d7c856..8be64d8887d 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -598,6 +598,9 @@ int anv_gem_get_aperture(int fd, uint64_t *size); int anv_gem_handle_to_fd(struct anv_device *device, int gem_handle); int anv_gem_fd_to_handle(struct anv_device *device, int fd); int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); +int anv_gem_set_caching(struct anv_device *device, int gem_handle, uint32_t caching); +int anv_gem_set_domain(struct anv_device *device, int gem_handle, + uint32_t read_domains, uint32_t write_domain); VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); -- cgit v1.2.3 From 773592051be92f3f8c3ac11492b22d2bf4e96020 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 1 Dec 2015 15:37:12 -0800 Subject: vk: clflush all state for non-LLC GPUs --- src/vulkan/anv_batch_chain.c | 32 +++++++++++++++++- src/vulkan/anv_cmd_buffer.c | 31 +++++++++++++---- src/vulkan/anv_device.c | 30 ++++++++++++++--- src/vulkan/anv_meta.c | 2 ++ src/vulkan/anv_meta_clear.c | 6 ++-- src/vulkan/anv_pipeline.c | 4 +++ src/vulkan/anv_private.h | 33 ++++++++++++++++-- src/vulkan/gen7_cmd_buffer.c | 25 +++++++------- src/vulkan/gen7_pipeline.c | 79 +++++++++++++++++++++----------------------- src/vulkan/gen7_state.c | 5 +++ src/vulkan/gen8_cmd_buffer.c | 35 ++++++++++++-------- src/vulkan/gen8_pipeline.c | 2 ++ src/vulkan/gen8_state.c | 4 +++ 13 files changed, 203 insertions(+), 85 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 6a5faea5777..41bae981397 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -724,6 +724,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); assert(primary->batch.start == this_bbo->bo.map); uint32_t offset = 
primary->batch.next - primary->batch.start; + const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4; /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we * can emit a new command and relocation for the current splice. In @@ -732,9 +733,25 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, * here. */ last_bbo->relocs.num_relocs--; - secondary->batch.next -= GEN8_MI_BATCH_BUFFER_START_length * 4; + secondary->batch.next -= inst_size; emit_batch_buffer_start(secondary, &this_bbo->bo, offset); anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); + + /* After patching up the secondary buffer, we need to clflush the + * modified instruction in case we're on a !llc platform. We use a + * little loop to handle the case where the instruction crosses a cache + * line boundary. + */ + if (!primary->device->info.has_llc) { + void *inst = secondary->batch.next - inst_size; + void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK); + __builtin_ia32_sfence(); + while (p < secondary->batch.next) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } + } + break; } case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { @@ -886,6 +903,11 @@ adjust_relocations_from_block_pool(struct anv_block_pool *pool, */ assert(relocs->relocs[i].offset < pool->state.end); uint32_t *reloc_data = pool->map + relocs->relocs[i].offset; + + /* We're reading back the relocated value from potentially incoherent + * memory here. However, any change to the value will be from the kernel + * writing out relocations, which will keep the CPU cache up to date. 
+ */ relocs->relocs[i].presumed_offset = *reloc_data - relocs->relocs[i].delta; /* All of the relocations from this block pool to other BO's should @@ -994,6 +1016,14 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); + if (!cmd_buffer->device->info.has_llc) { + __builtin_ia32_sfence(); + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE) + __builtin_ia32_clflush((*bbo)->bo.map + i); + } + } + cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, .buffer_count = cmd_buffer->execbuf2.bo_count, diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 19d4be90274..ee437aa6330 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -666,7 +666,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, } if (layout == NULL) - return VK_SUCCESS; + goto out; for (uint32_t s = 0; s < layout->stage[stage].surface_count; s++) { struct anv_pipeline_binding *binding = @@ -698,6 +698,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, surface_state.map, stage, desc->type, bo_offset, desc->range); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(surface_state); + break; } @@ -724,6 +728,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); } + out: + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*bt_state); + return VK_SUCCESS; } @@ -772,20 +780,25 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, sampler->state, sizeof(sampler->state)); } + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*state); + return VK_SUCCESS; } struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, - uint32_t *a, uint32_t dwords, uint32_t 
alignment) + const void *data, uint32_t size, uint32_t alignment) { struct anv_state state; - state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - dwords * 4, alignment); - memcpy(state.map, a, dwords * 4); + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); + memcpy(state.map, data, size); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); - VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4)); + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); return state; } @@ -804,6 +817,9 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, for (uint32_t i = 0; i < dwords; i++) p[i] = a[i] | b[i]; + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); return state; @@ -881,6 +897,9 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); } + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + return state; } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 724c4120a06..384a457742f 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -599,6 +599,20 @@ anv_queue_finish(struct anv_queue *queue) { } +static struct anv_state +anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p) +{ + struct anv_state state; + + state = anv_state_pool_alloc(pool, size, align); + memcpy(state.map, p, size); + + if (!pool->block_pool->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + static void anv_device_init_border_colors(struct anv_device *device) { @@ -611,10 +625,8 @@ anv_device_init_border_colors(struct anv_device *device) [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } }, }; - device->border_colors = - anv_state_pool_alloc(&device->dynamic_state_pool, - sizeof(border_colors), 32); - memcpy(device->border_colors.map, border_colors, sizeof(border_colors)); + 
device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool, + sizeof(border_colors), 32, border_colors); } VkResult anv_CreateDevice( @@ -885,6 +897,9 @@ VkResult anv_DeviceWaitIdle( anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); anv_batch_emit(&batch, GEN7_MI_NOOP); + if (!device->info.has_llc) + anv_state_clflush(state); + exec2_objects[0].handle = bo->gem_handle; exec2_objects[0].relocation_count = 0; exec2_objects[0].relocs_ptr = 0; @@ -1219,6 +1234,13 @@ VkResult anv_CreateFence( anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); anv_batch_emit(&batch, GEN7_MI_NOOP); + if (!device->info.has_llc) { + assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0); + assert(batch.next - fence->bo.map <= CACHELINE_SIZE); + __builtin_ia32_sfence(); + __builtin_ia32_clflush(fence->bo.map); + } + fence->exec2_objects[0].handle = fence->bo.gem_handle; fence->exec2_objects[0].relocation_count = 0; fence->exec2_objects[0].relocs_ptr = 0; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index bed01980a0d..008c8904d64 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -478,6 +478,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, }, }; + anv_state_clflush(vb_state); + struct anv_buffer vertex_buffer = { .device = device, .size = vb_size, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 24ff1ea75e4..cb1a84ed533 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -316,8 +316,7 @@ emit_load_color_clear(struct anv_cmd_buffer *cmd_buffer, }; struct anv_state state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, sizeof(vertex_data), 16); - memcpy(state.map, vertex_data, sizeof(vertex_data)); + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); struct anv_buffer vertex_buffer = { .device = device, @@ -485,8 +484,7 @@ emit_load_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, }; struct anv_state state = - 
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, sizeof(vertex_data), 16); - memcpy(state.map, vertex_data, sizeof(vertex_data)); + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); struct anv_buffer vertex_buffer = { .device = device, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 367d5180bd3..948e675d091 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -373,8 +373,12 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, memcpy(state.map, data, size); + if (!pipeline->device->info.has_llc) + anv_state_clflush(state); + return state.offset; } + static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, gl_shader_stage stage, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8be64d8887d..3c0441b66f3 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -394,6 +394,22 @@ struct anv_state_stream { #define CACHELINE_SIZE 64 #define CACHELINE_MASK 63 +static void inline +anv_state_clflush(struct anv_state state) +{ + /* state.map may not be cacheline aligned, so round down the start pointer + * to a cacheline boundary so we flush all pages that contain the state. + */ + void *end = state.map + state.alloc_size; + void *p = (void *) (((uintptr_t) state.map) & ~CACHELINE_MASK); + + __builtin_ia32_sfence(); + while (p < end) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } +} + void anv_block_pool_init(struct anv_block_pool *pool, struct anv_device *device, uint32_t block_size); void anv_block_pool_finish(struct anv_block_pool *pool); @@ -721,6 +737,20 @@ __gen_combine_address(struct anv_batch *batch, void *location, VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ } while (0) +#define anv_state_pool_emit(pool, cmd, align, ...) 
({ \ + const uint32_t __size = __anv_cmd_length(cmd) * 4; \ + struct anv_state __state = \ + anv_state_pool_alloc((pool), __size, align); \ + struct cmd __template = { \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(cmd)(NULL, __state.map, &__template); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(__state.map, __anv_cmd_length(cmd) * 4)); \ + if (!(pool)->block_pool->device->info.has_llc) \ + anv_state_clflush(__state); \ + __state; \ + }) + #define GEN7_MOCS (struct GEN7_MEMORY_OBJECT_CONTROL_STATE) { \ .GraphicsDataTypeGFDT = 0, \ .LLCCacheabilityControlLLCCC = 0, \ @@ -1104,8 +1134,7 @@ VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, void gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer); struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, - uint32_t *a, uint32_t dwords, - uint32_t alignment); + const void *data, uint32_t size, uint32_t alignment); struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, uint32_t *a, uint32_t *b, uint32_t dwords, uint32_t alignment); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 7101831080b..5efa7cce002 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -201,6 +201,9 @@ emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS, .ScissorRectPointer = scissor_state.offset); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(scissor_state); } GENX_FUNC(GEN7, GEN7) void @@ -266,19 +269,15 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) if (result != VK_SUCCESS) return result; - struct GEN7_INTERFACE_DESCRIPTOR_DATA desc = { - .KernelStartPointer = pipeline->cs_simd, - .BindingTablePointer = surfaces.offset, - .SamplerStatePointer = samplers.offset, - .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? 
*/ - }; - - uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); struct anv_state state = - anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); - - GEN7_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); - + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_INTERFACE_DESCRIPTOR_DATA, 64, + .KernelStartPointer = pipeline->cs_simd, + .BindingTablePointer = surfaces.offset, + .SamplerStatePointer = samplers.offset, + .NumberofThreadsinGPGPUThreadGroup = 0); + + const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, .InterfaceDescriptorTotalLength = size, .InterfaceDescriptorDataStartAddress = state.offset); @@ -441,6 +440,8 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.dynamic.stencil_reference.back, }; GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CC_STATE_POINTERS, diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 400b9ae997d..8262956ef07 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -253,65 +253,60 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, { struct anv_device *device = pipeline->device; - uint32_t num_dwords = GEN7_BLEND_STATE_length; - pipeline->blend_state = - anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); - if (info->pAttachments == NULL) { - struct GEN7_BLEND_STATE blend_state = { - .ColorBufferBlendEnable = false, - .WriteDisableAlpha = false, - .WriteDisableRed = false, - .WriteDisableGreen = false, - .WriteDisableBlue = false, - }; - - GEN7_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); + pipeline->blend_state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_BLEND_STATE, 64, + .ColorBufferBlendEnable = false, + .WriteDisableAlpha = false, + 
.WriteDisableRed = false, + .WriteDisableGreen = false, + .WriteDisableBlue = false); } else { /* FIXME-GEN7: All render targets share blend state settings on gen7, we * can't implement this. */ const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; + pipeline->blend_state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_BLEND_STATE, 64, - struct GEN7_BLEND_STATE blend_state = { - .ColorBufferBlendEnable = a->blendEnable, - .IndependentAlphaBlendEnable = true, /* FIXME: yes? */ - .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], + .ColorBufferBlendEnable = a->blendEnable, + .IndependentAlphaBlendEnable = true, /* FIXME: yes? */ + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], - .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], - .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], - .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], - .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, # if 0 - bool AlphaToOneEnable; - bool AlphaToCoverageDitherEnable; + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; # endif - .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), - .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), - .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), - .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + .WriteDisableAlpha = 
!(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], # if 0 - bool AlphaTestEnable; - uint32_t AlphaTestFunction; - bool ColorDitherEnable; - uint32_t XDitherOffset; - uint32_t YDitherOffset; - uint32_t ColorClampRange; - bool PreBlendColorClampEnable; - bool PostBlendColorClampEnable; + bool AlphaTestEnable; + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; # endif - }; - - GEN7_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); + ); } anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS, diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 6dcb5bffdf1..6ffbacd8e77 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -308,6 +308,9 @@ genX(image_view_init)(struct anv_image_view *iview, GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, &surface_state); + + if (!device->info.has_llc) + anv_state_clflush(iview->nonrt_surface_state); } if (image->needs_color_rt_surface_state) { @@ -326,5 +329,7 @@ genX(image_view_init)(struct anv_image_view *iview, GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->color_rt_surface_state.map, &surface_state); + if (!device->info.has_llc) + anv_state_clflush(iview->color_rt_surface_state); } } diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index a23421102aa..68ee6bb5696 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -107,6 +107,11 @@ emit_viewport_state(struct 
anv_cmd_buffer *cmd_buffer, GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 32, &cc_viewport); } + if (!cmd_buffer->device->info.has_llc) { + anv_state_clflush(sf_clip_state); + anv_state_clflush(cc_state); + } + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), .CCViewportPointer = cc_state.offset); @@ -270,6 +275,9 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) }; GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CC_STATE_POINTERS, .ColorCalcStatePointer = cc_state.offset, @@ -317,6 +325,9 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) }; GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + anv_batch_emit(&cmd_buffer->batch, GEN9_3DSTATE_CC_STATE_POINTERS, .ColorCalcStatePointer = cc_state.offset, @@ -500,22 +511,18 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) if (result != VK_SUCCESS) return result; - struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { - .KernelStartPointer = pipeline->cs_simd, - .KernelStartPointerHigh = 0, - .BindingTablePointer = surfaces.offset, - .BindingTableEntryCount = 0, - .SamplerStatePointer = samplers.offset, - .SamplerCount = 0, - .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? 
*/ - }; - - uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); struct anv_state state = - anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); - - GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, state.map, &desc); + anv_state_pool_emit(&device->dynamic_state_pool, + GENX(INTERFACE_DESCRIPTOR_DATA), 64, + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .NumberofThreadsinGPGPUThreadGroup = 0); + uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), .InterfaceDescriptorTotalLength = size, .InterfaceDescriptorDataStartAddress = state.offset); diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index d0395741474..faf997a4304 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -259,6 +259,8 @@ emit_cb_state(struct anv_pipeline *pipeline, } GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); + if (!device->info.has_llc) + anv_state_clflush(pipeline->blend_state); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), .BlendStatePointer = pipeline->blend_state.offset, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index e7acbd02281..901cc3b25a8 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -280,6 +280,8 @@ genX(image_view_init)(struct anv_image_view *iview, GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, &surface_state); + if (!device->info.has_llc) + anv_state_clflush(iview->nonrt_surface_state); } if (image->needs_color_rt_surface_state) { @@ -297,6 +299,8 @@ genX(image_view_init)(struct anv_image_view *iview, GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->color_rt_surface_state.map, &surface_state); + if (!device->info.has_llc) + 
anv_state_clflush(iview->color_rt_surface_state); } } -- cgit v1.2.3 From c3c61d210f0929a71e031dfb3830bf39cee583a4 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Dec 2015 23:09:09 -0800 Subject: vk: Expose two memory types for non-LLC GPUs We're required to expose a host-visible, coherent memory type. On big core GPUs that share, LLC, we can expose one such memory type that's also cached. However, on non-LLC GPUs we can't both be cached and coherent. Thus, we expose both the required coherent type and the cached but non-coherent combination. --- src/vulkan/anv_device.c | 46 ++++++++++++++++++++++++++++++++++++---------- src/vulkan/anv_private.h | 1 + 2 files changed, 37 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 384a457742f..a8bc409144d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -553,15 +553,38 @@ void anv_GetPhysicalDeviceMemoryProperties( */ heap_size = 3 * physical_device->aperture_size / 4; - /* The property flags below are valid only for llc platforms. */ - pMemoryProperties->memoryTypeCount = 1; - pMemoryProperties->memoryTypes[0] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 1, - }; + if (physical_device->info->has_llc) { + /* Big core GPUs share LLC with the CPU and thus one memory type can be + * both cached and coherent at the same time. + */ + pMemoryProperties->memoryTypeCount = 1; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 1, + }; + } else { + /* The spec requires that we expose a host-visible, coherent memory + * type, but Atom GPUs don't share LLC. 
Thus we offer two memory types + * to give the application a choice between cached, but not coherent and + * coherent but uncached (WC though). + */ + pMemoryProperties->memoryTypeCount = 2; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = 1, + }; + pMemoryProperties->memoryTypes[1] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 1, + }; + } pMemoryProperties->memoryHeapCount = 1; pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { @@ -976,7 +999,8 @@ VkResult anv_AllocateMemory( assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); /* We support exactly one memory heap. */ - assert(pAllocateInfo->memoryTypeIndex == 0); + assert(pAllocateInfo->memoryTypeIndex == 0 || + (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2)); /* FINISHME: Fail if allocation request exceeds heap size. */ @@ -989,6 +1013,8 @@ VkResult anv_AllocateMemory( if (result != VK_SUCCESS) goto fail; + mem->type_index = pAllocateInfo->memoryTypeIndex; + *pMem = anv_device_memory_to_handle(mem); return VK_SUCCESS; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 3c0441b66f3..7c8cf241624 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -784,6 +784,7 @@ __gen_combine_address(struct anv_batch *batch, void *location, struct anv_device_memory { struct anv_bo bo; + uint32_t type_index; VkDeviceSize map_size; void * map; }; -- cgit v1.2.3 From bbb6875f35c783d4f350dc9fbf2e2904b3d47a5a Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Dec 2015 23:58:05 -0800 Subject: vk: Map uncached, coherent memory as write-combine This gives us the required characteristics for the memory type. 
--- src/vulkan/anv_allocator.c | 2 +- src/vulkan/anv_device.c | 14 ++++++++++++-- src/vulkan/anv_gem.c | 7 ++----- src/vulkan/anv_private.h | 2 +- src/vulkan/anv_query.c | 2 +- 5 files changed, 17 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 970d809e8a3..e8129c6d909 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -825,7 +825,7 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) assert(new_bo.size == pool->bo_size); - new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size); + new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size, 0); if (new_bo.map == NULL) { anv_gem_close(pool->device, new_bo.gem_handle); return vk_error(VK_ERROR_MEMORY_MAP_FAILED); diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index a8bc409144d..7e1b552ca7f 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -121,6 +121,12 @@ anv_physical_device_init(struct anv_physical_device *device, goto fail; } + if (anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION < 1)) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing wc mmap"); + goto fail; + } + close(fd); brw_process_intel_debug_variable(); @@ -1059,7 +1065,11 @@ VkResult anv_MapMemory( * pointer here, but that may exhaust virtual memory on 32 bit * userspace. 
*/ - mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size); + uint32_t gem_flags = 0; + if (!device->info.has_llc && mem->type_index == 0) + gem_flags |= I915_MMAP_WC; + + mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size, gem_flags); mem->map_size = size; *ppData = mem->map; @@ -1254,7 +1264,7 @@ VkResult anv_CreateFence( goto fail; fence->bo.map = - anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size); + anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size, 0); batch.next = batch.start = fence->bo.map; batch.end = fence->bo.map + fence->bo.size; anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); diff --git a/src/vulkan/anv_gem.c b/src/vulkan/anv_gem.c index 57b8505e156..429f4a29fad 100644 --- a/src/vulkan/anv_gem.c +++ b/src/vulkan/anv_gem.c @@ -84,7 +84,7 @@ anv_gem_close(struct anv_device *device, int gem_handle) */ void* anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, - uint64_t offset, uint64_t size) + uint64_t offset, uint64_t size, uint32_t flags) { struct drm_i915_gem_mmap gem_mmap; int ret; @@ -94,10 +94,7 @@ anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, gem_mmap.offset = offset; gem_mmap.size = size; VG_CLEAR(gem_mmap.addr_ptr); - -#ifdef I915_MMAP_WC - gem_mmap.flags = 0; -#endif + gem_mmap.flags = flags; ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); if (ret != 0) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 7c8cf241624..cc0281a0d9a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -597,7 +597,7 @@ struct anv_device { }; void* anv_gem_mmap(struct anv_device *device, - uint32_t gem_handle, uint64_t offset, uint64_t size); + uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); void anv_gem_munmap(void *p, uint64_t size); uint32_t anv_gem_create(struct anv_device *device, size_t size); void anv_gem_close(struct anv_device *device, int gem_handle); diff --git a/src/vulkan/anv_query.c 
b/src/vulkan/anv_query.c index 911b9a41264..6f9d7d89aa6 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -69,7 +69,7 @@ VkResult anv_CreateQueryPool( if (result != VK_SUCCESS) goto fail; - pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size); + pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0); *pQueryPool = anv_query_pool_to_handle(pool); -- cgit v1.2.3 From dac57750db6779b3280fca3b7a375243b4aea623 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 1 Dec 2015 15:39:30 -0800 Subject: vk: Turn on Bay Trail, Cherryview and Broxton support --- src/vulkan/anv_device.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 7e1b552ca7f..7556d8353d5 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -87,10 +87,14 @@ anv_physical_device_init(struct anv_physical_device *device, fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n"); } else if (device->info->gen == 7 && !device->info->is_baytrail) { fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); - } else if (device->info->gen == 9) { + } else if (device->info->gen == 7 && device->info->is_baytrail) { + fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n"); + } else if (device->info->gen == 9 && !device->info->is_broxton) { fprintf(stderr, "WARNING: Skylake Vulkan support is incomplete\n"); - } else if (device->info->gen == 8 && !device->info->is_cherryview) { - /* Broadwell is as fully supported as anything */ + } else if (device->info->gen == 9 && device->info->is_broxton) { + fprintf(stderr, "WARNING: Broxton Vulkan support is incomplete\n"); + } else if (device->info->gen == 8) { + /* Broadwell/Cherryview is as fully supported as anything */ } else { result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, "Vulkan not yet supported on %s", device->name); @@ -115,12 +119,6 @@ 
anv_physical_device_init(struct anv_physical_device *device, goto fail; } - if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC)) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "non-llc gpu"); - goto fail; - } - if (anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION < 1)) { result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, "kernel missing wc mmap"); -- cgit v1.2.3 From f1f78a371e194459699228253dd5dcb97347753b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Dec 2015 12:52:43 -0800 Subject: vk: gem handles are uint32_t No functional difference, but lets be consistent with the kernel API. --- src/vulkan/anv_allocator.c | 2 +- src/vulkan/anv_gem.c | 17 +++++++++-------- src/vulkan/anv_private.h | 19 +++++++++---------- 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index e8129c6d909..4cff84131aa 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -328,7 +328,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) { size_t size; void *map; - int gem_handle; + uint32_t gem_handle; struct anv_mmap_cleanup *cleanup; pthread_mutex_lock(&pool->device->mutex); diff --git a/src/vulkan/anv_gem.c b/src/vulkan/anv_gem.c index 429f4a29fad..b815f97ed69 100644 --- a/src/vulkan/anv_gem.c +++ b/src/vulkan/anv_gem.c @@ -70,7 +70,7 @@ anv_gem_create(struct anv_device *device, size_t size) } void -anv_gem_close(struct anv_device *device, int gem_handle) +anv_gem_close(struct anv_device *device, uint32_t gem_handle) { struct drm_gem_close close; @@ -116,7 +116,7 @@ anv_gem_munmap(void *p, uint64_t size) munmap(p, size); } -int +uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size) { struct drm_i915_gem_userptr userptr; @@ -135,7 +135,8 @@ anv_gem_userptr(struct anv_device *device, void *mem, size_t size) } int -anv_gem_set_caching(struct anv_device *device, int gem_handle, uint32_t caching) 
+anv_gem_set_caching(struct anv_device *device, + uint32_t gem_handle, uint32_t caching) { struct drm_i915_gem_caching gem_caching; @@ -147,7 +148,7 @@ anv_gem_set_caching(struct anv_device *device, int gem_handle, uint32_t caching) } int -anv_gem_set_domain(struct anv_device *device, int gem_handle, +anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, uint32_t read_domains, uint32_t write_domain) { struct drm_i915_gem_set_domain gem_set_domain; @@ -164,7 +165,7 @@ anv_gem_set_domain(struct anv_device *device, int gem_handle, * On error, \a timeout_ns holds the remaining time. */ int -anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns) +anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) { struct drm_i915_gem_wait wait; int ret; @@ -189,7 +190,7 @@ anv_gem_execbuffer(struct anv_device *device, int anv_gem_set_tiling(struct anv_device *device, - int gem_handle, uint32_t stride, uint32_t tiling) + uint32_t gem_handle, uint32_t stride, uint32_t tiling) { struct drm_i915_gem_set_tiling set_tiling; int ret; @@ -269,7 +270,7 @@ anv_gem_get_aperture(int fd, uint64_t *size) } int -anv_gem_handle_to_fd(struct anv_device *device, int gem_handle) +anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) { struct drm_prime_handle args; int ret; @@ -285,7 +286,7 @@ anv_gem_handle_to_fd(struct anv_device *device, int gem_handle) return args.fd; } -int +uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd) { struct drm_prime_handle args; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index cc0281a0d9a..c1b777e5973 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -266,7 +266,7 @@ anv_vector_finish(struct anv_vector *queue) __anv_vector_offset += (queue)->element_size) struct anv_bo { - int gem_handle; + uint32_t gem_handle; /* Index into the current validation list. 
This is used by the * validation list building alrogithm to track which buffers are already @@ -600,22 +600,21 @@ void* anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); void anv_gem_munmap(void *p, uint64_t size); uint32_t anv_gem_create(struct anv_device *device, size_t size); -void anv_gem_close(struct anv_device *device, int gem_handle); -int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); -int anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns); +void anv_gem_close(struct anv_device *device, uint32_t gem_handle); +uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size); +int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns); int anv_gem_execbuffer(struct anv_device *device, struct drm_i915_gem_execbuffer2 *execbuf); -int anv_gem_set_tiling(struct anv_device *device, int gem_handle, +int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, uint32_t stride, uint32_t tiling); int anv_gem_create_context(struct anv_device *device); int anv_gem_destroy_context(struct anv_device *device, int context); int anv_gem_get_param(int fd, uint32_t param); int anv_gem_get_aperture(int fd, uint64_t *size); -int anv_gem_handle_to_fd(struct anv_device *device, int gem_handle); -int anv_gem_fd_to_handle(struct anv_device *device, int fd); -int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); -int anv_gem_set_caching(struct anv_device *device, int gem_handle, uint32_t caching); -int anv_gem_set_domain(struct anv_device *device, int gem_handle, +int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); +uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); +int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching); +int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, uint32_t read_domains, uint32_t write_domain); 
VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); -- cgit v1.2.3 From 4dd5ef9e09c642d9482092cf3dd10455571cb28b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Dec 2015 15:46:30 -0800 Subject: vk: Add needed builddir subdirectories to the include path This fixes out-of-tree builds and closes #1 --- src/vulkan/Makefile.am | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index fd74393be95..5651f661e05 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -61,6 +61,8 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/gallium/include \ + -I$(top_builddir)/src \ + -I$(top_builddir)/src/glsl/nir \ -I$(top_builddir)/src/vulkan libvulkan_la_CFLAGS = \ -- cgit v1.2.3 From 8b9ceda9f17a0f5def5319210bbe5cfec2f69ed2 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 4 Dec 2015 16:11:47 -0800 Subject: anv/image: Delete old ifdef'd out code --- src/vulkan/anv_image.c | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index f99155cad6a..9da4b8766ee 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -573,35 +573,3 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag return NULL; } } - -#if 0 - VkImageAspectFlags aspect_mask = 0; - if (format->depth_format) - aspect_mask |= VK_IMAGE_ASPECT_DEPTH_BIT; - if (format->has_stencil) - aspect_mask |= VK_IMAGE_ASPECT_STENCIL_BIT; - if (!aspect_mask) - aspect_mask |= VK_IMAGE_ASPECT_COLOR_BIT; - - anv_image_view_init(iview, device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = info->image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = info->format, - .channels = { - .r = VK_CHANNEL_SWIZZLE_R, - .g = VK_CHANNEL_SWIZZLE_G, - .b = 
VK_CHANNEL_SWIZZLE_B, - .a = VK_CHANNEL_SWIZZLE_A, - }, - .subresourceRange = { - .aspectMask = aspect_mask, - .baseMipLevel = info->mipLevel, - .mipLevels = 1, - .baseArrayLayer = info->baseArraySlice, - .arraySize = info->arraySize, - }, - }, - NULL); -#endif -- cgit v1.2.3 From a09b4c298ceed529ffc6dc5f48b1721aeb1d08a0 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 4 Dec 2015 16:18:35 -0800 Subject: anv: Add func anv_get_isl_format() --- src/vulkan/anv_formats.c | 23 +++++++++++++++++++++++ src/vulkan/anv_private.h | 3 +++ 2 files changed, 26 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index abbf667a079..19de40b52c2 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -221,6 +221,29 @@ anv_format_for_vk_format(VkFormat format) return &anv_formats[format]; } +/** + * Exactly one bit must be set in \a aspect. + */ +enum isl_format +anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect) +{ + const struct anv_format *anv_fmt = &anv_formats[format]; + + switch (aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: + return anv_fmt->surface_format; + case VK_IMAGE_ASPECT_DEPTH_BIT: + assert(anv_fmt->depth_format != 0); + return anv_fmt->surface_format; + case VK_IMAGE_ASPECT_STENCIL_BIT: + assert(anv_fmt->has_stencil); + return ISL_FORMAT_R8_UINT; + default: + unreachable("bad VkImageAspect"); + return ISL_FORMAT_UNSUPPORTED; + } +} + // Format capabilities void anv_validate_GetPhysicalDeviceFormatProperties( diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index c1b777e5973..156905ffa5d 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1385,6 +1385,9 @@ extern const struct anv_format *const anv_format_s8_uint; const struct anv_format * anv_format_for_vk_format(VkFormat format); +enum isl_format +anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect); + static inline bool anv_format_is_color(const struct anv_format *format) { -- cgit v1.2.3 From 
2f270f0d15eba2b1ac0a86cf6ebb8e2cb0767d8a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 4 Dec 2015 16:29:25 -0800 Subject: anv/image: Fix choice of isl_surf_usage for depthstencil images Fixes assertion in vkCreateImage when VkFormat is combined depthstencil. Fixed many vulkancts tests that use combined depthstencil. For example, fixes dEQP-VK.pipeline.depth.format.d16_unorm_s8_uint.compare_ops.\ not_equal_less_or_equal_not_equal_greater. --- src/vulkan/anv_image.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 9da4b8766ee..00a2c14f141 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -90,11 +90,11 @@ choose_isl_tiling_flags(const struct anv_image_create_info *anv_info) /** * The \a format argument is required and overrides any format found in struct - * anv_image_create_info. + * anv_image_create_info. Exactly one bit must be set in \a aspect. 
*/ static isl_surf_usage_flags_t choose_isl_surf_usage(const struct anv_image_create_info *info, - const struct anv_format *format) + VkImageAspectFlags aspect) { const VkImageCreateInfo *vk_info = info->vk_info; isl_surf_usage_flags_t isl_flags = 0; @@ -115,12 +115,15 @@ choose_isl_surf_usage(const struct anv_image_create_info *info, isl_flags |= ISL_SURF_USAGE_CUBE_BIT; if (vk_info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - assert((format->depth_format != 0) ^ format->has_stencil); - - if (format->depth_format) { + switch (aspect) { + default: + unreachable("bad VkImageAspect"); + case VK_IMAGE_ASPECT_DEPTH_BIT: isl_flags |= ISL_SURF_USAGE_DEPTH_BIT; - } else if (format->has_stencil) { + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: isl_flags |= ISL_SURF_USAGE_STENCIL_BIT; + break; } } @@ -138,13 +141,13 @@ choose_isl_surf_usage(const struct anv_image_create_info *info, } /** - * The \a format argument is required and overrides any format in - * struct anv_image_create_info. + * The \a format argument is required and overrides any format in struct + * anv_image_create_info. Exactly one bit must be set in \a aspect. 
*/ static VkResult anv_image_make_surface(const struct anv_device *dev, const struct anv_image_create_info *anv_info, - const struct anv_format *format, + VkImageAspectFlags aspect, uint64_t *inout_image_size, uint32_t *inout_image_alignment, struct anv_surface *out_anv_surf) @@ -159,7 +162,7 @@ anv_image_make_surface(const struct anv_device *dev, isl_surf_init(&dev->isl_dev, &out_anv_surf->isl, .dim = vk_to_isl_surf_dim[vk_info->imageType], - .format = format->surface_format, + .format = anv_get_isl_format(vk_info->format, aspect), .width = vk_info->extent.width, .height = vk_info->extent.height, .depth = vk_info->extent.depth, @@ -168,7 +171,7 @@ anv_image_make_surface(const struct anv_device *dev, .samples = vk_info->samples, .min_alignment = 0, .min_pitch = 0, - .usage = choose_isl_surf_usage(anv_info, format), + .usage = choose_isl_surf_usage(anv_info, aspect), .tiling_flags = choose_isl_tiling_flags(anv_info)); out_anv_surf->offset = align_u32(*inout_image_size, @@ -249,14 +252,16 @@ anv_image_create(VkDevice _device, } if (likely(anv_format_is_color(image->format))) { - r = anv_image_make_surface(device, create_info, image->format, + r = anv_image_make_surface(device, create_info, + VK_IMAGE_ASPECT_COLOR_BIT, &image->size, &image->alignment, &image->color_surface); if (r != VK_SUCCESS) goto fail; } else { if (image->format->depth_format) { - r = anv_image_make_surface(device, create_info, image->format, + r = anv_image_make_surface(device, create_info, + VK_IMAGE_ASPECT_DEPTH_BIT, &image->size, &image->alignment, &image->depth_surface); if (r != VK_SUCCESS) @@ -264,7 +269,8 @@ anv_image_create(VkDevice _device, } if (image->format->has_stencil) { - r = anv_image_make_surface(device, create_info, anv_format_s8_uint, + r = anv_image_make_surface(device, create_info, + VK_IMAGE_ASPECT_STENCIL_BIT, &image->size, &image->alignment, &image->stencil_surface); if (r != VK_SUCCESS) -- cgit v1.2.3 From 7337870036263518ccac75758bdaa415c28a2c1a Mon Sep 17 00:00:00 2001 
From: Kristian Høgsberg Kristensen Date: Fri, 4 Dec 2015 17:13:26 -0800 Subject: vk: Move isl files to libisl.la helper library These will be in their own library eventually - let's just do that now. --- src/vulkan/Makefile.am | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 5651f661e05..7c108e4b145 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -44,7 +44,7 @@ PER_GEN_LIBS = \ libanv-gen8.la \ libanv-gen9.la -noinst_LTLIBRARIES = $(PER_GEN_LIBS) +noinst_LTLIBRARIES = $(PER_GEN_LIBS) libisl.la # The gallium includes are for the util/u_math.h include from main/macros.h @@ -92,7 +92,11 @@ VULKAN_SOURCES = \ anv_query.c \ anv_util.c \ anv_wsi.c \ - anv_wsi_x11.c \ + anv_wsi_x11.c + +libisl_la_CFLAGS = $(libvulkan_la_CFLAGS) + +libisl_la_SOURCES = \ isl.c \ isl_format_layout.c \ isl_gen4.c \ @@ -180,7 +184,7 @@ libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ ../mesa/libmesa.la \ ../mesa/drivers/dri/common/libdri_test_stubs.la \ -lpthread -ldl -lstdc++ \ - $(PER_GEN_LIBS) + $(PER_GEN_LIBS) libisl.la # Libvulkan with dummy gem. Used for unit tests. -- cgit v1.2.3 From 0a5bee1fe6ab803c2efd917fae1ba91fc3dc0472 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Dec 2015 17:18:16 -0800 Subject: vk: Don't override and hardcode autoconf CFLAGS To disable optimizations pass CFLAGS="-O0 -g" on the configure command line. 
--- src/vulkan/Makefile.am | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 7c108e4b145..2e70fef6fb1 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -65,9 +65,7 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src/glsl/nir \ -I$(top_builddir)/src/vulkan -libvulkan_la_CFLAGS = \ - -Wall -Wno-unused-parameter -fvisibility=hidden -O0 -g \ - -Wstrict-prototypes -Wmissing-prototypes -Wno-override-init +libvulkan_la_CFLAGS = $(CFLAGS) -Wno-override-init VULKAN_SOURCES = \ anv_allocator.c \ -- cgit v1.2.3 From 01e2932d6a8e4b342f202d4bf3c70a9798d25e90 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 7 Dec 2015 08:40:14 -0800 Subject: anv: Remove unused anv_format_s8_uint This is no longer needed after migrating to isl. --- src/vulkan/anv_formats.c | 3 --- src/vulkan/anv_private.h | 7 ------- 2 files changed, 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 19de40b52c2..eba0f1122a4 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -212,9 +212,6 @@ static const struct anv_format anv_formats[] = { #undef fmt -const struct anv_format *const -anv_format_s8_uint = &anv_formats[VK_FORMAT_S8_UINT]; - const struct anv_format * anv_format_for_vk_format(VkFormat format) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 156905ffa5d..5e611242481 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1375,13 +1375,6 @@ struct anv_format { bool has_stencil; }; -/** - * Stencil formats are often a special case. To reduce the number of lookups - * into the VkFormat-to-anv_format translation table when working with - * stencil, here is the handle to the table's entry for VK_FORMAT_S8_UINT. 
- */ -extern const struct anv_format *const anv_format_s8_uint; - const struct anv_format * anv_format_for_vk_format(VkFormat format); -- cgit v1.2.3 From c97d8af9aab090e78a145a219d49b6fb94ce0e13 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 7 Dec 2015 08:42:11 -0800 Subject: anv: Fix anv_gem_set_tiling to respect tiling param Function anv_gem_set_tiling() ignored its 'tiling' parameter. It unconditionally set the bo's tiling to I915_TILING_X. --- src/vulkan/anv_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_gem.c b/src/vulkan/anv_gem.c index b815f97ed69..9d0d0332001 100644 --- a/src/vulkan/anv_gem.c +++ b/src/vulkan/anv_gem.c @@ -202,7 +202,7 @@ anv_gem_set_tiling(struct anv_device *device, do { VG_CLEAR(set_tiling); set_tiling.handle = gem_handle; - set_tiling.tiling_mode = I915_TILING_X; + set_tiling.tiling_mode = tiling; set_tiling.stride = stride; ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); -- cgit v1.2.3 From 64e8af69b168ae3e4db0fde7cae4afbb0721b3e5 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 7 Dec 2015 08:50:28 -0800 Subject: anv: Use isl_tiling_flags in anv_image_create_info Replace anv_image_create_info::force_tiling anv_image_create_info::tiling with the bitmask anv_image_create_info::isl_tiling_flags This allows us to drop the function anv_image.c:choose_isl_tiling_flags(). 
--- src/vulkan/anv_image.c | 25 +++++++------------------ src/vulkan/anv_intel.c | 3 +-- src/vulkan/anv_private.h | 3 +-- src/vulkan/anv_wsi_wayland.c | 3 +-- src/vulkan/anv_wsi_x11.c | 3 +-- 5 files changed, 11 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 00a2c14f141..b5c9a86ff18 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -71,23 +71,6 @@ anv_image_view_info_for_vk_image_view_type(VkImageViewType type) return anv_image_view_info_table[type]; } -static isl_tiling_flags_t -choose_isl_tiling_flags(const struct anv_image_create_info *anv_info) -{ - const VkImageCreateInfo *vk_info = anv_info->vk_info; - - if (anv_info->force_tiling) { - return 1u << anv_info->tiling; - } else if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) { - return ISL_TILING_LINEAR_BIT; - } else if (vk_info->tiling == VK_IMAGE_TILING_OPTIMAL) { - return ISL_TILING_ANY_MASK; - } else { - unreachable("bad anv_image_create_info"); - return 0; - } -} - /** * The \a format argument is required and overrides any format found in struct * anv_image_create_info. Exactly one bit must be set in \a aspect. 
@@ -160,6 +143,11 @@ anv_image_make_surface(const struct anv_device *dev, [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D, }; + + isl_tiling_flags_t tiling_flags = anv_info->isl_tiling_flags; + if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) + tiling_flags &= ISL_TILING_LINEAR_BIT; + isl_surf_init(&dev->isl_dev, &out_anv_surf->isl, .dim = vk_to_isl_surf_dim[vk_info->imageType], .format = anv_get_isl_format(vk_info->format, aspect), @@ -172,7 +160,7 @@ anv_image_make_surface(const struct anv_device *dev, .min_alignment = 0, .min_pitch = 0, .usage = choose_isl_surf_usage(anv_info, aspect), - .tiling_flags = choose_isl_tiling_flags(anv_info)); + .tiling_flags = tiling_flags); out_anv_surf->offset = align_u32(*inout_image_size, out_anv_surf->isl.alignment); @@ -298,6 +286,7 @@ anv_CreateImage(VkDevice device, return anv_image_create(device, &(struct anv_image_create_info) { .vk_info = pCreateInfo, + .isl_tiling_flags = ISL_TILING_ANY_MASK, }, pAllocator, pImage); diff --git a/src/vulkan/anv_intel.c b/src/vulkan/anv_intel.c index 95665b5aa41..d95d9afe8cf 100644 --- a/src/vulkan/anv_intel.c +++ b/src/vulkan/anv_intel.c @@ -62,8 +62,7 @@ VkResult anv_CreateDmaBufImageINTEL( anv_image_create(_device, &(struct anv_image_create_info) { - .force_tiling = true, - .tiling = ISL_TILING_X, + .isl_tiling_flags = ISL_TILING_X_BIT, .stride = pCreateInfo->strideInBytes, .vk_info = &(VkImageCreateInfo) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 5e611242481..cb3f9a7b367 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1472,8 +1472,7 @@ struct anv_image_view { struct anv_image_create_info { const VkImageCreateInfo *vk_info; - bool force_tiling; - enum isl_tiling tiling; + isl_tiling_flags_t isl_tiling_flags; uint32_t stride; }; diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index dd5baa452a0..79453b594d8 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -624,8 +624,7 @@ 
wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, VkImage vk_image; result = anv_image_create(vk_device, &(struct anv_image_create_info) { - .force_tiling = true, - .tiling = ISL_TILING_X, + .isl_tiling_flags = ISL_TILING_X_BIT, .stride = 0, .vk_info = &(VkImageCreateInfo) { diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 15ad98c3f8b..f340de476ca 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -344,8 +344,7 @@ x11_surface_create_swapchain(struct anv_wsi_surface *wsi_surface, anv_image_create(anv_device_to_handle(device), &(struct anv_image_create_info) { - .force_tiling = true, - .tiling = ISL_TILING_X, + .isl_tiling_flags = ISL_TILING_X_BIT, .stride = 0, .vk_info = &(VkImageCreateInfo) { -- cgit v1.2.3 From 3d85a28e90b534bf032bc25ae661327b7e1c8758 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 7 Dec 2015 08:53:43 -0800 Subject: anv: Assert the succes of isl_surf_init() --- src/vulkan/anv_image.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index b5c9a86ff18..a3f6e753d2d 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -136,6 +136,7 @@ anv_image_make_surface(const struct anv_device *dev, struct anv_surface *out_anv_surf) { const VkImageCreateInfo *vk_info = anv_info->vk_info; + bool ok UNUSED; static const enum isl_surf_dim vk_to_isl_surf_dim[] = { [VK_IMAGE_TYPE_1D] = ISL_SURF_DIM_1D, @@ -148,7 +149,7 @@ anv_image_make_surface(const struct anv_device *dev, if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) tiling_flags &= ISL_TILING_LINEAR_BIT; - isl_surf_init(&dev->isl_dev, &out_anv_surf->isl, + ok = isl_surf_init(&dev->isl_dev, &out_anv_surf->isl, .dim = vk_to_isl_surf_dim[vk_info->imageType], .format = anv_get_isl_format(vk_info->format, aspect), .width = vk_info->extent.width, @@ -162,6 +163,11 @@ anv_image_make_surface(const struct anv_device *dev, .usage = 
choose_isl_surf_usage(anv_info, aspect), .tiling_flags = tiling_flags); + /* isl_surf_init() will fail only if provided invalid input. Invalid input + * is illegal in Vulkan. + */ + assert(ok); + out_anv_surf->offset = align_u32(*inout_image_size, out_anv_surf->isl.alignment); -- cgit v1.2.3 From 9098e0f074a55dfe00bf1412b035e501fb70ad1d Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 7 Dec 2015 09:22:49 -0800 Subject: anv/image: Refactor anv_image_make_surface() Reduce the number of function parameters. Deduce the anv_image::*_surface from the parameters instead of requiring the caller to do that. --- src/vulkan/anv_image.c | 66 +++++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index a3f6e753d2d..37a0d8faaa9 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -124,16 +124,34 @@ choose_isl_surf_usage(const struct anv_image_create_info *info, } /** - * The \a format argument is required and overrides any format in struct - * anv_image_create_info. Exactly one bit must be set in \a aspect. + * Exactly one bit must be set in \a aspect. + */ +static struct anv_surface * +get_surface(struct anv_image *image, VkImageAspectFlags aspect) +{ + switch (aspect) { + default: + unreachable("bad VkImageAspect"); + case VK_IMAGE_ASPECT_COLOR_BIT: + return &image->color_surface; + case VK_IMAGE_ASPECT_DEPTH_BIT: + return &image->depth_surface; + case VK_IMAGE_ASPECT_STENCIL_BIT: + return &image->stencil_surface; + } +} + +/** + * Initialize the anv_image::*_surface selected by \a aspect. Then update the + * image's memory requirements (that is, the image's size and alignment). + * + * Exactly one bit must be set in \a aspect. 
*/ static VkResult -anv_image_make_surface(const struct anv_device *dev, - const struct anv_image_create_info *anv_info, - VkImageAspectFlags aspect, - uint64_t *inout_image_size, - uint32_t *inout_image_alignment, - struct anv_surface *out_anv_surf) +make_surface(const struct anv_device *dev, + struct anv_image *image, + const struct anv_image_create_info *anv_info, + VkImageAspectFlags aspect) { const VkImageCreateInfo *vk_info = anv_info->vk_info; bool ok UNUSED; @@ -144,12 +162,13 @@ anv_image_make_surface(const struct anv_device *dev, [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D, }; - isl_tiling_flags_t tiling_flags = anv_info->isl_tiling_flags; if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) tiling_flags &= ISL_TILING_LINEAR_BIT; - ok = isl_surf_init(&dev->isl_dev, &out_anv_surf->isl, + struct anv_surface *anv_surf = get_surface(image, aspect); + + ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, .dim = vk_to_isl_surf_dim[vk_info->imageType], .format = anv_get_isl_format(vk_info->format, aspect), .width = vk_info->extent.width, @@ -168,12 +187,9 @@ anv_image_make_surface(const struct anv_device *dev, */ assert(ok); - out_anv_surf->offset = align_u32(*inout_image_size, - out_anv_surf->isl.alignment); - - *inout_image_size = out_anv_surf->offset + out_anv_surf->isl.size; - *inout_image_alignment = MAX(*inout_image_alignment, - out_anv_surf->isl.alignment); + anv_surf->offset = align_u32(image->size, anv_surf->isl.alignment); + image->size = anv_surf->offset + anv_surf->isl.size; + image->alignment = MAX(image->alignment, anv_surf->isl.alignment); return VK_SUCCESS; } @@ -246,27 +262,21 @@ anv_image_create(VkDevice _device, } if (likely(anv_format_is_color(image->format))) { - r = anv_image_make_surface(device, create_info, - VK_IMAGE_ASPECT_COLOR_BIT, - &image->size, &image->alignment, - &image->color_surface); + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_COLOR_BIT); if (r != VK_SUCCESS) goto fail; } else { if (image->format->depth_format) { - r = 
anv_image_make_surface(device, create_info, - VK_IMAGE_ASPECT_DEPTH_BIT, - &image->size, &image->alignment, - &image->depth_surface); + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_DEPTH_BIT); if (r != VK_SUCCESS) goto fail; } if (image->format->has_stencil) { - r = anv_image_make_surface(device, create_info, - VK_IMAGE_ASPECT_STENCIL_BIT, - &image->size, &image->alignment, - &image->stencil_surface); + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_STENCIL_BIT); if (r != VK_SUCCESS) goto fail; } -- cgit v1.2.3 From cd75ff5d1749b9a982546b54cd2655930d2a5402 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 16 Nov 2015 22:28:52 -0800 Subject: anv/pipeline: Only apply a pipeline layout if we have one --- src/vulkan/anv_pipeline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 948e675d091..821f5e33774 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -338,7 +338,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ - anv_nir_apply_pipeline_layout(nir, pipeline->layout); + if (pipeline->layout) + anv_nir_apply_pipeline_layout(nir, pipeline->layout); /* All binding table offsets provided by apply_pipeline_layout() are * relative to the start of the bindint table (plus MAX_RTS for VS). -- cgit v1.2.3 From 42b4417031eda1496db782d117c563ffae7929ea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 16 Nov 2015 22:26:42 -0800 Subject: HACK/i965: Disable assign_var_locations on uniforms This conflicts with the way we're doing uniforms in Vulkan. 
--- src/mesa/drivers/dri/i965/brw_nir.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 91358d8f389..10e92735210 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -298,9 +298,9 @@ brw_lower_nir(nir_shader *nir, OPT_V(brw_nir_lower_inputs, devinfo, is_scalar); OPT_V(brw_nir_lower_outputs, is_scalar); - nir_assign_var_locations(&nir->uniforms, - &nir->num_uniforms, - is_scalar ? type_size_scalar : type_size_vec4); + //nir_assign_var_locations(&nir->uniforms, + // &nir->num_uniforms, + // is_scalar ? type_size_scalar : type_size_vec4); OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4); if (shader_prog) { -- cgit v1.2.3 From 8f83222d3752f1855e6861ea5e29b5f2487b26e4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Dec 2015 21:00:19 -0800 Subject: isl: Add initial support for storage images --- src/vulkan/Makefile.am | 1 + src/vulkan/isl.h | 5 ++ src/vulkan/isl_image.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 src/vulkan/isl_image.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 2e70fef6fb1..e51f3cf01d7 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -107,6 +107,7 @@ libisl_la_SOURCES = \ isl_gen8.h \ isl_gen9.c \ isl_gen9.h \ + isl_image.c \ $(NULL) BUILT_SOURCES = \ diff --git a/src/vulkan/isl.h b/src/vulkan/isl.h index ef42e2d69df..50b0c20505b 100644 --- a/src/vulkan/isl.h +++ b/src/vulkan/isl.h @@ -70,6 +70,7 @@ extern "C" { #endif struct brw_device_info; +struct brw_image_param; #ifndef ISL_DEV_GEN /** @@ -753,6 +754,10 @@ isl_format_block_is_1x1x1(enum isl_format fmt) return fmtl->bw == 1 && fmtl->bh == 1 && fmtl->bd == 1; } +enum isl_format +isl_lower_storage_image_format(const struct isl_device *dev, + enum isl_format fmt); + static 
inline bool isl_tiling_is_std_y(enum isl_tiling tiling) { diff --git a/src/vulkan/isl_image.c b/src/vulkan/isl_image.c new file mode 100644 index 00000000000..2d146d59ac5 --- /dev/null +++ b/src/vulkan/isl_image.c @@ -0,0 +1,137 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl.h" +#include "brw_compiler.h" + +enum isl_format +isl_lower_storage_image_format(const struct isl_device *dev, + enum isl_format format) +{ + switch (format) { + /* These are never lowered. Up to BDW we'll have to fall back to untyped + * surface access for 128bpp formats. 
+ */ + case ISL_FORMAT_R32G32B32A32_UINT: + case ISL_FORMAT_R32G32B32A32_SINT: + case ISL_FORMAT_R32G32B32A32_FLOAT: + case ISL_FORMAT_R32_UINT: + case ISL_FORMAT_R32_SINT: + case ISL_FORMAT_R32_FLOAT: + return format; + + /* From HSW to BDW the only 64bpp format supported for typed access is + * RGBA_UINT16. IVB falls back to untyped. + */ + case ISL_FORMAT_R16G16B16A16_UINT: + case ISL_FORMAT_R16G16B16A16_SINT: + case ISL_FORMAT_R16G16B16A16_FLOAT: + case ISL_FORMAT_R32G32_UINT: + case ISL_FORMAT_R32G32_SINT: + case ISL_FORMAT_R32G32_FLOAT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16B16A16_UINT : + ISL_FORMAT_R32G32_UINT); + + /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component + * are supported. IVB doesn't support formats with more than one component + * for typed access. For 8 and 16 bpp formats IVB relies on the + * undocumented behavior that typed reads from R_UINT8 and R_UINT16 + * surfaces actually do a 32-bit misaligned read. The alternative would be + * to use two surface state entries with different formats for each image, + * one for reading (using R_UINT32) and another one for writing (using + * R_UINT8 or R_UINT16), but that would complicate the shaders we generate + * even more. + */ + case ISL_FORMAT_R8G8B8A8_UINT: + case ISL_FORMAT_R8G8B8A8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R16G16_UINT: + case ISL_FORMAT_R16G16_SINT: + case ISL_FORMAT_R16G16_FLOAT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R8G8_UINT: + case ISL_FORMAT_R8G8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? 
+ ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R16_UINT: + case ISL_FORMAT_R16_FLOAT: + case ISL_FORMAT_R16_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R8_UINT: + case ISL_FORMAT_R8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R8_UINT); + + /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported + * by the hardware. + */ + case ISL_FORMAT_R10G10B10A2_UINT: + case ISL_FORMAT_R10G10B10A2_UNORM: + case ISL_FORMAT_R11G11B10_FLOAT: + return ISL_FORMAT_R32_UINT; + + /* No normalized fixed-point formats are supported by the hardware. */ + case ISL_FORMAT_R16G16B16A16_UNORM: + case ISL_FORMAT_R16G16B16A16_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16B16A16_UINT : + ISL_FORMAT_R32G32_UINT); + + case ISL_FORMAT_R8G8B8A8_UNORM: + case ISL_FORMAT_R8G8B8A8_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R16G16_UNORM: + case ISL_FORMAT_R16G16_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R8G8_UNORM: + case ISL_FORMAT_R8G8_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R16_UNORM: + case ISL_FORMAT_R16_SNORM: + return ISL_FORMAT_R16_UINT; + + case ISL_FORMAT_R8_UNORM: + case ISL_FORMAT_R8_SNORM: + return ISL_FORMAT_R8_UINT; + + default: + assert(!"Unknown image format"); + return ISL_FORMAT_UNSUPPORTED; + } +} -- cgit v1.2.3 From ff05f634f643ba867838d34c1a99d8152debe8ac Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Dec 2015 17:17:30 -0800 Subject: anv/image: Add a separate storage image surface state Thanks to hardware limitations, storage images may need a different surface format and/or other bits in the surface state. 
--- src/vulkan/anv_image.c | 12 ++++++++++-- src/vulkan/anv_private.h | 4 ++++ src/vulkan/gen7_state.c | 14 ++++++++++++++ src/vulkan/gen8_state.c | 15 +++++++++++++++ 4 files changed, 43 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 37a0d8faaa9..09993180e7e 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -252,8 +252,7 @@ anv_image_create(VkDevice _device, image->usage = anv_image_get_full_usage(pCreateInfo); image->surface_type = surf_type; - if (image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT)) { + if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { image->needs_nonrt_surface_state = true; } @@ -261,6 +260,10 @@ anv_image_create(VkDevice _device, image->needs_color_rt_surface_state = true; } + if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + image->needs_storage_surface_state = true; + } + if (likely(anv_format_is_color(image->format))) { r = make_surface(device, image, create_info, VK_IMAGE_ASPECT_COLOR_BIT); @@ -533,6 +536,11 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview, iview->nonrt_surface_state); } + if (iview->image->needs_storage_surface_state) { + anv_state_pool_free(&device->surface_state_pool, + iview->storage_surface_state); + } + anv_free2(&device->alloc, pAllocator, iview); } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index cb3f9a7b367..a6db547e5e3 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1433,6 +1433,7 @@ struct anv_image { bool needs_nonrt_surface_state:1; bool needs_color_rt_surface_state:1; + bool needs_storage_surface_state:1; /** * Image subsurfaces @@ -1468,6 +1469,9 @@ struct anv_image_view { /** RENDER_SURFACE_STATE when using image as a non render target. */ struct anv_state nonrt_surface_state; + + /** RENDER_SURFACE_STATE when using image as a storage image. 
*/ + struct anv_state storage_surface_state; }; struct anv_image_create_info { diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 6ffbacd8e77..4101e84f827 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -332,4 +332,18 @@ genX(image_view_init)(struct anv_image_view *iview, if (!device->info.has_llc) anv_state_clflush(iview->color_rt_surface_state); } + + if (image->needs_storage_surface_state) { + iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); + + surface_state.SurfaceFormat = + isl_lower_storage_image_format(&device->isl_dev, + format->surface_format); + + surface_state.SurfaceMinLOD = range->baseMipLevel; + surface_state.MIPCountLOD = range->levelCount - 1; + + GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, + &surface_state); + } } diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 901cc3b25a8..59134d5214e 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -302,6 +302,21 @@ genX(image_view_init)(struct anv_image_view *iview, if (!device->info.has_llc) anv_state_clflush(iview->color_rt_surface_state); } + + if (image->needs_storage_surface_state) { + iview->storage_surface_state = + alloc_surface_state(device, cmd_buffer); + + surface_state.SurfaceFormat = + isl_lower_storage_image_format(&device->isl_dev, + format_info->surface_format); + + surface_state.SurfaceMinLOD = range->baseMipLevel; + surface_state.MIPCountLOD = range->levelCount - 1; + + GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, + &surface_state); + } } VkResult genX(CreateSampler)( -- cgit v1.2.3 From 1eb731d9fe41bdf5813a8e3646a7df36121b244c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 16 Nov 2015 22:26:03 -0800 Subject: anv/descriptor_set: Add support for storage images in layouts --- src/vulkan/anv_descriptor_set.c | 20 ++++++++++++++++++-- src/vulkan/anv_private.h | 5 +++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 
'src') diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index e1cfd788b73..9c4210025e3 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -79,6 +79,7 @@ VkResult anv_CreateDescriptorSetLayout( uint32_t sampler_count[MESA_SHADER_STAGES] = { 0, }; uint32_t surface_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t image_count[MESA_SHADER_STAGES] = { 0, }; uint32_t dynamic_offset_count = 0; for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { @@ -132,6 +133,13 @@ VkResult anv_CreateDescriptorSetLayout( break; } + if (binding->descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + for_each_bit(s, binding->stageFlags) { + set_layout->binding[b].stage[s].image_index = image_count[s]; + image_count[s] += binding->descriptorCount; + } + } + if (binding->pImmutableSamplers) { set_layout->binding[b].immutable_samplers = samplers; samplers += binding->descriptorCount; @@ -199,6 +207,7 @@ VkResult anv_CreatePipelineLayout( for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { l.set[set].stage[s].surface_start = l.stage[s].surface_count; l.set[set].stage[s].sampler_start = l.stage[s].sampler_count; + l.set[set].stage[s].image_start = l.stage[s].image_count; for (uint32_t b = 0; b < set_layout->binding_count; b++) { unsigned array_size = set_layout->binding[b].array_size; @@ -212,13 +221,19 @@ VkResult anv_CreatePipelineLayout( if (set_layout->binding[b].stage[s].sampler_index >= 0) l.stage[s].sampler_count += array_size; + + if (set_layout->binding[b].stage[s].image_index >= 0) + l.stage[s].image_count += array_size; } } } unsigned num_bindings = 0; - for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) - num_bindings += l.stage[s].surface_count + l.stage[s].sampler_count; + for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { + num_bindings += l.stage[s].surface_count + + l.stage[s].sampler_count + + l.stage[s].image_count; + } size_t size = sizeof(*layout) + num_bindings * 
sizeof(layout->entries[0]); @@ -234,6 +249,7 @@ VkResult anv_CreatePipelineLayout( entry += l.stage[s].surface_count; l.stage[s].sampler_to_descriptor = entry; entry += l.stage[s].sampler_count; + entry += l.stage[s].image_count; int surface = 0; int sampler = 0; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a6db547e5e3..b3829216dd4 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -814,6 +814,9 @@ struct anv_descriptor_set_binding_layout { /* Index into the sampler table for the associated sampler */ int16_t sampler_index; + + /* Index into the image table for the associated image */ + int16_t image_index; } stage[MESA_SHADER_STAGES]; /* Immutable samplers (or NULL if no immutable samplers) */ @@ -894,6 +897,7 @@ struct anv_pipeline_layout { struct { uint32_t surface_start; uint32_t sampler_start; + uint32_t image_start; } stage[MESA_SHADER_STAGES]; } set[MAX_SETS]; @@ -905,6 +909,7 @@ struct anv_pipeline_layout { struct anv_pipeline_binding *surface_to_descriptor; uint32_t sampler_count; struct anv_pipeline_binding *sampler_to_descriptor; + uint32_t image_count; } stage[MESA_SHADER_STAGES]; struct anv_pipeline_binding entries[0]; -- cgit v1.2.3 From 43ac954e25f28d240da65ed816458d3529675a04 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 18 Nov 2015 15:14:05 -0800 Subject: anv: Add initial support for pushing image params The helper to fill out the image params data-structure is stilly a dummy, but this puts the infastructure in place. 
--- src/vulkan/anv_cmd_buffer.c | 26 +++++++++- src/vulkan/anv_image.c | 9 ++++ src/vulkan/anv_nir.h | 1 + src/vulkan/anv_nir_apply_pipeline_layout.c | 80 ++++++++++++++++++++++++++++++ src/vulkan/anv_pipeline.c | 6 ++- src/vulkan/anv_private.h | 4 ++ 6 files changed, 124 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index ee437aa6330..aacd2ab60e3 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -668,6 +668,16 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (layout == NULL) goto out; + if (layout->stage[stage].image_count > 0) { + VkResult result = + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images); + if (result != VK_SUCCESS) + return result; + + cmd_buffer->state.push_constants_dirty |= 1 << stage; + } + + uint32_t image = 0; for (uint32_t s = 0; s < layout->stage[stage].surface_count; s++) { struct anv_pipeline_binding *binding = &layout->stage[stage].surface_to_descriptor[s]; @@ -713,7 +723,20 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, bo_offset = desc->image_view->offset; break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { + surface_state = desc->image_view->storage_surface_state; + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + + struct brw_image_param *image_param = + &cmd_buffer->state.push_constants[stage]->images[image++]; + + anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view, + image_param); + image_param->surface_idx = bias + s; + break; + } + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: assert(!"Unsupported descriptor type"); @@ -727,6 +750,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, bt_map[bias + s] = surface_state.offset + state_offset; add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); } + assert(image 
== layout->stage[stage].image_count); out: if (!cmd_buffer->device->info.has_llc) diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 09993180e7e..68f9b086855 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -592,3 +592,12 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag return NULL; } } + +void +anv_image_view_fill_image_param(struct anv_device *device, + struct anv_image_view *view, + struct brw_image_param *param) +{ + memset(param, 0, sizeof *param); + anv_finishme("Actually fill out brw_image_param"); +} diff --git a/src/vulkan/anv_nir.h b/src/vulkan/anv_nir.h index 666b127451a..9a7a76fe216 100644 --- a/src/vulkan/anv_nir.h +++ b/src/vulkan/anv_nir.h @@ -36,6 +36,7 @@ void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, nir_shader *shader, struct brw_stage_prog_data *prog_data); bool anv_nir_apply_pipeline_layout(nir_shader *shader, + struct brw_stage_prog_data *prog_data, const struct anv_pipeline_layout *layout); #ifdef __cplusplus diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c index 1b196cd62b7..8632dc74e57 100644 --- a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@ -82,6 +82,29 @@ get_sampler_index(unsigned set, unsigned binding, nir_texop tex_op, return sampler_index; } +static uint32_t +get_image_index(unsigned set, unsigned binding, + struct apply_pipeline_layout_state *state) +{ + assert(set < state->layout->num_sets); + struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + + assert(binding < set_layout->binding_count); + + gl_shader_stage stage = state->shader->stage; + + assert(set_layout->binding[binding].stage[stage].image_index >= 0); + + uint32_t image_index = + state->layout->set[set].stage[stage].image_start + + set_layout->binding[binding].stage[stage].image_index; + + assert(image_index < state->layout->stage[stage].image_count); + + 
return image_index; +} + static void lower_res_index_intrinsic(nir_intrinsic_instr *intrin, struct apply_pipeline_layout_state *state) @@ -214,8 +237,23 @@ apply_pipeline_layout_block(nir_block *block, void *void_state) return true; } +static void +setup_vec4_uniform_value(const gl_constant_value **params, + const gl_constant_value *values, + unsigned n) +{ + static const gl_constant_value zero = { 0 }; + + for (unsigned i = 0; i < n; ++i) + params[i] = &values[i]; + + for (unsigned i = n; i < 4; ++i) + params[i] = &zero; +} + bool anv_nir_apply_pipeline_layout(nir_shader *shader, + struct brw_stage_prog_data *prog_data, const struct anv_pipeline_layout *layout) { struct apply_pipeline_layout_state state = { @@ -232,5 +270,47 @@ anv_nir_apply_pipeline_layout(nir_shader *shader, } } + if (layout->stage[shader->stage].image_count > 0) { + nir_foreach_variable(var, &shader->uniforms) { + if (glsl_type_is_image(var->type) || + (glsl_type_is_array(var->type) && + glsl_type_is_image(glsl_get_array_element(var->type)))) { + /* Images are represented as uniform push constants and the actual + * information required for reading/writing to/from the image is + * storred in the uniform. 
+ */ + unsigned image_index = get_image_index(var->data.descriptor_set, + var->data.binding, &state); + + var->data.driver_location = shader->num_uniforms + + image_index * BRW_IMAGE_PARAM_SIZE; + } + } + + struct anv_push_constants *null_data = NULL; + const gl_constant_value **param = prog_data->param + shader->num_uniforms; + const struct brw_image_param *image_param = null_data->images; + for (uint32_t i = 0; i < layout->stage[shader->stage].image_count; i++) { + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, + (const gl_constant_value *)&image_param->surface_idx, 1); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, + (const gl_constant_value *)image_param->offset, 2); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, + (const gl_constant_value *)image_param->size, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, + (const gl_constant_value *)image_param->stride, 4); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, + (const gl_constant_value *)image_param->tiling, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, + (const gl_constant_value *)image_param->swizzling, 2); + + param += BRW_IMAGE_PARAM_SIZE; + image_param ++; + } + + shader->num_uniforms += layout->stage[shader->stage].image_count * + BRW_IMAGE_PARAM_SIZE; + } + return state.progress; } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 821f5e33774..9b6eef8074b 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -315,6 +315,10 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; + if (pipeline->layout && pipeline->layout->stage[stage].image_count > 0) + prog_data->nr_params += pipeline->layout->stage[stage].image_count * + BRW_IMAGE_PARAM_SIZE; + if (prog_data->nr_params > 0) { /* XXX: I think we're leaking this */ 
prog_data->param = (const gl_constant_value **) @@ -339,7 +343,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ if (pipeline->layout) - anv_nir_apply_pipeline_layout(nir, pipeline->layout); + anv_nir_apply_pipeline_layout(nir, prog_data, pipeline->layout); /* All binding table offsets provided by apply_pipeline_layout() are * relative to the start of the bindint table (plus MAX_RTS for VS). diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index b3829216dd4..4b672d38f1c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1541,6 +1541,10 @@ void gen9_fill_buffer_surface_state(void *state, const struct anv_format *format uint32_t offset, uint32_t range, uint32_t stride); +void anv_image_view_fill_image_param(struct anv_device *device, + struct anv_image_view *view, + struct brw_image_param *param); + struct anv_sampler { uint32_t state[4]; }; -- cgit v1.2.3 From f4aee5d82fedcb426e34bb0503d004d6f2e39f73 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Dec 2015 21:45:41 -0800 Subject: gen8/cmd_buffer: Flush push constants after descriptor sets This is because, if storage images are used, flushing descriptor sets can cause push constants to become dirty. 
--- src/vulkan/gen8_cmd_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 68ee6bb5696..df97d271138 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -208,12 +208,12 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.pipeline->active_stages; #endif - if (cmd_buffer->state.push_constants_dirty) - cmd_buffer_flush_push_constants(cmd_buffer); - if (cmd_buffer->state.descriptors_dirty) gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + if (cmd_buffer->state.push_constants_dirty) + cmd_buffer_flush_push_constants(cmd_buffer); + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) gen8_cmd_buffer_emit_viewport(cmd_buffer); -- cgit v1.2.3 From db66424218661c7beedf7966a5cc13929fcc01f0 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 8 Dec 2015 14:25:28 -0800 Subject: anv: Remove unused anv_image_view_info_for_vk_image_view_type() --- src/vulkan/anv_image.c | 12 ++++++------ src/vulkan/anv_private.h | 9 --------- 2 files changed, 6 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 68f9b086855..1205babef07 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -34,6 +34,12 @@ */ #include "gen8_pack.h" +struct anv_image_view_info { + uint8_t surface_type; /**< RENDER_SURFACE_STATE.SurfaceType */ + bool is_array:1; /**< RENDER_SURFACE_STATE.SurfaceArray */ + bool is_cube:1; /**< RENDER_SURFACE_STATE.CubeFaceEnable* */ +}; + static const uint8_t anv_halign[] = { [4] = HALIGN4, [8] = HALIGN8, @@ -65,12 +71,6 @@ anv_image_view_info_table[] = { #undef INFO }; -struct anv_image_view_info -anv_image_view_info_for_vk_image_view_type(VkImageViewType type) -{ - return anv_image_view_info_table[type]; -} - /** * The \a format argument is required and overrides any format found in struct * anv_image_create_info. 
Exactly one bit must be set in \a aspect. diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 4b672d38f1c..77aa3852259 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1398,15 +1398,6 @@ anv_format_is_depth_or_stencil(const struct anv_format *format) return format->depth_format || format->has_stencil; } -struct anv_image_view_info { - uint8_t surface_type; /**< RENDER_SURFACE_STATE.SurfaceType */ - bool is_array:1; /**< RENDER_SURFACE_STATE.SurfaceArray */ - bool is_cube:1; /**< RENDER_SURFACE_STATE.CubeFaceEnable* */ -}; - -struct anv_image_view_info -anv_image_view_info_for_vk_image_view_type(VkImageViewType type); - /** * Subsurface of an anv_image. */ -- cgit v1.2.3 From d28df86c879fec259d16a80797cb1334a2fc0eac Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 8 Dec 2015 16:06:43 -0800 Subject: anv/compute: Fix thread width max off by 1 See cooresponding code in: commit 8d87070af295140fb3558b6784dc6303fde11a67 Author: Jordan Justen Date: Thu Aug 28 14:47:19 2014 -0700 i965/cs: Implement brw_emit_gpgpu_walker Signed-off-by: Jordan Justen --- src/vulkan/gen7_cmd_buffer.c | 4 ++-- src/vulkan/gen8_cmd_buffer.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 5efa7cce002..c31ea338e64 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -646,7 +646,7 @@ void genX(CmdDispatch)( .SIMDSize = prog_data->simd_size / 16, .ThreadDepthCounterMaximum = 0, .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, .ThreadGroupIDXDimension = x, .ThreadGroupIDYDimension = y, .ThreadGroupIDZDimension = z, @@ -683,7 +683,7 @@ void genX(CmdDispatchIndirect)( .SIMDSize = prog_data->simd_size / 16, .ThreadDepthCounterMaximum = 0, .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = 
pipeline->cs_thread_width_max, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, .RightExecutionMask = pipeline->cs_right_mask, .BottomExecutionMask = 0xffffffff); diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index df97d271138..2366f4c244c 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -601,7 +601,7 @@ void genX(CmdDispatch)( .SIMDSize = prog_data->simd_size / 16, .ThreadDepthCounterMaximum = 0, .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, .ThreadGroupIDXDimension = x, .ThreadGroupIDYDimension = y, .ThreadGroupIDZDimension = z, @@ -638,7 +638,7 @@ void genX(CmdDispatchIndirect)( .SIMDSize = prog_data->simd_size / 16, .ThreadDepthCounterMaximum = 0, .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, .RightExecutionMask = pipeline->cs_right_mask, .BottomExecutionMask = 0xffffffff); -- cgit v1.2.3 From 974bdfa9ad87bb8282def5e9aab9dfe4849b3bc7 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 8 Dec 2015 16:16:42 -0800 Subject: i965: Move brw_cs_fill_local_id_payload to brw_compiler.h Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_compiler.h | 4 ++++ src/mesa/drivers/dri/i965/brw_cs.h | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index e49994f19a8..7af8036b4ee 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -685,6 +685,10 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, unsigned *final_assembly_size, char **error_str); +void +brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data, + void *buffer, uint32_t threads, uint32_t stride); 
+ #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h index 899e340f14e..f0b3f97dedc 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ b/src/mesa/drivers/dri/i965/brw_cs.h @@ -34,10 +34,6 @@ extern "C" { void brw_upload_cs_prog(struct brw_context *brw); -void -brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data, - void *buffer, uint32_t threads, uint32_t stride); - #ifdef __cplusplus } #endif -- cgit v1.2.3 From f8d5fb4293ea13d0459dd69c40d3d68003f94015 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 8 Dec 2015 16:56:26 -0800 Subject: anv: Add anv_cmd_buffer_cs_push_constants Similar to anv_cmd_buffer_push_constants, but handles the compute pipeline, which requires different setup from the other stages. This also handles initializing the compute shader local IDs. Signed-off-by: Jordan Justen --- src/vulkan/anv_cmd_buffer.c | 54 +++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 2 ++ 2 files changed, 56 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index aacd2ab60e3..d34d53dcbb3 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -927,6 +927,60 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, return state; } +struct anv_state +anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_push_constants *data = + cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + const unsigned push_constant_data_size = + (local_id_dwords + prog_data->nr_params) * sizeof(gl_constant_value); + const unsigned reg_aligned_constant_size = 
ALIGN(push_constant_data_size, 32); + const unsigned param_aligned_count = + reg_aligned_constant_size / sizeof(uint32_t); + + /* If we don't actually have any push constants, bail. */ + if (reg_aligned_constant_size == 0) + return (struct anv_state) { .offset = 0 }; + + const unsigned threads = pipeline->cs_thread_width_max; + const unsigned total_push_constants_size = + reg_aligned_constant_size * threads; + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + total_push_constants_size, + 32 /* bottom 5 bits MBZ */); + + /* Walk through the param array and fill the buffer with data */ + uint32_t *u32_map = state.map; + + brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads, + reg_aligned_constant_size); + + /* Setup uniform data for the first thread */ + for (unsigned i = 0; i < prog_data->nr_params; i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset); + } + + /* Copy uniform data from the first thread to every other thread */ + const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t); + for (unsigned t = 1; t < threads; t++) { + memcpy(&u32_map[t * param_aligned_count + local_id_dwords], + &u32_map[local_id_dwords], + uniform_data_size); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + void anv_CmdPushConstants( VkCommandBuffer commandBuffer, VkPipelineLayout layout, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 77aa3852259..a180b9c69c3 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1184,6 +1184,8 @@ void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage); +struct anv_state +anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, struct 
anv_render_pass *pass, -- cgit v1.2.3 From 47e5fb52f436a4633db6b9df41a9fd43656cd2bc Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 8 Dec 2015 17:00:39 -0800 Subject: gen8/compute: Setup push constants and local ids Signed-off-by: Jordan Justen --- src/vulkan/gen8_cmd_buffer.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 2366f4c244c..fccc2f4d084 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -47,6 +47,9 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) VkShaderStageFlags flushed = 0; anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); if (state.offset == 0) @@ -511,6 +514,21 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) if (result != VK_SUCCESS) return result; + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = + (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value); + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), + .CURBETotalDataLength = push_state.alloc_size, + .CURBEDataStartAddress = push_state.offset); + struct anv_state state = anv_state_pool_emit(&device->dynamic_state_pool, GENX(INTERFACE_DESCRIPTOR_DATA), 64, @@ -520,6 +538,8 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) .BindingTableEntryCount = 0, .SamplerStatePointer = samplers.offset, .SamplerCount = 0, + 
.ConstantIndirectURBEntryReadLength = push_constant_regs, + .ConstantURBEntryReadOffset = 0, .NumberofThreadsinGPGPUThreadGroup = 0); uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); -- cgit v1.2.3 From 9a9c551f3e50c90053c3ee9038247df6ec6337dc Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 8 Dec 2015 15:22:47 -0800 Subject: anv/image: Drop unused halign, valign lookup tables --- src/vulkan/anv_image.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 1205babef07..6aa6b5a32b5 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -40,18 +40,6 @@ struct anv_image_view_info { bool is_cube:1; /**< RENDER_SURFACE_STATE.CubeFaceEnable* */ }; -static const uint8_t anv_halign[] = { - [4] = HALIGN4, - [8] = HALIGN8, - [16] = HALIGN16, -}; - -static const uint8_t anv_valign[] = { - [4] = VALIGN4, - [8] = VALIGN8, - [16] = VALIGN16, -}; - static const uint8_t anv_surf_type_from_image_type[] = { [VK_IMAGE_TYPE_1D] = SURFTYPE_1D, [VK_IMAGE_TYPE_2D] = SURFTYPE_2D, -- cgit v1.2.3 From 5ba9121fe83a1e629457453b51f1bb741ca0bacd Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 9 Dec 2015 16:30:52 -0800 Subject: anv/image: Remove some vkCreateImage validation Don't validate the baseArrayLayer and layerCount of cube images. This allows us to remove a bloated lookup table and an unneeded struct definition (anv_image_view_info). 
--- src/vulkan/anv_image.c | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 6aa6b5a32b5..dc52e6b368c 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -34,31 +34,12 @@ */ #include "gen8_pack.h" -struct anv_image_view_info { - uint8_t surface_type; /**< RENDER_SURFACE_STATE.SurfaceType */ - bool is_array:1; /**< RENDER_SURFACE_STATE.SurfaceArray */ - bool is_cube:1; /**< RENDER_SURFACE_STATE.CubeFaceEnable* */ -}; - static const uint8_t anv_surf_type_from_image_type[] = { [VK_IMAGE_TYPE_1D] = SURFTYPE_1D, [VK_IMAGE_TYPE_2D] = SURFTYPE_2D, [VK_IMAGE_TYPE_3D] = SURFTYPE_3D, }; -static const struct anv_image_view_info -anv_image_view_info_table[] = { - #define INFO(s, ...) { .surface_type = s, __VA_ARGS__ } - [VK_IMAGE_VIEW_TYPE_1D] = INFO(SURFTYPE_1D), - [VK_IMAGE_VIEW_TYPE_2D] = INFO(SURFTYPE_2D), - [VK_IMAGE_VIEW_TYPE_3D] = INFO(SURFTYPE_3D), - [VK_IMAGE_VIEW_TYPE_CUBE] = INFO(SURFTYPE_CUBE, .is_cube = 1), - [VK_IMAGE_VIEW_TYPE_1D_ARRAY] = INFO(SURFTYPE_1D, .is_array = 1), - [VK_IMAGE_VIEW_TYPE_2D_ARRAY] = INFO(SURFTYPE_2D, .is_array = 1), - [VK_IMAGE_VIEW_TYPE_CUBE_ARRAY] = INFO(SURFTYPE_CUBE, .is_array = 1, .is_cube = 1), - #undef INFO -}; - /** * The \a format argument is required and overrides any format found in struct * anv_image_create_info. Exactly one bit must be set in \a aspect. @@ -362,7 +343,6 @@ anv_validate_CreateImageView(VkDevice _device, { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *subresource; - const struct anv_image_view_info *view_info; const struct anv_format *view_format_info; /* Validate structure type before dereferencing it. */ @@ -373,7 +353,6 @@ anv_validate_CreateImageView(VkDevice _device, /* Validate viewType is in range before using it. 
*/ assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); - view_info = &anv_image_view_info_table[pCreateInfo->viewType]; /* Validate format is in range before using it. */ assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); @@ -400,11 +379,6 @@ anv_validate_CreateImageView(VkDevice _device, assert(subresource->baseArrayLayer + subresource->layerCount <= image->array_size); assert(pView); - if (view_info->is_cube) { - assert(subresource->baseArrayLayer % 6 == 0); - assert(subresource->layerCount % 6 == 0); - } - const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; -- cgit v1.2.3 From 8beea9d45b5879ea3dbd9c0e48f0c0eb2451f380 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Dec 2015 12:27:13 -0800 Subject: anv/icd: Advertise the right ABI version --- src/vulkan/anv_icd.json.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_icd.json.in b/src/vulkan/anv_icd.json.in index 8520dd59063..cef6a30b402 100644 --- a/src/vulkan/anv_icd.json.in +++ b/src/vulkan/anv_icd.json.in @@ -2,7 +2,7 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@abs_top_builddir@/lib/libvulkan.so.0.0.0", - "abi_versions": "0.138.2" + "abi_versions": "0.210.1" } } -- cgit v1.2.3 From 13d1dd465cb0515c3a9857619bcb34f149b48ee4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Dec 2015 18:54:42 -0800 Subject: nir/spirv: Put SSBO store writemasks in the right index It moved with the nir_intrinsic_load/store update. 
--- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 68edea09309..e101e1edcb0 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1319,7 +1319,7 @@ _vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op, if (glsl_type_is_vector_or_scalar(type->type)) { nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); store->num_components = glsl_get_vector_elements(type->type); - store->const_index[1] = (1 << store->num_components) - 1; + store->const_index[0] = (1 << store->num_components) - 1; store->src[0] = nir_src_for_ssa(src->def); nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, -- cgit v1.2.3 From 3893e11f4bb0b6a0088a778a0fbfd9d0e3558c80 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Dec 2015 18:55:14 -0800 Subject: anv: Use 4 instead of sizeof(gl_constant_value) We no longer have access to gl_constant_value and, really, it's 4 because our uniform layout code works entirely in dwords. 
--- src/vulkan/anv_cmd_buffer.c | 2 +- src/vulkan/gen8_cmd_buffer.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 5a56bb53c5e..8ade08ea746 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -930,7 +930,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; const unsigned push_constant_data_size = - (local_id_dwords + prog_data->nr_params) * sizeof(union gl_constant_value *); + (local_id_dwords + prog_data->nr_params) * 4; const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); const unsigned param_aligned_count = reg_aligned_constant_size / sizeof(uint32_t); diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 4e5db676722..8d17066ee6f 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -521,7 +521,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; unsigned push_constant_data_size = - (prog_data->nr_params + local_id_dwords) * sizeof(union gl_constant_value *); + (prog_data->nr_params + local_id_dwords) * 4; unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); unsigned push_constant_regs = reg_aligned_constant_size / 32; -- cgit v1.2.3 From 21cf55ab54de1c7e42ffffd92dc123759734e370 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Dec 2015 18:56:15 -0800 Subject: gen8/cmd_buffer: Don't push CS constants if there aren't any Issuing MEDIA_CURB_LOAD with a size of zero causes GPU hangs on BDW. 
--- src/vulkan/gen8_cmd_buffer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 8d17066ee6f..5e566d3aaa1 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -525,9 +525,11 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); unsigned push_constant_regs = reg_aligned_constant_size / 32; - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), - .CURBETotalDataLength = push_state.alloc_size, - .CURBEDataStartAddress = push_state.offset); + if (push_state.alloc_size) { + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), + .CURBETotalDataLength = push_state.alloc_size, + .CURBEDataStartAddress = push_state.offset); + } struct anv_state state = anv_state_pool_emit(&device->dynamic_state_pool, -- cgit v1.2.3 From 6df7963531b9c33caf96a68e8e6cd4796677b8aa Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Dec 2015 22:36:05 -0800 Subject: i965/HACK: Build brw_cs into libcompiler We need it for CS push constants --- src/mesa/drivers/dri/i965/Makefile.sources | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 7a2f43b1fe8..79ff237ec4e 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -2,6 +2,8 @@ i965_compiler_FILES = \ brw_cfg.cpp \ brw_cfg.h \ brw_compiler.h \ + brw_cs.c \ + brw_cs.h \ brw_cubemap_normalize.cpp \ brw_dead_control_flow.cpp \ brw_dead_control_flow.h \ @@ -111,8 +113,6 @@ i965_FILES = \ brw_conditional_render.c \ brw_context.c \ brw_context.h \ - brw_cs.c \ - brw_cs.h \ brw_curbe.c \ brw_draw.c \ brw_draw.h \ -- cgit v1.2.3 From bd0e25d41e3b5369889747fe683fe47abf1899c9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 
Dec 2015 22:36:47 -0800 Subject: anv/apply_pipeline_layout: Multiply uniform sizes by 4 This is because uniforms are now in terms of bytes everywhere. --- src/vulkan/anv_nir_apply_pipeline_layout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c index 5a31b02ae4f..96b9f21fd09 100644 --- a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@ -284,7 +284,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader, var->data.binding, &state); var->data.driver_location = shader->num_uniforms + - image_index * BRW_IMAGE_PARAM_SIZE; + image_index * BRW_IMAGE_PARAM_SIZE * 4; } } @@ -310,7 +310,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader, } shader->num_uniforms += layout->stage[shader->stage].image_count * - BRW_IMAGE_PARAM_SIZE; + BRW_IMAGE_PARAM_SIZE * 4; } return state.progress; -- cgit v1.2.3 From 6ae4e59faca7875322a9a8a64e9d7b4a5a87ed48 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Dec 2015 22:37:27 -0800 Subject: anv/pipeline: Get rid of the no kernel input parameters hack Previously, meta would pass null shaders in for the VS when it intended to disable the VS. However, this meant that we didn't know what inputs we had and would dead-code things in the FS. In order to solve this, we hard-coded a number. Now meta passes in a VS even if it plans to disable the stage so this is no longer needed. --- src/vulkan/gen8_pipeline.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index faf997a4304..85b1e15a0f6 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -353,15 +353,6 @@ genX(graphics_pipeline_create)( if (result != VK_SUCCESS) return result; - /* FIXME: The compiler dead-codes FS inputs when we don't have a VS, so we - * hard code this to num_attributes - 2. 
This is because the attributes - * include VUE header and position, which aren't counted as varying - * inputs. */ - if (pipeline->vs_simd8 == NO_KERNEL) { - pipeline->wm_prog_data.num_varying_inputs = - pCreateInfo->pVertexInputState->vertexAttributeDescriptionCount - 2; - } - assert(pCreateInfo->pVertexInputState); emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); assert(pCreateInfo->pInputAssemblyState); -- cgit v1.2.3 From e8032761483df36f2d176b8529ee4a8eb86b6b8d Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 11 Dec 2015 13:09:42 -0800 Subject: Revert "i965/HACK: Build brw_cs into libcompiler" This reverts commit 6df7963531b9c33caf96a68e8e6cd4796677b8aa. --- src/mesa/drivers/dri/i965/Makefile.sources | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 79ff237ec4e..7a2f43b1fe8 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -2,8 +2,6 @@ i965_compiler_FILES = \ brw_cfg.cpp \ brw_cfg.h \ brw_compiler.h \ - brw_cs.c \ - brw_cs.h \ brw_cubemap_normalize.cpp \ brw_dead_control_flow.cpp \ brw_dead_control_flow.h \ @@ -113,6 +111,8 @@ i965_FILES = \ brw_conditional_render.c \ brw_context.c \ brw_context.h \ + brw_cs.c \ + brw_cs.h \ brw_curbe.c \ brw_draw.c \ brw_draw.h \ -- cgit v1.2.3 From d12ea21dd5ea5bf00b7aecd98ec1f10f79f26c3a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Dec 2015 13:25:15 -0800 Subject: gen8/pipeline: Support vec4 vertex shaders In order to actually get them, you need INTEL_DEBUG=vec4. 
--- src/vulkan/gen8_pipeline.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 85b1e15a0f6..07a43bdbe55 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -484,7 +484,10 @@ genX(graphics_pipeline_create)( offset = 1; length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; - if (pipeline->vs_simd8 == NO_KERNEL || (extra && extra->disable_vs)) + uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 : + pipeline->vs_vec4; + + if (vs_start == NO_KERNEL || (extra && extra->disable_vs)) anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .FunctionEnable = false, /* Even if VS is disabled, SBE still gets the amount of @@ -493,7 +496,7 @@ genX(graphics_pipeline_create)( .VertexURBEntryOutputLength = length); else anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), - .KernelStartPointer = pipeline->vs_simd8, + .KernelStartPointer = vs_start, .SingleVertexDispatch = Multiple, .VectorMaskEnable = Dmask, .SamplerCount = 0, @@ -515,7 +518,7 @@ genX(graphics_pipeline_create)( .MaximumNumberofThreads = device->info.max_vs_threads - 1, .StatisticsEnable = false, - .SIMD8DispatchEnable = true, + .SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL, .VertexCacheDisable = false, .FunctionEnable = true, -- cgit v1.2.3 From fd944197f27ff428f2599eb03bc0c4085c9fbc6a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 12 Dec 2015 16:09:51 -0800 Subject: i965/nir: Provide a default LOD for buffer textures Our hardware requires an LOD for all texelFetch commands even if they are on buffer textures. GLSL IR gives us an LOD of 0 in that case, but the LOD is really rather meaningless. This commit allows other NIR producers to be more lazy and not provide one at all. 
--- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 4 ++++ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index c52bc04c8ca..6f51ce147f1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2780,6 +2780,10 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset; + /* Our hardware requires a LOD for buffer textures */ + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) + lod = brw_imm_d(0); + for (unsigned i = 0; i < instr->num_srcs; i++) { fs_reg src = get_nir_src(instr->src[i].src); switch (instr->src[i].src_type) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index cf1f82f9d78..cfb66a53fe9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1558,6 +1558,10 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) nir_tex_instr_dest_size(instr)); dst_reg dest = get_nir_dest(instr->dest, instr->dest_type); + /* Our hardware requires a LOD for buffer textures */ + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) + lod = brw_imm_d(0); + /* Load the texture operation sources */ for (unsigned i = 0; i < instr->num_srcs; i++) { switch (instr->src[i].src_type) { -- cgit v1.2.3 From c56186026fce7a28db2fe7cabf58a11328877123 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 12 Dec 2015 16:11:23 -0800 Subject: anv: Add initial support for texel buffers --- src/vulkan/anv_cmd_buffer.c | 5 +++++ src/vulkan/anv_descriptor_set.c | 10 +++++++++- src/vulkan/anv_device.c | 17 ---------------- src/vulkan/anv_image.c | 44 +++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 8 ++++++++ 5 files changed, 66 insertions(+), 18 deletions(-) (limited to 'src') diff 
--git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 8ade08ea746..0531cd80f4f 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -730,6 +730,11 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, } case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + surface_state = desc->buffer_view->surface_state; + bo = desc->buffer_view->buffer->bo; + bo_offset = desc->buffer_view->buffer->offset; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: assert(!"Unsupported descriptor type"); break; diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 9c4210025e3..52d2ffecdd6 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -483,7 +483,15 @@ void anv_UpdateDescriptorSets( case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - anv_finishme("texel buffers not implemented"); + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_buffer_view, bview, + write->pTexelBufferView[j]); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .buffer_view = bview, + }; + } break; case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 7556d8353d5..7ddad58cf6f 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1514,23 +1514,6 @@ anv_fill_buffer_surface_state(struct anv_device *device, void *state, } } -VkResult anv_CreateBufferView( - VkDevice _device, - const VkBufferViewCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkBufferView* pView) -{ - stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); -} - -void anv_DestroyBufferView( - VkDevice _device, - VkBufferView _bview, - const VkAllocationCallbacks* pAllocator) -{ - stub(); -} - void anv_DestroySampler( VkDevice _device, VkSampler _sampler, diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index dc52e6b368c..ffc7ae8cae6 100644 --- 
a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -506,6 +506,50 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview, anv_free2(&device->alloc, pAllocator, iview); } +VkResult +anv_CreateBufferView(VkDevice _device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); + struct anv_buffer_view *view; + + /* TODO: Storage texel buffers */ + + view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!view) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + view->buffer = buffer; + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + anv_fill_buffer_surface_state(device, view->surface_state.map, format, + pCreateInfo->offset, pCreateInfo->range, + format->isl_layout->bpb / 8); + + *pView = anv_buffer_view_to_handle(view); + + return VK_SUCCESS; +} + +void +anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer_view, view, bufferView); + + anv_state_pool_free(&device->surface_state_pool, view->surface_state); + anv_free2(&device->alloc, pAllocator, view); +} + struct anv_surface * anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlags aspect_mask) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a180b9c69c3..3b5a4be8355 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -851,6 +851,8 @@ struct anv_descriptor { struct anv_sampler *sampler; }; + struct anv_buffer_view *buffer_view; + struct { struct anv_buffer *buffer; uint64_t offset; @@ -1516,6 +1518,11 @@ gen9_image_view_init(struct anv_image_view *iview, const 
VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); +struct anv_buffer_view { + struct anv_buffer *buffer; + struct anv_state surface_state; +}; + void anv_fill_buffer_surface_state(struct anv_device *device, void *state, const struct anv_format *format, uint32_t offset, uint32_t range, @@ -1636,6 +1643,7 @@ ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCommandPool) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) -- cgit v1.2.3 From 0da776b733981013cff741ad5765ac8e6920db61 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Dec 2015 08:23:17 -0800 Subject: anv: Fix build for unit tests Clearly no one has been running `make check`, because the unittestbuild has been broken for a long time. After this buildfix, all tests now pass. --- src/vulkan/anv_gem_stubs.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_gem_stubs.c b/src/vulkan/anv_gem_stubs.c index d036314c446..f6b494628e0 100644 --- a/src/vulkan/anv_gem_stubs.c +++ b/src/vulkan/anv_gem_stubs.c @@ -51,15 +51,18 @@ anv_gem_create(struct anv_device *device, size_t size) } void -anv_gem_close(struct anv_device *device, int gem_handle) +anv_gem_close(struct anv_device *device, uint32_t gem_handle) { close(gem_handle); } void* anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, - uint64_t offset, uint64_t size) + uint64_t offset, uint64_t size, uint32_t flags) { + /* Ignore flags, as they're specific to I915_GEM_MMAP. 
*/ + (void) flags; + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gem_handle, offset); } @@ -73,14 +76,14 @@ anv_gem_munmap(void *p, uint64_t size) munmap(p, size); } -int +uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size) { return -1; } int -anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns) +anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) { return 0; } @@ -94,7 +97,21 @@ anv_gem_execbuffer(struct anv_device *device, int anv_gem_set_tiling(struct anv_device *device, - int gem_handle, uint32_t stride, uint32_t tiling) + uint32_t gem_handle, uint32_t stride, uint32_t tiling) +{ + return 0; +} + +int +anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, + uint32_t caching) +{ + return 0; +} + +int +anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t write_domain) { return 0; } @@ -124,12 +141,12 @@ anv_gem_get_aperture(int fd, uint64_t *size) } int -anv_gem_handle_to_fd(struct anv_device *device, int gem_handle) +anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) { unreachable("Unused"); } -int +uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd) { unreachable("Unused"); -- cgit v1.2.3 From 85a63840147603a1ce2c6b0bae4fc65980bbc0b3 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Dec 2015 08:27:59 -0800 Subject: anv/tests: gitignore block_pool_no_free --- src/vulkan/tests/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/tests/.gitignore b/src/vulkan/tests/.gitignore index 9f4be5270f6..5d054055685 100644 --- a/src/vulkan/tests/.gitignore +++ b/src/vulkan/tests/.gitignore @@ -1,4 +1,5 @@ block_pool +block_pool_no_free state_pool state_pool_free_list_only state_pool_no_free -- cgit v1.2.3 From 0bebaeacd74acfafc97c0f66cdcd50369d85ab26 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Dec 2015 08:48:19 -0800 Subject: isl: 
Rename s/lod_align/image_align/ for consistency Regarding the subimages within a surface, sometimes isl called them "images" and sometimes "LODs". This patch make isl consistently refer to them as "images". I choose the term "image" over "LOD" because LOD is an misnomer when applied to 3D surfaces. The alignment applies to each individual 2D subimage, not to the LOD as a whole. This patch changes no behavior. It's just a manually performed, case-insensitive, replacement s/lod/image/ that maintains correct indentation. any behavior. --- src/vulkan/gen7_state.c | 8 ++--- src/vulkan/gen8_state.c | 16 ++++++---- src/vulkan/isl.c | 85 +++++++++++++++++++++++++------------------------ src/vulkan/isl.h | 20 ++++++------ src/vulkan/isl_gen4.c | 14 ++++---- src/vulkan/isl_gen4.h | 10 +++--- src/vulkan/isl_gen6.c | 24 +++++++------- src/vulkan/isl_gen6.h | 10 +++--- src/vulkan/isl_gen7.c | 16 +++++----- src/vulkan/isl_gen7.h | 10 +++--- src/vulkan/isl_gen8.c | 16 +++++----- src/vulkan/isl_gen8.h | 10 +++--- src/vulkan/isl_gen9.c | 39 ++++++++++++----------- src/vulkan/isl_gen9.h | 10 +++--- 14 files changed, 147 insertions(+), 141 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 4101e84f827..3206f77b831 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -238,15 +238,15 @@ genX(image_view_init)(struct anv_image_view *iview, depth = image->extent.depth; } - const struct isl_extent3d lod_align_sa = - isl_surf_get_lod_alignment_sa(&surface->isl); + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(&surface->isl); struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = image->surface_type, .SurfaceArray = image->array_size > 1, .SurfaceFormat = format->surface_format, - .SurfaceVerticalAlignment = anv_valign[lod_align_sa.height], - .SurfaceHorizontalAlignment = anv_halign[lod_align_sa.width], + .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], + 
.SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if * Tiled Surface is False." diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 59134d5214e..ac1f17f48f9 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -120,9 +120,11 @@ get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valig * format (ETC2 has a block height of 4), then the vertical alignment is * 4 compression blocks or, equivalently, 16 pixels. */ - struct isl_extent3d lod_align_el = isl_surf_get_lod_alignment_el(surf); - *halign = anv_halign[lod_align_el.width]; - *valign = anv_valign[lod_align_el.height]; + struct isl_extent3d image_align_el + = isl_surf_get_image_alignment_el(surf); + + *halign = anv_halign[image_align_el.width]; + *valign = anv_valign[image_align_el.height]; #else /* Pre-Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in * units of surface samples. For example, if SurfaceVerticalAlignment @@ -130,9 +132,11 @@ get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valig * format (compressed or not) the vertical alignment is * 4 pixels. 
*/ - struct isl_extent3d lod_align_sa = isl_surf_get_lod_alignment_sa(surf); - *halign = anv_halign[lod_align_sa.width]; - *valign = anv_valign[lod_align_sa.height]; + struct isl_extent3d image_align_sa + = isl_surf_get_image_alignment_sa(surf); + + *halign = anv_halign[image_align_sa.width]; + *valign = anv_valign[image_align_sa.height]; #endif } diff --git a/src/vulkan/isl.c b/src/vulkan/isl.c index 41b842dd79b..d858ea74745 100644 --- a/src/vulkan/isl.c +++ b/src/vulkan/isl.c @@ -344,27 +344,27 @@ isl_choose_array_pitch_span(const struct isl_device *dev, } static void -isl_choose_lod_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *lod_align_el) +isl_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) { if (ISL_DEV_GEN(dev) >= 9) { - gen9_choose_lod_alignment_el(dev, info, tiling, msaa_layout, - lod_align_el); + gen9_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); } else if (ISL_DEV_GEN(dev) >= 8) { - gen8_choose_lod_alignment_el(dev, info, tiling, msaa_layout, - lod_align_el); + gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); } else if (ISL_DEV_GEN(dev) >= 7) { - gen7_choose_lod_alignment_el(dev, info, tiling, msaa_layout, - lod_align_el); + gen7_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); } else if (ISL_DEV_GEN(dev) >= 6) { - gen6_choose_lod_alignment_el(dev, info, tiling, msaa_layout, - lod_align_el); + gen6_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); } else { - gen4_choose_lod_alignment_el(dev, info, tiling, msaa_layout, - lod_align_el); + gen4_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); } } @@ -528,7 +528,7 @@ 
isl_calc_phys_slice0_extent_sa_gen4_2d( const struct isl_device *dev, const struct isl_surf_init_info *restrict info, enum isl_msaa_layout msaa_layout, - const struct isl_extent3d *lod_align_sa, + const struct isl_extent3d *image_align_sa, const struct isl_extent4d *phys_level0_sa, struct isl_extent2d *phys_slice0_sa) { @@ -558,8 +558,8 @@ isl_calc_phys_slice0_extent_sa_gen4_2d( isl_msaa_interleaved_scale_px_to_sa(info->samples, &W, &H); } - uint32_t w = isl_align_npot(W, lod_align_sa->w); - uint32_t h = isl_align_npot(H, lod_align_sa->h); + uint32_t w = isl_align_npot(W, image_align_sa->w); + uint32_t h = isl_align_npot(H, image_align_sa->h); if (l == 0) { slice_top_w = w; @@ -589,7 +589,7 @@ static void isl_calc_phys_slice0_extent_sa_gen4_3d( const struct isl_device *dev, const struct isl_surf_init_info *restrict info, - const struct isl_extent3d *lod_align_sa, + const struct isl_extent3d *image_align_sa, const struct isl_extent4d *phys_level0_sa, struct isl_extent2d *phys_slice0_sa) { @@ -604,9 +604,9 @@ isl_calc_phys_slice0_extent_sa_gen4_3d( uint32_t D0 = phys_level0_sa->d; for (uint32_t l = 0; l < info->levels; ++l) { - uint32_t level_w = isl_align_npot(isl_minify(W0, l), lod_align_sa->w); - uint32_t level_h = isl_align_npot(isl_minify(H0, l), lod_align_sa->h); - uint32_t level_d = isl_align_npot(isl_minify(D0, l), lod_align_sa->d); + uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w); + uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h); + uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa->d); uint32_t max_layers_horiz = MIN(level_d, 1u << l); uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); @@ -623,14 +623,14 @@ isl_calc_phys_slice0_extent_sa_gen4_3d( /** * Calculate the physical extent of the surface's first array slice, in units - * of surface samples. The result is aligned to \a lod_align_sa. + * of surface samples. The result is aligned to \a image_align_sa. 
*/ static void isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, const struct isl_surf_init_info *restrict info, enum isl_dim_layout dim_layout, enum isl_msaa_layout msaa_layout, - const struct isl_extent3d *lod_align_sa, + const struct isl_extent3d *image_align_sa, const struct isl_extent4d *phys_level0_sa, struct isl_extent2d *phys_slice0_sa) { @@ -642,11 +642,11 @@ isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, /*fallthrough*/ case ISL_DIM_LAYOUT_GEN4_2D: isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, - lod_align_sa, phys_level0_sa, + image_align_sa, phys_level0_sa, phys_slice0_sa); return; case ISL_DIM_LAYOUT_GEN4_3D: - isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, lod_align_sa, + isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, image_align_sa, phys_level0_sa, phys_slice0_sa); return; } @@ -654,14 +654,14 @@ isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, /** * Calculate the pitch between physical array slices, in units of rows of - * surface samples. The result is aligned to \a lod_align_sa. + * surface samples. The result is aligned to \a image_align_sa. 
*/ static uint32_t isl_calc_array_pitch_sa_rows(const struct isl_device *dev, const struct isl_surf_init_info *restrict info, enum isl_dim_layout dim_layout, enum isl_array_pitch_span array_pitch_span, - const struct isl_extent3d *lod_align_sa, + const struct isl_extent3d *image_align_sa, const struct isl_extent4d *phys_level0_sa, const struct isl_extent2d *phys_slice0_sa) { @@ -677,7 +677,7 @@ isl_calc_array_pitch_sa_rows(const struct isl_device *dev, case ISL_DIM_LAYOUT_GEN4_2D: switch (array_pitch_span) { case ISL_ARRAY_PITCH_SPAN_COMPACT: - return isl_align_npot(phys_slice0_sa->h, lod_align_sa->h); + return isl_align_npot(phys_slice0_sa->h, image_align_sa->h); case ISL_ARRAY_PITCH_SPAN_FULL: { /* The QPitch equation is found in the Broadwell PRM >> Volume 5: * Memory Views >> Common Surface Formats >> Surface Layout >> 2D @@ -686,8 +686,8 @@ isl_calc_array_pitch_sa_rows(const struct isl_device *dev, uint32_t H0_sa = phys_level0_sa->h; uint32_t H1_sa = isl_minify(H0_sa, 1); - uint32_t h0_sa = isl_align_npot(H0_sa, lod_align_sa->h); - uint32_t h1_sa = isl_align_npot(H1_sa, lod_align_sa->h); + uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); + uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); uint32_t m; if (ISL_DEV_GEN(dev) >= 7) { @@ -697,7 +697,7 @@ isl_calc_array_pitch_sa_rows(const struct isl_device *dev, m = 11; } - uint32_t pitch_sa_rows = h0_sa + h1_sa + (m * lod_align_sa->h); + uint32_t pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && (info->height % 4 == 1)) { @@ -724,7 +724,7 @@ isl_calc_array_pitch_sa_rows(const struct isl_device *dev, case ISL_DIM_LAYOUT_GEN4_3D: assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); - return isl_align_npot(phys_slice0_sa->h, lod_align_sa->h); + return isl_align_npot(phys_slice0_sa->h, image_align_sa->h); } unreachable("bad isl_dim_layout"); @@ -738,7 +738,7 @@ static uint32_t isl_calc_row_pitch(const struct isl_device *dev, const struct 
isl_surf_init_info *restrict info, const struct isl_tile_info *tile_info, - const struct isl_extent3d *lod_align_sa, + const struct isl_extent3d *image_align_sa, const struct isl_extent2d *phys_slice0_sa) { const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); @@ -974,11 +974,12 @@ isl_surf_init_s(const struct isl_device *dev, if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout)) return false; - struct isl_extent3d lod_align_el; - isl_choose_lod_alignment_el(dev, info, tiling, msaa_layout, &lod_align_el); + struct isl_extent3d image_align_el; + isl_choose_image_alignment_el(dev, info, tiling, msaa_layout, + &image_align_el); - struct isl_extent3d lod_align_sa = - isl_extent3d_el_to_sa(info->format, lod_align_el); + struct isl_extent3d image_align_sa = + isl_extent3d_el_to_sa(info->format, image_align_el); struct isl_extent4d phys_level0_sa; isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, @@ -989,18 +990,18 @@ isl_surf_init_s(const struct isl_device *dev, struct isl_extent2d phys_slice0_sa; isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout, - &lod_align_sa, &phys_level0_sa, + &image_align_sa, &phys_level0_sa, &phys_slice0_sa); assert(phys_slice0_sa.w % fmtl->bw == 0); assert(phys_slice0_sa.h % fmtl->bh == 0); const uint32_t row_pitch = isl_calc_row_pitch(dev, info, &tile_info, - &lod_align_sa, + &image_align_sa, &phys_slice0_sa); const uint32_t array_pitch_sa_rows = isl_calc_array_pitch_sa_rows(dev, info, dim_layout, array_pitch_span, - &lod_align_sa, &phys_level0_sa, + &image_align_sa, &phys_level0_sa, &phys_slice0_sa); assert(array_pitch_sa_rows % fmtl->bh == 0); @@ -1028,7 +1029,7 @@ isl_surf_init_s(const struct isl_device *dev, .levels = info->levels, .samples = info->samples, - .lod_alignment_el = lod_align_el, + .image_alignment_el = image_align_el, .logical_level0_px = logical_level0_px, .phys_level0_sa = phys_level0_sa, diff --git a/src/vulkan/isl.h b/src/vulkan/isl.h index 
50b0c20505b..184b0c5f70a 100644 --- a/src/vulkan/isl.h +++ b/src/vulkan/isl.h @@ -646,10 +646,10 @@ struct isl_surf { enum isl_format format; /** - * Alignment of the upper-left sample of each LOD, in units of surface + * Alignment of the upper-left sample of each subimage, in units of surface * elements. */ - struct isl_extent3d lod_alignment_el; + struct isl_extent3d image_alignment_el; /** * Logical extent of the surface's base level, in units of pixels. This is @@ -859,28 +859,28 @@ isl_surf_init_s(const struct isl_device *dev, const struct isl_surf_init_info *restrict info); /** - * Alignment of the upper-left sample of each LOD, in units of surface + * Alignment of the upper-left sample of each subimage, in units of surface * elements. */ static inline struct isl_extent3d -isl_surf_get_lod_alignment_el(const struct isl_surf *surf) +isl_surf_get_image_alignment_el(const struct isl_surf *surf) { - return surf->lod_alignment_el; + return surf->image_alignment_el; } /** - * Alignment of the upper-left sample of each LOD, in units of surface + * Alignment of the upper-left sample of each subimage, in units of surface * samples. 
*/ static inline struct isl_extent3d -isl_surf_get_lod_alignment_sa(const struct isl_surf *surf) +isl_surf_get_image_alignment_sa(const struct isl_surf *surf) { const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); return (struct isl_extent3d) { - .w = fmtl->bw * surf->lod_alignment_el.w, - .h = fmtl->bh * surf->lod_alignment_el.h, - .d = fmtl->bd * surf->lod_alignment_el.d, + .w = fmtl->bw * surf->image_alignment_el.w, + .h = fmtl->bh * surf->image_alignment_el.h, + .d = fmtl->bd * surf->image_alignment_el.d, }; } diff --git a/src/vulkan/isl_gen4.c b/src/vulkan/isl_gen4.c index bf9bec16f7d..52aa5655bb2 100644 --- a/src/vulkan/isl_gen4.c +++ b/src/vulkan/isl_gen4.c @@ -38,11 +38,11 @@ gen4_choose_msaa_layout(const struct isl_device *dev, } void -gen4_choose_lod_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *lod_align_el) +gen4_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) { assert(info->samples == 1); assert(msaa_layout == ISL_MSAA_LAYOUT_NONE); @@ -66,9 +66,9 @@ gen4_choose_lod_alignment_el(const struct isl_device *dev, */ if (isl_format_is_compressed(info->format)) { - *lod_align_el = isl_extent3d(1, 1, 1); + *image_align_el = isl_extent3d(1, 1, 1); return; } - *lod_align_el = isl_extent3d(4, 2, 1); + *image_align_el = isl_extent3d(4, 2, 1); } diff --git a/src/vulkan/isl_gen4.h b/src/vulkan/isl_gen4.h index 913a7c68ba9..06cd70b9206 100644 --- a/src/vulkan/isl_gen4.h +++ b/src/vulkan/isl_gen4.h @@ -36,11 +36,11 @@ gen4_choose_msaa_layout(const struct isl_device *dev, enum isl_msaa_layout *msaa_layout); void -gen4_choose_lod_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum 
isl_msaa_layout msaa_layout, - struct isl_extent3d *lod_align_el); +gen4_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); #ifdef __cplusplus } diff --git a/src/vulkan/isl_gen6.c b/src/vulkan/isl_gen6.c index 8d522c37c29..24c393925ed 100644 --- a/src/vulkan/isl_gen6.c +++ b/src/vulkan/isl_gen6.c @@ -83,11 +83,11 @@ gen6_choose_msaa_layout(const struct isl_device *dev, } void -gen6_choose_lod_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *lod_align_el) +gen6_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) { /* Note that the surface's horizontal image alignment is not programmable * on Sandybridge. 
@@ -123,38 +123,38 @@ gen6_choose_lod_alignment_el(const struct isl_device *dev, */ if (isl_format_is_compressed(info->format)) { - *lod_align_el = isl_extent3d(1, 1, 1); + *image_align_el = isl_extent3d(1, 1, 1); return; } if (isl_format_is_yuv(info->format)) { - *lod_align_el = isl_extent3d(4, 2, 1); + *image_align_el = isl_extent3d(4, 2, 1); return; } if (info->samples > 1) { - *lod_align_el = isl_extent3d(4, 4, 1); + *image_align_el = isl_extent3d(4, 4, 1); return; } if (isl_surf_usage_is_depth_or_stencil(info->usage) && !ISL_DEV_USE_SEPARATE_STENCIL(dev)) { /* interleaved depthstencil buffer */ - *lod_align_el = isl_extent3d(4, 4, 1); + *image_align_el = isl_extent3d(4, 4, 1); return; } if (isl_surf_usage_is_depth(info->usage)) { /* separate depth buffer */ - *lod_align_el = isl_extent3d(4, 4, 1); + *image_align_el = isl_extent3d(4, 4, 1); return; } if (isl_surf_usage_is_stencil(info->usage)) { /* separate stencil buffer */ - *lod_align_el = isl_extent3d(4, 2, 1); + *image_align_el = isl_extent3d(4, 2, 1); return; } - *lod_align_el = isl_extent3d(4, 2, 1); + *image_align_el = isl_extent3d(4, 2, 1); } diff --git a/src/vulkan/isl_gen6.h b/src/vulkan/isl_gen6.h index 56b7f2cb0b7..0779c674940 100644 --- a/src/vulkan/isl_gen6.h +++ b/src/vulkan/isl_gen6.h @@ -36,11 +36,11 @@ gen6_choose_msaa_layout(const struct isl_device *dev, enum isl_msaa_layout *msaa_layout); void -gen6_choose_lod_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *lod_align_el); +gen6_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); #ifdef __cplusplus } diff --git a/src/vulkan/isl_gen7.c b/src/vulkan/isl_gen7.c index 2ac1852402e..9984f61b2a4 100644 --- a/src/vulkan/isl_gen7.c +++ b/src/vulkan/isl_gen7.c @@ -278,7 
+278,7 @@ gen7_filter_tiling(const struct isl_device *dev, } /** - * Choose horizontal LOD alignment, in units of surface elements. + * Choose horizontal subimage alignment, in units of surface elements. */ static uint32_t gen7_choose_halign_el(const struct isl_device *dev, @@ -303,7 +303,7 @@ gen7_choose_halign_el(const struct isl_device *dev, } /** - * Choose vertical LOD alignment, in units of surface elements. + * Choose vertical subimage alignment, in units of surface elements. */ static uint32_t gen7_choose_valign_el(const struct isl_device *dev, @@ -375,16 +375,16 @@ gen7_choose_valign_el(const struct isl_device *dev, } void -gen7_choose_lod_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *lod_align_el) +gen7_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) { /* IVB+ does not support combined depthstencil. 
*/ assert(!isl_surf_usage_is_depth_and_stencil(info->usage)); - *lod_align_el = (struct isl_extent3d) { + *image_align_el = (struct isl_extent3d) { .w = gen7_choose_halign_el(dev, info), .h = gen7_choose_valign_el(dev, info, tiling), .d = 1, diff --git a/src/vulkan/isl_gen7.h b/src/vulkan/isl_gen7.h index c39bd4005f1..2a95b68a9bd 100644 --- a/src/vulkan/isl_gen7.h +++ b/src/vulkan/isl_gen7.h @@ -41,11 +41,11 @@ gen7_choose_msaa_layout(const struct isl_device *dev, enum isl_msaa_layout *msaa_layout); void -gen7_choose_lod_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *lod_align_el); +gen7_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); #ifdef __cplusplus } diff --git a/src/vulkan/isl_gen8.c b/src/vulkan/isl_gen8.c index fe118cae000..2f434aabb2e 100644 --- a/src/vulkan/isl_gen8.c +++ b/src/vulkan/isl_gen8.c @@ -100,7 +100,7 @@ gen8_choose_msaa_layout(const struct isl_device *dev, } /** - * Choose horizontal LOD alignment, in units of surface elements. + * Choose horizontal subimage alignment, in units of surface elements. */ static uint32_t gen8_choose_halign_el(const struct isl_device *dev, @@ -154,7 +154,7 @@ gen8_choose_halign_el(const struct isl_device *dev, } /** - * Choose vertical LOD alignment, in units of surface elements. + * Choose vertical subimage alignment, in units of surface elements. 
*/ static uint32_t gen8_choose_valign_el(const struct isl_device *dev, @@ -192,11 +192,11 @@ gen8_choose_valign_el(const struct isl_device *dev, } void -gen8_choose_lod_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *lod_align_el) +gen8_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) { assert(!isl_tiling_is_std_y(tiling)); @@ -221,7 +221,7 @@ gen8_choose_lod_alignment_el(const struct isl_device *dev, * row.) */ - *lod_align_el = (struct isl_extent3d) { + *image_align_el = (struct isl_extent3d) { .w = gen8_choose_halign_el(dev, info), .h = gen8_choose_valign_el(dev, info), .d = 1, diff --git a/src/vulkan/isl_gen8.h b/src/vulkan/isl_gen8.h index 632d61936c1..2017ea8ddc1 100644 --- a/src/vulkan/isl_gen8.h +++ b/src/vulkan/isl_gen8.h @@ -36,11 +36,11 @@ gen8_choose_msaa_layout(const struct isl_device *dev, enum isl_msaa_layout *msaa_layout); void -gen8_choose_lod_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *lod_align_el); +gen8_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); #ifdef __cplusplus } diff --git a/src/vulkan/isl_gen9.c b/src/vulkan/isl_gen9.c index 00634e4e54c..aa290aa1c35 100644 --- a/src/vulkan/isl_gen9.c +++ b/src/vulkan/isl_gen9.c @@ -26,15 +26,15 @@ #include "isl_priv.h" /** - * Calculate the LOD alignment, in units of surface samples, for the standard - * tiling formats Yf and Ys. 
+ * Calculate the surface's subimage alignment, in units of surface samples, + * for the standard tiling formats Yf and Ys. */ static void -gen9_calc_std_lod_alignment_sa(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *align_sa) +gen9_calc_std_image_alignment_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *align_sa) { const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); @@ -97,11 +97,11 @@ gen9_calc_std_lod_alignment_sa(const struct isl_device *dev, } void -gen9_choose_lod_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *lod_align_el) +gen9_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) { /* This BSpec text provides some insight into the hardware's alignment * requirements [Skylake BSpec > Memory Views > Common Surface Formats > @@ -150,11 +150,11 @@ gen9_choose_lod_alignment_el(const struct isl_device *dev, */ if (isl_tiling_is_std_y(tiling)) { - struct isl_extent3d lod_align_sa; - gen9_calc_std_lod_alignment_sa(dev, info, tiling, msaa_layout, - &lod_align_sa); + struct isl_extent3d image_align_sa; + gen9_calc_std_image_alignment_sa(dev, info, tiling, msaa_layout, + &image_align_sa); - *lod_align_el = isl_extent3d_sa_to_el(info->format, lod_align_sa); + *image_align_el = isl_extent3d_sa_to_el(info->format, image_align_sa); return; } @@ -162,7 +162,7 @@ gen9_choose_lod_alignment_el(const struct isl_device *dev, /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface * Layout and Tiling > 1D Surfaces > 1D 
Alignment Requirements. */ - *lod_align_el = isl_extent3d(64, 1, 1); + *image_align_el = isl_extent3d(64, 1, 1); return; } @@ -176,9 +176,10 @@ gen9_choose_lod_alignment_el(const struct isl_device *dev, * To avoid wasting memory, choose the smallest alignment possible: * HALIGN_4 and VALIGN_4. */ - *lod_align_el = isl_extent3d(4, 4, 1); + *image_align_el = isl_extent3d(4, 4, 1); return; } - gen8_choose_lod_alignment_el(dev, info, tiling, msaa_layout, lod_align_el); + gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); } diff --git a/src/vulkan/isl_gen9.h b/src/vulkan/isl_gen9.h index 14252b6f70c..64ed0aa44ef 100644 --- a/src/vulkan/isl_gen9.h +++ b/src/vulkan/isl_gen9.h @@ -30,11 +30,11 @@ extern "C" { #endif void -gen9_choose_lod_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *lod_align_el); +gen9_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); #ifdef __cplusplus } -- cgit v1.2.3 From f0d11d5a814b792eb5f5161e9332123aaaf689fb Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 9 Dec 2015 17:03:14 -0800 Subject: anv/meta: Fix VkImageViewType Meta unconditionally used VK_IMAGE_VIEW_TYPE_2D in the functions below. This caused some out-of-bound memory accesses. anv_CmdCopyImage anv_CmdBlitImage anv_CmdCopyBufferToImage anv_CmdClearColorImage Fix it by adding a new function, anv_meta_get_view_type(). 
--- src/vulkan/anv_meta.c | 39 +++++++++++++-------------------------- src/vulkan/anv_meta.h | 3 +++ src/vulkan/anv_meta_clear.c | 2 +- 3 files changed, 17 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 008c8904d64..63976cdfe42 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -145,19 +145,15 @@ anv_meta_restore(const struct anv_meta_saved_state *state, cmd_buffer->state.dirty |= state->dynamic_mask; } -static VkImageViewType -meta_blit_get_src_image_view_type(const struct anv_image *src_image) +VkImageViewType +anv_meta_get_view_type(const struct anv_image *image) { - switch (src_image->type) { - case VK_IMAGE_TYPE_1D: - return VK_IMAGE_VIEW_TYPE_1D; - case VK_IMAGE_TYPE_2D: - return VK_IMAGE_VIEW_TYPE_2D; - case VK_IMAGE_TYPE_3D: - return VK_IMAGE_VIEW_TYPE_3D; + switch (image->type) { + case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D; + case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D; + case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D; default: - assert(!"bad VkImageType"); - return 0; + unreachable("bad VkImageViewType"); } } @@ -817,9 +813,6 @@ void anv_CmdCopyImage( ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dest_image, destImage); - const VkImageViewType src_iview_type = - meta_blit_get_src_image_view_type(src_image); - struct anv_meta_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); @@ -830,7 +823,7 @@ void anv_CmdCopyImage( &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, - .viewType = src_iview_type, + .viewType = anv_meta_get_view_type(src_image), .format = src_image->format->vk_format, .subresourceRange = { .aspectMask = pRegions[r].srcSubresource.aspectMask, @@ -874,7 +867,7 @@ void anv_CmdCopyImage( &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = destImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, + .viewType = 
anv_meta_get_view_type(dest_image), .format = dest_image->format->vk_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, @@ -915,9 +908,6 @@ void anv_CmdBlitImage( ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dest_image, destImage); - const VkImageViewType src_iview_type = - meta_blit_get_src_image_view_type(src_image); - struct anv_meta_saved_state saved_state; anv_finishme("respect VkFilter"); @@ -930,7 +920,7 @@ void anv_CmdBlitImage( &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, - .viewType = src_iview_type, + .viewType = anv_meta_get_view_type(src_image), .format = src_image->format->vk_format, .subresourceRange = { .aspectMask = pRegions[r].srcSubresource.aspectMask, @@ -964,7 +954,7 @@ void anv_CmdBlitImage( &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = destImage, - .viewType = VK_IMAGE_VIEW_TYPE_2D, + .viewType = anv_meta_get_view_type(dest_image), .format = dest_image->format->vk_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, @@ -1100,7 +1090,7 @@ void anv_CmdCopyBufferToImage( &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = anv_image_to_handle(dest_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, + .viewType = anv_meta_get_view_type(dest_image), .format = proxy_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, @@ -1161,9 +1151,6 @@ void anv_CmdCopyImageToBuffer( VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); struct anv_meta_saved_state saved_state; - const VkImageViewType src_iview_type = - meta_blit_get_src_image_view_type(src_image); - meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { @@ -1172,7 +1159,7 @@ void anv_CmdCopyImageToBuffer( &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, - .viewType = src_iview_type, + .viewType = 
anv_meta_get_view_type(src_image), .format = src_image->format->vk_format, .subresourceRange = { .aspectMask = pRegions[r].imageSubresource.aspectMask, diff --git a/src/vulkan/anv_meta.h b/src/vulkan/anv_meta.h index d798c6e909f..c8d025bd825 100644 --- a/src/vulkan/anv_meta.h +++ b/src/vulkan/anv_meta.h @@ -53,6 +53,9 @@ void anv_meta_restore(const struct anv_meta_saved_state *state, struct anv_cmd_buffer *cmd_buffer); +VkImageViewType +anv_meta_get_view_type(const struct anv_image *image); + #ifdef __cplusplus } #endif diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index cb1a84ed533..4ce45feb9e6 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -680,7 +680,7 @@ void anv_CmdClearColorImage( &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = _image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, + .viewType = anv_meta_get_view_type(image), .format = image->format->vk_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, -- cgit v1.2.3 From ee57062e1ec5d1d4666239c5a9c426982861f375 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 9 Dec 2015 16:59:02 -0800 Subject: anv: Remove anv_image::surface_type When building RENDER_SURFACE_STATE, the driver set SurfaceType = anv_image::surface_type, which was calculated during anv_image_init(). This was bad because the value of anv_image::surface_type was taken from a gen-specific header, gen8_pack.h, even though the anv_image structure is used for all gens. Replace anv_image::surface_type with a gen-specific lookup function, anv_surftype(), defined in gen${x}_state.c. The lookup function contains some useful asserts that caught some nasty bugs in anv meta, which were fixed in the previous commit. 
--- src/vulkan/anv_image.c | 11 ----------- src/vulkan/anv_private.h | 2 -- src/vulkan/gen7_state.c | 26 +++++++++++++++++++++++++- src/vulkan/gen8_state.c | 26 +++++++++++++++++++++++++- 4 files changed, 50 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index ffc7ae8cae6..3b3751ad1e1 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -34,12 +34,6 @@ */ #include "gen8_pack.h" -static const uint8_t anv_surf_type_from_image_type[] = { - [VK_IMAGE_TYPE_1D] = SURFTYPE_1D, - [VK_IMAGE_TYPE_2D] = SURFTYPE_2D, - [VK_IMAGE_TYPE_3D] = SURFTYPE_3D, -}; - /** * The \a format argument is required and overrides any format found in struct * anv_image_create_info. Exactly one bit must be set in \a aspect. @@ -203,10 +197,6 @@ anv_image_create(VkDevice _device, anv_assert(pCreateInfo->extent.height > 0); anv_assert(pCreateInfo->extent.depth > 0); - /* TODO(chadv): How should we validate inputs? */ - const uint8_t surf_type = - anv_surf_type_from_image_type[pCreateInfo->imageType]; - image = anv_alloc2(&device->alloc, alloc, sizeof(*image), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!image) @@ -219,7 +209,6 @@ anv_image_create(VkDevice _device, image->levels = pCreateInfo->mipLevels; image->array_size = pCreateInfo->arrayLayers; image->usage = anv_image_get_full_usage(pCreateInfo); - image->surface_type = surf_type; if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { image->needs_nonrt_surface_state = true; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 3b5a4be8355..07854d3e357 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1429,8 +1429,6 @@ struct anv_image { struct anv_bo *bo; VkDeviceSize offset; - uint8_t surface_type; /**< RENDER_SURFACE_STATE.SurfaceType */ - bool needs_nonrt_surface_state:1; bool needs_color_rt_surface_state:1; bool needs_storage_surface_state:1; diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 
3206f77b831..321dc3f0f5d 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -32,6 +32,30 @@ #include "gen7_pack.h" #include "gen75_pack.h" +static const uint8_t +anv_surftype(const struct anv_image *image, VkImageViewType view_type) +{ + switch (view_type) { + default: + unreachable("bad VkImageViewType"); + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_1D); + return SURFTYPE_1D; + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + anv_finishme("%s:%s: cube images", __FILE__, __func__); + /* fallthrough */ + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_2D); + return SURFTYPE_2D; + case VK_IMAGE_VIEW_TYPE_3D: + assert(image->type == VK_IMAGE_TYPE_3D); + return SURFTYPE_3D; + } +} + GENX_FUNC(GEN7, GEN75) void genX(fill_buffer_surface_state)(void *state, const struct anv_format *format, uint32_t offset, uint32_t range, @@ -242,7 +266,7 @@ genX(image_view_init)(struct anv_image_view *iview, isl_surf_get_image_alignment_sa(&surface->isl); struct GENX(RENDER_SURFACE_STATE) surface_state = { - .SurfaceType = image->surface_type, + .SurfaceType = anv_surftype(image, pCreateInfo->viewType), .SurfaceArray = image->array_size > 1, .SurfaceFormat = format->surface_format, .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index ac1f17f48f9..0937677e8e4 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -32,6 +32,30 @@ #include "gen8_pack.h" #include "gen9_pack.h" +static const uint8_t +anv_surftype(const struct anv_image *image, VkImageViewType view_type) +{ + switch (view_type) { + default: + unreachable("bad VkImageViewType"); + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_1D); + return SURFTYPE_1D; + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + 
anv_finishme("%s:%s: cube images", __FILE__, __func__); + /* fallthrough */ + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_2D); + return SURFTYPE_2D; + case VK_IMAGE_VIEW_TYPE_3D: + assert(image->type == VK_IMAGE_TYPE_3D); + return SURFTYPE_3D; + } +} + void genX(fill_buffer_surface_state)(void *state, const struct anv_format *format, uint32_t offset, uint32_t range, uint32_t stride) @@ -222,7 +246,7 @@ genX(image_view_init)(struct anv_image_view *iview, get_halign_valign(&surface->isl, &halign, &valign); struct GENX(RENDER_SURFACE_STATE) surface_state = { - .SurfaceType = image->surface_type, + .SurfaceType = anv_surftype(image, pCreateInfo->viewType), .SurfaceArray = image->array_size > 1, .SurfaceFormat = format_info->surface_format, .SurfaceVerticalAlignment = valign, -- cgit v1.2.3 From dba28da07532072fbaa6ef51133234f10e0cac74 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 14 Dec 2015 14:09:35 -0800 Subject: anv/buffer_view: Store a bo + offset instead of buffer pointer This is what image_view does. Also, we really need to do this so that we can properly handle the combined offsets from the buffer and from pCreateInfo. This fixes some of the nonzero offset buffer view CTS tests. 
--- src/vulkan/anv_cmd_buffer.c | 4 ++-- src/vulkan/anv_image.c | 6 ++++-- src/vulkan/anv_private.h | 4 +++- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 0531cd80f4f..fa6943689d8 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -731,8 +731,8 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: surface_state = desc->buffer_view->surface_state; - bo = desc->buffer_view->buffer->bo; - bo_offset = desc->buffer_view->buffer->offset; + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; break; case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 3b3751ad1e1..dcad2affb2c 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -512,7 +512,9 @@ anv_CreateBufferView(VkDevice _device, if (!view) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - view->buffer = buffer; + view->bo = buffer->bo; + view->offset = buffer->offset + pCreateInfo->offset; + view->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); @@ -520,7 +522,7 @@ anv_CreateBufferView(VkDevice _device, anv_format_for_vk_format(pCreateInfo->format); anv_fill_buffer_surface_state(device, view->surface_state.map, format, - pCreateInfo->offset, pCreateInfo->range, + view->offset, pCreateInfo->range, format->isl_layout->bpb / 8); *pView = anv_buffer_view_to_handle(view); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 07854d3e357..d2b65711fc4 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1517,7 +1517,9 @@ gen9_image_view_init(struct anv_image_view *iview, struct anv_cmd_buffer *cmd_buffer); struct anv_buffer_view { - struct anv_buffer *buffer; + struct anv_bo *bo; + uint32_t offset; /**< Offset into bo. 
*/ + struct anv_state surface_state; }; -- cgit v1.2.3 From 2c90f08bf7841e508a4c87e3f44d872e0abb64ba Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 23 Nov 2015 18:32:38 -0800 Subject: i965/fs: Add support for doing MOV_INDIRECT on uniforms --- src/mesa/drivers/dri/i965/brw_fs.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index c833ef0be3b..61b2a4ba80e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -850,7 +850,10 @@ fs_inst::regs_read(int arg) const assert(src[2].file == IMM); unsigned region_length = src[2].ud; - if (src[0].file == FIXED_GRF) { + if (src[0].file == UNIFORM) { + assert(region_length % 4 == 0); + return region_length / 4; + } else if (src[0].file == FIXED_GRF) { /* If the start of the region is not register aligned, then * there's some portion of the register that's technically * unread at the beginning. -- cgit v1.2.3 From 653d8044abb69ff2eaa8637412e240d1a33f4f12 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Nov 2015 09:03:29 -0800 Subject: i965/fs: Don't force MASK_DISABLE on INDIRECT_MOV instructions It should work fine without it and the visitor can set it if it wants. 
--- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index c25da07c4ba..d86eee1de4d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -366,7 +366,6 @@ fs_generator::generate_mov_indirect(fs_inst *inst, assert(inst->exec_size == 8 || devinfo->gen >= 8); brw_MOV(p, addr, indirect_byte_offset); - brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE); brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type)); } -- cgit v1.2.3 From 4be9a1c7bbad8c95e1f5aab91e8cdf3c0cfb0ec1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Nov 2015 11:24:57 -0800 Subject: i965/fs: Fix regs_read() for MOV_INDIRECT with a non-zero subnr The subnr field is in bytes so we don't need to multiply by type_sz. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 61b2a4ba80e..b903fbed196 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -867,7 +867,7 @@ fs_inst::regs_read(int arg) const * unread portion at the beginning. */ if (src[0].subnr) - region_length += src[0].subnr * type_sz(src[0].type); + region_length += src[0].subnr; return DIV_ROUND_UP(region_length, REG_SIZE); } else { -- cgit v1.2.3 From 2f1455dbb0e5a2f1b395e767b0c7cd3a58dc76e4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Nov 2015 09:01:11 -0800 Subject: i965/fs: Add support for MOV_INDIRECT on pre-Broadwell hardware While we're at it, we also add support for the possibility that the indirect is, in fact, a constant. 
This shouldn't happen in the common case (if it does, that means NIR failed to constant-fold something), but it's possible so we should handle it. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++ src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 51 +++++++++++++++++++------- 2 files changed, 42 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b903fbed196..93bacadc52b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4423,6 +4423,10 @@ get_lowered_simd_width(const struct brw_device_info *devinfo, case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: return 8; + case SHADER_OPCODE_MOV_INDIRECT: + /* Prior to Broadwell, we only have 8 address subregisters */ + return devinfo->gen < 8 ? 8 : inst->exec_size; + default: return inst->exec_size; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index d86eee1de4d..7fa6d848473 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -351,22 +351,47 @@ fs_generator::generate_mov_indirect(fs_inst *inst, unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr; - /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ - struct brw_reg addr = vec8(brw_address_reg(0)); + if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) { + imm_byte_offset += indirect_byte_offset.ud; - /* The destination stride of an instruction (in bytes) must be greater - * than or equal to the size of the rest of the instruction. Since the - * address register is of type UW, we can't use a D-type instruction. - * In order to get around this, re re-type to UW and use a stride. 
- */ - indirect_byte_offset = - retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); + reg.nr = imm_byte_offset / REG_SIZE; + reg.subnr = imm_byte_offset % REG_SIZE; + brw_MOV(p, dst, reg); + } else { + /* Prior to Broadwell, there are only 8 address registers. */ + assert(inst->exec_size == 8 || devinfo->gen >= 8); - /* Prior to Broadwell, there are only 8 address registers. */ - assert(inst->exec_size == 8 || devinfo->gen >= 8); + /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ + struct brw_reg addr = vec8(brw_address_reg(0)); - brw_MOV(p, addr, indirect_byte_offset); - brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type)); + /* The destination stride of an instruction (in bytes) must be greater + * than or equal to the size of the rest of the instruction. Since the + * address register is of type UW, we can't use a D-type instruction. + * In order to get around this, re re-type to UW and use a stride. + */ + indirect_byte_offset = + retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); + + if (devinfo->gen < 8) { + /* Prior to broadwell, we have a restriction that the bottom 5 bits + * of the base offset and the bottom 5 bits of the indirect must add + * to less than 32. In other words, the hardware needs to be able to + * add the bottom five bits of the two to get the subnumber and add + * the next 7 bits of each to get the actual register number. Since + * the indirect may cause us to cross a register boundary, this makes + * it almost useless. We could try and do something clever where we + * use a actual base offset if base_offset % 32 == 0 but that would + * mean we were generating different code depending on the base + * offset. Instead, for the sake of consistency, we'll just do the + * add ourselves. 
+ */ + brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset)); + brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), dst.type)); + } else { + brw_MOV(p, addr, indirect_byte_offset); + brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type)); + } + } } void -- cgit v1.2.3 From 4115648a6be2e846660a35a0e260ae53b809b7e0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 23 Nov 2015 21:39:15 -0800 Subject: i965/vec4: Add support for SHADER_OPCODE_MOV_INDIRECT --- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 45 ++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index c3426ddd1c8..71a7f63be48 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1051,6 +1051,48 @@ generate_set_simd4x2_header_gen9(struct brw_codegen *p, brw_pop_insn_state(p); } +static void +generate_mov_indirect(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, struct brw_reg reg, + struct brw_reg indirect, struct brw_reg length) +{ + assert(indirect.type == BRW_REGISTER_TYPE_UD); + + unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr * (REG_SIZE / 2); + + /* This instruction acts in align1 mode */ + assert(inst->force_writemask_all || reg.writemask == 0xf); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + + struct brw_reg addr = vec2(brw_address_reg(0)); + + /* We need to move the indirect value into the address register. In order + * to make things make some sense, we want to respect at least the X + * component of the swizzle. In order to do that, we need to convert the + * subnr (probably 0) to an align1 subnr and add in the swizzle. 
We then + * use a region of <8,4,0>:uw to pick off the first 2 bytes of the indirect + * and splat it out to all four channels of the given half of a0. + */ + assert(brw_is_single_value_swizzle(indirect.swizzle)); + indirect.subnr = (indirect.subnr * 4 + BRW_GET_SWZ(indirect.swizzle, 0)) * 2; + indirect = stride(retype(indirect, BRW_REGISTER_TYPE_UW), 8, 4, 0); + + brw_ADD(p, addr, indirect, brw_imm_uw(imm_byte_offset)); + + /* Use a <4,1> region Vx1 region*/ + struct brw_reg src = brw_VxH_indirect(0, 0); + src.width = BRW_WIDTH_4; + src.hstride = BRW_HORIZONTAL_STRIDE_1; + + brw_MOV(p, dst, retype(src, reg.type)); + + brw_pop_insn_state(p); +} + static void generate_code(struct brw_codegen *p, const struct brw_compiler *compiler, @@ -1538,6 +1580,9 @@ generate_code(struct brw_codegen *p, break; } + case SHADER_OPCODE_MOV_INDIRECT: + generate_mov_indirect(p, inst, dst, src[0], src[1], src[2]); + default: unreachable("Unsupported opcode"); } -- cgit v1.2.3 From 7ba70b1b5121577983c3493e181ee3568563f22e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Nov 2015 13:52:49 -0800 Subject: nir: Add another index to load_uniform to specify the range read --- src/glsl/nir/nir_intrinsics.h | 7 +++++-- src/glsl/nir/nir_lower_io.c | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 9811fb391de..5b10423078d 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -263,6 +263,9 @@ SYSTEM_VALUE(helper_invocation, 1, 0) * of the start of the variable being loaded and and the offset source is a * offset into that variable. * + * Uniform load operations have a second index that specifies the size of the + * variable being loaded. If const_index[1] == 0, then the size is unknown. + * * Some load operations such as UBO/SSBO load and per_vertex loads take an * additional source to specify which UBO/SSBO/vertex to load from. 
* @@ -275,8 +278,8 @@ SYSTEM_VALUE(helper_invocation, 1, 0) #define LOAD(name, srcs, indices, flags) \ INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags) -/* src[] = { offset }. const_index[] = { base } */ -LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { offset }. const_index[] = { base, size } */ +LOAD(uniform, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { buffer_index, offset }. No const_index */ LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { offset }. const_index[] = { base } */ diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 3d646eb14b4..c1f034ba598 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -216,6 +216,11 @@ nir_lower_io_block(nir_block *block, void *void_state) load->const_index[0] = intrin->variables[0]->var->data.driver_location; + if (load->intrinsic == nir_intrinsic_load_uniform) { + load->const_index[1] = + state->type_size(intrin->variables[0]->var->type); + } + if (per_vertex) load->src[0] = nir_src_for_ssa(vertex_index); -- cgit v1.2.3 From f0313a5569d395c83fd4e31248895f902b88b31c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 14 Dec 2015 14:52:57 -0800 Subject: anv: Add initial support for cube maps This fixes 486 cubemap CTS tests. 
--- src/vulkan/gen7_state.c | 4 ++-- src/vulkan/gen8_state.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 321dc3f0f5d..108ebe7c225 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -44,8 +44,8 @@ anv_surftype(const struct anv_image *image, VkImageViewType view_type) return SURFTYPE_1D; case VK_IMAGE_VIEW_TYPE_CUBE: case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - anv_finishme("%s:%s: cube images", __FILE__, __func__); - /* fallthrough */ + assert(image->type == VK_IMAGE_TYPE_2D); + return SURFTYPE_CUBE; case VK_IMAGE_VIEW_TYPE_2D: case VK_IMAGE_VIEW_TYPE_2D_ARRAY: assert(image->type == VK_IMAGE_TYPE_2D); diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 0937677e8e4..aa57073c3e8 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -44,8 +44,8 @@ anv_surftype(const struct anv_image *image, VkImageViewType view_type) return SURFTYPE_1D; case VK_IMAGE_VIEW_TYPE_CUBE: case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - anv_finishme("%s:%s: cube images", __FILE__, __func__); - /* fallthrough */ + assert(image->type == VK_IMAGE_TYPE_2D); + return SURFTYPE_CUBE; case VK_IMAGE_VIEW_TYPE_2D: case VK_IMAGE_VIEW_TYPE_2D_ARRAY: assert(image->type == VK_IMAGE_TYPE_2D); -- cgit v1.2.3 From c4219bc6fff4c8fa764fd0b07c0e8ce99277c4c6 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 14 Dec 2015 15:24:11 -0800 Subject: anv/cmd_buffer: Gen 8 requires 64 byte alignment for push constant data See MEDIA_CURBE_LOAD, CURBE Data Start Address & CURBE Total Data Length Signed-off-by: Jordan Justen --- src/vulkan/anv_cmd_buffer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index fa6943689d8..4dc3704ebf3 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -947,10 +947,14 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer 
*cmd_buffer) const unsigned threads = pipeline->cs_thread_width_max; const unsigned total_push_constants_size = reg_aligned_constant_size * threads; + const unsigned push_constant_alignment = + cmd_buffer->device->info.gen < 8 ? 32 : 64; + const unsigned aligned_total_push_constants_size = + ALIGN(total_push_constants_size, push_constant_alignment); struct anv_state state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - total_push_constants_size, - 32 /* bottom 5 bits MBZ */); + aligned_total_push_constants_size, + push_constant_alignment); /* Walk through the param array and fill the buffer with data */ uint32_t *u32_map = state.map; -- cgit v1.2.3 From a3cd95a88417d0ee5622d73f6a2f6aa02bb7e54a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Nov 2015 15:12:20 -0800 Subject: i965/fs: Use MOV_INDIRECT for all indirect uniform loads Instead of using reladdr, this commit changes the FS backend to emit a MOV_INDIRECT whenever we need an indirect uniform load. We also have to rework some of the other bits of the backend to handle this new form of uniform load. The obvious change is that demote_pull_constants now acts more like a lowering pass when it hits a MOV_INDIRECT. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 72 +++++++++++++++++++------------- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 54 +++++++++++++++++++----- 2 files changed, 87 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 93bacadc52b..7fc3e464b09 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1948,8 +1948,8 @@ fs_visitor::assign_constant_locations() if (inst->src[i].file != UNIFORM) continue; - if (inst->src[i].reladdr) { - int uniform = inst->src[i].nr; + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) { + int uniform = inst->src[0].nr; /* If this array isn't already present in the pull constant buffer, * add it. 
@@ -2031,49 +2031,63 @@ fs_visitor::assign_constant_locations() void fs_visitor::demote_pull_constants() { - foreach_block_and_inst (block, fs_inst, inst, cfg) { + const unsigned index = stage_prog_data->binding_table.pull_constants_start; + + foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { + /* Set up the annotation tracking for new generated instructions. */ + const fs_builder ibld(this, block, inst); + for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file != UNIFORM) continue; - int pull_index; + /* We'll handle this case later */ + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) + continue; + unsigned location = inst->src[i].nr + inst->src[i].reg_offset; - if (location >= uniforms) /* Out of bounds access */ - pull_index = -1; - else - pull_index = pull_constant_loc[location]; + if (location >= uniforms) + continue; /* Out of bounds access */ + + int pull_index = pull_constant_loc[location]; if (pull_index == -1) continue; - /* Set up the annotation tracking for new generated instructions. */ - const fs_builder ibld(this, block, inst); - const unsigned index = stage_prog_data->binding_table.pull_constants_start; - fs_reg dst = vgrf(glsl_type::float_type); - assert(inst->src[i].stride == 0); - /* Generate a pull load into dst. 
*/ - if (inst->src[i].reladdr) { - VARYING_PULL_CONSTANT_LOAD(ibld, dst, - brw_imm_ud(index), - *inst->src[i].reladdr, - pull_index * 4); - inst->src[i].reladdr = NULL; - inst->src[i].stride = 1; - } else { - const fs_builder ubld = ibld.exec_all().group(8, 0); - struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); - ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - dst, brw_imm_ud(index), offset); - inst->src[i].set_smear(pull_index & 3); - } - brw_mark_surface_used(prog_data, index); + fs_reg dst = vgrf(glsl_type::float_type); + const fs_builder ubld = ibld.exec_all().group(8, 0); + struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); + ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + dst, brw_imm_ud(index), offset); /* Rewrite the instruction to use the temporary VGRF. */ inst->src[i].file = VGRF; inst->src[i].nr = dst.nr; inst->src[i].reg_offset = 0; + inst->src[i].set_smear(pull_index & 3); + + brw_mark_surface_used(prog_data, index); + } + + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && + inst->src[0].file == UNIFORM) { + + unsigned location = inst->src[0].nr + inst->src[0].reg_offset; + if (location >= uniforms) + continue; /* Out of bounds access */ + + int pull_index = pull_constant_loc[location]; + assert(pull_index >= 0); /* This had better be pull */ + + VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst, + brw_imm_ud(index), + inst->src[1], + pull_index * 4); + inst->remove(block); + + brw_mark_surface_used(prog_data, index); } } invalidate_live_intervals(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index f08f910ba27..2681dab3f46 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1136,6 +1136,8 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref) { fs_reg image(UNIFORM, deref->var->data.driver_location / 4, BRW_REGISTER_TYPE_UD); + fs_reg indirect; + unsigned indirect_max = 0; for (const nir_deref *tail = 
&deref->deref; tail->child; tail = tail->child) { @@ -1147,7 +1149,7 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref) image = offset(image, bld, base * element_size); if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - fs_reg tmp = vgrf(glsl_type::int_type); + fs_reg tmp = vgrf(glsl_type::uint_type); if (devinfo->gen == 7 && !devinfo->is_haswell) { /* IVB hangs when trying to access an invalid surface index with @@ -1165,15 +1167,31 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref) bld.MOV(tmp, get_nir_src(deref_array->indirect)); } + indirect_max += element_size * (tail->type->length - 1); + bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4)); - if (image.reladdr) - bld.ADD(*image.reladdr, *image.reladdr, tmp); - else - image.reladdr = new(mem_ctx) fs_reg(tmp); + if (indirect.file == BAD_FILE) { + indirect = tmp; + } else { + bld.ADD(indirect, indirect, tmp); + } } } - return image; + if (indirect.file == BAD_FILE) { + return image; + } else { + /* Emit a pile of MOVs to load the uniform into a temporary. The + * dead-code elimination pass will get rid of what we don't use. 
+ */ + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE); + for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) { + bld.emit(SHADER_OPCODE_MOV_INDIRECT, + offset(tmp, bld, j), offset(image, bld, j), + indirect, brw_imm_ud((indirect_max + 1) * 4)); + } + return tmp; + } } void @@ -2333,12 +2351,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr /* Offsets are in bytes but they should always be multiples of 4 */ assert(const_offset->u[0] % 4 == 0); src.reg_offset = const_offset->u[0] / 4; + + for (unsigned j = 0; j < instr->num_components; j++) { + bld.MOV(offset(dest, bld, j), offset(src, bld, j)); + } } else { - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); - } + fs_reg indirect = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); - for (unsigned j = 0; j < instr->num_components; j++) { - bld.MOV(offset(dest, bld, j), offset(src, bld, j)); + /* We need to pass a size to the MOV_INDIRECT but we don't want it to + * go past the end of the uniform. In order to keep the n'th + * component from running past, we subtract off the size of all but + * one component of the vector. + */ + assert(instr->const_index[1] >= instr->num_components * 4); + unsigned read_size = instr->const_index[1] - + (instr->num_components - 1) * 4; + + for (unsigned j = 0; j < instr->num_components; j++) { + bld.emit(SHADER_OPCODE_MOV_INDIRECT, + offset(dest, bld, j), offset(src, bld, j), + indirect, brw_imm_ud(read_size)); + } } break; } -- cgit v1.2.3 From 9f46af9e418e70e71092cccbb3a21a22e36d8e24 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Nov 2015 16:54:41 -0800 Subject: i965/fs: Get rid of reladdr We aren't using it anymore. 
--- src/mesa/drivers/dri/i965/brw_fs.cpp | 7 +------ src/mesa/drivers/dri/i965/brw_ir_fs.h | 5 +---- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7fc3e464b09..b78f99fcfdb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -433,7 +433,6 @@ fs_reg::fs_reg(struct ::brw_reg reg) : { this->reg_offset = 0; this->subreg_offset = 0; - this->reladdr = NULL; this->stride = 1; if (this->file == IMM && (this->type != BRW_REGISTER_TYPE_V && @@ -448,7 +447,6 @@ fs_reg::equals(const fs_reg &r) const { return (this->backend_reg::equals(r) && subreg_offset == r.subreg_offset && - !reladdr && !r.reladdr && stride == r.stride); } @@ -4723,9 +4721,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) break; case UNIFORM: fprintf(file, "u%d", inst->src[i].nr + inst->src[i].reg_offset); - if (inst->src[i].reladdr) { - fprintf(file, "+reladdr"); - } else if (inst->src[i].subreg_offset) { + if (inst->src[i].subreg_offset) { fprintf(file, "+%d.%d", inst->src[i].reg_offset, inst->src[i].subreg_offset); } @@ -4836,7 +4832,6 @@ fs_visitor::get_instruction_generating_reg(fs_inst *start, { if (end == start || end->is_partial_write() || - reg.reladdr || !reg.equals(end->dst)) { return NULL; } else { diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index c3eec2efb42..e4f20f4ffc9 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -58,8 +58,6 @@ public: */ int subreg_offset; - fs_reg *reladdr; - /** Register region horizontal stride */ uint8_t stride; }; @@ -136,8 +134,7 @@ component(fs_reg reg, unsigned idx) static inline bool is_uniform(const fs_reg ®) { - return (reg.stride == 0 || reg.is_null()) && - (!reg.reladdr || is_uniform(*reg.reladdr)); + return (reg.stride == 0 || reg.is_null()); } /** -- cgit v1.2.3 From 
9024353db3c3f4b096855307964b3c1f92b21259 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Nov 2015 15:16:14 -0800 Subject: i965/fs: Stop relying on param_size in assign_constant_locations Now that we have MOV_INDIRECT opcodes, we have all of the size information we need directly in the opcode. With a little restructuring of the algorithm used in assign_constant_locations we don't need param_size anymore. The big thing to watch out for now, however, is that you can have two ranges overlap where neither contains the other. In order to deal with this, we make the first pass just flag what needs pulling and handle assigning pull constant locations until later. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 44 ++++++++++++++---------------------- 1 file changed, 17 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b78f99fcfdb..6520424a0b0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1923,14 +1923,12 @@ fs_visitor::assign_constant_locations() if (dispatch_width != 8) return; - unsigned int num_pull_constants = 0; - - pull_constant_loc = ralloc_array(mem_ctx, int, uniforms); - memset(pull_constant_loc, -1, sizeof(pull_constant_loc[0]) * uniforms); - bool is_live[uniforms]; memset(is_live, 0, sizeof(is_live)); + bool needs_pull[uniforms]; + memset(needs_pull, 0, sizeof(needs_pull)); + /* First, we walk through the instructions and do two things: * * 1) Figure out which uniforms are live. @@ -1946,20 +1944,15 @@ fs_visitor::assign_constant_locations() if (inst->src[i].file != UNIFORM) continue; - if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) { - int uniform = inst->src[0].nr; + int constant_nr = inst->src[i].nr + inst->src[i].reg_offset; - /* If this array isn't already present in the pull constant buffer, - * add it. 
- */ - if (pull_constant_loc[uniform] == -1) { - assert(param_size[uniform]); - for (int j = 0; j < param_size[uniform]; j++) - pull_constant_loc[uniform + j] = num_pull_constants++; + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) { + for (unsigned j = 0; j < inst->src[2].ud / 4; j++) { + is_live[constant_nr + j] = true; + needs_pull[constant_nr + j] = true; } } else { /* Mark the the one accessed uniform as live */ - int constant_nr = inst->src[i].nr + inst->src[i].reg_offset; if (constant_nr >= 0 && constant_nr < (int) uniforms) is_live[constant_nr] = true; } @@ -1976,26 +1969,23 @@ fs_visitor::assign_constant_locations() */ unsigned int max_push_components = 16 * 8; unsigned int num_push_constants = 0; + unsigned int num_pull_constants = 0; push_constant_loc = ralloc_array(mem_ctx, int, uniforms); + pull_constant_loc = ralloc_array(mem_ctx, int, uniforms); for (unsigned int i = 0; i < uniforms; i++) { - if (!is_live[i] || pull_constant_loc[i] != -1) { - /* This UNIFORM register is either dead, or has already been demoted - * to a pull const. Mark it as no longer living in the param[] array. - */ - push_constant_loc[i] = -1; + push_constant_loc[i] = -1; + pull_constant_loc[i] = -1; + + if (!is_live[i]) continue; - } - if (num_push_constants < max_push_components) { - /* Retain as a push constant. Record the location in the params[] - * array. - */ + if (!needs_pull[i] && num_push_constants < max_push_components) { + /* Retain as a push constant */ push_constant_loc[i] = num_push_constants++; } else { - /* Demote to a pull constant. 
*/ - push_constant_loc[i] = -1; + /* We have to pull it */ pull_constant_loc[i] = num_pull_constants++; } } -- cgit v1.2.3 From 9c36c4084529966cb0238a1eadd36e6b13da4798 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Nov 2015 17:02:01 -0800 Subject: i965/fs: Get rid of the param_size array --- src/mesa/drivers/dri/i965/brw_fs.cpp | 1 - src/mesa/drivers/dri/i965/brw_fs.h | 2 -- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 9 --------- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 3 --- 4 files changed, 15 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 6520424a0b0..cbaa8afcdcf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1021,7 +1021,6 @@ fs_visitor::import_uniforms(fs_visitor *v) this->push_constant_loc = v->push_constant_loc; this->pull_constant_loc = v->pull_constant_loc; this->uniforms = v->uniforms; - this->param_size = v->param_size; } fs_reg * diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index f2e384129cb..7bed2179531 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -313,8 +313,6 @@ public: struct brw_stage_prog_data *prog_data; struct gl_program *prog; - int *param_size; - int *virtual_grf_start; int *virtual_grf_end; brw::fs_live_variables *live_intervals; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 2681dab3f46..82a6ce2b295 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -174,15 +174,6 @@ fs_visitor::nir_setup_uniforms() return; uniforms = nir->num_uniforms / 4; - - nir_foreach_variable(var, &nir->uniforms) { - /* UBO's and atomics don't take up space in the uniform file */ - if (var->interface_type != NULL || var->type->contains_atomic()) - continue; - - if (type_size_scalar(var->type) > 0) - 
param_size[var->data.driver_location / 4] = type_size_scalar(var->type); - } } static bool diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 0582e7831de..75ca1da8a79 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1012,9 +1012,6 @@ fs_visitor::init() this->spilled_any_registers = false; this->do_dual_src = false; - - if (dispatch_width == 8) - this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params); } fs_visitor::~fs_visitor() -- cgit v1.2.3 From 46f5396846e5c4d1874c5af83378b91230f0d9da Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 25 Nov 2015 09:12:37 -0800 Subject: i965/vec4: Inline get_pull_constant_offset It's not really doing enough anymore to justify a helper function. --- src/mesa/drivers/dri/i965/brw_vec4.h | 2 -- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 37 ++++++++++---------------- 2 files changed, 14 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index ae5bf6939f5..f2e5ce18ab4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -284,8 +284,6 @@ public: src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst, src_reg *reladdr, int reg_offset); - src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst, - src_reg *reladdr, int reg_offset); void emit_scratch_read(bblock_t *block, vec4_instruction *inst, dst_reg dst, src_reg orig_src, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 443d0eb5387..7712d3471ea 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1464,27 +1464,6 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst, } } -src_reg 
-vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst, - src_reg *reladdr, int reg_offset) -{ - if (reladdr) { - src_reg index = src_reg(this, glsl_type::int_type); - - emit_before(block, inst, ADD(dst_reg(index), *reladdr, - brw_imm_d(reg_offset * 16))); - - return index; - } else if (devinfo->gen >= 8) { - /* Store the offset in a GRF so we can send-from-GRF. */ - src_reg offset = src_reg(this, glsl_type::int_type); - emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16))); - return offset; - } else { - return brw_imm_d(reg_offset * 16); - } -} - /** * Emits an instruction before @inst to load the value named by @orig_src * from scratch space at @base_offset to @temp. @@ -1666,8 +1645,20 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, { int reg_offset = base_offset + orig_src.reg_offset; const unsigned index = prog_data->base.binding_table.pull_constants_start; - src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr, - reg_offset); + + src_reg offset; + if (orig_src.reladdr) { + offset = src_reg(this, glsl_type::int_type); + + emit_before(block, inst, ADD(dst_reg(offset), *orig_src.reladdr, + brw_imm_d(reg_offset * 16))); + } else if (devinfo->gen >= 8) { + /* Store the offset in a GRF so we can send-from-GRF. */ + offset = src_reg(this, glsl_type::int_type); + emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16))); + } else { + offset = brw_imm_d(reg_offset * 16); + } emit_pull_constant_load_reg(temp, brw_imm_ud(index), -- cgit v1.2.3 From a487f0284f618416e74f73ca2534d8d93c26531c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 25 Nov 2015 09:36:34 -0800 Subject: i965/vec4: Use MOV_INDIRECT instead of reladdr for indirect push constants This commit moves us to an instruction based model rather than a register-based model for indirects. This is more accurate anyway as we have to emit instructions to resolve the reladdr. 
It's also a lot simpler because it gets rid of the recursive reladdr problem by design. One side-effect of this is that we need a whole new algorithm in move_uniform_array_access_to_pull_constants. This new algorithm is much more straightforward than the old one and is fairly similar to what we're already doing in the FS backend. --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4.h | 3 +- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 +-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 86 ++++++++++++-------------- 4 files changed, 50 insertions(+), 51 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index a697bdf84a0..e4a405b8705 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -775,7 +775,7 @@ vec4_visitor::move_push_constants_to_pull_constants() dst_reg temp = dst_reg(this, glsl_type::vec4_type); emit_pull_constant_load(block, inst, temp, inst->src[i], - pull_constant_loc[uniform]); + pull_constant_loc[uniform], src_reg()); inst->src[i].file = temp.file; inst->src[i].nr = temp.nr; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index f2e5ce18ab4..e6d6c8218be 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -293,7 +293,8 @@ public: void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, dst_reg dst, src_reg orig_src, - int base_offset); + int base_offset, + src_reg indirect); void emit_pull_constant_load_reg(dst_reg dst, src_reg surf_index, src_reg offset, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index f965b39360f..58b6612de9d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -673,12 +673,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) /* Offsets are in bytes but they 
should always be multiples of 16 */ assert(const_offset->u[0] % 16 == 0); src.reg_offset = const_offset->u[0] / 16; + + emit(MOV(dest, src)); } else { - src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1); - src.reladdr = new(mem_ctx) src_reg(tmp); - } + src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1); - emit(MOV(dest, src)); + emit(SHADER_OPCODE_MOV_INDIRECT, dest, src, + indirect, brw_imm_ud(instr->const_index[1])); + } break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 7712d3471ea..bc7b30df2ad 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1641,16 +1641,16 @@ vec4_visitor::move_grf_array_access_to_scratch() void vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, dst_reg temp, src_reg orig_src, - int base_offset) + int base_offset, src_reg indirect) { int reg_offset = base_offset + orig_src.reg_offset; const unsigned index = prog_data->base.binding_table.pull_constants_start; src_reg offset; - if (orig_src.reladdr) { + if (indirect.file != BAD_FILE) { offset = src_reg(this, glsl_type::int_type); - emit_before(block, inst, ADD(dst_reg(offset), *orig_src.reladdr, + emit_before(block, inst, ADD(dst_reg(offset), indirect, brw_imm_d(reg_offset * 16))); } else if (devinfo->gen >= 8) { /* Store the offset in a GRF so we can send-from-GRF. */ @@ -1685,59 +1685,55 @@ vec4_visitor::move_uniform_array_access_to_pull_constants() { int pull_constant_loc[this->uniforms]; memset(pull_constant_loc, -1, sizeof(pull_constant_loc)); - bool nested_reladdr; - /* Walk through and find array access of uniforms. Put a copy of that - * uniform in the pull constant buffer. - * - * Note that we don't move constant-indexed accesses to arrays. No - * testing has been done of the performance impact of this choice. 
+ /* First, walk through the instructions and determine which things need to + * be pulled. We mark something as needing to be pulled by setting + * pull_constant_loc to 0. */ - do { - nested_reladdr = false; - - foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { - for (int i = 0 ; i < 3; i++) { - if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr) - continue; + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + /* We only care about MOV_INDIRECT of a uniform */ + if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT || + inst->src[0].file != UNIFORM) + continue; - int uniform = inst->src[i].nr; + int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset; - if (inst->src[i].reladdr->reladdr) - nested_reladdr = true; /* will need another pass */ + for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++) + pull_constant_loc[uniform_nr + j] = 0; + } - /* If this array isn't already present in the pull constant buffer, - * add it. - */ - if (pull_constant_loc[uniform] == -1) { - const gl_constant_value **values = - &stage_prog_data->param[uniform * 4]; + /* Next, we walk the list of uniforms and assign real pull constant + * locations and set their corresponding entries in pull_param. + */ + for (int j = 0; j < this->uniforms; j++) { + if (pull_constant_loc[j] < 0) + continue; - pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4; + pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4; - assert(uniform < uniform_array_size); - for (int j = 0; j < uniform_size[uniform] * 4; j++) { - stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] - = values[j]; - } - } + for (int i = 0; i < 4; i++) { + stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] + = stage_prog_data->param[j * 4 + i]; + } + } - /* Set up the annotation tracking for new generated instructions. 
*/ - base_ir = inst->ir; - current_annotation = inst->annotation; + /* Finally, we can walk through the instructions and lower MOV_INDIRECT + * instructions to actual uniform pulls. + */ + foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { + /* We only care about MOV_INDIRECT of a uniform */ + if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT || + inst->src[0].file != UNIFORM) + continue; - dst_reg temp = dst_reg(this, glsl_type::vec4_type); + int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset; - emit_pull_constant_load(block, inst, temp, inst->src[i], - pull_constant_loc[uniform]); + assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP); - inst->src[i].file = temp.file; - inst->src[i].nr = temp.nr; - inst->src[i].reg_offset = temp.reg_offset; - inst->src[i].reladdr = NULL; - } - } - } while (nested_reladdr); + emit_pull_constant_load(block, inst, inst->dst, inst->src[0], + pull_constant_loc[uniform_nr], inst->src[1]); + inst->remove(block); + } /* Now there are no accesses of the UNIFORM file with a reladdr, so * no need to track them as larger-than-vec4 objects. This will be -- cgit v1.2.3 From eb76f226cf0c3497caa1738efffa9c2e8b02201c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 25 Nov 2015 09:59:03 -0800 Subject: i965/fs: Use UD type for offsets in VARYING_PULL_CONSTANT_LOAD --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index cbaa8afcdcf..dc0bdba72c1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -174,7 +174,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, * CSE can later notice that those loads are all the same and eliminate * the redundant ones. 
*/ - fs_reg vec4_offset = vgrf(glsl_type::int_type); + fs_reg vec4_offset = vgrf(glsl_type::uint_type); bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf)); int scale = 1; -- cgit v1.2.3 From 75f33a6420af37406edbf64c535b5b29d2d2eefc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Dec 2015 17:02:16 -0800 Subject: i965/vec4: Get rid of the uniform_size array --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 8 -------- src/mesa/drivers/dri/i965/brw_vec4.h | 2 -- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 9 --------- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 ----------- src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 1 - 5 files changed, 31 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index e4a405b8705..1304e2312f8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -466,11 +466,6 @@ vec4_visitor::split_uniform_registers() inst->src[i].reg_offset = 0; } } - - /* Update that everything is now vector-sized. */ - for (int i = 0; i < this->uniforms; i++) { - this->uniform_size[i] = 1; - } } void @@ -528,7 +523,6 @@ vec4_visitor::pack_uniform_registers() * push constants. */ for (int src = 0; src < uniforms; src++) { - assert(src < uniform_array_size); int size = chans_used[src]; if (size == 0) @@ -1588,8 +1582,6 @@ vec4_visitor::setup_uniforms(int reg) * matter what, or the GPU would hang. 
*/ if (devinfo->gen < 6 && this->uniforms == 0) { - assert(this->uniforms < this->uniform_array_size); - stage_prog_data->param = reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4); for (unsigned int i = 0; i < 4; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index e6d6c8218be..0dc04eaa252 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -115,8 +115,6 @@ public: */ dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; - int *uniform_size; - int uniform_array_size; /*< Size of the uniform_size array */ int uniforms; src_reg shader_start_time; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 58b6612de9d..bafc9a514d2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -118,15 +118,6 @@ void vec4_visitor::nir_setup_uniforms() { uniforms = nir->num_uniforms / 16; - - nir_foreach_variable(var, &nir->uniforms) { - /* UBO's and atomics don't take up space in the uniform file */ - if (var->interface_type != NULL || var->type->contains_atomic()) - continue; - - if (type_size_vec4(var->type) > 0) - uniform_size[var->data.driver_location / 16] = type_size_vec4(var->type); - } } void diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index bc7b30df2ad..357dd4d5a64 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1786,17 +1786,6 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; this->uniforms = 0; - - /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires - * at least one. See setup_uniforms() in brw_vec4.cpp. 
- */ - this->uniform_array_size = 1; - if (prog_data) { - this->uniform_array_size = - MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1); - } - - this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); } vec4_visitor::~vec4_visitor() diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index fd8be7d972c..205323ce6ed 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -261,7 +261,6 @@ void vec4_vs_visitor::setup_uniform_clipplane_values() { for (int i = 0; i < key->nr_userclip_plane_consts; ++i) { - assert(this->uniforms < uniform_array_size); this->userplane[i] = dst_reg(UNIFORM, this->uniforms); this->userplane[i].type = BRW_REGISTER_TYPE_F; for (int j = 0; j < 4; ++j) { -- cgit v1.2.3 From 63c313de84afa9ee65f5d98e1c843ace3a5c9f21 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Dec 2015 17:14:49 -0800 Subject: i965/fs: Rename demote_pull_constants to lower_constant_loads --- src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index dc0bdba72c1..7124ee3905b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2016,7 +2016,7 @@ fs_visitor::assign_constant_locations() * or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs. 
*/ void -fs_visitor::demote_pull_constants() +fs_visitor::lower_constant_loads() { const unsigned index = stage_prog_data->binding_table.pull_constants_start; @@ -5033,7 +5033,7 @@ fs_visitor::optimize() bld = fs_builder(this, 64); assign_constant_locations(); - demote_pull_constants(); + lower_constant_loads(); validate(); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 7bed2179531..d09f2e42425 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -136,7 +136,7 @@ public: void split_virtual_grfs(); bool compact_virtual_grfs(); void assign_constant_locations(); - void demote_pull_constants(); + void lower_constant_loads(); void invalidate_live_intervals(); void calculate_live_intervals(); void calculate_register_pressure(); -- cgit v1.2.3 From 091b6156dd8553979336c15acdaf140e5419c483 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Dec 2015 17:34:38 -0800 Subject: i965/fs: Push small uniform arrays Unfortunately, this also means that we need to use a slightly different algorithm for assign_constant_locations. The old algorithm worked based on the assumption that each read of a uniform value read exactly one float. If it encountered a MOV_INDIRECT, it would immediately bail and push the whole thing. Since we can now read ranges using MOV_INDIRECT, we need to be able to push a series of floats without breaking them up. To do this, we use an algorithm similar to the on in split_virtual_grfs. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 76 +++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7124ee3905b..6c4a70f1dac 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1911,9 +1911,7 @@ fs_visitor::compact_virtual_grfs() * maximum number of fragment shader uniform components (64). 
If * there are too many of these, they'd fill up all of register space. * So, this will push some of them out to the pull constant buffer and - * update the program to load them. We also use pull constants for all - * indirect constant loads because we don't support indirect accesses in - * registers yet. + * update the program to load them. */ void fs_visitor::assign_constant_locations() @@ -1925,15 +1923,18 @@ fs_visitor::assign_constant_locations() bool is_live[uniforms]; memset(is_live, 0, sizeof(is_live)); - bool needs_pull[uniforms]; - memset(needs_pull, 0, sizeof(needs_pull)); + /* For each uniform slot, a value of true indicates that the given slot and + * the next slot must remain contiguous. This is used to keep us from + * splitting arrays apart. + */ + bool contiguous[uniforms]; + memset(contiguous, 0, sizeof(contiguous)); /* First, we walk through the instructions and do two things: * * 1) Figure out which uniforms are live. * - * 2) Find all indirect access of uniform arrays and flag them as needing - * to go into the pull constant buffer. + * 2) Mark any indirectly used ranges of registers as contiguous. * * Note that we don't move constant-indexed accesses to arrays. No * testing has been done of the performance impact of this choice. 
@@ -1946,12 +1947,16 @@ fs_visitor::assign_constant_locations() int constant_nr = inst->src[i].nr + inst->src[i].reg_offset; if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) { - for (unsigned j = 0; j < inst->src[2].ud / 4; j++) { - is_live[constant_nr + j] = true; - needs_pull[constant_nr + j] = true; + assert(inst->src[2].ud % 4 == 0); + unsigned last = constant_nr + (inst->src[2].ud / 4) - 1; + assert(last < uniforms); + + for (unsigned j = constant_nr; j < last; j++) { + is_live[j] = true; + contiguous[j] = true; } + is_live[last] = true; } else { - /* Mark the the one accessed uniform as live */ if (constant_nr >= 0 && constant_nr < (int) uniforms) is_live[constant_nr] = true; } @@ -1966,26 +1971,49 @@ fs_visitor::assign_constant_locations() * If changing this value, note the limitation about total_regs in * brw_curbe.c. */ - unsigned int max_push_components = 16 * 8; + const unsigned int max_push_components = 16 * 8; + + /* We push small arrays, but no bigger than 16 floats. This is big enough + * for a vec4 but hopefully not large enough to push out other stuff. We + * should probably use a better heuristic at some point. 
+ */ + const unsigned int max_chunk_size = 16; + unsigned int num_push_constants = 0; unsigned int num_pull_constants = 0; push_constant_loc = ralloc_array(mem_ctx, int, uniforms); pull_constant_loc = ralloc_array(mem_ctx, int, uniforms); - for (unsigned int i = 0; i < uniforms; i++) { - push_constant_loc[i] = -1; - pull_constant_loc[i] = -1; + int chunk_start = -1; + for (unsigned u = 0; u < uniforms; u++) { + push_constant_loc[u] = -1; + pull_constant_loc[u] = -1; - if (!is_live[i]) + if (!is_live[u]) continue; - if (!needs_pull[i] && num_push_constants < max_push_components) { - /* Retain as a push constant */ - push_constant_loc[i] = num_push_constants++; - } else { - /* We have to pull it */ - pull_constant_loc[i] = num_pull_constants++; + /* This is the first live uniform in the chunk */ + if (chunk_start < 0) + chunk_start = u; + + /* If this element does not need to be contiguous with the next, we + * split at this point and everthing between chunk_start and u forms a + * single chunk. 
+ */ + if (!contiguous[u]) { + unsigned chunk_size = u - chunk_start + 1; + + if (num_push_constants + chunk_size <= max_push_components && + chunk_size <= max_chunk_size) { + for (unsigned j = chunk_start; j <= u; j++) + push_constant_loc[j] = num_push_constants++; + } else { + for (unsigned j = chunk_start; j <= u; j++) + pull_constant_loc[j] = num_pull_constants++; + } + + chunk_start = -1; } } @@ -2066,7 +2094,9 @@ fs_visitor::lower_constant_loads() continue; /* Out of bounds access */ int pull_index = pull_constant_loc[location]; - assert(pull_index >= 0); /* This had better be pull */ + + if (pull_index == -1) + continue; VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst, brw_imm_ud(index), -- cgit v1.2.3 From 1f98bf8da028c4d0e2b47cc1a59e6f004e2207a1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 14 Dec 2015 16:14:20 -0800 Subject: anv: Pass an isl_format into fill_buffer_surface_state --- src/vulkan/anv_cmd_buffer.c | 2 +- src/vulkan/anv_device.c | 2 +- src/vulkan/anv_image.c | 3 ++- src/vulkan/anv_private.h | 10 +++++----- src/vulkan/gen7_state.c | 4 ++-- src/vulkan/gen8_state.c | 4 ++-- 6 files changed, 13 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 4dc3704ebf3..984b885a4b6 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -605,7 +605,7 @@ fill_descriptor_buffer_surface_state(struct anv_device *device, void *state, } anv_fill_buffer_surface_state(device, state, - anv_format_for_vk_format(format), + anv_format_for_vk_format(format)->surface_format, offset, range, 1); } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 7ddad58cf6f..ff224907854 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1493,7 +1493,7 @@ void anv_DestroyBuffer( void anv_fill_buffer_surface_state(struct anv_device *device, void *state, - const struct anv_format *format, + enum isl_format format, uint32_t offset, uint32_t range, 
uint32_t stride) { switch (device->info.gen) { diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index dcad2affb2c..659fe80b320 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -521,7 +521,8 @@ anv_CreateBufferView(VkDevice _device, const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); - anv_fill_buffer_surface_state(device, view->surface_state.map, format, + anv_fill_buffer_surface_state(device, view->surface_state.map, + format->surface_format, view->offset, pCreateInfo->range, format->isl_layout->bpb / 8); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index d2b65711fc4..c5ab92c5438 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1524,20 +1524,20 @@ struct anv_buffer_view { }; void anv_fill_buffer_surface_state(struct anv_device *device, void *state, - const struct anv_format *format, + enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride); -void gen7_fill_buffer_surface_state(void *state, const struct anv_format *format, +void gen7_fill_buffer_surface_state(void *state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride); -void gen75_fill_buffer_surface_state(void *state, const struct anv_format *format, +void gen75_fill_buffer_surface_state(void *state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride); -void gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, +void gen8_fill_buffer_surface_state(void *state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride); -void gen9_fill_buffer_surface_state(void *state, const struct anv_format *format, +void gen9_fill_buffer_surface_state(void *state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride); diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 108ebe7c225..f58283f7d05 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -57,7 +57,7 @@ 
anv_surftype(const struct anv_image *image, VkImageViewType view_type) } GENX_FUNC(GEN7, GEN75) void -genX(fill_buffer_surface_state)(void *state, const struct anv_format *format, +genX(fill_buffer_surface_state)(void *state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride) { @@ -65,7 +65,7 @@ genX(fill_buffer_surface_state)(void *state, const struct anv_format *format, struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = SURFTYPE_BUFFER, - .SurfaceFormat = format->surface_format, + .SurfaceFormat = format, .SurfaceVerticalAlignment = VALIGN_4, .SurfaceHorizontalAlignment = HALIGN_4, .TiledSurface = false, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index aa57073c3e8..eee2515e220 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -57,7 +57,7 @@ anv_surftype(const struct anv_image *image, VkImageViewType view_type) } void -genX(fill_buffer_surface_state)(void *state, const struct anv_format *format, +genX(fill_buffer_surface_state)(void *state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride) { uint32_t num_elements = range / stride; @@ -65,7 +65,7 @@ genX(fill_buffer_surface_state)(void *state, const struct anv_format *format, struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = SURFTYPE_BUFFER, .SurfaceArray = false, - .SurfaceFormat = format->surface_format, + .SurfaceFormat = format, .SurfaceVerticalAlignment = VALIGN4, .SurfaceHorizontalAlignment = HALIGN4, .TileMode = LINEAR, -- cgit v1.2.3 From 783a21192c7cc35113ec089354369e1ad34a7df9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 14 Dec 2015 16:51:12 -0800 Subject: anv: Add support for storage texel buffers --- src/vulkan/anv_cmd_buffer.c | 11 ++++++- src/vulkan/anv_descriptor_set.c | 7 ++++- src/vulkan/anv_device.c | 1 + src/vulkan/anv_image.c | 64 ++++++++++++++++++++++++++++++++++------- src/vulkan/anv_private.h | 8 ++++++ 5 files changed, 78 insertions(+), 13 deletions(-) (limited 
to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 984b885a4b6..759c4677a74 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -736,7 +736,16 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, break; case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - assert(!"Unsupported descriptor type"); + surface_state = desc->buffer_view->storage_surface_state; + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; + + struct brw_image_param *image_param = + &cmd_buffer->state.push_constants[stage]->images[image++]; + + anv_buffer_view_fill_image_param(cmd_buffer->device, desc->buffer_view, + image_param); + image_param->surface_idx = bias + s; break; default: diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 52d2ffecdd6..a7b6b773012 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -133,11 +133,16 @@ VkResult anv_CreateDescriptorSetLayout( break; } - if (binding->descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: for_each_bit(s, binding->stageFlags) { set_layout->binding[b].stage[s].image_index = image_count[s]; image_count[s] += binding->descriptorCount; } + break; + default: + break; } if (binding->pImmutableSamplers) { diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index ff224907854..df6472edfde 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1472,6 +1472,7 @@ VkResult anv_CreateBuffer( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); buffer->size = pCreateInfo->size; + buffer->usage = pCreateInfo->usage; buffer->bo = NULL; buffer->offset = 0; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 659fe80b320..159af6d19b0 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -505,26 +505,45 @@ 
anv_CreateBufferView(VkDevice _device, ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); struct anv_buffer_view *view; - /* TODO: Storage texel buffers */ - view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!view) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + view->format = format->surface_format; view->bo = buffer->bo; view->offset = buffer->offset + pCreateInfo->offset; + view->range = pCreateInfo->range; - view->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - const struct anv_format *format = - anv_format_for_vk_format(pCreateInfo->format); + anv_fill_buffer_surface_state(device, view->surface_state.map, + view->format, + view->offset, pCreateInfo->range, + format->isl_layout->bpb / 8); + } else { + view->surface_state = (struct anv_state){ 0 }; + } + + if (buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { + view->storage_surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - anv_fill_buffer_surface_state(device, view->surface_state.map, - format->surface_format, - view->offset, pCreateInfo->range, - format->isl_layout->bpb / 8); + enum isl_format storage_format = + isl_lower_storage_image_format(&device->isl_dev, view->format); + + anv_fill_buffer_surface_state(device, view->storage_surface_state.map, + storage_format, + view->offset, pCreateInfo->range, + format->isl_layout->bpb / 8); + } else { + view->storage_surface_state = (struct anv_state){ 0 }; + } *pView = anv_buffer_view_to_handle(view); @@ -538,7 +557,14 @@ anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_buffer_view, view, bufferView); - 
anv_state_pool_free(&device->surface_state_pool, view->surface_state); + if (view->surface_state.alloc_size > 0) + anv_state_pool_free(&device->surface_state_pool, + view->surface_state); + + if (view->storage_surface_state.alloc_size > 0) + anv_state_pool_free(&device->surface_state_pool, + view->storage_surface_state); + anv_free2(&device->alloc, pAllocator, view); } @@ -599,3 +625,19 @@ anv_image_view_fill_image_param(struct anv_device *device, memset(param, 0, sizeof *param); anv_finishme("Actually fill out brw_image_param"); } + +void +anv_buffer_view_fill_image_param(struct anv_device *device, + struct anv_buffer_view *view, + struct brw_image_param *param) +{ + /* Set the swizzling shifts to all-ones to effectively disable swizzling -- + * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more + * detailed explanation of these parameters. + */ + param->swizzling[0] = 0xff; + param->swizzling[1] = 0xff; + + param->stride[0] = isl_format_layouts[view->format].bpb / 8; + param->size[0] = view->range / param->stride[0]; +} diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index c5ab92c5438..8aaa2811fc7 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -921,6 +921,8 @@ struct anv_buffer { struct anv_device * device; VkDeviceSize size; + VkBufferUsageFlags usage; + /* Set when bound */ struct anv_bo * bo; VkDeviceSize offset; @@ -1517,10 +1519,13 @@ gen9_image_view_init(struct anv_image_view *iview, struct anv_cmd_buffer *cmd_buffer); struct anv_buffer_view { + enum isl_format format; /**< VkBufferViewCreateInfo::format */ struct anv_bo *bo; uint32_t offset; /**< Offset into bo. 
*/ + uint64_t range; /**< VkBufferViewCreateInfo::range */ struct anv_state surface_state; + struct anv_state storage_surface_state; }; void anv_fill_buffer_surface_state(struct anv_device *device, void *state, @@ -1544,6 +1549,9 @@ void gen9_fill_buffer_surface_state(void *state, enum isl_format format, void anv_image_view_fill_image_param(struct anv_device *device, struct anv_image_view *view, struct brw_image_param *param); +void anv_buffer_view_fill_image_param(struct anv_device *device, + struct anv_buffer_view *view, + struct brw_image_param *param); struct anv_sampler { uint32_t state[4]; -- cgit v1.2.3 From f46544dea13b7c16ce2805d947d205477ac1d465 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 14 Dec 2015 16:59:59 -0800 Subject: anv: Fix CUBE storage images --- src/vulkan/gen7_state.c | 10 +++++++--- src/vulkan/gen8_state.c | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index f58283f7d05..c6de40d3b9a 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -33,7 +33,8 @@ #include "gen75_pack.h" static const uint8_t -anv_surftype(const struct anv_image *image, VkImageViewType view_type) +anv_surftype(const struct anv_image *image, VkImageViewType view_type, + bool storage) { switch (view_type) { default: @@ -45,7 +46,7 @@ anv_surftype(const struct anv_image *image, VkImageViewType view_type) case VK_IMAGE_VIEW_TYPE_CUBE: case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: assert(image->type == VK_IMAGE_TYPE_2D); - return SURFTYPE_CUBE; + return storage ? 
SURFTYPE_2D : SURFTYPE_CUBE; case VK_IMAGE_VIEW_TYPE_2D: case VK_IMAGE_VIEW_TYPE_2D_ARRAY: assert(image->type == VK_IMAGE_TYPE_2D); @@ -266,7 +267,7 @@ genX(image_view_init)(struct anv_image_view *iview, isl_surf_get_image_alignment_sa(&surface->isl); struct GENX(RENDER_SURFACE_STATE) surface_state = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType), + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, false), .SurfaceArray = image->array_size > 1, .SurfaceFormat = format->surface_format, .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], @@ -360,6 +361,9 @@ genX(image_view_init)(struct anv_image_view *iview, if (image->needs_storage_surface_state) { iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); + surface_state.SurfaceType = + anv_surftype(image, pCreateInfo->viewType, true), + surface_state.SurfaceFormat = isl_lower_storage_image_format(&device->isl_dev, format->surface_format); diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index eee2515e220..fe9f088d9ca 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -33,7 +33,8 @@ #include "gen9_pack.h" static const uint8_t -anv_surftype(const struct anv_image *image, VkImageViewType view_type) +anv_surftype(const struct anv_image *image, VkImageViewType view_type, + bool storage) { switch (view_type) { default: @@ -45,7 +46,7 @@ anv_surftype(const struct anv_image *image, VkImageViewType view_type) case VK_IMAGE_VIEW_TYPE_CUBE: case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: assert(image->type == VK_IMAGE_TYPE_2D); - return SURFTYPE_CUBE; + return storage ? 
SURFTYPE_2D : SURFTYPE_CUBE; case VK_IMAGE_VIEW_TYPE_2D: case VK_IMAGE_VIEW_TYPE_2D_ARRAY: assert(image->type == VK_IMAGE_TYPE_2D); @@ -246,7 +247,7 @@ genX(image_view_init)(struct anv_image_view *iview, get_halign_valign(&surface->isl, &halign, &valign); struct GENX(RENDER_SURFACE_STATE) surface_state = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType), + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, false), .SurfaceArray = image->array_size > 1, .SurfaceFormat = format_info->surface_format, .SurfaceVerticalAlignment = valign, @@ -335,6 +336,9 @@ genX(image_view_init)(struct anv_image_view *iview, iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); + surface_state.SurfaceType = + anv_surftype(image, pCreateInfo->viewType, true), + surface_state.SurfaceFormat = isl_lower_storage_image_format(&device->isl_dev, format_info->surface_format); -- cgit v1.2.3 From a3c5c339a8dcda213f8e3f6a56b44080b4c4eda7 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 14 Dec 2015 18:12:57 -0800 Subject: nir/spirv_to_nir: Use a minimum of 1 for GS invocations glslang is giving us 0, which causes the SIMD8 GS compile to hit an assert. 
Signed-off-by: Jordan Justen --- src/glsl/nir/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index e101e1edcb0..a8f3016e050 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -3075,7 +3075,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvExecutionModeInvocations: assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader->info.gs.invocations = w[3]; + b->shader->info.gs.invocations = MAX2(1, w[3]); break; case SpvExecutionModeDepthReplacing: -- cgit v1.2.3 From 7edcc59a7bd94280f517b161a860e06197c98b4c Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 14 Dec 2015 18:16:38 -0800 Subject: anv: Rename gs_vec4 to gs_kernel The code generated may be vec4 or simd8 depending on how we start the compiler. To run the GS in SIMD8, set the INTEL_SCALAR_GS environment variable. This was added in: commit 36fd65381756ed1b8f774f7fcdd555941a3d39e1 Author: Kenneth Graunke Date: Wed Mar 11 23:14:31 2015 -0700 i965: Add scalar geometry shader support. 
Signed-off-by: Jordan Justen --- src/vulkan/anv_pipeline.c | 4 ++-- src/vulkan/anv_private.h | 2 +- src/vulkan/gen7_pipeline.c | 4 ++-- src/vulkan/gen8_pipeline.c | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 05d84feba68..bf243cdb6b4 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -513,7 +513,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, } /* TODO: SIMD8 GS */ - pipeline->gs_vec4 = + pipeline->gs_kernel = anv_pipeline_upload_kernel(pipeline, shader_code, code_size); pipeline->gs_vertex_count = nir->info.gs.vertices_in; @@ -966,7 +966,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, pipeline->vs_simd8 = NO_KERNEL; pipeline->vs_vec4 = NO_KERNEL; - pipeline->gs_vec4 = NO_KERNEL; + pipeline->gs_kernel = NO_KERNEL; pipeline->active_stages = 0; pipeline->total_scratch = 0; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8aaa2811fc7..b763f701570 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1272,7 +1272,7 @@ struct anv_pipeline { uint32_t ps_ksp2; uint32_t ps_grf_start0; uint32_t ps_grf_start2; - uint32_t gs_vec4; + uint32_t gs_kernel; uint32_t gs_vertex_count; uint32_t cs_simd; diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 8262956ef07..e3bfc708deb 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -477,14 +477,14 @@ genX(graphics_pipeline_create)( const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; - if (pipeline->gs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) { + if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); } else { urb_offset = 1; urb_length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset; anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), - 
.KernelStartPointer = pipeline->gs_vec4, + .KernelStartPointer = pipeline->gs_kernel, .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 07a43bdbe55..d3b307ca5f7 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -436,12 +436,12 @@ genX(graphics_pipeline_create)( offset = 1; length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; - if (pipeline->gs_vec4 == NO_KERNEL) + if (pipeline->gs_kernel == NO_KERNEL) anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false); else anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .SingleProgramFlow = false, - .KernelStartPointer = pipeline->gs_vec4, + .KernelStartPointer = pipeline->gs_kernel, .VectorMaskEnable = Dmask, .SamplerCount = 0, .BindingTableEntryCount = 0, @@ -533,7 +533,7 @@ genX(graphics_pipeline_create)( * shared with other gens. */ const struct brw_vue_map *fs_input_map; - if (pipeline->gs_vec4 == NO_KERNEL) + if (pipeline->gs_kernel == NO_KERNEL) fs_input_map = &vue_prog_data->vue_map; else fs_input_map = &gs_prog_data->base.vue_map; @@ -700,7 +700,7 @@ VkResult genX(compute_pipeline_create)( pipeline->vs_simd8 = NO_KERNEL; pipeline->vs_vec4 = NO_KERNEL; - pipeline->gs_vec4 = NO_KERNEL; + pipeline->gs_kernel = NO_KERNEL; pipeline->active_stages = 0; pipeline->total_scratch = 0; -- cgit v1.2.3 From 8224571ef80e58d0dd1f00f6ee91960ddca799fa Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 14 Dec 2015 21:10:26 -0800 Subject: vec4/generator: Actually pass the sampler into generate_tex This is an artifact of the way the separate samplers/textures series ended up getting sent out and rebased. This should fix a number of CTS tests involving geometry shaders. 
--- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 205a86d597a..3299843bfa9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1341,7 +1341,7 @@ generate_code(struct brw_codegen *p, case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(p, prog_data, inst, dst, src[0], src[1], src[1]); + generate_tex(p, prog_data, inst, dst, src[0], src[1], src[2]); break; case VS_OPCODE_URB_WRITE: -- cgit v1.2.3 From f7e36f9f66248e94cc4247e2459d75f6cd32bc46 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Dec 2015 10:28:18 -0800 Subject: isl: Move it a standalone directory The plan all along was to eventually move isl out of the Vulkan directory, because I intended i965 and anvil to share it. A small problem I encountered when attempting to write unit tests for isl precipitated the move. I discovered that it's easier to get isl unit tests to build if I remove the extra, unneeded dependencies injected by src/vulkan/Makefile.am. And the easiest way to remove those unneeded dependencies is to move isl out of src/vulkan. (Unit tests come in subsequent commits). 
--- configure.ac | 1 + src/Makefile.am | 1 + src/isl/.gitignore | 1 + src/isl/Makefile.am | 67 +++ src/isl/isl.c | 1046 +++++++++++++++++++++++++++++++++ src/isl/isl.h | 917 +++++++++++++++++++++++++++++ src/isl/isl_format_layout.csv | 287 +++++++++ src/isl/isl_format_layout_gen.bash | 128 ++++ src/isl/isl_gen4.c | 74 +++ src/isl/isl_gen4.h | 47 ++ src/isl/isl_gen6.c | 160 +++++ src/isl/isl_gen6.h | 47 ++ src/isl/isl_gen7.c | 392 ++++++++++++ src/isl/isl_gen7.h | 52 ++ src/isl/isl_gen8.c | 229 ++++++++ src/isl/isl_gen8.h | 47 ++ src/isl/isl_gen9.c | 185 ++++++ src/isl/isl_gen9.h | 41 ++ src/isl/isl_image.c | 137 +++++ src/isl/isl_priv.h | 141 +++++ src/vulkan/.gitignore | 1 - src/vulkan/Makefile.am | 32 +- src/vulkan/isl.c | 1046 --------------------------------- src/vulkan/isl.h | 917 ----------------------------- src/vulkan/isl_format_layout.csv | 287 --------- src/vulkan/isl_format_layout_gen.bash | 128 ---- src/vulkan/isl_gen4.c | 74 --- src/vulkan/isl_gen4.h | 47 -- src/vulkan/isl_gen6.c | 160 ----- src/vulkan/isl_gen6.h | 47 -- src/vulkan/isl_gen7.c | 392 ------------ src/vulkan/isl_gen7.h | 52 -- src/vulkan/isl_gen8.c | 229 -------- src/vulkan/isl_gen8.h | 47 -- src/vulkan/isl_gen9.c | 185 ------ src/vulkan/isl_gen9.h | 41 -- src/vulkan/isl_image.c | 137 ----- src/vulkan/isl_priv.h | 141 ----- 38 files changed, 4005 insertions(+), 3958 deletions(-) create mode 100644 src/isl/.gitignore create mode 100644 src/isl/Makefile.am create mode 100644 src/isl/isl.c create mode 100644 src/isl/isl.h create mode 100644 src/isl/isl_format_layout.csv create mode 100755 src/isl/isl_format_layout_gen.bash create mode 100644 src/isl/isl_gen4.c create mode 100644 src/isl/isl_gen4.h create mode 100644 src/isl/isl_gen6.c create mode 100644 src/isl/isl_gen6.h create mode 100644 src/isl/isl_gen7.c create mode 100644 src/isl/isl_gen7.h create mode 100644 src/isl/isl_gen8.c create mode 100644 src/isl/isl_gen8.h create mode 100644 src/isl/isl_gen9.c create mode 100644 
src/isl/isl_gen9.h create mode 100644 src/isl/isl_image.c create mode 100644 src/isl/isl_priv.h delete mode 100644 src/vulkan/isl.c delete mode 100644 src/vulkan/isl.h delete mode 100644 src/vulkan/isl_format_layout.csv delete mode 100755 src/vulkan/isl_format_layout_gen.bash delete mode 100644 src/vulkan/isl_gen4.c delete mode 100644 src/vulkan/isl_gen4.h delete mode 100644 src/vulkan/isl_gen6.c delete mode 100644 src/vulkan/isl_gen6.h delete mode 100644 src/vulkan/isl_gen7.c delete mode 100644 src/vulkan/isl_gen7.h delete mode 100644 src/vulkan/isl_gen8.c delete mode 100644 src/vulkan/isl_gen8.h delete mode 100644 src/vulkan/isl_gen9.c delete mode 100644 src/vulkan/isl_gen9.h delete mode 100644 src/vulkan/isl_image.c delete mode 100644 src/vulkan/isl_priv.h (limited to 'src') diff --git a/configure.ac b/configure.ac index c26dbf2e126..6cadd040194 100644 --- a/configure.ac +++ b/configure.ac @@ -2437,6 +2437,7 @@ AC_CONFIG_FILES([Makefile src/glx/apple/Makefile src/glx/tests/Makefile src/gtest/Makefile + src/isl/Makefile src/loader/Makefile src/mapi/Makefile src/mapi/es1api/glesv1_cm.pc diff --git a/src/Makefile.am b/src/Makefile.am index da638a811fb..9f51e444dd0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -54,6 +54,7 @@ AM_CFLAGS = $(VISIBILITY_CFLAGS) AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS) if HAVE_VULKAN +SUBDIRS += isl SUBDIRS += vulkan endif diff --git a/src/isl/.gitignore b/src/isl/.gitignore new file mode 100644 index 00000000000..e9cfd67b94e --- /dev/null +++ b/src/isl/.gitignore @@ -0,0 +1 @@ +/isl_format_layout.c diff --git a/src/isl/Makefile.am b/src/isl/Makefile.am new file mode 100644 index 00000000000..6a5c29c67cb --- /dev/null +++ b/src/isl/Makefile.am @@ -0,0 +1,67 @@ +# Copyright 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the 
rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +SUBDIRS = . + +noinst_LTLIBRARIES = libisl.la + +# The gallium includes are for the util/u_math.h include from main/macros.h +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_builddir)/src + +libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init + +libisl_la_SOURCES = \ + isl.c \ + isl_format_layout.c \ + isl_gen4.c \ + isl_gen4.h \ + isl_gen6.c \ + isl_gen6.h \ + isl_gen7.c \ + isl_gen7.h \ + isl_gen8.c \ + isl_gen8.h \ + isl_gen9.c \ + isl_gen9.h \ + isl_image.c \ + $(NULL) + +BUILT_SOURCES = \ + isl_format_layout.c + +isl_format_layout.c: isl_format_layout_gen.bash \ + isl_format_layout.csv + $(AM_V_GEN)$(srcdir)/isl_format_layout_gen.bash \ + <$(srcdir)/isl_format_layout.csv >$@ + +include $(top_srcdir)/install-lib-links.mk diff --git a/src/isl/isl.c b/src/isl/isl.c new file mode 100644 index 
00000000000..d858ea74745 --- /dev/null +++ b/src/isl/isl.c @@ -0,0 +1,1046 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "isl.h" +#include "isl_gen4.h" +#include "isl_gen6.h" +#include "isl_gen7.h" +#include "isl_gen8.h" +#include "isl_gen9.h" +#include "isl_priv.h" + +void PRINTFLIKE(3, 4) UNUSED +__isl_finishme(const char *file, int line, const char *fmt, ...) +{ + va_list ap; + char buf[512]; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf); +} + +void +isl_device_init(struct isl_device *dev, + const struct brw_device_info *info) +{ + dev->info = info; + dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6; + + /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some + * device properties at buildtime. 
Verify that the macros with the device + * properties chosen during runtime. + */ + assert(ISL_DEV_GEN(dev) == dev->info->gen); + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev) == dev->use_separate_stencil); + + /* Did we break hiz or stencil? */ + if (ISL_DEV_USE_SEPARATE_STENCIL(dev)) + assert(info->has_hiz_and_separate_stencil); + if (info->must_use_separate_stencil) + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); +} + +bool +isl_format_has_sint_channel(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->channels.r.type == ISL_SINT || + fmtl->channels.g.type == ISL_SINT || + fmtl->channels.b.type == ISL_SINT || + fmtl->channels.a.type == ISL_SINT || + fmtl->channels.l.type == ISL_SINT || + fmtl->channels.i.type == ISL_SINT || + fmtl->channels.p.type == ISL_SINT || + fmtl->channels.g.type == ISL_SINT; +} + +/** + * @param[out] info is written only on success + */ +bool +isl_tiling_get_info(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_tile_info *tile_info) +{ + const uint32_t bs = format_block_size; + uint32_t width, height; + + assert(bs > 0); + + switch (tiling) { + case ISL_TILING_LINEAR: + width = 1; + height = 1; + break; + + case ISL_TILING_X: + width = 1 << 9; + height = 1 << 3; + break; + + case ISL_TILING_Y0: + width = 1 << 7; + height = 1 << 5; + break; + + case ISL_TILING_W: + /* XXX: Should W tile be same as Y? 
*/ + width = 1 << 6; + height = 1 << 6; + break; + + case ISL_TILING_Yf: + case ISL_TILING_Ys: { + if (ISL_DEV_GEN(dev) < 9) + return false; + + if (!isl_is_pow2(bs)) + return false; + + bool is_Ys = tiling == ISL_TILING_Ys; + + width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys)); + height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys)); + break; + } + } /* end switch */ + + *tile_info = (struct isl_tile_info) { + .tiling = tiling, + .width = width, + .height = height, + .size = width * height, + }; + + return true; +} + +void +isl_tiling_get_extent(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_extent2d *e) +{ + struct isl_tile_info tile_info; + isl_tiling_get_info(dev, tiling, format_block_size, &tile_info); + *e = isl_extent2d(tile_info.width, tile_info.height); +} + +/** + * @param[out] tiling is set only on success + */ +bool +isl_surf_choose_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling *tiling) +{ + isl_tiling_flags_t tiling_flags = info->tiling_flags; + + if (ISL_DEV_GEN(dev) >= 7) { + gen7_filter_tiling(dev, info, &tiling_flags); + } else { + isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev)); + gen7_filter_tiling(dev, info, &tiling_flags); + } + + #define CHOOSE(__tiling) \ + do { \ + if (tiling_flags & (1u << (__tiling))) { \ + *tiling = (__tiling); \ + return true; \ + } \ + } while (0) + + /* Of the tiling modes remaining, choose the one that offers the best + * performance. + */ + CHOOSE(ISL_TILING_Ys); + CHOOSE(ISL_TILING_Yf); + CHOOSE(ISL_TILING_Y0); + CHOOSE(ISL_TILING_X); + CHOOSE(ISL_TILING_W); + CHOOSE(ISL_TILING_LINEAR); + + #undef CHOOSE + + /* No tiling mode accomodates the inputs. 
*/ + return false; +} + +static bool +isl_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + if (ISL_DEV_GEN(dev) >= 8) { + return gen8_choose_msaa_layout(dev, info, tiling, msaa_layout); + } else if (ISL_DEV_GEN(dev) >= 7) { + return gen7_choose_msaa_layout(dev, info, tiling, msaa_layout); + } else if (ISL_DEV_GEN(dev) >= 6) { + return gen6_choose_msaa_layout(dev, info, tiling, msaa_layout); + } else { + return gen4_choose_msaa_layout(dev, info, tiling, msaa_layout); + } +} + +static void +isl_msaa_interleaved_scale_px_to_sa(uint32_t samples, + uint32_t *width, uint32_t *height) +{ + assert(isl_is_pow2(samples)); + + /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level + * Sizes (p133): + * + * If the surface is multisampled and it is a depth or stencil surface + * or Multisampled Surface StorageFormat in SURFACE_STATE is + * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before + * proceeding: [...] + */ + *width = isl_align(*width, 2) << ((ffs(samples) - 0) / 2); + *height = isl_align(*height, 2) << ((ffs(samples) - 1) / 2); +} + +static enum isl_array_pitch_span +isl_choose_array_pitch_span(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + const struct isl_extent4d *phys_level0_sa) +{ + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + if (ISL_DEV_GEN(dev) >= 9) + isl_finishme("%s:%s: [SKL+] 1d surface layout", __FILE__, __func__); + /* fallthrough */ + + case ISL_DIM_LAYOUT_GEN4_2D: + if (ISL_DEV_GEN(dev) >= 8) { + /* QPitch becomes programmable in Broadwell. So choose the + * most compact QPitch possible in order to conserve memory. 
+ * + * From the Broadwell PRM >> Volume 2d: Command Reference: Structures + * >> RENDER_SURFACE_STATE Surface QPitch (p325): + * + * - Software must ensure that this field is set to a value + * sufficiently large such that the array slices in the surface + * do not overlap. Refer to the Memory Data Formats section for + * information on how surfaces are stored in memory. + * + * - This field specifies the distance in rows between array + * slices. It is used only in the following cases: + * + * - Surface Array is enabled OR + * - Number of Mulitsamples is not NUMSAMPLES_1 and + * Multisampled Surface Storage Format set to MSFMT_MSS OR + * - Surface Type is SURFTYPE_CUBE + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } else if (ISL_DEV_GEN(dev) >= 7) { + /* Note that Ivybridge introduces + * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the + * driver more control over the QPitch. + */ + + if (phys_level0_sa->array_len == 1) { + /* The hardware will never use the QPitch. So choose the most + * compact QPitch possible in order to conserve memory. + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + if (isl_surf_usage_is_depth_or_stencil(info->usage)) { + /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >> + * Section 6.18.4.7: Surface Arrays (p112): + * + * If Surface Array Spacing is set to ARYSPC_FULL (note that + * the depth buffer and stencil buffer have an implied value of + * ARYSPC_FULL): + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + if (info->levels == 1) { + /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing + * to ARYSPC_LOD0. 
+ */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + return ISL_ARRAY_PITCH_SPAN_FULL; + } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && + ISL_DEV_USE_SEPARATE_STENCIL(dev) && + isl_surf_usage_is_stencil(info->usage)) { + /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * The separate stencil buffer does not support mip mapping, thus + * the storage for LODs other than LOD 0 is not needed. + */ + assert(info->levels == 1); + assert(phys_level0_sa->array_len == 1); + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } else { + if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && + ISL_DEV_USE_SEPARATE_STENCIL(dev) && + isl_surf_usage_is_stencil(info->usage)) { + /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * The separate stencil buffer does not support mip mapping, + * thus the storage for LODs other than LOD 0 is not needed. + */ + assert(info->levels == 1); + assert(phys_level0_sa->array_len == 1); + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + if (phys_level0_sa->array_len == 1) { + /* The hardware will never use the QPitch. So choose the most + * compact QPitch possible in order to conserve memory. + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + return ISL_ARRAY_PITCH_SPAN_FULL; + } + + case ISL_DIM_LAYOUT_GEN4_3D: + /* The hardware will never use the QPitch. So choose the most + * compact QPitch possible in order to conserve memory. 
+ */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + unreachable("bad isl_dim_layout"); + return ISL_ARRAY_PITCH_SPAN_FULL; +} + +static void +isl_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + if (ISL_DEV_GEN(dev) >= 9) { + gen9_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } else if (ISL_DEV_GEN(dev) >= 8) { + gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } else if (ISL_DEV_GEN(dev) >= 7) { + gen7_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } else if (ISL_DEV_GEN(dev) >= 6) { + gen6_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } else { + gen4_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } +} + +static enum isl_dim_layout +isl_surf_choose_dim_layout(const struct isl_device *dev, + enum isl_surf_dim logical_dim) +{ + if (ISL_DEV_GEN(dev) >= 9) { + switch (logical_dim) { + case ISL_SURF_DIM_1D: + return ISL_DIM_LAYOUT_GEN9_1D; + case ISL_SURF_DIM_2D: + case ISL_SURF_DIM_3D: + return ISL_DIM_LAYOUT_GEN4_2D; + } + } else { + switch (logical_dim) { + case ISL_SURF_DIM_1D: + case ISL_SURF_DIM_2D: + return ISL_DIM_LAYOUT_GEN4_2D; + case ISL_SURF_DIM_3D: + return ISL_DIM_LAYOUT_GEN4_3D; + } + } + + unreachable("bad isl_surf_dim"); + return ISL_DIM_LAYOUT_GEN4_2D; +} + +/** + * Calculate the physical extent of the surface's first level, in units of + * surface samples. 
+ */ +static void +isl_calc_phys_level0_extent_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent4d *phys_level0_sa) +{ + if (isl_format_is_yuv(info->format)) + isl_finishme("%s:%s: YUV format", __FILE__, __func__); + + switch (info->dim) { + case ISL_SURF_DIM_1D: + assert(info->height == 1); + assert(info->depth == 1); + assert(info->samples == 1); + + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN4_3D: + unreachable("bad isl_dim_layout"); + + case ISL_DIM_LAYOUT_GEN9_1D: + if (ISL_DEV_GEN(dev) >= 9) + isl_finishme("%s:%s: [SKL+] 1d surface layout", __FILE__, __func__); + /* fallthrough */ + + case ISL_DIM_LAYOUT_GEN4_2D: + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = 1, + .d = 1, + .a = info->array_len, + }; + break; + } + break; + + case ISL_SURF_DIM_2D: + assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D); + + if (tiling == ISL_TILING_Ys && info->samples > 1) + isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__); + + switch (msaa_layout) { + case ISL_MSAA_LAYOUT_NONE: + assert(info->depth == 1); + assert(info->samples == 1); + + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = 1, + .a = info->array_len, + }; + break; + + case ISL_MSAA_LAYOUT_ARRAY: + assert(info->depth == 1); + assert(info->array_len == 1); + assert(!isl_format_is_compressed(info->format)); + + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = 1, + .a = info->samples, + }; + break; + + case ISL_MSAA_LAYOUT_INTERLEAVED: + assert(info->depth == 1); + assert(info->array_len == 1); + assert(!isl_format_is_compressed(info->format)); + + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = 1, + .a = 1, + }; + + isl_msaa_interleaved_scale_px_to_sa(info->samples, + &phys_level0_sa->w, + 
&phys_level0_sa->h); + break; + } + break; + + case ISL_SURF_DIM_3D: + assert(info->array_len == 1); + assert(info->samples == 1); + + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + unreachable("bad isl_dim_layout"); + + case ISL_DIM_LAYOUT_GEN4_2D: + assert(ISL_DEV_GEN(dev) >= 9); + + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = 1, + .a = info->depth, + }; + break; + + case ISL_DIM_LAYOUT_GEN4_3D: + assert(ISL_DEV_GEN(dev) < 9); + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = info->depth, + .a = 1, + }; + break; + } + break; + } +} + +/** + * A variant of isl_calc_phys_slice0_extent_sa() specific to + * ISL_DIM_LAYOUT_GEN4_2D. + */ +static void +isl_calc_phys_slice0_extent_sa_gen4_2d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_msaa_layout msaa_layout, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + assert(phys_level0_sa->depth == 1); + + uint32_t slice_top_w = 0; + uint32_t slice_bottom_w = 0; + uint32_t slice_left_h = 0; + uint32_t slice_right_h = 0; + + uint32_t W0 = phys_level0_sa->w; + uint32_t H0 = phys_level0_sa->h; + + for (uint32_t l = 0; l < info->levels; ++l) { + uint32_t W = isl_minify(W0, l); + uint32_t H = isl_minify(H0, l); + + if (msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { + /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level + * Sizes (p133): + * + * If the surface is multisampled and it is a depth or stencil + * surface or Multisampled Surface StorageFormat in + * SURFACE_STATE is MSFMT_DEPTH_STENCIL, W_L and H_L must be + * adjusted as follows before proceeding: [...] 
+ */ + isl_msaa_interleaved_scale_px_to_sa(info->samples, &W, &H); + } + + uint32_t w = isl_align_npot(W, image_align_sa->w); + uint32_t h = isl_align_npot(H, image_align_sa->h); + + if (l == 0) { + slice_top_w = w; + slice_left_h = h; + slice_right_h = h; + } else if (l == 1) { + slice_bottom_w = w; + slice_left_h += h; + } else if (l == 2) { + slice_bottom_w += w; + } else { + slice_right_h += h; + } + } + + *phys_slice0_sa = (struct isl_extent2d) { + .w = MAX(slice_top_w, slice_bottom_w), + .h = MAX(slice_left_h, slice_right_h), + }; +} + +/** + * A variant of isl_calc_phys_slice0_extent_sa() specific to + * ISL_DIM_LAYOUT_GEN4_3D. + */ +static void +isl_calc_phys_slice0_extent_sa_gen4_3d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + assert(info->samples == 1); + assert(phys_level0_sa->array_len == 1); + + uint32_t slice_w = 0; + uint32_t slice_h = 0; + + uint32_t W0 = phys_level0_sa->w; + uint32_t H0 = phys_level0_sa->h; + uint32_t D0 = phys_level0_sa->d; + + for (uint32_t l = 0; l < info->levels; ++l) { + uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w); + uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h); + uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa->d); + + uint32_t max_layers_horiz = MIN(level_d, 1u << l); + uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); + + slice_w = MAX(slice_w, level_w * max_layers_horiz); + slice_h += level_h * max_layers_vert; + } + + *phys_slice0_sa = (struct isl_extent2d) { + .w = slice_w, + .h = slice_h, + }; +} + +/** + * Calculate the physical extent of the surface's first array slice, in units + * of surface samples. The result is aligned to \a image_align_sa. 
+ */ +static void +isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + enum isl_msaa_layout msaa_layout, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + if (ISL_DEV_GEN(dev) >= 9) + isl_finishme("%s:%s: [SKL+] physical layout of 1d surfaces", + __FILE__, __func__); + /*fallthrough*/ + case ISL_DIM_LAYOUT_GEN4_2D: + isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, + image_align_sa, phys_level0_sa, + phys_slice0_sa); + return; + case ISL_DIM_LAYOUT_GEN4_3D: + isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, image_align_sa, + phys_level0_sa, phys_slice0_sa); + return; + } +} + +/** + * Calculate the pitch between physical array slices, in units of rows of + * surface samples. The result is aligned to \a image_align_sa. + */ +static uint32_t +isl_calc_array_pitch_sa_rows(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + enum isl_array_pitch_span array_pitch_span, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + const struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + if (ISL_DEV_GEN(dev) >= 9) + isl_finishme("%s:%s: [SKL+] physical layout of 1d surfaces", + __FILE__, __func__); + /*fallthrough*/ + + case ISL_DIM_LAYOUT_GEN4_2D: + switch (array_pitch_span) { + case ISL_ARRAY_PITCH_SPAN_COMPACT: + return isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + case ISL_ARRAY_PITCH_SPAN_FULL: { + /* The QPitch equation is found in the Broadwell PRM >> Volume 5: + * Memory Views >> Common Surface Formats >> Surface Layout >> 2D + * Surfaces >> Surface Arrays. 
+ */ + uint32_t H0_sa = phys_level0_sa->h; + uint32_t H1_sa = isl_minify(H0_sa, 1); + + uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); + uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); + + uint32_t m; + if (ISL_DEV_GEN(dev) >= 7) { + /* The QPitch equation changed slightly in Ivybridge. */ + m = 12; + } else { + m = 11; + } + + uint32_t pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); + + if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && + (info->height % 4 == 1)) { + /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than + * the value calculated in the equation above , for every + * other odd Surface Height starting from 1 i.e. 1,5,9,13. + * + * XXX(chadv): Is the errata natural corollary of the physical + * layout of interleaved samples? + */ + pitch_sa_rows += 4; + } + + pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); + + return pitch_sa_rows; + } /* end case */ + break; + } + break; + + case ISL_DIM_LAYOUT_GEN4_3D: + assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); + return isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + } + + unreachable("bad isl_dim_layout"); + return 0; +} + +/** + * Calculate the pitch of each surface row, in bytes. + */ +static uint32_t +isl_calc_row_pitch(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + const struct isl_extent3d *image_align_sa, + const struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + uint32_t row_pitch = info->min_pitch; + + /* First, align the surface to a cache line boundary, as the PRM explains + * below. 
+ * + * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Render Target and Media + * Surfaces: + * + * The data port accesses data (pixels) outside of the surface if they + * are contained in the same cache request as pixels that are within the + * surface. These pixels will not be returned by the requesting message, + * however if these pixels lie outside of defined pages in the GTT, + * a GTT error will result when the cache request is processed. In order + * to avoid these GTT errors, “padding” at the bottom of the surface is + * sometimes necessary. + * + * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: + * + * The sampling engine accesses texels outside of the surface if they + * are contained in the same cache line as texels that are within the + * surface. These texels will not participate in any calculation + * performed by the sampling engine and will not affect the result of + * any sampling engine operation, however if these texels lie outside of + * defined pages in the GTT, a GTT error will result when the cache line + * is accessed. In order to avoid these GTT errors, “padding” at the + * bottom and right side of a sampling engine surface is sometimes + * necessary. + * + * It is possible that a cache line will straddle a page boundary if the + * base address or pitch is not aligned. All pages included in the cache + * lines that are part of the surface must map to valid GTT entries to + * avoid errors. To determine the necessary padding on the bottom and + * right side of the surface, refer to the table in Alignment Unit Size + * section for the i and j parameters for the surface format in use. The + * surface must then be extended to the next multiple of the alignment + * unit size in each dimension, and all texels contained in this + * extended surface must have valid GTT entries. 
+ * + * For example, suppose the surface size is 15 texels by 10 texels and + * the alignment parameters are i=4 and j=2. In this case, the extended + * surface would be 16 by 10. Note that these calculations are done in + * texels, and must be converted to bytes based on the surface format + * being used to determine whether additional pages need to be defined. + */ + row_pitch = MAX(row_pitch, + fmtl->bs * isl_align_div_npot(phys_slice0_sa->w, fmtl->bw)); + + switch (tile_info->tiling) { + case ISL_TILING_LINEAR: + /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> + * RENDER_SURFACE_STATE Surface Pitch (p349): + * + * - For linear render target surfaces and surfaces accessed with the + * typed data port messages, the pitch must be a multiple of the + * element size for non-YUV surface formats. Pitch must be + * a multiple of 2 * element size for YUV surface formats. + * + * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we + * ignore because isl doesn't do buffers.] + * + * - For other linear surfaces, the pitch can be any multiple of + * bytes. + */ + if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { + if (isl_format_is_yuv(info->format)) { + row_pitch = isl_align(row_pitch, fmtl->bs); + } else { + row_pitch = isl_align(row_pitch, 2 * fmtl->bs); + } + } + break; + default: + /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> + * RENDER_SURFACE_STATE Surface Pitch (p349): + * + * - For tiled surfaces, the pitch must be a multiple of the tile + * width. + */ + row_pitch = isl_align(row_pitch, tile_info->width); + break; + } + + return row_pitch; +} + +/** + * Calculate the surface's total height, including padding, in units of + * surface elements. 
+ */ +static uint32_t +isl_calc_total_height_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + uint32_t phys_array_len, + uint32_t row_pitch, + uint32_t array_pitch_el_rows) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + uint32_t total_h_el = phys_array_len * array_pitch_el_rows; + uint32_t pad_bytes = 0; + + /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Render Target and Media + * Surfaces: + * + * The data port accesses data (pixels) outside of the surface if they + * are contained in the same cache request as pixels that are within the + * surface. These pixels will not be returned by the requesting message, + * however if these pixels lie outside of defined pages in the GTT, + * a GTT error will result when the cache request is processed. In + * order to avoid these GTT errors, “padding” at the bottom of the + * surface is sometimes necessary. + * + * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: + * + * ... Lots of padding requirements, all listed separately below. + */ + + /* We can safely ignore the first padding requirement, quoted below, + * because isl doesn't do buffers. + * + * - [pre-BDW] For buffers, which have no inherent “height,” padding + * requirements are different. A buffer must be padded to the next + * multiple of 256 array elements, with an additional 16 bytes added + * beyond that to account for the L1 cache line. + */ + + /* + * - For compressed textures [...], padding at the bottom of the surface + * is to an even compressed row. + */ + if (isl_format_is_compressed(info->format)) + total_h_el = isl_align(total_h_el, 2); + + /* + * - For cube surfaces, an additional two rows of padding are required + * at the bottom of the surface. 
+ */ + if (info->usage & ISL_SURF_USAGE_CUBE_BIT) + total_h_el += 2; + + /* + * - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats, + * additional padding is required. These surfaces require an extra row + * plus 16 bytes of padding at the bottom in addition to the general + * padding requirements. + */ + if (isl_format_is_yuv(info->format) && + (fmtl->bs == 96 || fmtl->bs == 48|| fmtl->bs == 24)) { + total_h_el += 1; + pad_bytes += 16; + } + + /* + * - For linear surfaces, additional padding of 64 bytes is required at + * the bottom of the surface. This is in addition to the padding + * required above. + */ + if (tile_info->tiling == ISL_TILING_LINEAR) + pad_bytes += 64; + + /* The below text weakens, not strengthens, the padding requirements for + * linear surfaces. Therefore we can safely ignore it. + * + * - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array, + * non-MSAA, non-mip-mapped surfaces in linear memory, the only + * padding requirement is to the next aligned 64-byte boundary beyond + * the end of the surface. The rest of the padding requirements + * documented above do not apply to these surfaces. + */ + + /* + * - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and + * height % 4 != 0, the surface must be padded with + * 4-(height % 4)*Surface Pitch # of bytes. + */ + if (ISL_DEV_GEN(dev) >= 9 && + tile_info->tiling == ISL_TILING_LINEAR && + (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) { + total_h_el = isl_align(total_h_el, 4); + } + + /* + * - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded + * to 4 times the Surface Pitch # of bytes + */ + if (ISL_DEV_GEN(dev) >= 9 && + tile_info->tiling == ISL_TILING_LINEAR && + info->dim == ISL_SURF_DIM_1D) { + total_h_el += 4; + } + + /* Be sloppy. Align any leftover padding to a row boundary. 
*/ + total_h_el += isl_align_div_npot(pad_bytes, row_pitch); + + return total_h_el; +} + +bool +isl_surf_init_s(const struct isl_device *dev, + struct isl_surf *surf, + const struct isl_surf_init_info *restrict info) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + const struct isl_extent4d logical_level0_px = { + .w = info->width, + .h = info->height, + .d = info->depth, + .a = info->array_len, + }; + + enum isl_dim_layout dim_layout = + isl_surf_choose_dim_layout(dev, info->dim); + + enum isl_tiling tiling; + if (!isl_surf_choose_tiling(dev, info, &tiling)) + return false; + + struct isl_tile_info tile_info; + if (!isl_tiling_get_info(dev, tiling, fmtl->bs, &tile_info)) + return false; + + enum isl_msaa_layout msaa_layout; + if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout)) + return false; + + struct isl_extent3d image_align_el; + isl_choose_image_alignment_el(dev, info, tiling, msaa_layout, + &image_align_el); + + struct isl_extent3d image_align_sa = + isl_extent3d_el_to_sa(info->format, image_align_el); + + struct isl_extent4d phys_level0_sa; + isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, + &phys_level0_sa); + + enum isl_array_pitch_span array_pitch_span = + isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); + + struct isl_extent2d phys_slice0_sa; + isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout, + &image_align_sa, &phys_level0_sa, + &phys_slice0_sa); + assert(phys_slice0_sa.w % fmtl->bw == 0); + assert(phys_slice0_sa.h % fmtl->bh == 0); + + const uint32_t row_pitch = isl_calc_row_pitch(dev, info, &tile_info, + &image_align_sa, + &phys_slice0_sa); + + const uint32_t array_pitch_sa_rows = + isl_calc_array_pitch_sa_rows(dev, info, dim_layout, array_pitch_span, + &image_align_sa, &phys_level0_sa, + &phys_slice0_sa); + assert(array_pitch_sa_rows % fmtl->bh == 0); + + const uint32_t array_pitch_el_rows = array_pitch_sa_rows / fmtl->bh; + + const 
uint32_t total_h_el = + isl_calc_total_height_el(dev, info, &tile_info, + phys_level0_sa.array_len, row_pitch, + array_pitch_el_rows); + + const uint32_t total_h_sa = total_h_el * fmtl->bh; + const uint32_t size = row_pitch * total_h_sa; + + /* Alignment of surface base address, in bytes */ + uint32_t base_alignment = info->min_alignment; + base_alignment = isl_align(base_alignment, tile_info.size); + + *surf = (struct isl_surf) { + .dim = info->dim, + .dim_layout = dim_layout, + .msaa_layout = msaa_layout, + .tiling = tiling, + .format = info->format, + + .levels = info->levels, + .samples = info->samples, + + .image_alignment_el = image_align_el, + .logical_level0_px = logical_level0_px, + .phys_level0_sa = phys_level0_sa, + + .size = size, + .alignment = base_alignment, + .row_pitch = row_pitch, + .array_pitch_el_rows = array_pitch_el_rows, + .array_pitch_span = array_pitch_span, + + .usage = info->usage, + }; + + return true; +} diff --git a/src/isl/isl.h b/src/isl/isl.h new file mode 100644 index 00000000000..184b0c5f70a --- /dev/null +++ b/src/isl/isl.h @@ -0,0 +1,917 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file + * @brief Intel Surface Layout + * + * Header Layout + * ============= + * + * The header is ordered as: + * - forward declarations + * - macros that may be overridden at compile-time for specific gens + * - enums and constants + * - structs and unions + * - functions + * + * + * Surface Units + * ============= + * + * Some symbol names have a unit suffix. + * + * - px: logical pixels + * - sa: physical surface samples + * - el: physical surface elements + * - sa_rows: rows of physical surface samples + * - el_rows: rows of physical surface elements + * + * The Broadwell PRM [1] defines a surface element as follows: + * + * An element is defined as a pixel in uncompresed surface formats, and as + * a compression block in compressed surface formats. For + * MSFMT_DEPTH_STENCIL type multisampled surfaces, an element is a sample. + * + * [1]: Broadwell PRM >> Volume 2d: Command Reference: Structures >> + * RENDER_SURFACE_STATE Surface Vertical Alignment (p325) + */ + +#pragma once + +#include +#include +#include + +#include "util/macros.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct brw_device_info; +struct brw_image_param; + +#ifndef ISL_DEV_GEN +/** + * @brief Get the hardware generation of isl_device. + * + * You can define this as a compile-time constant in the CFLAGS. For example, + * `gcc -DISL_DEV_GEN(dev)=9 ...`. + */ +#define ISL_DEV_GEN(__dev) ((__dev)->info->gen) +#endif + +#ifndef ISL_DEV_USE_SEPARATE_STENCIL +/** + * You can define this as a compile-time constant in the CFLAGS. For example, + * `gcc -DISL_DEV_USE_SEPARATE_STENCIL(dev)=1 ...`. 
+ */ +#define ISL_DEV_USE_SEPARATE_STENCIL(__dev) ((__dev)->use_separate_stencil) +#endif + +/** + * Hardware enumeration SURFACE_FORMAT. + * + * For the official list, see Broadwell PRM: Volume 2b: Command Reference: + * Enumerations: SURFACE_FORMAT. + */ +enum isl_format { + ISL_FORMAT_R32G32B32A32_FLOAT = 0, + ISL_FORMAT_R32G32B32A32_SINT = 1, + ISL_FORMAT_R32G32B32A32_UINT = 2, + ISL_FORMAT_R32G32B32A32_UNORM = 3, + ISL_FORMAT_R32G32B32A32_SNORM = 4, + ISL_FORMAT_R64G64_FLOAT = 5, + ISL_FORMAT_R32G32B32X32_FLOAT = 6, + ISL_FORMAT_R32G32B32A32_SSCALED = 7, + ISL_FORMAT_R32G32B32A32_USCALED = 8, + ISL_FORMAT_R32G32B32A32_SFIXED = 32, + ISL_FORMAT_R64G64_PASSTHRU = 33, + ISL_FORMAT_R32G32B32_FLOAT = 64, + ISL_FORMAT_R32G32B32_SINT = 65, + ISL_FORMAT_R32G32B32_UINT = 66, + ISL_FORMAT_R32G32B32_UNORM = 67, + ISL_FORMAT_R32G32B32_SNORM = 68, + ISL_FORMAT_R32G32B32_SSCALED = 69, + ISL_FORMAT_R32G32B32_USCALED = 70, + ISL_FORMAT_R32G32B32_SFIXED = 80, + ISL_FORMAT_R16G16B16A16_UNORM = 128, + ISL_FORMAT_R16G16B16A16_SNORM = 129, + ISL_FORMAT_R16G16B16A16_SINT = 130, + ISL_FORMAT_R16G16B16A16_UINT = 131, + ISL_FORMAT_R16G16B16A16_FLOAT = 132, + ISL_FORMAT_R32G32_FLOAT = 133, + ISL_FORMAT_R32G32_SINT = 134, + ISL_FORMAT_R32G32_UINT = 135, + ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS = 136, + ISL_FORMAT_X32_TYPELESS_G8X24_UINT = 137, + ISL_FORMAT_L32A32_FLOAT = 138, + ISL_FORMAT_R32G32_UNORM = 139, + ISL_FORMAT_R32G32_SNORM = 140, + ISL_FORMAT_R64_FLOAT = 141, + ISL_FORMAT_R16G16B16X16_UNORM = 142, + ISL_FORMAT_R16G16B16X16_FLOAT = 143, + ISL_FORMAT_A32X32_FLOAT = 144, + ISL_FORMAT_L32X32_FLOAT = 145, + ISL_FORMAT_I32X32_FLOAT = 146, + ISL_FORMAT_R16G16B16A16_SSCALED = 147, + ISL_FORMAT_R16G16B16A16_USCALED = 148, + ISL_FORMAT_R32G32_SSCALED = 149, + ISL_FORMAT_R32G32_USCALED = 150, + ISL_FORMAT_R32G32_SFIXED = 160, + ISL_FORMAT_R64_PASSTHRU = 161, + ISL_FORMAT_B8G8R8A8_UNORM = 192, + ISL_FORMAT_B8G8R8A8_UNORM_SRGB = 193, + ISL_FORMAT_R10G10B10A2_UNORM = 194, + 
ISL_FORMAT_R10G10B10A2_UNORM_SRGB = 195, + ISL_FORMAT_R10G10B10A2_UINT = 196, + ISL_FORMAT_R10G10B10_SNORM_A2_UNORM = 197, + ISL_FORMAT_R8G8B8A8_UNORM = 199, + ISL_FORMAT_R8G8B8A8_UNORM_SRGB = 200, + ISL_FORMAT_R8G8B8A8_SNORM = 201, + ISL_FORMAT_R8G8B8A8_SINT = 202, + ISL_FORMAT_R8G8B8A8_UINT = 203, + ISL_FORMAT_R16G16_UNORM = 204, + ISL_FORMAT_R16G16_SNORM = 205, + ISL_FORMAT_R16G16_SINT = 206, + ISL_FORMAT_R16G16_UINT = 207, + ISL_FORMAT_R16G16_FLOAT = 208, + ISL_FORMAT_B10G10R10A2_UNORM = 209, + ISL_FORMAT_B10G10R10A2_UNORM_SRGB = 210, + ISL_FORMAT_R11G11B10_FLOAT = 211, + ISL_FORMAT_R32_SINT = 214, + ISL_FORMAT_R32_UINT = 215, + ISL_FORMAT_R32_FLOAT = 216, + ISL_FORMAT_R24_UNORM_X8_TYPELESS = 217, + ISL_FORMAT_X24_TYPELESS_G8_UINT = 218, + ISL_FORMAT_L32_UNORM = 221, + ISL_FORMAT_A32_UNORM = 222, + ISL_FORMAT_L16A16_UNORM = 223, + ISL_FORMAT_I24X8_UNORM = 224, + ISL_FORMAT_L24X8_UNORM = 225, + ISL_FORMAT_A24X8_UNORM = 226, + ISL_FORMAT_I32_FLOAT = 227, + ISL_FORMAT_L32_FLOAT = 228, + ISL_FORMAT_A32_FLOAT = 229, + ISL_FORMAT_X8B8_UNORM_G8R8_SNORM = 230, + ISL_FORMAT_A8X8_UNORM_G8R8_SNORM = 231, + ISL_FORMAT_B8X8_UNORM_G8R8_SNORM = 232, + ISL_FORMAT_B8G8R8X8_UNORM = 233, + ISL_FORMAT_B8G8R8X8_UNORM_SRGB = 234, + ISL_FORMAT_R8G8B8X8_UNORM = 235, + ISL_FORMAT_R8G8B8X8_UNORM_SRGB = 236, + ISL_FORMAT_R9G9B9E5_SHAREDEXP = 237, + ISL_FORMAT_B10G10R10X2_UNORM = 238, + ISL_FORMAT_L16A16_FLOAT = 240, + ISL_FORMAT_R32_UNORM = 241, + ISL_FORMAT_R32_SNORM = 242, + ISL_FORMAT_R10G10B10X2_USCALED = 243, + ISL_FORMAT_R8G8B8A8_SSCALED = 244, + ISL_FORMAT_R8G8B8A8_USCALED = 245, + ISL_FORMAT_R16G16_SSCALED = 246, + ISL_FORMAT_R16G16_USCALED = 247, + ISL_FORMAT_R32_SSCALED = 248, + ISL_FORMAT_R32_USCALED = 249, + ISL_FORMAT_B5G6R5_UNORM = 256, + ISL_FORMAT_B5G6R5_UNORM_SRGB = 257, + ISL_FORMAT_B5G5R5A1_UNORM = 258, + ISL_FORMAT_B5G5R5A1_UNORM_SRGB = 259, + ISL_FORMAT_B4G4R4A4_UNORM = 260, + ISL_FORMAT_B4G4R4A4_UNORM_SRGB = 261, + ISL_FORMAT_R8G8_UNORM = 262, + 
ISL_FORMAT_R8G8_SNORM = 263, + ISL_FORMAT_R8G8_SINT = 264, + ISL_FORMAT_R8G8_UINT = 265, + ISL_FORMAT_R16_UNORM = 266, + ISL_FORMAT_R16_SNORM = 267, + ISL_FORMAT_R16_SINT = 268, + ISL_FORMAT_R16_UINT = 269, + ISL_FORMAT_R16_FLOAT = 270, + ISL_FORMAT_A8P8_UNORM_PALETTE0 = 271, + ISL_FORMAT_A8P8_UNORM_PALETTE1 = 272, + ISL_FORMAT_I16_UNORM = 273, + ISL_FORMAT_L16_UNORM = 274, + ISL_FORMAT_A16_UNORM = 275, + ISL_FORMAT_L8A8_UNORM = 276, + ISL_FORMAT_I16_FLOAT = 277, + ISL_FORMAT_L16_FLOAT = 278, + ISL_FORMAT_A16_FLOAT = 279, + ISL_FORMAT_L8A8_UNORM_SRGB = 280, + ISL_FORMAT_R5G5_SNORM_B6_UNORM = 281, + ISL_FORMAT_B5G5R5X1_UNORM = 282, + ISL_FORMAT_B5G5R5X1_UNORM_SRGB = 283, + ISL_FORMAT_R8G8_SSCALED = 284, + ISL_FORMAT_R8G8_USCALED = 285, + ISL_FORMAT_R16_SSCALED = 286, + ISL_FORMAT_R16_USCALED = 287, + ISL_FORMAT_P8A8_UNORM_PALETTE0 = 290, + ISL_FORMAT_P8A8_UNORM_PALETTE1 = 291, + ISL_FORMAT_A1B5G5R5_UNORM = 292, + ISL_FORMAT_A4B4G4R4_UNORM = 293, + ISL_FORMAT_L8A8_UINT = 294, + ISL_FORMAT_L8A8_SINT = 295, + ISL_FORMAT_R8_UNORM = 320, + ISL_FORMAT_R8_SNORM = 321, + ISL_FORMAT_R8_SINT = 322, + ISL_FORMAT_R8_UINT = 323, + ISL_FORMAT_A8_UNORM = 324, + ISL_FORMAT_I8_UNORM = 325, + ISL_FORMAT_L8_UNORM = 326, + ISL_FORMAT_P4A4_UNORM_PALETTE0 = 327, + ISL_FORMAT_A4P4_UNORM_PALETTE0 = 328, + ISL_FORMAT_R8_SSCALED = 329, + ISL_FORMAT_R8_USCALED = 330, + ISL_FORMAT_P8_UNORM_PALETTE0 = 331, + ISL_FORMAT_L8_UNORM_SRGB = 332, + ISL_FORMAT_P8_UNORM_PALETTE1 = 333, + ISL_FORMAT_P4A4_UNORM_PALETTE1 = 334, + ISL_FORMAT_A4P4_UNORM_PALETTE1 = 335, + ISL_FORMAT_Y8_UNORM = 336, + ISL_FORMAT_L8_UINT = 338, + ISL_FORMAT_L8_SINT = 339, + ISL_FORMAT_I8_UINT = 340, + ISL_FORMAT_I8_SINT = 341, + ISL_FORMAT_DXT1_RGB_SRGB = 384, + ISL_FORMAT_R1_UNORM = 385, + ISL_FORMAT_YCRCB_NORMAL = 386, + ISL_FORMAT_YCRCB_SWAPUVY = 387, + ISL_FORMAT_P2_UNORM_PALETTE0 = 388, + ISL_FORMAT_P2_UNORM_PALETTE1 = 389, + ISL_FORMAT_BC1_UNORM = 390, + ISL_FORMAT_BC2_UNORM = 391, + ISL_FORMAT_BC3_UNORM = 392, + 
ISL_FORMAT_BC4_UNORM = 393, + ISL_FORMAT_BC5_UNORM = 394, + ISL_FORMAT_BC1_UNORM_SRGB = 395, + ISL_FORMAT_BC2_UNORM_SRGB = 396, + ISL_FORMAT_BC3_UNORM_SRGB = 397, + ISL_FORMAT_MONO8 = 398, + ISL_FORMAT_YCRCB_SWAPUV = 399, + ISL_FORMAT_YCRCB_SWAPY = 400, + ISL_FORMAT_DXT1_RGB = 401, + ISL_FORMAT_FXT1 = 402, + ISL_FORMAT_R8G8B8_UNORM = 403, + ISL_FORMAT_R8G8B8_SNORM = 404, + ISL_FORMAT_R8G8B8_SSCALED = 405, + ISL_FORMAT_R8G8B8_USCALED = 406, + ISL_FORMAT_R64G64B64A64_FLOAT = 407, + ISL_FORMAT_R64G64B64_FLOAT = 408, + ISL_FORMAT_BC4_SNORM = 409, + ISL_FORMAT_BC5_SNORM = 410, + ISL_FORMAT_R16G16B16_FLOAT = 411, + ISL_FORMAT_R16G16B16_UNORM = 412, + ISL_FORMAT_R16G16B16_SNORM = 413, + ISL_FORMAT_R16G16B16_SSCALED = 414, + ISL_FORMAT_R16G16B16_USCALED = 415, + ISL_FORMAT_BC6H_SF16 = 417, + ISL_FORMAT_BC7_UNORM = 418, + ISL_FORMAT_BC7_UNORM_SRGB = 419, + ISL_FORMAT_BC6H_UF16 = 420, + ISL_FORMAT_PLANAR_420_8 = 421, + ISL_FORMAT_R8G8B8_UNORM_SRGB = 424, + ISL_FORMAT_ETC1_RGB8 = 425, + ISL_FORMAT_ETC2_RGB8 = 426, + ISL_FORMAT_EAC_R11 = 427, + ISL_FORMAT_EAC_RG11 = 428, + ISL_FORMAT_EAC_SIGNED_R11 = 429, + ISL_FORMAT_EAC_SIGNED_RG11 = 430, + ISL_FORMAT_ETC2_SRGB8 = 431, + ISL_FORMAT_R16G16B16_UINT = 432, + ISL_FORMAT_R16G16B16_SINT = 433, + ISL_FORMAT_R32_SFIXED = 434, + ISL_FORMAT_R10G10B10A2_SNORM = 435, + ISL_FORMAT_R10G10B10A2_USCALED = 436, + ISL_FORMAT_R10G10B10A2_SSCALED = 437, + ISL_FORMAT_R10G10B10A2_SINT = 438, + ISL_FORMAT_B10G10R10A2_SNORM = 439, + ISL_FORMAT_B10G10R10A2_USCALED = 440, + ISL_FORMAT_B10G10R10A2_SSCALED = 441, + ISL_FORMAT_B10G10R10A2_UINT = 442, + ISL_FORMAT_B10G10R10A2_SINT = 443, + ISL_FORMAT_R64G64B64A64_PASSTHRU = 444, + ISL_FORMAT_R64G64B64_PASSTHRU = 445, + ISL_FORMAT_ETC2_RGB8_PTA = 448, + ISL_FORMAT_ETC2_SRGB8_PTA = 449, + ISL_FORMAT_ETC2_EAC_RGBA8 = 450, + ISL_FORMAT_ETC2_EAC_SRGB8_A8 = 451, + ISL_FORMAT_R8G8B8_UINT = 456, + ISL_FORMAT_R8G8B8_SINT = 457, + ISL_FORMAT_RAW = 511, + + /* Hardware doesn't understand this out-of-band value */ + 
ISL_FORMAT_UNSUPPORTED = UINT16_MAX, +}; + +/** + * Numerical base type for channels of isl_format. + */ +enum isl_base_type { + ISL_VOID, + ISL_RAW, + ISL_UNORM, + ISL_SNORM, + ISL_UFLOAT, + ISL_SFLOAT, + ISL_UFIXED, + ISL_SFIXED, + ISL_UINT, + ISL_SINT, + ISL_USCALED, + ISL_SSCALED, +}; + +/** + * Colorspace of isl_format. + */ +enum isl_colorspace { + ISL_COLORSPACE_NONE = 0, + ISL_COLORSPACE_LINEAR, + ISL_COLORSPACE_SRGB, + ISL_COLORSPACE_YUV, +}; + +/** + * Texture compression mode of isl_format. + */ +enum isl_txc { + ISL_TXC_NONE = 0, + ISL_TXC_DXT1, + ISL_TXC_DXT3, + ISL_TXC_DXT5, + ISL_TXC_FXT1, + ISL_TXC_RGTC1, + ISL_TXC_RGTC2, + ISL_TXC_BPTC, + ISL_TXC_ETC1, + ISL_TXC_ETC2, +}; + +/** + * @brief Hardware tile mode + * + * WARNING: These values differ from the hardware enum values, which are + * unstable across hardware generations. + * + * Note that legacy Y tiling is ISL_TILING_Y0 instead of ISL_TILING_Y, to + * clearly distinguish it from Yf and Ys. + */ +enum isl_tiling { + ISL_TILING_LINEAR = 0, + ISL_TILING_W, + ISL_TILING_X, + ISL_TILING_Y0, /**< Legacy Y tiling */ + ISL_TILING_Yf, + ISL_TILING_Ys, +}; + +/** + * @defgroup Tiling Flags + * @{ + */ +typedef uint32_t isl_tiling_flags_t; +#define ISL_TILING_LINEAR_BIT (1u << ISL_TILING_LINEAR) +#define ISL_TILING_W_BIT (1u << ISL_TILING_W) +#define ISL_TILING_X_BIT (1u << ISL_TILING_X) +#define ISL_TILING_Y0_BIT (1u << ISL_TILING_Y0) +#define ISL_TILING_Yf_BIT (1u << ISL_TILING_Yf) +#define ISL_TILING_Ys_BIT (1u << ISL_TILING_Ys) +#define ISL_TILING_ANY_MASK (~0u) +#define ISL_TILING_NON_LINEAR_MASK (~ISL_TILING_LINEAR_BIT) + +/** Any Y tiling, including legacy Y tiling. */ +#define ISL_TILING_ANY_Y_MASK (ISL_TILING_Y0_BIT | \ + ISL_TILING_Yf_BIT | \ + ISL_TILING_Ys_BIT) + +/** The Skylake BSpec refers to Yf and Ys as "standard tiling formats". */ +#define ISL_TILING_STD_Y_MASK (ISL_TILING_Yf_BIT | \ + ISL_TILING_Ys_BIT) +/** @} */ + +/** + * @brief Logical dimension of surface. 
+ * + * Note: There is no dimension for cube map surfaces. ISL interprets cube maps + * as 2D array surfaces. + */ +enum isl_surf_dim { + ISL_SURF_DIM_1D, + ISL_SURF_DIM_2D, + ISL_SURF_DIM_3D, +}; + +/** + * @brief Physical layout of the surface's dimensions. + */ +enum isl_dim_layout { + /** + * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section + * 6.17.3: 2D Surfaces. + * + * On many gens, 1D surfaces share the same layout as 2D surfaces. From + * the G35 PRM >> Volume 1: Graphics Core >> Section 6.17.2: 1D Surfaces: + * + * One-dimensional surfaces are identical to 2D surfaces with height of + * one. + */ + ISL_DIM_LAYOUT_GEN4_2D, + + /** + * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section + * 6.17.5: 3D Surfaces. + */ + ISL_DIM_LAYOUT_GEN4_3D, + + /** + * For details, see the Skylake BSpec >> Memory Views >> Common Surface + * Formats >> Surface Layout and Tiling >> » 1D Surfaces. + */ + ISL_DIM_LAYOUT_GEN9_1D, +}; + +/* TODO(chadv): Explain */ +enum isl_array_pitch_span { + ISL_ARRAY_PITCH_SPAN_FULL, + ISL_ARRAY_PITCH_SPAN_COMPACT, +}; + +/** + * @defgroup Surface Usage + * @{ + */ +typedef uint64_t isl_surf_usage_flags_t; +#define ISL_SURF_USAGE_RENDER_TARGET_BIT (1u << 0) +#define ISL_SURF_USAGE_DEPTH_BIT (1u << 1) +#define ISL_SURF_USAGE_STENCIL_BIT (1u << 2) +#define ISL_SURF_USAGE_TEXTURE_BIT (1u << 3) +#define ISL_SURF_USAGE_CUBE_BIT (1u << 4) +#define ISL_SURF_USAGE_DISABLE_AUX_BIT (1u << 5) +#define ISL_SURF_USAGE_DISPLAY_BIT (1u << 6) +#define ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT (1u << 7) +#define ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT (1u << 8) +#define ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT (1u << 9) +#define ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT (1u << 10) +#define ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT (1u << 11) +/** @} */ + +/** + * @brief Multisample Format + */ +enum isl_msaa_layout { + /** + * @brief Suface is single-sampled. 
+ */ + ISL_MSAA_LAYOUT_NONE, + + /** + * @brief [SNB+] Interleaved Multisample Format + * + * In this format, multiple samples are interleaved into each cacheline. + * In other words, the sample index is swizzled into the low 6 bits of the + * surface's virtual address space. + * + * For example, suppose the surface is legacy Y tiled, is 4x multisampled, + * and its pixel format is 32bpp. Then the first cacheline is arranged + * thus: + * + * (0,0,0) (0,1,0) (0,0,1) (1,0,1) + * (1,0,0) (1,1,0) (0,1,1) (1,1,1) + * + * (0,0,2) (1,0,2) (0,0,3) (1,0,3) + * (0,1,2) (1,1,2) (0,1,3) (1,1,3) + * + * The hardware docs refer to this format with multiple terms. In + * Sandybridge, this is the only multisample format; so no term is used. + * The Ivybridge docs refer to surfaces in this format as IMS (Interleaved + * Multisample Surface). Later hardware docs additionally refer to this + * format as MSFMT_DEPTH_STENCIL (because the format is deprecated for + * color surfaces). + * + * See the Sandybridge PRM, Volume 4, Part 1, Section 2.7 "Multisampled + * Surface Behavior". + * + * See the Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.1 "Interleaved + * Multisampled Surfaces". + */ + ISL_MSAA_LAYOUT_INTERLEAVED, + + /** + * @brief [IVB+] Array Multisample Format + * + * In this format, the surface's physical layout resembles that of a + * 2D array surface. + * + * Suppose the multisample surface's logical extent is (w, h) and its + * sample count is N. Then surface's physical extent is the same as + * a singlesample 2D surface whose logical extent is (w, h) and array + * length is N. Array slice `i` contains the pixel values for sample + * index `i`. + * + * The Ivybridge docs refer to surfaces in this format as UMS + * (Uncompressed Multsample Layout) and CMS (Compressed Multisample + * Surface). The Broadwell docs additionally refer to this format as + * MSFMT_MSS (MSS=Multisample Surface Storage). 
+ * + * See the Broadwell PRM, Volume 5 "Memory Views", Section "Uncompressed + * Multisample Surfaces". + * + * See the Broadwell PRM, Volume 5 "Memory Views", Section "Compressed + * Multisample Surfaces". + */ + ISL_MSAA_LAYOUT_ARRAY, +}; + + +struct isl_device { + const struct brw_device_info *info; + bool use_separate_stencil; +}; + +struct isl_extent2d { + union { uint32_t w, width; }; + union { uint32_t h, height; }; +}; + +struct isl_extent3d { + union { uint32_t w, width; }; + union { uint32_t h, height; }; + union { uint32_t d, depth; }; +}; + +struct isl_extent4d { + union { uint32_t w, width; }; + union { uint32_t h, height; }; + union { uint32_t d, depth; }; + union { uint32_t a, array_len; }; +}; + +struct isl_channel_layout { + enum isl_base_type type; + uint8_t bits; /**< Size in bits */ +}; + +struct isl_format_layout { + enum isl_format format; + + uint16_t bpb; /**< Bits per block */ + uint8_t bs; /**< Block size, in bytes, rounded towards 0 */ + uint8_t bw; /**< Block width, in pixels */ + uint8_t bh; /**< Block height, in pixels */ + uint8_t bd; /**< Block depth, in pixels */ + + struct { + struct isl_channel_layout r; /**< Red channel */ + struct isl_channel_layout g; /**< Green channel */ + struct isl_channel_layout b; /**< Blue channel */ + struct isl_channel_layout a; /**< Alpha channel */ + struct isl_channel_layout l; /**< Luminance channel */ + struct isl_channel_layout i; /**< Intensity channel */ + struct isl_channel_layout p; /**< Palette channel */ + } channels; + + enum isl_colorspace colorspace; + enum isl_txc txc; +}; + +struct isl_tile_info { + enum isl_tiling tiling; + uint32_t width; /**< in bytes */ + uint32_t height; /**< in rows of memory */ + uint32_t size; /**< in bytes */ +}; + +/** + * @brief Input to surface initialization + * + * @invariant width >= 1 + * @invariant height >= 1 + * @invariant depth >= 1 + * @invariant levels >= 1 + * @invariant samples >= 1 + * @invariant array_len >= 1 + * + * @invariant if 1D then 
height == 1 and depth == 1 and samples == 1 + * @invariant if 2D then depth == 1 + * @invariant if 3D then array_len == 1 and samples == 1 + */ +struct isl_surf_init_info { + enum isl_surf_dim dim; + enum isl_format format; + + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t levels; + uint32_t array_len; + uint32_t samples; + + /** Lower bound for isl_surf::alignment, in bytes. */ + uint32_t min_alignment; + + /** Lower bound for isl_surf::pitch, in bytes. */ + uint32_t min_pitch; + + isl_surf_usage_flags_t usage; + + /** Flags that alter how ISL selects isl_surf::tiling. */ + isl_tiling_flags_t tiling_flags; +}; + +struct isl_surf { + enum isl_surf_dim dim; + enum isl_dim_layout dim_layout; + enum isl_msaa_layout msaa_layout; + enum isl_tiling tiling; + enum isl_format format; + + /** + * Alignment of the upper-left sample of each subimage, in units of surface + * elements. + */ + struct isl_extent3d image_alignment_el; + + /** + * Logical extent of the surface's base level, in units of pixels. This is + * identical to the extent defined in isl_surf_init_info. + */ + struct isl_extent4d logical_level0_px; + + /** + * Physical extent of the surface's base level, in units of pixels. + * + * Consider isl_dim_layout as an operator that transforms a logical surface + * layout to a physical surface layout. Then + * + * logical_layout := (isl_surf::dim, isl_surf::logical_level0_px) + * isl_surf::phys_level0_sa := isl_surf::dim_layout * logical_layout + */ + struct isl_extent4d phys_level0_sa; + + uint32_t levels; + uint32_t samples; + + /** Total size of the surface, in bytes. */ + uint32_t size; + + /** Required alignment for the surface's base address. */ + uint32_t alignment; + + /** + * Pitch between vertically adjacent samples, in bytes. + */ + uint32_t row_pitch; + + /** + * Pitch between physical array slices, in rows of surface elements. 
+ */ + uint32_t array_pitch_el_rows; + + enum isl_array_pitch_span array_pitch_span; + + /** Copy of isl_surf_init_info::usage. */ + isl_surf_usage_flags_t usage; +}; + +extern const struct isl_format_layout isl_format_layouts[]; + +void +isl_device_init(struct isl_device *dev, + const struct brw_device_info *info); + +static inline const struct isl_format_layout * ATTRIBUTE_CONST +isl_format_get_layout(enum isl_format fmt) +{ + return &isl_format_layouts[fmt]; +} + +bool +isl_format_has_sint_channel(enum isl_format fmt) ATTRIBUTE_CONST; + +static inline bool +isl_format_is_compressed(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->txc != ISL_TXC_NONE; +} + +static inline bool +isl_format_has_bc_compression(enum isl_format fmt) +{ + switch (isl_format_get_layout(fmt)->txc) { + case ISL_TXC_DXT1: + case ISL_TXC_DXT3: + case ISL_TXC_DXT5: + return true; + case ISL_TXC_NONE: + case ISL_TXC_FXT1: + case ISL_TXC_RGTC1: + case ISL_TXC_RGTC2: + case ISL_TXC_BPTC: + case ISL_TXC_ETC1: + case ISL_TXC_ETC2: + return false; + } + + unreachable("bad texture compression mode"); + return false; +} + +static inline bool +isl_format_is_yuv(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->colorspace == ISL_COLORSPACE_YUV; +} + +static inline bool +isl_format_block_is_1x1x1(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->bw == 1 && fmtl->bh == 1 && fmtl->bd == 1; +} + +enum isl_format +isl_lower_storage_image_format(const struct isl_device *dev, + enum isl_format fmt); + +static inline bool +isl_tiling_is_std_y(enum isl_tiling tiling) +{ + return (1u << tiling) & ISL_TILING_STD_Y_MASK; +} + +bool +isl_tiling_get_info(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_tile_info *info); + +void +isl_tiling_get_extent(const struct isl_device *dev, + enum 
isl_tiling tiling, + uint32_t format_block_size, + struct isl_extent2d *e); +bool +isl_surf_choose_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling *tiling); + +static inline bool +isl_surf_usage_is_display(isl_surf_usage_flags_t usage) +{ + return usage & ISL_SURF_USAGE_DISPLAY_BIT; +} + +static inline bool +isl_surf_usage_is_depth(isl_surf_usage_flags_t usage) +{ + return usage & ISL_SURF_USAGE_DEPTH_BIT; +} + +static inline bool +isl_surf_usage_is_stencil(isl_surf_usage_flags_t usage) +{ + return usage & ISL_SURF_USAGE_STENCIL_BIT; +} + +static inline bool +isl_surf_usage_is_depth_and_stencil(isl_surf_usage_flags_t usage) +{ + return (usage & ISL_SURF_USAGE_DEPTH_BIT) && + (usage & ISL_SURF_USAGE_STENCIL_BIT); +} + +static inline bool +isl_surf_usage_is_depth_or_stencil(isl_surf_usage_flags_t usage) +{ + return usage & (ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT); +} + +static inline bool +isl_surf_info_is_z16(const struct isl_surf_init_info *info) +{ + return (info->usage & ISL_SURF_USAGE_DEPTH_BIT) && + (info->format == ISL_FORMAT_R16_UNORM); +} + +static inline bool +isl_surf_info_is_z32_float(const struct isl_surf_init_info *info) +{ + return (info->usage & ISL_SURF_USAGE_DEPTH_BIT) && + (info->format == ISL_FORMAT_R32_FLOAT); +} + +static inline struct isl_extent2d +isl_extent2d(uint32_t width, uint32_t height) +{ + return (struct isl_extent2d) { .w = width, .h = height }; +} + +static inline struct isl_extent3d +isl_extent3d(uint32_t width, uint32_t height, uint32_t depth) +{ + return (struct isl_extent3d) { .w = width, .h = height, .d = depth }; +} + +static inline struct isl_extent4d +isl_extent4d(uint32_t width, uint32_t height, uint32_t depth, + uint32_t array_len) +{ + return (struct isl_extent4d) { + .w = width, + .h = height, + .d = depth, + .a = array_len, + }; +} + +#define isl_surf_init(dev, surf, ...) 
\ + isl_surf_init_s((dev), (surf), \ + &(struct isl_surf_init_info) { __VA_ARGS__ }); + +bool +isl_surf_init_s(const struct isl_device *dev, + struct isl_surf *surf, + const struct isl_surf_init_info *restrict info); + +/** + * Alignment of the upper-left sample of each subimage, in units of surface + * elements. + */ +static inline struct isl_extent3d +isl_surf_get_image_alignment_el(const struct isl_surf *surf) +{ + return surf->image_alignment_el; +} + +/** + * Alignment of the upper-left sample of each subimage, in units of surface + * samples. + */ +static inline struct isl_extent3d +isl_surf_get_image_alignment_sa(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + return (struct isl_extent3d) { + .w = fmtl->bw * surf->image_alignment_el.w, + .h = fmtl->bh * surf->image_alignment_el.h, + .d = fmtl->bd * surf->image_alignment_el.d, + }; +} + +/** + * Pitch between physical array slices, in rows of surface elements. + */ +static inline uint32_t +isl_surf_get_array_pitch_el_rows(const struct isl_surf *surf) +{ + return surf->array_pitch_el_rows; +} + +/** + * Pitch between physical array slices, in rows of surface samples. + */ +static inline uint32_t +isl_surf_get_array_pitch_sa_rows(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + return fmtl->bh * isl_surf_get_array_pitch_el_rows(surf); +} + +/** + * Pitch between physical array slices, in bytes. 
+ */ +static inline uint32_t +isl_surf_get_array_pitch(const struct isl_surf *surf) +{ + return isl_surf_get_array_pitch_sa_rows(surf) * surf->row_pitch; +} + +#ifdef __cplusplus +} +#endif diff --git a/src/isl/isl_format_layout.csv b/src/isl/isl_format_layout.csv new file mode 100644 index 00000000000..2a302b002ef --- /dev/null +++ b/src/isl/isl_format_layout.csv @@ -0,0 +1,287 @@ +# Copyright 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +# +# @file +# @brief Layout of all hardware surface formats +# +# For the official list, see Broadwell PRM: Volume 2b: Command Reference: +# Enumerations: SURFACE_FORMAT. 
+# + + +# Columns: +# name: format name in PRM +# bpb: bits per block +# bw: block width, in pixels +# bh: block height, in pixels +# bd: block depth, in pixels +# r: red channel, data type and bitwidth +# g: green channel +# b: blue channel +# a: alpha channel +# l: luminance channel +# i: intensity channel +# p: palette channel +# space: colorspace +# txc: texture compression +# +# Data Types: +# x: void +# r: raw +# un: unorm +# sn: snorm +# uf: ufloat +# sf: sfloat +# ux: ufixed +# sx: sfixed +# ui: uint +# si: sint +# us: uscaled +# ss: sscaled + + +# Table is aligned with the Vim commands below, using the Align plugin: +# :AlignCtrl lr+ p8000000000000P1 +# /^# name/,$ Align, + +# name , bpb, bw, bh, bd, r, g, b, a, l, i, p, space, txc +R32G32B32A32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, sf32, , , , linear, +R32G32B32A32_SINT , 128, 1, 1, 1, si32, si32, si32, si32, , , , linear, +R32G32B32A32_UINT , 128, 1, 1, 1, ui32, ui32, ui32, ui32, , , , linear, +R32G32B32A32_UNORM , 128, 1, 1, 1, un32, un32, un32, un32, , , , linear, +R32G32B32A32_SNORM , 128, 1, 1, 1, sn32, sn32, sn32, sn32, , , , linear, +R64G64_FLOAT , 128, 1, 1, 1, sf64, sf64, , , , , , linear, +R32G32B32X32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, x32, , , , linear, +R32G32B32A32_SSCALED , 128, 1, 1, 1, ss32, ss32, ss32, ss32, , , , linear, +R32G32B32A32_USCALED , 128, 1, 1, 1, us32, us32, us32, us32, , , , linear, +R32G32B32A32_SFIXED , 128, 1, 1, 1, sx32, sx32, sx32, sx32, , , , linear, +R64G64_PASSTHRU , 128, 1, 1, 1, r64, r64, , , , , , , +R32G32B32_FLOAT , 96, 1, 1, 1, sf32, sf32, sf32, , , , , linear, +R32G32B32_SINT , 96, 1, 1, 1, si32, si32, si32, , , , , linear, +R32G32B32_UINT , 96, 1, 1, 1, ui32, ui32, ui32, , , , , linear, +R32G32B32_UNORM , 96, 1, 1, 1, un32, un32, un32, , , , , linear, +R32G32B32_SNORM , 96, 1, 1, 1, sn32, sn32, sn32, , , , , linear, +R32G32B32_SSCALED , 96, 1, 1, 1, ss32, ss32, ss32, , , , , linear, +R32G32B32_USCALED , 96, 1, 1, 1, us32, us32, us32, , , , , linear, 
+R32G32B32_SFIXED , 96, 1, 1, 1, sx32, sx32, sx32, , , , , linear, +R16G16B16A16_UNORM , 64, 1, 1, 1, un16, un16, un16, un16, , , , linear, +R16G16B16A16_SNORM , 64, 1, 1, 1, sn16, sn16, sn16, sn16, , , , linear, +R16G16B16A16_SINT , 64, 1, 1, 1, si16, si16, si16, si16, , , , linear, +R16G16B16A16_UINT , 64, 1, 1, 1, ui16, ui16, ui16, ui16, , , , linear, +R16G16B16A16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, sf16, , , , linear, +R32G32_FLOAT , 64, 1, 1, 1, sf32, sf32, , , , , , linear, +R32G32_SINT , 64, 1, 1, 1, si32, si32, , , , , , linear, +R32G32_UINT , 64, 1, 1, 1, ui32, ui32, , , , , , linear, +R32_FLOAT_X8X24_TYPELESS , 64, 1, 1, 1, sf32, x8, x24, , , , , linear, +X32_TYPELESS_G8X24_UINT , 64, 1, 1, 1, x32, ui8, x24, , , , , linear, +L32A32_FLOAT , 64, 1, 1, 1, , , , sf32, sf32, , , linear, +R32G32_UNORM , 64, 1, 1, 1, un32, un32, , , , , , linear, +R32G32_SNORM , 64, 1, 1, 1, sn32, sn32, , , , , , linear, +R64_FLOAT , 64, 1, 1, 1, sf64, , , , , , , linear, +R16G16B16X16_UNORM , 64, 1, 1, 1, un16, un16, un16, x16, , , , linear, +R16G16B16X16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, x16, , , , linear, +A32X32_FLOAT , 64, 1, 1, 1, , , , sf32, x32, , , alpha, +L32X32_FLOAT , 64, 1, 1, 1, , , , x32, sf32, , , linear, +I32X32_FLOAT , 64, 1, 1, 1, , , , x32, , sf32, , linear, +R16G16B16A16_SSCALED , 64, 1, 1, 1, ss16, ss16, ss16, ss16, , , , linear, +R16G16B16A16_USCALED , 64, 1, 1, 1, us16, us16, us16, us16, , , , linear, +R32G32_SSCALED , 64, 1, 1, 1, ss32, ss32, , , , , , linear, +R32G32_USCALED , 64, 1, 1, 1, us32, us32, , , , , , linear, +R32G32_SFIXED , 64, 1, 1, 1, sx32, sx32, , , , , , linear, +R64_PASSTHRU , 64, 1, 1, 1, r64, , , , , , , , +B8G8R8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear, +B8G8R8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb, +R10G10B10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, +R10G10B10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, +R10G10B10A2_UINT , 32, 1, 1, 1, ui10, ui10, 
ui10, ui2, , , , linear, +R10G10B10_SNORM_A2_UNORM , 32, 1, 1, 1, sn10, sn10, sn10, un2, , , , linear, +R8G8B8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear, +R8G8B8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb, +R8G8B8A8_SNORM , 32, 1, 1, 1, sn8, sn8, sn8, sn8, , , , linear, +R8G8B8A8_SINT , 32, 1, 1, 1, si8, si8, si8, si8, , , , linear, +R8G8B8A8_UINT , 32, 1, 1, 1, ui8, ui8, ui8, ui8, , , , linear, +R16G16_UNORM , 32, 1, 1, 1, un16, un16, , , , , , linear, +R16G16_SNORM , 32, 1, 1, 1, sn16, sn16, , , , , , linear, +R16G16_SINT , 32, 1, 1, 1, si16, si16, , , , , , linear, +R16G16_UINT , 32, 1, 1, 1, ui16, ui16, , , , , , linear, +R16G16_FLOAT , 32, 1, 1, 1, sf16, sf16, , , , , , linear, +B10G10R10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, +B10G10R10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, +R11G11B10_FLOAT , 32, 1, 1, 1, sf11, sf11, sf10, , , , , linear, +R32_SINT , 32, 1, 1, 1, si32, , , , , , , linear, +R32_UINT , 32, 1, 1, 1, ui32, , , , , , , linear, +R32_FLOAT , 32, 1, 1, 1, sf32, , , , , , , linear, +R24_UNORM_X8_TYPELESS , 32, 1, 1, 1, un24, x8, , , , , , linear, +X24_TYPELESS_G8_UINT , 32, 1, 1, 1, x24, ui8, , , , , , linear, +L32_UNORM , 32, 1, 1, 1, , , , , un32, , , linear, +A32_UNORM , 32, 1, 1, 1, , , , un32, , , , alpha, +L16A16_UNORM , 32, 1, 1, 1, , , , un16, un16, , , linear, +I24X8_UNORM , 32, 1, 1, 1, , , , x8, , un24, , linear, +L24X8_UNORM , 32, 1, 1, 1, , , , x8, un24, , , linear, +A24X8_UNORM , 32, 1, 1, 1, , , , un24, x8, , , alpha, +I32_FLOAT , 32, 1, 1, 1, , , , , , sf32, , linear, +L32_FLOAT , 32, 1, 1, 1, , , , , sf32, , , linear, +A32_FLOAT , 32, 1, 1, 1, , , , sf32, , , , alpha, +X8B8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear, +A8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, x8, un8, , , , linear, +B8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear, +B8G8R8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear, +B8G8R8X8_UNORM_SRGB 
, 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb, +R8G8B8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear, +R8G8B8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb, +R9G9B9E5_SHAREDEXP , 32, 1, 1, 1, ui9, ui9, ui9, , , , , linear, +B10G10R10X2_UNORM , 32, 1, 1, 1, un10, un10, un10, x2, , , , linear, +L16A16_FLOAT , 32, 1, 1, 1, , , , sf16, sf16, , , linear, +R32_UNORM , 32, 1, 1, 1, un32, , , , , , , linear, +R32_SNORM , 32, 1, 1, 1, sn32, , , , , , , linear, +R10G10B10X2_USCALED , 32, 1, 1, 1, us10, us10, us10, x2, , , , linear, +R8G8B8A8_SSCALED , 32, 1, 1, 1, ss8, ss8, ss8, ss8, , , , linear, +R8G8B8A8_USCALED , 32, 1, 1, 1, us8, us8, us8, us8, , , , linear, +R16G16_SSCALED , 32, 1, 1, 1, ss16, ss6, , , , , , linear, +R16G16_USCALED , 32, 1, 1, 1, us16, us16, , , , , , linear, +R32_SSCALED , 32, 1, 1, 1, ss32, , , , , , , linear, +R32_USCALED , 32, 1, 1, 1, us32, , , , , , , linear, +B5G6R5_UNORM , 16, 1, 1, 1, un5, un6, un5, , , , , linear, +B5G6R5_UNORM_SRGB , 16, 1, 1, 1, un5, un6, un5, , , , , srgb, +B5G5R5A1_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear, +B5G5R5A1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, un1, , , , srgb, +B4G4R4A4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear, +B4G4R4A4_UNORM_SRGB , 16, 1, 1, 1, un4, un4, un4, un4, , , , srgb, +R8G8_UNORM , 16, 1, 1, 1, un8, un8, , , , , , linear, +R8G8_SNORM , 16, 1, 1, 1, sn8, sn8, , , , , , linear, +R8G8_SINT , 16, 1, 1, 1, si8, si8, , , , , , linear, +R8G8_UINT , 16, 1, 1, 1, ui8, ui8, , , , , , linear, +R16_UNORM , 16, 1, 1, 1, un16, , , , , , , linear, +R16_SNORM , 16, 1, 1, 1, sn16, , , , , , , linear, +R16_SINT , 16, 1, 1, 1, si16, , , , , , , linear, +R16_UINT , 16, 1, 1, 1, ui16, , , , , , , linear, +R16_FLOAT , 16, 1, 1, 1, sf16, , , , , , , linear, +A8P8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear, +A8P8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear, +I16_UNORM , 16, 1, 1, 1, , , , , , un16, , linear, +L16_UNORM , 16, 1, 1, 1, , , , , un16, , , 
linear, +A16_UNORM , 16, 1, 1, 1, , , , un16, , , , alpha, +L8A8_UNORM , 16, 1, 1, 1, , , , un8, un8, , , linear, +I16_FLOAT , 16, 1, 1, 1, , , , , , sf16, , linear, +L16_FLOAT , 16, 1, 1, 1, , , , , sf16, , , linear, +A16_FLOAT , 16, 1, 1, 1, , , , sf16, , , , alpha, +L8A8_UNORM_SRGB , 16, 1, 1, 1, , , , un8, un8, , , srgb, +R5G5_SNORM_B6_UNORM , 16, 1, 1, 1, sn5, sn5, un6, , , , , linear, +B5G5R5X1_UNORM , 16, 1, 1, 1, un5, un5, un5, x1, , , , linear, +B5G5R5X1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, x1, , , , srgb, +R8G8_SSCALED , 16, 1, 1, 1, ss8, ss8, , , , , , linear, +R8G8_USCALED , 16, 1, 1, 1, us8, us8, , , , , , linear, +R16_SSCALED , 16, 1, 1, 1, ss16, , , , , , , linear, +R16_USCALED , 16, 1, 1, 1, us16, , , , , , , linear, +P8A8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear, +P8A8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear, +A1B5G5R5_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear, +A4B4G4R4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear, +L8A8_UINT , 16, 1, 1, 1, , , , ui8, ui8, , , linear, +L8A8_SINT , 16, 1, 1, 1, , , , si8, si8, , , linear, +R8_UNORM , 8, 1, 1, 1, un8, , , , , , , linear, +R8_SNORM , 8, 1, 1, 1, sn8, , , , , , , linear, +R8_SINT , 8, 1, 1, 1, si8, , , , , , , linear, +R8_UINT , 8, 1, 1, 1, ui8, , , , , , , linear, +A8_UNORM , 8, 1, 1, 1, , , , un8, , , , alpha, +I8_UNORM , 8, 1, 1, 1, , , , , , un8, , linear, +L8_UNORM , 8, 1, 1, 1, , , , , un8, , , linear, +P4A4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear, +A4P4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear, +R8_SSCALED , 8, 1, 1, 1, ss8, , , , , , , linear, +R8_USCALED , 8, 1, 1, 1, us8, , , , , , , linear, +P8_UNORM_PALETTE0 , 8, 1, 1, 1, , , , , , , un8, linear, +L8_UNORM_SRGB , 8, 1, 1, 1, , , , , un8, , , linear, +P8_UNORM_PALETTE1 , 8, 1, 1, 1, , , , , , , un8, linear, +P4A4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear, +A4P4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear, +Y8_UNORM , 0, 0, 0, 
0, , , , , , , , yuv, +L8_UINT , 8, 1, 1, 1, , , , , ui8, , , linear, +L8_SINT , 8, 1, 1, 1, , , , , si8, , , linear, +I8_UINT , 8, 1, 1, 1, , , , , , ui8, , linear, +I8_SINT , 8, 1, 1, 1, , , , , , si8, , linear, +DXT1_RGB_SRGB , 64, 4, 4, 1, un4, un4, un4, , , , , srgb, dxt1 +R1_UNORM , 1, 1, 1, 1, un1, , , , , , , linear, +YCRCB_NORMAL , 0, 0, 0, 0, , , , , , , , yuv, +YCRCB_SWAPUVY , 0, 0, 0, 0, , , , , , , , yuv, +P2_UNORM_PALETTE0 , 2, 1, 1, 1, , , , , , , un2, linear, +P2_UNORM_PALETTE1 , 2, 1, 1, 1, , , , , , , un2, linear, +BC1_UNORM , 64, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt1 +BC2_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt3 +BC3_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt5 +BC4_UNORM , 64, 4, 4, 1, un8, , , , , , , linear, rgtc1 +BC5_UNORM , 128, 4, 4, 1, un8, un8, , , , , , linear, rgtc2 +BC1_UNORM_SRGB , 64, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt1 +BC2_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt3 +BC3_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt5 +MONO8 , 1, 1, 1, 1, , , , , , , , , +YCRCB_SWAPUV , 0, 0, 0, 0, , , , , , , , yuv, +YCRCB_SWAPY , 0, 0, 0, 0, , , , , , , , yuv, +DXT1_RGB , 64, 4, 4, 1, un4, un4, un4, , , , , linear, dxt1 +FXT1 , 128, 8, 4, 1, un4, un4, un4, , , , , linear, fxt1 +R8G8B8_UNORM , 24, 1, 1, 1, un8, un8, un8, , , , , linear, +R8G8B8_SNORM , 24, 1, 1, 1, sn8, sn8, sn8, , , , , linear, +R8G8B8_SSCALED , 24, 1, 1, 1, ss8, ss8, ss8, , , , , linear, +R8G8B8_USCALED , 24, 1, 1, 1, us8, us8, us8, , , , , linear, +R64G64B64A64_FLOAT , 256, 1, 1, 1, sf64, sf64, sf64, sf64, , , , linear, +R64G64B64_FLOAT , 196, 1, 1, 1, sf64, sf64, sf64, , , , , linear, +BC4_SNORM , 64, 4, 4, 1, sn8, , , , , , , linear, rgtc1 +BC5_SNORM , 128, 4, 4, 1, sn8, sn8, , , , , , linear, rgtc2 +R16G16B16_FLOAT , 48, 1, 1, 1, sf16, sf16, sf16, , , , , linear, +R16G16B16_UNORM , 48, 1, 1, 1, un16, un16, un16, , , , , linear, +R16G16B16_SNORM , 48, 1, 1, 1, sn16, sn16, sn16, , , , 
, linear, +R16G16B16_SSCALED , 48, 1, 1, 1, ss16, ss16, ss16, , , , , linear, +R16G16B16_USCALED , 48, 1, 1, 1, us16, us16, us16, , , , , linear, +BC6H_SF16 , 128, 4, 4, 1, sf16, sf16, sf16, , , , , linear, bptc +BC7_UNORM , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, bptc +BC7_UNORM_SRGB , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, bptc +BC6H_UF16 , 128, 4, 4, 1, uf16, uf16, uf16, , , , , linear, bptc +PLANAR_420_8 , 0, 0, 0, 0, , , , , , , , yuv, +R8G8B8_UNORM_SRGB , 24, 1, 1, 1, un8, un8, un8, , , , , srgb, +ETC1_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc1 +ETC2_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc2 +EAC_R11 , 64, 4, 4, 1, un11, , , , , , , linear, etc2 +EAC_RG11 , 128, 4, 4, 1, un11, un11, , , , , , linear, etc2 +EAC_SIGNED_R11 , 64, 4, 4, 1, sn11, , , , , , , linear, etc2 +EAC_SIGNED_RG11 , 128, 4, 4, 1, sn11, sn11, , , , , , linear, etc2 +ETC2_SRGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , srgb, etc2 +R16G16B16_UINT , 48, 1, 1, 1, ui16, ui16, ui16, , , , , linear, +R16G16B16_SINT , 48, 1, 1, 1, si16, si16, si16, , , , , linear, +R32_SFIXED , 32, 1, 1, 1, sx16, , , , , , , linear, +R10G10B10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear, +R10G10B10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear, +R10G10B10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear, +R10G10B10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear, +B10G10R10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear, +B10G10R10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear, +B10G10R10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear, +B10G10R10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear, +B10G10R10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear, +R64G64B64A64_PASSTHRU , 256, 1, 1, 1, r64, r64, r64, r64, , , , , +R64G64B64_PASSTHRU , 192, 1, 1, 1, r64, r64, r64, , , , , , +ETC2_RGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , linear, etc2 +ETC2_SRGB8_PTA , 64, 
4, 4, 1, un8, un8, un8, un1, , , , srgb, etc2 +ETC2_EAC_RGBA8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, etc2 +ETC2_EAC_SRGB8_A8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, etc2 +R8G8B8_UINT , 24, 1, 1, 1, ui8, ui8, ui8, , , , , linear, +R8G8B8_SINT , 24, 1, 1, 1, si8, si8, si8, , , , , linear, +RAW , 0, 0, 0, 0, , , , , , , , , diff --git a/src/isl/isl_format_layout_gen.bash b/src/isl/isl_format_layout_gen.bash new file mode 100755 index 00000000000..2511f299a7e --- /dev/null +++ b/src/isl/isl_format_layout_gen.bash @@ -0,0 +1,128 @@ +#!/usr/bin/env bash +# +# Copyright 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +set -eu +set -o pipefail + +cat <<'EOF' +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl.h" + +const struct isl_format_layout +isl_format_layouts[] = { +EOF + +sed -r ' +# Delete comment lines and empty lines +/^[[:space:]]*#/d +/^[[:space:]]*$/d + +# Delete spaces +s/[[:space:]]//g + +# Translate formats +s/^([A-Za-z0-9_]+),*/ISL_FORMAT_\1,/ + +# Translate data type of channels +s/\/ISL_COLORSPACE_\1/ +s/\// + +# Translate texture compression +s/\<(dxt|fxt|rgtc|bptc|etc)([0-9]*)\>/ISL_TXC_\1\2/ +' | +tr 'a-z' 'A-Z' | # Convert to uppersace +while IFS=, read -r format bpb bw bh bd \ + red green blue alpha \ + luminance intensity palette \ + colorspace txc +do + : ${colorspace:=ISL_COLORSPACE_NONE} + : ${txc:=ISL_TXC_NONE} + + cat <samples >= 1); + + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return true; +} + +void +gen4_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + assert(info->samples == 1); + assert(msaa_layout == ISL_MSAA_LAYOUT_NONE); + assert(!isl_tiling_is_std_y(tiling)); + + /* Note that neither the surface's horizontal nor vertical image alignment + * is programmable on gen4 nor gen5. + * + * From the G35 PRM (2008-01), Volume 1 Graphics Core, Section 6.17.3.4 + * Alignment Unit Size: + * + * Note that the compressed formats are padded to a full compression + * cell. 
+ * + * +------------------------+--------+--------+ + * | format | halign | valign | + * +------------------------+--------+--------+ + * | YUV 4:2:2 formats | 4 | 2 | + * | uncompressed formats | 4 | 2 | + * +------------------------+--------+--------+ + */ + + if (isl_format_is_compressed(info->format)) { + *image_align_el = isl_extent3d(1, 1, 1); + return; + } + + *image_align_el = isl_extent3d(4, 2, 1); +} diff --git a/src/isl/isl_gen4.h b/src/isl/isl_gen4.h new file mode 100644 index 00000000000..06cd70b9206 --- /dev/null +++ b/src/isl/isl_gen4.h @@ -0,0 +1,47 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +gen4_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen4_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/isl/isl_gen6.c b/src/isl/isl_gen6.c new file mode 100644 index 00000000000..24c393925ed --- /dev/null +++ b/src/isl/isl_gen6.c @@ -0,0 +1,160 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl_gen6.h" +#include "isl_priv.h" + +bool +gen6_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(ISL_DEV_GEN(dev) == 6); + assert(info->samples >= 1); + + if (info->samples == 1) { + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return false; + } + + /* From the Sandybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Surface + * Format: + * + * If Number of Multisamples is set to a value other than + * MULTISAMPLECOUNT_1, this field cannot be set to the following + * formats: + * + * - any format with greater than 64 bits per element + * - any compressed texture format (BC*) + * - any YCRCB* format + */ + if (fmtl->bs > 8) + return false; + if (isl_format_is_compressed(info->format)) + return false; + if (isl_format_is_yuv(info->format)) + return false; + + /* From the Sandybridge PRM, Volume 4 Part 1 p85, SURFACE_STATE, Number of + * Multisamples: + * + * If this field is any value other than MULTISAMPLECOUNT_1 the + * following restrictions apply: + * + * - the Surface Type must be SURFTYPE_2D + * - [...] + */ + if (info->dim != ISL_SURF_DIM_2D) + return false; + + /* More obvious restrictions */ + if (isl_surf_usage_is_display(info->usage)) + return false; + if (tiling == ISL_TILING_LINEAR) + return false; + if (info->levels > 1) + return false; + + *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; + return true; +} + +void +gen6_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + /* Note that the surface's horizontal image alignment is not programmable + * on Sandybridge. 
+ * + * From the Sandybridge PRM (2011-05), Volume 1, Part 1, Section 7.18.3.4 + * Alignment Unit Size: + * + * Note that the compressed formats are padded to a full compression cell. + * + * +------------------------+--------+--------+ + * | format | halign | valign | + * +------------------------+--------+--------+ + * | YUV 4:2:2 formats | 4 | * | + * | uncompressed formats | 4 | * | + * +------------------------+--------+--------+ + * + * * For these formats, the vertical alignment factor “j” is determined + * as follows: + * - j = 4 for any depth buffer + * - j = 2 for separate stencil buffer + * - j = 4 for any render target surface is multisampled (4x) + * - j = 2 for all other render target surface + * + * From the Sandrybridge PRM (2011-05), Volume 4, Part 1, Section 2.11.2 + * SURFACE_STATE, Surface Vertical Alignment: + * + * - This field must be set to VALIGN_2 if the Surface Format is 96 bits + * per element (BPE). + * + * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL + * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY + * (0x190) + */ + + if (isl_format_is_compressed(info->format)) { + *image_align_el = isl_extent3d(1, 1, 1); + return; + } + + if (isl_format_is_yuv(info->format)) { + *image_align_el = isl_extent3d(4, 2, 1); + return; + } + + if (info->samples > 1) { + *image_align_el = isl_extent3d(4, 4, 1); + return; + } + + if (isl_surf_usage_is_depth_or_stencil(info->usage) && + !ISL_DEV_USE_SEPARATE_STENCIL(dev)) { + /* interleaved depthstencil buffer */ + *image_align_el = isl_extent3d(4, 4, 1); + return; + } + + if (isl_surf_usage_is_depth(info->usage)) { + /* separate depth buffer */ + *image_align_el = isl_extent3d(4, 4, 1); + return; + } + + if (isl_surf_usage_is_stencil(info->usage)) { + /* separate stencil buffer */ + *image_align_el = isl_extent3d(4, 2, 1); + return; + } + + *image_align_el = isl_extent3d(4, 2, 1); +} diff --git a/src/isl/isl_gen6.h b/src/isl/isl_gen6.h new file mode 100644 index 
00000000000..0779c674940 --- /dev/null +++ b/src/isl/isl_gen6.h @@ -0,0 +1,47 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +gen6_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen6_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/isl/isl_gen7.c b/src/isl/isl_gen7.c new file mode 100644 index 00000000000..9984f61b2a4 --- /dev/null +++ b/src/isl/isl_gen7.c @@ -0,0 +1,392 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl_gen7.h" +#include "isl_priv.h" + +bool +gen7_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + bool require_array = false; + bool require_interleaved = false; + + assert(ISL_DEV_GEN(dev) == 7); + assert(info->samples >= 1); + + if (info->samples == 1) { + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return true; + } + + /* From the Ivybridge PRM, Volume 4 Part 1 p63, SURFACE_STATE, Surface + * Format: + * + * If Number of Multisamples is set to a value other than + * MULTISAMPLECOUNT_1, this field cannot be set to the following + * formats: any format with greater than 64 bits per element, any + * compressed texture format (BC*), and any YCRCB* format. + */ + if (fmtl->bs > 8) + return false; + if (isl_format_is_compressed(info->format)) + return false; + if (isl_format_is_yuv(info->format)) + return false; + + /* From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of + * Multisamples: + * + * - If this field is any value other than MULTISAMPLECOUNT_1, the + * Surface Type must be SURFTYPE_2D. + * + * - If this field is any value other than MULTISAMPLECOUNT_1, Surface + * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero + */ + if (info->dim != ISL_SURF_DIM_2D) + return false; + if (info->levels > 1) + return false; + + /* The Ivyrbridge PRM insists twice that signed integer formats cannot be + * multisampled. + * + * From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of + * Multisamples: + * + * - This field must be set to MULTISAMPLECOUNT_1 for SINT MSRTs when + * all RT channels are not written. + * + * And errata from the Ivybridge PRM, Volume 4 Part 1 p77, + * RENDER_SURFACE_STATE, MCS Enable: + * + * This field must be set to 0 [MULTISAMPLECOUNT_1] for all SINT MSRTs + * when all RT channels are not written. 
+ * + * Note that the above SINT restrictions apply only to *MSRTs* (that is, + * *multisampled* render targets). The restrictions seem to permit an MCS + * if the render target is singlesampled. + */ + if (isl_format_has_sint_channel(info->format)) + return false; + + /* More obvious restrictions */ + if (isl_surf_usage_is_display(info->usage)) + return false; + if (tiling == ISL_TILING_LINEAR) + return false; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Suface Storage Format: + * + * +---------------------+----------------------------------------------------------------+ + * | MSFMT_MSS | Multsampled surface was/is rendered as a render target | + * | MSFMT_DEPTH_STENCIL | Multisampled surface was rendered as a depth or stencil buffer | + * +---------------------+----------------------------------------------------------------+ + * + * In the table above, MSFMT_MSS refers to ISL_MSAA_LAYOUT_ARRAY, and + * MSFMT_DEPTH_STENCIL refers to ISL_MSAA_LAYOUT_INTERLEAVED. + */ + if (isl_surf_usage_is_depth_or_stencil(info->usage)) + require_interleaved = true; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Suface Storage Format: + * + * If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, Width + * is >= 8192 (meaning the actual surface width is >= 8193 pixels), this + * field must be set to MSFMT_MSS. + */ + if (info->samples == 8 && info->width == 8192) + require_array = true; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Suface Storage Format: + * + * If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, + * ((Depth+1) * (Height+1)) is > 4,194,304, OR if the surface’s Number + * of Multisamples is MULTISAMPLECOUNT_4, ((Depth+1) * (Height+1)) is + * > 8,388,608, this field must be set to MSFMT_DEPTH_STENCIL. 
+ */ + if ((info->samples == 8 && info->height > 4194304u) || + (info->samples == 4 && info->height > 8388608u)) + require_interleaved = true; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Suface Storage Format: + * + * This field must be set to MSFMT_DEPTH_STENCIL if Surface Format is + * one of the following: I24X8_UNORM, L24X8_UNORM, A24X8_UNORM, or + * R24_UNORM_X8_TYPELESS. + */ + if (info->format == ISL_FORMAT_I24X8_UNORM || + info->format == ISL_FORMAT_L24X8_UNORM || + info->format == ISL_FORMAT_A24X8_UNORM || + info->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) + require_interleaved = true; + + if (require_array && require_interleaved) + return false; + + if (require_interleaved) { + *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; + return true; + } + + /* Default to the array layout because it permits multisample + * compression. + */ + *msaa_layout = ISL_MSAA_LAYOUT_ARRAY; + return true; +} + +static bool +gen7_format_needs_valign2(const struct isl_device *dev, + enum isl_format format) +{ + /* This workaround applies only to gen7 */ + if (ISL_DEV_GEN(dev) > 7) + return false; + + /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, + * RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL + * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY + * (0x190) + * + * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT. + */ + return isl_format_is_yuv(format) || + format == ISL_FORMAT_R32G32B32_FLOAT; +} + +void +gen7_filter_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + isl_tiling_flags_t *flags) +{ + /* IVB+ requires separate stencil */ + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); + + /* Clear flags unsupported on this hardware */ + if (ISL_DEV_GEN(dev) < 9) { + *flags &= ~ISL_TILING_Yf_BIT; + *flags &= ~ISL_TILING_Ys_BIT; + } + + /* And... 
clear the Yf and Ys bits anyway because Anvil doesn't support + * them yet. + */ + *flags &= ~ISL_TILING_Yf_BIT; /* FINISHME[SKL]: Support Yf */ + *flags &= ~ISL_TILING_Ys_BIT; /* FINISHME[SKL]: Support Ys */ + + if (isl_surf_usage_is_depth(info->usage)) { + /* Depth requires Y. */ + *flags &= ISL_TILING_ANY_Y_MASK; + } + + /* Separate stencil requires W tiling, and W tiling requires separate + * stencil. + */ + if (isl_surf_usage_is_stencil(info->usage)) { + *flags &= ISL_TILING_W_BIT; + } else { + *flags &= ~ISL_TILING_W_BIT; + } + + if (info->usage & (ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT | + ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT | + ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT)) { + assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT); + isl_finishme("%s:%s: handle rotated display surfaces", + __FILE__, __func__); + } + + if (info->usage & (ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT | + ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT)) { + assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT); + isl_finishme("%s:%s: handle flipped display surfaces", + __FILE__, __func__); + } + + if (info->usage & ISL_SURF_USAGE_DISPLAY_BIT) { + /* Before Skylake, the display engine does not accept Y */ + /* FINISHME[SKL]: Y tiling for display surfaces */ + *flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT); + } + + if (info->samples > 1) { + /* From the Sandybridge PRM, Volume 4 Part 1, SURFACE_STATE Tiled + * Surface: + * + * For multisample render targets, this field must be 1 (true). MSRTs + * can only be tiled. + * + * Multisample surfaces never require X tiling, and Y tiling generally + * performs better than X. So choose Y. (Unless it's stencil, then it + * must be W). + */ + *flags &= (ISL_TILING_ANY_Y_MASK | ISL_TILING_W_BIT); + } + + /* For 1D surfaces, use linear when possible. 1D surfaces (array and + * non-array) do not benefit from tiling. In fact, it leads to less + * efficient use of memory due to tile alignment. 
+ */ + if (info->dim == ISL_SURF_DIM_1D && (*flags & ISL_TILING_LINEAR_BIT)) { + *flags = ISL_TILING_LINEAR_BIT; + } + + /* workaround */ + if (ISL_DEV_GEN(dev) == 7 && + gen7_format_needs_valign2(dev, info->format) && + (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) && + info->samples == 1) { + /* Y tiling is illegal. From the Ivybridge PRM, Vol4 Part1 2.12.2.1, + * SURFACE_STATE Surface Vertical Alignment: + * + * This field must be set to VALIGN_4 for all tiled Y Render Target + * surfaces. + */ + *flags &= ~ISL_TILING_Y0_BIT; + } +} + +/** + * Choose horizontal subimage alignment, in units of surface elements. + */ +static uint32_t +gen7_choose_halign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info) +{ + if (isl_format_is_compressed(info->format)) + return 1; + + /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, + * RENDER_SURFACE_STATE Surface Hoizontal Alignment: + * + * - This field is intended to be set to HALIGN_8 only if the surface + * was rendered as a depth buffer with Z16 format or a stencil buffer, + * since these surfaces support only alignment of 8. Use of HALIGN_8 + * for other surfaces is supported, but uses more memory. + */ + if (isl_surf_info_is_z16(info) || + isl_surf_usage_is_stencil(info->usage)) + return 8; + + return 4; +} + +/** + * Choose vertical subimage alignment, in units of surface elements. 
+ */ +static uint32_t +gen7_choose_valign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling) +{ + bool require_valign2 = false; + bool require_valign4 = false; + + if (isl_format_is_compressed(info->format)) + return 1; + + if (gen7_format_needs_valign2(dev, info->format)) + require_valign2 = true; + + /* From the Ivybridge PRM, Volume 4, Part 1, Section 2.12.1: + * RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - This field is intended to be set to VALIGN_4 if the surface was + * rendered as a depth buffer, for a multisampled (4x) render target, + * or for a multisampled (8x) render target, since these surfaces + * support only alignment of 4. Use of VALIGN_4 for other surfaces is + * supported, but uses more memory. This field must be set to + * VALIGN_4 for all tiled Y Render Target surfaces. + * + */ + if (isl_surf_usage_is_depth(info->usage) || + info->samples > 1 || + tiling == ISL_TILING_Y0) { + require_valign4 = true; + } + + if (isl_surf_usage_is_stencil(info->usage)) { + /* The Ivybridge PRM states that the stencil buffer's vertical alignment + * is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment + * Unit Size]. However, valign=8 is outside the set of valid values of + * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2 + * (0x0) and VALIGN_4 (0x1). + * + * The PRM is generally confused about the width, height, and alignment + * of the stencil buffer; and this confusion appears elsewhere. 
For + * example, the following PRM text effectively converts the stencil + * buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM, + * Volume 1, Part 1, Section + * 6.18.4.2 Base Address and LOD Calculation]: + * + * For separate stencil buffer, the width must be mutiplied by 2 and + * height divided by 2 as follows: + * + * w_L = 2*i*ceil(W_L/i) + * h_L = 1/2*j*ceil(H_L/j) + * + * The root of the confusion is that, in W tiling, each pair of rows is + * interleaved into one. + * + * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API + * is more polished. + */ + require_valign4 = true; + } + + assert(!require_valign2 || !require_valign4); + + if (require_valign4) + return 4; + + /* Prefer VALIGN_2 because it conserves memory. */ + return 2; +} + +void +gen7_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + /* IVB+ does not support combined depthstencil. 
*/ + assert(!isl_surf_usage_is_depth_and_stencil(info->usage)); + + *image_align_el = (struct isl_extent3d) { + .w = gen7_choose_halign_el(dev, info), + .h = gen7_choose_valign_el(dev, info, tiling), + .d = 1, + }; +} diff --git a/src/isl/isl_gen7.h b/src/isl/isl_gen7.h new file mode 100644 index 00000000000..2a95b68a9bd --- /dev/null +++ b/src/isl/isl_gen7.h @@ -0,0 +1,52 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void +gen7_filter_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + isl_tiling_flags_t *flags); + +bool +gen7_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen7_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/isl/isl_gen8.c b/src/isl/isl_gen8.c new file mode 100644 index 00000000000..2f434aabb2e --- /dev/null +++ b/src/isl/isl_gen8.c @@ -0,0 +1,229 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl_gen8.h" +#include "isl_priv.h" + +bool +gen8_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + bool require_array = false; + bool require_interleaved = false; + + assert(info->samples >= 1); + + if (info->samples == 1) { + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return true; + } + + /* From the Broadwell PRM >> Volume2d: Command Structures >> + * RENDER_SURFACE_STATE Tile Mode: + * + * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field + * must be YMAJOR. + * + * As usual, though, stencil is special. + */ + if (!isl_tiling_is_std_y(tiling) && !isl_surf_usage_is_stencil(info->usage)) + return false; + + /* From the Broadwell PRM >> Volume2d: Command Structures >> + * RENDER_SURFACE_STATE Multisampled Surface Storage Format: + * + * All multisampled render target surfaces must have this field set to + * MSFMT_MSS + */ + if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) + require_array = true; + + /* From the Broadwell PRM >> Volume2d: Command Structures >> + * RENDER_SURFACE_STATE Number of Multisamples: + * + * - If this field is any value other than MULTISAMPLECOUNT_1, the + * Surface Type must be SURFTYPE_2D This field must be set to + * MULTISAMPLECOUNT_1 unless the surface is a Sampling Engine surface + * or Render Target surface. + * + * - If this field is any value other than MULTISAMPLECOUNT_1, Surface + * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero. 
+ */ + if (info->dim != ISL_SURF_DIM_2D) + return false; + if (info->levels > 1) + return false; + + /* More obvious restrictions */ + if (isl_surf_usage_is_display(info->usage)) + return false; + if (isl_format_is_compressed(info->format)) + return false; + if (isl_format_is_yuv(info->format)) + return false; + + if (isl_surf_usage_is_depth_or_stencil(info->usage)) + require_interleaved = true; + + if (require_array && require_interleaved) + return false; + + if (require_interleaved) { + *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; + return true; + } + + *msaa_layout = ISL_MSAA_LAYOUT_ARRAY; + return true; +} + +/** + * Choose horizontal subimage alignment, in units of surface elements. + */ +static uint32_t +gen8_choose_halign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info) +{ + if (isl_format_is_compressed(info->format)) + return 1; + + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * - This field is intended to be set to HALIGN_8 only if the surface + * was rendered as a depth buffer with Z16 format or a stencil buffer. + * In this case it must be set to HALIGN_8 since these surfaces + * support only alignment of 8. [...] + */ + if (isl_surf_info_is_z16(info)) + return 8; + if (isl_surf_usage_is_stencil(info->usage)) + return 8; + + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * [...] For Z32 formats it must be set to HALIGN_4. + */ + if (isl_surf_usage_is_depth(info->usage)) + return 4; + + if (!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) { + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * - When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, + * HALIGN 16 must be used. 
+ * + * This case handles color surfaces that may own an auxiliary MCS, CCS_D, + * or CCS_E. Depth buffers, including those that own an auxiliary HiZ + * surface, are handled above and do not require HALIGN_16. + */ + assert(!isl_surf_usage_is_depth(info->usage)); + return 16; + } + + /* XXX(chadv): I believe the hardware requires each image to be + * cache-aligned. If that's true, then defaulting to halign=4 is wrong for + * many formats. Depending on the format's block size, we may need to + * increase halign to 8. + */ + return 4; +} + +/** + * Choose vertical subimage alignment, in units of surface elements. + */ +static uint32_t +gen8_choose_valign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info) +{ + /* From the Broadwell PRM > Volume 2d: Command Reference: Structures + * > RENDER_SURFACE_STATE Surface Vertical Alignment (p325): + * + * - For Sampling Engine and Render Target Surfaces: This field + * specifies the vertical alignment requirement in elements for the + * surface. [...] An element is defined as a pixel in uncompresed + * surface formats, and as a compression block in compressed surface + * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an + * element is a sample. + * + * - This field is intended to be set to VALIGN_4 if the surface was + * rendered as a depth buffer, for a multisampled (4x) render target, + * or for a multisampled (8x) render target, since these surfaces + * support only alignment of 4. Use of VALIGN_4 for other surfaces is + * supported, but increases memory usage. + * + * - This field is intended to be set to VALIGN_8 only if the surface + * was rendered as a stencil buffer, since stencil buffer surfaces + * support only alignment of 8. If set to VALIGN_8, Surface Format + * must be R8_UINT. 
+ */ + + if (isl_format_is_compressed(info->format)) + return 1; + + if (isl_surf_usage_is_stencil(info->usage)) + return 8; + + return 4; +} + +void +gen8_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + assert(!isl_tiling_is_std_y(tiling)); + + /* The below text from the Broadwell PRM provides some insight into the + * hardware's requirements for LOD alignment. From the Broadwell PRM >> + * Volume 5: Memory Views >> Surface Layout >> 2D Surfaces: + * + * These [2D surfaces] must adhere to the following memory organization + * rules: + * + * - For non-compressed texture formats, each mipmap must start on an + * even row within the monolithic rectangular area. For + * 1-texel-high mipmaps, this may require a row of padding below + * the previous mipmap. This restriction does not apply to any + * compressed texture formats; each subsequent (lower-res) + * compressed mipmap is positioned directly below the previous + * mipmap. + * + * - Vertical alignment restrictions vary with memory tiling type: + * 1 DWord for linear, 16-byte (DQWord) for tiled. (Note that tiled + * mipmaps are not required to start at the left edge of a tile + * row.) 
+ */ + + *image_align_el = (struct isl_extent3d) { + .w = gen8_choose_halign_el(dev, info), + .h = gen8_choose_valign_el(dev, info), + .d = 1, + }; +} diff --git a/src/isl/isl_gen8.h b/src/isl/isl_gen8.h new file mode 100644 index 00000000000..2017ea8ddc1 --- /dev/null +++ b/src/isl/isl_gen8.h @@ -0,0 +1,47 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +gen8_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen8_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/isl/isl_gen9.c b/src/isl/isl_gen9.c new file mode 100644 index 00000000000..aa290aa1c35 --- /dev/null +++ b/src/isl/isl_gen9.c @@ -0,0 +1,185 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl_gen8.h" +#include "isl_gen9.h" +#include "isl_priv.h" + +/** + * Calculate the surface's subimage alignment, in units of surface samples, + * for the standard tiling formats Yf and Ys. + */ +static void +gen9_calc_std_image_alignment_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *align_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(isl_tiling_is_std_y(tiling)); + + const uint32_t bs = fmtl->bs; + const uint32_t is_Ys = tiling == ISL_TILING_Ys; + + switch (info->dim) { + case ISL_SURF_DIM_1D: + /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface + * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. + */ + *align_sa = (struct isl_extent3d) { + .w = 1 << (12 - (ffs(bs) - 1) + (4 * is_Ys)), + .h = 1, + .d = 1, + }; + return; + case ISL_SURF_DIM_2D: + /* See the Skylake BSpec > Memory Views > Common Surface Formats > + * Surface Layout and Tiling > 2D Surfaces > 2D/CUBE Alignment + * Requirements. + */ + *align_sa = (struct isl_extent3d) { + .w = 1 << (6 - ((ffs(bs) - 1) / 2) + (4 * is_Ys)), + .h = 1 << (6 - ((ffs(bs) - 0) / 2) + (4 * is_Ys)), + .d = 1, + }; + + if (is_Ys) { + /* FINISHME(chadv): I don't trust this code. Untested. */ + isl_finishme("%s:%s: [SKL+] multisample TileYs", __FILE__, __func__); + + switch (msaa_layout) { + case ISL_MSAA_LAYOUT_NONE: + case ISL_MSAA_LAYOUT_INTERLEAVED: + break; + case ISL_MSAA_LAYOUT_ARRAY: + align_sa->w >>= (ffs(info->samples) - 0) / 2; + align_sa->h >>= (ffs(info->samples) - 1) / 2; + break; + } + } + return; + + case ISL_SURF_DIM_3D: + /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface + * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. 
+ */ + *align_sa = (struct isl_extent3d) { + .w = 1 << (4 - ((ffs(bs) + 1) / 3) + (4 * is_Ys)), + .h = 1 << (4 - ((ffs(bs) - 1) / 3) + (2 * is_Ys)), + .d = 1 << (4 - ((ffs(bs) - 0) / 3) + (2 * is_Ys)), + }; + return; + } + + unreachable("bad isl_surface_type"); +} + +void +gen9_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + /* This BSpec text provides some insight into the hardware's alignment + * requirements [Skylake BSpec > Memory Views > Common Surface Formats > + * Surface Layout and Tiling > 2D Surfaces]: + * + * An LOD must be aligned to a cache-line except for some special cases + * related to Planar YUV surfaces. In general, the cache-alignment + * restriction implies there is a minimum height for an LOD of 4 texels. + * So, LODs which are smaller than 4 high are padded. + * + * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - For Sampling Engine and Render Target Surfaces: This field + * specifies the vertical alignment requirement in elements for the + * surface. [...] An element is defined as a pixel in uncompresed + * surface formats, and as a compression block in compressed surface + * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an + * element is a sample. + * + * - This field is used for 2D, CUBE, and 3D surface alignment when Tiled + * Resource Mode is TRMODE_NONE (Tiled Resource Mode is disabled). + * This field is ignored for 1D surfaces and also when Tiled Resource + * Mode is not TRMODE_NONE (e.g. Tiled Resource Mode is enabled). + * + * See the appropriate Alignment table in the "Surface Layout and + * Tiling" section under Common Surface Formats for the table of + * alignment values for Tiled Resrouces. + * + * - For uncompressed surfaces, the units of "j" are rows of pixels on + * the physical surface. 
For compressed texture formats, the units of + * "j" are in compression blocks, thus each increment in "j" is equal + * to h pixels, where h is the height of the compression block in + * pixels. + * + * - Valid Values: VALIGN_4, VALIGN_8, VALIGN_16 + * + * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Horizontal + * Alignment: + * + * - For uncompressed surfaces, the units of "i" are pixels on the + * physical surface. For compressed texture formats, the units of "i" + * are in compression blocks, thus each increment in "i" is equal to + * w pixels, where w is the width of the compression block in pixels. + * + * - Valid Values: HALIGN_4, HALIGN_8, HALIGN_16 + */ + + if (isl_tiling_is_std_y(tiling)) { + struct isl_extent3d image_align_sa; + gen9_calc_std_image_alignment_sa(dev, info, tiling, msaa_layout, + &image_align_sa); + + *image_align_el = isl_extent3d_sa_to_el(info->format, image_align_sa); + return; + } + + if (info->dim == ISL_SURF_DIM_1D) { + /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface + * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. + */ + *image_align_el = isl_extent3d(64, 1, 1); + return; + } + + if (isl_format_is_compressed(info->format)) { + /* On Gen9, the meaning of RENDER_SURFACE_STATE's + * SurfaceHorizontalAlignment and SurfaceVerticalAlignment changed for + * compressed formats. They now indicate a multiple of the compression + * block. For example, if the compression mode is ETC2 then HALIGN_4 + * indicates a horizontal alignment of 16 pixels. + * + * To avoid wasting memory, choose the smallest alignment possible: + * HALIGN_4 and VALIGN_4. 
+ */ + *image_align_el = isl_extent3d(4, 4, 1); + return; + } + + gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); +} diff --git a/src/isl/isl_gen9.h b/src/isl/isl_gen9.h new file mode 100644 index 00000000000..64ed0aa44ef --- /dev/null +++ b/src/isl/isl_gen9.h @@ -0,0 +1,41 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void +gen9_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/isl/isl_image.c b/src/isl/isl_image.c new file mode 100644 index 00000000000..2d146d59ac5 --- /dev/null +++ b/src/isl/isl_image.c @@ -0,0 +1,137 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl.h" +#include "brw_compiler.h" + +enum isl_format +isl_lower_storage_image_format(const struct isl_device *dev, + enum isl_format format) +{ + switch (format) { + /* These are never lowered. Up to BDW we'll have to fall back to untyped + * surface access for 128bpp formats. 
+ */ + case ISL_FORMAT_R32G32B32A32_UINT: + case ISL_FORMAT_R32G32B32A32_SINT: + case ISL_FORMAT_R32G32B32A32_FLOAT: + case ISL_FORMAT_R32_UINT: + case ISL_FORMAT_R32_SINT: + case ISL_FORMAT_R32_FLOAT: + return format; + + /* From HSW to BDW the only 64bpp format supported for typed access is + * RGBA_UINT16. IVB falls back to untyped. + */ + case ISL_FORMAT_R16G16B16A16_UINT: + case ISL_FORMAT_R16G16B16A16_SINT: + case ISL_FORMAT_R16G16B16A16_FLOAT: + case ISL_FORMAT_R32G32_UINT: + case ISL_FORMAT_R32G32_SINT: + case ISL_FORMAT_R32G32_FLOAT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16B16A16_UINT : + ISL_FORMAT_R32G32_UINT); + + /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component + * are supported. IVB doesn't support formats with more than one component + * for typed access. For 8 and 16 bpp formats IVB relies on the + * undocumented behavior that typed reads from R_UINT8 and R_UINT16 + * surfaces actually do a 32-bit misaligned read. The alternative would be + * to use two surface state entries with different formats for each image, + * one for reading (using R_UINT32) and another one for writing (using + * R_UINT8 or R_UINT16), but that would complicate the shaders we generate + * even more. + */ + case ISL_FORMAT_R8G8B8A8_UINT: + case ISL_FORMAT_R8G8B8A8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R16G16_UINT: + case ISL_FORMAT_R16G16_SINT: + case ISL_FORMAT_R16G16_FLOAT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R8G8_UINT: + case ISL_FORMAT_R8G8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? 
+ ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R16_UINT: + case ISL_FORMAT_R16_FLOAT: + case ISL_FORMAT_R16_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R8_UINT: + case ISL_FORMAT_R8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R8_UINT); + + /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported + * by the hardware. + */ + case ISL_FORMAT_R10G10B10A2_UINT: + case ISL_FORMAT_R10G10B10A2_UNORM: + case ISL_FORMAT_R11G11B10_FLOAT: + return ISL_FORMAT_R32_UINT; + + /* No normalized fixed-point formats are supported by the hardware. */ + case ISL_FORMAT_R16G16B16A16_UNORM: + case ISL_FORMAT_R16G16B16A16_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16B16A16_UINT : + ISL_FORMAT_R32G32_UINT); + + case ISL_FORMAT_R8G8B8A8_UNORM: + case ISL_FORMAT_R8G8B8A8_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R16G16_UNORM: + case ISL_FORMAT_R16G16_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R8G8_UNORM: + case ISL_FORMAT_R8G8_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? 
+ ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R16_UNORM: + case ISL_FORMAT_R16_SNORM: + return ISL_FORMAT_R16_UINT; + + case ISL_FORMAT_R8_UNORM: + case ISL_FORMAT_R8_SNORM: + return ISL_FORMAT_R8_UINT; + + default: + assert(!"Unknown image format"); + return ISL_FORMAT_UNSUPPORTED; + } +} diff --git a/src/isl/isl_priv.h b/src/isl/isl_priv.h new file mode 100644 index 00000000000..1c9343a7d1f --- /dev/null +++ b/src/isl/isl_priv.h @@ -0,0 +1,141 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include + +#include "brw_device_info.h" +#include "mesa/main/imports.h" +#include "util/macros.h" + +#include "isl.h" + +#define isl_finishme(format, ...) 
\ + __isl_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__) + +void PRINTFLIKE(3, 4) UNUSED +__isl_finishme(const char *file, int line, const char *fmt, ...); + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static inline uint32_t +ffs(uint32_t n) { + return __builtin_ffs(n); +} + +static inline bool +isl_is_pow2(uintmax_t n) +{ + return !(n & (n - 1)); +} + +/** + * Alignment must be a power of 2. + */ +static inline bool +isl_is_aligned(uintmax_t n, uintmax_t a) +{ + assert(isl_is_pow2(a)); + return (n & (a - 1)) == 0; +} + +/** + * Alignment must be a power of 2. + */ +static inline uintmax_t +isl_align(uintmax_t n, uintmax_t a) +{ + assert(isl_is_pow2(a)); + return (n + a - 1) & ~(a - 1); +} + +static inline uintmax_t +isl_align_npot(uintmax_t n, uintmax_t a) +{ + assert(a > 0); + return ((n + a - 1) / a) * a; +} + +/** + * Alignment must be a power of 2. + */ +static inline uintmax_t +isl_align_div(uintmax_t n, uintmax_t a) +{ + return isl_align(n, a) / a; +} + +static inline uintmax_t +isl_align_div_npot(uintmax_t n, uintmax_t a) +{ + return isl_align_npot(n, a) / a; +} + +/** + * Log base 2, rounding towards zero. 
+ */ +static inline uint32_t +isl_log2u(uint32_t n) +{ + assert(n != 0); + return 31 - __builtin_clz(n); +} + +static inline uint32_t +isl_minify(uint32_t n, uint32_t levels) +{ + if (unlikely(n == 0)) + return 0; + else + return MAX(n >> levels, 1); +} + +static inline struct isl_extent3d +isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + assert(extent_sa.w % fmtl->bw == 0); + assert(extent_sa.h % fmtl->bh == 0); + assert(extent_sa.d % fmtl->bd == 0); + + return (struct isl_extent3d) { + .w = extent_sa.w / fmtl->bw, + .h = extent_sa.h / fmtl->bh, + .d = extent_sa.d / fmtl->bd, + }; +} + +static inline struct isl_extent3d +isl_extent3d_el_to_sa(enum isl_format fmt, struct isl_extent3d extent_el) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return (struct isl_extent3d) { + .w = extent_el.w * fmtl->bw, + .h = extent_el.h * fmtl->bh, + .d = extent_el.d * fmtl->bd, + }; +} diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore index 316d24a9b02..8f9477c4c63 100644 --- a/src/vulkan/.gitignore +++ b/src/vulkan/.gitignore @@ -5,4 +5,3 @@ /wayland-drm-protocol.c /wayland-drm-client-protocol.h /anv_icd.json -/isl_format_layout.c diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index e51f3cf01d7..efb781e8c0b 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -44,7 +44,7 @@ PER_GEN_LIBS = \ libanv-gen8.la \ libanv-gen9.la -noinst_LTLIBRARIES = $(PER_GEN_LIBS) libisl.la +noinst_LTLIBRARIES = $(PER_GEN_LIBS) # The gallium includes are for the util/u_math.h include from main/macros.h @@ -61,6 +61,7 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/isl/ \ -I$(top_builddir)/src \ -I$(top_builddir)/src/glsl/nir \ -I$(top_builddir)/src/vulkan @@ -92,28 +93,9 @@ VULKAN_SOURCES = \ anv_wsi.c \ anv_wsi_x11.c 
-libisl_la_CFLAGS = $(libvulkan_la_CFLAGS) - -libisl_la_SOURCES = \ - isl.c \ - isl_format_layout.c \ - isl_gen4.c \ - isl_gen4.h \ - isl_gen6.c \ - isl_gen6.h \ - isl_gen7.c \ - isl_gen7.h \ - isl_gen8.c \ - isl_gen8.h \ - isl_gen9.c \ - isl_gen9.h \ - isl_image.c \ - $(NULL) - BUILT_SOURCES = \ anv_entrypoints.h \ - anv_entrypoints.c \ - isl_format_layout.c + anv_entrypoints.c libanv_gen7_la_SOURCES = \ genX_cmd_buffer.c \ @@ -171,19 +153,15 @@ anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@ -isl_format_layout.c: isl_format_layout_gen.bash \ - isl_format_layout.csv - $(AM_V_GEN)$(srcdir)/isl_format_layout_gen.bash \ - <$(srcdir)/isl_format_layout.csv >$@ - CLEANFILES = $(BUILT_SOURCES) libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ + $(top_builddir)/src/isl/libisl.la \ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ ../mesa/libmesa.la \ ../mesa/drivers/dri/common/libdri_test_stubs.la \ -lpthread -ldl -lstdc++ \ - $(PER_GEN_LIBS) libisl.la + $(PER_GEN_LIBS) # Libvulkan with dummy gem. Used for unit tests. 
diff --git a/src/vulkan/isl.c b/src/vulkan/isl.c deleted file mode 100644 index d858ea74745..00000000000 --- a/src/vulkan/isl.c +++ /dev/null @@ -1,1046 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "isl.h" -#include "isl_gen4.h" -#include "isl_gen6.h" -#include "isl_gen7.h" -#include "isl_gen8.h" -#include "isl_gen9.h" -#include "isl_priv.h" - -void PRINTFLIKE(3, 4) UNUSED -__isl_finishme(const char *file, int line, const char *fmt, ...) 
-{ - va_list ap; - char buf[512]; - - va_start(ap, fmt); - vsnprintf(buf, sizeof(buf), fmt, ap); - va_end(ap); - - fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf); -} - -void -isl_device_init(struct isl_device *dev, - const struct brw_device_info *info) -{ - dev->info = info; - dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6; - - /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some - * device properties at buildtime. Verify that the macros with the device - * properties chosen during runtime. - */ - assert(ISL_DEV_GEN(dev) == dev->info->gen); - assert(ISL_DEV_USE_SEPARATE_STENCIL(dev) == dev->use_separate_stencil); - - /* Did we break hiz or stencil? */ - if (ISL_DEV_USE_SEPARATE_STENCIL(dev)) - assert(info->has_hiz_and_separate_stencil); - if (info->must_use_separate_stencil) - assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); -} - -bool -isl_format_has_sint_channel(enum isl_format fmt) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - return fmtl->channels.r.type == ISL_SINT || - fmtl->channels.g.type == ISL_SINT || - fmtl->channels.b.type == ISL_SINT || - fmtl->channels.a.type == ISL_SINT || - fmtl->channels.l.type == ISL_SINT || - fmtl->channels.i.type == ISL_SINT || - fmtl->channels.p.type == ISL_SINT || - fmtl->channels.g.type == ISL_SINT; -} - -/** - * @param[out] info is written only on success - */ -bool -isl_tiling_get_info(const struct isl_device *dev, - enum isl_tiling tiling, - uint32_t format_block_size, - struct isl_tile_info *tile_info) -{ - const uint32_t bs = format_block_size; - uint32_t width, height; - - assert(bs > 0); - - switch (tiling) { - case ISL_TILING_LINEAR: - width = 1; - height = 1; - break; - - case ISL_TILING_X: - width = 1 << 9; - height = 1 << 3; - break; - - case ISL_TILING_Y0: - width = 1 << 7; - height = 1 << 5; - break; - - case ISL_TILING_W: - /* XXX: Should W tile be same as Y? 
*/ - width = 1 << 6; - height = 1 << 6; - break; - - case ISL_TILING_Yf: - case ISL_TILING_Ys: { - if (ISL_DEV_GEN(dev) < 9) - return false; - - if (!isl_is_pow2(bs)) - return false; - - bool is_Ys = tiling == ISL_TILING_Ys; - - width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys)); - height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys)); - break; - } - } /* end switch */ - - *tile_info = (struct isl_tile_info) { - .tiling = tiling, - .width = width, - .height = height, - .size = width * height, - }; - - return true; -} - -void -isl_tiling_get_extent(const struct isl_device *dev, - enum isl_tiling tiling, - uint32_t format_block_size, - struct isl_extent2d *e) -{ - struct isl_tile_info tile_info; - isl_tiling_get_info(dev, tiling, format_block_size, &tile_info); - *e = isl_extent2d(tile_info.width, tile_info.height); -} - -/** - * @param[out] tiling is set only on success - */ -bool -isl_surf_choose_tiling(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling *tiling) -{ - isl_tiling_flags_t tiling_flags = info->tiling_flags; - - if (ISL_DEV_GEN(dev) >= 7) { - gen7_filter_tiling(dev, info, &tiling_flags); - } else { - isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev)); - gen7_filter_tiling(dev, info, &tiling_flags); - } - - #define CHOOSE(__tiling) \ - do { \ - if (tiling_flags & (1u << (__tiling))) { \ - *tiling = (__tiling); \ - return true; \ - } \ - } while (0) - - /* Of the tiling modes remaining, choose the one that offers the best - * performance. - */ - CHOOSE(ISL_TILING_Ys); - CHOOSE(ISL_TILING_Yf); - CHOOSE(ISL_TILING_Y0); - CHOOSE(ISL_TILING_X); - CHOOSE(ISL_TILING_W); - CHOOSE(ISL_TILING_LINEAR); - - #undef CHOOSE - - /* No tiling mode accomodates the inputs. 
*/ - return false; -} - -static bool -isl_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout) -{ - if (ISL_DEV_GEN(dev) >= 8) { - return gen8_choose_msaa_layout(dev, info, tiling, msaa_layout); - } else if (ISL_DEV_GEN(dev) >= 7) { - return gen7_choose_msaa_layout(dev, info, tiling, msaa_layout); - } else if (ISL_DEV_GEN(dev) >= 6) { - return gen6_choose_msaa_layout(dev, info, tiling, msaa_layout); - } else { - return gen4_choose_msaa_layout(dev, info, tiling, msaa_layout); - } -} - -static void -isl_msaa_interleaved_scale_px_to_sa(uint32_t samples, - uint32_t *width, uint32_t *height) -{ - assert(isl_is_pow2(samples)); - - /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level - * Sizes (p133): - * - * If the surface is multisampled and it is a depth or stencil surface - * or Multisampled Surface StorageFormat in SURFACE_STATE is - * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before - * proceeding: [...] - */ - *width = isl_align(*width, 2) << ((ffs(samples) - 0) / 2); - *height = isl_align(*height, 2) << ((ffs(samples) - 1) / 2); -} - -static enum isl_array_pitch_span -isl_choose_array_pitch_span(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_dim_layout dim_layout, - const struct isl_extent4d *phys_level0_sa) -{ - switch (dim_layout) { - case ISL_DIM_LAYOUT_GEN9_1D: - if (ISL_DEV_GEN(dev) >= 9) - isl_finishme("%s:%s: [SKL+] 1d surface layout", __FILE__, __func__); - /* fallthrough */ - - case ISL_DIM_LAYOUT_GEN4_2D: - if (ISL_DEV_GEN(dev) >= 8) { - /* QPitch becomes programmable in Broadwell. So choose the - * most compact QPitch possible in order to conserve memory. 
- * - * From the Broadwell PRM >> Volume 2d: Command Reference: Structures - * >> RENDER_SURFACE_STATE Surface QPitch (p325): - * - * - Software must ensure that this field is set to a value - * sufficiently large such that the array slices in the surface - * do not overlap. Refer to the Memory Data Formats section for - * information on how surfaces are stored in memory. - * - * - This field specifies the distance in rows between array - * slices. It is used only in the following cases: - * - * - Surface Array is enabled OR - * - Number of Mulitsamples is not NUMSAMPLES_1 and - * Multisampled Surface Storage Format set to MSFMT_MSS OR - * - Surface Type is SURFTYPE_CUBE - */ - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } else if (ISL_DEV_GEN(dev) >= 7) { - /* Note that Ivybridge introduces - * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the - * driver more control over the QPitch. - */ - - if (phys_level0_sa->array_len == 1) { - /* The hardware will never use the QPitch. So choose the most - * compact QPitch possible in order to conserve memory. - */ - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } - - if (isl_surf_usage_is_depth_or_stencil(info->usage)) { - /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >> - * Section 6.18.4.7: Surface Arrays (p112): - * - * If Surface Array Spacing is set to ARYSPC_FULL (note that - * the depth buffer and stencil buffer have an implied value of - * ARYSPC_FULL): - */ - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } - - if (info->levels == 1) { - /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing - * to ARYSPC_LOD0. 
- */ - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } - - return ISL_ARRAY_PITCH_SPAN_FULL; - } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && - ISL_DEV_USE_SEPARATE_STENCIL(dev) && - isl_surf_usage_is_stencil(info->usage)) { - /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: - * Graphics Core >> Section 7.18.3.7: Surface Arrays: - * - * The separate stencil buffer does not support mip mapping, thus - * the storage for LODs other than LOD 0 is not needed. - */ - assert(info->levels == 1); - assert(phys_level0_sa->array_len == 1); - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } else { - if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && - ISL_DEV_USE_SEPARATE_STENCIL(dev) && - isl_surf_usage_is_stencil(info->usage)) { - /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: - * Graphics Core >> Section 7.18.3.7: Surface Arrays: - * - * The separate stencil buffer does not support mip mapping, - * thus the storage for LODs other than LOD 0 is not needed. - */ - assert(info->levels == 1); - assert(phys_level0_sa->array_len == 1); - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } - - if (phys_level0_sa->array_len == 1) { - /* The hardware will never use the QPitch. So choose the most - * compact QPitch possible in order to conserve memory. - */ - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } - - return ISL_ARRAY_PITCH_SPAN_FULL; - } - - case ISL_DIM_LAYOUT_GEN4_3D: - /* The hardware will never use the QPitch. So choose the most - * compact QPitch possible in order to conserve memory. 
- */ - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } - - unreachable("bad isl_dim_layout"); - return ISL_ARRAY_PITCH_SPAN_FULL; -} - -static void -isl_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - if (ISL_DEV_GEN(dev) >= 9) { - gen9_choose_image_alignment_el(dev, info, tiling, msaa_layout, - image_align_el); - } else if (ISL_DEV_GEN(dev) >= 8) { - gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout, - image_align_el); - } else if (ISL_DEV_GEN(dev) >= 7) { - gen7_choose_image_alignment_el(dev, info, tiling, msaa_layout, - image_align_el); - } else if (ISL_DEV_GEN(dev) >= 6) { - gen6_choose_image_alignment_el(dev, info, tiling, msaa_layout, - image_align_el); - } else { - gen4_choose_image_alignment_el(dev, info, tiling, msaa_layout, - image_align_el); - } -} - -static enum isl_dim_layout -isl_surf_choose_dim_layout(const struct isl_device *dev, - enum isl_surf_dim logical_dim) -{ - if (ISL_DEV_GEN(dev) >= 9) { - switch (logical_dim) { - case ISL_SURF_DIM_1D: - return ISL_DIM_LAYOUT_GEN9_1D; - case ISL_SURF_DIM_2D: - case ISL_SURF_DIM_3D: - return ISL_DIM_LAYOUT_GEN4_2D; - } - } else { - switch (logical_dim) { - case ISL_SURF_DIM_1D: - case ISL_SURF_DIM_2D: - return ISL_DIM_LAYOUT_GEN4_2D; - case ISL_SURF_DIM_3D: - return ISL_DIM_LAYOUT_GEN4_3D; - } - } - - unreachable("bad isl_surf_dim"); - return ISL_DIM_LAYOUT_GEN4_2D; -} - -/** - * Calculate the physical extent of the surface's first level, in units of - * surface samples. 
- */ -static void -isl_calc_phys_level0_extent_sa(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_dim_layout dim_layout, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent4d *phys_level0_sa) -{ - if (isl_format_is_yuv(info->format)) - isl_finishme("%s:%s: YUV format", __FILE__, __func__); - - switch (info->dim) { - case ISL_SURF_DIM_1D: - assert(info->height == 1); - assert(info->depth == 1); - assert(info->samples == 1); - - switch (dim_layout) { - case ISL_DIM_LAYOUT_GEN4_3D: - unreachable("bad isl_dim_layout"); - - case ISL_DIM_LAYOUT_GEN9_1D: - if (ISL_DEV_GEN(dev) >= 9) - isl_finishme("%s:%s: [SKL+] 1d surface layout", __FILE__, __func__); - /* fallthrough */ - - case ISL_DIM_LAYOUT_GEN4_2D: - *phys_level0_sa = (struct isl_extent4d) { - .w = info->width, - .h = 1, - .d = 1, - .a = info->array_len, - }; - break; - } - break; - - case ISL_SURF_DIM_2D: - assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D); - - if (tiling == ISL_TILING_Ys && info->samples > 1) - isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__); - - switch (msaa_layout) { - case ISL_MSAA_LAYOUT_NONE: - assert(info->depth == 1); - assert(info->samples == 1); - - *phys_level0_sa = (struct isl_extent4d) { - .w = info->width, - .h = info->height, - .d = 1, - .a = info->array_len, - }; - break; - - case ISL_MSAA_LAYOUT_ARRAY: - assert(info->depth == 1); - assert(info->array_len == 1); - assert(!isl_format_is_compressed(info->format)); - - *phys_level0_sa = (struct isl_extent4d) { - .w = info->width, - .h = info->height, - .d = 1, - .a = info->samples, - }; - break; - - case ISL_MSAA_LAYOUT_INTERLEAVED: - assert(info->depth == 1); - assert(info->array_len == 1); - assert(!isl_format_is_compressed(info->format)); - - *phys_level0_sa = (struct isl_extent4d) { - .w = info->width, - .h = info->height, - .d = 1, - .a = 1, - }; - - isl_msaa_interleaved_scale_px_to_sa(info->samples, - &phys_level0_sa->w, - 
&phys_level0_sa->h); - break; - } - break; - - case ISL_SURF_DIM_3D: - assert(info->array_len == 1); - assert(info->samples == 1); - - switch (dim_layout) { - case ISL_DIM_LAYOUT_GEN9_1D: - unreachable("bad isl_dim_layout"); - - case ISL_DIM_LAYOUT_GEN4_2D: - assert(ISL_DEV_GEN(dev) >= 9); - - *phys_level0_sa = (struct isl_extent4d) { - .w = info->width, - .h = info->height, - .d = 1, - .a = info->depth, - }; - break; - - case ISL_DIM_LAYOUT_GEN4_3D: - assert(ISL_DEV_GEN(dev) < 9); - *phys_level0_sa = (struct isl_extent4d) { - .w = info->width, - .h = info->height, - .d = info->depth, - .a = 1, - }; - break; - } - break; - } -} - -/** - * A variant of isl_calc_phys_slice0_extent_sa() specific to - * ISL_DIM_LAYOUT_GEN4_2D. - */ -static void -isl_calc_phys_slice0_extent_sa_gen4_2d( - const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_msaa_layout msaa_layout, - const struct isl_extent3d *image_align_sa, - const struct isl_extent4d *phys_level0_sa, - struct isl_extent2d *phys_slice0_sa) -{ - assert(phys_level0_sa->depth == 1); - - uint32_t slice_top_w = 0; - uint32_t slice_bottom_w = 0; - uint32_t slice_left_h = 0; - uint32_t slice_right_h = 0; - - uint32_t W0 = phys_level0_sa->w; - uint32_t H0 = phys_level0_sa->h; - - for (uint32_t l = 0; l < info->levels; ++l) { - uint32_t W = isl_minify(W0, l); - uint32_t H = isl_minify(H0, l); - - if (msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { - /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level - * Sizes (p133): - * - * If the surface is multisampled and it is a depth or stencil - * surface or Multisampled Surface StorageFormat in - * SURFACE_STATE is MSFMT_DEPTH_STENCIL, W_L and H_L must be - * adjusted as follows before proceeding: [...] 
- */ - isl_msaa_interleaved_scale_px_to_sa(info->samples, &W, &H); - } - - uint32_t w = isl_align_npot(W, image_align_sa->w); - uint32_t h = isl_align_npot(H, image_align_sa->h); - - if (l == 0) { - slice_top_w = w; - slice_left_h = h; - slice_right_h = h; - } else if (l == 1) { - slice_bottom_w = w; - slice_left_h += h; - } else if (l == 2) { - slice_bottom_w += w; - } else { - slice_right_h += h; - } - } - - *phys_slice0_sa = (struct isl_extent2d) { - .w = MAX(slice_top_w, slice_bottom_w), - .h = MAX(slice_left_h, slice_right_h), - }; -} - -/** - * A variant of isl_calc_phys_slice0_extent_sa() specific to - * ISL_DIM_LAYOUT_GEN4_3D. - */ -static void -isl_calc_phys_slice0_extent_sa_gen4_3d( - const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - const struct isl_extent3d *image_align_sa, - const struct isl_extent4d *phys_level0_sa, - struct isl_extent2d *phys_slice0_sa) -{ - assert(info->samples == 1); - assert(phys_level0_sa->array_len == 1); - - uint32_t slice_w = 0; - uint32_t slice_h = 0; - - uint32_t W0 = phys_level0_sa->w; - uint32_t H0 = phys_level0_sa->h; - uint32_t D0 = phys_level0_sa->d; - - for (uint32_t l = 0; l < info->levels; ++l) { - uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w); - uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h); - uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa->d); - - uint32_t max_layers_horiz = MIN(level_d, 1u << l); - uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); - - slice_w = MAX(slice_w, level_w * max_layers_horiz); - slice_h += level_h * max_layers_vert; - } - - *phys_slice0_sa = (struct isl_extent2d) { - .w = slice_w, - .h = slice_h, - }; -} - -/** - * Calculate the physical extent of the surface's first array slice, in units - * of surface samples. The result is aligned to \a image_align_sa. 
- */ -static void -isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_dim_layout dim_layout, - enum isl_msaa_layout msaa_layout, - const struct isl_extent3d *image_align_sa, - const struct isl_extent4d *phys_level0_sa, - struct isl_extent2d *phys_slice0_sa) -{ - switch (dim_layout) { - case ISL_DIM_LAYOUT_GEN9_1D: - if (ISL_DEV_GEN(dev) >= 9) - isl_finishme("%s:%s: [SKL+] physical layout of 1d surfaces", - __FILE__, __func__); - /*fallthrough*/ - case ISL_DIM_LAYOUT_GEN4_2D: - isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, - image_align_sa, phys_level0_sa, - phys_slice0_sa); - return; - case ISL_DIM_LAYOUT_GEN4_3D: - isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, image_align_sa, - phys_level0_sa, phys_slice0_sa); - return; - } -} - -/** - * Calculate the pitch between physical array slices, in units of rows of - * surface samples. The result is aligned to \a image_align_sa. - */ -static uint32_t -isl_calc_array_pitch_sa_rows(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_dim_layout dim_layout, - enum isl_array_pitch_span array_pitch_span, - const struct isl_extent3d *image_align_sa, - const struct isl_extent4d *phys_level0_sa, - const struct isl_extent2d *phys_slice0_sa) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - switch (dim_layout) { - case ISL_DIM_LAYOUT_GEN9_1D: - if (ISL_DEV_GEN(dev) >= 9) - isl_finishme("%s:%s: [SKL+] physical layout of 1d surfaces", - __FILE__, __func__); - /*fallthrough*/ - - case ISL_DIM_LAYOUT_GEN4_2D: - switch (array_pitch_span) { - case ISL_ARRAY_PITCH_SPAN_COMPACT: - return isl_align_npot(phys_slice0_sa->h, image_align_sa->h); - case ISL_ARRAY_PITCH_SPAN_FULL: { - /* The QPitch equation is found in the Broadwell PRM >> Volume 5: - * Memory Views >> Common Surface Formats >> Surface Layout >> 2D - * Surfaces >> Surface Arrays. 
- */ - uint32_t H0_sa = phys_level0_sa->h; - uint32_t H1_sa = isl_minify(H0_sa, 1); - - uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); - uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); - - uint32_t m; - if (ISL_DEV_GEN(dev) >= 7) { - /* The QPitch equation changed slightly in Ivybridge. */ - m = 12; - } else { - m = 11; - } - - uint32_t pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); - - if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && - (info->height % 4 == 1)) { - /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: - * Graphics Core >> Section 7.18.3.7: Surface Arrays: - * - * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than - * the value calculated in the equation above , for every - * other odd Surface Height starting from 1 i.e. 1,5,9,13. - * - * XXX(chadv): Is the errata natural corollary of the physical - * layout of interleaved samples? - */ - pitch_sa_rows += 4; - } - - pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); - - return pitch_sa_rows; - } /* end case */ - break; - } - break; - - case ISL_DIM_LAYOUT_GEN4_3D: - assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); - return isl_align_npot(phys_slice0_sa->h, image_align_sa->h); - } - - unreachable("bad isl_dim_layout"); - return 0; -} - -/** - * Calculate the pitch of each surface row, in bytes. - */ -static uint32_t -isl_calc_row_pitch(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - const struct isl_tile_info *tile_info, - const struct isl_extent3d *image_align_sa, - const struct isl_extent2d *phys_slice0_sa) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - uint32_t row_pitch = info->min_pitch; - - /* First, align the surface to a cache line boundary, as the PRM explains - * below. 
- * - * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface - * Formats >> Surface Padding Requirements >> Render Target and Media - * Surfaces: - * - * The data port accesses data (pixels) outside of the surface if they - * are contained in the same cache request as pixels that are within the - * surface. These pixels will not be returned by the requesting message, - * however if these pixels lie outside of defined pages in the GTT, - * a GTT error will result when the cache request is processed. In order - * to avoid these GTT errors, “padding” at the bottom of the surface is - * sometimes necessary. - * - * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface - * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: - * - * The sampling engine accesses texels outside of the surface if they - * are contained in the same cache line as texels that are within the - * surface. These texels will not participate in any calculation - * performed by the sampling engine and will not affect the result of - * any sampling engine operation, however if these texels lie outside of - * defined pages in the GTT, a GTT error will result when the cache line - * is accessed. In order to avoid these GTT errors, “padding” at the - * bottom and right side of a sampling engine surface is sometimes - * necessary. - * - * It is possible that a cache line will straddle a page boundary if the - * base address or pitch is not aligned. All pages included in the cache - * lines that are part of the surface must map to valid GTT entries to - * avoid errors. To determine the necessary padding on the bottom and - * right side of the surface, refer to the table in Alignment Unit Size - * section for the i and j parameters for the surface format in use. The - * surface must then be extended to the next multiple of the alignment - * unit size in each dimension, and all texels contained in this - * extended surface must have valid GTT entries. 
- * - * For example, suppose the surface size is 15 texels by 10 texels and - * the alignment parameters are i=4 and j=2. In this case, the extended - * surface would be 16 by 10. Note that these calculations are done in - * texels, and must be converted to bytes based on the surface format - * being used to determine whether additional pages need to be defined. - */ - row_pitch = MAX(row_pitch, - fmtl->bs * isl_align_div_npot(phys_slice0_sa->w, fmtl->bw)); - - switch (tile_info->tiling) { - case ISL_TILING_LINEAR: - /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> - * RENDER_SURFACE_STATE Surface Pitch (p349): - * - * - For linear render target surfaces and surfaces accessed with the - * typed data port messages, the pitch must be a multiple of the - * element size for non-YUV surface formats. Pitch must be - * a multiple of 2 * element size for YUV surface formats. - * - * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we - * ignore because isl doesn't do buffers.] - * - * - For other linear surfaces, the pitch can be any multiple of - * bytes. - */ - if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { - if (isl_format_is_yuv(info->format)) { - row_pitch = isl_align(row_pitch, fmtl->bs); - } else { - row_pitch = isl_align(row_pitch, 2 * fmtl->bs); - } - } - break; - default: - /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> - * RENDER_SURFACE_STATE Surface Pitch (p349): - * - * - For tiled surfaces, the pitch must be a multiple of the tile - * width. - */ - row_pitch = isl_align(row_pitch, tile_info->width); - break; - } - - return row_pitch; -} - -/** - * Calculate the surface's total height, including padding, in units of - * surface elements. 
- */ -static uint32_t -isl_calc_total_height_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - const struct isl_tile_info *tile_info, - uint32_t phys_array_len, - uint32_t row_pitch, - uint32_t array_pitch_el_rows) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - uint32_t total_h_el = phys_array_len * array_pitch_el_rows; - uint32_t pad_bytes = 0; - - /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface - * Formats >> Surface Padding Requirements >> Render Target and Media - * Surfaces: - * - * The data port accesses data (pixels) outside of the surface if they - * are contained in the same cache request as pixels that are within the - * surface. These pixels will not be returned by the requesting message, - * however if these pixels lie outside of defined pages in the GTT, - * a GTT error will result when the cache request is processed. In - * order to avoid these GTT errors, “padding” at the bottom of the - * surface is sometimes necessary. - * - * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface - * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: - * - * ... Lots of padding requirements, all listed separately below. - */ - - /* We can safely ignore the first padding requirement, quoted below, - * because isl doesn't do buffers. - * - * - [pre-BDW] For buffers, which have no inherent “height,” padding - * requirements are different. A buffer must be padded to the next - * multiple of 256 array elements, with an additional 16 bytes added - * beyond that to account for the L1 cache line. - */ - - /* - * - For compressed textures [...], padding at the bottom of the surface - * is to an even compressed row. - */ - if (isl_format_is_compressed(info->format)) - total_h_el = isl_align(total_h_el, 2); - - /* - * - For cube surfaces, an additional two rows of padding are required - * at the bottom of the surface. 
- */ - if (info->usage & ISL_SURF_USAGE_CUBE_BIT) - total_h_el += 2; - - /* - * - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats, - * additional padding is required. These surfaces require an extra row - * plus 16 bytes of padding at the bottom in addition to the general - * padding requirements. - */ - if (isl_format_is_yuv(info->format) && - (fmtl->bs == 96 || fmtl->bs == 48|| fmtl->bs == 24)) { - total_h_el += 1; - pad_bytes += 16; - } - - /* - * - For linear surfaces, additional padding of 64 bytes is required at - * the bottom of the surface. This is in addition to the padding - * required above. - */ - if (tile_info->tiling == ISL_TILING_LINEAR) - pad_bytes += 64; - - /* The below text weakens, not strengthens, the padding requirements for - * linear surfaces. Therefore we can safely ignore it. - * - * - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array, - * non-MSAA, non-mip-mapped surfaces in linear memory, the only - * padding requirement is to the next aligned 64-byte boundary beyond - * the end of the surface. The rest of the padding requirements - * documented above do not apply to these surfaces. - */ - - /* - * - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and - * height % 4 != 0, the surface must be padded with - * 4-(height % 4)*Surface Pitch # of bytes. - */ - if (ISL_DEV_GEN(dev) >= 9 && - tile_info->tiling == ISL_TILING_LINEAR && - (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) { - total_h_el = isl_align(total_h_el, 4); - } - - /* - * - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded - * to 4 times the Surface Pitch # of bytes - */ - if (ISL_DEV_GEN(dev) >= 9 && - tile_info->tiling == ISL_TILING_LINEAR && - info->dim == ISL_SURF_DIM_1D) { - total_h_el += 4; - } - - /* Be sloppy. Align any leftover padding to a row boundary. 
*/ - total_h_el += isl_align_div_npot(pad_bytes, row_pitch); - - return total_h_el; -} - -bool -isl_surf_init_s(const struct isl_device *dev, - struct isl_surf *surf, - const struct isl_surf_init_info *restrict info) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - const struct isl_extent4d logical_level0_px = { - .w = info->width, - .h = info->height, - .d = info->depth, - .a = info->array_len, - }; - - enum isl_dim_layout dim_layout = - isl_surf_choose_dim_layout(dev, info->dim); - - enum isl_tiling tiling; - if (!isl_surf_choose_tiling(dev, info, &tiling)) - return false; - - struct isl_tile_info tile_info; - if (!isl_tiling_get_info(dev, tiling, fmtl->bs, &tile_info)) - return false; - - enum isl_msaa_layout msaa_layout; - if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout)) - return false; - - struct isl_extent3d image_align_el; - isl_choose_image_alignment_el(dev, info, tiling, msaa_layout, - &image_align_el); - - struct isl_extent3d image_align_sa = - isl_extent3d_el_to_sa(info->format, image_align_el); - - struct isl_extent4d phys_level0_sa; - isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, - &phys_level0_sa); - - enum isl_array_pitch_span array_pitch_span = - isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); - - struct isl_extent2d phys_slice0_sa; - isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout, - &image_align_sa, &phys_level0_sa, - &phys_slice0_sa); - assert(phys_slice0_sa.w % fmtl->bw == 0); - assert(phys_slice0_sa.h % fmtl->bh == 0); - - const uint32_t row_pitch = isl_calc_row_pitch(dev, info, &tile_info, - &image_align_sa, - &phys_slice0_sa); - - const uint32_t array_pitch_sa_rows = - isl_calc_array_pitch_sa_rows(dev, info, dim_layout, array_pitch_span, - &image_align_sa, &phys_level0_sa, - &phys_slice0_sa); - assert(array_pitch_sa_rows % fmtl->bh == 0); - - const uint32_t array_pitch_el_rows = array_pitch_sa_rows / fmtl->bh; - - const 
uint32_t total_h_el = - isl_calc_total_height_el(dev, info, &tile_info, - phys_level0_sa.array_len, row_pitch, - array_pitch_el_rows); - - const uint32_t total_h_sa = total_h_el * fmtl->bh; - const uint32_t size = row_pitch * total_h_sa; - - /* Alignment of surface base address, in bytes */ - uint32_t base_alignment = info->min_alignment; - base_alignment = isl_align(base_alignment, tile_info.size); - - *surf = (struct isl_surf) { - .dim = info->dim, - .dim_layout = dim_layout, - .msaa_layout = msaa_layout, - .tiling = tiling, - .format = info->format, - - .levels = info->levels, - .samples = info->samples, - - .image_alignment_el = image_align_el, - .logical_level0_px = logical_level0_px, - .phys_level0_sa = phys_level0_sa, - - .size = size, - .alignment = base_alignment, - .row_pitch = row_pitch, - .array_pitch_el_rows = array_pitch_el_rows, - .array_pitch_span = array_pitch_span, - - .usage = info->usage, - }; - - return true; -} diff --git a/src/vulkan/isl.h b/src/vulkan/isl.h deleted file mode 100644 index 184b0c5f70a..00000000000 --- a/src/vulkan/isl.h +++ /dev/null @@ -1,917 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file - * @brief Intel Surface Layout - * - * Header Layout - * ============= - * - * The header is ordered as: - * - forward declarations - * - macros that may be overridden at compile-time for specific gens - * - enums and constants - * - structs and unions - * - functions - * - * - * Surface Units - * ============= - * - * Some symbol names have a unit suffix. - * - * - px: logical pixels - * - sa: physical surface samples - * - el: physical surface elements - * - sa_rows: rows of physical surface samples - * - el_rows: rows of physical surface elements - * - * The Broadwell PRM [1] defines a surface element as follows: - * - * An element is defined as a pixel in uncompresed surface formats, and as - * a compression block in compressed surface formats. For - * MSFMT_DEPTH_STENCIL type multisampled surfaces, an element is a sample. - * - * [1]: Broadwell PRM >> Volume 2d: Command Reference: Structures >> - * RENDER_SURFACE_STATE Surface Vertical Alignment (p325) - */ - -#pragma once - -#include -#include -#include - -#include "util/macros.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct brw_device_info; -struct brw_image_param; - -#ifndef ISL_DEV_GEN -/** - * @brief Get the hardware generation of isl_device. - * - * You can define this as a compile-time constant in the CFLAGS. For example, - * `gcc -DISL_DEV_GEN(dev)=9 ...`. 
- */ -#define ISL_DEV_GEN(__dev) ((__dev)->info->gen) -#endif - -#ifndef ISL_DEV_USE_SEPARATE_STENCIL -/** - * You can define this as a compile-time constant in the CFLAGS. For example, - * `gcc -DISL_DEV_USE_SEPARATE_STENCIL(dev)=1 ...`. - */ -#define ISL_DEV_USE_SEPARATE_STENCIL(__dev) ((__dev)->use_separate_stencil) -#endif - -/** - * Hardware enumeration SURFACE_FORMAT. - * - * For the official list, see Broadwell PRM: Volume 2b: Command Reference: - * Enumerations: SURFACE_FORMAT. - */ -enum isl_format { - ISL_FORMAT_R32G32B32A32_FLOAT = 0, - ISL_FORMAT_R32G32B32A32_SINT = 1, - ISL_FORMAT_R32G32B32A32_UINT = 2, - ISL_FORMAT_R32G32B32A32_UNORM = 3, - ISL_FORMAT_R32G32B32A32_SNORM = 4, - ISL_FORMAT_R64G64_FLOAT = 5, - ISL_FORMAT_R32G32B32X32_FLOAT = 6, - ISL_FORMAT_R32G32B32A32_SSCALED = 7, - ISL_FORMAT_R32G32B32A32_USCALED = 8, - ISL_FORMAT_R32G32B32A32_SFIXED = 32, - ISL_FORMAT_R64G64_PASSTHRU = 33, - ISL_FORMAT_R32G32B32_FLOAT = 64, - ISL_FORMAT_R32G32B32_SINT = 65, - ISL_FORMAT_R32G32B32_UINT = 66, - ISL_FORMAT_R32G32B32_UNORM = 67, - ISL_FORMAT_R32G32B32_SNORM = 68, - ISL_FORMAT_R32G32B32_SSCALED = 69, - ISL_FORMAT_R32G32B32_USCALED = 70, - ISL_FORMAT_R32G32B32_SFIXED = 80, - ISL_FORMAT_R16G16B16A16_UNORM = 128, - ISL_FORMAT_R16G16B16A16_SNORM = 129, - ISL_FORMAT_R16G16B16A16_SINT = 130, - ISL_FORMAT_R16G16B16A16_UINT = 131, - ISL_FORMAT_R16G16B16A16_FLOAT = 132, - ISL_FORMAT_R32G32_FLOAT = 133, - ISL_FORMAT_R32G32_SINT = 134, - ISL_FORMAT_R32G32_UINT = 135, - ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS = 136, - ISL_FORMAT_X32_TYPELESS_G8X24_UINT = 137, - ISL_FORMAT_L32A32_FLOAT = 138, - ISL_FORMAT_R32G32_UNORM = 139, - ISL_FORMAT_R32G32_SNORM = 140, - ISL_FORMAT_R64_FLOAT = 141, - ISL_FORMAT_R16G16B16X16_UNORM = 142, - ISL_FORMAT_R16G16B16X16_FLOAT = 143, - ISL_FORMAT_A32X32_FLOAT = 144, - ISL_FORMAT_L32X32_FLOAT = 145, - ISL_FORMAT_I32X32_FLOAT = 146, - ISL_FORMAT_R16G16B16A16_SSCALED = 147, - ISL_FORMAT_R16G16B16A16_USCALED = 148, - ISL_FORMAT_R32G32_SSCALED = 
149, - ISL_FORMAT_R32G32_USCALED = 150, - ISL_FORMAT_R32G32_SFIXED = 160, - ISL_FORMAT_R64_PASSTHRU = 161, - ISL_FORMAT_B8G8R8A8_UNORM = 192, - ISL_FORMAT_B8G8R8A8_UNORM_SRGB = 193, - ISL_FORMAT_R10G10B10A2_UNORM = 194, - ISL_FORMAT_R10G10B10A2_UNORM_SRGB = 195, - ISL_FORMAT_R10G10B10A2_UINT = 196, - ISL_FORMAT_R10G10B10_SNORM_A2_UNORM = 197, - ISL_FORMAT_R8G8B8A8_UNORM = 199, - ISL_FORMAT_R8G8B8A8_UNORM_SRGB = 200, - ISL_FORMAT_R8G8B8A8_SNORM = 201, - ISL_FORMAT_R8G8B8A8_SINT = 202, - ISL_FORMAT_R8G8B8A8_UINT = 203, - ISL_FORMAT_R16G16_UNORM = 204, - ISL_FORMAT_R16G16_SNORM = 205, - ISL_FORMAT_R16G16_SINT = 206, - ISL_FORMAT_R16G16_UINT = 207, - ISL_FORMAT_R16G16_FLOAT = 208, - ISL_FORMAT_B10G10R10A2_UNORM = 209, - ISL_FORMAT_B10G10R10A2_UNORM_SRGB = 210, - ISL_FORMAT_R11G11B10_FLOAT = 211, - ISL_FORMAT_R32_SINT = 214, - ISL_FORMAT_R32_UINT = 215, - ISL_FORMAT_R32_FLOAT = 216, - ISL_FORMAT_R24_UNORM_X8_TYPELESS = 217, - ISL_FORMAT_X24_TYPELESS_G8_UINT = 218, - ISL_FORMAT_L32_UNORM = 221, - ISL_FORMAT_A32_UNORM = 222, - ISL_FORMAT_L16A16_UNORM = 223, - ISL_FORMAT_I24X8_UNORM = 224, - ISL_FORMAT_L24X8_UNORM = 225, - ISL_FORMAT_A24X8_UNORM = 226, - ISL_FORMAT_I32_FLOAT = 227, - ISL_FORMAT_L32_FLOAT = 228, - ISL_FORMAT_A32_FLOAT = 229, - ISL_FORMAT_X8B8_UNORM_G8R8_SNORM = 230, - ISL_FORMAT_A8X8_UNORM_G8R8_SNORM = 231, - ISL_FORMAT_B8X8_UNORM_G8R8_SNORM = 232, - ISL_FORMAT_B8G8R8X8_UNORM = 233, - ISL_FORMAT_B8G8R8X8_UNORM_SRGB = 234, - ISL_FORMAT_R8G8B8X8_UNORM = 235, - ISL_FORMAT_R8G8B8X8_UNORM_SRGB = 236, - ISL_FORMAT_R9G9B9E5_SHAREDEXP = 237, - ISL_FORMAT_B10G10R10X2_UNORM = 238, - ISL_FORMAT_L16A16_FLOAT = 240, - ISL_FORMAT_R32_UNORM = 241, - ISL_FORMAT_R32_SNORM = 242, - ISL_FORMAT_R10G10B10X2_USCALED = 243, - ISL_FORMAT_R8G8B8A8_SSCALED = 244, - ISL_FORMAT_R8G8B8A8_USCALED = 245, - ISL_FORMAT_R16G16_SSCALED = 246, - ISL_FORMAT_R16G16_USCALED = 247, - ISL_FORMAT_R32_SSCALED = 248, - ISL_FORMAT_R32_USCALED = 249, - ISL_FORMAT_B5G6R5_UNORM = 256, - 
ISL_FORMAT_B5G6R5_UNORM_SRGB = 257, - ISL_FORMAT_B5G5R5A1_UNORM = 258, - ISL_FORMAT_B5G5R5A1_UNORM_SRGB = 259, - ISL_FORMAT_B4G4R4A4_UNORM = 260, - ISL_FORMAT_B4G4R4A4_UNORM_SRGB = 261, - ISL_FORMAT_R8G8_UNORM = 262, - ISL_FORMAT_R8G8_SNORM = 263, - ISL_FORMAT_R8G8_SINT = 264, - ISL_FORMAT_R8G8_UINT = 265, - ISL_FORMAT_R16_UNORM = 266, - ISL_FORMAT_R16_SNORM = 267, - ISL_FORMAT_R16_SINT = 268, - ISL_FORMAT_R16_UINT = 269, - ISL_FORMAT_R16_FLOAT = 270, - ISL_FORMAT_A8P8_UNORM_PALETTE0 = 271, - ISL_FORMAT_A8P8_UNORM_PALETTE1 = 272, - ISL_FORMAT_I16_UNORM = 273, - ISL_FORMAT_L16_UNORM = 274, - ISL_FORMAT_A16_UNORM = 275, - ISL_FORMAT_L8A8_UNORM = 276, - ISL_FORMAT_I16_FLOAT = 277, - ISL_FORMAT_L16_FLOAT = 278, - ISL_FORMAT_A16_FLOAT = 279, - ISL_FORMAT_L8A8_UNORM_SRGB = 280, - ISL_FORMAT_R5G5_SNORM_B6_UNORM = 281, - ISL_FORMAT_B5G5R5X1_UNORM = 282, - ISL_FORMAT_B5G5R5X1_UNORM_SRGB = 283, - ISL_FORMAT_R8G8_SSCALED = 284, - ISL_FORMAT_R8G8_USCALED = 285, - ISL_FORMAT_R16_SSCALED = 286, - ISL_FORMAT_R16_USCALED = 287, - ISL_FORMAT_P8A8_UNORM_PALETTE0 = 290, - ISL_FORMAT_P8A8_UNORM_PALETTE1 = 291, - ISL_FORMAT_A1B5G5R5_UNORM = 292, - ISL_FORMAT_A4B4G4R4_UNORM = 293, - ISL_FORMAT_L8A8_UINT = 294, - ISL_FORMAT_L8A8_SINT = 295, - ISL_FORMAT_R8_UNORM = 320, - ISL_FORMAT_R8_SNORM = 321, - ISL_FORMAT_R8_SINT = 322, - ISL_FORMAT_R8_UINT = 323, - ISL_FORMAT_A8_UNORM = 324, - ISL_FORMAT_I8_UNORM = 325, - ISL_FORMAT_L8_UNORM = 326, - ISL_FORMAT_P4A4_UNORM_PALETTE0 = 327, - ISL_FORMAT_A4P4_UNORM_PALETTE0 = 328, - ISL_FORMAT_R8_SSCALED = 329, - ISL_FORMAT_R8_USCALED = 330, - ISL_FORMAT_P8_UNORM_PALETTE0 = 331, - ISL_FORMAT_L8_UNORM_SRGB = 332, - ISL_FORMAT_P8_UNORM_PALETTE1 = 333, - ISL_FORMAT_P4A4_UNORM_PALETTE1 = 334, - ISL_FORMAT_A4P4_UNORM_PALETTE1 = 335, - ISL_FORMAT_Y8_UNORM = 336, - ISL_FORMAT_L8_UINT = 338, - ISL_FORMAT_L8_SINT = 339, - ISL_FORMAT_I8_UINT = 340, - ISL_FORMAT_I8_SINT = 341, - ISL_FORMAT_DXT1_RGB_SRGB = 384, - ISL_FORMAT_R1_UNORM = 385, - 
ISL_FORMAT_YCRCB_NORMAL = 386, - ISL_FORMAT_YCRCB_SWAPUVY = 387, - ISL_FORMAT_P2_UNORM_PALETTE0 = 388, - ISL_FORMAT_P2_UNORM_PALETTE1 = 389, - ISL_FORMAT_BC1_UNORM = 390, - ISL_FORMAT_BC2_UNORM = 391, - ISL_FORMAT_BC3_UNORM = 392, - ISL_FORMAT_BC4_UNORM = 393, - ISL_FORMAT_BC5_UNORM = 394, - ISL_FORMAT_BC1_UNORM_SRGB = 395, - ISL_FORMAT_BC2_UNORM_SRGB = 396, - ISL_FORMAT_BC3_UNORM_SRGB = 397, - ISL_FORMAT_MONO8 = 398, - ISL_FORMAT_YCRCB_SWAPUV = 399, - ISL_FORMAT_YCRCB_SWAPY = 400, - ISL_FORMAT_DXT1_RGB = 401, - ISL_FORMAT_FXT1 = 402, - ISL_FORMAT_R8G8B8_UNORM = 403, - ISL_FORMAT_R8G8B8_SNORM = 404, - ISL_FORMAT_R8G8B8_SSCALED = 405, - ISL_FORMAT_R8G8B8_USCALED = 406, - ISL_FORMAT_R64G64B64A64_FLOAT = 407, - ISL_FORMAT_R64G64B64_FLOAT = 408, - ISL_FORMAT_BC4_SNORM = 409, - ISL_FORMAT_BC5_SNORM = 410, - ISL_FORMAT_R16G16B16_FLOAT = 411, - ISL_FORMAT_R16G16B16_UNORM = 412, - ISL_FORMAT_R16G16B16_SNORM = 413, - ISL_FORMAT_R16G16B16_SSCALED = 414, - ISL_FORMAT_R16G16B16_USCALED = 415, - ISL_FORMAT_BC6H_SF16 = 417, - ISL_FORMAT_BC7_UNORM = 418, - ISL_FORMAT_BC7_UNORM_SRGB = 419, - ISL_FORMAT_BC6H_UF16 = 420, - ISL_FORMAT_PLANAR_420_8 = 421, - ISL_FORMAT_R8G8B8_UNORM_SRGB = 424, - ISL_FORMAT_ETC1_RGB8 = 425, - ISL_FORMAT_ETC2_RGB8 = 426, - ISL_FORMAT_EAC_R11 = 427, - ISL_FORMAT_EAC_RG11 = 428, - ISL_FORMAT_EAC_SIGNED_R11 = 429, - ISL_FORMAT_EAC_SIGNED_RG11 = 430, - ISL_FORMAT_ETC2_SRGB8 = 431, - ISL_FORMAT_R16G16B16_UINT = 432, - ISL_FORMAT_R16G16B16_SINT = 433, - ISL_FORMAT_R32_SFIXED = 434, - ISL_FORMAT_R10G10B10A2_SNORM = 435, - ISL_FORMAT_R10G10B10A2_USCALED = 436, - ISL_FORMAT_R10G10B10A2_SSCALED = 437, - ISL_FORMAT_R10G10B10A2_SINT = 438, - ISL_FORMAT_B10G10R10A2_SNORM = 439, - ISL_FORMAT_B10G10R10A2_USCALED = 440, - ISL_FORMAT_B10G10R10A2_SSCALED = 441, - ISL_FORMAT_B10G10R10A2_UINT = 442, - ISL_FORMAT_B10G10R10A2_SINT = 443, - ISL_FORMAT_R64G64B64A64_PASSTHRU = 444, - ISL_FORMAT_R64G64B64_PASSTHRU = 445, - ISL_FORMAT_ETC2_RGB8_PTA = 448, - 
ISL_FORMAT_ETC2_SRGB8_PTA = 449, - ISL_FORMAT_ETC2_EAC_RGBA8 = 450, - ISL_FORMAT_ETC2_EAC_SRGB8_A8 = 451, - ISL_FORMAT_R8G8B8_UINT = 456, - ISL_FORMAT_R8G8B8_SINT = 457, - ISL_FORMAT_RAW = 511, - - /* Hardware doesn't understand this out-of-band value */ - ISL_FORMAT_UNSUPPORTED = UINT16_MAX, -}; - -/** - * Numerical base type for channels of isl_format. - */ -enum isl_base_type { - ISL_VOID, - ISL_RAW, - ISL_UNORM, - ISL_SNORM, - ISL_UFLOAT, - ISL_SFLOAT, - ISL_UFIXED, - ISL_SFIXED, - ISL_UINT, - ISL_SINT, - ISL_USCALED, - ISL_SSCALED, -}; - -/** - * Colorspace of isl_format. - */ -enum isl_colorspace { - ISL_COLORSPACE_NONE = 0, - ISL_COLORSPACE_LINEAR, - ISL_COLORSPACE_SRGB, - ISL_COLORSPACE_YUV, -}; - -/** - * Texture compression mode of isl_format. - */ -enum isl_txc { - ISL_TXC_NONE = 0, - ISL_TXC_DXT1, - ISL_TXC_DXT3, - ISL_TXC_DXT5, - ISL_TXC_FXT1, - ISL_TXC_RGTC1, - ISL_TXC_RGTC2, - ISL_TXC_BPTC, - ISL_TXC_ETC1, - ISL_TXC_ETC2, -}; - -/** - * @brief Hardware tile mode - * - * WARNING: These values differ from the hardware enum values, which are - * unstable across hardware generations. - * - * Note that legacy Y tiling is ISL_TILING_Y0 instead of ISL_TILING_Y, to - * clearly distinguish it from Yf and Ys. - */ -enum isl_tiling { - ISL_TILING_LINEAR = 0, - ISL_TILING_W, - ISL_TILING_X, - ISL_TILING_Y0, /**< Legacy Y tiling */ - ISL_TILING_Yf, - ISL_TILING_Ys, -}; - -/** - * @defgroup Tiling Flags - * @{ - */ -typedef uint32_t isl_tiling_flags_t; -#define ISL_TILING_LINEAR_BIT (1u << ISL_TILING_LINEAR) -#define ISL_TILING_W_BIT (1u << ISL_TILING_W) -#define ISL_TILING_X_BIT (1u << ISL_TILING_X) -#define ISL_TILING_Y0_BIT (1u << ISL_TILING_Y0) -#define ISL_TILING_Yf_BIT (1u << ISL_TILING_Yf) -#define ISL_TILING_Ys_BIT (1u << ISL_TILING_Ys) -#define ISL_TILING_ANY_MASK (~0u) -#define ISL_TILING_NON_LINEAR_MASK (~ISL_TILING_LINEAR_BIT) - -/** Any Y tiling, including legacy Y tiling. 
*/ -#define ISL_TILING_ANY_Y_MASK (ISL_TILING_Y0_BIT | \ - ISL_TILING_Yf_BIT | \ - ISL_TILING_Ys_BIT) - -/** The Skylake BSpec refers to Yf and Ys as "standard tiling formats". */ -#define ISL_TILING_STD_Y_MASK (ISL_TILING_Yf_BIT | \ - ISL_TILING_Ys_BIT) -/** @} */ - -/** - * @brief Logical dimension of surface. - * - * Note: There is no dimension for cube map surfaces. ISL interprets cube maps - * as 2D array surfaces. - */ -enum isl_surf_dim { - ISL_SURF_DIM_1D, - ISL_SURF_DIM_2D, - ISL_SURF_DIM_3D, -}; - -/** - * @brief Physical layout of the surface's dimensions. - */ -enum isl_dim_layout { - /** - * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section - * 6.17.3: 2D Surfaces. - * - * On many gens, 1D surfaces share the same layout as 2D surfaces. From - * the G35 PRM >> Volume 1: Graphics Core >> Section 6.17.2: 1D Surfaces: - * - * One-dimensional surfaces are identical to 2D surfaces with height of - * one. - */ - ISL_DIM_LAYOUT_GEN4_2D, - - /** - * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section - * 6.17.5: 3D Surfaces. - */ - ISL_DIM_LAYOUT_GEN4_3D, - - /** - * For details, see the Skylake BSpec >> Memory Views >> Common Surface - * Formats >> Surface Layout and Tiling >> » 1D Surfaces. 
- */ - ISL_DIM_LAYOUT_GEN9_1D, -}; - -/* TODO(chadv): Explain */ -enum isl_array_pitch_span { - ISL_ARRAY_PITCH_SPAN_FULL, - ISL_ARRAY_PITCH_SPAN_COMPACT, -}; - -/** - * @defgroup Surface Usage - * @{ - */ -typedef uint64_t isl_surf_usage_flags_t; -#define ISL_SURF_USAGE_RENDER_TARGET_BIT (1u << 0) -#define ISL_SURF_USAGE_DEPTH_BIT (1u << 1) -#define ISL_SURF_USAGE_STENCIL_BIT (1u << 2) -#define ISL_SURF_USAGE_TEXTURE_BIT (1u << 3) -#define ISL_SURF_USAGE_CUBE_BIT (1u << 4) -#define ISL_SURF_USAGE_DISABLE_AUX_BIT (1u << 5) -#define ISL_SURF_USAGE_DISPLAY_BIT (1u << 6) -#define ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT (1u << 7) -#define ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT (1u << 8) -#define ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT (1u << 9) -#define ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT (1u << 10) -#define ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT (1u << 11) -/** @} */ - -/** - * @brief Multisample Format - */ -enum isl_msaa_layout { - /** - * @brief Suface is single-sampled. - */ - ISL_MSAA_LAYOUT_NONE, - - /** - * @brief [SNB+] Interleaved Multisample Format - * - * In this format, multiple samples are interleaved into each cacheline. - * In other words, the sample index is swizzled into the low 6 bits of the - * surface's virtual address space. - * - * For example, suppose the surface is legacy Y tiled, is 4x multisampled, - * and its pixel format is 32bpp. Then the first cacheline is arranged - * thus: - * - * (0,0,0) (0,1,0) (0,0,1) (1,0,1) - * (1,0,0) (1,1,0) (0,1,1) (1,1,1) - * - * (0,0,2) (1,0,2) (0,0,3) (1,0,3) - * (0,1,2) (1,1,2) (0,1,3) (1,1,3) - * - * The hardware docs refer to this format with multiple terms. In - * Sandybridge, this is the only multisample format; so no term is used. - * The Ivybridge docs refer to surfaces in this format as IMS (Interleaved - * Multisample Surface). Later hardware docs additionally refer to this - * format as MSFMT_DEPTH_STENCIL (because the format is deprecated for - * color surfaces). 
- * - * See the Sandybridge PRM, Volume 4, Part 1, Section 2.7 "Multisampled - * Surface Behavior". - * - * See the Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.1 "Interleaved - * Multisampled Surfaces". - */ - ISL_MSAA_LAYOUT_INTERLEAVED, - - /** - * @brief [IVB+] Array Multisample Format - * - * In this format, the surface's physical layout resembles that of a - * 2D array surface. - * - * Suppose the multisample surface's logical extent is (w, h) and its - * sample count is N. Then surface's physical extent is the same as - * a singlesample 2D surface whose logical extent is (w, h) and array - * length is N. Array slice `i` contains the pixel values for sample - * index `i`. - * - * The Ivybridge docs refer to surfaces in this format as UMS - * (Uncompressed Multsample Layout) and CMS (Compressed Multisample - * Surface). The Broadwell docs additionally refer to this format as - * MSFMT_MSS (MSS=Multisample Surface Storage). - * - * See the Broadwell PRM, Volume 5 "Memory Views", Section "Uncompressed - * Multisample Surfaces". - * - * See the Broadwell PRM, Volume 5 "Memory Views", Section "Compressed - * Multisample Surfaces". 
- */ - ISL_MSAA_LAYOUT_ARRAY, -}; - - -struct isl_device { - const struct brw_device_info *info; - bool use_separate_stencil; -}; - -struct isl_extent2d { - union { uint32_t w, width; }; - union { uint32_t h, height; }; -}; - -struct isl_extent3d { - union { uint32_t w, width; }; - union { uint32_t h, height; }; - union { uint32_t d, depth; }; -}; - -struct isl_extent4d { - union { uint32_t w, width; }; - union { uint32_t h, height; }; - union { uint32_t d, depth; }; - union { uint32_t a, array_len; }; -}; - -struct isl_channel_layout { - enum isl_base_type type; - uint8_t bits; /**< Size in bits */ -}; - -struct isl_format_layout { - enum isl_format format; - - uint16_t bpb; /**< Bits per block */ - uint8_t bs; /**< Block size, in bytes, rounded towards 0 */ - uint8_t bw; /**< Block width, in pixels */ - uint8_t bh; /**< Block height, in pixels */ - uint8_t bd; /**< Block depth, in pixels */ - - struct { - struct isl_channel_layout r; /**< Red channel */ - struct isl_channel_layout g; /**< Green channel */ - struct isl_channel_layout b; /**< Blue channel */ - struct isl_channel_layout a; /**< Alpha channel */ - struct isl_channel_layout l; /**< Luminance channel */ - struct isl_channel_layout i; /**< Intensity channel */ - struct isl_channel_layout p; /**< Palette channel */ - } channels; - - enum isl_colorspace colorspace; - enum isl_txc txc; -}; - -struct isl_tile_info { - enum isl_tiling tiling; - uint32_t width; /**< in bytes */ - uint32_t height; /**< in rows of memory */ - uint32_t size; /**< in bytes */ -}; - -/** - * @brief Input to surface initialization - * - * @invariant width >= 1 - * @invariant height >= 1 - * @invariant depth >= 1 - * @invariant levels >= 1 - * @invariant samples >= 1 - * @invariant array_len >= 1 - * - * @invariant if 1D then height == 1 and depth == 1 and samples == 1 - * @invariant if 2D then depth == 1 - * @invariant if 3D then array_len == 1 and samples == 1 - */ -struct isl_surf_init_info { - enum isl_surf_dim dim; - enum 
isl_format format; - - uint32_t width; - uint32_t height; - uint32_t depth; - uint32_t levels; - uint32_t array_len; - uint32_t samples; - - /** Lower bound for isl_surf::alignment, in bytes. */ - uint32_t min_alignment; - - /** Lower bound for isl_surf::pitch, in bytes. */ - uint32_t min_pitch; - - isl_surf_usage_flags_t usage; - - /** Flags that alter how ISL selects isl_surf::tiling. */ - isl_tiling_flags_t tiling_flags; -}; - -struct isl_surf { - enum isl_surf_dim dim; - enum isl_dim_layout dim_layout; - enum isl_msaa_layout msaa_layout; - enum isl_tiling tiling; - enum isl_format format; - - /** - * Alignment of the upper-left sample of each subimage, in units of surface - * elements. - */ - struct isl_extent3d image_alignment_el; - - /** - * Logical extent of the surface's base level, in units of pixels. This is - * identical to the extent defined in isl_surf_init_info. - */ - struct isl_extent4d logical_level0_px; - - /** - * Physical extent of the surface's base level, in units of pixels. - * - * Consider isl_dim_layout as an operator that transforms a logical surface - * layout to a physical surface layout. Then - * - * logical_layout := (isl_surf::dim, isl_surf::logical_level0_px) - * isl_surf::phys_level0_sa := isl_surf::dim_layout * logical_layout - */ - struct isl_extent4d phys_level0_sa; - - uint32_t levels; - uint32_t samples; - - /** Total size of the surface, in bytes. */ - uint32_t size; - - /** Required alignment for the surface's base address. */ - uint32_t alignment; - - /** - * Pitch between vertically adjacent samples, in bytes. - */ - uint32_t row_pitch; - - /** - * Pitch between physical array slices, in rows of surface elements. - */ - uint32_t array_pitch_el_rows; - - enum isl_array_pitch_span array_pitch_span; - - /** Copy of isl_surf_init_info::usage. 
*/ - isl_surf_usage_flags_t usage; -}; - -extern const struct isl_format_layout isl_format_layouts[]; - -void -isl_device_init(struct isl_device *dev, - const struct brw_device_info *info); - -static inline const struct isl_format_layout * ATTRIBUTE_CONST -isl_format_get_layout(enum isl_format fmt) -{ - return &isl_format_layouts[fmt]; -} - -bool -isl_format_has_sint_channel(enum isl_format fmt) ATTRIBUTE_CONST; - -static inline bool -isl_format_is_compressed(enum isl_format fmt) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - return fmtl->txc != ISL_TXC_NONE; -} - -static inline bool -isl_format_has_bc_compression(enum isl_format fmt) -{ - switch (isl_format_get_layout(fmt)->txc) { - case ISL_TXC_DXT1: - case ISL_TXC_DXT3: - case ISL_TXC_DXT5: - return true; - case ISL_TXC_NONE: - case ISL_TXC_FXT1: - case ISL_TXC_RGTC1: - case ISL_TXC_RGTC2: - case ISL_TXC_BPTC: - case ISL_TXC_ETC1: - case ISL_TXC_ETC2: - return false; - } - - unreachable("bad texture compression mode"); - return false; -} - -static inline bool -isl_format_is_yuv(enum isl_format fmt) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - return fmtl->colorspace == ISL_COLORSPACE_YUV; -} - -static inline bool -isl_format_block_is_1x1x1(enum isl_format fmt) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - return fmtl->bw == 1 && fmtl->bh == 1 && fmtl->bd == 1; -} - -enum isl_format -isl_lower_storage_image_format(const struct isl_device *dev, - enum isl_format fmt); - -static inline bool -isl_tiling_is_std_y(enum isl_tiling tiling) -{ - return (1u << tiling) & ISL_TILING_STD_Y_MASK; -} - -bool -isl_tiling_get_info(const struct isl_device *dev, - enum isl_tiling tiling, - uint32_t format_block_size, - struct isl_tile_info *info); - -void -isl_tiling_get_extent(const struct isl_device *dev, - enum isl_tiling tiling, - uint32_t format_block_size, - struct isl_extent2d *e); -bool -isl_surf_choose_tiling(const struct isl_device 
*dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling *tiling); - -static inline bool -isl_surf_usage_is_display(isl_surf_usage_flags_t usage) -{ - return usage & ISL_SURF_USAGE_DISPLAY_BIT; -} - -static inline bool -isl_surf_usage_is_depth(isl_surf_usage_flags_t usage) -{ - return usage & ISL_SURF_USAGE_DEPTH_BIT; -} - -static inline bool -isl_surf_usage_is_stencil(isl_surf_usage_flags_t usage) -{ - return usage & ISL_SURF_USAGE_STENCIL_BIT; -} - -static inline bool -isl_surf_usage_is_depth_and_stencil(isl_surf_usage_flags_t usage) -{ - return (usage & ISL_SURF_USAGE_DEPTH_BIT) && - (usage & ISL_SURF_USAGE_STENCIL_BIT); -} - -static inline bool -isl_surf_usage_is_depth_or_stencil(isl_surf_usage_flags_t usage) -{ - return usage & (ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT); -} - -static inline bool -isl_surf_info_is_z16(const struct isl_surf_init_info *info) -{ - return (info->usage & ISL_SURF_USAGE_DEPTH_BIT) && - (info->format == ISL_FORMAT_R16_UNORM); -} - -static inline bool -isl_surf_info_is_z32_float(const struct isl_surf_init_info *info) -{ - return (info->usage & ISL_SURF_USAGE_DEPTH_BIT) && - (info->format == ISL_FORMAT_R32_FLOAT); -} - -static inline struct isl_extent2d -isl_extent2d(uint32_t width, uint32_t height) -{ - return (struct isl_extent2d) { .w = width, .h = height }; -} - -static inline struct isl_extent3d -isl_extent3d(uint32_t width, uint32_t height, uint32_t depth) -{ - return (struct isl_extent3d) { .w = width, .h = height, .d = depth }; -} - -static inline struct isl_extent4d -isl_extent4d(uint32_t width, uint32_t height, uint32_t depth, - uint32_t array_len) -{ - return (struct isl_extent4d) { - .w = width, - .h = height, - .d = depth, - .a = array_len, - }; -} - -#define isl_surf_init(dev, surf, ...) 
\ - isl_surf_init_s((dev), (surf), \ - &(struct isl_surf_init_info) { __VA_ARGS__ }); - -bool -isl_surf_init_s(const struct isl_device *dev, - struct isl_surf *surf, - const struct isl_surf_init_info *restrict info); - -/** - * Alignment of the upper-left sample of each subimage, in units of surface - * elements. - */ -static inline struct isl_extent3d -isl_surf_get_image_alignment_el(const struct isl_surf *surf) -{ - return surf->image_alignment_el; -} - -/** - * Alignment of the upper-left sample of each subimage, in units of surface - * samples. - */ -static inline struct isl_extent3d -isl_surf_get_image_alignment_sa(const struct isl_surf *surf) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - - return (struct isl_extent3d) { - .w = fmtl->bw * surf->image_alignment_el.w, - .h = fmtl->bh * surf->image_alignment_el.h, - .d = fmtl->bd * surf->image_alignment_el.d, - }; -} - -/** - * Pitch between physical array slices, in rows of surface elements. - */ -static inline uint32_t -isl_surf_get_array_pitch_el_rows(const struct isl_surf *surf) -{ - return surf->array_pitch_el_rows; -} - -/** - * Pitch between physical array slices, in rows of surface samples. - */ -static inline uint32_t -isl_surf_get_array_pitch_sa_rows(const struct isl_surf *surf) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - return fmtl->bh * isl_surf_get_array_pitch_el_rows(surf); -} - -/** - * Pitch between physical array slices, in bytes. 
- */ -static inline uint32_t -isl_surf_get_array_pitch(const struct isl_surf *surf) -{ - return isl_surf_get_array_pitch_sa_rows(surf) * surf->row_pitch; -} - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/isl_format_layout.csv b/src/vulkan/isl_format_layout.csv deleted file mode 100644 index 2a302b002ef..00000000000 --- a/src/vulkan/isl_format_layout.csv +++ /dev/null @@ -1,287 +0,0 @@ -# Copyright 2015 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -# -# @file -# @brief Layout of all hardware surface formats -# -# For the official list, see Broadwell PRM: Volume 2b: Command Reference: -# Enumerations: SURFACE_FORMAT. 
-# - - -# Columns: -# name: format name in PRM -# bpb: bits per block -# bw: block width, in pixels -# bh: block height, in pixels -# bd: block depth, in pixels -# r: red channel, data type and bitwidth -# g: green channel -# b: blue channel -# a: alpha channel -# l: luminance channel -# i: intensity channel -# p: palette channel -# space: colorspace -# txc: texture compression -# -# Data Types: -# x: void -# r: raw -# un: unorm -# sn: snorm -# uf: ufloat -# sf: sfloat -# ux: ufixed -# sx: sfixed -# ui: uint -# si: sint -# us: uscaled -# ss: sscaled - - -# Table is aligned with the Vim commands below, using the Align plugin: -# :AlignCtrl lr+ p8000000000000P1 -# /^# name/,$ Align, - -# name , bpb, bw, bh, bd, r, g, b, a, l, i, p, space, txc -R32G32B32A32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, sf32, , , , linear, -R32G32B32A32_SINT , 128, 1, 1, 1, si32, si32, si32, si32, , , , linear, -R32G32B32A32_UINT , 128, 1, 1, 1, ui32, ui32, ui32, ui32, , , , linear, -R32G32B32A32_UNORM , 128, 1, 1, 1, un32, un32, un32, un32, , , , linear, -R32G32B32A32_SNORM , 128, 1, 1, 1, sn32, sn32, sn32, sn32, , , , linear, -R64G64_FLOAT , 128, 1, 1, 1, sf64, sf64, , , , , , linear, -R32G32B32X32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, x32, , , , linear, -R32G32B32A32_SSCALED , 128, 1, 1, 1, ss32, ss32, ss32, ss32, , , , linear, -R32G32B32A32_USCALED , 128, 1, 1, 1, us32, us32, us32, us32, , , , linear, -R32G32B32A32_SFIXED , 128, 1, 1, 1, sx32, sx32, sx32, sx32, , , , linear, -R64G64_PASSTHRU , 128, 1, 1, 1, r64, r64, , , , , , , -R32G32B32_FLOAT , 96, 1, 1, 1, sf32, sf32, sf32, , , , , linear, -R32G32B32_SINT , 96, 1, 1, 1, si32, si32, si32, , , , , linear, -R32G32B32_UINT , 96, 1, 1, 1, ui32, ui32, ui32, , , , , linear, -R32G32B32_UNORM , 96, 1, 1, 1, un32, un32, un32, , , , , linear, -R32G32B32_SNORM , 96, 1, 1, 1, sn32, sn32, sn32, , , , , linear, -R32G32B32_SSCALED , 96, 1, 1, 1, ss32, ss32, ss32, , , , , linear, -R32G32B32_USCALED , 96, 1, 1, 1, us32, us32, us32, , , , , linear, 
-R32G32B32_SFIXED , 96, 1, 1, 1, sx32, sx32, sx32, , , , , linear, -R16G16B16A16_UNORM , 64, 1, 1, 1, un16, un16, un16, un16, , , , linear, -R16G16B16A16_SNORM , 64, 1, 1, 1, sn16, sn16, sn16, sn16, , , , linear, -R16G16B16A16_SINT , 64, 1, 1, 1, si16, si16, si16, si16, , , , linear, -R16G16B16A16_UINT , 64, 1, 1, 1, ui16, ui16, ui16, ui16, , , , linear, -R16G16B16A16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, sf16, , , , linear, -R32G32_FLOAT , 64, 1, 1, 1, sf32, sf32, , , , , , linear, -R32G32_SINT , 64, 1, 1, 1, si32, si32, , , , , , linear, -R32G32_UINT , 64, 1, 1, 1, ui32, ui32, , , , , , linear, -R32_FLOAT_X8X24_TYPELESS , 64, 1, 1, 1, sf32, x8, x24, , , , , linear, -X32_TYPELESS_G8X24_UINT , 64, 1, 1, 1, x32, ui8, x24, , , , , linear, -L32A32_FLOAT , 64, 1, 1, 1, , , , sf32, sf32, , , linear, -R32G32_UNORM , 64, 1, 1, 1, un32, un32, , , , , , linear, -R32G32_SNORM , 64, 1, 1, 1, sn32, sn32, , , , , , linear, -R64_FLOAT , 64, 1, 1, 1, sf64, , , , , , , linear, -R16G16B16X16_UNORM , 64, 1, 1, 1, un16, un16, un16, x16, , , , linear, -R16G16B16X16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, x16, , , , linear, -A32X32_FLOAT , 64, 1, 1, 1, , , , sf32, x32, , , alpha, -L32X32_FLOAT , 64, 1, 1, 1, , , , x32, sf32, , , linear, -I32X32_FLOAT , 64, 1, 1, 1, , , , x32, , sf32, , linear, -R16G16B16A16_SSCALED , 64, 1, 1, 1, ss16, ss16, ss16, ss16, , , , linear, -R16G16B16A16_USCALED , 64, 1, 1, 1, us16, us16, us16, us16, , , , linear, -R32G32_SSCALED , 64, 1, 1, 1, ss32, ss32, , , , , , linear, -R32G32_USCALED , 64, 1, 1, 1, us32, us32, , , , , , linear, -R32G32_SFIXED , 64, 1, 1, 1, sx32, sx32, , , , , , linear, -R64_PASSTHRU , 64, 1, 1, 1, r64, , , , , , , , -B8G8R8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear, -B8G8R8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb, -R10G10B10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, -R10G10B10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, -R10G10B10A2_UINT , 32, 1, 1, 1, ui10, ui10, 
ui10, ui2, , , , linear, -R10G10B10_SNORM_A2_UNORM , 32, 1, 1, 1, sn10, sn10, sn10, un2, , , , linear, -R8G8B8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear, -R8G8B8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb, -R8G8B8A8_SNORM , 32, 1, 1, 1, sn8, sn8, sn8, sn8, , , , linear, -R8G8B8A8_SINT , 32, 1, 1, 1, si8, si8, si8, si8, , , , linear, -R8G8B8A8_UINT , 32, 1, 1, 1, ui8, ui8, ui8, ui8, , , , linear, -R16G16_UNORM , 32, 1, 1, 1, un16, un16, , , , , , linear, -R16G16_SNORM , 32, 1, 1, 1, sn16, sn16, , , , , , linear, -R16G16_SINT , 32, 1, 1, 1, si16, si16, , , , , , linear, -R16G16_UINT , 32, 1, 1, 1, ui16, ui16, , , , , , linear, -R16G16_FLOAT , 32, 1, 1, 1, sf16, sf16, , , , , , linear, -B10G10R10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, -B10G10R10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, -R11G11B10_FLOAT , 32, 1, 1, 1, sf11, sf11, sf10, , , , , linear, -R32_SINT , 32, 1, 1, 1, si32, , , , , , , linear, -R32_UINT , 32, 1, 1, 1, ui32, , , , , , , linear, -R32_FLOAT , 32, 1, 1, 1, sf32, , , , , , , linear, -R24_UNORM_X8_TYPELESS , 32, 1, 1, 1, un24, x8, , , , , , linear, -X24_TYPELESS_G8_UINT , 32, 1, 1, 1, x24, ui8, , , , , , linear, -L32_UNORM , 32, 1, 1, 1, , , , , un32, , , linear, -A32_UNORM , 32, 1, 1, 1, , , , un32, , , , alpha, -L16A16_UNORM , 32, 1, 1, 1, , , , un16, un16, , , linear, -I24X8_UNORM , 32, 1, 1, 1, , , , x8, , un24, , linear, -L24X8_UNORM , 32, 1, 1, 1, , , , x8, un24, , , linear, -A24X8_UNORM , 32, 1, 1, 1, , , , un24, x8, , , alpha, -I32_FLOAT , 32, 1, 1, 1, , , , , , sf32, , linear, -L32_FLOAT , 32, 1, 1, 1, , , , , sf32, , , linear, -A32_FLOAT , 32, 1, 1, 1, , , , sf32, , , , alpha, -X8B8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear, -A8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, x8, un8, , , , linear, -B8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear, -B8G8R8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear, -B8G8R8X8_UNORM_SRGB 
, 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb, -R8G8B8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear, -R8G8B8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb, -R9G9B9E5_SHAREDEXP , 32, 1, 1, 1, ui9, ui9, ui9, , , , , linear, -B10G10R10X2_UNORM , 32, 1, 1, 1, un10, un10, un10, x2, , , , linear, -L16A16_FLOAT , 32, 1, 1, 1, , , , sf16, sf16, , , linear, -R32_UNORM , 32, 1, 1, 1, un32, , , , , , , linear, -R32_SNORM , 32, 1, 1, 1, sn32, , , , , , , linear, -R10G10B10X2_USCALED , 32, 1, 1, 1, us10, us10, us10, x2, , , , linear, -R8G8B8A8_SSCALED , 32, 1, 1, 1, ss8, ss8, ss8, ss8, , , , linear, -R8G8B8A8_USCALED , 32, 1, 1, 1, us8, us8, us8, us8, , , , linear, -R16G16_SSCALED , 32, 1, 1, 1, ss16, ss6, , , , , , linear, -R16G16_USCALED , 32, 1, 1, 1, us16, us16, , , , , , linear, -R32_SSCALED , 32, 1, 1, 1, ss32, , , , , , , linear, -R32_USCALED , 32, 1, 1, 1, us32, , , , , , , linear, -B5G6R5_UNORM , 16, 1, 1, 1, un5, un6, un5, , , , , linear, -B5G6R5_UNORM_SRGB , 16, 1, 1, 1, un5, un6, un5, , , , , srgb, -B5G5R5A1_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear, -B5G5R5A1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, un1, , , , srgb, -B4G4R4A4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear, -B4G4R4A4_UNORM_SRGB , 16, 1, 1, 1, un4, un4, un4, un4, , , , srgb, -R8G8_UNORM , 16, 1, 1, 1, un8, un8, , , , , , linear, -R8G8_SNORM , 16, 1, 1, 1, sn8, sn8, , , , , , linear, -R8G8_SINT , 16, 1, 1, 1, si8, si8, , , , , , linear, -R8G8_UINT , 16, 1, 1, 1, ui8, ui8, , , , , , linear, -R16_UNORM , 16, 1, 1, 1, un16, , , , , , , linear, -R16_SNORM , 16, 1, 1, 1, sn16, , , , , , , linear, -R16_SINT , 16, 1, 1, 1, si16, , , , , , , linear, -R16_UINT , 16, 1, 1, 1, ui16, , , , , , , linear, -R16_FLOAT , 16, 1, 1, 1, sf16, , , , , , , linear, -A8P8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear, -A8P8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear, -I16_UNORM , 16, 1, 1, 1, , , , , , un16, , linear, -L16_UNORM , 16, 1, 1, 1, , , , , un16, , , 
linear, -A16_UNORM , 16, 1, 1, 1, , , , un16, , , , alpha, -L8A8_UNORM , 16, 1, 1, 1, , , , un8, un8, , , linear, -I16_FLOAT , 16, 1, 1, 1, , , , , , sf16, , linear, -L16_FLOAT , 16, 1, 1, 1, , , , , sf16, , , linear, -A16_FLOAT , 16, 1, 1, 1, , , , sf16, , , , alpha, -L8A8_UNORM_SRGB , 16, 1, 1, 1, , , , un8, un8, , , srgb, -R5G5_SNORM_B6_UNORM , 16, 1, 1, 1, sn5, sn5, un6, , , , , linear, -B5G5R5X1_UNORM , 16, 1, 1, 1, un5, un5, un5, x1, , , , linear, -B5G5R5X1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, x1, , , , srgb, -R8G8_SSCALED , 16, 1, 1, 1, ss8, ss8, , , , , , linear, -R8G8_USCALED , 16, 1, 1, 1, us8, us8, , , , , , linear, -R16_SSCALED , 16, 1, 1, 1, ss16, , , , , , , linear, -R16_USCALED , 16, 1, 1, 1, us16, , , , , , , linear, -P8A8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear, -P8A8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear, -A1B5G5R5_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear, -A4B4G4R4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear, -L8A8_UINT , 16, 1, 1, 1, , , , ui8, ui8, , , linear, -L8A8_SINT , 16, 1, 1, 1, , , , si8, si8, , , linear, -R8_UNORM , 8, 1, 1, 1, un8, , , , , , , linear, -R8_SNORM , 8, 1, 1, 1, sn8, , , , , , , linear, -R8_SINT , 8, 1, 1, 1, si8, , , , , , , linear, -R8_UINT , 8, 1, 1, 1, ui8, , , , , , , linear, -A8_UNORM , 8, 1, 1, 1, , , , un8, , , , alpha, -I8_UNORM , 8, 1, 1, 1, , , , , , un8, , linear, -L8_UNORM , 8, 1, 1, 1, , , , , un8, , , linear, -P4A4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear, -A4P4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear, -R8_SSCALED , 8, 1, 1, 1, ss8, , , , , , , linear, -R8_USCALED , 8, 1, 1, 1, us8, , , , , , , linear, -P8_UNORM_PALETTE0 , 8, 1, 1, 1, , , , , , , un8, linear, -L8_UNORM_SRGB , 8, 1, 1, 1, , , , , un8, , , linear, -P8_UNORM_PALETTE1 , 8, 1, 1, 1, , , , , , , un8, linear, -P4A4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear, -A4P4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear, -Y8_UNORM , 0, 0, 0, 
0, , , , , , , , yuv, -L8_UINT , 8, 1, 1, 1, , , , , ui8, , , linear, -L8_SINT , 8, 1, 1, 1, , , , , si8, , , linear, -I8_UINT , 8, 1, 1, 1, , , , , , ui8, , linear, -I8_SINT , 8, 1, 1, 1, , , , , , si8, , linear, -DXT1_RGB_SRGB , 64, 4, 4, 1, un4, un4, un4, , , , , srgb, dxt1 -R1_UNORM , 1, 1, 1, 1, un1, , , , , , , linear, -YCRCB_NORMAL , 0, 0, 0, 0, , , , , , , , yuv, -YCRCB_SWAPUVY , 0, 0, 0, 0, , , , , , , , yuv, -P2_UNORM_PALETTE0 , 2, 1, 1, 1, , , , , , , un2, linear, -P2_UNORM_PALETTE1 , 2, 1, 1, 1, , , , , , , un2, linear, -BC1_UNORM , 64, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt1 -BC2_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt3 -BC3_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt5 -BC4_UNORM , 64, 4, 4, 1, un8, , , , , , , linear, rgtc1 -BC5_UNORM , 128, 4, 4, 1, un8, un8, , , , , , linear, rgtc2 -BC1_UNORM_SRGB , 64, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt1 -BC2_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt3 -BC3_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt5 -MONO8 , 1, 1, 1, 1, , , , , , , , , -YCRCB_SWAPUV , 0, 0, 0, 0, , , , , , , , yuv, -YCRCB_SWAPY , 0, 0, 0, 0, , , , , , , , yuv, -DXT1_RGB , 64, 4, 4, 1, un4, un4, un4, , , , , linear, dxt1 -FXT1 , 128, 8, 4, 1, un4, un4, un4, , , , , linear, fxt1 -R8G8B8_UNORM , 24, 1, 1, 1, un8, un8, un8, , , , , linear, -R8G8B8_SNORM , 24, 1, 1, 1, sn8, sn8, sn8, , , , , linear, -R8G8B8_SSCALED , 24, 1, 1, 1, ss8, ss8, ss8, , , , , linear, -R8G8B8_USCALED , 24, 1, 1, 1, us8, us8, us8, , , , , linear, -R64G64B64A64_FLOAT , 256, 1, 1, 1, sf64, sf64, sf64, sf64, , , , linear, -R64G64B64_FLOAT , 196, 1, 1, 1, sf64, sf64, sf64, , , , , linear, -BC4_SNORM , 64, 4, 4, 1, sn8, , , , , , , linear, rgtc1 -BC5_SNORM , 128, 4, 4, 1, sn8, sn8, , , , , , linear, rgtc2 -R16G16B16_FLOAT , 48, 1, 1, 1, sf16, sf16, sf16, , , , , linear, -R16G16B16_UNORM , 48, 1, 1, 1, un16, un16, un16, , , , , linear, -R16G16B16_SNORM , 48, 1, 1, 1, sn16, sn16, sn16, , , , 
, linear, -R16G16B16_SSCALED , 48, 1, 1, 1, ss16, ss16, ss16, , , , , linear, -R16G16B16_USCALED , 48, 1, 1, 1, us16, us16, us16, , , , , linear, -BC6H_SF16 , 128, 4, 4, 1, sf16, sf16, sf16, , , , , linear, bptc -BC7_UNORM , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, bptc -BC7_UNORM_SRGB , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, bptc -BC6H_UF16 , 128, 4, 4, 1, uf16, uf16, uf16, , , , , linear, bptc -PLANAR_420_8 , 0, 0, 0, 0, , , , , , , , yuv, -R8G8B8_UNORM_SRGB , 24, 1, 1, 1, un8, un8, un8, , , , , srgb, -ETC1_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc1 -ETC2_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc2 -EAC_R11 , 64, 4, 4, 1, un11, , , , , , , linear, etc2 -EAC_RG11 , 128, 4, 4, 1, un11, un11, , , , , , linear, etc2 -EAC_SIGNED_R11 , 64, 4, 4, 1, sn11, , , , , , , linear, etc2 -EAC_SIGNED_RG11 , 128, 4, 4, 1, sn11, sn11, , , , , , linear, etc2 -ETC2_SRGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , srgb, etc2 -R16G16B16_UINT , 48, 1, 1, 1, ui16, ui16, ui16, , , , , linear, -R16G16B16_SINT , 48, 1, 1, 1, si16, si16, si16, , , , , linear, -R32_SFIXED , 32, 1, 1, 1, sx16, , , , , , , linear, -R10G10B10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear, -R10G10B10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear, -R10G10B10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear, -R10G10B10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear, -B10G10R10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear, -B10G10R10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear, -B10G10R10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear, -B10G10R10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear, -B10G10R10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear, -R64G64B64A64_PASSTHRU , 256, 1, 1, 1, r64, r64, r64, r64, , , , , -R64G64B64_PASSTHRU , 192, 1, 1, 1, r64, r64, r64, , , , , , -ETC2_RGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , linear, etc2 -ETC2_SRGB8_PTA , 64, 
4, 4, 1, un8, un8, un8, un1, , , , srgb, etc2 -ETC2_EAC_RGBA8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, etc2 -ETC2_EAC_SRGB8_A8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, etc2 -R8G8B8_UINT , 24, 1, 1, 1, ui8, ui8, ui8, , , , , linear, -R8G8B8_SINT , 24, 1, 1, 1, si8, si8, si8, , , , , linear, -RAW , 0, 0, 0, 0, , , , , , , , , diff --git a/src/vulkan/isl_format_layout_gen.bash b/src/vulkan/isl_format_layout_gen.bash deleted file mode 100755 index 2511f299a7e..00000000000 --- a/src/vulkan/isl_format_layout_gen.bash +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright 2015 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. 
- -set -eu -set -o pipefail - -cat <<'EOF' -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "isl.h" - -const struct isl_format_layout -isl_format_layouts[] = { -EOF - -sed -r ' -# Delete comment lines and empty lines -/^[[:space:]]*#/d -/^[[:space:]]*$/d - -# Delete spaces -s/[[:space:]]//g - -# Translate formats -s/^([A-Za-z0-9_]+),*/ISL_FORMAT_\1,/ - -# Translate data type of channels -s/\/ISL_COLORSPACE_\1/ -s/\// - -# Translate texture compression -s/\<(dxt|fxt|rgtc|bptc|etc)([0-9]*)\>/ISL_TXC_\1\2/ -' | -tr 'a-z' 'A-Z' | # Convert to uppersace -while IFS=, read -r format bpb bw bh bd \ - red green blue alpha \ - luminance intensity palette \ - colorspace txc -do - : ${colorspace:=ISL_COLORSPACE_NONE} - : ${txc:=ISL_TXC_NONE} - - cat <samples >= 1); - - *msaa_layout = ISL_MSAA_LAYOUT_NONE; - return true; -} - -void -gen4_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - assert(info->samples == 1); - assert(msaa_layout == ISL_MSAA_LAYOUT_NONE); - assert(!isl_tiling_is_std_y(tiling)); - - /* Note that neither the surface's horizontal nor vertical image alignment - * is programmable on gen4 nor gen5. - * - * From the G35 PRM (2008-01), Volume 1 Graphics Core, Section 6.17.3.4 - * Alignment Unit Size: - * - * Note that the compressed formats are padded to a full compression - * cell. 
- * - * +------------------------+--------+--------+ - * | format | halign | valign | - * +------------------------+--------+--------+ - * | YUV 4:2:2 formats | 4 | 2 | - * | uncompressed formats | 4 | 2 | - * +------------------------+--------+--------+ - */ - - if (isl_format_is_compressed(info->format)) { - *image_align_el = isl_extent3d(1, 1, 1); - return; - } - - *image_align_el = isl_extent3d(4, 2, 1); -} diff --git a/src/vulkan/isl_gen4.h b/src/vulkan/isl_gen4.h deleted file mode 100644 index 06cd70b9206..00000000000 --- a/src/vulkan/isl_gen4.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "isl_priv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -bool -gen4_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout); - -void -gen4_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el); - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/isl_gen6.c b/src/vulkan/isl_gen6.c deleted file mode 100644 index 24c393925ed..00000000000 --- a/src/vulkan/isl_gen6.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "isl_gen6.h" -#include "isl_priv.h" - -bool -gen6_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - assert(ISL_DEV_GEN(dev) == 6); - assert(info->samples >= 1); - - if (info->samples == 1) { - *msaa_layout = ISL_MSAA_LAYOUT_NONE; - return false; - } - - /* From the Sandybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Surface - * Format: - * - * If Number of Multisamples is set to a value other than - * MULTISAMPLECOUNT_1, this field cannot be set to the following - * formats: - * - * - any format with greater than 64 bits per element - * - any compressed texture format (BC*) - * - any YCRCB* format - */ - if (fmtl->bs > 8) - return false; - if (isl_format_is_compressed(info->format)) - return false; - if (isl_format_is_yuv(info->format)) - return false; - - /* From the Sandybridge PRM, Volume 4 Part 1 p85, SURFACE_STATE, Number of - * Multisamples: - * - * If this field is any value other than MULTISAMPLECOUNT_1 the - * following restrictions apply: - * - * - the Surface Type must be SURFTYPE_2D - * - [...] - */ - if (info->dim != ISL_SURF_DIM_2D) - return false; - - /* More obvious restrictions */ - if (isl_surf_usage_is_display(info->usage)) - return false; - if (tiling == ISL_TILING_LINEAR) - return false; - if (info->levels > 1) - return false; - - *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; - return true; -} - -void -gen6_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - /* Note that the surface's horizontal image alignment is not programmable - * on Sandybridge. 
- * - * From the Sandybridge PRM (2011-05), Volume 1, Part 1, Section 7.18.3.4 - * Alignment Unit Size: - * - * Note that the compressed formats are padded to a full compression cell. - * - * +------------------------+--------+--------+ - * | format | halign | valign | - * +------------------------+--------+--------+ - * | YUV 4:2:2 formats | 4 | * | - * | uncompressed formats | 4 | * | - * +------------------------+--------+--------+ - * - * * For these formats, the vertical alignment factor “j” is determined - * as follows: - * - j = 4 for any depth buffer - * - j = 2 for separate stencil buffer - * - j = 4 for any render target surface is multisampled (4x) - * - j = 2 for all other render target surface - * - * From the Sandrybridge PRM (2011-05), Volume 4, Part 1, Section 2.11.2 - * SURFACE_STATE, Surface Vertical Alignment: - * - * - This field must be set to VALIGN_2 if the Surface Format is 96 bits - * per element (BPE). - * - * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL - * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY - * (0x190) - */ - - if (isl_format_is_compressed(info->format)) { - *image_align_el = isl_extent3d(1, 1, 1); - return; - } - - if (isl_format_is_yuv(info->format)) { - *image_align_el = isl_extent3d(4, 2, 1); - return; - } - - if (info->samples > 1) { - *image_align_el = isl_extent3d(4, 4, 1); - return; - } - - if (isl_surf_usage_is_depth_or_stencil(info->usage) && - !ISL_DEV_USE_SEPARATE_STENCIL(dev)) { - /* interleaved depthstencil buffer */ - *image_align_el = isl_extent3d(4, 4, 1); - return; - } - - if (isl_surf_usage_is_depth(info->usage)) { - /* separate depth buffer */ - *image_align_el = isl_extent3d(4, 4, 1); - return; - } - - if (isl_surf_usage_is_stencil(info->usage)) { - /* separate stencil buffer */ - *image_align_el = isl_extent3d(4, 2, 1); - return; - } - - *image_align_el = isl_extent3d(4, 2, 1); -} diff --git a/src/vulkan/isl_gen6.h b/src/vulkan/isl_gen6.h deleted file mode 100644 
index 0779c674940..00000000000 --- a/src/vulkan/isl_gen6.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "isl_priv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -bool -gen6_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout); - -void -gen6_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el); - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/isl_gen7.c b/src/vulkan/isl_gen7.c deleted file mode 100644 index 9984f61b2a4..00000000000 --- a/src/vulkan/isl_gen7.c +++ /dev/null @@ -1,392 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "isl_gen7.h" -#include "isl_priv.h" - -bool -gen7_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - bool require_array = false; - bool require_interleaved = false; - - assert(ISL_DEV_GEN(dev) == 7); - assert(info->samples >= 1); - - if (info->samples == 1) { - *msaa_layout = ISL_MSAA_LAYOUT_NONE; - return true; - } - - /* From the Ivybridge PRM, Volume 4 Part 1 p63, SURFACE_STATE, Surface - * Format: - * - * If Number of Multisamples is set to a value other than - * MULTISAMPLECOUNT_1, this field cannot be set to the following - * formats: any format with greater than 64 bits per element, any - * compressed texture format (BC*), and any YCRCB* format. - */ - if (fmtl->bs > 8) - return false; - if (isl_format_is_compressed(info->format)) - return false; - if (isl_format_is_yuv(info->format)) - return false; - - /* From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of - * Multisamples: - * - * - If this field is any value other than MULTISAMPLECOUNT_1, the - * Surface Type must be SURFTYPE_2D. - * - * - If this field is any value other than MULTISAMPLECOUNT_1, Surface - * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero - */ - if (info->dim != ISL_SURF_DIM_2D) - return false; - if (info->levels > 1) - return false; - - /* The Ivyrbridge PRM insists twice that signed integer formats cannot be - * multisampled. - * - * From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of - * Multisamples: - * - * - This field must be set to MULTISAMPLECOUNT_1 for SINT MSRTs when - * all RT channels are not written. - * - * And errata from the Ivybridge PRM, Volume 4 Part 1 p77, - * RENDER_SURFACE_STATE, MCS Enable: - * - * This field must be set to 0 [MULTISAMPLECOUNT_1] for all SINT MSRTs - * when all RT channels are not written. 
- * - * Note that the above SINT restrictions apply only to *MSRTs* (that is, - * *multisampled* render targets). The restrictions seem to permit an MCS - * if the render target is singlesampled. - */ - if (isl_format_has_sint_channel(info->format)) - return false; - - /* More obvious restrictions */ - if (isl_surf_usage_is_display(info->usage)) - return false; - if (tiling == ISL_TILING_LINEAR) - return false; - - /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled - * Suface Storage Format: - * - * +---------------------+----------------------------------------------------------------+ - * | MSFMT_MSS | Multsampled surface was/is rendered as a render target | - * | MSFMT_DEPTH_STENCIL | Multisampled surface was rendered as a depth or stencil buffer | - * +---------------------+----------------------------------------------------------------+ - * - * In the table above, MSFMT_MSS refers to ISL_MSAA_LAYOUT_ARRAY, and - * MSFMT_DEPTH_STENCIL refers to ISL_MSAA_LAYOUT_INTERLEAVED. - */ - if (isl_surf_usage_is_depth_or_stencil(info->usage)) - require_interleaved = true; - - /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled - * Suface Storage Format: - * - * If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, Width - * is >= 8192 (meaning the actual surface width is >= 8193 pixels), this - * field must be set to MSFMT_MSS. - */ - if (info->samples == 8 && info->width == 8192) - require_array = true; - - /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled - * Suface Storage Format: - * - * If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, - * ((Depth+1) * (Height+1)) is > 4,194,304, OR if the surface’s Number - * of Multisamples is MULTISAMPLECOUNT_4, ((Depth+1) * (Height+1)) is - * > 8,388,608, this field must be set to MSFMT_DEPTH_STENCIL. 
- */ - if ((info->samples == 8 && info->height > 4194304u) || - (info->samples == 4 && info->height > 8388608u)) - require_interleaved = true; - - /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled - * Suface Storage Format: - * - * This field must be set to MSFMT_DEPTH_STENCIL if Surface Format is - * one of the following: I24X8_UNORM, L24X8_UNORM, A24X8_UNORM, or - * R24_UNORM_X8_TYPELESS. - */ - if (info->format == ISL_FORMAT_I24X8_UNORM || - info->format == ISL_FORMAT_L24X8_UNORM || - info->format == ISL_FORMAT_A24X8_UNORM || - info->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) - require_interleaved = true; - - if (require_array && require_interleaved) - return false; - - if (require_interleaved) { - *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; - return true; - } - - /* Default to the array layout because it permits multisample - * compression. - */ - *msaa_layout = ISL_MSAA_LAYOUT_ARRAY; - return true; -} - -static bool -gen7_format_needs_valign2(const struct isl_device *dev, - enum isl_format format) -{ - /* This workaround applies only to gen7 */ - if (ISL_DEV_GEN(dev) > 7) - return false; - - /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, - * RENDER_SURFACE_STATE Surface Vertical Alignment: - * - * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL - * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY - * (0x190) - * - * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT. - */ - return isl_format_is_yuv(format) || - format == ISL_FORMAT_R32G32B32_FLOAT; -} - -void -gen7_filter_tiling(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - isl_tiling_flags_t *flags) -{ - /* IVB+ requires separate stencil */ - assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); - - /* Clear flags unsupported on this hardware */ - if (ISL_DEV_GEN(dev) < 9) { - *flags &= ~ISL_TILING_Yf_BIT; - *flags &= ~ISL_TILING_Ys_BIT; - } - - /* And... 
clear the Yf and Ys bits anyway because Anvil doesn't support - * them yet. - */ - *flags &= ~ISL_TILING_Yf_BIT; /* FINISHME[SKL]: Support Yf */ - *flags &= ~ISL_TILING_Ys_BIT; /* FINISHME[SKL]: Support Ys */ - - if (isl_surf_usage_is_depth(info->usage)) { - /* Depth requires Y. */ - *flags &= ISL_TILING_ANY_Y_MASK; - } - - /* Separate stencil requires W tiling, and W tiling requires separate - * stencil. - */ - if (isl_surf_usage_is_stencil(info->usage)) { - *flags &= ISL_TILING_W_BIT; - } else { - *flags &= ~ISL_TILING_W_BIT; - } - - if (info->usage & (ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT | - ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT | - ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT)) { - assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT); - isl_finishme("%s:%s: handle rotated display surfaces", - __FILE__, __func__); - } - - if (info->usage & (ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT | - ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT)) { - assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT); - isl_finishme("%s:%s: handle flipped display surfaces", - __FILE__, __func__); - } - - if (info->usage & ISL_SURF_USAGE_DISPLAY_BIT) { - /* Before Skylake, the display engine does not accept Y */ - /* FINISHME[SKL]: Y tiling for display surfaces */ - *flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT); - } - - if (info->samples > 1) { - /* From the Sandybridge PRM, Volume 4 Part 1, SURFACE_STATE Tiled - * Surface: - * - * For multisample render targets, this field must be 1 (true). MSRTs - * can only be tiled. - * - * Multisample surfaces never require X tiling, and Y tiling generally - * performs better than X. So choose Y. (Unless it's stencil, then it - * must be W). - */ - *flags &= (ISL_TILING_ANY_Y_MASK | ISL_TILING_W_BIT); - } - - /* For 1D surfaces, use linear when possible. 1D surfaces (array and - * non-array) do not benefit from tiling. In fact, it leads to less - * efficient use of memory due to tile alignment. 
- */ - if (info->dim == ISL_SURF_DIM_1D && (*flags & ISL_TILING_LINEAR_BIT)) { - *flags = ISL_TILING_LINEAR_BIT; - } - - /* workaround */ - if (ISL_DEV_GEN(dev) == 7 && - gen7_format_needs_valign2(dev, info->format) && - (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) && - info->samples == 1) { - /* Y tiling is illegal. From the Ivybridge PRM, Vol4 Part1 2.12.2.1, - * SURFACE_STATE Surface Vertical Alignment: - * - * This field must be set to VALIGN_4 for all tiled Y Render Target - * surfaces. - */ - *flags &= ~ISL_TILING_Y0_BIT; - } -} - -/** - * Choose horizontal subimage alignment, in units of surface elements. - */ -static uint32_t -gen7_choose_halign_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info) -{ - if (isl_format_is_compressed(info->format)) - return 1; - - /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, - * RENDER_SURFACE_STATE Surface Hoizontal Alignment: - * - * - This field is intended to be set to HALIGN_8 only if the surface - * was rendered as a depth buffer with Z16 format or a stencil buffer, - * since these surfaces support only alignment of 8. Use of HALIGN_8 - * for other surfaces is supported, but uses more memory. - */ - if (isl_surf_info_is_z16(info) || - isl_surf_usage_is_stencil(info->usage)) - return 8; - - return 4; -} - -/** - * Choose vertical subimage alignment, in units of surface elements. 
- */ -static uint32_t -gen7_choose_valign_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling) -{ - bool require_valign2 = false; - bool require_valign4 = false; - - if (isl_format_is_compressed(info->format)) - return 1; - - if (gen7_format_needs_valign2(dev, info->format)) - require_valign2 = true; - - /* From the Ivybridge PRM, Volume 4, Part 1, Section 2.12.1: - * RENDER_SURFACE_STATE Surface Vertical Alignment: - * - * - This field is intended to be set to VALIGN_4 if the surface was - * rendered as a depth buffer, for a multisampled (4x) render target, - * or for a multisampled (8x) render target, since these surfaces - * support only alignment of 4. Use of VALIGN_4 for other surfaces is - * supported, but uses more memory. This field must be set to - * VALIGN_4 for all tiled Y Render Target surfaces. - * - */ - if (isl_surf_usage_is_depth(info->usage) || - info->samples > 1 || - tiling == ISL_TILING_Y0) { - require_valign4 = true; - } - - if (isl_surf_usage_is_stencil(info->usage)) { - /* The Ivybridge PRM states that the stencil buffer's vertical alignment - * is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment - * Unit Size]. However, valign=8 is outside the set of valid values of - * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2 - * (0x0) and VALIGN_4 (0x1). - * - * The PRM is generally confused about the width, height, and alignment - * of the stencil buffer; and this confusion appears elsewhere. 
For - * example, the following PRM text effectively converts the stencil - * buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM, - * Volume 1, Part 1, Section - * 6.18.4.2 Base Address and LOD Calculation]: - * - * For separate stencil buffer, the width must be mutiplied by 2 and - * height divided by 2 as follows: - * - * w_L = 2*i*ceil(W_L/i) - * h_L = 1/2*j*ceil(H_L/j) - * - * The root of the confusion is that, in W tiling, each pair of rows is - * interleaved into one. - * - * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API - * is more polished. - */ - require_valign4 = true; - } - - assert(!require_valign2 || !require_valign4); - - if (require_valign4) - return 4; - - /* Prefer VALIGN_2 because it conserves memory. */ - return 2; -} - -void -gen7_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - /* IVB+ does not support combined depthstencil. 
*/ - assert(!isl_surf_usage_is_depth_and_stencil(info->usage)); - - *image_align_el = (struct isl_extent3d) { - .w = gen7_choose_halign_el(dev, info), - .h = gen7_choose_valign_el(dev, info, tiling), - .d = 1, - }; -} diff --git a/src/vulkan/isl_gen7.h b/src/vulkan/isl_gen7.h deleted file mode 100644 index 2a95b68a9bd..00000000000 --- a/src/vulkan/isl_gen7.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "isl_priv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void -gen7_filter_tiling(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - isl_tiling_flags_t *flags); - -bool -gen7_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout); - -void -gen7_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el); - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/isl_gen8.c b/src/vulkan/isl_gen8.c deleted file mode 100644 index 2f434aabb2e..00000000000 --- a/src/vulkan/isl_gen8.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "isl_gen8.h" -#include "isl_priv.h" - -bool -gen8_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout) -{ - bool require_array = false; - bool require_interleaved = false; - - assert(info->samples >= 1); - - if (info->samples == 1) { - *msaa_layout = ISL_MSAA_LAYOUT_NONE; - return true; - } - - /* From the Broadwell PRM >> Volume2d: Command Structures >> - * RENDER_SURFACE_STATE Tile Mode: - * - * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field - * must be YMAJOR. - * - * As usual, though, stencil is special. - */ - if (!isl_tiling_is_std_y(tiling) && !isl_surf_usage_is_stencil(info->usage)) - return false; - - /* From the Broadwell PRM >> Volume2d: Command Structures >> - * RENDER_SURFACE_STATE Multisampled Surface Storage Format: - * - * All multisampled render target surfaces must have this field set to - * MSFMT_MSS - */ - if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) - require_array = true; - - /* From the Broadwell PRM >> Volume2d: Command Structures >> - * RENDER_SURFACE_STATE Number of Multisamples: - * - * - If this field is any value other than MULTISAMPLECOUNT_1, the - * Surface Type must be SURFTYPE_2D This field must be set to - * MULTISAMPLECOUNT_1 unless the surface is a Sampling Engine surface - * or Render Target surface. - * - * - If this field is any value other than MULTISAMPLECOUNT_1, Surface - * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero. 
- */ - if (info->dim != ISL_SURF_DIM_2D) - return false; - if (info->levels > 1) - return false; - - /* More obvious restrictions */ - if (isl_surf_usage_is_display(info->usage)) - return false; - if (isl_format_is_compressed(info->format)) - return false; - if (isl_format_is_yuv(info->format)) - return false; - - if (isl_surf_usage_is_depth_or_stencil(info->usage)) - require_interleaved = true; - - if (require_array && require_interleaved) - return false; - - if (require_interleaved) { - *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; - return true; - } - - *msaa_layout = ISL_MSAA_LAYOUT_ARRAY; - return true; -} - -/** - * Choose horizontal subimage alignment, in units of surface elements. - */ -static uint32_t -gen8_choose_halign_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info) -{ - if (isl_format_is_compressed(info->format)) - return 1; - - /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", - * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: - * - * - This field is intended to be set to HALIGN_8 only if the surface - * was rendered as a depth buffer with Z16 format or a stencil buffer. - * In this case it must be set to HALIGN_8 since these surfaces - * support only alignment of 8. [...] - */ - if (isl_surf_info_is_z16(info)) - return 8; - if (isl_surf_usage_is_stencil(info->usage)) - return 8; - - /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", - * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: - * - * [...] For Z32 formats it must be set to HALIGN_4. - */ - if (isl_surf_usage_is_depth(info->usage)) - return 4; - - if (!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) { - /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", - * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: - * - * - When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, - * HALIGN 16 must be used. 
- * - * This case handles color surfaces that may own an auxiliary MCS, CCS_D, - * or CCS_E. Depth buffers, including those that own an auxiliary HiZ - * surface, are handled above and do not require HALIGN_16. - */ - assert(!isl_surf_usage_is_depth(info->usage)); - return 16; - } - - /* XXX(chadv): I believe the hardware requires each image to be - * cache-aligned. If that's true, then defaulting to halign=4 is wrong for - * many formats. Depending on the format's block size, we may need to - * increase halign to 8. - */ - return 4; -} - -/** - * Choose vertical subimage alignment, in units of surface elements. - */ -static uint32_t -gen8_choose_valign_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info) -{ - /* From the Broadwell PRM > Volume 2d: Command Reference: Structures - * > RENDER_SURFACE_STATE Surface Vertical Alignment (p325): - * - * - For Sampling Engine and Render Target Surfaces: This field - * specifies the vertical alignment requirement in elements for the - * surface. [...] An element is defined as a pixel in uncompresed - * surface formats, and as a compression block in compressed surface - * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an - * element is a sample. - * - * - This field is intended to be set to VALIGN_4 if the surface was - * rendered as a depth buffer, for a multisampled (4x) render target, - * or for a multisampled (8x) render target, since these surfaces - * support only alignment of 4. Use of VALIGN_4 for other surfaces is - * supported, but increases memory usage. - * - * - This field is intended to be set to VALIGN_8 only if the surface - * was rendered as a stencil buffer, since stencil buffer surfaces - * support only alignment of 8. If set to VALIGN_8, Surface Format - * must be R8_UINT. 
- */ - - if (isl_format_is_compressed(info->format)) - return 1; - - if (isl_surf_usage_is_stencil(info->usage)) - return 8; - - return 4; -} - -void -gen8_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - assert(!isl_tiling_is_std_y(tiling)); - - /* The below text from the Broadwell PRM provides some insight into the - * hardware's requirements for LOD alignment. From the Broadwell PRM >> - * Volume 5: Memory Views >> Surface Layout >> 2D Surfaces: - * - * These [2D surfaces] must adhere to the following memory organization - * rules: - * - * - For non-compressed texture formats, each mipmap must start on an - * even row within the monolithic rectangular area. For - * 1-texel-high mipmaps, this may require a row of padding below - * the previous mipmap. This restriction does not apply to any - * compressed texture formats; each subsequent (lower-res) - * compressed mipmap is positioned directly below the previous - * mipmap. - * - * - Vertical alignment restrictions vary with memory tiling type: - * 1 DWord for linear, 16-byte (DQWord) for tiled. (Note that tiled - * mipmaps are not required to start at the left edge of a tile - * row.) 
- */ - - *image_align_el = (struct isl_extent3d) { - .w = gen8_choose_halign_el(dev, info), - .h = gen8_choose_valign_el(dev, info), - .d = 1, - }; -} diff --git a/src/vulkan/isl_gen8.h b/src/vulkan/isl_gen8.h deleted file mode 100644 index 2017ea8ddc1..00000000000 --- a/src/vulkan/isl_gen8.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "isl_priv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -bool -gen8_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout); - -void -gen8_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el); - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/isl_gen9.c b/src/vulkan/isl_gen9.c deleted file mode 100644 index aa290aa1c35..00000000000 --- a/src/vulkan/isl_gen9.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "isl_gen8.h" -#include "isl_gen9.h" -#include "isl_priv.h" - -/** - * Calculate the surface's subimage alignment, in units of surface samples, - * for the standard tiling formats Yf and Ys. - */ -static void -gen9_calc_std_image_alignment_sa(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *align_sa) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - assert(isl_tiling_is_std_y(tiling)); - - const uint32_t bs = fmtl->bs; - const uint32_t is_Ys = tiling == ISL_TILING_Ys; - - switch (info->dim) { - case ISL_SURF_DIM_1D: - /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface - * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. - */ - *align_sa = (struct isl_extent3d) { - .w = 1 << (12 - (ffs(bs) - 1) + (4 * is_Ys)), - .h = 1, - .d = 1, - }; - return; - case ISL_SURF_DIM_2D: - /* See the Skylake BSpec > Memory Views > Common Surface Formats > - * Surface Layout and Tiling > 2D Surfaces > 2D/CUBE Alignment - * Requirements. - */ - *align_sa = (struct isl_extent3d) { - .w = 1 << (6 - ((ffs(bs) - 1) / 2) + (4 * is_Ys)), - .h = 1 << (6 - ((ffs(bs) - 0) / 2) + (4 * is_Ys)), - .d = 1, - }; - - if (is_Ys) { - /* FINISHME(chadv): I don't trust this code. Untested. */ - isl_finishme("%s:%s: [SKL+] multisample TileYs", __FILE__, __func__); - - switch (msaa_layout) { - case ISL_MSAA_LAYOUT_NONE: - case ISL_MSAA_LAYOUT_INTERLEAVED: - break; - case ISL_MSAA_LAYOUT_ARRAY: - align_sa->w >>= (ffs(info->samples) - 0) / 2; - align_sa->h >>= (ffs(info->samples) - 1) / 2; - break; - } - } - return; - - case ISL_SURF_DIM_3D: - /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface - * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. 
- */ - *align_sa = (struct isl_extent3d) { - .w = 1 << (4 - ((ffs(bs) + 1) / 3) + (4 * is_Ys)), - .h = 1 << (4 - ((ffs(bs) - 1) / 3) + (2 * is_Ys)), - .d = 1 << (4 - ((ffs(bs) - 0) / 3) + (2 * is_Ys)), - }; - return; - } - - unreachable("bad isl_surface_type"); -} - -void -gen9_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - /* This BSpec text provides some insight into the hardware's alignment - * requirements [Skylake BSpec > Memory Views > Common Surface Formats > - * Surface Layout and Tiling > 2D Surfaces]: - * - * An LOD must be aligned to a cache-line except for some special cases - * related to Planar YUV surfaces. In general, the cache-alignment - * restriction implies there is a minimum height for an LOD of 4 texels. - * So, LODs which are smaller than 4 high are padded. - * - * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Vertical Alignment: - * - * - For Sampling Engine and Render Target Surfaces: This field - * specifies the vertical alignment requirement in elements for the - * surface. [...] An element is defined as a pixel in uncompresed - * surface formats, and as a compression block in compressed surface - * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an - * element is a sample. - * - * - This field is used for 2D, CUBE, and 3D surface alignment when Tiled - * Resource Mode is TRMODE_NONE (Tiled Resource Mode is disabled). - * This field is ignored for 1D surfaces and also when Tiled Resource - * Mode is not TRMODE_NONE (e.g. Tiled Resource Mode is enabled). - * - * See the appropriate Alignment table in the "Surface Layout and - * Tiling" section under Common Surface Formats for the table of - * alignment values for Tiled Resrouces. - * - * - For uncompressed surfaces, the units of "j" are rows of pixels on - * the physical surface. 
For compressed texture formats, the units of - * "j" are in compression blocks, thus each increment in "j" is equal - * to h pixels, where h is the height of the compression block in - * pixels. - * - * - Valid Values: VALIGN_4, VALIGN_8, VALIGN_16 - * - * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Horizontal - * Alignment: - * - * - For uncompressed surfaces, the units of "i" are pixels on the - * physical surface. For compressed texture formats, the units of "i" - * are in compression blocks, thus each increment in "i" is equal to - * w pixels, where w is the width of the compression block in pixels. - * - * - Valid Values: HALIGN_4, HALIGN_8, HALIGN_16 - */ - - if (isl_tiling_is_std_y(tiling)) { - struct isl_extent3d image_align_sa; - gen9_calc_std_image_alignment_sa(dev, info, tiling, msaa_layout, - &image_align_sa); - - *image_align_el = isl_extent3d_sa_to_el(info->format, image_align_sa); - return; - } - - if (info->dim == ISL_SURF_DIM_1D) { - /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface - * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. - */ - *image_align_el = isl_extent3d(64, 1, 1); - return; - } - - if (isl_format_is_compressed(info->format)) { - /* On Gen9, the meaning of RENDER_SURFACE_STATE's - * SurfaceHorizontalAlignment and SurfaceVerticalAlignment changed for - * compressed formats. They now indicate a multiple of the compression - * block. For example, if the compression mode is ETC2 then HALIGN_4 - * indicates a horizontal alignment of 16 pixels. - * - * To avoid wasting memory, choose the smallest alignment possible: - * HALIGN_4 and VALIGN_4. 
- */ - *image_align_el = isl_extent3d(4, 4, 1); - return; - } - - gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout, - image_align_el); -} diff --git a/src/vulkan/isl_gen9.h b/src/vulkan/isl_gen9.h deleted file mode 100644 index 64ed0aa44ef..00000000000 --- a/src/vulkan/isl_gen9.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "isl_priv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void -gen9_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el); - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/isl_image.c b/src/vulkan/isl_image.c deleted file mode 100644 index 2d146d59ac5..00000000000 --- a/src/vulkan/isl_image.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "isl.h" -#include "brw_compiler.h" - -enum isl_format -isl_lower_storage_image_format(const struct isl_device *dev, - enum isl_format format) -{ - switch (format) { - /* These are never lowered. Up to BDW we'll have to fall back to untyped - * surface access for 128bpp formats. 
- */ - case ISL_FORMAT_R32G32B32A32_UINT: - case ISL_FORMAT_R32G32B32A32_SINT: - case ISL_FORMAT_R32G32B32A32_FLOAT: - case ISL_FORMAT_R32_UINT: - case ISL_FORMAT_R32_SINT: - case ISL_FORMAT_R32_FLOAT: - return format; - - /* From HSW to BDW the only 64bpp format supported for typed access is - * RGBA_UINT16. IVB falls back to untyped. - */ - case ISL_FORMAT_R16G16B16A16_UINT: - case ISL_FORMAT_R16G16B16A16_SINT: - case ISL_FORMAT_R16G16B16A16_FLOAT: - case ISL_FORMAT_R32G32_UINT: - case ISL_FORMAT_R32G32_SINT: - case ISL_FORMAT_R32G32_FLOAT: - return (ISL_DEV_GEN(dev) >= 9 ? format : - ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R16G16B16A16_UINT : - ISL_FORMAT_R32G32_UINT); - - /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component - * are supported. IVB doesn't support formats with more than one component - * for typed access. For 8 and 16 bpp formats IVB relies on the - * undocumented behavior that typed reads from R_UINT8 and R_UINT16 - * surfaces actually do a 32-bit misaligned read. The alternative would be - * to use two surface state entries with different formats for each image, - * one for reading (using R_UINT32) and another one for writing (using - * R_UINT8 or R_UINT16), but that would complicate the shaders we generate - * even more. - */ - case ISL_FORMAT_R8G8B8A8_UINT: - case ISL_FORMAT_R8G8B8A8_SINT: - return (ISL_DEV_GEN(dev) >= 9 ? format : - ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); - - case ISL_FORMAT_R16G16_UINT: - case ISL_FORMAT_R16G16_SINT: - case ISL_FORMAT_R16G16_FLOAT: - return (ISL_DEV_GEN(dev) >= 9 ? format : - ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); - - case ISL_FORMAT_R8G8_UINT: - case ISL_FORMAT_R8G8_SINT: - return (ISL_DEV_GEN(dev) >= 9 ? format : - ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? 
- ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); - - case ISL_FORMAT_R16_UINT: - case ISL_FORMAT_R16_FLOAT: - case ISL_FORMAT_R16_SINT: - return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R16_UINT); - - case ISL_FORMAT_R8_UINT: - case ISL_FORMAT_R8_SINT: - return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R8_UINT); - - /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported - * by the hardware. - */ - case ISL_FORMAT_R10G10B10A2_UINT: - case ISL_FORMAT_R10G10B10A2_UNORM: - case ISL_FORMAT_R11G11B10_FLOAT: - return ISL_FORMAT_R32_UINT; - - /* No normalized fixed-point formats are supported by the hardware. */ - case ISL_FORMAT_R16G16B16A16_UNORM: - case ISL_FORMAT_R16G16B16A16_SNORM: - return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R16G16B16A16_UINT : - ISL_FORMAT_R32G32_UINT); - - case ISL_FORMAT_R8G8B8A8_UNORM: - case ISL_FORMAT_R8G8B8A8_SNORM: - return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); - - case ISL_FORMAT_R16G16_UNORM: - case ISL_FORMAT_R16G16_SNORM: - return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); - - case ISL_FORMAT_R8G8_UNORM: - case ISL_FORMAT_R8G8_SNORM: - return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? 
- ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); - - case ISL_FORMAT_R16_UNORM: - case ISL_FORMAT_R16_SNORM: - return ISL_FORMAT_R16_UINT; - - case ISL_FORMAT_R8_UNORM: - case ISL_FORMAT_R8_SNORM: - return ISL_FORMAT_R8_UINT; - - default: - assert(!"Unknown image format"); - return ISL_FORMAT_UNSUPPORTED; - } -} diff --git a/src/vulkan/isl_priv.h b/src/vulkan/isl_priv.h deleted file mode 100644 index 1c9343a7d1f..00000000000 --- a/src/vulkan/isl_priv.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -#include - -#include "brw_device_info.h" -#include "mesa/main/imports.h" -#include "util/macros.h" - -#include "isl.h" - -#define isl_finishme(format, ...) 
\ - __isl_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__) - -void PRINTFLIKE(3, 4) UNUSED -__isl_finishme(const char *file, int line, const char *fmt, ...); - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - -static inline uint32_t -ffs(uint32_t n) { - return __builtin_ffs(n); -} - -static inline bool -isl_is_pow2(uintmax_t n) -{ - return !(n & (n - 1)); -} - -/** - * Alignment must be a power of 2. - */ -static inline bool -isl_is_aligned(uintmax_t n, uintmax_t a) -{ - assert(isl_is_pow2(a)); - return (n & (a - 1)) == 0; -} - -/** - * Alignment must be a power of 2. - */ -static inline uintmax_t -isl_align(uintmax_t n, uintmax_t a) -{ - assert(isl_is_pow2(a)); - return (n + a - 1) & ~(a - 1); -} - -static inline uintmax_t -isl_align_npot(uintmax_t n, uintmax_t a) -{ - assert(a > 0); - return ((n + a - 1) / a) * a; -} - -/** - * Alignment must be a power of 2. - */ -static inline uintmax_t -isl_align_div(uintmax_t n, uintmax_t a) -{ - return isl_align(n, a) / a; -} - -static inline uintmax_t -isl_align_div_npot(uintmax_t n, uintmax_t a) -{ - return isl_align_npot(n, a) / a; -} - -/** - * Log base 2, rounding towards zero. 
- */ -static inline uint32_t -isl_log2u(uint32_t n) -{ - assert(n != 0); - return 31 - __builtin_clz(n); -} - -static inline uint32_t -isl_minify(uint32_t n, uint32_t levels) -{ - if (unlikely(n == 0)) - return 0; - else - return MAX(n >> levels, 1); -} - -static inline struct isl_extent3d -isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - assert(extent_sa.w % fmtl->bw == 0); - assert(extent_sa.h % fmtl->bh == 0); - assert(extent_sa.d % fmtl->bd == 0); - - return (struct isl_extent3d) { - .w = extent_sa.w / fmtl->bw, - .h = extent_sa.h / fmtl->bh, - .d = extent_sa.d / fmtl->bd, - }; -} - -static inline struct isl_extent3d -isl_extent3d_el_to_sa(enum isl_format fmt, struct isl_extent3d extent_el) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - return (struct isl_extent3d) { - .w = extent_el.w * fmtl->bw, - .h = extent_el.h * fmtl->bh, - .d = extent_el.d * fmtl->bd, - }; -} -- cgit v1.2.3 From 53504b884e58af6bcf92e3fd258c7d9d3332c1fc Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 14 Dec 2015 12:31:55 -0800 Subject: isl: Fix calculation of array pitch for layout GEN4_2D The height of the miptree's right half was not large enough. Found by `make check` in test_isl_surf_get_offset, which is added in the next commit. 
--- src/isl/isl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index d858ea74745..333a517f274 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -570,6 +570,7 @@ isl_calc_phys_slice0_extent_sa_gen4_2d( slice_left_h += h; } else if (l == 2) { slice_bottom_w += w; + slice_right_h += h; } else { slice_right_h += h; } -- cgit v1.2.3 From 64f0ee73e0ade5b6f888b143a1db58ffabfffd99 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 11 Dec 2015 17:14:52 -0800 Subject: isl: Add func isl_surf_get_image_offset_sa The function calculates the offset to a subimage within the surface, in units of surface samples. All unit tests pass with `make check`. (Admittedly, though, there are too few unit tests). --- src/isl/Makefile.am | 21 +++ src/isl/isl.c | 124 +++++++++++++++- src/isl/isl.h | 16 ++ src/isl/tests/.gitignore | 1 + src/isl/tests/isl_surf_get_image_offset_test.c | 197 +++++++++++++++++++++++++ 5 files changed, 357 insertions(+), 2 deletions(-) create mode 100644 src/isl/tests/.gitignore create mode 100644 src/isl/tests/isl_surf_get_image_offset_test.c (limited to 'src') diff --git a/src/isl/Makefile.am b/src/isl/Makefile.am index 6a5c29c67cb..134e62ad105 100644 --- a/src/isl/Makefile.am +++ b/src/isl/Makefile.am @@ -23,6 +23,8 @@ SUBDIRS = . noinst_LTLIBRARIES = libisl.la +EXTRA_DIST = tests + # The gallium includes are for the util/u_math.h include from main/macros.h AM_CPPFLAGS = \ $(INTEL_CFLAGS) \ @@ -64,4 +66,23 @@ isl_format_layout.c: isl_format_layout_gen.bash \ $(AM_V_GEN)$(srcdir)/isl_format_layout_gen.bash \ <$(srcdir)/isl_format_layout.csv >$@ +# ---------------------------------------------------------------------------- +# Tests +# ---------------------------------------------------------------------------- + +TESTS = tests/isl_surf_get_image_offset_test + +check_PROGRAMS = $(TESTS) + +# Link tests to lib965_compiler.la for brw_get_device_info(). 
+tests_ldadd = \ + libisl.la \ + $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la + +tests_isl_surf_get_image_offset_test_SOURCES = \ + tests/isl_surf_get_image_offset_test.c +tests_isl_surf_get_image_offset_test_LDADD = $(tests_ldadd) + +# ---------------------------------------------------------------------------- + include $(top_srcdir)/install-lib-links.mk diff --git a/src/isl/isl.c b/src/isl/isl.c index 333a517f274..df0aeed01df 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -225,8 +225,10 @@ isl_msaa_interleaved_scale_px_to_sa(uint32_t samples, * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before * proceeding: [...] */ - *width = isl_align(*width, 2) << ((ffs(samples) - 0) / 2); - *height = isl_align(*height, 2) << ((ffs(samples) - 1) / 2); + if (width) + *width = isl_align(*width, 2) << ((ffs(samples) - 0) / 2); + if (height) + *height = isl_align(*height, 2) << ((ffs(samples) - 1) / 2); } static enum isl_array_pitch_span @@ -1045,3 +1047,121 @@ isl_surf_init_s(const struct isl_device *dev, return true; } + +/** + * A variant of isl_surf_get_image_offset_sa() specific to + * ISL_DIM_LAYOUT_GEN4_2D. 
+ */ +static void +get_image_offset_sa_gen4_2d(const struct isl_surf *surf, + uint32_t level, uint32_t layer, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(layer < surf->phys_level0_sa.array_len); + assert(surf->phys_level0_sa.depth == 1); + + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + const uint32_t W0 = surf->phys_level0_sa.width; + const uint32_t H0 = surf->phys_level0_sa.height; + + uint32_t x = 0; + uint32_t y = layer * isl_surf_get_array_pitch_sa_rows(surf); + + for (uint32_t l = 0; l < level; ++l) { + if (l == 1) { + uint32_t W = isl_minify(W0, l); + + if (surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) + isl_msaa_interleaved_scale_px_to_sa(surf->samples, &W, NULL); + + x += isl_align_npot(W, image_align_sa.w); + } else { + uint32_t H = isl_minify(H0, l); + + if (surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) + isl_msaa_interleaved_scale_px_to_sa(surf->samples, NULL, &H); + + y += isl_align_npot(H, image_align_sa.h); + } + } + + *x_offset_sa = x; + *y_offset_sa = y; +} + +/** + * A variant of isl_surf_get_image_offset_sa() specific to + * ISL_DIM_LAYOUT_GEN4_3D. 
+ */ +static void +get_image_offset_sa_gen4_3d(const struct isl_surf *surf, + uint32_t level, uint32_t logical_z_offset_px, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level)); + assert(surf->phys_level0_sa.array_len == 1); + + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + const uint32_t W0 = surf->phys_level0_sa.width; + const uint32_t H0 = surf->phys_level0_sa.height; + const uint32_t D0 = surf->phys_level0_sa.depth; + + uint32_t x = 0; + uint32_t y = 0; + + for (uint32_t l = 0; l < level; ++l) { + const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h); + const uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa.d); + const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); + + y += level_h * max_layers_vert; + } + + const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w); + const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h); + const uint32_t level_d = isl_align_npot(isl_minify(D0, level), image_align_sa.d); + + const uint32_t max_layers_horiz = MIN(level_d, 1u << level); + const uint32_t max_layers_vert = isl_align_div(level_d, 1u << level); + + x += level_w * (logical_z_offset_px % max_layers_horiz); + y += level_h * (logical_z_offset_px / max_layers_vert); + + *x_offset_sa = x; + *y_offset_sa = y; +} + +void +isl_surf_get_image_offset_sa(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(logical_array_layer < surf->logical_level0_px.array_len); + assert(logical_z_offset_px + < isl_minify(surf->logical_level0_px.depth, level)); + + switch (surf->dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + isl_finishme("%s:%s: gen9 1d surfaces", __FILE__, __func__); 
+ case ISL_DIM_LAYOUT_GEN4_2D: + get_image_offset_sa_gen4_2d(surf, level, logical_array_layer, + x_offset_sa, y_offset_sa); + break; + case ISL_DIM_LAYOUT_GEN4_3D: + get_image_offset_sa_gen4_3d(surf, level, logical_z_offset_px, + x_offset_sa, y_offset_sa); + break; + } +} diff --git a/src/isl/isl.h b/src/isl/isl.h index 184b0c5f70a..6baac38bc2a 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -912,6 +912,22 @@ isl_surf_get_array_pitch(const struct isl_surf *surf) return isl_surf_get_array_pitch_sa_rows(surf) * surf->row_pitch; } +/** + * Get the offset to an subimage within the surface, in units of surface + * samples. + * + * @invariant level < surface levels + * @invariant logical_array_layer < logical array length of surface + * @invariant logical_z_offset_px < logical depth of surface at level + */ +void +isl_surf_get_image_offset_sa(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa); + #ifdef __cplusplus } #endif diff --git a/src/isl/tests/.gitignore b/src/isl/tests/.gitignore new file mode 100644 index 00000000000..ba70ecfbee4 --- /dev/null +++ b/src/isl/tests/.gitignore @@ -0,0 +1 @@ +/isl_surf_get_image_offset_test diff --git a/src/isl/tests/isl_surf_get_image_offset_test.c b/src/isl/tests/isl_surf_get_image_offset_test.c new file mode 100644 index 00000000000..78362be4310 --- /dev/null +++ b/src/isl/tests/isl_surf_get_image_offset_test.c @@ -0,0 +1,197 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: 
+ * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include "brw_device_info.h" +#include "isl.h" + +#define BDW_GT2_DEVID 0x161a + +// An asssert that works regardless of NDEBUG. +#define t_assert(cond) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + abort(); \ + } \ + } while (0) + +static void +t_assert_extent4d(const struct isl_extent4d *e, uint32_t width, + uint32_t height, uint32_t depth, uint32_t array_len) +{ + t_assert(e->width == width); + t_assert(e->height == height); + t_assert(e->depth == depth); + t_assert(e->array_len == array_len); +} + +static void +t_assert_image_alignment_el(const struct isl_surf *surf, + uint32_t w, uint32_t h, uint32_t d) +{ + struct isl_extent3d align_el; + + align_el = isl_surf_get_image_alignment_el(surf); + t_assert(align_el.w == w); + t_assert(align_el.h == h); + t_assert(align_el.d == d); + +} + +static void +t_assert_image_alignment_sa(const struct isl_surf *surf, + uint32_t w, uint32_t h, uint32_t d) +{ + struct isl_extent3d align_sa; + + align_sa = isl_surf_get_image_alignment_sa(surf); + t_assert(align_sa.w == w); + t_assert(align_sa.h == h); + t_assert(align_sa.d == d); + +} + +static void +t_assert_offset(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t 
logical_z_offset_px, + uint32_t expected_x_offset_sa, + uint32_t expected_y_offset_sa) +{ + uint32_t x, y; + isl_surf_get_image_offset_sa(surf, level, logical_array_layer, + logical_z_offset_px, &x, &y); + + t_assert(x == expected_x_offset_sa); + t_assert(y == expected_y_offset_sa); +} + +static void +t_assert_phys_level0_sa(const struct isl_surf *surf, uint32_t width, + uint32_t height, uint32_t depth, uint32_t array_len) +{ + t_assert_extent4d(&surf->phys_level0_sa, width, height, depth, array_len); +} + +static void +test_bdw_2d_r8g8b8a8_unorm_512x512_a1_s1_noaux_y0(void) +{ + bool ok; + + struct isl_device dev; + isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID)); + + struct isl_surf surf; + ok = isl_surf_init(&dev, &surf, + .dim = ISL_SURF_DIM_2D, + .format = ISL_FORMAT_R8G8B8A8_UNORM, + .width = 512, + .height = 512, + .depth = 1, + .levels = 10, + .array_len = 1, + .samples = 1, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_DISABLE_AUX_BIT, + .tiling_flags = ISL_TILING_Y0_BIT); + t_assert(ok); + + t_assert_image_alignment_el(&surf, 4, 4, 1); + t_assert_image_alignment_sa(&surf, 4, 4, 1); + t_assert_phys_level0_sa(&surf, 512, 512, 1, 1); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 772); + t_assert(isl_surf_get_array_pitch_sa_rows(&surf) >= 772); + + t_assert_offset(&surf, 0, 0, 0, 0, 0); // +0, +0 + t_assert_offset(&surf, 1, 0, 0, 0, 512); // +0, +512 + t_assert_offset(&surf, 2, 0, 0, 256, 512); // +256, +0 + t_assert_offset(&surf, 3, 0, 0, 256, 640); // +0, +128 + t_assert_offset(&surf, 4, 0, 0, 256, 704); // +0, +64 + t_assert_offset(&surf, 5, 0, 0, 256, 736); // +0, +32 + t_assert_offset(&surf, 6, 0, 0, 256, 752); // +0, +16 + t_assert_offset(&surf, 7, 0, 0, 256, 760); // +0, +8 + t_assert_offset(&surf, 8, 0, 0, 256, 764); // +0, +4 + t_assert_offset(&surf, 9, 0, 0, 256, 768); // +0, +4 +} + +static void +test_bdw_2d_r8g8b8a8_unorm_1024x1024_a6_s1_noaux_y0(void) +{ + bool ok; + + struct isl_device dev; + isl_device_init(&dev, 
brw_get_device_info(BDW_GT2_DEVID)); + + struct isl_surf surf; + ok = isl_surf_init(&dev, &surf, + .dim = ISL_SURF_DIM_2D, + .format = ISL_FORMAT_R8G8B8A8_UNORM, + .width = 1024, + .height = 1024, + .depth = 1, + .levels = 11, + .array_len = 6, + .samples = 1, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_DISABLE_AUX_BIT, + .tiling_flags = ISL_TILING_Y0_BIT); + t_assert(ok); + + t_assert_image_alignment_el(&surf, 4, 4, 1); + t_assert_image_alignment_sa(&surf, 4, 4, 1); + t_assert_image_alignment_sa(&surf, 4, 4, 1); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 1540); + t_assert(isl_surf_get_array_pitch_sa_rows(&surf) >= 1540); + + for (uint32_t a = 0; a < 6; ++a) { + uint32_t b = a * isl_surf_get_array_pitch_sa_rows(&surf); + + t_assert_offset(&surf, 0, a, 0, 0, b + 0); // +0, +0 + t_assert_offset(&surf, 1, a, 0, 0, b + 1024); // +0, +1024 + t_assert_offset(&surf, 2, a, 0, 512, b + 1024); // +512, +0 + t_assert_offset(&surf, 3, a, 0, 512, b + 1280); // +0, +256 + t_assert_offset(&surf, 4, a, 0, 512, b + 1408); // +0, +128 + t_assert_offset(&surf, 5, a, 0, 512, b + 1472); // +0, +64 + t_assert_offset(&surf, 6, a, 0, 512, b + 1504); // +0, +32 + t_assert_offset(&surf, 7, a, 0, 512, b + 1520); // +0, +16 + t_assert_offset(&surf, 8, a, 0, 512, b + 1528); // +0, +8 + t_assert_offset(&surf, 9, a, 0, 512, b + 1532); // +0, +4 + t_assert_offset(&surf, 10, a, 0, 512, b + 1536); // +0, +4 + } +} + +int main(void) +{ + /* FINISHME: Add tests for npot sizes */ + /* FINISHME: Add tests for 1D surfaces */ + /* FINISHME: Add tests for 3D surfaces */ + + test_bdw_2d_r8g8b8a8_unorm_512x512_a1_s1_noaux_y0(); + test_bdw_2d_r8g8b8a8_unorm_1024x1024_a6_s1_noaux_y0(); +} -- cgit v1.2.3 From de67456d6d756e23142b9701e38ef5e11c301ae1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 14 Dec 2015 19:51:49 -0800 Subject: nir/lower_system_values: Stop supporting non-SSA The one user of this (i965) only ever calls it while in SSA form. 
--- src/glsl/nir/nir_lower_system_values.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c index 21904f81b97..03a98147ba9 100644 --- a/src/glsl/nir/nir_lower_system_values.c +++ b/src/glsl/nir/nir_lower_system_values.c @@ -40,17 +40,15 @@ convert_instr(nir_intrinsic_instr *instr) void *mem_ctx = ralloc_parent(instr); + assert(instr->dest.is_ssa); + nir_intrinsic_op op = nir_intrinsic_from_system_value(var->data.location); nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op); - if (instr->dest.is_ssa) { - nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, - instr->dest.ssa.num_components, NULL); - nir_ssa_def_rewrite_uses(&instr->dest.ssa, - nir_src_for_ssa(&new_instr->dest.ssa)); - } else { - nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx); - } + nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, + instr->dest.ssa.num_components, NULL); + nir_ssa_def_rewrite_uses(&instr->dest.ssa, + nir_src_for_ssa(&new_instr->dest.ssa)); nir_instr_insert_before(&instr->instr, &new_instr->instr); nir_instr_remove(&instr->instr); -- cgit v1.2.3 From c26e889a44abc10d207915bc56372d91ff30ac1c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 14 Dec 2015 19:51:50 -0800 Subject: nir/builder: Add a load_system_value helper While we're at it, go ahead and make nir_lower_clip use it. 
Cc: Rob Clark --- src/glsl/nir/nir_builder.h | 12 ++++++++++++ src/glsl/nir/nir_lower_clip.c | 13 +++---------- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index fe41c74b608..cb7787f1006 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -353,4 +353,16 @@ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value) nir_builder_instr_insert(build, &store->instr); } +static inline nir_ssa_def * +nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) +{ + nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op); + load->num_components = nir_intrinsic_infos[op].dest_components; + load->const_index[0] = index; + nir_ssa_dest_init(&load->instr, &load->dest, + nir_intrinsic_infos[op].dest_components, NULL); + nir_builder_instr_insert(build, &load->instr); + return &load->dest.ssa; +} + #endif /* NIR_BUILDER_H */ diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c index 36cc578d1ed..46301351c96 100644 --- a/src/glsl/nir/nir_lower_clip.c +++ b/src/glsl/nir/nir_lower_clip.c @@ -180,18 +180,11 @@ lower_clip_vs(nir_function_impl *impl, unsigned ucp_enables, for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) { if (ucp_enables & (1 << plane)) { - nir_intrinsic_instr *ucp; - - /* insert intrinsic to fetch ucp[plane]: */ - ucp = nir_intrinsic_instr_create(b.shader, - nir_intrinsic_load_user_clip_plane); - ucp->num_components = 4; - ucp->const_index[0] = plane; - nir_ssa_dest_init(&ucp->instr, &ucp->dest, 4, NULL); - nir_builder_instr_insert(&b, &ucp->instr); + nir_ssa_def *ucp = + nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane, plane); /* calculate clipdist[plane] - dot(ucp, cv): */ - clipdist[plane] = nir_fdot4(&b, &ucp->dest.ssa, cv); + clipdist[plane] = nir_fdot4(&b, ucp, cv); } else { /* 0.0 == don't-clip == disabled: */ -- cgit v1.2.3 From 
7ebd84fa4b1d7d4a8eac6d4d9ef2ab507650ec23 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 14 Dec 2015 19:51:51 -0800 Subject: nir/lower_system_values: Refactor and use the builder. Now that we have a helper in the builder for system values and a helper in core NIR to get the intrinsic opcode, there's really no point in having things split out into a helper function. This commit "modernizes" this pass to use helpers better and look more like newer passes. --- src/glsl/nir/nir_lower_system_values.c | 60 ++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c index 03a98147ba9..402f98e319c 100644 --- a/src/glsl/nir/nir_lower_system_values.c +++ b/src/glsl/nir/nir_lower_system_values.c @@ -26,44 +26,43 @@ */ #include "nir.h" -#include "main/mtypes.h" +#include "nir_builder.h" + +struct lower_system_values_state { + nir_builder builder; + bool progress; +}; static bool -convert_instr(nir_intrinsic_instr *instr) +convert_block(nir_block *block, void *void_state) { - if (instr->intrinsic != nir_intrinsic_load_var) - return false; + struct lower_system_values_state *state = void_state; - nir_variable *var = instr->variables[0]->var; - if (var->data.mode != nir_var_system_value) - return false; + nir_builder *b = &state->builder; - void *mem_ctx = ralloc_parent(instr); + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; - assert(instr->dest.is_ssa); + nir_intrinsic_instr *load_var = nir_instr_as_intrinsic(instr); - nir_intrinsic_op op = nir_intrinsic_from_system_value(var->data.location); - nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op); + if (load_var->intrinsic != nir_intrinsic_load_var) + continue; - nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, - instr->dest.ssa.num_components, NULL); - nir_ssa_def_rewrite_uses(&instr->dest.ssa, - 
nir_src_for_ssa(&new_instr->dest.ssa)); + nir_variable *var = load_var->variables[0]->var; + if (var->data.mode != nir_var_system_value) + continue; - nir_instr_insert_before(&instr->instr, &new_instr->instr); - nir_instr_remove(&instr->instr); + b->cursor = nir_after_instr(&load_var->instr); - return true; -} + nir_intrinsic_op sysval_op = + nir_intrinsic_from_system_value(var->data.location); + nir_ssa_def *sysval = nir_load_system_value(b, sysval_op, 0); -static bool -convert_block(nir_block *block, void *state) -{ - bool *progress = state; + nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval)); + nir_instr_remove(&load_var->instr); - nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_intrinsic) - *progress = convert_instr(nir_instr_as_intrinsic(instr)) || *progress; + state->progress = true; } return true; @@ -72,12 +71,15 @@ convert_block(nir_block *block, void *state) static bool convert_impl(nir_function_impl *impl) { - bool progress = false; + struct lower_system_values_state state; - nir_foreach_block(impl, convert_block, &progress); + state.progress = false; + nir_builder_init(&state.builder, impl); + + nir_foreach_block(impl, convert_block, &state); nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); - return progress; + return state.progress; } bool -- cgit v1.2.3 From 630b9528b36baf0efc35f461ce69338ba5930e5a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 14 Dec 2015 20:20:58 -0800 Subject: shader_enums: Add enums for gl_GlobalInvocationID and gl_LocalInvocationIndex --- src/glsl/nir/shader_enums.c | 2 ++ src/glsl/nir/shader_enums.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/shader_enums.c b/src/glsl/nir/shader_enums.c index 66a25e72344..f1dbe0158ab 100644 --- a/src/glsl/nir/shader_enums.c +++ b/src/glsl/nir/shader_enums.c @@ -172,6 +172,8 @@ const char * gl_system_value_name(gl_system_value sysval) 
ENUM(SYSTEM_VALUE_TESS_LEVEL_OUTER), ENUM(SYSTEM_VALUE_TESS_LEVEL_INNER), ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID), + ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX), + ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_ID), ENUM(SYSTEM_VALUE_WORK_GROUP_ID), ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS), ENUM(SYSTEM_VALUE_VERTEX_CNT), diff --git a/src/glsl/nir/shader_enums.h b/src/glsl/nir/shader_enums.h index dd0e0bad806..6f4a47dbd18 100644 --- a/src/glsl/nir/shader_enums.h +++ b/src/glsl/nir/shader_enums.h @@ -415,6 +415,8 @@ typedef enum */ /*@{*/ SYSTEM_VALUE_LOCAL_INVOCATION_ID, + SYSTEM_VALUE_LOCAL_INVOCATION_INDEX, + SYSTEM_VALUE_GLOBAL_INVOCATION_ID, SYSTEM_VALUE_WORK_GROUP_ID, SYSTEM_VALUE_NUM_WORK_GROUPS, /*@}*/ -- cgit v1.2.3 From 1035108a7f5ed2fc3698c3e9a0934d1bd0ab104b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 14 Dec 2015 20:22:09 -0800 Subject: nir/lower_system_values: Add support for computed builtins. In particular, this commit adds support for computing gl_GlobalInvocationID and gl_LocalInvocationIndex from other intrinsics. 
--- src/glsl/nir/nir_lower_system_values.c | 58 ++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c index 402f98e319c..5ed7a6ee1fd 100644 --- a/src/glsl/nir/nir_lower_system_values.c +++ b/src/glsl/nir/nir_lower_system_values.c @@ -55,9 +55,61 @@ convert_block(nir_block *block, void *void_state) b->cursor = nir_after_instr(&load_var->instr); - nir_intrinsic_op sysval_op = - nir_intrinsic_from_system_value(var->data.location); - nir_ssa_def *sysval = nir_load_system_value(b, sysval_op, 0); + nir_ssa_def *sysval; + switch (var->data.location) { + case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: { + /* From the GLSL man page for gl_GlobalInvocationID: + * + * "The value of gl_GlobalInvocationID is equal to + * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID" + */ + + nir_const_value local_size; + local_size.u[0] = b->shader->info.cs.local_size[0]; + local_size.u[1] = b->shader->info.cs.local_size[1]; + local_size.u[2] = b->shader->info.cs.local_size[2]; + + nir_ssa_def *group_id = + nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *local_id = + nir_load_system_value(b, nir_intrinsic_load_invocation_id, 0); + + sysval = nir_iadd(b, nir_imul(b, group_id, + nir_build_imm(b, 3, local_size)), + local_id); + break; + } + + case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: { + /* From the GLSL man page for gl_LocalInvocationIndex: + * + * "The value of gl_LocalInvocationIndex is equal to + * gl_LocalInvocationID.z * gl_WorkGroupSize.x * + * gl_WorkGroupSize.y + gl_LocalInvocationID.y * + * gl_WorkGroupSize.x + gl_LocalInvocationID.x" + */ + nir_ssa_def *local_id = + nir_load_system_value(b, nir_intrinsic_load_invocation_id, 0); + + unsigned stride_y = b->shader->info.cs.local_size[0]; + unsigned stride_z = b->shader->info.cs.local_size[0] * + b->shader->info.cs.local_size[1]; + + sysval = nir_iadd(b, nir_imul(b, 
nir_channel(b, local_id, 2), + nir_imm_int(b, stride_z)), + nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 1), + nir_imm_int(b, stride_y)), + nir_channel(b, local_id, 0))); + break; + } + + default: { + nir_intrinsic_op sysval_op = + nir_intrinsic_from_system_value(var->data.location); + sysval = nir_load_system_value(b, sysval_op, 0); + break; + } /* default */ + } nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval)); nir_instr_remove(&load_var->instr); -- cgit v1.2.3 From 2d4b7eda23c04ac7b4b4ec155d7100abcb74ea86 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 14 Dec 2015 20:33:46 -0800 Subject: nir/spirv: Add support for more CS intrinsics --- src/glsl/nir/spirv_to_nir.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a8f3016e050..6fe68f9e6bd 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -818,14 +818,12 @@ vtn_get_builtin_location(struct vtn_builder *b, assert(*mode == nir_var_shader_out); break; case SpvBuiltInNumWorkgroups: + *location = SYSTEM_VALUE_NUM_WORK_GROUPS; + set_mode_system_value(mode); case SpvBuiltInWorkgroupSize: - /* these are constants, need to be handled specially */ + /* This should already be handled */ unreachable("unsupported builtin"); break; - case SpvBuiltInGlobalInvocationId: - case SpvBuiltInLocalInvocationIndex: - /* these are computed values, need to be handled specially */ - unreachable("unsupported builtin"); case SpvBuiltInWorkgroupId: *location = SYSTEM_VALUE_WORK_GROUP_ID; set_mode_system_value(mode); @@ -834,6 +832,14 @@ vtn_get_builtin_location(struct vtn_builder *b, *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; set_mode_system_value(mode); break; + case SpvBuiltInLocalInvocationIndex: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; + set_mode_system_value(mode); + break; + case SpvBuiltInGlobalInvocationId: + *location = 
SYSTEM_VALUE_GLOBAL_INVOCATION_ID; + set_mode_system_value(mode); + break; case SpvBuiltInHelperInvocation: default: unreachable("unsupported builtin"); @@ -894,6 +900,19 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, case SpvDecorationBuiltIn: { SpvBuiltIn builtin = dec->literals[0]; + if (builtin == SpvBuiltInWorkgroupSize) { + /* This shouldn't be a builtin. It's actually a constant. */ + var->data.mode = nir_var_global; + var->data.read_only = true; + + nir_constant *val = ralloc(var, nir_constant); + val->value.u[0] = b->shader->info.cs.local_size[0]; + val->value.u[1] = b->shader->info.cs.local_size[1]; + val->value.u[2] = b->shader->info.cs.local_size[2]; + var->constant_initializer = val; + break; + } + nir_variable_mode mode = var->data.mode; vtn_get_builtin_location(b, builtin, &var->data.location, &mode); var->data.explicit_location = true; -- cgit v1.2.3 From 306abbead34815be303daebba9333b4c4f0975e8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Dec 2015 11:36:55 -0800 Subject: anv/pipeline: Properly set IncludeVertexHandles in 3DSTATE_GS --- src/vulkan/gen7_pipeline.c | 1 + src/vulkan/gen8_pipeline.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index e3bfc708deb..63aca1f6b32 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -491,6 +491,7 @@ genX(graphics_pipeline_create)( .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, .OutputTopology = gs_prog_data->output_topology, .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, .DispatchGRFStartRegisterforURBData = gs_prog_data->base.base.dispatch_grf_start_reg, diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index d3b307ca5f7..3c3f079b408 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -453,6 +453,7 @@ 
genX(graphics_pipeline_create)( .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, .OutputTopology = gs_prog_data->output_topology, .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, .DispatchGRFStartRegisterForURBData = gs_prog_data->base.base.dispatch_grf_start_reg, -- cgit v1.2.3 From 28c4ef9d6ce63b03c5849332ed93ada4a563ecee Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Dec 2015 11:49:26 -0800 Subject: anv/device: Bump the size of the instruction block pool Some CTS test shaders were failing to compile. At some point soon, we really need to make a real pipeline cache and stop using a block pool for this. --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index df6472edfde..fe44d1cb036 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -714,7 +714,7 @@ VkResult anv_CreateDevice( anv_state_pool_init(&device->dynamic_state_pool, &device->dynamic_state_block_pool); - anv_block_pool_init(&device->instruction_block_pool, device, 4096); + anv_block_pool_init(&device->instruction_block_pool, device, 8192); anv_block_pool_init(&device->surface_state_block_pool, device, 4096); anv_state_pool_init(&device->surface_state_pool, -- cgit v1.2.3 From d61ff1ed0827df87b811862cf4e0d2985c075aa7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Dec 2015 16:24:19 -0800 Subject: anv/descriptor_set: Initialize immutable_samplers to NULL Previously this wasn't a problem. However, with the new API update, descriptor sets can now be sparse so the client doesn't have to provide an entry for every binding. This means that it's possible for a binding to be uninitialized other than the memset. In that case, we want to have a null array of immutable samplers. 
--- src/vulkan/anv_descriptor_set.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index a7b6b773012..904051b4f13 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -70,9 +70,12 @@ VkResult anv_CreateDescriptorSetLayout( set_layout->shader_stages = 0; set_layout->size = 0; - /* Initialize all binding_layout entries to -1 */ - memset(set_layout->binding, -1, - (max_binding + 1) * sizeof(set_layout->binding[0])); + for (uint32_t b = 0; b <= max_binding; b++) { + /* Initialize all binding_layout entries to -1 */ + memset(&set_layout->binding[b], -1, sizeof(set_layout->binding[b])); + + set_layout->binding[b].immutable_samplers = NULL; + } /* Initialize all samplers to 0 */ memset(samplers, 0, immutable_sampler_count * sizeof(*samplers)); -- cgit v1.2.3 From 1c51d91bfeb33fc750583729fb1b8a1d1d3e5258 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Dec 2015 16:26:08 -0800 Subject: anv/pipeline: Allow the user to pass a null MultisampleCreateInfo According to section 5.2 of the Vulkan spec, this is allowed for color-only rendering pipelines. 
--- src/vulkan/anv_pipeline.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index bf243cdb6b4..bf983ed8f2a 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -900,7 +900,6 @@ anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) assert(info->pInputAssemblyState); assert(info->pViewportState); assert(info->pRasterizationState); - assert(info->pMultisampleState); if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) assert(info->pDepthStencilState); -- cgit v1.2.3 From b2fe8b4673c5c52b6c85c7db4ec7f626e1cb6b79 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Dec 2015 17:24:18 -0800 Subject: nir/spirv: Add a missing break statement --- src/glsl/nir/spirv_to_nir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 6fe68f9e6bd..1b1d7b5098f 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -820,6 +820,7 @@ vtn_get_builtin_location(struct vtn_builder *b, case SpvBuiltInNumWorkgroups: *location = SYSTEM_VALUE_NUM_WORK_GROUPS; set_mode_system_value(mode); + break; case SpvBuiltInWorkgroupSize: /* This should already be handled */ unreachable("unsupported builtin"); -- cgit v1.2.3 From c643e9cea87b4676e648e431d5c39f2880a1454c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 16 Dec 2015 17:27:35 -0800 Subject: anv/state: Allow levelCount to be 0 This can happen if the client is creating an image view of a textureable surface and they only ever intend to render to that view. 
--- src/vulkan/gen7_state.c | 4 ++-- src/vulkan/gen8_state.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index c6de40d3b9a..cb299a3278b 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -329,7 +329,7 @@ genX(image_view_init)(struct anv_image_view *iview, * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. */ surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = range->levelCount - 1; + surface_state.MIPCountLOD = MIN2(range->levelCount, 1) - 1; GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, &surface_state); @@ -369,7 +369,7 @@ genX(image_view_init)(struct anv_image_view *iview, format->surface_format); surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = range->levelCount - 1; + surface_state.MIPCountLOD = MIN2(range->levelCount, 1) - 1; GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, &surface_state); diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index fe9f088d9ca..199905b60dc 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -305,7 +305,7 @@ genX(image_view_init)(struct anv_image_view *iview, * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. 
*/ surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = range->levelCount - 1; + surface_state.MIPCountLOD = MIN2(range->levelCount, 1) - 1; GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, &surface_state); @@ -344,7 +344,7 @@ genX(image_view_init)(struct anv_image_view *iview, format_info->surface_format); surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = range->levelCount - 1; + surface_state.MIPCountLOD = MIN2(range->levelCount, 1) - 1; GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, &surface_state); -- cgit v1.2.3 From b1325404c5931e0ec744b696eb078774f813505d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Dec 2015 11:00:38 -0800 Subject: anv/device: Handle zero-sized memory allocations --- src/vulkan/anv_device.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index fe44d1cb036..17056e35cb5 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1002,6 +1002,12 @@ VkResult anv_AllocateMemory( assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); + if (pAllocateInfo->allocationSize == 0) { + /* Apparently, this is allowed */ + *pMem = VK_NULL_HANDLE; + return VK_SUCCESS; + } + /* We support exactly one memory heap. 
*/ assert(pAllocateInfo->memoryTypeIndex == 0 || (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2)); @@ -1037,6 +1043,9 @@ void anv_FreeMemory( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + if (mem == NULL) + return; + if (mem->bo.map) anv_gem_munmap(mem->bo.map, mem->bo.size); @@ -1057,6 +1066,11 @@ VkResult anv_MapMemory( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + if (mem == NULL) { + *ppData = NULL; + return VK_SUCCESS; + } + /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only * takes a VkDeviceMemory pointer, it seems like only one map of the memory * at a time is valid. We could just mmap up front and return an offset @@ -1081,6 +1095,9 @@ void anv_UnmapMemory( { ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + if (mem == NULL) + return; + anv_gem_munmap(mem->map, mem->map_size); } @@ -1207,8 +1224,13 @@ VkResult anv_BindBufferMemory( ANV_FROM_HANDLE(anv_device_memory, mem, _memory); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - buffer->bo = &mem->bo; - buffer->offset = memoryOffset; + if (mem) { + buffer->bo = &mem->bo; + buffer->offset = memoryOffset; + } else { + buffer->bo = NULL; + buffer->offset = 0; + } return VK_SUCCESS; } @@ -1222,8 +1244,13 @@ VkResult anv_BindImageMemory( ANV_FROM_HANDLE(anv_device_memory, mem, _memory); ANV_FROM_HANDLE(anv_image, image, _image); - image->bo = &mem->bo; - image->offset = memoryOffset; + if (mem) { + image->bo = &mem->bo; + image->offset = memoryOffset; + } else { + image->bo = NULL; + image->offset = 0; + } return VK_SUCCESS; } -- cgit v1.2.3 From 3395ca17d1a181825759ea2ed0eb19979fad0c9d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Dec 2015 11:45:04 -0800 Subject: isl: Add a is_storage_image_format helper --- src/isl/isl.h | 2 ++ src/isl/isl_image.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'src') diff 
--git a/src/isl/isl.h b/src/isl/isl.h index 6baac38bc2a..22fc7540ac7 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -754,6 +754,8 @@ isl_format_block_is_1x1x1(enum isl_format fmt) return fmtl->bw == 1 && fmtl->bh == 1 && fmtl->bd == 1; } +bool isl_is_storage_image_format(enum isl_format fmt); + enum isl_format isl_lower_storage_image_format(const struct isl_device *dev, enum isl_format fmt); diff --git a/src/isl/isl_image.c b/src/isl/isl_image.c index 2d146d59ac5..773160432b9 100644 --- a/src/isl/isl_image.c +++ b/src/isl/isl_image.c @@ -24,6 +24,57 @@ #include "isl.h" #include "brw_compiler.h" +bool +isl_is_storage_image_format(enum isl_format format) +{ + /* XXX: Maybe we should put this in the CSV? */ + + switch (format) { + case ISL_FORMAT_R32G32B32A32_UINT: + case ISL_FORMAT_R32G32B32A32_SINT: + case ISL_FORMAT_R32G32B32A32_FLOAT: + case ISL_FORMAT_R32_UINT: + case ISL_FORMAT_R32_SINT: + case ISL_FORMAT_R32_FLOAT: + case ISL_FORMAT_R16G16B16A16_UINT: + case ISL_FORMAT_R16G16B16A16_SINT: + case ISL_FORMAT_R16G16B16A16_FLOAT: + case ISL_FORMAT_R32G32_UINT: + case ISL_FORMAT_R32G32_SINT: + case ISL_FORMAT_R32G32_FLOAT: + case ISL_FORMAT_R8G8B8A8_UINT: + case ISL_FORMAT_R8G8B8A8_SINT: + case ISL_FORMAT_R16G16_UINT: + case ISL_FORMAT_R16G16_SINT: + case ISL_FORMAT_R16G16_FLOAT: + case ISL_FORMAT_R8G8_UINT: + case ISL_FORMAT_R8G8_SINT: + case ISL_FORMAT_R16_UINT: + case ISL_FORMAT_R16_FLOAT: + case ISL_FORMAT_R16_SINT: + case ISL_FORMAT_R8_UINT: + case ISL_FORMAT_R8_SINT: + case ISL_FORMAT_R10G10B10A2_UINT: + case ISL_FORMAT_R10G10B10A2_UNORM: + case ISL_FORMAT_R11G11B10_FLOAT: + case ISL_FORMAT_R16G16B16A16_UNORM: + case ISL_FORMAT_R16G16B16A16_SNORM: + case ISL_FORMAT_R8G8B8A8_UNORM: + case ISL_FORMAT_R8G8B8A8_SNORM: + case ISL_FORMAT_R16G16_UNORM: + case ISL_FORMAT_R16G16_SNORM: + case ISL_FORMAT_R8G8_UNORM: + case ISL_FORMAT_R8G8_SNORM: + case ISL_FORMAT_R16_UNORM: + case ISL_FORMAT_R16_SNORM: + case ISL_FORMAT_R8_UNORM: + case ISL_FORMAT_R8_SNORM: + return 
true; + default: + return false; + } +} + enum isl_format isl_lower_storage_image_format(const struct isl_device *dev, enum isl_format format) -- cgit v1.2.3 From 952bf05897dadd710b160ff66e1eb8c342fa33ac Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Dec 2015 11:45:24 -0800 Subject: anv/image: Properly report buffer features --- src/vulkan/anv_formats.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index eba0f1122a4..b739b0fb312 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -271,7 +271,7 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d if (format->surface_format== ISL_FORMAT_UNSUPPORTED) goto unsupported; - uint32_t linear = 0, tiled = 0; + uint32_t linear = 0, tiled = 0, buffer = 0; if (anv_format_is_depth_or_stencil(format)) { tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; if (physical_device->info->gen >= 8) { @@ -292,6 +292,9 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d VK_FORMAT_FEATURE_BLIT_SRC_BIT; linear |= flags; tiled |= flags; + + if (!isl_format_is_compressed(format->surface_format)) + buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; } if (info->render_target <= gen) { flags = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | @@ -304,19 +307,33 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; } if (info->input_vb <= gen) { - linear |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + } + + if (isl_is_storage_image_format(format->surface_format)) { + tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; + } + + if (format->surface_format == ISL_FORMAT_R32_SINT && + format->surface_format == 
ISL_FORMAT_R32_UINT) { + tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; + linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; + buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; } } out_properties->linearTilingFeatures = linear; out_properties->optimalTilingFeatures = tiled; - out_properties->bufferFeatures = 0; /* FINISHME */ + out_properties->bufferFeatures = buffer; return; unsupported: out_properties->linearTilingFeatures = 0; out_properties->optimalTilingFeatures = 0; + out_properties->bufferFeatures = 0; } -- cgit v1.2.3 From 167809365bd132bd303820d6aaa7de6f4ed4faa8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Dec 2015 13:44:50 -0800 Subject: anv/formats: Add more PACK32 formats --- src/vulkan/anv_formats.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index b739b0fb312..2ebf6c73774 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -73,16 +73,23 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT, .num_channels = 4), fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT, .num_channels = 4), fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB, .num_channels = 4), - fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32, ISL_FORMAT_B10G10R10A2_UNORM, .num_channels = 4), - fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32, ISL_FORMAT_B10G10R10A2_SNORM, .num_channels = 4), - fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED, .num_channels = 4), - fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED, .num_channels = 4), + fmt(VK_FORMAT_A8B8G8R8_UNORM_PACK32, ISL_FORMAT_R8G8B8A8_UNORM, .num_channels = 4), + fmt(VK_FORMAT_A8B8G8R8_SNORM_PACK32, ISL_FORMAT_R8G8B8A8_SNORM, .num_channels = 4), + fmt(VK_FORMAT_A8B8G8R8_USCALED_PACK32, ISL_FORMAT_R8G8B8A8_USCALED, .num_channels = 4), + fmt(VK_FORMAT_A8B8G8R8_SSCALED_PACK32, 
ISL_FORMAT_R8G8B8A8_SSCALED, .num_channels = 4), + fmt(VK_FORMAT_A8B8G8R8_UINT_PACK32, ISL_FORMAT_R8G8B8A8_UINT, .num_channels = 4), + fmt(VK_FORMAT_A8B8G8R8_SINT_PACK32, ISL_FORMAT_R8G8B8A8_SINT, .num_channels = 4), + fmt(VK_FORMAT_A8B8G8R8_SRGB_PACK32, ISL_FORMAT_R8G8B8A8_UNORM_SRGB, .num_channels = 4), + fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32, ISL_FORMAT_B10G10R10A2_UNORM, .num_channels = 4), + fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32, ISL_FORMAT_B10G10R10A2_SNORM, .num_channels = 4), + fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED, .num_channels = 4), + fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED, .num_channels = 4), fmt(VK_FORMAT_A2R10G10B10_UINT_PACK32, ISL_FORMAT_B10G10R10A2_UINT, .num_channels = 4), fmt(VK_FORMAT_A2R10G10B10_SINT_PACK32, ISL_FORMAT_B10G10R10A2_SINT, .num_channels = 4), - fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32, ISL_FORMAT_R10G10B10A2_UNORM, .num_channels = 4), - fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32, ISL_FORMAT_R10G10B10A2_SNORM, .num_channels = 4), - fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, ISL_FORMAT_R10G10B10A2_USCALED, .num_channels = 4), - fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED, .num_channels = 4), + fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32, ISL_FORMAT_R10G10B10A2_UNORM, .num_channels = 4), + fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32, ISL_FORMAT_R10G10B10A2_SNORM, .num_channels = 4), + fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, ISL_FORMAT_R10G10B10A2_USCALED, .num_channels = 4), + fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED, .num_channels = 4), fmt(VK_FORMAT_A2B10G10R10_UINT_PACK32, ISL_FORMAT_R10G10B10A2_UINT, .num_channels = 4), fmt(VK_FORMAT_A2B10G10R10_SINT_PACK32, ISL_FORMAT_R10G10B10A2_SINT, .num_channels = 4), fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM, .num_channels = 1), -- cgit v1.2.3 From 1473a8dc6f4a015558c8c3e818d706f9af584364 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Dec 2015 13:51:09 -0800 
Subject: anv/formats: Add more 64-bit formats --- src/vulkan/anv_formats.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 2ebf6c73774..9a895e05432 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -132,9 +132,17 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT, .num_channels = 4,), fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT, .num_channels = 4,), fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT, .num_channels = 4,), + fmt(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU, .num_channels = 1), + fmt(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU, .num_channels = 1), fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT, .num_channels = 1), + fmt(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU, .num_channels = 2), + fmt(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU, .num_channels = 2), fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT, .num_channels = 2), + fmt(VK_FORMAT_R64G64B64_UINT, ISL_FORMAT_R64G64B64_PASSTHRU, .num_channels = 3), + fmt(VK_FORMAT_R64G64B64_SINT, ISL_FORMAT_R64G64B64_PASSTHRU, .num_channels = 3), fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT, .num_channels = 3), + fmt(VK_FORMAT_R64G64B64A64_UINT, ISL_FORMAT_R64G64B64A64_PASSTHRU, .num_channels = 4), + fmt(VK_FORMAT_R64G64B64A64_SINT, ISL_FORMAT_R64G64B64A64_PASSTHRU, .num_channels = 4), fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT, .num_channels = 4), fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT, .num_channels = 3), fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP, .num_channels = 3), -- cgit v1.2.3 From d7f66f9f6fb47ddf30c782961eb2fc1dcc593975 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Dec 2015 16:36:23 -0800 Subject: nir/spirv: Array lengths are constants not literals --- src/glsl/nir/spirv_to_nir.c | 10 
++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 1b1d7b5098f..07d47891770 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -515,8 +515,14 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, struct vtn_type *array_element = vtn_value(b, w[2], vtn_value_type_type)->type; - /* A length of 0 is used to denote unsized arrays */ - unsigned length = (opcode == SpvOpTypeArray) ? w[3] : 0; + unsigned length; + if (opcode == SpvOpTypeRuntimeArray) { + /* A length of 0 is used to denote unsized arrays */ + length = 0; + } else { + length = + vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0]; + } val->type->type = glsl_array_type(array_element->type, length); val->type->array_element = array_element; -- cgit v1.2.3 From 5e82a91324d13c9194416347521120a93beef133 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 18 Dec 2015 01:42:46 -0800 Subject: anv/gen8: Add support for gl_NumWorkGroups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kristian Høgsberg Signed-off-by: Jordan Justen --- src/vulkan/anv_cmd_buffer.c | 38 ++++++++++++++++++++++++++++++++------ src/vulkan/anv_pipeline.c | 15 ++++++++++++++- src/vulkan/anv_private.h | 2 ++ src/vulkan/gen8_cmd_buffer.c | 19 +++++++++++++++++++ 4 files changed, 67 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 759c4677a74..bfe06591d6d 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -619,17 +619,22 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_pipeline_layout *layout; uint32_t color_count, bias, state_offset; - if (stage == MESA_SHADER_COMPUTE) - layout = cmd_buffer->state.compute_pipeline->layout; - else + switch (stage) { + case MESA_SHADER_FRAGMENT: layout = 
cmd_buffer->state.pipeline->layout; - - if (stage == MESA_SHADER_FRAGMENT) { bias = MAX_RTS; color_count = subpass->color_count; - } else { + break; + case MESA_SHADER_COMPUTE: + layout = cmd_buffer->state.compute_pipeline->layout; + bias = 1; + color_count = 0; + break; + default: + layout = cmd_buffer->state.pipeline->layout; bias = 0; color_count = 0; + break; } /* This is a little awkward: layout can be NULL but we still have to @@ -657,6 +662,27 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, iview->bo, iview->offset); } + if (stage == MESA_SHADER_COMPUTE && + cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) { + struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; + uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset; + + struct anv_state surface_state; + surface_state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer); + + fill_descriptor_buffer_surface_state(cmd_buffer->device, + surface_state.map, stage, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + bo_offset, 12); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(surface_state); + + bt_map[0] = surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); + } + if (layout == NULL) goto out; diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index bf983ed8f2a..1906205c4d0 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -348,7 +348,18 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, /* All binding table offsets provided by apply_pipeline_layout() are * relative to the start of the bindint table (plus MAX_RTS for VS). */ - unsigned bias = stage == MESA_SHADER_FRAGMENT ? 
MAX_RTS : 0; + unsigned bias; + switch (stage) { + case MESA_SHADER_FRAGMENT: + bias = MAX_RTS; + break; + case MESA_SHADER_COMPUTE: + bias = 1; + break; + default: + bias = 0; + break; + } prog_data->binding_table.size_bytes = 0; prog_data->binding_table.texture_start = bias; prog_data->binding_table.ubo_start = bias; @@ -619,6 +630,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, memset(prog_data, 0, sizeof(*prog_data)); + prog_data->binding_table.work_groups_start = 0; + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_COMPUTE, &prog_data->base); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index b763f701570..a8583c98db4 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1031,6 +1031,8 @@ struct anv_cmd_state { uint32_t vb_dirty; anv_cmd_dirty_mask_t dirty; anv_cmd_dirty_mask_t compute_dirty; + uint32_t num_workgroups_offset; + struct anv_bo *num_workgroups_bo; VkShaderStageFlags descriptors_dirty; VkShaderStageFlags push_constants_dirty; uint32_t scratch_size; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 5e566d3aaa1..73d475eae0e 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -617,6 +617,20 @@ void genX(CmdDispatch)( struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + if (prog_data->uses_num_work_groups) { + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); + uint32_t *sizes = state.map; + sizes[0] = x; + sizes[1] = y; + sizes[2] = z; + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + cmd_buffer->state.num_workgroups_offset = state.offset; + cmd_buffer->state.num_workgroups_bo = + &cmd_buffer->device->dynamic_state_block_pool.bo; + } + cmd_buffer_flush_compute_state(cmd_buffer); anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), @@ -649,6 +663,11 @@ void 
genX(CmdDispatchIndirect)( struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; + if (prog_data->uses_num_work_groups) { + cmd_buffer->state.num_workgroups_offset = bo_offset; + cmd_buffer->state.num_workgroups_bo = bo; + } + cmd_buffer_flush_compute_state(cmd_buffer); emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); -- cgit v1.2.3 From 8ac46d84ff6ed45671832590d143ad4dfa7ff66c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 15 Dec 2015 13:51:23 -0800 Subject: vk: Fix check for I915_PARAM_MMAP_VERSION Comparing the wrong thing for < 1. --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 17056e35cb5..e276cc0b9c0 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -119,7 +119,7 @@ anv_physical_device_init(struct anv_physical_device *device, goto fail; } - if (anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION < 1)) { + if (anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) { result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, "kernel missing wc mmap"); goto fail; -- cgit v1.2.3 From c4802bc44c0625fd3bb8b7da7068ce5673ce3d1e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sat, 19 Dec 2015 22:17:19 -0800 Subject: vk/gen8: Implement VkEvent for gen8 We use PIPE_CONTROL for setting and resetting the event from cmd buffers and MI_SEMAPHORE_WAIT in polling mode for waiting on an event. 
--- src/vulkan/anv_cmd_buffer.c | 28 --------------- src/vulkan/anv_device.c | 82 ++++++++++++++++++++++++++++++++++++-------- src/vulkan/anv_private.h | 6 ++++ src/vulkan/gen7_cmd_buffer.c | 28 +++++++++++++++ src/vulkan/gen8_cmd_buffer.c | 64 ++++++++++++++++++++++++++++++++++ 5 files changed, 166 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index bfe06591d6d..9b54dee96bc 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -900,34 +900,6 @@ anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, } } -void anv_CmdSetEvent( - VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - stub(); -} - -void anv_CmdResetEvent( - VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - stub(); -} - -void anv_CmdWaitEvents( - VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - uint32_t memBarrierCount, - const void* const* ppMemBarriers) -{ - stub(); -} - struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage) diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index e276cc0b9c0..746fecb760f 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1443,41 +1443,95 @@ void anv_DestroySemaphore( // Event functions VkResult anv_CreateEvent( - VkDevice device, + VkDevice _device, const VkEventCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkEvent* pEvent) { - stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_state state; + struct anv_event *event; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO); + + state = anv_state_pool_alloc(&device->dynamic_state_pool, + sizeof(*event), 4); + event = state.map; + event->state = state; + event->semaphore = 
VK_EVENT_RESET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_sfence(); + __builtin_ia32_clflush(event); + } + + *pEvent = anv_event_to_handle(event); + + return VK_SUCCESS; } void anv_DestroyEvent( - VkDevice device, - VkEvent event, + VkDevice _device, + VkEvent _event, const VkAllocationCallbacks* pAllocator) { - stub(); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_state_pool_free(&device->dynamic_state_pool, event->state); } VkResult anv_GetEventStatus( - VkDevice device, - VkEvent event) + VkDevice _device, + VkEvent _event) { - stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_clflush(event); + __builtin_ia32_lfence(); + } + + return event->semaphore; } VkResult anv_SetEvent( - VkDevice device, - VkEvent event) + VkDevice _device, + VkEvent _event) { - stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + event->semaphore = VK_EVENT_SET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_sfence(); + __builtin_ia32_clflush(event); + } + + return VK_SUCCESS; } VkResult anv_ResetEvent( - VkDevice device, - VkEvent event) + VkDevice _device, + VkEvent _event) { - stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + event->semaphore = VK_EVENT_RESET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. 
*/ + __builtin_ia32_sfence(); + __builtin_ia32_clflush(event); + } + + return VK_SUCCESS; } // Buffer functions diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a8583c98db4..b632f6e85a9 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1208,6 +1208,11 @@ struct anv_fence { bool ready; }; +struct anv_event { + uint32_t semaphore; + struct anv_state state; +}; + struct nir_shader; struct anv_shader_module { @@ -1658,6 +1663,7 @@ ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index c31ea338e64..7fdef1027e2 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -856,3 +856,31 @@ void genX(CmdEndRenderPass)( .TextureCacheInvalidationEnable = true, .CommandStreamerStallEnable = true); } + +void genX(CmdSetEvent)( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void genX(CmdResetEvent)( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void genX(CmdWaitEvents)( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) +{ + stub(); +} diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 73d475eae0e..1a1efed670d 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ 
-1035,3 +1035,67 @@ void genX(CmdCopyQueryPoolResults)( dst_offset += destStride; } } + +void genX(CmdSetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }, + .ImmediateData = VK_EVENT_SET); +} + +void genX(CmdResetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }, + .ImmediateData = VK_EVENT_RESET); +} + +void genX(CmdWaitEvents)( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + for (uint32_t i = 0; i < eventCount; i++) { + ANV_FROM_HANDLE(anv_event, event, pEvents[i]); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), + .WaitMode = PollingMode, + .CompareOperation = SAD_EQUAL_SDD, + .SemaphoreDataDword = VK_EVENT_SET, + .SemaphoreAddress = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }); + } + + genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, + false, /* byRegion */ + memBarrierCount, ppMemBarriers); +} -- cgit v1.2.3 From b49aaf5de0e2959c61ee1dd69e6980e11bf4d06c Mon Sep 17 00:00:00 2001 From: Kristian 
Høgsberg Kristensen Date: Sat, 19 Dec 2015 22:18:12 -0800 Subject: vk: Remove stale 48 bit addresses FIXMEs This has worked fine for a long time. --- src/vulkan/gen8_cmd_buffer.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 1a1efed670d..d571f9c97e3 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -854,7 +854,7 @@ emit_ps_depth_count(struct anv_batch *batch, anv_batch_emit(batch, GENX(PIPE_CONTROL), .DestinationAddressType = DAT_PPGTT, .PostSyncOperation = WritePSDepthCount, - .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ + .Address = { bo, offset }); } void genX(CmdBeginQuery)( @@ -926,8 +926,7 @@ void genX(CmdWriteTimestamp)( anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .DestinationAddressType = DAT_PPGTT, .PostSyncOperation = WriteTimestamp, - .Address = /* FIXME: This is only lower 32 bits */ - { &pool->bo, entry * 8 }); + .Address = { &pool->bo, entry * 8 }); break; } } @@ -1023,13 +1022,11 @@ void genX(CmdCopyQueryPoolResults)( anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), .RegisterAddress = CS_GPR(2), - /* FIXME: This is only lower 32 bits */ .MemoryAddress = { buffer->bo, dst_offset }); if (flags & VK_QUERY_RESULT_64_BIT) anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), .RegisterAddress = CS_GPR(2) + 4, - /* FIXME: This is only lower 32 bits */ .MemoryAddress = { buffer->bo, dst_offset + 4 }); dst_offset += destStride; -- cgit v1.2.3 From 220ac9337b24c7099ac3534ba14c61592e1266ae Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sat, 19 Dec 2015 22:25:57 -0800 Subject: vk: Only require wc bo mmap for !llc GPUs --- src/vulkan/anv_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 746fecb760f..0043bea5d13 100644 --- a/src/vulkan/anv_device.c 
+++ b/src/vulkan/anv_device.c @@ -119,7 +119,8 @@ anv_physical_device_init(struct anv_physical_device *device, goto fail; } - if (anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) { + if (!device->info->has_llc && + anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) { result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, "kernel missing wc mmap"); goto fail; -- cgit v1.2.3 From 413a9d351746a0e3ecb274a8f39dcefec7010edd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Dec 2015 14:39:02 -0800 Subject: nir/print: Factor variable name lookup into a helper Otherwise, we have a problem when we go to print functions with arguments because their names get added to the hash table during declaration which happens after we print the prototype. --- src/glsl/nir/nir_print.c | 61 ++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index 10f46cef1de..26b1cbb467d 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -219,6 +219,35 @@ print_alu_instr(nir_alu_instr *instr, print_state *state) } } +static const char * +get_var_name(nir_variable *var, print_state *state) +{ + if (state->ht == NULL) + return var->name; + + assert(state->syms); + + struct hash_entry *entry = _mesa_hash_table_search(state->ht, var); + if (entry) + return entry->data; + + char *name; + + struct set_entry *set_entry = _mesa_set_search(state->syms, var->name); + if (set_entry != NULL) { + /* we have a collision with another name, append an @ + a unique index */ + name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++); + } else { + /* Mark this one as seen */ + _mesa_set_add(state->syms, var->name); + name = var->name; + } + + _mesa_hash_table_insert(state->ht, var, name); + + return name; +} + static void print_var_decl(nir_variable *var, print_state *state) { @@ -239,20 +268,7 @@ print_var_decl(nir_variable *var, print_state *state) 
glsl_print_type(var->type, fp); - struct set_entry *entry = NULL; - if (state->syms) - entry = _mesa_set_search(state->syms, var->name); - - char *name; - - if (entry != NULL) { - /* we have a collision with another name, append an @ + a unique index */ - name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++); - } else { - name = var->name; - } - - fprintf(fp, " %s", name); + fprintf(fp, " %s", get_var_name(var, state)); if (var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out || @@ -296,28 +312,13 @@ print_var_decl(nir_variable *var, print_state *state) } fprintf(fp, "\n"); - - if (state->syms) { - _mesa_set_add(state->syms, name); - _mesa_hash_table_insert(state->ht, var, name); - } } static void print_var(nir_variable *var, print_state *state) { FILE *fp = state->fp; - const char *name; - if (state->ht) { - struct hash_entry *entry = _mesa_hash_table_search(state->ht, var); - - assert(entry != NULL); - name = entry->data; - } else { - name = var->name; - } - - fprintf(fp, "%s", name); + fprintf(fp, "%s", get_var_name(var, state)); } static void -- cgit v1.2.3 From 1749e667ea38ec539d29c92c08f94738afcb91ad Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Dec 2015 11:16:16 -0800 Subject: nir: Add a stub function inlining pass All it does is remove the return at the end, but it's good enough for simple functions. 
--- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir.h | 3 ++ src/glsl/nir/nir_lower_returns.c | 78 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 src/glsl/nir/nir_lower_returns.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index e64c31e17c6..32fbf6d1b6c 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -44,6 +44,7 @@ NIR_FILES = \ nir/nir_lower_alu_to_scalar.c \ nir/nir_lower_atomics.c \ nir/nir_lower_clip.c \ + nir/nir_lower_returns.c \ nir/nir_lower_global_vars_to_local.c \ nir/nir_lower_gs_intrinsics.c \ nir/nir_lower_load_const_to_scalar.c \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 021c4280557..0cf5f80c591 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1998,6 +1998,9 @@ int nir_gs_count_vertices(const nir_shader *shader); bool nir_split_var_copies(nir_shader *shader); +bool nir_lower_returns_impl(nir_function_impl *impl); +bool nir_lower_returns(nir_shader *shader); + void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx); void nir_lower_var_copies(nir_shader *shader); diff --git a/src/glsl/nir/nir_lower_returns.c b/src/glsl/nir/nir_lower_returns.c new file mode 100644 index 00000000000..a15d0332ed4 --- /dev/null +++ b/src/glsl/nir/nir_lower_returns.c @@ -0,0 +1,78 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or 
substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" + +static bool +assert_no_returns_block(nir_block *block, void *state) +{ + (void)state; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_jump) + continue; + + nir_jump_instr *jump = nir_instr_as_jump(instr); + assert(jump->type != nir_jump_return); + } + + return true; +} + +bool +nir_lower_returns_impl(nir_function_impl *impl) +{ + bool progress = false; + + assert(impl->end_block->predecessors->entries == 1); + + struct set_entry *entry = + _mesa_set_next_entry(impl->end_block->predecessors, NULL); + + nir_block *last_block = (nir_block *)entry->key; + + nir_instr *last_instr = nir_block_last_instr(last_block); + if (last_instr && last_instr->type == nir_instr_type_jump) { + nir_jump_instr *jump = nir_instr_as_jump(last_instr); + assert(jump->type == nir_jump_return); + nir_instr_remove(&jump->instr); + progress = true; + } + + nir_foreach_block(impl, assert_no_returns_block, NULL); + + return progress; +} + +bool +nir_lower_returns(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_overload(shader, overload) { + if (overload->impl) + progress = nir_lower_returns_impl(overload->impl) || progress; + } + + return progress; +} -- cgit v1.2.3 From 86772c24885be27a7fa9473c471a8eaff54b2ee9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Dec 2015 11:27:00 -0800 Subject: nir/control_flow: Handle relinking top-level blocks This can happen if a function ends in a return 
instruction and you remove the return. --- src/glsl/nir/nir_control_flow.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_control_flow.c b/src/glsl/nir/nir_control_flow.c index 96395a41615..ecd9cbd4173 100644 --- a/src/glsl/nir/nir_control_flow.c +++ b/src/glsl/nir/nir_control_flow.c @@ -336,8 +336,7 @@ block_add_normal_succs(nir_block *block) nir_block *next_block = nir_cf_node_as_block(next); link_blocks(block, next_block, NULL); - } else { - assert(parent->type == nir_cf_node_loop); + } else if (parent->type == nir_cf_node_loop) { nir_loop *loop = nir_cf_node_as_loop(parent); nir_cf_node *head = nir_loop_first_cf_node(loop); @@ -346,6 +345,10 @@ block_add_normal_succs(nir_block *block) link_blocks(block, head_block, NULL); insert_phi_undef(head_block, block); + } else { + assert(parent->type == nir_cf_node_function); + nir_function_impl *impl = nir_cf_node_as_function(parent); + link_blocks(block, impl->end_block, NULL); } } else { nir_cf_node *next = nir_cf_node_next(&block->cf_node); -- cgit v1.2.3 From 98291b8f2cad24d6467cf35e00135e4b015cb1f2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 27 Oct 2015 21:34:56 -0700 Subject: nir: Add a helper for creating a "bare" nir_function_impl This is useful if you want to clone a single function_impl if, for instance, you wanted to do function inlining. 
--- src/glsl/nir/nir.c | 29 +++++++++++++++++++---------- src/glsl/nir/nir.h | 2 ++ 2 files changed, 21 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 94bb76034a2..b1a53308381 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -268,16 +268,11 @@ cf_init(nir_cf_node *node, nir_cf_node_type type) } nir_function_impl * -nir_function_impl_create(nir_function_overload *overload) +nir_function_impl_create_bare(nir_shader *shader) { - assert(overload->impl == NULL); - - void *mem_ctx = ralloc_parent(overload); + nir_function_impl *impl = ralloc(shader, nir_function_impl); - nir_function_impl *impl = ralloc(mem_ctx, nir_function_impl); - - overload->impl = impl; - impl->overload = overload; + impl->overload = NULL; cf_init(&impl->cf_node, nir_cf_node_function); @@ -292,8 +287,8 @@ nir_function_impl_create(nir_function_overload *overload) impl->valid_metadata = nir_metadata_none; /* create start & end blocks */ - nir_block *start_block = nir_block_create(mem_ctx); - nir_block *end_block = nir_block_create(mem_ctx); + nir_block *start_block = nir_block_create(shader); + nir_block *end_block = nir_block_create(shader); start_block->cf_node.parent = &impl->cf_node; end_block->cf_node.parent = &impl->cf_node; impl->end_block = end_block; @@ -305,6 +300,20 @@ nir_function_impl_create(nir_function_overload *overload) return impl; } +nir_function_impl * +nir_function_impl_create(nir_function_overload *overload) +{ + assert(overload->impl == NULL); + + nir_function_impl *impl = + nir_function_impl_create_bare(overload->function->shader); + + overload->impl = impl; + impl->overload = overload; + + return impl; +} + nir_block * nir_block_create(nir_shader *shader) { diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 0cf5f80c591..2ae83b49aef 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1696,6 +1696,8 @@ nir_function *nir_function_create(nir_shader *shader, const char *name); 
nir_function_overload *nir_function_overload_create(nir_function *func); nir_function_impl *nir_function_impl_create(nir_function_overload *func); +/** creates a function_impl that isn't tied to any particular overload */ +nir_function_impl *nir_function_impl_create_bare(nir_shader *shader); nir_block *nir_block_create(nir_shader *shader); nir_if *nir_if_create(nir_shader *shader); -- cgit v1.2.3 From 4aac03fe61c8d353df08c0d1b95308396bb4f116 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 27 Oct 2015 21:44:27 -0700 Subject: nir/clone: Add support for cloning a single function_impl This will be useful for things such as function inlining. --- src/glsl/nir/nir.h | 3 +- src/glsl/nir/nir_clone.c | 111 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 81 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 2ae83b49aef..05f1fe2f99d 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1969,7 +1969,8 @@ void nir_index_blocks(nir_function_impl *impl); void nir_print_shader(nir_shader *shader, FILE *fp); void nir_print_instr(const nir_instr *instr, FILE *fp); -nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s); +nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s); +nir_function_impl *nir_function_impl_clone(const nir_function_impl *impl); #ifdef DEBUG void nir_validate_shader(nir_shader *shader); diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c index 68b72ef5381..37c00f7aa96 100644 --- a/src/glsl/nir/nir_clone.c +++ b/src/glsl/nir/nir_clone.c @@ -32,8 +32,11 @@ */ typedef struct { + /* True if we are cloning an entire shader. */ + bool global_clone; + /* maps orig ptr -> cloned ptr: */ - struct hash_table *ptr_table; + struct hash_table *remap_table; /* List of phi sources. 
*/ struct list_head phi_srcs; @@ -43,28 +46,32 @@ typedef struct { } clone_state; static void -init_clone_state(clone_state *state) +init_clone_state(clone_state *state, bool global) { - state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + state->global_clone = global; + state->remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); list_inithead(&state->phi_srcs); } static void free_clone_state(clone_state *state) { - _mesa_hash_table_destroy(state->ptr_table, NULL); + _mesa_hash_table_destroy(state->remap_table, NULL); } -static void * -lookup_ptr(clone_state *state, const void *ptr) +static inline void * +_lookup_ptr(clone_state *state, const void *ptr, bool global) { struct hash_entry *entry; if (!ptr) return NULL; - entry = _mesa_hash_table_search(state->ptr_table, ptr); + if (!state->global_clone && global) + return (void *)ptr; + + entry = _mesa_hash_table_search(state->remap_table, ptr); assert(entry && "Failed to find pointer!"); if (!entry) return NULL; @@ -73,9 +80,33 @@ lookup_ptr(clone_state *state, const void *ptr) } static void -store_ptr(clone_state *state, void *nptr, const void *ptr) +add_remap(clone_state *state, void *nptr, const void *ptr) +{ + _mesa_hash_table_insert(state->remap_table, ptr, nptr); +} + +static void * +remap_local(clone_state *state, const void *ptr) +{ + return _lookup_ptr(state, ptr, false); +} + +static void * +remap_global(clone_state *state, const void *ptr) +{ + return _lookup_ptr(state, ptr, true); +} + +static nir_register * +remap_reg(clone_state *state, const nir_register *reg) { - _mesa_hash_table_insert(state->ptr_table, ptr, nptr); + return _lookup_ptr(state, reg, reg->is_global); +} + +static nir_variable * +remap_var(clone_state *state, const nir_variable *var) +{ + return _lookup_ptr(state, var, var->data.mode != nir_var_local); } static nir_constant * @@ -100,7 +131,7 @@ static nir_variable * clone_variable(clone_state *state, 
const nir_variable *var) { nir_variable *nvar = rzalloc(state->ns, nir_variable); - store_ptr(state, nvar, var); + add_remap(state, nvar, var); nvar->type = var->type; nvar->name = ralloc_strdup(nvar, var->name); @@ -137,7 +168,7 @@ static nir_register * clone_register(clone_state *state, const nir_register *reg) { nir_register *nreg = rzalloc(state->ns, nir_register); - store_ptr(state, nreg, reg); + add_remap(state, nreg, reg); nreg->num_components = reg->num_components; nreg->num_array_elems = reg->num_array_elems; @@ -172,9 +203,9 @@ __clone_src(clone_state *state, void *ninstr_or_if, { nsrc->is_ssa = src->is_ssa; if (src->is_ssa) { - nsrc->ssa = lookup_ptr(state, src->ssa); + nsrc->ssa = remap_local(state, src->ssa); } else { - nsrc->reg.reg = lookup_ptr(state, src->reg.reg); + nsrc->reg.reg = remap_reg(state, src->reg.reg); if (src->reg.indirect) { nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src); __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect); @@ -190,9 +221,9 @@ __clone_dst(clone_state *state, nir_instr *ninstr, ndst->is_ssa = dst->is_ssa; if (dst->is_ssa) { nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name); - store_ptr(state, &ndst->ssa, &dst->ssa); + add_remap(state, &ndst->ssa, &dst->ssa); } else { - ndst->reg.reg = lookup_ptr(state, dst->reg.reg); + ndst->reg.reg = remap_reg(state, dst->reg.reg); if (dst->reg.indirect) { ndst->reg.indirect = ralloc(ninstr, nir_src); __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect); @@ -208,7 +239,7 @@ static nir_deref_var * clone_deref_var(clone_state *state, const nir_deref_var *dvar, nir_instr *ninstr) { - nir_variable *nvar = lookup_ptr(state, dvar->var); + nir_variable *nvar = remap_var(state, dvar->var); nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar); if (dvar->deref.child) @@ -322,7 +353,7 @@ clone_load_const(clone_state *state, const nir_load_const_instr *lc) memcpy(&nlc->value, &lc->value, sizeof(nlc->value)); - store_ptr(state, 
&nlc->def, &lc->def); + add_remap(state, &nlc->def, &lc->def); return nlc; } @@ -333,7 +364,7 @@ clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa) nir_ssa_undef_instr *nsa = nir_ssa_undef_instr_create(state->ns, sa->def.num_components); - store_ptr(state, &nsa->def, &sa->def); + add_remap(state, &nsa->def, &sa->def); return nsa; } @@ -423,7 +454,7 @@ clone_jump(clone_state *state, const nir_jump_instr *jmp) static nir_call_instr * clone_call(clone_state *state, const nir_call_instr *call) { - nir_function_overload *ncallee = lookup_ptr(state, call->callee); + nir_function_overload *ncallee = remap_global(state, call->callee); nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee); for (unsigned i = 0; i < ncall->num_params; i++) @@ -476,7 +507,7 @@ clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk) assert(exec_list_is_empty(&nblk->instr_list)); /* We need this for phi sources */ - store_ptr(state, nblk, blk); + add_remap(state, nblk, blk); nir_foreach_instr(blk, instr) { if (instr->type == nir_instr_type_phi) { @@ -549,10 +580,9 @@ clone_cf_list(clone_state *state, struct exec_list *dst, } static nir_function_impl * -clone_function_impl(clone_state *state, const nir_function_impl *fi, - nir_function_overload *nfo) +clone_function_impl(clone_state *state, const nir_function_impl *fi) { - nir_function_impl *nfi = nir_function_impl_create(nfo); + nir_function_impl *nfi = nir_function_impl_create_bare(state->ns); clone_var_list(state, &nfi->locals, &fi->locals); clone_reg_list(state, &nfi->registers, &fi->registers); @@ -561,9 +591,9 @@ clone_function_impl(clone_state *state, const nir_function_impl *fi, nfi->num_params = fi->num_params; nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params); for (unsigned i = 0; i < fi->num_params; i++) { - nfi->params[i] = lookup_ptr(state, fi->params[i]); + nfi->params[i] = remap_local(state, fi->params[i]); } - nfi->return_var = lookup_ptr(state, 
fi->return_var); + nfi->return_var = remap_local(state, fi->return_var); assert(list_empty(&state->phi_srcs)); @@ -575,9 +605,9 @@ clone_function_impl(clone_state *state, const nir_function_impl *fi, * add it to the phi_srcs list and we fix it up here. */ list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) { - src->pred = lookup_ptr(state, src->pred); + src->pred = remap_local(state, src->pred); assert(src->src.is_ssa); - src->src.ssa = lookup_ptr(state, src->src.ssa); + src->src.ssa = remap_local(state, src->src.ssa); /* Remove from this list and place in the uses of the SSA def */ list_del(&src->src.use_link); @@ -591,6 +621,22 @@ clone_function_impl(clone_state *state, const nir_function_impl *fi, return nfi; } +nir_function_impl * +nir_function_impl_clone(const nir_function_impl *fi) +{ + clone_state state; + init_clone_state(&state, false); + + /* We use the same shader */ + state.ns = fi->overload->function->shader; + + nir_function_impl *nfi = clone_function_impl(&state, fi); + + free_clone_state(&state); + + return nfi; +} + static nir_function_overload * clone_function_overload(clone_state *state, const nir_function_overload *fo, nir_function *nfxn) @@ -598,7 +644,7 @@ clone_function_overload(clone_state *state, const nir_function_overload *fo, nir_function_overload *nfo = nir_function_overload_create(nfxn); /* Needed for call instructions */ - store_ptr(state, nfo, fo); + add_remap(state, nfo, fo); nfo->num_params = fo->num_params; nfo->params = ralloc_array(state->ns, nir_parameter, fo->num_params); @@ -631,7 +677,7 @@ nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s) { clone_state state; - init_clone_state(&state); + init_clone_state(&state, true); nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options); state.ns = ns; @@ -652,8 +698,9 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s) * will have in the list. 
*/ nir_foreach_overload(s, fo) { - nir_function_overload *nfo = lookup_ptr(&state, fo); - clone_function_impl(&state, fo->impl, nfo); + nir_function_overload *nfo = remap_global(&state, fo); + nfo->impl = clone_function_impl(&state, fo->impl); + nfo->impl->overload = nfo; } clone_reg_list(&state, &ns->registers, &s->registers); -- cgit v1.2.3 From 23cfa683d5496bdbf8b014c095059c36be7ab149 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Dec 2015 11:45:59 -0800 Subject: nir: move nir_copy_var from anv_nir_builder to nir_builder --- src/glsl/nir/nir_builder.h | 10 ++++++++++ src/vulkan/anv_nir_builder.h | 10 ---------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index cb7787f1006..cbbc26d1cc6 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -353,6 +353,16 @@ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value) nir_builder_instr_insert(build, &store->instr); } +static inline void +nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src) +{ + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_var_create(copy, dest); + copy->variables[1] = nir_deref_var_create(copy, src); + nir_builder_instr_insert(build, ©->instr); +} + static inline nir_ssa_def * nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) { diff --git a/src/vulkan/anv_nir_builder.h b/src/vulkan/anv_nir_builder.h index f26cb046a6b..74637579ad4 100644 --- a/src/vulkan/anv_nir_builder.h +++ b/src/vulkan/anv_nir_builder.h @@ -44,13 +44,3 @@ nir_builder_init_simple_shader(nir_builder *b, gl_shader_stage stage) b->impl = nir_function_impl_create(overload); b->cursor = nir_after_cf_list(&b->impl->body); } - -static inline void -nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src) -{ - nir_intrinsic_instr *copy = - 
nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_var_create(copy, dest); - copy->variables[1] = nir_deref_var_create(copy, src); - nir_builder_instr_insert(build, ©->instr); -} -- cgit v1.2.3 From b21db9cea52a28f841a5cb120260fe6f6a6e75ab Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Dec 2015 12:16:45 -0800 Subject: nir/builder: Add a copy_deref_var helper --- src/glsl/nir/nir_builder.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index cbbc26d1cc6..423bddd7a9a 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -353,6 +353,19 @@ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value) nir_builder_instr_insert(build, &store->instr); } +static inline void +nir_copy_deref_var(nir_builder *build, nir_deref_var *dest, nir_deref_var *src) +{ + assert(nir_deref_tail(&dest->deref)->type == + nir_deref_tail(&src->deref)->type); + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + nir_builder_instr_insert(build, ©->instr); +} + static inline void nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src) { -- cgit v1.2.3 From 8fba4bf79f9caf73a6ee9e724d51fae996ff9161 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Dec 2015 12:17:29 -0800 Subject: nir: Add a function inlining pass --- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir.h | 3 + src/glsl/nir/nir_inline_functions.c | 138 ++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+) create mode 100644 src/glsl/nir/nir_inline_functions.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 32fbf6d1b6c..830aacbe5e6 100644 --- 
a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -36,6 +36,7 @@ NIR_FILES = \ nir/nir_from_ssa.c \ nir/nir_gather_info.c \ nir/nir_gs_count_vertices.c \ + nir/nir_inline_functions.c \ nir/nir_intrinsics.c \ nir/nir_intrinsics.h \ nir/nir_instr_set.c \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 05f1fe2f99d..6888eded864 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -2004,6 +2004,9 @@ bool nir_split_var_copies(nir_shader *shader); bool nir_lower_returns_impl(nir_function_impl *impl); bool nir_lower_returns(nir_shader *shader); +bool nir_inline_functions_impl(nir_function_impl *impl); +bool nir_inline_functions(nir_shader *shader); + void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx); void nir_lower_var_copies(nir_shader *shader); diff --git a/src/glsl/nir/nir_inline_functions.c b/src/glsl/nir/nir_inline_functions.c new file mode 100644 index 00000000000..db5317e6f2d --- /dev/null +++ b/src/glsl/nir/nir_inline_functions.c @@ -0,0 +1,138 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" +#include "nir_control_flow.h" + +struct inline_functions_state { + nir_function_impl *impl; + nir_builder builder; + bool progress; +}; + +static bool +inline_functions_block(nir_block *block, void *void_state) +{ + struct inline_functions_state *state = void_state; + + nir_builder *b = &state->builder; + + /* This is tricky. We're iterating over instructions in a block but, as + * we go, the block and its instruction list are being split into + * pieces. However, this *should* be safe since foreach_safe always + * stashes the next thing in the iteration. That next thing will + * properly get moved to the next block when it gets split, and we + * continue iterating there. 
+ */ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_call) + continue; + + state->progress = true; + + nir_call_instr *call = nir_instr_as_call(instr); + assert(call->callee->impl); + + nir_function_impl *callee_copy = + nir_function_impl_clone(call->callee->impl); + + exec_list_append(&state->impl->locals, &callee_copy->locals); + exec_list_append(&state->impl->registers, &callee_copy->registers); + + b->cursor = nir_before_instr(&call->instr); + + /* Add copies of all in parameters */ + assert(call->num_params == callee_copy->num_params); + for (unsigned i = 0; i < callee_copy->num_params; i++) { + /* Only in or inout parameters */ + if (call->callee->params[i].param_type == nir_parameter_out) + continue; + + nir_copy_deref_var(b, nir_deref_var_create(b->shader, + callee_copy->params[i]), + call->params[i]); + } + + /* Pluck the body out of the function and place it here */ + nir_cf_list body; + nir_cf_list_extract(&body, &callee_copy->body); + nir_cf_reinsert(&body, b->cursor); + + b->cursor = nir_before_instr(&call->instr); + + /* Add copies of all out parameters and the return */ + assert(call->num_params == callee_copy->num_params); + for (unsigned i = 0; i < callee_copy->num_params; i++) { + /* Only out or inout parameters */ + if (call->callee->params[i].param_type == nir_parameter_in) + continue; + + nir_copy_deref_var(b, call->params[i], + nir_deref_var_create(b->shader, + callee_copy->params[i])); + } + if (!glsl_type_is_void(call->callee->return_type)) { + nir_copy_deref_var(b, call->return_deref, + nir_deref_var_create(b->shader, + callee_copy->return_var)); + } + + nir_instr_remove(&call->instr); + } + + return true; +} + +bool +nir_inline_functions_impl(nir_function_impl *impl) +{ + struct inline_functions_state state; + + state.progress = false; + state.impl = impl; + nir_builder_init(&state.builder, impl); + + nir_foreach_block(impl, inline_functions_block, &state); + + /* SSA and register indices are completely messed 
up now */ + nir_index_ssa_defs(impl); + nir_index_local_regs(impl); + + nir_metadata_preserve(impl, nir_metadata_none); + + return state.progress; +} + +bool +nir_inline_functions(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_overload(shader, overload) { + if (overload->impl) + progress = nir_inline_functions_impl(overload->impl) || progress; + } + + return progress; +} -- cgit v1.2.3 From ac975b73cf7d51ae8dd6a4fed8f82902a0c28919 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Dec 2015 11:28:21 -0800 Subject: anv/pipeline: Run lower_returns and inline_functions after spirv_to_nir --- src/vulkan/anv_pipeline.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 1906205c4d0..02be5a81984 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -102,26 +102,38 @@ anv_shader_compile_to_nir(struct anv_device *device, * and just use the NIR shader */ nir = module->nir; nir->options = nir_options; + nir_validate_shader(nir); } else { uint32_t *spirv = (uint32_t *) module->data; assert(spirv[0] == SPIR_V_MAGIC_NUMBER); assert(module->size % 4 == 0); nir = spirv_to_nir(spirv, module->size / 4, stage, nir_options); + nir_validate_shader(nir); + + nir_lower_returns(nir); + nir_validate_shader(nir); + + nir_inline_functions(nir); + nir_validate_shader(nir); } - nir_validate_shader(nir); /* Vulkan uses the separate-shader linking model */ nir->info.separate_shader = true; - /* Make sure the provided shader has exactly one entrypoint and that the - * name matches the name that came in from the VkShader. 
- */ + /* Pick off the single entrypoint that we want */ nir_function_impl *entrypoint = NULL; - nir_foreach_overload(nir, overload) { - if (strcmp(entrypoint_name, overload->function->name) == 0 && - overload->impl) { - assert(entrypoint == NULL); + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (strcmp(entrypoint_name, func->name) != 0) { + /* Not our function, get rid of it */ + exec_node_remove(&func->node); + continue; + } + + assert(exec_list_length(&func->overload_list) == 1); + foreach_list_typed(nir_function_overload, overload, node, + &func->overload_list) { + assert(overload->impl); entrypoint = overload->impl; } } -- cgit v1.2.3 From a7f3e113ad7e9c96b9156327f8499a19aeb64068 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Dec 2015 11:28:57 -0800 Subject: i965/nir: Remove return handling This was added because we were getting spurrious returns coming out of SPIR-V. Now that we're calling lower_returns, we don't need this. --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 6 ------ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 7 ------- 2 files changed, 13 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 6f51ce147f1..4e0ff50ddcb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2944,12 +2944,6 @@ fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr) bld.emit(BRW_OPCODE_CONTINUE); break; case nir_jump_return: - /* This has to be the last block in the shader. We don't handle - * early returns. 
- */ - assert(nir_cf_node_next(&instr->instr.block->cf_node) == NULL && - instr->instr.block->cf_node.parent->type == nir_cf_node_function); - break; default: unreachable("unknown jump"); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index cfb66a53fe9..dcecd772ff6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1482,13 +1482,6 @@ vec4_visitor::nir_emit_jump(nir_jump_instr *instr) break; case nir_jump_return: - /* This has to be the last block in the shader. We don't handle - * early returns. - */ - assert(nir_cf_node_next(&instr->instr.block->cf_node) == NULL && - instr->instr.block->cf_node.parent->type == nir_cf_node_function); - break; - default: unreachable("unknown jump"); } -- cgit v1.2.3 From 95990c96cccede9777e0552beffdf2bd4b494a04 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Dec 2015 15:06:02 -0800 Subject: nir: Create the params array in function_impl_create --- src/glsl/nir/nir.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index b1a53308381..d2e9afa4479 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -311,6 +311,10 @@ nir_function_impl_create(nir_function_overload *overload) overload->impl = impl; impl->overload = overload; + impl->num_params = overload->num_params; + impl->params = ralloc_array(overload->function->shader, + nir_variable *, impl->num_params); + return impl; } -- cgit v1.2.3 From 79b8b42081a6d4e219350882316bdc971c68ceec Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Dec 2015 15:07:16 -0800 Subject: nir/spirv: Handle function calls --- src/glsl/nir/spirv_to_nir.c | 86 ++++++++++++++++++++++++++++++++++--- src/glsl/nir/spirv_to_nir_private.h | 5 ++- 2 files changed, 84 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 07d47891770..d56f9be4447 
100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1819,7 +1819,44 @@ static void vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Unhandled opcode"); + struct nir_function_overload *overload = + vtn_value(b, w[3], vtn_value_type_function)->func->impl->overload; + + nir_call_instr *call = nir_call_instr_create(b->nb.shader, overload); + for (unsigned i = 0; i < call->num_params; i++) { + unsigned arg_id = w[4 + i]; + struct vtn_value *arg = vtn_untyped_value(b, arg_id); + if (arg->value_type == vtn_value_type_deref) { + call->params[i] = + nir_deref_as_var(nir_copy_deref(call, &arg->deref->deref)); + } else { + struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id); + + /* Make a temporary to store the argument in */ + nir_variable *tmp = + nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp"); + call->params[i] = nir_deref_var_create(call, tmp); + + vtn_variable_store(b, arg_ssa, call->params[i], arg->type); + } + } + + nir_variable *out_tmp = NULL; + if (!glsl_type_is_void(overload->return_type)) { + out_tmp = nir_local_variable_create(b->impl, overload->return_type, + "out_tmp"); + call->return_deref = nir_deref_var_create(call, out_tmp); + } + + nir_builder_instr_insert(&b->nb, &call->instr); + + if (glsl_type_is_void(overload->return_type)) { + vtn_push_value(b, w[2], vtn_value_type_undef); + } else { + struct vtn_type *rettype = vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa); + retval->ssa = vtn_variable_load(b, call->return_deref, rettype); + } } static struct vtn_ssa_value * @@ -3260,6 +3297,8 @@ vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, const struct glsl_type *result_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + val->func = b->func; + const struct glsl_type 
*func_type = vtn_value(b, w[4], vtn_value_type_type)->type->type; @@ -3290,7 +3329,17 @@ vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, } } } - b->func->overload = overload; + + overload->return_type = glsl_get_function_return_type(func_type); + + b->func->impl = nir_function_impl_create(overload); + if (!glsl_type_is_void(overload->return_type)) { + b->func->impl->return_var = + nir_local_variable_create(b->func->impl, + overload->return_type, "retval"); + } + + b->func_param_idx = 0; break; } @@ -3299,8 +3348,22 @@ vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, b->func = NULL; break; - case SpvOpFunctionParameter: - break; /* Does nothing */ + case SpvOpFunctionParameter: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + + assert(b->func_param_idx < b->func->impl->num_params); + unsigned idx = b->func_param_idx++; + + nir_variable *param = + nir_local_variable_create(b->func->impl, + b->func->impl->overload->params[idx].type, + val->name); + + b->func->impl->params[idx] = param; + val->deref = nir_deref_var_create(b, param); + val->deref_type = vtn_value(b, w[1], vtn_value_type_type)->type; + break; + } case SpvOpLabel: { assert(b->block == NULL); @@ -3723,6 +3786,18 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, return; } + case SpvOpReturnValue: { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[1]); + vtn_variable_store(b, src, + nir_deref_var_create(b, b->impl->return_var), + NULL); + + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_return); + nir_builder_instr_insert(&b->nb, &jump->instr); + return; + } + case SpvOpKill: { nir_intrinsic_instr *discard = nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard); @@ -3731,7 +3806,6 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, } case SpvOpSwitch: - case SpvOpReturnValue: case SpvOpUnreachable: default: unreachable("Unhandled opcode"); @@ -3781,7 +3855,7 @@ 
spirv_to_nir(const uint32_t *words, size_t word_count, vtn_handle_first_cfg_pass_instruction); foreach_list_typed(struct vtn_function, func, node, &b->functions) { - b->impl = nir_function_impl_create(func->overload); + b->impl = func->impl; b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, _mesa_key_pointer_equal); b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer, diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index 40f0c78ae78..edc3461b846 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -61,7 +61,7 @@ struct vtn_block { struct vtn_function { struct exec_node node; - nir_function_overload *overload; + nir_function_impl *impl; struct vtn_block *start_block; const uint32_t *end; @@ -204,6 +204,9 @@ struct vtn_builder { struct vtn_function *func; struct exec_list functions; + + /* Current function parameter index */ + unsigned func_param_idx; }; static inline struct vtn_value * -- cgit v1.2.3 From af74ce5a19315284f229277b44b84bede5be4d99 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 21 Dec 2015 21:20:50 -0800 Subject: spirv/nir: Handle non-vector extractions in vtn_composite_extract --- src/glsl/nir/spirv_to_nir.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index d56f9be4447..23c2c481153 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2824,6 +2824,8 @@ vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); ret->def = vtn_vector_extract(b, cur->def, indices[i]); return ret; + } else { + cur = cur->elems[indices[i]]; } } -- cgit v1.2.3 From 3ab1b7afa8fd59ada577573954830a409a667bc4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 11:10:13 -0800 Subject: nir/spirv: Do boolean fixup on block loads We used to do it for variable loads on 
things of type "uniform" but that never got ported to block loads. --- src/glsl/nir/spirv_to_nir.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 23c2c481153..1afa7eba5a9 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1168,7 +1168,16 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); nir_builder_instr_insert(&b->nb, &load->instr); - val->def = &load->dest.ssa; + + if (glsl_get_base_type(type->type) == GLSL_TYPE_BOOL) { + /* Loads of booleans from externally visible memory need to be + * fixed up since they're defined to be zero/nonzero rather than + * NIR_FALSE/NIR_TRUE. + */ + val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->def = &load->dest.ssa; + } } else { unsigned elems = glsl_get_length(type->type); val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); -- cgit v1.2.3 From 6219a69589a78efd8a40832d5f71ffe42eda9f67 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 11:10:58 -0800 Subject: nir/spirv: Handle derefs in vtn_ssa_value This is kind of a hack, but it makes vtn_ssa_value insert a load if the value requested is actually a deref. This shouldn't happen normally but, thanks to the impedence mismatch of the NIR function parameter model vs. the SPIR-V model, this can happen for function arguments. 
--- src/glsl/nir/spirv_to_nir.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 1afa7eba5a9..dc165252c25 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -109,6 +109,10 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, return val; } +static struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *src_type); + struct vtn_ssa_value * vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) { @@ -119,6 +123,11 @@ vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) case vtn_value_type_ssa: return val->ssa; + + case vtn_value_type_deref: + /* This is needed for function parameters */ + return vtn_variable_load(b, val->deref, val->deref_type); + default: unreachable("Invalid type for an SSA value"); } -- cgit v1.2.3 From 5644b1ceced8bb2a31880482e039c0179e555080 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 11:11:58 -0800 Subject: nir/spirv: Handle LogicalNot --- src/glsl/nir/spirv_to_nir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index dc165252c25..5c94573fd3a 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2570,6 +2570,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpLogicalEqual: op = nir_op_ieq; break; case SpvOpLogicalNotEqual: op = nir_op_ine; break; case SpvOpLogicalAnd: op = nir_op_iand; break; + case SpvOpLogicalNot: op = nir_op_inot; break; case SpvOpBitwiseOr: op = nir_op_ior; break; case SpvOpBitwiseXor: op = nir_op_ixor; break; case SpvOpBitwiseAnd: op = nir_op_iand; break; @@ -3562,10 +3563,11 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpShiftRightLogical: case SpvOpShiftRightArithmetic: case SpvOpShiftLeftLogical: - case SpvOpLogicalOr: case SpvOpLogicalEqual: 
case SpvOpLogicalNotEqual: + case SpvOpLogicalOr: case SpvOpLogicalAnd: + case SpvOpLogicalNot: case SpvOpBitwiseOr: case SpvOpBitwiseXor: case SpvOpBitwiseAnd: -- cgit v1.2.3 From 72ceb99bab592e1ce01bc7855c45bfe58a188b6f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 11:24:23 -0800 Subject: anv: Mask out invalid stages in foreach_stage --- src/vulkan/anv_private.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index b632f6e85a9..6bd18952492 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1235,8 +1235,11 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) return (1 << mesa_stage); } +#define ANV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1) + #define anv_foreach_stage(stage, stage_bits) \ - for (gl_shader_stage stage, __tmp = (gl_shader_stage)(stage_bits);\ + for (gl_shader_stage stage, \ + __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \ stage = __builtin_ffs(__tmp) - 1, __tmp; \ __tmp &= ~(1 << (stage))) -- cgit v1.2.3 From 3b391892aa5e6af5f1f27ead7a2e09d5c0960ae7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 11:25:35 -0800 Subject: anv/descriptor_set: Use anv_foreach_stage --- src/vulkan/anv_descriptor_set.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 904051b4f13..df53edd902c 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -41,7 +41,6 @@ VkResult anv_CreateDescriptorSetLayout( { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_descriptor_set_layout *set_layout; - uint32_t s; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); @@ -97,7 +96,7 @@ VkResult anv_CreateDescriptorSetLayout( switch (binding->descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 
- for_each_bit(s, binding->stageFlags) { + anv_foreach_stage(s, binding->stageFlags) { set_layout->binding[b].stage[s].sampler_index = sampler_count[s]; sampler_count[s] += binding->descriptorCount; } @@ -117,7 +116,7 @@ VkResult anv_CreateDescriptorSetLayout( case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - for_each_bit(s, binding->stageFlags) { + anv_foreach_stage(s, binding->stageFlags) { set_layout->binding[b].stage[s].surface_index = surface_count[s]; surface_count[s] += binding->descriptorCount; } @@ -139,7 +138,7 @@ VkResult anv_CreateDescriptorSetLayout( switch (binding->descriptorType) { case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - for_each_bit(s, binding->stageFlags) { + anv_foreach_stage(s, binding->stageFlags) { set_layout->binding[b].stage[s].image_index = image_count[s]; image_count[s] += binding->descriptorCount; } -- cgit v1.2.3 From 66168a798ba95c606f33a99f569949260dfa5086 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 12:45:13 -0800 Subject: nir/spirv: Better document vtn_ssa_value.transposed --- src/glsl/nir/spirv_to_nir_private.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index edc3461b846..6b53fa3bfba 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -76,8 +76,9 @@ struct vtn_ssa_value { struct vtn_ssa_value **elems; }; - /* For matrices, a transposed version of the value, or NULL if it hasn't - * been computed + /* For matrices, if this is non-NULL, then this value is actually the + * transpose of some other value. The value that `transposed` points to + * always dominates this value. 
*/ struct vtn_ssa_value *transposed; -- cgit v1.2.3 From 5b301323880e13b4aaf449ea9eb1ccbd05694cc0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 13:13:54 -0800 Subject: nir/spirv: Fix some mem_ctx issues with create_vec --- src/glsl/nir/spirv_to_nir.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 5c94573fd3a..d258f23d0ed 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2273,7 +2273,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, } static nir_alu_instr * -create_vec(void *mem_ctx, unsigned num_components) +create_vec(nir_shader *shader, unsigned num_components) { nir_op op; switch (num_components) { @@ -2284,7 +2284,7 @@ create_vec(void *mem_ctx, unsigned num_components) default: unreachable("bad vector size"); } - nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op); + nir_alu_instr *vec = nir_alu_instr_create(shader, op); nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); vec->dest.write_mask = (1 << num_components) - 1; @@ -2301,7 +2301,8 @@ vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) vtn_create_ssa_value(b, glsl_transposed_type(src->type)); for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { - nir_alu_instr *vec = create_vec(b, glsl_get_matrix_columns(src->type)); + nir_alu_instr *vec = create_vec(b->shader, + glsl_get_matrix_columns(src->type)); if (glsl_type_is_vector_or_scalar(src->type)) { vec->src[0].src = nir_src_for_ssa(src->def); vec->src[0].swizzle[0] = i; @@ -2392,7 +2393,7 @@ vtn_matrix_multiply(struct vtn_builder *b, */ for (unsigned i = 0; i < src1_columns; i++) { - nir_alu_instr *vec = create_vec(b, src0_rows); + nir_alu_instr *vec = create_vec(b->shader, src0_rows); for (unsigned j = 0; j < src0_rows; j++) { vec->src[j].src = nir_src_for_ssa(nir_fdot(&b->nb, src0_transpose->elems[j]->def, -- cgit v1.2.3 From 
452ba4db2badced650555f8af2359d928a63c5be Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 13:17:00 -0800 Subject: nir/spirv: Create the correct type if a matrix-vector multiply produces a vector --- src/glsl/nir/spirv_to_nir.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index d258f23d0ed..91667e11225 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2369,9 +2369,14 @@ vtn_matrix_multiply(struct vtn_builder *b, unsigned src0_columns = glsl_get_matrix_columns(src0->type); unsigned src1_columns = glsl_get_matrix_columns(src1->type); - struct vtn_ssa_value *dest = - vtn_create_ssa_value(b, glsl_matrix_type(glsl_get_base_type(src0->type), - src0_rows, src1_columns)); + const struct glsl_type *dest_type; + if (src1_columns > 1) { + dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns); + } else { + dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows); + } + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); dest = vtn_wrap_matrix(b, dest); -- cgit v1.2.3 From 2e823d5754137c7ecb8f766540ff48b39ada48fc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 13:17:18 -0800 Subject: nir/spirv: Properly handle vector times matrix --- src/glsl/nir/spirv_to_nir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 91667e11225..a6b770ef2b7 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2493,7 +2493,11 @@ vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); - val->ssa = vtn_matrix_multiply(b, src0, src1); + if (opcode == SpvOpVectorTimesMatrix) { + val->ssa = vtn_matrix_multiply(b, vtn_transpose(b, 
src1), src0); + } else { + val->ssa = vtn_matrix_multiply(b, src0, src1); + } break; } -- cgit v1.2.3 From 54c870ff61b1860a253729e397c0c83fc759f625 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 14:14:39 -0800 Subject: nir/spirv: Add support for undefs in vtn_ssa_value() --- src/glsl/nir/spirv_to_nir.c | 47 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index a6b770ef2b7..48960b70373 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -29,6 +29,44 @@ #include "nir_vla.h" #include "nir_control_flow.h" +static struct vtn_ssa_value * +vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(b->shader, num_components); + + nir_instr_insert_before_cf_list(&b->impl->body, &undef->instr); + val->def = &undef->def; + } else { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + if (glsl_type_is_matrix(type)) { + const struct glsl_type *elem_type = + glsl_vector_type(glsl_get_base_type(type), + glsl_get_vector_elements(type)); + + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } else if (glsl_type_is_array(type)) { + const struct glsl_type *elem_type = glsl_get_array_element(type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } else { + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = glsl_get_struct_field(type, i); + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } + } + } + + return val; +} + static struct vtn_ssa_value * 
vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, const struct glsl_type *type) @@ -118,6 +156,9 @@ vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) { struct vtn_value *val = vtn_untyped_value(b, value_id); switch (val->value_type) { + case vtn_value_type_undef: + return vtn_undef_ssa_value(b, val->type->type); + case vtn_value_type_constant: return vtn_const_ssa_value(b, val->constant, val->const_type); @@ -3458,9 +3499,11 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, /* This is handled by cfg pre-pass and walk_blocks */ break; - case SpvOpUndef: - vtn_push_value(b, w[2], vtn_value_type_undef); + case SpvOpUndef: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; break; + } case SpvOpExtInst: vtn_handle_extension(b, opcode, w, count); -- cgit v1.2.3 From eae352e75c4effcfda306df2c589ade06d1d3fdb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 18:09:42 -0800 Subject: nir: Add a function for comparing cursors --- src/glsl/nir/nir.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/glsl/nir/nir.h | 2 ++ 2 files changed, 65 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index d2e9afa4479..f01d5e295c1 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -711,6 +711,69 @@ nir_cf_node_get_function(nir_cf_node *node) return nir_cf_node_as_function(node); } +/* Reduces a cursor by trying to convert everything to after and trying to + * go up to block granularity when possible. + */ +static nir_cursor +reduce_cursor(nir_cursor cursor) +{ + switch (cursor.option) { + case nir_cursor_before_block: + if (exec_list_is_empty(&cursor.block->instr_list)) { + /* Empty block. After is as good as before. */ + cursor.option = nir_cursor_after_block; + } else { + /* Try to switch to after the previous block if there is one. + * (This isn't likely, but it can happen.) 
+ */ + nir_cf_node *prev_node = nir_cf_node_prev(&cursor.block->cf_node); + if (prev_node && prev_node->type == nir_cf_node_block) { + cursor.block = nir_cf_node_as_block(prev_node); + cursor.option = nir_cursor_after_block; + } + } + return cursor; + + case nir_cursor_after_block: + return cursor; + + case nir_cursor_before_instr: { + nir_instr *prev_instr = nir_instr_prev(cursor.instr); + if (prev_instr) { + /* Before this instruction is after the previous */ + cursor.instr = prev_instr; + cursor.option = nir_cursor_after_instr; + } else { + /* No previous instruction. Switch to before block */ + cursor.block = cursor.instr->block; + cursor.option = nir_cursor_before_block; + } + return reduce_cursor(cursor); + } + + case nir_cursor_after_instr: + if (nir_instr_next(cursor.instr) == NULL) { + /* This is the last instruction, switch to after block */ + cursor.option = nir_cursor_after_block; + cursor.block = cursor.instr->block; + } + return cursor; + + default: + unreachable("Inavlid cursor option"); + } +} + +bool +nir_cursors_equal(nir_cursor a, nir_cursor b) +{ + /* Reduced cursors should be unique */ + a = reduce_cursor(a); + b = reduce_cursor(b); + + return a.block == b.block && a.option == b.option; +} + static bool add_use_cb(nir_src *src, void *state) { diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 6888eded864..8655c144012 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1778,6 +1778,8 @@ nir_cursor_current_block(nir_cursor cursor) } } +bool nir_cursors_equal(nir_cursor a, nir_cursor b); + static inline nir_cursor nir_before_block(nir_block *block) { -- cgit v1.2.3 From e1b1d58becffcc647acd1f242ceccdb9042cccf1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 18:10:08 -0800 Subject: nir/cf: Make extracting or re-inserting nothing a no-op --- src/glsl/nir/nir_control_flow.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_control_flow.c 
b/src/glsl/nir/nir_control_flow.c index ecd9cbd4173..33b06d0cc84 100644 --- a/src/glsl/nir/nir_control_flow.c +++ b/src/glsl/nir/nir_control_flow.c @@ -749,6 +749,12 @@ nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end) { nir_block *block_begin, *block_end, *block_before, *block_after; + if (nir_cursors_equal(begin, end)) { + exec_list_make_empty(&extracted->list); + extracted->impl = NULL; /* we shouldn't need this */ + return; + } + /* In the case where begin points to an instruction in some basic block and * end points to the end of the same basic block, we rely on the fact that * splitting on an instruction moves earlier instructions into a new basic @@ -788,6 +794,9 @@ nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor) { nir_block *before, *after; + if (exec_list_is_empty(&cf_list->list)) + return; + split_block_cursor(cursor, &before, &after); foreach_list_typed_safe(nir_cf_node, node, node, &cf_list->list) { -- cgit v1.2.3 From 0bc1b0fd2362df78f47d208e0fde4ff2183b5521 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 23:45:47 -0800 Subject: nir/lower_return: Do it for real this time --- src/glsl/nir/nir_lower_returns.c | 205 +++++++++++++++++++++++++++++++++++---- 1 file changed, 185 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_returns.c b/src/glsl/nir/nir_lower_returns.c index a15d0332ed4..f36fc9dd613 100644 --- a/src/glsl/nir/nir_lower_returns.c +++ b/src/glsl/nir/nir_lower_returns.c @@ -22,44 +22,209 @@ */ #include "nir.h" +#include "nir_builder.h" +#include "nir_control_flow.h" + +struct lower_returns_state { + nir_builder builder; + struct exec_list *parent_cf_list; + struct exec_list *cf_list; + nir_loop *loop; + nir_if *if_stmt; + nir_variable *return_flag; +}; + +static bool lower_returns_in_cf_list(struct exec_list *cf_list, + struct lower_returns_state *state); + +static bool +lower_returns_in_loop(nir_loop *loop, struct lower_returns_state *state) +{ + nir_loop 
*parent = state->loop; + state->loop = loop; + bool progress = lower_returns_in_cf_list(&loop->body, state); + state->loop = parent; + + /* Nothing interesting */ + if (!progress) + return false; + + /* In this case, there was a return somewhere inside of the loop. That + * return would have been turned into a write to the return_flag + * variable and a break. We need to insert a predicated return right + * after the loop ends. + */ + + assert(state->return_flag); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(state->builder.shader, nir_intrinsic_load_var); + load->num_components = 1; + load->variables[0] = nir_deref_var_create(load, state->return_flag); + nir_ssa_dest_init(&load->instr, &load->dest, 1, "return"); + nir_instr_insert(nir_after_cf_node(&loop->cf_node), &load->instr); + + nir_if *if_stmt = nir_if_create(state->builder.shader); + if_stmt->condition = nir_src_for_ssa(&load->dest.ssa); + nir_cf_node_insert(nir_after_instr(&load->instr), &if_stmt->cf_node); + + nir_jump_instr *ret = + nir_jump_instr_create(state->builder.shader, nir_jump_return); + nir_instr_insert(nir_before_cf_list(&if_stmt->then_list), &ret->instr); + + return true; +} + +static bool +lower_returns_in_if(nir_if *if_stmt, struct lower_returns_state *state) +{ + bool progress; + + nir_if *parent = state->if_stmt; + state->if_stmt = if_stmt; + progress = lower_returns_in_cf_list(&if_stmt->then_list, state); + progress = lower_returns_in_cf_list(&if_stmt->else_list, state) || progress; + state->if_stmt = parent; + + return progress; +} static bool -assert_no_returns_block(nir_block *block, void *state) +lower_returns_in_block(nir_block *block, struct lower_returns_state *state) { - (void)state; + if (block->predecessors->entries == 0 && + block != nir_start_block(state->builder.impl)) { + /* This block is unreachable. Delete it and everything after it. 
*/ + nir_cf_list list; + nir_cf_extract(&list, nir_before_cf_node(&block->cf_node), + nir_after_cf_list(state->cf_list)); + + if (exec_list_is_empty(&list.list)) { + /* There's nothing here, which also means there's nothing in this + * block so we have nothing to do. + */ + return false; + } else { + nir_cf_delete(&list); + return true; + } + } + + nir_instr *last_instr = nir_block_last_instr(block); + if (last_instr == NULL) + return false; - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_jump) - continue; + if (last_instr->type != nir_instr_type_jump) + return false; - nir_jump_instr *jump = nir_instr_as_jump(instr); - assert(jump->type != nir_jump_return); + nir_jump_instr *jump = nir_instr_as_jump(last_instr); + if (jump->type != nir_jump_return) + return false; + + if (state->loop) { + /* We're in a loop. Just set the return flag to true and break. + * lower_returns_in_loop will do the rest. + */ + nir_builder *b = &state->builder; + b->cursor = nir_before_instr(&jump->instr); + + if (state->return_flag == NULL) { + state->return_flag = + nir_local_variable_create(b->impl, glsl_bool_type(), "return"); + + /* Set a default value of false */ + state->return_flag->constant_initializer = + rzalloc(state->return_flag, nir_constant); + } + + nir_store_var(b, state->return_flag, nir_imm_int(b, NIR_TRUE)); + jump->type = nir_jump_return; + } else if (state->if_stmt) { + /* If we're not in a loop but in an if, just move the rest of the CF + * list into the the other case of the if. 
+ */ + nir_cf_list list; + nir_cf_extract(&list, nir_after_cf_node(&state->if_stmt->cf_node), + nir_after_cf_list(state->parent_cf_list)); + + nir_instr_remove(&jump->instr); + + if (state->cf_list == &state->if_stmt->then_list) { + nir_cf_reinsert(&list, + nir_after_cf_list(&state->if_stmt->else_list)); + } else if (state->cf_list == &state->if_stmt->else_list) { + nir_cf_reinsert(&list, + nir_after_cf_list(&state->if_stmt->then_list)); + } else { + unreachable("Invalid CF list"); + } + } else { + nir_instr_remove(&jump->instr); + + /* No if, no nothing. Just delete the return and whatever follows. */ + nir_cf_list list; + nir_cf_extract(&list, nir_after_cf_node(&block->cf_node), + nir_after_cf_list(state->parent_cf_list)); + nir_cf_delete(&list); } return true; } -bool -nir_lower_returns_impl(nir_function_impl *impl) +static bool +lower_returns_in_cf_list(struct exec_list *cf_list, + struct lower_returns_state *state) { bool progress = false; - assert(impl->end_block->predecessors->entries == 1); + struct exec_list *prev_parent_list = state->parent_cf_list; + state->parent_cf_list = state->cf_list; + state->cf_list = cf_list; - struct set_entry *entry = - _mesa_set_next_entry(impl->end_block->predecessors, NULL); + foreach_list_typed_reverse_safe(nir_cf_node, node, node, cf_list) { + switch (node->type) { + case nir_cf_node_block: + if (lower_returns_in_block(nir_cf_node_as_block(node), state)) + progress = true; + break; - nir_block *last_block = (nir_block *)entry->key; + case nir_cf_node_if: + if (lower_returns_in_if(nir_cf_node_as_if(node), state)) + progress = true; + break; - nir_instr *last_instr = nir_block_last_instr(last_block); - if (last_instr && last_instr->type == nir_instr_type_jump) { - nir_jump_instr *jump = nir_instr_as_jump(last_instr); - assert(jump->type == nir_jump_return); - nir_instr_remove(&jump->instr); - progress = true; + case nir_cf_node_loop: + if (lower_returns_in_loop(nir_cf_node_as_loop(node), state)) + progress = true; + break; + 
+ default: + unreachable("Invalid inner CF node type"); + } } - nir_foreach_block(impl, assert_no_returns_block, NULL); + state->cf_list = state->parent_cf_list; + state->parent_cf_list = prev_parent_list; + + return progress; +} + +bool +nir_lower_returns_impl(nir_function_impl *impl) +{ + struct lower_returns_state state; + + state.parent_cf_list = NULL; + state.cf_list = &impl->body; + state.loop = NULL; + state.if_stmt = NULL; + state.return_flag = NULL; + nir_builder_init(&state.builder, impl); + + bool progress = lower_returns_in_cf_list(&impl->body, &state); + + if (progress) + nir_metadata_preserve(impl, nir_metadata_none); return progress; } -- cgit v1.2.3 From 4ff4310a783cb4815d99e2991697d9b300bc7d32 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 26 Oct 2015 19:45:30 -0700 Subject: nir/types: Expose glsl_type::count_attribute_slots() --- src/glsl/nir/nir_types.cpp | 6 ++++++ src/glsl/nir/nir_types.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 27db5793a60..54751cbcb5f 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -124,6 +124,12 @@ glsl_get_aoa_size(const struct glsl_type *type) return type->arrays_of_arrays_size(); } +unsigned +glsl_count_attribute_slots(const struct glsl_type *type) +{ + return type->count_attribute_slots(); +} + const char * glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index) { diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index 9cc71e899d7..1bae84a356e 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -67,6 +67,8 @@ unsigned glsl_get_length(const struct glsl_type *type); unsigned glsl_get_aoa_size(const struct glsl_type *type); +unsigned glsl_count_attribute_slots(const struct glsl_type *type); + const char *glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index); -- cgit v1.2.3 From bbebd2de13edd35c0b2f70676367be08a227a904 Mon Sep 
17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 27 Oct 2015 11:32:34 -0700 Subject: nir: Add a helper for getting the bitmask for a variable's location --- src/glsl/nir/nir.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 8655c144012..815c22c5e8b 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -366,6 +366,32 @@ typedef struct { #define nir_foreach_variable(var, var_list) \ foreach_list_typed(nir_variable, var, node, var_list) +/** + * Returns the bits in the inputs_read, outputs_written, or + * system_values_read bitfield corresponding to this variable. + */ +static inline uint64_t +nir_variable_get_io_mask(nir_variable *var, gl_shader_stage stage) +{ + assert(var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out || + var->data.mode == nir_var_system_value); + assert(var->data.location >= 0); + + const struct glsl_type *var_type = var->type; + if (stage == MESA_SHADER_GEOMETRY && var->data.mode == nir_var_shader_in) { + /* Most geometry shader inputs are per-vertex arrays */ + if (var->data.location >= VARYING_SLOT_VAR0) + assert(glsl_type_is_array(var_type)); + + if (glsl_type_is_array(var_type)) + var_type = glsl_get_array_element(var_type); + } + + unsigned slots = glsl_count_attribute_slots(var_type); + return ((1ull << slots) - 1) << var->data.location; +} + typedef struct { struct exec_node node; -- cgit v1.2.3 From c60456dfaa6decc6f1884dbd40adad61937e3f69 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 26 Oct 2015 20:56:06 -0700 Subject: nir/gather_info: Handle multi-slot variables in io bitfields --- src/glsl/nir/nir_gather_info.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_gather_info.c b/src/glsl/nir/nir_gather_info.c index c2413e3bbe2..18c8e3649dc 100644 --- a/src/glsl/nir/nir_gather_info.c +++ b/src/glsl/nir/nir_gather_info.c @@ -93,16 +93,16 @@ 
nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) { shader->info.inputs_read = 0; foreach_list_typed(nir_variable, var, node, &shader->inputs) - shader->info.inputs_read |= (1ull << var->data.location); + shader->info.inputs_read |= nir_variable_get_io_mask(var, shader->stage); /* TODO: Some day we may need to add stream support to NIR */ shader->info.outputs_written = 0; foreach_list_typed(nir_variable, var, node, &shader->outputs) - shader->info.outputs_written |= (1ull << var->data.location); + shader->info.outputs_written |= nir_variable_get_io_mask(var, shader->stage); shader->info.system_values_read = 0; foreach_list_typed(nir_variable, var, node, &shader->system_values) - shader->info.system_values_read |= (1ull << var->data.location); + shader->info.system_values_read |= nir_variable_get_io_mask(var, shader->stage); nir_foreach_block(entrypoint, gather_info_block, shader); } -- cgit v1.2.3 From 3489f66056c3bacfb3ade806c0eea8f85951eb41 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sun, 27 Dec 2015 22:50:14 -0800 Subject: nir: Add a cursor helper for getting a cursor after any phi nodes --- src/glsl/nir/nir.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 815c22c5e8b..904e444487a 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1871,6 +1871,22 @@ nir_after_cf_node(nir_cf_node *node) return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); } +static inline nir_cursor +nir_after_cf_node_and_phis(nir_cf_node *node) +{ + if (node->type == nir_cf_node_block) + return nir_after_block(nir_cf_node_as_block(node)); + + nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node)); + assert(block->cf_node.type == nir_cf_node_block); + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + return nir_before_instr(instr); + } + return nir_after_block(block); +} + static inline nir_cursor 
nir_before_cf_list(struct exec_list *cf_list) { -- cgit v1.2.3 From f948767471ba83427cbcdc244a511fbb954ca9e0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sun, 27 Dec 2015 22:50:45 -0800 Subject: nir/lower_returns: Better algorithm as per connor --- src/glsl/nir/nir_lower_returns.c | 154 ++++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 76 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_returns.c b/src/glsl/nir/nir_lower_returns.c index f36fc9dd613..ce0512c770a 100644 --- a/src/glsl/nir/nir_lower_returns.c +++ b/src/glsl/nir/nir_lower_returns.c @@ -27,16 +27,48 @@ struct lower_returns_state { nir_builder builder; - struct exec_list *parent_cf_list; struct exec_list *cf_list; nir_loop *loop; - nir_if *if_stmt; nir_variable *return_flag; }; static bool lower_returns_in_cf_list(struct exec_list *cf_list, struct lower_returns_state *state); +static void +predicate_following(nir_cf_node *node, struct lower_returns_state *state) +{ + nir_builder *b = &state->builder; + b->cursor = nir_after_cf_node_and_phis(node); + + if (nir_cursors_equal(b->cursor, nir_after_cf_list(state->cf_list))) + return; /* Nothing to predicate */ + + assert(state->return_flag); + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(nir_load_var(b, state->return_flag)); + nir_cf_node_insert(b->cursor, &if_stmt->cf_node); + + if (state->loop) { + /* If we're inside of a loop, then all we need to do is insert a + * conditional break. + */ + nir_jump_instr *brk = + nir_jump_instr_create(state->builder.shader, nir_jump_break); + nir_instr_insert(nir_before_cf_list(&if_stmt->then_list), &brk->instr); + } else { + /* Otherwise, we need to actually move everything into the else case + * of the if statement. 
+ */ + nir_cf_list list; + nir_cf_extract(&list, nir_after_cf_node(&if_stmt->cf_node), + nir_after_cf_list(state->cf_list)); + assert(!exec_list_is_empty(&list.list)); + nir_cf_reinsert(&list, nir_before_cf_list(&if_stmt->else_list)); + } +} + static bool lower_returns_in_loop(nir_loop *loop, struct lower_returns_state *state) { @@ -45,34 +77,15 @@ lower_returns_in_loop(nir_loop *loop, struct lower_returns_state *state) bool progress = lower_returns_in_cf_list(&loop->body, state); state->loop = parent; - /* Nothing interesting */ - if (!progress) - return false; - - /* In this case, there was a return somewhere inside of the loop. That - * return would have been turned into a write to the return_flag - * variable and a break. We need to insert a predicated return right - * after the loop ends. + /* If the recursive call made progress, then there were returns inside + * of the loop. These would have been lowered to breaks with the return + * flag set to true. We need to predicate everything following the loop + * on the return flag. 
*/ + if (progress) + predicate_following(&loop->cf_node, state); - assert(state->return_flag); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(state->builder.shader, nir_intrinsic_load_var); - load->num_components = 1; - load->variables[0] = nir_deref_var_create(load, state->return_flag); - nir_ssa_dest_init(&load->instr, &load->dest, 1, "return"); - nir_instr_insert(nir_after_cf_node(&loop->cf_node), &load->instr); - - nir_if *if_stmt = nir_if_create(state->builder.shader); - if_stmt->condition = nir_src_for_ssa(&load->dest.ssa); - nir_cf_node_insert(nir_after_instr(&load->instr), &if_stmt->cf_node); - - nir_jump_instr *ret = - nir_jump_instr_create(state->builder.shader, nir_jump_return); - nir_instr_insert(nir_before_cf_list(&if_stmt->then_list), &ret->instr); - - return true; + return progress; } static bool @@ -80,11 +93,21 @@ lower_returns_in_if(nir_if *if_stmt, struct lower_returns_state *state) { bool progress; - nir_if *parent = state->if_stmt; - state->if_stmt = if_stmt; progress = lower_returns_in_cf_list(&if_stmt->then_list, state); progress = lower_returns_in_cf_list(&if_stmt->else_list, state) || progress; - state->if_stmt = parent; + + /* If either of the recursive calls made progress, then there were + * returns inside of the body of the if. If we're in a loop, then these + * were lowered to breaks which automatically skip to the end of the + * loop so we don't have to do anything. If we're not in a loop, then + * all we know is that the return flag is set appropreately and that the + * recursive calls ensured that nothing gets executed *inside* the if + * after a return. In order to ensure nothing outside gets executed + * after a return, we need to predicate everything following on the + * return flag. 
+ */ + if (progress && !state->loop) + predicate_following(&if_stmt->cf_node, state); return progress; } @@ -121,51 +144,29 @@ lower_returns_in_block(nir_block *block, struct lower_returns_state *state) if (jump->type != nir_jump_return) return false; - if (state->loop) { - /* We're in a loop. Just set the return flag to true and break. - * lower_returns_in_loop will do the rest. - */ - nir_builder *b = &state->builder; - b->cursor = nir_before_instr(&jump->instr); + nir_builder *b = &state->builder; + b->cursor = nir_before_instr(&jump->instr); - if (state->return_flag == NULL) { - state->return_flag = - nir_local_variable_create(b->impl, glsl_bool_type(), "return"); + /* Set the return flag */ + if (state->return_flag == NULL) { + state->return_flag = + nir_local_variable_create(b->impl, glsl_bool_type(), "return"); - /* Set a default value of false */ - state->return_flag->constant_initializer = - rzalloc(state->return_flag, nir_constant); - } + /* Set a default value of false */ + state->return_flag->constant_initializer = + rzalloc(state->return_flag, nir_constant); + } + nir_store_var(b, state->return_flag, nir_imm_int(b, NIR_TRUE)); - nir_store_var(b, state->return_flag, nir_imm_int(b, NIR_TRUE)); + if (state->loop) { + /* We're in a loop. Make the return a break. */ jump->type = nir_jump_return; - } else if (state->if_stmt) { - /* If we're not in a loop but in an if, just move the rest of the CF - * list into the the other case of the if. - */ - nir_cf_list list; - nir_cf_extract(&list, nir_after_cf_node(&state->if_stmt->cf_node), - nir_after_cf_list(state->parent_cf_list)); - - nir_instr_remove(&jump->instr); - - if (state->cf_list == &state->if_stmt->then_list) { - nir_cf_reinsert(&list, - nir_after_cf_list(&state->if_stmt->else_list)); - } else if (state->cf_list == &state->if_stmt->else_list) { - nir_cf_reinsert(&list, - nir_after_cf_list(&state->if_stmt->then_list)); - } else { - unreachable("Invalid CF list"); - } } else { + /* Not in a loop. 
Just delete the return; we'll deal with + * predicating later. + */ + assert(nir_cf_node_next(&block->cf_node) == NULL); nir_instr_remove(&jump->instr); - - /* No if, no nothing. Just delete the return and whatever follows. */ - nir_cf_list list; - nir_cf_extract(&list, nir_after_cf_node(&block->cf_node), - nir_after_cf_list(state->parent_cf_list)); - nir_cf_delete(&list); } return true; @@ -177,10 +178,14 @@ lower_returns_in_cf_list(struct exec_list *cf_list, { bool progress = false; - struct exec_list *prev_parent_list = state->parent_cf_list; - state->parent_cf_list = state->cf_list; + struct exec_list *parent_list = state->cf_list; state->cf_list = cf_list; + /* We iterate over the list backwards because any given lower call may + * take everything following the given CF node and predicate it. In + * order to avoid recursion/iteration problems, we want everything after + * a given node to already be lowered before this happens. + */ foreach_list_typed_reverse_safe(nir_cf_node, node, node, cf_list) { switch (node->type) { case nir_cf_node_block: @@ -203,8 +208,7 @@ lower_returns_in_cf_list(struct exec_list *cf_list, } } - state->cf_list = state->parent_cf_list; - state->parent_cf_list = prev_parent_list; + state->cf_list = parent_list; return progress; } @@ -214,10 +218,8 @@ nir_lower_returns_impl(nir_function_impl *impl) { struct lower_returns_state state; - state.parent_cf_list = NULL; state.cf_list = &impl->body; state.loop = NULL; - state.if_stmt = NULL; state.return_flag = NULL; nir_builder_init(&state.builder, impl); -- cgit v1.2.3 From d9dcfafacc41660f3b7ce5062dec47985af51c65 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Dec 2015 10:35:18 -0800 Subject: nir/spirv: Use nir_build_alu for alu instructions --- src/glsl/nir/spirv_to_nir.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 91710ba45f9..a6b5828f73c 100644 --- 
a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2557,10 +2557,12 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, val->ssa = vtn_create_ssa_value(b, type); /* Collect the various SSA sources */ - unsigned num_inputs = count - 3; + const unsigned num_inputs = count - 3; nir_ssa_def *src[4]; for (unsigned i = 0; i < num_inputs; i++) src[i] = vtn_ssa_value(b, w[i + 3])->def; + for (unsigned i = num_inputs; i < 4; i++) + src[i] = NULL; /* Indicates that the first two arguments should be swapped. This is * used for implementing greater-than and less-than-or-equal. @@ -2583,7 +2585,6 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case 3: op = nir_op_bany_inequal3; break; case 4: op = nir_op_bany_inequal4; break; } - num_inputs = 2; src[1] = nir_imm_int(&b->nb, NIR_FALSE); } break; @@ -2597,7 +2598,6 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case 3: op = nir_op_ball_iequal3; break; case 4: op = nir_op_ball_iequal4; break; } - num_inputs = 2; src[1] = nir_imm_int(&b->nb, NIR_TRUE); } break; @@ -2726,16 +2726,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, src[1] = tmp; } - nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); - nir_ssa_dest_init(&instr->instr, &instr->dest.dest, - glsl_get_vector_elements(type), val->name); - instr->dest.write_mask = (1 << glsl_get_vector_elements(type)) - 1; - val->ssa->def = &instr->dest.dest.ssa; - - for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) - instr->src[i].src = nir_src_for_ssa(src[i]); - - nir_builder_instr_insert(&b->nb, &instr->instr); + val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); } static nir_ssa_def * -- cgit v1.2.3 From 7aaed915811e2745ba90e17eb0e3d03f6cf8c036 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Dec 2015 11:49:33 -0800 Subject: nir/spirv: Move to its own directory --- src/glsl/Makefile.am | 2 + src/glsl/Makefile.sources | 10 +- src/glsl/nir/nir_spirv.h | 47 - src/glsl/nir/spirv.h | 870 -------- 
src/glsl/nir/spirv/nir_spirv.h | 47 + src/glsl/nir/spirv/spirv.h | 870 ++++++++ src/glsl/nir/spirv/spirv_to_nir.c | 3952 +++++++++++++++++++++++++++++++++++ src/glsl/nir/spirv/vtn_glsl450.c | 180 ++ src/glsl/nir/spirv/vtn_private.h | 253 +++ src/glsl/nir/spirv2nir.c | 2 +- src/glsl/nir/spirv_glsl450.h | 127 -- src/glsl/nir/spirv_glsl450_to_nir.c | 180 -- src/glsl/nir/spirv_to_nir.c | 3952 ----------------------------------- src/glsl/nir/spirv_to_nir_private.h | 253 --- src/vulkan/anv_pipeline.c | 2 +- 15 files changed, 5312 insertions(+), 5435 deletions(-) delete mode 100644 src/glsl/nir/nir_spirv.h delete mode 100644 src/glsl/nir/spirv.h create mode 100644 src/glsl/nir/spirv/nir_spirv.h create mode 100644 src/glsl/nir/spirv/spirv.h create mode 100644 src/glsl/nir/spirv/spirv_to_nir.c create mode 100644 src/glsl/nir/spirv/vtn_glsl450.c create mode 100644 src/glsl/nir/spirv/vtn_private.h delete mode 100644 src/glsl/nir/spirv_glsl450.h delete mode 100644 src/glsl/nir/spirv_glsl450_to_nir.c delete mode 100644 src/glsl/nir/spirv_to_nir.c delete mode 100644 src/glsl/nir/spirv_to_nir_private.h (limited to 'src') diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am index 8b0a73b250a..1396758b7c2 100644 --- a/src/glsl/Makefile.am +++ b/src/glsl/Makefile.am @@ -144,11 +144,13 @@ libglsl_la_SOURCES = \ glsl_parser.h \ $(LIBGLSL_FILES) \ $(NIR_FILES) \ + $(SPIRV_FILES) \ $(NIR_GENERATED_FILES) libnir_la_SOURCES = \ $(NIR_FILES) \ + $(SPIRV_FILES) \ $(NIR_GENERATED_FILES) glsl_compiler_SOURCES = \ diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 830aacbe5e6..aa87cb1480f 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -79,7 +79,6 @@ NIR_FILES = \ nir/nir_remove_dead_variables.c \ nir/nir_search.c \ nir/nir_search.h \ - nir/nir_spirv.h \ nir/nir_split_var_copies.c \ nir/nir_sweep.c \ nir/nir_to_ssa.c \ @@ -90,9 +89,12 @@ NIR_FILES = \ nir/nir_worklist.h \ nir/nir_types.cpp \ nir/shader_enums.h \ - nir/shader_enums.c \ - 
nir/spirv_to_nir.c \ - nir/spirv_glsl450_to_nir.c + nir/shader_enums.c + +SPIRV_FILES = \ + nir/spirv/nir_spirv.h \ + nir/spirv/spirv_to_nir.c \ + nir/spirv/vtn_glsl450.c # libglsl diff --git a/src/glsl/nir/nir_spirv.h b/src/glsl/nir/nir_spirv.h deleted file mode 100644 index 1f09174ad7f..00000000000 --- a/src/glsl/nir/nir_spirv.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#pragma once - -#ifndef _NIR_SPIRV_H_ -#define _NIR_SPIRV_H_ - -#include "nir.h" - -#ifdef __cplusplus -extern "C" { -#endif - -nir_shader *spirv_to_nir(const uint32_t *words, size_t word_count, - gl_shader_stage stage, - const nir_shader_compiler_options *options); - -#ifdef __cplusplus -} -#endif - -#endif /* _NIR_SPIRV_H_ */ diff --git a/src/glsl/nir/spirv.h b/src/glsl/nir/spirv.h deleted file mode 100644 index 63bcb2f88dd..00000000000 --- a/src/glsl/nir/spirv.h +++ /dev/null @@ -1,870 +0,0 @@ -/* -** Copyright (c) 2014-2015 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. 
-*/ - -/* -** This header is automatically generated by the same tool that creates -** the Binary Section of the SPIR-V specification. -*/ - -/* -** Enumeration tokens for SPIR-V, in various styles: -** C, C++, C++11, JSON, Lua, Python -** -** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL -** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL -** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL -** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL -** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] -** -** Some tokens act like mask values, which can be OR'd together, -** while others are mutually exclusive. The mask-like ones have -** "Mask" in their name, and a parallel enum that has the shift -** amount (1 << x) for each corresponding enumerant. -*/ - -#ifndef spirv_H -#define spirv_H - -typedef unsigned int SpvId; - -#define SPV_VERSION 0x10000 -#define SPV_REVISION 2 - -static const unsigned int SpvMagicNumber = 0x07230203; -static const unsigned int SpvVersion = 0x00010000; -static const unsigned int SpvRevision = 2; -static const unsigned int SpvOpCodeMask = 0xffff; -static const unsigned int SpvWordCountShift = 16; - -typedef enum SpvSourceLanguage_ { - SpvSourceLanguageUnknown = 0, - SpvSourceLanguageESSL = 1, - SpvSourceLanguageGLSL = 2, - SpvSourceLanguageOpenCL_C = 3, - SpvSourceLanguageOpenCL_CPP = 4, -} SpvSourceLanguage; - -typedef enum SpvExecutionModel_ { - SpvExecutionModelVertex = 0, - SpvExecutionModelTessellationControl = 1, - SpvExecutionModelTessellationEvaluation = 2, - SpvExecutionModelGeometry = 3, - SpvExecutionModelFragment = 4, - SpvExecutionModelGLCompute = 5, - SpvExecutionModelKernel = 6, -} SpvExecutionModel; - -typedef enum SpvAddressingModel_ { - SpvAddressingModelLogical = 0, - SpvAddressingModelPhysical32 = 1, - SpvAddressingModelPhysical64 = 2, -} SpvAddressingModel; - -typedef enum SpvMemoryModel_ { - 
SpvMemoryModelSimple = 0, - SpvMemoryModelGLSL450 = 1, - SpvMemoryModelOpenCL = 2, -} SpvMemoryModel; - -typedef enum SpvExecutionMode_ { - SpvExecutionModeInvocations = 0, - SpvExecutionModeSpacingEqual = 1, - SpvExecutionModeSpacingFractionalEven = 2, - SpvExecutionModeSpacingFractionalOdd = 3, - SpvExecutionModeVertexOrderCw = 4, - SpvExecutionModeVertexOrderCcw = 5, - SpvExecutionModePixelCenterInteger = 6, - SpvExecutionModeOriginUpperLeft = 7, - SpvExecutionModeOriginLowerLeft = 8, - SpvExecutionModeEarlyFragmentTests = 9, - SpvExecutionModePointMode = 10, - SpvExecutionModeXfb = 11, - SpvExecutionModeDepthReplacing = 12, - SpvExecutionModeDepthGreater = 14, - SpvExecutionModeDepthLess = 15, - SpvExecutionModeDepthUnchanged = 16, - SpvExecutionModeLocalSize = 17, - SpvExecutionModeLocalSizeHint = 18, - SpvExecutionModeInputPoints = 19, - SpvExecutionModeInputLines = 20, - SpvExecutionModeInputLinesAdjacency = 21, - SpvExecutionModeTriangles = 22, - SpvExecutionModeInputTrianglesAdjacency = 23, - SpvExecutionModeQuads = 24, - SpvExecutionModeIsolines = 25, - SpvExecutionModeOutputVertices = 26, - SpvExecutionModeOutputPoints = 27, - SpvExecutionModeOutputLineStrip = 28, - SpvExecutionModeOutputTriangleStrip = 29, - SpvExecutionModeVecTypeHint = 30, - SpvExecutionModeContractionOff = 31, -} SpvExecutionMode; - -typedef enum SpvStorageClass_ { - SpvStorageClassUniformConstant = 0, - SpvStorageClassInput = 1, - SpvStorageClassUniform = 2, - SpvStorageClassOutput = 3, - SpvStorageClassWorkgroup = 4, - SpvStorageClassCrossWorkgroup = 5, - SpvStorageClassPrivate = 6, - SpvStorageClassFunction = 7, - SpvStorageClassGeneric = 8, - SpvStorageClassPushConstant = 9, - SpvStorageClassAtomicCounter = 10, - SpvStorageClassImage = 11, -} SpvStorageClass; - -typedef enum SpvDim_ { - SpvDim1D = 0, - SpvDim2D = 1, - SpvDim3D = 2, - SpvDimCube = 3, - SpvDimRect = 4, - SpvDimBuffer = 5, - SpvDimSubpassData = 6, -} SpvDim; - -typedef enum SpvSamplerAddressingMode_ { - 
SpvSamplerAddressingModeNone = 0, - SpvSamplerAddressingModeClampToEdge = 1, - SpvSamplerAddressingModeClamp = 2, - SpvSamplerAddressingModeRepeat = 3, - SpvSamplerAddressingModeRepeatMirrored = 4, -} SpvSamplerAddressingMode; - -typedef enum SpvSamplerFilterMode_ { - SpvSamplerFilterModeNearest = 0, - SpvSamplerFilterModeLinear = 1, -} SpvSamplerFilterMode; - -typedef enum SpvImageFormat_ { - SpvImageFormatUnknown = 0, - SpvImageFormatRgba32f = 1, - SpvImageFormatRgba16f = 2, - SpvImageFormatR32f = 3, - SpvImageFormatRgba8 = 4, - SpvImageFormatRgba8Snorm = 5, - SpvImageFormatRg32f = 6, - SpvImageFormatRg16f = 7, - SpvImageFormatR11fG11fB10f = 8, - SpvImageFormatR16f = 9, - SpvImageFormatRgba16 = 10, - SpvImageFormatRgb10A2 = 11, - SpvImageFormatRg16 = 12, - SpvImageFormatRg8 = 13, - SpvImageFormatR16 = 14, - SpvImageFormatR8 = 15, - SpvImageFormatRgba16Snorm = 16, - SpvImageFormatRg16Snorm = 17, - SpvImageFormatRg8Snorm = 18, - SpvImageFormatR16Snorm = 19, - SpvImageFormatR8Snorm = 20, - SpvImageFormatRgba32i = 21, - SpvImageFormatRgba16i = 22, - SpvImageFormatRgba8i = 23, - SpvImageFormatR32i = 24, - SpvImageFormatRg32i = 25, - SpvImageFormatRg16i = 26, - SpvImageFormatRg8i = 27, - SpvImageFormatR16i = 28, - SpvImageFormatR8i = 29, - SpvImageFormatRgba32ui = 30, - SpvImageFormatRgba16ui = 31, - SpvImageFormatRgba8ui = 32, - SpvImageFormatR32ui = 33, - SpvImageFormatRgb10a2ui = 34, - SpvImageFormatRg32ui = 35, - SpvImageFormatRg16ui = 36, - SpvImageFormatRg8ui = 37, - SpvImageFormatR16ui = 38, - SpvImageFormatR8ui = 39, -} SpvImageFormat; - -typedef enum SpvImageChannelOrder_ { - SpvImageChannelOrderR = 0, - SpvImageChannelOrderA = 1, - SpvImageChannelOrderRG = 2, - SpvImageChannelOrderRA = 3, - SpvImageChannelOrderRGB = 4, - SpvImageChannelOrderRGBA = 5, - SpvImageChannelOrderBGRA = 6, - SpvImageChannelOrderARGB = 7, - SpvImageChannelOrderIntensity = 8, - SpvImageChannelOrderLuminance = 9, - SpvImageChannelOrderRx = 10, - SpvImageChannelOrderRGx = 11, - 
SpvImageChannelOrderRGBx = 12, - SpvImageChannelOrderDepth = 13, - SpvImageChannelOrderDepthStencil = 14, - SpvImageChannelOrdersRGB = 15, - SpvImageChannelOrdersRGBx = 16, - SpvImageChannelOrdersRGBA = 17, - SpvImageChannelOrdersBGRA = 18, -} SpvImageChannelOrder; - -typedef enum SpvImageChannelDataType_ { - SpvImageChannelDataTypeSnormInt8 = 0, - SpvImageChannelDataTypeSnormInt16 = 1, - SpvImageChannelDataTypeUnormInt8 = 2, - SpvImageChannelDataTypeUnormInt16 = 3, - SpvImageChannelDataTypeUnormShort565 = 4, - SpvImageChannelDataTypeUnormShort555 = 5, - SpvImageChannelDataTypeUnormInt101010 = 6, - SpvImageChannelDataTypeSignedInt8 = 7, - SpvImageChannelDataTypeSignedInt16 = 8, - SpvImageChannelDataTypeSignedInt32 = 9, - SpvImageChannelDataTypeUnsignedInt8 = 10, - SpvImageChannelDataTypeUnsignedInt16 = 11, - SpvImageChannelDataTypeUnsignedInt32 = 12, - SpvImageChannelDataTypeHalfFloat = 13, - SpvImageChannelDataTypeFloat = 14, - SpvImageChannelDataTypeUnormInt24 = 15, - SpvImageChannelDataTypeUnormInt101010_2 = 16, -} SpvImageChannelDataType; - -typedef enum SpvImageOperandsShift_ { - SpvImageOperandsBiasShift = 0, - SpvImageOperandsLodShift = 1, - SpvImageOperandsGradShift = 2, - SpvImageOperandsConstOffsetShift = 3, - SpvImageOperandsOffsetShift = 4, - SpvImageOperandsConstOffsetsShift = 5, - SpvImageOperandsSampleShift = 6, - SpvImageOperandsMinLodShift = 7, -} SpvImageOperandsShift; - -typedef enum SpvImageOperandsMask_ { - SpvImageOperandsMaskNone = 0, - SpvImageOperandsBiasMask = 0x00000001, - SpvImageOperandsLodMask = 0x00000002, - SpvImageOperandsGradMask = 0x00000004, - SpvImageOperandsConstOffsetMask = 0x00000008, - SpvImageOperandsOffsetMask = 0x00000010, - SpvImageOperandsConstOffsetsMask = 0x00000020, - SpvImageOperandsSampleMask = 0x00000040, - SpvImageOperandsMinLodMask = 0x00000080, -} SpvImageOperandsMask; - -typedef enum SpvFPFastMathModeShift_ { - SpvFPFastMathModeNotNaNShift = 0, - SpvFPFastMathModeNotInfShift = 1, - SpvFPFastMathModeNSZShift = 
2, - SpvFPFastMathModeAllowRecipShift = 3, - SpvFPFastMathModeFastShift = 4, -} SpvFPFastMathModeShift; - -typedef enum SpvFPFastMathModeMask_ { - SpvFPFastMathModeMaskNone = 0, - SpvFPFastMathModeNotNaNMask = 0x00000001, - SpvFPFastMathModeNotInfMask = 0x00000002, - SpvFPFastMathModeNSZMask = 0x00000004, - SpvFPFastMathModeAllowRecipMask = 0x00000008, - SpvFPFastMathModeFastMask = 0x00000010, -} SpvFPFastMathModeMask; - -typedef enum SpvFPRoundingMode_ { - SpvFPRoundingModeRTE = 0, - SpvFPRoundingModeRTZ = 1, - SpvFPRoundingModeRTP = 2, - SpvFPRoundingModeRTN = 3, -} SpvFPRoundingMode; - -typedef enum SpvLinkageType_ { - SpvLinkageTypeExport = 0, - SpvLinkageTypeImport = 1, -} SpvLinkageType; - -typedef enum SpvAccessQualifier_ { - SpvAccessQualifierReadOnly = 0, - SpvAccessQualifierWriteOnly = 1, - SpvAccessQualifierReadWrite = 2, -} SpvAccessQualifier; - -typedef enum SpvFunctionParameterAttribute_ { - SpvFunctionParameterAttributeZext = 0, - SpvFunctionParameterAttributeSext = 1, - SpvFunctionParameterAttributeByVal = 2, - SpvFunctionParameterAttributeSret = 3, - SpvFunctionParameterAttributeNoAlias = 4, - SpvFunctionParameterAttributeNoCapture = 5, - SpvFunctionParameterAttributeNoWrite = 6, - SpvFunctionParameterAttributeNoReadWrite = 7, -} SpvFunctionParameterAttribute; - -typedef enum SpvDecoration_ { - SpvDecorationRelaxedPrecision = 0, - SpvDecorationSpecId = 1, - SpvDecorationBlock = 2, - SpvDecorationBufferBlock = 3, - SpvDecorationRowMajor = 4, - SpvDecorationColMajor = 5, - SpvDecorationArrayStride = 6, - SpvDecorationMatrixStride = 7, - SpvDecorationGLSLShared = 8, - SpvDecorationGLSLPacked = 9, - SpvDecorationCPacked = 10, - SpvDecorationBuiltIn = 11, - SpvDecorationNoPerspective = 13, - SpvDecorationFlat = 14, - SpvDecorationPatch = 15, - SpvDecorationCentroid = 16, - SpvDecorationSample = 17, - SpvDecorationInvariant = 18, - SpvDecorationRestrict = 19, - SpvDecorationAliased = 20, - SpvDecorationVolatile = 21, - SpvDecorationConstant = 22, - 
SpvDecorationCoherent = 23, - SpvDecorationNonWritable = 24, - SpvDecorationNonReadable = 25, - SpvDecorationUniform = 26, - SpvDecorationSaturatedConversion = 28, - SpvDecorationStream = 29, - SpvDecorationLocation = 30, - SpvDecorationComponent = 31, - SpvDecorationIndex = 32, - SpvDecorationBinding = 33, - SpvDecorationDescriptorSet = 34, - SpvDecorationOffset = 35, - SpvDecorationXfbBuffer = 36, - SpvDecorationXfbStride = 37, - SpvDecorationFuncParamAttr = 38, - SpvDecorationFPRoundingMode = 39, - SpvDecorationFPFastMathMode = 40, - SpvDecorationLinkageAttributes = 41, - SpvDecorationNoContraction = 42, - SpvDecorationInputAttachmentIndex = 43, - SpvDecorationAlignment = 44, -} SpvDecoration; - -typedef enum SpvBuiltIn_ { - SpvBuiltInPosition = 0, - SpvBuiltInPointSize = 1, - SpvBuiltInClipDistance = 3, - SpvBuiltInCullDistance = 4, - SpvBuiltInVertexId = 5, - SpvBuiltInInstanceId = 6, - SpvBuiltInPrimitiveId = 7, - SpvBuiltInInvocationId = 8, - SpvBuiltInLayer = 9, - SpvBuiltInViewportIndex = 10, - SpvBuiltInTessLevelOuter = 11, - SpvBuiltInTessLevelInner = 12, - SpvBuiltInTessCoord = 13, - SpvBuiltInPatchVertices = 14, - SpvBuiltInFragCoord = 15, - SpvBuiltInPointCoord = 16, - SpvBuiltInFrontFacing = 17, - SpvBuiltInSampleId = 18, - SpvBuiltInSamplePosition = 19, - SpvBuiltInSampleMask = 20, - SpvBuiltInFragDepth = 22, - SpvBuiltInHelperInvocation = 23, - SpvBuiltInNumWorkgroups = 24, - SpvBuiltInWorkgroupSize = 25, - SpvBuiltInWorkgroupId = 26, - SpvBuiltInLocalInvocationId = 27, - SpvBuiltInGlobalInvocationId = 28, - SpvBuiltInLocalInvocationIndex = 29, - SpvBuiltInWorkDim = 30, - SpvBuiltInGlobalSize = 31, - SpvBuiltInEnqueuedWorkgroupSize = 32, - SpvBuiltInGlobalOffset = 33, - SpvBuiltInGlobalLinearId = 34, - SpvBuiltInSubgroupSize = 36, - SpvBuiltInSubgroupMaxSize = 37, - SpvBuiltInNumSubgroups = 38, - SpvBuiltInNumEnqueuedSubgroups = 39, - SpvBuiltInSubgroupId = 40, - SpvBuiltInSubgroupLocalInvocationId = 41, - SpvBuiltInVertexIndex = 42, - 
SpvBuiltInInstanceIndex = 43, -} SpvBuiltIn; - -typedef enum SpvSelectionControlShift_ { - SpvSelectionControlFlattenShift = 0, - SpvSelectionControlDontFlattenShift = 1, -} SpvSelectionControlShift; - -typedef enum SpvSelectionControlMask_ { - SpvSelectionControlMaskNone = 0, - SpvSelectionControlFlattenMask = 0x00000001, - SpvSelectionControlDontFlattenMask = 0x00000002, -} SpvSelectionControlMask; - -typedef enum SpvLoopControlShift_ { - SpvLoopControlUnrollShift = 0, - SpvLoopControlDontUnrollShift = 1, -} SpvLoopControlShift; - -typedef enum SpvLoopControlMask_ { - SpvLoopControlMaskNone = 0, - SpvLoopControlUnrollMask = 0x00000001, - SpvLoopControlDontUnrollMask = 0x00000002, -} SpvLoopControlMask; - -typedef enum SpvFunctionControlShift_ { - SpvFunctionControlInlineShift = 0, - SpvFunctionControlDontInlineShift = 1, - SpvFunctionControlPureShift = 2, - SpvFunctionControlConstShift = 3, -} SpvFunctionControlShift; - -typedef enum SpvFunctionControlMask_ { - SpvFunctionControlMaskNone = 0, - SpvFunctionControlInlineMask = 0x00000001, - SpvFunctionControlDontInlineMask = 0x00000002, - SpvFunctionControlPureMask = 0x00000004, - SpvFunctionControlConstMask = 0x00000008, -} SpvFunctionControlMask; - -typedef enum SpvMemorySemanticsShift_ { - SpvMemorySemanticsAcquireShift = 1, - SpvMemorySemanticsReleaseShift = 2, - SpvMemorySemanticsAcquireReleaseShift = 3, - SpvMemorySemanticsSequentiallyConsistentShift = 4, - SpvMemorySemanticsUniformMemoryShift = 6, - SpvMemorySemanticsSubgroupMemoryShift = 7, - SpvMemorySemanticsWorkgroupMemoryShift = 8, - SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, - SpvMemorySemanticsAtomicCounterMemoryShift = 10, - SpvMemorySemanticsImageMemoryShift = 11, -} SpvMemorySemanticsShift; - -typedef enum SpvMemorySemanticsMask_ { - SpvMemorySemanticsMaskNone = 0, - SpvMemorySemanticsAcquireMask = 0x00000002, - SpvMemorySemanticsReleaseMask = 0x00000004, - SpvMemorySemanticsAcquireReleaseMask = 0x00000008, - 
SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, - SpvMemorySemanticsUniformMemoryMask = 0x00000040, - SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, - SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, - SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, - SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, - SpvMemorySemanticsImageMemoryMask = 0x00000800, -} SpvMemorySemanticsMask; - -typedef enum SpvMemoryAccessShift_ { - SpvMemoryAccessVolatileShift = 0, - SpvMemoryAccessAlignedShift = 1, - SpvMemoryAccessNontemporalShift = 2, -} SpvMemoryAccessShift; - -typedef enum SpvMemoryAccessMask_ { - SpvMemoryAccessMaskNone = 0, - SpvMemoryAccessVolatileMask = 0x00000001, - SpvMemoryAccessAlignedMask = 0x00000002, - SpvMemoryAccessNontemporalMask = 0x00000004, -} SpvMemoryAccessMask; - -typedef enum SpvScope_ { - SpvScopeCrossDevice = 0, - SpvScopeDevice = 1, - SpvScopeWorkgroup = 2, - SpvScopeSubgroup = 3, - SpvScopeInvocation = 4, -} SpvScope; - -typedef enum SpvGroupOperation_ { - SpvGroupOperationReduce = 0, - SpvGroupOperationInclusiveScan = 1, - SpvGroupOperationExclusiveScan = 2, -} SpvGroupOperation; - -typedef enum SpvKernelEnqueueFlags_ { - SpvKernelEnqueueFlagsNoWait = 0, - SpvKernelEnqueueFlagsWaitKernel = 1, - SpvKernelEnqueueFlagsWaitWorkGroup = 2, -} SpvKernelEnqueueFlags; - -typedef enum SpvKernelProfilingInfoShift_ { - SpvKernelProfilingInfoCmdExecTimeShift = 0, -} SpvKernelProfilingInfoShift; - -typedef enum SpvKernelProfilingInfoMask_ { - SpvKernelProfilingInfoMaskNone = 0, - SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, -} SpvKernelProfilingInfoMask; - -typedef enum SpvCapability_ { - SpvCapabilityMatrix = 0, - SpvCapabilityShader = 1, - SpvCapabilityGeometry = 2, - SpvCapabilityTessellation = 3, - SpvCapabilityAddresses = 4, - SpvCapabilityLinkage = 5, - SpvCapabilityKernel = 6, - SpvCapabilityVector16 = 7, - SpvCapabilityFloat16Buffer = 8, - SpvCapabilityFloat16 = 9, - SpvCapabilityFloat64 = 10, - SpvCapabilityInt64 = 
11, - SpvCapabilityInt64Atomics = 12, - SpvCapabilityImageBasic = 13, - SpvCapabilityImageReadWrite = 14, - SpvCapabilityImageMipmap = 15, - SpvCapabilityPipes = 17, - SpvCapabilityGroups = 18, - SpvCapabilityDeviceEnqueue = 19, - SpvCapabilityLiteralSampler = 20, - SpvCapabilityAtomicStorage = 21, - SpvCapabilityInt16 = 22, - SpvCapabilityTessellationPointSize = 23, - SpvCapabilityGeometryPointSize = 24, - SpvCapabilityImageGatherExtended = 25, - SpvCapabilityStorageImageMultisample = 27, - SpvCapabilityUniformBufferArrayDynamicIndexing = 28, - SpvCapabilitySampledImageArrayDynamicIndexing = 29, - SpvCapabilityStorageBufferArrayDynamicIndexing = 30, - SpvCapabilityStorageImageArrayDynamicIndexing = 31, - SpvCapabilityClipDistance = 32, - SpvCapabilityCullDistance = 33, - SpvCapabilityImageCubeArray = 34, - SpvCapabilitySampleRateShading = 35, - SpvCapabilityImageRect = 36, - SpvCapabilitySampledRect = 37, - SpvCapabilityGenericPointer = 38, - SpvCapabilityInt8 = 39, - SpvCapabilityInputAttachment = 40, - SpvCapabilitySparseResidency = 41, - SpvCapabilityMinLod = 42, - SpvCapabilitySampled1D = 43, - SpvCapabilityImage1D = 44, - SpvCapabilitySampledCubeArray = 45, - SpvCapabilitySampledBuffer = 46, - SpvCapabilityImageBuffer = 47, - SpvCapabilityImageMSArray = 48, - SpvCapabilityStorageImageExtendedFormats = 49, - SpvCapabilityImageQuery = 50, - SpvCapabilityDerivativeControl = 51, - SpvCapabilityInterpolationFunction = 52, - SpvCapabilityTransformFeedback = 53, - SpvCapabilityGeometryStreams = 54, - SpvCapabilityStorageImageReadWithoutFormat = 55, - SpvCapabilityStorageImageWriteWithoutFormat = 56, - SpvCapabilityMultiViewport = 57, -} SpvCapability; - -typedef enum SpvOp_ { - SpvOpNop = 0, - SpvOpUndef = 1, - SpvOpSourceContinued = 2, - SpvOpSource = 3, - SpvOpSourceExtension = 4, - SpvOpName = 5, - SpvOpMemberName = 6, - SpvOpString = 7, - SpvOpLine = 8, - SpvOpExtension = 10, - SpvOpExtInstImport = 11, - SpvOpExtInst = 12, - SpvOpMemoryModel = 14, - 
SpvOpEntryPoint = 15, - SpvOpExecutionMode = 16, - SpvOpCapability = 17, - SpvOpTypeVoid = 19, - SpvOpTypeBool = 20, - SpvOpTypeInt = 21, - SpvOpTypeFloat = 22, - SpvOpTypeVector = 23, - SpvOpTypeMatrix = 24, - SpvOpTypeImage = 25, - SpvOpTypeSampler = 26, - SpvOpTypeSampledImage = 27, - SpvOpTypeArray = 28, - SpvOpTypeRuntimeArray = 29, - SpvOpTypeStruct = 30, - SpvOpTypeOpaque = 31, - SpvOpTypePointer = 32, - SpvOpTypeFunction = 33, - SpvOpTypeEvent = 34, - SpvOpTypeDeviceEvent = 35, - SpvOpTypeReserveId = 36, - SpvOpTypeQueue = 37, - SpvOpTypePipe = 38, - SpvOpTypeForwardPointer = 39, - SpvOpConstantTrue = 41, - SpvOpConstantFalse = 42, - SpvOpConstant = 43, - SpvOpConstantComposite = 44, - SpvOpConstantSampler = 45, - SpvOpConstantNull = 46, - SpvOpSpecConstantTrue = 48, - SpvOpSpecConstantFalse = 49, - SpvOpSpecConstant = 50, - SpvOpSpecConstantComposite = 51, - SpvOpSpecConstantOp = 52, - SpvOpFunction = 54, - SpvOpFunctionParameter = 55, - SpvOpFunctionEnd = 56, - SpvOpFunctionCall = 57, - SpvOpVariable = 59, - SpvOpImageTexelPointer = 60, - SpvOpLoad = 61, - SpvOpStore = 62, - SpvOpCopyMemory = 63, - SpvOpCopyMemorySized = 64, - SpvOpAccessChain = 65, - SpvOpInBoundsAccessChain = 66, - SpvOpPtrAccessChain = 67, - SpvOpArrayLength = 68, - SpvOpGenericPtrMemSemantics = 69, - SpvOpInBoundsPtrAccessChain = 70, - SpvOpDecorate = 71, - SpvOpMemberDecorate = 72, - SpvOpDecorationGroup = 73, - SpvOpGroupDecorate = 74, - SpvOpGroupMemberDecorate = 75, - SpvOpVectorExtractDynamic = 77, - SpvOpVectorInsertDynamic = 78, - SpvOpVectorShuffle = 79, - SpvOpCompositeConstruct = 80, - SpvOpCompositeExtract = 81, - SpvOpCompositeInsert = 82, - SpvOpCopyObject = 83, - SpvOpTranspose = 84, - SpvOpSampledImage = 86, - SpvOpImageSampleImplicitLod = 87, - SpvOpImageSampleExplicitLod = 88, - SpvOpImageSampleDrefImplicitLod = 89, - SpvOpImageSampleDrefExplicitLod = 90, - SpvOpImageSampleProjImplicitLod = 91, - SpvOpImageSampleProjExplicitLod = 92, - 
SpvOpImageSampleProjDrefImplicitLod = 93, - SpvOpImageSampleProjDrefExplicitLod = 94, - SpvOpImageFetch = 95, - SpvOpImageGather = 96, - SpvOpImageDrefGather = 97, - SpvOpImageRead = 98, - SpvOpImageWrite = 99, - SpvOpImage = 100, - SpvOpImageQueryFormat = 101, - SpvOpImageQueryOrder = 102, - SpvOpImageQuerySizeLod = 103, - SpvOpImageQuerySize = 104, - SpvOpImageQueryLod = 105, - SpvOpImageQueryLevels = 106, - SpvOpImageQuerySamples = 107, - SpvOpConvertFToU = 109, - SpvOpConvertFToS = 110, - SpvOpConvertSToF = 111, - SpvOpConvertUToF = 112, - SpvOpUConvert = 113, - SpvOpSConvert = 114, - SpvOpFConvert = 115, - SpvOpQuantizeToF16 = 116, - SpvOpConvertPtrToU = 117, - SpvOpSatConvertSToU = 118, - SpvOpSatConvertUToS = 119, - SpvOpConvertUToPtr = 120, - SpvOpPtrCastToGeneric = 121, - SpvOpGenericCastToPtr = 122, - SpvOpGenericCastToPtrExplicit = 123, - SpvOpBitcast = 124, - SpvOpSNegate = 126, - SpvOpFNegate = 127, - SpvOpIAdd = 128, - SpvOpFAdd = 129, - SpvOpISub = 130, - SpvOpFSub = 131, - SpvOpIMul = 132, - SpvOpFMul = 133, - SpvOpUDiv = 134, - SpvOpSDiv = 135, - SpvOpFDiv = 136, - SpvOpUMod = 137, - SpvOpSRem = 138, - SpvOpSMod = 139, - SpvOpFRem = 140, - SpvOpFMod = 141, - SpvOpVectorTimesScalar = 142, - SpvOpMatrixTimesScalar = 143, - SpvOpVectorTimesMatrix = 144, - SpvOpMatrixTimesVector = 145, - SpvOpMatrixTimesMatrix = 146, - SpvOpOuterProduct = 147, - SpvOpDot = 148, - SpvOpIAddCarry = 149, - SpvOpISubBorrow = 150, - SpvOpUMulExtended = 151, - SpvOpSMulExtended = 152, - SpvOpAny = 154, - SpvOpAll = 155, - SpvOpIsNan = 156, - SpvOpIsInf = 157, - SpvOpIsFinite = 158, - SpvOpIsNormal = 159, - SpvOpSignBitSet = 160, - SpvOpLessOrGreater = 161, - SpvOpOrdered = 162, - SpvOpUnordered = 163, - SpvOpLogicalEqual = 164, - SpvOpLogicalNotEqual = 165, - SpvOpLogicalOr = 166, - SpvOpLogicalAnd = 167, - SpvOpLogicalNot = 168, - SpvOpSelect = 169, - SpvOpIEqual = 170, - SpvOpINotEqual = 171, - SpvOpUGreaterThan = 172, - SpvOpSGreaterThan = 173, - SpvOpUGreaterThanEqual = 
174, - SpvOpSGreaterThanEqual = 175, - SpvOpULessThan = 176, - SpvOpSLessThan = 177, - SpvOpULessThanEqual = 178, - SpvOpSLessThanEqual = 179, - SpvOpFOrdEqual = 180, - SpvOpFUnordEqual = 181, - SpvOpFOrdNotEqual = 182, - SpvOpFUnordNotEqual = 183, - SpvOpFOrdLessThan = 184, - SpvOpFUnordLessThan = 185, - SpvOpFOrdGreaterThan = 186, - SpvOpFUnordGreaterThan = 187, - SpvOpFOrdLessThanEqual = 188, - SpvOpFUnordLessThanEqual = 189, - SpvOpFOrdGreaterThanEqual = 190, - SpvOpFUnordGreaterThanEqual = 191, - SpvOpShiftRightLogical = 194, - SpvOpShiftRightArithmetic = 195, - SpvOpShiftLeftLogical = 196, - SpvOpBitwiseOr = 197, - SpvOpBitwiseXor = 198, - SpvOpBitwiseAnd = 199, - SpvOpNot = 200, - SpvOpBitFieldInsert = 201, - SpvOpBitFieldSExtract = 202, - SpvOpBitFieldUExtract = 203, - SpvOpBitReverse = 204, - SpvOpBitCount = 205, - SpvOpDPdx = 207, - SpvOpDPdy = 208, - SpvOpFwidth = 209, - SpvOpDPdxFine = 210, - SpvOpDPdyFine = 211, - SpvOpFwidthFine = 212, - SpvOpDPdxCoarse = 213, - SpvOpDPdyCoarse = 214, - SpvOpFwidthCoarse = 215, - SpvOpEmitVertex = 218, - SpvOpEndPrimitive = 219, - SpvOpEmitStreamVertex = 220, - SpvOpEndStreamPrimitive = 221, - SpvOpControlBarrier = 224, - SpvOpMemoryBarrier = 225, - SpvOpAtomicLoad = 227, - SpvOpAtomicStore = 228, - SpvOpAtomicExchange = 229, - SpvOpAtomicCompareExchange = 230, - SpvOpAtomicCompareExchangeWeak = 231, - SpvOpAtomicIIncrement = 232, - SpvOpAtomicIDecrement = 233, - SpvOpAtomicIAdd = 234, - SpvOpAtomicISub = 235, - SpvOpAtomicSMin = 236, - SpvOpAtomicUMin = 237, - SpvOpAtomicSMax = 238, - SpvOpAtomicUMax = 239, - SpvOpAtomicAnd = 240, - SpvOpAtomicOr = 241, - SpvOpAtomicXor = 242, - SpvOpPhi = 245, - SpvOpLoopMerge = 246, - SpvOpSelectionMerge = 247, - SpvOpLabel = 248, - SpvOpBranch = 249, - SpvOpBranchConditional = 250, - SpvOpSwitch = 251, - SpvOpKill = 252, - SpvOpReturn = 253, - SpvOpReturnValue = 254, - SpvOpUnreachable = 255, - SpvOpLifetimeStart = 256, - SpvOpLifetimeStop = 257, - SpvOpGroupAsyncCopy = 259, - 
SpvOpGroupWaitEvents = 260, - SpvOpGroupAll = 261, - SpvOpGroupAny = 262, - SpvOpGroupBroadcast = 263, - SpvOpGroupIAdd = 264, - SpvOpGroupFAdd = 265, - SpvOpGroupFMin = 266, - SpvOpGroupUMin = 267, - SpvOpGroupSMin = 268, - SpvOpGroupFMax = 269, - SpvOpGroupUMax = 270, - SpvOpGroupSMax = 271, - SpvOpReadPipe = 274, - SpvOpWritePipe = 275, - SpvOpReservedReadPipe = 276, - SpvOpReservedWritePipe = 277, - SpvOpReserveReadPipePackets = 278, - SpvOpReserveWritePipePackets = 279, - SpvOpCommitReadPipe = 280, - SpvOpCommitWritePipe = 281, - SpvOpIsValidReserveId = 282, - SpvOpGetNumPipePackets = 283, - SpvOpGetMaxPipePackets = 284, - SpvOpGroupReserveReadPipePackets = 285, - SpvOpGroupReserveWritePipePackets = 286, - SpvOpGroupCommitReadPipe = 287, - SpvOpGroupCommitWritePipe = 288, - SpvOpEnqueueMarker = 291, - SpvOpEnqueueKernel = 292, - SpvOpGetKernelNDrangeSubGroupCount = 293, - SpvOpGetKernelNDrangeMaxSubGroupSize = 294, - SpvOpGetKernelWorkGroupSize = 295, - SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, - SpvOpRetainEvent = 297, - SpvOpReleaseEvent = 298, - SpvOpCreateUserEvent = 299, - SpvOpIsValidEvent = 300, - SpvOpSetUserEventStatus = 301, - SpvOpCaptureEventProfilingInfo = 302, - SpvOpGetDefaultQueue = 303, - SpvOpBuildNDRange = 304, - SpvOpImageSparseSampleImplicitLod = 305, - SpvOpImageSparseSampleExplicitLod = 306, - SpvOpImageSparseSampleDrefImplicitLod = 307, - SpvOpImageSparseSampleDrefExplicitLod = 308, - SpvOpImageSparseSampleProjImplicitLod = 309, - SpvOpImageSparseSampleProjExplicitLod = 310, - SpvOpImageSparseSampleProjDrefImplicitLod = 311, - SpvOpImageSparseSampleProjDrefExplicitLod = 312, - SpvOpImageSparseFetch = 313, - SpvOpImageSparseGather = 314, - SpvOpImageSparseDrefGather = 315, - SpvOpImageSparseTexelsResident = 316, - SpvOpNoLine = 317, - SpvOpAtomicFlagTestAndSet = 318, - SpvOpAtomicFlagClear = 319, -} SpvOp; - -#endif // #ifndef spirv_H - diff --git a/src/glsl/nir/spirv/nir_spirv.h b/src/glsl/nir/spirv/nir_spirv.h new file mode 
100644 index 00000000000..1f09174ad7f --- /dev/null +++ b/src/glsl/nir/spirv/nir_spirv.h @@ -0,0 +1,47 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#pragma once + +#ifndef _NIR_SPIRV_H_ +#define _NIR_SPIRV_H_ + +#include "nir.h" + +#ifdef __cplusplus +extern "C" { +#endif + +nir_shader *spirv_to_nir(const uint32_t *words, size_t word_count, + gl_shader_stage stage, + const nir_shader_compiler_options *options); + +#ifdef __cplusplus +} +#endif + +#endif /* _NIR_SPIRV_H_ */ diff --git a/src/glsl/nir/spirv/spirv.h b/src/glsl/nir/spirv/spirv.h new file mode 100644 index 00000000000..63bcb2f88dd --- /dev/null +++ b/src/glsl/nir/spirv/spirv.h @@ -0,0 +1,870 @@ +/* +** Copyright (c) 2014-2015 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. 
+*/ + +/* +** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. +*/ + +#ifndef spirv_H +#define spirv_H + +typedef unsigned int SpvId; + +#define SPV_VERSION 0x10000 +#define SPV_REVISION 2 + +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 0x00010000; +static const unsigned int SpvRevision = 2; +static const unsigned int SpvOpCodeMask = 0xffff; +static const unsigned int SpvWordCountShift = 16; + +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL_C = 3, + SpvSourceLanguageOpenCL_CPP = 4, +} SpvSourceLanguage; + +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, +} SpvExecutionModel; + +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, +} SpvAddressingModel; + +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + SpvMemoryModelOpenCL = 2, +} SpvMemoryModel; + +typedef enum SpvExecutionMode_ { + 
SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthGreater = 14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + SpvExecutionModeQuads = 24, + SpvExecutionModeIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, +} SpvExecutionMode; + +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroup = 4, + SpvStorageClassCrossWorkgroup = 5, + SpvStorageClassPrivate = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPushConstant = 9, + SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, +} SpvStorageClass; + +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, + SpvDimSubpassData = 6, +} SpvDim; + +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, 
+ SpvSamplerAddressingModeRepeatMirrored = 4, +} SpvSamplerAddressingMode; + +typedef enum SpvSamplerFilterMode_ { + SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, +} SpvSamplerFilterMode; + +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, +} SpvImageFormat; + +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + 
SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, +} SpvImageChannelOrder; + +typedef enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 14, + SpvImageChannelDataTypeUnormInt24 = 15, + SpvImageChannelDataTypeUnormInt101010_2 = 16, +} SpvImageChannelDataType; + +typedef enum SpvImageOperandsShift_ { + SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsConstOffsetShift = 3, + SpvImageOperandsOffsetShift = 4, + SpvImageOperandsConstOffsetsShift = 5, + SpvImageOperandsSampleShift = 6, + SpvImageOperandsMinLodShift = 7, +} SpvImageOperandsShift; + +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsConstOffsetMask = 0x00000008, + SpvImageOperandsOffsetMask = 0x00000010, + SpvImageOperandsConstOffsetsMask = 0x00000020, + SpvImageOperandsSampleMask = 0x00000040, + SpvImageOperandsMinLodMask = 0x00000080, +} SpvImageOperandsMask; + +typedef enum SpvFPFastMathModeShift_ { + SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, +} SpvFPFastMathModeShift; + +typedef enum 
SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, +} SpvFPFastMathModeMask; + +typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, +} SpvFPRoundingMode; + +typedef enum SpvLinkageType_ { + SpvLinkageTypeExport = 0, + SpvLinkageTypeImport = 1, +} SpvLinkageType; + +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, +} SpvAccessQualifier; + +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeNoWrite = 6, + SpvFunctionParameterAttributeNoReadWrite = 7, +} SpvFunctionParameterAttribute; + +typedef enum SpvDecoration_ { + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + SpvDecorationBufferBlock = 3, + SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + SpvDecorationBuiltIn = 11, + SpvDecorationNoPerspective = 13, + SpvDecorationFlat = 14, + SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonWritable = 24, + SpvDecorationNonReadable = 25, + SpvDecorationUniform = 26, + 
SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, + SpvDecorationNoContraction = 42, + SpvDecorationInputAttachmentIndex = 43, + SpvDecorationAlignment = 44, +} SpvDecoration; + +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, + SpvBuiltInVertexIndex = 42, + SpvBuiltInInstanceIndex = 43, +} SpvBuiltIn; + +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + 
SpvSelectionControlDontFlattenShift = 1, +} SpvSelectionControlShift; + +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; + +typedef enum SpvLoopControlShift_ { + SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, +} SpvLoopControlShift; + +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, +} SpvLoopControlMask; + +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, +} SpvFunctionControlShift; + +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, +} SpvFunctionControlMask; + +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsAcquireShift = 1, + SpvMemorySemanticsReleaseShift = 2, + SpvMemorySemanticsAcquireReleaseShift = 3, + SpvMemorySemanticsSequentiallyConsistentShift = 4, + SpvMemorySemanticsUniformMemoryShift = 6, + SpvMemorySemanticsSubgroupMemoryShift = 7, + SpvMemorySemanticsWorkgroupMemoryShift = 8, + SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, + SpvMemorySemanticsAtomicCounterMemoryShift = 10, + SpvMemorySemanticsImageMemoryShift = 11, +} SpvMemorySemanticsShift; + +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsAcquireMask = 0x00000002, + SpvMemorySemanticsReleaseMask = 0x00000004, + SpvMemorySemanticsAcquireReleaseMask = 0x00000008, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, + SpvMemorySemanticsUniformMemoryMask = 0x00000040, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, 
+ SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, + SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, + SpvMemorySemanticsImageMemoryMask = 0x00000800, +} SpvMemorySemanticsMask; + +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift = 0, + SpvMemoryAccessAlignedShift = 1, + SpvMemoryAccessNontemporalShift = 2, +} SpvMemoryAccessShift; + +typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, + SpvMemoryAccessNontemporalMask = 0x00000004, +} SpvMemoryAccessMask; + +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, + SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, +} SpvScope; + +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, +} SpvGroupOperation; + +typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, +} SpvKernelEnqueueFlags; + +typedef enum SpvKernelProfilingInfoShift_ { + SpvKernelProfilingInfoCmdExecTimeShift = 0, +} SpvKernelProfilingInfoShift; + +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; + +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation = 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityPipes = 
17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, + SpvCapabilityTessellationPointSize = 23, + SpvCapabilityGeometryPointSize = 24, + SpvCapabilityImageGatherExtended = 25, + SpvCapabilityStorageImageMultisample = 27, + SpvCapabilityUniformBufferArrayDynamicIndexing = 28, + SpvCapabilitySampledImageArrayDynamicIndexing = 29, + SpvCapabilityStorageBufferArrayDynamicIndexing = 30, + SpvCapabilityStorageImageArrayDynamicIndexing = 31, + SpvCapabilityClipDistance = 32, + SpvCapabilityCullDistance = 33, + SpvCapabilityImageCubeArray = 34, + SpvCapabilitySampleRateShading = 35, + SpvCapabilityImageRect = 36, + SpvCapabilitySampledRect = 37, + SpvCapabilityGenericPointer = 38, + SpvCapabilityInt8 = 39, + SpvCapabilityInputAttachment = 40, + SpvCapabilitySparseResidency = 41, + SpvCapabilityMinLod = 42, + SpvCapabilitySampled1D = 43, + SpvCapabilityImage1D = 44, + SpvCapabilitySampledCubeArray = 45, + SpvCapabilitySampledBuffer = 46, + SpvCapabilityImageBuffer = 47, + SpvCapabilityImageMSArray = 48, + SpvCapabilityStorageImageExtendedFormats = 49, + SpvCapabilityImageQuery = 50, + SpvCapabilityDerivativeControl = 51, + SpvCapabilityInterpolationFunction = 52, + SpvCapabilityTransformFeedback = 53, + SpvCapabilityGeometryStreams = 54, + SpvCapabilityStorageImageReadWithoutFormat = 55, + SpvCapabilityStorageImageWriteWithoutFormat = 56, + SpvCapabilityMultiViewport = 57, +} SpvCapability; + +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSourceContinued = 2, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + 
SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + 
SpvOpImageWrite = 99, + SpvOpImage = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 
181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpGroupAsyncCopy = 259, + SpvOpGroupWaitEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin 
= 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, + SpvOpAtomicFlagClear = 319, +} SpvOp; + +#endif // #ifndef spirv_H + diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c new file mode 100644 index 00000000000..815b447857b --- /dev/null +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -0,0 +1,3952 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby 
granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "vtn_private.h" +#include "nir/nir_vla.h" +#include "nir/nir_control_flow.h" + +static struct vtn_ssa_value * +vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(b->shader, num_components); + + nir_instr_insert_before_cf_list(&b->impl->body, &undef->instr); + val->def = &undef->def; + } else { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + if (glsl_type_is_matrix(type)) { + const struct glsl_type *elem_type = + glsl_vector_type(glsl_get_base_type(type), + glsl_get_vector_elements(type)); + + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } else if (glsl_type_is_array(type)) { + const struct glsl_type *elem_type = glsl_get_array_element(type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } else { + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = glsl_get_struct_field(type, i); + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } + } + } + + return val; +} + +static struct vtn_ssa_value * +vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, + const struct glsl_type *type) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); + + if (entry) + return entry->data; + + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = 
glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, num_components); + + for (unsigned i = 0; i < num_components; i++) + load->value.u[i] = constant->value.u[i]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + val->def = &load->def; + } else { + assert(glsl_type_is_matrix(type)); + unsigned rows = glsl_get_vector_elements(val->type); + unsigned columns = glsl_get_matrix_columns(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); + + for (unsigned i = 0; i < columns; i++) { + struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); + col_val->type = glsl_get_column_type(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, rows); + + for (unsigned j = 0; j < rows; j++) + load->value.u[j] = constant->value.u[rows * i + j]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + col_val->def = &load->def; + + val->elems[i] = col_val; + } + } + break; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + const struct glsl_type *elem_type = glsl_get_array_element(val->type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + break; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = + glsl_get_struct_field(val->type, i); + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + } + break; + } + + default: + unreachable("bad constant type"); + } + + return val; +} + +static struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *src_type); + +struct vtn_ssa_value * +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +{ + struct 
vtn_value *val = vtn_untyped_value(b, value_id); + switch (val->value_type) { + case vtn_value_type_undef: + return vtn_undef_ssa_value(b, val->type->type); + + case vtn_value_type_constant: + return vtn_const_ssa_value(b, val->constant, val->const_type); + + case vtn_value_type_ssa: + return val->ssa; + + case vtn_value_type_deref: + /* This is needed for function parameters */ + return vtn_variable_load(b, val->deref, val->deref_type); + + default: + unreachable("Invalid type for an SSA value"); + } +} + +static char * +vtn_string_literal(struct vtn_builder *b, const uint32_t *words, + unsigned word_count) +{ + return ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); +} + +static const uint32_t * +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, + const uint32_t *end, vtn_instruction_handler handler) +{ + const uint32_t *w = start; + while (w < end) { + SpvOp opcode = w[0] & SpvOpCodeMask; + unsigned count = w[0] >> SpvWordCountShift; + assert(count >= 1 && w + count <= end); + + if (opcode == SpvOpNop) { + w++; + continue; + } + + if (!handler(b, opcode, w, count)) + return w; + + w += count; + } + assert(w == end); + return w; +} + +static void +vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpExtInstImport: { + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); + if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { + val->ext_handler = vtn_handle_glsl450_instruction; + } else { + assert(!"Unsupported extension"); + } + break; + } + + case SpvOpExtInst: { + struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); + bool handled = val->ext_handler(b, w[4], w, count); + (void)handled; + assert(handled); + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +static void +_foreach_decoration_helper(struct vtn_builder *b, + struct vtn_value *base_value, + int parent_member, + struct vtn_value 
*value, + vtn_decoration_foreach_cb cb, void *data) +{ + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + int member; + if (dec->member < 0) { + member = parent_member; + } else { + assert(parent_member == -1); + member = dec->member; + } + + if (dec->group) { + assert(dec->group->value_type == vtn_value_type_decoration_group); + _foreach_decoration_helper(b, base_value, member, dec->group, + cb, data); + } else { + cb(b, base_value, member, dec, data); + } + } +} + +/** Iterates (recursively if needed) over all of the decorations on a value + * + * This function iterates over all of the decorations applied to a given + * value. If it encounters a decoration group, it recurses into the group + * and iterates over all of those decorations as well. + */ +void +vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + _foreach_decoration_helper(b, value, -1, value, cb, data); +} + +static void +vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + const uint32_t *w_end = w + count; + const uint32_t target = w[1]; + w += 2; + + int member = -1; + switch (opcode) { + case SpvOpDecorationGroup: + vtn_push_value(b, target, vtn_value_type_undef); + break; + + case SpvOpMemberDecorate: + member = *(w++); + /* fallthrough */ + case SpvOpDecorate: { + struct vtn_value *val = &b->values[target]; + + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->member = member; + dec->decoration = *(w++); + dec->literals = w; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + break; + } + + case SpvOpGroupMemberDecorate: + member = *(w++); + /* fallthrough */ + case SpvOpGroupDecorate: { + struct vtn_value *group = &b->values[target]; + assert(group->value_type == vtn_value_type_decoration_group); + + for (; w < w_end; w++) { + struct vtn_value *val = &b->values[*w]; + struct vtn_decoration *dec = 
rzalloc(b, struct vtn_decoration); + dec->member = member; + dec->group = group; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + } + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +struct member_decoration_ctx { + struct glsl_struct_field *fields; + struct vtn_type *type; +}; + +/* does a shallow copy of a vtn_type */ + +static struct vtn_type * +vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) +{ + struct vtn_type *dest = ralloc(b, struct vtn_type); + dest->type = src->type; + dest->is_builtin = src->is_builtin; + if (src->is_builtin) + dest->builtin = src->builtin; + + if (!glsl_type_is_vector_or_scalar(src->type)) { + switch (glsl_get_base_type(src->type)) { + case GLSL_TYPE_ARRAY: + dest->array_element = src->array_element; + dest->stride = src->stride; + break; + + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + /* matrices */ + dest->row_major = src->row_major; + dest->stride = src->stride; + break; + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(src->type); + + dest->members = ralloc_array(b, struct vtn_type *, elems); + memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); + + dest->offsets = ralloc_array(b, unsigned, elems); + memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); + break; + } + + default: + unreachable("unhandled type"); + } + } + + return dest; +} + +static void +struct_member_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_ctx) +{ + struct member_decoration_ctx *ctx = void_ctx; + + if (member < 0) + return; + + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ + case SpvDecorationNoPerspective: + ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + ctx->fields[member].centroid = true; + break; + case SpvDecorationSample: + ctx->fields[member].sample = true; + break; + case SpvDecorationLocation: + ctx->fields[member].location = dec->literals[0]; + break; + case SpvDecorationBuiltIn: + ctx->type->members[member] = vtn_type_copy(b, + ctx->type->members[member]); + ctx->type->members[member]->is_builtin = true; + ctx->type->members[member]->builtin = dec->literals[0]; + ctx->type->builtin_block = true; + break; + case SpvDecorationOffset: + ctx->type->offsets[member] = dec->literals[0]; + break; + case SpvDecorationMatrixStride: + ctx->type->members[member]->stride = dec->literals[0]; + break; + case SpvDecorationColMajor: + break; /* Nothing to do here. Column-major is the default. */ + default: + unreachable("Unhandled member decoration"); + } +} + +static void +type_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *ctx) +{ + struct vtn_type *type = val->type; + + if (member != -1) + return; + + switch (dec->decoration) { + case SpvDecorationArrayStride: + type->stride = dec->literals[0]; + break; + case SpvDecorationBlock: + type->block = true; + break; + case SpvDecorationBufferBlock: + type->buffer_block = true; + break; + case SpvDecorationGLSLShared: + case SpvDecorationGLSLPacked: + /* Ignore these, since we get explicit offsets anyways */ + break; + + case SpvDecorationStream: + assert(dec->literals[0] == 0); + break; + + default: + unreachable("Unhandled type decoration"); + } +} + +static unsigned +translate_image_format(SpvImageFormat format) +{ + switch (format) { + case SpvImageFormatUnknown: return 0; /* GL_NONE */ + case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */ + case 
SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */ + case SpvImageFormatR32f: return 0x822E; /* GL_R32F */ + case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */ + case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */ + case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */ + case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */ + case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */ + case SpvImageFormatR16f: return 0x822D; /* GL_R16F */ + case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */ + case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */ + case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */ + case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */ + case SpvImageFormatR16: return 0x822A; /* GL_R16 */ + case SpvImageFormatR8: return 0x8229; /* GL_R8 */ + case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */ + case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */ + case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */ + case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */ + case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */ + case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */ + case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */ + case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */ + case SpvImageFormatR32i: return 0x8235; /* GL_R32I */ + case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */ + case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */ + case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */ + case SpvImageFormatR16i: return 0x8233; /* GL_R16I */ + case SpvImageFormatR8i: return 0x8231; /* GL_R8I */ + case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */ + case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */ + case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */ + case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */ + case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */ + case 
SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */ + case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */ + case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */ + case SpvImageFormatR16ui: return 0x823A; /* GL_RG16UI */ + case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */ + default: + assert(!"Invalid image format"); + return 0; + } +} + +static void +vtn_handle_type(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); + + val->type = rzalloc(b, struct vtn_type); + val->type->is_builtin = false; + + switch (opcode) { + case SpvOpTypeVoid: + val->type->type = glsl_void_type(); + break; + case SpvOpTypeBool: + val->type->type = glsl_bool_type(); + break; + case SpvOpTypeInt: + val->type->type = glsl_int_type(); + break; + case SpvOpTypeFloat: + val->type->type = glsl_float_type(); + break; + + case SpvOpTypeVector: { + const struct glsl_type *base = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + unsigned elems = w[3]; + + assert(glsl_type_is_scalar(base)); + val->type->type = glsl_vector_type(glsl_get_base_type(base), elems); + break; + } + + case SpvOpTypeMatrix: { + struct vtn_type *base = + vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned columns = w[3]; + + assert(glsl_type_is_vector(base->type)); + val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), + glsl_get_vector_elements(base->type), + columns); + val->type->array_element = base; + val->type->row_major = false; + val->type->stride = 0; + break; + } + + case SpvOpTypeRuntimeArray: + case SpvOpTypeArray: { + struct vtn_type *array_element = + vtn_value(b, w[2], vtn_value_type_type)->type; + + unsigned length; + if (opcode == SpvOpTypeRuntimeArray) { + /* A length of 0 is used to denote unsized arrays */ + length = 0; + } else { + length = + vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0]; + } + + val->type->type = glsl_array_type(array_element->type, 
length); + val->type->array_element = array_element; + val->type->stride = 0; + break; + } + + case SpvOpTypeStruct: { + unsigned num_fields = count - 2; + val->type->members = ralloc_array(b, struct vtn_type *, num_fields); + val->type->offsets = ralloc_array(b, unsigned, num_fields); + + NIR_VLA(struct glsl_struct_field, fields, count); + for (unsigned i = 0; i < num_fields; i++) { + /* TODO: Handle decorators */ + val->type->members[i] = + vtn_value(b, w[i + 2], vtn_value_type_type)->type; + fields[i].type = val->type->members[i]->type; + fields[i].name = ralloc_asprintf(b, "field%d", i); + fields[i].location = -1; + fields[i].interpolation = 0; + fields[i].centroid = 0; + fields[i].sample = 0; + fields[i].matrix_layout = 2; + } + + struct member_decoration_ctx ctx = { + .fields = fields, + .type = val->type + }; + + vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); + + const char *name = val->name ? val->name : "struct"; + + val->type->type = glsl_struct_type(fields, num_fields, name); + break; + } + + case SpvOpTypeFunction: { + const struct glsl_type *return_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + NIR_VLA(struct glsl_function_param, params, count - 3); + for (unsigned i = 0; i < count - 3; i++) { + params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; + + /* FIXME: */ + params[i].in = true; + params[i].out = true; + } + val->type->type = glsl_function_type(return_type, params, count - 3); + break; + } + + case SpvOpTypePointer: + /* FIXME: For now, we'll just do the really lame thing and return + * the same type. 
The validator should ensure that the proper number + * of dereferences happen + */ + val->type = vtn_value(b, w[3], vtn_value_type_type)->type; + break; + + case SpvOpTypeImage: { + const struct glsl_type *sampled_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + + assert(glsl_type_is_vector_or_scalar(sampled_type)); + + enum glsl_sampler_dim dim; + switch ((SpvDim)w[3]) { + case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; + case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; + case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; + case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; + case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; + case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; + default: + unreachable("Invalid SPIR-V Sampler dimension"); + } + + bool is_shadow = w[4]; + bool is_array = w[5]; + bool multisampled = w[6]; + unsigned sampled = w[7]; + SpvImageFormat format = w[8]; + + assert(!multisampled && "FIXME: Handl multi-sampled textures"); + + val->type->image_format = translate_image_format(format); + + if (sampled == 1) { + val->type->type = glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); + } else if (sampled == 2) { + assert(format); + assert(!is_shadow); + val->type->type = glsl_image_type(dim, is_array, + glsl_get_base_type(sampled_type)); + } else { + assert(!"We need to know if the image will be sampled"); + } + break; + } + + case SpvOpTypeSampledImage: + val->type = vtn_value(b, w[2], vtn_value_type_type)->type; + break; + + case SpvOpTypeSampler: + /* The actual sampler type here doesn't really matter. It gets + * thrown away the moment you combine it with an image. What really + * matters is that it's a sampler type as opposed to an integer type + * so the backend knows what to do. + * + * TODO: Eventually we should consider adding a "bare sampler" type + * to glsl_types. 
+ */ + val->type->type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, + GLSL_TYPE_FLOAT); + break; + + case SpvOpTypeOpaque: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + default: + unreachable("Unhandled opcode"); + } + + vtn_foreach_decoration(b, val, type_decoration_cb, NULL); +} + +static void +vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); + val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->constant = ralloc(b, nir_constant); + switch (opcode) { + case SpvOpConstantTrue: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_TRUE; + break; + case SpvOpConstantFalse: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_FALSE; + break; + case SpvOpConstant: + assert(glsl_type_is_scalar(val->const_type)); + val->constant->value.u[0] = w[3]; + break; + case SpvOpConstantComposite: { + unsigned elem_count = count - 3; + nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); + for (unsigned i = 0; i < elem_count; i++) + elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; + + switch (glsl_get_base_type(val->const_type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(val->const_type)) { + unsigned rows = glsl_get_vector_elements(val->const_type); + assert(glsl_get_matrix_columns(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + for (unsigned j = 0; j < rows; j++) + val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; + } else { + assert(glsl_type_is_vector(val->const_type)); + assert(glsl_get_vector_elements(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + val->constant->value.u[i] = elems[i]->value.u[0]; + } 
+ ralloc_free(elems); + break; + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + ralloc_steal(val->constant, elems); + val->constant->elements = elems; + break; + + default: + unreachable("Unsupported type for constants"); + } + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +static void +set_mode_system_value(nir_variable_mode *mode) +{ + assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); + *mode = nir_var_system_value; +} + +static void +validate_per_vertex_mode(struct vtn_builder *b, nir_variable_mode mode) +{ + switch (b->shader->stage) { + case MESA_SHADER_VERTEX: + assert(mode == nir_var_shader_out); + break; + case MESA_SHADER_GEOMETRY: + assert(mode == nir_var_shader_out || mode == nir_var_shader_in); + break; + default: + assert(!"Invalid shader stage"); + } +} + +static void +vtn_get_builtin_location(struct vtn_builder *b, + SpvBuiltIn builtin, int *location, + nir_variable_mode *mode) +{ + switch (builtin) { + case SpvBuiltInPosition: + *location = VARYING_SLOT_POS; + validate_per_vertex_mode(b, *mode); + break; + case SpvBuiltInPointSize: + *location = VARYING_SLOT_PSIZ; + validate_per_vertex_mode(b, *mode); + break; + case SpvBuiltInClipDistance: + *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */ + validate_per_vertex_mode(b, *mode); + break; + case SpvBuiltInCullDistance: + /* XXX figure this out */ + unreachable("unhandled builtin"); + case SpvBuiltInVertexId: + /* Vulkan defines VertexID to be zero-based and reserves the new + * builtin keyword VertexIndex to indicate the non-zero-based value. 
+ */ + *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + set_mode_system_value(mode); + break; + case SpvBuiltInInstanceId: + *location = SYSTEM_VALUE_INSTANCE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInPrimitiveId: + *location = VARYING_SLOT_PRIMITIVE_ID; + *mode = nir_var_shader_out; + break; + case SpvBuiltInInvocationId: + *location = SYSTEM_VALUE_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLayer: + *location = VARYING_SLOT_LAYER; + *mode = nir_var_shader_out; + break; + case SpvBuiltInTessLevelOuter: + case SpvBuiltInTessLevelInner: + case SpvBuiltInTessCoord: + case SpvBuiltInPatchVertices: + unreachable("no tessellation support"); + case SpvBuiltInFragCoord: + *location = VARYING_SLOT_POS; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInPointCoord: + *location = VARYING_SLOT_PNTC; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInFrontFacing: + *location = VARYING_SLOT_FACE; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInSampleId: + *location = SYSTEM_VALUE_SAMPLE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInSamplePosition: + *location = SYSTEM_VALUE_SAMPLE_POS; + set_mode_system_value(mode); + break; + case SpvBuiltInSampleMask: + *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? 
*/ + set_mode_system_value(mode); + break; + case SpvBuiltInFragDepth: + *location = FRAG_RESULT_DEPTH; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_out); + break; + case SpvBuiltInNumWorkgroups: + *location = SYSTEM_VALUE_NUM_WORK_GROUPS; + set_mode_system_value(mode); + break; + case SpvBuiltInWorkgroupSize: + /* This should already be handled */ + unreachable("unsupported builtin"); + break; + case SpvBuiltInWorkgroupId: + *location = SYSTEM_VALUE_WORK_GROUP_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLocalInvocationId: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLocalInvocationIndex: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; + set_mode_system_value(mode); + break; + case SpvBuiltInGlobalInvocationId: + *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInHelperInvocation: + default: + unreachable("unsupported builtin"); + } +} + +static void +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_var) +{ + assert(val->value_type == vtn_value_type_deref); + assert(val->deref->deref.child == NULL); + assert(val->deref->var == void_var); + + nir_variable *var = void_var; + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ + case SpvDecorationNoPerspective: + var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + var->data.interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + var->data.centroid = true; + break; + case SpvDecorationSample: + var->data.sample = true; + break; + case SpvDecorationInvariant: + var->data.invariant = true; + break; + case SpvDecorationConstant: + assert(var->constant_initializer != NULL); + var->data.read_only = true; + break; + case SpvDecorationNonWritable: + var->data.read_only = true; + break; + case SpvDecorationLocation: + var->data.location = dec->literals[0]; + break; + case SpvDecorationComponent: + var->data.location_frac = dec->literals[0]; + break; + case SpvDecorationIndex: + var->data.explicit_index = true; + var->data.index = dec->literals[0]; + break; + case SpvDecorationBinding: + var->data.explicit_binding = true; + var->data.binding = dec->literals[0]; + break; + case SpvDecorationDescriptorSet: + var->data.descriptor_set = dec->literals[0]; + break; + case SpvDecorationBuiltIn: { + SpvBuiltIn builtin = dec->literals[0]; + + if (builtin == SpvBuiltInWorkgroupSize) { + /* This shouldn't be a builtin. It's actually a constant. 
*/ + var->data.mode = nir_var_global; + var->data.read_only = true; + + nir_constant *val = ralloc(var, nir_constant); + val->value.u[0] = b->shader->info.cs.local_size[0]; + val->value.u[1] = b->shader->info.cs.local_size[1]; + val->value.u[2] = b->shader->info.cs.local_size[2]; + var->constant_initializer = val; + break; + } + + nir_variable_mode mode = var->data.mode; + vtn_get_builtin_location(b, builtin, &var->data.location, &mode); + var->data.explicit_location = true; + var->data.mode = mode; + if (mode == nir_var_shader_in || mode == nir_var_system_value) + var->data.read_only = true; + + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) + var->data.origin_upper_left = b->origin_upper_left; + + if (mode == nir_var_shader_out) + b->builtins[dec->literals[0]].out = var; + else + b->builtins[dec->literals[0]].in = var; + break; + } + case SpvDecorationRowMajor: + case SpvDecorationColMajor: + case SpvDecorationGLSLShared: + case SpvDecorationPatch: + case SpvDecorationRestrict: + case SpvDecorationAliased: + case SpvDecorationVolatile: + case SpvDecorationCoherent: + case SpvDecorationNonReadable: + case SpvDecorationUniform: + /* This is really nice but we have no use for it right now. 
*/ + case SpvDecorationCPacked: + case SpvDecorationSaturatedConversion: + case SpvDecorationStream: + case SpvDecorationOffset: + case SpvDecorationXfbBuffer: + case SpvDecorationFuncParamAttr: + case SpvDecorationFPRoundingMode: + case SpvDecorationFPFastMathMode: + case SpvDecorationLinkageAttributes: + case SpvDecorationSpecId: + break; + default: + unreachable("Unhandled variable decoration"); + } +} + +static nir_variable * +get_builtin_variable(struct vtn_builder *b, + nir_variable_mode mode, + const struct glsl_type *type, + SpvBuiltIn builtin) +{ + nir_variable *var; + if (mode == nir_var_shader_out) + var = b->builtins[builtin].out; + else + var = b->builtins[builtin].in; + + if (!var) { + int location; + vtn_get_builtin_location(b, builtin, &location, &mode); + + var = nir_variable_create(b->shader, mode, type, "builtin"); + + var->data.location = location; + var->data.explicit_location = true; + + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) + var->data.origin_upper_left = b->origin_upper_left; + + if (mode == nir_var_shader_out) + b->builtins[builtin].out = var; + else + b->builtins[builtin].in = var; + } + + return var; +} + +static struct vtn_ssa_value * +_vtn_variable_load(struct vtn_builder *b, + nir_deref_var *src_deref, nir_deref *src_deref_tail) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = src_deref_tail->type; + + /* The deref tail may contain a deref to select a component of a vector (in + * other words, it might not be an actual tail) so we have to save it away + * here since we overwrite it later. + */ + nir_deref *old_child = src_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(val->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. 
+ */ + src_deref_tail->child = NULL; + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->variables[0] = + nir_deref_as_var(nir_copy_deref(load, &src_deref->deref)); + load->num_components = glsl_get_vector_elements(val->type); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + + nir_builder_instr_insert(&b->nb, &load->instr); + + if (src_deref->var->data.mode == nir_var_uniform && + glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) { + /* Uniform boolean loads need to be fixed up since they're defined + * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. + */ + val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->def = &load->dest.ssa; + } + } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(val->type)) { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(val->type); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } else { + assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(val->type, i); + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } + + src_deref_tail->child = old_child; + + return val; +} + +static void +_vtn_variable_store(struct vtn_builder *b, + nir_deref_var *dest_deref, nir_deref *dest_deref_tail, + struct vtn_ssa_value 
*src) +{ + nir_deref *old_child = dest_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(src->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. + */ + dest_deref_tail->child = NULL; + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->variables[0] = + nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); + store->num_components = glsl_get_vector_elements(src->type); + store->const_index[0] = (1 << store->num_components) - 1; + store->src[0] = nir_src_for_ssa(src->def); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(src->type)) { + unsigned elems = glsl_get_length(src->type); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(src->type); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } else { + assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(src->type); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(src->type, i); + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } + + dest_deref_tail->child = old_child; +} + +static nir_ssa_def * +nir_vulkan_resource_index(nir_builder *b, unsigned set, unsigned binding, + nir_variable_mode mode, nir_ssa_def *array_index) +{ + if (array_index == NULL) + array_index = nir_imm_int(b, 0); + + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_vulkan_resource_index); + instr->src[0] = nir_src_for_ssa(array_index); + 
instr->const_index[0] = set; + instr->const_index[1] = binding; + instr->const_index[2] = mode; + + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(b, &instr->instr); + + return &instr->dest.ssa; +} + +static struct vtn_ssa_value * +_vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, + unsigned set, unsigned binding, nir_variable_mode mode, + nir_ssa_def *index, nir_ssa_def *offset, struct vtn_type *type) +{ + struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + val->type = type->type; + val->transposed = NULL; + if (glsl_type_is_vector_or_scalar(type->type)) { + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); + load->num_components = glsl_get_vector_elements(type->type); + + switch (op) { + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: { + nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, + set, binding, + mode, index); + load->src[0] = nir_src_for_ssa(res_index); + load->src[1] = nir_src_for_ssa(offset); + break; + } + + case nir_intrinsic_load_push_constant: + load->src[0] = nir_src_for_ssa(offset); + break; + + default: + unreachable("Invalid block load intrinsic"); + } + + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + nir_builder_instr_insert(&b->nb, &load->instr); + + if (glsl_get_base_type(type->type) == GLSL_TYPE_BOOL) { + /* Loads of booleans from externally visible memory need to be + * fixed up since they're defined to be zero/nonzero rather than + * NIR_FALSE/NIR_TRUE. 
+ */ + val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->def = &load->dest.ssa; + } + } else { + unsigned elems = glsl_get_length(type->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + if (glsl_type_is_struct(type->type)) { + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *child_offset = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); + val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, + child_offset, type->members[i]); + } + } else { + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *child_offset = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); + val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, + child_offset,type->array_element); + } + } + } + + return val; +} + +static void +vtn_block_get_offset(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type **type, nir_deref *src_tail, + nir_ssa_def **index, nir_ssa_def **offset) +{ + nir_deref *deref = &src->deref; + + if (deref->child->deref_type == nir_deref_type_array) { + deref = deref->child; + *type = (*type)->array_element; + nir_deref_array *deref_array = nir_deref_as_array(deref); + *index = nir_imm_int(&b->nb, deref_array->base_offset); + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) + *index = nir_iadd(&b->nb, *index, deref_array->indirect.ssa); + } else { + *index = nir_imm_int(&b->nb, 0); + } + + *offset = nir_imm_int(&b->nb, 0); + while (deref != src_tail) { + deref = deref->child; + switch (deref->deref_type) { + case nir_deref_type_array: { + nir_deref_array *deref_array = nir_deref_as_array(deref); + nir_ssa_def *off = nir_imm_int(&b->nb, deref_array->base_offset); + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) + off = nir_iadd(&b->nb, off, deref_array->indirect.ssa); + + off = nir_imul(&b->nb, off, nir_imm_int(&b->nb, (*type)->stride)); + *offset = nir_iadd(&b->nb, *offset, off); + + *type = 
(*type)->array_element; + break; + } + + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(deref); + + unsigned elem_off = (*type)->offsets[deref_struct->index]; + *offset = nir_iadd(&b->nb, *offset, nir_imm_int(&b->nb, elem_off)); + + *type = (*type)->members[deref_struct->index]; + break; + } + + default: + unreachable("unknown deref type"); + } + } +} + +static struct vtn_ssa_value * +vtn_block_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *type, nir_deref *src_tail) +{ + nir_ssa_def *index; + nir_ssa_def *offset; + vtn_block_get_offset(b, src, &type, src_tail, &index, &offset); + + nir_intrinsic_op op; + if (src->var->data.mode == nir_var_uniform) { + if (src->var->data.descriptor_set >= 0) { + /* UBO load */ + assert(src->var->data.binding >= 0); + + op = nir_intrinsic_load_ubo; + } else { + /* Push constant load */ + assert(src->var->data.descriptor_set == -1 && + src->var->data.binding == -1); + + op = nir_intrinsic_load_push_constant; + } + } else { + assert(src->var->data.mode == nir_var_shader_storage); + op = nir_intrinsic_load_ssbo; + } + + return _vtn_block_load(b, op, src->var->data.descriptor_set, + src->var->data.binding, src->var->data.mode, + index, offset, type); +} + +/* + * Gets the NIR-level deref tail, which may have as a child an array deref + * selecting which component due to OpAccessChain supporting per-component + * indexing in SPIR-V. 
+ */ + +static nir_deref * +get_deref_tail(nir_deref_var *deref) +{ + nir_deref *cur = &deref->deref; + while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) + cur = cur->child; + + return cur; +} + +static nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, + nir_ssa_def *src, unsigned index); + +static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *index); + +static bool +variable_is_external_block(nir_variable *var) +{ + return var->interface_type && + glsl_type_is_struct(var->interface_type) && + (var->data.mode == nir_var_uniform || + var->data.mode == nir_var_shader_storage); +} + +static struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *src_type) +{ + nir_deref *src_tail = get_deref_tail(src); + + struct vtn_ssa_value *val; + if (variable_is_external_block(src->var)) + val = vtn_block_load(b, src, src_type, src_tail); + else + val = _vtn_variable_load(b, src, src_tail); + + if (src_tail->child) { + nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); + assert(vec_deref->deref.child == NULL); + val->type = vec_deref->deref.type; + if (vec_deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); + else + val->def = vtn_vector_extract_dynamic(b, val->def, + vec_deref->indirect.ssa); + } + + return val; +} + +static void +_vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op, + struct vtn_ssa_value *src, unsigned set, unsigned binding, + nir_variable_mode mode, nir_ssa_def *index, + nir_ssa_def *offset, struct vtn_type *type) +{ + assert(src->type == type->type); + if (glsl_type_is_vector_or_scalar(type->type)) { + nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); + store->num_components = glsl_get_vector_elements(type->type); + store->const_index[0] = (1 << store->num_components) - 1; + store->src[0] = nir_src_for_ssa(src->def); + + 
nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, + set, binding, + mode, index); + store->src[1] = nir_src_for_ssa(res_index); + store->src[2] = nir_src_for_ssa(offset); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else { + unsigned elems = glsl_get_length(type->type); + if (glsl_type_is_struct(type->type)) { + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *child_offset = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); + _vtn_block_store(b, op, src->elems[i], set, binding, mode, + index, child_offset, type->members[i]); + } + } else { + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *child_offset = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); + _vtn_block_store(b, op, src->elems[i], set, binding, mode, + index, child_offset, type->array_element); + } + } + } +} + +static void +vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest, struct vtn_type *type, + nir_deref *dest_tail) +{ + nir_ssa_def *index; + nir_ssa_def *offset; + vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset); + + nir_intrinsic_op op = nir_intrinsic_store_ssbo; + + return _vtn_block_store(b, op, src, dest->var->data.descriptor_set, + dest->var->data.binding, dest->var->data.mode, + index, offset, type); +} + +static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, + nir_ssa_def *src, nir_ssa_def *insert, + unsigned index); + +static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *insert, + nir_ssa_def *index); +static void +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest, struct vtn_type *dest_type) +{ + nir_deref *dest_tail = get_deref_tail(dest); + if (variable_is_external_block(dest->var)) { + assert(dest->var->data.mode == nir_var_shader_storage); + vtn_block_store(b, src, dest, dest_type, dest_tail); + } else { + if (dest_tail->child) { + struct vtn_ssa_value *val = 
_vtn_variable_load(b, dest, dest_tail); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_variable_store(b, dest, dest_tail, val); + } else { + _vtn_variable_store(b, dest, dest_tail, src); + } + } +} + +static void +vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, + nir_deref_var *dest, struct vtn_type *type) +{ + nir_deref *src_tail = get_deref_tail(src); + + if (src_tail->child || src->var->interface_type) { + assert(get_deref_tail(dest)->child); + struct vtn_ssa_value *val = vtn_variable_load(b, src, type); + vtn_variable_store(b, val, dest, type); + } else { + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + + nir_builder_instr_insert(&b->nb, &copy->instr); + } +} + +/* Tries to compute the size of an interface block based on the strides and + * offsets that are provided to us in the SPIR-V source. + */ +static unsigned +vtn_type_block_size(struct vtn_type *type) +{ + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: { + unsigned cols = type->row_major ? 
glsl_get_vector_elements(type->type) : + glsl_get_matrix_columns(type->type); + if (cols > 1) { + assert(type->stride > 0); + return type->stride * cols; + } else if (base_type == GLSL_TYPE_DOUBLE) { + return glsl_get_vector_elements(type->type) * 8; + } else { + return glsl_get_vector_elements(type->type) * 4; + } + } + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + unsigned num_fields = glsl_get_length(type->type); + for (unsigned f = 0; f < num_fields; f++) { + unsigned field_end = type->offsets[f] + + vtn_type_block_size(type->members[f]); + size = MAX2(size, field_end); + } + return size; + } + + case GLSL_TYPE_ARRAY: + assert(type->stride > 0); + assert(glsl_get_length(type->type) > 0); + return type->stride * glsl_get_length(type->type); + + default: + assert(!"Invalid block type"); + return 0; + } +} + +static bool +is_interface_type(struct vtn_type *type) +{ + return type->block || type->buffer_block || + glsl_type_is_sampler(type->type) || + glsl_type_is_image(type->type); +} + +static void +vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpVariable: { + struct vtn_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + + nir_variable *var = rzalloc(b->shader, nir_variable); + + var->type = type->type; + var->name = ralloc_strdup(var, val->name); + + struct vtn_type *interface_type; + if (is_interface_type(type)) { + interface_type = type; + } else if (glsl_type_is_array(type->type) && + is_interface_type(type->array_element)) { + interface_type = type->array_element; + } else { + interface_type = NULL; + } + + if (interface_type) + var->interface_type = interface_type->type; + + switch ((SpvStorageClass)w[3]) { + case SpvStorageClassUniform: + case SpvStorageClassUniformConstant: + if (interface_type && interface_type->buffer_block) { + var->data.mode = 
nir_var_shader_storage; + b->shader->info.num_ssbos++; + } else { + /* UBO's and samplers */ + var->data.mode = nir_var_uniform; + var->data.read_only = true; + if (interface_type) { + if (glsl_type_is_image(interface_type->type)) { + b->shader->info.num_images++; + var->data.image.format = interface_type->image_format; + } else if (glsl_type_is_sampler(interface_type->type)) { + b->shader->info.num_textures++; + } else { + assert(glsl_type_is_struct(interface_type->type)); + b->shader->info.num_ubos++; + } + } + } + break; + case SpvStorageClassPushConstant: + assert(interface_type && interface_type->block); + var->data.mode = nir_var_uniform; + var->data.read_only = true; + var->data.descriptor_set = -1; + var->data.binding = -1; + + /* We have exactly one push constant block */ + assert(b->shader->num_uniforms == 0); + b->shader->num_uniforms = vtn_type_block_size(type) * 4; + break; + case SpvStorageClassInput: + var->data.mode = nir_var_shader_in; + var->data.read_only = true; + break; + case SpvStorageClassOutput: + var->data.mode = nir_var_shader_out; + break; + case SpvStorageClassPrivate: + var->data.mode = nir_var_global; + break; + case SpvStorageClassFunction: + var->data.mode = nir_var_local; + break; + case SpvStorageClassWorkgroup: + case SpvStorageClassCrossWorkgroup: + case SpvStorageClassGeneric: + case SpvStorageClassAtomicCounter: + default: + unreachable("Unhandled variable storage class"); + } + + if (count > 4) { + assert(count == 5); + var->constant_initializer = + vtn_value(b, w[4], vtn_value_type_constant)->constant; + } + + val->deref = nir_deref_var_create(b, var); + val->deref_type = type; + + /* We handle decorations first because decorations might give us + * location information. We use the data.explicit_location field to + * note that the location provided is the "final" location. If + * data.explicit_location == false, this means that it's relative to + * whatever the base location is. 
+ */ + vtn_foreach_decoration(b, val, var_decoration_cb, var); + + if (!var->data.explicit_location) { + if (b->execution_model == SpvExecutionModelFragment && + var->data.mode == nir_var_shader_out) { + var->data.location += FRAG_RESULT_DATA0; + } else if (b->execution_model == SpvExecutionModelVertex && + var->data.mode == nir_var_shader_in) { + var->data.location += VERT_ATTRIB_GENERIC0; + } else if (var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out) { + var->data.location += VARYING_SLOT_VAR0; + } + } + + /* Interface block variables aren't actually going to be referenced + * by the generated NIR, so we don't put them in the list + */ + if (interface_type && glsl_type_is_struct(interface_type->type)) + break; + + if (var->data.mode == nir_var_local) { + nir_function_impl_add_variable(b->impl, var); + } else { + nir_shader_add_variable(b->shader, var); + } + + break; + } + + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: { + nir_deref_var *base; + struct vtn_value *base_val = vtn_untyped_value(b, w[3]); + if (base_val->value_type == vtn_value_type_sampled_image) { + /* This is rather insane. SPIR-V allows you to use OpSampledImage + * to combine an array of images with a single sampler to get an + * array of sampled images that all share the same sampler. + * Fortunately, this means that we can more-or-less ignore the + * sampler when crawling the access chain, but it does leave us + * with this rather awkward little special-case. 
+ */ + base = base_val->sampled_image->image; + } else { + assert(base_val->value_type == vtn_value_type_deref); + base = base_val->deref; + } + + nir_deref_var *deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); + struct vtn_type *deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + + nir_deref *tail = &deref->deref; + while (tail->child) + tail = tail->child; + + for (unsigned i = 0; i < count - 4; i++) { + assert(w[i + 4] < b->value_id_bound); + struct vtn_value *idx_val = &b->values[w[i + 4]]; + + enum glsl_base_type base_type = glsl_get_base_type(tail->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_ARRAY: { + nir_deref_array *deref_arr = nir_deref_array_create(b); + if (base_type == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(tail->type)) { + deref_type = deref_type->array_element; + } else { + assert(glsl_type_is_vector(tail->type)); + deref_type = ralloc(b, struct vtn_type); + deref_type->type = glsl_scalar_type(base_type); + } + + deref_arr->deref.type = deref_type->type; + + if (idx_val->value_type == vtn_value_type_constant) { + unsigned idx = idx_val->constant->value.u[0]; + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = idx; + } else { + assert(idx_val->value_type == vtn_value_type_ssa); + assert(glsl_type_is_scalar(idx_val->ssa->type)); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + deref_arr->base_offset = 0; + deref_arr->indirect = nir_src_for_ssa(idx_val->ssa->def); + } + tail->child = &deref_arr->deref; + break; + } + + case GLSL_TYPE_STRUCT: { + assert(idx_val->value_type == vtn_value_type_constant); + unsigned idx = idx_val->constant->value.u[0]; + deref_type = deref_type->members[idx]; + nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); + deref_struct->deref.type = deref_type->type; + tail->child = &deref_struct->deref; + break; + } + 
default: + unreachable("Invalid type for deref"); + } + + if (deref_type->is_builtin) { + /* If we encounter a builtin, we throw away the rest of the + * access chain, jump to the builtin, and keep building. + */ + const struct glsl_type *builtin_type = deref_type->type; + + nir_deref_array *per_vertex_deref = NULL; + if (glsl_type_is_array(base->var->type)) { + /* This builtin is a per-vertex builtin */ + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + assert(base->var->data.mode == nir_var_shader_in); + builtin_type = glsl_array_type(builtin_type, + b->shader->info.gs.vertices_in); + + /* The first non-var deref should be an array deref. */ + assert(deref->deref.child->deref_type == + nir_deref_type_array); + per_vertex_deref = nir_deref_as_array(deref->deref.child); + } + + nir_variable *builtin = get_builtin_variable(b, + base->var->data.mode, + builtin_type, + deref_type->builtin); + deref = nir_deref_var_create(b, builtin); + + if (per_vertex_deref) { + /* Since deref chains start at the variable, we can just + * steal that link and use it. + */ + deref->deref.child = &per_vertex_deref->deref; + per_vertex_deref->deref.child = NULL; + per_vertex_deref->deref.type = + glsl_get_array_element(builtin_type); + + tail = &per_vertex_deref->deref; + } else { + tail = &deref->deref; + } + } else { + tail = tail->child; + } + } + + /* For uniform blocks, we don't resolve the access chain until we + * actually access the variable, so we need to keep around the original + * type of the variable. 
+ */ + if (variable_is_external_block(base->var)) + deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + + if (base_val->value_type == vtn_value_type_sampled_image) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = deref; + val->sampled_image->sampler = base_val->sampled_image->sampler; + } else { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + val->deref = deref; + val->deref_type = deref_type; + } + + break; + } + + case SpvOpCopyMemory: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; + struct vtn_type *type = + vtn_value(b, w[1], vtn_value_type_deref)->deref_type; + + vtn_variable_copy(b, src, dest, type); + break; + } + + case SpvOpLoad: { + nir_deref_var *src = vtn_value(b, w[3], vtn_value_type_deref)->deref; + struct vtn_type *src_type = + vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + + if (src->var->interface_type && + (glsl_type_is_sampler(src->var->interface_type) || + glsl_type_is_image(src->var->interface_type))) { + vtn_push_value(b, w[2], vtn_value_type_deref)->deref = src; + return; + } + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_variable_load(b, src, src_type); + break; + } + + case SpvOpStore: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + struct vtn_type *dest_type = + vtn_value(b, w[1], vtn_value_type_deref)->deref_type; + struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); + vtn_variable_store(b, src, dest, dest_type); + break; + } + + case SpvOpCopyMemorySized: + case SpvOpArrayLength: + default: + unreachable("Unhandled opcode"); + } +} + +static void +vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct nir_function *callee = + vtn_value(b, 
w[3], vtn_value_type_function)->func->impl->function; + + nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee); + for (unsigned i = 0; i < call->num_params; i++) { + unsigned arg_id = w[4 + i]; + struct vtn_value *arg = vtn_untyped_value(b, arg_id); + if (arg->value_type == vtn_value_type_deref) { + call->params[i] = + nir_deref_as_var(nir_copy_deref(call, &arg->deref->deref)); + } else { + struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id); + + /* Make a temporary to store the argument in */ + nir_variable *tmp = + nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp"); + call->params[i] = nir_deref_var_create(call, tmp); + + vtn_variable_store(b, arg_ssa, call->params[i], arg->type); + } + } + + nir_variable *out_tmp = NULL; + if (!glsl_type_is_void(callee->return_type)) { + out_tmp = nir_local_variable_create(b->impl, callee->return_type, + "out_tmp"); + call->return_deref = nir_deref_var_create(call, out_tmp); + } + + nir_builder_instr_insert(&b->nb, &call->instr); + + if (glsl_type_is_void(callee->return_type)) { + vtn_push_value(b, w[2], vtn_value_type_undef); + } else { + struct vtn_type *rettype = vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa); + retval->ssa = vtn_variable_load(b, call->return_deref, rettype); + } +} + +static struct vtn_ssa_value * +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (!glsl_type_is_vector_or_scalar(type)) { + unsigned elems = glsl_get_length(type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *child_type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + child_type = glsl_get_column_type(type); + break; + case 
GLSL_TYPE_ARRAY: + child_type = glsl_get_array_element(type); + break; + case GLSL_TYPE_STRUCT: + child_type = glsl_get_struct_field(type, i); + break; + default: + unreachable("unkown base type"); + } + + val->elems[i] = vtn_create_ssa_value(b, child_type); + } + } + + return val; +} + +static nir_tex_src +vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) +{ + nir_tex_src src; + src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def); + src.src_type = type; + return src; +} + +static void +vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpSampledImage) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = + vtn_value(b, w[3], vtn_value_type_deref)->deref; + val->sampled_image->sampler = + vtn_value(b, w[4], vtn_value_type_deref)->deref; + return; + } + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + struct vtn_sampled_image sampled; + struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); + if (sampled_val->value_type == vtn_value_type_sampled_image) { + sampled = *sampled_val->sampled_image; + } else { + assert(sampled_val->value_type == vtn_value_type_deref); + sampled.image = NULL; + sampled.sampler = sampled_val->deref; + } + + nir_tex_src srcs[8]; /* 8 should be enough */ + nir_tex_src *p = srcs; + + unsigned idx = 4; + + unsigned coord_components = 0; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQueryLod: { + /* All these types have the 
coordinate as their first real argument */ + struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); + coord_components = glsl_get_vector_elements(coord->type); + p->src = nir_src_for_ssa(coord->def); + p->src_type = nir_tex_src_coord; + p++; + break; + } + + default: + break; + } + + /* These all have an explicit depth value as their next source */ + switch (opcode) { + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor); + break; + default: + break; + } + + /* Figure out the base texture operation */ + nir_texop texop; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + texop = nir_texop_tex; + break; + + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + texop = nir_texop_txl; + break; + + case SpvOpImageFetch: + texop = nir_texop_txf; + break; + + case SpvOpImageGather: + case SpvOpImageDrefGather: + texop = nir_texop_tg4; + break; + + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + texop = nir_texop_txs; + break; + + case SpvOpImageQueryLod: + texop = nir_texop_lod; + break; + + case SpvOpImageQueryLevels: + texop = nir_texop_query_levels; + break; + + case SpvOpImageQuerySamples: + default: + unreachable("Unhandled opcode"); + } + + /* Now we need to handle some number of optional arguments */ + if (idx < count) { + uint32_t operands = w[idx++]; + + if (operands & SpvImageOperandsBiasMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txb; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias); + } + + if (operands & SpvImageOperandsLodMask) { + assert(texop == nir_texop_txl || texop == nir_texop_txf || + texop == nir_texop_txs); + 
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); + } + + if (operands & SpvImageOperandsGradMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txd; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx); + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy); + } + + if (operands & SpvImageOperandsOffsetMask || + operands & SpvImageOperandsConstOffsetMask) + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset); + + if (operands & SpvImageOperandsConstOffsetsMask) + assert(!"Constant offsets to texture gather not yet implemented"); + + if (operands & SpvImageOperandsSampleMask) { + assert(texop == nir_texop_txf); + texop = nir_texop_txf_ms; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); + } + } + /* We should have now consumed exactly all of the arguments */ + assert(idx == count); + + nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); + + const struct glsl_type *sampler_type = + nir_deref_tail(&sampled.sampler->deref)->type; + instr->sampler_dim = glsl_get_sampler_dim(sampler_type); + + switch (glsl_get_sampler_result_type(sampler_type)) { + case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; + case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; + case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; + case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; + default: + unreachable("Invalid base type for sampler result"); + } + + instr->op = texop; + memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); + instr->coord_components = coord_components; + instr->is_array = glsl_sampler_type_is_array(sampler_type); + instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); + + instr->sampler = + nir_deref_as_var(nir_copy_deref(instr, &sampled.sampler->deref)); + if (sampled.image) { + instr->texture = + nir_deref_as_var(nir_copy_deref(instr, &sampled.image->deref)); + } else { + instr->texture = NULL; + } + + nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); + val->ssa = 
vtn_create_ssa_value(b, glsl_vector_type(GLSL_TYPE_FLOAT, 4)); + val->ssa->def = &instr->dest.ssa; + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +static nir_ssa_def * +get_image_coord(struct vtn_builder *b, uint32_t value) +{ + struct vtn_ssa_value *coord = vtn_ssa_value(b, value); + + /* The image_load_store intrinsics assume a 4-dim coordinate */ + unsigned dim = glsl_get_vector_elements(coord->type); + unsigned swizzle[4]; + for (unsigned i = 0; i < 4; i++) + swizzle[i] = MIN2(i, dim - 1); + + return nir_swizzle(&b->nb, coord->def, swizzle, 4, false); +} + +static void +vtn_handle_image(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + /* Just get this one out of the way */ + if (opcode == SpvOpImageTexelPointer) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_image_pointer); + val->image = ralloc(b, struct vtn_image_pointer); + + val->image->deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; + val->image->coord = get_image_coord(b, w[4]); + val->image->sample = vtn_ssa_value(b, w[5])->def; + return; + } + + struct vtn_image_pointer image; + + switch (opcode) { + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; + break; + + case SpvOpImageRead: + image.deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; + image.coord = get_image_coord(b, w[4]); + + if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) { + assert(w[5] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[6])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1); + } + break; + + case SpvOpImageWrite: + image.deref = 
vtn_value(b, w[1], vtn_value_type_deref)->deref; + image.coord = get_image_coord(b, w[2]); + + /* texel = w[3] */ + + if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) { + assert(w[4] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[5])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1); + } + } break; + + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_op op; + switch (opcode) { +#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break; + OP(ImageRead, load) + OP(ImageWrite, store) + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_min) + OP(AtomicUMin, atomic_min) + OP(AtomicSMax, atomic_max) + OP(AtomicUMax, atomic_max) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); + intrin->variables[0] = + nir_deref_as_var(nir_copy_deref(&intrin->instr, &image.deref->deref)); + intrin->src[0] = nir_src_for_ssa(image.coord); + intrin->src[1] = nir_src_for_ssa(image.sample); + + switch (opcode) { + case SpvOpImageRead: + break; + case SpvOpImageWrite: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); + break; + case SpvOpAtomicIIncrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); + break; + case SpvOpAtomicIDecrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); + break; + + case SpvOpAtomicExchange: + case SpvOpAtomicIAdd: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicCompareExchange: + intrin->src[2] = 
nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); + intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicISub: + intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); + break; + + default: + unreachable("Invalid image opcode"); + } + + if (opcode != SpvOpImageWrite) { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_ssa_dest_init(&intrin->instr, &intrin->dest, + glsl_get_vector_elements(type->type), NULL); + val->ssa = vtn_create_ssa_value(b, type->type); + val->ssa->def = &intrin->dest.ssa; + } + + nir_builder_instr_insert(&b->nb, &intrin->instr); +} + +static nir_alu_instr * +create_vec(nir_shader *shader, unsigned num_components) +{ + nir_op op; + switch (num_components) { + case 1: op = nir_op_fmov; break; + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("bad vector size"); + } + + nir_alu_instr *vec = nir_alu_instr_create(shader, op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); + vec->dest.write_mask = (1 << num_components) - 1; + + return vec; +} + +static struct vtn_ssa_value * +vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + if (src->transposed) + return src->transposed; + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_transposed_type(src->type)); + + for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { + nir_alu_instr *vec = create_vec(b->shader, + glsl_get_matrix_columns(src->type)); + if (glsl_type_is_vector_or_scalar(src->type)) { + vec->src[0].src = nir_src_for_ssa(src->def); + vec->src[0].swizzle[0] = i; + } else { + for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { + vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); + vec->src[j].swizzle[0] = i; + } + } + nir_builder_instr_insert(&b->nb, &vec->instr); + 
dest->elems[i]->def = &vec->dest.dest.ssa; + } + + dest->transposed = src; + + return dest; +} + +/* + * Normally, column vectors in SPIR-V correspond to a single NIR SSA + * definition. But for matrix multiplies, we want to do one routine for + * multiplying a matrix by a matrix and then pretend that vectors are matrices + * with one column. So we "wrap" these things, and unwrap the result before we + * send it off. + */ + +static struct vtn_ssa_value * +vtn_wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (val == NULL) + return NULL; + + if (glsl_type_is_matrix(val->type)) + return val; + + struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); + dest->type = val->type; + dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); + dest->elems[0] = val; + + return dest; +} + +static struct vtn_ssa_value * +vtn_unwrap_matrix(struct vtn_ssa_value *val) +{ + if (glsl_type_is_matrix(val->type)) + return val; + + return val->elems[0]; +} + +static struct vtn_ssa_value * +vtn_matrix_multiply(struct vtn_builder *b, + struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) +{ + + struct vtn_ssa_value *src0 = vtn_wrap_matrix(b, _src0); + struct vtn_ssa_value *src1 = vtn_wrap_matrix(b, _src1); + struct vtn_ssa_value *src0_transpose = vtn_wrap_matrix(b, _src0->transposed); + struct vtn_ssa_value *src1_transpose = vtn_wrap_matrix(b, _src1->transposed); + + unsigned src0_rows = glsl_get_vector_elements(src0->type); + unsigned src0_columns = glsl_get_matrix_columns(src0->type); + unsigned src1_columns = glsl_get_matrix_columns(src1->type); + + const struct glsl_type *dest_type; + if (src1_columns > 1) { + dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns); + } else { + dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows); + } + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); + + dest = vtn_wrap_matrix(b, dest); + + bool transpose_result = false; + if (src0_transpose && 
src1_transpose) { + /* transpose(A) * transpose(B) = transpose(B * A) */ + src1 = src0_transpose; + src0 = src1_transpose; + src0_transpose = NULL; + src1_transpose = NULL; + transpose_result = true; + } + + if (src0_transpose && !src1_transpose && + glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { + /* We already have the rows of src0 and the columns of src1 available, + * so we can just take the dot product of each row with each column to + * get the result. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + nir_alu_instr *vec = create_vec(b->shader, src0_rows); + for (unsigned j = 0; j < src0_rows; j++) { + vec->src[j].src = + nir_src_for_ssa(nir_fdot(&b->nb, src0_transpose->elems[j]->def, + src1->elems[i]->def)); + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + } else { + /* We don't handle the case where src1 is transposed but not src0, since + * the general case only uses individual components of src1 so the + * optimizer should chew through the transpose we emitted for src1. 
+ */ + + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[0]->def, + vtn_vector_extract(b, src1->elems[i]->def, 0)); + for (unsigned j = 1; j < src0_columns; j++) { + dest->elems[i]->def = + nir_fadd(&b->nb, dest->elems[i]->def, + nir_fmul(&b->nb, src0->elems[j]->def, + vtn_vector_extract(b, + src1->elems[i]->def, j))); + } + } + } + + dest = vtn_unwrap_matrix(dest); + + if (transpose_result) + dest = vtn_transpose(b, dest); + + return dest; +} + +static struct vtn_ssa_value * +vtn_mat_times_scalar(struct vtn_builder *b, + struct vtn_ssa_value *mat, + nir_ssa_def *scalar) +{ + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); + for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { + if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) + dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); + else + dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); + } + + return dest; +} + +static void +vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + switch (opcode) { + case SpvOpTranspose: { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[3]); + val->ssa = vtn_transpose(b, src); + break; + } + + case SpvOpOuterProduct: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, vtn_transpose(b, src1)); + break; + } + + case SpvOpMatrixTimesScalar: { + struct vtn_ssa_value *mat = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *scalar = vtn_ssa_value(b, w[4]); + + if (mat->transposed) { + val->ssa = vtn_transpose(b, vtn_mat_times_scalar(b, mat->transposed, + scalar->def)); + } else { + val->ssa = vtn_mat_times_scalar(b, mat, scalar->def); + } + break; + } + + case SpvOpVectorTimesMatrix: + case 
SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + if (opcode == SpvOpVectorTimesMatrix) { + val->ssa = vtn_matrix_multiply(b, vtn_transpose(b, src1), src0); + } else { + val->ssa = vtn_matrix_multiply(b, src0, src1); + } + break; + } + + default: unreachable("unknown matrix opcode"); + } +} + +static void +vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); + + /* Collect the various SSA sources */ + const unsigned num_inputs = count - 3; + nir_ssa_def *src[4]; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 3])->def; + for (unsigned i = num_inputs; i < 4; i++) + src[i] = NULL; + + /* Indicates that the first two arguments should be swapped. This is + * used for implementing greater-than and less-than-or-equal. 
+ */ + bool swap = false; + + nir_op op; + switch (opcode) { + /* Basic ALU operations */ + case SpvOpSNegate: op = nir_op_ineg; break; + case SpvOpFNegate: op = nir_op_fneg; break; + case SpvOpNot: op = nir_op_inot; break; + + case SpvOpAny: + if (src[0]->num_components == 1) { + op = nir_op_imov; + } else { + switch (src[0]->num_components) { + case 2: op = nir_op_bany_inequal2; break; + case 3: op = nir_op_bany_inequal3; break; + case 4: op = nir_op_bany_inequal4; break; + } + src[1] = nir_imm_int(&b->nb, NIR_FALSE); + } + break; + + case SpvOpAll: + if (src[0]->num_components == 1) { + op = nir_op_imov; + } else { + switch (src[0]->num_components) { + case 2: op = nir_op_ball_iequal2; break; + case 3: op = nir_op_ball_iequal3; break; + case 4: op = nir_op_ball_iequal4; break; + } + src[1] = nir_imm_int(&b->nb, NIR_TRUE); + } + break; + + case SpvOpIAdd: op = nir_op_iadd; break; + case SpvOpFAdd: op = nir_op_fadd; break; + case SpvOpISub: op = nir_op_isub; break; + case SpvOpFSub: op = nir_op_fsub; break; + case SpvOpIMul: op = nir_op_imul; break; + case SpvOpFMul: op = nir_op_fmul; break; + case SpvOpUDiv: op = nir_op_udiv; break; + case SpvOpSDiv: op = nir_op_idiv; break; + case SpvOpFDiv: op = nir_op_fdiv; break; + case SpvOpUMod: op = nir_op_umod; break; + case SpvOpSMod: op = nir_op_umod; break; /* FIXME? 
*/ + case SpvOpFMod: op = nir_op_fmod; break; + + case SpvOpDot: + assert(src[0]->num_components == src[1]->num_components); + switch (src[0]->num_components) { + case 1: op = nir_op_fmul; break; + case 2: op = nir_op_fdot2; break; + case 3: op = nir_op_fdot3; break; + case 4: op = nir_op_fdot4; break; + } + break; + + case SpvOpShiftRightLogical: op = nir_op_ushr; break; + case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; + case SpvOpShiftLeftLogical: op = nir_op_ishl; break; + case SpvOpLogicalOr: op = nir_op_ior; break; + case SpvOpLogicalEqual: op = nir_op_ieq; break; + case SpvOpLogicalNotEqual: op = nir_op_ine; break; + case SpvOpLogicalAnd: op = nir_op_iand; break; + case SpvOpLogicalNot: op = nir_op_inot; break; + case SpvOpBitwiseOr: op = nir_op_ior; break; + case SpvOpBitwiseXor: op = nir_op_ixor; break; + case SpvOpBitwiseAnd: op = nir_op_iand; break; + case SpvOpSelect: op = nir_op_bcsel; break; + case SpvOpIEqual: op = nir_op_ieq; break; + + /* Comparisons: (TODO: How do we want to handled ordered/unordered?) 
*/ + case SpvOpFOrdEqual: op = nir_op_feq; break; + case SpvOpFUnordEqual: op = nir_op_feq; break; + case SpvOpINotEqual: op = nir_op_ine; break; + case SpvOpFOrdNotEqual: op = nir_op_fne; break; + case SpvOpFUnordNotEqual: op = nir_op_fne; break; + case SpvOpULessThan: op = nir_op_ult; break; + case SpvOpSLessThan: op = nir_op_ilt; break; + case SpvOpFOrdLessThan: op = nir_op_flt; break; + case SpvOpFUnordLessThan: op = nir_op_flt; break; + case SpvOpUGreaterThan: op = nir_op_ult; swap = true; break; + case SpvOpSGreaterThan: op = nir_op_ilt; swap = true; break; + case SpvOpFOrdGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpFUnordGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpULessThanEqual: op = nir_op_uge; swap = true; break; + case SpvOpSLessThanEqual: op = nir_op_ige; swap = true; break; + case SpvOpFOrdLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpFUnordLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpUGreaterThanEqual: op = nir_op_uge; break; + case SpvOpSGreaterThanEqual: op = nir_op_ige; break; + case SpvOpFOrdGreaterThanEqual: op = nir_op_fge; break; + case SpvOpFUnordGreaterThanEqual:op = nir_op_fge; break; + + /* Conversions: */ + case SpvOpConvertFToU: op = nir_op_f2u; break; + case SpvOpConvertFToS: op = nir_op_f2i; break; + case SpvOpConvertSToF: op = nir_op_i2f; break; + case SpvOpConvertUToF: op = nir_op_u2f; break; + case SpvOpBitcast: op = nir_op_imov; break; + case SpvOpUConvert: + case SpvOpSConvert: + op = nir_op_imov; /* TODO: NIR is 32-bit only; these are no-ops. 
*/ + break; + case SpvOpFConvert: + op = nir_op_fmov; + break; + + /* Derivatives: */ + case SpvOpDPdx: op = nir_op_fddx; break; + case SpvOpDPdy: op = nir_op_fddy; break; + case SpvOpDPdxFine: op = nir_op_fddx_fine; break; + case SpvOpDPdyFine: op = nir_op_fddy_fine; break; + case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; + case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; + case SpvOpFwidth: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); + return; + case SpvOpFwidthFine: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); + return; + case SpvOpFwidthCoarse: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); + return; + + case SpvOpVectorTimesScalar: + /* The builder will take care of splatting for us. */ + val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); + return; + + case SpvOpSRem: + case SpvOpFRem: + unreachable("No NIR equivalent"); + + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + default: + unreachable("Unhandled opcode"); + } + + if (swap) { + nir_ssa_def *tmp = src[0]; + src[0] = src[1]; + src[1] = tmp; + } + + val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); +} + +static nir_ssa_def * +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) +{ + unsigned swiz[4] = { index }; + return nir_swizzle(&b->nb, src, swiz, 1, true); +} + + +static nir_ssa_def * +vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, + unsigned index) +{ + nir_alu_instr *vec = create_vec(b->shader, src->num_components); + + for (unsigned i = 0; i < src->num_components; i++) { + if (i == index) { + vec->src[i].src = 
nir_src_for_ssa(insert); + } else { + vec->src[i].src = nir_src_for_ssa(src); + vec->src[i].swizzle[0] = i; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static nir_ssa_def * +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_extract(b, src, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_extract(b, src, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_insert(b, src, insert, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, + nir_ssa_def *src0, nir_ssa_def *src1, + const uint32_t *indices) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1); + nir_builder_instr_insert(&b->nb, &undef->instr); + + for (unsigned i = 0; i < num_components; i++) { + uint32_t index = indices[i]; + if (index == 0xffffffff) { + vec->src[i].src = nir_src_for_ssa(&undef->def); + } else if (index < src0->num_components) { + vec->src[i].src = nir_src_for_ssa(src0); + vec->src[i].swizzle[0] = index; + } else { + vec->src[i].src = nir_src_for_ssa(src1); + vec->src[i].swizzle[0] = index - src0->num_components; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +/* + * Concatentates a number of vectors/scalars together to produce a vector + */ +static nir_ssa_def * +vtn_vector_construct(struct vtn_builder *b, unsigned num_components, + unsigned num_srcs, 
nir_ssa_def **srcs) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + unsigned dest_idx = 0; + for (unsigned i = 0; i < num_srcs; i++) { + nir_ssa_def *src = srcs[i]; + for (unsigned j = 0; j < src->num_components; j++) { + vec->src[dest_idx].src = nir_src_for_ssa(src); + vec->src[dest_idx].swizzle[0] = j; + dest_idx++; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static struct vtn_ssa_value * +vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) +{ + struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); + dest->type = src->type; + + if (glsl_type_is_vector_or_scalar(src->type)) { + dest->def = src->def; + } else { + unsigned elems = glsl_get_length(src->type); + + dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_ssa_value *insert, const uint32_t *indices, + unsigned num_indices) +{ + struct vtn_ssa_value *dest = vtn_composite_copy(b, src); + + struct vtn_ssa_value *cur = dest; + unsigned i; + for (i = 0; i < num_indices - 1; i++) { + cur = cur->elems[indices[i]]; + } + + if (glsl_type_is_vector_or_scalar(cur->type)) { + /* According to the SPIR-V spec, OpCompositeInsert may work down to + * the component granularity. In that case, the last index will be + * the index to insert the scalar into the vector. 
+ */ + + cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); + } else { + cur->elems[indices[i]] = insert; + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, + const uint32_t *indices, unsigned num_indices) +{ + struct vtn_ssa_value *cur = src; + for (unsigned i = 0; i < num_indices; i++) { + if (glsl_type_is_vector_or_scalar(cur->type)) { + assert(i == num_indices - 1); + /* According to the SPIR-V spec, OpCompositeExtract may work down to + * the component granularity. The last index will be the index of the + * vector to extract. + */ + + struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); + ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); + ret->def = vtn_vector_extract(b, cur->def, indices[i]); + return ret; + } else { + cur = cur->elems[indices[i]]; + } + } + + return cur; +} + +static void +vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); + + switch (opcode) { + case SpvOpVectorExtractDynamic: + val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def); + break; + + case SpvOpVectorInsertDynamic: + val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + vtn_ssa_value(b, w[5])->def); + break; + + case SpvOpVectorShuffle: + val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), + vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + w + 5); + break; + + case SpvOpCompositeConstruct: { + unsigned elems = count - 3; + if (glsl_type_is_vector_or_scalar(type)) { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < elems; i++) + srcs[i] = vtn_ssa_value(b, w[3 + i])->def; + 
val->ssa->def = + vtn_vector_construct(b, glsl_get_vector_elements(type), + elems, srcs); + } else { + val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); + } + break; + } + case SpvOpCompositeExtract: + val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), + w + 4, count - 4); + break; + + case SpvOpCompositeInsert: + val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), + vtn_ssa_value(b, w[3]), + w + 5, count - 5); + break; + + case SpvOpCopyObject: + val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + break; + + default: + unreachable("unknown composite operation"); + } +} + +static void +vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + nir_intrinsic_op intrinsic_op; + switch (opcode) { + case SpvOpEmitVertex: + case SpvOpEmitStreamVertex: + intrinsic_op = nir_intrinsic_emit_vertex; + break; + case SpvOpEndPrimitive: + case SpvOpEndStreamPrimitive: + intrinsic_op = nir_intrinsic_end_primitive; + break; + case SpvOpMemoryBarrier: + intrinsic_op = nir_intrinsic_memory_barrier; + break; + case SpvOpControlBarrier: + default: + unreachable("unknown barrier instruction"); + } + + nir_intrinsic_instr *intrin = + nir_intrinsic_instr_create(b->shader, intrinsic_op); + + if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) + intrin->const_index[0] = w[1]; + + nir_builder_instr_insert(&b->nb, &intrin->instr); +} + +static void +vtn_phi_node_init(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + glsl_get_vector_elements(val->type), NULL); + exec_list_make_empty(&phi->srcs); + nir_builder_instr_insert(&b->nb, &phi->instr); + val->def = &phi->dest.ssa; + } else { + unsigned elems = glsl_get_length(val->type); + for (unsigned i = 0; 
i < elems; i++) + vtn_phi_node_init(b, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_phi_node_create(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = vtn_create_ssa_value(b, type); + vtn_phi_node_init(b, val); + return val; +} + +static void +vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_phi_node_create(b, type); +} + +static void +vtn_phi_node_add_src(struct vtn_ssa_value *phi, const nir_block *pred, + struct vtn_ssa_value *val) +{ + assert(phi->type == val->type); + if (glsl_type_is_vector_or_scalar(phi->type)) { + nir_phi_instr *phi_instr = nir_instr_as_phi(phi->def->parent_instr); + nir_phi_src *src = ralloc(phi_instr, nir_phi_src); + src->pred = (nir_block *) pred; + src->src = nir_src_for_ssa(val->def); + exec_list_push_tail(&phi_instr->srcs, &src->node); + } else { + unsigned elems = glsl_get_length(phi->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_add_src(phi->elems[i], pred, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block, + const struct glsl_type *type, const uint32_t *w, + unsigned count) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->block_table, block); + if (entry) { + struct vtn_block *spv_block = entry->data; + for (unsigned off = 4; off < count; off += 2) { + if (spv_block == vtn_value(b, w[off], vtn_value_type_block)->block) { + return vtn_ssa_value(b, w[off - 1]); + } + } + } + + b->nb.cursor = nir_before_block(block); + struct vtn_ssa_value *phi = vtn_phi_node_create(b, type); + + struct set_entry *entry2; + set_foreach(block->predecessors, entry2) { + nir_block *pred = (nir_block *) entry2->key; + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, type, w, + count); + vtn_phi_node_add_src(phi, 
pred, val); + } + + return phi; +} + +static bool +vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpLabel) { + b->block = vtn_value(b, w[1], vtn_value_type_block)->block; + return true; + } + + if (opcode != SpvOpPhi) + return true; + + struct vtn_ssa_value *phi = vtn_value(b, w[2], vtn_value_type_ssa)->ssa; + + struct set_entry *entry; + set_foreach(b->block->block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, phi->type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return true; +} + +static unsigned +gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + case SpvExecutionModeOutputPoints: + return 0; /* GL_POINTS */ + case SpvExecutionModeInputLines: + return 1; /* GL_LINES */ + case SpvExecutionModeInputLinesAdjacency: + return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */ + case SpvExecutionModeTriangles: + return 4; /* GL_TRIANGLES */ + case SpvExecutionModeInputTrianglesAdjacency: + return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */ + case SpvExecutionModeQuads: + return 7; /* GL_QUADS */ + case SpvExecutionModeIsolines: + return 0x8E7A; /* GL_ISOLINES */ + case SpvExecutionModeOutputLineStrip: + return 3; /* GL_LINE_STRIP */ + case SpvExecutionModeOutputTriangleStrip: + return 5; /* GL_TRIANGLE_STRIP */ + default: + assert(!"Invalid primitive type"); + return 4; + } +} + +static unsigned +vertices_in_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + return 1; + case SpvExecutionModeInputLines: + return 2; + case SpvExecutionModeInputLinesAdjacency: + return 4; + case SpvExecutionModeTriangles: + return 3; + case SpvExecutionModeInputTrianglesAdjacency: + return 6; + default: + assert(!"Invalid GS input mode"); + return 0; + } +} + +static bool 
+vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceExtension: + case SpvOpExtension: + /* Unhandled, but these are for debug so that's ok. */ + break; + + case SpvOpCapability: + switch ((SpvCapability)w[1]) { + case SpvCapabilityMatrix: + case SpvCapabilityShader: + /* All shaders support these */ + break; + case SpvCapabilityGeometry: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + break; + default: + assert(!"Unsupported capability"); + } + break; + + case SpvOpExtInstImport: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpMemoryModel: + assert(w[1] == SpvAddressingModelLogical); + assert(w[2] == SpvMemoryModelGLSL450); + break; + + case SpvOpEntryPoint: + assert(b->entry_point == NULL); + b->entry_point = &b->values[w[2]]; + b->execution_model = w[1]; + break; + + case SpvOpExecutionMode: + assert(b->entry_point == &b->values[w[1]]); + + SpvExecutionMode mode = w[2]; + switch(mode) { + case SpvExecutionModeOriginUpperLeft: + case SpvExecutionModeOriginLowerLeft: + b->origin_upper_left = (mode == SpvExecutionModeOriginUpperLeft); + break; + + case SpvExecutionModeEarlyFragmentTests: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.early_fragment_tests = true; + break; + + case SpvExecutionModeInvocations: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.invocations = MAX2(1, w[3]); + break; + + case SpvExecutionModeDepthReplacing: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; + break; + case SpvExecutionModeDepthGreater: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; + break; + case SpvExecutionModeDepthLess: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; + break; + case 
SpvExecutionModeDepthUnchanged: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; + break; + + case SpvExecutionModeLocalSize: + assert(b->shader->stage == MESA_SHADER_COMPUTE); + b->shader->info.cs.local_size[0] = w[3]; + b->shader->info.cs.local_size[1] = w[4]; + b->shader->info.cs.local_size[2] = w[5]; + break; + case SpvExecutionModeLocalSizeHint: + break; /* Nothing do do with this */ + + case SpvExecutionModeOutputVertices: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.vertices_out = w[3]; + break; + + case SpvExecutionModeInputPoints: + case SpvExecutionModeInputLines: + case SpvExecutionModeInputLinesAdjacency: + case SpvExecutionModeTriangles: + case SpvExecutionModeInputTrianglesAdjacency: + case SpvExecutionModeQuads: + case SpvExecutionModeIsolines: + if (b->shader->stage == MESA_SHADER_GEOMETRY) { + b->shader->info.gs.vertices_in = + vertices_in_from_spv_execution_mode(mode); + } else { + assert(!"Tesselation shaders not yet supported"); + } + break; + + case SpvExecutionModeOutputPoints: + case SpvExecutionModeOutputLineStrip: + case SpvExecutionModeOutputTriangleStrip: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.output_primitive = + gl_primitive_from_spv_execution_mode(mode); + break; + + case SpvExecutionModeSpacingEqual: + case SpvExecutionModeSpacingFractionalEven: + case SpvExecutionModeSpacingFractionalOdd: + case SpvExecutionModeVertexOrderCw: + case SpvExecutionModeVertexOrderCcw: + case SpvExecutionModePointMode: + assert(!"TODO: Add tessellation metadata"); + break; + + case SpvExecutionModePixelCenterInteger: + case SpvExecutionModeXfb: + assert(!"Unhandled execution mode"); + break; + + case SpvExecutionModeVecTypeHint: + case SpvExecutionModeContractionOff: + break; /* OpenCL */ + } + break; + + case SpvOpString: + vtn_push_value(b, w[1], vtn_value_type_string)->str = + vtn_string_literal(b, &w[2], count - 2); + 
break; + + case SpvOpName: + b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2); + break; + + case SpvOpMemberName: + /* TODO */ + break; + + case SpvOpLine: + break; /* Ignored for now */ + + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + vtn_handle_decoration(b, opcode, w, count); + break; + + case SpvOpTypeVoid: + case SpvOpTypeBool: + case SpvOpTypeInt: + case SpvOpTypeFloat: + case SpvOpTypeVector: + case SpvOpTypeMatrix: + case SpvOpTypeImage: + case SpvOpTypeSampler: + case SpvOpTypeSampledImage: + case SpvOpTypeArray: + case SpvOpTypeRuntimeArray: + case SpvOpTypeStruct: + case SpvOpTypeOpaque: + case SpvOpTypePointer: + case SpvOpTypeFunction: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + vtn_handle_type(b, opcode, w, count); + break; + + case SpvOpConstantTrue: + case SpvOpConstantFalse: + case SpvOpConstant: + case SpvOpConstantComposite: + case SpvOpConstantSampler: + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: + case SpvOpSpecConstant: + case SpvOpSpecConstantComposite: + vtn_handle_constant(b, opcode, w, count); + break; + + case SpvOpVariable: + vtn_handle_variables(b, opcode, w, count); + break; + + default: + return false; /* End of preamble */ + } + + return true; +} + +static bool +vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpFunction: { + assert(b->func == NULL); + b->func = rzalloc(b, struct vtn_function); + + const struct glsl_type *result_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + val->func = b->func; + + const struct glsl_type *func_type = + vtn_value(b, w[4], vtn_value_type_type)->type->type; + + assert(glsl_get_function_return_type(func_type) == 
result_type); + + nir_function *func = + nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); + + func->num_params = glsl_get_length(func_type); + func->params = ralloc_array(b->shader, nir_parameter, func->num_params); + for (unsigned i = 0; i < func->num_params; i++) { + const struct glsl_function_param *param = + glsl_get_function_param(func_type, i); + func->params[i].type = param->type; + if (param->in) { + if (param->out) { + func->params[i].param_type = nir_parameter_inout; + } else { + func->params[i].param_type = nir_parameter_in; + } + } else { + if (param->out) { + func->params[i].param_type = nir_parameter_out; + } else { + assert(!"Parameter is neither in nor out"); + } + } + } + + func->return_type = glsl_get_function_return_type(func_type); + + b->func->impl = nir_function_impl_create(func); + if (!glsl_type_is_void(func->return_type)) { + b->func->impl->return_var = + nir_local_variable_create(b->func->impl, func->return_type, "ret"); + } + + b->func_param_idx = 0; + break; + } + + case SpvOpFunctionEnd: + b->func->end = w; + b->func = NULL; + break; + + case SpvOpFunctionParameter: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + + assert(b->func_param_idx < b->func->impl->num_params); + unsigned idx = b->func_param_idx++; + + nir_variable *param = + nir_local_variable_create(b->func->impl, + b->func->impl->function->params[idx].type, + val->name); + + b->func->impl->params[idx] = param; + val->deref = nir_deref_var_create(b, param); + val->deref_type = vtn_value(b, w[1], vtn_value_type_type)->type; + break; + } + + case SpvOpLabel: { + assert(b->block == NULL); + b->block = rzalloc(b, struct vtn_block); + b->block->label = w; + vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; + + if (b->func->start_block == NULL) { + /* This is the first block encountered for this function. In this + * case, we set the start block and add it to the list of + * implemented functions that we'll walk later. 
+ */ + b->func->start_block = b->block; + exec_list_push_tail(&b->functions, &b->func->node); + } + break; + } + + case SpvOpBranch: + case SpvOpBranchConditional: + case SpvOpSwitch: + case SpvOpKill: + case SpvOpReturn: + case SpvOpReturnValue: + case SpvOpUnreachable: + assert(b->block); + b->block->branch = w; + b->block = NULL; + break; + + case SpvOpSelectionMerge: + case SpvOpLoopMerge: + assert(b->block && b->block->merge_op == SpvOpNop); + b->block->merge_op = opcode; + b->block->merge_block_id = w[1]; + break; + + default: + /* Continue on as per normal */ + return true; + } + + return true; +} + +static bool +vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpLabel: { + struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; + assert(block->block == NULL); + + block->block = nir_cursor_current_block(b->nb.cursor); + break; + } + + case SpvOpLoopMerge: + case SpvOpSelectionMerge: + /* This is handled by cfg pre-pass and walk_blocks */ + break; + + case SpvOpUndef: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + break; + } + + case SpvOpExtInst: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpVariable: + case SpvOpLoad: + case SpvOpStore: + case SpvOpCopyMemory: + case SpvOpCopyMemorySized: + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: + case SpvOpArrayLength: + vtn_handle_variables(b, opcode, w, count); + break; + + case SpvOpFunctionCall: + vtn_handle_function_call(b, opcode, w, count); + break; + + case SpvOpSampledImage: + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case 
SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + case SpvOpImageQueryLod: + case SpvOpImageQueryLevels: + case SpvOpImageQuerySamples: + vtn_handle_texture(b, opcode, w, count); + break; + + case SpvOpImageRead: + case SpvOpImageWrite: + case SpvOpImageTexelPointer: + vtn_handle_image(b, opcode, w, count); + break; + + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: { + struct vtn_value *pointer = vtn_untyped_value(b, w[3]); + if (pointer->value_type == vtn_value_type_image_pointer) { + vtn_handle_image(b, opcode, w, count); + } else { + assert(!"Atomic buffers not yet implemented"); + } + } + + case SpvOpSNegate: + case SpvOpFNegate: + case SpvOpNot: + case SpvOpAny: + case SpvOpAll: + case SpvOpConvertFToU: + case SpvOpConvertFToS: + case SpvOpConvertSToF: + case SpvOpConvertUToF: + case SpvOpUConvert: + case SpvOpSConvert: + case SpvOpFConvert: + case SpvOpConvertPtrToU: + case SpvOpConvertUToPtr: + case SpvOpPtrCastToGeneric: + case SpvOpGenericCastToPtr: + case SpvOpBitcast: + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + case SpvOpIAdd: + case SpvOpFAdd: + case SpvOpISub: + case SpvOpFSub: + case SpvOpIMul: + case SpvOpFMul: + case SpvOpUDiv: + case SpvOpSDiv: + case SpvOpFDiv: + case SpvOpUMod: + case SpvOpSRem: + case SpvOpSMod: + case SpvOpFRem: + case SpvOpFMod: + case SpvOpVectorTimesScalar: + case SpvOpDot: + case SpvOpShiftRightLogical: + case SpvOpShiftRightArithmetic: + case 
SpvOpShiftLeftLogical: + case SpvOpLogicalEqual: + case SpvOpLogicalNotEqual: + case SpvOpLogicalOr: + case SpvOpLogicalAnd: + case SpvOpLogicalNot: + case SpvOpBitwiseOr: + case SpvOpBitwiseXor: + case SpvOpBitwiseAnd: + case SpvOpSelect: + case SpvOpIEqual: + case SpvOpFOrdEqual: + case SpvOpFUnordEqual: + case SpvOpINotEqual: + case SpvOpFOrdNotEqual: + case SpvOpFUnordNotEqual: + case SpvOpULessThan: + case SpvOpSLessThan: + case SpvOpFOrdLessThan: + case SpvOpFUnordLessThan: + case SpvOpUGreaterThan: + case SpvOpSGreaterThan: + case SpvOpFOrdGreaterThan: + case SpvOpFUnordGreaterThan: + case SpvOpULessThanEqual: + case SpvOpSLessThanEqual: + case SpvOpFOrdLessThanEqual: + case SpvOpFUnordLessThanEqual: + case SpvOpUGreaterThanEqual: + case SpvOpSGreaterThanEqual: + case SpvOpFOrdGreaterThanEqual: + case SpvOpFUnordGreaterThanEqual: + case SpvOpDPdx: + case SpvOpDPdy: + case SpvOpFwidth: + case SpvOpDPdxFine: + case SpvOpDPdyFine: + case SpvOpFwidthFine: + case SpvOpDPdxCoarse: + case SpvOpDPdyCoarse: + case SpvOpFwidthCoarse: + vtn_handle_alu(b, opcode, w, count); + break; + + case SpvOpTranspose: + case SpvOpOuterProduct: + case SpvOpMatrixTimesScalar: + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + vtn_handle_matrix_alu(b, opcode, w, count); + break; + + case SpvOpVectorExtractDynamic: + case SpvOpVectorInsertDynamic: + case SpvOpVectorShuffle: + case SpvOpCompositeConstruct: + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: + case SpvOpCopyObject: + vtn_handle_composite(b, opcode, w, count); + break; + + case SpvOpPhi: + vtn_handle_phi_first_pass(b, w); + break; + + case SpvOpEmitVertex: + case SpvOpEndPrimitive: + case SpvOpEmitStreamVertex: + case SpvOpEndStreamPrimitive: + case SpvOpControlBarrier: + case SpvOpMemoryBarrier: + vtn_handle_barrier(b, opcode, w, count); + break; + + default: + unreachable("Unhandled opcode"); + } + + return true; +} + +static void +vtn_walk_blocks(struct vtn_builder 
*b, struct vtn_block *start, + struct vtn_block *break_block, struct vtn_block *cont_block, + struct vtn_block *end_block) +{ + struct vtn_block *block = start; + while (block != end_block) { + if (block->merge_op == SpvOpLoopMerge) { + /* This is the jump into a loop. */ + struct vtn_block *new_cont_block = block; + struct vtn_block *new_break_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; + + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert(b->nb.cursor, &loop->cf_node); + + /* Reset the merge_op to prerevent infinite recursion */ + block->merge_op = SpvOpNop; + + b->nb.cursor = nir_after_cf_list(&loop->body); + vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); + + b->nb.cursor = nir_after_cf_node(&loop->cf_node); + block = new_break_block; + continue; + } + + const uint32_t *w = block->branch; + SpvOp branch_op = w[0] & SpvOpCodeMask; + + b->block = block; + vtn_foreach_instruction(b, block->label, block->branch, + vtn_handle_body_instruction); + + nir_block *cur_block = nir_cursor_current_block(b->nb.cursor); + assert(cur_block == block->block); + _mesa_hash_table_insert(b->block_table, cur_block, block); + + switch (branch_op) { + case SpvOpBranch: { + struct vtn_block *branch_block = + vtn_value(b, w[1], vtn_value_type_block)->block; + + if (branch_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (branch_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (branch_block == end_block) { + /* We're branching to the merge block of an if, since for loops + * and functions end_block == NULL, so we're done here. 
+ */ + return; + } else { + /* We're branching to another block, and according to the rules, + * we can only branch to another block with one predecessor (so + * we're the only one jumping to it) so we can just process it + * next. + */ + block = branch_block; + continue; + } + } + + case SpvOpBranchConditional: { + /* Gather up the branch blocks */ + struct vtn_block *then_block = + vtn_value(b, w[2], vtn_value_type_block)->block; + struct vtn_block *else_block = + vtn_value(b, w[3], vtn_value_type_block)->block; + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); + nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); + + if (then_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_instr_insert_after_cf_list(&if_stmt->then_list, + &jump->instr); + block = else_block; + } else if (else_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_instr_insert_after_cf_list(&if_stmt->else_list, + &jump->instr); + block = then_block; + } else if (then_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_instr_insert_after_cf_list(&if_stmt->then_list, + &jump->instr); + block = else_block; + } else if (else_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_instr_insert_after_cf_list(&if_stmt->else_list, + &jump->instr); + block = then_block; + } else { + /* According to the rules we're branching to two blocks that don't + * have any other predecessors, so we can handle this as a + * conventional if. 
+ */ + assert(block->merge_op == SpvOpSelectionMerge); + struct vtn_block *merge_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; + + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); + vtn_walk_blocks(b, then_block, break_block, cont_block, merge_block); + + b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); + vtn_walk_blocks(b, else_block, break_block, cont_block, merge_block); + + b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); + block = merge_block; + continue; + } + + /* If we got here then we inserted a predicated break or continue + * above and we need to handle the other case. We already set + * `block` above to indicate what block to visit after the + * predicated break. + */ + + /* It's possible that the other branch is also a break/continue. + * If it is, we handle that here. + */ + if (block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } + + /* If we got here then there was a predicated break/continue but + * the other half of the if has stuff in it. `block` was already + * set above so there is nothing left for us to do. 
+ */ + continue; + } + + case SpvOpReturn: { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_return); + nir_builder_instr_insert(&b->nb, &jump->instr); + return; + } + + case SpvOpReturnValue: { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[1]); + vtn_variable_store(b, src, + nir_deref_var_create(b, b->impl->return_var), + NULL); + + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_return); + nir_builder_instr_insert(&b->nb, &jump->instr); + return; + } + + case SpvOpKill: { + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard); + nir_builder_instr_insert(&b->nb, &discard->instr); + return; + } + + case SpvOpSwitch: + case SpvOpUnreachable: + default: + unreachable("Unhandled opcode"); + } + } +} + +nir_shader * +spirv_to_nir(const uint32_t *words, size_t word_count, + gl_shader_stage stage, + const nir_shader_compiler_options *options) +{ + const uint32_t *word_end = words + word_count; + + /* Handle the SPIR-V header (first 4 dwords) */ + assert(word_count > 5); + + assert(words[0] == SpvMagicNumber); + assert(words[1] >= 0x10000); + /* words[2] == generator magic */ + unsigned value_id_bound = words[3]; + assert(words[4] == 0); + + words+= 5; + + nir_shader *shader = nir_shader_create(NULL, stage, options); + + /* Initialize the stn_builder object */ + struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); + b->shader = shader; + b->value_id_bound = value_id_bound; + b->values = rzalloc_array(b, struct vtn_value, value_id_bound); + exec_list_make_empty(&b->functions); + + /* XXX: We shouldn't need these defaults */ + if (b->shader->stage == MESA_SHADER_GEOMETRY) { + b->shader->info.gs.vertices_in = 3; + b->shader->info.gs.output_primitive = 4; /* GL_TRIANGLES */ + } + + /* Handle all the preamble instructions */ + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_preamble_instruction); + + /* Do a very quick CFG analysis pass */ + vtn_foreach_instruction(b, 
words, word_end, + vtn_handle_first_cfg_pass_instruction); + + foreach_list_typed(struct vtn_function, func, node, &b->functions) { + b->impl = func->impl; + b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + nir_builder_init(&b->nb, b->impl); + b->nb.cursor = nir_after_cf_list(&b->impl->body); + vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); + vtn_foreach_instruction(b, func->start_block->label, func->end, + vtn_handle_phi_second_pass); + } + + /* Because we can still have output reads in NIR, we need to lower + * outputs to temporaries before we are truely finished. + */ + nir_lower_outputs_to_temporaries(shader); + + ralloc_free(b); + + return shader; +} diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c new file mode 100644 index 00000000000..551f0540496 --- /dev/null +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -0,0 +1,180 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "vtn_private.h" +#include "GLSL.std.450.h" + +static nir_ssa_def* +build_length(nir_builder *b, nir_ssa_def *vec) +{ + switch (vec->num_components) { + case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec)); + case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec)); + case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec)); + case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec)); + default: + unreachable("Invalid number of components"); + } +} + +static void +handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + + /* Collect the various SSA sources */ + unsigned num_inputs = count - 5; + nir_ssa_def *src[3]; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 5])->def; + + nir_op op; + switch (entrypoint) { + case GLSLstd450Round: op = nir_op_fround_even; break; /* TODO */ + case GLSLstd450RoundEven: op = nir_op_fround_even; break; + case GLSLstd450Trunc: op = nir_op_ftrunc; break; + case GLSLstd450FAbs: op = nir_op_fabs; break; + case GLSLstd450FSign: op = nir_op_fsign; break; + case GLSLstd450Floor: op = nir_op_ffloor; break; + case GLSLstd450Ceil: op = nir_op_fceil; break; + case GLSLstd450Fract: op = nir_op_ffract; break; + case GLSLstd450Radians: + val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); + return; + case GLSLstd450Degrees: + val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); + return; + 
case GLSLstd450Sin: op = nir_op_fsin; break; + case GLSLstd450Cos: op = nir_op_fcos; break; + case GLSLstd450Tan: + val->ssa->def = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), + nir_fcos(&b->nb, src[0])); + return; + case GLSLstd450Pow: op = nir_op_fpow; break; + case GLSLstd450Exp2: op = nir_op_fexp2; break; + case GLSLstd450Log2: op = nir_op_flog2; break; + case GLSLstd450Sqrt: op = nir_op_fsqrt; break; + case GLSLstd450InverseSqrt: op = nir_op_frsq; break; + + case GLSLstd450Modf: op = nir_op_fmod; break; + case GLSLstd450FMin: op = nir_op_fmin; break; + case GLSLstd450UMin: op = nir_op_umin; break; + case GLSLstd450SMin: op = nir_op_imin; break; + case GLSLstd450FMax: op = nir_op_fmax; break; + case GLSLstd450UMax: op = nir_op_umax; break; + case GLSLstd450SMax: op = nir_op_imax; break; + case GLSLstd450FMix: op = nir_op_flrp; break; + case GLSLstd450Step: + val->ssa->def = nir_sge(&b->nb, src[1], src[0]); + return; + + case GLSLstd450Fma: op = nir_op_ffma; break; + case GLSLstd450Ldexp: op = nir_op_ldexp; break; + + /* Packing/Unpacking functions */ + case GLSLstd450PackSnorm4x8: op = nir_op_pack_snorm_4x8; break; + case GLSLstd450PackUnorm4x8: op = nir_op_pack_unorm_4x8; break; + case GLSLstd450PackSnorm2x16: op = nir_op_pack_snorm_2x16; break; + case GLSLstd450PackUnorm2x16: op = nir_op_pack_unorm_2x16; break; + case GLSLstd450PackHalf2x16: op = nir_op_pack_half_2x16; break; + case GLSLstd450UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break; + case GLSLstd450UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break; + case GLSLstd450UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break; + case GLSLstd450UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break; + case GLSLstd450UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; + + case GLSLstd450Length: + val->ssa->def = build_length(&b->nb, src[0]); + return; + case GLSLstd450Distance: + val->ssa->def = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); + return; + case GLSLstd450Normalize: + val->ssa->def = 
nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); + return; + + case GLSLstd450Exp: + case GLSLstd450Log: + case GLSLstd450FClamp: + case GLSLstd450UClamp: + case GLSLstd450SClamp: + case GLSLstd450Asin: + case GLSLstd450Acos: + case GLSLstd450Atan: + case GLSLstd450Atan2: + case GLSLstd450Sinh: + case GLSLstd450Cosh: + case GLSLstd450Tanh: + case GLSLstd450Asinh: + case GLSLstd450Acosh: + case GLSLstd450Atanh: + case GLSLstd450SmoothStep: + case GLSLstd450Frexp: + case GLSLstd450PackDouble2x32: + case GLSLstd450UnpackDouble2x32: + case GLSLstd450Cross: + case GLSLstd450FaceForward: + case GLSLstd450Reflect: + case GLSLstd450Refract: + case GLSLstd450IMix: + default: + unreachable("Unhandled opcode"); + } + + nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, + glsl_get_vector_elements(val->ssa->type), val->name); + instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1; + val->ssa->def = &instr->dest.dest.ssa; + + for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) + instr->src[i].src = nir_src_for_ssa(src[i]); + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +bool +vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *words, unsigned count) +{ + switch ((enum GLSLstd450)ext_opcode) { + case GLSLstd450Determinant: + case GLSLstd450MatrixInverse: + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + unreachable("Unhandled opcode"); + + default: + handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, words, count); + } + + return true; +} diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h new file mode 100644 index 00000000000..2fea244bb74 --- /dev/null +++ b/src/glsl/nir/spirv/vtn_private.h @@ -0,0 +1,253 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy 
of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir/nir.h" +#include "nir/nir_builder.h" +#include "nir_spirv.h" +#include "spirv.h" + +struct vtn_builder; +struct vtn_decoration; + +enum vtn_value_type { + vtn_value_type_invalid = 0, + vtn_value_type_undef, + vtn_value_type_string, + vtn_value_type_decoration_group, + vtn_value_type_type, + vtn_value_type_constant, + vtn_value_type_deref, + vtn_value_type_function, + vtn_value_type_block, + vtn_value_type_ssa, + vtn_value_type_extension, + vtn_value_type_image_pointer, + vtn_value_type_sampled_image, +}; + +struct vtn_block { + /* Merge opcode if this block contains a merge; SpvOpNop otherwise. 
*/ + SpvOp merge_op; + uint32_t merge_block_id; + const uint32_t *label; + const uint32_t *branch; + nir_block *block; +}; + +struct vtn_function { + struct exec_node node; + + nir_function_impl *impl; + struct vtn_block *start_block; + + const uint32_t *end; +}; + +typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, + const uint32_t *, unsigned); + +struct vtn_ssa_value { + union { + nir_ssa_def *def; + struct vtn_ssa_value **elems; + }; + + /* For matrices, if this is non-NULL, then this value is actually the + * transpose of some other value. The value that `transposed` points to + * always dominates this value. + */ + struct vtn_ssa_value *transposed; + + const struct glsl_type *type; +}; + +struct vtn_type { + const struct glsl_type *type; + + /* for matrices, whether the matrix is stored row-major */ + bool row_major; + + /* for structs, the offset of each member */ + unsigned *offsets; + + /* for structs, whether it was decorated as a "non-SSBO-like" block */ + bool block; + + /* for structs, whether it was decorated as an "SSBO-like" block */ + bool buffer_block; + + /* for structs with block == true, whether this is a builtin block (i.e. a + * block that contains only builtins). 
+ */ + bool builtin_block; + + /* Image format for image_load_store type images */ + unsigned image_format; + + /* for arrays and matrices, the array stride */ + unsigned stride; + + /* for arrays, the vtn_type for the elements of the array */ + struct vtn_type *array_element; + + /* for structures, the vtn_type for each member */ + struct vtn_type **members; + + /* Whether this type, or a parent type, has been decorated as a builtin */ + bool is_builtin; + + SpvBuiltIn builtin; +}; + +struct vtn_image_pointer { + nir_deref_var *deref; + nir_ssa_def *coord; + nir_ssa_def *sample; +}; + +struct vtn_sampled_image { + nir_deref_var *image; /* Image or array of images */ + nir_deref_var *sampler; /* Sampler */ +}; + +struct vtn_value { + enum vtn_value_type value_type; + const char *name; + struct vtn_decoration *decoration; + union { + void *ptr; + char *str; + struct vtn_type *type; + struct { + nir_constant *constant; + const struct glsl_type *const_type; + }; + struct { + nir_deref_var *deref; + struct vtn_type *deref_type; + }; + struct vtn_image_pointer *image; + struct vtn_sampled_image *sampled_image; + struct vtn_function *func; + struct vtn_block *block; + struct vtn_ssa_value *ssa; + vtn_instruction_handler ext_handler; + }; +}; + +struct vtn_decoration { + struct vtn_decoration *next; + int member; /* -1 if not a member decoration */ + const uint32_t *literals; + struct vtn_value *group; + SpvDecoration decoration; +}; + +struct vtn_builder { + nir_builder nb; + + nir_shader *shader; + nir_function_impl *impl; + struct vtn_block *block; + + /* + * In SPIR-V, constants are global, whereas in NIR, the load_const + * instruction we use is per-function. So while we parse each function, we + * keep a hash table of constants we've resolved to nir_ssa_value's so + * far, and we lazily resolve them when we see them used in a function. 
+ */ + struct hash_table *const_table; + + /* + * Map from nir_block to the vtn_block which ends with it -- used for + * handling phi nodes. + */ + struct hash_table *block_table; + + /* + * NIR variable for each SPIR-V builtin. + */ + struct { + nir_variable *in; + nir_variable *out; + } builtins[42]; /* XXX need symbolic constant from SPIR-V header */ + + unsigned value_id_bound; + struct vtn_value *values; + + SpvExecutionModel execution_model; + bool origin_upper_left; + struct vtn_value *entry_point; + + struct vtn_function *func; + struct exec_list functions; + + /* Current function parameter index */ + unsigned func_param_idx; +}; + +static inline struct vtn_value * +vtn_push_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + assert(value_id < b->value_id_bound); + assert(b->values[value_id].value_type == vtn_value_type_invalid); + + b->values[value_id].value_type = value_type; + + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) +{ + assert(value_id < b->value_id_bound); + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + assert(val->value_type == value_type); + return val; +} + +struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); + +typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + int member, + const struct vtn_decoration *, + void *); + +void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data); + +bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *words, unsigned count); diff --git a/src/glsl/nir/spirv2nir.c b/src/glsl/nir/spirv2nir.c index e06e82595a2..6cf891517c7 100644 --- a/src/glsl/nir/spirv2nir.c 
+++ b/src/glsl/nir/spirv2nir.c @@ -31,7 +31,7 @@ * spirv_to_nir code. */ -#include "nir_spirv.h" +#include "spirv/nir_spirv.h" #include #include diff --git a/src/glsl/nir/spirv_glsl450.h b/src/glsl/nir/spirv_glsl450.h deleted file mode 100644 index d1c9b5c1d44..00000000000 --- a/src/glsl/nir/spirv_glsl450.h +++ /dev/null @@ -1,127 +0,0 @@ -/* -** Copyright (c) 2014-2015 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. 
-*/ - -#ifndef GLSLstd450_H -#define GLSLstd450_H - -const int GLSLstd450Version = 99; -const int GLSLstd450Revision = 3; - -enum GLSLstd450 { - GLSLstd450Bad = 0, // Don't use - - GLSLstd450Round = 1, - GLSLstd450RoundEven = 2, - GLSLstd450Trunc = 3, - GLSLstd450FAbs = 4, - GLSLstd450SAbs = 5, - GLSLstd450FSign = 6, - GLSLstd450SSign = 7, - GLSLstd450Floor = 8, - GLSLstd450Ceil = 9, - GLSLstd450Fract = 10, - - GLSLstd450Radians = 11, - GLSLstd450Degrees = 12, - GLSLstd450Sin = 13, - GLSLstd450Cos = 14, - GLSLstd450Tan = 15, - GLSLstd450Asin = 16, - GLSLstd450Acos = 17, - GLSLstd450Atan = 18, - GLSLstd450Sinh = 19, - GLSLstd450Cosh = 20, - GLSLstd450Tanh = 21, - GLSLstd450Asinh = 22, - GLSLstd450Acosh = 23, - GLSLstd450Atanh = 24, - GLSLstd450Atan2 = 25, - - GLSLstd450Pow = 26, - GLSLstd450Exp = 27, - GLSLstd450Log = 28, - GLSLstd450Exp2 = 29, - GLSLstd450Log2 = 30, - GLSLstd450Sqrt = 31, - GLSLstd450InverseSqrt = 32, - - GLSLstd450Determinant = 33, - GLSLstd450MatrixInverse = 34, - - GLSLstd450Modf = 35, // second operand needs an OpVariable to write to - GLSLstd450ModfStruct = 36, // no OpVariable operand - GLSLstd450FMin = 37, - GLSLstd450UMin = 38, - GLSLstd450SMin = 39, - GLSLstd450FMax = 40, - GLSLstd450UMax = 41, - GLSLstd450SMax = 42, - GLSLstd450FClamp = 43, - GLSLstd450UClamp = 44, - GLSLstd450SClamp = 45, - GLSLstd450FMix = 46, - GLSLstd450IMix = 47, - GLSLstd450Step = 48, - GLSLstd450SmoothStep = 49, - - GLSLstd450Fma = 50, - GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to - GLSLstd450FrexpStruct = 52, // no OpVariable operand - GLSLstd450Ldexp = 53, - - GLSLstd450PackSnorm4x8 = 54, - GLSLstd450PackUnorm4x8 = 55, - GLSLstd450PackSnorm2x16 = 56, - GLSLstd450PackUnorm2x16 = 57, - GLSLstd450PackHalf2x16 = 58, - GLSLstd450PackDouble2x32 = 59, - GLSLstd450UnpackSnorm2x16 = 60, - GLSLstd450UnpackUnorm2x16 = 61, - GLSLstd450UnpackHalf2x16 = 62, - GLSLstd450UnpackSnorm4x8 = 63, - GLSLstd450UnpackUnorm4x8 = 64, - 
GLSLstd450UnpackDouble2x32 = 65, - - GLSLstd450Length = 66, - GLSLstd450Distance = 67, - GLSLstd450Cross = 68, - GLSLstd450Normalize = 69, - GLSLstd450FaceForward = 70, - GLSLstd450Reflect = 71, - GLSLstd450Refract = 72, - - GLSLstd450FindILsb = 73, - GLSLstd450FindSMsb = 74, - GLSLstd450FindUMsb = 75, - - GLSLstd450InterpolateAtCentroid = 76, - GLSLstd450InterpolateAtSample = 77, - GLSLstd450InterpolateAtOffset = 78, - - GLSLstd450Count -}; - -#endif // #ifndef GLSLstd450_H diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c deleted file mode 100644 index ee1fca34c31..00000000000 --- a/src/glsl/nir/spirv_glsl450_to_nir.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "spirv_to_nir_private.h" -#include "spirv_glsl450.h" - -static nir_ssa_def* -build_length(nir_builder *b, nir_ssa_def *vec) -{ - switch (vec->num_components) { - case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec)); - case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec)); - case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec)); - case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec)); - default: - unreachable("Invalid number of components"); - } -} - -static void -handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = rzalloc(b, struct vtn_ssa_value); - val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; - - /* Collect the various SSA sources */ - unsigned num_inputs = count - 5; - nir_ssa_def *src[3]; - for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 5])->def; - - nir_op op; - switch (entrypoint) { - case GLSLstd450Round: op = nir_op_fround_even; break; /* TODO */ - case GLSLstd450RoundEven: op = nir_op_fround_even; break; - case GLSLstd450Trunc: op = nir_op_ftrunc; break; - case GLSLstd450FAbs: op = nir_op_fabs; break; - case GLSLstd450FSign: op = nir_op_fsign; break; - case GLSLstd450Floor: op = nir_op_ffloor; break; - case GLSLstd450Ceil: op = nir_op_fceil; break; - case GLSLstd450Fract: op = nir_op_ffract; break; - case GLSLstd450Radians: - val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); - return; - case GLSLstd450Degrees: - val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); - return; - case GLSLstd450Sin: op = nir_op_fsin; break; - case GLSLstd450Cos: op = nir_op_fcos; break; - case GLSLstd450Tan: - val->ssa->def = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), - nir_fcos(&b->nb, src[0])); - return; - case GLSLstd450Pow: op = nir_op_fpow; break; - case 
GLSLstd450Exp2: op = nir_op_fexp2; break; - case GLSLstd450Log2: op = nir_op_flog2; break; - case GLSLstd450Sqrt: op = nir_op_fsqrt; break; - case GLSLstd450InverseSqrt: op = nir_op_frsq; break; - - case GLSLstd450Modf: op = nir_op_fmod; break; - case GLSLstd450FMin: op = nir_op_fmin; break; - case GLSLstd450UMin: op = nir_op_umin; break; - case GLSLstd450SMin: op = nir_op_imin; break; - case GLSLstd450FMax: op = nir_op_fmax; break; - case GLSLstd450UMax: op = nir_op_umax; break; - case GLSLstd450SMax: op = nir_op_imax; break; - case GLSLstd450FMix: op = nir_op_flrp; break; - case GLSLstd450Step: - val->ssa->def = nir_sge(&b->nb, src[1], src[0]); - return; - - case GLSLstd450Fma: op = nir_op_ffma; break; - case GLSLstd450Ldexp: op = nir_op_ldexp; break; - - /* Packing/Unpacking functions */ - case GLSLstd450PackSnorm4x8: op = nir_op_pack_snorm_4x8; break; - case GLSLstd450PackUnorm4x8: op = nir_op_pack_unorm_4x8; break; - case GLSLstd450PackSnorm2x16: op = nir_op_pack_snorm_2x16; break; - case GLSLstd450PackUnorm2x16: op = nir_op_pack_unorm_2x16; break; - case GLSLstd450PackHalf2x16: op = nir_op_pack_half_2x16; break; - case GLSLstd450UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break; - case GLSLstd450UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break; - case GLSLstd450UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break; - case GLSLstd450UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break; - case GLSLstd450UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; - - case GLSLstd450Length: - val->ssa->def = build_length(&b->nb, src[0]); - return; - case GLSLstd450Distance: - val->ssa->def = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); - return; - case GLSLstd450Normalize: - val->ssa->def = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); - return; - - case GLSLstd450Exp: - case GLSLstd450Log: - case GLSLstd450FClamp: - case GLSLstd450UClamp: - case GLSLstd450SClamp: - case GLSLstd450Asin: - case GLSLstd450Acos: - case GLSLstd450Atan: - case 
GLSLstd450Atan2: - case GLSLstd450Sinh: - case GLSLstd450Cosh: - case GLSLstd450Tanh: - case GLSLstd450Asinh: - case GLSLstd450Acosh: - case GLSLstd450Atanh: - case GLSLstd450SmoothStep: - case GLSLstd450Frexp: - case GLSLstd450PackDouble2x32: - case GLSLstd450UnpackDouble2x32: - case GLSLstd450Cross: - case GLSLstd450FaceForward: - case GLSLstd450Reflect: - case GLSLstd450Refract: - case GLSLstd450IMix: - default: - unreachable("Unhandled opcode"); - } - - nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); - nir_ssa_dest_init(&instr->instr, &instr->dest.dest, - glsl_get_vector_elements(val->ssa->type), val->name); - instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1; - val->ssa->def = &instr->dest.dest.ssa; - - for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) - instr->src[i].src = nir_src_for_ssa(src[i]); - - nir_builder_instr_insert(&b->nb, &instr->instr); -} - -bool -vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, - const uint32_t *words, unsigned count) -{ - switch ((enum GLSLstd450)ext_opcode) { - case GLSLstd450Determinant: - case GLSLstd450MatrixInverse: - case GLSLstd450InterpolateAtCentroid: - case GLSLstd450InterpolateAtSample: - case GLSLstd450InterpolateAtOffset: - unreachable("Unhandled opcode"); - - default: - handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, words, count); - } - - return true; -} diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c deleted file mode 100644 index ad7a97cc948..00000000000 --- a/src/glsl/nir/spirv_to_nir.c +++ /dev/null @@ -1,3952 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to 
permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "spirv_to_nir_private.h" -#include "nir_vla.h" -#include "nir_control_flow.h" - -static struct vtn_ssa_value * -vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type) -{ - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = type; - - if (glsl_type_is_vector_or_scalar(type)) { - unsigned num_components = glsl_get_vector_elements(val->type); - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(b->shader, num_components); - - nir_instr_insert_before_cf_list(&b->impl->body, &undef->instr); - val->def = &undef->def; - } else { - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - if (glsl_type_is_matrix(type)) { - const struct glsl_type *elem_type = - glsl_vector_type(glsl_get_base_type(type), - glsl_get_vector_elements(type)); - - for (unsigned i = 0; i < elems; i++) - val->elems[i] = vtn_undef_ssa_value(b, elem_type); - } else if (glsl_type_is_array(type)) { - const struct glsl_type *elem_type = glsl_get_array_element(type); - for (unsigned i = 0; i < elems; i++) - val->elems[i] = vtn_undef_ssa_value(b, elem_type); - } else { - for 
(unsigned i = 0; i < elems; i++) { - const struct glsl_type *elem_type = glsl_get_struct_field(type, i); - val->elems[i] = vtn_undef_ssa_value(b, elem_type); - } - } - } - - return val; -} - -static struct vtn_ssa_value * -vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, - const struct glsl_type *type) -{ - struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); - - if (entry) - return entry->data; - - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = type; - - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - if (glsl_type_is_vector_or_scalar(type)) { - unsigned num_components = glsl_get_vector_elements(val->type); - nir_load_const_instr *load = - nir_load_const_instr_create(b->shader, num_components); - - for (unsigned i = 0; i < num_components; i++) - load->value.u[i] = constant->value.u[i]; - - nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); - val->def = &load->def; - } else { - assert(glsl_type_is_matrix(type)); - unsigned rows = glsl_get_vector_elements(val->type); - unsigned columns = glsl_get_matrix_columns(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); - - for (unsigned i = 0; i < columns; i++) { - struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); - col_val->type = glsl_get_column_type(val->type); - nir_load_const_instr *load = - nir_load_const_instr_create(b->shader, rows); - - for (unsigned j = 0; j < rows; j++) - load->value.u[j] = constant->value.u[rows * i + j]; - - nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); - col_val->def = &load->def; - - val->elems[i] = col_val; - } - } - break; - - case GLSL_TYPE_ARRAY: { - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - const struct glsl_type *elem_type = glsl_get_array_element(val->type); - for (unsigned 
i = 0; i < elems; i++) - val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], - elem_type); - break; - } - - case GLSL_TYPE_STRUCT: { - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) { - const struct glsl_type *elem_type = - glsl_get_struct_field(val->type, i); - val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], - elem_type); - } - break; - } - - default: - unreachable("bad constant type"); - } - - return val; -} - -static struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, - struct vtn_type *src_type); - -struct vtn_ssa_value * -vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) -{ - struct vtn_value *val = vtn_untyped_value(b, value_id); - switch (val->value_type) { - case vtn_value_type_undef: - return vtn_undef_ssa_value(b, val->type->type); - - case vtn_value_type_constant: - return vtn_const_ssa_value(b, val->constant, val->const_type); - - case vtn_value_type_ssa: - return val->ssa; - - case vtn_value_type_deref: - /* This is needed for function parameters */ - return vtn_variable_load(b, val->deref, val->deref_type); - - default: - unreachable("Invalid type for an SSA value"); - } -} - -static char * -vtn_string_literal(struct vtn_builder *b, const uint32_t *words, - unsigned word_count) -{ - return ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); -} - -static const uint32_t * -vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, - const uint32_t *end, vtn_instruction_handler handler) -{ - const uint32_t *w = start; - while (w < end) { - SpvOp opcode = w[0] & SpvOpCodeMask; - unsigned count = w[0] >> SpvWordCountShift; - assert(count >= 1 && w + count <= end); - - if (opcode == SpvOpNop) { - w++; - continue; - } - - if (!handler(b, opcode, w, count)) - return w; - - w += count; - } - assert(w == end); - return w; -} - -static void -vtn_handle_extension(struct vtn_builder 
*b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpExtInstImport: { - struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); - if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { - val->ext_handler = vtn_handle_glsl450_instruction; - } else { - assert(!"Unsupported extension"); - } - break; - } - - case SpvOpExtInst: { - struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); - bool handled = val->ext_handler(b, w[4], w, count); - (void)handled; - assert(handled); - break; - } - - default: - unreachable("Unhandled opcode"); - } -} - -static void -_foreach_decoration_helper(struct vtn_builder *b, - struct vtn_value *base_value, - int parent_member, - struct vtn_value *value, - vtn_decoration_foreach_cb cb, void *data) -{ - for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { - int member; - if (dec->member < 0) { - member = parent_member; - } else { - assert(parent_member == -1); - member = dec->member; - } - - if (dec->group) { - assert(dec->group->value_type == vtn_value_type_decoration_group); - _foreach_decoration_helper(b, base_value, member, dec->group, - cb, data); - } else { - cb(b, base_value, member, dec, data); - } - } -} - -/** Iterates (recursively if needed) over all of the decorations on a value - * - * This function iterates over all of the decorations applied to a given - * value. If it encounters a decoration group, it recurses into the group - * and iterates over all of those decorations as well. 
- */ -void -vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, - vtn_decoration_foreach_cb cb, void *data) -{ - _foreach_decoration_helper(b, value, -1, value, cb, data); -} - -static void -vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - const uint32_t *w_end = w + count; - const uint32_t target = w[1]; - w += 2; - - int member = -1; - switch (opcode) { - case SpvOpDecorationGroup: - vtn_push_value(b, target, vtn_value_type_undef); - break; - - case SpvOpMemberDecorate: - member = *(w++); - /* fallthrough */ - case SpvOpDecorate: { - struct vtn_value *val = &b->values[target]; - - struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); - dec->member = member; - dec->decoration = *(w++); - dec->literals = w; - - /* Link into the list */ - dec->next = val->decoration; - val->decoration = dec; - break; - } - - case SpvOpGroupMemberDecorate: - member = *(w++); - /* fallthrough */ - case SpvOpGroupDecorate: { - struct vtn_value *group = &b->values[target]; - assert(group->value_type == vtn_value_type_decoration_group); - - for (; w < w_end; w++) { - struct vtn_value *val = &b->values[*w]; - struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); - dec->member = member; - dec->group = group; - - /* Link into the list */ - dec->next = val->decoration; - val->decoration = dec; - } - break; - } - - default: - unreachable("Unhandled opcode"); - } -} - -struct member_decoration_ctx { - struct glsl_struct_field *fields; - struct vtn_type *type; -}; - -/* does a shallow copy of a vtn_type */ - -static struct vtn_type * -vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) -{ - struct vtn_type *dest = ralloc(b, struct vtn_type); - dest->type = src->type; - dest->is_builtin = src->is_builtin; - if (src->is_builtin) - dest->builtin = src->builtin; - - if (!glsl_type_is_vector_or_scalar(src->type)) { - switch (glsl_get_base_type(src->type)) { - case GLSL_TYPE_ARRAY: - dest->array_element 
= src->array_element; - dest->stride = src->stride; - break; - - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - /* matrices */ - dest->row_major = src->row_major; - dest->stride = src->stride; - break; - - case GLSL_TYPE_STRUCT: { - unsigned elems = glsl_get_length(src->type); - - dest->members = ralloc_array(b, struct vtn_type *, elems); - memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); - - dest->offsets = ralloc_array(b, unsigned, elems); - memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); - break; - } - - default: - unreachable("unhandled type"); - } - } - - return dest; -} - -static void -struct_member_decoration_cb(struct vtn_builder *b, - struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *void_ctx) -{ - struct member_decoration_ctx *ctx = void_ctx; - - if (member < 0) - return; - - switch (dec->decoration) { - case SpvDecorationRelaxedPrecision: - break; /* FIXME: Do nothing with this for now. 
*/ - case SpvDecorationNoPerspective: - ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; - break; - case SpvDecorationFlat: - ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; - break; - case SpvDecorationCentroid: - ctx->fields[member].centroid = true; - break; - case SpvDecorationSample: - ctx->fields[member].sample = true; - break; - case SpvDecorationLocation: - ctx->fields[member].location = dec->literals[0]; - break; - case SpvDecorationBuiltIn: - ctx->type->members[member] = vtn_type_copy(b, - ctx->type->members[member]); - ctx->type->members[member]->is_builtin = true; - ctx->type->members[member]->builtin = dec->literals[0]; - ctx->type->builtin_block = true; - break; - case SpvDecorationOffset: - ctx->type->offsets[member] = dec->literals[0]; - break; - case SpvDecorationMatrixStride: - ctx->type->members[member]->stride = dec->literals[0]; - break; - case SpvDecorationColMajor: - break; /* Nothing to do here. Column-major is the default. */ - default: - unreachable("Unhandled member decoration"); - } -} - -static void -type_decoration_cb(struct vtn_builder *b, - struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *ctx) -{ - struct vtn_type *type = val->type; - - if (member != -1) - return; - - switch (dec->decoration) { - case SpvDecorationArrayStride: - type->stride = dec->literals[0]; - break; - case SpvDecorationBlock: - type->block = true; - break; - case SpvDecorationBufferBlock: - type->buffer_block = true; - break; - case SpvDecorationGLSLShared: - case SpvDecorationGLSLPacked: - /* Ignore these, since we get explicit offsets anyways */ - break; - - case SpvDecorationStream: - assert(dec->literals[0] == 0); - break; - - default: - unreachable("Unhandled type decoration"); - } -} - -static unsigned -translate_image_format(SpvImageFormat format) -{ - switch (format) { - case SpvImageFormatUnknown: return 0; /* GL_NONE */ - case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */ - case 
SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */ - case SpvImageFormatR32f: return 0x822E; /* GL_R32F */ - case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */ - case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */ - case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */ - case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */ - case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */ - case SpvImageFormatR16f: return 0x822D; /* GL_R16F */ - case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */ - case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */ - case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */ - case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */ - case SpvImageFormatR16: return 0x822A; /* GL_R16 */ - case SpvImageFormatR8: return 0x8229; /* GL_R8 */ - case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */ - case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */ - case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */ - case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */ - case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */ - case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */ - case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */ - case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */ - case SpvImageFormatR32i: return 0x8235; /* GL_R32I */ - case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */ - case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */ - case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */ - case SpvImageFormatR16i: return 0x8233; /* GL_R16I */ - case SpvImageFormatR8i: return 0x8231; /* GL_R8I */ - case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */ - case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */ - case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */ - case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */ - case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */ - case 
SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */ - case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */ - case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */ - case SpvImageFormatR16ui: return 0x823A; /* GL_RG16UI */ - case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */ - default: - assert(!"Invalid image format"); - return 0; - } -} - -static void -vtn_handle_type(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); - - val->type = rzalloc(b, struct vtn_type); - val->type->is_builtin = false; - - switch (opcode) { - case SpvOpTypeVoid: - val->type->type = glsl_void_type(); - break; - case SpvOpTypeBool: - val->type->type = glsl_bool_type(); - break; - case SpvOpTypeInt: - val->type->type = glsl_int_type(); - break; - case SpvOpTypeFloat: - val->type->type = glsl_float_type(); - break; - - case SpvOpTypeVector: { - const struct glsl_type *base = - vtn_value(b, w[2], vtn_value_type_type)->type->type; - unsigned elems = w[3]; - - assert(glsl_type_is_scalar(base)); - val->type->type = glsl_vector_type(glsl_get_base_type(base), elems); - break; - } - - case SpvOpTypeMatrix: { - struct vtn_type *base = - vtn_value(b, w[2], vtn_value_type_type)->type; - unsigned columns = w[3]; - - assert(glsl_type_is_vector(base->type)); - val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), - glsl_get_vector_elements(base->type), - columns); - val->type->array_element = base; - val->type->row_major = false; - val->type->stride = 0; - break; - } - - case SpvOpTypeRuntimeArray: - case SpvOpTypeArray: { - struct vtn_type *array_element = - vtn_value(b, w[2], vtn_value_type_type)->type; - - unsigned length; - if (opcode == SpvOpTypeRuntimeArray) { - /* A length of 0 is used to denote unsized arrays */ - length = 0; - } else { - length = - vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0]; - } - - val->type->type = glsl_array_type(array_element->type, 
length); - val->type->array_element = array_element; - val->type->stride = 0; - break; - } - - case SpvOpTypeStruct: { - unsigned num_fields = count - 2; - val->type->members = ralloc_array(b, struct vtn_type *, num_fields); - val->type->offsets = ralloc_array(b, unsigned, num_fields); - - NIR_VLA(struct glsl_struct_field, fields, count); - for (unsigned i = 0; i < num_fields; i++) { - /* TODO: Handle decorators */ - val->type->members[i] = - vtn_value(b, w[i + 2], vtn_value_type_type)->type; - fields[i].type = val->type->members[i]->type; - fields[i].name = ralloc_asprintf(b, "field%d", i); - fields[i].location = -1; - fields[i].interpolation = 0; - fields[i].centroid = 0; - fields[i].sample = 0; - fields[i].matrix_layout = 2; - } - - struct member_decoration_ctx ctx = { - .fields = fields, - .type = val->type - }; - - vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); - - const char *name = val->name ? val->name : "struct"; - - val->type->type = glsl_struct_type(fields, num_fields, name); - break; - } - - case SpvOpTypeFunction: { - const struct glsl_type *return_type = - vtn_value(b, w[2], vtn_value_type_type)->type->type; - NIR_VLA(struct glsl_function_param, params, count - 3); - for (unsigned i = 0; i < count - 3; i++) { - params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; - - /* FIXME: */ - params[i].in = true; - params[i].out = true; - } - val->type->type = glsl_function_type(return_type, params, count - 3); - break; - } - - case SpvOpTypePointer: - /* FIXME: For now, we'll just do the really lame thing and return - * the same type. 
The validator should ensure that the proper number - * of dereferences happen - */ - val->type = vtn_value(b, w[3], vtn_value_type_type)->type; - break; - - case SpvOpTypeImage: { - const struct glsl_type *sampled_type = - vtn_value(b, w[2], vtn_value_type_type)->type->type; - - assert(glsl_type_is_vector_or_scalar(sampled_type)); - - enum glsl_sampler_dim dim; - switch ((SpvDim)w[3]) { - case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; - case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; - case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; - case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; - case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; - case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; - default: - unreachable("Invalid SPIR-V Sampler dimension"); - } - - bool is_shadow = w[4]; - bool is_array = w[5]; - bool multisampled = w[6]; - unsigned sampled = w[7]; - SpvImageFormat format = w[8]; - - assert(!multisampled && "FIXME: Handl multi-sampled textures"); - - val->type->image_format = translate_image_format(format); - - if (sampled == 1) { - val->type->type = glsl_sampler_type(dim, is_shadow, is_array, - glsl_get_base_type(sampled_type)); - } else if (sampled == 2) { - assert(format); - assert(!is_shadow); - val->type->type = glsl_image_type(dim, is_array, - glsl_get_base_type(sampled_type)); - } else { - assert(!"We need to know if the image will be sampled"); - } - break; - } - - case SpvOpTypeSampledImage: - val->type = vtn_value(b, w[2], vtn_value_type_type)->type; - break; - - case SpvOpTypeSampler: - /* The actual sampler type here doesn't really matter. It gets - * thrown away the moment you combine it with an image. What really - * matters is that it's a sampler type as opposed to an integer type - * so the backend knows what to do. - * - * TODO: Eventually we should consider adding a "bare sampler" type - * to glsl_types. 
- */ - val->type->type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, - GLSL_TYPE_FLOAT); - break; - - case SpvOpTypeOpaque: - case SpvOpTypeEvent: - case SpvOpTypeDeviceEvent: - case SpvOpTypeReserveId: - case SpvOpTypeQueue: - case SpvOpTypePipe: - default: - unreachable("Unhandled opcode"); - } - - vtn_foreach_decoration(b, val, type_decoration_cb, NULL); -} - -static void -vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); - val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->constant = ralloc(b, nir_constant); - switch (opcode) { - case SpvOpConstantTrue: - assert(val->const_type == glsl_bool_type()); - val->constant->value.u[0] = NIR_TRUE; - break; - case SpvOpConstantFalse: - assert(val->const_type == glsl_bool_type()); - val->constant->value.u[0] = NIR_FALSE; - break; - case SpvOpConstant: - assert(glsl_type_is_scalar(val->const_type)); - val->constant->value.u[0] = w[3]; - break; - case SpvOpConstantComposite: { - unsigned elem_count = count - 3; - nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); - for (unsigned i = 0; i < elem_count; i++) - elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; - - switch (glsl_get_base_type(val->const_type)) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(val->const_type)) { - unsigned rows = glsl_get_vector_elements(val->const_type); - assert(glsl_get_matrix_columns(val->const_type) == elem_count); - for (unsigned i = 0; i < elem_count; i++) - for (unsigned j = 0; j < rows; j++) - val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; - } else { - assert(glsl_type_is_vector(val->const_type)); - assert(glsl_get_vector_elements(val->const_type) == elem_count); - for (unsigned i = 0; i < elem_count; i++) - val->constant->value.u[i] = elems[i]->value.u[0]; - } 
- ralloc_free(elems); - break; - - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_ARRAY: - ralloc_steal(val->constant, elems); - val->constant->elements = elems; - break; - - default: - unreachable("Unsupported type for constants"); - } - break; - } - - default: - unreachable("Unhandled opcode"); - } -} - -static void -set_mode_system_value(nir_variable_mode *mode) -{ - assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); - *mode = nir_var_system_value; -} - -static void -validate_per_vertex_mode(struct vtn_builder *b, nir_variable_mode mode) -{ - switch (b->shader->stage) { - case MESA_SHADER_VERTEX: - assert(mode == nir_var_shader_out); - break; - case MESA_SHADER_GEOMETRY: - assert(mode == nir_var_shader_out || mode == nir_var_shader_in); - break; - default: - assert(!"Invalid shader stage"); - } -} - -static void -vtn_get_builtin_location(struct vtn_builder *b, - SpvBuiltIn builtin, int *location, - nir_variable_mode *mode) -{ - switch (builtin) { - case SpvBuiltInPosition: - *location = VARYING_SLOT_POS; - validate_per_vertex_mode(b, *mode); - break; - case SpvBuiltInPointSize: - *location = VARYING_SLOT_PSIZ; - validate_per_vertex_mode(b, *mode); - break; - case SpvBuiltInClipDistance: - *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */ - validate_per_vertex_mode(b, *mode); - break; - case SpvBuiltInCullDistance: - /* XXX figure this out */ - unreachable("unhandled builtin"); - case SpvBuiltInVertexId: - /* Vulkan defines VertexID to be zero-based and reserves the new - * builtin keyword VertexIndex to indicate the non-zero-based value. 
- */ - *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; - set_mode_system_value(mode); - break; - case SpvBuiltInInstanceId: - *location = SYSTEM_VALUE_INSTANCE_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInPrimitiveId: - *location = VARYING_SLOT_PRIMITIVE_ID; - *mode = nir_var_shader_out; - break; - case SpvBuiltInInvocationId: - *location = SYSTEM_VALUE_INVOCATION_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInLayer: - *location = VARYING_SLOT_LAYER; - *mode = nir_var_shader_out; - break; - case SpvBuiltInTessLevelOuter: - case SpvBuiltInTessLevelInner: - case SpvBuiltInTessCoord: - case SpvBuiltInPatchVertices: - unreachable("no tessellation support"); - case SpvBuiltInFragCoord: - *location = VARYING_SLOT_POS; - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - assert(*mode == nir_var_shader_in); - break; - case SpvBuiltInPointCoord: - *location = VARYING_SLOT_PNTC; - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - assert(*mode == nir_var_shader_in); - break; - case SpvBuiltInFrontFacing: - *location = VARYING_SLOT_FACE; - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - assert(*mode == nir_var_shader_in); - break; - case SpvBuiltInSampleId: - *location = SYSTEM_VALUE_SAMPLE_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInSamplePosition: - *location = SYSTEM_VALUE_SAMPLE_POS; - set_mode_system_value(mode); - break; - case SpvBuiltInSampleMask: - *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? 
*/ - set_mode_system_value(mode); - break; - case SpvBuiltInFragDepth: - *location = FRAG_RESULT_DEPTH; - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - assert(*mode == nir_var_shader_out); - break; - case SpvBuiltInNumWorkgroups: - *location = SYSTEM_VALUE_NUM_WORK_GROUPS; - set_mode_system_value(mode); - break; - case SpvBuiltInWorkgroupSize: - /* This should already be handled */ - unreachable("unsupported builtin"); - break; - case SpvBuiltInWorkgroupId: - *location = SYSTEM_VALUE_WORK_GROUP_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInLocalInvocationId: - *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInLocalInvocationIndex: - *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; - set_mode_system_value(mode); - break; - case SpvBuiltInGlobalInvocationId: - *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInHelperInvocation: - default: - unreachable("unsupported builtin"); - } -} - -static void -var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *void_var) -{ - assert(val->value_type == vtn_value_type_deref); - assert(val->deref->deref.child == NULL); - assert(val->deref->var == void_var); - - nir_variable *var = void_var; - switch (dec->decoration) { - case SpvDecorationRelaxedPrecision: - break; /* FIXME: Do nothing with this for now. 
*/ - case SpvDecorationNoPerspective: - var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; - break; - case SpvDecorationFlat: - var->data.interpolation = INTERP_QUALIFIER_FLAT; - break; - case SpvDecorationCentroid: - var->data.centroid = true; - break; - case SpvDecorationSample: - var->data.sample = true; - break; - case SpvDecorationInvariant: - var->data.invariant = true; - break; - case SpvDecorationConstant: - assert(var->constant_initializer != NULL); - var->data.read_only = true; - break; - case SpvDecorationNonWritable: - var->data.read_only = true; - break; - case SpvDecorationLocation: - var->data.location = dec->literals[0]; - break; - case SpvDecorationComponent: - var->data.location_frac = dec->literals[0]; - break; - case SpvDecorationIndex: - var->data.explicit_index = true; - var->data.index = dec->literals[0]; - break; - case SpvDecorationBinding: - var->data.explicit_binding = true; - var->data.binding = dec->literals[0]; - break; - case SpvDecorationDescriptorSet: - var->data.descriptor_set = dec->literals[0]; - break; - case SpvDecorationBuiltIn: { - SpvBuiltIn builtin = dec->literals[0]; - - if (builtin == SpvBuiltInWorkgroupSize) { - /* This shouldn't be a builtin. It's actually a constant. 
*/ - var->data.mode = nir_var_global; - var->data.read_only = true; - - nir_constant *val = ralloc(var, nir_constant); - val->value.u[0] = b->shader->info.cs.local_size[0]; - val->value.u[1] = b->shader->info.cs.local_size[1]; - val->value.u[2] = b->shader->info.cs.local_size[2]; - var->constant_initializer = val; - break; - } - - nir_variable_mode mode = var->data.mode; - vtn_get_builtin_location(b, builtin, &var->data.location, &mode); - var->data.explicit_location = true; - var->data.mode = mode; - if (mode == nir_var_shader_in || mode == nir_var_system_value) - var->data.read_only = true; - - if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) - var->data.origin_upper_left = b->origin_upper_left; - - if (mode == nir_var_shader_out) - b->builtins[dec->literals[0]].out = var; - else - b->builtins[dec->literals[0]].in = var; - break; - } - case SpvDecorationRowMajor: - case SpvDecorationColMajor: - case SpvDecorationGLSLShared: - case SpvDecorationPatch: - case SpvDecorationRestrict: - case SpvDecorationAliased: - case SpvDecorationVolatile: - case SpvDecorationCoherent: - case SpvDecorationNonReadable: - case SpvDecorationUniform: - /* This is really nice but we have no use for it right now. 
*/ - case SpvDecorationCPacked: - case SpvDecorationSaturatedConversion: - case SpvDecorationStream: - case SpvDecorationOffset: - case SpvDecorationXfbBuffer: - case SpvDecorationFuncParamAttr: - case SpvDecorationFPRoundingMode: - case SpvDecorationFPFastMathMode: - case SpvDecorationLinkageAttributes: - case SpvDecorationSpecId: - break; - default: - unreachable("Unhandled variable decoration"); - } -} - -static nir_variable * -get_builtin_variable(struct vtn_builder *b, - nir_variable_mode mode, - const struct glsl_type *type, - SpvBuiltIn builtin) -{ - nir_variable *var; - if (mode == nir_var_shader_out) - var = b->builtins[builtin].out; - else - var = b->builtins[builtin].in; - - if (!var) { - int location; - vtn_get_builtin_location(b, builtin, &location, &mode); - - var = nir_variable_create(b->shader, mode, type, "builtin"); - - var->data.location = location; - var->data.explicit_location = true; - - if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) - var->data.origin_upper_left = b->origin_upper_left; - - if (mode == nir_var_shader_out) - b->builtins[builtin].out = var; - else - b->builtins[builtin].in = var; - } - - return var; -} - -static struct vtn_ssa_value * -_vtn_variable_load(struct vtn_builder *b, - nir_deref_var *src_deref, nir_deref *src_deref_tail) -{ - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = src_deref_tail->type; - - /* The deref tail may contain a deref to select a component of a vector (in - * other words, it might not be an actual tail) so we have to save it away - * here since we overwrite it later. - */ - nir_deref *old_child = src_deref_tail->child; - - if (glsl_type_is_vector_or_scalar(val->type)) { - /* Terminate the deref chain in case there is one more link to pick - * off a component of the vector. 
- */ - src_deref_tail->child = NULL; - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); - load->variables[0] = - nir_deref_as_var(nir_copy_deref(load, &src_deref->deref)); - load->num_components = glsl_get_vector_elements(val->type); - nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); - - nir_builder_instr_insert(&b->nb, &load->instr); - - if (src_deref->var->data.mode == nir_var_uniform && - glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) { - /* Uniform boolean loads need to be fixed up since they're defined - * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. - */ - val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); - } else { - val->def = &load->dest.ssa; - } - } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(val->type)) { - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - - nir_deref_array *deref = nir_deref_array_create(b); - deref->deref_array_type = nir_deref_array_type_direct; - deref->deref.type = glsl_get_array_element(val->type); - src_deref_tail->child = &deref->deref; - for (unsigned i = 0; i < elems; i++) { - deref->base_offset = i; - val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); - } - } else { - assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - - nir_deref_struct *deref = nir_deref_struct_create(b, 0); - src_deref_tail->child = &deref->deref; - for (unsigned i = 0; i < elems; i++) { - deref->index = i; - deref->deref.type = glsl_get_struct_field(val->type, i); - val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); - } - } - - src_deref_tail->child = old_child; - - return val; -} - -static void -_vtn_variable_store(struct vtn_builder *b, - nir_deref_var *dest_deref, nir_deref *dest_deref_tail, - struct vtn_ssa_value 
*src) -{ - nir_deref *old_child = dest_deref_tail->child; - - if (glsl_type_is_vector_or_scalar(src->type)) { - /* Terminate the deref chain in case there is one more link to pick - * off a component of the vector. - */ - dest_deref_tail->child = NULL; - - nir_intrinsic_instr *store = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); - store->variables[0] = - nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); - store->num_components = glsl_get_vector_elements(src->type); - store->const_index[0] = (1 << store->num_components) - 1; - store->src[0] = nir_src_for_ssa(src->def); - - nir_builder_instr_insert(&b->nb, &store->instr); - } else if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(src->type)) { - unsigned elems = glsl_get_length(src->type); - - nir_deref_array *deref = nir_deref_array_create(b); - deref->deref_array_type = nir_deref_array_type_direct; - deref->deref.type = glsl_get_array_element(src->type); - dest_deref_tail->child = &deref->deref; - for (unsigned i = 0; i < elems; i++) { - deref->base_offset = i; - _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); - } - } else { - assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); - unsigned elems = glsl_get_length(src->type); - - nir_deref_struct *deref = nir_deref_struct_create(b, 0); - dest_deref_tail->child = &deref->deref; - for (unsigned i = 0; i < elems; i++) { - deref->index = i; - deref->deref.type = glsl_get_struct_field(src->type, i); - _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); - } - } - - dest_deref_tail->child = old_child; -} - -static nir_ssa_def * -nir_vulkan_resource_index(nir_builder *b, unsigned set, unsigned binding, - nir_variable_mode mode, nir_ssa_def *array_index) -{ - if (array_index == NULL) - array_index = nir_imm_int(b, 0); - - nir_intrinsic_instr *instr = - nir_intrinsic_instr_create(b->shader, - nir_intrinsic_vulkan_resource_index); - instr->src[0] = nir_src_for_ssa(array_index); - 
instr->const_index[0] = set; - instr->const_index[1] = binding; - instr->const_index[2] = mode; - - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); - nir_builder_instr_insert(b, &instr->instr); - - return &instr->dest.ssa; -} - -static struct vtn_ssa_value * -_vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, - unsigned set, unsigned binding, nir_variable_mode mode, - nir_ssa_def *index, nir_ssa_def *offset, struct vtn_type *type) -{ - struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); - val->type = type->type; - val->transposed = NULL; - if (glsl_type_is_vector_or_scalar(type->type)) { - nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); - load->num_components = glsl_get_vector_elements(type->type); - - switch (op) { - case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ssbo: { - nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, - set, binding, - mode, index); - load->src[0] = nir_src_for_ssa(res_index); - load->src[1] = nir_src_for_ssa(offset); - break; - } - - case nir_intrinsic_load_push_constant: - load->src[0] = nir_src_for_ssa(offset); - break; - - default: - unreachable("Invalid block load intrinsic"); - } - - nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); - nir_builder_instr_insert(&b->nb, &load->instr); - - if (glsl_get_base_type(type->type) == GLSL_TYPE_BOOL) { - /* Loads of booleans from externally visible memory need to be - * fixed up since they're defined to be zero/nonzero rather than - * NIR_FALSE/NIR_TRUE. 
- */ - val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); - } else { - val->def = &load->dest.ssa; - } - } else { - unsigned elems = glsl_get_length(type->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - if (glsl_type_is_struct(type->type)) { - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *child_offset = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); - val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, - child_offset, type->members[i]); - } - } else { - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *child_offset = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); - val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, - child_offset,type->array_element); - } - } - } - - return val; -} - -static void -vtn_block_get_offset(struct vtn_builder *b, nir_deref_var *src, - struct vtn_type **type, nir_deref *src_tail, - nir_ssa_def **index, nir_ssa_def **offset) -{ - nir_deref *deref = &src->deref; - - if (deref->child->deref_type == nir_deref_type_array) { - deref = deref->child; - *type = (*type)->array_element; - nir_deref_array *deref_array = nir_deref_as_array(deref); - *index = nir_imm_int(&b->nb, deref_array->base_offset); - - if (deref_array->deref_array_type == nir_deref_array_type_indirect) - *index = nir_iadd(&b->nb, *index, deref_array->indirect.ssa); - } else { - *index = nir_imm_int(&b->nb, 0); - } - - *offset = nir_imm_int(&b->nb, 0); - while (deref != src_tail) { - deref = deref->child; - switch (deref->deref_type) { - case nir_deref_type_array: { - nir_deref_array *deref_array = nir_deref_as_array(deref); - nir_ssa_def *off = nir_imm_int(&b->nb, deref_array->base_offset); - - if (deref_array->deref_array_type == nir_deref_array_type_indirect) - off = nir_iadd(&b->nb, off, deref_array->indirect.ssa); - - off = nir_imul(&b->nb, off, nir_imm_int(&b->nb, (*type)->stride)); - *offset = nir_iadd(&b->nb, *offset, off); - - *type = 
(*type)->array_element; - break; - } - - case nir_deref_type_struct: { - nir_deref_struct *deref_struct = nir_deref_as_struct(deref); - - unsigned elem_off = (*type)->offsets[deref_struct->index]; - *offset = nir_iadd(&b->nb, *offset, nir_imm_int(&b->nb, elem_off)); - - *type = (*type)->members[deref_struct->index]; - break; - } - - default: - unreachable("unknown deref type"); - } - } -} - -static struct vtn_ssa_value * -vtn_block_load(struct vtn_builder *b, nir_deref_var *src, - struct vtn_type *type, nir_deref *src_tail) -{ - nir_ssa_def *index; - nir_ssa_def *offset; - vtn_block_get_offset(b, src, &type, src_tail, &index, &offset); - - nir_intrinsic_op op; - if (src->var->data.mode == nir_var_uniform) { - if (src->var->data.descriptor_set >= 0) { - /* UBO load */ - assert(src->var->data.binding >= 0); - - op = nir_intrinsic_load_ubo; - } else { - /* Push constant load */ - assert(src->var->data.descriptor_set == -1 && - src->var->data.binding == -1); - - op = nir_intrinsic_load_push_constant; - } - } else { - assert(src->var->data.mode == nir_var_shader_storage); - op = nir_intrinsic_load_ssbo; - } - - return _vtn_block_load(b, op, src->var->data.descriptor_set, - src->var->data.binding, src->var->data.mode, - index, offset, type); -} - -/* - * Gets the NIR-level deref tail, which may have as a child an array deref - * selecting which component due to OpAccessChain supporting per-component - * indexing in SPIR-V. 
- */ - -static nir_deref * -get_deref_tail(nir_deref_var *deref) -{ - nir_deref *cur = &deref->deref; - while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) - cur = cur->child; - - return cur; -} - -static nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, - nir_ssa_def *src, unsigned index); - -static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, - nir_ssa_def *src, - nir_ssa_def *index); - -static bool -variable_is_external_block(nir_variable *var) -{ - return var->interface_type && - glsl_type_is_struct(var->interface_type) && - (var->data.mode == nir_var_uniform || - var->data.mode == nir_var_shader_storage); -} - -static struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, - struct vtn_type *src_type) -{ - nir_deref *src_tail = get_deref_tail(src); - - struct vtn_ssa_value *val; - if (variable_is_external_block(src->var)) - val = vtn_block_load(b, src, src_type, src_tail); - else - val = _vtn_variable_load(b, src, src_tail); - - if (src_tail->child) { - nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); - assert(vec_deref->deref.child == NULL); - val->type = vec_deref->deref.type; - if (vec_deref->deref_array_type == nir_deref_array_type_direct) - val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); - else - val->def = vtn_vector_extract_dynamic(b, val->def, - vec_deref->indirect.ssa); - } - - return val; -} - -static void -_vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op, - struct vtn_ssa_value *src, unsigned set, unsigned binding, - nir_variable_mode mode, nir_ssa_def *index, - nir_ssa_def *offset, struct vtn_type *type) -{ - assert(src->type == type->type); - if (glsl_type_is_vector_or_scalar(type->type)) { - nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); - store->num_components = glsl_get_vector_elements(type->type); - store->const_index[0] = (1 << store->num_components) - 1; - store->src[0] = nir_src_for_ssa(src->def); - - 
nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, - set, binding, - mode, index); - store->src[1] = nir_src_for_ssa(res_index); - store->src[2] = nir_src_for_ssa(offset); - - nir_builder_instr_insert(&b->nb, &store->instr); - } else { - unsigned elems = glsl_get_length(type->type); - if (glsl_type_is_struct(type->type)) { - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *child_offset = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); - _vtn_block_store(b, op, src->elems[i], set, binding, mode, - index, child_offset, type->members[i]); - } - } else { - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *child_offset = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); - _vtn_block_store(b, op, src->elems[i], set, binding, mode, - index, child_offset, type->array_element); - } - } - } -} - -static void -vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, - nir_deref_var *dest, struct vtn_type *type, - nir_deref *dest_tail) -{ - nir_ssa_def *index; - nir_ssa_def *offset; - vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset); - - nir_intrinsic_op op = nir_intrinsic_store_ssbo; - - return _vtn_block_store(b, op, src, dest->var->data.descriptor_set, - dest->var->data.binding, dest->var->data.mode, - index, offset, type); -} - -static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, - nir_ssa_def *src, nir_ssa_def *insert, - unsigned index); - -static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, - nir_ssa_def *src, - nir_ssa_def *insert, - nir_ssa_def *index); -static void -vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, - nir_deref_var *dest, struct vtn_type *dest_type) -{ - nir_deref *dest_tail = get_deref_tail(dest); - if (variable_is_external_block(dest->var)) { - assert(dest->var->data.mode == nir_var_shader_storage); - vtn_block_store(b, src, dest, dest_type, dest_tail); - } else { - if (dest_tail->child) { - struct vtn_ssa_value *val = 
_vtn_variable_load(b, dest, dest_tail); - nir_deref_array *deref = nir_deref_as_array(dest_tail->child); - assert(deref->deref.child == NULL); - if (deref->deref_array_type == nir_deref_array_type_direct) - val->def = vtn_vector_insert(b, val->def, src->def, - deref->base_offset); - else - val->def = vtn_vector_insert_dynamic(b, val->def, src->def, - deref->indirect.ssa); - _vtn_variable_store(b, dest, dest_tail, val); - } else { - _vtn_variable_store(b, dest, dest_tail, src); - } - } -} - -static void -vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, - nir_deref_var *dest, struct vtn_type *type) -{ - nir_deref *src_tail = get_deref_tail(src); - - if (src_tail->child || src->var->interface_type) { - assert(get_deref_tail(dest)->child); - struct vtn_ssa_value *val = vtn_variable_load(b, src, type); - vtn_variable_store(b, val, dest, type); - } else { - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); - copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); - - nir_builder_instr_insert(&b->nb, ©->instr); - } -} - -/* Tries to compute the size of an interface block based on the strides and - * offsets that are provided to us in the SPIR-V source. - */ -static unsigned -vtn_type_block_size(struct vtn_type *type) -{ - enum glsl_base_type base_type = glsl_get_base_type(type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_DOUBLE: { - unsigned cols = type->row_major ? 
glsl_get_vector_elements(type->type) : - glsl_get_matrix_columns(type->type); - if (cols > 1) { - assert(type->stride > 0); - return type->stride * cols; - } else if (base_type == GLSL_TYPE_DOUBLE) { - return glsl_get_vector_elements(type->type) * 8; - } else { - return glsl_get_vector_elements(type->type) * 4; - } - } - - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_INTERFACE: { - unsigned size = 0; - unsigned num_fields = glsl_get_length(type->type); - for (unsigned f = 0; f < num_fields; f++) { - unsigned field_end = type->offsets[f] + - vtn_type_block_size(type->members[f]); - size = MAX2(size, field_end); - } - return size; - } - - case GLSL_TYPE_ARRAY: - assert(type->stride > 0); - assert(glsl_get_length(type->type) > 0); - return type->stride * glsl_get_length(type->type); - - default: - assert(!"Invalid block type"); - return 0; - } -} - -static bool -is_interface_type(struct vtn_type *type) -{ - return type->block || type->buffer_block || - glsl_type_is_sampler(type->type) || - glsl_type_is_image(type->type); -} - -static void -vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpVariable: { - struct vtn_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); - - nir_variable *var = rzalloc(b->shader, nir_variable); - - var->type = type->type; - var->name = ralloc_strdup(var, val->name); - - struct vtn_type *interface_type; - if (is_interface_type(type)) { - interface_type = type; - } else if (glsl_type_is_array(type->type) && - is_interface_type(type->array_element)) { - interface_type = type->array_element; - } else { - interface_type = NULL; - } - - if (interface_type) - var->interface_type = interface_type->type; - - switch ((SpvStorageClass)w[3]) { - case SpvStorageClassUniform: - case SpvStorageClassUniformConstant: - if (interface_type && interface_type->buffer_block) { - var->data.mode = 
nir_var_shader_storage; - b->shader->info.num_ssbos++; - } else { - /* UBO's and samplers */ - var->data.mode = nir_var_uniform; - var->data.read_only = true; - if (interface_type) { - if (glsl_type_is_image(interface_type->type)) { - b->shader->info.num_images++; - var->data.image.format = interface_type->image_format; - } else if (glsl_type_is_sampler(interface_type->type)) { - b->shader->info.num_textures++; - } else { - assert(glsl_type_is_struct(interface_type->type)); - b->shader->info.num_ubos++; - } - } - } - break; - case SpvStorageClassPushConstant: - assert(interface_type && interface_type->block); - var->data.mode = nir_var_uniform; - var->data.read_only = true; - var->data.descriptor_set = -1; - var->data.binding = -1; - - /* We have exactly one push constant block */ - assert(b->shader->num_uniforms == 0); - b->shader->num_uniforms = vtn_type_block_size(type) * 4; - break; - case SpvStorageClassInput: - var->data.mode = nir_var_shader_in; - var->data.read_only = true; - break; - case SpvStorageClassOutput: - var->data.mode = nir_var_shader_out; - break; - case SpvStorageClassPrivate: - var->data.mode = nir_var_global; - break; - case SpvStorageClassFunction: - var->data.mode = nir_var_local; - break; - case SpvStorageClassWorkgroup: - case SpvStorageClassCrossWorkgroup: - case SpvStorageClassGeneric: - case SpvStorageClassAtomicCounter: - default: - unreachable("Unhandled variable storage class"); - } - - if (count > 4) { - assert(count == 5); - var->constant_initializer = - vtn_value(b, w[4], vtn_value_type_constant)->constant; - } - - val->deref = nir_deref_var_create(b, var); - val->deref_type = type; - - /* We handle decorations first because decorations might give us - * location information. We use the data.explicit_location field to - * note that the location provided is the "final" location. If - * data.explicit_location == false, this means that it's relative to - * whatever the base location is. 
- */ - vtn_foreach_decoration(b, val, var_decoration_cb, var); - - if (!var->data.explicit_location) { - if (b->execution_model == SpvExecutionModelFragment && - var->data.mode == nir_var_shader_out) { - var->data.location += FRAG_RESULT_DATA0; - } else if (b->execution_model == SpvExecutionModelVertex && - var->data.mode == nir_var_shader_in) { - var->data.location += VERT_ATTRIB_GENERIC0; - } else if (var->data.mode == nir_var_shader_in || - var->data.mode == nir_var_shader_out) { - var->data.location += VARYING_SLOT_VAR0; - } - } - - /* Interface block variables aren't actually going to be referenced - * by the generated NIR, so we don't put them in the list - */ - if (interface_type && glsl_type_is_struct(interface_type->type)) - break; - - if (var->data.mode == nir_var_local) { - nir_function_impl_add_variable(b->impl, var); - } else { - nir_shader_add_variable(b->shader, var); - } - - break; - } - - case SpvOpAccessChain: - case SpvOpInBoundsAccessChain: { - nir_deref_var *base; - struct vtn_value *base_val = vtn_untyped_value(b, w[3]); - if (base_val->value_type == vtn_value_type_sampled_image) { - /* This is rather insane. SPIR-V allows you to use OpSampledImage - * to combine an array of images with a single sampler to get an - * array of sampled images that all share the same sampler. - * Fortunately, this means that we can more-or-less ignore the - * sampler when crawling the access chain, but it does leave us - * with this rather awkward little special-case. 
- */ - base = base_val->sampled_image->image; - } else { - assert(base_val->value_type == vtn_value_type_deref); - base = base_val->deref; - } - - nir_deref_var *deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); - struct vtn_type *deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; - - nir_deref *tail = &deref->deref; - while (tail->child) - tail = tail->child; - - for (unsigned i = 0; i < count - 4; i++) { - assert(w[i + 4] < b->value_id_bound); - struct vtn_value *idx_val = &b->values[w[i + 4]]; - - enum glsl_base_type base_type = glsl_get_base_type(tail->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_ARRAY: { - nir_deref_array *deref_arr = nir_deref_array_create(b); - if (base_type == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(tail->type)) { - deref_type = deref_type->array_element; - } else { - assert(glsl_type_is_vector(tail->type)); - deref_type = ralloc(b, struct vtn_type); - deref_type->type = glsl_scalar_type(base_type); - } - - deref_arr->deref.type = deref_type->type; - - if (idx_val->value_type == vtn_value_type_constant) { - unsigned idx = idx_val->constant->value.u[0]; - deref_arr->deref_array_type = nir_deref_array_type_direct; - deref_arr->base_offset = idx; - } else { - assert(idx_val->value_type == vtn_value_type_ssa); - assert(glsl_type_is_scalar(idx_val->ssa->type)); - deref_arr->deref_array_type = nir_deref_array_type_indirect; - deref_arr->base_offset = 0; - deref_arr->indirect = nir_src_for_ssa(idx_val->ssa->def); - } - tail->child = &deref_arr->deref; - break; - } - - case GLSL_TYPE_STRUCT: { - assert(idx_val->value_type == vtn_value_type_constant); - unsigned idx = idx_val->constant->value.u[0]; - deref_type = deref_type->members[idx]; - nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); - deref_struct->deref.type = deref_type->type; - tail->child = &deref_struct->deref; - break; - } - 
default: - unreachable("Invalid type for deref"); - } - - if (deref_type->is_builtin) { - /* If we encounter a builtin, we throw away the ress of the - * access chain, jump to the builtin, and keep building. - */ - const struct glsl_type *builtin_type = deref_type->type; - - nir_deref_array *per_vertex_deref = NULL; - if (glsl_type_is_array(base->var->type)) { - /* This builtin is a per-vertex builtin */ - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - assert(base->var->data.mode == nir_var_shader_in); - builtin_type = glsl_array_type(builtin_type, - b->shader->info.gs.vertices_in); - - /* The first non-var deref should be an array deref. */ - assert(deref->deref.child->deref_type == - nir_deref_type_array); - per_vertex_deref = nir_deref_as_array(deref->deref.child); - } - - nir_variable *builtin = get_builtin_variable(b, - base->var->data.mode, - builtin_type, - deref_type->builtin); - deref = nir_deref_var_create(b, builtin); - - if (per_vertex_deref) { - /* Since deref chains start at the variable, we can just - * steal that link and use it. - */ - deref->deref.child = &per_vertex_deref->deref; - per_vertex_deref->deref.child = NULL; - per_vertex_deref->deref.type = - glsl_get_array_element(builtin_type); - - tail = &per_vertex_deref->deref; - } else { - tail = &deref->deref; - } - } else { - tail = tail->child; - } - } - - /* For uniform blocks, we don't resolve the access chain until we - * actually access the variable, so we need to keep around the original - * type of the variable. 
- */ - if (variable_is_external_block(base->var)) - deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; - - if (base_val->value_type == vtn_value_type_sampled_image) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_sampled_image); - val->sampled_image = ralloc(b, struct vtn_sampled_image); - val->sampled_image->image = deref; - val->sampled_image->sampler = base_val->sampled_image->sampler; - } else { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); - val->deref = deref; - val->deref_type = deref_type; - } - - break; - } - - case SpvOpCopyMemory: { - nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; - nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; - struct vtn_type *type = - vtn_value(b, w[1], vtn_value_type_deref)->deref_type; - - vtn_variable_copy(b, src, dest, type); - break; - } - - case SpvOpLoad: { - nir_deref_var *src = vtn_value(b, w[3], vtn_value_type_deref)->deref; - struct vtn_type *src_type = - vtn_value(b, w[3], vtn_value_type_deref)->deref_type; - - if (src->var->interface_type && - (glsl_type_is_sampler(src->var->interface_type) || - glsl_type_is_image(src->var->interface_type))) { - vtn_push_value(b, w[2], vtn_value_type_deref)->deref = src; - return; - } - - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = vtn_variable_load(b, src, src_type); - break; - } - - case SpvOpStore: { - nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; - struct vtn_type *dest_type = - vtn_value(b, w[1], vtn_value_type_deref)->deref_type; - struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); - vtn_variable_store(b, src, dest, dest_type); - break; - } - - case SpvOpCopyMemorySized: - case SpvOpArrayLength: - default: - unreachable("Unhandled opcode"); - } -} - -static void -vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct nir_function *callee = - vtn_value(b, 
w[3], vtn_value_type_function)->func->impl->function; - - nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee); - for (unsigned i = 0; i < call->num_params; i++) { - unsigned arg_id = w[4 + i]; - struct vtn_value *arg = vtn_untyped_value(b, arg_id); - if (arg->value_type == vtn_value_type_deref) { - call->params[i] = - nir_deref_as_var(nir_copy_deref(call, &arg->deref->deref)); - } else { - struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id); - - /* Make a temporary to store the argument in */ - nir_variable *tmp = - nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp"); - call->params[i] = nir_deref_var_create(call, tmp); - - vtn_variable_store(b, arg_ssa, call->params[i], arg->type); - } - } - - nir_variable *out_tmp = NULL; - if (!glsl_type_is_void(callee->return_type)) { - out_tmp = nir_local_variable_create(b->impl, callee->return_type, - "out_tmp"); - call->return_deref = nir_deref_var_create(call, out_tmp); - } - - nir_builder_instr_insert(&b->nb, &call->instr); - - if (glsl_type_is_void(callee->return_type)) { - vtn_push_value(b, w[2], vtn_value_type_undef); - } else { - struct vtn_type *rettype = vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa); - retval->ssa = vtn_variable_load(b, call->return_deref, rettype); - } -} - -static struct vtn_ssa_value * -vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) -{ - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = type; - - if (!glsl_type_is_vector_or_scalar(type)) { - unsigned elems = glsl_get_length(type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) { - const struct glsl_type *child_type; - - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - child_type = glsl_get_column_type(type); - break; - case 
GLSL_TYPE_ARRAY: - child_type = glsl_get_array_element(type); - break; - case GLSL_TYPE_STRUCT: - child_type = glsl_get_struct_field(type, i); - break; - default: - unreachable("unkown base type"); - } - - val->elems[i] = vtn_create_ssa_value(b, child_type); - } - } - - return val; -} - -static nir_tex_src -vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) -{ - nir_tex_src src; - src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def); - src.src_type = type; - return src; -} - -static void -vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - if (opcode == SpvOpSampledImage) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_sampled_image); - val->sampled_image = ralloc(b, struct vtn_sampled_image); - val->sampled_image->image = - vtn_value(b, w[3], vtn_value_type_deref)->deref; - val->sampled_image->sampler = - vtn_value(b, w[4], vtn_value_type_deref)->deref; - return; - } - - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - - struct vtn_sampled_image sampled; - struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); - if (sampled_val->value_type == vtn_value_type_sampled_image) { - sampled = *sampled_val->sampled_image; - } else { - assert(sampled_val->value_type == vtn_value_type_deref); - sampled.image = NULL; - sampled.sampler = sampled_val->deref; - } - - nir_tex_src srcs[8]; /* 8 should be enough */ - nir_tex_src *p = srcs; - - unsigned idx = 4; - - unsigned coord_components = 0; - switch (opcode) { - case SpvOpImageSampleImplicitLod: - case SpvOpImageSampleExplicitLod: - case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleDrefExplicitLod: - case SpvOpImageSampleProjImplicitLod: - case SpvOpImageSampleProjExplicitLod: - case SpvOpImageSampleProjDrefImplicitLod: - case SpvOpImageSampleProjDrefExplicitLod: - case SpvOpImageFetch: - case SpvOpImageGather: - case SpvOpImageDrefGather: - case SpvOpImageQueryLod: { - /* All these types have the 
coordinate as their first real argument */ - struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); - coord_components = glsl_get_vector_elements(coord->type); - p->src = nir_src_for_ssa(coord->def); - p->src_type = nir_tex_src_coord; - p++; - break; - } - - default: - break; - } - - /* These all have an explicit depth value as their next source */ - switch (opcode) { - case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleDrefExplicitLod: - case SpvOpImageSampleProjDrefImplicitLod: - case SpvOpImageSampleProjDrefExplicitLod: - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor); - break; - default: - break; - } - - /* Figure out the base texture operation */ - nir_texop texop; - switch (opcode) { - case SpvOpImageSampleImplicitLod: - case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleProjImplicitLod: - case SpvOpImageSampleProjDrefImplicitLod: - texop = nir_texop_tex; - break; - - case SpvOpImageSampleExplicitLod: - case SpvOpImageSampleDrefExplicitLod: - case SpvOpImageSampleProjExplicitLod: - case SpvOpImageSampleProjDrefExplicitLod: - texop = nir_texop_txl; - break; - - case SpvOpImageFetch: - texop = nir_texop_txf; - break; - - case SpvOpImageGather: - case SpvOpImageDrefGather: - texop = nir_texop_tg4; - break; - - case SpvOpImageQuerySizeLod: - case SpvOpImageQuerySize: - texop = nir_texop_txs; - break; - - case SpvOpImageQueryLod: - texop = nir_texop_lod; - break; - - case SpvOpImageQueryLevels: - texop = nir_texop_query_levels; - break; - - case SpvOpImageQuerySamples: - default: - unreachable("Unhandled opcode"); - } - - /* Now we need to handle some number of optional arguments */ - if (idx < count) { - uint32_t operands = w[idx++]; - - if (operands & SpvImageOperandsBiasMask) { - assert(texop == nir_texop_tex); - texop = nir_texop_txb; - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias); - } - - if (operands & SpvImageOperandsLodMask) { - assert(texop == nir_texop_txl || texop == nir_texop_txf || - texop == nir_texop_txs); - 
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); - } - - if (operands & SpvImageOperandsGradMask) { - assert(texop == nir_texop_tex); - texop = nir_texop_txd; - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx); - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy); - } - - if (operands & SpvImageOperandsOffsetMask || - operands & SpvImageOperandsConstOffsetMask) - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset); - - if (operands & SpvImageOperandsConstOffsetsMask) - assert(!"Constant offsets to texture gather not yet implemented"); - - if (operands & SpvImageOperandsSampleMask) { - assert(texop == nir_texop_txf); - texop = nir_texop_txf_ms; - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); - } - } - /* We should have now consumed exactly all of the arguments */ - assert(idx == count); - - nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); - - const struct glsl_type *sampler_type = - nir_deref_tail(&sampled.sampler->deref)->type; - instr->sampler_dim = glsl_get_sampler_dim(sampler_type); - - switch (glsl_get_sampler_result_type(sampler_type)) { - case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; - case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; - case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; - case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; - default: - unreachable("Invalid base type for sampler result"); - } - - instr->op = texop; - memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); - instr->coord_components = coord_components; - instr->is_array = glsl_sampler_type_is_array(sampler_type); - instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); - - instr->sampler = - nir_deref_as_var(nir_copy_deref(instr, &sampled.sampler->deref)); - if (sampled.image) { - instr->texture = - nir_deref_as_var(nir_copy_deref(instr, &sampled.image->deref)); - } else { - instr->texture = NULL; - } - - nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); - val->ssa = 
vtn_create_ssa_value(b, glsl_vector_type(GLSL_TYPE_FLOAT, 4)); - val->ssa->def = &instr->dest.ssa; - - nir_builder_instr_insert(&b->nb, &instr->instr); -} - -static nir_ssa_def * -get_image_coord(struct vtn_builder *b, uint32_t value) -{ - struct vtn_ssa_value *coord = vtn_ssa_value(b, value); - - /* The image_load_store intrinsics assume a 4-dim coordinate */ - unsigned dim = glsl_get_vector_elements(coord->type); - unsigned swizzle[4]; - for (unsigned i = 0; i < 4; i++) - swizzle[i] = MIN2(i, dim - 1); - - return nir_swizzle(&b->nb, coord->def, swizzle, 4, false); -} - -static void -vtn_handle_image(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - /* Just get this one out of the way */ - if (opcode == SpvOpImageTexelPointer) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_image_pointer); - val->image = ralloc(b, struct vtn_image_pointer); - - val->image->deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; - val->image->coord = get_image_coord(b, w[4]); - val->image->sample = vtn_ssa_value(b, w[5])->def; - return; - } - - struct vtn_image_pointer image; - - switch (opcode) { - case SpvOpAtomicExchange: - case SpvOpAtomicCompareExchange: - case SpvOpAtomicCompareExchangeWeak: - case SpvOpAtomicIIncrement: - case SpvOpAtomicIDecrement: - case SpvOpAtomicIAdd: - case SpvOpAtomicISub: - case SpvOpAtomicSMin: - case SpvOpAtomicUMin: - case SpvOpAtomicSMax: - case SpvOpAtomicUMax: - case SpvOpAtomicAnd: - case SpvOpAtomicOr: - case SpvOpAtomicXor: - image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; - break; - - case SpvOpImageRead: - image.deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; - image.coord = get_image_coord(b, w[4]); - - if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) { - assert(w[5] == SpvImageOperandsSampleMask); - image.sample = vtn_ssa_value(b, w[6])->def; - } else { - image.sample = nir_ssa_undef(&b->nb, 1); - } - break; - - case SpvOpImageWrite: - image.deref = 
vtn_value(b, w[1], vtn_value_type_deref)->deref; - image.coord = get_image_coord(b, w[2]); - - /* texel = w[3] */ - - if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) { - assert(w[4] == SpvImageOperandsSampleMask); - image.sample = vtn_ssa_value(b, w[5])->def; - } else { - image.sample = nir_ssa_undef(&b->nb, 1); - } - - default: - unreachable("Invalid image opcode"); - } - - nir_intrinsic_op op; - switch (opcode) { -#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break; - OP(ImageRead, load) - OP(ImageWrite, store) - OP(AtomicExchange, atomic_exchange) - OP(AtomicCompareExchange, atomic_comp_swap) - OP(AtomicIIncrement, atomic_add) - OP(AtomicIDecrement, atomic_add) - OP(AtomicIAdd, atomic_add) - OP(AtomicISub, atomic_add) - OP(AtomicSMin, atomic_min) - OP(AtomicUMin, atomic_min) - OP(AtomicSMax, atomic_max) - OP(AtomicUMax, atomic_max) - OP(AtomicAnd, atomic_and) - OP(AtomicOr, atomic_or) - OP(AtomicXor, atomic_xor) -#undef OP - default: - unreachable("Invalid image opcode"); - } - - nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); - intrin->variables[0] = - nir_deref_as_var(nir_copy_deref(&intrin->instr, &image.deref->deref)); - intrin->src[0] = nir_src_for_ssa(image.coord); - intrin->src[1] = nir_src_for_ssa(image.sample); - - switch (opcode) { - case SpvOpImageRead: - break; - case SpvOpImageWrite: - intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); - break; - case SpvOpAtomicIIncrement: - intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); - break; - case SpvOpAtomicIDecrement: - intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); - break; - - case SpvOpAtomicExchange: - case SpvOpAtomicIAdd: - case SpvOpAtomicSMin: - case SpvOpAtomicUMin: - case SpvOpAtomicSMax: - case SpvOpAtomicUMax: - case SpvOpAtomicAnd: - case SpvOpAtomicOr: - case SpvOpAtomicXor: - intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); - break; - - case SpvOpAtomicCompareExchange: - intrin->src[2] = 
nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); - intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); - break; - - case SpvOpAtomicISub: - intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); - break; - - default: - unreachable("Invalid image opcode"); - } - - if (opcode != SpvOpImageWrite) { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - nir_ssa_dest_init(&intrin->instr, &intrin->dest, - glsl_get_vector_elements(type->type), NULL); - val->ssa = vtn_create_ssa_value(b, type->type); - val->ssa->def = &intrin->dest.ssa; - } - - nir_builder_instr_insert(&b->nb, &intrin->instr); -} - -static nir_alu_instr * -create_vec(nir_shader *shader, unsigned num_components) -{ - nir_op op; - switch (num_components) { - case 1: op = nir_op_fmov; break; - case 2: op = nir_op_vec2; break; - case 3: op = nir_op_vec3; break; - case 4: op = nir_op_vec4; break; - default: unreachable("bad vector size"); - } - - nir_alu_instr *vec = nir_alu_instr_create(shader, op); - nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); - vec->dest.write_mask = (1 << num_components) - 1; - - return vec; -} - -static struct vtn_ssa_value * -vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) -{ - if (src->transposed) - return src->transposed; - - struct vtn_ssa_value *dest = - vtn_create_ssa_value(b, glsl_transposed_type(src->type)); - - for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { - nir_alu_instr *vec = create_vec(b->shader, - glsl_get_matrix_columns(src->type)); - if (glsl_type_is_vector_or_scalar(src->type)) { - vec->src[0].src = nir_src_for_ssa(src->def); - vec->src[0].swizzle[0] = i; - } else { - for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { - vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); - vec->src[j].swizzle[0] = i; - } - } - nir_builder_instr_insert(&b->nb, &vec->instr); - 
dest->elems[i]->def = &vec->dest.dest.ssa; - } - - dest->transposed = src; - - return dest; -} - -/* - * Normally, column vectors in SPIR-V correspond to a single NIR SSA - * definition. But for matrix multiplies, we want to do one routine for - * multiplying a matrix by a matrix and then pretend that vectors are matrices - * with one column. So we "wrap" these things, and unwrap the result before we - * send it off. - */ - -static struct vtn_ssa_value * -vtn_wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) -{ - if (val == NULL) - return NULL; - - if (glsl_type_is_matrix(val->type)) - return val; - - struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); - dest->type = val->type; - dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); - dest->elems[0] = val; - - return dest; -} - -static struct vtn_ssa_value * -vtn_unwrap_matrix(struct vtn_ssa_value *val) -{ - if (glsl_type_is_matrix(val->type)) - return val; - - return val->elems[0]; -} - -static struct vtn_ssa_value * -vtn_matrix_multiply(struct vtn_builder *b, - struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) -{ - - struct vtn_ssa_value *src0 = vtn_wrap_matrix(b, _src0); - struct vtn_ssa_value *src1 = vtn_wrap_matrix(b, _src1); - struct vtn_ssa_value *src0_transpose = vtn_wrap_matrix(b, _src0->transposed); - struct vtn_ssa_value *src1_transpose = vtn_wrap_matrix(b, _src1->transposed); - - unsigned src0_rows = glsl_get_vector_elements(src0->type); - unsigned src0_columns = glsl_get_matrix_columns(src0->type); - unsigned src1_columns = glsl_get_matrix_columns(src1->type); - - const struct glsl_type *dest_type; - if (src1_columns > 1) { - dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), - src0_rows, src1_columns); - } else { - dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows); - } - struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); - - dest = vtn_wrap_matrix(b, dest); - - bool transpose_result = false; - if (src0_transpose && 
src1_transpose) { - /* transpose(A) * transpose(B) = transpose(B * A) */ - src1 = src0_transpose; - src0 = src1_transpose; - src0_transpose = NULL; - src1_transpose = NULL; - transpose_result = true; - } - - if (src0_transpose && !src1_transpose && - glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { - /* We already have the rows of src0 and the columns of src1 available, - * so we can just take the dot product of each row with each column to - * get the result. - */ - - for (unsigned i = 0; i < src1_columns; i++) { - nir_alu_instr *vec = create_vec(b->shader, src0_rows); - for (unsigned j = 0; j < src0_rows; j++) { - vec->src[j].src = - nir_src_for_ssa(nir_fdot(&b->nb, src0_transpose->elems[j]->def, - src1->elems[i]->def)); - } - - nir_builder_instr_insert(&b->nb, &vec->instr); - dest->elems[i]->def = &vec->dest.dest.ssa; - } - } else { - /* We don't handle the case where src1 is transposed but not src0, since - * the general case only uses individual components of src1 so the - * optimizer should chew through the transpose we emitted for src1. 
- */ - - for (unsigned i = 0; i < src1_columns; i++) { - /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ - dest->elems[i]->def = - nir_fmul(&b->nb, src0->elems[0]->def, - vtn_vector_extract(b, src1->elems[i]->def, 0)); - for (unsigned j = 1; j < src0_columns; j++) { - dest->elems[i]->def = - nir_fadd(&b->nb, dest->elems[i]->def, - nir_fmul(&b->nb, src0->elems[j]->def, - vtn_vector_extract(b, - src1->elems[i]->def, j))); - } - } - } - - dest = vtn_unwrap_matrix(dest); - - if (transpose_result) - dest = vtn_transpose(b, dest); - - return dest; -} - -static struct vtn_ssa_value * -vtn_mat_times_scalar(struct vtn_builder *b, - struct vtn_ssa_value *mat, - nir_ssa_def *scalar) -{ - struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); - for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { - if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) - dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); - else - dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); - } - - return dest; -} - -static void -vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - - switch (opcode) { - case SpvOpTranspose: { - struct vtn_ssa_value *src = vtn_ssa_value(b, w[3]); - val->ssa = vtn_transpose(b, src); - break; - } - - case SpvOpOuterProduct: { - struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); - struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); - - val->ssa = vtn_matrix_multiply(b, src0, vtn_transpose(b, src1)); - break; - } - - case SpvOpMatrixTimesScalar: { - struct vtn_ssa_value *mat = vtn_ssa_value(b, w[3]); - struct vtn_ssa_value *scalar = vtn_ssa_value(b, w[4]); - - if (mat->transposed) { - val->ssa = vtn_transpose(b, vtn_mat_times_scalar(b, mat->transposed, - scalar->def)); - } else { - val->ssa = vtn_mat_times_scalar(b, mat, scalar->def); - } - break; - } - - case SpvOpVectorTimesMatrix: - case 
SpvOpMatrixTimesVector: - case SpvOpMatrixTimesMatrix: { - struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); - struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); - - if (opcode == SpvOpVectorTimesMatrix) { - val->ssa = vtn_matrix_multiply(b, vtn_transpose(b, src1), src0); - } else { - val->ssa = vtn_matrix_multiply(b, src0, src1); - } - break; - } - - default: unreachable("unknown matrix opcode"); - } -} - -static void -vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->ssa = vtn_create_ssa_value(b, type); - - /* Collect the various SSA sources */ - const unsigned num_inputs = count - 3; - nir_ssa_def *src[4]; - for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 3])->def; - for (unsigned i = num_inputs; i < 4; i++) - src[i] = NULL; - - /* Indicates that the first two arguments should be swapped. This is - * used for implementing greater-than and less-than-or-equal. 
- */ - bool swap = false; - - nir_op op; - switch (opcode) { - /* Basic ALU operations */ - case SpvOpSNegate: op = nir_op_ineg; break; - case SpvOpFNegate: op = nir_op_fneg; break; - case SpvOpNot: op = nir_op_inot; break; - - case SpvOpAny: - if (src[0]->num_components == 1) { - op = nir_op_imov; - } else { - switch (src[0]->num_components) { - case 2: op = nir_op_bany_inequal2; break; - case 3: op = nir_op_bany_inequal3; break; - case 4: op = nir_op_bany_inequal4; break; - } - src[1] = nir_imm_int(&b->nb, NIR_FALSE); - } - break; - - case SpvOpAll: - if (src[0]->num_components == 1) { - op = nir_op_imov; - } else { - switch (src[0]->num_components) { - case 2: op = nir_op_ball_iequal2; break; - case 3: op = nir_op_ball_iequal3; break; - case 4: op = nir_op_ball_iequal4; break; - } - src[1] = nir_imm_int(&b->nb, NIR_TRUE); - } - break; - - case SpvOpIAdd: op = nir_op_iadd; break; - case SpvOpFAdd: op = nir_op_fadd; break; - case SpvOpISub: op = nir_op_isub; break; - case SpvOpFSub: op = nir_op_fsub; break; - case SpvOpIMul: op = nir_op_imul; break; - case SpvOpFMul: op = nir_op_fmul; break; - case SpvOpUDiv: op = nir_op_udiv; break; - case SpvOpSDiv: op = nir_op_idiv; break; - case SpvOpFDiv: op = nir_op_fdiv; break; - case SpvOpUMod: op = nir_op_umod; break; - case SpvOpSMod: op = nir_op_umod; break; /* FIXME? 
*/ - case SpvOpFMod: op = nir_op_fmod; break; - - case SpvOpDot: - assert(src[0]->num_components == src[1]->num_components); - switch (src[0]->num_components) { - case 1: op = nir_op_fmul; break; - case 2: op = nir_op_fdot2; break; - case 3: op = nir_op_fdot3; break; - case 4: op = nir_op_fdot4; break; - } - break; - - case SpvOpShiftRightLogical: op = nir_op_ushr; break; - case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; - case SpvOpShiftLeftLogical: op = nir_op_ishl; break; - case SpvOpLogicalOr: op = nir_op_ior; break; - case SpvOpLogicalEqual: op = nir_op_ieq; break; - case SpvOpLogicalNotEqual: op = nir_op_ine; break; - case SpvOpLogicalAnd: op = nir_op_iand; break; - case SpvOpLogicalNot: op = nir_op_inot; break; - case SpvOpBitwiseOr: op = nir_op_ior; break; - case SpvOpBitwiseXor: op = nir_op_ixor; break; - case SpvOpBitwiseAnd: op = nir_op_iand; break; - case SpvOpSelect: op = nir_op_bcsel; break; - case SpvOpIEqual: op = nir_op_ieq; break; - - /* Comparisons: (TODO: How do we want to handled ordered/unordered?) 
*/ - case SpvOpFOrdEqual: op = nir_op_feq; break; - case SpvOpFUnordEqual: op = nir_op_feq; break; - case SpvOpINotEqual: op = nir_op_ine; break; - case SpvOpFOrdNotEqual: op = nir_op_fne; break; - case SpvOpFUnordNotEqual: op = nir_op_fne; break; - case SpvOpULessThan: op = nir_op_ult; break; - case SpvOpSLessThan: op = nir_op_ilt; break; - case SpvOpFOrdLessThan: op = nir_op_flt; break; - case SpvOpFUnordLessThan: op = nir_op_flt; break; - case SpvOpUGreaterThan: op = nir_op_ult; swap = true; break; - case SpvOpSGreaterThan: op = nir_op_ilt; swap = true; break; - case SpvOpFOrdGreaterThan: op = nir_op_flt; swap = true; break; - case SpvOpFUnordGreaterThan: op = nir_op_flt; swap = true; break; - case SpvOpULessThanEqual: op = nir_op_uge; swap = true; break; - case SpvOpSLessThanEqual: op = nir_op_ige; swap = true; break; - case SpvOpFOrdLessThanEqual: op = nir_op_fge; swap = true; break; - case SpvOpFUnordLessThanEqual: op = nir_op_fge; swap = true; break; - case SpvOpUGreaterThanEqual: op = nir_op_uge; break; - case SpvOpSGreaterThanEqual: op = nir_op_ige; break; - case SpvOpFOrdGreaterThanEqual: op = nir_op_fge; break; - case SpvOpFUnordGreaterThanEqual:op = nir_op_fge; break; - - /* Conversions: */ - case SpvOpConvertFToU: op = nir_op_f2u; break; - case SpvOpConvertFToS: op = nir_op_f2i; break; - case SpvOpConvertSToF: op = nir_op_i2f; break; - case SpvOpConvertUToF: op = nir_op_u2f; break; - case SpvOpBitcast: op = nir_op_imov; break; - case SpvOpUConvert: - case SpvOpSConvert: - op = nir_op_imov; /* TODO: NIR is 32-bit only; these are no-ops. 
*/ - break; - case SpvOpFConvert: - op = nir_op_fmov; - break; - - /* Derivatives: */ - case SpvOpDPdx: op = nir_op_fddx; break; - case SpvOpDPdy: op = nir_op_fddy; break; - case SpvOpDPdxFine: op = nir_op_fddx_fine; break; - case SpvOpDPdyFine: op = nir_op_fddy_fine; break; - case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; - case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; - case SpvOpFwidth: - val->ssa->def = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); - return; - case SpvOpFwidthFine: - val->ssa->def = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); - return; - case SpvOpFwidthCoarse: - val->ssa->def = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); - return; - - case SpvOpVectorTimesScalar: - /* The builder will take care of splatting for us. */ - val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); - return; - - case SpvOpSRem: - case SpvOpFRem: - unreachable("No NIR equivalent"); - - case SpvOpIsNan: - case SpvOpIsInf: - case SpvOpIsFinite: - case SpvOpIsNormal: - case SpvOpSignBitSet: - case SpvOpLessOrGreater: - case SpvOpOrdered: - case SpvOpUnordered: - default: - unreachable("Unhandled opcode"); - } - - if (swap) { - nir_ssa_def *tmp = src[0]; - src[0] = src[1]; - src[1] = tmp; - } - - val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); -} - -static nir_ssa_def * -vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) -{ - unsigned swiz[4] = { index }; - return nir_swizzle(&b->nb, src, swiz, 1, true); -} - - -static nir_ssa_def * -vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, - unsigned index) -{ - nir_alu_instr *vec = create_vec(b->shader, src->num_components); - - for (unsigned i = 0; i < src->num_components; i++) { - if (i == index) { - vec->src[i].src = 
nir_src_for_ssa(insert); - } else { - vec->src[i].src = nir_src_for_ssa(src); - vec->src[i].swizzle[0] = i; - } - } - - nir_builder_instr_insert(&b->nb, &vec->instr); - - return &vec->dest.dest.ssa; -} - -static nir_ssa_def * -vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, - nir_ssa_def *index) -{ - nir_ssa_def *dest = vtn_vector_extract(b, src, 0); - for (unsigned i = 1; i < src->num_components; i++) - dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), - vtn_vector_extract(b, src, i), dest); - - return dest; -} - -static nir_ssa_def * -vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, - nir_ssa_def *insert, nir_ssa_def *index) -{ - nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); - for (unsigned i = 1; i < src->num_components; i++) - dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), - vtn_vector_insert(b, src, insert, i), dest); - - return dest; -} - -static nir_ssa_def * -vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, - nir_ssa_def *src0, nir_ssa_def *src1, - const uint32_t *indices) -{ - nir_alu_instr *vec = create_vec(b->shader, num_components); - - nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1); - nir_builder_instr_insert(&b->nb, &undef->instr); - - for (unsigned i = 0; i < num_components; i++) { - uint32_t index = indices[i]; - if (index == 0xffffffff) { - vec->src[i].src = nir_src_for_ssa(&undef->def); - } else if (index < src0->num_components) { - vec->src[i].src = nir_src_for_ssa(src0); - vec->src[i].swizzle[0] = index; - } else { - vec->src[i].src = nir_src_for_ssa(src1); - vec->src[i].swizzle[0] = index - src0->num_components; - } - } - - nir_builder_instr_insert(&b->nb, &vec->instr); - - return &vec->dest.dest.ssa; -} - -/* - * Concatentates a number of vectors/scalars together to produce a vector - */ -static nir_ssa_def * -vtn_vector_construct(struct vtn_builder *b, unsigned num_components, - unsigned num_srcs, 
nir_ssa_def **srcs) -{ - nir_alu_instr *vec = create_vec(b->shader, num_components); - - unsigned dest_idx = 0; - for (unsigned i = 0; i < num_srcs; i++) { - nir_ssa_def *src = srcs[i]; - for (unsigned j = 0; j < src->num_components; j++) { - vec->src[dest_idx].src = nir_src_for_ssa(src); - vec->src[dest_idx].swizzle[0] = j; - dest_idx++; - } - } - - nir_builder_instr_insert(&b->nb, &vec->instr); - - return &vec->dest.dest.ssa; -} - -static struct vtn_ssa_value * -vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) -{ - struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); - dest->type = src->type; - - if (glsl_type_is_vector_or_scalar(src->type)) { - dest->def = src->def; - } else { - unsigned elems = glsl_get_length(src->type); - - dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) - dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); - } - - return dest; -} - -static struct vtn_ssa_value * -vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, - struct vtn_ssa_value *insert, const uint32_t *indices, - unsigned num_indices) -{ - struct vtn_ssa_value *dest = vtn_composite_copy(b, src); - - struct vtn_ssa_value *cur = dest; - unsigned i; - for (i = 0; i < num_indices - 1; i++) { - cur = cur->elems[indices[i]]; - } - - if (glsl_type_is_vector_or_scalar(cur->type)) { - /* According to the SPIR-V spec, OpCompositeInsert may work down to - * the component granularity. In that case, the last index will be - * the index to insert the scalar into the vector. 
- */ - - cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); - } else { - cur->elems[indices[i]] = insert; - } - - return dest; -} - -static struct vtn_ssa_value * -vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, - const uint32_t *indices, unsigned num_indices) -{ - struct vtn_ssa_value *cur = src; - for (unsigned i = 0; i < num_indices; i++) { - if (glsl_type_is_vector_or_scalar(cur->type)) { - assert(i == num_indices - 1); - /* According to the SPIR-V spec, OpCompositeExtract may work down to - * the component granularity. The last index will be the index of the - * vector to extract. - */ - - struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); - ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); - ret->def = vtn_vector_extract(b, cur->def, indices[i]); - return ret; - } else { - cur = cur->elems[indices[i]]; - } - } - - return cur; -} - -static void -vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->ssa = vtn_create_ssa_value(b, type); - - switch (opcode) { - case SpvOpVectorExtractDynamic: - val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def); - break; - - case SpvOpVectorInsertDynamic: - val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def, - vtn_ssa_value(b, w[5])->def); - break; - - case SpvOpVectorShuffle: - val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), - vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def, - w + 5); - break; - - case SpvOpCompositeConstruct: { - unsigned elems = count - 3; - if (glsl_type_is_vector_or_scalar(type)) { - nir_ssa_def *srcs[4]; - for (unsigned i = 0; i < elems; i++) - srcs[i] = vtn_ssa_value(b, w[3 + i])->def; - 
val->ssa->def = - vtn_vector_construct(b, glsl_get_vector_elements(type), - elems, srcs); - } else { - val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) - val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); - } - break; - } - case SpvOpCompositeExtract: - val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), - w + 4, count - 4); - break; - - case SpvOpCompositeInsert: - val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), - vtn_ssa_value(b, w[3]), - w + 5, count - 5); - break; - - case SpvOpCopyObject: - val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); - break; - - default: - unreachable("unknown composite operation"); - } -} - -static void -vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - nir_intrinsic_op intrinsic_op; - switch (opcode) { - case SpvOpEmitVertex: - case SpvOpEmitStreamVertex: - intrinsic_op = nir_intrinsic_emit_vertex; - break; - case SpvOpEndPrimitive: - case SpvOpEndStreamPrimitive: - intrinsic_op = nir_intrinsic_end_primitive; - break; - case SpvOpMemoryBarrier: - intrinsic_op = nir_intrinsic_memory_barrier; - break; - case SpvOpControlBarrier: - default: - unreachable("unknown barrier instruction"); - } - - nir_intrinsic_instr *intrin = - nir_intrinsic_instr_create(b->shader, intrinsic_op); - - if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) - intrin->const_index[0] = w[1]; - - nir_builder_instr_insert(&b->nb, &intrin->instr); -} - -static void -vtn_phi_node_init(struct vtn_builder *b, struct vtn_ssa_value *val) -{ - if (glsl_type_is_vector_or_scalar(val->type)) { - nir_phi_instr *phi = nir_phi_instr_create(b->shader); - nir_ssa_dest_init(&phi->instr, &phi->dest, - glsl_get_vector_elements(val->type), NULL); - exec_list_make_empty(&phi->srcs); - nir_builder_instr_insert(&b->nb, &phi->instr); - val->def = &phi->dest.ssa; - } else { - unsigned elems = glsl_get_length(val->type); - for (unsigned i = 0; 
i < elems; i++) - vtn_phi_node_init(b, val->elems[i]); - } -} - -static struct vtn_ssa_value * -vtn_phi_node_create(struct vtn_builder *b, const struct glsl_type *type) -{ - struct vtn_ssa_value *val = vtn_create_ssa_value(b, type); - vtn_phi_node_init(b, val); - return val; -} - -static void -vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->ssa = vtn_phi_node_create(b, type); -} - -static void -vtn_phi_node_add_src(struct vtn_ssa_value *phi, const nir_block *pred, - struct vtn_ssa_value *val) -{ - assert(phi->type == val->type); - if (glsl_type_is_vector_or_scalar(phi->type)) { - nir_phi_instr *phi_instr = nir_instr_as_phi(phi->def->parent_instr); - nir_phi_src *src = ralloc(phi_instr, nir_phi_src); - src->pred = (nir_block *) pred; - src->src = nir_src_for_ssa(val->def); - exec_list_push_tail(&phi_instr->srcs, &src->node); - } else { - unsigned elems = glsl_get_length(phi->type); - for (unsigned i = 0; i < elems; i++) - vtn_phi_node_add_src(phi->elems[i], pred, val->elems[i]); - } -} - -static struct vtn_ssa_value * -vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block, - const struct glsl_type *type, const uint32_t *w, - unsigned count) -{ - struct hash_entry *entry = _mesa_hash_table_search(b->block_table, block); - if (entry) { - struct vtn_block *spv_block = entry->data; - for (unsigned off = 4; off < count; off += 2) { - if (spv_block == vtn_value(b, w[off], vtn_value_type_block)->block) { - return vtn_ssa_value(b, w[off - 1]); - } - } - } - - b->nb.cursor = nir_before_block(block); - struct vtn_ssa_value *phi = vtn_phi_node_create(b, type); - - struct set_entry *entry2; - set_foreach(block->predecessors, entry2) { - nir_block *pred = (nir_block *) entry2->key; - struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, type, w, - count); - vtn_phi_node_add_src(phi, 
pred, val); - } - - return phi; -} - -static bool -vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - if (opcode == SpvOpLabel) { - b->block = vtn_value(b, w[1], vtn_value_type_block)->block; - return true; - } - - if (opcode != SpvOpPhi) - return true; - - struct vtn_ssa_value *phi = vtn_value(b, w[2], vtn_value_type_ssa)->ssa; - - struct set_entry *entry; - set_foreach(b->block->block->predecessors, entry) { - nir_block *pred = (nir_block *) entry->key; - - struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, phi->type, w, - count); - vtn_phi_node_add_src(phi, pred, val); - } - - return true; -} - -static unsigned -gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) -{ - switch (mode) { - case SpvExecutionModeInputPoints: - case SpvExecutionModeOutputPoints: - return 0; /* GL_POINTS */ - case SpvExecutionModeInputLines: - return 1; /* GL_LINES */ - case SpvExecutionModeInputLinesAdjacency: - return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */ - case SpvExecutionModeTriangles: - return 4; /* GL_TRIANGLES */ - case SpvExecutionModeInputTrianglesAdjacency: - return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */ - case SpvExecutionModeQuads: - return 7; /* GL_QUADS */ - case SpvExecutionModeIsolines: - return 0x8E7A; /* GL_ISOLINES */ - case SpvExecutionModeOutputLineStrip: - return 3; /* GL_LINE_STRIP */ - case SpvExecutionModeOutputTriangleStrip: - return 5; /* GL_TRIANGLE_STRIP */ - default: - assert(!"Invalid primitive type"); - return 4; - } -} - -static unsigned -vertices_in_from_spv_execution_mode(SpvExecutionMode mode) -{ - switch (mode) { - case SpvExecutionModeInputPoints: - return 1; - case SpvExecutionModeInputLines: - return 2; - case SpvExecutionModeInputLinesAdjacency: - return 4; - case SpvExecutionModeTriangles: - return 3; - case SpvExecutionModeInputTrianglesAdjacency: - return 6; - default: - assert(!"Invalid GS input mode"); - return 0; - } -} - -static bool 
-vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpSource: - case SpvOpSourceExtension: - case SpvOpExtension: - /* Unhandled, but these are for debug so that's ok. */ - break; - - case SpvOpCapability: - switch ((SpvCapability)w[1]) { - case SpvCapabilityMatrix: - case SpvCapabilityShader: - /* All shaders support these */ - break; - case SpvCapabilityGeometry: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - break; - default: - assert(!"Unsupported capability"); - } - break; - - case SpvOpExtInstImport: - vtn_handle_extension(b, opcode, w, count); - break; - - case SpvOpMemoryModel: - assert(w[1] == SpvAddressingModelLogical); - assert(w[2] == SpvMemoryModelGLSL450); - break; - - case SpvOpEntryPoint: - assert(b->entry_point == NULL); - b->entry_point = &b->values[w[2]]; - b->execution_model = w[1]; - break; - - case SpvOpExecutionMode: - assert(b->entry_point == &b->values[w[1]]); - - SpvExecutionMode mode = w[2]; - switch(mode) { - case SpvExecutionModeOriginUpperLeft: - case SpvExecutionModeOriginLowerLeft: - b->origin_upper_left = (mode == SpvExecutionModeOriginUpperLeft); - break; - - case SpvExecutionModeEarlyFragmentTests: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.early_fragment_tests = true; - break; - - case SpvExecutionModeInvocations: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader->info.gs.invocations = MAX2(1, w[3]); - break; - - case SpvExecutionModeDepthReplacing: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; - break; - case SpvExecutionModeDepthGreater: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; - break; - case SpvExecutionModeDepthLess: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; - break; - case 
SpvExecutionModeDepthUnchanged: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; - break; - - case SpvExecutionModeLocalSize: - assert(b->shader->stage == MESA_SHADER_COMPUTE); - b->shader->info.cs.local_size[0] = w[3]; - b->shader->info.cs.local_size[1] = w[4]; - b->shader->info.cs.local_size[2] = w[5]; - break; - case SpvExecutionModeLocalSizeHint: - break; /* Nothing do do with this */ - - case SpvExecutionModeOutputVertices: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader->info.gs.vertices_out = w[3]; - break; - - case SpvExecutionModeInputPoints: - case SpvExecutionModeInputLines: - case SpvExecutionModeInputLinesAdjacency: - case SpvExecutionModeTriangles: - case SpvExecutionModeInputTrianglesAdjacency: - case SpvExecutionModeQuads: - case SpvExecutionModeIsolines: - if (b->shader->stage == MESA_SHADER_GEOMETRY) { - b->shader->info.gs.vertices_in = - vertices_in_from_spv_execution_mode(mode); - } else { - assert(!"Tesselation shaders not yet supported"); - } - break; - - case SpvExecutionModeOutputPoints: - case SpvExecutionModeOutputLineStrip: - case SpvExecutionModeOutputTriangleStrip: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader->info.gs.output_primitive = - gl_primitive_from_spv_execution_mode(mode); - break; - - case SpvExecutionModeSpacingEqual: - case SpvExecutionModeSpacingFractionalEven: - case SpvExecutionModeSpacingFractionalOdd: - case SpvExecutionModeVertexOrderCw: - case SpvExecutionModeVertexOrderCcw: - case SpvExecutionModePointMode: - assert(!"TODO: Add tessellation metadata"); - break; - - case SpvExecutionModePixelCenterInteger: - case SpvExecutionModeXfb: - assert(!"Unhandled execution mode"); - break; - - case SpvExecutionModeVecTypeHint: - case SpvExecutionModeContractionOff: - break; /* OpenCL */ - } - break; - - case SpvOpString: - vtn_push_value(b, w[1], vtn_value_type_string)->str = - vtn_string_literal(b, &w[2], count - 2); - 
break; - - case SpvOpName: - b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2); - break; - - case SpvOpMemberName: - /* TODO */ - break; - - case SpvOpLine: - break; /* Ignored for now */ - - case SpvOpDecorationGroup: - case SpvOpDecorate: - case SpvOpMemberDecorate: - case SpvOpGroupDecorate: - case SpvOpGroupMemberDecorate: - vtn_handle_decoration(b, opcode, w, count); - break; - - case SpvOpTypeVoid: - case SpvOpTypeBool: - case SpvOpTypeInt: - case SpvOpTypeFloat: - case SpvOpTypeVector: - case SpvOpTypeMatrix: - case SpvOpTypeImage: - case SpvOpTypeSampler: - case SpvOpTypeSampledImage: - case SpvOpTypeArray: - case SpvOpTypeRuntimeArray: - case SpvOpTypeStruct: - case SpvOpTypeOpaque: - case SpvOpTypePointer: - case SpvOpTypeFunction: - case SpvOpTypeEvent: - case SpvOpTypeDeviceEvent: - case SpvOpTypeReserveId: - case SpvOpTypeQueue: - case SpvOpTypePipe: - vtn_handle_type(b, opcode, w, count); - break; - - case SpvOpConstantTrue: - case SpvOpConstantFalse: - case SpvOpConstant: - case SpvOpConstantComposite: - case SpvOpConstantSampler: - case SpvOpSpecConstantTrue: - case SpvOpSpecConstantFalse: - case SpvOpSpecConstant: - case SpvOpSpecConstantComposite: - vtn_handle_constant(b, opcode, w, count); - break; - - case SpvOpVariable: - vtn_handle_variables(b, opcode, w, count); - break; - - default: - return false; /* End of preamble */ - } - - return true; -} - -static bool -vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpFunction: { - assert(b->func == NULL); - b->func = rzalloc(b, struct vtn_function); - - const struct glsl_type *result_type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); - val->func = b->func; - - const struct glsl_type *func_type = - vtn_value(b, w[4], vtn_value_type_type)->type->type; - - assert(glsl_get_function_return_type(func_type) == 
result_type); - - nir_function *func = - nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); - - func->num_params = glsl_get_length(func_type); - func->params = ralloc_array(b->shader, nir_parameter, func->num_params); - for (unsigned i = 0; i < func->num_params; i++) { - const struct glsl_function_param *param = - glsl_get_function_param(func_type, i); - func->params[i].type = param->type; - if (param->in) { - if (param->out) { - func->params[i].param_type = nir_parameter_inout; - } else { - func->params[i].param_type = nir_parameter_in; - } - } else { - if (param->out) { - func->params[i].param_type = nir_parameter_out; - } else { - assert(!"Parameter is neither in nor out"); - } - } - } - - func->return_type = glsl_get_function_return_type(func_type); - - b->func->impl = nir_function_impl_create(func); - if (!glsl_type_is_void(func->return_type)) { - b->func->impl->return_var = - nir_local_variable_create(b->func->impl, func->return_type, "ret"); - } - - b->func_param_idx = 0; - break; - } - - case SpvOpFunctionEnd: - b->func->end = w; - b->func = NULL; - break; - - case SpvOpFunctionParameter: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); - - assert(b->func_param_idx < b->func->impl->num_params); - unsigned idx = b->func_param_idx++; - - nir_variable *param = - nir_local_variable_create(b->func->impl, - b->func->impl->function->params[idx].type, - val->name); - - b->func->impl->params[idx] = param; - val->deref = nir_deref_var_create(b, param); - val->deref_type = vtn_value(b, w[1], vtn_value_type_type)->type; - break; - } - - case SpvOpLabel: { - assert(b->block == NULL); - b->block = rzalloc(b, struct vtn_block); - b->block->label = w; - vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; - - if (b->func->start_block == NULL) { - /* This is the first block encountered for this function. In this - * case, we set the start block and add it to the list of - * implemented functions that we'll walk later. 
- */ - b->func->start_block = b->block; - exec_list_push_tail(&b->functions, &b->func->node); - } - break; - } - - case SpvOpBranch: - case SpvOpBranchConditional: - case SpvOpSwitch: - case SpvOpKill: - case SpvOpReturn: - case SpvOpReturnValue: - case SpvOpUnreachable: - assert(b->block); - b->block->branch = w; - b->block = NULL; - break; - - case SpvOpSelectionMerge: - case SpvOpLoopMerge: - assert(b->block && b->block->merge_op == SpvOpNop); - b->block->merge_op = opcode; - b->block->merge_block_id = w[1]; - break; - - default: - /* Continue on as per normal */ - return true; - } - - return true; -} - -static bool -vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpLabel: { - struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; - assert(block->block == NULL); - - block->block = nir_cursor_current_block(b->nb.cursor); - break; - } - - case SpvOpLoopMerge: - case SpvOpSelectionMerge: - /* This is handled by cfg pre-pass and walk_blocks */ - break; - - case SpvOpUndef: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef); - val->type = vtn_value(b, w[1], vtn_value_type_type)->type; - break; - } - - case SpvOpExtInst: - vtn_handle_extension(b, opcode, w, count); - break; - - case SpvOpVariable: - case SpvOpLoad: - case SpvOpStore: - case SpvOpCopyMemory: - case SpvOpCopyMemorySized: - case SpvOpAccessChain: - case SpvOpInBoundsAccessChain: - case SpvOpArrayLength: - vtn_handle_variables(b, opcode, w, count); - break; - - case SpvOpFunctionCall: - vtn_handle_function_call(b, opcode, w, count); - break; - - case SpvOpSampledImage: - case SpvOpImageSampleImplicitLod: - case SpvOpImageSampleExplicitLod: - case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleDrefExplicitLod: - case SpvOpImageSampleProjImplicitLod: - case SpvOpImageSampleProjExplicitLod: - case SpvOpImageSampleProjDrefImplicitLod: - case 
SpvOpImageSampleProjDrefExplicitLod: - case SpvOpImageFetch: - case SpvOpImageGather: - case SpvOpImageDrefGather: - case SpvOpImageQuerySizeLod: - case SpvOpImageQuerySize: - case SpvOpImageQueryLod: - case SpvOpImageQueryLevels: - case SpvOpImageQuerySamples: - vtn_handle_texture(b, opcode, w, count); - break; - - case SpvOpImageRead: - case SpvOpImageWrite: - case SpvOpImageTexelPointer: - vtn_handle_image(b, opcode, w, count); - break; - - case SpvOpAtomicExchange: - case SpvOpAtomicCompareExchange: - case SpvOpAtomicCompareExchangeWeak: - case SpvOpAtomicIIncrement: - case SpvOpAtomicIDecrement: - case SpvOpAtomicIAdd: - case SpvOpAtomicISub: - case SpvOpAtomicSMin: - case SpvOpAtomicUMin: - case SpvOpAtomicSMax: - case SpvOpAtomicUMax: - case SpvOpAtomicAnd: - case SpvOpAtomicOr: - case SpvOpAtomicXor: { - struct vtn_value *pointer = vtn_untyped_value(b, w[3]); - if (pointer->value_type == vtn_value_type_image_pointer) { - vtn_handle_image(b, opcode, w, count); - } else { - assert(!"Atomic buffers not yet implemented"); - } - } - - case SpvOpSNegate: - case SpvOpFNegate: - case SpvOpNot: - case SpvOpAny: - case SpvOpAll: - case SpvOpConvertFToU: - case SpvOpConvertFToS: - case SpvOpConvertSToF: - case SpvOpConvertUToF: - case SpvOpUConvert: - case SpvOpSConvert: - case SpvOpFConvert: - case SpvOpConvertPtrToU: - case SpvOpConvertUToPtr: - case SpvOpPtrCastToGeneric: - case SpvOpGenericCastToPtr: - case SpvOpBitcast: - case SpvOpIsNan: - case SpvOpIsInf: - case SpvOpIsFinite: - case SpvOpIsNormal: - case SpvOpSignBitSet: - case SpvOpLessOrGreater: - case SpvOpOrdered: - case SpvOpUnordered: - case SpvOpIAdd: - case SpvOpFAdd: - case SpvOpISub: - case SpvOpFSub: - case SpvOpIMul: - case SpvOpFMul: - case SpvOpUDiv: - case SpvOpSDiv: - case SpvOpFDiv: - case SpvOpUMod: - case SpvOpSRem: - case SpvOpSMod: - case SpvOpFRem: - case SpvOpFMod: - case SpvOpVectorTimesScalar: - case SpvOpDot: - case SpvOpShiftRightLogical: - case SpvOpShiftRightArithmetic: - case 
SpvOpShiftLeftLogical: - case SpvOpLogicalEqual: - case SpvOpLogicalNotEqual: - case SpvOpLogicalOr: - case SpvOpLogicalAnd: - case SpvOpLogicalNot: - case SpvOpBitwiseOr: - case SpvOpBitwiseXor: - case SpvOpBitwiseAnd: - case SpvOpSelect: - case SpvOpIEqual: - case SpvOpFOrdEqual: - case SpvOpFUnordEqual: - case SpvOpINotEqual: - case SpvOpFOrdNotEqual: - case SpvOpFUnordNotEqual: - case SpvOpULessThan: - case SpvOpSLessThan: - case SpvOpFOrdLessThan: - case SpvOpFUnordLessThan: - case SpvOpUGreaterThan: - case SpvOpSGreaterThan: - case SpvOpFOrdGreaterThan: - case SpvOpFUnordGreaterThan: - case SpvOpULessThanEqual: - case SpvOpSLessThanEqual: - case SpvOpFOrdLessThanEqual: - case SpvOpFUnordLessThanEqual: - case SpvOpUGreaterThanEqual: - case SpvOpSGreaterThanEqual: - case SpvOpFOrdGreaterThanEqual: - case SpvOpFUnordGreaterThanEqual: - case SpvOpDPdx: - case SpvOpDPdy: - case SpvOpFwidth: - case SpvOpDPdxFine: - case SpvOpDPdyFine: - case SpvOpFwidthFine: - case SpvOpDPdxCoarse: - case SpvOpDPdyCoarse: - case SpvOpFwidthCoarse: - vtn_handle_alu(b, opcode, w, count); - break; - - case SpvOpTranspose: - case SpvOpOuterProduct: - case SpvOpMatrixTimesScalar: - case SpvOpVectorTimesMatrix: - case SpvOpMatrixTimesVector: - case SpvOpMatrixTimesMatrix: - vtn_handle_matrix_alu(b, opcode, w, count); - break; - - case SpvOpVectorExtractDynamic: - case SpvOpVectorInsertDynamic: - case SpvOpVectorShuffle: - case SpvOpCompositeConstruct: - case SpvOpCompositeExtract: - case SpvOpCompositeInsert: - case SpvOpCopyObject: - vtn_handle_composite(b, opcode, w, count); - break; - - case SpvOpPhi: - vtn_handle_phi_first_pass(b, w); - break; - - case SpvOpEmitVertex: - case SpvOpEndPrimitive: - case SpvOpEmitStreamVertex: - case SpvOpEndStreamPrimitive: - case SpvOpControlBarrier: - case SpvOpMemoryBarrier: - vtn_handle_barrier(b, opcode, w, count); - break; - - default: - unreachable("Unhandled opcode"); - } - - return true; -} - -static void -vtn_walk_blocks(struct vtn_builder 
*b, struct vtn_block *start, - struct vtn_block *break_block, struct vtn_block *cont_block, - struct vtn_block *end_block) -{ - struct vtn_block *block = start; - while (block != end_block) { - if (block->merge_op == SpvOpLoopMerge) { - /* This is the jump into a loop. */ - struct vtn_block *new_cont_block = block; - struct vtn_block *new_break_block = - vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; - - nir_loop *loop = nir_loop_create(b->shader); - nir_cf_node_insert(b->nb.cursor, &loop->cf_node); - - /* Reset the merge_op to prerevent infinite recursion */ - block->merge_op = SpvOpNop; - - b->nb.cursor = nir_after_cf_list(&loop->body); - vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); - - b->nb.cursor = nir_after_cf_node(&loop->cf_node); - block = new_break_block; - continue; - } - - const uint32_t *w = block->branch; - SpvOp branch_op = w[0] & SpvOpCodeMask; - - b->block = block; - vtn_foreach_instruction(b, block->label, block->branch, - vtn_handle_body_instruction); - - nir_block *cur_block = nir_cursor_current_block(b->nb.cursor); - assert(cur_block == block->block); - _mesa_hash_table_insert(b->block_table, cur_block, block); - - switch (branch_op) { - case SpvOpBranch: { - struct vtn_block *branch_block = - vtn_value(b, w[1], vtn_value_type_block)->block; - - if (branch_block == break_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_break); - nir_builder_instr_insert(&b->nb, &jump->instr); - - return; - } else if (branch_block == cont_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_continue); - nir_builder_instr_insert(&b->nb, &jump->instr); - - return; - } else if (branch_block == end_block) { - /* We're branching to the merge block of an if, since for loops - * and functions end_block == NULL, so we're done here. 
- */ - return; - } else { - /* We're branching to another block, and according to the rules, - * we can only branch to another block with one predecessor (so - * we're the only one jumping to it) so we can just process it - * next. - */ - block = branch_block; - continue; - } - } - - case SpvOpBranchConditional: { - /* Gather up the branch blocks */ - struct vtn_block *then_block = - vtn_value(b, w[2], vtn_value_type_block)->block; - struct vtn_block *else_block = - vtn_value(b, w[3], vtn_value_type_block)->block; - - nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); - nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); - - if (then_block == break_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_break); - nir_instr_insert_after_cf_list(&if_stmt->then_list, - &jump->instr); - block = else_block; - } else if (else_block == break_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_break); - nir_instr_insert_after_cf_list(&if_stmt->else_list, - &jump->instr); - block = then_block; - } else if (then_block == cont_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_continue); - nir_instr_insert_after_cf_list(&if_stmt->then_list, - &jump->instr); - block = else_block; - } else if (else_block == cont_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_continue); - nir_instr_insert_after_cf_list(&if_stmt->else_list, - &jump->instr); - block = then_block; - } else { - /* According to the rules we're branching to two blocks that don't - * have any other predecessors, so we can handle this as a - * conventional if. 
- */ - assert(block->merge_op == SpvOpSelectionMerge); - struct vtn_block *merge_block = - vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; - - b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); - vtn_walk_blocks(b, then_block, break_block, cont_block, merge_block); - - b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); - vtn_walk_blocks(b, else_block, break_block, cont_block, merge_block); - - b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); - block = merge_block; - continue; - } - - /* If we got here then we inserted a predicated break or continue - * above and we need to handle the other case. We already set - * `block` above to indicate what block to visit after the - * predicated break. - */ - - /* It's possible that the other branch is also a break/continue. - * If it is, we handle that here. - */ - if (block == break_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_break); - nir_builder_instr_insert(&b->nb, &jump->instr); - - return; - } else if (block == cont_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_continue); - nir_builder_instr_insert(&b->nb, &jump->instr); - - return; - } - - /* If we got here then there was a predicated break/continue but - * the other half of the if has stuff in it. `block` was already - * set above so there is nothing left for us to do. 
- */ - continue; - } - - case SpvOpReturn: { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_return); - nir_builder_instr_insert(&b->nb, &jump->instr); - return; - } - - case SpvOpReturnValue: { - struct vtn_ssa_value *src = vtn_ssa_value(b, w[1]); - vtn_variable_store(b, src, - nir_deref_var_create(b, b->impl->return_var), - NULL); - - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_return); - nir_builder_instr_insert(&b->nb, &jump->instr); - return; - } - - case SpvOpKill: { - nir_intrinsic_instr *discard = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard); - nir_builder_instr_insert(&b->nb, &discard->instr); - return; - } - - case SpvOpSwitch: - case SpvOpUnreachable: - default: - unreachable("Unhandled opcode"); - } - } -} - -nir_shader * -spirv_to_nir(const uint32_t *words, size_t word_count, - gl_shader_stage stage, - const nir_shader_compiler_options *options) -{ - const uint32_t *word_end = words + word_count; - - /* Handle the SPIR-V header (first 4 dwords) */ - assert(word_count > 5); - - assert(words[0] == SpvMagicNumber); - assert(words[1] >= 0x10000); - /* words[2] == generator magic */ - unsigned value_id_bound = words[3]; - assert(words[4] == 0); - - words+= 5; - - nir_shader *shader = nir_shader_create(NULL, stage, options); - - /* Initialize the stn_builder object */ - struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); - b->shader = shader; - b->value_id_bound = value_id_bound; - b->values = rzalloc_array(b, struct vtn_value, value_id_bound); - exec_list_make_empty(&b->functions); - - /* XXX: We shouldn't need these defaults */ - if (b->shader->stage == MESA_SHADER_GEOMETRY) { - b->shader->info.gs.vertices_in = 3; - b->shader->info.gs.output_primitive = 4; /* GL_TRIANGLES */ - } - - /* Handle all the preamble instructions */ - words = vtn_foreach_instruction(b, words, word_end, - vtn_handle_preamble_instruction); - - /* Do a very quick CFG analysis pass */ - vtn_foreach_instruction(b, 
words, word_end, - vtn_handle_first_cfg_pass_instruction); - - foreach_list_typed(struct vtn_function, func, node, &b->functions) { - b->impl = func->impl; - b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, - _mesa_key_pointer_equal); - b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer, - _mesa_key_pointer_equal); - nir_builder_init(&b->nb, b->impl); - b->nb.cursor = nir_after_cf_list(&b->impl->body); - vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); - vtn_foreach_instruction(b, func->start_block->label, func->end, - vtn_handle_phi_second_pass); - } - - /* Because we can still have output reads in NIR, we need to lower - * outputs to temporaries before we are truely finished. - */ - nir_lower_outputs_to_temporaries(shader); - - ralloc_free(b); - - return shader; -} diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h deleted file mode 100644 index 6b53fa3bfba..00000000000 --- a/src/glsl/nir/spirv_to_nir_private.h +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "nir.h" -#include "nir_spirv.h" -#include "nir_builder.h" -#include "spirv.h" - -struct vtn_builder; -struct vtn_decoration; - -enum vtn_value_type { - vtn_value_type_invalid = 0, - vtn_value_type_undef, - vtn_value_type_string, - vtn_value_type_decoration_group, - vtn_value_type_type, - vtn_value_type_constant, - vtn_value_type_deref, - vtn_value_type_function, - vtn_value_type_block, - vtn_value_type_ssa, - vtn_value_type_extension, - vtn_value_type_image_pointer, - vtn_value_type_sampled_image, -}; - -struct vtn_block { - /* Merge opcode if this block contains a merge; SpvOpNop otherwise. */ - SpvOp merge_op; - uint32_t merge_block_id; - const uint32_t *label; - const uint32_t *branch; - nir_block *block; -}; - -struct vtn_function { - struct exec_node node; - - nir_function_impl *impl; - struct vtn_block *start_block; - - const uint32_t *end; -}; - -typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, - const uint32_t *, unsigned); - -struct vtn_ssa_value { - union { - nir_ssa_def *def; - struct vtn_ssa_value **elems; - }; - - /* For matrices, if this is non-NULL, then this value is actually the - * transpose of some other value. The value that `transposed` points to - * always dominates this value. 
- */ - struct vtn_ssa_value *transposed; - - const struct glsl_type *type; -}; - -struct vtn_type { - const struct glsl_type *type; - - /* for matrices, whether the matrix is stored row-major */ - bool row_major; - - /* for structs, the offset of each member */ - unsigned *offsets; - - /* for structs, whether it was decorated as a "non-SSBO-like" block */ - bool block; - - /* for structs, whether it was decorated as an "SSBO-like" block */ - bool buffer_block; - - /* for structs with block == true, whether this is a builtin block (i.e. a - * block that contains only builtins). - */ - bool builtin_block; - - /* Image format for image_load_store type images */ - unsigned image_format; - - /* for arrays and matrices, the array stride */ - unsigned stride; - - /* for arrays, the vtn_type for the elements of the array */ - struct vtn_type *array_element; - - /* for structures, the vtn_type for each member */ - struct vtn_type **members; - - /* Whether this type, or a parent type, has been decorated as a builtin */ - bool is_builtin; - - SpvBuiltIn builtin; -}; - -struct vtn_image_pointer { - nir_deref_var *deref; - nir_ssa_def *coord; - nir_ssa_def *sample; -}; - -struct vtn_sampled_image { - nir_deref_var *image; /* Image or array of images */ - nir_deref_var *sampler; /* Sampler */ -}; - -struct vtn_value { - enum vtn_value_type value_type; - const char *name; - struct vtn_decoration *decoration; - union { - void *ptr; - char *str; - struct vtn_type *type; - struct { - nir_constant *constant; - const struct glsl_type *const_type; - }; - struct { - nir_deref_var *deref; - struct vtn_type *deref_type; - }; - struct vtn_image_pointer *image; - struct vtn_sampled_image *sampled_image; - struct vtn_function *func; - struct vtn_block *block; - struct vtn_ssa_value *ssa; - vtn_instruction_handler ext_handler; - }; -}; - -struct vtn_decoration { - struct vtn_decoration *next; - int member; /* -1 if not a member decoration */ - const uint32_t *literals; - struct vtn_value 
*group; - SpvDecoration decoration; -}; - -struct vtn_builder { - nir_builder nb; - - nir_shader *shader; - nir_function_impl *impl; - struct vtn_block *block; - - /* - * In SPIR-V, constants are global, whereas in NIR, the load_const - * instruction we use is per-function. So while we parse each function, we - * keep a hash table of constants we've resolved to nir_ssa_value's so - * far, and we lazily resolve them when we see them used in a function. - */ - struct hash_table *const_table; - - /* - * Map from nir_block to the vtn_block which ends with it -- used for - * handling phi nodes. - */ - struct hash_table *block_table; - - /* - * NIR variable for each SPIR-V builtin. - */ - struct { - nir_variable *in; - nir_variable *out; - } builtins[42]; /* XXX need symbolic constant from SPIR-V header */ - - unsigned value_id_bound; - struct vtn_value *values; - - SpvExecutionModel execution_model; - bool origin_upper_left; - struct vtn_value *entry_point; - - struct vtn_function *func; - struct exec_list functions; - - /* Current function parameter index */ - unsigned func_param_idx; -}; - -static inline struct vtn_value * -vtn_push_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) -{ - assert(value_id < b->value_id_bound); - assert(b->values[value_id].value_type == vtn_value_type_invalid); - - b->values[value_id].value_type = value_type; - - return &b->values[value_id]; -} - -static inline struct vtn_value * -vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) -{ - assert(value_id < b->value_id_bound); - return &b->values[value_id]; -} - -static inline struct vtn_value * -vtn_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) -{ - struct vtn_value *val = vtn_untyped_value(b, value_id); - assert(val->value_type == value_type); - return val; -} - -struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); - -typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, - struct 
vtn_value *, - int member, - const struct vtn_decoration *, - void *); - -void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, - vtn_decoration_foreach_cb cb, void *data); - -bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, - const uint32_t *words, unsigned count); diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 8da5da17e53..a6d62a3f49e 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -30,7 +30,7 @@ #include "anv_private.h" #include "brw_nir.h" #include "anv_nir.h" -#include "glsl/nir/nir_spirv.h" +#include "glsl/nir/spirv/nir_spirv.h" /* Needed for SWIZZLE macros */ #include "program/prog_instruction.h" -- cgit v1.2.3 From 763176a3e2166b2f1757582948e985d6bca9531c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Dec 2015 13:22:09 -0800 Subject: nir/lower_returns: Fix a bug in loop lowering --- src/glsl/nir/nir_lower_returns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_returns.c b/src/glsl/nir/nir_lower_returns.c index 178e454d737..dcdd14e2026 100644 --- a/src/glsl/nir/nir_lower_returns.c +++ b/src/glsl/nir/nir_lower_returns.c @@ -160,7 +160,7 @@ lower_returns_in_block(nir_block *block, struct lower_returns_state *state) if (state->loop) { /* We're in a loop. Make the return a break. */ - jump->type = nir_jump_return; + jump->type = nir_jump_break; } else { /* Not in a loop. Just delete the return; we'll deal with * predicating later. -- cgit v1.2.3 From 9c84b6cce007e6ad7877c4787346b3a45ab1b0c7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Dec 2015 13:26:49 -0800 Subject: anv/device: Set device->info sooner in CreateDevice anv_block_pool_init calls anv_block_pool_grow which checks device->info.has_llc to see if it needs to set caching parameters. If we don't set device->info early enough, this reads an undefined value which is probably 0 and not what we want on llc platforms. 
Found with valgrind. --- src/vulkan/anv_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 0043bea5d13..90a0061dbf8 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -706,6 +706,9 @@ VkResult anv_CreateDevice( if (device->context_id == -1) goto fail_fd; + device->info = *physical_device->info; + device->isl_dev = physical_device->isl_dev; + pthread_mutex_init(&device->mutex, NULL); anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); @@ -725,9 +728,6 @@ VkResult anv_CreateDevice( anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); - device->info = *physical_device->info; - device->isl_dev = physical_device->isl_dev; - anv_queue_init(device, &device->queue); anv_device_init_meta(device); -- cgit v1.2.3 From b005fd62f9d90cc145a80a130f7b32b8c7beaa71 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Dec 2015 15:46:20 -0800 Subject: nir/spirv: Add GLSL.std.450.h It accidentally got removed during the mass rename. --- src/glsl/nir/spirv/GLSL.std.450.h | 127 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 src/glsl/nir/spirv/GLSL.std.450.h (limited to 'src') diff --git a/src/glsl/nir/spirv/GLSL.std.450.h b/src/glsl/nir/spirv/GLSL.std.450.h new file mode 100644 index 00000000000..d1c9b5c1d44 --- /dev/null +++ b/src/glsl/nir/spirv/GLSL.std.450.h @@ -0,0 +1,127 @@ +/* +** Copyright (c) 2014-2015 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +const int GLSLstd450Version = 99; +const int GLSLstd450Revision = 3; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + 
GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H -- cgit v1.2.3 From 3eb108ef871f1bd4c909ce627ce76e5226e8d035 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 23:32:55 -0800 Subject: anv/meta: Fix the pos_out location for the vertex shader --- src/vulkan/anv_meta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index cf6678d852f..c1d600a3ed8 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -48,7 +48,7 @@ build_nir_vertex_shader(bool attr_flat) pos_in->data.location = VERT_ATTRIB_GENERIC0; nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, vertex_type, "gl_Position"); - pos_in->data.location = VARYING_SLOT_POS; + pos_out->data.location = VARYING_SLOT_POS; nir_copy_var(&b, pos_out, pos_in); /* Add one more pass-through attribute. 
For clear shaders, this is used -- cgit v1.2.3 From b090f9dce12747648e81986d7fb5987233d309dd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 23 Dec 2015 23:33:18 -0800 Subject: gen8/pipeline: Actually use inputs_read from the VS for laying out inputs --- src/vulkan/gen7_pipeline.c | 2 ++ src/vulkan/gen8_pipeline.c | 45 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 63aca1f6b32..1ab6765a000 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -43,6 +43,8 @@ gen7_emit_vertex_input(struct anv_pipeline *pipeline, const uint32_t num_dwords = 1 + element_count * 2; uint32_t *p; + anv_finishme("gen7 vertex input needs to use inputs_read"); + if (info->vertexAttributeDescriptionCount == 0 && !sgvs) return; diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 3c3f079b408..9b3f38968b7 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -34,16 +34,35 @@ static void emit_vertex_input(struct anv_pipeline *pipeline, - const VkPipelineVertexInputStateCreateInfo *info) + const VkPipelineVertexInputStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) { - const uint32_t num_dwords = 1 + info->vertexAttributeDescriptionCount * 2; - uint32_t *p; static_assert(ANV_GEN >= 8, "should be compiling this for gen < 8"); - if (info->vertexAttributeDescriptionCount > 0) { + uint32_t vb_used; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. 
+ */ + vb_used = 0; + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) + vb_used |= (1 << info->pVertexAttributeDescriptions[i].location); + } else { + /* Pull inputs_read out of the VS prog data */ + uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; + assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); + vb_used = inputs_read >> VERT_ATTRIB_GENERIC0; + } + + const uint32_t num_dwords = 1 + __builtin_popcount(vb_used) * 2; + + uint32_t *p; + if (vb_used != 0) { p = anv_batch_emitn(&pipeline->batch, num_dwords, GENX(3DSTATE_VERTEX_ELEMENTS)); + memset(p + 1, 0, (num_dwords - 1) * 4); } for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { @@ -51,6 +70,13 @@ emit_vertex_input(struct anv_pipeline *pipeline, &info->pVertexAttributeDescriptions[i]; const struct anv_format *format = anv_format_for_vk_format(desc->format); + assert(desc->binding < 32); + + if ((vb_used & (1 << desc->location)) == 0) + continue; /* Binding unused */ + + uint32_t slot = __builtin_popcount(vb_used & ((1 << desc->location) - 1)); + struct GENX(VERTEX_ELEMENT_STATE) element = { .VertexBufferIndex = desc->binding, .Valid = true, @@ -62,23 +88,24 @@ emit_vertex_input(struct anv_pipeline *pipeline, .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, .Component3Control = format->num_channels >= 4 ? VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP }; - GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + i * 2], &element); + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), .InstancingEnable = pipeline->instancing_enable[desc->binding], - .VertexElementIndex = i, + .VertexElementIndex = slot, /* Vulkan so far doesn't have an instance divisor, so * this is always 1 (ignored if not instancing). 
*/ .InstanceDataStepRate = 1); } + const uint32_t id_slot = __builtin_popcount(vb_used); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, .VertexIDComponentNumber = 2, - .VertexIDElementOffset = info->vertexBindingDescriptionCount, + .VertexIDElementOffset = id_slot, .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, .InstanceIDComponentNumber = 3, - .InstanceIDElementOffset = info->vertexBindingDescriptionCount); + .InstanceIDElementOffset = id_slot); } static void @@ -354,7 +381,7 @@ genX(graphics_pipeline_create)( return result; assert(pCreateInfo->pVertexInputState); - emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); assert(pCreateInfo->pInputAssemblyState); emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); assert(pCreateInfo->pRasterizationState); -- cgit v1.2.3 From 5fab35d0903565d8c5862870a920febb60417497 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 24 Dec 2015 01:10:51 -0800 Subject: gen7/pipeline: Actually use inputs_read from the VS for laying out inputs --- src/vulkan/gen7_pipeline.c | 58 +++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 1ab6765a000..837684d4858 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -33,29 +33,54 @@ #include "gen75_pack.h" static void -gen7_emit_vertex_input(struct anv_pipeline *pipeline, - const VkPipelineVertexInputStateCreateInfo *info) +emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) { - const bool sgvs = pipeline->vs_prog_data.uses_vertexid || - pipeline->vs_prog_data.uses_instanceid; - const uint32_t element_count = - info->vertexAttributeDescriptionCount + (sgvs ? 
1 : 0); - const uint32_t num_dwords = 1 + element_count * 2; - uint32_t *p; - anv_finishme("gen7 vertex input needs to use inputs_read"); + uint32_t vb_used; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. + */ + vb_used = 0; + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) + vb_used |= (1 << info->pVertexAttributeDescriptions[i].location); + } else { + /* Pull inputs_read out of the VS prog data */ + uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; + assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); + vb_used = inputs_read >> VERT_ATTRIB_GENERIC0; + } + + uint32_t vb_count = __builtin_popcount(vb_used); + + if (pipeline->vs_prog_data.uses_vertexid || + pipeline->vs_prog_data.uses_instanceid) + vb_count++; - if (info->vertexAttributeDescriptionCount == 0 && !sgvs) + if (vb_count == 0) return; - p = anv_batch_emitn(&pipeline->batch, num_dwords, - GEN7_3DSTATE_VERTEX_ELEMENTS); + const uint32_t num_dwords = 1 + vb_count * 2; + + uint32_t *p = anv_batch_emitn(&pipeline->batch, num_dwords, + GEN7_3DSTATE_VERTEX_ELEMENTS); + memset(p + 1, 0, (num_dwords - 1) * 4); for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { const VkVertexInputAttributeDescription *desc = &info->pVertexAttributeDescriptions[i]; const struct anv_format *format = anv_format_for_vk_format(desc->format); + assert(desc->binding < 32); + + if ((vb_used & (1 << desc->location)) == 0) + continue; /* Binding unused */ + + uint32_t slot = __builtin_popcount(vb_used & ((1 << desc->location) - 1)); + struct GEN7_VERTEX_ELEMENT_STATE element = { .VertexBufferIndex = desc->binding, .Valid = true, @@ -67,10 +92,11 @@ gen7_emit_vertex_input(struct anv_pipeline *pipeline, .Component2Control = format->num_channels >= 3 ? 
VFCOMP_STORE_SRC : VFCOMP_STORE_0, .Component3Control = format->num_channels >= 4 ? VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP }; - GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); + GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + slot * 2], &element); } - if (sgvs) { + if (pipeline->vs_prog_data.uses_vertexid || + pipeline->vs_prog_data.uses_instanceid) { struct GEN7_VERTEX_ELEMENT_STATE element = { .Valid = true, /* FIXME: Do we need to provide the base vertex as component 0 here @@ -80,7 +106,7 @@ gen7_emit_vertex_input(struct anv_pipeline *pipeline, .Component2Control = VFCOMP_STORE_VID, .Component3Control = VFCOMP_STORE_IID }; - GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + info->vertexAttributeDescriptionCount * 2], &element); + GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + (vb_count - 1) * 2], &element); } } @@ -347,7 +373,7 @@ genX(graphics_pipeline_create)( } assert(pCreateInfo->pVertexInputState); - gen7_emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); assert(pCreateInfo->pRasterizationState); gen7_emit_rs_state(pipeline, pCreateInfo->pRasterizationState, extra); -- cgit v1.2.3 From a00524a216fc5b22d4ec67583b5e9a352f700321 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 20 Dec 2015 22:58:38 -0800 Subject: vk: Unstub VkSemaphore implementation There really is nothing to do for us here, at least with the current kernel interface. --- src/vulkan/anv_device.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 90a0061dbf8..88515c353ee 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1429,8 +1429,13 @@ VkResult anv_CreateSemaphore( const VkAllocationCallbacks* pAllocator, VkSemaphore* pSemaphore) { + /* The DRM execbuffer ioctl always execute in-oder, even between different + * rings. 
As such, there's nothing to do for the user space semaphore. + */ + *pSemaphore = (VkSemaphore)1; - stub_return(VK_SUCCESS); + + return VK_SUCCESS; } void anv_DestroySemaphore( @@ -1438,7 +1443,6 @@ void anv_DestroySemaphore( VkSemaphore semaphore, const VkAllocationCallbacks* pAllocator) { - stub(); } // Event functions -- cgit v1.2.3 From fc03723bcd9ff88f17673c6a8e2b40d601f00287 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 21 Dec 2015 00:03:28 -0800 Subject: vk: Fill out buffer surface state when updating descriptor set We can do this when we update the descriptor set instead of on the fly. --- src/vulkan/anv_cmd_buffer.c | 57 ++++++++++---------------------- src/vulkan/anv_descriptor_set.c | 73 +++++++++++++++++++++++++++++++++-------- src/vulkan/anv_private.h | 16 +++++---- 3 files changed, 87 insertions(+), 59 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 9b54dee96bc..1eaa3df633c 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -536,8 +536,12 @@ void anv_CmdBindDescriptorSets( unsigned array_size = set_layout->binding[b].array_size; for (unsigned j = 0; j < array_size; j++) { + uint32_t range = 0; + if (desc->buffer_view) + range = desc->buffer_view; push->dynamic[d].offset = *(offsets++); - push->dynamic[d].range = (desc++)->range; + push->dynamic[d].range = range; + desc++; d++; } } @@ -582,31 +586,21 @@ add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, state.offset + dword * 4, bo, offset); } -static void -fill_descriptor_buffer_surface_state(struct anv_device *device, void *state, - gl_shader_stage stage, - VkDescriptorType type, - uint32_t offset, uint32_t range) +const struct anv_format * +anv_format_for_descriptor_type(VkDescriptorType type) { - VkFormat format; switch (type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - format = VK_FORMAT_R32G32B32A32_SFLOAT; - break; + return 
anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT); case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - format = VK_FORMAT_UNDEFINED; - break; + return anv_format_for_vk_format(VK_FORMAT_UNDEFINED); default: unreachable("Invalid descriptor type"); } - - anv_fill_buffer_surface_state(device, state, - anv_format_for_vk_format(format)->surface_format, - offset, range, 1); } VkResult @@ -671,10 +665,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, surface_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer); - fill_descriptor_buffer_surface_state(cmd_buffer->device, - surface_state.map, stage, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - bo_offset, 12); + const struct anv_format *format = + anv_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + anv_fill_buffer_surface_state(cmd_buffer->device, surface_state.map, + format->surface_format, bo_offset, 12, 1); if (!cmd_buffer->device->info.has_llc) anv_state_clflush(surface_state); @@ -712,27 +706,6 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, /* Nothing for us to do here */ continue; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { - bo = desc->buffer->bo; - bo_offset = desc->buffer->offset + desc->offset; - - surface_state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer); - - fill_descriptor_buffer_surface_state(cmd_buffer->device, - surface_state.map, - stage, desc->type, - bo_offset, desc->range); - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(surface_state); - - break; - } - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: @@ -755,6 +728,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, break; } + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: surface_state = desc->buffer_view->surface_state; bo = desc->buffer_view->bo; diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index df53edd902c..6d38d114f96 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -82,6 +82,7 @@ VkResult anv_CreateDescriptorSetLayout( uint32_t sampler_count[MESA_SHADER_STAGES] = { 0, }; uint32_t surface_count[MESA_SHADER_STAGES] = { 0, }; uint32_t image_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t buffer_count = 0; uint32_t dynamic_offset_count = 0; for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { @@ -106,15 +107,19 @@ VkResult anv_CreateDescriptorSetLayout( } switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + set_layout->binding[b].buffer_index = buffer_count; + buffer_count += binding->descriptorCount; + /* fall through */ + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: anv_foreach_stage(s, binding->stageFlags) { set_layout->binding[b].stage[s].surface_index = surface_count[s]; @@ -161,6 +166,7 @@ VkResult anv_CreateDescriptorSetLayout( set_layout->shader_stages |= binding->stageFlags; } + set_layout->buffer_count = buffer_count; set_layout->dynamic_offset_count = dynamic_offset_count; *pSetLayout = 
anv_descriptor_set_layout_to_handle(set_layout); @@ -371,6 +377,21 @@ anv_descriptor_set_create(struct anv_device *device, desc += layout->binding[b].array_size; } + /* XXX: Use the pool */ + set->buffer_views = + anv_alloc(&device->alloc, + sizeof(set->buffer_views[0]) * layout->buffer_count, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!set->buffer_views) { + anv_free(&device->alloc, set); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + for (uint32_t b = 0; b < layout->buffer_count; b++) { + set->buffer_views[b].surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + *out_set = set; return VK_SUCCESS; @@ -380,7 +401,13 @@ void anv_descriptor_set_destroy(struct anv_device *device, struct anv_descriptor_set *set) { - anv_free(&device->alloc /* XXX: Use the pool */, set); + /* XXX: Use the pool */ + for (uint32_t b = 0; b < set->layout->buffer_count; b++) + anv_state_pool_free(&device->surface_state_pool, + set->buffer_views[b].surface_state); + + anv_free(&device->alloc, set->buffer_views); + anv_free(&device->alloc, set); } VkResult anv_AllocateDescriptorSets( @@ -430,12 +457,14 @@ VkResult anv_FreeDescriptorSets( } void anv_UpdateDescriptorSets( - VkDevice device, + VkDevice _device, uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies) { + ANV_FROM_HANDLE(anv_device, device, _device); + for (uint32_t i = 0; i < descriptorWriteCount; i++) { const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; ANV_FROM_HANDLE(anv_descriptor_set, set, write->dstSet); @@ -514,19 +543,37 @@ void anv_UpdateDescriptorSets( ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer); assert(buffer); - desc[j] = (struct anv_descriptor) { - .type = write->descriptorType, - .buffer = buffer, - .offset = write->pBufferInfo[j].offset, - .range = write->pBufferInfo[j].range, - }; + struct anv_buffer_view *view = + 
&set->buffer_views[bind_layout->descriptor_index + j]; + + const struct anv_format *format = + anv_format_for_descriptor_type(write->descriptorType); + + view->format = format->surface_format; + view->bo = buffer->bo; + view->offset = buffer->offset + write->pBufferInfo[j].offset; /* For buffers with dynamic offsets, we use the full possible * range in the surface state and do the actual range-checking * in the shader. */ if (bind_layout->dynamic_offset_index >= 0) - desc[j].range = buffer->size - desc[j].offset; + view->range = buffer->size - write->pBufferInfo[j].offset; + else + view->range = write->pBufferInfo[j].range; + + anv_fill_buffer_surface_state(device, view->surface_state.map, + view->format, + view->offset, view->range, 1); + + if (!device->info.has_llc) + anv_state_clflush(view->surface_state); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .buffer_view = view, + }; + } default: diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 6bd18952492..48992d99a71 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -808,6 +808,9 @@ struct anv_descriptor_set_binding_layout { /* Index into the dynamic state array for a dynamic buffer */ int16_t dynamic_offset_index; + /* Index into the descriptor set buffer views */ + int16_t buffer_index; + struct { /* Index into the binding table for the associated surface */ int16_t surface_index; @@ -833,6 +836,9 @@ struct anv_descriptor_set_layout { /* Shader stages affected by this descriptor set */ uint16_t shader_stages; + /* Number of buffers in this descriptor set */ + uint16_t buffer_count; + /* Number of dynamic offsets used by this descriptor set */ uint16_t dynamic_offset_count; @@ -852,17 +858,12 @@ struct anv_descriptor { }; struct anv_buffer_view *buffer_view; - - struct { - struct anv_buffer *buffer; - uint64_t offset; - uint64_t range; - }; }; }; struct anv_descriptor_set { const struct anv_descriptor_set_layout *layout; + struct 
anv_buffer_view *buffer_views; struct anv_descriptor descriptors[0]; }; @@ -1538,6 +1539,9 @@ struct anv_buffer_view { struct anv_state storage_surface_state; }; +const struct anv_format * +anv_format_for_descriptor_type(VkDescriptorType type); + void anv_fill_buffer_surface_state(struct anv_device *device, void *state, enum isl_format format, uint32_t offset, uint32_t range, -- cgit v1.2.3 From bbf99511d055c84edd16904950881f6ebf668edc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 09:39:19 -0800 Subject: gen7/8/pipeline: s/vb_used/elements in emit_vertex_input --- src/vulkan/gen7_pipeline.c | 15 +++++++-------- src/vulkan/gen8_pipeline.c | 19 +++++++++---------- 2 files changed, 16 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 837684d4858..0ac93b9055c 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -37,24 +37,23 @@ emit_vertex_input(struct anv_pipeline *pipeline, const VkPipelineVertexInputStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { - - uint32_t vb_used; + uint32_t elements; if (extra && extra->disable_vs) { /* If the VS is disabled, just assume the user knows what they're * doing and apply the layout blindly. This can only come from * meta, so this *should* be safe. 
*/ - vb_used = 0; + elements = 0; for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) - vb_used |= (1 << info->pVertexAttributeDescriptions[i].location); + elements |= (1 << info->pVertexAttributeDescriptions[i].location); } else { /* Pull inputs_read out of the VS prog data */ uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); - vb_used = inputs_read >> VERT_ATTRIB_GENERIC0; + elements = inputs_read >> VERT_ATTRIB_GENERIC0; } - uint32_t vb_count = __builtin_popcount(vb_used); + uint32_t vb_count = __builtin_popcount(elements); if (pipeline->vs_prog_data.uses_vertexid || pipeline->vs_prog_data.uses_instanceid) @@ -76,10 +75,10 @@ emit_vertex_input(struct anv_pipeline *pipeline, assert(desc->binding < 32); - if ((vb_used & (1 << desc->location)) == 0) + if ((elements & (1 << desc->location)) == 0) continue; /* Binding unused */ - uint32_t slot = __builtin_popcount(vb_used & ((1 << desc->location) - 1)); + uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1)); struct GEN7_VERTEX_ELEMENT_STATE element = { .VertexBufferIndex = desc->binding, diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 9b3f38968b7..20cd9028093 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -37,29 +37,28 @@ emit_vertex_input(struct anv_pipeline *pipeline, const VkPipelineVertexInputStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { - static_assert(ANV_GEN >= 8, "should be compiling this for gen < 8"); - uint32_t vb_used; + uint32_t elements; if (extra && extra->disable_vs) { /* If the VS is disabled, just assume the user knows what they're * doing and apply the layout blindly. This can only come from * meta, so this *should* be safe. 
*/ - vb_used = 0; + elements = 0; for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) - vb_used |= (1 << info->pVertexAttributeDescriptions[i].location); + elements |= (1 << info->pVertexAttributeDescriptions[i].location); } else { /* Pull inputs_read out of the VS prog data */ uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); - vb_used = inputs_read >> VERT_ATTRIB_GENERIC0; + elements = inputs_read >> VERT_ATTRIB_GENERIC0; } - const uint32_t num_dwords = 1 + __builtin_popcount(vb_used) * 2; + const uint32_t num_dwords = 1 + __builtin_popcount(elements) * 2; uint32_t *p; - if (vb_used != 0) { + if (elements != 0) { p = anv_batch_emitn(&pipeline->batch, num_dwords, GENX(3DSTATE_VERTEX_ELEMENTS)); memset(p + 1, 0, (num_dwords - 1) * 4); @@ -72,10 +71,10 @@ emit_vertex_input(struct anv_pipeline *pipeline, assert(desc->binding < 32); - if ((vb_used & (1 << desc->location)) == 0) + if ((elements & (1 << desc->location)) == 0) continue; /* Binding unused */ - uint32_t slot = __builtin_popcount(vb_used & ((1 << desc->location) - 1)); + uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1)); struct GENX(VERTEX_ELEMENT_STATE) element = { .VertexBufferIndex = desc->binding, @@ -98,7 +97,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, .InstanceDataStepRate = 1); } - const uint32_t id_slot = __builtin_popcount(vb_used); + const uint32_t id_slot = __builtin_popcount(elements); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, .VertexIDComponentNumber = 2, -- cgit v1.2.3 From 303d095f58e8a4034fd828311cea518bcf23e788 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Dec 2015 18:19:40 -0800 Subject: nir/spirv: Add an actual CFG data structure The current data structure doesn't handle much that we couldn't handle before. However, this will be absolutely crucial for doing swith statements. 
Also, this should fix structured continues. --- src/glsl/Makefile.sources | 1 + src/glsl/nir/spirv/spirv_to_nir.c | 374 +++++++++----------------------------- src/glsl/nir/spirv/vtn_cfg.c | 312 +++++++++++++++++++++++++++++++ src/glsl/nir/spirv/vtn_private.h | 77 +++++++- 4 files changed, 473 insertions(+), 291 deletions(-) create mode 100644 src/glsl/nir/spirv/vtn_cfg.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index aa87cb1480f..65c493cd677 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -94,6 +94,7 @@ NIR_FILES = \ SPIRV_FILES = \ nir/spirv/nir_spirv.h \ nir/spirv/spirv_to_nir.c \ + nir/spirv/vtn_cfg.c \ nir/spirv/vtn_glsl450.c # libglsl diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 815b447857b..16930c46197 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -181,7 +181,7 @@ vtn_string_literal(struct vtn_builder *b, const uint32_t *words, return ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); } -static const uint32_t * +const uint32_t * vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, const uint32_t *end, vtn_instruction_handler handler) { @@ -3359,127 +3359,6 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, return true; } -static bool -vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpFunction: { - assert(b->func == NULL); - b->func = rzalloc(b, struct vtn_function); - - const struct glsl_type *result_type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); - val->func = b->func; - - const struct glsl_type *func_type = - vtn_value(b, w[4], vtn_value_type_type)->type->type; - - assert(glsl_get_function_return_type(func_type) == result_type); - - nir_function *func = - 
nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); - - func->num_params = glsl_get_length(func_type); - func->params = ralloc_array(b->shader, nir_parameter, func->num_params); - for (unsigned i = 0; i < func->num_params; i++) { - const struct glsl_function_param *param = - glsl_get_function_param(func_type, i); - func->params[i].type = param->type; - if (param->in) { - if (param->out) { - func->params[i].param_type = nir_parameter_inout; - } else { - func->params[i].param_type = nir_parameter_in; - } - } else { - if (param->out) { - func->params[i].param_type = nir_parameter_out; - } else { - assert(!"Parameter is neither in nor out"); - } - } - } - - func->return_type = glsl_get_function_return_type(func_type); - - b->func->impl = nir_function_impl_create(func); - if (!glsl_type_is_void(func->return_type)) { - b->func->impl->return_var = - nir_local_variable_create(b->func->impl, func->return_type, "ret"); - } - - b->func_param_idx = 0; - break; - } - - case SpvOpFunctionEnd: - b->func->end = w; - b->func = NULL; - break; - - case SpvOpFunctionParameter: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); - - assert(b->func_param_idx < b->func->impl->num_params); - unsigned idx = b->func_param_idx++; - - nir_variable *param = - nir_local_variable_create(b->func->impl, - b->func->impl->function->params[idx].type, - val->name); - - b->func->impl->params[idx] = param; - val->deref = nir_deref_var_create(b, param); - val->deref_type = vtn_value(b, w[1], vtn_value_type_type)->type; - break; - } - - case SpvOpLabel: { - assert(b->block == NULL); - b->block = rzalloc(b, struct vtn_block); - b->block->label = w; - vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; - - if (b->func->start_block == NULL) { - /* This is the first block encountered for this function. In this - * case, we set the start block and add it to the list of - * implemented functions that we'll walk later. 
- */ - b->func->start_block = b->block; - exec_list_push_tail(&b->functions, &b->func->node); - } - break; - } - - case SpvOpBranch: - case SpvOpBranchConditional: - case SpvOpSwitch: - case SpvOpKill: - case SpvOpReturn: - case SpvOpReturnValue: - case SpvOpUnreachable: - assert(b->block); - b->block->branch = w; - b->block = NULL; - break; - - case SpvOpSelectionMerge: - case SpvOpLoopMerge: - assert(b->block && b->block->merge_op == SpvOpNop); - b->block->merge_op = opcode; - b->block->merge_block_id = w[1]; - break; - - default: - /* Continue on as per normal */ - return true; - } - - return true; -} - static bool vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -3487,9 +3366,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, switch (opcode) { case SpvOpLabel: { struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; - assert(block->block == NULL); - - block->block = nir_cursor_current_block(b->nb.cursor); + assert(block->block == nir_cursor_current_block(b->nb.cursor)); break; } @@ -3697,196 +3574,119 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, return true; } -static void -vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, - struct vtn_block *break_block, struct vtn_block *cont_block, - struct vtn_block *end_block) -{ - struct vtn_block *block = start; - while (block != end_block) { - if (block->merge_op == SpvOpLoopMerge) { - /* This is the jump into a loop. 
*/ - struct vtn_block *new_cont_block = block; - struct vtn_block *new_break_block = - vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; +static void vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list); - nir_loop *loop = nir_loop_create(b->shader); - nir_cf_node_insert(b->nb.cursor, &loop->cf_node); - - /* Reset the merge_op to prerevent infinite recursion */ - block->merge_op = SpvOpNop; - - b->nb.cursor = nir_after_cf_list(&loop->body); - vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); - - b->nb.cursor = nir_after_cf_node(&loop->cf_node); - block = new_break_block; - continue; - } +static void +vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type) +{ + nir_jump_type jump_type; + switch (branch_type) { + case vtn_branch_type_break: jump_type = nir_jump_break; break; + case vtn_branch_type_continue: jump_type = nir_jump_continue; break; + case vtn_branch_type_return: jump_type = nir_jump_return; break; + default: + unreachable("Invalid branch type"); + } - const uint32_t *w = block->branch; - SpvOp branch_op = w[0] & SpvOpCodeMask; + nir_jump_instr *jump = nir_jump_instr_create(b->shader, jump_type); + nir_builder_instr_insert(&b->nb, &jump->instr); +} - b->block = block; - vtn_foreach_instruction(b, block->label, block->branch, - vtn_handle_body_instruction); +static void +vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list) +{ + list_for_each_entry(struct vtn_cf_node, node, cf_list, link) { + switch (node->type) { + case vtn_cf_node_type_block: { + struct vtn_block *block = (struct vtn_block *)node; - nir_block *cur_block = nir_cursor_current_block(b->nb.cursor); - assert(cur_block == block->block); - _mesa_hash_table_insert(b->block_table, cur_block, block); + block->block = nir_cursor_current_block(b->nb.cursor); + _mesa_hash_table_insert(b->block_table, block->block, block); - switch (branch_op) { - case SpvOpBranch: { - struct vtn_block *branch_block = - vtn_value(b, w[1], 
vtn_value_type_block)->block; + vtn_foreach_instruction(b, block->label, + block->merge ? block->merge : block->branch, + vtn_handle_body_instruction); - if (branch_block == break_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_break); - nir_builder_instr_insert(&b->nb, &jump->instr); + if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) { + struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]); + vtn_variable_store(b, src, + nir_deref_var_create(b, b->impl->return_var), + NULL); + } - return; - } else if (branch_block == cont_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_continue); - nir_builder_instr_insert(&b->nb, &jump->instr); + if (block->branch_type != vtn_branch_type_none) + vtn_emit_branch(b, block->branch_type); - return; - } else if (branch_block == end_block) { - /* We're branching to the merge block of an if, since for loops - * and functions end_block == NULL, so we're done here. - */ - return; - } else { - /* We're branching to another block, and according to the rules, - * we can only branch to another block with one predecessor (so - * we're the only one jumping to it) so we can just process it - * next. 
- */ - block = branch_block; - continue; - } + break; } - case SpvOpBranchConditional: { - /* Gather up the branch blocks */ - struct vtn_block *then_block = - vtn_value(b, w[2], vtn_value_type_block)->block; - struct vtn_block *else_block = - vtn_value(b, w[3], vtn_value_type_block)->block; + case vtn_cf_node_type_if: { + struct vtn_if *vtn_if = (struct vtn_if *)node; nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); + if_stmt->condition = + nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def); nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); - if (then_block == break_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_break); - nir_instr_insert_after_cf_list(&if_stmt->then_list, - &jump->instr); - block = else_block; - } else if (else_block == break_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_break); - nir_instr_insert_after_cf_list(&if_stmt->else_list, - &jump->instr); - block = then_block; - } else if (then_block == cont_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_continue); - nir_instr_insert_after_cf_list(&if_stmt->then_list, - &jump->instr); - block = else_block; - } else if (else_block == cont_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_continue); - nir_instr_insert_after_cf_list(&if_stmt->else_list, - &jump->instr); - block = then_block; - } else { - /* According to the rules we're branching to two blocks that don't - * have any other predecessors, so we can handle this as a - * conventional if. 
- */ - assert(block->merge_op == SpvOpSelectionMerge); - struct vtn_block *merge_block = - vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); + if (vtn_if->then_type == vtn_branch_type_none) + vtn_emit_cf_list(b, &vtn_if->then_body); + else + vtn_emit_branch(b, vtn_if->then_type); - b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); - vtn_walk_blocks(b, then_block, break_block, cont_block, merge_block); + b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); + if (vtn_if->else_type == vtn_branch_type_none) + vtn_emit_cf_list(b, &vtn_if->else_body); + else + vtn_emit_branch(b, vtn_if->else_type); - b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); - vtn_walk_blocks(b, else_block, break_block, cont_block, merge_block); + b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); + break; + } - b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); - block = merge_block; - continue; - } + case vtn_cf_node_type_loop: { + struct vtn_loop *vtn_loop = (struct vtn_loop *)node; - /* If we got here then we inserted a predicated break or continue - * above and we need to handle the other case. We already set - * `block` above to indicate what block to visit after the - * predicated break. - */ + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert(b->nb.cursor, &loop->cf_node); - /* It's possible that the other branch is also a break/continue. - * If it is, we handle that here. 
- */ - if (block == break_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_break); - nir_builder_instr_insert(&b->nb, &jump->instr); - - return; - } else if (block == cont_block) { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_continue); - nir_builder_instr_insert(&b->nb, &jump->instr); - - return; - } + if (!list_empty(&vtn_loop->cont_body)) { + /* If we have a non-trivial continue body then we need to put + * it at the beginning of the loop with a flag to ensure that + * it doesn't get executed in the first iteration. + */ + nir_variable *do_cont = + nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont"); - /* If we got here then there was a predicated break/continue but - * the other half of the if has stuff in it. `block` was already - * set above so there is nothing left for us to do. - */ - continue; - } + b->nb.cursor = nir_before_cf_node(&loop->cf_node); + nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1); - case SpvOpReturn: { - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_return); - nir_builder_instr_insert(&b->nb, &jump->instr); - return; - } + b->nb.cursor = nir_after_cf_list(&loop->body); + nir_if *cont_if = nir_if_create(b->shader); + cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont)); + nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node); - case SpvOpReturnValue: { - struct vtn_ssa_value *src = vtn_ssa_value(b, w[1]); - vtn_variable_store(b, src, - nir_deref_var_create(b, b->impl->return_var), - NULL); + b->nb.cursor = nir_after_cf_list(&cont_if->then_list); + vtn_emit_cf_list(b, &vtn_loop->cont_body); - nir_jump_instr *jump = nir_jump_instr_create(b->shader, - nir_jump_return); - nir_builder_instr_insert(&b->nb, &jump->instr); - return; - } + b->nb.cursor = nir_after_cf_node(&cont_if->cf_node); + nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1); + } - case SpvOpKill: { - nir_intrinsic_instr *discard = - 
nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard); - nir_builder_instr_insert(&b->nb, &discard->instr); - return; + b->nb.cursor = nir_after_cf_node(&loop->cf_node); + vtn_emit_cf_list(b, &vtn_loop->body); + break; } - case SpvOpSwitch: - case SpvOpUnreachable: + case vtn_cf_node_type_switch: + case vtn_cf_node_type_case: default: - unreachable("Unhandled opcode"); + unreachable("Invalid CF node type"); } } } + nir_shader * spirv_to_nir(const uint32_t *words, size_t word_count, gl_shader_stage stage, @@ -3924,9 +3724,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, words = vtn_foreach_instruction(b, words, word_end, vtn_handle_preamble_instruction); - /* Do a very quick CFG analysis pass */ - vtn_foreach_instruction(b, words, word_end, - vtn_handle_first_cfg_pass_instruction); + vtn_build_cfg(b, words, word_end); foreach_list_typed(struct vtn_function, func, node, &b->functions) { b->impl = func->impl; @@ -3936,7 +3734,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, _mesa_key_pointer_equal); nir_builder_init(&b->nb, b->impl); b->nb.cursor = nir_after_cf_list(&b->impl->body); - vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); + vtn_emit_cf_list(b, &func->body); vtn_foreach_instruction(b, func->start_block->label, func->end, vtn_handle_phi_second_pass); } diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c new file mode 100644 index 00000000000..e6258175c19 --- /dev/null +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -0,0 +1,312 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vtn_private.h" + +static bool +vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpFunction: { + assert(b->func == NULL); + b->func = rzalloc(b, struct vtn_function); + + list_inithead(&b->func->body); + b->func->control = w[3]; + + const struct glsl_type *result_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + val->func = b->func; + + const struct glsl_type *func_type = + vtn_value(b, w[4], vtn_value_type_type)->type->type; + + assert(glsl_get_function_return_type(func_type) == result_type); + + nir_function *func = + nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); + + func->num_params = glsl_get_length(func_type); + func->params = ralloc_array(b->shader, nir_parameter, func->num_params); + for (unsigned i = 0; i < func->num_params; i++) { + const struct glsl_function_param *param = + glsl_get_function_param(func_type, i); + func->params[i].type = param->type; + if (param->in) { + if (param->out) { + func->params[i].param_type = nir_parameter_inout; + } else { + func->params[i].param_type = nir_parameter_in; + } + } else { + if (param->out) { + func->params[i].param_type = 
nir_parameter_out; + } else { + assert(!"Parameter is neither in nor out"); + } + } + } + + func->return_type = glsl_get_function_return_type(func_type); + + b->func->impl = nir_function_impl_create(func); + if (!glsl_type_is_void(func->return_type)) { + b->func->impl->return_var = + nir_local_variable_create(b->func->impl, func->return_type, "ret"); + } + + b->func_param_idx = 0; + break; + } + + case SpvOpFunctionEnd: + b->func->end = w; + b->func = NULL; + break; + + case SpvOpFunctionParameter: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + + assert(b->func_param_idx < b->func->impl->num_params); + unsigned idx = b->func_param_idx++; + + nir_variable *param = + nir_local_variable_create(b->func->impl, + b->func->impl->function->params[idx].type, + val->name); + + b->func->impl->params[idx] = param; + val->deref = nir_deref_var_create(b, param); + val->deref_type = vtn_value(b, w[1], vtn_value_type_type)->type; + break; + } + + case SpvOpLabel: { + assert(b->block == NULL); + b->block = rzalloc(b, struct vtn_block); + b->block->node.type = vtn_cf_node_type_block; + b->block->label = w; + vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; + + if (b->func->start_block == NULL) { + /* This is the first block encountered for this function. In this + * case, we set the start block and add it to the list of + * implemented functions that we'll walk later. 
+ */ + b->func->start_block = b->block; + exec_list_push_tail(&b->functions, &b->func->node); + } + break; + } + + case SpvOpSelectionMerge: + case SpvOpLoopMerge: + assert(b->block && b->block->merge == NULL); + b->block->merge = w; + break; + + case SpvOpBranch: + case SpvOpBranchConditional: + case SpvOpSwitch: + case SpvOpKill: + case SpvOpReturn: + case SpvOpReturnValue: + case SpvOpUnreachable: + assert(b->block && b->block->branch == NULL); + b->block->branch = w; + b->block = NULL; + break; + + default: + /* Continue on as per normal */ + return true; + } + + return true; +} + +static void +vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, + struct vtn_block *start, struct vtn_block *break_block, + struct vtn_block *cont_block, struct vtn_block *end_block) +{ + struct vtn_block *block = start; + while (block != end_block) { + if (block->merge && (*block->merge & SpvOpCodeMask) == SpvOpLoopMerge && + !block->loop) { + struct vtn_loop *loop = ralloc(b, struct vtn_loop); + + loop->node.type = vtn_cf_node_type_loop; + list_inithead(&loop->body); + list_inithead(&loop->cont_body); + loop->control = block->merge[3]; + + list_addtail(&loop->node.link, cf_list); + block->loop = loop; + + struct vtn_block *loop_break = + vtn_value(b, block->merge[1], vtn_value_type_block)->block; + struct vtn_block *loop_cont = + vtn_value(b, block->merge[2], vtn_value_type_block)->block; + + /* Note: This recursive call will start with the current block as + * its start block. If we weren't careful, we would get here + * again and end up in infinite recursion. This is why we set + * block->loop above and check for it before creating one. This + * way, we only create the loop once and the second call that + * tries to handle this loop goes to the cases below and gets + * handled as a regular block. 
+ */ + vtn_cfg_walk_blocks(b, &loop->body, block, + loop_break, loop_cont, NULL ); + vtn_cfg_walk_blocks(b, &loop->body, loop_cont, NULL, NULL, block); + + block = loop_break; + continue; + } + + list_addtail(&block->node.link, cf_list); + + switch (*block->branch & SpvOpCodeMask) { + case SpvOpBranch: { + struct vtn_block *branch_block = + vtn_value(b, block->branch[1], vtn_value_type_block)->block; + + if (branch_block == break_block) { + block->branch_type = vtn_branch_type_break; + return; + } else if (branch_block == cont_block) { + block->branch_type = vtn_branch_type_continue; + return; + } else if (branch_block == end_block) { + block->branch_type = vtn_branch_type_none; + return; + } else { + /* If it's not one of the above, then we must be jumping to the + * next block in the current CF list. Just keep going. + */ + block->branch_type = vtn_branch_type_none; + block = branch_block; + continue; + } + } + + case SpvOpReturn: + case SpvOpReturnValue: + block->branch_type = vtn_branch_type_return; + return; + + case SpvOpKill: + block->branch_type = vtn_branch_type_discard; + return; + + case SpvOpBranchConditional: { + struct vtn_block *then_block = + vtn_value(b, block->branch[2], vtn_value_type_block)->block; + struct vtn_block *else_block = + vtn_value(b, block->branch[3], vtn_value_type_block)->block; + + struct vtn_if *if_stmt = ralloc(b, struct vtn_if); + + if_stmt->node.type = vtn_cf_node_type_if; + if_stmt->condition = block->branch[1]; + list_inithead(&if_stmt->then_body); + list_inithead(&if_stmt->else_body); + + list_addtail(&if_stmt->node.link, cf_list); + + /* OpBranchConditional must be at the end of a block with either + * an OpSelectionMerge or an OpLoopMerge. 
+ */ + assert(block->merge); + if ((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) + if_stmt->control = block->merge[2]; + + struct vtn_block *next_block = NULL; + if (then_block == break_block) { + if_stmt->then_type = vtn_branch_type_break; + } else if (then_block == cont_block) { + if_stmt->then_type = vtn_branch_type_continue; + } else { + if_stmt->then_type = vtn_branch_type_none; + next_block = then_block; + } + + if (else_block == break_block) { + if_stmt->else_type = vtn_branch_type_break; + } else if (else_block == cont_block) { + if_stmt->else_type = vtn_branch_type_continue; + } else { + if_stmt->else_type = vtn_branch_type_none; + next_block = else_block; + } + + if (if_stmt->then_type == vtn_branch_type_none && + if_stmt->else_type == vtn_branch_type_none) { + /* Neither side of the if is something we can short-circuit. */ + assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge); + struct vtn_block *merge_block = + vtn_value(b, block->merge[1], vtn_value_type_block)->block; + + vtn_cfg_walk_blocks(b, &if_stmt->then_body, then_block, + break_block, cont_block, merge_block); + vtn_cfg_walk_blocks(b, &if_stmt->else_body, else_block, + break_block, cont_block, merge_block); + + block = merge_block; + continue; + } else if (if_stmt->then_type != vtn_branch_type_none && + if_stmt->else_type != vtn_branch_type_none) { + /* Both sides were short-circuited. We're done here. */ + return; + } else { + /* Exeactly one side of the branch could be short-circuited. + * We set the branch up as a predicated break/continue and we + * continue on with the other side as if it were what comes + * after the if. 
+ */ + block = next_block; + continue; + } + unreachable("Should have returned or continued"); + } + + case SpvOpSwitch: + case SpvOpUnreachable: + default: + unreachable("Unhandled opcode"); + } + } +} + +void +vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end) +{ + vtn_foreach_instruction(b, words, end, + vtn_cfg_handle_prepass_instruction); + + foreach_list_typed(struct vtn_function, func, node, &b->functions) + vtn_cfg_walk_blocks(b, &func->body, func->start_block, NULL, NULL, NULL); +} diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 2fea244bb74..e6d8e190cb9 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -49,12 +49,72 @@ enum vtn_value_type { vtn_value_type_sampled_image, }; +enum vtn_branch_type { + vtn_branch_type_none, + vtn_branch_type_break, + vtn_branch_type_continue, + vtn_branch_type_discard, + vtn_branch_type_return, +}; + +enum vtn_cf_node_type { + vtn_cf_node_type_block, + vtn_cf_node_type_if, + vtn_cf_node_type_loop, + vtn_cf_node_type_switch, + vtn_cf_node_type_case, +}; + +struct vtn_cf_node { + struct list_head link; + enum vtn_cf_node_type type; +}; + +struct vtn_loop { + struct vtn_cf_node node; + + /* The main body of the loop */ + struct list_head body; + + /* The "continue" part of the loop. This gets executed after the body + * and is where you go when you hit a continue. + */ + struct list_head cont_body; + + SpvLoopControlMask control; +}; + +struct vtn_if { + struct vtn_cf_node node; + + uint32_t condition; + + enum vtn_branch_type then_type; + struct list_head then_body; + + enum vtn_branch_type else_type; + struct list_head else_body; + + SpvSelectionControlMask control; +}; + struct vtn_block { - /* Merge opcode if this block contains a merge; SpvOpNop otherwise. 
*/ - SpvOp merge_op; - uint32_t merge_block_id; + struct vtn_cf_node node; + + /** A pointer to the label instruction */ const uint32_t *label; + + /** A pointer to the merge instruction (or NULL if non exists) */ + const uint32_t *merge; + + /** A pointer to the branch instruction that ends this block */ const uint32_t *branch; + + enum vtn_branch_type branch_type; + + /** Points to the loop that this block starts (if it starts a loop) */ + struct vtn_loop *loop; + nir_block *block; }; @@ -64,12 +124,23 @@ struct vtn_function { nir_function_impl *impl; struct vtn_block *start_block; + struct list_head body; + const uint32_t *end; + + SpvFunctionControlMask control; }; +void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, + const uint32_t *end); + typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, const uint32_t *, unsigned); +const uint32_t * +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, + const uint32_t *end, vtn_instruction_handler handler); + struct vtn_ssa_value { union { nir_ssa_def *def; -- cgit v1.2.3 From cf555dc1c2f24fec070c02d2df29c6f0b3b1e262 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Dec 2015 21:26:13 -0800 Subject: nir/spirv: A couple simple loop fixes --- src/glsl/nir/spirv/spirv_to_nir.c | 4 +++- src/glsl/nir/spirv/vtn_cfg.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 16930c46197..7aea750cb02 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3673,8 +3673,10 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list) nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1); } - b->nb.cursor = nir_after_cf_node(&loop->cf_node); + b->nb.cursor = nir_after_cf_list(&loop->body); vtn_emit_cf_list(b, &vtn_loop->body); + + b->nb.cursor = nir_after_cf_node(&loop->cf_node); break; } diff --git 
a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index e6258175c19..ee49dc23791 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -181,7 +181,7 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, */ vtn_cfg_walk_blocks(b, &loop->body, block, loop_break, loop_cont, NULL ); - vtn_cfg_walk_blocks(b, &loop->body, loop_cont, NULL, NULL, block); + vtn_cfg_walk_blocks(b, &loop->cont_body, loop_cont, NULL, NULL, block); block = loop_break; continue; -- cgit v1.2.3 From 4e22cd2e32d9d579e3496be710c71b802e08b614 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 10:24:54 -0800 Subject: nir/spirv: Add support for switch statements --- src/glsl/nir/spirv/spirv_to_nir.c | 147 ++++++++++++++++++++---- src/glsl/nir/spirv/vtn_cfg.c | 236 ++++++++++++++++++++++++++++++-------- src/glsl/nir/spirv/vtn_private.h | 37 +++++- 3 files changed, 347 insertions(+), 73 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 7aea750cb02..1539c750036 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3574,26 +3574,42 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, return true; } -static void vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list); +/* XXX: This should go in nir_builder.h */ +static inline void +nir_jump(nir_builder *build, nir_jump_type jump_type) +{ + nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type); + nir_builder_instr_insert(build, &jump->instr); +} static void -vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type) +vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type, + nir_variable *switch_fall_var, bool *has_switch_break) { - nir_jump_type jump_type; switch (branch_type) { - case vtn_branch_type_break: jump_type = nir_jump_break; break; - case vtn_branch_type_continue: jump_type = nir_jump_continue; 
break; - case vtn_branch_type_return: jump_type = nir_jump_return; break; + case vtn_branch_type_switch_break: + nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); + *has_switch_break = true; + break; + case vtn_branch_type_switch_fallthrough: + break; /* Nothing to do */ + case vtn_branch_type_loop_break: + nir_jump(&b->nb, nir_jump_break); + break; + case vtn_branch_type_loop_continue: + nir_jump(&b->nb, nir_jump_continue); + break; + case vtn_branch_type_return: + nir_jump(&b->nb, nir_jump_return); + break; default: unreachable("Invalid branch type"); } - - nir_jump_instr *jump = nir_jump_instr_create(b->shader, jump_type); - nir_builder_instr_insert(&b->nb, &jump->instr); } static void -vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list) +vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, + nir_variable *switch_fall_var, bool *has_switch_break) { list_for_each_entry(struct vtn_cf_node, node, cf_list, link) { switch (node->type) { @@ -3614,8 +3630,10 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list) NULL); } - if (block->branch_type != vtn_branch_type_none) - vtn_emit_branch(b, block->branch_type); + if (block->branch_type != vtn_branch_type_none) { + vtn_emit_branch(b, block->branch_type, + switch_fall_var, has_switch_break); + } break; } @@ -3628,19 +3646,38 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list) nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def); nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); + bool sw_break = false; + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); if (vtn_if->then_type == vtn_branch_type_none) - vtn_emit_cf_list(b, &vtn_if->then_body); + vtn_emit_cf_list(b, &vtn_if->then_body, switch_fall_var, &sw_break); else - vtn_emit_branch(b, vtn_if->then_type); + vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break); b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); if (vtn_if->else_type == 
vtn_branch_type_none) - vtn_emit_cf_list(b, &vtn_if->else_body); + vtn_emit_cf_list(b, &vtn_if->else_body, switch_fall_var, &sw_break); else - vtn_emit_branch(b, vtn_if->else_type); + vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break); b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); + + /* If we encountered a switch break somewhere inside of the if, + * then it would have been handled correctly by calling + * emit_cf_list or emit_branch for the interrior. However, we + * need to predicate everything following on wether or not we're + * still going. + */ + if (sw_break) { + *has_switch_break = true; + + nir_if *switch_if = nir_if_create(b->shader); + switch_if->condition = + nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var)); + nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node); + + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); + } break; } @@ -3667,21 +3704,89 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list) nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node); b->nb.cursor = nir_after_cf_list(&cont_if->then_list); - vtn_emit_cf_list(b, &vtn_loop->cont_body); + vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL); b->nb.cursor = nir_after_cf_node(&cont_if->cf_node); nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1); } b->nb.cursor = nir_after_cf_list(&loop->body); - vtn_emit_cf_list(b, &vtn_loop->body); + vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL); b->nb.cursor = nir_after_cf_node(&loop->cf_node); break; } - case vtn_cf_node_type_switch: - case vtn_cf_node_type_case: + case vtn_cf_node_type_switch: { + struct vtn_switch *vtn_switch = (struct vtn_switch *)node; + + /* First, we create a variable to keep track of whether or not the + * switch is still going at any given point. Any switch breaks + * will set this variable to false. 
+ */ + nir_variable *fall_var = + nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall"); + nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1); + + /* Next, we gather up all of the conditions. We have to do this + * up-front because we also need to build an "any" condition so + * that we can use !any for default. + */ + const int num_cases = list_length(&vtn_switch->cases); + NIR_VLA(nir_ssa_def *, conditions, num_cases); + + nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def; + /* An accumulation of all conditions. Used for the default */ + nir_ssa_def *any = NULL; + + int i = 0; + list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { + if (cse->is_default) { + conditions[i++] = NULL; + continue; + } + + nir_ssa_def *cond = NULL; + nir_array_foreach(&cse->values, uint32_t, val) { + nir_ssa_def *is_val = + nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val)); + + cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val; + } + + any = any ? nir_ior(&b->nb, any, cond) : cond; + conditions[i++] = cond; + } + assert(i == num_cases); + + /* Now we can walk the list of cases and actually emit code */ + i = 0; + list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { + /* Figure out the condition */ + nir_ssa_def *cond = conditions[i++]; + if (cse->is_default) { + assert(cond == NULL); + cond = nir_inot(&b->nb, any); + } + /* Take fallthrough into account */ + cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var)); + + nir_if *case_if = nir_if_create(b->nb.shader); + case_if->condition = nir_src_for_ssa(cond); + nir_cf_node_insert(b->nb.cursor, &case_if->cf_node); + + bool has_break = false; + b->nb.cursor = nir_after_cf_list(&case_if->then_list); + vtn_emit_cf_list(b, &cse->body, fall_var, &has_break); + (void)has_break; /* We don't care */ + + b->nb.cursor = nir_after_cf_node(&case_if->cf_node); + } + assert(i == num_cases); + + break; + } + default: unreachable("Invalid CF node type"); } @@ -3736,7 +3841,7 
@@ spirv_to_nir(const uint32_t *words, size_t word_count, _mesa_key_pointer_equal); nir_builder_init(&b->nb, b->impl); b->nb.cursor = nir_after_cf_list(&b->impl->body); - vtn_emit_cf_list(b, &func->body); + vtn_emit_cf_list(b, &func->body, NULL, NULL); vtn_foreach_instruction(b, func->start_block->label, func->end, vtn_handle_phi_second_pass); } diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index ee49dc23791..9d8d451b5e0 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -147,13 +147,99 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, return true; } +static void +vtn_add_case(struct vtn_builder *b, struct vtn_switch *swtch, + struct vtn_block *break_block, + uint32_t block_id, uint32_t val, bool is_default) +{ + struct vtn_block *case_block = + vtn_value(b, block_id, vtn_value_type_block)->block; + + /* Don't create dummy cases that just break */ + if (case_block == break_block) + return; + + if (case_block->switch_case == NULL) { + struct vtn_case *c = ralloc(b, struct vtn_case); + + list_inithead(&c->body); + c->fallthrough = NULL; + nir_array_init(&c->values, b); + c->is_default = false; + c->visited = false; + + list_addtail(&c->link, &swtch->cases); + + case_block->switch_case = c; + } + + if (is_default) { + case_block->switch_case->is_default = true; + } else { + nir_array_add(&case_block->switch_case->values, uint32_t, val); + } +} + +/* This function performs a depth-first search of the cases and puts them + * in fall-through order. + */ +static void +vtn_order_case(struct vtn_switch *swtch, struct vtn_case *cse) +{ + if (cse->visited) + return; + + cse->visited = true; + + list_del(&cse->link); + + if (cse->fallthrough) { + vtn_order_case(swtch, cse->fallthrough); + + /* If we have a fall-through, place this case right before the case it + * falls through to. This ensures that fallthroughs come one after + * the other. 
These two can never get separated because that would + * imply something else falling through to the same case. Also, this + * can't break ordering because the DFS ensures that this case is + * visited before anything that falls through to it. + */ + list_addtail(&cse->link, &cse->fallthrough->link); + } else { + list_add(&cse->link, &swtch->cases); + } +} + +static enum vtn_branch_type +vtn_get_branch_type(struct vtn_block *block, + struct vtn_case *swcase, struct vtn_block *switch_break, + struct vtn_block *loop_break, struct vtn_block *loop_cont) +{ + if (block->switch_case) { + /* This branch is actually a fallthrough */ + assert(swcase->fallthrough == NULL || + swcase->fallthrough == block->switch_case); + swcase->fallthrough = block->switch_case; + return vtn_branch_type_switch_fallthrough; + } else if (block == switch_break) { + return vtn_branch_type_switch_break; + } else if (block == loop_break) { + return vtn_branch_type_loop_break; + } else if (block == loop_cont) { + return vtn_branch_type_loop_continue; + } else { + return vtn_branch_type_none; + } +} + static void vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, - struct vtn_block *start, struct vtn_block *break_block, - struct vtn_block *cont_block, struct vtn_block *end_block) + struct vtn_block *start, struct vtn_case *switch_case, + struct vtn_block *switch_break, + struct vtn_block *loop_break, struct vtn_block *loop_cont, + struct vtn_block *end) { struct vtn_block *block = start; - while (block != end_block) { + while (block != end) { if (block->merge && (*block->merge & SpvOpCodeMask) == SpvOpLoopMerge && !block->loop) { struct vtn_loop *loop = ralloc(b, struct vtn_loop); @@ -166,9 +252,9 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, list_addtail(&loop->node.link, cf_list); block->loop = loop; - struct vtn_block *loop_break = + struct vtn_block *new_loop_break = vtn_value(b, block->merge[1], vtn_value_type_block)->block; - struct vtn_block 
*loop_cont = + struct vtn_block *new_loop_cont = vtn_value(b, block->merge[2], vtn_value_type_block)->block; /* Note: This recursive call will start with the current block as @@ -178,12 +264,19 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, * way, we only create the loop once and the second call that * tries to handle this loop goes to the cases below and gets * handled as a regular block. + * + * Note: When we make the recursive walk calls, we pass NULL for + * the switch break since you have to break out of the loop first. + * We do, however, still pass the current switch case because it's + * possible that the merge block for the loop is the start of + * another case. */ - vtn_cfg_walk_blocks(b, &loop->body, block, - loop_break, loop_cont, NULL ); - vtn_cfg_walk_blocks(b, &loop->cont_body, loop_cont, NULL, NULL, block); + vtn_cfg_walk_blocks(b, &loop->body, block, switch_case, NULL, + new_loop_break, new_loop_cont, NULL ); + vtn_cfg_walk_blocks(b, &loop->cont_body, new_loop_cont, NULL, NULL, + NULL, NULL, block); - block = loop_break; + block = new_loop_break; continue; } @@ -194,23 +287,15 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, struct vtn_block *branch_block = vtn_value(b, block->branch[1], vtn_value_type_block)->block; - if (branch_block == break_block) { - block->branch_type = vtn_branch_type_break; - return; - } else if (branch_block == cont_block) { - block->branch_type = vtn_branch_type_continue; - return; - } else if (branch_block == end_block) { - block->branch_type = vtn_branch_type_none; + block->branch_type = vtn_get_branch_type(branch_block, + switch_case, switch_break, + loop_break, loop_cont); + + if (block->branch_type != vtn_branch_type_none) return; - } else { - /* If it's not one of the above, then we must be jumping to the - * next block in the current CF list. Just keep going. 
- */ - block->branch_type = vtn_branch_type_none; - block = branch_block; - continue; - } + + block = branch_block; + continue; } case SpvOpReturn: @@ -244,24 +329,12 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, if ((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) if_stmt->control = block->merge[2]; - struct vtn_block *next_block = NULL; - if (then_block == break_block) { - if_stmt->then_type = vtn_branch_type_break; - } else if (then_block == cont_block) { - if_stmt->then_type = vtn_branch_type_continue; - } else { - if_stmt->then_type = vtn_branch_type_none; - next_block = then_block; - } - - if (else_block == break_block) { - if_stmt->else_type = vtn_branch_type_break; - } else if (else_block == cont_block) { - if_stmt->else_type = vtn_branch_type_continue; - } else { - if_stmt->else_type = vtn_branch_type_none; - next_block = else_block; - } + if_stmt->then_type = vtn_get_branch_type(then_block, + switch_case, switch_break, + loop_break, loop_cont); + if_stmt->else_type = vtn_get_branch_type(else_block, + switch_case, switch_break, + loop_break, loop_cont); if (if_stmt->then_type == vtn_branch_type_none && if_stmt->else_type == vtn_branch_type_none) { @@ -271,9 +344,11 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, vtn_value(b, block->merge[1], vtn_value_type_block)->block; vtn_cfg_walk_blocks(b, &if_stmt->then_body, then_block, - break_block, cont_block, merge_block); + switch_case, switch_break, + loop_break, loop_cont, merge_block); vtn_cfg_walk_blocks(b, &if_stmt->else_body, else_block, - break_block, cont_block, merge_block); + switch_case, switch_break, + loop_break, loop_cont, merge_block); block = merge_block; continue; @@ -287,13 +362,74 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, * continue on with the other side as if it were what comes * after the if. 
*/ - block = next_block; + if (if_stmt->then_type == vtn_branch_type_none) { + block = then_block; + } else { + block = else_block; + } continue; } unreachable("Should have returned or continued"); } - case SpvOpSwitch: + case SpvOpSwitch: { + assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge); + struct vtn_block *break_block = + vtn_value(b, block->merge[1], vtn_value_type_block)->block; + + struct vtn_switch *swtch = ralloc(b, struct vtn_switch); + + swtch->node.type = vtn_cf_node_type_switch; + swtch->selector = block->branch[1]; + list_inithead(&swtch->cases); + + list_addtail(&swtch->node.link, cf_list); + + /* First, we go through and record all of the cases. */ + const uint32_t *branch_end = + block->branch + (block->branch[0] >> SpvWordCountShift); + + vtn_add_case(b, swtch, break_block, block->branch[2], 0, true); + for (const uint32_t *w = block->branch + 3; w < branch_end; w += 2) + vtn_add_case(b, swtch, break_block, w[1], w[0], false); + + /* Now, we go through and walk the blocks. While we walk through + * the blocks, we also gather the much-needed fall-through + * information. + */ + for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) { + struct vtn_block *case_block = + vtn_value(b, *w, vtn_value_type_block)->block; + + if (case_block == break_block) + continue; + + assert(case_block->switch_case); + + vtn_cfg_walk_blocks(b, &case_block->switch_case->body, case_block, + case_block->switch_case, break_block, + NULL, loop_cont, NULL); + } + + /* Finally, we walk over all of the cases one more time and put + * them in fall-through order. 
+ */ + for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) { + struct vtn_block *case_block = + vtn_value(b, *w, vtn_value_type_block)->block; + + if (case_block == break_block) + continue; + + assert(case_block->switch_case); + + vtn_order_case(swtch, case_block->switch_case); + } + + block = break_block; + continue; + } + case SpvOpUnreachable: default: unreachable("Unhandled opcode"); @@ -307,6 +443,8 @@ vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end) vtn_foreach_instruction(b, words, end, vtn_cfg_handle_prepass_instruction); - foreach_list_typed(struct vtn_function, func, node, &b->functions) - vtn_cfg_walk_blocks(b, &func->body, func->start_block, NULL, NULL, NULL); + foreach_list_typed(struct vtn_function, func, node, &b->functions) { + vtn_cfg_walk_blocks(b, &func->body, func->start_block, + NULL, NULL, NULL, NULL, NULL); + } } diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index e6d8e190cb9..6dfd01d5c24 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -27,6 +27,7 @@ #include "nir/nir.h" #include "nir/nir_builder.h" +#include "nir/nir_array.h" #include "nir_spirv.h" #include "spirv.h" @@ -51,8 +52,10 @@ enum vtn_value_type { enum vtn_branch_type { vtn_branch_type_none, - vtn_branch_type_break, - vtn_branch_type_continue, + vtn_branch_type_switch_break, + vtn_branch_type_switch_fallthrough, + vtn_branch_type_loop_break, + vtn_branch_type_loop_continue, vtn_branch_type_discard, vtn_branch_type_return, }; @@ -62,7 +65,6 @@ enum vtn_cf_node_type { vtn_cf_node_type_if, vtn_cf_node_type_loop, vtn_cf_node_type_switch, - vtn_cf_node_type_case, }; struct vtn_cf_node { @@ -98,6 +100,32 @@ struct vtn_if { SpvSelectionControlMask control; }; +struct vtn_case { + struct list_head link; + + struct list_head body; + + /* The fallthrough case, if any */ + struct vtn_case *fallthrough; + + /* The uint32_t values that map to this case */ + nir_array 
values; + + /* True if this is the default case */ + bool is_default; + + /* Initialized to false; used when sorting the list of cases */ + bool visited; +}; + +struct vtn_switch { + struct vtn_cf_node node; + + uint32_t selector; + + struct list_head cases; +}; + struct vtn_block { struct vtn_cf_node node; @@ -115,6 +143,9 @@ struct vtn_block { /** Points to the loop that this block starts (if it starts a loop) */ struct vtn_loop *loop; + /** Points to the switch case started by this block (if any) */ + struct vtn_case *switch_case; + nir_block *block; }; -- cgit v1.2.3 From 0a2ab87947083e0a0a7be5a0128f1898d21d5162 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 12:09:32 -0800 Subject: nir/spirv: Move CF emit code into vtn_cfg.c --- src/glsl/nir/spirv/spirv_to_nir.c | 226 +----------------------------------- src/glsl/nir/spirv/vtn_cfg.c | 234 ++++++++++++++++++++++++++++++++++++++ src/glsl/nir/spirv/vtn_private.h | 12 +- 3 files changed, 245 insertions(+), 227 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 1539c750036..4a90f1827cf 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1459,7 +1459,7 @@ static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, nir_ssa_def *index); -static void +void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, nir_deref_var *dest, struct vtn_type *dest_type) { @@ -3574,226 +3574,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, return true; } -/* XXX: This should go in nir_builder.h */ -static inline void -nir_jump(nir_builder *build, nir_jump_type jump_type) -{ - nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type); - nir_builder_instr_insert(build, &jump->instr); -} - -static void -vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type, - nir_variable *switch_fall_var, bool 
*has_switch_break) -{ - switch (branch_type) { - case vtn_branch_type_switch_break: - nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); - *has_switch_break = true; - break; - case vtn_branch_type_switch_fallthrough: - break; /* Nothing to do */ - case vtn_branch_type_loop_break: - nir_jump(&b->nb, nir_jump_break); - break; - case vtn_branch_type_loop_continue: - nir_jump(&b->nb, nir_jump_continue); - break; - case vtn_branch_type_return: - nir_jump(&b->nb, nir_jump_return); - break; - default: - unreachable("Invalid branch type"); - } -} - -static void -vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, - nir_variable *switch_fall_var, bool *has_switch_break) -{ - list_for_each_entry(struct vtn_cf_node, node, cf_list, link) { - switch (node->type) { - case vtn_cf_node_type_block: { - struct vtn_block *block = (struct vtn_block *)node; - - block->block = nir_cursor_current_block(b->nb.cursor); - _mesa_hash_table_insert(b->block_table, block->block, block); - - vtn_foreach_instruction(b, block->label, - block->merge ? 
block->merge : block->branch, - vtn_handle_body_instruction); - - if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) { - struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]); - vtn_variable_store(b, src, - nir_deref_var_create(b, b->impl->return_var), - NULL); - } - - if (block->branch_type != vtn_branch_type_none) { - vtn_emit_branch(b, block->branch_type, - switch_fall_var, has_switch_break); - } - - break; - } - - case vtn_cf_node_type_if: { - struct vtn_if *vtn_if = (struct vtn_if *)node; - - nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = - nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def); - nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); - - bool sw_break = false; - - b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); - if (vtn_if->then_type == vtn_branch_type_none) - vtn_emit_cf_list(b, &vtn_if->then_body, switch_fall_var, &sw_break); - else - vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break); - - b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); - if (vtn_if->else_type == vtn_branch_type_none) - vtn_emit_cf_list(b, &vtn_if->else_body, switch_fall_var, &sw_break); - else - vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break); - - b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); - - /* If we encountered a switch break somewhere inside of the if, - * then it would have been handled correctly by calling - * emit_cf_list or emit_branch for the interrior. However, we - * need to predicate everything following on wether or not we're - * still going. 
- */ - if (sw_break) { - *has_switch_break = true; - - nir_if *switch_if = nir_if_create(b->shader); - switch_if->condition = - nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var)); - nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node); - - b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); - } - break; - } - - case vtn_cf_node_type_loop: { - struct vtn_loop *vtn_loop = (struct vtn_loop *)node; - - nir_loop *loop = nir_loop_create(b->shader); - nir_cf_node_insert(b->nb.cursor, &loop->cf_node); - - if (!list_empty(&vtn_loop->cont_body)) { - /* If we have a non-trivial continue body then we need to put - * it at the beginning of the loop with a flag to ensure that - * it doesn't get executed in the first iteration. - */ - nir_variable *do_cont = - nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont"); - - b->nb.cursor = nir_before_cf_node(&loop->cf_node); - nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1); - - b->nb.cursor = nir_after_cf_list(&loop->body); - nir_if *cont_if = nir_if_create(b->shader); - cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont)); - nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node); - - b->nb.cursor = nir_after_cf_list(&cont_if->then_list); - vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL); - - b->nb.cursor = nir_after_cf_node(&cont_if->cf_node); - nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1); - } - - b->nb.cursor = nir_after_cf_list(&loop->body); - vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL); - - b->nb.cursor = nir_after_cf_node(&loop->cf_node); - break; - } - - case vtn_cf_node_type_switch: { - struct vtn_switch *vtn_switch = (struct vtn_switch *)node; - - /* First, we create a variable to keep track of whether or not the - * switch is still going at any given point. Any switch breaks - * will set this variable to false. 
- */ - nir_variable *fall_var = - nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall"); - nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1); - - /* Next, we gather up all of the conditions. We have to do this - * up-front because we also need to build an "any" condition so - * that we can use !any for default. - */ - const int num_cases = list_length(&vtn_switch->cases); - NIR_VLA(nir_ssa_def *, conditions, num_cases); - - nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def; - /* An accumulation of all conditions. Used for the default */ - nir_ssa_def *any = NULL; - - int i = 0; - list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { - if (cse->is_default) { - conditions[i++] = NULL; - continue; - } - - nir_ssa_def *cond = NULL; - nir_array_foreach(&cse->values, uint32_t, val) { - nir_ssa_def *is_val = - nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val)); - - cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val; - } - - any = any ? nir_ior(&b->nb, any, cond) : cond; - conditions[i++] = cond; - } - assert(i == num_cases); - - /* Now we can walk the list of cases and actually emit code */ - i = 0; - list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { - /* Figure out the condition */ - nir_ssa_def *cond = conditions[i++]; - if (cse->is_default) { - assert(cond == NULL); - cond = nir_inot(&b->nb, any); - } - /* Take fallthrough into account */ - cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var)); - - nir_if *case_if = nir_if_create(b->nb.shader); - case_if->condition = nir_src_for_ssa(cond); - nir_cf_node_insert(b->nb.cursor, &case_if->cf_node); - - bool has_break = false; - b->nb.cursor = nir_after_cf_list(&case_if->then_list); - vtn_emit_cf_list(b, &cse->body, fall_var, &has_break); - (void)has_break; /* We don't care */ - - b->nb.cursor = nir_after_cf_node(&case_if->cf_node); - } - assert(i == num_cases); - - break; - } - - default: - unreachable("Invalid CF node type"); - } - } -} - - 
nir_shader * spirv_to_nir(const uint32_t *words, size_t word_count, gl_shader_stage stage, @@ -3839,9 +3619,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, _mesa_key_pointer_equal); b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer, _mesa_key_pointer_equal); - nir_builder_init(&b->nb, b->impl); - b->nb.cursor = nir_after_cf_list(&b->impl->body); - vtn_emit_cf_list(b, &func->body, NULL, NULL); + vtn_function_emit(b, func, vtn_handle_body_instruction); vtn_foreach_instruction(b, func->start_block->label, func->end, vtn_handle_phi_second_pass); } diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index 9d8d451b5e0..7d25e96c8f0 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -22,6 +22,7 @@ */ #include "vtn_private.h" +#include "nir/nir_vla.h" static bool vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, @@ -448,3 +449,236 @@ vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end) NULL, NULL, NULL, NULL, NULL); } } + +/* XXX: This should go in nir_builder.h */ +static inline void +nir_jump(nir_builder *build, nir_jump_type jump_type) +{ + nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type); + nir_builder_instr_insert(build, &jump->instr); +} + +static void +vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type, + nir_variable *switch_fall_var, bool *has_switch_break) +{ + switch (branch_type) { + case vtn_branch_type_switch_break: + nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); + *has_switch_break = true; + break; + case vtn_branch_type_switch_fallthrough: + break; /* Nothing to do */ + case vtn_branch_type_loop_break: + nir_jump(&b->nb, nir_jump_break); + break; + case vtn_branch_type_loop_continue: + nir_jump(&b->nb, nir_jump_continue); + break; + case vtn_branch_type_return: + nir_jump(&b->nb, nir_jump_return); + break; + default: + unreachable("Invalid branch type"); + } +} + 
+static void +vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, + nir_variable *switch_fall_var, bool *has_switch_break, + vtn_instruction_handler handler) +{ + list_for_each_entry(struct vtn_cf_node, node, cf_list, link) { + switch (node->type) { + case vtn_cf_node_type_block: { + struct vtn_block *block = (struct vtn_block *)node; + + block->block = nir_cursor_current_block(b->nb.cursor); + _mesa_hash_table_insert(b->block_table, block->block, block); + + vtn_foreach_instruction(b, block->label, + block->merge ? block->merge : block->branch, + handler); + + if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) { + struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]); + vtn_variable_store(b, src, + nir_deref_var_create(b, b->impl->return_var), + NULL); + } + + if (block->branch_type != vtn_branch_type_none) { + vtn_emit_branch(b, block->branch_type, + switch_fall_var, has_switch_break); + } + + break; + } + + case vtn_cf_node_type_if: { + struct vtn_if *vtn_if = (struct vtn_if *)node; + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = + nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def); + nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); + + bool sw_break = false; + + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); + if (vtn_if->then_type == vtn_branch_type_none) { + vtn_emit_cf_list(b, &vtn_if->then_body, + switch_fall_var, &sw_break, handler); + } else { + vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break); + } + + b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); + if (vtn_if->else_type == vtn_branch_type_none) { + vtn_emit_cf_list(b, &vtn_if->else_body, + switch_fall_var, &sw_break, handler); + } else { + vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break); + } + + b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); + + /* If we encountered a switch break somewhere inside of the if, + * then it would have been handled correctly by calling + * emit_cf_list or 
emit_branch for the interrior. However, we + * need to predicate everything following on wether or not we're + * still going. + */ + if (sw_break) { + *has_switch_break = true; + + nir_if *switch_if = nir_if_create(b->shader); + switch_if->condition = + nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var)); + nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node); + + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); + } + break; + } + + case vtn_cf_node_type_loop: { + struct vtn_loop *vtn_loop = (struct vtn_loop *)node; + + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert(b->nb.cursor, &loop->cf_node); + + if (!list_empty(&vtn_loop->cont_body)) { + /* If we have a non-trivial continue body then we need to put + * it at the beginning of the loop with a flag to ensure that + * it doesn't get executed in the first iteration. + */ + nir_variable *do_cont = + nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont"); + + b->nb.cursor = nir_before_cf_node(&loop->cf_node); + nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1); + + b->nb.cursor = nir_after_cf_list(&loop->body); + nir_if *cont_if = nir_if_create(b->shader); + cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont)); + nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node); + + b->nb.cursor = nir_after_cf_list(&cont_if->then_list); + vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL, handler); + + b->nb.cursor = nir_after_cf_node(&cont_if->cf_node); + nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1); + } + + b->nb.cursor = nir_after_cf_list(&loop->body); + vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler); + + b->nb.cursor = nir_after_cf_node(&loop->cf_node); + break; + } + + case vtn_cf_node_type_switch: { + struct vtn_switch *vtn_switch = (struct vtn_switch *)node; + + /* First, we create a variable to keep track of whether or not the + * switch is still going at any given point. 
Any switch breaks + * will set this variable to false. + */ + nir_variable *fall_var = + nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall"); + nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1); + + /* Next, we gather up all of the conditions. We have to do this + * up-front because we also need to build an "any" condition so + * that we can use !any for default. + */ + const int num_cases = list_length(&vtn_switch->cases); + NIR_VLA(nir_ssa_def *, conditions, num_cases); + + nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def; + /* An accumulation of all conditions. Used for the default */ + nir_ssa_def *any = NULL; + + int i = 0; + list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { + if (cse->is_default) { + conditions[i++] = NULL; + continue; + } + + nir_ssa_def *cond = NULL; + nir_array_foreach(&cse->values, uint32_t, val) { + nir_ssa_def *is_val = + nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val)); + + cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val; + } + + any = any ? 
nir_ior(&b->nb, any, cond) : cond; + conditions[i++] = cond; + } + assert(i == num_cases); + + /* Now we can walk the list of cases and actually emit code */ + i = 0; + list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { + /* Figure out the condition */ + nir_ssa_def *cond = conditions[i++]; + if (cse->is_default) { + assert(cond == NULL); + cond = nir_inot(&b->nb, any); + } + /* Take fallthrough into account */ + cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var)); + + nir_if *case_if = nir_if_create(b->nb.shader); + case_if->condition = nir_src_for_ssa(cond); + nir_cf_node_insert(b->nb.cursor, &case_if->cf_node); + + bool has_break = false; + b->nb.cursor = nir_after_cf_list(&case_if->then_list); + vtn_emit_cf_list(b, &cse->body, fall_var, &has_break, handler); + (void)has_break; /* We don't care */ + + b->nb.cursor = nir_after_cf_node(&case_if->cf_node); + } + assert(i == num_cases); + + break; + } + + default: + unreachable("Invalid CF node type"); + } + } +} + +void +vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, + vtn_instruction_handler instruction_handler) +{ + nir_builder_init(&b->nb, func->impl); + b->nb.cursor = nir_after_cf_list(&func->impl->body); + vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler); +} diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 6dfd01d5c24..7ed62ee712b 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -162,12 +162,14 @@ struct vtn_function { SpvFunctionControlMask control; }; -void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, - const uint32_t *end); - typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, const uint32_t *, unsigned); +void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, + const uint32_t *end); +void vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, + vtn_instruction_handler instruction_handler); + const uint32_t * 
vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, const uint32_t *end, vtn_instruction_handler handler); @@ -342,6 +344,10 @@ vtn_value(struct vtn_builder *b, uint32_t value_id, struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); +void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest, struct vtn_type *dest_type); + + typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, struct vtn_value *, int member, -- cgit v1.2.3 From e10b0e2b4985327a48434e7fec297d33b27bc47f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 13:03:01 -0800 Subject: anv/pipeline: Use vs_prog_data.inputs_read when computing vb_used --- src/vulkan/anv_pipeline.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index a6d62a3f49e..c17809c519f 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -1026,12 +1026,31 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; + + uint64_t inputs_read; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. 
+ */ + inputs_read = ~0ull; + } else { + inputs_read = pipeline->vs_prog_data.inputs_read; + } + pipeline->vb_used = 0; + for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &vi_info->pVertexAttributeDescriptions[i]; + + if (inputs_read & (1 << (VERT_ATTRIB_GENERIC0 + desc->location))) + pipeline->vb_used |= 1 << desc->binding; + } + for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { const VkVertexInputBindingDescription *desc = &vi_info->pVertexBindingDescriptions[i]; - pipeline->vb_used |= 1 << desc->binding; pipeline->binding_stride[desc->binding] = desc->stride; /* Step rate is programmed per vertex element (attribute), not -- cgit v1.2.3 From 5dd4386b92a665869b249eae07af78e648bd891d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 13:15:17 -0800 Subject: nir/spirv: Use a C99-style initializer for structure fields This ensures that all unknown fields get zero-initizlied so we don't have undefined values floating around. 
--- src/glsl/nir/spirv/spirv_to_nir.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 4a90f1827cf..da586533529 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -587,16 +587,13 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, NIR_VLA(struct glsl_struct_field, fields, count); for (unsigned i = 0; i < num_fields; i++) { - /* TODO: Handle decorators */ val->type->members[i] = vtn_value(b, w[i + 2], vtn_value_type_type)->type; - fields[i].type = val->type->members[i]->type; - fields[i].name = ralloc_asprintf(b, "field%d", i); - fields[i].location = -1; - fields[i].interpolation = 0; - fields[i].centroid = 0; - fields[i].sample = 0; - fields[i].matrix_layout = 2; + fields[i] = (struct glsl_struct_field) { + .type = val->type->members[i]->type, + .name = ralloc_asprintf(b, "field%d", i), + .location = -1, + }; } struct member_decoration_ctx ctx = { -- cgit v1.2.3 From b33f5d388979f23c583c78f10a18a941cb04ce04 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 14:29:03 -0800 Subject: nir/spirv: Update to the 1.0 GLSL.std.450 header --- src/glsl/nir/spirv/GLSL.std.450.h | 89 ++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/GLSL.std.450.h b/src/glsl/nir/spirv/GLSL.std.450.h index d1c9b5c1d44..779d108205e 100644 --- a/src/glsl/nir/spirv/GLSL.std.450.h +++ b/src/glsl/nir/spirv/GLSL.std.450.h @@ -27,8 +27,8 @@ #ifndef GLSLstd450_H #define GLSLstd450_H -const int GLSLstd450Version = 99; -const int GLSLstd450Revision = 3; +const int GLSLstd450Version = 100; +const int GLSLstd450Revision = 1; enum GLSLstd450 { GLSLstd450Bad = 0, // Don't use @@ -74,52 +74,55 @@ enum GLSLstd450 { GLSLstd450Modf = 35, // second operand needs an OpVariable to write to GLSLstd450ModfStruct = 36, // no OpVariable operand 
GLSLstd450FMin = 37, - GLSLstd450UMin = 38, - GLSLstd450SMin = 39, - GLSLstd450FMax = 40, - GLSLstd450UMax = 41, - GLSLstd450SMax = 42, - GLSLstd450FClamp = 43, - GLSLstd450UClamp = 44, - GLSLstd450SClamp = 45, - GLSLstd450FMix = 46, - GLSLstd450IMix = 47, - GLSLstd450Step = 48, - GLSLstd450SmoothStep = 49, + GLSLstd450NMin = 38, + GLSLstd450UMin = 39, + GLSLstd450SMin = 40, + GLSLstd450FMax = 41, + GLSLstd450NMax = 42, + GLSLstd450UMax = 43, + GLSLstd450SMax = 44, + GLSLstd450FClamp = 45, + GLSLstd450NClamp = 46, + GLSLstd450UClamp = 47, + GLSLstd450SClamp = 48, + GLSLstd450FMix = 49, + GLSLstd450IMix = 50, + GLSLstd450Step = 51, + GLSLstd450SmoothStep = 52, - GLSLstd450Fma = 50, - GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to - GLSLstd450FrexpStruct = 52, // no OpVariable operand - GLSLstd450Ldexp = 53, + GLSLstd450Fma = 53, + GLSLstd450Frexp = 54, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 55, // no OpVariable operand + GLSLstd450Ldexp = 56, - GLSLstd450PackSnorm4x8 = 54, - GLSLstd450PackUnorm4x8 = 55, - GLSLstd450PackSnorm2x16 = 56, - GLSLstd450PackUnorm2x16 = 57, - GLSLstd450PackHalf2x16 = 58, - GLSLstd450PackDouble2x32 = 59, - GLSLstd450UnpackSnorm2x16 = 60, - GLSLstd450UnpackUnorm2x16 = 61, - GLSLstd450UnpackHalf2x16 = 62, - GLSLstd450UnpackSnorm4x8 = 63, - GLSLstd450UnpackUnorm4x8 = 64, - GLSLstd450UnpackDouble2x32 = 65, + GLSLstd450PackSnorm4x8 = 57, + GLSLstd450PackUnorm4x8 = 58, + GLSLstd450PackSnorm2x16 = 59, + GLSLstd450PackUnorm2x16 = 60, + GLSLstd450PackHalf2x16 = 61, + GLSLstd450PackDouble2x32 = 62, + GLSLstd450UnpackSnorm2x16 = 63, + GLSLstd450UnpackUnorm2x16 = 64, + GLSLstd450UnpackHalf2x16 = 65, + GLSLstd450UnpackSnorm4x8 = 66, + GLSLstd450UnpackUnorm4x8 = 67, + GLSLstd450UnpackDouble2x32 = 68, - GLSLstd450Length = 66, - GLSLstd450Distance = 67, - GLSLstd450Cross = 68, - GLSLstd450Normalize = 69, - GLSLstd450FaceForward = 70, - GLSLstd450Reflect = 71, - GLSLstd450Refract = 72, + 
GLSLstd450Length = 69, + GLSLstd450Distance = 70, + GLSLstd450Cross = 71, + GLSLstd450Normalize = 72, + GLSLstd450FaceForward = 73, + GLSLstd450Reflect = 74, + GLSLstd450Refract = 75, - GLSLstd450FindILsb = 73, - GLSLstd450FindSMsb = 74, - GLSLstd450FindUMsb = 75, + GLSLstd450FindILsb = 76, + GLSLstd450FindSMsb = 77, + GLSLstd450FindUMsb = 78, - GLSLstd450InterpolateAtCentroid = 76, - GLSLstd450InterpolateAtSample = 77, - GLSLstd450InterpolateAtOffset = 78, + GLSLstd450InterpolateAtCentroid = 79, + GLSLstd450InterpolateAtSample = 80, + GLSLstd450InterpolateAtOffset = 81, GLSLstd450Count }; -- cgit v1.2.3 From 37a38548d467ea78ac25e88d505c8cf4fd1a979b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 14:32:10 -0800 Subject: glsl/types.cpp: Fix function_key_compare --- src/glsl/nir/glsl_types.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp index d86609718ea..71095fa92e4 100644 --- a/src/glsl/nir/glsl_types.cpp +++ b/src/glsl/nir/glsl_types.cpp @@ -1055,7 +1055,7 @@ function_key_compare(const void *a, const void *b) return 1; return memcmp(key1->fields.parameters, key2->fields.parameters, - (key1->length + 1) * sizeof(*key1->fields.parameters)); + (key1->length + 1) * sizeof(*key1->fields.parameters)) == 0; } -- cgit v1.2.3 From 6fa47c9c175498edd1bfda4f58e95fed48f1c50d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 14:48:34 -0800 Subject: nir/builder: Add a nir_jump helper --- src/glsl/nir/nir_builder.h | 7 +++++++ src/glsl/nir/spirv/vtn_cfg.c | 8 -------- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 8ba48589aa6..985f4af5339 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -401,4 +401,11 @@ nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) return &load->dest.ssa; } +static inline void 
+nir_jump(nir_builder *build, nir_jump_type jump_type) +{ + nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type); + nir_builder_instr_insert(build, &jump->instr); +} + #endif /* NIR_BUILDER_H */ diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index 7d25e96c8f0..03d2c43018d 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -450,14 +450,6 @@ vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end) } } -/* XXX: This should go in nir_builder.h */ -static inline void -nir_jump(nir_builder *build, nir_jump_type jump_type) -{ - nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type); - nir_builder_instr_insert(build, &jump->instr); -} - static void vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type, nir_variable *switch_fall_var, bool *has_switch_break) -- cgit v1.2.3 From 5f04a61219fadf8e4e99fe1254674c6b3258a28a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 14:51:45 -0800 Subject: nir/lower_returns: Don't just change the type of a jump. It doesn't give core NIR the opportunity to update predecessors and successors. Instead, we have to remove and re-insert the instruction. 
--- src/glsl/nir/nir_lower_returns.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_returns.c b/src/glsl/nir/nir_lower_returns.c index dcdd14e2026..af16676b822 100644 --- a/src/glsl/nir/nir_lower_returns.c +++ b/src/glsl/nir/nir_lower_returns.c @@ -144,8 +144,10 @@ lower_returns_in_block(nir_block *block, struct lower_returns_state *state) if (jump->type != nir_jump_return) return false; + nir_instr_remove(&jump->instr); + nir_builder *b = &state->builder; - b->cursor = nir_before_instr(&jump->instr); + b->cursor = nir_after_block(block); /* Set the return flag */ if (state->return_flag == NULL) { @@ -159,14 +161,11 @@ lower_returns_in_block(nir_block *block, struct lower_returns_state *state) nir_store_var(b, state->return_flag, nir_imm_int(b, NIR_TRUE), 1); if (state->loop) { - /* We're in a loop. Make the return a break. */ - jump->type = nir_jump_break; + /* We're in a loop; we need to break out of it. */ + nir_jump(b, nir_jump_break); } else { - /* Not in a loop. Just delete the return; we'll deal with - * predicating later. - */ + /* Not in a loop; we'll deal with predicating later*/ assert(nir_cf_node_next(&block->cf_node) == NULL); - nir_instr_remove(&jump->instr); } return true; -- cgit v1.2.3 From 9c9edd1ce8325b0b80f49661fdb1ada384bbfe0b Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 28 Dec 2015 21:09:19 -0800 Subject: nir/spirv/glsl450: Add an 'nb' shortcut variable. "nb" is shorter and more convenient than "&b->nb", especially when several operations are composed together into a larger expression tree. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 551f0540496..575580e5997 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -45,6 +45,7 @@ static void handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, const uint32_t *w, unsigned count) { + struct nir_builder *nb = &b->nb; struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); val->ssa = rzalloc(b, struct vtn_ssa_value); val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; @@ -66,16 +67,16 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Ceil: op = nir_op_fceil; break; case GLSLstd450Fract: op = nir_op_ffract; break; case GLSLstd450Radians: - val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); + val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251)); return; case GLSLstd450Degrees: - val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); + val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131)); return; case GLSLstd450Sin: op = nir_op_fsin; break; case GLSLstd450Cos: op = nir_op_fcos; break; case GLSLstd450Tan: - val->ssa->def = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), - nir_fcos(&b->nb, src[0])); + val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]), + nir_fcos(nb, src[0])); return; case GLSLstd450Pow: op = nir_op_fpow; break; case GLSLstd450Exp2: op = nir_op_fexp2; break; @@ -92,7 +93,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450SMax: op = nir_op_imax; break; case GLSLstd450FMix: op = nir_op_flrp; break; case GLSLstd450Step: - val->ssa->def = nir_sge(&b->nb, src[1], src[0]); + val->ssa->def = nir_sge(nb, src[1], src[0]); return; case GLSLstd450Fma: op = nir_op_ffma; break; @@ -111,13 +112,13 @@ 
handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; case GLSLstd450Length: - val->ssa->def = build_length(&b->nb, src[0]); + val->ssa->def = build_length(nb, src[0]); return; case GLSLstd450Distance: - val->ssa->def = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); + val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1])); return; case GLSLstd450Normalize: - val->ssa->def = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); + val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0])); return; case GLSLstd450Exp: @@ -157,7 +158,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) instr->src[i].src = nir_src_for_ssa(src[i]); - nir_builder_instr_insert(&b->nb, &instr->instr); + nir_builder_instr_insert(nb, &instr->instr); } bool -- cgit v1.2.3 From 0f801752f22dacfd070c82b6a30da62ce317870c Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Dec 2015 15:15:05 -0800 Subject: nir/spirv/glsl450: Add helpers for calculating exp() and log(). --- src/glsl/nir/spirv/vtn_glsl450.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 575580e5997..d4cb1bb9f3c 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -41,6 +41,25 @@ build_length(nir_builder *b, nir_ssa_def *vec) } } +/** + * Return e^x. + */ +static nir_ssa_def * +build_exp(nir_builder *b, nir_ssa_def *x) +{ + return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E))); +} + +/** + * Return ln(x) - the natural logarithm of x. 
+ */ +static nir_ssa_def * +build_log(nir_builder *b, nir_ssa_def *x) +{ + return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E)); +} + + static void handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, const uint32_t *w, unsigned count) -- cgit v1.2.3 From 227e2500050de49cfae838bfd99318c3731e253c Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Dec 2015 15:18:05 -0800 Subject: nir/spirv/glsl450: Add a helper for doing fclamp(). --- src/glsl/nir/spirv/vtn_glsl450.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index d4cb1bb9f3c..f4550baaf93 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -41,6 +41,13 @@ build_length(nir_builder *b, nir_ssa_def *vec) } } +static inline nir_ssa_def * +build_fclamp(nir_builder *b, + nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val) +{ + return nir_fmin(b, nir_fmax(b, x, min_val), max_val); +} + /** * Return e^x. */ -- cgit v1.2.3 From ffc5ae7c9eb338c6af63c4b534bcb5aca1f67367 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 28 Dec 2015 21:04:30 -0800 Subject: nir/spirv/glsl450: Implement Exp built-in. --- src/glsl/nir/spirv/vtn_glsl450.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index f4550baaf93..33444eb8c76 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -148,6 +148,9 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; case GLSLstd450Exp: + val->ssa->def = build_exp(nb, src[0]); + return; + case GLSLstd450Log: case GLSLstd450FClamp: case GLSLstd450UClamp: -- cgit v1.2.3 From 034010924e046662fce21dee240bdb47619ff6a7 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 28 Dec 2015 21:21:51 -0800 Subject: nir/spirv/glsl450: Implement the Log built-in. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 33444eb8c76..ecd24c4a3d9 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -152,6 +152,9 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; case GLSLstd450Log: + val->ssa->def = build_log(nb, src[0]); + return; + case GLSLstd450FClamp: case GLSLstd450UClamp: case GLSLstd450SClamp: -- cgit v1.2.3 From 083fd6ec2a15db60edbcc55e86a7c6cc686e862f Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 28 Dec 2015 20:40:10 -0800 Subject: nir/spirv/glsl450: Implement Clamp/SClamp/UClamp. --- src/glsl/nir/spirv/vtn_glsl450.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index ecd24c4a3d9..2d5faa7d574 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -156,8 +156,15 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; case GLSLstd450FClamp: + val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]); + return; case GLSLstd450UClamp: + val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]); + return; case GLSLstd450SClamp: + val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]); + return; + case GLSLstd450Asin: case GLSLstd450Acos: case GLSLstd450Atan: -- cgit v1.2.3 From 6a0fa2d758bc67041f2b66c3b7520a0ca041817e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 28 Dec 2015 23:39:14 -0800 Subject: nir/spirv/glsl450: Implement Cross built-in. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 2d5faa7d574..cec7592a5e8 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -165,6 +165,17 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]); return; + case GLSLstd450Cross: { + unsigned yzx[4] = { 1, 2, 0, 0 }; + unsigned zxy[4] = { 2, 0, 1, 0 }; + val->ssa->def = + nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true), + nir_swizzle(nb, src[1], zxy, 3, true)), + nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true), + nir_swizzle(nb, src[1], yzx, 3, true))); + return; + } + case GLSLstd450Asin: case GLSLstd450Acos: case GLSLstd450Atan: @@ -179,7 +190,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Frexp: case GLSLstd450PackDouble2x32: case GLSLstd450UnpackDouble2x32: - case GLSLstd450Cross: case GLSLstd450FaceForward: case GLSLstd450Reflect: case GLSLstd450Refract: -- cgit v1.2.3 From b10af36d93c44537bf0858d0508020058e375d34 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 28 Dec 2015 23:52:10 -0800 Subject: nir/spirv/glsl450: Implement SmoothStep. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index cec7592a5e8..1b7751a6a88 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -176,6 +176,20 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; } + case GLSLstd450SmoothStep: { + /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */ + nir_ssa_def *t = + build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]), + nir_fsub(nb, src[1], src[0])), + nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0)); + /* result = t * t * (3 - 2 * t) */ + val->ssa->def = + nir_fmul(nb, t, nir_fmul(nb, t, + nir_fsub(nb, nir_imm_float(nb, 3.0), + nir_fmul(nb, nir_imm_float(nb, 2.0), t)))); + return; + } + case GLSLstd450Asin: case GLSLstd450Acos: case GLSLstd450Atan: @@ -186,7 +200,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Asinh: case GLSLstd450Acosh: case GLSLstd450Atanh: - case GLSLstd450SmoothStep: case GLSLstd450Frexp: case GLSLstd450PackDouble2x32: case GLSLstd450UnpackDouble2x32: -- cgit v1.2.3 From 659a3623b0fa4217fc4e69cccdb3e7a268bca388 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 28 Dec 2015 23:59:32 -0800 Subject: nir/spirv/glsl450: Implement FaceForward built-in. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 1b7751a6a88..f81f16789be 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -190,6 +190,13 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; } + case GLSLstd450FaceForward: + val->ssa->def = + nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]), + nir_imm_float(nb, 0.0)), + src[0], nir_fneg(nb, src[0])); + return; + case GLSLstd450Asin: case GLSLstd450Acos: case GLSLstd450Atan: @@ -203,7 +210,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Frexp: case GLSLstd450PackDouble2x32: case GLSLstd450UnpackDouble2x32: - case GLSLstd450FaceForward: case GLSLstd450Reflect: case GLSLstd450Refract: case GLSLstd450IMix: -- cgit v1.2.3 From 0b1a436ac83edc939388f22f1a335c28dae8f69e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Dec 2015 00:10:39 -0800 Subject: nir/spirv/glsl450: implement Reflect built-in. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index f81f16789be..38dea8caa4d 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -197,6 +197,14 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, src[0], nir_fneg(nb, src[0])); return; + case GLSLstd450Reflect: + /* I - 2 * dot(N, I) * N */ + val->ssa->def = + nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0), + nir_fmul(nb, nir_fdot(nb, src[0], src[1]), + src[1]))); + return; + case GLSLstd450Asin: case GLSLstd450Acos: case GLSLstd450Atan: @@ -210,7 +218,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Frexp: case GLSLstd450PackDouble2x32: case GLSLstd450UnpackDouble2x32: - case GLSLstd450Reflect: case GLSLstd450Refract: case GLSLstd450IMix: default: -- cgit v1.2.3 From 74529a2c50b5d12cdc9f59df9e03336d1b9d669d Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Dec 2015 00:18:54 -0800 Subject: nir/spirv/glsl450: Implement hyperbolic trig built-ins. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 38dea8caa4d..d0a6e783735 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -66,7 +66,6 @@ build_log(nir_builder *b, nir_ssa_def *x) return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E)); } - static void handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, const uint32_t *w, unsigned count) @@ -205,13 +204,35 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, src[1]))); return; + case GLSLstd450Sinh: + /* 0.5 * (e^x - e^(-x)) */ + val->ssa->def = + nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fsub(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))); + return; + + case GLSLstd450Cosh: + /* 0.5 * (e^x + e^(-x)) */ + val->ssa->def = + nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fadd(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))); + return; + + case GLSLstd450Tanh: + /* (e^x - e^(-x)) / (e^x + e^(-x)) */ + val->ssa->def = + nir_fdiv(nb, nir_fsub(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0]))), + nir_fadd(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))); + return; + case GLSLstd450Asin: case GLSLstd450Acos: case GLSLstd450Atan: case GLSLstd450Atan2: - case GLSLstd450Sinh: - case GLSLstd450Cosh: - case GLSLstd450Tanh: case GLSLstd450Asinh: case GLSLstd450Acosh: case GLSLstd450Atanh: -- cgit v1.2.3 From 2ea111664c7d6c174886e1532639123599274552 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Dec 2015 00:28:42 -0800 Subject: nir/spirv/glsl450: Implement Refract built-in. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index d0a6e783735..64357308c77 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -204,6 +204,26 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, src[1]))); return; + case GLSLstd450Refract: { + nir_ssa_def *I = src[0]; + nir_ssa_def *N = src[1]; + nir_ssa_def *eta = src[2]; + nir_ssa_def *n_dot_i = nir_fdot(nb, N, I); + nir_ssa_def *one = nir_imm_float(nb, 1.0); + nir_ssa_def *zero = nir_imm_float(nb, 0.0); + /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */ + nir_ssa_def *k = + nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta, + nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i))))); + nir_ssa_def *result = + nir_fsub(nb, nir_fmul(nb, eta, I), + nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i), + nir_fsqrt(nb, k)), N)); + /* XXX: bcsel, or if statement? */ + val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result); + return; + } + case GLSLstd450Sinh: /* 0.5 * (e^x - e^(-x)) */ val->ssa->def = @@ -239,7 +259,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Frexp: case GLSLstd450PackDouble2x32: case GLSLstd450UnpackDouble2x32: - case GLSLstd450Refract: case GLSLstd450IMix: default: unreachable("Unhandled opcode"); -- cgit v1.2.3 From b4a1c9b506487bff1c411d91dca87c4fef5e788f Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Dec 2015 15:13:46 -0800 Subject: nir/spirv/glsl450: Implement inverse hyperbolic trig built-ins. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 64357308c77..d0e98577884 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -249,13 +249,29 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, build_exp(nb, nir_fneg(nb, src[0])))); return; + case GLSLstd450Asinh: + val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]), + build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]), + nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]), + nir_imm_float(nb, 1.0f)))))); + return; + case GLSLstd450Acosh: + val->ssa->def = build_log(nb, nir_fadd(nb, src[0], + nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]), + nir_imm_float(nb, 1.0f))))); + return; + case GLSLstd450Atanh: { + nir_ssa_def *one = nir_imm_float(nb, 1.0); + val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f), + build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]), + nir_fsub(nb, one, src[0])))); + return; + } + case GLSLstd450Asin: case GLSLstd450Acos: case GLSLstd450Atan: case GLSLstd450Atan2: - case GLSLstd450Asinh: - case GLSLstd450Acosh: - case GLSLstd450Atanh: case GLSLstd450Frexp: case GLSLstd450PackDouble2x32: case GLSLstd450UnpackDouble2x32: -- cgit v1.2.3 From 51b04d03d5f52b0328c70157e3b102727c7dc9af Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 15:25:43 -0800 Subject: nir/dominance: Handle unreachable blocks Previously, nir_dominance.c didn't properly handle unreachable blocks. This can happen if, for instance, you have something like this: loop { if (...) { break; } else { break; } } In this case, the block right after the if statement will be unreachable. This commit makes two changes to handle this. First, it removes an assert and allows block->imm_dom to be null if the block is unreachable. 
Second, it properly skips unreachable blocks in calc_dom_frontier_cb. --- src/glsl/nir/nir_dominance.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_dominance.c b/src/glsl/nir/nir_dominance.c index b345b85e8a0..d95f3968074 100644 --- a/src/glsl/nir/nir_dominance.c +++ b/src/glsl/nir/nir_dominance.c @@ -94,7 +94,6 @@ calc_dominance_cb(nir_block *block, void *_state) } } - assert(new_idom); if (block->imm_dom != new_idom) { block->imm_dom = new_idom; state->progress = true; @@ -112,6 +111,11 @@ calc_dom_frontier_cb(nir_block *block, void *state) struct set_entry *entry; set_foreach(block->predecessors, entry) { nir_block *runner = (nir_block *) entry->key; + + /* Skip unreachable predecessors */ + if (runner->imm_dom == NULL) + continue; + while (runner != block->imm_dom) { _mesa_set_add(runner->dom_frontier, block); runner = runner->imm_dom; -- cgit v1.2.3 From 69d5838aee2fe80548517eaccbd3a9a6555ae99e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 15:35:29 -0800 Subject: nir/validate: Don't validate the return deref for void function calls --- src/glsl/nir/nir_validate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c index e4db68db3c0..91c01ff8727 100644 --- a/src/glsl/nir/nir_validate.c +++ b/src/glsl/nir/nir_validate.c @@ -454,10 +454,12 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state) static void validate_call_instr(nir_call_instr *instr, validate_state *state) { - if (instr->return_deref == NULL) + if (instr->return_deref == NULL) { assert(glsl_type_is_void(instr->callee->return_type)); - else + } else { assert(instr->return_deref->deref.type == instr->callee->return_type); + validate_deref_var(instr, instr->return_deref, state); + } assert(instr->num_params == instr->callee->num_params); @@ -465,8 +467,6 @@ validate_call_instr(nir_call_instr *instr, 
validate_state *state) assert(instr->callee->params[i].type == instr->params[i]->deref.type); validate_deref_var(instr, instr->params[i], state); } - - validate_deref_var(instr, instr->return_deref, state); } static void -- cgit v1.2.3 From 2a58cb03d04b8cff632e6be70f1bef0ec12d9f0b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 15:44:37 -0800 Subject: nir/spirv: Use instr_rewrite_src for updating phi sources You can't just add a new source to a phi because use/def information won't get updated properly. Instead, you have to use one of the core helpers. Some day, we may want to add a nir_phi_instr_add_src helper. --- src/glsl/nir/spirv/spirv_to_nir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index da586533529..b6e38704887 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3039,8 +3039,10 @@ vtn_phi_node_add_src(struct vtn_ssa_value *phi, const nir_block *pred, nir_phi_instr *phi_instr = nir_instr_as_phi(phi->def->parent_instr); nir_phi_src *src = ralloc(phi_instr, nir_phi_src); src->pred = (nir_block *) pred; - src->src = nir_src_for_ssa(val->def); + src->src = NIR_SRC_INIT; exec_list_push_tail(&phi_instr->srcs, &src->node); + nir_instr_rewrite_src(&phi_instr->instr, &src->src, + nir_src_for_ssa(val->def)); } else { unsigned elems = glsl_get_length(phi->type); for (unsigned i = 0; i < elems; i++) -- cgit v1.2.3 From ccd84848f0ff71ccc4a257ee2628f5ebc94be3b5 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Dec 2015 15:50:15 -0800 Subject: anv/state: Fix reversed MIN vs. MAX in levelCount handling. The point is to promote a levelCount of 0 to 1 before subtracting 1. This needs MAX, not MIN. 
--- src/vulkan/gen7_state.c | 4 ++-- src/vulkan/gen8_state.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index cb299a3278b..88a508a1be9 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -329,7 +329,7 @@ genX(image_view_init)(struct anv_image_view *iview, * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. */ surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = MIN2(range->levelCount, 1) - 1; + surface_state.MIPCountLOD = MAX2(range->levelCount, 1) - 1; GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, &surface_state); @@ -369,7 +369,7 @@ genX(image_view_init)(struct anv_image_view *iview, format->surface_format); surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = MIN2(range->levelCount, 1) - 1; + surface_state.MIPCountLOD = MAX2(range->levelCount, 1) - 1; GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, &surface_state); diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 199905b60dc..34c4d26b20f 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -305,7 +305,7 @@ genX(image_view_init)(struct anv_image_view *iview, * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. 
*/ surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = MIN2(range->levelCount, 1) - 1; + surface_state.MIPCountLOD = MAX2(range->levelCount, 1) - 1; GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, &surface_state); @@ -344,7 +344,7 @@ genX(image_view_init)(struct anv_image_view *iview, format_info->surface_format); surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = MIN2(range->levelCount, 1) - 1; + surface_state.MIPCountLOD = MAX2(range->levelCount, 1) - 1; GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, &surface_state); -- cgit v1.2.3 From 7cdcee3bed2c8a0305f24cfabe13ecb69b55e7ac Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Dec 2015 16:06:19 -0800 Subject: nir/spirv/glsl450: Enumerate more built-in opcodes. --- src/glsl/nir/spirv/vtn_glsl450.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index d0e98577884..820aed7f66a 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -88,6 +88,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Trunc: op = nir_op_ftrunc; break; case GLSLstd450FAbs: op = nir_op_fabs; break; case GLSLstd450FSign: op = nir_op_fsign; break; + case GLSLstd450SSign: op = nir_op_isign; break; case GLSLstd450Floor: op = nir_op_ffloor; break; case GLSLstd450Ceil: op = nir_op_fceil; break; case GLSLstd450Fract: op = nir_op_ffract; break; @@ -272,7 +273,15 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Acos: case GLSLstd450Atan: case GLSLstd450Atan2: + case GLSLstd450ModfStruct: + case GLSLstd450NMin: + case GLSLstd450NMax: + case GLSLstd450NClamp: case GLSLstd450Frexp: + case GLSLstd450FrexpStruct: + case GLSLstd450FindILsb: + case GLSLstd450FindSMsb: + case GLSLstd450FindUMsb: case GLSLstd450PackDouble2x32: case 
GLSLstd450UnpackDouble2x32: case GLSLstd450IMix: -- cgit v1.2.3 From 8cc55780fd9041d1384f4e59ab09a28f61cf8840 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 16:57:01 -0800 Subject: nir/inline_functions: Switch to inlining everything --- src/glsl/nir/nir.h | 1 - src/glsl/nir/nir_inline_functions.c | 37 ++++++++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 714edc1541c..b413b38220e 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -2027,7 +2027,6 @@ bool nir_split_var_copies(nir_shader *shader); bool nir_lower_returns_impl(nir_function_impl *impl); bool nir_lower_returns(nir_shader *shader); -bool nir_inline_functions_impl(nir_function_impl *impl); bool nir_inline_functions(nir_shader *shader); void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx); diff --git a/src/glsl/nir/nir_inline_functions.c b/src/glsl/nir/nir_inline_functions.c index e7e17ca7600..3cf83279053 100644 --- a/src/glsl/nir/nir_inline_functions.c +++ b/src/glsl/nir/nir_inline_functions.c @@ -26,11 +26,13 @@ #include "nir_control_flow.h" struct inline_functions_state { - nir_function_impl *impl; + struct set *inlined; nir_builder builder; bool progress; }; +static bool inline_function_impl(nir_function_impl *impl, struct set *inlined); + static bool inline_functions_block(nir_block *block, void *void_state) { @@ -54,11 +56,13 @@ inline_functions_block(nir_block *block, void *void_state) nir_call_instr *call = nir_instr_as_call(instr); assert(call->callee->impl); + inline_function_impl(call->callee->impl, state->inlined); + nir_function_impl *callee_copy = nir_function_impl_clone(call->callee->impl); - exec_list_append(&state->impl->locals, &callee_copy->locals); - exec_list_append(&state->impl->registers, &callee_copy->registers); + exec_list_append(&b->impl->locals, &callee_copy->locals); + exec_list_append(&b->impl->registers, 
&callee_copy->registers); b->cursor = nir_before_instr(&call->instr); @@ -104,22 +108,29 @@ inline_functions_block(nir_block *block, void *void_state) return true; } -bool -nir_inline_functions_impl(nir_function_impl *impl) +static bool +inline_function_impl(nir_function_impl *impl, struct set *inlined) { + if (_mesa_set_search(inlined, impl)) + return false; /* Already inlined */ + struct inline_functions_state state; + state.inlined = inlined; state.progress = false; - state.impl = impl; nir_builder_init(&state.builder, impl); nir_foreach_block(impl, inline_functions_block, &state); - /* SSA and register indices are completely messed up now */ - nir_index_ssa_defs(impl); - nir_index_local_regs(impl); + if (state.progress) { + /* SSA and register indices are completely messed up now */ + nir_index_ssa_defs(impl); + nir_index_local_regs(impl); + + nir_metadata_preserve(impl, nir_metadata_none); + } - nir_metadata_preserve(impl, nir_metadata_none); + _mesa_set_add(inlined, impl); return state.progress; } @@ -127,12 +138,16 @@ nir_inline_functions_impl(nir_function_impl *impl) bool nir_inline_functions(nir_shader *shader) { + struct set *inlined = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); bool progress = false; nir_foreach_function(shader, function) { if (function->impl) - progress = nir_inline_functions_impl(function->impl) || progress; + progress = inline_function_impl(function->impl, inlined) || progress; } + _mesa_set_destroy(inlined, NULL); + return progress; } -- cgit v1.2.3 From 5693637faa4711b61dd8a1c5bc1f2bd48927673b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 16:57:21 -0800 Subject: nir/print: Handle variables with var->name == NULL --- src/glsl/nir/nir_print.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index 176057ffa4b..a231494d4ad 100644 --- a/src/glsl/nir/nir_print.c +++ 
b/src/glsl/nir/nir_print.c @@ -232,15 +232,20 @@ get_var_name(nir_variable *var, print_state *state) return entry->data; char *name; - - struct set_entry *set_entry = _mesa_set_search(state->syms, var->name); - if (set_entry != NULL) { - /* we have a collision with another name, append an @ + a unique index */ - name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++); + if (var->name == NULL) { + name = ralloc_asprintf(state->syms, "@%u", state->index++); } else { - /* Mark this one as seen */ - _mesa_set_add(state->syms, var->name); - name = var->name; + struct set_entry *set_entry = _mesa_set_search(state->syms, var->name); + if (set_entry != NULL) { + /* we have a collision with another name, append an @ + a unique + * index */ + name = ralloc_asprintf(state->syms, "%s@%u", var->name, + state->index++); + } else { + /* Mark this one as seen */ + _mesa_set_add(state->syms, var->name); + name = var->name; + } } _mesa_hash_table_insert(state->ht, var, name); -- cgit v1.2.3 From 451fe2670c0106633d765792d0852ddd3073e4a9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Dec 2015 19:23:25 -0800 Subject: nir/spirv/cfg: Handle discard --- src/glsl/nir/spirv/vtn_cfg.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index 03d2c43018d..eddaa8c4672 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -470,6 +470,12 @@ vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type, case vtn_branch_type_return: nir_jump(&b->nb, nir_jump_return); break; + case vtn_branch_type_discard: { + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_discard); + nir_builder_instr_insert(&b->nb, &discard->instr); + break; + } default: unreachable("Invalid branch type"); } -- cgit v1.2.3 From 91d93f79083596d761a245643647c4c0066556b3 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 29 Dec 
2015 23:59:38 -0800 Subject: nir/spirv: Lower gl_GlobalInvocationID correctly Use nir_intrinsic_load_local_invocation_id, not nir_intrinsic_load_invocation_id (missing 'local'), which is a geometry shader built-in. --- src/glsl/nir/nir_lower_system_values.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c index 33586e6b46e..f642c38561a 100644 --- a/src/glsl/nir/nir_lower_system_values.c +++ b/src/glsl/nir/nir_lower_system_values.c @@ -72,7 +72,7 @@ convert_block(nir_block *block, void *void_state) nir_ssa_def *group_id = nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0); nir_ssa_def *local_id = - nir_load_system_value(b, nir_intrinsic_load_invocation_id, 0); + nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0); sysval = nir_iadd(b, nir_imul(b, group_id, nir_build_imm(b, 3, local_size)), -- cgit v1.2.3 From a0b2829f200288a85899884019c2aea530a95c46 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 10:33:09 -0800 Subject: anv/stream_alloc: Properly manage valgrind NOACCESS and UNDEFINED status When I first did the valgrindifying for stream allocators, I misunderstood some things about valgrind's expectations for NOACCESS and UNDEFINED. First off, valgrind expects things to be marked NOACCESS before you allocate out of them. Since our blocks came from a pool backed by a mmapped memfd, they came in as UNDEFINED; we needed to mark them as NOACCESS. Also, I didn't realize that VALGRIND_MEMPOOL_CHANGE only updated the mempool allocation state and didn't actually change definedness; we had to add a VALGRIND_MAKE_MEM_UNDEFINED to get rid of the NOACCESS on the newly allocated portion. 
--- src/vulkan/anv_allocator.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 4cff84131aa..e87538cac5c 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -682,6 +682,14 @@ struct stream_block { void *current_map; #ifdef HAVE_VALGRIND + /* The pointer pointing to the beginning of the user portion of the + * block. More specifically, this value is: + * + * current_map + ALIGN(sizeof(stream_block), first_chunk_alignment) + * + * where first_chunk_alignment is the alignment of the first chunk + * allocated out of this particular block. + */ void *_vg_ptr; #endif }; @@ -733,9 +741,13 @@ anv_state_stream_alloc(struct anv_state_stream *stream, block = anv_block_pool_alloc(stream->block_pool); void *current_map = stream->block_pool->map; sb = current_map + block; - VG_NOACCESS_WRITE(&sb->current_map, current_map); - VG_NOACCESS_WRITE(&sb->next, stream->current_block); - VG(VG_NOACCESS_WRITE(&sb->_vg_ptr, 0)); + sb->current_map = current_map; + sb->next = stream->current_block; + VG(sb->_vg_ptr = NULL); + + /* Blocks come in from the block_pool as UNDEFINED */ + VG(VALGRIND_MAKE_MEM_NOACCESS(sb, stream->block_pool->block_size)); + stream->current_block = block; stream->next = block + sizeof(*sb); stream->end = block + stream->block_pool->block_size; @@ -759,8 +771,12 @@ anv_state_stream_alloc(struct anv_state_stream *stream, ptrdiff_t vg_offset = vg_ptr - current_map; assert(vg_offset >= stream->current_block && vg_offset < stream->end); + /* This only updates the mempool. The newly allocated chunk is still + * marked as NOACCESS. 
*/ VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, (state.offset + size) - vg_offset); + /* Mark the newly allocated chunk as undefined */ + VALGRIND_MAKE_MEM_UNDEFINED(state.map, state.alloc_size); } #endif -- cgit v1.2.3 From 28243b2fbadeb4055e1ef83c14d94d62f01fe413 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 10:37:50 -0800 Subject: gen7/8/cmd_buffer: Allocate the correct ammount for COLOR_CALC_STATE We were allocating 6 bytes when we should have been allocating 6 dwords. --- src/vulkan/gen7_cmd_buffer.c | 3 ++- src/vulkan/gen8_cmd_buffer.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 7fdef1027e2..feed3611805 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -428,7 +428,8 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN7_COLOR_CALC_STATE_length, 64); + GEN7_COLOR_CALC_STATE_length * 4, + 64); struct GEN7_COLOR_CALC_STATE cc = { .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index d571f9c97e3..9614da705e7 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -265,7 +265,8 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN8_COLOR_CALC_STATE_length, 64); + GEN8_COLOR_CALC_STATE_length * 4, + 64); struct GEN8_COLOR_CALC_STATE cc = { .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], @@ -319,7 +320,8 @@ cmd_buffer_flush_state(struct anv_cmd_buffer 
*cmd_buffer) if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) { struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN9_COLOR_CALC_STATE_length, 64); + GEN9_COLOR_CALC_STATE_length * 4, + 64); struct GEN9_COLOR_CALC_STATE cc = { .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], -- cgit v1.2.3 From e6fc170afb68774549e9889fcbbd94d23ffc46f6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 11:40:38 -0800 Subject: anv/allocator: Rework state streams again If we're going to hav valgrind verify state streams then we need to ensure that once we choose a pointer into a block we always use that pointer until the block is freed. I was trying to do this with the "current_map" thing. However, that breaks down because you have to use the map from the block pool to get to the stream_block to get at current_map. Instead, this commit changes things to track the stream_block by pointer instead of by offset into the block pool. --- src/vulkan/anv_allocator.c | 81 ++++++++++++++++++++++------------------------ src/vulkan/anv_private.h | 11 ++++++- 2 files changed, 48 insertions(+), 44 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index e87538cac5c..e49a684aaef 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -675,20 +675,16 @@ anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) } #define NULL_BLOCK 1 -struct stream_block { - uint32_t next; +struct anv_state_stream_block { + /* The next block */ + struct anv_state_stream_block *next; - /* The map for the BO at the time the block was givne to us */ - void *current_map; + /* The offset into the block pool at which this block starts */ + uint32_t offset; #ifdef HAVE_VALGRIND - /* The pointer pointing to the beginning of the user portion of the - * block. 
More specifically, this value is: - * - * current_map + ALIGN(sizeof(stream_block), first_chunk_alignment) - * - * where first_chunk_alignment is the alignment of the first chunk - * allocated out of this particular block. + /* A pointer to the first user-allocated thing in this block. This is + * what valgrind sees as the start of the block. */ void *_vg_ptr; #endif @@ -702,9 +698,13 @@ anv_state_stream_init(struct anv_state_stream *stream, struct anv_block_pool *block_pool) { stream->block_pool = block_pool; - stream->next = 0; + stream->block = NULL; + + /* Ensure that next + whatever > end. This way the first call to + * state_stream_alloc fetches a new block. + */ + stream->next = 1; stream->end = 0; - stream->current_block = NULL_BLOCK; VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false)); } @@ -712,17 +712,16 @@ anv_state_stream_init(struct anv_state_stream *stream, void anv_state_stream_finish(struct anv_state_stream *stream) { - struct stream_block *sb; - uint32_t block, next_block; - - block = stream->current_block; - while (block != NULL_BLOCK) { - assert(block % stream->block_pool->block_size == 0); - sb = stream->block_pool->map + block; - next_block = VG_NOACCESS_READ(&sb->next); - VG(VALGRIND_MEMPOOL_FREE(stream, VG_NOACCESS_READ(&sb->_vg_ptr))); - anv_block_pool_free(stream->block_pool, block); - block = next_block; + const uint32_t block_size = stream->block_pool->block_size; + + struct anv_state_stream_block *next = stream->block; + while (next != NULL) { + VG(VALGRIND_MAKE_MEM_DEFINED(next, sizeof(*next))); + struct anv_state_stream_block sb = VG_NOACCESS_READ(next); + VG(VALGRIND_MEMPOOL_FREE(stream, sb._vg_ptr)); + VG(VALGRIND_MAKE_MEM_UNDEFINED(next, block_size)); + anv_block_pool_free(stream->block_pool, sb.offset); + next = sb.next; } VG(VALGRIND_DESTROY_MEMPOOL(stream)); @@ -732,33 +731,32 @@ struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, uint32_t size, uint32_t alignment) { - struct stream_block *sb; + struct 
anv_state_stream_block *sb = stream->block; + struct anv_state state; - uint32_t block; state.offset = align_u32(stream->next, alignment); if (state.offset + size > stream->end) { - block = anv_block_pool_alloc(stream->block_pool); - void *current_map = stream->block_pool->map; - sb = current_map + block; - sb->current_map = current_map; - sb->next = stream->current_block; - VG(sb->_vg_ptr = NULL); + uint32_t block = anv_block_pool_alloc(stream->block_pool); + sb = stream->block_pool->map + block; - /* Blocks come in from the block_pool as UNDEFINED */ + VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb)); + sb->next = stream->block; + sb->offset = block; + VG(sb->_vg_ptr = NULL); VG(VALGRIND_MAKE_MEM_NOACCESS(sb, stream->block_pool->block_size)); - stream->current_block = block; + stream->block = sb; + stream->start = block; stream->next = block + sizeof(*sb); stream->end = block + stream->block_pool->block_size; + state.offset = align_u32(stream->next, alignment); assert(state.offset + size <= stream->end); } - sb = stream->block_pool->map + stream->current_block; - void *current_map = VG_NOACCESS_READ(&sb->current_map); - - state.map = current_map + state.offset; + assert(state.offset > stream->start); + state.map = (void *)sb + (state.offset - stream->start); state.alloc_size = size; #ifdef HAVE_VALGRIND @@ -768,13 +766,10 @@ anv_state_stream_alloc(struct anv_state_stream *stream, VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); VALGRIND_MEMPOOL_ALLOC(stream, vg_ptr, size); } else { - ptrdiff_t vg_offset = vg_ptr - current_map; - assert(vg_offset >= stream->current_block && - vg_offset < stream->end); + void *state_end = state.map + state.alloc_size; /* This only updates the mempool. The newly allocated chunk is still * marked as NOACCESS. 
*/ - VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, - (state.offset + size) - vg_offset); + VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, state_end - vg_ptr); /* Mark the newly allocated chunk as undefined */ VALGRIND_MAKE_MEM_UNDEFINED(state.map, state.alloc_size); } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 48992d99a71..95096748a9d 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -384,10 +384,19 @@ struct anv_state_pool { struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; }; +struct anv_state_stream_block; + struct anv_state_stream { struct anv_block_pool *block_pool; + + /* The current working block */ + struct anv_state_stream_block *block; + + /* Offset at which the current block starts */ + uint32_t start; + /* Offset at which to allocate the next state */ uint32_t next; - uint32_t current_block; + /* Offset at which the current block ends */ uint32_t end; }; -- cgit v1.2.3 From 9f23116bfa4dc61b77e31361b2fe3aa2ce052611 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 30 Dec 2015 13:22:32 -0800 Subject: Revert "nir/spirv: Update to the 1.0 GLSL.std.450 header" This reverts commit b33f5d388979f23c583c78f10a18a941cb04ce04, and also removes the (empty) case statements for the new built-ins. It doesn't look like glslang has updated yet, so updating the header just breaks everything, as we no longer agree on opcode numbers. 
--- src/glsl/nir/spirv/GLSL.std.450.h | 89 +++++++++++++++++++-------------------- src/glsl/nir/spirv/vtn_glsl450.c | 3 -- 2 files changed, 43 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/GLSL.std.450.h b/src/glsl/nir/spirv/GLSL.std.450.h index 779d108205e..d1c9b5c1d44 100644 --- a/src/glsl/nir/spirv/GLSL.std.450.h +++ b/src/glsl/nir/spirv/GLSL.std.450.h @@ -27,8 +27,8 @@ #ifndef GLSLstd450_H #define GLSLstd450_H -const int GLSLstd450Version = 100; -const int GLSLstd450Revision = 1; +const int GLSLstd450Version = 99; +const int GLSLstd450Revision = 3; enum GLSLstd450 { GLSLstd450Bad = 0, // Don't use @@ -74,55 +74,52 @@ enum GLSLstd450 { GLSLstd450Modf = 35, // second operand needs an OpVariable to write to GLSLstd450ModfStruct = 36, // no OpVariable operand GLSLstd450FMin = 37, - GLSLstd450NMin = 38, - GLSLstd450UMin = 39, - GLSLstd450SMin = 40, - GLSLstd450FMax = 41, - GLSLstd450NMax = 42, - GLSLstd450UMax = 43, - GLSLstd450SMax = 44, - GLSLstd450FClamp = 45, - GLSLstd450NClamp = 46, - GLSLstd450UClamp = 47, - GLSLstd450SClamp = 48, - GLSLstd450FMix = 49, - GLSLstd450IMix = 50, - GLSLstd450Step = 51, - GLSLstd450SmoothStep = 52, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, - GLSLstd450Fma = 53, - GLSLstd450Frexp = 54, // second operand needs an OpVariable to write to - GLSLstd450FrexpStruct = 55, // no OpVariable operand - GLSLstd450Ldexp = 56, + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, - GLSLstd450PackSnorm4x8 = 57, - GLSLstd450PackUnorm4x8 = 58, - GLSLstd450PackSnorm2x16 = 59, - GLSLstd450PackUnorm2x16 = 60, - GLSLstd450PackHalf2x16 = 61, - 
GLSLstd450PackDouble2x32 = 62, - GLSLstd450UnpackSnorm2x16 = 63, - GLSLstd450UnpackUnorm2x16 = 64, - GLSLstd450UnpackHalf2x16 = 65, - GLSLstd450UnpackSnorm4x8 = 66, - GLSLstd450UnpackUnorm4x8 = 67, - GLSLstd450UnpackDouble2x32 = 68, + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, - GLSLstd450Length = 69, - GLSLstd450Distance = 70, - GLSLstd450Cross = 71, - GLSLstd450Normalize = 72, - GLSLstd450FaceForward = 73, - GLSLstd450Reflect = 74, - GLSLstd450Refract = 75, + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, - GLSLstd450FindILsb = 76, - GLSLstd450FindSMsb = 77, - GLSLstd450FindUMsb = 78, + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, - GLSLstd450InterpolateAtCentroid = 79, - GLSLstd450InterpolateAtSample = 80, - GLSLstd450InterpolateAtOffset = 81, + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, GLSLstd450Count }; diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 820aed7f66a..8905bdfbdde 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -274,9 +274,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Atan: case GLSLstd450Atan2: case GLSLstd450ModfStruct: - case GLSLstd450NMin: - case GLSLstd450NMax: - case GLSLstd450NClamp: case GLSLstd450Frexp: case GLSLstd450FrexpStruct: case GLSLstd450FindILsb: -- cgit v1.2.3 From a7e827192bdbc82f2e0b3c9c5cd9c17004c3f77b Mon Sep 17 00:00:00 2001 
From: Jason Ekstrand Date: Wed, 30 Dec 2015 14:02:20 -0800 Subject: isl: Tile-align height in image size calculation This fixes a bunch of gpu hangs on the dEQP-VK.glsl.ShaderExecutor.common group of CTS tests. --- src/isl/isl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index df0aeed01df..75f65001a1e 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -1016,7 +1016,7 @@ isl_surf_init_s(const struct isl_device *dev, array_pitch_el_rows); const uint32_t total_h_sa = total_h_el * fmtl->bh; - const uint32_t size = row_pitch * total_h_sa; + const uint32_t size = row_pitch * isl_align(total_h_sa, tile_info.height); /* Alignment of surface base address, in bytes */ uint32_t base_alignment = info->min_alignment; -- cgit v1.2.3 From e6cd0c0e1c7bd9575c723165bce393fd66c0266d Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Dec 2015 16:21:09 -0800 Subject: nir/spirv: Implement IsInf and IsNan built-ins. --- src/glsl/nir/spirv/spirv_to_nir.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index b6e38704887..b6f5dd761bb 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2706,7 +2706,14 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, unreachable("No NIR equivalent"); case SpvOpIsNan: + val->ssa->def = nir_fne(&b->nb, src[0], src[0]); + return; + case SpvOpIsInf: + val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), + nir_imm_float(&b->nb, INFINITY)); + return; + case SpvOpIsFinite: case SpvOpIsNormal: case SpvOpSignBitSet: -- cgit v1.2.3 From 07b4f17aaffdf59d9e2e6e8e7c45be09c17ea417 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 14:41:13 -0800 Subject: nir/spirv/GLSL450: Add support for SAbs --- src/glsl/nir/spirv/vtn_glsl450.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c 
b/src/glsl/nir/spirv/vtn_glsl450.c index 8905bdfbdde..2d22e37de2a 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -87,6 +87,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450RoundEven: op = nir_op_fround_even; break; case GLSLstd450Trunc: op = nir_op_ftrunc; break; case GLSLstd450FAbs: op = nir_op_fabs; break; + case GLSLstd450SAbs: op = nir_op_iabs; break; case GLSLstd450FSign: op = nir_op_fsign; break; case GLSLstd450SSign: op = nir_op_isign; break; case GLSLstd450Floor: op = nir_op_ffloor; break; -- cgit v1.2.3 From 96d1baa88d37c51c94579f650cfd9465d28634f4 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 30 Dec 2015 16:26:10 -0800 Subject: isl: Fix assertion failure for npot pixel formats When aligning to isl_format_layout::bs (which is the number of bytes in the pixel), use isl_align_npot() instead of isl_align(), because isl_align() works only for power-of-2 alignment. Fixes assertion in dEQP-VK.pipeline.image.view_type.1d.format.r16g16b16_sfloat.size.512x1. 
--- src/isl/isl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 75f65001a1e..2bf15017e2f 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -813,9 +813,9 @@ isl_calc_row_pitch(const struct isl_device *dev, */ if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { if (isl_format_is_yuv(info->format)) { - row_pitch = isl_align(row_pitch, fmtl->bs); + row_pitch = isl_align_npot(row_pitch, fmtl->bs); } else { - row_pitch = isl_align(row_pitch, 2 * fmtl->bs); + row_pitch = isl_align_npot(row_pitch, 2 * fmtl->bs); } } break; -- cgit v1.2.3 From 2b6bcaf91ab4f3dd7b9a31013725078a38db966d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 16:35:34 -0800 Subject: nir/spirv: Separate handling of preamble from type/var/const instructions --- src/glsl/nir/spirv/spirv_to_nir.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index b6f5dd761bb..fd7e9e8dbc1 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3319,6 +3319,38 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_decoration(b, opcode, w, count); break; + default: + return false; /* End of preamble */ + } + + return true; +} + +static bool +vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceExtension: + case SpvOpExtension: + case SpvOpCapability: + case SpvOpExtInstImport: + case SpvOpMemoryModel: + case SpvOpEntryPoint: + case SpvOpExecutionMode: + case SpvOpString: + case SpvOpName: + case SpvOpMemberName: + case SpvOpLine: + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + assert(!"Invalid opcode types and variables 
section"); + break; + case SpvOpTypeVoid: case SpvOpTypeBool: case SpvOpTypeInt: @@ -3617,6 +3649,10 @@ spirv_to_nir(const uint32_t *words, size_t word_count, words = vtn_foreach_instruction(b, words, word_end, vtn_handle_preamble_instruction); + /* Handle all variable, type, and constant instructions */ + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_variable_or_type_instruction); + vtn_build_cfg(b, words, word_end); foreach_list_typed(struct vtn_function, func, node, &b->functions) { -- cgit v1.2.3 From d9c9a117dc7a4f8ca562926c811e4030d95d78f7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 16:38:16 -0800 Subject: nir/spirv: Handle execution modes as decorations They're basically the same thing. --- src/glsl/nir/spirv/spirv_to_nir.c | 249 ++++++++++++++++++++++---------------- src/glsl/nir/spirv/vtn_private.h | 26 +++- 2 files changed, 168 insertions(+), 107 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index fd7e9e8dbc1..6a4cc182ff7 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -242,11 +242,14 @@ _foreach_decoration_helper(struct vtn_builder *b, { for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { int member; - if (dec->member < 0) { + if (dec->scope == VTN_DEC_DECORATION) { member = parent_member; - } else { + } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) { assert(parent_member == -1); - member = dec->member; + member = dec->scope - VTN_DEC_STRUCT_MEMBER0; + } else { + /* Not a decoration */ + continue; } if (dec->group) { @@ -272,6 +275,19 @@ vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, _foreach_decoration_helper(b, value, -1, value, cb, data); } +void +vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, + vtn_execution_mode_foreach_cb cb, void *data) +{ + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + 
if (dec->scope != VTN_DEC_EXECUTION_MODE) + continue; + + assert(dec->group == NULL); + cb(b, value, dec, data); + } +} + static void vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -280,20 +296,30 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, const uint32_t target = w[1]; w += 2; - int member = -1; switch (opcode) { case SpvOpDecorationGroup: vtn_push_value(b, target, vtn_value_type_undef); break; + case SpvOpDecorate: case SpvOpMemberDecorate: - member = *(w++); - /* fallthrough */ - case SpvOpDecorate: { + case SpvOpExecutionMode: { struct vtn_value *val = &b->values[target]; struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); - dec->member = member; + switch (opcode) { + case SpvOpDecorate: + dec->scope = VTN_DEC_DECORATION; + break; + case SpvOpMemberDecorate: + dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); + break; + case SpvOpExecutionMode: + dec->scope = VTN_DEC_EXECUTION_MODE; + break; + default: + unreachable("Invalid decoration opcode"); + } dec->decoration = *(w++); dec->literals = w; @@ -304,16 +330,21 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, } case SpvOpGroupMemberDecorate: - member = *(w++); - /* fallthrough */ case SpvOpGroupDecorate: { struct vtn_value *group = &b->values[target]; assert(group->value_type == vtn_value_type_decoration_group); + int scope; + if (opcode == SpvOpGroupDecorate) { + scope = VTN_DEC_DECORATION; + } else { + scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); + } + for (; w < w_end; w++) { struct vtn_value *val = &b->values[*w]; struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); - dec->member = member; + dec->scope = scope; dec->group = group; /* Link into the list */ @@ -3201,100 +3232,6 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, b->execution_model = w[1]; break; - case SpvOpExecutionMode: - assert(b->entry_point == &b->values[w[1]]); - - SpvExecutionMode mode = w[2]; - switch(mode) { - case 
SpvExecutionModeOriginUpperLeft: - case SpvExecutionModeOriginLowerLeft: - b->origin_upper_left = (mode == SpvExecutionModeOriginUpperLeft); - break; - - case SpvExecutionModeEarlyFragmentTests: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.early_fragment_tests = true; - break; - - case SpvExecutionModeInvocations: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader->info.gs.invocations = MAX2(1, w[3]); - break; - - case SpvExecutionModeDepthReplacing: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; - break; - case SpvExecutionModeDepthGreater: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; - break; - case SpvExecutionModeDepthLess: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; - break; - case SpvExecutionModeDepthUnchanged: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; - break; - - case SpvExecutionModeLocalSize: - assert(b->shader->stage == MESA_SHADER_COMPUTE); - b->shader->info.cs.local_size[0] = w[3]; - b->shader->info.cs.local_size[1] = w[4]; - b->shader->info.cs.local_size[2] = w[5]; - break; - case SpvExecutionModeLocalSizeHint: - break; /* Nothing do do with this */ - - case SpvExecutionModeOutputVertices: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader->info.gs.vertices_out = w[3]; - break; - - case SpvExecutionModeInputPoints: - case SpvExecutionModeInputLines: - case SpvExecutionModeInputLinesAdjacency: - case SpvExecutionModeTriangles: - case SpvExecutionModeInputTrianglesAdjacency: - case SpvExecutionModeQuads: - case SpvExecutionModeIsolines: - if (b->shader->stage == MESA_SHADER_GEOMETRY) { - b->shader->info.gs.vertices_in = - vertices_in_from_spv_execution_mode(mode); - } else { - assert(!"Tesselation shaders not yet supported"); - 
} - break; - - case SpvExecutionModeOutputPoints: - case SpvExecutionModeOutputLineStrip: - case SpvExecutionModeOutputTriangleStrip: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader->info.gs.output_primitive = - gl_primitive_from_spv_execution_mode(mode); - break; - - case SpvExecutionModeSpacingEqual: - case SpvExecutionModeSpacingFractionalEven: - case SpvExecutionModeSpacingFractionalOdd: - case SpvExecutionModeVertexOrderCw: - case SpvExecutionModeVertexOrderCcw: - case SpvExecutionModePointMode: - assert(!"TODO: Add tessellation metadata"); - break; - - case SpvExecutionModePixelCenterInteger: - case SpvExecutionModeXfb: - assert(!"Unhandled execution mode"); - break; - - case SpvExecutionModeVecTypeHint: - case SpvExecutionModeContractionOff: - break; /* OpenCL */ - } - break; - case SpvOpString: vtn_push_value(b, w[1], vtn_value_type_string)->str = vtn_string_literal(b, &w[2], count - 2); @@ -3311,6 +3248,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpLine: break; /* Ignored for now */ + case SpvOpExecutionMode: case SpvOpDecorationGroup: case SpvOpDecorate: case SpvOpMemberDecorate: @@ -3326,6 +3264,103 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, return true; } +static void +vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point, + const struct vtn_decoration *mode, void *data) +{ + assert(b->entry_point == entry_point); + + switch(mode->exec_mode) { + case SpvExecutionModeOriginUpperLeft: + case SpvExecutionModeOriginLowerLeft: + b->origin_upper_left = + (mode->exec_mode == SpvExecutionModeOriginUpperLeft); + break; + + case SpvExecutionModeEarlyFragmentTests: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.early_fragment_tests = true; + break; + + case SpvExecutionModeInvocations: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.invocations = MAX2(1, mode->literals[0]); + break; + + case 
SpvExecutionModeDepthReplacing: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; + break; + case SpvExecutionModeDepthGreater: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; + break; + case SpvExecutionModeDepthLess: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; + break; + case SpvExecutionModeDepthUnchanged: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; + break; + + case SpvExecutionModeLocalSize: + assert(b->shader->stage == MESA_SHADER_COMPUTE); + b->shader->info.cs.local_size[0] = mode->literals[0]; + b->shader->info.cs.local_size[1] = mode->literals[1]; + b->shader->info.cs.local_size[2] = mode->literals[2]; + break; + case SpvExecutionModeLocalSizeHint: + break; /* Nothing do do with this */ + + case SpvExecutionModeOutputVertices: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.vertices_out = mode->literals[0]; + break; + + case SpvExecutionModeInputPoints: + case SpvExecutionModeInputLines: + case SpvExecutionModeInputLinesAdjacency: + case SpvExecutionModeTriangles: + case SpvExecutionModeInputTrianglesAdjacency: + case SpvExecutionModeQuads: + case SpvExecutionModeIsolines: + if (b->shader->stage == MESA_SHADER_GEOMETRY) { + b->shader->info.gs.vertices_in = + vertices_in_from_spv_execution_mode(mode->exec_mode); + } else { + assert(!"Tesselation shaders not yet supported"); + } + break; + + case SpvExecutionModeOutputPoints: + case SpvExecutionModeOutputLineStrip: + case SpvExecutionModeOutputTriangleStrip: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.output_primitive = + gl_primitive_from_spv_execution_mode(mode->exec_mode); + break; + + case SpvExecutionModeSpacingEqual: + case SpvExecutionModeSpacingFractionalEven: + case 
SpvExecutionModeSpacingFractionalOdd: + case SpvExecutionModeVertexOrderCw: + case SpvExecutionModeVertexOrderCcw: + case SpvExecutionModePointMode: + assert(!"TODO: Add tessellation metadata"); + break; + + case SpvExecutionModePixelCenterInteger: + case SpvExecutionModeXfb: + assert(!"Unhandled execution mode"); + break; + + case SpvExecutionModeVecTypeHint: + case SpvExecutionModeContractionOff: + break; /* OpenCL */ + } +} + static bool vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -3649,6 +3684,10 @@ spirv_to_nir(const uint32_t *words, size_t word_count, words = vtn_foreach_instruction(b, words, word_end, vtn_handle_preamble_instruction); + /* Parse execution modes */ + vtn_foreach_execution_mode(b, b->entry_point, + vtn_handle_execution_mode, NULL); + /* Handle all variable, type, and constant instructions */ words = vtn_foreach_instruction(b, words, word_end, vtn_handle_variable_or_type_instruction); diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 7ed62ee712b..3498215ad5e 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -263,12 +263,26 @@ struct vtn_value { }; }; +#define VTN_DEC_DECORATION -1 +#define VTN_DEC_EXECUTION_MODE -2 +#define VTN_DEC_STRUCT_MEMBER0 0 + struct vtn_decoration { struct vtn_decoration *next; - int member; /* -1 if not a member decoration */ + + /* Specifies how to apply this decoration. Negative values represent a + * decoration or execution mode. (See the VTN_DEC_ #defines above.) + * Non-negative values specify that it applies to a structure member. 
+ */ + int scope; + const uint32_t *literals; struct vtn_value *group; - SpvDecoration decoration; + + union { + SpvDecoration decoration; + SpvExecutionMode exec_mode; + }; }; struct vtn_builder { @@ -357,5 +371,13 @@ typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, vtn_decoration_foreach_cb cb, void *data); +typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + const struct vtn_decoration *, + void *); + +void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, + vtn_execution_mode_foreach_cb cb, void *data); + bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, const uint32_t *words, unsigned count); -- cgit v1.2.3 From d7ae2200f9471c5804380746f73158c7d868c34b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 16:41:54 -0800 Subject: nir/spirv: Get rid of default GS info shaderc has been fixed for a while now. 
--- src/glsl/nir/spirv/spirv_to_nir.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 6a4cc182ff7..e184ef93359 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3674,12 +3674,6 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->values = rzalloc_array(b, struct vtn_value, value_id_bound); exec_list_make_empty(&b->functions); - /* XXX: We shouldn't need these defaults */ - if (b->shader->stage == MESA_SHADER_GEOMETRY) { - b->shader->info.gs.vertices_in = 3; - b->shader->info.gs.output_primitive = 4; /* GL_TRIANGLES */ - } - /* Handle all the preamble instructions */ words = vtn_foreach_instruction(b, words, word_end, vtn_handle_preamble_instruction); -- cgit v1.2.3 From db3a64fcea6318ab5ded186a347e732b7956f1f7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 16:48:59 -0800 Subject: nir/spirv: Use shader stage for determining variable locations --- src/glsl/nir/spirv/spirv_to_nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index e184ef93359..5e8ec8f8260 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1690,10 +1690,10 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, vtn_foreach_decoration(b, val, var_decoration_cb, var); if (!var->data.explicit_location) { - if (b->execution_model == SpvExecutionModelFragment && + if (b->shader->stage == MESA_SHADER_FRAGMENT && var->data.mode == nir_var_shader_out) { var->data.location += FRAG_RESULT_DATA0; - } else if (b->execution_model == SpvExecutionModelVertex && + } else if (b->shader->stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in) { var->data.location += VERT_ATTRIB_GENERIC0; } else if (var->data.mode == nir_var_shader_in || -- cgit v1.2.3 From 
e993e45eb157014fe6a7f65be8edda9fd716dc52 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 17:00:16 -0800 Subject: nir/spirv: Get the shader stage from the SPIR-V Previously, we depended on it being passed in. --- src/glsl/nir/spirv/nir_spirv.h | 1 - src/glsl/nir/spirv/spirv_to_nir.c | 36 +++++++++++++++++++++++++++--------- src/glsl/nir/spirv/vtn_private.h | 2 +- src/glsl/nir/spirv2nir.c | 3 +-- src/vulkan/anv_pipeline.c | 3 ++- 5 files changed, 31 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/nir_spirv.h b/src/glsl/nir/spirv/nir_spirv.h index 1f09174ad7f..3254f10a88d 100644 --- a/src/glsl/nir/spirv/nir_spirv.h +++ b/src/glsl/nir/spirv/nir_spirv.h @@ -37,7 +37,6 @@ extern "C" { #endif nir_shader *spirv_to_nir(const uint32_t *words, size_t word_count, - gl_shader_stage stage, const nir_shader_compiler_options *options); #ifdef __cplusplus diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 5e8ec8f8260..9caba9746e6 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3207,10 +3207,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, switch ((SpvCapability)w[1]) { case SpvCapabilityMatrix: case SpvCapabilityShader: - /* All shaders support these */ - break; case SpvCapabilityGeometry: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); break; default: assert(!"Unsupported capability"); @@ -3647,9 +3644,29 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, return true; } +static gl_shader_stage +stage_for_execution_model(SpvExecutionModel model) +{ + switch (model) { + case SpvExecutionModelVertex: + return MESA_SHADER_VERTEX; + case SpvExecutionModelTessellationControl: + return MESA_SHADER_TESS_CTRL; + case SpvExecutionModelTessellationEvaluation: + return MESA_SHADER_TESS_EVAL; + case SpvExecutionModelGeometry: + return MESA_SHADER_GEOMETRY; + case SpvExecutionModelFragment: + return 
MESA_SHADER_FRAGMENT; + case SpvExecutionModelGLCompute: + return MESA_SHADER_COMPUTE; + default: + unreachable("Unsupported execution model"); + } +} + nir_shader * spirv_to_nir(const uint32_t *words, size_t word_count, - gl_shader_stage stage, const nir_shader_compiler_options *options) { const uint32_t *word_end = words + word_count; @@ -3665,11 +3682,8 @@ spirv_to_nir(const uint32_t *words, size_t word_count, words+= 5; - nir_shader *shader = nir_shader_create(NULL, stage, options); - /* Initialize the stn_builder object */ struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); - b->shader = shader; b->value_id_bound = value_id_bound; b->values = rzalloc_array(b, struct vtn_value, value_id_bound); exec_list_make_empty(&b->functions); @@ -3678,6 +3692,10 @@ spirv_to_nir(const uint32_t *words, size_t word_count, words = vtn_foreach_instruction(b, words, word_end, vtn_handle_preamble_instruction); + gl_shader_stage stage = stage_for_execution_model(b->execution_model); + nir_shader *shader = nir_shader_create(NULL, stage, options); + b->shader = shader; + /* Parse execution modes */ vtn_foreach_execution_mode(b, b->entry_point, vtn_handle_execution_mode, NULL); @@ -3699,12 +3717,12 @@ spirv_to_nir(const uint32_t *words, size_t word_count, vtn_handle_phi_second_pass); } + ralloc_free(b); + /* Because we can still have output reads in NIR, we need to lower * outputs to temporaries before we are truely finished. 
*/ nir_lower_outputs_to_temporaries(shader); - ralloc_free(b); - return shader; } diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 3498215ad5e..318f60804cc 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -317,9 +317,9 @@ struct vtn_builder { unsigned value_id_bound; struct vtn_value *values; + struct vtn_value *entry_point; SpvExecutionModel execution_model; bool origin_upper_left; - struct vtn_value *entry_point; struct vtn_function *func; struct exec_list functions; diff --git a/src/glsl/nir/spirv2nir.c b/src/glsl/nir/spirv2nir.c index 6cf891517c7..f86825bedc5 100644 --- a/src/glsl/nir/spirv2nir.c +++ b/src/glsl/nir/spirv2nir.c @@ -49,7 +49,6 @@ int main(int argc, char **argv) const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); assert(map != NULL); - nir_shader *shader = spirv_to_nir(map, MESA_SHADER_FRAGMENT, - word_count, NULL); + nir_shader *shader = spirv_to_nir(map, word_count, NULL); nir_print_shader(shader, stderr); } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index c17809c519f..0f7835ea84f 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -108,7 +108,8 @@ anv_shader_compile_to_nir(struct anv_device *device, assert(spirv[0] == SPIR_V_MAGIC_NUMBER); assert(module->size % 4 == 0); - nir = spirv_to_nir(spirv, module->size / 4, stage, nir_options); + nir = spirv_to_nir(spirv, module->size / 4, nir_options); + assert(nir->stage == stage); nir_validate_shader(nir); nir_lower_returns(nir); -- cgit v1.2.3 From 0fe4580e64f01d86fb48cdba665ede4e54200658 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 17:17:12 -0800 Subject: nir/spirv: Add support for multiple entrypoints per shader This is done by passing the entrypoint name into spirv_to_nir. It will then process the shader as if that were the only entrypoint we care about. Instead of returning a nir_shader, it now returns a nir_function. 
--- src/glsl/nir/spirv/nir_spirv.h | 5 +++-- src/glsl/nir/spirv/spirv_to_nir.c | 28 ++++++++++++++++++++++------ src/glsl/nir/spirv/vtn_private.h | 1 + src/glsl/nir/spirv2nir.c | 4 ++-- src/vulkan/anv_pipeline.c | 22 ++++++++++------------ 5 files changed, 38 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/nir_spirv.h b/src/glsl/nir/spirv/nir_spirv.h index 3254f10a88d..506bd981101 100644 --- a/src/glsl/nir/spirv/nir_spirv.h +++ b/src/glsl/nir/spirv/nir_spirv.h @@ -36,8 +36,9 @@ extern "C" { #endif -nir_shader *spirv_to_nir(const uint32_t *words, size_t word_count, - const nir_shader_compiler_options *options); +nir_function *spirv_to_nir(const uint32_t *words, size_t word_count, + const char *entry_point_name, + const nir_shader_compiler_options *options); #ifdef __cplusplus } diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 9caba9746e6..5b31f7e7e2a 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3223,11 +3223,16 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, assert(w[2] == SpvMemoryModelGLSL450); break; - case SpvOpEntryPoint: + case SpvOpEntryPoint: { + char *name = vtn_string_literal(b, &w[3], count - 3); + if (strcmp(name, b->entry_point_name) != 0) + break; + assert(b->entry_point == NULL); b->entry_point = &b->values[w[2]]; b->execution_model = w[1]; break; + } case SpvOpString: vtn_push_value(b, w[1], vtn_value_type_string)->str = @@ -3665,8 +3670,9 @@ stage_for_execution_model(SpvExecutionModel model) } } -nir_shader * +nir_function * spirv_to_nir(const uint32_t *words, size_t word_count, + const char *entry_point_name, const nir_shader_compiler_options *options) { const uint32_t *word_end = words + word_count; @@ -3687,14 +3693,20 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->value_id_bound = value_id_bound; b->values = rzalloc_array(b, struct vtn_value, value_id_bound); 
exec_list_make_empty(&b->functions); + b->entry_point_name = entry_point_name; /* Handle all the preamble instructions */ words = vtn_foreach_instruction(b, words, word_end, vtn_handle_preamble_instruction); + if (b->entry_point == NULL) { + assert(!"Entry point not found"); + ralloc_free(b); + return NULL; + } + gl_shader_stage stage = stage_for_execution_model(b->execution_model); - nir_shader *shader = nir_shader_create(NULL, stage, options); - b->shader = shader; + b->shader = nir_shader_create(NULL, stage, options); /* Parse execution modes */ vtn_foreach_execution_mode(b, b->entry_point, @@ -3717,12 +3729,16 @@ spirv_to_nir(const uint32_t *words, size_t word_count, vtn_handle_phi_second_pass); } + assert(b->entry_point->value_type == vtn_value_type_function); + nir_function *entry_point = b->entry_point->func->impl->function; + assert(entry_point); + ralloc_free(b); /* Because we can still have output reads in NIR, we need to lower * outputs to temporaries before we are truely finished. 
*/ - nir_lower_outputs_to_temporaries(shader); + nir_lower_outputs_to_temporaries(entry_point->shader); - return shader; + return entry_point; } diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 318f60804cc..2af0e357acd 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -317,6 +317,7 @@ struct vtn_builder { unsigned value_id_bound; struct vtn_value *values; + const char *entry_point_name; struct vtn_value *entry_point; SpvExecutionModel execution_model; bool origin_upper_left; diff --git a/src/glsl/nir/spirv2nir.c b/src/glsl/nir/spirv2nir.c index f86825bedc5..db56d09c98d 100644 --- a/src/glsl/nir/spirv2nir.c +++ b/src/glsl/nir/spirv2nir.c @@ -49,6 +49,6 @@ int main(int argc, char **argv) const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); assert(map != NULL); - nir_shader *shader = spirv_to_nir(map, word_count, NULL); - nir_print_shader(shader, stderr); + nir_function *func = spirv_to_nir(map, word_count, "main", NULL); + nir_print_shader(func->shader, stderr); } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 0f7835ea84f..12430f824ad 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -96,6 +96,7 @@ anv_shader_compile_to_nir(struct anv_device *device, compiler->glsl_compiler_options[stage].NirOptions; nir_shader *nir; + nir_function *entry_point; if (module->nir) { /* Some things such as our meta clear/blit code will give us a NIR * shader directly. 
In that case, we just ignore the SPIR-V entirely @@ -103,12 +104,18 @@ anv_shader_compile_to_nir(struct anv_device *device, nir = module->nir; nir->options = nir_options; nir_validate_shader(nir); + + assert(exec_list_length(&nir->functions) == 1); + struct exec_node *node = exec_list_get_head(&nir->functions); + entry_point = exec_node_data(nir_function, node, node); } else { uint32_t *spirv = (uint32_t *) module->data; assert(spirv[0] == SPIR_V_MAGIC_NUMBER); assert(module->size % 4 == 0); - nir = spirv_to_nir(spirv, module->size / 4, nir_options); + entry_point = spirv_to_nir(spirv, module->size / 4, entrypoint_name, + nir_options); + nir = entry_point->shader; assert(nir->stage == stage); nir_validate_shader(nir); @@ -126,24 +133,15 @@ anv_shader_compile_to_nir(struct anv_device *device, nir->info.separate_shader = true; /* Pick off the single entrypoint that we want */ - nir_function_impl *entrypoint = NULL; foreach_list_typed_safe(nir_function, func, node, &nir->functions) { - if (strcmp(entrypoint_name, func->name) != 0) { - /* Not our function, get rid of it */ + if (func != entry_point) exec_node_remove(&func->node); - continue; - } - - assert(entrypoint == NULL); - assert(func->impl); - entrypoint = func->impl; } assert(exec_list_length(&nir->functions) == 1); - assert(entrypoint != NULL); nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]); - nir_shader_gather_info(nir, entrypoint); + nir_shader_gather_info(nir, entry_point->impl); return nir; } -- cgit v1.2.3 From 5f7f88524c808e05320cb438b76ccb6ef2cc0ebf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 17:31:19 -0800 Subject: nir/lower_outputs_to_temporaries: Take a nir_function entrypoint --- src/glsl/nir/glsl_to_nir.cpp | 11 ++++++++++- src/glsl/nir/nir.h | 5 ++--- src/glsl/nir/nir_lower_outputs_to_temporaries.c | 4 ++-- src/glsl/nir/spirv/spirv_to_nir.c | 2 +- 4 files changed, 15 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/glsl_to_nir.cpp 
b/src/glsl/nir/glsl_to_nir.cpp index cadcea50346..3cc42d229a0 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -145,7 +145,16 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, v2.run(sh->ir); visit_exec_list(sh->ir, &v1); - nir_lower_outputs_to_temporaries(shader); + nir_function *main = NULL; + nir_foreach_function(shader, func) { + if (strcmp(func->name, "main") == 0) { + main = func; + break; + } + } + assert(main); + + nir_lower_outputs_to_temporaries(shader, main); shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name); if (shader_prog->Label) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index b413b38220e..a050be4d53b 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -2036,9 +2036,8 @@ bool nir_lower_global_vars_to_local(nir_shader *shader); bool nir_lower_locals_to_regs(nir_shader *shader); -void nir_lower_outputs_to_temporaries(nir_shader *shader); - -void nir_lower_outputs_to_temporaries(nir_shader *shader); +void nir_lower_outputs_to_temporaries(nir_shader *shader, + nir_function *entrypoint); void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c b/src/glsl/nir/nir_lower_outputs_to_temporaries.c index 71b06b81fcc..70d85138552 100644 --- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c +++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c @@ -74,7 +74,7 @@ emit_output_copies_block(nir_block *block, void *state) } void -nir_lower_outputs_to_temporaries(nir_shader *shader) +nir_lower_outputs_to_temporaries(nir_shader *shader, nir_function *entrypoint) { struct lower_outputs_state state; @@ -114,7 +114,7 @@ nir_lower_outputs_to_temporaries(nir_shader *shader) * before each EmitVertex call. 
*/ nir_foreach_block(function->impl, emit_output_copies_block, &state); - } else if (strcmp(function->name, "main") == 0) { + } else if (function == entrypoint) { /* For all other shader types, we need to do the copies right before * the jumps to the end block. */ diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 5b31f7e7e2a..c4b0c50c52e 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3738,7 +3738,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, /* Because we can still have output reads in NIR, we need to lower * outputs to temporaries before we are truely finished. */ - nir_lower_outputs_to_temporaries(entry_point->shader); + nir_lower_outputs_to_temporaries(entry_point->shader, entry_point); return entry_point; } -- cgit v1.2.3 From 149f35bbba1b86db112f9d8a71000760cf7fb54c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 17:31:51 -0800 Subject: nir/spirv: Let OpEntryPoint act as an OpName --- src/glsl/nir/spirv/spirv_to_nir.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index c4b0c50c52e..e5c99cd7bf4 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3223,16 +3223,17 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, assert(w[2] == SpvMemoryModelGLSL450); break; - case SpvOpEntryPoint: { - char *name = vtn_string_literal(b, &w[3], count - 3); - if (strcmp(name, b->entry_point_name) != 0) + case SpvOpEntryPoint: + /* Let this be a name label regardless */ + b->values[w[2]].name = vtn_string_literal(b, &w[3], count - 3); + + if (strcmp(b->values[w[2]].name, b->entry_point_name) != 0) break; assert(b->entry_point == NULL); b->entry_point = &b->values[w[2]]; b->execution_model = w[1]; break; - } case SpvOpString: vtn_push_value(b, w[1], vtn_value_type_string)->str = -- cgit 
v1.2.3 From 5afac62b28ad3061928d623727a802f975eaae46 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 17:32:03 -0800 Subject: nir/spirv: Handle OpLine --- src/glsl/nir/spirv/spirv_to_nir.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index e5c99cd7bf4..1118d75351d 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3248,9 +3248,6 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, /* TODO */ break; - case SpvOpLine: - break; /* Ignored for now */ - case SpvOpExecutionMode: case SpvOpDecorationGroup: case SpvOpDecorate: @@ -3380,7 +3377,6 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpString: case SpvOpName: case SpvOpMemberName: - case SpvOpLine: case SpvOpDecorationGroup: case SpvOpDecorate: case SpvOpMemberDecorate: @@ -3389,6 +3385,9 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, assert(!"Invalid opcode types and variables section"); break; + case SpvOpLine: + break; /* Ignored for now */ + case SpvOpTypeVoid: case SpvOpTypeBool: case SpvOpTypeInt: @@ -3440,6 +3439,9 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { switch (opcode) { + case SpvOpLine: + break; /* Ignored for now */ + case SpvOpLabel: { struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; assert(block->block == nir_cursor_current_block(b->nb.cursor)); -- cgit v1.2.3 From fed98df4286452aa3b6a9a950fca658a752fd31d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 17:43:00 -0800 Subject: nir/gather_info: Add support for end_primitive_with_counter --- src/glsl/nir/nir_gather_info.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/nir_gather_info.c b/src/glsl/nir/nir_gather_info.c index 
18c8e3649dc..b84915c2d2b 100644 --- a/src/glsl/nir/nir_gather_info.c +++ b/src/glsl/nir/nir_gather_info.c @@ -50,6 +50,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) break; case nir_intrinsic_end_primitive: + case nir_intrinsic_end_primitive_with_counter: assert(shader->stage == MESA_SHADER_GEOMETRY); shader->info.gs.uses_end_primitive = 1; break; -- cgit v1.2.3 From 7d57528233b8c3f87834221a12db9c3563d011e1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 18:44:19 -0800 Subject: nir/clone: Expose nir_constant_clone --- src/glsl/nir/nir.h | 1 + src/glsl/nir/nir_clone.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index a050be4d53b..674064c0e20 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1994,6 +1994,7 @@ void nir_print_instr(const nir_instr *instr, FILE *fp); nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s); nir_function_impl *nir_function_impl_clone(const nir_function_impl *impl); +nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var); #ifdef DEBUG void nir_validate_shader(nir_shader *shader); diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c index 116c4b8a7d6..0ea8da77421 100644 --- a/src/glsl/nir/nir_clone.c +++ b/src/glsl/nir/nir_clone.c @@ -109,8 +109,8 @@ remap_var(clone_state *state, const nir_variable *var) return _lookup_ptr(state, var, var->data.mode != nir_var_local); } -static nir_constant * -clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar) +nir_constant * +nir_constant_clone(const nir_constant *c, nir_variable *nvar) { nir_constant *nc = ralloc(nvar, nir_constant); @@ -118,7 +118,7 @@ clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar) nc->num_elements = c->num_elements; nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements); for (unsigned i = 0; i < c->num_elements; i++) { - 
nc->elements[i] = clone_constant(state, c->elements[i], nvar); + nc->elements[i] = nir_constant_clone(c->elements[i], nvar); } return nc; @@ -142,7 +142,7 @@ clone_variable(clone_state *state, const nir_variable *var) var->num_state_slots * sizeof(nir_state_slot)); if (var->constant_initializer) { nvar->constant_initializer = - clone_constant(state, var->constant_initializer, nvar); + nir_constant_clone(var->constant_initializer, nvar); } nvar->interface_type = var->interface_type; -- cgit v1.2.3 From 601b7d5f985b9831ddfeab268b28d14e1c7ea1b7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 18:51:06 -0800 Subject: nir/lower_outputs_to_temporaries: Reparent constant initializers --- src/glsl/nir/nir_lower_outputs_to_temporaries.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c b/src/glsl/nir/nir_lower_outputs_to_temporaries.c index 70d85138552..00ac09114cf 100644 --- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c +++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c @@ -97,6 +97,9 @@ nir_lower_outputs_to_temporaries(nir_shader *shader, nir_function *entrypoint) /* Reparent the name to the new variable */ ralloc_steal(output, output->name); + /* Reparent the constant initializer (if any) */ + ralloc_steal(output, output->constant_initializer); + /* Give the output a new name with @out-temp appended */ temp->name = ralloc_asprintf(var, "%s@out-temp", output->name); temp->data.mode = nir_var_global; -- cgit v1.2.3 From cf6ce424e0405392fa92a41ace5d601b343a0f15 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 18:51:57 -0800 Subject: nir/spirv: Fix constant num_elements and allocation Thanks to the addition of nir_clone, we now have a num_elements field in nir_constant which we weren't setting. Also, constants have to be parented to the variable they initialize, so we have to make a copy. 
--- src/glsl/nir/spirv/spirv_to_nir.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 1118d75351d..7a017ee38a6 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -741,7 +741,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->constant = ralloc(b, nir_constant); + val->constant = rzalloc(b, nir_constant); switch (opcode) { case SpvOpConstantTrue: assert(val->const_type == glsl_bool_type()); @@ -784,6 +784,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, case GLSL_TYPE_STRUCT: case GLSL_TYPE_ARRAY: ralloc_steal(val->constant, elems); + val->constant->num_elements = elem_count; val->constant->elements = elems; break; @@ -990,7 +991,7 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, var->data.mode = nir_var_global; var->data.read_only = true; - nir_constant *val = ralloc(var, nir_constant); + nir_constant *val = rzalloc(var, nir_constant); val->value.u[0] = b->shader->info.cs.local_size[0]; val->value.u[1] = b->shader->info.cs.local_size[1]; val->value.u[2] = b->shader->info.cs.local_size[2]; @@ -1674,8 +1675,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, if (count > 4) { assert(count == 5); - var->constant_initializer = + nir_constant *constant = vtn_value(b, w[4], vtn_value_type_constant)->constant; + var->constant_initializer = nir_constant_clone(constant, var); } val->deref = nir_deref_var_create(b, var); -- cgit v1.2.3 From 3421ba1843e67109daf17711a2d411444ad62b13 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 19:32:41 -0800 Subject: anv/device: Place memory types at heapIndex == 0 Previously, they were at heapIndex == 1 even though we only advertised one heap. 
--- src/vulkan/anv_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 88515c353ee..88a4b8d067d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -568,7 +568,7 @@ void anv_GetPhysicalDeviceMemoryProperties( VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 1, + .heapIndex = 0, }; } else { /* The spec requires that we expose a host-visible, coherent memory @@ -581,13 +581,13 @@ void anv_GetPhysicalDeviceMemoryProperties( .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - .heapIndex = 1, + .heapIndex = 0, }; pMemoryProperties->memoryTypes[1] = (VkMemoryType) { .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 1, + .heapIndex = 0, }; } -- cgit v1.2.3 From 0bb103d0104da78c31b982e62278c201a416a68b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 20:54:27 -0800 Subject: nir/spirv: Handle push constants after decorations --- src/glsl/nir/spirv/spirv_to_nir.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 7a017ee38a6..8527c8b232f 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1599,6 +1599,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + SpvStorageClass storage_class = w[3]; nir_variable *var = rzalloc(b->shader, nir_variable); @@ -1618,7 +1619,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, if (interface_type) var->interface_type = 
interface_type->type; - switch ((SpvStorageClass)w[3]) { + switch (storage_class) { case SpvStorageClassUniform: case SpvStorageClassUniformConstant: if (interface_type && interface_type->buffer_block) { @@ -1704,6 +1705,18 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } } + /* XXX: Work around what appears to be a glslang bug. While the + * SPIR-V spec doesn't say that setting a descriptor set on a push + * constant is invalid, it certainly makes no sense. However, at + * some point, glslang started setting descriptor set 0 on push + * constants for some unknown reason. Hopefully this can be removed + * at some point in the future. + */ + if (storage_class == SpvStorageClassPushConstant) { + var->data.descriptor_set = -1; + var->data.binding = -1; + } + /* Interface block variables aren't actually going to be referenced * by the generated NIR, so we don't put them in the list */ -- cgit v1.2.3 From 4f9a211b4a036c3d5854e99ddbebb4bc0ef327df Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 21:01:55 -0800 Subject: Revert "isl: Fix assertion failure for npot pixel formats" This reverts commit 96d1baa88d37c51c94579f650cfd9465d28634f4. 
--- src/isl/isl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 2bf15017e2f..75f65001a1e 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -813,9 +813,9 @@ isl_calc_row_pitch(const struct isl_device *dev, */ if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { if (isl_format_is_yuv(info->format)) { - row_pitch = isl_align_npot(row_pitch, fmtl->bs); + row_pitch = isl_align(row_pitch, fmtl->bs); } else { - row_pitch = isl_align_npot(row_pitch, 2 * fmtl->bs); + row_pitch = isl_align(row_pitch, 2 * fmtl->bs); } } break; -- cgit v1.2.3 From 1ddcbbf05f051365345bec465054efcafb579bdb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Dec 2015 21:57:04 -0800 Subject: nir/spirv: Add a missing break statement in handle_image --- src/glsl/nir/spirv/spirv_to_nir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 8527c8b232f..681adc74d1f 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2273,6 +2273,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, } else { image.sample = nir_ssa_undef(&b->nb, 1); } + break; default: unreachable("Invalid image opcode"); -- cgit v1.2.3 From 373fd89e4bc7c36c5df2dda16f148ccf55197635 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 31 Dec 2015 11:55:48 -0800 Subject: isl: Document the 3D block extent of isl_format --- src/isl/isl.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/isl/isl.h b/src/isl/isl.h index 22fc7540ac7..2f5a6ffe1c4 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -571,6 +571,13 @@ struct isl_channel_layout { uint8_t bits; /**< Size in bits */ }; +/** + * Each format has 3D block extent (width, height, depth). The block extent + * of compressed formats is that of the format's compression block. 
For + * example, the block extent of ISL_FORMAT_ETC2_RGB8 is (w=4, h=4, d=1). + * The block extent of uncompressed pixel formats, such as + * ISL_FORMAT_R8G8B8A8_UNORM, is is (w=1, h=1, d=1). + */ struct isl_format_layout { enum isl_format format; -- cgit v1.2.3 From d25cff687ba1cac75cdd59eb726da1d6c13035be Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 31 Dec 2015 11:56:13 -0800 Subject: isl: Better document surface units Logical pixels, physical surface samples, and physical surface elements. Requested-by: Jason Ekstrand --- src/isl/isl.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/isl.h b/src/isl/isl.h index 2f5a6ffe1c4..22435e923b9 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -39,7 +39,14 @@ * Surface Units * ============= * - * Some symbol names have a unit suffix. + * Intro + * ----- + * ISL takes care in its equations to correctly handle conversion among + * surface units (such as pixels and compression blocks) and to carefully + * distinguish between a surface's logical layout in the client API and its + * physical layout in memory. + * + * Symbol names often explicitly declare their unit with a suffix: * * - px: logical pixels * - sa: physical surface samples @@ -47,7 +54,58 @@ * - sa_rows: rows of physical surface samples * - el_rows: rows of physical surface elements * - * The Broadwell PRM [1] defines a surface element as follows: + * Logical units are independent of hardware generation and are closely + * related to the user-facing API (OpenGL and Vulkan). Physical units are + * dependent on hardware generation and reflect the surface's layout in + * memory. + * + * Definitions + * ----------- + * - Logical Pixels (px): + * + * The surface's layout from the perspective of the client API (OpenGL and + * Vulkan) is in units of logical pixels. Logical pixels are independent of + * the surface's layout in memory. 
+ * + * A surface's width and height, in units of logical pixels, is not affected + * by the surface's sample count. For example, consider a VkImage created + * with VkImageCreateInfo{width=w0, height=h0, samples=s0}. The surface's + * width and height at level 0 is, in units of logical pixels, w0 and h0 + * regardless of the value of s0. + * + * For example, the logical array length of a 3D surface is always 1, even + * on Gen9 where the surface's memory layout is that of an array surface + * (ISL_DIM_LAYOUT_GEN4_2D). + * + * - Physical Surface Samples (sa): + * + * For a multisampled surface, this unit has the obvious meaning. + * A singlesampled surface, from ISL's perspective, is simply a multisampled + * surface whose sample count is 1. + * + * For example, consider a 2D single-level non-array surface with samples=4, + * width_px=64, and height_px=64 (note that the suffix 'px' indicates + * logical pixels). If the surface's multisample layout is + * ISL_MSAA_LAYOUT_INTERLEAVED, then the extent of level 0 is, in units of + * physical surface samples, width_sa=128, height_sa=128, depth_sa=1, + * array_length_sa=1. If ISL_MSAA_LAYOUT_ARRAY, then width_sa=64, + * height_sa=64, depth_sa=1, array_length_sa=4. + * + * - Physical Surface Elements (el): + * + * This unit allows ISL to treat compressed and uncompressed formats + * identically in many calculations. + * + * If the surface's pixel format is compressed, such as ETC2, then a surface + * element is equivalent to a compression block. If uncompressed, then + * a surface element is equivalent to a surface sample. As a corollary, for + * a given surface a surface element is at least as large as a surface + * sample. + * + * Errata + * ------ + * ISL acquired the term 'element' from the Broadwell PRM [1], which defines + * a surface element as follows: * * An element is defined as a pixel in uncompresed surface formats, and as * a compression block in compressed surface formats. 
For -- cgit v1.2.3 From c6364495b24531cf2feb9fab356e16d028fd879e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Dec 2015 11:58:24 -0800 Subject: anv/pipeline: Move vk_to_gen tables into a shared header --- src/vulkan/gen7_pipeline.c | 91 +------------------------------- src/vulkan/gen8_pipeline.c | 91 +------------------------------- src/vulkan/genX_pipeline_util.h | 111 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 178 deletions(-) create mode 100644 src/vulkan/genX_pipeline_util.h (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 0ac93b9055c..9d83c76d5f2 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -32,6 +32,8 @@ #include "gen7_pack.h" #include "gen75_pack.h" +#include "genX_pipeline_util.h" + static void emit_vertex_input(struct anv_pipeline *pipeline, const VkPipelineVertexInputStateCreateInfo *info, @@ -109,24 +111,6 @@ emit_vertex_input(struct anv_pipeline *pipeline, } } -static const uint32_t vk_to_gen_cullmode[] = { - [VK_CULL_MODE_NONE] = CULLMODE_NONE, - [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, - [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, - [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH -}; - -static const uint32_t vk_to_gen_fillmode[] = { - [VK_POLYGON_MODE_FILL] = RASTER_SOLID, - [VK_POLYGON_MODE_LINE] = RASTER_WIREFRAME, - [VK_POLYGON_MODE_POINT] = RASTER_POINT, -}; - -static const uint32_t vk_to_gen_front_face[] = { - [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, - [VK_FRONT_FACE_CLOCKWISE] = 0 -}; - static void gen7_emit_rs_state(struct anv_pipeline *pipeline, const VkPipelineRasterizationStateCreateInfo *info, @@ -168,77 +152,6 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, GEN7_3DSTATE_SF_pack(NULL, &pipeline->gen7.sf, &sf); } -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - 
[VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, -}; - -static const uint32_t vk_to_gen_stencil_op[] = { - [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, - [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, - [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, - [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, - [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, - [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, - [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, - [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, -}; - -static const uint32_t vk_to_gen_blend_op[] = { - [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, - [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, - [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, - [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, - [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, -}; - -static const uint32_t vk_to_gen_logic_op[] = { - [VK_LOGIC_OP_COPY] = LOGICOP_COPY, - [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, - [VK_LOGIC_OP_AND] = LOGICOP_AND, - [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, - [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, - [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, - [VK_LOGIC_OP_XOR] = LOGICOP_XOR, - [VK_LOGIC_OP_OR] = LOGICOP_OR, - [VK_LOGIC_OP_NOR] = LOGICOP_NOR, - [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, - [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, - [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, - [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, - [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, - [VK_LOGIC_OP_NAND] = LOGICOP_NAND, - [VK_LOGIC_OP_SET] = LOGICOP_SET, -}; - -static const uint32_t vk_to_gen_blend[] = { - [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, - [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, - [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, - 
[VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, - [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, - [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, - [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, - [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, - [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, - [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, - [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, -}; - static void gen7_emit_ds_state(struct anv_pipeline *pipeline, const VkPipelineDepthStencilStateCreateInfo *info) diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 20cd9028093..5ecc8cfdf0e 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -32,6 +32,8 @@ #include "gen8_pack.h" #include "gen9_pack.h" +#include "genX_pipeline_util.h" + static void emit_vertex_input(struct anv_pipeline *pipeline, const VkPipelineVertexInputStateCreateInfo *info, @@ -121,24 +123,6 @@ emit_rs_state(struct anv_pipeline *pipeline, const VkPipelineRasterizationStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { - static const uint32_t vk_to_gen_cullmode[] = { - [VK_CULL_MODE_NONE] = CULLMODE_NONE, - [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, - [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, - [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH - }; - - static const uint32_t vk_to_gen_fillmode[] = { - [VK_POLYGON_MODE_FILL] = RASTER_SOLID, - [VK_POLYGON_MODE_LINE] = 
RASTER_WIREFRAME, - [VK_POLYGON_MODE_POINT] = RASTER_POINT, - }; - - static const uint32_t vk_to_gen_front_face[] = { - [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, - [VK_FRONT_FACE_CLOCKWISE] = 0 - }; - struct GENX(3DSTATE_SF) sf = { GENX(3DSTATE_SF_header), .ViewportTransformEnable = !(extra && extra->disable_viewport), @@ -179,55 +163,6 @@ emit_cb_state(struct anv_pipeline *pipeline, { struct anv_device *device = pipeline->device; - static const uint32_t vk_to_gen_logic_op[] = { - [VK_LOGIC_OP_COPY] = LOGICOP_COPY, - [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, - [VK_LOGIC_OP_AND] = LOGICOP_AND, - [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, - [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, - [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, - [VK_LOGIC_OP_XOR] = LOGICOP_XOR, - [VK_LOGIC_OP_OR] = LOGICOP_OR, - [VK_LOGIC_OP_NOR] = LOGICOP_NOR, - [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, - [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, - [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, - [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, - [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, - [VK_LOGIC_OP_NAND] = LOGICOP_NAND, - [VK_LOGIC_OP_SET] = LOGICOP_SET, - }; - - static const uint32_t vk_to_gen_blend[] = { - [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, - [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, - [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, - [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, - [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, - [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, - [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR, - [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, - 
[VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA, - [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, - [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, - [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, - }; - - static const uint32_t vk_to_gen_blend_op[] = { - [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, - [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, - [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, - [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, - [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, - }; - uint32_t num_dwords = GENX(BLEND_STATE_length); pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); @@ -293,28 +228,6 @@ emit_cb_state(struct anv_pipeline *pipeline, .BlendStatePointerValid = true); } -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, -}; - -static const uint32_t vk_to_gen_stencil_op[] = { - [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, - [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, - [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, - [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, - [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, - [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, - [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, - [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, -}; - static void emit_ds_state(struct anv_pipeline *pipeline, const VkPipelineDepthStencilStateCreateInfo *info) diff --git 
a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h new file mode 100644 index 00000000000..fd294f8befc --- /dev/null +++ b/src/vulkan/genX_pipeline_util.h @@ -0,0 +1,111 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +static const uint32_t vk_to_gen_cullmode[] = { + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH +}; + +static const uint32_t vk_to_gen_fillmode[] = { + [VK_POLYGON_MODE_FILL] = RASTER_SOLID, + [VK_POLYGON_MODE_LINE] = RASTER_WIREFRAME, + [VK_POLYGON_MODE_POINT] = RASTER_POINT, +}; + +static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, + [VK_FRONT_FACE_CLOCKWISE] = 0 +}; + +static const uint32_t vk_to_gen_logic_op[] = { + [VK_LOGIC_OP_COPY] = LOGICOP_COPY, + [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, + [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = LOGICOP_XOR, + [VK_LOGIC_OP_OR] = LOGICOP_OR, + [VK_LOGIC_OP_NOR] = LOGICOP_NOR, + [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = LOGICOP_NAND, + [VK_LOGIC_OP_SET] = LOGICOP_SET, +}; + +static const uint32_t vk_to_gen_blend[] = { + [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + 
[VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, +}; + +static const uint32_t vk_to_gen_blend_op[] = { + [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, + [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, + [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, + [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, + [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; + +static const uint32_t vk_to_gen_stencil_op[] = { + [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, + [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, + [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, + [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, + [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, + [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, + [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, + [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, +}; -- cgit v1.2.3 From 5318424d4999b49cca84b11bc4d2eb70a7444fb8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Dec 2015 12:00:58 -0800 Subject: anv/pipeline: Better vertex input channel setup First off, it now uses isl formats instead of anv_format. 
Also, it properly handles integer vs. floating-point default channels and can properly handle alpha-only channels. (Not sure if those are allowed). --- src/vulkan/gen7_pipeline.c | 13 +++++++------ src/vulkan/gen8_pipeline.c | 13 +++++++------ src/vulkan/genX_pipeline_util.h | 26 ++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 9d83c76d5f2..f4070743710 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -73,7 +73,8 @@ emit_vertex_input(struct anv_pipeline *pipeline, for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { const VkVertexInputAttributeDescription *desc = &info->pVertexAttributeDescriptions[i]; - const struct anv_format *format = anv_format_for_vk_format(desc->format); + enum isl_format format = anv_get_isl_format(desc->format, + VK_IMAGE_ASPECT_COLOR_BIT); assert(desc->binding < 32); @@ -85,13 +86,13 @@ emit_vertex_input(struct anv_pipeline *pipeline, struct GEN7_VERTEX_ELEMENT_STATE element = { .VertexBufferIndex = desc->binding, .Valid = true, - .SourceElementFormat = format->surface_format, + .SourceElementFormat = format, .EdgeFlagEnable = false, .SourceElementOffset = desc->offset, - .Component0Control = VFCOMP_STORE_SRC, - .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component3Control = format->num_channels >= 4 ? 
VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP + .Component0Control = vertex_element_comp_control(format, 0), + .Component1Control = vertex_element_comp_control(format, 1), + .Component2Control = vertex_element_comp_control(format, 2), + .Component3Control = vertex_element_comp_control(format, 3), }; GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + slot * 2], &element); } diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 5ecc8cfdf0e..ae8ab404746 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -69,7 +69,8 @@ emit_vertex_input(struct anv_pipeline *pipeline, for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { const VkVertexInputAttributeDescription *desc = &info->pVertexAttributeDescriptions[i]; - const struct anv_format *format = anv_format_for_vk_format(desc->format); + enum isl_format format = anv_get_isl_format(desc->format, + VK_IMAGE_ASPECT_COLOR_BIT); assert(desc->binding < 32); @@ -81,13 +82,13 @@ emit_vertex_input(struct anv_pipeline *pipeline, struct GENX(VERTEX_ELEMENT_STATE) element = { .VertexBufferIndex = desc->binding, .Valid = true, - .SourceElementFormat = format->surface_format, + .SourceElementFormat = format, .EdgeFlagEnable = false, .SourceElementOffset = desc->offset, - .Component0Control = VFCOMP_STORE_SRC, - .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component3Control = format->num_channels >= 4 ? 
VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP + .Component0Control = vertex_element_comp_control(format, 0), + .Component1Control = vertex_element_comp_control(format, 1), + .Component2Control = vertex_element_comp_control(format, 2), + .Component3Control = vertex_element_comp_control(format, 3), }; GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); diff --git a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h index fd294f8befc..08fe6aac6a4 100644 --- a/src/vulkan/genX_pipeline_util.h +++ b/src/vulkan/genX_pipeline_util.h @@ -21,6 +21,32 @@ * IN THE SOFTWARE. */ +static uint32_t +vertex_element_comp_control(enum isl_format format, unsigned comp) +{ + uint8_t bits; + switch (comp) { + case 0: bits = isl_format_layouts[format].channels.r.bits; break; + case 1: bits = isl_format_layouts[format].channels.g.bits; break; + case 2: bits = isl_format_layouts[format].channels.b.bits; break; + case 3: bits = isl_format_layouts[format].channels.a.bits; break; + default: unreachable("Invalid component"); + } + + if (bits) { + return VFCOMP_STORE_SRC; + } else if (comp < 3) { + return VFCOMP_STORE_0; + } else if (isl_format_layouts[format].channels.r.type == ISL_UINT || + isl_format_layouts[format].channels.r.type == ISL_SINT) { + assert(comp == 3); + return VFCOMP_STORE_1_INT; + } else { + assert(comp == 3); + return VFCOMP_STORE_1_FP; + } +} + static const uint32_t vk_to_gen_cullmode[] = { [VK_CULL_MODE_NONE] = CULLMODE_NONE, [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, -- cgit v1.2.3 From 86ecb28ec6edd662775aa48513c013cadf4ddd6b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 31 Dec 2015 12:02:04 -0800 Subject: isl: Document some isl_surf::phys_level0_sa invariants isl_dim_layout restricts the range of isl_surf::phys_level0_sa. 
--- src/isl/isl.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/isl/isl.h b/src/isl/isl.h index 22435e923b9..68d05e37186 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -492,12 +492,16 @@ enum isl_dim_layout { * * One-dimensional surfaces are identical to 2D surfaces with height of * one. + * + * @invariant isl_surf::phys_level0_sa::depth == 1 */ ISL_DIM_LAYOUT_GEN4_2D, /** * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section * 6.17.5: 3D Surfaces. + * + * @invariant isl_surf::phys_level0_sa::array_len == 1 */ ISL_DIM_LAYOUT_GEN4_3D, -- cgit v1.2.3 From 3fe1f118f8f5ad61d4dd58a7568627790ff31348 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Dec 2015 12:07:10 -0800 Subject: anv/cmd_buffer: Fix a pointer-cast typo --- src/vulkan/anv_cmd_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 1eaa3df633c..2f19c75e7cc 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -538,7 +538,7 @@ void anv_CmdBindDescriptorSets( for (unsigned j = 0; j < array_size; j++) { uint32_t range = 0; if (desc->buffer_view) - range = desc->buffer_view; + range = desc->buffer_view->range; push->dynamic[d].offset = *(offsets++); push->dynamic[d].range = range; desc++; -- cgit v1.2.3 From 6b5cbdb317c770b7cf767bc95b3ed79f01b10cf9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Dec 2015 12:07:23 -0800 Subject: anv/format: Get rid of num_channels --- src/vulkan/anv_formats.c | 218 +++++++++++++++++++++++------------------------ src/vulkan/anv_private.h | 1 - 2 files changed, 109 insertions(+), 110 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 9a895e05432..6465fabd5b5 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -36,7 +36,7 @@ } static const struct anv_format anv_formats[] = { - fmt(VK_FORMAT_UNDEFINED, 
ISL_FORMAT_RAW, .num_channels = 1), + fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW), fmt(VK_FORMAT_R4G4_UNORM_PACK8, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), @@ -45,115 +45,115 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B5G5R5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM, .num_channels = 1), - fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM, .num_channels = 1), - fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED, .num_channels = 1), - fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED, .num_channels = 1), - fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT, .num_channels = 1), - fmt(VK_FORMAT_R8_SINT, ISL_FORMAT_R8_SINT, .num_channels = 1), + fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM), + fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM), + fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED), + fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED), + fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT), + fmt(VK_FORMAT_R8_SINT, ISL_FORMAT_R8_SINT), fmt(VK_FORMAT_R8_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R8G8_UNORM, ISL_FORMAT_R8G8_UNORM, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SNORM, ISL_FORMAT_R8G8_SNORM, .num_channels = 2), - fmt(VK_FORMAT_R8G8_USCALED, ISL_FORMAT_R8G8_USCALED, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SSCALED, ISL_FORMAT_R8G8_SSCALED, .num_channels = 2), - fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT, .num_channels = 2), - fmt(VK_FORMAT_R8G8_SINT, ISL_FORMAT_R8G8_SINT, .num_channels = 2), + fmt(VK_FORMAT_R8G8_UNORM, ISL_FORMAT_R8G8_UNORM), + fmt(VK_FORMAT_R8G8_SNORM, ISL_FORMAT_R8G8_SNORM), + fmt(VK_FORMAT_R8G8_USCALED, ISL_FORMAT_R8G8_USCALED), + fmt(VK_FORMAT_R8G8_SSCALED, ISL_FORMAT_R8G8_SSCALED), + fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT), + fmt(VK_FORMAT_R8G8_SINT, 
ISL_FORMAT_R8G8_SINT), fmt(VK_FORMAT_R8G8_SRGB, ISL_FORMAT_UNSUPPORTED), /* L8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8X8_UNORM, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_UINT, ISL_FORMAT_R8G8B8_UINT, .num_channels = 3), - fmt(VK_FORMAT_R8G8B8_SINT, ISL_FORMAT_R8G8B8_SINT, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8X8_UNORM), + fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM), + fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED), + fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED), + fmt(VK_FORMAT_R8G8B8_UINT, ISL_FORMAT_R8G8B8_UINT), + fmt(VK_FORMAT_R8G8B8_SINT, ISL_FORMAT_R8G8B8_SINT), fmt(VK_FORMAT_R8G8B8_SRGB, ISL_FORMAT_UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8A8_UNORM, ISL_FORMAT_R8G8B8A8_UNORM, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SNORM, ISL_FORMAT_R8G8B8A8_SNORM, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_USCALED, ISL_FORMAT_R8G8B8A8_USCALED, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SSCALED, ISL_FORMAT_R8G8B8A8_SSCALED, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT, .num_channels = 4), - fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB, .num_channels = 4), - fmt(VK_FORMAT_A8B8G8R8_UNORM_PACK32, ISL_FORMAT_R8G8B8A8_UNORM, .num_channels = 4), - fmt(VK_FORMAT_A8B8G8R8_SNORM_PACK32, ISL_FORMAT_R8G8B8A8_SNORM, .num_channels = 4), - fmt(VK_FORMAT_A8B8G8R8_USCALED_PACK32, ISL_FORMAT_R8G8B8A8_USCALED, .num_channels = 4), - fmt(VK_FORMAT_A8B8G8R8_SSCALED_PACK32, ISL_FORMAT_R8G8B8A8_SSCALED, .num_channels = 4), - fmt(VK_FORMAT_A8B8G8R8_UINT_PACK32, ISL_FORMAT_R8G8B8A8_UINT, .num_channels = 4), - 
fmt(VK_FORMAT_A8B8G8R8_SINT_PACK32, ISL_FORMAT_R8G8B8A8_SINT, .num_channels = 4), - fmt(VK_FORMAT_A8B8G8R8_SRGB_PACK32, ISL_FORMAT_R8G8B8A8_UNORM_SRGB, .num_channels = 4), - fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32, ISL_FORMAT_B10G10R10A2_UNORM, .num_channels = 4), - fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32, ISL_FORMAT_B10G10R10A2_SNORM, .num_channels = 4), - fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED, .num_channels = 4), - fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED, .num_channels = 4), - fmt(VK_FORMAT_A2R10G10B10_UINT_PACK32, ISL_FORMAT_B10G10R10A2_UINT, .num_channels = 4), - fmt(VK_FORMAT_A2R10G10B10_SINT_PACK32, ISL_FORMAT_B10G10R10A2_SINT, .num_channels = 4), - fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32, ISL_FORMAT_R10G10B10A2_UNORM, .num_channels = 4), - fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32, ISL_FORMAT_R10G10B10A2_SNORM, .num_channels = 4), - fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, ISL_FORMAT_R10G10B10A2_USCALED, .num_channels = 4), - fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED, .num_channels = 4), - fmt(VK_FORMAT_A2B10G10R10_UINT_PACK32, ISL_FORMAT_R10G10B10A2_UINT, .num_channels = 4), - fmt(VK_FORMAT_A2B10G10R10_SINT_PACK32, ISL_FORMAT_R10G10B10A2_SINT, .num_channels = 4), - fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM, .num_channels = 1), - fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM, .num_channels = 1), - fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED, .num_channels = 1), - fmt(VK_FORMAT_R16_SSCALED, ISL_FORMAT_R16_SSCALED, .num_channels = 1), - fmt(VK_FORMAT_R16_UINT, ISL_FORMAT_R16_UINT, .num_channels = 1), - fmt(VK_FORMAT_R16_SINT, ISL_FORMAT_R16_SINT, .num_channels = 1), - fmt(VK_FORMAT_R16_SFLOAT, ISL_FORMAT_R16_FLOAT, .num_channels = 1), - fmt(VK_FORMAT_R16G16_UNORM, ISL_FORMAT_R16G16_UNORM, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SNORM, ISL_FORMAT_R16G16_SNORM, .num_channels = 2), - fmt(VK_FORMAT_R16G16_USCALED, ISL_FORMAT_R16G16_USCALED, .num_channels = 
2), - fmt(VK_FORMAT_R16G16_SSCALED, ISL_FORMAT_R16G16_SSCALED, .num_channels = 2), - fmt(VK_FORMAT_R16G16_UINT, ISL_FORMAT_R16G16_UINT, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SINT, ISL_FORMAT_R16G16_SINT, .num_channels = 2), - fmt(VK_FORMAT_R16G16_SFLOAT, ISL_FORMAT_R16G16_FLOAT, .num_channels = 2), - fmt(VK_FORMAT_R16G16B16_UNORM, ISL_FORMAT_R16G16B16_UNORM, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SNORM, ISL_FORMAT_R16G16B16_SNORM, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_USCALED, ISL_FORMAT_R16G16B16_USCALED, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SSCALED, ISL_FORMAT_R16G16B16_SSCALED, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_UINT, ISL_FORMAT_R16G16B16_UINT, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SINT, ISL_FORMAT_R16G16B16_SINT, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16_SFLOAT, ISL_FORMAT_R16G16B16_FLOAT, .num_channels = 3), - fmt(VK_FORMAT_R16G16B16A16_UNORM, ISL_FORMAT_R16G16B16A16_UNORM, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SNORM, ISL_FORMAT_R16G16B16A16_SNORM, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_USCALED, ISL_FORMAT_R16G16B16A16_USCALED, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SSCALED, ISL_FORMAT_R16G16B16A16_SSCALED, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_UINT, ISL_FORMAT_R16G16B16A16_UINT, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SINT, ISL_FORMAT_R16G16B16A16_SINT, .num_channels = 4), - fmt(VK_FORMAT_R16G16B16A16_SFLOAT, ISL_FORMAT_R16G16B16A16_FLOAT, .num_channels = 4), - fmt(VK_FORMAT_R32_UINT, ISL_FORMAT_R32_UINT, .num_channels = 1,), - fmt(VK_FORMAT_R32_SINT, ISL_FORMAT_R32_SINT, .num_channels = 1,), - fmt(VK_FORMAT_R32_SFLOAT, ISL_FORMAT_R32_FLOAT, .num_channels = 1,), - fmt(VK_FORMAT_R32G32_UINT, ISL_FORMAT_R32G32_UINT, .num_channels = 2,), - fmt(VK_FORMAT_R32G32_SINT, ISL_FORMAT_R32G32_SINT, .num_channels = 2,), - fmt(VK_FORMAT_R32G32_SFLOAT, ISL_FORMAT_R32G32_FLOAT, .num_channels = 2,), - fmt(VK_FORMAT_R32G32B32_UINT, ISL_FORMAT_R32G32B32_UINT, .num_channels = 
3,), - fmt(VK_FORMAT_R32G32B32_SINT, ISL_FORMAT_R32G32B32_SINT, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32_SFLOAT, ISL_FORMAT_R32G32B32_FLOAT, .num_channels = 3,), - fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT, .num_channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT, .num_channels = 4,), - fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT, .num_channels = 4,), - fmt(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU, .num_channels = 1), - fmt(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU, .num_channels = 1), - fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT, .num_channels = 1), - fmt(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU, .num_channels = 2), - fmt(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU, .num_channels = 2), - fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT, .num_channels = 2), - fmt(VK_FORMAT_R64G64B64_UINT, ISL_FORMAT_R64G64B64_PASSTHRU, .num_channels = 3), - fmt(VK_FORMAT_R64G64B64_SINT, ISL_FORMAT_R64G64B64_PASSTHRU, .num_channels = 3), - fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT, .num_channels = 3), - fmt(VK_FORMAT_R64G64B64A64_UINT, ISL_FORMAT_R64G64B64A64_PASSTHRU, .num_channels = 4), - fmt(VK_FORMAT_R64G64B64A64_SINT, ISL_FORMAT_R64G64B64A64_PASSTHRU, .num_channels = 4), - fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT, .num_channels = 4), - fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT, .num_channels = 3), - fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP, .num_channels = 3), - - fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, .num_channels = 1, .depth_format = D16_UNORM), - fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), - fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .num_channels = 1, .depth_format = D32_FLOAT), - fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .num_channels = 1, .has_stencil = true), - 
fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), - fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), - fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), + fmt(VK_FORMAT_R8G8B8A8_UNORM, ISL_FORMAT_R8G8B8A8_UNORM), + fmt(VK_FORMAT_R8G8B8A8_SNORM, ISL_FORMAT_R8G8B8A8_SNORM), + fmt(VK_FORMAT_R8G8B8A8_USCALED, ISL_FORMAT_R8G8B8A8_USCALED), + fmt(VK_FORMAT_R8G8B8A8_SSCALED, ISL_FORMAT_R8G8B8A8_SSCALED), + fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT), + fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT), + fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), + fmt(VK_FORMAT_A8B8G8R8_UNORM_PACK32, ISL_FORMAT_R8G8B8A8_UNORM), + fmt(VK_FORMAT_A8B8G8R8_SNORM_PACK32, ISL_FORMAT_R8G8B8A8_SNORM), + fmt(VK_FORMAT_A8B8G8R8_USCALED_PACK32, ISL_FORMAT_R8G8B8A8_USCALED), + fmt(VK_FORMAT_A8B8G8R8_SSCALED_PACK32, ISL_FORMAT_R8G8B8A8_SSCALED), + fmt(VK_FORMAT_A8B8G8R8_UINT_PACK32, ISL_FORMAT_R8G8B8A8_UINT), + fmt(VK_FORMAT_A8B8G8R8_SINT_PACK32, ISL_FORMAT_R8G8B8A8_SINT), + fmt(VK_FORMAT_A8B8G8R8_SRGB_PACK32, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), + fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32, ISL_FORMAT_B10G10R10A2_UNORM), + fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32, ISL_FORMAT_B10G10R10A2_SNORM), + fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED), + fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED), + fmt(VK_FORMAT_A2R10G10B10_UINT_PACK32, ISL_FORMAT_B10G10R10A2_UINT), + fmt(VK_FORMAT_A2R10G10B10_SINT_PACK32, ISL_FORMAT_B10G10R10A2_SINT), + fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32, ISL_FORMAT_R10G10B10A2_UNORM), + fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32, ISL_FORMAT_R10G10B10A2_SNORM), + fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, ISL_FORMAT_R10G10B10A2_USCALED), + 
fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED), + fmt(VK_FORMAT_A2B10G10R10_UINT_PACK32, ISL_FORMAT_R10G10B10A2_UINT), + fmt(VK_FORMAT_A2B10G10R10_SINT_PACK32, ISL_FORMAT_R10G10B10A2_SINT), + fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM), + fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM), + fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED), + fmt(VK_FORMAT_R16_SSCALED, ISL_FORMAT_R16_SSCALED), + fmt(VK_FORMAT_R16_UINT, ISL_FORMAT_R16_UINT), + fmt(VK_FORMAT_R16_SINT, ISL_FORMAT_R16_SINT), + fmt(VK_FORMAT_R16_SFLOAT, ISL_FORMAT_R16_FLOAT), + fmt(VK_FORMAT_R16G16_UNORM, ISL_FORMAT_R16G16_UNORM), + fmt(VK_FORMAT_R16G16_SNORM, ISL_FORMAT_R16G16_SNORM), + fmt(VK_FORMAT_R16G16_USCALED, ISL_FORMAT_R16G16_USCALED), + fmt(VK_FORMAT_R16G16_SSCALED, ISL_FORMAT_R16G16_SSCALED), + fmt(VK_FORMAT_R16G16_UINT, ISL_FORMAT_R16G16_UINT), + fmt(VK_FORMAT_R16G16_SINT, ISL_FORMAT_R16G16_SINT), + fmt(VK_FORMAT_R16G16_SFLOAT, ISL_FORMAT_R16G16_FLOAT), + fmt(VK_FORMAT_R16G16B16_UNORM, ISL_FORMAT_R16G16B16_UNORM), + fmt(VK_FORMAT_R16G16B16_SNORM, ISL_FORMAT_R16G16B16_SNORM), + fmt(VK_FORMAT_R16G16B16_USCALED, ISL_FORMAT_R16G16B16_USCALED), + fmt(VK_FORMAT_R16G16B16_SSCALED, ISL_FORMAT_R16G16B16_SSCALED), + fmt(VK_FORMAT_R16G16B16_UINT, ISL_FORMAT_R16G16B16_UINT), + fmt(VK_FORMAT_R16G16B16_SINT, ISL_FORMAT_R16G16B16_SINT), + fmt(VK_FORMAT_R16G16B16_SFLOAT, ISL_FORMAT_R16G16B16_FLOAT), + fmt(VK_FORMAT_R16G16B16A16_UNORM, ISL_FORMAT_R16G16B16A16_UNORM), + fmt(VK_FORMAT_R16G16B16A16_SNORM, ISL_FORMAT_R16G16B16A16_SNORM), + fmt(VK_FORMAT_R16G16B16A16_USCALED, ISL_FORMAT_R16G16B16A16_USCALED), + fmt(VK_FORMAT_R16G16B16A16_SSCALED, ISL_FORMAT_R16G16B16A16_SSCALED), + fmt(VK_FORMAT_R16G16B16A16_UINT, ISL_FORMAT_R16G16B16A16_UINT), + fmt(VK_FORMAT_R16G16B16A16_SINT, ISL_FORMAT_R16G16B16A16_SINT), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, ISL_FORMAT_R16G16B16A16_FLOAT), + fmt(VK_FORMAT_R32_UINT, ISL_FORMAT_R32_UINT,), + fmt(VK_FORMAT_R32_SINT, ISL_FORMAT_R32_SINT,), + 
fmt(VK_FORMAT_R32_SFLOAT, ISL_FORMAT_R32_FLOAT,), + fmt(VK_FORMAT_R32G32_UINT, ISL_FORMAT_R32G32_UINT,), + fmt(VK_FORMAT_R32G32_SINT, ISL_FORMAT_R32G32_SINT,), + fmt(VK_FORMAT_R32G32_SFLOAT, ISL_FORMAT_R32G32_FLOAT,), + fmt(VK_FORMAT_R32G32B32_UINT, ISL_FORMAT_R32G32B32_UINT,), + fmt(VK_FORMAT_R32G32B32_SINT, ISL_FORMAT_R32G32B32_SINT,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, ISL_FORMAT_R32G32B32_FLOAT,), + fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT,), + fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT,), + fmt(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU), + fmt(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU), + fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT), + fmt(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU), + fmt(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU), + fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT), + fmt(VK_FORMAT_R64G64B64_UINT, ISL_FORMAT_R64G64B64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64_SINT, ISL_FORMAT_R64G64B64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT), + fmt(VK_FORMAT_R64G64B64A64_UINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64A64_SINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT), + fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT), + fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP), + + fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, .depth_format = D16_UNORM), + fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .depth_format = D24_UNORM_X8_UINT), + fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .depth_format = D32_FLOAT), + fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .depth_format = D16_UNORM, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .depth_format = 
D24_UNORM_X8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .depth_format = D32_FLOAT, .has_stencil = true), fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), @@ -216,13 +216,13 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_B8G8R8_UINT, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_SINT, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM, .num_channels = 4), + fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM), fmt(VK_FORMAT_B8G8R8A8_SNORM, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_USCALED, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_SSCALED, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_UINT, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB, .num_channels = 4), + fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB), }; #undef fmt diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 95096748a9d..44d537ac5a8 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1401,7 +1401,6 @@ struct anv_format { const char *name; enum isl_format surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ const struct isl_format_layout *isl_layout; - uint8_t num_channels; uint16_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ bool has_stencil; }; -- cgit v1.2.3 From f076d5330dca4b7a882de17853d6aedccb4e50ad Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 Jan 2016 09:26:06 -0800 Subject: anv/device: Handle non-4k-aligned calls to MapMemory As per the spec: minMemoryMapAlignment is the minimum required alignment, in bytes, of host-visible memory allocations within the host address space. 
When mapping a memory allocation with vkMapMemory, subtracting offset bytes from the returned pointer will always produce a multiple of the value of this limit. --- src/vulkan/anv_device.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 88a4b8d067d..9a56d9b7de0 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -468,7 +468,7 @@ void anv_GetPhysicalDeviceProperties( .maxViewportDimensions = { (1 << 14), (1 << 14) }, .viewportBoundsRange = { -1.0, 1.0 }, /* FIXME */ .viewportSubPixelBits = 13, /* We take a float? */ - .minMemoryMapAlignment = 64, /* A cache line */ + .minMemoryMapAlignment = 4096, /* A page */ .minTexelBufferOffsetAlignment = 1, .minUniformBufferOffsetAlignment = 1, .minStorageBufferOffsetAlignment = 1, @@ -1082,10 +1082,19 @@ VkResult anv_MapMemory( if (!device->info.has_llc && mem->type_index == 0) gem_flags |= I915_MMAP_WC; - mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size, gem_flags); - mem->map_size = size; + /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */ + uint64_t map_offset = offset & ~4095ull; + assert(offset >= map_offset); + uint64_t map_size = (offset + size) - map_offset; - *ppData = mem->map; + /* Let's map whole pages */ + map_size = (map_size + 4095) & ~4095ull; + + mem->map = anv_gem_mmap(device, mem->bo.gem_handle, + map_offset, map_size, gem_flags); + mem->map_size = map_size; + + *ppData = mem->map + (offset - map_offset); return VK_SUCCESS; } -- cgit v1.2.3 From 6b0b57225cf27aa5316b6c3085fa3254f0f4b1c2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 2 Jan 2016 07:52:22 -0800 Subject: anv/device: Only allocate whole pages in AllocateMemory The kernel is going to give us whole pages anyway, so allocating part of a page doesn't help. And this ensures that we can always work with whole pages. 
--- src/vulkan/anv_device.c | 7 +++++-- src/vulkan/anv_private.h | 6 ++++++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 9a56d9b7de0..c070aaf2125 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1020,7 +1020,10 @@ VkResult anv_AllocateMemory( if (mem == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_bo_init_new(&mem->bo, device, pAllocateInfo->allocationSize); + /* The kernel is going to give us whole pages anyway */ + uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); + + result = anv_bo_init_new(&mem->bo, device, alloc_size); if (result != VK_SUCCESS) goto fail; @@ -1088,7 +1091,7 @@ VkResult anv_MapMemory( uint64_t map_size = (offset + size) - map_offset; /* Let's map whole pages */ - map_size = (map_size + 4095) & ~4095ull; + map_size = align_u64(map_size, 4096); mem->map = anv_gem_mmap(device, mem->bo.gem_handle, map_offset, map_size, gem_flags); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 44d537ac5a8..cc9e139c034 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -86,6 +86,12 @@ align_u32(uint32_t v, uint32_t a) return (v + a - 1) & ~(a - 1); } +static inline uint64_t +align_u64(uint64_t v, uint64_t a) +{ + return (v + a - 1) & ~(a - 1); +} + static inline int32_t align_i32(int32_t v, int32_t a) { -- cgit v1.2.3 From f6c4658cde51fb1db3d880c2ffc5921929e5615f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 2 Jan 2016 11:51:40 -0800 Subject: nir/spirv: Fix group decorations They were completely bogus before. For one thing, OpDecorationGroup created a value of type undef rather than decoration_group. Also OpGroupMemberDecorate didn't properly apply the decoration to the different members of the different groups. It *should* be correct now but there's no good way to test it yet. 
--- src/glsl/nir/spirv/spirv_to_nir.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 681adc74d1f..3a8949438f4 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -298,7 +298,7 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, switch (opcode) { case SpvOpDecorationGroup: - vtn_push_value(b, target, vtn_value_type_undef); + vtn_push_value(b, target, vtn_value_type_decoration_group); break; case SpvOpDecorate: @@ -331,21 +331,19 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, case SpvOpGroupMemberDecorate: case SpvOpGroupDecorate: { - struct vtn_value *group = &b->values[target]; - assert(group->value_type == vtn_value_type_decoration_group); - - int scope; - if (opcode == SpvOpGroupDecorate) { - scope = VTN_DEC_DECORATION; - } else { - scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); - } + struct vtn_value *group = + vtn_value(b, target, vtn_value_type_decoration_group); for (; w < w_end; w++) { - struct vtn_value *val = &b->values[*w]; + struct vtn_value *val = vtn_untyped_value(b, *w); struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); - dec->scope = scope; + dec->group = group; + if (opcode == SpvOpGroupDecorate) { + dec->scope = VTN_DEC_DECORATION; + } else { + dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); + } /* Link into the list */ dec->next = val->decoration; -- cgit v1.2.3 From a827b553d9c13f7c31244667daf320add65bb96b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 1 Jan 2016 10:37:25 -0800 Subject: isl: Fix swapped if-else in isl_calc_row_pitch The YUV case was applied to non-YUV formats. Oops. 
--- src/isl/isl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 75f65001a1e..fe878f7fb9a 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -813,9 +813,9 @@ isl_calc_row_pitch(const struct isl_device *dev, */ if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { if (isl_format_is_yuv(info->format)) { - row_pitch = isl_align(row_pitch, fmtl->bs); - } else { row_pitch = isl_align(row_pitch, 2 * fmtl->bs); + } else { + row_pitch = isl_align(row_pitch, fmtl->bs); } } break; -- cgit v1.2.3 From 05c22f2d74309fec3432523a26e7772e1863af2f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 1 Jan 2016 09:52:57 -0800 Subject: isl: Fix row pitch for linear buffers isl always aligned the row pitch to the surface's image alignment. This was sometimes wrong when the surface backed a VkBuffer. For a VkBuffer, the surface's row pitch is set by VkBufferImageCopy::bufferRowLength, whose required alignment is only that of the VkFormat. In particular, VkBuffer rows are packed in many dEQP and Crucible tests. And packed rows are rarely aligned to the surface's image alignment. Fixes: dEQP-VK.pipeline.image.view_type.2d.format.r8g8b8a8_unorm.size.13x13 --- src/isl/isl.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index fe878f7fb9a..3823a720660 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -536,6 +536,25 @@ isl_calc_phys_slice0_extent_sa_gen4_2d( { assert(phys_level0_sa->depth == 1); + if (info->levels == 1) { + /* Do not align single-level surfaces to the image alignment. + * + * For tiled surfaces, skipping the alignment here avoids wasting CPU + * cycles on the below mipmap layout caluclations. Skipping the + * alignment here is safe because we later align the row pitch and array + * pitch to the tile boundary. 
It is safe even for + * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled + * to accomodate the interleaved samples. + * + * For linear surfaces, skipping the alignment here permits us to later + * choose an arbitrary, non-aligned row pitch. If the surface backs + * a VkBuffer, then an arbitrary pitch may be needed to accomodate + * VkBufferImageCopy::bufferRowLength. + */ + *phys_slice0_sa = isl_extent2d(phys_level0_sa->w, phys_level0_sa->h); + return; + } + uint32_t slice_top_w = 0; uint32_t slice_bottom_w = 0; uint32_t slice_left_h = 0; @@ -626,7 +645,8 @@ isl_calc_phys_slice0_extent_sa_gen4_3d( /** * Calculate the physical extent of the surface's first array slice, in units - * of surface samples. The result is aligned to \a image_align_sa. + * of surface samples. If the surface is multi-leveled, then the result will + * be aligned to \a image_align_sa. */ static void isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, -- cgit v1.2.3 From 75e01c8b2d2c2a40b82fda67113e9a08a426fbeb Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 3 Jan 2016 21:15:29 -0800 Subject: vk: Only finish wayland wsi if we created it Failure during instance creation will leave instance->wayland_wsi undefined. When we then try to clean that up we crash. Set instance->wayland_wsi to NULL on failure and only clean it up if it's non-NULL. 
Fixes part of dEQP-VK.api.object_management.alloc_callback_fail.* --- src/vulkan/anv_wsi_wayland.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index 79453b594d8..d51a96b9f11 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -862,9 +862,11 @@ anv_wl_finish_wsi(struct anv_instance *instance) { struct wsi_wayland *wsi = instance->wayland_wsi; - _mesa_hash_table_destroy(wsi->displays, NULL); + if (wsi) { + _mesa_hash_table_destroy(wsi->displays, NULL); - pthread_mutex_destroy(&wsi->mutex); + pthread_mutex_destroy(&wsi->mutex); - anv_free(&instance->alloc, wsi); + anv_free(&instance->alloc, wsi); + } } -- cgit v1.2.3 From 3954594eb4385eef7cea606bc2bb0b92d19c1705 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 3 Jan 2016 21:31:22 -0800 Subject: vk: Call vk_error when we generate a VK_ERROR --- src/vulkan/anv_wsi_wayland.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index d51a96b9f11..c9eb51012dc 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -820,18 +820,20 @@ anv_wl_init_wsi(struct anv_instance *instance) wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!wsi) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + if (!wsi) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } wsi->instance = instance; int ret = pthread_mutex_init(&wsi->mutex, NULL); if (ret != 0) { if (ret == ENOMEM) { - result = VK_ERROR_OUT_OF_HOST_MEMORY; + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } else { /* FINISHME: Choose a better error. 
*/ - result = VK_ERROR_OUT_OF_HOST_MEMORY; + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } goto fail_alloc; @@ -840,7 +842,7 @@ anv_wl_init_wsi(struct anv_instance *instance) wsi->displays = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); if (!wsi->displays) { - result = VK_ERROR_OUT_OF_HOST_MEMORY; + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); goto fail_mutex; } @@ -853,6 +855,8 @@ fail_mutex: fail_alloc: anv_free(&instance->alloc, wsi); +fail: + instance->wayland_wsi = NULL; return result; } -- cgit v1.2.3 From b2ad2a20b67645ac37b5f35daa47bfe454286d90 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 3 Jan 2016 22:42:01 -0800 Subject: vk: Handle allocation failure in anv_pipeline_init() Fixes dEQP-VK.api.object_management.alloc_callback_fail.* failures. --- src/vulkan/anv_pipeline.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 12430f824ad..ad47f9661e4 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -950,6 +950,8 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, const struct anv_graphics_pipeline_create_info *extra, const VkAllocationCallbacks *alloc) { + VkResult result; + anv_validate { anv_pipeline_validate_create_info(pCreateInfo); } @@ -960,8 +962,9 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, pipeline->device = device; pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - anv_reloc_list_init(&pipeline->batch_relocs, alloc); - /* TODO: Handle allocation fail */ + result = anv_reloc_list_init(&pipeline->batch_relocs, alloc); + if (result != VK_SUCCESS) + return result; pipeline->batch.alloc = alloc; pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; -- cgit v1.2.3 From 5526c1782a01e73d3deac785449456018b53fd65 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: 
Sun, 3 Jan 2016 22:43:47 -0800 Subject: vk: Handle allocation failures in meta init paths Fixes dEQP-VK.api.object_management.alloc_callback_fail.* failures. --- src/vulkan/anv_device.c | 15 ++++++-- src/vulkan/anv_meta.c | 62 +++++++++++++++++++++++++----- src/vulkan/anv_meta_clear.c | 94 ++++++++++++++++++++++++++++++++++----------- src/vulkan/anv_meta_clear.h | 2 +- src/vulkan/anv_private.h | 2 +- 5 files changed, 136 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index c070aaf2125..40914aeb1cc 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -664,6 +664,7 @@ VkResult anv_CreateDevice( VkDevice* pDevice) { ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkResult result; struct anv_device *device; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); @@ -699,12 +700,16 @@ VkResult anv_CreateDevice( /* XXX(chadv): Can we dup() physicalDevice->fd here? */ device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); - if (device->fd == -1) + if (device->fd == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); goto fail_device; + } device->context_id = anv_gem_create_context(device); - if (device->context_id == -1) + if (device->context_id == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); goto fail_fd; + } device->info = *physical_device->info; device->isl_dev = physical_device->isl_dev; @@ -730,7 +735,9 @@ VkResult anv_CreateDevice( anv_queue_init(device, &device->queue); - anv_device_init_meta(device); + result = anv_device_init_meta(device); + if (result != VK_SUCCESS) + goto fail_fd; anv_device_init_border_colors(device); @@ -743,7 +750,7 @@ VkResult anv_CreateDevice( fail_device: anv_free(&device->alloc, device); - return vk_error(VK_ERROR_INITIALIZATION_FAILED); + return result; } void anv_DestroyDevice( diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 5b564eb4dc2..b72ec48afe2 100644 --- 
a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -178,10 +178,12 @@ meta_blit_get_dest_view_base_array_slice(const struct anv_image *dest_image, } } -static void +static VkResult anv_device_init_meta_blit_state(struct anv_device *device) { - anv_CreateRenderPass(anv_device_to_handle(device), + VkResult result; + + result = anv_CreateRenderPass(anv_device_to_handle(device), &(VkRenderPassCreateInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, @@ -214,6 +216,8 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, .dependencyCount = 0, }, NULL, &device->meta_state.blit.render_pass); + if (result != VK_SUCCESS) + goto fail; /* We don't use a vertex shader for clearing, but instead build and pass * the VUEs directly to the rasterization backend. However, we do need @@ -286,16 +290,21 @@ anv_device_init_meta_blit_state(struct anv_device *device) }, } }; - anv_CreateDescriptorSetLayout(anv_device_to_handle(device), &ds_layout_info, - NULL, &device->meta_state.blit.ds_layout); + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &ds_layout_info, NULL, + &device->meta_state.blit.ds_layout); + if (result != VK_SUCCESS) + goto fail_render_pass; - anv_CreatePipelineLayout(anv_device_to_handle(device), + result = anv_CreatePipelineLayout(anv_device_to_handle(device), &(VkPipelineLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, .pSetLayouts = &device->meta_state.blit.ds_layout, }, NULL, &device->meta_state.blit.pipeline_layout); + if (result != VK_SUCCESS) + goto fail_descriptor_set_layout; VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { { @@ -382,18 +391,43 @@ anv_device_init_meta_blit_state(struct anv_device *device) }; pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); - anv_graphics_pipeline_create(anv_device_to_handle(device), + result = anv_graphics_pipeline_create(anv_device_to_handle(device), &vk_pipeline_info, 
&anv_pipeline_info, NULL, &device->meta_state.blit.pipeline_2d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_layout; pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); - anv_graphics_pipeline_create(anv_device_to_handle(device), + result = anv_graphics_pipeline_create(anv_device_to_handle(device), &vk_pipeline_info, &anv_pipeline_info, NULL, &device->meta_state.blit.pipeline_3d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_2d; ralloc_free(vs.nir); ralloc_free(fs_2d.nir); ralloc_free(fs_3d.nir); + + return VK_SUCCESS; + + fail_pipeline_2d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, NULL); + fail_pipeline_layout: + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, NULL); + fail_descriptor_set_layout: + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, NULL); + fail_render_pass: + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, NULL); + + ralloc_free(vs.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + fail: + return result; } static void @@ -1272,11 +1306,19 @@ void anv_CmdResolveImage( stub(); } -void +VkResult anv_device_init_meta(struct anv_device *device) { - anv_device_init_meta_clear_state(device); - anv_device_init_meta_blit_state(device); + VkResult result; + result = anv_device_init_meta_clear_state(device); + if (result != VK_SUCCESS) + return result; + + result = anv_device_init_meta_blit_state(device); + if (result != VK_SUCCESS) + return result; + + return VK_SUCCESS; } void diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 8e1470e1531..6a0517a2228 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -112,22 +112,24 @@ build_color_shaders(struct nir_shader **out_vs, *out_fs = fs_b.shader; } -static struct anv_pipeline * +static VkResult create_pipeline(struct 
anv_device *device, struct nir_shader *vs_nir, struct nir_shader *fs_nir, const VkPipelineVertexInputStateCreateInfo *vi_state, const VkPipelineDepthStencilStateCreateInfo *ds_state, const VkPipelineColorBlendStateCreateInfo *cb_state, - const VkAllocationCallbacks *alloc) + const VkAllocationCallbacks *alloc, + struct anv_pipeline **pipeline) { VkDevice device_h = anv_device_to_handle(device); + VkResult result; struct anv_shader_module vs_m = { .nir = vs_nir }; struct anv_shader_module fs_m = { .nir = fs_nir }; VkPipeline pipeline_h; - anv_graphics_pipeline_create(device_h, + result = anv_graphics_pipeline_create(device_h, &(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .stageCount = 2, @@ -212,10 +214,12 @@ create_pipeline(struct anv_device *device, ralloc_free(vs_nir); ralloc_free(fs_nir); - return anv_pipeline_from_handle(pipeline_h); + *pipeline = anv_pipeline_from_handle(pipeline_h); + + return result; } -static void +static VkResult init_color_pipeline(struct anv_device *device) { struct nir_shader *vs_nir; @@ -281,9 +285,10 @@ init_color_pipeline(struct anv_device *device) }, }; - device->meta_state.clear.color_pipeline = + return create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, - &cb_state, NULL); + &cb_state, NULL, + &device->meta_state.clear.color_pipeline); } static void @@ -393,9 +398,10 @@ build_depthstencil_shaders(struct nir_shader **out_vs, *out_fs = fs_b.shader; } -static struct anv_pipeline * +static VkResult create_depthstencil_pipeline(struct anv_device *device, - VkImageAspectFlags aspects) + VkImageAspectFlags aspects, + struct anv_pipeline **pipeline) { struct nir_shader *vs_nir; struct nir_shader *fs_nir; @@ -455,7 +461,7 @@ create_depthstencil_pipeline(struct anv_device *device, }; return create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, - &cb_state, NULL); + &cb_state, NULL, pipeline); } static void @@ -553,25 +559,67 @@ emit_load_depthstencil_clear(struct 
anv_cmd_buffer *cmd_buffer, ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); } -static void +static VkResult init_depthstencil_pipelines(struct anv_device *device) { - device->meta_state.clear.depth_only_pipeline = - create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT); - - device->meta_state.clear.stencil_only_pipeline = - create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT); - - device->meta_state.clear.depthstencil_pipeline = - create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT); + VkResult result; + struct anv_meta_state *state = &device->meta_state; + + result = + create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, + &state->clear.depth_only_pipeline); + if (result != VK_SUCCESS) + goto fail; + + result = + create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, + &state->clear.stencil_only_pipeline); + if (result != VK_SUCCESS) + goto fail_depth_only; + + result = + create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT, + &state->clear.depthstencil_pipeline); + if (result != VK_SUCCESS) + goto fail_stencil_only; + + return result; + + fail_stencil_only: + anv_DestroyPipeline(anv_device_to_handle(device), + anv_pipeline_to_handle(state->clear.stencil_only_pipeline), + NULL); + fail_depth_only: + anv_DestroyPipeline(anv_device_to_handle(device), + anv_pipeline_to_handle(state->clear.depth_only_pipeline), + NULL); + fail: + return result; } -void +VkResult anv_device_init_meta_clear_state(struct anv_device *device) { - init_color_pipeline(device); - init_depthstencil_pipelines(device); + VkResult result; + + result = init_color_pipeline(device); + if (result != VK_SUCCESS) + goto fail; + + result = init_depthstencil_pipelines(device); + if (result != VK_SUCCESS) + goto fail_color_pipeline; + + return VK_SUCCESS; + + fail_color_pipeline: + anv_DestroyPipeline(anv_device_to_handle(device), + 
anv_pipeline_to_handle(device->meta_state.clear.color_pipeline), + NULL); + fail: + return result; } void diff --git a/src/vulkan/anv_meta_clear.h b/src/vulkan/anv_meta_clear.h index e53bd979763..853d9f241d8 100644 --- a/src/vulkan/anv_meta_clear.h +++ b/src/vulkan/anv_meta_clear.h @@ -29,7 +29,7 @@ extern "C" { struct anv_device; -void anv_device_init_meta_clear_state(struct anv_device *device); +VkResult anv_device_init_meta_clear_state(struct anv_device *device); void anv_device_finish_meta_clear_state(struct anv_device *device); #ifdef __cplusplus diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index cc9e139c034..8b17d9b3546 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1631,7 +1631,7 @@ struct anv_query_pool { struct anv_bo bo; }; -void anv_device_init_meta(struct anv_device *device); +VkResult anv_device_init_meta(struct anv_device *device); void anv_device_finish_meta(struct anv_device *device); void *anv_lookup_entrypoint(const char *name); -- cgit v1.2.3 From fca1c08e34368dbfa7a10eb1a1508cdd9cda6bf8 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 3 Jan 2016 22:58:00 -0800 Subject: vk: Allocate subpass attachment in one big block This avoids making a lot of small allocations and handles allocation failure correctly. Fixes dEQP-VK.api.object_management.alloc_callback_fail.* failures. 
--- src/vulkan/anv_pass.c | 51 ++++++++++++++++++++++++++---------------------- src/vulkan/anv_private.h | 1 + 2 files changed, 29 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pass.c b/src/vulkan/anv_pass.c index 6742274c72a..ccd8cedf561 100644 --- a/src/vulkan/anv_pass.c +++ b/src/vulkan/anv_pass.c @@ -65,6 +65,27 @@ VkResult anv_CreateRenderPass( // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; } + uint32_t subpass_attachment_count = 0, *p; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + + subpass_attachment_count += + desc->inputAttachmentCount + + desc->colorAttachmentCount + + /* Count colorAttachmentCount again for resolve_attachments */ + desc->colorAttachmentCount; + } + + pass->subpass_attachments = + anv_alloc2(&device->alloc, pAllocator, + subpass_attachment_count * sizeof(uint32_t), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass->subpass_attachments == NULL) { + anv_free2(&device->alloc, pAllocator, pass); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + p = pass->subpass_attachments; for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; struct anv_subpass *subpass = &pass->subpasses[i]; @@ -73,10 +94,8 @@ VkResult anv_CreateRenderPass( subpass->color_count = desc->colorAttachmentCount; if (desc->inputAttachmentCount > 0) { - subpass->input_attachments = - anv_alloc2(&device->alloc, pAllocator, - desc->inputAttachmentCount * sizeof(uint32_t), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + subpass->input_attachments = p; + p += desc->inputAttachmentCount; for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { subpass->input_attachments[j] @@ -85,10 +104,8 @@ VkResult anv_CreateRenderPass( } if (desc->colorAttachmentCount > 0) { - subpass->color_attachments = - anv_alloc2(&device->alloc, pAllocator, - desc->colorAttachmentCount * 
sizeof(uint32_t), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + subpass->color_attachments = p; + p += desc->colorAttachmentCount; for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { subpass->color_attachments[j] @@ -97,10 +114,8 @@ VkResult anv_CreateRenderPass( } if (desc->pResolveAttachments) { - subpass->resolve_attachments = - anv_alloc2(&device->alloc, pAllocator, - desc->colorAttachmentCount * sizeof(uint32_t), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + subpass->resolve_attachments = p; + p += desc->colorAttachmentCount; for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { subpass->resolve_attachments[j] @@ -129,17 +144,7 @@ void anv_DestroyRenderPass( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_render_pass, pass, _pass); - for (uint32_t i = 0; i < pass->subpass_count; i++) { - /* In VkSubpassCreateInfo, each of the attachment arrays may be null. - * Don't free the null arrays. - */ - struct anv_subpass *subpass = &pass->subpasses[i]; - - anv_free2(&device->alloc, pAllocator, subpass->input_attachments); - anv_free2(&device->alloc, pAllocator, subpass->color_attachments); - anv_free2(&device->alloc, pAllocator, subpass->resolve_attachments); - } - + anv_free2(&device->alloc, pAllocator, pass->subpass_attachments); anv_free2(&device->alloc, pAllocator, pass); } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8b17d9b3546..626d7bbedfe 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1613,6 +1613,7 @@ struct anv_render_pass_attachment { struct anv_render_pass { uint32_t attachment_count; uint32_t subpass_count; + uint32_t * subpass_attachments; struct anv_render_pass_attachment * attachments; struct anv_subpass subpasses[0]; }; -- cgit v1.2.3 From abc1c9878f552b7b7212a5a04048091847ef1c23 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 4 Jan 2016 10:42:50 -0800 Subject: vk: Don't leak pipeline if initialization fails --- src/vulkan/gen8_pipeline.c | 4 +++- 1 
file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index ae8ab404746..827a013ebdc 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -290,8 +290,10 @@ genX(graphics_pipeline_create)( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); result = anv_pipeline_init(pipeline, device, pCreateInfo, extra, pAllocator); - if (result != VK_SUCCESS) + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); return result; + } assert(pCreateInfo->pVertexInputState); emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); -- cgit v1.2.3 From 0f34a4ec4ee296121c0f2ae416d5899c846efd90 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 4 Jan 2016 10:53:47 -0800 Subject: isl: Use isl_align_npot for row_pitch Many formats are not power-of-two bytes per pixels and we need the non-power-of-two align macro here. This reverts the revert from 4f9a211b, but keeps the change from a827b553 that fixed the yuv if-else mix-up. --- src/isl/isl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 3823a720660..6c49164615b 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -833,9 +833,9 @@ isl_calc_row_pitch(const struct isl_device *dev, */ if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { if (isl_format_is_yuv(info->format)) { - row_pitch = isl_align(row_pitch, 2 * fmtl->bs); + row_pitch = isl_align_npot(row_pitch, 2 * fmtl->bs); } else { - row_pitch = isl_align(row_pitch, fmtl->bs); + row_pitch = isl_align_npot(row_pitch, fmtl->bs); } } break; -- cgit v1.2.3 From 0d7614dce64d63c01e671b0c180453ed0ba44759 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 4 Jan 2016 15:30:27 -0800 Subject: isl: Document mnemonic in Yf and Ys tiling The 'f' means "four K". The 's' means "sixty-four K". 
--- src/isl/isl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/isl.h b/src/isl/isl.h index 68d05e37186..c6baca0978d 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -439,8 +439,8 @@ enum isl_tiling { ISL_TILING_W, ISL_TILING_X, ISL_TILING_Y0, /**< Legacy Y tiling */ - ISL_TILING_Yf, - ISL_TILING_Ys, + ISL_TILING_Yf, /**< Standard 4K tiling. The 'f' means "four". */ + ISL_TILING_Ys, /**< Standard 64K tiling. The 's' means "sixty-four". */ }; /** -- cgit v1.2.3 From 3200a81a553b9cf5e3d0853307d8f75eadff8f97 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Dec 2015 12:39:34 -0800 Subject: anv/image: Add a vk_format field We've been trying to move away from anv_format for a while and this should help with the transition. There are cases (mostly in meta) where we need the original format for the image and not the isl_format. These will be moved over to the new vk_format and everythign else will use the isl_format from the particular anv_surface. 
--- src/vulkan/anv_image.c | 1 + src/vulkan/anv_meta.c | 14 +++++++------- src/vulkan/anv_meta_clear.c | 2 +- src/vulkan/anv_private.h | 4 ++++ 4 files changed, 13 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 159af6d19b0..1cb860f75f5 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -205,6 +205,7 @@ anv_image_create(VkDevice _device, memset(image, 0, sizeof(*image)); image->type = pCreateInfo->imageType; image->extent = pCreateInfo->extent; + image->vk_format = pCreateInfo->format; image->format = anv_format_for_vk_format(pCreateInfo->format); image->levels = pCreateInfo->mipLevels; image->array_size = pCreateInfo->arrayLayers; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index b72ec48afe2..d9a5783e349 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -858,7 +858,7 @@ void anv_CmdCopyImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, .viewType = anv_meta_get_view_type(src_image), - .format = src_image->format->vk_format, + .format = src_image->vk_format, .subresourceRange = { .aspectMask = pRegions[r].srcSubresource.aspectMask, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, @@ -902,7 +902,7 @@ void anv_CmdCopyImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = destImage, .viewType = anv_meta_get_view_type(dest_image), - .format = dest_image->format->vk_format, + .format = dest_image->vk_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].dstSubresource.mipLevel, @@ -955,7 +955,7 @@ void anv_CmdBlitImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, .viewType = anv_meta_get_view_type(src_image), - .format = src_image->format->vk_format, + .format = src_image->vk_format, .subresourceRange = { .aspectMask = pRegions[r].srcSubresource.aspectMask, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, @@ -989,7 +989,7 @@ void 
anv_CmdBlitImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = destImage, .viewType = anv_meta_get_view_type(dest_image), - .format = dest_image->format->vk_format, + .format = dest_image->vk_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].dstSubresource.mipLevel, @@ -1067,7 +1067,7 @@ void anv_CmdCopyBufferToImage( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, dest_image, destImage); VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - const VkFormat orig_format = dest_image->format->vk_format; + const VkFormat orig_format = dest_image->vk_format; struct anv_meta_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); @@ -1194,7 +1194,7 @@ void anv_CmdCopyImageToBuffer( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, .viewType = anv_meta_get_view_type(src_image), - .format = src_image->format->vk_format, + .format = src_image->vk_format, .subresourceRange = { .aspectMask = pRegions[r].imageSubresource.aspectMask, .baseMipLevel = pRegions[r].imageSubresource.mipLevel, @@ -1205,7 +1205,7 @@ void anv_CmdCopyImageToBuffer( }, cmd_buffer); - VkFormat dest_format = src_image->format->vk_format; + VkFormat dest_format = src_image->vk_format; if (dest_format == VK_FORMAT_S8_UINT) { dest_format = VK_FORMAT_R8_UINT; } diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 6a0517a2228..17a40cd6be6 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -729,7 +729,7 @@ void anv_CmdClearColorImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = _image, .viewType = anv_meta_get_view_type(image), - .format = image->format->vk_format, + .format = image->vk_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRanges[r].baseMipLevel + l, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 626d7bbedfe..187a6e822b2 100644 
--- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1443,6 +1443,10 @@ struct anv_surface { struct anv_image { VkImageType type; + /* The original VkFormat provided by the client. This may not match any + * of the actual surface formats. + */ + VkFormat vk_format; const struct anv_format *format; VkExtent3D extent; uint32_t levels; -- cgit v1.2.3 From ad9ff4f2b25c0b4f6b83c9ab4dfc98ced4227fd6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Dec 2015 13:22:43 -0800 Subject: meta/blit: Rework how format and aspect choices are made This commit does two things. First, it introduces choose_* functions for chosing formats and aspects. Second, it changes the copy (not blit) code to use appropreately sized UINT formats for everything except depth. There are two main reasons for this: First, it means that compressed and other non-renderable texture upload should "just work" because it won't be tripping over non-renderable formats. Second, it allows us to easly copy an RGB buffer to and from an RGBX image because the formats will get switched over to their UINT variants and the shader will deal with the extra channel for us. --- src/vulkan/anv_meta.c | 98 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index d9a5783e349..a435c26ee59 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -650,6 +650,10 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, static VkFormat vk_format_for_size(int bs) { + /* Note: We intentionally use the 4-channel formats whenever we can. + * This is so that, when we do a RGB <-> RGBX copy, the two formats will + * line up even though one of them is 3/4 the size of the other. 
+ */ switch (bs) { case 1: return VK_FORMAT_R8_UINT; case 2: return VK_FORMAT_R8G8_UINT; @@ -834,6 +838,50 @@ void anv_CmdCopyBuffer( meta_finish_blit(cmd_buffer, &saved_state); } +static VkFormat +choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + struct isl_surf *surf = + &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; + + /* vkCmdCopyImage behaves like memcpy. Therefore we choose identical UINT + * formats for the source and destination image views. + * + * From the Vulkan spec (2015-12-30): + * + * vkCmdCopyImage performs image copies in a similar manner to a host + * memcpy. It does not perform general-purpose conversions such as + * scaling, resizing, blending, color-space conversion, or format + * conversions. Rather, it simply copies raw image data. vkCmdCopyImage + * can copy between images with different formats, provided the formats + * are compatible as defined below. + * + * [The spec later defines compatibility as having the same number of + * bytes per block]. + */ + return vk_format_for_size(isl_format_layouts[surf->format].bs); +} + +static VkFormat +choose_buffer_format(struct anv_image *image, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + /* vkCmdCopy* commands behave like memcpy. Therefore we choose + * compatable UINT formats for the source and destination image views. + * + * For the buffer, we go back to the original image format and get a + * the format as if it were linear. This way, for RGB formats, we get + * an RGB format here even if the tiled image is RGBA. XXX: This doesn't + * work if the buffer is the destination. 
+ */ + enum isl_format linear_format = anv_get_isl_format(image->vk_format, aspect); + + return vk_format_for_size(isl_format_layouts[linear_format].bs); +} + void anv_CmdCopyImage( VkCommandBuffer commandBuffer, VkImage srcImage, @@ -852,15 +900,23 @@ void anv_CmdCopyImage( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { + assert(pRegions[r].srcSubresource.aspectMask == + pRegions[r].dstSubresource.aspectMask); + + VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; + + VkFormat src_format = choose_iview_format(src_image, aspect); + VkFormat dst_format = choose_iview_format(dest_image, aspect); + struct anv_image_view src_iview; anv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, .viewType = anv_meta_get_view_type(src_image), - .format = src_image->vk_format, + .format = src_format, .subresourceRange = { - .aspectMask = pRegions[r].srcSubresource.aspectMask, + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].srcSubresource.mipLevel, .levelCount = 1, .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, @@ -902,7 +958,7 @@ void anv_CmdCopyImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = destImage, .viewType = anv_meta_get_view_type(dest_image), - .format = dest_image->vk_format, + .format = dst_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].dstSubresource.mipLevel, @@ -1067,22 +1123,18 @@ void anv_CmdCopyBufferToImage( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, dest_image, destImage); VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - const VkFormat orig_format = dest_image->vk_format; struct anv_meta_saved_state saved_state; meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { - VkFormat proxy_format = orig_format; - 
VkImageAspectFlags proxy_aspect = pRegions[r].imageSubresource.aspectMask; + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - if (orig_format == VK_FORMAT_S8_UINT) { - proxy_format = VK_FORMAT_R8_UINT; - proxy_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - } + VkFormat image_format = choose_iview_format(dest_image, aspect); + VkFormat buffer_format = choose_buffer_format(dest_image, aspect); struct anv_image *src_image = - make_image_for_buffer(vk_device, srcBuffer, proxy_format, + make_image_for_buffer(vk_device, srcBuffer, buffer_format, VK_IMAGE_USAGE_SAMPLED_BIT, dest_image->type, &cmd_buffer->pool->alloc, &pRegions[r]); @@ -1108,9 +1160,9 @@ void anv_CmdCopyBufferToImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = anv_image_to_handle(src_image), .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = proxy_format, + .format = buffer_format, .subresourceRange = { - .aspectMask = proxy_aspect, + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, .levelCount = 1, .baseArrayLayer = 0, @@ -1125,7 +1177,7 @@ void anv_CmdCopyBufferToImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = anv_image_to_handle(dest_image), .viewType = anv_meta_get_view_type(dest_image), - .format = proxy_format, + .format = image_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].imageSubresource.mipLevel, @@ -1188,15 +1240,20 @@ void anv_CmdCopyImageToBuffer( meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(src_image, aspect); + VkFormat buffer_format = choose_buffer_format(src_image, aspect); + struct anv_image_view src_iview; anv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = srcImage, .viewType = anv_meta_get_view_type(src_image), - .format = 
src_image->vk_format, + .format = image_format, .subresourceRange = { - .aspectMask = pRegions[r].imageSubresource.aspectMask, + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].imageSubresource.mipLevel, .levelCount = 1, .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, @@ -1205,13 +1262,8 @@ void anv_CmdCopyImageToBuffer( }, cmd_buffer); - VkFormat dest_format = src_image->vk_format; - if (dest_format == VK_FORMAT_S8_UINT) { - dest_format = VK_FORMAT_R8_UINT; - } - struct anv_image *dest_image = - make_image_for_buffer(vk_device, destBuffer, dest_format, + make_image_for_buffer(vk_device, destBuffer, buffer_format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, src_image->type, &cmd_buffer->pool->alloc, &pRegions[r]); @@ -1235,7 +1287,7 @@ void anv_CmdCopyImageToBuffer( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = anv_image_to_handle(dest_image), .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = dest_format, + .format = buffer_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, -- cgit v1.2.3 From 5f5fc23e7c622990a9b048b9d6bb4dabf61cf1f4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Dec 2015 13:51:15 -0800 Subject: genX/state: Pull some generic helpers into a shared header --- src/vulkan/gen7_state.c | 43 +--------------------------- src/vulkan/gen8_state.c | 43 +--------------------------- src/vulkan/genX_state_util.h | 67 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 84 deletions(-) create mode 100644 src/vulkan/genX_state_util.h (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 88a508a1be9..c5c4cdaaf85 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -32,30 +32,7 @@ #include "gen7_pack.h" #include "gen75_pack.h" -static const uint8_t -anv_surftype(const struct anv_image *image, VkImageViewType view_type, - bool storage) -{ - switch (view_type) { - default: - unreachable("bad 
VkImageViewType"); - case VK_IMAGE_VIEW_TYPE_1D: - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - assert(image->type == VK_IMAGE_TYPE_1D); - return SURFTYPE_1D; - case VK_IMAGE_VIEW_TYPE_CUBE: - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - assert(image->type == VK_IMAGE_TYPE_2D); - return storage ? SURFTYPE_2D : SURFTYPE_CUBE; - case VK_IMAGE_VIEW_TYPE_2D: - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - assert(image->type == VK_IMAGE_TYPE_2D); - return SURFTYPE_2D; - case VK_IMAGE_VIEW_TYPE_3D: - assert(image->type == VK_IMAGE_TYPE_3D); - return SURFTYPE_3D; - } -} +#include "genX_state_util.h" GENX_FUNC(GEN7, GEN75) void genX(fill_buffer_surface_state)(void *state, enum isl_format format, @@ -208,24 +185,6 @@ static const uint8_t anv_valign[] = { [4] = VALIGN_4, }; -static const uint32_t vk_to_gen_swizzle_map[] = { - [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, - [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, - [VK_COMPONENT_SWIZZLE_R] = SCS_RED, - [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, - [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, - [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA -}; - -static inline uint32_t -vk_to_gen_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) -{ - if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) - return vk_to_gen_swizzle_map[component]; - else - return vk_to_gen_swizzle_map[swizzle]; -} - GENX_FUNC(GEN7, GEN75) void genX(image_view_init)(struct anv_image_view *iview, struct anv_device *device, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 34c4d26b20f..c7cc585c5e1 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -32,30 +32,7 @@ #include "gen8_pack.h" #include "gen9_pack.h" -static const uint8_t -anv_surftype(const struct anv_image *image, VkImageViewType view_type, - bool storage) -{ - switch (view_type) { - default: - unreachable("bad VkImageViewType"); - case VK_IMAGE_VIEW_TYPE_1D: - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - assert(image->type == VK_IMAGE_TYPE_1D); - return SURFTYPE_1D; - case VK_IMAGE_VIEW_TYPE_CUBE: - case 
VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - assert(image->type == VK_IMAGE_TYPE_2D); - return storage ? SURFTYPE_2D : SURFTYPE_CUBE; - case VK_IMAGE_VIEW_TYPE_2D: - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - assert(image->type == VK_IMAGE_TYPE_2D); - return SURFTYPE_2D; - case VK_IMAGE_VIEW_TYPE_3D: - assert(image->type == VK_IMAGE_TYPE_3D); - return SURFTYPE_3D; - } -} +#include "genX_state_util.h" void genX(fill_buffer_surface_state)(void *state, enum isl_format format, @@ -112,24 +89,6 @@ alloc_surface_state(struct anv_device *device, } } -static const uint32_t vk_to_gen_swizzle_map[] = { - [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, - [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, - [VK_COMPONENT_SWIZZLE_R] = SCS_RED, - [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, - [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, - [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA -}; - -static inline uint32_t -vk_to_gen_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) -{ - if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) - return vk_to_gen_swizzle_map[component]; - else - return vk_to_gen_swizzle_map[swizzle]; -} - /** * Get the values to pack into RENDER_SUFFACE_STATE.SurfaceHorizontalAlignment * and SurfaceVerticalAlignment. 
diff --git a/src/vulkan/genX_state_util.h b/src/vulkan/genX_state_util.h new file mode 100644 index 00000000000..cedfabda1f3 --- /dev/null +++ b/src/vulkan/genX_state_util.h @@ -0,0 +1,67 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +static const uint8_t +anv_surftype(const struct anv_image *image, VkImageViewType view_type, + bool storage) +{ + switch (view_type) { + default: + unreachable("bad VkImageViewType"); + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_1D); + return SURFTYPE_1D; + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + assert(image->type == VK_IMAGE_TYPE_2D); + return storage ? 
SURFTYPE_2D : SURFTYPE_CUBE; + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_2D); + return SURFTYPE_2D; + case VK_IMAGE_VIEW_TYPE_3D: + assert(image->type == VK_IMAGE_TYPE_3D); + return SURFTYPE_3D; + } +} + +#if ANV_GEN > 7 || ANV_IS_HASWELL +static const uint32_t vk_to_gen_swizzle_map[] = { + [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, + [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, + [VK_COMPONENT_SWIZZLE_R] = SCS_RED, + [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, + [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, + [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA +}; + +static inline uint32_t +vk_to_gen_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) +{ + if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) + return vk_to_gen_swizzle_map[component]; + else + return vk_to_gen_swizzle_map[swizzle]; +} +#endif -- cgit v1.2.3 From 0639f44d0f545b30032695f402c6a4a7963ee8be Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Dec 2015 14:30:51 -0800 Subject: isl: Add a file for format helpers --- src/isl/Makefile.am | 1 + src/isl/isl.c | 15 --------------- src/isl/isl_format.c | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 15 deletions(-) create mode 100644 src/isl/isl_format.c (limited to 'src') diff --git a/src/isl/Makefile.am b/src/isl/Makefile.am index 134e62ad105..6fd1da669a3 100644 --- a/src/isl/Makefile.am +++ b/src/isl/Makefile.am @@ -44,6 +44,7 @@ libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init libisl_la_SOURCES = \ isl.c \ + isl_format.c \ isl_format_layout.c \ isl_gen4.c \ isl_gen4.h \ diff --git a/src/isl/isl.c b/src/isl/isl.c index 6c49164615b..00eb249b23f 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -65,21 +65,6 @@ isl_device_init(struct isl_device *dev, assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); } -bool -isl_format_has_sint_channel(enum isl_format fmt) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - return fmtl->channels.r.type == ISL_SINT || - 
fmtl->channels.g.type == ISL_SINT || - fmtl->channels.b.type == ISL_SINT || - fmtl->channels.a.type == ISL_SINT || - fmtl->channels.l.type == ISL_SINT || - fmtl->channels.i.type == ISL_SINT || - fmtl->channels.p.type == ISL_SINT || - fmtl->channels.g.type == ISL_SINT; -} - /** * @param[out] info is written only on success */ diff --git a/src/isl/isl_format.c b/src/isl/isl_format.c new file mode 100644 index 00000000000..bebbd6efbc0 --- /dev/null +++ b/src/isl/isl_format.c @@ -0,0 +1,41 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include + +#include "isl.h" + +bool +isl_format_has_sint_channel(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->channels.r.type == ISL_SINT || + fmtl->channels.g.type == ISL_SINT || + fmtl->channels.b.type == ISL_SINT || + fmtl->channels.a.type == ISL_SINT || + fmtl->channels.l.type == ISL_SINT || + fmtl->channels.i.type == ISL_SINT || + fmtl->channels.p.type == ISL_SINT || + fmtl->channels.g.type == ISL_SINT; +} -- cgit v1.2.3 From 603a3a943931092df0e6e697921156713467e59a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Dec 2015 14:57:51 -0800 Subject: isl/format: Add some helpers for working with RGB formats --- src/isl/isl.h | 12 ++++++++++++ src/isl/isl_format.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'src') diff --git a/src/isl/isl.h b/src/isl/isl.h index c6baca0978d..842bbbb0265 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -823,6 +823,18 @@ isl_format_block_is_1x1x1(enum isl_format fmt) return fmtl->bw == 1 && fmtl->bh == 1 && fmtl->bd == 1; } +static inline bool +isl_format_is_rgb(enum isl_format fmt) +{ + return isl_format_layouts[fmt].channels.r.bits > 0 && + isl_format_layouts[fmt].channels.g.bits > 0 && + isl_format_layouts[fmt].channels.b.bits > 0 && + isl_format_layouts[fmt].channels.a.bits == 0; +} + +enum isl_format isl_format_rgb_to_rgba(enum isl_format rgb) ATTRIBUTE_CONST; +enum isl_format isl_format_rgb_to_rgbx(enum isl_format rgb) ATTRIBUTE_CONST; + bool isl_is_storage_image_format(enum isl_format fmt); enum isl_format diff --git a/src/isl/isl_format.c b/src/isl/isl_format.c index bebbd6efbc0..0fe6e9b83ab 100644 --- a/src/isl/isl_format.c +++ b/src/isl/isl_format.c @@ -39,3 +39,57 @@ isl_format_has_sint_channel(enum isl_format fmt) fmtl->channels.p.type == ISL_SINT || fmtl->channels.g.type == ISL_SINT; } + +enum isl_format +isl_format_rgb_to_rgba(enum isl_format rgb) +{ + 
assert(isl_format_is_rgb(rgb)); + + switch (rgb) { + case ISL_FORMAT_R32G32B32_FLOAT: return ISL_FORMAT_R32G32B32A32_FLOAT; + case ISL_FORMAT_R32G32B32_SINT: return ISL_FORMAT_R32G32B32A32_SINT; + case ISL_FORMAT_R32G32B32_UINT: return ISL_FORMAT_R32G32B32A32_UINT; + case ISL_FORMAT_R32G32B32_UNORM: return ISL_FORMAT_R32G32B32A32_UNORM; + case ISL_FORMAT_R32G32B32_SNORM: return ISL_FORMAT_R32G32B32A32_SNORM; + case ISL_FORMAT_R32G32B32_SSCALED: return ISL_FORMAT_R32G32B32A32_SSCALED; + case ISL_FORMAT_R32G32B32_USCALED: return ISL_FORMAT_R32G32B32A32_USCALED; + case ISL_FORMAT_R32G32B32_SFIXED: return ISL_FORMAT_R32G32B32A32_SFIXED; + case ISL_FORMAT_R8G8B8_UNORM: return ISL_FORMAT_R8G8B8A8_UNORM; + case ISL_FORMAT_R8G8B8_SNORM: return ISL_FORMAT_R8G8B8A8_SNORM; + case ISL_FORMAT_R8G8B8_SSCALED: return ISL_FORMAT_R8G8B8A8_SSCALED; + case ISL_FORMAT_R8G8B8_USCALED: return ISL_FORMAT_R8G8B8A8_USCALED; + case ISL_FORMAT_R16G16B16_FLOAT: return ISL_FORMAT_R16G16B16A16_FLOAT; + case ISL_FORMAT_R16G16B16_UNORM: return ISL_FORMAT_R16G16B16A16_UNORM; + case ISL_FORMAT_R16G16B16_SNORM: return ISL_FORMAT_R16G16B16A16_SNORM; + case ISL_FORMAT_R16G16B16_SSCALED: return ISL_FORMAT_R16G16B16A16_SSCALED; + case ISL_FORMAT_R16G16B16_USCALED: return ISL_FORMAT_R16G16B16A16_USCALED; + case ISL_FORMAT_R8G8B8_UNORM_SRGB: return ISL_FORMAT_R8G8B8A8_UNORM_SRGB; + case ISL_FORMAT_R16G16B16_UINT: return ISL_FORMAT_R16G16B16A16_UINT; + case ISL_FORMAT_R16G16B16_SINT: return ISL_FORMAT_R16G16B16A16_SINT; + case ISL_FORMAT_R8G8B8_UINT: return ISL_FORMAT_R8G8B8A8_UINT; + case ISL_FORMAT_R8G8B8_SINT: return ISL_FORMAT_R8G8B8A8_SINT; + default: + return ISL_FORMAT_UNSUPPORTED; + } +} + +enum isl_format +isl_format_rgb_to_rgbx(enum isl_format rgb) +{ + assert(isl_format_is_rgb(rgb)); + + switch (rgb) { + case ISL_FORMAT_R32G32B32_FLOAT: + return ISL_FORMAT_R32G32B32X32_FLOAT; + case ISL_FORMAT_R16G16B16_UNORM: + return ISL_FORMAT_R16G16B16X16_UNORM; + case ISL_FORMAT_R16G16B16_FLOAT: + return 
ISL_FORMAT_R16G16B16X16_FLOAT; + case ISL_FORMAT_R8G8B8_UNORM: + return ISL_FORMAT_R8G8B8X8_UNORM; + case ISL_FORMAT_R8G8B8_UNORM_SRGB: + return ISL_FORMAT_R8G8B8X8_UNORM_SRGB; + default: + return ISL_FORMAT_UNSUPPORTED; + } +} -- cgit v1.2.3 From 2712c0cca32f85f6572e4c8f5593766e2e2e8a77 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Dec 2015 18:31:31 -0800 Subject: anv/formats: Add a tiling parameter to get_isl_format Currently, this parameter does nothing. --- src/vulkan/anv_formats.c | 3 ++- src/vulkan/anv_image.c | 2 +- src/vulkan/anv_meta.c | 3 ++- src/vulkan/anv_private.h | 3 ++- src/vulkan/gen7_pipeline.c | 3 ++- src/vulkan/gen8_pipeline.c | 3 ++- 6 files changed, 11 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 6465fabd5b5..7aacc4f94d2 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -237,7 +237,8 @@ anv_format_for_vk_format(VkFormat format) * Exactly one bit must be set in \a aspect. 
*/ enum isl_format -anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect) +anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, + VkImageTiling tiling) { const struct anv_format *anv_fmt = &anv_formats[format]; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 1cb860f75f5..5a9f826ec43 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -133,7 +133,7 @@ make_surface(const struct anv_device *dev, ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, .dim = vk_to_isl_surf_dim[vk_info->imageType], - .format = anv_get_isl_format(vk_info->format, aspect), + .format = anv_get_isl_format(vk_info->format, aspect, vk_info->tiling), .width = vk_info->extent.width, .height = vk_info->extent.height, .depth = vk_info->extent.depth, diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index a435c26ee59..bb37899ce37 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -877,7 +877,8 @@ choose_buffer_format(struct anv_image *image, VkImageAspectFlagBits aspect) * an RGB format here even if the tiled image is RGBA. XXX: This doesn't * work if the buffer is the destination. 
*/ - enum isl_format linear_format = anv_get_isl_format(image->vk_format, aspect); + enum isl_format linear_format = anv_get_isl_format(image->vk_format, aspect, + VK_IMAGE_TILING_LINEAR); return vk_format_for_size(isl_format_layouts[linear_format].bs); } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 187a6e822b2..d915f694347 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1415,7 +1415,8 @@ const struct anv_format * anv_format_for_vk_format(VkFormat format); enum isl_format -anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect); +anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, + VkImageTiling tiling); static inline bool anv_format_is_color(const struct anv_format *format) diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index f4070743710..7dedcac08af 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -74,7 +74,8 @@ emit_vertex_input(struct anv_pipeline *pipeline, const VkVertexInputAttributeDescription *desc = &info->pVertexAttributeDescriptions[i]; enum isl_format format = anv_get_isl_format(desc->format, - VK_IMAGE_ASPECT_COLOR_BIT); + VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_LINEAR); assert(desc->binding < 32); diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 827a013ebdc..a405d290d50 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -70,7 +70,8 @@ emit_vertex_input(struct anv_pipeline *pipeline, const VkVertexInputAttributeDescription *desc = &info->pVertexAttributeDescriptions[i]; enum isl_format format = anv_get_isl_format(desc->format, - VK_IMAGE_ASPECT_COLOR_BIT); + VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_LINEAR); assert(desc->binding < 32); -- cgit v1.2.3 From 87dd59e5784774aebc6ff8eab8086ee2067590d3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 Jan 2016 08:42:35 -0800 Subject: anv/formats: Rework GetPhysicalDeviceFormatProperties It now calls get_isl_format to get 
both linear and tiled views of the format and determines linear/tiled properties from that. Buffer properties are determined from the linear format. --- src/vulkan/anv_formats.c | 139 ++++++++++++++++++++++++++--------------------- 1 file changed, 76 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 7aacc4f94d2..9f522ee7b91 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -269,75 +269,94 @@ void anv_validate_GetPhysicalDeviceFormatProperties( anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); } +static VkFormatFeatureFlags +get_image_format_properties(int gen, enum isl_format base, + enum isl_format actual) +{ + const struct brw_surface_format_info *info = &surface_formats[actual]; + + if (actual == ISL_FORMAT_UNSUPPORTED || !info->exists) + return 0; + + VkFormatFeatureFlags flags = 0; + if (info->sampling <= gen) { + flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_BLIT_SRC_BIT; + } + + if (info->render_target <= gen) { + flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + + if (info->alpha_blend <= gen) + flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + + /* Load/store is determined based on base format. This prevents RGB + * formats from showing up as load/store capable. 
+ */ + if (isl_is_storage_image_format(base)) + flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + + if (base == ISL_FORMAT_R32_SINT || base == ISL_FORMAT_R32_UINT) + flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; + + return flags; +} + +static VkFormatFeatureFlags +get_buffer_format_properties(int gen, enum isl_format format) +{ + const struct brw_surface_format_info *info = &surface_formats[format]; + + if (format == ISL_FORMAT_UNSUPPORTED || !info->exists) + return 0; + + VkFormatFeatureFlags flags = 0; + if (info->sampling <= gen && !isl_format_is_compressed(format)) + flags |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; + + if (info->input_vb <= gen) + flags |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + + if (isl_is_storage_image_format(format)) + flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; + + if (format == ISL_FORMAT_R32_SINT || format == ISL_FORMAT_R32_UINT) + flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; + + return flags; +} + static void anv_physical_device_get_format_properties(struct anv_physical_device *physical_device, - const struct anv_format *format, + VkFormat format, VkFormatProperties *out_properties) { - const struct brw_surface_format_info *info; - int gen; - VkFormatFeatureFlags flags; - - assert(format != NULL); - - gen = physical_device->info->gen * 10; + int gen = physical_device->info->gen * 10; if (physical_device->info->is_haswell) gen += 5; - if (format->surface_format== ISL_FORMAT_UNSUPPORTED) - goto unsupported; - - uint32_t linear = 0, tiled = 0, buffer = 0; - if (anv_format_is_depth_or_stencil(format)) { + VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0; + if (anv_format_is_depth_or_stencil(&anv_formats[format])) { tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; if (physical_device->info->gen >= 8) { tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; } - if (format->depth_format) { + if (anv_formats[format].depth_format) { tiled |= 
VK_FORMAT_FEATURE_BLIT_DST_BIT; } } else { - /* The surface_formats table only contains color formats */ - info = &surface_formats[format->surface_format]; - if (!info->exists) - goto unsupported; - - if (info->sampling <= gen) { - flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | - VK_FORMAT_FEATURE_BLIT_SRC_BIT; - linear |= flags; - tiled |= flags; - - if (!isl_format_is_compressed(format->surface_format)) - buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; - } - if (info->render_target <= gen) { - flags = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | - VK_FORMAT_FEATURE_BLIT_DST_BIT; - linear |= flags; - tiled |= flags; - } - if (info->alpha_blend <= gen) { - linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; - tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; - } - if (info->input_vb <= gen) { - buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; - } - - if (isl_is_storage_image_format(format->surface_format)) { - tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; - linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; - buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; - } - - if (format->surface_format == ISL_FORMAT_R32_SINT && - format->surface_format == ISL_FORMAT_R32_UINT) { - tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; - linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; - buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; - } + enum isl_format linear_fmt, tiled_fmt; + linear_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_LINEAR); + tiled_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_OPTIMAL); + + linear = get_image_format_properties(gen, linear_fmt, linear_fmt); + tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt); + buffer = get_buffer_format_properties(gen, linear_fmt); } out_properties->linearTilingFeatures = linear; @@ -345,11 +364,6 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d out_properties->bufferFeatures = buffer; 
return; - - unsupported: - out_properties->linearTilingFeatures = 0; - out_properties->optimalTilingFeatures = 0; - out_properties->bufferFeatures = 0; } @@ -362,13 +376,13 @@ void anv_GetPhysicalDeviceFormatProperties( anv_physical_device_get_format_properties( physical_device, - anv_format_for_vk_format(format), + format, pFormatProperties); } VkResult anv_GetPhysicalDeviceImageFormatProperties( VkPhysicalDevice physicalDevice, - VkFormat _format, + VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, @@ -376,7 +390,6 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( VkImageFormatProperties* pImageFormatProperties) { ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - const struct anv_format *format = anv_format_for_vk_format(_format); VkFormatProperties format_props; VkFormatFeatureFlags format_feature_flags; VkExtent3D maxExtent; @@ -434,7 +447,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( } if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { - if (format->has_stencil) { + if (anv_format_for_vk_format(format)->has_stencil) { /* Not yet implemented because copying to a W-tiled surface is crazy * hard. */ -- cgit v1.2.3 From a7cc12910d1963d21fda8165ce790c4cb4241fba Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 Jan 2016 13:47:18 -0800 Subject: anv/image: Do more work in anv_image_view_init There was a bunch of common code in gen7/8_image_view_init that we really should be sharing. 
--- src/vulkan/anv_image.c | 16 ++++++++++++++++ src/vulkan/anv_private.h | 2 ++ src/vulkan/gen7_state.c | 18 ++---------------- src/vulkan/gen8_state.c | 18 ++---------------- 4 files changed, 22 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 5a9f826ec43..5bee5a236e8 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -432,6 +432,22 @@ anv_image_view_init(struct anv_image_view *iview, break; } + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + iview->image = image; + iview->bo = image->bo; + iview->offset = image->offset + surface->offset; + + iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; + iview->format = anv_format_for_vk_format(pCreateInfo->format); + + iview->extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, range->baseMipLevel), + .height = anv_minify(image->extent.height, range->baseMipLevel), + .depth = anv_minify(image->extent.depth, range->baseMipLevel), + }; + switch (device->info.gen) { case 7: if (device->info.is_haswell) diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index d915f694347..76c47de594a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1492,6 +1492,8 @@ struct anv_image_view { const struct anv_format *format; /**< VkImageViewCreateInfo::format */ struct anv_bo *bo; uint32_t offset; /**< Offset into bo. */ + + VkImageAspectFlags aspect_mask; VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ /** RENDER_SURFACE_STATE when using image as a color render target. 
*/ diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index c5c4cdaaf85..fe2967c7ef4 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -198,23 +198,9 @@ genX(image_view_init)(struct anv_image_view *iview, struct anv_surface *surface = anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - const struct anv_format *format = - anv_format_for_vk_format(pCreateInfo->format); - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) anv_finishme("non-2D image views"); - iview->image = image; - iview->bo = image->bo; - iview->offset = image->offset + surface->offset; - iview->format = anv_format_for_vk_format(pCreateInfo->format); - - iview->extent = (VkExtent3D) { - .width = anv_minify(image->extent.width, range->baseMipLevel), - .height = anv_minify(image->extent.height, range->baseMipLevel), - .depth = anv_minify(image->extent.depth, range->baseMipLevel), - }; - uint32_t depth = 1; if (range->layerCount > 1) { depth = range->layerCount; @@ -228,7 +214,7 @@ genX(image_view_init)(struct anv_image_view *iview, struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = anv_surftype(image, pCreateInfo->viewType, false), .SurfaceArray = image->array_size > 1, - .SurfaceFormat = format->surface_format, + .SurfaceFormat = iview->format->surface_format, .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], .SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], @@ -325,7 +311,7 @@ genX(image_view_init)(struct anv_image_view *iview, surface_state.SurfaceFormat = isl_lower_storage_image_format(&device->isl_dev, - format->surface_format); + iview->format->surface_format); surface_state.SurfaceMinLOD = range->baseMipLevel; surface_state.MIPCountLOD = MAX2(range->levelCount, 1) - 1; diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index c7cc585c5e1..5562a252a21 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -140,20 +140,6 @@ genX(image_view_init)(struct anv_image_view *iview, 
uint32_t depth = 1; /* RENDER_SURFACE_STATE::Depth */ uint32_t rt_view_extent = 1; /* RENDER_SURFACE_STATE::RenderTargetViewExtent */ - const struct anv_format *format_info = - anv_format_for_vk_format(pCreateInfo->format); - - iview->image = image; - iview->bo = image->bo; - iview->offset = image->offset + surface->offset; - iview->format = format_info; - - iview->extent = (VkExtent3D) { - .width = anv_minify(image->extent.width, range->baseMipLevel), - .height = anv_minify(image->extent.height, range->baseMipLevel), - .depth = anv_minify(image->extent.depth, range->baseMipLevel), - }; - switch (image->type) { case VK_IMAGE_TYPE_1D: case VK_IMAGE_TYPE_2D: @@ -208,7 +194,7 @@ genX(image_view_init)(struct anv_image_view *iview, struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = anv_surftype(image, pCreateInfo->viewType, false), .SurfaceArray = image->array_size > 1, - .SurfaceFormat = format_info->surface_format, + .SurfaceFormat = iview->format->surface_format, .SurfaceVerticalAlignment = valign, .SurfaceHorizontalAlignment = halign, .TileMode = isl_to_gen_tiling[surface->isl.tiling], @@ -300,7 +286,7 @@ genX(image_view_init)(struct anv_image_view *iview, surface_state.SurfaceFormat = isl_lower_storage_image_format(&device->isl_dev, - format_info->surface_format); + iview->format->surface_format); surface_state.SurfaceMinLOD = range->baseMipLevel; surface_state.MIPCountLOD = MAX2(range->levelCount, 1) - 1; -- cgit v1.2.3 From ceb05131daffea3de2777cfb22a9be2dc53d88cd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 Jan 2016 14:12:22 -0800 Subject: anv_get_isl_format: Support depth+stencil aspect value You just get the depth format in this case. 
--- src/vulkan/anv_formats.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 9f522ee7b91..88c72f53251 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -246,6 +246,7 @@ anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, case VK_IMAGE_ASPECT_COLOR_BIT: return anv_fmt->surface_format; case VK_IMAGE_ASPECT_DEPTH_BIT: + case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT): assert(anv_fmt->depth_format != 0); return anv_fmt->surface_format; case VK_IMAGE_ASPECT_STENCIL_BIT: -- cgit v1.2.3 From f665fdf0e7295808edc2de3397f6f434bf223b39 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 Jan 2016 14:09:17 -0800 Subject: anv/image_view: Separate vulkan and isl formats Previously, anv_image_view had a anv_format pointer that we used for everything. This commit replaces that pointer with a VkFormat enum copied from the API and an isl_format. In order to implement RGB formats, we have to use a different isl_format for the actual surface state than the obvious one from the VkFormat. Separating the two helps us keep things streight. 
--- src/vulkan/anv_cmd_buffer.c | 3 ++- src/vulkan/anv_image.c | 5 ++++- src/vulkan/anv_meta_clear.c | 2 +- src/vulkan/anv_private.h | 4 +++- src/vulkan/gen7_cmd_buffer.c | 15 ++++++++++----- src/vulkan/gen7_state.c | 5 ++--- src/vulkan/gen8_cmd_buffer.c | 13 +++++++++---- src/vulkan/gen8_state.c | 5 ++--- 8 files changed, 33 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 2f19c75e7cc..49bb298a188 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -1075,7 +1075,8 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) const struct anv_image_view *iview = fb->attachments[subpass->depth_stencil_attachment]; - assert(anv_format_is_depth_or_stencil(iview->format)); + assert(iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); return iview; } diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 5bee5a236e8..bd550890596 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -210,6 +210,7 @@ anv_image_create(VkDevice _device, image->levels = pCreateInfo->mipLevels; image->array_size = pCreateInfo->arrayLayers; image->usage = anv_image_get_full_usage(pCreateInfo); + image->tiling = pCreateInfo->tiling; if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { image->needs_nonrt_surface_state = true; @@ -440,7 +441,9 @@ anv_image_view_init(struct anv_image_view *iview, iview->offset = image->offset + surface->offset; iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; - iview->format = anv_format_for_vk_format(pCreateInfo->format); + iview->vk_format = pCreateInfo->format; + iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, + image->tiling); iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, range->baseMipLevel), diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 17a40cd6be6..6873c4e8e6b 100644 --- 
a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -759,7 +759,7 @@ void anv_CmdClearColorImage( .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { - .format = iview.format->vk_format, + .format = iview.vk_format, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_GENERAL, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 76c47de594a..8667b45cfc0 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1453,6 +1453,7 @@ struct anv_image { uint32_t levels; uint32_t array_size; VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ + VkImageTiling tiling; /** VkImageCreateInfo::tiling */ VkDeviceSize size; uint32_t alignment; @@ -1489,11 +1490,12 @@ struct anv_image { struct anv_image_view { const struct anv_image *image; /**< VkImageViewCreateInfo::image */ - const struct anv_format *format; /**< VkImageViewCreateInfo::format */ struct anv_bo *bo; uint32_t offset; /**< Offset into bo. */ VkImageAspectFlags aspect_mask; + VkFormat vk_format; + enum isl_format format; VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ /** RENDER_SURFACE_STATE when using image as a color render target. 
*/ diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index feed3611805..fa9cb8fa914 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -459,7 +459,7 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); struct GEN7_DEPTH_STENCIL_STATE depth_stencil = { - .StencilBufferWriteEnable = iview && iview->format->has_stencil, + .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), .StencilTestMask = cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, @@ -698,17 +698,22 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); const struct anv_image *image = iview ? iview->image : NULL; - const bool has_depth = iview && iview->format->depth_format; - const bool has_stencil = iview && iview->format->has_stencil; + + /* XXX: isl needs to grow depth format support */ + const struct anv_format *anv_format = + iview ? 
anv_format_for_vk_format(iview->vk_format) : NULL; + + const bool has_depth = iview && anv_format->depth_format; + const bool has_stencil = iview && anv_format->has_stencil; /* Emit 3DSTATE_DEPTH_BUFFER */ if (has_depth) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = iview->format->depth_format, + .DepthWriteEnable = true, .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = iview->format->depth_format, + .SurfaceFormat = anv_format->depth_format, .SurfacePitch = image->depth_surface.isl.row_pitch - 1, .SurfaceBaseAddress = { .bo = image->bo, diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index fe2967c7ef4..a7940ca9e2f 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -214,7 +214,7 @@ genX(image_view_init)(struct anv_image_view *iview, struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = anv_surftype(image, pCreateInfo->viewType, false), .SurfaceArray = image->array_size > 1, - .SurfaceFormat = iview->format->surface_format, + .SurfaceFormat = iview->format, .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], .SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], @@ -310,8 +310,7 @@ genX(image_view_init)(struct anv_image_view *iview, anv_surftype(image, pCreateInfo->viewType, true), surface_state.SurfaceFormat = - isl_lower_storage_image_format(&device->isl_dev, - iview->format->surface_format); + isl_lower_storage_image_format(&device->isl_dev, iview->format); surface_state.SurfaceMinLOD = range->baseMipLevel; surface_state.MIPCountLOD = MAX2(range->levelCount, 1) - 1; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 9614da705e7..965a9c14317 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -695,8 +695,13 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) const struct anv_image_view *iview = 
anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); const struct anv_image *image = iview ? iview->image : NULL; - const bool has_depth = iview && iview->format->depth_format; - const bool has_stencil = iview && iview->format->has_stencil; + + /* XXX: isl needs to grow depth format support */ + const struct anv_format *anv_format = + iview ? anv_format_for_vk_format(iview->vk_format) : NULL; + + const bool has_depth = iview && anv_format->depth_format; + const bool has_stencil = iview && anv_format->has_stencil; /* FIXME: Implement the PMA stall W/A */ /* FIXME: Width and Height are wrong */ @@ -705,10 +710,10 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) if (has_depth) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = iview->format->depth_format, + .DepthWriteEnable = anv_format->depth_format, .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = iview->format->depth_format, + .SurfaceFormat = anv_format->depth_format, .SurfacePitch = image->depth_surface.isl.row_pitch - 1, .SurfaceBaseAddress = { .bo = image->bo, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 5562a252a21..a24eb192493 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -194,7 +194,7 @@ genX(image_view_init)(struct anv_image_view *iview, struct GENX(RENDER_SURFACE_STATE) surface_state = { .SurfaceType = anv_surftype(image, pCreateInfo->viewType, false), .SurfaceArray = image->array_size > 1, - .SurfaceFormat = iview->format->surface_format, + .SurfaceFormat = iview->format, .SurfaceVerticalAlignment = valign, .SurfaceHorizontalAlignment = halign, .TileMode = isl_to_gen_tiling[surface->isl.tiling], @@ -285,8 +285,7 @@ genX(image_view_init)(struct anv_image_view *iview, anv_surftype(image, pCreateInfo->viewType, true), surface_state.SurfaceFormat = - isl_lower_storage_image_format(&device->isl_dev, - iview->format->surface_format); + 
isl_lower_storage_image_format(&device->isl_dev, iview->format); surface_state.SurfaceMinLOD = range->baseMipLevel; surface_state.MIPCountLOD = MAX2(range->levelCount, 1) - 1; -- cgit v1.2.3 From 151694228d818db2ba3124acfe2eb1eeaff4dd2f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 Jan 2016 08:45:35 -0800 Subject: anv/formats: Hand out different formats based on tiled vs. linear --- src/vulkan/anv_formats.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 88c72f53251..a5c015ada08 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -59,7 +59,7 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT), fmt(VK_FORMAT_R8G8_SINT, ISL_FORMAT_R8G8_SINT), fmt(VK_FORMAT_R8G8_SRGB, ISL_FORMAT_UNSUPPORTED), /* L8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8X8_UNORM), + fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8_UNORM), fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM), fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED), fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED), @@ -244,14 +244,33 @@ anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, switch (aspect) { case VK_IMAGE_ASPECT_COLOR_BIT: - return anv_fmt->surface_format; + if (anv_fmt->surface_format == ISL_FORMAT_UNSUPPORTED) { + return ISL_FORMAT_UNSUPPORTED; + } else if (tiling == VK_IMAGE_TILING_OPTIMAL && + !util_is_power_of_two(anv_fmt->isl_layout->bpb)) { + /* Tiled formats *must* be power-of-two because we need up upload + * them with the render pipeline. For 3-channel formats, we fix + * this by switching them over to RGBX or RGBA formats under the + * hood. 
+ */ + enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->surface_format); + if (rgbx != ISL_FORMAT_UNSUPPORTED) + return rgbx; + else + return isl_format_rgb_to_rgba(anv_fmt->surface_format); + } else { + return anv_fmt->surface_format; + } + case VK_IMAGE_ASPECT_DEPTH_BIT: case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT): assert(anv_fmt->depth_format != 0); return anv_fmt->surface_format; + case VK_IMAGE_ASPECT_STENCIL_BIT: assert(anv_fmt->has_stencil); return ISL_FORMAT_R8_UINT; + default: unreachable("bad VkImageAspect"); return ISL_FORMAT_UNSUPPORTED; @@ -358,6 +377,20 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d linear = get_image_format_properties(gen, linear_fmt, linear_fmt); tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt); buffer = get_buffer_format_properties(gen, linear_fmt); + + /* XXX: We handle 3-channel formats by switching them out for RGBX or + * RGBA formats behind-the-scenes. This works fine for textures + * because the upload process will fill in the extra channel. + * We could also support it for render targets, but it will take + * substantially more work and we have enough RGBX formats to handle + * what most clients will want. + */ + if (linear_fmt != ISL_FORMAT_UNSUPPORTED && + isl_format_is_rgb(linear_fmt) && + isl_format_rgb_to_rgbx(linear_fmt) == ISL_FORMAT_UNSUPPORTED) { + tiled &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT & + ~VK_FORMAT_FEATURE_BLIT_DST_BIT; + } } out_properties->linearTilingFeatures = linear; -- cgit v1.2.3 From 8cc21d3aeaf14e16f2431ff3bdd5fa7c2e780b77 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 4 Jan 2016 16:31:10 -0800 Subject: isl: Align single-level 2D surfaces to compression block This fixes an assertion failure at isl.c:1003. 
Reported-by: Nanley Chery --- src/isl/isl.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 00eb249b23f..eee612826ce 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -519,24 +519,30 @@ isl_calc_phys_slice0_extent_sa_gen4_2d( const struct isl_extent4d *phys_level0_sa, struct isl_extent2d *phys_slice0_sa) { + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + assert(phys_level0_sa->depth == 1); - if (info->levels == 1) { - /* Do not align single-level surfaces to the image alignment. + if (info->levels == 1 && msaa_layout != ISL_MSAA_LAYOUT_INTERLEAVED) { + /* Do not pad the surface to the image alignment. Instead, pad it only + * to the pixel format's block alignment. * - * For tiled surfaces, skipping the alignment here avoids wasting CPU - * cycles on the below mipmap layout caluclations. Skipping the + * For tiled surfaces, using a reduced alignment here avoids wasting CPU + * cycles on the below mipmap layout caluclations. Reducing the * alignment here is safe because we later align the row pitch and array * pitch to the tile boundary. It is safe even for * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled * to accomodate the interleaved samples. * - * For linear surfaces, skipping the alignment here permits us to later + * For linear surfaces, reducing the alignment here permits us to later * choose an arbitrary, non-aligned row pitch. If the surface backs * a VkBuffer, then an arbitrary pitch may be needed to accomodate * VkBufferImageCopy::bufferRowLength. 
*/ - *phys_slice0_sa = isl_extent2d(phys_level0_sa->w, phys_level0_sa->h); + *phys_slice0_sa = (struct isl_extent2d) { + .w = isl_align_npot(phys_level0_sa->w, fmtl->bw), + .h = isl_align_npot(phys_level0_sa->h, fmtl->bh), + }; return; } -- cgit v1.2.3 From 0309199802cccb472ac1e1020b14770e3e596d6a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Jan 2016 16:57:01 -0800 Subject: nir/spirv: Add initial support for ConstantNull --- src/glsl/nir/spirv/spirv_to_nir.c | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 3a8949438f4..53228e4781f 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -733,6 +733,46 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, vtn_foreach_decoration(b, val, type_decoration_cb, NULL); } +static nir_constant * +vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type) +{ + nir_constant *c = rzalloc(b, nir_constant); + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + /* Nothing to do here. 
It's already initialized to zero */ + break; + + case GLSL_TYPE_ARRAY: + assert(glsl_get_length(type) > 0); + c->num_elements = glsl_get_length(type); + c->elements = ralloc_array(b, nir_constant *, c->num_elements); + + c->elements[0] = vtn_null_constant(b, glsl_get_array_element(type)); + for (unsigned i = 1; i < c->num_elements; i++) + c->elements[i] = c->elements[0]; + break; + + case GLSL_TYPE_STRUCT: + c->num_elements = glsl_get_length(type); + c->elements = ralloc_array(b, nir_constant *, c->num_elements); + + for (unsigned i = 0; i < c->num_elements; i++) { + c->elements[i] = vtn_null_constant(b, glsl_get_struct_field(type, i)); + } + break; + + default: + unreachable("Invalid type for null constant"); + } + + return c; +} + static void vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -792,6 +832,10 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, break; } + case SpvOpConstantNull: + val->constant = vtn_null_constant(b, val->const_type); + break; + default: unreachable("Unhandled opcode"); } -- cgit v1.2.3 From f32370a5362107a1bd6b1392724b2fb0f39a6e8a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Jan 2016 17:04:26 -0800 Subject: nir/spirv: Add a documenting assert for OpConstantSampler --- src/glsl/nir/spirv/spirv_to_nir.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 53228e4781f..9a5cedd5d95 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -836,6 +836,10 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, val->constant = vtn_null_constant(b, val->const_type); break; + case SpvOpConstantSampler: + assert(!"OpConstantSampler requires Kernel Capability"); + break; + default: unreachable("Unhandled opcode"); } -- cgit v1.2.3 From 01ba96e0593251aed33bc44960d8f3ae0013b9a2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Jan 2016 
17:16:44 -0800 Subject: nir/spirv: Add support for msb/lsb opcodes --- src/glsl/nir/spirv/vtn_glsl450.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 2d22e37de2a..fd8ec3eb2dc 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -270,6 +270,10 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; } + case GLSLstd450FindILsb: op = nir_op_find_lsb; break; + case GLSLstd450FindSMsb: op = nir_op_ifind_msb; break; + case GLSLstd450FindUMsb: op = nir_op_ufind_msb; break; + case GLSLstd450Asin: case GLSLstd450Acos: case GLSLstd450Atan: @@ -277,9 +281,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450ModfStruct: case GLSLstd450Frexp: case GLSLstd450FrexpStruct: - case GLSLstd450FindILsb: - case GLSLstd450FindSMsb: - case GLSLstd450FindUMsb: case GLSLstd450PackDouble2x32: case GLSLstd450UnpackDouble2x32: case GLSLstd450IMix: -- cgit v1.2.3 From 3a3c4aecf16baad74417f1bcb05730043af6f9cf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Jan 2016 17:32:33 -0800 Subject: nir/spirv: Add support for bitfield operations --- src/glsl/nir/spirv/spirv_to_nir.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 9a5cedd5d95..1dfce1f87bc 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2729,6 +2729,12 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpSelect: op = nir_op_bcsel; break; case SpvOpIEqual: op = nir_op_ieq; break; + case SpvOpBitFieldInsert: op = nir_op_bitfield_insert; break; + case SpvOpBitFieldSExtract: op = nir_op_ibitfield_extract; break; + case SpvOpBitFieldUExtract: op = nir_op_ubitfield_extract; break; + case SpvOpBitReverse: op = nir_op_bitfield_reverse; break; + case 
SpvOpBitCount: op = nir_op_bit_count; break; + /* Comparisons: (TODO: How do we want to handled ordered/unordered?) */ case SpvOpFOrdEqual: op = nir_op_feq; break; case SpvOpFUnordEqual: op = nir_op_feq; break; @@ -3672,6 +3678,11 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpDPdxCoarse: case SpvOpDPdyCoarse: case SpvOpFwidthCoarse: + case SpvOpBitFieldInsert: + case SpvOpBitFieldSExtract: + case SpvOpBitFieldUExtract: + case SpvOpBitReverse: + case SpvOpBitCount: vtn_handle_alu(b, opcode, w, count); break; -- cgit v1.2.3 From b8f0bea07a6e8517962777702ebed4649a7d0d0f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Jan 2016 20:59:16 -0800 Subject: nir/spirv: Implement extended add, sub, and mul --- src/glsl/nir/spirv/spirv_to_nir.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 1dfce1f87bc..ede347f97f8 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2715,6 +2715,30 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, } break; + case SpvOpIAddCarry: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]); + return; + + case SpvOpISubBorrow: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]); + return; + + case SpvOpUMulExtended: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]); + return; + + case SpvOpSMulExtended: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]); + 
return; + case SpvOpShiftRightLogical: op = nir_op_ushr; break; case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; case SpvOpShiftLeftLogical: op = nir_op_ishl; break; @@ -3635,6 +3659,10 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpFMod: case SpvOpVectorTimesScalar: case SpvOpDot: + case SpvOpIAddCarry: + case SpvOpISubBorrow: + case SpvOpUMulExtended: + case SpvOpSMulExtended: case SpvOpShiftRightLogical: case SpvOpShiftRightArithmetic: case SpvOpShiftLeftLogical: -- cgit v1.2.3 From ba7b5edc267c8a33eb55b83f9b0ea30e73e4b2c2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Jan 2016 21:36:11 -0800 Subject: anv/UpdateDescriptorSets: Use the correct index for the buffer view --- src/vulkan/anv_descriptor_set.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 6d38d114f96..6e53f3897c8 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -544,7 +544,7 @@ void anv_UpdateDescriptorSets( assert(buffer); struct anv_buffer_view *view = - &set->buffer_views[bind_layout->descriptor_index + j]; + &set->buffer_views[bind_layout->buffer_index + j]; const struct anv_format *format = anv_format_for_descriptor_type(write->descriptorType); -- cgit v1.2.3 From 8b403d599b7acf97616879d78e61456508f35ad3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Jan 2016 22:08:24 -0800 Subject: nir/spirv: Add support for the ControlBarrier instruction --- src/glsl/nir/spirv/spirv_to_nir.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index ede347f97f8..06a99f3031e 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3113,6 +3113,8 @@ vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, intrinsic_op = nir_intrinsic_memory_barrier; break; case SpvOpControlBarrier: + 
intrinsic_op = nir_intrinsic_barrier; + break; default: unreachable("unknown barrier instruction"); } -- cgit v1.2.3 From 2172f0e9bb54671dc0bd8970b1e06a7818b83c47 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 4 Jan 2016 17:00:49 -0800 Subject: isl: Fix mis-documented units of isl_surf::phys_level_sa It's in physical surface samples. Hence the _sa suffix. --- src/isl/isl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/isl/isl.h b/src/isl/isl.h index 842bbbb0265..02b92703c27 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -727,7 +727,8 @@ struct isl_surf { struct isl_extent4d logical_level0_px; /** - * Physical extent of the surface's base level, in units of pixels. + * Physical extent of the surface's base level, in units of physical + * surface samples. * * Consider isl_dim_layout as an operator that transforms a logical surface * layout to a physical surface layout. Then -- cgit v1.2.3 From a1d64ae56123db2bf302ce5ffe3a965075df7ead Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 4 Jan 2016 17:09:11 -0800 Subject: isl: Align isl_surf::phys_level0_sa to the format's compression block --- src/isl/isl.c | 24 +++++++++++++++++------- src/isl/isl.h | 2 +- 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index eee612826ce..f6b392cfe72 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -383,7 +383,7 @@ isl_surf_choose_dim_layout(const struct isl_device *dev, /** * Calculate the physical extent of the surface's first level, in units of - * surface samples. + * surface samples. The result is aligned to the format's compression block. 
*/ static void isl_calc_phys_level0_extent_sa(const struct isl_device *dev, @@ -393,6 +393,8 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, enum isl_msaa_layout msaa_layout, struct isl_extent4d *phys_level0_sa) { + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + if (isl_format_is_yuv(info->format)) isl_finishme("%s:%s: YUV format", __FILE__, __func__); @@ -401,6 +403,7 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, assert(info->height == 1); assert(info->depth == 1); assert(info->samples == 1); + assert(!isl_format_is_compressed(info->format)); switch (dim_layout) { case ISL_DIM_LAYOUT_GEN4_3D: @@ -434,8 +437,8 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, assert(info->samples == 1); *phys_level0_sa = (struct isl_extent4d) { - .w = info->width, - .h = info->height, + .w = isl_align(info->width, fmtl->bw), + .h = isl_align(info->height, fmtl->bh), .d = 1, .a = info->array_len, }; @@ -477,6 +480,11 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, assert(info->array_len == 1); assert(info->samples == 1); + if (fmtl->bd > 1) { + isl_finishme("%s:%s: compression block with depth > 1", + __FILE__, __func__); + } + switch (dim_layout) { case ISL_DIM_LAYOUT_GEN9_1D: unreachable("bad isl_dim_layout"); @@ -485,8 +493,8 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, assert(ISL_DEV_GEN(dev) >= 9); *phys_level0_sa = (struct isl_extent4d) { - .w = info->width, - .h = info->height, + .w = isl_align(info->width, fmtl->bw), + .h = isl_align(info->height, fmtl->bh), .d = 1, .a = info->depth, }; @@ -495,8 +503,8 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, case ISL_DIM_LAYOUT_GEN4_3D: assert(ISL_DEV_GEN(dev) < 9); *phys_level0_sa = (struct isl_extent4d) { - .w = info->width, - .h = info->height, + .w = isl_align(info->width, fmtl->bw), + .h = isl_align(info->height, fmtl->bh), .d = info->depth, .a = 1, }; @@ -998,6 +1006,8 @@ isl_surf_init_s(const 
struct isl_device *dev, struct isl_extent4d phys_level0_sa; isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, &phys_level0_sa); + assert(phys_level0_sa.w % fmtl->bw == 0); + assert(phys_level0_sa.h % fmtl->bh == 0); enum isl_array_pitch_span array_pitch_span = isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); diff --git a/src/isl/isl.h b/src/isl/isl.h index 02b92703c27..0faf671af2b 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -728,7 +728,7 @@ struct isl_surf { /** * Physical extent of the surface's base level, in units of physical - * surface samples. + * surface samples and aligned to the format's compression block. * * Consider isl_dim_layout as an operator that transforms a logical surface * layout to a physical surface layout. Then -- cgit v1.2.3 From 89b68dc8d09a41fc719cbdf7a5bff42476601dab Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 5 Jan 2016 09:59:07 -0800 Subject: anv: Use isl_format_layout::bs instead of ::bpb For all formats used by Vulkan, 8 * bs == bpb. (bs=block_size_in_bytes, bpb=bits_per_block) --- src/vulkan/anv_formats.c | 2 +- src/vulkan/anv_image.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index a5c015ada08..d480ee7cb9d 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -247,7 +247,7 @@ anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, if (anv_fmt->surface_format == ISL_FORMAT_UNSUPPORTED) { return ISL_FORMAT_UNSUPPORTED; } else if (tiling == VK_IMAGE_TILING_OPTIMAL && - !util_is_power_of_two(anv_fmt->isl_layout->bpb)) { + !util_is_power_of_two(anv_fmt->isl_layout->bs)) { /* Tiled formats *must* be power-of-two because we need up upload * them with the render pipeline. 
For 3-channel formats, we fix * this by switching them over to RGBX or RGBA formats under the diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index bd550890596..79c6ba15a03 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -545,7 +545,7 @@ anv_CreateBufferView(VkDevice _device, anv_fill_buffer_surface_state(device, view->surface_state.map, view->format, view->offset, pCreateInfo->range, - format->isl_layout->bpb / 8); + format->isl_layout->bs); } else { view->surface_state = (struct anv_state){ 0 }; } @@ -560,7 +560,7 @@ anv_CreateBufferView(VkDevice _device, anv_fill_buffer_surface_state(device, view->storage_surface_state.map, storage_format, view->offset, pCreateInfo->range, - format->isl_layout->bpb / 8); + format->isl_layout->bs); } else { view->storage_surface_state = (struct anv_state){ 0 }; } @@ -658,6 +658,6 @@ anv_buffer_view_fill_image_param(struct anv_device *device, param->swizzling[0] = 0xff; param->swizzling[1] = 0xff; - param->stride[0] = isl_format_layouts[view->format].bpb / 8; + param->stride[0] = isl_format_layouts[view->format].bs; param->size[0] = view->range / param->stride[0]; } -- cgit v1.2.3 From 98af1cc6d7b9cd087cf70135b0f18a8a61160b88 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 5 Jan 2016 09:53:11 -0800 Subject: isl: Remove isl_format_layout::bpb struct isl_format_layout contained two near-redundant members: bpb (bits per block) and bs (block size). There do exist some hardware formats for which bpb != 8 * bs, but Vulkan does not use them. Therefore we don't need bpb. 
--- src/isl/isl.h | 1 - src/isl/isl_format_layout_gen.bash | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/isl.h b/src/isl/isl.h index 0faf671af2b..8dbacb6d4e8 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -643,7 +643,6 @@ struct isl_channel_layout { struct isl_format_layout { enum isl_format format; - uint16_t bpb; /**< Bits per block */ uint8_t bs; /**< Block size, in bytes, rounded towards 0 */ uint8_t bw; /**< Block width, in pixels */ uint8_t bh; /**< Block height, in pixels */ diff --git a/src/isl/isl_format_layout_gen.bash b/src/isl/isl_format_layout_gen.bash index 2511f299a7e..db883827376 100755 --- a/src/isl/isl_format_layout_gen.bash +++ b/src/isl/isl_format_layout_gen.bash @@ -99,7 +99,7 @@ do cat < Date: Tue, 5 Jan 2016 10:39:21 -0800 Subject: isl: Prefer linear tiling for 1D surfaces --- src/isl/isl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index f6b392cfe72..4a1c9f4a94a 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -166,6 +166,16 @@ isl_surf_choose_tiling(const struct isl_device *dev, /* Of the tiling modes remaining, choose the one that offers the best * performance. */ + + if (info->dim == ISL_SURF_DIM_1D) { + /* Prefer linear for 1D surfaces because they do not benefit from + * tiling. To the contrary, tiling leads to wasted memory and poor + * memory locality due to the swizzling and alignment restrictions + * required in tiled surfaces. 
+ */ + CHOOSE(ISL_TILING_LINEAR); + } + CHOOSE(ISL_TILING_Ys); CHOOSE(ISL_TILING_Yf); CHOOSE(ISL_TILING_Y0); -- cgit v1.2.3 From 81357866051fe5049f7327081da257278a3a24a3 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 5 Jan 2016 11:28:49 -0800 Subject: isl: Document gen7_filter_tiling() --- src/isl/isl_gen7.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/isl/isl_gen7.c b/src/isl/isl_gen7.c index 9984f61b2a4..4484abb8191 100644 --- a/src/isl/isl_gen7.c +++ b/src/isl/isl_gen7.c @@ -185,6 +185,17 @@ gen7_format_needs_valign2(const struct isl_device *dev, format == ISL_FORMAT_R32G32B32_FLOAT; } +/** + * @brief Filter out tiling flags that are incompatible with the surface. + * + * The resultant outgoing @a flags is a subset of the incoming @a flags. The + * outgoing flags may be empty (0x0) if the incoming flags were too + * restrictive. + * + * For example, if the surface will be used for a display + * (ISL_SURF_USAGE_DISPLAY_BIT), then this function filters out all tiling + * flags except ISL_TILING_X_BIT and ISL_TILING_LINEAR_BIT. + */ void gen7_filter_tiling(const struct isl_device *dev, const struct isl_surf_init_info *restrict info, -- cgit v1.2.3 From 8d6f0a1b8020c9ab33a56d3aacdbc641d20fdacf Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 5 Jan 2016 11:35:44 -0800 Subject: isl: Don't force linear for 1d surfaces in gen7_filter_tiling() gen7_filter_tiling() should filter out only tiling flags that are incompatible with the surface. It shouldn't make performance decisions, such as forcing linear for 1D; that's the role of the caller. 
--- src/isl/isl_gen7.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'src') diff --git a/src/isl/isl_gen7.c b/src/isl/isl_gen7.c index 4484abb8191..7064e852e65 100644 --- a/src/isl/isl_gen7.c +++ b/src/isl/isl_gen7.c @@ -265,14 +265,6 @@ gen7_filter_tiling(const struct isl_device *dev, *flags &= (ISL_TILING_ANY_Y_MASK | ISL_TILING_W_BIT); } - /* For 1D surfaces, use linear when possible. 1D surfaces (array and - * non-array) do not benefit from tiling. In fact, it leads to less - * efficient use of memory due to tile alignment. - */ - if (info->dim == ISL_SURF_DIM_1D && (*flags & ISL_TILING_LINEAR_BIT)) { - *flags = ISL_TILING_LINEAR_BIT; - } - /* workaround */ if (ISL_DEV_GEN(dev) == 7 && gen7_format_needs_valign2(dev, info->format) && -- cgit v1.2.3 From f551047751e8de826ebc13c9f5069d9842d2f884 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 5 Jan 2016 11:43:25 -0800 Subject: vk: Destroy device->mutex when destroying the device --- src/vulkan/anv_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 40914aeb1cc..30ab0b2e739 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -783,6 +783,8 @@ void anv_DestroyDevice( close(device->fd); + pthread_mutex_destroy(&device->mutex); + anv_free(&device->alloc, device); } -- cgit v1.2.3 From 30521fb19e01b6e34cba4eaff40b9b782000b5db Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 5 Jan 2016 12:00:54 -0800 Subject: vk: Implement a basic pipeline cache This is not really a cache yet, but it allows us to share one state stream for all pipelines, which means we can bump the block size without wasting a lot of memory. 
--- src/vulkan/anv_device.c | 4 +- src/vulkan/anv_meta.c | 2 + src/vulkan/anv_meta_clear.c | 1 + src/vulkan/anv_pipeline.c | 147 ++++++++++++++++++++++++++++++-------------- src/vulkan/anv_private.h | 25 +++++++- src/vulkan/gen7_pipeline.c | 5 +- src/vulkan/gen8_pipeline.c | 10 +-- 7 files changed, 141 insertions(+), 53 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 30ab0b2e739..74b813e9e40 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -723,7 +723,9 @@ VkResult anv_CreateDevice( anv_state_pool_init(&device->dynamic_state_pool, &device->dynamic_state_block_pool); - anv_block_pool_init(&device->instruction_block_pool, device, 8192); + anv_block_pool_init(&device->instruction_block_pool, device, 64 * 1024); + anv_pipeline_cache_init(&device->default_pipeline_cache, device); + anv_block_pool_init(&device->surface_state_block_pool, device, 4096); anv_state_pool_init(&device->surface_state_pool, diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index bb37899ce37..75473a1b18c 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -392,6 +392,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, NULL, &device->meta_state.blit.pipeline_2d_src); if (result != VK_SUCCESS) @@ -399,6 +400,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, NULL, &device->meta_state.blit.pipeline_3d_src); if (result != VK_SUCCESS) diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 6873c4e8e6b..1a4300c07e3 100644 --- a/src/vulkan/anv_meta_clear.c +++ 
b/src/vulkan/anv_meta_clear.c @@ -130,6 +130,7 @@ create_pipeline(struct anv_device *device, VkPipeline pipeline_h; result = anv_graphics_pipeline_create(device_h, + VK_NULL_HANDLE, &(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .stageCount = 2, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index ad47f9661e4..97ad96b46bd 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -146,15 +146,67 @@ anv_shader_compile_to_nir(struct anv_device *device, return nir; } +void +anv_pipeline_cache_init(struct anv_pipeline_cache *cache, + struct anv_device *device) +{ + cache->device = device; + anv_state_stream_init(&cache->program_stream, + &device->instruction_block_pool); + pthread_mutex_init(&cache->mutex, NULL); +} + +void +anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) +{ + anv_state_stream_finish(&cache->program_stream); + pthread_mutex_destroy(&cache->mutex); +} + +static uint32_t +anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const void *data, size_t size) +{ + pthread_mutex_lock(&cache->mutex); + + struct anv_state state = + anv_state_stream_alloc(&cache->program_stream, size, 64); + + pthread_mutex_unlock(&cache->mutex); + + assert(size < cache->program_stream.block_pool->block_size); + + memcpy(state.map, data, size); + + if (!cache->device->info.has_llc) + anv_state_clflush(state); + + return state.offset; +} + VkResult anv_CreatePipelineCache( - VkDevice device, + VkDevice _device, const VkPipelineCacheCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipelineCache* pPipelineCache) { - *pPipelineCache = (VkPipelineCache)1; + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline_cache *cache; - stub_return(VK_SUCCESS); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + cache = anv_alloc2(&device->alloc, pAllocator, + sizeof(*cache), 8, + 
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cache == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_pipeline_cache_init(cache, device); + + *pPipelineCache = anv_pipeline_cache_to_handle(cache); + + return VK_SUCCESS; } void anv_DestroyPipelineCache( @@ -162,6 +214,12 @@ void anv_DestroyPipelineCache( VkPipelineCache _cache, const VkAllocationCallbacks* pAllocator) { + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + anv_pipeline_cache_finish(cache); + + anv_free2(&device->alloc, pAllocator, cache); } VkResult anv_GetPipelineCacheData( @@ -171,7 +229,8 @@ VkResult anv_GetPipelineCacheData( void* pData) { *pDataSize = 0; - stub_return(VK_SUCCESS); + + return VK_SUCCESS; } VkResult anv_MergePipelineCaches( @@ -193,7 +252,6 @@ void anv_DestroyPipeline( anv_reloc_list_finish(&pipeline->batch_relocs, pAllocator ? pAllocator : &device->alloc); - anv_state_stream_finish(&pipeline->program_stream); if (pipeline->blend_state.map) anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); anv_free2(&device->alloc, pAllocator, pipeline); @@ -390,23 +448,6 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, return nir; } -static uint32_t -anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, - const void *data, size_t size) -{ - struct anv_state state = - anv_state_stream_alloc(&pipeline->program_stream, size, 64); - - assert(size < pipeline->program_stream.block_pool->block_size); - - memcpy(state.map, data, size); - - if (!pipeline->device->info.has_llc) - anv_state_clflush(state); - - return state.offset; -} - static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, gl_shader_stage stage, @@ -432,6 +473,7 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, static VkResult anv_pipeline_compile_vs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *info, struct anv_shader_module *module, const char 
*entrypoint) @@ -476,7 +518,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, } const uint32_t offset = - anv_pipeline_upload_kernel(pipeline, shader_code, code_size); + anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { pipeline->vs_simd8 = offset; pipeline->vs_vec4 = NO_KERNEL; @@ -495,6 +537,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, static VkResult anv_pipeline_compile_gs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *info, struct anv_shader_module *module, const char *entrypoint) @@ -537,7 +580,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, /* TODO: SIMD8 GS */ pipeline->gs_kernel = - anv_pipeline_upload_kernel(pipeline, shader_code, code_size); + anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); pipeline->gs_vertex_count = nir->info.gs.vertices_in; ralloc_free(mem_ctx); @@ -550,6 +593,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, static VkResult anv_pipeline_compile_fs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *info, struct anv_shader_module *module, const char *entrypoint) @@ -590,8 +634,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - uint32_t offset = anv_pipeline_upload_kernel(pipeline, - shader_code, code_size); + uint32_t offset = + anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); if (prog_data->no_8) pipeline->ps_simd8 = NO_KERNEL; else @@ -627,6 +671,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, VkResult anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, const VkComputePipelineCreateInfo *info, struct anv_shader_module *module, const char *entrypoint) @@ -664,8 +709,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, return 
vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - pipeline->cs_simd = anv_pipeline_upload_kernel(pipeline, - shader_code, code_size); + pipeline->cs_simd = + anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); ralloc_free(mem_ctx); anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, @@ -945,7 +990,9 @@ anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) } VkResult -anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, +anv_pipeline_init(struct anv_pipeline *pipeline, + struct anv_device *device, + struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, const VkAllocationCallbacks *alloc) @@ -971,9 +1018,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); pipeline->batch.relocs = &pipeline->batch_relocs; - anv_state_stream_init(&pipeline->program_stream, - &device->instruction_block_pool); - anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); if (pCreateInfo->pTessellationState) @@ -1005,13 +1049,13 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, switch (pCreateInfo->pStages[i].stage) { case VK_SHADER_STAGE_VERTEX_BIT: - anv_pipeline_compile_vs(pipeline, pCreateInfo, module, entrypoint); + anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module, entrypoint); break; case VK_SHADER_STAGE_GEOMETRY_BIT: - anv_pipeline_compile_gs(pipeline, pCreateInfo, module, entrypoint); + anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module, entrypoint); break; case VK_SHADER_STAGE_FRAGMENT_BIT: - anv_pipeline_compile_fs(pipeline, pCreateInfo, module, entrypoint); + anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, module, entrypoint); break; default: anv_finishme("Unsupported shader stage"); @@ -1083,23 +1127,28 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device 
*device, VkResult anv_graphics_pipeline_create( VkDevice _device, + VkPipelineCache _cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) { ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + if (cache == NULL) + cache = &device->default_pipeline_cache; switch (device->info.gen) { case 7: if (device->info.is_haswell) - return gen75_graphics_pipeline_create(_device, pCreateInfo, extra, pAllocator, pPipeline); + return gen75_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); else - return gen7_graphics_pipeline_create(_device, pCreateInfo, extra, pAllocator, pPipeline); + return gen7_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); case 8: - return gen8_graphics_pipeline_create(_device, pCreateInfo, extra, pAllocator, pPipeline); + return gen8_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); case 9: - return gen9_graphics_pipeline_create(_device, pCreateInfo, extra, pAllocator, pPipeline); + return gen9_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); default: unreachable("unsupported gen\n"); } @@ -1117,7 +1166,9 @@ VkResult anv_CreateGraphicsPipelines( unsigned i = 0; for (; i < count; i++) { - result = anv_graphics_pipeline_create(_device, &pCreateInfos[i], + result = anv_graphics_pipeline_create(_device, + pipelineCache, + &pCreateInfos[i], NULL, pAllocator, &pPipelines[i]); if (result != VK_SUCCESS) { for (unsigned j = 0; j < i; j++) { @@ -1133,22 +1184,27 @@ VkResult anv_CreateGraphicsPipelines( static VkResult anv_compute_pipeline_create( VkDevice _device, + VkPipelineCache _cache, const VkComputePipelineCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline) { ANV_FROM_HANDLE(anv_device, device, _device); + 
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + if (cache == NULL) + cache = &device->default_pipeline_cache; switch (device->info.gen) { case 7: if (device->info.is_haswell) - return gen75_compute_pipeline_create(_device, pCreateInfo, pAllocator, pPipeline); + return gen75_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); else - return gen7_compute_pipeline_create(_device, pCreateInfo, pAllocator, pPipeline); + return gen7_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); case 8: - return gen8_compute_pipeline_create(_device, pCreateInfo, pAllocator, pPipeline); + return gen8_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); case 9: - return gen9_compute_pipeline_create(_device, pCreateInfo, pAllocator, pPipeline); + return gen9_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); default: unreachable("unsupported gen\n"); } @@ -1166,7 +1222,8 @@ VkResult anv_CreateComputePipelines( unsigned i = 0; for (; i < count; i++) { - result = anv_compute_pipeline_create(_device, &pCreateInfos[i], + result = anv_compute_pipeline_create(_device, pipelineCache, + &pCreateInfos[i], pAllocator, &pPipelines[i]); if (result != VK_SUCCESS) { for (unsigned j = 0; j < i; j++) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8667b45cfc0..59e8005904d 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -577,6 +577,16 @@ struct anv_queue { struct anv_state_pool * pool; }; +struct anv_pipeline_cache { + struct anv_device * device; + struct anv_state_stream program_stream; + pthread_mutex_t mutex; +}; + +void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, + struct anv_device *device); +void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); + struct anv_device { VK_LOADER_DATA _loader_data; @@ -595,6 +605,8 @@ struct anv_device { struct anv_state_pool dynamic_state_pool; struct anv_block_pool 
instruction_block_pool; + struct anv_pipeline_cache default_pipeline_cache; + struct anv_block_pool surface_state_block_pool; struct anv_state_pool surface_state_pool; @@ -1288,7 +1300,6 @@ struct anv_pipeline { } urb; VkShaderStageFlags active_stages; - struct anv_state_stream program_stream; struct anv_state blend_state; uint32_t vs_simd8; uint32_t vs_vec4; @@ -1337,18 +1348,21 @@ struct anv_graphics_pipeline_create_info { VkResult anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, + struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, const VkAllocationCallbacks *alloc); VkResult anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, const VkComputePipelineCreateInfo *info, struct anv_shader_module *module, const char *entrypoint_name); VkResult anv_graphics_pipeline_create(VkDevice device, + VkPipelineCache cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, const VkAllocationCallbacks *alloc, @@ -1356,6 +1370,7 @@ anv_graphics_pipeline_create(VkDevice device, VkResult gen7_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, const VkAllocationCallbacks *alloc, @@ -1363,6 +1378,7 @@ gen7_graphics_pipeline_create(VkDevice _device, VkResult gen75_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, const VkAllocationCallbacks *alloc, @@ -1370,34 +1386,40 @@ gen75_graphics_pipeline_create(VkDevice _device, VkResult gen8_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, const 
VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult gen9_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult gen7_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, const VkComputePipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult gen75_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, const VkComputePipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult gen8_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, const VkComputePipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); VkResult gen9_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, const VkComputePipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); @@ -1698,6 +1720,7 @@ ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView); +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, VkPipelineCache) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 7dedcac08af..9b90c6e3120 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -264,6 +264,7 @@ scratch_space(const struct brw_stage_prog_data *prog_data) GENX_FUNC(GEN7, GEN75) VkResult genX(graphics_pipeline_create)( VkDevice _device, + struct anv_pipeline_cache * cache, 
const VkGraphicsPipelineCreateInfo* pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, const VkAllocationCallbacks* pAllocator, @@ -280,7 +281,8 @@ genX(graphics_pipeline_create)( if (pipeline == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_pipeline_init(pipeline, device, pCreateInfo, extra, pAllocator); + result = anv_pipeline_init(pipeline, device, cache, + pCreateInfo, extra, pAllocator); if (result != VK_SUCCESS) { anv_free2(&device->alloc, pAllocator, pipeline); return result; @@ -520,6 +522,7 @@ genX(graphics_pipeline_create)( GENX_FUNC(GEN7, GEN75) VkResult genX(compute_pipeline_create)( VkDevice _device, + struct anv_pipeline_cache * cache, const VkComputePipelineCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline) diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index a405d290d50..e6cb145d522 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -273,6 +273,7 @@ emit_ds_state(struct anv_pipeline *pipeline, VkResult genX(graphics_pipeline_create)( VkDevice _device, + struct anv_pipeline_cache * cache, const VkGraphicsPipelineCreateInfo* pCreateInfo, const struct anv_graphics_pipeline_create_info *extra, const VkAllocationCallbacks* pAllocator, @@ -290,7 +291,8 @@ genX(graphics_pipeline_create)( if (pipeline == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_pipeline_init(pipeline, device, pCreateInfo, extra, pAllocator); + result = anv_pipeline_init(pipeline, device, cache, + pCreateInfo, extra, pAllocator); if (result != VK_SUCCESS) { anv_free2(&device->alloc, pAllocator, pipeline); return result; @@ -603,6 +605,7 @@ genX(graphics_pipeline_create)( VkResult genX(compute_pipeline_create)( VkDevice _device, + struct anv_pipeline_cache * cache, const VkComputePipelineCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline) @@ -633,9 +636,6 @@ VkResult genX(compute_pipeline_create)( 
pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); pipeline->batch.relocs = &pipeline->batch_relocs; - anv_state_stream_init(&pipeline->program_stream, - &device->instruction_block_pool); - /* When we free the pipeline, we detect stages based on the NULL status * of various prog_data pointers. Make them NULL by default. */ @@ -651,7 +651,7 @@ VkResult genX(compute_pipeline_create)( assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); - anv_pipeline_compile_cs(pipeline, pCreateInfo, module, + anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, pCreateInfo->stage.pName); pipeline->use_repclear = false; -- cgit v1.2.3 From bff45dc44e92ab9dd790a2ba00d5b29a5527072b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 5 Jan 2016 13:04:07 -0800 Subject: nir: Add an indirect deref lowering pass --- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir.h | 2 + src/glsl/nir/nir_lower_indirect_derefs.c | 239 +++++++++++++++++++++++++++++++ 3 files changed, 242 insertions(+) create mode 100644 src/glsl/nir/nir_lower_indirect_derefs.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 65c493cd677..97fac8609b6 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -48,6 +48,7 @@ NIR_FILES = \ nir/nir_lower_returns.c \ nir/nir_lower_global_vars_to_local.c \ nir/nir_lower_gs_intrinsics.c \ + nir/nir_lower_indirect_derefs.c \ nir/nir_lower_load_const_to_scalar.c \ nir/nir_lower_locals_to_regs.c \ nir/nir_lower_idiv.c \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 674064c0e20..59f6f6829a4 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -2035,6 +2035,8 @@ void nir_lower_var_copies(nir_shader *shader); bool nir_lower_global_vars_to_local(nir_shader *shader); +bool nir_lower_indirect_derefs(nir_shader *shader, uint32_t mode_mask); + bool nir_lower_locals_to_regs(nir_shader *shader); 
void nir_lower_outputs_to_temporaries(nir_shader *shader, diff --git a/src/glsl/nir/nir_lower_indirect_derefs.c b/src/glsl/nir/nir_lower_indirect_derefs.c new file mode 100644 index 00000000000..69f2df4ba6d --- /dev/null +++ b/src/glsl/nir/nir_lower_indirect_derefs.c @@ -0,0 +1,239 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "nir.h" +#include "nir_builder.h" + +static void +emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, + nir_deref_var *deref, nir_deref *tail, + nir_ssa_def **dest, nir_ssa_def *src); + +static void +emit_indirect_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, + nir_deref_var *deref, nir_deref *arr_parent, + int start, int end, + nir_ssa_def **dest, nir_ssa_def *src) +{ + assert(arr_parent->child && + arr_parent->child->deref_type == nir_deref_type_array); + nir_deref_array *arr = nir_deref_as_array(arr_parent->child); + assert(arr->deref_array_type == nir_deref_array_type_indirect); + assert(arr->indirect.is_ssa); + + assert(start < end); + if (start == end - 1) { + /* Base case. Just emit the load/store op */ + nir_deref_array direct = *arr; + direct.deref_array_type = nir_deref_array_type_direct; + direct.base_offset += start; + direct.indirect = NIR_SRC_INIT; + + arr_parent->child = &direct.deref; + emit_load_store(b, orig_instr, deref, &arr->deref, dest, src); + arr_parent->child = &arr->deref; + } else { + int mid = start + (end - start) / 2; + + nir_ssa_def *then_dest, *else_dest; + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(nir_ilt(b, arr->indirect.ssa, + nir_imm_int(b, mid))); + nir_cf_node_insert(b->cursor, &if_stmt->cf_node); + + b->cursor = nir_after_cf_list(&if_stmt->then_list); + emit_indirect_load_store(b, orig_instr, deref, arr_parent, + start, mid, &then_dest, src); + + b->cursor = nir_after_cf_list(&if_stmt->else_list); + emit_indirect_load_store(b, orig_instr, deref, arr_parent, + mid, end, &else_dest, src); + + b->cursor = nir_after_cf_node(&if_stmt->cf_node); + + if (src == NULL) { + /* We're a load. 
We need to insert a phi node */ + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + then_dest->num_components, NULL); + + nir_phi_src *src0 = ralloc(phi, nir_phi_src); + src0->pred = nir_cf_node_as_block(nir_if_last_then_node(if_stmt)); + src0->src = nir_src_for_ssa(then_dest); + exec_list_push_tail(&phi->srcs, &src0->node); + + nir_phi_src *src1 = ralloc(phi, nir_phi_src); + src1->pred = nir_cf_node_as_block(nir_if_last_else_node(if_stmt)); + src1->src = nir_src_for_ssa(else_dest); + exec_list_push_tail(&phi->srcs, &src1->node); + + nir_builder_instr_insert(b, &phi->instr); + *dest = &phi->dest.ssa; + } + } +} + +static void +emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, + nir_deref_var *deref, nir_deref *tail, + nir_ssa_def **dest, nir_ssa_def *src) +{ + for (; tail->child; tail = tail->child) { + if (tail->child->deref_type != nir_deref_type_array) + continue; + + nir_deref_array *arr = nir_deref_as_array(tail->child); + if (arr->deref_array_type != nir_deref_array_type_indirect) + continue; + + int length = glsl_get_length(tail->type); + + emit_indirect_load_store(b, orig_instr, deref, tail, -arr->base_offset, + length - arr->base_offset, dest, src); + return; + } + + assert(tail && tail->child == NULL); + + /* We reached the end of the deref chain. 
Emit the instruction */ + + if (src == NULL) { + /* This is a load instruction */ + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->num_components = orig_instr->num_components; + load->variables[0] = + nir_deref_as_var(nir_copy_deref(load, &deref->deref)); + nir_ssa_dest_init(&load->instr, &load->dest, + load->num_components, NULL); + nir_builder_instr_insert(b, &load->instr); + *dest = &load->dest.ssa; + } else { + /* This is a store instruction */ + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->num_components = orig_instr->num_components; + store->const_index[0] = orig_instr->const_index[0]; /* writemask */ + store->variables[0] = + nir_deref_as_var(nir_copy_deref(store, &deref->deref)); + store->src[0] = nir_src_for_ssa(src); + nir_builder_instr_insert(b, &store->instr); + } +} + +static bool +deref_has_indirect(nir_deref_var *deref) +{ + for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) { + if (tail->deref_type != nir_deref_type_array) + continue; + + nir_deref_array *arr = nir_deref_as_array(tail); + if (arr->deref_array_type == nir_deref_array_type_indirect) + return true; + } + + return false; +} + +struct lower_indirect_state { + nir_builder builder; + uint32_t mode_mask; + bool progress; +}; + +static bool +lower_indirect_block(nir_block *block, void *void_state) +{ + struct lower_indirect_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_var && + intrin->intrinsic != nir_intrinsic_store_var) + continue; + + if (!deref_has_indirect(intrin->variables[0])) + continue; + + /* Only lower variables whose mode is in the mask */ + if (!(state->mode_mask & (1 << intrin->variables[0]->var->data.mode))) + continue; + + state->builder.cursor = 
nir_before_instr(&intrin->instr); + + if (intrin->intrinsic == nir_intrinsic_load_var) { + nir_ssa_def *result; + emit_load_store(&state->builder, intrin, intrin->variables[0], + &intrin->variables[0]->deref, &result, NULL); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(result)); + } else { + assert(intrin->src[0].is_ssa); + emit_load_store(&state->builder, intrin, intrin->variables[0], + &intrin->variables[0]->deref, NULL, intrin->src[0].ssa); + } + nir_instr_remove(&intrin->instr); + state->progress = true; + } + + return true; +} + +static bool +lower_indirects_impl(nir_function_impl *impl, uint32_t mode_mask) +{ + struct lower_indirect_state state; + + state.progress = false; + state.mode_mask = mode_mask; + nir_builder_init(&state.builder, impl); + + nir_foreach_block(impl, lower_indirect_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_none); + + return state.progress; +} + +/** Lowers indirect variable loads/stores to direct loads/stores. + * + * The pass works by replacing any indirect load or store with an if-ladder + * that does a binary search on the array index. 
+ */ +bool +nir_lower_indirect_derefs(nir_shader *shader, uint32_t mode_mask) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = lower_indirects_impl(function->impl, mode_mask) || progress; + } + + return progress; +} -- cgit v1.2.3 From ec899f6b4220509626c14b32a2b048dc3df49cce Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 5 Jan 2016 13:42:00 -0800 Subject: anv/pipeline: Lower indirect temporaries and inputs --- src/vulkan/anv_pipeline.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 97ad96b46bd..769afe8ed72 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -143,6 +143,12 @@ anv_shader_compile_to_nir(struct anv_device *device, nir_shader_gather_info(nir, entry_point->impl); + uint32_t indirect_mask = (1 << nir_var_shader_in); + if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) + indirect_mask |= 1 << nir_var_local; + + nir_lower_indirect_derefs(nir, indirect_mask); + return nir; } -- cgit v1.2.3 From 71a25a0b074ecaf4d287d1338746075170a17d4f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 5 Jan 2016 14:59:40 -0800 Subject: nir/spirv: Simplify phi node handling Instead of trying to crawl through predecessor chains and build phi nodes, we just do a poor-man's out-of-ssa on the spot. The into-SSA pass will deal with putting the actual phi nodes in for us. 
--- src/glsl/nir/spirv/spirv_to_nir.c | 119 ++++++++++---------------------------- src/glsl/nir/spirv/vtn_cfg.c | 5 +- src/glsl/nir/spirv/vtn_private.h | 9 +-- 3 files changed, 38 insertions(+), 95 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 06a99f3031e..875b18cebdd 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3128,87 +3128,28 @@ vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, nir_builder_instr_insert(&b->nb, &intrin->instr); } -static void -vtn_phi_node_init(struct vtn_builder *b, struct vtn_ssa_value *val) -{ - if (glsl_type_is_vector_or_scalar(val->type)) { - nir_phi_instr *phi = nir_phi_instr_create(b->shader); - nir_ssa_dest_init(&phi->instr, &phi->dest, - glsl_get_vector_elements(val->type), NULL); - exec_list_make_empty(&phi->srcs); - nir_builder_instr_insert(&b->nb, &phi->instr); - val->def = &phi->dest.ssa; - } else { - unsigned elems = glsl_get_length(val->type); - for (unsigned i = 0; i < elems; i++) - vtn_phi_node_init(b, val->elems[i]); - } -} - -static struct vtn_ssa_value * -vtn_phi_node_create(struct vtn_builder *b, const struct glsl_type *type) -{ - struct vtn_ssa_value *val = vtn_create_ssa_value(b, type); - vtn_phi_node_init(b, val); - return val; -} - static void vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) { + /* For handling phi nodes, we do a poor-man's out-of-ssa on the spot. + * For each phi, we create a variable with the appropreate type and do a + * load from that variable. Then, in a second pass, we add stores to + * that variable to each of the predecessor blocks. + * + * We could do something more intelligent here. However, in order to + * handle loops and things properly, we really need dominance + * information. It would end up basically being the into-SSA algorithm + * all over again. It's easier if we just let lower_vars_to_ssa do that + * for us instead of repeating it here. 
+ */ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->ssa = vtn_phi_node_create(b, type); -} -static void -vtn_phi_node_add_src(struct vtn_ssa_value *phi, const nir_block *pred, - struct vtn_ssa_value *val) -{ - assert(phi->type == val->type); - if (glsl_type_is_vector_or_scalar(phi->type)) { - nir_phi_instr *phi_instr = nir_instr_as_phi(phi->def->parent_instr); - nir_phi_src *src = ralloc(phi_instr, nir_phi_src); - src->pred = (nir_block *) pred; - src->src = NIR_SRC_INIT; - exec_list_push_tail(&phi_instr->srcs, &src->node); - nir_instr_rewrite_src(&phi_instr->instr, &src->src, - nir_src_for_ssa(val->def)); - } else { - unsigned elems = glsl_get_length(phi->type); - for (unsigned i = 0; i < elems; i++) - vtn_phi_node_add_src(phi->elems[i], pred, val->elems[i]); - } -} + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_variable *phi_var = + nir_local_variable_create(b->nb.impl, type->type, "phi"); + _mesa_hash_table_insert(b->phi_table, w, phi_var); -static struct vtn_ssa_value * -vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block, - const struct glsl_type *type, const uint32_t *w, - unsigned count) -{ - struct hash_entry *entry = _mesa_hash_table_search(b->block_table, block); - if (entry) { - struct vtn_block *spv_block = entry->data; - for (unsigned off = 4; off < count; off += 2) { - if (spv_block == vtn_value(b, w[off], vtn_value_type_block)->block) { - return vtn_ssa_value(b, w[off - 1]); - } - } - } - - b->nb.cursor = nir_before_block(block); - struct vtn_ssa_value *phi = vtn_phi_node_create(b, type); - - struct set_entry *entry2; - set_foreach(block->predecessors, entry2) { - nir_block *pred = (nir_block *) entry2->key; - struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, type, w, - count); - vtn_phi_node_add_src(phi, pred, val); - } - - return phi; + val->ssa = vtn_variable_load(b, 
nir_deref_var_create(b, phi_var), type); } static bool @@ -3223,15 +3164,20 @@ vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, if (opcode != SpvOpPhi) return true; - struct vtn_ssa_value *phi = vtn_value(b, w[2], vtn_value_type_ssa)->ssa; + struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w); + assert(phi_entry); + nir_variable *phi_var = phi_entry->data; - struct set_entry *entry; - set_foreach(b->block->block->predecessors, entry) { - nir_block *pred = (nir_block *) entry->key; + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, phi->type, w, - count); - vtn_phi_node_add_src(phi, pred, val); + for (unsigned i = 3; i < count; i += 2) { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]); + struct vtn_block *pred = + vtn_value(b, w[i + 1], vtn_value_type_block)->block; + + b->nb.cursor = nir_after_block_before_jump(pred->end_block); + + vtn_variable_store(b, src, nir_deref_var_create(b, phi_var), type); } return true; @@ -3536,11 +3482,8 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpLine: break; /* Ignored for now */ - case SpvOpLabel: { - struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; - assert(block->block == nir_cursor_current_block(b->nb.cursor)); + case SpvOpLabel: break; - } case SpvOpLoopMerge: case SpvOpSelectionMerge: @@ -3828,8 +3771,8 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->impl = func->impl; b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, _mesa_key_pointer_equal); - b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer, - _mesa_key_pointer_equal); + b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); vtn_function_emit(b, func, vtn_handle_body_instruction); vtn_foreach_instruction(b, func->start_block->label, func->end, vtn_handle_phi_second_pass); diff --git a/src/glsl/nir/spirv/vtn_cfg.c 
b/src/glsl/nir/spirv/vtn_cfg.c index eddaa8c4672..a8e149a00a6 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -491,13 +491,12 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, case vtn_cf_node_type_block: { struct vtn_block *block = (struct vtn_block *)node; - block->block = nir_cursor_current_block(b->nb.cursor); - _mesa_hash_table_insert(b->block_table, block->block, block); - vtn_foreach_instruction(b, block->label, block->merge ? block->merge : block->branch, handler); + block->end_block = nir_cursor_current_block(b->nb.cursor); + if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) { struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]); vtn_variable_store(b, src, diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 2af0e357acd..5e2b3563d15 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -146,7 +146,8 @@ struct vtn_block { /** Points to the switch case started by this block (if any) */ struct vtn_case *switch_case; - nir_block *block; + /** The last block in this SPIR-V block. */ + nir_block *end_block; }; struct vtn_function { @@ -301,10 +302,10 @@ struct vtn_builder { struct hash_table *const_table; /* - * Map from nir_block to the vtn_block which ends with it -- used for - * handling phi nodes. + * Map from phi instructions (pointer to the start of the instruction) + * to the variable corresponding to it. */ - struct hash_table *block_table; + struct hash_table *phi_table; /* * NIR variable for each SPIR-V builtin. -- cgit v1.2.3 From 506a467f16453fe51c65f3f14fb2a37d5ba662d2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 5 Jan 2016 15:56:59 -0800 Subject: nir/spirv/cfg: Assert that blocks only ever get added once This effectively prevents infinite loops in cfg_walk_blocks. 
--- src/glsl/nir/spirv/vtn_cfg.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index a8e149a00a6..646b960d179 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -281,6 +281,7 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, continue; } + assert(block->node.link.next == NULL); list_addtail(&block->node.link, cf_list); switch (*block->branch & SpvOpCodeMask) { -- cgit v1.2.3 From 7a069bea5dd76daf531d0febef1d0a2cf154d6d9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 5 Jan 2016 16:18:01 -0800 Subject: nir/spirv: Fix switch statements with duplicate cases --- src/glsl/nir/spirv/vtn_cfg.c | 17 +++++------------ src/glsl/nir/spirv/vtn_private.h | 3 +++ 2 files changed, 8 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index 646b960d179..9330ac03769 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -164,6 +164,7 @@ vtn_add_case(struct vtn_builder *b, struct vtn_switch *swtch, struct vtn_case *c = ralloc(b, struct vtn_case); list_inithead(&c->body); + c->start_block = case_block; c->fallthrough = NULL; nir_array_init(&c->values, b); c->is_default = false; @@ -399,18 +400,10 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, * the blocks, we also gather the much-needed fall-through * information. 
*/ - for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) { - struct vtn_block *case_block = - vtn_value(b, *w, vtn_value_type_block)->block; - - if (case_block == break_block) - continue; - - assert(case_block->switch_case); - - vtn_cfg_walk_blocks(b, &case_block->switch_case->body, case_block, - case_block->switch_case, break_block, - NULL, loop_cont, NULL); + list_for_each_entry(struct vtn_case, cse, &swtch->cases, link) { + assert(cse->start_block != break_block); + vtn_cfg_walk_blocks(b, &cse->body, cse->start_block, cse, + break_block, NULL, loop_cont, NULL); } /* Finally, we walk over all of the cases one more time and put diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 5e2b3563d15..0fa7dd4b041 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -105,6 +105,9 @@ struct vtn_case { struct list_head body; + /* The block that starts this case */ + struct vtn_block *start_block; + /* The fallthrough case, if any */ struct vtn_case *fallthrough; -- cgit v1.2.3 From 94566d9b680f8ddb775213b4569d71e9c43d4772 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 5 Jan 2016 14:37:45 -0800 Subject: anv/meta: Teach meta how to blit from a 1D image Meta needed a VkShader with a 1D sampler type. 
--- src/vulkan/anv_meta.c | 29 +++++++++++++++++++++++++---- src/vulkan/anv_private.h | 3 +++ 2 files changed, 28 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 75473a1b18c..e351db08728 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -95,8 +95,9 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) tex->src[0].src = nir_src_for_ssa(nir_load_var(&b, tex_pos_in)); tex->dest_type = nir_type_float; /* TODO */ - if (tex_dim == GLSL_SAMPLER_DIM_2D) + if (tex_dim != GLSL_SAMPLER_DIM_3D) tex->is_array = true; + tex->coord_components = 3; tex->sampler = nir_deref_var_create(tex, sampler); @@ -228,6 +229,10 @@ anv_device_init_meta_blit_state(struct anv_device *device) .nir = build_nir_vertex_shader(false), }; + struct anv_shader_module fs_1d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), + }; + struct anv_shader_module fs_2d = { .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), }; @@ -390,13 +395,21 @@ anv_device_init_meta_blit_state(struct anv_device *device) .use_rectlist = true }; + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + NULL, &device->meta_state.blit.pipeline_1d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_layout; + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); result = anv_graphics_pipeline_create(anv_device_to_handle(device), VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, NULL, &device->meta_state.blit.pipeline_2d_src); if (result != VK_SUCCESS) - goto fail_pipeline_layout; + goto fail_pipeline_1d; pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); result = anv_graphics_pipeline_create(anv_device_to_handle(device), @@ -407,6 +420,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) goto fail_pipeline_2d; 
ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); ralloc_free(fs_2d.nir); ralloc_free(fs_3d.nir); @@ -415,6 +429,11 @@ anv_device_init_meta_blit_state(struct anv_device *device) fail_pipeline_2d: anv_DestroyPipeline(anv_device_to_handle(device), device->meta_state.blit.pipeline_2d_src, NULL); + + fail_pipeline_1d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, NULL); + fail_pipeline_layout: anv_DestroyPipelineLayout(anv_device_to_handle(device), device->meta_state.blit.pipeline_layout, NULL); @@ -426,6 +445,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) device->meta_state.blit.render_pass, NULL); ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); ralloc_free(fs_2d.nir); ralloc_free(fs_3d.nir); fail: @@ -595,8 +615,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, switch (src_image->type) { case VK_IMAGE_TYPE_1D: - anv_finishme("VK_IMAGE_TYPE_1D"); - pipeline = device->meta_state.blit.pipeline_2d_src; + pipeline = device->meta_state.blit.pipeline_1d_src; break; case VK_IMAGE_TYPE_2D: pipeline = device->meta_state.blit.pipeline_2d_src; @@ -1384,6 +1403,8 @@ anv_device_finish_meta(struct anv_device *device) /* Blit */ anv_DestroyRenderPass(anv_device_to_handle(device), device->meta_state.blit.render_pass, NULL); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, NULL); anv_DestroyPipeline(anv_device_to_handle(device), device->meta_state.blit.pipeline_2d_src, NULL); anv_DestroyPipeline(anv_device_to_handle(device), diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 59e8005904d..5bd50258c8a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -558,6 +558,9 @@ struct anv_meta_state { struct { VkRenderPass render_pass; + /** Pipeline that blits from a 1D image. */ + VkPipeline pipeline_1d_src; + /** Pipeline that blits from a 2D image. 
*/ VkPipeline pipeline_2d_src; -- cgit v1.2.3 From dcb9c11dc707476a555a20c5940339f31ed53610 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 5 Jan 2016 14:32:44 -0800 Subject: anv/gen9: Fix oob lookup of surface halign, valign For 1D surfaces and for surfaces with Yf or Ys tiling, the hardware ignores SurfaceVerticalAlignment and SurfaceHorizontalAlignment. Moreover, the anv_halign[] and anv_valign[] lookup tables may not even contain the surface's actual alignment values. So don't do the lookup for those surfaces. --- src/vulkan/gen8_state.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index a24eb192493..a2919a7c961 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -97,18 +97,28 @@ static void get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valign) { #if ANV_GENx10 >= 90 - /* In Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in units - * of surface elements (not pixels nor samples). For compressed formats, - * a "surface element" is defined as a compression block. For example, - * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2 - * format (ETC2 has a block height of 4), then the vertical alignment is - * 4 compression blocks or, equivalently, 16 pixels. - */ - struct isl_extent3d image_align_el - = isl_surf_get_image_alignment_el(surf); - - *halign = anv_halign[image_align_el.width]; - *valign = anv_valign[image_align_el.height]; + if (isl_tiling_is_std_y(surf->tiling) || + surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { + /* The hardware ignores the alignment values. Anyway, the surface's + * true alignment is likely outside the enum range of HALIGN* and + * VALIGN*. + */ + *halign = 0; + *valign = 0; + } else { + /* In Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in units + * of surface elements (not pixels nor samples). 
For compressed formats, + * a "surface element" is defined as a compression block. For example, + * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2 + * format (ETC2 has a block height of 4), then the vertical alignment is + * 4 compression blocks or, equivalently, 16 pixels. + */ + struct isl_extent3d image_align_el + = isl_surf_get_image_alignment_el(surf); + + *halign = anv_halign[image_align_el.width]; + *valign = anv_valign[image_align_el.height]; + } #else /* Pre-Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in * units of surface samples. For example, if SurfaceVerticalAlignment -- cgit v1.2.3 From 39d043f94a6c09d57d7dfcb15d2e5d913c85d611 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 5 Jan 2016 14:26:30 -0800 Subject: isl: Fix the documented units of isl_surf::row_pitch It's the pitch between surface elements, not between surface samples. --- src/isl/isl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/isl/isl.h b/src/isl/isl.h index 8dbacb6d4e8..15cfd0f4c56 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -747,7 +747,7 @@ struct isl_surf { uint32_t alignment; /** - * Pitch between vertically adjacent samples, in bytes. + * Pitch between vertically adjacent surface elements, in bytes. */ uint32_t row_pitch; -- cgit v1.2.3 From eea2d4d05987b4f8ad90a1588267f9495f1e9e99 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 5 Jan 2016 14:28:28 -0800 Subject: isl: Don't align phys_slice0_sa.width twice It's already aligned to the format's block width. Don't align it again in isl_calc_row_pitch(). 
--- src/isl/isl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 4a1c9f4a94a..72fc4b8d9e1 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -821,8 +821,8 @@ isl_calc_row_pitch(const struct isl_device *dev, * texels, and must be converted to bytes based on the surface format * being used to determine whether additional pages need to be defined. */ - row_pitch = MAX(row_pitch, - fmtl->bs * isl_align_div_npot(phys_slice0_sa->w, fmtl->bw)); + assert(phys_slice0_sa->w % fmtl->bw == 0); + row_pitch = MAX(row_pitch, fmtl->bs * phys_slice0_sa->w); switch (tile_info->tiling) { case ISL_TILING_LINEAR: -- cgit v1.2.3 From c1e890541ee5b4077841542eacfca371c815ab18 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 5 Jan 2016 14:30:03 -0800 Subject: isl/gen9: Support ISL_DIM_LAYOUT_GEN9_1D --- src/isl/isl.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 74 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 72fc4b8d9e1..1fd7d01d1c3 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -234,10 +234,6 @@ isl_choose_array_pitch_span(const struct isl_device *dev, { switch (dim_layout) { case ISL_DIM_LAYOUT_GEN9_1D: - if (ISL_DEV_GEN(dev) >= 9) - isl_finishme("%s:%s: [SKL+] 1d surface layout", __FILE__, __func__); - /* fallthrough */ - case ISL_DIM_LAYOUT_GEN4_2D: if (ISL_DEV_GEN(dev) >= 8) { /* QPitch becomes programmable in Broadwell. 
So choose the @@ -420,10 +416,6 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, unreachable("bad isl_dim_layout"); case ISL_DIM_LAYOUT_GEN9_1D: - if (ISL_DEV_GEN(dev) >= 9) - isl_finishme("%s:%s: [SKL+] 1d surface layout", __FILE__, __func__); - /* fallthrough */ - case ISL_DIM_LAYOUT_GEN4_2D: *phys_level0_sa = (struct isl_extent4d) { .w = info->width, @@ -652,6 +644,38 @@ isl_calc_phys_slice0_extent_sa_gen4_3d( }; } +/** + * A variant of isl_calc_phys_slice0_extent_sa() specific to + * ISL_DIM_LAYOUT_GEN9_1D. + */ +static void +isl_calc_phys_slice0_extent_sa_gen9_1d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(phys_level0_sa->height == 1); + assert(phys_level0_sa->depth == 1); + assert(info->samples == 1); + assert(image_align_sa->w >= fmtl->bw); + + uint32_t slice_w = 0; + const uint32_t W0 = phys_level0_sa->w; + + for (uint32_t l = 0; l < info->levels; ++l) { + uint32_t W = isl_minify(W0, l); + uint32_t w = isl_align_npot(W, image_align_sa->w); + + slice_w += w; + } + + *phys_slice0_sa = isl_extent2d(slice_w, 1); +} + /** * Calculate the physical extent of the surface's first array slice, in units * of surface samples. 
If the surface is multi-leveled, then the result will @@ -668,10 +692,10 @@ isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, { switch (dim_layout) { case ISL_DIM_LAYOUT_GEN9_1D: - if (ISL_DEV_GEN(dev) >= 9) - isl_finishme("%s:%s: [SKL+] physical layout of 1d surfaces", - __FILE__, __func__); - /*fallthrough*/ + isl_calc_phys_slice0_extent_sa_gen9_1d(dev, info, + image_align_sa, phys_level0_sa, + phys_slice0_sa); + return; case ISL_DIM_LAYOUT_GEN4_2D: isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, image_align_sa, phys_level0_sa, @@ -701,11 +725,8 @@ isl_calc_array_pitch_sa_rows(const struct isl_device *dev, switch (dim_layout) { case ISL_DIM_LAYOUT_GEN9_1D: - if (ISL_DEV_GEN(dev) >= 9) - isl_finishme("%s:%s: [SKL+] physical layout of 1d surfaces", - __FILE__, __func__); - /*fallthrough*/ - + /* Each row is an array slice */ + return 1; case ISL_DIM_LAYOUT_GEN4_2D: switch (array_pitch_span) { case ISL_ARRAY_PITCH_SPAN_COMPACT: @@ -1170,6 +1191,39 @@ get_image_offset_sa_gen4_3d(const struct isl_surf *surf, *y_offset_sa = y; } +/** + * A variant of isl_surf_get_image_offset_sa() specific to + * ISL_DIM_LAYOUT_GEN9_1D. 
+ */ +static void +get_image_offset_sa_gen9_1d(const struct isl_surf *surf, + uint32_t level, uint32_t layer, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(layer < surf->phys_level0_sa.array_len); + assert(surf->phys_level0_sa.height == 1); + assert(surf->phys_level0_sa.depth == 1); + assert(surf->samples == 1); + + const uint32_t W0 = surf->phys_level0_sa.width; + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + uint32_t x = layer * isl_surf_get_array_pitch_sa_rows(surf); + + for (uint32_t l = 0; l < level; ++l) { + uint32_t W = isl_minify(W0, l); + uint32_t w = isl_align_npot(W, image_align_sa.w); + + x += w; + } + + *x_offset_sa = x; + *y_offset_sa = 0; +} + void isl_surf_get_image_offset_sa(const struct isl_surf *surf, uint32_t level, @@ -1185,7 +1239,9 @@ isl_surf_get_image_offset_sa(const struct isl_surf *surf, switch (surf->dim_layout) { case ISL_DIM_LAYOUT_GEN9_1D: - isl_finishme("%s:%s: gen9 1d surfaces", __FILE__, __func__); + get_image_offset_sa_gen9_1d(surf, level, logical_array_layer, + x_offset_sa, y_offset_sa); + break; case ISL_DIM_LAYOUT_GEN4_2D: get_image_offset_sa_gen4_2d(surf, level, logical_array_layer, x_offset_sa, y_offset_sa); -- cgit v1.2.3 From e05b3079429a9e5446194d2a9f3fcea2004a5c8c Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 5 Jan 2016 14:30:23 -0800 Subject: isl: Add isl_surf_get_array_pitch_el() Will be needed to program SurfaceQPitch for Skylake 1D arrays. --- src/isl/isl.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'src') diff --git a/src/isl/isl.h b/src/isl/isl.h index 15cfd0f4c56..9e07b2ff63c 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -967,6 +967,27 @@ isl_surf_get_image_alignment_sa(const struct isl_surf *surf) }; } +/** + * Pitch between vertically adjacent surface elements, in bytes. 
+ */ +static inline uint32_t +isl_surf_get_row_pitch(const struct isl_surf *surf) +{ + return surf->row_pitch; +} + +/** + * Pitch between vertically adjacent surface elements, in units of surface elements. + */ +static inline uint32_t +isl_surf_get_row_pitch_el(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + assert(surf->row_pitch % fmtl->bs == 0); + return surf->row_pitch / fmtl->bs; +} + /** * Pitch between physical array slices, in rows of surface elements. */ @@ -976,6 +997,16 @@ isl_surf_get_array_pitch_el_rows(const struct isl_surf *surf) return surf->array_pitch_el_rows; } +/** + * Pitch between physical array slices, in units of surface elements. + */ +static inline uint32_t +isl_surf_get_array_pitch_el(const struct isl_surf *surf) +{ + return isl_surf_get_array_pitch_el_rows(surf) * + isl_surf_get_row_pitch_el(surf); +} + /** * Pitch between physical array slices, in rows of surface samples. */ -- cgit v1.2.3 From 8284786c5d2468521dddab25b6685487f3ede262 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 5 Jan 2016 14:35:52 -0800 Subject: anv/gen9: Teach gen9_image_view_init() about 1D surface qpitch QPitch is usually expressed as rows of surface elements (where a surface element is an compression block or a single surface sample. Skylake 1D is an outlier; there QPitch is expressed as individual surface elements. 
--- src/vulkan/gen8_state.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index a2919a7c961..14076d3f6c4 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -134,6 +134,34 @@ get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valig #endif } +static uint32_t +get_qpitch(const struct isl_surf *surf) +{ + switch (surf->dim) { + default: + unreachable(!"bad isl_surf_dim"); + case ISL_SURF_DIM_1D: + #if ANV_GENx10 >= 90 + /* QPitch is usually expressed as rows of surface elements (where + * a surface element is an compression block or a single surface + * sample). Skylake 1D is an outlier. + * + * From the Skylake BSpec >> Memory Views >> Common Surface + * Formats >> Surface Layout and Tiling >> 1D Surfaces: + * + * Surface QPitch specifies the distance in pixels between array + * slices. + */ + return isl_surf_get_array_pitch_el(surf); + #else + return isl_surf_get_array_pitch_el_rows(surf); + #endif + case ISL_SURF_DIM_2D: + case ISL_SURF_DIM_3D: + return isl_surf_get_array_pitch_el_rows(surf); + } +} + void genX(image_view_init)(struct anv_image_view *iview, struct anv_device *device, @@ -220,7 +248,7 @@ genX(image_view_init)(struct anv_image_view *iview, */ .BaseMipLevel = 0.0, - .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&surface->isl) >> 2, + .SurfaceQPitch = get_qpitch(&surface->isl) >> 2, .Height = image->extent.height - 1, .Width = image->extent.width - 1, .Depth = depth - 1, -- cgit v1.2.3 From 1f3593d8a14e44d597231d31dc9b94a55ecf7f46 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 Jan 2016 15:24:24 -0800 Subject: nir/builder: Add a helper for storing to a deref --- src/glsl/nir/nir_builder.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 985f4af5339..e842b2252ff 100644 --- 
a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -366,6 +366,22 @@ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value, nir_builder_instr_insert(build, &store->instr); } +static inline void +nir_store_deref_var(nir_builder *build, nir_deref_var *deref, + nir_ssa_def *value, unsigned writemask) +{ + const unsigned num_components = + glsl_get_vector_elements(nir_deref_tail(&deref->deref)->type); + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var); + store->num_components = num_components; + store->const_index[0] = writemask & ((1 << num_components) - 1); + store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &deref->deref)); + store->src[0] = nir_src_for_ssa(value); + nir_builder_instr_insert(build, &store->instr); +} + static inline void nir_copy_deref_var(nir_builder *build, nir_deref_var *dest, nir_deref_var *src) { -- cgit v1.2.3 From 22804de110b97dce1415318fd02c1003e16ef14a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 Jan 2016 15:24:38 -0800 Subject: nir/spirv: Properly implement Modf --- src/glsl/nir/spirv/vtn_glsl450.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index fd8ec3eb2dc..739e4394954 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -111,7 +111,14 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Sqrt: op = nir_op_fsqrt; break; case GLSLstd450InverseSqrt: op = nir_op_frsq; break; - case GLSLstd450Modf: op = nir_op_fmod; break; + case GLSLstd450Modf: { + val->ssa->def = nir_ffract(nb, src[0]); + nir_deref_var *out = vtn_value(b, w[6], vtn_value_type_deref)->deref; + nir_store_deref_var(nb, out, nir_ffloor(nb, src[0]), 0xf); + return; + } + + op = nir_op_fmod; break; case GLSLstd450FMin: op = nir_op_fmin; break; case GLSLstd450UMin: op = nir_op_umin; break; 
case GLSLstd450SMin: op = nir_op_imin; break; -- cgit v1.2.3 From 1f503603d3b41c2f4b502dbb8c31916c5b521f14 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 Jan 2016 14:46:31 -0800 Subject: nir/opcodes: Fix the folding expression for usub_borrow --- src/glsl/nir/nir_opcodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index 1cd01a4fe92..4bc6d16cbad 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -366,7 +366,7 @@ binop_convert("uadd_carry", tbool, tuint, commutative, "src0 + src1 < src0") # returns a boolean representing the borrow resulting from the subtraction # of the two unsigned arguments. -binop_convert("usub_borrow", tbool, tuint, "", "src1 < src0") +binop_convert("usub_borrow", tbool, tuint, "", "src0 < src1") binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1") -- cgit v1.2.3 From 573351cb0f0da08afe7040e76e668c2528616d5a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 Jan 2016 15:05:10 -0800 Subject: nir/algebraic: Add more lowering This commit adds lowering options for the following opcodes: - nir_op_fmod - nir_op_bitfield_insert - nir_op_uadd_carry - nir_op_usub_borrow --- src/glsl/nir/nir.h | 4 ++++ src/glsl/nir/nir_opt_algebraic.py | 6 ++++++ 2 files changed, 10 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 59f6f6829a4..61e51da5867 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1509,6 +1509,10 @@ typedef struct nir_shader_compiler_options { bool lower_fpow; bool lower_fsat; bool lower_fsqrt; + bool lower_fmod; + bool lower_bitfield_insert; + bool lower_uadd_carry; + bool lower_usub_borrow; /** lowers fneg and ineg to fsub and isub. */ bool lower_negate; /** lowers fsub and isub to fadd+fneg and iadd+ineg. 
*/ diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index a5a4841f6b1..f4a863239b6 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -222,6 +222,12 @@ optimizations = [ (('iadd', a, ('isub', 0, b)), ('isub', a, b)), (('fabs', ('fsub', 0.0, a)), ('fabs', a)), (('iabs', ('isub', 0, a)), ('iabs', a)), + + # Misc. lowering + (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'), + (('bitfield_insert', a, b, c, d), ('bfi', ('bfm', d, c), b, a), 'options->lower_bitfield_insert'), + (('uadd_carry', a, b), ('ult', ('iadd', a, b), a), 'options->lower_uadd_carry'), + (('usub_borrow', a, b), ('ult', a, b), 'options->lower_usub_borrow'), ] # Add optimizations to handle the case where the result of a ternary is -- cgit v1.2.3 From 5bbf060ece581b4ddfd403387e707776b4f3e127 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 Jan 2016 14:09:47 -0800 Subject: i965/compiler: Enable more lowering in NIR We don't need these for GLSL or ARB, but we need them for SPIR-V --- src/mesa/drivers/dri/i965/brw_shader.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 6d15c60fa40..4ae403c2baa 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -104,6 +104,11 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) nir_options->lower_ffma = true; nir_options->lower_sub = true; nir_options->lower_fdiv = true; + nir_options->lower_scmp = true; + nir_options->lower_fmod = true; + nir_options->lower_bitfield_insert = true; + nir_options->lower_uadd_carry = true; + nir_options->lower_usub_borrow = true; /* In the vec4 backend, our dpN instruction replicates its result to all * the components of a vec4. 
We would like NIR to give us replicated fdot -- cgit v1.2.3 From de65d4dcafcf6b22a71689ef1ef19bbd3dd953da Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 6 Jan 2016 10:27:57 -0800 Subject: anv: Fix build without VALGRIND Signed-off-by: Jordan Justen --- src/vulkan/anv_allocator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index e49a684aaef..4be149ea695 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -740,7 +740,7 @@ anv_state_stream_alloc(struct anv_state_stream *stream, uint32_t block = anv_block_pool_alloc(stream->block_pool); sb = stream->block_pool->map + block; - VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb)); + VG(VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb))); sb->next = stream->block; sb->offset = block; VG(sb->_vg_ptr = NULL); -- cgit v1.2.3 From 000eb00862545aa1a0e42cea800a06bc57b406cf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 Jan 2016 17:00:01 -0800 Subject: nir/spirv/cfg: Only set fall to true at the start of a case Previously, we were setting it to true at the top of the switch statement. However, this causes all of the cases to get executed until you hit a break. Instead, you want to be not executing at the start, start executing when you hit your case, and end at a break. 
--- src/glsl/nir/spirv/vtn_cfg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index 9330ac03769..db1163d0707 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -598,7 +598,7 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, */ nir_variable *fall_var = nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall"); - nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1); + nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); /* Next, we gather up all of the conditions. We have to do this * up-front because we also need to build an "any" condition so @@ -649,6 +649,7 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, bool has_break = false; b->nb.cursor = nir_after_cf_list(&case_if->then_list); + nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1); vtn_emit_cf_list(b, &cse->body, fall_var, &has_break, handler); (void)has_break; /* We don't care */ -- cgit v1.2.3 From 195c60deb47dd5b7da0045facd5b07ba6e3cb2a2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 Jan 2016 17:13:04 -0800 Subject: nir/spirv: Wrap borrow/carry ops in b2i NIR specifies them as booleans but SPIR-V wants ints. 
--- src/glsl/nir/spirv/spirv_to_nir.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 875b18cebdd..8acfc4b392e 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2718,13 +2718,15 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpIAddCarry: assert(glsl_type_is_struct(val->ssa->type)); val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = + nir_b2i(&b->nb, nir_uadd_carry(&b->nb, src[0], src[1])); return; case SpvOpISubBorrow: assert(glsl_type_is_struct(val->ssa->type)); val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = + nir_b2i(&b->nb, nir_usub_borrow(&b->nb, src[0], src[1])); return; case SpvOpUMulExtended: -- cgit v1.2.3 From d8cd5e333e8095c87f9819d732ed1d7dfe63c2c8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 Jan 2016 19:33:46 -0800 Subject: anv/state: Pull sampler vk-to-gen maps into genX_state_util.h --- src/vulkan/gen7_state.c | 30 ------------------------------ src/vulkan/gen8_state.c | 30 ------------------------------ src/vulkan/genX_state_util.h | 30 ++++++++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 60 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index a7940ca9e2f..257cb35aca9 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -65,36 +65,6 @@ genX(fill_buffer_surface_state)(void *state, enum isl_format format, GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); } -static const uint32_t vk_to_gen_tex_filter[] = { - [VK_FILTER_NEAREST] = MAPFILTER_NEAREST, - [VK_FILTER_LINEAR] = MAPFILTER_LINEAR -}; - -static const uint32_t vk_to_gen_mipmap_mode[] = { - 
[VK_SAMPLER_MIPMAP_MODE_BASE] = MIPFILTER_NONE, - [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, - [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR -}; - -static const uint32_t vk_to_gen_tex_address[] = { - [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, - [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, - [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, -}; - -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, -}; - static struct anv_state alloc_surface_state(struct anv_device *device, struct anv_cmd_buffer *cmd_buffer) diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 14076d3f6c4..34be6677915 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -350,36 +350,6 @@ VkResult genX(CreateSampler)( if (!sampler) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - static const uint32_t vk_to_gen_tex_filter[] = { - [VK_FILTER_NEAREST] = MAPFILTER_NEAREST, - [VK_FILTER_LINEAR] = MAPFILTER_LINEAR - }; - - static const uint32_t vk_to_gen_mipmap_mode[] = { - [VK_SAMPLER_MIPMAP_MODE_BASE] = MIPFILTER_NONE, - [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, - [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR - }; - - static const uint32_t vk_to_gen_tex_address[] = { - [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, - [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, - [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, - 
[VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, - }; - - static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, - }; - if (pCreateInfo->maxAnisotropy > 1) { mag_filter = MAPFILTER_ANISOTROPIC; min_filter = MAPFILTER_ANISOTROPIC; diff --git a/src/vulkan/genX_state_util.h b/src/vulkan/genX_state_util.h index cedfabda1f3..0741d766edd 100644 --- a/src/vulkan/genX_state_util.h +++ b/src/vulkan/genX_state_util.h @@ -65,3 +65,33 @@ vk_to_gen_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) return vk_to_gen_swizzle_map[swizzle]; } #endif + +static const uint32_t vk_to_gen_tex_filter[] = { + [VK_FILTER_NEAREST] = MAPFILTER_NEAREST, + [VK_FILTER_LINEAR] = MAPFILTER_LINEAR +}; + +static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_SAMPLER_MIPMAP_MODE_BASE] = MIPFILTER_NONE, + [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR +}; + +static const uint32_t vk_to_gen_tex_address[] = { + [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, + [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, + [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + 
[VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; -- cgit v1.2.3 From be91f23e3b9381711c21978001862a6edb0bb2fd Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 6 Jan 2016 15:43:11 -0800 Subject: isl: Fix image alignment calculation The previous code was resulting in an alignment of 0. Signed-off-by: Jordan Justen --- src/isl/isl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 1fd7d01d1c3..edc540b7ad2 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -1071,8 +1071,9 @@ isl_surf_init_s(const struct isl_device *dev, const uint32_t size = row_pitch * isl_align(total_h_sa, tile_info.height); /* Alignment of surface base address, in bytes */ - uint32_t base_alignment = info->min_alignment; - base_alignment = isl_align(base_alignment, tile_info.size); + uint32_t base_alignment = MAX(1, info->min_alignment); + assert(isl_is_pow2(base_alignment) && isl_is_pow2(tile_info.size)); + base_alignment = MAX(base_alignment, tile_info.size); *surf = (struct isl_surf) { .dim = info->dim, -- cgit v1.2.3 From 4d68c477ad29bc39794eb5f3e5f0886129c6c9c5 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 6 Jan 2016 15:40:01 -0800 Subject: anv: Assert that alignments are not 0 for align_* Signed-off-by: Jordan Justen --- src/vulkan/anv_private.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 5bd50258c8a..0cd8ab6facd 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -83,18 +83,21 @@ typedef union _VK_LOADER_DATA { static inline uint32_t align_u32(uint32_t v, uint32_t a) { + assert(a != 0 && a == (a & -a)); return (v + a - 1) & ~(a - 1); } static inline uint64_t align_u64(uint64_t v, uint64_t a) { + assert(a != 0 && a == (a & -a)); return (v + a - 1) & ~(a - 1); } static inline int32_t align_i32(int32_t v, int32_t a) { + assert(a != 0 && a == (a & 
-a)); return (v + a - 1) & ~(a - 1); } -- cgit v1.2.3 From 2f0a10149c6e5ea7f2c5ddfa13f71011db0ef420 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 6 Jan 2016 15:42:18 -0800 Subject: isl: Assert that alignments are not 0 for isl_align Signed-off-by: Jordan Justen --- src/isl/isl_priv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/isl/isl_priv.h b/src/isl/isl_priv.h index 1c9343a7d1f..b399e0f8116 100644 --- a/src/isl/isl_priv.h +++ b/src/isl/isl_priv.h @@ -67,7 +67,7 @@ isl_is_aligned(uintmax_t n, uintmax_t a) static inline uintmax_t isl_align(uintmax_t n, uintmax_t a) { - assert(isl_is_pow2(a)); + assert(a != 0 && isl_is_pow2(a)); return (n + a - 1) & ~(a - 1); } -- cgit v1.2.3 From 0af77fe5b6d0201be564f98e58e7f62cd55cc05e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 7 Jan 2016 10:58:29 -0800 Subject: isl: Refactor func isl_calc_array_pitch_sa_rows Update the function to calculate the array pitch is *element rows*, and it rename it accordingly to isl_calc_array_pitch_el_rows. --- src/isl/isl.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index edc540b7ad2..67e2ff6fae9 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -710,11 +710,12 @@ isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, /** * Calculate the pitch between physical array slices, in units of rows of - * surface samples. The result is aligned to \a image_align_sa. + * surface elements. 
*/ static uint32_t -isl_calc_array_pitch_sa_rows(const struct isl_device *dev, +isl_calc_array_pitch_el_rows(const struct isl_device *dev, const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, enum isl_dim_layout dim_layout, enum isl_array_pitch_span array_pitch_span, const struct isl_extent3d *image_align_sa, @@ -722,15 +723,17 @@ isl_calc_array_pitch_sa_rows(const struct isl_device *dev, const struct isl_extent2d *phys_slice0_sa) { const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + uint32_t pitch_sa_rows = 0; switch (dim_layout) { case ISL_DIM_LAYOUT_GEN9_1D: /* Each row is an array slice */ - return 1; + pitch_sa_rows = 1; + break; case ISL_DIM_LAYOUT_GEN4_2D: switch (array_pitch_span) { case ISL_ARRAY_PITCH_SPAN_COMPACT: - return isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); case ISL_ARRAY_PITCH_SPAN_FULL: { /* The QPitch equation is found in the Broadwell PRM >> Volume 5: * Memory Views >> Common Surface Formats >> Surface Layout >> 2D @@ -750,7 +753,7 @@ isl_calc_array_pitch_sa_rows(const struct isl_device *dev, m = 11; } - uint32_t pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); + pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && (info->height % 4 == 1)) { @@ -768,20 +771,21 @@ isl_calc_array_pitch_sa_rows(const struct isl_device *dev, } pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); - - return pitch_sa_rows; } /* end case */ break; } break; - case ISL_DIM_LAYOUT_GEN4_3D: assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); - return isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + break; + default: + unreachable("bad isl_dim_layout"); + break; } - unreachable("bad isl_dim_layout"); - return 0; + assert(pitch_sa_rows % fmtl->bh == 0); + return pitch_sa_rows / 
fmtl->bh; } /** @@ -1054,13 +1058,10 @@ isl_surf_init_s(const struct isl_device *dev, &image_align_sa, &phys_slice0_sa); - const uint32_t array_pitch_sa_rows = - isl_calc_array_pitch_sa_rows(dev, info, dim_layout, array_pitch_span, - &image_align_sa, &phys_level0_sa, - &phys_slice0_sa); - assert(array_pitch_sa_rows % fmtl->bh == 0); - - const uint32_t array_pitch_el_rows = array_pitch_sa_rows / fmtl->bh; + const uint32_t array_pitch_el_rows = + isl_calc_array_pitch_el_rows(dev, info, &tile_info, dim_layout, + array_pitch_span, &image_align_sa, + &phys_level0_sa, &phys_slice0_sa); const uint32_t total_h_el = isl_calc_total_height_el(dev, info, &tile_info, -- cgit v1.2.3 From d1e6c1b29bf45b807e298921e5b1386fec61669e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 7 Jan 2016 11:00:29 -0800 Subject: isl/gen9: Fix array pitch of 3d surfaces For tiled 3D surfaces, the array pitch must aligned to the tile height. From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch: Tile Mode != Linear: This field must be set to an integer multiple of the tile height Fixes CTS tests 'dEQP-VK.pipeline.image.view_type.3d.format.r8g8b8a8_unorm.*'. Fixes Crucible tests 'func.miptree.r8g8b8a8-unorm.aspect-color.view-3d.*'. 
--- src/isl/isl.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 67e2ff6fae9..0e6f1e31d12 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -785,7 +785,20 @@ isl_calc_array_pitch_el_rows(const struct isl_device *dev, } assert(pitch_sa_rows % fmtl->bh == 0); - return pitch_sa_rows / fmtl->bh; + uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh; + + if (ISL_DEV_GEN(dev) >= 9 && + info->dim == ISL_SURF_DIM_3D && + tile_info->tiling != ISL_TILING_LINEAR) { + /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch: + * + * Tile Mode != Linear: This field must be set to an integer multiple + * of the tile height + */ + pitch_el_rows = isl_align(pitch_el_rows, tile_info->height); + } + + return pitch_el_rows; } /** -- cgit v1.2.3 From a50c78a5cfb760f88b3d053efc4e58dca8d66ff5 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 7 Jan 2016 11:07:44 -0800 Subject: isl: Add missing break statement in array pitch calculation Fixes regression in ed98c374bd3f1952fbab3031afaf5ff4d178ef41. 
--- src/isl/isl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 0e6f1e31d12..015b14ea3d2 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -734,6 +734,7 @@ isl_calc_array_pitch_el_rows(const struct isl_device *dev, switch (array_pitch_span) { case ISL_ARRAY_PITCH_SPAN_COMPACT: pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + break; case ISL_ARRAY_PITCH_SPAN_FULL: { /* The QPitch equation is found in the Broadwell PRM >> Volume 5: * Memory Views >> Common Surface Formats >> Surface Layout >> 2D -- cgit v1.2.3 From 4c7f4c25d03cb1bd88fade526386e50e45e71e87 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 7 Jan 2016 13:46:20 -0800 Subject: anv/meta: Fix hardcoded format size in anv_CmdCopy* When looping through VkBufferImageCopy regions, for each region we incremented the offset into the VkBuffer assuming the format size was 4. Fixes CTS tests dEQP-VK.pipeline.image.view_type.cube_array.3d.* on Skylake. --- src/vulkan/anv_meta.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index e351db08728..b61cda793d5 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1234,9 +1234,9 @@ void anv_CmdCopyBufferToImage( * increment the offset directly in the image effectively * re-binding it to different backing memory. */ - /* XXX: Insert a real CPP */ src_image->offset += src_image->extent.width * - src_image->extent.height * 4; + src_image->extent.height * + src_image->format->isl_layout->bs; } anv_DestroyImage(vk_device, anv_image_to_handle(src_image), @@ -1336,9 +1336,9 @@ void anv_CmdCopyImageToBuffer( * increment the offset directly in the image effectively * re-binding it to different backing memory. 
*/ - /* XXX: Insert a real CPP */ dest_image->offset += dest_image->extent.width * - dest_image->extent.height * 4; + dest_image->extent.height * + src_image->format->isl_layout->bs; } anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), -- cgit v1.2.3 From 36a2304686bde4824543dcc1a2ea72f96dcc741d Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 7 Jan 2016 16:25:35 -0800 Subject: anv/gen8: Setup state to enable barrier() function Signed-off-by: Jordan Justen --- src/vulkan/gen8_cmd_buffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 965a9c14317..9b03b0a5c56 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -544,7 +544,9 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) .SamplerCount = 0, .ConstantIndirectURBEntryReadLength = push_constant_regs, .ConstantURBEntryReadOffset = 0, - .NumberofThreadsinGPGPUThreadGroup = 0); + .BarrierEnable = cs_prog_data->uses_barrier, + .NumberofThreadsinGPGPUThreadGroup = + pipeline->cs_thread_width_max); uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), -- cgit v1.2.3 From d24e88b98e05800a56aff35dc54b61a800d1c71b Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 7 Jan 2016 17:10:02 -0800 Subject: anv/gen7: Setup state to enable barrier() function Signed-off-by: Jordan Justen --- src/vulkan/gen7_cmd_buffer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index fa9cb8fa914..85eec0b055e 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -269,13 +269,17 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) if (result != VK_SUCCESS) return result; + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + 
struct anv_state state = anv_state_pool_emit(&device->dynamic_state_pool, GEN7_INTERFACE_DESCRIPTOR_DATA, 64, .KernelStartPointer = pipeline->cs_simd, .BindingTablePointer = surfaces.offset, .SamplerStatePointer = samplers.offset, - .NumberofThreadsinGPGPUThreadGroup = 0); + .BarrierEnable = cs_prog_data->uses_barrier, + .NumberofThreadsinGPGPUThreadGroup = + pipeline->cs_thread_width_max); const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, -- cgit v1.2.3 From 067dbd7a17e04cacf74fd08f5bc8a692fb68e202 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 6 Jan 2016 16:41:22 -0800 Subject: vk: Issue PIPELINE_SELECT before setting up render pass We need to make sure we're selected the 3D pipeline before we start setting up depth and stencil buffers. --- src/vulkan/gen8_cmd_buffer.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 9b03b0a5c56..74f7e32952d 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -145,15 +145,8 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) #endif static void -cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer) { - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - uint32_t *p; - - uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - if (cmd_buffer->state.current_pipeline != _3D) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), #if ANV_GEN >= 9 @@ -162,6 +155,19 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) .PipelineSelection = _3D); cmd_buffer->state.current_pipeline = _3D; } +} + +static void +cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +{ + 
struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + flush_pipeline_select_3d(cmd_buffer); if (vb_emit) { const uint32_t num_buffers = __builtin_popcount(vb_emit); @@ -807,6 +813,8 @@ void genX(CmdBeginRenderPass)( cmd_buffer->state.framebuffer = framebuffer; cmd_buffer->state.pass = pass; + flush_pipeline_select_3d(cmd_buffer); + const VkRect2D *render_area = &pRenderPassBegin->renderArea; anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), -- cgit v1.2.3 From bbf3fc815baaad3f71cf92338c55734808318361 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 6 Jan 2016 16:42:14 -0800 Subject: vk: Add missing DepthStallEnable to OQ pipe control --- src/vulkan/gen8_cmd_buffer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 74f7e32952d..b1c5161a5e9 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -871,6 +871,7 @@ emit_ps_depth_count(struct anv_batch *batch, anv_batch_emit(batch, GENX(PIPE_CONTROL), .DestinationAddressType = DAT_PPGTT, .PostSyncOperation = WritePSDepthCount, + .DepthStallEnable = true, .Address = { bo, offset }); } -- cgit v1.2.3 From a18b5e642ce70c3d4d5f7f243ee562baa3161306 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 6 Jan 2016 21:57:24 -0800 Subject: vk: Implement VK_QUERY_RESULT_WITH_AVAILABILITY_BIT --- src/vulkan/anv_query.c | 42 +++++++++++------- src/vulkan/gen8_cmd_buffer.c | 102 ++++++++++++++++++++++++++++++------------- 2 files changed, 97 insertions(+), 47 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index 6f9d7d89aa6..3b29a235cd3 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -45,17 +45,15 @@ VkResult anv_CreateQueryPool( switch 
(pCreateInfo->queryType) { case VK_QUERY_TYPE_OCCLUSION: - slot_size = sizeof(struct anv_query_pool_slot); + case VK_QUERY_TYPE_TIMESTAMP: break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: return VK_ERROR_INCOMPATIBLE_DRIVER; - case VK_QUERY_TYPE_TIMESTAMP: - slot_size = sizeof(uint64_t); - break; default: assert(!"Invalid query type"); } + slot_size = sizeof(struct anv_query_pool_slot); pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pool == NULL) @@ -110,12 +108,6 @@ VkResult anv_GetQueryPoolResults( uint64_t result; int ret; - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - /* Where is the availabilty info supposed to go? */ - anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); - return VK_ERROR_INCOMPATIBLE_DRIVER; - } - assert(pool->type == VK_QUERY_TYPE_OCCLUSION || pool->type == VK_QUERY_TYPE_TIMESTAMP); @@ -132,11 +124,11 @@ VkResult anv_GetQueryPoolResults( } void *data_end = pData + dataSize; + struct anv_query_pool_slot *slot = pool->bo.map; for (uint32_t i = 0; i < queryCount; i++) { switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: { - struct anv_query_pool_slot *slot = pool->bo.map; result = slot[startQuery + i].end - slot[startQuery + i].begin; break; } @@ -144,8 +136,7 @@ VkResult anv_GetQueryPoolResults( /* Not yet implemented */ break; case VK_QUERY_TYPE_TIMESTAMP: { - uint64_t *slot = pool->bo.map; - result = slot[startQuery + i]; + result = slot[startQuery + i].begin; break; } default: @@ -153,12 +144,19 @@ VkResult anv_GetQueryPoolResults( } if (flags & VK_QUERY_RESULT_64_BIT) { - *(uint64_t *)pData = result; + uint64_t *dst = pData; + dst[0] = result; + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + dst[1] = slot[startQuery + i].available; } else { + uint32_t *dst = pData; if (result > UINT32_MAX) result = UINT32_MAX; - *(uint32_t *)pData = result; + dst[0] = result; + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + dst[1] = slot[startQuery + i].available; } + pData += 
stride; if (pData >= data_end) break; @@ -173,5 +171,17 @@ void anv_CmdResetQueryPool( uint32_t startQuery, uint32_t queryCount) { - stub(); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + for (uint32_t i = 0; i < queryCount; i++) { + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: { + struct anv_query_pool_slot *slot = pool->bo.map; + slot[startQuery + i].available = 0; + break; + } + default: + assert(!"Invalid query type"); + } + } } diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index b1c5161a5e9..0ba9beac9c8 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -875,6 +875,17 @@ emit_ps_depth_count(struct anv_batch *batch, .Address = { bo, offset }); } +static void +emit_query_availability(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { bo, offset }, + .ImmediateData = 1); +} + void genX(CmdBeginQuery)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, @@ -908,6 +919,9 @@ void genX(CmdEndQuery)( case VK_QUERY_TYPE_OCCLUSION: emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, entry * sizeof(struct anv_query_pool_slot) + 8); + + emit_query_availability(&cmd_buffer->batch, &pool->bo, + entry * sizeof(struct anv_query_pool_slot) + 16); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: @@ -926,6 +940,7 @@ void genX(CmdWriteTimestamp)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + uint32_t offset = entry * sizeof(struct anv_query_pool_slot); assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); @@ -933,10 +948,10 @@ void genX(CmdWriteTimestamp)( case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), .RegisterAddress = TIMESTAMP, - .MemoryAddress = { &pool->bo, entry * 8 }); + .MemoryAddress = { &pool->bo, offset }); 
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), .RegisterAddress = TIMESTAMP + 4, - .MemoryAddress = { &pool->bo, entry * 8 + 4 }); + .MemoryAddress = { &pool->bo, offset + 4 }); break; default: @@ -944,9 +959,11 @@ void genX(CmdWriteTimestamp)( anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .DestinationAddressType = DAT_PPGTT, .PostSyncOperation = WriteTimestamp, - .Address = { &pool->bo, entry * 8 }); + .Address = { &pool->bo, offset }); break; } + + emit_query_availability(&cmd_buffer->batch, &pool->bo, entry + 16); } #define alu_opcode(v) __gen_field((v), 20, 31) @@ -993,6 +1010,20 @@ emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, .MemoryAddress = { bo, offset + 4 }); } +static void +store_query_result(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags) +{ + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + void genX(CmdCopyQueryPoolResults)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, @@ -1008,15 +1039,6 @@ void genX(CmdCopyQueryPoolResults)( ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); uint32_t slot_offset, dst_offset; - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - /* Where is the availabilty info supposed to go? */ - anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); - return; - } - - assert(pool->type == VK_QUERY_TYPE_OCCLUSION); - - /* FIXME: If we're not waiting, should we just do this on the CPU? 
*/ if (flags & VK_QUERY_RESULT_WAIT_BIT) anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .CommandStreamerStallEnable = true, @@ -1026,26 +1048,44 @@ void genX(CmdCopyQueryPoolResults)( for (uint32_t i = 0; i < queryCount; i++) { slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + break; + + case VK_QUERY_TYPE_TIMESTAMP: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(2), &pool->bo, slot_offset); + break; + + default: + unreachable("unhandled query type"); + } - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); - - /* FIXME: We need to clamp the result for 32 bit. 
*/ - - uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); - dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); - dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); - dw[3] = alu(OPCODE_SUB, 0, 0); - dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); - - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = CS_GPR(2), - .MemoryAddress = { buffer->bo, dst_offset }); - - if (flags & VK_QUERY_RESULT_64_BIT) - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = CS_GPR(2) + 4, - .MemoryAddress = { buffer->bo, dst_offset + 4 }); + store_query_result(&cmd_buffer->batch, + CS_GPR(2), buffer->bo, dst_offset, flags); + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), + &pool->bo, slot_offset + 16); + if (flags & VK_QUERY_RESULT_64_BIT) + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 8, flags); + else + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 4, flags); + } dst_offset += destStride; } -- cgit v1.2.3 From 1b1dca75a401b2d885ae7583837a63c3f66da022 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 7 Jan 2016 16:25:49 -0800 Subject: vk: Make sure we emit binding table pointers after push constants SKL needs this to make sure we flush the push constants. It gets a little tricky, since we also need to emit binding tables before push constants, since that may affect the push constants (dynamic buffer offsets and storage image parameters). This patch splits emitting binding tables from emitting the pointers so that we can emit push constants after binding tables but before emitting binding table pointers. 
--- src/vulkan/anv_cmd_buffer.c | 8 +++-- src/vulkan/anv_private.h | 6 +++- src/vulkan/gen7_cmd_buffer.c | 76 ++++++++++++++++++++++++-------------------- src/vulkan/gen8_cmd_buffer.c | 29 +++++++++-------- 4 files changed, 68 insertions(+), 51 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 49bb298a188..0407ad92fba 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -636,8 +636,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, * targets. */ uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0; - if (color_count + surface_count == 0) + if (color_count + surface_count == 0) { + *bt_state = (struct anv_state) { 0, }; return VK_SUCCESS; + } *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, bias + surface_count, @@ -781,8 +783,10 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, layout = cmd_buffer->state.pipeline->layout; sampler_count = layout ? 
layout->stage[stage].sampler_count : 0; - if (sampler_count == 0) + if (sampler_count == 0) { + *state = (struct anv_state) { 0, }; return VK_SUCCESS; + } uint32_t size = sampler_count * 16; *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 0cd8ab6facd..ded2d9a5e24 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1079,6 +1079,8 @@ struct anv_cmd_state { struct anv_vertex_binding vertex_bindings[MAX_VBS]; struct anv_descriptor_set * descriptors[MAX_SETS]; struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; + struct anv_state binding_tables[MESA_SHADER_STAGES]; + struct anv_state samplers[MESA_SHADER_STAGES]; struct anv_dynamic_state dynamic; struct { @@ -1176,7 +1178,9 @@ VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, unsigned stage, struct anv_state *bt_state); VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage, struct anv_state *state); -void gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer); +uint32_t gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, + uint32_t stages); struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, const void *data, uint32_t size, uint32_t alignment); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 85eec0b055e..e69bf47782e 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -32,7 +32,7 @@ #include "gen7_pack.h" #include "gen75_pack.h" -static void +static uint32_t cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) { static const uint32_t push_constant_opcodes[] = { @@ -63,21 +63,14 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) } cmd_buffer->state.push_constants_dirty &= ~flushed; + + return flushed; } 
-static VkResult -flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage) +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, + uint32_t stages) { - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = anv_cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); - if (result != VK_SUCCESS) - return result; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); - if (result != VK_SUCCESS) - return result; - static const uint32_t sampler_state_opcodes[] = { [MESA_SHADER_VERTEX] = 43, [MESA_SHADER_TESS_CTRL] = 44, /* HS */ @@ -96,24 +89,24 @@ flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage) [MESA_SHADER_COMPUTE] = 0, }; - if (samplers.alloc_size > 0) { - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, - ._3DCommandSubOpcode = sampler_state_opcodes[stage], - .PointertoVSSamplerState = samplers.offset); - } + anv_foreach_stage(s, stages) { + if (cmd_buffer->state.samplers[s].alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[s], + .PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset); + } - if (surfaces.alloc_size > 0) { + /* Always emit binding table pointers if we're asked to, since on SKL + * this is what flushes push constants. 
*/ anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, - ._3DCommandSubOpcode = binding_table_opcodes[stage], - .PointertoVSBindingTable = surfaces.offset); + ._3DCommandSubOpcode = binding_table_opcodes[s], + .PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset); } - - return VK_SUCCESS; } -GENX_FUNC(GEN7, GEN7) void +GENX_FUNC(GEN7, GEN7) uint32_t genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) { VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & @@ -121,7 +114,12 @@ genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) VkResult result = VK_SUCCESS; anv_foreach_stage(s, dirty) { - result = flush_descriptor_set(cmd_buffer, s); + result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, + &cmd_buffer->state.samplers[s]); + if (result != VK_SUCCESS) + break; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, + &cmd_buffer->state.binding_tables[s]); if (result != VK_SUCCESS) break; } @@ -138,15 +136,22 @@ genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_emit_state_base_address(cmd_buffer); /* Re-emit all active binding tables */ - anv_foreach_stage(s, cmd_buffer->state.pipeline->active_stages) { - result = flush_descriptor_set(cmd_buffer, s); - - /* It had better succeed this time */ - assert(result == VK_SUCCESS); + dirty |= cmd_buffer->state.pipeline->active_stages; + anv_foreach_stage(s, dirty) { + result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, + &cmd_buffer->state.samplers[s]); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, + &cmd_buffer->state.binding_tables[s]); + if (result != VK_SUCCESS) + return result; } } - cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages; + cmd_buffer->state.descriptors_dirty &= ~dirty; + + return dirty; } static inline int64_t @@ -389,8 +394,11 @@ cmd_buffer_flush_state(struct anv_cmd_buffer 
*cmd_buffer) .Address = { &cmd_buffer->device->workaround_bo, 0 }); } - if (cmd_buffer->state.descriptors_dirty) - gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + uint32_t dirty = 0; + if (cmd_buffer->state.descriptors_dirty) { + dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); + } if (cmd_buffer->state.push_constants_dirty) cmd_buffer_flush_push_constants(cmd_buffer); diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 0ba9beac9c8..308b72b3d7b 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -32,7 +32,7 @@ #include "gen8_pack.h" #include "gen9_pack.h" -static void +static uint32_t cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) { static const uint32_t push_constant_opcodes[] = { @@ -66,6 +66,8 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) } cmd_buffer->state.push_constants_dirty &= ~flushed; + + return flushed; } #if ANV_GEN == 8 @@ -205,23 +207,22 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); } -#if ANV_GEN >= 9 - /* On SKL+ the new constants don't take effect until the next corresponding - * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure - * that is sent. As it is, we re-emit binding tables but we could hold on - * to the offset of the most recent binding table and only re-emit the - * 3DSTATE_BINDING_TABLE_POINTER_* command. + /* We emit the binding tables and sampler tables first, then emit push + * constants and then finally emit binding table and sampler table + * pointers. It has to happen in this order, since emitting the binding + * tables may change the push constants (in case of storage images). After + * emitting push constants, on SKL+ we have to emit the corresponding + * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. 
*/ - cmd_buffer->state.descriptors_dirty |= - cmd_buffer->state.push_constants_dirty & - cmd_buffer->state.pipeline->active_stages; -#endif - + uint32_t dirty = 0; if (cmd_buffer->state.descriptors_dirty) - gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); if (cmd_buffer->state.push_constants_dirty) - cmd_buffer_flush_push_constants(cmd_buffer); + dirty |= cmd_buffer_flush_push_constants(cmd_buffer); + + if (dirty) + gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) gen8_cmd_buffer_emit_viewport(cmd_buffer); -- cgit v1.2.3 From 24d82a3f79e34acadb27dc5148002cd8f8e13674 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 7 Jan 2016 17:02:49 -0800 Subject: anv/gen8: Refactor genX_image_view_init() Drop the temporary variables for RENDER_SURFACE_STATE's Depth and RenderTargetViewExtent. Instead, assign them in-place. This simplifies the next commit, which fixes gen9 cube surfaces. --- src/vulkan/gen8_state.c | 88 ++++++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 44 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 34be6677915..a446cb26391 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -175,48 +175,6 @@ genX(image_view_init)(struct anv_image_view *iview, struct anv_surface *surface = anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - uint32_t depth = 1; /* RENDER_SURFACE_STATE::Depth */ - uint32_t rt_view_extent = 1; /* RENDER_SURFACE_STATE::RenderTargetViewExtent */ - - switch (image->type) { - case VK_IMAGE_TYPE_1D: - case VK_IMAGE_TYPE_2D: - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: - * - * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced - * by one for each increase from zero of Minimum Array Element. 
For - * example, if Minimum Array Element is set to 1024 on a 2D surface, - * the range of this field is reduced to [0,1023]. - */ - depth = range->layerCount; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: - * - * For Render Target and Typed Dataport 1D and 2D Surfaces: - * This field must be set to the same value as the Depth field. - */ - rt_view_extent = depth; - break; - case VK_IMAGE_TYPE_3D: - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: - * - * If the volume texture is MIP-mapped, this field specifies the - * depth of the base MIP level. - */ - depth = image->extent.depth; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: - * - * For Render Target and Typed Dataport 3D Surfaces: This field - * indicates the extent of the accessible 'R' coordinates minus 1 on - * the LOD currently being rendered to. - */ - rt_view_extent = iview->extent.depth; - break; - default: - unreachable(!"bad VkImageType"); - } - static const uint8_t isl_to_gen_tiling[] = { [ISL_TILING_LINEAR] = LINEAR, [ISL_TILING_X] = XMAJOR, @@ -251,9 +209,9 @@ genX(image_view_init)(struct anv_image_view *iview, .SurfaceQPitch = get_qpitch(&surface->isl) >> 2, .Height = image->extent.height - 1, .Width = image->extent.width - 1, - .Depth = depth - 1, + .Depth = 0, /* TEMPLATE */ .SurfacePitch = surface->isl.row_pitch - 1, - .RenderTargetViewExtent = rt_view_extent - 1, + .RenderTargetViewExtent = 0, /* TEMPLATE */ .MinimumArrayElement = range->baseArrayLayer, .NumberofMultisamples = MULTISAMPLECOUNT_1, .XOffset = 0, @@ -279,6 +237,48 @@ genX(image_view_init)(struct anv_image_view *iview, .SurfaceBaseAddress = { NULL, iview->offset }, }; + switch (surface_state.SurfaceType) { + case SURFTYPE_1D: + case SURFTYPE_2D: + case SURFTYPE_CUBE: + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced + * by one for each increase from zero of Minimum Array 
Element. For + * example, if Minimum Array Element is set to 1024 on a 2D surface, + * the range of this field is reduced to [0,1023]. + * + * In other words, 'Depth' is the number of array layers. + */ + surface_state.Depth = range->layerCount - 1; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 1D and 2D Surfaces: + * This field must be set to the same value as the Depth field. + */ + surface_state.RenderTargetViewExtent = surface_state.Depth; + break; + case SURFTYPE_3D: + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * If the volume texture is MIP-mapped, this field specifies the + * depth of the base MIP level. + */ + surface_state.Depth = image->extent.depth - 1; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 3D Surfaces: This field + * indicates the extent of the accessible 'R' coordinates minus 1 on + * the LOD currently being rendered to. + */ + surface_state.RenderTargetViewExtent = iview->extent.depth - 1; + break; + default: + unreachable(!"bad SurfaceType"); + } + if (image->needs_nonrt_surface_state) { + iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); -- cgit v1.2.3 From 1818463733639e039cffc94310556ace35dccff7 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 7 Jan 2016 17:05:22 -0800 Subject: anv/gen9: Fix cube surface state For gen9 SURFTYPE_CUBE, the RENDER_SURFACE_STATE's Depth, MinimumArrayElement, and RenderTargetViewExtent are in units of full cubes and so must be divided by 6. Fixes 'dEQP-VK.pipeline.image.view_type.cube_array.cube_array.*'. Now all of 'dEQP-VK.pipeline.image.*' passes.
--- src/vulkan/gen8_state.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index a446cb26391..c29d100f9f5 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -212,7 +212,7 @@ genX(image_view_init)(struct anv_image_view *iview, .Depth = 0, /* TEMPLATE */ .SurfacePitch = surface->isl.row_pitch - 1, .RenderTargetViewExtent = 0, /* TEMPLATE */ - .MinimumArrayElement = range->baseArrayLayer, + .MinimumArrayElement = 0, /* TEMPLATE */ .NumberofMultisamples = MULTISAMPLECOUNT_1, .XOffset = 0, .YOffset = 0, @@ -240,7 +240,8 @@ genX(image_view_init)(struct anv_image_view *iview, switch (surface_state.SurfaceType) { case SURFTYPE_1D: case SURFTYPE_2D: - case SURFTYPE_CUBE: + surface_state.MinimumArrayElement = range->baseArrayLayer; + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: * * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced @@ -259,7 +260,22 @@ genX(image_view_init)(struct anv_image_view *iview, */ surface_state.RenderTargetViewExtent = surface_state.Depth; break; + case SURFTYPE_CUBE: + #if ANV_GENx10 >= 90 + /* Like SURFTYPE_2D, but divided by 6. 
*/ + surface_state.MinimumArrayElement = range->baseArrayLayer / 6; + surface_state.Depth = range->layerCount / 6 - 1; + surface_state.RenderTargetViewExtent = surface_state.Depth; + #else + /* Same as SURFTYPE_2D */ + surface_state.MinimumArrayElement = range->baseArrayLayer; + surface_state.Depth = range->layerCount - 1; + surface_state.RenderTargetViewExtent = surface_state.Depth; + #endif + break; case SURFTYPE_3D: + surface_state.MinimumArrayElement = range->baseArrayLayer; + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: * * If the volume texture is MIP-mapped, this field specifies the -- cgit v1.2.3 From fe57ad62a6a67d635c9486648f3f6e9a64b679bf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 7 Jan 2016 16:55:56 -0800 Subject: nir/spirv: Rework UBOs and SSBOs This completely reworks all block load/store operations. In particular, it should get row-major matrices working. --- src/glsl/nir/spirv/spirv_to_nir.c | 416 ++++++++++++++++++++++---------------- 1 file changed, 241 insertions(+), 175 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 8acfc4b392e..9b3d0cebbbd 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -452,6 +452,9 @@ struct_member_decoration_cb(struct vtn_builder *b, break; case SpvDecorationColMajor: break; /* Nothing to do here. Column-major is the default. 
*/ + case SpvDecorationRowMajor: + ctx->type->members[member]->row_major = true; + break; default: unreachable("Unhandled member decoration"); } @@ -565,18 +568,23 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, break; case SpvOpTypeVector: { - const struct glsl_type *base = - vtn_value(b, w[2], vtn_value_type_type)->type->type; + struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; unsigned elems = w[3]; - assert(glsl_type_is_scalar(base)); - val->type->type = glsl_vector_type(glsl_get_base_type(base), elems); + assert(glsl_type_is_scalar(base->type)); + val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems); + + /* Vectors implicitly have sizeof(base_type) stride. For now, this + * is always 4 bytes. This will have to change if we want to start + * supporting doubles or half-floats. + */ + val->type->stride = 4; + val->type->array_element = base; break; } case SpvOpTypeMatrix: { - struct vtn_type *base = - vtn_value(b, w[2], vtn_value_type_type)->type; + struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; unsigned columns = w[3]; assert(glsl_type_is_vector(base->type)); @@ -1241,153 +1249,251 @@ _vtn_variable_store(struct vtn_builder *b, } static nir_ssa_def * -nir_vulkan_resource_index(nir_builder *b, unsigned set, unsigned binding, - nir_variable_mode mode, nir_ssa_def *array_index) +deref_array_offset(struct vtn_builder *b, nir_deref *deref) { - if (array_index == NULL) - array_index = nir_imm_int(b, 0); + assert(deref->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(deref); + nir_ssa_def *offset = nir_imm_int(&b->nb, deref_array->base_offset); + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) + offset = nir_iadd(&b->nb, offset, deref_array->indirect.ssa); + + return offset; +} + +static nir_ssa_def * +get_vulkan_resource_index(struct vtn_builder *b, + nir_deref **deref, struct vtn_type **type) +{ + assert((*deref)->deref_type == 
nir_deref_type_var); + nir_variable *var = nir_deref_as_var(*deref)->var; + + assert(var->interface_type && "variable is a block"); + assert((*deref)->child); + + nir_ssa_def *array_index; + if ((*deref)->child->deref_type == nir_deref_type_array) { + *deref = (*deref)->child; + *type = (*type)->array_element; + array_index = deref_array_offset(b, *deref); + } else { + array_index = nir_imm_int(&b->nb, 0); + } nir_intrinsic_instr *instr = - nir_intrinsic_instr_create(b->shader, + nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_vulkan_resource_index); instr->src[0] = nir_src_for_ssa(array_index); - instr->const_index[0] = set; - instr->const_index[1] = binding; - instr->const_index[2] = mode; + instr->const_index[0] = var->data.descriptor_set; + instr->const_index[1] = var->data.binding; + instr->const_index[2] = var->data.mode; nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); - nir_builder_instr_insert(b, &instr->instr); + nir_builder_instr_insert(&b->nb, &instr->instr); return &instr->dest.ssa; } -static struct vtn_ssa_value * -_vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, - unsigned set, unsigned binding, nir_variable_mode mode, - nir_ssa_def *index, nir_ssa_def *offset, struct vtn_type *type) -{ - struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); - val->type = type->type; - val->transposed = NULL; - if (glsl_type_is_vector_or_scalar(type->type)) { - nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); - load->num_components = glsl_get_vector_elements(type->type); - - switch (op) { - case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ssbo: { - nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, - set, binding, - mode, index); - load->src[0] = nir_src_for_ssa(res_index); - load->src[1] = nir_src_for_ssa(offset); - break; - } +static void +_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, + nir_ssa_def *index, nir_ssa_def *offset, + struct vtn_ssa_value **inout, const 
struct glsl_type *type) +{ + nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op); + instr->num_components = glsl_get_vector_elements(type); - case nir_intrinsic_load_push_constant: - load->src[0] = nir_src_for_ssa(offset); - break; + int src = 0; + if (!load) { + instr->const_index[0] = (1 << instr->num_components) - 1; /* write mask */ + instr->src[src++] = nir_src_for_ssa((*inout)->def); + } - default: - unreachable("Invalid block load intrinsic"); - } + if (index) + instr->src[src++] = nir_src_for_ssa(index); - nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); - nir_builder_instr_insert(&b->nb, &load->instr); + instr->src[src++] = nir_src_for_ssa(offset); - if (glsl_get_base_type(type->type) == GLSL_TYPE_BOOL) { - /* Loads of booleans from externally visible memory need to be - * fixed up since they're defined to be zero/nonzero rather than - * NIR_FALSE/NIR_TRUE. - */ - val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); - } else { - val->def = &load->dest.ssa; - } - } else { - unsigned elems = glsl_get_length(type->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - if (glsl_type_is_struct(type->type)) { - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *child_offset = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); - val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, - child_offset, type->members[i]); - } - } else { - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *child_offset = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); - val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, - child_offset,type->array_element); - } - } + if (load) { + nir_ssa_dest_init(&instr->instr, &instr->dest, + instr->num_components, NULL); + *inout = rzalloc(b, struct vtn_ssa_value); + (*inout)->def = &instr->dest.ssa; + (*inout)->type = type; } - return val; + nir_builder_instr_insert(&b->nb, &instr->instr); + + if (load 
&& glsl_get_base_type(type) == GLSL_TYPE_BOOL) + (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0)); } +static struct vtn_ssa_value * +vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src); + static void -vtn_block_get_offset(struct vtn_builder *b, nir_deref_var *src, - struct vtn_type **type, nir_deref *src_tail, - nir_ssa_def **index, nir_ssa_def **offset) +_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, + nir_ssa_def *index, nir_ssa_def *offset, nir_deref *deref, + struct vtn_type *type, struct vtn_ssa_value **inout) { - nir_deref *deref = &src->deref; - - if (deref->child->deref_type == nir_deref_type_array) { - deref = deref->child; - *type = (*type)->array_element; - nir_deref_array *deref_array = nir_deref_as_array(deref); - *index = nir_imm_int(&b->nb, deref_array->base_offset); - - if (deref_array->deref_array_type == nir_deref_array_type_indirect) - *index = nir_iadd(&b->nb, *index, deref_array->indirect.ssa); - } else { - *index = nir_imm_int(&b->nb, 0); + if (deref == NULL && load) { + assert(*inout == NULL); + *inout = rzalloc(b, struct vtn_ssa_value); + (*inout)->type = type->type; } - *offset = nir_imm_int(&b->nb, 0); - while (deref != src_tail) { - deref = deref->child; - switch (deref->deref_type) { - case nir_deref_type_array: { - nir_deref_array *deref_array = nir_deref_as_array(deref); - nir_ssa_def *off = nir_imm_int(&b->nb, deref_array->base_offset); + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* This is where things get interesting. At this point, we've hit + * a vector, a scalar, or a matrix. 
+ */ + if (glsl_type_is_matrix(type->type)) { + if (deref == NULL) { + /* Loading the whole matrix */ + if (load) + (*inout)->elems = ralloc_array(b, struct vtn_ssa_value *, 4); + + struct vtn_ssa_value *transpose; + unsigned num_ops, vec_width; + if (type->row_major) { + num_ops = glsl_get_vector_elements(type->type); + vec_width = glsl_get_matrix_columns(type->type); + if (load) { + (*inout)->type = + glsl_matrix_type(base_type, vec_width, num_ops); + } else { + transpose = vtn_transpose(b, *inout); + inout = &transpose; + } + } else { + num_ops = glsl_get_matrix_columns(type->type); + vec_width = glsl_get_vector_elements(type->type); + } - if (deref_array->deref_array_type == nir_deref_array_type_indirect) - off = nir_iadd(&b->nb, off, deref_array->indirect.ssa); + for (unsigned i = 0; i < num_ops; i++) { + nir_ssa_def *elem_offset = + nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, i * type->stride)); + _vtn_load_store_tail(b, op, load, index, elem_offset, + &(*inout)->elems[i], + glsl_vector_type(base_type, vec_width)); + } - off = nir_imul(&b->nb, off, nir_imm_int(&b->nb, (*type)->stride)); - *offset = nir_iadd(&b->nb, *offset, off); + if (load && type->row_major) + *inout = vtn_transpose(b, *inout); + + return; + } else if (type->row_major) { + /* Row-major but with a deref. 
*/ + nir_ssa_def *col_offset = + nir_imul(&b->nb, deref_array_offset(b, deref), + nir_imm_int(&b->nb, type->array_element->stride)); + offset = nir_iadd(&b->nb, offset, col_offset); + + if (deref->child) { + /* Picking off a single element */ + nir_ssa_def *row_offset = + nir_imul(&b->nb, deref_array_offset(b, deref->child), + nir_imm_int(&b->nb, type->stride)); + offset = nir_iadd(&b->nb, offset, row_offset); + _vtn_load_store_tail(b, op, load, index, offset, inout, + glsl_scalar_type(base_type)); + return; + } else { + unsigned num_comps = glsl_get_vector_elements(type->type); + nir_ssa_def *comps[4]; + for (unsigned i = 0; i < num_comps; i++) { + nir_ssa_def *elem_offset = + nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, i * type->stride)); + + struct vtn_ssa_value *comp = NULL, temp_val; + if (!load) { + temp_val.def = nir_channel(&b->nb, (*inout)->def, i); + temp_val.type = glsl_scalar_type(base_type); + comp = &temp_val; + } + _vtn_load_store_tail(b, op, load, index, elem_offset, + &comp, glsl_scalar_type(base_type)); + comps[i] = comp->def; + } - *type = (*type)->array_element; - break; + if (load) + (*inout)->def = nir_vec(&b->nb, comps, num_comps); + return; + } + } else { + /* Column-major with a deref. Fall through to array case. */ + } + } else if (deref == NULL) { + assert(glsl_type_is_vector_or_scalar(type->type)); + _vtn_load_store_tail(b, op, load, index, offset, inout, type->type); + return; + } else { + /* Single component of a vector. Fall through to array case. 
*/ } + /* Fall through */ - case nir_deref_type_struct: { - nir_deref_struct *deref_struct = nir_deref_as_struct(deref); + case GLSL_TYPE_ARRAY: + if (deref) { + offset = nir_iadd(&b->nb, offset, + nir_imul(&b->nb, deref_array_offset(b, deref), + nir_imm_int(&b->nb, type->stride))); - unsigned elem_off = (*type)->offsets[deref_struct->index]; - *offset = nir_iadd(&b->nb, *offset, nir_imm_int(&b->nb, elem_off)); + _vtn_block_load_store(b, op, load, index, offset, deref->child, + type->array_element, inout); + return; + } else { + unsigned elems = glsl_get_length(type->type); + if (load) + (*inout)->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - *type = (*type)->members[deref_struct->index]; - break; + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *elem_off = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); + _vtn_block_load_store(b, op, load, index, elem_off, NULL, + type->array_element, &(*inout)->elems[i]); + } + return; } + unreachable("Both branches above return"); - default: - unreachable("unknown deref type"); + case GLSL_TYPE_STRUCT: + if (deref) { + unsigned member = nir_deref_as_struct(deref)->index; + offset = nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, type->offsets[member])); + + _vtn_block_load_store(b, op, load, index, offset, deref->child, + type->members[member], inout); + return; + } else { + unsigned elems = glsl_get_length(type->type); + if (load) + (*inout)->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *elem_off = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); + _vtn_block_load_store(b, op, load, index, elem_off, NULL, + type->members[i], &(*inout)->elems[i]); + } + return; } + unreachable("Both branches above return"); + + default: + unreachable("Invalid block member type"); } } static struct vtn_ssa_value * vtn_block_load(struct vtn_builder *b, nir_deref_var *src, - struct vtn_type *type, nir_deref *src_tail) + struct 
vtn_type *type) { - nir_ssa_def *index; - nir_ssa_def *offset; - vtn_block_get_offset(b, src, &type, src_tail, &index, &offset); - nir_intrinsic_op op; if (src->var->data.mode == nir_var_uniform) { if (src->var->data.descriptor_set >= 0) { @@ -1407,9 +1513,15 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src, op = nir_intrinsic_load_ssbo; } - return _vtn_block_load(b, op, src->var->data.descriptor_set, - src->var->data.binding, src->var->data.mode, - index, offset, type); + nir_deref *block_deref = &src->deref; + nir_ssa_def *index = NULL; + if (op == nir_intrinsic_load_ubo || op == nir_intrinsic_load_ssbo) + index = get_vulkan_resource_index(b, &block_deref, &type); + + struct vtn_ssa_value *value = NULL; + _vtn_block_load_store(b, op, true, index, nir_imm_int(&b->nb, 0), + block_deref->child, type, &value); + return value; } /* @@ -1448,13 +1560,11 @@ static struct vtn_ssa_value * vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, struct vtn_type *src_type) { - nir_deref *src_tail = get_deref_tail(src); - - struct vtn_ssa_value *val; if (variable_is_external_block(src->var)) - val = vtn_block_load(b, src, src_type, src_tail); - else - val = _vtn_variable_load(b, src, src_tail); + return vtn_block_load(b, src, src_type); + + nir_deref *src_tail = get_deref_tail(src); + struct vtn_ssa_value *val = _vtn_variable_load(b, src, src_tail); if (src_tail->child) { nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); @@ -1470,60 +1580,16 @@ vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, return val; } -static void -_vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op, - struct vtn_ssa_value *src, unsigned set, unsigned binding, - nir_variable_mode mode, nir_ssa_def *index, - nir_ssa_def *offset, struct vtn_type *type) -{ - assert(src->type == type->type); - if (glsl_type_is_vector_or_scalar(type->type)) { - nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); - store->num_components = 
glsl_get_vector_elements(type->type); - store->const_index[0] = (1 << store->num_components) - 1; - store->src[0] = nir_src_for_ssa(src->def); - - nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, - set, binding, - mode, index); - store->src[1] = nir_src_for_ssa(res_index); - store->src[2] = nir_src_for_ssa(offset); - - nir_builder_instr_insert(&b->nb, &store->instr); - } else { - unsigned elems = glsl_get_length(type->type); - if (glsl_type_is_struct(type->type)) { - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *child_offset = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); - _vtn_block_store(b, op, src->elems[i], set, binding, mode, - index, child_offset, type->members[i]); - } - } else { - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *child_offset = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); - _vtn_block_store(b, op, src->elems[i], set, binding, mode, - index, child_offset, type->array_element); - } - } - } -} - static void vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, - nir_deref_var *dest, struct vtn_type *type, - nir_deref *dest_tail) + nir_deref_var *dest, struct vtn_type *type) { - nir_ssa_def *index; - nir_ssa_def *offset; - vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset); - - nir_intrinsic_op op = nir_intrinsic_store_ssbo; + nir_deref *block_deref = &dest->deref; + nir_ssa_def *index = get_vulkan_resource_index(b, &block_deref, &type); - return _vtn_block_store(b, op, src, dest->var->data.descriptor_set, - dest->var->data.binding, dest->var->data.mode, - index, offset, type); + _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, + nir_imm_int(&b->nb, 0), block_deref->child, + type, &src); } static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, @@ -1538,11 +1604,11 @@ void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, nir_deref_var *dest, struct vtn_type *dest_type) { - nir_deref *dest_tail = 
get_deref_tail(dest); if (variable_is_external_block(dest->var)) { assert(dest->var->data.mode == nir_var_shader_storage); - vtn_block_store(b, src, dest, dest_type, dest_tail); + vtn_block_store(b, src, dest, dest_type); } else { + nir_deref *dest_tail = get_deref_tail(dest); if (dest_tail->child) { struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); nir_deref_array *deref = nir_deref_as_array(dest_tail->child); -- cgit v1.2.3 From 72bff62e7f230621db0a33c04dac3eb94424d9dd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 7 Jan 2016 20:44:04 -0800 Subject: nir/spirv: Add support for SSBO atomics --- src/glsl/nir/spirv/spirv_to_nir.c | 113 +++++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 9b3d0cebbbd..919be098fbb 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2470,6 +2470,115 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, nir_builder_instr_insert(&b->nb, &intrin->instr); } +static void +vtn_handle_ssbo_atomic(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *pointer = vtn_value(b, w[3], vtn_value_type_deref); + struct vtn_type *type = pointer->deref_type; + nir_deref *deref = &pointer->deref->deref; + nir_ssa_def *index = get_vulkan_resource_index(b, &deref, &type); + + nir_ssa_def *offset = nir_imm_int(&b->nb, 0); + while (deref->child) { + deref = deref->child; + switch (deref->deref_type) { + case nir_deref_type_array: + offset = nir_iadd(&b->nb, offset, + nir_imul(&b->nb, deref_array_offset(b, deref), + nir_imm_int(&b->nb, type->stride))); + type = type->array_element; + continue; + + case nir_deref_type_struct: { + unsigned member = nir_deref_as_struct(deref)->index; + offset = nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, type->offsets[member])); + type = type->members[member]; + continue; + } 
+ + default: + unreachable("Invalid deref type"); + } + } + + /* + SpvScope scope = w[4]; + SpvMemorySemanticsMask semantics = w[5]; + */ + + nir_intrinsic_op op; + switch (opcode) { +#define OP(S, N) case SpvOp##S: op = nir_intrinsic_ssbo_##N; break; + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_imin) + OP(AtomicUMin, atomic_umin) + OP(AtomicSMax, atomic_imax) + OP(AtomicUMax, atomic_umax) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid SSBO atomic"); + } + + nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->nb.shader, op); + atomic->src[0] = nir_src_for_ssa(index); + atomic->src[1] = nir_src_for_ssa(offset); + + switch (opcode) { + case SpvOpAtomicIIncrement: + atomic->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); + break; + + case SpvOpAtomicIDecrement: + atomic->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); + break; + + case SpvOpAtomicISub: + atomic->src[2] = + nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); + break; + + case SpvOpAtomicCompareExchange: + atomic->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); + atomic->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def); + break; + /* Fall through */ + + case SpvOpAtomicExchange: + case SpvOpAtomicIAdd: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + atomic->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + default: + unreachable("Invalid SSBO atomic"); + } + + nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL); + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->def = 
&atomic->dest.ssa; + val->ssa->type = type->type; + + nir_builder_instr_insert(&b->nb, &atomic->instr); +} + static nir_alu_instr * create_vec(nir_shader *shader, unsigned num_components) { @@ -3627,8 +3736,10 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, if (pointer->value_type == vtn_value_type_image_pointer) { vtn_handle_image(b, opcode, w, count); } else { - assert(!"Atomic buffers not yet implemented"); + assert(pointer->value_type == vtn_value_type_deref); + vtn_handle_ssbo_atomic(b, opcode, w, count); } + break; } case SpvOpSNegate: -- cgit v1.2.3 From 393562f47b3a0a5f06f3c031e8e777dab6e6cc97 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 8 Jan 2016 11:02:17 -0800 Subject: nir/spirv: Split ALU operations out into their own file --- src/glsl/Makefile.sources | 4 +- src/glsl/nir/spirv/spirv_to_nir.c | 423 +------------------------------------- src/glsl/nir/spirv/vtn_alu.c | 420 +++++++++++++++++++++++++++++++++++++ src/glsl/nir/spirv/vtn_private.h | 9 + 4 files changed, 438 insertions(+), 418 deletions(-) create mode 100644 src/glsl/nir/spirv/vtn_alu.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 97fac8609b6..89113bcd1c6 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -95,8 +95,10 @@ NIR_FILES = \ SPIRV_FILES = \ nir/spirv/nir_spirv.h \ nir/spirv/spirv_to_nir.c \ + nir/spirv/vtn_alu.c \ nir/spirv/vtn_cfg.c \ - nir/spirv/vtn_glsl450.c + nir/spirv/vtn_glsl450.c \ + nir/spirv/vtn_private.h # libglsl diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 919be098fbb..191d35d9102 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1327,9 +1327,6 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0)); } -static struct vtn_ssa_value * -vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src); 
- static void _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, nir_ssa_def *index, nir_ssa_def *offset, nir_deref *deref, @@ -1365,7 +1362,7 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, (*inout)->type = glsl_matrix_type(base_type, vec_width, num_ops); } else { - transpose = vtn_transpose(b, *inout); + transpose = vtn_ssa_transpose(b, *inout); inout = &transpose; } } else { @@ -1383,7 +1380,7 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, } if (load && type->row_major) - *inout = vtn_transpose(b, *inout); + *inout = vtn_ssa_transpose(b, *inout); return; } else if (type->row_major) { @@ -2074,7 +2071,7 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, } } -static struct vtn_ssa_value * +struct vtn_ssa_value * vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) { struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); @@ -2598,8 +2595,8 @@ create_vec(nir_shader *shader, unsigned num_components) return vec; } -static struct vtn_ssa_value * -vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) +struct vtn_ssa_value * +vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) { if (src->transposed) return src->transposed; @@ -2628,411 +2625,6 @@ vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) return dest; } -/* - * Normally, column vectors in SPIR-V correspond to a single NIR SSA - * definition. But for matrix multiplies, we want to do one routine for - * multiplying a matrix by a matrix and then pretend that vectors are matrices - * with one column. So we "wrap" these things, and unwrap the result before we - * send it off. 
- */ - -static struct vtn_ssa_value * -vtn_wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) -{ - if (val == NULL) - return NULL; - - if (glsl_type_is_matrix(val->type)) - return val; - - struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); - dest->type = val->type; - dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); - dest->elems[0] = val; - - return dest; -} - -static struct vtn_ssa_value * -vtn_unwrap_matrix(struct vtn_ssa_value *val) -{ - if (glsl_type_is_matrix(val->type)) - return val; - - return val->elems[0]; -} - -static struct vtn_ssa_value * -vtn_matrix_multiply(struct vtn_builder *b, - struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) -{ - - struct vtn_ssa_value *src0 = vtn_wrap_matrix(b, _src0); - struct vtn_ssa_value *src1 = vtn_wrap_matrix(b, _src1); - struct vtn_ssa_value *src0_transpose = vtn_wrap_matrix(b, _src0->transposed); - struct vtn_ssa_value *src1_transpose = vtn_wrap_matrix(b, _src1->transposed); - - unsigned src0_rows = glsl_get_vector_elements(src0->type); - unsigned src0_columns = glsl_get_matrix_columns(src0->type); - unsigned src1_columns = glsl_get_matrix_columns(src1->type); - - const struct glsl_type *dest_type; - if (src1_columns > 1) { - dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), - src0_rows, src1_columns); - } else { - dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows); - } - struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); - - dest = vtn_wrap_matrix(b, dest); - - bool transpose_result = false; - if (src0_transpose && src1_transpose) { - /* transpose(A) * transpose(B) = transpose(B * A) */ - src1 = src0_transpose; - src0 = src1_transpose; - src0_transpose = NULL; - src1_transpose = NULL; - transpose_result = true; - } - - if (src0_transpose && !src1_transpose && - glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { - /* We already have the rows of src0 and the columns of src1 available, - * so we can just take the dot product of each 
row with each column to - * get the result. - */ - - for (unsigned i = 0; i < src1_columns; i++) { - nir_alu_instr *vec = create_vec(b->shader, src0_rows); - for (unsigned j = 0; j < src0_rows; j++) { - vec->src[j].src = - nir_src_for_ssa(nir_fdot(&b->nb, src0_transpose->elems[j]->def, - src1->elems[i]->def)); - } - - nir_builder_instr_insert(&b->nb, &vec->instr); - dest->elems[i]->def = &vec->dest.dest.ssa; - } - } else { - /* We don't handle the case where src1 is transposed but not src0, since - * the general case only uses individual components of src1 so the - * optimizer should chew through the transpose we emitted for src1. - */ - - for (unsigned i = 0; i < src1_columns; i++) { - /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ - dest->elems[i]->def = - nir_fmul(&b->nb, src0->elems[0]->def, - vtn_vector_extract(b, src1->elems[i]->def, 0)); - for (unsigned j = 1; j < src0_columns; j++) { - dest->elems[i]->def = - nir_fadd(&b->nb, dest->elems[i]->def, - nir_fmul(&b->nb, src0->elems[j]->def, - vtn_vector_extract(b, - src1->elems[i]->def, j))); - } - } - } - - dest = vtn_unwrap_matrix(dest); - - if (transpose_result) - dest = vtn_transpose(b, dest); - - return dest; -} - -static struct vtn_ssa_value * -vtn_mat_times_scalar(struct vtn_builder *b, - struct vtn_ssa_value *mat, - nir_ssa_def *scalar) -{ - struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); - for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { - if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) - dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); - else - dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); - } - - return dest; -} - -static void -vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - - switch (opcode) { - case SpvOpTranspose: { - struct vtn_ssa_value *src = vtn_ssa_value(b, w[3]); - val->ssa = 
vtn_transpose(b, src); - break; - } - - case SpvOpOuterProduct: { - struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); - struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); - - val->ssa = vtn_matrix_multiply(b, src0, vtn_transpose(b, src1)); - break; - } - - case SpvOpMatrixTimesScalar: { - struct vtn_ssa_value *mat = vtn_ssa_value(b, w[3]); - struct vtn_ssa_value *scalar = vtn_ssa_value(b, w[4]); - - if (mat->transposed) { - val->ssa = vtn_transpose(b, vtn_mat_times_scalar(b, mat->transposed, - scalar->def)); - } else { - val->ssa = vtn_mat_times_scalar(b, mat, scalar->def); - } - break; - } - - case SpvOpVectorTimesMatrix: - case SpvOpMatrixTimesVector: - case SpvOpMatrixTimesMatrix: { - struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); - struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); - - if (opcode == SpvOpVectorTimesMatrix) { - val->ssa = vtn_matrix_multiply(b, vtn_transpose(b, src1), src0); - } else { - val->ssa = vtn_matrix_multiply(b, src0, src1); - } - break; - } - - default: unreachable("unknown matrix opcode"); - } -} - -static void -vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->ssa = vtn_create_ssa_value(b, type); - - /* Collect the various SSA sources */ - const unsigned num_inputs = count - 3; - nir_ssa_def *src[4]; - for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 3])->def; - for (unsigned i = num_inputs; i < 4; i++) - src[i] = NULL; - - /* Indicates that the first two arguments should be swapped. This is - * used for implementing greater-than and less-than-or-equal. 
- */ - bool swap = false; - - nir_op op; - switch (opcode) { - /* Basic ALU operations */ - case SpvOpSNegate: op = nir_op_ineg; break; - case SpvOpFNegate: op = nir_op_fneg; break; - case SpvOpNot: op = nir_op_inot; break; - - case SpvOpAny: - if (src[0]->num_components == 1) { - op = nir_op_imov; - } else { - switch (src[0]->num_components) { - case 2: op = nir_op_bany_inequal2; break; - case 3: op = nir_op_bany_inequal3; break; - case 4: op = nir_op_bany_inequal4; break; - } - src[1] = nir_imm_int(&b->nb, NIR_FALSE); - } - break; - - case SpvOpAll: - if (src[0]->num_components == 1) { - op = nir_op_imov; - } else { - switch (src[0]->num_components) { - case 2: op = nir_op_ball_iequal2; break; - case 3: op = nir_op_ball_iequal3; break; - case 4: op = nir_op_ball_iequal4; break; - } - src[1] = nir_imm_int(&b->nb, NIR_TRUE); - } - break; - - case SpvOpIAdd: op = nir_op_iadd; break; - case SpvOpFAdd: op = nir_op_fadd; break; - case SpvOpISub: op = nir_op_isub; break; - case SpvOpFSub: op = nir_op_fsub; break; - case SpvOpIMul: op = nir_op_imul; break; - case SpvOpFMul: op = nir_op_fmul; break; - case SpvOpUDiv: op = nir_op_udiv; break; - case SpvOpSDiv: op = nir_op_idiv; break; - case SpvOpFDiv: op = nir_op_fdiv; break; - case SpvOpUMod: op = nir_op_umod; break; - case SpvOpSMod: op = nir_op_umod; break; /* FIXME? 
*/ - case SpvOpFMod: op = nir_op_fmod; break; - - case SpvOpDot: - assert(src[0]->num_components == src[1]->num_components); - switch (src[0]->num_components) { - case 1: op = nir_op_fmul; break; - case 2: op = nir_op_fdot2; break; - case 3: op = nir_op_fdot3; break; - case 4: op = nir_op_fdot4; break; - } - break; - - case SpvOpIAddCarry: - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = - nir_b2i(&b->nb, nir_uadd_carry(&b->nb, src[0], src[1])); - return; - - case SpvOpISubBorrow: - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = - nir_b2i(&b->nb, nir_usub_borrow(&b->nb, src[0], src[1])); - return; - - case SpvOpUMulExtended: - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]); - return; - - case SpvOpSMulExtended: - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]); - return; - - case SpvOpShiftRightLogical: op = nir_op_ushr; break; - case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; - case SpvOpShiftLeftLogical: op = nir_op_ishl; break; - case SpvOpLogicalOr: op = nir_op_ior; break; - case SpvOpLogicalEqual: op = nir_op_ieq; break; - case SpvOpLogicalNotEqual: op = nir_op_ine; break; - case SpvOpLogicalAnd: op = nir_op_iand; break; - case SpvOpLogicalNot: op = nir_op_inot; break; - case SpvOpBitwiseOr: op = nir_op_ior; break; - case SpvOpBitwiseXor: op = nir_op_ixor; break; - case SpvOpBitwiseAnd: op = nir_op_iand; break; - case SpvOpSelect: op = nir_op_bcsel; break; - case SpvOpIEqual: op = nir_op_ieq; break; - - case SpvOpBitFieldInsert: op = nir_op_bitfield_insert; break; - case SpvOpBitFieldSExtract: op = nir_op_ibitfield_extract; break; - 
case SpvOpBitFieldUExtract: op = nir_op_ubitfield_extract; break; - case SpvOpBitReverse: op = nir_op_bitfield_reverse; break; - case SpvOpBitCount: op = nir_op_bit_count; break; - - /* Comparisons: (TODO: How do we want to handled ordered/unordered?) */ - case SpvOpFOrdEqual: op = nir_op_feq; break; - case SpvOpFUnordEqual: op = nir_op_feq; break; - case SpvOpINotEqual: op = nir_op_ine; break; - case SpvOpFOrdNotEqual: op = nir_op_fne; break; - case SpvOpFUnordNotEqual: op = nir_op_fne; break; - case SpvOpULessThan: op = nir_op_ult; break; - case SpvOpSLessThan: op = nir_op_ilt; break; - case SpvOpFOrdLessThan: op = nir_op_flt; break; - case SpvOpFUnordLessThan: op = nir_op_flt; break; - case SpvOpUGreaterThan: op = nir_op_ult; swap = true; break; - case SpvOpSGreaterThan: op = nir_op_ilt; swap = true; break; - case SpvOpFOrdGreaterThan: op = nir_op_flt; swap = true; break; - case SpvOpFUnordGreaterThan: op = nir_op_flt; swap = true; break; - case SpvOpULessThanEqual: op = nir_op_uge; swap = true; break; - case SpvOpSLessThanEqual: op = nir_op_ige; swap = true; break; - case SpvOpFOrdLessThanEqual: op = nir_op_fge; swap = true; break; - case SpvOpFUnordLessThanEqual: op = nir_op_fge; swap = true; break; - case SpvOpUGreaterThanEqual: op = nir_op_uge; break; - case SpvOpSGreaterThanEqual: op = nir_op_ige; break; - case SpvOpFOrdGreaterThanEqual: op = nir_op_fge; break; - case SpvOpFUnordGreaterThanEqual:op = nir_op_fge; break; - - /* Conversions: */ - case SpvOpConvertFToU: op = nir_op_f2u; break; - case SpvOpConvertFToS: op = nir_op_f2i; break; - case SpvOpConvertSToF: op = nir_op_i2f; break; - case SpvOpConvertUToF: op = nir_op_u2f; break; - case SpvOpBitcast: op = nir_op_imov; break; - case SpvOpUConvert: - case SpvOpSConvert: - op = nir_op_imov; /* TODO: NIR is 32-bit only; these are no-ops. 
*/ - break; - case SpvOpFConvert: - op = nir_op_fmov; - break; - - /* Derivatives: */ - case SpvOpDPdx: op = nir_op_fddx; break; - case SpvOpDPdy: op = nir_op_fddy; break; - case SpvOpDPdxFine: op = nir_op_fddx_fine; break; - case SpvOpDPdyFine: op = nir_op_fddy_fine; break; - case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; - case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; - case SpvOpFwidth: - val->ssa->def = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); - return; - case SpvOpFwidthFine: - val->ssa->def = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); - return; - case SpvOpFwidthCoarse: - val->ssa->def = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); - return; - - case SpvOpVectorTimesScalar: - /* The builder will take care of splatting for us. */ - val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); - return; - - case SpvOpSRem: - case SpvOpFRem: - unreachable("No NIR equivalent"); - - case SpvOpIsNan: - val->ssa->def = nir_fne(&b->nb, src[0], src[0]); - return; - - case SpvOpIsInf: - val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), - nir_imm_float(&b->nb, INFINITY)); - return; - - case SpvOpIsFinite: - case SpvOpIsNormal: - case SpvOpSignBitSet: - case SpvOpLessOrGreater: - case SpvOpOrdered: - case SpvOpUnordered: - default: - unreachable("Unhandled opcode"); - } - - if (swap) { - nir_ssa_def *tmp = src[0]; - src[0] = src[1]; - src[1] = tmp; - } - - val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); -} - static nir_ssa_def * vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) { @@ -3835,16 +3427,13 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpBitFieldUExtract: case SpvOpBitReverse: case SpvOpBitCount: - vtn_handle_alu(b, opcode, w, count); - break; - 
case SpvOpTranspose: case SpvOpOuterProduct: case SpvOpMatrixTimesScalar: case SpvOpVectorTimesMatrix: case SpvOpMatrixTimesVector: case SpvOpMatrixTimesMatrix: - vtn_handle_matrix_alu(b, opcode, w, count); + vtn_handle_alu(b, opcode, w, count); break; case SpvOpVectorExtractDynamic: diff --git a/src/glsl/nir/spirv/vtn_alu.c b/src/glsl/nir/spirv/vtn_alu.c new file mode 100644 index 00000000000..a8c6e5cd890 --- /dev/null +++ b/src/glsl/nir/spirv/vtn_alu.c @@ -0,0 +1,420 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vtn_private.h" + +/* + * Normally, column vectors in SPIR-V correspond to a single NIR SSA + * definition. But for matrix multiplies, we want to do one routine for + * multiplying a matrix by a matrix and then pretend that vectors are matrices + * with one column. 
So we "wrap" these things, and unwrap the result before we + * send it off. + */ + +static struct vtn_ssa_value * +wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (val == NULL) + return NULL; + + if (glsl_type_is_matrix(val->type)) + return val; + + struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); + dest->type = val->type; + dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); + dest->elems[0] = val; + + return dest; +} + +static struct vtn_ssa_value * +unwrap_matrix(struct vtn_ssa_value *val) +{ + if (glsl_type_is_matrix(val->type)) + return val; + + return val->elems[0]; +} + +static struct vtn_ssa_value * +matrix_multiply(struct vtn_builder *b, + struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) +{ + + struct vtn_ssa_value *src0 = wrap_matrix(b, _src0); + struct vtn_ssa_value *src1 = wrap_matrix(b, _src1); + struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed); + struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed); + + unsigned src0_rows = glsl_get_vector_elements(src0->type); + unsigned src0_columns = glsl_get_matrix_columns(src0->type); + unsigned src1_columns = glsl_get_matrix_columns(src1->type); + + const struct glsl_type *dest_type; + if (src1_columns > 1) { + dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns); + } else { + dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows); + } + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); + + dest = wrap_matrix(b, dest); + + bool transpose_result = false; + if (src0_transpose && src1_transpose) { + /* transpose(A) * transpose(B) = transpose(B * A) */ + src1 = src0_transpose; + src0 = src1_transpose; + src0_transpose = NULL; + src1_transpose = NULL; + transpose_result = true; + } + + if (src0_transpose && !src1_transpose && + glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { + /* We already have the rows of src0 and the columns of src1 available, + * so 
we can just take the dot product of each row with each column to + * get the result. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + nir_ssa_def *vec_src[4]; + for (unsigned j = 0; j < src0_rows; j++) { + vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def, + src1->elems[i]->def); + } + dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows); + } + } else { + /* We don't handle the case where src1 is transposed but not src0, since + * the general case only uses individual components of src1 so the + * optimizer should chew through the transpose we emitted for src1. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[0]->def, + nir_channel(&b->nb, src1->elems[i]->def, 0)); + for (unsigned j = 1; j < src0_columns; j++) { + dest->elems[i]->def = + nir_fadd(&b->nb, dest->elems[i]->def, + nir_fmul(&b->nb, src0->elems[j]->def, + nir_channel(&b->nb, src1->elems[i]->def, j))); + } + } + } + + dest = unwrap_matrix(dest); + + if (transpose_result) + dest = vtn_ssa_transpose(b, dest); + + return dest; +} + +static struct vtn_ssa_value * +mat_times_scalar(struct vtn_builder *b, + struct vtn_ssa_value *mat, + nir_ssa_def *scalar) +{ + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); + for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { + if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) + dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); + else + dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); + } + + return dest; +} + +static void +vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, + struct vtn_value *dest, + struct vtn_ssa_value *src0, struct vtn_ssa_value *src1) +{ + switch (opcode) { + case SpvOpTranspose: + dest->ssa = vtn_ssa_transpose(b, src0); + break; + + case SpvOpOuterProduct: + dest->ssa = matrix_multiply(b, src0, vtn_ssa_transpose(b, src1)); + break; + + 
case SpvOpMatrixTimesScalar: + if (src0->transposed) { + dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed, + src1->def)); + } else { + dest->ssa = mat_times_scalar(b, src0, src1->def); + } + break; + + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + if (opcode == SpvOpVectorTimesMatrix) { + dest->ssa = matrix_multiply(b, vtn_ssa_transpose(b, src1), src0); + } else { + dest->ssa = matrix_multiply(b, src0, src1); + } + break; + + default: unreachable("unknown matrix opcode"); + } +} + +void +vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + + /* Collect the various SSA sources */ + const unsigned num_inputs = count - 3; + struct vtn_ssa_value *vtn_src[4] = { NULL, }; + for (unsigned i = 0; i < num_inputs; i++) + vtn_src[i] = vtn_ssa_value(b, w[i + 3]); + + if (glsl_type_is_matrix(vtn_src[0]->type) || + (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) { + vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]); + return; + } + + val->ssa = vtn_create_ssa_value(b, type); + nir_ssa_def *src[4] = { NULL, }; + for (unsigned i = 0; i < num_inputs; i++) { + assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type)); + src[i] = vtn_src[i]->def; + } + + /* Indicates that the first two arguments should be swapped. This is + * used for implementing greater-than and less-than-or-equal. 
+ */ + bool swap = false; + + nir_op op; + switch (opcode) { + /* Basic ALU operations */ + case SpvOpSNegate: op = nir_op_ineg; break; + case SpvOpFNegate: op = nir_op_fneg; break; + case SpvOpNot: op = nir_op_inot; break; + + case SpvOpAny: + if (src[0]->num_components == 1) { + op = nir_op_imov; + } else { + switch (src[0]->num_components) { + case 2: op = nir_op_bany_inequal2; break; + case 3: op = nir_op_bany_inequal3; break; + case 4: op = nir_op_bany_inequal4; break; + } + src[1] = nir_imm_int(&b->nb, NIR_FALSE); + } + break; + + case SpvOpAll: + if (src[0]->num_components == 1) { + op = nir_op_imov; + } else { + switch (src[0]->num_components) { + case 2: op = nir_op_ball_iequal2; break; + case 3: op = nir_op_ball_iequal3; break; + case 4: op = nir_op_ball_iequal4; break; + } + src[1] = nir_imm_int(&b->nb, NIR_TRUE); + } + break; + + case SpvOpIAdd: op = nir_op_iadd; break; + case SpvOpFAdd: op = nir_op_fadd; break; + case SpvOpISub: op = nir_op_isub; break; + case SpvOpFSub: op = nir_op_fsub; break; + case SpvOpIMul: op = nir_op_imul; break; + case SpvOpFMul: op = nir_op_fmul; break; + case SpvOpUDiv: op = nir_op_udiv; break; + case SpvOpSDiv: op = nir_op_idiv; break; + case SpvOpFDiv: op = nir_op_fdiv; break; + case SpvOpUMod: op = nir_op_umod; break; + case SpvOpSMod: op = nir_op_umod; break; /* FIXME? 
*/ + case SpvOpFMod: op = nir_op_fmod; break; + + case SpvOpDot: + assert(src[0]->num_components == src[1]->num_components); + switch (src[0]->num_components) { + case 1: op = nir_op_fmul; break; + case 2: op = nir_op_fdot2; break; + case 3: op = nir_op_fdot3; break; + case 4: op = nir_op_fdot4; break; + } + break; + + case SpvOpIAddCarry: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = + nir_b2i(&b->nb, nir_uadd_carry(&b->nb, src[0], src[1])); + return; + + case SpvOpISubBorrow: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = + nir_b2i(&b->nb, nir_usub_borrow(&b->nb, src[0], src[1])); + return; + + case SpvOpUMulExtended: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]); + return; + + case SpvOpSMulExtended: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]); + return; + + case SpvOpShiftRightLogical: op = nir_op_ushr; break; + case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; + case SpvOpShiftLeftLogical: op = nir_op_ishl; break; + case SpvOpLogicalOr: op = nir_op_ior; break; + case SpvOpLogicalEqual: op = nir_op_ieq; break; + case SpvOpLogicalNotEqual: op = nir_op_ine; break; + case SpvOpLogicalAnd: op = nir_op_iand; break; + case SpvOpLogicalNot: op = nir_op_inot; break; + case SpvOpBitwiseOr: op = nir_op_ior; break; + case SpvOpBitwiseXor: op = nir_op_ixor; break; + case SpvOpBitwiseAnd: op = nir_op_iand; break; + case SpvOpSelect: op = nir_op_bcsel; break; + case SpvOpIEqual: op = nir_op_ieq; break; + + case SpvOpBitFieldInsert: op = nir_op_bitfield_insert; break; + case SpvOpBitFieldSExtract: op = nir_op_ibitfield_extract; break; + 
case SpvOpBitFieldUExtract: op = nir_op_ubitfield_extract; break; + case SpvOpBitReverse: op = nir_op_bitfield_reverse; break; + case SpvOpBitCount: op = nir_op_bit_count; break; + + /* Comparisons: (TODO: How do we want to handled ordered/unordered?) */ + case SpvOpFOrdEqual: op = nir_op_feq; break; + case SpvOpFUnordEqual: op = nir_op_feq; break; + case SpvOpINotEqual: op = nir_op_ine; break; + case SpvOpFOrdNotEqual: op = nir_op_fne; break; + case SpvOpFUnordNotEqual: op = nir_op_fne; break; + case SpvOpULessThan: op = nir_op_ult; break; + case SpvOpSLessThan: op = nir_op_ilt; break; + case SpvOpFOrdLessThan: op = nir_op_flt; break; + case SpvOpFUnordLessThan: op = nir_op_flt; break; + case SpvOpUGreaterThan: op = nir_op_ult; swap = true; break; + case SpvOpSGreaterThan: op = nir_op_ilt; swap = true; break; + case SpvOpFOrdGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpFUnordGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpULessThanEqual: op = nir_op_uge; swap = true; break; + case SpvOpSLessThanEqual: op = nir_op_ige; swap = true; break; + case SpvOpFOrdLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpFUnordLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpUGreaterThanEqual: op = nir_op_uge; break; + case SpvOpSGreaterThanEqual: op = nir_op_ige; break; + case SpvOpFOrdGreaterThanEqual: op = nir_op_fge; break; + case SpvOpFUnordGreaterThanEqual:op = nir_op_fge; break; + + /* Conversions: */ + case SpvOpConvertFToU: op = nir_op_f2u; break; + case SpvOpConvertFToS: op = nir_op_f2i; break; + case SpvOpConvertSToF: op = nir_op_i2f; break; + case SpvOpConvertUToF: op = nir_op_u2f; break; + case SpvOpBitcast: op = nir_op_imov; break; + case SpvOpUConvert: + case SpvOpSConvert: + op = nir_op_imov; /* TODO: NIR is 32-bit only; these are no-ops. 
*/ + break; + case SpvOpFConvert: + op = nir_op_fmov; + break; + + /* Derivatives: */ + case SpvOpDPdx: op = nir_op_fddx; break; + case SpvOpDPdy: op = nir_op_fddy; break; + case SpvOpDPdxFine: op = nir_op_fddx_fine; break; + case SpvOpDPdyFine: op = nir_op_fddy_fine; break; + case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; + case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; + case SpvOpFwidth: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); + return; + case SpvOpFwidthFine: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); + return; + case SpvOpFwidthCoarse: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); + return; + + case SpvOpVectorTimesScalar: + /* The builder will take care of splatting for us. */ + val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); + return; + + case SpvOpSRem: + case SpvOpFRem: + unreachable("No NIR equivalent"); + + case SpvOpIsNan: + val->ssa->def = nir_fne(&b->nb, src[0], src[0]); + return; + + case SpvOpIsInf: + val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), + nir_imm_float(&b->nb, INFINITY)); + return; + + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + default: + unreachable("Unhandled opcode"); + } + + if (swap) { + nir_ssa_def *tmp = src[0]; + src[0] = src[1]; + src[1] = tmp; + } + + val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); +} diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 0fa7dd4b041..14355c901f0 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -363,6 +363,12 @@ vtn_value(struct vtn_builder *b, uint32_t value_id, struct vtn_ssa_value *vtn_ssa_value(struct 
vtn_builder *b, uint32_t value_id); +struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b, + const struct glsl_type *type); + +struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b, + struct vtn_ssa_value *src); + void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, nir_deref_var *dest, struct vtn_type *dest_type); @@ -384,5 +390,8 @@ typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *, void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, vtn_execution_mode_foreach_cb cb, void *data); +void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count); + bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, const uint32_t *words, unsigned count); -- cgit v1.2.3 From 927ef0ea4ecc006e0f37d9a0064931ade6482298 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 8 Jan 2016 11:18:47 -0800 Subject: nir/spirv: Add support for add, subtract, and negate on matrices --- src/glsl/nir/spirv/vtn_alu.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_alu.c b/src/glsl/nir/spirv/vtn_alu.c index a8c6e5cd890..e8316d1f42e 100644 --- a/src/glsl/nir/spirv/vtn_alu.c +++ b/src/glsl/nir/spirv/vtn_alu.c @@ -157,6 +157,32 @@ vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, struct vtn_ssa_value *src0, struct vtn_ssa_value *src1) { switch (opcode) { + case SpvOpFNegate: { + dest->ssa = vtn_create_ssa_value(b, src0->type); + unsigned cols = glsl_get_matrix_columns(src0->type); + for (unsigned i = 0; i < cols; i++) + dest->ssa->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def); + break; + } + + case SpvOpFAdd: { + dest->ssa = vtn_create_ssa_value(b, src0->type); + unsigned cols = glsl_get_matrix_columns(src0->type); + for (unsigned i = 0; i < cols; i++) + dest->ssa->elems[i]->def = + nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def); + break; + } + + case 
SpvOpFSub: { + dest->ssa = vtn_create_ssa_value(b, src0->type); + unsigned cols = glsl_get_matrix_columns(src0->type); + for (unsigned i = 0; i < cols; i++) + dest->ssa->elems[i]->def = + nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def); + break; + } + case SpvOpTranspose: dest->ssa = vtn_ssa_transpose(b, src0); break; -- cgit v1.2.3 From 8b9dfb4b6d05c0af48dd75e9ab601bbb6ebbd2e8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 8 Jan 2016 11:38:59 -0800 Subject: nir/spirv: Add real support for outer products --- src/glsl/nir/spirv/vtn_alu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_alu.c b/src/glsl/nir/spirv/vtn_alu.c index e8316d1f42e..70875007ec7 100644 --- a/src/glsl/nir/spirv/vtn_alu.c +++ b/src/glsl/nir/spirv/vtn_alu.c @@ -187,10 +187,6 @@ vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, dest->ssa = vtn_ssa_transpose(b, src0); break; - case SpvOpOuterProduct: - dest->ssa = matrix_multiply(b, src0, vtn_ssa_transpose(b, src1)); - break; - case SpvOpMatrixTimesScalar: if (src0->transposed) { dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed, @@ -292,6 +288,14 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpSMod: op = nir_op_umod; break; /* FIXME? 
*/ case SpvOpFMod: op = nir_op_fmod; break; + case SpvOpOuterProduct: { + for (unsigned i = 0; i < src[1]->num_components; i++) { + val->ssa->elems[i]->def = + nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i)); + } + return; + } + case SpvOpDot: assert(src[0]->num_components == src[1]->num_components); switch (src[0]->num_components) { -- cgit v1.2.3 From fe2f44f2a4176795aeb3ba4ecb8387ed0ca1a800 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 8 Jan 2016 11:50:32 -0800 Subject: nir/spirv: Use create_ssa_value for block_load_store --- src/glsl/nir/spirv/spirv_to_nir.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 191d35d9102..4f5639c4cf4 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1316,9 +1316,7 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, if (load) { nir_ssa_dest_init(&instr->instr, &instr->dest, instr->num_components, NULL); - *inout = rzalloc(b, struct vtn_ssa_value); (*inout)->def = &instr->dest.ssa; - (*inout)->type = type; } nir_builder_instr_insert(&b->nb, &instr->instr); @@ -1332,11 +1330,8 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, nir_ssa_def *index, nir_ssa_def *offset, nir_deref *deref, struct vtn_type *type, struct vtn_ssa_value **inout) { - if (deref == NULL && load) { - assert(*inout == NULL); - *inout = rzalloc(b, struct vtn_ssa_value); - (*inout)->type = type->type; - } + if (load && deref == NULL && *inout == NULL) + *inout = vtn_create_ssa_value(b, type->type); enum glsl_base_type base_type = glsl_get_base_type(type->type); switch (base_type) { @@ -1350,18 +1345,12 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, if (glsl_type_is_matrix(type->type)) { if (deref == NULL) { /* Loading the whole matrix */ - if (load) - (*inout)->elems = ralloc_array(b, 
struct vtn_ssa_value *, 4); - struct vtn_ssa_value *transpose; unsigned num_ops, vec_width; if (type->row_major) { num_ops = glsl_get_vector_elements(type->type); vec_width = glsl_get_matrix_columns(type->type); - if (load) { - (*inout)->type = - glsl_matrix_type(base_type, vec_width, num_ops); - } else { + if (!load) { transpose = vtn_ssa_transpose(b, *inout); inout = &transpose; } @@ -1445,9 +1434,6 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, return; } else { unsigned elems = glsl_get_length(type->type); - if (load) - (*inout)->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) { nir_ssa_def *elem_off = nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); @@ -1469,9 +1455,6 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, return; } else { unsigned elems = glsl_get_length(type->type); - if (load) - (*inout)->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) { nir_ssa_def *elem_off = nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); -- cgit v1.2.3 From 4e15d26e47c7b8f11ab9fd1bfefb9b7515a71427 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 8 Jan 2016 12:27:25 -0800 Subject: nir/spirv: Fix a small bug in row-major matrix loading --- src/glsl/nir/spirv/spirv_to_nir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 4f5639c4cf4..49b9a6500f8 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1350,7 +1350,11 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, if (type->row_major) { num_ops = glsl_get_vector_elements(type->type); vec_width = glsl_get_matrix_columns(type->type); - if (!load) { + if (load) { + const struct glsl_type *transpose_type = + glsl_matrix_type(base_type, vec_width, num_ops); + 
*inout = vtn_create_ssa_value(b, transpose_type); + } else { transpose = vtn_ssa_transpose(b, *inout); inout = &transpose; } -- cgit v1.2.3 From c7f6e42a7d1280a0f67e52adc5c8807b78be46e8 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 8 Jan 2016 12:15:29 -0800 Subject: anv: Increate dynamic pool block size from 2k to 16k This is needed because compute push constant data is replicated per invocation. For gen7, this can be up to 64. With a push constant data max of 128 bytes, this is 8k of data. We need additional space for local-id payloads, so we are going with 16k for now. Signed-off-by: Jordan Justen --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 74b813e9e40..9c54875a736 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -718,7 +718,7 @@ VkResult anv_CreateDevice( anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); - anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048); + anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384); anv_state_pool_init(&device->dynamic_state_pool, &device->dynamic_state_block_pool); -- cgit v1.2.3 From 7a1c4a0ccc56e0b633777b26a9a02d62660a4a1b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 8 Jan 2016 16:02:06 -0800 Subject: nir/spirv: Add matrix determinants and inverses --- src/glsl/nir/spirv/vtn_glsl450.c | 155 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 739e4394954..82cfc8c91a9 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -28,6 +28,141 @@ #include "vtn_private.h" #include "GLSL.std.450.h" +static nir_ssa_def * +build_mat2_det(nir_builder *b, nir_ssa_def *col[2]) +{ + unsigned swiz[4] = {1, 0, 0, 0}; + nir_ssa_def *p = nir_fmul(b, 
col[0], nir_swizzle(b, col[1], swiz, 2, true)); + return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1)); +} + +static nir_ssa_def * +build_mat3_det(nir_builder *b, nir_ssa_def *col[3]) +{ + unsigned yzx[4] = {1, 2, 0, 0}; + unsigned zxy[4] = {2, 0, 1, 0}; + + nir_ssa_def *prod0 = + nir_fmul(b, col[0], + nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true), + nir_swizzle(b, col[2], zxy, 3, true))); + nir_ssa_def *prod1 = + nir_fmul(b, col[0], + nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true), + nir_swizzle(b, col[2], yzx, 3, true))); + + nir_ssa_def *diff = nir_fsub(b, prod0, prod1); + + return nir_fadd(b, nir_channel(b, diff, 0), + nir_fadd(b, nir_channel(b, diff, 1), + nir_channel(b, diff, 2))); +} + +static nir_ssa_def * +build_mat4_det(nir_builder *b, nir_ssa_def **col) +{ + nir_ssa_def *subdet[4]; + for (unsigned i = 0; i < 4; i++) { + unsigned swiz[3]; + for (unsigned j = 0; j < 4; j++) + swiz[j - (j > i)] = j; + + nir_ssa_def *subcol[3]; + subcol[0] = nir_swizzle(b, col[1], swiz, 3, true); + subcol[1] = nir_swizzle(b, col[2], swiz, 3, true); + subcol[2] = nir_swizzle(b, col[3], swiz, 3, true); + + subdet[i] = build_mat3_det(b, subcol); + } + + nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4)); + + return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0), + nir_channel(b, prod, 1)), + nir_fsub(b, nir_channel(b, prod, 2), + nir_channel(b, prod, 3))); +} + +static nir_ssa_def * +build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + unsigned size = glsl_get_vector_elements(src->type); + + nir_ssa_def *cols[4]; + for (unsigned i = 0; i < size; i++) + cols[i] = src->elems[i]->def; + + switch(size) { + case 2: return build_mat2_det(&b->nb, cols); + case 3: return build_mat3_det(&b->nb, cols); + case 4: return build_mat4_det(&b->nb, cols); + default: + unreachable("Invalid matrix size"); + } +} + +/* Computes the determinate of the submatrix given by taking src and + * removing the specified row and column. 
+ */ +static nir_ssa_def * +build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src, + unsigned size, unsigned row, unsigned col) +{ + assert(row < size && col < size); + if (size == 2) { + return nir_channel(b, src->elems[1 - col]->def, 1 - row); + } else { + /* Swizzle to get all but the specified row */ + unsigned swiz[3]; + for (unsigned j = 0; j < 4; j++) + swiz[j - (j > row)] = j; + + /* Grab all but the specified column */ + nir_ssa_def *subcol[3]; + for (unsigned j = 0; j < size; j++) { + if (j != col) { + subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def, + swiz, size - 1, true); + } + } + + if (size == 3) { + return build_mat2_det(b, subcol); + } else { + assert(size == 4); + return build_mat3_det(b, subcol); + } + } +} + +static struct vtn_ssa_value * +matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + nir_ssa_def *adj_col[4]; + unsigned size = glsl_get_vector_elements(src->type); + + /* Build up an adjugate matrix */ + for (unsigned c = 0; c < size; c++) { + nir_ssa_def *elem[4]; + for (unsigned r = 0; r < size; r++) { + elem[r] = build_mat_subdet(&b->nb, src, size, c, r); + + if ((r + c) % 2) + elem[r] = nir_fneg(&b->nb, elem[r]); + } + + adj_col[c] = nir_vec(&b->nb, elem, size); + } + + nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src)); + + struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type); + for (unsigned i = 0; i < size; i++) + val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv); + + return val; +} + static nir_ssa_def* build_length(nir_builder *b, nir_ssa_def *vec) { @@ -309,18 +444,30 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, - const uint32_t *words, unsigned count) + const uint32_t *w, unsigned count) { switch ((enum GLSLstd450)ext_opcode) { - case GLSLstd450Determinant: - case GLSLstd450MatrixInverse: + case GLSLstd450Determinant: { + struct vtn_value *val = 
vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5])); + break; + } + + case GLSLstd450MatrixInverse: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5])); + break; + } + case GLSLstd450InterpolateAtCentroid: case GLSLstd450InterpolateAtSample: case GLSLstd450InterpolateAtOffset: unreachable("Unhandled opcode"); default: - handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, words, count); + handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count); } return true; -- cgit v1.2.3 From cfdc955fd5e1e965c458e50f4dc877653a463684 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 8 Jan 2016 16:44:54 -0800 Subject: anv/reloc_list: Make valgrind explicitly check relocation data --- src/vulkan/anv_batch_chain.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 41bae981397..466a3624234 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -160,6 +160,7 @@ anv_reloc_list_add(struct anv_reloc_list *list, entry->presumed_offset = target_bo->offset; entry->read_domains = 0; entry->write_domain = 0; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry))); return target_bo->offset + delta; } -- cgit v1.2.3 From 7c5e1fd9984614ff3952bbabf946dac0e2a0c2b6 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 8 Jan 2016 22:24:58 -0800 Subject: vk: Remove unsupported warnings for Skylake and Broxton These are working as well as Broadwell and Cherryiew. The recent merge from mesa master brings in Kabylake device info and that should be all we need to enable that. 
--- src/vulkan/anv_device.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 9c54875a736..0bd040b13f2 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -89,12 +89,9 @@ anv_physical_device_init(struct anv_physical_device *device, fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); } else if (device->info->gen == 7 && device->info->is_baytrail) { fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n"); - } else if (device->info->gen == 9 && !device->info->is_broxton) { - fprintf(stderr, "WARNING: Skylake Vulkan support is incomplete\n"); - } else if (device->info->gen == 9 && device->info->is_broxton) { - fprintf(stderr, "WARNING: Broxton Vulkan support is incomplete\n"); - } else if (device->info->gen == 8) { - /* Broadwell/Cherryview is as fully supported as anything */ + } else if (device->info->gen >= 8) { + /* Broadwell, Cherryview, Skylake, Broxton, Kabylake is as fully + * supported as anything */ } else { result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, "Vulkan not yet supported on %s", device->name); -- cgit v1.2.3 From a8cdef3dcef0e5da809f306a82aa6ffc9c1e6a71 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 8 Jan 2016 22:50:18 -0800 Subject: vk: Only begin subpass if we're continuing a render pass If VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT is not set in pBeginInfo->flags, we don't have a render pass or framebuffer. Change the condition that guard looking up render pass and framebuffer to test for VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT instead of VK_COMMAND_BUFFER_LEVEL_SECONDARY. Fixes all remaining crashes in dEQP-VK.api.command_buffers.*. 
--- src/vulkan/anv_cmd_buffer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 0407ad92fba..5507400c84a 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -297,7 +297,11 @@ VkResult anv_BeginCommandBuffer( cmd_buffer->usage_flags = pBeginInfo->flags; - if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY || + !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)); + + if (cmd_buffer->usage_flags & + VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { cmd_buffer->state.framebuffer = anv_framebuffer_from_handle(pBeginInfo->framebuffer); cmd_buffer->state.pass = -- cgit v1.2.3 From bbb2a85c8115bd003639e5e854c0753d613cec95 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 8 Jan 2016 23:43:20 -0800 Subject: vk: Assert on use of uninitialized surface state This exposes a case where we want to anv_CmdCopyBufferToImage() on an image that wasn't created with VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT and end up using uninitialized color_rt_surface_state from the meta image view. 
--- src/vulkan/anv_cmd_buffer.c | 5 +++++ src/vulkan/gen7_state.c | 6 ++++++ src/vulkan/gen8_state.c | 6 ++++++ 3 files changed, 17 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 5507400c84a..d146f9ac467 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -657,6 +657,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, const struct anv_image_view *iview = fb->attachments[subpass->color_attachments[a]]; + assert(iview->color_rt_surface_state.alloc_size); bt_map[a] = iview->color_rt_surface_state.offset + state_offset; add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, iview->bo, iview->offset); @@ -716,12 +717,14 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: surface_state = desc->image_view->nonrt_surface_state; + assert(surface_state.alloc_size); bo = desc->image_view->bo; bo_offset = desc->image_view->offset; break; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { surface_state = desc->image_view->storage_surface_state; + assert(surface_state.alloc_size); bo = desc->image_view->bo; bo_offset = desc->image_view->offset; @@ -740,12 +743,14 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: surface_state = desc->buffer_view->surface_state; + assert(surface_state.alloc_size); bo = desc->buffer_view->bo; bo_offset = desc->buffer_view->offset; break; case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: surface_state = desc->buffer_view->storage_surface_state; + assert(surface_state.alloc_size); bo = desc->buffer_view->bo; bo_offset = desc->buffer_view->offset; diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 257cb35aca9..88598cea18e 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -251,6 +251,8 @@ 
genX(image_view_init)(struct anv_image_view *iview, if (!device->info.has_llc) anv_state_clflush(iview->nonrt_surface_state); + } else { + iview->nonrt_surface_state.alloc_size = 0; } if (image->needs_color_rt_surface_state) { @@ -271,6 +273,8 @@ genX(image_view_init)(struct anv_image_view *iview, &surface_state); if (!device->info.has_llc) anv_state_clflush(iview->color_rt_surface_state); + } else { + iview->color_rt_surface_state.alloc_size = 0; } if (image->needs_storage_surface_state) { @@ -287,5 +291,7 @@ genX(image_view_init)(struct anv_image_view *iview, GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, &surface_state); + } else { + iview->storage_surface_state.alloc_size = 0; } } diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index c29d100f9f5..13b7e1149d9 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -310,6 +310,8 @@ genX(image_view_init)(struct anv_image_view *iview, &surface_state); if (!device->info.has_llc) anv_state_clflush(iview->nonrt_surface_state); + } else { + iview->nonrt_surface_state.alloc_size = 0; } if (image->needs_color_rt_surface_state) { @@ -329,6 +331,8 @@ genX(image_view_init)(struct anv_image_view *iview, &surface_state); if (!device->info.has_llc) anv_state_clflush(iview->color_rt_surface_state); + } else { + iview->color_rt_surface_state.alloc_size = 0; } if (image->needs_storage_surface_state) { @@ -346,6 +350,8 @@ genX(image_view_init)(struct anv_image_view *iview, GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, &surface_state); + } else { + iview->storage_surface_state.alloc_size = 0; } } -- cgit v1.2.3 From dae800daa80c24022085cc691ca61e2dc1e6a600 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sat, 9 Jan 2016 00:50:04 -0800 Subject: vk: Expose correct timestampPeriod for SKL Skylake uses 83.333ms per tick. 
--- src/vulkan/anv_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 0bd040b13f2..57232298e3d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -384,6 +384,8 @@ void anv_GetPhysicalDeviceProperties( anv_finishme("Get correct values for VkPhysicalDeviceLimits"); + const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0; + VkSampleCountFlags sample_counts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | @@ -490,7 +492,7 @@ void anv_GetPhysicalDeviceProperties( .sampledImageStencilSampleCounts = sample_counts, .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, - .timestampPeriod = 80.0 / (1000 * 1000 * 1000), + .timestampPeriod = time_stamp_base / (1000 * 1000 * 1000), .maxClipDistances = 0 /* FIXME */, .maxCullDistances = 0 /* FIXME */, .maxCombinedClipAndCullDistances = 0 /* FIXME */, -- cgit v1.2.3 From 925ad847008f12b888b9949fffa8df23347f0d13 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sat, 9 Jan 2016 00:51:14 -0800 Subject: vk: Advertise number of timestamp bits We have 36 bits. 
--- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 57232298e3d..1da0cb4367c 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -540,7 +540,7 @@ void anv_GetPhysicalDeviceQueueFamilyProperties( VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, .queueCount = 1, - .timestampValidBits = 0, /* XXX: Real value here */ + .timestampValidBits = 36, /* XXX: Real value here */ .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, }; } -- cgit v1.2.3 From b538ec5409c88f9fc08e0dbbaf3ae71346a1e398 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sat, 9 Jan 2016 00:51:50 -0800 Subject: vk: Support reseting timestamp query pools --- src/vulkan/anv_query.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index 3b29a235cd3..1bcac097104 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -175,7 +175,8 @@ void anv_CmdResetQueryPool( for (uint32_t i = 0; i < queryCount; i++) { switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: { struct anv_query_pool_slot *slot = pool->bo.map; slot[startQuery + i].available = 0; break; -- cgit v1.2.3 From a9c0e8f00f2ac7543e69d7e4ec3f55fc4af79872 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sat, 9 Jan 2016 01:03:20 -0800 Subject: vk: Handle uninitialized FS inputs and gl_PrimitiveID These show up as varying_to_slot[attr] == -1. Instead of storing -1 - 2 in swiz.Attribute[input_index].SourceAttribute, handle it correctly. 
--- src/vulkan/gen8_pipeline.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index e6cb145d522..dee3c4049c2 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -495,16 +495,30 @@ genX(graphics_pipeline_create)( if (input_index < 0) continue; - /* We have to subtract two slots to accout for the URB entry output - * read offset in the VS and GS stages. - */ - int source_attr = fs_input_map->varying_to_slot[attr] - 2; + int source_attr = fs_input_map->varying_to_slot[attr]; max_source_attr = MAX2(max_source_attr, source_attr); if (input_index >= 16) continue; - swiz.Attribute[input_index].SourceAttribute = source_attr; + if (source_attr == -1) { + /* This attribute does not exist in the VUE--that means that the + * vertex shader did not write to it. It could be that it's a + * regular varying read by the fragment shader but not written by the + * vertex shader or it's gl_PrimitiveID. In the first case the value + * is undefined, in the second it needs to be gl_PrimitiveID. + */ + swiz.Attribute[input_index].ConstantSource = PRIM_ID; + swiz.Attribute[input_index].ComponentOverrideX = true; + swiz.Attribute[input_index].ComponentOverrideY = true; + swiz.Attribute[input_index].ComponentOverrideZ = true; + swiz.Attribute[input_index].ComponentOverrideW = true; + } else { + /* We have to subtract two slots to accout for the URB entry output + * read offset in the VS and GS stages. + */ + swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + } } anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), -- cgit v1.2.3 From 83bf1f752dbd80722174d9df203fff1e1794e35d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 10:27:47 -0800 Subject: nir/dead_variables: Add a a mode parameter This allows dead_variables to be used on any type of variable. 
--- src/glsl/nir/nir.h | 2 +- src/glsl/nir/nir_remove_dead_variables.c | 33 +++++++++++++++++++++++--------- src/mesa/drivers/dri/i965/brw_nir.c | 2 +- 3 files changed, 26 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index a2c642ec6fc..6bbc7d123fc 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -2092,7 +2092,7 @@ nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); void nir_lower_vars_to_ssa(nir_shader *shader); -bool nir_remove_dead_variables(nir_shader *shader); +bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode); void nir_move_vec_src_uses_to_dest(nir_shader *shader); bool nir_lower_vec_to_movs(nir_shader *shader); diff --git a/src/glsl/nir/nir_remove_dead_variables.c b/src/glsl/nir/nir_remove_dead_variables.c index db754e56b1c..792c5d4aae6 100644 --- a/src/glsl/nir/nir_remove_dead_variables.c +++ b/src/glsl/nir/nir_remove_dead_variables.c @@ -115,7 +115,7 @@ remove_dead_vars(struct exec_list *var_list, struct set *live) } bool -nir_remove_dead_variables(nir_shader *shader) +nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode) { bool progress = false; struct set *live = @@ -123,15 +123,30 @@ nir_remove_dead_variables(nir_shader *shader) add_var_use_shader(shader, live); - progress = remove_dead_vars(&shader->globals, live) || progress; + if (mode == nir_var_uniform || mode == nir_var_all) + progress = remove_dead_vars(&shader->uniforms, live) || progress; - nir_foreach_function(shader, function) { - if (function->impl) { - if (remove_dead_vars(&function->impl->locals, live)) { - nir_metadata_preserve(function->impl, nir_metadata_block_index | - nir_metadata_dominance | - nir_metadata_live_ssa_defs); - progress = true; + if (mode == nir_var_shader_in || mode == nir_var_all) + progress = remove_dead_vars(&shader->inputs, live) || progress; + + if (mode == nir_var_shader_out || mode == nir_var_all) + progress = 
remove_dead_vars(&shader->outputs, live) || progress; + + if (mode == nir_var_global || mode == nir_var_all) + progress = remove_dead_vars(&shader->globals, live) || progress; + + if (mode == nir_var_system_value || mode == nir_var_all) + progress = remove_dead_vars(&shader->system_values, live) || progress; + + if (mode == nir_var_local || mode == nir_var_all) { + nir_foreach_function(shader, function) { + if (function->impl) { + if (remove_dead_vars(&function->impl->locals, live)) { + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance | + nir_metadata_live_ssa_defs); + progress = true; + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index f8b258bf96c..28870b05e9d 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -484,7 +484,7 @@ brw_preprocess_nir(nir_shader *nir, bool is_scalar) /* Get rid of split copies */ nir = nir_optimize(nir, is_scalar); - OPT(nir_remove_dead_variables); + OPT(nir_remove_dead_variables, nir_var_local); return nir; } -- cgit v1.2.3 From 9f4ba499d1a8bedab9b1659d30bc0de17a355fa0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 10:31:25 -0800 Subject: nir/spirv: Take an entrypoint stage as well as a name --- src/glsl/nir/spirv/nir_spirv.h | 2 +- src/glsl/nir/spirv/spirv_to_nir.c | 73 ++++++++++++++++++++++----------------- src/glsl/nir/spirv/vtn_private.h | 2 +- src/glsl/nir/spirv2nir.c | 3 +- src/vulkan/anv_pipeline.c | 4 +-- 5 files changed, 47 insertions(+), 37 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/nir_spirv.h b/src/glsl/nir/spirv/nir_spirv.h index 506bd981101..354c0a902df 100644 --- a/src/glsl/nir/spirv/nir_spirv.h +++ b/src/glsl/nir/spirv/nir_spirv.h @@ -37,7 +37,7 @@ extern "C" { #endif nir_function *spirv_to_nir(const uint32_t *words, size_t word_count, - const char *entry_point_name, + gl_shader_stage stage, const char *entry_point_name, const 
nir_shader_compiler_options *options); #ifdef __cplusplus diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 49b9a6500f8..d72846f7197 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -176,9 +176,15 @@ vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) static char * vtn_string_literal(struct vtn_builder *b, const uint32_t *words, - unsigned word_count) + unsigned word_count, unsigned *words_used) { - return ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); + char *dup = ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); + if (words_used) { + /* Ammount of space taken by the string (including the null) */ + unsigned len = strlen(dup) + 1; + *words_used = DIV_ROUND_UP(len, sizeof(*words)); + } + return dup; } const uint32_t * @@ -2988,6 +2994,27 @@ vertices_in_from_spv_execution_mode(SpvExecutionMode mode) } } +static gl_shader_stage +stage_for_execution_model(SpvExecutionModel model) +{ + switch (model) { + case SpvExecutionModelVertex: + return MESA_SHADER_VERTEX; + case SpvExecutionModelTessellationControl: + return MESA_SHADER_TESS_CTRL; + case SpvExecutionModelTessellationEvaluation: + return MESA_SHADER_TESS_EVAL; + case SpvExecutionModelGeometry: + return MESA_SHADER_GEOMETRY; + case SpvExecutionModelFragment: + return MESA_SHADER_FRAGMENT; + case SpvExecutionModelGLCompute: + return MESA_SHADER_COMPUTE; + default: + unreachable("Unsupported execution model"); + } +} + static bool vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -3019,25 +3046,28 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, assert(w[2] == SpvMemoryModelGLSL450); break; - case SpvOpEntryPoint: + case SpvOpEntryPoint: { + struct vtn_value *entry_point = &b->values[w[2]]; /* Let this be a name label regardless */ - b->values[w[2]].name = vtn_string_literal(b, &w[3], count - 3); + unsigned name_words; + 
entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words); - if (strcmp(b->values[w[2]].name, b->entry_point_name) != 0) + if (strcmp(entry_point->name, b->entry_point_name) != 0 || + stage_for_execution_model(w[1]) != b->entry_point_stage) break; assert(b->entry_point == NULL); - b->entry_point = &b->values[w[2]]; - b->execution_model = w[1]; + b->entry_point = entry_point; break; + } case SpvOpString: vtn_push_value(b, w[1], vtn_value_type_string)->str = - vtn_string_literal(b, &w[2], count - 2); + vtn_string_literal(b, &w[2], count - 2, NULL); break; case SpvOpName: - b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2); + b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL); break; case SpvOpMemberName: @@ -3453,30 +3483,9 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, return true; } -static gl_shader_stage -stage_for_execution_model(SpvExecutionModel model) -{ - switch (model) { - case SpvExecutionModelVertex: - return MESA_SHADER_VERTEX; - case SpvExecutionModelTessellationControl: - return MESA_SHADER_TESS_CTRL; - case SpvExecutionModelTessellationEvaluation: - return MESA_SHADER_TESS_EVAL; - case SpvExecutionModelGeometry: - return MESA_SHADER_GEOMETRY; - case SpvExecutionModelFragment: - return MESA_SHADER_FRAGMENT; - case SpvExecutionModelGLCompute: - return MESA_SHADER_COMPUTE; - default: - unreachable("Unsupported execution model"); - } -} - nir_function * spirv_to_nir(const uint32_t *words, size_t word_count, - const char *entry_point_name, + gl_shader_stage stage, const char *entry_point_name, const nir_shader_compiler_options *options) { const uint32_t *word_end = words + word_count; @@ -3497,6 +3506,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->value_id_bound = value_id_bound; b->values = rzalloc_array(b, struct vtn_value, value_id_bound); exec_list_make_empty(&b->functions); + b->entry_point_stage = stage; b->entry_point_name = entry_point_name; /* Handle all the 
preamble instructions */ @@ -3509,7 +3519,6 @@ spirv_to_nir(const uint32_t *words, size_t word_count, return NULL; } - gl_shader_stage stage = stage_for_execution_model(b->execution_model); b->shader = nir_shader_create(NULL, stage, options); /* Parse execution modes */ diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 14355c901f0..7ab3c9fba4c 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -321,9 +321,9 @@ struct vtn_builder { unsigned value_id_bound; struct vtn_value *values; + gl_shader_stage entry_point_stage; const char *entry_point_name; struct vtn_value *entry_point; - SpvExecutionModel execution_model; bool origin_upper_left; struct vtn_function *func; diff --git a/src/glsl/nir/spirv2nir.c b/src/glsl/nir/spirv2nir.c index db56d09c98d..4cb484cd074 100644 --- a/src/glsl/nir/spirv2nir.c +++ b/src/glsl/nir/spirv2nir.c @@ -49,6 +49,7 @@ int main(int argc, char **argv) const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); assert(map != NULL); - nir_function *func = spirv_to_nir(map, word_count, "main", NULL); + nir_function *func = spirv_to_nir(map, word_count, MESA_SHADER_FRAGMENT, + "main", NULL); nir_print_shader(func->shader, stderr); } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 769afe8ed72..53c7d1f8b74 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -113,8 +113,8 @@ anv_shader_compile_to_nir(struct anv_device *device, assert(spirv[0] == SPIR_V_MAGIC_NUMBER); assert(module->size % 4 == 0); - entry_point = spirv_to_nir(spirv, module->size / 4, entrypoint_name, - nir_options); + entry_point = spirv_to_nir(spirv, module->size / 4, stage, + entrypoint_name, nir_options); nir = entry_point->shader; assert(nir->stage == stage); nir_validate_shader(nir); -- cgit v1.2.3 From 30883adfb82f7ec9cf195370c32b606a4244010f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 10:33:12 -0800 Subject: nir/spirv: Get rid 
of a bunch of stage asserts Since we may have multiple entrypoints from different stages, we don't know what stage we are actually in so these asserts are invalid. --- src/glsl/nir/spirv/spirv_to_nir.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index d72846f7197..26c2e58b9c3 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -866,21 +866,6 @@ set_mode_system_value(nir_variable_mode *mode) *mode = nir_var_system_value; } -static void -validate_per_vertex_mode(struct vtn_builder *b, nir_variable_mode mode) -{ - switch (b->shader->stage) { - case MESA_SHADER_VERTEX: - assert(mode == nir_var_shader_out); - break; - case MESA_SHADER_GEOMETRY: - assert(mode == nir_var_shader_out || mode == nir_var_shader_in); - break; - default: - assert(!"Invalid shader stage"); - } -} - static void vtn_get_builtin_location(struct vtn_builder *b, SpvBuiltIn builtin, int *location, @@ -889,15 +874,12 @@ vtn_get_builtin_location(struct vtn_builder *b, switch (builtin) { case SpvBuiltInPosition: *location = VARYING_SLOT_POS; - validate_per_vertex_mode(b, *mode); break; case SpvBuiltInPointSize: *location = VARYING_SLOT_PSIZ; - validate_per_vertex_mode(b, *mode); break; case SpvBuiltInClipDistance: *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? 
*/ - validate_per_vertex_mode(b, *mode); break; case SpvBuiltInCullDistance: /* XXX figure this out */ @@ -932,17 +914,14 @@ vtn_get_builtin_location(struct vtn_builder *b, unreachable("no tessellation support"); case SpvBuiltInFragCoord: *location = VARYING_SLOT_POS; - assert(b->shader->stage == MESA_SHADER_FRAGMENT); assert(*mode == nir_var_shader_in); break; case SpvBuiltInPointCoord: *location = VARYING_SLOT_PNTC; - assert(b->shader->stage == MESA_SHADER_FRAGMENT); assert(*mode == nir_var_shader_in); break; case SpvBuiltInFrontFacing: *location = VARYING_SLOT_FACE; - assert(b->shader->stage == MESA_SHADER_FRAGMENT); assert(*mode == nir_var_shader_in); break; case SpvBuiltInSampleId: @@ -959,7 +938,6 @@ vtn_get_builtin_location(struct vtn_builder *b, break; case SpvBuiltInFragDepth: *location = FRAG_RESULT_DEPTH; - assert(b->shader->stage == MESA_SHADER_FRAGMENT); assert(*mode == nir_var_shader_out); break; case SpvBuiltInNumWorkgroups: -- cgit v1.2.3 From b8ec48ee76adda4b643fb153507ac72942ecbe26 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 10:54:26 -0800 Subject: anv/pipeline: Only delete functions for SPIR-V shaders We can assume that direct NIR shaders only have one entrypoint --- src/vulkan/anv_pipeline.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 53c7d1f8b74..9054d76892d 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -125,6 +125,14 @@ anv_shader_compile_to_nir(struct anv_device *device, nir_inline_functions(nir); nir_validate_shader(nir); + /* Pick off the single entrypoint that we want */ + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (func != entry_point) + exec_node_remove(&func->node); + } + assert(exec_list_length(&nir->functions) == 1); + entry_point->name = ralloc_strdup(entry_point, "main"); + nir_lower_system_values(nir); nir_validate_shader(nir); } @@ 
-132,13 +140,6 @@ anv_shader_compile_to_nir(struct anv_device *device, /* Vulkan uses the separate-shader linking model */ nir->info.separate_shader = true; - /* Pick off the single entrypoint that we want */ - foreach_list_typed_safe(nir_function, func, node, &nir->functions) { - if (func != entry_point) - exec_node_remove(&func->node); - } - assert(exec_list_length(&nir->functions) == 1); - nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]); nir_shader_gather_info(nir, entry_point->impl); -- cgit v1.2.3 From 790565b06ec80de663820e136105ecb18125743f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 10:55:57 -0800 Subject: anv/pipeline: Handle output lowering in anv_pipeline instead of spirv_to_nir While we're at it, we delete any unused variables. This allows us to prune variables that are not used in the current stage from the shader. --- src/glsl/nir/spirv/spirv_to_nir.c | 5 ----- src/vulkan/anv_pipeline.c | 7 +++++++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 26c2e58b9c3..44d03652e98 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3526,10 +3526,5 @@ spirv_to_nir(const uint32_t *words, size_t word_count, ralloc_free(b); - /* Because we can still have output reads in NIR, we need to lower - * outputs to temporaries before we are truely finished. 
- */ - nir_lower_outputs_to_temporaries(entry_point->shader, entry_point); - return entry_point; } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 9054d76892d..db4e19bf486 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -133,6 +133,13 @@ anv_shader_compile_to_nir(struct anv_device *device, assert(exec_list_length(&nir->functions) == 1); entry_point->name = ralloc_strdup(entry_point, "main"); + nir_remove_dead_variables(nir, nir_var_shader_in); + nir_remove_dead_variables(nir, nir_var_shader_out); + nir_remove_dead_variables(nir, nir_var_system_value); + nir_validate_shader(nir); + + nir_lower_outputs_to_temporaries(entry_point->shader, entry_point); + nir_lower_system_values(nir); nir_validate_shader(nir); } -- cgit v1.2.3 From 52d4af6a3c37641586a18b8437644729270c67d2 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 11 Jan 2016 14:07:35 -0800 Subject: anv/gen7: Remove unheeded helper begin_render_pass() The helper didn't help much. It looks like a leftover from past code-reuse. Now it's called from exactly one location, gen7_CmdBeginRenderPass(). So fold it into its caller. 
--- src/vulkan/gen7_cmd_buffer.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index e69bf47782e..d3a9de5038f 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -805,10 +805,12 @@ genX(cmd_buffer_begin_subpass)(struct anv_cmd_buffer *cmd_buffer, cmd_buffer_emit_depth_stencil(cmd_buffer); } -static void -begin_render_pass(struct anv_cmd_buffer *cmd_buffer, - const VkRenderPassBeginInfo* pRenderPassBegin) +void genX(CmdBeginRenderPass)( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); @@ -829,17 +831,6 @@ begin_render_pass(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_clear_attachments(cmd_buffer, pass, pRenderPassBegin->pClearValues); -} - -void genX(CmdBeginRenderPass)( - VkCommandBuffer commandBuffer, - const VkRenderPassBeginInfo* pRenderPassBegin, - VkSubpassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); - - begin_render_pass(cmd_buffer, pRenderPassBegin); gen7_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); } -- cgit v1.2.3 From 17cfafd83a84a7ea2aeacee238be2c9c0814adc1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 12:14:45 -0800 Subject: nir/spirv: Handle OpNoLine --- src/glsl/nir/spirv/spirv_to_nir.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 44d03652e98..9000e895dd4 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3190,6 +3190,7 @@ 
vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpLine: + case SpvOpNoLine: break; /* Ignored for now */ case SpvOpTypeVoid: @@ -3244,6 +3245,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, { switch (opcode) { case SpvOpLine: + case SpvOpNoLine: break; /* Ignored for now */ case SpvOpLabel: -- cgit v1.2.3 From d032ede26fcd716ab66fd64d14e6f22b37f15fbf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 12:39:03 -0800 Subject: nir/types: Add an is_error helper --- src/glsl/nir/nir_types.cpp | 6 ++++++ src/glsl/nir/nir_types.h | 1 + 2 files changed, 7 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 86f8508b859..f4408def9a0 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -164,6 +164,12 @@ glsl_type_is_void(const glsl_type *type) return type->is_void(); } +bool +glsl_type_is_error(const glsl_type *type) +{ + return type->is_error(); +} + bool glsl_type_is_vector(const struct glsl_type *type) { diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index 535d36373de..8cb7ec16ce3 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -80,6 +80,7 @@ unsigned glsl_get_record_location_offset(const struct glsl_type *type, unsigned length); bool glsl_type_is_void(const struct glsl_type *type); +bool glsl_type_is_error(const struct glsl_type *type); bool glsl_type_is_vector(const struct glsl_type *type); bool glsl_type_is_scalar(const struct glsl_type *type); bool glsl_type_is_vector_or_scalar(const struct glsl_type *type); -- cgit v1.2.3 From 96683065f29b08516c28a18275beda1632c66550 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 12:39:16 -0800 Subject: nir/spirv: Assert that matrix types are valid --- src/glsl/nir/spirv/spirv_to_nir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c 
b/src/glsl/nir/spirv/spirv_to_nir.c index 9000e895dd4..da6c0086b9e 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -597,6 +597,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), glsl_get_vector_elements(base->type), columns); + assert(!glsl_type_is_error(val->type->type)); val->type->array_element = base; val->type->row_major = false; val->type->stride = 0; -- cgit v1.2.3 From c974b94578d558a88228011090d7edc9421ece46 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 12:39:32 -0800 Subject: nir/spirv: Properly handle OpConstantNull --- src/glsl/nir/spirv/spirv_to_nir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index da6c0086b9e..e27a755c106 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3222,6 +3222,7 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpConstant: case SpvOpConstantComposite: case SpvOpConstantSampler: + case SpvOpConstantNull: case SpvOpSpecConstantTrue: case SpvOpSpecConstantFalse: case SpvOpSpecConstant: -- cgit v1.2.3 From fc3f659aa9c312661bf2a4a63643e40fbebda338 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 15:33:41 -0800 Subject: nir/vars_to_ssa: Add phi sources for unreachable predecessors It is possible to end up with unreachable blocks if, for instance, you have an "if (...) { break; } else { continue; } unreachable()". In this case, the unreachable block does not show up in the dominance tree so it never gets visited. Instead, we go and visit all of those in follow-on pass. 
--- src/glsl/nir/nir_lower_vars_to_ssa.c | 46 ++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c index 75d31ff60af..a5f5ef2de3c 100644 --- a/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -581,6 +581,33 @@ add_phi_sources(nir_block *block, nir_block *pred, } } +static void +add_undef_phi_sources(nir_block *block, nir_block *pred, + struct lower_variables_state *state) +{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(state->shader, + phi->dest.ssa.num_components); + nir_instr_insert(nir_before_cf_list(&state->impl->body), &undef->instr); + + nir_phi_src *src = ralloc(phi, nir_phi_src); + src->pred = pred; + src->src.parent_instr = &phi->instr; + src->src.is_ssa = true; + src->src.ssa = &undef->def; + + list_addtail(&src->src.use_link, &undef->def.uses); + + exec_list_push_tail(&phi->srcs, &src->node); + } +} + /* Performs variable renaming by doing a DFS of the dominance tree * * This algorithm is very similar to the one outlined in "Efficiently @@ -774,6 +801,23 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) return true; } +static bool +add_unreachable_phi_srcs_block(nir_block *block, void *void_state) +{ + struct lower_variables_state *state = void_state; + + /* Only run on unreachable blocks */ + if (block->imm_dom || block == nir_start_block(state->impl)) + return true; + + if (block->successors[0]) + add_undef_phi_sources(block->successors[0], block, state); + if (block->successors[1]) + add_undef_phi_sources(block->successors[1], block, state); + + return true; +} + /* Inserts phi nodes for all variables marked lower_to_ssa * * This is the same algorithm as presented in "Efficiently Computing Static @@ -955,6 +999,8 
@@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) insert_phi_nodes(&state); rename_variables_block(nir_start_block(impl), &state); + nir_foreach_block(impl, add_unreachable_phi_srcs_block, &state); + nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); -- cgit v1.2.3 From bb5882e6afb507fdbf48511103ad97c080ee5076 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 15:35:15 -0800 Subject: nir/spirv/cfg: Handle unreachable instructions --- src/glsl/nir/spirv/vtn_cfg.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index db1163d0707..bc71e8afa7c 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -426,6 +426,8 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, } case SpvOpUnreachable: + return; + default: unreachable("Unhandled opcode"); } -- cgit v1.2.3 From 1ca97cefb04f1ed4520261c5037cc5016051e3cb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 16:06:06 -0800 Subject: nir/spirv: Add no-op support for OpSourceContinued --- src/glsl/nir/spirv/spirv_to_nir.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index e27a755c106..f85e131e678 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3001,6 +3001,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, switch (opcode) { case SpvOpSource: case SpvOpSourceExtension: + case SpvOpSourceContinued: case SpvOpExtension: /* Unhandled, but these are for debug so that's ok. 
*/ break; @@ -3172,6 +3173,7 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, { switch (opcode) { case SpvOpSource: + case SpvOpSourceContinued: case SpvOpSourceExtension: case SpvOpExtension: case SpvOpCapability: -- cgit v1.2.3 From dee09d7393e90b3758e943033690310ea60430ae Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 16:29:38 -0800 Subject: nir/spirv: Better handle OpCopyMemory --- src/glsl/nir/spirv/spirv_to_nir.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index f85e131e678..ade3fd4019c 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1596,15 +1596,13 @@ vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, } static void -vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, - nir_deref_var *dest, struct vtn_type *type) +vtn_variable_copy(struct vtn_builder *b, + nir_deref_var *dest, struct vtn_type *dest_type, + nir_deref_var *src, struct vtn_type *src_type) { - nir_deref *src_tail = get_deref_tail(src); - - if (src_tail->child || src->var->interface_type) { - assert(get_deref_tail(dest)->child); - struct vtn_ssa_value *val = vtn_variable_load(b, src, type); - vtn_variable_store(b, val, dest, type); + if (src->var->interface_type || dest->var->interface_type) { + struct vtn_ssa_value *val = vtn_variable_load(b, src, src_type); + vtn_variable_store(b, val, dest, dest_type); } else { nir_intrinsic_instr *copy = nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); @@ -1957,12 +1955,11 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } case SpvOpCopyMemory: { - nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; - nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; - struct vtn_type *type = - vtn_value(b, w[1], vtn_value_type_deref)->deref_type; + struct 
vtn_value *dest = vtn_value(b, w[1], vtn_value_type_deref); + struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_deref); - vtn_variable_copy(b, src, dest, type); + vtn_variable_copy(b, dest->deref, dest->deref_type, + src->deref, src->deref_type); break; } -- cgit v1.2.3 From c381906bbd1d9700edbb87b03267391d111000cd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 17:02:27 -0800 Subject: nir/spirv: Stop wrapping carry/borrow in b2i The upstream versions now return an integer like GLSL/SPIR-V want. --- src/glsl/nir/spirv/vtn_alu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_alu.c b/src/glsl/nir/spirv/vtn_alu.c index 70875007ec7..f1bca439d87 100644 --- a/src/glsl/nir/spirv/vtn_alu.c +++ b/src/glsl/nir/spirv/vtn_alu.c @@ -309,15 +309,13 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpIAddCarry: assert(glsl_type_is_struct(val->ssa->type)); val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = - nir_b2i(&b->nb, nir_uadd_carry(&b->nb, src[0], src[1])); + val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]); return; case SpvOpISubBorrow: assert(glsl_type_is_struct(val->ssa->type)); val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = - nir_b2i(&b->nb, nir_usub_borrow(&b->nb, src[0], src[1])); + val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]); return; case SpvOpUMulExtended: -- cgit v1.2.3 From 24523e98a466e7a7788006b7755673b92074df5e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 17:03:16 -0800 Subject: nir/spirv/cfg: Allow breaking from the continue block --- src/glsl/nir/spirv/vtn_cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index bc71e8afa7c..a927fb728c1 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ 
-276,7 +276,7 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, vtn_cfg_walk_blocks(b, &loop->body, block, switch_case, NULL, new_loop_break, new_loop_cont, NULL ); vtn_cfg_walk_blocks(b, &loop->cont_body, new_loop_cont, NULL, NULL, - NULL, NULL, block); + new_loop_break, NULL, block); block = new_loop_break; continue; -- cgit v1.2.3 From 1c5393d57d46e20612cee6c567421da10e0fcd48 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 17:03:50 -0800 Subject: nir/spirv: Allow OpBranchConditional without a merge This can happen if you have a predicated break/continue. --- src/glsl/nir/spirv/vtn_cfg.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index a927fb728c1..b3061ce47bb 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -325,12 +325,10 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, list_addtail(&if_stmt->node.link, cf_list); - /* OpBranchConditional must be at the end of a block with either - * an OpSelectionMerge or an OpLoopMerge. 
- */ - assert(block->merge); - if ((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) + if (block->merge && + (*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) { if_stmt->control = block->merge[2]; + } if_stmt->then_type = vtn_get_branch_type(then_block, switch_case, switch_break, -- cgit v1.2.3 From 350bbd3d15ff5ac3280129dca2cbcf3e4ca6e195 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 17:45:24 -0800 Subject: nir/spirv: Allow base derefs in get_vulkan_resource_index --- src/glsl/nir/spirv/spirv_to_nir.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index ade3fd4019c..ccdc248f708 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1254,10 +1254,9 @@ get_vulkan_resource_index(struct vtn_builder *b, nir_variable *var = nir_deref_as_var(*deref)->var; assert(var->interface_type && "variable is a block"); - assert((*deref)->child); nir_ssa_def *array_index; - if ((*deref)->child->deref_type == nir_deref_type_array) { + if ((*deref)->child && (*deref)->child->deref_type == nir_deref_type_array) { *deref = (*deref)->child; *type = (*type)->array_element; array_index = deref_array_offset(b, *deref); -- cgit v1.2.3 From b208620fd240a9d74025cdf1ca1d2a54972ebc55 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 11 Jan 2016 17:45:52 -0800 Subject: nir/spirv: Allow creating local/global variables from interface types Not sure if this is actually allowed, but it's not that hard to just strip the interface information from the type. 
--- src/glsl/nir/spirv/spirv_to_nir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index ccdc248f708..edf44f2849d 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1740,9 +1740,11 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, break; case SpvStorageClassPrivate: var->data.mode = nir_var_global; + var->interface_type = NULL; break; case SpvStorageClassFunction: var->data.mode = nir_var_local; + var->interface_type = NULL; break; case SpvStorageClassWorkgroup: case SpvStorageClassCrossWorkgroup: @@ -1798,7 +1800,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, /* Interface block variables aren't actually going to be referenced * by the generated NIR, so we don't put them in the list */ - if (interface_type && glsl_type_is_struct(interface_type->type)) + if (var->interface_type) break; if (var->data.mode == nir_var_local) { -- cgit v1.2.3 From 4141d13de5a6c5de6398bf716724359da975912c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 Jan 2016 01:04:41 -0800 Subject: nir/spirv: Handle matrix decorations on arrays of matrices Connor's original shallow-copy plan works great except that a couple of the decorations apply to a matrix which may be some levels down in an array. We weren't properly unpacking that. This fixes most of the remaining SSBO and UBO layout tests. 
--- src/glsl/nir/spirv/spirv_to_nir.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index edf44f2849d..78d5940a75e 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -415,6 +415,23 @@ vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) return dest; } +static struct vtn_type * +mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member) +{ + type->members[member] = vtn_type_copy(b, type->members[member]); + type = type->members[member]; + + /* We may have an array of matrices.... Oh, joy! */ + while (glsl_type_is_array(type->type)) { + type->array_element = vtn_type_copy(b, type->array_element); + type = type->array_element; + } + + assert(glsl_type_is_matrix(type->type)); + + return type; +} + static void struct_member_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, @@ -444,8 +461,7 @@ struct_member_decoration_cb(struct vtn_builder *b, ctx->fields[member].location = dec->literals[0]; break; case SpvDecorationBuiltIn: - ctx->type->members[member] = vtn_type_copy(b, - ctx->type->members[member]); + ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]); ctx->type->members[member]->is_builtin = true; ctx->type->members[member]->builtin = dec->literals[0]; ctx->type->builtin_block = true; @@ -454,12 +470,12 @@ struct_member_decoration_cb(struct vtn_builder *b, ctx->type->offsets[member] = dec->literals[0]; break; case SpvDecorationMatrixStride: - ctx->type->members[member]->stride = dec->literals[0]; + mutable_matrix_member(b, ctx->type, member)->stride = dec->literals[0]; break; case SpvDecorationColMajor: break; /* Nothing to do here. Column-major is the default. 
*/ case SpvDecorationRowMajor: - ctx->type->members[member]->row_major = true; + mutable_matrix_member(b, ctx->type, member)->row_major = true; break; default: unreachable("Unhandled member decoration"); -- cgit v1.2.3 From 62e56492c3caa3979e5064fe177a9d030b0a0f2c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 Jan 2016 01:31:10 -0800 Subject: nir/spirv: Allow non-block variables with interface types in lists The original objective was to disallow UBO and SSBO variables from the variable lists. This was accidentally broken in b208620fd when fixing some other interface issues. --- src/glsl/nir/spirv/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 78d5940a75e..71e589d88b7 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1816,7 +1816,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, /* Interface block variables aren't actually going to be referenced * by the generated NIR, so we don't put them in the list */ - if (var->interface_type) + if (var->interface_type && glsl_type_is_struct(var->interface_type)) break; if (var->data.mode == nir_var_local) { -- cgit v1.2.3 From 2b4bacb84bf18d52ef86b048ea90b93c506009a1 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 11 Jan 2016 12:27:51 -0800 Subject: vk: Use the correct stride for CC_VIEWPORT structs --- src/vulkan/gen8_cmd_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 308b72b3d7b..b12f6635afd 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -109,7 +109,7 @@ emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sf_clip_viewport); - GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 32, &cc_viewport); + 
GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); } if (!cmd_buffer->device->info.has_llc) { -- cgit v1.2.3 From 7df20f0c1456738aa598ba8d4ce88679765ca13e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 11 Jan 2016 12:29:42 -0800 Subject: vk: Support SpvBuiltInViewportIndex --- src/glsl/nir/spirv/spirv_to_nir.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 71e589d88b7..d5b9afefa7c 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -924,6 +924,15 @@ vtn_get_builtin_location(struct vtn_builder *b, *location = VARYING_SLOT_LAYER; *mode = nir_var_shader_out; break; + case SpvBuiltInViewportIndex: + *location = VARYING_SLOT_VIEWPORT; + if (b->shader->stage == MESA_SHADER_GEOMETRY) + *mode = nir_var_shader_out; + else if (b->shader->stage == MESA_SHADER_FRAGMENT) + *mode = nir_var_shader_in; + else + unreachable("invalid stage for SpvBuiltInViewportIndex"); + break; case SpvBuiltInTessLevelOuter: case SpvBuiltInTessLevelInner: case SpvBuiltInTessCoord: -- cgit v1.2.3 From 6fc278ae4f888b1b30410910c10d64e22e9224c3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 Jan 2016 11:43:48 -0800 Subject: anv/UpdateDescriptorSets: Respect write.dstArrayElement --- src/vulkan/anv_descriptor_set.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 6e53f3897c8..3e11047c673 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -472,6 +472,7 @@ void anv_UpdateDescriptorSets( &set->layout->binding[write->dstBinding]; struct anv_descriptor *desc = &set->descriptors[bind_layout->descriptor_index]; + desc += write->dstArrayElement; switch (write->descriptorType) { case VK_DESCRIPTOR_TYPE_SAMPLER: @@ -544,7 +545,8 @@ void anv_UpdateDescriptorSets( assert(buffer); 
struct anv_buffer_view *view = - &set->buffer_views[bind_layout->buffer_index + j]; + &set->buffer_views[bind_layout->buffer_index]; + view += write->dstArrayElement + j; const struct anv_format *format = anv_format_for_descriptor_type(write->descriptorType); -- cgit v1.2.3 From d7a193327b3d121e4a45766b360fcc11d3a56b93 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 12 Jan 2016 11:46:09 -0800 Subject: vk: Implement workaround for occlusion queries We have an issue with occlusion queries (PIPE_CONTROL depth writes) after using the pipeline with the VS disabled. We work around it by using a depth cache flush PIPE_CONTROL before doing a depth write. Fixes dEQP-VK.query_pool.* --- src/vulkan/anv_cmd_buffer.c | 1 + src/vulkan/anv_meta.c | 5 +++++ src/vulkan/anv_private.h | 1 + src/vulkan/gen8_cmd_buffer.c | 13 +++++++++++++ 4 files changed, 20 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index d146f9ac467..bca2deafa7d 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -123,6 +123,7 @@ anv_cmd_state_init(struct anv_cmd_state *state) state->pipeline = NULL; state->restart_index = UINT32_MAX; state->dynamic = default_dynamic_state; + state->need_query_wa = true; state->gen7.index_buffer = NULL; } diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index b61cda793d5..0c1b43925ed 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -144,6 +144,11 @@ anv_meta_restore(const struct anv_meta_saved_state *state, anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, state->dynamic_mask); cmd_buffer->state.dirty |= state->dynamic_mask; + + /* Since we've used the pipeline with the VS disabled, set + * need_query_wa. See CmdBeginQuery. 
+ */ + cmd_buffer->state.need_query_wa = true; } VkImageViewType diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index ded2d9a5e24..138a40751f2 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1082,6 +1082,7 @@ struct anv_cmd_state { struct anv_state binding_tables[MESA_SHADER_STAGES]; struct anv_state samplers[MESA_SHADER_STAGES]; struct anv_dynamic_state dynamic; + bool need_query_wa; struct { struct anv_buffer * index_buffer; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index b12f6635afd..ec86bb2221c 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -896,6 +896,19 @@ void genX(CmdBeginQuery)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + /* Workaround: When meta uses the pipeline with the VS disabled, it seems + * that the pipelining of the depth write breaks. What we see is that + * samples from the render pass clear leaks into the first query + * immediately after the clear. Doing a pipecontrol with a post-sync + * operation and DepthStallEnable seems to work around the issue. + */ + if (cmd_buffer->state.need_query_wa) { + cmd_buffer->state.need_query_wa = false; + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DepthCacheFlushEnable = true, + .DepthStallEnable = true); + } + switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, -- cgit v1.2.3 From aee970c844f85c00dcb5d0df85ef56981c255291 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 Jan 2016 13:48:32 -0800 Subject: anv/device: Bump the max program size again No one will ever need more than 128K, right? 
--- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 1da0cb4367c..fa2c24a80f6 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -722,7 +722,7 @@ VkResult anv_CreateDevice( anv_state_pool_init(&device->dynamic_state_pool, &device->dynamic_state_block_pool); - anv_block_pool_init(&device->instruction_block_pool, device, 64 * 1024); + anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024); anv_pipeline_cache_init(&device->default_pipeline_cache, device); anv_block_pool_init(&device->surface_state_block_pool, device, 4096); -- cgit v1.2.3 From 15a56459d7562c02fb288f1b9a5a77f41c2d3ee1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 Jan 2016 15:32:51 -0800 Subject: nir: Add a fquantize2f16 opcode This opcode simply takes a 32-bit floating-point value and reduces its effective precision to 16 bits. --- src/glsl/nir/nir_opcodes.py | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index d31507fe531..9dbb341d91c 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -176,6 +176,7 @@ unop("ffloor", tfloat, "floorf(src0)") unop("ffract", tfloat, "src0 - floorf(src0)") unop("fround_even", tfloat, "_mesa_roundevenf(src0)") +unop("fquantize2f16", tfloat, "_mesa_half_to_float(_mesa_float_to_half(src0))") # Trigonometric operations. 
-- cgit v1.2.3 From 282a837317be85ba677dd075e077dd467c027465 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 Jan 2016 15:35:00 -0800 Subject: i965: Implement nir_op_fquantize2f16 --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 13 +++++++++++++ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 ++++++++++ 2 files changed, 23 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 62133784dcf..ffb805965b3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -943,6 +943,19 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; + case nir_op_fquantize2f16: { + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D); + + /* The destination stride must be at least as big as the source stride. */ + tmp.type = BRW_REGISTER_TYPE_W; + tmp.stride = 2; + + bld.emit(BRW_OPCODE_F32TO16, tmp, op[0]); + inst = bld.emit(BRW_OPCODE_F16TO32, result, tmp); + inst->saturate = instr->dest.saturate; + break; + } + case nir_op_fmin: case nir_op_imin: case nir_op_umin: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 37f517d4efd..77a2f8b85fb 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1177,6 +1177,16 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; + case nir_op_fquantize2f16: { + /* See also vec4_visitor::emit_pack_half_2x16() */ + src_reg tmp = src_reg(this, glsl_type::uvec4_type); + + emit(F32TO16(dst_reg(tmp), op[0])); + inst = emit(F16TO32(dst, tmp)); + inst->saturate = instr->dest.saturate; + break; + } + case nir_op_fmin: case nir_op_imin: case nir_op_umin: -- cgit v1.2.3 From 610aa00cdf54cf7c0bb31c54bc87580d66cd3d14 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 Jan 2016 15:36:38 -0800 Subject: nir/spirv: Add 
support for OpQuantize --- src/glsl/nir/spirv/spirv_to_nir.c | 1 + src/glsl/nir/spirv/vtn_alu.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index d5b9afefa7c..bbde4554c6b 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -3370,6 +3370,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpUConvert: case SpvOpSConvert: case SpvOpFConvert: + case SpvOpQuantizeToF16: case SpvOpConvertPtrToU: case SpvOpConvertUToPtr: case SpvOpPtrCastToGeneric: diff --git a/src/glsl/nir/spirv/vtn_alu.c b/src/glsl/nir/spirv/vtn_alu.c index f1bca439d87..03ed1f0caaa 100644 --- a/src/glsl/nir/spirv/vtn_alu.c +++ b/src/glsl/nir/spirv/vtn_alu.c @@ -387,6 +387,10 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, op = nir_op_fmov; break; + case SpvOpQuantizeToF16: + op = nir_op_fquantize2f16; + break; + /* Derivatives: */ case SpvOpDPdx: op = nir_op_fddx; break; case SpvOpDPdy: op = nir_op_fddy; break; -- cgit v1.2.3 From c95c3b2c213137e807522d132e06c841f33bfd25 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 Jan 2016 16:28:28 -0800 Subject: nir/spirv: Add initial support for specialization constants --- src/glsl/nir/spirv/nir_spirv.h | 7 ++++++ src/glsl/nir/spirv/spirv_to_nir.c | 46 +++++++++++++++++++++++++++++++++++++++ src/glsl/nir/spirv/vtn_private.h | 3 +++ src/glsl/nir/spirv2nir.c | 4 ++-- src/vulkan/anv_pipeline.c | 2 +- 5 files changed, 59 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/nir_spirv.h b/src/glsl/nir/spirv/nir_spirv.h index 354c0a902df..9c9c93d36c2 100644 --- a/src/glsl/nir/spirv/nir_spirv.h +++ b/src/glsl/nir/spirv/nir_spirv.h @@ -36,7 +36,14 @@ extern "C" { #endif +struct nir_spirv_specialization { + uint32_t id; + uint32_t data; +}; + nir_function *spirv_to_nir(const uint32_t *words, size_t word_count, + struct nir_spirv_specialization *specializations, + 
unsigned num_specializations, gl_shader_stage stage, const char *entry_point_name, const nir_shader_compiler_options *options); diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index bbde4554c6b..deea1adb3ea 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -804,6 +804,33 @@ vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type) return c; } +static void +spec_constant_deocoration_cb(struct vtn_builder *b, struct vtn_value *v, + int member, const struct vtn_decoration *dec, + void *data) +{ + assert(member == -1); + if (dec->decoration != SpvDecorationSpecId) + return; + + uint32_t *const_value = data; + + for (unsigned i = 0; i < b->num_specializations; i++) { + if (b->specializations[i].id == dec->literals[0]) { + *const_value = b->specializations[i].data; + return; + } + } +} + +static uint32_t +get_specialization(struct vtn_builder *b, struct vtn_value *val, + uint32_t const_value) +{ + vtn_foreach_decoration(b, val, spec_constant_deocoration_cb, &const_value); + return const_value; +} + static void vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -820,10 +847,25 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, assert(val->const_type == glsl_bool_type()); val->constant->value.u[0] = NIR_FALSE; break; + + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: { + assert(val->const_type == glsl_bool_type()); + uint32_t int_val = + get_specialization(b, val, (opcode == SpvOpSpecConstantTrue)); + val->constant->value.u[0] = int_val ? 
NIR_TRUE : NIR_FALSE; + break; + } + case SpvOpConstant: assert(glsl_type_is_scalar(val->const_type)); val->constant->value.u[0] = w[3]; break; + case SpvOpSpecConstant: + assert(glsl_type_is_scalar(val->const_type)); + val->constant->value.u[0] = get_specialization(b, val, w[3]); + break; + case SpvOpSpecConstantComposite: case SpvOpConstantComposite: { unsigned elem_count = count - 3; nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); @@ -3493,6 +3535,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, nir_function * spirv_to_nir(const uint32_t *words, size_t word_count, + struct nir_spirv_specialization *spec, unsigned num_spec, gl_shader_stage stage, const char *entry_point_name, const nir_shader_compiler_options *options) { @@ -3533,6 +3576,9 @@ spirv_to_nir(const uint32_t *words, size_t word_count, vtn_foreach_execution_mode(b, b->entry_point, vtn_handle_execution_mode, NULL); + b->specializations = spec; + b->num_specializations = num_spec; + /* Handle all variable, type, and constant instructions */ words = vtn_foreach_instruction(b, words, word_end, vtn_handle_variable_or_type_instruction); diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 7ab3c9fba4c..1f88eeda941 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -310,6 +310,9 @@ struct vtn_builder { */ struct hash_table *phi_table; + unsigned num_specializations; + struct nir_spirv_specialization *specializations; + /* * NIR variable for each SPIR-V builtin. 
*/ diff --git a/src/glsl/nir/spirv2nir.c b/src/glsl/nir/spirv2nir.c index 4cb484cd074..c837186bdfc 100644 --- a/src/glsl/nir/spirv2nir.c +++ b/src/glsl/nir/spirv2nir.c @@ -49,7 +49,7 @@ int main(int argc, char **argv) const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); assert(map != NULL); - nir_function *func = spirv_to_nir(map, word_count, MESA_SHADER_FRAGMENT, - "main", NULL); + nir_function *func = spirv_to_nir(map, word_count, NULL, 0, + MESA_SHADER_FRAGMENT, "main", NULL); nir_print_shader(func->shader, stderr); } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index db4e19bf486..3d632dec038 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -113,7 +113,7 @@ anv_shader_compile_to_nir(struct anv_device *device, assert(spirv[0] == SPIR_V_MAGIC_NUMBER); assert(module->size % 4 == 0); - entry_point = spirv_to_nir(spirv, module->size / 4, stage, + entry_point = spirv_to_nir(spirv, module->size / 4, NULL, 0, stage, entrypoint_name, nir_options); nir = entry_point->shader; assert(nir->stage == stage); -- cgit v1.2.3 From 9b7e08118b382b1a0d4915275c6a760a517cb878 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 Jan 2016 16:30:43 -0800 Subject: anv/pipeline: Pass through specialization constants --- src/vulkan/anv_pipeline.c | 63 ++++++++++++++++++++++++++++++++++------------ src/vulkan/anv_private.h | 3 ++- src/vulkan/gen8_pipeline.c | 3 ++- 3 files changed, 51 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 3d632dec038..8de889306cc 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -84,7 +84,8 @@ static nir_shader * anv_shader_compile_to_nir(struct anv_device *device, struct anv_shader_module *module, const char *entrypoint_name, - gl_shader_stage stage) + gl_shader_stage stage, + const VkSpecializationInfo *spec_info) { if (strcmp(entrypoint_name, "main") != 0) { anv_finishme("Multiple shaders per module 
not really supported"); @@ -113,12 +114,31 @@ anv_shader_compile_to_nir(struct anv_device *device, assert(spirv[0] == SPIR_V_MAGIC_NUMBER); assert(module->size % 4 == 0); - entry_point = spirv_to_nir(spirv, module->size / 4, NULL, 0, stage, - entrypoint_name, nir_options); + uint32_t num_spec_entries = 0; + struct nir_spirv_specialization *spec_entries = NULL; + if (spec_info && spec_info->mapEntryCount > 0) { + num_spec_entries = spec_info->mapEntryCount; + spec_entries = malloc(num_spec_entries * sizeof(*spec_entries)); + for (uint32_t i = 0; i < num_spec_entries; i++) { + const uint32_t *data = + spec_info->pData + spec_info->pMapEntries[i].offset; + assert((const void *)(data + 1) <= + spec_info->pData + spec_info->dataSize); + + spec_entries[i].id = spec_info->pMapEntries[i].constantID; + spec_entries[i].data = *data; + } + } + + entry_point = spirv_to_nir(spirv, module->size / 4, + spec_entries, num_spec_entries, + stage, entrypoint_name, nir_options); nir = entry_point->shader; assert(nir->stage == stage); nir_validate_shader(nir); + free(spec_entries); + nir_lower_returns(nir); nir_validate_shader(nir); @@ -374,13 +394,15 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, struct anv_shader_module *module, const char *entrypoint, gl_shader_stage stage, + const VkSpecializationInfo *spec_info, struct brw_stage_prog_data *prog_data) { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, - module, entrypoint, stage); + module, entrypoint, stage, + spec_info); if (nir == NULL) return NULL; @@ -490,7 +512,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *info, struct anv_shader_module *module, - const char *entrypoint) + const char *entrypoint, + const VkSpecializationInfo *spec_info) { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; @@ 
-504,7 +527,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, memset(prog_data, 0, sizeof(*prog_data)); nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_VERTEX, + MESA_SHADER_VERTEX, spec_info, &prog_data->base.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -554,7 +577,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *info, struct anv_shader_module *module, - const char *entrypoint) + const char *entrypoint, + const VkSpecializationInfo *spec_info) { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; @@ -568,7 +592,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, memset(prog_data, 0, sizeof(*prog_data)); nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_GEOMETRY, + MESA_SHADER_GEOMETRY, spec_info, &prog_data->base.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -610,7 +634,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *info, struct anv_shader_module *module, - const char *entrypoint) + const char *entrypoint, + const VkSpecializationInfo *spec_info) { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; @@ -629,7 +654,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, prog_data->binding_table.render_target_start = 0; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_FRAGMENT, + MESA_SHADER_FRAGMENT, spec_info, &prog_data->base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -688,7 +713,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, struct anv_pipeline_cache *cache, const VkComputePipelineCreateInfo *info, struct anv_shader_module *module, - const char *entrypoint) + const char *entrypoint, + const VkSpecializationInfo 
*spec_info) { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; @@ -704,7 +730,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, prog_data->binding_table.work_groups_start = 0; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_COMPUTE, + MESA_SHADER_COMPUTE, spec_info, &prog_data->base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1059,17 +1085,22 @@ anv_pipeline_init(struct anv_pipeline *pipeline, for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->pStages[i].module); - const char *entrypoint = pCreateInfo->pStages[i].pName; switch (pCreateInfo->pStages[i].stage) { case VK_SHADER_STAGE_VERTEX_BIT: - anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module, entrypoint); + anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); break; case VK_SHADER_STAGE_GEOMETRY_BIT: - anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module, entrypoint); + anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); break; case VK_SHADER_STAGE_FRAGMENT_BIT: - anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, module, entrypoint); + anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); break; default: anv_finishme("Unsupported shader stage"); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 138a40751f2..a0ac340cc62 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1369,7 +1369,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, struct anv_pipeline_cache *cache, const VkComputePipelineCreateInfo *info, struct anv_shader_module *module, - const char *entrypoint_name); + const char *entrypoint, + const 
VkSpecializationInfo *spec_info); VkResult anv_graphics_pipeline_create(VkDevice device, diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index dee3c4049c2..2be71a05af8 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -666,7 +666,8 @@ VkResult genX(compute_pipeline_create)( assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, - pCreateInfo->stage.pName); + pCreateInfo->stage.pName, + pCreateInfo->stage.pSpecializationInfo); pipeline->use_repclear = false; -- cgit v1.2.3 From 8c408b9b81d1a1ac740ca6b5bb5488f89574784d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 Jan 2016 17:16:48 -0800 Subject: nir/spirv/alu: Factor out the opcode table --- src/glsl/nir/spirv/vtn_alu.c | 246 +++++++++++++++++++-------------------- src/glsl/nir/spirv/vtn_private.h | 2 + 2 files changed, 123 insertions(+), 125 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_alu.c b/src/glsl/nir/spirv/vtn_alu.c index 03ed1f0caaa..4c45e234998 100644 --- a/src/glsl/nir/spirv/vtn_alu.c +++ b/src/glsl/nir/spirv/vtn_alu.c @@ -210,6 +210,101 @@ vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, } } +nir_op +vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap) +{ + /* Indicates that the first two arguments should be swapped. This is + * used for implementing greater-than and less-than-or-equal. 
+ */ + *swap = false; + + switch (opcode) { + case SpvOpSNegate: return nir_op_ineg; + case SpvOpFNegate: return nir_op_fneg; + case SpvOpNot: return nir_op_inot; + case SpvOpIAdd: return nir_op_iadd; + case SpvOpFAdd: return nir_op_fadd; + case SpvOpISub: return nir_op_isub; + case SpvOpFSub: return nir_op_fsub; + case SpvOpIMul: return nir_op_imul; + case SpvOpFMul: return nir_op_fmul; + case SpvOpUDiv: return nir_op_udiv; + case SpvOpSDiv: return nir_op_idiv; + case SpvOpFDiv: return nir_op_fdiv; + case SpvOpUMod: return nir_op_umod; + case SpvOpSMod: return nir_op_umod; /* FIXME? */ + case SpvOpFMod: return nir_op_fmod; + + case SpvOpShiftRightLogical: return nir_op_ushr; + case SpvOpShiftRightArithmetic: return nir_op_ishr; + case SpvOpShiftLeftLogical: return nir_op_ishl; + case SpvOpLogicalOr: return nir_op_ior; + case SpvOpLogicalEqual: return nir_op_ieq; + case SpvOpLogicalNotEqual: return nir_op_ine; + case SpvOpLogicalAnd: return nir_op_iand; + case SpvOpLogicalNot: return nir_op_inot; + case SpvOpBitwiseOr: return nir_op_ior; + case SpvOpBitwiseXor: return nir_op_ixor; + case SpvOpBitwiseAnd: return nir_op_iand; + case SpvOpSelect: return nir_op_bcsel; + case SpvOpIEqual: return nir_op_ieq; + + case SpvOpBitFieldInsert: return nir_op_bitfield_insert; + case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract; + case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract; + case SpvOpBitReverse: return nir_op_bitfield_reverse; + case SpvOpBitCount: return nir_op_bit_count; + + /* Comparisons: (TODO: How do we want to handled ordered/unordered?) 
*/ + case SpvOpFOrdEqual: return nir_op_feq; + case SpvOpFUnordEqual: return nir_op_feq; + case SpvOpINotEqual: return nir_op_ine; + case SpvOpFOrdNotEqual: return nir_op_fne; + case SpvOpFUnordNotEqual: return nir_op_fne; + case SpvOpULessThan: return nir_op_ult; + case SpvOpSLessThan: return nir_op_ilt; + case SpvOpFOrdLessThan: return nir_op_flt; + case SpvOpFUnordLessThan: return nir_op_flt; + case SpvOpUGreaterThan: *swap = true; return nir_op_ult; + case SpvOpSGreaterThan: *swap = true; return nir_op_ilt; + case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt; + case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt; + case SpvOpULessThanEqual: *swap = true; return nir_op_uge; + case SpvOpSLessThanEqual: *swap = true; return nir_op_ige; + case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge; + case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge; + case SpvOpUGreaterThanEqual: return nir_op_uge; + case SpvOpSGreaterThanEqual: return nir_op_ige; + case SpvOpFOrdGreaterThanEqual: return nir_op_fge; + case SpvOpFUnordGreaterThanEqual: return nir_op_fge; + + /* Conversions: */ + case SpvOpConvertFToU: return nir_op_f2u; + case SpvOpConvertFToS: return nir_op_f2i; + case SpvOpConvertSToF: return nir_op_i2f; + case SpvOpConvertUToF: return nir_op_u2f; + case SpvOpBitcast: return nir_op_imov; + case SpvOpUConvert: + case SpvOpQuantizeToF16: return nir_op_fquantize2f16; + /* TODO: NIR is 32-bit only; these are no-ops. 
*/ + case SpvOpSConvert: return nir_op_imov; + case SpvOpFConvert: return nir_op_fmov; + + /* Derivatives: */ + case SpvOpDPdx: return nir_op_fddx; + case SpvOpDPdy: return nir_op_fddy; + case SpvOpDPdxFine: return nir_op_fddx_fine; + case SpvOpDPdyFine: return nir_op_fddy_fine; + case SpvOpDPdxCoarse: return nir_op_fddx_coarse; + case SpvOpDPdyCoarse: return nir_op_fddy_coarse; + + case SpvOpSRem: + case SpvOpFRem: + default: + unreachable("No NIR equivalent"); + } +} + void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -237,56 +332,38 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, src[i] = vtn_src[i]->def; } - /* Indicates that the first two arguments should be swapped. This is - * used for implementing greater-than and less-than-or-equal. - */ - bool swap = false; - - nir_op op; switch (opcode) { - /* Basic ALU operations */ - case SpvOpSNegate: op = nir_op_ineg; break; - case SpvOpFNegate: op = nir_op_fneg; break; - case SpvOpNot: op = nir_op_inot; break; - case SpvOpAny: if (src[0]->num_components == 1) { - op = nir_op_imov; + val->ssa->def = nir_imov(&b->nb, src[0]); } else { + nir_op op; switch (src[0]->num_components) { case 2: op = nir_op_bany_inequal2; break; case 3: op = nir_op_bany_inequal3; break; case 4: op = nir_op_bany_inequal4; break; } - src[1] = nir_imm_int(&b->nb, NIR_FALSE); + val->ssa->def = nir_build_alu(&b->nb, op, src[0], + nir_imm_int(&b->nb, NIR_FALSE), + NULL, NULL); } - break; + return; case SpvOpAll: if (src[0]->num_components == 1) { - op = nir_op_imov; + val->ssa->def = nir_imov(&b->nb, src[0]); } else { + nir_op op; switch (src[0]->num_components) { case 2: op = nir_op_ball_iequal2; break; case 3: op = nir_op_ball_iequal3; break; case 4: op = nir_op_ball_iequal4; break; } - src[1] = nir_imm_int(&b->nb, NIR_TRUE); + val->ssa->def = nir_build_alu(&b->nb, op, src[0], + nir_imm_int(&b->nb, NIR_TRUE), + NULL, NULL); } - break; - - case SpvOpIAdd: op = nir_op_iadd; break; - case 
SpvOpFAdd: op = nir_op_fadd; break; - case SpvOpISub: op = nir_op_isub; break; - case SpvOpFSub: op = nir_op_fsub; break; - case SpvOpIMul: op = nir_op_imul; break; - case SpvOpFMul: op = nir_op_fmul; break; - case SpvOpUDiv: op = nir_op_udiv; break; - case SpvOpSDiv: op = nir_op_idiv; break; - case SpvOpFDiv: op = nir_op_fdiv; break; - case SpvOpUMod: op = nir_op_umod; break; - case SpvOpSMod: op = nir_op_umod; break; /* FIXME? */ - case SpvOpFMod: op = nir_op_fmod; break; + return; case SpvOpOuterProduct: { for (unsigned i = 0; i < src[1]->num_components; i++) { @@ -297,14 +374,8 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, } case SpvOpDot: - assert(src[0]->num_components == src[1]->num_components); - switch (src[0]->num_components) { - case 1: op = nir_op_fmul; break; - case 2: op = nir_op_fdot2; break; - case 3: op = nir_op_fdot3; break; - case 4: op = nir_op_fdot4; break; - } - break; + val->ssa->def = nir_fdot(&b->nb, src[0], src[1]); + return; case SpvOpIAddCarry: assert(glsl_type_is_struct(val->ssa->type)); @@ -330,74 +401,6 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]); return; - case SpvOpShiftRightLogical: op = nir_op_ushr; break; - case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; - case SpvOpShiftLeftLogical: op = nir_op_ishl; break; - case SpvOpLogicalOr: op = nir_op_ior; break; - case SpvOpLogicalEqual: op = nir_op_ieq; break; - case SpvOpLogicalNotEqual: op = nir_op_ine; break; - case SpvOpLogicalAnd: op = nir_op_iand; break; - case SpvOpLogicalNot: op = nir_op_inot; break; - case SpvOpBitwiseOr: op = nir_op_ior; break; - case SpvOpBitwiseXor: op = nir_op_ixor; break; - case SpvOpBitwiseAnd: op = nir_op_iand; break; - case SpvOpSelect: op = nir_op_bcsel; break; - case SpvOpIEqual: op = nir_op_ieq; break; - - case SpvOpBitFieldInsert: op = nir_op_bitfield_insert; break; - case SpvOpBitFieldSExtract: op = nir_op_ibitfield_extract; break; - case 
SpvOpBitFieldUExtract: op = nir_op_ubitfield_extract; break; - case SpvOpBitReverse: op = nir_op_bitfield_reverse; break; - case SpvOpBitCount: op = nir_op_bit_count; break; - - /* Comparisons: (TODO: How do we want to handled ordered/unordered?) */ - case SpvOpFOrdEqual: op = nir_op_feq; break; - case SpvOpFUnordEqual: op = nir_op_feq; break; - case SpvOpINotEqual: op = nir_op_ine; break; - case SpvOpFOrdNotEqual: op = nir_op_fne; break; - case SpvOpFUnordNotEqual: op = nir_op_fne; break; - case SpvOpULessThan: op = nir_op_ult; break; - case SpvOpSLessThan: op = nir_op_ilt; break; - case SpvOpFOrdLessThan: op = nir_op_flt; break; - case SpvOpFUnordLessThan: op = nir_op_flt; break; - case SpvOpUGreaterThan: op = nir_op_ult; swap = true; break; - case SpvOpSGreaterThan: op = nir_op_ilt; swap = true; break; - case SpvOpFOrdGreaterThan: op = nir_op_flt; swap = true; break; - case SpvOpFUnordGreaterThan: op = nir_op_flt; swap = true; break; - case SpvOpULessThanEqual: op = nir_op_uge; swap = true; break; - case SpvOpSLessThanEqual: op = nir_op_ige; swap = true; break; - case SpvOpFOrdLessThanEqual: op = nir_op_fge; swap = true; break; - case SpvOpFUnordLessThanEqual: op = nir_op_fge; swap = true; break; - case SpvOpUGreaterThanEqual: op = nir_op_uge; break; - case SpvOpSGreaterThanEqual: op = nir_op_ige; break; - case SpvOpFOrdGreaterThanEqual: op = nir_op_fge; break; - case SpvOpFUnordGreaterThanEqual:op = nir_op_fge; break; - - /* Conversions: */ - case SpvOpConvertFToU: op = nir_op_f2u; break; - case SpvOpConvertFToS: op = nir_op_f2i; break; - case SpvOpConvertSToF: op = nir_op_i2f; break; - case SpvOpConvertUToF: op = nir_op_u2f; break; - case SpvOpBitcast: op = nir_op_imov; break; - case SpvOpUConvert: - case SpvOpSConvert: - op = nir_op_imov; /* TODO: NIR is 32-bit only; these are no-ops. 
*/ - break; - case SpvOpFConvert: - op = nir_op_fmov; - break; - - case SpvOpQuantizeToF16: - op = nir_op_fquantize2f16; - break; - - /* Derivatives: */ - case SpvOpDPdx: op = nir_op_fddx; break; - case SpvOpDPdy: op = nir_op_fddy; break; - case SpvOpDPdxFine: op = nir_op_fddx_fine; break; - case SpvOpDPdyFine: op = nir_op_fddy_fine; break; - case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; - case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; case SpvOpFwidth: val->ssa->def = nir_fadd(&b->nb, nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), @@ -419,10 +422,6 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); return; - case SpvOpSRem: - case SpvOpFRem: - unreachable("No NIR equivalent"); - case SpvOpIsNan: val->ssa->def = nir_fne(&b->nb, src[0], src[0]); return; @@ -432,21 +431,18 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_imm_float(&b->nb, INFINITY)); return; - case SpvOpIsFinite: - case SpvOpIsNormal: - case SpvOpSignBitSet: - case SpvOpLessOrGreater: - case SpvOpOrdered: - case SpvOpUnordered: - default: - unreachable("Unhandled opcode"); - } + default: { + bool swap; + nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); - if (swap) { - nir_ssa_def *tmp = src[0]; - src[0] = src[1]; - src[1] = tmp; - } + if (swap) { + nir_ssa_def *tmp = src[0]; + src[0] = src[1]; + src[1] = tmp; + } - val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); + val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); + return; + } /* default */ + } } diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 1f88eeda941..129414a4001 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -393,6 +393,8 @@ typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *, void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, vtn_execution_mode_foreach_cb cb, void *data); +nir_op 
vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap); + void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count); -- cgit v1.2.3 From 0079523a0d42538020ba89add961f09507e41949 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 12 Jan 2016 20:02:04 -0800 Subject: nir/spirv: Add support for OpSpecConstantOp --- src/glsl/nir/spirv/spirv_to_nir.c | 132 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index deea1adb3ea..ca9781d263d 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -28,6 +28,7 @@ #include "vtn_private.h" #include "nir/nir_vla.h" #include "nir/nir_control_flow.h" +#include "nir/nir_constant_expressions.h" static struct vtn_ssa_value * vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type) @@ -905,6 +906,136 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, break; } + case SpvOpSpecConstantOp: { + SpvOp opcode = get_specialization(b, val, w[3]); + switch (opcode) { + case SpvOpVectorShuffle: { + struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant); + struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant); + unsigned len0 = glsl_get_vector_elements(v0->const_type); + unsigned len1 = glsl_get_vector_elements(v1->const_type); + + uint32_t u[8]; + for (unsigned i = 0; i < len0; i++) + u[i] = v0->constant->value.u[i]; + for (unsigned i = 0; i < len1; i++) + u[len0 + i] = v1->constant->value.u[i]; + + for (unsigned i = 0; i < count - 6; i++) { + uint32_t comp = w[i + 6]; + if (comp == (uint32_t)-1) { + val->constant->value.u[i] = 0xdeadbeef; + } else { + val->constant->value.u[i] = u[comp]; + } + } + return; + } + + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: { + struct vtn_value *comp; + unsigned deref_start; + struct nir_constant **c; + if (opcode == SpvOpCompositeExtract) { + comp = 
vtn_value(b, w[4], vtn_value_type_constant); + deref_start = 5; + c = &comp->constant; + } else { + comp = vtn_value(b, w[5], vtn_value_type_constant); + deref_start = 6; + val->constant = nir_constant_clone(comp->constant, + (nir_variable *)b); + c = &val->constant; + } + + int elem = -1; + const struct glsl_type *type = comp->const_type; + for (unsigned i = deref_start; i < count; i++) { + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* If we hit this granularity, we're picking off an element */ + if (elem < 0) + elem = 0; + + if (glsl_type_is_matrix(type)) { + elem += w[i] * glsl_get_vector_elements(type); + type = glsl_get_column_type(type); + } else { + assert(glsl_type_is_vector(type)); + elem += w[i]; + type = glsl_scalar_type(glsl_get_base_type(type)); + } + continue; + + case GLSL_TYPE_ARRAY: + c = &(*c)->elements[w[i]]; + type = glsl_get_array_element(type); + continue; + + case GLSL_TYPE_STRUCT: + c = &(*c)->elements[w[i]]; + type = glsl_get_struct_field(type, w[i]); + continue; + + default: + unreachable("Invalid constant type"); + } + } + + if (opcode == SpvOpCompositeExtract) { + if (elem == -1) { + val->constant = *c; + } else { + unsigned num_components = glsl_get_vector_elements(type); + for (unsigned i = 0; i < num_components; i++) + val->constant->value.u[i] = (*c)->value.u[elem + i]; + } + } else { + struct vtn_value *insert = + vtn_value(b, w[4], vtn_value_type_constant); + assert(insert->const_type == type); + if (elem == -1) { + *c = insert->constant; + } else { + unsigned num_components = glsl_get_vector_elements(type); + for (unsigned i = 0; i < num_components; i++) + (*c)->value.u[elem + i] = insert->constant->value.u[i]; + } + } + return; + } + + default: { + bool swap; + nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); + + unsigned num_components = glsl_get_vector_elements(val->const_type); + + nir_const_value src[3]; + assert(count <= 7); + 
for (unsigned i = 0; i < count - 4; i++) { + nir_constant *c = + vtn_value(b, w[4 + i], vtn_value_type_constant)->constant; + + unsigned j = swap ? 1 - i : i; + for (unsigned k = 0; k < num_components; k++) + src[j].u[k] = c->value.u[k]; + } + + nir_const_value res = nir_eval_const_opcode(op, num_components, src); + + for (unsigned k = 0; k < num_components; k++) + val->constant->value.u[k] = res.u[k]; + + return; + } /* default */ + } + } + case SpvOpConstantNull: val->constant = vtn_null_constant(b, val->const_type); break; @@ -3294,6 +3425,7 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpSpecConstantFalse: case SpvOpSpecConstant: case SpvOpSpecConstantComposite: + case SpvOpSpecConstantOp: vtn_handle_constant(b, opcode, w, count); break; -- cgit v1.2.3 From cac99fffdb87a42ab4a2a75747d11afca9a57bf7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 Jan 2016 15:05:39 -0800 Subject: nir: Add more modulus and remainder opcodes SPIR-V makes a distinction between "modulus" and "remainder" for both floating-point and signed integer variants. The difference is primarily one of which source they take their sign from. The "remainder" opcode for integers is equivalent to the C/C++ "%" operation while the "modulus" opcode is more mathematically correct (at least for an unsigned divisor). This commit adds corresponding opcodes to NIR. --- src/glsl/nir/nir_opcodes.py | 16 +++++++++++++++- src/glsl/nir/nir_opt_algebraic.py | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index 9dbb341d91c..d4f400d020d 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -369,9 +369,23 @@ binop_convert("uadd_carry", tuint, tuint, commutative, "src0 + src1 < src0") binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1") -binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") binop("umod", tuint, "", "src1 == 0 ? 
0 : src0 % src1") +# For signed integers, there are several different possible definitions of +# "modulus" or "remainder". We follow the conventions used by LLVM and +# SPIR-V. The irem opcode implements the standard C/C++ signed "%" +# operation while the imod opcode implements the more mathematical +# "modulus" operation. For details on the difference, see +# +# http://mathforum.org/library/drmath/view/52343.html + +binop("irem", tint, "", "src1 == 0 ? 0 : src0 % src1") +binop("imod", tint, "", + "src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ?" + " src0 % src1 : src0 % src1 + src1)") +binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") +binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / src1)") + # # Comparisons # diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index db18fc9619b..416904669a4 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -226,6 +226,7 @@ optimizations = [ # Misc. 
lowering (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'), + (('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'), (('bitfield_insert', a, b, c, d), ('bfi', ('bfm', d, c), b, a), 'options->lower_bitfield_insert'), (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'), (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'), -- cgit v1.2.3 From 7d5ae2d34babbb5302bdfc6658499156a125c008 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 Jan 2016 15:09:00 -0800 Subject: i965: Implement nir_op_irem and nir_op_srem --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 32 ++++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 32 ++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index ffb805965b3..396583acead 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -803,9 +803,41 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) unreachable("Should have been lowered by borrow_to_arith()."); case nir_op_umod: + case nir_op_irem: + /* According to the sign table for INT DIV in the Ivy Bridge PRM, it + * appears that our hardware just does the right thing for signed + * remainder. + */ bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]); break; + case nir_op_imod: { + /* Get a regular C-style remainder. If a % b == 0, set the predicate. */ + bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]); + + /* Math instructions don't support conditional mod */ + inst = bld.MOV(bld.null_reg_d(), result); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + /* Now, we need to determine if signs of the sources are different. 
+ * When we XOR the sources, the top bit is 0 if they are the same and 1 + * if they are different. We can then use a conditional modifier to + * turn that into a predicate. This leads us to an XOR.l instruction. + */ + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D); + inst = bld.XOR(tmp, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->conditional_mod = BRW_CONDITIONAL_L; + + /* If the result of the initial remainder operation is non-zero and the + * two sources have different signs, add in a copy of op[1] to get the + * final integer modulus value. + */ + inst = bld.ADD(result, result, op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + } + case nir_op_flt: case nir_op_ilt: case nir_op_ult: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 77a2f8b85fb..2ceaa888b0c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1105,9 +1105,41 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_umod: + case nir_op_irem: + /* According to the sign table for INT DIV in the Ivy Bridge PRM, it + * appears that our hardware just does the right thing for signed + * remainder. + */ emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]); break; + case nir_op_imod: { + /* Get a regular C-style remainder. If a % b == 0, set the predicate. */ + inst = emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]); + + /* Math instructions don't support conditional mod */ + inst = emit(MOV(dst_null_d(), src_reg(dst))); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + /* Now, we need to determine if signs of the sources are different. + * When we XOR the sources, the top bit is 0 if they are the same and 1 + * if they are different. We can then use a conditional modifier to + * turn that into a predicate. This leads us to an XOR.l instruction. 
+ */ + src_reg tmp = src_reg(this, glsl_type::ivec4_type); + inst = emit(XOR(dst_reg(tmp), op[0], op[1])); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->conditional_mod = BRW_CONDITIONAL_L; + + /* If the result of the initial remainder operation is non-zero and the + * two sources have different signs, add in a copy of op[1] to get the + * final integer modulus value. + */ + inst = emit(ADD(dst, src_reg(dst), op[1])); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + } + case nir_op_ldexp: unreachable("not reached: should be handled by ldexp_to_arith()"); -- cgit v1.2.3 From 4507d8a57af7df679fc0e7b56f4441519ff7e3d4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 Jan 2016 15:09:45 -0800 Subject: nir/spirv/alu: Properly implement mod/rem --- src/glsl/nir/spirv/vtn_alu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_alu.c b/src/glsl/nir/spirv/vtn_alu.c index 4c45e234998..d866da7445e 100644 --- a/src/glsl/nir/spirv/vtn_alu.c +++ b/src/glsl/nir/spirv/vtn_alu.c @@ -232,8 +232,10 @@ vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap) case SpvOpSDiv: return nir_op_idiv; case SpvOpFDiv: return nir_op_fdiv; case SpvOpUMod: return nir_op_umod; - case SpvOpSMod: return nir_op_umod; /* FIXME? 
*/ + case SpvOpSMod: return nir_op_imod; case SpvOpFMod: return nir_op_fmod; + case SpvOpSRem: return nir_op_irem; + case SpvOpFRem: return nir_op_frem; case SpvOpShiftRightLogical: return nir_op_ushr; case SpvOpShiftRightArithmetic: return nir_op_ishr; @@ -298,8 +300,6 @@ vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap) case SpvOpDPdxCoarse: return nir_op_fddx_coarse; case SpvOpDPdyCoarse: return nir_op_fddy_coarse; - case SpvOpSRem: - case SpvOpFRem: default: unreachable("No NIR equivalent"); } -- cgit v1.2.3 From 8ce2b0e1405b3d6d3590dcd3ce1ac2d04d228ad4 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 11 Jan 2016 21:53:35 -0800 Subject: nir/spirv: Add support for ArrayLength op Signed-off-by: Jordan Justen --- src/glsl/nir/spirv/spirv_to_nir.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index ca9781d263d..4462c9f5a80 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2188,8 +2188,39 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, break; } + case SpvOpArrayLength: { + struct vtn_value *v_deref = vtn_value(b, w[3], vtn_value_type_deref); + struct vtn_type *type = v_deref->deref_type; + const uint32_t offset = type->offsets[w[4]]; + const uint32_t stride = type->members[w[4]]->stride; + nir_deref *n_deref = &v_deref->deref->deref; + nir_ssa_def *index = + get_vulkan_resource_index(b, &n_deref, &type); + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->nb.shader, + nir_intrinsic_get_buffer_size); + instr->src[0] = nir_src_for_ssa(index); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(&b->nb, &instr->instr); + nir_ssa_def *buf_size = &instr->dest.ssa; + + /* array_length = max(buffer_size - offset, 0) / stride */ + nir_ssa_def *array_length = + nir_idiv(&b->nb, + nir_imax(&b->nb, + nir_isub(&b->nb, + 
buf_size, + nir_imm_int(&b->nb, offset)), + nir_imm_int(&b->nb, 0u)), + nir_imm_int(&b->nb, stride)); + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_create_ssa_value(b, glsl_uint_type()); + val->ssa->def = array_length; + break; + } + case SpvOpCopyMemorySized: - case SpvOpArrayLength: default: unreachable("Unhandled opcode"); } -- cgit v1.2.3 From 102c74277f989703dfb3783c5c0b93e3731012bf Mon Sep 17 00:00:00 2001 From: BogDan Vatra Date: Tue, 5 Jan 2016 21:44:16 +0200 Subject: WIP: Partially upgrade to vulkan v0.221.0 TODO, make use of: - VkPhysicalDeviceFeatures.drawIndirectFirstInstance, - VkPhysicalDeviceFeatures.inheritedQueries - VkPhysicalDeviceLimits.timestampComputeAndGraphics - VkSubmitInfo.pWaitDstStageMask - VkSubresourceLayout.arrayPitch - VkSamplerCreateInfo.anisotropyEnable --- include/vulkan/vulkan.h | 273 +++++++++++++++++++++++++--------------- src/vulkan/anv_cmd_buffer.c | 16 +-- src/vulkan/anv_descriptor_set.c | 8 +- src/vulkan/anv_meta.c | 4 +- src/vulkan/anv_meta_clear.c | 8 +- src/vulkan/anv_query.c | 14 +-- src/vulkan/anv_wsi_wayland.c | 13 +- src/vulkan/anv_wsi_x11.c | 13 +- src/vulkan/gen8_cmd_buffer.c | 4 +- 9 files changed, 215 insertions(+), 138 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index fa58c593fb6..9f4506b580c 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -41,7 +41,7 @@ extern "C" { ((major << 22) | (minor << 12) | patch) // Vulkan API version supported by this file -#define VK_API_VERSION VK_MAKE_VERSION(0, 210, 1) +#define VK_API_VERSION VK_MAKE_VERSION(0, 221, 0) #define VK_NULL_HANDLE 0 @@ -107,6 +107,14 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkCommandPool) #define VK_MAX_DESCRIPTION_SIZE 256 +typedef enum VkPipelineCacheHeaderVersion { + VK_PIPELINE_CACHE_HEADER_VERSION_ONE = 1, + VK_PIPELINE_CACHE_HEADER_VERSION_BEGIN_RANGE = VK_PIPELINE_CACHE_HEADER_VERSION_ONE, + VK_PIPELINE_CACHE_HEADER_VERSION_END_RANGE = 
VK_PIPELINE_CACHE_HEADER_VERSION_ONE, + VK_PIPELINE_CACHE_HEADER_VERSION_RANGE_SIZE = (VK_PIPELINE_CACHE_HEADER_VERSION_ONE - VK_PIPELINE_CACHE_HEADER_VERSION_ONE + 1), + VK_PIPELINE_CACHE_HEADER_VERSION_MAX_ENUM = 0x7FFFFFFF +} VkPipelineCacheHeaderVersion; + typedef enum VkResult { VK_SUCCESS = 0, VK_NOT_READY = 1, @@ -130,6 +138,7 @@ typedef enum VkResult { VK_ERROR_OUT_OF_DATE_KHR = -1000001004, VK_ERROR_INCOMPATIBLE_DISPLAY_KHR = -1000003001, VK_ERROR_NATIVE_WINDOW_IN_USE_KHR = -1000008000, + VK_ERROR_VALIDATION_FAILED_EXT = -1000011001, VK_RESULT_BEGIN_RANGE = VK_ERROR_FORMAT_NOT_SUPPORTED, VK_RESULT_END_RANGE = VK_INCOMPLETE, VK_RESULT_RANGE_SIZE = (VK_INCOMPLETE - VK_ERROR_FORMAT_NOT_SUPPORTED + 1), @@ -190,6 +199,13 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_DISPLAY_MODE_CREATE_INFO_KHR = 1000002000, VK_STRUCTURE_TYPE_DISPLAY_SURFACE_CREATE_INFO_KHR = 1000002001, VK_STRUCTURE_TYPE_DISPLAY_PRESENT_INFO_KHR = 1000003000, + VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR = 1000004000, + VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR = 1000005000, + VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR = 1000006000, + VK_STRUCTURE_TYPE_MIR_SURFACE_CREATE_INFO_KHR = 1000007000, + VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR = 1000008000, + VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR = 1000009000, + VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = 1000011000, VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO, VK_STRUCTURE_TYPE_RANGE_SIZE = (VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), @@ -846,6 +862,27 @@ typedef enum VkMemoryHeapFlagBits { typedef VkFlags VkMemoryHeapFlags; typedef VkFlags VkDeviceCreateFlags; typedef VkFlags VkDeviceQueueCreateFlags; + +typedef enum VkPipelineStageFlagBits { + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT = 0x00000001, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT = 0x00000002, + 
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT = 0x00000004, + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT = 0x00000008, + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT = 0x00000010, + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT = 0x00000020, + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT = 0x00000040, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT = 0x00000080, + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT = 0x00000100, + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT = 0x00000200, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT = 0x00000400, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT = 0x00000800, + VK_PIPELINE_STAGE_TRANSFER_BIT = 0x00001000, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT = 0x00002000, + VK_PIPELINE_STAGE_HOST_BIT = 0x00004000, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT = 0x00008000, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT = 0x00010000, +} VkPipelineStageFlagBits; +typedef VkFlags VkPipelineStageFlags; typedef VkFlags VkMemoryMapFlags; typedef enum VkImageAspectFlagBits { @@ -892,7 +929,6 @@ typedef enum VkQueryPipelineStatisticFlagBits { typedef VkFlags VkQueryPipelineStatisticFlags; typedef enum VkQueryResultFlagBits { - VK_QUERY_RESULT_DEFAULT = 0, VK_QUERY_RESULT_64_BIT = 0x00000001, VK_QUERY_RESULT_WAIT_BIT = 0x00000002, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT = 0x00000004, @@ -944,7 +980,7 @@ typedef enum VkShaderStageFlagBits { } VkShaderStageFlagBits; typedef VkFlags VkPipelineVertexInputStateCreateFlags; typedef VkFlags VkPipelineInputAssemblyStateCreateFlags; -typedef VkFlags VkPipelineTesselationStateCreateFlags; +typedef VkFlags VkPipelineTessellationStateCreateFlags; typedef VkFlags VkPipelineViewportStateCreateFlags; typedef VkFlags VkPipelineRasterizationStateCreateFlags; @@ -986,26 +1022,6 @@ typedef enum VkAttachmentDescriptionFlagBits { typedef VkFlags VkAttachmentDescriptionFlags; typedef VkFlags VkSubpassDescriptionFlags; -typedef enum VkPipelineStageFlagBits { - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT = 0x00000001, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT = 0x00000002, - 
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT = 0x00000004, - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT = 0x00000008, - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT = 0x00000010, - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT = 0x00000020, - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT = 0x00000040, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT = 0x00000080, - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT = 0x00000100, - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT = 0x00000200, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT = 0x00000400, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT = 0x00000800, - VK_PIPELINE_STAGE_TRANSFER_BIT = 0x00001000, - VK_PIPELINE_STAGE_HOST_BIT = 0x00002000, - VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT = 0x00004000, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT = 0x00008000, -} VkPipelineStageFlagBits; -typedef VkFlags VkPipelineStageFlags; - typedef enum VkAccessFlagBits { VK_ACCESS_INDIRECT_COMMAND_READ_BIT = 0x00000001, VK_ACCESS_INDEX_READ_BIT = 0x00000002, @@ -1061,7 +1077,6 @@ typedef enum VkCommandBufferResetFlagBits { typedef VkFlags VkCommandBufferResetFlags; typedef enum VkStencilFaceFlagBits { - VK_STENCIL_FACE_NONE = 0, VK_STENCIL_FACE_FRONT_BIT = 0x00000001, VK_STENCIL_FACE_BACK_BIT = 0x00000002, VK_STENCIL_FRONT_AND_BACK = 0x3, @@ -1140,6 +1155,7 @@ typedef struct VkPhysicalDeviceFeatures { VkBool32 dualSrcBlend; VkBool32 logicOp; VkBool32 multiDrawIndirect; + VkBool32 drawIndirectFirstInstance; VkBool32 depthClamp; VkBool32 depthBiasClamp; VkBool32 fillModeNonSolid; @@ -1183,6 +1199,7 @@ typedef struct VkPhysicalDeviceFeatures { VkBool32 sparseResidency16Samples; VkBool32 sparseResidencyAliased; VkBool32 variableMultisampleRate; + VkBool32 inheritedQueries; } VkPhysicalDeviceFeatures; typedef struct VkFormatProperties { @@ -1297,6 +1314,7 @@ typedef struct VkPhysicalDeviceLimits { VkSampleCountFlags sampledImageStencilSampleCounts; VkSampleCountFlags storageImageSampleCounts; uint32_t maxSampleMaskWords; + VkBool32 timestampComputeAndGraphics; float timestampPeriod; 
uint32_t maxClipDistances; uint32_t maxCullDistances; @@ -1396,6 +1414,7 @@ typedef struct VkSubmitInfo { const void* pNext; uint32_t waitSemaphoreCount; const VkSemaphore* pWaitSemaphores; + const VkPipelineStageFlags* pWaitDstStageMask; uint32_t commandBufferCount; const VkCommandBuffer* pCommandBuffers; uint32_t signalSemaphoreCount; @@ -1431,7 +1450,7 @@ typedef struct VkSparseImageFormatProperties { typedef struct VkSparseImageMemoryRequirements { VkSparseImageFormatProperties formatProperties; - uint32_t imageMipTailStartLod; + uint32_t imageMipTailFirstLod; VkDeviceSize imageMipTailSize; VkDeviceSize imageMipTailOffset; VkDeviceSize imageMipTailStride; @@ -1569,6 +1588,7 @@ typedef struct VkSubresourceLayout { VkDeviceSize offset; VkDeviceSize size; VkDeviceSize rowPitch; + VkDeviceSize arrayPitch; VkDeviceSize depthPitch; } VkSubresourceLayout; @@ -1671,7 +1691,7 @@ typedef struct VkPipelineInputAssemblyStateCreateInfo { typedef struct VkPipelineTessellationStateCreateInfo { VkStructureType sType; const void* pNext; - VkPipelineTesselationStateCreateFlags flags; + VkPipelineTessellationStateCreateFlags flags; uint32_t patchControlPoints; } VkPipelineTessellationStateCreateInfo; @@ -1851,6 +1871,7 @@ typedef struct VkSamplerCreateInfo { VkSamplerAddressMode addressModeV; VkSamplerAddressMode addressModeW; float mipLodBias; + VkBool32 anisotropyEnable; float maxAnisotropy; VkBool32 compareEnable; VkCompareOp compareOp; @@ -1873,7 +1894,7 @@ typedef struct VkDescriptorSetLayoutCreateInfo { const void* pNext; VkDescriptorSetLayoutCreateFlags flags; uint32_t bindingCount; - const VkDescriptorSetLayoutBinding* pBinding; + const VkDescriptorSetLayoutBinding* pBindings; } VkDescriptorSetLayoutCreateInfo; typedef struct VkDescriptorPoolSize { @@ -2214,7 +2235,7 @@ typedef VkResult (VKAPI_PTR *PFN_vkSetEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI_PTR *PFN_vkResetEvent)(VkDevice device, VkEvent event); typedef VkResult (VKAPI_PTR 
*PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkQueryPool* pQueryPool); typedef void (VKAPI_PTR *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, size_t dataSize, void* pData, VkDeviceSize stride, VkQueryResultFlags flags); +typedef VkResult (VKAPI_PTR *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount, size_t dataSize, void* pData, VkDeviceSize stride, VkQueryResultFlags flags); typedef VkResult (VKAPI_PTR *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkBuffer* pBuffer); typedef void (VKAPI_PTR *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkBufferView* pView); @@ -2259,8 +2280,8 @@ typedef VkResult (VKAPI_PTR *PFN_vkBeginCommandBuffer)(VkCommandBuffer commandBu typedef VkResult (VKAPI_PTR *PFN_vkEndCommandBuffer)(VkCommandBuffer commandBuffer); typedef VkResult (VKAPI_PTR *PFN_vkResetCommandBuffer)(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags); typedef void (VKAPI_PTR *PFN_vkCmdBindPipeline)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); -typedef void (VKAPI_PTR *PFN_vkCmdSetViewport)(VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport* pViewports); -typedef void (VKAPI_PTR *PFN_vkCmdSetScissor)(VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D* pScissors); +typedef void (VKAPI_PTR *PFN_vkCmdSetViewport)(VkCommandBuffer commandBuffer, uint32_t firstViewport, 
uint32_t viewportCount, const VkViewport* pViewports); +typedef void (VKAPI_PTR *PFN_vkCmdSetScissor)(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount, const VkRect2D* pScissors); typedef void (VKAPI_PTR *PFN_vkCmdSetLineWidth)(VkCommandBuffer commandBuffer, float lineWidth); typedef void (VKAPI_PTR *PFN_vkCmdSetDepthBias)(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp, float depthBiasSlopeFactor); typedef void (VKAPI_PTR *PFN_vkCmdSetBlendConstants)(VkCommandBuffer commandBuffer, const float blendConstants[4]); @@ -2270,7 +2291,7 @@ typedef void (VKAPI_PTR *PFN_vkCmdSetStencilWriteMask)(VkCommandBuffer commandBu typedef void (VKAPI_PTR *PFN_vkCmdSetStencilReference)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference); typedef void (VKAPI_PTR *PFN_vkCmdBindDescriptorSets)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); typedef void (VKAPI_PTR *PFN_vkCmdBindIndexBuffer)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); -typedef void (VKAPI_PTR *PFN_vkCmdBindVertexBuffers)(VkCommandBuffer commandBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); +typedef void (VKAPI_PTR *PFN_vkCmdBindVertexBuffers)(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); typedef void (VKAPI_PTR *PFN_vkCmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexed)(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); 
typedef void (VKAPI_PTR *PFN_vkCmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride); @@ -2294,9 +2315,9 @@ typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uin typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags, uint32_t memoryBarrierCount, const void* const* ppMemoryBarriers); typedef void (VKAPI_PTR *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t entry, VkQueryControlFlags flags); typedef void (VKAPI_PTR *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t entry); -typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount); +typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount); typedef void (VKAPI_PTR *PFN_vkCmdWriteTimestamp)(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkQueryPool queryPool, uint32_t entry); -typedef void (VKAPI_PTR *PFN_vkCmdCopyQueryPoolResults)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags); +typedef void (VKAPI_PTR *PFN_vkCmdCopyQueryPoolResults)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags); typedef void (VKAPI_PTR *PFN_vkCmdPushConstants)(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, const void* pValues); typedef void (VKAPI_PTR *PFN_vkCmdBeginRenderPass)(VkCommandBuffer commandBuffer, const 
VkRenderPassBeginInfo* pRenderPassBegin, VkSubpassContents contents); typedef void (VKAPI_PTR *PFN_vkCmdNextSubpass)(VkCommandBuffer commandBuffer, VkSubpassContents contents); @@ -2562,7 +2583,7 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyQueryPool( VKAPI_ATTR VkResult VKAPI_CALL vkGetQueryPoolResults( VkDevice device, VkQueryPool queryPool, - uint32_t startQuery, + uint32_t firstQuery, uint32_t queryCount, size_t dataSize, void* pData, @@ -2813,11 +2834,13 @@ VKAPI_ATTR void VKAPI_CALL vkCmdBindPipeline( VKAPI_ATTR void VKAPI_CALL vkCmdSetViewport( VkCommandBuffer commandBuffer, + uint32_t firstViewport, uint32_t viewportCount, const VkViewport* pViewports); VKAPI_ATTR void VKAPI_CALL vkCmdSetScissor( VkCommandBuffer commandBuffer, + uint32_t firstScissor, uint32_t scissorCount, const VkRect2D* pScissors); @@ -2873,7 +2896,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdBindIndexBuffer( VKAPI_ATTR void VKAPI_CALL vkCmdBindVertexBuffers( VkCommandBuffer commandBuffer, - uint32_t startBinding, + uint32_t firstBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); @@ -3047,7 +3070,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdEndQuery( VKAPI_ATTR void VKAPI_CALL vkCmdResetQueryPool( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t startQuery, + uint32_t firstQuery, uint32_t queryCount); VKAPI_ATTR void VKAPI_CALL vkCmdWriteTimestamp( @@ -3059,7 +3082,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdWriteTimestamp( VKAPI_ATTR void VKAPI_CALL vkCmdCopyQueryPoolResults( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t startQuery, + uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, @@ -3095,8 +3118,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdExecuteCommands( #define VK_KHR_surface 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSurfaceKHR) -#define VK_KHR_SURFACE_REVISION 24 -#define VK_KHR_SURFACE_EXTENSION_NUMBER 1 +#define VK_KHR_SURFACE_SPEC_VERSION 24 #define VK_KHR_SURFACE_EXTENSION_NAME "VK_KHR_surface" @@ -3121,7 
+3143,7 @@ typedef enum VkPresentModeKHR { typedef enum VkSurfaceTransformFlagBitsKHR { - VK_SURFACE_TRANSFORM_NONE_BIT_KHR = 0x00000001, + VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR = 0x00000001, VK_SURFACE_TRANSFORM_ROTATE_90_BIT_KHR = 0x00000002, VK_SURFACE_TRANSFORM_ROTATE_180_BIT_KHR = 0x00000004, VK_SURFACE_TRANSFORM_ROTATE_270_BIT_KHR = 0x00000008, @@ -3160,7 +3182,7 @@ typedef struct VkSurfaceFormatKHR { } VkSurfaceFormatKHR; -typedef void (VKAPI_PTR *PFN_vkDestroySurfaceKHR)(VkInstance instance, VkSurfaceKHR surface, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkDestroySurfaceKHR)(VkInstance instance, VkSurfaceKHR surface, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, VkSurfaceKHR surface, VkBool32* pSupported); typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceFormatsKHR)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, uint32_t* pSurfaceFormatCount, VkSurfaceFormatKHR* pSurfaceFormats); @@ -3168,8 +3190,8 @@ typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfacePresentModesKHR)(VkPh #ifdef VK_PROTOTYPES VKAPI_ATTR void VKAPI_CALL vkDestroySurfaceKHR( - VkInstance instance, - VkSurfaceKHR surface, + VkInstance instance, + VkSurfaceKHR surface, const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR( @@ -3199,8 +3221,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR( #define VK_KHR_swapchain 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSwapchainKHR) -#define VK_KHR_SWAPCHAIN_REVISION 67 -#define VK_KHR_SWAPCHAIN_EXTENSION_NUMBER 2 +#define VK_KHR_SWAPCHAIN_SPEC_VERSION 67 #define VK_KHR_SWAPCHAIN_EXTENSION_NAME "VK_KHR_swapchain" typedef 
VkFlags VkSwapchainCreateFlagsKHR; @@ -3238,49 +3259,48 @@ typedef struct VkPresentInfoKHR { } VkPresentInfoKHR; -typedef VkResult (VKAPI_PTR *PFN_vkCreateSwapchainKHR)(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain); -typedef void (VKAPI_PTR *PFN_vkDestroySwapchainKHR)(VkDevice device, VkSwapchainKHR swapchain, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainImagesKHR)(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pSwapchainImageCount, VkImage* pSwapchainImages); -typedef VkResult (VKAPI_PTR *PFN_vkAcquireNextImageKHR)(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, VkFence fence, uint32_t* pImageIndex); -typedef VkResult (VKAPI_PTR *PFN_vkQueuePresentKHR)(VkQueue queue, const VkPresentInfoKHR* pPresentInfo); +typedef VkResult (VKAPI_PTR *PFN_vkCreateSwapchainKHR)(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain); +typedef void (VKAPI_PTR *PFN_vkDestroySwapchainKHR)(VkDevice device, VkSwapchainKHR swapchain, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainImagesKHR)(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pSwapchainImageCount, VkImage* pSwapchainImages); +typedef VkResult (VKAPI_PTR *PFN_vkAcquireNextImageKHR)(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, VkFence fence, uint32_t* pImageIndex); +typedef VkResult (VKAPI_PTR *PFN_vkQueuePresentKHR)(VkQueue queue, const VkPresentInfoKHR* pPresentInfo); #ifdef VK_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR( - VkDevice device, - const VkSwapchainCreateInfoKHR* pCreateInfo, + VkDevice device, + const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, - VkSwapchainKHR* pSwapchain); + VkSwapchainKHR* pSwapchain); VKAPI_ATTR void VKAPI_CALL 
vkDestroySwapchainKHR( - VkDevice device, - VkSwapchainKHR swapchain, + VkDevice device, + VkSwapchainKHR swapchain, const VkAllocationCallbacks* pAllocator); VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainImagesKHR( - VkDevice device, - VkSwapchainKHR swapchain, - uint32_t* pSwapchainImageCount, - VkImage* pSwapchainImages); + VkDevice device, + VkSwapchainKHR swapchain, + uint32_t* pSwapchainImageCount, + VkImage* pSwapchainImages); VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR( - VkDevice device, - VkSwapchainKHR swapchain, - uint64_t timeout, - VkSemaphore semaphore, - VkFence fence, - uint32_t* pImageIndex); + VkDevice device, + VkSwapchainKHR swapchain, + uint64_t timeout, + VkSemaphore semaphore, + VkFence fence, + uint32_t* pImageIndex); VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR( - VkQueue queue, - const VkPresentInfoKHR* pPresentInfo); + VkQueue queue, + const VkPresentInfoKHR* pPresentInfo); #endif #define VK_KHR_display 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayKHR) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayModeKHR) -#define VK_KHR_DISPLAY_REVISION 21 -#define VK_KHR_DISPLAY_EXTENSION_NUMBER 3 +#define VK_KHR_DISPLAY_SPEC_VERSION 21 #define VK_KHR_DISPLAY_EXTENSION_NAME "VK_KHR_display" @@ -3403,8 +3423,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateDisplayPlaneSurfaceKHR( #endif #define VK_KHR_display_swapchain 1 -#define VK_KHR_DISPLAY_SWAPCHAIN_REVISION 9 -#define VK_KHR_DISPLAY_SWAPCHAIN_EXTENSION_NUMBER 4 +#define VK_KHR_DISPLAY_SWAPCHAIN_SPEC_VERSION 9 #define VK_KHR_DISPLAY_SWAPCHAIN_EXTENSION_NAME "VK_KHR_display_swapchain" typedef struct VkDisplayPresentInfoKHR { @@ -3431,18 +3450,27 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateSharedSwapchainsKHR( #define VK_KHR_xlib_surface 1 #include -#define VK_KHR_XLIB_SURFACE_REVISION 5 -#define VK_KHR_XLIB_SURFACE_EXTENSION_NUMBER 5 +#define VK_KHR_XLIB_SURFACE_SPEC_VERSION 6 #define VK_KHR_XLIB_SURFACE_EXTENSION_NAME "VK_KHR_xlib_surface" -typedef VkResult (VKAPI_PTR 
*PFN_vkCreateXlibSurfaceKHR)(VkInstance instance, Display* dpy, Window window, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkFlags VkXlibSurfaceCreateFlagsKHR; + +typedef struct VkXlibSurfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkXlibSurfaceCreateFlagsKHR flags; + Display* dpy; + Window window; +} VkXlibSurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateXlibSurfaceKHR)(VkInstance instance, const VkXlibSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceXlibPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, Display* dpy, VisualID visualID); #ifdef VK_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateXlibSurfaceKHR( VkInstance instance, - Display* dpy, - Window window, + const VkXlibSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); @@ -3458,18 +3486,27 @@ VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceXlibPresentationSupportKHR( #define VK_KHR_xcb_surface 1 #include -#define VK_KHR_XCB_SURFACE_REVISION 5 -#define VK_KHR_XCB_SURFACE_EXTENSION_NUMBER 6 +#define VK_KHR_XCB_SURFACE_SPEC_VERSION 6 #define VK_KHR_XCB_SURFACE_EXTENSION_NAME "VK_KHR_xcb_surface" -typedef VkResult (VKAPI_PTR *PFN_vkCreateXcbSurfaceKHR)(VkInstance instance, xcb_connection_t* connection, xcb_window_t window, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkFlags VkXcbSurfaceCreateFlagsKHR; + +typedef struct VkXcbSurfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkXcbSurfaceCreateFlagsKHR flags; + xcb_connection_t* connection; + xcb_window_t window; +} VkXcbSurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateXcbSurfaceKHR)(VkInstance instance, const VkXcbSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); typedef VkBool32 
(VKAPI_PTR *PFN_vkGetPhysicalDeviceXcbPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, xcb_connection_t* connection, xcb_visualid_t visual_id); #ifdef VK_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateXcbSurfaceKHR( VkInstance instance, - xcb_connection_t* connection, - xcb_window_t window, + const VkXcbSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); @@ -3485,18 +3522,27 @@ VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceXcbPresentationSupportKHR( #define VK_KHR_wayland_surface 1 #include -#define VK_KHR_WAYLAND_SURFACE_REVISION 4 -#define VK_KHR_WAYLAND_SURFACE_EXTENSION_NUMBER 7 +#define VK_KHR_WAYLAND_SURFACE_SPEC_VERSION 5 #define VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME "VK_KHR_wayland_surface" -typedef VkResult (VKAPI_PTR *PFN_vkCreateWaylandSurfaceKHR)(VkInstance instance, struct wl_display* display, struct wl_surface* surface, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkFlags VkWaylandSurfaceCreateFlagsKHR; + +typedef struct VkWaylandSurfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkWaylandSurfaceCreateFlagsKHR flags; + struct wl_display* display; + struct wl_surface* surface; +} VkWaylandSurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateWaylandSurfaceKHR)(VkInstance instance, const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, struct wl_display* display); #ifdef VK_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateWaylandSurfaceKHR( VkInstance instance, - struct wl_display* display, - struct wl_surface* surface, + const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); @@ -3511,18 +3557,27 @@ VKAPI_ATTR VkBool32 VKAPI_CALL 
vkGetPhysicalDeviceWaylandPresentationSupportKHR( #define VK_KHR_mir_surface 1 #include -#define VK_KHR_MIR_SURFACE_REVISION 4 -#define VK_KHR_MIR_SURFACE_EXTENSION_NUMBER 8 +#define VK_KHR_MIR_SURFACE_SPEC_VERSION 4 #define VK_KHR_MIR_SURFACE_EXTENSION_NAME "VK_KHR_mir_surface" -typedef VkResult (VKAPI_PTR *PFN_vkCreateMirSurfaceKHR)(VkInstance instance, MirConnection* connection, MirSurface* mirSurface, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkFlags VkMirSurfaceCreateFlagsKHR; + +typedef struct VkMirSurfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkMirSurfaceCreateFlagsKHR flags; + MirConnection* connection; + MirSurface* mirSurface; +} VkMirSurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateMirSurfaceKHR)(VkInstance instance, const VkMirSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceMirPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, MirConnection* connection); #ifdef VK_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateMirSurfaceKHR( VkInstance instance, - MirConnection* connection, - MirSurface* mirSurface, + const VkMirSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); @@ -3537,16 +3592,25 @@ VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceMirPresentationSupportKHR( #define VK_KHR_android_surface 1 #include -#define VK_KHR_ANDROID_SURFACE_REVISION 4 -#define VK_KHR_ANDROID_SURFACE_EXTENSION_NUMBER 9 +#define VK_KHR_ANDROID_SURFACE_SPEC_VERSION 5 #define VK_KHR_ANDROID_SURFACE_EXTENSION_NAME "VK_KHR_android_surface" -typedef VkResult (VKAPI_PTR *PFN_vkCreateAndroidSurfaceKHR)(VkInstance instance, ANativeWindow* window, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkFlags VkAndroidSurfaceCreateFlagsKHR; + +typedef struct VkAndroidSurfaceCreateInfoKHR { + 
VkStructureType sType; + const void* pNext; + VkAndroidSurfaceCreateFlagsKHR flags; + ANativeWindow* window; +} VkAndroidSurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateAndroidSurfaceKHR)(VkInstance instance, const VkAndroidSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); #ifdef VK_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateAndroidSurfaceKHR( VkInstance instance, - ANativeWindow* window, + const VkAndroidSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); #endif @@ -3556,18 +3620,27 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateAndroidSurfaceKHR( #define VK_KHR_win32_surface 1 #include -#define VK_KHR_WIN32_SURFACE_REVISION 4 -#define VK_KHR_WIN32_SURFACE_EXTENSION_NUMBER 10 +#define VK_KHR_WIN32_SURFACE_SPEC_VERSION 5 #define VK_KHR_WIN32_SURFACE_EXTENSION_NAME "VK_KHR_win32_surface" -typedef VkResult (VKAPI_PTR *PFN_vkCreateWin32SurfaceKHR)(VkInstance instance, HINSTANCE hinstance, HWND hwnd, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkFlags VkWin32SurfaceCreateFlagsKHR; + +typedef struct VkWin32SurfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkWin32SurfaceCreateFlagsKHR flags; + HINSTANCE hinstance; + HWND hwnd; +} VkWin32SurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateWin32SurfaceKHR)(VkInstance instance, const VkWin32SurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceWin32PresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex); #ifdef VK_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateWin32SurfaceKHR( VkInstance instance, - HINSTANCE hinstance, - HWND hwnd, + const VkWin32SurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); diff --git a/src/vulkan/anv_cmd_buffer.c 
b/src/vulkan/anv_cmd_buffer.c index bca2deafa7d..ae33822b633 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -377,13 +377,14 @@ void anv_CmdBindPipeline( void anv_CmdSetViewport( VkCommandBuffer commandBuffer, + uint32_t firstViewport, uint32_t viewportCount, const VkViewport* pViewports) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); cmd_buffer->state.dynamic.viewport.count = viewportCount; - memcpy(cmd_buffer->state.dynamic.viewport.viewports, + memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport, pViewports, viewportCount * sizeof(*pViewports)); cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; @@ -391,13 +392,14 @@ void anv_CmdSetViewport( void anv_CmdSetScissor( VkCommandBuffer commandBuffer, + uint32_t firstScissor, uint32_t scissorCount, const VkRect2D* pScissors) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); cmd_buffer->state.dynamic.scissor.count = scissorCount; - memcpy(cmd_buffer->state.dynamic.scissor.scissors, + memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor, pScissors, scissorCount * sizeof(*pScissors)); cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; @@ -558,7 +560,7 @@ void anv_CmdBindDescriptorSets( void anv_CmdBindVertexBuffers( VkCommandBuffer commandBuffer, - uint32_t startBinding, + uint32_t firstBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets) @@ -569,11 +571,11 @@ void anv_CmdBindVertexBuffers( /* We have to defer setting up vertex buffer since we need the buffer * stride from the pipeline. 
*/ - assert(startBinding + bindingCount < MAX_VBS); + assert(firstBinding + bindingCount < MAX_VBS); for (uint32_t i = 0; i < bindingCount; i++) { - vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); - vb[startBinding + i].offset = pOffsets[i]; - cmd_buffer->state.vb_dirty |= 1 << (startBinding + i); + vb[firstBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); + vb[firstBinding + i].offset = pOffsets[i]; + cmd_buffer->state.vb_dirty |= 1 << (firstBinding + i); } } diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 3e11047c673..828e20ae9c9 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -47,9 +47,9 @@ VkResult anv_CreateDescriptorSetLayout( uint32_t max_binding = 0; uint32_t immutable_sampler_count = 0; for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { - max_binding = MAX2(max_binding, pCreateInfo->pBinding[j].binding); - if (pCreateInfo->pBinding[j].pImmutableSamplers) - immutable_sampler_count += pCreateInfo->pBinding[j].descriptorCount; + max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding); + if (pCreateInfo->pBindings[j].pImmutableSamplers) + immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; } size_t size = sizeof(struct anv_descriptor_set_layout) + @@ -86,7 +86,7 @@ VkResult anv_CreateDescriptorSetLayout( uint32_t dynamic_offset_count = 0; for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { - const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBinding[j]; + const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j]; uint32_t b = binding->binding; assert(binding->descriptorCount > 0); diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 0c1b43925ed..a92eb7fcc97 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -290,7 +290,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) VkDescriptorSetLayoutCreateInfo ds_layout_info = { .sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .bindingCount = 1, - .pBinding = (VkDescriptorSetLayoutBinding[]) { + .pBindings = (VkDescriptorSetLayoutBinding[]) { { .binding = 0, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, @@ -637,7 +637,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } - anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 1, + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { .x = 0.0f, .y = 0.0f, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 1a4300c07e3..003e0e023ca 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -338,7 +338,7 @@ emit_load_color_clear(struct anv_cmd_buffer *cmd_buffer, .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, }); - ANV_CALL(CmdSetViewport)(cmd_buffer_h, 1, + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, (VkViewport[]) { { .x = 0, @@ -350,7 +350,7 @@ emit_load_color_clear(struct anv_cmd_buffer *cmd_buffer, }, }); - ANV_CALL(CmdSetScissor)(cmd_buffer_h, 1, + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, (VkRect2D[]) { { .offset = { 0, 0 }, @@ -506,7 +506,7 @@ emit_load_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, .depth_stencil_attachment = attachment, }); - ANV_CALL(CmdSetViewport)(cmd_buffer_h, 1, + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, (VkViewport[]) { { .x = 0, @@ -520,7 +520,7 @@ emit_load_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, }, }); - ANV_CALL(CmdSetScissor)(cmd_buffer_h, 1, + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, (VkRect2D[]) { { .offset = { 0, 0 }, diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index 1bcac097104..d4c34c7f021 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -95,7 +95,7 @@ void anv_DestroyQueryPool( VkResult anv_GetQueryPoolResults( VkDevice _device, VkQueryPool queryPool, - uint32_t startQuery, + uint32_t firstQuery, uint32_t queryCount, size_t dataSize, void* 
pData, @@ -129,14 +129,14 @@ VkResult anv_GetQueryPoolResults( for (uint32_t i = 0; i < queryCount; i++) { switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: { - result = slot[startQuery + i].end - slot[startQuery + i].begin; + result = slot[firstQuery + i].end - slot[firstQuery + i].begin; break; } case VK_QUERY_TYPE_PIPELINE_STATISTICS: /* Not yet implemented */ break; case VK_QUERY_TYPE_TIMESTAMP: { - result = slot[startQuery + i].begin; + result = slot[firstQuery + i].begin; break; } default: @@ -147,14 +147,14 @@ VkResult anv_GetQueryPoolResults( uint64_t *dst = pData; dst[0] = result; if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) - dst[1] = slot[startQuery + i].available; + dst[1] = slot[firstQuery + i].available; } else { uint32_t *dst = pData; if (result > UINT32_MAX) result = UINT32_MAX; dst[0] = result; if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) - dst[1] = slot[startQuery + i].available; + dst[1] = slot[firstQuery + i].available; } pData += stride; @@ -168,7 +168,7 @@ VkResult anv_GetQueryPoolResults( void anv_CmdResetQueryPool( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t startQuery, + uint32_t firstQuery, uint32_t queryCount) { ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); @@ -178,7 +178,7 @@ void anv_CmdResetQueryPool( case VK_QUERY_TYPE_OCCLUSION: case VK_QUERY_TYPE_TIMESTAMP: { struct anv_query_pool_slot *slot = pool->bo.map; - slot[startQuery + i].available = 0; + slot[firstQuery + i].available = 0; break; } default: diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index c9eb51012dc..d708341c98f 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -351,8 +351,8 @@ wsi_wl_surface_get_capabilities(struct anv_wsi_surface *surface, caps->currentExtent = (VkExtent2D) { -1, -1 }; caps->minImageExtent = (VkExtent2D) { 1, 1 }; caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; - caps->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; - 
caps->currentTransform = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; caps->maxImageArrayLayers = 1; caps->supportedCompositeAlpha = @@ -432,11 +432,12 @@ wsi_wl_surface_create_swapchain(struct anv_wsi_surface *surface, VkResult anv_CreateWaylandSurfaceKHR( VkInstance _instance, - struct wl_display* wl_display, - struct wl_surface* wl_surface, + const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) { + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR); + ANV_FROM_HANDLE(anv_instance, instance, _instance); struct wsi_wl_surface *surface; @@ -445,8 +446,8 @@ VkResult anv_CreateWaylandSurfaceKHR( if (surface == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - surface->display = wl_display; - surface->surface = wl_surface; + surface->display = pCreateInfo->display; + surface->surface = pCreateInfo->surface; surface->base.instance = instance; surface->base.destroy = wsi_wl_surface_destroy; diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index f340de476ca..8e35191576a 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -83,8 +83,8 @@ x11_surface_get_capabilities(struct anv_wsi_surface *wsi_surface, caps->minImageCount = 2; caps->maxImageCount = 4; - caps->supportedTransforms = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; - caps->currentTransform = VK_SURFACE_TRANSFORM_NONE_BIT_KHR; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; caps->maxImageArrayLayers = 1; caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR; caps->supportedUsageFlags = @@ -146,11 +146,12 @@ x11_surface_create_swapchain(struct anv_wsi_surface *surface, VkResult anv_CreateXcbSurfaceKHR( VkInstance _instance, - xcb_connection_t* connection, - xcb_window_t 
window, + const VkXcbSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) { + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR); + ANV_FROM_HANDLE(anv_instance, instance, _instance); struct x11_surface *surface; @@ -159,8 +160,8 @@ VkResult anv_CreateXcbSurfaceKHR( if (surface == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - surface->connection = connection; - surface->window = window; + surface->connection = pCreateInfo->connection; + surface->window = pCreateInfo->window; surface->base.instance = instance; surface->base.destroy = x11_surface_destroy; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index ec86bb2221c..20a9ec75bca 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -1041,7 +1041,7 @@ store_query_result(struct anv_batch *batch, uint32_t reg, void genX(CmdCopyQueryPoolResults)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t startQuery, + uint32_t firstQuery, uint32_t queryCount, VkBuffer destBuffer, VkDeviceSize destOffset, @@ -1061,7 +1061,7 @@ void genX(CmdCopyQueryPoolResults)( dst_offset = buffer->offset + destOffset; for (uint32_t i = 0; i < queryCount; i++) { - slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); + slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot); switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: emit_load_alu_reg_u64(&cmd_buffer->batch, -- cgit v1.2.3 From 8a81d136f833ca2fda27438d0b16d3aed7c78746 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 Jan 2016 19:27:10 -0800 Subject: anv/image: Fill out VkSubresourceLayout.arrayPitch --- src/vulkan/anv_image.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 79c6ba15a03..f79583eac04 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -295,6 +295,7 @@ anv_surface_get_subresource_layout(struct anv_image 
*image, layout->offset = surface->offset; layout->rowPitch = surface->isl.row_pitch; layout->depthPitch = isl_surf_get_array_pitch(&surface->isl); + layout->arrayPitch = isl_surf_get_array_pitch(&surface->isl); layout->size = surface->isl.size; } -- cgit v1.2.3 From ed4fe3e9ba9018e68afe6fdd4f267218a537fdaa Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 Jan 2016 19:48:57 -0800 Subject: anv/state: Respect SamplerCreateInfo.anisotropyEnable --- src/vulkan/gen7_state.c | 18 +++++------------- src/vulkan/gen8_state.c | 18 +++++------------- src/vulkan/genX_state_util.h | 22 ++++++++++++++++++---- 3 files changed, 28 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 88598cea18e..09c1332e450 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -84,7 +84,6 @@ VkResult genX(CreateSampler)( { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_sampler *sampler; - uint32_t mag_filter, min_filter, max_anisotropy; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); @@ -93,23 +92,16 @@ VkResult genX(CreateSampler)( if (!sampler) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - if (pCreateInfo->maxAnisotropy > 1) { - mag_filter = MAPFILTER_ANISOTROPIC; - min_filter = MAPFILTER_ANISOTROPIC; - max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2; - } else { - mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter]; - min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; - max_anisotropy = RATIO21; - } + uint32_t filter = vk_to_gen_tex_filter(pCreateInfo->magFilter, + pCreateInfo->anisotropyEnable); struct GEN7_SAMPLER_STATE sampler_state = { .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, .BaseMipLevel = 0.0, .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], - .MagModeFilter = mag_filter, - .MinModeFilter = min_filter, + .MagModeFilter = filter, + .MinModeFilter = filter, .TextureLODBias = pCreateInfo->mipLodBias * 256, 
.AnisotropicAlgorithm = EWAApproximation, .MinLOD = pCreateInfo->minLod, @@ -124,7 +116,7 @@ VkResult genX(CreateSampler)( device->border_colors.offset + pCreateInfo->borderColor * sizeof(float) * 4, - .MaximumAnisotropy = max_anisotropy, + .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), .RAddressMinFilterRoundingEnable = 0, .RAddressMagFilterRoundingEnable = 0, .VAddressMinFilterRoundingEnable = 0, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 13b7e1149d9..b6741e005d3 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -363,7 +363,6 @@ VkResult genX(CreateSampler)( { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_sampler *sampler; - uint32_t mag_filter, min_filter, max_anisotropy; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); @@ -372,15 +371,8 @@ VkResult genX(CreateSampler)( if (!sampler) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - if (pCreateInfo->maxAnisotropy > 1) { - mag_filter = MAPFILTER_ANISOTROPIC; - min_filter = MAPFILTER_ANISOTROPIC; - max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2; - } else { - mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter]; - min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; - max_anisotropy = RATIO21; - } + uint32_t filter = vk_to_gen_tex_filter(pCreateInfo->magFilter, + pCreateInfo->anisotropyEnable); struct GENX(SAMPLER_STATE) sampler_state = { .SamplerDisable = false, @@ -390,8 +382,8 @@ VkResult genX(CreateSampler)( .BaseMipLevel = 0.0, #endif .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], - .MagModeFilter = mag_filter, - .MinModeFilter = min_filter, + .MagModeFilter = filter, + .MinModeFilter = filter, .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), .AnisotropicAlgorithm = EWAApproximation, .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), @@ -407,7 +399,7 @@ VkResult genX(CreateSampler)( pCreateInfo->borderColor * sizeof(float) * 4, .LODClampMagnificationMode 
= MIPNONE, - .MaximumAnisotropy = max_anisotropy, + .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), .RAddressMinFilterRoundingEnable = 0, .RAddressMagFilterRoundingEnable = 0, .VAddressMinFilterRoundingEnable = 0, diff --git a/src/vulkan/genX_state_util.h b/src/vulkan/genX_state_util.h index 0741d766edd..f7a860e796b 100644 --- a/src/vulkan/genX_state_util.h +++ b/src/vulkan/genX_state_util.h @@ -66,10 +66,24 @@ vk_to_gen_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) } #endif -static const uint32_t vk_to_gen_tex_filter[] = { - [VK_FILTER_NEAREST] = MAPFILTER_NEAREST, - [VK_FILTER_LINEAR] = MAPFILTER_LINEAR -}; +static inline uint32_t +vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) +{ + switch (filter) { + default: + assert(!"Invalid filter"); + case VK_FILTER_NEAREST: + return MAPFILTER_NEAREST; + case VK_FILTER_LINEAR: + return anisotropyEnable ? MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR; + } +} + +static inline uint32_t +vk_to_gen_max_anisotropy(float ratio) +{ + return (anv_clamp_f(ratio, 2, 16) - 2) / 2; +} static const uint32_t vk_to_gen_mipmap_mode[] = { [VK_SAMPLER_MIPMAP_MODE_BASE] = MIPFILTER_NONE, -- cgit v1.2.3 From 08735ba91c9aa2cdfe138fdc314f8071cb0b5617 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 06:50:18 -0800 Subject: anv/cmd_buffer: Fix setting of viewport/scissor count --- src/vulkan/anv_cmd_buffer.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index ae33822b633..3df34a511eb 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -383,7 +383,10 @@ void anv_CmdSetViewport( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->state.dynamic.viewport.count = viewportCount; + const uint32_t total_count = firstViewport + viewportCount; + if (cmd_buffer->state.dynamic.viewport.count < total_count) + 
cmd_buffer->state.dynamic.viewport.count = total_count; + + memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport, pViewports, viewportCount * sizeof(*pViewports)); @@ -398,7 +401,10 @@ void anv_CmdSetScissor( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->state.dynamic.scissor.count = scissorCount; + const uint32_t total_count = firstScissor + scissorCount; + if (cmd_buffer->state.dynamic.scissor.count < total_count) + cmd_buffer->state.dynamic.scissor.count = total_count; + + memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor, pScissors, scissorCount * sizeof(*pScissors)); -- cgit v1.2.3 From 802f00219addb31d6e8b73c818c42212406993eb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 06:58:11 -0800 Subject: anv/device: Update features and limits --- src/vulkan/anv_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index fa2c24a80f6..204f457e41f 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -338,7 +338,8 @@ void anv_GetPhysicalDeviceFeatures( .sampleRateShading = false, .dualSrcBlend = true, .logicOp = true, - .multiDrawIndirect = true, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = false, .depthClamp = false, .depthBiasClamp = false, .fillModeNonSolid = true, @@ -372,6 +373,7 @@ void anv_GetPhysicalDeviceFeatures( .shaderInt16 = false, .alphaToOne = true, .variableMultisampleRate = false, + .inheritedQueries = false, }; } @@ -492,6 +494,7 @@ void anv_GetPhysicalDeviceProperties( .sampledImageStencilSampleCounts = sample_counts, .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, + .timestampComputeAndGraphics = false, .timestampPeriod = time_stamp_base / (1000 * 1000 * 1000), .maxClipDistances = 0 /* FIXME */, .maxCullDistances = 0 /* FIXME */, -- cgit v1.2.3 From 7b81637762268f94bdf0576ca83bb2e6896b3565 Mon Sep 17 00:00:00 2001 From: 
Jason Ekstrand Date: Thu, 14 Jan 2016 07:29:58 -0800 Subject: vulkan-1.0.0: Convert pPreserveAttachments to a uint32_t --- include/vulkan/vulkan.h | 2 +- src/vulkan/anv_meta.c | 5 +---- src/vulkan/anv_meta_clear.c | 5 +---- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 9f4506b580c..5f75865c581 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1995,7 +1995,7 @@ typedef struct VkSubpassDescription { const VkAttachmentReference* pResolveAttachments; const VkAttachmentReference* pDepthStencilAttachment; uint32_t preserveAttachmentCount; - const VkAttachmentReference* pPreserveAttachments; + const uint32_t* pPreserveAttachments; } VkSubpassDescription; typedef struct VkSubpassDependency { diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index a92eb7fcc97..18978afd219 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -215,10 +215,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) .layout = VK_IMAGE_LAYOUT_GENERAL, }, .preserveAttachmentCount = 1, - .pPreserveAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, + .pPreserveAttachments = (uint32_t[]) { 0 }, }, .dependencyCount = 0, }, NULL, &device->meta_state.blit.render_pass); diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 003e0e023ca..0469c57dabf 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -781,10 +781,7 @@ void anv_CmdClearColorImage( .layout = VK_IMAGE_LAYOUT_GENERAL, }, .preserveAttachmentCount = 1, - .pPreserveAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, + .pPreserveAttachments = (uint32_t[]) { 0 }, }, .dependencyCount = 0, }, &cmd_buffer->pool->alloc, &pass); -- cgit v1.2.3 From d877095e66e7d5be234ed5b20ad59c3a1d3539ec Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 07:32:16 
-0800 Subject: vulkan-1.0.0: Get rid of MIPMAP_MODE_BASE --- include/vulkan/vulkan.h | 9 ++++----- src/vulkan/genX_state_util.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 5f75865c581..1d3ef3faafe 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -682,12 +682,11 @@ typedef enum VkFilter { } VkFilter; typedef enum VkSamplerMipmapMode { - VK_SAMPLER_MIPMAP_MODE_BASE = 0, - VK_SAMPLER_MIPMAP_MODE_NEAREST = 1, - VK_SAMPLER_MIPMAP_MODE_LINEAR = 2, - VK_SAMPLER_MIPMAP_MODE_BEGIN_RANGE = VK_SAMPLER_MIPMAP_MODE_BASE, + VK_SAMPLER_MIPMAP_MODE_NEAREST = 0, + VK_SAMPLER_MIPMAP_MODE_LINEAR = 1, + VK_SAMPLER_MIPMAP_MODE_BEGIN_RANGE = VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_SAMPLER_MIPMAP_MODE_END_RANGE = VK_SAMPLER_MIPMAP_MODE_LINEAR, - VK_SAMPLER_MIPMAP_MODE_RANGE_SIZE = (VK_SAMPLER_MIPMAP_MODE_LINEAR - VK_SAMPLER_MIPMAP_MODE_BASE + 1), + VK_SAMPLER_MIPMAP_MODE_RANGE_SIZE = (VK_SAMPLER_MIPMAP_MODE_LINEAR - VK_SAMPLER_MIPMAP_MODE_NEAREST + 1), VK_SAMPLER_MIPMAP_MODE_MAX_ENUM = 0x7FFFFFFF } VkSamplerMipmapMode; diff --git a/src/vulkan/genX_state_util.h b/src/vulkan/genX_state_util.h index f7a860e796b..215e9ba30eb 100644 --- a/src/vulkan/genX_state_util.h +++ b/src/vulkan/genX_state_util.h @@ -86,7 +86,6 @@ vk_to_gen_max_anisotropy(float ratio) } static const uint32_t vk_to_gen_mipmap_mode[] = { - [VK_SAMPLER_MIPMAP_MODE_BASE] = MIPFILTER_NONE, [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR }; -- cgit v1.2.3 From aab9517f3dc5a2f28eaae4a2fa64cb0ad82bb4f7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 07:41:45 -0800 Subject: vulkan-1.0.0: Misc. 
field and argument renames --- include/vulkan/vulkan.h | 30 +++++++++++++++--------------- src/vulkan/anv_cmd_buffer.c | 6 +++--- src/vulkan/anv_descriptor_set.c | 2 +- src/vulkan/anv_device.c | 4 ++-- src/vulkan/anv_dump.c | 2 +- src/vulkan/anv_meta.c | 2 +- src/vulkan/anv_query.c | 4 ++-- src/vulkan/gen8_cmd_buffer.c | 16 ++++++++-------- 8 files changed, 33 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 1d3ef3faafe..a1f460189ce 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -1128,9 +1128,9 @@ typedef struct VkInstanceCreateInfo { const void* pNext; VkInstanceCreateFlags flags; const VkApplicationInfo* pApplicationInfo; - uint32_t enabledLayerNameCount; + uint32_t enabledLayerCount; const char* const* ppEnabledLayerNames; - uint32_t enabledExtensionNameCount; + uint32_t enabledExtensionCount; const char* const* ppEnabledExtensionNames; } VkInstanceCreateInfo; @@ -1389,9 +1389,9 @@ typedef struct VkDeviceCreateInfo { VkDeviceCreateFlags flags; uint32_t queueCreateInfoCount; const VkDeviceQueueCreateInfo* pQueueCreateInfos; - uint32_t enabledLayerNameCount; + uint32_t enabledLayerCount; const char* const* ppEnabledLayerNames; - uint32_t enabledExtensionNameCount; + uint32_t enabledExtensionCount; const char* const* ppEnabledExtensionNames; const VkPhysicalDeviceFeatures* pEnabledFeatures; } VkDeviceCreateInfo; @@ -1540,7 +1540,7 @@ typedef struct VkQueryPoolCreateInfo { const void* pNext; VkQueryPoolCreateFlags flags; VkQueryType queryType; - uint32_t entryCount; + uint32_t queryCount; VkQueryPipelineStatisticFlags pipelineStatistics; } VkQueryPoolCreateInfo; @@ -1914,7 +1914,7 @@ typedef struct VkDescriptorSetAllocateInfo { VkStructureType sType; const void* pNext; VkDescriptorPool descriptorPool; - uint32_t setLayoutCount; + uint32_t descriptorSetCount; const VkDescriptorSetLayout* pSetLayouts; } VkDescriptorSetAllocateInfo; @@ -2031,7 +2031,7 @@ typedef struct 
VkCommandBufferAllocateInfo { const void* pNext; VkCommandPool commandPool; VkCommandBufferLevel level; - uint32_t bufferCount; + uint32_t commandBufferCount; } VkCommandBufferAllocateInfo; typedef struct VkCommandBufferBeginInfo { @@ -2312,16 +2312,16 @@ typedef void (VKAPI_PTR *PFN_vkCmdSetEvent)(VkCommandBuffer commandBuffer, VkEve typedef void (VKAPI_PTR *PFN_vkCmdResetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, uint32_t memoryBarrierCount, const void* const* ppMemoryBarriers); typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags, uint32_t memoryBarrierCount, const void* const* ppMemoryBarriers); -typedef void (VKAPI_PTR *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t entry, VkQueryControlFlags flags); -typedef void (VKAPI_PTR *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t entry); +typedef void (VKAPI_PTR *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags); +typedef void (VKAPI_PTR *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query); typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount); -typedef void (VKAPI_PTR *PFN_vkCmdWriteTimestamp)(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkQueryPool queryPool, uint32_t entry); +typedef void (VKAPI_PTR *PFN_vkCmdWriteTimestamp)(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkQueryPool queryPool, uint32_t query); typedef void (VKAPI_PTR 
*PFN_vkCmdCopyQueryPoolResults)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags); typedef void (VKAPI_PTR *PFN_vkCmdPushConstants)(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, const void* pValues); typedef void (VKAPI_PTR *PFN_vkCmdBeginRenderPass)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkSubpassContents contents); typedef void (VKAPI_PTR *PFN_vkCmdNextSubpass)(VkCommandBuffer commandBuffer, VkSubpassContents contents); typedef void (VKAPI_PTR *PFN_vkCmdEndRenderPass)(VkCommandBuffer commandBuffer); -typedef void (VKAPI_PTR *PFN_vkCmdExecuteCommands)(VkCommandBuffer commandBuffer, uint32_t commandBuffersCount, const VkCommandBuffer* pCommandBuffers); +typedef void (VKAPI_PTR *PFN_vkCmdExecuteCommands)(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers); #ifdef VK_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateInstance( @@ -3058,13 +3058,13 @@ VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( VKAPI_ATTR void VKAPI_CALL vkCmdBeginQuery( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t entry, + uint32_t query, VkQueryControlFlags flags); VKAPI_ATTR void VKAPI_CALL vkCmdEndQuery( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t entry); + uint32_t query); VKAPI_ATTR void VKAPI_CALL vkCmdResetQueryPool( VkCommandBuffer commandBuffer, @@ -3076,7 +3076,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdWriteTimestamp( VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkQueryPool queryPool, - uint32_t entry); + uint32_t query); VKAPI_ATTR void VKAPI_CALL vkCmdCopyQueryPoolResults( VkCommandBuffer commandBuffer, @@ -3110,7 +3110,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdEndRenderPass( VKAPI_ATTR void VKAPI_CALL vkCmdExecuteCommands( 
VkCommandBuffer commandBuffer, - uint32_t commandBuffersCount, + uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers); #endif diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 3df34a511eb..91e97372b2e 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -217,7 +217,7 @@ VkResult anv_AllocateCommandBuffers( VkResult result = VK_SUCCESS; uint32_t i; - for (i = 0; i < pAllocateInfo->bufferCount; i++) { + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level, &pCommandBuffers[i]); if (result != VK_SUCCESS) @@ -1010,7 +1010,7 @@ void anv_CmdPushConstants( void anv_CmdExecuteCommands( VkCommandBuffer commandBuffer, - uint32_t commandBuffersCount, + uint32_t commandBufferCount, const VkCommandBuffer* pCmdBuffers) { ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); @@ -1019,7 +1019,7 @@ void anv_CmdExecuteCommands( anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]); - for (uint32_t i = 0; i < commandBuffersCount; i++) { + for (uint32_t i = 0; i < commandBufferCount; i++) { ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 828e20ae9c9..c414c7c276d 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -421,7 +421,7 @@ VkResult anv_AllocateDescriptorSets( struct anv_descriptor_set *set; uint32_t i; - for (i = 0; i < pAllocateInfo->setLayoutCount; i++) { + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pAllocateInfo->pSetLayouts[i]); diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 204f457e41f..265ac5ff7e3 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -213,7 +213,7 @@ VkResult anv_CreateInstance( if 
(pCreateInfo->pApplicationInfo->apiVersion != VK_MAKE_VERSION(0, 210, 1)) return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); - for (uint32_t i = 0; i < pCreateInfo->enabledExtensionNameCount; i++) { + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { bool found = false; for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], @@ -671,7 +671,7 @@ VkResult anv_CreateDevice( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); - for (uint32_t i = 0; i < pCreateInfo->enabledExtensionNameCount; i++) { + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { bool found = false; for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) { if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 3a1430d49a0..7aef32b8648 100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -87,7 +87,7 @@ anv_dump_image_to_ppm(struct anv_device *device, .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandPool = commandPool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .bufferCount = 1, + .commandBufferCount = 1, }, &cmd); assert(result == VK_SUCCESS); diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 18978afd219..aa5fd0488c0 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -564,7 +564,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, &(VkDescriptorSetAllocateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = dummy_desc_pool, - .setLayoutCount = 1, + .descriptorSetCount = 1, .pSetLayouts = &device->meta_state.blit.ds_layout }, &set); anv_UpdateDescriptorSets(anv_device_to_handle(device), diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index d4c34c7f021..5b052341e0c 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -60,9 +60,9 @@ VkResult anv_CreateQueryPool( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 
pool->type = pCreateInfo->queryType; - pool->slots = pCreateInfo->entryCount; + pool->slots = pCreateInfo->queryCount; - size = pCreateInfo->entryCount * slot_size; + size = pCreateInfo->queryCount * slot_size; result = anv_bo_init_new(&pool->bo, device, size); if (result != VK_SUCCESS) goto fail; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 20a9ec75bca..4066c39aac8 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -890,7 +890,7 @@ emit_query_availability(struct anv_batch *batch, void genX(CmdBeginQuery)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t entry, + uint32_t query, VkQueryControlFlags flags) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); @@ -912,7 +912,7 @@ void genX(CmdBeginQuery)( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - entry * sizeof(struct anv_query_pool_slot)); + query * sizeof(struct anv_query_pool_slot)); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: @@ -924,7 +924,7 @@ void genX(CmdBeginQuery)( void genX(CmdEndQuery)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t entry) + uint32_t query) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); @@ -932,10 +932,10 @@ void genX(CmdEndQuery)( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - entry * sizeof(struct anv_query_pool_slot) + 8); + query * sizeof(struct anv_query_pool_slot) + 8); emit_query_availability(&cmd_buffer->batch, &pool->bo, - entry * sizeof(struct anv_query_pool_slot) + 16); + query * sizeof(struct anv_query_pool_slot) + 16); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: @@ -950,11 +950,11 @@ void genX(CmdWriteTimestamp)( VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkQueryPool queryPool, - uint32_t entry) + uint32_t query) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, 
commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - uint32_t offset = entry * sizeof(struct anv_query_pool_slot); + uint32_t offset = query * sizeof(struct anv_query_pool_slot); assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); @@ -977,7 +977,7 @@ void genX(CmdWriteTimestamp)( break; } - emit_query_availability(&cmd_buffer->batch, &pool->bo, entry + 16); + emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16); } #define alu_opcode(v) __gen_field((v), 20, 31) -- cgit v1.2.3 From f6cae9929441da7c851372a2433b91be8d9ff317 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 07:45:15 -0800 Subject: vulkan-1.0.0: Split out command buffer inheritance info --- include/vulkan/vulkan.h | 25 ++++++++++++++++--------- src/vulkan/anv_cmd_buffer.c | 6 +++--- 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index a1b81b6a50f..0aefccf1d5c 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -187,13 +187,14 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 38, VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO = 39, VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO = 40, - VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO = 41, - VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 42, - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 43, - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 44, - VK_STRUCTURE_TYPE_MEMORY_BARRIER = 45, - VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO = 46, - VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO = 47, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO = 41, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO = 42, + VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 43, + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 44, + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 45, + VK_STRUCTURE_TYPE_MEMORY_BARRIER = 46, + VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO = 47, + VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO = 48, 
VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR = 1000001000, VK_STRUCTURE_TYPE_PRESENT_INFO_KHR = 1000001001, VK_STRUCTURE_TYPE_DISPLAY_MODE_CREATE_INFO_KHR = 1000002000, @@ -2034,16 +2035,22 @@ typedef struct VkCommandBufferAllocateInfo { uint32_t commandBufferCount; } VkCommandBufferAllocateInfo; -typedef struct VkCommandBufferBeginInfo { +typedef struct VkCommandBufferInheritanceInfo { VkStructureType sType; const void* pNext; - VkCommandBufferUsageFlags flags; VkRenderPass renderPass; uint32_t subpass; VkFramebuffer framebuffer; VkBool32 occlusionQueryEnable; VkQueryControlFlags queryFlags; VkQueryPipelineStatisticFlags pipelineStatistics; +} VkCommandBufferInheritanceInfo; + +typedef struct VkCommandBufferBeginInfo { + VkStructureType sType; + const void* pNext; + VkCommandBufferUsageFlags flags; + const VkCommandBufferInheritanceInfo* pInheritanceInfo; } VkCommandBufferBeginInfo; typedef struct VkBufferCopy { diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 91e97372b2e..8cf4dc7202c 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -304,12 +304,12 @@ VkResult anv_BeginCommandBuffer( if (cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { cmd_buffer->state.framebuffer = - anv_framebuffer_from_handle(pBeginInfo->framebuffer); + anv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); cmd_buffer->state.pass = - anv_render_pass_from_handle(pBeginInfo->renderPass); + anv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); struct anv_subpass *subpass = - &cmd_buffer->state.pass->subpasses[pBeginInfo->subpass]; + &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; anv_cmd_buffer_begin_subpass(cmd_buffer, subpass); } -- cgit v1.2.3 From b57c72d9646c3f7ef595379328e5b7d64d8e94dc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 07:59:37 -0800 Subject: vulkan-1.0.0: Rework blits to use four offsets --- 
include/vulkan/vulkan.h | 6 ++---- src/vulkan/anv_dump.c | 16 ++++++---------- src/vulkan/anv_meta.c | 31 +++++++++++++++++++++++-------- 3 files changed, 31 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 0aefccf1d5c..36459b14c58 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2076,11 +2076,9 @@ typedef struct VkImageCopy { typedef struct VkImageBlit { VkImageSubresourceLayers srcSubresource; - VkOffset3D srcOffset; - VkExtent3D srcExtent; + VkOffset3D srcOffsets[2]; VkImageSubresourceLayers dstSubresource; - VkOffset3D dstOffset; - VkExtent3D dstExtent; + VkOffset3D dstOffsets[2]; } VkImageBlit; typedef struct VkBufferImageCopy { diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 7aef32b8648..2a747d46924 100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -108,11 +108,9 @@ anv_dump_image_to_ppm(struct anv_device *device, .baseArrayLayer = array_layer, .layerCount = 1, }, - .srcOffset = (VkOffset3D) { 0, 0, 0 }, - .srcExtent = (VkExtent3D) { - extent.width, - extent.height, - 1 + .srcOffsets = { + { 0, 0, 0 }, + { extent.width, extent.height, 1 }, }, .dstSubresource = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, @@ -120,11 +118,9 @@ anv_dump_image_to_ppm(struct anv_device *device, .baseArrayLayer = 0, .layerCount = 1, }, - .dstOffset = (VkOffset3D) { 0, 0, 0 }, - .dstExtent = (VkExtent3D) { - extent.width, - extent.height, - 1 + .dstOffsets = { + { 0, 0, 0 }, + { extent.width, extent.height, 1 }, }, }, VK_FILTER_NEAREST); diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index aa5fd0488c0..3cbb3a65f22 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1047,20 +1047,37 @@ void anv_CmdBlitImage( cmd_buffer); const VkOffset3D dest_offset = { - .x = pRegions[r].dstOffset.x, - .y = pRegions[r].dstOffset.y, + .x = pRegions[r].dstOffsets[0].x, + .y = pRegions[r].dstOffsets[0].y, .z = 0, }; + if (pRegions[r].dstOffsets[1].x < 
pRegions[r].dstOffsets[0].x || + pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y || + pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x || + pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y) + anv_finishme("FINISHME: Allow flipping in blits"); + + const VkExtent3D dest_extent = { + .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x, + .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y, + }; + + const VkExtent3D src_extent = { + .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x, + .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y, + }; + const uint32_t dest_array_slice = meta_blit_get_dest_view_base_array_slice(dest_image, &pRegions[r].dstSubresource, - &pRegions[r].dstOffset); + &pRegions[r].dstOffsets[0]); if (pRegions[r].srcSubresource.layerCount > 1) anv_finishme("FINISHME: copy multiple array layers"); - if (pRegions[r].dstExtent.depth > 1) + if (pRegions[r].srcOffsets[0].z != pRegions[r].srcOffsets[1].z || + pRegions[r].dstOffsets[0].z != pRegions[r].dstOffsets[1].z) anv_finishme("FINISHME: copy multiple depth layers"); struct anv_image_view dest_iview; @@ -1082,11 +1099,9 @@ void anv_CmdBlitImage( meta_emit_blit(cmd_buffer, src_image, &src_iview, - pRegions[r].srcOffset, - pRegions[r].srcExtent, + pRegions[r].srcOffsets[0], src_extent, dest_image, &dest_iview, - dest_offset, - pRegions[r].dstExtent, + dest_offset, dest_extent, filter); } -- cgit v1.2.3 From c310fb032d406984e72895b6c94e2f96bff8e70d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 08:09:39 -0800 Subject: vulkan-1.0.0: Rework memory barriers --- include/vulkan/vulkan.h | 16 ++++++++++++---- src/vulkan/anv_dump.c | 6 +++--- src/vulkan/gen7_cmd_buffer.c | 8 ++++++-- src/vulkan/gen8_cmd_buffer.c | 12 +++++++++--- src/vulkan/genX_cmd_buffer.c | 45 +++++++++++++++++++------------------------- 5 files changed, 49 insertions(+), 38 deletions(-) (limited to 'src') diff --git 
a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index 49c1eaea51c..c0208e35342 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -2315,8 +2315,8 @@ typedef void (VKAPI_PTR *PFN_vkCmdClearAttachments)(VkCommandBuffer commandBuffe typedef void (VKAPI_PTR *PFN_vkCmdResolveImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); typedef void (VKAPI_PTR *PFN_vkCmdSetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); typedef void (VKAPI_PTR *PFN_vkCmdResetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); -typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, uint32_t memoryBarrierCount, const void* const* ppMemoryBarriers); -typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags, uint32_t memoryBarrierCount, const void* const* ppMemoryBarriers); +typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, uint32_t memoryBarrierCount, const VkMemoryBarrier* pMemoryBarriers, uint32_t bufferMemoryBarrierCount, const VkBufferMemoryBarrier* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier* pImageMemoryBarriers); +typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags, uint32_t memoryBarrierCount, const VkMemoryBarrier* pMemoryBarriers, uint32_t bufferMemoryBarrierCount, const 
VkBufferMemoryBarrier* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier* pImageMemoryBarriers); typedef void (VKAPI_PTR *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags); typedef void (VKAPI_PTR *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query); typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount); @@ -3050,7 +3050,11 @@ VKAPI_ATTR void VKAPI_CALL vkCmdWaitEvents( VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, uint32_t memoryBarrierCount, - const void* const* ppMemoryBarriers); + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers); VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( VkCommandBuffer commandBuffer, @@ -3058,7 +3062,11 @@ VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags, uint32_t memoryBarrierCount, - const void* const* ppMemoryBarriers); + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers); VKAPI_ATTR void VKAPI_CALL vkCmdBeginQuery( VkCommandBuffer commandBuffer, diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c index 2a747d46924..b7fa28be787 100644 --- a/src/vulkan/anv_dump.c +++ b/src/vulkan/anv_dump.c @@ -127,8 +127,8 @@ anv_dump_image_to_ppm(struct anv_device *device, ANV_CALL(CmdPipelineBarrier)(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - true, 1, - (const void * []) { &(VkImageMemoryBarrier) { + true, 0, NULL, 0, NULL, 1, + &(VkImageMemoryBarrier) { .sType = 
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_HOST_READ_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, @@ -144,7 +144,7 @@ anv_dump_image_to_ppm(struct anv_device *device, .baseArrayLayer = 0, .layerCount = 1, }, - }}); + }); result = anv_EndCommandBuffer(cmd); assert(result == VK_SUCCESS); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index d3a9de5038f..e6501124139 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -888,8 +888,12 @@ void genX(CmdWaitEvents)( const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, - uint32_t memBarrierCount, - const void* const* ppMemBarriers) + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) { stub(); } diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 4066c39aac8..65b4514d35e 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -1147,8 +1147,12 @@ void genX(CmdWaitEvents)( const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, - uint32_t memBarrierCount, - const void* const* ppMemBarriers) + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); for (uint32_t i = 0; i < eventCount; i++) { @@ -1166,5 +1170,7 @@ void genX(CmdWaitEvents)( genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, false, /* byRegion */ - memBarrierCount, ppMemBarriers); + memoryBarrierCount, pMemoryBarriers, + bufferMemoryBarrierCount, pBufferMemoryBarriers, + 
imageMemoryBarrierCount, pImageMemoryBarriers); } diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index ac1ab822b49..4f7054f83d2 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -143,8 +143,12 @@ void genX(CmdPipelineBarrier)( VkPipelineStageFlags srcStageMask, VkPipelineStageFlags destStageMask, VkBool32 byRegion, - uint32_t memBarrierCount, - const void* const* ppMemBarriers) + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); uint32_t b, *dw; @@ -197,30 +201,19 @@ void genX(CmdPipelineBarrier)( VkAccessFlags src_flags = 0; VkAccessFlags dst_flags = 0; - for (uint32_t i = 0; i < memBarrierCount; i++) { - const struct anv_common *common = ppMemBarriers[i]; - switch (common->sType) { - case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); - src_flags |= barrier->srcAccessMask; - dst_flags |= barrier->dstAccessMask; - break; - } - case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); - src_flags |= barrier->srcAccessMask; - dst_flags |= barrier->dstAccessMask; - break; - } - case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { - ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); - src_flags |= barrier->srcAccessMask; - dst_flags |= barrier->dstAccessMask; - break; - } - default: - unreachable("Invalid memory barrier type"); - } + for (uint32_t i = 0; i < memoryBarrierCount; i++) { + src_flags |= pMemoryBarriers[i].srcAccessMask; + dst_flags |= pMemoryBarriers[i].dstAccessMask; + } + + for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { + src_flags |= pBufferMemoryBarriers[i].srcAccessMask; + dst_flags |= pBufferMemoryBarriers[i].dstAccessMask; 
+ } + + for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { + src_flags |= pImageMemoryBarriers[i].srcAccessMask; + dst_flags |= pImageMemoryBarriers[i].dstAccessMask; } /* The src flags represent how things were used previously. This is -- cgit v1.2.3 From 24a6fcba77abe770e508c323d1dabf31b78bc9b0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 08:10:07 -0800 Subject: vulkan-1.0.0: Bump the version to 1.0.0 --- include/vulkan/vulkan.h | 2 +- src/vulkan/anv_device.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h index c0208e35342..014f8e8cfb8 100644 --- a/include/vulkan/vulkan.h +++ b/include/vulkan/vulkan.h @@ -41,7 +41,7 @@ extern "C" { ((major << 22) | (minor << 12) | patch) // Vulkan API version supported by this file -#define VK_API_VERSION VK_MAKE_VERSION(0, 221, 0) +#define VK_API_VERSION VK_MAKE_VERSION(1, 0, 0) #define VK_NULL_HANDLE 0 diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 265ac5ff7e3..c7e70412eb8 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -210,7 +210,7 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - if (pCreateInfo->pApplicationInfo->apiVersion != VK_MAKE_VERSION(0, 210, 1)) + if (pCreateInfo->pApplicationInfo->apiVersion != VK_MAKE_VERSION(1, 0, 0)) return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { @@ -512,7 +512,7 @@ void anv_GetPhysicalDeviceProperties( }; *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = VK_MAKE_VERSION(0, 210, 1), + .apiVersion = VK_MAKE_VERSION(1, 0, 0), .driverVersion = 1, .vendorID = 0x8086, .deviceID = pdevice->chipset_id, -- cgit v1.2.3 From f46f4e488641437556188a3182ce4ddabd6f42d0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 09:12:32 -0800 Subject: nir/spirv: Add initial support for Vertex/Instance 
index --- src/glsl/nir/spirv/spirv_to_nir.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 4462c9f5a80..cdaf97294e0 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1074,6 +1074,10 @@ vtn_get_builtin_location(struct vtn_builder *b, case SpvBuiltInCullDistance: /* XXX figure this out */ unreachable("unhandled builtin"); + case SpvBuiltInVertexIndex: + *location = SYSTEM_VALUE_VERTEX_ID; + set_mode_system_value(mode); + break; case SpvBuiltInVertexId: /* Vulkan defines VertexID to be zero-based and reserves the new * builtin keyword VertexIndex to indicate the non-zero-based value. @@ -1081,6 +1085,8 @@ vtn_get_builtin_location(struct vtn_builder *b, *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; set_mode_system_value(mode); break; + case SpvBuiltInInstanceIndex: + /* XXX */ case SpvBuiltInInstanceId: *location = SYSTEM_VALUE_INSTANCE_ID; set_mode_system_value(mode); -- cgit v1.2.3 From 32f8bcb84f7285a2959c19bbb4597056f0c753b2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 12:04:20 -0800 Subject: i965/vec4: Use UW type for multiply into accumulator on GEN8+ BDW adds the following restriction: "When multiplying DW x DW, the dst cannot be accumulator." 
--- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index c228743acf8..b2335bdb8df 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1069,7 +1069,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_umul_high: { struct brw_reg acc = retype(brw_acc_reg(8), dst.type); - emit(MUL(acc, op[0], op[1])); + if (devinfo->gen >=8) + emit(MUL(acc, op[0], retype(op[1], BRW_REGISTER_TYPE_UW))); + else + emit(MUL(acc, op[0], op[1])); + emit(MACH(dst, op[0], op[1])); break; } -- cgit v1.2.3 From 0415dfcfe7fc082973630424ecad2ab183c7dc64 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 11 Jan 2016 15:05:47 -0800 Subject: anv/meta: Add FINISHME for clearing multi-layer framebuffers --- src/vulkan/anv_meta_clear.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 0469c57dabf..cec98dbcf47 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -677,6 +677,9 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, meta_clear_begin(&saved_state, cmd_buffer); + if (cmd_buffer->state.framebuffer->layers > 1) + anv_finishme("clearing multi-layer framebuffer"); + for (uint32_t a = 0; a < pass->attachment_count; ++a) { struct anv_render_pass_attachment *att = &pass->attachments[a]; -- cgit v1.2.3 From 20fd816b6bb6dbd64abd953f2958b3c9ff6ddf07 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 Jan 2016 19:37:40 -0800 Subject: anv: Remove duplicate func prototype anv_private.h declared anv_cmd_buffer_begin_subpass twice. 
--- src/vulkan/anv_private.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a0ac340cc62..4f99ec6c327 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1188,8 +1188,6 @@ struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, uint32_t *a, uint32_t *b, uint32_t dwords, uint32_t alignment); -void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); struct anv_address anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer); -- cgit v1.2.3 From ea20389320b251f6498eb9c47e525ced10aab91d Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 14 Jan 2016 12:58:46 -0800 Subject: anv: Add FIXME for vkResetCommandPool vkResetCommandPool currently destroys its command buffers. The Vulkan 1.0 spec requires that it only reset them: Resetting a command pool recycles all of the resources from all of the command buffers allocated from the command pool back to the command pool. All command buffers that have been allocated from the command pool are put in the initial state. --- src/vulkan/anv_cmd_buffer.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 8cf4dc7202c..a888a542751 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -1074,6 +1074,14 @@ VkResult anv_ResetCommandPool( { ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + /* FIXME: vkResetCommandPool must not destroy its command buffers. The + * Vulkan 1.0 spec requires that it only reset them: + * + * Resetting a command pool recycles all of the resources from all of + * the command buffers allocated from the command pool back to the + * command pool. 
All command buffers that have been allocated from the + * command pool are put in the initial state. + */ list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link) { anv_cmd_buffer_destroy(cmd_buffer); -- cgit v1.2.3 From ed33ccde635eda9d6accf2ff69fcf805902ec082 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 14 Jan 2016 13:12:35 -0800 Subject: anv: Make vkBeginCommandBuffer reset the command buffer If its the command buffer's first call to vkBeginCommandBuffer, we must *initialize* the command buffer's state. Otherwise, we must *reset* its state. In both cases, let's use anv_ResetCommandBuffer. From the Vulkan 1.0 spec: If a command buffer is in the executable state and the command buffer was allocated from a command pool with the VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then vkBeginCommandBuffer implicitly resets the command buffer, behaving as if vkResetCommandBuffer had been called with VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts the command buffer in the recording state. 
--- src/vulkan/anv_cmd_buffer.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index a888a542751..689bc53c93a 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -111,7 +111,7 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest, } static void -anv_cmd_state_init(struct anv_cmd_state *state) +anv_cmd_state_reset(struct anv_cmd_state *state) { memset(&state->descriptors, 0, sizeof(state->descriptors)); memset(&state->push_constants, 0, sizeof(state->push_constants)); @@ -172,6 +172,7 @@ static VkResult anv_create_cmd_buffer( cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; cmd_buffer->device = device; cmd_buffer->pool = pool; + cmd_buffer->level = level; result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); if (result != VK_SUCCESS) @@ -182,11 +183,6 @@ static VkResult anv_create_cmd_buffer( anv_state_stream_init(&cmd_buffer->dynamic_state_stream, &device->dynamic_state_block_pool); - cmd_buffer->level = level; - cmd_buffer->usage_flags = 0; - - anv_cmd_state_init(&cmd_buffer->state); - if (pool) { list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); } else { @@ -263,9 +259,10 @@ VkResult anv_ResetCommandBuffer( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + cmd_buffer->usage_flags = 0; + cmd_buffer->state.current_pipeline = UINT32_MAX; anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); - - anv_cmd_state_init(&cmd_buffer->state); + anv_cmd_state_reset(&cmd_buffer->state); return VK_SUCCESS; } @@ -294,7 +291,21 @@ VkResult anv_BeginCommandBuffer( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); + /* If this is the first vkBeginCommandBuffer, we must *initialize* the + * command buffer's state. Otherwise, we must *reset* its state. In both + * cases we reset it. 
+ * + * From the Vulkan 1.0 spec: + * + * If a command buffer is in the executable state and the command buffer + * was allocated from a command pool with the + * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then + * vkBeginCommandBuffer implicitly resets the command buffer, behaving + * as if vkResetCommandBuffer had been called with + * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts + * the command buffer in the recording state. + */ + anv_ResetCommandBuffer(commandBuffer, /*flags*/ 0); cmd_buffer->usage_flags = pBeginInfo->flags; @@ -315,7 +326,6 @@ VkResult anv_BeginCommandBuffer( } anv_cmd_buffer_emit_state_base_address(cmd_buffer); - cmd_buffer->state.current_pipeline = UINT32_MAX; return VK_SUCCESS; } -- cgit v1.2.3 From 5dea9d0039f5d052fcac526d24f2b507bc943016 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 14 Jan 2016 13:18:40 -0800 Subject: anv: Document anv_cmd_state::current_pipeline It's the value of PIPELINE_SELECT.PipelineSelection. 
--- src/vulkan/anv_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 4f99ec6c327..a0a952ef4e4 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1061,7 +1061,7 @@ void anv_dynamic_state_copy(struct anv_dynamic_state *dest, /** State required while building cmd buffer */ struct anv_cmd_state { - uint32_t current_pipeline; + uint32_t current_pipeline; /**< PIPELINE_SELECT.PipelineSelection */ uint32_t vb_dirty; anv_cmd_dirty_mask_t dirty; anv_cmd_dirty_mask_t compute_dirty; -- cgit v1.2.3 From 2eb52198ffd7b23f67bdffde787afbb28f0b28fb Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 14 Jan 2016 15:18:40 -0800 Subject: vk: Fix struct field indentation --- src/vulkan/anv_private.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a0a952ef4e4..1065f8a7359 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1061,7 +1061,8 @@ void anv_dynamic_state_copy(struct anv_dynamic_state *dest, /** State required while building cmd buffer */ struct anv_cmd_state { - uint32_t current_pipeline; /**< PIPELINE_SELECT.PipelineSelection */ + /* PIPELINE_SELECT.PipelineSelection */ + uint32_t current_pipeline; uint32_t vb_dirty; anv_cmd_dirty_mask_t dirty; anv_cmd_dirty_mask_t compute_dirty; -- cgit v1.2.3 From 6483d3f8fe4301544550cf7665c26ec1d8616728 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 18:40:35 -0800 Subject: nir/spirv: Fix texture return types We were just hard-coding everything to a vec4. This meant we weren't handling shadow samplers at all and integer things were getting the wrong return type. 
--- src/glsl/nir/spirv/spirv_to_nir.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index cdaf97294e0..54030dd1d16 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2337,6 +2337,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, return; } + struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type; struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); struct vtn_sampled_image sampled; @@ -2496,6 +2497,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, instr->coord_components = coord_components; instr->is_array = glsl_sampler_type_is_array(sampler_type); instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); + instr->is_new_style_shadow = instr->is_shadow; instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampled.sampler->deref)); @@ -2506,8 +2508,13 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, instr->texture = NULL; } - nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); - val->ssa = vtn_create_ssa_value(b, glsl_vector_type(GLSL_TYPE_FLOAT, 4)); + nir_ssa_dest_init(&instr->instr, &instr->dest, + nir_tex_instr_dest_size(instr), NULL); + + assert(glsl_get_vector_elements(ret_type->type) == + nir_tex_instr_dest_size(instr)); + + val->ssa = vtn_create_ssa_value(b, ret_type->type); val->ssa->def = &instr->dest.ssa; nir_builder_instr_insert(&b->nb, &instr->instr); -- cgit v1.2.3 From 47af950df5782c520f47e23192acfa6b324b98fd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 18:58:25 -0800 Subject: anv/apply_pipeline_layout: Stomp texture array size to 1 --- src/vulkan/anv_nir_apply_pipeline_layout.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c index 7410bd8363f..b7b8bd18ef9 100644 --- 
a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@ -203,6 +203,11 @@ lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) lower_tex_deref(tex, tex->sampler, &tex->sampler_index, nir_tex_src_sampler_offset, state); + /* The backend only ever uses this to mark used surfaces. We don't care + * about that little optimization so it just needs to be non-zero. + */ + tex->texture_array_size = 1; + if (tex->texture) cleanup_tex_deref(tex, tex->texture); cleanup_tex_deref(tex, tex->sampler); -- cgit v1.2.3 From e1d13cd058478d4f93ef17a49ea1a9d6a33f815b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 20:27:51 -0800 Subject: i965/fs/generator: Take an actual shader stage rather than a string --- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++++-- src/mesa/drivers/dri/i965/brw_fs.h | 4 ++-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 7 ++++--- src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 +- 7 files changed, 14 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index b8990cef89e..fd23e23b6f8 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -32,7 +32,7 @@ brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw, generator(brw->intelScreen->compiler, brw, mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key), (struct brw_stage_prog_data *) rzalloc(mem_ctx, struct brw_wm_prog_data), - 0, false, "BLORP") + 0, false, MESA_SHADER_FRAGMENT) { if (debug_flag) generator.enable_debug("blorp"); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 489461c6d95..4230822c2cd 100644 --- 
a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5628,7 +5628,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, } fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base, - v.promoted_constants, v.runtime_check_aads_emit, "FS"); + v.promoted_constants, v.runtime_check_aads_emit, + MESA_SHADER_FRAGMENT); if (unlikely(INTEL_DEBUG & DEBUG_WM)) { g.enable_debug(ralloc_asprintf(mem_ctx, "%s fragment shader %s", @@ -5753,7 +5754,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, } fs_generator g(compiler, log_data, mem_ctx, (void*) key, &prog_data->base, - v8.promoted_constants, v8.runtime_check_aads_emit, "CS"); + v8.promoted_constants, v8.runtime_check_aads_emit, + MESA_SHADER_COMPUTE); if (INTEL_DEBUG & DEBUG_CS) { char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s", shader->info.label ? shader->info.label : diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index bdbfd0c4546..2dddd7c0b4f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -427,7 +427,7 @@ public: struct brw_stage_prog_data *prog_data, unsigned promoted_constants, bool runtime_check_aads_emit, - const char *stage_abbrev); + gl_shader_stage stage); ~fs_generator(); void enable_debug(const char *shader_name); @@ -539,7 +539,7 @@ private: bool runtime_check_aads_emit; bool debug_flag; const char *shader_name; - const char *stage_abbrev; + gl_shader_stage stage; void *mem_ctx; }; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index b097582841f..eebb485b001 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -111,14 +111,14 @@ fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data, struct brw_stage_prog_data *prog_data, unsigned promoted_constants, bool runtime_check_aads_emit, 
- const char *stage_abbrev) + gl_shader_stage stage) : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo), key(key), prog_data(prog_data), promoted_constants(promoted_constants), runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false), - stage_abbrev(stage_abbrev), mem_ctx(mem_ctx) + stage(stage), mem_ctx(mem_ctx) { p = rzalloc(mem_ctx, struct brw_codegen); brw_init_codegen(devinfo, p, mem_ctx); @@ -2314,7 +2314,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) "%s SIMD%d shader: %d inst, %d loops, %u cycles, " "%d:%d spills:fills, Promoted %u constants, " "compacted %d to %d bytes.", - stage_abbrev, dispatch_width, before_size / 16, + _mesa_shader_stage_to_abbrev(stage), + dispatch_width, before_size / 16, loop_count, cfg->cycle_count, spill_count, fill_count, promoted_constants, before_size, after_size); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index e405ffe5a7d..d507e99d352 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -1412,7 +1412,7 @@ brw_compile_tes(const struct brw_compiler *compiler, fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base.base, v.promoted_constants, false, - "TES"); + MESA_SHADER_TESS_EVAL); if (unlikely(INTEL_DEBUG & DEBUG_TES)) { g.enable_debug(ralloc_asprintf(mem_ctx, "%s tessellation evaluation shader %s", diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index c6a52c5d183..09db25f10da 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -2052,7 +2052,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base.base, v.promoted_constants, - v.runtime_check_aads_emit, "VS"); + v.runtime_check_aads_emit, MESA_SHADER_VERTEX); if (INTEL_DEBUG & DEBUG_VS) { const char *debug_name = 
ralloc_asprintf(mem_ctx, "%s vertex shader %s", diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index b13d36e2c7d..b2a971a40ff 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -839,7 +839,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, fs_generator g(compiler, log_data, mem_ctx, &c.key, &prog_data->base.base, v.promoted_constants, - false, "GS"); + false, MESA_SHADER_GEOMETRY); if (unlikely(INTEL_DEBUG & DEBUG_GS)) { const char *label = shader->info.label ? shader->info.label : "unnamed"; -- cgit v1.2.3 From 6be517b20e8de18790d717329a90439a0693f769 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 20:42:47 -0800 Subject: i965/fs: Always set hannel 2 of texture headers in some stages --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index eebb485b001..70ca7cd5a3a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -914,6 +914,22 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src /* Set the offset bits in DWord 2. */ brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(inst->offset)); + } else if (stage != MESA_SHADER_VERTEX && + stage != MESA_SHADER_FRAGMENT) { + /* In the vertex and fragment stages, the hardware is nice to us + * and leaves g0.2 zerod out for us so we can use it for headers. + * However, in compute, geometry, and tessellation stages, the + * hardware is not so nice. In particular, for compute shaders on + * BDW, the hardware places some debug bits in 23:15. As it + * happens, bit 15 is the alpha channel mask. 
This means that if + * you use a texturing instruction with a header in a compute + * shader, you may randomly get the alpha channel randomly + * disabled. Since channel masks affect the return length of the + * sampler message, this can lead the GPU to expect a different + * mlen to the one you specified in the shader (probably 4 or 8) + * and this, in turn, hangs your GPU. + */ + brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(0)); } brw_adjust_sampler_state_pointer(p, header_reg, sampler_index); -- cgit v1.2.3 From 5d1c2736b6eae9bab1a66cc22197d053de3444a7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jan 2016 22:03:15 -0800 Subject: i965/fs/generator: Change a comment as per jordan's suggestion --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 70ca7cd5a3a..34d40e921cd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -916,18 +916,10 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src brw_imm_ud(inst->offset)); } else if (stage != MESA_SHADER_VERTEX && stage != MESA_SHADER_FRAGMENT) { - /* In the vertex and fragment stages, the hardware is nice to us - * and leaves g0.2 zerod out for us so we can use it for headers. - * However, in compute, geometry, and tessellation stages, the - * hardware is not so nice. In particular, for compute shaders on - * BDW, the hardware places some debug bits in 23:15. As it - * happens, bit 15 is the alpha channel mask. This means that if - * you use a texturing instruction with a header in a compute - * shader, you may randomly get the alpha channel randomly - * disabled. 
Since channel masks affect the return length of the - * sampler message, this can lead the GPU to expect a different - * mlen to the one you specified in the shader (probably 4 or 8) - * and this, in turn, hangs your GPU. + /* The vertex and fragment stages have g0.2 set to 0, so + * header0.2 is 0 when g0 is copied. Other stages may not, so we + * must set it to 0 to avoid setting undesirable bits in the + * message. */ brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(0)); } -- cgit v1.2.3 From a4b045ca44fb8210537e5a2b7c772f836be20e97 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 Jan 2016 11:28:35 -0800 Subject: anv: Add anv_cmd_state::attachments This array contains attachment state when recording a renderpass instance. It's populated on each call to anv_cmd_buffer_set_pass. The data is currently set but unused. We'll use it later to defer each attachment clear to the subpass that first uses the attachment. --- src/vulkan/anv_cmd_buffer.c | 68 ++++++++++++++++++++++++++++++++++++++++++-- src/vulkan/anv_private.h | 18 ++++++++++++ src/vulkan/gen7_cmd_buffer.c | 1 + src/vulkan/gen8_cmd_buffer.c | 1 + 4 files changed, 86 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 689bc53c93a..065c2f64922 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -111,8 +111,10 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest, } static void -anv_cmd_state_reset(struct anv_cmd_state *state) +anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) { + struct anv_cmd_state *state = &cmd_buffer->state; + memset(&state->descriptors, 0, sizeof(state->descriptors)); memset(&state->push_constants, 0, sizeof(state->push_constants)); @@ -125,9 +127,69 @@ anv_cmd_state_reset(struct anv_cmd_state *state) state->dynamic = default_dynamic_state; state->need_query_wa = true; + if (state->attachments != NULL) { + anv_free(&cmd_buffer->pool->alloc, state->attachments); + 
state->attachments = NULL; + } + state->gen7.index_buffer = NULL; } +/** + * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass. + */ +void +anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo *info) +{ + struct anv_cmd_state *state = &cmd_buffer->state; + ANV_FROM_HANDLE(anv_render_pass, pass, info->renderPass); + + anv_free(&cmd_buffer->pool->alloc, state->attachments); + + if (pass->attachment_count == 0) { + state->attachments = NULL; + return; + } + + state->attachments = anv_alloc(&cmd_buffer->pool->alloc, + pass->attachment_count * + sizeof(state->attachments[0]), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (state->attachments == NULL) { + /* FIXME: Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */ + abort(); + } + + for (uint32_t i = 0; i < pass->attachment_count; ++i) { + struct anv_render_pass_attachment *att = &pass->attachments[i]; + VkImageAspectFlags clear_aspects = 0; + + if (anv_format_is_color(att->format)) { + /* color attachment */ + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; + } + } else { + /* depthstencil attachment */ + if (att->format->depth_format && + att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; + } + if (att->format->has_stencil && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + } + + state->attachments[i].pending_clear_aspects = clear_aspects; + if (clear_aspects) { + assert(info->clearValueCount > i); + state->attachments[i].clear_value = info->pClearValues[i]; + } + } +} + static VkResult anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage, uint32_t size) @@ -173,6 +235,7 @@ static VkResult anv_create_cmd_buffer( cmd_buffer->device = device; cmd_buffer->pool = pool; cmd_buffer->level = level; + cmd_buffer->state.attachments = NULL; result = 
anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); if (result != VK_SUCCESS) @@ -237,6 +300,7 @@ anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); anv_free(&cmd_buffer->pool->alloc, cmd_buffer); } @@ -262,7 +326,7 @@ VkResult anv_ResetCommandBuffer( cmd_buffer->usage_flags = 0; cmd_buffer->state.current_pipeline = UINT32_MAX; anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); - anv_cmd_state_reset(&cmd_buffer->state); + anv_cmd_state_reset(cmd_buffer); return VK_SUCCESS; } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 1065f8a7359..f3232c69029 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1059,6 +1059,16 @@ void anv_dynamic_state_copy(struct anv_dynamic_state *dest, const struct anv_dynamic_state *src, uint32_t copy_mask); +/** + * Attachment state when recording a renderpass instance. + * + * The clear value is valid only if there exists a pending clear. + */ +struct anv_attachment_state { + VkImageAspectFlags pending_clear_aspects; + VkClearValue clear_value; +}; + /** State required while building cmd buffer */ struct anv_cmd_state { /* PIPELINE_SELECT.PipelineSelection */ @@ -1085,6 +1095,12 @@ struct anv_cmd_state { struct anv_dynamic_state dynamic; bool need_query_wa; + /** + * Array length is anv_cmd_state::pass::attachment_count. Array content is + * valid only when recording a render pass instance. 
+ */ + struct anv_attachment_state * attachments; + struct { struct anv_buffer * index_buffer; uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */ @@ -1214,6 +1230,8 @@ void gen9_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo *info); void gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index e6501124139..7309b1688f5 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -816,6 +816,7 @@ void genX(CmdBeginRenderPass)( cmd_buffer->state.framebuffer = framebuffer; cmd_buffer->state.pass = pass; + anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); const VkRect2D *render_area = &pRenderPassBegin->renderArea; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 65b4514d35e..0e2b3047bcd 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -813,6 +813,7 @@ void genX(CmdBeginRenderPass)( cmd_buffer->state.framebuffer = framebuffer; cmd_buffer->state.pass = pass; + anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); flush_pipeline_select_3d(cmd_buffer); -- cgit v1.2.3 From 356f952f87408ee30d1d42d582d57fee41318ba7 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 Jan 2016 11:52:23 -0800 Subject: anv/meta: Use anv_cmd_state::attachments for clears Rewrite anv_cmd_buffer_clear_attachments, which emits the top-of-pass clears, to use the data provided in anv_cmd_state::attachments. This prepares for deferring each attachment clear to the first subpass that uses the attachment. 
--- src/vulkan/anv_meta_clear.c | 93 ++++++++++++++++++++------------------------ src/vulkan/anv_private.h | 5 +-- src/vulkan/gen7_cmd_buffer.c | 4 +- src/vulkan/gen8_cmd_buffer.c | 4 +- 4 files changed, 47 insertions(+), 59 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index cec98dbcf47..43303fbd616 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -642,69 +642,62 @@ anv_device_finish_meta_clear_state(struct anv_device *device) NULL); } -void -anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, - struct anv_render_pass *pass, - const VkClearValue *clear_values) +/** + * At least one aspect must be specified. + */ +static void +emit_clear(struct anv_cmd_buffer *cmd_buffer, + uint32_t attachment, + VkImageAspectFlags aspects, + const VkClearValue *value) { - struct anv_meta_saved_state saved_state; + if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + emit_load_color_clear(cmd_buffer, attachment, value->color); + } else { + assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + emit_load_depthstencil_clear(cmd_buffer, attachment, aspects, + value->depthStencil); + } +} - /* Figure out whether or not we actually need to clear anything to avoid - * trashing state when clearing is a no-op. 
- */ - bool needs_clear = false; - for (uint32_t a = 0; a < pass->attachment_count; ++a) { - struct anv_render_pass_attachment *att = &pass->attachments[a]; - - if (anv_format_is_color(att->format)) { - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - needs_clear = true; - break; - } - } else { - if ((att->format->depth_format && - att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) || - (att->format->has_stencil && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)) { - needs_clear = true; - break; - } +static bool +pass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_cmd_state *cmd_state = &cmd_buffer->state; + + for (uint32_t i = 0; i < cmd_state->pass->attachment_count; ++i) { + if (cmd_state->attachments[i].pending_clear_aspects) { + return true; } } - if (!needs_clear) + return false; +} + +void +anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_meta_saved_state saved_state; + + if (!pass_needs_clear(cmd_buffer)) return; meta_clear_begin(&saved_state, cmd_buffer); - if (cmd_buffer->state.framebuffer->layers > 1) + if (cmd_state->framebuffer->layers > 1) anv_finishme("clearing multi-layer framebuffer"); - for (uint32_t a = 0; a < pass->attachment_count; ++a) { - struct anv_render_pass_attachment *att = &pass->attachments[a]; + for (uint32_t a = 0; a < cmd_state->pass->attachment_count; ++a) { + if (!cmd_state->attachments[a].pending_clear_aspects) + continue; - if (anv_format_is_color(att->format)) { - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - emit_load_color_clear(cmd_buffer, a, clear_values[a].color); - } - } else { - VkImageAspectFlags clear_aspects = 0; + emit_clear(cmd_buffer, a, + cmd_state->attachments[a].pending_clear_aspects, + &cmd_state->attachments[a].clear_value); - if (att->format->depth_format && - att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; - } - - if 
(att->format->has_stencil && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - } - - if (clear_aspects) { - emit_load_depthstencil_clear(cmd_buffer, a, clear_aspects, - clear_values[a].depthStencil); - } - } + cmd_state->attachments[a].pending_clear_aspects = 0; } meta_clear_end(&saved_state, cmd_buffer); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index f3232c69029..3acf9796a5e 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1249,9 +1249,8 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, struct anv_state anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, - struct anv_render_pass *pass, - const VkClearValue *clear_values); +void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer); + const struct anv_image_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 7309b1688f5..7257d6595dd 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -830,9 +830,7 @@ void genX(CmdBeginRenderPass)( .DrawingRectangleOriginY = 0, .DrawingRectangleOriginX = 0); - anv_cmd_buffer_clear_attachments(cmd_buffer, pass, - pRenderPassBegin->pClearValues); - + anv_cmd_buffer_clear_attachments(cmd_buffer); gen7_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); } diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 0e2b3047bcd..277cee08974 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -829,9 +829,7 @@ void genX(CmdBeginRenderPass)( .DrawingRectangleOriginY = 0, .DrawingRectangleOriginX = 0); - anv_cmd_buffer_clear_attachments(cmd_buffer, pass, - pRenderPassBegin->pClearValues); - + anv_cmd_buffer_clear_attachments(cmd_buffer); genX(cmd_buffer_begin_subpass)(cmd_buffer, 
pass->subpasses); } -- cgit v1.2.3 From f2700d665c99c45ab79c3cb42dad24c0c3c13edd Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 Jan 2016 14:09:36 -0800 Subject: anv/meta: Rename emit_load_*_clear funcs The functions will soon handle clears unrelated to VK_ATTACHMENT_LOAD_OP_CLEAR, namely vkCmdClearAttachments. So remove "load" from their name: emit_load_color_clear -> emit_color_clear emit_load_depthstencil_clear -> emit_depthstencil_clear --- src/vulkan/anv_meta_clear.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 43303fbd616..9a572ba984b 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -293,9 +293,9 @@ init_color_pipeline(struct anv_device *device) } static void -emit_load_color_clear(struct anv_cmd_buffer *cmd_buffer, - uint32_t attachment, - VkClearColorValue clear_value) +emit_color_clear(struct anv_cmd_buffer *cmd_buffer, + uint32_t attachment, + VkClearColorValue clear_value) { struct anv_device *device = cmd_buffer->device; VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); @@ -466,10 +466,10 @@ create_depthstencil_pipeline(struct anv_device *device, } static void -emit_load_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, - uint32_t attachment, - VkImageAspectFlags aspects, - VkClearDepthStencilValue clear_value) +emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, + uint32_t attachment, + VkImageAspectFlags aspects, + VkClearDepthStencilValue clear_value) { struct anv_device *device = cmd_buffer->device; VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); @@ -652,12 +652,12 @@ emit_clear(struct anv_cmd_buffer *cmd_buffer, const VkClearValue *value) { if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { - emit_load_color_clear(cmd_buffer, attachment, value->color); + emit_color_clear(cmd_buffer, attachment, value->color); } else { assert(aspects & 
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); - emit_load_depthstencil_clear(cmd_buffer, attachment, aspects, - value->depthStencil); + emit_depthstencil_clear(cmd_buffer, attachment, aspects, + value->depthStencil); } } -- cgit v1.2.3 From 4c2bafb9bff8f5979c4cf31f5decdaaffdf1c2cb Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 Jan 2016 16:24:14 -0800 Subject: anv: Define zero() macro zero(x) memsets x to zero. Eliminates bugs due to errors in memset's size param. --- src/vulkan/anv_private.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 3acf9796a5e..5a31ec9c50a 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -152,6 +152,8 @@ anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) memcpy((dest), (src), (count) * sizeof(*(src))); \ }) +#define zero(x) (memset(&(x), 0, sizeof(x))) + /* Define no kernel as 1, since that's an illegal offset for a kernel */ #define NO_KERNEL 1 -- cgit v1.2.3 From 6a1a760e3c76af62d2b75dd4e54483ebb2f1fca9 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 Jan 2016 16:03:09 -0800 Subject: anv: Move MAX_* defs to top of anv_private.h Because I need to use MAX_RTS in struct anv_meta_state. 
--- src/vulkan/anv_private.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 5a31ec9c50a..e9c41e8d5b2 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -67,6 +67,15 @@ typedef uint32_t xcb_window_t; extern "C" { #endif +#define MAX_VBS 32 +#define MAX_SETS 8 +#define MAX_RTS 8 +#define MAX_VIEWPORTS 16 +#define MAX_SCISSORS 16 +#define MAX_PUSH_CONSTANTS_SIZE 128 +#define MAX_DYNAMIC_BUFFERS 16 +#define MAX_IMAGES 8 + #define ICD_LOADER_MAGIC 0x01CDC0DE typedef union _VK_LOADER_DATA { @@ -911,15 +920,6 @@ void anv_descriptor_set_destroy(struct anv_device *device, struct anv_descriptor_set *set); -#define MAX_VBS 32 -#define MAX_SETS 8 -#define MAX_RTS 8 -#define MAX_VIEWPORTS 16 -#define MAX_SCISSORS 16 -#define MAX_PUSH_CONSTANTS_SIZE 128 -#define MAX_DYNAMIC_BUFFERS 16 -#define MAX_IMAGES 8 - struct anv_pipeline_binding { /* The descriptor set this surface corresponds to */ uint16_t set; -- cgit v1.2.3 From 13610c03a712fac8110aa77bf19712846e394c40 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 Jan 2016 18:09:01 -0800 Subject: anv/meta: Name the nir shaders The names appear in debug output. 
--- src/vulkan/anv_meta.c | 2 ++ src/vulkan/anv_meta_clear.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 3cbb3a65f22..3ddeab1ef2d 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -42,6 +42,7 @@ build_nir_vertex_shader(bool attr_flat) const struct glsl_type *vertex_type = glsl_vec4_type(); nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, vertex_type, "a_pos"); @@ -74,6 +75,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); const struct glsl_type *color_type = glsl_vec4_type(); diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 9a572ba984b..5eea02380d8 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -69,6 +69,9 @@ build_color_shaders(struct nir_shader **out_vs, nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs"); + fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs"); + const struct glsl_type *position_type = glsl_vec4_type(); const struct glsl_type *color_type = glsl_vec4_type(); @@ -381,6 +384,9 @@ build_depthstencil_shaders(struct nir_shader **out_vs, nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs"); + fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs"); + const struct glsl_type *position_type = 
glsl_vec4_type(); nir_variable *vs_in_pos = -- cgit v1.2.3 From 2997b0da4a71fb3019ba9234cc94ec42fa2e5641 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 Jan 2016 18:24:18 -0800 Subject: anv: Allow override of pipeline color attachment count Add anv_graphics_pipeline_create_info::color_attachment_count. If non-negative, then it overrides the color attachment count in the pipeline's subpass. Useful for meta. (All the hacks for meta!) --- src/vulkan/anv_meta.c | 1 + src/vulkan/anv_meta_clear.c | 1 + src/vulkan/anv_pipeline.c | 13 ++++++++++--- src/vulkan/anv_private.h | 6 ++++++ 4 files changed, 18 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 3ddeab1ef2d..57138fbd6a3 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -392,6 +392,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) }; const struct anv_graphics_pipeline_create_info anv_pipeline_info = { + .color_attachment_count = -1, .use_repclear = false, .disable_viewport = true, .disable_scissor = true, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 5eea02380d8..d225e98033c 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -207,6 +207,7 @@ create_pipeline(struct anv_device *device, .subpass = 0, }, &(struct anv_graphics_pipeline_create_info) { + .color_attachment_count = MAX_RTS, .use_repclear = true, .disable_viewport = true, .disable_vs = true, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 8de889306cc..517fcb0ac3a 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -341,6 +341,7 @@ populate_gs_prog_key(const struct brw_device_info *devinfo, static void populate_wm_prog_key(const struct brw_device_info *devinfo, const VkGraphicsPipelineCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra, struct brw_wm_prog_key *key) { ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass); @@ 
-361,7 +362,12 @@ populate_wm_prog_key(const struct brw_device_info *devinfo, key->drawable_height = 0; key->render_to_fbo = false; - key->nr_color_regions = render_pass->subpasses[info->subpass].color_count; + if (extra && extra->color_attachment_count >= 0) { + key->nr_color_regions = extra->color_attachment_count; + } else { + key->nr_color_regions = + render_pass->subpasses[info->subpass].color_count; + } key->replicate_alpha = key->nr_color_regions > 1 && info->pMultisampleState && @@ -633,6 +639,7 @@ static VkResult anv_pipeline_compile_fs(struct anv_pipeline *pipeline, struct anv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra, struct anv_shader_module *module, const char *entrypoint, const VkSpecializationInfo *spec_info) @@ -642,7 +649,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; struct brw_wm_prog_key key; - populate_wm_prog_key(&pipeline->device->info, info, &key); + populate_wm_prog_key(&pipeline->device->info, info, extra, &key); if (pipeline->use_repclear) key.nr_color_regions = 1; @@ -1098,7 +1105,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pCreateInfo->pStages[i].pSpecializationInfo); break; case VK_SHADER_STAGE_FRAGMENT_BIT: - anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, module, + anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, module, pCreateInfo->pStages[i].pName, pCreateInfo->pStages[i].pSpecializationInfo); break; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index e9c41e8d5b2..4f208965041 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1368,6 +1368,12 @@ struct anv_pipeline { }; struct anv_graphics_pipeline_create_info { + /** + * If non-negative, overrides the color attachment count of the pipeline's + * subpass. 
+ */ + int8_t color_attachment_count; + bool use_repclear; bool disable_viewport; bool disable_scissor; -- cgit v1.2.3 From 0679bef49f5c4c1c3f2fa20b2da090bcc9ebaed6 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 Jan 2016 16:29:45 -0800 Subject: anv/meta: Create 8 pipelines for color clears This prepares for moving the clear ops from the start of the render pass into each subpass. Pipeline N will be used to clear color attachment N of the current subpass. Currently meta color clears still create a throwaway subpass with exactly one attachment, so currently only pipeline 0 is used. This is an ugly hack to workaround the compiler's current inability to dynamically set the render target index in the render target write message. --- src/vulkan/anv_meta_clear.c | 82 ++++++++++++++++++++++++++++++++------------- src/vulkan/anv_private.h | 11 +++++- 2 files changed, 68 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index d225e98033c..969fbe62874 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -61,7 +61,8 @@ meta_clear_end(struct anv_meta_saved_state *saved_state, static void build_color_shaders(struct nir_shader **out_vs, - struct nir_shader **out_fs) + struct nir_shader **out_fs, + uint32_t frag_output) { nir_builder vs_b; nir_builder fs_b; @@ -105,7 +106,7 @@ build_color_shaders(struct nir_shader **out_vs, nir_variable *fs_out_color = nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color"); - fs_out_color->data.location = FRAG_RESULT_DATA0; + fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output; nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); nir_copy_var(&vs_b, vs_out_color, vs_in_color); @@ -123,6 +124,7 @@ create_pipeline(struct anv_device *device, const VkPipelineDepthStencilStateCreateInfo *ds_state, const VkPipelineColorBlendStateCreateInfo *cb_state, const VkAllocationCallbacks *alloc, + bool use_repclear, struct anv_pipeline 
**pipeline) { VkDevice device_h = anv_device_to_handle(device); @@ -208,7 +210,7 @@ create_pipeline(struct anv_device *device, }, &(struct anv_graphics_pipeline_create_info) { .color_attachment_count = MAX_RTS, - .use_repclear = true, + .use_repclear = use_repclear, .disable_viewport = true, .disable_vs = true, .use_rectlist = true @@ -225,11 +227,12 @@ create_pipeline(struct anv_device *device, } static VkResult -init_color_pipeline(struct anv_device *device) +create_color_pipeline(struct anv_device *device, uint32_t frag_output, + struct anv_pipeline **pipeline) { struct nir_shader *vs_nir; struct nir_shader *fs_nir; - build_color_shaders(&vs_nir, &fs_nir); + build_color_shaders(&vs_nir, &fs_nir, frag_output); const VkPipelineVertexInputStateCreateInfo vi_state = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -290,10 +293,44 @@ init_color_pipeline(struct anv_device *device) }, }; + /* Disable repclear because we do not want the compiler to replace the + * shader. We need the shader to write to the specified color attachment, + * but the repclear shader writes to all color attachments. 
+ */ return create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, - &cb_state, NULL, - &device->meta_state.clear.color_pipeline); + &cb_state, NULL, /*use_repclear*/ false, + pipeline); +} + +static VkResult +init_color_pipelines(struct anv_device *device) +{ + VkResult result; + struct anv_pipeline **pipelines = device->meta_state.clear.color_pipelines; + uint32_t n = ARRAY_SIZE(device->meta_state.clear.color_pipelines); + + zero(device->meta_state.clear.color_pipelines); + + for (uint32_t i = 0; i < n; ++i) { + result = create_color_pipeline(device, i, &pipelines[i]); + if (result < 0) + goto fail; + } + + return VK_SUCCESS; + +fail: + for (uint32_t i = 0; i < n; ++i) { + if (pipelines[i] == NULL) + break; + + anv_DestroyPipeline(anv_device_to_handle(device), + anv_pipeline_to_handle(pipelines[i]), + NULL); + } + + return result; } static void @@ -304,8 +341,8 @@ emit_color_clear(struct anv_cmd_buffer *cmd_buffer, struct anv_device *device = cmd_buffer->device; VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - VkPipeline pipeline_h = - anv_pipeline_to_handle(device->meta_state.clear.color_pipeline); + struct anv_pipeline *pipeline = device->meta_state.clear.color_pipelines[0]; + VkPipeline pipeline_h = anv_pipeline_to_handle(pipeline); const struct color_clear_vattrs vertex_data[3] = { { @@ -366,7 +403,7 @@ emit_color_clear(struct anv_cmd_buffer *cmd_buffer, (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, (VkDeviceSize[]) { 0 }); - if (cmd_buffer->state.pipeline != device->meta_state.clear.color_pipeline) { + if (cmd_buffer->state.pipeline != pipeline) { ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_h); } @@ -469,7 +506,7 @@ create_depthstencil_pipeline(struct anv_device *device, }; return create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, - &cb_state, NULL, pipeline); + &cb_state, NULL, /*use_repclear*/ true, 
pipeline); } static void @@ -612,22 +649,15 @@ anv_device_init_meta_clear_state(struct anv_device *device) { VkResult result; - result = init_color_pipeline(device); + result = init_color_pipelines(device); if (result != VK_SUCCESS) - goto fail; + return result; result = init_depthstencil_pipelines(device); if (result != VK_SUCCESS) - goto fail_color_pipeline; + return result; return VK_SUCCESS; - - fail_color_pipeline: - anv_DestroyPipeline(anv_device_to_handle(device), - anv_pipeline_to_handle(device->meta_state.clear.color_pipeline), - NULL); - fail: - return result; } void @@ -635,9 +665,13 @@ anv_device_finish_meta_clear_state(struct anv_device *device) { VkDevice device_h = anv_device_to_handle(device); - ANV_CALL(DestroyPipeline)(device_h, - anv_pipeline_to_handle(device->meta_state.clear.color_pipeline), - NULL); + for (uint32_t i = 0; + i < ARRAY_SIZE(device->meta_state.clear.color_pipelines); ++i) { + ANV_CALL(DestroyPipeline)(device_h, + anv_pipeline_to_handle(device->meta_state.clear.color_pipelines[i]), + NULL); + } + ANV_CALL(DestroyPipeline)(device_h, anv_pipeline_to_handle(device->meta_state.clear.depth_only_pipeline), NULL); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 4f208965041..9e6c3bf57fc 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -563,7 +563,16 @@ void anv_finish_wsi(struct anv_instance *instance); struct anv_meta_state { struct { - struct anv_pipeline *color_pipeline; + /** + * Pipeline N is used to clear color attachment N of the current + * subpass. + * + * HACK: We use one pipeline per color attachment to work around the + * compiler's inability to dynamically set the render target index of + * the render target write message. 
+ */ + struct anv_pipeline *color_pipelines[MAX_RTS]; + struct anv_pipeline *depth_only_pipeline; struct anv_pipeline *stencil_only_pipeline; struct anv_pipeline *depthstencil_pipeline; -- cgit v1.2.3 From deb8dd89b5b211436eea2b8142a6b0acceeec6fd Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 Jan 2016 14:47:51 -0800 Subject: anv: Emit load clears at start of each subpass This should improve cache residency for render targets. Pre-patch, vkCmdBeginRenderPass emitted all the meta clears for VK_ATTACHMENT_LOAD_OP_CLEAR before any subpass began. Post-patch, vCmdBeginRenderPass and vkCmdNextSubpass emit only the clears needed for that current subpass. --- src/vulkan/anv_meta_clear.c | 113 ++++++++++++++++++++++++++++--------------- src/vulkan/anv_private.h | 2 +- src/vulkan/gen7_cmd_buffer.c | 3 +- src/vulkan/gen8_cmd_buffer.c | 3 +- 4 files changed, 78 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 969fbe62874..39b7b968662 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -335,15 +335,21 @@ fail: static void emit_color_clear(struct anv_cmd_buffer *cmd_buffer, - uint32_t attachment, - VkClearColorValue clear_value) + const VkClearAttachment *clear_att) { struct anv_device *device = cmd_buffer->device; - VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + const struct anv_subpass *subpass = cmd_buffer->state.subpass; const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_pipeline *pipeline = device->meta_state.clear.color_pipelines[0]; + VkClearColorValue clear_value = clear_att->clearValue.color; + struct anv_pipeline *pipeline = + device->meta_state.clear.color_pipelines[clear_att->colorAttachment]; + + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); VkPipeline pipeline_h = anv_pipeline_to_handle(pipeline); + assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + 
assert(clear_att->colorAttachment < subpass->color_count); + const struct color_clear_vattrs vertex_data[3] = { { .vue_header = { 0 }, @@ -372,13 +378,6 @@ emit_color_clear(struct anv_cmd_buffer *cmd_buffer, .offset = state.offset, }; - anv_cmd_buffer_begin_subpass(cmd_buffer, - &(struct anv_subpass) { - .color_count = 1, - .color_attachments = (uint32_t[]) { attachment }, - .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, - }); - ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, (VkViewport[]) { { @@ -511,13 +510,22 @@ create_depthstencil_pipeline(struct anv_device *device, static void emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, - uint32_t attachment, - VkImageAspectFlags aspects, - VkClearDepthStencilValue clear_value) + const VkClearAttachment *clear_att) { struct anv_device *device = cmd_buffer->device; - VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + const struct anv_subpass *subpass = cmd_buffer->state.subpass; const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + uint32_t attachment = subpass->depth_stencil_attachment; + VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; + VkImageAspectFlags aspects = clear_att->aspectMask; + + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + + assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT || + aspects == VK_IMAGE_ASPECT_STENCIL_BIT || + aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + assert(attachment != VK_ATTACHMENT_UNUSED); const struct depthstencil_clear_vattrs vertex_data[3] = { { @@ -544,12 +552,6 @@ emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, .offset = state.offset, }; - anv_cmd_buffer_begin_subpass(cmd_buffer, - &(struct anv_subpass) { - .color_count = 0, - .depth_stencil_attachment = attachment, - }); - ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, (VkViewport[]) { { @@ -684,45 +686,54 @@ anv_device_finish_meta_clear_state(struct anv_device *device) } /** - * At least one aspect must 
be specified. + * The parameters mean that same as those in vkCmdClearAttachments. */ static void emit_clear(struct anv_cmd_buffer *cmd_buffer, - uint32_t attachment, - VkImageAspectFlags aspects, - const VkClearValue *value) + const VkClearAttachment *clear_att) { - if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { - emit_color_clear(cmd_buffer, attachment, value->color); + if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + emit_color_clear(cmd_buffer, clear_att); } else { - assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)); - emit_depthstencil_clear(cmd_buffer, attachment, aspects, - value->depthStencil); + assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + emit_depthstencil_clear(cmd_buffer, clear_att); } } static bool -pass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) +subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) { const struct anv_cmd_state *cmd_state = &cmd_buffer->state; + uint32_t ds = cmd_state->subpass->depth_stencil_attachment; - for (uint32_t i = 0; i < cmd_state->pass->attachment_count; ++i) { - if (cmd_state->attachments[i].pending_clear_aspects) { + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { + uint32_t a = cmd_state->subpass->color_attachments[i]; + if (cmd_state->attachments[a].pending_clear_aspects) { return true; } } + if (ds != VK_ATTACHMENT_UNUSED && + cmd_state->attachments[ds].pending_clear_aspects) { + return true; + } + return false; } +/** + * Emit any pending attachment clears for the current subpass. 
+ * + * @see anv_attachment_state::pending_clear_aspects + */ void -anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer) +anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) { struct anv_cmd_state *cmd_state = &cmd_buffer->state; struct anv_meta_saved_state saved_state; - if (!pass_needs_clear(cmd_buffer)) + if (!subpass_needs_clear(cmd_buffer)) return; meta_clear_begin(&saved_state, cmd_buffer); @@ -730,17 +741,39 @@ anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer) if (cmd_state->framebuffer->layers > 1) anv_finishme("clearing multi-layer framebuffer"); - for (uint32_t a = 0; a < cmd_state->pass->attachment_count; ++a) { + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { + uint32_t a = cmd_state->subpass->color_attachments[i]; + if (!cmd_state->attachments[a].pending_clear_aspects) continue; - emit_clear(cmd_buffer, a, - cmd_state->attachments[a].pending_clear_aspects, - &cmd_state->attachments[a].clear_value); + assert(cmd_state->attachments[a].pending_clear_aspects == + VK_IMAGE_ASPECT_COLOR_BIT); + VkClearAttachment clear_att = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = i, /* Use attachment index relative to subpass */ + .clearValue = cmd_state->attachments[a].clear_value, + }; + + emit_clear(cmd_buffer, &clear_att); cmd_state->attachments[a].pending_clear_aspects = 0; } + uint32_t ds = cmd_state->subpass->depth_stencil_attachment; + + if (ds != VK_ATTACHMENT_UNUSED && + cmd_state->attachments[ds].pending_clear_aspects) { + + VkClearAttachment clear_att = { + .aspectMask = cmd_state->attachments[ds].pending_clear_aspects, + .clearValue = cmd_state->attachments[ds].clear_value, + }; + + emit_clear(cmd_buffer, &clear_att); + cmd_state->attachments[ds].pending_clear_aspects = 0; + } + meta_clear_end(&saved_state, cmd_buffer); } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 9e6c3bf57fc..a76e78aa8e4 100644 --- a/src/vulkan/anv_private.h +++ 
b/src/vulkan/anv_private.h @@ -1260,7 +1260,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, struct anv_state anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer); const struct anv_image_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 7257d6595dd..bea0abea1cc 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -830,8 +830,8 @@ void genX(CmdBeginRenderPass)( .DrawingRectangleOriginY = 0, .DrawingRectangleOriginX = 0); - anv_cmd_buffer_clear_attachments(cmd_buffer); gen7_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); + anv_cmd_buffer_clear_subpass(cmd_buffer); } void genX(CmdNextSubpass)( @@ -843,6 +843,7 @@ void genX(CmdNextSubpass)( assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); gen7_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); + anv_cmd_buffer_clear_subpass(cmd_buffer); } void genX(CmdEndRenderPass)( diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 277cee08974..10bc8f7bf54 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -829,8 +829,8 @@ void genX(CmdBeginRenderPass)( .DrawingRectangleOriginY = 0, .DrawingRectangleOriginX = 0); - anv_cmd_buffer_clear_attachments(cmd_buffer); genX(cmd_buffer_begin_subpass)(cmd_buffer, pass->subpasses); + anv_cmd_buffer_clear_subpass(cmd_buffer); } void genX(CmdNextSubpass)( @@ -842,6 +842,7 @@ void genX(CmdNextSubpass)( assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); genX(cmd_buffer_begin_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); + anv_cmd_buffer_clear_subpass(cmd_buffer); } void genX(CmdEndRenderPass)( -- cgit v1.2.3 From 11f543371599823bbc8b49cf44d6fb012b78ad3a Mon Sep 17 
00:00:00 2001 From: Chad Versace Date: Thu, 14 Jan 2016 15:18:20 -0800 Subject: anv: Distinguish between subpass setup and subpass start vkCmdBeginRenderPass, vkCmdNextSubpass, and vkBeginCommandBuffer with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT, all *setup* the command buffer for recording commands for some subpass. But only the first two, vkCmdBeginRenderPass and vkCmdNextSubpass, can *start* a subpass. Therefore, calling anv_cmd_buffer_begin_subpass() inside vkCmdBeginCommandBuffer is misleading. Clarify its purpose by renaming it to anv_cmd_buffer_set_subpass() and adding comments. --- src/vulkan/anv_cmd_buffer.c | 24 ++++++++++++++++++------ src/vulkan/anv_private.h | 11 +++++------ src/vulkan/gen7_cmd_buffer.c | 11 +++++++---- src/vulkan/gen8_cmd_buffer.c | 11 +++++++---- 4 files changed, 37 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 065c2f64922..070b8490e32 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -386,7 +386,7 @@ VkResult anv_BeginCommandBuffer( struct anv_subpass *subpass = &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; - anv_cmd_buffer_begin_subpass(cmd_buffer, subpass); + anv_cmd_buffer_set_subpass(cmd_buffer, subpass); } anv_cmd_buffer_emit_state_base_address(cmd_buffer); @@ -954,19 +954,31 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, return state; } +/** + * @brief Setup the command buffer for recording commands inside the given + * subpass. + * + * This does not record all commands needed for starting the subpass. + * Starting the subpass may require additional commands. + * + * Note that vkCmdBeginRenderPass, vkCmdNextSubpass, and vkBeginCommandBuffer + * with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT, all setup the + * command buffer for recording commands for some subpass. But only the first + * two, vkCmdBeginRenderPass and vkCmdNextSubpass, can start a subpass. 
+ */ void -anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) +anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) { switch (cmd_buffer->device->info.gen) { case 7: - gen7_cmd_buffer_begin_subpass(cmd_buffer, subpass); + gen7_cmd_buffer_set_subpass(cmd_buffer, subpass); break; case 8: - gen8_cmd_buffer_begin_subpass(cmd_buffer, subpass); + gen8_cmd_buffer_set_subpass(cmd_buffer, subpass); break; case 9: - gen9_cmd_buffer_begin_subpass(cmd_buffer, subpass); + gen9_cmd_buffer_set_subpass(cmd_buffer, subpass); break; default: unreachable("unsupported gen\n"); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a76e78aa8e4..e8ac70dc498 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1243,15 +1243,14 @@ void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, const VkRenderPassBeginInfo *info); -void gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void gen8_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, +void gen7_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); -void gen9_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, +void gen8_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); - -void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, +void gen9_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); struct anv_state diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index bea0abea1cc..b83bfdadbae 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -794,9 +794,12 @@ cmd_buffer_emit_depth_stencil(struct 
anv_cmd_buffer *cmd_buffer) anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CLEAR_PARAMS); } +/** + * @see anv_cmd_buffer_set_subpass() + */ GENX_FUNC(GEN7, GEN7) void -genX(cmd_buffer_begin_subpass)(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) +genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) { cmd_buffer->state.subpass = subpass; cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; @@ -830,7 +833,7 @@ void genX(CmdBeginRenderPass)( .DrawingRectangleOriginY = 0, .DrawingRectangleOriginX = 0); - gen7_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); + gen7_cmd_buffer_set_subpass(cmd_buffer, pass->subpasses); anv_cmd_buffer_clear_subpass(cmd_buffer); } @@ -842,7 +845,7 @@ void genX(CmdNextSubpass)( assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - gen7_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); + gen7_cmd_buffer_set_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); anv_cmd_buffer_clear_subpass(cmd_buffer); } diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 10bc8f7bf54..e2bbd94ddf8 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -791,9 +791,12 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS)); } +/** + * @see anv_cmd_buffer_set_subpass() + */ void -genX(cmd_buffer_begin_subpass)(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) +genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) { cmd_buffer->state.subpass = subpass; @@ -829,7 +832,7 @@ void genX(CmdBeginRenderPass)( .DrawingRectangleOriginY = 0, .DrawingRectangleOriginX = 0); - genX(cmd_buffer_begin_subpass)(cmd_buffer, pass->subpasses); + genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); anv_cmd_buffer_clear_subpass(cmd_buffer); } @@ -841,7 +844,7 @@ void genX(CmdNextSubpass)( 
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - genX(cmd_buffer_begin_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); + genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); anv_cmd_buffer_clear_subpass(cmd_buffer); } -- cgit v1.2.3 From 0038ae2e4a6f177da076a9b2f10944d54bc9289e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 Jan 2016 18:34:20 -0800 Subject: anv/meta: Add VkClearRect param to emit_clear() Prepares for vkCmdClearAttachments. --- src/vulkan/anv_meta_clear.c | 57 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 39b7b968662..722751cda80 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -335,7 +335,8 @@ fail: static void emit_color_clear(struct anv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att) + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) { struct anv_device *device = cmd_buffer->device; const struct anv_subpass *subpass = cmd_buffer->state.subpass; @@ -353,17 +354,26 @@ emit_color_clear(struct anv_cmd_buffer *cmd_buffer, const struct color_clear_vattrs vertex_data[3] = { { .vue_header = { 0 }, - .position = { 0.0, 0.0 }, + .position = { + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, + }, .color = clear_value, }, { .vue_header = { 0 }, - .position = { fb->width, 0.0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, + }, .color = clear_value, }, { .vue_header = { 0 }, - .position = { fb->width, fb->height }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, + }, .color = clear_value, }, }; @@ -510,7 +520,8 @@ create_depthstencil_pipeline(struct anv_device *device, static void emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, - const 
VkClearAttachment *clear_att) + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) { struct anv_device *device = cmd_buffer->device; const struct anv_subpass *subpass = cmd_buffer->state.subpass; @@ -530,15 +541,24 @@ emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, const struct depthstencil_clear_vattrs vertex_data[3] = { { .vue_header = { 0 }, - .position = { 0.0, 0.0 }, + .position = { + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, + }, }, { .vue_header = { 0 }, - .position = { fb->width, 0.0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, + }, }, { .vue_header = { 0 }, - .position = { fb->width, fb->height }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, + }, }, }; @@ -690,14 +710,15 @@ anv_device_finish_meta_clear_state(struct anv_device *device) */ static void emit_clear(struct anv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att) + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) { if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - emit_color_clear(cmd_buffer, clear_att); + emit_color_clear(cmd_buffer, clear_att, clear_rect); } else { assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); - emit_depthstencil_clear(cmd_buffer, clear_att); + emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect); } } @@ -731,6 +752,7 @@ void anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) { struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_meta_saved_state saved_state; if (!subpass_needs_clear(cmd_buffer)) @@ -741,6 +763,15 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) if (cmd_state->framebuffer->layers > 1) anv_finishme("clearing multi-layer framebuffer"); + VkClearRect clear_rect = { + .rect = { 
+ .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + }, + .baseArrayLayer = 0, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { uint32_t a = cmd_state->subpass->color_attachments[i]; @@ -756,7 +787,7 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) .clearValue = cmd_state->attachments[a].clear_value, }; - emit_clear(cmd_buffer, &clear_att); + emit_clear(cmd_buffer, &clear_att, &clear_rect); cmd_state->attachments[a].pending_clear_aspects = 0; } @@ -770,7 +801,7 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) .clearValue = cmd_state->attachments[ds].clear_value, }; - emit_clear(cmd_buffer, &clear_att); + emit_clear(cmd_buffer, &clear_att, &clear_rect); cmd_state->attachments[ds].pending_clear_aspects = 0; } -- cgit v1.2.3 From e4b17a2e1a9f1b8536bed6faca9d8e62483fab35 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 13 Jan 2016 18:37:27 -0800 Subject: anv/meta: Implement vkCmdClearAttachments --- src/vulkan/anv_meta_clear.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 722751cda80..b7f2f18b994 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -933,5 +933,19 @@ void anv_CmdClearAttachments( uint32_t rectCount, const VkClearRect* pRects) { - stub(); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + /* FINISHME: We can do better than this dumb loop. It thrashes too much + * state. 
+ */ + for (uint32_t a = 0; a < attachmentCount; ++a) { + for (uint32_t r = 0; r < rectCount; ++r) { + emit_clear(cmd_buffer, &pAttachments[a], &pRects[r]); + } + } + + meta_clear_end(&saved_state, cmd_buffer); } -- cgit v1.2.3 From 842b424d3b2fb75c3abd928869dc0cb2935a1bed Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 14 Jan 2016 20:09:38 -0800 Subject: anv/meta: Implement vkCmdClearDepthStencilImage --- src/vulkan/anv_meta_clear.c | 117 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 114 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index b7f2f18b994..19df58bee4e 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -916,14 +916,125 @@ void anv_CmdClearColorImage( } void anv_CmdClearDepthStencilImage( - VkCommandBuffer commandBuffer, - VkImage image, + VkCommandBuffer cmd_buffer_h, + VkImage image_h, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { - stub(); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + VkDevice device_h = anv_device_to_handle(cmd_buffer->device); + + meta_clear_begin(&saved_state, cmd_buffer); + + for (uint32_t r = 0; r < rangeCount; r++) { + const VkImageSubresourceRange *range = &pRanges[r]; + + for (uint32_t l = 0; l < range->levelCount; ++l) { + for (uint32_t s = 0; s < range->layerCount; ++s) { + struct anv_image_view iview; + anv_image_view_init(&iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = image_h, + .viewType = anv_meta_get_view_type(image), + .format = image->vk_format, + .subresourceRange = { + .aspectMask = range->aspectMask, + .baseMipLevel = range->baseMipLevel + l, + .levelCount = 1, + .baseArrayLayer = range->baseArrayLayer + s, + .layerCount = 1 + 
}, + }, + cmd_buffer); + + VkFramebuffer fb; + anv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&iview), + }, + .width = iview.extent.width, + .height = iview.extent.height, + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb); + + VkAttachmentLoadOp depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) + depth_load_op = VK_ATTACHMENT_LOAD_OP_CLEAR; + + VkAttachmentLoadOp stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) + stencil_load_op = VK_ATTACHMENT_LOAD_OP_CLEAR; + + VkRenderPass pass; + anv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = iview.vk_format, + .loadOp = depth_load_op, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = stencil_load_op, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = imageLayout, + .finalLayout = imageLayout, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 0, + .pColorAttachments = NULL, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = 0, + .layout = imageLayout, + }, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }, + .dependencyCount = 0, + }, + &cmd_buffer->pool->alloc, + &pass); + + ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h, + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderArea = { + .offset = { 0, 0, }, + .extent = { + .width = iview.extent.width, + .height = iview.extent.height, + }, + }, + .renderPass = pass, + .framebuffer = fb, + .clearValueCount = 1, + 
.pClearValues = (VkClearValue[]) { + { .depthStencil = *pDepthStencil }, + }, + }, + VK_SUBPASS_CONTENTS_INLINE); + + ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); + + /* XXX: We're leaking the render pass and framebuffer */ + } + } + } + + meta_clear_end(&saved_state, cmd_buffer); } void anv_CmdClearAttachments( -- cgit v1.2.3 From 1afe33f8b30d66ac9952e996cb3ce132c55c534f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 14 Jan 2016 21:29:46 -0800 Subject: anv/gen8: Fix SF_CLIP_VIEWPORT's Z elements SF_CLIP_VIEWPORT does not clamp Z values. It only scales and shifts them. Clamping to VkViewport::minDepth,maxDepth is instead handled by CC_VIEWPORT. Fixes dEQP-VK.renderpass.simple.depth on Broadwell. --- src/vulkan/gen8_cmd_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index e2bbd94ddf8..9c1b6e51b07 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -88,10 +88,10 @@ emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = { .ViewportMatrixElementm00 = vp->width / 2, .ViewportMatrixElementm11 = vp->height / 2, - .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, + .ViewportMatrixElementm22 = 1.0, .ViewportMatrixElementm30 = vp->x + vp->width / 2, .ViewportMatrixElementm31 = vp->y + vp->height / 2, - .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2, + .ViewportMatrixElementm32 = 0.0, .XMinClipGuardband = -1.0f, .XMaxClipGuardband = 1.0f, .YMinClipGuardband = -1.0f, -- cgit v1.2.3 From 482a1f5eab1b0ec4e15833ba192adbc237d30f34 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 15 Jan 2016 07:43:31 -0800 Subject: anv/meta: Reuse code for vkCmdClear{Color,DepthStencil}Image The two function bodies were very similar. Move common code to anv_cmd_clear_image(). Fixes all 'dEQP-VK.renderpass.formats.*' on Skylake. 
--- src/vulkan/anv_meta_clear.c | 228 +++++++++++++++----------------------------- 1 file changed, 79 insertions(+), 149 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 19df58bee4e..fd401bc968e 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -808,120 +808,13 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) meta_clear_end(&saved_state, cmd_buffer); } -void anv_CmdClearColorImage( - VkCommandBuffer commandBuffer, - VkImage _image, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, image, _image); - struct anv_meta_saved_state saved_state; - - meta_clear_begin(&saved_state, cmd_buffer); - - for (uint32_t r = 0; r < rangeCount; r++) { - for (uint32_t l = 0; l < pRanges[r].levelCount; l++) { - for (uint32_t s = 0; s < pRanges[r].layerCount; s++) { - struct anv_image_view iview; - anv_image_view_init(&iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = _image, - .viewType = anv_meta_get_view_type(image), - .format = image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRanges[r].baseMipLevel + l, - .levelCount = 1, - .baseArrayLayer = pRanges[r].baseArrayLayer + s, - .layerCount = 1 - }, - }, - cmd_buffer); - - VkFramebuffer fb; - anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(&iview), - }, - .width = iview.extent.width, - .height = iview.extent.height, - .layers = 1 - }, &cmd_buffer->pool->alloc, &fb); - - VkRenderPass pass; - anv_CreateRenderPass(anv_device_to_handle(cmd_buffer->device), - 
&(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = iview.vk_format, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = &(VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveAttachmentCount = 1, - .pPreserveAttachments = (uint32_t[]) { 0 }, - }, - .dependencyCount = 0, - }, &cmd_buffer->pool->alloc, &pass); - - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderArea = { - .offset = { 0, 0, }, - .extent = { - .width = iview.extent.width, - .height = iview.extent.height, - }, - }, - .renderPass = pass, - .framebuffer = fb, - .clearValueCount = 1, - .pClearValues = (VkClearValue[]) { - { .color = *pColor }, - }, - }, VK_SUBPASS_CONTENTS_INLINE); - - ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); - - /* XXX: We're leaking the render pass and framebuffer */ - } - } - } - - meta_clear_end(&saved_state, cmd_buffer); -} - -void anv_CmdClearDepthStencilImage( - VkCommandBuffer cmd_buffer_h, - VkImage image_h, - VkImageLayout imageLayout, - const VkClearDepthStencilValue* pDepthStencil, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) +static void +anv_cmd_clear_image(VkCommandBuffer cmd_buffer_h, + VkImage image_h, + VkImageLayout image_layout, + const VkClearValue *clear_value, + uint32_t 
range_count, + const VkImageSubresourceRange *ranges) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h); ANV_FROM_HANDLE(anv_image, image, image_h); @@ -930,8 +823,8 @@ void anv_CmdClearDepthStencilImage( meta_clear_begin(&saved_state, cmd_buffer); - for (uint32_t r = 0; r < rangeCount; r++) { - const VkImageSubresourceRange *range = &pRanges[r]; + for (uint32_t r = 0; r < range_count; r++) { + const VkImageSubresourceRange *range = &ranges[r]; for (uint32_t l = 0; l < range->levelCount; ++l) { for (uint32_t s = 0; s < range->layerCount; ++s) { @@ -967,43 +860,56 @@ void anv_CmdClearDepthStencilImage( &cmd_buffer->pool->alloc, &fb); - VkAttachmentLoadOp depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; - if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) - depth_load_op = VK_ATTACHMENT_LOAD_OP_CLEAR; - - VkAttachmentLoadOp stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; - if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) - stencil_load_op = VK_ATTACHMENT_LOAD_OP_CLEAR; + VkAttachmentDescription att_desc = { + .format = iview.vk_format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = image_layout, + .finalLayout = image_layout, + }; + + VkSubpassDescription subpass_desc = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 0, + .pColorAttachments = NULL, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = NULL, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }; + + const VkAttachmentReference att_ref = { + .attachment = 0, + .layout = image_layout, + }; + + if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + att_desc.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + + subpass_desc.colorAttachmentCount = 1; + subpass_desc.pColorAttachments = &att_ref; + } else { + if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { + att_desc.loadOp = 
VK_ATTACHMENT_LOAD_OP_CLEAR; + } + if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { + att_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + } + + subpass_desc.pDepthStencilAttachment = &att_ref; + } VkRenderPass pass; anv_CreateRenderPass(device_h, &(VkRenderPassCreateInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = iview.vk_format, - .loadOp = depth_load_op, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = stencil_load_op, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = imageLayout, - .finalLayout = imageLayout, - }, + .pAttachments = &att_desc, .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 0, - .pColorAttachments = NULL, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = &(VkAttachmentReference) { - .attachment = 0, - .layout = imageLayout, - }, - .preserveAttachmentCount = 0, - .pPreserveAttachments = NULL, - }, - .dependencyCount = 0, + .pSubpasses = &subpass_desc, }, &cmd_buffer->pool->alloc, &pass); @@ -1021,9 +927,7 @@ void anv_CmdClearDepthStencilImage( .renderPass = pass, .framebuffer = fb, .clearValueCount = 1, - .pClearValues = (VkClearValue[]) { - { .depthStencil = *pDepthStencil }, - }, + .pClearValues = clear_value, }, VK_SUBPASS_CONTENTS_INLINE); @@ -1037,6 +941,32 @@ void anv_CmdClearDepthStencilImage( meta_clear_end(&saved_state, cmd_buffer); } +void anv_CmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + anv_cmd_clear_image(commandBuffer, image, imageLayout, + (const VkClearValue *) pColor, + rangeCount, pRanges); +} + +void anv_CmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + 
const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + anv_cmd_clear_image(commandBuffer, image, imageLayout, + (const VkClearValue *) pDepthStencil, + rangeCount, pRanges); +} + void anv_CmdClearAttachments( VkCommandBuffer commandBuffer, uint32_t attachmentCount, -- cgit v1.2.3 From eab6212efd36fadc76e7f6b451d4ecb79b867c3e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 15 Jan 2016 10:07:16 -0800 Subject: anv/meta: Stop leaking renderpass and framebuffer --- src/vulkan/anv_meta_clear.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index fd401bc968e..34087804075 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -932,8 +932,10 @@ anv_cmd_clear_image(VkCommandBuffer cmd_buffer_h, VK_SUBPASS_CONTENTS_INLINE); ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); - - /* XXX: We're leaking the render pass and framebuffer */ + ANV_CALL(DestroyRenderPass)(device_h, pass, + &cmd_buffer->pool->alloc); + ANV_CALL(DestroyFramebuffer)(device_h, fb, + &cmd_buffer->pool->alloc); } } } -- cgit v1.2.3 From 0e420cb67f8eb540bb726cd004f2b4e4fc78af58 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 15 Jan 2016 10:54:05 -0800 Subject: anv: Populate SURFACE_STATE more safely genX_image_view_init allocates up to 3 separate SURFACE_STATE structures, and populates each from a single template. Stop mutating the template between each final SURFACE_STATE. 
--- src/vulkan/gen7_state.c | 8 +++++++- src/vulkan/gen8_state.c | 34 ++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 09c1332e450..b24e484262a 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -173,7 +173,7 @@ genX(image_view_init)(struct anv_image_view *iview, const struct isl_extent3d image_align_sa = isl_surf_get_image_alignment_sa(&surface->isl); - struct GENX(RENDER_SURFACE_STATE) surface_state = { + const struct GENX(RENDER_SURFACE_STATE) template = { .SurfaceType = anv_surftype(image, pCreateInfo->viewType, false), .SurfaceArray = image->array_size > 1, .SurfaceFormat = iview->format, @@ -227,6 +227,8 @@ genX(image_view_init)(struct anv_image_view *iview, }; if (image->needs_nonrt_surface_state) { + struct GENX(RENDER_SURFACE_STATE) surface_state = template; + iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); surface_state.RenderCacheReadWriteMode = false; @@ -248,6 +250,8 @@ genX(image_view_init)(struct anv_image_view *iview, } if (image->needs_color_rt_surface_state) { + struct GENX(RENDER_SURFACE_STATE) surface_state = template; + iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); surface_state.RenderCacheReadWriteMode = 0; /* Write only */ @@ -270,6 +274,8 @@ genX(image_view_init)(struct anv_image_view *iview, } if (image->needs_storage_surface_state) { + struct GENX(RENDER_SURFACE_STATE) surface_state = template; + iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); surface_state.SurfaceType = diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index b6741e005d3..9fa2a0554b9 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -187,7 +187,7 @@ genX(image_view_init)(struct anv_image_view *iview, uint32_t halign, valign; get_halign_valign(&surface->isl, &halign, &valign); - struct GENX(RENDER_SURFACE_STATE) surface_state = 
{ + struct GENX(RENDER_SURFACE_STATE) template = { .SurfaceType = anv_surftype(image, pCreateInfo->viewType, false), .SurfaceArray = image->array_size > 1, .SurfaceFormat = iview->format, @@ -237,10 +237,10 @@ genX(image_view_init)(struct anv_image_view *iview, .SurfaceBaseAddress = { NULL, iview->offset }, }; - switch (surface_state.SurfaceType) { + switch (template.SurfaceType) { case SURFTYPE_1D: case SURFTYPE_2D: - surface_state.MinimumArrayElement = range->baseArrayLayer; + template.MinimumArrayElement = range->baseArrayLayer; /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: * @@ -251,37 +251,37 @@ genX(image_view_init)(struct anv_image_view *iview, * * In other words, 'Depth' is the number of array layers. */ - surface_state.Depth = range->layerCount - 1; + template.Depth = range->layerCount - 1; /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: * * For Render Target and Typed Dataport 1D and 2D Surfaces: * This field must be set to the same value as the Depth field. */ - surface_state.RenderTargetViewExtent = surface_state.Depth; + template.RenderTargetViewExtent = template.Depth; break; case SURFTYPE_CUBE: #if ANV_GENx10 >= 90 /* Like SURFTYPE_2D, but divided by 6. 
*/ - surface_state.MinimumArrayElement = range->baseArrayLayer / 6; - surface_state.Depth = range->layerCount / 6 - 1; - surface_state.RenderTargetViewExtent = surface_state.Depth; + template.MinimumArrayElement = range->baseArrayLayer / 6; + template.Depth = range->layerCount / 6 - 1; + template.RenderTargetViewExtent = template.Depth; #else /* Same as SURFTYPE_2D */ - surface_state.MinimumArrayElement = range->baseArrayLayer; - surface_state.Depth = range->layerCount - 1; - surface_state.RenderTargetViewExtent = surface_state.Depth; + template.MinimumArrayElement = range->baseArrayLayer; + template.Depth = range->layerCount - 1; + template.RenderTargetViewExtent = template.Depth; #endif break; case SURFTYPE_3D: - surface_state.MinimumArrayElement = range->baseArrayLayer; + template.MinimumArrayElement = range->baseArrayLayer; /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: * * If the volume texture is MIP-mapped, this field specifies the * depth of the base MIP level. */ - surface_state.Depth = image->extent.depth - 1; + template.Depth = image->extent.depth - 1; /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: * @@ -289,13 +289,15 @@ genX(image_view_init)(struct anv_image_view *iview, * indicates the extent of the accessible 'R' coordinates minus 1 on * the LOD currently being rendered to. 
*/ - surface_state.RenderTargetViewExtent = iview->extent.depth - 1; + template.RenderTargetViewExtent = iview->extent.depth - 1; break; default: unreachable(!"bad SurfaceType"); } if (image->needs_nonrt_surface_state) { + struct GENX(RENDER_SURFACE_STATE) surface_state = template; + iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); @@ -315,6 +317,8 @@ genX(image_view_init)(struct anv_image_view *iview, } if (image->needs_color_rt_surface_state) { + struct GENX(RENDER_SURFACE_STATE) surface_state = template; + iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); @@ -336,6 +340,8 @@ genX(image_view_init)(struct anv_image_view *iview, } if (image->needs_storage_surface_state) { + struct GENX(RENDER_SURFACE_STATE) surface_state = template; + iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); -- cgit v1.2.3 From 117cac75d07c5f0022cd2940162f5d239c3bed3b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 15 Jan 2016 11:10:58 -0800 Subject: nir/spirv: Stop trusting the SPIR-V for the number of texture coordinates --- src/glsl/nir/spirv/spirv_to_nir.c | 42 ++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 54030dd1d16..dc95b40f9c3 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2355,7 +2355,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, unsigned idx = 4; - unsigned coord_components = 0; + bool has_coord = false; switch (opcode) { case SpvOpImageSampleImplicitLod: case SpvOpImageSampleExplicitLod: @@ -2371,7 +2371,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, case SpvOpImageQueryLod: { /* All these types have the coordinate as their first real argument */ struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); - coord_components = glsl_get_vector_elements(coord->type); + has_coord = true; 
p->src = nir_src_for_ssa(coord->def); p->src_type = nir_tex_src_coord; p++; @@ -2478,10 +2478,41 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, assert(idx == count); nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); + instr->op = texop; + + memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); const struct glsl_type *sampler_type = nir_deref_tail(&sampled.sampler->deref)->type; instr->sampler_dim = glsl_get_sampler_dim(sampler_type); + instr->is_array = glsl_sampler_type_is_array(sampler_type); + instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); + instr->is_new_style_shadow = instr->is_shadow; + + if (has_coord) { + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + instr->coord_components = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + instr->coord_components = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + case GLSL_SAMPLER_DIM_MS: + instr->coord_components = 3; + break; + default: + assert("Invalid sampler type"); + } + + if (instr->is_array) + instr->coord_components++; + } else { + instr->coord_components = 0; + } switch (glsl_get_sampler_result_type(sampler_type)) { case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; @@ -2492,13 +2523,6 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, unreachable("Invalid base type for sampler result"); } - instr->op = texop; - memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); - instr->coord_components = coord_components; - instr->is_array = glsl_sampler_type_is_array(sampler_type); - instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); - instr->is_new_style_shadow = instr->is_shadow; - instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampled.sampler->deref)); if (sampled.image) { -- cgit v1.2.3 From 67bf74f020a3f9c08ad82d5578d4979a70940f67 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 15 Jan 2016 15:48:18 -0800 Subject: 
anv/batch_chain: Don't call current_batch_bo() again We call it once at the top of the function and then hold on to the pointer. It shouldn't have changed, so there's no reason to query for it again. --- src/vulkan/anv_batch_chain.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 466a3624234..9c80f5e3b03 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -667,8 +667,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) * actual ExecuteCommands implementation. */ if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && - (anv_cmd_buffer_current_batch_bo(cmd_buffer)->length < - ANV_CMD_BUFFER_BATCH_SIZE / 2)) { + (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) { /* If the secondary has exactly one batch buffer in its list *and* * that batch buffer is less than half of the maximum size, we're * probably better of simply copying it into our batch. @@ -683,7 +682,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) * so we can unconditionally decrement right before adding the * MI_BATCH_BUFFER_START command. 
*/ - anv_cmd_buffer_current_batch_bo(cmd_buffer)->relocs.num_relocs++; + batch_bo->relocs.num_relocs++; cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4; } else { cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; -- cgit v1.2.3 From 6b64dddd7118e75dbb9d6aa4917b296f384aba89 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 15 Jan 2016 15:59:58 -0800 Subject: anv/batch_chain: Remove padding from the BO before emitting BUFFER_END --- src/vulkan/anv_batch_chain.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 9c80f5e3b03..89215fe6992 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -649,6 +649,15 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + /* When we start a batch buffer, we subtract a certain amount of + * padding from the end to ensure that we always have room to emit a + * BATCH_BUFFER_START to chain to the next BO. We need to remove + * that padding before we end the batch; otherwise, we may end up + * with our BATCH_BUFFER_END in another BO. + */ + cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size); + anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END); /* Round batch up to an even number of dwords. 
*/ -- cgit v1.2.3 From f509a890828dc54ffc113a581147ccfa8408b082 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 15 Jan 2016 16:15:50 -0800 Subject: nir/lower_system_values: Lower vertexID to id+base if needed --- src/glsl/nir/nir.h | 3 +++ src/glsl/nir/nir_lower_system_values.c | 10 ++++++++++ src/mesa/drivers/dri/i965/brw_shader.cpp | 1 + 3 files changed, 14 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 3d2d280db12..725703d0588 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1534,6 +1534,9 @@ typedef struct nir_shader_compiler_options { * are simulated by floats.) */ bool native_integers; + + /* Indicates that the driver only has zero-based vertex id */ + bool vertex_id_zero_based; } nir_shader_compiler_options; typedef struct nir_shader_info { diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c index f642c38561a..69d0554fbca 100644 --- a/src/glsl/nir/nir_lower_system_values.c +++ b/src/glsl/nir/nir_lower_system_values.c @@ -103,6 +103,16 @@ convert_block(nir_block *block, void *void_state) break; } + case SYSTEM_VALUE_VERTEX_ID: + if (b->shader->options->vertex_id_zero_based) { + sysval = nir_iadd(b, + nir_load_system_value(b, nir_intrinsic_load_vertex_id_zero_base, 0), + nir_load_system_value(b, nir_intrinsic_load_base_vertex, 0)); + } else { + sysval = nir_load_system_value(b, nir_intrinsic_load_vertex_id, 0); + } + break; + default: { nir_intrinsic_op sysval_op = nir_intrinsic_from_system_value(var->data.location); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index d507e99d352..fec96bac923 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -96,6 +96,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) nir_shader_compiler_options *nir_options = rzalloc(compiler, nir_shader_compiler_options); nir_options->native_integers = 
true; + nir_options->vertex_id_zero_based = true; nir_options->lower_fdiv = true; /* In order to help allow for better CSE at the NIR level we tell NIR * to split all ffma instructions during opt_algebraic and we then -- cgit v1.2.3 From b1f1200e80f70105dc4b3e44a0c0ea447bd7891d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 16 Jan 2016 21:26:10 -0800 Subject: util/bitset: Allow iterating over const bitsets --- src/util/bitset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/util/bitset.h b/src/util/bitset.h index c452819414f..2404ce7f630 100644 --- a/src/util/bitset.h +++ b/src/util/bitset.h @@ -98,7 +98,7 @@ __bitset_ffs(const BITSET_WORD *x, int n) static inline unsigned __bitset_next_set(unsigned i, BITSET_WORD *tmp, - BITSET_WORD *set, unsigned size) + const BITSET_WORD *set, unsigned size) { unsigned bit, word; -- cgit v1.2.3 From 8aab4a7bd2c8966dd5f24ed81b7c96074ed0eb97 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 08:38:57 -0800 Subject: nir: Add a phi node placement helper Right now, we have phi placement code in two places and there are other places where it would be nice to be able to do this analysis. Instead of repeating it all over the place, this commit adds a helper for placing all of the needed phi nodes for a value. 
--- src/glsl/Makefile.sources | 2 + src/glsl/nir/nir_phi_builder.c | 254 +++++++++++++++++++++++++++++++++++++++++ src/glsl/nir/nir_phi_builder.h | 84 ++++++++++++++ 3 files changed, 340 insertions(+) create mode 100644 src/glsl/nir/nir_phi_builder.c create mode 100644 src/glsl/nir/nir_phi_builder.h (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 80d21a0718d..202f2fd4f5a 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -74,6 +74,8 @@ NIR_FILES = \ nir/nir_opt_peephole_select.c \ nir/nir_opt_remove_phis.c \ nir/nir_opt_undef.c \ + nir/nir_phi_builder.c \ + nir/nir_phi_builder.h \ nir/nir_print.c \ nir/nir_remove_dead_variables.c \ nir/nir_search.c \ diff --git a/src/glsl/nir/nir_phi_builder.c b/src/glsl/nir/nir_phi_builder.c new file mode 100644 index 00000000000..5429083e5c8 --- /dev/null +++ b/src/glsl/nir/nir_phi_builder.c @@ -0,0 +1,254 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir_phi_builder.h" +#include "nir/nir_vla.h" + +struct nir_phi_builder { + nir_shader *shader; + nir_function_impl *impl; + + /* Copied from the impl for easy access */ + unsigned num_blocks; + + /* Array of all blocks indexed by block->index. */ + nir_block **blocks; + + /* Hold on to the values so we can easily iterate over them. */ + struct exec_list values; + + /* Worklist for phi adding */ + unsigned iter_count; + unsigned *work; + nir_block **W; +}; + +#define NEEDS_PHI ((nir_ssa_def *)(intptr_t)-1) + +struct nir_phi_builder_value { + struct exec_node node; + + struct nir_phi_builder *builder; + + /* Needed so we can create phis and undefs */ + unsigned num_components; + + /* The list of phi nodes associated with this value. Phi nodes are not + * added directly. Instead, they are created, the instr->block pointer + * set, and then added to this list. Later, in phi_builder_finish, we + * set up their sources and add them to the top of their respective + * blocks. + */ + struct exec_list phis; + + /* Array of SSA defs, indexed by block. If a phi needs to be inserted + * in a given block, it will have the magic value NEEDS_PHI. 
+ */ + nir_ssa_def *defs[0]; +}; + +static bool +fill_block_array(nir_block *block, void *void_data) +{ + nir_block **blocks = void_data; + blocks[block->index] = block; + return true; +} + +struct nir_phi_builder * +nir_phi_builder_create(nir_function_impl *impl) +{ + struct nir_phi_builder *pb = ralloc(NULL, struct nir_phi_builder); + + pb->shader = impl->function->shader; + pb->impl = impl; + + assert(impl->valid_metadata & (nir_metadata_block_index | + nir_metadata_dominance)); + + pb->num_blocks = impl->num_blocks; + pb->blocks = ralloc_array(pb, nir_block *, pb->num_blocks); + nir_foreach_block(impl, fill_block_array, pb->blocks); + + exec_list_make_empty(&pb->values); + + pb->iter_count = 0; + pb->work = rzalloc_array(pb, unsigned, pb->num_blocks); + pb->W = ralloc_array(pb, nir_block *, pb->num_blocks); + + return pb; +} + +struct nir_phi_builder_value * +nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, + const BITSET_WORD *defs) +{ + struct nir_phi_builder_value *val; + unsigned i, w_start = 0, w_end = 0; + + val = rzalloc_size(pb, sizeof(*val) + sizeof(val->defs[0]) * pb->num_blocks); + val->builder = pb; + val->num_components = num_components; + exec_list_make_empty(&val->phis); + exec_list_push_tail(&pb->values, &val->node); + + pb->iter_count++; + + BITSET_WORD tmp; + BITSET_FOREACH_SET(i, tmp, defs, pb->num_blocks) { + if (pb->work[i] < pb->iter_count) + pb->W[w_end++] = pb->blocks[i]; + pb->work[i] = pb->iter_count; + } + + while (w_start != w_end) { + nir_block *cur = pb->W[w_start++]; + struct set_entry *dom_entry; + set_foreach(cur->dom_frontier, dom_entry) { + nir_block *next = (nir_block *) dom_entry->key; + + /* + * If there's more than one return statement, then the end block + * can be a join point for some definitions. However, there are + * no instructions in the end block, so nothing would use those + * phi nodes. 
Of course, we couldn't place those phi nodes + * anyways due to the restriction of having no instructions in the + * end block... + */ + if (next == pb->impl->end_block) + continue; + + if (val->defs[next->index] == NULL) { + val->defs[next->index] = NEEDS_PHI; + + if (pb->work[next->index] < pb->iter_count) { + pb->work[next->index] = pb->iter_count; + pb->W[w_end++] = next; + } + } + } + } + + return val; +} + +void +nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val, + nir_block *block, nir_ssa_def *def) +{ + val->defs[block->index] = def; +} + +nir_ssa_def * +nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, + nir_block *block) +{ + if (val->defs[block->index] == NULL) { + if (block->imm_dom) { + /* Grab it from our immediate dominator. We'll stash it here for + * easy access later. + */ + val->defs[block->index] = + nir_phi_builder_value_get_block_def(val, block->imm_dom); + return val->defs[block->index]; + } else { + /* No immediate dominator means that this block is either the + * start block or unreachable. In either case, the value is + * undefined so we need an SSA undef. + */ + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(val->builder->shader, + val->num_components); + nir_instr_insert(nir_before_cf_list(&val->builder->impl->body), + &undef->instr); + val->defs[block->index] = &undef->def; + return &undef->def; + } + } else if (val->defs[block->index] == NEEDS_PHI) { + /* If we need a phi instruction, go ahead and create one but don't + * add it to the program yet. Later, we'll go through and set up phi + * sources and add the instructions will be added at that time. 
+ */ + nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components, NULL); + phi->instr.block = block; + exec_list_push_tail(&val->phis, &phi->instr.node); + val->defs[block->index] = &phi->dest.ssa; + return &phi->dest.ssa; + } else { + return val->defs[block->index]; + } +} + +static int +compare_blocks(const void *_a, const void *_b) +{ + nir_block * const * a = _a; + nir_block * const * b = _b; + + return (*a)->index - (*b)->index; +} + +void +nir_phi_builder_finish(struct nir_phi_builder *pb) +{ + const unsigned num_blocks = pb->num_blocks; + NIR_VLA(nir_block *, preds, num_blocks); + + foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) { + /* We can't iterate over the list of phis normally because we are + * removing them as we go and, in some cases, adding new phis as we + * build the source lists of others. + */ + while (!exec_list_is_empty(&val->phis)) { + struct exec_node *head = exec_list_get_head(&val->phis); + nir_phi_instr *phi = exec_node_data(nir_phi_instr, head, instr.node); + assert(phi->instr.type == nir_instr_type_phi); + + exec_node_remove(&phi->instr.node); + + /* Construct an array of predecessors. We sort it to ensure + * determinism in the phi insertion algorithm. + * + * XXX: Calling qsort this many times seems expensive. 
+ */ + int num_preds = 0; + struct set_entry *entry; + set_foreach(phi->instr.block->predecessors, entry) + preds[num_preds++] = (nir_block *)entry->key; + qsort(preds, num_preds, sizeof(*preds), compare_blocks); + + for (unsigned i = 0; i < num_preds; i++) { + nir_phi_src *src = ralloc(phi, nir_phi_src); + src->pred = preds[i]; + src->src = nir_src_for_ssa( + nir_phi_builder_value_get_block_def(val, preds[i])); + exec_list_push_tail(&phi->srcs, &src->node); + } + + nir_instr_insert(nir_before_block(phi->instr.block), &phi->instr); + } + } + + ralloc_free(pb); +} diff --git a/src/glsl/nir/nir_phi_builder.h b/src/glsl/nir/nir_phi_builder.h new file mode 100644 index 00000000000..50251bf1ba3 --- /dev/null +++ b/src/glsl/nir/nir_phi_builder.h @@ -0,0 +1,84 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "nir.h" + +struct nir_phi_builder; +struct nir_phi_builder_value; + +/* Create a new phi builder. + * + * While this is fairly cheap, it does allocate some memory and walk the list + * of blocks so it's recommended that you only call it once and use it to + * build phis for several values. + */ +struct nir_phi_builder *nir_phi_builder_create(nir_function_impl *impl); + +/* Register a value with the builder. + * + * The 'defs' parameter specifies a bitset of blocks in which the given value + * is defined. This is used to determine where to place the phi nodes. + */ +struct nir_phi_builder_value * +nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, + const BITSET_WORD *defs); + +/* Register a definition for the given value and block. + * + * It is safe to call this function as many times as you wish for any given + * block/value pair. However, it always replaces whatever was there + * previously even if that definition is from a phi node. The phi builder + * always uses the latest information it has, so you must be careful about the + * order in which you register definitions. The final value at the end of the + * block must be the last value registered. + */ +void +nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val, + nir_block *block, nir_ssa_def *def); + +/* Get the definition for the given value in the given block. + * + * This definition will always be the latest definition known for the given + * block. If no definition is immediately available, it will crawl up the + * dominance tree and insert phi nodes as needed until it finds one. In the + * case that no suitable definition is found, it will return the result of a + * nir_ssa_undef_instr with the correct number of components. 
+ * + * Because this function only uses the latest available information for any + * given block, you must have already finished registering definitions for any + * blocks that dominate the current block in order to get the correct result. + */ +nir_ssa_def * +nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, + nir_block *block); + +/* Finish building phi nodes and free the builder. + * + * This function does far more than just free memory. Prior to calling + * nir_phi_builder_finish, no phi nodes have actually been inserted in the + * program. This function is what finishes setting up phi node sources and + * adds the phi nodes to the program. + */ +void nir_phi_builder_finish(struct nir_phi_builder *pb); -- cgit v1.2.3 From a7a5e8a2de129a70838c46b100d156f2f39b5b04 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 08:40:16 -0800 Subject: nir/vars_to_ssa: Use the new nir_phi_builder helper The efficiency should be approximately the same. We do a little more work per phi node because we have to sort the predecessors. However, we no longer have to walk the blocks a second time to pop things off the stack. The bigger advantage, however, is that we can now re-use the phi placement and per-block SSA value tracking in other passes. 
--- src/glsl/nir/nir_lower_vars_to_ssa.c | 526 +++++++++-------------------------- 1 file changed, 129 insertions(+), 397 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c index a5f5ef2de3c..e1f368d2f2b 100644 --- a/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -27,6 +27,7 @@ #include "nir.h" #include "nir_builder.h" +#include "nir_phi_builder.h" #include "nir_vla.h" @@ -47,8 +48,7 @@ struct deref_node { struct set *stores; struct set *copies; - nir_ssa_def **def_stack; - nir_ssa_def **def_stack_tail; + struct nir_phi_builder_value *pb_value; struct deref_node *wildcard; struct deref_node *indirect; @@ -87,8 +87,7 @@ struct lower_variables_state { */ bool add_to_direct_deref_nodes; - /* A hash table mapping phi nodes to deref_state data */ - struct hash_table *phi_table; + struct nir_phi_builder *phi_builder; }; static struct deref_node * @@ -473,141 +472,6 @@ lower_copies_to_load_store(struct deref_node *node, return true; } -/** Pushes an SSA def onto the def stack for the given node - * - * Each node is potentially associated with a stack of SSA definitions. - * This stack is used for determining what SSA definition reaches a given - * point in the program for variable renaming. The stack is always kept in - * dominance-order with at most one SSA def per block. If the SSA - * definition on the top of the stack is in the same block as the one being - * pushed, the top element is replaced. 
- */ -static void -def_stack_push(struct deref_node *node, nir_ssa_def *def, - struct lower_variables_state *state) -{ - if (node->def_stack == NULL) { - node->def_stack = ralloc_array(state->dead_ctx, nir_ssa_def *, - state->impl->num_blocks); - node->def_stack_tail = node->def_stack - 1; - } - - if (node->def_stack_tail >= node->def_stack) { - nir_ssa_def *top_def = *node->def_stack_tail; - - if (def->parent_instr->block == top_def->parent_instr->block) { - /* They're in the same block, just replace the top */ - *node->def_stack_tail = def; - return; - } - } - - *(++node->def_stack_tail) = def; -} - -/* Pop the top of the def stack if it's in the given block */ -static void -def_stack_pop_if_in_block(struct deref_node *node, nir_block *block) -{ - /* If we're popping, then we have presumably pushed at some time in the - * past so this should exist. - */ - assert(node->def_stack != NULL); - - /* The stack is already empty. Do nothing. */ - if (node->def_stack_tail < node->def_stack) - return; - - nir_ssa_def *def = *node->def_stack_tail; - if (def->parent_instr->block == block) - node->def_stack_tail--; -} - -/** Retrieves the SSA definition on the top of the stack for the given - * node, if one exists. If the stack is empty, then we return the constant - * initializer (if it exists) or an SSA undef. - */ -static nir_ssa_def * -get_ssa_def_for_block(struct deref_node *node, nir_block *block, - struct lower_variables_state *state) -{ - /* If we have something on the stack, go ahead and return it. We're - * assuming that the top of the stack dominates the given block. - */ - if (node->def_stack && node->def_stack_tail >= node->def_stack) - return *node->def_stack_tail; - - /* If we got here then we don't have a definition that dominates the - * given block. This means that we need to add an undef and use that. 
- */ - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->shader, - glsl_get_vector_elements(node->type)); - nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr); - def_stack_push(node, &undef->def, state); - return &undef->def; -} - -/* Given a block and one of its predecessors, this function fills in the - * souces of the phi nodes to take SSA defs from the given predecessor. - * This function must be called exactly once per block/predecessor pair. - */ -static void -add_phi_sources(nir_block *block, nir_block *pred, - struct lower_variables_state *state) -{ - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi = nir_instr_as_phi(instr); - - struct hash_entry *entry = - _mesa_hash_table_search(state->phi_table, phi); - if (!entry) - continue; - - struct deref_node *node = entry->data; - - nir_phi_src *src = ralloc(phi, nir_phi_src); - src->pred = pred; - src->src.parent_instr = &phi->instr; - src->src.is_ssa = true; - src->src.ssa = get_ssa_def_for_block(node, pred, state); - - list_addtail(&src->src.use_link, &src->src.ssa->uses); - - exec_list_push_tail(&phi->srcs, &src->node); - } -} - -static void -add_undef_phi_sources(nir_block *block, nir_block *pred, - struct lower_variables_state *state) -{ - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi = nir_instr_as_phi(instr); - - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->shader, - phi->dest.ssa.num_components); - nir_instr_insert(nir_before_cf_list(&state->impl->body), &undef->instr); - - nir_phi_src *src = ralloc(phi, nir_phi_src); - src->pred = pred; - src->src.parent_instr = &phi->instr; - src->src.is_ssa = true; - src->src.ssa = &undef->def; - - list_addtail(&src->src.use_link, &undef->def.uses); - - exec_list_push_tail(&phi->srcs, &src->node); - } -} - /* Performs variable renaming by doing a DFS of the dominance tree * * This algorithm is very 
similar to the one outlined in "Efficiently @@ -622,282 +486,126 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) nir_builder_init(&b, state->impl); nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_phi) { - nir_phi_instr *phi = nir_instr_as_phi(instr); - - struct hash_entry *entry = - _mesa_hash_table_search(state->phi_table, phi); - - /* This can happen if we already have phi nodes in the program - * that were not created in this pass. - */ - if (!entry) - continue; - - struct deref_node *node = entry->data; - - def_stack_push(node, &phi->dest.ssa, state); - } else if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - switch (intrin->intrinsic) { - case nir_intrinsic_load_var: { - struct deref_node *node = - get_deref_node(intrin->variables[0], state); - - if (node == NULL) { - /* If we hit this path then we are referencing an invalid - * value. Most likely, we unrolled something and are - * reading past the end of some array. In any case, this - * should result in an undefined value. 
- */ - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->shader, - intrin->num_components); - - nir_instr_insert_before(&intrin->instr, &undef->instr); - nir_instr_remove(&intrin->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&undef->def)); - continue; - } - - if (!node->lower_to_ssa) - continue; - - nir_alu_instr *mov = nir_alu_instr_create(state->shader, - nir_op_imov); - mov->src[0].src.is_ssa = true; - mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state); - for (unsigned i = intrin->num_components; i < 4; i++) - mov->src[0].swizzle[i] = 0; + if (instr->type != nir_instr_type_intrinsic) + continue; - assert(intrin->dest.is_ssa); + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - mov->dest.write_mask = (1 << intrin->num_components) - 1; - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, - intrin->num_components, NULL); + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: { + struct deref_node *node = + get_deref_node(intrin->variables[0], state); + + if (node == NULL) { + /* If we hit this path then we are referencing an invalid + * value. Most likely, we unrolled something and are + * reading past the end of some array. In any case, this + * should result in an undefined value. + */ + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(state->shader, + intrin->num_components); - nir_instr_insert_before(&intrin->instr, &mov->instr); + nir_instr_insert_before(&intrin->instr, &undef->instr); nir_instr_remove(&intrin->instr); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&mov->dest.dest.ssa)); - break; + nir_src_for_ssa(&undef->def)); + continue; } - case nir_intrinsic_store_var: { - struct deref_node *node = - get_deref_node(intrin->variables[0], state); - - if (node == NULL) { - /* Probably an out-of-bounds array store. That should be a - * no-op. 
*/ - nir_instr_remove(&intrin->instr); - continue; - } + if (!node->lower_to_ssa) + continue; - if (!node->lower_to_ssa) - continue; - - assert(intrin->num_components == - glsl_get_vector_elements(node->type)); - - assert(intrin->src[0].is_ssa); - - nir_ssa_def *new_def; - b.cursor = nir_before_instr(&intrin->instr); - - if (intrin->const_index[0] == (1 << intrin->num_components) - 1) { - /* Whole variable store - just copy the source. Note that - * intrin->num_components and intrin->src[0].ssa->num_components - * may differ. - */ - unsigned swiz[4]; - for (unsigned i = 0; i < 4; i++) - swiz[i] = i < intrin->num_components ? i : 0; - - new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, - intrin->num_components, false); - } else { - nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state); - /* For writemasked store_var intrinsics, we combine the newly - * written values with the existing contents of unwritten - * channels, creating a new SSA value for the whole vector. - */ - nir_ssa_def *srcs[4]; - for (unsigned i = 0; i < intrin->num_components; i++) { - if (intrin->const_index[0] & (1 << i)) { - srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); - } else { - srcs[i] = nir_channel(&b, old_def, i); - } - } - new_def = nir_vec(&b, srcs, intrin->num_components); - } + nir_alu_instr *mov = nir_alu_instr_create(state->shader, + nir_op_imov); + mov->src[0].src = nir_src_for_ssa( + nir_phi_builder_value_get_block_def(node->pb_value, block)); + for (unsigned i = intrin->num_components; i < 4; i++) + mov->src[0].swizzle[i] = 0; - assert(new_def->num_components == intrin->num_components); + assert(intrin->dest.is_ssa); - def_stack_push(node, new_def, state); + mov->dest.write_mask = (1 << intrin->num_components) - 1; + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, + intrin->num_components, NULL); - /* We'll wait to remove the instruction until the next pass - * where we pop the node we just pushed back off the stack. 
- */ - break; - } + nir_instr_insert_before(&intrin->instr, &mov->instr); + nir_instr_remove(&intrin->instr); - default: - break; - } + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&mov->dest.dest.ssa)); + break; } - } - - if (block->successors[0]) - add_phi_sources(block->successors[0], block, state); - if (block->successors[1]) - add_phi_sources(block->successors[1], block, state); - for (unsigned i = 0; i < block->num_dom_children; ++i) - rename_variables_block(block->dom_children[i], state); - - /* Now we iterate over the instructions and pop off any SSA defs that we - * pushed in the first loop. - */ - nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_phi) { - nir_phi_instr *phi = nir_instr_as_phi(instr); - - struct hash_entry *entry = - _mesa_hash_table_search(state->phi_table, phi); + case nir_intrinsic_store_var: { + struct deref_node *node = + get_deref_node(intrin->variables[0], state); - /* This can happen if we already have phi nodes in the program - * that were not created in this pass. - */ - if (!entry) - continue; - - struct deref_node *node = entry->data; - - def_stack_pop_if_in_block(node, block); - } else if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - if (intrin->intrinsic != nir_intrinsic_store_var) - continue; - - struct deref_node *node = get_deref_node(intrin->variables[0], state); - if (!node) + if (node == NULL) { + /* Probably an out-of-bounds array store. That should be a + * no-op. 
*/ + nir_instr_remove(&intrin->instr); continue; + } if (!node->lower_to_ssa) continue; - def_stack_pop_if_in_block(node, block); - nir_instr_remove(&intrin->instr); - } - } - - return true; -} - -static bool -add_unreachable_phi_srcs_block(nir_block *block, void *void_state) -{ - struct lower_variables_state *state = void_state; - - /* Only run on unreachable blocks */ - if (block->imm_dom || block == nir_start_block(state->impl)) - return true; - - if (block->successors[0]) - add_undef_phi_sources(block->successors[0], block, state); - if (block->successors[1]) - add_undef_phi_sources(block->successors[1], block, state); - - return true; -} - -/* Inserts phi nodes for all variables marked lower_to_ssa - * - * This is the same algorithm as presented in "Efficiently Computing Static - * Single Assignment Form and the Control Dependence Graph" by Cytron et. - * al. - */ -static void -insert_phi_nodes(struct lower_variables_state *state) -{ - NIR_VLA_ZERO(unsigned, work, state->impl->num_blocks); - NIR_VLA_ZERO(unsigned, has_already, state->impl->num_blocks); - - /* - * Since the work flags already prevent us from inserting a node that has - * ever been inserted into W, we don't need to use a set to represent W. - * Also, since no block can ever be inserted into W more than once, we know - * that the maximum size of W is the number of basic blocks in the - * function. So all we need to handle W is an array and a pointer to the - * next element to be inserted and the next element to be removed. 
- */ - NIR_VLA(nir_block *, W, state->impl->num_blocks); - - unsigned w_start, w_end; - unsigned iter_count = 0; - - foreach_list_typed(struct deref_node, node, direct_derefs_link, - &state->direct_deref_nodes) { - if (node->stores == NULL) - continue; - - if (!node->lower_to_ssa) - continue; + assert(intrin->num_components == + glsl_get_vector_elements(node->type)); - w_start = w_end = 0; - iter_count++; + assert(intrin->src[0].is_ssa); - struct set_entry *store_entry; - set_foreach(node->stores, store_entry) { - nir_intrinsic_instr *store = (nir_intrinsic_instr *)store_entry->key; - if (work[store->instr.block->index] < iter_count) - W[w_end++] = store->instr.block; - work[store->instr.block->index] = iter_count; - } + nir_ssa_def *new_def; + b.cursor = nir_before_instr(&intrin->instr); - while (w_start != w_end) { - nir_block *cur = W[w_start++]; - struct set_entry *dom_entry; - set_foreach(cur->dom_frontier, dom_entry) { - nir_block *next = (nir_block *) dom_entry->key; - - /* - * If there's more than one return statement, then the end block - * can be a join point for some definitions. However, there are - * no instructions in the end block, so nothing would use those - * phi nodes. Of course, we couldn't place those phi nodes - * anyways due to the restriction of having no instructions in the - * end block... + if (intrin->const_index[0] == (1 << intrin->num_components) - 1) { + /* Whole variable store - just copy the source. Note that + * intrin->num_components and intrin->src[0].ssa->num_components + * may differ. */ - if (next == state->impl->end_block) - continue; + unsigned swiz[4]; + for (unsigned i = 0; i < 4; i++) + swiz[i] = i < intrin->num_components ? 
i : 0; - if (has_already[next->index] < iter_count) { - nir_phi_instr *phi = nir_phi_instr_create(state->shader); - nir_ssa_dest_init(&phi->instr, &phi->dest, - glsl_get_vector_elements(node->type), NULL); - nir_instr_insert_before_block(next, &phi->instr); - - _mesa_hash_table_insert(state->phi_table, phi, node); - - has_already[next->index] = iter_count; - if (work[next->index] < iter_count) { - work[next->index] = iter_count; - W[w_end++] = next; + new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, + intrin->num_components, false); + } else { + nir_ssa_def *old_def = + nir_phi_builder_value_get_block_def(node->pb_value, block); + /* For writemasked store_var intrinsics, we combine the newly + * written values with the existing contents of unwritten + * channels, creating a new SSA value for the whole vector. + */ + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < intrin->num_components; i++) { + if (intrin->const_index[0] & (1 << i)) { + srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); + } else { + srcs[i] = nir_channel(&b, old_def, i); } } + new_def = nir_vec(&b, srcs, intrin->num_components); } + + assert(new_def->num_components == intrin->num_components); + + nir_phi_builder_value_set_block_def(node->pb_value, block, new_def); + nir_instr_remove(&intrin->instr); + break; + } + + default: + break; } } -} + for (unsigned i = 0; i < block->num_dom_children; ++i) + rename_variables_block(block->dom_children[i], state); + + return true; +} /** Implements a pass to lower variable uses to SSA values * @@ -939,9 +647,6 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) _mesa_hash_pointer, _mesa_key_pointer_equal); exec_list_make_empty(&state.direct_deref_nodes); - state.phi_table = _mesa_hash_table_create(state.dead_ctx, - _mesa_hash_pointer, - _mesa_key_pointer_equal); /* Build the initial deref structures and direct_deref_nodes table */ state.add_to_direct_deref_nodes = true; @@ -971,15 +676,6 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) 
node->lower_to_ssa = true; progress = true; - if (deref->var->constant_initializer) { - nir_load_const_instr *load = - nir_deref_get_const_initializer_load(state.shader, deref); - nir_ssa_def_init(&load->instr, &load->def, - glsl_get_vector_elements(node->type), NULL); - nir_instr_insert_before_cf_list(&impl->body, &load->instr); - def_stack_push(node, &load->def, &state); - } - foreach_deref_node_match(deref, lower_copies_to_load_store, &state); } @@ -996,10 +692,46 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) */ nir_foreach_block(impl, register_variable_uses_block, &state); - insert_phi_nodes(&state); + state.phi_builder = nir_phi_builder_create(state.impl); + + NIR_VLA(BITSET_WORD, store_blocks, BITSET_WORDS(state.impl->num_blocks)); + foreach_list_typed(struct deref_node, node, direct_derefs_link, + &state.direct_deref_nodes) { + if (!node->lower_to_ssa) + continue; + + memset(store_blocks, 0, + BITSET_WORDS(state.impl->num_blocks) * sizeof(*store_blocks)); + + if (node->stores) { + struct set_entry *store_entry; + set_foreach(node->stores, store_entry) { + nir_intrinsic_instr *store = + (nir_intrinsic_instr *)store_entry->key; + BITSET_SET(store_blocks, store->instr.block->index); + } + } + + if (node->deref->var->constant_initializer) + BITSET_SET(store_blocks, 0); + + node->pb_value = + nir_phi_builder_add_value(state.phi_builder, + glsl_get_vector_elements(node->type), + store_blocks); + + if (node->deref->var->constant_initializer) { + nir_load_const_instr *load = + nir_deref_get_const_initializer_load(state.shader, node->deref); + nir_instr_insert_before_cf_list(&impl->body, &load->instr); + nir_phi_builder_value_set_block_def(node->pb_value, + nir_start_block(impl), &load->def); + } + } + rename_variables_block(nir_start_block(impl), &state); - nir_foreach_block(impl, add_unreachable_phi_srcs_block, &state); + nir_phi_builder_finish(state.phi_builder); nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); -- cgit 
v1.2.3 From b11825590d31a1e41de4c38cf96281d75be7a263 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 09:19:01 -0800 Subject: nir: Add a pass to repair SSA form --- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir.h | 3 + src/glsl/nir/nir_repair_ssa.c | 157 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 161 insertions(+) create mode 100644 src/glsl/nir/nir_repair_ssa.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 202f2fd4f5a..777abf1bd49 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -78,6 +78,7 @@ NIR_FILES = \ nir/nir_phi_builder.h \ nir/nir_print.c \ nir/nir_remove_dead_variables.c \ + nir/nir_repair_ssa.c \ nir/nir_search.c \ nir/nir_search.h \ nir/nir_split_var_copies.c \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 725703d0588..49af2f31e9e 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -2180,6 +2180,9 @@ bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); void nir_convert_to_ssa_impl(nir_function_impl *impl); void nir_convert_to_ssa(nir_shader *shader); +bool nir_repair_ssa_impl(nir_function_impl *impl); +bool nir_repair_ssa(nir_shader *shader); + /* If phi_webs_only is true, only convert SSA values involved in phi nodes to * registers. If false, convert all values (even those not involved in a phi * node) to registers. 
diff --git a/src/glsl/nir/nir_repair_ssa.c b/src/glsl/nir/nir_repair_ssa.c new file mode 100644 index 00000000000..3ab4f0f6db7 --- /dev/null +++ b/src/glsl/nir/nir_repair_ssa.c @@ -0,0 +1,157 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_phi_builder.h" + +struct repair_ssa_state { + nir_function_impl *impl; + + BITSET_WORD *def_set; + struct nir_phi_builder *phi_builder; + + bool progress; +}; + +/* Get ready to build a phi and return the builder */ +static struct nir_phi_builder * +prep_build_phi(struct repair_ssa_state *state) +{ + const unsigned num_words = BITSET_WORDS(state->impl->num_blocks); + + /* We create the phi builder on-demand. 
*/ + if (state->phi_builder == NULL) { + state->phi_builder = nir_phi_builder_create(state->impl); + state->def_set = ralloc_array(NULL, BITSET_WORD, num_words); + } + + /* We're going to build a phi. That's progress. */ + state->progress = true; + + /* Set the defs set to empty */ + memset(state->def_set, 0, num_words * sizeof(*state->def_set)); + + return state->phi_builder; +} + +static nir_block * +get_src_block(nir_src *src) +{ + if (src->parent_instr->type == nir_instr_type_phi) { + return exec_node_data(nir_phi_src, src, src)->pred; + } else { + return src->parent_instr->block; + } +} + +static bool +repair_ssa_def(nir_ssa_def *def, void *void_state) +{ + struct repair_ssa_state *state = void_state; + + bool is_valid = true; + nir_foreach_use(def, src) { + if (!nir_block_dominates(def->parent_instr->block, get_src_block(src))) { + is_valid = false; + break; + } + } + + if (is_valid) + return true; + + struct nir_phi_builder *pb = prep_build_phi(state); + + BITSET_SET(state->def_set, def->parent_instr->block->index); + + struct nir_phi_builder_value *val = + nir_phi_builder_add_value(pb, def->num_components, state->def_set); + + nir_phi_builder_value_set_block_def(val, def->parent_instr->block, def); + + nir_foreach_use_safe(def, src) { + nir_block *src_block = get_src_block(src); + if (!nir_block_dominates(def->parent_instr->block, src_block)) { + nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa( + nir_phi_builder_value_get_block_def(val, src_block))); + } + } + + return true; +} + +static bool +repair_ssa_block(nir_block *block, void *state) +{ + nir_foreach_instr_safe(block, instr) { + nir_foreach_ssa_def(instr, repair_ssa_def, state); + } + + return true; +} + +bool +nir_repair_ssa_impl(nir_function_impl *impl) +{ + struct repair_ssa_state state; + + state.impl = impl; + state.phi_builder = NULL; + state.progress = false; + + nir_metadata_require(impl, nir_metadata_block_index | + nir_metadata_dominance); + + nir_foreach_block(impl, 
repair_ssa_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + if (state.phi_builder) { + nir_phi_builder_finish(state.phi_builder); + ralloc_free(state.def_set); + } + + return state.progress; +} + +/** This pass can be used to repair SSA form in a shader. + * + * Sometimes a transformation (such as return lowering) will have to make + * changes to a shader which, while still correct, break some of NIR's SSA + * invariants. This pass will insert ssa_undefs and phi nodes as needed to + * get the shader back into SSA that the validator will like. + */ +bool +nir_repair_ssa(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = nir_repair_ssa_impl(function->impl) || progress; + } + + return progress; +} -- cgit v1.2.3 From 61ba97522ea8551714c5b8b2a90983594c208165 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 09:24:07 -0800 Subject: nir/lower_returns: Repair SSA after doing return lowering --- src/glsl/nir/nir_lower_returns.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_returns.c b/src/glsl/nir/nir_lower_returns.c index af16676b822..91bb2f7dfeb 100644 --- a/src/glsl/nir/nir_lower_returns.c +++ b/src/glsl/nir/nir_lower_returns.c @@ -224,8 +224,10 @@ nir_lower_returns_impl(nir_function_impl *impl) bool progress = lower_returns_in_cf_list(&impl->body, &state); - if (progress) + if (progress) { nir_metadata_preserve(impl, nir_metadata_none); + nir_repair_ssa_impl(impl); + } return progress; } -- cgit v1.2.3 From 14ebd0fdd799d4b721633d25a08d966ee8069243 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 09:33:56 -0800 Subject: nir/spirv: Hanle continues that use SSA values from the loop body Instead of emitting the continue before the loop body we emit it afterwards. 
Then, once we've finished with the entire function, we run nir_repair_ssa to add whatever phi nodes are needed. --- src/glsl/nir/spirv/vtn_cfg.c | 19 +++++++++++++++---- src/glsl/nir/spirv/vtn_private.h | 2 ++ 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index b3061ce47bb..a57a44363dd 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -559,6 +559,9 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, nir_loop *loop = nir_loop_create(b->shader); nir_cf_node_insert(b->nb.cursor, &loop->cf_node); + b->nb.cursor = nir_after_cf_list(&loop->body); + vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler); + if (!list_empty(&vtn_loop->cont_body)) { /* If we have a non-trivial continue body then we need to put * it at the beginning of the loop with a flag to ensure that @@ -570,7 +573,7 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, b->nb.cursor = nir_before_cf_node(&loop->cf_node); nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1); - b->nb.cursor = nir_after_cf_list(&loop->body); + b->nb.cursor = nir_before_cf_list(&loop->body); nir_if *cont_if = nir_if_create(b->shader); cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont)); nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node); @@ -580,10 +583,9 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, b->nb.cursor = nir_after_cf_node(&cont_if->cf_node); nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1); - } - b->nb.cursor = nir_after_cf_list(&loop->body); - vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler); + b->has_loop_continue = true; + } b->nb.cursor = nir_after_cf_node(&loop->cf_node); break; @@ -672,5 +674,14 @@ vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, { nir_builder_init(&b->nb, func->impl); b->nb.cursor = nir_after_cf_list(&func->impl->body); + 
b->has_loop_continue = false; + vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler); + + /* Continue blocks for loops get inserted before the body of the loop + * but instructions in the continue may use SSA defs in the loop body. + * Therefore, we need to repair SSA to insert the needed phi nodes. + */ + if (b->has_loop_continue) + nir_repair_ssa_impl(func->impl); } diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 129414a4001..f91330ad486 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -334,6 +334,8 @@ struct vtn_builder { /* Current function parameter index */ unsigned func_param_idx; + + bool has_loop_continue; }; static inline struct vtn_value * -- cgit v1.2.3 From 15e6af070870b2d7105a90826dc9a4026e6f5846 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 10:10:47 -0800 Subject: nir/spirv: Handle if's where the merge is also a break or continue --- src/glsl/nir/spirv/vtn_cfg.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index a57a44363dd..9c2e271cda1 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -351,8 +351,15 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, switch_case, switch_break, loop_break, loop_cont, merge_block); - block = merge_block; - continue; + enum vtn_branch_type merge_type = + vtn_get_branch_type(merge_block, switch_case, switch_break, + loop_break, loop_cont); + if (merge_type == vtn_branch_type_none) { + block = merge_block; + continue; + } else { + return; + } } else if (if_stmt->then_type != vtn_branch_type_none && if_stmt->else_type != vtn_branch_type_none) { /* Both sides were short-circuited. We're done here. 
*/ -- cgit v1.2.3 From 5a67df2546a4552cf7b9b401e5e3960ea55bf14f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 10:36:53 -0800 Subject: anv/pack: Make TextureLODBias a proper 4.8 float XXX: We need to update the generators so this doesn't get stompped. --- src/vulkan/gen75_pack.h | 4 ++-- src/vulkan/gen7_pack.h | 4 ++-- src/vulkan/gen8_pack.h | 4 ++-- src/vulkan/gen9_pack.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index 3ed685bed0c..3d336848592 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -7984,7 +7984,7 @@ struct GEN75_SAMPLER_STATE { #define MAPFILTER_ANISOTROPIC 2 #define MAPFILTER_MONO 6 uint32_t MinModeFilter; - uint32_t TextureLODBias; + float TextureLODBias; #define LEGACY 0 #define EWAApproximation 1 uint32_t AnisotropicAlgorithm; @@ -8048,7 +8048,7 @@ GEN75_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->MipModeFilter, 20, 21) | __gen_field(values->MagModeFilter, 17, 19) | __gen_field(values->MinModeFilter, 14, 16) | - __gen_field(values->TextureLODBias, 1, 13) | + __gen_field(values->TextureLODBias * (1 << 8), 1, 13) | __gen_field(values->AnisotropicAlgorithm, 0, 0) | 0; diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 5f2dbc470ec..0fe13de89df 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -6567,7 +6567,7 @@ struct GEN7_SAMPLER_STATE { #define MAPFILTER_ANISOTROPIC 2 #define MAPFILTER_MONO 6 uint32_t MinModeFilter; - uint32_t TextureLODBias; + float TextureLODBias; #define LEGACY 0 #define EWAApproximation 1 uint32_t AnisotropicAlgorithm; @@ -6630,7 +6630,7 @@ GEN7_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->MipModeFilter, 20, 21) | __gen_field(values->MagModeFilter, 17, 19) | __gen_field(values->MinModeFilter, 14, 16) | - __gen_field(values->TextureLODBias, 1, 13) | + 
__gen_field(values->TextureLODBias * (1 << 8), 1, 13) | __gen_field(values->AnisotropicAlgorithm, 0, 0) | 0; diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index 4d6b7c0a04d..0d77b68a9c5 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -8599,7 +8599,7 @@ struct GEN8_SAMPLER_STATE { #define MAPFILTER_ANISOTROPIC 2 #define MAPFILTER_MONO 6 uint32_t MinModeFilter; - uint32_t TextureLODBias; + float TextureLODBias; #define LEGACY 0 #define EWAApproximation 1 uint32_t AnisotropicAlgorithm; @@ -8666,7 +8666,7 @@ GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->MipModeFilter, 20, 21) | __gen_field(values->MagModeFilter, 17, 19) | __gen_field(values->MinModeFilter, 14, 16) | - __gen_field(values->TextureLODBias, 1, 13) | + __gen_field(values->TextureLODBias * (1 << 8), 1, 13) | __gen_field(values->AnisotropicAlgorithm, 0, 0) | 0; diff --git a/src/vulkan/gen9_pack.h b/src/vulkan/gen9_pack.h index b9dc30ad01b..fabcd7bb1e7 100644 --- a/src/vulkan/gen9_pack.h +++ b/src/vulkan/gen9_pack.h @@ -9159,7 +9159,7 @@ struct GEN9_SAMPLER_STATE { #define MAPFILTER_ANISOTROPIC 2 #define MAPFILTER_MONO 6 uint32_t MinModeFilter; - uint32_t TextureLODBias; + float TextureLODBias; #define LEGACY 0 #define EWAApproximation 1 uint32_t AnisotropicAlgorithm; @@ -9232,7 +9232,7 @@ GEN9_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->MipModeFilter, 20, 21) | __gen_field(values->MagModeFilter, 17, 19) | __gen_field(values->MinModeFilter, 14, 16) | - __gen_field(values->TextureLODBias, 1, 13) | + __gen_field(values->TextureLODBias * (1 << 8), 1, 13) | __gen_field(values->AnisotropicAlgorithm, 0, 0) | 0; -- cgit v1.2.3 From bfcc7448920644f232f5370e56c10c2ba15e0731 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 11:35:04 -0800 Subject: genX/pack: Add a __gen_fixed helper and use it for TextureLODBias The __gen_fixed helper properly clamps the value and also handles negative 
values correctly. Eventually, we need to make the scripts generate this and use it for more things. --- src/vulkan/gen75_pack.h | 32 +++++++++++++++++++++++++++++++- src/vulkan/gen7_pack.h | 32 +++++++++++++++++++++++++++++++- src/vulkan/gen8_pack.h | 32 +++++++++++++++++++++++++++++++- src/vulkan/gen9_pack.h | 32 +++++++++++++++++++++++++++++++- 4 files changed, 124 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index 3d336848592..b012032190e 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -62,6 +62,36 @@ __gen_field(uint64_t v, uint32_t start, uint32_t end) return v << start; } +static inline uint64_t +__gen_fixed(float v, uint32_t start, uint32_t end, + bool is_signed, uint32_t fract_bits) +{ + __gen_validate_value(v); + + const float factor = (1 << fract_bits); + + float max, min; + if (is_signed) { + max = ((1 << (end - start)) - 1) / factor; + min = -(1 << (end - start)) / factor; + } else { + max = ((1 << (end - start + 1)) - 1) / factor; + min = 0.0f; + } + + if (v > max) + v = max; + else if (v < min) + v = min; + + int32_t int_val = roundf(v * factor); + + if (is_signed) + int_val &= (1 << (end - start + 1)) - 1; + + return int_val << start; +} + static inline uint64_t __gen_offset(uint64_t v, uint32_t start, uint32_t end) { @@ -8048,7 +8078,7 @@ GEN75_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->MipModeFilter, 20, 21) | __gen_field(values->MagModeFilter, 17, 19) | __gen_field(values->MinModeFilter, 14, 16) | - __gen_field(values->TextureLODBias * (1 << 8), 1, 13) | + __gen_fixed(values->TextureLODBias, 1, 13, true, 8) | __gen_field(values->AnisotropicAlgorithm, 0, 0) | 0; diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index 0fe13de89df..a3ba30a9745 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -62,6 +62,36 @@ __gen_field(uint64_t v, uint32_t start, uint32_t end) return v << start; } +static 
inline uint64_t +__gen_fixed(float v, uint32_t start, uint32_t end, + bool is_signed, uint32_t fract_bits) +{ + __gen_validate_value(v); + + const float factor = (1 << fract_bits); + + float max, min; + if (is_signed) { + max = ((1 << (end - start)) - 1) / factor; + min = -(1 << (end - start)) / factor; + } else { + max = ((1 << (end - start + 1)) - 1) / factor; + min = 0.0f; + } + + if (v > max) + v = max; + else if (v < min) + v = min; + + int32_t int_val = roundf(v * factor); + + if (is_signed) + int_val &= (1 << (end - start + 1)) - 1; + + return int_val << start; +} + static inline uint64_t __gen_offset(uint64_t v, uint32_t start, uint32_t end) { @@ -6630,7 +6660,7 @@ GEN7_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->MipModeFilter, 20, 21) | __gen_field(values->MagModeFilter, 17, 19) | __gen_field(values->MinModeFilter, 14, 16) | - __gen_field(values->TextureLODBias * (1 << 8), 1, 13) | + __gen_fixed(values->TextureLODBias, 1, 13, true, 8) | __gen_field(values->AnisotropicAlgorithm, 0, 0) | 0; diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index 0d77b68a9c5..042e0290a74 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -62,6 +62,36 @@ __gen_field(uint64_t v, uint32_t start, uint32_t end) return v << start; } +static inline uint64_t +__gen_fixed(float v, uint32_t start, uint32_t end, + bool is_signed, uint32_t fract_bits) +{ + __gen_validate_value(v); + + const float factor = (1 << fract_bits); + + float max, min; + if (is_signed) { + max = ((1 << (end - start)) - 1) / factor; + min = -(1 << (end - start)) / factor; + } else { + max = ((1 << (end - start + 1)) - 1) / factor; + min = 0.0f; + } + + if (v > max) + v = max; + else if (v < min) + v = min; + + int32_t int_val = roundf(v * factor); + + if (is_signed) + int_val &= (1 << (end - start + 1)) - 1; + + return int_val << start; +} + static inline uint64_t __gen_offset(uint64_t v, uint32_t start, uint32_t end) { @@ -8666,7 +8696,7 @@ 
GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->MipModeFilter, 20, 21) | __gen_field(values->MagModeFilter, 17, 19) | __gen_field(values->MinModeFilter, 14, 16) | - __gen_field(values->TextureLODBias * (1 << 8), 1, 13) | + __gen_fixed(values->TextureLODBias, 1, 13, true, 8) | __gen_field(values->AnisotropicAlgorithm, 0, 0) | 0; diff --git a/src/vulkan/gen9_pack.h b/src/vulkan/gen9_pack.h index fabcd7bb1e7..db54e9cf85b 100644 --- a/src/vulkan/gen9_pack.h +++ b/src/vulkan/gen9_pack.h @@ -62,6 +62,36 @@ __gen_field(uint64_t v, uint32_t start, uint32_t end) return v << start; } +static inline uint64_t +__gen_fixed(float v, uint32_t start, uint32_t end, + bool is_signed, uint32_t fract_bits) +{ + __gen_validate_value(v); + + const float factor = (1 << fract_bits); + + float max, min; + if (is_signed) { + max = ((1 << (end - start)) - 1) / factor; + min = -(1 << (end - start)) / factor; + } else { + max = ((1 << (end - start + 1)) - 1) / factor; + min = 0.0f; + } + + if (v > max) + v = max; + else if (v < min) + v = min; + + int32_t int_val = roundf(v * factor); + + if (is_signed) + int_val &= (1 << (end - start + 1)) - 1; + + return int_val << start; +} + static inline uint64_t __gen_offset(uint64_t v, uint32_t start, uint32_t end) { @@ -9232,7 +9262,7 @@ GEN9_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, __gen_field(values->MipModeFilter, 20, 21) | __gen_field(values->MagModeFilter, 17, 19) | __gen_field(values->MinModeFilter, 14, 16) | - __gen_field(values->TextureLODBias * (1 << 8), 1, 13) | + __gen_fixed(values->TextureLODBias, 1, 13, true, 8) | __gen_field(values->AnisotropicAlgorithm, 0, 0) | 0; -- cgit v1.2.3 From d49298c702e6cd1ca7ac77396bd6dd377ed9b635 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 12:16:31 -0800 Subject: gen8: Fix border color The border color packet is specified as a 64-byte aligned address relative to dynamic state base address. 
The way the packing functions are currently set up, we need to provide it with (offset >> 6) because it just shoves the bits in where the PRM says they go and isn't really aware that it's an address. --- src/vulkan/anv_device.c | 14 ++++++++++++-- src/vulkan/gen8_state.c | 7 ++++--- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index c7e70412eb8..7493ff7b7fc 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -643,10 +643,19 @@ anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, return state; } +struct gen8_border_color { + union { + float float32[4]; + uint32_t uint32[4]; + }; + /* Pad out to 64 bytes */ + uint32_t _pad[12]; +}; + static void anv_device_init_border_colors(struct anv_device *device) { - static const VkClearColorValue border_colors[] = { + static const struct gen8_border_color border_colors[] = { [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } }, [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } }, [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } }, @@ -656,7 +665,8 @@ anv_device_init_border_colors(struct anv_device *device) }; device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool, - sizeof(border_colors), 32, border_colors); + sizeof(border_colors), 64, + border_colors); } VkResult anv_CreateDevice( diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 9fa2a0554b9..53a75ef5fb6 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -380,6 +380,9 @@ VkResult genX(CreateSampler)( uint32_t filter = vk_to_gen_tex_filter(pCreateInfo->magFilter, pCreateInfo->anisotropyEnable); + uint32_t border_color_offset = device->border_colors.offset + + pCreateInfo->borderColor * 64; + struct GENX(SAMPLER_STATE) sampler_state = { .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, @@ -400,9 
+403,7 @@ VkResult genX(CreateSampler)( .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], .CubeSurfaceControlMode = 0, - .IndirectStatePointer = - device->border_colors.offset + - pCreateInfo->borderColor * sizeof(float) * 4, + .IndirectStatePointer = border_color_offset >> 6, .LODClampMagnificationMode = MIPNONE, .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), -- cgit v1.2.3 From 3dfa6a881ca68f8c28c9e2240090d3badcce8c49 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 13:05:02 -0800 Subject: anv/meta: Initialize a handle to null --- src/vulkan/anv_meta_clear.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 34087804075..a033f92773e 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -133,7 +133,7 @@ create_pipeline(struct anv_device *device, struct anv_shader_module vs_m = { .nir = vs_nir }; struct anv_shader_module fs_m = { .nir = fs_nir }; - VkPipeline pipeline_h; + VkPipeline pipeline_h = VK_NULL_HANDLE; result = anv_graphics_pipeline_create(device_h, VK_NULL_HANDLE, &(VkGraphicsPipelineCreateInfo) { -- cgit v1.2.3 From 378af64e30a6be1eff3b06b896ccbfc32bfd840d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 14:03:20 -0800 Subject: anv/meta: Add a meta allocator that uses SCOPE_DEVICE The Vulkan spec requires all allocations that happen for device creation to happen with SCOPE_DEVICE. Since meta calls into other things that allocate memory, the easiest way to do this is with an allocator. 
--- src/vulkan/anv_meta.c | 79 +++++++++++++++++++++++++++++++++++---------- src/vulkan/anv_meta_clear.c | 9 +++--- src/vulkan/anv_private.h | 2 ++ 3 files changed, 69 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 57138fbd6a3..a02bfed5af3 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -220,7 +220,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) .pPreserveAttachments = (uint32_t[]) { 0 }, }, .dependencyCount = 0, - }, NULL, &device->meta_state.blit.render_pass); + }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); if (result != VK_SUCCESS) goto fail; @@ -300,7 +300,8 @@ anv_device_init_meta_blit_state(struct anv_device *device) } }; result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), - &ds_layout_info, NULL, + &ds_layout_info, + &device->meta_state.alloc, &device->meta_state.blit.ds_layout); if (result != VK_SUCCESS) goto fail_render_pass; @@ -311,7 +312,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) .setLayoutCount = 1, .pSetLayouts = &device->meta_state.blit.ds_layout, }, - NULL, &device->meta_state.blit.pipeline_layout); + &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); if (result != VK_SUCCESS) goto fail_descriptor_set_layout; @@ -404,7 +405,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) result = anv_graphics_pipeline_create(anv_device_to_handle(device), VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, - NULL, &device->meta_state.blit.pipeline_1d_src); + &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); if (result != VK_SUCCESS) goto fail_pipeline_layout; @@ -412,7 +413,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) result = anv_graphics_pipeline_create(anv_device_to_handle(device), VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, - NULL, &device->meta_state.blit.pipeline_2d_src); + &device->meta_state.alloc, 
&device->meta_state.blit.pipeline_2d_src); if (result != VK_SUCCESS) goto fail_pipeline_1d; @@ -420,7 +421,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) result = anv_graphics_pipeline_create(anv_device_to_handle(device), VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, - NULL, &device->meta_state.blit.pipeline_3d_src); + &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); if (result != VK_SUCCESS) goto fail_pipeline_2d; @@ -433,21 +434,26 @@ anv_device_init_meta_blit_state(struct anv_device *device) fail_pipeline_2d: anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src, NULL); + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); fail_pipeline_1d: anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_1d_src, NULL); + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); fail_pipeline_layout: anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout, NULL); + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); fail_descriptor_set_layout: anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout, NULL); + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); fail_render_pass: anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass, NULL); + device->meta_state.blit.render_pass, + &device->meta_state.alloc); ralloc_free(vs.nir); ralloc_free(fs_1d.nir); @@ -1400,9 +1406,42 @@ void anv_CmdResolveImage( stub(); } +static void * +meta_alloc(void* _device, size_t size, size_t alignment, + VkSystemAllocationScope allocationScope) +{ + struct anv_device *device = _device; + return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); +} + +static void * +meta_realloc(void* _device, void *original, size_t size, size_t alignment, + 
VkSystemAllocationScope allocationScope) +{ + struct anv_device *device = _device; + return device->alloc.pfnReallocation(device->alloc.pUserData, original, + size, alignment, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); +} + +static void +meta_free(void* _device, void *data) +{ + struct anv_device *device = _device; + return device->alloc.pfnFree(device->alloc.pUserData, data); +} + VkResult anv_device_init_meta(struct anv_device *device) { + device->meta_state.alloc = (VkAllocationCallbacks) { + .pUserData = device, + .pfnAllocation = meta_alloc, + .pfnReallocation = meta_realloc, + .pfnFree = meta_free, + }; + VkResult result; result = anv_device_init_meta_clear_state(device); if (result != VK_SUCCESS) @@ -1422,15 +1461,21 @@ anv_device_finish_meta(struct anv_device *device) /* Blit */ anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass, NULL); + device->meta_state.blit.render_pass, + &device->meta_state.alloc); anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_1d_src, NULL); + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src, NULL); + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_3d_src, NULL); + device->meta_state.blit.pipeline_3d_src, + &device->meta_state.alloc); anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout, NULL); + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout, NULL); + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); } diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index a033f92773e..8d93ced9ccf 100644 --- a/src/vulkan/anv_meta_clear.c +++ 
b/src/vulkan/anv_meta_clear.c @@ -299,8 +299,8 @@ create_color_pipeline(struct anv_device *device, uint32_t frag_output, */ return create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, - &cb_state, NULL, /*use_repclear*/ false, - pipeline); + &cb_state, &device->meta_state.alloc, + /*use_repclear*/ false, pipeline); } static VkResult @@ -327,7 +327,7 @@ fail: anv_DestroyPipeline(anv_device_to_handle(device), anv_pipeline_to_handle(pipelines[i]), - NULL); + &device->meta_state.alloc); } return result; @@ -515,7 +515,8 @@ create_depthstencil_pipeline(struct anv_device *device, }; return create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, - &cb_state, NULL, /*use_repclear*/ true, pipeline); + &cb_state, &device->meta_state.alloc, + /*use_repclear*/ true, pipeline); } static void diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index e8ac70dc498..574fd66e4c7 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -562,6 +562,8 @@ VkResult anv_init_wsi(struct anv_instance *instance); void anv_finish_wsi(struct anv_instance *instance); struct anv_meta_state { + VkAllocationCallbacks alloc; + struct { /** * Pipeline N is used to clear color attachment N of the current -- cgit v1.2.3 From 45d17fcf9bd41dfcd34f01bf62b031d8ca071edd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 14:04:13 -0800 Subject: anv: Misc allocation scope fixes --- src/vulkan/anv_device.c | 4 ++-- src/vulkan/anv_wsi_wayland.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 7493ff7b7fc..abc9ed66dd0 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -227,7 +227,7 @@ VkResult anv_CreateInstance( } instance = anv_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); if (!instance) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -698,7 +698,7 
@@ VkResult anv_CreateDevice( device = anv_alloc2(&physical_device->instance->alloc, pAllocator, sizeof(*device), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); if (!device) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index d708341c98f..5e8a3a56f68 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -239,7 +239,7 @@ wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display) { struct wsi_wl_display *display = anv_alloc(&wsi->instance->alloc, sizeof(*display), 8, - VK_SYSTEM_ALLOCATION_SCOPE_CACHE); + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); if (!display) return NULL; @@ -820,7 +820,7 @@ anv_wl_init_wsi(struct anv_instance *instance) VkResult result; wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); if (!wsi) { result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); goto fail; -- cgit v1.2.3 From 6f956b0b22b18d789f70864828635f688ce98219 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 14:07:46 -0800 Subject: anv/meta: Improve meta clear cleanup a bit --- src/vulkan/anv_meta.c | 4 +++- src/vulkan/anv_meta_clear.c | 35 ++++++++++++++++++++--------------- 2 files changed, 23 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index a02bfed5af3..87728943190 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1448,8 +1448,10 @@ anv_device_init_meta(struct anv_device *device) return result; result = anv_device_init_meta_blit_state(device); - if (result != VK_SUCCESS) + if (result != VK_SUCCESS) { + anv_device_finish_meta_clear_state(device); return result; + } return VK_SUCCESS; } diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 8d93ced9ccf..0c4b740da24 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -303,6 
+303,21 @@ create_color_pipeline(struct anv_device *device, uint32_t frag_output, /*use_repclear*/ false, pipeline); } +static void +free_color_pipelines(struct anv_device *device) +{ + for (uint32_t i = 0; + i < ARRAY_SIZE(device->meta_state.clear.color_pipelines); ++i) { + if (device->meta_state.clear.color_pipelines[i] == NULL) + continue; + + ANV_CALL(DestroyPipeline)( + anv_device_to_handle(device), + anv_pipeline_to_handle(device->meta_state.clear.color_pipelines[i]), + &device->meta_state.alloc); + } +} + static VkResult init_color_pipelines(struct anv_device *device) { @@ -321,14 +336,7 @@ init_color_pipelines(struct anv_device *device) return VK_SUCCESS; fail: - for (uint32_t i = 0; i < n; ++i) { - if (pipelines[i] == NULL) - break; - - anv_DestroyPipeline(anv_device_to_handle(device), - anv_pipeline_to_handle(pipelines[i]), - &device->meta_state.alloc); - } + free_color_pipelines(device); return result; } @@ -677,8 +685,10 @@ anv_device_init_meta_clear_state(struct anv_device *device) return result; result = init_depthstencil_pipelines(device); - if (result != VK_SUCCESS) + if (result != VK_SUCCESS) { + free_color_pipelines(device); return result; + } return VK_SUCCESS; } @@ -688,12 +698,7 @@ anv_device_finish_meta_clear_state(struct anv_device *device) { VkDevice device_h = anv_device_to_handle(device); - for (uint32_t i = 0; - i < ARRAY_SIZE(device->meta_state.clear.color_pipelines); ++i) { - ANV_CALL(DestroyPipeline)(device_h, - anv_pipeline_to_handle(device->meta_state.clear.color_pipelines[i]), - NULL); - } + free_color_pipelines(device); ANV_CALL(DestroyPipeline)(device_h, anv_pipeline_to_handle(device->meta_state.clear.depth_only_pipeline), -- cgit v1.2.3 From bb8cadd169782f463f79c70f5478ccd8f13826da Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 15:00:01 -0800 Subject: nir/spirv: Insert movs around image intrinsics Image intrinsics always take a vec4 coordinate and always return a vec4. 
This simplifies the intrinsics a but but also means that they don't actually match the incomming SPIR-V. In order to compensate for this, we add swizzling movs for both source and destination to get the right number of components. --- src/glsl/nir/spirv/spirv_to_nir.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index dc95b40f9c3..ef24156a48e 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2650,7 +2650,15 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); intrin->variables[0] = nir_deref_as_var(nir_copy_deref(&intrin->instr, &image.deref->deref)); - intrin->src[0] = nir_src_for_ssa(image.coord); + + /* The image coordinate is always 4 components but we may not have that + * many. Swizzle to compensate. + */ + unsigned swiz[4]; + for (unsigned i = 0; i < 4; i++) + swiz[i] = i < image.coord->num_components ? i : 0; + intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord, + swiz, 4, false)); intrin->src[1] = nir_src_for_ssa(image.sample); switch (opcode) { @@ -2694,13 +2702,20 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, if (opcode != SpvOpImageWrite) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - nir_ssa_dest_init(&intrin->instr, &intrin->dest, - glsl_get_vector_elements(type->type), NULL); + nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL); + + nir_builder_instr_insert(&b->nb, &intrin->instr); + + /* The image intrinsics always return 4 channels but we may not want + * that many. Emit a mov to trim it down. 
+ */ + unsigned swiz[4] = {0, 1, 2, 3}; val->ssa = vtn_create_ssa_value(b, type->type); - val->ssa->def = &intrin->dest.ssa; + val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz, + glsl_get_vector_elements(type->type), false); + } else { + nir_builder_instr_insert(&b->nb, &intrin->instr); } - - nir_builder_instr_insert(&b->nb, &intrin->instr); } static void -- cgit v1.2.3 From 56c8a5f2b8863987519d7881c495e3c5b71d21a3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 5 Jan 2016 14:03:58 -0800 Subject: nir/spirv: Implement ImageQuerySize for storage iamges SPIR-V only has one ImageQuerySize opcode that has to work for both textures and storage images. Therefore, we have to special-case that one a bit and look at the type of the incoming image handle. --- src/glsl/nir/spirv/spirv_to_nir.c | 41 +++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index ef24156a48e..4110c3571c6 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2594,6 +2594,12 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; break; + case SpvOpImageQuerySize: + image.deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; + image.coord = NULL; + image.sample = NULL; + break; + case SpvOpImageRead: image.deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; image.coord = get_image_coord(b, w[4]); @@ -2627,6 +2633,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, nir_intrinsic_op op; switch (opcode) { #define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break; + OP(ImageQuerySize, size) OP(ImageRead, load) OP(ImageWrite, store) OP(AtomicExchange, atomic_exchange) @@ -2651,17 +2658,21 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, intrin->variables[0] = nir_deref_as_var(nir_copy_deref(&intrin->instr, 
&image.deref->deref)); - /* The image coordinate is always 4 components but we may not have that - * many. Swizzle to compensate. - */ - unsigned swiz[4]; - for (unsigned i = 0; i < 4; i++) - swiz[i] = i < image.coord->num_components ? i : 0; - intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord, - swiz, 4, false)); - intrin->src[1] = nir_src_for_ssa(image.sample); + /* ImageQuerySize doesn't take any extra parameters */ + if (opcode != SpvOpImageQuerySize) { + /* The image coordinate is always 4 components but we may not have that + * many. Swizzle to compensate. + */ + unsigned swiz[4]; + for (unsigned i = 0; i < 4; i++) + swiz[i] = i < image.coord->num_components ? i : 0; + intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord, + swiz, 4, false)); + intrin->src[1] = nir_src_for_ssa(image.sample); + } switch (opcode) { + case SpvOpImageQuerySize: case SpvOpImageRead: break; case SpvOpImageWrite: @@ -3578,7 +3589,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpImageGather: case SpvOpImageDrefGather: case SpvOpImageQuerySizeLod: - case SpvOpImageQuerySize: case SpvOpImageQueryLod: case SpvOpImageQueryLevels: case SpvOpImageQuerySamples: @@ -3591,6 +3601,17 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_image(b, opcode, w, count); break; + case SpvOpImageQuerySize: { + nir_deref_var *image = vtn_value(b, w[3], vtn_value_type_deref)->deref; + const struct glsl_type *image_type = nir_deref_tail(&image->deref)->type; + if (glsl_type_is_image(image_type)) { + vtn_handle_image(b, opcode, w, count); + } else { + vtn_handle_texture(b, opcode, w, count); + } + break; + } + case SpvOpAtomicExchange: case SpvOpAtomicCompareExchange: case SpvOpAtomicCompareExchangeWeak: -- cgit v1.2.3 From c2a6f4302e39bc868765e91e0618c0fd334f1407 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 16:01:00 -0800 Subject: nir/spirv: Patch through image qualifiers --- 
src/glsl/nir/spirv/spirv_to_nir.c | 16 ++++++++++++++++ src/glsl/nir/spirv/vtn_private.h | 3 +++ 2 files changed, 19 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 4110c3571c6..1fcdb1aa14a 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -717,6 +717,11 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, unsigned sampled = w[7]; SpvImageFormat format = w[8]; + if (count > 9) + val->type->access_qualifier = w[9]; + else + val->type->access_qualifier = SpvAccessQualifierReadWrite; + assert(!multisampled && "FIXME: Handl multi-sampled textures"); val->type->image_format = translate_image_format(format); @@ -1915,6 +1920,17 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, if (glsl_type_is_image(interface_type->type)) { b->shader->info.num_images++; var->data.image.format = interface_type->image_format; + + switch (interface_type->access_qualifier) { + case SpvAccessQualifierReadOnly: + var->data.image.read_only = true; + break; + case SpvAccessQualifierWriteOnly: + var->data.image.write_only = true; + break; + default: + break; + } } else if (glsl_type_is_sampler(interface_type->type)) { b->shader->info.num_textures++; } else { diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index f91330ad486..cf75bc92a7a 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -216,6 +216,9 @@ struct vtn_type { /* Image format for image_load_store type images */ unsigned image_format; + /* Access qualifier for storage images */ + SpvAccessQualifier access_qualifier; + /* for arrays and matrices, the array stride */ unsigned stride; -- cgit v1.2.3 From 587842a0cab1e24f561bf99de94e91fa6710f2a5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 15:42:41 -0800 Subject: anv/gem: Add a helper for getting bit6 swizzling information --- src/vulkan/anv_gem.c | 55 
++++++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 1 + 2 files changed, 56 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_gem.c b/src/vulkan/anv_gem.c index 9d0d0332001..0a7be353327 100644 --- a/src/vulkan/anv_gem.c +++ b/src/vulkan/anv_gem.c @@ -227,6 +227,61 @@ anv_gem_get_param(int fd, uint32_t param) return 0; } +bool +anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) +{ + struct drm_gem_close close; + int ret; + + struct drm_i915_gem_create gem_create; + VG_CLEAR(gem_create); + gem_create.size = 4096; + + if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) { + assert(!"Failed to create GEM BO"); + return false; + } + + bool swizzled = false; + + /* set_tiling overwrites the input on the error path, so we have to open + * code anv_ioctl. + */ + struct drm_i915_gem_set_tiling set_tiling; + do { + VG_CLEAR(set_tiling); + set_tiling.handle = gem_create.handle; + set_tiling.tiling_mode = tiling; + set_tiling.stride = tiling == I915_TILING_X ? 
512 : 128; + + ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + if (ret != 0) { + assert(!"Failed to set BO tiling"); + goto close_and_return; + } + + struct drm_i915_gem_get_tiling get_tiling; + VG_CLEAR(get_tiling); + get_tiling.handle = gem_create.handle; + + if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) { + assert(!"Failed to get BO tiling"); + goto close_and_return; + } + + swizzled = get_tiling.swizzle_mode != I915_BIT_6_SWIZZLE_NONE; + +close_and_return: + + VG_CLEAR(close); + close.handle = gem_create.handle; + anv_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); + + return swizzled; +} + int anv_gem_create_context(struct anv_device *device) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 574fd66e4c7..307e07ef246 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -665,6 +665,7 @@ int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, int anv_gem_create_context(struct anv_device *device); int anv_gem_destroy_context(struct anv_device *device, int context); int anv_gem_get_param(int fd, uint32_t param); +bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling); int anv_gem_get_aperture(int fd, uint64_t *size); int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); -- cgit v1.2.3 From 580b2e85e48dbb82d168192c9f085d1c70355795 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 5 Jan 2016 13:53:05 -0800 Subject: isl/device: Add a flag for bit 6 swizzling --- src/isl/isl.c | 4 +++- src/isl/isl.h | 4 +++- src/vulkan/anv_device.c | 5 ++++- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 015b14ea3d2..bb3d59576e0 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -46,10 +46,12 @@ __isl_finishme(const char *file, int line, const char *fmt, ...) 
void isl_device_init(struct isl_device *dev, - const struct brw_device_info *info) + const struct brw_device_info *info, + bool has_bit6_swizzling) { dev->info = info; dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6; + dev->has_bit6_swizzling = has_bit6_swizzling; /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some * device properties at buildtime. Verify that the macros with the device diff --git a/src/isl/isl.h b/src/isl/isl.h index 9e07b2ff63c..2194818d7b7 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -608,6 +608,7 @@ enum isl_msaa_layout { struct isl_device { const struct brw_device_info *info; bool use_separate_stencil; + bool has_bit6_swizzling; }; struct isl_extent2d { @@ -766,7 +767,8 @@ extern const struct isl_format_layout isl_format_layouts[]; void isl_device_init(struct isl_device *dev, - const struct brw_device_info *info); + const struct brw_device_info *info, + bool has_bit6_swizzling); static inline const struct isl_format_layout * ATTRIBUTE_CONST isl_format_get_layout(enum isl_format fmt) diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index abc9ed66dd0..ddf5f4bed29 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -123,6 +123,8 @@ anv_physical_device_init(struct anv_physical_device *device, goto fail; } + bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X); + close(fd); brw_process_intel_debug_variable(); @@ -135,7 +137,8 @@ anv_physical_device_init(struct anv_physical_device *device, device->compiler->shader_debug_log = compiler_debug_log; device->compiler->shader_perf_log = compiler_perf_log; - isl_device_init(&device->isl_dev, device->info); + /* XXX: Actually detect bit6 swizzling */ + isl_device_init(&device->isl_dev, device->info, swizzled); return VK_SUCCESS; -- cgit v1.2.3 From 3276610ea6a3c36176fcf0089a47093d12f9f982 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 17:45:12 -0800 Subject: getX/state: Set LOD pre-clamp to OpenGL mode This gets us 
another couple hundred sampler tests --- src/vulkan/gen7_state.c | 1 + src/vulkan/gen8_state.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index b24e484262a..0c830d5c8ed 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -98,6 +98,7 @@ VkResult genX(CreateSampler)( struct GEN7_SAMPLER_STATE sampler_state = { .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, + .LODPreClampEnable = OGL, .BaseMipLevel = 0.0, .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], .MagModeFilter = filter, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 53a75ef5fb6..d42d0b1b5a6 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -386,7 +386,7 @@ VkResult genX(CreateSampler)( struct GENX(SAMPLER_STATE) sampler_state = { .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, - .LODPreClampMode = 0, + .LODPreClampMode = CLAMP_OGL, #if ANV_GEN == 8 .BaseMipLevel = 0.0, #endif -- cgit v1.2.3 From ba5ef49dcbf9e001231658ad8c6588649a2dbd57 Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Tue, 19 Jan 2016 16:51:56 +0000 Subject: anv/wsi: Avoid stuck Wayland connection In acquire_next_image, we are waiting for a wl_buffer::release to arrive and release one of the buffers in our swapchain. Most compositors don't explicitly flush release events, so we may need to perform a roundtrip instead, to ensure the event arrives. Signed-off-by: Daniel Stone --- src/vulkan/anv_wsi_wayland.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index 5e8a3a56f68..c8f9cf27ab3 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -536,8 +536,8 @@ wsi_wl_swapchain_acquire_next_image(struct anv_swapchain *anv_chain, /* This time we do a blocking dispatch because we can't go * anywhere until we get an event. 
*/ - int ret = wl_display_dispatch_queue(chain->display->display, - chain->queue); + int ret = wl_display_roundtrip_queue(chain->display->display, + chain->queue); if (ret < 0) return vk_error(VK_ERROR_OUT_OF_DATE_KHR); } -- cgit v1.2.3 From f9ca780ea4fa1b11639a880be501171017585f2c Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Tue, 19 Jan 2016 16:53:16 +0000 Subject: anv/wsi: Mark Wayland buffers as busy We were diligently setting Wayland buffers as non-busy, but nowhere in the code did we set them to busy when submitted to the server. This meant that acquire_next_image would only ever find the same buffer in a loop, over and over. Signed-off-by: Daniel Stone --- src/vulkan/anv_wsi_wayland.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index c8f9cf27ab3..28f0ed834df 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -584,6 +584,7 @@ wsi_wl_swapchain_queue_present(struct anv_swapchain *anv_chain, chain->fifo_ready = false; } + chain->images[image_index].busy = true; wl_surface_commit(chain->surface); wl_display_flush(chain->display->display); -- cgit v1.2.3 From 5e57a87dcfea5b1e363e96946c456651d43176c2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 Jan 2016 12:02:53 -0800 Subject: anv/pipeline: Fix point size --- src/vulkan/anv_pipeline.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 517fcb0ac3a..2a22497815a 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -544,7 +544,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, ralloc_steal(mem_ctx, nir); prog_data->inputs_read = nir->info.inputs_read; - pipeline->writes_point_size = nir->info.outputs_written & VARYING_SLOT_PSIZ; + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; 
brw_compute_vue_map(&pipeline->device->info, &prog_data->base.vue_map, @@ -608,6 +609,9 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, if (module->nir == NULL) ralloc_steal(mem_ctx, nir); + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; + brw_compute_vue_map(&pipeline->device->info, &prog_data->base.vue_map, nir->info.outputs_written, -- cgit v1.2.3 From bc9d9bc2e3601281321f09dd811c3618fa2dc44d Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 19 Jan 2016 12:15:36 -0800 Subject: nir/spirv/glsl450: Implement Asin and Acos. --- src/glsl/nir/spirv/vtn_glsl450.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 82cfc8c91a9..b840e093dd5 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -28,6 +28,10 @@ #include "vtn_private.h" #include "GLSL.std.450.h" +#define M_PIf ((float) M_PI) +#define M_PI_2f ((float) M_PI_2) +#define M_PI_4f ((float) M_PI_4) + static nir_ssa_def * build_mat2_det(nir_builder *b, nir_ssa_def *col[2]) { @@ -201,6 +205,22 @@ build_log(nir_builder *b, nir_ssa_def *x) return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E)); } +static nir_ssa_def * +build_asin(nir_builder *b, nir_ssa_def *x) +{ + nir_ssa_def *abs_x = nir_fabs(b, x); + return nir_fmul(b, nir_fsign(b, x), + nir_fsub(b, nir_imm_float(b, M_PI_2f), + nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), + nir_fadd(b, nir_imm_float(b, M_PI_2f), + nir_fmul(b, abs_x, + nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), + nir_fmul(b, abs_x, + nir_fadd(b, nir_imm_float(b, 0.086566724f), + nir_fmul(b, abs_x, + nir_imm_float(b, -0.03102955f)))))))))); +} + static void handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, const uint32_t *w, unsigned count) @@ -417,7 +437,14 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 
entrypoint, case GLSLstd450FindUMsb: op = nir_op_ufind_msb; break; case GLSLstd450Asin: + val->ssa->def = build_asin(nb, src[0]); + return; + case GLSLstd450Acos: + val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f), + build_asin(nb, src[0])); + return; + case GLSLstd450Atan: case GLSLstd450Atan2: case GLSLstd450ModfStruct: -- cgit v1.2.3 From 2ab3efa0ad19118dc5bfd9854b512e5786d4b2c4 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 19 Jan 2016 14:30:02 -0800 Subject: nir/spirv/glsl450: Implement Atan. --- src/glsl/nir/spirv/vtn_glsl450.c | 71 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index b840e093dd5..1c5f115f128 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -221,6 +221,74 @@ build_asin(nir_builder *b, nir_ssa_def *x) nir_imm_float(b, -0.03102955f)))))))))); } +/** + * Compute xs[0] + xs[1] + xs[2] + ... using fadd. 
+ */ +static nir_ssa_def * +build_fsum(nir_builder *b, nir_ssa_def **xs, int terms) +{ + nir_ssa_def *accum = xs[0]; + + for (int i = 1; i < terms; i++) + accum = nir_fadd(b, accum, xs[i]); + + return accum; +} + +static nir_ssa_def * +build_atan(nir_builder *b, nir_ssa_def *y_over_x) +{ + nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x); + nir_ssa_def *one = nir_imm_float(b, 1.0f); + + /* + * range-reduction, first step: + * + * / y_over_x if |y_over_x| <= 1.0; + * x = < + * \ 1.0 / y_over_x otherwise + */ + nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one), + nir_fmax(b, abs_y_over_x, one)); + + /* + * approximate atan by evaluating polynomial: + * + * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + + * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + + * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 + */ + nir_ssa_def *x_2 = nir_fmul(b, x, x); + nir_ssa_def *x_3 = nir_fmul(b, x_2, x); + nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2); + nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2); + nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2); + nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2); + + nir_ssa_def *polynomial_terms[] = { + nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)), + nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)), + nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)), + nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)), + nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)), + nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)), + }; + + nir_ssa_def *tmp = + build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms)); + + /* range-reduction fixup */ + tmp = nir_fadd(b, tmp, + nir_fmul(b, + nir_b2f(b, nir_flt(b, one, abs_y_over_x)), + nir_fadd(b, nir_fmul(b, tmp, + nir_imm_float(b, -2.0f)), + nir_imm_float(b, M_PI_2f)))); + + /* sign fixup */ + return nir_fmul(b, tmp, nir_fsign(b, y_over_x)); +} + static void handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, const uint32_t *w, unsigned count) @@ 
-446,6 +514,9 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; case GLSLstd450Atan: + val->ssa->def = build_atan(nb, src[0]); + return; + case GLSLstd450Atan2: case GLSLstd450ModfStruct: case GLSLstd450Frexp: -- cgit v1.2.3 From 68c9ca1a94de785660c61789c2d9e5b26569ee45 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 19 Jan 2016 15:56:50 -0800 Subject: nir/spirv/glsl450: Blindly implement Atan2. This is untested and probably broken. We already passed the atan2 CTS tests before implementing this opcode. Presumably, glslang or something was giving us a plain Atan opcode instead of Atan2. I don't know why. --- src/glsl/nir/spirv/vtn_glsl450.c | 51 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 1c5f115f128..c9a7195c47f 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -289,6 +289,54 @@ build_atan(nir_builder *b, nir_ssa_def *y_over_x) return nir_fmul(b, tmp, nir_fsign(b, y_over_x)); } +static nir_ssa_def * +build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) +{ + nir_ssa_def *zero = nir_imm_float(b, 0.0f); + + /* If |x| >= 1.0e-8 * |y|: */ + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa( + nir_fge(b, nir_fabs(b, x), + nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)))); + nir_builder_cf_insert(b, &if_stmt->cf_node); + + /* Then...call atan(y/x) and fix it up: */ + b->cursor = nir_after_cf_list(&if_stmt->then_list); + nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x)); + nir_ssa_def *r_then = + nir_bcsel(b, nir_flt(b, x, zero), + nir_fadd(b, atan1, + nir_bcsel(b, nir_fge(b, y, zero), + nir_imm_float(b, M_PIf), + nir_imm_float(b, -M_PIf))), + atan1); + + /* Else... 
*/ + b->cursor = nir_after_cf_list(&if_stmt->else_list); + nir_ssa_def *r_else = + nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f)); + + b->cursor = nir_after_cf_node(&if_stmt->cf_node); + + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, r_then->num_components, NULL); + + nir_phi_src *phi_src0 = ralloc(phi, nir_phi_src); + nir_phi_src *phi_src1 = ralloc(phi, nir_phi_src); + + phi_src0->pred = nir_cf_node_as_block((nir_cf_node *) exec_list_get_head(&if_stmt->then_list)); + phi_src0->src = nir_src_for_ssa(r_then); + exec_list_push_tail(&phi->srcs, &phi_src0->node); + phi_src1->pred = nir_cf_node_as_block((nir_cf_node *) exec_list_get_head(&if_stmt->else_list)); + phi_src1->src = nir_src_for_ssa(r_else); + exec_list_push_tail(&phi->srcs, &phi_src1->node); + + nir_builder_instr_insert(b, &phi->instr); + + return &phi->dest.ssa; +} + static void handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, const uint32_t *w, unsigned count) @@ -518,6 +566,9 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; case GLSLstd450Atan2: + val->ssa->def = build_atan2(nb, src[0], src[1]); + return; + case GLSLstd450ModfStruct: case GLSLstd450Frexp: case GLSLstd450FrexpStruct: -- cgit v1.2.3 From 549be68258f278ca9ca0d385ff98041bc6b71d16 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 19 Jan 2016 16:46:03 -0800 Subject: nir/spirv/glsl450: Implement Frexp. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 42 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index c9a7195c47f..b59411e1f8d 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -337,6 +337,39 @@ build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) return &phi->dest.ssa; } +static nir_ssa_def * +build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent) +{ + nir_ssa_def *abs_x = nir_fabs(b, x); + nir_ssa_def *zero = nir_imm_float(b, 0.0f); + + /* Single-precision floating-point values are stored as + * 1 sign bit; + * 8 exponent bits; + * 23 mantissa bits. + * + * An exponent shift of 23 will shift the mantissa out, leaving only the + * exponent and sign bit (which itself may be zero, if the absolute value + * was taken before the bitcast and shift. + */ + nir_ssa_def *exponent_shift = nir_imm_int(b, 23); + nir_ssa_def *exponent_bias = nir_imm_int(b, -126); + + nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu); + + /* Exponent of floating-point values in the range [0.5, 1.0). 
*/ + nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u); + + nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero); + + *exponent = + nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), + nir_bcsel(b, is_not_zero, exponent_bias, zero)); + + return nir_ior(b, nir_iand(b, x, sign_mantissa_mask), + nir_bcsel(b, is_not_zero, exponent_value, zero)); +} + static void handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, const uint32_t *w, unsigned count) @@ -569,8 +602,15 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, val->ssa->def = build_atan2(nb, src[0], src[1]); return; + case GLSLstd450Frexp: { + nir_ssa_def *exponent; + val->ssa->def = build_frexp(nb, src[0], &exponent); + nir_deref_var *out = vtn_value(b, w[6], vtn_value_type_deref)->deref; + nir_store_deref_var(nb, out, exponent, 0xf); + return; + } + case GLSLstd450ModfStruct: - case GLSLstd450Frexp: case GLSLstd450FrexpStruct: case GLSLstd450PackDouble2x32: case GLSLstd450UnpackDouble2x32: -- cgit v1.2.3 From 56dbf13045c551984c362fdeed42baf9d74517a6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 Jan 2016 15:01:10 -0800 Subject: anv: Add support for VK_WHOLE_SIZE several places --- src/vulkan/anv_descriptor_set.c | 3 ++- src/vulkan/anv_device.c | 3 +++ src/vulkan/anv_image.c | 7 ++++--- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index c414c7c276d..8b3b5dfadbc 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -559,7 +559,8 @@ void anv_UpdateDescriptorSets( * range in the surface state and do the actual range-checking * in the shader. 
*/ - if (bind_layout->dynamic_offset_index >= 0) + if (bind_layout->dynamic_offset_index >= 0 || + write->pBufferInfo[j].range == VK_WHOLE_SIZE) view->range = buffer->size - write->pBufferInfo[j].offset; else view->range = write->pBufferInfo[j].range; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index ddf5f4bed29..9248f912c69 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1101,6 +1101,9 @@ VkResult anv_MapMemory( return VK_SUCCESS; } + if (size == VK_WHOLE_SIZE) + size = mem->bo.size - offset; + /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only * takes a VkDeviceMemory pointer, it seems like only one map of the memory * at a time is valid. We could just mmap up front and return an offset diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index f79583eac04..3b7ea42aa63 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -537,7 +537,8 @@ anv_CreateBufferView(VkDevice _device, view->format = format->surface_format; view->bo = buffer->bo; view->offset = buffer->offset + pCreateInfo->offset; - view->range = pCreateInfo->range; + view->range = pCreateInfo->range == VK_WHOLE_SIZE ? 
+ buffer->size - view->offset : pCreateInfo->range; if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) { view->surface_state = @@ -545,7 +546,7 @@ anv_CreateBufferView(VkDevice _device, anv_fill_buffer_surface_state(device, view->surface_state.map, view->format, - view->offset, pCreateInfo->range, + view->offset, view->range, format->isl_layout->bs); } else { view->surface_state = (struct anv_state){ 0 }; @@ -560,7 +561,7 @@ anv_CreateBufferView(VkDevice _device, anv_fill_buffer_surface_state(device, view->storage_surface_state.map, storage_format, - view->offset, pCreateInfo->range, + view->offset, view->range, format->isl_layout->bs); } else { view->storage_surface_state = (struct anv_state){ 0 }; -- cgit v1.2.3 From 6325a750111ff19e263e8fe0f1c82818a42f0f7d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 Jan 2016 15:27:20 -0800 Subject: anv/meta_clear: Do save/restore in actual entry points --- src/vulkan/anv_meta_clear.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 0c4b740da24..b5e257b1602 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -815,20 +815,15 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) } static void -anv_cmd_clear_image(VkCommandBuffer cmd_buffer_h, - VkImage image_h, +anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *image, VkImageLayout image_layout, const VkClearValue *clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h); - ANV_FROM_HANDLE(anv_image, image, image_h); - struct anv_meta_saved_state saved_state; VkDevice device_h = anv_device_to_handle(cmd_buffer->device); - meta_clear_begin(&saved_state, cmd_buffer); - for (uint32_t r = 0; r < range_count; r++) { const VkImageSubresourceRange *range = &ranges[r]; @@ -838,7 
+833,7 @@ anv_cmd_clear_image(VkCommandBuffer cmd_buffer_h, anv_image_view_init(&iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = image_h, + .image = anv_image_to_handle(image), .viewType = anv_meta_get_view_type(image), .format = image->vk_format, .subresourceRange = { @@ -920,7 +915,7 @@ anv_cmd_clear_image(VkCommandBuffer cmd_buffer_h, &cmd_buffer->pool->alloc, &pass); - ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h, + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderArea = { @@ -937,7 +932,7 @@ anv_cmd_clear_image(VkCommandBuffer cmd_buffer_h, }, VK_SUBPASS_CONTENTS_INLINE); - ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); ANV_CALL(DestroyRenderPass)(device_h, pass, &cmd_buffer->pool->alloc); ANV_CALL(DestroyFramebuffer)(device_h, fb, @@ -945,34 +940,48 @@ anv_cmd_clear_image(VkCommandBuffer cmd_buffer_h, } } } - - meta_clear_end(&saved_state, cmd_buffer); } void anv_CmdClearColorImage( VkCommandBuffer commandBuffer, - VkImage image, + VkImage image_h, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { - anv_cmd_clear_image(commandBuffer, image, imageLayout, + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + anv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *) pColor, rangeCount, pRanges); + + meta_clear_end(&saved_state, cmd_buffer); } void anv_CmdClearDepthStencilImage( VkCommandBuffer commandBuffer, - VkImage image, + VkImage image_h, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { - 
anv_cmd_clear_image(commandBuffer, image, imageLayout, + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + anv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *) pDepthStencil, rangeCount, pRanges); + + meta_clear_end(&saved_state, cmd_buffer); } void anv_CmdClearAttachments( -- cgit v1.2.3 From 46eef313119f0d08e674ee7f6f9934e75303892b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 Jan 2016 15:52:03 -0800 Subject: anv/meta_clear: Call emit_clear directly in ClearImage Using the load op means that we end up with recursive meta. We shouldn't be doing that. --- src/vulkan/anv_meta_clear.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index b5e257b1602..1bc470fb5de 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -888,18 +888,9 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, }; if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - att_desc.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - subpass_desc.colorAttachmentCount = 1; subpass_desc.pColorAttachments = &att_ref; } else { - if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { - att_desc.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - } - if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { - att_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - } - subpass_desc.pDepthStencilAttachment = &att_ref; } @@ -927,11 +918,28 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, }, .renderPass = pass, .framebuffer = fb, - .clearValueCount = 1, - .pClearValues = clear_value, + .clearValueCount = 0, + .pClearValues = NULL, }, VK_SUBPASS_CONTENTS_INLINE); + VkClearAttachment clear_att = { + .aspectMask = range->aspectMask, + .colorAttachment = 0, + .clearValue = *clear_value, + }; + + 
VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { iview.extent.width, iview.extent.height }, + }, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); ANV_CALL(DestroyRenderPass)(device_h, pass, &cmd_buffer->pool->alloc); -- cgit v1.2.3 From 0ae1bd321e5700dae4978ad61701b7470dd973ab Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 Jan 2016 16:17:13 -0800 Subject: anv/meta: Implement CmdFillBuffer --- src/vulkan/anv_meta.c | 10 ---- src/vulkan/anv_meta_clear.c | 114 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 87728943190..7d8403f0e33 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1384,16 +1384,6 @@ void anv_CmdUpdateBuffer( stub(); } -void anv_CmdFillBuffer( - VkCommandBuffer commandBuffer, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize fillSize, - uint32_t data) -{ - stub(); -} - void anv_CmdResolveImage( VkCommandBuffer commandBuffer, VkImage srcImage, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 1bc470fb5de..6ba27b97fe2 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -1015,3 +1015,117 @@ void anv_CmdClearAttachments( meta_clear_end(&saved_state, cmd_buffer); } + +static void +do_buffer_fill(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat fill_format, uint32_t data) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = fill_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels 
= 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }; + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + const VkClearValue clear_value = { + .color = { + .uint32 = { data, data, data, data } + } + }; + + const VkImageSubresourceRange range = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }; + + anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + &clear_value, 1, &range); +} + +void anv_CmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize fillSize, + uint32_t data) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + VkFormat format; + int bs; + if ((fillSize & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((fillSize & 7) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((fillSize & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + /* This is maximum possible width/height our HW can handle */ + const uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + const uint64_t max_fill_size = max_surface_dim * max_surface_dim * bs; + while (fillSize > max_fill_size) { + do_buffer_fill(cmd_buffer, 
dst_buffer->bo, + dst_buffer->offset + dstOffset, + max_surface_dim, max_surface_dim, format, data); + fillSize -= max_fill_size; + dstOffset += max_fill_size; + } + + uint64_t height = fillSize / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + const uint64_t rect_fill_size = height * max_surface_dim * bs; + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + max_surface_dim, height, format, data); + fillSize -= rect_fill_size; + dstOffset += rect_fill_size; + } + + if (fillSize != 0) { + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + fillSize / bs, 1, format, data); + } + + meta_clear_end(&saved_state, cmd_buffer); +} -- cgit v1.2.3 From eb2a119da284174068d9ad0a2ec4314a77ad1697 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 Jan 2016 16:39:48 -0800 Subject: anv/meta: Implement UpdateBuffer --- src/vulkan/anv_meta.c | 64 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 7d8403f0e33..a58b6b421cf 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -870,6 +870,60 @@ void anv_CmdCopyBuffer( meta_finish_blit(cmd_buffer, &saved_state); } +void anv_CmdUpdateBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + /* We can't quite grab a full block because the state stream needs a + * little data at the top to build its linked list. 
+ */ + const uint32_t max_update_size = + cmd_buffer->device->dynamic_state_block_pool.block_size - 64; + + assert(max_update_size < (1 << 14) * 4); + + while (dataSize) { + const uint32_t copy_size = MIN2(dataSize, max_update_size); + + struct anv_state tmp_data = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); + + memcpy(tmp_data.map, pData, copy_size); + + VkFormat format; + int bs; + if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + do_buffer_copy(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, + tmp_data.offset, + dst_buffer->bo, dst_buffer->offset + dstOffset, + copy_size / bs, 1, format); + + dataSize -= copy_size; + pData = (void *)pData + copy_size; + } +} + static VkFormat choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) { @@ -1374,16 +1428,6 @@ void anv_CmdCopyImageToBuffer( meta_finish_blit(cmd_buffer, &saved_state); } -void anv_CmdUpdateBuffer( - VkCommandBuffer commandBuffer, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize dataSize, - const uint32_t* pData) -{ - stub(); -} - void anv_CmdResolveImage( VkCommandBuffer commandBuffer, VkImage srcImage, -- cgit v1.2.3 From b3cc10f3b24527e06da175cb8422f1e4f301cd87 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 19 Jan 2016 17:34:24 -0800 Subject: nir: Let nir_opt_algebraic rules contain unsigned constants > INT_MAX. struct.pack('i', val) interprets `val` as a signed integer, and dies if `val` > INT_MAX. For larger constants, we need to use 'I' which interprets it as an unsigned value. This patch makes us use 'I' for all values >= 0, and 'i' for negative values. This should work in all cases. 
Signed-off-by: Kenneth Graunke --- src/glsl/nir/nir_algebraic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_algebraic.py b/src/glsl/nir/nir_algebraic.py index a30652f2afd..14c0e822ad8 100644 --- a/src/glsl/nir/nir_algebraic.py +++ b/src/glsl/nir/nir_algebraic.py @@ -108,7 +108,7 @@ class Constant(Value): if isinstance(self.value, (bool)): return 'NIR_TRUE' if self.value else 'NIR_FALSE' if isinstance(self.value, (int, long)): - return hex(struct.unpack('I', struct.pack('i', self.value))[0]) + return hex(struct.unpack('I', struct.pack('i' if self.value < 0 else 'I', self.value))[0]) elif isinstance(self.value, float): return hex(struct.unpack('I', struct.pack('f', self.value))[0]) else: -- cgit v1.2.3 From e79f8a4926dc79e32531f705b2db3bbd2d3984f4 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 19 Jan 2016 17:40:58 -0800 Subject: nir: Lower ldexp to arithmetic. This is a port of Matt's GLSL IR lowering pass to NIR. It's required because we translate SPIR-V directly to NIR, bypassing GLSL IR. I haven't introduced a lower_ldexp flag, as I believe all current NIR consumers would set the flag. i965 wants this, vc4 doesn't implement this feature, and st_glsl_to_tgsi currently lowers ldexp unconditionally anyway. Signed-off-by: Kenneth Graunke --- src/glsl/nir/nir_opt_algebraic.py | 63 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 1e80ba718ed..188c5b1b043 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -1,4 +1,5 @@ #! 
/usr/bin/env python +# -*- encoding: utf-8 -*- # # Copyright (C) 2014 Intel Corporation # @@ -267,6 +268,68 @@ for op in ['flt', 'fge', 'feq', 'fne', ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))), ] +def ldexp_to_arith(x, exp): + """ + Translates + ldexp x exp + into + + extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift); + resulting_biased_exp = extracted_biased_exp + exp; + + if (resulting_biased_exp < 1) { + return copysign(0.0, x); + } + + return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) | + lshift(i2u(resulting_biased_exp), exp_shift)); + + which we can't actually implement as such, since NIR doesn't have + vectorized if-statements. We actually implement it without branches + using conditional-select: + + extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift); + resulting_biased_exp = extracted_biased_exp + exp; + + is_not_zero_or_underflow = gequal(resulting_biased_exp, 1); + x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x)); + resulting_biased_exp = csel(is_not_zero_or_underflow, + resulting_biased_exp, 0); + + return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) | + lshift(i2u(resulting_biased_exp), exp_shift)); + """ + + sign_mask = 0x80000000 + exp_shift = 23 + exp_width = 8 + + # Extract the biased exponent from . + extracted_biased_exp = ('ushr', ('iabs', x), exp_shift) + resulting_biased_exp = ('iadd', extracted_biased_exp, exp) + + # Test if result is ±0.0, subnormal, or underflow by checking if the + # resulting biased exponent would be less than 0x1. If so, the result is + # 0.0 with the sign of x. (Actually, invert the conditions so that + # immediate values are the second arguments, which is better for i965) + zero_sign_x = ('iand', x, sign_mask) + + is_not_zero_or_underflow = ('ige', resulting_biased_exp, 0x1) + + # We could test for overflows by checking if the resulting biased exponent + # would be greater than 0xFE. 
Turns out we don't need to because the GLSL + # spec says: + # + # "If this product is too large to be represented in the + # floating-point type, the result is undefined." + + return ('bitfield_insert', + ('bcsel', is_not_zero_or_underflow, x, zero_sign_x), + ('bcsel', is_not_zero_or_underflow, resulting_biased_exp, 0), + exp_shift, exp_width) + +optimizations += [(('ldexp', 'x', 'exp'), ldexp_to_arith('x', 'exp'))] + # This section contains "late" optimizations that should be run after the # regular optimizations have finished. Optimizations should go here if # they help code generation but do not necessarily produce code that is -- cgit v1.2.3 From 891564adb90f090e0c635a30fb947baa0291703c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 Jan 2016 18:44:44 -0800 Subject: nir/spirv: Handle OpLine and OpNoLine in foreach_instruction This way we don't have to explicitly handle them everywhere. --- src/glsl/nir/spirv/spirv_to_nir.c | 38 ++++++++++++++++++++++++-------------- src/glsl/nir/spirv/vtn_private.h | 6 ++++++ 2 files changed, 30 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 1fcdb1aa14a..a117175341e 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -192,19 +192,37 @@ const uint32_t * vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, const uint32_t *end, vtn_instruction_handler handler) { + b->file = NULL; + b->line = -1; + b->col = -1; + const uint32_t *w = start; while (w < end) { SpvOp opcode = w[0] & SpvOpCodeMask; unsigned count = w[0] >> SpvWordCountShift; assert(count >= 1 && w + count <= end); - if (opcode == SpvOpNop) { - w++; - continue; - } + switch (opcode) { + case SpvOpNop: + break; /* Do nothing */ - if (!handler(b, opcode, w, count)) - return w; + case SpvOpLine: + b->file = vtn_value(b, w[1], vtn_value_type_string)->str; + b->line = w[2]; + b->col = w[3]; + break; + + case 
SpvOpNoLine: + b->file = NULL; + b->line = -1; + b->col = -1; + break; + + default: + if (!handler(b, opcode, w, count)) + return w; + break; + } w += count; } @@ -3498,10 +3516,6 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, assert(!"Invalid opcode types and variables section"); break; - case SpvOpLine: - case SpvOpNoLine: - break; /* Ignored for now */ - case SpvOpTypeVoid: case SpvOpTypeBool: case SpvOpTypeInt: @@ -3555,10 +3569,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { switch (opcode) { - case SpvOpLine: - case SpvOpNoLine: - break; /* Ignored for now */ - case SpvOpLabel: break; diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index cf75bc92a7a..a0cf1b9fd42 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -299,6 +299,12 @@ struct vtn_builder { nir_function_impl *impl; struct vtn_block *block; + /* Current file, line, and column. Useful for debugging. Set + * automatically by vtn_foreach_instruction. + */ + char *file; + int line, col; + /* * In SPIR-V, constants are global, whereas in NIR, the load_const * instruction we use is per-function. So while we parse each function, we -- cgit v1.2.3 From c7203aa621ee5cbc4a10fd5ae9a3d10dd38b8a98 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 Jan 2016 18:58:31 -0800 Subject: nir/spirv: Move OpPhi handling to vtn_cfg.c Phi handling is somewhat intrinsically tied to the CFG. Moving it here makes it a bit easier to handle that. In particular, we can now do SSA repair after we've done the phi node second-pass. This fixes 6 CTS tests. 
--- src/glsl/nir/spirv/spirv_to_nir.c | 70 ++---------------------------------- src/glsl/nir/spirv/vtn_cfg.c | 76 +++++++++++++++++++++++++++++++++++++-- src/glsl/nir/spirv/vtn_private.h | 4 +++ 3 files changed, 79 insertions(+), 71 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index a117175341e..56fcd934373 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -148,10 +148,6 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, return val; } -static struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, - struct vtn_type *src_type); - struct vtn_ssa_value * vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) { @@ -1751,7 +1747,7 @@ variable_is_external_block(nir_variable *var) var->data.mode == nir_var_shader_storage); } -static struct vtn_ssa_value * +struct vtn_ssa_value * vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, struct vtn_type *src_type) { @@ -3193,61 +3189,6 @@ vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, nir_builder_instr_insert(&b->nb, &intrin->instr); } -static void -vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) -{ - /* For handling phi nodes, we do a poor-man's out-of-ssa on the spot. - * For each phi, we create a variable with the appropreate type and do a - * load from that variable. Then, in a second pass, we add stores to - * that variable to each of the predecessor blocks. - * - * We could do something more intelligent here. However, in order to - * handle loops and things properly, we really need dominance - * information. It would end up basically being the into-SSA algorithm - * all over again. It's easier if we just let lower_vars_to_ssa do that - * for us instead of repeating it here. 
- */ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - - struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - nir_variable *phi_var = - nir_local_variable_create(b->nb.impl, type->type, "phi"); - _mesa_hash_table_insert(b->phi_table, w, phi_var); - - val->ssa = vtn_variable_load(b, nir_deref_var_create(b, phi_var), type); -} - -static bool -vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - if (opcode == SpvOpLabel) { - b->block = vtn_value(b, w[1], vtn_value_type_block)->block; - return true; - } - - if (opcode != SpvOpPhi) - return true; - - struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w); - assert(phi_entry); - nir_variable *phi_var = phi_entry->data; - - struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - - for (unsigned i = 3; i < count; i += 2) { - struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]); - struct vtn_block *pred = - vtn_value(b, w[i + 1], vtn_value_type_block)->block; - - b->nb.cursor = nir_after_block_before_jump(pred->end_block); - - vtn_variable_store(b, src, nir_deref_var_create(b, phi_var), type); - } - - return true; -} - static unsigned gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) { @@ -3775,10 +3716,6 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_composite(b, opcode, w, count); break; - case SpvOpPhi: - vtn_handle_phi_first_pass(b, w); - break; - case SpvOpEmitVertex: case SpvOpEndPrimitive: case SpvOpEmitStreamVertex: @@ -3851,11 +3788,8 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->impl = func->impl; b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, _mesa_key_pointer_equal); - b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer, - _mesa_key_pointer_equal); + vtn_function_emit(b, func, vtn_handle_body_instruction); - vtn_foreach_instruction(b, func->start_block->label, func->end, - vtn_handle_phi_second_pass); } 
assert(b->entry_point->value_type == vtn_value_type_function); diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index 9c2e271cda1..0d3702c37b2 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -451,6 +451,66 @@ vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end) } } +static bool +vtn_handle_phis_first_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpLabel) + return true; /* Nothing to do */ + + /* If this isn't a phi node, stop. */ + if (opcode != SpvOpPhi) + return false; + + /* For handling phi nodes, we do a poor-man's out-of-ssa on the spot. + * For each phi, we create a variable with the appropreate type and + * do a load from that variable. Then, in a second pass, we add + * stores to that variable to each of the predecessor blocks. + * + * We could do something more intelligent here. However, in order to + * handle loops and things properly, we really need dominance + * information. It would end up basically being the into-SSA + * algorithm all over again. It's easier if we just let + * lower_vars_to_ssa do that for us instead of repeating it here. 
+ */ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_variable *phi_var = + nir_local_variable_create(b->nb.impl, type->type, "phi"); + _mesa_hash_table_insert(b->phi_table, w, phi_var); + + val->ssa = vtn_variable_load(b, nir_deref_var_create(b, phi_var), type); + + return true; +} + +static bool +vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode != SpvOpPhi) + return true; + + struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w); + assert(phi_entry); + nir_variable *phi_var = phi_entry->data; + + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + + for (unsigned i = 3; i < count; i += 2) { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]); + struct vtn_block *pred = + vtn_value(b, w[i + 1], vtn_value_type_block)->block; + + b->nb.cursor = nir_after_block_before_jump(pred->end_block); + + vtn_variable_store(b, src, nir_deref_var_create(b, phi_var), type); + } + + return true; +} + static void vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type, nir_variable *switch_fall_var, bool *has_switch_break) @@ -492,9 +552,14 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, case vtn_cf_node_type_block: { struct vtn_block *block = (struct vtn_block *)node; - vtn_foreach_instruction(b, block->label, - block->merge ? block->merge : block->branch, - handler); + const uint32_t *block_start = block->label; + const uint32_t *block_end = block->merge ? 
block->merge : + block->branch; + + block_start = vtn_foreach_instruction(b, block_start, block_end, + vtn_handle_phis_first_pass); + + vtn_foreach_instruction(b, block_start, block_end, handler); block->end_block = nir_cursor_current_block(b->nb.cursor); @@ -682,9 +747,14 @@ vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, nir_builder_init(&b->nb, func->impl); b->nb.cursor = nir_after_cf_list(&func->impl->body); b->has_loop_continue = false; + b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler); + vtn_foreach_instruction(b, func->start_block->label, func->end, + vtn_handle_phi_second_pass); + /* Continue blocks for loops get inserted before the body of the loop * but instructions in the continue may use SSA defs in the loop body. * Therefore, we need to repair SSA to insert the needed phi nodes. diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index a0cf1b9fd42..9a066d6cdba 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -383,6 +383,10 @@ struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b, struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src); +struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *src_type); + void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, nir_deref_var *dest, struct vtn_type *dest_type); -- cgit v1.2.3 From a0516cfbace8c7d30d01fe6e6c76c2414187c21a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 Jan 2016 07:33:41 -0800 Subject: anv/meta: Fix a finishme --- src/vulkan/anv_meta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index a58b6b421cf..3331d3abe08 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1139,8 
+1139,8 @@ void anv_CmdBlitImage( if (pRegions[r].srcSubresource.layerCount > 1) anv_finishme("FINISHME: copy multiple array layers"); - if (pRegions[r].srcOffsets[0].z != pRegions[r].srcOffsets[1].z || - pRegions[r].dstOffsets[0].z != pRegions[r].dstOffsets[1].z) + if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z || + pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z) anv_finishme("FINISHME: copy multiple depth layers"); struct anv_image_view dest_iview; -- cgit v1.2.3 From 59ef7c6507b797447420fe308d7819605342119d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 Jan 2016 07:56:48 -0800 Subject: anv/meta: fix UpdateBuffer in the case where we do multiple updates --- src/vulkan/anv_meta.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 3331d3abe08..9e13299e3a2 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -920,6 +920,7 @@ void anv_CmdUpdateBuffer( copy_size / bs, 1, format); dataSize -= copy_size; + dstOffset += copy_size; pData = (void *)pData + copy_size; } } -- cgit v1.2.3 From 34f9a5f3018b40476d1a126f02842914abc0f1dd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 Jan 2016 11:11:30 -0800 Subject: nir/spirv: Pull texture dimensionality out of the image when available --- src/glsl/nir/spirv/spirv_to_nir.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 56fcd934373..e8ce588e14e 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2512,11 +2512,16 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); - const struct glsl_type *sampler_type = - nir_deref_tail(&sampled.sampler->deref)->type; - instr->sampler_dim = glsl_get_sampler_dim(sampler_type); - instr->is_array = 
glsl_sampler_type_is_array(sampler_type); - instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); + const struct glsl_type *image_type; + if (sampled.image) { + image_type = nir_deref_tail(&sampled.image->deref)->type; + } else { + image_type = nir_deref_tail(&sampled.sampler->deref)->type; + } + + instr->sampler_dim = glsl_get_sampler_dim(image_type); + instr->is_array = glsl_sampler_type_is_array(image_type); + instr->is_shadow = glsl_sampler_type_is_shadow(image_type); instr->is_new_style_shadow = instr->is_shadow; if (has_coord) { @@ -2544,7 +2549,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, instr->coord_components = 0; } - switch (glsl_get_sampler_result_type(sampler_type)) { + switch (glsl_get_sampler_result_type(image_type)) { case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; -- cgit v1.2.3 From e45748badea913b90a51744ee1d8a509a3ae8165 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 Jan 2016 11:16:44 -0800 Subject: anv/device: Default to scalar GS on BDW+ --- src/vulkan/anv_device.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 9248f912c69..949cfe97772 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -30,6 +30,7 @@ #include "anv_private.h" #include "mesa/main/git_sha1.h" #include "util/strtod.h" +#include "util/debug.h" #include "gen7_pack.h" @@ -137,6 +138,10 @@ anv_physical_device_init(struct anv_physical_device *device, device->compiler->shader_debug_log = compiler_debug_log; device->compiler->shader_perf_log = compiler_perf_log; + /* Default to use scalar GS on BDW+ */ + device->compiler->scalar_stage[MESA_SHADER_GEOMETRY] = + device->info->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", true); + /* XXX: Actually detect bit6 swizzling */ isl_device_init(&device->isl_dev, 
device->info, swizzled); -- cgit v1.2.3 From c7896d1868a938e00c776e029a42cc4088eab0bb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 Jan 2016 11:36:26 -0800 Subject: spirv/nir/glsl450: Use vtn_create_ssa_value to create SSA values --- src/glsl/nir/spirv/vtn_glsl450.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index b59411e1f8d..d38e1c73d9d 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -375,9 +375,11 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, const uint32_t *w, unsigned count) { struct nir_builder *nb = &b->nb; + const struct glsl_type *dest_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = rzalloc(b, struct vtn_ssa_value); - val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, dest_type); /* Collect the various SSA sources */ unsigned num_inputs = count - 5; -- cgit v1.2.3 From 21b2d87408232db710a1ae51d2cd0c79b00e4355 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 Jan 2016 11:36:41 -0800 Subject: nir/spirv/glsl450: Implement FrexpStruct --- src/glsl/nir/spirv/vtn_glsl450.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index d38e1c73d9d..eb81620ea11 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -612,8 +612,14 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; } + case GLSLstd450FrexpStruct: { + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = build_frexp(nb, src[0], + &val->ssa->elems[1]->def); + return; + } + case GLSLstd450ModfStruct: - case GLSLstd450FrexpStruct: case GLSLstd450PackDouble2x32: case 
GLSLstd450UnpackDouble2x32: case GLSLstd450IMix: -- cgit v1.2.3 From 947ebd9c71391b18a684170d7312c558f100b84e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 20 Jan 2016 12:02:48 -0800 Subject: isl: Add ish.h to libsil_la_SOURCES --- src/isl/Makefile.am | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/isl/Makefile.am b/src/isl/Makefile.am index 6fd1da669a3..72f5460554f 100644 --- a/src/isl/Makefile.am +++ b/src/isl/Makefile.am @@ -44,6 +44,7 @@ libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init libisl_la_SOURCES = \ isl.c \ + isl.h \ isl_format.c \ isl_format_layout.c \ isl_gen4.c \ -- cgit v1.2.3 From b76e4458f90ee887ea3c3108e7807099b70df057 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 20 Jan 2016 12:09:29 -0800 Subject: nir/spirv/glsl450: Use fabs not iabs in ldexp. This was just wrong. --- src/glsl/nir/nir_opt_algebraic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 188c5b1b043..a46cbf711ac 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -305,7 +305,7 @@ def ldexp_to_arith(x, exp): exp_width = 8 # Extract the biased exponent from . 
- extracted_biased_exp = ('ushr', ('iabs', x), exp_shift) + extracted_biased_exp = ('ushr', ('fabs', x), exp_shift) resulting_biased_exp = ('iadd', extracted_biased_exp, exp) # Test if result is ±0.0, subnormal, or underflow by checking if the -- cgit v1.2.3 From 420e8664cbe5e17d7460987f5306bc6e1c74f63f Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 20 Jan 2016 13:45:25 -0800 Subject: vk/tests: Add isl include path --- src/vulkan/tests/Makefile.am | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/tests/Makefile.am b/src/vulkan/tests/Makefile.am index 7b15bb002be..883013d86c6 100644 --- a/src/vulkan/tests/Makefile.am +++ b/src/vulkan/tests/Makefile.am @@ -30,6 +30,7 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/isl/ \ -I$(top_srcdir)/src/vulkan LDADD = \ -- cgit v1.2.3 From 8ef002dd7a284e69cf8b725765528225afcb8ebd Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 20 Jan 2016 13:46:54 -0800 Subject: vk/tests: Add stub for anv_gem_get_bit6_swizzle() --- src/vulkan/anv_gem_stubs.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_gem_stubs.c b/src/vulkan/anv_gem_stubs.c index f6b494628e0..3204fefb28e 100644 --- a/src/vulkan/anv_gem_stubs.c +++ b/src/vulkan/anv_gem_stubs.c @@ -122,6 +122,12 @@ anv_gem_get_param(int fd, uint32_t param) unreachable("Unused"); } +bool +anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) +{ + unreachable("Unused"); +} + int anv_gem_create_context(struct anv_device *device) { -- cgit v1.2.3 From 7b7a7c2bfcf9f5cd51e8937125e736af6a21cab0 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 20 Jan 2016 14:36:52 -0800 Subject: vk: Make maxSamplerAllocationCount more reasonable We can't allocate 4 billion samplers. Let's go with 64k. 
--- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 949cfe97772..7d0f25e7d70 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -413,7 +413,7 @@ void anv_GetPhysicalDeviceProperties( .maxStorageBufferRange = UINT32_MAX, .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, .maxMemoryAllocationCount = UINT32_MAX, - .maxSamplerAllocationCount = UINT32_MAX, + .maxSamplerAllocationCount = 64 * 1024, .bufferImageGranularity = 64, /* A cache line */ .sparseAddressSpaceSize = 0, .maxBoundDescriptorSets = MAX_SETS, -- cgit v1.2.3 From 8ab527de03a5bb9acdf1b88378623ebfadfccf42 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 15 Jan 2016 10:00:13 -0800 Subject: isl: Add a README Most of the file-level comment in isl.h is moved to the README. --- src/isl/README | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/isl/isl.h | 82 +---------------------------------------- 2 files changed, 114 insertions(+), 81 deletions(-) create mode 100644 src/isl/README (limited to 'src') diff --git a/src/isl/README b/src/isl/README new file mode 100644 index 00000000000..1ab4313fcc5 --- /dev/null +++ b/src/isl/README @@ -0,0 +1,113 @@ +Intel Surface Layout + +Introduction +============ +isl is a small library that calculates the layout of Intel GPU surfaces, queries +those layouts, and queries the properties of surface formats. + + +Independence from User APIs +=========================== +isl's API is independent of any user-facing graphics API, such as OpenGL and +Vulkan. This independence allows isl to be used a shared component by multiple +Intel drivers. + +Rather than mimic the user-facing APIs, the isl API attempts to reflect Intel +hardware: the actual memory layout of Intel GPU surfaces and how one programs +the GPU to use those surfaces. 
For example: + + - The tokens of `enum isl_format` (such as `ISL_FORMAT_R8G8B8A8_UNORM`) + match those of the hardware enum `SURFACE_FORMAT` rather than the OpenGL + or Vulkan format tokens. And the values of `isl_format` and + `SURFACE_FORMAT` are identical. + + - The OpenGL and Vulkan APIs contain depth and stencil formats. However the + hardware enum `SURFACE_FORMAT` does not, and therefore neither does `enum + isl_format`. Rather than define new pixel formats that have no hardware + counterpart, isl records the intent to use a surface as a depth or stencil + buffer with the usage flags `ISL_SURF_USAGE_DEPTH_BIT` and + `ISL_SURF_USAGE_STENCIL_BIT`. + + - `struct isl_surf` distinguishes between the surface's logical dimension + from the user API's perspective (`enum isl_surf_dim`, which may be 1D, 2D, + or 3D) and the layout of those dimensions in memory (`enum isl_dim_layout`). + + +Surface Units +============= + +Intro +----- +ISL takes care in its equations to correctly handle conversion among surface +units (such as pixels and compression blocks) and to carefully distinguish +between a surface's logical layout in the client API and its physical layout +in memory. + +Symbol names often explicitly declare their unit with a suffix: + + - px: logical pixels + - sa: physical surface samples + - el: physical surface elements + - sa_rows: rows of physical surface samples + - el_rows: rows of physical surface elements + +Logical units are independent of hardware generation and are closely related +to the user-facing API (OpenGL and Vulkan). Physical units are dependent on +hardware generation and reflect the surface's layout in memory. + +Definitions +----------- +- Logical Pixels (px): + + The surface's layout from the perspective of the client API (OpenGL and + Vulkan) is in units of logical pixels. Logical pixels are independent of the + surface's layout in memory. 
+ + A surface's width and height, in units of logical pixels, is not affected by + the surface's sample count. For example, consider a VkImage created with + VkImageCreateInfo{width=w0, height=h0, samples=s0}. The surface's width and + height at level 0 is, in units of logical pixels, w0 and h0 regardless of + the value of s0. + + For example, the logical array length of a 3D surface is always 1, even on + Gen9 where the surface's memory layout is that of an array surface + (ISL_DIM_LAYOUT_GEN4_2D). + +- Physical Surface Samples (sa): + + For a multisampled surface, this unit has the obvious meaning. + A singlesampled surface, from ISL's perspective, is simply a multisampled + surface whose sample count is 1. + + For example, consider a 2D single-level non-array surface with samples=4, + width_px=64, and height_px=64 (note that the suffix 'px' indicates logical + pixels). If the surface's multisample layout is ISL_MSAA_LAYOUT_INTERLEAVED, + then the extent of level 0 is, in units of physical surface samples, + width_sa=128, height_sa=128, depth_sa=1, array_length_sa=1. If + ISL_MSAA_LAYOUT_ARRAY, then width_sa=64, height_sa=64, depth_sa=1, + array_length_sa=4. + +- Physical Surface Elements (el): + + This unit allows ISL to treat compressed and uncompressed formats + identically in many calculations. + + If the surface's pixel format is compressed, such as ETC2, then a surface + element is equivalent to a compression block. If uncompressed, then + a surface element is equivalent to a surface sample. As a corollary, for + a given surface a surface element is at least as large as a surface sample. + +Errata +------ +ISL acquired the term 'surface element' from the Broadwell PRM [1], which +defines it as follows: + + An element is defined as a pixel in uncompresed surface formats, and as + a compression block in compressed surface formats. For MSFMT_DEPTH_STENCIL + type multisampled surfaces, an element is a sample. 
+ + +References +========== +[1]: Broadwell PRM >> Volume 2d: Command Reference: Structures >> + RENDER_SURFACE_STATE Surface Vertical Alignment (p325) diff --git a/src/isl/isl.h b/src/isl/isl.h index 2194818d7b7..4b3f179303d 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -26,93 +26,13 @@ * @brief Intel Surface Layout * * Header Layout - * ============= - * + * ------------- * The header is ordered as: * - forward declarations * - macros that may be overridden at compile-time for specific gens * - enums and constants * - structs and unions * - functions - * - * - * Surface Units - * ============= - * - * Intro - * ----- - * ISL takes care in its equations to correctly handle conversion among - * surface units (such as pixels and compression blocks) and to carefully - * distinguish between a surface's logical layout in the client API and its - * physical layout in memory. - * - * Symbol names often explicitly declare their unit with a suffix: - * - * - px: logical pixels - * - sa: physical surface samples - * - el: physical surface elements - * - sa_rows: rows of physical surface samples - * - el_rows: rows of physical surface elements - * - * Logical units are independent of hardware generation and are closely - * related to the user-facing API (OpenGL and Vulkan). Physical units are - * dependent on hardware generation and reflect the surface's layout in - * memory. - * - * Definitions - * ----------- - * - Logical Pixels (px): - * - * The surface's layout from the perspective of the client API (OpenGL and - * Vulkan) is in units of logical pixels. Logical pixels are independent of - * the surface's layout in memory. - * - * A surface's width and height, in units of logical pixels, is not affected - * by the surface's sample count. For example, consider a VkImage created - * with VkImageCreateInfo{width=w0, height=h0, samples=s0}. The surface's - * width and height at level 0 is, in units of logical pixels, w0 and h0 - * regardless of the value of s0. 
- * - * For example, the logical array length of a 3D surface is always 1, even - * on Gen9 where the surface's memory layout is that of an array surface - * (ISL_DIM_LAYOUT_GEN4_2D). - * - * - Physical Surface Samples (sa): - * - * For a multisampled surface, this unit has the obvious meaning. - * A singlesampled surface, from ISL's perspective, is simply a multisampled - * surface whose sample count is 1. - * - * For example, consider a 2D single-level non-array surface with samples=4, - * width_px=64, and height_px=64 (note that the suffix 'px' indicates - * logical pixels). If the surface's multisample layout is - * ISL_MSAA_LAYOUT_INTERLEAVED, then the extent of level 0 is, in units of - * physical surface samples, width_sa=128, height_sa=128, depth_sa=1, - * array_length_sa=1. If ISL_MSAA_LAYOUT_ARRAY, then width_sa=64, - * height_sa=64, depth_sa=1, array_length_sa=4. - * - * - Physical Surface Elements (el): - * - * This unit allows ISL to treat compressed and uncompressed formats - * identically in many calculations. - * - * If the surface's pixel format is compressed, such as ETC2, then a surface - * element is equivalent to a compression block. If uncompressed, then - * a surface element is equivalent to a surface sample. As a corollary, for - * a given surface a surface element is at least as large as a surface - * sample. - * - * Errata - * ------ - * ISL acquired the term 'element' from the Broadwell PRM [1], which defines - * a surface element as follows: - * - * An element is defined as a pixel in uncompresed surface formats, and as - * a compression block in compressed surface formats. For - * MSFMT_DEPTH_STENCIL type multisampled surfaces, an element is a sample. 
- * - * [1]: Broadwell PRM >> Volume 2d: Command Reference: Structures >> - * RENDER_SURFACE_STATE Surface Vertical Alignment (p325) */ #pragma once -- cgit v1.2.3 From 5ce5a7d0210758fc3cd3edefa7a8de12ddda0276 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 20 Jan 2016 15:41:25 -0800 Subject: anv/image: Stop including gen8_pack.h in common file --- src/vulkan/anv_image.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 3b7ea42aa63..ba3b3b254cd 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -29,11 +29,6 @@ #include "anv_private.h" -/* FIXME: We shouldn't be using the actual hardware enum values here. They - * change across gens. Once we get that fixed, this include needs to go. - */ -#include "gen8_pack.h" - /** * The \a format argument is required and overrides any format found in struct * anv_image_create_info. Exactly one bit must be set in \a aspect. -- cgit v1.2.3 From 9f4a72c9e351249917a1308beffb7606e8fd1b41 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 15 Jan 2016 18:38:19 -0800 Subject: i965/fs/nir: Move shared variable load/store to nir_emit_cs_intrinsic Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 152 +++++++++++++++---------------- 1 file changed, 76 insertions(+), 76 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 134542620d4..62d801f7264 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2321,6 +2321,82 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr); break; + case nir_intrinsic_load_shared: { + assert(devinfo->gen >= 7); + + fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); + + /* Get the offset to read from */ + fs_reg offset_reg; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + if 
(const_offset) { + offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]); + } else { + offset_reg = vgrf(glsl_type::uint_type); + bld.ADD(offset_reg, + retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0])); + } + + /* Read the vector */ + fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, + 1 /* dims */, + instr->num_components, + BRW_PREDICATE_NONE); + read_result.type = dest.type; + for (int i = 0; i < instr->num_components; i++) + bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); + + break; + } + + case nir_intrinsic_store_shared: { + assert(devinfo->gen >= 7); + + /* Block index */ + fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); + + /* Value */ + fs_reg val_reg = get_nir_src(instr->src[0]); + + /* Writemask */ + unsigned writemask = instr->const_index[1]; + + /* Combine groups of consecutive enabled channels in one write + * message. We use ffs to find the first enabled channel and then ffs on + * the bit-inverse, down-shifted writemask to determine the length of + * the block of enabled bits. + */ + while (writemask) { + unsigned first_component = ffs(writemask) - 1; + unsigned length = ffs(~(writemask >> first_component)) - 1; + fs_reg offset_reg; + + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] + + 4 * first_component); + } else { + offset_reg = vgrf(glsl_type::uint_type); + bld.ADD(offset_reg, + retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0] + 4 * first_component)); + } + + emit_untyped_write(bld, surf_index, offset_reg, + offset(val_reg, bld, first_component), + 1 /* dims */, length, + BRW_PREDICATE_NONE); + + /* Clear the bits in the writemask that we just wrote, then try + * again to see if more channels are left. 
+ */ + writemask &= (15 << (first_component + length)); + } + + break; + } + default: nir_emit_intrinsic(bld, instr); break; @@ -2646,82 +2722,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_shared: { - assert(devinfo->gen >= 7); - - fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); - - /* Get the offset to read from */ - fs_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { - offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]); - } else { - offset_reg = vgrf(glsl_type::uint_type); - bld.ADD(offset_reg, - retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(instr->const_index[0])); - } - - /* Read the vector */ - fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, - instr->num_components, - BRW_PREDICATE_NONE); - read_result.type = dest.type; - for (int i = 0; i < instr->num_components; i++) - bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); - - break; - } - - case nir_intrinsic_store_shared: { - assert(devinfo->gen >= 7); - - /* Block index */ - fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); - - /* Value */ - fs_reg val_reg = get_nir_src(instr->src[0]); - - /* Writemask */ - unsigned writemask = instr->const_index[1]; - - /* Combine groups of consecutive enabled channels in one write - * message. We use ffs to find the first enabled channel and then ffs on - * the bit-inverse, down-shifted writemask to determine the length of - * the block of enabled bits. 
- */ - while (writemask) { - unsigned first_component = ffs(writemask) - 1; - unsigned length = ffs(~(writemask >> first_component)) - 1; - fs_reg offset_reg; - - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset) { - offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] + - 4 * first_component); - } else { - offset_reg = vgrf(glsl_type::uint_type); - bld.ADD(offset_reg, - retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(instr->const_index[0] + 4 * first_component)); - } - - emit_untyped_write(bld, surf_index, offset_reg, - offset(val_reg, bld, first_component), - 1 /* dims */, length, - BRW_PREDICATE_NONE); - - /* Clear the bits in the writemask that we just wrote, then try - * again to see if more channels are left. - */ - writemask &= (15 << (first_component + length)); - } - - break; - } - case nir_intrinsic_load_input: { fs_reg src; if (stage == MESA_SHADER_VERTEX) { -- cgit v1.2.3 From 65a5407931b2be0b4a01de56586ba4694ee8fc13 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 16 Jan 2016 23:11:16 -0800 Subject: nir/print: Add space after shader_storage var mode Signed-off-by: Jordan Justen --- src/glsl/nir/nir_print.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index a231494d4ad..e1261aed5ef 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -312,7 +312,7 @@ print_var_decl(nir_variable *var, print_state *state) const char *const patch = (var->data.patch) ? "patch " : ""; const char *const inv = (var->data.invariant) ? 
"invariant " : ""; const char *const mode[] = { "shader_in ", "shader_out ", "", "", - "uniform ", "shader_storage", "system " }; + "uniform ", "shader_storage ", "system " }; fprintf(fp, "%s%s%s%s%s%s ", cent, samp, patch, inv, mode[var->data.mode], -- cgit v1.2.3 From 10db985fa06177289a9ce8a463d676845f54c141 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 8 Jan 2016 17:16:29 -0800 Subject: nir: Add compute shader shared variable storage class Previously we were receiving shared variable accesses via a lowered intrinsic function from glsl. This change allows us to send in variables instead. For example, when converting from SPIR-V. Signed-off-by: Jordan Justen --- src/glsl/nir/nir.c | 6 ++++++ src/glsl/nir/nir.h | 4 ++++ src/glsl/nir/nir_clone.c | 1 + src/glsl/nir/nir_lower_atomics.c | 3 ++- src/glsl/nir/nir_print.c | 7 ++++++- src/glsl/nir/nir_sweep.c | 1 + src/glsl/nir/nir_validate.c | 5 +++++ 7 files changed, 25 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 273cac3e0ac..deed3233da0 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -39,6 +39,7 @@ nir_shader_create(void *mem_ctx, exec_list_make_empty(&shader->uniforms); exec_list_make_empty(&shader->inputs); exec_list_make_empty(&shader->outputs); + exec_list_make_empty(&shader->shared); shader->options = options; memset(&shader->info, 0, sizeof(shader->info)); @@ -132,6 +133,11 @@ nir_shader_add_variable(nir_shader *shader, nir_variable *var) exec_list_push_tail(&shader->uniforms, &var->node); break; + case nir_var_shared: + assert(shader->stage == MESA_SHADER_COMPUTE); + exec_list_push_tail(&shader->shared, &var->node); + break; + case nir_var_system_value: exec_list_push_tail(&shader->system_values, &var->node); break; diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 49af2f31e9e..80e93c10471 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -88,6 +88,7 @@ typedef enum { nir_var_local, nir_var_uniform, 
nir_var_shader_storage, + nir_var_shared, nir_var_system_value } nir_variable_mode; @@ -1635,6 +1636,9 @@ typedef struct nir_shader { /** list of outputs (nir_variable) */ struct exec_list outputs; + /** list of shared compute variables (nir_variable) */ + struct exec_list shared; + /** Set of driver-specific options for the shader. * * The memory for the options is expected to be kept in a single static diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c index 0ea8da77421..702219a5a34 100644 --- a/src/glsl/nir/nir_clone.c +++ b/src/glsl/nir/nir_clone.c @@ -673,6 +673,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s) clone_var_list(&state, &ns->uniforms, &s->uniforms); clone_var_list(&state, &ns->inputs, &s->inputs); clone_var_list(&state, &ns->outputs, &s->outputs); + clone_var_list(&state, &ns->shared, &s->shared); clone_var_list(&state, &ns->globals, &s->globals); clone_var_list(&state, &ns->system_values, &s->system_values); diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c index 1aa78e18a85..a9d0ddbbd41 100644 --- a/src/glsl/nir/nir_lower_atomics.c +++ b/src/glsl/nir/nir_lower_atomics.c @@ -63,7 +63,8 @@ lower_instr(nir_intrinsic_instr *instr, } if (instr->variables[0]->var->data.mode != nir_var_uniform && - instr->variables[0]->var->data.mode != nir_var_shader_storage) + instr->variables[0]->var->data.mode != nir_var_shader_storage && + instr->variables[0]->var->data.mode != nir_var_shared) return; /* atomics passed as function arguments can't be lowered */ void *mem_ctx = ralloc_parent(instr); diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index e1261aed5ef..1bbdaa21177 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -312,7 +312,8 @@ print_var_decl(nir_variable *var, print_state *state) const char *const patch = (var->data.patch) ? "patch " : ""; const char *const inv = (var->data.invariant) ? 
"invariant " : ""; const char *const mode[] = { "shader_in ", "shader_out ", "", "", - "uniform ", "shader_storage ", "system " }; + "uniform ", "shader_storage ", "shared ", + "system "}; fprintf(fp, "%s%s%s%s%s%s ", cent, samp, patch, inv, mode[var->data.mode], @@ -1053,6 +1054,10 @@ nir_print_shader(nir_shader *shader, FILE *fp) print_var_decl(var, &state); } + nir_foreach_variable(var, &shader->shared) { + print_var_decl(var, &state); + } + nir_foreach_variable(var, &shader->globals) { print_var_decl(var, &state); } diff --git a/src/glsl/nir/nir_sweep.c b/src/glsl/nir/nir_sweep.c index 0710bdba7c7..5c62154ec7f 100644 --- a/src/glsl/nir/nir_sweep.c +++ b/src/glsl/nir/nir_sweep.c @@ -159,6 +159,7 @@ nir_sweep(nir_shader *nir) steal_list(nir, nir_variable, &nir->uniforms); steal_list(nir, nir_variable, &nir->inputs); steal_list(nir, nir_variable, &nir->outputs); + steal_list(nir, nir_variable, &nir->shared); steal_list(nir, nir_variable, &nir->globals); steal_list(nir, nir_variable, &nir->system_values); steal_list(nir, nir_register, &nir->registers); diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c index 91c01ff8727..1a943d76314 100644 --- a/src/glsl/nir/nir_validate.c +++ b/src/glsl/nir/nir_validate.c @@ -1036,6 +1036,11 @@ nir_validate_shader(nir_shader *shader) validate_var_decl(var, true, &state); } + exec_list_validate(&shader->shared); + nir_foreach_variable(var, &shader->shared) { + validate_var_decl(var, true, &state); + } + exec_list_validate(&shader->globals); nir_foreach_variable(var, &shader->globals) { validate_var_decl(var, true, &state); -- cgit v1.2.3 From 7a9a54b5c81d3e64a5e5fa2bf52b14b3acf28c2f Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 18 Jan 2016 09:53:44 -0800 Subject: nir: Add atomic operations on variables This allows us to first generate atomic operations for shared variables using these opcodes, and then later we can lower those to the shared atomics intrinsics with nir_lower_io. 
Signed-off-by: Jordan Justen --- src/glsl/nir/nir_intrinsics.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 78c25707dc9..b6996ee3f47 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -194,6 +194,33 @@ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 3, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* + * variable atomic intrinsics + * + * All of these variable atomic memory operations read a value from memory, + * compute a new value using one of the operations below, write the new value + * to memory, and return the original value read. + * + * All operations take 1 source except CompSwap that takes 2. These sources + * represent: + * + * 0: The data parameter to the atomic function (i.e. the value to add + * in shared_atomic_add, etc). + * 1: For CompSwap only: the second data parameter. + * + * All operations take 1 variable deref. 
+ */ +INTRINSIC(var_atomic_add, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_imin, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_umin, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_imax, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_umax, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_and, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_or, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_xor, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_exchange, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_comp_swap, 2, ARR(1, 1), true, 1, 1, 0, 0) + /* * SSBO atomic intrinsics * -- cgit v1.2.3 From 36157cd5eaafe3cdf3968fadb69e78895dc7751f Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 18 Jan 2016 09:44:31 -0800 Subject: nir: Add support for lowering load/stores of shared variables Signed-off-by: Jordan Justen --- src/glsl/nir/nir.c | 1 + src/glsl/nir/nir.h | 2 +- src/glsl/nir/nir_clone.c | 1 + src/glsl/nir/nir_lower_io.c | 35 ++++++++++++++++++++++++++++------- src/glsl/nir/nir_print.c | 1 + 5 files changed, 32 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index deed3233da0..42a53f6f3db 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -53,6 +53,7 @@ nir_shader_create(void *mem_ctx, shader->num_inputs = 0; shader->num_outputs = 0; shader->num_uniforms = 0; + shader->num_shared = 0; shader->stage = stage; diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 80e93c10471..4e3533189e4 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1667,7 +1667,7 @@ typedef struct nir_shader { * the highest index a load_input_*, load_uniform_*, etc. intrinsic can * access plus one */ - unsigned num_inputs, num_uniforms, num_outputs; + unsigned num_inputs, num_uniforms, num_outputs, num_shared; /** The shader stage, such as MESA_SHADER_VERTEX. 
*/ gl_shader_stage stage; diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c index 702219a5a34..bc6df56b753 100644 --- a/src/glsl/nir/nir_clone.c +++ b/src/glsl/nir/nir_clone.c @@ -703,6 +703,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s) ns->num_inputs = s->num_inputs; ns->num_uniforms = s->num_uniforms; ns->num_outputs = s->num_outputs; + ns->num_shared = s->num_shared; free_clone_state(&state); diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 7ad5c6521a7..31490313ebc 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -160,6 +160,29 @@ load_op(struct lower_io_state *state, case nir_var_uniform: op = nir_intrinsic_load_uniform; break; + case nir_var_shared: + op = nir_intrinsic_load_shared; + break; + default: + unreachable("Unknown variable mode"); + } + return op; +} + +static nir_intrinsic_op +store_op(struct lower_io_state *state, + nir_variable_mode mode, bool per_vertex) +{ + nir_intrinsic_op op; + switch (mode) { + case nir_var_shader_in: + case nir_var_shader_out: + op = per_vertex ? nir_intrinsic_store_per_vertex_output : + nir_intrinsic_store_output; + break; + case nir_var_shared: + op = nir_intrinsic_store_shared; + break; default: unreachable("Unknown variable mode"); } @@ -190,6 +213,7 @@ nir_lower_io_block(nir_block *block, void *void_state) if (mode != nir_var_shader_in && mode != nir_var_shader_out && + mode != nir_var_shared && mode != nir_var_uniform) continue; @@ -236,7 +260,7 @@ nir_lower_io_block(nir_block *block, void *void_state) } case nir_intrinsic_store_var: { - assert(mode == nir_var_shader_out); + assert(mode == nir_var_shader_out || mode == nir_var_shared); nir_ssa_def *offset; nir_ssa_def *vertex_index; @@ -248,12 +272,9 @@ nir_lower_io_block(nir_block *block, void *void_state) per_vertex ? &vertex_index : NULL, state->type_size); - nir_intrinsic_op store_op = - per_vertex ? 
nir_intrinsic_store_per_vertex_output : - nir_intrinsic_store_output; - - nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, - store_op); + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(state->mem_ctx, + store_op(state, mode, per_vertex)); store->num_components = intrin->num_components; nir_src_copy(&store->src[0], &intrin->src[0], store); diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index 1bbdaa21177..850774b1099 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -1041,6 +1041,7 @@ nir_print_shader(nir_shader *shader, FILE *fp) fprintf(fp, "inputs: %u\n", shader->num_inputs); fprintf(fp, "outputs: %u\n", shader->num_outputs); fprintf(fp, "uniforms: %u\n", shader->num_uniforms); + fprintf(fp, "shared: %u\n", shader->num_shared); nir_foreach_variable(var, &shader->uniforms) { print_var_decl(var, &state); -- cgit v1.2.3 From ca55817fa1cd21bf129255fb8681b05e70dea338 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 18 Jan 2016 09:59:19 -0800 Subject: nir: Lower shared var atomics during nir_lower_io Signed-off-by: Jordan Justen --- src/glsl/nir/nir_lower_io.c | 86 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 31490313ebc..90cf2e3acfd 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -189,6 +189,27 @@ store_op(struct lower_io_state *state, return op; } +static nir_intrinsic_op +atomic_op(nir_intrinsic_op opcode) +{ + switch (opcode) { +#define OP(O) case nir_intrinsic_var_##O: return nir_intrinsic_shared_##O; + OP(atomic_exchange) + OP(atomic_comp_swap) + OP(atomic_add) + OP(atomic_imin) + OP(atomic_umin) + OP(atomic_imax) + OP(atomic_umax) + OP(atomic_and) + OP(atomic_or) + OP(atomic_xor) +#undef OP + default: + unreachable("Invalid atomic"); + } +} + static bool nir_lower_io_block(nir_block *block, void *void_state) { 
@@ -202,9 +223,25 @@ nir_lower_io_block(nir_block *block, void *void_state) nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic != nir_intrinsic_load_var && - intrin->intrinsic != nir_intrinsic_store_var) + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: + case nir_intrinsic_store_var: + case nir_intrinsic_var_atomic_add: + case nir_intrinsic_var_atomic_imin: + case nir_intrinsic_var_atomic_umin: + case nir_intrinsic_var_atomic_imax: + case nir_intrinsic_var_atomic_umax: + case nir_intrinsic_var_atomic_and: + case nir_intrinsic_var_atomic_or: + case nir_intrinsic_var_atomic_xor: + case nir_intrinsic_var_atomic_exchange: + case nir_intrinsic_var_atomic_comp_swap: + /* We can lower the io for this nir instrinsic */ + break; + default: + /* We can't lower the io for this nir instrinsic, so skip it */ continue; + } nir_variable_mode mode = intrin->variables[0]->var->data.mode; @@ -295,6 +332,51 @@ nir_lower_io_block(nir_block *block, void *void_state) break; } + case nir_intrinsic_var_atomic_add: + case nir_intrinsic_var_atomic_imin: + case nir_intrinsic_var_atomic_umin: + case nir_intrinsic_var_atomic_imax: + case nir_intrinsic_var_atomic_umax: + case nir_intrinsic_var_atomic_and: + case nir_intrinsic_var_atomic_or: + case nir_intrinsic_var_atomic_xor: + case nir_intrinsic_var_atomic_exchange: + case nir_intrinsic_var_atomic_comp_swap: { + assert(mode == nir_var_shared); + + nir_ssa_def *offset; + + offset = get_io_offset(b, intrin->variables[0], + NULL, state->type_size); + + nir_intrinsic_instr *atomic = + nir_intrinsic_instr_create(state->mem_ctx, + atomic_op(intrin->intrinsic)); + + atomic->src[0] = nir_src_for_ssa(offset); + + atomic->const_index[0] = + intrin->variables[0]->var->data.driver_location; + + nir_src_copy(&atomic->src[1], &intrin->src[0], atomic); + + if (intrin->intrinsic == nir_intrinsic_var_atomic_comp_swap) + nir_src_copy(&atomic->src[2], &intrin->src[1], atomic); + + if (intrin->dest.is_ssa) { + 
nir_ssa_dest_init(&atomic->instr, &atomic->dest, + intrin->dest.ssa.num_components, NULL); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&atomic->dest.ssa)); + } else { + nir_dest_copy(&atomic->dest, &intrin->dest, state->mem_ctx); + } + + nir_instr_insert_before(&intrin->instr, &atomic->instr); + nir_instr_remove(&intrin->instr); + break; + } + default: break; } -- cgit v1.2.3 From 86daceb7f2ce7f53cffe256253a321e1e5f05269 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 18 Jan 2016 09:45:46 -0800 Subject: i965/nir: Lower nir compute shader shared variables Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_nir.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 28870b05e9d..0985c2cab4f 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -405,6 +405,14 @@ brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) } } +static void +brw_nir_lower_shared(nir_shader *nir) +{ + nir_assign_var_locations(&nir->shared, &nir->num_shared, + type_size_scalar_bytes); + nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes); +} + #define OPT(pass, ...) ({ \ bool this_progress = false; \ NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ @@ -500,6 +508,8 @@ brw_nir_lower_io(nir_shader *nir, OPT_V(brw_nir_lower_inputs, devinfo, is_scalar); OPT_V(brw_nir_lower_outputs, devinfo, is_scalar); + if (nir->stage == MESA_SHADER_COMPUTE) + OPT_V(brw_nir_lower_shared); OPT_V(nir_lower_io, nir_var_all, is_scalar ? 
type_size_scalar : type_size_vec4); return nir_optimize(nir, is_scalar); -- cgit v1.2.3 From 97b09a92686d75ba2faca2fb20276c75f630f691 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 18 Jan 2016 10:00:49 -0800 Subject: anv/pipeline: Set size of shared variables in prog_data Signed-off-by: Jordan Justen --- src/vulkan/anv_pipeline.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 2a22497815a..d66987f1a8c 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -746,6 +746,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + prog_data->base.total_shared = nir->num_shared; + void *mem_ctx = ralloc_context(NULL); if (module->nir == NULL) -- cgit v1.2.3 From 19830031cb2760e3ebbfae0203ebefdd900c6039 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 9 Jan 2016 01:18:24 -0800 Subject: anv/gen8: Enable SLM in L3 cache control register Signed-off-by: Jordan Justen --- src/vulkan/anv_private.h | 1 + src/vulkan/gen8_cmd_buffer.c | 85 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 69 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 307e07ef246..05a6342e2f1 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1087,6 +1087,7 @@ struct anv_attachment_state { struct anv_cmd_state { /* PIPELINE_SELECT.PipelineSelection */ uint32_t current_pipeline; + uint32_t current_l3_config; uint32_t vb_dirty; anv_cmd_dirty_mask_t dirty; anv_cmd_dirty_mask_t compute_dirty; diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 9c1b6e51b07..c15e5a5ef42 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -146,9 +146,74 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) } #endif +static void +emit_lrm(struct anv_batch *batch, + uint32_t reg, 
struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + .RegisterOffset = reg, + .DataDWord = imm); +} + +#define GEN8_L3CNTLREG 0x7034 + +static void +config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) +{ + /* References for GL state: + * + * - commits e307cfa..228d5a3 + * - src/mesa/drivers/dri/i965/gen7_l3_state.c + */ + + uint32_t val = enable_slm ? + /* All = 48 ways; URB = 16 ways; DC and RO = 0, SLM = 1 */ + 0x60000021 : + /* All = 48 ways; URB = 48 ways; DC, RO and SLM = 0 */ + 0x60000060; + bool changed = cmd_buffer->state.current_l3_config != val; + + if (changed) { + /* According to the hardware docs, the L3 partitioning can only be changed + * while the pipeline is completely drained and the caches are flushed, + * which involves a first PIPE_CONTROL flush which stalls the pipeline and + * initiates invalidation of the relevant caches... + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + /* ...followed by a second stalling flush which guarantees that + * invalidation is complete when the L3 configuration registers are + * modified. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + emit_lri(&cmd_buffer->batch, GEN8_L3CNTLREG, val); + cmd_buffer->state.current_l3_config = val; + } +} + static void flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer) { + config_l3(cmd_buffer, false); + if (cmd_buffer->state.current_pipeline != _3D) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), #if ANV_GEN >= 9 @@ -426,23 +491,6 @@ void genX(CmdDrawIndexed)( .BaseVertexLocation = vertexOffset); } -static void -emit_lrm(struct anv_batch *batch, - uint32_t reg, struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); -} - -static void -emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), - .RegisterOffset = reg, - .DataDWord = imm); -} - /* Auto-Draw / Indirect Registers */ #define GEN7_3DPRIM_END_OFFSET 0x2420 #define GEN7_3DPRIM_START_VERTEX 0x2430 @@ -571,6 +619,9 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + config_l3(cmd_buffer, needs_slm); + if (cmd_buffer->state.current_pipeline != GPGPU) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), #if ANV_GEN >= 9 -- cgit v1.2.3 From 819cb694347b59baec60f58ad02aed1e70b602fe Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 20 Jan 2016 17:26:41 -0800 Subject: anv/gen8+9: Invalidate color calc state when switching to the GPGPU pipeline Port 044acb9256046bebec890cac7e42043754459fc2 to anv. 
Signed-off-by: Jordan Justen --- src/vulkan/gen8_cmd_buffer.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index c15e5a5ef42..d4d01ea151b 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -623,6 +623,20 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) config_l3(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { +#if ANV_GEN < 10 + /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: + * + * Software must clear the COLOR_CALC_STATE Valid field in + * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT + * with Pipeline Select set to GPGPU. + * + * The internal hardware docs recommend the same workaround for Gen9 + * hardware too. + */ + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_CC_STATE_POINTERS)); +#endif + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), #if ANV_GEN >= 9 .MaskBits = 3, -- cgit v1.2.3 From bc035db3c8138d63e700a86b4137dfdba07e3d53 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 9 Jan 2016 01:28:40 -0800 Subject: anv/gen8: Set SLM size in interface descriptor Signed-off-by: Jordan Justen --- src/vulkan/gen8_cmd_buffer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index d4d01ea151b..096ced5694f 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -588,6 +588,16 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) .CURBEDataStartAddress = push_state.offset); } + assert(prog_data->total_shared <= 64 * 1024); + uint32_t slm_size = 0; + if (prog_data->total_shared > 0) { + /* slm_size is in 4k increments, but must be a power of 2. 
*/ + slm_size = 4 * 1024; + while (slm_size < prog_data->total_shared) + slm_size <<= 1; + slm_size /= 4 * 1024; + } + struct anv_state state = anv_state_pool_emit(&device->dynamic_state_pool, GENX(INTERFACE_DESCRIPTOR_DATA), 64, @@ -600,6 +610,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) .ConstantIndirectURBEntryReadLength = push_constant_regs, .ConstantURBEntryReadOffset = 0, .BarrierEnable = cs_prog_data->uses_barrier, + .SharedLocalMemorySize = slm_size, .NumberofThreadsinGPGPUThreadGroup = pipeline->cs_thread_width_max); -- cgit v1.2.3 From a7e5b683cabc29ffc1c85367498fc6760b802fa5 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 8 Jan 2016 19:45:45 -0800 Subject: nir/spirv: Support workgroup (shared) variable translation Signed-off-by: Jordan Justen --- src/glsl/nir/spirv/spirv_to_nir.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index e8ce588e14e..de97612a2fc 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1981,6 +1981,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, var->interface_type = NULL; break; case SpvStorageClassWorkgroup: + var->data.mode = nir_var_shared; + break; case SpvStorageClassCrossWorkgroup: case SpvStorageClassGeneric: case SpvStorageClassAtomicCounter: -- cgit v1.2.3 From b1a7a27d6008d5153523f96d146e4a19b33d1354 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sun, 10 Jan 2016 11:12:17 -0800 Subject: nir/spirv: Handle compute shared atomics Signed-off-by: Jordan Justen --- src/glsl/nir/spirv/spirv_to_nir.c | 140 ++++++++++++++++++++++++++------------ 1 file changed, 96 insertions(+), 44 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index de97612a2fc..d0211221e17 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -2766,16 +2766,59 @@ 
vtn_handle_image(struct vtn_builder *b, SpvOp opcode, } } -static void -vtn_handle_ssbo_atomic(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) +static nir_intrinsic_op +get_ssbo_nir_atomic_op(SpvOp opcode) { - struct vtn_value *pointer = vtn_value(b, w[3], vtn_value_type_deref); - struct vtn_type *type = pointer->deref_type; - nir_deref *deref = &pointer->deref->deref; - nir_ssa_def *index = get_vulkan_resource_index(b, &deref, &type); + switch (opcode) { +#define OP(S, N) case SpvOp##S: return nir_intrinsic_ssbo_##N; + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_imin) + OP(AtomicUMin, atomic_umin) + OP(AtomicSMax, atomic_imax) + OP(AtomicUMax, atomic_umax) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid SSBO atomic"); + } +} +static nir_intrinsic_op +get_shared_nir_atomic_op(SpvOp opcode) +{ + switch (opcode) { +#define OP(S, N) case SpvOp##S: return nir_intrinsic_var_##N; + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_imin) + OP(AtomicUMin, atomic_umin) + OP(AtomicSMax, atomic_imax) + OP(AtomicUMax, atomic_umax) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid shared atomic"); + } +} + +static nir_ssa_def * +get_ssbo_atomic_offset(struct vtn_builder *b, nir_deref *deref, struct vtn_type *type) +{ nir_ssa_def *offset = nir_imm_int(&b->nb, 0); + while (deref->child) { deref = deref->child; switch (deref->deref_type) { @@ -2799,53 +2842,30 @@ vtn_handle_ssbo_atomic(struct vtn_builder *b, SpvOp opcode, } } - /* - 
SpvScope scope = w[4]; - SpvMemorySemanticsMask semantics = w[5]; - */ - - nir_intrinsic_op op; - switch (opcode) { -#define OP(S, N) case SpvOp##S: op = nir_intrinsic_ssbo_##N; break; - OP(AtomicExchange, atomic_exchange) - OP(AtomicCompareExchange, atomic_comp_swap) - OP(AtomicIIncrement, atomic_add) - OP(AtomicIDecrement, atomic_add) - OP(AtomicIAdd, atomic_add) - OP(AtomicISub, atomic_add) - OP(AtomicSMin, atomic_imin) - OP(AtomicUMin, atomic_umin) - OP(AtomicSMax, atomic_imax) - OP(AtomicUMax, atomic_umax) - OP(AtomicAnd, atomic_and) - OP(AtomicOr, atomic_or) - OP(AtomicXor, atomic_xor) -#undef OP - default: - unreachable("Invalid SSBO atomic"); - } - - nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->nb.shader, op); - atomic->src[0] = nir_src_for_ssa(index); - atomic->src[1] = nir_src_for_ssa(offset); + return offset; +} +static void +fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, nir_src *src) +{ switch (opcode) { case SpvOpAtomicIIncrement: - atomic->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); + src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); break; case SpvOpAtomicIDecrement: - atomic->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); + src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); break; case SpvOpAtomicISub: - atomic->src[2] = + src[0] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); break; case SpvOpAtomicCompareExchange: - atomic->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); - atomic->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def); + src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); + src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def); break; /* Fall through */ @@ -2858,11 +2878,43 @@ vtn_handle_ssbo_atomic(struct vtn_builder *b, SpvOp opcode, case SpvOpAtomicAnd: case SpvOpAtomicOr: case SpvOpAtomicXor: - atomic->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); break; default: - 
unreachable("Invalid SSBO atomic"); + unreachable("Invalid SPIR-V atomic"); + } +} + +static void +vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *pointer = vtn_value(b, w[3], vtn_value_type_deref); + struct vtn_type *type = pointer->deref_type; + nir_deref *deref = &pointer->deref->deref; + nir_intrinsic_instr *atomic; + + /* + SpvScope scope = w[4]; + SpvMemorySemanticsMask semantics = w[5]; + */ + + if (pointer->deref->var->data.mode == nir_var_shared) { + nir_intrinsic_op op = get_shared_nir_atomic_op(opcode); + atomic = nir_intrinsic_instr_create(b->nb.shader, op); + atomic->variables[0] = + nir_deref_as_var(nir_copy_deref(atomic, &pointer->deref->deref)); + fill_common_atomic_sources(b, opcode, w, &atomic->src[0]); + } else { + nir_ssa_def *index = get_vulkan_resource_index(b, &deref, &type); + nir_ssa_def *offset = get_ssbo_atomic_offset(b, deref, type); + nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode); + + atomic = nir_intrinsic_instr_create(b->nb.shader, op); + atomic->src[0] = nir_src_for_ssa(index); + atomic->src[1] = nir_src_for_ssa(offset); + fill_common_atomic_sources(b, opcode, w, &atomic->src[2]); } nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL); @@ -3605,7 +3657,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_image(b, opcode, w, count); } else { assert(pointer->value_type == vtn_value_type_deref); - vtn_handle_ssbo_atomic(b, opcode, w, count); + vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count); } break; } -- cgit v1.2.3 From c5490d027731a1379c099cae6c1fb63ab30cea7c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 21 Jan 2016 10:43:32 -0800 Subject: vk: Fix indirect push constants This currently sets the base and size of all push constants to the entire push constant block. 
The idea is that we'll use the base and size to eventually optimize the amount we actually push, but for now we don't do that. --- src/glsl/nir/spirv/spirv_to_nir.c | 8 +++++++ src/mesa/drivers/dri/i965/brw_fs.cpp | 7 +++--- src/vulkan/anv_nir_lower_push_constants.c | 39 ++++--------------------------- 3 files changed, 15 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index d0211221e17..74ad6518b9c 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1515,6 +1515,14 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, instr->src[src++] = nir_src_for_ssa((*inout)->def); } + /* We set the base and size for push constant load to the entire push + * constant block for now. + */ + if (op == nir_intrinsic_load_push_constant) { + instr->const_index[0] = 0; + instr->const_index[1] = 128; + } + if (index) instr->src[src++] = nir_src_for_ssa(index); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index fc883f458fa..1ba5075731a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1992,11 +1992,10 @@ fs_visitor::assign_constant_locations() */ const unsigned int max_push_components = 16 * 8; - /* We push small arrays, but no bigger than 16 floats. This is big enough - * for a vec4 but hopefully not large enough to push out other stuff. We - * should probably use a better heuristic at some point. + /* For vulkan we don't limit the max_chunk_size. We set it to 32 float = + * 128 bytes, which is the maximum vulkan push constant size. 
*/ - const unsigned int max_chunk_size = 16; + const unsigned int max_chunk_size = 32; unsigned int num_push_constants = 0; unsigned int num_pull_constants = 0; diff --git a/src/vulkan/anv_nir_lower_push_constants.c b/src/vulkan/anv_nir_lower_push_constants.c index 7fc3953ac33..53cd3d73793 100644 --- a/src/vulkan/anv_nir_lower_push_constants.c +++ b/src/vulkan/anv_nir_lower_push_constants.c @@ -43,45 +43,14 @@ lower_push_constants_block(nir_block *block, void *void_state) if (intrin->intrinsic != nir_intrinsic_load_push_constant) continue; + /* This wont work for vec4 stages. */ + assert(state->is_scalar); + assert(intrin->const_index[0] % 4 == 0); - unsigned dword_offset = intrin->const_index[0] / 4; + assert(intrin->const_index[1] == 128); /* We just turn them into uniform loads with the appropreate offset */ intrin->intrinsic = nir_intrinsic_load_uniform; - intrin->const_index[0] = 0; - if (state->is_scalar) { - intrin->const_index[1] = dword_offset; - } else { - unsigned shift = dword_offset % 4; - /* Can't cross the vec4 boundary */ - assert(shift + intrin->num_components <= 4); - - /* vec4 shifts are in units of vec4's */ - intrin->const_index[1] = dword_offset / 4; - - if (shift) { - /* If there's a non-zero shift then we need to load a whole vec4 - * and use a move to swizzle it into place. 
- */ - assert(intrin->dest.is_ssa); - nir_alu_instr *mov = nir_alu_instr_create(state->shader, - nir_op_imov); - mov->src[0].src = nir_src_for_ssa(&intrin->dest.ssa); - for (unsigned i = 0; i < intrin->num_components; i++) - mov->src[0].swizzle[i] = i + shift; - mov->dest.write_mask = (1 << intrin->num_components) - 1; - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, - intrin->num_components, NULL); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&mov->dest.dest.ssa)); - nir_instr_insert_after(&intrin->instr, &mov->instr); - - /* Stomp the number of components to 4 */ - intrin->num_components = 4; - intrin->dest.ssa.num_components = 4; - } - } } return true; -- cgit v1.2.3 From 9eab8fc6834bc3b71b0ac477d09252bfac22c605 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 21 Jan 2016 13:52:41 -0800 Subject: vk: Emit surface state base address before renderpass If we're continuing a render pass, make sure we don't emit the depth and stencil buffer addresses before we set the state base addresses. 
Fixes crucible func.cmd-buffer.small-secondaries --- src/vulkan/anv_cmd_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 070b8490e32..4591dd95fec 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -376,6 +376,8 @@ VkResult anv_BeginCommandBuffer( assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY || !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)); + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + if (cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { cmd_buffer->state.framebuffer = @@ -389,8 +391,6 @@ VkResult anv_BeginCommandBuffer( anv_cmd_buffer_set_subpass(cmd_buffer, subpass); } - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - return VK_SUCCESS; } -- cgit v1.2.3 From ac60e98a582737897b6e1f858023eba0543d10f3 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 21 Jan 2016 14:14:01 -0800 Subject: vk: Do render cache flush for GEN8+ This is needed for SKL as well. --- src/vulkan/genX_cmd_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index 4f7054f83d2..1ab17470932 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -48,7 +48,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) scratch_bo = &device->scratch_block_pool.bo; /* XXX: Do we need this on more than just BDW? */ -#if (ANV_GEN == 8) +#if (ANV_GEN >= 8) /* Emit a render target cache flush. * * This isn't documented anywhere in the PRM. However, it seems to be @@ -56,7 +56,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) * this, we get GPU hangs when using multi-level command buffers which * clear depth, reset state base address, and then go render stuff. 
*/ - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .RenderTargetCacheFlushEnable = true); #endif -- cgit v1.2.3 From f89d5cb80709f3f61a15e1176f9b3351bc437d92 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 21 Jan 2016 14:53:44 -0800 Subject: nir/spirv: Delete stray fmod remnants. Jason left these stray code fragments in 22804de110b97dce1415318fd02c1003e16ef14a. --- src/glsl/nir/spirv/vtn_glsl450.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index eb81620ea11..afcf8e2a7da 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -424,7 +424,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; } - op = nir_op_fmod; break; case GLSLstd450FMin: op = nir_op_fmin; break; case GLSLstd450UMin: op = nir_op_umin; break; case GLSLstd450SMin: op = nir_op_imin; break; -- cgit v1.2.3 From 824f7763550bf83ade57a45fcce4c5a6ac0c4a85 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 21 Jan 2016 14:56:34 -0800 Subject: nir/spirv: Implement ModfStruct opcode. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index afcf8e2a7da..a675aa4c7f3 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -424,6 +424,13 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; } + case GLSLstd450ModfStruct: { + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_ffract(nb, src[0]); + val->ssa->elems[1]->def = nir_ffloor(nb, src[0]); + return; + } + case GLSLstd450FMin: op = nir_op_fmin; break; case GLSLstd450UMin: op = nir_op_umin; break; case GLSLstd450SMin: op = nir_op_imin; break; @@ -618,7 +625,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; } - case GLSLstd450ModfStruct: case GLSLstd450PackDouble2x32: case GLSLstd450UnpackDouble2x32: case GLSLstd450IMix: -- cgit v1.2.3 From 1112bf633f96b4fac4dfba56ce19d43fccf53200 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 Jan 2016 17:11:55 -0800 Subject: nir/spirv: Rework access chains Previously, we were creating nir_deref's immediately. Now, instead, we have an intermediate vtn_access_chain structure. While a little more awkward initially, this will allow us to more easily do structure splitting on-the-fly. 
--- src/glsl/nir/spirv/spirv_to_nir.c | 747 +++++++++++++++++++++----------------- src/glsl/nir/spirv/vtn_cfg.c | 23 +- src/glsl/nir/spirv/vtn_glsl450.c | 7 +- src/glsl/nir/spirv/vtn_private.h | 35 +- 4 files changed, 447 insertions(+), 365 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 74ad6518b9c..771c8345941 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -162,9 +162,9 @@ vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) case vtn_value_type_ssa: return val->ssa; - case vtn_value_type_deref: + case vtn_value_type_access_chain: /* This is needed for function parameters */ - return vtn_variable_load(b, val->deref, val->deref_type); + return vtn_variable_load(b, val->access_chain); default: unreachable("Invalid type for an SSA value"); @@ -1198,9 +1198,9 @@ static void var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, const struct vtn_decoration *dec, void *void_var) { - assert(val->value_type == vtn_value_type_deref); - assert(val->deref->deref.child == NULL); - assert(val->deref->var == void_var); + assert(val->value_type == vtn_value_type_access_chain); + assert(val->access_chain->length == 0); + assert(val->access_chain->var == void_var); nir_variable *var = void_var; switch (dec->decoration) { @@ -1457,43 +1457,30 @@ _vtn_variable_store(struct vtn_builder *b, } static nir_ssa_def * -deref_array_offset(struct vtn_builder *b, nir_deref *deref) +get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, + struct vtn_type **type, unsigned *chain_idx) { - assert(deref->deref_type == nir_deref_type_array); - nir_deref_array *deref_array = nir_deref_as_array(deref); - nir_ssa_def *offset = nir_imm_int(&b->nb, deref_array->base_offset); - - if (deref_array->deref_array_type == nir_deref_array_type_indirect) - offset = nir_iadd(&b->nb, offset, deref_array->indirect.ssa); - - return offset; -} - -static 
nir_ssa_def * -get_vulkan_resource_index(struct vtn_builder *b, - nir_deref **deref, struct vtn_type **type) -{ - assert((*deref)->deref_type == nir_deref_type_var); - nir_variable *var = nir_deref_as_var(*deref)->var; - - assert(var->interface_type && "variable is a block"); + assert(chain->var->interface_type && "variable is a block"); nir_ssa_def *array_index; - if ((*deref)->child && (*deref)->child->deref_type == nir_deref_type_array) { - *deref = (*deref)->child; - *type = (*type)->array_element; - array_index = deref_array_offset(b, *deref); + if (glsl_type_is_array(chain->var->type)) { + assert(chain->length > 0); + array_index = vtn_ssa_value(b, chain->ids[0])->def; + *chain_idx = 1; + *type = chain->var_type->array_element; } else { array_index = nir_imm_int(&b->nb, 0); + *chain_idx = 0; + *type = chain->var_type; } nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_vulkan_resource_index); instr->src[0] = nir_src_for_ssa(array_index); - instr->const_index[0] = var->data.descriptor_set; - instr->const_index[1] = var->data.binding; - instr->const_index[2] = var->data.mode; + instr->const_index[0] = chain->var->data.descriptor_set; + instr->const_index[1] = chain->var->data.binding; + instr->const_index[2] = chain->var->data.mode; nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); nir_builder_instr_insert(&b->nb, &instr->instr); @@ -1501,6 +1488,81 @@ get_vulkan_resource_index(struct vtn_builder *b, return &instr->dest.ssa; } +static bool +variable_is_external_block(nir_variable *var) +{ + return var->interface_type && + glsl_type_is_struct(var->interface_type) && + (var->data.mode == nir_var_uniform || + var->data.mode == nir_var_shader_storage); +} + +static nir_ssa_def * +vtn_access_chain_to_offset(struct vtn_builder *b, + struct vtn_access_chain *chain, + nir_ssa_def **index_out, struct vtn_type **type_out, + unsigned *end_idx_out, bool stop_at_matrix) +{ + unsigned idx = 0; + struct vtn_type *type; + *index_out = 
get_vulkan_resource_index(b, chain, &type, &idx); + + nir_ssa_def *offset = nir_imm_int(&b->nb, 0); + for (; idx < chain->length; idx++) { + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + /* Some users may not want matrix or vector derefs */ + if (stop_at_matrix) { + idx++; + goto end; + } + /* Fall through */ + + case GLSL_TYPE_ARRAY: + offset = nir_iadd(&b->nb, offset, + nir_imul(&b->nb, + vtn_ssa_value(b, chain->ids[idx])->def, + nir_imm_int(&b->nb, type->stride))); + + if (glsl_type_is_vector(type->type)) { + /* This had better be the tail */ + assert(idx == chain->length - 1); + type = rzalloc(b, struct vtn_type); + type->type = glsl_scalar_type(base_type); + } else { + type = type->array_element; + } + break; + + case GLSL_TYPE_STRUCT: { + struct vtn_value *member_val = + vtn_value(b, chain->ids[idx], vtn_value_type_constant); + unsigned member = member_val->constant->value.u[0]; + + offset = nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, type->offsets[member])); + type = type->members[member]; + break; + } + + default: + unreachable("Invalid type for deref"); + } + } + +end: + *type_out = type; + if (end_idx_out) + *end_idx_out = idx; + + return offset; +} + static void _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, nir_ssa_def *index, nir_ssa_def *offset, @@ -1542,10 +1604,14 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, static void _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, - nir_ssa_def *index, nir_ssa_def *offset, nir_deref *deref, + nir_ssa_def *index, nir_ssa_def *offset, + struct vtn_access_chain *chain, unsigned chain_idx, struct vtn_type *type, struct vtn_ssa_value **inout) { - if (load && deref == NULL && *inout == NULL) + if (chain_idx >= chain->length) + chain = NULL; + + if (load && chain == 
NULL && *inout == NULL) *inout = vtn_create_ssa_value(b, type->type); enum glsl_base_type base_type = glsl_get_base_type(type->type); @@ -1558,7 +1624,7 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, * a vector, a scalar, or a matrix. */ if (glsl_type_is_matrix(type->type)) { - if (deref == NULL) { + if (chain == NULL) { /* Loading the whole matrix */ struct vtn_ssa_value *transpose; unsigned num_ops, vec_width; @@ -1589,25 +1655,24 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, if (load && type->row_major) *inout = vtn_ssa_transpose(b, *inout); - - return; } else if (type->row_major) { - /* Row-major but with a deref. */ + /* Row-major but with an access chiain. */ nir_ssa_def *col_offset = - nir_imul(&b->nb, deref_array_offset(b, deref), + nir_imul(&b->nb, vtn_ssa_value(b, chain->ids[chain_idx])->def, nir_imm_int(&b->nb, type->array_element->stride)); offset = nir_iadd(&b->nb, offset, col_offset); - if (deref->child) { + if (chain_idx + 1 < chain->length) { /* Picking off a single element */ nir_ssa_def *row_offset = - nir_imul(&b->nb, deref_array_offset(b, deref->child), + nir_imul(&b->nb, + vtn_ssa_value(b, chain->ids[chain_idx + 1])->def, nir_imm_int(&b->nb, type->stride)); offset = nir_iadd(&b->nb, offset, row_offset); _vtn_load_store_tail(b, op, load, index, offset, inout, glsl_scalar_type(base_type)); - return; } else { + /* Picking one element off each column */ unsigned num_comps = glsl_get_vector_elements(type->type); nir_ssa_def *comps[4]; for (unsigned i = 0; i < num_comps; i++) { @@ -1628,61 +1693,55 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, if (load) (*inout)->def = nir_vec(&b->nb, comps, num_comps); - return; } } else { /* Column-major with a deref. Fall through to array case. 
*/ + nir_ssa_def *col_offset = + nir_imul(&b->nb, vtn_ssa_value(b, chain->ids[chain_idx])->def, + nir_imm_int(&b->nb, type->stride)); + offset = nir_iadd(&b->nb, offset, col_offset); + + _vtn_block_load_store(b, op, load, index, offset, + chain, chain_idx + 1, + type->array_element, inout); } - } else if (deref == NULL) { + } else if (chain == NULL) { + /* Single whole vector */ assert(glsl_type_is_vector_or_scalar(type->type)); _vtn_load_store_tail(b, op, load, index, offset, inout, type->type); - return; } else { /* Single component of a vector. Fall through to array case. */ - } - /* Fall through */ - - case GLSL_TYPE_ARRAY: - if (deref) { - offset = nir_iadd(&b->nb, offset, - nir_imul(&b->nb, deref_array_offset(b, deref), - nir_imm_int(&b->nb, type->stride))); + nir_ssa_def *elem_offset = + nir_imul(&b->nb, vtn_ssa_value(b, chain->ids[chain_idx])->def, + nir_imm_int(&b->nb, type->stride)); + offset = nir_iadd(&b->nb, offset, elem_offset); - _vtn_block_load_store(b, op, load, index, offset, deref->child, + _vtn_block_load_store(b, op, load, index, offset, NULL, 0, type->array_element, inout); - return; - } else { - unsigned elems = glsl_get_length(type->type); - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *elem_off = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); - _vtn_block_load_store(b, op, load, index, elem_off, NULL, - type->array_element, &(*inout)->elems[i]); - } - return; } - unreachable("Both branches above return"); + return; - case GLSL_TYPE_STRUCT: - if (deref) { - unsigned member = nir_deref_as_struct(deref)->index; - offset = nir_iadd(&b->nb, offset, - nir_imm_int(&b->nb, type->offsets[member])); + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(type->type); + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *elem_off = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); + _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, + type->array_element, &(*inout)->elems[i]); + } + return; 
+ } - _vtn_block_load_store(b, op, load, index, offset, deref->child, - type->members[member], inout); - return; - } else { - unsigned elems = glsl_get_length(type->type); - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *elem_off = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); - _vtn_block_load_store(b, op, load, index, elem_off, NULL, - type->members[i], &(*inout)->elems[i]); - } - return; + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(type->type); + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *elem_off = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); + _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, + type->members[i], &(*inout)->elems[i]); } - unreachable("Both branches above return"); + return; + } default: unreachable("Invalid block member type"); @@ -1690,8 +1749,7 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, } static struct vtn_ssa_value * -vtn_block_load(struct vtn_builder *b, nir_deref_var *src, - struct vtn_type *type) +vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src) { nir_intrinsic_op op; if (src->var->data.mode == nir_var_uniform) { @@ -1712,17 +1770,33 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src, op = nir_intrinsic_load_ssbo; } - nir_deref *block_deref = &src->deref; - nir_ssa_def *index = NULL; - if (op == nir_intrinsic_load_ubo || op == nir_intrinsic_load_ssbo) - index = get_vulkan_resource_index(b, &block_deref, &type); + nir_ssa_def *offset, *index = NULL; + struct vtn_type *type; + unsigned chain_idx; + offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true); + + if (op == nir_intrinsic_load_push_constant) + index = NULL; struct vtn_ssa_value *value = NULL; - _vtn_block_load_store(b, op, true, index, nir_imm_int(&b->nb, 0), - block_deref->child, type, &value); + _vtn_block_load_store(b, op, true, index, offset, + src, chain_idx, type, &value); return value; } +static void 
+vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_access_chain *dst) +{ + nir_ssa_def *offset, *index = NULL; + struct vtn_type *type; + unsigned chain_idx; + offset = vtn_access_chain_to_offset(b, dst, &index, &type, &chain_idx, true); + + _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset, + dst, chain_idx, type, &src); +} + /* * Gets the NIR-level deref tail, which may have as a child an array deref * selecting which component due to OpAccessChain supporting per-component @@ -1746,22 +1820,13 @@ static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *index); -static bool -variable_is_external_block(nir_variable *var) -{ - return var->interface_type && - glsl_type_is_struct(var->interface_type) && - (var->data.mode == nir_var_uniform || - var->data.mode == nir_var_shader_storage); -} +static nir_deref_var * +vtn_access_chain_to_deref(struct vtn_builder *b, + struct vtn_access_chain *chain); struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, - struct vtn_type *src_type) +vtn_local_load(struct vtn_builder *b, nir_deref_var *src) { - if (variable_is_external_block(src->var)) - return vtn_block_load(b, src, src_type); - nir_deref *src_tail = get_deref_tail(src); struct vtn_ssa_value *val = _vtn_variable_load(b, src, src_tail); @@ -1779,18 +1844,6 @@ vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, return val; } -static void -vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, - nir_deref_var *dest, struct vtn_type *type) -{ - nir_deref *block_deref = &dest->deref; - nir_ssa_def *index = get_vulkan_resource_index(b, &block_deref, &type); - - _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, - nir_imm_int(&b->nb, 0), block_deref->child, - type, &src); -} - static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, unsigned index); @@ -1799,45 +1852,67 @@ 
static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, nir_ssa_def *index); +void +vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest) +{ + nir_deref *dest_tail = get_deref_tail(dest); + + if (dest_tail->child) { + struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_variable_store(b, dest, dest_tail, val); + } else { + _vtn_variable_store(b, dest, dest_tail, src); + } +} + +struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src) +{ + if (variable_is_external_block(src->var)) + return vtn_block_load(b, src); + else + return vtn_local_load(b, vtn_access_chain_to_deref(b, src)); +} + void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, - nir_deref_var *dest, struct vtn_type *dest_type) + struct vtn_access_chain *dest) { if (variable_is_external_block(dest->var)) { assert(dest->var->data.mode == nir_var_shader_storage); - vtn_block_store(b, src, dest, dest_type); + vtn_block_store(b, src, dest); } else { - nir_deref *dest_tail = get_deref_tail(dest); - if (dest_tail->child) { - struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); - nir_deref_array *deref = nir_deref_as_array(dest_tail->child); - assert(deref->deref.child == NULL); - if (deref->deref_array_type == nir_deref_array_type_direct) - val->def = vtn_vector_insert(b, val->def, src->def, - deref->base_offset); - else - val->def = vtn_vector_insert_dynamic(b, val->def, src->def, - deref->indirect.ssa); - _vtn_variable_store(b, dest, dest_tail, val); - } else { - _vtn_variable_store(b, dest, 
dest_tail, src); - } + vtn_local_store(b, src, vtn_access_chain_to_deref(b, dest)); } } static void -vtn_variable_copy(struct vtn_builder *b, - nir_deref_var *dest, struct vtn_type *dest_type, - nir_deref_var *src, struct vtn_type *src_type) +vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, + struct vtn_access_chain *src) { if (src->var->interface_type || dest->var->interface_type) { - struct vtn_ssa_value *val = vtn_variable_load(b, src, src_type); - vtn_variable_store(b, val, dest, dest_type); + struct vtn_ssa_value *val = vtn_variable_load(b, src); + vtn_variable_store(b, val, dest); } else { + /* TODO: Handle single components of vectors */ + nir_deref_var *src_deref = vtn_access_chain_to_deref(b, src); + nir_deref_var *dest_deref = vtn_access_chain_to_deref(b, dest); + nir_intrinsic_instr *copy = nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); - copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + copy->variables[0] = + nir_deref_as_var(nir_copy_deref(copy, &dest_deref->deref)); + copy->variables[1] = + nir_deref_as_var(nir_copy_deref(copy, &src_deref->deref)); nir_builder_instr_insert(&b->nb, ©->instr); } @@ -1907,7 +1982,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvOpVariable: { struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); SpvStorageClass storage_class = w[3]; nir_variable *var = rzalloc(b->shader, nir_variable); @@ -2005,8 +2081,10 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, var->constant_initializer = nir_constant_clone(constant, var); } - val->deref = nir_deref_var_create(b, var); - val->deref_type = type; + val->access_chain = ralloc(b, struct vtn_access_chain); + val->access_chain->var = var; + 
val->access_chain->var_type = type; + val->access_chain->length = 0; /* We handle decorations first because decorations might give us * location information. We use the data.explicit_location field to @@ -2058,7 +2136,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvOpAccessChain: case SpvOpInBoundsAccessChain: { - nir_deref_var *base; + struct vtn_access_chain *base, *chain; struct vtn_value *base_val = vtn_untyped_value(b, w[3]); if (base_val->value_type == vtn_value_type_sampled_image) { /* This is rather insane. SPIR-V allows you to use OpSampledImage @@ -2070,178 +2148,81 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, */ base = base_val->sampled_image->image; } else { - assert(base_val->value_type == vtn_value_type_deref); - base = base_val->deref; + assert(base_val->value_type == vtn_value_type_access_chain); + base = base_val->access_chain; } - nir_deref_var *deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); - struct vtn_type *deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + uint32_t new_len = base->length + count - 4; + chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->ids[0])); - nir_deref *tail = &deref->deref; - while (tail->child) - tail = tail->child; - - for (unsigned i = 0; i < count - 4; i++) { - assert(w[i + 4] < b->value_id_bound); - struct vtn_value *idx_val = &b->values[w[i + 4]]; - - enum glsl_base_type base_type = glsl_get_base_type(tail->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_ARRAY: { - nir_deref_array *deref_arr = nir_deref_array_create(b); - if (base_type == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(tail->type)) { - deref_type = deref_type->array_element; - } else { - assert(glsl_type_is_vector(tail->type)); - deref_type = ralloc(b, struct vtn_type); - deref_type->type = glsl_scalar_type(base_type); - } + *chain = *base; - 
deref_arr->deref.type = deref_type->type; + chain->length = new_len; + unsigned idx = 0; + for (int i = 0; i < base->length; i++) + chain->ids[idx++] = base->ids[i]; - if (idx_val->value_type == vtn_value_type_constant) { - unsigned idx = idx_val->constant->value.u[0]; - deref_arr->deref_array_type = nir_deref_array_type_direct; - deref_arr->base_offset = idx; - } else { - assert(idx_val->value_type == vtn_value_type_ssa); - assert(glsl_type_is_scalar(idx_val->ssa->type)); - deref_arr->deref_array_type = nir_deref_array_type_indirect; - deref_arr->base_offset = 0; - deref_arr->indirect = nir_src_for_ssa(idx_val->ssa->def); - } - tail->child = &deref_arr->deref; - break; - } - - case GLSL_TYPE_STRUCT: { - assert(idx_val->value_type == vtn_value_type_constant); - unsigned idx = idx_val->constant->value.u[0]; - deref_type = deref_type->members[idx]; - nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); - deref_struct->deref.type = deref_type->type; - tail->child = &deref_struct->deref; - break; - } - default: - unreachable("Invalid type for deref"); - } - - if (deref_type->is_builtin) { - /* If we encounter a builtin, we throw away the ress of the - * access chain, jump to the builtin, and keep building. - */ - const struct glsl_type *builtin_type = deref_type->type; - - nir_deref_array *per_vertex_deref = NULL; - if (glsl_type_is_array(base->var->type)) { - /* This builtin is a per-vertex builtin */ - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - assert(base->var->data.mode == nir_var_shader_in); - builtin_type = glsl_array_type(builtin_type, - b->shader->info.gs.vertices_in); - - /* The first non-var deref should be an array deref. 
*/ - assert(deref->deref.child->deref_type == - nir_deref_type_array); - per_vertex_deref = nir_deref_as_array(deref->deref.child); - } - - nir_variable *builtin = get_builtin_variable(b, - base->var->data.mode, - builtin_type, - deref_type->builtin); - deref = nir_deref_var_create(b, builtin); - - if (per_vertex_deref) { - /* Since deref chains start at the variable, we can just - * steal that link and use it. - */ - deref->deref.child = &per_vertex_deref->deref; - per_vertex_deref->deref.child = NULL; - per_vertex_deref->deref.type = - glsl_get_array_element(builtin_type); - - tail = &per_vertex_deref->deref; - } else { - tail = &deref->deref; - } - } else { - tail = tail->child; - } - } - - /* For uniform blocks, we don't resolve the access chain until we - * actually access the variable, so we need to keep around the original - * type of the variable. - */ - if (variable_is_external_block(base->var)) - deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + for (int i = 4; i < count; i++) + chain->ids[idx++] = w[i]; if (base_val->value_type == vtn_value_type_sampled_image) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_sampled_image); val->sampled_image = ralloc(b, struct vtn_sampled_image); - val->sampled_image->image = deref; + val->sampled_image->image = chain; val->sampled_image->sampler = base_val->sampled_image->sampler; } else { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); - val->deref = deref; - val->deref_type = deref_type; + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + val->access_chain = chain; } - break; } case SpvOpCopyMemory: { - struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_deref); - struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_deref); + struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain); + struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain); - vtn_variable_copy(b, dest->deref, 
dest->deref_type, - src->deref, src->deref_type); + vtn_variable_copy(b, dest->access_chain, src->access_chain); break; } case SpvOpLoad: { - nir_deref_var *src = vtn_value(b, w[3], vtn_value_type_deref)->deref; - struct vtn_type *src_type = - vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + struct vtn_access_chain *src = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; if (src->var->interface_type && (glsl_type_is_sampler(src->var->interface_type) || glsl_type_is_image(src->var->interface_type))) { - vtn_push_value(b, w[2], vtn_value_type_deref)->deref = src; + vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src; return; } struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = vtn_variable_load(b, src, src_type); + val->ssa = vtn_variable_load(b, src); break; } case SpvOpStore: { - nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; - struct vtn_type *dest_type = - vtn_value(b, w[1], vtn_value_type_deref)->deref_type; + struct vtn_access_chain *dest = + vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); - vtn_variable_store(b, src, dest, dest_type); + vtn_variable_store(b, src, dest); break; } case SpvOpArrayLength: { - struct vtn_value *v_deref = vtn_value(b, w[3], vtn_value_type_deref); - struct vtn_type *type = v_deref->deref_type; - const uint32_t offset = type->offsets[w[4]]; - const uint32_t stride = type->members[w[4]]->stride; - nir_deref *n_deref = &v_deref->deref->deref; + struct vtn_access_chain *chain = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + + const uint32_t offset = chain->var_type->offsets[w[4]]; + const uint32_t stride = chain->var_type->members[w[4]]->stride; + + unsigned chain_idx; + struct vtn_type *type; nir_ssa_def *index = - get_vulkan_resource_index(b, &n_deref, &type); + get_vulkan_resource_index(b, chain, &type, &chain_idx); + nir_intrinsic_instr *instr = 
nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_get_buffer_size); @@ -2272,6 +2253,118 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } } +static nir_deref_var * +vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) +{ + nir_deref_var *deref_var = nir_deref_var_create(b, chain->var); + nir_deref *tail = &deref_var->deref; + struct vtn_type *deref_type = chain->var_type; + + for (unsigned i = 0; i < chain->length; i++) { + struct vtn_value *idx_val = vtn_untyped_value(b, chain->ids[i]); + enum glsl_base_type base_type = glsl_get_base_type(tail->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_ARRAY: { + nir_deref_array *deref_arr = nir_deref_array_create(b); + if (base_type == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(tail->type)) { + deref_type = deref_type->array_element; + } else { + assert(glsl_type_is_vector(tail->type)); + deref_type = ralloc(b, struct vtn_type); + deref_type->type = glsl_scalar_type(base_type); + } + + deref_arr->deref.type = deref_type->type; + + if (idx_val->value_type == vtn_value_type_constant) { + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = idx_val->constant->value.u[0]; + } else { + assert(idx_val->value_type == vtn_value_type_ssa); + assert(glsl_type_is_scalar(idx_val->ssa->type)); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + deref_arr->base_offset = 0; + deref_arr->indirect = nir_src_for_ssa(idx_val->ssa->def); + } + tail->child = &deref_arr->deref; + break; + } + + case GLSL_TYPE_STRUCT: { + assert(idx_val->value_type == vtn_value_type_constant); + unsigned idx = idx_val->constant->value.u[0]; + deref_type = deref_type->members[idx]; + nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); + deref_struct->deref.type = deref_type->type; + tail->child = &deref_struct->deref; + break; + } + default: + 
unreachable("Invalid type for deref"); + } + + if (deref_type->is_builtin) { + /* If we encounter a builtin, we throw away the ress of the + * access chain, jump to the builtin, and keep building. + */ + const struct glsl_type *builtin_type = deref_type->type; + + nir_deref_array *per_vertex_deref = NULL; + if (glsl_type_is_array(chain->var->type)) { + /* This builtin is a per-vertex builtin */ + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + assert(chain->var->data.mode == nir_var_shader_in); + builtin_type = glsl_array_type(builtin_type, + b->shader->info.gs.vertices_in); + + /* The first non-var deref should be an array deref. */ + assert(deref_var->deref.child->deref_type == + nir_deref_type_array); + per_vertex_deref = nir_deref_as_array(deref_var->deref.child); + } + + nir_variable *builtin = get_builtin_variable(b, + chain->var->data.mode, + builtin_type, + deref_type->builtin); + deref_var = nir_deref_var_create(b, builtin); + + if (per_vertex_deref) { + /* Since deref chains start at the variable, we can just + * steal that link and use it. 
+ */ + deref_var->deref.child = &per_vertex_deref->deref; + per_vertex_deref->deref.child = NULL; + per_vertex_deref->deref.type = + glsl_get_array_element(builtin_type); + + tail = &per_vertex_deref->deref; + } else { + tail = &deref_var->deref; + } + } else { + tail = tail->child; + } + } + + return deref_var; +} + +nir_deref_var * +vtn_nir_deref(struct vtn_builder *b, uint32_t id) +{ + struct vtn_access_chain *chain = + vtn_value(b, id, vtn_value_type_access_chain)->access_chain; + + return vtn_access_chain_to_deref(b, chain); +} + static void vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -2283,9 +2376,9 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, for (unsigned i = 0; i < call->num_params; i++) { unsigned arg_id = w[4 + i]; struct vtn_value *arg = vtn_untyped_value(b, arg_id); - if (arg->value_type == vtn_value_type_deref) { - call->params[i] = - nir_deref_as_var(nir_copy_deref(call, &arg->deref->deref)); + if (arg->value_type == vtn_value_type_access_chain) { + nir_deref_var *d = vtn_access_chain_to_deref(b, arg->access_chain); + call->params[i] = nir_deref_as_var(nir_copy_deref(call, &d->deref)); } else { struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id); @@ -2294,7 +2387,7 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp"); call->params[i] = nir_deref_var_create(call, tmp); - vtn_variable_store(b, arg_ssa, call->params[i], arg->type); + vtn_local_store(b, arg_ssa, call->params[i]); } } @@ -2310,9 +2403,8 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, if (glsl_type_is_void(callee->return_type)) { vtn_push_value(b, w[2], vtn_value_type_undef); } else { - struct vtn_type *rettype = vtn_value(b, w[1], vtn_value_type_type)->type; struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa); - retval->ssa = vtn_variable_load(b, call->return_deref, rettype); + retval->ssa = 
vtn_local_load(b, call->return_deref); } } @@ -2371,9 +2463,9 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, vtn_push_value(b, w[2], vtn_value_type_sampled_image); val->sampled_image = ralloc(b, struct vtn_sampled_image); val->sampled_image->image = - vtn_value(b, w[3], vtn_value_type_deref)->deref; + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; val->sampled_image->sampler = - vtn_value(b, w[4], vtn_value_type_deref)->deref; + vtn_value(b, w[4], vtn_value_type_access_chain)->access_chain; return; } @@ -2385,9 +2477,9 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, if (sampled_val->value_type == vtn_value_type_sampled_image) { sampled = *sampled_val->sampled_image; } else { - assert(sampled_val->value_type == vtn_value_type_deref); + assert(sampled_val->value_type == vtn_value_type_access_chain); sampled.image = NULL; - sampled.sampler = sampled_val->deref; + sampled.sampler = sampled_val->access_chain; } nir_tex_src srcs[8]; /* 8 should be enough */ @@ -2524,9 +2616,9 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, const struct glsl_type *image_type; if (sampled.image) { - image_type = nir_deref_tail(&sampled.image->deref)->type; + image_type = sampled.image->var->interface_type; } else { - image_type = nir_deref_tail(&sampled.sampler->deref)->type; + image_type = sampled.sampler->var->interface_type; } instr->sampler_dim = glsl_get_sampler_dim(image_type); @@ -2568,11 +2660,11 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, unreachable("Invalid base type for sampler result"); } - instr->sampler = - nir_deref_as_var(nir_copy_deref(instr, &sampled.sampler->deref)); + nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler); + instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); if (sampled.image) { - instr->texture = - nir_deref_as_var(nir_copy_deref(instr, &sampled.image->deref)); + nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image); + instr->texture = 
nir_deref_as_var(nir_copy_deref(instr, &image->deref)); } else { instr->texture = NULL; } @@ -2613,7 +2705,8 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, vtn_push_value(b, w[2], vtn_value_type_image_pointer); val->image = ralloc(b, struct vtn_image_pointer); - val->image->deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; + val->image->image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; val->image->coord = get_image_coord(b, w[4]); val->image->sample = vtn_ssa_value(b, w[5])->def; return; @@ -2640,13 +2733,15 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, break; case SpvOpImageQuerySize: - image.deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; + image.image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; image.coord = NULL; image.sample = NULL; break; case SpvOpImageRead: - image.deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; + image.image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; image.coord = get_image_coord(b, w[4]); if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) { @@ -2658,7 +2753,8 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, break; case SpvOpImageWrite: - image.deref = vtn_value(b, w[1], vtn_value_type_deref)->deref; + image.image = + vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; image.coord = get_image_coord(b, w[2]); /* texel = w[3] */ @@ -2700,8 +2796,10 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, } nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); + + nir_deref_var *image_deref = vtn_access_chain_to_deref(b, image.image); intrin->variables[0] = - nir_deref_as_var(nir_copy_deref(&intrin->instr, &image.deref->deref)); + nir_deref_as_var(nir_copy_deref(&intrin->instr, &image_deref->deref)); /* ImageQuerySize doesn't take any extra parameters */ if (opcode != SpvOpImageQuerySize) { @@ -2822,37 +2920,6 @@ get_shared_nir_atomic_op(SpvOp opcode) } } -static nir_ssa_def * 
-get_ssbo_atomic_offset(struct vtn_builder *b, nir_deref *deref, struct vtn_type *type) -{ - nir_ssa_def *offset = nir_imm_int(&b->nb, 0); - - while (deref->child) { - deref = deref->child; - switch (deref->deref_type) { - case nir_deref_type_array: - offset = nir_iadd(&b->nb, offset, - nir_imul(&b->nb, deref_array_offset(b, deref), - nir_imm_int(&b->nb, type->stride))); - type = type->array_element; - continue; - - case nir_deref_type_struct: { - unsigned member = nir_deref_as_struct(deref)->index; - offset = nir_iadd(&b->nb, offset, - nir_imm_int(&b->nb, type->offsets[member])); - type = type->members[member]; - continue; - } - - default: - unreachable("Invalid deref type"); - } - } - - return offset; -} - static void fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, nir_src *src) @@ -2898,9 +2965,8 @@ static void vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - struct vtn_value *pointer = vtn_value(b, w[3], vtn_value_type_deref); - struct vtn_type *type = pointer->deref_type; - nir_deref *deref = &pointer->deref->deref; + struct vtn_access_chain *chain = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; nir_intrinsic_instr *atomic; /* @@ -2908,15 +2974,17 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, SpvMemorySemanticsMask semantics = w[5]; */ - if (pointer->deref->var->data.mode == nir_var_shared) { + if (chain->var->data.mode == nir_var_shared) { + nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref; nir_intrinsic_op op = get_shared_nir_atomic_op(opcode); atomic = nir_intrinsic_instr_create(b->nb.shader, op); - atomic->variables[0] = - nir_deref_as_var(nir_copy_deref(atomic, &pointer->deref->deref)); + atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref)); fill_common_atomic_sources(b, opcode, w, &atomic->src[0]); } else { - nir_ssa_def *index = get_vulkan_resource_index(b, &deref, &type); - 
nir_ssa_def *offset = get_ssbo_atomic_offset(b, deref, type); + struct vtn_type *type; + nir_ssa_def *offset, *index; + offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false); + nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode); atomic = nir_intrinsic_instr_create(b->nb.shader, op); @@ -2927,6 +2995,7 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL); + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); val->ssa = rzalloc(b, struct vtn_ssa_value); val->ssa->def = &atomic->dest.ssa; @@ -3636,9 +3705,9 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpImageQuerySize: { - nir_deref_var *image = vtn_value(b, w[3], vtn_value_type_deref)->deref; - const struct glsl_type *image_type = nir_deref_tail(&image->deref)->type; - if (glsl_type_is_image(image_type)) { + struct vtn_access_chain *image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + if (glsl_type_is_image(image->var->interface_type)) { vtn_handle_image(b, opcode, w, count); } else { vtn_handle_texture(b, opcode, w, count); @@ -3664,7 +3733,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, if (pointer->value_type == vtn_value_type_image_pointer) { vtn_handle_image(b, opcode, w, count); } else { - assert(pointer->value_type == vtn_value_type_deref); + assert(pointer->value_type == vtn_value_type_access_chain); vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count); } break; diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index 0d3702c37b2..e08a2d8bc81 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -88,7 +88,8 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpFunctionParameter: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + 
struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); assert(b->func_param_idx < b->func->impl->num_params); unsigned idx = b->func_param_idx++; @@ -97,10 +98,13 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, nir_local_variable_create(b->func->impl, b->func->impl->function->params[idx].type, val->name); - b->func->impl->params[idx] = param; - val->deref = nir_deref_var_create(b, param); - val->deref_type = vtn_value(b, w[1], vtn_value_type_type)->type; + + val->access_chain = ralloc(b, struct vtn_access_chain); + val->access_chain->var = param; + val->access_chain->length = 0; + val->access_chain->var_type = + vtn_value(b, w[1], vtn_value_type_type)->type; break; } @@ -480,7 +484,7 @@ vtn_handle_phis_first_pass(struct vtn_builder *b, SpvOp opcode, nir_local_variable_create(b->nb.impl, type->type, "phi"); _mesa_hash_table_insert(b->phi_table, w, phi_var); - val->ssa = vtn_variable_load(b, nir_deref_var_create(b, phi_var), type); + val->ssa = vtn_local_load(b, nir_deref_var_create(b, phi_var)); return true; } @@ -496,8 +500,6 @@ vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, assert(phi_entry); nir_variable *phi_var = phi_entry->data; - struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - for (unsigned i = 3; i < count; i += 2) { struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]); struct vtn_block *pred = @@ -505,7 +507,7 @@ vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, b->nb.cursor = nir_after_block_before_jump(pred->end_block); - vtn_variable_store(b, src, nir_deref_var_create(b, phi_var), type); + vtn_local_store(b, src, nir_deref_var_create(b, phi_var)); } return true; @@ -565,9 +567,8 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) { struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]); - vtn_variable_store(b, src, - nir_deref_var_create(b, b->impl->return_var), - 
NULL); + vtn_local_store(b, src, + nir_deref_var_create(b, b->impl->return_var)); } if (block->branch_type != vtn_branch_type_none) { diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index a675aa4c7f3..6470dc11b4d 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -419,8 +419,8 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Modf: { val->ssa->def = nir_ffract(nb, src[0]); - nir_deref_var *out = vtn_value(b, w[6], vtn_value_type_deref)->deref; - nir_store_deref_var(nb, out, nir_ffloor(nb, src[0]), 0xf); + nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), + nir_ffloor(nb, src[0]), 0xf); return; } @@ -613,8 +613,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450Frexp: { nir_ssa_def *exponent; val->ssa->def = build_frexp(nb, src[0], &exponent); - nir_deref_var *out = vtn_value(b, w[6], vtn_value_type_deref)->deref; - nir_store_deref_var(nb, out, exponent, 0xf); + nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf); return; } diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 9a066d6cdba..d15dfa846f3 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -41,7 +41,7 @@ enum vtn_value_type { vtn_value_type_decoration_group, vtn_value_type_type, vtn_value_type_constant, - vtn_value_type_deref, + vtn_value_type_access_chain, vtn_value_type_function, vtn_value_type_block, vtn_value_type_ssa, @@ -234,15 +234,25 @@ struct vtn_type { SpvBuiltIn builtin; }; +struct vtn_access_chain { + nir_variable *var; + struct vtn_type *var_type; + + uint32_t length; + + /* Struct elements and array offsets */ + uint32_t ids[0]; +}; + struct vtn_image_pointer { - nir_deref_var *deref; + struct vtn_access_chain *image; nir_ssa_def *coord; nir_ssa_def *sample; }; struct vtn_sampled_image { - nir_deref_var *image; /* Image or array of images */ - nir_deref_var *sampler; 
/* Sampler */ + struct vtn_access_chain *image; /* Image or array of images */ + struct vtn_access_chain *sampler; /* Sampler */ }; struct vtn_value { @@ -257,10 +267,7 @@ struct vtn_value { nir_constant *constant; const struct glsl_type *const_type; }; - struct { - nir_deref_var *deref; - struct vtn_type *deref_type; - }; + struct vtn_access_chain *access_chain; struct vtn_image_pointer *image; struct vtn_sampled_image *sampled_image; struct vtn_function *func; @@ -383,12 +390,18 @@ struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b, struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src); +nir_deref_var *vtn_nir_deref(struct vtn_builder *b, uint32_t id); + +struct vtn_ssa_value *vtn_local_load(struct vtn_builder *b, nir_deref_var *src); + +void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest); + struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, - struct vtn_type *src_type); +vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src); void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, - nir_deref_var *dest, struct vtn_type *dest_type); + struct vtn_access_chain *dest); typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, -- cgit v1.2.3 From 2892693d56c5409721d1636627257e091fd196ec Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 20 Jan 2016 17:40:49 -0800 Subject: nir/spirv: Split variable handling out into its own file It's 1300 lines all by itself and it will only grow. 
--- src/glsl/Makefile.sources | 3 +- src/glsl/nir/spirv/spirv_to_nir.c | 1306 +---------------------------------- src/glsl/nir/spirv/vtn_private.h | 20 + src/glsl/nir/spirv/vtn_variables.c | 1311 ++++++++++++++++++++++++++++++++++++ 4 files changed, 1337 insertions(+), 1303 deletions(-) create mode 100644 src/glsl/nir/spirv/vtn_variables.c (limited to 'src') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 777abf1bd49..348b4880875 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -99,7 +99,8 @@ SPIRV_FILES = \ nir/spirv/vtn_alu.c \ nir/spirv/vtn_cfg.c \ nir/spirv/vtn_glsl450.c \ - nir/spirv/vtn_private.h + nir/spirv/vtn_private.h \ + nir/spirv/vtn_variables.c # libglsl diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 771c8345941..6ce2e0c16a2 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1068,1303 +1068,6 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, } } -static void -set_mode_system_value(nir_variable_mode *mode) -{ - assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); - *mode = nir_var_system_value; -} - -static void -vtn_get_builtin_location(struct vtn_builder *b, - SpvBuiltIn builtin, int *location, - nir_variable_mode *mode) -{ - switch (builtin) { - case SpvBuiltInPosition: - *location = VARYING_SLOT_POS; - break; - case SpvBuiltInPointSize: - *location = VARYING_SLOT_PSIZ; - break; - case SpvBuiltInClipDistance: - *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */ - break; - case SpvBuiltInCullDistance: - /* XXX figure this out */ - unreachable("unhandled builtin"); - case SpvBuiltInVertexIndex: - *location = SYSTEM_VALUE_VERTEX_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInVertexId: - /* Vulkan defines VertexID to be zero-based and reserves the new - * builtin keyword VertexIndex to indicate the non-zero-based value. 
- */ - *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; - set_mode_system_value(mode); - break; - case SpvBuiltInInstanceIndex: - /* XXX */ - case SpvBuiltInInstanceId: - *location = SYSTEM_VALUE_INSTANCE_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInPrimitiveId: - *location = VARYING_SLOT_PRIMITIVE_ID; - *mode = nir_var_shader_out; - break; - case SpvBuiltInInvocationId: - *location = SYSTEM_VALUE_INVOCATION_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInLayer: - *location = VARYING_SLOT_LAYER; - *mode = nir_var_shader_out; - break; - case SpvBuiltInViewportIndex: - *location = VARYING_SLOT_VIEWPORT; - if (b->shader->stage == MESA_SHADER_GEOMETRY) - *mode = nir_var_shader_out; - else if (b->shader->stage == MESA_SHADER_FRAGMENT) - *mode = nir_var_shader_in; - else - unreachable("invalid stage for SpvBuiltInViewportIndex"); - break; - case SpvBuiltInTessLevelOuter: - case SpvBuiltInTessLevelInner: - case SpvBuiltInTessCoord: - case SpvBuiltInPatchVertices: - unreachable("no tessellation support"); - case SpvBuiltInFragCoord: - *location = VARYING_SLOT_POS; - assert(*mode == nir_var_shader_in); - break; - case SpvBuiltInPointCoord: - *location = VARYING_SLOT_PNTC; - assert(*mode == nir_var_shader_in); - break; - case SpvBuiltInFrontFacing: - *location = VARYING_SLOT_FACE; - assert(*mode == nir_var_shader_in); - break; - case SpvBuiltInSampleId: - *location = SYSTEM_VALUE_SAMPLE_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInSamplePosition: - *location = SYSTEM_VALUE_SAMPLE_POS; - set_mode_system_value(mode); - break; - case SpvBuiltInSampleMask: - *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? 
*/ - set_mode_system_value(mode); - break; - case SpvBuiltInFragDepth: - *location = FRAG_RESULT_DEPTH; - assert(*mode == nir_var_shader_out); - break; - case SpvBuiltInNumWorkgroups: - *location = SYSTEM_VALUE_NUM_WORK_GROUPS; - set_mode_system_value(mode); - break; - case SpvBuiltInWorkgroupSize: - /* This should already be handled */ - unreachable("unsupported builtin"); - break; - case SpvBuiltInWorkgroupId: - *location = SYSTEM_VALUE_WORK_GROUP_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInLocalInvocationId: - *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInLocalInvocationIndex: - *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; - set_mode_system_value(mode); - break; - case SpvBuiltInGlobalInvocationId: - *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInHelperInvocation: - default: - unreachable("unsupported builtin"); - } -} - -static void -var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *void_var) -{ - assert(val->value_type == vtn_value_type_access_chain); - assert(val->access_chain->length == 0); - assert(val->access_chain->var == void_var); - - nir_variable *var = void_var; - switch (dec->decoration) { - case SpvDecorationRelaxedPrecision: - break; /* FIXME: Do nothing with this for now. 
*/ - case SpvDecorationNoPerspective: - var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; - break; - case SpvDecorationFlat: - var->data.interpolation = INTERP_QUALIFIER_FLAT; - break; - case SpvDecorationCentroid: - var->data.centroid = true; - break; - case SpvDecorationSample: - var->data.sample = true; - break; - case SpvDecorationInvariant: - var->data.invariant = true; - break; - case SpvDecorationConstant: - assert(var->constant_initializer != NULL); - var->data.read_only = true; - break; - case SpvDecorationNonWritable: - var->data.read_only = true; - break; - case SpvDecorationLocation: - var->data.location = dec->literals[0]; - break; - case SpvDecorationComponent: - var->data.location_frac = dec->literals[0]; - break; - case SpvDecorationIndex: - var->data.explicit_index = true; - var->data.index = dec->literals[0]; - break; - case SpvDecorationBinding: - var->data.explicit_binding = true; - var->data.binding = dec->literals[0]; - break; - case SpvDecorationDescriptorSet: - var->data.descriptor_set = dec->literals[0]; - break; - case SpvDecorationBuiltIn: { - SpvBuiltIn builtin = dec->literals[0]; - - if (builtin == SpvBuiltInWorkgroupSize) { - /* This shouldn't be a builtin. It's actually a constant. 
*/ - var->data.mode = nir_var_global; - var->data.read_only = true; - - nir_constant *val = rzalloc(var, nir_constant); - val->value.u[0] = b->shader->info.cs.local_size[0]; - val->value.u[1] = b->shader->info.cs.local_size[1]; - val->value.u[2] = b->shader->info.cs.local_size[2]; - var->constant_initializer = val; - break; - } - - nir_variable_mode mode = var->data.mode; - vtn_get_builtin_location(b, builtin, &var->data.location, &mode); - var->data.explicit_location = true; - var->data.mode = mode; - if (mode == nir_var_shader_in || mode == nir_var_system_value) - var->data.read_only = true; - - if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) - var->data.origin_upper_left = b->origin_upper_left; - - if (mode == nir_var_shader_out) - b->builtins[dec->literals[0]].out = var; - else - b->builtins[dec->literals[0]].in = var; - break; - } - case SpvDecorationRowMajor: - case SpvDecorationColMajor: - case SpvDecorationGLSLShared: - case SpvDecorationPatch: - case SpvDecorationRestrict: - case SpvDecorationAliased: - case SpvDecorationVolatile: - case SpvDecorationCoherent: - case SpvDecorationNonReadable: - case SpvDecorationUniform: - /* This is really nice but we have no use for it right now. 
*/ - case SpvDecorationCPacked: - case SpvDecorationSaturatedConversion: - case SpvDecorationStream: - case SpvDecorationOffset: - case SpvDecorationXfbBuffer: - case SpvDecorationFuncParamAttr: - case SpvDecorationFPRoundingMode: - case SpvDecorationFPFastMathMode: - case SpvDecorationLinkageAttributes: - case SpvDecorationSpecId: - break; - default: - unreachable("Unhandled variable decoration"); - } -} - -static nir_variable * -get_builtin_variable(struct vtn_builder *b, - nir_variable_mode mode, - const struct glsl_type *type, - SpvBuiltIn builtin) -{ - nir_variable *var; - if (mode == nir_var_shader_out) - var = b->builtins[builtin].out; - else - var = b->builtins[builtin].in; - - if (!var) { - int location; - vtn_get_builtin_location(b, builtin, &location, &mode); - - var = nir_variable_create(b->shader, mode, type, "builtin"); - - var->data.location = location; - var->data.explicit_location = true; - - if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) - var->data.origin_upper_left = b->origin_upper_left; - - if (mode == nir_var_shader_out) - b->builtins[builtin].out = var; - else - b->builtins[builtin].in = var; - } - - return var; -} - -static struct vtn_ssa_value * -_vtn_variable_load(struct vtn_builder *b, - nir_deref_var *src_deref, nir_deref *src_deref_tail) -{ - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = src_deref_tail->type; - - /* The deref tail may contain a deref to select a component of a vector (in - * other words, it might not be an actual tail) so we have to save it away - * here since we overwrite it later. - */ - nir_deref *old_child = src_deref_tail->child; - - if (glsl_type_is_vector_or_scalar(val->type)) { - /* Terminate the deref chain in case there is one more link to pick - * off a component of the vector. 
- */ - src_deref_tail->child = NULL; - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); - load->variables[0] = - nir_deref_as_var(nir_copy_deref(load, &src_deref->deref)); - load->num_components = glsl_get_vector_elements(val->type); - nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); - - nir_builder_instr_insert(&b->nb, &load->instr); - - if (src_deref->var->data.mode == nir_var_uniform && - glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) { - /* Uniform boolean loads need to be fixed up since they're defined - * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. - */ - val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); - } else { - val->def = &load->dest.ssa; - } - } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(val->type)) { - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - - nir_deref_array *deref = nir_deref_array_create(b); - deref->deref_array_type = nir_deref_array_type_direct; - deref->deref.type = glsl_get_array_element(val->type); - src_deref_tail->child = &deref->deref; - for (unsigned i = 0; i < elems; i++) { - deref->base_offset = i; - val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); - } - } else { - assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - - nir_deref_struct *deref = nir_deref_struct_create(b, 0); - src_deref_tail->child = &deref->deref; - for (unsigned i = 0; i < elems; i++) { - deref->index = i; - deref->deref.type = glsl_get_struct_field(val->type, i); - val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); - } - } - - src_deref_tail->child = old_child; - - return val; -} - -static void -_vtn_variable_store(struct vtn_builder *b, - nir_deref_var *dest_deref, nir_deref *dest_deref_tail, - struct vtn_ssa_value 
*src) -{ - nir_deref *old_child = dest_deref_tail->child; - - if (glsl_type_is_vector_or_scalar(src->type)) { - /* Terminate the deref chain in case there is one more link to pick - * off a component of the vector. - */ - dest_deref_tail->child = NULL; - - nir_intrinsic_instr *store = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); - store->variables[0] = - nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); - store->num_components = glsl_get_vector_elements(src->type); - store->const_index[0] = (1 << store->num_components) - 1; - store->src[0] = nir_src_for_ssa(src->def); - - nir_builder_instr_insert(&b->nb, &store->instr); - } else if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(src->type)) { - unsigned elems = glsl_get_length(src->type); - - nir_deref_array *deref = nir_deref_array_create(b); - deref->deref_array_type = nir_deref_array_type_direct; - deref->deref.type = glsl_get_array_element(src->type); - dest_deref_tail->child = &deref->deref; - for (unsigned i = 0; i < elems; i++) { - deref->base_offset = i; - _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); - } - } else { - assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); - unsigned elems = glsl_get_length(src->type); - - nir_deref_struct *deref = nir_deref_struct_create(b, 0); - dest_deref_tail->child = &deref->deref; - for (unsigned i = 0; i < elems; i++) { - deref->index = i; - deref->deref.type = glsl_get_struct_field(src->type, i); - _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); - } - } - - dest_deref_tail->child = old_child; -} - -static nir_ssa_def * -get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, - struct vtn_type **type, unsigned *chain_idx) -{ - assert(chain->var->interface_type && "variable is a block"); - - nir_ssa_def *array_index; - if (glsl_type_is_array(chain->var->type)) { - assert(chain->length > 0); - array_index = vtn_ssa_value(b, chain->ids[0])->def; - 
*chain_idx = 1; - *type = chain->var_type->array_element; - } else { - array_index = nir_imm_int(&b->nb, 0); - *chain_idx = 0; - *type = chain->var_type; - } - - nir_intrinsic_instr *instr = - nir_intrinsic_instr_create(b->nb.shader, - nir_intrinsic_vulkan_resource_index); - instr->src[0] = nir_src_for_ssa(array_index); - instr->const_index[0] = chain->var->data.descriptor_set; - instr->const_index[1] = chain->var->data.binding; - instr->const_index[2] = chain->var->data.mode; - - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); - nir_builder_instr_insert(&b->nb, &instr->instr); - - return &instr->dest.ssa; -} - -static bool -variable_is_external_block(nir_variable *var) -{ - return var->interface_type && - glsl_type_is_struct(var->interface_type) && - (var->data.mode == nir_var_uniform || - var->data.mode == nir_var_shader_storage); -} - -static nir_ssa_def * -vtn_access_chain_to_offset(struct vtn_builder *b, - struct vtn_access_chain *chain, - nir_ssa_def **index_out, struct vtn_type **type_out, - unsigned *end_idx_out, bool stop_at_matrix) -{ - unsigned idx = 0; - struct vtn_type *type; - *index_out = get_vulkan_resource_index(b, chain, &type, &idx); - - nir_ssa_def *offset = nir_imm_int(&b->nb, 0); - for (; idx < chain->length; idx++) { - enum glsl_base_type base_type = glsl_get_base_type(type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_BOOL: - /* Some users may not want matrix or vector derefs */ - if (stop_at_matrix) { - idx++; - goto end; - } - /* Fall through */ - - case GLSL_TYPE_ARRAY: - offset = nir_iadd(&b->nb, offset, - nir_imul(&b->nb, - vtn_ssa_value(b, chain->ids[idx])->def, - nir_imm_int(&b->nb, type->stride))); - - if (glsl_type_is_vector(type->type)) { - /* This had better be the tail */ - assert(idx == chain->length - 1); - type = rzalloc(b, struct vtn_type); - type->type = glsl_scalar_type(base_type); - } else { - type = 
type->array_element; - } - break; - - case GLSL_TYPE_STRUCT: { - struct vtn_value *member_val = - vtn_value(b, chain->ids[idx], vtn_value_type_constant); - unsigned member = member_val->constant->value.u[0]; - - offset = nir_iadd(&b->nb, offset, - nir_imm_int(&b->nb, type->offsets[member])); - type = type->members[member]; - break; - } - - default: - unreachable("Invalid type for deref"); - } - } - -end: - *type_out = type; - if (end_idx_out) - *end_idx_out = idx; - - return offset; -} - -static void -_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, - nir_ssa_def *index, nir_ssa_def *offset, - struct vtn_ssa_value **inout, const struct glsl_type *type) -{ - nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op); - instr->num_components = glsl_get_vector_elements(type); - - int src = 0; - if (!load) { - instr->const_index[0] = (1 << instr->num_components) - 1; /* write mask */ - instr->src[src++] = nir_src_for_ssa((*inout)->def); - } - - /* We set the base and size for push constant load to the entire push - * constant block for now. 
- */ - if (op == nir_intrinsic_load_push_constant) { - instr->const_index[0] = 0; - instr->const_index[1] = 128; - } - - if (index) - instr->src[src++] = nir_src_for_ssa(index); - - instr->src[src++] = nir_src_for_ssa(offset); - - if (load) { - nir_ssa_dest_init(&instr->instr, &instr->dest, - instr->num_components, NULL); - (*inout)->def = &instr->dest.ssa; - } - - nir_builder_instr_insert(&b->nb, &instr->instr); - - if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL) - (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0)); -} - -static void -_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, - nir_ssa_def *index, nir_ssa_def *offset, - struct vtn_access_chain *chain, unsigned chain_idx, - struct vtn_type *type, struct vtn_ssa_value **inout) -{ - if (chain_idx >= chain->length) - chain = NULL; - - if (load && chain == NULL && *inout == NULL) - *inout = vtn_create_ssa_value(b, type->type); - - enum glsl_base_type base_type = glsl_get_base_type(type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - /* This is where things get interesting. At this point, we've hit - * a vector, a scalar, or a matrix. 
- */ - if (glsl_type_is_matrix(type->type)) { - if (chain == NULL) { - /* Loading the whole matrix */ - struct vtn_ssa_value *transpose; - unsigned num_ops, vec_width; - if (type->row_major) { - num_ops = glsl_get_vector_elements(type->type); - vec_width = glsl_get_matrix_columns(type->type); - if (load) { - const struct glsl_type *transpose_type = - glsl_matrix_type(base_type, vec_width, num_ops); - *inout = vtn_create_ssa_value(b, transpose_type); - } else { - transpose = vtn_ssa_transpose(b, *inout); - inout = &transpose; - } - } else { - num_ops = glsl_get_matrix_columns(type->type); - vec_width = glsl_get_vector_elements(type->type); - } - - for (unsigned i = 0; i < num_ops; i++) { - nir_ssa_def *elem_offset = - nir_iadd(&b->nb, offset, - nir_imm_int(&b->nb, i * type->stride)); - _vtn_load_store_tail(b, op, load, index, elem_offset, - &(*inout)->elems[i], - glsl_vector_type(base_type, vec_width)); - } - - if (load && type->row_major) - *inout = vtn_ssa_transpose(b, *inout); - } else if (type->row_major) { - /* Row-major but with an access chiain. 
*/ - nir_ssa_def *col_offset = - nir_imul(&b->nb, vtn_ssa_value(b, chain->ids[chain_idx])->def, - nir_imm_int(&b->nb, type->array_element->stride)); - offset = nir_iadd(&b->nb, offset, col_offset); - - if (chain_idx + 1 < chain->length) { - /* Picking off a single element */ - nir_ssa_def *row_offset = - nir_imul(&b->nb, - vtn_ssa_value(b, chain->ids[chain_idx + 1])->def, - nir_imm_int(&b->nb, type->stride)); - offset = nir_iadd(&b->nb, offset, row_offset); - _vtn_load_store_tail(b, op, load, index, offset, inout, - glsl_scalar_type(base_type)); - } else { - /* Picking one element off each column */ - unsigned num_comps = glsl_get_vector_elements(type->type); - nir_ssa_def *comps[4]; - for (unsigned i = 0; i < num_comps; i++) { - nir_ssa_def *elem_offset = - nir_iadd(&b->nb, offset, - nir_imm_int(&b->nb, i * type->stride)); - - struct vtn_ssa_value *comp = NULL, temp_val; - if (!load) { - temp_val.def = nir_channel(&b->nb, (*inout)->def, i); - temp_val.type = glsl_scalar_type(base_type); - comp = &temp_val; - } - _vtn_load_store_tail(b, op, load, index, elem_offset, - &comp, glsl_scalar_type(base_type)); - comps[i] = comp->def; - } - - if (load) - (*inout)->def = nir_vec(&b->nb, comps, num_comps); - } - } else { - /* Column-major with a deref. Fall through to array case. */ - nir_ssa_def *col_offset = - nir_imul(&b->nb, vtn_ssa_value(b, chain->ids[chain_idx])->def, - nir_imm_int(&b->nb, type->stride)); - offset = nir_iadd(&b->nb, offset, col_offset); - - _vtn_block_load_store(b, op, load, index, offset, - chain, chain_idx + 1, - type->array_element, inout); - } - } else if (chain == NULL) { - /* Single whole vector */ - assert(glsl_type_is_vector_or_scalar(type->type)); - _vtn_load_store_tail(b, op, load, index, offset, inout, type->type); - } else { - /* Single component of a vector. Fall through to array case. 
*/ - nir_ssa_def *elem_offset = - nir_imul(&b->nb, vtn_ssa_value(b, chain->ids[chain_idx])->def, - nir_imm_int(&b->nb, type->stride)); - offset = nir_iadd(&b->nb, offset, elem_offset); - - _vtn_block_load_store(b, op, load, index, offset, NULL, 0, - type->array_element, inout); - } - return; - - case GLSL_TYPE_ARRAY: { - unsigned elems = glsl_get_length(type->type); - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *elem_off = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); - _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, - type->array_element, &(*inout)->elems[i]); - } - return; - } - - case GLSL_TYPE_STRUCT: { - unsigned elems = glsl_get_length(type->type); - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *elem_off = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); - _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, - type->members[i], &(*inout)->elems[i]); - } - return; - } - - default: - unreachable("Invalid block member type"); - } -} - -static struct vtn_ssa_value * -vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src) -{ - nir_intrinsic_op op; - if (src->var->data.mode == nir_var_uniform) { - if (src->var->data.descriptor_set >= 0) { - /* UBO load */ - assert(src->var->data.binding >= 0); - - op = nir_intrinsic_load_ubo; - } else { - /* Push constant load */ - assert(src->var->data.descriptor_set == -1 && - src->var->data.binding == -1); - - op = nir_intrinsic_load_push_constant; - } - } else { - assert(src->var->data.mode == nir_var_shader_storage); - op = nir_intrinsic_load_ssbo; - } - - nir_ssa_def *offset, *index = NULL; - struct vtn_type *type; - unsigned chain_idx; - offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true); - - if (op == nir_intrinsic_load_push_constant) - index = NULL; - - struct vtn_ssa_value *value = NULL; - _vtn_block_load_store(b, op, true, index, offset, - src, chain_idx, type, &value); - return value; -} - -static void 
-vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, - struct vtn_access_chain *dst) -{ - nir_ssa_def *offset, *index = NULL; - struct vtn_type *type; - unsigned chain_idx; - offset = vtn_access_chain_to_offset(b, dst, &index, &type, &chain_idx, true); - - _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset, - dst, chain_idx, type, &src); -} - -/* - * Gets the NIR-level deref tail, which may have as a child an array deref - * selecting which component due to OpAccessChain supporting per-component - * indexing in SPIR-V. - */ - -static nir_deref * -get_deref_tail(nir_deref_var *deref) -{ - nir_deref *cur = &deref->deref; - while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) - cur = cur->child; - - return cur; -} - -static nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, - nir_ssa_def *src, unsigned index); - -static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, - nir_ssa_def *src, - nir_ssa_def *index); - -static nir_deref_var * -vtn_access_chain_to_deref(struct vtn_builder *b, - struct vtn_access_chain *chain); - -struct vtn_ssa_value * -vtn_local_load(struct vtn_builder *b, nir_deref_var *src) -{ - nir_deref *src_tail = get_deref_tail(src); - struct vtn_ssa_value *val = _vtn_variable_load(b, src, src_tail); - - if (src_tail->child) { - nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); - assert(vec_deref->deref.child == NULL); - val->type = vec_deref->deref.type; - if (vec_deref->deref_array_type == nir_deref_array_type_direct) - val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); - else - val->def = vtn_vector_extract_dynamic(b, val->def, - vec_deref->indirect.ssa); - } - - return val; -} - -static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, - nir_ssa_def *src, nir_ssa_def *insert, - unsigned index); - -static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, - nir_ssa_def *src, - nir_ssa_def *insert, - nir_ssa_def *index); -void 
-vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, - nir_deref_var *dest) -{ - nir_deref *dest_tail = get_deref_tail(dest); - - if (dest_tail->child) { - struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); - nir_deref_array *deref = nir_deref_as_array(dest_tail->child); - assert(deref->deref.child == NULL); - if (deref->deref_array_type == nir_deref_array_type_direct) - val->def = vtn_vector_insert(b, val->def, src->def, - deref->base_offset); - else - val->def = vtn_vector_insert_dynamic(b, val->def, src->def, - deref->indirect.ssa); - _vtn_variable_store(b, dest, dest_tail, val); - } else { - _vtn_variable_store(b, dest, dest_tail, src); - } -} - -struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src) -{ - if (variable_is_external_block(src->var)) - return vtn_block_load(b, src); - else - return vtn_local_load(b, vtn_access_chain_to_deref(b, src)); -} - -void -vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, - struct vtn_access_chain *dest) -{ - if (variable_is_external_block(dest->var)) { - assert(dest->var->data.mode == nir_var_shader_storage); - vtn_block_store(b, src, dest); - } else { - vtn_local_store(b, src, vtn_access_chain_to_deref(b, dest)); - } -} - -static void -vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, - struct vtn_access_chain *src) -{ - if (src->var->interface_type || dest->var->interface_type) { - struct vtn_ssa_value *val = vtn_variable_load(b, src); - vtn_variable_store(b, val, dest); - } else { - /* TODO: Handle single components of vectors */ - nir_deref_var *src_deref = vtn_access_chain_to_deref(b, src); - nir_deref_var *dest_deref = vtn_access_chain_to_deref(b, dest); - - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); - copy->variables[0] = - nir_deref_as_var(nir_copy_deref(copy, &dest_deref->deref)); - copy->variables[1] = - nir_deref_as_var(nir_copy_deref(copy, 
&src_deref->deref)); - - nir_builder_instr_insert(&b->nb, ©->instr); - } -} - -/* Tries to compute the size of an interface block based on the strides and - * offsets that are provided to us in the SPIR-V source. - */ -static unsigned -vtn_type_block_size(struct vtn_type *type) -{ - enum glsl_base_type base_type = glsl_get_base_type(type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_DOUBLE: { - unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) : - glsl_get_matrix_columns(type->type); - if (cols > 1) { - assert(type->stride > 0); - return type->stride * cols; - } else if (base_type == GLSL_TYPE_DOUBLE) { - return glsl_get_vector_elements(type->type) * 8; - } else { - return glsl_get_vector_elements(type->type) * 4; - } - } - - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_INTERFACE: { - unsigned size = 0; - unsigned num_fields = glsl_get_length(type->type); - for (unsigned f = 0; f < num_fields; f++) { - unsigned field_end = type->offsets[f] + - vtn_type_block_size(type->members[f]); - size = MAX2(size, field_end); - } - return size; - } - - case GLSL_TYPE_ARRAY: - assert(type->stride > 0); - assert(glsl_get_length(type->type) > 0); - return type->stride * glsl_get_length(type->type); - - default: - assert(!"Invalid block type"); - return 0; - } -} - -static bool -is_interface_type(struct vtn_type *type) -{ - return type->block || type->buffer_block || - glsl_type_is_sampler(type->type) || - glsl_type_is_image(type->type); -} - -static void -vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpVariable: { - struct vtn_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_access_chain); - SpvStorageClass storage_class = w[3]; - - nir_variable *var = rzalloc(b->shader, nir_variable); - - var->type = type->type; 
- var->name = ralloc_strdup(var, val->name); - - struct vtn_type *interface_type; - if (is_interface_type(type)) { - interface_type = type; - } else if (glsl_type_is_array(type->type) && - is_interface_type(type->array_element)) { - interface_type = type->array_element; - } else { - interface_type = NULL; - } - - if (interface_type) - var->interface_type = interface_type->type; - - switch (storage_class) { - case SpvStorageClassUniform: - case SpvStorageClassUniformConstant: - if (interface_type && interface_type->buffer_block) { - var->data.mode = nir_var_shader_storage; - b->shader->info.num_ssbos++; - } else { - /* UBO's and samplers */ - var->data.mode = nir_var_uniform; - var->data.read_only = true; - if (interface_type) { - if (glsl_type_is_image(interface_type->type)) { - b->shader->info.num_images++; - var->data.image.format = interface_type->image_format; - - switch (interface_type->access_qualifier) { - case SpvAccessQualifierReadOnly: - var->data.image.read_only = true; - break; - case SpvAccessQualifierWriteOnly: - var->data.image.write_only = true; - break; - default: - break; - } - } else if (glsl_type_is_sampler(interface_type->type)) { - b->shader->info.num_textures++; - } else { - assert(glsl_type_is_struct(interface_type->type)); - b->shader->info.num_ubos++; - } - } - } - break; - case SpvStorageClassPushConstant: - assert(interface_type && interface_type->block); - var->data.mode = nir_var_uniform; - var->data.read_only = true; - var->data.descriptor_set = -1; - var->data.binding = -1; - - /* We have exactly one push constant block */ - assert(b->shader->num_uniforms == 0); - b->shader->num_uniforms = vtn_type_block_size(type) * 4; - break; - case SpvStorageClassInput: - var->data.mode = nir_var_shader_in; - var->data.read_only = true; - break; - case SpvStorageClassOutput: - var->data.mode = nir_var_shader_out; - break; - case SpvStorageClassPrivate: - var->data.mode = nir_var_global; - var->interface_type = NULL; - break; - case 
SpvStorageClassFunction: - var->data.mode = nir_var_local; - var->interface_type = NULL; - break; - case SpvStorageClassWorkgroup: - var->data.mode = nir_var_shared; - break; - case SpvStorageClassCrossWorkgroup: - case SpvStorageClassGeneric: - case SpvStorageClassAtomicCounter: - default: - unreachable("Unhandled variable storage class"); - } - - if (count > 4) { - assert(count == 5); - nir_constant *constant = - vtn_value(b, w[4], vtn_value_type_constant)->constant; - var->constant_initializer = nir_constant_clone(constant, var); - } - - val->access_chain = ralloc(b, struct vtn_access_chain); - val->access_chain->var = var; - val->access_chain->var_type = type; - val->access_chain->length = 0; - - /* We handle decorations first because decorations might give us - * location information. We use the data.explicit_location field to - * note that the location provided is the "final" location. If - * data.explicit_location == false, this means that it's relative to - * whatever the base location is. - */ - vtn_foreach_decoration(b, val, var_decoration_cb, var); - - if (!var->data.explicit_location) { - if (b->shader->stage == MESA_SHADER_FRAGMENT && - var->data.mode == nir_var_shader_out) { - var->data.location += FRAG_RESULT_DATA0; - } else if (b->shader->stage == MESA_SHADER_VERTEX && - var->data.mode == nir_var_shader_in) { - var->data.location += VERT_ATTRIB_GENERIC0; - } else if (var->data.mode == nir_var_shader_in || - var->data.mode == nir_var_shader_out) { - var->data.location += VARYING_SLOT_VAR0; - } - } - - /* XXX: Work around what appears to be a glslang bug. While the - * SPIR-V spec doesn't say that setting a descriptor set on a push - * constant is invalid, it certainly makes no sense. However, at - * some point, glslang started setting descriptor set 0 on push - * constants for some unknown reason. Hopefully this can be removed - * at some point in the future. 
- */ - if (storage_class == SpvStorageClassPushConstant) { - var->data.descriptor_set = -1; - var->data.binding = -1; - } - - /* Interface block variables aren't actually going to be referenced - * by the generated NIR, so we don't put them in the list - */ - if (var->interface_type && glsl_type_is_struct(var->interface_type)) - break; - - if (var->data.mode == nir_var_local) { - nir_function_impl_add_variable(b->impl, var); - } else { - nir_shader_add_variable(b->shader, var); - } - - break; - } - - case SpvOpAccessChain: - case SpvOpInBoundsAccessChain: { - struct vtn_access_chain *base, *chain; - struct vtn_value *base_val = vtn_untyped_value(b, w[3]); - if (base_val->value_type == vtn_value_type_sampled_image) { - /* This is rather insane. SPIR-V allows you to use OpSampledImage - * to combine an array of images with a single sampler to get an - * array of sampled images that all share the same sampler. - * Fortunately, this means that we can more-or-less ignore the - * sampler when crawling the access chain, but it does leave us - * with this rather awkward little special-case. 
- */ - base = base_val->sampled_image->image; - } else { - assert(base_val->value_type == vtn_value_type_access_chain); - base = base_val->access_chain; - } - - uint32_t new_len = base->length + count - 4; - chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->ids[0])); - - *chain = *base; - - chain->length = new_len; - unsigned idx = 0; - for (int i = 0; i < base->length; i++) - chain->ids[idx++] = base->ids[i]; - - for (int i = 4; i < count; i++) - chain->ids[idx++] = w[i]; - - if (base_val->value_type == vtn_value_type_sampled_image) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_sampled_image); - val->sampled_image = ralloc(b, struct vtn_sampled_image); - val->sampled_image->image = chain; - val->sampled_image->sampler = base_val->sampled_image->sampler; - } else { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_access_chain); - val->access_chain = chain; - } - break; - } - - case SpvOpCopyMemory: { - struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain); - struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain); - - vtn_variable_copy(b, dest->access_chain, src->access_chain); - break; - } - - case SpvOpLoad: { - struct vtn_access_chain *src = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - - if (src->var->interface_type && - (glsl_type_is_sampler(src->var->interface_type) || - glsl_type_is_image(src->var->interface_type))) { - vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src; - return; - } - - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = vtn_variable_load(b, src); - break; - } - - case SpvOpStore: { - struct vtn_access_chain *dest = - vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; - struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); - vtn_variable_store(b, src, dest); - break; - } - - case SpvOpArrayLength: { - struct vtn_access_chain *chain = - vtn_value(b, w[3], 
vtn_value_type_access_chain)->access_chain; - - const uint32_t offset = chain->var_type->offsets[w[4]]; - const uint32_t stride = chain->var_type->members[w[4]]->stride; - - unsigned chain_idx; - struct vtn_type *type; - nir_ssa_def *index = - get_vulkan_resource_index(b, chain, &type, &chain_idx); - - nir_intrinsic_instr *instr = - nir_intrinsic_instr_create(b->nb.shader, - nir_intrinsic_get_buffer_size); - instr->src[0] = nir_src_for_ssa(index); - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); - nir_builder_instr_insert(&b->nb, &instr->instr); - nir_ssa_def *buf_size = &instr->dest.ssa; - - /* array_length = max(buffer_size - offset, 0) / stride */ - nir_ssa_def *array_length = - nir_idiv(&b->nb, - nir_imax(&b->nb, - nir_isub(&b->nb, - buf_size, - nir_imm_int(&b->nb, offset)), - nir_imm_int(&b->nb, 0u)), - nir_imm_int(&b->nb, stride)); - - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = vtn_create_ssa_value(b, glsl_uint_type()); - val->ssa->def = array_length; - break; - } - - case SpvOpCopyMemorySized: - default: - unreachable("Unhandled opcode"); - } -} - -static nir_deref_var * -vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) -{ - nir_deref_var *deref_var = nir_deref_var_create(b, chain->var); - nir_deref *tail = &deref_var->deref; - struct vtn_type *deref_type = chain->var_type; - - for (unsigned i = 0; i < chain->length; i++) { - struct vtn_value *idx_val = vtn_untyped_value(b, chain->ids[i]); - enum glsl_base_type base_type = glsl_get_base_type(tail->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_ARRAY: { - nir_deref_array *deref_arr = nir_deref_array_create(b); - if (base_type == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(tail->type)) { - deref_type = deref_type->array_element; - } else { - assert(glsl_type_is_vector(tail->type)); - deref_type = ralloc(b, struct 
vtn_type); - deref_type->type = glsl_scalar_type(base_type); - } - - deref_arr->deref.type = deref_type->type; - - if (idx_val->value_type == vtn_value_type_constant) { - deref_arr->deref_array_type = nir_deref_array_type_direct; - deref_arr->base_offset = idx_val->constant->value.u[0]; - } else { - assert(idx_val->value_type == vtn_value_type_ssa); - assert(glsl_type_is_scalar(idx_val->ssa->type)); - deref_arr->deref_array_type = nir_deref_array_type_indirect; - deref_arr->base_offset = 0; - deref_arr->indirect = nir_src_for_ssa(idx_val->ssa->def); - } - tail->child = &deref_arr->deref; - break; - } - - case GLSL_TYPE_STRUCT: { - assert(idx_val->value_type == vtn_value_type_constant); - unsigned idx = idx_val->constant->value.u[0]; - deref_type = deref_type->members[idx]; - nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); - deref_struct->deref.type = deref_type->type; - tail->child = &deref_struct->deref; - break; - } - default: - unreachable("Invalid type for deref"); - } - - if (deref_type->is_builtin) { - /* If we encounter a builtin, we throw away the ress of the - * access chain, jump to the builtin, and keep building. - */ - const struct glsl_type *builtin_type = deref_type->type; - - nir_deref_array *per_vertex_deref = NULL; - if (glsl_type_is_array(chain->var->type)) { - /* This builtin is a per-vertex builtin */ - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - assert(chain->var->data.mode == nir_var_shader_in); - builtin_type = glsl_array_type(builtin_type, - b->shader->info.gs.vertices_in); - - /* The first non-var deref should be an array deref. 
*/ - assert(deref_var->deref.child->deref_type == - nir_deref_type_array); - per_vertex_deref = nir_deref_as_array(deref_var->deref.child); - } - - nir_variable *builtin = get_builtin_variable(b, - chain->var->data.mode, - builtin_type, - deref_type->builtin); - deref_var = nir_deref_var_create(b, builtin); - - if (per_vertex_deref) { - /* Since deref chains start at the variable, we can just - * steal that link and use it. - */ - deref_var->deref.child = &per_vertex_deref->deref; - per_vertex_deref->deref.child = NULL; - per_vertex_deref->deref.type = - glsl_get_array_element(builtin_type); - - tail = &per_vertex_deref->deref; - } else { - tail = &deref_var->deref; - } - } else { - tail = tail->child; - } - } - - return deref_var; -} - -nir_deref_var * -vtn_nir_deref(struct vtn_builder *b, uint32_t id) -{ - struct vtn_access_chain *chain = - vtn_value(b, id, vtn_value_type_access_chain)->access_chain; - - return vtn_access_chain_to_deref(b, chain); -} - static void vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -3053,15 +1756,14 @@ vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) return dest; } -static nir_ssa_def * +nir_ssa_def * vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) { unsigned swiz[4] = { index }; return nir_swizzle(&b->nb, src, swiz, 1, true); } - -static nir_ssa_def * +nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, unsigned index) { @@ -3081,7 +1783,7 @@ vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, return &vec->dest.dest.ssa; } -static nir_ssa_def * +nir_ssa_def * vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *index) { @@ -3093,7 +1795,7 @@ vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, return dest; } -static nir_ssa_def * +nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def 
*insert, nir_ssa_def *index) { diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index d15dfa846f3..17b8167630c 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -390,8 +390,25 @@ struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b, struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src); +nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, + unsigned index); +nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index); +nir_ssa_def *vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, unsigned index); +nir_ssa_def *vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index); + nir_deref_var *vtn_nir_deref(struct vtn_builder *b, uint32_t id); +nir_deref_var *vtn_access_chain_to_deref(struct vtn_builder *b, + struct vtn_access_chain *chain); +nir_ssa_def * +vtn_access_chain_to_offset(struct vtn_builder *b, + struct vtn_access_chain *chain, + nir_ssa_def **index_out, struct vtn_type **type_out, + unsigned *end_idx_out, bool stop_at_matrix); + struct vtn_ssa_value *vtn_local_load(struct vtn_builder *b, nir_deref_var *src); void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, @@ -403,6 +420,9 @@ vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src); void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, struct vtn_access_chain *dest); +void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count); + typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, struct vtn_value *, diff --git a/src/glsl/nir/spirv/vtn_variables.c b/src/glsl/nir/spirv/vtn_variables.c new file mode 100644 index 00000000000..7b1d0e123cc --- /dev/null +++ b/src/glsl/nir/spirv/vtn_variables.c @@ -0,0 +1,1311 @@ +/* + * Copyright © 2015 Intel 
Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "vtn_private.h" + +static nir_variable * +get_builtin_variable(struct vtn_builder *b, + nir_variable_mode mode, + const struct glsl_type *type, + SpvBuiltIn builtin); + +nir_deref_var * +vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) +{ + nir_deref_var *deref_var = nir_deref_var_create(b, chain->var); + nir_deref *tail = &deref_var->deref; + struct vtn_type *deref_type = chain->var_type; + + for (unsigned i = 0; i < chain->length; i++) { + struct vtn_value *idx_val = vtn_untyped_value(b, chain->ids[i]); + enum glsl_base_type base_type = glsl_get_base_type(tail->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_ARRAY: { + nir_deref_array *deref_arr = nir_deref_array_create(b); + if (base_type == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(tail->type)) { + deref_type = deref_type->array_element; + } else { + assert(glsl_type_is_vector(tail->type)); + deref_type = ralloc(b, struct vtn_type); + deref_type->type = glsl_scalar_type(base_type); + } + + deref_arr->deref.type = deref_type->type; + + if (idx_val->value_type == vtn_value_type_constant) { + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = idx_val->constant->value.u[0]; + } else { + assert(idx_val->value_type == vtn_value_type_ssa); + assert(glsl_type_is_scalar(idx_val->ssa->type)); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + deref_arr->base_offset = 0; + deref_arr->indirect = nir_src_for_ssa(idx_val->ssa->def); + } + tail->child = &deref_arr->deref; + break; + } + + case GLSL_TYPE_STRUCT: { + assert(idx_val->value_type == vtn_value_type_constant); + unsigned idx = idx_val->constant->value.u[0]; + deref_type = deref_type->members[idx]; + nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); + deref_struct->deref.type = 
deref_type->type; + tail->child = &deref_struct->deref; + break; + } + default: + unreachable("Invalid type for deref"); + } + + if (deref_type->is_builtin) { + /* If we encounter a builtin, we throw away the ress of the + * access chain, jump to the builtin, and keep building. + */ + const struct glsl_type *builtin_type = deref_type->type; + + nir_deref_array *per_vertex_deref = NULL; + if (glsl_type_is_array(chain->var->type)) { + /* This builtin is a per-vertex builtin */ + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + assert(chain->var->data.mode == nir_var_shader_in); + builtin_type = glsl_array_type(builtin_type, + b->shader->info.gs.vertices_in); + + /* The first non-var deref should be an array deref. */ + assert(deref_var->deref.child->deref_type == + nir_deref_type_array); + per_vertex_deref = nir_deref_as_array(deref_var->deref.child); + } + + nir_variable *builtin = get_builtin_variable(b, + chain->var->data.mode, + builtin_type, + deref_type->builtin); + deref_var = nir_deref_var_create(b, builtin); + + if (per_vertex_deref) { + /* Since deref chains start at the variable, we can just + * steal that link and use it. + */ + deref_var->deref.child = &per_vertex_deref->deref; + per_vertex_deref->deref.child = NULL; + per_vertex_deref->deref.type = + glsl_get_array_element(builtin_type); + + tail = &per_vertex_deref->deref; + } else { + tail = &deref_var->deref; + } + } else { + tail = tail->child; + } + } + + return deref_var; +} + +static struct vtn_ssa_value * +_vtn_variable_load(struct vtn_builder *b, + nir_deref_var *src_deref, nir_deref *src_deref_tail) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = src_deref_tail->type; + + /* The deref tail may contain a deref to select a component of a vector (in + * other words, it might not be an actual tail) so we have to save it away + * here since we overwrite it later. 
+ */ + nir_deref *old_child = src_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(val->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. + */ + src_deref_tail->child = NULL; + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->variables[0] = + nir_deref_as_var(nir_copy_deref(load, &src_deref->deref)); + load->num_components = glsl_get_vector_elements(val->type); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + + nir_builder_instr_insert(&b->nb, &load->instr); + + if (src_deref->var->data.mode == nir_var_uniform && + glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) { + /* Uniform boolean loads need to be fixed up since they're defined + * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. + */ + val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->def = &load->dest.ssa; + } + } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(val->type)) { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(val->type); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } else { + assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(val->type, i); + val->elems[i] = _vtn_variable_load(b, src_deref, 
&deref->deref); + } + } + + src_deref_tail->child = old_child; + + return val; +} + +static void +_vtn_variable_store(struct vtn_builder *b, + nir_deref_var *dest_deref, nir_deref *dest_deref_tail, + struct vtn_ssa_value *src) +{ + nir_deref *old_child = dest_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(src->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. + */ + dest_deref_tail->child = NULL; + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->variables[0] = + nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); + store->num_components = glsl_get_vector_elements(src->type); + store->const_index[0] = (1 << store->num_components) - 1; + store->src[0] = nir_src_for_ssa(src->def); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(src->type)) { + unsigned elems = glsl_get_length(src->type); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(src->type); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } else { + assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(src->type); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(src->type, i); + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } + + dest_deref_tail->child = old_child; +} + +nir_deref_var * +vtn_nir_deref(struct vtn_builder *b, uint32_t id) +{ + struct vtn_access_chain *chain = + vtn_value(b, id, 
vtn_value_type_access_chain)->access_chain; + + return vtn_access_chain_to_deref(b, chain); +} + +/* + * Gets the NIR-level deref tail, which may have as a child an array deref + * selecting which component due to OpAccessChain supporting per-component + * indexing in SPIR-V. + */ +static nir_deref * +get_deref_tail(nir_deref_var *deref) +{ + nir_deref *cur = &deref->deref; + while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) + cur = cur->child; + + return cur; +} + +struct vtn_ssa_value * +vtn_local_load(struct vtn_builder *b, nir_deref_var *src) +{ + nir_deref *src_tail = get_deref_tail(src); + struct vtn_ssa_value *val = _vtn_variable_load(b, src, src_tail); + + if (src_tail->child) { + nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); + assert(vec_deref->deref.child == NULL); + val->type = vec_deref->deref.type; + if (vec_deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); + else + val->def = vtn_vector_extract_dynamic(b, val->def, + vec_deref->indirect.ssa); + } + + return val; +} + +void +vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest) +{ + nir_deref *dest_tail = get_deref_tail(dest); + + if (dest_tail->child) { + struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_variable_store(b, dest, dest_tail, val); + } else { + _vtn_variable_store(b, dest, dest_tail, src); + } +} + +static nir_ssa_def * +get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, + struct vtn_type **type, unsigned *chain_idx) +{ + assert(chain->var->interface_type && 
"variable is a block"); + + nir_ssa_def *array_index; + if (glsl_type_is_array(chain->var->type)) { + assert(chain->length > 0); + array_index = vtn_ssa_value(b, chain->ids[0])->def; + *chain_idx = 1; + *type = chain->var_type->array_element; + } else { + array_index = nir_imm_int(&b->nb, 0); + *chain_idx = 0; + *type = chain->var_type; + } + + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->nb.shader, + nir_intrinsic_vulkan_resource_index); + instr->src[0] = nir_src_for_ssa(array_index); + instr->const_index[0] = chain->var->data.descriptor_set; + instr->const_index[1] = chain->var->data.binding; + instr->const_index[2] = chain->var->data.mode; + + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(&b->nb, &instr->instr); + + return &instr->dest.ssa; +} + +static bool +variable_is_external_block(nir_variable *var) +{ + return var->interface_type && + glsl_type_is_struct(var->interface_type) && + (var->data.mode == nir_var_uniform || + var->data.mode == nir_var_shader_storage); +} + +nir_ssa_def * +vtn_access_chain_to_offset(struct vtn_builder *b, + struct vtn_access_chain *chain, + nir_ssa_def **index_out, struct vtn_type **type_out, + unsigned *end_idx_out, bool stop_at_matrix) +{ + unsigned idx = 0; + struct vtn_type *type; + *index_out = get_vulkan_resource_index(b, chain, &type, &idx); + + nir_ssa_def *offset = nir_imm_int(&b->nb, 0); + for (; idx < chain->length; idx++) { + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + /* Some users may not want matrix or vector derefs */ + if (stop_at_matrix) { + idx++; + goto end; + } + /* Fall through */ + + case GLSL_TYPE_ARRAY: + offset = nir_iadd(&b->nb, offset, + nir_imul(&b->nb, + vtn_ssa_value(b, chain->ids[idx])->def, + nir_imm_int(&b->nb, type->stride))); + + if (glsl_type_is_vector(type->type)) { + /* This had 
better be the tail */ + assert(idx == chain->length - 1); + type = rzalloc(b, struct vtn_type); + type->type = glsl_scalar_type(base_type); + } else { + type = type->array_element; + } + break; + + case GLSL_TYPE_STRUCT: { + struct vtn_value *member_val = + vtn_value(b, chain->ids[idx], vtn_value_type_constant); + unsigned member = member_val->constant->value.u[0]; + + offset = nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, type->offsets[member])); + type = type->members[member]; + break; + } + + default: + unreachable("Invalid type for deref"); + } + } + +end: + *type_out = type; + if (end_idx_out) + *end_idx_out = idx; + + return offset; +} + +static void +_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, + nir_ssa_def *index, nir_ssa_def *offset, + struct vtn_ssa_value **inout, const struct glsl_type *type) +{ + nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op); + instr->num_components = glsl_get_vector_elements(type); + + int src = 0; + if (!load) { + instr->const_index[0] = (1 << instr->num_components) - 1; /* write mask */ + instr->src[src++] = nir_src_for_ssa((*inout)->def); + } + + /* We set the base and size for push constant load to the entire push + * constant block for now. 
+ */ + if (op == nir_intrinsic_load_push_constant) { + instr->const_index[0] = 0; + instr->const_index[1] = 128; + } + + if (index) + instr->src[src++] = nir_src_for_ssa(index); + + instr->src[src++] = nir_src_for_ssa(offset); + + if (load) { + nir_ssa_dest_init(&instr->instr, &instr->dest, + instr->num_components, NULL); + (*inout)->def = &instr->dest.ssa; + } + + nir_builder_instr_insert(&b->nb, &instr->instr); + + if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL) + (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0)); +} + +static void +_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, + nir_ssa_def *index, nir_ssa_def *offset, + struct vtn_access_chain *chain, unsigned chain_idx, + struct vtn_type *type, struct vtn_ssa_value **inout) +{ + if (chain_idx >= chain->length) + chain = NULL; + + if (load && chain == NULL && *inout == NULL) + *inout = vtn_create_ssa_value(b, type->type); + + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* This is where things get interesting. At this point, we've hit + * a vector, a scalar, or a matrix. 
+ */ + if (glsl_type_is_matrix(type->type)) { + if (chain == NULL) { + /* Loading the whole matrix */ + struct vtn_ssa_value *transpose; + unsigned num_ops, vec_width; + if (type->row_major) { + num_ops = glsl_get_vector_elements(type->type); + vec_width = glsl_get_matrix_columns(type->type); + if (load) { + const struct glsl_type *transpose_type = + glsl_matrix_type(base_type, vec_width, num_ops); + *inout = vtn_create_ssa_value(b, transpose_type); + } else { + transpose = vtn_ssa_transpose(b, *inout); + inout = &transpose; + } + } else { + num_ops = glsl_get_matrix_columns(type->type); + vec_width = glsl_get_vector_elements(type->type); + } + + for (unsigned i = 0; i < num_ops; i++) { + nir_ssa_def *elem_offset = + nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, i * type->stride)); + _vtn_load_store_tail(b, op, load, index, elem_offset, + &(*inout)->elems[i], + glsl_vector_type(base_type, vec_width)); + } + + if (load && type->row_major) + *inout = vtn_ssa_transpose(b, *inout); + } else if (type->row_major) { + /* Row-major but with an access chain. 
*/ + nir_ssa_def *col_offset = + nir_imul(&b->nb, vtn_ssa_value(b, chain->ids[chain_idx])->def, + nir_imm_int(&b->nb, type->array_element->stride)); + offset = nir_iadd(&b->nb, offset, col_offset); + + if (chain_idx + 1 < chain->length) { + /* Picking off a single element */ + nir_ssa_def *row_offset = + nir_imul(&b->nb, + vtn_ssa_value(b, chain->ids[chain_idx + 1])->def, + nir_imm_int(&b->nb, type->stride)); + offset = nir_iadd(&b->nb, offset, row_offset); + _vtn_load_store_tail(b, op, load, index, offset, inout, + glsl_scalar_type(base_type)); + } else { + /* Picking one element off each column */ + unsigned num_comps = glsl_get_vector_elements(type->type); + nir_ssa_def *comps[4]; + for (unsigned i = 0; i < num_comps; i++) { + nir_ssa_def *elem_offset = + nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, i * type->stride)); + + struct vtn_ssa_value *comp = NULL, temp_val; + if (!load) { + temp_val.def = nir_channel(&b->nb, (*inout)->def, i); + temp_val.type = glsl_scalar_type(base_type); + comp = &temp_val; + } + _vtn_load_store_tail(b, op, load, index, elem_offset, + &comp, glsl_scalar_type(base_type)); + comps[i] = comp->def; + } + + if (load) + (*inout)->def = nir_vec(&b->nb, comps, num_comps); + } + } else { + /* Column-major with a deref. Fall through to array case. */ + nir_ssa_def *col_offset = + nir_imul(&b->nb, vtn_ssa_value(b, chain->ids[chain_idx])->def, + nir_imm_int(&b->nb, type->stride)); + offset = nir_iadd(&b->nb, offset, col_offset); + + _vtn_block_load_store(b, op, load, index, offset, + chain, chain_idx + 1, + type->array_element, inout); + } + } else if (chain == NULL) { + /* Single whole vector */ + assert(glsl_type_is_vector_or_scalar(type->type)); + _vtn_load_store_tail(b, op, load, index, offset, inout, type->type); + } else { + /* Single component of a vector. Fall through to array case. 
*/ + nir_ssa_def *elem_offset = + nir_imul(&b->nb, vtn_ssa_value(b, chain->ids[chain_idx])->def, + nir_imm_int(&b->nb, type->stride)); + offset = nir_iadd(&b->nb, offset, elem_offset); + + _vtn_block_load_store(b, op, load, index, offset, NULL, 0, + type->array_element, inout); + } + return; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(type->type); + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *elem_off = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); + _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, + type->array_element, &(*inout)->elems[i]); + } + return; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(type->type); + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *elem_off = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); + _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, + type->members[i], &(*inout)->elems[i]); + } + return; + } + + default: + unreachable("Invalid block member type"); + } +} + +static struct vtn_ssa_value * +vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src) +{ + nir_intrinsic_op op; + if (src->var->data.mode == nir_var_uniform) { + if (src->var->data.descriptor_set >= 0) { + /* UBO load */ + assert(src->var->data.binding >= 0); + + op = nir_intrinsic_load_ubo; + } else { + /* Push constant load */ + assert(src->var->data.descriptor_set == -1 && + src->var->data.binding == -1); + + op = nir_intrinsic_load_push_constant; + } + } else { + assert(src->var->data.mode == nir_var_shader_storage); + op = nir_intrinsic_load_ssbo; + } + + nir_ssa_def *offset, *index = NULL; + struct vtn_type *type; + unsigned chain_idx; + offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true); + + if (op == nir_intrinsic_load_push_constant) + index = NULL; + + struct vtn_ssa_value *value = NULL; + _vtn_block_load_store(b, op, true, index, offset, + src, chain_idx, type, &value); + return value; +} + +static void 
+vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_access_chain *dst) +{ + nir_ssa_def *offset, *index = NULL; + struct vtn_type *type; + unsigned chain_idx; + offset = vtn_access_chain_to_offset(b, dst, &index, &type, &chain_idx, true); + + _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset, + dst, chain_idx, type, &src); +} + +struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src) +{ + if (variable_is_external_block(src->var)) + return vtn_block_load(b, src); + else + return vtn_local_load(b, vtn_access_chain_to_deref(b, src)); +} + +void +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_access_chain *dest) +{ + if (variable_is_external_block(dest->var)) { + assert(dest->var->data.mode == nir_var_shader_storage); + vtn_block_store(b, src, dest); + } else { + vtn_local_store(b, src, vtn_access_chain_to_deref(b, dest)); + } +} + +static void +vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, + struct vtn_access_chain *src) +{ + if (src->var->interface_type || dest->var->interface_type) { + struct vtn_ssa_value *val = vtn_variable_load(b, src); + vtn_variable_store(b, val, dest); + } else { + /* TODO: Handle single components of vectors */ + nir_deref_var *src_deref = vtn_access_chain_to_deref(b, src); + nir_deref_var *dest_deref = vtn_access_chain_to_deref(b, dest); + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = + nir_deref_as_var(nir_copy_deref(copy, &dest_deref->deref)); + copy->variables[1] = + nir_deref_as_var(nir_copy_deref(copy, &src_deref->deref)); + + nir_builder_instr_insert(&b->nb, &copy->instr); + } +} + +static void +set_mode_system_value(nir_variable_mode *mode) +{ + assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); + *mode = nir_var_system_value; +} + +static void +vtn_get_builtin_location(struct vtn_builder *b, + 
SpvBuiltIn builtin, int *location, + nir_variable_mode *mode) +{ + switch (builtin) { + case SpvBuiltInPosition: + *location = VARYING_SLOT_POS; + break; + case SpvBuiltInPointSize: + *location = VARYING_SLOT_PSIZ; + break; + case SpvBuiltInClipDistance: + *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */ + break; + case SpvBuiltInCullDistance: + /* XXX figure this out */ + unreachable("unhandled builtin"); + case SpvBuiltInVertexIndex: + *location = SYSTEM_VALUE_VERTEX_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInVertexId: + /* Vulkan defines VertexID to be zero-based and reserves the new + * builtin keyword VertexIndex to indicate the non-zero-based value. + */ + *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + set_mode_system_value(mode); + break; + case SpvBuiltInInstanceIndex: + /* XXX */ + case SpvBuiltInInstanceId: + *location = SYSTEM_VALUE_INSTANCE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInPrimitiveId: + *location = VARYING_SLOT_PRIMITIVE_ID; + *mode = nir_var_shader_out; + break; + case SpvBuiltInInvocationId: + *location = SYSTEM_VALUE_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLayer: + *location = VARYING_SLOT_LAYER; + *mode = nir_var_shader_out; + break; + case SpvBuiltInViewportIndex: + *location = VARYING_SLOT_VIEWPORT; + if (b->shader->stage == MESA_SHADER_GEOMETRY) + *mode = nir_var_shader_out; + else if (b->shader->stage == MESA_SHADER_FRAGMENT) + *mode = nir_var_shader_in; + else + unreachable("invalid stage for SpvBuiltInViewportIndex"); + break; + case SpvBuiltInTessLevelOuter: + case SpvBuiltInTessLevelInner: + case SpvBuiltInTessCoord: + case SpvBuiltInPatchVertices: + unreachable("no tessellation support"); + case SpvBuiltInFragCoord: + *location = VARYING_SLOT_POS; + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInPointCoord: + *location = VARYING_SLOT_PNTC; + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInFrontFacing: + *location = 
VARYING_SLOT_FACE; + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInSampleId: + *location = SYSTEM_VALUE_SAMPLE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInSamplePosition: + *location = SYSTEM_VALUE_SAMPLE_POS; + set_mode_system_value(mode); + break; + case SpvBuiltInSampleMask: + *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */ + set_mode_system_value(mode); + break; + case SpvBuiltInFragDepth: + *location = FRAG_RESULT_DEPTH; + assert(*mode == nir_var_shader_out); + break; + case SpvBuiltInNumWorkgroups: + *location = SYSTEM_VALUE_NUM_WORK_GROUPS; + set_mode_system_value(mode); + break; + case SpvBuiltInWorkgroupSize: + /* This should already be handled */ + unreachable("unsupported builtin"); + break; + case SpvBuiltInWorkgroupId: + *location = SYSTEM_VALUE_WORK_GROUP_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLocalInvocationId: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLocalInvocationIndex: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; + set_mode_system_value(mode); + break; + case SpvBuiltInGlobalInvocationId: + *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInHelperInvocation: + default: + unreachable("unsupported builtin"); + } +} + +static void +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_var) +{ + assert(val->value_type == vtn_value_type_access_chain); + assert(val->access_chain->length == 0); + assert(val->access_chain->var == void_var); + + nir_variable *var = void_var; + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ + case SpvDecorationNoPerspective: + var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + var->data.interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + var->data.centroid = true; + break; + case SpvDecorationSample: + var->data.sample = true; + break; + case SpvDecorationInvariant: + var->data.invariant = true; + break; + case SpvDecorationConstant: + assert(var->constant_initializer != NULL); + var->data.read_only = true; + break; + case SpvDecorationNonWritable: + var->data.read_only = true; + break; + case SpvDecorationLocation: + var->data.location = dec->literals[0]; + break; + case SpvDecorationComponent: + var->data.location_frac = dec->literals[0]; + break; + case SpvDecorationIndex: + var->data.explicit_index = true; + var->data.index = dec->literals[0]; + break; + case SpvDecorationBinding: + var->data.explicit_binding = true; + var->data.binding = dec->literals[0]; + break; + case SpvDecorationDescriptorSet: + var->data.descriptor_set = dec->literals[0]; + break; + case SpvDecorationBuiltIn: { + SpvBuiltIn builtin = dec->literals[0]; + + if (builtin == SpvBuiltInWorkgroupSize) { + /* This shouldn't be a builtin. It's actually a constant. 
*/ + var->data.mode = nir_var_global; + var->data.read_only = true; + + nir_constant *val = rzalloc(var, nir_constant); + val->value.u[0] = b->shader->info.cs.local_size[0]; + val->value.u[1] = b->shader->info.cs.local_size[1]; + val->value.u[2] = b->shader->info.cs.local_size[2]; + var->constant_initializer = val; + break; + } + + nir_variable_mode mode = var->data.mode; + vtn_get_builtin_location(b, builtin, &var->data.location, &mode); + var->data.explicit_location = true; + var->data.mode = mode; + if (mode == nir_var_shader_in || mode == nir_var_system_value) + var->data.read_only = true; + + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) + var->data.origin_upper_left = b->origin_upper_left; + + if (mode == nir_var_shader_out) + b->builtins[dec->literals[0]].out = var; + else + b->builtins[dec->literals[0]].in = var; + break; + } + case SpvDecorationRowMajor: + case SpvDecorationColMajor: + case SpvDecorationGLSLShared: + case SpvDecorationPatch: + case SpvDecorationRestrict: + case SpvDecorationAliased: + case SpvDecorationVolatile: + case SpvDecorationCoherent: + case SpvDecorationNonReadable: + case SpvDecorationUniform: + /* This is really nice but we have no use for it right now. 
*/ + case SpvDecorationCPacked: + case SpvDecorationSaturatedConversion: + case SpvDecorationStream: + case SpvDecorationOffset: + case SpvDecorationXfbBuffer: + case SpvDecorationFuncParamAttr: + case SpvDecorationFPRoundingMode: + case SpvDecorationFPFastMathMode: + case SpvDecorationLinkageAttributes: + case SpvDecorationSpecId: + break; + default: + unreachable("Unhandled variable decoration"); + } +} + +static nir_variable * +get_builtin_variable(struct vtn_builder *b, + nir_variable_mode mode, + const struct glsl_type *type, + SpvBuiltIn builtin) +{ + nir_variable *var; + if (mode == nir_var_shader_out) + var = b->builtins[builtin].out; + else + var = b->builtins[builtin].in; + + if (!var) { + int location; + vtn_get_builtin_location(b, builtin, &location, &mode); + + var = nir_variable_create(b->shader, mode, type, "builtin"); + + var->data.location = location; + var->data.explicit_location = true; + + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) + var->data.origin_upper_left = b->origin_upper_left; + + if (mode == nir_var_shader_out) + b->builtins[builtin].out = var; + else + b->builtins[builtin].in = var; + } + + return var; +} + +/* Tries to compute the size of an interface block based on the strides and + * offsets that are provided to us in the SPIR-V source. + */ +static unsigned +vtn_type_block_size(struct vtn_type *type) +{ + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: { + unsigned cols = type->row_major ? 
glsl_get_vector_elements(type->type) : + glsl_get_matrix_columns(type->type); + if (cols > 1) { + assert(type->stride > 0); + return type->stride * cols; + } else if (base_type == GLSL_TYPE_DOUBLE) { + return glsl_get_vector_elements(type->type) * 8; + } else { + return glsl_get_vector_elements(type->type) * 4; + } + } + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + unsigned num_fields = glsl_get_length(type->type); + for (unsigned f = 0; f < num_fields; f++) { + unsigned field_end = type->offsets[f] + + vtn_type_block_size(type->members[f]); + size = MAX2(size, field_end); + } + return size; + } + + case GLSL_TYPE_ARRAY: + assert(type->stride > 0); + assert(glsl_get_length(type->type) > 0); + return type->stride * glsl_get_length(type->type); + + default: + assert(!"Invalid block type"); + return 0; + } +} + +static bool +is_interface_type(struct vtn_type *type) +{ + return type->block || type->buffer_block || + glsl_type_is_sampler(type->type) || + glsl_type_is_image(type->type); +} + +void +vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpVariable: { + struct vtn_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + SpvStorageClass storage_class = w[3]; + + nir_variable *var = rzalloc(b->shader, nir_variable); + + var->type = type->type; + var->name = ralloc_strdup(var, val->name); + + struct vtn_type *interface_type; + if (is_interface_type(type)) { + interface_type = type; + } else if (glsl_type_is_array(type->type) && + is_interface_type(type->array_element)) { + interface_type = type->array_element; + } else { + interface_type = NULL; + } + + if (interface_type) + var->interface_type = interface_type->type; + + switch (storage_class) { + case SpvStorageClassUniform: + case SpvStorageClassUniformConstant: + if (interface_type && interface_type->buffer_block) 
{ + var->data.mode = nir_var_shader_storage; + b->shader->info.num_ssbos++; + } else { + /* UBO's and samplers */ + var->data.mode = nir_var_uniform; + var->data.read_only = true; + if (interface_type) { + if (glsl_type_is_image(interface_type->type)) { + b->shader->info.num_images++; + var->data.image.format = interface_type->image_format; + + switch (interface_type->access_qualifier) { + case SpvAccessQualifierReadOnly: + var->data.image.read_only = true; + break; + case SpvAccessQualifierWriteOnly: + var->data.image.write_only = true; + break; + default: + break; + } + } else if (glsl_type_is_sampler(interface_type->type)) { + b->shader->info.num_textures++; + } else { + assert(glsl_type_is_struct(interface_type->type)); + b->shader->info.num_ubos++; + } + } + } + break; + case SpvStorageClassPushConstant: + assert(interface_type && interface_type->block); + var->data.mode = nir_var_uniform; + var->data.read_only = true; + var->data.descriptor_set = -1; + var->data.binding = -1; + + /* We have exactly one push constant block */ + assert(b->shader->num_uniforms == 0); + b->shader->num_uniforms = vtn_type_block_size(type) * 4; + break; + case SpvStorageClassInput: + var->data.mode = nir_var_shader_in; + var->data.read_only = true; + break; + case SpvStorageClassOutput: + var->data.mode = nir_var_shader_out; + break; + case SpvStorageClassPrivate: + var->data.mode = nir_var_global; + var->interface_type = NULL; + break; + case SpvStorageClassFunction: + var->data.mode = nir_var_local; + var->interface_type = NULL; + break; + case SpvStorageClassWorkgroup: + var->data.mode = nir_var_shared; + break; + case SpvStorageClassCrossWorkgroup: + case SpvStorageClassGeneric: + case SpvStorageClassAtomicCounter: + default: + unreachable("Unhandled variable storage class"); + } + + if (count > 4) { + assert(count == 5); + nir_constant *constant = + vtn_value(b, w[4], vtn_value_type_constant)->constant; + var->constant_initializer = nir_constant_clone(constant, var); + } + + 
val->access_chain = ralloc(b, struct vtn_access_chain); + val->access_chain->var = var; + val->access_chain->var_type = type; + val->access_chain->length = 0; + + /* We handle decorations first because decorations might give us + * location information. We use the data.explicit_location field to + * note that the location provided is the "final" location. If + * data.explicit_location == false, this means that it's relative to + * whatever the base location is. + */ + vtn_foreach_decoration(b, val, var_decoration_cb, var); + + if (!var->data.explicit_location) { + if (b->shader->stage == MESA_SHADER_FRAGMENT && + var->data.mode == nir_var_shader_out) { + var->data.location += FRAG_RESULT_DATA0; + } else if (b->shader->stage == MESA_SHADER_VERTEX && + var->data.mode == nir_var_shader_in) { + var->data.location += VERT_ATTRIB_GENERIC0; + } else if (var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out) { + var->data.location += VARYING_SLOT_VAR0; + } + } + + /* XXX: Work around what appears to be a glslang bug. While the + * SPIR-V spec doesn't say that setting a descriptor set on a push + * constant is invalid, it certainly makes no sense. However, at + * some point, glslang started setting descriptor set 0 on push + * constants for some unknown reason. Hopefully this can be removed + * at some point in the future. 
+ */ + if (storage_class == SpvStorageClassPushConstant) { + var->data.descriptor_set = -1; + var->data.binding = -1; + } + + /* Interface block variables aren't actually going to be referenced + * by the generated NIR, so we don't put them in the list + */ + if (var->interface_type && glsl_type_is_struct(var->interface_type)) + break; + + if (var->data.mode == nir_var_local) { + nir_function_impl_add_variable(b->impl, var); + } else { + nir_shader_add_variable(b->shader, var); + } + + break; + } + + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: { + struct vtn_access_chain *base, *chain; + struct vtn_value *base_val = vtn_untyped_value(b, w[3]); + if (base_val->value_type == vtn_value_type_sampled_image) { + /* This is rather insane. SPIR-V allows you to use OpSampledImage + * to combine an array of images with a single sampler to get an + * array of sampled images that all share the same sampler. + * Fortunately, this means that we can more-or-less ignore the + * sampler when crawling the access chain, but it does leave us + * with this rather awkward little special-case. 
+ */ + base = base_val->sampled_image->image; + } else { + assert(base_val->value_type == vtn_value_type_access_chain); + base = base_val->access_chain; + } + + uint32_t new_len = base->length + count - 4; + chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->ids[0])); + + *chain = *base; + + chain->length = new_len; + unsigned idx = 0; + for (int i = 0; i < base->length; i++) + chain->ids[idx++] = base->ids[i]; + + for (int i = 4; i < count; i++) + chain->ids[idx++] = w[i]; + + if (base_val->value_type == vtn_value_type_sampled_image) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = chain; + val->sampled_image->sampler = base_val->sampled_image->sampler; + } else { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + val->access_chain = chain; + } + break; + } + + case SpvOpCopyMemory: { + struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain); + struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain); + + vtn_variable_copy(b, dest->access_chain, src->access_chain); + break; + } + + case SpvOpLoad: { + struct vtn_access_chain *src = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + + if (src->var->interface_type && + (glsl_type_is_sampler(src->var->interface_type) || + glsl_type_is_image(src->var->interface_type))) { + vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src; + return; + } + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_variable_load(b, src); + break; + } + + case SpvOpStore: { + struct vtn_access_chain *dest = + vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; + struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); + vtn_variable_store(b, src, dest); + break; + } + + case SpvOpArrayLength: { + struct vtn_access_chain *chain = + vtn_value(b, w[3], 
vtn_value_type_access_chain)->access_chain; + + const uint32_t offset = chain->var_type->offsets[w[4]]; + const uint32_t stride = chain->var_type->members[w[4]]->stride; + + unsigned chain_idx; + struct vtn_type *type; + nir_ssa_def *index = + get_vulkan_resource_index(b, chain, &type, &chain_idx); + + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->nb.shader, + nir_intrinsic_get_buffer_size); + instr->src[0] = nir_src_for_ssa(index); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(&b->nb, &instr->instr); + nir_ssa_def *buf_size = &instr->dest.ssa; + + /* array_length = max(buffer_size - offset, 0) / stride */ + nir_ssa_def *array_length = + nir_idiv(&b->nb, + nir_imax(&b->nb, + nir_isub(&b->nb, + buf_size, + nir_imm_int(&b->nb, offset)), + nir_imm_int(&b->nb, 0u)), + nir_imm_int(&b->nb, stride)); + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_create_ssa_value(b, glsl_uint_type()); + val->ssa->def = array_length; + break; + } + + case SpvOpCopyMemorySized: + default: + unreachable("Unhandled opcode"); + } +} -- cgit v1.2.3 From b298743d7b8e12aeb732f0d9ae3b723e9ea01e9d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 Jan 2016 02:09:38 -0800 Subject: nir/spirv: Add an actual variable struct to spirv_to_nir This allows us, among other things, to do structure splitting on-the-fly to more correctly handle input/output structs. 
--- src/glsl/nir/spirv/spirv_to_nir.c | 9 +- src/glsl/nir/spirv/vtn_cfg.c | 13 +- src/glsl/nir/spirv/vtn_private.h | 41 ++- src/glsl/nir/spirv/vtn_variables.c | 587 +++++++++++++++++++------------------ 4 files changed, 340 insertions(+), 310 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 6ce2e0c16a2..7d8699b1c00 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1319,9 +1319,9 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, const struct glsl_type *image_type; if (sampled.image) { - image_type = sampled.image->var->interface_type; + image_type = sampled.image->var->var->interface_type; } else { - image_type = sampled.sampler->var->interface_type; + image_type = sampled.sampler->var->var->interface_type; } instr->sampler_dim = glsl_get_sampler_dim(image_type); @@ -1677,13 +1677,14 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, SpvMemorySemanticsMask semantics = w[5]; */ - if (chain->var->data.mode == nir_var_shared) { + if (chain->var->mode == vtn_variable_mode_workgroup) { nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref; nir_intrinsic_op op = get_shared_nir_atomic_op(opcode); atomic = nir_intrinsic_instr_create(b->nb.shader, op); atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref)); fill_common_atomic_sources(b, opcode, w, &atomic->src[0]); } else { + assert(chain->var->mode == vtn_variable_mode_ssbo); struct vtn_type *type; nir_ssa_def *offset, *index; offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false); @@ -2409,7 +2410,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpImageQuerySize: { struct vtn_access_chain *image = vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - if (glsl_type_is_image(image->var->interface_type)) { + if (glsl_type_is_image(image->var->var->interface_type)) { vtn_handle_image(b, opcode, w, 
count); } else { vtn_handle_texture(b, opcode, w, count); diff --git a/src/glsl/nir/spirv/vtn_cfg.c b/src/glsl/nir/spirv/vtn_cfg.c index e08a2d8bc81..041408b1cfb 100644 --- a/src/glsl/nir/spirv/vtn_cfg.c +++ b/src/glsl/nir/spirv/vtn_cfg.c @@ -100,11 +100,14 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, val->name); b->func->impl->params[idx] = param; - val->access_chain = ralloc(b, struct vtn_access_chain); - val->access_chain->var = param; - val->access_chain->length = 0; - val->access_chain->var_type = - vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable); + vtn_var->mode = vtn_variable_mode_param; + vtn_var->type = vtn_value(b, w[1], vtn_value_type_type)->type; + vtn_var->var = param; + vtn_var->chain.var = vtn_var; + vtn_var->chain.length = 0; + + val->access_chain = &vtn_var->chain; break; } diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 17b8167630c..682bff5e8bb 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -234,9 +234,10 @@ struct vtn_type { SpvBuiltIn builtin; }; +struct vtn_variable; + struct vtn_access_chain { - nir_variable *var; - struct vtn_type *var_type; + struct vtn_variable *var; uint32_t length; @@ -244,6 +245,34 @@ struct vtn_access_chain { uint32_t ids[0]; }; +enum vtn_variable_mode { + vtn_variable_mode_local, + vtn_variable_mode_global, + vtn_variable_mode_param, + vtn_variable_mode_ubo, + vtn_variable_mode_ssbo, + vtn_variable_mode_push_constant, + vtn_variable_mode_image, + vtn_variable_mode_sampler, + vtn_variable_mode_workgroup, + vtn_variable_mode_input, + vtn_variable_mode_output, +}; + +struct vtn_variable { + enum vtn_variable_mode mode; + + struct vtn_type *type; + + unsigned descriptor_set; + unsigned binding; + + nir_variable *var; + nir_variable **members; + + struct vtn_access_chain chain; +}; + struct vtn_image_pointer { struct vtn_access_chain *image; nir_ssa_def 
*coord; @@ -329,14 +358,6 @@ struct vtn_builder { unsigned num_specializations; struct nir_spirv_specialization *specializations; - /* - * NIR variable for each SPIR-V builtin. - */ - struct { - nir_variable *in; - nir_variable *out; - } builtins[42]; /* XXX need symbolic constant from SPIR-V header */ - unsigned value_id_bound; struct vtn_value *values; diff --git a/src/glsl/nir/spirv/vtn_variables.c b/src/glsl/nir/spirv/vtn_variables.c index 7b1d0e123cc..e15fe6ef2a8 100644 --- a/src/glsl/nir/spirv/vtn_variables.c +++ b/src/glsl/nir/spirv/vtn_variables.c @@ -27,22 +27,41 @@ #include "vtn_private.h" -static nir_variable * -get_builtin_variable(struct vtn_builder *b, - nir_variable_mode mode, - const struct glsl_type *type, - SpvBuiltIn builtin); +/* Crawls a chain of array derefs and rewrites the types so that the + * lengths stay the same but the terminal type is the one given by + * tail_type. This is useful for split structures. + */ +static void +rewrite_deref_types(nir_deref *deref, const struct glsl_type *type) +{ + deref->type = type; + if (deref->child) { + assert(deref->child->deref_type == nir_deref_type_array); + assert(glsl_type_is_array(deref->type)); + rewrite_deref_types(deref->child, glsl_get_array_element(type)); + } +} nir_deref_var * vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) { - nir_deref_var *deref_var = nir_deref_var_create(b, chain->var); + nir_deref_var *deref_var; + if (chain->var->var) { + deref_var = nir_deref_var_create(b, chain->var->var); + } else { + assert(chain->var->members); + /* Create the deref_var manually. It will get filled out later. 
*/ + deref_var = rzalloc(b, nir_deref_var); + deref_var->deref.deref_type = nir_deref_type_var; + } + + struct vtn_type *deref_type = chain->var->type; nir_deref *tail = &deref_var->deref; - struct vtn_type *deref_type = chain->var_type; + nir_variable **members = chain->var->members; for (unsigned i = 0; i < chain->length; i++) { struct vtn_value *idx_val = vtn_untyped_value(b, chain->ids[i]); - enum glsl_base_type base_type = glsl_get_base_type(tail->type); + enum glsl_base_type base_type = glsl_get_base_type(deref_type->type); switch (base_type) { case GLSL_TYPE_UINT: case GLSL_TYPE_INT: @@ -52,10 +71,10 @@ vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) case GLSL_TYPE_ARRAY: { nir_deref_array *deref_arr = nir_deref_array_create(b); if (base_type == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(tail->type)) { + glsl_type_is_matrix(deref_type->type)) { deref_type = deref_type->array_element; } else { - assert(glsl_type_is_vector(tail->type)); + assert(glsl_type_is_vector(deref_type->type)); deref_type = ralloc(b, struct vtn_type); deref_type->type = glsl_scalar_type(base_type); } @@ -73,6 +92,7 @@ vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) deref_arr->indirect = nir_src_for_ssa(idx_val->ssa->def); } tail->child = &deref_arr->deref; + tail = tail->child; break; } @@ -80,59 +100,26 @@ vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) assert(idx_val->value_type == vtn_value_type_constant); unsigned idx = idx_val->constant->value.u[0]; deref_type = deref_type->members[idx]; - nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); - deref_struct->deref.type = deref_type->type; - tail->child = &deref_struct->deref; + if (members) { + /* This is a pre-split structure. 
*/ + deref_var->var = members[idx]; + rewrite_deref_types(&deref_var->deref, members[idx]->type); + assert(tail->type == deref_type->type); + members = NULL; + } else { + nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); + deref_struct->deref.type = deref_type->type; + tail->child = &deref_struct->deref; + tail = tail->child; + } break; } default: unreachable("Invalid type for deref"); } - - if (deref_type->is_builtin) { - /* If we encounter a builtin, we throw away the ress of the - * access chain, jump to the builtin, and keep building. - */ - const struct glsl_type *builtin_type = deref_type->type; - - nir_deref_array *per_vertex_deref = NULL; - if (glsl_type_is_array(chain->var->type)) { - /* This builtin is a per-vertex builtin */ - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - assert(chain->var->data.mode == nir_var_shader_in); - builtin_type = glsl_array_type(builtin_type, - b->shader->info.gs.vertices_in); - - /* The first non-var deref should be an array deref. */ - assert(deref_var->deref.child->deref_type == - nir_deref_type_array); - per_vertex_deref = nir_deref_as_array(deref_var->deref.child); - } - - nir_variable *builtin = get_builtin_variable(b, - chain->var->data.mode, - builtin_type, - deref_type->builtin); - deref_var = nir_deref_var_create(b, builtin); - - if (per_vertex_deref) { - /* Since deref chains start at the variable, we can just - * steal that link and use it. 
- */ - deref_var->deref.child = &per_vertex_deref->deref; - per_vertex_deref->deref.child = NULL; - per_vertex_deref->deref.type = - glsl_get_array_element(builtin_type); - - tail = &per_vertex_deref->deref; - } else { - tail = &deref_var->deref; - } - } else { - tail = tail->child; - } } + assert(members == NULL); return deref_var; } @@ -325,27 +312,31 @@ static nir_ssa_def * get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, struct vtn_type **type, unsigned *chain_idx) { - assert(chain->var->interface_type && "variable is a block"); + /* Push constants have no explicit binding */ + if (chain->var->mode == vtn_variable_mode_push_constant) { + *chain_idx = 0; + *type = chain->var->type; + return NULL; + } nir_ssa_def *array_index; - if (glsl_type_is_array(chain->var->type)) { + if (glsl_type_is_array(chain->var->type->type)) { assert(chain->length > 0); array_index = vtn_ssa_value(b, chain->ids[0])->def; *chain_idx = 1; - *type = chain->var_type->array_element; + *type = chain->var->type->array_element; } else { array_index = nir_imm_int(&b->nb, 0); *chain_idx = 0; - *type = chain->var_type; + *type = chain->var->type; } nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_vulkan_resource_index); instr->src[0] = nir_src_for_ssa(array_index); - instr->const_index[0] = chain->var->data.descriptor_set; - instr->const_index[1] = chain->var->data.binding; - instr->const_index[2] = chain->var->data.mode; + instr->const_index[0] = chain->var->descriptor_set; + instr->const_index[1] = chain->var->binding; nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); nir_builder_instr_insert(&b->nb, &instr->instr); @@ -353,15 +344,6 @@ get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, return &instr->dest.ssa; } -static bool -variable_is_external_block(nir_variable *var) -{ - return var->interface_type && - glsl_type_is_struct(var->interface_type) && - (var->data.mode == nir_var_uniform || 
- var->data.mode == nir_var_shader_storage); -} - nir_ssa_def * vtn_access_chain_to_offset(struct vtn_builder *b, struct vtn_access_chain *chain, @@ -617,22 +599,18 @@ static struct vtn_ssa_value * vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src) { nir_intrinsic_op op; - if (src->var->data.mode == nir_var_uniform) { - if (src->var->data.descriptor_set >= 0) { - /* UBO load */ - assert(src->var->data.binding >= 0); - - op = nir_intrinsic_load_ubo; - } else { - /* Push constant load */ - assert(src->var->data.descriptor_set == -1 && - src->var->data.binding == -1); - - op = nir_intrinsic_load_push_constant; - } - } else { - assert(src->var->data.mode == nir_var_shader_storage); + switch (src->var->mode) { + case vtn_variable_mode_ubo: + op = nir_intrinsic_load_ubo; + break; + case vtn_variable_mode_ssbo: op = nir_intrinsic_load_ssbo; + break; + case vtn_variable_mode_push_constant: + op = nir_intrinsic_load_push_constant; + break; + default: + assert(!"Invalid block variable mode"); } nir_ssa_def *offset, *index = NULL; @@ -640,9 +618,6 @@ vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src) unsigned chain_idx; offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true); - if (op == nir_intrinsic_load_push_constant) - index = NULL; - struct vtn_ssa_value *value = NULL; _vtn_block_load_store(b, op, true, index, offset, src, chain_idx, type, &value); @@ -662,10 +637,18 @@ vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, dst, chain_idx, type, &src); } +static bool +vtn_variable_is_external_block(struct vtn_variable *var) +{ + return var->mode == vtn_variable_mode_ssbo || + var->mode == vtn_variable_mode_ubo || + var->mode == vtn_variable_mode_push_constant; +} + struct vtn_ssa_value * vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src) { - if (variable_is_external_block(src->var)) + if (vtn_variable_is_external_block(src->var)) return vtn_block_load(b, src); else return 
vtn_local_load(b, vtn_access_chain_to_deref(b, src)); @@ -675,8 +658,8 @@ void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, struct vtn_access_chain *dest) { - if (variable_is_external_block(dest->var)) { - assert(dest->var->data.mode == nir_var_shader_storage); + if (vtn_variable_is_external_block(dest->var)) { + assert(dest->var->mode == vtn_variable_mode_ssbo); vtn_block_store(b, src, dest); } else { vtn_local_store(b, src, vtn_access_chain_to_deref(b, dest)); @@ -687,7 +670,8 @@ static void vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, struct vtn_access_chain *src) { - if (src->var->interface_type || dest->var->interface_type) { + if (vtn_variable_is_external_block(src->var) || + vtn_variable_is_external_block(dest->var)) { struct vtn_ssa_value *val = vtn_variable_load(b, src); vtn_variable_store(b, val, dest); } else { @@ -836,83 +820,107 @@ static void var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, const struct vtn_decoration *dec, void *void_var) { - assert(val->value_type == vtn_value_type_access_chain); - assert(val->access_chain->length == 0); - assert(val->access_chain->var == void_var); + struct vtn_variable *vtn_var = void_var; + + /* Handle decorations that apply to a vtn_variable as a whole */ + switch (dec->decoration) { + case SpvDecorationBinding: + vtn_var->binding = dec->literals[0]; + return; + case SpvDecorationDescriptorSet: + vtn_var->descriptor_set = dec->literals[0]; + return; + default: + break; + } + + /* Now we handle decorations that apply to a particular nir_variable */ + nir_variable *nir_var = vtn_var->var; + if (val->value_type == vtn_value_type_access_chain) { + assert(val->access_chain->length == 0); + assert(val->access_chain->var == void_var); + assert(member == -1); + } else { + assert(val->value_type == vtn_value_type_type); + assert(vtn_var->type == val->type); + if (member != -1) + nir_var = vtn_var->members[member]; + } + + if (nir_var == NULL) + 
return; - nir_variable *var = void_var; switch (dec->decoration) { case SpvDecorationRelaxedPrecision: break; /* FIXME: Do nothing with this for now. */ case SpvDecorationNoPerspective: - var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + nir_var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; break; case SpvDecorationFlat: - var->data.interpolation = INTERP_QUALIFIER_FLAT; + nir_var->data.interpolation = INTERP_QUALIFIER_FLAT; break; case SpvDecorationCentroid: - var->data.centroid = true; + nir_var->data.centroid = true; break; case SpvDecorationSample: - var->data.sample = true; + nir_var->data.sample = true; break; case SpvDecorationInvariant: - var->data.invariant = true; + nir_var->data.invariant = true; break; case SpvDecorationConstant: - assert(var->constant_initializer != NULL); - var->data.read_only = true; + assert(nir_var->constant_initializer != NULL); + nir_var->data.read_only = true; break; case SpvDecorationNonWritable: - var->data.read_only = true; + nir_var->data.read_only = true; break; case SpvDecorationLocation: - var->data.location = dec->literals[0]; + nir_var->data.location = dec->literals[0]; + if (b->shader->stage == MESA_SHADER_FRAGMENT && + nir_var->data.mode == nir_var_shader_out) { + nir_var->data.location += FRAG_RESULT_DATA0; + } else if (b->shader->stage == MESA_SHADER_VERTEX && + nir_var->data.mode == nir_var_shader_in) { + nir_var->data.location += VERT_ATTRIB_GENERIC0; + } else if (nir_var->data.mode == nir_var_shader_in || + nir_var->data.mode == nir_var_shader_out) { + nir_var->data.location += VARYING_SLOT_VAR0; + } else { + assert(!"Location must be on input or output variable"); + } + nir_var->data.explicit_location = true; break; case SpvDecorationComponent: - var->data.location_frac = dec->literals[0]; + nir_var->data.location_frac = dec->literals[0]; break; case SpvDecorationIndex: - var->data.explicit_index = true; - var->data.index = dec->literals[0]; - break; - case SpvDecorationBinding: - 
var->data.explicit_binding = true; - var->data.binding = dec->literals[0]; - break; - case SpvDecorationDescriptorSet: - var->data.descriptor_set = dec->literals[0]; + nir_var->data.explicit_index = true; + nir_var->data.index = dec->literals[0]; break; case SpvDecorationBuiltIn: { SpvBuiltIn builtin = dec->literals[0]; if (builtin == SpvBuiltInWorkgroupSize) { /* This shouldn't be a builtin. It's actually a constant. */ - var->data.mode = nir_var_global; - var->data.read_only = true; - - nir_constant *val = rzalloc(var, nir_constant); - val->value.u[0] = b->shader->info.cs.local_size[0]; - val->value.u[1] = b->shader->info.cs.local_size[1]; - val->value.u[2] = b->shader->info.cs.local_size[2]; - var->constant_initializer = val; + nir_var->data.mode = nir_var_global; + nir_var->data.read_only = true; + + nir_constant *c = rzalloc(nir_var, nir_constant); + c->value.u[0] = b->shader->info.cs.local_size[0]; + c->value.u[1] = b->shader->info.cs.local_size[1]; + c->value.u[2] = b->shader->info.cs.local_size[2]; + nir_var->constant_initializer = c; break; } - nir_variable_mode mode = var->data.mode; - vtn_get_builtin_location(b, builtin, &var->data.location, &mode); - var->data.explicit_location = true; - var->data.mode = mode; - if (mode == nir_var_shader_in || mode == nir_var_system_value) - var->data.read_only = true; + nir_variable_mode mode = nir_var->data.mode; + vtn_get_builtin_location(b, builtin, &nir_var->data.location, &mode); + nir_var->data.explicit_location = true; + nir_var->data.mode = mode; if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) - var->data.origin_upper_left = b->origin_upper_left; - - if (mode == nir_var_shader_out) - b->builtins[dec->literals[0]].out = var; - else - b->builtins[dec->literals[0]].in = var; + nir_var->data.origin_upper_left = b->origin_upper_left; break; } case SpvDecorationRowMajor: @@ -942,39 +950,6 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, } } -static 
nir_variable * -get_builtin_variable(struct vtn_builder *b, - nir_variable_mode mode, - const struct glsl_type *type, - SpvBuiltIn builtin) -{ - nir_variable *var; - if (mode == nir_var_shader_out) - var = b->builtins[builtin].out; - else - var = b->builtins[builtin].in; - - if (!var) { - int location; - vtn_get_builtin_location(b, builtin, &location, &mode); - - var = nir_variable_create(b->shader, mode, type, "builtin"); - - var->data.location = location; - var->data.explicit_location = true; - - if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) - var->data.origin_upper_left = b->origin_upper_left; - - if (mode == nir_var_shader_out) - b->builtins[builtin].out = var; - else - b->builtins[builtin].in = var; - } - - return var; -} - /* Tries to compute the size of an interface block based on the strides and * offsets that are provided to us in the SPIR-V source. */ @@ -1023,106 +998,72 @@ vtn_type_block_size(struct vtn_type *type) } } -static bool -is_interface_type(struct vtn_type *type) -{ - return type->block || type->buffer_block || - glsl_type_is_sampler(type->type) || - glsl_type_is_image(type->type); -} - void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { switch (opcode) { case SpvOpVariable: { - struct vtn_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_access_chain); - SpvStorageClass storage_class = w[3]; - - nir_variable *var = rzalloc(b->shader, nir_variable); + struct vtn_variable *var = rzalloc(b, struct vtn_variable); + var->type = vtn_value(b, w[1], vtn_value_type_type)->type; - var->type = type->type; - var->name = ralloc_strdup(var, val->name); + var->chain.var = var; + var->chain.length = 0; - struct vtn_type *interface_type; - if (is_interface_type(type)) { - interface_type = type; - } else if (glsl_type_is_array(type->type) && - is_interface_type(type->array_element)) { - interface_type = 
type->array_element; - } else { - interface_type = NULL; - } + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + val->access_chain = &var->chain; - if (interface_type) - var->interface_type = interface_type->type; + struct vtn_type *without_array = var->type; + while(glsl_type_is_array(without_array->type)) + without_array = without_array->array_element; - switch (storage_class) { + nir_variable_mode nir_mode; + switch ((SpvStorageClass)w[3]) { case SpvStorageClassUniform: case SpvStorageClassUniformConstant: - if (interface_type && interface_type->buffer_block) { - var->data.mode = nir_var_shader_storage; + if (without_array->block) { + var->mode = vtn_variable_mode_ubo; + b->shader->info.num_ubos++; + } else if (without_array->buffer_block) { + var->mode = vtn_variable_mode_ssbo; b->shader->info.num_ssbos++; + } else if (glsl_type_is_image(without_array->type)) { + var->mode = vtn_variable_mode_image; + nir_mode = nir_var_uniform; + b->shader->info.num_images++; + } else if (glsl_type_is_sampler(without_array->type)) { + var->mode = vtn_variable_mode_sampler; + nir_mode = nir_var_uniform; + b->shader->info.num_textures++; } else { - /* UBO's and samplers */ - var->data.mode = nir_var_uniform; - var->data.read_only = true; - if (interface_type) { - if (glsl_type_is_image(interface_type->type)) { - b->shader->info.num_images++; - var->data.image.format = interface_type->image_format; - - switch (interface_type->access_qualifier) { - case SpvAccessQualifierReadOnly: - var->data.image.read_only = true; - break; - case SpvAccessQualifierWriteOnly: - var->data.image.write_only = true; - break; - default: - break; - } - } else if (glsl_type_is_sampler(interface_type->type)) { - b->shader->info.num_textures++; - } else { - assert(glsl_type_is_struct(interface_type->type)); - b->shader->info.num_ubos++; - } - } + assert(!"Invalid uniform variable type"); } break; case SpvStorageClassPushConstant: - assert(interface_type && 
interface_type->block); - var->data.mode = nir_var_uniform; - var->data.read_only = true; - var->data.descriptor_set = -1; - var->data.binding = -1; - - /* We have exactly one push constant block */ + var->mode = vtn_variable_mode_push_constant; assert(b->shader->num_uniforms == 0); - b->shader->num_uniforms = vtn_type_block_size(type) * 4; + b->shader->num_uniforms = vtn_type_block_size(var->type) * 4; break; case SpvStorageClassInput: - var->data.mode = nir_var_shader_in; - var->data.read_only = true; + var->mode = vtn_variable_mode_input; + nir_mode = nir_var_shader_in; break; case SpvStorageClassOutput: - var->data.mode = nir_var_shader_out; + var->mode = vtn_variable_mode_output; + nir_mode = nir_var_shader_out; break; case SpvStorageClassPrivate: - var->data.mode = nir_var_global; - var->interface_type = NULL; + var->mode = vtn_variable_mode_global; + nir_mode = nir_var_global; break; case SpvStorageClassFunction: - var->data.mode = nir_var_local; - var->interface_type = NULL; + var->mode = vtn_variable_mode_local; + nir_mode = nir_var_local; break; case SpvStorageClassWorkgroup: - var->data.mode = nir_var_shared; + var->mode = vtn_variable_mode_workgroup; + nir_mode = nir_var_shared; break; case SpvStorageClassCrossWorkgroup: case SpvStorageClassGeneric: @@ -1131,63 +1072,128 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, unreachable("Unhandled variable storage class"); } + switch (var->mode) { + case vtn_variable_mode_local: + case vtn_variable_mode_global: + case vtn_variable_mode_image: + case vtn_variable_mode_sampler: + case vtn_variable_mode_workgroup: + /* For these, we create the variable normally */ + var->var = rzalloc(b->shader, nir_variable); + var->var->name = ralloc_strdup(var->var, val->name); + var->var->type = var->type->type; + var->var->data.mode = nir_mode; + + switch (var->mode) { + case vtn_variable_mode_image: + case vtn_variable_mode_sampler: + var->var->interface_type = without_array->type; + break; + default: + 
var->var->interface_type = NULL; + break; + } + break; + + case vtn_variable_mode_input: + case vtn_variable_mode_output: { + /* For inputs and outputs, we immediately split structures. This + * is for a couple of reasons. For one, builtins may all come in + * a struct and we really want those split out into separate + * variables. For another, interpolation qualifiers can be + * applied to members of the top-level struct ane we need to be + * able to preserve that information. + */ + + int array_length = -1; + struct vtn_type *interface_type = var->type; + if (b->shader->stage == MESA_SHADER_GEOMETRY && + glsl_type_is_array(var->type->type)) { + /* In Geometry shaders (and some tessellation), inputs come + * in per-vertex arrays. However, some builtins come in + * non-per-vertex, hence the need for the is_array check. In + * any case, there are no non-builtin arrays allowed so this + * check should be sufficient. + */ + interface_type = var->type->array_element; + array_length = glsl_get_length(var->type->type); + } + + if (glsl_type_is_struct(interface_type->type)) { + /* It's a struct. Split it. 
*/ + unsigned num_members = glsl_get_length(interface_type->type); + var->members = ralloc_array(b, nir_variable *, num_members); + + for (unsigned i = 0; i < num_members; i++) { + const struct glsl_type *mtype = interface_type->members[i]->type; + if (array_length >= 0) + mtype = glsl_array_type(mtype, array_length); + + var->members[i] = rzalloc(b->shader, nir_variable); + var->members[i]->name = + ralloc_asprintf(var->members[i], "%s.%d", val->name, i); + var->members[i]->type = mtype; + var->members[i]->interface_type = + interface_type->members[i]->type; + var->members[i]->data.mode = nir_mode; + } + } else { + var->var = rzalloc(b->shader, nir_variable); + var->var->name = ralloc_strdup(var->var, val->name); + var->var->type = var->type->type; + var->var->interface_type = interface_type->type; + var->var->data.mode = nir_mode; + } + break; + case vtn_variable_mode_param: + unreachable("Not created through OpVariable"); + } + + case vtn_variable_mode_ubo: + case vtn_variable_mode_ssbo: + case vtn_variable_mode_push_constant: + /* These don't need actual variables. */ + break; + } + if (count > 4) { assert(count == 5); nir_constant *constant = vtn_value(b, w[4], vtn_value_type_constant)->constant; - var->constant_initializer = nir_constant_clone(constant, var); + var->var->constant_initializer = + nir_constant_clone(constant, var->var); } - val->access_chain = ralloc(b, struct vtn_access_chain); - val->access_chain->var = var; - val->access_chain->var_type = type; - val->access_chain->length = 0; - - /* We handle decorations first because decorations might give us - * location information. We use the data.explicit_location field to - * note that the location provided is the "final" location. If - * data.explicit_location == false, this means that it's relative to - * whatever the base location is. 
- */ vtn_foreach_decoration(b, val, var_decoration_cb, var); - if (!var->data.explicit_location) { - if (b->shader->stage == MESA_SHADER_FRAGMENT && - var->data.mode == nir_var_shader_out) { - var->data.location += FRAG_RESULT_DATA0; - } else if (b->shader->stage == MESA_SHADER_VERTEX && - var->data.mode == nir_var_shader_in) { - var->data.location += VERT_ATTRIB_GENERIC0; - } else if (var->data.mode == nir_var_shader_in || - var->data.mode == nir_var_shader_out) { - var->data.location += VARYING_SLOT_VAR0; - } - } + if (var->mode == vtn_variable_mode_image || + var->mode == vtn_variable_mode_sampler) { + /* XXX: We still need the binding information in the nir_variable + * for these. We should fix that. + */ + var->var->data.binding = var->binding; + var->var->data.descriptor_set = var->descriptor_set; - /* XXX: Work around what appears to be a glslang bug. While the - * SPIR-V spec doesn't say that setting a descriptor set on a push - * constant is invalid, it certainly makes no sense. However, at - * some point, glslang started setting descriptor set 0 on push - * constants for some unknown reason. Hopefully this can be removed - * at some point in the future. 
- */ - if (storage_class == SpvStorageClassPushConstant) { - var->data.descriptor_set = -1; - var->data.binding = -1; + if (var->mode == vtn_variable_mode_image) + var->var->data.image.format = without_array->image_format; } - /* Interface block variables aren't actually going to be referenced - * by the generated NIR, so we don't put them in the list - */ - if (var->interface_type && glsl_type_is_struct(var->interface_type)) - break; - - if (var->data.mode == nir_var_local) { - nir_function_impl_add_variable(b->impl, var); + if (var->mode == vtn_variable_mode_local) { + assert(var->members == NULL && var->var != NULL); + nir_function_impl_add_variable(b->impl, var->var); + } else if (var->var) { + nir_shader_add_variable(b->shader, var->var); + } else if (var->members) { + unsigned count = glsl_get_length(without_array->type); + for (unsigned i = 0; i < count; i++) { + assert(var->members[i]->data.mode != nir_var_local); + nir_shader_add_variable(b->shader, var->members[i]); + } } else { - nir_shader_add_variable(b->shader, var); + assert(var->mode == vtn_variable_mode_ubo || + var->mode == vtn_variable_mode_ssbo || + var->mode == vtn_variable_mode_push_constant); } - break; } @@ -1248,9 +1254,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, struct vtn_access_chain *src = vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - if (src->var->interface_type && - (glsl_type_is_sampler(src->var->interface_type) || - glsl_type_is_image(src->var->interface_type))) { + if (src->var->mode == vtn_variable_mode_image || + src->var->mode == vtn_variable_mode_sampler) { vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src; return; } @@ -1272,8 +1277,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, struct vtn_access_chain *chain = vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - const uint32_t offset = chain->var_type->offsets[w[4]]; - const uint32_t stride = chain->var_type->members[w[4]]->stride; + 
const uint32_t offset = chain->var->type->offsets[w[4]]; + const uint32_t stride = chain->var->type->members[w[4]]->stride; unsigned chain_idx; struct vtn_type *type; -- cgit v1.2.3 From 5d9a6fd52608df7e3fcdaeb441f5edd1c75eb811 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 Jan 2016 08:53:04 -0800 Subject: vtn/variables: Compact local loads/stores into one function This is similar to what we did for block loads/stores. --- src/glsl/nir/spirv/vtn_variables.c | 141 +++++++++++-------------------------- 1 file changed, 42 insertions(+), 99 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_variables.c b/src/glsl/nir/spirv/vtn_variables.c index e15fe6ef2a8..a9c2857f4cf 100644 --- a/src/glsl/nir/spirv/vtn_variables.c +++ b/src/glsl/nir/spirv/vtn_variables.c @@ -123,123 +123,64 @@ vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) return deref_var; } -static struct vtn_ssa_value * -_vtn_variable_load(struct vtn_builder *b, - nir_deref_var *src_deref, nir_deref *src_deref_tail) +static void +_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref, + nir_deref *tail, struct vtn_ssa_value *inout) { - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = src_deref_tail->type; - /* The deref tail may contain a deref to select a component of a vector (in * other words, it might not be an actual tail) so we have to save it away * here since we overwrite it later. */ - nir_deref *old_child = src_deref_tail->child; + nir_deref *old_child = tail->child; - if (glsl_type_is_vector_or_scalar(val->type)) { + if (glsl_type_is_vector_or_scalar(tail->type)) { /* Terminate the deref chain in case there is one more link to pick * off a component of the vector. 
*/ - src_deref_tail->child = NULL; + tail->child = NULL; - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); - load->variables[0] = - nir_deref_as_var(nir_copy_deref(load, &src_deref->deref)); - load->num_components = glsl_get_vector_elements(val->type); - nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + nir_intrinsic_op op = load ? nir_intrinsic_load_var : + nir_intrinsic_store_var; - nir_builder_instr_insert(&b->nb, &load->instr); + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); + intrin->variables[0] = + nir_deref_as_var(nir_copy_deref(intrin, &deref->deref)); + intrin->num_components = glsl_get_vector_elements(tail->type); - if (src_deref->var->data.mode == nir_var_uniform && - glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) { - /* Uniform boolean loads need to be fixed up since they're defined - * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. - */ - val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + if (load) { + nir_ssa_dest_init(&intrin->instr, &intrin->dest, + intrin->num_components, NULL); + inout->def = &intrin->dest.ssa; } else { - val->def = &load->dest.ssa; - } - } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(val->type)) { - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - - nir_deref_array *deref = nir_deref_array_create(b); - deref->deref_array_type = nir_deref_array_type_direct; - deref->deref.type = glsl_get_array_element(val->type); - src_deref_tail->child = &deref->deref; - for (unsigned i = 0; i < elems; i++) { - deref->base_offset = i; - val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + intrin->const_index[0] = (1 << intrin->num_components) - 1; + intrin->src[0] = nir_src_for_ssa(inout->def); } - } else { - assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); - unsigned elems = glsl_get_length(val->type); 
- val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - - nir_deref_struct *deref = nir_deref_struct_create(b, 0); - src_deref_tail->child = &deref->deref; - for (unsigned i = 0; i < elems; i++) { - deref->index = i; - deref->deref.type = glsl_get_struct_field(val->type, i); - val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); - } - } - src_deref_tail->child = old_child; - - return val; -} - -static void -_vtn_variable_store(struct vtn_builder *b, - nir_deref_var *dest_deref, nir_deref *dest_deref_tail, - struct vtn_ssa_value *src) -{ - nir_deref *old_child = dest_deref_tail->child; - - if (glsl_type_is_vector_or_scalar(src->type)) { - /* Terminate the deref chain in case there is one more link to pick - * off a component of the vector. - */ - dest_deref_tail->child = NULL; - - nir_intrinsic_instr *store = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); - store->variables[0] = - nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); - store->num_components = glsl_get_vector_elements(src->type); - store->const_index[0] = (1 << store->num_components) - 1; - store->src[0] = nir_src_for_ssa(src->def); - - nir_builder_instr_insert(&b->nb, &store->instr); - } else if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(src->type)) { - unsigned elems = glsl_get_length(src->type); - - nir_deref_array *deref = nir_deref_array_create(b); - deref->deref_array_type = nir_deref_array_type_direct; - deref->deref.type = glsl_get_array_element(src->type); - dest_deref_tail->child = &deref->deref; + nir_builder_instr_insert(&b->nb, &intrin->instr); + } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(tail->type)) { + unsigned elems = glsl_get_length(tail->type); + nir_deref_array *deref_arr = nir_deref_array_create(b); + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->deref.type = glsl_get_array_element(tail->type); + tail->child = &deref_arr->deref; for 
(unsigned i = 0; i < elems; i++) { - deref->base_offset = i; - _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + deref_arr->base_offset = i; + _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); } } else { - assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); - unsigned elems = glsl_get_length(src->type); - - nir_deref_struct *deref = nir_deref_struct_create(b, 0); - dest_deref_tail->child = &deref->deref; + assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(tail->type); + nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0); + tail->child = &deref_struct->deref; for (unsigned i = 0; i < elems; i++) { - deref->index = i; - deref->deref.type = glsl_get_struct_field(src->type, i); - _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + deref_struct->index = i; + deref_struct->deref.type = glsl_get_struct_field(tail->type, i); + _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); } } - dest_deref_tail->child = old_child; + tail->child = old_child; } nir_deref_var * @@ -270,7 +211,8 @@ struct vtn_ssa_value * vtn_local_load(struct vtn_builder *b, nir_deref_var *src) { nir_deref *src_tail = get_deref_tail(src); - struct vtn_ssa_value *val = _vtn_variable_load(b, src, src_tail); + struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type); + _vtn_local_load_store(b, true, src, src_tail, val); if (src_tail->child) { nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); @@ -293,7 +235,8 @@ vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, nir_deref *dest_tail = get_deref_tail(dest); if (dest_tail->child) { - struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); + struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type); + _vtn_local_load_store(b, true, dest, dest_tail, val); nir_deref_array *deref = nir_deref_as_array(dest_tail->child); assert(deref->deref.child == NULL); if 
(deref->deref_array_type == nir_deref_array_type_direct) @@ -302,9 +245,9 @@ vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, else val->def = vtn_vector_insert_dynamic(b, val->def, src->def, deref->indirect.ssa); - _vtn_variable_store(b, dest, dest_tail, val); + _vtn_local_load_store(b, false, dest, dest_tail, val); } else { - _vtn_variable_store(b, dest, dest_tail, src); + _vtn_local_load_store(b, false, dest, dest_tail, src); } } -- cgit v1.2.3 From a8af0f536cc043edc20ad13e93e820c88f9cff59 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 Jan 2016 10:20:50 -0800 Subject: nir/spirv: Rework access chains a bit to allow for literals This makes them much easier to construct because you can also just specify a literal number and it doesn't have to be a valid SPIR-V id. --- src/glsl/nir/spirv/vtn_private.h | 12 ++++- src/glsl/nir/spirv/vtn_variables.c | 100 ++++++++++++++++++++++++------------- 2 files changed, 75 insertions(+), 37 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index 682bff5e8bb..e0f4b220c4c 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -236,13 +236,23 @@ struct vtn_type { struct vtn_variable; +enum vtn_access_mode { + vtn_access_mode_id, + vtn_access_mode_literal, +}; + +struct vtn_access_link { + enum vtn_access_mode mode; + uint32_t id; +}; + struct vtn_access_chain { struct vtn_variable *var; uint32_t length; /* Struct elements and array offsets */ - uint32_t ids[0]; + struct vtn_access_link link[0]; }; enum vtn_variable_mode { diff --git a/src/glsl/nir/spirv/vtn_variables.c b/src/glsl/nir/spirv/vtn_variables.c index a9c2857f4cf..d41f5cd3dec 100644 --- a/src/glsl/nir/spirv/vtn_variables.c +++ b/src/glsl/nir/spirv/vtn_variables.c @@ -27,6 +27,39 @@ #include "vtn_private.h" +static struct vtn_access_chain * +vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old, + unsigned new_ids) +{ + struct 
vtn_access_chain *chain; + + unsigned new_len = old->length + new_ids; + chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->link[0])); + + chain->var = old->var; + chain->length = new_len; + + for (unsigned i = 0; i < old->length; i++) + chain->link[i] = old->link[i]; + + return chain; +} + +static nir_ssa_def * +vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link, + unsigned stride) +{ + assert(stride > 0); + if (link.mode == vtn_access_mode_literal) { + return nir_imm_int(&b->nb, link.id * stride); + } else if (stride == 1) { + return vtn_ssa_value(b, link.id)->def; + } else { + return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def, + nir_imm_int(&b->nb, stride)); + } +} + /* Crawls a chain of array derefs and rewrites the types so that the * lengths stay the same but the terminal type is the one given by * tail_type. This is useful for split structures. @@ -60,7 +93,6 @@ vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) nir_variable **members = chain->var->members; for (unsigned i = 0; i < chain->length; i++) { - struct vtn_value *idx_val = vtn_untyped_value(b, chain->ids[i]); enum glsl_base_type base_type = glsl_get_base_type(deref_type->type); switch (base_type) { case GLSL_TYPE_UINT: @@ -81,15 +113,15 @@ vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) deref_arr->deref.type = deref_type->type; - if (idx_val->value_type == vtn_value_type_constant) { + if (chain->link[i].mode == vtn_access_mode_literal) { deref_arr->deref_array_type = nir_deref_array_type_direct; - deref_arr->base_offset = idx_val->constant->value.u[0]; + deref_arr->base_offset = chain->link[i].id; } else { - assert(idx_val->value_type == vtn_value_type_ssa); - assert(glsl_type_is_scalar(idx_val->ssa->type)); + assert(chain->link[i].mode == vtn_access_mode_id); deref_arr->deref_array_type = nir_deref_array_type_indirect; deref_arr->base_offset = 0; - deref_arr->indirect = 
nir_src_for_ssa(idx_val->ssa->def); + deref_arr->indirect = + nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def); } tail->child = &deref_arr->deref; tail = tail->child; @@ -97,8 +129,8 @@ vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) } case GLSL_TYPE_STRUCT: { - assert(idx_val->value_type == vtn_value_type_constant); - unsigned idx = idx_val->constant->value.u[0]; + assert(chain->link[i].mode == vtn_access_mode_literal); + unsigned idx = chain->link[i].id; deref_type = deref_type->members[idx]; if (members) { /* This is a pre-split structure. */ @@ -265,7 +297,7 @@ get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, nir_ssa_def *array_index; if (glsl_type_is_array(chain->var->type->type)) { assert(chain->length > 0); - array_index = vtn_ssa_value(b, chain->ids[0])->def; + array_index = vtn_access_link_as_ssa(b, chain->link[0], 1); *chain_idx = 1; *type = chain->var->type->array_element; } else { @@ -315,9 +347,8 @@ vtn_access_chain_to_offset(struct vtn_builder *b, case GLSL_TYPE_ARRAY: offset = nir_iadd(&b->nb, offset, - nir_imul(&b->nb, - vtn_ssa_value(b, chain->ids[idx])->def, - nir_imm_int(&b->nb, type->stride))); + vtn_access_link_as_ssa(b, chain->link[idx], + type->stride)); if (glsl_type_is_vector(type->type)) { /* This had better be the tail */ @@ -330,10 +361,8 @@ vtn_access_chain_to_offset(struct vtn_builder *b, break; case GLSL_TYPE_STRUCT: { - struct vtn_value *member_val = - vtn_value(b, chain->ids[idx], vtn_value_type_constant); - unsigned member = member_val->constant->value.u[0]; - + assert(chain->link[idx].mode == vtn_access_mode_literal); + unsigned member = chain->link[idx].id; offset = nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[member])); type = type->members[member]; @@ -448,16 +477,15 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, } else if (type->row_major) { /* Row-major but with an access chiain. 
*/ nir_ssa_def *col_offset = - nir_imul(&b->nb, vtn_ssa_value(b, chain->ids[chain_idx])->def, - nir_imm_int(&b->nb, type->array_element->stride)); + vtn_access_link_as_ssa(b, chain->link[chain_idx], + type->array_element->stride); offset = nir_iadd(&b->nb, offset, col_offset); if (chain_idx + 1 < chain->length) { /* Picking off a single element */ nir_ssa_def *row_offset = - nir_imul(&b->nb, - vtn_ssa_value(b, chain->ids[chain_idx + 1])->def, - nir_imm_int(&b->nb, type->stride)); + vtn_access_link_as_ssa(b, chain->link[chain_idx + 1], + type->stride); offset = nir_iadd(&b->nb, offset, row_offset); _vtn_load_store_tail(b, op, load, index, offset, inout, glsl_scalar_type(base_type)); @@ -487,8 +515,7 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, } else { /* Column-major with a deref. Fall through to array case. */ nir_ssa_def *col_offset = - nir_imul(&b->nb, vtn_ssa_value(b, chain->ids[chain_idx])->def, - nir_imm_int(&b->nb, type->stride)); + vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); offset = nir_iadd(&b->nb, offset, col_offset); _vtn_block_load_store(b, op, load, index, offset, @@ -502,8 +529,7 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, } else { /* Single component of a vector. Fall through to array case. 
*/ nir_ssa_def *elem_offset = - nir_imul(&b->nb, vtn_ssa_value(b, chain->ids[chain_idx])->def, - nir_imm_int(&b->nb, type->stride)); + vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); offset = nir_iadd(&b->nb, offset, elem_offset); _vtn_block_load_store(b, op, load, index, offset, NULL, 0, @@ -1158,18 +1184,20 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, base = base_val->access_chain; } - uint32_t new_len = base->length + count - 4; - chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->ids[0])); - - *chain = *base; + chain = vtn_access_chain_extend(b, base, count - 4); - chain->length = new_len; - unsigned idx = 0; - for (int i = 0; i < base->length; i++) - chain->ids[idx++] = base->ids[i]; - - for (int i = 4; i < count; i++) - chain->ids[idx++] = w[i]; + unsigned idx = base->length; + for (int i = 4; i < count; i++) { + struct vtn_value *link_val = vtn_untyped_value(b, w[i]); + if (link_val->value_type == vtn_value_type_constant) { + chain->link[idx].mode = vtn_access_mode_literal; + chain->link[idx].id = link_val->constant->value.u[0]; + } else { + chain->link[idx].mode = vtn_access_mode_id; + chain->link[idx].id = w[i]; + } + idx++; + } if (base_val->value_type == vtn_value_type_sampled_image) { struct vtn_value *val = -- cgit v1.2.3 From 7e5e64c8a9612de307ae6d17c94853f6106626d6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 Jan 2016 10:58:03 -0800 Subject: nir/spirv: Make vectors a proper array time with an array_element This makes dealing with single-component derefs easier --- src/glsl/nir/spirv/spirv_to_nir.c | 10 +++------- src/glsl/nir/spirv/vtn_variables.c | 20 +++----------------- 2 files changed, 6 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 7d8699b1c00..7d9d0981269 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -394,21 +394,17 @@ vtn_type_copy(struct 
vtn_builder *b, struct vtn_type *src) if (src->is_builtin) dest->builtin = src->builtin; - if (!glsl_type_is_vector_or_scalar(src->type)) { + if (!glsl_type_is_scalar(src->type)) { switch (glsl_get_base_type(src->type)) { - case GLSL_TYPE_ARRAY: - dest->array_element = src->array_element; - dest->stride = src->stride; - break; - case GLSL_TYPE_INT: case GLSL_TYPE_UINT: case GLSL_TYPE_BOOL: case GLSL_TYPE_FLOAT: case GLSL_TYPE_DOUBLE: - /* matrices */ + case GLSL_TYPE_ARRAY: dest->row_major = src->row_major; dest->stride = src->stride; + dest->array_element = src->array_element; break; case GLSL_TYPE_STRUCT: { diff --git a/src/glsl/nir/spirv/vtn_variables.c b/src/glsl/nir/spirv/vtn_variables.c index d41f5cd3dec..edd2263b167 100644 --- a/src/glsl/nir/spirv/vtn_variables.c +++ b/src/glsl/nir/spirv/vtn_variables.c @@ -101,16 +101,9 @@ vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) case GLSL_TYPE_DOUBLE: case GLSL_TYPE_BOOL: case GLSL_TYPE_ARRAY: { - nir_deref_array *deref_arr = nir_deref_array_create(b); - if (base_type == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(deref_type->type)) { - deref_type = deref_type->array_element; - } else { - assert(glsl_type_is_vector(deref_type->type)); - deref_type = ralloc(b, struct vtn_type); - deref_type->type = glsl_scalar_type(base_type); - } + deref_type = deref_type->array_element; + nir_deref_array *deref_arr = nir_deref_array_create(b); deref_arr->deref.type = deref_type->type; if (chain->link[i].mode == vtn_access_mode_literal) { @@ -350,14 +343,7 @@ vtn_access_chain_to_offset(struct vtn_builder *b, vtn_access_link_as_ssa(b, chain->link[idx], type->stride)); - if (glsl_type_is_vector(type->type)) { - /* This had better be the tail */ - assert(idx == chain->length - 1); - type = rzalloc(b, struct vtn_type); - type->type = glsl_scalar_type(base_type); - } else { - type = type->array_element; - } + type = type->array_element; break; case GLSL_TYPE_STRUCT: { -- cgit v1.2.3 From 
514507825cd683ca2ebe9d25446dfc48b07bb9f6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 Jan 2016 10:58:53 -0800 Subject: nir/spirv: Improve handling of variable loads and copies Before we were asuming that a deref would either be something in a block or something that we could pass off to NIR directly. However, it is possible that someone would choose to load/store/copy a split structure all in one go. We need to be able to handle that. --- src/glsl/nir/spirv/vtn_variables.c | 154 ++++++++++++++++++++++++++++++------- 1 file changed, 128 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_variables.c b/src/glsl/nir/spirv/vtn_variables.c index edd2263b167..8171a5fc156 100644 --- a/src/glsl/nir/spirv/vtn_variables.c +++ b/src/glsl/nir/spirv/vtn_variables.c @@ -60,6 +60,22 @@ vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link, } } +static struct vtn_type * +vtn_access_chain_tail_type(struct vtn_builder *b, + struct vtn_access_chain *chain) +{ + struct vtn_type *type = chain->var->type; + for (unsigned i = 0; i < chain->length; i++) { + if (glsl_type_is_struct(type->type)) { + assert(chain->link[i].mode == vtn_access_mode_literal); + type = type->members[chain->link[i].id]; + } else { + type = type->array_element; + } + } + return type; +} + /* Crawls a chain of array derefs and rewrites the types so that the * lengths stay the same but the terminal type is the one given by * tail_type. This is useful for split structures. 
@@ -332,10 +348,8 @@ vtn_access_chain_to_offset(struct vtn_builder *b, case GLSL_TYPE_DOUBLE: case GLSL_TYPE_BOOL: /* Some users may not want matrix or vector derefs */ - if (stop_at_matrix) { - idx++; + if (stop_at_matrix) goto end; - } /* Fall through */ case GLSL_TYPE_ARRAY: @@ -413,7 +427,7 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, struct vtn_access_chain *chain, unsigned chain_idx, struct vtn_type *type, struct vtn_ssa_value **inout) { - if (chain_idx >= chain->length) + if (chain && chain_idx >= chain->length) chain = NULL; if (load && chain == NULL && *inout == NULL) @@ -600,13 +614,69 @@ vtn_variable_is_external_block(struct vtn_variable *var) var->mode == vtn_variable_mode_push_constant; } +static void +_vtn_variable_load_store(struct vtn_builder *b, bool load, + struct vtn_access_chain *chain, + struct vtn_type *tail_type, + struct vtn_ssa_value **inout) +{ + enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* At this point, we have a scalar, vector, or matrix so we know that + * there cannot be any structure splitting still in the way. By + * stopping at the matrix level rather than the vector level, we + * ensure that matrices get loaded in the optimal way even if they + * are storred row-major in a UBO. 
+ */ + if (load) { + *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain)); + } else { + vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain)); + } + return; + + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: { + struct vtn_access_chain *new_chain = + vtn_access_chain_extend(b, chain, 1); + new_chain->link[chain->length].mode = vtn_access_mode_literal; + unsigned elems = glsl_get_length(tail_type->type); + if (load) { + assert(*inout == NULL); + *inout = rzalloc(b, struct vtn_ssa_value); + (*inout)->type = tail_type->type; + (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems); + } + for (unsigned i = 0; i < elems; i++) { + new_chain->link[chain->length].id = i; + struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ? + tail_type->array_element : tail_type->members[i]; + _vtn_variable_load_store(b, load, new_chain, elem_type, + &(*inout)->elems[i]); + } + return; + } + + default: + unreachable("Invalid access chain type"); + } +} + struct vtn_ssa_value * vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src) { - if (vtn_variable_is_external_block(src->var)) + if (vtn_variable_is_external_block(src->var)) { return vtn_block_load(b, src); - else - return vtn_local_load(b, vtn_access_chain_to_deref(b, src)); + } else { + struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); + struct vtn_ssa_value *val = NULL; + _vtn_variable_load_store(b, true, src, tail_type, &val); + return val; + } } void @@ -617,7 +687,50 @@ vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, assert(dest->var->mode == vtn_variable_mode_ssbo); vtn_block_store(b, src, dest); } else { - vtn_local_store(b, src, vtn_access_chain_to_deref(b, dest)); + struct vtn_type *tail_type = vtn_access_chain_tail_type(b, dest); + _vtn_variable_load_store(b, false, dest, tail_type, &src); + } +} + +static void +_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, + struct vtn_access_chain *src, struct vtn_type 
*tail_type) +{ + enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* At this point, we have a scalar, vector, or matrix so we know that + * there cannot be any structure splitting still in the way. By + * stopping at the matrix level rather than the vector level, we + * ensure that matrices get loaded in the optimal way even if they + * are storred row-major in a UBO. + */ + vtn_variable_store(b, vtn_variable_load(b, src), dest); + return; + + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: { + struct vtn_access_chain *new_src, *new_dest; + new_src = vtn_access_chain_extend(b, src, 1); + new_dest = vtn_access_chain_extend(b, dest, 1); + new_src->link[src->length].mode = vtn_access_mode_literal; + new_dest->link[dest->length].mode = vtn_access_mode_literal; + unsigned elems = glsl_get_length(tail_type->type); + for (unsigned i = 0; i < elems; i++) { + new_src->link[src->length].id = i; + new_dest->link[dest->length].id = i; + struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ? 
+ tail_type->array_element : tail_type->members[i]; + _vtn_variable_copy(b, new_dest, new_src, elem_type); + } + return; + } + + default: + unreachable("Invalid access chain type"); } } @@ -625,24 +738,13 @@ static void vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, struct vtn_access_chain *src) { - if (vtn_variable_is_external_block(src->var) || - vtn_variable_is_external_block(dest->var)) { - struct vtn_ssa_value *val = vtn_variable_load(b, src); - vtn_variable_store(b, val, dest); - } else { - /* TODO: Handle single components of vectors */ - nir_deref_var *src_deref = vtn_access_chain_to_deref(b, src); - nir_deref_var *dest_deref = vtn_access_chain_to_deref(b, dest); - - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); - copy->variables[0] = - nir_deref_as_var(nir_copy_deref(copy, &dest_deref->deref)); - copy->variables[1] = - nir_deref_as_var(nir_copy_deref(copy, &src_deref->deref)); - - nir_builder_instr_insert(&b->nb, ©->instr); - } + struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); + assert(vtn_access_chain_tail_type(b, dest)->type == tail_type->type); + + /* TODO: At some point, we should add a special-case for when we can + * just emit a copy_var intrinsic. 
+ */ + _vtn_variable_copy(b, dest, src, tail_type); } static void -- cgit v1.2.3 From d8c0e0805b3693f279d31b190173518231965fbc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 Jan 2016 12:41:45 -0800 Subject: nir/spirv: Properly assign locations to split structures --- src/glsl/nir/spirv/spirv_to_nir.c | 1 + src/glsl/nir/spirv/vtn_private.h | 3 ++ src/glsl/nir/spirv/vtn_variables.c | 60 +++++++++++++++++++++++++++----------- 3 files changed, 47 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 7d9d0981269..32188e5f364 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -585,6 +585,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type = rzalloc(b, struct vtn_type); val->type->is_builtin = false; + val->type->val = val; switch (opcode) { case SpvOpTypeVoid: diff --git a/src/glsl/nir/spirv/vtn_private.h b/src/glsl/nir/spirv/vtn_private.h index e0f4b220c4c..3840d8c4b65 100644 --- a/src/glsl/nir/spirv/vtn_private.h +++ b/src/glsl/nir/spirv/vtn_private.h @@ -196,6 +196,9 @@ struct vtn_ssa_value { struct vtn_type { const struct glsl_type *type; + /* The value that declares this type. 
Used for finding decorations */ + struct vtn_value *val; + /* for matrices, whether the matrix is stored row-major */ bool row_major; diff --git a/src/glsl/nir/spirv/vtn_variables.c b/src/glsl/nir/spirv/vtn_variables.c index 8171a5fc156..5c372774126 100644 --- a/src/glsl/nir/spirv/vtn_variables.c +++ b/src/glsl/nir/spirv/vtn_variables.c @@ -887,6 +887,43 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, case SpvDecorationDescriptorSet: vtn_var->descriptor_set = dec->literals[0]; return; + + case SpvDecorationLocation: { + unsigned location = dec->literals[0]; + bool is_vertex_input; + if (b->shader->stage == MESA_SHADER_FRAGMENT && + vtn_var->mode == vtn_variable_mode_output) { + is_vertex_input = false; + location += FRAG_RESULT_DATA0; + } else if (b->shader->stage == MESA_SHADER_VERTEX && + vtn_var->mode == vtn_variable_mode_input) { + is_vertex_input = true; + location += VERT_ATTRIB_GENERIC0; + } else if (vtn_var->mode == vtn_variable_mode_input || + vtn_var->mode == vtn_variable_mode_output) { + is_vertex_input = false; + location += VARYING_SLOT_VAR0; + } else { + assert(!"Location must be on input or output variable"); + } + + if (vtn_var->var) { + vtn_var->var->data.location = location; + vtn_var->var->data.explicit_location = true; + } else { + assert(vtn_var->members); + unsigned length = glsl_get_length(vtn_var->type->type); + for (unsigned i = 0; i < length; i++) { + vtn_var->members[i]->data.location = location; + vtn_var->members[i]->data.explicit_location = true; + location += + glsl_count_attribute_slots(vtn_var->members[i]->interface_type, + is_vertex_input); + } + } + return; + } + default: break; } @@ -899,7 +936,6 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, assert(member == -1); } else { assert(val->value_type == vtn_value_type_type); - assert(vtn_var->type == val->type); if (member != -1) nir_var = vtn_var->members[member]; } @@ -932,22 +968,6 @@ var_decoration_cb(struct 
vtn_builder *b, struct vtn_value *val, int member, case SpvDecorationNonWritable: nir_var->data.read_only = true; break; - case SpvDecorationLocation: - nir_var->data.location = dec->literals[0]; - if (b->shader->stage == MESA_SHADER_FRAGMENT && - nir_var->data.mode == nir_var_shader_out) { - nir_var->data.location += FRAG_RESULT_DATA0; - } else if (b->shader->stage == MESA_SHADER_VERTEX && - nir_var->data.mode == nir_var_shader_in) { - nir_var->data.location += VERT_ATTRIB_GENERIC0; - } else if (nir_var->data.mode == nir_var_shader_in || - nir_var->data.mode == nir_var_shader_out) { - nir_var->data.location += VARYING_SLOT_VAR0; - } else { - assert(!"Location must be on input or output variable"); - } - nir_var->data.explicit_location = true; - break; case SpvDecorationComponent: nir_var->data.location_frac = dec->literals[0]; break; @@ -1201,7 +1221,13 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, var->var->interface_type = interface_type->type; var->var->data.mode = nir_mode; } + + /* For inputs and outputs, we need to grab locations and builtin + * information from the interface type. 
+ */ + vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var); break; + case vtn_variable_mode_param: unreachable("Not created through OpVariable"); } -- cgit v1.2.3 From 13858a1c1aef407777a839b8a342ebe56c9b4c52 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 Jan 2016 16:00:47 -0800 Subject: nir/lower_system_values: Use the correct invication id for CS --- src/glsl/nir/nir_lower_system_values.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c index 69d0554fbca..aeaa3107c1f 100644 --- a/src/glsl/nir/nir_lower_system_values.c +++ b/src/glsl/nir/nir_lower_system_values.c @@ -89,7 +89,7 @@ convert_block(nir_block *block, void *void_state) * gl_WorkGroupSize.x + gl_LocalInvocationID.x" */ nir_ssa_def *local_id = - nir_load_system_value(b, nir_intrinsic_load_invocation_id, 0); + nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0); unsigned stride_y = b->shader->info.cs.local_size[0]; unsigned stride_z = b->shader->info.cs.local_size[0] * -- cgit v1.2.3 From 13aaf9004809b400a22a0cb1d0e85afdff169189 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 21 Jan 2016 16:03:06 -0800 Subject: nir/spirv: Ignore cull distance --- src/glsl/nir/spirv/vtn_variables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_variables.c b/src/glsl/nir/spirv/vtn_variables.c index 5c372774126..8fea43b2e96 100644 --- a/src/glsl/nir/spirv/vtn_variables.c +++ b/src/glsl/nir/spirv/vtn_variables.c @@ -771,7 +771,7 @@ vtn_get_builtin_location(struct vtn_builder *b, break; case SpvBuiltInCullDistance: /* XXX figure this out */ - unreachable("unhandled builtin"); + break; case SpvBuiltInVertexIndex: *location = SYSTEM_VALUE_VERTEX_ID; set_mode_system_value(mode); -- cgit v1.2.3 From 2e54381622357622221f0ca936462ea0a6a69a0f Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 21 
Jan 2016 14:14:07 -0800 Subject: anv/batch_chain: Fix patching up of block pool relocations on Gen8+. Relocations are 64 bits on Gen8+. Most CTS tests that send non-trivial work to the GPU would fail when run from a single deqp-vk invocation because they were effectively relying on reloc presumed offsets to be wrong so the kernel would come and apply relocations correctly. --- src/vulkan/anv_batch_chain.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 89215fe6992..ee6e39d3a75 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -898,6 +898,24 @@ anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, } } +static uint64_t +read_reloc(const struct anv_device *device, const void *p) +{ + if (device->info.gen >= 8) + return *(uint64_t *)p; + else + return *(uint32_t *)p; +} + +static void +write_reloc(const struct anv_device *device, void *p, uint64_t v) +{ + if (device->info.gen >= 8) + *(uint64_t *)p = v; + else + *(uint32_t *)p = v; +} + static void adjust_relocations_from_block_pool(struct anv_block_pool *pool, struct anv_reloc_list *relocs) @@ -911,13 +929,14 @@ adjust_relocations_from_block_pool(struct anv_block_pool *pool, * block pool. Then the kernel will update it for us if needed. */ assert(relocs->relocs[i].offset < pool->state.end); - uint32_t *reloc_data = pool->map + relocs->relocs[i].offset; + const void *p = pool->map + relocs->relocs[i].offset; /* We're reading back the relocated value from potentially incoherent * memory here. However, any change to the value will be from the kernel * writing out relocations, which will keep the CPU cache up to date. 
*/ - relocs->relocs[i].presumed_offset = *reloc_data - relocs->relocs[i].delta; + relocs->relocs[i].presumed_offset = + read_reloc(pool->device, p) - relocs->relocs[i].delta; /* All of the relocations from this block pool to other BO's should * have been emitted relative to the surface block pool center. We @@ -958,9 +977,9 @@ adjust_relocations_to_block_pool(struct anv_block_pool *pool, * use by the GPU at the moment. */ assert(relocs->relocs[i].offset < from_bo->size); - uint32_t *reloc_data = from_bo->map + relocs->relocs[i].offset; - *reloc_data = relocs->relocs[i].presumed_offset + - relocs->relocs[i].delta; + write_reloc(pool->device, from_bo->map + relocs->relocs[i].offset, + relocs->relocs[i].presumed_offset + + relocs->relocs[i].delta); } } -- cgit v1.2.3 From 63d999b76206c58b6a8938ff0816535cf76fb925 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 22 Jan 2016 07:17:06 -0800 Subject: isl/tests: Fix build isl_device_init() acquired a new param for bit6 swizzling. --- src/isl/tests/isl_surf_get_image_offset_test.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/tests/isl_surf_get_image_offset_test.c b/src/isl/tests/isl_surf_get_image_offset_test.c index 78362be4310..63c12cb83cf 100644 --- a/src/isl/tests/isl_surf_get_image_offset_test.c +++ b/src/isl/tests/isl_surf_get_image_offset_test.c @@ -105,7 +105,8 @@ test_bdw_2d_r8g8b8a8_unorm_512x512_a1_s1_noaux_y0(void) bool ok; struct isl_device dev; - isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID)); + isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID), + /*bit6_swizzle*/ false); struct isl_surf surf; ok = isl_surf_init(&dev, &surf, @@ -146,7 +147,8 @@ test_bdw_2d_r8g8b8a8_unorm_1024x1024_a6_s1_noaux_y0(void) bool ok; struct isl_device dev; - isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID)); + isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID), + /*bit6_swizzle*/ false); struct isl_surf surf; ok = isl_surf_init(&dev, &surf, -- 
cgit v1.2.3 From fbc87ce4be234548c259ab6bb7829c245783c5c9 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 22 Jan 2016 07:17:34 -0800 Subject: isl/tests: Remove copy-paste assertion --- src/isl/tests/isl_surf_get_image_offset_test.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/isl/tests/isl_surf_get_image_offset_test.c b/src/isl/tests/isl_surf_get_image_offset_test.c index 63c12cb83cf..a794e537809 100644 --- a/src/isl/tests/isl_surf_get_image_offset_test.c +++ b/src/isl/tests/isl_surf_get_image_offset_test.c @@ -167,7 +167,6 @@ test_bdw_2d_r8g8b8a8_unorm_1024x1024_a6_s1_noaux_y0(void) t_assert_image_alignment_el(&surf, 4, 4, 1); t_assert_image_alignment_sa(&surf, 4, 4, 1); - t_assert_image_alignment_sa(&surf, 4, 4, 1); t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 1540); t_assert(isl_surf_get_array_pitch_sa_rows(&surf) >= 1540); -- cgit v1.2.3 From 891ed5ca8c794dac3cffd7a91e8c3c7f2e02fc93 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 22 Jan 2016 08:31:29 -0800 Subject: isl/tests: Add test for bdw 3d surface test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0 Currently fails. 
--- src/isl/tests/isl_surf_get_image_offset_test.c | 65 +++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/isl/tests/isl_surf_get_image_offset_test.c b/src/isl/tests/isl_surf_get_image_offset_test.c index a794e537809..4b30f913183 100644 --- a/src/isl/tests/isl_surf_get_image_offset_test.c +++ b/src/isl/tests/isl_surf_get_image_offset_test.c @@ -28,6 +28,7 @@ #include "brw_device_info.h" #include "isl.h" +#include "isl_priv.h" #define BDW_GT2_DEVID 0x161a @@ -99,6 +100,25 @@ t_assert_phys_level0_sa(const struct isl_surf *surf, uint32_t width, t_assert_extent4d(&surf->phys_level0_sa, width, height, depth, array_len); } +static void +t_assert_gen4_3d_layer(const struct isl_surf *surf, + uint32_t level, + uint32_t aligned_width, + uint32_t aligned_height, + uint32_t depth, + uint32_t horiz_layers, + uint32_t vert_layers, + uint32_t *base_y) +{ + for (uint32_t z = 0; z < depth; ++z) { + t_assert_offset(surf, level, 0, z, + aligned_width * (z % horiz_layers), + *base_y + aligned_height * (z / horiz_layers)); + } + + *base_y += aligned_height * vert_layers; +} + static void test_bdw_2d_r8g8b8a8_unorm_512x512_a1_s1_noaux_y0(void) { @@ -187,12 +207,55 @@ test_bdw_2d_r8g8b8a8_unorm_1024x1024_a6_s1_noaux_y0(void) } } +static void +test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0(void) +{ + bool ok; + + struct isl_device dev; + isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID), + /*bit6_swizzle*/ false); + + struct isl_surf surf; + ok = isl_surf_init(&dev, &surf, + .dim = ISL_SURF_DIM_3D, + .format = ISL_FORMAT_R8G8B8A8_UNORM, + .width = 256, + .height = 256, + .depth = 256, + .levels = 9, + .array_len = 1, + .samples = 1, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_DISABLE_AUX_BIT, + .tiling_flags = ISL_TILING_Y0_BIT); + t_assert(ok); + + t_assert_image_alignment_el(&surf, 4, 4, 1); + t_assert_image_alignment_sa(&surf, 4, 4, 1); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 74916); + 
t_assert(isl_surf_get_array_pitch_sa_rows(&surf) == + isl_surf_get_array_pitch_el_rows(&surf)); + + uint32_t base_y = 0; + + t_assert_gen4_3d_layer(&surf, 0, 256, 256, 256, 1, 256, &base_y); + t_assert_gen4_3d_layer(&surf, 1, 128, 128, 128, 2, 64, &base_y); + t_assert_gen4_3d_layer(&surf, 2, 64, 64, 64, 4, 16, &base_y); + t_assert_gen4_3d_layer(&surf, 3, 32, 32, 32, 8, 4, &base_y); + t_assert_gen4_3d_layer(&surf, 4, 16, 16, 16, 16, 1, &base_y); + t_assert_gen4_3d_layer(&surf, 5, 8, 8, 8, 32, 1, &base_y); + t_assert_gen4_3d_layer(&surf, 6, 4, 4, 4, 64, 1, &base_y); + t_assert_gen4_3d_layer(&surf, 7, 4, 4, 2, 128, 1, &base_y); + t_assert_gen4_3d_layer(&surf, 8, 4, 4, 1, 256, 1, &base_y); +} + int main(void) { /* FINISHME: Add tests for npot sizes */ /* FINISHME: Add tests for 1D surfaces */ - /* FINISHME: Add tests for 3D surfaces */ test_bdw_2d_r8g8b8a8_unorm_512x512_a1_s1_noaux_y0(); test_bdw_2d_r8g8b8a8_unorm_1024x1024_a6_s1_noaux_y0(); + test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0(); } -- cgit v1.2.3 From f9d4d0954992e367992fca3c017b5720cb213ded Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 22 Jan 2016 08:32:00 -0800 Subject: isl: Fix isl_surf_get_image_offset_sa for gen4_3d layout Bug found by unit test test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0. 
--- src/isl/isl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index bb3d59576e0..40663ca18af 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -1200,10 +1200,9 @@ get_image_offset_sa_gen4_3d(const struct isl_surf *surf, const uint32_t level_d = isl_align_npot(isl_minify(D0, level), image_align_sa.d); const uint32_t max_layers_horiz = MIN(level_d, 1u << level); - const uint32_t max_layers_vert = isl_align_div(level_d, 1u << level); x += level_w * (logical_z_offset_px % max_layers_horiz); - y += level_h * (logical_z_offset_px / max_layers_vert); + y += level_h * (logical_z_offset_px / max_layers_horiz); *x_offset_sa = x; *y_offset_sa = y; -- cgit v1.2.3 From 65f3c420c3e15c30ab8f87881ae9302a9d4f932b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 22 Jan 2016 09:46:48 -0800 Subject: isl/tests: Give tests less cryptic names --- src/isl/tests/isl_surf_get_image_offset_test.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/isl/tests/isl_surf_get_image_offset_test.c b/src/isl/tests/isl_surf_get_image_offset_test.c index 4b30f913183..525180ea204 100644 --- a/src/isl/tests/isl_surf_get_image_offset_test.c +++ b/src/isl/tests/isl_surf_get_image_offset_test.c @@ -120,7 +120,7 @@ t_assert_gen4_3d_layer(const struct isl_surf *surf, } static void -test_bdw_2d_r8g8b8a8_unorm_512x512_a1_s1_noaux_y0(void) +test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(void) { bool ok; @@ -162,7 +162,7 @@ test_bdw_2d_r8g8b8a8_unorm_512x512_a1_s1_noaux_y0(void) } static void -test_bdw_2d_r8g8b8a8_unorm_1024x1024_a6_s1_noaux_y0(void) +test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(void) { bool ok; @@ -255,7 +255,7 @@ int main(void) /* FINISHME: Add tests for npot sizes */ /* FINISHME: Add tests for 1D surfaces */ - test_bdw_2d_r8g8b8a8_unorm_512x512_a1_s1_noaux_y0(); - test_bdw_2d_r8g8b8a8_unorm_1024x1024_a6_s1_noaux_y0(); + 
test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(); + test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(); test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0(); } -- cgit v1.2.3 From d9abbbe0d871f0467dbe3b4265a15960dc6f20b1 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 22 Jan 2016 09:48:11 -0800 Subject: isl: Fix indentation of isl_format_layout comment --- src/isl/isl.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/isl/isl.h b/src/isl/isl.h index 4b3f179303d..9b99e00ec1c 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -555,11 +555,11 @@ struct isl_channel_layout { }; /** - * Each format has 3D block extent (width, height, depth). The block extent - * of compressed formats is that of the format's compression block. For - * example, the block extent of ISL_FORMAT_ETC2_RGB8 is (w=4, h=4, d=1). - * The block extent of uncompressed pixel formats, such as - * ISL_FORMAT_R8G8B8A8_UNORM, is is (w=1, h=1, d=1). + * Each format has 3D block extent (width, height, depth). The block extent of + * compressed formats is that of the format's compression block. For example, + * the block extent of ISL_FORMAT_ETC2_RGB8 is (w=4, h=4, d=1). The block + * extent of uncompressed pixel formats, such as ISL_FORMAT_R8G8B8A8_UNORM, is + * is (w=1, h=1, d=1). */ struct isl_format_layout { enum isl_format format; -- cgit v1.2.3 From 16780632c2d970d7a7029f067401aeaed2cd9121 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Jan 2016 11:09:24 -0800 Subject: i965/nir: Temporariliy disable mul+add fusion We don't want to do this in the long-run but it's needed for passing the NoContraction tests at the moment. Eventually, we want to plumb this through NIR properly. 
--- src/mesa/drivers/dri/i965/brw_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 0985c2cab4f..d983f58765e 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -535,7 +535,7 @@ brw_postprocess_nir(nir_shader *nir, if (devinfo->gen >= 6) { /* Try and fuse multiply-adds */ - OPT(brw_nir_opt_peephole_ffma); +// OPT(brw_nir_opt_peephole_ffma); } OPT(nir_opt_algebraic_late); -- cgit v1.2.3 From d533c3796d0cfcd05ad13ecae9b0740be6fa8833 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 21 Jan 2016 19:21:34 -0800 Subject: anv/state: Factor out surface state calculation from genX_image_view_init. Some fields of the surface state template were dependent on the surface type, which is dependent on the usage of the image view, which wasn't known until the bottom of the function after the template had been constructed. This caused failures in all image load/store CTS tests using cubemaps. Refactor the surface state calculation into a function that is called once for each required usage. 
--- src/vulkan/gen7_state.c | 100 +++++++++++++++++++++++++++--------------------- src/vulkan/gen8_state.c | 90 +++++++++++++++++++++++++------------------ 2 files changed, 109 insertions(+), 81 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 0c830d5c8ed..c722ff06f9f 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -148,22 +148,22 @@ static const uint8_t anv_valign[] = { [4] = VALIGN_4, }; -GENX_FUNC(GEN7, GEN75) void -genX(image_view_init)(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) +static struct GENX(RENDER_SURFACE_STATE) +surface_state_for_image_view(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) { - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); + assert(util_is_power_of_two(usage)); + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - struct anv_surface *surface = anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) - anv_finishme("non-2D image views"); - uint32_t depth = 1; if (range->layerCount > 1) { depth = range->layerCount; @@ -174,10 +174,13 @@ genX(image_view_init)(struct anv_image_view *iview, const struct isl_extent3d image_align_sa = isl_surf_get_image_alignment_sa(&surface->isl); - const struct GENX(RENDER_SURFACE_STATE) template = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType, false), + struct GENX(RENDER_SURFACE_STATE) template = { + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, + usage == VK_IMAGE_USAGE_STORAGE_BIT), .SurfaceArray = image->array_size > 1, - .SurfaceFormat = iview->format, + 
.SurfaceFormat = (usage != VK_IMAGE_USAGE_STORAGE_BIT ? iview->format : + isl_lower_storage_image_format( + &device->isl_dev, iview->format)), .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], .SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], @@ -227,19 +230,44 @@ genX(image_view_init)(struct anv_image_view *iview, .SurfaceBaseAddress = { NULL, iview->offset }, }; - if (image->needs_nonrt_surface_state) { - struct GENX(RENDER_SURFACE_STATE) surface_state = template; - - iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); - - surface_state.RenderCacheReadWriteMode = false; - + if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + /* For render target surfaces, the hardware interprets field + * MIPCount/LOD as LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + template.MIPCountLOD = range->baseMipLevel; + template.SurfaceMinLOD = 0; + } else { /* For non render target surfaces, the hardware interprets field * MIPCount/LOD as MIPCount. The range of levels accessible by the * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. 
*/ - surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = MAX2(range->levelCount, 1) - 1; + template.SurfaceMinLOD = range->baseMipLevel; + template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; + } + + return template; +} + +GENX_FUNC(GEN7, GEN75) void +genX(image_view_init)(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) + anv_finishme("non-2D image views"); + + if (image->needs_nonrt_surface_state) { + struct GENX(RENDER_SURFACE_STATE) surface_state = + surface_state_for_image_view(iview, device, pCreateInfo, + VK_IMAGE_USAGE_SAMPLED_BIT); + + iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, &surface_state); @@ -251,21 +279,12 @@ genX(image_view_init)(struct anv_image_view *iview, } if (image->needs_color_rt_surface_state) { - struct GENX(RENDER_SURFACE_STATE) surface_state = template; + struct GENX(RENDER_SURFACE_STATE) surface_state = + surface_state_for_image_view(iview, device, pCreateInfo, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); - surface_state.RenderCacheReadWriteMode = 0; /* Write only */ - - /* For render target surfaces, the hardware interprets field MIPCount/LOD as - * LOD. The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. 
- */ - surface_state.MIPCountLOD = range->baseMipLevel; - surface_state.SurfaceMinLOD = 0; - GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->color_rt_surface_state.map, &surface_state); if (!device->info.has_llc) @@ -275,19 +294,12 @@ genX(image_view_init)(struct anv_image_view *iview, } if (image->needs_storage_surface_state) { - struct GENX(RENDER_SURFACE_STATE) surface_state = template; + struct GENX(RENDER_SURFACE_STATE) surface_state = + surface_state_for_image_view(iview, device, pCreateInfo, + VK_IMAGE_USAGE_STORAGE_BIT); iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); - surface_state.SurfaceType = - anv_surftype(image, pCreateInfo->viewType, true), - - surface_state.SurfaceFormat = - isl_lower_storage_image_format(&device->isl_dev, iview->format); - - surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = MAX2(range->levelCount, 1) - 1; - GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, &surface_state); } else { diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index d42d0b1b5a6..c71193b7cd6 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -162,16 +162,19 @@ get_qpitch(const struct isl_surf *surf) } } -void -genX(image_view_init)(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) +static struct GENX(RENDER_SURFACE_STATE) +surface_state_for_image_view(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) { - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); + assert(util_is_power_of_two(usage)); + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - struct anv_surface *surface = 
anv_image_get_surface_for_aspect_mask(image, range->aspectMask); @@ -188,9 +191,12 @@ genX(image_view_init)(struct anv_image_view *iview, get_halign_valign(&surface->isl, &halign, &valign); struct GENX(RENDER_SURFACE_STATE) template = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType, false), + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, + usage == VK_IMAGE_USAGE_STORAGE_BIT), .SurfaceArray = image->array_size > 1, - .SurfaceFormat = iview->format, + .SurfaceFormat = (usage != VK_IMAGE_USAGE_STORAGE_BIT ? iview->format : + isl_lower_storage_image_format( + &device->isl_dev, iview->format)), .SurfaceVerticalAlignment = valign, .SurfaceHorizontalAlignment = halign, .TileMode = isl_to_gen_tiling[surface->isl.tiling], @@ -295,18 +301,42 @@ genX(image_view_init)(struct anv_image_view *iview, unreachable(!"bad SurfaceType"); } - if (image->needs_nonrt_surface_state) { - struct GENX(RENDER_SURFACE_STATE) surface_state = template; - - iview->nonrt_surface_state = - alloc_surface_state(device, cmd_buffer); - + if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + /* For render target surfaces, the hardware interprets field + * MIPCount/LOD as LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + template.MIPCountLOD = range->baseMipLevel; + template.SurfaceMinLOD = 0; + } else { /* For non render target surfaces, the hardware interprets field * MIPCount/LOD as MIPCount. The range of levels accessible by the * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. 
*/ - surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = MAX2(range->levelCount, 1) - 1; + template.SurfaceMinLOD = range->baseMipLevel; + template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; + } + + return template; +} + +void +genX(image_view_init)(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + + if (image->needs_nonrt_surface_state) { + const struct GENX(RENDER_SURFACE_STATE) surface_state = + surface_state_for_image_view(iview, device, pCreateInfo, + VK_IMAGE_USAGE_SAMPLED_BIT); + + iview->nonrt_surface_state = + alloc_surface_state(device, cmd_buffer); GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, &surface_state); @@ -317,20 +347,13 @@ genX(image_view_init)(struct anv_image_view *iview, } if (image->needs_color_rt_surface_state) { - struct GENX(RENDER_SURFACE_STATE) surface_state = template; + const struct GENX(RENDER_SURFACE_STATE) surface_state = + surface_state_for_image_view(iview, device, pCreateInfo, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); - /* For render target surfaces, the hardware interprets field - * MIPCount/LOD as LOD. The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. 
- */ - surface_state.MIPCountLOD = range->baseMipLevel; - surface_state.SurfaceMinLOD = 0; - GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->color_rt_surface_state.map, &surface_state); if (!device->info.has_llc) @@ -340,20 +363,13 @@ genX(image_view_init)(struct anv_image_view *iview, } if (image->needs_storage_surface_state) { - struct GENX(RENDER_SURFACE_STATE) surface_state = template; + struct GENX(RENDER_SURFACE_STATE) surface_state = + surface_state_for_image_view(iview, device, pCreateInfo, + VK_IMAGE_USAGE_STORAGE_BIT); iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); - surface_state.SurfaceType = - anv_surftype(image, pCreateInfo->viewType, true), - - surface_state.SurfaceFormat = - isl_lower_storage_image_format(&device->isl_dev, iview->format); - - surface_state.SurfaceMinLOD = range->baseMipLevel; - surface_state.MIPCountLOD = MAX2(range->levelCount, 1) - 1; - GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, &surface_state); } else { -- cgit v1.2.3 From 448285ebf202d7975a92ac01e1e70e683103c24a Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 21 Jan 2016 19:21:35 -0800 Subject: anv/state: Add missing clflushes for storage image surface state. 
--- src/vulkan/gen7_state.c | 2 ++ src/vulkan/gen8_state.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index c722ff06f9f..aecdfe2fe4d 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -302,6 +302,8 @@ genX(image_view_init)(struct anv_image_view *iview, GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, &surface_state); + if (!device->info.has_llc) + anv_state_clflush(iview->storage_surface_state); } else { iview->storage_surface_state.alloc_size = 0; } diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index c71193b7cd6..9fad7961b3f 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -372,6 +372,8 @@ genX(image_view_init)(struct anv_image_view *iview, GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, &surface_state); + if (!device->info.has_llc) + anv_state_clflush(iview->storage_surface_state); } else { iview->storage_surface_state.alloc_size = 0; } -- cgit v1.2.3 From 84612f4014089b089c9a04a6592b5a1ea7423c70 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Jan 2016 11:40:56 -0800 Subject: anv/state: Refactor surface state setup into a "fill" function --- src/vulkan/gen7_state.c | 49 ++++++++++++++++++------------------------------- src/vulkan/gen8_state.c | 48 ++++++++++++++++++------------------------------ 2 files changed, 36 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index aecdfe2fe4d..2d16bf3861b 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -148,11 +148,11 @@ static const uint8_t anv_valign[] = { [4] = VALIGN_4, }; -static struct GENX(RENDER_SURFACE_STATE) -surface_state_for_image_view(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) +static void +genX(fill_image_surface_state)(struct anv_device *device, 
void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) { assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | @@ -248,7 +248,10 @@ surface_state_for_image_view(struct anv_image_view *iview, template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; } - return template; + GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); + + if (!device->info.has_llc) + anv_state_clflush(iview->nonrt_surface_state); } GENX_FUNC(GEN7, GEN75) void @@ -263,47 +266,31 @@ genX(image_view_init)(struct anv_image_view *iview, anv_finishme("non-2D image views"); if (image->needs_nonrt_surface_state) { - struct GENX(RENDER_SURFACE_STATE) surface_state = - surface_state_for_image_view(iview, device, pCreateInfo, - VK_IMAGE_USAGE_SAMPLED_BIT); - iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); - GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, - &surface_state); - - if (!device->info.has_llc) - anv_state_clflush(iview->nonrt_surface_state); + genX(fill_image_surface_state)(device, iview->nonrt_surface_state.map, + iview, pCreateInfo, + VK_IMAGE_USAGE_SAMPLED_BIT); } else { iview->nonrt_surface_state.alloc_size = 0; } if (image->needs_color_rt_surface_state) { - struct GENX(RENDER_SURFACE_STATE) surface_state = - surface_state_for_image_view(iview, device, pCreateInfo, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); - GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->color_rt_surface_state.map, - &surface_state); - if (!device->info.has_llc) - anv_state_clflush(iview->color_rt_surface_state); + genX(fill_image_surface_state)(device, iview->color_rt_surface_state.map, + iview, pCreateInfo, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); } else { iview->color_rt_surface_state.alloc_size = 0; } if (image->needs_storage_surface_state) { - struct GENX(RENDER_SURFACE_STATE) surface_state = - 
surface_state_for_image_view(iview, device, pCreateInfo, - VK_IMAGE_USAGE_STORAGE_BIT); - iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); - GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, - &surface_state); - if (!device->info.has_llc) - anv_state_clflush(iview->storage_surface_state); + genX(fill_image_surface_state)(device, iview->storage_surface_state.map, + iview, pCreateInfo, + VK_IMAGE_USAGE_STORAGE_BIT); } else { iview->storage_surface_state.alloc_size = 0; } diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 9fad7961b3f..15bf0ffe4ac 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -162,11 +162,11 @@ get_qpitch(const struct isl_surf *surf) } } -static struct GENX(RENDER_SURFACE_STATE) -surface_state_for_image_view(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) +static void +genX(fill_image_surface_state)(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) { assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | @@ -319,7 +319,10 @@ surface_state_for_image_view(struct anv_image_view *iview, template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; } - return template; + GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); + + if (!device->info.has_llc) + anv_state_clflush(iview->nonrt_surface_state); } void @@ -331,49 +334,34 @@ genX(image_view_init)(struct anv_image_view *iview, ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); if (image->needs_nonrt_surface_state) { - const struct GENX(RENDER_SURFACE_STATE) surface_state = - surface_state_for_image_view(iview, device, pCreateInfo, - VK_IMAGE_USAGE_SAMPLED_BIT); - iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); - GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map, - 
&surface_state); - if (!device->info.has_llc) - anv_state_clflush(iview->nonrt_surface_state); + genX(fill_image_surface_state)(device, iview->nonrt_surface_state.map, + iview, pCreateInfo, + VK_IMAGE_USAGE_SAMPLED_BIT); } else { iview->nonrt_surface_state.alloc_size = 0; } if (image->needs_color_rt_surface_state) { - const struct GENX(RENDER_SURFACE_STATE) surface_state = - surface_state_for_image_view(iview, device, pCreateInfo, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); - GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->color_rt_surface_state.map, - &surface_state); - if (!device->info.has_llc) - anv_state_clflush(iview->color_rt_surface_state); + genX(fill_image_surface_state)(device, iview->color_rt_surface_state.map, + iview, pCreateInfo, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); } else { iview->color_rt_surface_state.alloc_size = 0; } if (image->needs_storage_surface_state) { - struct GENX(RENDER_SURFACE_STATE) surface_state = - surface_state_for_image_view(iview, device, pCreateInfo, - VK_IMAGE_USAGE_STORAGE_BIT); - iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); - GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->storage_surface_state.map, - &surface_state); - if (!device->info.has_llc) - anv_state_clflush(iview->storage_surface_state); + genX(fill_image_surface_state)(device, iview->storage_surface_state.map, + iview, pCreateInfo, + VK_IMAGE_USAGE_STORAGE_BIT); } else { iview->storage_surface_state.alloc_size = 0; } -- cgit v1.2.3 From e5558ffa64207e121f0745ff62eeec10fce08b23 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Jan 2016 11:57:01 -0800 Subject: anv/image: Move common code to anv_image.c --- src/vulkan/anv_image.c | 85 +++++++++++++++++++++++++++++++++++++++--------- src/vulkan/anv_private.h | 40 ++++++++++++----------- src/vulkan/gen7_state.c | 61 +++------------------------------- src/vulkan/gen8_state.c | 58 +-------------------------------- 4 
files changed, 96 insertions(+), 148 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index ba3b3b254cd..b51938740bd 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -400,6 +400,48 @@ anv_validate_CreateImageView(VkDevice _device, return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView); } +void +anv_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + gen75_fill_image_surface_state(device, state_map, iview, + pCreateInfo, usage); + else + gen7_fill_image_surface_state(device, state_map, iview, + pCreateInfo, usage); + break; + case 8: + gen8_fill_image_surface_state(device, state_map, iview, + pCreateInfo, usage); + break; + case 9: + gen9_fill_image_surface_state(device, state_map, iview, + pCreateInfo, usage); + break; + default: + unreachable("unsupported gen\n"); + } + + if (!device->info.has_llc) + anv_state_clflush(iview->nonrt_surface_state); +} + +static struct anv_state +alloc_surface_state(struct anv_device *device, + struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer) { + return anv_cmd_buffer_alloc_surface_state(cmd_buffer); + } else { + return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } +} + void anv_image_view_init(struct anv_image_view *iview, struct anv_device *device, @@ -447,21 +489,34 @@ anv_image_view_init(struct anv_image_view *iview, .depth = anv_minify(image->extent.depth, range->baseMipLevel), }; - switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - gen75_image_view_init(iview, device, pCreateInfo, cmd_buffer); - else - gen7_image_view_init(iview, device, pCreateInfo, cmd_buffer); - break; - case 8: - gen8_image_view_init(iview, device, pCreateInfo, cmd_buffer); - break; - case 9: - gen9_image_view_init(iview, device, pCreateInfo, 
cmd_buffer); - break; - default: - unreachable("unsupported gen\n"); + if (image->needs_nonrt_surface_state) { + iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); + + anv_fill_image_surface_state(device, iview->nonrt_surface_state.map, + iview, pCreateInfo, + VK_IMAGE_USAGE_SAMPLED_BIT); + } else { + iview->nonrt_surface_state.alloc_size = 0; + } + + if (image->needs_color_rt_surface_state) { + iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); + + anv_fill_image_surface_state(device, iview->color_rt_surface_state.map, + iview, pCreateInfo, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + } else { + iview->color_rt_surface_state.alloc_size = 0; + } + + if (image->needs_storage_surface_state) { + iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); + + anv_fill_image_surface_state(device, iview->storage_surface_state.map, + iview, pCreateInfo, + VK_IMAGE_USAGE_STORAGE_BIT); + } else { + iview->storage_surface_state.alloc_size = 0; } } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 05a6342e2f1..6d5551a5b69 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1599,28 +1599,30 @@ void anv_image_view_init(struct anv_image_view *view, struct anv_cmd_buffer *cmd_buffer); void -gen7_image_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer); - +anv_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); void -gen75_image_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer); - +gen7_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); void 
-gen8_image_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer); - +gen75_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen8_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); void -gen9_image_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer); +gen9_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); struct anv_buffer_view { enum isl_format format; /**< VkBufferViewCreateInfo::format */ diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 2d16bf3861b..1f829be9121 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -65,17 +65,6 @@ genX(fill_buffer_surface_state)(void *state, enum isl_format format, GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); } -static struct anv_state -alloc_surface_state(struct anv_device *device, - struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer) { - return anv_cmd_buffer_alloc_surface_state(cmd_buffer); - } else { - return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } -} - VkResult genX(CreateSampler)( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, @@ -148,12 +137,15 @@ static const uint8_t anv_valign[] = { [4] = VALIGN_4, }; -static void +void genX(fill_image_surface_state)(struct anv_device *device, void *state_map, struct anv_image_view *iview, const VkImageViewCreateInfo *pCreateInfo, VkImageUsageFlagBits usage) { + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) + 
anv_finishme("non-2D image views"); + assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); @@ -249,49 +241,4 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, } GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); - - if (!device->info.has_llc) - anv_state_clflush(iview->nonrt_surface_state); -} - -GENX_FUNC(GEN7, GEN75) void -genX(image_view_init)(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) - anv_finishme("non-2D image views"); - - if (image->needs_nonrt_surface_state) { - iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); - - genX(fill_image_surface_state)(device, iview->nonrt_surface_state.map, - iview, pCreateInfo, - VK_IMAGE_USAGE_SAMPLED_BIT); - } else { - iview->nonrt_surface_state.alloc_size = 0; - } - - if (image->needs_color_rt_surface_state) { - iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); - - genX(fill_image_surface_state)(device, iview->color_rt_surface_state.map, - iview, pCreateInfo, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - } else { - iview->color_rt_surface_state.alloc_size = 0; - } - - if (image->needs_storage_surface_state) { - iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); - - genX(fill_image_surface_state)(device, iview->storage_surface_state.map, - iview, pCreateInfo, - VK_IMAGE_USAGE_STORAGE_BIT); - } else { - iview->storage_surface_state.alloc_size = 0; - } } diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 15bf0ffe4ac..620a9d4ef13 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -78,17 +78,6 @@ static const uint8_t anv_valign[] = { [16] = VALIGN16, }; -static struct anv_state -alloc_surface_state(struct anv_device *device, - 
struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer) { - return anv_cmd_buffer_alloc_surface_state(cmd_buffer); - } else { - return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } -} - /** * Get the values to pack into RENDER_SUFFACE_STATE.SurfaceHorizontalAlignment * and SurfaceVerticalAlignment. @@ -162,7 +151,7 @@ get_qpitch(const struct isl_surf *surf) } } -static void +void genX(fill_image_surface_state)(struct anv_device *device, void *state_map, struct anv_image_view *iview, const VkImageViewCreateInfo *pCreateInfo, @@ -320,51 +309,6 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, } GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); - - if (!device->info.has_llc) - anv_state_clflush(iview->nonrt_surface_state); -} - -void -genX(image_view_init)(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - - if (image->needs_nonrt_surface_state) { - iview->nonrt_surface_state = - alloc_surface_state(device, cmd_buffer); - - genX(fill_image_surface_state)(device, iview->nonrt_surface_state.map, - iview, pCreateInfo, - VK_IMAGE_USAGE_SAMPLED_BIT); - } else { - iview->nonrt_surface_state.alloc_size = 0; - } - - if (image->needs_color_rt_surface_state) { - iview->color_rt_surface_state = - alloc_surface_state(device, cmd_buffer); - - genX(fill_image_surface_state)(device, iview->color_rt_surface_state.map, - iview, pCreateInfo, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - } else { - iview->color_rt_surface_state.alloc_size = 0; - } - - if (image->needs_storage_surface_state) { - iview->storage_surface_state = - alloc_surface_state(device, cmd_buffer); - - genX(fill_image_surface_state)(device, iview->storage_surface_state.map, - iview, pCreateInfo, - VK_IMAGE_USAGE_STORAGE_BIT); - } else { - iview->storage_surface_state.alloc_size = 0; - } } VkResult genX(CreateSampler)( 
-- cgit v1.2.3 From 107a109d1ce18169231865252d0d5d6649f0ecf5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Jan 2016 11:57:49 -0800 Subject: isl/format_layout: R11G11B10_FLOAT is unsigned --- src/isl/isl_format_layout.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/isl/isl_format_layout.csv b/src/isl/isl_format_layout.csv index 2a302b002ef..af2786ae630 100644 --- a/src/isl/isl_format_layout.csv +++ b/src/isl/isl_format_layout.csv @@ -126,7 +126,7 @@ R16G16_UINT , 32, 1, 1, 1, ui16, ui16, , , , R16G16_FLOAT , 32, 1, 1, 1, sf16, sf16, , , , , , linear, B10G10R10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, B10G10R10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, -R11G11B10_FLOAT , 32, 1, 1, 1, sf11, sf11, sf10, , , , , linear, +R11G11B10_FLOAT , 32, 1, 1, 1, uf11, uf11, uf10, , , , , linear, R32_SINT , 32, 1, 1, 1, si32, , , , , , , linear, R32_UINT , 32, 1, 1, 1, ui32, , , , , , , linear, R32_FLOAT , 32, 1, 1, 1, sf32, , , , , , , linear, -- cgit v1.2.3 From 3cd8c0bb04ffd56a843bc55921a6a174f04ebe14 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 22 Jan 2016 12:17:25 -0800 Subject: gen8_state: Enable all cube faces These fields are ignored for non-cube surfaces. For cube surfaces these fields should be enabled when using TEXCOORDMODE_CLAMP and TEXCOORDMODE_CUBE. TODO: Determine if these are the only two modes used in Vulkan. 
--- src/vulkan/gen8_state.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 620a9d4ef13..823bfa047db 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -193,6 +193,12 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, .VerticalLineStrideOffset = 0, .SamplerL2BypassModeDisable = true, .RenderCacheReadWriteMode = WriteOnlyCache, + .CubeFaceEnablePositiveZ = 1, + .CubeFaceEnableNegativeZ = 1, + .CubeFaceEnablePositiveY = 1, + .CubeFaceEnableNegativeY = 1, + .CubeFaceEnablePositiveX = 1, + .CubeFaceEnableNegativeX = 1, .MemoryObjectControlState = GENX(MOCS), /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in -- cgit v1.2.3 From 35879fe8295cca5483d4eb42e19298e36c00b903 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Jan 2016 16:13:36 -0800 Subject: gen8/state: Divide depth by 6 for cube maps for GEN8 For Broadwell cube maps, MinimumArrayElement is in terms of 2d slices (a multiple of 6) but Depth is in terms of whole cubes. 
--- src/vulkan/gen8_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 823bfa047db..3c9d15257a3 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -270,7 +270,7 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, #else /* Same as SURFTYPE_2D */ template.MinimumArrayElement = range->baseArrayLayer; - template.Depth = range->layerCount - 1; + template.Depth = range->layerCount / 6 - 1; template.RenderTargetViewExtent = template.Depth; #endif break; -- cgit v1.2.3 From 53b83899e084f121a35c75539ad9888d5641ec59 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Jan 2016 16:34:13 -0800 Subject: genX/state: Set CubeSurfaceControlMode to OVERRIDE This makes it act like the address mode is set to TEXCOORDMODE_CUBE whenever this sampler is combined with a cube surface. This *should* be what we need for Vulkan. Interestingly, the PRM contains a programming note for this field that says simply, "This field must be set to CUBECTRLMODE_PROGRAMMED". However, empirical evidence suggests that it does what the PRM says it does and OVERRIDE is just fine.
--- src/vulkan/gen7_state.c | 2 +- src/vulkan/gen8_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 1f829be9121..eff5dd20eb7 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -100,7 +100,7 @@ VkResult genX(CreateSampler)( .ChromaKeyIndex = 0, .ChromaKeyMode = 0, .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = 0, + .CubeSurfaceControlMode = OVERRIDE, .BorderColorPointer = device->border_colors.offset + diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 3c9d15257a3..9d56ecd1845 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -357,7 +357,7 @@ VkResult genX(CreateSampler)( .ChromaKeyIndex = 0, .ChromaKeyMode = 0, .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = 0, + .CubeSurfaceControlMode = OVERRIDE, .IndirectStatePointer = border_color_offset >> 6, -- cgit v1.2.3 From bf151b8892ca6c82bac591202f7d519afabf5d9d Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 22 Jan 2016 15:57:01 -0800 Subject: anv/meta: Fix meta blit fragment shader for 1D arrays. --- src/vulkan/anv_meta.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 9e13299e3a2..e273dd7608d 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -83,6 +83,14 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) glsl_vec4_type(), "v_attr"); tex_pos_in->data.location = VARYING_SLOT_VAR0; + /* Swizzle the array index which comes in as Z coordinate into the right + * position. + */ + unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; + nir_ssa_def *const tex_pos = + nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, + (tex_dim == GLSL_SAMPLER_DIM_1D ? 
2 : 3), false); + const struct glsl_type *sampler_type = glsl_sampler_type(tex_dim, false, false, glsl_get_base_type(color_type)); nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, @@ -94,14 +102,13 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) tex->sampler_dim = tex_dim; tex->op = nir_texop_tex; tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(nir_load_var(&b, tex_pos_in)); + tex->src[0].src = nir_src_for_ssa(tex_pos); tex->dest_type = nir_type_float; /* TODO */ if (tex_dim != GLSL_SAMPLER_DIM_3D) tex->is_array = true; - tex->coord_components = 3; - + tex->coord_components = tex_pos->num_components; tex->sampler = nir_deref_var_create(tex, sampler); nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); -- cgit v1.2.3 From 11d5c1905ce3856a3832718bbcb0c65928a3b26b Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 22 Jan 2016 15:57:02 -0800 Subject: anv/meta: Set sampler type and instruction arrayness consistently in blit shader. --- src/vulkan/anv_meta.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index e273dd7608d..083ee371f60 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -92,7 +92,8 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) (tex_dim == GLSL_SAMPLER_DIM_1D ? 
2 : 3), false); const struct glsl_type *sampler_type = - glsl_sampler_type(tex_dim, false, false, glsl_get_base_type(color_type)); + glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, + glsl_get_base_type(color_type)); nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); sampler->data.descriptor_set = 0; @@ -104,10 +105,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) tex->src[0].src_type = nir_tex_src_coord; tex->src[0].src = nir_src_for_ssa(tex_pos); tex->dest_type = nir_type_float; /* TODO */ - - if (tex_dim != GLSL_SAMPLER_DIM_3D) - tex->is_array = true; - + tex->is_array = glsl_sampler_type_is_array(sampler_type); tex->coord_components = tex_pos->num_components; tex->sampler = nir_deref_var_create(tex, sampler); -- cgit v1.2.3 From 6a03c69adb646b6f7c11acc888550c1fb68b5a5d Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 22 Jan 2016 14:48:36 -0800 Subject: anv/state: Dedupe code for lowering surface format Add helper anv_surface_format(). 
--- src/vulkan/gen7_state.c | 5 ++--- src/vulkan/gen8_state.c | 8 +++----- src/vulkan/genX_state_util.h | 11 +++++++++++ 3 files changed, 16 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index eff5dd20eb7..55cff90a723 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -153,6 +153,7 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); struct anv_surface *surface = anv_image_get_surface_for_aspect_mask(image, range->aspectMask); @@ -170,9 +171,7 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, .SurfaceType = anv_surftype(image, pCreateInfo->viewType, usage == VK_IMAGE_USAGE_STORAGE_BIT), .SurfaceArray = image->array_size > 1, - .SurfaceFormat = (usage != VK_IMAGE_USAGE_STORAGE_BIT ? 
iview->format : - isl_lower_storage_image_format( - &device->isl_dev, iview->format)), + .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], .SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 9d56ecd1845..01099f4e3d9 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -164,6 +164,7 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); struct anv_surface *surface = anv_image_get_surface_for_aspect_mask(image, range->aspectMask); @@ -180,12 +181,9 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, get_halign_valign(&surface->isl, &halign, &valign); struct GENX(RENDER_SURFACE_STATE) template = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType, - usage == VK_IMAGE_USAGE_STORAGE_BIT), + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, is_storage), .SurfaceArray = image->array_size > 1, - .SurfaceFormat = (usage != VK_IMAGE_USAGE_STORAGE_BIT ? 
iview->format : - isl_lower_storage_image_format( - &device->isl_dev, iview->format)), + .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), .SurfaceVerticalAlignment = valign, .SurfaceHorizontalAlignment = halign, .TileMode = isl_to_gen_tiling[surface->isl.tiling], diff --git a/src/vulkan/genX_state_util.h b/src/vulkan/genX_state_util.h index 215e9ba30eb..78fe1d4da19 100644 --- a/src/vulkan/genX_state_util.h +++ b/src/vulkan/genX_state_util.h @@ -46,6 +46,17 @@ anv_surftype(const struct anv_image *image, VkImageViewType view_type, } } +static enum isl_format +anv_surface_format(const struct anv_device *device, enum isl_format format, + bool storage) +{ + if (storage) { + return isl_lower_storage_image_format(&device->isl_dev, format); + } else { + return format; + } +} + #if ANV_GEN > 7 || ANV_IS_HASWELL static const uint32_t vk_to_gen_swizzle_map[] = { [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, -- cgit v1.2.3 From d4de918ad086683c4df4b7d861c27bbce61d6c92 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 22 Jan 2016 15:07:12 -0800 Subject: gen8/state: Remove SKL special-casing for MinimumArrayElement MinimumArrayElement carries the same meaning for BDW and SKL. Suggested by Jason. No regressions in dEQP-VK.pipeline.image.view_type.cube_array.* Fixes a number of cube tests, including cube_array_base_slice and cube_base_slice tests. --- src/vulkan/gen8_state.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 01099f4e3d9..ce142e6fb26 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -260,17 +260,10 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, template.RenderTargetViewExtent = template.Depth; break; case SURFTYPE_CUBE: - #if ANV_GENx10 >= 90 - /* Like SURFTYPE_2D, but divided by 6. 
*/ - template.MinimumArrayElement = range->baseArrayLayer / 6; - template.Depth = range->layerCount / 6 - 1; - template.RenderTargetViewExtent = template.Depth; - #else - /* Same as SURFTYPE_2D */ - template.MinimumArrayElement = range->baseArrayLayer; - template.Depth = range->layerCount / 6 - 1; - template.RenderTargetViewExtent = template.Depth; - #endif + template.MinimumArrayElement = range->baseArrayLayer; + /* Same as SURFTYPE_2D, but divided by 6 */ + template.Depth = range->layerCount / 6 - 1; + template.RenderTargetViewExtent = template.Depth; break; case SURFTYPE_3D: template.MinimumArrayElement = range->baseArrayLayer; -- cgit v1.2.3 From 14b753f666eed3933ba155fb493e6142c33cb96e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 20 Jan 2016 15:53:54 -0800 Subject: isl: Add func isl_device_get_sample_counts() --- src/isl/isl.c | 32 ++++++++++++++++++++++++++++++++ src/isl/isl.h | 15 +++++++++++++++ 2 files changed, 47 insertions(+) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 40663ca18af..acc80eae59c 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -67,6 +67,38 @@ isl_device_init(struct isl_device *dev, assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); } +/** + * @brief Query the set of multisamples supported by the device. + * + * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always + * supported. 
+ */ +isl_sample_count_mask_t ATTRIBUTE_CONST +isl_device_get_sample_counts(struct isl_device *dev) +{ + if (ISL_DEV_GEN(dev) >= 9) { + return ISL_SAMPLE_COUNT_1_BIT | + ISL_SAMPLE_COUNT_2_BIT | + ISL_SAMPLE_COUNT_4_BIT | + ISL_SAMPLE_COUNT_8_BIT | + ISL_SAMPLE_COUNT_16_BIT; + } else if (ISL_DEV_GEN(dev) >= 8) { + return ISL_SAMPLE_COUNT_1_BIT | + ISL_SAMPLE_COUNT_2_BIT | + ISL_SAMPLE_COUNT_4_BIT | + ISL_SAMPLE_COUNT_8_BIT; + } else if (ISL_DEV_GEN(dev) >= 7) { + return ISL_SAMPLE_COUNT_1_BIT | + ISL_SAMPLE_COUNT_4_BIT | + ISL_SAMPLE_COUNT_8_BIT; + } else if (ISL_DEV_GEN(dev) >= 6) { + return ISL_SAMPLE_COUNT_1_BIT | + ISL_SAMPLE_COUNT_4_BIT; + } else { + return ISL_SAMPLE_COUNT_1_BIT; + } +} + /** * @param[out] info is written only on success */ diff --git a/src/isl/isl.h b/src/isl/isl.h index 9b99e00ec1c..85e43b8b35b 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -457,6 +457,18 @@ typedef uint64_t isl_surf_usage_flags_t; #define ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT (1u << 11) /** @} */ +/** + * Identical to VkSampleCountFlagBits. + */ +enum isl_sample_count { + ISL_SAMPLE_COUNT_1_BIT = 1u, + ISL_SAMPLE_COUNT_2_BIT = 2u, + ISL_SAMPLE_COUNT_4_BIT = 4u, + ISL_SAMPLE_COUNT_8_BIT = 8u, + ISL_SAMPLE_COUNT_16_BIT = 16u, +}; +typedef uint32_t isl_sample_count_mask_t; + /** * @brief Multisample Format */ @@ -690,6 +702,9 @@ isl_device_init(struct isl_device *dev, const struct brw_device_info *info, bool has_bit6_swizzling); +isl_sample_count_mask_t ATTRIBUTE_CONST +isl_device_get_sample_counts(struct isl_device *dev); + static inline const struct isl_format_layout * ATTRIBUTE_CONST isl_format_get_layout(enum isl_format fmt) { -- cgit v1.2.3 From 1c5d7b38e288314d1affbf75887b40de6596ca4d Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 20 Jan 2016 16:04:28 -0800 Subject: anv: Use isl_device_get_sample_counts() Use it in vkGetPhysicalDeviceProperties. 
--- src/vulkan/anv_device.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 7d0f25e7d70..ed9bb0852c8 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -397,10 +397,7 @@ void anv_GetPhysicalDeviceProperties( const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0; VkSampleCountFlags sample_counts = - VK_SAMPLE_COUNT_1_BIT | - VK_SAMPLE_COUNT_2_BIT | - VK_SAMPLE_COUNT_4_BIT | - VK_SAMPLE_COUNT_8_BIT; + isl_device_get_sample_counts(&pdevice->isl_dev); VkPhysicalDeviceLimits limits = { .maxImageDimension1D = (1 << 14), -- cgit v1.2.3 From dfcb4ee6df5d069ab409ae532a6ad5c5eeda91db Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 20 Jan 2016 16:34:23 -0800 Subject: anv: Add anv_image::samples It's set but not yet used. --- src/vulkan/anv_image.c | 1 + src/vulkan/anv_private.h | 1 + 2 files changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index b51938740bd..6fcf9885a1e 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -204,6 +204,7 @@ anv_image_create(VkDevice _device, image->format = anv_format_for_vk_format(pCreateInfo->format); image->levels = pCreateInfo->mipLevels; image->array_size = pCreateInfo->arrayLayers; + image->samples = pCreateInfo->samples; image->usage = anv_image_get_full_usage(pCreateInfo); image->tiling = pCreateInfo->tiling; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 6d5551a5b69..d121857352d 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1522,6 +1522,7 @@ struct anv_image { VkExtent3D extent; uint32_t levels; uint32_t array_size; + uint32_t samples; /**< VkImageCreateInfo::samples */ VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. 
*/ VkImageTiling tiling; /** VkImageCreateInfo::tiling */ -- cgit v1.2.3 From fa5f45e8aa95189244edb12a3dc0af165418086a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 20 Jan 2016 17:03:29 -0800 Subject: anv/meta: Assert correct sample counts for blit funcs Add assertions to: anv_CmdBlitImage anv_CmdCopyImage anv_CmdCopyImageToBuffer anv_CmdCopyBufferToImage --- src/vulkan/anv_meta.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 083ee371f60..483469348c8 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -503,6 +503,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, float tex_coord[3]; } *vb_data; + assert(src_image->samples == dest_image->samples); + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); struct anv_state vb_state = @@ -987,9 +989,15 @@ void anv_CmdCopyImage( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dest_image, destImage); - struct anv_meta_saved_state saved_state; + /* From the Vulkan 1.0 spec: + * + * vkCmdCopyImage can be used to copy image data between multisample + * images, but both images must have the same number of samples. + */ + assert(src_image->samples == dest_image->samples); + meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { @@ -1090,9 +1098,16 @@ void anv_CmdBlitImage( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dest_image, destImage); - struct anv_meta_saved_state saved_state; + /* From the Vulkan 1.0 spec: + * + * vkCmdBlitImage must not be used for multisampled source or + * destination images. Use vkCmdResolveImage for this purpose. 
+ */ + assert(src_image->samples == 1); + assert(dest_image->samples == 1); + anv_finishme("respect VkFilter"); meta_prepare_blit(cmd_buffer, &saved_state); @@ -1233,6 +1248,11 @@ void anv_CmdCopyBufferToImage( VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); struct anv_meta_saved_state saved_state; + /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(dest_image->samples == 1); + meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { @@ -1345,6 +1365,12 @@ void anv_CmdCopyImageToBuffer( VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); struct anv_meta_saved_state saved_state; + + /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(src_image->samples == 1); + meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { -- cgit v1.2.3 From 2fa1f745ea2bbb7a65883c3e6d05de38b21f76a9 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 22 Jan 2016 17:06:44 -0800 Subject: isl: Add func isl_tiling_is_any_y() --- src/isl/isl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/isl/isl.h b/src/isl/isl.h index 85e43b8b35b..8905d868f4e 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -778,6 +778,12 @@ enum isl_format isl_lower_storage_image_format(const struct isl_device *dev, enum isl_format fmt); +static inline bool +isl_tiling_is_any_y(enum isl_tiling tiling) +{ + return (1u << tiling) & ISL_TILING_ANY_MASK; +} + static inline bool isl_tiling_is_std_y(enum isl_tiling tiling) { -- cgit v1.2.3 From fda074b23fbc56fa3f53de234a77d6259f82fbea Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 22 Jan 2016 17:06:55 -0800 Subject: isl: Fix gen8_choose_msaa_layout() Gen8 requires any Y tiling, not any *standard* Y tiling. 
--- src/isl/isl_gen8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/isl/isl_gen8.c b/src/isl/isl_gen8.c index 2f434aabb2e..a46427aacc8 100644 --- a/src/isl/isl_gen8.c +++ b/src/isl/isl_gen8.c @@ -48,7 +48,7 @@ gen8_choose_msaa_layout(const struct isl_device *dev, * * As usual, though, stencil is special. */ - if (!isl_tiling_is_std_y(tiling) && !isl_surf_usage_is_stencil(info->usage)) + if (!isl_tiling_is_any_y(tiling) && !isl_surf_usage_is_stencil(info->usage)) return false; /* From the Broadwell PRM >> Volume2d: Command Structures >> -- cgit v1.2.3 From d96d78c3b6b768752a955d3f1d018b743c52d596 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 22 Jan 2016 17:16:20 -0800 Subject: anv/image: Drop assertion that samples == 1 --- src/vulkan/anv_image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 6fcf9885a1e..afd01e9d9fb 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -187,7 +187,7 @@ anv_image_create(VkDevice _device, anv_assert(pCreateInfo->mipLevels > 0); anv_assert(pCreateInfo->arrayLayers > 0); - anv_assert(pCreateInfo->samples == VK_SAMPLE_COUNT_1_BIT); + anv_assert(pCreateInfo->samples > 0); anv_assert(pCreateInfo->extent.width > 0); anv_assert(pCreateInfo->extent.height > 0); anv_assert(pCreateInfo->extent.depth > 0); -- cgit v1.2.3 From 149d5ba64d1ba8fce647c90ed9f9384d461919c2 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 22 Jan 2016 17:47:59 -0800 Subject: anv/formats: Advertise multisample formats Teach vkGetPhysicalDeviceImageFormatProperties() to advertise multisampled formats. 
--- src/vulkan/anv_formats.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index d480ee7cb9d..8d634ca85f2 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -429,6 +429,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( VkExtent3D maxExtent; uint32_t maxMipLevels; uint32_t maxArraySize; + VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; anv_physical_device_get_format_properties(physical_device, format, &format_props); @@ -453,6 +454,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( maxExtent.depth = 1; maxMipLevels = 15; /* log2(maxWidth) + 1 */ maxArraySize = 2048; + sampleCounts = VK_SAMPLE_COUNT_1_BIT; break; case VK_IMAGE_TYPE_2D: /* FINISHME: Does this really differ for cube maps? The documentation @@ -473,6 +475,15 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( break; } + if (tiling == VK_IMAGE_TILING_OPTIMAL && + type == VK_IMAGE_TYPE_2D && + (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && + !(flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && + !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + sampleCounts = isl_device_get_sample_counts(&physical_device->isl_dev); + } + if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { /* Meta implements transfers by sampling from the source image. */ if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { @@ -529,9 +540,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( .maxExtent = maxExtent, .maxMipLevels = maxMipLevels, .maxArrayLayers = maxArraySize, - - /* FINISHME: Support multisampling */ - .sampleCounts = VK_SAMPLE_COUNT_1_BIT, + .sampleCounts = sampleCounts, /* FINISHME: Accurately calculate * VkImageFormatProperties::maxResourceSize. 
-- cgit v1.2.3 From 99a48853289372b0709dfe67367d6f339639e35e Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 22 Jan 2016 17:51:24 -0800 Subject: anv/formats: Rename ambiguous func parameter vkGetPhysicalDeviceImageFormatProperties has multiple 'flags' parameters. --- src/vulkan/anv_formats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 8d634ca85f2..1f735a4dcd2 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -420,7 +420,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, - VkImageCreateFlags flags, + VkImageCreateFlags createFlags, VkImageFormatProperties* pImageFormatProperties) { ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); @@ -479,7 +479,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( type == VK_IMAGE_TYPE_2D && (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && - !(flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && + !(createFlags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) { sampleCounts = isl_device_get_sample_counts(&physical_device->isl_dev); } -- cgit v1.2.3 From 0b6c1275d02fd9a20ea627bcf43dc9381e96bc87 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Jan 2016 19:02:55 -0800 Subject: anv/pipeline: Add a default L3$ setup --- src/vulkan/anv_cmd_buffer.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 4591dd95fec..fe85e3e164e 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -237,6 +237,9 @@ static VkResult anv_create_cmd_buffer( cmd_buffer->level = level; cmd_buffer->state.attachments = NULL; + /* 0 isn't a valid config. This ensures that we always configure L3$. 
*/ + cmd_buffer->state.current_l3_config = 0; + result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); if (result != VK_SUCCESS) goto fail; -- cgit v1.2.3 From 259e1bdf79491d355cb306545048d58e0544eb09 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Jan 2016 19:03:27 -0800 Subject: anv/formats: Add support for 3 more formats --- src/vulkan/anv_formats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 1f735a4dcd2..3f3eace3c2f 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -38,13 +38,13 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW), fmt(VK_FORMAT_R4G4_UNORM_PACK8, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM), fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R5G6B5_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R5G6B5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM), fmt(VK_FORMAT_B5G6R5_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16, ISL_FORMAT_A1B5G5R5_UNORM), fmt(VK_FORMAT_B5G5R5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16, ISL_FORMAT_B5G5R5A1_UNORM), fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM), fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM), fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED), -- cgit v1.2.3 From 2bfb9f29b828ebc6bba4807043cd2214078fe2c5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Jan 2016 19:14:41 -0800 Subject: anv/format: Add a helpful comment about format names --- src/vulkan/anv_formats.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 
3f3eace3c2f..642bc7d3cb1 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -35,6 +35,11 @@ __VA_ARGS__ \ } +/* HINT: For array formats, the ISL name should match the VK name. For + * packed formats, they should have the channels in reverse order from each + * other. The reason for this is that, for packed formats, the ISL (and + * bspec) names are in LSB -> MSB order while VK formats are MSB -> LSB. + */ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW), fmt(VK_FORMAT_R4G4_UNORM_PACK8, ISL_FORMAT_UNSUPPORTED), -- cgit v1.2.3 From 89672d81f3c773e2fabc42773f36b7e0aedcc819 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Jan 2016 21:24:53 -0800 Subject: i965/nir: Properly flush denormals in nir_op_fquantize2f16 --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 27 +++++++++++++++++++++------ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 24 ++++++++++++++++++++---- 2 files changed, 41 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index eded5a90f7d..65a0ffc4d8d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -967,14 +967,29 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; case nir_op_fquantize2f16: { - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D); + fs_reg tmp16 = bld.vgrf(BRW_REGISTER_TYPE_D); + fs_reg tmp32 = bld.vgrf(BRW_REGISTER_TYPE_F); + fs_reg zero = bld.vgrf(BRW_REGISTER_TYPE_F); /* The destination stride must be at least as big as the source stride. 
*/ - tmp.type = BRW_REGISTER_TYPE_W; - tmp.stride = 2; - - bld.emit(BRW_OPCODE_F32TO16, tmp, op[0]); - inst = bld.emit(BRW_OPCODE_F16TO32, result, tmp); + tmp16.type = BRW_REGISTER_TYPE_W; + tmp16.stride = 2; + + /* Check for denormal */ + fs_reg abs_src0 = op[0]; + abs_src0.abs = true; + bld.CMP(bld.null_reg_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)), + BRW_CONDITIONAL_L); + /* Get the appropriately signed zero */ + bld.AND(retype(zero, BRW_REGISTER_TYPE_UD), + retype(op[0], BRW_REGISTER_TYPE_UD), + brw_imm_ud(0x80000000)); + /* Do the actual F32 -> F16 -> F32 conversion */ + bld.emit(BRW_OPCODE_F32TO16, tmp16, op[0]); + bld.emit(BRW_OPCODE_F16TO32, tmp32, tmp16); + /* Select that or zero based on normal status */ + inst = bld.SEL(result, zero, tmp32); + inst->predicate = BRW_PREDICATE_NORMAL; inst->saturate = instr->dest.saturate; break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 46cbbfaa590..531113a9df5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1208,10 +1208,26 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_fquantize2f16: { /* See also vec4_visitor::emit_pack_half_2x16() */ - src_reg tmp = src_reg(this, glsl_type::uvec4_type); - - emit(F32TO16(dst_reg(tmp), op[0])); - inst = emit(F16TO32(dst, tmp)); + src_reg tmp16 = src_reg(this, glsl_type::uvec4_type); + src_reg tmp32 = src_reg(this, glsl_type::vec4_type); + src_reg zero = src_reg(this, glsl_type::vec4_type); + + /* Check for denormal */ + src_reg abs_src0 = op[0]; + abs_src0.abs = true; + emit(CMP(dst_null_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)), + BRW_CONDITIONAL_L)); + /* Get the appropriately signed zero */ + emit(AND(retype(dst_reg(zero), BRW_REGISTER_TYPE_UD), + retype(op[0], BRW_REGISTER_TYPE_UD), + brw_imm_ud(0x80000000))); + /* Do the actual F32 -> F16 -> F32 conversion */ + emit(F32TO16(dst_reg(tmp16), op[0])); + emit(F16TO32(dst_reg(tmp32), tmp16)); 
+ /* Select that or zero based on normal status */ + inst = emit(BRW_OPCODE_SEL, dst, zero, tmp32); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->predicate_inverse = true; inst->saturate = instr->dest.saturate; break; } -- cgit v1.2.3 From 9e0bc29f803429606e86ee9bef93ab71fe572c32 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Jan 2016 22:04:09 -0800 Subject: nir/opcodes: Properly flush denormals in fquantize2f16 --- src/glsl/nir/nir_opcodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index 1b176209e20..c5fb0420bb6 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -176,7 +176,7 @@ unop("ffloor", tfloat, "floorf(src0)") unop("ffract", tfloat, "src0 - floorf(src0)") unop("fround_even", tfloat, "_mesa_roundevenf(src0)") -unop("fquantize2f16", tfloat, "_mesa_half_to_float(_mesa_float_to_half(src0))") +unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0))") # Trigonometric operations. -- cgit v1.2.3 From a804d82ef6062cc16c5c3beb254560bfe448bfe0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Jan 2016 22:57:02 -0800 Subject: anv/cmd_buffer: Zero out binding tables and samplers in state_reset This fixes a use of an undefined value if the client uses push constants in a stage without ever setting any descriptors on GEN8-9. 
--- src/vulkan/anv_cmd_buffer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index fe85e3e164e..d2cbf87578a 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -117,6 +117,11 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) memset(&state->descriptors, 0, sizeof(state->descriptors)); memset(&state->push_constants, 0, sizeof(state->push_constants)); + memset(state->binding_tables, 0, sizeof(state->binding_tables)); + memset(state->samplers, 0, sizeof(state->samplers)); + + /* 0 isn't a valid config. This ensures that we always configure L3$. */ + cmd_buffer->state.current_l3_config = 0; state->dirty = ~0; state->vb_dirty = 0; @@ -237,9 +242,6 @@ static VkResult anv_create_cmd_buffer( cmd_buffer->level = level; cmd_buffer->state.attachments = NULL; - /* 0 isn't a valid config. This ensures that we always configure L3$. */ - cmd_buffer->state.current_l3_config = 0; - result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); if (result != VK_SUCCESS) goto fail; -- cgit v1.2.3 From b126039784997ad49c329683443370f85c1bfebd Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 21 Jan 2016 09:54:19 -0800 Subject: nir: Make argument order of unop_convert match binop_convert. Strangely the return and parameter types were reversed. 
--- src/glsl/nir/nir_opcodes.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index e79810c1991..a8bbe1a0b82 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -105,7 +105,7 @@ def opcode(name, output_size, output_type, input_sizes, input_types, opcodes[name] = Opcode(name, output_size, output_type, input_sizes, input_types, algebraic_properties, const_expr) -def unop_convert(name, in_type, out_type, const_expr): +def unop_convert(name, out_type, in_type, const_expr): opcode(name, 0, out_type, [0], [in_type], "", const_expr) def unop(name, ty, const_expr): @@ -155,17 +155,17 @@ unop("frsq", tfloat, "1.0f / sqrtf(src0)") unop("fsqrt", tfloat, "sqrtf(src0)") unop("fexp2", tfloat, "exp2f(src0)") unop("flog2", tfloat, "log2f(src0)") -unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. -unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion -unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion. +unop_convert("f2i", tint, tfloat, "src0") # Float-to-integer conversion. +unop_convert("f2u", tuint, tfloat, "src0") # Float-to-unsigned conversion +unop_convert("i2f", tfloat, tint, "src0") # Integer-to-float conversion. # Float-to-boolean conversion -unop_convert("f2b", tfloat, tbool, "src0 != 0.0f") +unop_convert("f2b", tbool, tfloat, "src0 != 0.0f") # Boolean-to-float conversion -unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f") +unop_convert("b2f", tfloat, tbool, "src0 ? 1.0f : 0.0f") # Int-to-boolean conversion -unop_convert("i2b", tint, tbool, "src0 != 0") -unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion -unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion. +unop_convert("i2b", tbool, tint, "src0 != 0") +unop_convert("b2i", tint, tbool, "src0 ? 
1 : 0") # Boolean-to-int conversion +unop_convert("u2f", tfloat, tuint, "src0") # Unsigned-to-float conversion. # Unary floating-point rounding operations. @@ -264,7 +264,7 @@ for (unsigned bit = 0; bit < 32; bit++) { } """) -unop_convert("ufind_msb", tuint, tint, """ +unop_convert("ufind_msb", tint, tuint, """ dst = -1; for (int bit = 31; bit > 0; bit--) { if ((src0 >> bit) & 1) { -- cgit v1.2.3 From b6bb3b9bcd8fb03c8d307cafa34bc9ed8a4e2f28 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 21 Jan 2016 09:19:53 -0800 Subject: i965: Move brw_compiler_create() to new brw_compiler.c. A future patch will want to use designated initalizers, which aren't available in C++, but this is C. --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_compiler.c | 157 +++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_compiler.h | 3 + src/mesa/drivers/dri/i965/brw_shader.cpp | 130 ------------------------ src/mesa/drivers/dri/i965/brw_shader.h | 3 - 5 files changed, 161 insertions(+), 133 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_compiler.c (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index cea1e87ccde..caabb0decfb 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -1,6 +1,7 @@ i965_compiler_FILES = \ brw_cfg.cpp \ brw_cfg.h \ + brw_compiler.c \ brw_compiler.h \ brw_dead_control_flow.cpp \ brw_dead_control_flow.h \ diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c new file mode 100644 index 00000000000..6ca59c7392c --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -0,0 +1,157 @@ +/* + * Copyright © 2015-2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_compiler.h" +#include "brw_context.h" +#include "glsl/nir/nir.h" +#include "main/errors.h" +#include "util/debug.h" + +static void +shader_debug_log_mesa(void *data, const char *fmt, ...) +{ + struct brw_context *brw = (struct brw_context *)data; + va_list args; + + va_start(args, fmt); + GLuint msg_id = 0; + _mesa_gl_vdebug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_OTHER, + MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args); + va_end(args); +} + +static void +shader_perf_log_mesa(void *data, const char *fmt, ...) 
+{ + struct brw_context *brw = (struct brw_context *)data; + + va_list args; + va_start(args, fmt); + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + va_list args_copy; + va_copy(args_copy, args); + vfprintf(stderr, fmt, args_copy); + va_end(args_copy); + } + + if (brw->perf_debug) { + GLuint msg_id = 0; + _mesa_gl_vdebug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_PERFORMANCE, + MESA_DEBUG_SEVERITY_MEDIUM, fmt, args); + } + va_end(args); +} + +struct brw_compiler * +brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) +{ + struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler); + + compiler->devinfo = devinfo; + compiler->shader_debug_log = shader_debug_log_mesa; + compiler->shader_perf_log = shader_perf_log_mesa; + + brw_fs_alloc_reg_sets(compiler); + brw_vec4_alloc_reg_set(compiler); + + compiler->scalar_stage[MESA_SHADER_VERTEX] = + devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); + compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false; + compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = + devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true); + compiler->scalar_stage[MESA_SHADER_GEOMETRY] = + devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false); + compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; + compiler->scalar_stage[MESA_SHADER_COMPUTE] = true; + + nir_shader_compiler_options *nir_options = + rzalloc(compiler, nir_shader_compiler_options); + nir_options->native_integers = true; + nir_options->lower_fdiv = true; + /* In order to help allow for better CSE at the NIR level we tell NIR + * to split all ffma instructions during opt_algebraic and we then + * re-combine them as a later step. 
+ */ + nir_options->lower_ffma = true; + nir_options->lower_sub = true; + nir_options->lower_fdiv = true; + nir_options->lower_scmp = true; + nir_options->lower_fmod = true; + nir_options->lower_bitfield_extract = true; + nir_options->lower_bitfield_insert = true; + nir_options->lower_uadd_carry = true; + nir_options->lower_usub_borrow = true; + + /* In the vec4 backend, our dpN instruction replicates its result to all + * the components of a vec4. We would like NIR to give us replicated fdot + * instructions because it can optimize better for us. + * + * For the FS backend, it should be lowered away by the scalarizing pass so + * we should never see fdot anyway. + */ + nir_options->fdot_replicates = true; + + /* We want the GLSL compiler to emit code that uses condition codes */ + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + compiler->glsl_compiler_options[i].MaxUnrollIterations = 32; + compiler->glsl_compiler_options[i].MaxIfDepth = + devinfo->gen < 6 ? 16 : UINT_MAX; + + compiler->glsl_compiler_options[i].EmitCondCodes = true; + compiler->glsl_compiler_options[i].EmitNoNoise = true; + compiler->glsl_compiler_options[i].EmitNoMainReturn = true; + compiler->glsl_compiler_options[i].EmitNoIndirectInput = true; + compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; + compiler->glsl_compiler_options[i].LowerClipDistance = true; + + bool is_scalar = compiler->scalar_stage[i]; + + compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar; + compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar; + compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar; + + /* !ARB_gpu_shader5 */ + if (devinfo->gen < 7) + compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true; + + compiler->glsl_compiler_options[i].NirOptions = nir_options; + + compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; + } + + compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false; + 
compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false; + + if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) + compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false; + + compiler->glsl_compiler_options[MESA_SHADER_COMPUTE] + .LowerShaderSharedVariables = true; + + return compiler; +} diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 224ddb14ed1..ec7d8be434b 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -686,6 +686,9 @@ struct brw_gs_prog_data /** @} */ +struct brw_compiler * +brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo); + /** * Compile a vertex shader. * diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index d92bad25a72..e42601b6f3e 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -29,136 +29,6 @@ #include "brw_vec4_tes.h" #include "main/shaderobj.h" #include "main/uniforms.h" -#include "util/debug.h" - -static void -shader_debug_log_mesa(void *data, const char *fmt, ...) -{ - struct brw_context *brw = (struct brw_context *)data; - va_list args; - - va_start(args, fmt); - GLuint msg_id = 0; - _mesa_gl_vdebug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args); - va_end(args); -} - -static void -shader_perf_log_mesa(void *data, const char *fmt, ...) 
-{ - struct brw_context *brw = (struct brw_context *)data; - - va_list args; - va_start(args, fmt); - - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { - va_list args_copy; - va_copy(args_copy, args); - vfprintf(stderr, fmt, args_copy); - va_end(args_copy); - } - - if (brw->perf_debug) { - GLuint msg_id = 0; - _mesa_gl_vdebug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_PERFORMANCE, - MESA_DEBUG_SEVERITY_MEDIUM, fmt, args); - } - va_end(args); -} - -struct brw_compiler * -brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) -{ - struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler); - - compiler->devinfo = devinfo; - compiler->shader_debug_log = shader_debug_log_mesa; - compiler->shader_perf_log = shader_perf_log_mesa; - - brw_fs_alloc_reg_sets(compiler); - brw_vec4_alloc_reg_set(compiler); - - compiler->scalar_stage[MESA_SHADER_VERTEX] = - devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); - compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false; - compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = - devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true); - compiler->scalar_stage[MESA_SHADER_GEOMETRY] = - devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false); - compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; - compiler->scalar_stage[MESA_SHADER_COMPUTE] = true; - - nir_shader_compiler_options *nir_options = - rzalloc(compiler, nir_shader_compiler_options); - nir_options->native_integers = true; - nir_options->lower_fdiv = true; - /* In order to help allow for better CSE at the NIR level we tell NIR - * to split all ffma instructions during opt_algebraic and we then - * re-combine them as a later step. 
- */ - nir_options->lower_ffma = true; - nir_options->lower_sub = true; - nir_options->lower_fdiv = true; - nir_options->lower_scmp = true; - nir_options->lower_fmod = true; - nir_options->lower_bitfield_extract = true; - nir_options->lower_bitfield_insert = true; - nir_options->lower_uadd_carry = true; - nir_options->lower_usub_borrow = true; - - /* In the vec4 backend, our dpN instruction replicates its result to all - * the components of a vec4. We would like NIR to give us replicated fdot - * instructions because it can optimize better for us. - * - * For the FS backend, it should be lowered away by the scalarizing pass so - * we should never see fdot anyway. - */ - nir_options->fdot_replicates = true; - - /* We want the GLSL compiler to emit code that uses condition codes */ - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - compiler->glsl_compiler_options[i].MaxUnrollIterations = 32; - compiler->glsl_compiler_options[i].MaxIfDepth = - devinfo->gen < 6 ? 16 : UINT_MAX; - - compiler->glsl_compiler_options[i].EmitCondCodes = true; - compiler->glsl_compiler_options[i].EmitNoNoise = true; - compiler->glsl_compiler_options[i].EmitNoMainReturn = true; - compiler->glsl_compiler_options[i].EmitNoIndirectInput = true; - compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; - compiler->glsl_compiler_options[i].LowerClipDistance = true; - - bool is_scalar = compiler->scalar_stage[i]; - - compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar; - compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar; - compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar; - - /* !ARB_gpu_shader5 */ - if (devinfo->gen < 7) - compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true; - - compiler->glsl_compiler_options[i].NirOptions = nir_options; - - compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; - } - - compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false; - 
compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false; - - if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) - compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false; - - compiler->glsl_compiler_options[MESA_SHADER_COMPUTE] - .LowerShaderSharedVariables = true; - - return compiler; -} extern "C" struct gl_shader * brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 593361348fd..82374a46c18 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -259,9 +259,6 @@ struct brw_gs_compile unsigned control_data_header_size_bits; }; -struct brw_compiler * -brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo); - void brw_assign_common_binding_table_offsets(gl_shader_stage stage, const struct brw_device_info *devinfo, -- cgit v1.2.3 From 84166aed927cfa4da8fbf7ece05f8b262192754c Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 21 Jan 2016 09:30:05 -0800 Subject: i965: Make separate nir_options for scalar/vector stages. We'll want to have different lowering options set for scalar/vector stages. --- src/mesa/drivers/dri/i965/brw_compiler.c | 61 +++++++++++++++++--------------- 1 file changed, 33 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index 6ca59c7392c..dbd5ad23cfd 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -67,6 +67,37 @@ shader_perf_log_mesa(void *data, const char *fmt, ...) va_end(args); } +#define COMMON_OPTIONS \ + /* In order to help allow for better CSE at the NIR level we tell NIR to \ + * split all ffma instructions during opt_algebraic and we then re-combine \ + * them as a later step. 
\ + */ \ + .lower_ffma = true, \ + .lower_sub = true, \ + .lower_fdiv = true, \ + .lower_scmp = true, \ + .lower_fmod = true, \ + .lower_bitfield_extract = true, \ + .lower_bitfield_insert = true, \ + .lower_uadd_carry = true, \ + .lower_usub_borrow = true, \ + .lower_fdiv = true, \ + .native_integers = true + +static const struct nir_shader_compiler_options scalar_nir_options = { + COMMON_OPTIONS, +}; + +static const struct nir_shader_compiler_options vector_nir_options = { + COMMON_OPTIONS, + + /* In the vec4 backend, our dpN instruction replicates its result to all the + * components of a vec4. We would like NIR to give us replicated fdot + * instructions because it can optimize better for us. + */ + .fdot_replicates = true, +}; + struct brw_compiler * brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) { @@ -89,33 +120,6 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; compiler->scalar_stage[MESA_SHADER_COMPUTE] = true; - nir_shader_compiler_options *nir_options = - rzalloc(compiler, nir_shader_compiler_options); - nir_options->native_integers = true; - nir_options->lower_fdiv = true; - /* In order to help allow for better CSE at the NIR level we tell NIR - * to split all ffma instructions during opt_algebraic and we then - * re-combine them as a later step. - */ - nir_options->lower_ffma = true; - nir_options->lower_sub = true; - nir_options->lower_fdiv = true; - nir_options->lower_scmp = true; - nir_options->lower_fmod = true; - nir_options->lower_bitfield_extract = true; - nir_options->lower_bitfield_insert = true; - nir_options->lower_uadd_carry = true; - nir_options->lower_usub_borrow = true; - - /* In the vec4 backend, our dpN instruction replicates its result to all - * the components of a vec4. We would like NIR to give us replicated fdot - * instructions because it can optimize better for us. 
- * - * For the FS backend, it should be lowered away by the scalarizing pass so - * we should never see fdot anyway. - */ - nir_options->fdot_replicates = true; - /* We want the GLSL compiler to emit code that uses condition codes */ for (int i = 0; i < MESA_SHADER_STAGES; i++) { compiler->glsl_compiler_options[i].MaxUnrollIterations = 32; @@ -139,7 +143,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) if (devinfo->gen < 7) compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true; - compiler->glsl_compiler_options[i].NirOptions = nir_options; + compiler->glsl_compiler_options[i].NirOptions = + is_scalar ? &scalar_nir_options : &vector_nir_options; compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; } -- cgit v1.2.3 From 5eb1145434b3c1bd4bf7260dc3421e0159452d82 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 21 Jan 2016 15:46:47 -0800 Subject: nir: Add lowering of nir_op_unpack_half_2x16. --- src/glsl/nir/nir.h | 3 +++ src/glsl/nir/nir_lower_alu_to_scalar.c | 30 ++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index e2bd2bfa025..11130304170 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1468,6 +1468,9 @@ typedef struct nir_shader_compiler_options { /** lowers ffract to fsub+ffloor: */ bool lower_ffract; + bool lower_pack_half_2x16; + bool lower_unpack_half_2x16; + /** * Does the driver support real 32-bit integers? (Otherwise, integers * are simulated by floats.) 
diff --git a/src/glsl/nir/nir_lower_alu_to_scalar.c b/src/glsl/nir/nir_lower_alu_to_scalar.c index 0a27e66cf0f..5372fbeed88 100644 --- a/src/glsl/nir/nir_lower_alu_to_scalar.c +++ b/src/glsl/nir/nir_lower_alu_to_scalar.c @@ -97,6 +97,20 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) */ return; + case nir_op_pack_half_2x16: + if (!b->shader->options->lower_pack_half_2x16) + return; + + nir_ssa_def *val = + nir_pack_half_2x16_split(b, nir_channel(b, instr->src[0].src.ssa, + instr->src[0].swizzle[0]), + nir_channel(b, instr->src[0].src.ssa, + instr->src[0].swizzle[1])); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); + nir_instr_remove(&instr->instr); + return; + case nir_op_unpack_unorm_4x8: case nir_op_unpack_snorm_4x8: case nir_op_unpack_unorm_2x16: @@ -106,11 +120,19 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) */ return; - case nir_op_unpack_half_2x16: - /* We could split this into unpack_half_2x16_split_[xy], but should - * we? - */ + case nir_op_unpack_half_2x16: { + if (!b->shader->options->lower_unpack_half_2x16) + return; + + nir_ssa_def *comps[2]; + comps[0] = nir_unpack_half_2x16_split_x(b, instr->src[0].src.ssa); + comps[1] = nir_unpack_half_2x16_split_y(b, instr->src[0].src.ssa); + nir_ssa_def *vec = nir_vec(b, comps, 2); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec)); + nir_instr_remove(&instr->instr); return; + } case nir_op_fdph: { nir_ssa_def *sum[4]; -- cgit v1.2.3 From 24d385f85ca040246d549c835905e868d55ee210 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 21 Jan 2016 15:30:57 -0800 Subject: i965/fs: Switch from GLSL IR to NIR for un/packHalf2x16 lowering. 
--- src/mesa/drivers/dri/i965/brw_compiler.c | 2 ++ src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 4 ++++ src/mesa/drivers/dri/i965/brw_link.cpp | 12 +----------- 3 files changed, 7 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index dbd5ad23cfd..21fff1ddf4f 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -86,6 +86,8 @@ shader_perf_log_mesa(void *data, const char *fmt, ...) static const struct nir_shader_compiler_options scalar_nir_options = { COMMON_OPTIONS, + .lower_pack_half_2x16 = true, + .lower_unpack_half_2x16 = true, }; static const struct nir_shader_compiler_options vector_nir_options = { diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 21f0b703d00..566257cf79a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -72,6 +72,9 @@ channel_expressions_predicate(ir_instruction *ir) return false; switch (expr->operation) { + case ir_unop_pack_half_2x16: + return false; + /* these opcodes need to act on the whole vector, * just like texturing. 
*/ @@ -162,6 +165,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) return visit_continue; switch (expr->operation) { + case ir_unop_pack_half_2x16: case ir_unop_interpolate_at_centroid: case ir_binop_interpolate_at_offset: case ir_binop_interpolate_at_sample: diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index 234afd554df..8f2d7600146 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -87,17 +87,7 @@ brw_lower_packing_builtins(struct brw_context *brw, | LOWER_PACK_SNORM_4x8; } - if (brw->gen >= 7) { - /* Gen7 introduced the f32to16 and f16to32 instructions, which can be - * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no - * lowering is needed. For SOA code, the Half2x16 ops must be - * scalarized. - */ - if (compiler->scalar_stage[shader_type]) { - ops |= LOWER_PACK_HALF_2x16_TO_SPLIT - | LOWER_UNPACK_HALF_2x16_TO_SPLIT; - } - } else { + if (brw->gen < 7) { ops |= LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16; } -- cgit v1.2.3 From 26b2cc6f3a23d9e9047c9735d20e3fbcf2291ac2 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 21 Jan 2016 11:46:22 -0800 Subject: glsl: Remove 2x16 half-precision pack/unpack opcodes. i965/fs was the only consumer, and we're now doing the lowering in NIR. 
--- src/glsl/ir.cpp | 9 -- src/glsl/ir.h | 19 ---- src/glsl/ir_optimization.h | 15 ++- src/glsl/ir_validate.cpp | 12 --- src/glsl/lower_packing_builtins.cpp | 105 +-------------------- src/glsl/nir/glsl_to_nir.cpp | 9 -- .../dri/i965/brw_fs_channel_expressions.cpp | 3 - src/mesa/program/ir_to_mesa.cpp | 3 - src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 - 9 files changed, 8 insertions(+), 170 deletions(-) (limited to 'src') diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index b424edd8e96..db1947453ea 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -298,8 +298,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) break; case ir_unop_noise: - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: this->type = glsl_type::float_type; break; @@ -422,10 +420,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1) this->type = op0->type->get_base_type(); break; - case ir_binop_pack_half_2x16_split: - this->type = glsl_type::uint_type; - break; - case ir_binop_imul_high: case ir_binop_carry: case ir_binop_borrow: @@ -555,8 +549,6 @@ static const char *const operator_strs[] = { "unpackUnorm2x16", "unpackUnorm4x8", "unpackHalf2x16", - "unpackHalf2x16_split_x", - "unpackHalf2x16_split_y", "bitfield_reverse", "bit_count", "find_msb", @@ -599,7 +591,6 @@ static const char *const operator_strs[] = { "min", "max", "pow", - "packHalf2x16_split", "ubo_load", "ldexp", "vector_extract", diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 5b845c6e856..b453187c32a 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1401,16 +1401,6 @@ enum ir_expression_operation { ir_unop_unpack_half_2x16, /*@}*/ - /** - * \name Lowered floating point unpacking operations. - * - * \see lower_packing_builtins_visitor::split_unpack_half_2x16 - */ - /*@{*/ - ir_unop_unpack_half_2x16_split_x, - ir_unop_unpack_half_2x16_split_y, - /*@}*/ - /** * \name Bit operations, part of ARB_gpu_shader5. 
*/ @@ -1541,15 +1531,6 @@ enum ir_expression_operation { ir_binop_pow, - /** - * \name Lowered floating point packing operations. - * - * \see lower_packing_builtins_visitor::split_pack_half_2x16 - */ - /*@{*/ - ir_binop_pack_half_2x16_split, - /*@}*/ - /** * Load a value the size of a given GLSL type from a uniform block. * diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index be86f547f77..b56413a1500 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -58,17 +58,14 @@ enum lower_packing_builtins_op { LOWER_PACK_HALF_2x16 = 0x0010, LOWER_UNPACK_HALF_2x16 = 0x0020, - LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040, - LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080, + LOWER_PACK_SNORM_4x8 = 0x0040, + LOWER_UNPACK_SNORM_4x8 = 0x0080, - LOWER_PACK_SNORM_4x8 = 0x0100, - LOWER_UNPACK_SNORM_4x8 = 0x0200, + LOWER_PACK_UNORM_4x8 = 0x0100, + LOWER_UNPACK_UNORM_4x8 = 0x0200, - LOWER_PACK_UNORM_4x8 = 0x0400, - LOWER_UNPACK_UNORM_4x8 = 0x0800, - - LOWER_PACK_USE_BFI = 0x1000, - LOWER_PACK_USE_BFE = 0x2000, + LOWER_PACK_USE_BFI = 0x0400, + LOWER_PACK_USE_BFE = 0x0800, }; bool do_common_optimization(exec_list *ir, bool linked, diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index 94814799b9b..12928836597 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -372,12 +372,6 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type == glsl_type::uint_type); break; - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: - assert(ir->type == glsl_type::float_type); - assert(ir->operands[0]->type == glsl_type::uint_type); - break; - case ir_unop_unpack_double_2x32: assert(ir->type == glsl_type::uvec2_type); assert(ir->operands[0]->type == glsl_type::double_type); @@ -567,12 +561,6 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type == ir->operands[1]->type); break; - case ir_binop_pack_half_2x16_split: - assert(ir->type == glsl_type::uint_type); - 
assert(ir->operands[0]->type == glsl_type::float_type); - assert(ir->operands[1]->type == glsl_type::float_type); - break; - case ir_binop_ubo_load: assert(ir->operands[0]->type == glsl_type::uint_type); diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp index 7f18238bc6e..a41627bd561 100644 --- a/src/glsl/lower_packing_builtins.cpp +++ b/src/glsl/lower_packing_builtins.cpp @@ -43,13 +43,6 @@ public: : op_mask(op_mask), progress(false) { - /* Mutually exclusive options. */ - assert(!((op_mask & LOWER_PACK_HALF_2x16) && - (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT))); - - assert(!((op_mask & LOWER_UNPACK_HALF_2x16) && - (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT))); - factory.instructions = &factory_instructions; } @@ -96,9 +89,6 @@ public: case LOWER_PACK_HALF_2x16: *rvalue = lower_pack_half_2x16(op0); break; - case LOWER_PACK_HALF_2x16_TO_SPLIT: - *rvalue = split_pack_half_2x16(op0); - break; case LOWER_UNPACK_SNORM_2x16: *rvalue = lower_unpack_snorm_2x16(op0); break; @@ -114,9 +104,6 @@ public: case LOWER_UNPACK_HALF_2x16: *rvalue = lower_unpack_half_2x16(op0); break; - case LOWER_UNPACK_HALF_2x16_TO_SPLIT: - *rvalue = split_unpack_half_2x16(op0); - break; case LOWER_PACK_UNPACK_NONE: case LOWER_PACK_USE_BFI: case LOWER_PACK_USE_BFE: @@ -161,7 +148,7 @@ private: result = op_mask & LOWER_PACK_UNORM_4x8; break; case ir_unop_pack_half_2x16: - result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT); + result = op_mask & LOWER_PACK_HALF_2x16; break; case ir_unop_unpack_snorm_2x16: result = op_mask & LOWER_UNPACK_SNORM_2x16; @@ -176,7 +163,7 @@ private: result = op_mask & LOWER_UNPACK_UNORM_4x8; break; case ir_unop_unpack_half_2x16: - result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT); + result = op_mask & LOWER_UNPACK_HALF_2x16; break; default: result = LOWER_PACK_UNPACK_NONE; @@ -1092,41 +1079,6 @@ private: return result; } - /** - * \brief Split packHalf2x16's vec2 operand into two 
floats. - * - * \param vec2_rval is packHalf2x16's input - * \return a uint rvalue - * - * Some code generators, such as the i965 fragment shader, require that all - * vector expressions be lowered to a sequence of scalar expressions. - * However, packHalf2x16 cannot be scalarized by the same mechanism as - * a true vector operation because its input and output have a differing - * number of vector components. - * - * This method scalarizes packHalf2x16 by transforming it from an unary - * operation having vector input to a binary operation having scalar input. - * That is, it transforms - * - * packHalf2x16(VEC2_RVAL); - * - * into - * - * vec2 v = VEC2_RVAL; - * return packHalf2x16_split(v.x, v.y); - */ - ir_rvalue* - split_pack_half_2x16(ir_rvalue *vec2_rval) - { - assert(vec2_rval->type == glsl_type::vec2_type); - - ir_variable *v = factory.make_temp(glsl_type::vec2_type, - "tmp_split_pack_half_2x16_v"); - factory.emit(assign(v, vec2_rval)); - - return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v)); - } - /** * \brief Lower the component-wise calculation of unpackHalf2x16. * @@ -1341,59 +1293,6 @@ private: assert(result->type == glsl_type::vec2_type); return result; } - - /** - * \brief Split unpackHalf2x16 into two operations. - * - * \param uint_rval is unpackHalf2x16's input - * \return a vec2 rvalue - * - * Some code generators, such as the i965 fragment shader, require that all - * vector expressions be lowered to a sequence of scalar expressions. - * However, unpackHalf2x16 cannot be scalarized by the same method as - * a true vector operation because the number of components of its input - * and output differ. - * - * This method scalarizes unpackHalf2x16 by transforming it from a single - * operation having vec2 output to a pair of operations each having float - * output. 
That is, it transforms - * - * unpackHalf2x16(UINT_RVAL) - * - * into - * - * uint u = UINT_RVAL; - * vec2 v; - * - * v.x = unpackHalf2x16_split_x(u); - * v.y = unpackHalf2x16_split_y(u); - * - * return v; - */ - ir_rvalue* - split_unpack_half_2x16(ir_rvalue *uint_rval) - { - assert(uint_rval->type == glsl_type::uint_type); - - /* uint u = uint_rval; */ - ir_variable *u = factory.make_temp(glsl_type::uint_type, - "tmp_split_unpack_half_2x16_u"); - factory.emit(assign(u, uint_rval)); - - /* vec2 v; */ - ir_variable *v = factory.make_temp(glsl_type::vec2_type, - "tmp_split_unpack_half_2x16_v"); - - /* v.x = unpack_half_2x16_split_x(u); */ - factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u), - WRITEMASK_X)); - - /* v.y = unpack_half_2x16_split_y(u); */ - factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u), - WRITEMASK_Y)); - - return deref(v).val; - } }; } // namespace anonymous diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index c7399ebba0b..bec59c79fde 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -1429,12 +1429,6 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_unpack_half_2x16: result = nir_unpack_half_2x16(&b, srcs[0]); break; - case ir_unop_unpack_half_2x16_split_x: - result = nir_unpack_half_2x16_split_x(&b, srcs[0]); - break; - case ir_unop_unpack_half_2x16_split_y: - result = nir_unpack_half_2x16_split_y(&b, srcs[0]); - break; case ir_unop_bitfield_reverse: result = nir_bitfield_reverse(&b, srcs[0]); break; @@ -1718,9 +1712,6 @@ nir_visitor::visit(ir_expression *ir) } break; - case ir_binop_pack_half_2x16_split: - result = nir_pack_half_2x16_split(&b, srcs[0], srcs[1]); - break; case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break; case ir_triop_fma: result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]); diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 
566257cf79a..b16dd2ffd9e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -403,9 +403,6 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_ssbo_unsized_array_length: unreachable("should have been lowered"); - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: - case ir_binop_pack_half_2x16_split: case ir_unop_interpolate_at_centroid: case ir_binop_interpolate_at_offset: case ir_binop_interpolate_at_sample: diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 9cde28dfc0a..6b2d431af07 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1244,10 +1244,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir) case ir_unop_unpack_unorm_2x16: case ir_unop_unpack_unorm_4x8: case ir_unop_unpack_half_2x16: - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: case ir_unop_unpack_double_2x32: - case ir_binop_pack_half_2x16_split: case ir_unop_bitfield_reverse: case ir_unop_bit_count: case ir_unop_find_msb: diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index d424e3b335f..a06683f31c8 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -2177,12 +2177,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_unpack_snorm_2x16: case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: case ir_unop_unpack_snorm_4x8: case ir_unop_unpack_unorm_4x8: - case ir_binop_pack_half_2x16_split: case ir_quadop_vector: case ir_binop_vector_extract: case ir_triop_vector_insert: -- cgit v1.2.3 From 292031a1a520d6ddc34ef6b96c262d1f9e335410 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 25 Jan 2016 11:24:19 -0800 Subject: anv: Disable fs dispatch for depth/stencil only 
pipelines Fixes most renderpass bugs. --- src/vulkan/anv_meta_clear.c | 14 +-- src/vulkan/anv_pipeline.c | 1 + src/vulkan/gen8_pipeline.c | 258 +++++++++++++++++++++++--------------------- 3 files changed, 138 insertions(+), 135 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 6ba27b97fe2..470b13480d8 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -138,7 +138,7 @@ create_pipeline(struct anv_device *device, VK_NULL_HANDLE, &(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = 2, + .stageCount = fs_nir ? 2 : 1, .pStages = (VkPipelineShaderStageCreateInfo[]) { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -430,17 +430,13 @@ emit_color_clear(struct anv_cmd_buffer *cmd_buffer, static void -build_depthstencil_shaders(struct nir_shader **out_vs, - struct nir_shader **out_fs) +build_depthstencil_shader(struct nir_shader **out_vs) { nir_builder vs_b; - nir_builder fs_b; nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); - nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs"); - fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs"); const struct glsl_type *position_type = glsl_vec4_type(); @@ -457,7 +453,6 @@ build_depthstencil_shaders(struct nir_shader **out_vs, nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); *out_vs = vs_b.shader; - *out_fs = fs_b.shader; } static VkResult @@ -466,9 +461,8 @@ create_depthstencil_pipeline(struct anv_device *device, struct anv_pipeline **pipeline) { struct nir_shader *vs_nir; - struct nir_shader *fs_nir; - build_depthstencil_shaders(&vs_nir, &fs_nir); + build_depthstencil_shader(&vs_nir); const VkPipelineVertexInputStateCreateInfo vi_state = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -522,7 +516,7 @@ 
create_depthstencil_pipeline(struct anv_device *device, .pAttachments = NULL, }; - return create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, + return create_pipeline(device, vs_nir, NULL, &vi_state, &ds_state, &cb_state, &device->meta_state.alloc, /*use_repclear*/ true, pipeline); } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index d66987f1a8c..f52b78628cc 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -1091,6 +1091,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pipeline->vs_simd8 = NO_KERNEL; pipeline->vs_vec4 = NO_KERNEL; pipeline->gs_kernel = NO_KERNEL; + pipeline->ps_ksp0 = NO_KERNEL; pipeline->active_stages = 0; pipeline->total_scratch = 0; diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 2be71a05af8..b23bb4b8895 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -475,142 +475,150 @@ genX(graphics_pipeline_create)( const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - /* TODO: We should clean this up. Among other things, this is mostly - * shared with other gens. - */ - const struct brw_vue_map *fs_input_map; - if (pipeline->gs_kernel == NO_KERNEL) - fs_input_map = &vue_prog_data->vue_map; - else - fs_input_map = &gs_prog_data->base.vue_map; - - struct GENX(3DSTATE_SBE_SWIZ) swiz = { - GENX(3DSTATE_SBE_SWIZ_header), - }; + const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; + if (pipeline->ps_ksp0 == NO_KERNEL) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), + .PixelShaderValid = false); + } else { + /* TODO: We should clean this up. Among other things, this is mostly + * shared with other gens. 
+ */ + const struct brw_vue_map *fs_input_map; + if (pipeline->gs_kernel == NO_KERNEL) + fs_input_map = &vue_prog_data->vue_map; + else + fs_input_map = &gs_prog_data->base.vue_map; + + struct GENX(3DSTATE_SBE_SWIZ) swiz = { + GENX(3DSTATE_SBE_SWIZ_header), + }; - int max_source_attr = 0; - for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { - int input_index = wm_prog_data->urb_setup[attr]; - - if (input_index < 0) - continue; - - int source_attr = fs_input_map->varying_to_slot[attr]; - max_source_attr = MAX2(max_source_attr, source_attr); - - if (input_index >= 16) - continue; - - if (source_attr == -1) { - /* This attribute does not exist in the VUE--that means that the - * vertex shader did not write to it. It could be that it's a - * regular varying read by the fragment shader but not written by the - * vertex shader or it's gl_PrimitiveID. In the first case the value - * is undefined, in the second it needs to be gl_PrimitiveID. - */ - swiz.Attribute[input_index].ConstantSource = PRIM_ID; - swiz.Attribute[input_index].ComponentOverrideX = true; - swiz.Attribute[input_index].ComponentOverrideY = true; - swiz.Attribute[input_index].ComponentOverrideZ = true; - swiz.Attribute[input_index].ComponentOverrideW = true; - } else { - /* We have to subtract two slots to accout for the URB entry output - * read offset in the VS and GS stages. - */ - swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + int max_source_attr = 0; + for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { + int input_index = wm_prog_data->urb_setup[attr]; + + if (input_index < 0) + continue; + + int source_attr = fs_input_map->varying_to_slot[attr]; + max_source_attr = MAX2(max_source_attr, source_attr); + + if (input_index >= 16) + continue; + + if (source_attr == -1) { + /* This attribute does not exist in the VUE--that means that the + * vertex shader did not write to it. 
It could be that it's a + * regular varying read by the fragment shader but not written by + * the vertex shader or it's gl_PrimitiveID. In the first case the + * value is undefined, in the second it needs to be + * gl_PrimitiveID. + */ + swiz.Attribute[input_index].ConstantSource = PRIM_ID; + swiz.Attribute[input_index].ComponentOverrideX = true; + swiz.Attribute[input_index].ComponentOverrideY = true; + swiz.Attribute[input_index].ComponentOverrideZ = true; + swiz.Attribute[input_index].ComponentOverrideW = true; + } else { + /* We have to subtract two slots to accout for the URB entry output + * read offset in the VS and GS stages. + */ + swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + } } - } - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), - .AttributeSwizzleEnable = true, - .ForceVertexURBEntryReadLength = false, - .ForceVertexURBEntryReadOffset = false, - .VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2), - .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), + .AttributeSwizzleEnable = true, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .VertexURBEntryReadLength = + DIV_ROUND_UP(max_source_attr + 1, 2), + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs, #if ANV_GEN >= 9 - .Attribute0ActiveComponentFormat = ACF_XYZW, - .Attribute1ActiveComponentFormat = ACF_XYZW, - .Attribute2ActiveComponentFormat = ACF_XYZW, - .Attribute3ActiveComponentFormat = ACF_XYZW, - .Attribute4ActiveComponentFormat = ACF_XYZW, - .Attribute5ActiveComponentFormat = ACF_XYZW, - .Attribute6ActiveComponentFormat = ACF_XYZW, - .Attribute7ActiveComponentFormat = ACF_XYZW, - .Attribute8ActiveComponentFormat = ACF_XYZW, - .Attribute9ActiveComponentFormat = ACF_XYZW, - .Attribute10ActiveComponentFormat = ACF_XYZW, - .Attribute11ActiveComponentFormat = ACF_XYZW, - 
.Attribute12ActiveComponentFormat = ACF_XYZW, - .Attribute13ActiveComponentFormat = ACF_XYZW, - .Attribute14ActiveComponentFormat = ACF_XYZW, - .Attribute15ActiveComponentFormat = ACF_XYZW, - /* wow, much field, very attribute */ - .Attribute16ActiveComponentFormat = ACF_XYZW, - .Attribute17ActiveComponentFormat = ACF_XYZW, - .Attribute18ActiveComponentFormat = ACF_XYZW, - .Attribute19ActiveComponentFormat = ACF_XYZW, - .Attribute20ActiveComponentFormat = ACF_XYZW, - .Attribute21ActiveComponentFormat = ACF_XYZW, - .Attribute22ActiveComponentFormat = ACF_XYZW, - .Attribute23ActiveComponentFormat = ACF_XYZW, - .Attribute24ActiveComponentFormat = ACF_XYZW, - .Attribute25ActiveComponentFormat = ACF_XYZW, - .Attribute26ActiveComponentFormat = ACF_XYZW, - .Attribute27ActiveComponentFormat = ACF_XYZW, - .Attribute28ActiveComponentFormat = ACF_XYZW, - .Attribute29ActiveComponentFormat = ACF_XYZW, - .Attribute28ActiveComponentFormat = ACF_XYZW, - .Attribute29ActiveComponentFormat = ACF_XYZW, - .Attribute30ActiveComponentFormat = ACF_XYZW, + .Attribute0ActiveComponentFormat = ACF_XYZW, + .Attribute1ActiveComponentFormat = ACF_XYZW, + .Attribute2ActiveComponentFormat = ACF_XYZW, + .Attribute3ActiveComponentFormat = ACF_XYZW, + .Attribute4ActiveComponentFormat = ACF_XYZW, + .Attribute5ActiveComponentFormat = ACF_XYZW, + .Attribute6ActiveComponentFormat = ACF_XYZW, + .Attribute7ActiveComponentFormat = ACF_XYZW, + .Attribute8ActiveComponentFormat = ACF_XYZW, + .Attribute9ActiveComponentFormat = ACF_XYZW, + .Attribute10ActiveComponentFormat = ACF_XYZW, + .Attribute11ActiveComponentFormat = ACF_XYZW, + .Attribute12ActiveComponentFormat = ACF_XYZW, + .Attribute13ActiveComponentFormat = ACF_XYZW, + .Attribute14ActiveComponentFormat = ACF_XYZW, + .Attribute15ActiveComponentFormat = ACF_XYZW, + /* wow, much field, very attribute */ + .Attribute16ActiveComponentFormat = ACF_XYZW, + .Attribute17ActiveComponentFormat = ACF_XYZW, + .Attribute18ActiveComponentFormat = ACF_XYZW, + 
.Attribute19ActiveComponentFormat = ACF_XYZW, + .Attribute20ActiveComponentFormat = ACF_XYZW, + .Attribute21ActiveComponentFormat = ACF_XYZW, + .Attribute22ActiveComponentFormat = ACF_XYZW, + .Attribute23ActiveComponentFormat = ACF_XYZW, + .Attribute24ActiveComponentFormat = ACF_XYZW, + .Attribute25ActiveComponentFormat = ACF_XYZW, + .Attribute26ActiveComponentFormat = ACF_XYZW, + .Attribute27ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute30ActiveComponentFormat = ACF_XYZW, #endif - ); + ); - uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, - GENX(3DSTATE_SBE_SWIZ_length)); - GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); + uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, + GENX(3DSTATE_SBE_SWIZ_length)); + GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); - const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), - .KernelStartPointer0 = pipeline->ps_ksp0, - - .SingleProgramFlow = false, - .VectorMaskEnable = true, - .SamplerCount = 1, - - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], - .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), - - .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, - .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
- POSOFFSET_SAMPLE: POSOFFSET_NONE, - .PushConstantEnable = wm_prog_data->base.nr_params > 0, - ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, - ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, - ._32PixelDispatchEnable = false, - - .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0, - .DispatchGRFStartRegisterForConstantSetupData1 = 0, - .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2, - - .KernelStartPointer1 = 0, - .KernelStartPointer2 = pipeline->ps_ksp2); - - bool per_sample_ps = false; - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), - .PixelShaderValid = true, - .PixelShaderKillsPixel = wm_prog_data->uses_kill, - .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, - .AttributeEnable = wm_prog_data->num_varying_inputs > 0, - .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, - .PixelShaderIsPerSample = per_sample_ps, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .KernelStartPointer0 = pipeline->ps_ksp0, + + .SingleProgramFlow = false, + .VectorMaskEnable = true, + .SamplerCount = 1, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), + + .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
+ POSOFFSET_SAMPLE: POSOFFSET_NONE, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._32PixelDispatchEnable = false, + + .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterForConstantSetupData1 = 0, + .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2, + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + bool per_sample_ps = false; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), + .PixelShaderValid = true, + .PixelShaderKillsPixel = wm_prog_data->uses_kill, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .PixelShaderIsPerSample = per_sample_ps, #if ANV_GEN >= 9 - .PixelShaderPullsBary = wm_prog_data->pulls_bary, - .InputCoverageMaskState = ICMS_NONE + .PixelShaderPullsBary = wm_prog_data->pulls_bary, + .InputCoverageMaskState = ICMS_NONE #endif - ); + ); + } *pPipeline = anv_pipeline_to_handle(pipeline); -- cgit v1.2.3 From c21de2bf04c774870f5e58eb160794ca83d96346 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 25 Jan 2016 11:34:32 -0800 Subject: anv: Don't use uninitialized barycentric_interp_modes If we don't have a fragment shader, wm_prog_data in undefined. 
--- src/vulkan/gen8_pipeline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index b23bb4b8895..314a9589e75 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -343,7 +343,8 @@ genX(graphics_pipeline_create)( .ForceThreadDispatchEnable = NORMAL, .PointRasterizationRule = RASTRULE_UPPER_RIGHT, .BarycentricInterpolationMode = - pipeline->wm_prog_data.barycentric_interp_modes); + pipeline->ps_ksp0 == NO_KERNEL ? + 0 : pipeline->wm_prog_data.barycentric_interp_modes); uint32_t samples = 1; uint32_t log2_samples = __builtin_ffs(samples) - 1; -- cgit v1.2.3 From 2c94f659e8453bb584e1d50b188d4199fe7b8194 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 30 Dec 2015 16:00:47 -0800 Subject: anv/meta: Fix CopyBuffer when size matches HW limit Perform a copy when the copy_size matches the HW limit (max_copy_size). Otherwise the current behavior is that we fail the following assertion: assert(height < max_surface_dim); because the values are equal. --- src/vulkan/anv_meta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 483469348c8..351af219e7f 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -846,7 +846,7 @@ void anv_CmdCopyBuffer( /* First, we make a bunch of max-sized copies */ uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; - while (copy_size > max_copy_size) { + while (copy_size >= max_copy_size) { do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, dest_buffer->bo, dest_offset, max_surface_dim, max_surface_dim, copy_format); -- cgit v1.2.3 From 26f0444ead45bd9d5f7fbbca6292b284f382ecd6 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 21 Jan 2016 09:09:29 -0800 Subject: nir: Add opcodes to extract bytes or words. The uint versions zero extend while the int versions sign extend. 
--- src/glsl/nir/nir.h | 3 +++ src/glsl/nir/nir_opcodes.py | 9 +++++++++ src/glsl/nir/nir_opt_algebraic.py | 16 ++++++++++++++++ 3 files changed, 28 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 11130304170..7b39cbb4c12 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1471,6 +1471,9 @@ typedef struct nir_shader_compiler_options { bool lower_pack_half_2x16; bool lower_unpack_half_2x16; + bool lower_extract_byte; + bool lower_extract_word; + /** * Does the driver support real 32-bit integers? (Otherwise, integers * are simulated by floats.) diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index a8bbe1a0b82..be3cd17193f 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -536,6 +536,15 @@ dst.x = src0.x; dst.y = src1.x; """) +# Byte extraction +binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))") +binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))") + +# Word extraction +binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))") +binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))") + + def triop(name, ty, const_expr): opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr) def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr): diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 7745b76f7ce..b761b54cd70 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -242,6 +242,22 @@ optimizations = [ ('bcsel', ('ult', 31, 'bits'), 'value', ('ubfe', 'value', 'offset', 'bits')), 'options->lower_bitfield_extract'), + + (('extract_ibyte', a, b), + ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 8), + 'options->lower_extract_byte'), + + (('extract_ubyte', a, b), + ('iand', ('ushr', a, ('imul', b, 8)), 0xff), + 'options->lower_extract_byte'), + + (('extract_iword', a, b), + ('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 
16), + 'options->lower_extract_word'), + + (('extract_uword', a, b), + ('iand', ('ushr', a, ('imul', b, 16)), 0xffff), + 'options->lower_extract_word'), ] # Add optimizations to handle the case where the result of a ternary is -- cgit v1.2.3 From 6c1b3bc950d480a21d4957b5b0cab84ffc49769b Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 20 Jan 2016 18:56:37 -0800 Subject: i965/fs: Implement support for extract_word. The vec4 backend will lower it. --- src/mesa/drivers/dri/i965/brw_defines.h | 12 ++++++++++++ src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 ++ src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 22 ++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 ++++++++++++++++ src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++++ 5 files changed, 56 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 10a6d39db85..01e0c99e440 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1085,6 +1085,18 @@ enum opcode { */ SHADER_OPCODE_BROADCAST, + /** + * Pick the byte from its first source register given by the index + * specified as second source. + */ + SHADER_OPCODE_EXTRACT_BYTE, + + /** + * Pick the word from its first source register given by the index + * specified as second source. 
+ */ + SHADER_OPCODE_EXTRACT_WORD, + VEC4_OPCODE_MOV_BYTES, VEC4_OPCODE_PACK_BYTES, VEC4_OPCODE_UNPACK_UNIFORM, diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 3b65a382dc8..cde6566c05c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -78,6 +78,8 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case FS_OPCODE_LINTERP: case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_EXTRACT_BYTE: + case SHADER_OPCODE_EXTRACT_WORD: case SHADER_OPCODE_MOV_INDIRECT: return true; case SHADER_OPCODE_RCP: diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index e05622ae7a8..1916a995020 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -2201,6 +2201,28 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_broadcast(p, dst, src[0], src[1]); break; + case SHADER_OPCODE_EXTRACT_BYTE: { + assert(src[0].type == BRW_REGISTER_TYPE_D || + src[0].type == BRW_REGISTER_TYPE_UD); + + enum brw_reg_type type = + src[0].type == BRW_REGISTER_TYPE_D ? BRW_REGISTER_TYPE_B + : BRW_REGISTER_TYPE_UB; + brw_MOV(p, dst, spread(suboffset(retype(src[0], type), src[1].ud), 4)); + break; + } + + case SHADER_OPCODE_EXTRACT_WORD: { + assert(src[0].type == BRW_REGISTER_TYPE_D || + src[0].type == BRW_REGISTER_TYPE_UD); + + enum brw_reg_type type = + src[0].type == BRW_REGISTER_TYPE_D ? 
BRW_REGISTER_TYPE_W + : BRW_REGISTER_TYPE_UW; + brw_MOV(p, dst, spread(suboffset(retype(src[0], type), src[1].ud), 2)); + break; + } + case FS_OPCODE_SET_SAMPLE_ID: generate_set_sample_id(inst, dst, src[0], src[1]); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index d7bcc1c5374..3efee50b273 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1075,6 +1075,22 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->predicate = BRW_PREDICATE_NORMAL; break; + case nir_op_extract_ubyte: + case nir_op_extract_ibyte: { + nir_const_value *byte = nir_src_as_const_value(instr->src[1].src); + bld.emit(SHADER_OPCODE_EXTRACT_BYTE, + result, op[0], brw_imm_ud(byte->u[0])); + break; + } + + case nir_op_extract_uword: + case nir_op_extract_iword: { + nir_const_value *word = nir_src_as_const_value(instr->src[1].src); + bld.emit(SHADER_OPCODE_EXTRACT_WORD, + result, op[0], brw_imm_ud(word->u[0])); + break; + } + default: unreachable("unhandled instruction"); } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index e42601b6f3e..6a6efa9aea2 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -312,6 +312,10 @@ brw_instruction_name(enum opcode op) case SHADER_OPCODE_BROADCAST: return "broadcast"; + case SHADER_OPCODE_EXTRACT_BYTE: + return "extract_byte"; + case SHADER_OPCODE_EXTRACT_WORD: + return "extract_word"; case VEC4_OPCODE_MOV_BYTES: return "mov_bytes"; case VEC4_OPCODE_PACK_BYTES: -- cgit v1.2.3 From d7781038f51ee569aacc07bfd17b70dad318f043 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 25 Jan 2016 11:05:52 -0800 Subject: nir: Add lowering support for packing opcodes. 
--- src/glsl/nir/nir.h | 4 ++++ src/glsl/nir/nir_lower_alu_to_scalar.c | 32 ++++++++++++++++++++++++++++++++ src/glsl/nir/nir_opcodes.py | 10 ++++++++++ src/glsl/nir/nir_opt_algebraic.py | 20 ++++++++++++++++++++ 4 files changed, 66 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 7b39cbb4c12..bbd5b1af64e 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1469,6 +1469,10 @@ typedef struct nir_shader_compiler_options { bool lower_ffract; bool lower_pack_half_2x16; + bool lower_pack_unorm_2x16; + bool lower_pack_snorm_2x16; + bool lower_pack_unorm_4x8; + bool lower_pack_snorm_4x8; bool lower_unpack_half_2x16; bool lower_extract_byte; diff --git a/src/glsl/nir/nir_lower_alu_to_scalar.c b/src/glsl/nir/nir_lower_alu_to_scalar.c index 5372fbeed88..37cb0221e0b 100644 --- a/src/glsl/nir/nir_lower_alu_to_scalar.c +++ b/src/glsl/nir/nir_lower_alu_to_scalar.c @@ -134,6 +134,38 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) return; } + case nir_op_pack_uvec2_to_uint: { + assert(b->shader->options->lower_pack_snorm_2x16 || + b->shader->options->lower_pack_unorm_2x16); + + nir_ssa_def *word = + nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); + nir_ssa_def *val = + nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)), + nir_channel(b, word, 0)); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); + nir_instr_remove(&instr->instr); + break; + } + + case nir_op_pack_uvec4_to_uint: { + assert(b->shader->options->lower_pack_snorm_4x8 || + b->shader->options->lower_pack_unorm_4x8); + + nir_ssa_def *byte = + nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); + nir_ssa_def *val = + nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)), + nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))), + nir_ior(b, nir_ishl(b, nir_channel(b, byte, 1), nir_imm_int(b, 8)), + nir_channel(b, byte, 0))); + + 
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); + nir_instr_remove(&instr->instr); + break; + } + case nir_op_fdph: { nir_ssa_def *sum[4]; for (unsigned i = 0; i < 3; i++) { diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index be3cd17193f..3b82c3ce1f9 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -237,6 +237,16 @@ unpack_2x16("unorm") unpack_4x8("unorm") unpack_2x16("half") +unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """ +dst = (src0.x & 0xffff) | (src0.y >> 16); +""") + +unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """ +dst = (src0.x << 0) | + (src0.y << 8) | + (src0.z << 16) | + (src0.w << 24); +""") # Lowered floating point unpacking operations. diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index b761b54cd70..56b0f5ea7b2 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -258,6 +258,26 @@ optimizations = [ (('extract_uword', a, b), ('iand', ('ushr', a, ('imul', b, 16)), 0xffff), 'options->lower_extract_word'), + + (('pack_unorm_2x16', 'v'), + ('pack_uvec2_to_uint', + ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))), + 'options->lower_pack_unorm_2x16'), + + (('pack_unorm_4x8', 'v'), + ('pack_uvec4_to_uint', + ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))), + 'options->lower_pack_unorm_4x8'), + + (('pack_snorm_2x16', 'v'), + ('pack_uvec2_to_uint', + ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))), + 'options->lower_pack_snorm_2x16'), + + (('pack_snorm_4x8', 'v'), + ('pack_uvec4_to_uint', + ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))), + 'options->lower_pack_snorm_4x8'), ] # Add optimizations to handle the case where the result of a ternary is -- cgit v1.2.3 From 8bb22dc3518b86ed2e0194c127f0438a0c073018 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 25 Jan 2016 11:07:02 -0800 Subject: nir: Add 
lowering support for unpacking opcodes. --- src/glsl/nir/nir.h | 4 ++++ src/glsl/nir/nir_opt_algebraic.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index bbd5b1af64e..3b90b5129f1 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1474,6 +1474,10 @@ typedef struct nir_shader_compiler_options { bool lower_pack_unorm_4x8; bool lower_pack_snorm_4x8; bool lower_unpack_half_2x16; + bool lower_unpack_unorm_2x16; + bool lower_unpack_snorm_2x16; + bool lower_unpack_unorm_4x8; + bool lower_unpack_snorm_4x8; bool lower_extract_byte; bool lower_extract_word; diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 56b0f5ea7b2..a0d6c074682 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -278,6 +278,34 @@ optimizations = [ ('pack_uvec4_to_uint', ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))), 'options->lower_pack_snorm_4x8'), + + (('unpack_unorm_2x16', 'v'), + ('fdiv', ('u2f', ('vec4', ('extract_uword', 'v', 0), + ('extract_uword', 'v', 1), 0, 0)), + 65535.0), + 'options->lower_unpack_unorm_2x16'), + + (('unpack_unorm_4x8', 'v'), + ('fdiv', ('u2f', ('vec4', ('extract_ubyte', 'v', 0), + ('extract_ubyte', 'v', 1), + ('extract_ubyte', 'v', 2), + ('extract_ubyte', 'v', 3))), + 255.0), + 'options->lower_unpack_unorm_4x8'), + + (('unpack_snorm_2x16', 'v'), + ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_iword', 'v', 0), + ('extract_iword', 'v', 1), 0, 0)), + 32767.0))), + 'options->lower_unpack_snorm_2x16'), + + (('unpack_snorm_4x8', 'v'), + ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_ibyte', 'v', 0), + ('extract_ibyte', 'v', 1), + ('extract_ibyte', 'v', 2), + ('extract_ibyte', 'v', 3))), + 127.0))), + 'options->lower_unpack_snorm_4x8'), ] # Add optimizations to handle the case where the result of a ternary is -- cgit v1.2.3 From 
5deba3f00a9614e9ab60a40c737e87dc9f5a5a43 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 25 Jan 2016 10:49:15 -0800 Subject: i965/vec4: Implement nir_op_pack_uvec2_to_uint. And mark nir_op_pack_uvec4_to_uint unreachable, since it's only produced by lowering pack[SU]norm4x8 which the vec4 backend does not need. --- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 1b87e3044c2..d3ac7ab61f7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1325,6 +1325,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_pack_unorm_2x16: unreachable("not reached: should be handled by lower_packing_builtins"); + case nir_op_pack_uvec4_to_uint: + unreachable("not reached"); + + case nir_op_pack_uvec2_to_uint: { + dst_reg tmp1 = dst_reg(this, glsl_type::uint_type); + tmp1.writemask = WRITEMASK_X; + op[0].swizzle = BRW_SWIZZLE_YYYY; + emit(SHL(tmp1, op[0], src_reg(brw_imm_ud(16u)))); + + dst_reg tmp2 = dst_reg(this, glsl_type::uint_type); + tmp2.writemask = WRITEMASK_X; + op[0].swizzle = BRW_SWIZZLE_XXXX; + emit(AND(tmp2, op[0], src_reg(brw_imm_ud(0xffffu)))); + + emit(OR(dst, src_reg(tmp1), src_reg(tmp2))); + break; + } + case nir_op_unpack_half_2x16: /* As NIR does not guarantee that we have a correct swizzle outside the * boundaries of a vector, and the implementation of emit_unpack_half_2x16 -- cgit v1.2.3 From 874ede498334c9ec39383be9b24c2c368dcb349e Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 25 Jan 2016 11:07:28 -0800 Subject: i965/gen7+: Use NIR for lowering of pack/unpack opcodes. 
--- src/mesa/drivers/dri/i965/brw_compiler.c | 15 +++++++++++++ .../dri/i965/brw_fs_channel_expressions.cpp | 8 +++++++ src/mesa/drivers/dri/i965/brw_link.cpp | 25 ++++++---------------- 3 files changed, 29 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index 21fff1ddf4f..f9e22d1d6b5 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -87,7 +87,15 @@ shader_perf_log_mesa(void *data, const char *fmt, ...) static const struct nir_shader_compiler_options scalar_nir_options = { COMMON_OPTIONS, .lower_pack_half_2x16 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_snorm_4x8 = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_unorm_4x8 = true, .lower_unpack_half_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_snorm_4x8 = true, + .lower_unpack_unorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, }; static const struct nir_shader_compiler_options vector_nir_options = { @@ -98,6 +106,13 @@ static const struct nir_shader_compiler_options vector_nir_options = { * instructions because it can optimize better for us. 
*/ .fdot_replicates = true, + + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_unorm_2x16 = true, + .lower_extract_byte = true, + .lower_extract_word = true, }; struct brw_compiler * diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index b16dd2ffd9e..cbad47ee40a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -73,6 +73,10 @@ channel_expressions_predicate(ir_instruction *ir) switch (expr->operation) { case ir_unop_pack_half_2x16: + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: return false; /* these opcodes need to act on the whole vector, @@ -166,6 +170,10 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) switch (expr->operation) { case ir_unop_pack_half_2x16: + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: case ir_unop_interpolate_at_centroid: case ir_binop_interpolate_at_offset: case ir_binop_interpolate_at_sample: diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index 8f2d7600146..ab9d7929c05 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -73,26 +73,13 @@ brw_lower_packing_builtins(struct brw_context *brw, gl_shader_stage shader_type, exec_list *ir) { - const struct brw_compiler *compiler = brw->intelScreen->compiler; - - int ops = LOWER_PACK_SNORM_2x16 - | LOWER_UNPACK_SNORM_2x16 - | LOWER_PACK_UNORM_2x16 - | LOWER_UNPACK_UNORM_2x16; - - if (compiler->scalar_stage[shader_type]) { - ops |= LOWER_UNPACK_UNORM_4x8 - | LOWER_UNPACK_SNORM_4x8 - | LOWER_PACK_UNORM_4x8 - | LOWER_PACK_SNORM_4x8; - } - - if (brw->gen < 7) { - ops |= LOWER_PACK_HALF_2x16 - | 
LOWER_UNPACK_HALF_2x16; - } + /* Gens < 7 don't have instructions to convert to or from half-precision, + * and Gens < 6 don't expose that functionality. + */ + if (brw->gen != 6) + return; - lower_packing_builtins(ir, ops); + lower_packing_builtins(ir, LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16); } static void -- cgit v1.2.3 From 76c096f0e71a0dab3be49ad4d56cec74e3296842 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 25 Jan 2016 15:12:43 -0800 Subject: anv: Remove stale assert This goes back to when we didn't have the subpass number in the command buffer begin info. --- src/vulkan/anv_cmd_buffer.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index d2cbf87578a..50ecf000167 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -1108,8 +1108,6 @@ void anv_CmdExecuteCommands( assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]); - for (uint32_t i = 0; i < commandBufferCount; i++) { ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); -- cgit v1.2.3 From 8e07f7942e129e838398e94be5cf986f78d94dc3 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 25 Jan 2016 15:14:47 -0800 Subject: anv: Remove a few finished finishme --- src/vulkan/anv_image.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index afd01e9d9fb..f8782b6e848 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -652,7 +652,6 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag * what surface the Meta Dragons really want. 
*/ if (image->format->depth_format && image->format->has_stencil) { - anv_finishme("combined depth stencil formats"); return &image->depth_surface; } else if (image->format->depth_format) { return &image->depth_surface; @@ -670,13 +669,17 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag return &image->stencil_surface; case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: if (image->format->depth_format && image->format->has_stencil) { - /* FINISHME: The Vulkan spec (git a511ba2) requires support for combined - * depth stencil formats. Specifically, it states: + /* FINISHME: The Vulkan spec (git a511ba2) requires support for + * combined depth stencil formats. Specifically, it states: * * At least one of ename:VK_FORMAT_D24_UNORM_S8_UINT or * ename:VK_FORMAT_D32_SFLOAT_S8_UINT must be supported. + * + * Image views with both depth and stencil aspects are only valid for + * render target attachments, in which case + * cmd_buffer_emit_depth_stencil() will pick out both the depth and + * stencil surfaces from the underlying surface. */ - anv_finishme("combined depthstencil aspect"); return &image->depth_surface; } else if (image->format->depth_format) { return &image->depth_surface; -- cgit v1.2.3 From 9c0109a1f6e0c9c8dd81fc215f8c0c5976e05790 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 Jan 2016 12:42:01 -0800 Subject: i965/fs: Properly write-mask spills For unspills (scratch reads), we can just set WE_all all the time because we always unspill into a new GRF. For spills, we have two options: If the instruction has a 32-bit-per-channel destination and "normal" regioning, then we just do a regular write and it will interleave channels from different control-flow paths properly. If, on the other hand, the the regioning is non-normal, then we have to unspill, run the instruction, and spill afterwards. In this second case, we need to do the spill with we_ALL. 
--- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 5a7a0eb5f63..c931910136d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -224,7 +224,7 @@ public: void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg, uint32_t spill_offset, int count); void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg, - uint32_t spill_offset, int count); + uint32_t spill_offset, int count, bool we_all); void emit_nir_code(); void nir_setup_inputs(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 2347cd5d33f..791da0e038e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -751,6 +751,7 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst, dst); unspill_inst->offset = spill_offset; unspill_inst->regs_written = reg_size; + unspill_inst->force_writemask_all = true; if (!gen7_read) { unspill_inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1; @@ -764,11 +765,11 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst, void fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src, - uint32_t spill_offset, int count) + uint32_t spill_offset, int count, bool we_all) { int reg_size = 1; int spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1; - if (dispatch_width == 16 && count % 2 == 0) { + if (inst->exec_size == 16 && count % 2 == 0) { spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen); reg_size = 2; } @@ -784,6 +785,8 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src, spill_inst->offset = spill_offset + i * reg_size * REG_SIZE; spill_inst->mlen = 1 + reg_size; /* header, value */ spill_inst->base_mrf = spill_base_mrf; + 
spill_inst->force_writemask_all = we_all; + spill_inst->force_sechalf = inst->force_sechalf; } } @@ -938,12 +941,15 @@ fs_visitor::spill_reg(int spill_reg) * inst->regs_written(), then we need to unspill the destination * since we write back out all of the regs_written(). */ - if (inst->is_partial_write()) + bool need_unspill = inst->is_partial_write() || + type_sz(inst->dst.type) != 4; + if (need_unspill) emit_unspill(block, inst, spill_src, subset_spill_offset, inst->regs_written); emit_spill(block, inst, spill_src, subset_spill_offset, - inst->regs_written); + inst->regs_written, + need_unspill || inst->force_writemask_all); } } -- cgit v1.2.3 From 2434ceabf41e66f2a3627ea8591e5ca427a78cce Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 Jan 2016 15:00:38 -0800 Subject: i965/fs: Feel free to spill partial reads/writes Now that we properly handle write-masking, this should be safe. --- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 791da0e038e..8396854fcb1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -808,30 +808,13 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) */ foreach_block_and_inst(block, fs_inst, inst, cfg) { for (unsigned int i = 0; i < inst->sources; i++) { - if (inst->src[i].file == VGRF) { + if (inst->src[i].file == VGRF) spill_costs[inst->src[i].nr] += loop_scale; - - /* Register spilling logic assumes full-width registers; smeared - * registers have a width of 1 so if we try to spill them we'll - * generate invalid assembly. This shouldn't be a problem because - * smeared registers are only used as short-term temporaries when - * loading pull constants, so spilling them is unlikely to reduce - * register pressure anyhow. 
- */ - if (!inst->src[i].is_contiguous()) { - no_spill[inst->src[i].nr] = true; - } - } } - if (inst->dst.file == VGRF) { + if (inst->dst.file == VGRF) spill_costs[inst->dst.nr] += inst->regs_written * loop_scale; - if (!inst->dst.is_contiguous()) { - no_spill[inst->dst.nr] = true; - } - } - switch (inst->opcode) { case BRW_OPCODE_DO: -- cgit v1.2.3 From 9c69f4632d7ae36efb59f4e7002de28dac08a896 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 25 Jan 2016 15:26:25 -0800 Subject: gen8/state: Apply min/mag filters individually for samplers This fixes tests which apply different min and mag filters, and depend on the min filter to be correct. --- src/vulkan/gen8_state.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index ce142e6fb26..6e4c3eafe11 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -324,9 +324,6 @@ VkResult genX(CreateSampler)( if (!sampler) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - uint32_t filter = vk_to_gen_tex_filter(pCreateInfo->magFilter, - pCreateInfo->anisotropyEnable); - uint32_t border_color_offset = device->border_colors.offset + pCreateInfo->borderColor * 64; @@ -338,8 +335,8 @@ VkResult genX(CreateSampler)( .BaseMipLevel = 0.0, #endif .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], - .MagModeFilter = filter, - .MinModeFilter = filter, + .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, pCreateInfo->anisotropyEnable), + .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, pCreateInfo->anisotropyEnable), .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), .AnisotropicAlgorithm = EWAApproximation, .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), -- cgit v1.2.3 From 6bbf3814dc937371c2918ea56b18d4c32000b951 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 Jan 2016 15:33:08 -0800 Subject: gen7/state: Apply min/mag filters individually for samplers This 
fixes tests which apply different min and mag filters, and depend on the min filter to be correct. --- src/vulkan/gen7_state.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 55cff90a723..b3abe74090d 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -81,17 +81,16 @@ VkResult genX(CreateSampler)( if (!sampler) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - uint32_t filter = vk_to_gen_tex_filter(pCreateInfo->magFilter, - pCreateInfo->anisotropyEnable); - struct GEN7_SAMPLER_STATE sampler_state = { .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, .LODPreClampEnable = OGL, .BaseMipLevel = 0.0, .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], - .MagModeFilter = filter, - .MinModeFilter = filter, + .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, + pCreateInfo->anisotropyEnable), + .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, + pCreateInfo->anisotropyEnable), .TextureLODBias = pCreateInfo->mipLodBias * 256, .AnisotropicAlgorithm = EWAApproximation, .MinLOD = pCreateInfo->minLod, -- cgit v1.2.3 From 6b6a8a99f8ef5fbc7df19dafa574c49720245607 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 Jan 2016 15:52:53 -0800 Subject: HACK/i965: Default to scalar GS on BDW+ --- src/mesa/drivers/dri/i965/brw_compiler.c | 2 +- src/vulkan/anv_device.c | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index f9e22d1d6b5..682e61b7d63 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -133,7 +133,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true); compiler->scalar_stage[MESA_SHADER_GEOMETRY] = 
- devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false); + devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", true); compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; compiler->scalar_stage[MESA_SHADER_COMPUTE] = true; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index ed9bb0852c8..f9aa3149207 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -138,10 +138,6 @@ anv_physical_device_init(struct anv_physical_device *device, device->compiler->shader_debug_log = compiler_debug_log; device->compiler->shader_perf_log = compiler_perf_log; - /* Default to use scalar GS on BDW+ */ - device->compiler->scalar_stage[MESA_SHADER_GEOMETRY] = - device->info->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", true); - /* XXX: Actually detect bit6 swizzling */ isl_device_init(&device->isl_dev, device->info, swizzled); -- cgit v1.2.3 From 725fb3623fc3b377f726101e0116a0d35f9090d4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 25 Jan 2016 16:10:12 -0800 Subject: i965/compiler: Set nir_options.vertex_id_zero_based --- src/mesa/drivers/dri/i965/brw_compiler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index 682e61b7d63..3d93772c691 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -82,7 +82,8 @@ shader_perf_log_mesa(void *data, const char *fmt, ...) 
.lower_uadd_carry = true, \ .lower_usub_borrow = true, \ .lower_fdiv = true, \ - .native_integers = true + .native_integers = true, \ + .vertex_id_zero_based = true static const struct nir_shader_compiler_options scalar_nir_options = { COMMON_OPTIONS, -- cgit v1.2.3 From df5f6d824bb566ba19854af7a1134c2fb95edf17 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 26 Jan 2016 11:01:24 -0800 Subject: anv/meta: Fix sample mask in clear pipelines Once we begin emitting the correct sample mask, genX_3DSTATE_SAMPLE_MASK_pack will hit an assertion if the mask contains too many bits. --- src/vulkan/anv_meta_clear.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 470b13480d8..ac369e9f9be 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -178,7 +178,7 @@ create_pipeline(struct anv_device *device, .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .rasterizationSamples = 1, /* FINISHME: Multisampling */ .sampleShadingEnable = false, - .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + .pSampleMask = (VkSampleMask[]) { 0x1 }, .alphaToCoverageEnable = false, .alphaToOneEnable = false, }, -- cgit v1.2.3 From 725d969753661735c82fdf7ff01662fd44616033 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 26 Jan 2016 14:41:06 -0800 Subject: anv: Reemit STATE_BASE_ADDRESS after second level cmd buffers Otherwise the primary batch will continue using the state base addresses set by the secondary. Fixes remaining renderpass tests. 
--- src/vulkan/anv_batch_chain.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index ee6e39d3a75..e9bd67c9442 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -721,6 +721,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, switch (secondary->exec_mode) { case ANV_CMD_BUFFER_EXEC_MODE_EMIT: anv_batch_emit_batch(&primary->batch, &secondary->batch); + anv_cmd_buffer_emit_state_base_address(primary); break; case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { struct anv_batch_bo *first_bbo = @@ -761,6 +762,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, } } + anv_cmd_buffer_emit_state_base_address(primary); break; } case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { -- cgit v1.2.3 From 074a7c7d7ced001a82db4b51e1a7a4d2cac74f70 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 26 Jan 2016 14:43:07 -0800 Subject: anv: Dirty fragment shader descriptors in meta restore We need to reemit render targets, so dirtying VK_SHADER_STAGE_VERTEX_BIT doesn't help us much. 
--- src/vulkan/anv_meta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 351af219e7f..7034f70d6ac 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -146,7 +146,7 @@ anv_meta_restore(const struct anv_meta_saved_state *state, cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1; cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, state->dynamic_mask); -- cgit v1.2.3 From fe6ccb6031578afd81cfef0aaf661ce3913850fe Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 26 Jan 2016 14:53:00 -0800 Subject: anv: Remove long unused anv_aub.h --- src/vulkan/anv_aub.h | 153 --------------------------------------------------- 1 file changed, 153 deletions(-) delete mode 100644 src/vulkan/anv_aub.h (limited to 'src') diff --git a/src/vulkan/anv_aub.h b/src/vulkan/anv_aub.h deleted file mode 100644 index 7a67712ff9c..00000000000 --- a/src/vulkan/anv_aub.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -/** @file intel_aub.h - * - * The AUB file is a file format used by Intel's internal simulation - * and other validation tools. It can be used at various levels by a - * driver to input state to the simulated hardware or a replaying - * debugger. - * - * We choose to dump AUB files using the trace block format for ease - * of implementation -- dump out the blocks of memory as plain blobs - * and insert ring commands to execute the batchbuffer blob. - */ - -#ifndef _INTEL_AUB_H -#define _INTEL_AUB_H - -#define AUB_MI_NOOP (0) -#define AUB_MI_BATCH_BUFFER_START (0x31 << 23) -#define AUB_PIPE_CONTROL (0x7a000002) - -/* DW0: instruction type. 
*/ - -#define CMD_AUB (7 << 29) - -#define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16)) -/* DW1 */ -# define AUB_HEADER_MAJOR_SHIFT 24 -# define AUB_HEADER_MINOR_SHIFT 16 - -#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16)) -#define CMD_AUB_DUMP_BMP (CMD_AUB | (1 << 23) | (0x9e << 16)) - -/* DW1 */ -#define AUB_TRACE_OPERATION_MASK 0x000000ff -#define AUB_TRACE_OP_COMMENT 0x00000000 -#define AUB_TRACE_OP_DATA_WRITE 0x00000001 -#define AUB_TRACE_OP_COMMAND_WRITE 0x00000002 -#define AUB_TRACE_OP_MMIO_WRITE 0x00000003 -// operation = TRACE_DATA_WRITE, Type -#define AUB_TRACE_TYPE_MASK 0x0000ff00 -#define AUB_TRACE_TYPE_NOTYPE (0 << 8) -#define AUB_TRACE_TYPE_BATCH (1 << 8) -#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) -#define AUB_TRACE_TYPE_2D_MAP (6 << 8) -#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8) -#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) -#define AUB_TRACE_TYPE_1D_MAP (10 << 8) -#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) -#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8) -#define AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8) -#define AUB_TRACE_TYPE_GENERAL (14 << 8) -#define AUB_TRACE_TYPE_SURFACE (15 << 8) - - -// operation = TRACE_COMMAND_WRITE, Type = -#define AUB_TRACE_TYPE_RING_HWB (1 << 8) -#define AUB_TRACE_TYPE_RING_PRB0 (2 << 8) -#define AUB_TRACE_TYPE_RING_PRB1 (3 << 8) -#define AUB_TRACE_TYPE_RING_PRB2 (4 << 8) - -// Address space -#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff0000 -#define AUB_TRACE_MEMTYPE_GTT (0 << 16) -#define AUB_TRACE_MEMTYPE_LOCAL (1 << 16) -#define AUB_TRACE_MEMTYPE_NONLOCAL (2 << 16) -#define AUB_TRACE_MEMTYPE_PCI (3 << 16) -#define AUB_TRACE_MEMTYPE_GTT_ENTRY (4 << 16) - -/* DW2 */ - -/** - * aub_state_struct_type enum values are encoded with the top 16 bits - * representing the type to be delivered to the .aub file, and the bottom 16 - * bits representing the subtype. This macro performs the encoding. 
- */ -#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype)) - -enum aub_state_struct_type { - AUB_TRACE_VS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1), - AUB_TRACE_GS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2), - AUB_TRACE_CLIP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 3), - AUB_TRACE_SF_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 4), - AUB_TRACE_WM_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 5), - AUB_TRACE_CC_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 6), - AUB_TRACE_CLIP_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 7), - AUB_TRACE_SF_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 8), - AUB_TRACE_CC_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x9), - AUB_TRACE_SAMPLER_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xa), - AUB_TRACE_KERNEL_INSTRUCTIONS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xb), - AUB_TRACE_SCRATCH_SPACE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xc), - AUB_TRACE_SAMPLER_DEFAULT_COLOR = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xd), - - AUB_TRACE_SCISSOR_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x15), - AUB_TRACE_BLEND_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x16), - AUB_TRACE_DEPTH_STENCIL_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x17), - - AUB_TRACE_VERTEX_BUFFER = ENCODE_SS_TYPE(AUB_TRACE_TYPE_VERTEX_BUFFER, 0), - AUB_TRACE_BINDING_TABLE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x100), - AUB_TRACE_SURFACE_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x200), - AUB_TRACE_VS_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 0), - AUB_TRACE_WM_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 1), -}; - -#undef ENCODE_SS_TYPE - -/** - * Decode a aub_state_struct_type value to determine the type that should be - * stored in the .aub file. 
- */ -static inline uint32_t AUB_TRACE_TYPE(enum aub_state_struct_type ss_type) -{ - return (ss_type & 0xFFFF0000) >> 16; -} - -/** - * Decode a state_struct_type value to determine the subtype that should be - * stored in the .aub file. - */ -static inline uint32_t AUB_TRACE_SUBTYPE(enum aub_state_struct_type ss_type) -{ - return ss_type & 0xFFFF; -} - -/* DW3: address */ -/* DW4: len */ - -#endif /* _INTEL_AUB_H */ -- cgit v1.2.3 From da75492879c7ba061720bf24c2755634a2cf7ef2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 10:52:56 -0800 Subject: genX/pipeline: Break emit_vertex_input out into common code It's mostly the same and contains some non-trivial logic, so it really should be shared. Also, we're about to make some modifications here that we would really like to share. --- src/vulkan/gen7_pipeline.c | 79 ----------------------------- src/vulkan/gen8_pipeline.c | 77 ----------------------------- src/vulkan/genX_pipeline_util.h | 107 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 156 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 9b90c6e3120..679510ab2bd 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -34,85 +34,6 @@ #include "genX_pipeline_util.h" -static void -emit_vertex_input(struct anv_pipeline *pipeline, - const VkPipelineVertexInputStateCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra) -{ - uint32_t elements; - if (extra && extra->disable_vs) { - /* If the VS is disabled, just assume the user knows what they're - * doing and apply the layout blindly. This can only come from - * meta, so this *should* be safe. 
- */ - elements = 0; - for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) - elements |= (1 << info->pVertexAttributeDescriptions[i].location); - } else { - /* Pull inputs_read out of the VS prog data */ - uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; - assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); - elements = inputs_read >> VERT_ATTRIB_GENERIC0; - } - - uint32_t vb_count = __builtin_popcount(elements); - - if (pipeline->vs_prog_data.uses_vertexid || - pipeline->vs_prog_data.uses_instanceid) - vb_count++; - - if (vb_count == 0) - return; - - const uint32_t num_dwords = 1 + vb_count * 2; - - uint32_t *p = anv_batch_emitn(&pipeline->batch, num_dwords, - GEN7_3DSTATE_VERTEX_ELEMENTS); - memset(p + 1, 0, (num_dwords - 1) * 4); - - for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { - const VkVertexInputAttributeDescription *desc = - &info->pVertexAttributeDescriptions[i]; - enum isl_format format = anv_get_isl_format(desc->format, - VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_TILING_LINEAR); - - assert(desc->binding < 32); - - if ((elements & (1 << desc->location)) == 0) - continue; /* Binding unused */ - - uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1)); - - struct GEN7_VERTEX_ELEMENT_STATE element = { - .VertexBufferIndex = desc->binding, - .Valid = true, - .SourceElementFormat = format, - .EdgeFlagEnable = false, - .SourceElementOffset = desc->offset, - .Component0Control = vertex_element_comp_control(format, 0), - .Component1Control = vertex_element_comp_control(format, 1), - .Component2Control = vertex_element_comp_control(format, 2), - .Component3Control = vertex_element_comp_control(format, 3), - }; - GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + slot * 2], &element); - } - - if (pipeline->vs_prog_data.uses_vertexid || - pipeline->vs_prog_data.uses_instanceid) { - struct GEN7_VERTEX_ELEMENT_STATE element = { - .Valid = true, - /* FIXME: Do we need to provide the base vertex as 
component 0 here - * to support the correct base vertex ID? */ - .Component0Control = VFCOMP_STORE_0, - .Component1Control = VFCOMP_STORE_0, - .Component2Control = VFCOMP_STORE_VID, - .Component3Control = VFCOMP_STORE_IID - }; - GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + (vb_count - 1) * 2], &element); - } -} - static void gen7_emit_rs_state(struct anv_pipeline *pipeline, const VkPipelineRasterizationStateCreateInfo *info, diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 314a9589e75..261563943f7 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -34,83 +34,6 @@ #include "genX_pipeline_util.h" -static void -emit_vertex_input(struct anv_pipeline *pipeline, - const VkPipelineVertexInputStateCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra) -{ - static_assert(ANV_GEN >= 8, "should be compiling this for gen < 8"); - - uint32_t elements; - if (extra && extra->disable_vs) { - /* If the VS is disabled, just assume the user knows what they're - * doing and apply the layout blindly. This can only come from - * meta, so this *should* be safe. 
- */ - elements = 0; - for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) - elements |= (1 << info->pVertexAttributeDescriptions[i].location); - } else { - /* Pull inputs_read out of the VS prog data */ - uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; - assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); - elements = inputs_read >> VERT_ATTRIB_GENERIC0; - } - - const uint32_t num_dwords = 1 + __builtin_popcount(elements) * 2; - - uint32_t *p; - if (elements != 0) { - p = anv_batch_emitn(&pipeline->batch, num_dwords, - GENX(3DSTATE_VERTEX_ELEMENTS)); - memset(p + 1, 0, (num_dwords - 1) * 4); - } - - for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { - const VkVertexInputAttributeDescription *desc = - &info->pVertexAttributeDescriptions[i]; - enum isl_format format = anv_get_isl_format(desc->format, - VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_TILING_LINEAR); - - assert(desc->binding < 32); - - if ((elements & (1 << desc->location)) == 0) - continue; /* Binding unused */ - - uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1)); - - struct GENX(VERTEX_ELEMENT_STATE) element = { - .VertexBufferIndex = desc->binding, - .Valid = true, - .SourceElementFormat = format, - .EdgeFlagEnable = false, - .SourceElementOffset = desc->offset, - .Component0Control = vertex_element_comp_control(format, 0), - .Component1Control = vertex_element_comp_control(format, 1), - .Component2Control = vertex_element_comp_control(format, 2), - .Component3Control = vertex_element_comp_control(format, 3), - }; - GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), - .InstancingEnable = pipeline->instancing_enable[desc->binding], - .VertexElementIndex = slot, - /* Vulkan so far doesn't have an instance divisor, so - * this is always 1 (ignored if not instancing). 
*/ - .InstanceDataStepRate = 1); - } - - const uint32_t id_slot = __builtin_popcount(elements); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), - .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, - .VertexIDComponentNumber = 2, - .VertexIDElementOffset = id_slot, - .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, - .InstanceIDComponentNumber = 3, - .InstanceIDElementOffset = id_slot); -} - static void emit_ia_state(struct anv_pipeline *pipeline, const VkPipelineInputAssemblyStateCreateInfo *info, diff --git a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h index 08fe6aac6a4..050c33591cb 100644 --- a/src/vulkan/genX_pipeline_util.h +++ b/src/vulkan/genX_pipeline_util.h @@ -47,6 +47,113 @@ vertex_element_comp_control(enum isl_format format, unsigned comp) } } +static void +emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + uint32_t elements; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. + */ + elements = 0; + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) + elements |= (1 << info->pVertexAttributeDescriptions[i].location); + } else { + /* Pull inputs_read out of the VS prog data */ + uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; + assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); + elements = inputs_read >> VERT_ATTRIB_GENERIC0; + } + + uint32_t elem_count = __builtin_popcount(elements); + +#if ANV_GEN <= 7 + /* On Haswell and prior, vertex and instance id are created by using the + * ComponentControl fields, so we need an element for any of them. 
+ */ + if (pipeline->vs_prog_data.uses_vertexid || + pipeline->vs_prog_data.uses_instanceid) + elem_count++; +#endif + + uint32_t *p; + if (elem_count > 0) { + const uint32_t num_dwords = 1 + elem_count * 2; + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GENX(3DSTATE_VERTEX_ELEMENTS)); + memset(p + 1, 0, (num_dwords - 1) * 4); + } + + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + enum isl_format format = anv_get_isl_format(desc->format, + VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_LINEAR); + + assert(desc->binding < 32); + + if ((elements & (1 << desc->location)) == 0) + continue; /* Binding unused */ + + uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1)); + + struct GENX(VERTEX_ELEMENT_STATE) element = { + .VertexBufferIndex = desc->binding, + .Valid = true, + .SourceElementFormat = format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offset, + .Component0Control = vertex_element_comp_control(format, 0), + .Component1Control = vertex_element_comp_control(format, 1), + .Component2Control = vertex_element_comp_control(format, 2), + .Component3Control = vertex_element_comp_control(format, 3), + }; + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); + +#if ANV_GEN >= 8 + /* On Broadwell and later, we have a separate VF_INSTANCING packet + * that controls instancing. On Haswell and prior, that's part of + * VERTEX_BUFFER_STATE which we emit later. + */ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), + .InstancingEnable = pipeline->instancing_enable[desc->binding], + .VertexElementIndex = slot, + /* Vulkan so far doesn't have an instance divisor, so + * this is always 1 (ignored if not instancing). 
*/ + .InstanceDataStepRate = 1); +#endif + } + + const uint32_t id_slot = __builtin_popcount(elements); +#if ANV_GEN >= 8 + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), + .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDComponentNumber = 2, + .VertexIDElementOffset = id_slot, + .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDComponentNumber = 3, + .InstanceIDElementOffset = id_slot); +#else + if (pipeline->vs_prog_data.uses_vertexid || + pipeline->vs_prog_data.uses_instanceid) { + struct GEN7_VERTEX_ELEMENT_STATE element = { + .Valid = true, + /* FIXME: Do we need to provide the base vertex as component 0 here + * to support the correct base vertex ID? */ + .Component0Control = VFCOMP_STORE_0, + .Component1Control = VFCOMP_STORE_0, + .Component2Control = VFCOMP_STORE_VID, + .Component3Control = VFCOMP_STORE_IID + }; + GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + id_slot * 2], &element); + } +#endif +} + static const uint32_t vk_to_gen_cullmode[] = { [VK_CULL_MODE_NONE] = CULLMODE_NONE, [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, -- cgit v1.2.3 From b2b7c93318fa8d417f8d31beb3839c43bfbd894e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 11:47:23 -0800 Subject: glsl/enums: Add an enum for Vulkan instance index --- src/glsl/nir/shader_enums.c | 1 + src/glsl/nir/shader_enums.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/shader_enums.c b/src/glsl/nir/shader_enums.c index 41da4a7b9ea..ff2f564dc98 100644 --- a/src/glsl/nir/shader_enums.c +++ b/src/glsl/nir/shader_enums.c @@ -201,6 +201,7 @@ gl_system_value_name(gl_system_value sysval) static const char *names[] = { ENUM(SYSTEM_VALUE_VERTEX_ID), ENUM(SYSTEM_VALUE_INSTANCE_ID), + ENUM(SYSTEM_VALUE_INSTANCE_INDEX), ENUM(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE), ENUM(SYSTEM_VALUE_BASE_VERTEX), ENUM(SYSTEM_VALUE_INVOCATION_ID), diff --git a/src/glsl/nir/shader_enums.h b/src/glsl/nir/shader_enums.h index 
3a06b14a46b..e3f46e3d739 100644 --- a/src/glsl/nir/shader_enums.h +++ b/src/glsl/nir/shader_enums.h @@ -378,6 +378,13 @@ typedef enum */ SYSTEM_VALUE_INSTANCE_ID, + /** + * Vulkan InstanceIndex. + * + * InstanceIndex = gl_InstanceID + gl_BaseInstance + */ + SYSTEM_VALUE_INSTANCE_INDEX, + /** * DirectX-style vertex ID. * -- cgit v1.2.3 From 1c3b7fe1ee28a53b972859fce7384db965771ef0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 11:48:07 -0800 Subject: nir/lower_io: Lower INSTNACE_INDEX --- src/glsl/nir/nir_lower_system_values.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c index aeaa3107c1f..79f6bedc990 100644 --- a/src/glsl/nir/nir_lower_system_values.c +++ b/src/glsl/nir/nir_lower_system_values.c @@ -113,6 +113,12 @@ convert_block(nir_block *block, void *void_state) } break; + case SYSTEM_VALUE_INSTANCE_INDEX: + sysval = nir_iadd(b, + nir_load_system_value(b, nir_intrinsic_load_instance_id, 0), + nir_load_system_value(b, nir_intrinsic_load_base_instance, 0)); + break; + default: { nir_intrinsic_op sysval_op = nir_intrinsic_from_system_value(var->data.location); -- cgit v1.2.3 From 6ba67795db924832c1a209d1466abfa13d9d68d5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 11:48:23 -0800 Subject: nir/spirv: Add proper support for InstanceIndex --- src/glsl/nir/spirv/vtn_variables.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_variables.c b/src/glsl/nir/spirv/vtn_variables.c index 8fea43b2e96..dd006c355d9 100644 --- a/src/glsl/nir/spirv/vtn_variables.c +++ b/src/glsl/nir/spirv/vtn_variables.c @@ -784,7 +784,9 @@ vtn_get_builtin_location(struct vtn_builder *b, set_mode_system_value(mode); break; case SpvBuiltInInstanceIndex: - /* XXX */ + *location = SYSTEM_VALUE_INSTANCE_INDEX; + set_mode_system_value(mode); + break; case SpvBuiltInInstanceId: *location = 
SYSTEM_VALUE_INSTANCE_ID; set_mode_system_value(mode); -- cgit v1.2.3 From 4bf3cadb660f389098254c198c7a6d327ee8ba50 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 12:08:31 -0800 Subject: gen8: Add support for base vertex/instance --- src/vulkan/gen8_cmd_buffer.c | 52 ++++++++++++++++++++++++++++++++++++++ src/vulkan/genX_pipeline_util.h | 55 ++++++++++++++++++++++++++--------------- 2 files changed, 87 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 096ced5694f..c919c065ca2 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -450,6 +450,41 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty = 0; } +static void +emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) +{ + uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, + GENX(3DSTATE_VERTEX_BUFFERS)); + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, + &(struct GENX(VERTEX_BUFFER_STATE)) { + .VertexBufferIndex = 32, /* Reserved for this */ + .MemoryObjectControlState = GENX(MOCS), + .AddressModifyEnable = true, + .BufferPitch = 0, + .BufferStartingAddress = { bo, offset }, + .BufferSize = 8 + }); +} + +static void +emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, + uint32_t base_vertex, uint32_t base_instance) +{ + struct anv_state id_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); + + ((uint32_t *)id_state.map)[0] = base_vertex; + ((uint32_t *)id_state.map)[0] = base_instance; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(id_state); + + emit_base_vertex_instance_bo(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); +} + void genX(CmdDraw)( VkCommandBuffer commandBuffer, uint32_t vertexCount, @@ -461,6 +496,10 @@ void genX(CmdDraw)( cmd_buffer_flush_state(cmd_buffer); + if 
(cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), .VertexAccessType = SEQUENTIAL, .VertexCountPerInstance = vertexCount, @@ -482,6 +521,10 @@ void genX(CmdDrawIndexed)( cmd_buffer_flush_state(cmd_buffer); + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), .VertexAccessType = RANDOM, .VertexCountPerInstance = indexCount, @@ -513,6 +556,10 @@ void genX(CmdDrawIndirect)( cmd_buffer_flush_state(cmd_buffer); + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); @@ -683,6 +730,11 @@ void genX(CmdDrawIndexedIndirect)( cmd_buffer_flush_state(cmd_buffer); + /* TODO: We need to stomp base vertex to 0 somehow */ + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); diff --git a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h index 050c33591cb..363a1fd23ac 100644 --- a/src/vulkan/genX_pipeline_util.h +++ b/src/vulkan/genX_pipeline_util.h @@ -68,17 +68,24 
@@ emit_vertex_input(struct anv_pipeline *pipeline, elements = inputs_read >> VERT_ATTRIB_GENERIC0; } - uint32_t elem_count = __builtin_popcount(elements); - -#if ANV_GEN <= 7 +#if ANV_GEN >= 8 + /* On BDW+, we only need to allocate space for base ids. Setting up + * the actual vertex and instance id is a separate packet. + */ + const bool needs_svgs_elem = pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance; +#else /* On Haswell and prior, vertex and instance id are created by using the * ComponentControl fields, so we need an element for any of them. */ - if (pipeline->vs_prog_data.uses_vertexid || - pipeline->vs_prog_data.uses_instanceid) - elem_count++; + const bool needs_svgs_elem = pipeline->vs_prog_data.uses_vertexid || + pipeline->vs_prog_data.uses_instanceid || + pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance; #endif + uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem; + uint32_t *p; if (elem_count > 0) { const uint32_t num_dwords = 1 + elem_count * 2; @@ -129,6 +136,28 @@ emit_vertex_input(struct anv_pipeline *pipeline, } const uint32_t id_slot = __builtin_popcount(elements); + if (needs_svgs_elem) { + struct GENX(VERTEX_ELEMENT_STATE) element = { + .VertexBufferIndex = 32, /* Reserved for this */ + .Valid = true, + .SourceElementFormat = ISL_FORMAT_R32G32_UINT, + /* FIXME: Do we need to provide the base vertex as component 0 here + * to support the correct base vertex ID? */ + .Component0Control = pipeline->vs_prog_data.uses_basevertex ? + VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component1Control = pipeline->vs_prog_data.uses_baseinstance ? 
+ VFCOMP_STORE_SRC : VFCOMP_STORE_0, +#if ANV_GEN >= 8 + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_0, +#else + .Component2Control = VFCOMP_STORE_VID, + .Component3Control = VFCOMP_STORE_IID, +#endif + }; + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element); + } + #if ANV_GEN >= 8 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, @@ -137,20 +166,6 @@ emit_vertex_input(struct anv_pipeline *pipeline, .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, .InstanceIDComponentNumber = 3, .InstanceIDElementOffset = id_slot); -#else - if (pipeline->vs_prog_data.uses_vertexid || - pipeline->vs_prog_data.uses_instanceid) { - struct GEN7_VERTEX_ELEMENT_STATE element = { - .Valid = true, - /* FIXME: Do we need to provide the base vertex as component 0 here - * to support the correct base vertex ID? */ - .Component0Control = VFCOMP_STORE_0, - .Component1Control = VFCOMP_STORE_0, - .Component2Control = VFCOMP_STORE_VID, - .Component3Control = VFCOMP_STORE_IID - }; - GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + id_slot * 2], &element); - } #endif } -- cgit v1.2.3 From 42cd994177b823229ac5b2b3fab4dcb5c4ee5353 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 12:09:33 -0800 Subject: gen7: Add support for base vertex/instance --- src/vulkan/gen7_cmd_buffer.c | 51 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index b83bfdadbae..8fae1351848 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -519,6 +519,41 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty = 0; } +static void +emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) +{ + uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, + GENX(3DSTATE_VERTEX_BUFFERS)); + + 
GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, + &(struct GENX(VERTEX_BUFFER_STATE)) { + .VertexBufferIndex = 32, /* Reserved for this */ + .VertexBufferMemoryObjectControlState = GENX(MOCS), + .AddressModifyEnable = true, + .BufferPitch = 0, + .BufferStartingAddress = { bo, offset }, + .EndAddress = { bo, offset + 8 }, + }); +} + +static void +emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, + uint32_t base_vertex, uint32_t base_instance) +{ + struct anv_state id_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); + + ((uint32_t *)id_state.map)[0] = base_vertex; + ((uint32_t *)id_state.map)[0] = base_instance; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(id_state); + + emit_base_vertex_instance_bo(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); +} + void genX(CmdDraw)( VkCommandBuffer commandBuffer, uint32_t vertexCount, @@ -531,6 +566,10 @@ void genX(CmdDraw)( cmd_buffer_flush_state(cmd_buffer); + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); + anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, .VertexAccessType = SEQUENTIAL, .PrimitiveTopologyType = pipeline->topology, @@ -554,6 +593,10 @@ void genX(CmdDrawIndexed)( cmd_buffer_flush_state(cmd_buffer); + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); + anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, .VertexAccessType = RANDOM, .PrimitiveTopologyType = pipeline->topology, @@ -604,6 +647,10 @@ void genX(CmdDrawIndirect)( cmd_buffer_flush_state(cmd_buffer); + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, 
bo_offset + 8); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); @@ -631,6 +678,10 @@ void genX(CmdDrawIndexedIndirect)( cmd_buffer_flush_state(cmd_buffer); + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); -- cgit v1.2.3 From aa9987a39566e673e4f25e46a257df96f57c9475 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 5 Jan 2016 13:54:02 -0800 Subject: anv/image_view: Add base mip and base layer fields These will be needed by image_load_store --- src/vulkan/anv_image.c | 2 ++ src/vulkan/anv_private.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index f8782b6e848..e83358a002d 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -484,6 +484,8 @@ anv_image_view_init(struct anv_image_view *iview, iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, image->tiling); + iview->base_layer = range->baseArrayLayer; + iview->base_mip = range->baseMipLevel; iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width, range->baseMipLevel), .height = anv_minify(image->extent.height, range->baseMipLevel), diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index d121857352d..16ca0f5ce48 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1567,6 +1567,8 @@ struct anv_image_view { VkImageAspectFlags aspect_mask; VkFormat vk_format; enum isl_format format; 
+ uint32_t base_layer; + uint32_t base_mip; VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ /** RENDER_SURFACE_STATE when using image as a color render target. */ -- cgit v1.2.3 From ba393c9d819de0549a6148bf69177e37e9e2193a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 5 Jan 2016 13:55:00 -0800 Subject: anv/image: Actually fill out brw_image_param structs --- src/vulkan/anv_image.c | 91 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 83 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index e83358a002d..e40fb3d51d3 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -695,13 +695,93 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag } } +static void +image_param_defaults(struct brw_image_param *param) +{ + memset(param, 0, sizeof *param); + /* Set the swizzling shifts to all-ones to effectively disable swizzling -- + * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more + * detailed explanation of these parameters. 
+ */ + param->swizzling[0] = 0xff; + param->swizzling[1] = 0xff; +} + void anv_image_view_fill_image_param(struct anv_device *device, struct anv_image_view *view, struct brw_image_param *param) { - memset(param, 0, sizeof *param); - anv_finishme("Actually fill out brw_image_param"); + image_param_defaults(param); + + const struct isl_surf *surf = &view->image->color_surface.isl; + + const int cpp = isl_format_get_layout(surf->format)->bs; + + param->size[0] = minify(surf->logical_level0_px.width, view->base_mip); + param->size[1] = minify(surf->logical_level0_px.height, view->base_mip); + if (surf->dim == ISL_SURF_DIM_3D) { + param->size[2] = minify(surf->logical_level0_px.depth, view->base_mip); + } else { + param->size[2] = surf->logical_level0_px.array_len - view->base_layer; + } + + isl_surf_get_image_offset_sa(surf, view->base_mip, view->base_layer, 0, + ¶m->offset[0], ¶m->offset[1]); + + param->stride[0] = cpp; + param->stride[1] = surf->row_pitch / cpp; + if (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D) + param->stride[2] = util_align_npot(param->size[0], surf->alignment); + param->stride[3] = /* TODO */ 0; + + switch (surf->tiling) { + case ISL_TILING_LINEAR: + /* image_param_defaults is good enough */ + break; + + case ISL_TILING_X: + /* An X tile is a rectangular block of 512x8 bytes. */ + param->tiling[0] = util_logbase2(512 / cpp); + param->tiling[1] = util_logbase2(8); + + if (device->isl_dev.has_bit6_swizzling) { + /* Right shifts required to swizzle bits 9 and 10 of the memory + * address with bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 4; + } + break; + + case ISL_TILING_Y0: + /* The layout of a Y-tiled surface in memory isn't really fundamentally + * different to the layout of an X-tiled surface, we simply pretend that + * the surface is broken up in a number of smaller 16Bx32 tiles, each + * one arranged in X-major order just like is the case for X-tiling. 
+ */ + param->tiling[0] = util_logbase2(16 / cpp); + param->tiling[1] = util_logbase2(32); + + if (device->isl_dev.has_bit6_swizzling) { + /* Right shift required to swizzle bit 9 of the memory address with + * bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 0xff; + } + break; + + default: + assert(!"Unhandled storage image tiling"); + } + + /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The + * address calculation algorithm (emit_address_calculation() in + * brw_fs_surface_builder.cpp) handles this as a sort of tiling with + * modulus equal to the LOD. + */ + param->tiling[2] = (surf->dim == ISL_SURF_DIM_3D) ? view->base_mip : 0; } void @@ -709,12 +789,7 @@ anv_buffer_view_fill_image_param(struct anv_device *device, struct anv_buffer_view *view, struct brw_image_param *param) { - /* Set the swizzling shifts to all-ones to effectively disable swizzling -- - * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more - * detailed explanation of these parameters. 
- */ - param->swizzling[0] = 0xff; - param->swizzling[1] = 0xff; + image_param_defaults(param); param->stride[0] = isl_format_layouts[view->format].bs; param->size[0] = view->range / param->stride[0]; -- cgit v1.2.3 From cc065e0ad739cc0219a95c0cb6684451fe2d9f9f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 18 Jan 2016 17:30:59 -0800 Subject: i965/fs_surface_builder: Mask signed integers after conversion --- src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp index 45694ec0894..f2faceeb579 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp @@ -717,6 +717,15 @@ namespace { bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c), brw_imm_d(-(int)scale(widths[c] - s) - 1), BRW_CONDITIONAL_GE); + + /* Mask off all but the bits we actually want. Otherwise, if + * we pass a negative number into the hardware when it's + * expecting something like UINT8, it will happily clamp it to + * +255 for us. + */ + if (is_signed && widths[c] < 32) + bld.AND(offset(dst, bld, c), offset(dst, bld, c), + brw_imm_d((1 << widths[c]) - 1)); } } @@ -787,6 +796,15 @@ namespace { /* Convert to integer. */ bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c)); bld.MOV(offset(dst, bld, c), offset(fdst, bld, c)); + + /* Mask off all but the bits we actually want. Otherwise, if + * we pass a negative number into the hardware when it's + * expecting something like UINT8, it will happily clamp it to + * +255 for us. 
+ */ + if (is_signed && widths[c] < 32) + bld.AND(offset(dst, bld, c), offset(dst, bld, c), + brw_imm_d((1 << widths[c]) - 1)); } } -- cgit v1.2.3 From d9e0b9a06a478a21f21f93a3d66a198e42e84c30 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 25 Jan 2016 19:20:57 -0800 Subject: isl/gen9: Fix slice offset calculation for 1D array images. The X component of the offset is set to the layer index times layer height which is obviously bogus, return the vertical offset of the slice as Y component instead. Fixes a few image load/store tests that use 1D arrays on SKL when forcing it to fall back to untyped reads and writes. --- src/isl/isl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index acc80eae59c..6015c5c305c 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -1260,7 +1260,7 @@ get_image_offset_sa_gen9_1d(const struct isl_surf *surf, const struct isl_extent3d image_align_sa = isl_surf_get_image_alignment_sa(surf); - uint32_t x = layer * isl_surf_get_array_pitch_sa_rows(surf); + uint32_t x = 0; for (uint32_t l = 0; l < level; ++l) { uint32_t W = isl_minify(W0, l); @@ -1270,7 +1270,7 @@ get_image_offset_sa_gen9_1d(const struct isl_surf *surf, } *x_offset_sa = x; - *y_offset_sa = 0; + *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf); } void -- cgit v1.2.3 From d2ec510ddab78e8089c3dc77dfaab5ef1206034c Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 26 Jan 2016 12:20:01 -0800 Subject: anv/image: Fix image parameter initialization. 
--- src/vulkan/anv_image.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index e40fb3d51d3..753af1253ab 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -715,13 +715,14 @@ anv_image_view_fill_image_param(struct anv_device *device, image_param_defaults(param); const struct isl_surf *surf = &view->image->color_surface.isl; - const int cpp = isl_format_get_layout(surf->format)->bs; + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); - param->size[0] = minify(surf->logical_level0_px.width, view->base_mip); - param->size[1] = minify(surf->logical_level0_px.height, view->base_mip); + param->size[0] = view->extent.width; + param->size[1] = view->extent.height; if (surf->dim == ISL_SURF_DIM_3D) { - param->size[2] = minify(surf->logical_level0_px.depth, view->base_mip); + param->size[2] = view->extent.depth; } else { param->size[2] = surf->logical_level0_px.array_len - view->base_layer; } @@ -731,9 +732,14 @@ anv_image_view_fill_image_param(struct anv_device *device, param->stride[0] = cpp; param->stride[1] = surf->row_pitch / cpp; - if (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D) - param->stride[2] = util_align_npot(param->size[0], surf->alignment); - param->stride[3] = /* TODO */ 0; + + if (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D) { + param->stride[2] = util_align_npot(param->size[0], image_align_sa.w); + param->stride[3] = util_align_npot(param->size[1], image_align_sa.h); + } else { + param->stride[2] = 0; + param->stride[3] = isl_surf_get_array_pitch_el_rows(surf); + } switch (surf->tiling) { case ISL_TILING_LINEAR: @@ -781,7 +787,8 @@ anv_image_view_fill_image_param(struct anv_device *device, * brw_fs_surface_builder.cpp) handles this as a sort of tiling with * modulus equal to the LOD. */ - param->tiling[2] = (surf->dim == ISL_SURF_DIM_3D) ? 
view->base_mip : 0; + param->tiling[2] = (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D ? + view->base_mip : 0); } void -- cgit v1.2.3 From a50dc70e21bb78b5f72d1edf7d66224767e1ae6c Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 26 Jan 2016 12:23:08 -0800 Subject: anv/image: Upload raw buffer surface state for untyped storage image and texel buffer access. --- src/vulkan/anv_image.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 753af1253ab..3e344c3aa71 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -443,6 +443,16 @@ alloc_surface_state(struct anv_device *device, } } +static bool +has_matching_storage_typed_format(const struct anv_device *device, + enum isl_format format) +{ + return (isl_format_get_layout(format)->bs <= 4 || + (isl_format_get_layout(format)->bs <= 8 && + (device->info.gen >= 8 || device->info.is_haswell)) || + device->info.gen >= 9); +} + void anv_image_view_init(struct anv_image_view *iview, struct anv_device *device, @@ -515,9 +525,16 @@ anv_image_view_init(struct anv_image_view *iview, if (image->needs_storage_surface_state) { iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); - anv_fill_image_surface_state(device, iview->storage_surface_state.map, - iview, pCreateInfo, - VK_IMAGE_USAGE_STORAGE_BIT); + if (has_matching_storage_typed_format(device, iview->format)) + anv_fill_image_surface_state(device, iview->storage_surface_state.map, + iview, pCreateInfo, + VK_IMAGE_USAGE_STORAGE_BIT); + else + anv_fill_buffer_surface_state(device, iview->storage_surface_state.map, + ISL_FORMAT_RAW, + iview->offset, + iview->bo->size - iview->offset, 1); + } else { iview->storage_surface_state.alloc_size = 0; } @@ -610,12 +627,16 @@ anv_CreateBufferView(VkDevice _device, anv_state_pool_alloc(&device->surface_state_pool, 64, 64); enum isl_format storage_format = - 
isl_lower_storage_image_format(&device->isl_dev, view->format); + has_matching_storage_typed_format(device, view->format) ? + isl_lower_storage_image_format(&device->isl_dev, view->format) : + ISL_FORMAT_RAW; anv_fill_buffer_surface_state(device, view->storage_surface_state.map, storage_format, view->offset, view->range, - format->isl_layout->bs); + (storage_format == ISL_FORMAT_RAW ? 1 : + format->isl_layout->bs)); + } else { view->storage_surface_state = (struct anv_state){ 0 }; } -- cgit v1.2.3 From fc7a7b31c5c7d5030b4cffc7fff011962969db47 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 26 Jan 2016 14:45:46 -0800 Subject: anv/image: clflush the right state map in anv_fill_image_surface_state(). It was clflushing the nonrt_surface_state structure regardless of which state structure was actually being initialized. --- src/vulkan/anv_image.c | 18 +++++++++--------- src/vulkan/anv_private.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 3e344c3aa71..d53363c627d 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -402,7 +402,7 @@ anv_validate_CreateImageView(VkDevice _device, } void -anv_fill_image_surface_state(struct anv_device *device, void *state_map, +anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, struct anv_image_view *iview, const VkImageViewCreateInfo *pCreateInfo, VkImageUsageFlagBits usage) @@ -410,18 +410,18 @@ anv_fill_image_surface_state(struct anv_device *device, void *state_map, switch (device->info.gen) { case 7: if (device->info.is_haswell) - gen75_fill_image_surface_state(device, state_map, iview, + gen75_fill_image_surface_state(device, state.map, iview, pCreateInfo, usage); else - gen7_fill_image_surface_state(device, state_map, iview, + gen7_fill_image_surface_state(device, state.map, iview, pCreateInfo, usage); break; case 8: - gen8_fill_image_surface_state(device, state_map, iview, + 
gen8_fill_image_surface_state(device, state.map, iview, pCreateInfo, usage); break; case 9: - gen9_fill_image_surface_state(device, state_map, iview, + gen9_fill_image_surface_state(device, state.map, iview, pCreateInfo, usage); break; default: @@ -429,7 +429,7 @@ anv_fill_image_surface_state(struct anv_device *device, void *state_map, } if (!device->info.has_llc) - anv_state_clflush(iview->nonrt_surface_state); + anv_state_clflush(state); } static struct anv_state @@ -505,7 +505,7 @@ anv_image_view_init(struct anv_image_view *iview, if (image->needs_nonrt_surface_state) { iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); - anv_fill_image_surface_state(device, iview->nonrt_surface_state.map, + anv_fill_image_surface_state(device, iview->nonrt_surface_state, iview, pCreateInfo, VK_IMAGE_USAGE_SAMPLED_BIT); } else { @@ -515,7 +515,7 @@ anv_image_view_init(struct anv_image_view *iview, if (image->needs_color_rt_surface_state) { iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); - anv_fill_image_surface_state(device, iview->color_rt_surface_state.map, + anv_fill_image_surface_state(device, iview->color_rt_surface_state, iview, pCreateInfo, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); } else { @@ -526,7 +526,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); if (has_matching_storage_typed_format(device, iview->format)) - anv_fill_image_surface_state(device, iview->storage_surface_state.map, + anv_fill_image_surface_state(device, iview->storage_surface_state, iview, pCreateInfo, VK_IMAGE_USAGE_STORAGE_BIT); else diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 16ca0f5ce48..a1a55ddca6b 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1602,7 +1602,7 @@ void anv_image_view_init(struct anv_image_view *view, struct anv_cmd_buffer *cmd_buffer); void -anv_fill_image_surface_state(struct anv_device *device, void *state_map, 
+anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, struct anv_image_view *iview, const VkImageViewCreateInfo *pCreateInfo, VkImageUsageFlagBits usage); -- cgit v1.2.3 From 6840cc15136fb405abd999222f2b08a068ba2e3c Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 26 Jan 2016 14:50:52 -0800 Subject: anv/image: clflush surface state map in anv_fill_buffer_surface_state(). Some of its users had the required clflush on non-LLC platforms, some didn't. Put the clflush in anv_fill_buffer_surface_state() so we don't forget. --- src/vulkan/anv_cmd_buffer.c | 5 +---- src/vulkan/anv_descriptor_set.c | 5 +---- src/vulkan/anv_device.c | 15 ++++++++++----- src/vulkan/anv_image.c | 6 +++--- src/vulkan/anv_private.h | 3 ++- 5 files changed, 17 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 50ecf000167..0966e7658bf 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -762,12 +762,9 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, const struct anv_format *format = anv_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - anv_fill_buffer_surface_state(cmd_buffer->device, surface_state.map, + anv_fill_buffer_surface_state(cmd_buffer->device, surface_state, format->surface_format, bo_offset, 12, 1); - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(surface_state); - bt_map[0] = surface_state.offset + state_offset; add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); } diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 8b3b5dfadbc..8997f50297a 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -565,13 +565,10 @@ void anv_UpdateDescriptorSets( else view->range = write->pBufferInfo[j].range; - anv_fill_buffer_surface_state(device, view->surface_state.map, + anv_fill_buffer_surface_state(device, view->surface_state, 
view->format, view->offset, view->range, 1); - if (!device->info.has_llc) - anv_state_clflush(view->surface_state); - desc[j] = (struct anv_descriptor) { .type = write->descriptorType, .buffer_view = view, diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index f9aa3149207..557f4ca16fb 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1618,26 +1618,31 @@ void anv_DestroyBuffer( } void -anv_fill_buffer_surface_state(struct anv_device *device, void *state, +anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride) { switch (device->info.gen) { case 7: if (device->info.is_haswell) - gen75_fill_buffer_surface_state(state, format, offset, range, stride); + gen75_fill_buffer_surface_state(state.map, format, offset, range, + stride); else - gen7_fill_buffer_surface_state(state, format, offset, range, stride); + gen7_fill_buffer_surface_state(state.map, format, offset, range, + stride); break; case 8: - gen8_fill_buffer_surface_state(state, format, offset, range, stride); + gen8_fill_buffer_surface_state(state.map, format, offset, range, stride); break; case 9: - gen9_fill_buffer_surface_state(state, format, offset, range, stride); + gen9_fill_buffer_surface_state(state.map, format, offset, range, stride); break; default: unreachable("unsupported gen\n"); } + + if (!device->info.has_llc) + anv_state_clflush(state); } void anv_DestroySampler( diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index d53363c627d..f3fced5e704 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -530,7 +530,7 @@ anv_image_view_init(struct anv_image_view *iview, iview, pCreateInfo, VK_IMAGE_USAGE_STORAGE_BIT); else - anv_fill_buffer_surface_state(device, iview->storage_surface_state.map, + anv_fill_buffer_surface_state(device, iview->storage_surface_state, ISL_FORMAT_RAW, iview->offset, iview->bo->size - iview->offset, 1); @@ -614,7 +614,7 @@ 
anv_CreateBufferView(VkDevice _device, view->surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - anv_fill_buffer_surface_state(device, view->surface_state.map, + anv_fill_buffer_surface_state(device, view->surface_state, view->format, view->offset, view->range, format->isl_layout->bs); @@ -631,7 +631,7 @@ anv_CreateBufferView(VkDevice _device, isl_lower_storage_image_format(&device->isl_dev, view->format) : ISL_FORMAT_RAW; - anv_fill_buffer_surface_state(device, view->storage_surface_state.map, + anv_fill_buffer_surface_state(device, view->storage_surface_state, storage_format, view->offset, view->range, (storage_format == ISL_FORMAT_RAW ? 1 : diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a1a55ddca6b..215727163ed 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1640,7 +1640,8 @@ struct anv_buffer_view { const struct anv_format * anv_format_for_descriptor_type(VkDescriptorType type); -void anv_fill_buffer_surface_state(struct anv_device *device, void *state, +void anv_fill_buffer_surface_state(struct anv_device *device, + struct anv_state state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride); -- cgit v1.2.3 From 7ef0d39cb2c8c2931e6afc56322ceb84a2fe0cbe Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 15:44:02 -0800 Subject: anv/cmd_buffer: Put base_instance in the second component --- src/vulkan/gen7_cmd_buffer.c | 2 +- src/vulkan/gen8_cmd_buffer.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 8fae1351848..9f66364c6c1 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -545,7 +545,7 @@ emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); ((uint32_t *)id_state.map)[0] = base_vertex; - ((uint32_t *)id_state.map)[0] = base_instance; + ((uint32_t 
*)id_state.map)[1] = base_instance; if (!cmd_buffer->device->info.has_llc) anv_state_clflush(id_state); diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index c919c065ca2..560608fe346 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -476,7 +476,7 @@ emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); ((uint32_t *)id_state.map)[0] = base_vertex; - ((uint32_t *)id_state.map)[0] = base_instance; + ((uint32_t *)id_state.map)[1] = base_instance; if (!cmd_buffer->device->info.has_llc) anv_state_clflush(id_state); -- cgit v1.2.3 From a1ea45b8578efaae6d6928f750279785e5dc6b27 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 15:44:18 -0800 Subject: genX/pipeline: Don't make vertex bindings with holes --- src/vulkan/genX_pipeline_util.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h index 363a1fd23ac..e9c7d16a985 100644 --- a/src/vulkan/genX_pipeline_util.h +++ b/src/vulkan/genX_pipeline_util.h @@ -137,16 +137,25 @@ emit_vertex_input(struct anv_pipeline *pipeline, const uint32_t id_slot = __builtin_popcount(elements); if (needs_svgs_elem) { + /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum: + * "Within a VERTEX_ELEMENT_STATE structure, if a Component + * Control field is set to something other than VFCOMP_STORE_SRC, + * no higher-numbered Component Control fields may be set to + * VFCOMP_STORE_SRC" + * + * This means, that if we have BaseInstance, we need BaseVertex as + * well. Just do all or nothing. + */ + uint32_t base_ctrl = (pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance) ? 
+ VFCOMP_STORE_SRC : VFCOMP_STORE_0; + struct GENX(VERTEX_ELEMENT_STATE) element = { .VertexBufferIndex = 32, /* Reserved for this */ .Valid = true, .SourceElementFormat = ISL_FORMAT_R32G32_UINT, - /* FIXME: Do we need to provide the base vertex as component 0 here - * to support the correct base vertex ID? */ - .Component0Control = pipeline->vs_prog_data.uses_basevertex ? - VFCOMP_STORE_SRC : VFCOMP_STORE_0, - .Component1Control = pipeline->vs_prog_data.uses_baseinstance ? - VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component0Control = base_ctrl, + .Component1Control = base_ctrl, #if ANV_GEN >= 8 .Component2Control = VFCOMP_STORE_0, .Component3Control = VFCOMP_STORE_0, -- cgit v1.2.3 From c5dc6cdf26a8d2bf51a2a9a08b53eefcc00e74f0 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 26 Jan 2016 15:42:08 -0800 Subject: i965/skl: Utilize new 5th bit for gateway messages Cc: Jordan Justen Signed-off-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 0ff5cd6de19..4011bf5a2b7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -926,6 +926,8 @@ void fs_visitor::emit_barrier() { assert(devinfo->gen >= 7); + const uint32_t barrier_id_mask = + devinfo->gen >= 9 ? 0x8f000000u : 0x0f000000u; /* We are getting the barrier ID from the compute shader header */ assert(stage == MESA_SHADER_COMPUTE); @@ -939,7 +941,7 @@ fs_visitor::emit_barrier() /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */ fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)); - pbld.AND(component(payload, 2), r0_2, brw_imm_ud(0x0f000000u)); + pbld.AND(component(payload, 2), r0_2, brw_imm_ud(barrier_id_mask)); /* Emit a gateway "barrier" message using the payload we set up, followed * by a wait instruction. 
-- cgit v1.2.3 From 32dcfc953e6f2ef62e705bcb470f291c7ecb1814 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 26 Jan 2016 17:02:35 -0800 Subject: vtn: Delete references to IMix opcode. This is being removed in SPIR-V. Bugzilla: https://cvs.khronos.org/bugzilla/show_bug.cgi?id=15452 --- src/glsl/nir/spirv/vtn_glsl450.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 6470dc11b4d..515a743fe48 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -626,7 +626,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450PackDouble2x32: case GLSLstd450UnpackDouble2x32: - case GLSLstd450IMix: default: unreachable("Unhandled opcode"); } -- cgit v1.2.3 From 7d84fe9b1f35da32c435c23eb07dfac4cbde7502 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 15:59:43 -0800 Subject: HACK: Expose support for stencil blits If someone actually tries to use them, they won't work, but at least we don't fail to return format properties now. 
--- src/vulkan/anv_formats.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 642bc7d3cb1..2cfcac45b9b 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -496,6 +496,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( } } +#if 0 if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { if (anv_format_for_vk_format(format)->has_stencil) { /* Not yet implemented because copying to a W-tiled surface is crazy @@ -506,6 +507,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( goto unsupported; } } +#endif if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) { if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { -- cgit v1.2.3 From 9bc72a9213432622364bdeb0ddd053a494896a6e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 20:16:43 -0800 Subject: anv/image: Do swizzle remapping in anv_image.c TODO: At some point, we really need to make an image_view_init_info that's a flyweight and stop stuffing everything into image_view. 
--- src/vulkan/anv_image.c | 17 +++++++++++++++++ src/vulkan/anv_private.h | 1 + src/vulkan/gen7_state.c | 12 ++++-------- src/vulkan/gen8_state.c | 12 ++++-------- src/vulkan/genX_state_util.h | 11 +---------- 5 files changed, 27 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index f3fced5e704..8417177105c 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -453,6 +453,15 @@ has_matching_storage_typed_format(const struct anv_device *device, device->info.gen >= 9); } +static VkComponentSwizzle +remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) +{ + if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) + return component; + else + return swizzle; +} + void anv_image_view_init(struct anv_image_view *iview, struct anv_device *device, @@ -493,6 +502,14 @@ anv_image_view_init(struct anv_image_view *iview, iview->vk_format = pCreateInfo->format; iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, image->tiling); + iview->swizzle.r = remap_swizzle(pCreateInfo->components.r, + VK_COMPONENT_SWIZZLE_R); + iview->swizzle.g = remap_swizzle(pCreateInfo->components.g, + VK_COMPONENT_SWIZZLE_G); + iview->swizzle.b = remap_swizzle(pCreateInfo->components.b, + VK_COMPONENT_SWIZZLE_B); + iview->swizzle.a = remap_swizzle(pCreateInfo->components.a, + VK_COMPONENT_SWIZZLE_A); iview->base_layer = range->baseArrayLayer; iview->base_mip = range->baseMipLevel; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 215727163ed..1b3d80e61be 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1566,6 +1566,7 @@ struct anv_image_view { VkImageAspectFlags aspect_mask; VkFormat vk_format; + VkComponentMapping swizzle; enum isl_format format; uint32_t base_layer; uint32_t base_mip; diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index b3abe74090d..4c27716d18e 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ 
-202,14 +202,10 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, .MCSEnable = false, # if (ANV_IS_HASWELL) - .ShaderChannelSelectR = vk_to_gen_swizzle(pCreateInfo->components.r, - VK_COMPONENT_SWIZZLE_R), - .ShaderChannelSelectG = vk_to_gen_swizzle(pCreateInfo->components.g, - VK_COMPONENT_SWIZZLE_G), - .ShaderChannelSelectB = vk_to_gen_swizzle(pCreateInfo->components.b, - VK_COMPONENT_SWIZZLE_B), - .ShaderChannelSelectA = vk_to_gen_swizzle(pCreateInfo->components.a, - VK_COMPONENT_SWIZZLE_A), + .ShaderChannelSelectR = vk_to_gen_swizzle[iview->swizzle.r], + .ShaderChannelSelectG = vk_to_gen_swizzle[iview->swizzle.g], + .ShaderChannelSelectB = vk_to_gen_swizzle[iview->swizzle.b], + .ShaderChannelSelectA = vk_to_gen_swizzle[iview->swizzle.a], # else /* XXX: Seriously? */ .RedClearColor = 0, .GreenClearColor = 0, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 6e4c3eafe11..5b3691d22d0 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -224,14 +224,10 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, .GreenClearColor = 0, .BlueClearColor = 0, .AlphaClearColor = 0, - .ShaderChannelSelectRed = vk_to_gen_swizzle(pCreateInfo->components.r, - VK_COMPONENT_SWIZZLE_R), - .ShaderChannelSelectGreen = vk_to_gen_swizzle(pCreateInfo->components.g, - VK_COMPONENT_SWIZZLE_G), - .ShaderChannelSelectBlue = vk_to_gen_swizzle(pCreateInfo->components.b, - VK_COMPONENT_SWIZZLE_B), - .ShaderChannelSelectAlpha = vk_to_gen_swizzle(pCreateInfo->components.a, - VK_COMPONENT_SWIZZLE_A), + .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], .ResourceMinLOD = 0.0, .SurfaceBaseAddress = { NULL, iview->offset }, }; diff --git a/src/vulkan/genX_state_util.h b/src/vulkan/genX_state_util.h index 
78fe1d4da19..67f798ab66e 100644 --- a/src/vulkan/genX_state_util.h +++ b/src/vulkan/genX_state_util.h @@ -58,7 +58,7 @@ anv_surface_format(const struct anv_device *device, enum isl_format format, } #if ANV_GEN > 7 || ANV_IS_HASWELL -static const uint32_t vk_to_gen_swizzle_map[] = { +static const uint32_t vk_to_gen_swizzle[] = { [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, [VK_COMPONENT_SWIZZLE_R] = SCS_RED, @@ -66,15 +66,6 @@ static const uint32_t vk_to_gen_swizzle_map[] = { [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA }; - -static inline uint32_t -vk_to_gen_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) -{ - if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) - return vk_to_gen_swizzle_map[component]; - else - return vk_to_gen_swizzle_map[swizzle]; -} #endif static inline uint32_t -- cgit v1.2.3 From c20f78dc5d7e0fabd58c2d8548d5a6ead1ec1072 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 26 Jan 2016 11:10:56 -0800 Subject: anv: Support swizzled formats. Some formats require a swizzle in order to map them to actual hardware formats. This allows us to turn on two new Vulkan formats. --- src/vulkan/anv_formats.c | 40 +++++++++++++++++++++++++++++----------- src/vulkan/anv_image.c | 39 +++++++++++++++++++++++++++++---------- src/vulkan/anv_meta.c | 3 ++- src/vulkan/anv_private.h | 10 +++++++++- src/vulkan/genX_pipeline_util.h | 3 ++- 5 files changed, 71 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 2cfcac45b9b..30e865c39b6 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -26,15 +26,22 @@ #include "gen7_pack.h" -#define fmt(__vk_fmt, __hw_fmt, ...) \ +#define RGBA ((struct anv_format_swizzle) { 0, 1, 2, 3 }) +#define BGRA ((struct anv_format_swizzle) { 2, 1, 0, 3 }) + +#define swiz_fmt(__vk_fmt, __hw_fmt, __swizzle, ...) 
\ [__vk_fmt] = { \ .vk_format = __vk_fmt, \ .name = #__vk_fmt, \ .surface_format = __hw_fmt, \ .isl_layout = &isl_format_layouts[__hw_fmt], \ + .swizzle = __swizzle, \ __VA_ARGS__ \ } +#define fmt(__vk_fmt, __hw_fmt, ...) \ + swiz_fmt(__vk_fmt, __hw_fmt, RGBA, __VA_ARGS__) + /* HINT: For array formats, the ISL name should match the VK name. For * packed formats, they should have the channels in reverse order from each * other. The reason for this is that, for packed formats, the ISL (and @@ -44,9 +51,9 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW), fmt(VK_FORMAT_R4G4_UNORM_PACK8, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM), - fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + swiz_fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM, BGRA), fmt(VK_FORMAT_R5G6B5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM), - fmt(VK_FORMAT_B5G6R5_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + swiz_fmt(VK_FORMAT_B5G6R5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM, BGRA), fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16, ISL_FORMAT_A1B5G5R5_UNORM), fmt(VK_FORMAT_B5G5R5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16, ISL_FORMAT_B5G5R5A1_UNORM), @@ -243,10 +250,13 @@ anv_format_for_vk_format(VkFormat format) */ enum isl_format anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, - VkImageTiling tiling) + VkImageTiling tiling, struct anv_format_swizzle *swizzle) { const struct anv_format *anv_fmt = &anv_formats[format]; + if (swizzle) + *swizzle = anv_fmt->swizzle; + switch (aspect) { case VK_IMAGE_ASPECT_COLOR_BIT: if (anv_fmt->surface_format == ISL_FORMAT_UNSUPPORTED) { @@ -296,7 +306,8 @@ void anv_validate_GetPhysicalDeviceFormatProperties( static VkFormatFeatureFlags get_image_format_properties(int gen, enum isl_format base, - enum isl_format actual) + enum isl_format actual, + struct anv_format_swizzle swizzle) { const struct brw_surface_format_info *info = 
&surface_formats[actual]; @@ -309,12 +320,16 @@ get_image_format_properties(int gen, enum isl_format base, VK_FORMAT_FEATURE_BLIT_SRC_BIT; } - if (info->render_target <= gen) { + /* We can render to swizzled formats. However, if the alpha channel is + * moved, then blending won't work correctly. The PRM tells us + * straight-up not to render to such a surface. + */ + if (info->render_target <= gen && swizzle.a == 3) { flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; } - if (info->alpha_blend <= gen) + if (info->alpha_blend <= gen && swizzle.a == 3) flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; /* Load/store is determined based on base format. This prevents RGB @@ -374,13 +389,16 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d } } else { enum isl_format linear_fmt, tiled_fmt; + struct anv_format_swizzle linear_swizzle, tiled_swizzle; linear_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_TILING_LINEAR); + VK_IMAGE_TILING_LINEAR, &linear_swizzle); tiled_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_TILING_OPTIMAL); + VK_IMAGE_TILING_OPTIMAL, &tiled_swizzle); - linear = get_image_format_properties(gen, linear_fmt, linear_fmt); - tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt); + linear = get_image_format_properties(gen, linear_fmt, linear_fmt, + linear_swizzle); + tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt, + tiled_swizzle); buffer = get_buffer_format_properties(gen, linear_fmt); /* XXX: We handle 3-channel formats by switching them out for RGBX or diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 8417177105c..a0a1312f81d 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -128,7 +128,8 @@ make_surface(const struct anv_device *dev, ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, .dim = vk_to_isl_surf_dim[vk_info->imageType], - .format = anv_get_isl_format(vk_info->format, 
aspect, vk_info->tiling), + .format = anv_get_isl_format(vk_info->format, aspect, + vk_info->tiling, NULL), .width = vk_info->extent.width, .height = vk_info->extent.height, .depth = vk_info->extent.depth, @@ -454,12 +455,28 @@ has_matching_storage_typed_format(const struct anv_device *device, } static VkComponentSwizzle -remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component) +remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component, + struct anv_format_swizzle format_swizzle) { if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) - return component; - else - return swizzle; + swizzle = component; + + switch (swizzle) { + case VK_COMPONENT_SWIZZLE_ZERO: + return VK_COMPONENT_SWIZZLE_ZERO; + case VK_COMPONENT_SWIZZLE_ONE: + return VK_COMPONENT_SWIZZLE_ONE; + case VK_COMPONENT_SWIZZLE_R: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.r; + case VK_COMPONENT_SWIZZLE_G: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.g; + case VK_COMPONENT_SWIZZLE_B: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.b; + case VK_COMPONENT_SWIZZLE_A: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.a; + default: + unreachable("Invalid swizzle"); + } } void @@ -500,16 +517,18 @@ anv_image_view_init(struct anv_image_view *iview, iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; iview->vk_format = pCreateInfo->format; + + struct anv_format_swizzle swizzle; iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, - image->tiling); + image->tiling, &swizzle); iview->swizzle.r = remap_swizzle(pCreateInfo->components.r, - VK_COMPONENT_SWIZZLE_R); + VK_COMPONENT_SWIZZLE_R, swizzle); iview->swizzle.g = remap_swizzle(pCreateInfo->components.g, - VK_COMPONENT_SWIZZLE_G); + VK_COMPONENT_SWIZZLE_G, swizzle); iview->swizzle.b = remap_swizzle(pCreateInfo->components.b, - VK_COMPONENT_SWIZZLE_B); + VK_COMPONENT_SWIZZLE_B, swizzle); iview->swizzle.a = remap_swizzle(pCreateInfo->components.a, - VK_COMPONENT_SWIZZLE_A); + VK_COMPONENT_SWIZZLE_A, 
swizzle); iview->base_layer = range->baseArrayLayer; iview->base_mip = range->baseMipLevel; diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 7034f70d6ac..4fed2ea9d54 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -972,7 +972,8 @@ choose_buffer_format(struct anv_image *image, VkImageAspectFlagBits aspect) * work if the buffer is the destination. */ enum isl_format linear_format = anv_get_isl_format(image->vk_format, aspect, - VK_IMAGE_TILING_LINEAR); + VK_IMAGE_TILING_LINEAR, + NULL); return vk_format_for_size(isl_format_layouts[linear_format].bs); } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 1b3d80e61be..1ae7066f5fd 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1472,12 +1472,20 @@ gen9_compute_pipeline_create(VkDevice _device, const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); +struct anv_format_swizzle { + unsigned r:2; + unsigned g:2; + unsigned b:2; + unsigned a:2; +}; + struct anv_format { const VkFormat vk_format; const char *name; enum isl_format surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ const struct isl_format_layout *isl_layout; uint16_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ + struct anv_format_swizzle swizzle; bool has_stencil; }; @@ -1486,7 +1494,7 @@ anv_format_for_vk_format(VkFormat format); enum isl_format anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, - VkImageTiling tiling); + VkImageTiling tiling, struct anv_format_swizzle *swizzle); static inline bool anv_format_is_color(const struct anv_format *format) diff --git a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h index e9c7d16a985..da61a1e5d39 100644 --- a/src/vulkan/genX_pipeline_util.h +++ b/src/vulkan/genX_pipeline_util.h @@ -99,7 +99,8 @@ emit_vertex_input(struct anv_pipeline *pipeline, &info->pVertexAttributeDescriptions[i]; enum isl_format format = anv_get_isl_format(desc->format, VK_IMAGE_ASPECT_COLOR_BIT, - 
VK_IMAGE_TILING_LINEAR); + VK_IMAGE_TILING_LINEAR, + NULL); assert(desc->binding < 32); -- cgit v1.2.3 From 2af3acd0612fe2d08d18b274d977530fa6f913ab Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 17:10:57 -0800 Subject: HACK/i965/surface_formats: Mark A4B4G4R4 as being supported The table has this marked as unsupported on all gens, but I don't really believe that given how early it is in the table. I've tested and it seems to work on Broadwell. The Bspec says that it sould be renderable on SKL+ but alpha blending is questionable. Side note: We really need to audit the format table again. --- src/mesa/drivers/dri/i965/brw_surface_formats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 56813bf9a6f..7c16b84eb87 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -205,7 +205,7 @@ const struct brw_surface_format_info surface_formats[] = { SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) SF( x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) - SF( x, x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM) + SF( Y, Y, x, Y, 90, x, x, x, x, x, A4B4G4R4_UNORM) SF( x, x, x, x, x, x, x, x, x, x, L8A8_UINT) SF( x, x, x, x, x, x, x, x, x, x, L8A8_SINT) SF( Y, Y, x, 45, Y, Y, Y, x, x, x, R8_UNORM) -- cgit v1.2.3 From 9ac624751ea8343a30865fefacd2b999268c541f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 17:28:16 -0800 Subject: anv/formats: Use is_power_of_two instead of is_rgb to determine renderability --- src/vulkan/anv_formats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 30e865c39b6..3b63c97e5f4 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -409,7 +409,7 @@ 
anv_physical_device_get_format_properties(struct anv_physical_device *physical_d * what most clients will want. */ if (linear_fmt != ISL_FORMAT_UNSUPPORTED && - isl_format_is_rgb(linear_fmt) && + !util_is_power_of_two(isl_format_layouts[linear_fmt].bs) && isl_format_rgb_to_rgbx(linear_fmt) == ISL_FORMAT_UNSUPPORTED) { tiled &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT & ~VK_FORMAT_FEATURE_BLIT_DST_BIT; -- cgit v1.2.3 From dc3de6f8df8784c477946d50baa7b5634f06b461 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 21:45:21 -0800 Subject: anv/pipeline: Only lower input indirects if EmitNoIndirectInput is set --- src/vulkan/anv_pipeline.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index f52b78628cc..40f26d708ee 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -171,7 +171,9 @@ anv_shader_compile_to_nir(struct anv_device *device, nir_shader_gather_info(nir, entry_point->impl); - uint32_t indirect_mask = (1 << nir_var_shader_in); + uint32_t indirect_mask = 0; + if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput) + indirect_mask |= (1 << nir_var_shader_in); if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) indirect_mask |= 1 << nir_var_local; -- cgit v1.2.3 From f2f03c5b65a07b24df284ae46b975cdf8f3d3f06 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 21:52:59 -0800 Subject: anv/pipeline: Set MaximumVPIndex in 3DSTATE_CLIP --- src/vulkan/gen7_pipeline.c | 3 ++- src/vulkan/gen8_pipeline.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 679510ab2bd..ea5b3401121 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -266,7 +266,8 @@ genX(graphics_pipeline_create)( .LineStripListProvokingVertexSelect = 0, .TriangleFanProvokingVertexSelect = 0, .MinimumPointWidth = 0.125, - 
.MaximumPointWidth = 255.875); + .MaximumPointWidth = 255.875, + .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); uint32_t samples = 1; uint32_t log2_samples = __builtin_ffs(samples) - 1; diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 261563943f7..5d6b87064e9 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -256,7 +256,8 @@ genX(graphics_pipeline_create)( .ClipEnable = true, .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), .MinimumPointWidth = 0.125, - .MaximumPointWidth = 255.875); + .MaximumPointWidth = 255.875, + .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), .StatisticsEnable = true, -- cgit v1.2.3 From d3607351fecd80c5e875204af394e4aaee06754f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 26 Jan 2016 22:10:11 -0800 Subject: gen7/cmd_buffer: SCISSOR_RECT structs are tightly packed The pointer has to be 32-byte aligned, but the structs themselves are 2 dwords each, tightly packed. 
--- src/vulkan/gen7_cmd_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 9f66364c6c1..f201c151acb 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -170,7 +170,7 @@ emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, uint32_t count, const VkRect2D *scissors) { struct anv_state scissor_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 32, 32); + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); for (uint32_t i = 0; i < count; i++) { const VkRect2D *s = &scissors[i]; @@ -197,10 +197,10 @@ emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, }; if (s->extent.width <= 0 || s->extent.height <= 0) { - GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 32, + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, &empty_scissor); } else { - GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 32, &scissor); + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, &scissor); } } -- cgit v1.2.3 From 38a3a535eb395dc3a7718cea1bc135f0d94a6dad Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 26 Jan 2016 23:09:45 -0800 Subject: anv: Update the device limits. Fixes dEQP-VK.api.info.device.properties. I haven't tested any others. 
--- src/vulkan/anv_device.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 557f4ca16fb..f116f9a7354 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -401,7 +401,7 @@ void anv_GetPhysicalDeviceProperties( .maxImageDimension3D = (1 << 10), .maxImageDimensionCube = (1 << 14), .maxImageArrayLayers = (1 << 10), - .maxTexelBufferElements = (1 << 14), + .maxTexelBufferElements = 128 * 1024 * 1024, .maxUniformBufferRange = UINT32_MAX, .maxStorageBufferRange = UINT32_MAX, .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, @@ -427,9 +427,9 @@ void anv_GetPhysicalDeviceProperties( .maxDescriptorSetInputAttachments = 256, .maxVertexInputAttributes = 32, .maxVertexInputBindings = 32, - .maxVertexInputAttributeOffset = 256, - .maxVertexInputBindingStride = 256, - .maxVertexOutputComponents = 32, + .maxVertexInputAttributeOffset = 2047, + .maxVertexInputBindingStride = 2048, + .maxVertexOutputComponents = 128, .maxTessellationGenerationLevel = 0, .maxTessellationPatchSize = 0, .maxTessellationControlPerVertexInputComponents = 0, @@ -438,21 +438,17 @@ void anv_GetPhysicalDeviceProperties( .maxTessellationControlTotalOutputComponents = 0, .maxTessellationEvaluationInputComponents = 0, .maxTessellationEvaluationOutputComponents = 0, - .maxGeometryShaderInvocations = 6, - .maxGeometryInputComponents = 16, - .maxGeometryOutputComponents = 16, - .maxGeometryOutputVertices = 16, - .maxGeometryTotalOutputComponents = 16, - .maxFragmentInputComponents = 16, + .maxGeometryShaderInvocations = 32, + .maxGeometryInputComponents = 64, + .maxGeometryOutputComponents = 128, + .maxGeometryOutputVertices = 256, + .maxGeometryTotalOutputComponents = 1024, + .maxFragmentInputComponents = 128, .maxFragmentOutputAttachments = 8, .maxFragmentDualSrcAttachments = 2, .maxFragmentCombinedOutputResources = 8, - .maxComputeSharedMemorySize = 1024, - 
.maxComputeWorkGroupCount = { - 16 * devinfo->max_cs_threads, - 16 * devinfo->max_cs_threads, - 16 * devinfo->max_cs_threads, - }, + .maxComputeSharedMemorySize = 32768, + .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, .maxComputeWorkGroupSize = { 16 * devinfo->max_cs_threads, @@ -468,16 +464,16 @@ void anv_GetPhysicalDeviceProperties( .maxSamplerAnisotropy = 16, .maxViewports = MAX_VIEWPORTS, .maxViewportDimensions = { (1 << 14), (1 << 14) }, - .viewportBoundsRange = { -1.0, 1.0 }, /* FIXME */ + .viewportBoundsRange = { -16384.0, 16384.0 }, .viewportSubPixelBits = 13, /* We take a float? */ .minMemoryMapAlignment = 4096, /* A page */ .minTexelBufferOffsetAlignment = 1, .minUniformBufferOffsetAlignment = 1, .minStorageBufferOffsetAlignment = 1, - .minTexelOffset = 0, /* FIXME */ - .maxTexelOffset = 0, /* FIXME */ - .minTexelGatherOffset = 0, /* FIXME */ - .maxTexelGatherOffset = 0, /* FIXME */ + .minTexelOffset = -8, + .maxTexelOffset = 7, + .minTexelGatherOffset = -8, + .maxTexelGatherOffset = 7, .minInterpolationOffset = 0, /* FIXME */ .maxInterpolationOffset = 0, /* FIXME */ .subPixelInterpolationOffsetBits = 0, /* FIXME */ -- cgit v1.2.3 From b833e7a63c0df2c6d4c24e11d8196c90bfd0906e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 27 Jan 2016 11:36:44 -0800 Subject: anv: Put back code to grow shader scratch space This was lost in commit a71e614d33e8d869bbaced8948349a7180783ab7. 
--- src/vulkan/anv_pipeline.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 40f26d708ee..e879b35388d 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -1182,6 +1182,10 @@ anv_pipeline_init(struct anv_pipeline *pipeline, if (extra && extra->use_rectlist) pipeline->topology = _3DPRIM_RECTLIST; + while (anv_block_pool_size(&device->scratch_block_pool) < + pipeline->total_scratch) + anv_block_pool_alloc(&device->scratch_block_pool); + return VK_SUCCESS; } -- cgit v1.2.3 From 4acfc9effbe4513075881a045bdfbec8ad2433a4 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 27 Jan 2016 12:21:04 -0800 Subject: i965: Fix SIN/COS precision problems. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 26 ++++++++++++++++++++------ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 26 ++++++++++++++++++++------ 2 files changed, 40 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index f41854c2c09..11e7c7dc102 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -715,15 +715,29 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; - case nir_op_fsin: - inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); - inst->saturate = instr->dest.saturate; + case nir_op_fsin: { + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F); + inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]); + if (instr->dest.saturate) { + inst->dst = result; + inst->saturate = true; + } else { + bld.MUL(result, tmp, brw_imm_f(0.99997)); + } break; + } - case nir_op_fcos: - inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); - inst->saturate = instr->dest.saturate; + case nir_op_fcos: { + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F); + inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]); + if 
(instr->dest.saturate) { + inst->dst = result; + inst->saturate = true; + } else { + bld.MUL(result, tmp, brw_imm_f(0.99997)); + } break; + } case nir_op_fddx: if (fs_key->high_quality_derivatives) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index a608dca03ff..2b261fcc952 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1086,15 +1086,29 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; - case nir_op_fsin: - inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); - inst->saturate = instr->dest.saturate; + case nir_op_fsin: { + src_reg tmp = src_reg(this, glsl_type::vec4_type); + inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]); + if (instr->dest.saturate) { + inst->dst = dst; + inst->saturate = true; + } else { + emit(MUL(dst, tmp, brw_imm_f(0.99997))); + } break; + } - case nir_op_fcos: - inst = emit_math(SHADER_OPCODE_COS, dst, op[0]); - inst->saturate = instr->dest.saturate; + case nir_op_fcos: { + src_reg tmp = src_reg(this, glsl_type::vec4_type); + inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]); + if (instr->dest.saturate) { + inst->dst = dst; + inst->saturate = true; + } else { + emit(MUL(dst, tmp, brw_imm_f(0.99997))); + } break; + } case nir_op_idiv: case nir_op_udiv: -- cgit v1.2.3 From f92a35d831cf54f2244d5510932fd17c97b02ce4 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 27 Jan 2016 14:20:47 -0800 Subject: vtn: Fix Modf. We were botching this for negative numbers - floor of a negative rounds the wrong way. Additionally, both results are supposed to retain the sign of the original. To fix this, just take the abs of both values, then put the sign back. There's probably a better way to do this, but this works for now. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 515a743fe48..b98ef052cbf 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -418,16 +418,20 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450InverseSqrt: op = nir_op_frsq; break; case GLSLstd450Modf: { - val->ssa->def = nir_ffract(nb, src[0]); + nir_ssa_def *sign = nir_fsign(nb, src[0]); + nir_ssa_def *abs = nir_fabs(nb, src[0]); + val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), - nir_ffloor(nb, src[0]), 0xf); + nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf); return; } case GLSLstd450ModfStruct: { + nir_ssa_def *sign = nir_fsign(nb, src[0]); + nir_ssa_def *abs = nir_fabs(nb, src[0]); assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_ffract(nb, src[0]); - val->ssa->elems[1]->def = nir_ffloor(nb, src[0]); + val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); + val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs)); return; } -- cgit v1.2.3 From 9f954310e82a1500a7b1370e95b60670f8a84ac0 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 27 Jan 2016 15:07:18 -0800 Subject: vtn: Fix atan2 for non-scalars. The if/then/else block was bogus, as it can only take a scalar condition, and we need to select component-wise. The GLSL IR implementation of atan2 handles this by looping over components, but I decided to try and do it vector-wise, and messed up. For now, just bcsel. It means that we do the atan1 math even if all components hit the quick case, but it works, and presumably at least one component will hit the expensive path anyway. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index b98ef052cbf..8d730168c08 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -295,14 +295,11 @@ build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) nir_ssa_def *zero = nir_imm_float(b, 0.0f); /* If |x| >= 1.0e-8 * |y|: */ - nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = nir_src_for_ssa( + nir_ssa_def *condition = nir_fge(b, nir_fabs(b, x), - nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)))); - nir_builder_cf_insert(b, &if_stmt->cf_node); + nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y))); /* Then...call atan(y/x) and fix it up: */ - b->cursor = nir_after_cf_list(&if_stmt->then_list); nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x)); nir_ssa_def *r_then = nir_bcsel(b, nir_flt(b, x, zero), @@ -313,28 +310,10 @@ build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) atan1); /* Else... 
*/ - b->cursor = nir_after_cf_list(&if_stmt->else_list); nir_ssa_def *r_else = nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f)); - b->cursor = nir_after_cf_node(&if_stmt->cf_node); - - nir_phi_instr *phi = nir_phi_instr_create(b->shader); - nir_ssa_dest_init(&phi->instr, &phi->dest, r_then->num_components, NULL); - - nir_phi_src *phi_src0 = ralloc(phi, nir_phi_src); - nir_phi_src *phi_src1 = ralloc(phi, nir_phi_src); - - phi_src0->pred = nir_cf_node_as_block((nir_cf_node *) exec_list_get_head(&if_stmt->then_list)); - phi_src0->src = nir_src_for_ssa(r_then); - exec_list_push_tail(&phi->srcs, &phi_src0->node); - phi_src1->pred = nir_cf_node_as_block((nir_cf_node *) exec_list_get_head(&if_stmt->else_list)); - phi_src1->src = nir_src_for_ssa(r_else); - exec_list_push_tail(&phi->srcs, &phi_src1->node); - - nir_builder_instr_insert(b, &phi->instr); - - return &phi->dest.ssa; + return nir_bcsel(b, condition, r_then, r_else); } static nir_ssa_def * -- cgit v1.2.3 From 45ecfcd63777499dd809259dec6beabacbf022df Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 25 Jan 2016 11:45:12 -0800 Subject: isl: Add func isl_surf_get_tile_info() --- src/isl/isl.c | 9 +++++++++ src/isl/isl.h | 5 +++++ 2 files changed, 14 insertions(+) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 6015c5c305c..357d0ea69c1 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -1150,6 +1150,15 @@ isl_surf_init_s(const struct isl_device *dev, return true; } +void +isl_surf_get_tile_info(const struct isl_device *dev, + const struct isl_surf *surf, + struct isl_tile_info *tile_info) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + isl_tiling_get_info(dev, surf->tiling, fmtl->bs, tile_info); +} + /** * A variant of isl_surf_get_image_offset_sa() specific to * ISL_DIM_LAYOUT_GEN4_2D. 
diff --git a/src/isl/isl.h b/src/isl/isl.h index 8905d868f4e..3eada623fbe 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -884,6 +884,11 @@ isl_surf_init_s(const struct isl_device *dev, struct isl_surf *surf, const struct isl_surf_init_info *restrict info); +void +isl_surf_get_tile_info(const struct isl_device *dev, + const struct isl_surf *surf, + struct isl_tile_info *tile_info); + /** * Alignment of the upper-left sample of each subimage, in units of surface * elements. -- cgit v1.2.3 From ea44d31528219eb97ce08706d2a6fb4da32636be Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 25 Jan 2016 12:23:57 -0800 Subject: isl: Fix row pitch for compressed formats When calculating row pitch, the row's width in samples must be divided by the format's block width. The commit below accidentally removed the division. commit eea2d4d05987b4f8ad90a1588267f9495f1e9e99 Author: Chad Versace Date: Tue Jan 5 14:28:28 2016 -0800 Subject: isl: Don't align phys_slice0_sa.width twice --- src/isl/isl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 357d0ea69c1..ec66f97b779 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -895,7 +895,7 @@ isl_calc_row_pitch(const struct isl_device *dev, * being used to determine whether additional pages need to be defined. 
*/ assert(phys_slice0_sa->w % fmtl->bw == 0); - row_pitch = MAX(row_pitch, fmtl->bs * phys_slice0_sa->w); + row_pitch = MAX(row_pitch, fmtl->bs * (phys_slice0_sa->w / fmtl->bw)); switch (tile_info->tiling) { case ISL_TILING_LINEAR: -- cgit v1.2.3 From fa08f95ff55d06c0c195eba623de38a5d40e196b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 25 Jan 2016 12:15:07 -0800 Subject: isl: Add func isl_surf_get_image_offset_el() This replaces function isl_surf_get_image_offset_sa() --- src/isl/isl.c | 48 ++++++++++++++++++++++---- src/isl/isl.h | 10 +++--- src/isl/tests/isl_surf_get_image_offset_test.c | 10 +++--- 3 files changed, 51 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index ec66f97b779..65f624ce95e 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -1282,13 +1282,21 @@ get_image_offset_sa_gen9_1d(const struct isl_surf *surf, *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf); } -void -isl_surf_get_image_offset_sa(const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - uint32_t logical_z_offset_px, - uint32_t *x_offset_sa, - uint32_t *y_offset_sa) +/** + * Calculate the offset, in units of surface samples, to a subimage in the + * surface. 
+ * + * @invariant level < surface levels + * @invariant logical_array_layer < logical array length of surface + * @invariant logical_z_offset_px < logical depth of surface at level + */ +static void +get_image_offset_sa(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) { assert(level < surf->levels); assert(logical_array_layer < surf->logical_level0_px.array_len); @@ -1310,3 +1318,29 @@ isl_surf_get_image_offset_sa(const struct isl_surf *surf, break; } } + +void +isl_surf_get_image_offset_el(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *x_offset_el, + uint32_t *y_offset_el) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + assert(level < surf->levels); + assert(logical_array_layer < surf->logical_level0_px.array_len); + assert(logical_z_offset_px + < isl_minify(surf->logical_level0_px.depth, level)); + + uint32_t x_offset_sa, y_offset_sa; + get_image_offset_sa(surf, level, + logical_array_layer, + logical_z_offset_px, + &x_offset_sa, + &y_offset_sa); + + *x_offset_el = x_offset_sa / fmtl->bw; + *y_offset_el = y_offset_sa / fmtl->bh; +} diff --git a/src/isl/isl.h b/src/isl/isl.h index 3eada623fbe..392aaf7e9c5 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -975,20 +975,20 @@ isl_surf_get_array_pitch(const struct isl_surf *surf) } /** - * Get the offset to an subimage within the surface, in units of surface - * samples. + * Calculate the offset, in units of surface elements, to a subimage in the + * surface. 
* * @invariant level < surface levels * @invariant logical_array_layer < logical array length of surface * @invariant logical_z_offset_px < logical depth of surface at level */ void -isl_surf_get_image_offset_sa(const struct isl_surf *surf, +isl_surf_get_image_offset_el(const struct isl_surf *surf, uint32_t level, uint32_t logical_array_layer, uint32_t logical_z_offset_px, - uint32_t *x_offset_sa, - uint32_t *y_offset_sa); + uint32_t *x_offset_el, + uint32_t *y_offset_el); #ifdef __cplusplus } diff --git a/src/isl/tests/isl_surf_get_image_offset_test.c b/src/isl/tests/isl_surf_get_image_offset_test.c index 525180ea204..b0b4635359c 100644 --- a/src/isl/tests/isl_surf_get_image_offset_test.c +++ b/src/isl/tests/isl_surf_get_image_offset_test.c @@ -82,15 +82,15 @@ t_assert_offset(const struct isl_surf *surf, uint32_t level, uint32_t logical_array_layer, uint32_t logical_z_offset_px, - uint32_t expected_x_offset_sa, - uint32_t expected_y_offset_sa) + uint32_t expected_x_offset_el, + uint32_t expected_y_offset_el) { uint32_t x, y; - isl_surf_get_image_offset_sa(surf, level, logical_array_layer, + isl_surf_get_image_offset_el(surf, level, logical_array_layer, logical_z_offset_px, &x, &y); - t_assert(x == expected_x_offset_sa); - t_assert(y == expected_y_offset_sa); + t_assert(x == expected_x_offset_el); + t_assert(y == expected_y_offset_el); } static void -- cgit v1.2.3 From 18a83eaa8c818654d43669a8f5758823d91caeac Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 25 Jan 2016 15:08:33 -0800 Subject: isl/tests: Rename t_assert_offset() Rename it to t_assert_offset_el(), clarifying that the offset is in units of surface elements, not samples.
--- src/isl/tests/isl_surf_get_image_offset_test.c | 60 +++++++++++++------------- 1 file changed, 30 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/isl/tests/isl_surf_get_image_offset_test.c b/src/isl/tests/isl_surf_get_image_offset_test.c index b0b4635359c..95cd0532be2 100644 --- a/src/isl/tests/isl_surf_get_image_offset_test.c +++ b/src/isl/tests/isl_surf_get_image_offset_test.c @@ -78,12 +78,12 @@ t_assert_image_alignment_sa(const struct isl_surf *surf, } static void -t_assert_offset(const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - uint32_t logical_z_offset_px, - uint32_t expected_x_offset_el, - uint32_t expected_y_offset_el) +t_assert_offset_el(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t expected_x_offset_el, + uint32_t expected_y_offset_el) { uint32_t x, y; isl_surf_get_image_offset_el(surf, level, logical_array_layer, @@ -111,9 +111,9 @@ t_assert_gen4_3d_layer(const struct isl_surf *surf, uint32_t *base_y) { for (uint32_t z = 0; z < depth; ++z) { - t_assert_offset(surf, level, 0, z, - aligned_width * (z % horiz_layers), - *base_y + aligned_height * (z / horiz_layers)); + t_assert_offset_el(surf, level, 0, z, + aligned_width * (z % horiz_layers), + *base_y + aligned_height * (z / horiz_layers)); } *base_y += aligned_height * vert_layers; @@ -149,16 +149,16 @@ test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(void) t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 772); t_assert(isl_surf_get_array_pitch_sa_rows(&surf) >= 772); - t_assert_offset(&surf, 0, 0, 0, 0, 0); // +0, +0 - t_assert_offset(&surf, 1, 0, 0, 0, 512); // +0, +512 - t_assert_offset(&surf, 2, 0, 0, 256, 512); // +256, +0 - t_assert_offset(&surf, 3, 0, 0, 256, 640); // +0, +128 - t_assert_offset(&surf, 4, 0, 0, 256, 704); // +0, +64 - t_assert_offset(&surf, 5, 0, 0, 256, 736); // +0, +32 - t_assert_offset(&surf, 6, 0, 0, 256, 752); // +0, +16 - 
t_assert_offset(&surf, 7, 0, 0, 256, 760); // +0, +8 - t_assert_offset(&surf, 8, 0, 0, 256, 764); // +0, +4 - t_assert_offset(&surf, 9, 0, 0, 256, 768); // +0, +4 + t_assert_offset_el(&surf, 0, 0, 0, 0, 0); // +0, +0 + t_assert_offset_el(&surf, 1, 0, 0, 0, 512); // +0, +512 + t_assert_offset_el(&surf, 2, 0, 0, 256, 512); // +256, +0 + t_assert_offset_el(&surf, 3, 0, 0, 256, 640); // +0, +128 + t_assert_offset_el(&surf, 4, 0, 0, 256, 704); // +0, +64 + t_assert_offset_el(&surf, 5, 0, 0, 256, 736); // +0, +32 + t_assert_offset_el(&surf, 6, 0, 0, 256, 752); // +0, +16 + t_assert_offset_el(&surf, 7, 0, 0, 256, 760); // +0, +8 + t_assert_offset_el(&surf, 8, 0, 0, 256, 764); // +0, +4 + t_assert_offset_el(&surf, 9, 0, 0, 256, 768); // +0, +4 } static void @@ -193,17 +193,17 @@ test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(void) for (uint32_t a = 0; a < 6; ++a) { uint32_t b = a * isl_surf_get_array_pitch_sa_rows(&surf); - t_assert_offset(&surf, 0, a, 0, 0, b + 0); // +0, +0 - t_assert_offset(&surf, 1, a, 0, 0, b + 1024); // +0, +1024 - t_assert_offset(&surf, 2, a, 0, 512, b + 1024); // +512, +0 - t_assert_offset(&surf, 3, a, 0, 512, b + 1280); // +0, +256 - t_assert_offset(&surf, 4, a, 0, 512, b + 1408); // +0, +128 - t_assert_offset(&surf, 5, a, 0, 512, b + 1472); // +0, +64 - t_assert_offset(&surf, 6, a, 0, 512, b + 1504); // +0, +32 - t_assert_offset(&surf, 7, a, 0, 512, b + 1520); // +0, +16 - t_assert_offset(&surf, 8, a, 0, 512, b + 1528); // +0, +8 - t_assert_offset(&surf, 9, a, 0, 512, b + 1532); // +0, +4 - t_assert_offset(&surf, 10, a, 0, 512, b + 1536); // +0, +4 + t_assert_offset_el(&surf, 0, a, 0, 0, b + 0); // +0, +0 + t_assert_offset_el(&surf, 1, a, 0, 0, b + 1024); // +0, +1024 + t_assert_offset_el(&surf, 2, a, 0, 512, b + 1024); // +512, +0 + t_assert_offset_el(&surf, 3, a, 0, 512, b + 1280); // +0, +256 + t_assert_offset_el(&surf, 4, a, 0, 512, b + 1408); // +0, +128 + t_assert_offset_el(&surf, 5, a, 0, 512, b + 1472); // +0, +64 + 
t_assert_offset_el(&surf, 6, a, 0, 512, b + 1504); // +0, +32 + t_assert_offset_el(&surf, 7, a, 0, 512, b + 1520); // +0, +16 + t_assert_offset_el(&surf, 8, a, 0, 512, b + 1528); // +0, +8 + t_assert_offset_el(&surf, 9, a, 0, 512, b + 1532); // +0, +4 + t_assert_offset_el(&surf, 10, a, 0, 512, b + 1536); // +0, +4 } } -- cgit v1.2.3 From 7ab0d2e2c07b46ea620f296d9886b7d11498b3e8 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 25 Jan 2016 12:43:56 -0800 Subject: isl: Add func isl_get_intratile_image_offset_el() --- src/isl/isl.c | 37 +++++++++++++++++++++++++++++++++++++ src/isl/isl.h | 20 ++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 65f624ce95e..716ce29fe8e 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -1344,3 +1344,40 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf, *x_offset_el = x_offset_sa / fmtl->bw; *y_offset_el = y_offset_sa / fmtl->bh; } + +void +isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset, + uint32_t *base_address_offset, + uint32_t *x_offset_el, + uint32_t *y_offset_el) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + struct isl_tile_info tile_info; + isl_surf_get_tile_info(dev, surf, &tile_info); + + uint32_t total_x_offset_el; + uint32_t total_y_offset_el; + isl_surf_get_image_offset_el(surf, level, + logical_array_layer, + logical_z_offset, + &total_x_offset_el, + &total_y_offset_el); + + uint32_t small_y_offset_el = total_y_offset_el % tile_info.height; + uint32_t big_y_offset_el = total_y_offset_el - small_y_offset_el; + uint32_t big_y_offset_B = big_y_offset_el * surf->row_pitch; + + uint32_t total_x_offset_B = total_x_offset_el * fmtl->bs; + uint32_t small_x_offset_B = total_x_offset_B % tile_info.width; + uint32_t small_x_offset_el = small_x_offset_B / fmtl->bs; + uint32_t 
big_x_offset_B = total_x_offset_B - small_x_offset_B; + + *base_address_offset = big_y_offset_B + big_x_offset_B; + *x_offset_el = small_x_offset_el; + *y_offset_el = small_y_offset_el; +} diff --git a/src/isl/isl.h b/src/isl/isl.h index 392aaf7e9c5..bc7a315e8ae 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -990,6 +990,26 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf, uint32_t *x_offset_el, uint32_t *y_offset_el); +/** + * @brief Calculate the intratile offsets to a subimage in the surface. + * + * In @a base_address_offset return the offset from the base of the surface to + * the base address of the first tile of the subimage. In @a x_offset_el and + * @a y_offset_el, return the offset, in units of surface elements, from the + * tile's base to the subimage's first surface element. The x and y offsets + * are intratile offsets; that is, they do not exceed the boundary of the + * surface's tiling format. + */ +void +isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset, + uint32_t *base_address_offset, + uint32_t *x_offset_el, + uint32_t *y_offset_el); + #ifdef __cplusplus } #endif -- cgit v1.2.3 From a6ecfe1dd32ca3779c904b4b9b8148267144a73a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 25 Jan 2016 16:22:17 -0800 Subject: isl/tests: Add some tests for intratile offsets Test isl_surf_get_image_intratile_offset_el() in the tests: test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0 test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0 --- src/isl/tests/isl_surf_get_image_offset_test.c | 96 +++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/tests/isl_surf_get_image_offset_test.c b/src/isl/tests/isl_surf_get_image_offset_test.c index 95cd0532be2..cda8583daeb 100644 --- a/src/isl/tests/isl_surf_get_image_offset_test.c +++ 
b/src/isl/tests/isl_surf_get_image_offset_test.c @@ -93,6 +93,31 @@ t_assert_offset_el(const struct isl_surf *surf, t_assert(y == expected_y_offset_el); } +static void +t_assert_intratile_offset_el(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t expected_base_address_offset, + uint32_t expected_x_offset_el, + uint32_t expected_y_offset_el) +{ + uint32_t base_address_offset; + uint32_t x_offset_el, y_offset_el; + isl_surf_get_image_intratile_offset_el(dev, surf, + level, + logical_array_layer, + logical_z_offset_px, + &base_address_offset, + &x_offset_el, + &y_offset_el); + + t_assert(base_address_offset == expected_base_address_offset); + t_assert(x_offset_el == expected_x_offset_el); + t_assert(y_offset_el == expected_y_offset_el); +} + static void t_assert_phys_level0_sa(const struct isl_surf *surf, uint32_t width, uint32_t height, uint32_t depth, uint32_t array_len) @@ -147,7 +172,11 @@ test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(void) t_assert_image_alignment_sa(&surf, 4, 4, 1); t_assert_phys_level0_sa(&surf, 512, 512, 1, 1); t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 772); - t_assert(isl_surf_get_array_pitch_sa_rows(&surf) >= 772); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) == + isl_surf_get_array_pitch_sa_rows(&surf)); + + /* Row pitch should be minimal possible */ + t_assert(surf.row_pitch == 2048); t_assert_offset_el(&surf, 0, 0, 0, 0, 0); // +0, +0 t_assert_offset_el(&surf, 1, 0, 0, 0, 512); // +0, +512 @@ -159,6 +188,17 @@ test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(void) t_assert_offset_el(&surf, 7, 0, 0, 256, 760); // +0, +8 t_assert_offset_el(&surf, 8, 0, 0, 256, 764); // +0, +4 t_assert_offset_el(&surf, 9, 0, 0, 256, 768); // +0, +4 + + t_assert_intratile_offset_el(&dev, &surf, 0, 0, 0, 0x0, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 1, 0, 0, 0x100000, 0, 0); + 
t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x100400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x140400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x160400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x170400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x170400, 0, 16); + t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x170400, 0, 24); + t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x170400, 0, 28); + t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x180400, 0, 0); } static void @@ -187,8 +227,13 @@ test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(void) t_assert_image_alignment_el(&surf, 4, 4, 1); t_assert_image_alignment_sa(&surf, 4, 4, 1); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 1540); - t_assert(isl_surf_get_array_pitch_sa_rows(&surf) >= 1540); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) == + isl_surf_get_array_pitch_sa_rows(&surf)); + + /* Row pitch should be minimal possible */ + t_assert(surf.row_pitch == 4096); for (uint32_t a = 0; a < 6; ++a) { uint32_t b = a * isl_surf_get_array_pitch_sa_rows(&surf); @@ -204,7 +249,54 @@ test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(void) t_assert_offset_el(&surf, 8, a, 0, 512, b + 1528); // +0, +8 t_assert_offset_el(&surf, 9, a, 0, 512, b + 1532); // +0, +4 t_assert_offset_el(&surf, 10, a, 0, 512, b + 1536); // +0, +4 + } + + /* The layout below assumes a specific array pitch. It will need updating + * if isl's array pitch calculations ever change. 
+ */ + t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 1540); + + /* array layer 0 */ + t_assert_intratile_offset_el(&dev, &surf, 0, 0, 0, 0x0, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 1, 0, 0, 0x400000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x400800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x500800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x580800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x5c0800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x5e0800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x5e0800, 0, 16); + t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x5e0800, 0, 24); + t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x5e0800, 0, 28); + t_assert_intratile_offset_el(&dev, &surf, 10, 0, 0, 0x600800, 0, 0); + + /* array layer 1 */ + t_assert_intratile_offset_el(&dev, &surf, 0, 1, 0, 0x600000, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 1, 1, 0, 0xa00000, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 2, 1, 0, 0xa00800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 3, 1, 0, 0xb00800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 4, 1, 0, 0xb80800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 5, 1, 0, 0xbc0800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 6, 1, 0, 0xbe0800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 7, 1, 0, 0xbe0800, 0, 20); + t_assert_intratile_offset_el(&dev, &surf, 8, 1, 0, 0xbe0800, 0, 28); + t_assert_intratile_offset_el(&dev, &surf, 9, 1, 0, 0xc00800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 10, 1, 0, 0xc00800, 0, 4); + + /* array layer 2 */ + t_assert_intratile_offset_el(&dev, &surf, 0, 2, 0, 0xc00000, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 1, 2, 0, 0x1000000, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 2, 2, 0, 0x1000800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 3, 2, 0, 0x1100800, 0, 8); + t_assert_intratile_offset_el(&dev, 
&surf, 4, 2, 0, 0x1180800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 5, 2, 0, 0x11c0800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 6, 2, 0, 0x11e0800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 7, 2, 0, 0x11e0800, 0, 24); + t_assert_intratile_offset_el(&dev, &surf, 8, 2, 0, 0x1200800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 9, 2, 0, 0x1200800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 10, 2, 0, 0x1200800, 0, 8); + + /* skip the remaining array layers */ } static void -- cgit v1.2.3 From 02629a16d1bb1f065a65ca7eb01b150339de6cd4 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 26 Jan 2016 22:56:21 -0800 Subject: isl: Add logical z offset to GEN4_2D surfaces 3D surfaces in Skylake are stored with ISL_DIM_LAYOUT_GEN4_2D. Any delta in the logical z offset causes an equivalent delta in the surface's array layer. --- src/isl/isl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index 716ce29fe8e..ec6323741e8 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -1309,7 +1309,8 @@ get_image_offset_sa(const struct isl_surf *surf, x_offset_sa, y_offset_sa); break; case ISL_DIM_LAYOUT_GEN4_2D: - get_image_offset_sa_gen4_2d(surf, level, logical_array_layer, + get_image_offset_sa_gen4_2d(surf, level, logical_array_layer + + logical_z_offset_px, x_offset_sa, y_offset_sa); break; case ISL_DIM_LAYOUT_GEN4_3D: -- cgit v1.2.3 From 308ec0279b341c1d79bb05fa965ab5182bacbe4a Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 27 Jan 2016 09:29:09 -0800 Subject: anv/image: Update usages of isl_surf_get_image_offset_sa --- src/vulkan/anv_image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index a0a1312f81d..1af316b4936 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -784,7 +784,7 @@ anv_image_view_fill_image_param(struct anv_device *device, param->size[2] = 
surf->logical_level0_px.array_len - view->base_layer; } - isl_surf_get_image_offset_sa(surf, view->base_mip, view->base_layer, 0, + isl_surf_get_image_offset_el(surf, view->base_mip, view->base_layer, 0, ¶m->offset[0], ¶m->offset[1]); param->stride[0] = cpp; -- cgit v1.2.3 From c3546685edc2a4f51baf0eb05854b000dc84145c Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 29 Dec 2015 11:06:25 -0800 Subject: i965: Update the surface_format table for ETC formats Enable ETC support for BDW+. In Vulkan, an array lookup on surface_format[] is used to determine HW support for certain formats. In contrast, Mesa dynamically populates an array which reports this information. --- src/mesa/drivers/dri/i965/brw_surface_formats.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 7c16b84eb87..f42a9531683 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -268,13 +268,13 @@ const struct brw_surface_format_info surface_formats[] = { SF(70, 70, x, x, x, x, x, x, x, x, BC6H_UF16) SF( x, x, x, x, x, x, x, x, x, x, PLANAR_420_8) SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) - SF( x, x, x, x, x, x, x, x, x, x, ETC1_RGB8) - SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8) - SF( x, x, x, x, x, x, x, x, x, x, EAC_R11) - SF( x, x, x, x, x, x, x, x, x, x, EAC_RG11) - SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) - SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) - SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8) + SF(80, 80, x, x, x, x, x, x, x, x, ETC1_RGB8) + SF(80, 80, x, x, x, x, x, x, x, x, ETC2_RGB8) + SF(80, 80, x, x, x, x, x, x, x, x, EAC_R11) + SF(80, 80, x, x, x, x, x, x, x, x, EAC_RG11) + SF(80, 80, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) + SF(80, 80, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) + SF(80, 80, x, x, x, x, x, x, x, x, ETC2_SRGB8) SF( x, 
x, x, x, x, x, x, x, x, x, R16G16B16_UINT) SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_SINT) SF( x, x, x, x, x, x, x, x, x, x, R32_SFIXED) @@ -289,10 +289,10 @@ const struct brw_surface_format_info surface_formats[] = { SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT) SF( x, x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU) SF( x, x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU) - SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) - SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) + SF(80, 80, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) + SF(80, 80, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) + SF(80, 80, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) + SF(80, 80, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UINT) SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_SINT) SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16) -- cgit v1.2.3 From 2fb8b859f696240177f7ab2dc61b9023d7ab0ea4 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 5 Jan 2016 12:13:56 -0800 Subject: anv/meta: Use the uncompressed rectangle when blitting For an uncompressed ImageView of a compressed Image, the dimensions and offsets are all divided by the appropriate block dimensions. We are not yet using an uncompressed ImageView for a compressed Image, but will do so in a future commit. --- src/vulkan/anv_meta.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 4fed2ea9d54..6680ef9c20d 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -483,6 +483,39 @@ struct blit_region { VkExtent3D dest_extent; }; +/* Returns the user-provided VkBufferImageCopy::imageOffset in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. 
+ */ +static struct VkOffset3D +meta_region_offset_el(const struct anv_image * image, + const struct VkOffset3D * offset) +{ + const struct isl_format_layout * isl_layout = image->format->isl_layout; + return (VkOffset3D) { + .x = offset->x / isl_layout->bw, + .y = offset->y / isl_layout->bh, + .z = offset->z / isl_layout->bd, + }; +} + +/* Returns the user-provided VkBufferImageCopy::imageExtent in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. + */ +static struct VkExtent3D +meta_region_extent_el(const VkFormat format, + const struct VkExtent3D * extent) +{ + const struct isl_format_layout * isl_layout = + anv_format_for_vk_format(format)->isl_layout; + return (VkExtent3D) { + .width = DIV_ROUND_UP(extent->width , isl_layout->bw), + .height = DIV_ROUND_UP(extent->height, isl_layout->bh), + .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), + }; +} + static void meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, @@ -1318,22 +1351,21 @@ void anv_CmdCopyBufferToImage( cmd_buffer); VkOffset3D src_offset = { 0, 0, slice }; - - const VkOffset3D dest_offset = { - .x = pRegions[r].imageOffset.x, - .y = pRegions[r].imageOffset.y, - .z = 0, - }; + VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, + &pRegions[r].imageOffset); + dest_offset_el.z = 0; + const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, + &pRegions[r].imageExtent); meta_emit_blit(cmd_buffer, src_image, &src_iview, src_offset, - pRegions[r].imageExtent, + img_extent_el, dest_image, &dest_iview, - dest_offset, - pRegions[r].imageExtent, + dest_offset_el, + img_extent_el, VK_FILTER_NEAREST); /* Once we've done the blit, all of the actual information about -- cgit v1.2.3 From 010ab34839e7bf118b8f5dc974f4d348c77fbc37 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 26 Jan 2016 16:19:33 -0800 Subject: anv/meta: Set depth to 0 for buffer image in 
CopyBufferToImage() The buffer image is a flat 2D surface. Each surface represents an array/depth layer, therefore, the Z-offset is 0 when blitting. --- src/vulkan/anv_meta.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 6680ef9c20d..cbdb2fa1889 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1350,7 +1350,6 @@ void anv_CmdCopyBufferToImage( }, cmd_buffer); - VkOffset3D src_offset = { 0, 0, slice }; VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, &pRegions[r].imageOffset); dest_offset_el.z = 0; @@ -1360,7 +1359,7 @@ void anv_CmdCopyBufferToImage( meta_emit_blit(cmd_buffer, src_image, &src_iview, - src_offset, + (VkOffset3D){0, 0, 0}, img_extent_el, dest_image, &dest_iview, -- cgit v1.2.3 From 3f01bbe7f3e2639ca176c7873ea5e16ba16e4042 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 4 Jan 2016 12:41:22 -0800 Subject: anv/image: Scale iview extent by backing image Aligns with formula's presented in Vulkan spec concerning CopyBufferToImage. 18.4 Copying Data Between Buffers and Images This won't conflict with valid API usage, because: 1) Users are not allowed to create an uncompressed ImageView with a compressed Image. see: VkSpec - 11.5 Image Views - VkImageViewCreateInfo's Valid Usage box 2) If users create a differently formatted compressed ImageView with a compressed Image, the block dimensions will still match. 
see: VkSpec - 28.3.1.5 Format Compatibility Classes - Table 28.5 --- src/vulkan/anv_image.c | 24 +++++++++++++++++++++--- src/vulkan/anv_private.h | 1 + 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 1af316b4936..8b2003587ac 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -532,10 +532,28 @@ anv_image_view_init(struct anv_image_view *iview, iview->base_layer = range->baseArrayLayer; iview->base_mip = range->baseMipLevel; + + if (!isl_format_is_compressed(iview->format) && + isl_format_is_compressed(image->format->surface_format)) { + /* Scale the ImageView extent by the backing Image. This is used + * internally when an uncompressed ImageView is created on a + * compressed Image. The ImageView can therefore be used for copying + * data from a source Image to a destination Image. + */ + const struct isl_format_layout * isl_layout = image->format->isl_layout; + iview->level_0_extent.width = DIV_ROUND_UP(image->extent.width , isl_layout->bw); + iview->level_0_extent.height = DIV_ROUND_UP(image->extent.height, isl_layout->bh); + iview->level_0_extent.depth = DIV_ROUND_UP(image->extent.depth , isl_layout->bd); + } else { + iview->level_0_extent.width = image->extent.width ; + iview->level_0_extent.height = image->extent.height; + iview->level_0_extent.depth = image->extent.depth ; + } + iview->extent = (VkExtent3D) { - .width = anv_minify(image->extent.width, range->baseMipLevel), - .height = anv_minify(image->extent.height, range->baseMipLevel), - .depth = anv_minify(image->extent.depth, range->baseMipLevel), + .width = anv_minify(iview->level_0_extent.width , range->baseMipLevel), + .height = anv_minify(iview->level_0_extent.height, range->baseMipLevel), + .depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel), }; if (image->needs_nonrt_surface_state) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 
1ae7066f5fd..79af4102e3f 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1578,6 +1578,7 @@ struct anv_image_view { enum isl_format format; uint32_t base_layer; uint32_t base_mip; + VkExtent3D level_0_extent; /**< Extent of ::image's level 0 adjusted for ::vk_format. */ VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ /** RENDER_SURFACE_STATE when using image as a color render target. */ -- cgit v1.2.3 From 8c0c25abdee557c69cf89d8f1d3624a12b551e74 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 4 Jan 2016 14:53:55 -0800 Subject: gen8_state: use iview extent to program RENDER_SURFACE_STATE When creating an uncompressed ImageView on an compressed Image, the SurfaceFormat is updated to match the ImageView's. The surface dimensions must also change so that the HW sees the same size image instead of a 4x larger one. Fixes the following error which results from running many VulkanCTS compressed tests in one shot: ResourceError (vk.queueSubmit(queue, 1, &submitInfo, *m_fence): VK_ERROR_OUT_OF_DEVICE_MEMORY at vktPipelineImageSamplingInstance.cpp:921) Makes all compressed format tests with a height > 1 pass. 
--- src/vulkan/gen8_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 5b3691d22d0..3fd3187fbe7 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -206,8 +206,8 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, .BaseMipLevel = 0.0, .SurfaceQPitch = get_qpitch(&surface->isl) >> 2, - .Height = image->extent.height - 1, - .Width = image->extent.width - 1, + .Height = iview->level_0_extent.height - 1, + .Width = iview->level_0_extent.width - 1, .Depth = 0, /* TEMPLATE */ .SurfacePitch = surface->isl.row_pitch - 1, .RenderTargetViewExtent = 0, /* TEMPLATE */ -- cgit v1.2.3 From 1c87cb51be74f28a40fac1e25c4db07139d033b0 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 26 Jan 2016 18:00:14 -0800 Subject: anv/meta: Disambiguate slice variable value This will simplify the usage of isl_surf_get_image_intratile_offset_el(). --- src/vulkan/anv_meta.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index cbdb2fa1889..65f07eb4bce 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1306,16 +1306,11 @@ void anv_CmdCopyBufferToImage( &pRegions[r].imageSubresource, &pRegions[r].imageOffset); - unsigned num_slices; - if (dest_image->type == VK_IMAGE_TYPE_3D) { - assert(pRegions[r].imageSubresource.layerCount == 1); - num_slices = pRegions[r].imageExtent.depth; - } else { - assert(pRegions[r].imageExtent.depth == 1); - num_slices = pRegions[r].imageSubresource.layerCount; - } - - for (unsigned slice = 0; slice < num_slices; slice++) { + unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { struct anv_image_view 
src_iview; anv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { @@ -1344,7 +1339,8 @@ void anv_CmdCopyBufferToImage( .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].imageSubresource.mipLevel, .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + slice, + .baseArrayLayer = dest_base_array_slice + + slice_array + slice_3d, .layerCount = 1 }, }, @@ -1375,6 +1371,11 @@ void anv_CmdCopyBufferToImage( src_image->offset += src_image->extent.width * src_image->extent.height * src_image->format->isl_layout->bs; + + if (dest_image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; } anv_DestroyImage(vk_device, anv_image_to_handle(src_image), -- cgit v1.2.3 From 4a0075feeb68f742f723a12b1ceae07ba674a531 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 26 Jan 2016 18:10:12 -0800 Subject: anv/meta: Calculate mip offset for compressed texture This value will be used in a later commit. --- src/vulkan/anv_meta.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 65f07eb4bce..0b0adfc224f 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1328,6 +1328,22 @@ void anv_CmdCopyBufferToImage( }, cmd_buffer); + uint32_t img_x = 0; + uint32_t img_y = 0; + uint32_t img_o = 0; + if (isl_format_is_compressed(dest_image->format->surface_format)) + isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, + &dest_image->color_surface.isl, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer + slice_array, + pRegions[r].imageOffset.z + slice_3d, + &img_o, &img_x, &img_y); + + VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, & pRegions[r].imageOffset); + dest_offset_el.x += img_x; + dest_offset_el.y += img_y; + dest_offset_el.z = 0; + struct anv_image_view dest_iview; anv_image_view_init(&dest_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { 
@@ -1346,9 +1362,6 @@ void anv_CmdCopyBufferToImage( }, cmd_buffer); - VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, - &pRegions[r].imageOffset); - dest_offset_el.z = 0; const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, &pRegions[r].imageExtent); -- cgit v1.2.3 From 6a579ded87a4192ed2f1a015ca8c6535d323ce1c Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 26 Jan 2016 18:53:21 -0800 Subject: anv: Add offset parameter to anv_image_view_init() This is the offset of the tile that contains the mip specified by isl_surf_get_image_intratile_offset_el(). Used to draw to/from the specified mip. --- src/vulkan/anv_image.c | 7 ++++--- src/vulkan/anv_meta.c | 20 ++++++++++---------- src/vulkan/anv_meta_clear.c | 2 +- src/vulkan/anv_private.h | 3 ++- 4 files changed, 17 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 8b2003587ac..6c1c92d61c6 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -483,7 +483,8 @@ void anv_image_view_init(struct anv_image_view *iview, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer) + struct anv_cmd_buffer *cmd_buffer, + uint32_t offset) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; @@ -513,7 +514,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->image = image; iview->bo = image->bo; - iview->offset = image->offset + surface->offset; + iview->offset = image->offset + surface->offset + offset; iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; iview->vk_format = pCreateInfo->format; @@ -608,7 +609,7 @@ anv_CreateImageView(VkDevice _device, if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - anv_image_view_init(view, device, pCreateInfo, NULL); + anv_image_view_init(view, device, pCreateInfo, NULL, 0); *pView = anv_image_view_to_handle(view); 
diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 0b0adfc224f..3faabc159c4 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -798,7 +798,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .layerCount = 1 }, }, - cmd_buffer); + cmd_buffer, 0); struct anv_image_view dest_iview; anv_image_view_init(&dest_iview, cmd_buffer->device, @@ -815,7 +815,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .layerCount = 1, }, }, - cmd_buffer); + cmd_buffer, 0); meta_emit_blit(cmd_buffer, anv_image_from_handle(src_image), @@ -1058,7 +1058,7 @@ void anv_CmdCopyImage( .layerCount = pRegions[r].dstSubresource.layerCount, }, }, - cmd_buffer); + cmd_buffer, 0); const VkOffset3D dest_offset = { .x = pRegions[r].dstOffset.x, @@ -1102,7 +1102,7 @@ void anv_CmdCopyImage( .layerCount = 1 }, }, - cmd_buffer); + cmd_buffer, 0); meta_emit_blit(cmd_buffer, src_image, &src_iview, @@ -1162,7 +1162,7 @@ void anv_CmdBlitImage( .layerCount = 1 }, }, - cmd_buffer); + cmd_buffer, 0); const VkOffset3D dest_offset = { .x = pRegions[r].dstOffsets[0].x, @@ -1213,7 +1213,7 @@ void anv_CmdBlitImage( .layerCount = 1 }, }, - cmd_buffer); + cmd_buffer, 0); meta_emit_blit(cmd_buffer, src_image, &src_iview, @@ -1326,7 +1326,7 @@ void anv_CmdCopyBufferToImage( .layerCount = 1, }, }, - cmd_buffer); + cmd_buffer, 0); uint32_t img_x = 0; uint32_t img_y = 0; @@ -1360,7 +1360,7 @@ void anv_CmdCopyBufferToImage( .layerCount = 1 }, }, - cmd_buffer); + cmd_buffer, img_o); const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, &pRegions[r].imageExtent); @@ -1440,7 +1440,7 @@ void anv_CmdCopyImageToBuffer( .layerCount = pRegions[r].imageSubresource.layerCount, }, }, - cmd_buffer); + cmd_buffer, 0); struct anv_image *dest_image = make_image_for_buffer(vk_device, destBuffer, buffer_format, @@ -1476,7 +1476,7 @@ void anv_CmdCopyImageToBuffer( .layerCount = 1 }, }, - cmd_buffer); + cmd_buffer, 0); meta_emit_blit(cmd_buffer, anv_image_from_handle(srcImage), diff 
--git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index ac369e9f9be..da234fcbdf1 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -838,7 +838,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, .layerCount = 1 }, }, - cmd_buffer); + cmd_buffer, 0); VkFramebuffer fb; anv_CreateFramebuffer(device_h, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 79af4102e3f..a6f24b55dca 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1609,7 +1609,8 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, void anv_image_view_init(struct anv_image_view *view, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer); + struct anv_cmd_buffer *cmd_buffer, + uint32_t offset); void anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, -- cgit v1.2.3 From d3c1fd53e284db7e8a63e8fd1d9219c710a2890e Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 26 Jan 2016 18:40:54 -0800 Subject: anv/image: Use custom VkBufferImageCopy for iview initialization Use a custom VkBufferImageCopy with the user-provided struct as the base. A few fields are modified when the iview is uncompressed and the underlying image is compressed. 
--- src/vulkan/anv_image.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 6c1c92d61c6..9294bab621d 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -488,6 +488,8 @@ anv_image_view_init(struct anv_image_view *iview, { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + VkImageViewCreateInfo mCreateInfo; + memcpy(&mCreateInfo, pCreateInfo, sizeof(VkImageViewCreateInfo)); assert(range->layerCount > 0); assert(range->baseMipLevel < image->levels); @@ -545,6 +547,11 @@ anv_image_view_init(struct anv_image_view *iview, iview->level_0_extent.width = DIV_ROUND_UP(image->extent.width , isl_layout->bw); iview->level_0_extent.height = DIV_ROUND_UP(image->extent.height, isl_layout->bh); iview->level_0_extent.depth = DIV_ROUND_UP(image->extent.depth , isl_layout->bd); + iview->level_0_extent.width = anv_minify(iview->level_0_extent.width , range->baseMipLevel); + iview->level_0_extent.height = anv_minify(iview->level_0_extent.height, range->baseMipLevel); + iview->level_0_extent.depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel); + mCreateInfo.subresourceRange.baseMipLevel = 0; + mCreateInfo.subresourceRange.baseArrayLayer = 0; } else { iview->level_0_extent.width = image->extent.width ; iview->level_0_extent.height = image->extent.height; @@ -561,7 +568,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); anv_fill_image_surface_state(device, iview->nonrt_surface_state, - iview, pCreateInfo, + iview, &mCreateInfo, VK_IMAGE_USAGE_SAMPLED_BIT); } else { iview->nonrt_surface_state.alloc_size = 0; @@ -571,7 +578,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); anv_fill_image_surface_state(device, 
iview->color_rt_surface_state, - iview, pCreateInfo, + iview, &mCreateInfo, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); } else { iview->color_rt_surface_state.alloc_size = 0; @@ -582,7 +589,7 @@ anv_image_view_init(struct anv_image_view *iview, if (has_matching_storage_typed_format(device, iview->format)) anv_fill_image_surface_state(device, iview->storage_surface_state, - iview, pCreateInfo, + iview, &mCreateInfo, VK_IMAGE_USAGE_STORAGE_BIT); else anv_fill_buffer_surface_state(device, iview->storage_surface_state, -- cgit v1.2.3 From dd22b5c914a6c34df4adf136b2a85d2bd91dcf34 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 26 Jan 2016 21:51:36 -0800 Subject: anv/meta: Modify make_image_for_buffer()'s image Always use a valid buffer format and convert the extent to units of elements with respect to original image format. --- src/vulkan/anv_meta.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 3faabc159c4..cd3e30e5f64 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -992,7 +992,7 @@ choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) } static VkFormat -choose_buffer_format(struct anv_image *image, VkImageAspectFlagBits aspect) +choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect) { assert(__builtin_popcount(aspect) == 1); @@ -1004,7 +1004,7 @@ choose_buffer_format(struct anv_image *image, VkImageAspectFlagBits aspect) * an RGB format here even if the tiled image is RGBA. XXX: This doesn't * work if the buffer is the destination. 
*/ - enum isl_format linear_format = anv_get_isl_format(image->vk_format, aspect, + enum isl_format linear_format = anv_get_isl_format(format, aspect, VK_IMAGE_TILING_LINEAR, NULL); @@ -1241,13 +1241,17 @@ make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, if (copy->bufferImageHeight) extent.height = copy->bufferImageHeight; extent.depth = 1; + extent = meta_region_extent_el(format, &extent); + + VkImageAspectFlags aspect = copy->imageSubresource.aspectMask; + VkFormat buffer_format = choose_buffer_format(format, aspect); VkImage vk_image; VkResult result = anv_CreateImage(vk_device, &(VkImageCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, - .format = format, + .format = buffer_format, .extent = extent, .mipLevels = 1, .arrayLayers = 1, @@ -1293,10 +1297,9 @@ void anv_CmdCopyBufferToImage( VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; VkFormat image_format = choose_iview_format(dest_image, aspect); - VkFormat buffer_format = choose_buffer_format(dest_image, aspect); struct anv_image *src_image = - make_image_for_buffer(vk_device, srcBuffer, buffer_format, + make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format, VK_IMAGE_USAGE_SAMPLED_BIT, dest_image->type, &cmd_buffer->pool->alloc, &pRegions[r]); @@ -1317,7 +1320,7 @@ void anv_CmdCopyBufferToImage( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = anv_image_to_handle(src_image), .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = buffer_format, + .format = src_image->vk_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, @@ -1423,7 +1426,7 @@ void anv_CmdCopyImageToBuffer( VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; VkFormat image_format = choose_iview_format(src_image, aspect); - VkFormat buffer_format = choose_buffer_format(src_image, aspect); + VkFormat buffer_format = choose_buffer_format(src_image->vk_format, aspect); struct anv_image_view 
src_iview; anv_image_view_init(&src_iview, cmd_buffer->device, -- cgit v1.2.3 From 1bea1eff38b5b71c1019efce4cbf2e05ce7cd80b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 Jan 2016 10:47:42 -0800 Subject: anv/meta: Don't double-call choose_buffer_format This fixes all the renderpass tests --- src/vulkan/anv_meta.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index cd3e30e5f64..21c5f87eb54 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1426,7 +1426,6 @@ void anv_CmdCopyImageToBuffer( VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; VkFormat image_format = choose_iview_format(src_image, aspect); - VkFormat buffer_format = choose_buffer_format(src_image->vk_format, aspect); struct anv_image_view src_iview; anv_image_view_init(&src_iview, cmd_buffer->device, @@ -1446,7 +1445,7 @@ void anv_CmdCopyImageToBuffer( cmd_buffer, 0); struct anv_image *dest_image = - make_image_for_buffer(vk_device, destBuffer, buffer_format, + make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, src_image->type, &cmd_buffer->pool->alloc, &pRegions[r]); @@ -1470,7 +1469,7 @@ void anv_CmdCopyImageToBuffer( .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = anv_image_to_handle(dest_image), .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = buffer_format, + .format = dest_image->vk_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, -- cgit v1.2.3 From 96cf5cfee14633a77d5cf3f01df3183f9e164434 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 Jan 2016 11:47:07 -0800 Subject: anv/image: Minify before dividing by block dimensions --- src/vulkan/anv_image.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 9294bab621d..2622b11b89e 100644 --- 
a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -544,18 +544,18 @@ anv_image_view_init(struct anv_image_view *iview, * data from a source Image to a destination Image. */ const struct isl_format_layout * isl_layout = image->format->isl_layout; - iview->level_0_extent.width = DIV_ROUND_UP(image->extent.width , isl_layout->bw); - iview->level_0_extent.height = DIV_ROUND_UP(image->extent.height, isl_layout->bh); - iview->level_0_extent.depth = DIV_ROUND_UP(image->extent.depth , isl_layout->bd); - iview->level_0_extent.width = anv_minify(iview->level_0_extent.width , range->baseMipLevel); - iview->level_0_extent.height = anv_minify(iview->level_0_extent.height, range->baseMipLevel); - iview->level_0_extent.depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel); + iview->level_0_extent.width = anv_minify(image->extent.width, range->baseMipLevel); + iview->level_0_extent.height = anv_minify(image->extent.height, range->baseMipLevel); + iview->level_0_extent.depth = anv_minify(image->extent.depth, range->baseMipLevel); + iview->level_0_extent.width = DIV_ROUND_UP(iview->level_0_extent.width, isl_layout->bw); + iview->level_0_extent.height = DIV_ROUND_UP(iview->level_0_extent.height, isl_layout->bh); + iview->level_0_extent.depth = DIV_ROUND_UP(iview->level_0_extent.depth, isl_layout->bd); mCreateInfo.subresourceRange.baseMipLevel = 0; mCreateInfo.subresourceRange.baseArrayLayer = 0; } else { - iview->level_0_extent.width = image->extent.width ; + iview->level_0_extent.width = image->extent.width; iview->level_0_extent.height = image->extent.height; - iview->level_0_extent.depth = image->extent.depth ; + iview->level_0_extent.depth = image->extent.depth; } iview->extent = (VkExtent3D) { -- cgit v1.2.3 From 235abfb7e689a3fcf400764510d4bb670c9bea98 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 27 Jan 2016 12:08:56 -0800 Subject: anv/image: Enlarge the image level 0 extent The extent previously was supposed to match the mip at a given level 
under the assumption that the base address would be that of the mip as well. Now however, the base address only matches the offset of the containing tile. Therefore, enlarge the extent to match that of phys_slice0, so that we don't draw/fetch in out of bounds territory. This solution isn't perfect because the base adress isn't always at the first tile, therefore the assumed valid memory region by the HW contains some number of invalid tiles on two edges. --- src/vulkan/anv_image.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 2622b11b89e..889a9e87d79 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -544,12 +544,12 @@ anv_image_view_init(struct anv_image_view *iview, * data from a source Image to a destination Image. */ const struct isl_format_layout * isl_layout = image->format->isl_layout; - iview->level_0_extent.width = anv_minify(image->extent.width, range->baseMipLevel); - iview->level_0_extent.height = anv_minify(image->extent.height, range->baseMipLevel); + iview->level_0_extent.depth = anv_minify(image->extent.depth, range->baseMipLevel); - iview->level_0_extent.width = DIV_ROUND_UP(iview->level_0_extent.width, isl_layout->bw); - iview->level_0_extent.height = DIV_ROUND_UP(iview->level_0_extent.height, isl_layout->bh); iview->level_0_extent.depth = DIV_ROUND_UP(iview->level_0_extent.depth, isl_layout->bd); + + iview->level_0_extent.height = isl_surf_get_array_pitch_el_rows(&surface->isl); + iview->level_0_extent.width = isl_surf_get_row_pitch_el(&surface->isl); mCreateInfo.subresourceRange.baseMipLevel = 0; mCreateInfo.subresourceRange.baseArrayLayer = 0; } else { -- cgit v1.2.3 From 162c66258511a40f86187d271cb842b82dd39912 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 Jan 2016 12:37:15 -0800 Subject: anv/image: Use the entire image height for compressed meta blits --- src/vulkan/anv_image.c | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 889a9e87d79..febfaa42539 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -548,7 +548,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->level_0_extent.depth = anv_minify(image->extent.depth, range->baseMipLevel); iview->level_0_extent.depth = DIV_ROUND_UP(iview->level_0_extent.depth, isl_layout->bd); - iview->level_0_extent.height = isl_surf_get_array_pitch_el_rows(&surface->isl); + iview->level_0_extent.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; iview->level_0_extent.width = isl_surf_get_row_pitch_el(&surface->isl); mCreateInfo.subresourceRange.baseMipLevel = 0; mCreateInfo.subresourceRange.baseArrayLayer = 0; -- cgit v1.2.3 From f7d6b8ccfe3f04382d34bcd49cdbdfec88709719 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 Jan 2016 12:55:36 -0800 Subject: gen8/state: Fix QPitch for compressed textures on Broadwell --- src/vulkan/gen8_state.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 3fd3187fbe7..2da9c62e153 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -147,7 +147,20 @@ get_qpitch(const struct isl_surf *surf) #endif case ISL_SURF_DIM_2D: case ISL_SURF_DIM_3D: - return isl_surf_get_array_pitch_el_rows(surf); + #if ANV_GEN >= 9 + return isl_surf_get_array_pitch_el_rows(surf); + #else + /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch + * + * "This field must be set to an integer multiple of the Surface + * Vertical Alignment. For compressed textures (BC*, FXT1, + * ETC*, and EAC* Surface Formats), this field is in units of + * rows in the uncompressed surface, and must be set to an + * integer multiple of the vertical alignment parameter "j" + * defined in the Common Surface Formats section." 
+ */ + return isl_surf_get_array_pitch_sa_rows(surf); + #endif } } -- cgit v1.2.3 From 8f0ef9bbeb2932209f4bdc408f79c2313bd6ba51 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 Jan 2016 15:21:28 -0800 Subject: nir/opt_algebraic: Use a more elementary mechanism for lowering ldexp --- src/glsl/nir/nir_opt_algebraic.py | 64 ++------------------------------------- 1 file changed, 2 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 190e4b7b43b..f4bfd3a921a 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -230,6 +230,8 @@ optimizations = [ (('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'), (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'), (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'), + (('ldexp', 'x', 'exp'), + ('fmul', 'x', ('ishl', ('imin', ('imax', ('iadd', 'exp', 0x7f), 0), 0xff), 23))), (('bitfield_insert', 'base', 'insert', 'offset', 'bits'), ('bcsel', ('ilt', 31, 'bits'), 'insert', @@ -332,68 +334,6 @@ for op in ['flt', 'fge', 'feq', 'fne', ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))), ] -def ldexp_to_arith(x, exp): - """ - Translates - ldexp x exp - into - - extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift); - resulting_biased_exp = extracted_biased_exp + exp; - - if (resulting_biased_exp < 1) { - return copysign(0.0, x); - } - - return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) | - lshift(i2u(resulting_biased_exp), exp_shift)); - - which we can't actually implement as such, since NIR doesn't have - vectorized if-statements. 
We actually implement it without branches - using conditional-select: - - extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift); - resulting_biased_exp = extracted_biased_exp + exp; - - is_not_zero_or_underflow = gequal(resulting_biased_exp, 1); - x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x)); - resulting_biased_exp = csel(is_not_zero_or_underflow, - resulting_biased_exp, 0); - - return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) | - lshift(i2u(resulting_biased_exp), exp_shift)); - """ - - sign_mask = 0x80000000 - exp_shift = 23 - exp_width = 8 - - # Extract the biased exponent from . - extracted_biased_exp = ('ushr', ('fabs', x), exp_shift) - resulting_biased_exp = ('iadd', extracted_biased_exp, exp) - - # Test if result is ±0.0, subnormal, or underflow by checking if the - # resulting biased exponent would be less than 0x1. If so, the result is - # 0.0 with the sign of x. (Actually, invert the conditions so that - # immediate values are the second arguments, which is better for i965) - zero_sign_x = ('iand', x, sign_mask) - - is_not_zero_or_underflow = ('ige', resulting_biased_exp, 0x1) - - # We could test for overflows by checking if the resulting biased exponent - # would be greater than 0xFE. Turns out we don't need to because the GLSL - # spec says: - # - # "If this product is too large to be represented in the - # floating-point type, the result is undefined." - - return ('bitfield_insert', - ('bcsel', is_not_zero_or_underflow, x, zero_sign_x), - ('bcsel', is_not_zero_or_underflow, resulting_biased_exp, 0), - exp_shift, exp_width) - -optimizations += [(('ldexp', 'x', 'exp'), ldexp_to_arith('x', 'exp'))] - # This section contains "late" optimizations that should be run after the # regular optimizations have finished. 
Optimizations should go here if # they help code generation but do not necessarily produce code that is -- cgit v1.2.3 From 32e4c5ae30240c6759257ef68e80055f4cf8d612 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 27 Jan 2016 15:31:53 -0800 Subject: vtn: Make tanh implementation even stupider The dEQP "precision" test tries to verify that the reference functions float sinh(float a) { return ((exp(a) - exp(-a)) / 2); } float cosh(float a) { return ((exp(a) + exp(-a)) / 2); } float tanh(float a) { return (sinh(a) / cosh(a)); } produce the same values as the built-ins. We simplified away the multiplication by 0.5 in the numerator and denominator, and apparently this causes them not to match for exactly 1 out of 13,632 values. So, put it back in, fixing the test, but making our code generation (and precision?) worse. --- src/glsl/nir/spirv/vtn_glsl450.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 8d730168c08..219a9c7dc5f 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -545,12 +545,14 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; case GLSLstd450Tanh: - /* (e^x - e^(-x)) / (e^x + e^(-x)) */ + /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */ val->ssa->def = - nir_fdiv(nb, nir_fsub(nb, build_exp(nb, src[0]), - build_exp(nb, nir_fneg(nb, src[0]))), - nir_fadd(nb, build_exp(nb, src[0]), - build_exp(nb, nir_fneg(nb, src[0])))); + nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fsub(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))), + nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fadd(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0]))))); return; case GLSLstd450Asinh: -- cgit v1.2.3 From 9b3d66087811b7120253e26a585817eec198b521 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 25 Jan 2016 08:56:36 -0800 Subject: anv/meta: 
Merge anv_meta_clear.h into anv_meta.h The header was too small. --- src/vulkan/anv_meta.c | 1 - src/vulkan/anv_meta.h | 3 +++ src/vulkan/anv_meta_clear.c | 1 - src/vulkan/anv_meta_clear.h | 37 ------------------------------------- 4 files changed, 3 insertions(+), 39 deletions(-) delete mode 100644 src/vulkan/anv_meta_clear.h (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 21c5f87eb54..4fd61476635 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -28,7 +28,6 @@ #include #include "anv_meta.h" -#include "anv_meta_clear.h" #include "anv_private.h" #include "glsl/nir/nir_builder.h" diff --git a/src/vulkan/anv_meta.h b/src/vulkan/anv_meta.h index c8d025bd825..31d344a4cbe 100644 --- a/src/vulkan/anv_meta.h +++ b/src/vulkan/anv_meta.h @@ -44,6 +44,9 @@ struct anv_meta_saved_state { struct anv_dynamic_state dynamic; }; +VkResult anv_device_init_meta_clear_state(struct anv_device *device); +void anv_device_finish_meta_clear_state(struct anv_device *device); + void anv_meta_save(struct anv_meta_saved_state *state, const struct anv_cmd_buffer *cmd_buffer, diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index da234fcbdf1..52da5b25cad 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -22,7 +22,6 @@ */ #include "anv_meta.h" -#include "anv_meta_clear.h" #include "anv_private.h" #include "glsl/nir/nir_builder.h" diff --git a/src/vulkan/anv_meta_clear.h b/src/vulkan/anv_meta_clear.h deleted file mode 100644 index 853d9f241d8..00000000000 --- a/src/vulkan/anv_meta_clear.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of 
the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -struct anv_device; - -VkResult anv_device_init_meta_clear_state(struct anv_device *device); -void anv_device_finish_meta_clear_state(struct anv_device *device); - -#ifdef __cplusplus -} -#endif -- cgit v1.2.3 From 57e4a5ea993baee9459cf34f1912bd4b0abc8be1 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 26 Jan 2016 09:55:05 -0800 Subject: anv/gen8: Set multisample surface state --- src/vulkan/gen8_state.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 2da9c62e153..c749cbfed0c 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -34,6 +34,13 @@ #include "genX_state_util.h" +static const uint32_t +isl_to_gen_multisample_layout[] = { + [ISL_MSAA_LAYOUT_NONE] = MSS, + [ISL_MSAA_LAYOUT_INTERLEAVED] = DEPTH_STENCIL, + [ISL_MSAA_LAYOUT_ARRAY] = MSS, +}; + void genX(fill_buffer_surface_state)(void *state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride) @@ -225,7 +232,10 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, .SurfacePitch = 
surface->isl.row_pitch - 1, .RenderTargetViewExtent = 0, /* TEMPLATE */ .MinimumArrayElement = 0, /* TEMPLATE */ - .NumberofMultisamples = MULTISAMPLECOUNT_1, + .MultisampledSurfaceStorageFormat = + isl_to_gen_multisample_layout[surface->isl.msaa_layout], + .NumberofMultisamples = ffs(surface->isl.samples) - 1, + .MultisamplePositionPaletteIndex = 0, /* UNUSED */ .XOffset = 0, .YOffset = 0, -- cgit v1.2.3 From 8cc6f058ce30d77e45bc26d4769a7adde46deb11 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 26 Jan 2016 10:56:06 -0800 Subject: anv/gen8: Begin enabling pipeline multisample state As far as I can tell, this patch sets all pipeline multisample state except: - alpha to coverage - alpha to one - the dispatch count for per-sample dispatch --- src/vulkan/anv_device.c | 2 +- src/vulkan/anv_pipeline.c | 3 - src/vulkan/gen7_pipeline.c | 4 ++ src/vulkan/gen8_pipeline.c | 146 ++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 137 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index f116f9a7354..adf33a84cf4 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -502,7 +502,7 @@ void anv_GetPhysicalDeviceProperties( .pointSizeGranularity = (1.0 / 8.0), .lineWidthGranularity = (1.0 / 128.0), .strictLines = false, /* FINISHME */ - .standardSampleLocations = true, /* FINISHME */ + .standardSampleLocations = true, .optimalBufferCopyOffsetAlignment = 128, .optimalBufferCopyRowPitchAlignment = 128, .nonCoherentAtomSize = 64, diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index e879b35388d..106b9221dd7 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -1077,9 +1077,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, if (pCreateInfo->pTessellationState) anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); - if (pCreateInfo->pMultisampleState && - pCreateInfo->pMultisampleState->rasterizationSamples > 1) - 
anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); pipeline->use_repclear = extra && extra->use_repclear; pipeline->writes_point_size = false; diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index ea5b3401121..7b5330e7448 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -269,6 +269,10 @@ genX(graphics_pipeline_create)( .MaximumPointWidth = 255.875, .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); + if (pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->rasterizationSamples > 1) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); + uint32_t samples = 1; uint32_t log2_samples = __builtin_ffs(samples) - 1; diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 5d6b87064e9..1e9aa0e12bc 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -46,8 +46,14 @@ emit_ia_state(struct anv_pipeline *pipeline, static void emit_rs_state(struct anv_pipeline *pipeline, const VkPipelineRasterizationStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info, const struct anv_graphics_pipeline_create_info *extra) { + uint32_t samples = 1; + + if (ms_info) + samples = ms_info->rasterizationSamples; + struct GENX(3DSTATE_SF) sf = { GENX(3DSTATE_SF_header), .ViewportTransformEnable = !(extra && extra->disable_viewport), @@ -64,6 +70,14 @@ emit_rs_state(struct anv_pipeline *pipeline, struct GENX(3DSTATE_RASTER) raster = { GENX(3DSTATE_RASTER_header), + + /* For details on 3DSTATE_RASTER multisample state, see the BSpec table + * "Multisample Modes State". 
+ */ + .DXMultisampleRasterizationEnable = samples > 1, + .ForcedSampleCount = FSC_NUMRASTSAMPLES_0, + .ForceMultisampling = false, + .FrontWinding = vk_to_gen_front_face[info->frontFace], .CullMode = vk_to_gen_cullmode[info->cullMode], .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], @@ -193,6 +207,118 @@ emit_ds_state(struct anv_pipeline *pipeline, GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil); } +static void +emit_ms_state(struct anv_pipeline *pipeline, + const VkPipelineMultisampleStateCreateInfo *info) +{ + uint32_t samples = 1; + uint32_t log2_samples = 0; + + /* From the Vulkan 1.0 spec: + * If pSampleMask is NULL, it is treated as if the mask has all bits + * enabled, i.e. no coverage is removed from fragments. + * + * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits. + */ + uint32_t sample_mask = 0xffff; + + if (info) { + samples = info->rasterizationSamples; + log2_samples = __builtin_ffs(samples) - 1; + } + + if (info && info->pSampleMask) + sample_mask &= info->pSampleMask[0]; + + if (info && info->sampleShadingEnable) + anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable"); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), + + /* The PRM says that this bit is valid only for DX9: + * + * SW can choose to set this bit only for DX9 API. DX10/OGL API's + * should not have any effect by setting or not setting this bit. + */ + .PixelPositionOffsetEnable = false, + + .PixelLocation = CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), + .SampleMask = sample_mask); + + /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and + * VkPhysicalDeviceFeatures::standardSampleLocations. 
+ */ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_PATTERN), + ._1xSample0XOffset = 0.5, + ._1xSample0YOffset = 0.5, + ._2xSample0XOffset = 0.25, + ._2xSample0YOffset = 0.25, + ._2xSample1XOffset = 0.75, + ._2xSample1YOffset = 0.75, + ._4xSample0XOffset = 0.375, + ._4xSample0YOffset = 0.125, + ._4xSample1XOffset = 0.875, + ._4xSample1YOffset = 0.375, + ._4xSample2XOffset = 0.125, + ._4xSample2YOffset = 0.625, + ._4xSample3XOffset = 0.625, + ._4xSample3YOffset = 0.875, + ._8xSample0XOffset = 0.5625, + ._8xSample0YOffset = 0.3125, + ._8xSample1XOffset = 0.4375, + ._8xSample1YOffset = 0.6875, + ._8xSample2XOffset = 0.8125, + ._8xSample2YOffset = 0.5625, + ._8xSample3XOffset = 0.3125, + ._8xSample3YOffset = 0.1875, + ._8xSample4XOffset = 0.1875, + ._8xSample4YOffset = 0.8125, + ._8xSample5XOffset = 0.0625, + ._8xSample5YOffset = 0.4375, + ._8xSample6XOffset = 0.6875, + ._8xSample6YOffset = 0.9375, + ._8xSample7XOffset = 0.9375, + ._8xSample7YOffset = 0.0625, +#if ANV_GEN >= 9 + ._16xSample0XOffset = 0.5625, + ._16xSample0YOffset = 0.5625, + ._16xSample1XOffset = 0.4375, + ._16xSample1YOffset = 0.3125, + ._16xSample2XOffset = 0.3125, + ._16xSample2YOffset = 0.6250, + ._16xSample3XOffset = 0.7500, + ._16xSample3YOffset = 0.4375, + ._16xSample4XOffset = 0.1875, + ._16xSample4YOffset = 0.3750, + ._16xSample5XOffset = 0.6250, + ._16xSample5YOffset = 0.8125, + ._16xSample6XOffset = 0.8125, + ._16xSample6YOffset = 0.6875, + ._16xSample7XOffset = 0.6875, + ._16xSample7YOffset = 0.1875, + ._16xSample8XOffset = 0.3750, + ._16xSample8YOffset = 0.8750, + ._16xSample9XOffset = 0.5000, + ._16xSample9YOffset = 0.0625, + ._16xSample10XOffset = 0.2500, + ._16xSample10YOffset = 0.1250, + ._16xSample11XOffset = 0.1250, + ._16xSample11YOffset = 0.7500, + ._16xSample12XOffset = 0.0000, + ._16xSample12YOffset = 0.5000, + ._16xSample13XOffset = 0.9375, + ._16xSample13YOffset = 0.2500, + ._16xSample14XOffset = 0.8750, + ._16xSample14YOffset = 0.9375, + ._16xSample15XOffset = 0.0625, 
+ ._16xSample15YOffset = 0.0000, +#endif + ); +} + VkResult genX(graphics_pipeline_create)( VkDevice _device, @@ -226,7 +352,9 @@ genX(graphics_pipeline_create)( assert(pCreateInfo->pInputAssemblyState); emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); assert(pCreateInfo->pRasterizationState); - emit_rs_state(pipeline, pCreateInfo->pRasterizationState, extra); + emit_rs_state(pipeline, pCreateInfo->pRasterizationState, + pCreateInfo->pMultisampleState, extra); + emit_ms_state(pipeline, pCreateInfo->pMultisampleState); emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); emit_cb_state(pipeline, pCreateInfo->pColorBlendState, pCreateInfo->pMultisampleState); @@ -270,18 +398,6 @@ genX(graphics_pipeline_create)( pipeline->ps_ksp0 == NO_KERNEL ? 0 : pipeline->wm_prog_data.barycentric_interp_modes); - uint32_t samples = 1; - uint32_t log2_samples = __builtin_ffs(samples) - 1; - bool enable_sampling = samples > 1 ? true : false; - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), - .PixelPositionOffsetEnable = enable_sampling, - .PixelLocation = CENTER, - .NumberofMultisamples = log2_samples); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), - .SampleMask = 0xffff); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), .VSURBStartingAddress = pipeline->urb.vs_start, .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, @@ -530,7 +646,9 @@ genX(graphics_pipeline_create)( .KernelStartPointer1 = 0, .KernelStartPointer2 = pipeline->ps_ksp2); - bool per_sample_ps = false; + bool per_sample_ps = pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->sampleShadingEnable; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), .PixelShaderValid = true, .PixelShaderKillsPixel = wm_prog_data->uses_kill, -- cgit v1.2.3 From 8cc1e59d61a71cb6444fc849a9f2d3dd4803d2ae Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 25 Jan 2016 09:14:03 -0800 Subject: anv/meta: Add func anv_meta_get_iview_layer() This function is 
just meta_blit_get_dest_view_base_array_slice(), but moved to the shared header anv_meta.h. Will be needed by anv_meta_resolve.c. --- src/vulkan/anv_meta.c | 27 ++++++++++++++------------- src/vulkan/anv_meta.h | 5 +++++ 2 files changed, 19 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 4fd61476635..2c70e6708ea 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -169,10 +169,14 @@ anv_meta_get_view_type(const struct anv_image *image) } } -static uint32_t -meta_blit_get_dest_view_base_array_slice(const struct anv_image *dest_image, - const VkImageSubresourceLayers *dest_subresource, - const VkOffset3D *dest_offset) +/** + * When creating a destination VkImageView, this function provides the needed + * VkImageViewCreateInfo::subresourceRange::baseArrayLayer. + */ +uint32_t +anv_meta_get_iview_layer(const struct anv_image *dest_image, + const VkImageSubresourceLayers *dest_subresource, + const VkOffset3D *dest_offset) { switch (dest_image->type) { case VK_IMAGE_TYPE_1D: @@ -1078,9 +1082,8 @@ void anv_CmdCopyImage( } const uint32_t dest_base_array_slice = - meta_blit_get_dest_view_base_array_slice(dest_image, - &pRegions[r].dstSubresource, - &pRegions[r].dstOffset); + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffset); for (unsigned slice = 0; slice < num_slices; slice++) { VkOffset3D src_offset = pRegions[r].srcOffset; @@ -1186,9 +1189,8 @@ void anv_CmdBlitImage( }; const uint32_t dest_array_slice = - meta_blit_get_dest_view_base_array_slice(dest_image, - &pRegions[r].dstSubresource, - &pRegions[r].dstOffsets[0]); + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffsets[0]); if (pRegions[r].srcSubresource.layerCount > 1) anv_finishme("FINISHME: copy multiple array layers"); @@ -1304,9 +1306,8 @@ void anv_CmdCopyBufferToImage( &pRegions[r]); const uint32_t dest_base_array_slice = - 
meta_blit_get_dest_view_base_array_slice(dest_image, - &pRegions[r].imageSubresource, - &pRegions[r].imageOffset); + anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource, + &pRegions[r].imageOffset); unsigned num_slices_3d = pRegions[r].imageExtent.depth; unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; diff --git a/src/vulkan/anv_meta.h b/src/vulkan/anv_meta.h index 31d344a4cbe..47c6d335438 100644 --- a/src/vulkan/anv_meta.h +++ b/src/vulkan/anv_meta.h @@ -59,6 +59,11 @@ anv_meta_restore(const struct anv_meta_saved_state *state, VkImageViewType anv_meta_get_view_type(const struct anv_image *image); +uint32_t +anv_meta_get_iview_layer(const struct anv_image *dest_image, + const VkImageSubresourceLayers *dest_subresource, + const VkOffset3D *dest_offset); + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 4085f1f230cef4ea3387978239a55fd78ef077cb Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 20 Jan 2016 17:10:41 -0800 Subject: anv/meta: Implement vkCmdResolveImage This handles multisample color images that have a floating-point or normalized format and have a single array layer. This does not yet handle integer formats nor multisample array images. 
--- src/vulkan/Makefile.am | 1 + src/vulkan/anv_meta.c | 35 +- src/vulkan/anv_meta.h | 3 + src/vulkan/anv_meta_resolve.c | 781 ++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 11 + 5 files changed, 813 insertions(+), 18 deletions(-) create mode 100644 src/vulkan/anv_meta_resolve.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index efb781e8c0b..d76fa206a95 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -82,6 +82,7 @@ VULKAN_SOURCES = \ anv_intel.c \ anv_meta.c \ anv_meta_clear.c \ + anv_meta_resolve.c \ anv_nir_apply_dynamic_offsets.c \ anv_nir_apply_pipeline_layout.c \ anv_nir_lower_push_constants.c \ diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 2c70e6708ea..b40151c2be6 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1508,18 +1508,6 @@ void anv_CmdCopyImageToBuffer( meta_finish_blit(cmd_buffer, &saved_state); } -void anv_CmdResolveImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageResolve* pRegions) -{ - stub(); -} - static void * meta_alloc(void* _device, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) @@ -1549,6 +1537,8 @@ meta_free(void* _device, void *data) VkResult anv_device_init_meta(struct anv_device *device) { + VkResult result; + device->meta_state.alloc = (VkAllocationCallbacks) { .pUserData = device, .pfnAllocation = meta_alloc, @@ -1556,23 +1546,32 @@ anv_device_init_meta(struct anv_device *device) .pfnFree = meta_free, }; - VkResult result; result = anv_device_init_meta_clear_state(device); if (result != VK_SUCCESS) - return result; + goto fail_clear; + + result = anv_device_init_meta_resolve_state(device); + if (result != VK_SUCCESS) + goto fail_resolve; result = anv_device_init_meta_blit_state(device); - if (result != VK_SUCCESS) { - anv_device_finish_meta_clear_state(device); - 
return result; - } + if (result != VK_SUCCESS) + goto fail_blit; return VK_SUCCESS; + +fail_blit: + anv_device_finish_meta_resolve_state(device); +fail_resolve: + anv_device_finish_meta_clear_state(device); +fail_clear: + return result; } void anv_device_finish_meta(struct anv_device *device) { + anv_device_finish_meta_resolve_state(device); anv_device_finish_meta_clear_state(device); /* Blit */ diff --git a/src/vulkan/anv_meta.h b/src/vulkan/anv_meta.h index 47c6d335438..91c3c7d21ca 100644 --- a/src/vulkan/anv_meta.h +++ b/src/vulkan/anv_meta.h @@ -47,6 +47,9 @@ struct anv_meta_saved_state { VkResult anv_device_init_meta_clear_state(struct anv_device *device); void anv_device_finish_meta_clear_state(struct anv_device *device); +VkResult anv_device_init_meta_resolve_state(struct anv_device *device); +void anv_device_finish_meta_resolve_state(struct anv_device *device); + void anv_meta_save(struct anv_meta_saved_state *state, const struct anv_cmd_buffer *cmd_buffer, diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c new file mode 100644 index 00000000000..9fa47949288 --- /dev/null +++ b/src/vulkan/anv_meta_resolve.c @@ -0,0 +1,781 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include + +#include "anv_meta.h" +#include "anv_private.h" +#include "glsl/nir/nir_builder.h" + +/** + * Vertex attributes used by all pipelines. + */ +struct vertex_attrs { + struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ + float tex_position[2]; +}; + +static nir_shader * +build_nir_vs(void) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + + nir_builder b; + nir_variable *a_position; + nir_variable *v_position; + nir_variable *a_tex_position; + nir_variable *v_tex_position; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs"); + + a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_position"); + a_position->data.location = VERT_ATTRIB_GENERIC0; + + v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "gl_Position"); + v_position->data.location = VARYING_SLOT_POS; + + a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_tex_position"); + a_tex_position->data.location = VERT_ATTRIB_GENERIC1; + + v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + + nir_copy_var(&b, v_position, a_position); + nir_copy_var(&b, v_tex_position, a_tex_position); + + return b.shader; +} + +static nir_shader * +build_nir_fs(uint32_t num_samples) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + + 
const struct glsl_type *sampler2DMS = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, + /*is_shadow*/ false, + /*is_array*/ false, + GLSL_TYPE_FLOAT); + + nir_builder b; + nir_variable *u_tex; /* uniform sampler */ + nir_variable *v_position; /* vec4, varying fragment position */ + nir_variable *v_tex_position; /* vec4, varying texture coordinate */ + nir_variable *f_color; /* vec4, fragment output color */ + nir_ssa_def *accum; /* vec4, accumulation of sample values */ + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_asprintf(b.shader, + "meta_resolve_fs_samples%02d", + num_samples); + + u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS, + "u_tex"); + u_tex->data.descriptor_set = 0; + u_tex->data.binding = 0; + + v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_position"); + v_position->data.location = VARYING_SLOT_POS; + + v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + + f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "f_color"); + f_color->data.location = FRAG_RESULT_DATA0; + + accum = nir_imm_vec4(&b, 0, 0, 0, 0); + + nir_ssa_def *tex_position_ivec = + nir_f2i(&b, nir_load_var(&b, v_tex_position)); + + for (uint32_t i = 0; i < num_samples; ++i) { + nir_tex_instr *tex; + + tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2); + tex->sampler = nir_deref_var_create(tex, u_tex); + tex->sampler_dim = GLSL_SAMPLER_DIM_MS; + tex->op = nir_texop_txf_ms; + tex->src[0].src = nir_src_for_ssa(tex_position_ivec); + tex->src[0].src_type = nir_tex_src_coord; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); + tex->src[1].src_type = nir_tex_src_ms_index; + tex->dest_type = nir_type_float; + tex->is_array = false; + tex->coord_components = 3; + nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + 
accum = nir_fadd(&b, accum, &tex->dest.ssa); + } + + accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples)); + nir_store_var(&b, f_color, accum, /*writemask*/ 4); + + return b.shader; +} + +static VkResult +create_pass(struct anv_device *device, VkRenderPass *pass_h) +{ + VkResult result; + VkDevice device_h = anv_device_to_handle(device); + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + result = anv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .samples = 1, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + }, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }, + .dependencyCount = 0, + }, + alloc, + pass_h); + + return result; +} + +static VkResult +create_pipeline(struct anv_device *device, + uint32_t num_samples, + VkShaderModule vs_module_h, + VkRenderPass pass_h, + VkPipeline *pipeline_h) +{ + VkResult result; + VkDevice device_h = anv_device_to_handle(device); + + struct anv_shader_module fs_module = { + .nir = build_nir_fs(num_samples), + }; + + if (!fs_module.nir) { + /* XXX: Need more accurate error */ + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + result = anv_graphics_pipeline_create(device_h, + VK_NULL_HANDLE, + &(VkGraphicsPipelineCreateInfo) { + .sType = 
VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vs_module_h, + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = anv_shader_module_to_handle(&fs_module), + .pName = "main", + }, + }, + .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct vertex_attrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct vertex_attrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, position), + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, tex_position), + }, + }, + }, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = false, + 
.rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { 0x1 }, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT, + }, + }, + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 2, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }, + }, + .layout = device->meta_state.resolve.pipeline_layout, + .renderPass = pass_h, + .subpass = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }, + &device->meta_state.alloc, + pipeline_h); + if (result != VK_SUCCESS) + goto fail; + + goto cleanup; + +fail: + *pipeline_h = VK_NULL_HANDLE; + +cleanup: + ralloc_free(fs_module.nir); + return result; +} + +void +anv_device_finish_meta_resolve_state(struct anv_device *device) +{ + struct anv_meta_state *state = &device->meta_state; + VkDevice device_h = anv_device_to_handle(device); + VkRenderPass pass_h = device->meta_state.resolve.pass; + VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout; + VkDescriptorSetLayout ds_layout_h = 
device->meta_state.resolve.ds_layout; + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + if (pass_h) + ANV_CALL(DestroyRenderPass)(device_h, pass_h, + &device->meta_state.alloc); + + if (pipeline_layout_h) + ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc); + + if (ds_layout_h) + ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc); + + for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) { + VkPipeline pipeline_h = state->resolve.pipelines[i]; + + if (!pipeline_h) { + ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); + } + } +} + +VkResult +anv_device_init_meta_resolve_state(struct anv_device *device) +{ + VkResult res = VK_SUCCESS; + VkDevice device_h = anv_device_to_handle(device); + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + const isl_sample_count_mask_t sample_count_mask = + isl_device_get_sample_counts(&device->isl_dev); + + zero(device->meta_state.resolve); + + struct anv_shader_module vs_module = { .nir = build_nir_vs() }; + if (!vs_module.nir) { + /* XXX: Need more accurate error */ + res = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module); + + res = anv_CreateDescriptorSetLayout(device_h, + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }, + }, + }, + alloc, + &device->meta_state.resolve.ds_layout); + if (res != VK_SUCCESS) + goto fail; + + res = anv_CreatePipelineLayout(device_h, + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = (VkDescriptorSetLayout[]) { + device->meta_state.resolve.ds_layout, + }, + }, + alloc, + 
&device->meta_state.resolve.pipeline_layout); + if (res != VK_SUCCESS) + goto fail; + + + res = create_pass(device, &device->meta_state.resolve.pass); + if (res != VK_SUCCESS) + goto fail; + + + for (uint32_t i = 0; + i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) { + uint32_t sample_count = 1 << i; + + if (!(sample_count_mask & sample_count)) + continue; + + res = create_pipeline(device, sample_count, vs_module_h, + device->meta_state.resolve.pass, + &device->meta_state.resolve.pipelines[i]); + if (res != VK_SUCCESS) + goto fail; + } + + goto cleanup; + +fail: + anv_device_finish_meta_resolve_state(device); + +cleanup: + ralloc_free(vs_module.nir); + + return res; +} + +static void +emit_resolve(struct anv_cmd_buffer *cmd_buffer, + struct anv_image_view *src_iview, + uint32_t src_level, + const VkOffset2D *src_offset, + struct anv_image_view *dest_iview, + uint32_t dest_level, + const VkOffset2D *dest_offset, + const VkExtent2D *resolve_extent) +{ + struct anv_device *device = cmd_buffer->device; + VkDevice device_h = anv_device_to_handle(device); + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + const struct anv_image *src_image = src_iview->image; + const struct anv_image *dest_image = dest_iview->image; + VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1; + uint32_t samples_log2 = ffs(src_image->samples) - 1; + + const VkExtent2D dest_iview_extent = { + anv_minify(dest_image->extent.width, dest_level), + anv_minify(dest_image->extent.height, dest_level), + }; + + const struct vertex_attrs vertex_data[3] = { + { + .vue_header = {0}, + .position = { + dest_offset->x + resolve_extent->width, + dest_offset->y + resolve_extent->height, + }, + .tex_position = { + src_offset->x + resolve_extent->width, + src_offset->y + resolve_extent->height, + }, + }, + { + .vue_header = {0}, + .position = { + dest_offset->x, + dest_offset->y + resolve_extent->height, + }, + .tex_position = { + src_offset->x, + src_offset->y + 
resolve_extent->height, + }, + }, + { + .vue_header = {0}, + .position = { + dest_offset->x, + dest_offset->y, + }, + .tex_position = { + src_offset->x, + src_offset->y, + }, + }, + }; + + struct anv_state vertex_mem = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, + sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo, + .offset = vertex_mem.offset, + }; + + VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer); + + anv_CmdBindVertexBuffers(cmd_buffer_h, + /*firstBinding*/ 0, + /*bindingCount*/ 1, + (VkBuffer[]) { vertex_buffer_h }, + (VkDeviceSize[]) { 0 }); + + VkSampler sampler_h; + ANV_CALL(CreateSampler)(device_h, + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .mipLodBias = 0.0, + .anisotropyEnable = false, + .compareEnable = false, + .minLod = 0.0, + .maxLod = 0.0, + .unnormalizedCoordinates = false, + }, + &cmd_buffer->pool->alloc, + &sampler_h); + + VkDescriptorSet desc_set_h; + anv_AllocateDescriptorSets(device_h, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = dummy_desc_pool_h, + .descriptorSetCount = 1, + .pSetLayouts = (VkDescriptorSetLayout[]) { + device->meta_state.blit.ds_layout, + }, + }, + &desc_set_h); + + ANV_FROM_HANDLE(anv_descriptor_set, desc_set, desc_set_h); + + anv_UpdateDescriptorSets(device_h, + /*writeCount*/ 1, + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = desc_set_h, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = 
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler_h, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + }, + }, + /*copyCount*/ 0, + /*copies */ NULL); + + VkFramebuffer fb_h; + anv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(dest_iview), + }, + .width = dest_iview_extent.width, + .height = dest_iview_extent.height, + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb_h); + + ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h, + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.resolve.pass, + .framebuffer = fb_h, + .renderArea = { + .offset = { dest_offset->x, dest_offset->y }, + .extent = { resolve_extent->width, resolve_extent->height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, + VK_SUBPASS_CONTENTS_INLINE); + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, + /*firstViewport*/ 0, + /*viewportCount*/ 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = dest_iview_extent.width, + .height = dest_iview_extent.height, + .minDepth = 0.0, + .maxDepth = 1.0, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, + /*firstScissor*/ 0, + /*scissorCount*/ 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = dest_iview_extent, + }, + }); + + VkPipeline pipeline_h = device->meta_state.resolve.pipelines[samples_log2]; + ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h); + + if (cmd_buffer->state.pipeline != pipeline) { + anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + } + + anv_CmdBindDescriptorSets(cmd_buffer_h, + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.resolve.pipeline_layout, + /*firstSet*/ 0, + /* setCount */ 1, + (VkDescriptorSet[]) { + desc_set_h, + }, + /*copyCount*/ 0, + 
/*copies */ NULL); + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); + ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); + + /* All objects below are consumed by the draw call. We may safely destroy + * them. + */ + anv_descriptor_set_destroy(device, desc_set); + anv_DestroySampler(device_h, sampler_h, + &cmd_buffer->pool->alloc); + anv_DestroyFramebuffer(device_h, fb_h, + &cmd_buffer->pool->alloc); +} + +void anv_CmdResolveImage( + VkCommandBuffer cmd_buffer_h, + VkImage src_image_h, + VkImageLayout src_image_layout, + VkImage dest_image_h, + VkImageLayout dest_image_layout, + uint32_t region_count, + const VkImageResolve* regions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h); + ANV_FROM_HANDLE(anv_image, src_image, src_image_h); + ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h); + struct anv_meta_saved_state state; + + anv_meta_save(&state, cmd_buffer, 0); + + assert(src_image->samples > 1); + assert(dest_image->samples == 1); + + if (src_image->samples >= 16) { + /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the + * glBlitFramebuffer workaround for samples >= 16. 
+ */ + anv_finishme("vkCmdResolveImage: need interpolation workaround when " + "samples >= 16"); + } + + if (src_image->array_size > 1) + anv_finishme("vkCmdResolveImage: multisample array images"); + + for (uint32_t r = 0; r < region_count; ++r) { + const VkImageResolve *region = ®ions[r]; + + /* From the Vulkan 1.0 spec: + * + * - The aspectMask member of srcSubresource and dstSubresource must + * only contain VK_IMAGE_ASPECT_COLOR_BIT + * + * - The layerCount member of srcSubresource and dstSubresource must + * match + */ + assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(region->srcSubresource.layerCount == + region->dstSubresource.layerCount); + + const uint32_t src_base_layer = + anv_meta_get_iview_layer(src_image, ®ion->srcSubresource, + ®ion->srcOffset); + + const uint32_t dest_base_layer = + anv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, + ®ion->dstOffset); + + for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; + ++layer) { + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image_h, + .viewType = anv_meta_get_view_type(src_image), + .format = src_image->format->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = region->srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = src_base_layer + layer, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = dest_image_h, + .viewType = anv_meta_get_view_type(dest_image), + .format = dest_image->format->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 
region->dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_layer + layer, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + emit_resolve(cmd_buffer, + &src_iview, + region->srcSubresource.mipLevel, + &(VkOffset2D) { + .x = region->srcOffset.x, + .y = region->srcOffset.y, + }, + &dest_iview, + region->dstSubresource.mipLevel, + &(VkOffset2D) { + .x = region->dstOffset.x, + .y = region->dstOffset.y, + }, + &(VkExtent2D) { + .width = region->extent.width, + .height = region->extent.height, + }); + } + } + + anv_meta_restore(&state, cmd_buffer); +} diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a6f24b55dca..6a0e227b078 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -595,6 +595,17 @@ struct anv_meta_state { VkPipelineLayout pipeline_layout; VkDescriptorSetLayout ds_layout; } blit; + + struct { + /** + * Use pipeline `i` to resolve an image with `log2(i)` samples. + */ + VkPipeline pipelines[4]; + + VkRenderPass pass; + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } resolve; }; struct anv_queue { -- cgit v1.2.3 From eb6fb65fd1562aa61ea5a0ec82a6e248bf5b2423 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jan 2016 14:02:57 -0800 Subject: anv/meta: Simplify failure handling during clear init Remove all the fine-grained cleanup in anv_device_init_meta_clear_state(). Instead, if anything fails during initialization, simply call anv_device_finish_meta_clear_state() and let it clean up the partially initialized anv_meta_state::clear. 
--- src/vulkan/anv_meta_clear.c | 86 +++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 54 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 52da5b25cad..07ae8694c16 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -303,18 +303,27 @@ create_color_pipeline(struct anv_device *device, uint32_t frag_output, } static void -free_color_pipelines(struct anv_device *device) +destroy_pipeline(struct anv_device *device, struct anv_pipeline *pipeline) { - for (uint32_t i = 0; - i < ARRAY_SIZE(device->meta_state.clear.color_pipelines); ++i) { - if (device->meta_state.clear.color_pipelines[i] == NULL) - continue; + if (!pipeline) + return; - ANV_CALL(DestroyPipeline)( - anv_device_to_handle(device), - anv_pipeline_to_handle(device->meta_state.clear.color_pipelines[i]), - &device->meta_state.alloc); + ANV_CALL(DestroyPipeline)(anv_device_to_handle(device), + anv_pipeline_to_handle(pipeline), + &device->meta_state.alloc); +} + +void +anv_device_finish_meta_clear_state(struct anv_device *device) +{ + for (uint32_t j = 0; + j < ARRAY_SIZE(device->meta_state.clear.color_pipelines); ++j) { + destroy_pipeline(device, device->meta_state.clear.color_pipelines[j]); } + + destroy_pipeline(device, device->meta_state.clear.depth_only_pipeline); + destroy_pipeline(device, device->meta_state.clear.stencil_only_pipeline); + destroy_pipeline(device, device->meta_state.clear.depthstencil_pipeline); } static VkResult @@ -324,20 +333,14 @@ init_color_pipelines(struct anv_device *device) struct anv_pipeline **pipelines = device->meta_state.clear.color_pipelines; uint32_t n = ARRAY_SIZE(device->meta_state.clear.color_pipelines); - zero(device->meta_state.clear.color_pipelines); - for (uint32_t i = 0; i < n; ++i) { result = create_color_pipeline(device, i, &pipelines[i]); - if (result < 0) - goto fail; + if (result != VK_SUCCESS) + return result; + } return VK_SUCCESS; - -fail: - 
free_color_pipelines(device); - - return result; } static void @@ -638,13 +641,13 @@ init_depthstencil_pipelines(struct anv_device *device) create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, &state->clear.depth_only_pipeline); if (result != VK_SUCCESS) - goto fail; + return result; result = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, &state->clear.stencil_only_pipeline); if (result != VK_SUCCESS) - goto fail_depth_only; + return result; result = create_depthstencil_pipeline(device, @@ -652,19 +655,8 @@ init_depthstencil_pipelines(struct anv_device *device) VK_IMAGE_ASPECT_STENCIL_BIT, &state->clear.depthstencil_pipeline); if (result != VK_SUCCESS) - goto fail_stencil_only; - - return result; + return result; - fail_stencil_only: - anv_DestroyPipeline(anv_device_to_handle(device), - anv_pipeline_to_handle(state->clear.stencil_only_pipeline), - NULL); - fail_depth_only: - anv_DestroyPipeline(anv_device_to_handle(device), - anv_pipeline_to_handle(state->clear.depth_only_pipeline), - NULL); - fail: return result; } @@ -673,35 +665,21 @@ anv_device_init_meta_clear_state(struct anv_device *device) { VkResult result; + zero(device->meta_state.clear); + result = init_color_pipelines(device); if (result != VK_SUCCESS) - return result; + goto fail; result = init_depthstencil_pipelines(device); - if (result != VK_SUCCESS) { - free_color_pipelines(device); - return result; - } + if (result != VK_SUCCESS) + goto fail; return VK_SUCCESS; -} - -void -anv_device_finish_meta_clear_state(struct anv_device *device) -{ - VkDevice device_h = anv_device_to_handle(device); - free_color_pipelines(device); - - ANV_CALL(DestroyPipeline)(device_h, - anv_pipeline_to_handle(device->meta_state.clear.depth_only_pipeline), - NULL); - ANV_CALL(DestroyPipeline)(device_h, - anv_pipeline_to_handle(device->meta_state.clear.stencil_only_pipeline), - NULL); - ANV_CALL(DestroyPipeline)(device_h, - anv_pipeline_to_handle(device->meta_state.clear.depthstencil_pipeline), - 
NULL); +fail: + anv_device_finish_meta_clear_state(device); + return result; } /** -- cgit v1.2.3 From 5d4f3298ae51430df7d554a1a34671e382cc4c57 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jan 2016 14:34:27 -0800 Subject: anv/meta: Implement multisample clears --- src/vulkan/anv_meta_clear.c | 137 +++++++++++++++++++++----------------------- src/vulkan/anv_private.h | 5 +- 2 files changed, 70 insertions(+), 72 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 07ae8694c16..027217b88dc 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -117,6 +117,7 @@ build_color_shaders(struct nir_shader **out_vs, static VkResult create_pipeline(struct anv_device *device, + uint32_t samples, struct nir_shader *vs_nir, struct nir_shader *fs_nir, const VkPipelineVertexInputStateCreateInfo *vi_state, @@ -175,9 +176,9 @@ create_pipeline(struct anv_device *device, }, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterizationSamples = 1, /* FINISHME: Multisampling */ + .rasterizationSamples = samples, .sampleShadingEnable = false, - .pSampleMask = (VkSampleMask[]) { 0x1 }, + .pSampleMask = (VkSampleMask[]) { ~0 }, .alphaToCoverageEnable = false, .alphaToOneEnable = false, }, @@ -226,7 +227,9 @@ create_pipeline(struct anv_device *device, } static VkResult -create_color_pipeline(struct anv_device *device, uint32_t frag_output, +create_color_pipeline(struct anv_device *device, + uint32_t samples, + uint32_t frag_output, struct anv_pipeline **pipeline) { struct nir_shader *vs_nir; @@ -297,7 +300,7 @@ create_color_pipeline(struct anv_device *device, uint32_t frag_output, * but the repclear shader writes to all color attachments. 
*/ return - create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, + create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &device->meta_state.alloc, /*use_repclear*/ false, pipeline); } @@ -316,31 +319,17 @@ destroy_pipeline(struct anv_device *device, struct anv_pipeline *pipeline) void anv_device_finish_meta_clear_state(struct anv_device *device) { - for (uint32_t j = 0; - j < ARRAY_SIZE(device->meta_state.clear.color_pipelines); ++j) { - destroy_pipeline(device, device->meta_state.clear.color_pipelines[j]); - } - - destroy_pipeline(device, device->meta_state.clear.depth_only_pipeline); - destroy_pipeline(device, device->meta_state.clear.stencil_only_pipeline); - destroy_pipeline(device, device->meta_state.clear.depthstencil_pipeline); -} - -static VkResult -init_color_pipelines(struct anv_device *device) -{ - VkResult result; - struct anv_pipeline **pipelines = device->meta_state.clear.color_pipelines; - uint32_t n = ARRAY_SIZE(device->meta_state.clear.color_pipelines); + struct anv_meta_state *state = &device->meta_state; - for (uint32_t i = 0; i < n; ++i) { - result = create_color_pipeline(device, i, &pipelines[i]); - if (result != VK_SUCCESS) - return result; + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { + for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { + destroy_pipeline(device, state->clear[i].color_pipelines[j]); + } + destroy_pipeline(device, state->clear[i].depth_only_pipeline); + destroy_pipeline(device, state->clear[i].stencil_only_pipeline); + destroy_pipeline(device, state->clear[i].depthstencil_pipeline); } - - return VK_SUCCESS; } static void @@ -351,13 +340,19 @@ emit_color_clear(struct anv_cmd_buffer *cmd_buffer, struct anv_device *device = cmd_buffer->device; const struct anv_subpass *subpass = cmd_buffer->state.subpass; const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - VkClearColorValue clear_value = clear_att->clearValue.color; + const uint32_t 
subpass_att = clear_att->colorAttachment; + const uint32_t pass_att = subpass->color_attachments[subpass_att]; + const struct anv_image_view *iview = fb->attachments[pass_att]; + const uint32_t samples = iview->image->samples; + const uint32_t samples_log2 = ffs(samples) - 1; struct anv_pipeline *pipeline = - device->meta_state.clear.color_pipelines[clear_att->colorAttachment]; + device->meta_state.clear[samples_log2].color_pipelines[subpass_att]; + VkClearColorValue clear_value = clear_att->clearValue.color; VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); VkPipeline pipeline_h = anv_pipeline_to_handle(pipeline); + assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear)); assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); assert(clear_att->colorAttachment < subpass->color_count); @@ -460,6 +455,7 @@ build_depthstencil_shader(struct nir_shader **out_vs) static VkResult create_depthstencil_pipeline(struct anv_device *device, VkImageAspectFlags aspects, + uint32_t samples, struct anv_pipeline **pipeline) { struct nir_shader *vs_nir; @@ -518,7 +514,7 @@ create_depthstencil_pipeline(struct anv_device *device, .pAttachments = NULL, }; - return create_pipeline(device, vs_nir, NULL, &vi_state, &ds_state, + return create_pipeline(device, samples, vs_nir, NULL, &vi_state, &ds_state, &cb_state, &device->meta_state.alloc, /*use_repclear*/ true, pipeline); } @@ -529,19 +525,24 @@ emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, const VkClearRect *clear_rect) { struct anv_device *device = cmd_buffer->device; + struct anv_meta_state *meta_state = &device->meta_state; const struct anv_subpass *subpass = cmd_buffer->state.subpass; const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - uint32_t attachment = subpass->depth_stencil_attachment; + const uint32_t pass_att = subpass->depth_stencil_attachment; + const struct anv_image_view *iview = fb->attachments[pass_att]; + const uint32_t samples = iview->image->samples; + const 
uint32_t samples_log2 = ffs(samples) - 1; VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; VkImageAspectFlags aspects = clear_att->aspectMask; VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + assert(samples_log2 < ARRAY_SIZE(meta_state->clear)); assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT || aspects == VK_IMAGE_ASPECT_STENCIL_BIT || aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); - assert(attachment != VK_ATTACHMENT_UNUSED); + assert(pass_att != VK_ATTACHMENT_UNUSED); const struct depthstencil_clear_vattrs vertex_data[3] = { { @@ -611,13 +612,13 @@ emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, struct anv_pipeline *pipeline; switch (aspects) { case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: - pipeline = device->meta_state.clear.depthstencil_pipeline; + pipeline = meta_state->clear[samples_log2].depthstencil_pipeline; break; case VK_IMAGE_ASPECT_DEPTH_BIT: - pipeline = device->meta_state.clear.depth_only_pipeline; + pipeline = meta_state->clear[samples_log2].depth_only_pipeline; break; case VK_IMAGE_ASPECT_STENCIL_BIT: - pipeline = device->meta_state.clear.stencil_only_pipeline; + pipeline = meta_state->clear[samples_log2].stencil_only_pipeline; break; default: unreachable("expected depth or stencil aspect"); @@ -631,55 +632,49 @@ emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); } -static VkResult -init_depthstencil_pipelines(struct anv_device *device) -{ - VkResult result; - struct anv_meta_state *state = &device->meta_state; - - result = - create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, - &state->clear.depth_only_pipeline); - if (result != VK_SUCCESS) - return result; - - result = - create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, - &state->clear.stencil_only_pipeline); - if (result != VK_SUCCESS) - return result; - - result = - create_depthstencil_pipeline(device, - 
VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT, - &state->clear.depthstencil_pipeline); - if (result != VK_SUCCESS) - return result; - - return result; -} - VkResult anv_device_init_meta_clear_state(struct anv_device *device) { - VkResult result; + VkResult res; + struct anv_meta_state *state = &device->meta_state; zero(device->meta_state.clear); - result = init_color_pipelines(device); - if (result != VK_SUCCESS) - goto fail; + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { + uint32_t samples = 1 << i; - result = init_depthstencil_pipelines(device); - if (result != VK_SUCCESS) - goto fail; + for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { + res = create_color_pipeline(device, samples, /* frag_output */ j, + &state->clear[i].color_pipelines[j]); + if (res != VK_SUCCESS) + goto fail; + } + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT, samples, + &state->clear[i].depth_only_pipeline); + if (res != VK_SUCCESS) + goto fail; + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_STENCIL_BIT, samples, + &state->clear[i].stencil_only_pipeline); + if (res != VK_SUCCESS) + goto fail; + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT, samples, + &state->clear[i].depthstencil_pipeline); + if (res != VK_SUCCESS) + goto fail; + } return VK_SUCCESS; fail: anv_device_finish_meta_clear_state(device); - return result; + return res; } /** diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 6a0e227b078..43d4781dc20 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -564,6 +564,9 @@ void anv_finish_wsi(struct anv_instance *instance); struct anv_meta_state { VkAllocationCallbacks alloc; + /** + * Use array element `i` to clear an image with `log2(i)` samples. 
+ */ struct { /** * Pipeline N is used to clear color attachment N of the current @@ -578,7 +581,7 @@ struct anv_meta_state { struct anv_pipeline *depth_only_pipeline; struct anv_pipeline *stencil_only_pipeline; struct anv_pipeline *depthstencil_pipeline; - } clear; + } clear[4]; struct { VkRenderPass render_pass; -- cgit v1.2.3 From 2af3281fee16eda128b85c62062608687bd4c548 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 27 Jan 2016 11:37:23 -0800 Subject: anv/push constants: Use constant buffer #2 SKL has a workaround which requires either some weird programming of buffer 3, OR, just never using buffer 0. Since we don't actually use multiple constant buffers, it's easier to just not use 0. Only SKL requires this workaround, but there is no harm in applying it to all platforms. The big change here is that buffer #0 is relative to dynamic state base normally (depending upon ISTPM), where buffer 1-3 is a GPU virtual address. --- src/vulkan/gen8_cmd_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 560608fe346..daa049e98e0 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -58,8 +58,8 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), ._3DCommandSubOpcode = push_constant_opcodes[stage], .ConstantBody = { - .PointerToConstantBuffer0 = { .offset = state.offset }, - .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, + .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), }); flushed |= mesa_to_vk_shader_stage(stage); -- cgit v1.2.3 From 61d3d49820ebcdc8113f9471acee4f4a87f80ceb Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jan 2016 17:22:20 -0800 Subject: anv: Fix comment for anv_meta_state arrays Array element i 
is for 2^i samples, not log2(i) samples. --- src/vulkan/anv_private.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 43d4781dc20..e328d3dcd71 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -565,7 +565,7 @@ struct anv_meta_state { VkAllocationCallbacks alloc; /** - * Use array element `i` to clear an image with `log2(i)` samples. + * Use array element `i` for images with `2^i` samples. */ struct { /** @@ -601,7 +601,7 @@ struct anv_meta_state { struct { /** - * Use pipeline `i` to resolve an image with `log2(i)` samples. + * Use pipeline `i` for images with `2^i` samples. */ VkPipeline pipelines[4]; -- cgit v1.2.3 From 9b240a1e3d875c38dce9dbae57de73e03b8690cb Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jan 2016 17:26:18 -0800 Subject: anv/skl: Fix crash in 16x multisampling We built meta clear and resolve pipelines for only up to 8x samples. There were no 16x pipelines. --- src/vulkan/anv_private.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index e328d3dcd71..6e44e88e10c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -75,6 +75,7 @@ extern "C" { #define MAX_PUSH_CONSTANTS_SIZE 128 #define MAX_DYNAMIC_BUFFERS 16 #define MAX_IMAGES 8 +#define MAX_SAMPLES_LOG2 4 /* SKL supports 16 samples */ #define ICD_LOADER_MAGIC 0x01CDC0DE @@ -581,7 +582,7 @@ struct anv_meta_state { struct anv_pipeline *depth_only_pipeline; struct anv_pipeline *stencil_only_pipeline; struct anv_pipeline *depthstencil_pipeline; - } clear[4]; + } clear[1 + MAX_SAMPLES_LOG2]; struct { VkRenderPass render_pass; @@ -603,7 +604,7 @@ struct anv_meta_state { /** * Use pipeline `i` for images with `2^i` samples. 
*/ - VkPipeline pipelines[4]; + VkPipeline pipelines[1 + MAX_SAMPLES_LOG2]; VkRenderPass pass; VkPipelineLayout pipeline_layout; -- cgit v1.2.3 From 983db2b8042ee8ca25a3870767e0d76940011034 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 Jan 2016 19:48:43 -0800 Subject: anv/meta_resolve: Fix a bug in the meta pipeline destroy path --- src/vulkan/anv_meta_resolve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c index 9fa47949288..30ef727bb06 100644 --- a/src/vulkan/anv_meta_resolve.c +++ b/src/vulkan/anv_meta_resolve.c @@ -358,7 +358,7 @@ anv_device_finish_meta_resolve_state(struct anv_device *device) for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) { VkPipeline pipeline_h = state->resolve.pipelines[i]; - if (!pipeline_h) { + if (pipeline_h) { ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); } } -- cgit v1.2.3 From 7fb35a82281c2d245c7ae421893ffa287405b975 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 Jan 2016 16:58:42 -0800 Subject: An alternate arccosine implementation --- src/glsl/nir/spirv/vtn_glsl450.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 219a9c7dc5f..01d72a1531e 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -208,6 +208,9 @@ build_log(nir_builder *b, nir_ssa_def *x) static nir_ssa_def * build_asin(nir_builder *b, nir_ssa_def *x) { + /* + * asin(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi / 4 - 1 + |x| * (0.086566724 + |x| * -0.03102955))) + */ nir_ssa_def *abs_x = nir_fabs(b, x); return nir_fmul(b, nir_fsign(b, x), nir_fsub(b, nir_imm_float(b, M_PI_2f), @@ -221,6 +224,26 @@ build_asin(nir_builder *b, nir_ssa_def *x) nir_imm_float(b, -0.03102955f)))))))))); } +static nir_ssa_def * +build_acos(nir_builder *b, nir_ssa_def *x) +{ 
+ /* + * acos(x) = sign(x) * sqrt(1 - |x|) * (pi / 4 - 1 + |x| * (0.086566724 + |x| * -0.03102955)) + */ + nir_ssa_def *abs_x = nir_fabs(b, x); + nir_ssa_def *poly = nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), + nir_fadd(b, nir_imm_float(b, M_PI_2f), + nir_fmul(b, abs_x, + nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), + nir_fmul(b, abs_x, + nir_fadd(b, nir_imm_float(b, 0.086566724f), + nir_fmul(b, abs_x, + nir_imm_float(b, -0.03102955f)))))))); + return nir_bcsel(b, nir_flt(b, x, nir_imm_float(b, 0)), + nir_fsub(b, nir_imm_float(b, M_PI), poly), + poly); +} + /** * Compute xs[0] + xs[1] + xs[2] + ... using fadd. */ @@ -583,8 +606,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; case GLSLstd450Acos: - val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f), - build_asin(nb, src[0])); + val->ssa->def = build_acos(nb, src[0]); return; case GLSLstd450Atan: -- cgit v1.2.3 From 4604b2871a165e63be2764c1dc0102998e2c4a93 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 27 Jan 2016 18:39:48 -0800 Subject: vtn: Improve accuracy of acos approximation. The adjusted polynomial coefficients come from the numerical minimization of the L2 norm of the relative error. The old coefficients would give a maximum relative error of about 15000 ULP in the neighborhood around acos(x) = 0, the new ones give a relative error bounded by less than 2000 ULP in the same neighborhood. 
--- src/glsl/nir/spirv/vtn_glsl450.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 01d72a1531e..9c82c07894a 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -228,7 +228,7 @@ static nir_ssa_def * build_acos(nir_builder *b, nir_ssa_def *x) { /* - * acos(x) = sign(x) * sqrt(1 - |x|) * (pi / 4 - 1 + |x| * (0.086566724 + |x| * -0.03102955)) + * poly(x) = sign(x) * sqrt(1 - |x|) * (pi / 2 + |x| * (pi / 4 - 1 + |x| * (0.08132463 + |x| * -0.02363318))) */ nir_ssa_def *abs_x = nir_fabs(b, x); nir_ssa_def *poly = nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), @@ -236,9 +236,9 @@ build_acos(nir_builder *b, nir_ssa_def *x) nir_fmul(b, abs_x, nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), nir_fmul(b, abs_x, - nir_fadd(b, nir_imm_float(b, 0.086566724f), + nir_fadd(b, nir_imm_float(b, 0.08132463f), nir_fmul(b, abs_x, - nir_imm_float(b, -0.03102955f)))))))); + nir_imm_float(b, -0.02363318f)))))))); return nir_bcsel(b, nir_flt(b, x, nir_imm_float(b, 0)), nir_fsub(b, nir_imm_float(b, M_PI), poly), poly); -- cgit v1.2.3 From c64bc5463d840e46a022e9fd77a47be78a1933ef Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 Jan 2016 21:56:23 -0800 Subject: anv/device: Improve the api version check to allow 1.0.X --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index adf33a84cf4..66105f0a83d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -214,7 +214,7 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - if (pCreateInfo->pApplicationInfo->apiVersion != VK_MAKE_VERSION(1, 0, 0)) + if (pCreateInfo->pApplicationInfo->apiVersion > VK_MAKE_VERSION(1, 0, 0xfff)) return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); for (uint32_t i = 0; i 
< pCreateInfo->enabledExtensionCount; i++) { -- cgit v1.2.3 From ec80d6388aaa5cf0318d707a7aef6971ccfe8800 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 Jan 2016 22:02:03 -0800 Subject: anv/formats: Properly set FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT This was added last minute and the API bumped to 1.0.2. --- src/vulkan/anv_formats.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 3b63c97e5f4..17ccae0cea2 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -318,6 +318,9 @@ get_image_format_properties(int gen, enum isl_format base, if (info->sampling <= gen) { flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT; + + if (info->filtering <= gen) + flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; } /* We can render to swizzled formats. However, if the alpha channel is -- cgit v1.2.3 From 6286a74f6ba0906fe7df9556c1def149de40a88b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 27 Jan 2016 22:02:51 -0800 Subject: anv/device: Advertise 1.0.2 --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 66105f0a83d..c82002780eb 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -509,7 +509,7 @@ void anv_GetPhysicalDeviceProperties( }; *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = VK_MAKE_VERSION(1, 0, 0), + .apiVersion = VK_MAKE_VERSION(1, 0, 2), .driverVersion = 1, .vendorID = 0x8086, .deviceID = pdevice->chipset_id, -- cgit v1.2.3 From 608b411e9f69f5e1b39233c1dd4d7a69f49d782c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 Jan 2016 08:18:50 -0800 Subject: anv/device: Add a better version check. We now check that the requested version is precisely within the range of versions that we support. 
--- src/vulkan/anv_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index c82002780eb..27968bdf371 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -214,8 +214,11 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - if (pCreateInfo->pApplicationInfo->apiVersion > VK_MAKE_VERSION(1, 0, 0xfff)) + uint32_t client_version = pCreateInfo->pApplicationInfo->apiVersion; + if (VK_MAKE_VERSION(1, 0, 0) < client_version || + client_version > VK_MAKE_VERSION(1, 0, 2)) { return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); + } for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { bool found = false; -- cgit v1.2.3 From 2bab3cd681d1201556f23043deb24681c6d0f3e3 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 28 Jan 2016 06:28:01 -0800 Subject: anv/image: Update usage flags for multisample images Meta resolves multisample images by binding them as textures. Therefore we must add VK_IMAGE_USAGE_SAMPLED_BIT. --- src/vulkan/anv_image.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index febfaa42539..6cc5700c21e 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -158,6 +158,12 @@ anv_image_get_full_usage(const VkImageCreateInfo *info) { VkImageUsageFlags usage = info->usage; + if (info->samples > 1 && + (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) { + /* Meta will resolve the image by binding it as a texture. */ + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { /* Meta will transfer from the image by binding it as a texture. 
*/ usage |= VK_IMAGE_USAGE_SAMPLED_BIT; -- cgit v1.2.3 From 8487569fa7839d9646ef92b8b64b0e249f6efcae Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jan 2016 20:00:29 -0800 Subject: anv/meta_resolve: Begin pass outside emit_resolve() This refactor is preparation for handling subpass resolve attachments. --- src/vulkan/anv_meta_resolve.c | 93 ++++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c index 30ef727bb06..be8407869c8 100644 --- a/src/vulkan/anv_meta_resolve.c +++ b/src/vulkan/anv_meta_resolve.c @@ -450,26 +450,19 @@ cleanup: static void emit_resolve(struct anv_cmd_buffer *cmd_buffer, struct anv_image_view *src_iview, - uint32_t src_level, const VkOffset2D *src_offset, struct anv_image_view *dest_iview, - uint32_t dest_level, const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent) { struct anv_device *device = cmd_buffer->device; VkDevice device_h = anv_device_to_handle(device); VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_image *src_image = src_iview->image; - const struct anv_image *dest_image = dest_iview->image; VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1; uint32_t samples_log2 = ffs(src_image->samples) - 1; - const VkExtent2D dest_iview_extent = { - anv_minify(dest_image->extent.width, dest_level), - anv_minify(dest_image->extent.height, dest_level), - }; - const struct vertex_attrs vertex_data[3] = { { .vue_header = {0}, @@ -581,35 +574,6 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, /*copyCount*/ 0, /*copies */ NULL); - VkFramebuffer fb_h; - anv_CreateFramebuffer(device_h, - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(dest_iview), - }, - .width = 
dest_iview_extent.width, - .height = dest_iview_extent.height, - .layers = 1 - }, - &cmd_buffer->pool->alloc, - &fb_h); - - ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h, - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = device->meta_state.resolve.pass, - .framebuffer = fb_h, - .renderArea = { - .offset = { dest_offset->x, dest_offset->y }, - .extent = { resolve_extent->width, resolve_extent->height }, - }, - .clearValueCount = 0, - .pClearValues = NULL, - }, - VK_SUBPASS_CONTENTS_INLINE); - ANV_CALL(CmdSetViewport)(cmd_buffer_h, /*firstViewport*/ 0, /*viewportCount*/ 1, @@ -617,8 +581,8 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, { .x = 0, .y = 0, - .width = dest_iview_extent.width, - .height = dest_iview_extent.height, + .width = fb->width, + .height = fb->height, .minDepth = 0.0, .maxDepth = 1.0, }, @@ -630,7 +594,7 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, (VkRect2D[]) { { .offset = { 0, 0 }, - .extent = dest_iview_extent, + .extent = (VkExtent2D) { fb->width, fb->height }, }, }); @@ -654,7 +618,6 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, /*copies */ NULL); ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); - ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); /* All objects below are consumed by the draw call. We may safely destroy * them. 
@@ -662,8 +625,6 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, anv_descriptor_set_destroy(device, desc_set); anv_DestroySampler(device_h, sampler_h, &cmd_buffer->pool->alloc); - anv_DestroyFramebuffer(device_h, fb_h, - &cmd_buffer->pool->alloc); } void anv_CmdResolveImage( @@ -678,7 +639,9 @@ void anv_CmdResolveImage( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h); ANV_FROM_HANDLE(anv_image, src_image, src_image_h); ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h); + struct anv_device *device = cmd_buffer->device; struct anv_meta_saved_state state; + VkDevice device_h = anv_device_to_handle(device); anv_meta_save(&state, cmd_buffer, 0); @@ -757,15 +720,50 @@ void anv_CmdResolveImage( }, cmd_buffer, 0); + VkFramebuffer fb_h; + anv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&dest_iview), + }, + .width = anv_minify(dest_image->extent.width, + region->dstSubresource.mipLevel), + .height = anv_minify(dest_image->extent.height, + region->dstSubresource.mipLevel), + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb_h); + + ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h, + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.resolve.pass, + .framebuffer = fb_h, + .renderArea = { + .offset = { + region->dstOffset.x, + region->dstOffset.y, + }, + .extent = { + region->extent.width, + region->extent.height, + } + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, + VK_SUBPASS_CONTENTS_INLINE); + emit_resolve(cmd_buffer, &src_iview, - region->srcSubresource.mipLevel, &(VkOffset2D) { .x = region->srcOffset.x, .y = region->srcOffset.y, }, &dest_iview, - region->dstSubresource.mipLevel, &(VkOffset2D) { .x = region->dstOffset.x, .y = region->dstOffset.y, @@ -774,6 +772,11 @@ void anv_CmdResolveImage( .width = region->extent.width, .height = 
region->extent.height, }); + + ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); + + anv_DestroyFramebuffer(device_h, fb_h, + &cmd_buffer->pool->alloc); } } -- cgit v1.2.3 From 3d863e8dadcc3c2d21d7ecf9219c9255d6a2f949 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jan 2016 20:13:10 -0800 Subject: anv/meta_resolve: Save/Restore viewport and scissor --- src/vulkan/anv_meta_resolve.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c index be8407869c8..1be87c4c198 100644 --- a/src/vulkan/anv_meta_resolve.c +++ b/src/vulkan/anv_meta_resolve.c @@ -37,6 +37,25 @@ struct vertex_attrs { float tex_position[2]; }; +static void +meta_resolve_save(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT) | + (1 << VK_DYNAMIC_STATE_SCISSOR)); + + cmd_buffer->state.dynamic.viewport.count = 0; + cmd_buffer->state.dynamic.scissor.count = 0; +} + +static void +meta_resolve_restore(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + static nir_shader * build_nir_vs(void) { @@ -643,7 +662,7 @@ void anv_CmdResolveImage( struct anv_meta_saved_state state; VkDevice device_h = anv_device_to_handle(device); - anv_meta_save(&state, cmd_buffer, 0); + meta_resolve_save(&state, cmd_buffer); assert(src_image->samples > 1); assert(dest_image->samples == 1); @@ -780,5 +799,5 @@ void anv_CmdResolveImage( } } - anv_meta_restore(&state, cmd_buffer); + meta_resolve_restore(&state, cmd_buffer); } -- cgit v1.2.3 From 22258e279d5553f3c2f6983ed569169a0094b90f Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jan 2016 18:03:34 -0800 Subject: anv: Add anv_subpass::has_resolve Indicates that the subpass has at least one resolve attachment. 
--- src/vulkan/anv_pass.c | 6 ++++-- src/vulkan/anv_private.h | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pass.c b/src/vulkan/anv_pass.c index ccd8cedf561..a8e85664d48 100644 --- a/src/vulkan/anv_pass.c +++ b/src/vulkan/anv_pass.c @@ -118,8 +118,10 @@ VkResult anv_CreateRenderPass( p += desc->colorAttachmentCount; for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - subpass->resolve_attachments[j] - = desc->pResolveAttachments[j].attachment; + uint32_t a = desc->pResolveAttachments[j].attachment; + subpass->resolve_attachments[j] = a; + if (a != VK_ATTACHMENT_UNUSED) + subpass->has_resolve = true; } } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 6e44e88e10c..cad1e1fd5ff 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1712,6 +1712,9 @@ struct anv_subpass { uint32_t * color_attachments; uint32_t * resolve_attachments; uint32_t depth_stencil_attachment; + + /** Subpass has at least one resolve attachment */ + bool has_resolve; }; struct anv_render_pass_attachment { -- cgit v1.2.3 From 142da00486989eb669ac1f93fb9ff48acab0e3fb Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jan 2016 20:42:45 -0800 Subject: anv: Drop const on anv_framebuffer::attachments The attachments should be const, but the driver's function signatures are generally not const-friendly. Drop the const because it conflicts with upcoming anv_cmd_buffer_resolve_subpass(). 
--- src/vulkan/anv_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index cad1e1fd5ff..835a5e81e1e 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1702,7 +1702,7 @@ struct anv_framebuffer { uint32_t layers; uint32_t attachment_count; - const struct anv_image_view * attachments[0]; + struct anv_image_view * attachments[0]; }; struct anv_subpass { -- cgit v1.2.3 From ac5594fa716b3d2d971a10d71df175433926e280 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 28 Jan 2016 10:14:39 -0800 Subject: anv/meta_resolve: Remove redundant initialization params --- src/vulkan/anv_meta_resolve.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c index 1be87c4c198..06324c9a0be 100644 --- a/src/vulkan/anv_meta_resolve.c +++ b/src/vulkan/anv_meta_resolve.c @@ -164,7 +164,7 @@ build_nir_fs(uint32_t num_samples) } static VkResult -create_pass(struct anv_device *device, VkRenderPass *pass_h) +create_pass(struct anv_device *device) { VkResult result; VkDevice device_h = anv_device_to_handle(device); @@ -201,7 +201,7 @@ create_pass(struct anv_device *device, VkRenderPass *pass_h) .dependencyCount = 0, }, alloc, - pass_h); + &device->meta_state.resolve.pass); return result; } @@ -209,12 +209,11 @@ create_pass(struct anv_device *device, VkRenderPass *pass_h) static VkResult create_pipeline(struct anv_device *device, uint32_t num_samples, - VkShaderModule vs_module_h, - VkRenderPass pass_h, - VkPipeline *pipeline_h) + VkShaderModule vs_module_h) { VkResult result; VkDevice device_h = anv_device_to_handle(device); + uint32_t samples_log2 = ffs(num_samples) - 1; struct anv_shader_module fs_module = { .nir = build_nir_fs(num_samples), @@ -223,7 +222,7 @@ create_pipeline(struct anv_device *device, if (!fs_module.nir) { /* XXX: Need more accurate 
error */ result = VK_ERROR_OUT_OF_HOST_MEMORY; - goto fail; + goto cleanup; } result = anv_graphics_pipeline_create(device_h, @@ -328,7 +327,7 @@ create_pipeline(struct anv_device *device, }, }, .layout = device->meta_state.resolve.pipeline_layout, - .renderPass = pass_h, + .renderPass = device->meta_state.resolve.pass, .subpass = 0, }, &(struct anv_graphics_pipeline_create_info) { @@ -340,15 +339,12 @@ create_pipeline(struct anv_device *device, .use_rectlist = true }, &device->meta_state.alloc, - pipeline_h); + &device->meta_state.resolve.pipelines[samples_log2]); if (result != VK_SUCCESS) - goto fail; + goto cleanup; goto cleanup; -fail: - *pipeline_h = VK_NULL_HANDLE; - cleanup: ralloc_free(fs_module.nir); return result; @@ -435,12 +431,10 @@ anv_device_init_meta_resolve_state(struct anv_device *device) if (res != VK_SUCCESS) goto fail; - - res = create_pass(device, &device->meta_state.resolve.pass); + res = create_pass(device); if (res != VK_SUCCESS) goto fail; - for (uint32_t i = 0; i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) { uint32_t sample_count = 1 << i; @@ -448,9 +442,7 @@ anv_device_init_meta_resolve_state(struct anv_device *device) if (!(sample_count_mask & sample_count)) continue; - res = create_pipeline(device, sample_count, vs_module_h, - device->meta_state.resolve.pass, - &device->meta_state.resolve.pipelines[i]); + res = create_pipeline(device, sample_count, vs_module_h); if (res != VK_SUCCESS) goto fail; } -- cgit v1.2.3 From bef8456ede5815598fef48215d2e7d1e3c98dc29 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 28 Jan 2016 10:20:10 -0800 Subject: anv/meta: Remove unneeded resolve pipeline Vulkan does not allow resolving a single-sample image. So remove that pipeline from anv_meta_state::resolve::pipelines. 
--- src/vulkan/anv_meta_resolve.c | 19 ++++++++++++++----- src/vulkan/anv_private.h | 6 ++---- 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c index 06324c9a0be..9969a0e5d88 100644 --- a/src/vulkan/anv_meta_resolve.c +++ b/src/vulkan/anv_meta_resolve.c @@ -56,6 +56,17 @@ meta_resolve_restore(struct anv_meta_saved_state *saved_state, anv_meta_restore(saved_state, cmd_buffer); } +static VkPipeline * +get_pipeline_h(struct anv_device *device, uint32_t samples) +{ + uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */ + + assert(samples >= 2); + assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines)); + + return &device->meta_state.resolve.pipelines[i]; +} + static nir_shader * build_nir_vs(void) { @@ -213,7 +224,6 @@ create_pipeline(struct anv_device *device, { VkResult result; VkDevice device_h = anv_device_to_handle(device); - uint32_t samples_log2 = ffs(num_samples) - 1; struct anv_shader_module fs_module = { .nir = build_nir_fs(num_samples), @@ -339,7 +349,7 @@ create_pipeline(struct anv_device *device, .use_rectlist = true }, &device->meta_state.alloc, - &device->meta_state.resolve.pipelines[samples_log2]); + get_pipeline_h(device, num_samples)); if (result != VK_SUCCESS) goto cleanup; @@ -437,8 +447,8 @@ anv_device_init_meta_resolve_state(struct anv_device *device) for (uint32_t i = 0; i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) { - uint32_t sample_count = 1 << i; + uint32_t sample_count = 1 << (1 + i); if (!(sample_count_mask & sample_count)) continue; @@ -472,7 +482,6 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_image *src_image = src_iview->image; VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1; - uint32_t samples_log2 = ffs(src_image->samples) - 1; const struct vertex_attrs vertex_data[3] = { { @@ -609,7 +618,7 @@ emit_resolve(struct 
anv_cmd_buffer *cmd_buffer, }, }); - VkPipeline pipeline_h = device->meta_state.resolve.pipelines[samples_log2]; + VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples); ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h); if (cmd_buffer->state.pipeline != pipeline) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 835a5e81e1e..6a670190ab9 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -601,10 +601,8 @@ struct anv_meta_state { } blit; struct { - /** - * Use pipeline `i` for images with `2^i` samples. - */ - VkPipeline pipelines[1 + MAX_SAMPLES_LOG2]; + /** Pipeline [i] resolves an image with 2^(i+1) samples. */ + VkPipeline pipelines[MAX_SAMPLES_LOG2]; VkRenderPass pass; VkPipelineLayout pipeline_layout; -- cgit v1.2.3 From f8a4abcd15d9e27afd6be12c67bec7ccaf179e54 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jan 2016 20:42:54 -0800 Subject: anv: Do resolves at end of subpass --- src/vulkan/anv_meta_resolve.c | 61 +++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 1 + src/vulkan/gen7_cmd_buffer.c | 3 +++ src/vulkan/gen8_cmd_buffer.c | 3 +++ 4 files changed, 68 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c index 9969a0e5d88..ffd41857f13 100644 --- a/src/vulkan/anv_meta_resolve.c +++ b/src/vulkan/anv_meta_resolve.c @@ -802,3 +802,64 @@ void anv_CmdResolveImage( meta_resolve_restore(&state, cmd_buffer); } + +/** + * Emit any needed resolves for the current subpass. + */ +void +anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_meta_saved_state saved_state; + + /* FINISHME(perf): Skip clears for resolve attachments. 
+ * + * From the Vulkan 1.0 spec: + * + * If the first use of an attachment in a render pass is as a resolve + * attachment, then the loadOp is effectively ignored as the resolve is + * guaranteed to overwrite all pixels in the render area. + */ + + if (!subpass->has_resolve) + return; + + meta_resolve_save(&saved_state, cmd_buffer); + + for (uint32_t i = 0; i < subpass->color_count; ++i) { + uint32_t src_att = subpass->color_attachments[i]; + uint32_t dest_att = subpass->resolve_attachments[i]; + + if (dest_att == VK_ATTACHMENT_UNUSED) + continue; + + struct anv_image_view *src_iview = fb->attachments[src_att]; + struct anv_image_view *dest_iview = fb->attachments[dest_att]; + + struct anv_subpass resolve_subpass = { + .color_count = 1, + .color_attachments = (uint32_t[]) { dest_att }, + .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, + }; + + anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass); + + /* Subpass resolves must respect the render area. We can ignore the + * render area here because vkCmdBeginRenderPass set the render area + * with 3DSTATE_DRAWING_RECTANGLE. + * + * XXX(chadv): Does the hardware really respect + * 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST? 
+ */ + emit_resolve(cmd_buffer, + src_iview, + &(VkOffset2D) { 0, 0 }, + dest_iview, + &(VkOffset2D) { 0, 0 }, + &(VkExtent2D) { fb->width, fb->height }); + } + + cmd_buffer->state.subpass = subpass; + meta_resolve_restore(&saved_state, cmd_buffer); +} diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 6a670190ab9..b1d4577f93e 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1277,6 +1277,7 @@ struct anv_state anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer); const struct anv_image_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index f201c151acb..609606bdf6a 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -896,6 +896,7 @@ void genX(CmdNextSubpass)( assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + anv_cmd_buffer_resolve_subpass(cmd_buffer); gen7_cmd_buffer_set_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); anv_cmd_buffer_clear_subpass(cmd_buffer); } @@ -905,6 +906,8 @@ void genX(CmdEndRenderPass)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + anv_cmd_buffer_resolve_subpass(cmd_buffer); + /* Emit a flushing pipe control at the end of a pass. This is kind of a * hack but it ensures that render targets always actually get written. 
* Eventually, we should do flushing based on image format transitions diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index daa049e98e0..4b1c51a8c59 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -972,6 +972,7 @@ void genX(CmdNextSubpass)( assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + anv_cmd_buffer_resolve_subpass(cmd_buffer); genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); anv_cmd_buffer_clear_subpass(cmd_buffer); } @@ -981,6 +982,8 @@ void genX(CmdEndRenderPass)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + anv_cmd_buffer_resolve_subpass(cmd_buffer); + /* Emit a flushing pipe control at the end of a pass. This is kind of a * hack but it ensures that render targets always actually get written. * Eventually, we should do flushing based on image format transitions -- cgit v1.2.3 From 31508bd0ce626f92eb66f14ab92ec1e05422c2d0 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 29 Jan 2016 13:55:41 -0800 Subject: anv/gen8: Extract SF state For upcoming patch to address difference in Cherryview. 
--- src/vulkan/gen8_cmd_buffer.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 4b1c51a8c59..045846b1072 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -224,6 +224,20 @@ flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer) } } +static void +__emit_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), + .LineWidth = cmd_buffer->state.dynamic.line_width, + }; + GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); + /* FIXME: gen9.fs */ + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, + cmd_buffer->state.pipeline->gen8.sf); +} + static void cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { @@ -297,14 +311,7 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { - uint32_t sf_dw[GENX(3DSTATE_SF_length)]; - struct GENX(3DSTATE_SF) sf = { - GENX(3DSTATE_SF_header), - .LineWidth = cmd_buffer->state.dynamic.line_width, - }; - GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); - /* FIXME: gen9.fs */ - anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen8.sf); + __emit_sf_state(cmd_buffer); } if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | -- cgit v1.2.3 From 89ec36f221099cf47ade0b90d09c7731460ab381 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 29 Jan 2016 14:10:52 -0800 Subject: anv/cmd_buffer: Emit gen9 style SF state for CHV The state for line width changes on Cherryview to use the GEN9 bits (for extra precision). 
--- src/vulkan/gen8_cmd_buffer.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 045846b1072..56d80e26eeb 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -225,7 +225,7 @@ flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer) } static void -__emit_sf_state(struct anv_cmd_buffer *cmd_buffer) +__emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer) { uint32_t sf_dw[GENX(3DSTATE_SF_length)]; struct GENX(3DSTATE_SF) sf = { @@ -237,6 +237,28 @@ __emit_sf_state(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, cmd_buffer->state.pipeline->gen8.sf); } +static void +__emit_gen9_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GEN9_3DSTATE_SF sf = { + GEN9_3DSTATE_SF_header, + .LineWidth = cmd_buffer->state.dynamic.line_width, + }; + GEN9_3DSTATE_SF_pack(NULL, sf_dw, &sf); + /* FIXME: gen9.fs */ + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, + cmd_buffer->state.pipeline->gen8.sf); +} + +static void +__emit_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->device->info.is_cherryview) + __emit_gen9_sf_state(cmd_buffer); + else + __emit_genx_sf_state(cmd_buffer); +} static void cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) -- cgit v1.2.3 From 31d3486bd2dda4b9dd65c8b24544b8f8cb54054b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 29 Jan 2016 12:07:34 -0800 Subject: anv: Limit flushing to the range of mapped memory --- src/vulkan/anv_device.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 27968bdf371..c7a9fd15c1d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1148,7 +1148,12 @@ clflush_mapped_ranges(struct anv_device *device, for (uint32_t i = 0; i < count; i++) 
{ ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory); void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK); - void *end = mem->map + ranges[i].offset + ranges[i].size; + void *end; + + if (ranges[i].offset + ranges[i].size > mem->map_size) + end = mem->map + mem->map_size; + else + end = mem->map + ranges[i].offset + ranges[i].size; while (p < end) { __builtin_ia32_clflush(p); -- cgit v1.2.3 From 0c4ef36360627686a0f3b56d64409ffb8bfbcb8c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 29 Jan 2016 12:10:12 -0800 Subject: anv: clflush is only orderered against mfence We can't use the more fine-grained load and store fence commands (lfence and mfence), since clflush is only guaranteed to be ordered with respect to mfence. --- src/vulkan/anv_batch_chain.c | 4 ++-- src/vulkan/anv_device.c | 17 +++++++++-------- src/vulkan/anv_private.h | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index e9bd67c9442..d74c5995168 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -755,7 +755,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, if (!primary->device->info.has_llc) { void *inst = secondary->batch.next - inst_size; void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK); - __builtin_ia32_sfence(); + __builtin_ia32_mfence(); while (p < secondary->batch.next) { __builtin_ia32_clflush(p); p += CACHELINE_SIZE; @@ -1047,7 +1047,7 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); if (!cmd_buffer->device->info.has_llc) { - __builtin_ia32_sfence(); + __builtin_ia32_mfence(); anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE) __builtin_ia32_clflush((*bbo)->bo.map + i); diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 
c7a9fd15c1d..5bb9fec0085 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1173,7 +1173,7 @@ VkResult anv_FlushMappedMemoryRanges( return VK_SUCCESS; /* Make sure the writes we're flushing have landed. */ - __builtin_ia32_sfence(); + __builtin_ia32_mfence(); clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); @@ -1193,7 +1193,7 @@ VkResult anv_InvalidateMappedMemoryRanges( clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); /* Make sure no reads get moved up above the invalidate. */ - __builtin_ia32_lfence(); + __builtin_ia32_mfence(); return VK_SUCCESS; } @@ -1342,7 +1342,7 @@ VkResult anv_CreateFence( if (!device->info.has_llc) { assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0); assert(batch.next - fence->bo.map <= CACHELINE_SIZE); - __builtin_ia32_sfence(); + __builtin_ia32_mfence(); __builtin_ia32_clflush(fence->bo.map); } @@ -1510,7 +1510,7 @@ VkResult anv_CreateEvent( if (!device->info.has_llc) { /* Make sure the writes we're flushing have landed. */ - __builtin_ia32_sfence(); + __builtin_ia32_mfence(); __builtin_ia32_clflush(event); } @@ -1538,9 +1538,10 @@ VkResult anv_GetEventStatus( ANV_FROM_HANDLE(anv_event, event, _event); if (!device->info.has_llc) { - /* Make sure the writes we're flushing have landed. */ + /* Invalidate read cache before reading event written by GPU. */ __builtin_ia32_clflush(event); - __builtin_ia32_lfence(); + __builtin_ia32_mfence(); + } return event->semaphore; @@ -1557,7 +1558,7 @@ VkResult anv_SetEvent( if (!device->info.has_llc) { /* Make sure the writes we're flushing have landed. */ - __builtin_ia32_sfence(); + __builtin_ia32_mfence(); __builtin_ia32_clflush(event); } @@ -1575,7 +1576,7 @@ VkResult anv_ResetEvent( if (!device->info.has_llc) { /* Make sure the writes we're flushing have landed. 
*/ - __builtin_ia32_sfence(); + __builtin_ia32_mfence(); __builtin_ia32_clflush(event); } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index b1d4577f93e..c5ce1484bc2 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -433,7 +433,7 @@ anv_state_clflush(struct anv_state state) void *end = state.map + state.alloc_size; void *p = (void *) (((uintptr_t) state.map) & ~CACHELINE_MASK); - __builtin_ia32_sfence(); + __builtin_ia32_mfence(); while (p < end) { __builtin_ia32_clflush(p); p += CACHELINE_SIZE; -- cgit v1.2.3 From f28645f71cbe49099908dbdfd8c05ae3f8d051d6 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 29 Jan 2016 15:45:18 -0800 Subject: anv: Don't disable snooping for mempools There's an intermittent flushing problem with VkEvent that we need to root cause. For now, using the snooping feature keeps the memory pools up to date with GPU writes and fixes the problem. --- src/vulkan/anv_allocator.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 4be149ea695..e935cd71df0 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -441,6 +441,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) goto fail; cleanup->gem_handle = gem_handle; +#if 0 /* Regular objects are created I915_CACHING_CACHED on LLC platforms and * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are * always created as I915_CACHING_CACHED, which on non-LLC means @@ -452,6 +453,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) anv_gem_set_domain(pool->device, gem_handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); } +#endif /* Now that we successfull allocated everything, we can write the new * values back into pool. 
*/ -- cgit v1.2.3 From a19ceee46c553821332aca2ea0dff40fc83794b3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 Jan 2016 15:43:44 -0800 Subject: anv/device: Fix version check The bottom-end check was wrong so it was only working on <= 1.0.0. Oops. --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 5bb9fec0085..fe9808f0bf7 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -215,7 +215,7 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); uint32_t client_version = pCreateInfo->pApplicationInfo->apiVersion; - if (VK_MAKE_VERSION(1, 0, 0) < client_version || + if (VK_MAKE_VERSION(1, 0, 0) > client_version || client_version > VK_MAKE_VERSION(1, 0, 2)) { return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); } -- cgit v1.2.3 From d4953fb340d7f2c3410172e9898f7eaba71cc0f8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 Jan 2016 13:36:41 -0800 Subject: vulkan: Import vk_icd.h --- include/vulkan/vk_icd.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 8 +---- 2 files changed, 86 insertions(+), 7 deletions(-) create mode 100644 include/vulkan/vk_icd.h (limited to 'src') diff --git a/include/vulkan/vk_icd.h b/include/vulkan/vk_icd.h new file mode 100644 index 00000000000..d664f2c06a7 --- /dev/null +++ b/include/vulkan/vk_icd.h @@ -0,0 +1,85 @@ +#ifndef VKICD_H +#define VKICD_H + +#include "vk_platform.h" + +/* + * The ICD must reserve space for a pointer for the loader's dispatch + * table, at the start of . + * The ICD must initialize this variable using the SET_LOADER_MAGIC_VALUE macro. 
+ */ + +#define ICD_LOADER_MAGIC 0x01CDC0DE + +typedef union _VK_LOADER_DATA { + uintptr_t loaderMagic; + void *loaderData; +} VK_LOADER_DATA; + +static inline void set_loader_magic_value(void* pNewObject) { + VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject; + loader_info->loaderMagic = ICD_LOADER_MAGIC; +} + +static inline bool valid_loader_magic_value(void* pNewObject) { + const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject; + return (loader_info->loaderMagic & 0xffffffff) == ICD_LOADER_MAGIC; +} + +/* + * Windows and Linux ICDs will treat VkSurfaceKHR as a pointer to a struct that + * contains the platform-specific connection and surface information. + */ +typedef enum _VkIcdWsiPlatform { + VK_ICD_WSI_PLATFORM_MIR, + VK_ICD_WSI_PLATFORM_WAYLAND, + VK_ICD_WSI_PLATFORM_WIN32, + VK_ICD_WSI_PLATFORM_XCB, + VK_ICD_WSI_PLATFORM_XLIB, +} VkIcdWsiPlatform; + +typedef struct _VkIcdSurfaceBase { + VkIcdWsiPlatform platform; +} VkIcdSurfaceBase; + +#ifdef VK_USE_PLATFORM_MIR_KHR +typedef struct _VkIcdSurfaceMir { + VkIcdSurfaceBase base; + MirConnection* connection; + MirSurface* mirSurface; +} VkIcdSurfaceMir; +#endif // VK_USE_PLATFORM_MIR_KHR + +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +typedef struct _VkIcdSurfaceWayland { + VkIcdSurfaceBase base; + struct wl_display* display; + struct wl_surface* surface; +} VkIcdSurfaceWayland; +#endif // VK_USE_PLATFORM_WAYLAND_KHR + +#ifdef VK_USE_PLATFORM_WIN32_KHR +typedef struct _VkIcdSurfaceWin32 { + VkIcdSurfaceBase base; + HINSTANCE hinstance; + HWND hwnd; +} VkIcdSurfaceWin32; +#endif // VK_USE_PLATFORM_WIN32_KHR + +#ifdef VK_USE_PLATFORM_XCB_KHR +typedef struct _VkIcdSurfaceXcb { + VkIcdSurfaceBase base; + xcb_connection_t* connection; + xcb_window_t window; +} VkIcdSurfaceXcb; +#endif // VK_USE_PLATFORM_XCB_KHR + +#ifdef VK_USE_PLATFORM_XLIB_KHR +typedef struct _VkIcdSurfaceXlib { + VkIcdSurfaceBase base; + Display* dpy; + Window window; +} VkIcdSurfaceXlib; +#endif // VK_USE_PLATFORM_XLIB_KHR + +#endif 
// VKICD_H diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index c5ce1484bc2..211b83a3bab 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -57,6 +57,7 @@ typedef uint32_t xcb_window_t; #define VK_PROTOTYPES #include #include +#include #include "anv_entrypoints.h" #include "anv_gen_macros.h" @@ -77,13 +78,6 @@ extern "C" { #define MAX_IMAGES 8 #define MAX_SAMPLES_LOG2 4 /* SKL supports 16 samples */ -#define ICD_LOADER_MAGIC 0x01CDC0DE - -typedef union _VK_LOADER_DATA { - uintptr_t loaderMagic; - void *loaderData; -} VK_LOADER_DATA; - #define anv_noreturn __attribute__((__noreturn__)) #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) -- cgit v1.2.3 From c688e4db11ce0f55e7ce1f3335b0ecd9e74cd4b5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 Jan 2016 15:34:22 -0800 Subject: anv/wsi: Rework to be compatable with the loader --- src/vulkan/anv_device.c | 2 +- src/vulkan/anv_private.h | 6 ++++- src/vulkan/anv_wsi.c | 38 +++++++++++++++----------- src/vulkan/anv_wsi.h | 18 +++++-------- src/vulkan/anv_wsi_wayland.c | 63 ++++++++++++++++++------------------------- src/vulkan/anv_wsi_x11.c | 64 +++++++++++++++++++++++--------------------- 6 files changed, 94 insertions(+), 97 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index fe9808f0bf7..1e0bfb080d9 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -157,7 +157,7 @@ anv_physical_device_finish(struct anv_physical_device *device) static const VkExtensionProperties global_extensions[] = { { .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, - .specVersion = 24, + .specVersion = 25, }, { .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 211b83a3bab..f4794d9fb9d 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -541,6 +541,10 @@ struct anv_physical_device { struct isl_device isl_dev; }; +struct 
anv_wsi_interaface; + +#define VK_ICD_WSI_PLATFORM_MAX 5 + struct anv_instance { VK_LOADER_DATA _loader_data; @@ -550,7 +554,7 @@ struct anv_instance { int physicalDeviceCount; struct anv_physical_device physicalDevice; - void * wayland_wsi; + struct anv_wsi_interface * wsi[VK_ICD_WSI_PLATFORM_MAX]; }; VkResult anv_init_wsi(struct anv_instance *instance); diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c index c181cd4d729..c5911a3635b 100644 --- a/src/vulkan/anv_wsi.c +++ b/src/vulkan/anv_wsi.c @@ -53,13 +53,14 @@ anv_finish_wsi(struct anv_instance *instance) } void anv_DestroySurfaceKHR( - VkInstance instance, + VkInstance _instance, VkSurfaceKHR _surface, const VkAllocationCallbacks* pAllocator) { - ANV_FROM_HANDLE(anv_wsi_surface, surface, _surface); + ANV_FROM_HANDLE(anv_instance, instance, _instance); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); - surface->destroy(surface, pAllocator); + anv_free2(&instance->alloc, pAllocator, surface); } VkResult anv_GetPhysicalDeviceSurfaceSupportKHR( @@ -69,9 +70,10 @@ VkResult anv_GetPhysicalDeviceSurfaceSupportKHR( VkBool32* pSupported) { ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - ANV_FROM_HANDLE(anv_wsi_surface, surface, _surface); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - return surface->get_support(surface, device, queueFamilyIndex, pSupported); + return iface->get_support(surface, device, queueFamilyIndex, pSupported); } VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR( @@ -80,9 +82,10 @@ VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR( VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) { ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - ANV_FROM_HANDLE(anv_wsi_surface, surface, _surface); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - return surface->get_capabilities(surface, 
device, pSurfaceCapabilities); + return iface->get_capabilities(surface, device, pSurfaceCapabilities); } VkResult anv_GetPhysicalDeviceSurfaceFormatsKHR( @@ -92,10 +95,11 @@ VkResult anv_GetPhysicalDeviceSurfaceFormatsKHR( VkSurfaceFormatKHR* pSurfaceFormats) { ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - ANV_FROM_HANDLE(anv_wsi_surface, surface, _surface); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - return surface->get_formats(surface, device, pSurfaceFormatCount, - pSurfaceFormats); + return iface->get_formats(surface, device, pSurfaceFormatCount, + pSurfaceFormats); } VkResult anv_GetPhysicalDeviceSurfacePresentModesKHR( @@ -105,10 +109,11 @@ VkResult anv_GetPhysicalDeviceSurfacePresentModesKHR( VkPresentModeKHR* pPresentModes) { ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - ANV_FROM_HANDLE(anv_wsi_surface, surface, _surface); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - return surface->get_present_modes(surface, device, pPresentModeCount, - pPresentModes); + return iface->get_present_modes(surface, device, pPresentModeCount, + pPresentModes); } VkResult anv_CreateSwapchainKHR( @@ -118,11 +123,12 @@ VkResult anv_CreateSwapchainKHR( VkSwapchainKHR* pSwapchain) { ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_wsi_surface, surface, pCreateInfo->surface); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; struct anv_swapchain *swapchain; - VkResult result = surface->create_swapchain(surface, device, pCreateInfo, - pAllocator, &swapchain); + VkResult result = iface->create_swapchain(surface, device, pCreateInfo, + pAllocator, &swapchain); if (result != VK_SUCCESS) return result; diff --git a/src/vulkan/anv_wsi.h 
b/src/vulkan/anv_wsi.h index 15b3f862499..6e9ff9b8447 100644 --- a/src/vulkan/anv_wsi.h +++ b/src/vulkan/anv_wsi.h @@ -27,27 +27,23 @@ struct anv_swapchain; -struct anv_wsi_surface { - struct anv_instance *instance; - - void (*destroy)(struct anv_wsi_surface *surface, - const VkAllocationCallbacks *pAllocator); - VkResult (*get_support)(struct anv_wsi_surface *surface, +struct anv_wsi_interface { + VkResult (*get_support)(VkIcdSurfaceBase *surface, struct anv_physical_device *device, uint32_t queueFamilyIndex, VkBool32* pSupported); - VkResult (*get_capabilities)(struct anv_wsi_surface *surface, + VkResult (*get_capabilities)(VkIcdSurfaceBase *surface, struct anv_physical_device *device, VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); - VkResult (*get_formats)(struct anv_wsi_surface *surface, + VkResult (*get_formats)(VkIcdSurfaceBase *surface, struct anv_physical_device *device, uint32_t* pSurfaceFormatCount, VkSurfaceFormatKHR* pSurfaceFormats); - VkResult (*get_present_modes)(struct anv_wsi_surface *surface, + VkResult (*get_present_modes)(VkIcdSurfaceBase *surface, struct anv_physical_device *device, uint32_t* pPresentModeCount, VkPresentModeKHR* pPresentModes); - VkResult (*create_swapchain)(struct anv_wsi_surface *surface, + VkResult (*create_swapchain)(VkIcdSurfaceBase *surface, struct anv_device *device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, @@ -69,7 +65,7 @@ struct anv_swapchain { uint32_t image_index); }; -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_wsi_surface, VkSurfaceKHR) +ANV_DEFINE_NONDISP_HANDLE_CASTS(_VkIcdSurfaceBase, VkSurfaceKHR) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swapchain, VkSwapchainKHR) VkResult anv_x11_init_wsi(struct anv_instance *instance); diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index 5e8a3a56f68..fa5d340eee5 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -30,13 +30,6 @@ #define MIN_NUM_IMAGES 2 -struct wsi_wl_surface { - struct 
anv_wsi_surface base; - - struct wl_display *display; - struct wl_surface *surface; -}; - struct wsi_wl_display { struct wl_display * display; struct wl_drm * drm; @@ -48,6 +41,8 @@ struct wsi_wl_display { }; struct wsi_wayland { + struct anv_wsi_interface base; + struct anv_instance * instance; pthread_mutex_t mutex; @@ -285,7 +280,8 @@ fail: static struct wsi_wl_display * wsi_wl_get_display(struct anv_instance *instance, struct wl_display *wl_display) { - struct wsi_wayland *wsi = instance->wayland_wsi; + struct wsi_wayland *wsi = + (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; pthread_mutex_lock(&wsi->mutex); @@ -326,7 +322,7 @@ VkBool32 anv_GetPhysicalDeviceWaylandPresentationSupportKHR( } static VkResult -wsi_wl_surface_get_support(struct anv_wsi_surface *surface, +wsi_wl_surface_get_support(VkIcdSurfaceBase *surface, struct anv_physical_device *device, uint32_t queueFamilyIndex, VkBool32* pSupported) @@ -342,7 +338,7 @@ static const VkPresentModeKHR present_modes[] = { }; static VkResult -wsi_wl_surface_get_capabilities(struct anv_wsi_surface *surface, +wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface, struct anv_physical_device *device, VkSurfaceCapabilitiesKHR* caps) { @@ -367,12 +363,12 @@ wsi_wl_surface_get_capabilities(struct anv_wsi_surface *surface, } static VkResult -wsi_wl_surface_get_formats(struct anv_wsi_surface *wsi_surface, +wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface, struct anv_physical_device *device, uint32_t* pSurfaceFormatCount, VkSurfaceFormatKHR* pSurfaceFormats) { - struct wsi_wl_surface *surface = (struct wsi_wl_surface *)wsi_surface; + VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; struct wsi_wl_display *display = wsi_wl_get_display(device->instance, surface->display); @@ -399,7 +395,7 @@ wsi_wl_surface_get_formats(struct anv_wsi_surface *wsi_surface, } static VkResult -wsi_wl_surface_get_present_modes(struct anv_wsi_surface *surface, 
+wsi_wl_surface_get_present_modes(VkIcdSurfaceBase *surface, struct anv_physical_device *device, uint32_t* pPresentModeCount, VkPresentModeKHR* pPresentModes) @@ -416,15 +412,8 @@ wsi_wl_surface_get_present_modes(struct anv_wsi_surface *surface, return VK_SUCCESS; } -static void -wsi_wl_surface_destroy(struct anv_wsi_surface *surface, - const VkAllocationCallbacks *pAllocator) -{ - anv_free2(&surface->instance->alloc, pAllocator, surface); -} - static VkResult -wsi_wl_surface_create_swapchain(struct anv_wsi_surface *surface, +wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *surface, struct anv_device *device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, @@ -436,10 +425,11 @@ VkResult anv_CreateWaylandSurfaceKHR( const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) { + ANV_FROM_HANDLE(anv_instance, instance, _instance); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR); - ANV_FROM_HANDLE(anv_instance, instance, _instance); - struct wsi_wl_surface *surface; + VkIcdSurfaceWayland *surface; surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); @@ -449,15 +439,7 @@ VkResult anv_CreateWaylandSurfaceKHR( surface->display = pCreateInfo->display; surface->surface = pCreateInfo->surface; - surface->base.instance = instance; - surface->base.destroy = wsi_wl_surface_destroy; - surface->base.get_support = wsi_wl_surface_get_support; - surface->base.get_capabilities = wsi_wl_surface_get_capabilities; - surface->base.get_formats = wsi_wl_surface_get_formats; - surface->base.get_present_modes = wsi_wl_surface_get_present_modes; - surface->base.create_swapchain = wsi_wl_surface_create_swapchain; - - *pSurface = anv_wsi_surface_to_handle(&surface->base); + *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); return VK_SUCCESS; } @@ -734,13 +716,13 @@ wsi_wl_swapchain_destroy(struct anv_swapchain *anv_chain, } static VkResult 
-wsi_wl_surface_create_swapchain(struct anv_wsi_surface *wsi_surface, +wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, struct anv_device *device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, struct anv_swapchain **swapchain_out) { - struct wsi_wl_surface *surface = (struct wsi_wl_surface *)wsi_surface; + VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; struct wsi_wl_swapchain *chain; VkResult result; @@ -847,7 +829,13 @@ anv_wl_init_wsi(struct anv_instance *instance) goto fail_mutex; } - instance->wayland_wsi = wsi; + wsi->base.get_support = wsi_wl_surface_get_support; + wsi->base.get_capabilities = wsi_wl_surface_get_capabilities; + wsi->base.get_formats = wsi_wl_surface_get_formats; + wsi->base.get_present_modes = wsi_wl_surface_get_present_modes; + wsi->base.create_swapchain = wsi_wl_surface_create_swapchain; + + instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = &wsi->base; return VK_SUCCESS; @@ -857,7 +845,7 @@ fail_mutex: fail_alloc: anv_free(&instance->alloc, wsi); fail: - instance->wayland_wsi = NULL; + instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = NULL; return result; } @@ -865,7 +853,8 @@ fail: void anv_wl_finish_wsi(struct anv_instance *instance) { - struct wsi_wayland *wsi = instance->wayland_wsi; + struct wsi_wayland *wsi = + (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; if (wsi) { _mesa_hash_table_destroy(wsi->displays, NULL); diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 8e35191576a..8d0d1580d3e 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -27,13 +27,6 @@ #include "anv_wsi.h" -struct x11_surface { - struct anv_wsi_surface base; - - xcb_connection_t *connection; - xcb_window_t window; -}; - static const VkSurfaceFormatKHR formats[] = { { .format = VK_FORMAT_B8G8R8A8_UNORM, }, }; @@ -53,11 +46,23 @@ VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR( } static VkResult -x11_surface_get_capabilities(struct 
anv_wsi_surface *wsi_surface, +x11_surface_get_support(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported) +{ + anv_finishme("Check that we actually have DRI3"); + *pSupported = true; + + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, struct anv_physical_device *device, VkSurfaceCapabilitiesKHR *caps) { - struct x11_surface *surface = (struct x11_surface *)wsi_surface; + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; xcb_get_geometry_cookie_t cookie = xcb_get_geometry(surface->connection, surface->window); @@ -95,7 +100,7 @@ x11_surface_get_capabilities(struct anv_wsi_surface *wsi_surface, } static VkResult -x11_surface_get_formats(struct anv_wsi_surface *surface, +x11_surface_get_formats(VkIcdSurfaceBase *surface, struct anv_physical_device *device, uint32_t *pSurfaceFormatCount, VkSurfaceFormatKHR *pSurfaceFormats) @@ -113,7 +118,7 @@ x11_surface_get_formats(struct anv_wsi_surface *surface, } static VkResult -x11_surface_get_present_modes(struct anv_wsi_surface *surface, +x11_surface_get_present_modes(VkIcdSurfaceBase *surface, struct anv_physical_device *device, uint32_t *pPresentModeCount, VkPresentModeKHR *pPresentModes) @@ -130,30 +135,32 @@ x11_surface_get_present_modes(struct anv_wsi_surface *surface, return VK_SUCCESS; } -static void -x11_surface_destroy(struct anv_wsi_surface *surface, - const VkAllocationCallbacks *pAllocator) -{ - anv_free2(&surface->instance->alloc, pAllocator, surface); -} - static VkResult -x11_surface_create_swapchain(struct anv_wsi_surface *surface, +x11_surface_create_swapchain(VkIcdSurfaceBase *surface, struct anv_device *device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, struct anv_swapchain **swapchain); +static struct anv_wsi_interface x11_interface = { + .get_support = x11_surface_get_support, + .get_capabilities = x11_surface_get_capabilities, + 
.get_formats = x11_surface_get_formats, + .get_present_modes = x11_surface_get_present_modes, + .create_swapchain = x11_surface_create_swapchain, +}; + VkResult anv_CreateXcbSurfaceKHR( VkInstance _instance, const VkXcbSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface) { + ANV_FROM_HANDLE(anv_instance, instance, _instance); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR); - ANV_FROM_HANDLE(anv_instance, instance, _instance); - struct x11_surface *surface; + VkIcdSurfaceXcb *surface; surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); @@ -163,14 +170,7 @@ VkResult anv_CreateXcbSurfaceKHR( surface->connection = pCreateInfo->connection; surface->window = pCreateInfo->window; - surface->base.instance = instance; - surface->base.destroy = x11_surface_destroy; - surface->base.get_capabilities = x11_surface_get_capabilities; - surface->base.get_formats = x11_surface_get_formats; - surface->base.get_present_modes = x11_surface_get_present_modes; - surface->base.create_swapchain = x11_surface_create_swapchain; - - *pSurface = anv_wsi_surface_to_handle(&surface->base); + *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); return VK_SUCCESS; } @@ -303,13 +303,13 @@ x11_swapchain_destroy(struct anv_swapchain *anv_chain, } static VkResult -x11_surface_create_swapchain(struct anv_wsi_surface *wsi_surface, +x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, struct anv_device *device, const VkSwapchainCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks* pAllocator, struct anv_swapchain **swapchain_out) { - struct x11_surface *surface = (struct x11_surface *)wsi_surface; + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; struct x11_swapchain *chain; xcb_void_cookie_t cookie; VkResult result; @@ -451,6 +451,8 @@ x11_surface_create_swapchain(struct anv_wsi_surface *wsi_surface, VkResult anv_x11_init_wsi(struct anv_instance 
*instance) { + instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = &x11_interface; + return VK_SUCCESS; } -- cgit v1.2.3 From 337c1e0871eb46e3c07cce2e291636653eb5f3bc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 Jan 2016 15:55:32 -0800 Subject: anv/formats: Add more compressed formats This adds support for the DX compression formats. Given that ETC and EAC are working fine, these should be ok too. --- src/vulkan/anv_formats.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 17ccae0cea2..e3c786ccca7 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -167,22 +167,22 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .depth_format = D32_FLOAT, .has_stencil = true), - fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC2_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC2_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC3_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC3_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC4_SNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC5_SNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC6H_UFLOAT_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC6H_SFLOAT_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC7_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_BC7_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, 
ISL_FORMAT_DXT1_RGB), + fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_DXT1_RGB_SRGB), + fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK, ISL_FORMAT_BC1_UNORM), + fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, ISL_FORMAT_BC1_UNORM_SRGB), + fmt(VK_FORMAT_BC2_UNORM_BLOCK, ISL_FORMAT_BC2_UNORM), + fmt(VK_FORMAT_BC2_SRGB_BLOCK, ISL_FORMAT_BC2_UNORM_SRGB), + fmt(VK_FORMAT_BC3_UNORM_BLOCK, ISL_FORMAT_BC3_UNORM), + fmt(VK_FORMAT_BC3_SRGB_BLOCK, ISL_FORMAT_BC3_UNORM_SRGB), + fmt(VK_FORMAT_BC4_UNORM_BLOCK, ISL_FORMAT_BC4_UNORM), + fmt(VK_FORMAT_BC4_SNORM_BLOCK, ISL_FORMAT_BC4_SNORM), + fmt(VK_FORMAT_BC5_UNORM_BLOCK, ISL_FORMAT_BC5_UNORM), + fmt(VK_FORMAT_BC5_SNORM_BLOCK, ISL_FORMAT_BC5_SNORM), + fmt(VK_FORMAT_BC6H_UFLOAT_BLOCK, ISL_FORMAT_BC6H_UF16), + fmt(VK_FORMAT_BC6H_SFLOAT_BLOCK, ISL_FORMAT_BC6H_SF16), + fmt(VK_FORMAT_BC7_UNORM_BLOCK, ISL_FORMAT_BC7_UNORM), + fmt(VK_FORMAT_BC7_SRGB_BLOCK, ISL_FORMAT_BC7_UNORM_SRGB), fmt(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8), fmt(VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8), fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8_PTA), -- cgit v1.2.3 From 44ec860cd6851c5843faa85e041d47a25e640c8f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 29 Jan 2016 20:52:42 -0800 Subject: anv/WSI: Support more usage bits They're just images and we have no intention of stompping alpha channels (at least not yet), so there's no reason why you can't sample. 
--- src/vulkan/anv_wsi_wayland.c | 2 ++ src/vulkan/anv_wsi_x11.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index fa5d340eee5..9f4fee910c3 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -356,6 +356,8 @@ wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface, VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; caps->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 8d0d1580d3e..1cd3d7fd419 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -93,6 +93,8 @@ x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, caps->maxImageArrayLayers = 1; caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR; caps->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; -- cgit v1.2.3 From 66e8b5cf2b9762ca8a3e315e249254a03a00f808 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 30 Jan 2016 01:33:00 -0800 Subject: anv/wsi/x11: Actually check for DRI3 --- src/vulkan/anv_wsi_x11.c | 188 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 173 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 1cd3d7fd419..c9e61fc4cdd 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -27,6 +27,95 @@ #include "anv_wsi.h" +#include "util/hash_table.h" + +struct wsi_x11_connection { + bool has_dri3; + bool has_present; +}; + +struct wsi_x11 { + struct anv_wsi_interface base; + + pthread_mutex_t mutex; + /* Hash table of xcb_connection -> wsi_x11_connection mappings */ + struct hash_table *connections; +}; + +static struct wsi_x11_connection * 
+wsi_x11_connection_create(struct anv_instance *instance, xcb_connection_t *conn) +{ + xcb_query_extension_cookie_t dri3_cookie, pres_cookie; + xcb_query_extension_reply_t *dri3_reply, *pres_reply; + + struct wsi_x11_connection *wsi_conn = + anv_alloc(&instance->alloc, sizeof(*wsi_conn), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi_conn) + return NULL; + + dri3_cookie = xcb_query_extension(conn, 4, "DRI3"); + pres_cookie = xcb_query_extension(conn, 7, "PRESENT"); + + dri3_reply = xcb_query_extension_reply(conn, dri3_cookie, NULL); + pres_reply = xcb_query_extension_reply(conn, pres_cookie, NULL); + if (dri3_reply == NULL || pres_reply == NULL) { + free(dri3_reply); + free(pres_reply); + anv_free(&instance->alloc, wsi_conn); + return NULL; + } + + wsi_conn->has_dri3 = dri3_reply->present != 0; + wsi_conn->has_present = pres_reply->present != 0; + + free(dri3_reply); + free(pres_reply); + + return wsi_conn; +} + +static void +wsi_x11_connection_destroy(struct anv_instance *instance, + struct wsi_x11_connection *conn) +{ + anv_free(&instance->alloc, conn); +} + +static struct wsi_x11_connection * +wsi_x11_get_connection(struct anv_instance *instance, xcb_connection_t *conn) +{ + struct wsi_x11 *wsi = + (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; + + pthread_mutex_lock(&wsi->mutex); + + struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, conn); + if (!entry) { + /* We're about to make a bunch of blocking calls. Let's drop the + * mutex for now so we don't block up too badly. 
+ */ + pthread_mutex_unlock(&wsi->mutex); + + struct wsi_x11_connection *wsi_conn = + wsi_x11_connection_create(instance, conn); + + pthread_mutex_lock(&wsi->mutex); + + entry = _mesa_hash_table_search(wsi->connections, conn); + if (entry) { + /* Oops, someone raced us to it */ + wsi_x11_connection_destroy(instance, wsi_conn); + } else { + entry = _mesa_hash_table_insert(wsi->connections, conn, wsi_conn); + } + } + + pthread_mutex_unlock(&wsi->mutex); + + return entry->data; +} + static const VkSurfaceFormatKHR formats[] = { { .format = VK_FORMAT_B8G8R8A8_UNORM, }, }; @@ -41,19 +130,41 @@ VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR( xcb_connection_t* connection, xcb_visualid_t visual_id) { - anv_finishme("Check that we actually have DRI3"); - stub_return(true); + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + + struct wsi_x11_connection *wsi_conn = + wsi_x11_get_connection(device->instance, connection); + + if (!wsi_conn->has_dri3) { + fprintf(stderr, "vulkan: No DRI3 support\n"); + return false; + } + + anv_finishme("Check visuals"); + + return true; } static VkResult -x11_surface_get_support(VkIcdSurfaceBase *surface, +x11_surface_get_support(VkIcdSurfaceBase *icd_surface, struct anv_physical_device *device, uint32_t queueFamilyIndex, VkBool32* pSupported) { - anv_finishme("Check that we actually have DRI3"); - *pSupported = true; + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + struct wsi_x11_connection *wsi_conn = + wsi_x11_get_connection(device->instance, surface->connection); + if (!wsi_conn) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + if (!wsi_conn->has_dri3) { + fprintf(stderr, "vulkan: No DRI3 support\n"); + *pSupported = false; + return VK_SUCCESS; + } + + *pSupported = true; return VK_SUCCESS; } @@ -144,14 +255,6 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *surface, const VkAllocationCallbacks* pAllocator, struct anv_swapchain **swapchain); -static struct anv_wsi_interface x11_interface = { - 
.get_support = x11_surface_get_support, - .get_capabilities = x11_surface_get_capabilities, - .get_formats = x11_surface_get_formats, - .get_present_modes = x11_surface_get_present_modes, - .create_swapchain = x11_surface_create_swapchain, -}; - VkResult anv_CreateXcbSurfaceKHR( VkInstance _instance, const VkXcbSurfaceCreateInfoKHR* pCreateInfo, @@ -453,11 +556,66 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, VkResult anv_x11_init_wsi(struct anv_instance *instance) { - instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = &x11_interface; + struct wsi_x11 *wsi; + VkResult result; + + wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + int ret = pthread_mutex_init(&wsi->mutex, NULL); + if (ret != 0) { + if (ret == ENOMEM) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + goto fail_alloc; + } + + wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + if (!wsi->connections) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_mutex; + } + + wsi->base.get_support = x11_surface_get_support; + wsi->base.get_capabilities = x11_surface_get_capabilities; + wsi->base.get_formats = x11_surface_get_formats; + wsi->base.get_present_modes = x11_surface_get_present_modes; + wsi->base.create_swapchain = x11_surface_create_swapchain; + + instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = &wsi->base; return VK_SUCCESS; + +fail_mutex: + pthread_mutex_destroy(&wsi->mutex); +fail_alloc: + anv_free(&instance->alloc, wsi); +fail: + instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = NULL; + + return result; } void anv_x11_finish_wsi(struct anv_instance *instance) -{ } +{ + struct wsi_x11 *wsi = + (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; + + if (wsi) { + _mesa_hash_table_destroy(wsi->connections, NULL); 
+ + pthread_mutex_destroy(&wsi->mutex); + + anv_free(&instance->alloc, wsi); + } +} -- cgit v1.2.3 From 5acc4e2ebfe0e482d4f612c440877601c54c8b05 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 30 Jan 2016 03:49:10 -0800 Subject: anv/wsi/x11: Actually pull information from the window's visual --- src/vulkan/anv_wsi_x11.c | 150 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 143 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index c9e61fc4cdd..ac24d927dd1 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -118,12 +118,115 @@ wsi_x11_get_connection(struct anv_instance *instance, xcb_connection_t *conn) static const VkSurfaceFormatKHR formats[] = { { .format = VK_FORMAT_B8G8R8A8_UNORM, }, + { .format = VK_FORMAT_B8G8R8_UNORM, }, }; static const VkPresentModeKHR present_modes[] = { VK_PRESENT_MODE_MAILBOX_KHR, }; +static xcb_screen_t * +get_screen_for_root(xcb_connection_t *conn, xcb_window_t root) +{ + xcb_screen_iterator_t screen_iter = + xcb_setup_roots_iterator(xcb_get_setup(conn)); + + for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { + if (screen_iter.data->root == root) + return screen_iter.data; + } + + return NULL; +} + +static xcb_visualtype_t * +screen_get_visualtype(xcb_screen_t *screen, xcb_visualid_t visual_id, + unsigned *depth) +{ + xcb_depth_iterator_t depth_iter = + xcb_screen_allowed_depths_iterator(screen); + + for (; depth_iter.rem; xcb_depth_next (&depth_iter)) { + xcb_visualtype_iterator_t visual_iter = + xcb_depth_visuals_iterator (depth_iter.data); + + for (; visual_iter.rem; xcb_visualtype_next (&visual_iter)) { + if (visual_iter.data->visual_id == visual_id) { + if (depth) + *depth = depth_iter.data->depth; + return visual_iter.data; + } + } + } + + return NULL; +} + +static xcb_visualtype_t * +connection_get_visualtype(xcb_connection_t *conn, xcb_visualid_t visual_id, + unsigned *depth) +{ + xcb_screen_iterator_t 
screen_iter = + xcb_setup_roots_iterator(xcb_get_setup(conn)); + + /* For this we have to iterate over all of the screens which is rather + * annoying. Fortunately, there is probably only 1. + */ + for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { + xcb_visualtype_t *visual = screen_get_visualtype(screen_iter.data, + visual_id, depth); + if (visual) + return visual; + } + + return NULL; +} + +static xcb_visualtype_t * +get_visualtype_for_window(xcb_connection_t *conn, xcb_window_t window, + unsigned *depth) +{ + xcb_query_tree_cookie_t tree_cookie; + xcb_get_window_attributes_cookie_t attrib_cookie; + xcb_query_tree_reply_t *tree; + xcb_get_window_attributes_reply_t *attrib; + + tree_cookie = xcb_query_tree(conn, window); + attrib_cookie = xcb_get_window_attributes(conn, window); + + tree = xcb_query_tree_reply(conn, tree_cookie, NULL); + attrib = xcb_get_window_attributes_reply(conn, attrib_cookie, NULL); + if (attrib == NULL || tree == NULL) { + free(attrib); + free(tree); + return NULL; + } + + xcb_window_t root = tree->root; + xcb_visualid_t visual_id = attrib->visual; + free(attrib); + free(tree); + + xcb_screen_t *screen = get_screen_for_root(conn, root); + if (screen == NULL) + return NULL; + + return screen_get_visualtype(screen, visual_id, depth); +} + +static bool +visual_has_alpha(xcb_visualtype_t *visual, unsigned depth) +{ + uint32_t rgb_mask = visual->red_mask | + visual->green_mask | + visual->blue_mask; + + uint32_t all_mask = 0xffffffff >> (32 - depth); + + /* Do we have bits left over after RGB? 
*/ + return (all_mask & ~rgb_mask) != 0; +} + VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR( VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, @@ -140,7 +243,12 @@ VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR( return false; } - anv_finishme("Check visuals"); + unsigned visual_depth; + if (!connection_get_visualtype(connection, visual_id, &visual_depth)) + return false; + + if (visual_depth != 24 && visual_depth != 32) + return false; return true; } @@ -164,6 +272,18 @@ x11_surface_get_support(VkIcdSurfaceBase *icd_surface, return VK_SUCCESS; } + unsigned visual_depth; + if (!get_visualtype_for_window(surface->connection, surface->window, + &visual_depth)) { + *pSupported = false; + return VK_SUCCESS; + } + + if (visual_depth != 24 && visual_depth != 32) { + *pSupported = false; + return VK_SUCCESS; + } + *pSupported = true; return VK_SUCCESS; } @@ -174,12 +294,21 @@ x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, VkSurfaceCapabilitiesKHR *caps) { VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; - - xcb_get_geometry_cookie_t cookie = xcb_get_geometry(surface->connection, - surface->window); + xcb_get_geometry_cookie_t geom_cookie; xcb_generic_error_t *err; - xcb_get_geometry_reply_t *geom = xcb_get_geometry_reply(surface->connection, - cookie, &err); + xcb_get_geometry_reply_t *geom; + unsigned visual_depth; + + geom_cookie = xcb_get_geometry(surface->connection, surface->window); + + /* This does a round-trip. This is why we do get_geometry first and + * wait to read the reply until after we have a visual. 
+ */ + xcb_visualtype_t *visual = + get_visualtype_for_window(surface->connection, surface->window, + &visual_depth); + + geom = xcb_get_geometry_reply(surface->connection, geom_cookie, &err); if (geom) { VkExtent2D extent = { geom->width, geom->height }; caps->currentExtent = extent; @@ -197,12 +326,19 @@ x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, free(err); free(geom); + if (visual_has_alpha(visual, visual_depth)) { + caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; + } else { + caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + } + caps->minImageCount = 2; caps->maxImageCount = 4; caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; caps->maxImageArrayLayers = 1; - caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR; caps->supportedUsageFlags = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | -- cgit v1.2.3 From ad813b072a69d92ca4a1bfdf6b4dd7f6197b6ab4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 30 Jan 2016 07:05:53 -0800 Subject: anv/wsi: Set the platform field of VkIcdSurfaceBase --- src/vulkan/anv_wsi_wayland.c | 1 + src/vulkan/anv_wsi_x11.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index 9f4fee910c3..feecf22782e 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -438,6 +438,7 @@ VkResult anv_CreateWaylandSurfaceKHR( if (surface == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + surface->base.platform = VK_ICD_WSI_PLATFORM_WAYLAND; surface->display = pCreateInfo->display; surface->surface = pCreateInfo->surface; diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index ac24d927dd1..d5e5b88c17b 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -408,6 
+408,7 @@ VkResult anv_CreateXcbSurfaceKHR( if (surface == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + surface->base.platform = VK_ICD_WSI_PLATFORM_XCB; surface->connection = pCreateInfo->connection; surface->window = pCreateInfo->window; -- cgit v1.2.3 From c668dc9f7597009a2795f898df8907a99b22e8d7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 30 Jan 2016 07:16:33 -0800 Subject: anv/pass: Initialize has_resolve --- src/vulkan/anv_pass.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_pass.c b/src/vulkan/anv_pass.c index a8e85664d48..d07e9fec6cc 100644 --- a/src/vulkan/anv_pass.c +++ b/src/vulkan/anv_pass.c @@ -113,6 +113,7 @@ VkResult anv_CreateRenderPass( } } + subpass->has_resolve = false; if (desc->pResolveAttachments) { subpass->resolve_attachments = p; p += desc->colorAttachmentCount; -- cgit v1.2.3 From b1158ced45f648f9ab8217b2deddcfaa981bc652 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 22 Jan 2016 15:59:02 -0800 Subject: anv/genX: Add genX_pipeline.c for compute_pipeline_create Adds initial compute_pipeline_create implementation for gen7. 
Signed-off-by: Jordan Justen --- src/vulkan/Makefile.am | 4 ++ src/vulkan/gen7_pipeline.c | 12 ---- src/vulkan/gen8_pipeline.c | 88 ------------------------------ src/vulkan/genX_pipeline.c | 133 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 137 insertions(+), 100 deletions(-) create mode 100644 src/vulkan/genX_pipeline.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index d76fa206a95..06f67cfd5f8 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -100,6 +100,7 @@ BUILT_SOURCES = \ libanv_gen7_la_SOURCES = \ genX_cmd_buffer.c \ + genX_pipeline.c \ gen7_cmd_buffer.c \ gen7_pipeline.c \ gen7_state.c @@ -107,6 +108,7 @@ libanv_gen7_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=70 libanv_gen75_la_SOURCES = \ genX_cmd_buffer.c \ + genX_pipeline.c \ gen7_cmd_buffer.c \ gen7_pipeline.c \ gen7_state.c @@ -114,6 +116,7 @@ libanv_gen75_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=75 libanv_gen8_la_SOURCES = \ genX_cmd_buffer.c \ + genX_pipeline.c \ gen8_cmd_buffer.c \ gen8_pipeline.c \ gen8_state.c @@ -121,6 +124,7 @@ libanv_gen8_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=80 libanv_gen9_la_SOURCES = \ genX_cmd_buffer.c \ + genX_pipeline.c \ gen8_cmd_buffer.c \ gen8_pipeline.c \ gen8_state.c diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 7b5330e7448..8206432ffc3 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -444,15 +444,3 @@ genX(graphics_pipeline_create)( return VK_SUCCESS; } - -GENX_FUNC(GEN7, GEN75) VkResult -genX(compute_pipeline_create)( - VkDevice _device, - struct anv_pipeline_cache * cache, - const VkComputePipelineCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipeline) -{ - anv_finishme("primitive_id needs sbe swizzling setup"); - abort(); -} diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 1e9aa0e12bc..fce2331873c 100644 --- a/src/vulkan/gen8_pipeline.c +++ 
b/src/vulkan/gen8_pipeline.c @@ -667,91 +667,3 @@ genX(graphics_pipeline_create)( return VK_SUCCESS; } - -VkResult genX(compute_pipeline_create)( - VkDevice _device, - struct anv_pipeline_cache * cache, - const VkComputePipelineCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline *pipeline; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); - - pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - - pipeline->blend_state.map = NULL; - - result = anv_reloc_list_init(&pipeline->batch_relocs, - pAllocator ? pAllocator : &device->alloc); - if (result != VK_SUCCESS) { - anv_free2(&device->alloc, pAllocator, pipeline); - return result; - } - pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; - pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); - pipeline->batch.relocs = &pipeline->batch_relocs; - - /* When we free the pipeline, we detect stages based on the NULL status - * of various prog_data pointers. Make them NULL by default. 
- */ - memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); - - pipeline->vs_simd8 = NO_KERNEL; - pipeline->vs_vec4 = NO_KERNEL; - pipeline->gs_kernel = NO_KERNEL; - - pipeline->active_stages = 0; - pipeline->total_scratch = 0; - - assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); - ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); - anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, - pCreateInfo->stage.pName, - pCreateInfo->stage.pSpecializationInfo); - - pipeline->use_repclear = false; - - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - - anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], - .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), - .ScratchSpaceBasePointerHigh = 0, - .StackSize = 0, - - .MaximumNumberofThreads = device->info.max_cs_threads - 1, - .NumberofURBEntries = 2, - .ResetGatewayTimer = true, -#if ANV_GEN == 8 - .BypassGatewayControl = true, -#endif - .URBEntryAllocationSize = 2, - .CURBEAllocationSize = 0); - - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - uint32_t group_size = prog_data->local_size[0] * - prog_data->local_size[1] * prog_data->local_size[2]; - pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); - uint32_t remainder = group_size & (prog_data->simd_size - 1); - - if (remainder > 0) - pipeline->cs_right_mask = ~0u >> (32 - remainder); - else - pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); - - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; -} diff --git a/src/vulkan/genX_pipeline.c b/src/vulkan/genX_pipeline.c new file mode 100644 index 00000000000..14a608b9fca --- /dev/null +++ b/src/vulkan/genX_pipeline.c @@ -0,0 +1,133 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby 
granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#if (ANV_GEN == 9) +# include "gen9_pack.h" +#elif (ANV_GEN == 8) +# include "gen8_pack.h" +#elif (ANV_IS_HASWELL) +# include "gen75_pack.h" +#elif (ANV_GEN == 7) +# include "gen7_pack.h" +#endif + +#include "genX_pipeline_util.h" + +VkResult +genX(compute_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkComputePipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + pipeline->blend_state.map = NULL; + + result = anv_reloc_list_init(&pipeline->batch_relocs, + pAllocator ? pAllocator : &device->alloc); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. 
+ */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_kernel = NO_KERNEL; + + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); + ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); + anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, + pCreateInfo->stage.pName, + pCreateInfo->stage.pSpecializationInfo); + + pipeline->use_repclear = false; + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + + anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], + .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), +#if ANV_GEN > 7 + .ScratchSpaceBasePointerHigh = 0, + .StackSize = 0, +#endif + + .MaximumNumberofThreads = device->info.max_cs_threads - 1, + .NumberofURBEntries = 2, + .ResetGatewayTimer = true, +#if ANV_GEN == 8 + .BypassGatewayControl = true, +#endif + .URBEntryAllocationSize = 2, + .CURBEAllocationSize = 0); + + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + uint32_t group_size = prog_data->local_size[0] * + prog_data->local_size[1] * prog_data->local_size[2]; + pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); + uint32_t remainder = group_size & (prog_data->simd_size - 1); + + if (remainder > 0) + pipeline->cs_right_mask = ~0u >> (32 - remainder); + else + pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); + + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} -- cgit v1.2.3 From 7e46cc86038a57cb13e63acde555f5135cb2a987 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 22 Jan 2016 11:18:22 -0800 Subject: anv/gen7/compute: Setup push constants and local ids Signed-off-by: Jordan Justen 
--- src/vulkan/gen7_cmd_buffer.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 609606bdf6a..613971651b2 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -47,6 +47,9 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) VkShaderStageFlags flushed = 0; anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); if (state.offset == 0) @@ -274,7 +277,22 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) if (result != VK_SUCCESS) return result; + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = + (prog_data->nr_params + local_id_dwords) * 4; + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + if (push_state.alloc_size) { + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), + .CURBETotalDataLength = push_state.alloc_size, + .CURBEDataStartAddress = push_state.offset); + } struct anv_state state = anv_state_pool_emit(&device->dynamic_state_pool, @@ -282,6 +300,9 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) .KernelStartPointer = pipeline->cs_simd, .BindingTablePointer = surfaces.offset, .SamplerStatePointer = samplers.offset, + .ConstantURBEntryReadLength = + push_constant_regs, + .ConstantURBEntryReadOffset = 0, .BarrierEnable = cs_prog_data->uses_barrier, .NumberofThreadsinGPGPUThreadGroup = pipeline->cs_thread_width_max); -- cgit v1.2.3 From 
f5b3a2fe32b3cb84a8f3e932e7eebdc2ed900e76 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 22 Jan 2016 16:26:34 -0800 Subject: anv/gen7: Add support for gl_NumWorkGroups Signed-off-by: Jordan Justen --- src/vulkan/gen7_cmd_buffer.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 613971651b2..e4a35388170 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -725,6 +725,20 @@ void genX(CmdDispatch)( struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + if (prog_data->uses_num_work_groups) { + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); + uint32_t *sizes = state.map; + sizes[0] = x; + sizes[1] = y; + sizes[2] = z; + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + cmd_buffer->state.num_workgroups_offset = state.offset; + cmd_buffer->state.num_workgroups_bo = + &cmd_buffer->device->dynamic_state_block_pool.bo; + } + cmd_buffer_flush_compute_state(cmd_buffer); anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, @@ -757,6 +771,11 @@ void genX(CmdDispatchIndirect)( struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; + if (prog_data->uses_num_work_groups) { + cmd_buffer->state.num_workgroups_offset = bo_offset; + cmd_buffer->state.num_workgroups_bo = bo; + } + cmd_buffer_flush_compute_state(cmd_buffer); gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); -- cgit v1.2.3 From 4bb1e7937a28d7dd2ac2d98d79c873d478679b99 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 27 Jan 2016 16:59:54 -0800 Subject: anv/gen7: Disable fs dispatch for depth/stencil only pipelines 292031a for gen7 Signed-off-by: Jordan Justen --- src/vulkan/gen7_pipeline.c | 104 ++++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 49 deletions(-) (limited to 
'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 8206432ffc3..42c50310e37 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -381,64 +381,70 @@ genX(graphics_pipeline_create)( .GSEnable = true); } - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || - wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) - anv_finishme("two-sided color needs sbe swizzling setup"); - if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) - anv_finishme("primitive_id needs sbe swizzling setup"); - - /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE, - .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, - .VertexURBEntryReadLength = urb_length, - .VertexURBEntryReadOffset = urb_offset, - .PointSpriteTextureCoordinateOrigin = UPPERLEFT); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), - .KernelStartPointer0 = pipeline->ps_ksp0, - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], - .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), + if (pipeline->ps_ksp0 == NO_KERNEL) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + anv_finishme("gen7 alternative to " + "3DSTATE_PS_EXTRA.PixelShaderValid = false"); + } else { + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || + wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) + anv_finishme("two-sided color needs sbe swizzling setup"); + if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) + anv_finishme("primitive_id needs sbe swizzling setup"); + + /* FIXME: generated header doesn't emit attr swizzle fields */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE, + .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, + .VertexURBEntryReadLength = urb_length, + 
.VertexURBEntryReadOffset = urb_offset, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .KernelStartPointer0 = pipeline->ps_ksp0, + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), - .MaximumNumberofThreads = device->info.max_wm_threads - 1, - .PushConstantEnable = wm_prog_data->base.nr_params > 0, - .AttributeEnable = wm_prog_data->num_varying_inputs > 0, - .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .MaximumNumberofThreads = device->info.max_wm_threads - 1, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, - .RenderTargetFastClearEnable = false, - .DualSourceBlendEnable = false, - .RenderTargetResolveEnable = false, + .RenderTargetFastClearEnable = false, + .DualSourceBlendEnable = false, + .RenderTargetResolveEnable = false, - .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? - POSOFFSET_SAMPLE : POSOFFSET_NONE, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
+ POSOFFSET_SAMPLE : POSOFFSET_NONE, - ._32PixelDispatchEnable = false, - ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, - ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._32PixelDispatchEnable = false, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, - .DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0, - .DispatchGRFStartRegisterforConstantSetupData1 = 0, - .DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2, + .DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterforConstantSetupData1 = 0, + .DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2, #if 0 - /* Haswell requires the sample mask to be set in this packet as well as - * in 3DSTATE_SAMPLE_MASK; the values should match. */ - /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ + /* Haswell requires the sample mask to be set in this packet as well as + * in 3DSTATE_SAMPLE_MASK; the values should match. */ + /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ #endif - .KernelStartPointer1 = 0, - .KernelStartPointer2 = pipeline->ps_ksp2); - - /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, - .StatisticsEnable = true, - .ThreadDispatchEnable = true, - .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ - .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ - .EarlyDepthStencilControl = EDSC_NORMAL, - .PointRasterizationRule = RASTRULE_UPPER_RIGHT, - .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, - .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes); + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). 
*/ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + .StatisticsEnable = true, + .ThreadDispatchEnable = true, + .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ + .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ + .EarlyDepthStencilControl = EDSC_NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes); + } *pPipeline = anv_pipeline_to_handle(pipeline); -- cgit v1.2.3 From dd2effb0e75c614333c7e4e99ea47e29fd1ac415 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 28 Jan 2016 17:19:15 -0800 Subject: anv/gen7: Subtract 1 from num_elements when setting up buffer surface state e8f51fe4 for gen7 Signed-off-by: Jordan Justen --- src/vulkan/gen7_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 4c27716d18e..2375070636e 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -49,9 +49,9 @@ genX(fill_buffer_surface_state)(void *state, enum isl_format format, .TiledSurface = false, .RenderCacheReadWriteMode = false, .SurfaceObjectControlState = GENX(MOCS), - .Height = (num_elements >> 7) & 0x3fff, - .Width = num_elements & 0x7f, - .Depth = (num_elements >> 21) & 0x3f, + .Height = ((num_elements - 1) >> 7) & 0x3fff, + .Width = (num_elements - 1) & 0x7f, + .Depth = ((num_elements - 1) >> 21) & 0x3f, .SurfacePitch = stride - 1, # if (ANV_IS_HASWELL) .ShaderChannelSelectR = SCS_RED, -- cgit v1.2.3 From ab0d8608d269e9bc872f489eee50bc616bd6e076 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 30 Jan 2016 00:31:58 -0800 Subject: anv: Support MEDIA_VFE_STATE for gen7 Signed-off-by: Jordan Justen --- src/vulkan/genX_pipeline.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/genX_pipeline.c b/src/vulkan/genX_pipeline.c index 
14a608b9fca..e78cfb55ef9 100644 --- a/src/vulkan/genX_pipeline.c +++ b/src/vulkan/genX_pipeline.c @@ -104,15 +104,16 @@ genX(compute_pipeline_create)( #if ANV_GEN > 7 .ScratchSpaceBasePointerHigh = 0, .StackSize = 0, +#else + .GPGPUMode = true, #endif - .MaximumNumberofThreads = device->info.max_cs_threads - 1, - .NumberofURBEntries = 2, + .NumberofURBEntries = ANV_GEN <= 7 ? 0 : 2, .ResetGatewayTimer = true, #if ANV_GEN == 8 .BypassGatewayControl = true, #endif - .URBEntryAllocationSize = 2, + .URBEntryAllocationSize = ANV_GEN <= 7 ? 0 : 2, .CURBEAllocationSize = 0); struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; -- cgit v1.2.3 From 8e48ff3ad669cc3ace7dfdbe22e6bf3dbde938a8 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 30 Jan 2016 00:49:31 -0800 Subject: anv/gen7: Set SLM size in interface descriptor Signed-off-by: Jordan Justen --- src/vulkan/gen7_cmd_buffer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index e4a35388170..091faad7c51 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -294,6 +294,16 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) .CURBEDataStartAddress = push_state.offset); } + assert(prog_data->total_shared <= 64 * 1024); + uint32_t slm_size = 0; + if (prog_data->total_shared > 0) { + /* slm_size is in 4k increments, but must be a power of 2. 
*/ + slm_size = 4 * 1024; + while (slm_size < prog_data->total_shared) + slm_size <<= 1; + slm_size /= 4 * 1024; + } + struct anv_state state = anv_state_pool_emit(&device->dynamic_state_pool, GEN7_INTERFACE_DESCRIPTOR_DATA, 64, @@ -304,6 +314,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) push_constant_regs, .ConstantURBEntryReadOffset = 0, .BarrierEnable = cs_prog_data->uses_barrier, + .SharedLocalMemorySize = slm_size, .NumberofThreadsinGPGPUThreadGroup = pipeline->cs_thread_width_max); -- cgit v1.2.3 From 2d8726a4b7d0b3444f62b22e48115c1de19e5f7d Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 30 Jan 2016 09:30:50 -0800 Subject: anv/genX_pipeline: Remove unnecessary #include files Signed-off-by: Jordan Justen --- src/vulkan/genX_pipeline.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/genX_pipeline.c b/src/vulkan/genX_pipeline.c index e78cfb55ef9..b10db45bd35 100644 --- a/src/vulkan/genX_pipeline.c +++ b/src/vulkan/genX_pipeline.c @@ -21,12 +21,6 @@ * IN THE SOFTWARE. */ -#include -#include -#include -#include -#include - #include "anv_private.h" #if (ANV_GEN == 9) @@ -39,8 +33,6 @@ # include "gen7_pack.h" #endif -#include "genX_pipeline_util.h" - VkResult genX(compute_pipeline_create)( VkDevice _device, -- cgit v1.2.3 From b207a6b5aa8fb3bb45750dea6619b5d9a91a7ee3 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 30 Jan 2016 14:56:03 -0800 Subject: anv/gen7: Set BypassGatewayControl in MEDIA_VFE_STATE Signed-off-by: Jordan Justen --- src/vulkan/genX_pipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/genX_pipeline.c b/src/vulkan/genX_pipeline.c index b10db45bd35..b62e401c4ee 100644 --- a/src/vulkan/genX_pipeline.c +++ b/src/vulkan/genX_pipeline.c @@ -102,7 +102,7 @@ genX(compute_pipeline_create)( .MaximumNumberofThreads = device->info.max_cs_threads - 1, .NumberofURBEntries = ANV_GEN <= 7 ? 
0 : 2, .ResetGatewayTimer = true, -#if ANV_GEN == 8 +#if ANV_GEN <= 8 .BypassGatewayControl = true, #endif .URBEntryAllocationSize = ANV_GEN <= 7 ? 0 : 2, -- cgit v1.2.3 From f96a6c65a37829bdfe5539f407931ffa9c275342 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 29 Jan 2016 15:08:19 -0800 Subject: anv/gen7: Rename gen7_batch_lr* to emit_lr* Signed-off-by: Jordan Justen --- src/vulkan/gen7_cmd_buffer.c | 60 ++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 091faad7c51..7108d74ba65 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -326,6 +326,23 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; } +static void +emit_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_IMM, + .RegisterOffset = reg, + .DataDWord = imm); +} + static void cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) { @@ -639,23 +656,6 @@ void genX(CmdDrawIndexed)( .BaseVertexLocation = vertexOffset); } -static void -gen7_batch_lrm(struct anv_batch *batch, - uint32_t reg, struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); -} - -static void -gen7_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_IMM, - .RegisterOffset = reg, - .DataDWord = imm); -} - /* Auto-Draw / Indirect Registers */ #define GEN7_3DPRIM_END_OFFSET 0x2420 #define GEN7_3DPRIM_START_VERTEX 0x2430 @@ -683,11 +683,11 @@ void genX(CmdDrawIndirect)( 
cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); - gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); - gen7_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, .IndirectParameterEnable = true, @@ -714,11 +714,11 @@ void genX(CmdDrawIndexedIndirect)( cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); - gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); - gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); anv_batch_emit(&cmd_buffer->batch, 
GEN7_3DPRIMITIVE, .IndirectParameterEnable = true, @@ -789,9 +789,9 @@ void genX(CmdDispatchIndirect)( cmd_buffer_flush_compute_state(cmd_buffer); - gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); - gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); - gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, .IndirectParameterEnable = true, -- cgit v1.2.3 From ea63663a729facf1d5db2c10f89c822db5f385b3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 1 Feb 2016 05:58:37 -0800 Subject: wsi/x11: Remove B8G8R8_UNORM We don't actually support that format yet because ISL doesn't have an enum for it. We need to beef up the formats table to allow for tiled-only formats. 
--- src/vulkan/anv_wsi_x11.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index d5e5b88c17b..c4a82706b00 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -118,7 +118,6 @@ wsi_x11_get_connection(struct anv_instance *instance, xcb_connection_t *conn) static const VkSurfaceFormatKHR formats[] = { { .format = VK_FORMAT_B8G8R8A8_UNORM, }, - { .format = VK_FORMAT_B8G8R8_UNORM, }, }; static const VkPresentModeKHR present_modes[] = { -- cgit v1.2.3 From ffbc32f8d936ab009ef0ed46526fbb5c35ff7258 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 1 Feb 2016 10:51:01 -0800 Subject: anv/meta: Strip trailing whitespace --- src/vulkan/anv_meta_resolve.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c index ffd41857f13..2107a758fde 100644 --- a/src/vulkan/anv_meta_resolve.c +++ b/src/vulkan/anv_meta_resolve.c @@ -56,16 +56,16 @@ meta_resolve_restore(struct anv_meta_saved_state *saved_state, anv_meta_restore(saved_state, cmd_buffer); } -static VkPipeline * +static VkPipeline * get_pipeline_h(struct anv_device *device, uint32_t samples) -{ - uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */ +{ + uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */ assert(samples >= 2); - assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines)); + assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines)); return &device->meta_state.resolve.pipelines[i]; -} +} static nir_shader * build_nir_vs(void) -- cgit v1.2.3 From dc5fdcd6b7463955ba4733a27791cf85008f5e7a Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 1 Feb 2016 11:54:40 -0800 Subject: anv: Advertise robustBufferAccess The GPU does most of this for us as long as we set up tight bounds for the buffers, which we do. Additionally, we range check dynamically buffers in the shader. 
With that it's safe to turn on robustBufferAccess. --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 1e0bfb080d9..09aca1ab1e9 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -336,7 +336,7 @@ void anv_GetPhysicalDeviceFeatures( anv_finishme("Get correct values for PhysicalDeviceFeatures"); *pFeatures = (VkPhysicalDeviceFeatures) { - .robustBufferAccess = false, + .robustBufferAccess = true, .fullDrawIndexUint32 = false, .imageCubeArray = false, .independentBlend = false, -- cgit v1.2.3 From afb327a985d8ad70eb8fcd8b94fdbc040167c22b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 1 Feb 2016 12:18:10 -0800 Subject: anv: Structify a one-member union anv_descriptor contained a union with one member. --- src/vulkan/anv_private.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index f4794d9fb9d..b9490ae31e2 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -918,9 +918,7 @@ struct anv_descriptor { union { struct { - union { - struct anv_image_view *image_view; - }; + struct anv_image_view *image_view; struct anv_sampler *sampler; }; -- cgit v1.2.3 From 443c578bca973e960f823f753c9081d24ec50225 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 1 Feb 2016 12:55:23 -0800 Subject: anv/wsi/x11: Expose SRGB all the time After a long discussion with Eric Anholt and Owen Taylor, I learned that X11 is basically always sRGB as that's what the scanout hardware does and X doesn't modify anything. Therefore, we should just always expose sRGB formats. 
--- src/vulkan/anv_wsi_x11.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index c4a82706b00..acb4a60e65f 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -117,7 +117,7 @@ wsi_x11_get_connection(struct anv_instance *instance, xcb_connection_t *conn) } static const VkSurfaceFormatKHR formats[] = { - { .format = VK_FORMAT_B8G8R8A8_UNORM, }, + { .format = VK_FORMAT_B8G8R8A8_SRGB, }, }; static const VkPresentModeKHR present_modes[] = { -- cgit v1.2.3 From 2d2c6fc6bbfe9ecd8b0f6072b0dc29d48a89b92c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 1 Feb 2016 13:06:21 -0800 Subject: anv/wsi/wayland: Advertise sRGB --- src/vulkan/anv_wsi_wayland.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index 815218ecc69..31d5f836a9f 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -103,9 +103,9 @@ wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha) case VK_FORMAT_B5G5R5A1_UNORM: return alpha ? WL_DRM_FORMAT_XRGB1555 : WL_DRM_FORMAT_XRGB1555; #endif - case VK_FORMAT_B8G8R8_UNORM: + case VK_FORMAT_B8G8R8_SRGB: return WL_DRM_FORMAT_BGRX8888; - case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: return alpha ? 
WL_DRM_FORMAT_ARGB8888 : WL_DRM_FORMAT_XRGB8888; #if 0 case VK_FORMAT_B10G10R10A2_UNORM: @@ -159,10 +159,10 @@ drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format) break; #endif case WL_DRM_FORMAT_XRGB8888: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_UNORM); + wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_SRGB); /* fallthrough */ case WL_DRM_FORMAT_ARGB8888: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_UNORM); + wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_SRGB); break; #if 0 case WL_DRM_FORMAT_ARGB2101010: -- cgit v1.2.3 From abc0e5c1b8f7a9b5bbe5363156c13ef0c6de17c0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 1 Feb 2016 13:26:59 -0800 Subject: nir/spirv: Fix the UBO loading case of a single row-major matric column --- src/glsl/nir/spirv/vtn_variables.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_variables.c b/src/glsl/nir/spirv/vtn_variables.c index dd006c355d9..ee1e3da2e08 100644 --- a/src/glsl/nir/spirv/vtn_variables.c +++ b/src/glsl/nir/spirv/vtn_variables.c @@ -498,19 +498,23 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); - struct vtn_ssa_value *comp = NULL, temp_val; + struct vtn_ssa_value *comp, temp_val; if (!load) { temp_val.def = nir_channel(&b->nb, (*inout)->def, i); temp_val.type = glsl_scalar_type(base_type); - comp = &temp_val; } + comp = &temp_val; _vtn_load_store_tail(b, op, load, index, elem_offset, &comp, glsl_scalar_type(base_type)); comps[i] = comp->def; } - if (load) + if (load) { + if (*inout == NULL) + *inout = vtn_create_ssa_value(b, type->type); + (*inout)->def = nir_vec(&b->nb, comps, num_comps); + } } } else { /* Column-major with a deref. Fall through to array case. 
*/ -- cgit v1.2.3 From 593f88c0db10c31486db872c0eeda0c66ca22afb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 1 Feb 2016 13:26:59 -0800 Subject: nir/spirv: Fix the UBO loading case of a single row-major matric column --- src/glsl/nir/spirv/vtn_variables.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_variables.c b/src/glsl/nir/spirv/vtn_variables.c index ee1e3da2e08..7d44cf4be9e 100644 --- a/src/glsl/nir/spirv/vtn_variables.c +++ b/src/glsl/nir/spirv/vtn_variables.c @@ -490,8 +490,11 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, _vtn_load_store_tail(b, op, load, index, offset, inout, glsl_scalar_type(base_type)); } else { - /* Picking one element off each column */ + /* Grabbing a column; picking one element off each row */ unsigned num_comps = glsl_get_vector_elements(type->type); + const struct glsl_type *column_type = + glsl_get_column_type(type->type); + nir_ssa_def *comps[4]; for (unsigned i = 0; i < num_comps; i++) { nir_ssa_def *elem_offset = @@ -511,7 +514,7 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, if (load) { if (*inout == NULL) - *inout = vtn_create_ssa_value(b, type->type); + *inout = vtn_create_ssa_value(b, column_type); (*inout)->def = nir_vec(&b->nb, comps, num_comps); } -- cgit v1.2.3 From b1a16232932058f1b3db478964098c3a7c76e8d4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 1 Feb 2016 13:47:13 -0800 Subject: nir/spirv: Add support for SpvOpImage --- src/glsl/nir/spirv/spirv_to_nir.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index 32188e5f364..ed6ad6d7e63 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1167,6 +1167,17 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, val->sampled_image->sampler = vtn_value(b, w[4], 
vtn_value_type_access_chain)->access_chain; return; + } else if (opcode == SpvOpImage) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + struct vtn_value *src_val = vtn_untyped_value(b, w[3]); + if (src_val->value_type == vtn_value_type_sampled_image) { + val->access_chain = src_val->sampled_image->image; + } else { + assert(src_val->value_type == vtn_value_type_access_chain); + val->access_chain = src_val->access_chain; + } + return; } struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type; @@ -2380,6 +2391,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, break; case SpvOpSampledImage: + case SpvOpImage: case SpvOpImageSampleImplicitLod: case SpvOpImageSampleExplicitLod: case SpvOpImageSampleDrefImplicitLod: -- cgit v1.2.3 From 499f7c2f0b2780bc51a0301816d2b91c89d484c0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 1 Feb 2016 13:52:47 -0800 Subject: nir/spirv: Handle the LOD parameter of OpImageQuerySizeLod --- src/glsl/nir/spirv/spirv_to_nir.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/spirv_to_nir.c b/src/glsl/nir/spirv/spirv_to_nir.c index ed6ad6d7e63..c002457ce12 100644 --- a/src/glsl/nir/spirv/spirv_to_nir.c +++ b/src/glsl/nir/spirv/spirv_to_nir.c @@ -1237,6 +1237,10 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, break; } + /* For OpImageQuerySizeLod, we always have an LOD */ + if (opcode == SpvOpImageQuerySizeLod) + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); + /* Figure out the base texture operation */ nir_texop texop; switch (opcode) { -- cgit v1.2.3 From 8776d3cb8e86d4999613590a7eeac2322acdd99f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 1 Feb 2016 14:00:25 -0800 Subject: nir/spirv: Fix UBO loads of a single element of a row-major matrix --- src/glsl/nir/spirv/vtn_variables.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_variables.c 
b/src/glsl/nir/spirv/vtn_variables.c index 7d44cf4be9e..3ad98aa5310 100644 --- a/src/glsl/nir/spirv/vtn_variables.c +++ b/src/glsl/nir/spirv/vtn_variables.c @@ -487,6 +487,8 @@ _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, vtn_access_link_as_ssa(b, chain->link[chain_idx + 1], type->stride); offset = nir_iadd(&b->nb, offset, row_offset); + if (load) + *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type)); _vtn_load_store_tail(b, op, load, index, offset, inout, glsl_scalar_type(base_type)); } else { -- cgit v1.2.3 From ac0589b213f21f22dc330d2e8fb8dceda3960ddc Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Mon, 1 Feb 2016 14:28:00 -0800 Subject: i965: fix unsigned long overflows for i386 bit-shifts on 32 bit unsigned longs overflow in several places. The intention was for 64 bit integers to be used. --- src/vulkan/gen75_pack.h | 6 +++--- src/vulkan/gen7_pack.h | 6 +++--- src/vulkan/gen8_pack.h | 6 +++--- src/vulkan/gen9_pack.h | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h index b012032190e..7b098894ee2 100644 --- a/src/vulkan/gen75_pack.h +++ b/src/vulkan/gen75_pack.h @@ -47,7 +47,7 @@ union __gen_value { static inline uint64_t __gen_mbo(uint32_t start, uint32_t end) { - return (~0ul >> (64 - (end - start + 1))) << start; + return (~0ull >> (64 - (end - start + 1))) << start; } static inline uint64_t @@ -56,7 +56,7 @@ __gen_field(uint64_t v, uint32_t start, uint32_t end) __gen_validate_value(v); #if DEBUG if (end - start + 1 < 64) - assert(v < 1ul << (end - start + 1)); + assert(v < 1ull << (end - start + 1)); #endif return v << start; @@ -97,7 +97,7 @@ __gen_offset(uint64_t v, uint32_t start, uint32_t end) { __gen_validate_value(v); #if DEBUG - uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; + uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; assert((v & ~mask) == 0); #endif diff --git 
a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h index a3ba30a9745..7b104c3ab3c 100644 --- a/src/vulkan/gen7_pack.h +++ b/src/vulkan/gen7_pack.h @@ -47,7 +47,7 @@ union __gen_value { static inline uint64_t __gen_mbo(uint32_t start, uint32_t end) { - return (~0ul >> (64 - (end - start + 1))) << start; + return (~0ull >> (64 - (end - start + 1))) << start; } static inline uint64_t @@ -56,7 +56,7 @@ __gen_field(uint64_t v, uint32_t start, uint32_t end) __gen_validate_value(v); #if DEBUG if (end - start + 1 < 64) - assert(v < 1ul << (end - start + 1)); + assert(v < 1ull << (end - start + 1)); #endif return v << start; @@ -97,7 +97,7 @@ __gen_offset(uint64_t v, uint32_t start, uint32_t end) { __gen_validate_value(v); #if DEBUG - uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; + uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; assert((v & ~mask) == 0); #endif diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h index 042e0290a74..3c014b96147 100644 --- a/src/vulkan/gen8_pack.h +++ b/src/vulkan/gen8_pack.h @@ -47,7 +47,7 @@ union __gen_value { static inline uint64_t __gen_mbo(uint32_t start, uint32_t end) { - return (~0ul >> (64 - (end - start + 1))) << start; + return (~0ull >> (64 - (end - start + 1))) << start; } static inline uint64_t @@ -56,7 +56,7 @@ __gen_field(uint64_t v, uint32_t start, uint32_t end) __gen_validate_value(v); #if DEBUG if (end - start + 1 < 64) - assert(v < 1ul << (end - start + 1)); + assert(v < 1ull << (end - start + 1)); #endif return v << start; @@ -97,7 +97,7 @@ __gen_offset(uint64_t v, uint32_t start, uint32_t end) { __gen_validate_value(v); #if DEBUG - uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; + uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; assert((v & ~mask) == 0); #endif diff --git a/src/vulkan/gen9_pack.h b/src/vulkan/gen9_pack.h index db54e9cf85b..df295f4900a 100644 --- a/src/vulkan/gen9_pack.h +++ b/src/vulkan/gen9_pack.h @@ -47,7 +47,7 @@ union __gen_value { static 
inline uint64_t __gen_mbo(uint32_t start, uint32_t end) { - return (~0ul >> (64 - (end - start + 1))) << start; + return (~0ull >> (64 - (end - start + 1))) << start; } static inline uint64_t @@ -56,7 +56,7 @@ __gen_field(uint64_t v, uint32_t start, uint32_t end) __gen_validate_value(v); #if DEBUG if (end - start + 1 < 64) - assert(v < 1ul << (end - start + 1)); + assert(v < 1ull << (end - start + 1)); #endif return v << start; @@ -97,7 +97,7 @@ __gen_offset(uint64_t v, uint32_t start, uint32_t end) { __gen_validate_value(v); #if DEBUG - uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; + uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; assert((v & ~mask) == 0); #endif -- cgit v1.2.3 From 0d2145b50f4b8b85702c9442f754c060901f38e0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 2 Feb 2016 12:22:00 -0800 Subject: anv/fence: Default to not ready This is kind-of silly. We *really* need to do a better job of making sure all objects have all their default values set. We probably also want to, eventually, put everything into the BO (to save memory) and, more specifically, make the GPU write the "ready" flag. That way GetFenceStatus won't ever have to call into the kernel. 
--- src/vulkan/anv_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 09aca1ab1e9..82514677977 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1369,6 +1369,8 @@ VkResult anv_CreateFence( fence->execbuf.rsvd1 = device->context_id; fence->execbuf.rsvd2 = 0; + fence->ready = false; + *pFence = anv_fence_to_handle(fence); return VK_SUCCESS; -- cgit v1.2.3 From fd99f3d65856288aaea8076cbff56f76bfb7cfa1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 2 Feb 2016 13:15:18 -0800 Subject: anv/device: Improve version error reporting --- src/vulkan/anv_device.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 82514677977..c39c506c78f 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -217,7 +217,11 @@ VkResult anv_CreateInstance( uint32_t client_version = pCreateInfo->pApplicationInfo->apiVersion; if (VK_MAKE_VERSION(1, 0, 0) > client_version || client_version > VK_MAKE_VERSION(1, 0, 2)) { - return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); + return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, + "Client requested version %d.%d.%d", + VK_VERSION_MAJOR(client_version), + VK_VERSION_MINOR(client_version), + VK_VERSION_PATCH(client_version)); } for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { -- cgit v1.2.3 From 5a06bac4a06cfd4db8cc172083a72795ff5b07d8 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 2 Feb 2016 14:35:40 -0800 Subject: anv: Use @LIB_DIR@ in anv_icd.json Otherwise we may get a lib vs lib64 mismatch. 
--- src/vulkan/anv_icd.json.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_icd.json.in b/src/vulkan/anv_icd.json.in index cef6a30b402..ad069b3e2ff 100644 --- a/src/vulkan/anv_icd.json.in +++ b/src/vulkan/anv_icd.json.in @@ -1,7 +1,7 @@ { "file_format_version": "1.0.0", "ICD": { - "library_path": "@abs_top_builddir@/lib/libvulkan.so.0.0.0", + "library_path": "@abs_top_builddir@/@LIB_DIR@/libvulkan.so.0.0.0", "abi_versions": "0.210.1" } } -- cgit v1.2.3 From ea8c2d118a8c9645bedc86259ba42968ac27c239 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Mon, 1 Feb 2016 17:13:02 -0800 Subject: anv: Fix anv_descriptor_set reference error on deletion anv_descriptor_set_destroy uses the descriptor sets's set_layout member to iterate the set's buffer views. However, the set_layout reference may have previously been freed. On 64 bit builds, this bug generated valgrind errors but did not affect CTS test results. On 32 bit builds, it reliably produces assertions and memory corruption. 
--- src/vulkan/anv_descriptor_set.c | 5 +++-- src/vulkan/anv_private.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 8997f50297a..f93ea819b0c 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -391,7 +391,7 @@ anv_descriptor_set_create(struct anv_device *device, set->buffer_views[b].surface_state = anv_state_pool_alloc(&device->surface_state_pool, 64, 64); } - + set->buffer_count = layout->buffer_count; *out_set = set; return VK_SUCCESS; @@ -402,7 +402,7 @@ anv_descriptor_set_destroy(struct anv_device *device, struct anv_descriptor_set *set) { /* XXX: Use the pool */ - for (uint32_t b = 0; b < set->layout->buffer_count; b++) + for (uint32_t b = 0; b < set->buffer_count; b++) anv_state_pool_free(&device->surface_state_pool, set->buffer_views[b].surface_state); @@ -589,5 +589,6 @@ void anv_UpdateDescriptorSets( dest->descriptors[copy->dstBinding + j] = src->descriptors[copy->srcBinding + j]; } + dest->buffer_count = src->buffer_count; } } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index b9490ae31e2..ad51b1fa472 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -928,6 +928,7 @@ struct anv_descriptor { struct anv_descriptor_set { const struct anv_descriptor_set_layout *layout; + uint32_t buffer_count; struct anv_buffer_view *buffer_views; struct anv_descriptor descriptors[0]; }; -- cgit v1.2.3 From 6a7e2904e0a2a6f8efbf739a1b3cad7e1e4ab42d Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Tue, 2 Feb 2016 15:30:54 -0800 Subject: nir/spirv: fix build_mat4_det stack smasher When generating a sub-determinate matrix, a 3-element swizzle array was indexed with clever inline boolean logic. Unfortunately, when i and j are both 3, the index overruns the array, smashing the next variable on the stack. 
For 64 bit builds, the alignment of the 3-element unsigned array leaves 32 bits of spacing before the next local variable, hiding this bug. On i386, a subcolumn pointer was smashed then dereferenced. --- src/glsl/nir/spirv/vtn_glsl450.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/glsl/nir/spirv/vtn_glsl450.c b/src/glsl/nir/spirv/vtn_glsl450.c index 9c82c07894a..bc38aa4b1be 100644 --- a/src/glsl/nir/spirv/vtn_glsl450.c +++ b/src/glsl/nir/spirv/vtn_glsl450.c @@ -68,8 +68,11 @@ build_mat4_det(nir_builder *b, nir_ssa_def **col) nir_ssa_def *subdet[4]; for (unsigned i = 0; i < 4; i++) { unsigned swiz[3]; - for (unsigned j = 0; j < 4; j++) - swiz[j - (j > i)] = j; + for (unsigned j = 0, k = 0; j < 3; j++, k++) { + if (k == i) + k++; /* skip column */ + swiz[j] = k; + } nir_ssa_def *subcol[3]; subcol[0] = nir_swizzle(b, col[1], swiz, 3, true); -- cgit v1.2.3 From 1f5d56304f51a09943e72f2b1aad0683d9220482 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 3 Feb 2016 02:50:06 -0800 Subject: anv/descriptor_set: Fix descriptor copies We weren't pulling the actual binding location information out of the set layout. The new code mirrors the descriptor write code. 
--- src/vulkan/anv_descriptor_set.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index f93ea819b0c..0ddd4bf1bc2 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -584,11 +584,21 @@ void anv_UpdateDescriptorSets( for (uint32_t i = 0; i < descriptorCopyCount; i++) { const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; ANV_FROM_HANDLE(anv_descriptor_set, src, copy->dstSet); - ANV_FROM_HANDLE(anv_descriptor_set, dest, copy->dstSet); - for (uint32_t j = 0; j < copy->descriptorCount; j++) { - dest->descriptors[copy->dstBinding + j] = - src->descriptors[copy->srcBinding + j]; - } - dest->buffer_count = src->buffer_count; + ANV_FROM_HANDLE(anv_descriptor_set, dst, copy->dstSet); + + const struct anv_descriptor_set_binding_layout *src_layout = + &src->layout->binding[copy->srcBinding]; + struct anv_descriptor *src_desc = + &src->descriptors[src_layout->descriptor_index]; + src_desc += copy->srcArrayElement; + + const struct anv_descriptor_set_binding_layout *dst_layout = + &dst->layout->binding[copy->dstBinding]; + struct anv_descriptor *dst_desc = + &dst->descriptors[dst_layout->descriptor_index]; + dst_desc += copy->dstArrayElement; + + for (uint32_t j = 0; j < copy->descriptorCount; j++) + dst_desc[j] = src_desc[j]; } } -- cgit v1.2.3 From 42b9320fbf67844d97f054db1a427894c444edf3 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 4 Feb 2016 11:41:59 -0800 Subject: anv/image: Rename nonrt_surface_state Let's call it what it is, not what it is not. Rename it to 'sampler_surface_state'. 
--- src/vulkan/anv_cmd_buffer.c | 2 +- src/vulkan/anv_image.c | 14 +++++++------- src/vulkan/anv_private.h | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 0966e7658bf..a0f9bab8e1e 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -801,7 +801,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - surface_state = desc->image_view->nonrt_surface_state; + surface_state = desc->image_view->sampler_surface_state; assert(surface_state.alloc_size); bo = desc->image_view->bo; bo_offset = desc->image_view->offset; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 6cc5700c21e..be91cdc5d92 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -216,7 +216,7 @@ anv_image_create(VkDevice _device, image->tiling = pCreateInfo->tiling; if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { - image->needs_nonrt_surface_state = true; + image->needs_sampler_surface_state = true; } if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { @@ -570,14 +570,14 @@ anv_image_view_init(struct anv_image_view *iview, .depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel), }; - if (image->needs_nonrt_surface_state) { - iview->nonrt_surface_state = alloc_surface_state(device, cmd_buffer); + if (image->needs_sampler_surface_state) { + iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer); - anv_fill_image_surface_state(device, iview->nonrt_surface_state, + anv_fill_image_surface_state(device, iview->sampler_surface_state, iview, &mCreateInfo, VK_IMAGE_USAGE_SAMPLED_BIT); } else { - iview->nonrt_surface_state.alloc_size = 0; + iview->sampler_surface_state.alloc_size = 0; } if (image->needs_color_rt_surface_state) { @@ -641,9 +641,9 @@ 
anv_DestroyImageView(VkDevice _device, VkImageView _iview, iview->color_rt_surface_state); } - if (iview->image->needs_nonrt_surface_state) { + if (iview->image->needs_sampler_surface_state) { anv_state_pool_free(&device->surface_state_pool, - iview->nonrt_surface_state); + iview->sampler_surface_state); } if (iview->image->needs_storage_surface_state) { diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index ad51b1fa472..8c4136b8cfd 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1552,7 +1552,7 @@ struct anv_image { struct anv_bo *bo; VkDeviceSize offset; - bool needs_nonrt_surface_state:1; + bool needs_sampler_surface_state:1; bool needs_color_rt_surface_state:1; bool needs_storage_surface_state:1; @@ -1595,8 +1595,8 @@ struct anv_image_view { /** RENDER_SURFACE_STATE when using image as a color render target. */ struct anv_state color_rt_surface_state; - /** RENDER_SURFACE_STATE when using image as a non render target. */ - struct anv_state nonrt_surface_state; + /** RENDER_SURFACE_STATE when using image as a sampler surface. */ + struct anv_state sampler_surface_state; /** RENDER_SURFACE_STATE when using image as a storage image. */ struct anv_state storage_surface_state; -- cgit v1.2.3 From 3eebf3686be3de10cbeda8acd884e82df3e1438a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 4 Feb 2016 11:58:05 -0800 Subject: anv: Drop anv_image::needs_*_surface_state anv_image::needs_sampler_surface_state was a redundant member, identical to (usage & VK_IMAGE_USAGE_SAMPLED_BIT). Likewise for the other needs_* members. 
--- src/vulkan/anv_image.c | 24 ++++++------------------ src/vulkan/anv_private.h | 4 ---- 2 files changed, 6 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index be91cdc5d92..da71406cf27 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -215,18 +215,6 @@ anv_image_create(VkDevice _device, image->usage = anv_image_get_full_usage(pCreateInfo); image->tiling = pCreateInfo->tiling; - if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { - image->needs_sampler_surface_state = true; - } - - if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - image->needs_color_rt_surface_state = true; - } - - if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { - image->needs_storage_surface_state = true; - } - if (likely(anv_format_is_color(image->format))) { r = make_surface(device, image, create_info, VK_IMAGE_ASPECT_COLOR_BIT); @@ -570,7 +558,7 @@ anv_image_view_init(struct anv_image_view *iview, .depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel), }; - if (image->needs_sampler_surface_state) { + if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer); anv_fill_image_surface_state(device, iview->sampler_surface_state, @@ -580,7 +568,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->sampler_surface_state.alloc_size = 0; } - if (image->needs_color_rt_surface_state) { + if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); anv_fill_image_surface_state(device, iview->color_rt_surface_state, @@ -590,7 +578,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->color_rt_surface_state.alloc_size = 0; } - if (image->needs_storage_surface_state) { + if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); if (has_matching_storage_typed_format(device, iview->format)) @@ 
-636,17 +624,17 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview, ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_image_view, iview, _iview); - if (iview->image->needs_color_rt_surface_state) { + if (iview->color_rt_surface_state.alloc_size > 0) { anv_state_pool_free(&device->surface_state_pool, iview->color_rt_surface_state); } - if (iview->image->needs_sampler_surface_state) { + if (iview->sampler_surface_state.alloc_size > 0) { anv_state_pool_free(&device->surface_state_pool, iview->sampler_surface_state); } - if (iview->image->needs_storage_surface_state) { + if (iview->storage_surface_state.alloc_size > 0) { anv_state_pool_free(&device->surface_state_pool, iview->storage_surface_state); } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8c4136b8cfd..8e12792456b 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1552,10 +1552,6 @@ struct anv_image { struct anv_bo *bo; VkDeviceSize offset; - bool needs_sampler_surface_state:1; - bool needs_color_rt_surface_state:1; - bool needs_storage_surface_state:1; - /** * Image subsurfaces * -- cgit v1.2.3 From d1617dbec3d4c7cb0d91c0f436a1f2de488c4633 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 5 Feb 2016 12:41:51 -0800 Subject: anv: Share URB setup between gen7 and gen8+ --- src/vulkan/gen7_pipeline.c | 43 +--------------------------------- src/vulkan/gen8_pipeline.c | 30 +----------------------- src/vulkan/genX_pipeline_util.h | 51 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 71 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 42c50310e37..bf30f59be7b 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -227,28 +227,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_DS, .DSFunctionEnable = false); anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_STREAMOUT, .SOFunctionEnable = false); 
- /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: - * - * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall - * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, - * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, - * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL - * needs to be sent before any combination of VS associated 3DSTATE." - */ - anv_batch_emit(&pipeline->batch, GEN7_PIPE_CONTROL, - .DepthStallEnable = true, - .PostSyncOperation = WriteImmediateData, - .Address = { &device->workaround_bo, 0 }); - - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS, - .ConstantBufferOffset = 0, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS, - .ConstantBufferOffset = 4, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS, - .ConstantBufferOffset = 8, - .ConstantBufferSize = 4); + emit_urb_setup(pipeline); anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); @@ -283,26 +262,6 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SAMPLE_MASK, .SampleMask = 0xff); - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_VS, - .VSURBStartingAddress = pipeline->urb.vs_start, - .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, - .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); - - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_GS, - .GSURBStartingAddress = pipeline->urb.gs_start, - .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, - .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); - - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_HS, - .HSURBStartingAddress = pipeline->urb.vs_start, - .HSURBEntryAllocationSize = 0, - .HSNumberofURBEntries = 0); - - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_DS, - .DSURBStartingAddress = pipeline->urb.vs_start, - .DSURBEntryAllocationSize = 0, - .DSNumberofURBEntries = 0); - const struct 
brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; /* The last geometry producing stage will set urb_offset and urb_length, * which we use in 3DSTATE_SBE. Skip the VUE header and position slots. */ diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index fce2331873c..c1295a0b83a 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -366,15 +366,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), .FunctionEnable = false); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), - .ConstantBufferOffset = 0, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), - .ConstantBufferOffset = 4, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), - .ConstantBufferOffset = 8, - .ConstantBufferSize = 4); + emit_urb_setup(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM_CHROMAKEY), .ChromaKeyKillEnable = false); @@ -398,26 +390,6 @@ genX(graphics_pipeline_create)( pipeline->ps_ksp0 == NO_KERNEL ? 
0 : pipeline->wm_prog_data.barycentric_interp_modes); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), - .VSURBStartingAddress = pipeline->urb.vs_start, - .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, - .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_GS), - .GSURBStartingAddress = pipeline->urb.gs_start, - .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, - .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_HS), - .HSURBStartingAddress = pipeline->urb.vs_start, - .HSURBEntryAllocationSize = 0, - .HSNumberofURBEntries = 0); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_DS), - .DSURBStartingAddress = pipeline->urb.vs_start, - .DSURBEntryAllocationSize = 0, - .DSNumberofURBEntries = 0); - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; offset = 1; length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; diff --git a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h index da61a1e5d39..059b72514a7 100644 --- a/src/vulkan/genX_pipeline_util.h +++ b/src/vulkan/genX_pipeline_util.h @@ -179,6 +179,57 @@ emit_vertex_input(struct anv_pipeline *pipeline, #endif } +static inline void +emit_urb_setup(struct anv_pipeline *pipeline) +{ +#if ANV_GEN == 7 + struct anv_device *device = pipeline->device; + + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall + * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, + * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL + * needs to be sent before any combination of VS associated 3DSTATE." 
+ */ + anv_batch_emit(&pipeline->batch, GEN7_PIPE_CONTROL, + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &device->workaround_bo, 0 }); +#endif + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_GS), + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_HS), + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_DS), + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); +} + static const uint32_t vk_to_gen_cullmode[] = { [VK_CULL_MODE_NONE] = CULLMODE_NONE, [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, -- cgit v1.2.3 From 381d85545a433e194610ed1af672cf7b5c1d07fc Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 5 Feb 2016 12:47:02 -0800 Subject: anv: Share scratch_space helper between gen7 and gen8+ The gen7 pipeline has a useful helper function for this, let's use it in gen8_pipeline.c too. 
The gen7 function has an off-by-one bug though: we have to compute log2(size / 1024) - 1, but we divide by 2048 instead so as to avoid the case where size is less than 1024 and we'd return -1. --- src/vulkan/gen7_pipeline.c | 6 ------ src/vulkan/gen8_pipeline.c | 6 +++--- src/vulkan/genX_pipeline_util.h | 6 ++++++ 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index bf30f59be7b..3fedd74f1ea 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -176,12 +176,6 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, .BlendStatePointer = pipeline->blend_state.offset); } -static inline uint32_t -scratch_space(const struct brw_stage_prog_data *prog_data) -{ - return ffs(prog_data->total_scratch / 1024); -} - GENX_FUNC(GEN7, GEN75) VkResult genX(graphics_pipeline_create)( VkDevice _device, diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index c1295a0b83a..4097de177ae 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -406,7 +406,7 @@ genX(graphics_pipeline_create)( .ExpectedVertexCount = pipeline->gs_vertex_count, .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], - .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048), + .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, .OutputTopology = gs_prog_data->output_topology, @@ -468,7 +468,7 @@ genX(graphics_pipeline_create)( .SoftwareExceptionEnable = false, .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX], - .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048), + .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), .DispatchGRFStartRegisterForURBData = vue_prog_data->base.dispatch_grf_start_reg, @@ -601,7 +601,7 @@ genX(graphics_pipeline_create)( .SamplerCount = 1, .ScratchSpaceBasePointer = 
pipeline->scratch_start[MESA_SHADER_FRAGMENT], - .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), + .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? diff --git a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h index 059b72514a7..9e0f82e7167 100644 --- a/src/vulkan/genX_pipeline_util.h +++ b/src/vulkan/genX_pipeline_util.h @@ -230,6 +230,12 @@ emit_urb_setup(struct anv_pipeline *pipeline) .DSNumberofURBEntries = 0); } +static inline uint32_t +scratch_space(const struct brw_stage_prog_data *prog_data) +{ + return ffs(prog_data->total_scratch / 2048); +} + static const uint32_t vk_to_gen_cullmode[] = { [VK_CULL_MODE_NONE] = CULLMODE_NONE, [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, -- cgit v1.2.3 From c9c3344c4f5bb12ff5f872603671c2ac66878479 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 5 Feb 2016 15:23:38 -0800 Subject: anv: Split out batch submit helper from anv_DeviceWaitIdle We'll reuse this mechanism in the next commit. 
--- src/vulkan/anv_device.c | 126 +++++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 60 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index c39c506c78f..91485a191c0 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -673,6 +673,67 @@ anv_device_init_border_colors(struct anv_device *device) border_colors); } +static VkResult +submit_simple_batch(struct anv_device *device, struct anv_batch *batch) +{ + struct anv_state state; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + struct anv_bo *bo = NULL; + VkResult result = VK_SUCCESS; + uint32_t size; + int64_t timeout; + int ret; + + size = align_u32(batch->next - batch->start, 8); + state = anv_state_pool_alloc(&device->dynamic_state_pool, MAX(size, 64), 32); + bo = &device->dynamic_state_pool.block_pool->bo; + memcpy(state.map, batch->start, size); + + exec2_objects[0].handle = bo->gem_handle; + exec2_objects[0].relocation_count = 0; + exec2_objects[0].relocs_ptr = 0; + exec2_objects[0].alignment = 0; + exec2_objects[0].offset = bo->offset; + exec2_objects[0].flags = 0; + exec2_objects[0].rsvd1 = 0; + exec2_objects[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t) exec2_objects; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = state.offset; + execbuf.batch_len = batch->next - state.map; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + + execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + execbuf.rsvd1 = device->context_id; + execbuf.rsvd2 = 0; + + ret = anv_gem_execbuffer(device, &execbuf); + if (ret != 0) { + /* We don't know the real error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); + goto fail; + } + + timeout = INT64_MAX; + ret = anv_gem_wait(device, bo->gem_handle, &timeout); + if (ret != 0) { + /* We don't know the real error. 
*/ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); + goto fail; + } + + fail: + anv_state_pool_free(&device->dynamic_state_pool, state); + + return result; +} + VkResult anv_CreateDevice( VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, @@ -936,71 +997,16 @@ VkResult anv_DeviceWaitIdle( VkDevice _device) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_state state; struct anv_batch batch; - struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 exec2_objects[1]; - struct anv_bo *bo = NULL; - VkResult result; - int64_t timeout; - int ret; - state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32); - bo = &device->dynamic_state_pool.block_pool->bo; - batch.start = batch.next = state.map; - batch.end = state.map + 32; + uint32_t cmds[8]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); anv_batch_emit(&batch, GEN7_MI_NOOP); - if (!device->info.has_llc) - anv_state_clflush(state); - - exec2_objects[0].handle = bo->gem_handle; - exec2_objects[0].relocation_count = 0; - exec2_objects[0].relocs_ptr = 0; - exec2_objects[0].alignment = 0; - exec2_objects[0].offset = bo->offset; - exec2_objects[0].flags = 0; - exec2_objects[0].rsvd1 = 0; - exec2_objects[0].rsvd2 = 0; - - execbuf.buffers_ptr = (uintptr_t) exec2_objects; - execbuf.buffer_count = 1; - execbuf.batch_start_offset = state.offset; - execbuf.batch_len = batch.next - state.map; - execbuf.cliprects_ptr = 0; - execbuf.num_cliprects = 0; - execbuf.DR1 = 0; - execbuf.DR4 = 0; - - execbuf.flags = - I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - execbuf.rsvd1 = device->context_id; - execbuf.rsvd2 = 0; - - ret = anv_gem_execbuffer(device, &execbuf); - if (ret != 0) { - /* We don't know the real error. 
*/ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); - goto fail; - } - - timeout = INT64_MAX; - ret = anv_gem_wait(device, bo->gem_handle, &timeout); - if (ret != 0) { - /* We don't know the real error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); - goto fail; - } - - anv_state_pool_free(&device->dynamic_state_pool, state); - - return VK_SUCCESS; - - fail: - anv_state_pool_free(&device->dynamic_state_pool, state); - - return result; + return submit_simple_batch(device, &batch); } VkResult -- cgit v1.2.3 From 6cdada0360145a620b0d7a279342f8bced87c902 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 5 Feb 2016 16:11:12 -0800 Subject: anv: Move invariant state to small initial batch We use the simple batch helper to submit a batch at driver startup time which holds all the state that never changes. We don't have a whole lot and once we enable tesselation there'll be even less. Even so, it's a simple mechanism and reduces our steady state batch sizes a bit. 
--- src/vulkan/anv_device.c | 27 ++++++++++-- src/vulkan/anv_private.h | 7 ++++ src/vulkan/gen7_pipeline.c | 9 ---- src/vulkan/gen7_state.c | 26 ++++++++++++ src/vulkan/gen8_pipeline.c | 81 ----------------------------------- src/vulkan/gen8_state.c | 102 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 158 insertions(+), 94 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 91485a191c0..6f874b2d1ab 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -673,8 +673,9 @@ anv_device_init_border_colors(struct anv_device *device) border_colors); } -static VkResult -submit_simple_batch(struct anv_device *device, struct anv_batch *batch) +VkResult +anv_device_submit_simple_batch(struct anv_device *device, + struct anv_batch *batch) { struct anv_state state; struct drm_i915_gem_execbuffer2 execbuf; @@ -685,6 +686,7 @@ submit_simple_batch(struct anv_device *device, struct anv_batch *batch) int64_t timeout; int ret; + /* Kernel driver requires 8 byte aligned batch length */ size = align_u32(batch->next - batch->start, 8); state = anv_state_pool_alloc(&device->dynamic_state_pool, MAX(size, 64), 32); bo = &device->dynamic_state_pool.block_pool->bo; @@ -702,7 +704,7 @@ submit_simple_batch(struct anv_device *device, struct anv_batch *batch) execbuf.buffers_ptr = (uintptr_t) exec2_objects; execbuf.buffer_count = 1; execbuf.batch_start_offset = state.offset; - execbuf.batch_len = batch->next - state.map; + execbuf.batch_len = size; execbuf.cliprects_ptr = 0; execbuf.num_cliprects = 0; execbuf.DR1 = 0; @@ -814,6 +816,23 @@ VkResult anv_CreateDevice( anv_queue_init(device, &device->queue); + switch (device->info.gen) { + case 7: + if (!device->info.is_haswell) + result = gen7_init_device_state(device); + else + result = gen75_init_device_state(device); + break; + case 8: + result = gen8_init_device_state(device); + break; + case 9: + result = gen9_init_device_state(device); + break; + } + if (result != 
VK_SUCCESS) + goto fail_fd; + result = anv_device_init_meta(device); if (result != VK_SUCCESS) goto fail_fd; @@ -1006,7 +1025,7 @@ VkResult anv_DeviceWaitIdle( anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); anv_batch_emit(&batch, GEN7_MI_NOOP); - return submit_simple_batch(device, &batch); + return anv_device_submit_simple_batch(device, &batch); } VkResult diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 8e12792456b..188bd77ab25 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -662,6 +662,11 @@ struct anv_device { pthread_mutex_t mutex; }; +VkResult gen7_init_device_state(struct anv_device *device); +VkResult gen75_init_device_state(struct anv_device *device); +VkResult gen8_init_device_state(struct anv_device *device); +VkResult gen9_init_device_state(struct anv_device *device); + void* anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); void anv_gem_munmap(void *p, uint64_t size); @@ -738,6 +743,8 @@ void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); uint64_t anv_batch_emit_reloc(struct anv_batch *batch, void *location, struct anv_bo *bo, uint32_t offset); +VkResult anv_device_submit_simple_batch(struct anv_device *device, + struct anv_batch *batch); struct anv_address { struct anv_bo *bo; diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 3fedd74f1ea..d4d6131794a 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -214,17 +214,8 @@ genX(graphics_pipeline_create)( gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState, pCreateInfo->pMultisampleState); - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VF_STATISTICS, - .StatisticsEnable = true); - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_HS, .Enable = false); - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_TE, .TEEnable = false); - 
anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_DS, .DSFunctionEnable = false); - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_STREAMOUT, .SOFunctionEnable = false); - emit_urb_setup(pipeline); - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); - const VkPipelineRasterizationStateCreateInfo *rs_info = pCreateInfo->pRasterizationState; diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 2375070636e..ee4b7f3a5c4 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -34,6 +34,32 @@ #include "genX_state_util.h" +VkResult +genX(init_device_state)(struct anv_device *device) +{ + struct anv_batch batch; + + uint32_t cmds[64]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + + anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), + .StatisticsEnable = true); + anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); + anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_DS), .DSFunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); + anv_batch_emit(&batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); + + assert(batch.next <= batch.end); + + return anv_device_submit_simple_batch(device, &batch); +} + GENX_FUNC(GEN7, GEN75) void genX(fill_buffer_surface_state)(void *state, enum isl_format format, uint32_t offset, uint32_t range, diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 4097de177ae..389f7f797c0 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -247,76 +247,6 @@ emit_ms_state(struct anv_pipeline *pipeline, anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), .SampleMask = sample_mask); - - /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and - * VkPhysicalDeviceFeatures::standardSampleLocations. 
- */ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_PATTERN), - ._1xSample0XOffset = 0.5, - ._1xSample0YOffset = 0.5, - ._2xSample0XOffset = 0.25, - ._2xSample0YOffset = 0.25, - ._2xSample1XOffset = 0.75, - ._2xSample1YOffset = 0.75, - ._4xSample0XOffset = 0.375, - ._4xSample0YOffset = 0.125, - ._4xSample1XOffset = 0.875, - ._4xSample1YOffset = 0.375, - ._4xSample2XOffset = 0.125, - ._4xSample2YOffset = 0.625, - ._4xSample3XOffset = 0.625, - ._4xSample3YOffset = 0.875, - ._8xSample0XOffset = 0.5625, - ._8xSample0YOffset = 0.3125, - ._8xSample1XOffset = 0.4375, - ._8xSample1YOffset = 0.6875, - ._8xSample2XOffset = 0.8125, - ._8xSample2YOffset = 0.5625, - ._8xSample3XOffset = 0.3125, - ._8xSample3YOffset = 0.1875, - ._8xSample4XOffset = 0.1875, - ._8xSample4YOffset = 0.8125, - ._8xSample5XOffset = 0.0625, - ._8xSample5YOffset = 0.4375, - ._8xSample6XOffset = 0.6875, - ._8xSample6YOffset = 0.9375, - ._8xSample7XOffset = 0.9375, - ._8xSample7YOffset = 0.0625, -#if ANV_GEN >= 9 - ._16xSample0XOffset = 0.5625, - ._16xSample0YOffset = 0.5625, - ._16xSample1XOffset = 0.4375, - ._16xSample1YOffset = 0.3125, - ._16xSample2XOffset = 0.3125, - ._16xSample2YOffset = 0.6250, - ._16xSample3XOffset = 0.7500, - ._16xSample3YOffset = 0.4375, - ._16xSample4XOffset = 0.1875, - ._16xSample4YOffset = 0.3750, - ._16xSample5XOffset = 0.6250, - ._16xSample5YOffset = 0.8125, - ._16xSample6XOffset = 0.8125, - ._16xSample6YOffset = 0.6875, - ._16xSample7XOffset = 0.6875, - ._16xSample7YOffset = 0.1875, - ._16xSample8XOffset = 0.3750, - ._16xSample8YOffset = 0.8750, - ._16xSample9XOffset = 0.5000, - ._16xSample9YOffset = 0.0625, - ._16xSample10XOffset = 0.2500, - ._16xSample10YOffset = 0.1250, - ._16xSample11XOffset = 0.1250, - ._16xSample11YOffset = 0.7500, - ._16xSample12XOffset = 0.0000, - ._16xSample12YOffset = 0.5000, - ._16xSample13XOffset = 0.9375, - ._16xSample13YOffset = 0.2500, - ._16xSample14XOffset = 0.8750, - ._16xSample14YOffset = 0.9375, - ._16xSample15XOffset = 0.0625, 
- ._16xSample15YOffset = 0.0000, -#endif - ); } VkResult @@ -359,19 +289,8 @@ genX(graphics_pipeline_create)( emit_cb_state(pipeline, pCreateInfo->pColorBlendState, pCreateInfo->pMultisampleState); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_STATISTICS), - .StatisticsEnable = true); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), .Enable = false); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), .TEEnable = false); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), .FunctionEnable = false); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); - emit_urb_setup(pipeline); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM_CHROMAKEY), - .ChromaKeyKillEnable = false); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), .ClipEnable = true, .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index c749cbfed0c..6b077df125a 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -34,6 +34,108 @@ #include "genX_state_util.h" +VkResult +genX(init_device_state)(struct anv_device *device) +{ + struct anv_batch batch; + + uint32_t cmds[64]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = _3D); + + anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), + .StatisticsEnable = true); + anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); + anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_DS), .FunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), + .ChromaKeyKillEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); + + /* See the Vulkan 1.0 spec Table 
24.1 "Standard sample locations" and + * VkPhysicalDeviceFeatures::standardSampleLocations. + */ + anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN), + ._1xSample0XOffset = 0.5, + ._1xSample0YOffset = 0.5, + ._2xSample0XOffset = 0.25, + ._2xSample0YOffset = 0.25, + ._2xSample1XOffset = 0.75, + ._2xSample1YOffset = 0.75, + ._4xSample0XOffset = 0.375, + ._4xSample0YOffset = 0.125, + ._4xSample1XOffset = 0.875, + ._4xSample1YOffset = 0.375, + ._4xSample2XOffset = 0.125, + ._4xSample2YOffset = 0.625, + ._4xSample3XOffset = 0.625, + ._4xSample3YOffset = 0.875, + ._8xSample0XOffset = 0.5625, + ._8xSample0YOffset = 0.3125, + ._8xSample1XOffset = 0.4375, + ._8xSample1YOffset = 0.6875, + ._8xSample2XOffset = 0.8125, + ._8xSample2YOffset = 0.5625, + ._8xSample3XOffset = 0.3125, + ._8xSample3YOffset = 0.1875, + ._8xSample4XOffset = 0.1875, + ._8xSample4YOffset = 0.8125, + ._8xSample5XOffset = 0.0625, + ._8xSample5YOffset = 0.4375, + ._8xSample6XOffset = 0.6875, + ._8xSample6YOffset = 0.9375, + ._8xSample7XOffset = 0.9375, + ._8xSample7YOffset = 0.0625, +#if ANV_GEN >= 9 + ._16xSample0XOffset = 0.5625, + ._16xSample0YOffset = 0.5625, + ._16xSample1XOffset = 0.4375, + ._16xSample1YOffset = 0.3125, + ._16xSample2XOffset = 0.3125, + ._16xSample2YOffset = 0.6250, + ._16xSample3XOffset = 0.7500, + ._16xSample3YOffset = 0.4375, + ._16xSample4XOffset = 0.1875, + ._16xSample4YOffset = 0.3750, + ._16xSample5XOffset = 0.6250, + ._16xSample5YOffset = 0.8125, + ._16xSample6XOffset = 0.8125, + ._16xSample6YOffset = 0.6875, + ._16xSample7XOffset = 0.6875, + ._16xSample7YOffset = 0.1875, + ._16xSample8XOffset = 0.3750, + ._16xSample8YOffset = 0.8750, + ._16xSample9XOffset = 0.5000, + ._16xSample9YOffset = 0.0625, + ._16xSample10XOffset = 0.2500, + ._16xSample10YOffset = 0.1250, + ._16xSample11XOffset = 0.1250, + ._16xSample11YOffset = 0.7500, + ._16xSample12XOffset = 0.0000, + ._16xSample12YOffset = 0.5000, + ._16xSample13XOffset = 0.9375, + ._16xSample13YOffset = 0.2500, + 
._16xSample14XOffset = 0.8750, + ._16xSample14YOffset = 0.9375, + ._16xSample15XOffset = 0.0625, + ._16xSample15YOffset = 0.0000, +#endif + ); + + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); + + assert(batch.next <= batch.end); + + return anv_device_submit_simple_batch(device, &batch); +} + static const uint32_t isl_to_gen_multisample_layout[] = { [ISL_MSAA_LAYOUT_NONE] = MSS, -- cgit v1.2.3 From bdefaae2b92c15af4f2bff41b0e689325e762dc7 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 5 Feb 2016 16:41:56 -0800 Subject: anv: Deduplicate anv_CmdDraw calls These were all duplicated between gen7_cmd_buffer.c and gen8_cmd_buffer.c. This commit consolidates both copies in genX_cmd_buffer.c. --- src/vulkan/anv_private.h | 5 ++ src/vulkan/gen7_cmd_buffer.c | 162 +------------------------------------- src/vulkan/gen8_cmd_buffer.c | 155 +----------------------------------- src/vulkan/genX_cmd_buffer.c | 182 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 191 insertions(+), 313 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 188bd77ab25..022aae12007 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1274,6 +1274,11 @@ void gen9_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, void anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); +void gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); + struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 7108d74ba65..8c563c9b5f1 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -371,8 +371,8 @@ 
cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.compute_dirty = 0; } -static void -cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +void +genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; uint32_t *p; @@ -568,164 +568,6 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty = 0; } -static void -emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, uint32_t offset) -{ - uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, - GENX(3DSTATE_VERTEX_BUFFERS)); - - GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, - &(struct GENX(VERTEX_BUFFER_STATE)) { - .VertexBufferIndex = 32, /* Reserved for this */ - .VertexBufferMemoryObjectControlState = GENX(MOCS), - .AddressModifyEnable = true, - .BufferPitch = 0, - .BufferStartingAddress = { bo, offset }, - .EndAddress = { bo, offset + 8 }, - }); -} - -static void -emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, - uint32_t base_vertex, uint32_t base_instance) -{ - struct anv_state id_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); - - ((uint32_t *)id_state.map)[0] = base_vertex; - ((uint32_t *)id_state.map)[1] = base_instance; - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(id_state); - - emit_base_vertex_instance_bo(cmd_buffer, - &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); -} - -void genX(CmdDraw)( - VkCommandBuffer commandBuffer, - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t firstVertex, - uint32_t firstInstance) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - - cmd_buffer_flush_state(cmd_buffer); - - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance(cmd_buffer, firstVertex, 
firstInstance); - - anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, - .VertexAccessType = SEQUENTIAL, - .PrimitiveTopologyType = pipeline->topology, - .VertexCountPerInstance = vertexCount, - .StartVertexLocation = firstVertex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = 0); -} - -void genX(CmdDrawIndexed)( - VkCommandBuffer commandBuffer, - uint32_t indexCount, - uint32_t instanceCount, - uint32_t firstIndex, - int32_t vertexOffset, - uint32_t firstInstance) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - - cmd_buffer_flush_state(cmd_buffer); - - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); - - anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, - .VertexAccessType = RANDOM, - .PrimitiveTopologyType = pipeline->topology, - .VertexCountPerInstance = indexCount, - .StartVertexLocation = firstIndex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = vertexOffset); -} - -/* Auto-Draw / Indirect Registers */ -#define GEN7_3DPRIM_END_OFFSET 0x2420 -#define GEN7_3DPRIM_START_VERTEX 0x2430 -#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 -#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 -#define GEN7_3DPRIM_START_INSTANCE 0x243C -#define GEN7_3DPRIM_BASE_VERTEX 0x2440 - -void genX(CmdDrawIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - cmd_buffer_flush_state(cmd_buffer); - - if 
(cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); - - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); - emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); - - anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, - .IndirectParameterEnable = true, - .VertexAccessType = SEQUENTIAL, - .PrimitiveTopologyType = pipeline->topology); -} - -void genX(CmdDrawIndexedIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - cmd_buffer_flush_state(cmd_buffer); - - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); - - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); - - anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, - .IndirectParameterEnable = true, - .VertexAccessType = RANDOM, - .PrimitiveTopologyType = pipeline->topology); -} - void genX(CmdDispatch)( VkCommandBuffer commandBuffer, 
uint32_t x, diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 56d80e26eeb..b997a2ecf05 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -260,8 +260,8 @@ __emit_sf_state(struct anv_cmd_buffer *cmd_buffer) __emit_genx_sf_state(cmd_buffer); } -static void -cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +void +genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; uint32_t *p; @@ -479,127 +479,6 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty = 0; } -static void -emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, uint32_t offset) -{ - uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, - GENX(3DSTATE_VERTEX_BUFFERS)); - - GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, - &(struct GENX(VERTEX_BUFFER_STATE)) { - .VertexBufferIndex = 32, /* Reserved for this */ - .MemoryObjectControlState = GENX(MOCS), - .AddressModifyEnable = true, - .BufferPitch = 0, - .BufferStartingAddress = { bo, offset }, - .BufferSize = 8 - }); -} - -static void -emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, - uint32_t base_vertex, uint32_t base_instance) -{ - struct anv_state id_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); - - ((uint32_t *)id_state.map)[0] = base_vertex; - ((uint32_t *)id_state.map)[1] = base_instance; - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(id_state); - - emit_base_vertex_instance_bo(cmd_buffer, - &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); -} - -void genX(CmdDraw)( - VkCommandBuffer commandBuffer, - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t firstVertex, - uint32_t firstInstance) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer_flush_state(cmd_buffer); - - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - 
cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), - .VertexAccessType = SEQUENTIAL, - .VertexCountPerInstance = vertexCount, - .StartVertexLocation = firstVertex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = 0); -} - -void genX(CmdDrawIndexed)( - VkCommandBuffer commandBuffer, - uint32_t indexCount, - uint32_t instanceCount, - uint32_t firstIndex, - int32_t vertexOffset, - uint32_t firstInstance) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer_flush_state(cmd_buffer); - - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), - .VertexAccessType = RANDOM, - .VertexCountPerInstance = indexCount, - .StartVertexLocation = firstIndex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = vertexOffset); -} - -/* Auto-Draw / Indirect Registers */ -#define GEN7_3DPRIM_END_OFFSET 0x2420 -#define GEN7_3DPRIM_START_VERTEX 0x2430 -#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 -#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 -#define GEN7_3DPRIM_START_INSTANCE 0x243C -#define GEN7_3DPRIM_BASE_VERTEX 0x2440 - -void genX(CmdDrawIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - cmd_buffer_flush_state(cmd_buffer); - - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - 
emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); - - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); - emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), - .IndirectParameterEnable = true, - .VertexAccessType = SEQUENTIAL); -} - void genX(CmdBindIndexBuffer)( VkCommandBuffer commandBuffer, VkBuffer _buffer, @@ -745,36 +624,6 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.compute_dirty = 0; } -void genX(CmdDrawIndexedIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - cmd_buffer_flush_state(cmd_buffer); - - /* TODO: We need to stomp base vertex to 0 somehow */ - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); - - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), - .IndirectParameterEnable = true, - .VertexAccessType = RANDOM); -} - void genX(CmdDispatch)( VkCommandBuffer commandBuffer, uint32_t x, diff 
--git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index 1ab17470932..e3fa043c66b 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -273,3 +273,185 @@ void genX(CmdPipelineBarrier)( dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &cmd); } + +static void +emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) +{ + uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, + GENX(3DSTATE_VERTEX_BUFFERS)); + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, + &(struct GENX(VERTEX_BUFFER_STATE)) { + .VertexBufferIndex = 32, /* Reserved for this */ + .AddressModifyEnable = true, + .BufferPitch = 0, +#if (ANV_GEN >= 8) + .MemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { bo, offset }, + .BufferSize = 8 +#else + .VertexBufferMemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { bo, offset }, + .EndAddress = { bo, offset + 8 }, +#endif + }); +} + +static void +emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, + uint32_t base_vertex, uint32_t base_instance) +{ + struct anv_state id_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); + + ((uint32_t *)id_state.map)[0] = base_vertex; + ((uint32_t *)id_state.map)[1] = base_instance; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(id_state); + + emit_base_vertex_instance_bo(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); +} + +void genX(CmdDraw)( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + 
cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .VertexAccessType = SEQUENTIAL, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void genX(CmdDrawIndexed)( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +static void +emit_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + 
.RegisterOffset = reg, + .DataDWord = imm); +} + +void genX(CmdDrawIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL, + .PrimitiveTopologyType = pipeline->topology); +} + +void genX(CmdDrawIndexedIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + /* TODO: We need to stomp base vertex to 0 somehow */ + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); + + emit_lrm(&cmd_buffer->batch, 
GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology); +} -- cgit v1.2.3 From 6c4c04690f30af655ad675b590836d3a84440c3a Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 5 Feb 2016 22:36:53 -0800 Subject: anv: Deduplicate dispatch calls This can all be shared between gen8+ and pre-gen8. --- src/vulkan/anv_private.h | 5 +++ src/vulkan/gen7_cmd_buffer.c | 100 +------------------------------------------ src/vulkan/gen8_cmd_buffer.c | 92 +-------------------------------------- src/vulkan/genX_cmd_buffer.c | 80 ++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 188 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 022aae12007..d26c0b2b88d 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1279,6 +1279,11 @@ void gen75_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); void gen9_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); + struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c 
index 8c563c9b5f1..e843b942b1d 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -326,25 +326,8 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; } -static void -emit_lrm(struct anv_batch *batch, - uint32_t reg, struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); -} - -static void -emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_IMM, - .RegisterOffset = reg, - .DataDWord = imm); -} - -static void -cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +void +genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; VkResult result; @@ -568,85 +551,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty = 0; } -void genX(CmdDispatch)( - VkCommandBuffer commandBuffer, - uint32_t x, - uint32_t y, - uint32_t z) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - - if (prog_data->uses_num_work_groups) { - struct anv_state state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); - uint32_t *sizes = state.map; - sizes[0] = x; - sizes[1] = y; - sizes[2] = z; - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - cmd_buffer->state.num_workgroups_offset = state.offset; - cmd_buffer->state.num_workgroups_bo = - &cmd_buffer->device->dynamic_state_block_pool.bo; - } - - cmd_buffer_flush_compute_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, - 
.ThreadGroupIDXDimension = x, - .ThreadGroupIDYDimension = y, - .ThreadGroupIDZDimension = z, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH); -} - -#define GPGPU_DISPATCHDIMX 0x2500 -#define GPGPU_DISPATCHDIMY 0x2504 -#define GPGPU_DISPATCHDIMZ 0x2508 - -void genX(CmdDispatchIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - if (prog_data->uses_num_work_groups) { - cmd_buffer->state.num_workgroups_offset = bo_offset; - cmd_buffer->state.num_workgroups_bo = bo; - } - - cmd_buffer_flush_compute_state(cmd_buffer); - - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - - anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, - .IndirectParameterEnable = true, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH); -} - static void cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index b997a2ecf05..3ba5bbcf4a5 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -146,15 +146,6 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) } #endif -static void 
-emit_lrm(struct anv_batch *batch, - uint32_t reg, struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); -} - static void emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) { @@ -577,8 +568,8 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; } -static void -cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +void +genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; VkResult result; @@ -624,85 +615,6 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.compute_dirty = 0; } -void genX(CmdDispatch)( - VkCommandBuffer commandBuffer, - uint32_t x, - uint32_t y, - uint32_t z) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - - if (prog_data->uses_num_work_groups) { - struct anv_state state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); - uint32_t *sizes = state.map; - sizes[0] = x; - sizes[1] = y; - sizes[2] = z; - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - cmd_buffer->state.num_workgroups_offset = state.offset; - cmd_buffer->state.num_workgroups_bo = - &cmd_buffer->device->dynamic_state_block_pool.bo; - } - - cmd_buffer_flush_compute_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, - .ThreadGroupIDXDimension = x, - .ThreadGroupIDYDimension = y, - .ThreadGroupIDZDimension = z, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, 
GENX(MEDIA_STATE_FLUSH)); -} - -#define GPGPU_DISPATCHDIMX 0x2500 -#define GPGPU_DISPATCHDIMY 0x2504 -#define GPGPU_DISPATCHDIMZ 0x2508 - -void genX(CmdDispatchIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - if (prog_data->uses_num_work_groups) { - cmd_buffer->state.num_workgroups_offset = bo_offset; - cmd_buffer->state.num_workgroups_bo = bo; - } - - cmd_buffer_flush_compute_state(cmd_buffer); - - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - - anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), - .IndirectParameterEnable = true, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); -} - static void cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index e3fa043c66b..923f2086717 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -455,3 +455,83 @@ void genX(CmdDrawIndexedIndirect)( .VertexAccessType = RANDOM, .PrimitiveTopologyType = pipeline->topology); } + + +void genX(CmdDispatch)( + VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline 
*pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + if (prog_data->uses_num_work_groups) { + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); + uint32_t *sizes = state.map; + sizes[0] = x; + sizes[1] = y; + sizes[2] = z; + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + cmd_buffer->state.num_workgroups_offset = state.offset; + cmd_buffer->state.num_workgroups_bo = + &cmd_buffer->device->dynamic_state_block_pool.bo; + } + + genX(cmd_buffer_flush_compute_state)(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void genX(CmdDispatchIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + if (prog_data->uses_num_work_groups) { + cmd_buffer->state.num_workgroups_offset = bo_offset; + cmd_buffer->state.num_workgroups_bo = bo; + } + + genX(cmd_buffer_flush_compute_state)(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, 
GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} -- cgit v1.2.3 From f50a6517267230befe7ca5a5fbf064a5d1153f8e Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 1 Feb 2016 14:59:01 -0800 Subject: nir/spirv: Create integer types of correct signedness. vtn_handle_type() creates a signed type regardless of the value of the signedness flag, which usually doesn't make much of a difference except when the type is used as base sampled type of an image type, what will cause the base type of the NIR image variable to be inconsistent with its format and cause an assertion failure in the back-end (most likely only reproducible on Gen7), and may change the semantics of the image intrinsic subtly (e.g. UMIN may become IMIN). --- src/compiler/nir/spirv/spirv_to_nir.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index c002457ce12..ee39b333c1a 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -594,9 +594,11 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, case SpvOpTypeBool: val->type->type = glsl_bool_type(); break; - case SpvOpTypeInt: - val->type->type = glsl_int_type(); + case SpvOpTypeInt: { + const bool signedness = w[3]; + val->type->type = (signedness ? 
glsl_int_type() : glsl_uint_type()); break; + } case SpvOpTypeFloat: val->type->type = glsl_float_type(); break; -- cgit v1.2.3 From cec6fe2ad85717a438c80aaf4f1d3da35e4e5fd1 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 28 Jan 2016 18:59:00 -0800 Subject: vtn: Clean up acos implementation. Parameterize build_asin() on the fit coefficients so the implementation can be shared while still using different polynomials for asin and acos. Also switch back to implementing acos in terms of asin -- The improvement obtained from cancelling out the pi/2 terms was negligible compared to the approximation error. --- src/compiler/nir/spirv/vtn_glsl450.c | 44 +++++++++++++----------------------- 1 file changed, 16 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/vtn_glsl450.c b/src/compiler/nir/spirv/vtn_glsl450.c index bc38aa4b1be..4fceffa37a6 100644 --- a/src/compiler/nir/spirv/vtn_glsl450.c +++ b/src/compiler/nir/spirv/vtn_glsl450.c @@ -208,12 +208,19 @@ build_log(nir_builder *b, nir_ssa_def *x) return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E)); } +/** + * Approximate asin(x) by the formula: + * asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1)))) + * + * which is correct to first order at x=0 and x=±1 regardless of the p + * coefficients but can be made second-order correct at both ends by selecting + * the fit coefficients appropriately. Different p coefficients can be used + * in the asin and acos implementation to minimize some relative error metric + * in each case. 
+ */ static nir_ssa_def * -build_asin(nir_builder *b, nir_ssa_def *x) +build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1) { - /* - * asin(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi / 4 - 1 + |x| * (0.086566724 + |x| * -0.03102955))) - */ nir_ssa_def *abs_x = nir_fabs(b, x); return nir_fmul(b, nir_fsign(b, x), nir_fsub(b, nir_imm_float(b, M_PI_2f), @@ -222,29 +229,9 @@ build_asin(nir_builder *b, nir_ssa_def *x) nir_fmul(b, abs_x, nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), nir_fmul(b, abs_x, - nir_fadd(b, nir_imm_float(b, 0.086566724f), + nir_fadd(b, nir_imm_float(b, p0), nir_fmul(b, abs_x, - nir_imm_float(b, -0.03102955f)))))))))); -} - -static nir_ssa_def * -build_acos(nir_builder *b, nir_ssa_def *x) -{ - /* - * poly(x) = sign(x) * sqrt(1 - |x|) * (pi / 2 + |x| * (pi / 4 - 1 + |x| * (0.08132463 + |x| * -0.02363318))) - */ - nir_ssa_def *abs_x = nir_fabs(b, x); - nir_ssa_def *poly = nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), - nir_fadd(b, nir_imm_float(b, M_PI_2f), - nir_fmul(b, abs_x, - nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), - nir_fmul(b, abs_x, - nir_fadd(b, nir_imm_float(b, 0.08132463f), - nir_fmul(b, abs_x, - nir_imm_float(b, -0.02363318f)))))))); - return nir_bcsel(b, nir_flt(b, x, nir_imm_float(b, 0)), - nir_fsub(b, nir_imm_float(b, M_PI), poly), - poly); + nir_imm_float(b, p1)))))))))); } /** @@ -605,11 +592,12 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450FindUMsb: op = nir_op_ufind_msb; break; case GLSLstd450Asin: - val->ssa->def = build_asin(nb, src[0]); + val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955); return; case GLSLstd450Acos: - val->ssa->def = build_acos(nb, src[0]); + val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f), + build_asin(nb, src[0], 0.08132463, -0.02363318)); return; case GLSLstd450Atan: -- cgit v1.2.3 From 4d037b551e1ac78bec22ce4472b3466d4b2b1f9a Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 8 Feb 2016 18:51:52 -0800 
Subject: anv: Rename anv_format::surface_format -> isl_format Because that's what it is, an isl format. --- src/vulkan/anv_cmd_buffer.c | 2 +- src/vulkan/anv_descriptor_set.c | 2 +- src/vulkan/anv_formats.c | 12 ++++++------ src/vulkan/anv_image.c | 4 ++-- src/vulkan/anv_meta.c | 2 +- src/vulkan/anv_private.h | 2 +- src/vulkan/anv_wsi_wayland.c | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index a0f9bab8e1e..6b1c31f0e2e 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -763,7 +763,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, const struct anv_format *format = anv_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); anv_fill_buffer_surface_state(cmd_buffer->device, surface_state, - format->surface_format, bo_offset, 12, 1); + format->isl_format, bo_offset, 12, 1); bt_map[0] = surface_state.offset + state_offset; add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index 0ddd4bf1bc2..d5e6286f3ee 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -551,7 +551,7 @@ void anv_UpdateDescriptorSets( const struct anv_format *format = anv_format_for_descriptor_type(write->descriptorType); - view->format = format->surface_format; + view->format = format->isl_format; view->bo = buffer->bo; view->offset = buffer->offset + write->pBufferInfo[j].offset; diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index e3c786ccca7..d96d730d378 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -33,7 +33,7 @@ [__vk_fmt] = { \ .vk_format = __vk_fmt, \ .name = #__vk_fmt, \ - .surface_format = __hw_fmt, \ + .isl_format = __hw_fmt, \ .isl_layout = &isl_format_layouts[__hw_fmt], \ .swizzle = __swizzle, \ __VA_ARGS__ \ @@ -259,7 +259,7 @@ anv_get_isl_format(VkFormat 
format, VkImageAspectFlags aspect, switch (aspect) { case VK_IMAGE_ASPECT_COLOR_BIT: - if (anv_fmt->surface_format == ISL_FORMAT_UNSUPPORTED) { + if (anv_fmt->isl_format == ISL_FORMAT_UNSUPPORTED) { return ISL_FORMAT_UNSUPPORTED; } else if (tiling == VK_IMAGE_TILING_OPTIMAL && !util_is_power_of_two(anv_fmt->isl_layout->bs)) { @@ -268,19 +268,19 @@ anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, * this by switching them over to RGBX or RGBA formats under the * hood. */ - enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->surface_format); + enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->isl_format); if (rgbx != ISL_FORMAT_UNSUPPORTED) return rgbx; else - return isl_format_rgb_to_rgba(anv_fmt->surface_format); + return isl_format_rgb_to_rgba(anv_fmt->isl_format); } else { - return anv_fmt->surface_format; + return anv_fmt->isl_format; } case VK_IMAGE_ASPECT_DEPTH_BIT: case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT): assert(anv_fmt->depth_format != 0); - return anv_fmt->surface_format; + return anv_fmt->isl_format; case VK_IMAGE_ASPECT_STENCIL_BIT: assert(anv_fmt->has_stencil); diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index da71406cf27..2ed654feee2 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -531,7 +531,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->base_mip = range->baseMipLevel; if (!isl_format_is_compressed(iview->format) && - isl_format_is_compressed(image->format->surface_format)) { + isl_format_is_compressed(image->format->isl_format)) { /* Scale the ImageView extent by the backing Image. This is used * internally when an uncompressed ImageView is created on a * compressed Image. 
The ImageView can therefore be used for copying @@ -660,7 +660,7 @@ anv_CreateBufferView(VkDevice _device, const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); - view->format = format->surface_format; + view->format = format->isl_format; view->bo = buffer->bo; view->offset = buffer->offset + pCreateInfo->offset; view->range = pCreateInfo->range == VK_WHOLE_SIZE ? diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 72a927a08ee..2867e471a32 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -1334,7 +1334,7 @@ void anv_CmdCopyBufferToImage( uint32_t img_x = 0; uint32_t img_y = 0; uint32_t img_o = 0; - if (isl_format_is_compressed(dest_image->format->surface_format)) + if (isl_format_is_compressed(dest_image->format->isl_format)) isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, &dest_image->color_surface.isl, pRegions[r].imageSubresource.mipLevel, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index d26c0b2b88d..769f81c609b 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1510,7 +1510,7 @@ struct anv_format_swizzle { struct anv_format { const VkFormat vk_format; const char *name; - enum isl_format surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ + enum isl_format isl_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ const struct isl_format_layout *isl_layout; uint16_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ struct anv_format_swizzle swizzle; diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index 31d5f836a9f..4ae594b10d4 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -60,7 +60,7 @@ wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format) return; /* Don't add formats which aren't supported by the driver */ - if (anv_format_for_vk_format(format)->surface_format == + if (anv_format_for_vk_format(format)->isl_format == ISL_FORMAT_UNSUPPORTED) { return; } 
-- cgit v1.2.3 From 0a9306799380ff13dc21c0b7626a0e1a4d338d7d Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 8 Feb 2016 18:48:41 -0800 Subject: isl: Add func isl_surf_get_depth_format() For depth surfaces, it gets the value for 3DSTATE_DEPTH_BUFFER.SurfaceFormat. --- src/isl/isl.c | 40 ++++++++++++++++++++++++++++++++++++++++ src/isl/isl.h | 10 ++++++++++ 2 files changed, 50 insertions(+) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index ec6323741e8..f5b2cd5b250 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -1382,3 +1382,43 @@ isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, *x_offset_el = small_x_offset_el; *y_offset_el = small_y_offset_el; } + +uint32_t +isl_surf_get_depth_format(const struct isl_device *dev, + const struct isl_surf *surf) +{ + /* Support for separate stencil buffers began in gen5. Support for + * interleaved depthstencil buffers ceased in gen7. The intermediate gens, + * those that supported separate and interleaved stencil, were gen5 and + * gen6. + * + * For a list of all available formats, see the Sandybridge PRM >> Volume + * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface + * Format (p321). 
+ */ + + assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT); + + if (surf->usage & ISL_SURF_USAGE_STENCIL_BIT) + assert(ISL_DEV_GEN(dev) < 7); + + switch (surf->format) { + default: + unreachable("bad isl depth format"); + case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS: + assert(ISL_DEV_GEN(dev) < 7); + return 0; /* D32_FLOAT_S8X24_UINT */ + case ISL_FORMAT_R32_FLOAT: + return 1; /* D32_FLOAT */ + case ISL_FORMAT_R24_UNORM_X8_TYPELESS: + if (surf->usage & ISL_SURF_USAGE_STENCIL_BIT) { + assert(ISL_DEV_GEN(dev) < 7); + return 2; /* D24_UNORM_S8_UINT */ + } else { + assert(ISL_DEV_GEN(dev) >= 5); + return 3; /* D24_UNORM_X8_UINT */ + } + case ISL_FORMAT_R16_UNORM: + return 5; /* D16_UNORM */ + } +} diff --git a/src/isl/isl.h b/src/isl/isl.h index bc7a315e8ae..3e0ff935948 100644 --- a/src/isl/isl.h +++ b/src/isl/isl.h @@ -1010,6 +1010,16 @@ isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, uint32_t *x_offset_el, uint32_t *y_offset_el); +/** + * @brief Get value of 3DSTATE_DEPTH_BUFFER.SurfaceFormat + * + * @pre surf->usage has ISL_SURF_USAGE_DEPTH_BIT + * @pre surf->format must be a valid format for depth surfaces + */ +uint32_t +isl_surf_get_depth_format(const struct isl_device *dev, + const struct isl_surf *surf); + #ifdef __cplusplus } #endif -- cgit v1.2.3 From e6d3432c810fdc474c1a0e76eb27621568fd4f39 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 8 Feb 2016 19:07:10 -0800 Subject: anv: Replace anv_format::depth_format with ::has_depth isl now understands depth formats. We no longer need depth formats in the anv_format table. 
--- src/vulkan/anv_cmd_buffer.c | 2 +- src/vulkan/anv_formats.c | 18 +++++++++--------- src/vulkan/anv_image.c | 20 ++++++++++---------- src/vulkan/anv_private.h | 6 +++--- src/vulkan/gen7_cmd_buffer.c | 9 ++++----- src/vulkan/gen8_cmd_buffer.c | 11 +++++------ 6 files changed, 32 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 6b1c31f0e2e..bc6b3925cd2 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -177,7 +177,7 @@ anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, } } else { /* depthstencil attachment */ - if (att->format->depth_format && + if (att->format->has_depth && att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; } diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index d96d730d378..09cd8b9ddf9 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -159,13 +159,13 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT), fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP), - fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, .depth_format = D16_UNORM), - fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .depth_format = D24_UNORM_X8_UINT), - fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .depth_format = D32_FLOAT), - fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .has_stencil = true), - fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .depth_format = D16_UNORM, .has_stencil = true), - fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), - fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .depth_format = D32_FLOAT, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, .has_depth = true), + fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true), 
+ fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .has_depth = true), + fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .has_depth = true, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .has_depth = true, .has_stencil = true), fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_DXT1_RGB), fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_DXT1_RGB_SRGB), @@ -279,7 +279,7 @@ anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, case VK_IMAGE_ASPECT_DEPTH_BIT: case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT): - assert(anv_fmt->depth_format != 0); + assert(anv_fmt->has_depth); return anv_fmt->isl_format; case VK_IMAGE_ASPECT_STENCIL_BIT: @@ -387,7 +387,7 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; } - if (anv_formats[format].depth_format) { + if (anv_formats[format].has_depth) { tiled |= VK_FORMAT_FEATURE_BLIT_DST_BIT; } } else { diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 2ed654feee2..9e7f236f851 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -221,7 +221,7 @@ anv_image_create(VkDevice _device, if (r != VK_SUCCESS) goto fail; } else { - if (image->format->depth_format) { + if (image->format->has_depth) { r = make_surface(device, image, create_info, VK_IMAGE_ASPECT_DEPTH_BIT); if (r != VK_SUCCESS) @@ -368,9 +368,9 @@ anv_validate_CreateImageView(VkDevice _device, /* Validate format. 
*/ if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); - assert(!image->format->depth_format); + assert(!image->format->has_depth); assert(!image->format->has_stencil); - assert(!view_format_info->depth_format); + assert(!view_format_info->has_depth); assert(!view_format_info->has_stencil); assert(view_format_info->isl_layout->bs == image->format->isl_layout->bs); @@ -378,8 +378,8 @@ anv_validate_CreateImageView(VkDevice _device, assert((subresource->aspectMask & ~ds_flags) == 0); if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { - assert(image->format->depth_format); - assert(view_format_info->depth_format); + assert(image->format->has_depth); + assert(view_format_info->has_depth); assert(view_format_info->isl_layout->bs == image->format->isl_layout->bs); } @@ -730,9 +730,9 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag * Meta attaches all destination surfaces as color render targets. Guess * what surface the Meta Dragons really want. 
*/ - if (image->format->depth_format && image->format->has_stencil) { + if (image->format->has_depth && image->format->has_stencil) { return &image->depth_surface; - } else if (image->format->depth_format) { + } else if (image->format->has_depth) { return &image->depth_surface; } else if (image->format->has_stencil) { return &image->stencil_surface; @@ -741,13 +741,13 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag } break; case VK_IMAGE_ASPECT_DEPTH_BIT: - assert(image->format->depth_format); + assert(image->format->has_depth); return &image->depth_surface; case VK_IMAGE_ASPECT_STENCIL_BIT: assert(image->format->has_stencil); return &image->stencil_surface; case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: - if (image->format->depth_format && image->format->has_stencil) { + if (image->format->has_depth && image->format->has_stencil) { /* FINISHME: The Vulkan spec (git a511ba2) requires support for * combined depth stencil formats. Specifically, it states: * @@ -760,7 +760,7 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag * stencil surfaces from the underlying surface. 
*/ return &image->depth_surface; - } else if (image->format->depth_format) { + } else if (image->format->has_depth) { return &image->depth_surface; } else if (image->format->has_stencil) { return &image->stencil_surface; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 769f81c609b..b7d35f88ef0 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1512,8 +1512,8 @@ struct anv_format { const char *name; enum isl_format isl_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ const struct isl_format_layout *isl_layout; - uint16_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ struct anv_format_swizzle swizzle; + bool has_depth; bool has_stencil; }; @@ -1527,13 +1527,13 @@ anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, static inline bool anv_format_is_color(const struct anv_format *format) { - return !format->depth_format && !format->has_stencil; + return !format->has_depth && !format->has_stencil; } static inline bool anv_format_is_depth_or_stencil(const struct anv_format *format) { - return format->depth_format || format->has_stencil; + return format->has_depth || format->has_stencil; } /** diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index e843b942b1d..4bec8a620c5 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -554,16 +554,14 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) static void cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { + struct anv_device *device = cmd_buffer->device; const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); const struct anv_image *image = iview ? iview->image : NULL; - - /* XXX: isl needs to grow depth format support */ const struct anv_format *anv_format = iview ? 
anv_format_for_vk_format(iview->vk_format) : NULL; - - const bool has_depth = iview && anv_format->depth_format; + const bool has_depth = iview && anv_format->has_depth; const bool has_stencil = iview && anv_format->has_stencil; /* Emit 3DSTATE_DEPTH_BUFFER */ @@ -573,7 +571,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .DepthWriteEnable = true, .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = anv_format->depth_format, + .SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev, + &image->depth_surface.isl), .SurfacePitch = image->depth_surface.isl.row_pitch - 1, .SurfaceBaseAddress = { .bo = image->bo, diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 3ba5bbcf4a5..b81944c2156 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -618,16 +618,14 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) static void cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { + struct anv_device *device = cmd_buffer->device; const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); const struct anv_image *image = iview ? iview->image : NULL; - - /* XXX: isl needs to grow depth format support */ const struct anv_format *anv_format = iview ? 
anv_format_for_vk_format(iview->vk_format) : NULL; - - const bool has_depth = iview && anv_format->depth_format; + const bool has_depth = iview && anv_format->has_depth; const bool has_stencil = iview && anv_format->has_stencil; /* FIXME: Implement the PMA stall W/A */ @@ -637,10 +635,11 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) if (has_depth) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = anv_format->depth_format, + .DepthWriteEnable = true, .StencilWriteEnable = has_stencil, .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = anv_format->depth_format, + .SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev, + &image->depth_surface.isl), .SurfacePitch = image->depth_surface.isl.row_pitch - 1, .SurfaceBaseAddress = { .bo = image->bo, -- cgit v1.2.3 From a485567d3a4c3ac95f329fc29b66e99b3a6adce0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 8 Feb 2016 15:27:16 -0800 Subject: anv/WSI/X11: Use the right allocator for freeing swapchains --- src/vulkan/anv_wsi_x11.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index acb4a60e65f..1e08bf31b4b 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -538,7 +538,7 @@ x11_swapchain_destroy(struct anv_swapchain *anv_chain, /* TODO: Delete images and free memory */ } - anv_free(NULL /* XXX: pAllocator */, chain); + anv_free2(&chain->base.device->alloc, pAllocator, chain); return VK_SUCCESS; } -- cgit v1.2.3 From b6c00bfb03b79ea01e6eaff916e21ed97200e330 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 8 Feb 2016 16:56:21 -0800 Subject: nir: Rework function parameters --- src/compiler/nir/nir.c | 18 ++++++++++++++++++ src/compiler/nir/nir.h | 11 +++++++++-- src/compiler/nir/nir_clone.c | 7 ++++--- src/compiler/nir/nir_inline_functions.c | 13 +++++++++++-- src/compiler/nir/nir_print.c | 10 ++++++++++ 
src/compiler/nir/nir_sweep.c | 2 ++ src/compiler/nir/nir_validate.c | 14 ++++++++++---- src/compiler/nir/spirv/vtn_cfg.c | 13 +++---------- 8 files changed, 67 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 42a53f6f3db..4ab9fde871a 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -117,6 +117,10 @@ nir_shader_add_variable(nir_shader *shader, nir_variable *var) assert(!"nir_shader_add_variable cannot be used for local variables"); break; + case nir_var_param: + assert(!"nir_shader_add_variable cannot be used for function parameters"); + break; + case nir_var_global: exec_list_push_tail(&shader->globals, &var->node); break; @@ -307,6 +311,20 @@ nir_function_impl_create(nir_function *function) impl->params = ralloc_array(function->shader, nir_variable *, impl->num_params); + for (unsigned i = 0; i < impl->num_params; i++) { + impl->params[i] = rzalloc(function->shader, nir_variable); + impl->params[i]->type = function->params[i].type; + impl->params[i]->data.mode = nir_var_param; + impl->params[i]->data.location = i; + } + + if (!glsl_type_is_void(function->return_type)) { + impl->return_var = rzalloc(function->shader, nir_variable); + impl->return_var->type = function->return_type; + impl->return_var->data.mode = nir_var_param; + impl->return_var->data.location = -1; + } + return impl; } diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index f130e5e0eb1..7aba195fa69 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -89,7 +89,8 @@ typedef enum { nir_var_uniform, nir_var_shader_storage, nir_var_shared, - nir_var_system_value + nir_var_system_value, + nir_var_param, } nir_variable_mode; /** @@ -172,7 +173,7 @@ typedef struct nir_variable { * * \sa nir_variable_mode */ - nir_variable_mode mode:4; + nir_variable_mode mode:5; /** * Interpolation mode for shader inputs / outputs @@ -355,6 +356,12 @@ typedef struct nir_variable { #define 
nir_foreach_variable(var, var_list) \ foreach_list_typed(nir_variable, var, node, var_list) +static inline bool +nir_variable_is_global(const nir_variable *var) +{ + return var->data.mode != nir_var_local && var->data.mode != nir_var_param; +} + /** * Returns the bits in the inputs_read, outputs_written, or * system_values_read bitfield corresponding to this variable. diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index bc6df56b753..8ece50a69cb 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -106,7 +106,7 @@ remap_reg(clone_state *state, const nir_register *reg) static nir_variable * remap_var(clone_state *state, const nir_variable *var) { - return _lookup_ptr(state, var, var->data.mode != nir_var_local); + return _lookup_ptr(state, var, nir_variable_is_global(var)); } nir_constant * @@ -591,9 +591,10 @@ clone_function_impl(clone_state *state, const nir_function_impl *fi) nfi->num_params = fi->num_params; nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params); for (unsigned i = 0; i < fi->num_params; i++) { - nfi->params[i] = remap_local(state, fi->params[i]); + nfi->params[i] = clone_variable(state, fi->params[i]); } - nfi->return_var = remap_local(state, fi->return_var); + if (fi->return_var) + nfi->return_var = clone_variable(state, fi->return_var); assert(list_empty(&state->phi_srcs)); diff --git a/src/compiler/nir/nir_inline_functions.c b/src/compiler/nir/nir_inline_functions.c index 3cf83279053..b343eb735b1 100644 --- a/src/compiler/nir/nir_inline_functions.c +++ b/src/compiler/nir/nir_inline_functions.c @@ -69,12 +69,17 @@ inline_functions_block(nir_block *block, void *void_state) /* Add copies of all in parameters */ assert(call->num_params == callee_copy->num_params); for (unsigned i = 0; i < callee_copy->num_params; i++) { + nir_variable *param = callee_copy->params[i]; + + /* Turn it into a local variable */ + param->data.mode = nir_var_local; + exec_list_push_head(&b->impl->locals, 
&param->node); + /* Only in or inout parameters */ if (call->callee->params[i].param_type == nir_parameter_out) continue; - nir_copy_deref_var(b, nir_deref_var_create(b->shader, - callee_copy->params[i]), + nir_copy_deref_var(b, nir_deref_var_create(b->shader, param), call->params[i]); } @@ -97,6 +102,10 @@ inline_functions_block(nir_block *block, void *void_state) callee_copy->params[i])); } if (!glsl_type_is_void(call->callee->return_type)) { + /* Turn it into a local variable */ + callee_copy->return_var->data.mode = nir_var_local; + exec_list_push_head(&b->impl->locals, &callee_copy->return_var->node); + nir_copy_deref_var(b, call->return_deref, nir_deref_var_create(b->shader, callee_copy->return_var)); diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index f36b91de6e0..276a948460c 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -943,6 +943,16 @@ print_function_impl(nir_function_impl *impl, print_state *state) fprintf(fp, "{\n"); + for (unsigned i = 0; i < impl->num_params; i++) { + fprintf(fp, "\t"); + print_var_decl(impl->params[i], state); + } + + if (impl->return_var) { + fprintf(fp, "\t"); + print_var_decl(impl->return_var, state); + } + nir_foreach_variable(var, &impl->locals) { fprintf(fp, "\t"); print_var_decl(var, state); diff --git a/src/compiler/nir/nir_sweep.c b/src/compiler/nir/nir_sweep.c index 5c62154ec7f..b22f0f56569 100644 --- a/src/compiler/nir/nir_sweep.c +++ b/src/compiler/nir/nir_sweep.c @@ -119,6 +119,8 @@ sweep_impl(nir_shader *nir, nir_function_impl *impl) ralloc_steal(nir, impl); ralloc_steal(nir, impl->params); + for (unsigned i = 0; i < impl->num_params; i++) + ralloc_steal(nir, impl->params[i]); ralloc_steal(nir, impl->return_var); steal_list(nir, nir_variable, &impl->locals); steal_list(nir, nir_register, &impl->registers); diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index 1a943d76314..00184cabe20 100644 --- 
b/src/compiler/nir/nir_validate.c @@ -864,7 +864,7 @@ postvalidate_reg_decl(nir_register *reg, validate_state *state) static void validate_var_decl(nir_variable *var, bool is_global, validate_state *state) { - assert(is_global != (var->data.mode == nir_var_local)); + assert(is_global == nir_variable_is_global(var)); /* * TODO validate some things ir_validate.cpp does (requires more GLSL type @@ -933,13 +933,19 @@ validate_function_impl(nir_function_impl *impl, validate_state *state) assert(impl->cf_node.parent == NULL); assert(impl->num_params == impl->function->num_params); - for (unsigned i = 0; i < impl->num_params; i++) + for (unsigned i = 0; i < impl->num_params; i++) { assert(impl->params[i]->type == impl->function->params[i].type); + assert(impl->params[i]->data.location == i); + validate_var_decl(impl->params[i], false, state); + } - if (glsl_type_is_void(impl->function->return_type)) + if (glsl_type_is_void(impl->function->return_type)) { assert(impl->return_var == NULL); - else + } else { assert(impl->return_var->type == impl->function->return_type); + assert(impl->return_var->data.location == -1); + validate_var_decl(impl->return_var, false, state); + } assert(exec_list_is_empty(&impl->end_block->instr_list)); assert(impl->end_block->successors[0] == NULL); diff --git a/src/compiler/nir/spirv/vtn_cfg.c b/src/compiler/nir/spirv/vtn_cfg.c index 041408b1cfb..144aac315e5 100644 --- a/src/compiler/nir/spirv/vtn_cfg.c +++ b/src/compiler/nir/spirv/vtn_cfg.c @@ -73,10 +73,6 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, func->return_type = glsl_get_function_return_type(func_type); b->func->impl = nir_function_impl_create(func); - if (!glsl_type_is_void(func->return_type)) { - b->func->impl->return_var = - nir_local_variable_create(b->func->impl, func->return_type, "ret"); - } b->func_param_idx = 0; break; @@ -92,13 +88,10 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, vtn_push_value(b, w[2], 
vtn_value_type_access_chain); assert(b->func_param_idx < b->func->impl->num_params); - unsigned idx = b->func_param_idx++; + nir_variable *param = b->func->impl->params[b->func_param_idx++]; - nir_variable *param = - nir_local_variable_create(b->func->impl, - b->func->impl->function->params[idx].type, - val->name); - b->func->impl->params[idx] = param; + /* Name the parameter so it shows up nicely in NIR */ + param->name = ralloc_strdup(param, val->name); struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable); vtn_var->mode = vtn_variable_mode_param; -- cgit v1.2.3 From de6c9c5f2e1e975d06e4d27cefc6af9ae53d75a7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 8 Feb 2016 17:41:36 -0800 Subject: nir/inline_functions: Don't shadown variables when it isn't needed Previously, in order to get things working, we just always shadowed variables. Now, we rewrite derefs whenever it's safe to do so and only shadow if we have an in or out variable that we write or read to respectively. 
--- src/compiler/nir/nir_inline_functions.c | 154 +++++++++++++++++++++++++++----- 1 file changed, 131 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/nir_inline_functions.c b/src/compiler/nir/nir_inline_functions.c index b343eb735b1..4a08dcc96e0 100644 --- a/src/compiler/nir/nir_inline_functions.c +++ b/src/compiler/nir/nir_inline_functions.c @@ -33,6 +33,106 @@ struct inline_functions_state { static bool inline_function_impl(nir_function_impl *impl, struct set *inlined); +static bool +rewrite_param_derefs_block(nir_block *block, void *void_state) +{ + nir_call_instr *call = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + for (unsigned i = 0; + i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) { + if (intrin->variables[i]->var->data.mode != nir_var_param) + continue; + + int param_idx = intrin->variables[i]->var->data.location; + + nir_deref_var *call_deref; + if (param_idx >= 0) { + assert(param_idx < call->callee->num_params); + call_deref = call->params[param_idx]; + } else { + call_deref = call->return_deref; + } + assert(call_deref); + + nir_deref_var *new_deref = nir_deref_as_var(nir_copy_deref(intrin, &call_deref->deref)); + nir_deref *new_tail = nir_deref_tail(&new_deref->deref); + new_tail->child = intrin->variables[i]->deref.child; + ralloc_steal(new_tail, new_tail->child); + intrin->variables[i] = new_deref; + } + } + + return true; +} + +static void +lower_param_to_local(nir_variable *param, nir_function_impl *impl, bool write) +{ + if (param->data.mode != nir_var_param) + return; + + nir_parameter_type param_type; + if (param->data.location >= 0) { + assert(param->data.location < impl->num_params); + param_type = impl->function->params[param->data.location].param_type; + } else { + /* Return variable */ + param_type = nir_parameter_out; + } + + if ((write && param_type == 
nir_parameter_in) || + (!write && param_type == nir_parameter_out)) { + /* In this case, we need a shadow copy. Turn it into a local */ + param->data.mode = nir_var_local; + exec_list_push_tail(&impl->locals, &param->node); + } +} + +static bool +lower_params_to_locals_block(nir_block *block, void *void_state) +{ + nir_function_impl *impl = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_store_var: + lower_param_to_local(intrin->variables[0]->var, impl, true); + break; + + case nir_intrinsic_copy_var: + lower_param_to_local(intrin->variables[0]->var, impl, true); + lower_param_to_local(intrin->variables[1]->var, impl, false); + break; + + case nir_intrinsic_load_var: + /* All other intrinsics which access variables (image_load_store) + * do so in a read-only fasion. + */ + for (unsigned i = 0; + i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) { + lower_param_to_local(intrin->variables[i]->var, impl, false); + } + break; + + default: + continue; + } + } + + return true; +} + static bool inline_functions_block(nir_block *block, void *void_state) { @@ -60,29 +160,40 @@ inline_functions_block(nir_block *block, void *void_state) nir_function_impl *callee_copy = nir_function_impl_clone(call->callee->impl); + callee_copy->function = call->callee; + + /* Add copies of all in parameters */ + assert(call->num_params == callee_copy->num_params); exec_list_append(&b->impl->locals, &callee_copy->locals); exec_list_append(&b->impl->registers, &callee_copy->registers); b->cursor = nir_before_instr(&call->instr); - /* Add copies of all in parameters */ - assert(call->num_params == callee_copy->num_params); + /* We now need to tie the two functions together using the + * parameters. There are two ways we do this: One is to turn the + * parameter into a local variable and do a shadow-copy. 
The other + * is to treat the parameter as a "proxy" and rewrite derefs to use + * the actual variable that comes from the call instruction. We + * implement both schemes. The first is needed in the case where we + * have an in parameter that we write or similar. The second case is + * needed for handling things such as images and uniforms properly. + */ + + /* Figure out when we need to lower to a shadow local */ + nir_foreach_block(callee_copy, lower_params_to_locals_block, callee_copy); for (unsigned i = 0; i < callee_copy->num_params; i++) { nir_variable *param = callee_copy->params[i]; - /* Turn it into a local variable */ - param->data.mode = nir_var_local; - exec_list_push_head(&b->impl->locals, &param->node); - - /* Only in or inout parameters */ - if (call->callee->params[i].param_type == nir_parameter_out) - continue; - - nir_copy_deref_var(b, nir_deref_var_create(b->shader, param), - call->params[i]); + if (param->data.mode == nir_var_local && + call->callee->params[i].param_type != nir_parameter_out) { + nir_copy_deref_var(b, nir_deref_var_create(b->shader, param), + call->params[i]); + } } + nir_foreach_block(callee_copy, rewrite_param_derefs_block, call); + /* Pluck the body out of the function and place it here */ nir_cf_list body; nir_cf_list_extract(&body, &callee_copy->body); @@ -93,19 +204,16 @@ inline_functions_block(nir_block *block, void *void_state) /* Add copies of all out parameters and the return */ assert(call->num_params == callee_copy->num_params); for (unsigned i = 0; i < callee_copy->num_params; i++) { - /* Only out or inout parameters */ - if (call->callee->params[i].param_type == nir_parameter_in) - continue; + nir_variable *param = callee_copy->params[i]; - nir_copy_deref_var(b, call->params[i], - nir_deref_var_create(b->shader, - callee_copy->params[i])); + if (param->data.mode == nir_var_local && + call->callee->params[i].param_type != nir_parameter_in) { + nir_copy_deref_var(b, call->params[i], + nir_deref_var_create(b->shader, 
param)); + } } - if (!glsl_type_is_void(call->callee->return_type)) { - /* Turn it into a local variable */ - callee_copy->return_var->data.mode = nir_var_local; - exec_list_push_head(&b->impl->locals, &callee_copy->return_var->node); - + if (!glsl_type_is_void(call->callee->return_type) && + callee_copy->return_var->data.mode == nir_var_local) { nir_copy_deref_var(b, call->return_deref, nir_deref_var_create(b->shader, callee_copy->return_var)); -- cgit v1.2.3 From 209820739bd3869b6c9464737525b4efae33b535 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 8 Feb 2016 17:42:07 -0800 Subject: nir/spirv: Set the vtn_mode and interface type for sampler parameters --- src/compiler/nir/spirv/vtn_cfg.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/vtn_cfg.c b/src/compiler/nir/spirv/vtn_cfg.c index 144aac315e5..6a43ef8b2dd 100644 --- a/src/compiler/nir/spirv/vtn_cfg.c +++ b/src/compiler/nir/spirv/vtn_cfg.c @@ -87,19 +87,36 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_access_chain); + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + assert(b->func_param_idx < b->func->impl->num_params); nir_variable *param = b->func->impl->params[b->func_param_idx++]; + assert(param->type == type->type); + /* Name the parameter so it shows up nicely in NIR */ param->name = ralloc_strdup(param, val->name); struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable); - vtn_var->mode = vtn_variable_mode_param; - vtn_var->type = vtn_value(b, w[1], vtn_value_type_type)->type; + vtn_var->type = type; vtn_var->var = param; vtn_var->chain.var = vtn_var; vtn_var->chain.length = 0; + struct vtn_type *without_array = type; + while(glsl_type_is_array(without_array->type)) + without_array = without_array->array_element; + + if (glsl_type_is_image(without_array->type)) { + vtn_var->mode = 
vtn_variable_mode_image; + param->interface_type = without_array->type; + } else if (glsl_type_is_sampler(without_array->type)) { + vtn_var->mode = vtn_variable_mode_sampler; + param->interface_type = without_array->type; + } else { + vtn_var->mode = vtn_variable_mode_param; + } + val->access_chain = &vtn_var->chain; break; } -- cgit v1.2.3 From 1d65abfa582a371558113f699ffbf16d60b64c90 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 8 Feb 2016 22:12:18 -0800 Subject: nir/spirv: Better handle constant offsets in texture lookups --- src/compiler/nir/spirv/spirv_to_nir.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index ee39b333c1a..c0dd92c36a4 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -1287,6 +1287,8 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, unreachable("Unhandled opcode"); } + nir_constant *const_offset = NULL; + /* Now we need to handle some number of optional arguments */ if (idx < count) { uint32_t operands = w[idx++]; @@ -1310,8 +1312,12 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy); } - if (operands & SpvImageOperandsOffsetMask || - operands & SpvImageOperandsConstOffsetMask) + if (operands & SpvImageOperandsConstOffsetMask) { + const_offset = + vtn_value(b, w[idx++], vtn_value_type_constant)->constant; + } + + if (operands & SpvImageOperandsOffsetMask) (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset); if (operands & SpvImageOperandsConstOffsetsMask) @@ -1343,6 +1349,11 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, instr->is_shadow = glsl_sampler_type_is_shadow(image_type); instr->is_new_style_shadow = instr->is_shadow; + if (const_offset) { + for (unsigned i = 0; i < 4; i++) + instr->const_offset[i] = const_offset->value.u[i]; + } + if (has_coord) { switch 
(instr->sampler_dim) { case GLSL_SAMPLER_DIM_1D: -- cgit v1.2.3 From bdab29a312cfad29dc05ed11adc9c96faaa4501d Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Feb 2016 11:21:23 -0800 Subject: isl: Add more assertions to isl_surf_get_depth_format() R16_UNORM and R32_FLOAT are illegal formats for interleaved depthstencil surfaces. --- src/isl/isl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/isl/isl.c b/src/isl/isl.c index f5b2cd5b250..27928fd0850 100644 --- a/src/isl/isl.c +++ b/src/isl/isl.c @@ -1397,9 +1397,11 @@ isl_surf_get_depth_format(const struct isl_device *dev, * Format (p321). */ + bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT; + assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT); - if (surf->usage & ISL_SURF_USAGE_STENCIL_BIT) + if (has_stencil) assert(ISL_DEV_GEN(dev) < 7); switch (surf->format) { @@ -1409,9 +1411,10 @@ isl_surf_get_depth_format(const struct isl_device *dev, assert(ISL_DEV_GEN(dev) < 7); return 0; /* D32_FLOAT_S8X24_UINT */ case ISL_FORMAT_R32_FLOAT: + assert(!has_stencil); return 1; /* D32_FLOAT */ case ISL_FORMAT_R24_UNORM_X8_TYPELESS: - if (surf->usage & ISL_SURF_USAGE_STENCIL_BIT) { + if (has_stencil) { assert(ISL_DEV_GEN(dev) < 7); return 2; /* D24_UNORM_S8_UINT */ } else { @@ -1419,6 +1422,7 @@ isl_surf_get_depth_format(const struct isl_device *dev, return 3; /* D24_UNORM_X8_UINT */ } case ISL_FORMAT_R16_UNORM: + assert(!has_stencil); return 5; /* D16_UNORM */ } } -- cgit v1.2.3 From 2f4bb00c2bdf56ae4db3810f049263dae42aecd5 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Feb 2016 12:35:39 -0800 Subject: anv/image: Fix choose_isl_surf_usage() Don't translate VkImageCreateInfo::usage into an isl_surf_usage bitmask. Instead, translate anv_image::usage, which is a superset of VkImageCreateInfo::usage. 
For-Issue: https://gitlab.khronos.org/vulkan/mesa/issues/26 --- src/vulkan/anv_image.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 9e7f236f851..74f1eca48f2 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -34,28 +34,27 @@ * anv_image_create_info. Exactly one bit must be set in \a aspect. */ static isl_surf_usage_flags_t -choose_isl_surf_usage(const struct anv_image_create_info *info, +choose_isl_surf_usage(VkImageUsageFlags vk_usage, VkImageAspectFlags aspect) { - const VkImageCreateInfo *vk_info = info->vk_info; isl_surf_usage_flags_t isl_flags = 0; /* FINISHME: Support aux surfaces */ isl_flags |= ISL_SURF_USAGE_DISABLE_AUX_BIT; - if (vk_info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) + if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT) isl_flags |= ISL_SURF_USAGE_TEXTURE_BIT; - if (vk_info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) + if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) isl_flags |= ISL_SURF_USAGE_TEXTURE_BIT; - if (vk_info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) isl_flags |= ISL_SURF_USAGE_RENDER_TARGET_BIT; - if (vk_info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) + if (vk_usage & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) isl_flags |= ISL_SURF_USAGE_CUBE_BIT; - if (vk_info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + if (vk_usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { switch (aspect) { default: unreachable("bad VkImageAspect"); @@ -68,12 +67,12 @@ choose_isl_surf_usage(const struct anv_image_create_info *info, } } - if (vk_info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + if (vk_usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { /* Meta implements transfers by sampling from the source image. 
*/ isl_flags |= ISL_SURF_USAGE_TEXTURE_BIT; } - if (vk_info->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { /* Meta implements transfers by rendering into the destination image. */ isl_flags |= ISL_SURF_USAGE_RENDER_TARGET_BIT; } @@ -138,7 +137,7 @@ make_surface(const struct anv_device *dev, .samples = vk_info->samples, .min_alignment = 0, .min_pitch = 0, - .usage = choose_isl_surf_usage(anv_info, aspect), + .usage = choose_isl_surf_usage(image->usage, aspect), .tiling_flags = tiling_flags); /* isl_surf_init() will fail only if provided invalid input. Invalid input -- cgit v1.2.3 From c5e521f391d0b35f590f0fdc5eea3ac2e8abafe9 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Feb 2016 12:36:54 -0800 Subject: anv/image: Refactor choose_isl_surf_usage() - Rename local var isl_flags -> isl_usage. - Fix comment. --- src/vulkan/anv_image.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 74f1eca48f2..2cf9de7d184 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -30,54 +30,53 @@ #include "anv_private.h" /** - * The \a format argument is required and overrides any format found in struct - * anv_image_create_info. Exactly one bit must be set in \a aspect. + * Exactly one bit must be set in \a aspect. 
*/ static isl_surf_usage_flags_t choose_isl_surf_usage(VkImageUsageFlags vk_usage, VkImageAspectFlags aspect) { - isl_surf_usage_flags_t isl_flags = 0; + isl_surf_usage_flags_t isl_usage = 0; /* FINISHME: Support aux surfaces */ - isl_flags |= ISL_SURF_USAGE_DISABLE_AUX_BIT; + isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT; if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT) - isl_flags |= ISL_SURF_USAGE_TEXTURE_BIT; + isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) - isl_flags |= ISL_SURF_USAGE_TEXTURE_BIT; + isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) - isl_flags |= ISL_SURF_USAGE_RENDER_TARGET_BIT; + isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; if (vk_usage & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) - isl_flags |= ISL_SURF_USAGE_CUBE_BIT; + isl_usage |= ISL_SURF_USAGE_CUBE_BIT; if (vk_usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { switch (aspect) { default: unreachable("bad VkImageAspect"); case VK_IMAGE_ASPECT_DEPTH_BIT: - isl_flags |= ISL_SURF_USAGE_DEPTH_BIT; + isl_usage |= ISL_SURF_USAGE_DEPTH_BIT; break; case VK_IMAGE_ASPECT_STENCIL_BIT: - isl_flags |= ISL_SURF_USAGE_STENCIL_BIT; + isl_usage |= ISL_SURF_USAGE_STENCIL_BIT; break; } } if (vk_usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { /* Meta implements transfers by sampling from the source image. */ - isl_flags |= ISL_SURF_USAGE_TEXTURE_BIT; + isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; } if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { /* Meta implements transfers by rendering into the destination image. 
*/ - isl_flags |= ISL_SURF_USAGE_RENDER_TARGET_BIT; + isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; } - return isl_flags; + return isl_usage; } /** -- cgit v1.2.3 From 4c5dcccfba3c9d0e5c7302aa797ad8d31f18cf52 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 9 Feb 2016 12:41:08 -0800 Subject: anv/image: Fix usage for depthstencil images The tests assertion-failed in vkCmdClearDepthStencilImage because the isl surface lacked ISL_SURF_USAGE_DEPTH_BIT. Fixes: https://gitlab.khronos.org/vulkan/mesa/issues/26 Fixes: dEQP-VK.pipeline.timestamp.transfer_tests.host_stage_with_clear_depth_stencil_image_method Fixes: dEQP-VK.pipeline.timestamp.transfer_tests.transfer_stage_with_clear_depth_stencil_image_method --- src/vulkan/anv_image.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 2cf9de7d184..4ce997589c5 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -151,8 +151,12 @@ make_surface(const struct anv_device *dev, return VK_SUCCESS; } +/** + * Parameter @a format is required and overrides VkImageCreateInfo::format. + */ static VkImageUsageFlags -anv_image_get_full_usage(const VkImageCreateInfo *info) +anv_image_get_full_usage(const VkImageCreateInfo *info, + const struct anv_format *format) { VkImageUsageFlags usage = info->usage; @@ -168,10 +172,21 @@ anv_image_get_full_usage(const VkImageCreateInfo *info) } if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { - /* Meta will transfer to the image by binding it as a color attachment, - * even if the image format is not a color format. + /* For non-clear transfer operations, meta will transfer to the image by + * binding it as a color attachment, even if the image format is not + * a color format. 
*/ usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + if (anv_format_is_depth_or_stencil(format)) { + /* vkCmdClearDepthStencilImage() only requires that + * VK_IMAGE_USAGE_TRANSFER_SRC_BIT be set. In particular, it does + * not require VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT. Meta + * clears the image, though, by binding it as a depthstencil + * attachment. + */ + usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } } return usage; @@ -186,6 +201,7 @@ anv_image_create(VkDevice _device, ANV_FROM_HANDLE(anv_device, device, _device); const VkImageCreateInfo *pCreateInfo = create_info->vk_info; struct anv_image *image = NULL; + const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); VkResult r; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); @@ -206,14 +222,14 @@ anv_image_create(VkDevice _device, image->type = pCreateInfo->imageType; image->extent = pCreateInfo->extent; image->vk_format = pCreateInfo->format; - image->format = anv_format_for_vk_format(pCreateInfo->format); + image->format = format; image->levels = pCreateInfo->mipLevels; image->array_size = pCreateInfo->arrayLayers; image->samples = pCreateInfo->samples; - image->usage = anv_image_get_full_usage(pCreateInfo); + image->usage = anv_image_get_full_usage(pCreateInfo, format); image->tiling = pCreateInfo->tiling; - if (likely(anv_format_is_color(image->format))) { + if (likely(anv_format_is_color(format))) { r = make_surface(device, image, create_info, VK_IMAGE_ASPECT_COLOR_BIT); if (r != VK_SUCCESS) -- cgit v1.2.3 From e15f7551d11f435e13d67a4e0066d1bab44b47ed Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Feb 2016 15:32:21 -0800 Subject: anv/apply_pipeline_layout: Use the new const_index helpers --- src/vulkan/anv_nir_apply_pipeline_layout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c index 
00ed7766acb..61dcf1d4c1b 100644 --- a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@ -114,8 +114,8 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin, b->cursor = nir_before_instr(&intrin->instr); - uint32_t set = intrin->const_index[0]; - uint32_t binding = intrin->const_index[1]; + uint32_t set = nir_intrinsic_desc_set(intrin); + uint32_t binding = nir_intrinsic_binding(intrin); uint32_t surface_index = get_surface_index(set, binding, state); -- cgit v1.2.3 From e01dd59b73d5787bc95ce0dfa8dfc95ba10a7581 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Feb 2016 15:35:51 -0800 Subject: vtn: Use const_index helpers --- src/compiler/nir/spirv/spirv_to_nir.c | 2 +- src/compiler/nir/spirv/vtn_variables.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index c0dd92c36a4..a7efa196832 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -2048,7 +2048,7 @@ vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, nir_intrinsic_instr_create(b->shader, intrinsic_op); if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) - intrin->const_index[0] = w[1]; + nir_intrinsic_set_stream_id(intrin, w[1]); nir_builder_instr_insert(&b->nb, &intrin->instr); } diff --git a/src/compiler/nir/spirv/vtn_variables.c b/src/compiler/nir/spirv/vtn_variables.c index 5ca24201498..31bf416ff5e 100644 --- a/src/compiler/nir/spirv/vtn_variables.c +++ b/src/compiler/nir/spirv/vtn_variables.c @@ -193,7 +193,7 @@ _vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref, intrin->num_components, NULL); inout->def = &intrin->dest.ssa; } else { - intrin->const_index[0] = (1 << intrin->num_components) - 1; + nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1); intrin->src[0] = nir_src_for_ssa(inout->def); } @@ -392,7 +392,7 
@@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, int src = 0; if (!load) { - instr->const_index[0] = (1 << instr->num_components) - 1; /* write mask */ + nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1); instr->src[src++] = nir_src_for_ssa((*inout)->def); } @@ -400,8 +400,8 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, * constant block for now. */ if (op == nir_intrinsic_load_push_constant) { - instr->const_index[0] = 0; - instr->const_index[1] = 128; + nir_intrinsic_set_base(instr, 0); + nir_intrinsic_set_range(instr, 128); } if (index) -- cgit v1.2.3 From 09b3e30dc672d38e60aac1d279eb1db94c9b048e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Feb 2016 12:13:32 -0800 Subject: anv: Fix up spirv for new texture/sampler split stuff --- src/compiler/nir/spirv/spirv_to_nir.c | 24 +++++++++++++++++-- src/vulkan/anv_meta.c | 1 + src/vulkan/anv_meta_resolve.c | 1 + src/vulkan/anv_nir_apply_pipeline_layout.c | 38 ++++++++++++++---------------- 4 files changed, 42 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index a7efa196832..979357d67c8 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -1389,12 +1389,32 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, } nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler); - instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); if (sampled.image) { nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image); instr->texture = nir_deref_as_var(nir_copy_deref(instr, &image->deref)); } else { - instr->texture = NULL; + instr->texture = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); + } + + switch (instr->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txl: + case nir_texop_txd: + /* These operations require a sampler */ + 
instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); + break; + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_tg4: + case nir_texop_query_levels: + case nir_texop_texture_samples: + case nir_texop_samples_identical: + /* These don't */ + instr->sampler = NULL; + break; } nir_ssa_dest_init(&instr->instr, &instr->dest, diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 2867e471a32..46227c3407e 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -106,6 +106,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) tex->dest_type = nir_type_float; /* TODO */ tex->is_array = glsl_sampler_type_is_array(sampler_type); tex->coord_components = tex_pos->num_components; + tex->texture = nir_deref_var_create(tex, sampler); tex->sampler = nir_deref_var_create(tex, sampler); nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c index f1c985e04cf..5aa2fef76bd 100644 --- a/src/vulkan/anv_meta_resolve.c +++ b/src/vulkan/anv_meta_resolve.c @@ -152,6 +152,7 @@ build_nir_fs(uint32_t num_samples) nir_tex_instr *tex; tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2); + tex->texture = nir_deref_var_create(tex, u_tex); tex->sampler = nir_deref_var_create(tex, u_tex); tex->sampler_dim = GLSL_SAMPLER_DIM_MS; tex->op = nir_texop_txf_ms; diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c index 61dcf1d4c1b..ee93e40e76c 100644 --- a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@ -58,21 +58,18 @@ get_surface_index(unsigned set, unsigned binding, } static uint32_t -get_sampler_index(unsigned set, unsigned binding, nir_texop tex_op, +get_sampler_index(unsigned set, unsigned binding, struct apply_pipeline_layout_state *state) { assert(set < state->layout->num_sets); struct anv_descriptor_set_layout 
*set_layout = state->layout->set[set].layout; - assert(binding < set_layout->binding_count); - gl_shader_stage stage = state->shader->stage; - if (set_layout->binding[binding].stage[stage].sampler_index < 0) { - assert(tex_op == nir_texop_txf); - return 0; - } + assert(binding < set_layout->binding_count); + + assert(set_layout->binding[binding].stage[stage].sampler_index >= 0); uint32_t sampler_index = state->layout->set[set].stage[stage].sampler_start + @@ -188,29 +185,30 @@ static void lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) { /* No one should have come by and lowered it already */ - assert(tex->sampler); + assert(tex->texture); - nir_deref_var *tex_deref = tex->texture ? tex->texture : tex->sampler; tex->texture_index = - get_surface_index(tex_deref->var->data.descriptor_set, - tex_deref->var->data.binding, state); - lower_tex_deref(tex, tex_deref, &tex->texture_index, + get_surface_index(tex->texture->var->data.descriptor_set, + tex->texture->var->data.binding, state); + lower_tex_deref(tex, tex->texture, &tex->texture_index, nir_tex_src_texture_offset, state); - tex->sampler_index = - get_sampler_index(tex->sampler->var->data.descriptor_set, - tex->sampler->var->data.binding, tex->op, state); - lower_tex_deref(tex, tex->sampler, &tex->sampler_index, - nir_tex_src_sampler_offset, state); + if (tex->sampler) { + tex->sampler_index = + get_sampler_index(tex->sampler->var->data.descriptor_set, + tex->sampler->var->data.binding, state); + lower_tex_deref(tex, tex->sampler, &tex->sampler_index, + nir_tex_src_sampler_offset, state); + } /* The backend only ever uses this to mark used surfaces. We don't care * about that little optimization so it just needs to be non-zero. 
*/ tex->texture_array_size = 1; - if (tex->texture) - cleanup_tex_deref(tex, tex->texture); - cleanup_tex_deref(tex, tex->sampler); + cleanup_tex_deref(tex, tex->texture); + if (tex->sampler) + cleanup_tex_deref(tex, tex->sampler); tex->texture = NULL; tex->sampler = NULL; } -- cgit v1.2.3 From 51c01e292c9a71958bb495d30397e82ba1a3d5d8 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 8 Feb 2016 16:21:09 -0800 Subject: anv: Generate pack headers from XML definition This huge commit switches us over to using a simple xml format (genxml) for defining our command streamer commands and a python script for generating the pack headers we use in the driver. --- src/vulkan/.gitignore | 1 + src/vulkan/Makefile.am | 9 +- src/vulkan/anv_batch_chain.c | 2 +- src/vulkan/anv_private.h | 2 +- src/vulkan/gen7.xml | 2519 ++++++++++ src/vulkan/gen75.xml | 2912 ++++++++++++ src/vulkan/gen75_pack.h | 8421 --------------------------------- src/vulkan/gen7_pack.h | 7003 ---------------------------- src/vulkan/gen7_state.c | 18 +- src/vulkan/gen8.xml | 3165 +++++++++++++ src/vulkan/gen8_cmd_buffer.c | 8 +- src/vulkan/gen8_pack.h | 9209 ------------------------------------ src/vulkan/gen8_pipeline.c | 8 +- src/vulkan/gen8_state.c | 2 +- src/vulkan/gen9.xml | 3469 ++++++++++++++ src/vulkan/gen9_pack.h | 9797 --------------------------------------- src/vulkan/genX_pipeline_util.h | 6 +- src/vulkan/gen_pack_header.py | 585 +++ 18 files changed, 12682 insertions(+), 34454 deletions(-) create mode 100644 src/vulkan/gen7.xml create mode 100644 src/vulkan/gen75.xml delete mode 100644 src/vulkan/gen75_pack.h delete mode 100644 src/vulkan/gen7_pack.h create mode 100644 src/vulkan/gen8.xml delete mode 100644 src/vulkan/gen8_pack.h create mode 100644 src/vulkan/gen9.xml delete mode 100644 src/vulkan/gen9_pack.h create mode 100755 src/vulkan/gen_pack_header.py (limited to 'src') diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore index 8f9477c4c63..2980dbfece3 100644 --- 
a/src/vulkan/.gitignore +++ b/src/vulkan/.gitignore @@ -5,3 +5,4 @@ /wayland-drm-protocol.c /wayland-drm-client-protocol.h /anv_icd.json +/gen*_pack.h \ No newline at end of file diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 58668c7cf88..36bea427c66 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -96,7 +96,11 @@ VULKAN_SOURCES = \ BUILT_SOURCES = \ anv_entrypoints.h \ - anv_entrypoints.c + anv_entrypoints.c \ + gen7_pack.h \ + gen75_pack.h \ + gen8_pack.h \ + gen9_pack.h libanv_gen7_la_SOURCES = \ genX_cmd_buffer.c \ @@ -158,6 +162,9 @@ anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@ +%_pack.h : %.xml gen_pack_header.py + $(AM_V_GEN) $(srcdir)/gen_pack_header.py $< > $@ + CLEANFILES = $(BUILT_SOURCES) libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index d74c5995168..13a3faee233 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -420,7 +420,7 @@ emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer, GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias; anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, - .DwordLength = cmd_buffer->device->info.gen < 8 ? + .DWordLength = cmd_buffer->device->info.gen < 8 ? 
gen7_length : gen8_length, ._2ndLevelBatchBuffer = _1stlevelbatch, .AddressSpaceIndicator = ASI_PPGTT, diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index b7d35f88ef0..ad34772b081 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -792,7 +792,7 @@ __gen_combine_address(struct anv_batch *batch, void *location, void *__dst = anv_batch_emit_dwords(batch, n); \ struct cmd __template = { \ __anv_cmd_header(cmd), \ - .DwordLength = n - __anv_cmd_length_bias(cmd), \ + .DWordLength = n - __anv_cmd_length_bias(cmd), \ __VA_ARGS__ \ }; \ __anv_cmd_pack(cmd)(batch, __dst, &__template); \ diff --git a/src/vulkan/gen7.xml b/src/vulkan/gen7.xml new file mode 100644 index 00000000000..d717b144085 --- /dev/null +++ b/src/vulkan/gen7.xml @@ -0,0 +1,2519 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/vulkan/gen75.xml b/src/vulkan/gen75.xml new file mode 100644 index 00000000000..afdccd0f6a4 --- /dev/null +++ b/src/vulkan/gen75.xml @@ -0,0 +1,2912 @@ + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h deleted file mode 100644 index 7b098894ee2..00000000000 --- a/src/vulkan/gen75_pack.h +++ /dev/null @@ -1,8421 
+0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - - -/* Instructions, enums and structures for HSW. - * - * This file has been generated, do not hand edit. 
- */ - -#pragma once - -#include -#include - -#ifndef __gen_validate_value -#define __gen_validate_value(x) -#endif - -#ifndef __gen_field_functions -#define __gen_field_functions - -union __gen_value { - float f; - uint32_t dw; -}; - -static inline uint64_t -__gen_mbo(uint32_t start, uint32_t end) -{ - return (~0ull >> (64 - (end - start + 1))) << start; -} - -static inline uint64_t -__gen_field(uint64_t v, uint32_t start, uint32_t end) -{ - __gen_validate_value(v); -#if DEBUG - if (end - start + 1 < 64) - assert(v < 1ull << (end - start + 1)); -#endif - - return v << start; -} - -static inline uint64_t -__gen_fixed(float v, uint32_t start, uint32_t end, - bool is_signed, uint32_t fract_bits) -{ - __gen_validate_value(v); - - const float factor = (1 << fract_bits); - - float max, min; - if (is_signed) { - max = ((1 << (end - start)) - 1) / factor; - min = -(1 << (end - start)) / factor; - } else { - max = ((1 << (end - start + 1)) - 1) / factor; - min = 0.0f; - } - - if (v > max) - v = max; - else if (v < min) - v = min; - - int32_t int_val = roundf(v * factor); - - if (is_signed) - int_val &= (1 << (end - start + 1)) - 1; - - return int_val << start; -} - -static inline uint64_t -__gen_offset(uint64_t v, uint32_t start, uint32_t end) -{ - __gen_validate_value(v); -#if DEBUG - uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; - - assert((v & ~mask) == 0); -#endif - - return v; -} - -static inline uint32_t -__gen_float(float v) -{ - __gen_validate_value(v); - return ((union __gen_value) { .f = (v) }).dw; -} - -#ifndef __gen_address_type -#error #define __gen_address_type before including this file -#endif - -#ifndef __gen_user_data -#error #define __gen_combine_address before including this file -#endif - -#endif - -#define GEN75_3DSTATE_URB_VS_length_bias 0x00000002 -#define GEN75_3DSTATE_URB_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 48, \ - .DwordLength = 0 - -#define 
GEN75_3DSTATE_URB_VS_length 0x00000002 - -struct GEN75_3DSTATE_URB_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t VSURBStartingAddress; - uint32_t VSURBEntryAllocationSize; - uint32_t VSNumberofURBEntries; -}; - -static inline void -GEN75_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_URB_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->VSURBStartingAddress, 25, 30) | - __gen_field(values->VSURBEntryAllocationSize, 16, 24) | - __gen_field(values->VSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN75_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 -#define GEN75_GPGPU_CSR_BASE_ADDRESS_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 4, \ - .DwordLength = 0 - -#define GEN75_GPGPU_CSR_BASE_ADDRESS_length 0x00000002 - -struct GEN75_GPGPU_CSR_BASE_ADDRESS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type GPGPUCSRBaseAddress; -}; - -static inline void -GEN75_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_GPGPU_CSR_BASE_ADDRESS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, 
&dw[1], values->GPGPUCSRBaseAddress, dw1); - -} - -#define GEN75_MI_STORE_REGISTER_MEM_length_bias 0x00000002 -#define GEN75_MI_STORE_REGISTER_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 36, \ - .DwordLength = 1 - -#define GEN75_MI_STORE_REGISTER_MEM_length 0x00000003 - -struct GEN75_MI_STORE_REGISTER_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool UseGlobalGTT; - uint32_t PredicateEnable; - uint32_t DwordLength; - uint32_t RegisterAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN75_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_STORE_REGISTER_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->PredicateEnable, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->RegisterAddress, 2, 22) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - -} - -#define GEN75_PIPELINE_SELECT_length_bias 0x00000001 -#define GEN75_PIPELINE_SELECT_header \ - .CommandType = 3, \ - .CommandSubType = 1, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 4 - -#define GEN75_PIPELINE_SELECT_length 0x00000001 - -struct GEN75_PIPELINE_SELECT { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; -#define _3D 0 -#define Media 1 -#define GPGPU 2 - uint32_t PipelineSelection; -}; - -static inline void -GEN75_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_PIPELINE_SELECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - 
__gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->PipelineSelection, 0, 1) | - 0; - -} - -#define GEN75_STATE_BASE_ADDRESS_length_bias 0x00000002 -#define GEN75_STATE_BASE_ADDRESS_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 1, \ - .DwordLength = 8 - -#define GEN75_STATE_BASE_ADDRESS_length 0x0000000a - -#define GEN75_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 - -struct GEN75_MEMORY_OBJECT_CONTROL_STATE { - uint32_t LLCeLLCCacheabilityControlLLCCC; - uint32_t L3CacheabilityControlL3CC; -}; - -static inline void -GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEMORY_OBJECT_CONTROL_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->LLCeLLCCacheabilityControlLLCCC, 1, 2) | - __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | - 0; - -} - -struct GEN75_STATE_BASE_ADDRESS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type GeneralStateBaseAddress; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; - bool GeneralStateBaseAddressModifyEnable; - __gen_address_type SurfaceStateBaseAddress; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; - bool SurfaceStateBaseAddressModifyEnable; - __gen_address_type DynamicStateBaseAddress; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; - bool DynamicStateBaseAddressModifyEnable; - __gen_address_type IndirectObjectBaseAddress; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; - bool IndirectObjectBaseAddressModifyEnable; - __gen_address_type InstructionBaseAddress; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE 
InstructionMemoryObjectControlState; - bool InstructionBaseAddressModifyEnable; - __gen_address_type GeneralStateAccessUpperBound; - bool GeneralStateAccessUpperBoundModifyEnable; - __gen_address_type DynamicStateAccessUpperBound; - bool DynamicStateAccessUpperBoundModifyEnable; - __gen_address_type IndirectObjectAccessUpperBound; - bool IndirectObjectAccessUpperBoundModifyEnable; - __gen_address_type InstructionAccessUpperBound; - bool InstructionAccessUpperBoundModifyEnable; -}; - -static inline void -GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_STATE_BASE_ADDRESS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_GeneralStateMemoryObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); - uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); - uint32_t dw1 = - __gen_field(dw_GeneralStateMemoryObjectControlState, 8, 11) | - __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 4, 7) | - __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); - - uint32_t dw_SurfaceStateMemoryObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); - uint32_t dw2 = - __gen_field(dw_SurfaceStateMemoryObjectControlState, 8, 11) | - 
__gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->SurfaceStateBaseAddress, dw2); - - uint32_t dw_DynamicStateMemoryObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); - uint32_t dw3 = - __gen_field(dw_DynamicStateMemoryObjectControlState, 8, 11) | - __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->DynamicStateBaseAddress, dw3); - - uint32_t dw_IndirectObjectMemoryObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); - uint32_t dw4 = - __gen_field(dw_IndirectObjectMemoryObjectControlState, 8, 11) | - __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | - 0; - - dw[4] = - __gen_combine_address(data, &dw[4], values->IndirectObjectBaseAddress, dw4); - - uint32_t dw_InstructionMemoryObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); - uint32_t dw5 = - __gen_field(dw_InstructionMemoryObjectControlState, 8, 11) | - __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | - 0; - - dw[5] = - __gen_combine_address(data, &dw[5], values->InstructionBaseAddress, dw5); - - uint32_t dw6 = - __gen_field(values->GeneralStateAccessUpperBoundModifyEnable, 0, 0) | - 0; - - dw[6] = - __gen_combine_address(data, &dw[6], values->GeneralStateAccessUpperBound, dw6); - - uint32_t dw7 = - __gen_field(values->DynamicStateAccessUpperBoundModifyEnable, 0, 0) | - 0; - - dw[7] = - __gen_combine_address(data, &dw[7], values->DynamicStateAccessUpperBound, dw7); - - uint32_t dw8 = - __gen_field(values->IndirectObjectAccessUpperBoundModifyEnable, 0, 0) | - 0; - - dw[8] = - __gen_combine_address(data, &dw[8], 
values->IndirectObjectAccessUpperBound, dw8); - - uint32_t dw9 = - __gen_field(values->InstructionAccessUpperBoundModifyEnable, 0, 0) | - 0; - - dw[9] = - __gen_combine_address(data, &dw[9], values->InstructionAccessUpperBound, dw9); - -} - -#define GEN75_STATE_PREFETCH_length_bias 0x00000002 -#define GEN75_STATE_PREFETCH_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 3, \ - .DwordLength = 0 - -#define GEN75_STATE_PREFETCH_length 0x00000002 - -struct GEN75_STATE_PREFETCH { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type PrefetchPointer; - uint32_t PrefetchCount; -}; - -static inline void -GEN75_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_STATE_PREFETCH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - __gen_field(values->PrefetchCount, 0, 2) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); - -} - -#define GEN75_STATE_SIP_length_bias 0x00000002 -#define GEN75_STATE_SIP_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 2, \ - .DwordLength = 0 - -#define GEN75_STATE_SIP_length 0x00000002 - -struct GEN75_STATE_SIP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SystemInstructionPointer; -}; - -static inline void -GEN75_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_STATE_SIP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - 
dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->SystemInstructionPointer, 4, 31) | - 0; - -} - -#define GEN75_SWTESS_BASE_ADDRESS_length_bias 0x00000002 -#define GEN75_SWTESS_BASE_ADDRESS_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 3, \ - .DwordLength = 0 - -#define GEN75_SWTESS_BASE_ADDRESS_length 0x00000002 - -struct GEN75_SWTESS_BASE_ADDRESS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type SWTessellationBaseAddress; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; -}; - -static inline void -GEN75_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SWTESS_BASE_ADDRESS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_SWTessellationMemoryObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); - uint32_t dw1 = - __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); - -} - -#define GEN75_3DPRIMITIVE_length_bias 0x00000002 -#define GEN75_3DPRIMITIVE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 3, \ - ._3DCommandSubOpcode = 0, \ - .DwordLength = 5 - -#define 
GEN75_3DPRIMITIVE_length 0x00000007 - -struct GEN75_3DPRIMITIVE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - bool IndirectParameterEnable; - uint32_t UAVCoherencyRequired; - bool PredicateEnable; - uint32_t DwordLength; - bool EndOffsetEnable; -#define SEQUENTIAL 0 -#define RANDOM 1 - uint32_t VertexAccessType; - uint32_t PrimitiveTopologyType; - uint32_t VertexCountPerInstance; - uint32_t StartVertexLocation; - uint32_t InstanceCount; - uint32_t StartInstanceLocation; - uint32_t BaseVertexLocation; -}; - -static inline void -GEN75_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DPRIMITIVE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->IndirectParameterEnable, 10, 10) | - __gen_field(values->UAVCoherencyRequired, 9, 9) | - __gen_field(values->PredicateEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->EndOffsetEnable, 9, 9) | - __gen_field(values->VertexAccessType, 8, 8) | - __gen_field(values->PrimitiveTopologyType, 0, 5) | - 0; - - dw[2] = - __gen_field(values->VertexCountPerInstance, 0, 31) | - 0; - - dw[3] = - __gen_field(values->StartVertexLocation, 0, 31) | - 0; - - dw[4] = - __gen_field(values->InstanceCount, 0, 31) | - 0; - - dw[5] = - __gen_field(values->StartInstanceLocation, 0, 31) | - 0; - - dw[6] = - __gen_field(values->BaseVertexLocation, 0, 31) | - 0; - -} - -#define GEN75_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 -#define GEN75_3DSTATE_AA_LINE_PARAMETERS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 10, \ - .DwordLength = 1 - -#define GEN75_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 - 
-struct GEN75_3DSTATE_AA_LINE_PARAMETERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - float AACoverageBias; - float AACoverageSlope; - float AACoverageEndCapBias; - float AACoverageEndCapSlope; -}; - -static inline void -GEN75_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_AA_LINE_PARAMETERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | - __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | - 0; - - dw[2] = - __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | - __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | - 0; - -} - -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_length_bias 0x00000002 -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 70 - -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_length 0x00000000 - -#define GEN75_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 - -struct GEN75_BINDING_TABLE_EDIT_ENTRY { - uint32_t BindingTableIndex; - uint32_t SurfaceStatePointer; -}; - -static inline void -GEN75_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_BINDING_TABLE_EDIT_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->BindingTableIndex, 16, 23) | - __gen_offset(values->SurfaceStatePointer, 0, 15) | - 0; - -} - -struct GEN75_3DSTATE_BINDING_TABLE_EDIT_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t 
_3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_length_bias 0x00000002 -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 68 - -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_length 0x00000000 - -struct GEN75_3DSTATE_BINDING_TABLE_EDIT_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - 
__gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_length_bias 0x00000002 -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 69 - -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_length 0x00000000 - -struct GEN75_3DSTATE_BINDING_TABLE_EDIT_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_length_bias 0x00000002 -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 71 - -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_length 0x00000000 - -struct GEN75_3DSTATE_BINDING_TABLE_EDIT_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 
-#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_length_bias 0x00000002 -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 67 - -#define GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_length 0x00000000 - -struct GEN75_3DSTATE_BINDING_TABLE_EDIT_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* 
variable length fields follow */ -} - -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 40, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 - -struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoDSBindingTable; -}; - -static inline void -GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoDSBindingTable, 5, 15) | - 0; - -} - -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 41, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 - -struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoGSBindingTable; -}; - -static inline void -GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - 
__gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoGSBindingTable, 5, 15) | - 0; - -} - -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 39, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 - -struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoHSBindingTable; -}; - -static inline void -GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoHSBindingTable, 5, 15) | - 0; - -} - -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 42, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 - -struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoPSBindingTable; -}; - -static inline void 
-GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoPSBindingTable, 5, 15) | - 0; - -} - -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 38, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 - -struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoVSBindingTable; -}; - -static inline void -GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoVSBindingTable, 5, 15) | - 0; - -} - -#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 -#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 25, \ - .DwordLength = 1 - -#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000003 - -struct 
GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type BindingTablePoolBaseAddress; - uint32_t BindingTablePoolEnable; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; - __gen_address_type BindingTablePoolUpperBound; -}; - -static inline void -GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_SurfaceObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); - uint32_t dw1 = - __gen_field(values->BindingTablePoolEnable, 11, 11) | - __gen_field(dw_SurfaceObjectControlState, 7, 10) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->BindingTablePoolUpperBound, dw2); - -} - -#define GEN75_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 -#define GEN75_3DSTATE_BLEND_STATE_POINTERS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 36, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 - -struct GEN75_3DSTATE_BLEND_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BlendStatePointer; -}; - -static inline void -GEN75_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * 
restrict dst, - const struct GEN75_3DSTATE_BLEND_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->BlendStatePointer, 6, 31) | - __gen_mbo(0, 0) | - 0; - -} - -#define GEN75_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 -#define GEN75_3DSTATE_CC_STATE_POINTERS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 14, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_CC_STATE_POINTERS_length 0x00000002 - -struct GEN75_3DSTATE_CC_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ColorCalcStatePointer; -}; - -static inline void -GEN75_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CC_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->ColorCalcStatePointer, 6, 31) | - __gen_mbo(0, 0) | - 0; - -} - -#define GEN75_3DSTATE_CHROMA_KEY_length_bias 0x00000002 -#define GEN75_3DSTATE_CHROMA_KEY_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 4, \ - .DwordLength = 2 - -#define GEN75_3DSTATE_CHROMA_KEY_length 0x00000004 - -struct GEN75_3DSTATE_CHROMA_KEY { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - 
uint32_t DwordLength; - uint32_t ChromaKeyTableIndex; - uint32_t ChromaKeyLowValue; - uint32_t ChromaKeyHighValue; -}; - -static inline void -GEN75_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CHROMA_KEY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ChromaKeyTableIndex, 30, 31) | - 0; - - dw[2] = - __gen_field(values->ChromaKeyLowValue, 0, 31) | - 0; - - dw[3] = - __gen_field(values->ChromaKeyHighValue, 0, 31) | - 0; - -} - -#define GEN75_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 -#define GEN75_3DSTATE_CLEAR_PARAMS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 4, \ - .DwordLength = 1 - -#define GEN75_3DSTATE_CLEAR_PARAMS_length 0x00000003 - -struct GEN75_3DSTATE_CLEAR_PARAMS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t DepthClearValue; - bool DepthClearValueValid; -}; - -static inline void -GEN75_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CLEAR_PARAMS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->DepthClearValue, 0, 31) | - 0; - - dw[2] = - __gen_field(values->DepthClearValueValid, 0, 0) | - 0; - -} - -#define GEN75_3DSTATE_CLIP_length_bias 0x00000002 -#define GEN75_3DSTATE_CLIP_header 
\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 18, \ - .DwordLength = 2 - -#define GEN75_3DSTATE_CLIP_length 0x00000004 - -struct GEN75_3DSTATE_CLIP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t FrontWinding; - uint32_t VertexSubPixelPrecisionSelect; - bool EarlyCullEnable; -#define CULLMODE_BOTH 0 -#define CULLMODE_NONE 1 -#define CULLMODE_FRONT 2 -#define CULLMODE_BACK 3 - uint32_t CullMode; - bool ClipperStatisticsEnable; - uint32_t UserClipDistanceCullTestEnableBitmask; - bool ClipEnable; -#define APIMODE_OGL 0 - uint32_t APIMode; - bool ViewportXYClipTestEnable; - bool ViewportZClipTestEnable; - bool GuardbandClipTestEnable; - uint32_t UserClipDistanceClipTestEnableBitmask; -#define CLIPMODE_NORMAL 0 -#define CLIPMODE_REJECT_ALL 3 -#define CLIPMODE_ACCEPT_ALL 4 - uint32_t ClipMode; - bool PerspectiveDivideDisable; - bool NonPerspectiveBarycentricEnable; -#define Vertex0 0 -#define Vertex1 1 -#define Vertex2 2 - uint32_t TriangleStripListProvokingVertexSelect; -#define Vertex0 0 -#define Vertex1 1 - uint32_t LineStripListProvokingVertexSelect; -#define Vertex0 0 -#define Vertex1 1 -#define Vertex2 2 - uint32_t TriangleFanProvokingVertexSelect; - float MinimumPointWidth; - float MaximumPointWidth; - bool ForceZeroRTAIndexEnable; - uint32_t MaximumVPIndex; -}; - -static inline void -GEN75_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CLIP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->FrontWinding, 20, 20) | - __gen_field(values->VertexSubPixelPrecisionSelect, 
19, 19) | - __gen_field(values->EarlyCullEnable, 18, 18) | - __gen_field(values->CullMode, 16, 17) | - __gen_field(values->ClipperStatisticsEnable, 10, 10) | - __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | - 0; - - dw[2] = - __gen_field(values->ClipEnable, 31, 31) | - __gen_field(values->APIMode, 30, 30) | - __gen_field(values->ViewportXYClipTestEnable, 28, 28) | - __gen_field(values->ViewportZClipTestEnable, 27, 27) | - __gen_field(values->GuardbandClipTestEnable, 26, 26) | - __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | - __gen_field(values->ClipMode, 13, 15) | - __gen_field(values->PerspectiveDivideDisable, 9, 9) | - __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | - __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | - __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | - __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | - 0; - - dw[3] = - __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | - __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | - __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | - __gen_field(values->MaximumVPIndex, 0, 3) | - 0; - -} - -#define GEN75_3DSTATE_CONSTANT_DS_length_bias 0x00000002 -#define GEN75_3DSTATE_CONSTANT_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 26, \ - .DwordLength = 5 - -#define GEN75_3DSTATE_CONSTANT_DS_length 0x00000007 - -#define GEN75_3DSTATE_CONSTANT_BODY_length 0x00000006 - -struct GEN75_3DSTATE_CONSTANT_BODY { - uint32_t ConstantBuffer1ReadLength; - uint32_t ConstantBuffer0ReadLength; - uint32_t ConstantBuffer3ReadLength; - uint32_t ConstantBuffer2ReadLength; - __gen_address_type PointerToConstantBuffer0; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; - __gen_address_type PointerToConstantBuffer1; - __gen_address_type PointerToConstantBuffer2; - __gen_address_type PointerToConstantBuffer3; -}; - -static 
inline void -GEN75_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CONSTANT_BODY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | - 0; - - uint32_t dw_ConstantBufferObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); - uint32_t dw2 = - __gen_field(dw_ConstantBufferObjectControlState, 0, 4) | - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->PointerToConstantBuffer1, dw3); - - uint32_t dw4 = - 0; - - dw[4] = - __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer2, dw4); - - uint32_t dw5 = - 0; - - dw[5] = - __gen_combine_address(data, &dw[5], values->PointerToConstantBuffer3, dw5); - -} - -struct GEN75_3DSTATE_CONSTANT_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN75_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CONSTANT_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN75_3DSTATE_CONSTANT_GS_length_bias 0x00000002 
-#define GEN75_3DSTATE_CONSTANT_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 22, \ - .DwordLength = 5 - -#define GEN75_3DSTATE_CONSTANT_GS_length 0x00000007 - -struct GEN75_3DSTATE_CONSTANT_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN75_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CONSTANT_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN75_3DSTATE_CONSTANT_HS_length_bias 0x00000002 -#define GEN75_3DSTATE_CONSTANT_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 25, \ - .DwordLength = 5 - -#define GEN75_3DSTATE_CONSTANT_HS_length 0x00000007 - -struct GEN75_3DSTATE_CONSTANT_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN75_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CONSTANT_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - 
GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN75_3DSTATE_CONSTANT_PS_length_bias 0x00000002 -#define GEN75_3DSTATE_CONSTANT_PS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 23, \ - .DwordLength = 5 - -#define GEN75_3DSTATE_CONSTANT_PS_length 0x00000007 - -struct GEN75_3DSTATE_CONSTANT_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN75_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CONSTANT_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN75_3DSTATE_CONSTANT_VS_length_bias 0x00000002 -#define GEN75_3DSTATE_CONSTANT_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 21, \ - .DwordLength = 5 - -#define GEN75_3DSTATE_CONSTANT_VS_length 0x00000007 - -struct GEN75_3DSTATE_CONSTANT_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN75_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_CONSTANT_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - 
__gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN75_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 -#define GEN75_3DSTATE_DEPTH_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 5, \ - .DwordLength = 5 - -#define GEN75_3DSTATE_DEPTH_BUFFER_length 0x00000007 - -struct GEN75_3DSTATE_DEPTH_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define SURFTYPE_1D 0 -#define SURFTYPE_2D 1 -#define SURFTYPE_3D 2 -#define SURFTYPE_CUBE 3 -#define SURFTYPE_NULL 7 - uint32_t SurfaceType; - bool DepthWriteEnable; - bool StencilWriteEnable; - bool HierarchicalDepthBufferEnable; -#define D32_FLOAT 1 -#define D24_UNORM_X8_UINT 3 -#define D16_UNORM 5 - uint32_t SurfaceFormat; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; - uint32_t Height; - uint32_t Width; - uint32_t LOD; -#define SURFTYPE_CUBEmustbezero 0 - uint32_t Depth; - uint32_t MinimumArrayElement; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; - uint32_t DepthCoordinateOffsetY; - uint32_t DepthCoordinateOffsetX; - uint32_t RenderTargetViewExtent; -}; - -static inline void -GEN75_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_DEPTH_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SurfaceType, 29, 31) | - __gen_field(values->DepthWriteEnable, 28, 28) | - __gen_field(values->StencilWriteEnable, 27, 27) | - 
__gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | - __gen_field(values->SurfaceFormat, 18, 20) | - __gen_field(values->SurfacePitch, 0, 17) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - - dw[3] = - __gen_field(values->Height, 18, 31) | - __gen_field(values->Width, 4, 17) | - __gen_field(values->LOD, 0, 3) | - 0; - - uint32_t dw_DepthBufferObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); - dw[4] = - __gen_field(values->Depth, 21, 31) | - __gen_field(values->MinimumArrayElement, 10, 20) | - __gen_field(dw_DepthBufferObjectControlState, 0, 3) | - 0; - - dw[5] = - __gen_field(values->DepthCoordinateOffsetY, 16, 31) | - __gen_field(values->DepthCoordinateOffsetX, 0, 15) | - 0; - - dw[6] = - __gen_field(values->RenderTargetViewExtent, 21, 31) | - 0; - -} - -#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length_bias 0x00000002 -#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 37, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length 0x00000002 - -struct GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoDEPTH_STENCIL_STATE; -}; - -static inline void -GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; 
- - dw[1] = - __gen_offset(values->PointertoDEPTH_STENCIL_STATE, 6, 31) | - __gen_mbo(0, 0) | - 0; - -} - -#define GEN75_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 -#define GEN75_3DSTATE_DRAWING_RECTANGLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 0, \ - .DwordLength = 2 - -#define GEN75_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 - -struct GEN75_3DSTATE_DRAWING_RECTANGLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; -#define Legacy 0 -#define Core0Enabled 1 -#define Core1Enabled 2 - uint32_t CoreModeSelect; - uint32_t DwordLength; - uint32_t ClippedDrawingRectangleYMin; - uint32_t ClippedDrawingRectangleXMin; - uint32_t ClippedDrawingRectangleYMax; - uint32_t ClippedDrawingRectangleXMax; - uint32_t DrawingRectangleOriginY; - uint32_t DrawingRectangleOriginX; -}; - -static inline void -GEN75_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_DRAWING_RECTANGLE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->CoreModeSelect, 14, 15) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | - __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | - 0; - - dw[2] = - __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | - __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | - 0; - - dw[3] = - __gen_field(values->DrawingRectangleOriginY, 16, 31) | - __gen_field(values->DrawingRectangleOriginX, 0, 15) | - 0; - -} - -#define GEN75_3DSTATE_DS_length_bias 0x00000002 -#define GEN75_3DSTATE_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 
0, \ - ._3DCommandSubOpcode = 29, \ - .DwordLength = 4 - -#define GEN75_3DSTATE_DS_length 0x00000006 - -struct GEN75_3DSTATE_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t KernelStartPointer; -#define Multiple 0 -#define Single 1 - uint32_t SingleDomainPointDispatch; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadDispatchPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool AccessesUAV; - bool IllegalOpcodeExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t DispatchGRFStartRegisterForURBData; - uint32_t PatchURBEntryReadLength; - uint32_t PatchURBEntryReadOffset; - uint32_t MaximumNumberofThreads; - bool StatisticsEnable; - bool ComputeWCoordinateEnable; - bool DSCacheDisable; - bool DSFunctionEnable; -}; - -static inline void -GEN75_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->KernelStartPointer, 6, 31) | - 0; - - dw[2] = - __gen_field(values->SingleDomainPointDispatch, 31, 31) | - __gen_field(values->VectorMaskEnable, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadDispatchPriority, 17, 17) | - 
__gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->AccessesUAV, 14, 14) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - dw[3] = - __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = - __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | - __gen_field(values->PatchURBEntryReadLength, 11, 17) | - __gen_field(values->PatchURBEntryReadOffset, 4, 9) | - 0; - - dw[5] = - __gen_field(values->MaximumNumberofThreads, 21, 29) | - __gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->ComputeWCoordinateEnable, 2, 2) | - __gen_field(values->DSCacheDisable, 1, 1) | - __gen_field(values->DSFunctionEnable, 0, 0) | - 0; - -} - -#define GEN75_3DSTATE_GATHER_CONSTANT_DS_length_bias 0x00000002 -#define GEN75_3DSTATE_GATHER_CONSTANT_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 55 - -#define GEN75_3DSTATE_GATHER_CONSTANT_DS_length 0x00000000 - -#define GEN75_GATHER_CONSTANT_ENTRY_length 0x00000001 - -struct GEN75_GATHER_CONSTANT_ENTRY { - uint32_t ConstantBufferOffset; - uint32_t ChannelMask; - uint32_t BindingTableIndexOffset; -}; - -static inline void -GEN75_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_GATHER_CONSTANT_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->ConstantBufferOffset, 8, 15) | - __gen_field(values->ChannelMask, 4, 7) | - __gen_field(values->BindingTableIndexOffset, 0, 3) | - 0; - -} - -struct GEN75_3DSTATE_GATHER_CONSTANT_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; - uint32_t GatherBufferOffset; - /* variable length fields 
follow */ -}; - -static inline void -GEN75_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GATHER_CONSTANT_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_GATHER_CONSTANT_GS_length_bias 0x00000002 -#define GEN75_3DSTATE_GATHER_CONSTANT_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 53 - -#define GEN75_3DSTATE_GATHER_CONSTANT_GS_length 0x00000000 - -struct GEN75_3DSTATE_GATHER_CONSTANT_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; - uint32_t GatherBufferOffset; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GATHER_CONSTANT_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - 0; - - dw[2] = - 
__gen_offset(values->GatherBufferOffset, 6, 22) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_GATHER_CONSTANT_HS_length_bias 0x00000002 -#define GEN75_3DSTATE_GATHER_CONSTANT_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 54 - -#define GEN75_3DSTATE_GATHER_CONSTANT_HS_length 0x00000000 - -struct GEN75_3DSTATE_GATHER_CONSTANT_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; - uint32_t GatherBufferOffset; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GATHER_CONSTANT_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_GATHER_CONSTANT_PS_length_bias 0x00000002 -#define GEN75_3DSTATE_GATHER_CONSTANT_PS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 56 - -#define GEN75_3DSTATE_GATHER_CONSTANT_PS_length 0x00000000 - -struct GEN75_3DSTATE_GATHER_CONSTANT_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; - uint32_t 
GatherBufferOffset; - bool ConstantBufferDx9Enable; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GATHER_CONSTANT_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - __gen_field(values->ConstantBufferDx9Enable, 4, 4) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_GATHER_CONSTANT_VS_length_bias 0x00000002 -#define GEN75_3DSTATE_GATHER_CONSTANT_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 52 - -#define GEN75_3DSTATE_GATHER_CONSTANT_VS_length 0x00000000 - -struct GEN75_3DSTATE_GATHER_CONSTANT_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; - uint32_t GatherBufferOffset; - bool ConstantBufferDx9Enable; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GATHER_CONSTANT_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] 
= - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - __gen_field(values->ConstantBufferDx9Enable, 4, 4) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 -#define GEN75_3DSTATE_GATHER_POOL_ALLOC_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 26, \ - .DwordLength = 1 - -#define GEN75_3DSTATE_GATHER_POOL_ALLOC_length 0x00000003 - -struct GEN75_3DSTATE_GATHER_POOL_ALLOC { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type GatherPoolBaseAddress; - bool GatherPoolEnable; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; - __gen_address_type GatherPoolUpperBound; -}; - -static inline void -GEN75_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GATHER_POOL_ALLOC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_MemoryObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); - uint32_t dw1 = - __gen_field(values->GatherPoolEnable, 11, 11) | - __gen_mbo(4, 5) | - __gen_field(dw_MemoryObjectControlState, 0, 3) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->GatherPoolUpperBound, dw2); - -} - -#define GEN75_3DSTATE_GS_length_bias 0x00000002 -#define 
GEN75_3DSTATE_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 17, \ - .DwordLength = 5 - -#define GEN75_3DSTATE_GS_length 0x00000007 - -struct GEN75_3DSTATE_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t KernelStartPointer; - uint32_t SingleProgramFlowSPF; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnableVME; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define NormalPriority 0 -#define HighPriority 1 - uint32_t ThreadPriority; -#define IEEE754 0 -#define alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - uint32_t GSaccessesUAV; - bool MaskStackExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t OutputVertexSize; - uint32_t OutputTopology; - uint32_t VertexURBEntryReadLength; - bool IncludeVertexHandles; - uint32_t VertexURBEntryReadOffset; - uint32_t DispatchGRFStartRegisterforURBData; - uint32_t MaximumNumberofThreads; - uint32_t ControlDataHeaderSize; - uint32_t InstanceControl; - uint32_t DefaultStreamID; -#define SINGLE 0 -#define DUAL_INSTANCE 1 -#define DUAL_OBJECT 2 - uint32_t DispatchMode; - uint32_t GSStatisticsEnable; - uint32_t GSInvocationsIncrementValue; - bool IncludePrimitiveID; - uint32_t Hint; -#define REORDER_LEADING 0 -#define REORDER_TRAILING 1 - uint32_t ReorderMode; - bool DiscardAdjacency; - bool GSEnable; -#define GSCTL_CUT 0 -#define GSCTL_SID 1 - uint32_t ControlDataFormat; - uint32_t SemaphoreHandle; -}; - -static inline void -GEN75_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - 
__gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->KernelStartPointer, 6, 31) | - 0; - - dw[2] = - __gen_field(values->SingleProgramFlowSPF, 31, 31) | - __gen_field(values->VectorMaskEnableVME, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->GSaccessesUAV, 12, 12) | - __gen_field(values->MaskStackExceptionEnable, 11, 11) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - dw[3] = - __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = - __gen_field(values->OutputVertexSize, 23, 28) | - __gen_field(values->OutputTopology, 17, 22) | - __gen_field(values->VertexURBEntryReadLength, 11, 16) | - __gen_field(values->IncludeVertexHandles, 10, 10) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - __gen_field(values->DispatchGRFStartRegisterforURBData, 0, 3) | - 0; - - dw[5] = - __gen_field(values->MaximumNumberofThreads, 24, 31) | - __gen_field(values->ControlDataHeaderSize, 20, 23) | - __gen_field(values->InstanceControl, 15, 19) | - __gen_field(values->DefaultStreamID, 13, 14) | - __gen_field(values->DispatchMode, 11, 12) | - __gen_field(values->GSStatisticsEnable, 10, 10) | - __gen_field(values->GSInvocationsIncrementValue, 5, 9) | - __gen_field(values->IncludePrimitiveID, 4, 4) | - __gen_field(values->Hint, 3, 3) | - __gen_field(values->ReorderMode, 2, 2) | - __gen_field(values->DiscardAdjacency, 1, 1) | - __gen_field(values->GSEnable, 0, 0) | - 0; - - dw[6] = - __gen_field(values->ControlDataFormat, 31, 31) | - 
__gen_offset(values->SemaphoreHandle, 0, 12) | - 0; - -} - -#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 -#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 7, \ - .DwordLength = 1 - -#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000003 - -struct GEN75_3DSTATE_HIER_DEPTH_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; -}; - -static inline void -GEN75_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_HIER_DEPTH_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_HierarchicalDepthBufferObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); - dw[1] = - __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 28) | - __gen_field(values->SurfacePitch, 0, 16) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - -} - -#define GEN75_3DSTATE_HS_length_bias 0x00000002 -#define GEN75_3DSTATE_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 27, \ - .DwordLength = 5 - -#define GEN75_3DSTATE_HS_length 0x00000007 - -struct GEN75_3DSTATE_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t 
_3DCommandSubOpcode; - uint32_t DwordLength; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadDispatchPriority; -#define IEEE754 0 -#define alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t MaximumNumberofThreads; - bool Enable; - bool StatisticsEnable; - uint32_t InstanceCount; - uint32_t KernelStartPointer; - uint32_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t SingleProgramFlow; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; - bool HSaccessesUAV; - bool IncludeVertexHandles; - uint32_t DispatchGRFStartRegisterForURBData; - uint32_t VertexURBEntryReadLength; - uint32_t VertexURBEntryReadOffset; - uint32_t SemaphoreHandle; -}; - -static inline void -GEN75_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadDispatchPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->SoftwareExceptionEnable, 12, 12) | - __gen_field(values->MaximumNumberofThreads, 0, 7) | - 0; - - dw[2] = - __gen_field(values->Enable, 31, 31) | - __gen_field(values->StatisticsEnable, 29, 29) | - __gen_field(values->InstanceCount, 0, 3) | - 0; - - dw[3] = - __gen_offset(values->KernelStartPointer, 6, 31) | - 0; - - 
dw[4] = - __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[5] = - __gen_field(values->SingleProgramFlow, 27, 27) | - __gen_field(values->VectorMaskEnable, 26, 26) | - __gen_field(values->HSaccessesUAV, 25, 25) | - __gen_field(values->IncludeVertexHandles, 24, 24) | - __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | - __gen_field(values->VertexURBEntryReadLength, 11, 16) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - 0; - - dw[6] = - __gen_offset(values->SemaphoreHandle, 0, 12) | - 0; - -} - -#define GEN75_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 -#define GEN75_3DSTATE_INDEX_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 10, \ - .DwordLength = 1 - -#define GEN75_3DSTATE_INDEX_BUFFER_length 0x00000003 - -struct GEN75_3DSTATE_INDEX_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; -#define INDEX_BYTE 0 -#define INDEX_WORD 1 -#define INDEX_DWORD 2 - uint32_t IndexFormat; - uint32_t DwordLength; - __gen_address_type BufferStartingAddress; - __gen_address_type BufferEndingAddress; -}; - -static inline void -GEN75_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_INDEX_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_MemoryObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(dw_MemoryObjectControlState, 12, 15) | - __gen_field(values->IndexFormat, 8, 9) | - 
__gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->BufferEndingAddress, dw2); - -} - -#define GEN75_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 -#define GEN75_3DSTATE_LINE_STIPPLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 8, \ - .DwordLength = 1 - -#define GEN75_3DSTATE_LINE_STIPPLE_length 0x00000003 - -struct GEN75_3DSTATE_LINE_STIPPLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; - uint32_t CurrentRepeatCounter; - uint32_t CurrentStippleIndex; - uint32_t LineStipplePattern; - float LineStippleInverseRepeatCount; - uint32_t LineStippleRepeatCount; -}; - -static inline void -GEN75_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_LINE_STIPPLE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | - __gen_field(values->CurrentRepeatCounter, 21, 29) | - __gen_field(values->CurrentStippleIndex, 16, 19) | - __gen_field(values->LineStipplePattern, 0, 15) | - 0; - - dw[2] = - __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | - __gen_field(values->LineStippleRepeatCount, 0, 8) | - 0; - -} - -#define GEN75_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 -#define GEN75_3DSTATE_MONOFILTER_SIZE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - 
._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 17, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_MONOFILTER_SIZE_length 0x00000002 - -struct GEN75_3DSTATE_MONOFILTER_SIZE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t MonochromeFilterWidth; - uint32_t MonochromeFilterHeight; -}; - -static inline void -GEN75_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_MONOFILTER_SIZE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->MonochromeFilterWidth, 3, 5) | - __gen_field(values->MonochromeFilterHeight, 0, 2) | - 0; - -} - -#define GEN75_3DSTATE_MULTISAMPLE_length_bias 0x00000002 -#define GEN75_3DSTATE_MULTISAMPLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 13, \ - .DwordLength = 2 - -#define GEN75_3DSTATE_MULTISAMPLE_length 0x00000004 - -struct GEN75_3DSTATE_MULTISAMPLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool MultiSampleEnable; -#define PIXLOC_CENTER 0 -#define PIXLOC_UL_CORNER 1 - uint32_t PixelLocation; -#define NUMSAMPLES_1 0 -#define NUMSAMPLES_4 2 -#define NUMSAMPLES_8 3 - uint32_t NumberofMultisamples; - float Sample3XOffset; - float Sample3YOffset; - float Sample2XOffset; - float Sample2YOffset; - float Sample1XOffset; - float Sample1YOffset; - float Sample0XOffset; - float Sample0YOffset; - float Sample7XOffset; - float Sample7YOffset; - float Sample6XOffset; - float Sample6YOffset; - float Sample5XOffset; - float Sample5YOffset; - float 
Sample4XOffset; - float Sample4YOffset; -}; - -static inline void -GEN75_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_MULTISAMPLE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->MultiSampleEnable, 5, 5) | - __gen_field(values->PixelLocation, 4, 4) | - __gen_field(values->NumberofMultisamples, 1, 3) | - 0; - - dw[2] = - __gen_field(values->Sample3XOffset * (1 << 4), 28, 31) | - __gen_field(values->Sample3YOffset * (1 << 4), 24, 27) | - __gen_field(values->Sample2XOffset * (1 << 4), 20, 23) | - __gen_field(values->Sample2YOffset * (1 << 4), 16, 19) | - __gen_field(values->Sample1XOffset * (1 << 4), 12, 15) | - __gen_field(values->Sample1YOffset * (1 << 4), 8, 11) | - __gen_field(values->Sample0XOffset * (1 << 4), 4, 7) | - __gen_field(values->Sample0YOffset * (1 << 4), 0, 3) | - 0; - - dw[3] = - __gen_field(values->Sample7XOffset * (1 << 4), 28, 31) | - __gen_field(values->Sample7YOffset * (1 << 4), 24, 27) | - __gen_field(values->Sample6XOffset * (1 << 4), 20, 23) | - __gen_field(values->Sample6YOffset * (1 << 4), 16, 19) | - __gen_field(values->Sample5XOffset * (1 << 4), 12, 15) | - __gen_field(values->Sample5YOffset * (1 << 4), 8, 11) | - __gen_field(values->Sample4XOffset * (1 << 4), 4, 7) | - __gen_field(values->Sample4YOffset * (1 << 4), 0, 3) | - 0; - -} - -#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 -#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 6, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 - -struct GEN75_3DSTATE_POLY_STIPPLE_OFFSET { - 
uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PolygonStippleXOffset; - uint32_t PolygonStippleYOffset; -}; - -static inline void -GEN75_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->PolygonStippleXOffset, 8, 12) | - __gen_field(values->PolygonStippleYOffset, 0, 4) | - 0; - -} - -#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 -#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 7, \ - .DwordLength = 31 - -#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 - -struct GEN75_3DSTATE_POLY_STIPPLE_PATTERN { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PatternRow[32]; -}; - -static inline void -GEN75_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { - dw[j] = - __gen_field(values->PatternRow[i + 0], 0, 31) | - 0; - } - -} - -#define GEN75_3DSTATE_PS_length_bias 0x00000002 
-#define GEN75_3DSTATE_PS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 32, \ - .DwordLength = 6 - -#define GEN75_3DSTATE_PS_length 0x00000008 - -struct GEN75_3DSTATE_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t KernelStartPointer0; -#define Multiple 0 -#define Single 1 - uint32_t SingleProgramFlowSPF; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnableVME; - uint32_t SamplerCount; -#define FTZ 0 -#define RET 1 - uint32_t DenormalMode; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadPriority; -#define IEEE745 0 -#define Alt 1 - uint32_t FloatingPointMode; -#define RTNE 0 -#define RU 1 -#define RD 2 -#define RTZ 3 - uint32_t RoundingMode; - bool IllegalOpcodeExceptionEnable; - bool MaskStackExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t MaximumNumberofThreads; - uint32_t SampleMask; - bool PushConstantEnable; - bool AttributeEnable; - bool oMaskPresenttoRenderTarget; - bool RenderTargetFastClearEnable; - bool DualSourceBlendEnable; - bool RenderTargetResolveEnable; - bool PSAccessesUAV; -#define POSOFFSET_NONE 0 -#define POSOFFSET_CENTROID 2 -#define POSOFFSET_SAMPLE 3 - uint32_t PositionXYOffsetSelect; - bool _32PixelDispatchEnable; - bool _16PixelDispatchEnable; - bool _8PixelDispatchEnable; - uint32_t DispatchGRFStartRegisterforConstantSetupData0; - uint32_t DispatchGRFStartRegisterforConstantSetupData1; - uint32_t DispatchGRFStartRegisterforConstantSetupData2; - uint32_t KernelStartPointer1; - uint32_t KernelStartPointer2; -}; - -static inline void -GEN75_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - 
__gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->KernelStartPointer0, 6, 31) | - 0; - - dw[2] = - __gen_field(values->SingleProgramFlowSPF, 31, 31) | - __gen_field(values->VectorMaskEnableVME, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->DenormalMode, 26, 26) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->RoundingMode, 14, 15) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->MaskStackExceptionEnable, 11, 11) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - dw[3] = - __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = - __gen_field(values->MaximumNumberofThreads, 23, 31) | - __gen_field(values->SampleMask, 12, 19) | - __gen_field(values->PushConstantEnable, 11, 11) | - __gen_field(values->AttributeEnable, 10, 10) | - __gen_field(values->oMaskPresenttoRenderTarget, 9, 9) | - __gen_field(values->RenderTargetFastClearEnable, 8, 8) | - __gen_field(values->DualSourceBlendEnable, 7, 7) | - __gen_field(values->RenderTargetResolveEnable, 6, 6) | - __gen_field(values->PSAccessesUAV, 5, 5) | - __gen_field(values->PositionXYOffsetSelect, 3, 4) | - __gen_field(values->_32PixelDispatchEnable, 2, 2) | - __gen_field(values->_16PixelDispatchEnable, 1, 1) | - __gen_field(values->_8PixelDispatchEnable, 0, 0) | - 0; - - dw[5] = - __gen_field(values->DispatchGRFStartRegisterforConstantSetupData0, 16, 22) | - __gen_field(values->DispatchGRFStartRegisterforConstantSetupData1, 8, 14) | - __gen_field(values->DispatchGRFStartRegisterforConstantSetupData2, 0, 6) | - 0; - - dw[6] = - __gen_offset(values->KernelStartPointer1, 
6, 31) | - 0; - - dw[7] = - __gen_offset(values->KernelStartPointer2, 6, 31) | - 0; - -} - -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 20, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 - -struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 21, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 - -struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS * 
restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 19, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 - -struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 22, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 - -struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS { - uint32_t 
CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 18, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 - -struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define 
GEN75_3DSTATE_RAST_MULTISAMPLE_length_bias 0x00000002 -#define GEN75_3DSTATE_RAST_MULTISAMPLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 14, \ - .DwordLength = 4 - -#define GEN75_3DSTATE_RAST_MULTISAMPLE_length 0x00000006 - -struct GEN75_3DSTATE_RAST_MULTISAMPLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define NRM_NUMRASTSAMPLES_1 0 -#define NRM_NUMRASTSAMPLES_2 1 -#define NRM_NUMRASTSAMPLES_4 2 -#define NRM_NUMRASTSAMPLES_8 3 -#define NRM_NUMRASTSAMPLES_16 4 - uint32_t NumberofRasterizationMultisamples; - float Sample3XOffset; - float Sample3YOffset; - float Sample2XOffset; - float Sample2YOffset; - float Sample1XOffset; - float Sample1YOffset; - float Sample0XOffset; - float Sample0YOffset; - float Sample7XOffset; - float Sample7YOffset; - float Sample6XOffset; - float Sample6YOffset; - float Sample5XOffset; - float Sample5YOffset; - float Sample4XOffset; - float Sample4YOffset; - float Sample11XOffset; - float Sample11YOffset; - float Sample10XOffset; - float Sample10YOffset; - float Sample9XOffset; - float Sample9YOffset; - float Sample8XOffset; - float Sample8YOffset; - float Sample15XOffset; - float Sample15YOffset; - float Sample14XOffset; - float Sample14YOffset; - float Sample13XOffset; - float Sample13YOffset; - float Sample12XOffset; - float Sample12YOffset; -}; - -static inline void -GEN75_3DSTATE_RAST_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_RAST_MULTISAMPLE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->NumberofRasterizationMultisamples, 1, 
3) | - 0; - - dw[2] = - __gen_field(values->Sample3XOffset * (1 << 4), 28, 31) | - __gen_field(values->Sample3YOffset * (1 << 4), 24, 27) | - __gen_field(values->Sample2XOffset * (1 << 4), 20, 23) | - __gen_field(values->Sample2YOffset * (1 << 4), 16, 19) | - __gen_field(values->Sample1XOffset * (1 << 4), 12, 15) | - __gen_field(values->Sample1YOffset * (1 << 4), 8, 11) | - __gen_field(values->Sample0XOffset * (1 << 4), 4, 7) | - __gen_field(values->Sample0YOffset * (1 << 4), 0, 3) | - 0; - - dw[3] = - __gen_field(values->Sample7XOffset * (1 << 4), 28, 31) | - __gen_field(values->Sample7YOffset * (1 << 4), 24, 27) | - __gen_field(values->Sample6XOffset * (1 << 4), 20, 23) | - __gen_field(values->Sample6YOffset * (1 << 4), 16, 19) | - __gen_field(values->Sample5XOffset * (1 << 4), 12, 15) | - __gen_field(values->Sample5YOffset * (1 << 4), 8, 11) | - __gen_field(values->Sample4XOffset * (1 << 4), 4, 7) | - __gen_field(values->Sample4YOffset * (1 << 4), 0, 3) | - 0; - - dw[4] = - __gen_field(values->Sample11XOffset * (1 << 4), 28, 31) | - __gen_field(values->Sample11YOffset * (1 << 4), 24, 27) | - __gen_field(values->Sample10XOffset * (1 << 4), 20, 23) | - __gen_field(values->Sample10YOffset * (1 << 4), 16, 19) | - __gen_field(values->Sample9XOffset * (1 << 4), 12, 15) | - __gen_field(values->Sample9YOffset * (1 << 4), 8, 11) | - __gen_field(values->Sample8XOffset * (1 << 4), 4, 7) | - __gen_field(values->Sample8YOffset * (1 << 4), 0, 3) | - 0; - - dw[5] = - __gen_field(values->Sample15XOffset * (1 << 4), 28, 31) | - __gen_field(values->Sample15YOffset * (1 << 4), 24, 27) | - __gen_field(values->Sample14XOffset * (1 << 4), 20, 23) | - __gen_field(values->Sample14YOffset * (1 << 4), 16, 19) | - __gen_field(values->Sample13XOffset * (1 << 4), 12, 15) | - __gen_field(values->Sample13YOffset * (1 << 4), 8, 11) | - __gen_field(values->Sample12XOffset * (1 << 4), 4, 7) | - __gen_field(values->Sample12YOffset * (1 << 4), 0, 3) | - 0; - -} - -#define 
GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 -#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 2 - -#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 - -#define GEN75_PALETTE_ENTRY_length 0x00000001 - -struct GEN75_PALETTE_ENTRY { - uint32_t Alpha; - uint32_t Red; - uint32_t Green; - uint32_t Blue; -}; - -static inline void -GEN75_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_PALETTE_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->Alpha, 24, 31) | - __gen_field(values->Red, 16, 23) | - __gen_field(values->Green, 8, 15) | - __gen_field(values->Blue, 0, 7) | - 0; - -} - -struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0 { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 -#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 12 - -#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 - -struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1 { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - 
uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 45, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 - -struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoDSSamplerState; -}; - -static inline void -GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoDSSamplerState, 5, 31) | - 0; - -} - -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 46, \ - .DwordLength = 0 - -#define 
GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 - -struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoGSSamplerState; -}; - -static inline void -GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoGSSamplerState, 5, 31) | - 0; - -} - -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 44, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 - -struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoHSSamplerState; -}; - -static inline void -GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoHSSamplerState, 5, 31) | - 0; - -} - -#define 
GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 47, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 - -struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoPSSamplerState; -}; - -static inline void -GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoPSSamplerState, 5, 31) | - 0; - -} - -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 43, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 - -struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoVSSamplerState; -}; - -static inline void -GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | 
- __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoVSSamplerState, 5, 31) | - 0; - -} - -#define GEN75_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 -#define GEN75_3DSTATE_SAMPLE_MASK_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 24, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_SAMPLE_MASK_length 0x00000002 - -struct GEN75_3DSTATE_SAMPLE_MASK { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SampleMask; -}; - -static inline void -GEN75_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SAMPLE_MASK * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SampleMask, 0, 7) | - 0; - -} - -#define GEN75_3DSTATE_SBE_length_bias 0x00000002 -#define GEN75_3DSTATE_SBE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 31, \ - .DwordLength = 12 - -#define GEN75_3DSTATE_SBE_length 0x0000000e - -struct GEN75_3DSTATE_SBE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t AttributeSwizzleControlMode; - uint32_t NumberofSFOutputAttributes; - bool AttributeSwizzleEnable; -#define UPPERLEFT 0 -#define LOWERLEFT 1 - uint32_t PointSpriteTextureCoordinateOrigin; - uint32_t VertexURBEntryReadLength; - uint32_t VertexURBEntryReadOffset; - bool Attribute2n1ComponentOverrideW; - bool 
Attribute2n1ComponentOverrideZ; - bool Attribute2n1ComponentOverrideY; - bool Attribute2n1ComponentOverrideX; -#define CONST_0000 0 -#define CONST_0001_FLOAT 1 -#define CONST_1111_FLOAT 2 -#define PRIM_ID 3 - uint32_t Attribute2n1ConstantSource; -#define INPUTATTR 0 -#define INPUTATTR_FACING 1 -#define INPUTATTR_W 2 -#define INPUTATTR_FACING_W 3 - uint32_t Attribute2n1SwizzleSelect; - uint32_t Attribute2n1SourceAttribute; - bool Attribute2nComponentOverrideW; - bool Attribute2nComponentOverrideZ; - bool Attribute2nComponentOverrideY; - bool Attribute2nComponentOverrideX; -#define CONST_0000 0 -#define CONST_0001_FLOAT 1 -#define CONST_1111_FLOAT 2 -#define PRIM_ID 3 - uint32_t Attribute2nConstantSource; -#define INPUTATTR 0 -#define INPUTATTR_FACING 1 -#define INPUTATTR_W 2 -#define INPUTATTR_FACING_W 3 - uint32_t Attribute2nSwizzleSelect; - uint32_t Attribute2nSourceAttribute; - uint32_t PointSpriteTextureCoordinateEnable; - uint32_t ConstantInterpolationEnable310; - uint32_t Attribute7WrapShortestEnables; - uint32_t Attribute6WrapShortestEnables; - uint32_t Attribute5WrapShortestEnables; - uint32_t Attribute4WrapShortestEnables; - uint32_t Attribute3WrapShortestEnables; - uint32_t Attribute2WrapShortestEnables; - uint32_t Attribute1WrapShortestEnables; - uint32_t Attribute0WrapShortestEnables; - uint32_t Attribute15WrapShortestEnables; - uint32_t Attribute14WrapShortestEnables; - uint32_t Attribute13WrapShortestEnables; - uint32_t Attribute12WrapShortestEnables; - uint32_t Attribute11WrapShortestEnables; - uint32_t Attribute10WrapShortestEnables; - uint32_t Attribute9WrapShortestEnables; - uint32_t Attribute8WrapShortestEnables; -}; - -static inline void -GEN75_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SBE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 
24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->AttributeSwizzleControlMode, 28, 28) | - __gen_field(values->NumberofSFOutputAttributes, 22, 27) | - __gen_field(values->AttributeSwizzleEnable, 21, 21) | - __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | - __gen_field(values->VertexURBEntryReadLength, 11, 15) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - 0; - - dw[2] = - __gen_field(values->Attribute2n1ComponentOverrideW, 31, 31) | - __gen_field(values->Attribute2n1ComponentOverrideZ, 30, 30) | - __gen_field(values->Attribute2n1ComponentOverrideY, 29, 29) | - __gen_field(values->Attribute2n1ComponentOverrideX, 28, 28) | - __gen_field(values->Attribute2n1ConstantSource, 25, 26) | - __gen_field(values->Attribute2n1SwizzleSelect, 22, 23) | - __gen_field(values->Attribute2n1SourceAttribute, 16, 20) | - __gen_field(values->Attribute2nComponentOverrideW, 15, 15) | - __gen_field(values->Attribute2nComponentOverrideZ, 14, 14) | - __gen_field(values->Attribute2nComponentOverrideY, 13, 13) | - __gen_field(values->Attribute2nComponentOverrideX, 12, 12) | - __gen_field(values->Attribute2nConstantSource, 9, 10) | - __gen_field(values->Attribute2nSwizzleSelect, 6, 7) | - __gen_field(values->Attribute2nSourceAttribute, 0, 4) | - 0; - - dw[10] = - __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | - 0; - - dw[11] = - __gen_field(values->ConstantInterpolationEnable310, 0, 31) | - 0; - - dw[12] = - __gen_field(values->Attribute7WrapShortestEnables, 28, 31) | - __gen_field(values->Attribute6WrapShortestEnables, 24, 27) | - __gen_field(values->Attribute5WrapShortestEnables, 20, 23) | - __gen_field(values->Attribute4WrapShortestEnables, 16, 19) | - __gen_field(values->Attribute3WrapShortestEnables, 12, 15) | - __gen_field(values->Attribute2WrapShortestEnables, 8, 11) | - __gen_field(values->Attribute1WrapShortestEnables, 4, 7) | - 
__gen_field(values->Attribute0WrapShortestEnables, 0, 3) | - 0; - - dw[13] = - __gen_field(values->Attribute15WrapShortestEnables, 28, 31) | - __gen_field(values->Attribute14WrapShortestEnables, 24, 27) | - __gen_field(values->Attribute13WrapShortestEnables, 20, 23) | - __gen_field(values->Attribute12WrapShortestEnables, 16, 19) | - __gen_field(values->Attribute11WrapShortestEnables, 12, 15) | - __gen_field(values->Attribute10WrapShortestEnables, 8, 11) | - __gen_field(values->Attribute9WrapShortestEnables, 4, 7) | - __gen_field(values->Attribute8WrapShortestEnables, 0, 3) | - 0; - -} - -#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 -#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 15, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 - -struct GEN75_3DSTATE_SCISSOR_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ScissorRectPointer; -}; - -static inline void -GEN75_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->ScissorRectPointer, 5, 31) | - 0; - -} - -#define GEN75_3DSTATE_SF_length_bias 0x00000002 -#define GEN75_3DSTATE_SF_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 19, \ - .DwordLength = 5 - -#define GEN75_3DSTATE_SF_length 0x00000007 - -struct GEN75_3DSTATE_SF { - uint32_t CommandType; - 
uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define D32_FLOAT_S8X24_UINT 0 -#define D32_FLOAT 1 -#define D24_UNORM_S8_UINT 2 -#define D24_UNORM_X8_UINT 3 -#define D16_UNORM 5 - uint32_t DepthBufferSurfaceFormat; - bool LegacyGlobalDepthBiasEnable; - bool StatisticsEnable; - bool GlobalDepthOffsetEnableSolid; - bool GlobalDepthOffsetEnableWireframe; - bool GlobalDepthOffsetEnablePoint; -#define RASTER_SOLID 0 -#define RASTER_WIREFRAME 1 -#define RASTER_POINT 2 - uint32_t FrontFaceFillMode; -#define RASTER_SOLID 0 -#define RASTER_WIREFRAME 1 -#define RASTER_POINT 2 - uint32_t BackFaceFillMode; - bool ViewTransformEnable; - uint32_t FrontWinding; - bool AntiAliasingEnable; -#define CULLMODE_BOTH 0 -#define CULLMODE_NONE 1 -#define CULLMODE_FRONT 2 -#define CULLMODE_BACK 3 - uint32_t CullMode; - float LineWidth; - uint32_t LineEndCapAntialiasingRegionWidth; - bool LineStippleEnable; - bool ScissorRectangleEnable; - bool RTIndependentRasterizationEnable; - uint32_t MultisampleRasterizationMode; - bool LastPixelEnable; -#define Vertex0 0 -#define Vertex1 1 -#define Vertex2 2 - uint32_t TriangleStripListProvokingVertexSelect; - uint32_t LineStripListProvokingVertexSelect; -#define Vertex0 0 -#define Vertex1 1 -#define Vertex2 2 - uint32_t TriangleFanProvokingVertexSelect; -#define AALINEDISTANCE_TRUE 1 - uint32_t AALineDistanceMode; - uint32_t VertexSubPixelPrecisionSelect; - uint32_t UsePointWidthState; - float PointWidth; - float GlobalDepthOffsetConstant; - float GlobalDepthOffsetScale; - float GlobalDepthOffsetClamp; -}; - -static inline void -GEN75_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SF * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 
23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->DepthBufferSurfaceFormat, 12, 14) | - __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | - __gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | - __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | - __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | - __gen_field(values->FrontFaceFillMode, 5, 6) | - __gen_field(values->BackFaceFillMode, 3, 4) | - __gen_field(values->ViewTransformEnable, 1, 1) | - __gen_field(values->FrontWinding, 0, 0) | - 0; - - dw[2] = - __gen_field(values->AntiAliasingEnable, 31, 31) | - __gen_field(values->CullMode, 29, 30) | - __gen_field(values->LineWidth * (1 << 7), 18, 27) | - __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | - __gen_field(values->LineStippleEnable, 14, 14) | - __gen_field(values->ScissorRectangleEnable, 11, 11) | - __gen_field(values->RTIndependentRasterizationEnable, 10, 10) | - __gen_field(values->MultisampleRasterizationMode, 8, 9) | - 0; - - dw[3] = - __gen_field(values->LastPixelEnable, 31, 31) | - __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | - __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | - __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | - __gen_field(values->AALineDistanceMode, 14, 14) | - __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | - __gen_field(values->UsePointWidthState, 11, 11) | - __gen_field(values->PointWidth * (1 << 3), 0, 10) | - 0; - - dw[4] = - __gen_float(values->GlobalDepthOffsetConstant) | - 0; - - dw[5] = - __gen_float(values->GlobalDepthOffsetScale) | - 0; - - dw[6] = - __gen_float(values->GlobalDepthOffsetClamp) | - 0; - -} - -#define GEN75_3DSTATE_SO_BUFFER_length_bias 0x00000002 -#define GEN75_3DSTATE_SO_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 24, \ - .DwordLength = 
2 - -#define GEN75_3DSTATE_SO_BUFFER_length 0x00000004 - -struct GEN75_3DSTATE_SO_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SOBufferIndex; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; - __gen_address_type SurfaceEndAddress; -}; - -static inline void -GEN75_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SO_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_SOBufferObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); - dw[1] = - __gen_field(values->SOBufferIndex, 29, 30) | - __gen_field(dw_SOBufferObjectControlState, 25, 28) | - __gen_field(values->SurfacePitch, 0, 11) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->SurfaceEndAddress, dw3); - -} - -#define GEN75_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 -#define GEN75_3DSTATE_SO_DECL_LIST_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 23 - -#define GEN75_3DSTATE_SO_DECL_LIST_length 0x00000000 - -#define GEN75_SO_DECL_ENTRY_length 0x00000002 - -#define GEN75_SO_DECL_length 0x00000001 - -struct GEN75_SO_DECL { - uint32_t OutputBufferSlot; - uint32_t HoleFlag; - uint32_t RegisterIndex; - uint32_t ComponentMask; -}; - -static inline void -GEN75_SO_DECL_pack(__gen_user_data *data, void * 
restrict dst, - const struct GEN75_SO_DECL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->OutputBufferSlot, 12, 13) | - __gen_field(values->HoleFlag, 11, 11) | - __gen_field(values->RegisterIndex, 4, 9) | - __gen_field(values->ComponentMask, 0, 3) | - 0; - -} - -struct GEN75_SO_DECL_ENTRY { - struct GEN75_SO_DECL Stream3Decl; - struct GEN75_SO_DECL Stream2Decl; - struct GEN75_SO_DECL Stream1Decl; - struct GEN75_SO_DECL Stream0Decl; -}; - -static inline void -GEN75_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SO_DECL_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_Stream3Decl; - GEN75_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); - uint32_t dw_Stream2Decl; - GEN75_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); - uint32_t dw_Stream1Decl; - GEN75_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); - uint32_t dw_Stream0Decl; - GEN75_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); - uint64_t qw0 = - __gen_field(dw_Stream3Decl, 48, 63) | - __gen_field(dw_Stream2Decl, 32, 47) | - __gen_field(dw_Stream1Decl, 16, 31) | - __gen_field(dw_Stream0Decl, 0, 15) | - 0; - - dw[0] = qw0; - dw[1] = qw0 >> 32; - -} - -struct GEN75_3DSTATE_SO_DECL_LIST { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t StreamtoBufferSelects3; - uint32_t StreamtoBufferSelects2; - uint32_t StreamtoBufferSelects1; - uint32_t StreamtoBufferSelects0; - uint32_t NumEntries3; - uint32_t NumEntries2; - uint32_t NumEntries1; - uint32_t NumEntries0; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_SO_DECL_LIST * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 
31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->StreamtoBufferSelects3, 12, 15) | - __gen_field(values->StreamtoBufferSelects2, 8, 11) | - __gen_field(values->StreamtoBufferSelects1, 4, 7) | - __gen_field(values->StreamtoBufferSelects0, 0, 3) | - 0; - - dw[2] = - __gen_field(values->NumEntries3, 24, 31) | - __gen_field(values->NumEntries2, 16, 23) | - __gen_field(values->NumEntries1, 8, 15) | - __gen_field(values->NumEntries0, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 -#define GEN75_3DSTATE_STENCIL_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 6, \ - .DwordLength = 1 - -#define GEN75_3DSTATE_STENCIL_BUFFER_length 0x00000003 - -struct GEN75_3DSTATE_STENCIL_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t StencilBufferEnable; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; -}; - -static inline void -GEN75_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_STENCIL_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_StencilBufferObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); - dw[1] = - 
__gen_field(values->StencilBufferEnable, 31, 31) | - __gen_field(dw_StencilBufferObjectControlState, 25, 28) | - __gen_field(values->SurfacePitch, 0, 16) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - -} - -#define GEN75_3DSTATE_STREAMOUT_length_bias 0x00000002 -#define GEN75_3DSTATE_STREAMOUT_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 30, \ - .DwordLength = 1 - -#define GEN75_3DSTATE_STREAMOUT_length 0x00000003 - -struct GEN75_3DSTATE_STREAMOUT { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SOFunctionEnable; - uint32_t RenderingDisable; - uint32_t RenderStreamSelect; -#define LEADING 0 -#define TRAILING 1 - uint32_t ReorderMode; - bool SOStatisticsEnable; - uint32_t SOBufferEnable3; - uint32_t SOBufferEnable2; - uint32_t SOBufferEnable1; - uint32_t SOBufferEnable0; - uint32_t Stream3VertexReadOffset; - uint32_t Stream3VertexReadLength; - uint32_t Stream2VertexReadOffset; - uint32_t Stream2VertexReadLength; - uint32_t Stream1VertexReadOffset; - uint32_t Stream1VertexReadLength; - uint32_t Stream0VertexReadOffset; - uint32_t Stream0VertexReadLength; -}; - -static inline void -GEN75_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_STREAMOUT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SOFunctionEnable, 31, 31) | - __gen_field(values->RenderingDisable, 30, 30) | - __gen_field(values->RenderStreamSelect, 27, 28) | - __gen_field(values->ReorderMode, 26, 26) | - 
__gen_field(values->SOStatisticsEnable, 25, 25) | - __gen_field(values->SOBufferEnable3, 11, 11) | - __gen_field(values->SOBufferEnable2, 10, 10) | - __gen_field(values->SOBufferEnable1, 9, 9) | - __gen_field(values->SOBufferEnable0, 8, 8) | - 0; - - dw[2] = - __gen_field(values->Stream3VertexReadOffset, 29, 29) | - __gen_field(values->Stream3VertexReadLength, 24, 28) | - __gen_field(values->Stream2VertexReadOffset, 21, 21) | - __gen_field(values->Stream2VertexReadLength, 16, 20) | - __gen_field(values->Stream1VertexReadOffset, 13, 13) | - __gen_field(values->Stream1VertexReadLength, 8, 12) | - __gen_field(values->Stream0VertexReadOffset, 5, 5) | - __gen_field(values->Stream0VertexReadLength, 0, 4) | - 0; - -} - -#define GEN75_3DSTATE_TE_length_bias 0x00000002 -#define GEN75_3DSTATE_TE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 28, \ - .DwordLength = 2 - -#define GEN75_3DSTATE_TE_length 0x00000004 - -struct GEN75_3DSTATE_TE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define INTEGER 0 -#define ODD_FRACTIONAL 1 -#define EVEN_FRACTIONAL 2 - uint32_t Partitioning; -#define POINT 0 -#define OUTPUT_LINE 1 -#define OUTPUT_TRI_CW 2 -#define OUTPUT_TRI_CCW 3 - uint32_t OutputTopology; -#define QUAD 0 -#define TRI 1 -#define ISOLINE 2 - uint32_t TEDomain; -#define HW_TESS 0 -#define SW_TESS 1 - uint32_t TEMode; - bool TEEnable; - float MaximumTessellationFactorOdd; - float MaximumTessellationFactorNotOdd; -}; - -static inline void -GEN75_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_TE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - 
__gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->Partitioning, 12, 13) | - __gen_field(values->OutputTopology, 8, 9) | - __gen_field(values->TEDomain, 4, 5) | - __gen_field(values->TEMode, 1, 2) | - __gen_field(values->TEEnable, 0, 0) | - 0; - - dw[2] = - __gen_float(values->MaximumTessellationFactorOdd) | - 0; - - dw[3] = - __gen_float(values->MaximumTessellationFactorNotOdd) | - 0; - -} - -#define GEN75_3DSTATE_URB_DS_length_bias 0x00000002 -#define GEN75_3DSTATE_URB_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 50, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_URB_DS_length 0x00000002 - -struct GEN75_3DSTATE_URB_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t DSURBStartingAddress; - uint32_t DSURBEntryAllocationSize; - uint32_t DSNumberofURBEntries; -}; - -static inline void -GEN75_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_URB_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->DSURBStartingAddress, 25, 30) | - __gen_field(values->DSURBEntryAllocationSize, 16, 24) | - __gen_field(values->DSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN75_3DSTATE_URB_GS_length_bias 0x00000002 -#define GEN75_3DSTATE_URB_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 51, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_URB_GS_length 0x00000002 - -struct GEN75_3DSTATE_URB_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t 
_3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t GSURBStartingAddress; - uint32_t GSURBEntryAllocationSize; - uint32_t GSNumberofURBEntries; -}; - -static inline void -GEN75_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_URB_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->GSURBStartingAddress, 25, 30) | - __gen_field(values->GSURBEntryAllocationSize, 16, 24) | - __gen_field(values->GSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN75_3DSTATE_URB_HS_length_bias 0x00000002 -#define GEN75_3DSTATE_URB_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 49, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_URB_HS_length 0x00000002 - -struct GEN75_3DSTATE_URB_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t HSURBStartingAddress; - uint32_t HSURBEntryAllocationSize; - uint32_t HSNumberofURBEntries; -}; - -static inline void -GEN75_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_URB_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->HSURBStartingAddress, 25, 30) | - __gen_field(values->HSURBEntryAllocationSize, 16, 24) | - __gen_field(values->HSNumberofURBEntries, 0, 15) | - 0; - -} - -#define 
GEN75_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 -#define GEN75_3DSTATE_VERTEX_BUFFERS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 8 - -#define GEN75_3DSTATE_VERTEX_BUFFERS_length 0x00000000 - -#define GEN75_VERTEX_BUFFER_STATE_length 0x00000004 - -struct GEN75_VERTEX_BUFFER_STATE { - uint32_t VertexBufferIndex; -#define VERTEXDATA 0 -#define INSTANCEDATA 1 - uint32_t BufferAccessType; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE VertexBufferMemoryObjectControlState; - uint32_t AddressModifyEnable; - bool NullVertexBuffer; - uint32_t VertexFetchInvalidate; - uint32_t BufferPitch; - __gen_address_type BufferStartingAddress; - __gen_address_type EndAddress; - uint32_t InstanceDataStepRate; -}; - -static inline void -GEN75_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_VERTEX_BUFFER_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_VertexBufferMemoryObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_VertexBufferMemoryObjectControlState, &values->VertexBufferMemoryObjectControlState); - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - __gen_field(values->BufferAccessType, 20, 20) | - __gen_field(dw_VertexBufferMemoryObjectControlState, 16, 19) | - __gen_field(values->AddressModifyEnable, 14, 14) | - __gen_field(values->NullVertexBuffer, 13, 13) | - __gen_field(values->VertexFetchInvalidate, 12, 12) | - __gen_field(values->BufferPitch, 0, 11) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->EndAddress, dw2); - - dw[3] = - __gen_field(values->InstanceDataStepRate, 0, 31) | - 0; - -} - -struct GEN75_3DSTATE_VERTEX_BUFFERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t 
DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VERTEX_BUFFERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 -#define GEN75_3DSTATE_VERTEX_ELEMENTS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 9 - -#define GEN75_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 - -#define GEN75_VERTEX_ELEMENT_STATE_length 0x00000002 - -struct GEN75_VERTEX_ELEMENT_STATE { - uint32_t VertexBufferIndex; - bool Valid; - uint32_t SourceElementFormat; - bool EdgeFlagEnable; - uint32_t SourceElementOffset; - uint32_t Component0Control; - uint32_t Component1Control; - uint32_t Component2Control; - uint32_t Component3Control; -}; - -static inline void -GEN75_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_VERTEX_ELEMENT_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - __gen_field(values->Valid, 25, 25) | - __gen_field(values->SourceElementFormat, 16, 24) | - __gen_field(values->EdgeFlagEnable, 15, 15) | - __gen_field(values->SourceElementOffset, 0, 11) | - 0; - - dw[1] = - __gen_field(values->Component0Control, 28, 30) | - __gen_field(values->Component1Control, 24, 26) | - __gen_field(values->Component2Control, 20, 22) | - __gen_field(values->Component3Control, 16, 18) | - 0; - -} - -struct GEN75_3DSTATE_VERTEX_ELEMENTS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t 
_3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN75_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VERTEX_ELEMENTS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_3DSTATE_VF_length_bias 0x00000002 -#define GEN75_3DSTATE_VF_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 12, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_VF_length 0x00000002 - -struct GEN75_3DSTATE_VF { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - bool IndexedDrawCutIndexEnable; - uint32_t DwordLength; - uint32_t CutIndex; -}; - -static inline void -GEN75_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VF * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->IndexedDrawCutIndexEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->CutIndex, 0, 31) | - 0; - -} - -#define GEN75_3DSTATE_VF_STATISTICS_length_bias 0x00000001 -#define GEN75_3DSTATE_VF_STATISTICS_header \ - .CommandType = 3, \ - .CommandSubType = 1, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 11 - -#define GEN75_3DSTATE_VF_STATISTICS_length 0x00000001 - -struct GEN75_3DSTATE_VF_STATISTICS { - uint32_t 
CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - bool StatisticsEnable; -}; - -static inline void -GEN75_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VF_STATISTICS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->StatisticsEnable, 0, 0) | - 0; - -} - -#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 -#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 35, \ - .DwordLength = 0 - -#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 - -struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t CCViewportPointer; -}; - -static inline void -GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->CCViewportPointer, 5, 31) | - 0; - -} - -#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 -#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 33, \ - .DwordLength = 0 - -#define 
GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 - -struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SFClipViewportPointer; -}; - -static inline void -GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->SFClipViewportPointer, 6, 31) | - 0; - -} - -#define GEN75_3DSTATE_VS_length_bias 0x00000002 -#define GEN75_3DSTATE_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 16, \ - .DwordLength = 4 - -#define GEN75_3DSTATE_VS_length 0x00000006 - -struct GEN75_3DSTATE_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t KernelStartPointer; -#define Multiple 0 -#define Single 1 - uint32_t SingleVertexDispatch; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnableVME; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define NormalPriority 0 -#define HighPriority 1 - uint32_t ThreadPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool VSaccessesUAV; - bool SoftwareExceptionEnable; - uint32_t ScratchSpaceBaseOffset; - uint32_t PerThreadScratchSpace; - uint32_t DispatchGRFStartRegisterforURBData; - 
uint32_t VertexURBEntryReadLength; - uint32_t VertexURBEntryReadOffset; - uint32_t MaximumNumberofThreads; - bool StatisticsEnable; - bool VertexCacheDisable; - bool VSFunctionEnable; -}; - -static inline void -GEN75_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->KernelStartPointer, 6, 31) | - 0; - - dw[2] = - __gen_field(values->SingleVertexDispatch, 31, 31) | - __gen_field(values->VectorMaskEnableVME, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->VSaccessesUAV, 12, 12) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - dw[3] = - __gen_offset(values->ScratchSpaceBaseOffset, 10, 31) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = - __gen_field(values->DispatchGRFStartRegisterforURBData, 20, 24) | - __gen_field(values->VertexURBEntryReadLength, 11, 16) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - 0; - - dw[5] = - __gen_field(values->MaximumNumberofThreads, 23, 31) | - __gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->VertexCacheDisable, 1, 1) | - __gen_field(values->VSFunctionEnable, 0, 0) | - 0; - -} - -#define GEN75_3DSTATE_WM_length_bias 0x00000002 -#define GEN75_3DSTATE_WM_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 20, \ - .DwordLength = 1 - -#define GEN75_3DSTATE_WM_length 0x00000003 - 
-struct GEN75_3DSTATE_WM { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool StatisticsEnable; - bool DepthBufferClear; - bool ThreadDispatchEnable; - bool DepthBufferResolveEnable; - bool HierarchicalDepthBufferResolveEnable; - bool LegacyDiamondLineRasterization; - bool PixelShaderKillPixel; -#define PSCDEPTH_OFF 0 -#define PSCDEPTH_ON 1 -#define PSCDEPTH_ON_GE 2 -#define PSCDEPTH_ON_LE 3 - uint32_t PixelShaderComputedDepthMode; -#define EDSC_NORMAL 0 -#define EDSC_PSEXEC 1 -#define EDSC_PREPS 2 - uint32_t EarlyDepthStencilControl; - bool PixelShaderUsesSourceDepth; - bool PixelShaderUsesSourceW; -#define INTERP_PIXEL 0 -#define INTERP_CENTROID 2 -#define INTERP_SAMPLE 3 - uint32_t PositionZWInterpolationMode; - uint32_t BarycentricInterpolationMode; - bool PixelShaderUsesInputCoverageMask; - uint32_t LineEndCapAntialiasingRegionWidth; - uint32_t LineAntialiasingRegionWidth; - bool RTIndependentRasterizationEnable; - bool PolygonStippleEnable; - bool LineStippleEnable; -#define RASTRULE_UPPER_LEFT 0 -#define RASTRULE_UPPER_RIGHT 1 - uint32_t PointRasterizationRule; -#define MSRASTMODE_OFF_PIXEL 0 -#define MSRASTMODE_OFF_PATTERN 1 -#define MSRASTMODE_ON_PIXEL 2 -#define MSRASTMODE_ON_PATTERN 3 - uint32_t MultisampleRasterizationMode; -#define MSDISPMODE_PERSAMPLE 0 -#define MSDISPMODE_PERPIXEL 1 - uint32_t MultisampleDispatchMode; -#define OFF 0 -#define ON 1 - uint32_t PSUAVonly; -}; - -static inline void -GEN75_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_3DSTATE_WM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->StatisticsEnable, 
31, 31) | - __gen_field(values->DepthBufferClear, 30, 30) | - __gen_field(values->ThreadDispatchEnable, 29, 29) | - __gen_field(values->DepthBufferResolveEnable, 28, 28) | - __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | - __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | - __gen_field(values->PixelShaderKillPixel, 25, 25) | - __gen_field(values->PixelShaderComputedDepthMode, 23, 24) | - __gen_field(values->EarlyDepthStencilControl, 21, 22) | - __gen_field(values->PixelShaderUsesSourceDepth, 20, 20) | - __gen_field(values->PixelShaderUsesSourceW, 19, 19) | - __gen_field(values->PositionZWInterpolationMode, 17, 18) | - __gen_field(values->BarycentricInterpolationMode, 11, 16) | - __gen_field(values->PixelShaderUsesInputCoverageMask, 10, 10) | - __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | - __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | - __gen_field(values->RTIndependentRasterizationEnable, 5, 5) | - __gen_field(values->PolygonStippleEnable, 4, 4) | - __gen_field(values->LineStippleEnable, 3, 3) | - __gen_field(values->PointRasterizationRule, 2, 2) | - __gen_field(values->MultisampleRasterizationMode, 0, 1) | - 0; - - dw[2] = - __gen_field(values->MultisampleDispatchMode, 31, 31) | - __gen_field(values->PSUAVonly, 30, 30) | - 0; - -} - -#define GEN75_GPGPU_OBJECT_length_bias 0x00000002 -#define GEN75_GPGPU_OBJECT_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcode = 4, \ - .DwordLength = 6 - -#define GEN75_GPGPU_OBJECT_length 0x00000008 - -struct GEN75_GPGPU_OBJECT { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - bool PredicateEnable; - uint32_t DwordLength; - uint32_t SharedLocalMemoryFixedOffset; - uint32_t InterfaceDescriptorOffset; - uint32_t SharedLocalMemoryOffset; - uint32_t EndofThreadGroup; -#define Slice0 0 -#define Slice1 1 - uint32_t SliceDestinationSelect; -#define HalfSlice1 2 -#define HalfSlice0 1 
-#define EitherHalfSlice 0 - uint32_t HalfSliceDestinationSelect; - uint32_t IndirectDataLength; - uint32_t IndirectDataStartAddress; - uint32_t ThreadGroupIDX; - uint32_t ThreadGroupIDY; - uint32_t ThreadGroupIDZ; - uint32_t ExecutionMask; -}; - -static inline void -GEN75_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_GPGPU_OBJECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->PredicateEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SharedLocalMemoryFixedOffset, 7, 7) | - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->SharedLocalMemoryOffset, 28, 31) | - __gen_field(values->EndofThreadGroup, 24, 24) | - __gen_field(values->SliceDestinationSelect, 19, 19) | - __gen_field(values->HalfSliceDestinationSelect, 17, 18) | - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - dw[3] = - __gen_offset(values->IndirectDataStartAddress, 0, 31) | - 0; - - dw[4] = - __gen_field(values->ThreadGroupIDX, 0, 31) | - 0; - - dw[5] = - __gen_field(values->ThreadGroupIDY, 0, 31) | - 0; - - dw[6] = - __gen_field(values->ThreadGroupIDZ, 0, 31) | - 0; - - dw[7] = - __gen_field(values->ExecutionMask, 0, 31) | - 0; - -} - -#define GEN75_GPGPU_WALKER_length_bias 0x00000002 -#define GEN75_GPGPU_WALKER_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcodeA = 5, \ - .DwordLength = 9 - -#define GEN75_GPGPU_WALKER_length 0x0000000b - -struct GEN75_GPGPU_WALKER { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcodeA; - bool IndirectParameterEnable; - bool PredicateEnable; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; -#define SIMD8 0 
-#define SIMD16 1 -#define SIMD32 2 - uint32_t SIMDSize; - uint32_t ThreadDepthCounterMaximum; - uint32_t ThreadHeightCounterMaximum; - uint32_t ThreadWidthCounterMaximum; - uint32_t ThreadGroupIDStartingX; - uint32_t ThreadGroupIDXDimension; - uint32_t ThreadGroupIDStartingY; - uint32_t ThreadGroupIDYDimension; - uint32_t ThreadGroupIDStartingZ; - uint32_t ThreadGroupIDZDimension; - uint32_t RightExecutionMask; - uint32_t BottomExecutionMask; -}; - -static inline void -GEN75_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_GPGPU_WALKER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcodeA, 16, 23) | - __gen_field(values->IndirectParameterEnable, 10, 10) | - __gen_field(values->PredicateEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->SIMDSize, 30, 31) | - __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | - __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | - __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | - 0; - - dw[3] = - __gen_field(values->ThreadGroupIDStartingX, 0, 31) | - 0; - - dw[4] = - __gen_field(values->ThreadGroupIDXDimension, 0, 31) | - 0; - - dw[5] = - __gen_field(values->ThreadGroupIDStartingY, 0, 31) | - 0; - - dw[6] = - __gen_field(values->ThreadGroupIDYDimension, 0, 31) | - 0; - - dw[7] = - __gen_field(values->ThreadGroupIDStartingZ, 0, 31) | - 0; - - dw[8] = - __gen_field(values->ThreadGroupIDZDimension, 0, 31) | - 0; - - dw[9] = - __gen_field(values->RightExecutionMask, 0, 31) | - 0; - - dw[10] = - __gen_field(values->BottomExecutionMask, 0, 31) | - 0; - -} - -#define GEN75_MEDIA_CURBE_LOAD_length_bias 0x00000002 -#define GEN75_MEDIA_CURBE_LOAD_header \ - .CommandType = 3, \ - 
.Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 1, \ - .DwordLength = 2 - -#define GEN75_MEDIA_CURBE_LOAD_length 0x00000004 - -struct GEN75_MEDIA_CURBE_LOAD { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t CURBETotalDataLength; - uint32_t CURBEDataStartAddress; -}; - -static inline void -GEN75_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_CURBE_LOAD * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - 0; - - dw[2] = - __gen_field(values->CURBETotalDataLength, 0, 16) | - 0; - - dw[3] = - __gen_field(values->CURBEDataStartAddress, 0, 31) | - 0; - -} - -#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 -#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 2, \ - .DwordLength = 2 - -#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 - -struct GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorTotalLength; - uint32_t InterfaceDescriptorDataStartAddress; -}; - -static inline void -GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 
0, 15) | - 0; - - dw[1] = - 0; - - dw[2] = - __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | - 0; - - dw[3] = - __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | - 0; - -} - -#define GEN75_MEDIA_OBJECT_length_bias 0x00000002 -#define GEN75_MEDIA_OBJECT_header \ - .CommandType = 3, \ - .MediaCommandPipeline = 2, \ - .MediaCommandOpcode = 1, \ - .MediaCommandSubOpcode = 0 - -#define GEN75_MEDIA_OBJECT_length 0x00000000 - -struct GEN75_MEDIA_OBJECT { - uint32_t CommandType; - uint32_t MediaCommandPipeline; - uint32_t MediaCommandOpcode; - uint32_t MediaCommandSubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - bool ChildrenPresent; -#define Nothreadsynchronization 0 -#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 - uint32_t ThreadSynchronization; -#define Notusingscoreboard 0 -#define Usingscoreboard 1 - uint32_t UseScoreboard; -#define Slice0 0 -#define Slice1 1 -#define EitherSlice 0 - uint32_t SliceDestinationSelect; -#define HalfSlice1 2 -#define HalfSlice0 1 -#define Eitherhalfslice 0 - uint32_t HalfSliceDestinationSelect; - uint32_t IndirectDataLength; - __gen_address_type IndirectDataStartAddress; - uint32_t ScoredboardY; - uint32_t ScoreboardX; - uint32_t ScoreboardColor; - bool ScoreboardMask; - /* variable length fields follow */ -}; - -static inline void -GEN75_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_OBJECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MediaCommandPipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->MediaCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->ChildrenPresent, 31, 31) | - __gen_field(values->ThreadSynchronization, 24, 24) | - 
__gen_field(values->UseScoreboard, 21, 21) | - __gen_field(values->SliceDestinationSelect, 19, 19) | - __gen_field(values->HalfSliceDestinationSelect, 17, 18) | - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); - - dw[4] = - __gen_field(values->ScoredboardY, 16, 24) | - __gen_field(values->ScoreboardX, 0, 8) | - 0; - - dw[5] = - __gen_field(values->ScoreboardColor, 16, 19) | - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_MEDIA_OBJECT_PRT_length_bias 0x00000002 -#define GEN75_MEDIA_OBJECT_PRT_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcode = 2, \ - .DwordLength = 14 - -#define GEN75_MEDIA_OBJECT_PRT_length 0x00000010 - -struct GEN75_MEDIA_OBJECT_PRT { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - bool ChildrenPresent; - bool PRT_FenceNeeded; -#define Rootthreadqueue 0 -#define VFEstateflush 1 - uint32_t PRT_FenceType; - uint32_t InlineData[12]; -}; - -static inline void -GEN75_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_OBJECT_PRT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->ChildrenPresent, 31, 31) | - __gen_field(values->PRT_FenceNeeded, 23, 23) | - __gen_field(values->PRT_FenceType, 22, 22) | - 0; - - dw[3] = - 0; - - for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { - dw[j] = - __gen_field(values->InlineData[i 
+ 0], 0, 31) | - 0; - } - -} - -#define GEN75_MEDIA_OBJECT_WALKER_length_bias 0x00000002 -#define GEN75_MEDIA_OBJECT_WALKER_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcode = 3 - -#define GEN75_MEDIA_OBJECT_WALKER_length 0x00000000 - -struct GEN75_MEDIA_OBJECT_WALKER { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - bool ChildrenPresent; -#define Nothreadsynchronization 0 -#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 - uint32_t ThreadSynchronization; -#define Notusingscoreboard 0 -#define Usingscoreboard 1 - uint32_t UseScoreboard; - uint32_t IndirectDataLength; - uint32_t IndirectDataStartAddress; - bool ScoreboardMask; - bool DualMode; - bool Repel; - bool QuadMode; - uint32_t ColorCountMinusOne; - uint32_t MiddleLoopExtraSteps; - uint32_t LocalMidLoopUnitY; - uint32_t MidLoopUnitX; - uint32_t GlobalLoopExecCount; - uint32_t LocalLoopExecCount; - uint32_t BlockResolutionY; - uint32_t BlockResolutionX; - uint32_t LocalStartY; - uint32_t LocalStartX; - uint32_t LocalOuterLoopStrideY; - uint32_t LocalOuterLoopStrideX; - uint32_t LocalInnerLoopUnitY; - uint32_t LocalInnerLoopUnitX; - uint32_t GlobalResolutionY; - uint32_t GlobalResolutionX; - uint32_t GlobalStartY; - uint32_t GlobalStartX; - uint32_t GlobalOuterLoopStrideY; - uint32_t GlobalOuterLoopStrideX; - uint32_t GlobalInnerLoopUnitY; - uint32_t GlobalInnerLoopUnitX; - /* variable length fields follow */ -}; - -static inline void -GEN75_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_OBJECT_WALKER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 
15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->ChildrenPresent, 31, 31) | - __gen_field(values->ThreadSynchronization, 24, 24) | - __gen_field(values->UseScoreboard, 21, 21) | - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - dw[3] = - __gen_offset(values->IndirectDataStartAddress, 0, 31) | - 0; - - dw[4] = - 0; - - dw[5] = - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - dw[6] = - __gen_field(values->DualMode, 31, 31) | - __gen_field(values->Repel, 30, 30) | - __gen_field(values->QuadMode, 29, 29) | - __gen_field(values->ColorCountMinusOne, 24, 27) | - __gen_field(values->MiddleLoopExtraSteps, 16, 20) | - __gen_field(values->LocalMidLoopUnitY, 12, 13) | - __gen_field(values->MidLoopUnitX, 8, 9) | - 0; - - dw[7] = - __gen_field(values->GlobalLoopExecCount, 16, 25) | - __gen_field(values->LocalLoopExecCount, 0, 9) | - 0; - - dw[8] = - __gen_field(values->BlockResolutionY, 16, 24) | - __gen_field(values->BlockResolutionX, 0, 8) | - 0; - - dw[9] = - __gen_field(values->LocalStartY, 16, 24) | - __gen_field(values->LocalStartX, 0, 8) | - 0; - - dw[10] = - 0; - - dw[11] = - __gen_field(values->LocalOuterLoopStrideY, 16, 25) | - __gen_field(values->LocalOuterLoopStrideX, 0, 9) | - 0; - - dw[12] = - __gen_field(values->LocalInnerLoopUnitY, 16, 25) | - __gen_field(values->LocalInnerLoopUnitX, 0, 9) | - 0; - - dw[13] = - __gen_field(values->GlobalResolutionY, 16, 24) | - __gen_field(values->GlobalResolutionX, 0, 8) | - 0; - - dw[14] = - __gen_field(values->GlobalStartY, 16, 25) | - __gen_field(values->GlobalStartX, 0, 9) | - 0; - - dw[15] = - __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | - __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | - 0; - - dw[16] = - __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | - __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_MEDIA_STATE_FLUSH_length_bias 0x00000002 -#define 
GEN75_MEDIA_STATE_FLUSH_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 4, \ - .DwordLength = 0 - -#define GEN75_MEDIA_STATE_FLUSH_length 0x00000002 - -struct GEN75_MEDIA_STATE_FLUSH { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - bool DisablePreemption; - bool FlushtoGO; - uint32_t WatermarkRequired; - uint32_t InterfaceDescriptorOffset; -}; - -static inline void -GEN75_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_STATE_FLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->DisablePreemption, 8, 8) | - __gen_field(values->FlushtoGO, 7, 7) | - __gen_field(values->WatermarkRequired, 6, 6) | - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - -} - -#define GEN75_MEDIA_VFE_STATE_length_bias 0x00000002 -#define GEN75_MEDIA_VFE_STATE_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 0, \ - .DwordLength = 6 - -#define GEN75_MEDIA_VFE_STATE_length 0x00000008 - -struct GEN75_MEDIA_VFE_STATE { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t ScratchSpaceBasePointer; - uint32_t StackSize; - uint32_t PerThreadScratchSpace; - uint32_t MaximumNumberofThreads; - uint32_t NumberofURBEntries; -#define Maintainingtheexistingtimestampstate 0 -#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 - uint32_t ResetGatewayTimer; -#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 -#define BypassingOpenGatewayCloseGatewayprotocol 1 - uint32_t 
BypassGatewayControl; - uint32_t GPGPUMode; - uint32_t HalfSliceDisable; - uint32_t URBEntryAllocationSize; - uint32_t CURBEAllocationSize; -#define Scoreboarddisabled 0 -#define Scoreboardenabled 1 - uint32_t ScoreboardEnable; -#define StallingScoreboard 0 -#define NonStallingScoreboard 1 - uint32_t ScoreboardType; - uint32_t ScoreboardMask; - uint32_t Scoreboard3DeltaY; - uint32_t Scoreboard3DeltaX; - uint32_t Scoreboard2DeltaY; - uint32_t Scoreboard2DeltaX; - uint32_t Scoreboard1DeltaY; - uint32_t Scoreboard1DeltaX; - uint32_t Scoreboard0DeltaY; - uint32_t Scoreboard0DeltaX; - uint32_t Scoreboard7DeltaY; - uint32_t Scoreboard7DeltaX; - uint32_t Scoreboard6DeltaY; - uint32_t Scoreboard6DeltaX; - uint32_t Scoreboard5DeltaY; - uint32_t Scoreboard5DeltaX; - uint32_t Scoreboard4DeltaY; - uint32_t Scoreboard4DeltaX; -}; - -static inline void -GEN75_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MEDIA_VFE_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | - __gen_field(values->StackSize, 4, 7) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[2] = - __gen_field(values->MaximumNumberofThreads, 16, 31) | - __gen_field(values->NumberofURBEntries, 8, 15) | - __gen_field(values->ResetGatewayTimer, 7, 7) | - __gen_field(values->BypassGatewayControl, 6, 6) | - __gen_field(values->GPGPUMode, 2, 2) | - 0; - - dw[3] = - __gen_field(values->HalfSliceDisable, 0, 1) | - 0; - - dw[4] = - __gen_field(values->URBEntryAllocationSize, 16, 31) | - __gen_field(values->CURBEAllocationSize, 0, 15) | - 0; - - dw[5] = - __gen_field(values->ScoreboardEnable, 31, 31) | - __gen_field(values->ScoreboardType, 30, 
30) | - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - dw[6] = - __gen_field(values->Scoreboard3DeltaY, 28, 31) | - __gen_field(values->Scoreboard3DeltaX, 24, 27) | - __gen_field(values->Scoreboard2DeltaY, 20, 23) | - __gen_field(values->Scoreboard2DeltaX, 16, 19) | - __gen_field(values->Scoreboard1DeltaY, 12, 15) | - __gen_field(values->Scoreboard1DeltaX, 8, 11) | - __gen_field(values->Scoreboard0DeltaY, 4, 7) | - __gen_field(values->Scoreboard0DeltaX, 0, 3) | - 0; - - dw[7] = - __gen_field(values->Scoreboard7DeltaY, 28, 31) | - __gen_field(values->Scoreboard7DeltaX, 24, 27) | - __gen_field(values->Scoreboard6DeltaY, 20, 23) | - __gen_field(values->Scoreboard6DeltaX, 16, 19) | - __gen_field(values->Scoreboard5DeltaY, 12, 15) | - __gen_field(values->Scoreboard5DeltaX, 8, 11) | - __gen_field(values->Scoreboard4DeltaY, 4, 7) | - __gen_field(values->Scoreboard4DeltaX, 0, 3) | - 0; - -} - -#define GEN75_MI_ARB_CHECK_length_bias 0x00000001 -#define GEN75_MI_ARB_CHECK_header \ - .CommandType = 0, \ - .MICommandOpcode = 5 - -#define GEN75_MI_ARB_CHECK_length 0x00000001 - -struct GEN75_MI_ARB_CHECK { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN75_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_ARB_CHECK * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN75_MI_ARB_ON_OFF_length_bias 0x00000001 -#define GEN75_MI_ARB_ON_OFF_header \ - .CommandType = 0, \ - .MICommandOpcode = 8 - -#define GEN75_MI_ARB_ON_OFF_length 0x00000001 - -struct GEN75_MI_ARB_ON_OFF { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool ArbitrationEnable; -}; - -static inline void -GEN75_MI_ARB_ON_OFF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_ARB_ON_OFF * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - 
__gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->ArbitrationEnable, 0, 0) | - 0; - -} - -#define GEN75_MI_BATCH_BUFFER_END_length_bias 0x00000001 -#define GEN75_MI_BATCH_BUFFER_END_header \ - .CommandType = 0, \ - .MICommandOpcode = 10 - -#define GEN75_MI_BATCH_BUFFER_END_length 0x00000001 - -struct GEN75_MI_BATCH_BUFFER_END { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN75_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_BATCH_BUFFER_END * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN75_MI_BATCH_BUFFER_START_length_bias 0x00000002 -#define GEN75_MI_BATCH_BUFFER_START_header \ - .CommandType = 0, \ - .MICommandOpcode = 49, \ - .DwordLength = 0 - -#define GEN75_MI_BATCH_BUFFER_START_length 0x00000002 - -struct GEN75_MI_BATCH_BUFFER_START { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define _1stlevelbatch 0 -#define _2ndlevelbatch 1 - uint32_t _2ndLevelBatchBuffer; - bool AddOffsetEnable; - bool PredicationEnable; - uint32_t NonPrivileged; - bool ClearCommandBufferEnable; - bool ResourceStreamerEnable; -#define ASI_GGTT 0 -#define ASI_PPGTT 1 - uint32_t AddressSpaceIndicator; - uint32_t DwordLength; - __gen_address_type BatchBufferStartAddress; -}; - -static inline void -GEN75_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_BATCH_BUFFER_START * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->_2ndLevelBatchBuffer, 22, 22) | - __gen_field(values->AddOffsetEnable, 16, 16) | - __gen_field(values->PredicationEnable, 15, 15) | - __gen_field(values->NonPrivileged, 13, 13) | - 
__gen_field(values->ClearCommandBufferEnable, 11, 11) | - __gen_field(values->ResourceStreamerEnable, 10, 10) | - __gen_field(values->AddressSpaceIndicator, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); - -} - -#define GEN75_MI_CLFLUSH_length_bias 0x00000002 -#define GEN75_MI_CLFLUSH_header \ - .CommandType = 0, \ - .MICommandOpcode = 39 - -#define GEN75_MI_CLFLUSH_length 0x00000000 - -struct GEN75_MI_CLFLUSH { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t UseGlobalGTT; - uint32_t DwordLength; - __gen_address_type PageBaseAddress; - uint32_t StartingCachelineOffset; - __gen_address_type PageBaseAddressHigh; - /* variable length fields follow */ -}; - -static inline void -GEN75_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_CLFLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->DwordLength, 0, 9) | - 0; - - uint32_t dw1 = - __gen_field(values->StartingCachelineOffset, 6, 11) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->PageBaseAddressHigh, dw2); - - /* variable length fields follow */ -} - -#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 -#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_header\ - .CommandType = 0, \ - .MICommandOpcode = 54, \ - .UseGlobalGTT = 0, \ - .CompareSemaphore = 0, \ - .DwordLength = 0 - -#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000002 - -struct GEN75_MI_CONDITIONAL_BATCH_BUFFER_END { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t 
UseGlobalGTT; - uint32_t CompareSemaphore; - uint32_t DwordLength; - uint32_t CompareDataDword; - __gen_address_type CompareAddress; -}; - -static inline void -GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->CompareSemaphore, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->CompareDataDword, 0, 31) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); - -} - -#define GEN75_MI_FLUSH_length_bias 0x00000001 -#define GEN75_MI_FLUSH_header \ - .CommandType = 0, \ - .MICommandOpcode = 4 - -#define GEN75_MI_FLUSH_length 0x00000001 - -struct GEN75_MI_FLUSH { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool IndirectStatePointersDisable; - bool GenericMediaStateClear; -#define DontReset 0 -#define Reset 1 - bool GlobalSnapshotCountReset; -#define Flush 0 -#define DontFlush 1 - bool RenderCacheFlushInhibit; -#define DontInvalidate 0 -#define Invalidate 1 - bool StateInstructionCacheInvalidate; -}; - -static inline void -GEN75_MI_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_FLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->IndirectStatePointersDisable, 5, 5) | - __gen_field(values->GenericMediaStateClear, 4, 4) | - __gen_field(values->GlobalSnapshotCountReset, 3, 3) | - __gen_field(values->RenderCacheFlushInhibit, 2, 2) | - __gen_field(values->StateInstructionCacheInvalidate, 1, 1) | - 0; - -} - -#define GEN75_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 -#define 
GEN75_MI_LOAD_REGISTER_IMM_header \ - .CommandType = 0, \ - .MICommandOpcode = 34, \ - .DwordLength = 1 - -#define GEN75_MI_LOAD_REGISTER_IMM_length 0x00000003 - -struct GEN75_MI_LOAD_REGISTER_IMM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t ByteWriteDisables; - uint32_t DwordLength; - uint32_t RegisterOffset; - uint32_t DataDWord; -}; - -static inline void -GEN75_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_LOAD_REGISTER_IMM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->ByteWriteDisables, 8, 11) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->RegisterOffset, 2, 22) | - 0; - - dw[2] = - __gen_field(values->DataDWord, 0, 31) | - 0; - -} - -#define GEN75_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 -#define GEN75_MI_LOAD_REGISTER_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 41, \ - .DwordLength = 1 - -#define GEN75_MI_LOAD_REGISTER_MEM_length 0x00000003 - -struct GEN75_MI_LOAD_REGISTER_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool UseGlobalGTT; - uint32_t AsyncModeEnable; - uint32_t DwordLength; - uint32_t RegisterAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN75_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_LOAD_REGISTER_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->AsyncModeEnable, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->RegisterAddress, 2, 22) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - -} - 
-#define GEN75_MI_LOAD_REGISTER_REG_length_bias 0x00000002 -#define GEN75_MI_LOAD_REGISTER_REG_header \ - .CommandType = 0, \ - .MICommandOpcode = 42, \ - .DwordLength = 1 - -#define GEN75_MI_LOAD_REGISTER_REG_length 0x00000003 - -struct GEN75_MI_LOAD_REGISTER_REG { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t SourceRegisterAddress; - uint32_t DestinationRegisterAddress; -}; - -static inline void -GEN75_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_LOAD_REGISTER_REG * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->SourceRegisterAddress, 2, 22) | - 0; - - dw[2] = - __gen_offset(values->DestinationRegisterAddress, 2, 22) | - 0; - -} - -#define GEN75_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 -#define GEN75_MI_LOAD_SCAN_LINES_EXCL_header \ - .CommandType = 0, \ - .MICommandOpcode = 19, \ - .DwordLength = 0 - -#define GEN75_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 - -struct GEN75_MI_LOAD_SCAN_LINES_EXCL { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define DisplayPlaneA 0 -#define DisplayPlaneB 1 -#define DisplayPlaneC 4 - uint32_t DisplayPlaneSelect; - uint32_t DwordLength; - uint32_t StartScanLineNumber; - uint32_t EndScanLineNumber; -}; - -static inline void -GEN75_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_LOAD_SCAN_LINES_EXCL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DisplayPlaneSelect, 19, 21) | - __gen_field(values->DwordLength, 0, 5) | - 0; - - dw[1] = - __gen_field(values->StartScanLineNumber, 16, 28) | - __gen_field(values->EndScanLineNumber, 0, 12) | - 
0; - -} - -#define GEN75_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 -#define GEN75_MI_LOAD_SCAN_LINES_INCL_header \ - .CommandType = 0, \ - .MICommandOpcode = 18, \ - .DwordLength = 0 - -#define GEN75_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 - -struct GEN75_MI_LOAD_SCAN_LINES_INCL { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define DisplayPlaneA 0 -#define DisplayPlaneB 1 -#define DisplayPlaneC 4 - uint32_t DisplayPlaneSelect; - uint32_t DwordLength; - uint32_t StartScanLineNumber; - uint32_t EndScanLineNumber; -}; - -static inline void -GEN75_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_LOAD_SCAN_LINES_INCL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DisplayPlaneSelect, 19, 21) | - __gen_field(values->DwordLength, 0, 5) | - 0; - - dw[1] = - __gen_field(values->StartScanLineNumber, 16, 28) | - __gen_field(values->EndScanLineNumber, 0, 12) | - 0; - -} - -#define GEN75_MI_LOAD_URB_MEM_length_bias 0x00000002 -#define GEN75_MI_LOAD_URB_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 44, \ - .DwordLength = 1 - -#define GEN75_MI_LOAD_URB_MEM_length 0x00000003 - -struct GEN75_MI_LOAD_URB_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t URBAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN75_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_LOAD_URB_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->URBAddress, 2, 14) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - -} - -#define 
GEN75_MI_MATH_length_bias 0x00000002 -#define GEN75_MI_MATH_header \ - .CommandType = 0, \ - .MICommandOpcode = 26 - -#define GEN75_MI_MATH_length 0x00000000 - -struct GEN75_MI_MATH { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t ALUINSTRUCTION1; - uint32_t ALUINSTRUCTION2; - /* variable length fields follow */ -}; - -static inline void -GEN75_MI_MATH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_MATH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 5) | - 0; - - dw[1] = - __gen_field(values->ALUINSTRUCTION1, 0, 31) | - 0; - - dw[2] = - __gen_field(values->ALUINSTRUCTION2, 0, 31) | - 0; - - /* variable length fields follow */ -} - -#define GEN75_MI_NOOP_length_bias 0x00000001 -#define GEN75_MI_NOOP_header \ - .CommandType = 0, \ - .MICommandOpcode = 0 - -#define GEN75_MI_NOOP_length 0x00000001 - -struct GEN75_MI_NOOP { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool IdentificationNumberRegisterWriteEnable; - uint32_t IdentificationNumber; -}; - -static inline void -GEN75_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_NOOP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | - __gen_field(values->IdentificationNumber, 0, 21) | - 0; - -} - -#define GEN75_MI_PREDICATE_length_bias 0x00000001 -#define GEN75_MI_PREDICATE_header \ - .CommandType = 0, \ - .MICommandOpcode = 12 - -#define GEN75_MI_PREDICATE_length 0x00000001 - -struct GEN75_MI_PREDICATE { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define LOAD_KEEP 0 -#define LOAD_LOAD 2 -#define LOAD_LOADINV 3 - uint32_t LoadOperation; -#define COMBINE_SET 
0 -#define COMBINE_AND 1 -#define COMBINE_OR 2 -#define COMBINE_XOR 3 - uint32_t CombineOperation; -#define COMPARE_SRCS_EQUAL 2 -#define COMPARE_DELTAS_EQUAL 3 - uint32_t CompareOperation; -}; - -static inline void -GEN75_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_PREDICATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->LoadOperation, 6, 7) | - __gen_field(values->CombineOperation, 3, 4) | - __gen_field(values->CompareOperation, 0, 1) | - 0; - -} - -#define GEN75_MI_REPORT_HEAD_length_bias 0x00000001 -#define GEN75_MI_REPORT_HEAD_header \ - .CommandType = 0, \ - .MICommandOpcode = 7 - -#define GEN75_MI_REPORT_HEAD_length 0x00000001 - -struct GEN75_MI_REPORT_HEAD { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN75_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_REPORT_HEAD * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN75_MI_RS_CONTEXT_length_bias 0x00000001 -#define GEN75_MI_RS_CONTEXT_header \ - .CommandType = 0, \ - .MICommandOpcode = 15 - -#define GEN75_MI_RS_CONTEXT_length 0x00000001 - -struct GEN75_MI_RS_CONTEXT { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define RS_RESTORE 0 -#define RS_SAVE 1 - uint32_t ResourceStreamerSave; -}; - -static inline void -GEN75_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_RS_CONTEXT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->ResourceStreamerSave, 0, 0) | - 0; - -} - -#define GEN75_MI_RS_CONTROL_length_bias 0x00000001 
-#define GEN75_MI_RS_CONTROL_header \ - .CommandType = 0, \ - .MICommandOpcode = 6 - -#define GEN75_MI_RS_CONTROL_length 0x00000001 - -struct GEN75_MI_RS_CONTROL { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define RS_STOP 0 -#define RS_START 1 - uint32_t ResourceStreamerControl; -}; - -static inline void -GEN75_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_RS_CONTROL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->ResourceStreamerControl, 0, 0) | - 0; - -} - -#define GEN75_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 -#define GEN75_MI_RS_STORE_DATA_IMM_header \ - .CommandType = 0, \ - .MICommandOpcode = 43, \ - .DwordLength = 2 - -#define GEN75_MI_RS_STORE_DATA_IMM_length 0x00000004 - -struct GEN75_MI_RS_STORE_DATA_IMM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - __gen_address_type DestinationAddress; - uint32_t CoreModeEnable; - uint32_t DataDWord0; -}; - -static inline void -GEN75_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_RS_STORE_DATA_IMM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - 0; - - uint32_t dw2 = - __gen_field(values->CoreModeEnable, 0, 0) | - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->DestinationAddress, dw2); - - dw[3] = - __gen_field(values->DataDWord0, 0, 31) | - 0; - -} - -#define GEN75_MI_SEMAPHORE_MBOX_length_bias 0x00000002 -#define GEN75_MI_SEMAPHORE_MBOX_header \ - .CommandType = 0, \ - .MICommandOpcode = 22, \ - .DwordLength = 1 - -#define GEN75_MI_SEMAPHORE_MBOX_length 0x00000003 - -struct GEN75_MI_SEMAPHORE_MBOX { - uint32_t CommandType; - uint32_t 
MICommandOpcode; -#define RVSYNC 0 -#define RVESYNC 1 -#define RBSYNC 2 -#define UseGeneralRegisterSelect 3 - uint32_t RegisterSelect; - uint32_t GeneralRegisterSelect; - uint32_t DwordLength; - uint32_t SemaphoreDataDword; -}; - -static inline void -GEN75_MI_SEMAPHORE_MBOX_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_SEMAPHORE_MBOX * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->RegisterSelect, 16, 17) | - __gen_field(values->GeneralRegisterSelect, 8, 13) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SemaphoreDataDword, 0, 31) | - 0; - - dw[2] = - 0; - -} - -#define GEN75_MI_SET_CONTEXT_length_bias 0x00000002 -#define GEN75_MI_SET_CONTEXT_header \ - .CommandType = 0, \ - .MICommandOpcode = 24, \ - .DwordLength = 0 - -#define GEN75_MI_SET_CONTEXT_length 0x00000002 - -struct GEN75_MI_SET_CONTEXT { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - __gen_address_type LogicalContextAddress; - uint32_t ReservedMustbe1; - bool CoreModeEnable; - bool ResourceStreamerStateSaveEnable; - bool ResourceStreamerStateRestoreEnable; - uint32_t ForceRestore; - uint32_t RestoreInhibit; -}; - -static inline void -GEN75_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_SET_CONTEXT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - __gen_field(values->ReservedMustbe1, 8, 8) | - __gen_field(values->CoreModeEnable, 4, 4) | - __gen_field(values->ResourceStreamerStateSaveEnable, 3, 3) | - __gen_field(values->ResourceStreamerStateRestoreEnable, 2, 2) | - __gen_field(values->ForceRestore, 1, 1) | - 
__gen_field(values->RestoreInhibit, 0, 0) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); - -} - -#define GEN75_MI_SET_PREDICATE_length_bias 0x00000001 -#define GEN75_MI_SET_PREDICATE_header \ - .CommandType = 0, \ - .MICommandOpcode = 1, \ - .PREDICATEENABLE = 6 - -#define GEN75_MI_SET_PREDICATE_length 0x00000001 - -struct GEN75_MI_SET_PREDICATE { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PredicateAlways 0 -#define PredicateonClear 1 -#define PredicateonSet 2 -#define PredicateDisable 3 - bool PREDICATEENABLE; -}; - -static inline void -GEN75_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_SET_PREDICATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->PREDICATEENABLE, 0, 1) | - 0; - -} - -#define GEN75_MI_STORE_DATA_IMM_length_bias 0x00000002 -#define GEN75_MI_STORE_DATA_IMM_header \ - .CommandType = 0, \ - .MICommandOpcode = 32, \ - .DwordLength = 2 - -#define GEN75_MI_STORE_DATA_IMM_length 0x00000004 - -struct GEN75_MI_STORE_DATA_IMM { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool UseGlobalGTT; - uint32_t DwordLength; - uint32_t Address; - uint32_t CoreModeEnable; - uint32_t DataDWord0; - uint32_t DataDWord1; -}; - -static inline void -GEN75_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_STORE_DATA_IMM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->DwordLength, 0, 5) | - 0; - - dw[1] = - 0; - - dw[2] = - __gen_field(values->Address, 2, 31) | - __gen_field(values->CoreModeEnable, 0, 0) | - 0; - - dw[3] = - __gen_field(values->DataDWord0, 0, 31) | - 0; - - dw[4] = - 
__gen_field(values->DataDWord1, 0, 31) | - 0; - -} - -#define GEN75_MI_STORE_DATA_INDEX_length_bias 0x00000002 -#define GEN75_MI_STORE_DATA_INDEX_header \ - .CommandType = 0, \ - .MICommandOpcode = 33, \ - .DwordLength = 1 - -#define GEN75_MI_STORE_DATA_INDEX_length 0x00000003 - -struct GEN75_MI_STORE_DATA_INDEX { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t Offset; - uint32_t DataDWord0; - uint32_t DataDWord1; -}; - -static inline void -GEN75_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_STORE_DATA_INDEX * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->Offset, 2, 11) | - 0; - - dw[2] = - __gen_field(values->DataDWord0, 0, 31) | - 0; - - dw[3] = - __gen_field(values->DataDWord1, 0, 31) | - 0; - -} - -#define GEN75_MI_STORE_URB_MEM_length_bias 0x00000002 -#define GEN75_MI_STORE_URB_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 45, \ - .DwordLength = 1 - -#define GEN75_MI_STORE_URB_MEM_length 0x00000003 - -struct GEN75_MI_STORE_URB_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t URBAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN75_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_STORE_URB_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->URBAddress, 2, 14) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - -} - -#define GEN75_MI_SUSPEND_FLUSH_length_bias 0x00000001 -#define 
GEN75_MI_SUSPEND_FLUSH_header \ - .CommandType = 0, \ - .MICommandOpcode = 11 - -#define GEN75_MI_SUSPEND_FLUSH_length 0x00000001 - -struct GEN75_MI_SUSPEND_FLUSH { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool SuspendFlush; -}; - -static inline void -GEN75_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_SUSPEND_FLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->SuspendFlush, 0, 0) | - 0; - -} - -#define GEN75_MI_TOPOLOGY_FILTER_length_bias 0x00000001 -#define GEN75_MI_TOPOLOGY_FILTER_header \ - .CommandType = 0, \ - .MICommandOpcode = 13 - -#define GEN75_MI_TOPOLOGY_FILTER_length 0x00000001 - -struct GEN75_MI_TOPOLOGY_FILTER { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t TopologyFilterValue; -}; - -static inline void -GEN75_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_TOPOLOGY_FILTER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->TopologyFilterValue, 0, 5) | - 0; - -} - -#define GEN75_MI_UPDATE_GTT_length_bias 0x00000002 -#define GEN75_MI_UPDATE_GTT_header \ - .CommandType = 0, \ - .MICommandOpcode = 35 - -#define GEN75_MI_UPDATE_GTT_length 0x00000000 - -struct GEN75_MI_UPDATE_GTT { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t UseGlobalGTT; - uint32_t DwordLength; - __gen_address_type EntryAddress; - /* variable length fields follow */ -}; - -static inline void -GEN75_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_UPDATE_GTT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - 
__gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); - - /* variable length fields follow */ -} - -#define GEN75_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 -#define GEN75_MI_URB_ATOMIC_ALLOC_header \ - .CommandType = 0, \ - .MICommandOpcode = 9 - -#define GEN75_MI_URB_ATOMIC_ALLOC_length 0x00000001 - -struct GEN75_MI_URB_ATOMIC_ALLOC { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t URBAtomicStorageOffset; - uint32_t URBAtomicStorageSize; -}; - -static inline void -GEN75_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_URB_ATOMIC_ALLOC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->URBAtomicStorageOffset, 12, 19) | - __gen_field(values->URBAtomicStorageSize, 0, 8) | - 0; - -} - -#define GEN75_MI_URB_CLEAR_length_bias 0x00000002 -#define GEN75_MI_URB_CLEAR_header \ - .CommandType = 0, \ - .MICommandOpcode = 25, \ - .DwordLength = 0 - -#define GEN75_MI_URB_CLEAR_length 0x00000002 - -struct GEN75_MI_URB_CLEAR { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t URBClearLength; - uint32_t URBAddress; -}; - -static inline void -GEN75_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_URB_CLEAR * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->URBClearLength, 16, 29) | - __gen_offset(values->URBAddress, 0, 14) | - 0; - -} - -#define GEN75_MI_USER_INTERRUPT_length_bias 
0x00000001 -#define GEN75_MI_USER_INTERRUPT_header \ - .CommandType = 0, \ - .MICommandOpcode = 2 - -#define GEN75_MI_USER_INTERRUPT_length 0x00000001 - -struct GEN75_MI_USER_INTERRUPT { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN75_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_USER_INTERRUPT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN75_MI_WAIT_FOR_EVENT_length_bias 0x00000001 -#define GEN75_MI_WAIT_FOR_EVENT_header \ - .CommandType = 0, \ - .MICommandOpcode = 3 - -#define GEN75_MI_WAIT_FOR_EVENT_length 0x00000001 - -struct GEN75_MI_WAIT_FOR_EVENT { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool DisplayPipeCHorizontalBlankWaitEnable; - bool DisplayPipeCVerticalBlankWaitEnable; - bool DisplaySpriteCFlipPendingWaitEnable; -#define Notenabled 0 - uint32_t ConditionCodeWaitSelect; - bool DisplayPlaneCFlipPendingWaitEnable; - bool DisplayPipeCScanLineWaitEnable; - bool DisplayPipeBHorizontalBlankWaitEnable; - bool DisplayPipeBVerticalBlankWaitEnable; - bool DisplaySpriteBFlipPendingWaitEnable; - bool DisplayPlaneBFlipPendingWaitEnable; - bool DisplayPipeBScanLineWaitEnable; - bool DisplayPipeAHorizontalBlankWaitEnable; - bool DisplayPipeAVerticalBlankWaitEnable; - bool DisplaySpriteAFlipPendingWaitEnable; - bool DisplayPlaneAFlipPendingWaitEnable; - bool DisplayPipeAScanLineWaitEnable; -}; - -static inline void -GEN75_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_MI_WAIT_FOR_EVENT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DisplayPipeCHorizontalBlankWaitEnable, 22, 22) | - __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 
21) | - __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | - __gen_field(values->ConditionCodeWaitSelect, 16, 19) | - __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | - __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | - __gen_field(values->DisplayPipeBHorizontalBlankWaitEnable, 13, 13) | - __gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | - __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | - __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | - __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | - __gen_field(values->DisplayPipeAHorizontalBlankWaitEnable, 5, 5) | - __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | - __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | - __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | - __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | - 0; - -} - -#define GEN75_PIPE_CONTROL_length_bias 0x00000002 -#define GEN75_PIPE_CONTROL_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 2, \ - ._3DCommandSubOpcode = 0, \ - .DwordLength = 3 - -#define GEN75_PIPE_CONTROL_length 0x00000005 - -struct GEN75_PIPE_CONTROL { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define DAT_PPGTT 0 -#define DAT_GGTT 1 - uint32_t DestinationAddressType; -#define NoLRIOperation 0 -#define MMIOWriteImmediateData 1 - uint32_t LRIPostSyncOperation; - uint32_t StoreDataIndex; - uint32_t CommandStreamerStallEnable; -#define DontReset 0 -#define Reset 1 - uint32_t GlobalSnapshotCountReset; - uint32_t TLBInvalidate; - bool GenericMediaStateClear; -#define NoWrite 0 -#define WriteImmediateData 1 -#define WritePSDepthCount 2 -#define WriteTimestamp 3 - uint32_t PostSyncOperation; - bool DepthStallEnable; -#define DisableFlush 0 -#define EnableFlush 1 - bool RenderTargetCacheFlushEnable; - bool 
InstructionCacheInvalidateEnable; - bool TextureCacheInvalidationEnable; - bool IndirectStatePointersDisable; - bool NotifyEnable; - bool PipeControlFlushEnable; - bool DCFlushEnable; - bool VFCacheInvalidationEnable; - bool ConstantCacheInvalidationEnable; - bool StateCacheInvalidationEnable; - bool StallAtPixelScoreboard; -#define FlushDisabled 0 -#define FlushEnabled 1 - bool DepthCacheFlushEnable; - __gen_address_type Address; - uint32_t ImmediateData; - uint32_t ImmediateData0; -}; - -static inline void -GEN75_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_PIPE_CONTROL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->DestinationAddressType, 24, 24) | - __gen_field(values->LRIPostSyncOperation, 23, 23) | - __gen_field(values->StoreDataIndex, 21, 21) | - __gen_field(values->CommandStreamerStallEnable, 20, 20) | - __gen_field(values->GlobalSnapshotCountReset, 19, 19) | - __gen_field(values->TLBInvalidate, 18, 18) | - __gen_field(values->GenericMediaStateClear, 16, 16) | - __gen_field(values->PostSyncOperation, 14, 15) | - __gen_field(values->DepthStallEnable, 13, 13) | - __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | - __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | - __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | - __gen_field(values->IndirectStatePointersDisable, 9, 9) | - __gen_field(values->NotifyEnable, 8, 8) | - __gen_field(values->PipeControlFlushEnable, 7, 7) | - __gen_field(values->DCFlushEnable, 5, 5) | - __gen_field(values->VFCacheInvalidationEnable, 4, 4) | - __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | - __gen_field(values->StateCacheInvalidationEnable, 
2, 2) | - __gen_field(values->StallAtPixelScoreboard, 1, 1) | - __gen_field(values->DepthCacheFlushEnable, 0, 0) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->Address, dw2); - - dw[3] = - __gen_field(values->ImmediateData, 0, 31) | - 0; - - dw[4] = - __gen_field(values->ImmediateData, 0, 31) | - 0; - -} - -#define GEN75_SCISSOR_RECT_length 0x00000002 - -struct GEN75_SCISSOR_RECT { - uint32_t ScissorRectangleYMin; - uint32_t ScissorRectangleXMin; - uint32_t ScissorRectangleYMax; - uint32_t ScissorRectangleXMax; -}; - -static inline void -GEN75_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SCISSOR_RECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ScissorRectangleYMin, 16, 31) | - __gen_field(values->ScissorRectangleXMin, 0, 15) | - 0; - - dw[1] = - __gen_field(values->ScissorRectangleYMax, 16, 31) | - __gen_field(values->ScissorRectangleXMax, 0, 15) | - 0; - -} - -#define GEN75_SF_CLIP_VIEWPORT_length 0x00000010 - -struct GEN75_SF_CLIP_VIEWPORT { - float ViewportMatrixElementm00; - float ViewportMatrixElementm11; - float ViewportMatrixElementm22; - float ViewportMatrixElementm30; - float ViewportMatrixElementm31; - float ViewportMatrixElementm32; - float XMinClipGuardband; - float XMaxClipGuardband; - float YMinClipGuardband; - float YMaxClipGuardband; -}; - -static inline void -GEN75_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SF_CLIP_VIEWPORT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_float(values->ViewportMatrixElementm00) | - 0; - - dw[1] = - __gen_float(values->ViewportMatrixElementm11) | - 0; - - dw[2] = - __gen_float(values->ViewportMatrixElementm22) | - 0; - - dw[3] = - __gen_float(values->ViewportMatrixElementm30) | - 0; - - dw[4] = - __gen_float(values->ViewportMatrixElementm31) | - 0; - - dw[5] = - 
__gen_float(values->ViewportMatrixElementm32) | - 0; - - dw[6] = - 0; - - dw[7] = - 0; - - dw[8] = - __gen_float(values->XMinClipGuardband) | - 0; - - dw[9] = - __gen_float(values->XMaxClipGuardband) | - 0; - - dw[10] = - __gen_float(values->YMinClipGuardband) | - 0; - - dw[11] = - __gen_float(values->YMaxClipGuardband) | - 0; - - for (uint32_t i = 0, j = 12; i < 4; i += 1, j++) { - dw[j] = - 0; - } - -} - -#define GEN75_BLEND_STATE_length 0x00000002 - -struct GEN75_BLEND_STATE { - bool ColorBufferBlendEnable; - bool IndependentAlphaBlendEnable; -#define BLENDFUNCTION_ADD 0 -#define BLENDFUNCTION_SUBTRACT 1 -#define BLENDFUNCTION_REVERSE_SUBTRACT 2 -#define BLENDFUNCTION_MIN 3 -#define BLENDFUNCTION_MAX 4 - uint32_t AlphaBlendFunction; -#define BLENDFACTOR_ONE 1 -#define BLENDFACTOR_SRC_COLOR 2 -#define BLENDFACTOR_SRC_ALPHA 3 -#define BLENDFACTOR_DST_ALPHA 4 -#define BLENDFACTOR_DST_COLOR 5 -#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 -#define BLENDFACTOR_CONST_COLOR 7 -#define BLENDFACTOR_CONST_ALPHA 8 -#define BLENDFACTOR_SRC1_COLOR 9 -#define BLENDFACTOR_SRC1_ALPHA 10 -#define BLENDFACTOR_ZERO 17 -#define BLENDFACTOR_INV_SRC_COLOR 18 -#define BLENDFACTOR_INV_SRC_ALPHA 19 -#define BLENDFACTOR_INV_DST_ALPHA 20 -#define BLENDFACTOR_INV_DST_COLOR 21 -#define BLENDFACTOR_INV_CONST_COLOR 23 -#define BLENDFACTOR_INV_CONST_ALPHA 24 -#define BLENDFACTOR_INV_SRC1_COLOR 25 -#define BLENDFACTOR_INV_SRC1_ALPHA 26 - uint32_t SourceAlphaBlendFactor; - uint32_t DestinationAlphaBlendFactor; -#define BLENDFUNCTION_ADD 0 -#define BLENDFUNCTION_SUBTRACT 1 -#define BLENDFUNCTION_REVERSE_SUBTRACT 2 -#define BLENDFUNCTION_MIN 3 -#define BLENDFUNCTION_MAX 4 - uint32_t ColorBlendFunction; - uint32_t SourceBlendFactor; - uint32_t DestinationBlendFactor; - bool AlphaToCoverageEnable; - bool AlphaToOneEnable; - bool AlphaToCoverageDitherEnable; - bool WriteDisableAlpha; - bool WriteDisableRed; - bool WriteDisableGreen; - bool WriteDisableBlue; - bool LogicOpEnable; -#define LOGICOP_CLEAR 0 
-#define LOGICOP_NOR 1 -#define LOGICOP_AND_INVERTED 2 -#define LOGICOP_COPY_INVERTED 3 -#define LOGICOP_AND_REVERSE 4 -#define LOGICOP_INVERT 5 -#define LOGICOP_XOR 6 -#define LOGICOP_NAND 7 -#define LOGICOP_AND 8 -#define LOGICOP_EQUIV 9 -#define LOGICOP_NOOP 10 -#define LOGICOP_OR_INVERTED 11 -#define LOGICOP_COPY 12 -#define LOGICOP_OR_REVERSE 13 -#define LOGICOP_OR 14 -#define LOGICOP_SET 15 - uint32_t LogicOpFunction; - bool AlphaTestEnable; -#define COMPAREFUNCTION_ALWAYS 0 -#define COMPAREFUNCTION_NEVER 1 -#define COMPAREFUNCTION_LESS 2 -#define COMPAREFUNCTION_EQUAL 3 -#define COMPAREFUNCTION_LEQUAL 4 -#define COMPAREFUNCTION_GREATER 5 -#define COMPAREFUNCTION_NOTEQUAL 6 -#define COMPAREFUNCTION_GEQUAL 7 - uint32_t AlphaTestFunction; - bool ColorDitherEnable; - uint32_t XDitherOffset; - uint32_t YDitherOffset; -#define COLORCLAMP_UNORM 0 -#define COLORCLAMP_SNORM 1 -#define COLORCLAMP_RTFORMAT 2 - uint32_t ColorClampRange; - bool PreBlendColorClampEnable; - bool PostBlendColorClampEnable; -}; - -static inline void -GEN75_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_BLEND_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ColorBufferBlendEnable, 31, 31) | - __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | - __gen_field(values->AlphaBlendFunction, 26, 28) | - __gen_field(values->SourceAlphaBlendFactor, 20, 24) | - __gen_field(values->DestinationAlphaBlendFactor, 15, 19) | - __gen_field(values->ColorBlendFunction, 11, 13) | - __gen_field(values->SourceBlendFactor, 5, 9) | - __gen_field(values->DestinationBlendFactor, 0, 4) | - 0; - - dw[1] = - __gen_field(values->AlphaToCoverageEnable, 31, 31) | - __gen_field(values->AlphaToOneEnable, 30, 30) | - __gen_field(values->AlphaToCoverageDitherEnable, 29, 29) | - __gen_field(values->WriteDisableAlpha, 27, 27) | - __gen_field(values->WriteDisableRed, 26, 26) | - __gen_field(values->WriteDisableGreen, 25, 25) | - 
__gen_field(values->WriteDisableBlue, 24, 24) | - __gen_field(values->LogicOpEnable, 22, 22) | - __gen_field(values->LogicOpFunction, 18, 21) | - __gen_field(values->AlphaTestEnable, 16, 16) | - __gen_field(values->AlphaTestFunction, 13, 15) | - __gen_field(values->ColorDitherEnable, 12, 12) | - __gen_field(values->XDitherOffset, 10, 11) | - __gen_field(values->YDitherOffset, 8, 9) | - __gen_field(values->ColorClampRange, 2, 3) | - __gen_field(values->PreBlendColorClampEnable, 1, 1) | - __gen_field(values->PostBlendColorClampEnable, 0, 0) | - 0; - -} - -#define GEN75_CC_VIEWPORT_length 0x00000002 - -struct GEN75_CC_VIEWPORT { - float MinimumDepth; - float MaximumDepth; -}; - -static inline void -GEN75_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_CC_VIEWPORT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_float(values->MinimumDepth) | - 0; - - dw[1] = - __gen_float(values->MaximumDepth) | - 0; - -} - -#define GEN75_COLOR_CALC_STATE_length 0x00000006 - -struct GEN75_COLOR_CALC_STATE { - uint32_t StencilReferenceValue; - uint32_t BackFaceStencilReferenceValue; -#define Cancelled 0 -#define NotCancelled 1 - uint32_t RoundDisableFunctionDisable; -#define ALPHATEST_UNORM8 0 -#define ALPHATEST_FLOAT32 1 - uint32_t AlphaTestFormat; - uint32_t AlphaReferenceValueAsUNORM8; - float AlphaReferenceValueAsFLOAT32; - float BlendConstantColorRed; - float BlendConstantColorGreen; - float BlendConstantColorBlue; - float BlendConstantColorAlpha; -}; - -static inline void -GEN75_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_COLOR_CALC_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->StencilReferenceValue, 24, 31) | - __gen_field(values->BackFaceStencilReferenceValue, 16, 23) | - __gen_field(values->RoundDisableFunctionDisable, 15, 15) | - __gen_field(values->AlphaTestFormat, 0, 0) | - 0; - - dw[1] = - 
__gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | - __gen_float(values->AlphaReferenceValueAsFLOAT32) | - 0; - - dw[2] = - __gen_float(values->BlendConstantColorRed) | - 0; - - dw[3] = - __gen_float(values->BlendConstantColorGreen) | - 0; - - dw[4] = - __gen_float(values->BlendConstantColorBlue) | - 0; - - dw[5] = - __gen_float(values->BlendConstantColorAlpha) | - 0; - -} - -#define GEN75_DEPTH_STENCIL_STATE_length 0x00000003 - -struct GEN75_DEPTH_STENCIL_STATE { - bool StencilTestEnable; -#define COMPAREFUNCTION_ALWAYS 0 -#define COMPAREFUNCTION_NEVER 1 -#define COMPAREFUNCTION_LESS 2 -#define COMPAREFUNCTION_EQUAL 3 -#define COMPAREFUNCTION_LEQUAL 4 -#define COMPAREFUNCTION_GREATER 5 -#define COMPAREFUNCTION_NOTEQUAL 6 -#define COMPAREFUNCTION_GEQUAL 7 - uint32_t StencilTestFunction; -#define STENCILOP_KEEP 0 -#define STENCILOP_ZERO 1 -#define STENCILOP_REPLACE 2 -#define STENCILOP_INCRSAT 3 -#define STENCILOP_DECRSAT 4 -#define STENCILOP_INCR 5 -#define STENCILOP_DECR 6 -#define STENCILOP_INVERT 7 - uint32_t StencilFailOp; - uint32_t StencilPassDepthFailOp; - uint32_t StencilPassDepthPassOp; - bool StencilBufferWriteEnable; - bool DoubleSidedStencilEnable; -#define COMPAREFUNCTION_ALWAYS 0 -#define COMPAREFUNCTION_NEVER 1 -#define COMPAREFUNCTION_LESS 2 -#define COMPAREFUNCTION_EQUAL 3 -#define COMPAREFUNCTION_LEQUAL 4 -#define COMPAREFUNCTION_GREATER 5 -#define COMPAREFUNCTION_NOTEQUAL 6 -#define COMPAREFUNCTION_GEQUAL 7 - uint32_t BackFaceStencilTestFunction; -#define STENCILOP_KEEP 0 -#define STENCILOP_ZERO 1 -#define STENCILOP_REPLACE 2 -#define STENCILOP_INCRSAT 3 -#define STENCILOP_DECRSAT 4 -#define STENCILOP_INCR 5 -#define STENCILOP_DECR 6 -#define STENCILOP_INVERT 7 - uint32_t BackfaceStencilFailOp; - uint32_t BackfaceStencilPassDepthFailOp; - uint32_t BackfaceStencilPassDepthPassOp; - uint32_t StencilTestMask; - uint32_t StencilWriteMask; - uint32_t BackfaceStencilTestMask; - uint32_t BackfaceStencilWriteMask; - bool DepthTestEnable; -#define 
COMPAREFUNCTION_ALWAYS 0 -#define COMPAREFUNCTION_NEVER 1 -#define COMPAREFUNCTION_LESS 2 -#define COMPAREFUNCTION_EQUAL 3 -#define COMPAREFUNCTION_LEQUAL 4 -#define COMPAREFUNCTION_GREATER 5 -#define COMPAREFUNCTION_NOTEQUAL 6 -#define COMPAREFUNCTION_GEQUAL 7 - uint32_t DepthTestFunction; - bool DepthBufferWriteEnable; -}; - -static inline void -GEN75_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_DEPTH_STENCIL_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->StencilTestEnable, 31, 31) | - __gen_field(values->StencilTestFunction, 28, 30) | - __gen_field(values->StencilFailOp, 25, 27) | - __gen_field(values->StencilPassDepthFailOp, 22, 24) | - __gen_field(values->StencilPassDepthPassOp, 19, 21) | - __gen_field(values->StencilBufferWriteEnable, 18, 18) | - __gen_field(values->DoubleSidedStencilEnable, 15, 15) | - __gen_field(values->BackFaceStencilTestFunction, 12, 14) | - __gen_field(values->BackfaceStencilFailOp, 9, 11) | - __gen_field(values->BackfaceStencilPassDepthFailOp, 6, 8) | - __gen_field(values->BackfaceStencilPassDepthPassOp, 3, 5) | - 0; - - dw[1] = - __gen_field(values->StencilTestMask, 24, 31) | - __gen_field(values->StencilWriteMask, 16, 23) | - __gen_field(values->BackfaceStencilTestMask, 8, 15) | - __gen_field(values->BackfaceStencilWriteMask, 0, 7) | - 0; - - dw[2] = - __gen_field(values->DepthTestEnable, 31, 31) | - __gen_field(values->DepthTestFunction, 27, 29) | - __gen_field(values->DepthBufferWriteEnable, 26, 26) | - 0; - -} - -#define GEN75_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 - -struct GEN75_INTERFACE_DESCRIPTOR_DATA { - uint32_t KernelStartPointer; -#define Multiple 0 -#define Single 1 - uint32_t SingleProgramFlow; -#define NormalPriority 0 -#define HighPriority 1 - uint32_t ThreadPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool MaskStackExceptionEnable; - 
bool SoftwareExceptionEnable; - uint32_t SamplerStatePointer; -#define Nosamplersused 0 -#define Between1and4samplersused 1 -#define Between5and8samplersused 2 -#define Between9and12samplersused 3 -#define Between13and16samplersused 4 - uint32_t SamplerCount; - uint32_t BindingTablePointer; - uint32_t BindingTableEntryCount; - uint32_t ConstantURBEntryReadLength; -#define RTNE 0 -#define RU 1 -#define RD 2 -#define RTZ 3 - uint32_t RoundingMode; - bool BarrierEnable; - uint32_t SharedLocalMemorySize; - uint32_t NumberofThreadsinGPGPUThreadGroup; - uint32_t CrossThreadConstantDataReadLength; -}; - -static inline void -GEN75_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_INTERFACE_DESCRIPTOR_DATA * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->KernelStartPointer, 6, 31) | - 0; - - dw[1] = - __gen_field(values->SingleProgramFlow, 18, 18) | - __gen_field(values->ThreadPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->MaskStackExceptionEnable, 11, 11) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - dw[2] = - __gen_offset(values->SamplerStatePointer, 5, 31) | - __gen_field(values->SamplerCount, 2, 4) | - 0; - - dw[3] = - __gen_offset(values->BindingTablePointer, 5, 15) | - __gen_field(values->BindingTableEntryCount, 0, 4) | - 0; - - dw[4] = - __gen_field(values->ConstantURBEntryReadLength, 16, 31) | - 0; - - dw[5] = - __gen_field(values->RoundingMode, 22, 23) | - __gen_field(values->BarrierEnable, 21, 21) | - __gen_field(values->SharedLocalMemorySize, 16, 20) | - __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 7) | - 0; - - dw[6] = - __gen_field(values->CrossThreadConstantDataReadLength, 0, 7) | - 0; - - dw[7] = - 0; - -} - -#define GEN75_BINDING_TABLE_STATE_length 0x00000001 - -struct GEN75_BINDING_TABLE_STATE { - uint32_t 
SurfaceStatePointer; -}; - -static inline void -GEN75_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_BINDING_TABLE_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->SurfaceStatePointer, 5, 31) | - 0; - -} - -#define GEN75_RENDER_SURFACE_STATE_length 0x00000008 - -struct GEN75_RENDER_SURFACE_STATE { -#define SURFTYPE_1D 0 -#define SURFTYPE_2D 1 -#define SURFTYPE_3D 2 -#define SURFTYPE_CUBE 3 -#define SURFTYPE_BUFFER 4 -#define SURFTYPE_STRBUF 5 -#define SURFTYPE_NULL 7 - uint32_t SurfaceType; - bool SurfaceArray; - uint32_t SurfaceFormat; -#define VALIGN_2 0 -#define VALIGN_4 1 - uint32_t SurfaceVerticalAlignment; -#define HALIGN_4 0 -#define HALIGN_8 1 - uint32_t SurfaceHorizontalAlignment; - uint32_t TiledSurface; -#define TILEWALK_XMAJOR 0 -#define TILEWALK_YMAJOR 1 - uint32_t TileWalk; - uint32_t VerticalLineStride; - uint32_t VerticalLineStrideOffset; -#define ARYSPC_FULL 0 -#define ARYSPC_LOD0 1 - uint32_t SurfaceArraySpacing; - uint32_t RenderCacheReadWriteMode; -#define NORMAL_MODE 0 -#define PROGRESSIVE_FRAME 2 -#define INTERLACED_FRAME 3 - uint32_t MediaBoundaryPixelMode; - uint32_t CubeFaceEnables; - __gen_address_type SurfaceBaseAddress; - uint32_t Height; - uint32_t Width; - uint32_t Depth; - uint32_t IntegerSurfaceFormat; - uint32_t SurfacePitch; -#define RTROTATE_0DEG 0 -#define RTROTATE_90DEG 1 -#define RTROTATE_270DEG 3 - uint32_t RenderTargetRotation; - uint32_t MinimumArrayElement; - uint32_t RenderTargetViewExtent; -#define MSFMT_MSS 0 -#define MSFMT_DEPTH_STENCIL 1 - uint32_t MultisampledSurfaceStorageFormat; -#define MULTISAMPLECOUNT_1 0 -#define MULTISAMPLECOUNT_4 2 -#define MULTISAMPLECOUNT_8 3 - uint32_t NumberofMultisamples; - uint32_t MultisamplePositionPaletteIndex; - uint32_t MinimumArrayElement0; - uint32_t XOffset; - uint32_t YOffset; - struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; - uint32_t SurfaceMinLOD; - 
uint32_t MIPCountLOD; - __gen_address_type MCSBaseAddress; - uint32_t MCSSurfacePitch; - __gen_address_type AppendCounterAddress; - bool AppendCounterEnable; - bool MCSEnable; - uint32_t XOffsetforUVPlane; - uint32_t YOffsetforUVPlane; -#define SCS_ZERO 0 -#define SCS_ONE 1 -#define SCS_RED 4 -#define SCS_GREEN 5 -#define SCS_BLUE 6 -#define SCS_ALPHA 7 - uint32_t ShaderChannelSelectR; - uint32_t ShaderChannelSelectG; - uint32_t ShaderChannelSelectB; - uint32_t ShaderChannelSelectA; - float ResourceMinLOD; -}; - -static inline void -GEN75_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_RENDER_SURFACE_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->SurfaceType, 29, 31) | - __gen_field(values->SurfaceArray, 28, 28) | - __gen_field(values->SurfaceFormat, 18, 26) | - __gen_field(values->SurfaceVerticalAlignment, 16, 17) | - __gen_field(values->SurfaceHorizontalAlignment, 15, 15) | - __gen_field(values->TiledSurface, 14, 14) | - __gen_field(values->TileWalk, 13, 13) | - __gen_field(values->VerticalLineStride, 12, 12) | - __gen_field(values->VerticalLineStrideOffset, 11, 11) | - __gen_field(values->SurfaceArraySpacing, 10, 10) | - __gen_field(values->RenderCacheReadWriteMode, 8, 8) | - __gen_field(values->MediaBoundaryPixelMode, 6, 7) | - __gen_field(values->CubeFaceEnables, 0, 5) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->SurfaceBaseAddress, dw1); - - dw[2] = - __gen_field(values->Height, 16, 29) | - __gen_field(values->Width, 0, 13) | - 0; - - dw[3] = - __gen_field(values->Depth, 21, 31) | - __gen_field(values->IntegerSurfaceFormat, 18, 20) | - __gen_field(values->SurfacePitch, 0, 17) | - 0; - - dw[4] = - __gen_field(values->RenderTargetRotation, 29, 30) | - __gen_field(values->MinimumArrayElement, 18, 28) | - __gen_field(values->RenderTargetViewExtent, 7, 17) | - __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | 
- __gen_field(values->NumberofMultisamples, 3, 5) | - __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | - __gen_field(values->MinimumArrayElement, 0, 26) | - 0; - - uint32_t dw_SurfaceObjectControlState; - GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); - dw[5] = - __gen_offset(values->XOffset, 25, 31) | - __gen_offset(values->YOffset, 20, 23) | - __gen_field(dw_SurfaceObjectControlState, 16, 19) | - __gen_field(values->SurfaceMinLOD, 4, 7) | - __gen_field(values->MIPCountLOD, 0, 3) | - 0; - - uint32_t dw6 = - __gen_field(values->MCSSurfacePitch, 3, 11) | - __gen_field(values->AppendCounterEnable, 1, 1) | - __gen_field(values->MCSEnable, 0, 0) | - __gen_field(values->XOffsetforUVPlane, 16, 29) | - __gen_field(values->YOffsetforUVPlane, 0, 13) | - 0; - - dw[6] = - __gen_combine_address(data, &dw[6], values->AppendCounterAddress, dw6); - - dw[7] = - __gen_field(values->ShaderChannelSelectR, 25, 27) | - __gen_field(values->ShaderChannelSelectG, 22, 24) | - __gen_field(values->ShaderChannelSelectB, 19, 21) | - __gen_field(values->ShaderChannelSelectA, 16, 18) | - __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | - 0; - -} - -#define GEN75_SAMPLER_BORDER_COLOR_STATE_length 0x00000014 - -#define GEN75_BORDER_COLOR_UINT32_SINT32_length 0x00000004 - -struct GEN75_BORDER_COLOR_UINT32_SINT32 { - uint32_t BorderColorRedui32integerunclamp; - uint32_t BorderColorRedsi32integerunclamp; - uint32_t BorderColorGreenui32integerunclamp; - uint32_t BorderColorGreensi32integerunclamp; - uint32_t BorderColorBlueui32integerunclamp; - uint32_t BorderColorBluesi32integerunclamp; - uint32_t BorderColorGreenui32integerunclamp0; - uint32_t BorderColorGreensi32integerunclamp0; - uint32_t BorderColorAlphaui32integerunclamp; - uint32_t BorderColorAlphasi32integerunclamp; -}; - -static inline void -GEN75_BORDER_COLOR_UINT32_SINT32_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN75_BORDER_COLOR_UINT32_SINT32 * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->BorderColorRedui32integerunclamp, 0, 31) | - __gen_field(values->BorderColorRedsi32integerunclamp, 0, 31) | - 0; - - dw[1] = - __gen_field(values->BorderColorGreenui32integerunclamp, 0, 31) | - __gen_field(values->BorderColorGreensi32integerunclamp, 0, 31) | - 0; - - dw[2] = - __gen_field(values->BorderColorBlueui32integerunclamp, 0, 31) | - __gen_field(values->BorderColorBluesi32integerunclamp, 0, 31) | - __gen_field(values->BorderColorGreenui32integerunclamp, 0, 31) | - __gen_field(values->BorderColorGreensi32integerunclamp, 0, 31) | - 0; - - dw[3] = - __gen_field(values->BorderColorAlphaui32integerunclamp, 0, 31) | - __gen_field(values->BorderColorAlphasi32integerunclamp, 0, 31) | - 0; - -} - -#define GEN75_BORDER_COLOR_UINT16_SINT16_length 0x00000004 - -struct GEN75_BORDER_COLOR_UINT16_SINT16 { - uint32_t BorderColorGreenclamptouint16; - uint32_t BorderColorGreenclamptosint16; - uint32_t BorderColorRedclamptouint16; - uint32_t BorderColorRedclamptosint16; - uint32_t BorderColorAlphaclamptouint16; - uint32_t BorderColorAlphaclamptosint16; - uint32_t BorderColorBlueclamptouint16; - uint32_t BorderColorBlueclamptosint16; -}; - -static inline void -GEN75_BORDER_COLOR_UINT16_SINT16_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_BORDER_COLOR_UINT16_SINT16 * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->BorderColorGreenclamptouint16, 16, 31) | - __gen_field(values->BorderColorGreenclamptosint16, 16, 31) | - __gen_field(values->BorderColorRedclamptouint16, 0, 15) | - __gen_field(values->BorderColorRedclamptosint16, 0, 15) | - 0; - - dw[1] = - 0; - - dw[2] = - __gen_field(values->BorderColorAlphaclamptouint16, 16, 31) | - __gen_field(values->BorderColorAlphaclamptosint16, 16, 31) | - __gen_field(values->BorderColorBlueclamptouint16, 0, 15) | - 
__gen_field(values->BorderColorBlueclamptosint16, 0, 15) | - 0; - - dw[3] = - 0; - -} - -#define GEN75_BORDER_COLOR_UINT8_SINT8_length 0x00000004 - -struct GEN75_BORDER_COLOR_UINT8_SINT8 { - uint32_t BorderColorAlphaclamptouint8; - uint32_t BorderColorAlphaclamptosint8; - uint32_t BorderColorBlueclamptouint8; - uint32_t BorderColorBlueclamptosint8; - uint32_t BorderColorGreenclamptouint8; - uint32_t BorderColorGreenclamptosint8; - uint32_t BorderRedAlphaclamptouint8; - uint32_t BorderRedAlphaclamptosint8; -}; - -static inline void -GEN75_BORDER_COLOR_UINT8_SINT8_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_BORDER_COLOR_UINT8_SINT8 * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->BorderColorAlphaclamptouint8, 24, 31) | - __gen_field(values->BorderColorAlphaclamptosint8, 24, 31) | - __gen_field(values->BorderColorBlueclamptouint8, 16, 23) | - __gen_field(values->BorderColorBlueclamptosint8, 16, 23) | - __gen_field(values->BorderColorGreenclamptouint8, 8, 15) | - __gen_field(values->BorderColorGreenclamptosint8, 8, 15) | - __gen_field(values->BorderRedAlphaclamptouint8, 0, 7) | - __gen_field(values->BorderRedAlphaclamptosint8, 0, 7) | - 0; - - dw[1] = - 0; - - dw[2] = - 0; - - dw[3] = - 0; - -} - -struct GEN75_SAMPLER_BORDER_COLOR_STATE { - float BorderColorRedDX100GL; - uint32_t BorderColorAlpha; - uint32_t BorderColorBlue; - uint32_t BorderColorGreen; - uint32_t BorderColorRedDX9; - float BorderColorGreen0; - float BorderColorBlue0; - float BorderColorAlpha0; - struct GEN75_BORDER_COLOR_UINT32_SINT32 BorderColor; - struct GEN75_BORDER_COLOR_UINT16_SINT16 BorderColor0; - struct GEN75_BORDER_COLOR_UINT8_SINT8 BorderColor1; -}; - -static inline void -GEN75_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SAMPLER_BORDER_COLOR_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - 
__gen_float(values->BorderColorRedDX100GL) | - __gen_field(values->BorderColorAlpha, 24, 31) | - __gen_field(values->BorderColorBlue, 16, 23) | - __gen_field(values->BorderColorGreen, 8, 15) | - __gen_field(values->BorderColorRedDX9, 0, 7) | - 0; - - dw[1] = - __gen_float(values->BorderColorGreen) | - 0; - - dw[2] = - __gen_float(values->BorderColorBlue) | - 0; - - dw[3] = - __gen_float(values->BorderColorAlpha) | - 0; - - for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { - dw[j] = - 0; - } - - GEN75_BORDER_COLOR_UINT32_SINT32_pack(data, &dw[16], &values->BorderColor); -} - -#define GEN75_SAMPLER_STATE_length 0x00000004 - -struct GEN75_SAMPLER_STATE { - bool SamplerDisable; -#define DX10OGL 0 -#define DX9 1 - uint32_t TextureBorderColorMode; -#define OGL 1 - uint32_t LODPreClampEnable; - float BaseMipLevel; -#define MIPFILTER_NONE 0 -#define MIPFILTER_NEAREST 1 -#define MIPFILTER_LINEAR 3 - uint32_t MipModeFilter; -#define MAPFILTER_NEAREST 0 -#define MAPFILTER_LINEAR 1 -#define MAPFILTER_ANISOTROPIC 2 -#define MAPFILTER_MONO 6 - uint32_t MagModeFilter; -#define MAPFILTER_NEAREST 0 -#define MAPFILTER_LINEAR 1 -#define MAPFILTER_ANISOTROPIC 2 -#define MAPFILTER_MONO 6 - uint32_t MinModeFilter; - float TextureLODBias; -#define LEGACY 0 -#define EWAApproximation 1 - uint32_t AnisotropicAlgorithm; - float MinLOD; - float MaxLOD; -#define PREFILTEROPALWAYS 0 -#define PREFILTEROPNEVER 1 -#define PREFILTEROPLESS 2 -#define PREFILTEROPEQUAL 3 -#define PREFILTEROPLEQUAL 4 -#define PREFILTEROPGREATER 5 -#define PREFILTEROPNOTEQUAL 6 -#define PREFILTEROPGEQUAL 7 - uint32_t ShadowFunction; -#define PROGRAMMED 0 -#define OVERRIDE 1 - uint32_t CubeSurfaceControlMode; - uint32_t BorderColorPointer; - bool ChromaKeyEnable; - uint32_t ChromaKeyIndex; -#define KEYFILTER_KILL_ON_ANY_MATCH 0 -#define KEYFILTER_REPLACE_BLACK 1 - uint32_t ChromaKeyMode; -#define RATIO21 0 -#define RATIO41 1 -#define RATIO61 2 -#define RATIO81 3 -#define RATIO101 4 -#define RATIO121 5 -#define RATIO141 
6 -#define RATIO161 7 - uint32_t MaximumAnisotropy; - bool RAddressMinFilterRoundingEnable; - bool RAddressMagFilterRoundingEnable; - bool VAddressMinFilterRoundingEnable; - bool VAddressMagFilterRoundingEnable; - bool UAddressMinFilterRoundingEnable; - bool UAddressMagFilterRoundingEnable; -#define FULL 0 -#define TRIQUAL_HIGHMAG_CLAMP_MIPFILTER 1 -#define MED 2 -#define LOW 3 - uint32_t TrilinearFilterQuality; - bool NonnormalizedCoordinateEnable; - uint32_t TCXAddressControlMode; - uint32_t TCYAddressControlMode; - uint32_t TCZAddressControlMode; -}; - -static inline void -GEN75_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN75_SAMPLER_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->SamplerDisable, 31, 31) | - __gen_field(values->TextureBorderColorMode, 29, 29) | - __gen_field(values->LODPreClampEnable, 28, 28) | - __gen_field(values->BaseMipLevel * (1 << 1), 22, 26) | - __gen_field(values->MipModeFilter, 20, 21) | - __gen_field(values->MagModeFilter, 17, 19) | - __gen_field(values->MinModeFilter, 14, 16) | - __gen_fixed(values->TextureLODBias, 1, 13, true, 8) | - __gen_field(values->AnisotropicAlgorithm, 0, 0) | - 0; - - dw[1] = - __gen_field(values->MinLOD * (1 << 8), 20, 31) | - __gen_field(values->MaxLOD * (1 << 8), 8, 19) | - __gen_field(values->ShadowFunction, 1, 3) | - __gen_field(values->CubeSurfaceControlMode, 0, 0) | - 0; - - dw[2] = - __gen_offset(values->BorderColorPointer, 5, 31) | - 0; - - dw[3] = - __gen_field(values->ChromaKeyEnable, 25, 25) | - __gen_field(values->ChromaKeyIndex, 23, 24) | - __gen_field(values->ChromaKeyMode, 22, 22) | - __gen_field(values->MaximumAnisotropy, 19, 21) | - __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | - __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | - __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | - __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | - 
__gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | - __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | - __gen_field(values->TrilinearFilterQuality, 11, 12) | - __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | - __gen_field(values->TCXAddressControlMode, 6, 8) | - __gen_field(values->TCYAddressControlMode, 3, 5) | - __gen_field(values->TCZAddressControlMode, 0, 2) | - 0; - -} - -/* Enum 3D_Prim_Topo_Type */ -#define _3DPRIM_POINTLIST 1 -#define _3DPRIM_LINELIST 2 -#define _3DPRIM_LINESTRIP 3 -#define _3DPRIM_TRILIST 4 -#define _3DPRIM_TRISTRIP 5 -#define _3DPRIM_TRIFAN 6 -#define _3DPRIM_QUADLIST 7 -#define _3DPRIM_QUADSTRIP 8 -#define _3DPRIM_LINELIST_ADJ 9 -#define _3DPRIM_LINESTRIP_ADJ 10 -#define _3DPRIM_TRILIST_ADJ 11 -#define _3DPRIM_TRISTRIP_ADJ 12 -#define _3DPRIM_TRISTRIP_REVERSE 13 -#define _3DPRIM_POLYGON 14 -#define _3DPRIM_RECTLIST 15 -#define _3DPRIM_LINELOOP 16 -#define _3DPRIM_POINTLIST_BF 17 -#define _3DPRIM_LINESTRIP_CONT 18 -#define _3DPRIM_LINESTRIP_BF 19 -#define _3DPRIM_LINESTRIP_CONT_BF 20 -#define _3DPRIM_TRIFAN_NOSTIPPLE 22 -#define _3DPRIM_PATCHLIST_1 32 -#define _3DPRIM_PATCHLIST_2 33 -#define _3DPRIM_PATCHLIST_3 34 -#define _3DPRIM_PATCHLIST_4 35 -#define _3DPRIM_PATCHLIST_5 36 -#define _3DPRIM_PATCHLIST_6 37 -#define _3DPRIM_PATCHLIST_7 38 -#define _3DPRIM_PATCHLIST_8 39 -#define _3DPRIM_PATCHLIST_9 40 -#define _3DPRIM_PATCHLIST_10 41 -#define _3DPRIM_PATCHLIST_11 42 -#define _3DPRIM_PATCHLIST_12 43 -#define _3DPRIM_PATCHLIST_13 44 -#define _3DPRIM_PATCHLIST_14 45 -#define _3DPRIM_PATCHLIST_15 46 -#define _3DPRIM_PATCHLIST_16 47 -#define _3DPRIM_PATCHLIST_17 48 -#define _3DPRIM_PATCHLIST_18 49 -#define _3DPRIM_PATCHLIST_19 50 -#define _3DPRIM_PATCHLIST_20 51 -#define _3DPRIM_PATCHLIST_21 52 -#define _3DPRIM_PATCHLIST_22 53 -#define _3DPRIM_PATCHLIST_23 54 -#define _3DPRIM_PATCHLIST_24 55 -#define _3DPRIM_PATCHLIST_25 56 -#define _3DPRIM_PATCHLIST_26 57 -#define _3DPRIM_PATCHLIST_27 58 -#define 
_3DPRIM_PATCHLIST_28 59 -#define _3DPRIM_PATCHLIST_29 60 -#define _3DPRIM_PATCHLIST_30 61 -#define _3DPRIM_PATCHLIST_31 62 -#define _3DPRIM_PATCHLIST_32 63 - -/* Enum 3D_Vertex_Component_Control */ -#define VFCOMP_NOSTORE 0 -#define VFCOMP_STORE_SRC 1 -#define VFCOMP_STORE_0 2 -#define VFCOMP_STORE_1_FP 3 -#define VFCOMP_STORE_1_INT 4 -#define VFCOMP_STORE_VID 5 -#define VFCOMP_STORE_IID 6 -#define VFCOMP_STORE_PID 7 - -/* Enum 3D_Compare_Function */ -#define COMPAREFUNCTION_ALWAYS 0 -#define COMPAREFUNCTION_NEVER 1 -#define COMPAREFUNCTION_LESS 2 -#define COMPAREFUNCTION_EQUAL 3 -#define COMPAREFUNCTION_LEQUAL 4 -#define COMPAREFUNCTION_GREATER 5 -#define COMPAREFUNCTION_NOTEQUAL 6 -#define COMPAREFUNCTION_GEQUAL 7 - -/* Enum SURFACE_FORMAT */ -#define R32G32B32A32_FLOAT 0 -#define R32G32B32A32_SINT 1 -#define R32G32B32A32_UINT 2 -#define R32G32B32A32_UNORM 3 -#define R32G32B32A32_SNORM 4 -#define R64G64_FLOAT 5 -#define R32G32B32X32_FLOAT 6 -#define R32G32B32A32_SSCALED 7 -#define R32G32B32A32_USCALED 8 -#define R32G32B32A32_SFIXED 32 -#define R64G64_PASSTHRU 33 -#define R32G32B32_FLOAT 64 -#define R32G32B32_SINT 65 -#define R32G32B32_UINT 66 -#define R32G32B32_UNORM 67 -#define R32G32B32_SNORM 68 -#define R32G32B32_SSCALED 69 -#define R32G32B32_USCALED 70 -#define R32G32B32_SFIXED 80 -#define R16G16B16A16_UNORM 128 -#define R16G16B16A16_SNORM 129 -#define R16G16B16A16_SINT 130 -#define R16G16B16A16_UINT 131 -#define R16G16B16A16_FLOAT 132 -#define R32G32_FLOAT 133 -#define R32G32_SINT 134 -#define R32G32_UINT 135 -#define R32_FLOAT_X8X24_TYPELESS 136 -#define X32_TYPELESS_G8X24_UINT 137 -#define L32A32_FLOAT 138 -#define R32G32_UNORM 139 -#define R32G32_SNORM 140 -#define R64_FLOAT 141 -#define R16G16B16X16_UNORM 142 -#define R16G16B16X16_FLOAT 143 -#define A32X32_FLOAT 144 -#define L32X32_FLOAT 145 -#define I32X32_FLOAT 146 -#define R16G16B16A16_SSCALED 147 -#define R16G16B16A16_USCALED 148 -#define R32G32_SSCALED 149 -#define R32G32_USCALED 150 -#define 
R32G32_SFIXED 160 -#define R64_PASSTHRU 161 -#define B8G8R8A8_UNORM 192 -#define B8G8R8A8_UNORM_SRGB 193 -#define R10G10B10A2_UNORM 194 -#define R10G10B10A2_UNORM_SRGB 195 -#define R10G10B10A2_UINT 196 -#define R10G10B10_SNORM_A2_UNORM 197 -#define R8G8B8A8_UNORM 199 -#define R8G8B8A8_UNORM_SRGB 200 -#define R8G8B8A8_SNORM 201 -#define R8G8B8A8_SINT 202 -#define R8G8B8A8_UINT 203 -#define R16G16_UNORM 204 -#define R16G16_SNORM 205 -#define R16G16_SINT 206 -#define R16G16_UINT 207 -#define R16G16_FLOAT 208 -#define B10G10R10A2_UNORM 209 -#define B10G10R10A2_UNORM_SRGB 210 -#define R11G11B10_FLOAT 211 -#define R32_SINT 214 -#define R32_UINT 215 -#define R32_FLOAT 216 -#define R24_UNORM_X8_TYPELESS 217 -#define X24_TYPELESS_G8_UINT 218 -#define L32_UNORM 221 -#define A32_UNORM 222 -#define L16A16_UNORM 223 -#define I24X8_UNORM 224 -#define L24X8_UNORM 225 -#define A24X8_UNORM 226 -#define I32_FLOAT 227 -#define L32_FLOAT 228 -#define A32_FLOAT 229 -#define X8B8_UNORM_G8R8_SNORM 230 -#define A8X8_UNORM_G8R8_SNORM 231 -#define B8X8_UNORM_G8R8_SNORM 232 -#define B8G8R8X8_UNORM 233 -#define B8G8R8X8_UNORM_SRGB 234 -#define R8G8B8X8_UNORM 235 -#define R8G8B8X8_UNORM_SRGB 236 -#define R9G9B9E5_SHAREDEXP 237 -#define B10G10R10X2_UNORM 238 -#define L16A16_FLOAT 240 -#define R32_UNORM 241 -#define R32_SNORM 242 -#define R10G10B10X2_USCALED 243 -#define R8G8B8A8_SSCALED 244 -#define R8G8B8A8_USCALED 245 -#define R16G16_SSCALED 246 -#define R16G16_USCALED 247 -#define R32_SSCALED 248 -#define R32_USCALED 249 -#define B5G6R5_UNORM 256 -#define B5G6R5_UNORM_SRGB 257 -#define B5G5R5A1_UNORM 258 -#define B5G5R5A1_UNORM_SRGB 259 -#define B4G4R4A4_UNORM 260 -#define B4G4R4A4_UNORM_SRGB 261 -#define R8G8_UNORM 262 -#define R8G8_SNORM 263 -#define R8G8_SINT 264 -#define R8G8_UINT 265 -#define R16_UNORM 266 -#define R16_SNORM 267 -#define R16_SINT 268 -#define R16_UINT 269 -#define R16_FLOAT 270 -#define A8P8_UNORM_PALETTE0 271 -#define A8P8_UNORM_PALETTE1 272 -#define I16_UNORM 273 
-#define L16_UNORM 274 -#define A16_UNORM 275 -#define L8A8_UNORM 276 -#define I16_FLOAT 277 -#define L16_FLOAT 278 -#define A16_FLOAT 279 -#define L8A8_UNORM_SRGB 280 -#define R5G5_SNORM_B6_UNORM 281 -#define B5G5R5X1_UNORM 282 -#define B5G5R5X1_UNORM_SRGB 283 -#define R8G8_SSCALED 284 -#define R8G8_USCALED 285 -#define R16_SSCALED 286 -#define R16_USCALED 287 -#define P8A8_UNORM_PALETTE0 290 -#define P8A8_UNORM_PALETTE1 291 -#define A1B5G5R5_UNORM 292 -#define A4B4G4R4_UNORM 293 -#define L8A8_UINT 294 -#define L8A8_SINT 295 -#define R8_UNORM 320 -#define R8_SNORM 321 -#define R8_SINT 322 -#define R8_UINT 323 -#define A8_UNORM 324 -#define I8_UNORM 325 -#define L8_UNORM 326 -#define P4A4_UNORM_PALETTE0 327 -#define A4P4_UNORM_PALETTE0 328 -#define R8_SSCALED 329 -#define R8_USCALED 330 -#define P8_UNORM_PALETTE0 331 -#define L8_UNORM_SRGB 332 -#define P8_UNORM_PALETTE1 333 -#define P4A4_UNORM_PALETTE1 334 -#define A4P4_UNORM_PALETTE1 335 -#define Y8_UNORM 336 -#define L8_UINT 338 -#define L8_SINT 339 -#define I8_UINT 340 -#define I8_SINT 341 -#define DXT1_RGB_SRGB 384 -#define R1_UNORM 385 -#define YCRCB_NORMAL 386 -#define YCRCB_SWAPUVY 387 -#define P2_UNORM_PALETTE0 388 -#define P2_UNORM_PALETTE1 389 -#define BC1_UNORM 390 -#define BC2_UNORM 391 -#define BC3_UNORM 392 -#define BC4_UNORM 393 -#define BC5_UNORM 394 -#define BC1_UNORM_SRGB 395 -#define BC2_UNORM_SRGB 396 -#define BC3_UNORM_SRGB 397 -#define MONO8 398 -#define YCRCB_SWAPUV 399 -#define YCRCB_SWAPY 400 -#define DXT1_RGB 401 -#define FXT1 402 -#define R8G8B8_UNORM 403 -#define R8G8B8_SNORM 404 -#define R8G8B8_SSCALED 405 -#define R8G8B8_USCALED 406 -#define R64G64B64A64_FLOAT 407 -#define R64G64B64_FLOAT 408 -#define BC4_SNORM 409 -#define BC5_SNORM 410 -#define R16G16B16_FLOAT 411 -#define R16G16B16_UNORM 412 -#define R16G16B16_SNORM 413 -#define R16G16B16_SSCALED 414 -#define R16G16B16_USCALED 415 -#define BC6H_SF16 417 -#define BC7_UNORM 418 -#define BC7_UNORM_SRGB 419 -#define BC6H_UF16 420 
-#define PLANAR_420_8 421 -#define R8G8B8_UNORM_SRGB 424 -#define ETC1_RGB8 425 -#define ETC2_RGB8 426 -#define EAC_R11 427 -#define EAC_RG11 428 -#define EAC_SIGNED_R11 429 -#define EAC_SIGNED_RG11 430 -#define ETC2_SRGB8 431 -#define R16G16B16_UINT 432 -#define R16G16B16_SINT 433 -#define R32_SFIXED 434 -#define R10G10B10A2_SNORM 435 -#define R10G10B10A2_USCALED 436 -#define R10G10B10A2_SSCALED 437 -#define R10G10B10A2_SINT 438 -#define B10G10R10A2_SNORM 439 -#define B10G10R10A2_USCALED 440 -#define B10G10R10A2_SSCALED 441 -#define B10G10R10A2_UINT 442 -#define B10G10R10A2_SINT 443 -#define R64G64B64A64_PASSTHRU 444 -#define R64G64B64_PASSTHRU 445 -#define ETC2_RGB8_PTA 448 -#define ETC2_SRGB8_PTA 449 -#define ETC2_EAC_RGBA8 450 -#define ETC2_EAC_SRGB8_A8 451 -#define R8G8B8_UINT 456 -#define R8G8B8_SINT 457 -#define RAW 511 - -/* Enum Texture Coordinate Mode */ -#define TCM_WRAP 0 -#define TCM_MIRROR 1 -#define TCM_CLAMP 2 -#define TCM_CUBE 3 -#define TCM_CLAMP_BORDER 4 -#define TCM_MIRROR_ONCE 5 - diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h deleted file mode 100644 index 7b104c3ab3c..00000000000 --- a/src/vulkan/gen7_pack.h +++ /dev/null @@ -1,7003 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - - -/* Instructions, enums and structures for IVB. - * - * This file has been generated, do not hand edit. - */ - -#pragma once - -#include -#include - -#ifndef __gen_validate_value -#define __gen_validate_value(x) -#endif - -#ifndef __gen_field_functions -#define __gen_field_functions - -union __gen_value { - float f; - uint32_t dw; -}; - -static inline uint64_t -__gen_mbo(uint32_t start, uint32_t end) -{ - return (~0ull >> (64 - (end - start + 1))) << start; -} - -static inline uint64_t -__gen_field(uint64_t v, uint32_t start, uint32_t end) -{ - __gen_validate_value(v); -#if DEBUG - if (end - start + 1 < 64) - assert(v < 1ull << (end - start + 1)); -#endif - - return v << start; -} - -static inline uint64_t -__gen_fixed(float v, uint32_t start, uint32_t end, - bool is_signed, uint32_t fract_bits) -{ - __gen_validate_value(v); - - const float factor = (1 << fract_bits); - - float max, min; - if (is_signed) { - max = ((1 << (end - start)) - 1) / factor; - min = -(1 << (end - start)) / factor; - } else { - max = ((1 << (end - start + 1)) - 1) / factor; - min = 0.0f; - } - - if (v > max) - v = max; - else if (v < min) - v = min; - - int32_t int_val = roundf(v * factor); - - if (is_signed) - int_val &= (1 << (end - start + 1)) - 1; - - return int_val << start; -} - -static inline uint64_t -__gen_offset(uint64_t v, uint32_t start, uint32_t end) -{ - __gen_validate_value(v); -#if DEBUG - uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; - - assert((v & ~mask) == 0); 
-#endif - - return v; -} - -static inline uint32_t -__gen_float(float v) -{ - __gen_validate_value(v); - return ((union __gen_value) { .f = (v) }).dw; -} - -#ifndef __gen_address_type -#error #define __gen_address_type before including this file -#endif - -#ifndef __gen_user_data -#error #define __gen_combine_address before including this file -#endif - -#endif - -#define GEN7_3DSTATE_URB_VS_length_bias 0x00000002 -#define GEN7_3DSTATE_URB_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 48, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_URB_VS_length 0x00000002 - -struct GEN7_3DSTATE_URB_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t VSURBStartingAddress; - uint32_t VSURBEntryAllocationSize; - uint32_t VSNumberofURBEntries; -}; - -static inline void -GEN7_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_URB_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->VSURBStartingAddress, 25, 29) | - __gen_field(values->VSURBEntryAllocationSize, 16, 24) | - __gen_field(values->VSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN7_MI_STORE_REGISTER_MEM_length_bias 0x00000002 -#define GEN7_MI_STORE_REGISTER_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 36, \ - .DwordLength = 1 - -#define GEN7_MI_STORE_REGISTER_MEM_length 0x00000003 - -struct GEN7_MI_STORE_REGISTER_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool UseGlobalGTT; - uint32_t DwordLength; - uint32_t RegisterAddress; - __gen_address_type MemoryAddress; -}; - -static inline void 
-GEN7_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_STORE_REGISTER_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->RegisterAddress, 2, 22) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - -} - -#define GEN7_PIPELINE_SELECT_length_bias 0x00000001 -#define GEN7_PIPELINE_SELECT_header \ - .CommandType = 3, \ - .CommandSubType = 1, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 4 - -#define GEN7_PIPELINE_SELECT_length 0x00000001 - -struct GEN7_PIPELINE_SELECT { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; -#define _3D 0 -#define Media 1 -#define GPGPU 2 - uint32_t PipelineSelection; -}; - -static inline void -GEN7_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_PIPELINE_SELECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->PipelineSelection, 0, 1) | - 0; - -} - -#define GEN7_STATE_BASE_ADDRESS_length_bias 0x00000002 -#define GEN7_STATE_BASE_ADDRESS_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 1, \ - .DwordLength = 8 - -#define GEN7_STATE_BASE_ADDRESS_length 0x0000000a - -#define GEN7_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 - -struct GEN7_MEMORY_OBJECT_CONTROL_STATE { - uint32_t GraphicsDataTypeGFDT; - uint32_t LLCCacheabilityControlLLCCC; - uint32_t L3CacheabilityControlL3CC; -}; - 
-static inline void -GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEMORY_OBJECT_CONTROL_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->GraphicsDataTypeGFDT, 2, 2) | - __gen_field(values->LLCCacheabilityControlLLCCC, 1, 1) | - __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | - 0; - -} - -struct GEN7_STATE_BASE_ADDRESS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type GeneralStateBaseAddress; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; - uint32_t StatelessDataPortAccessForceWriteThru; - bool GeneralStateBaseAddressModifyEnable; - __gen_address_type SurfaceStateBaseAddress; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; - bool SurfaceStateBaseAddressModifyEnable; - __gen_address_type DynamicStateBaseAddress; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; - bool DynamicStateBaseAddressModifyEnable; - __gen_address_type IndirectObjectBaseAddress; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; - bool IndirectObjectBaseAddressModifyEnable; - __gen_address_type InstructionBaseAddress; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; - bool InstructionBaseAddressModifyEnable; - __gen_address_type GeneralStateAccessUpperBound; - bool GeneralStateAccessUpperBoundModifyEnable; - __gen_address_type DynamicStateAccessUpperBound; - bool DynamicStateAccessUpperBoundModifyEnable; - __gen_address_type IndirectObjectAccessUpperBound; - bool IndirectObjectAccessUpperBoundModifyEnable; - __gen_address_type InstructionAccessUpperBound; - bool InstructionAccessUpperBoundModifyEnable; -}; - -static inline 
void -GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_STATE_BASE_ADDRESS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_GeneralStateMemoryObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); - uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); - uint32_t dw1 = - __gen_field(dw_GeneralStateMemoryObjectControlState, 8, 11) | - __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 4, 7) | - __gen_field(values->StatelessDataPortAccessForceWriteThru, 3, 3) | - __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); - - uint32_t dw_SurfaceStateMemoryObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); - uint32_t dw2 = - __gen_field(dw_SurfaceStateMemoryObjectControlState, 8, 11) | - __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->SurfaceStateBaseAddress, dw2); - - uint32_t dw_DynamicStateMemoryObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); - uint32_t dw3 = - __gen_field(dw_DynamicStateMemoryObjectControlState, 8, 11) | - __gen_field(values->DynamicStateBaseAddressModifyEnable, 
0, 0) | - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->DynamicStateBaseAddress, dw3); - - uint32_t dw_IndirectObjectMemoryObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); - uint32_t dw4 = - __gen_field(dw_IndirectObjectMemoryObjectControlState, 8, 11) | - __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | - 0; - - dw[4] = - __gen_combine_address(data, &dw[4], values->IndirectObjectBaseAddress, dw4); - - uint32_t dw_InstructionMemoryObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); - uint32_t dw5 = - __gen_field(dw_InstructionMemoryObjectControlState, 8, 11) | - __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | - 0; - - dw[5] = - __gen_combine_address(data, &dw[5], values->InstructionBaseAddress, dw5); - - uint32_t dw6 = - __gen_field(values->GeneralStateAccessUpperBoundModifyEnable, 0, 0) | - 0; - - dw[6] = - __gen_combine_address(data, &dw[6], values->GeneralStateAccessUpperBound, dw6); - - uint32_t dw7 = - __gen_field(values->DynamicStateAccessUpperBoundModifyEnable, 0, 0) | - 0; - - dw[7] = - __gen_combine_address(data, &dw[7], values->DynamicStateAccessUpperBound, dw7); - - uint32_t dw8 = - __gen_field(values->IndirectObjectAccessUpperBoundModifyEnable, 0, 0) | - 0; - - dw[8] = - __gen_combine_address(data, &dw[8], values->IndirectObjectAccessUpperBound, dw8); - - uint32_t dw9 = - __gen_field(values->InstructionAccessUpperBoundModifyEnable, 0, 0) | - 0; - - dw[9] = - __gen_combine_address(data, &dw[9], values->InstructionAccessUpperBound, dw9); - -} - -#define GEN7_STATE_PREFETCH_length_bias 0x00000002 -#define GEN7_STATE_PREFETCH_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 3, \ - .DwordLength = 0 - -#define GEN7_STATE_PREFETCH_length 
0x00000002 - -struct GEN7_STATE_PREFETCH { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type PrefetchPointer; - uint32_t PrefetchCount; -}; - -static inline void -GEN7_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_STATE_PREFETCH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - __gen_field(values->PrefetchCount, 0, 2) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); - -} - -#define GEN7_STATE_SIP_length_bias 0x00000002 -#define GEN7_STATE_SIP_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 2, \ - .DwordLength = 0 - -#define GEN7_STATE_SIP_length 0x00000002 - -struct GEN7_STATE_SIP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SystemInstructionPointer; -}; - -static inline void -GEN7_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_STATE_SIP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->SystemInstructionPointer, 4, 31) | - 0; - -} - -#define GEN7_SWTESS_BASE_ADDRESS_length_bias 0x00000002 -#define GEN7_SWTESS_BASE_ADDRESS_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ 
- ._3DCommandSubOpcode = 3, \ - .DwordLength = 0 - -#define GEN7_SWTESS_BASE_ADDRESS_length 0x00000002 - -struct GEN7_SWTESS_BASE_ADDRESS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type SWTessellationBaseAddress; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; -}; - -static inline void -GEN7_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SWTESS_BASE_ADDRESS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_SWTessellationMemoryObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); - uint32_t dw1 = - __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); - -} - -#define GEN7_3DPRIMITIVE_length_bias 0x00000002 -#define GEN7_3DPRIMITIVE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 3, \ - ._3DCommandSubOpcode = 0, \ - .DwordLength = 5 - -#define GEN7_3DPRIMITIVE_length 0x00000007 - -struct GEN7_3DPRIMITIVE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - bool IndirectParameterEnable; - bool PredicateEnable; - uint32_t DwordLength; - bool EndOffsetEnable; -#define SEQUENTIAL 0 -#define RANDOM 1 - uint32_t VertexAccessType; - uint32_t PrimitiveTopologyType; - uint32_t VertexCountPerInstance; - uint32_t StartVertexLocation; - uint32_t InstanceCount; - uint32_t StartInstanceLocation; - uint32_t 
BaseVertexLocation; -}; - -static inline void -GEN7_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DPRIMITIVE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->IndirectParameterEnable, 10, 10) | - __gen_field(values->PredicateEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->EndOffsetEnable, 9, 9) | - __gen_field(values->VertexAccessType, 8, 8) | - __gen_field(values->PrimitiveTopologyType, 0, 5) | - 0; - - dw[2] = - __gen_field(values->VertexCountPerInstance, 0, 31) | - 0; - - dw[3] = - __gen_field(values->StartVertexLocation, 0, 31) | - 0; - - dw[4] = - __gen_field(values->InstanceCount, 0, 31) | - 0; - - dw[5] = - __gen_field(values->StartInstanceLocation, 0, 31) | - 0; - - dw[6] = - __gen_field(values->BaseVertexLocation, 0, 31) | - 0; - -} - -#define GEN7_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 -#define GEN7_3DSTATE_AA_LINE_PARAMETERS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 10, \ - .DwordLength = 1 - -#define GEN7_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 - -struct GEN7_3DSTATE_AA_LINE_PARAMETERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - float AACoverageBias; - float AACoverageSlope; - float AACoverageEndCapBias; - float AACoverageEndCapSlope; -}; - -static inline void -GEN7_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_AA_LINE_PARAMETERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) 
| - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | - __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | - 0; - - dw[2] = - __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | - __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | - 0; - -} - -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 40, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 - -struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoDSBindingTable; -}; - -static inline void -GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoDSBindingTable, 5, 15) | - 0; - -} - -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 41, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 - -struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t 
_3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoGSBindingTable; -}; - -static inline void -GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoGSBindingTable, 5, 15) | - 0; - -} - -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 39, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 - -struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoHSBindingTable; -}; - -static inline void -GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoHSBindingTable, 5, 15) | - 0; - -} - -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - 
._3DCommandSubOpcode = 42, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 - -struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoPSBindingTable; -}; - -static inline void -GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoPSBindingTable, 5, 15) | - 0; - -} - -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 38, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 - -struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoVSBindingTable; -}; - -static inline void -GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - 
__gen_offset(values->PointertoVSBindingTable, 5, 15) | - 0; - -} - -#define GEN7_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 -#define GEN7_3DSTATE_BLEND_STATE_POINTERS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 36, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 - -struct GEN7_3DSTATE_BLEND_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BlendStatePointer; -}; - -static inline void -GEN7_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_BLEND_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->BlendStatePointer, 6, 31) | - __gen_mbo(0, 0) | - 0; - -} - -#define GEN7_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 -#define GEN7_3DSTATE_CC_STATE_POINTERS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 14, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_CC_STATE_POINTERS_length 0x00000002 - -struct GEN7_3DSTATE_CC_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ColorCalcStatePointer; -}; - -static inline void -GEN7_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CC_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - 
__gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->ColorCalcStatePointer, 6, 31) | - __gen_mbo(0, 0) | - 0; - -} - -#define GEN7_3DSTATE_CHROMA_KEY_length_bias 0x00000002 -#define GEN7_3DSTATE_CHROMA_KEY_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 4, \ - .DwordLength = 2 - -#define GEN7_3DSTATE_CHROMA_KEY_length 0x00000004 - -struct GEN7_3DSTATE_CHROMA_KEY { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ChromaKeyTableIndex; - uint32_t ChromaKeyLowValue; - uint32_t ChromaKeyHighValue; -}; - -static inline void -GEN7_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CHROMA_KEY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ChromaKeyTableIndex, 30, 31) | - 0; - - dw[2] = - __gen_field(values->ChromaKeyLowValue, 0, 31) | - 0; - - dw[3] = - __gen_field(values->ChromaKeyHighValue, 0, 31) | - 0; - -} - -#define GEN7_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 -#define GEN7_3DSTATE_CLEAR_PARAMS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 4, \ - .DwordLength = 1 - -#define GEN7_3DSTATE_CLEAR_PARAMS_length 0x00000003 - -struct GEN7_3DSTATE_CLEAR_PARAMS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t DepthClearValue; - bool DepthClearValueValid; -}; - -static inline void 
-GEN7_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CLEAR_PARAMS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->DepthClearValue, 0, 31) | - 0; - - dw[2] = - __gen_field(values->DepthClearValueValid, 0, 0) | - 0; - -} - -#define GEN7_3DSTATE_CLIP_length_bias 0x00000002 -#define GEN7_3DSTATE_CLIP_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 18, \ - .DwordLength = 2 - -#define GEN7_3DSTATE_CLIP_length 0x00000004 - -struct GEN7_3DSTATE_CLIP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t FrontWinding; - uint32_t VertexSubPixelPrecisionSelect; - bool EarlyCullEnable; -#define CULLMODE_BOTH 0 -#define CULLMODE_NONE 1 -#define CULLMODE_FRONT 2 -#define CULLMODE_BACK 3 - uint32_t CullMode; - bool ClipperStatisticsEnable; - uint32_t UserClipDistanceCullTestEnableBitmask; - bool ClipEnable; -#define APIMODE_OGL 0 - uint32_t APIMode; - bool ViewportXYClipTestEnable; - bool ViewportZClipTestEnable; - bool GuardbandClipTestEnable; - uint32_t UserClipDistanceClipTestEnableBitmask; -#define CLIPMODE_NORMAL 0 -#define CLIPMODE_REJECT_ALL 3 -#define CLIPMODE_ACCEPT_ALL 4 - uint32_t ClipMode; - bool PerspectiveDivideDisable; - bool NonPerspectiveBarycentricEnable; -#define Vertex0 0 -#define Vertex1 1 -#define Vertex2 2 - uint32_t TriangleStripListProvokingVertexSelect; -#define Vertex0 0 -#define Vertex1 1 - uint32_t LineStripListProvokingVertexSelect; -#define Vertex0 0 -#define Vertex1 1 -#define Vertex2 2 - uint32_t TriangleFanProvokingVertexSelect; - float 
MinimumPointWidth; - float MaximumPointWidth; - bool ForceZeroRTAIndexEnable; - uint32_t MaximumVPIndex; -}; - -static inline void -GEN7_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CLIP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->FrontWinding, 20, 20) | - __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | - __gen_field(values->EarlyCullEnable, 18, 18) | - __gen_field(values->CullMode, 16, 17) | - __gen_field(values->ClipperStatisticsEnable, 10, 10) | - __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | - 0; - - dw[2] = - __gen_field(values->ClipEnable, 31, 31) | - __gen_field(values->APIMode, 30, 30) | - __gen_field(values->ViewportXYClipTestEnable, 28, 28) | - __gen_field(values->ViewportZClipTestEnable, 27, 27) | - __gen_field(values->GuardbandClipTestEnable, 26, 26) | - __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | - __gen_field(values->ClipMode, 13, 15) | - __gen_field(values->PerspectiveDivideDisable, 9, 9) | - __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | - __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | - __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | - __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | - 0; - - dw[3] = - __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | - __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | - __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | - __gen_field(values->MaximumVPIndex, 0, 3) | - 0; - -} - -#define GEN7_3DSTATE_CONSTANT_DS_length_bias 0x00000002 -#define GEN7_3DSTATE_CONSTANT_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - 
._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 26, \ - .DwordLength = 5 - -#define GEN7_3DSTATE_CONSTANT_DS_length 0x00000007 - -#define GEN7_3DSTATE_CONSTANT_BODY_length 0x00000006 - -struct GEN7_3DSTATE_CONSTANT_BODY { - uint32_t ConstantBuffer1ReadLength; - uint32_t ConstantBuffer0ReadLength; - uint32_t ConstantBuffer3ReadLength; - uint32_t ConstantBuffer2ReadLength; - __gen_address_type PointerToConstantBuffer0; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; - __gen_address_type PointerToConstantBuffer1; - __gen_address_type PointerToConstantBuffer2; - __gen_address_type PointerToConstantBuffer3; -}; - -static inline void -GEN7_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_BODY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | - 0; - - uint32_t dw_ConstantBufferObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); - uint32_t dw2 = - __gen_field(dw_ConstantBufferObjectControlState, 0, 4) | - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->PointerToConstantBuffer1, dw3); - - uint32_t dw4 = - 0; - - dw[4] = - __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer2, dw4); - - uint32_t dw5 = - 0; - - dw[5] = - __gen_combine_address(data, &dw[5], values->PointerToConstantBuffer3, dw5); - -} - -struct GEN7_3DSTATE_CONSTANT_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct 
GEN7_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN7_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN7_3DSTATE_CONSTANT_GS_length_bias 0x00000002 -#define GEN7_3DSTATE_CONSTANT_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 22, \ - .DwordLength = 5 - -#define GEN7_3DSTATE_CONSTANT_GS_length 0x00000007 - -struct GEN7_3DSTATE_CONSTANT_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN7_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN7_3DSTATE_CONSTANT_HS_length_bias 0x00000002 -#define GEN7_3DSTATE_CONSTANT_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 25, \ - .DwordLength = 5 - -#define GEN7_3DSTATE_CONSTANT_HS_length 0x00000007 - -struct GEN7_3DSTATE_CONSTANT_HS { - uint32_t CommandType; - uint32_t CommandSubType; - 
uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN7_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN7_3DSTATE_CONSTANT_PS_length_bias 0x00000002 -#define GEN7_3DSTATE_CONSTANT_PS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 23, \ - .DwordLength = 5 - -#define GEN7_3DSTATE_CONSTANT_PS_length 0x00000007 - -struct GEN7_3DSTATE_CONSTANT_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN7_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN7_3DSTATE_CONSTANT_VS_length_bias 0x00000002 -#define GEN7_3DSTATE_CONSTANT_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 21, \ - .DwordLength = 5 - -#define GEN7_3DSTATE_CONSTANT_VS_length 0x00000007 - 
-struct GEN7_3DSTATE_CONSTANT_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN7_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_CONSTANT_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN7_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 -#define GEN7_3DSTATE_DEPTH_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 5, \ - .DwordLength = 5 - -#define GEN7_3DSTATE_DEPTH_BUFFER_length 0x00000007 - -struct GEN7_3DSTATE_DEPTH_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define SURFTYPE_1D 0 -#define SURFTYPE_2D 1 -#define SURFTYPE_3D 2 -#define SURFTYPE_CUBE 3 -#define SURFTYPE_NULL 7 - uint32_t SurfaceType; - bool DepthWriteEnable; - bool StencilWriteEnable; - bool HierarchicalDepthBufferEnable; -#define D32_FLOAT 1 -#define D24_UNORM_X8_UINT 3 -#define D16_UNORM 5 - uint32_t SurfaceFormat; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; - uint32_t Height; - uint32_t Width; - uint32_t LOD; -#define SURFTYPE_CUBEmustbezero 0 - uint32_t Depth; - uint32_t MinimumArrayElement; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; - uint32_t DepthCoordinateOffsetY; - uint32_t DepthCoordinateOffsetX; - uint32_t RenderTargetViewExtent; -}; - -static inline void 
-GEN7_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_DEPTH_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SurfaceType, 29, 31) | - __gen_field(values->DepthWriteEnable, 28, 28) | - __gen_field(values->StencilWriteEnable, 27, 27) | - __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | - __gen_field(values->SurfaceFormat, 18, 20) | - __gen_field(values->SurfacePitch, 0, 17) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - - dw[3] = - __gen_field(values->Height, 18, 31) | - __gen_field(values->Width, 4, 17) | - __gen_field(values->LOD, 0, 3) | - 0; - - uint32_t dw_DepthBufferObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); - dw[4] = - __gen_field(values->Depth, 21, 31) | - __gen_field(values->MinimumArrayElement, 10, 20) | - __gen_field(dw_DepthBufferObjectControlState, 0, 3) | - 0; - - dw[5] = - __gen_field(values->DepthCoordinateOffsetY, 16, 31) | - __gen_field(values->DepthCoordinateOffsetX, 0, 15) | - 0; - - dw[6] = - __gen_field(values->RenderTargetViewExtent, 21, 31) | - 0; - -} - -#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length_bias 0x00000002 -#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 37, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length 0x00000002 - -struct GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - 
uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoDEPTH_STENCIL_STATE; -}; - -static inline void -GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoDEPTH_STENCIL_STATE, 6, 31) | - __gen_mbo(0, 0) | - 0; - -} - -#define GEN7_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 -#define GEN7_3DSTATE_DRAWING_RECTANGLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 0, \ - .DwordLength = 2 - -#define GEN7_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 - -struct GEN7_3DSTATE_DRAWING_RECTANGLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ClippedDrawingRectangleYMin; - uint32_t ClippedDrawingRectangleXMin; - uint32_t ClippedDrawingRectangleYMax; - uint32_t ClippedDrawingRectangleXMax; - uint32_t DrawingRectangleOriginY; - uint32_t DrawingRectangleOriginX; -}; - -static inline void -GEN7_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_DRAWING_RECTANGLE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | - 
__gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | - 0; - - dw[2] = - __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | - __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | - 0; - - dw[3] = - __gen_field(values->DrawingRectangleOriginY, 16, 31) | - __gen_field(values->DrawingRectangleOriginX, 0, 15) | - 0; - -} - -#define GEN7_3DSTATE_DS_length_bias 0x00000002 -#define GEN7_3DSTATE_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 29, \ - .DwordLength = 4 - -#define GEN7_3DSTATE_DS_length 0x00000006 - -struct GEN7_3DSTATE_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t KernelStartPointer; -#define Multiple 0 -#define Single 1 - uint32_t SingleDomainPointDispatch; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t DispatchGRFStartRegisterForURBData; - uint32_t PatchURBEntryReadLength; - uint32_t PatchURBEntryReadOffset; - uint32_t MaximumNumberofThreads; - bool StatisticsEnable; - bool ComputeWCoordinateEnable; - bool DSCacheDisable; - bool DSFunctionEnable; -}; - -static inline void -GEN7_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - 
__gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->KernelStartPointer, 6, 31) | - 0; - - dw[2] = - __gen_field(values->SingleDomainPointDispatch, 31, 31) | - __gen_field(values->VectorMaskEnable, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - dw[3] = - __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = - __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | - __gen_field(values->PatchURBEntryReadLength, 11, 17) | - __gen_field(values->PatchURBEntryReadOffset, 4, 9) | - 0; - - dw[5] = - __gen_field(values->MaximumNumberofThreads, 25, 31) | - __gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->ComputeWCoordinateEnable, 2, 2) | - __gen_field(values->DSCacheDisable, 1, 1) | - __gen_field(values->DSFunctionEnable, 0, 0) | - 0; - -} - -#define GEN7_3DSTATE_GS_length_bias 0x00000002 -#define GEN7_3DSTATE_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 17, \ - .DwordLength = 5 - -#define GEN7_3DSTATE_GS_length 0x00000007 - -struct GEN7_3DSTATE_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t KernelStartPointer; - uint32_t SingleProgramFlowSPF; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnableVME; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define NormalPriority 0 -#define HighPriority 1 - uint32_t ThreadPriority; -#define IEEE754 0 -#define alternate 1 - uint32_t FloatingPointMode; - bool 
IllegalOpcodeExceptionEnable; - bool MaskStackExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t OutputVertexSize; - uint32_t OutputTopology; - uint32_t VertexURBEntryReadLength; - bool IncludeVertexHandles; - uint32_t VertexURBEntryReadOffset; - uint32_t DispatchGRFStartRegisterforURBData; - uint32_t MaximumNumberofThreads; -#define GSCTL_CUT 0 -#define GSCTL_SID 1 - uint32_t ControlDataFormat; - uint32_t ControlDataHeaderSize; - uint32_t InstanceControl; - uint32_t DefaultStreamID; -#define SINGLE 0 -#define DUAL_INSTANCE 1 -#define DUAL_OBJECT 2 - uint32_t DispatchMode; - uint32_t GSStatisticsEnable; - uint32_t GSInvocationsIncrementValue; - bool IncludePrimitiveID; - uint32_t Hint; - bool ReorderEnable; - bool DiscardAdjacency; - bool GSEnable; - uint32_t SemaphoreHandle; -}; - -static inline void -GEN7_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->KernelStartPointer, 6, 31) | - 0; - - dw[2] = - __gen_field(values->SingleProgramFlowSPF, 31, 31) | - __gen_field(values->VectorMaskEnableVME, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->MaskStackExceptionEnable, 11, 11) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - dw[3] = - __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | - __gen_field(values->PerThreadScratchSpace, 0, 
3) | - 0; - - dw[4] = - __gen_field(values->OutputVertexSize, 23, 28) | - __gen_field(values->OutputTopology, 17, 22) | - __gen_field(values->VertexURBEntryReadLength, 11, 16) | - __gen_field(values->IncludeVertexHandles, 10, 10) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - __gen_field(values->DispatchGRFStartRegisterforURBData, 0, 3) | - 0; - - dw[5] = - __gen_field(values->MaximumNumberofThreads, 25, 31) | - __gen_field(values->ControlDataFormat, 24, 24) | - __gen_field(values->ControlDataHeaderSize, 20, 23) | - __gen_field(values->InstanceControl, 15, 19) | - __gen_field(values->DefaultStreamID, 13, 14) | - __gen_field(values->DispatchMode, 11, 12) | - __gen_field(values->GSStatisticsEnable, 10, 10) | - __gen_field(values->GSInvocationsIncrementValue, 5, 9) | - __gen_field(values->IncludePrimitiveID, 4, 4) | - __gen_field(values->Hint, 3, 3) | - __gen_field(values->ReorderEnable, 2, 2) | - __gen_field(values->DiscardAdjacency, 1, 1) | - __gen_field(values->GSEnable, 0, 0) | - 0; - - dw[6] = - __gen_offset(values->SemaphoreHandle, 0, 11) | - 0; - -} - -#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 -#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 7, \ - .DwordLength = 1 - -#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000003 - -struct GEN7_3DSTATE_HIER_DEPTH_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; -}; - -static inline void -GEN7_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_HIER_DEPTH_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - 
__gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_HierarchicalDepthBufferObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); - dw[1] = - __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 28) | - __gen_field(values->SurfacePitch, 0, 16) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - -} - -#define GEN7_3DSTATE_HS_length_bias 0x00000002 -#define GEN7_3DSTATE_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 27, \ - .DwordLength = 5 - -#define GEN7_3DSTATE_HS_length 0x00000007 - -struct GEN7_3DSTATE_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define IEEE754 0 -#define alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t MaximumNumberofThreads; - bool Enable; - bool StatisticsEnable; - uint32_t InstanceCount; - uint32_t KernelStartPointer; - uint32_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t SingleProgramFlow; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; - bool IncludeVertexHandles; - uint32_t DispatchGRFStartRegisterForURBData; - uint32_t VertexURBEntryReadLength; - uint32_t VertexURBEntryReadOffset; - uint32_t SemaphoreHandle; -}; - -static inline void -GEN7_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_HS * restrict values) -{ - 
uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - __gen_field(values->MaximumNumberofThreads, 0, 6) | - 0; - - dw[2] = - __gen_field(values->Enable, 31, 31) | - __gen_field(values->StatisticsEnable, 29, 29) | - __gen_field(values->InstanceCount, 0, 3) | - 0; - - dw[3] = - __gen_offset(values->KernelStartPointer, 6, 31) | - 0; - - dw[4] = - __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[5] = - __gen_field(values->SingleProgramFlow, 27, 27) | - __gen_field(values->VectorMaskEnable, 26, 26) | - __gen_field(values->IncludeVertexHandles, 24, 24) | - __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | - __gen_field(values->VertexURBEntryReadLength, 11, 16) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - 0; - - dw[6] = - __gen_offset(values->SemaphoreHandle, 0, 11) | - 0; - -} - -#define GEN7_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 -#define GEN7_3DSTATE_INDEX_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 10, \ - .DwordLength = 1 - -#define GEN7_3DSTATE_INDEX_BUFFER_length 0x00000003 - -struct GEN7_3DSTATE_INDEX_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; - bool CutIndexEnable; -#define INDEX_BYTE 0 -#define INDEX_WORD 1 -#define 
INDEX_DWORD 2 - uint32_t IndexFormat; - uint32_t DwordLength; - __gen_address_type BufferStartingAddress; - __gen_address_type BufferEndingAddress; -}; - -static inline void -GEN7_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_INDEX_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_MemoryObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(dw_MemoryObjectControlState, 12, 15) | - __gen_field(values->CutIndexEnable, 10, 10) | - __gen_field(values->IndexFormat, 8, 9) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->BufferEndingAddress, dw2); - -} - -#define GEN7_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 -#define GEN7_3DSTATE_LINE_STIPPLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 8, \ - .DwordLength = 1 - -#define GEN7_3DSTATE_LINE_STIPPLE_length 0x00000003 - -struct GEN7_3DSTATE_LINE_STIPPLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; - uint32_t CurrentRepeatCounter; - uint32_t CurrentStippleIndex; - uint32_t LineStipplePattern; - float LineStippleInverseRepeatCount; - uint32_t LineStippleRepeatCount; -}; - -static inline void -GEN7_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_LINE_STIPPLE * restrict values) -{ - uint32_t *dw 
= (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | - __gen_field(values->CurrentRepeatCounter, 21, 29) | - __gen_field(values->CurrentStippleIndex, 16, 19) | - __gen_field(values->LineStipplePattern, 0, 15) | - 0; - - dw[2] = - __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | - __gen_field(values->LineStippleRepeatCount, 0, 8) | - 0; - -} - -#define GEN7_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 -#define GEN7_3DSTATE_MONOFILTER_SIZE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 17, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_MONOFILTER_SIZE_length 0x00000002 - -struct GEN7_3DSTATE_MONOFILTER_SIZE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t MonochromeFilterWidth; - uint32_t MonochromeFilterHeight; -}; - -static inline void -GEN7_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_MONOFILTER_SIZE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->MonochromeFilterWidth, 3, 5) | - __gen_field(values->MonochromeFilterHeight, 0, 2) | - 0; - -} - -#define GEN7_3DSTATE_MULTISAMPLE_length_bias 0x00000002 -#define GEN7_3DSTATE_MULTISAMPLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - 
._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 13, \ - .DwordLength = 2 - -#define GEN7_3DSTATE_MULTISAMPLE_length 0x00000004 - -struct GEN7_3DSTATE_MULTISAMPLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define PIXLOC_CENTER 0 -#define PIXLOC_UL_CORNER 1 - uint32_t PixelLocation; -#define NUMSAMPLES_1 0 -#define NUMSAMPLES_4 2 -#define NUMSAMPLES_8 3 - uint32_t NumberofMultisamples; - float Sample3XOffset; - float Sample3YOffset; - float Sample2XOffset; - float Sample2YOffset; - float Sample1XOffset; - float Sample1YOffset; - float Sample0XOffset; - float Sample0YOffset; - float Sample7XOffset; - float Sample7YOffset; - float Sample6XOffset; - float Sample6YOffset; - float Sample5XOffset; - float Sample5YOffset; - float Sample4XOffset; - float Sample4YOffset; -}; - -static inline void -GEN7_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_MULTISAMPLE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->PixelLocation, 4, 4) | - __gen_field(values->NumberofMultisamples, 1, 3) | - 0; - - dw[2] = - __gen_field(values->Sample3XOffset * (1 << 4), 28, 31) | - __gen_field(values->Sample3YOffset * (1 << 4), 24, 27) | - __gen_field(values->Sample2XOffset * (1 << 4), 20, 23) | - __gen_field(values->Sample2YOffset * (1 << 4), 16, 19) | - __gen_field(values->Sample1XOffset * (1 << 4), 12, 15) | - __gen_field(values->Sample1YOffset * (1 << 4), 8, 11) | - __gen_field(values->Sample0XOffset * (1 << 4), 4, 7) | - __gen_field(values->Sample0YOffset * (1 << 4), 0, 3) | - 0; - - dw[3] = - __gen_field(values->Sample7XOffset * (1 << 4), 28, 31) 
| - __gen_field(values->Sample7YOffset * (1 << 4), 24, 27) | - __gen_field(values->Sample6XOffset * (1 << 4), 20, 23) | - __gen_field(values->Sample6YOffset * (1 << 4), 16, 19) | - __gen_field(values->Sample5XOffset * (1 << 4), 12, 15) | - __gen_field(values->Sample5YOffset * (1 << 4), 8, 11) | - __gen_field(values->Sample4XOffset * (1 << 4), 4, 7) | - __gen_field(values->Sample4YOffset * (1 << 4), 0, 3) | - 0; - -} - -#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 -#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 6, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 - -struct GEN7_3DSTATE_POLY_STIPPLE_OFFSET { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PolygonStippleXOffset; - uint32_t PolygonStippleYOffset; -}; - -static inline void -GEN7_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->PolygonStippleXOffset, 8, 12) | - __gen_field(values->PolygonStippleYOffset, 0, 4) | - 0; - -} - -#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 -#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 7, \ - .DwordLength = 31 - -#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 - -struct GEN7_3DSTATE_POLY_STIPPLE_PATTERN { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t 
_3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PatternRow[32]; -}; - -static inline void -GEN7_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { - dw[j] = - __gen_field(values->PatternRow[i + 0], 0, 31) | - 0; - } - -} - -#define GEN7_3DSTATE_PS_length_bias 0x00000002 -#define GEN7_3DSTATE_PS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 32, \ - .DwordLength = 6 - -#define GEN7_3DSTATE_PS_length 0x00000008 - -struct GEN7_3DSTATE_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t KernelStartPointer0; -#define Multiple 0 -#define Single 1 - uint32_t SingleProgramFlowSPF; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnableVME; - uint32_t SamplerCount; -#define FTZ 0 -#define RET 1 - uint32_t DenormalMode; - uint32_t BindingTableEntryCount; -#define IEEE745 0 -#define Alt 1 - uint32_t FloatingPointMode; -#define RTNE 0 -#define RU 1 -#define RD 2 -#define RTZ 3 - uint32_t RoundingMode; - bool IllegalOpcodeExceptionEnable; - bool MaskStackExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t MaximumNumberofThreads; - bool PushConstantEnable; - bool AttributeEnable; - bool oMaskPresenttoRenderTarget; - bool RenderTargetFastClearEnable; - bool DualSourceBlendEnable; - bool RenderTargetResolveEnable; -#define POSOFFSET_NONE 0 -#define 
POSOFFSET_CENTROID 2 -#define POSOFFSET_SAMPLE 3 - uint32_t PositionXYOffsetSelect; - bool _32PixelDispatchEnable; - bool _16PixelDispatchEnable; - bool _8PixelDispatchEnable; - uint32_t DispatchGRFStartRegisterforConstantSetupData0; - uint32_t DispatchGRFStartRegisterforConstantSetupData1; - uint32_t DispatchGRFStartRegisterforConstantSetupData2; - uint32_t KernelStartPointer1; - uint32_t KernelStartPointer2; -}; - -static inline void -GEN7_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->KernelStartPointer0, 6, 31) | - 0; - - dw[2] = - __gen_field(values->SingleProgramFlowSPF, 31, 31) | - __gen_field(values->VectorMaskEnableVME, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->DenormalMode, 26, 26) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->RoundingMode, 14, 15) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->MaskStackExceptionEnable, 11, 11) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - dw[3] = - __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = - __gen_field(values->MaximumNumberofThreads, 24, 31) | - __gen_field(values->PushConstantEnable, 11, 11) | - __gen_field(values->AttributeEnable, 10, 10) | - __gen_field(values->oMaskPresenttoRenderTarget, 9, 9) | - __gen_field(values->RenderTargetFastClearEnable, 8, 8) | - __gen_field(values->DualSourceBlendEnable, 7, 7) | - __gen_field(values->RenderTargetResolveEnable, 6, 6) | - 
__gen_field(values->PositionXYOffsetSelect, 3, 4) | - __gen_field(values->_32PixelDispatchEnable, 2, 2) | - __gen_field(values->_16PixelDispatchEnable, 1, 1) | - __gen_field(values->_8PixelDispatchEnable, 0, 0) | - 0; - - dw[5] = - __gen_field(values->DispatchGRFStartRegisterforConstantSetupData0, 16, 22) | - __gen_field(values->DispatchGRFStartRegisterforConstantSetupData1, 8, 14) | - __gen_field(values->DispatchGRFStartRegisterforConstantSetupData2, 0, 6) | - 0; - - dw[6] = - __gen_offset(values->KernelStartPointer1, 6, 31) | - 0; - - dw[7] = - __gen_offset(values->KernelStartPointer2, 6, 31) | - 0; - -} - -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 20, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 - -struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define _0KB 0 - uint32_t ConstantBufferOffset; -#define _0KB 0 - uint32_t ConstantBufferSize; -}; - -static inline void -GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 19) | - __gen_field(values->ConstantBufferSize, 0, 4) | - 0; - -} - -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - 
._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 21, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 - -struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define _0KB 0 - uint32_t ConstantBufferOffset; -#define _0KB 0 - uint32_t ConstantBufferSize; -}; - -static inline void -GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 19) | - __gen_field(values->ConstantBufferSize, 0, 4) | - 0; - -} - -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 19, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 - -struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define _0KB 0 - uint32_t ConstantBufferOffset; -#define _0KB 0 - uint32_t ConstantBufferSize; -}; - -static inline void -GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 
24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 19) | - __gen_field(values->ConstantBufferSize, 0, 4) | - 0; - -} - -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 22, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 - -struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define _0KB 0 - uint32_t ConstantBufferOffset; -#define _0KB 0 - uint32_t ConstantBufferSize; -}; - -static inline void -GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 19) | - __gen_field(values->ConstantBufferSize, 0, 4) | - 0; - -} - -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 18, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 - -struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define _0KB 0 - uint32_t ConstantBufferOffset; -#define _0KB 
0 - uint32_t ConstantBufferSize; -}; - -static inline void -GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 19) | - __gen_field(values->ConstantBufferSize, 0, 4) | - 0; - -} - -#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 -#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 2 - -#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 - -#define GEN7_PALETTE_ENTRY_length 0x00000001 - -struct GEN7_PALETTE_ENTRY { - uint32_t Alpha; - uint32_t Red; - uint32_t Green; - uint32_t Blue; -}; - -static inline void -GEN7_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_PALETTE_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->Alpha, 24, 31) | - __gen_field(values->Red, 16, 23) | - __gen_field(values->Green, 8, 15) | - __gen_field(values->Blue, 0, 7) | - 0; - -} - -struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0 { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - 
__gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 -#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 12 - -#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 - -struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1 { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 45, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 - -struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoDSSamplerState; -}; - -static inline void -GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) 
dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoDSSamplerState, 5, 31) | - 0; - -} - -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 46, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 - -struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoGSSamplerState; -}; - -static inline void -GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoGSSamplerState, 5, 31) | - 0; - -} - -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 44, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 - -struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoHSSamplerState; 
-}; - -static inline void -GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoHSSamplerState, 5, 31) | - 0; - -} - -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 47, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 - -struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoPSSamplerState; -}; - -static inline void -GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoPSSamplerState, 5, 31) | - 0; - -} - -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 43, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 
0x00000002 - -struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoVSSamplerState; -}; - -static inline void -GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoVSSamplerState, 5, 31) | - 0; - -} - -#define GEN7_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 -#define GEN7_3DSTATE_SAMPLE_MASK_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 24, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_SAMPLE_MASK_length 0x00000002 - -struct GEN7_3DSTATE_SAMPLE_MASK { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SampleMask; -}; - -static inline void -GEN7_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SAMPLE_MASK * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SampleMask, 0, 7) | - 0; - -} - -#define GEN7_3DSTATE_SBE_length_bias 0x00000002 -#define GEN7_3DSTATE_SBE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 31, \ - 
.DwordLength = 12 - -#define GEN7_3DSTATE_SBE_length 0x0000000e - -struct GEN7_3DSTATE_SBE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define SWIZ_0_15 0 -#define SWIZ_16_31 1 - uint32_t AttributeSwizzleControlMode; - uint32_t NumberofSFOutputAttributes; - bool AttributeSwizzleEnable; -#define UPPERLEFT 0 -#define LOWERLEFT 1 - uint32_t PointSpriteTextureCoordinateOrigin; - uint32_t VertexURBEntryReadLength; - uint32_t VertexURBEntryReadOffset; - bool Attribute2n1ComponentOverrideW; - bool Attribute2n1ComponentOverrideZ; - bool Attribute2n1ComponentOverrideY; - bool Attribute2n1ComponentOverrideX; -#define CONST_0000 0 -#define CONST_0001_FLOAT 1 -#define CONST_1111_FLOAT 2 -#define PRIM_ID 3 - uint32_t Attribute2n1ConstantSource; -#define INPUTATTR 0 -#define INPUTATTR_FACING 1 -#define INPUTATTR_W 2 -#define INPUTATTR_FACING_W 3 - uint32_t Attribute2n1SwizzleSelect; - uint32_t Attribute2n1SourceAttribute; - bool Attribute2nComponentOverrideW; - bool Attribute2nComponentOverrideZ; - bool Attribute2nComponentOverrideY; - bool Attribute2nComponentOverrideX; -#define CONST_0000 0 -#define CONST_0001_FLOAT 1 -#define CONST_1111_FLOAT 2 -#define PRIM_ID 3 - uint32_t Attribute2nConstantSource; -#define INPUTATTR 0 -#define INPUTATTR_FACING 1 -#define INPUTATTR_W 2 -#define INPUTATTR_FACING_W 3 - uint32_t Attribute2nSwizzleSelect; - uint32_t Attribute2nSourceAttribute; - uint32_t PointSpriteTextureCoordinateEnable; - uint32_t ConstantInterpolationEnable310; - uint32_t Attribute7WrapShortestEnables; - uint32_t Attribute6WrapShortestEnables; - uint32_t Attribute5WrapShortestEnables; - uint32_t Attribute4WrapShortestEnables; - uint32_t Attribute3WrapShortestEnables; - uint32_t Attribute2WrapShortestEnables; - uint32_t Attribute1WrapShortestEnables; - uint32_t Attribute0WrapShortestEnables; - uint32_t Attribute15WrapShortestEnables; - uint32_t Attribute14WrapShortestEnables; - 
uint32_t Attribute13WrapShortestEnables; - uint32_t Attribute12WrapShortestEnables; - uint32_t Attribute11WrapShortestEnables; - uint32_t Attribute10WrapShortestEnables; - uint32_t Attribute9WrapShortestEnables; - uint32_t Attribute8WrapShortestEnables; -}; - -static inline void -GEN7_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SBE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->AttributeSwizzleControlMode, 28, 28) | - __gen_field(values->NumberofSFOutputAttributes, 22, 27) | - __gen_field(values->AttributeSwizzleEnable, 21, 21) | - __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | - __gen_field(values->VertexURBEntryReadLength, 11, 15) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - 0; - - dw[2] = - __gen_field(values->Attribute2n1ComponentOverrideW, 31, 31) | - __gen_field(values->Attribute2n1ComponentOverrideZ, 30, 30) | - __gen_field(values->Attribute2n1ComponentOverrideY, 29, 29) | - __gen_field(values->Attribute2n1ComponentOverrideX, 28, 28) | - __gen_field(values->Attribute2n1ConstantSource, 25, 26) | - __gen_field(values->Attribute2n1SwizzleSelect, 22, 23) | - __gen_field(values->Attribute2n1SourceAttribute, 16, 20) | - __gen_field(values->Attribute2nComponentOverrideW, 15, 15) | - __gen_field(values->Attribute2nComponentOverrideZ, 14, 14) | - __gen_field(values->Attribute2nComponentOverrideY, 13, 13) | - __gen_field(values->Attribute2nComponentOverrideX, 12, 12) | - __gen_field(values->Attribute2nConstantSource, 9, 10) | - __gen_field(values->Attribute2nSwizzleSelect, 6, 7) | - __gen_field(values->Attribute2nSourceAttribute, 0, 4) | - 0; - - dw[10] = - 
__gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | - 0; - - dw[11] = - __gen_field(values->ConstantInterpolationEnable310, 0, 31) | - 0; - - dw[12] = - __gen_field(values->Attribute7WrapShortestEnables, 28, 31) | - __gen_field(values->Attribute6WrapShortestEnables, 24, 27) | - __gen_field(values->Attribute5WrapShortestEnables, 20, 23) | - __gen_field(values->Attribute4WrapShortestEnables, 16, 19) | - __gen_field(values->Attribute3WrapShortestEnables, 12, 15) | - __gen_field(values->Attribute2WrapShortestEnables, 8, 11) | - __gen_field(values->Attribute1WrapShortestEnables, 4, 7) | - __gen_field(values->Attribute0WrapShortestEnables, 0, 3) | - 0; - - dw[13] = - __gen_field(values->Attribute15WrapShortestEnables, 28, 31) | - __gen_field(values->Attribute14WrapShortestEnables, 24, 27) | - __gen_field(values->Attribute13WrapShortestEnables, 20, 23) | - __gen_field(values->Attribute12WrapShortestEnables, 16, 19) | - __gen_field(values->Attribute11WrapShortestEnables, 12, 15) | - __gen_field(values->Attribute10WrapShortestEnables, 8, 11) | - __gen_field(values->Attribute9WrapShortestEnables, 4, 7) | - __gen_field(values->Attribute8WrapShortestEnables, 0, 3) | - 0; - -} - -#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 -#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 15, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 - -struct GEN7_3DSTATE_SCISSOR_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ScissorRectPointer; -}; - -static inline void -GEN7_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 
29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->ScissorRectPointer, 5, 31) | - 0; - -} - -#define GEN7_3DSTATE_SF_length_bias 0x00000002 -#define GEN7_3DSTATE_SF_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 19, \ - .DwordLength = 5 - -#define GEN7_3DSTATE_SF_length 0x00000007 - -struct GEN7_3DSTATE_SF { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define D32_FLOAT_S8X24_UINT 0 -#define D32_FLOAT 1 -#define D24_UNORM_S8_UINT 2 -#define D24_UNORM_X8_UINT 3 -#define D16_UNORM 5 - uint32_t DepthBufferSurfaceFormat; - bool LegacyGlobalDepthBiasEnable; - bool StatisticsEnable; - bool GlobalDepthOffsetEnableSolid; - bool GlobalDepthOffsetEnableWireframe; - bool GlobalDepthOffsetEnablePoint; -#define RASTER_SOLID 0 -#define RASTER_WIREFRAME 1 -#define RASTER_POINT 2 - uint32_t FrontFaceFillMode; -#define RASTER_SOLID 0 -#define RASTER_WIREFRAME 1 -#define RASTER_POINT 2 - uint32_t BackFaceFillMode; - bool ViewTransformEnable; - uint32_t FrontWinding; - bool AntiAliasingEnable; -#define CULLMODE_BOTH 0 -#define CULLMODE_NONE 1 -#define CULLMODE_FRONT 2 -#define CULLMODE_BACK 3 - uint32_t CullMode; - float LineWidth; - uint32_t LineEndCapAntialiasingRegionWidth; - bool ScissorRectangleEnable; - uint32_t MultisampleRasterizationMode; - bool LastPixelEnable; -#define Vertex0 0 -#define Vertex1 1 -#define Vertex2 2 - uint32_t TriangleStripListProvokingVertexSelect; - uint32_t LineStripListProvokingVertexSelect; -#define Vertex0 0 -#define Vertex1 1 -#define Vertex2 2 - uint32_t TriangleFanProvokingVertexSelect; -#define AALINEDISTANCE_TRUE 1 - uint32_t AALineDistanceMode; - uint32_t VertexSubPixelPrecisionSelect; - uint32_t 
UsePointWidthState; - float PointWidth; - float GlobalDepthOffsetConstant; - float GlobalDepthOffsetScale; - float GlobalDepthOffsetClamp; -}; - -static inline void -GEN7_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SF * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->DepthBufferSurfaceFormat, 12, 14) | - __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | - __gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | - __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | - __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | - __gen_field(values->FrontFaceFillMode, 5, 6) | - __gen_field(values->BackFaceFillMode, 3, 4) | - __gen_field(values->ViewTransformEnable, 1, 1) | - __gen_field(values->FrontWinding, 0, 0) | - 0; - - dw[2] = - __gen_field(values->AntiAliasingEnable, 31, 31) | - __gen_field(values->CullMode, 29, 30) | - __gen_field(values->LineWidth * (1 << 7), 18, 27) | - __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | - __gen_field(values->ScissorRectangleEnable, 11, 11) | - __gen_field(values->MultisampleRasterizationMode, 8, 9) | - 0; - - dw[3] = - __gen_field(values->LastPixelEnable, 31, 31) | - __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | - __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | - __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | - __gen_field(values->AALineDistanceMode, 14, 14) | - __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | - __gen_field(values->UsePointWidthState, 11, 11) | - __gen_field(values->PointWidth * (1 << 3), 0, 10) | - 0; - - dw[4] = 
- __gen_float(values->GlobalDepthOffsetConstant) | - 0; - - dw[5] = - __gen_float(values->GlobalDepthOffsetScale) | - 0; - - dw[6] = - __gen_float(values->GlobalDepthOffsetClamp) | - 0; - -} - -#define GEN7_3DSTATE_SO_BUFFER_length_bias 0x00000002 -#define GEN7_3DSTATE_SO_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 24, \ - .DwordLength = 2 - -#define GEN7_3DSTATE_SO_BUFFER_length 0x00000004 - -struct GEN7_3DSTATE_SO_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SOBufferIndex; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; - __gen_address_type SurfaceEndAddress; -}; - -static inline void -GEN7_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SO_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_SOBufferObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); - dw[1] = - __gen_field(values->SOBufferIndex, 29, 30) | - __gen_field(dw_SOBufferObjectControlState, 25, 28) | - __gen_field(values->SurfacePitch, 0, 11) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->SurfaceEndAddress, dw3); - -} - -#define GEN7_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 -#define GEN7_3DSTATE_SO_DECL_LIST_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - 
._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 23 - -#define GEN7_3DSTATE_SO_DECL_LIST_length 0x00000000 - -#define GEN7_SO_DECL_ENTRY_length 0x00000002 - -#define GEN7_SO_DECL_length 0x00000001 - -struct GEN7_SO_DECL { - uint32_t OutputBufferSlot; - uint32_t HoleFlag; - uint32_t RegisterIndex; - uint32_t ComponentMask; -}; - -static inline void -GEN7_SO_DECL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SO_DECL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->OutputBufferSlot, 12, 13) | - __gen_field(values->HoleFlag, 11, 11) | - __gen_field(values->RegisterIndex, 4, 9) | - __gen_field(values->ComponentMask, 0, 3) | - 0; - -} - -struct GEN7_SO_DECL_ENTRY { - struct GEN7_SO_DECL Stream3Decl; - struct GEN7_SO_DECL Stream2Decl; - struct GEN7_SO_DECL Stream1Decl; - struct GEN7_SO_DECL Stream0Decl; -}; - -static inline void -GEN7_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SO_DECL_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_Stream3Decl; - GEN7_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); - uint32_t dw_Stream2Decl; - GEN7_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); - uint32_t dw_Stream1Decl; - GEN7_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); - uint32_t dw_Stream0Decl; - GEN7_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); - uint64_t qw0 = - __gen_field(dw_Stream3Decl, 48, 63) | - __gen_field(dw_Stream2Decl, 32, 47) | - __gen_field(dw_Stream1Decl, 16, 31) | - __gen_field(dw_Stream0Decl, 0, 15) | - 0; - - dw[0] = qw0; - dw[1] = qw0 >> 32; - -} - -struct GEN7_3DSTATE_SO_DECL_LIST { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t StreamtoBufferSelects3; - uint32_t StreamtoBufferSelects2; - uint32_t StreamtoBufferSelects1; - uint32_t StreamtoBufferSelects0; - 
uint32_t NumEntries3; - uint32_t NumEntries2; - uint32_t NumEntries1; - uint32_t NumEntries0; - /* variable length fields follow */ -}; - -static inline void -GEN7_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_SO_DECL_LIST * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->StreamtoBufferSelects3, 12, 15) | - __gen_field(values->StreamtoBufferSelects2, 8, 11) | - __gen_field(values->StreamtoBufferSelects1, 4, 7) | - __gen_field(values->StreamtoBufferSelects0, 0, 3) | - 0; - - dw[2] = - __gen_field(values->NumEntries3, 24, 31) | - __gen_field(values->NumEntries2, 16, 23) | - __gen_field(values->NumEntries1, 8, 15) | - __gen_field(values->NumEntries0, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN7_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 -#define GEN7_3DSTATE_STENCIL_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 6, \ - .DwordLength = 1 - -#define GEN7_3DSTATE_STENCIL_BUFFER_length 0x00000003 - -struct GEN7_3DSTATE_STENCIL_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; -}; - -static inline void -GEN7_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_STENCIL_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - 
__gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_StencilBufferObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); - dw[1] = - __gen_field(dw_StencilBufferObjectControlState, 25, 28) | - __gen_field(values->SurfacePitch, 0, 16) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - -} - -#define GEN7_3DSTATE_STREAMOUT_length_bias 0x00000002 -#define GEN7_3DSTATE_STREAMOUT_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 30, \ - .DwordLength = 1 - -#define GEN7_3DSTATE_STREAMOUT_length 0x00000003 - -struct GEN7_3DSTATE_STREAMOUT { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SOFunctionEnable; - uint32_t RenderingDisable; - uint32_t RenderStreamSelect; -#define LEADING 0 -#define TRAILING 1 - uint32_t ReorderMode; - bool SOStatisticsEnable; - uint32_t SOBufferEnable3; - uint32_t SOBufferEnable2; - uint32_t SOBufferEnable1; - uint32_t SOBufferEnable0; - uint32_t Stream3VertexReadOffset; - uint32_t Stream3VertexReadLength; - uint32_t Stream2VertexReadOffset; - uint32_t Stream2VertexReadLength; - uint32_t Stream1VertexReadOffset; - uint32_t Stream1VertexReadLength; - uint32_t Stream0VertexReadOffset; - uint32_t Stream0VertexReadLength; -}; - -static inline void -GEN7_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_STREAMOUT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - 
__gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SOFunctionEnable, 31, 31) | - __gen_field(values->RenderingDisable, 30, 30) | - __gen_field(values->RenderStreamSelect, 27, 28) | - __gen_field(values->ReorderMode, 26, 26) | - __gen_field(values->SOStatisticsEnable, 25, 25) | - __gen_field(values->SOBufferEnable3, 11, 11) | - __gen_field(values->SOBufferEnable2, 10, 10) | - __gen_field(values->SOBufferEnable1, 9, 9) | - __gen_field(values->SOBufferEnable0, 8, 8) | - 0; - - dw[2] = - __gen_field(values->Stream3VertexReadOffset, 29, 29) | - __gen_field(values->Stream3VertexReadLength, 24, 28) | - __gen_field(values->Stream2VertexReadOffset, 21, 21) | - __gen_field(values->Stream2VertexReadLength, 16, 20) | - __gen_field(values->Stream1VertexReadOffset, 13, 13) | - __gen_field(values->Stream1VertexReadLength, 8, 12) | - __gen_field(values->Stream0VertexReadOffset, 5, 5) | - __gen_field(values->Stream0VertexReadLength, 0, 4) | - 0; - -} - -#define GEN7_3DSTATE_TE_length_bias 0x00000002 -#define GEN7_3DSTATE_TE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 28, \ - .DwordLength = 2 - -#define GEN7_3DSTATE_TE_length 0x00000004 - -struct GEN7_3DSTATE_TE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define INTEGER 0 -#define ODD_FRACTIONAL 1 -#define EVEN_FRACTIONAL 2 - uint32_t Partitioning; -#define POINT 0 -#define OUTPUT_LINE 1 -#define OUTPUT_TRI_CW 2 -#define OUTPUT_TRI_CCW 3 - uint32_t OutputTopology; -#define QUAD 0 -#define TRI 1 -#define ISOLINE 2 - uint32_t TEDomain; -#define HW_TESS 0 -#define SW_TESS 1 - uint32_t TEMode; - bool TEEnable; - float MaximumTessellationFactorOdd; - float MaximumTessellationFactorNotOdd; -}; - -static inline void -GEN7_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_TE * restrict values) -{ - uint32_t *dw = 
(uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->Partitioning, 12, 13) | - __gen_field(values->OutputTopology, 8, 9) | - __gen_field(values->TEDomain, 4, 5) | - __gen_field(values->TEMode, 1, 2) | - __gen_field(values->TEEnable, 0, 0) | - 0; - - dw[2] = - __gen_float(values->MaximumTessellationFactorOdd) | - 0; - - dw[3] = - __gen_float(values->MaximumTessellationFactorNotOdd) | - 0; - -} - -#define GEN7_3DSTATE_URB_DS_length_bias 0x00000002 -#define GEN7_3DSTATE_URB_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 50, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_URB_DS_length 0x00000002 - -struct GEN7_3DSTATE_URB_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t DSURBStartingAddress; - uint32_t DSURBEntryAllocationSize; - uint32_t DSNumberofURBEntries; -}; - -static inline void -GEN7_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_URB_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->DSURBStartingAddress, 25, 29) | - __gen_field(values->DSURBEntryAllocationSize, 16, 24) | - __gen_field(values->DSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN7_3DSTATE_URB_GS_length_bias 0x00000002 -#define GEN7_3DSTATE_URB_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, 
\ - ._3DCommandSubOpcode = 51, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_URB_GS_length 0x00000002 - -struct GEN7_3DSTATE_URB_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t GSURBStartingAddress; - uint32_t GSURBEntryAllocationSize; - uint32_t GSNumberofURBEntries; -}; - -static inline void -GEN7_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_URB_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->GSURBStartingAddress, 25, 29) | - __gen_field(values->GSURBEntryAllocationSize, 16, 24) | - __gen_field(values->GSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN7_3DSTATE_URB_HS_length_bias 0x00000002 -#define GEN7_3DSTATE_URB_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 49, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_URB_HS_length 0x00000002 - -struct GEN7_3DSTATE_URB_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t HSURBStartingAddress; - uint32_t HSURBEntryAllocationSize; - uint32_t HSNumberofURBEntries; -}; - -static inline void -GEN7_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_URB_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 
0; - - dw[1] = - __gen_field(values->HSURBStartingAddress, 25, 29) | - __gen_field(values->HSURBEntryAllocationSize, 16, 24) | - __gen_field(values->HSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN7_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 -#define GEN7_3DSTATE_VERTEX_BUFFERS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 8 - -#define GEN7_3DSTATE_VERTEX_BUFFERS_length 0x00000000 - -#define GEN7_VERTEX_BUFFER_STATE_length 0x00000004 - -struct GEN7_VERTEX_BUFFER_STATE { - uint32_t VertexBufferIndex; -#define VERTEXDATA 0 -#define INSTANCEDATA 1 - uint32_t BufferAccessType; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE VertexBufferMemoryObjectControlState; - uint32_t AddressModifyEnable; - bool NullVertexBuffer; - uint32_t VertexFetchInvalidate; - uint32_t BufferPitch; - __gen_address_type BufferStartingAddress; - __gen_address_type EndAddress; - uint32_t InstanceDataStepRate; -}; - -static inline void -GEN7_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_VERTEX_BUFFER_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_VertexBufferMemoryObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_VertexBufferMemoryObjectControlState, &values->VertexBufferMemoryObjectControlState); - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - __gen_field(values->BufferAccessType, 20, 20) | - __gen_field(dw_VertexBufferMemoryObjectControlState, 16, 19) | - __gen_field(values->AddressModifyEnable, 14, 14) | - __gen_field(values->NullVertexBuffer, 13, 13) | - __gen_field(values->VertexFetchInvalidate, 12, 12) | - __gen_field(values->BufferPitch, 0, 11) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->EndAddress, dw2); - - dw[3] = - 
__gen_field(values->InstanceDataStepRate, 0, 31) | - 0; - -} - -struct GEN7_3DSTATE_VERTEX_BUFFERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN7_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_VERTEX_BUFFERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN7_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 -#define GEN7_3DSTATE_VERTEX_ELEMENTS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 9 - -#define GEN7_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 - -#define GEN7_VERTEX_ELEMENT_STATE_length 0x00000002 - -struct GEN7_VERTEX_ELEMENT_STATE { - uint32_t VertexBufferIndex; - bool Valid; - uint32_t SourceElementFormat; - bool EdgeFlagEnable; - uint32_t SourceElementOffset; - uint32_t Component0Control; - uint32_t Component1Control; - uint32_t Component2Control; - uint32_t Component3Control; -}; - -static inline void -GEN7_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_VERTEX_ELEMENT_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - __gen_field(values->Valid, 25, 25) | - __gen_field(values->SourceElementFormat, 16, 24) | - __gen_field(values->EdgeFlagEnable, 15, 15) | - __gen_field(values->SourceElementOffset, 0, 11) | - 0; - - dw[1] = - __gen_field(values->Component0Control, 28, 30) | - __gen_field(values->Component1Control, 24, 26) | - 
__gen_field(values->Component2Control, 20, 22) | - __gen_field(values->Component3Control, 16, 18) | - 0; - -} - -struct GEN7_3DSTATE_VERTEX_ELEMENTS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN7_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_VERTEX_ELEMENTS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN7_3DSTATE_VF_STATISTICS_length_bias 0x00000001 -#define GEN7_3DSTATE_VF_STATISTICS_header \ - .CommandType = 3, \ - .CommandSubType = 1, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 11 - -#define GEN7_3DSTATE_VF_STATISTICS_length 0x00000001 - -struct GEN7_3DSTATE_VF_STATISTICS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - bool StatisticsEnable; -}; - -static inline void -GEN7_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_VF_STATISTICS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->StatisticsEnable, 0, 0) | - 0; - -} - -#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 -#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 35, \ - 
.DwordLength = 0 - -#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 - -struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t CCViewportPointer; -}; - -static inline void -GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->CCViewportPointer, 5, 31) | - 0; - -} - -#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 -#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 33, \ - .DwordLength = 0 - -#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 - -struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SFClipViewportPointer; -}; - -static inline void -GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->SFClipViewportPointer, 6, 
31) | - 0; - -} - -#define GEN7_3DSTATE_VS_length_bias 0x00000002 -#define GEN7_3DSTATE_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 16, \ - .DwordLength = 4 - -#define GEN7_3DSTATE_VS_length 0x00000006 - -struct GEN7_3DSTATE_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t KernelStartPointer; -#define Multiple 0 -#define Single 1 - uint32_t SingleVertexDispatch; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnableVME; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t ScratchSpaceBaseOffset; - uint32_t PerThreadScratchSpace; - uint32_t DispatchGRFStartRegisterforURBData; - uint32_t VertexURBEntryReadLength; - uint32_t VertexURBEntryReadOffset; - uint32_t MaximumNumberofThreads; - bool StatisticsEnable; - bool VertexCacheDisable; - bool VSFunctionEnable; -}; - -static inline void -GEN7_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->KernelStartPointer, 6, 31) | - 0; - - dw[2] = - __gen_field(values->SingleVertexDispatch, 31, 31) | - __gen_field(values->VectorMaskEnableVME, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->FloatingPointMode, 16, 
16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - dw[3] = - __gen_offset(values->ScratchSpaceBaseOffset, 10, 31) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = - __gen_field(values->DispatchGRFStartRegisterforURBData, 20, 24) | - __gen_field(values->VertexURBEntryReadLength, 11, 16) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - 0; - - dw[5] = - __gen_field(values->MaximumNumberofThreads, 25, 31) | - __gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->VertexCacheDisable, 1, 1) | - __gen_field(values->VSFunctionEnable, 0, 0) | - 0; - -} - -#define GEN7_3DSTATE_WM_length_bias 0x00000002 -#define GEN7_3DSTATE_WM_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 20, \ - .DwordLength = 1 - -#define GEN7_3DSTATE_WM_length 0x00000003 - -struct GEN7_3DSTATE_WM { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool StatisticsEnable; - bool DepthBufferClear; - bool ThreadDispatchEnable; - bool DepthBufferResolveEnable; - bool HierarchicalDepthBufferResolveEnable; - bool LegacyDiamondLineRasterization; - bool PixelShaderKillPixel; -#define PSCDEPTH_OFF 0 -#define PSCDEPTH_ON 1 -#define PSCDEPTH_ON_GE 2 -#define PSCDEPTH_ON_LE 3 - uint32_t PixelShaderComputedDepthMode; -#define EDSC_NORMAL 0 -#define EDSC_PSEXEC 1 -#define EDSC_PREPS 2 - uint32_t EarlyDepthStencilControl; - bool PixelShaderUsesSourceDepth; - bool PixelShaderUsesSourceW; -#define INTERP_PIXEL 0 -#define INTERP_CENTROID 2 -#define INTERP_SAMPLE 3 - uint32_t PositionZWInterpolationMode; - uint32_t BarycentricInterpolationMode; - bool PixelShaderUsesInputCoverageMask; - uint32_t LineEndCapAntialiasingRegionWidth; - uint32_t LineAntialiasingRegionWidth; - bool PolygonStippleEnable; - bool LineStippleEnable; -#define 
RASTRULE_UPPER_LEFT 0 -#define RASTRULE_UPPER_RIGHT 1 - uint32_t PointRasterizationRule; -#define MSRASTMODE_OFF_PIXEL 0 -#define MSRASTMODE_OFF_PATTERN 1 -#define MSRASTMODE_ON_PIXEL 2 -#define MSRASTMODE_ON_PATTERN 3 - uint32_t MultisampleRasterizationMode; -#define MSDISPMODE_PERSAMPLE 0 -#define MSDISPMODE_PERPIXEL 1 - uint32_t MultisampleDispatchMode; -}; - -static inline void -GEN7_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_3DSTATE_WM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->StatisticsEnable, 31, 31) | - __gen_field(values->DepthBufferClear, 30, 30) | - __gen_field(values->ThreadDispatchEnable, 29, 29) | - __gen_field(values->DepthBufferResolveEnable, 28, 28) | - __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | - __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | - __gen_field(values->PixelShaderKillPixel, 25, 25) | - __gen_field(values->PixelShaderComputedDepthMode, 23, 24) | - __gen_field(values->EarlyDepthStencilControl, 21, 22) | - __gen_field(values->PixelShaderUsesSourceDepth, 20, 20) | - __gen_field(values->PixelShaderUsesSourceW, 19, 19) | - __gen_field(values->PositionZWInterpolationMode, 17, 18) | - __gen_field(values->BarycentricInterpolationMode, 11, 16) | - __gen_field(values->PixelShaderUsesInputCoverageMask, 10, 10) | - __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | - __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | - __gen_field(values->PolygonStippleEnable, 4, 4) | - __gen_field(values->LineStippleEnable, 3, 3) | - __gen_field(values->PointRasterizationRule, 2, 2) | - __gen_field(values->MultisampleRasterizationMode, 0, 1) | - 0; - - dw[2] = - 
__gen_field(values->MultisampleDispatchMode, 31, 31) | - 0; - -} - -#define GEN7_GPGPU_OBJECT_length_bias 0x00000002 -#define GEN7_GPGPU_OBJECT_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcode = 4, \ - .DwordLength = 6 - -#define GEN7_GPGPU_OBJECT_length 0x00000008 - -struct GEN7_GPGPU_OBJECT { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - bool PredicateEnable; - uint32_t DwordLength; - uint32_t SharedLocalMemoryFixedOffset; - uint32_t InterfaceDescriptorOffset; - uint32_t SharedLocalMemoryOffset; - uint32_t EndofThreadGroup; -#define HalfSlice1 2 -#define HalfSlice0 1 -#define EitherHalfSlice 0 - uint32_t HalfSliceDestinationSelect; - uint32_t IndirectDataLength; - uint32_t IndirectDataStartAddress; - uint32_t ThreadGroupIDX; - uint32_t ThreadGroupIDY; - uint32_t ThreadGroupIDZ; - uint32_t ExecutionMask; -}; - -static inline void -GEN7_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_GPGPU_OBJECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->PredicateEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SharedLocalMemoryFixedOffset, 7, 7) | - __gen_field(values->InterfaceDescriptorOffset, 0, 4) | - 0; - - dw[2] = - __gen_field(values->SharedLocalMemoryOffset, 28, 31) | - __gen_field(values->EndofThreadGroup, 24, 24) | - __gen_field(values->HalfSliceDestinationSelect, 17, 18) | - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - dw[3] = - __gen_offset(values->IndirectDataStartAddress, 0, 31) | - 0; - - dw[4] = - __gen_field(values->ThreadGroupIDX, 0, 31) | - 0; - - dw[5] = - __gen_field(values->ThreadGroupIDY, 0, 31) | - 0; - - dw[6] = - 
__gen_field(values->ThreadGroupIDZ, 0, 31) | - 0; - - dw[7] = - __gen_field(values->ExecutionMask, 0, 31) | - 0; - -} - -#define GEN7_GPGPU_WALKER_length_bias 0x00000002 -#define GEN7_GPGPU_WALKER_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcodeA = 5, \ - .DwordLength = 9 - -#define GEN7_GPGPU_WALKER_length 0x0000000b - -struct GEN7_GPGPU_WALKER { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcodeA; - bool IndirectParameterEnable; - bool PredicateEnable; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; -#define SIMD8 0 -#define SIMD16 1 -#define SIMD32 2 - uint32_t SIMDSize; - uint32_t ThreadDepthCounterMaximum; - uint32_t ThreadHeightCounterMaximum; - uint32_t ThreadWidthCounterMaximum; - uint32_t ThreadGroupIDStartingX; - uint32_t ThreadGroupIDXDimension; - uint32_t ThreadGroupIDStartingY; - uint32_t ThreadGroupIDYDimension; - uint32_t ThreadGroupIDStartingZ; - uint32_t ThreadGroupIDZDimension; - uint32_t RightExecutionMask; - uint32_t BottomExecutionMask; -}; - -static inline void -GEN7_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_GPGPU_WALKER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcodeA, 16, 23) | - __gen_field(values->IndirectParameterEnable, 10, 10) | - __gen_field(values->PredicateEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 4) | - 0; - - dw[2] = - __gen_field(values->SIMDSize, 30, 31) | - __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | - __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | - __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | - 0; - - dw[3] = - __gen_field(values->ThreadGroupIDStartingX, 0, 31) | - 0; - - 
dw[4] = - __gen_field(values->ThreadGroupIDXDimension, 0, 31) | - 0; - - dw[5] = - __gen_field(values->ThreadGroupIDStartingY, 0, 31) | - 0; - - dw[6] = - __gen_field(values->ThreadGroupIDYDimension, 0, 31) | - 0; - - dw[7] = - __gen_field(values->ThreadGroupIDStartingZ, 0, 31) | - 0; - - dw[8] = - __gen_field(values->ThreadGroupIDZDimension, 0, 31) | - 0; - - dw[9] = - __gen_field(values->RightExecutionMask, 0, 31) | - 0; - - dw[10] = - __gen_field(values->BottomExecutionMask, 0, 31) | - 0; - -} - -#define GEN7_MEDIA_CURBE_LOAD_length_bias 0x00000002 -#define GEN7_MEDIA_CURBE_LOAD_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 1, \ - .DwordLength = 2 - -#define GEN7_MEDIA_CURBE_LOAD_length 0x00000004 - -struct GEN7_MEDIA_CURBE_LOAD { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t CURBETotalDataLength; - uint32_t CURBEDataStartAddress; -}; - -static inline void -GEN7_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_CURBE_LOAD * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - 0; - - dw[2] = - __gen_field(values->CURBETotalDataLength, 0, 16) | - 0; - - dw[3] = - __gen_field(values->CURBEDataStartAddress, 0, 31) | - 0; - -} - -#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 -#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 2, \ - .DwordLength = 2 - -#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 - -struct GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t 
MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorTotalLength; - uint32_t InterfaceDescriptorDataStartAddress; -}; - -static inline void -GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - 0; - - dw[2] = - __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | - 0; - - dw[3] = - __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | - 0; - -} - -#define GEN7_MEDIA_OBJECT_length_bias 0x00000002 -#define GEN7_MEDIA_OBJECT_header \ - .CommandType = 3, \ - .MediaCommandPipeline = 2, \ - .MediaCommandOpcode = 1, \ - .MediaCommandSubOpcode = 0 - -#define GEN7_MEDIA_OBJECT_length 0x00000000 - -struct GEN7_MEDIA_OBJECT { - uint32_t CommandType; - uint32_t MediaCommandPipeline; - uint32_t MediaCommandOpcode; - uint32_t MediaCommandSubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - bool ChildrenPresent; -#define Nothreadsynchronization 0 -#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 - uint32_t ThreadSynchronization; -#define Notusingscoreboard 0 -#define Usingscoreboard 1 - uint32_t UseScoreboard; -#define HalfSlice1 2 -#define HalfSlice0 1 -#define Eitherhalfslice 0 - uint32_t HalfSliceDestinationSelect; - uint32_t IndirectDataLength; - __gen_address_type IndirectDataStartAddress; - uint32_t ScoredboardY; - uint32_t ScoreboardX; - uint32_t ScoreboardColor; - bool ScoreboardMask; - /* variable length fields follow */ -}; - -static inline void -GEN7_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_OBJECT * restrict values) -{ - 
uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MediaCommandPipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->MediaCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 4) | - 0; - - dw[2] = - __gen_field(values->ChildrenPresent, 31, 31) | - __gen_field(values->ThreadSynchronization, 24, 24) | - __gen_field(values->UseScoreboard, 21, 21) | - __gen_field(values->HalfSliceDestinationSelect, 17, 18) | - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); - - dw[4] = - __gen_field(values->ScoredboardY, 16, 24) | - __gen_field(values->ScoreboardX, 0, 8) | - 0; - - dw[5] = - __gen_field(values->ScoreboardColor, 16, 19) | - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN7_MEDIA_OBJECT_PRT_length_bias 0x00000002 -#define GEN7_MEDIA_OBJECT_PRT_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcode = 2, \ - .DwordLength = 14 - -#define GEN7_MEDIA_OBJECT_PRT_length 0x00000010 - -struct GEN7_MEDIA_OBJECT_PRT { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - bool ChildrenPresent; - bool PRT_FenceNeeded; -#define Rootthreadqueue 0 -#define VFEstateflush 1 - uint32_t PRT_FenceType; - uint32_t InlineData[12]; -}; - -static inline void -GEN7_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_OBJECT_PRT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - 
__gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 4) | - 0; - - dw[2] = - __gen_field(values->ChildrenPresent, 31, 31) | - __gen_field(values->PRT_FenceNeeded, 23, 23) | - __gen_field(values->PRT_FenceType, 22, 22) | - 0; - - dw[3] = - 0; - - for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { - dw[j] = - __gen_field(values->InlineData[i + 0], 0, 31) | - 0; - } - -} - -#define GEN7_MEDIA_OBJECT_WALKER_length_bias 0x00000002 -#define GEN7_MEDIA_OBJECT_WALKER_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcode = 3 - -#define GEN7_MEDIA_OBJECT_WALKER_length 0x00000000 - -struct GEN7_MEDIA_OBJECT_WALKER { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - bool ChildrenPresent; -#define Nothreadsynchronization 0 -#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 - uint32_t ThreadSynchronization; -#define Notusingscoreboard 0 -#define Usingscoreboard 1 - uint32_t UseScoreboard; - uint32_t IndirectDataLength; - uint32_t IndirectDataStartAddress; - bool ScoreboardMask; - bool DualMode; - bool Repel; - uint32_t ColorCountMinusOne; - uint32_t MiddleLoopExtraSteps; - uint32_t LocalMidLoopUnitY; - uint32_t MidLoopUnitX; - uint32_t GlobalLoopExecCount; - uint32_t LocalLoopExecCount; - uint32_t BlockResolutionY; - uint32_t BlockResolutionX; - uint32_t LocalStartY; - uint32_t LocalStartX; - uint32_t LocalEndY; - uint32_t LocalEndX; - uint32_t LocalOuterLoopStrideY; - uint32_t LocalOuterLoopStrideX; - uint32_t LocalInnerLoopUnitY; - uint32_t LocalInnerLoopUnitX; - uint32_t GlobalResolutionY; - uint32_t GlobalResolutionX; - uint32_t GlobalStartY; - uint32_t GlobalStartX; - uint32_t GlobalOuterLoopStrideY; - uint32_t GlobalOuterLoopStrideX; - uint32_t GlobalInnerLoopUnitY; - uint32_t GlobalInnerLoopUnitX; - /* 
variable length fields follow */ -}; - -static inline void -GEN7_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_OBJECT_WALKER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 4) | - 0; - - dw[2] = - __gen_field(values->ChildrenPresent, 31, 31) | - __gen_field(values->ThreadSynchronization, 24, 24) | - __gen_field(values->UseScoreboard, 21, 21) | - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - dw[3] = - __gen_offset(values->IndirectDataStartAddress, 0, 31) | - 0; - - dw[4] = - 0; - - dw[5] = - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - dw[6] = - __gen_field(values->DualMode, 31, 31) | - __gen_field(values->Repel, 30, 30) | - __gen_field(values->ColorCountMinusOne, 24, 27) | - __gen_field(values->MiddleLoopExtraSteps, 16, 20) | - __gen_field(values->LocalMidLoopUnitY, 12, 13) | - __gen_field(values->MidLoopUnitX, 8, 9) | - 0; - - dw[7] = - __gen_field(values->GlobalLoopExecCount, 16, 25) | - __gen_field(values->LocalLoopExecCount, 0, 9) | - 0; - - dw[8] = - __gen_field(values->BlockResolutionY, 16, 24) | - __gen_field(values->BlockResolutionX, 0, 8) | - 0; - - dw[9] = - __gen_field(values->LocalStartY, 16, 24) | - __gen_field(values->LocalStartX, 0, 8) | - 0; - - dw[10] = - __gen_field(values->LocalEndY, 16, 24) | - __gen_field(values->LocalEndX, 0, 8) | - 0; - - dw[11] = - __gen_field(values->LocalOuterLoopStrideY, 16, 25) | - __gen_field(values->LocalOuterLoopStrideX, 0, 9) | - 0; - - dw[12] = - __gen_field(values->LocalInnerLoopUnitY, 16, 25) | - __gen_field(values->LocalInnerLoopUnitX, 0, 9) | - 0; - - dw[13] = - __gen_field(values->GlobalResolutionY, 16, 24) | - 
__gen_field(values->GlobalResolutionX, 0, 8) | - 0; - - dw[14] = - __gen_field(values->GlobalStartY, 16, 25) | - __gen_field(values->GlobalStartX, 0, 9) | - 0; - - dw[15] = - __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | - __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | - 0; - - dw[16] = - __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | - __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | - 0; - - /* variable length fields follow */ -} - -#define GEN7_MEDIA_STATE_FLUSH_length_bias 0x00000002 -#define GEN7_MEDIA_STATE_FLUSH_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 4, \ - .DwordLength = 0 - -#define GEN7_MEDIA_STATE_FLUSH_length 0x00000002 - -struct GEN7_MEDIA_STATE_FLUSH { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t WatermarkRequired; - uint32_t InterfaceDescriptorOffset; -}; - -static inline void -GEN7_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_STATE_FLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->WatermarkRequired, 6, 6) | - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - -} - -#define GEN7_MEDIA_VFE_STATE_length_bias 0x00000002 -#define GEN7_MEDIA_VFE_STATE_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 0, \ - .DwordLength = 6 - -#define GEN7_MEDIA_VFE_STATE_length 0x00000008 - -struct GEN7_MEDIA_VFE_STATE { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - 
uint32_t MaximumNumberofThreads; - uint32_t NumberofURBEntries; -#define Maintainingtheexistingtimestampstate 0 -#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 - uint32_t ResetGatewayTimer; -#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 -#define BypassingOpenGatewayCloseGatewayprotocol 1 - uint32_t BypassGatewayControl; -#define NoMMIOreadwriteallowed 0 -#define MMIOreadwritetoanyaddress 2 - uint32_t GatewayMMIOAccessControl; - uint32_t GPGPUMode; - uint32_t URBEntryAllocationSize; - uint32_t CURBEAllocationSize; -#define Scoreboarddisabled 0 -#define Scoreboardenabled 1 - uint32_t ScoreboardEnable; -#define StallingScoreboard 0 -#define NonStallingScoreboard 1 - uint32_t ScoreboardType; - uint32_t ScoreboardMask; - uint32_t Scoreboard3DeltaY; - uint32_t Scoreboard3DeltaX; - uint32_t Scoreboard2DeltaY; - uint32_t Scoreboard2DeltaX; - uint32_t Scoreboard1DeltaY; - uint32_t Scoreboard1DeltaX; - uint32_t Scoreboard0DeltaY; - uint32_t Scoreboard0DeltaX; - uint32_t Scoreboard7DeltaY; - uint32_t Scoreboard7DeltaX; - uint32_t Scoreboard6DeltaY; - uint32_t Scoreboard6DeltaX; - uint32_t Scoreboard5DeltaY; - uint32_t Scoreboard5DeltaX; - uint32_t Scoreboard4DeltaY; - uint32_t Scoreboard4DeltaX; -}; - -static inline void -GEN7_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MEDIA_VFE_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[2] = - __gen_field(values->MaximumNumberofThreads, 16, 31) | - __gen_field(values->NumberofURBEntries, 8, 15) | - __gen_field(values->ResetGatewayTimer, 7, 7) | - 
__gen_field(values->BypassGatewayControl, 6, 6) | - __gen_field(values->GatewayMMIOAccessControl, 3, 4) | - __gen_field(values->GPGPUMode, 2, 2) | - 0; - - dw[3] = - 0; - - dw[4] = - __gen_field(values->URBEntryAllocationSize, 16, 31) | - __gen_field(values->CURBEAllocationSize, 0, 15) | - 0; - - dw[5] = - __gen_field(values->ScoreboardEnable, 31, 31) | - __gen_field(values->ScoreboardType, 30, 30) | - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - dw[6] = - __gen_field(values->Scoreboard3DeltaY, 28, 31) | - __gen_field(values->Scoreboard3DeltaX, 24, 27) | - __gen_field(values->Scoreboard2DeltaY, 20, 23) | - __gen_field(values->Scoreboard2DeltaX, 16, 19) | - __gen_field(values->Scoreboard1DeltaY, 12, 15) | - __gen_field(values->Scoreboard1DeltaX, 8, 11) | - __gen_field(values->Scoreboard0DeltaY, 4, 7) | - __gen_field(values->Scoreboard0DeltaX, 0, 3) | - 0; - - dw[7] = - __gen_field(values->Scoreboard7DeltaY, 28, 31) | - __gen_field(values->Scoreboard7DeltaX, 24, 27) | - __gen_field(values->Scoreboard6DeltaY, 20, 23) | - __gen_field(values->Scoreboard6DeltaX, 16, 19) | - __gen_field(values->Scoreboard5DeltaY, 12, 15) | - __gen_field(values->Scoreboard5DeltaX, 8, 11) | - __gen_field(values->Scoreboard4DeltaY, 4, 7) | - __gen_field(values->Scoreboard4DeltaX, 0, 3) | - 0; - -} - -#define GEN7_MI_ARB_CHECK_length_bias 0x00000001 -#define GEN7_MI_ARB_CHECK_header \ - .CommandType = 0, \ - .MICommandOpcode = 5 - -#define GEN7_MI_ARB_CHECK_length 0x00000001 - -struct GEN7_MI_ARB_CHECK { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN7_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_ARB_CHECK * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN7_MI_ARB_ON_OFF_length_bias 0x00000001 -#define GEN7_MI_ARB_ON_OFF_header \ - .CommandType = 0, \ - 
.MICommandOpcode = 8 - -#define GEN7_MI_ARB_ON_OFF_length 0x00000001 - -struct GEN7_MI_ARB_ON_OFF { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool ArbitrationEnable; -}; - -static inline void -GEN7_MI_ARB_ON_OFF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_ARB_ON_OFF * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->ArbitrationEnable, 0, 0) | - 0; - -} - -#define GEN7_MI_BATCH_BUFFER_END_length_bias 0x00000001 -#define GEN7_MI_BATCH_BUFFER_END_header \ - .CommandType = 0, \ - .MICommandOpcode = 10 - -#define GEN7_MI_BATCH_BUFFER_END_length 0x00000001 - -struct GEN7_MI_BATCH_BUFFER_END { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN7_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_BATCH_BUFFER_END * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN7_MI_BATCH_BUFFER_START_length_bias 0x00000002 -#define GEN7_MI_BATCH_BUFFER_START_header \ - .CommandType = 0, \ - .MICommandOpcode = 49, \ - .DwordLength = 0 - -#define GEN7_MI_BATCH_BUFFER_START_length 0x00000002 - -struct GEN7_MI_BATCH_BUFFER_START { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool ClearCommandBufferEnable; -#define ASI_GGTT 0 -#define ASI_PPGTT 1 - uint32_t AddressSpaceIndicator; - uint32_t DwordLength; - __gen_address_type BatchBufferStartAddress; -}; - -static inline void -GEN7_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_BATCH_BUFFER_START * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 
__gen_field(values->ClearCommandBufferEnable, 11, 11) | - __gen_field(values->AddressSpaceIndicator, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); - -} - -#define GEN7_MI_CLFLUSH_length_bias 0x00000002 -#define GEN7_MI_CLFLUSH_header \ - .CommandType = 0, \ - .MICommandOpcode = 39 - -#define GEN7_MI_CLFLUSH_length 0x00000000 - -struct GEN7_MI_CLFLUSH { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t UseGlobalGTT; - uint32_t DwordLength; - __gen_address_type PageBaseAddress; - uint32_t StartingCachelineOffset; - __gen_address_type PageBaseAddressHigh; - /* variable length fields follow */ -}; - -static inline void -GEN7_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_CLFLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->DwordLength, 0, 9) | - 0; - - uint32_t dw1 = - __gen_field(values->StartingCachelineOffset, 6, 11) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->PageBaseAddressHigh, dw2); - - /* variable length fields follow */ -} - -#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 -#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_header\ - .CommandType = 0, \ - .MICommandOpcode = 54, \ - .UseGlobalGTT = 0, \ - .CompareSemaphore = 0, \ - .DwordLength = 0 - -#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000002 - -struct GEN7_MI_CONDITIONAL_BATCH_BUFFER_END { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t UseGlobalGTT; - uint32_t CompareSemaphore; - uint32_t DwordLength; - 
uint32_t CompareDataDword; - __gen_address_type CompareAddress; -}; - -static inline void -GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->CompareSemaphore, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->CompareDataDword, 0, 31) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); - -} - -#define GEN7_MI_FLUSH_length_bias 0x00000001 -#define GEN7_MI_FLUSH_header \ - .CommandType = 0, \ - .MICommandOpcode = 4 - -#define GEN7_MI_FLUSH_length 0x00000001 - -struct GEN7_MI_FLUSH { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool IndirectStatePointersDisable; - bool GenericMediaStateClear; -#define DontReset 0 -#define Reset 1 - bool GlobalSnapshotCountReset; -#define Flush 0 -#define DontFlush 1 - bool RenderCacheFlushInhibit; -#define DontInvalidate 0 -#define Invalidate 1 - bool StateInstructionCacheInvalidate; -}; - -static inline void -GEN7_MI_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_FLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->IndirectStatePointersDisable, 5, 5) | - __gen_field(values->GenericMediaStateClear, 4, 4) | - __gen_field(values->GlobalSnapshotCountReset, 3, 3) | - __gen_field(values->RenderCacheFlushInhibit, 2, 2) | - __gen_field(values->StateInstructionCacheInvalidate, 1, 1) | - 0; - -} - -#define GEN7_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 -#define GEN7_MI_LOAD_REGISTER_IMM_header \ - .CommandType = 0, \ - .MICommandOpcode = 
34, \ - .DwordLength = 1 - -#define GEN7_MI_LOAD_REGISTER_IMM_length 0x00000003 - -struct GEN7_MI_LOAD_REGISTER_IMM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t ByteWriteDisables; - uint32_t DwordLength; - uint32_t RegisterOffset; - uint32_t DataDWord; -}; - -static inline void -GEN7_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_LOAD_REGISTER_IMM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->ByteWriteDisables, 8, 11) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->RegisterOffset, 2, 22) | - 0; - - dw[2] = - __gen_field(values->DataDWord, 0, 31) | - 0; - -} - -#define GEN7_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 -#define GEN7_MI_LOAD_REGISTER_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 41, \ - .DwordLength = 1 - -#define GEN7_MI_LOAD_REGISTER_MEM_length 0x00000003 - -struct GEN7_MI_LOAD_REGISTER_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool UseGlobalGTT; - uint32_t AsyncModeEnable; - uint32_t DwordLength; - uint32_t RegisterAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN7_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_LOAD_REGISTER_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->AsyncModeEnable, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->RegisterAddress, 2, 22) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - -} - -#define GEN7_MI_NOOP_length_bias 0x00000001 -#define GEN7_MI_NOOP_header \ - .CommandType = 0, 
\ - .MICommandOpcode = 0 - -#define GEN7_MI_NOOP_length 0x00000001 - -struct GEN7_MI_NOOP { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool IdentificationNumberRegisterWriteEnable; - uint32_t IdentificationNumber; -}; - -static inline void -GEN7_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_NOOP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | - __gen_field(values->IdentificationNumber, 0, 21) | - 0; - -} - -#define GEN7_MI_PREDICATE_length_bias 0x00000001 -#define GEN7_MI_PREDICATE_header \ - .CommandType = 0, \ - .MICommandOpcode = 12 - -#define GEN7_MI_PREDICATE_length 0x00000001 - -struct GEN7_MI_PREDICATE { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define LOAD_KEEP 0 -#define LOAD_LOAD 2 -#define LOAD_LOADINV 3 - uint32_t LoadOperation; -#define COMBINE_SET 0 -#define COMBINE_AND 1 -#define COMBINE_OR 2 -#define COMBINE_XOR 3 - uint32_t CombineOperation; -#define COMPARE_SRCS_EQUAL 2 -#define COMPARE_DELTAS_EQUAL 3 - uint32_t CompareOperation; -}; - -static inline void -GEN7_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_PREDICATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->LoadOperation, 6, 7) | - __gen_field(values->CombineOperation, 3, 4) | - __gen_field(values->CompareOperation, 0, 1) | - 0; - -} - -#define GEN7_MI_REPORT_HEAD_length_bias 0x00000001 -#define GEN7_MI_REPORT_HEAD_header \ - .CommandType = 0, \ - .MICommandOpcode = 7 - -#define GEN7_MI_REPORT_HEAD_length 0x00000001 - -struct GEN7_MI_REPORT_HEAD { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void 
-GEN7_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_REPORT_HEAD * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN7_MI_SEMAPHORE_MBOX_length_bias 0x00000002 -#define GEN7_MI_SEMAPHORE_MBOX_header \ - .CommandType = 0, \ - .MICommandOpcode = 22, \ - .DwordLength = 1 - -#define GEN7_MI_SEMAPHORE_MBOX_length 0x00000003 - -struct GEN7_MI_SEMAPHORE_MBOX { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define RVSYNC 0 -#define RBSYNC 2 -#define UseGeneralRegisterSelect 3 - uint32_t RegisterSelect; - uint32_t DwordLength; - uint32_t SemaphoreDataDword; -}; - -static inline void -GEN7_MI_SEMAPHORE_MBOX_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_SEMAPHORE_MBOX * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->RegisterSelect, 16, 17) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SemaphoreDataDword, 0, 31) | - 0; - - dw[2] = - 0; - -} - -#define GEN7_MI_SET_CONTEXT_length_bias 0x00000002 -#define GEN7_MI_SET_CONTEXT_header \ - .CommandType = 0, \ - .MICommandOpcode = 24, \ - .DwordLength = 0 - -#define GEN7_MI_SET_CONTEXT_length 0x00000002 - -struct GEN7_MI_SET_CONTEXT { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - __gen_address_type LogicalContextAddress; - uint32_t ReservedMustbe1; - bool ExtendedStateSaveEnable; - bool ExtendedStateRestoreEnable; - uint32_t ForceRestore; - uint32_t RestoreInhibit; -}; - -static inline void -GEN7_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_SET_CONTEXT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 
31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - __gen_field(values->ReservedMustbe1, 8, 8) | - __gen_field(values->ExtendedStateSaveEnable, 3, 3) | - __gen_field(values->ExtendedStateRestoreEnable, 2, 2) | - __gen_field(values->ForceRestore, 1, 1) | - __gen_field(values->RestoreInhibit, 0, 0) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); - -} - -#define GEN7_MI_STORE_DATA_IMM_length_bias 0x00000002 -#define GEN7_MI_STORE_DATA_IMM_header \ - .CommandType = 0, \ - .MICommandOpcode = 32, \ - .DwordLength = 2 - -#define GEN7_MI_STORE_DATA_IMM_length 0x00000004 - -struct GEN7_MI_STORE_DATA_IMM { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool UseGlobalGTT; - uint32_t DwordLength; - uint32_t Address; - uint32_t CoreModeEnable; - uint32_t DataDWord0; - uint32_t DataDWord1; -}; - -static inline void -GEN7_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_STORE_DATA_IMM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->DwordLength, 0, 5) | - 0; - - dw[1] = - 0; - - dw[2] = - __gen_field(values->Address, 2, 31) | - __gen_field(values->CoreModeEnable, 0, 0) | - 0; - - dw[3] = - __gen_field(values->DataDWord0, 0, 31) | - 0; - - dw[4] = - __gen_field(values->DataDWord1, 0, 31) | - 0; - -} - -#define GEN7_MI_STORE_DATA_INDEX_length_bias 0x00000002 -#define GEN7_MI_STORE_DATA_INDEX_header \ - .CommandType = 0, \ - .MICommandOpcode = 33, \ - .DwordLength = 1 - -#define GEN7_MI_STORE_DATA_INDEX_length 0x00000003 - -struct GEN7_MI_STORE_DATA_INDEX { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t Offset; - uint32_t DataDWord0; - uint32_t DataDWord1; -}; - -static inline void 
-GEN7_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_STORE_DATA_INDEX * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->Offset, 2, 11) | - 0; - - dw[2] = - __gen_field(values->DataDWord0, 0, 31) | - 0; - - dw[3] = - __gen_field(values->DataDWord1, 0, 31) | - 0; - -} - -#define GEN7_MI_SUSPEND_FLUSH_length_bias 0x00000001 -#define GEN7_MI_SUSPEND_FLUSH_header \ - .CommandType = 0, \ - .MICommandOpcode = 11 - -#define GEN7_MI_SUSPEND_FLUSH_length 0x00000001 - -struct GEN7_MI_SUSPEND_FLUSH { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool SuspendFlush; -}; - -static inline void -GEN7_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_SUSPEND_FLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->SuspendFlush, 0, 0) | - 0; - -} - -#define GEN7_MI_TOPOLOGY_FILTER_length_bias 0x00000001 -#define GEN7_MI_TOPOLOGY_FILTER_header \ - .CommandType = 0, \ - .MICommandOpcode = 13 - -#define GEN7_MI_TOPOLOGY_FILTER_length 0x00000001 - -struct GEN7_MI_TOPOLOGY_FILTER { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t TopologyFilterValue; -}; - -static inline void -GEN7_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_TOPOLOGY_FILTER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->TopologyFilterValue, 0, 5) | - 0; - -} - -#define GEN7_MI_UPDATE_GTT_length_bias 0x00000002 -#define GEN7_MI_UPDATE_GTT_header \ - .CommandType = 0, \ - .MICommandOpcode 
= 35 - -#define GEN7_MI_UPDATE_GTT_length 0x00000000 - -struct GEN7_MI_UPDATE_GTT { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t UseGlobalGTT; - uint32_t DwordLength; - __gen_address_type EntryAddress; - /* variable length fields follow */ -}; - -static inline void -GEN7_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_UPDATE_GTT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); - - /* variable length fields follow */ -} - -#define GEN7_MI_URB_CLEAR_length_bias 0x00000002 -#define GEN7_MI_URB_CLEAR_header \ - .CommandType = 0, \ - .MICommandOpcode = 25, \ - .DwordLength = 0 - -#define GEN7_MI_URB_CLEAR_length 0x00000002 - -struct GEN7_MI_URB_CLEAR { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t URBClearLength; - uint32_t URBAddress; -}; - -static inline void -GEN7_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_URB_CLEAR * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->URBClearLength, 16, 28) | - __gen_offset(values->URBAddress, 0, 13) | - 0; - -} - -#define GEN7_MI_USER_INTERRUPT_length_bias 0x00000001 -#define GEN7_MI_USER_INTERRUPT_header \ - .CommandType = 0, \ - .MICommandOpcode = 2 - -#define GEN7_MI_USER_INTERRUPT_length 0x00000001 - -struct GEN7_MI_USER_INTERRUPT { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void 
-GEN7_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_USER_INTERRUPT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN7_MI_WAIT_FOR_EVENT_length_bias 0x00000001 -#define GEN7_MI_WAIT_FOR_EVENT_header \ - .CommandType = 0, \ - .MICommandOpcode = 3 - -#define GEN7_MI_WAIT_FOR_EVENT_length 0x00000001 - -struct GEN7_MI_WAIT_FOR_EVENT { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool DisplayPipeCHorizontalBlankWaitEnable; - bool DisplayPipeCVerticalBlankWaitEnable; - bool DisplaySpriteCFlipPendingWaitEnable; -#define Notenabled 0 - uint32_t ConditionCodeWaitSelect; - bool DisplayPlaneCFlipPendingWaitEnable; - bool DisplayPipeCScanLineWaitEnable; - bool DisplayPipeBHorizontalBlankWaitEnable; - bool DisplayPipeBVerticalBlankWaitEnable; - bool DisplaySpriteBFlipPendingWaitEnable; - bool DisplayPlaneBFlipPendingWaitEnable; - bool DisplayPipeBScanLineWaitEnable; - bool DisplayPipeAHorizontalBlankWaitEnable; - bool DisplayPipeAVerticalBlankWaitEnable; - bool DisplaySpriteAFlipPendingWaitEnable; - bool DisplayPlaneAFlipPendingWaitEnable; - bool DisplayPipeAScanLineWaitEnable; -}; - -static inline void -GEN7_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_MI_WAIT_FOR_EVENT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DisplayPipeCHorizontalBlankWaitEnable, 22, 22) | - __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 21) | - __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | - __gen_field(values->ConditionCodeWaitSelect, 16, 19) | - __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | - __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | - 
__gen_field(values->DisplayPipeBHorizontalBlankWaitEnable, 13, 13) | - __gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | - __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | - __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | - __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | - __gen_field(values->DisplayPipeAHorizontalBlankWaitEnable, 5, 5) | - __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | - __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | - __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | - __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | - 0; - -} - -#define GEN7_PIPE_CONTROL_length_bias 0x00000002 -#define GEN7_PIPE_CONTROL_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 2, \ - ._3DCommandSubOpcode = 0, \ - .DwordLength = 3 - -#define GEN7_PIPE_CONTROL_length 0x00000005 - -struct GEN7_PIPE_CONTROL { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define DAT_PPGTT 0 -#define DAT_GGTT 1 - uint32_t DestinationAddressType; -#define NoLRIOperation 0 -#define MMIOWriteImmediateData 1 - uint32_t LRIPostSyncOperation; - uint32_t StoreDataIndex; - uint32_t CommandStreamerStallEnable; -#define DontReset 0 -#define Reset 1 - uint32_t GlobalSnapshotCountReset; - uint32_t TLBInvalidate; - bool GenericMediaStateClear; -#define NoWrite 0 -#define WriteImmediateData 1 -#define WritePSDepthCount 2 -#define WriteTimestamp 3 - uint32_t PostSyncOperation; - bool DepthStallEnable; -#define DisableFlush 0 -#define EnableFlush 1 - bool RenderTargetCacheFlushEnable; - bool InstructionCacheInvalidateEnable; - bool TextureCacheInvalidationEnable; - bool IndirectStatePointersDisable; - bool NotifyEnable; - bool PipeControlFlushEnable; - bool DCFlushEnable; - bool VFCacheInvalidationEnable; - bool ConstantCacheInvalidationEnable; - bool 
StateCacheInvalidationEnable; - bool StallAtPixelScoreboard; -#define FlushDisabled 0 -#define FlushEnabled 1 - bool DepthCacheFlushEnable; - __gen_address_type Address; - uint32_t ImmediateData; - uint32_t ImmediateData0; -}; - -static inline void -GEN7_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_PIPE_CONTROL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->DestinationAddressType, 24, 24) | - __gen_field(values->LRIPostSyncOperation, 23, 23) | - __gen_field(values->StoreDataIndex, 21, 21) | - __gen_field(values->CommandStreamerStallEnable, 20, 20) | - __gen_field(values->GlobalSnapshotCountReset, 19, 19) | - __gen_field(values->TLBInvalidate, 18, 18) | - __gen_field(values->GenericMediaStateClear, 16, 16) | - __gen_field(values->PostSyncOperation, 14, 15) | - __gen_field(values->DepthStallEnable, 13, 13) | - __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | - __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | - __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | - __gen_field(values->IndirectStatePointersDisable, 9, 9) | - __gen_field(values->NotifyEnable, 8, 8) | - __gen_field(values->PipeControlFlushEnable, 7, 7) | - __gen_field(values->DCFlushEnable, 5, 5) | - __gen_field(values->VFCacheInvalidationEnable, 4, 4) | - __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | - __gen_field(values->StateCacheInvalidationEnable, 2, 2) | - __gen_field(values->StallAtPixelScoreboard, 1, 1) | - __gen_field(values->DepthCacheFlushEnable, 0, 0) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->Address, dw2); - - dw[3] = - __gen_field(values->ImmediateData, 0, 
31) | - 0; - - dw[4] = - __gen_field(values->ImmediateData, 0, 31) | - 0; - -} - -#define GEN7_SCISSOR_RECT_length 0x00000002 - -struct GEN7_SCISSOR_RECT { - uint32_t ScissorRectangleYMin; - uint32_t ScissorRectangleXMin; - uint32_t ScissorRectangleYMax; - uint32_t ScissorRectangleXMax; -}; - -static inline void -GEN7_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SCISSOR_RECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ScissorRectangleYMin, 16, 31) | - __gen_field(values->ScissorRectangleXMin, 0, 15) | - 0; - - dw[1] = - __gen_field(values->ScissorRectangleYMax, 16, 31) | - __gen_field(values->ScissorRectangleXMax, 0, 15) | - 0; - -} - -#define GEN7_SF_CLIP_VIEWPORT_length 0x00000010 - -struct GEN7_SF_CLIP_VIEWPORT { - float ViewportMatrixElementm00; - float ViewportMatrixElementm11; - float ViewportMatrixElementm22; - float ViewportMatrixElementm30; - float ViewportMatrixElementm31; - float ViewportMatrixElementm32; - float XMinClipGuardband; - float XMaxClipGuardband; - float YMinClipGuardband; - float YMaxClipGuardband; -}; - -static inline void -GEN7_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SF_CLIP_VIEWPORT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_float(values->ViewportMatrixElementm00) | - 0; - - dw[1] = - __gen_float(values->ViewportMatrixElementm11) | - 0; - - dw[2] = - __gen_float(values->ViewportMatrixElementm22) | - 0; - - dw[3] = - __gen_float(values->ViewportMatrixElementm30) | - 0; - - dw[4] = - __gen_float(values->ViewportMatrixElementm31) | - 0; - - dw[5] = - __gen_float(values->ViewportMatrixElementm32) | - 0; - - dw[6] = - 0; - - dw[7] = - 0; - - dw[8] = - __gen_float(values->XMinClipGuardband) | - 0; - - dw[9] = - __gen_float(values->XMaxClipGuardband) | - 0; - - dw[10] = - __gen_float(values->YMinClipGuardband) | - 0; - - dw[11] = - 
__gen_float(values->YMaxClipGuardband) | - 0; - - for (uint32_t i = 0, j = 12; i < 4; i += 1, j++) { - dw[j] = - 0; - } - -} - -#define GEN7_BLEND_STATE_length 0x00000002 - -struct GEN7_BLEND_STATE { - bool ColorBufferBlendEnable; - bool IndependentAlphaBlendEnable; -#define BLENDFUNCTION_ADD 0 -#define BLENDFUNCTION_SUBTRACT 1 -#define BLENDFUNCTION_REVERSE_SUBTRACT 2 -#define BLENDFUNCTION_MIN 3 -#define BLENDFUNCTION_MAX 4 - uint32_t AlphaBlendFunction; -#define BLENDFACTOR_ONE 1 -#define BLENDFACTOR_SRC_COLOR 2 -#define BLENDFACTOR_SRC_ALPHA 3 -#define BLENDFACTOR_DST_ALPHA 4 -#define BLENDFACTOR_DST_COLOR 5 -#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 -#define BLENDFACTOR_CONST_COLOR 7 -#define BLENDFACTOR_CONST_ALPHA 8 -#define BLENDFACTOR_SRC1_COLOR 9 -#define BLENDFACTOR_SRC1_ALPHA 10 -#define BLENDFACTOR_ZERO 17 -#define BLENDFACTOR_INV_SRC_COLOR 18 -#define BLENDFACTOR_INV_SRC_ALPHA 19 -#define BLENDFACTOR_INV_DST_ALPHA 20 -#define BLENDFACTOR_INV_DST_COLOR 21 -#define BLENDFACTOR_INV_CONST_COLOR 23 -#define BLENDFACTOR_INV_CONST_ALPHA 24 -#define BLENDFACTOR_INV_SRC1_COLOR 25 -#define BLENDFACTOR_INV_SRC1_ALPHA 26 - uint32_t SourceAlphaBlendFactor; - uint32_t DestinationAlphaBlendFactor; -#define BLENDFUNCTION_ADD 0 -#define BLENDFUNCTION_SUBTRACT 1 -#define BLENDFUNCTION_REVERSE_SUBTRACT 2 -#define BLENDFUNCTION_MIN 3 -#define BLENDFUNCTION_MAX 4 - uint32_t ColorBlendFunction; - uint32_t SourceBlendFactor; - uint32_t DestinationBlendFactor; - bool AlphaToCoverageEnable; - bool AlphaToOneEnable; - bool AlphaToCoverageDitherEnable; - bool WriteDisableAlpha; - bool WriteDisableRed; - bool WriteDisableGreen; - bool WriteDisableBlue; - bool LogicOpEnable; -#define LOGICOP_CLEAR 0 -#define LOGICOP_NOR 1 -#define LOGICOP_AND_INVERTED 2 -#define LOGICOP_COPY_INVERTED 3 -#define LOGICOP_AND_REVERSE 4 -#define LOGICOP_INVERT 5 -#define LOGICOP_XOR 6 -#define LOGICOP_NAND 7 -#define LOGICOP_AND 8 -#define LOGICOP_EQUIV 9 -#define LOGICOP_NOOP 10 -#define 
LOGICOP_OR_INVERTED 11 -#define LOGICOP_COPY 12 -#define LOGICOP_OR_REVERSE 13 -#define LOGICOP_OR 14 -#define LOGICOP_SET 15 - uint32_t LogicOpFunction; - bool AlphaTestEnable; -#define COMPAREFUNCTION_ALWAYS 0 -#define COMPAREFUNCTION_NEVER 1 -#define COMPAREFUNCTION_LESS 2 -#define COMPAREFUNCTION_EQUAL 3 -#define COMPAREFUNCTION_LEQUAL 4 -#define COMPAREFUNCTION_GREATER 5 -#define COMPAREFUNCTION_NOTEQUAL 6 -#define COMPAREFUNCTION_GEQUAL 7 - uint32_t AlphaTestFunction; - bool ColorDitherEnable; - uint32_t XDitherOffset; - uint32_t YDitherOffset; -#define COLORCLAMP_UNORM 0 -#define COLORCLAMP_SNORM 1 -#define COLORCLAMP_RTFORMAT 2 - uint32_t ColorClampRange; - bool PreBlendColorClampEnable; - bool PostBlendColorClampEnable; -}; - -static inline void -GEN7_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_BLEND_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ColorBufferBlendEnable, 31, 31) | - __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | - __gen_field(values->AlphaBlendFunction, 26, 28) | - __gen_field(values->SourceAlphaBlendFactor, 20, 24) | - __gen_field(values->DestinationAlphaBlendFactor, 15, 19) | - __gen_field(values->ColorBlendFunction, 11, 13) | - __gen_field(values->SourceBlendFactor, 5, 9) | - __gen_field(values->DestinationBlendFactor, 0, 4) | - 0; - - dw[1] = - __gen_field(values->AlphaToCoverageEnable, 31, 31) | - __gen_field(values->AlphaToOneEnable, 30, 30) | - __gen_field(values->AlphaToCoverageDitherEnable, 29, 29) | - __gen_field(values->WriteDisableAlpha, 27, 27) | - __gen_field(values->WriteDisableRed, 26, 26) | - __gen_field(values->WriteDisableGreen, 25, 25) | - __gen_field(values->WriteDisableBlue, 24, 24) | - __gen_field(values->LogicOpEnable, 22, 22) | - __gen_field(values->LogicOpFunction, 18, 21) | - __gen_field(values->AlphaTestEnable, 16, 16) | - __gen_field(values->AlphaTestFunction, 13, 15) | - 
__gen_field(values->ColorDitherEnable, 12, 12) | - __gen_field(values->XDitherOffset, 10, 11) | - __gen_field(values->YDitherOffset, 8, 9) | - __gen_field(values->ColorClampRange, 2, 3) | - __gen_field(values->PreBlendColorClampEnable, 1, 1) | - __gen_field(values->PostBlendColorClampEnable, 0, 0) | - 0; - -} - -#define GEN7_CC_VIEWPORT_length 0x00000002 - -struct GEN7_CC_VIEWPORT { - float MinimumDepth; - float MaximumDepth; -}; - -static inline void -GEN7_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_CC_VIEWPORT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_float(values->MinimumDepth) | - 0; - - dw[1] = - __gen_float(values->MaximumDepth) | - 0; - -} - -#define GEN7_COLOR_CALC_STATE_length 0x00000006 - -struct GEN7_COLOR_CALC_STATE { - uint32_t StencilReferenceValue; - uint32_t BackFaceStencilReferenceValue; -#define Cancelled 0 -#define NotCancelled 1 - uint32_t RoundDisableFunctionDisable; -#define ALPHATEST_UNORM8 0 -#define ALPHATEST_FLOAT32 1 - uint32_t AlphaTestFormat; - uint32_t AlphaReferenceValueAsUNORM8; - float AlphaReferenceValueAsFLOAT32; - float BlendConstantColorRed; - float BlendConstantColorGreen; - float BlendConstantColorBlue; - float BlendConstantColorAlpha; -}; - -static inline void -GEN7_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_COLOR_CALC_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->StencilReferenceValue, 24, 31) | - __gen_field(values->BackFaceStencilReferenceValue, 16, 23) | - __gen_field(values->RoundDisableFunctionDisable, 15, 15) | - __gen_field(values->AlphaTestFormat, 0, 0) | - 0; - - dw[1] = - __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | - __gen_float(values->AlphaReferenceValueAsFLOAT32) | - 0; - - dw[2] = - __gen_float(values->BlendConstantColorRed) | - 0; - - dw[3] = - __gen_float(values->BlendConstantColorGreen) | - 0; - - dw[4] = - 
__gen_float(values->BlendConstantColorBlue) | - 0; - - dw[5] = - __gen_float(values->BlendConstantColorAlpha) | - 0; - -} - -#define GEN7_DEPTH_STENCIL_STATE_length 0x00000003 - -struct GEN7_DEPTH_STENCIL_STATE { - bool StencilTestEnable; -#define COMPAREFUNCTION_ALWAYS 0 -#define COMPAREFUNCTION_NEVER 1 -#define COMPAREFUNCTION_LESS 2 -#define COMPAREFUNCTION_EQUAL 3 -#define COMPAREFUNCTION_LEQUAL 4 -#define COMPAREFUNCTION_GREATER 5 -#define COMPAREFUNCTION_NOTEQUAL 6 -#define COMPAREFUNCTION_GEQUAL 7 - uint32_t StencilTestFunction; -#define STENCILOP_KEEP 0 -#define STENCILOP_ZERO 1 -#define STENCILOP_REPLACE 2 -#define STENCILOP_INCRSAT 3 -#define STENCILOP_DECRSAT 4 -#define STENCILOP_INCR 5 -#define STENCILOP_DECR 6 -#define STENCILOP_INVERT 7 - uint32_t StencilFailOp; - uint32_t StencilPassDepthFailOp; - uint32_t StencilPassDepthPassOp; - bool StencilBufferWriteEnable; - bool DoubleSidedStencilEnable; -#define COMPAREFUNCTION_ALWAYS 0 -#define COMPAREFUNCTION_NEVER 1 -#define COMPAREFUNCTION_LESS 2 -#define COMPAREFUNCTION_EQUAL 3 -#define COMPAREFUNCTION_LEQUAL 4 -#define COMPAREFUNCTION_GREATER 5 -#define COMPAREFUNCTION_NOTEQUAL 6 -#define COMPAREFUNCTION_GEQUAL 7 - uint32_t BackFaceStencilTestFunction; -#define STENCILOP_KEEP 0 -#define STENCILOP_ZERO 1 -#define STENCILOP_REPLACE 2 -#define STENCILOP_INCRSAT 3 -#define STENCILOP_DECRSAT 4 -#define STENCILOP_INCR 5 -#define STENCILOP_DECR 6 -#define STENCILOP_INVERT 7 - uint32_t BackfaceStencilFailOp; - uint32_t BackfaceStencilPassDepthFailOp; - uint32_t BackfaceStencilPassDepthPassOp; - uint32_t StencilTestMask; - uint32_t StencilWriteMask; - uint32_t BackfaceStencilTestMask; - uint32_t BackfaceStencilWriteMask; - bool DepthTestEnable; -#define COMPAREFUNCTION_ALWAYS 0 -#define COMPAREFUNCTION_NEVER 1 -#define COMPAREFUNCTION_LESS 2 -#define COMPAREFUNCTION_EQUAL 3 -#define COMPAREFUNCTION_LEQUAL 4 -#define COMPAREFUNCTION_GREATER 5 -#define COMPAREFUNCTION_NOTEQUAL 6 -#define COMPAREFUNCTION_GEQUAL 7 - 
uint32_t DepthTestFunction; - bool DepthBufferWriteEnable; -}; - -static inline void -GEN7_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_DEPTH_STENCIL_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->StencilTestEnable, 31, 31) | - __gen_field(values->StencilTestFunction, 28, 30) | - __gen_field(values->StencilFailOp, 25, 27) | - __gen_field(values->StencilPassDepthFailOp, 22, 24) | - __gen_field(values->StencilPassDepthPassOp, 19, 21) | - __gen_field(values->StencilBufferWriteEnable, 18, 18) | - __gen_field(values->DoubleSidedStencilEnable, 15, 15) | - __gen_field(values->BackFaceStencilTestFunction, 12, 14) | - __gen_field(values->BackfaceStencilFailOp, 9, 11) | - __gen_field(values->BackfaceStencilPassDepthFailOp, 6, 8) | - __gen_field(values->BackfaceStencilPassDepthPassOp, 3, 5) | - 0; - - dw[1] = - __gen_field(values->StencilTestMask, 24, 31) | - __gen_field(values->StencilWriteMask, 16, 23) | - __gen_field(values->BackfaceStencilTestMask, 8, 15) | - __gen_field(values->BackfaceStencilWriteMask, 0, 7) | - 0; - - dw[2] = - __gen_field(values->DepthTestEnable, 31, 31) | - __gen_field(values->DepthTestFunction, 27, 29) | - __gen_field(values->DepthBufferWriteEnable, 26, 26) | - 0; - -} - -#define GEN7_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 - -struct GEN7_INTERFACE_DESCRIPTOR_DATA { - uint32_t KernelStartPointer; -#define Multiple 0 -#define Single 1 - uint32_t SingleProgramFlow; -#define NormalPriority 0 -#define HighPriority 1 - uint32_t ThreadPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool MaskStackExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t SamplerStatePointer; -#define Nosamplersused 0 -#define Between1and4samplersused 1 -#define Between5and8samplersused 2 -#define Between9and12samplersused 3 -#define Between13and16samplersused 4 - uint32_t SamplerCount; - uint32_t 
BindingTablePointer; - uint32_t BindingTableEntryCount; - uint32_t ConstantURBEntryReadLength; - uint32_t ConstantURBEntryReadOffset; -#define RTNE 0 -#define RU 1 -#define RD 2 -#define RTZ 3 - uint32_t RoundingMode; - bool BarrierEnable; - uint32_t SharedLocalMemorySize; - uint32_t NumberofThreadsinGPGPUThreadGroup; -}; - -static inline void -GEN7_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_INTERFACE_DESCRIPTOR_DATA * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->KernelStartPointer, 6, 31) | - 0; - - dw[1] = - __gen_field(values->SingleProgramFlow, 18, 18) | - __gen_field(values->ThreadPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->MaskStackExceptionEnable, 11, 11) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - dw[2] = - __gen_offset(values->SamplerStatePointer, 5, 31) | - __gen_field(values->SamplerCount, 2, 4) | - 0; - - dw[3] = - __gen_offset(values->BindingTablePointer, 5, 15) | - __gen_field(values->BindingTableEntryCount, 0, 4) | - 0; - - dw[4] = - __gen_field(values->ConstantURBEntryReadLength, 16, 31) | - __gen_field(values->ConstantURBEntryReadOffset, 0, 15) | - 0; - - dw[5] = - __gen_field(values->RoundingMode, 22, 23) | - __gen_field(values->BarrierEnable, 21, 21) | - __gen_field(values->SharedLocalMemorySize, 16, 20) | - __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 7) | - 0; - - dw[6] = - 0; - - dw[7] = - 0; - -} - -#define GEN7_BINDING_TABLE_STATE_length 0x00000001 - -struct GEN7_BINDING_TABLE_STATE { - uint32_t SurfaceStatePointer; -}; - -static inline void -GEN7_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_BINDING_TABLE_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->SurfaceStatePointer, 5, 31) | - 0; - -} - 
-#define GEN7_RENDER_SURFACE_STATE_length 0x00000008 - -struct GEN7_RENDER_SURFACE_STATE { -#define SURFTYPE_1D 0 -#define SURFTYPE_2D 1 -#define SURFTYPE_3D 2 -#define SURFTYPE_CUBE 3 -#define SURFTYPE_BUFFER 4 -#define SURFTYPE_STRBUF 5 -#define SURFTYPE_NULL 7 - uint32_t SurfaceType; - bool SurfaceArray; - uint32_t SurfaceFormat; -#define VALIGN_2 0 -#define VALIGN_4 1 - uint32_t SurfaceVerticalAlignment; -#define HALIGN_4 0 -#define HALIGN_8 1 - uint32_t SurfaceHorizontalAlignment; - uint32_t TiledSurface; -#define TILEWALK_XMAJOR 0 -#define TILEWALK_YMAJOR 1 - uint32_t TileWalk; - uint32_t VerticalLineStride; - uint32_t VerticalLineStrideOffset; -#define ARYSPC_FULL 0 -#define ARYSPC_LOD0 1 - uint32_t SurfaceArraySpacing; - uint32_t RenderCacheReadWriteMode; -#define NORMAL_MODE 0 -#define PROGRESSIVE_FRAME 2 -#define INTERLACED_FRAME 3 - uint32_t MediaBoundaryPixelMode; - uint32_t CubeFaceEnables; - __gen_address_type SurfaceBaseAddress; - uint32_t Height; - uint32_t Width; - uint32_t Depth; - uint32_t SurfacePitch; -#define RTROTATE_0DEG 0 -#define RTROTATE_90DEG 1 -#define RTROTATE_270DEG 3 - uint32_t RenderTargetRotation; - uint32_t MinimumArrayElement; - uint32_t RenderTargetViewExtent; -#define MSFMT_MSS 0 -#define MSFMT_DEPTH_STENCIL 1 - uint32_t MultisampledSurfaceStorageFormat; -#define MULTISAMPLECOUNT_1 0 -#define MULTISAMPLECOUNT_4 2 -#define MULTISAMPLECOUNT_8 3 - uint32_t NumberofMultisamples; - uint32_t MultisamplePositionPaletteIndex; - uint32_t MinimumArrayElement0; - uint32_t XOffset; - uint32_t YOffset; - struct GEN7_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; - uint32_t SurfaceMinLOD; - uint32_t MIPCountLOD; - __gen_address_type MCSBaseAddress; - uint32_t MCSSurfacePitch; - __gen_address_type AppendCounterAddress; - bool AppendCounterEnable; - bool MCSEnable; - uint32_t XOffsetforUVPlane; - uint32_t YOffsetforUVPlane; -#define CC_ZERO 0 -#define CC_ONE 1 - uint32_t RedClearColor; -#define CC_ZERO 0 -#define CC_ONE 1 - uint32_t 
GreenClearColor; -#define CC_ZERO 0 -#define CC_ONE 1 - uint32_t BlueClearColor; -#define CC_ZERO 0 -#define CC_ONE 1 - uint32_t AlphaClearColor; - float ResourceMinLOD; -}; - -static inline void -GEN7_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_RENDER_SURFACE_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->SurfaceType, 29, 31) | - __gen_field(values->SurfaceArray, 28, 28) | - __gen_field(values->SurfaceFormat, 18, 26) | - __gen_field(values->SurfaceVerticalAlignment, 16, 17) | - __gen_field(values->SurfaceHorizontalAlignment, 15, 15) | - __gen_field(values->TiledSurface, 14, 14) | - __gen_field(values->TileWalk, 13, 13) | - __gen_field(values->VerticalLineStride, 12, 12) | - __gen_field(values->VerticalLineStrideOffset, 11, 11) | - __gen_field(values->SurfaceArraySpacing, 10, 10) | - __gen_field(values->RenderCacheReadWriteMode, 8, 8) | - __gen_field(values->MediaBoundaryPixelMode, 6, 7) | - __gen_field(values->CubeFaceEnables, 0, 5) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->SurfaceBaseAddress, dw1); - - dw[2] = - __gen_field(values->Height, 16, 29) | - __gen_field(values->Width, 0, 13) | - 0; - - dw[3] = - __gen_field(values->Depth, 21, 31) | - __gen_field(values->SurfacePitch, 0, 17) | - 0; - - dw[4] = - __gen_field(values->RenderTargetRotation, 29, 30) | - __gen_field(values->MinimumArrayElement, 18, 28) | - __gen_field(values->RenderTargetViewExtent, 7, 17) | - __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | - __gen_field(values->NumberofMultisamples, 3, 5) | - __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | - __gen_field(values->MinimumArrayElement, 0, 26) | - 0; - - uint32_t dw_SurfaceObjectControlState; - GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); - dw[5] = - __gen_offset(values->XOffset, 25, 31) | - 
__gen_offset(values->YOffset, 20, 23) | - __gen_field(dw_SurfaceObjectControlState, 16, 19) | - __gen_field(values->SurfaceMinLOD, 4, 7) | - __gen_field(values->MIPCountLOD, 0, 3) | - 0; - - uint32_t dw6 = - __gen_field(values->MCSSurfacePitch, 3, 11) | - __gen_field(values->AppendCounterEnable, 1, 1) | - __gen_field(values->MCSEnable, 0, 0) | - __gen_field(values->XOffsetforUVPlane, 16, 29) | - __gen_field(values->YOffsetforUVPlane, 0, 13) | - 0; - - dw[6] = - __gen_combine_address(data, &dw[6], values->AppendCounterAddress, dw6); - - dw[7] = - __gen_field(values->RedClearColor, 31, 31) | - __gen_field(values->GreenClearColor, 30, 30) | - __gen_field(values->BlueClearColor, 29, 29) | - __gen_field(values->AlphaClearColor, 28, 28) | - __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | - 0; - -} - -#define GEN7_SAMPLER_BORDER_COLOR_STATE_length 0x00000004 - -struct GEN7_SAMPLER_BORDER_COLOR_STATE { - float BorderColorRedDX100GL; - uint32_t BorderColorAlpha; - uint32_t BorderColorBlue; - uint32_t BorderColorGreen; - uint32_t BorderColorRedDX9; - float BorderColorGreen0; - float BorderColorBlue0; - float BorderColorAlpha0; -}; - -static inline void -GEN7_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SAMPLER_BORDER_COLOR_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_float(values->BorderColorRedDX100GL) | - __gen_field(values->BorderColorAlpha, 24, 31) | - __gen_field(values->BorderColorBlue, 16, 23) | - __gen_field(values->BorderColorGreen, 8, 15) | - __gen_field(values->BorderColorRedDX9, 0, 7) | - 0; - - dw[1] = - __gen_float(values->BorderColorGreen) | - 0; - - dw[2] = - __gen_float(values->BorderColorBlue) | - 0; - - dw[3] = - __gen_float(values->BorderColorAlpha) | - 0; - -} - -#define GEN7_SAMPLER_STATE_length 0x00000004 - -struct GEN7_SAMPLER_STATE { - bool SamplerDisable; -#define DX10OGL 0 -#define DX9 1 - uint32_t TextureBorderColorMode; -#define OGL 1 - 
uint32_t LODPreClampEnable; - float BaseMipLevel; -#define MIPFILTER_NONE 0 -#define MIPFILTER_NEAREST 1 -#define MIPFILTER_LINEAR 3 - uint32_t MipModeFilter; -#define MAPFILTER_NEAREST 0 -#define MAPFILTER_LINEAR 1 -#define MAPFILTER_ANISOTROPIC 2 -#define MAPFILTER_MONO 6 - uint32_t MagModeFilter; -#define MAPFILTER_NEAREST 0 -#define MAPFILTER_LINEAR 1 -#define MAPFILTER_ANISOTROPIC 2 -#define MAPFILTER_MONO 6 - uint32_t MinModeFilter; - float TextureLODBias; -#define LEGACY 0 -#define EWAApproximation 1 - uint32_t AnisotropicAlgorithm; - float MinLOD; - float MaxLOD; -#define PREFILTEROPALWAYS 0 -#define PREFILTEROPNEVER 1 -#define PREFILTEROPLESS 2 -#define PREFILTEROPEQUAL 3 -#define PREFILTEROPLEQUAL 4 -#define PREFILTEROPGREATER 5 -#define PREFILTEROPNOTEQUAL 6 -#define PREFILTEROPGEQUAL 7 - uint32_t ShadowFunction; -#define PROGRAMMED 0 -#define OVERRIDE 1 - uint32_t CubeSurfaceControlMode; - uint32_t BorderColorPointer; - bool ChromaKeyEnable; - uint32_t ChromaKeyIndex; -#define KEYFILTER_KILL_ON_ANY_MATCH 0 -#define KEYFILTER_REPLACE_BLACK 1 - uint32_t ChromaKeyMode; -#define RATIO21 0 -#define RATIO41 1 -#define RATIO61 2 -#define RATIO81 3 -#define RATIO101 4 -#define RATIO121 5 -#define RATIO141 6 -#define RATIO161 7 - uint32_t MaximumAnisotropy; - bool RAddressMinFilterRoundingEnable; - bool RAddressMagFilterRoundingEnable; - bool VAddressMinFilterRoundingEnable; - bool VAddressMagFilterRoundingEnable; - bool UAddressMinFilterRoundingEnable; - bool UAddressMagFilterRoundingEnable; -#define FULL 0 -#define MED 2 -#define LOW 3 - uint32_t TrilinearFilterQuality; - bool NonnormalizedCoordinateEnable; - uint32_t TCXAddressControlMode; - uint32_t TCYAddressControlMode; - uint32_t TCZAddressControlMode; -}; - -static inline void -GEN7_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN7_SAMPLER_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->SamplerDisable, 31, 31) | - 
__gen_field(values->TextureBorderColorMode, 29, 29) | - __gen_field(values->LODPreClampEnable, 28, 28) | - __gen_field(values->BaseMipLevel * (1 << 1), 22, 26) | - __gen_field(values->MipModeFilter, 20, 21) | - __gen_field(values->MagModeFilter, 17, 19) | - __gen_field(values->MinModeFilter, 14, 16) | - __gen_fixed(values->TextureLODBias, 1, 13, true, 8) | - __gen_field(values->AnisotropicAlgorithm, 0, 0) | - 0; - - dw[1] = - __gen_field(values->MinLOD * (1 << 8), 20, 31) | - __gen_field(values->MaxLOD * (1 << 8), 8, 19) | - __gen_field(values->ShadowFunction, 1, 3) | - __gen_field(values->CubeSurfaceControlMode, 0, 0) | - 0; - - dw[2] = - __gen_offset(values->BorderColorPointer, 5, 31) | - 0; - - dw[3] = - __gen_field(values->ChromaKeyEnable, 25, 25) | - __gen_field(values->ChromaKeyIndex, 23, 24) | - __gen_field(values->ChromaKeyMode, 22, 22) | - __gen_field(values->MaximumAnisotropy, 19, 21) | - __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | - __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | - __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | - __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | - __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | - __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | - __gen_field(values->TrilinearFilterQuality, 11, 12) | - __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | - __gen_field(values->TCXAddressControlMode, 6, 8) | - __gen_field(values->TCYAddressControlMode, 3, 5) | - __gen_field(values->TCZAddressControlMode, 0, 2) | - 0; - -} - -/* Enum 3D_Prim_Topo_Type */ -#define _3DPRIM_POINTLIST 1 -#define _3DPRIM_LINELIST 2 -#define _3DPRIM_LINESTRIP 3 -#define _3DPRIM_TRILIST 4 -#define _3DPRIM_TRISTRIP 5 -#define _3DPRIM_TRIFAN 6 -#define _3DPRIM_QUADLIST 7 -#define _3DPRIM_QUADSTRIP 8 -#define _3DPRIM_LINELIST_ADJ 9 -#define _3DPRIM_LINESTRIP_ADJ 10 -#define _3DPRIM_TRILIST_ADJ 11 -#define _3DPRIM_TRISTRIP_ADJ 12 -#define 
_3DPRIM_TRISTRIP_REVERSE 13 -#define _3DPRIM_POLYGON 14 -#define _3DPRIM_RECTLIST 15 -#define _3DPRIM_LINELOOP 16 -#define _3DPRIM_POINTLIST_BF 17 -#define _3DPRIM_LINESTRIP_CONT 18 -#define _3DPRIM_LINESTRIP_BF 19 -#define _3DPRIM_LINESTRIP_CONT_BF 20 -#define _3DPRIM_TRIFAN_NOSTIPPLE 22 -#define _3DPRIM_PATCHLIST_1 32 -#define _3DPRIM_PATCHLIST_2 33 -#define _3DPRIM_PATCHLIST_3 34 -#define _3DPRIM_PATCHLIST_4 35 -#define _3DPRIM_PATCHLIST_5 36 -#define _3DPRIM_PATCHLIST_6 37 -#define _3DPRIM_PATCHLIST_7 38 -#define _3DPRIM_PATCHLIST_8 39 -#define _3DPRIM_PATCHLIST_9 40 -#define _3DPRIM_PATCHLIST_10 41 -#define _3DPRIM_PATCHLIST_11 42 -#define _3DPRIM_PATCHLIST_12 43 -#define _3DPRIM_PATCHLIST_13 44 -#define _3DPRIM_PATCHLIST_14 45 -#define _3DPRIM_PATCHLIST_15 46 -#define _3DPRIM_PATCHLIST_16 47 -#define _3DPRIM_PATCHLIST_17 48 -#define _3DPRIM_PATCHLIST_18 49 -#define _3DPRIM_PATCHLIST_19 50 -#define _3DPRIM_PATCHLIST_20 51 -#define _3DPRIM_PATCHLIST_21 52 -#define _3DPRIM_PATCHLIST_22 53 -#define _3DPRIM_PATCHLIST_23 54 -#define _3DPRIM_PATCHLIST_24 55 -#define _3DPRIM_PATCHLIST_25 56 -#define _3DPRIM_PATCHLIST_26 57 -#define _3DPRIM_PATCHLIST_27 58 -#define _3DPRIM_PATCHLIST_28 59 -#define _3DPRIM_PATCHLIST_29 60 -#define _3DPRIM_PATCHLIST_30 61 -#define _3DPRIM_PATCHLIST_31 62 -#define _3DPRIM_PATCHLIST_32 63 - -/* Enum 3D_Vertex_Component_Control */ -#define VFCOMP_NOSTORE 0 -#define VFCOMP_STORE_SRC 1 -#define VFCOMP_STORE_0 2 -#define VFCOMP_STORE_1_FP 3 -#define VFCOMP_STORE_1_INT 4 -#define VFCOMP_STORE_VID 5 -#define VFCOMP_STORE_IID 6 -#define VFCOMP_STORE_PID 7 - -/* Enum 3D_Compare_Function */ -#define COMPAREFUNCTION_ALWAYS 0 -#define COMPAREFUNCTION_NEVER 1 -#define COMPAREFUNCTION_LESS 2 -#define COMPAREFUNCTION_EQUAL 3 -#define COMPAREFUNCTION_LEQUAL 4 -#define COMPAREFUNCTION_GREATER 5 -#define COMPAREFUNCTION_NOTEQUAL 6 -#define COMPAREFUNCTION_GEQUAL 7 - -/* Enum SURFACE_FORMAT */ -#define R32G32B32A32_FLOAT 0 -#define R32G32B32A32_SINT 1 
-#define R32G32B32A32_UINT 2 -#define R32G32B32A32_UNORM 3 -#define R32G32B32A32_SNORM 4 -#define R64G64_FLOAT 5 -#define R32G32B32X32_FLOAT 6 -#define R32G32B32A32_SSCALED 7 -#define R32G32B32A32_USCALED 8 -#define R32G32B32A32_SFIXED 32 -#define R64G64_PASSTHRU 33 -#define R32G32B32_FLOAT 64 -#define R32G32B32_SINT 65 -#define R32G32B32_UINT 66 -#define R32G32B32_UNORM 67 -#define R32G32B32_SNORM 68 -#define R32G32B32_SSCALED 69 -#define R32G32B32_USCALED 70 -#define R32G32B32_SFIXED 80 -#define R16G16B16A16_UNORM 128 -#define R16G16B16A16_SNORM 129 -#define R16G16B16A16_SINT 130 -#define R16G16B16A16_UINT 131 -#define R16G16B16A16_FLOAT 132 -#define R32G32_FLOAT 133 -#define R32G32_SINT 134 -#define R32G32_UINT 135 -#define R32_FLOAT_X8X24_TYPELESS 136 -#define X32_TYPELESS_G8X24_UINT 137 -#define L32A32_FLOAT 138 -#define R32G32_UNORM 139 -#define R32G32_SNORM 140 -#define R64_FLOAT 141 -#define R16G16B16X16_UNORM 142 -#define R16G16B16X16_FLOAT 143 -#define A32X32_FLOAT 144 -#define L32X32_FLOAT 145 -#define I32X32_FLOAT 146 -#define R16G16B16A16_SSCALED 147 -#define R16G16B16A16_USCALED 148 -#define R32G32_SSCALED 149 -#define R32G32_USCALED 150 -#define R32G32_SFIXED 160 -#define R64_PASSTHRU 161 -#define B8G8R8A8_UNORM 192 -#define B8G8R8A8_UNORM_SRGB 193 -#define R10G10B10A2_UNORM 194 -#define R10G10B10A2_UNORM_SRGB 195 -#define R10G10B10A2_UINT 196 -#define R10G10B10_SNORM_A2_UNORM 197 -#define R8G8B8A8_UNORM 199 -#define R8G8B8A8_UNORM_SRGB 200 -#define R8G8B8A8_SNORM 201 -#define R8G8B8A8_SINT 202 -#define R8G8B8A8_UINT 203 -#define R16G16_UNORM 204 -#define R16G16_SNORM 205 -#define R16G16_SINT 206 -#define R16G16_UINT 207 -#define R16G16_FLOAT 208 -#define B10G10R10A2_UNORM 209 -#define B10G10R10A2_UNORM_SRGB 210 -#define R11G11B10_FLOAT 211 -#define R32_SINT 214 -#define R32_UINT 215 -#define R32_FLOAT 216 -#define R24_UNORM_X8_TYPELESS 217 -#define X24_TYPELESS_G8_UINT 218 -#define L32_UNORM 221 -#define A32_UNORM 222 -#define L16A16_UNORM 223 
-#define I24X8_UNORM 224 -#define L24X8_UNORM 225 -#define A24X8_UNORM 226 -#define I32_FLOAT 227 -#define L32_FLOAT 228 -#define A32_FLOAT 229 -#define X8B8_UNORM_G8R8_SNORM 230 -#define A8X8_UNORM_G8R8_SNORM 231 -#define B8X8_UNORM_G8R8_SNORM 232 -#define B8G8R8X8_UNORM 233 -#define B8G8R8X8_UNORM_SRGB 234 -#define R8G8B8X8_UNORM 235 -#define R8G8B8X8_UNORM_SRGB 236 -#define R9G9B9E5_SHAREDEXP 237 -#define B10G10R10X2_UNORM 238 -#define L16A16_FLOAT 240 -#define R32_UNORM 241 -#define R32_SNORM 242 -#define R10G10B10X2_USCALED 243 -#define R8G8B8A8_SSCALED 244 -#define R8G8B8A8_USCALED 245 -#define R16G16_SSCALED 246 -#define R16G16_USCALED 247 -#define R32_SSCALED 248 -#define R32_USCALED 249 -#define B5G6R5_UNORM 256 -#define B5G6R5_UNORM_SRGB 257 -#define B5G5R5A1_UNORM 258 -#define B5G5R5A1_UNORM_SRGB 259 -#define B4G4R4A4_UNORM 260 -#define B4G4R4A4_UNORM_SRGB 261 -#define R8G8_UNORM 262 -#define R8G8_SNORM 263 -#define R8G8_SINT 264 -#define R8G8_UINT 265 -#define R16_UNORM 266 -#define R16_SNORM 267 -#define R16_SINT 268 -#define R16_UINT 269 -#define R16_FLOAT 270 -#define A8P8_UNORM_PALETTE0 271 -#define A8P8_UNORM_PALETTE1 272 -#define I16_UNORM 273 -#define L16_UNORM 274 -#define A16_UNORM 275 -#define L8A8_UNORM 276 -#define I16_FLOAT 277 -#define L16_FLOAT 278 -#define A16_FLOAT 279 -#define L8A8_UNORM_SRGB 280 -#define R5G5_SNORM_B6_UNORM 281 -#define B5G5R5X1_UNORM 282 -#define B5G5R5X1_UNORM_SRGB 283 -#define R8G8_SSCALED 284 -#define R8G8_USCALED 285 -#define R16_SSCALED 286 -#define R16_USCALED 287 -#define P8A8_UNORM_PALETTE0 290 -#define P8A8_UNORM_PALETTE1 291 -#define A1B5G5R5_UNORM 292 -#define A4B4G4R4_UNORM 293 -#define L8A8_UINT 294 -#define L8A8_SINT 295 -#define R8_UNORM 320 -#define R8_SNORM 321 -#define R8_SINT 322 -#define R8_UINT 323 -#define A8_UNORM 324 -#define I8_UNORM 325 -#define L8_UNORM 326 -#define P4A4_UNORM_PALETTE0 327 -#define A4P4_UNORM_PALETTE0 328 -#define R8_SSCALED 329 -#define R8_USCALED 330 -#define 
P8_UNORM_PALETTE0 331 -#define L8_UNORM_SRGB 332 -#define P8_UNORM_PALETTE1 333 -#define P4A4_UNORM_PALETTE1 334 -#define A4P4_UNORM_PALETTE1 335 -#define Y8_UNORM 336 -#define L8_UINT 338 -#define L8_SINT 339 -#define I8_UINT 340 -#define I8_SINT 341 -#define DXT1_RGB_SRGB 384 -#define R1_UNORM 385 -#define YCRCB_NORMAL 386 -#define YCRCB_SWAPUVY 387 -#define P2_UNORM_PALETTE0 388 -#define P2_UNORM_PALETTE1 389 -#define BC1_UNORM 390 -#define BC2_UNORM 391 -#define BC3_UNORM 392 -#define BC4_UNORM 393 -#define BC5_UNORM 394 -#define BC1_UNORM_SRGB 395 -#define BC2_UNORM_SRGB 396 -#define BC3_UNORM_SRGB 397 -#define MONO8 398 -#define YCRCB_SWAPUV 399 -#define YCRCB_SWAPY 400 -#define DXT1_RGB 401 -#define FXT1 402 -#define R8G8B8_UNORM 403 -#define R8G8B8_SNORM 404 -#define R8G8B8_SSCALED 405 -#define R8G8B8_USCALED 406 -#define R64G64B64A64_FLOAT 407 -#define R64G64B64_FLOAT 408 -#define BC4_SNORM 409 -#define BC5_SNORM 410 -#define R16G16B16_FLOAT 411 -#define R16G16B16_UNORM 412 -#define R16G16B16_SNORM 413 -#define R16G16B16_SSCALED 414 -#define R16G16B16_USCALED 415 -#define BC6H_SF16 417 -#define BC7_UNORM 418 -#define BC7_UNORM_SRGB 419 -#define BC6H_UF16 420 -#define PLANAR_420_8 421 -#define R8G8B8_UNORM_SRGB 424 -#define ETC1_RGB8 425 -#define ETC2_RGB8 426 -#define EAC_R11 427 -#define EAC_RG11 428 -#define EAC_SIGNED_R11 429 -#define EAC_SIGNED_RG11 430 -#define ETC2_SRGB8 431 -#define R16G16B16_UINT 432 -#define R16G16B16_SINT 433 -#define R32_SFIXED 434 -#define R10G10B10A2_SNORM 435 -#define R10G10B10A2_USCALED 436 -#define R10G10B10A2_SSCALED 437 -#define R10G10B10A2_SINT 438 -#define B10G10R10A2_SNORM 439 -#define B10G10R10A2_USCALED 440 -#define B10G10R10A2_SSCALED 441 -#define B10G10R10A2_UINT 442 -#define B10G10R10A2_SINT 443 -#define R64G64B64A64_PASSTHRU 444 -#define R64G64B64_PASSTHRU 445 -#define ETC2_RGB8_PTA 448 -#define ETC2_SRGB8_PTA 449 -#define ETC2_EAC_RGBA8 450 -#define ETC2_EAC_SRGB8_A8 451 -#define R8G8B8_UINT 456 -#define 
R8G8B8_SINT 457 -#define RAW 511 - -/* Enum Texture Coordinate Mode */ -#define TCM_WRAP 0 -#define TCM_MIRROR 1 -#define TCM_CLAMP 2 -#define TCM_CUBE 3 -#define TCM_CLAMP_BORDER 4 -#define TCM_MIRROR_ONCE 5 - diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index ee4b7f3a5c4..ba0e75f0c3e 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -80,10 +80,10 @@ genX(fill_buffer_surface_state)(void *state, enum isl_format format, .Depth = ((num_elements - 1) >> 21) & 0x3f, .SurfacePitch = stride - 1, # if (ANV_IS_HASWELL) - .ShaderChannelSelectR = SCS_RED, - .ShaderChannelSelectG = SCS_GREEN, - .ShaderChannelSelectB = SCS_BLUE, - .ShaderChannelSelectA = SCS_ALPHA, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, # endif .SurfaceBaseAddress = { NULL, offset }, }; @@ -110,7 +110,7 @@ VkResult genX(CreateSampler)( struct GEN7_SAMPLER_STATE sampler_state = { .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, - .LODPreClampEnable = OGL, + .LODPreClampEnable = CLAMP_ENABLE_OGL, .BaseMipLevel = 0.0, .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, @@ -228,10 +228,10 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, .MCSEnable = false, # if (ANV_IS_HASWELL) - .ShaderChannelSelectR = vk_to_gen_swizzle[iview->swizzle.r], - .ShaderChannelSelectG = vk_to_gen_swizzle[iview->swizzle.g], - .ShaderChannelSelectB = vk_to_gen_swizzle[iview->swizzle.b], - .ShaderChannelSelectA = vk_to_gen_swizzle[iview->swizzle.a], + .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], # else /* XXX: Seriously? 
*/ .RedClearColor = 0, .GreenClearColor = 0, diff --git a/src/vulkan/gen8.xml b/src/vulkan/gen8.xml new file mode 100644 index 00000000000..fff6ae2ed42 --- /dev/null +++ b/src/vulkan/gen8.xml @@ -0,0 +1,3165 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index b81944c2156..82f7eb1f48f 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -903,9 +903,9 @@ void genX(CmdWriteTimestamp)( emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16); } -#define alu_opcode(v) __gen_field((v), 20, 31) -#define alu_operand1(v) __gen_field((v), 10, 19) -#define alu_operand2(v) __gen_field((v), 0, 9) +#define alu_opcode(v) __gen_uint((v), 20, 31) +#define alu_operand1(v) __gen_uint((v), 10, 19) +#define alu_operand2(v) __gen_uint((v), 0, 9) #define alu(opcode, operand1, operand2) \ alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) @@ -1083,7 +1083,7 @@ void genX(CmdWaitEvents)( anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), .WaitMode = PollingMode, - .CompareOperation = SAD_EQUAL_SDD, + .CompareOperation = COMPARE_SAD_EQUAL_SDD, .SemaphoreDataDword = VK_EVENT_SET, .SemaphoreAddress = { &cmd_buffer->device->dynamic_state_block_pool.bo, diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h deleted file mode 100644 index 3c014b96147..00000000000 --- a/src/vulkan/gen8_pack.h +++ /dev/null @@ -1,9209 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, 
including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - - -/* Instructions, enums and structures for BDW. - * - * This file has been generated, do not hand edit. 
- */ - -#pragma once - -#include -#include - -#ifndef __gen_validate_value -#define __gen_validate_value(x) -#endif - -#ifndef __gen_field_functions -#define __gen_field_functions - -union __gen_value { - float f; - uint32_t dw; -}; - -static inline uint64_t -__gen_mbo(uint32_t start, uint32_t end) -{ - return (~0ull >> (64 - (end - start + 1))) << start; -} - -static inline uint64_t -__gen_field(uint64_t v, uint32_t start, uint32_t end) -{ - __gen_validate_value(v); -#if DEBUG - if (end - start + 1 < 64) - assert(v < 1ull << (end - start + 1)); -#endif - - return v << start; -} - -static inline uint64_t -__gen_fixed(float v, uint32_t start, uint32_t end, - bool is_signed, uint32_t fract_bits) -{ - __gen_validate_value(v); - - const float factor = (1 << fract_bits); - - float max, min; - if (is_signed) { - max = ((1 << (end - start)) - 1) / factor; - min = -(1 << (end - start)) / factor; - } else { - max = ((1 << (end - start + 1)) - 1) / factor; - min = 0.0f; - } - - if (v > max) - v = max; - else if (v < min) - v = min; - - int32_t int_val = roundf(v * factor); - - if (is_signed) - int_val &= (1 << (end - start + 1)) - 1; - - return int_val << start; -} - -static inline uint64_t -__gen_offset(uint64_t v, uint32_t start, uint32_t end) -{ - __gen_validate_value(v); -#if DEBUG - uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; - - assert((v & ~mask) == 0); -#endif - - return v; -} - -static inline uint32_t -__gen_float(float v) -{ - __gen_validate_value(v); - return ((union __gen_value) { .f = (v) }).dw; -} - -#ifndef __gen_address_type -#error #define __gen_address_type before including this file -#endif - -#ifndef __gen_user_data -#error #define __gen_combine_address before including this file -#endif - -#endif - -#define GEN8_3DSTATE_URB_VS_length_bias 0x00000002 -#define GEN8_3DSTATE_URB_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 48, \ - .DwordLength = 0 - -#define 
GEN8_3DSTATE_URB_VS_length 0x00000002 - -struct GEN8_3DSTATE_URB_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t VSURBStartingAddress; - uint32_t VSURBEntryAllocationSize; - uint32_t VSNumberofURBEntries; -}; - -static inline void -GEN8_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_URB_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->VSURBStartingAddress, 25, 31) | - __gen_field(values->VSURBEntryAllocationSize, 16, 24) | - __gen_field(values->VSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN8_3DSTATE_VS_length_bias 0x00000002 -#define GEN8_3DSTATE_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 16, \ - .DwordLength = 7 - -#define GEN8_3DSTATE_VS_length 0x00000009 - -struct GEN8_3DSTATE_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint64_t KernelStartPointer; -#define Multiple 0 -#define Single 1 - uint32_t SingleVertexDispatch; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadDispatchPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool AccessesUAV; - bool SoftwareExceptionEnable; - uint64_t ScratchSpaceBasePointer; - uint32_t 
PerThreadScratchSpace; - uint32_t DispatchGRFStartRegisterForURBData; - uint32_t VertexURBEntryReadLength; - uint32_t VertexURBEntryReadOffset; - uint32_t MaximumNumberofThreads; - bool StatisticsEnable; - bool SIMD8DispatchEnable; - bool VertexCacheDisable; - bool FunctionEnable; - uint32_t VertexURBEntryOutputReadOffset; - uint32_t VertexURBEntryOutputLength; - uint32_t UserClipDistanceClipTestEnableBitmask; - uint32_t UserClipDistanceCullTestEnableBitmask; -}; - -static inline void -GEN8_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint64_t qw1 = - __gen_offset(values->KernelStartPointer, 6, 63) | - 0; - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->SingleVertexDispatch, 31, 31) | - __gen_field(values->VectorMaskEnable, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadDispatchPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->AccessesUAV, 12, 12) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - uint64_t qw4 = - __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = qw4; - dw[5] = qw4 >> 32; - - dw[6] = - __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | - __gen_field(values->VertexURBEntryReadLength, 11, 16) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - 0; - - dw[7] = - __gen_field(values->MaximumNumberofThreads, 23, 31) | - __gen_field(values->StatisticsEnable, 10, 
10) | - __gen_field(values->SIMD8DispatchEnable, 2, 2) | - __gen_field(values->VertexCacheDisable, 1, 1) | - __gen_field(values->FunctionEnable, 0, 0) | - 0; - - dw[8] = - __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | - __gen_field(values->VertexURBEntryOutputLength, 16, 20) | - __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | - __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | - 0; - -} - -#define GEN8_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 -#define GEN8_GPGPU_CSR_BASE_ADDRESS_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 4, \ - .DwordLength = 1 - -#define GEN8_GPGPU_CSR_BASE_ADDRESS_length 0x00000003 - -struct GEN8_GPGPU_CSR_BASE_ADDRESS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type GPGPUCSRBaseAddress; -}; - -static inline void -GEN8_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_GPGPU_CSR_BASE_ADDRESS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - -} - -#define GEN8_MI_ATOMIC_length_bias 0x00000002 -#define GEN8_MI_ATOMIC_header \ - .CommandType = 0, \ - .MICommandOpcode = 47 - -#define GEN8_MI_ATOMIC_length 0x00000003 - -struct GEN8_MI_ATOMIC { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t MemoryType; - uint32_t PostSyncOperation; -#define DWORD 0 -#define QWORD 1 -#define 
OCTWORD 2 -#define RESERVED 3 - uint32_t DataSize; - uint32_t InlineData; - uint32_t CSSTALL; - uint32_t ReturnDataControl; - uint32_t ATOMICOPCODE; - uint32_t DwordLength; - __gen_address_type MemoryAddress; - uint32_t Operand1DataDword0; - uint32_t Operand2DataDword0; - uint32_t Operand1DataDword1; - uint32_t Operand2DataDword1; - uint32_t Operand1DataDword2; - uint32_t Operand2DataDword2; - uint32_t Operand1DataDword3; - uint32_t Operand2DataDword3; -}; - -static inline void -GEN8_MI_ATOMIC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_ATOMIC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->MemoryType, 22, 22) | - __gen_field(values->PostSyncOperation, 21, 21) | - __gen_field(values->DataSize, 19, 20) | - __gen_field(values->InlineData, 18, 18) | - __gen_field(values->CSSTALL, 17, 17) | - __gen_field(values->ReturnDataControl, 16, 16) | - __gen_field(values->ATOMICOPCODE, 8, 15) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->MemoryAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->Operand1DataDword0, 0, 31) | - 0; - - dw[4] = - __gen_field(values->Operand2DataDword0, 0, 31) | - 0; - - dw[5] = - __gen_field(values->Operand1DataDword1, 0, 31) | - 0; - - dw[6] = - __gen_field(values->Operand2DataDword1, 0, 31) | - 0; - - dw[7] = - __gen_field(values->Operand1DataDword2, 0, 31) | - 0; - - dw[8] = - __gen_field(values->Operand2DataDword2, 0, 31) | - 0; - - dw[9] = - __gen_field(values->Operand1DataDword3, 0, 31) | - 0; - - dw[10] = - __gen_field(values->Operand2DataDword3, 0, 31) | - 0; - -} - -#define GEN8_MI_LOAD_REGISTER_REG_length_bias 0x00000002 -#define GEN8_MI_LOAD_REGISTER_REG_header \ - .CommandType = 0, \ - .MICommandOpcode = 42, \ - .DwordLength = 1 - -#define 
GEN8_MI_LOAD_REGISTER_REG_length 0x00000003 - -struct GEN8_MI_LOAD_REGISTER_REG { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t SourceRegisterAddress; - uint32_t DestinationRegisterAddress; -}; - -static inline void -GEN8_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_LOAD_REGISTER_REG * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->SourceRegisterAddress, 2, 22) | - 0; - - dw[2] = - __gen_offset(values->DestinationRegisterAddress, 2, 22) | - 0; - -} - -#define GEN8_MI_SEMAPHORE_SIGNAL_length_bias 0x00000002 -#define GEN8_MI_SEMAPHORE_SIGNAL_header \ - .CommandType = 0, \ - .MICommandOpcode = 27, \ - .DwordLength = 0 - -#define GEN8_MI_SEMAPHORE_SIGNAL_length 0x00000002 - -struct GEN8_MI_SEMAPHORE_SIGNAL { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t PostSyncOperation; -#define RCS 0 -#define VCS0 1 -#define BCS 2 -#define VECS 3 -#define VCS1 4 - uint32_t TargetEngineSelect; - uint32_t DwordLength; - uint32_t TargetContextID; -}; - -static inline void -GEN8_MI_SEMAPHORE_SIGNAL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_SEMAPHORE_SIGNAL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->PostSyncOperation, 21, 21) | - __gen_field(values->TargetEngineSelect, 15, 17) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->TargetContextID, 0, 31) | - 0; - -} - -#define GEN8_MI_SEMAPHORE_WAIT_length_bias 0x00000002 -#define GEN8_MI_SEMAPHORE_WAIT_header \ - .CommandType = 0, \ - .MICommandOpcode = 28, \ - .DwordLength = 2 - -#define GEN8_MI_SEMAPHORE_WAIT_length 
0x00000004 - -struct GEN8_MI_SEMAPHORE_WAIT { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t MemoryType; -#define PollingMode 1 -#define SignalMode 0 - uint32_t WaitMode; -#define SAD_GREATER_THAN_SDD 0 -#define SAD_GREATER_THAN_OR_EQUAL_SDD 1 -#define SAD_LESS_THAN_SDD 2 -#define SAD_LESS_THAN_OR_EQUAL_SDD 3 -#define SAD_EQUAL_SDD 4 -#define SAD_NOT_EQUAL_SDD 5 - uint32_t CompareOperation; - uint32_t DwordLength; - uint32_t SemaphoreDataDword; - __gen_address_type SemaphoreAddress; -}; - -static inline void -GEN8_MI_SEMAPHORE_WAIT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_SEMAPHORE_WAIT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->MemoryType, 22, 22) | - __gen_field(values->WaitMode, 15, 15) | - __gen_field(values->CompareOperation, 12, 14) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SemaphoreDataDword, 0, 31) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->SemaphoreAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - -} - -#define GEN8_MI_STORE_REGISTER_MEM_length_bias 0x00000002 -#define GEN8_MI_STORE_REGISTER_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 36, \ - .DwordLength = 2 - -#define GEN8_MI_STORE_REGISTER_MEM_length 0x00000004 - -struct GEN8_MI_STORE_REGISTER_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool UseGlobalGTT; - uint32_t PredicateEnable; - uint32_t DwordLength; - uint32_t RegisterAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN8_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_STORE_REGISTER_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - 
__gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->PredicateEnable, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->RegisterAddress, 2, 22) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - -} - -#define GEN8_PIPELINE_SELECT_length_bias 0x00000001 -#define GEN8_PIPELINE_SELECT_header \ - .CommandType = 3, \ - .CommandSubType = 1, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 4 - -#define GEN8_PIPELINE_SELECT_length 0x00000001 - -struct GEN8_PIPELINE_SELECT { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; -#define _3D 0 -#define Media 1 -#define GPGPU 2 - uint32_t PipelineSelection; -}; - -static inline void -GEN8_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_PIPELINE_SELECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->PipelineSelection, 0, 1) | - 0; - -} - -#define GEN8_STATE_BASE_ADDRESS_length_bias 0x00000002 -#define GEN8_STATE_BASE_ADDRESS_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 1, \ - .DwordLength = 14 - -#define GEN8_STATE_BASE_ADDRESS_length 0x00000010 - -#define GEN8_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 - -struct GEN8_MEMORY_OBJECT_CONTROL_STATE { -#define UCwithFenceifcoherentcycle 0 -#define UCUncacheable 1 -#define WT 2 -#define WB 3 - uint32_t MemoryTypeLLCeLLCCacheabilityControl; -#define eLLCOnlywheneDRAMispresentelsegetsallocatedinLLC 0 -#define LLCOnly 1 
-#define LLCeLLCAllowed 2 -#define L3DefertoPATforLLCeLLCselection 3 - uint32_t TargetCache; - uint32_t AgeforQUADLRU; -}; - -static inline void -GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEMORY_OBJECT_CONTROL_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->MemoryTypeLLCeLLCCacheabilityControl, 5, 6) | - __gen_field(values->TargetCache, 3, 4) | - __gen_field(values->AgeforQUADLRU, 0, 1) | - 0; - -} - -struct GEN8_STATE_BASE_ADDRESS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type GeneralStateBaseAddress; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; - bool GeneralStateBaseAddressModifyEnable; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; - __gen_address_type SurfaceStateBaseAddress; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; - bool SurfaceStateBaseAddressModifyEnable; - __gen_address_type DynamicStateBaseAddress; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; - bool DynamicStateBaseAddressModifyEnable; - __gen_address_type IndirectObjectBaseAddress; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; - bool IndirectObjectBaseAddressModifyEnable; - __gen_address_type InstructionBaseAddress; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; - bool InstructionBaseAddressModifyEnable; - uint32_t GeneralStateBufferSize; - bool GeneralStateBufferSizeModifyEnable; - uint32_t DynamicStateBufferSize; - bool DynamicStateBufferSizeModifyEnable; - uint32_t IndirectObjectBufferSize; - bool IndirectObjectBufferSizeModifyEnable; - uint32_t InstructionBufferSize; - bool InstructionBuffersizeModifyEnable; -}; - -static inline void 
-GEN8_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_STATE_BASE_ADDRESS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_GeneralStateMemoryObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); - uint32_t dw1 = - __gen_field(dw_GeneralStateMemoryObjectControlState, 4, 10) | - __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); - dw[3] = - __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 16, 22) | - 0; - - uint32_t dw_SurfaceStateMemoryObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); - uint32_t dw4 = - __gen_field(dw_SurfaceStateMemoryObjectControlState, 4, 10) | - __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | - 0; - - uint64_t qw4 = - __gen_combine_address(data, &dw[4], values->SurfaceStateBaseAddress, dw4); - - dw[4] = qw4; - dw[5] = qw4 >> 32; - - uint32_t dw_DynamicStateMemoryObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); - uint32_t dw6 = - __gen_field(dw_DynamicStateMemoryObjectControlState, 4, 10) | - 
__gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | - 0; - - uint64_t qw6 = - __gen_combine_address(data, &dw[6], values->DynamicStateBaseAddress, dw6); - - dw[6] = qw6; - dw[7] = qw6 >> 32; - - uint32_t dw_IndirectObjectMemoryObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); - uint32_t dw8 = - __gen_field(dw_IndirectObjectMemoryObjectControlState, 4, 10) | - __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | - 0; - - uint64_t qw8 = - __gen_combine_address(data, &dw[8], values->IndirectObjectBaseAddress, dw8); - - dw[8] = qw8; - dw[9] = qw8 >> 32; - - uint32_t dw_InstructionMemoryObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); - uint32_t dw10 = - __gen_field(dw_InstructionMemoryObjectControlState, 4, 10) | - __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | - 0; - - uint64_t qw10 = - __gen_combine_address(data, &dw[10], values->InstructionBaseAddress, dw10); - - dw[10] = qw10; - dw[11] = qw10 >> 32; - - dw[12] = - __gen_field(values->GeneralStateBufferSize, 12, 31) | - __gen_field(values->GeneralStateBufferSizeModifyEnable, 0, 0) | - 0; - - dw[13] = - __gen_field(values->DynamicStateBufferSize, 12, 31) | - __gen_field(values->DynamicStateBufferSizeModifyEnable, 0, 0) | - 0; - - dw[14] = - __gen_field(values->IndirectObjectBufferSize, 12, 31) | - __gen_field(values->IndirectObjectBufferSizeModifyEnable, 0, 0) | - 0; - - dw[15] = - __gen_field(values->InstructionBufferSize, 12, 31) | - __gen_field(values->InstructionBuffersizeModifyEnable, 0, 0) | - 0; - -} - -#define GEN8_STATE_PREFETCH_length_bias 0x00000002 -#define GEN8_STATE_PREFETCH_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 3, \ - .DwordLength = 0 - -#define GEN8_STATE_PREFETCH_length 
0x00000002 - -struct GEN8_STATE_PREFETCH { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type PrefetchPointer; - uint32_t PrefetchCount; -}; - -static inline void -GEN8_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_STATE_PREFETCH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - __gen_field(values->PrefetchCount, 0, 2) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); - -} - -#define GEN8_STATE_SIP_length_bias 0x00000002 -#define GEN8_STATE_SIP_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 2, \ - .DwordLength = 1 - -#define GEN8_STATE_SIP_length 0x00000003 - -struct GEN8_STATE_SIP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint64_t SystemInstructionPointer; -}; - -static inline void -GEN8_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_STATE_SIP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint64_t qw1 = - __gen_offset(values->SystemInstructionPointer, 4, 63) | - 0; - - dw[1] = qw1; - dw[2] = qw1 >> 32; - -} - -#define GEN8_SWTESS_BASE_ADDRESS_length_bias 0x00000002 -#define GEN8_SWTESS_BASE_ADDRESS_header \ - .CommandType = 3, \ - 
.CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 3, \ - .DwordLength = 0 - -#define GEN8_SWTESS_BASE_ADDRESS_length 0x00000002 - -struct GEN8_SWTESS_BASE_ADDRESS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type SWTessellationBaseAddress; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; -}; - -static inline void -GEN8_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SWTESS_BASE_ADDRESS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_SWTessellationMemoryObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); - uint32_t dw1 = - __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - -} - -#define GEN8_3DPRIMITIVE_length_bias 0x00000002 -#define GEN8_3DPRIMITIVE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 3, \ - ._3DCommandSubOpcode = 0, \ - .DwordLength = 5 - -#define GEN8_3DPRIMITIVE_length 0x00000007 - -struct GEN8_3DPRIMITIVE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - bool IndirectParameterEnable; - uint32_t UAVCoherencyRequired; - bool PredicateEnable; - uint32_t DwordLength; - bool EndOffsetEnable; -#define SEQUENTIAL 0 -#define RANDOM 1 - uint32_t VertexAccessType; - uint32_t PrimitiveTopologyType; - uint32_t 
VertexCountPerInstance; - uint32_t StartVertexLocation; - uint32_t InstanceCount; - uint32_t StartInstanceLocation; - uint32_t BaseVertexLocation; -}; - -static inline void -GEN8_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DPRIMITIVE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->IndirectParameterEnable, 10, 10) | - __gen_field(values->UAVCoherencyRequired, 9, 9) | - __gen_field(values->PredicateEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->EndOffsetEnable, 9, 9) | - __gen_field(values->VertexAccessType, 8, 8) | - __gen_field(values->PrimitiveTopologyType, 0, 5) | - 0; - - dw[2] = - __gen_field(values->VertexCountPerInstance, 0, 31) | - 0; - - dw[3] = - __gen_field(values->StartVertexLocation, 0, 31) | - 0; - - dw[4] = - __gen_field(values->InstanceCount, 0, 31) | - 0; - - dw[5] = - __gen_field(values->StartInstanceLocation, 0, 31) | - 0; - - dw[6] = - __gen_field(values->BaseVertexLocation, 0, 31) | - 0; - -} - -#define GEN8_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 -#define GEN8_3DSTATE_AA_LINE_PARAMETERS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 10, \ - .DwordLength = 1 - -#define GEN8_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 - -struct GEN8_3DSTATE_AA_LINE_PARAMETERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - float AAPointCoverageBias; - float AACoverageBias; - float AAPointCoverageSlope; - float AACoverageSlope; - float AAPointCoverageEndCapBias; - float AACoverageEndCapBias; - float AAPointCoverageEndCapSlope; - float AACoverageEndCapSlope; -}; - -static 
inline void -GEN8_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_AA_LINE_PARAMETERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->AAPointCoverageBias * (1 << 8), 24, 31) | - __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | - __gen_field(values->AAPointCoverageSlope * (1 << 8), 8, 15) | - __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | - 0; - - dw[2] = - __gen_field(values->AAPointCoverageEndCapBias * (1 << 8), 24, 31) | - __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | - __gen_field(values->AAPointCoverageEndCapSlope * (1 << 8), 8, 15) | - __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | - 0; - -} - -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_length_bias 0x00000002 -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 70 - -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_length 0x00000000 - -#define GEN8_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 - -struct GEN8_BINDING_TABLE_EDIT_ENTRY { - uint32_t BindingTableIndex; - uint32_t SurfaceStatePointer; -}; - -static inline void -GEN8_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_BINDING_TABLE_EDIT_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->BindingTableIndex, 16, 23) | - __gen_offset(values->SurfaceStatePointer, 0, 15) | - 0; - -} - -struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - 
uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_length_bias 0x00000002 -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 68 - -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_length 0x00000000 - -struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - 
__gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_length_bias 0x00000002 -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 69 - -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_length 0x00000000 - -struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_length_bias 0x00000002 -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 71 - -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_length 0x00000000 - -struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length 
fields follow */ -}; - -static inline void -GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_length_bias 0x00000002 -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 67 - -#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_length 0x00000000 - -struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define 
GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 40, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 - -struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoDSBindingTable; -}; - -static inline void -GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoDSBindingTable, 5, 15) | - 0; - -} - -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 41, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 - -struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoGSBindingTable; -}; - -static inline void -GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - 
__gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoGSBindingTable, 5, 15) | - 0; - -} - -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 39, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 - -struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoHSBindingTable; -}; - -static inline void -GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoHSBindingTable, 5, 15) | - 0; - -} - -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 42, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 - -struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoPSBindingTable; -}; - -static inline void -GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict 
dst, - const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoPSBindingTable, 5, 15) | - 0; - -} - -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 38, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 - -struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoVSBindingTable; -}; - -static inline void -GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoVSBindingTable, 5, 15) | - 0; - -} - -#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 -#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 25, \ - .DwordLength = 2 - -#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000004 - -struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC { - uint32_t CommandType; - uint32_t CommandSubType; - 
uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type BindingTablePoolBaseAddress; - uint32_t BindingTablePoolEnable; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; -#define NoValidData 0 - uint32_t BindingTablePoolBufferSize; -}; - -static inline void -GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_SurfaceObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); - uint32_t dw1 = - __gen_field(values->BindingTablePoolEnable, 11, 11) | - __gen_field(dw_SurfaceObjectControlState, 0, 6) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->BindingTablePoolBufferSize, 12, 31) | - 0; - -} - -#define GEN8_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 -#define GEN8_3DSTATE_BLEND_STATE_POINTERS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 36, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 - -struct GEN8_3DSTATE_BLEND_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BlendStatePointer; - bool BlendStatePointerValid; -}; - -static inline void -GEN8_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN8_3DSTATE_BLEND_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->BlendStatePointer, 6, 31) | - __gen_field(values->BlendStatePointerValid, 0, 0) | - 0; - -} - -#define GEN8_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 -#define GEN8_3DSTATE_CC_STATE_POINTERS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 14, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_CC_STATE_POINTERS_length 0x00000002 - -struct GEN8_3DSTATE_CC_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ColorCalcStatePointer; - bool ColorCalcStatePointerValid; -}; - -static inline void -GEN8_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CC_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->ColorCalcStatePointer, 6, 31) | - __gen_field(values->ColorCalcStatePointerValid, 0, 0) | - 0; - -} - -#define GEN8_3DSTATE_CHROMA_KEY_length_bias 0x00000002 -#define GEN8_3DSTATE_CHROMA_KEY_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 4, \ - .DwordLength = 2 - -#define GEN8_3DSTATE_CHROMA_KEY_length 0x00000004 - -struct GEN8_3DSTATE_CHROMA_KEY { - uint32_t CommandType; - uint32_t 
CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ChromaKeyTableIndex; - uint32_t ChromaKeyLowValue; - uint32_t ChromaKeyHighValue; -}; - -static inline void -GEN8_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CHROMA_KEY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ChromaKeyTableIndex, 30, 31) | - 0; - - dw[2] = - __gen_field(values->ChromaKeyLowValue, 0, 31) | - 0; - - dw[3] = - __gen_field(values->ChromaKeyHighValue, 0, 31) | - 0; - -} - -#define GEN8_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 -#define GEN8_3DSTATE_CLEAR_PARAMS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 4, \ - .DwordLength = 1 - -#define GEN8_3DSTATE_CLEAR_PARAMS_length 0x00000003 - -struct GEN8_3DSTATE_CLEAR_PARAMS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - float DepthClearValue; - bool DepthClearValueValid; -}; - -static inline void -GEN8_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CLEAR_PARAMS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_float(values->DepthClearValue) | - 0; - - dw[2] = - __gen_field(values->DepthClearValueValid, 0, 0) | - 0; - -} - -#define 
GEN8_3DSTATE_CLIP_length_bias 0x00000002 -#define GEN8_3DSTATE_CLIP_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 18, \ - .DwordLength = 2 - -#define GEN8_3DSTATE_CLIP_length 0x00000004 - -struct GEN8_3DSTATE_CLIP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define Normal 0 -#define Force 1 - bool ForceUserClipDistanceCullTestEnableBitmask; -#define _8Bit 0 -#define _4Bit 1 - uint32_t VertexSubPixelPrecisionSelect; - bool EarlyCullEnable; -#define Normal 0 -#define Force 1 - bool ForceUserClipDistanceClipTestEnableBitmask; -#define Normal 0 -#define Force 1 - bool ForceClipMode; - bool ClipperStatisticsEnable; - uint32_t UserClipDistanceCullTestEnableBitmask; - bool ClipEnable; -#define API_OGL 0 - uint32_t APIMode; - bool ViewportXYClipTestEnable; - bool GuardbandClipTestEnable; - uint32_t UserClipDistanceClipTestEnableBitmask; -#define NORMAL 0 -#define REJECT_ALL 3 -#define ACCEPT_ALL 4 - uint32_t ClipMode; - bool PerspectiveDivideDisable; - bool NonPerspectiveBarycentricEnable; - uint32_t TriangleStripListProvokingVertexSelect; - uint32_t LineStripListProvokingVertexSelect; - uint32_t TriangleFanProvokingVertexSelect; - float MinimumPointWidth; - float MaximumPointWidth; - bool ForceZeroRTAIndexEnable; - uint32_t MaximumVPIndex; -}; - -static inline void -GEN8_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CLIP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ForceUserClipDistanceCullTestEnableBitmask, 20, 20) | - __gen_field(values->VertexSubPixelPrecisionSelect, 
19, 19) | - __gen_field(values->EarlyCullEnable, 18, 18) | - __gen_field(values->ForceUserClipDistanceClipTestEnableBitmask, 17, 17) | - __gen_field(values->ForceClipMode, 16, 16) | - __gen_field(values->ClipperStatisticsEnable, 10, 10) | - __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | - 0; - - dw[2] = - __gen_field(values->ClipEnable, 31, 31) | - __gen_field(values->APIMode, 30, 30) | - __gen_field(values->ViewportXYClipTestEnable, 28, 28) | - __gen_field(values->GuardbandClipTestEnable, 26, 26) | - __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | - __gen_field(values->ClipMode, 13, 15) | - __gen_field(values->PerspectiveDivideDisable, 9, 9) | - __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | - __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | - __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | - __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | - 0; - - dw[3] = - __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | - __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | - __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | - __gen_field(values->MaximumVPIndex, 0, 3) | - 0; - -} - -#define GEN8_3DSTATE_CONSTANT_DS_length_bias 0x00000002 -#define GEN8_3DSTATE_CONSTANT_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 26, \ - .DwordLength = 9 - -#define GEN8_3DSTATE_CONSTANT_DS_length 0x0000000b - -#define GEN8_3DSTATE_CONSTANT_BODY_length 0x0000000a - -struct GEN8_3DSTATE_CONSTANT_BODY { - uint32_t ConstantBuffer1ReadLength; - uint32_t ConstantBuffer0ReadLength; - uint32_t ConstantBuffer3ReadLength; - uint32_t ConstantBuffer2ReadLength; - __gen_address_type PointerToConstantBuffer0; - __gen_address_type PointerToConstantBuffer1; - __gen_address_type PointerToConstantBuffer2; - __gen_address_type PointerToConstantBuffer3; -}; - -static inline void -GEN8_3DSTATE_CONSTANT_BODY_pack(__gen_user_data 
*data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_BODY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - uint32_t dw4 = - 0; - - uint64_t qw4 = - __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer1, dw4); - - dw[4] = qw4; - dw[5] = qw4 >> 32; - - uint32_t dw6 = - 0; - - uint64_t qw6 = - __gen_combine_address(data, &dw[6], values->PointerToConstantBuffer2, dw6); - - dw[6] = qw6; - dw[7] = qw6 >> 32; - - uint32_t dw8 = - 0; - - uint64_t qw8 = - __gen_combine_address(data, &dw[8], values->PointerToConstantBuffer3, dw8); - - dw[8] = qw8; - dw[9] = qw8 >> 32; - -} - -struct GEN8_3DSTATE_CONSTANT_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; - uint32_t DwordLength; - struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN8_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_ConstantBufferObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | - 
__gen_field(values->DwordLength, 0, 7) | - 0; - - GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN8_3DSTATE_CONSTANT_GS_length_bias 0x00000002 -#define GEN8_3DSTATE_CONSTANT_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 22, \ - .DwordLength = 9 - -#define GEN8_3DSTATE_CONSTANT_GS_length 0x0000000b - -struct GEN8_3DSTATE_CONSTANT_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; - uint32_t DwordLength; - struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN8_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_ConstantBufferObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN8_3DSTATE_CONSTANT_HS_length_bias 0x00000002 -#define GEN8_3DSTATE_CONSTANT_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 25, \ - .DwordLength = 9 - -#define GEN8_3DSTATE_CONSTANT_HS_length 0x0000000b - -struct GEN8_3DSTATE_CONSTANT_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; - uint32_t 
DwordLength; - struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN8_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_ConstantBufferObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN8_3DSTATE_CONSTANT_PS_length_bias 0x00000002 -#define GEN8_3DSTATE_CONSTANT_PS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 23, \ - .DwordLength = 9 - -#define GEN8_3DSTATE_CONSTANT_PS_length 0x0000000b - -struct GEN8_3DSTATE_CONSTANT_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; - uint32_t DwordLength; - struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN8_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_ConstantBufferObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 
16, 23) | - __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN8_3DSTATE_CONSTANT_VS_length_bias 0x00000002 -#define GEN8_3DSTATE_CONSTANT_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 21, \ - .DwordLength = 9 - -#define GEN8_3DSTATE_CONSTANT_VS_length 0x0000000b - -struct GEN8_3DSTATE_CONSTANT_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; - uint32_t DwordLength; - struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN8_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_CONSTANT_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_ConstantBufferObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN8_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 -#define GEN8_3DSTATE_DEPTH_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 5, \ - .DwordLength = 6 - -#define GEN8_3DSTATE_DEPTH_BUFFER_length 0x00000008 - -struct GEN8_3DSTATE_DEPTH_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t 
DwordLength; -#define SURFTYPE_1D 0 -#define SURFTYPE_2D 1 -#define SURFTYPE_3D 2 -#define SURFTYPE_CUBE 3 -#define SURFTYPE_NULL 7 - uint32_t SurfaceType; - bool DepthWriteEnable; - bool StencilWriteEnable; - bool HierarchicalDepthBufferEnable; -#define D32_FLOAT 1 -#define D24_UNORM_X8_UINT 3 -#define D16_UNORM 5 - uint32_t SurfaceFormat; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; - uint32_t Height; - uint32_t Width; - uint32_t LOD; - uint32_t Depth; - uint32_t MinimumArrayElement; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; - uint32_t RenderTargetViewExtent; - uint32_t SurfaceQPitch; -}; - -static inline void -GEN8_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_DEPTH_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SurfaceType, 29, 31) | - __gen_field(values->DepthWriteEnable, 28, 28) | - __gen_field(values->StencilWriteEnable, 27, 27) | - __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | - __gen_field(values->SurfaceFormat, 18, 20) | - __gen_field(values->SurfacePitch, 0, 17) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - dw[4] = - __gen_field(values->Height, 18, 31) | - __gen_field(values->Width, 4, 17) | - __gen_field(values->LOD, 0, 3) | - 0; - - uint32_t dw_DepthBufferObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); - dw[5] = - __gen_field(values->Depth, 21, 31) | - __gen_field(values->MinimumArrayElement, 10, 20) | - 
__gen_field(dw_DepthBufferObjectControlState, 0, 6) | - 0; - - dw[6] = - 0; - - dw[7] = - __gen_field(values->RenderTargetViewExtent, 21, 31) | - __gen_field(values->SurfaceQPitch, 0, 14) | - 0; - -} - -#define GEN8_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 -#define GEN8_3DSTATE_DRAWING_RECTANGLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 0, \ - .DwordLength = 2 - -#define GEN8_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 - -struct GEN8_3DSTATE_DRAWING_RECTANGLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; -#define Legacy 0 -#define Core0Enabled 1 -#define Core1Enabled 2 - uint32_t CoreModeSelect; - uint32_t DwordLength; - uint32_t ClippedDrawingRectangleYMin; - uint32_t ClippedDrawingRectangleXMin; - uint32_t ClippedDrawingRectangleYMax; - uint32_t ClippedDrawingRectangleXMax; - uint32_t DrawingRectangleOriginY; - uint32_t DrawingRectangleOriginX; -}; - -static inline void -GEN8_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_DRAWING_RECTANGLE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->CoreModeSelect, 14, 15) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | - __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | - 0; - - dw[2] = - __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | - __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | - 0; - - dw[3] = - __gen_field(values->DrawingRectangleOriginY, 16, 31) | - __gen_field(values->DrawingRectangleOriginX, 0, 15) | - 0; - -} - -#define GEN8_3DSTATE_DS_length_bias 0x00000002 -#define 
GEN8_3DSTATE_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 29, \ - .DwordLength = 7 - -#define GEN8_3DSTATE_DS_length 0x00000009 - -struct GEN8_3DSTATE_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint64_t KernelStartPointer; -#define Multiple 0 -#define Single 1 - uint32_t SingleDomainPointDispatch; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadDispatchPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool AccessesUAV; - bool IllegalOpcodeExceptionEnable; - bool SoftwareExceptionEnable; - uint64_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t DispatchGRFStartRegisterForURBData; - uint32_t PatchURBEntryReadLength; - uint32_t PatchURBEntryReadOffset; - uint32_t MaximumNumberofThreads; - bool StatisticsEnable; - bool SIMD8DispatchEnable; - bool ComputeWCoordinateEnable; - bool CacheDisable; - bool FunctionEnable; - uint32_t VertexURBEntryOutputReadOffset; - uint32_t VertexURBEntryOutputLength; - uint32_t UserClipDistanceClipTestEnableBitmask; - uint32_t UserClipDistanceCullTestEnableBitmask; -}; - -static inline void -GEN8_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint64_t qw1 = - __gen_offset(values->KernelStartPointer, 6, 63) | - 0; - - dw[1] 
= qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->SingleDomainPointDispatch, 31, 31) | - __gen_field(values->VectorMaskEnable, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadDispatchPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->AccessesUAV, 14, 14) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - uint64_t qw4 = - __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = qw4; - dw[5] = qw4 >> 32; - - dw[6] = - __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | - __gen_field(values->PatchURBEntryReadLength, 11, 17) | - __gen_field(values->PatchURBEntryReadOffset, 4, 9) | - 0; - - dw[7] = - __gen_field(values->MaximumNumberofThreads, 21, 29) | - __gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->SIMD8DispatchEnable, 3, 3) | - __gen_field(values->ComputeWCoordinateEnable, 2, 2) | - __gen_field(values->CacheDisable, 1, 1) | - __gen_field(values->FunctionEnable, 0, 0) | - 0; - - dw[8] = - __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | - __gen_field(values->VertexURBEntryOutputLength, 16, 20) | - __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | - __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | - 0; - -} - -#define GEN8_3DSTATE_GATHER_CONSTANT_DS_length_bias 0x00000002 -#define GEN8_3DSTATE_GATHER_CONSTANT_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 55 - -#define GEN8_3DSTATE_GATHER_CONSTANT_DS_length 0x00000000 - -#define GEN8_GATHER_CONSTANT_ENTRY_length 0x00000001 - -struct GEN8_GATHER_CONSTANT_ENTRY { - uint32_t ConstantBufferOffset; - uint32_t ChannelMask; - uint32_t BindingTableIndexOffset; -}; - -static inline void 
-GEN8_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_GATHER_CONSTANT_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->ConstantBufferOffset, 8, 15) | - __gen_field(values->ChannelMask, 4, 7) | - __gen_field(values->BindingTableIndexOffset, 0, 3) | - 0; - -} - -struct GEN8_3DSTATE_GATHER_CONSTANT_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; - uint32_t GatherBufferOffset; - bool ConstantBufferDx9GenerateStall; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GATHER_CONSTANT_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_GATHER_CONSTANT_GS_length_bias 0x00000002 -#define GEN8_3DSTATE_GATHER_CONSTANT_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 53 - -#define GEN8_3DSTATE_GATHER_CONSTANT_GS_length 0x00000000 - -struct GEN8_3DSTATE_GATHER_CONSTANT_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; 
- uint32_t ConstantBufferBindingTableBlock; - uint32_t GatherBufferOffset; - bool ConstantBufferDx9GenerateStall; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GATHER_CONSTANT_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_GATHER_CONSTANT_HS_length_bias 0x00000002 -#define GEN8_3DSTATE_GATHER_CONSTANT_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 54 - -#define GEN8_3DSTATE_GATHER_CONSTANT_HS_length 0x00000000 - -struct GEN8_3DSTATE_GATHER_CONSTANT_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; - uint32_t GatherBufferOffset; - bool ConstantBufferDx9GenerateStall; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GATHER_CONSTANT_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 
16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_GATHER_CONSTANT_PS_length_bias 0x00000002 -#define GEN8_3DSTATE_GATHER_CONSTANT_PS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 56 - -#define GEN8_3DSTATE_GATHER_CONSTANT_PS_length 0x00000000 - -struct GEN8_3DSTATE_GATHER_CONSTANT_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; - uint32_t GatherBufferOffset; - bool ConstantBufferDx9GenerateStall; - bool ConstantBufferDx9Enable; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GATHER_CONSTANT_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | - __gen_field(values->ConstantBufferDx9Enable, 4, 4) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_GATHER_CONSTANT_VS_length_bias 0x00000002 -#define GEN8_3DSTATE_GATHER_CONSTANT_VS_header \ 
- .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 52 - -#define GEN8_3DSTATE_GATHER_CONSTANT_VS_length 0x00000000 - -struct GEN8_3DSTATE_GATHER_CONSTANT_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; - uint32_t GatherBufferOffset; - bool ConstantBufferDx9GenerateStall; - bool ConstantBufferDx9Enable; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GATHER_CONSTANT_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | - __gen_field(values->ConstantBufferDx9Enable, 4, 4) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 -#define GEN8_3DSTATE_GATHER_POOL_ALLOC_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 26, \ - .DwordLength = 2 - -#define GEN8_3DSTATE_GATHER_POOL_ALLOC_length 0x00000004 - -struct GEN8_3DSTATE_GATHER_POOL_ALLOC { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type GatherPoolBaseAddress; - bool GatherPoolEnable; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE 
MemoryObjectControlState; - uint32_t GatherPoolBufferSize; -}; - -static inline void -GEN8_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GATHER_POOL_ALLOC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_MemoryObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); - uint32_t dw1 = - __gen_field(values->GatherPoolEnable, 11, 11) | - __gen_field(dw_MemoryObjectControlState, 0, 6) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->GatherPoolBufferSize, 12, 31) | - 0; - -} - -#define GEN8_3DSTATE_GS_length_bias 0x00000002 -#define GEN8_3DSTATE_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 17, \ - .DwordLength = 8 - -#define GEN8_3DSTATE_GS_length 0x0000000a - -struct GEN8_3DSTATE_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint64_t KernelStartPointer; - uint32_t SingleProgramFlow; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadDispatchPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool AccessesUAV; - bool MaskStackExceptionEnable; - bool SoftwareExceptionEnable; - 
uint32_t ExpectedVertexCount; - uint64_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t OutputVertexSize; - uint32_t OutputTopology; - uint32_t VertexURBEntryReadLength; - bool IncludeVertexHandles; - uint32_t VertexURBEntryReadOffset; - uint32_t DispatchGRFStartRegisterForURBData; - uint32_t MaximumNumberofThreads; - uint32_t ControlDataHeaderSize; - uint32_t InstanceControl; - uint32_t DefaultStreamId; -#define DispatchModeSingle 0 -#define DispatchModeDualInstance 1 -#define DispatchModeDualObject 2 -#define DispatchModeSIMD8 3 - uint32_t DispatchMode; - bool StatisticsEnable; - uint32_t InvocationsIncrementValue; - bool IncludePrimitiveID; - uint32_t Hint; -#define LEADING 0 -#define TRAILING 1 - uint32_t ReorderMode; - bool DiscardAdjacency; - bool Enable; -#define CUT 0 -#define SID 1 - uint32_t ControlDataFormat; - bool StaticOutput; - uint32_t StaticOutputVertexCount; - uint32_t VertexURBEntryOutputReadOffset; - uint32_t VertexURBEntryOutputLength; - uint32_t UserClipDistanceClipTestEnableBitmask; - uint32_t UserClipDistanceCullTestEnableBitmask; -}; - -static inline void -GEN8_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint64_t qw1 = - __gen_offset(values->KernelStartPointer, 6, 63) | - 0; - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->SingleProgramFlow, 31, 31) | - __gen_field(values->VectorMaskEnable, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadDispatchPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - 
__gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->AccessesUAV, 12, 12) | - __gen_field(values->MaskStackExceptionEnable, 11, 11) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - __gen_field(values->ExpectedVertexCount, 0, 5) | - 0; - - uint64_t qw4 = - __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = qw4; - dw[5] = qw4 >> 32; - - dw[6] = - __gen_field(values->OutputVertexSize, 23, 28) | - __gen_field(values->OutputTopology, 17, 22) | - __gen_field(values->VertexURBEntryReadLength, 11, 16) | - __gen_field(values->IncludeVertexHandles, 10, 10) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - __gen_field(values->DispatchGRFStartRegisterForURBData, 0, 3) | - 0; - - dw[7] = - __gen_field(values->MaximumNumberofThreads, 24, 31) | - __gen_field(values->ControlDataHeaderSize, 20, 23) | - __gen_field(values->InstanceControl, 15, 19) | - __gen_field(values->DefaultStreamId, 13, 14) | - __gen_field(values->DispatchMode, 11, 12) | - __gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->InvocationsIncrementValue, 5, 9) | - __gen_field(values->IncludePrimitiveID, 4, 4) | - __gen_field(values->Hint, 3, 3) | - __gen_field(values->ReorderMode, 2, 2) | - __gen_field(values->DiscardAdjacency, 1, 1) | - __gen_field(values->Enable, 0, 0) | - 0; - - dw[8] = - __gen_field(values->ControlDataFormat, 31, 31) | - __gen_field(values->StaticOutput, 30, 30) | - __gen_field(values->StaticOutputVertexCount, 16, 26) | - 0; - - dw[9] = - __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | - __gen_field(values->VertexURBEntryOutputLength, 16, 20) | - __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | - __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | - 0; - -} - -#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 -#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType 
= 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 7, \ - .DwordLength = 3 - -#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000005 - -struct GEN8_3DSTATE_HIER_DEPTH_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; - uint32_t SurfaceQPitch; -}; - -static inline void -GEN8_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_HIER_DEPTH_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_HierarchicalDepthBufferObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); - dw[1] = - __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 31) | - __gen_field(values->SurfacePitch, 0, 16) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - dw[4] = - __gen_field(values->SurfaceQPitch, 0, 14) | - 0; - -} - -#define GEN8_3DSTATE_HS_length_bias 0x00000002 -#define GEN8_3DSTATE_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 27, \ - .DwordLength = 7 - -#define GEN8_3DSTATE_HS_length 0x00000009 - -struct GEN8_3DSTATE_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define NoSamplers 0 -#define _14Samplers 1 
-#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadDispatchPriority; -#define IEEE754 0 -#define alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool SoftwareExceptionEnable; - bool Enable; - bool StatisticsEnable; - uint32_t MaximumNumberofThreads; - uint32_t InstanceCount; - uint64_t KernelStartPointer; - uint64_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - bool SingleProgramFlow; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; - bool AccessesUAV; - bool IncludeVertexHandles; - uint32_t DispatchGRFStartRegisterForURBData; - uint32_t VertexURBEntryReadLength; - uint32_t VertexURBEntryReadOffset; -}; - -static inline void -GEN8_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadDispatchPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->SoftwareExceptionEnable, 12, 12) | - 0; - - dw[2] = - __gen_field(values->Enable, 31, 31) | - __gen_field(values->StatisticsEnable, 29, 29) | - __gen_field(values->MaximumNumberofThreads, 8, 16) | - __gen_field(values->InstanceCount, 0, 3) | - 0; - - uint64_t qw3 = - __gen_offset(values->KernelStartPointer, 6, 63) | - 0; - - dw[3] = qw3; - dw[4] = qw3 >> 32; - - uint64_t qw5 = - __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | - 
__gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[5] = qw5; - dw[6] = qw5 >> 32; - - dw[7] = - __gen_field(values->SingleProgramFlow, 27, 27) | - __gen_field(values->VectorMaskEnable, 26, 26) | - __gen_field(values->AccessesUAV, 25, 25) | - __gen_field(values->IncludeVertexHandles, 24, 24) | - __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | - __gen_field(values->VertexURBEntryReadLength, 11, 16) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - 0; - - dw[8] = - 0; - -} - -#define GEN8_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 -#define GEN8_3DSTATE_INDEX_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 10, \ - .DwordLength = 3 - -#define GEN8_3DSTATE_INDEX_BUFFER_length 0x00000005 - -struct GEN8_3DSTATE_INDEX_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define INDEX_BYTE 0 -#define INDEX_WORD 1 -#define INDEX_DWORD 2 - uint32_t IndexFormat; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; - __gen_address_type BufferStartingAddress; - uint32_t BufferSize; -}; - -static inline void -GEN8_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_INDEX_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_MemoryObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); - dw[1] = - __gen_field(values->IndexFormat, 8, 9) | - __gen_field(dw_MemoryObjectControlState, 0, 6) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, 
&dw[2], values->BufferStartingAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - dw[4] = - __gen_field(values->BufferSize, 0, 31) | - 0; - -} - -#define GEN8_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 -#define GEN8_3DSTATE_LINE_STIPPLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 8, \ - .DwordLength = 1 - -#define GEN8_3DSTATE_LINE_STIPPLE_length 0x00000003 - -struct GEN8_3DSTATE_LINE_STIPPLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; - uint32_t CurrentRepeatCounter; - uint32_t CurrentStippleIndex; - uint32_t LineStipplePattern; - float LineStippleInverseRepeatCount; - uint32_t LineStippleRepeatCount; -}; - -static inline void -GEN8_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_LINE_STIPPLE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | - __gen_field(values->CurrentRepeatCounter, 21, 29) | - __gen_field(values->CurrentStippleIndex, 16, 19) | - __gen_field(values->LineStipplePattern, 0, 15) | - 0; - - dw[2] = - __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | - __gen_field(values->LineStippleRepeatCount, 0, 8) | - 0; - -} - -#define GEN8_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 -#define GEN8_3DSTATE_MONOFILTER_SIZE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 17, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_MONOFILTER_SIZE_length 
0x00000002 - -struct GEN8_3DSTATE_MONOFILTER_SIZE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t MonochromeFilterWidth; - uint32_t MonochromeFilterHeight; -}; - -static inline void -GEN8_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_MONOFILTER_SIZE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->MonochromeFilterWidth, 3, 5) | - __gen_field(values->MonochromeFilterHeight, 0, 2) | - 0; - -} - -#define GEN8_3DSTATE_MULTISAMPLE_length_bias 0x00000002 -#define GEN8_3DSTATE_MULTISAMPLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 13, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_MULTISAMPLE_length 0x00000002 - -struct GEN8_3DSTATE_MULTISAMPLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PixelPositionOffsetEnable; -#define CENTER 0 -#define UL_CORNER 1 - uint32_t PixelLocation; - uint32_t NumberofMultisamples; -}; - -static inline void -GEN8_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_MULTISAMPLE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->PixelPositionOffsetEnable, 5, 5) | - 
__gen_field(values->PixelLocation, 4, 4) | - __gen_field(values->NumberofMultisamples, 1, 3) | - 0; - -} - -#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 -#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 6, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 - -struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PolygonStippleXOffset; - uint32_t PolygonStippleYOffset; -}; - -static inline void -GEN8_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->PolygonStippleXOffset, 8, 12) | - __gen_field(values->PolygonStippleYOffset, 0, 4) | - 0; - -} - -#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 -#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 7, \ - .DwordLength = 31 - -#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 - -struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PatternRow[32]; -}; - -static inline void -GEN8_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) 
dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { - dw[j] = - __gen_field(values->PatternRow[i + 0], 0, 31) | - 0; - } - -} - -#define GEN8_3DSTATE_PS_length_bias 0x00000002 -#define GEN8_3DSTATE_PS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 32, \ - .DwordLength = 10 - -#define GEN8_3DSTATE_PS_length 0x0000000c - -struct GEN8_3DSTATE_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint64_t KernelStartPointer0; -#define Multiple 0 -#define Single 1 - uint32_t SingleProgramFlow; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; -#define FlushedtoZero 0 -#define Retained 1 - uint32_t SinglePrecisionDenormalMode; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadDispatchPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; -#define RTNE 0 -#define RU 1 -#define RD 2 -#define RTZ 3 - uint32_t RoundingMode; - bool IllegalOpcodeExceptionEnable; - bool MaskStackExceptionEnable; - bool SoftwareExceptionEnable; - uint64_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t MaximumNumberofThreadsPerPSD; - bool PushConstantEnable; - bool RenderTargetFastClearEnable; - bool RenderTargetResolveEnable; -#define POSOFFSET_NONE 0 -#define POSOFFSET_CENTROID 2 -#define POSOFFSET_SAMPLE 3 - uint32_t PositionXYOffsetSelect; - bool _32PixelDispatchEnable; - bool _16PixelDispatchEnable; - bool _8PixelDispatchEnable; - uint32_t 
DispatchGRFStartRegisterForConstantSetupData0; - uint32_t DispatchGRFStartRegisterForConstantSetupData1; - uint32_t DispatchGRFStartRegisterForConstantSetupData2; - uint64_t KernelStartPointer1; - uint64_t KernelStartPointer2; -}; - -static inline void -GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint64_t qw1 = - __gen_offset(values->KernelStartPointer0, 6, 63) | - 0; - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->SingleProgramFlow, 31, 31) | - __gen_field(values->VectorMaskEnable, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->SinglePrecisionDenormalMode, 26, 26) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadDispatchPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->RoundingMode, 14, 15) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->MaskStackExceptionEnable, 11, 11) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - uint64_t qw4 = - __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = qw4; - dw[5] = qw4 >> 32; - - dw[6] = - __gen_field(values->MaximumNumberofThreadsPerPSD, 23, 31) | - __gen_field(values->PushConstantEnable, 11, 11) | - __gen_field(values->RenderTargetFastClearEnable, 8, 8) | - __gen_field(values->RenderTargetResolveEnable, 6, 6) | - __gen_field(values->PositionXYOffsetSelect, 3, 4) | - __gen_field(values->_32PixelDispatchEnable, 2, 2) | - __gen_field(values->_16PixelDispatchEnable, 1, 1) | - 
__gen_field(values->_8PixelDispatchEnable, 0, 0) | - 0; - - dw[7] = - __gen_field(values->DispatchGRFStartRegisterForConstantSetupData0, 16, 22) | - __gen_field(values->DispatchGRFStartRegisterForConstantSetupData1, 8, 14) | - __gen_field(values->DispatchGRFStartRegisterForConstantSetupData2, 0, 6) | - 0; - - uint64_t qw8 = - __gen_offset(values->KernelStartPointer1, 6, 63) | - 0; - - dw[8] = qw8; - dw[9] = qw8 >> 32; - - uint64_t qw10 = - __gen_offset(values->KernelStartPointer2, 6, 63) | - 0; - - dw[10] = qw10; - dw[11] = qw10 >> 32; - -} - -#define GEN8_3DSTATE_PS_BLEND_length_bias 0x00000002 -#define GEN8_3DSTATE_PS_BLEND_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 77, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_PS_BLEND_length 0x00000002 - -struct GEN8_3DSTATE_PS_BLEND { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool AlphaToCoverageEnable; - bool HasWriteableRT; - bool ColorBufferBlendEnable; - uint32_t SourceAlphaBlendFactor; - uint32_t DestinationAlphaBlendFactor; - uint32_t SourceBlendFactor; - uint32_t DestinationBlendFactor; - bool AlphaTestEnable; - bool IndependentAlphaBlendEnable; -}; - -static inline void -GEN8_3DSTATE_PS_BLEND_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PS_BLEND * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->AlphaToCoverageEnable, 31, 31) | - __gen_field(values->HasWriteableRT, 30, 30) | - __gen_field(values->ColorBufferBlendEnable, 29, 29) | - __gen_field(values->SourceAlphaBlendFactor, 24, 28) | - 
__gen_field(values->DestinationAlphaBlendFactor, 19, 23) | - __gen_field(values->SourceBlendFactor, 14, 18) | - __gen_field(values->DestinationBlendFactor, 9, 13) | - __gen_field(values->AlphaTestEnable, 8, 8) | - __gen_field(values->IndependentAlphaBlendEnable, 7, 7) | - 0; - -} - -#define GEN8_3DSTATE_PS_EXTRA_length_bias 0x00000002 -#define GEN8_3DSTATE_PS_EXTRA_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 79, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_PS_EXTRA_length 0x00000002 - -struct GEN8_3DSTATE_PS_EXTRA { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool PixelShaderValid; - bool PixelShaderDoesnotwritetoRT; - bool oMaskPresenttoRenderTarget; - bool PixelShaderKillsPixel; -#define PSCDEPTH_OFF 0 -#define PSCDEPTH_ON 1 -#define PSCDEPTH_ON_GE 2 -#define PSCDEPTH_ON_LE 3 - uint32_t PixelShaderComputedDepthMode; - bool ForceComputedDepth; - bool PixelShaderUsesSourceDepth; - bool PixelShaderUsesSourceW; - uint32_t Removed; - bool AttributeEnable; - bool PixelShaderDisablesAlphaToCoverage; - bool PixelShaderIsPerSample; - bool PixelShaderHasUAV; - bool PixelShaderUsesInputCoverageMask; -}; - -static inline void -GEN8_3DSTATE_PS_EXTRA_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PS_EXTRA * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->PixelShaderValid, 31, 31) | - __gen_field(values->PixelShaderDoesnotwritetoRT, 30, 30) | - __gen_field(values->oMaskPresenttoRenderTarget, 29, 29) | - __gen_field(values->PixelShaderKillsPixel, 28, 28) | - 
__gen_field(values->PixelShaderComputedDepthMode, 26, 27) | - __gen_field(values->ForceComputedDepth, 25, 25) | - __gen_field(values->PixelShaderUsesSourceDepth, 24, 24) | - __gen_field(values->PixelShaderUsesSourceW, 23, 23) | - __gen_field(values->Removed, 17, 17) | - __gen_field(values->AttributeEnable, 8, 8) | - __gen_field(values->PixelShaderDisablesAlphaToCoverage, 7, 7) | - __gen_field(values->PixelShaderIsPerSample, 6, 6) | - __gen_field(values->PixelShaderHasUAV, 2, 2) | - __gen_field(values->PixelShaderUsesInputCoverageMask, 1, 1) | - 0; - -} - -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 20, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 - -struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 21, \ - .DwordLength = 0 - -#define 
GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 - -struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 19, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 - -struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - 
__gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 22, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 - -struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 18, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 - -struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN8_3DSTATE_RASTER_length_bias 0x00000002 -#define GEN8_3DSTATE_RASTER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 80, \ - .DwordLength = 3 - -#define GEN8_3DSTATE_RASTER_length 0x00000005 - -struct GEN8_3DSTATE_RASTER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define DX9OGL 0 -#define DX100 1 -#define DX101 2 - uint32_t APIMode; -#define Clockwise 0 -#define CounterClockwise 1 - uint32_t FrontWinding; -#define FSC_NUMRASTSAMPLES_0 0 -#define FSC_NUMRASTSAMPLES_1 1 -#define FSC_NUMRASTSAMPLES_2 2 -#define FSC_NUMRASTSAMPLES_4 3 -#define FSC_NUMRASTSAMPLES_8 4 -#define FSC_NUMRASTSAMPLES_16 5 - uint32_t ForcedSampleCount; -#define CULLMODE_BOTH 0 -#define CULLMODE_NONE 1 -#define CULLMODE_FRONT 2 -#define CULLMODE_BACK 3 - uint32_t CullMode; -#define Normal 0 -#define Force 1 - uint32_t ForceMultisampling; - bool SmoothPointEnable; - bool DXMultisampleRasterizationEnable; -#define MSRASTMODE_OFF_PIXEL 0 -#define MSRASTMODE_OFF_PATTERN 1 -#define MSRASTMODE_ON_PIXEL 2 -#define MSRASTMODE_ON_PATTERN 3 - uint32_t DXMultisampleRasterizationMode; - bool GlobalDepthOffsetEnableSolid; - bool GlobalDepthOffsetEnableWireframe; - bool GlobalDepthOffsetEnablePoint; -#define RASTER_SOLID 0 -#define RASTER_WIREFRAME 1 -#define RASTER_POINT 2 - uint32_t FrontFaceFillMode; -#define RASTER_SOLID 0 -#define RASTER_WIREFRAME 1 -#define 
RASTER_POINT 2 - uint32_t BackFaceFillMode; - bool AntialiasingEnable; - bool ScissorRectangleEnable; - bool ViewportZClipTestEnable; - float GlobalDepthOffsetConstant; - float GlobalDepthOffsetScale; - float GlobalDepthOffsetClamp; -}; - -static inline void -GEN8_3DSTATE_RASTER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_RASTER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->APIMode, 22, 23) | - __gen_field(values->FrontWinding, 21, 21) | - __gen_field(values->ForcedSampleCount, 18, 20) | - __gen_field(values->CullMode, 16, 17) | - __gen_field(values->ForceMultisampling, 14, 14) | - __gen_field(values->SmoothPointEnable, 13, 13) | - __gen_field(values->DXMultisampleRasterizationEnable, 12, 12) | - __gen_field(values->DXMultisampleRasterizationMode, 10, 11) | - __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | - __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | - __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | - __gen_field(values->FrontFaceFillMode, 5, 6) | - __gen_field(values->BackFaceFillMode, 3, 4) | - __gen_field(values->AntialiasingEnable, 2, 2) | - __gen_field(values->ScissorRectangleEnable, 1, 1) | - __gen_field(values->ViewportZClipTestEnable, 0, 0) | - 0; - - dw[2] = - __gen_float(values->GlobalDepthOffsetConstant) | - 0; - - dw[3] = - __gen_float(values->GlobalDepthOffsetScale) | - 0; - - dw[4] = - __gen_float(values->GlobalDepthOffsetClamp) | - 0; - -} - -#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 -#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 2 - -#define 
GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 - -#define GEN8_PALETTE_ENTRY_length 0x00000001 - -struct GEN8_PALETTE_ENTRY { - uint32_t Alpha; - uint32_t Red; - uint32_t Green; - uint32_t Blue; -}; - -static inline void -GEN8_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_PALETTE_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->Alpha, 24, 31) | - __gen_field(values->Red, 16, 23) | - __gen_field(values->Green, 8, 15) | - __gen_field(values->Blue, 0, 7) | - 0; - -} - -struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 -#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 12 - -#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 - -struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict 
values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 45, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 - -struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoDSSamplerState; -}; - -static inline void -GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoDSSamplerState, 5, 31) | - 0; - -} - -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 46, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 - -struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t 
PointertoGSSamplerState; -}; - -static inline void -GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoGSSamplerState, 5, 31) | - 0; - -} - -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 44, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 - -struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoHSSamplerState; -}; - -static inline void -GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoHSSamplerState, 5, 31) | - 0; - -} - -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 47, \ - .DwordLength = 0 - -#define 
GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 - -struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoPSSamplerState; -}; - -static inline void -GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoPSSamplerState, 5, 31) | - 0; - -} - -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 43, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 - -struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoVSSamplerState; -}; - -static inline void -GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoVSSamplerState, 5, 31) | - 0; - -} - -#define 
GEN8_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 -#define GEN8_3DSTATE_SAMPLE_MASK_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 24, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_SAMPLE_MASK_length 0x00000002 - -struct GEN8_3DSTATE_SAMPLE_MASK { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SampleMask; -}; - -static inline void -GEN8_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLE_MASK * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SampleMask, 0, 15) | - 0; - -} - -#define GEN8_3DSTATE_SAMPLE_PATTERN_length_bias 0x00000002 -#define GEN8_3DSTATE_SAMPLE_PATTERN_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 28, \ - .DwordLength = 7 - -#define GEN8_3DSTATE_SAMPLE_PATTERN_length 0x00000009 - -struct GEN8_3DSTATE_SAMPLE_PATTERN { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - float _8xSample7XOffset; - float _8xSample7YOffset; - float _8xSample6XOffset; - float _8xSample6YOffset; - float _8xSample5XOffset; - float _8xSample5YOffset; - float _8xSample4XOffset; - float _8xSample4YOffset; - float _8xSample3XOffset; - float _8xSample3YOffset; - float _8xSample2XOffset; - float _8xSample2YOffset; - float _8xSample1XOffset; - float _8xSample1YOffset; - float _8xSample0XOffset; - float _8xSample0YOffset; - float _4xSample3XOffset; - float _4xSample3YOffset; - float _4xSample2XOffset; - float _4xSample2YOffset; - 
float _4xSample1XOffset; - float _4xSample1YOffset; - float _4xSample0XOffset; - float _4xSample0YOffset; - float _1xSample0XOffset; - float _1xSample0YOffset; - float _2xSample1XOffset; - float _2xSample1YOffset; - float _2xSample0XOffset; - float _2xSample0YOffset; -}; - -static inline void -GEN8_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SAMPLE_PATTERN * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - for (uint32_t i = 0, j = 1; i < 4; i += 1, j++) { - dw[j] = - 0; - } - - dw[5] = - __gen_field(values->_8xSample7XOffset * (1 << 4), 28, 31) | - __gen_field(values->_8xSample7YOffset * (1 << 4), 24, 27) | - __gen_field(values->_8xSample6XOffset * (1 << 4), 20, 23) | - __gen_field(values->_8xSample6YOffset * (1 << 4), 16, 19) | - __gen_field(values->_8xSample5XOffset * (1 << 4), 12, 15) | - __gen_field(values->_8xSample5YOffset * (1 << 4), 8, 11) | - __gen_field(values->_8xSample4XOffset * (1 << 4), 4, 7) | - __gen_field(values->_8xSample4YOffset * (1 << 4), 0, 3) | - 0; - - dw[6] = - __gen_field(values->_8xSample3XOffset * (1 << 4), 28, 31) | - __gen_field(values->_8xSample3YOffset * (1 << 4), 24, 27) | - __gen_field(values->_8xSample2XOffset * (1 << 4), 20, 23) | - __gen_field(values->_8xSample2YOffset * (1 << 4), 16, 19) | - __gen_field(values->_8xSample1XOffset * (1 << 4), 12, 15) | - __gen_field(values->_8xSample1YOffset * (1 << 4), 8, 11) | - __gen_field(values->_8xSample0XOffset * (1 << 4), 4, 7) | - __gen_field(values->_8xSample0YOffset * (1 << 4), 0, 3) | - 0; - - dw[7] = - __gen_field(values->_4xSample3XOffset * (1 << 4), 28, 31) | - __gen_field(values->_4xSample3YOffset * (1 << 4), 24, 27) | - 
__gen_field(values->_4xSample2XOffset * (1 << 4), 20, 23) | - __gen_field(values->_4xSample2YOffset * (1 << 4), 16, 19) | - __gen_field(values->_4xSample1XOffset * (1 << 4), 12, 15) | - __gen_field(values->_4xSample1YOffset * (1 << 4), 8, 11) | - __gen_field(values->_4xSample0XOffset * (1 << 4), 4, 7) | - __gen_field(values->_4xSample0YOffset * (1 << 4), 0, 3) | - 0; - - dw[8] = - __gen_field(values->_1xSample0XOffset * (1 << 4), 20, 23) | - __gen_field(values->_1xSample0YOffset * (1 << 4), 16, 19) | - __gen_field(values->_2xSample1XOffset * (1 << 4), 12, 15) | - __gen_field(values->_2xSample1YOffset * (1 << 4), 8, 11) | - __gen_field(values->_2xSample0XOffset * (1 << 4), 4, 7) | - __gen_field(values->_2xSample0YOffset * (1 << 4), 0, 3) | - 0; - -} - -#define GEN8_3DSTATE_SBE_length_bias 0x00000002 -#define GEN8_3DSTATE_SBE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 31, \ - .DwordLength = 2 - -#define GEN8_3DSTATE_SBE_length 0x00000004 - -struct GEN8_3DSTATE_SBE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool ForceVertexURBEntryReadLength; - bool ForceVertexURBEntryReadOffset; - uint32_t NumberofSFOutputAttributes; - bool AttributeSwizzleEnable; -#define UPPERLEFT 0 -#define LOWERLEFT 1 - uint32_t PointSpriteTextureCoordinateOrigin; - bool PrimitiveIDOverrideComponentW; - bool PrimitiveIDOverrideComponentZ; - bool PrimitiveIDOverrideComponentY; - bool PrimitiveIDOverrideComponentX; - uint32_t VertexURBEntryReadLength; - uint32_t VertexURBEntryReadOffset; - uint32_t PrimitiveIDOverrideAttributeSelect; - uint32_t PointSpriteTextureCoordinateEnable; - uint32_t ConstantInterpolationEnable; -}; - -static inline void -GEN8_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SBE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - 
__gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ForceVertexURBEntryReadLength, 29, 29) | - __gen_field(values->ForceVertexURBEntryReadOffset, 28, 28) | - __gen_field(values->NumberofSFOutputAttributes, 22, 27) | - __gen_field(values->AttributeSwizzleEnable, 21, 21) | - __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | - __gen_field(values->PrimitiveIDOverrideComponentW, 19, 19) | - __gen_field(values->PrimitiveIDOverrideComponentZ, 18, 18) | - __gen_field(values->PrimitiveIDOverrideComponentY, 17, 17) | - __gen_field(values->PrimitiveIDOverrideComponentX, 16, 16) | - __gen_field(values->VertexURBEntryReadLength, 11, 15) | - __gen_field(values->VertexURBEntryReadOffset, 5, 10) | - __gen_field(values->PrimitiveIDOverrideAttributeSelect, 0, 4) | - 0; - - dw[2] = - __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | - 0; - - dw[3] = - __gen_field(values->ConstantInterpolationEnable, 0, 31) | - 0; - -} - -#define GEN8_3DSTATE_SBE_SWIZ_length_bias 0x00000002 -#define GEN8_3DSTATE_SBE_SWIZ_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 81, \ - .DwordLength = 9 - -#define GEN8_3DSTATE_SBE_SWIZ_length 0x0000000b - -#define GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_length 0x00000001 - -struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL { - bool ComponentOverrideW; - bool ComponentOverrideZ; - bool ComponentOverrideY; - bool ComponentOverrideX; - uint32_t SwizzleControlMode; -#define CONST_0000 0 -#define CONST_0001_FLOAT 1 -#define CONST_1111_FLOAT 2 -#define PRIM_ID 3 - uint32_t ConstantSource; -#define INPUTATTR 0 -#define INPUTATTR_FACING 1 -#define INPUTATTR_W 2 -#define INPUTATTR_FACING_W 3 - uint32_t SwizzleSelect; - uint32_t SourceAttribute; -}; - -static inline void 
-GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ComponentOverrideW, 15, 15) | - __gen_field(values->ComponentOverrideZ, 14, 14) | - __gen_field(values->ComponentOverrideY, 13, 13) | - __gen_field(values->ComponentOverrideX, 12, 12) | - __gen_field(values->SwizzleControlMode, 11, 11) | - __gen_field(values->ConstantSource, 9, 10) | - __gen_field(values->SwizzleSelect, 6, 7) | - __gen_field(values->SourceAttribute, 0, 4) | - 0; - -} - -struct GEN8_3DSTATE_SBE_SWIZ { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL Attribute[16]; - uint32_t AttributeWrapShortestEnables[16]; -}; - -static inline void -GEN8_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SBE_SWIZ * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - for (uint32_t i = 0, j = 1; i < 16; i += 2, j++) { - uint32_t dw_Attribute0; - GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute0, &values->Attribute[i + 0]); - uint32_t dw_Attribute1; - GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute1, &values->Attribute[i + 1]); - dw[j] = - __gen_field(dw_Attribute0, 0, 15) | - __gen_field(dw_Attribute1, 16, 31) | - 0; - } - - for (uint32_t i = 0, j = 9; i < 16; i += 8, j++) { - dw[j] = - __gen_field(values->AttributeWrapShortestEnables[i + 0], 0, 3) | - __gen_field(values->AttributeWrapShortestEnables[i + 1], 4, 7) | - __gen_field(values->AttributeWrapShortestEnables[i + 2], 8, 11) | - 
__gen_field(values->AttributeWrapShortestEnables[i + 3], 12, 15) | - __gen_field(values->AttributeWrapShortestEnables[i + 4], 16, 19) | - __gen_field(values->AttributeWrapShortestEnables[i + 5], 20, 23) | - __gen_field(values->AttributeWrapShortestEnables[i + 6], 24, 27) | - __gen_field(values->AttributeWrapShortestEnables[i + 7], 28, 31) | - 0; - } - -} - -#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 -#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 15, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 - -struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ScissorRectPointer; -}; - -static inline void -GEN8_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->ScissorRectPointer, 5, 31) | - 0; - -} - -#define GEN8_3DSTATE_SF_length_bias 0x00000002 -#define GEN8_3DSTATE_SF_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 19, \ - .DwordLength = 2 - -#define GEN8_3DSTATE_SF_length 0x00000004 - -struct GEN8_3DSTATE_SF { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool LegacyGlobalDepthBiasEnable; - bool StatisticsEnable; - bool ViewportTransformEnable; - float LineWidth; -#define _05pixels 0 
-#define _10pixels 1 -#define _20pixels 2 -#define _40pixels 3 - uint32_t LineEndCapAntialiasingRegionWidth; - bool LastPixelEnable; - uint32_t TriangleStripListProvokingVertexSelect; - uint32_t LineStripListProvokingVertexSelect; - uint32_t TriangleFanProvokingVertexSelect; -#define AALINEDISTANCE_TRUE 1 - uint32_t AALineDistanceMode; - bool SmoothPointEnable; - uint32_t VertexSubPixelPrecisionSelect; -#define Vertex 0 -#define State 1 - uint32_t PointWidthSource; - float PointWidth; -}; - -static inline void -GEN8_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SF * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | - __gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->ViewportTransformEnable, 1, 1) | - 0; - - dw[2] = - __gen_field(values->LineWidth * (1 << 7), 18, 27) | - __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | - 0; - - dw[3] = - __gen_field(values->LastPixelEnable, 31, 31) | - __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | - __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | - __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | - __gen_field(values->AALineDistanceMode, 14, 14) | - __gen_field(values->SmoothPointEnable, 13, 13) | - __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | - __gen_field(values->PointWidthSource, 11, 11) | - __gen_field(values->PointWidth * (1 << 3), 0, 10) | - 0; - -} - -#define GEN8_3DSTATE_SO_BUFFER_length_bias 0x00000002 -#define GEN8_3DSTATE_SO_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 
24, \ - .DwordLength = 6 - -#define GEN8_3DSTATE_SO_BUFFER_length 0x00000008 - -struct GEN8_3DSTATE_SO_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool SOBufferEnable; - uint32_t SOBufferIndex; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; - bool StreamOffsetWriteEnable; - bool StreamOutputBufferOffsetAddressEnable; - __gen_address_type SurfaceBaseAddress; - uint32_t SurfaceSize; - __gen_address_type StreamOutputBufferOffsetAddress; - uint32_t StreamOffset; -}; - -static inline void -GEN8_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SO_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_SOBufferObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); - dw[1] = - __gen_field(values->SOBufferEnable, 31, 31) | - __gen_field(values->SOBufferIndex, 29, 30) | - __gen_field(dw_SOBufferObjectControlState, 22, 28) | - __gen_field(values->StreamOffsetWriteEnable, 21, 21) | - __gen_field(values->StreamOutputBufferOffsetAddressEnable, 20, 20) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - dw[4] = - __gen_field(values->SurfaceSize, 0, 29) | - 0; - - uint32_t dw5 = - 0; - - uint64_t qw5 = - __gen_combine_address(data, &dw[5], values->StreamOutputBufferOffsetAddress, dw5); - - dw[5] = qw5; - dw[6] = qw5 >> 32; - - dw[7] = - __gen_field(values->StreamOffset, 0, 31) | - 0; - -} - -#define GEN8_3DSTATE_SO_DECL_LIST_length_bias 
0x00000002 -#define GEN8_3DSTATE_SO_DECL_LIST_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 23 - -#define GEN8_3DSTATE_SO_DECL_LIST_length 0x00000000 - -#define GEN8_SO_DECL_ENTRY_length 0x00000002 - -#define GEN8_SO_DECL_length 0x00000001 - -struct GEN8_SO_DECL { - uint32_t OutputBufferSlot; - uint32_t HoleFlag; - uint32_t RegisterIndex; - uint32_t ComponentMask; -}; - -static inline void -GEN8_SO_DECL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SO_DECL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->OutputBufferSlot, 12, 13) | - __gen_field(values->HoleFlag, 11, 11) | - __gen_field(values->RegisterIndex, 4, 9) | - __gen_field(values->ComponentMask, 0, 3) | - 0; - -} - -struct GEN8_SO_DECL_ENTRY { - struct GEN8_SO_DECL Stream3Decl; - struct GEN8_SO_DECL Stream2Decl; - struct GEN8_SO_DECL Stream1Decl; - struct GEN8_SO_DECL Stream0Decl; -}; - -static inline void -GEN8_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SO_DECL_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_Stream3Decl; - GEN8_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); - uint32_t dw_Stream2Decl; - GEN8_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); - uint32_t dw_Stream1Decl; - GEN8_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); - uint32_t dw_Stream0Decl; - GEN8_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); - uint64_t qw0 = - __gen_field(dw_Stream3Decl, 48, 63) | - __gen_field(dw_Stream2Decl, 32, 47) | - __gen_field(dw_Stream1Decl, 16, 31) | - __gen_field(dw_Stream0Decl, 0, 15) | - 0; - - dw[0] = qw0; - dw[1] = qw0 >> 32; - -} - -struct GEN8_3DSTATE_SO_DECL_LIST { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t StreamtoBufferSelects3; - uint32_t 
StreamtoBufferSelects2; - uint32_t StreamtoBufferSelects1; - uint32_t StreamtoBufferSelects0; - uint32_t NumEntries3; - uint32_t NumEntries2; - uint32_t NumEntries1; - uint32_t NumEntries0; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_SO_DECL_LIST * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->StreamtoBufferSelects3, 12, 15) | - __gen_field(values->StreamtoBufferSelects2, 8, 11) | - __gen_field(values->StreamtoBufferSelects1, 4, 7) | - __gen_field(values->StreamtoBufferSelects0, 0, 3) | - 0; - - dw[2] = - __gen_field(values->NumEntries3, 24, 31) | - __gen_field(values->NumEntries2, 16, 23) | - __gen_field(values->NumEntries1, 8, 15) | - __gen_field(values->NumEntries0, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 -#define GEN8_3DSTATE_STENCIL_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 6, \ - .DwordLength = 3 - -#define GEN8_3DSTATE_STENCIL_BUFFER_length 0x00000005 - -struct GEN8_3DSTATE_STENCIL_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t StencilBufferEnable; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; - uint32_t SurfaceQPitch; -}; - -static inline void -GEN8_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_STENCIL_BUFFER * restrict values) -{ - 
uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_StencilBufferObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); - dw[1] = - __gen_field(values->StencilBufferEnable, 31, 31) | - __gen_field(dw_StencilBufferObjectControlState, 22, 28) | - __gen_field(values->SurfacePitch, 0, 16) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - dw[4] = - __gen_field(values->SurfaceQPitch, 0, 14) | - 0; - -} - -#define GEN8_3DSTATE_STREAMOUT_length_bias 0x00000002 -#define GEN8_3DSTATE_STREAMOUT_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 30, \ - .DwordLength = 3 - -#define GEN8_3DSTATE_STREAMOUT_length 0x00000005 - -struct GEN8_3DSTATE_STREAMOUT { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SOFunctionEnable; - uint32_t APIRenderingDisable; - uint32_t RenderStreamSelect; -#define LEADING 0 -#define TRAILING 1 - uint32_t ReorderMode; - bool SOStatisticsEnable; -#define Normal 0 -#define Resreved 1 -#define Force_Off 2 -#define Force_on 3 - uint32_t ForceRendering; - uint32_t Stream3VertexReadOffset; - uint32_t Stream3VertexReadLength; - uint32_t Stream2VertexReadOffset; - uint32_t Stream2VertexReadLength; - uint32_t Stream1VertexReadOffset; - uint32_t Stream1VertexReadLength; - uint32_t Stream0VertexReadOffset; - uint32_t Stream0VertexReadLength; - uint32_t Buffer1SurfacePitch; - uint32_t Buffer0SurfacePitch; - uint32_t Buffer3SurfacePitch; - uint32_t 
Buffer2SurfacePitch; -}; - -static inline void -GEN8_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_STREAMOUT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SOFunctionEnable, 31, 31) | - __gen_field(values->APIRenderingDisable, 30, 30) | - __gen_field(values->RenderStreamSelect, 27, 28) | - __gen_field(values->ReorderMode, 26, 26) | - __gen_field(values->SOStatisticsEnable, 25, 25) | - __gen_field(values->ForceRendering, 23, 24) | - 0; - - dw[2] = - __gen_field(values->Stream3VertexReadOffset, 29, 29) | - __gen_field(values->Stream3VertexReadLength, 24, 28) | - __gen_field(values->Stream2VertexReadOffset, 21, 21) | - __gen_field(values->Stream2VertexReadLength, 16, 20) | - __gen_field(values->Stream1VertexReadOffset, 13, 13) | - __gen_field(values->Stream1VertexReadLength, 8, 12) | - __gen_field(values->Stream0VertexReadOffset, 5, 5) | - __gen_field(values->Stream0VertexReadLength, 0, 4) | - 0; - - dw[3] = - __gen_field(values->Buffer1SurfacePitch, 16, 27) | - __gen_field(values->Buffer0SurfacePitch, 0, 11) | - 0; - - dw[4] = - __gen_field(values->Buffer3SurfacePitch, 16, 27) | - __gen_field(values->Buffer2SurfacePitch, 0, 11) | - 0; - -} - -#define GEN8_3DSTATE_TE_length_bias 0x00000002 -#define GEN8_3DSTATE_TE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 28, \ - .DwordLength = 2 - -#define GEN8_3DSTATE_TE_length 0x00000004 - -struct GEN8_3DSTATE_TE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define INTEGER 0 -#define ODD_FRACTIONAL 1 -#define 
EVEN_FRACTIONAL 2 - uint32_t Partitioning; -#define POINT 0 -#define OUTPUT_LINE 1 -#define OUTPUT_TRI_CW 2 -#define OUTPUT_TRI_CCW 3 - uint32_t OutputTopology; -#define QUAD 0 -#define TRI 1 -#define ISOLINE 2 - uint32_t TEDomain; -#define HW_TESS 0 -#define SW_TESS 1 - uint32_t TEMode; - bool TEEnable; - float MaximumTessellationFactorOdd; - float MaximumTessellationFactorNotOdd; -}; - -static inline void -GEN8_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_TE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->Partitioning, 12, 13) | - __gen_field(values->OutputTopology, 8, 9) | - __gen_field(values->TEDomain, 4, 5) | - __gen_field(values->TEMode, 1, 2) | - __gen_field(values->TEEnable, 0, 0) | - 0; - - dw[2] = - __gen_float(values->MaximumTessellationFactorOdd) | - 0; - - dw[3] = - __gen_float(values->MaximumTessellationFactorNotOdd) | - 0; - -} - -#define GEN8_3DSTATE_URB_DS_length_bias 0x00000002 -#define GEN8_3DSTATE_URB_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 50, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_URB_DS_length 0x00000002 - -struct GEN8_3DSTATE_URB_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t DSURBStartingAddress; - uint32_t DSURBEntryAllocationSize; - uint32_t DSNumberofURBEntries; -}; - -static inline void -GEN8_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_URB_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - 
__gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->DSURBStartingAddress, 25, 31) | - __gen_field(values->DSURBEntryAllocationSize, 16, 24) | - __gen_field(values->DSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN8_3DSTATE_URB_GS_length_bias 0x00000002 -#define GEN8_3DSTATE_URB_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 51, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_URB_GS_length 0x00000002 - -struct GEN8_3DSTATE_URB_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t GSURBStartingAddress; - uint32_t GSURBEntryAllocationSize; - uint32_t GSNumberofURBEntries; -}; - -static inline void -GEN8_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_URB_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->GSURBStartingAddress, 25, 31) | - __gen_field(values->GSURBEntryAllocationSize, 16, 24) | - __gen_field(values->GSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN8_3DSTATE_URB_HS_length_bias 0x00000002 -#define GEN8_3DSTATE_URB_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 49, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_URB_HS_length 0x00000002 - -struct GEN8_3DSTATE_URB_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - 
uint32_t HSURBStartingAddress; - uint32_t HSURBEntryAllocationSize; - uint32_t HSNumberofURBEntries; -}; - -static inline void -GEN8_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_URB_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->HSURBStartingAddress, 25, 31) | - __gen_field(values->HSURBEntryAllocationSize, 16, 24) | - __gen_field(values->HSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN8_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 -#define GEN8_3DSTATE_VERTEX_BUFFERS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 8 - -#define GEN8_3DSTATE_VERTEX_BUFFERS_length 0x00000000 - -#define GEN8_VERTEX_BUFFER_STATE_length 0x00000004 - -struct GEN8_VERTEX_BUFFER_STATE { - uint32_t VertexBufferIndex; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; - uint32_t AddressModifyEnable; - bool NullVertexBuffer; - uint32_t BufferPitch; - __gen_address_type BufferStartingAddress; - uint32_t BufferSize; -}; - -static inline void -GEN8_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_VERTEX_BUFFER_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_MemoryObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - __gen_field(dw_MemoryObjectControlState, 16, 22) | - __gen_field(values->AddressModifyEnable, 14, 14) | - __gen_field(values->NullVertexBuffer, 13, 13) | - __gen_field(values->BufferPitch, 0, 11) | - 0; - - uint32_t dw1 = - 0; - - 
uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->BufferSize, 0, 31) | - 0; - -} - -struct GEN8_3DSTATE_VERTEX_BUFFERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VERTEX_BUFFERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 -#define GEN8_3DSTATE_VERTEX_ELEMENTS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 9 - -#define GEN8_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 - -#define GEN8_VERTEX_ELEMENT_STATE_length 0x00000002 - -struct GEN8_VERTEX_ELEMENT_STATE { - uint32_t VertexBufferIndex; - bool Valid; - uint32_t SourceElementFormat; - bool EdgeFlagEnable; - uint32_t SourceElementOffset; - uint32_t Component0Control; - uint32_t Component1Control; - uint32_t Component2Control; - uint32_t Component3Control; -}; - -static inline void -GEN8_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_VERTEX_ELEMENT_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - __gen_field(values->Valid, 25, 25) | - __gen_field(values->SourceElementFormat, 16, 24) | - __gen_field(values->EdgeFlagEnable, 15, 15) | - __gen_field(values->SourceElementOffset, 0, 11) | - 
0; - - dw[1] = - __gen_field(values->Component0Control, 28, 30) | - __gen_field(values->Component1Control, 24, 26) | - __gen_field(values->Component2Control, 20, 22) | - __gen_field(values->Component3Control, 16, 18) | - 0; - -} - -struct GEN8_3DSTATE_VERTEX_ELEMENTS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN8_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VERTEX_ELEMENTS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_3DSTATE_VF_length_bias 0x00000002 -#define GEN8_3DSTATE_VF_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 12, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_VF_length 0x00000002 - -struct GEN8_3DSTATE_VF { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - bool IndexedDrawCutIndexEnable; - uint32_t DwordLength; - uint32_t CutIndex; -}; - -static inline void -GEN8_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VF * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->IndexedDrawCutIndexEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->CutIndex, 0, 31) | - 0; - -} - 
-#define GEN8_3DSTATE_VF_INSTANCING_length_bias 0x00000002 -#define GEN8_3DSTATE_VF_INSTANCING_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 73, \ - .DwordLength = 1 - -#define GEN8_3DSTATE_VF_INSTANCING_length 0x00000003 - -struct GEN8_3DSTATE_VF_INSTANCING { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool InstancingEnable; - uint32_t VertexElementIndex; - uint32_t InstanceDataStepRate; -}; - -static inline void -GEN8_3DSTATE_VF_INSTANCING_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VF_INSTANCING * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->InstancingEnable, 8, 8) | - __gen_field(values->VertexElementIndex, 0, 5) | - 0; - - dw[2] = - __gen_field(values->InstanceDataStepRate, 0, 31) | - 0; - -} - -#define GEN8_3DSTATE_VF_SGVS_length_bias 0x00000002 -#define GEN8_3DSTATE_VF_SGVS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 74, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_VF_SGVS_length 0x00000002 - -struct GEN8_3DSTATE_VF_SGVS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool InstanceIDEnable; -#define COMP_0 0 -#define COMP_1 1 -#define COMP_2 2 -#define COMP_3 3 - uint32_t InstanceIDComponentNumber; - uint32_t InstanceIDElementOffset; - bool VertexIDEnable; -#define COMP_0 0 -#define COMP_1 1 -#define COMP_2 2 -#define COMP_3 3 - uint32_t VertexIDComponentNumber; - uint32_t VertexIDElementOffset; -}; - -static inline 
void -GEN8_3DSTATE_VF_SGVS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VF_SGVS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->InstanceIDEnable, 31, 31) | - __gen_field(values->InstanceIDComponentNumber, 29, 30) | - __gen_field(values->InstanceIDElementOffset, 16, 21) | - __gen_field(values->VertexIDEnable, 15, 15) | - __gen_field(values->VertexIDComponentNumber, 13, 14) | - __gen_field(values->VertexIDElementOffset, 0, 5) | - 0; - -} - -#define GEN8_3DSTATE_VF_STATISTICS_length_bias 0x00000001 -#define GEN8_3DSTATE_VF_STATISTICS_header \ - .CommandType = 3, \ - .CommandSubType = 1, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 11 - -#define GEN8_3DSTATE_VF_STATISTICS_length 0x00000001 - -struct GEN8_3DSTATE_VF_STATISTICS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - bool StatisticsEnable; -}; - -static inline void -GEN8_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VF_STATISTICS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->StatisticsEnable, 0, 0) | - 0; - -} - -#define GEN8_3DSTATE_VF_TOPOLOGY_length_bias 0x00000002 -#define GEN8_3DSTATE_VF_TOPOLOGY_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 75, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_VF_TOPOLOGY_length 0x00000002 - -struct 
GEN8_3DSTATE_VF_TOPOLOGY { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PrimitiveTopologyType; -}; - -static inline void -GEN8_3DSTATE_VF_TOPOLOGY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VF_TOPOLOGY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->PrimitiveTopologyType, 0, 5) | - 0; - -} - -#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 -#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 35, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 - -struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t CCViewportPointer; -}; - -static inline void -GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->CCViewportPointer, 5, 31) | - 0; - -} - -#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 -#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ - .CommandType 
= 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 33, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 - -struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SFClipViewportPointer; -}; - -static inline void -GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->SFClipViewportPointer, 6, 31) | - 0; - -} - -#define GEN8_3DSTATE_WM_length_bias 0x00000002 -#define GEN8_3DSTATE_WM_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 20, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_WM_length 0x00000002 - -struct GEN8_3DSTATE_WM { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool StatisticsEnable; - bool LegacyDepthBufferClearEnable; - bool LegacyDepthBufferResolveEnable; - bool LegacyHierarchicalDepthBufferResolveEnable; - bool LegacyDiamondLineRasterization; -#define NORMAL 0 -#define PSEXEC 1 -#define PREPS 2 - uint32_t EarlyDepthStencilControl; -#define Normal 0 -#define ForceOff 1 -#define ForceON 2 - uint32_t ForceThreadDispatchEnable; -#define INTERP_PIXEL 0 -#define INTERP_CENTROID 2 -#define INTERP_SAMPLE 3 - uint32_t PositionZWInterpolationMode; - uint32_t BarycentricInterpolationMode; -#define _05pixels 0 -#define _10pixels 1 -#define 
_20pixels 2 -#define _40pixels 3 - uint32_t LineEndCapAntialiasingRegionWidth; -#define _05pixels 0 -#define _10pixels 1 -#define _20pixels 2 -#define _40pixels 3 - uint32_t LineAntialiasingRegionWidth; - bool PolygonStippleEnable; - bool LineStippleEnable; -#define RASTRULE_UPPER_LEFT 0 -#define RASTRULE_UPPER_RIGHT 1 - uint32_t PointRasterizationRule; -#define Normal 0 -#define ForceOff 1 -#define ForceON 2 - uint32_t ForceKillPixelEnable; -}; - -static inline void -GEN8_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_WM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->StatisticsEnable, 31, 31) | - __gen_field(values->LegacyDepthBufferClearEnable, 30, 30) | - __gen_field(values->LegacyDepthBufferResolveEnable, 28, 28) | - __gen_field(values->LegacyHierarchicalDepthBufferResolveEnable, 27, 27) | - __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | - __gen_field(values->EarlyDepthStencilControl, 21, 22) | - __gen_field(values->ForceThreadDispatchEnable, 19, 20) | - __gen_field(values->PositionZWInterpolationMode, 17, 18) | - __gen_field(values->BarycentricInterpolationMode, 11, 16) | - __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | - __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | - __gen_field(values->PolygonStippleEnable, 4, 4) | - __gen_field(values->LineStippleEnable, 3, 3) | - __gen_field(values->PointRasterizationRule, 2, 2) | - __gen_field(values->ForceKillPixelEnable, 0, 1) | - 0; - -} - -#define GEN8_3DSTATE_WM_CHROMAKEY_length_bias 0x00000002 -#define GEN8_3DSTATE_WM_CHROMAKEY_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - 
._3DCommandSubOpcode = 76, \ - .DwordLength = 0 - -#define GEN8_3DSTATE_WM_CHROMAKEY_length 0x00000002 - -struct GEN8_3DSTATE_WM_CHROMAKEY { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool ChromaKeyKillEnable; -}; - -static inline void -GEN8_3DSTATE_WM_CHROMAKEY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_WM_CHROMAKEY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ChromaKeyKillEnable, 31, 31) | - 0; - -} - -#define GEN8_3DSTATE_WM_DEPTH_STENCIL_length_bias 0x00000002 -#define GEN8_3DSTATE_WM_DEPTH_STENCIL_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 78, \ - .DwordLength = 1 - -#define GEN8_3DSTATE_WM_DEPTH_STENCIL_length 0x00000003 - -struct GEN8_3DSTATE_WM_DEPTH_STENCIL { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t StencilFailOp; - uint32_t StencilPassDepthFailOp; - uint32_t StencilPassDepthPassOp; - uint32_t BackfaceStencilTestFunction; - uint32_t BackfaceStencilFailOp; - uint32_t BackfaceStencilPassDepthFailOp; - uint32_t BackfaceStencilPassDepthPassOp; - uint32_t StencilTestFunction; - uint32_t DepthTestFunction; - bool DoubleSidedStencilEnable; - bool StencilTestEnable; - bool StencilBufferWriteEnable; - bool DepthTestEnable; - bool DepthBufferWriteEnable; - uint32_t StencilTestMask; - uint32_t StencilWriteMask; - uint32_t BackfaceStencilTestMask; - uint32_t BackfaceStencilWriteMask; -}; - -static inline void -GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(__gen_user_data *data, 
void * restrict dst, - const struct GEN8_3DSTATE_WM_DEPTH_STENCIL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->StencilFailOp, 29, 31) | - __gen_field(values->StencilPassDepthFailOp, 26, 28) | - __gen_field(values->StencilPassDepthPassOp, 23, 25) | - __gen_field(values->BackfaceStencilTestFunction, 20, 22) | - __gen_field(values->BackfaceStencilFailOp, 17, 19) | - __gen_field(values->BackfaceStencilPassDepthFailOp, 14, 16) | - __gen_field(values->BackfaceStencilPassDepthPassOp, 11, 13) | - __gen_field(values->StencilTestFunction, 8, 10) | - __gen_field(values->DepthTestFunction, 5, 7) | - __gen_field(values->DoubleSidedStencilEnable, 4, 4) | - __gen_field(values->StencilTestEnable, 3, 3) | - __gen_field(values->StencilBufferWriteEnable, 2, 2) | - __gen_field(values->DepthTestEnable, 1, 1) | - __gen_field(values->DepthBufferWriteEnable, 0, 0) | - 0; - - dw[2] = - __gen_field(values->StencilTestMask, 24, 31) | - __gen_field(values->StencilWriteMask, 16, 23) | - __gen_field(values->BackfaceStencilTestMask, 8, 15) | - __gen_field(values->BackfaceStencilWriteMask, 0, 7) | - 0; - -} - -#define GEN8_3DSTATE_WM_HZ_OP_length_bias 0x00000002 -#define GEN8_3DSTATE_WM_HZ_OP_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 82, \ - .DwordLength = 3 - -#define GEN8_3DSTATE_WM_HZ_OP_length 0x00000005 - -struct GEN8_3DSTATE_WM_HZ_OP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool StencilBufferClearEnable; - bool DepthBufferClearEnable; - bool ScissorRectangleEnable; - bool DepthBufferResolveEnable; - bool 
HierarchicalDepthBufferResolveEnable; - uint32_t PixelPositionOffsetEnable; - bool FullSurfaceDepthClear; - uint32_t StencilClearValue; - uint32_t NumberofMultisamples; - uint32_t ClearRectangleYMin; - uint32_t ClearRectangleXMin; - uint32_t ClearRectangleYMax; - uint32_t ClearRectangleXMax; - uint32_t SampleMask; -}; - -static inline void -GEN8_3DSTATE_WM_HZ_OP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_3DSTATE_WM_HZ_OP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->StencilBufferClearEnable, 31, 31) | - __gen_field(values->DepthBufferClearEnable, 30, 30) | - __gen_field(values->ScissorRectangleEnable, 29, 29) | - __gen_field(values->DepthBufferResolveEnable, 28, 28) | - __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | - __gen_field(values->PixelPositionOffsetEnable, 26, 26) | - __gen_field(values->FullSurfaceDepthClear, 25, 25) | - __gen_field(values->StencilClearValue, 16, 23) | - __gen_field(values->NumberofMultisamples, 13, 15) | - 0; - - dw[2] = - __gen_field(values->ClearRectangleYMin, 16, 31) | - __gen_field(values->ClearRectangleXMin, 0, 15) | - 0; - - dw[3] = - __gen_field(values->ClearRectangleYMax, 16, 31) | - __gen_field(values->ClearRectangleXMax, 0, 15) | - 0; - - dw[4] = - __gen_field(values->SampleMask, 0, 15) | - 0; - -} - -#define GEN8_GPGPU_WALKER_length_bias 0x00000002 -#define GEN8_GPGPU_WALKER_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcode = 5, \ - .DwordLength = 13 - -#define GEN8_GPGPU_WALKER_length 0x0000000f - -struct GEN8_GPGPU_WALKER { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - bool 
IndirectParameterEnable; - bool PredicateEnable; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - uint32_t IndirectDataLength; - uint32_t IndirectDataStartAddress; -#define SIMD8 0 -#define SIMD16 1 -#define SIMD32 2 - uint32_t SIMDSize; - uint32_t ThreadDepthCounterMaximum; - uint32_t ThreadHeightCounterMaximum; - uint32_t ThreadWidthCounterMaximum; - uint32_t ThreadGroupIDStartingX; - uint32_t ThreadGroupIDXDimension; - uint32_t ThreadGroupIDStartingY; - uint32_t ThreadGroupIDYDimension; - uint32_t ThreadGroupIDStartingResumeZ; - uint32_t ThreadGroupIDZDimension; - uint32_t RightExecutionMask; - uint32_t BottomExecutionMask; -}; - -static inline void -GEN8_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_GPGPU_WALKER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->IndirectParameterEnable, 10, 10) | - __gen_field(values->PredicateEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - dw[3] = - __gen_offset(values->IndirectDataStartAddress, 6, 31) | - 0; - - dw[4] = - __gen_field(values->SIMDSize, 30, 31) | - __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | - __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | - __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | - 0; - - dw[5] = - __gen_field(values->ThreadGroupIDStartingX, 0, 31) | - 0; - - dw[6] = - 0; - - dw[7] = - __gen_field(values->ThreadGroupIDXDimension, 0, 31) | - 0; - - dw[8] = - __gen_field(values->ThreadGroupIDStartingY, 0, 31) | - 0; - - dw[9] = - 0; - - dw[10] = - __gen_field(values->ThreadGroupIDYDimension, 0, 31) | - 0; - - dw[11] = - 
__gen_field(values->ThreadGroupIDStartingResumeZ, 0, 31) | - 0; - - dw[12] = - __gen_field(values->ThreadGroupIDZDimension, 0, 31) | - 0; - - dw[13] = - __gen_field(values->RightExecutionMask, 0, 31) | - 0; - - dw[14] = - __gen_field(values->BottomExecutionMask, 0, 31) | - 0; - -} - -#define GEN8_MEDIA_CURBE_LOAD_length_bias 0x00000002 -#define GEN8_MEDIA_CURBE_LOAD_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 1, \ - .DwordLength = 2 - -#define GEN8_MEDIA_CURBE_LOAD_length 0x00000004 - -struct GEN8_MEDIA_CURBE_LOAD { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t CURBETotalDataLength; - uint32_t CURBEDataStartAddress; -}; - -static inline void -GEN8_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_CURBE_LOAD * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - 0; - - dw[2] = - __gen_field(values->CURBETotalDataLength, 0, 16) | - 0; - - dw[3] = - __gen_field(values->CURBEDataStartAddress, 0, 31) | - 0; - -} - -#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 -#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 2, \ - .DwordLength = 2 - -#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 - -struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorTotalLength; - uint32_t InterfaceDescriptorDataStartAddress; -}; - -static inline void 
-GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - 0; - - dw[2] = - __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | - 0; - - dw[3] = - __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | - 0; - -} - -#define GEN8_MEDIA_OBJECT_length_bias 0x00000002 -#define GEN8_MEDIA_OBJECT_header \ - .CommandType = 3, \ - .MediaCommandPipeline = 2, \ - .MediaCommandOpcode = 1, \ - .MediaCommandSubOpcode = 0 - -#define GEN8_MEDIA_OBJECT_length 0x00000000 - -struct GEN8_MEDIA_OBJECT { - uint32_t CommandType; - uint32_t MediaCommandPipeline; - uint32_t MediaCommandOpcode; - uint32_t MediaCommandSubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - bool ChildrenPresent; -#define Nothreadsynchronization 0 -#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 - uint32_t ThreadSynchronization; - uint32_t ForceDestination; -#define Notusingscoreboard 0 -#define Usingscoreboard 1 - uint32_t UseScoreboard; -#define Slice0 0 -#define Slice1 1 -#define Slice2 2 - uint32_t SliceDestinationSelect; -#define SubSlice2 2 -#define SubSlice1 1 -#define SubSlice0 0 - uint32_t SubSliceDestinationSelect; - uint32_t IndirectDataLength; - __gen_address_type IndirectDataStartAddress; - uint32_t ScoredboardY; - uint32_t ScoreboardX; - uint32_t ScoreboardColor; - bool ScoreboardMask; - /* variable length fields follow */ -}; - -static inline void -GEN8_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_OBJECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - 
__gen_field(values->CommandType, 29, 31) | - __gen_field(values->MediaCommandPipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->MediaCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->ChildrenPresent, 31, 31) | - __gen_field(values->ThreadSynchronization, 24, 24) | - __gen_field(values->ForceDestination, 22, 22) | - __gen_field(values->UseScoreboard, 21, 21) | - __gen_field(values->SliceDestinationSelect, 19, 20) | - __gen_field(values->SubSliceDestinationSelect, 17, 18) | - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); - - dw[4] = - __gen_field(values->ScoredboardY, 16, 24) | - __gen_field(values->ScoreboardX, 0, 8) | - 0; - - dw[5] = - __gen_field(values->ScoreboardColor, 16, 19) | - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_MEDIA_OBJECT_GRPID_length_bias 0x00000002 -#define GEN8_MEDIA_OBJECT_GRPID_header \ - .CommandType = 3, \ - .MediaCommandPipeline = 2, \ - .MediaCommandOpcode = 1, \ - .MediaCommandSubOpcode = 6 - -#define GEN8_MEDIA_OBJECT_GRPID_length 0x00000000 - -struct GEN8_MEDIA_OBJECT_GRPID { - uint32_t CommandType; - uint32_t MediaCommandPipeline; - uint32_t MediaCommandOpcode; - uint32_t MediaCommandSubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - uint32_t EndofThreadGroup; - uint32_t ForceDestination; -#define Notusingscoreboard 0 -#define Usingscoreboard 1 - uint32_t UseScoreboard; -#define Slice0 0 -#define Slice1 1 -#define Slice2 2 - uint32_t SliceDestinationSelect; -#define SubSlice2 2 -#define SubSlice1 1 -#define SubSlice0 0 - uint32_t SubSliceDestinationSelect; - uint32_t IndirectDataLength; - __gen_address_type IndirectDataStartAddress; - uint32_t 
ScoreboardY; - uint32_t ScoreboardX; - uint32_t ScoreboardColor; - bool ScoreboardMask; - uint32_t GroupID; - /* variable length fields follow */ -}; - -static inline void -GEN8_MEDIA_OBJECT_GRPID_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_OBJECT_GRPID * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MediaCommandPipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->MediaCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->EndofThreadGroup, 23, 23) | - __gen_field(values->ForceDestination, 22, 22) | - __gen_field(values->UseScoreboard, 21, 21) | - __gen_field(values->SliceDestinationSelect, 19, 20) | - __gen_field(values->SubSliceDestinationSelect, 17, 18) | - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); - - dw[4] = - __gen_field(values->ScoreboardY, 16, 24) | - __gen_field(values->ScoreboardX, 0, 8) | - 0; - - dw[5] = - __gen_field(values->ScoreboardColor, 16, 19) | - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - dw[6] = - __gen_field(values->GroupID, 0, 31) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_MEDIA_OBJECT_PRT_length_bias 0x00000002 -#define GEN8_MEDIA_OBJECT_PRT_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcode = 2, \ - .DwordLength = 14 - -#define GEN8_MEDIA_OBJECT_PRT_length 0x00000010 - -struct GEN8_MEDIA_OBJECT_PRT { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - bool ChildrenPresent; - bool PRT_FenceNeeded; -#define Rootthreadqueue 0 -#define 
VFEstateflush 1 - uint32_t PRT_FenceType; - uint32_t InlineData[12]; -}; - -static inline void -GEN8_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_OBJECT_PRT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->ChildrenPresent, 31, 31) | - __gen_field(values->PRT_FenceNeeded, 23, 23) | - __gen_field(values->PRT_FenceType, 22, 22) | - 0; - - dw[3] = - 0; - - for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { - dw[j] = - __gen_field(values->InlineData[i + 0], 0, 31) | - 0; - } - -} - -#define GEN8_MEDIA_OBJECT_WALKER_length_bias 0x00000002 -#define GEN8_MEDIA_OBJECT_WALKER_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcode = 3 - -#define GEN8_MEDIA_OBJECT_WALKER_length 0x00000000 - -struct GEN8_MEDIA_OBJECT_WALKER { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - bool ChildrenPresent; -#define Nothreadsynchronization 0 -#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 - uint32_t ThreadSynchronization; -#define Notusingscoreboard 0 -#define Usingscoreboard 1 - uint32_t UseScoreboard; - uint32_t IndirectDataLength; - uint32_t IndirectDataStartAddress; - uint32_t GroupIDLoopSelect; - bool ScoreboardMask; - uint32_t ColorCountMinusOne; - uint32_t MiddleLoopExtraSteps; - uint32_t LocalMidLoopUnitY; - uint32_t MidLoopUnitX; - uint32_t GlobalLoopExecCount; - uint32_t LocalLoopExecCount; - uint32_t BlockResolutionY; - uint32_t BlockResolutionX; - uint32_t LocalStartY; - uint32_t LocalStartX; - uint32_t 
LocalOuterLoopStrideY; - uint32_t LocalOuterLoopStrideX; - uint32_t LocalInnerLoopUnitY; - uint32_t LocalInnerLoopUnitX; - uint32_t GlobalResolutionY; - uint32_t GlobalResolutionX; - uint32_t GlobalStartY; - uint32_t GlobalStartX; - uint32_t GlobalOuterLoopStrideY; - uint32_t GlobalOuterLoopStrideX; - uint32_t GlobalInnerLoopUnitY; - uint32_t GlobalInnerLoopUnitX; - /* variable length fields follow */ -}; - -static inline void -GEN8_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_OBJECT_WALKER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->ChildrenPresent, 31, 31) | - __gen_field(values->ThreadSynchronization, 24, 24) | - __gen_field(values->UseScoreboard, 21, 21) | - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - dw[3] = - __gen_offset(values->IndirectDataStartAddress, 0, 31) | - 0; - - dw[4] = - 0; - - dw[5] = - __gen_field(values->GroupIDLoopSelect, 8, 31) | - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - dw[6] = - __gen_field(values->ColorCountMinusOne, 24, 27) | - __gen_field(values->MiddleLoopExtraSteps, 16, 20) | - __gen_field(values->LocalMidLoopUnitY, 12, 13) | - __gen_field(values->MidLoopUnitX, 8, 9) | - 0; - - dw[7] = - __gen_field(values->GlobalLoopExecCount, 16, 25) | - __gen_field(values->LocalLoopExecCount, 0, 9) | - 0; - - dw[8] = - __gen_field(values->BlockResolutionY, 16, 24) | - __gen_field(values->BlockResolutionX, 0, 8) | - 0; - - dw[9] = - __gen_field(values->LocalStartY, 16, 24) | - __gen_field(values->LocalStartX, 0, 8) | - 0; - - dw[10] = - 0; - - dw[11] = - __gen_field(values->LocalOuterLoopStrideY, 16, 25) | - 
__gen_field(values->LocalOuterLoopStrideX, 0, 9) | - 0; - - dw[12] = - __gen_field(values->LocalInnerLoopUnitY, 16, 25) | - __gen_field(values->LocalInnerLoopUnitX, 0, 9) | - 0; - - dw[13] = - __gen_field(values->GlobalResolutionY, 16, 24) | - __gen_field(values->GlobalResolutionX, 0, 8) | - 0; - - dw[14] = - __gen_field(values->GlobalStartY, 16, 25) | - __gen_field(values->GlobalStartX, 0, 9) | - 0; - - dw[15] = - __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | - __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | - 0; - - dw[16] = - __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | - __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_MEDIA_STATE_FLUSH_length_bias 0x00000002 -#define GEN8_MEDIA_STATE_FLUSH_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 4, \ - .DwordLength = 0 - -#define GEN8_MEDIA_STATE_FLUSH_length 0x00000002 - -struct GEN8_MEDIA_STATE_FLUSH { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - bool FlushtoGO; - uint32_t WatermarkRequired; - uint32_t InterfaceDescriptorOffset; -}; - -static inline void -GEN8_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_STATE_FLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->FlushtoGO, 7, 7) | - __gen_field(values->WatermarkRequired, 6, 6) | - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - -} - -#define GEN8_MEDIA_VFE_STATE_length_bias 0x00000002 -#define GEN8_MEDIA_VFE_STATE_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 0, \ 
- .DwordLength = 7 - -#define GEN8_MEDIA_VFE_STATE_length 0x00000009 - -struct GEN8_MEDIA_VFE_STATE { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t ScratchSpaceBasePointer; - uint32_t StackSize; - uint32_t PerThreadScratchSpace; - uint32_t ScratchSpaceBasePointerHigh; - uint32_t MaximumNumberofThreads; - uint32_t NumberofURBEntries; -#define Maintainingtheexistingtimestampstate 0 -#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 - uint32_t ResetGatewayTimer; -#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 -#define BypassingOpenGatewayCloseGatewayprotocol 1 - uint32_t BypassGatewayControl; - uint32_t SliceDisable; - uint32_t URBEntryAllocationSize; - uint32_t CURBEAllocationSize; -#define Scoreboarddisabled 0 -#define Scoreboardenabled 1 - uint32_t ScoreboardEnable; -#define StallingScoreboard 0 -#define NonStallingScoreboard 1 - uint32_t ScoreboardType; - uint32_t ScoreboardMask; - uint32_t Scoreboard3DeltaY; - uint32_t Scoreboard3DeltaX; - uint32_t Scoreboard2DeltaY; - uint32_t Scoreboard2DeltaX; - uint32_t Scoreboard1DeltaY; - uint32_t Scoreboard1DeltaX; - uint32_t Scoreboard0DeltaY; - uint32_t Scoreboard0DeltaX; - uint32_t Scoreboard7DeltaY; - uint32_t Scoreboard7DeltaX; - uint32_t Scoreboard6DeltaY; - uint32_t Scoreboard6DeltaX; - uint32_t Scoreboard5DeltaY; - uint32_t Scoreboard5DeltaX; - uint32_t Scoreboard4DeltaY; - uint32_t Scoreboard4DeltaX; -}; - -static inline void -GEN8_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MEDIA_VFE_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - 
__gen_offset(values->ScratchSpaceBasePointer, 10, 31) | - __gen_field(values->StackSize, 4, 7) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[2] = - __gen_offset(values->ScratchSpaceBasePointerHigh, 0, 15) | - 0; - - dw[3] = - __gen_field(values->MaximumNumberofThreads, 16, 31) | - __gen_field(values->NumberofURBEntries, 8, 15) | - __gen_field(values->ResetGatewayTimer, 7, 7) | - __gen_field(values->BypassGatewayControl, 6, 6) | - 0; - - dw[4] = - __gen_field(values->SliceDisable, 0, 1) | - 0; - - dw[5] = - __gen_field(values->URBEntryAllocationSize, 16, 31) | - __gen_field(values->CURBEAllocationSize, 0, 15) | - 0; - - dw[6] = - __gen_field(values->ScoreboardEnable, 31, 31) | - __gen_field(values->ScoreboardType, 30, 30) | - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - dw[7] = - __gen_field(values->Scoreboard3DeltaY, 28, 31) | - __gen_field(values->Scoreboard3DeltaX, 24, 27) | - __gen_field(values->Scoreboard2DeltaY, 20, 23) | - __gen_field(values->Scoreboard2DeltaX, 16, 19) | - __gen_field(values->Scoreboard1DeltaY, 12, 15) | - __gen_field(values->Scoreboard1DeltaX, 8, 11) | - __gen_field(values->Scoreboard0DeltaY, 4, 7) | - __gen_field(values->Scoreboard0DeltaX, 0, 3) | - 0; - - dw[8] = - __gen_field(values->Scoreboard7DeltaY, 28, 31) | - __gen_field(values->Scoreboard7DeltaX, 24, 27) | - __gen_field(values->Scoreboard6DeltaY, 20, 23) | - __gen_field(values->Scoreboard6DeltaX, 16, 19) | - __gen_field(values->Scoreboard5DeltaY, 12, 15) | - __gen_field(values->Scoreboard5DeltaX, 8, 11) | - __gen_field(values->Scoreboard4DeltaY, 4, 7) | - __gen_field(values->Scoreboard4DeltaX, 0, 3) | - 0; - -} - -#define GEN8_MI_ARB_CHECK_length_bias 0x00000001 -#define GEN8_MI_ARB_CHECK_header \ - .CommandType = 0, \ - .MICommandOpcode = 5 - -#define GEN8_MI_ARB_CHECK_length 0x00000001 - -struct GEN8_MI_ARB_CHECK { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN8_MI_ARB_CHECK_pack(__gen_user_data *data, void * 
restrict dst, - const struct GEN8_MI_ARB_CHECK * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN8_MI_BATCH_BUFFER_END_length_bias 0x00000001 -#define GEN8_MI_BATCH_BUFFER_END_header \ - .CommandType = 0, \ - .MICommandOpcode = 10 - -#define GEN8_MI_BATCH_BUFFER_END_length 0x00000001 - -struct GEN8_MI_BATCH_BUFFER_END { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN8_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_BATCH_BUFFER_END * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN8_MI_BATCH_BUFFER_START_length_bias 0x00000002 -#define GEN8_MI_BATCH_BUFFER_START_header \ - .CommandType = 0, \ - .MICommandOpcode = 49, \ - .DwordLength = 1 - -#define GEN8_MI_BATCH_BUFFER_START_length 0x00000003 - -struct GEN8_MI_BATCH_BUFFER_START { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define _1stlevelbatch 0 -#define _2ndlevelbatch 1 - uint32_t _2ndLevelBatchBuffer; - bool AddOffsetEnable; - uint32_t PredicationEnable; - bool ResourceStreamerEnable; -#define ASI_GGTT 0 -#define ASI_PPGTT 1 - uint32_t AddressSpaceIndicator; - uint32_t DwordLength; - __gen_address_type BatchBufferStartAddress; -}; - -static inline void -GEN8_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_BATCH_BUFFER_START * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->_2ndLevelBatchBuffer, 22, 22) | - __gen_field(values->AddOffsetEnable, 16, 16) | - __gen_field(values->PredicationEnable, 15, 15) | - __gen_field(values->ResourceStreamerEnable, 10, 
10) | - __gen_field(values->AddressSpaceIndicator, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - -} - -#define GEN8_MI_CLFLUSH_length_bias 0x00000002 -#define GEN8_MI_CLFLUSH_header \ - .CommandType = 0, \ - .MICommandOpcode = 39 - -#define GEN8_MI_CLFLUSH_length 0x00000000 - -struct GEN8_MI_CLFLUSH { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t UseGlobalGTT; - uint32_t DwordLength; - __gen_address_type PageBaseAddress; - uint32_t StartingCachelineOffset; - /* variable length fields follow */ -}; - -static inline void -GEN8_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_CLFLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->DwordLength, 0, 9) | - 0; - - uint32_t dw1 = - __gen_field(values->StartingCachelineOffset, 6, 11) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - /* variable length fields follow */ -} - -#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 -#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_header\ - .CommandType = 0, \ - .MICommandOpcode = 54, \ - .UseGlobalGTT = 0, \ - .CompareSemaphore = 0, \ - .DwordLength = 1 - -#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000003 - -struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t UseGlobalGTT; - uint32_t CompareSemaphore; - uint32_t DwordLength; - uint32_t CompareDataDword; - __gen_address_type CompareAddress; -}; - -static inline void 
-GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->CompareSemaphore, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->CompareDataDword, 0, 31) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - -} - -#define GEN8_MI_COPY_MEM_MEM_length_bias 0x00000002 -#define GEN8_MI_COPY_MEM_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 46, \ - .DwordLength = 3 - -#define GEN8_MI_COPY_MEM_MEM_length 0x00000005 - -struct GEN8_MI_COPY_MEM_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t UseGlobalGTTSource; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t UseGlobalGTTDestination; - uint32_t DwordLength; - __gen_address_type DestinationMemoryAddress; - __gen_address_type SourceMemoryAddress; -}; - -static inline void -GEN8_MI_COPY_MEM_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_COPY_MEM_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTTSource, 22, 22) | - __gen_field(values->UseGlobalGTTDestination, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->DestinationMemoryAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - uint32_t dw3 = - 0; - - uint64_t qw3 = - __gen_combine_address(data, &dw[3], 
values->SourceMemoryAddress, dw3); - - dw[3] = qw3; - dw[4] = qw3 >> 32; - -} - -#define GEN8_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 -#define GEN8_MI_LOAD_REGISTER_IMM_header \ - .CommandType = 0, \ - .MICommandOpcode = 34, \ - .DwordLength = 1 - -#define GEN8_MI_LOAD_REGISTER_IMM_length 0x00000003 - -struct GEN8_MI_LOAD_REGISTER_IMM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t ByteWriteDisables; - uint32_t DwordLength; - uint32_t RegisterOffset; - uint32_t DataDWord; -}; - -static inline void -GEN8_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_LOAD_REGISTER_IMM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->ByteWriteDisables, 8, 11) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->RegisterOffset, 2, 22) | - 0; - - dw[2] = - __gen_field(values->DataDWord, 0, 31) | - 0; - -} - -#define GEN8_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 -#define GEN8_MI_LOAD_REGISTER_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 41, \ - .DwordLength = 2 - -#define GEN8_MI_LOAD_REGISTER_MEM_length 0x00000004 - -struct GEN8_MI_LOAD_REGISTER_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool UseGlobalGTT; - uint32_t AsyncModeEnable; - uint32_t DwordLength; - uint32_t RegisterAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN8_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_LOAD_REGISTER_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->AsyncModeEnable, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - 
__gen_offset(values->RegisterAddress, 2, 22) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - -} - -#define GEN8_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 -#define GEN8_MI_LOAD_SCAN_LINES_EXCL_header \ - .CommandType = 0, \ - .MICommandOpcode = 19, \ - .DwordLength = 0 - -#define GEN8_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 - -struct GEN8_MI_LOAD_SCAN_LINES_EXCL { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define DisplayPlaneA 0 -#define DisplayPlaneB 1 -#define DisplayPlaneC 4 - uint32_t DisplayPlaneSelect; - uint32_t DwordLength; - uint32_t StartScanLineNumber; - uint32_t EndScanLineNumber; -}; - -static inline void -GEN8_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_LOAD_SCAN_LINES_EXCL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DisplayPlaneSelect, 19, 21) | - __gen_field(values->DwordLength, 0, 5) | - 0; - - dw[1] = - __gen_field(values->StartScanLineNumber, 16, 28) | - __gen_field(values->EndScanLineNumber, 0, 12) | - 0; - -} - -#define GEN8_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 -#define GEN8_MI_LOAD_SCAN_LINES_INCL_header \ - .CommandType = 0, \ - .MICommandOpcode = 18, \ - .DwordLength = 0 - -#define GEN8_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 - -struct GEN8_MI_LOAD_SCAN_LINES_INCL { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define DisplayPlaneA 0 -#define DisplayPlaneB 1 -#define DisplayPlaneC 4 - uint32_t DisplayPlaneSelect; -#define NeverForward 0 -#define AlwaysForward 1 -#define ConditionallyForward 2 - bool ScanLineEventDoneForward; - uint32_t DwordLength; - uint32_t StartScanLineNumber; - uint32_t EndScanLineNumber; -}; - -static inline void -GEN8_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * 
restrict dst, - const struct GEN8_MI_LOAD_SCAN_LINES_INCL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DisplayPlaneSelect, 19, 21) | - __gen_field(values->ScanLineEventDoneForward, 17, 18) | - __gen_field(values->DwordLength, 0, 5) | - 0; - - dw[1] = - __gen_field(values->StartScanLineNumber, 16, 28) | - __gen_field(values->EndScanLineNumber, 0, 12) | - 0; - -} - -#define GEN8_MI_LOAD_URB_MEM_length_bias 0x00000002 -#define GEN8_MI_LOAD_URB_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 44, \ - .DwordLength = 2 - -#define GEN8_MI_LOAD_URB_MEM_length 0x00000004 - -struct GEN8_MI_LOAD_URB_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t URBAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN8_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_LOAD_URB_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->URBAddress, 2, 14) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - -} - -#define GEN8_MI_MATH_length_bias 0x00000002 -#define GEN8_MI_MATH_header \ - .CommandType = 0, \ - .MICommandOpcode = 26 - -#define GEN8_MI_MATH_length 0x00000000 - -struct GEN8_MI_MATH { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t ALUINSTRUCTION1; - uint32_t ALUINSTRUCTION2; - /* variable length fields follow */ -}; - -static inline void -GEN8_MI_MATH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_MATH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) 
dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 5) | - 0; - - dw[1] = - __gen_field(values->ALUINSTRUCTION1, 0, 31) | - 0; - - dw[2] = - __gen_field(values->ALUINSTRUCTION2, 0, 31) | - 0; - - /* variable length fields follow */ -} - -#define GEN8_MI_NOOP_length_bias 0x00000001 -#define GEN8_MI_NOOP_header \ - .CommandType = 0, \ - .MICommandOpcode = 0 - -#define GEN8_MI_NOOP_length 0x00000001 - -struct GEN8_MI_NOOP { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool IdentificationNumberRegisterWriteEnable; - uint32_t IdentificationNumber; -}; - -static inline void -GEN8_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_NOOP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | - __gen_field(values->IdentificationNumber, 0, 21) | - 0; - -} - -#define GEN8_MI_PREDICATE_length_bias 0x00000001 -#define GEN8_MI_PREDICATE_header \ - .CommandType = 0, \ - .MICommandOpcode = 12 - -#define GEN8_MI_PREDICATE_length 0x00000001 - -struct GEN8_MI_PREDICATE { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define LOAD_KEEP 0 -#define LOAD_LOAD 2 -#define LOAD_LOADINV 3 - uint32_t LoadOperation; -#define COMBINE_SET 0 -#define COMBINE_AND 1 -#define COMBINE_OR 2 -#define COMBINE_XOR 3 - uint32_t CombineOperation; -#define COMPARE_SRCS_EQUAL 2 -#define COMPARE_DELTAS_EQUAL 3 - uint32_t CompareOperation; -}; - -static inline void -GEN8_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_PREDICATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->LoadOperation, 6, 7) | - 
__gen_field(values->CombineOperation, 3, 4) | - __gen_field(values->CompareOperation, 0, 1) | - 0; - -} - -#define GEN8_MI_REPORT_HEAD_length_bias 0x00000001 -#define GEN8_MI_REPORT_HEAD_header \ - .CommandType = 0, \ - .MICommandOpcode = 7 - -#define GEN8_MI_REPORT_HEAD_length 0x00000001 - -struct GEN8_MI_REPORT_HEAD { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN8_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_REPORT_HEAD * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN8_MI_RS_CONTEXT_length_bias 0x00000001 -#define GEN8_MI_RS_CONTEXT_header \ - .CommandType = 0, \ - .MICommandOpcode = 15 - -#define GEN8_MI_RS_CONTEXT_length 0x00000001 - -struct GEN8_MI_RS_CONTEXT { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define RS_RESTORE 0 -#define RS_SAVE 1 - uint32_t ResourceStreamerSave; -}; - -static inline void -GEN8_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_RS_CONTEXT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->ResourceStreamerSave, 0, 0) | - 0; - -} - -#define GEN8_MI_RS_CONTROL_length_bias 0x00000001 -#define GEN8_MI_RS_CONTROL_header \ - .CommandType = 0, \ - .MICommandOpcode = 6 - -#define GEN8_MI_RS_CONTROL_length 0x00000001 - -struct GEN8_MI_RS_CONTROL { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define RS_STOP 0 -#define RS_START 1 - uint32_t ResourceStreamerControl; -}; - -static inline void -GEN8_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_RS_CONTROL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - 
__gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->ResourceStreamerControl, 0, 0) | - 0; - -} - -#define GEN8_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 -#define GEN8_MI_RS_STORE_DATA_IMM_header \ - .CommandType = 0, \ - .MICommandOpcode = 43, \ - .DwordLength = 2 - -#define GEN8_MI_RS_STORE_DATA_IMM_length 0x00000004 - -struct GEN8_MI_RS_STORE_DATA_IMM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - __gen_address_type DestinationAddress; - uint32_t CoreModeEnable; - uint32_t DataDWord0; -}; - -static inline void -GEN8_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_RS_STORE_DATA_IMM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - __gen_field(values->CoreModeEnable, 0, 0) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->DestinationAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->DataDWord0, 0, 31) | - 0; - -} - -#define GEN8_MI_SET_CONTEXT_length_bias 0x00000002 -#define GEN8_MI_SET_CONTEXT_header \ - .CommandType = 0, \ - .MICommandOpcode = 24, \ - .DwordLength = 0 - -#define GEN8_MI_SET_CONTEXT_length 0x00000002 - -struct GEN8_MI_SET_CONTEXT { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - __gen_address_type LogicalContextAddress; - uint32_t ReservedMustbe1; - bool CoreModeEnable; - bool ResourceStreamerStateSaveEnable; - bool ResourceStreamerStateRestoreEnable; - uint32_t ForceRestore; - uint32_t RestoreInhibit; -}; - -static inline void -GEN8_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_SET_CONTEXT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - 
__gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - __gen_field(values->ReservedMustbe1, 8, 8) | - __gen_field(values->CoreModeEnable, 4, 4) | - __gen_field(values->ResourceStreamerStateSaveEnable, 3, 3) | - __gen_field(values->ResourceStreamerStateRestoreEnable, 2, 2) | - __gen_field(values->ForceRestore, 1, 1) | - __gen_field(values->RestoreInhibit, 0, 0) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); - -} - -#define GEN8_MI_SET_PREDICATE_length_bias 0x00000001 -#define GEN8_MI_SET_PREDICATE_header \ - .CommandType = 0, \ - .MICommandOpcode = 1 - -#define GEN8_MI_SET_PREDICATE_length 0x00000001 - -struct GEN8_MI_SET_PREDICATE { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define NOOPNever 0 -#define NOOPonResult2clear 1 -#define NOOPonResult2set 2 -#define NOOPonResultclear 3 -#define NOOPonResultset 4 -#define Executewhenonesliceenabled 5 -#define Executewhentwoslicesareenabled 6 -#define Executewhenthreeslicesareenabled 7 -#define NOOPAlways 15 - uint32_t PREDICATEENABLE; -}; - -static inline void -GEN8_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_SET_PREDICATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->PREDICATEENABLE, 0, 3) | - 0; - -} - -#define GEN8_MI_STORE_DATA_IMM_length_bias 0x00000002 -#define GEN8_MI_STORE_DATA_IMM_header \ - .CommandType = 0, \ - .MICommandOpcode = 32, \ - .DwordLength = 2 - -#define GEN8_MI_STORE_DATA_IMM_length 0x00000004 - -struct GEN8_MI_STORE_DATA_IMM { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool UseGlobalGTT; - bool StoreQword; - uint32_t DwordLength; - __gen_address_type Address; - uint32_t CoreModeEnable; - uint32_t DataDWord0; - uint32_t DataDWord1; -}; - -static inline void 
-GEN8_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_STORE_DATA_IMM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->StoreQword, 21, 21) | - __gen_field(values->DwordLength, 0, 9) | - 0; - - uint32_t dw1 = - __gen_field(values->CoreModeEnable, 0, 0) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->Address, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->DataDWord0, 0, 31) | - 0; - - dw[4] = - __gen_field(values->DataDWord1, 0, 31) | - 0; - -} - -#define GEN8_MI_STORE_DATA_INDEX_length_bias 0x00000002 -#define GEN8_MI_STORE_DATA_INDEX_header \ - .CommandType = 0, \ - .MICommandOpcode = 33, \ - .DwordLength = 1 - -#define GEN8_MI_STORE_DATA_INDEX_length 0x00000003 - -struct GEN8_MI_STORE_DATA_INDEX { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t UsePerProcessHardwareStatusPage; - uint32_t DwordLength; - uint32_t Offset; - uint32_t DataDWord0; - uint32_t DataDWord1; -}; - -static inline void -GEN8_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_STORE_DATA_INDEX * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UsePerProcessHardwareStatusPage, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->Offset, 2, 11) | - 0; - - dw[2] = - __gen_field(values->DataDWord0, 0, 31) | - 0; - - dw[3] = - __gen_field(values->DataDWord1, 0, 31) | - 0; - -} - -#define GEN8_MI_STORE_URB_MEM_length_bias 0x00000002 -#define GEN8_MI_STORE_URB_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 45, \ - .DwordLength = 2 - -#define GEN8_MI_STORE_URB_MEM_length 
0x00000004 - -struct GEN8_MI_STORE_URB_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t URBAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN8_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_STORE_URB_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->URBAddress, 2, 14) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - -} - -#define GEN8_MI_SUSPEND_FLUSH_length_bias 0x00000001 -#define GEN8_MI_SUSPEND_FLUSH_header \ - .CommandType = 0, \ - .MICommandOpcode = 11 - -#define GEN8_MI_SUSPEND_FLUSH_length 0x00000001 - -struct GEN8_MI_SUSPEND_FLUSH { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool SuspendFlush; -}; - -static inline void -GEN8_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_SUSPEND_FLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->SuspendFlush, 0, 0) | - 0; - -} - -#define GEN8_MI_TOPOLOGY_FILTER_length_bias 0x00000001 -#define GEN8_MI_TOPOLOGY_FILTER_header \ - .CommandType = 0, \ - .MICommandOpcode = 13 - -#define GEN8_MI_TOPOLOGY_FILTER_length 0x00000001 - -struct GEN8_MI_TOPOLOGY_FILTER { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t TopologyFilterValue; -}; - -static inline void -GEN8_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_TOPOLOGY_FILTER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - 
__gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->TopologyFilterValue, 0, 5) | - 0; - -} - -#define GEN8_MI_UPDATE_GTT_length_bias 0x00000002 -#define GEN8_MI_UPDATE_GTT_header \ - .CommandType = 0, \ - .MICommandOpcode = 35 - -#define GEN8_MI_UPDATE_GTT_length 0x00000000 - -struct GEN8_MI_UPDATE_GTT { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - __gen_address_type EntryAddress; - /* variable length fields follow */ -}; - -static inline void -GEN8_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_UPDATE_GTT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 9) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); - - /* variable length fields follow */ -} - -#define GEN8_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 -#define GEN8_MI_URB_ATOMIC_ALLOC_header \ - .CommandType = 0, \ - .MICommandOpcode = 9 - -#define GEN8_MI_URB_ATOMIC_ALLOC_length 0x00000001 - -struct GEN8_MI_URB_ATOMIC_ALLOC { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t URBAtomicStorageOffset; - uint32_t URBAtomicStorageSize; -}; - -static inline void -GEN8_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_URB_ATOMIC_ALLOC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->URBAtomicStorageOffset, 12, 19) | - __gen_field(values->URBAtomicStorageSize, 0, 8) | - 0; - -} - -#define GEN8_MI_URB_CLEAR_length_bias 0x00000002 -#define GEN8_MI_URB_CLEAR_header \ - .CommandType = 0, \ - .MICommandOpcode = 25, \ - .DwordLength = 0 - -#define GEN8_MI_URB_CLEAR_length 0x00000002 - -struct GEN8_MI_URB_CLEAR { - uint32_t 
CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t URBClearLength; - uint32_t URBAddress; -}; - -static inline void -GEN8_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_URB_CLEAR * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->URBClearLength, 16, 29) | - __gen_offset(values->URBAddress, 0, 14) | - 0; - -} - -#define GEN8_MI_USER_INTERRUPT_length_bias 0x00000001 -#define GEN8_MI_USER_INTERRUPT_header \ - .CommandType = 0, \ - .MICommandOpcode = 2 - -#define GEN8_MI_USER_INTERRUPT_length 0x00000001 - -struct GEN8_MI_USER_INTERRUPT { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN8_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_USER_INTERRUPT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN8_MI_WAIT_FOR_EVENT_length_bias 0x00000001 -#define GEN8_MI_WAIT_FOR_EVENT_header \ - .CommandType = 0, \ - .MICommandOpcode = 3 - -#define GEN8_MI_WAIT_FOR_EVENT_length 0x00000001 - -struct GEN8_MI_WAIT_FOR_EVENT { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool DisplayPipeCVerticalBlankWaitEnable; - bool DisplaySpriteCFlipPendingWaitEnable; - bool DisplayPlaneCFlipPendingWaitEnable; - bool DisplayPipeCScanLineWaitEnable; - bool DisplayPipeBVerticalBlankWaitEnable; - bool DisplaySpriteBFlipPendingWaitEnable; - bool DisplayPlaneBFlipPendingWaitEnable; - bool DisplayPipeBScanLineWaitEnable; - bool DisplayPipeAVerticalBlankWaitEnable; - bool DisplaySpriteAFlipPendingWaitEnable; - bool DisplayPlaneAFlipPendingWaitEnable; - bool DisplayPipeAScanLineWaitEnable; -}; - -static inline void 
-GEN8_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_MI_WAIT_FOR_EVENT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 21) | - __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | - __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | - __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | - __gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | - __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | - __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | - __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | - __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | - __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | - __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | - __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | - 0; - -} - -#define GEN8_PIPE_CONTROL_length_bias 0x00000002 -#define GEN8_PIPE_CONTROL_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 2, \ - ._3DCommandSubOpcode = 0, \ - .DwordLength = 4 - -#define GEN8_PIPE_CONTROL_length 0x00000006 - -struct GEN8_PIPE_CONTROL { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define DAT_PPGTT 0 -#define DAT_GGTT 1 - uint32_t DestinationAddressType; -#define NoLRIOperation 0 -#define MMIOWriteImmediateData 1 - uint32_t LRIPostSyncOperation; - uint32_t StoreDataIndex; - uint32_t CommandStreamerStallEnable; -#define DontReset 0 -#define Reset 1 - uint32_t GlobalSnapshotCountReset; - uint32_t TLBInvalidate; - bool GenericMediaStateClear; -#define NoWrite 0 -#define WriteImmediateData 1 -#define WritePSDepthCount 2 -#define WriteTimestamp 3 - uint32_t 
PostSyncOperation; - bool DepthStallEnable; -#define DisableFlush 0 -#define EnableFlush 1 - bool RenderTargetCacheFlushEnable; - bool InstructionCacheInvalidateEnable; - bool TextureCacheInvalidationEnable; - bool IndirectStatePointersDisable; - bool NotifyEnable; - bool PipeControlFlushEnable; - bool DCFlushEnable; - bool VFCacheInvalidationEnable; - bool ConstantCacheInvalidationEnable; - bool StateCacheInvalidationEnable; - bool StallAtPixelScoreboard; -#define FlushDisabled 0 -#define FlushEnabled 1 - bool DepthCacheFlushEnable; - __gen_address_type Address; - uint64_t ImmediateData; -}; - -static inline void -GEN8_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_PIPE_CONTROL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->DestinationAddressType, 24, 24) | - __gen_field(values->LRIPostSyncOperation, 23, 23) | - __gen_field(values->StoreDataIndex, 21, 21) | - __gen_field(values->CommandStreamerStallEnable, 20, 20) | - __gen_field(values->GlobalSnapshotCountReset, 19, 19) | - __gen_field(values->TLBInvalidate, 18, 18) | - __gen_field(values->GenericMediaStateClear, 16, 16) | - __gen_field(values->PostSyncOperation, 14, 15) | - __gen_field(values->DepthStallEnable, 13, 13) | - __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | - __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | - __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | - __gen_field(values->IndirectStatePointersDisable, 9, 9) | - __gen_field(values->NotifyEnable, 8, 8) | - __gen_field(values->PipeControlFlushEnable, 7, 7) | - __gen_field(values->DCFlushEnable, 5, 5) | - __gen_field(values->VFCacheInvalidationEnable, 4, 4) | - 
__gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | - __gen_field(values->StateCacheInvalidationEnable, 2, 2) | - __gen_field(values->StallAtPixelScoreboard, 1, 1) | - __gen_field(values->DepthCacheFlushEnable, 0, 0) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->Address, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - uint64_t qw4 = - __gen_field(values->ImmediateData, 0, 63) | - 0; - - dw[4] = qw4; - dw[5] = qw4 >> 32; - -} - -#define GEN8_SCISSOR_RECT_length 0x00000002 - -struct GEN8_SCISSOR_RECT { - uint32_t ScissorRectangleYMin; - uint32_t ScissorRectangleXMin; - uint32_t ScissorRectangleYMax; - uint32_t ScissorRectangleXMax; -}; - -static inline void -GEN8_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SCISSOR_RECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ScissorRectangleYMin, 16, 31) | - __gen_field(values->ScissorRectangleXMin, 0, 15) | - 0; - - dw[1] = - __gen_field(values->ScissorRectangleYMax, 16, 31) | - __gen_field(values->ScissorRectangleXMax, 0, 15) | - 0; - -} - -#define GEN8_SF_CLIP_VIEWPORT_length 0x00000010 - -struct GEN8_SF_CLIP_VIEWPORT { - float ViewportMatrixElementm00; - float ViewportMatrixElementm11; - float ViewportMatrixElementm22; - float ViewportMatrixElementm30; - float ViewportMatrixElementm31; - float ViewportMatrixElementm32; - float XMinClipGuardband; - float XMaxClipGuardband; - float YMinClipGuardband; - float YMaxClipGuardband; - float XMinViewPort; - float XMaxViewPort; - float YMinViewPort; - float YMaxViewPort; -}; - -static inline void -GEN8_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SF_CLIP_VIEWPORT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_float(values->ViewportMatrixElementm00) | - 0; - - dw[1] = - __gen_float(values->ViewportMatrixElementm11) | - 0; - - dw[2] = - 
__gen_float(values->ViewportMatrixElementm22) | - 0; - - dw[3] = - __gen_float(values->ViewportMatrixElementm30) | - 0; - - dw[4] = - __gen_float(values->ViewportMatrixElementm31) | - 0; - - dw[5] = - __gen_float(values->ViewportMatrixElementm32) | - 0; - - dw[6] = - 0; - - dw[7] = - 0; - - dw[8] = - __gen_float(values->XMinClipGuardband) | - 0; - - dw[9] = - __gen_float(values->XMaxClipGuardband) | - 0; - - dw[10] = - __gen_float(values->YMinClipGuardband) | - 0; - - dw[11] = - __gen_float(values->YMaxClipGuardband) | - 0; - - dw[12] = - __gen_float(values->XMinViewPort) | - 0; - - dw[13] = - __gen_float(values->XMaxViewPort) | - 0; - - dw[14] = - __gen_float(values->YMinViewPort) | - 0; - - dw[15] = - __gen_float(values->YMaxViewPort) | - 0; - -} - -#define GEN8_BLEND_STATE_length 0x00000011 - -#define GEN8_BLEND_STATE_ENTRY_length 0x00000002 - -struct GEN8_BLEND_STATE_ENTRY { - bool LogicOpEnable; - uint32_t LogicOpFunction; - uint32_t PreBlendSourceOnlyClampEnable; -#define COLORCLAMP_UNORM 0 -#define COLORCLAMP_SNORM 1 -#define COLORCLAMP_RTFORMAT 2 - uint32_t ColorClampRange; - bool PreBlendColorClampEnable; - bool PostBlendColorClampEnable; - bool ColorBufferBlendEnable; - uint32_t SourceBlendFactor; - uint32_t DestinationBlendFactor; - uint32_t ColorBlendFunction; - uint32_t SourceAlphaBlendFactor; - uint32_t DestinationAlphaBlendFactor; - uint32_t AlphaBlendFunction; - bool WriteDisableAlpha; - bool WriteDisableRed; - bool WriteDisableGreen; - bool WriteDisableBlue; -}; - -static inline void -GEN8_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_BLEND_STATE_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint64_t qw0 = - __gen_field(values->LogicOpEnable, 63, 63) | - __gen_field(values->LogicOpFunction, 59, 62) | - __gen_field(values->PreBlendSourceOnlyClampEnable, 36, 36) | - __gen_field(values->ColorClampRange, 34, 35) | - __gen_field(values->PreBlendColorClampEnable, 33, 33) | - 
__gen_field(values->PostBlendColorClampEnable, 32, 32) | - __gen_field(values->ColorBufferBlendEnable, 31, 31) | - __gen_field(values->SourceBlendFactor, 26, 30) | - __gen_field(values->DestinationBlendFactor, 21, 25) | - __gen_field(values->ColorBlendFunction, 18, 20) | - __gen_field(values->SourceAlphaBlendFactor, 13, 17) | - __gen_field(values->DestinationAlphaBlendFactor, 8, 12) | - __gen_field(values->AlphaBlendFunction, 5, 7) | - __gen_field(values->WriteDisableAlpha, 3, 3) | - __gen_field(values->WriteDisableRed, 2, 2) | - __gen_field(values->WriteDisableGreen, 1, 1) | - __gen_field(values->WriteDisableBlue, 0, 0) | - 0; - - dw[0] = qw0; - dw[1] = qw0 >> 32; - -} - -struct GEN8_BLEND_STATE { - bool AlphaToCoverageEnable; - bool IndependentAlphaBlendEnable; - bool AlphaToOneEnable; - bool AlphaToCoverageDitherEnable; - bool AlphaTestEnable; - uint32_t AlphaTestFunction; - bool ColorDitherEnable; - uint32_t XDitherOffset; - uint32_t YDitherOffset; - struct GEN8_BLEND_STATE_ENTRY Entry[8]; -}; - -static inline void -GEN8_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_BLEND_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->AlphaToCoverageEnable, 31, 31) | - __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | - __gen_field(values->AlphaToOneEnable, 29, 29) | - __gen_field(values->AlphaToCoverageDitherEnable, 28, 28) | - __gen_field(values->AlphaTestEnable, 27, 27) | - __gen_field(values->AlphaTestFunction, 24, 26) | - __gen_field(values->ColorDitherEnable, 23, 23) | - __gen_field(values->XDitherOffset, 21, 22) | - __gen_field(values->YDitherOffset, 19, 20) | - 0; - - for (uint32_t i = 0, j = 1; i < 8; i++, j += 2) - GEN8_BLEND_STATE_ENTRY_pack(data, &dw[j], &values->Entry[i]); -} - -#define GEN8_CC_VIEWPORT_length 0x00000002 - -struct GEN8_CC_VIEWPORT { - float MinimumDepth; - float MaximumDepth; -}; - -static inline void -GEN8_CC_VIEWPORT_pack(__gen_user_data *data, 
void * restrict dst, - const struct GEN8_CC_VIEWPORT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_float(values->MinimumDepth) | - 0; - - dw[1] = - __gen_float(values->MaximumDepth) | - 0; - -} - -#define GEN8_COLOR_CALC_STATE_length 0x00000006 - -struct GEN8_COLOR_CALC_STATE { - uint32_t StencilReferenceValue; - uint32_t BackFaceStencilReferenceValue; -#define Cancelled 0 -#define NotCancelled 1 - uint32_t RoundDisableFunctionDisable; -#define ALPHATEST_UNORM8 0 -#define ALPHATEST_FLOAT32 1 - uint32_t AlphaTestFormat; - uint32_t AlphaReferenceValueAsUNORM8; - float AlphaReferenceValueAsFLOAT32; - float BlendConstantColorRed; - float BlendConstantColorGreen; - float BlendConstantColorBlue; - float BlendConstantColorAlpha; -}; - -static inline void -GEN8_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_COLOR_CALC_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->StencilReferenceValue, 24, 31) | - __gen_field(values->BackFaceStencilReferenceValue, 16, 23) | - __gen_field(values->RoundDisableFunctionDisable, 15, 15) | - __gen_field(values->AlphaTestFormat, 0, 0) | - 0; - - dw[1] = - __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | - __gen_float(values->AlphaReferenceValueAsFLOAT32) | - 0; - - dw[2] = - __gen_float(values->BlendConstantColorRed) | - 0; - - dw[3] = - __gen_float(values->BlendConstantColorGreen) | - 0; - - dw[4] = - __gen_float(values->BlendConstantColorBlue) | - 0; - - dw[5] = - __gen_float(values->BlendConstantColorAlpha) | - 0; - -} - -#define GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576_length 0x00000002 - -struct GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576 { - uint32_t BlackPointOffsetR; - uint32_t BlackPointOffsetG; - uint32_t BlackPointOffsetB; -}; - -static inline void -GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576 * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->BlackPointOffsetR, 0, 12) | - 0; - - dw[1] = - __gen_field(values->BlackPointOffsetG, 13, 25) | - __gen_field(values->BlackPointOffsetB, 0, 12) | - 0; - -} - -#define GEN8_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 - -struct GEN8_INTERFACE_DESCRIPTOR_DATA { - uint32_t KernelStartPointer; - uint32_t KernelStartPointerHigh; -#define Ftz 0 -#define SetByKernel 1 - uint32_t DenormMode; -#define Multiple 0 -#define Single 1 - uint32_t SingleProgramFlow; -#define NormalPriority 0 -#define HighPriority 1 - uint32_t ThreadPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool MaskStackExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t SamplerStatePointer; -#define Nosamplersused 0 -#define Between1and4samplersused 1 -#define Between5and8samplersused 2 -#define Between9and12samplersused 3 -#define Between13and16samplersused 4 - uint32_t SamplerCount; - uint32_t BindingTablePointer; - uint32_t BindingTableEntryCount; - uint32_t ConstantIndirectURBEntryReadLength; - uint32_t ConstantURBEntryReadOffset; -#define RTNE 0 -#define RU 1 -#define RD 2 -#define RTZ 3 - uint32_t RoundingMode; - bool BarrierEnable; -#define Encodes0k 0 -#define Encodes4k 1 -#define Encodes8k 2 -#define Encodes16k 4 -#define Encodes32k 8 -#define Encodes64k 16 - uint32_t SharedLocalMemorySize; - uint32_t NumberofThreadsinGPGPUThreadGroup; - uint32_t CrossThreadConstantDataReadLength; -}; - -static inline void -GEN8_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_INTERFACE_DESCRIPTOR_DATA * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->KernelStartPointer, 6, 31) | - 0; - - dw[1] = - __gen_offset(values->KernelStartPointerHigh, 0, 15) | - 0; - - dw[2] = - __gen_field(values->DenormMode, 
19, 19) | - __gen_field(values->SingleProgramFlow, 18, 18) | - __gen_field(values->ThreadPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->MaskStackExceptionEnable, 11, 11) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - dw[3] = - __gen_offset(values->SamplerStatePointer, 5, 31) | - __gen_field(values->SamplerCount, 2, 4) | - 0; - - dw[4] = - __gen_offset(values->BindingTablePointer, 5, 15) | - __gen_field(values->BindingTableEntryCount, 0, 4) | - 0; - - dw[5] = - __gen_field(values->ConstantIndirectURBEntryReadLength, 16, 31) | - __gen_field(values->ConstantURBEntryReadOffset, 0, 15) | - 0; - - dw[6] = - __gen_field(values->RoundingMode, 22, 23) | - __gen_field(values->BarrierEnable, 21, 21) | - __gen_field(values->SharedLocalMemorySize, 16, 20) | - __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 9) | - 0; - - dw[7] = - __gen_field(values->CrossThreadConstantDataReadLength, 0, 7) | - 0; - -} - -#define GEN8_BINDING_TABLE_STATE_length 0x00000001 - -struct GEN8_BINDING_TABLE_STATE { - uint32_t SurfaceStatePointer; -}; - -static inline void -GEN8_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_BINDING_TABLE_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->SurfaceStatePointer, 6, 31) | - 0; - -} - -#define GEN8_RENDER_SURFACE_STATE_length 0x00000010 - -struct GEN8_RENDER_SURFACE_STATE { -#define SURFTYPE_1D 0 -#define SURFTYPE_2D 1 -#define SURFTYPE_3D 2 -#define SURFTYPE_CUBE 3 -#define SURFTYPE_BUFFER 4 -#define SURFTYPE_STRBUF 5 -#define SURFTYPE_NULL 7 - uint32_t SurfaceType; - bool SurfaceArray; - uint32_t SurfaceFormat; -#define VALIGN4 1 -#define VALIGN8 2 -#define VALIGN16 3 - uint32_t SurfaceVerticalAlignment; -#define HALIGN4 1 -#define HALIGN8 2 -#define HALIGN16 3 - uint32_t SurfaceHorizontalAlignment; -#define LINEAR 0 -#define 
WMAJOR 1 -#define XMAJOR 2 -#define YMAJOR 3 - uint32_t TileMode; - uint32_t VerticalLineStride; - uint32_t VerticalLineStrideOffset; - bool SamplerL2BypassModeDisable; -#define WriteOnlyCache 0 -#define ReadWriteCache 1 - uint32_t RenderCacheReadWriteMode; -#define NORMAL_MODE 0 -#define PROGRESSIVE_FRAME 2 -#define INTERLACED_FRAME 3 - uint32_t MediaBoundaryPixelMode; - bool CubeFaceEnablePositiveZ; - bool CubeFaceEnableNegativeZ; - bool CubeFaceEnablePositiveY; - bool CubeFaceEnableNegativeY; - bool CubeFaceEnablePositiveX; - bool CubeFaceEnableNegativeX; - struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; - float BaseMipLevel; - uint32_t SurfaceQPitch; - uint32_t Height; - uint32_t Width; - uint32_t Depth; - uint32_t SurfacePitch; -#define _0DEG 0 -#define _90DEG 1 -#define _270DEG 3 - uint32_t RenderTargetAndSampleUnormRotation; - uint32_t MinimumArrayElement; - uint32_t RenderTargetViewExtent; -#define MSS 0 -#define DEPTH_STENCIL 1 - uint32_t MultisampledSurfaceStorageFormat; -#define MULTISAMPLECOUNT_1 0 -#define MULTISAMPLECOUNT_2 1 -#define MULTISAMPLECOUNT_4 2 -#define MULTISAMPLECOUNT_8 3 - uint32_t NumberofMultisamples; - uint32_t MultisamplePositionPaletteIndex; - uint32_t XOffset; - uint32_t YOffset; - bool EWADisableForCube; -#define GPUcoherent 0 -#define IAcoherent 1 - uint32_t CoherencyType; - uint32_t SurfaceMinLOD; - uint32_t MIPCountLOD; - uint32_t AuxiliarySurfaceQPitch; - uint32_t AuxiliarySurfacePitch; -#define AUX_NONE 0 -#define AUX_MCS 1 -#define AUX_APPEND 2 -#define AUX_HIZ 3 - uint32_t AuxiliarySurfaceMode; - bool SeparateUVPlaneEnable; - uint32_t XOffsetforUorUVPlane; - uint32_t YOffsetforUorUVPlane; - uint32_t RedClearColor; - uint32_t GreenClearColor; - uint32_t BlueClearColor; - uint32_t AlphaClearColor; - uint32_t ShaderChannelSelectRed; - uint32_t ShaderChannelSelectGreen; - uint32_t ShaderChannelSelectBlue; - uint32_t ShaderChannelSelectAlpha; - float ResourceMinLOD; - __gen_address_type SurfaceBaseAddress; - 
uint32_t XOffsetforVPlane; - uint32_t YOffsetforVPlane; - uint32_t AuxiliaryTableIndexforMediaCompressedSurface; - __gen_address_type AuxiliarySurfaceBaseAddress; -}; - -static inline void -GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_RENDER_SURFACE_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->SurfaceType, 29, 31) | - __gen_field(values->SurfaceArray, 28, 28) | - __gen_field(values->SurfaceFormat, 18, 26) | - __gen_field(values->SurfaceVerticalAlignment, 16, 17) | - __gen_field(values->SurfaceHorizontalAlignment, 14, 15) | - __gen_field(values->TileMode, 12, 13) | - __gen_field(values->VerticalLineStride, 11, 11) | - __gen_field(values->VerticalLineStrideOffset, 10, 10) | - __gen_field(values->SamplerL2BypassModeDisable, 9, 9) | - __gen_field(values->RenderCacheReadWriteMode, 8, 8) | - __gen_field(values->MediaBoundaryPixelMode, 6, 7) | - __gen_field(values->CubeFaceEnablePositiveZ, 0, 0) | - __gen_field(values->CubeFaceEnableNegativeZ, 1, 1) | - __gen_field(values->CubeFaceEnablePositiveY, 2, 2) | - __gen_field(values->CubeFaceEnableNegativeY, 3, 3) | - __gen_field(values->CubeFaceEnablePositiveX, 4, 4) | - __gen_field(values->CubeFaceEnableNegativeX, 5, 5) | - 0; - - uint32_t dw_MemoryObjectControlState; - GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); - dw[1] = - __gen_field(dw_MemoryObjectControlState, 24, 30) | - __gen_field(values->BaseMipLevel * (1 << 1), 19, 23) | - __gen_field(values->SurfaceQPitch, 0, 14) | - 0; - - dw[2] = - __gen_field(values->Height, 16, 29) | - __gen_field(values->Width, 0, 13) | - 0; - - dw[3] = - __gen_field(values->Depth, 21, 31) | - __gen_field(values->SurfacePitch, 0, 17) | - 0; - - dw[4] = - __gen_field(values->RenderTargetAndSampleUnormRotation, 29, 30) | - __gen_field(values->MinimumArrayElement, 18, 28) | - __gen_field(values->RenderTargetViewExtent, 7, 
17) | - __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | - __gen_field(values->NumberofMultisamples, 3, 5) | - __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | - 0; - - dw[5] = - __gen_offset(values->XOffset, 25, 31) | - __gen_offset(values->YOffset, 21, 23) | - __gen_field(values->EWADisableForCube, 20, 20) | - __gen_field(values->CoherencyType, 14, 14) | - __gen_field(values->SurfaceMinLOD, 4, 7) | - __gen_field(values->MIPCountLOD, 0, 3) | - 0; - - dw[6] = - __gen_field(values->AuxiliarySurfaceQPitch, 16, 30) | - __gen_field(values->AuxiliarySurfacePitch, 3, 11) | - __gen_field(values->AuxiliarySurfaceMode, 0, 2) | - __gen_field(values->SeparateUVPlaneEnable, 31, 31) | - __gen_field(values->XOffsetforUorUVPlane, 16, 29) | - __gen_field(values->YOffsetforUorUVPlane, 0, 13) | - 0; - - dw[7] = - __gen_field(values->RedClearColor, 31, 31) | - __gen_field(values->GreenClearColor, 30, 30) | - __gen_field(values->BlueClearColor, 29, 29) | - __gen_field(values->AlphaClearColor, 28, 28) | - __gen_field(values->ShaderChannelSelectRed, 25, 27) | - __gen_field(values->ShaderChannelSelectGreen, 22, 24) | - __gen_field(values->ShaderChannelSelectBlue, 19, 21) | - __gen_field(values->ShaderChannelSelectAlpha, 16, 18) | - __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | - 0; - - uint32_t dw8 = - 0; - - uint64_t qw8 = - __gen_combine_address(data, &dw[8], values->SurfaceBaseAddress, dw8); - - dw[8] = qw8; - dw[9] = qw8 >> 32; - - uint32_t dw10 = - __gen_field(values->XOffsetforVPlane, 48, 61) | - __gen_field(values->YOffsetforVPlane, 32, 45) | - __gen_field(values->AuxiliaryTableIndexforMediaCompressedSurface, 21, 31) | - 0; - - uint64_t qw10 = - __gen_combine_address(data, &dw[10], values->AuxiliarySurfaceBaseAddress, dw10); - - dw[10] = qw10; - dw[11] = qw10 >> 32; - - dw[12] = - 0; - - dw[13] = - 0; - - dw[14] = - 0; - - dw[15] = - 0; - -} - -#define GEN8_FILTER_COEFFICIENT_length 0x00000001 - -struct GEN8_FILTER_COEFFICIENT { - uint32_t 
FilterCoefficient; -}; - -static inline void -GEN8_FILTER_COEFFICIENT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_FILTER_COEFFICIENT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->FilterCoefficient, 0, 7) | - 0; - -} - -#define GEN8_SAMPLER_STATE_length 0x00000004 - -struct GEN8_SAMPLER_STATE { - bool SamplerDisable; -#define DX10OGL 0 -#define DX9 1 - uint32_t TextureBorderColorMode; -#define CLAMP_NONE 0 -#define CLAMP_OGL 2 - uint32_t LODPreClampMode; - float BaseMipLevel; -#define MIPFILTER_NONE 0 -#define MIPFILTER_NEAREST 1 -#define MIPFILTER_LINEAR 3 - uint32_t MipModeFilter; -#define MAPFILTER_NEAREST 0 -#define MAPFILTER_LINEAR 1 -#define MAPFILTER_ANISOTROPIC 2 -#define MAPFILTER_MONO 6 - uint32_t MagModeFilter; -#define MAPFILTER_NEAREST 0 -#define MAPFILTER_LINEAR 1 -#define MAPFILTER_ANISOTROPIC 2 -#define MAPFILTER_MONO 6 - uint32_t MinModeFilter; - float TextureLODBias; -#define LEGACY 0 -#define EWAApproximation 1 - uint32_t AnisotropicAlgorithm; - float MinLOD; - float MaxLOD; - bool ChromaKeyEnable; - uint32_t ChromaKeyIndex; -#define KEYFILTER_KILL_ON_ANY_MATCH 0 -#define KEYFILTER_REPLACE_BLACK 1 - uint32_t ChromaKeyMode; -#define PREFILTEROPALWAYS 0 -#define PREFILTEROPNEVER 1 -#define PREFILTEROPLESS 2 -#define PREFILTEROPEQUAL 3 -#define PREFILTEROPLEQUAL 4 -#define PREFILTEROPGREATER 5 -#define PREFILTEROPNOTEQUAL 6 -#define PREFILTEROPGEQUAL 7 - uint32_t ShadowFunction; -#define PROGRAMMED 0 -#define OVERRIDE 1 - uint32_t CubeSurfaceControlMode; - uint32_t IndirectStatePointer; -#define MIPNONE 0 -#define MIPFILTER 1 - uint32_t LODClampMagnificationMode; -#define RATIO21 0 -#define RATIO41 1 -#define RATIO61 2 -#define RATIO81 3 -#define RATIO101 4 -#define RATIO121 5 -#define RATIO141 6 -#define RATIO161 7 - uint32_t MaximumAnisotropy; - bool RAddressMinFilterRoundingEnable; - bool RAddressMagFilterRoundingEnable; - bool VAddressMinFilterRoundingEnable; - bool 
VAddressMagFilterRoundingEnable; - bool UAddressMinFilterRoundingEnable; - bool UAddressMagFilterRoundingEnable; -#define FULL 0 -#define HIGH 1 -#define MED 2 -#define LOW 3 - uint32_t TrilinearFilterQuality; - bool NonnormalizedCoordinateEnable; - uint32_t TCXAddressControlMode; - uint32_t TCYAddressControlMode; - uint32_t TCZAddressControlMode; -}; - -static inline void -GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SAMPLER_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->SamplerDisable, 31, 31) | - __gen_field(values->TextureBorderColorMode, 29, 29) | - __gen_field(values->LODPreClampMode, 27, 28) | - __gen_field(values->BaseMipLevel * (1 << 1), 22, 26) | - __gen_field(values->MipModeFilter, 20, 21) | - __gen_field(values->MagModeFilter, 17, 19) | - __gen_field(values->MinModeFilter, 14, 16) | - __gen_fixed(values->TextureLODBias, 1, 13, true, 8) | - __gen_field(values->AnisotropicAlgorithm, 0, 0) | - 0; - - dw[1] = - __gen_field(values->MinLOD * (1 << 8), 20, 31) | - __gen_field(values->MaxLOD * (1 << 8), 8, 19) | - __gen_field(values->ChromaKeyEnable, 7, 7) | - __gen_field(values->ChromaKeyIndex, 5, 6) | - __gen_field(values->ChromaKeyMode, 4, 4) | - __gen_field(values->ShadowFunction, 1, 3) | - __gen_field(values->CubeSurfaceControlMode, 0, 0) | - 0; - - dw[2] = - __gen_field(values->IndirectStatePointer, 6, 23) | - __gen_field(values->LODClampMagnificationMode, 0, 0) | - 0; - - dw[3] = - __gen_field(values->MaximumAnisotropy, 19, 21) | - __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | - __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | - __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | - __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | - __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | - __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | - 
__gen_field(values->TrilinearFilterQuality, 11, 12) | - __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | - __gen_field(values->TCXAddressControlMode, 6, 8) | - __gen_field(values->TCYAddressControlMode, 3, 5) | - __gen_field(values->TCZAddressControlMode, 0, 2) | - 0; - -} - -#define GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_length 0x00000008 - -struct GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS { - uint32_t Table0YFilterCoefficientn1; - uint32_t Table0XFilterCoefficientn1; - uint32_t Table0YFilterCoefficientn0; - uint32_t Table0XFilterCoefficientn0; - uint32_t Table0YFilterCoefficientn3; - uint32_t Table0XFilterCoefficientn3; - uint32_t Table0YFilterCoefficientn2; - uint32_t Table0XFilterCoefficientn2; - uint32_t Table0YFilterCoefficientn5; - uint32_t Table0XFilterCoefficientn5; - uint32_t Table0YFilterCoefficientn4; - uint32_t Table0XFilterCoefficientn4; - uint32_t Table0YFilterCoefficientn7; - uint32_t Table0XFilterCoefficientn7; - uint32_t Table0YFilterCoefficientn6; - uint32_t Table0XFilterCoefficientn6; - uint32_t Table1XFilterCoefficientn3; - uint32_t Table1XFilterCoefficientn2; - uint32_t Table1XFilterCoefficientn5; - uint32_t Table1XFilterCoefficientn4; - uint32_t Table1YFilterCoefficientn3; - uint32_t Table1YFilterCoefficientn2; - uint32_t Table1YFilterCoefficientn5; - uint32_t Table1YFilterCoefficientn4; -}; - -static inline void -GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->Table0YFilterCoefficientn1, 24, 31) | - __gen_field(values->Table0XFilterCoefficientn1, 16, 23) | - __gen_field(values->Table0YFilterCoefficientn0, 8, 15) | - __gen_field(values->Table0XFilterCoefficientn0, 0, 7) | - 0; - - dw[1] = - __gen_field(values->Table0YFilterCoefficientn3, 24, 31) | - __gen_field(values->Table0XFilterCoefficientn3, 16, 23) | - 
__gen_field(values->Table0YFilterCoefficientn2, 8, 15) | - __gen_field(values->Table0XFilterCoefficientn2, 0, 7) | - 0; - - dw[2] = - __gen_field(values->Table0YFilterCoefficientn5, 24, 31) | - __gen_field(values->Table0XFilterCoefficientn5, 16, 23) | - __gen_field(values->Table0YFilterCoefficientn4, 8, 15) | - __gen_field(values->Table0XFilterCoefficientn4, 0, 7) | - 0; - - dw[3] = - __gen_field(values->Table0YFilterCoefficientn7, 24, 31) | - __gen_field(values->Table0XFilterCoefficientn7, 16, 23) | - __gen_field(values->Table0YFilterCoefficientn6, 8, 15) | - __gen_field(values->Table0XFilterCoefficientn6, 0, 7) | - 0; - - dw[4] = - __gen_field(values->Table1XFilterCoefficientn3, 24, 31) | - __gen_field(values->Table1XFilterCoefficientn2, 16, 23) | - 0; - - dw[5] = - __gen_field(values->Table1XFilterCoefficientn5, 8, 15) | - __gen_field(values->Table1XFilterCoefficientn4, 0, 7) | - 0; - - dw[6] = - __gen_field(values->Table1YFilterCoefficientn3, 24, 31) | - __gen_field(values->Table1YFilterCoefficientn2, 16, 23) | - 0; - - dw[7] = - __gen_field(values->Table1YFilterCoefficientn5, 8, 15) | - __gen_field(values->Table1YFilterCoefficientn4, 0, 7) | - 0; - -} - -/* Enum 3D_Prim_Topo_Type */ -#define _3DPRIM_POINTLIST 1 -#define _3DPRIM_LINELIST 2 -#define _3DPRIM_LINESTRIP 3 -#define _3DPRIM_TRILIST 4 -#define _3DPRIM_TRISTRIP 5 -#define _3DPRIM_TRIFAN 6 -#define _3DPRIM_QUADLIST 7 -#define _3DPRIM_QUADSTRIP 8 -#define _3DPRIM_LINELIST_ADJ 9 -#define _3DPRIM_LINESTRIP_ADJ 10 -#define _3DPRIM_TRILIST_ADJ 11 -#define _3DPRIM_TRISTRIP_ADJ 12 -#define _3DPRIM_TRISTRIP_REVERSE 13 -#define _3DPRIM_POLYGON 14 -#define _3DPRIM_RECTLIST 15 -#define _3DPRIM_LINELOOP 16 -#define _3DPRIM_POINTLIST_BF 17 -#define _3DPRIM_LINESTRIP_CONT 18 -#define _3DPRIM_LINESTRIP_BF 19 -#define _3DPRIM_LINESTRIP_CONT_BF 20 -#define _3DPRIM_TRIFAN_NOSTIPPLE 22 -#define _3DPRIM_PATCHLIST_1 32 -#define _3DPRIM_PATCHLIST_2 33 -#define _3DPRIM_PATCHLIST_3 34 -#define _3DPRIM_PATCHLIST_4 35 -#define 
_3DPRIM_PATCHLIST_5 36 -#define _3DPRIM_PATCHLIST_6 37 -#define _3DPRIM_PATCHLIST_7 38 -#define _3DPRIM_PATCHLIST_8 39 -#define _3DPRIM_PATCHLIST_9 40 -#define _3DPRIM_PATCHLIST_10 41 -#define _3DPRIM_PATCHLIST_11 42 -#define _3DPRIM_PATCHLIST_12 43 -#define _3DPRIM_PATCHLIST_13 44 -#define _3DPRIM_PATCHLIST_14 45 -#define _3DPRIM_PATCHLIST_15 46 -#define _3DPRIM_PATCHLIST_16 47 -#define _3DPRIM_PATCHLIST_17 48 -#define _3DPRIM_PATCHLIST_18 49 -#define _3DPRIM_PATCHLIST_19 50 -#define _3DPRIM_PATCHLIST_20 51 -#define _3DPRIM_PATCHLIST_21 52 -#define _3DPRIM_PATCHLIST_22 53 -#define _3DPRIM_PATCHLIST_23 54 -#define _3DPRIM_PATCHLIST_24 55 -#define _3DPRIM_PATCHLIST_25 56 -#define _3DPRIM_PATCHLIST_26 57 -#define _3DPRIM_PATCHLIST_27 58 -#define _3DPRIM_PATCHLIST_28 59 -#define _3DPRIM_PATCHLIST_29 60 -#define _3DPRIM_PATCHLIST_30 61 -#define _3DPRIM_PATCHLIST_31 62 -#define _3DPRIM_PATCHLIST_32 63 - -/* Enum 3D_Vertex_Component_Control */ -#define VFCOMP_NOSTORE 0 -#define VFCOMP_STORE_SRC 1 -#define VFCOMP_STORE_0 2 -#define VFCOMP_STORE_1_FP 3 -#define VFCOMP_STORE_1_INT 4 -#define VFCOMP_STORE_PID 7 - -/* Enum WRAP_SHORTEST_ENABLE */ -#define WSE_X 1 -#define WSE_Y 2 -#define WSE_XY 3 -#define WSE_Z 4 -#define WSE_XZ 5 -#define WSE_YZ 6 -#define WSE_XYZ 7 -#define WSE_W 8 -#define WSE_XW 9 -#define WSE_YW 10 -#define WSE_XYW 11 -#define WSE_ZW 12 -#define WSE_XZW 13 -#define WSE_YZW 14 -#define WSE_XYZW 15 - -/* Enum 3D_Stencil_Operation */ -#define STENCILOP_KEEP 0 -#define STENCILOP_ZERO 1 -#define STENCILOP_REPLACE 2 -#define STENCILOP_INCRSAT 3 -#define STENCILOP_DECRSAT 4 -#define STENCILOP_INCR 5 -#define STENCILOP_DECR 6 -#define STENCILOP_INVERT 7 - -/* Enum 3D_Color_Buffer_Blend_Factor */ -#define BLENDFACTOR_ONE 1 -#define BLENDFACTOR_SRC_COLOR 2 -#define BLENDFACTOR_SRC_ALPHA 3 -#define BLENDFACTOR_DST_ALPHA 4 -#define BLENDFACTOR_DST_COLOR 5 -#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 -#define BLENDFACTOR_CONST_COLOR 7 -#define BLENDFACTOR_CONST_ALPHA 8 
-#define BLENDFACTOR_SRC1_COLOR 9 -#define BLENDFACTOR_SRC1_ALPHA 10 -#define BLENDFACTOR_ZERO 17 -#define BLENDFACTOR_INV_SRC_COLOR 18 -#define BLENDFACTOR_INV_SRC_ALPHA 19 -#define BLENDFACTOR_INV_DST_ALPHA 20 -#define BLENDFACTOR_INV_DST_COLOR 21 -#define BLENDFACTOR_INV_CONST_COLOR 23 -#define BLENDFACTOR_INV_CONST_ALPHA 24 -#define BLENDFACTOR_INV_SRC1_COLOR 25 -#define BLENDFACTOR_INV_SRC1_ALPHA 26 - -/* Enum 3D_Color_Buffer_Blend_Function */ -#define BLENDFUNCTION_ADD 0 -#define BLENDFUNCTION_SUBTRACT 1 -#define BLENDFUNCTION_REVERSE_SUBTRACT 2 -#define BLENDFUNCTION_MIN 3 -#define BLENDFUNCTION_MAX 4 - -/* Enum 3D_Compare_Function */ -#define COMPAREFUNCTION_ALWAYS 0 -#define COMPAREFUNCTION_NEVER 1 -#define COMPAREFUNCTION_LESS 2 -#define COMPAREFUNCTION_EQUAL 3 -#define COMPAREFUNCTION_LEQUAL 4 -#define COMPAREFUNCTION_GREATER 5 -#define COMPAREFUNCTION_NOTEQUAL 6 -#define COMPAREFUNCTION_GEQUAL 7 - -/* Enum 3D_Logic_Op_Function */ -#define LOGICOP_CLEAR 0 -#define LOGICOP_NOR 1 -#define LOGICOP_AND_INVERTED 2 -#define LOGICOP_COPY_INVERTED 3 -#define LOGICOP_AND_REVERSE 4 -#define LOGICOP_INVERT 5 -#define LOGICOP_XOR 6 -#define LOGICOP_NAND 7 -#define LOGICOP_AND 8 -#define LOGICOP_EQUIV 9 -#define LOGICOP_NOOP 10 -#define LOGICOP_OR_INVERTED 11 -#define LOGICOP_COPY 12 -#define LOGICOP_OR_REVERSE 13 -#define LOGICOP_OR 14 -#define LOGICOP_SET 15 - -/* Enum SURFACE_FORMAT */ -#define R32G32B32A32_FLOAT 0 -#define R32G32B32A32_SINT 1 -#define R32G32B32A32_UINT 2 -#define R32G32B32A32_UNORM 3 -#define R32G32B32A32_SNORM 4 -#define R64G64_FLOAT 5 -#define R32G32B32X32_FLOAT 6 -#define R32G32B32A32_SSCALED 7 -#define R32G32B32A32_USCALED 8 -#define R32G32B32A32_SFIXED 32 -#define R64G64_PASSTHRU 33 -#define R32G32B32_FLOAT 64 -#define R32G32B32_SINT 65 -#define R32G32B32_UINT 66 -#define R32G32B32_UNORM 67 -#define R32G32B32_SNORM 68 -#define R32G32B32_SSCALED 69 -#define R32G32B32_USCALED 70 -#define R32G32B32_SFIXED 80 -#define R16G16B16A16_UNORM 128 
-#define R16G16B16A16_SNORM 129 -#define R16G16B16A16_SINT 130 -#define R16G16B16A16_UINT 131 -#define R16G16B16A16_FLOAT 132 -#define R32G32_FLOAT 133 -#define R32G32_SINT 134 -#define R32G32_UINT 135 -#define R32_FLOAT_X8X24_TYPELESS 136 -#define X32_TYPELESS_G8X24_UINT 137 -#define L32A32_FLOAT 138 -#define R32G32_UNORM 139 -#define R32G32_SNORM 140 -#define R64_FLOAT 141 -#define R16G16B16X16_UNORM 142 -#define R16G16B16X16_FLOAT 143 -#define A32X32_FLOAT 144 -#define L32X32_FLOAT 145 -#define I32X32_FLOAT 146 -#define R16G16B16A16_SSCALED 147 -#define R16G16B16A16_USCALED 148 -#define R32G32_SSCALED 149 -#define R32G32_USCALED 150 -#define R32G32_SFIXED 160 -#define R64_PASSTHRU 161 -#define B8G8R8A8_UNORM 192 -#define B8G8R8A8_UNORM_SRGB 193 -#define R10G10B10A2_UNORM 194 -#define R10G10B10A2_UNORM_SRGB 195 -#define R10G10B10A2_UINT 196 -#define R10G10B10_SNORM_A2_UNORM 197 -#define R8G8B8A8_UNORM 199 -#define R8G8B8A8_UNORM_SRGB 200 -#define R8G8B8A8_SNORM 201 -#define R8G8B8A8_SINT 202 -#define R8G8B8A8_UINT 203 -#define R16G16_UNORM 204 -#define R16G16_SNORM 205 -#define R16G16_SINT 206 -#define R16G16_UINT 207 -#define R16G16_FLOAT 208 -#define B10G10R10A2_UNORM 209 -#define B10G10R10A2_UNORM_SRGB 210 -#define R11G11B10_FLOAT 211 -#define R32_SINT 214 -#define R32_UINT 215 -#define R32_FLOAT 216 -#define R24_UNORM_X8_TYPELESS 217 -#define X24_TYPELESS_G8_UINT 218 -#define L32_UNORM 221 -#define A32_UNORM 222 -#define L16A16_UNORM 223 -#define I24X8_UNORM 224 -#define L24X8_UNORM 225 -#define A24X8_UNORM 226 -#define I32_FLOAT 227 -#define L32_FLOAT 228 -#define A32_FLOAT 229 -#define X8B8_UNORM_G8R8_SNORM 230 -#define A8X8_UNORM_G8R8_SNORM 231 -#define B8X8_UNORM_G8R8_SNORM 232 -#define B8G8R8X8_UNORM 233 -#define B8G8R8X8_UNORM_SRGB 234 -#define R8G8B8X8_UNORM 235 -#define R8G8B8X8_UNORM_SRGB 236 -#define R9G9B9E5_SHAREDEXP 237 -#define B10G10R10X2_UNORM 238 -#define L16A16_FLOAT 240 -#define R32_UNORM 241 -#define R32_SNORM 242 -#define 
R10G10B10X2_USCALED 243 -#define R8G8B8A8_SSCALED 244 -#define R8G8B8A8_USCALED 245 -#define R16G16_SSCALED 246 -#define R16G16_USCALED 247 -#define R32_SSCALED 248 -#define R32_USCALED 249 -#define B5G6R5_UNORM 256 -#define B5G6R5_UNORM_SRGB 257 -#define B5G5R5A1_UNORM 258 -#define B5G5R5A1_UNORM_SRGB 259 -#define B4G4R4A4_UNORM 260 -#define B4G4R4A4_UNORM_SRGB 261 -#define R8G8_UNORM 262 -#define R8G8_SNORM 263 -#define R8G8_SINT 264 -#define R8G8_UINT 265 -#define R16_UNORM 266 -#define R16_SNORM 267 -#define R16_SINT 268 -#define R16_UINT 269 -#define R16_FLOAT 270 -#define A8P8_UNORM_PALETTE0 271 -#define A8P8_UNORM_PALETTE1 272 -#define I16_UNORM 273 -#define L16_UNORM 274 -#define A16_UNORM 275 -#define L8A8_UNORM 276 -#define I16_FLOAT 277 -#define L16_FLOAT 278 -#define A16_FLOAT 279 -#define L8A8_UNORM_SRGB 280 -#define R5G5_SNORM_B6_UNORM 281 -#define B5G5R5X1_UNORM 282 -#define B5G5R5X1_UNORM_SRGB 283 -#define R8G8_SSCALED 284 -#define R8G8_USCALED 285 -#define R16_SSCALED 286 -#define R16_USCALED 287 -#define P8A8_UNORM_PALETTE0 290 -#define P8A8_UNORM_PALETTE1 291 -#define A1B5G5R5_UNORM 292 -#define A4B4G4R4_UNORM 293 -#define L8A8_UINT 294 -#define L8A8_SINT 295 -#define R8_UNORM 320 -#define R8_SNORM 321 -#define R8_SINT 322 -#define R8_UINT 323 -#define A8_UNORM 324 -#define I8_UNORM 325 -#define L8_UNORM 326 -#define P4A4_UNORM_PALETTE0 327 -#define A4P4_UNORM_PALETTE0 328 -#define R8_SSCALED 329 -#define R8_USCALED 330 -#define P8_UNORM_PALETTE0 331 -#define L8_UNORM_SRGB 332 -#define P8_UNORM_PALETTE1 333 -#define P4A4_UNORM_PALETTE1 334 -#define A4P4_UNORM_PALETTE1 335 -#define Y8_UNORM 336 -#define L8_UINT 338 -#define L8_SINT 339 -#define I8_UINT 340 -#define I8_SINT 341 -#define DXT1_RGB_SRGB 384 -#define R1_UNORM 385 -#define YCRCB_NORMAL 386 -#define YCRCB_SWAPUVY 387 -#define P2_UNORM_PALETTE0 388 -#define P2_UNORM_PALETTE1 389 -#define BC1_UNORM 390 -#define BC2_UNORM 391 -#define BC3_UNORM 392 -#define BC4_UNORM 393 -#define BC5_UNORM 
394 -#define BC1_UNORM_SRGB 395 -#define BC2_UNORM_SRGB 396 -#define BC3_UNORM_SRGB 397 -#define MONO8 398 -#define YCRCB_SWAPUV 399 -#define YCRCB_SWAPY 400 -#define DXT1_RGB 401 -#define FXT1 402 -#define R8G8B8_UNORM 403 -#define R8G8B8_SNORM 404 -#define R8G8B8_SSCALED 405 -#define R8G8B8_USCALED 406 -#define R64G64B64A64_FLOAT 407 -#define R64G64B64_FLOAT 408 -#define BC4_SNORM 409 -#define BC5_SNORM 410 -#define R16G16B16_FLOAT 411 -#define R16G16B16_UNORM 412 -#define R16G16B16_SNORM 413 -#define R16G16B16_SSCALED 414 -#define R16G16B16_USCALED 415 -#define BC6H_SF16 417 -#define BC7_UNORM 418 -#define BC7_UNORM_SRGB 419 -#define BC6H_UF16 420 -#define PLANAR_420_8 421 -#define R8G8B8_UNORM_SRGB 424 -#define ETC1_RGB8 425 -#define ETC2_RGB8 426 -#define EAC_R11 427 -#define EAC_RG11 428 -#define EAC_SIGNED_R11 429 -#define EAC_SIGNED_RG11 430 -#define ETC2_SRGB8 431 -#define R16G16B16_UINT 432 -#define R16G16B16_SINT 433 -#define R32_SFIXED 434 -#define R10G10B10A2_SNORM 435 -#define R10G10B10A2_USCALED 436 -#define R10G10B10A2_SSCALED 437 -#define R10G10B10A2_SINT 438 -#define B10G10R10A2_SNORM 439 -#define B10G10R10A2_USCALED 440 -#define B10G10R10A2_SSCALED 441 -#define B10G10R10A2_UINT 442 -#define B10G10R10A2_SINT 443 -#define R64G64B64A64_PASSTHRU 444 -#define R64G64B64_PASSTHRU 445 -#define ETC2_RGB8_PTA 448 -#define ETC2_SRGB8_PTA 449 -#define ETC2_EAC_RGBA8 450 -#define ETC2_EAC_SRGB8_A8 451 -#define R8G8B8_UINT 456 -#define R8G8B8_SINT 457 -#define RAW 511 - -/* Enum Shader Channel Select */ -#define SCS_ZERO 0 -#define SCS_ONE 1 -#define SCS_RED 4 -#define SCS_GREEN 5 -#define SCS_BLUE 6 -#define SCS_ALPHA 7 - -/* Enum Clear Color */ -#define CC_ZERO 0 -#define CC_ONE 1 - -/* Enum Texture Coordinate Mode */ -#define TCM_WRAP 0 -#define TCM_MIRROR 1 -#define TCM_CLAMP 2 -#define TCM_CUBE 3 -#define TCM_CLAMP_BORDER 4 -#define TCM_MIRROR_ONCE 5 -#define TCM_HALF_BORDER 6 - diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 
389f7f797c0..e31966f7d85 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -319,7 +319,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .SingleProgramFlow = false, .KernelStartPointer = pipeline->gs_kernel, - .VectorMaskEnable = Dmask, + .VectorMaskEnable = false, .SamplerCount = 0, .BindingTableEntryCount = 0, .ExpectedVertexCount = pipeline->gs_vertex_count, @@ -375,12 +375,12 @@ genX(graphics_pipeline_create)( else anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .KernelStartPointer = vs_start, - .SingleVertexDispatch = Multiple, - .VectorMaskEnable = Dmask, + .SingleVertexDispatch = false, + .VectorMaskEnable = false, .SamplerCount = 0, .BindingTableEntryCount = vue_prog_data->base.binding_table.size_bytes / 4, - .ThreadDispatchPriority = Normal, + .ThreadDispatchPriority = false, .FloatingPointMode = IEEE754, .IllegalOpcodeExceptionEnable = false, .AccessesUAV = false, diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 6b077df125a..45b8080d0bf 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -451,7 +451,7 @@ VkResult genX(CreateSampler)( struct GENX(SAMPLER_STATE) sampler_state = { .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, - .LODPreClampMode = CLAMP_OGL, + .LODPreClampMode = CLAMP_MODE_OGL, #if ANV_GEN == 8 .BaseMipLevel = 0.0, #endif diff --git a/src/vulkan/gen9.xml b/src/vulkan/gen9.xml new file mode 100644 index 00000000000..3f229b0d0fc --- /dev/null +++ b/src/vulkan/gen9.xml @@ -0,0 +1,3469 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/vulkan/gen9_pack.h b/src/vulkan/gen9_pack.h deleted file mode 100644 index df295f4900a..00000000000 --- a/src/vulkan/gen9_pack.h +++ /dev/null @@ -1,9797 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - - -/* Instructions, enums and structures for SKL. - * - * This file has been generated, do not hand edit. 
- */ - -#pragma once - -#include -#include - -#ifndef __gen_validate_value -#define __gen_validate_value(x) -#endif - -#ifndef __gen_field_functions -#define __gen_field_functions - -union __gen_value { - float f; - uint32_t dw; -}; - -static inline uint64_t -__gen_mbo(uint32_t start, uint32_t end) -{ - return (~0ull >> (64 - (end - start + 1))) << start; -} - -static inline uint64_t -__gen_field(uint64_t v, uint32_t start, uint32_t end) -{ - __gen_validate_value(v); -#if DEBUG - if (end - start + 1 < 64) - assert(v < 1ull << (end - start + 1)); -#endif - - return v << start; -} - -static inline uint64_t -__gen_fixed(float v, uint32_t start, uint32_t end, - bool is_signed, uint32_t fract_bits) -{ - __gen_validate_value(v); - - const float factor = (1 << fract_bits); - - float max, min; - if (is_signed) { - max = ((1 << (end - start)) - 1) / factor; - min = -(1 << (end - start)) / factor; - } else { - max = ((1 << (end - start + 1)) - 1) / factor; - min = 0.0f; - } - - if (v > max) - v = max; - else if (v < min) - v = min; - - int32_t int_val = roundf(v * factor); - - if (is_signed) - int_val &= (1 << (end - start + 1)) - 1; - - return int_val << start; -} - -static inline uint64_t -__gen_offset(uint64_t v, uint32_t start, uint32_t end) -{ - __gen_validate_value(v); -#if DEBUG - uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; - - assert((v & ~mask) == 0); -#endif - - return v; -} - -static inline uint32_t -__gen_float(float v) -{ - __gen_validate_value(v); - return ((union __gen_value) { .f = (v) }).dw; -} - -#ifndef __gen_address_type -#error #define __gen_address_type before including this file -#endif - -#ifndef __gen_user_data -#error #define __gen_combine_address before including this file -#endif - -#endif - -#define GEN9_3DSTATE_URB_VS_length_bias 0x00000002 -#define GEN9_3DSTATE_URB_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 48, \ - .DwordLength = 0 - -#define 
GEN9_3DSTATE_URB_VS_length 0x00000002 - -struct GEN9_3DSTATE_URB_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t VSURBStartingAddress; - uint32_t VSURBEntryAllocationSize; - uint32_t VSNumberofURBEntries; -}; - -static inline void -GEN9_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_URB_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->VSURBStartingAddress, 25, 31) | - __gen_field(values->VSURBEntryAllocationSize, 16, 24) | - __gen_field(values->VSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN9_3DSTATE_VS_length_bias 0x00000002 -#define GEN9_3DSTATE_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 16, \ - .DwordLength = 7 - -#define GEN9_3DSTATE_VS_length 0x00000009 - -#define __gen_prefix(name) GEN9_ ## name - -struct __gen_prefix(3DSTATE_VS) { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint64_t KernelStartPointer; -#define Multiple 0 -#define Single 1 - uint32_t SingleVertexDispatch; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadDispatchPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool AccessesUAV; - bool SoftwareExceptionEnable; - uint64_t 
ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t DispatchGRFStartRegisterForURBData; - uint32_t VertexURBEntryReadLength; - uint32_t VertexURBEntryReadOffset; - uint32_t MaximumNumberofThreads; - bool StatisticsEnable; - bool SIMD8DispatchEnable; - bool VertexCacheDisable; - bool FunctionEnable; - uint32_t VertexURBEntryOutputReadOffset; - uint32_t VertexURBEntryOutputLength; - uint32_t UserClipDistanceClipTestEnableBitmask; - uint32_t UserClipDistanceCullTestEnableBitmask; -}; - -static inline void -GEN9_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint64_t qw1 = - __gen_offset(values->KernelStartPointer, 6, 63) | - 0; - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->SingleVertexDispatch, 31, 31) | - __gen_field(values->VectorMaskEnable, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadDispatchPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->AccessesUAV, 12, 12) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - uint64_t qw4 = - __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = qw4; - dw[5] = qw4 >> 32; - - dw[6] = - __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | - __gen_field(values->VertexURBEntryReadLength, 11, 16) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - 0; - - dw[7] = - __gen_field(values->MaximumNumberofThreads, 23, 31) | - 
__gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->SIMD8DispatchEnable, 2, 2) | - __gen_field(values->VertexCacheDisable, 1, 1) | - __gen_field(values->FunctionEnable, 0, 0) | - 0; - - dw[8] = - __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | - __gen_field(values->VertexURBEntryOutputLength, 16, 20) | - __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | - __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | - 0; - -} - -#define GEN9_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 -#define GEN9_GPGPU_CSR_BASE_ADDRESS_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 4, \ - .DwordLength = 1 - -#define GEN9_GPGPU_CSR_BASE_ADDRESS_length 0x00000003 - -struct GEN9_GPGPU_CSR_BASE_ADDRESS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type GPGPUCSRBaseAddress; -}; - -static inline void -GEN9_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_GPGPU_CSR_BASE_ADDRESS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - -} - -#define GEN9_MI_ATOMIC_length_bias 0x00000002 -#define GEN9_MI_ATOMIC_header \ - .CommandType = 0, \ - .MICommandOpcode = 47 - -#define GEN9_MI_ATOMIC_length 0x00000003 - -struct __gen_prefix(MI_ATOMIC) { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t MemoryType; - uint32_t 
PostSyncOperation; -#define DWORD 0 -#define QWORD 1 -#define OCTWORD 2 -#define RESERVED 3 - uint32_t DataSize; - uint32_t InlineData; - uint32_t CSSTALL; - uint32_t ReturnDataControl; - uint32_t ATOMICOPCODE; - uint32_t DwordLength; - __gen_address_type MemoryAddress; - uint32_t Operand1DataDword0; - uint32_t Operand2DataDword0; - uint32_t Operand1DataDword1; - uint32_t Operand2DataDword1; - uint32_t Operand1DataDword2; - uint32_t Operand2DataDword2; - uint32_t Operand1DataDword3; - uint32_t Operand2DataDword3; -}; - -static inline void -GEN9_MI_ATOMIC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_ATOMIC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->MemoryType, 22, 22) | - __gen_field(values->PostSyncOperation, 21, 21) | - __gen_field(values->DataSize, 19, 20) | - __gen_field(values->InlineData, 18, 18) | - __gen_field(values->CSSTALL, 17, 17) | - __gen_field(values->ReturnDataControl, 16, 16) | - __gen_field(values->ATOMICOPCODE, 8, 15) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->MemoryAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->Operand1DataDword0, 0, 31) | - 0; - - dw[4] = - __gen_field(values->Operand2DataDword0, 0, 31) | - 0; - - dw[5] = - __gen_field(values->Operand1DataDword1, 0, 31) | - 0; - - dw[6] = - __gen_field(values->Operand2DataDword1, 0, 31) | - 0; - - dw[7] = - __gen_field(values->Operand1DataDword2, 0, 31) | - 0; - - dw[8] = - __gen_field(values->Operand2DataDword2, 0, 31) | - 0; - - dw[9] = - __gen_field(values->Operand1DataDword3, 0, 31) | - 0; - - dw[10] = - __gen_field(values->Operand2DataDword3, 0, 31) | - 0; - -} - -#define GEN9_MI_BATCH_BUFFER_START_length_bias 0x00000002 -#define GEN9_MI_BATCH_BUFFER_START_header \ - .CommandType = 
0, \ - .MICommandOpcode = 49, \ - .DwordLength = 1 - -#define GEN9_MI_BATCH_BUFFER_START_length 0x00000003 - -struct GEN9_MI_BATCH_BUFFER_START { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define Firstlevelbatch 0 -#define Secondlevelbatch 1 - uint32_t SecondLevelBatchBuffer; - bool AddOffsetEnable; - uint32_t PredicationEnable; - bool ResourceStreamerEnable; -#define ASI_GGTT 0 -#define ASI_PPGTT 1 - uint32_t AddressSpaceIndicator; - uint32_t DwordLength; - __gen_address_type BatchBufferStartAddress; -}; - -static inline void -GEN9_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_BATCH_BUFFER_START * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->SecondLevelBatchBuffer, 22, 22) | - __gen_field(values->AddOffsetEnable, 16, 16) | - __gen_field(values->PredicationEnable, 15, 15) | - __gen_field(values->ResourceStreamerEnable, 10, 10) | - __gen_field(values->AddressSpaceIndicator, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - -} - -#define GEN9_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 -#define GEN9_MI_CONDITIONAL_BATCH_BUFFER_END_header\ - .CommandType = 0, \ - .MICommandOpcode = 54, \ - .UseGlobalGTT = 0, \ - .CompareSemaphore = 0, \ - .DwordLength = 2 - -#define GEN9_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000004 - -struct GEN9_MI_CONDITIONAL_BATCH_BUFFER_END { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t UseGlobalGTT; - uint32_t CompareSemaphore; -#define CompareMaskModeDisabled 0 -#define CompareMaskModeEnabled 1 - uint32_t CompareMaskMode; - uint32_t DwordLength; - uint32_t CompareDataDword; - __gen_address_type CompareAddress; -}; - -static inline void 
-GEN9_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->CompareSemaphore, 21, 21) | - __gen_field(values->CompareMaskMode, 19, 19) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->CompareDataDword, 0, 31) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - -} - -#define GEN9_MI_FORCE_WAKEUP_length_bias 0x00000002 -#define GEN9_MI_FORCE_WAKEUP_header \ - .CommandType = 0, \ - .MICommandOpcode = 29, \ - .DwordLength = 0 - -#define GEN9_MI_FORCE_WAKEUP_length 0x00000002 - -struct GEN9_MI_FORCE_WAKEUP { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t MaskBits; - uint32_t ForceRenderAwake; - uint32_t ForceMediaAwake; -}; - -static inline void -GEN9_MI_FORCE_WAKEUP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_FORCE_WAKEUP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->MaskBits, 16, 31) | - __gen_field(values->ForceRenderAwake, 1, 1) | - __gen_field(values->ForceMediaAwake, 0, 0) | - 0; - -} - -#define GEN9_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 -#define GEN9_MI_LOAD_REGISTER_IMM_header \ - .CommandType = 0, \ - .MICommandOpcode = 34, \ - .DwordLength = 1 - -#define GEN9_MI_LOAD_REGISTER_IMM_length 0x00000003 - -struct GEN9_MI_LOAD_REGISTER_IMM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t ByteWriteDisables; - uint32_t 
DwordLength; - uint32_t RegisterOffset; - uint32_t DataDWord; -}; - -static inline void -GEN9_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_LOAD_REGISTER_IMM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->ByteWriteDisables, 8, 11) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->RegisterOffset, 2, 22) | - 0; - - dw[2] = - __gen_field(values->DataDWord, 0, 31) | - 0; - -} - -#define GEN9_MI_LOAD_REGISTER_REG_length_bias 0x00000002 -#define GEN9_MI_LOAD_REGISTER_REG_header \ - .CommandType = 0, \ - .MICommandOpcode = 42, \ - .DwordLength = 1 - -#define GEN9_MI_LOAD_REGISTER_REG_length 0x00000003 - -struct GEN9_MI_LOAD_REGISTER_REG { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t SourceRegisterAddress; - uint32_t DestinationRegisterAddress; -}; - -static inline void -GEN9_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_LOAD_REGISTER_REG * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->SourceRegisterAddress, 2, 22) | - 0; - - dw[2] = - __gen_offset(values->DestinationRegisterAddress, 2, 22) | - 0; - -} - -#define GEN9_MI_SEMAPHORE_SIGNAL_length_bias 0x00000002 -#define GEN9_MI_SEMAPHORE_SIGNAL_header \ - .CommandType = 0, \ - .MICommandOpcode = 27, \ - .DwordLength = 0 - -#define GEN9_MI_SEMAPHORE_SIGNAL_length 0x00000002 - -struct GEN9_MI_SEMAPHORE_SIGNAL { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t PostSyncOperation; -#define RCS 0 -#define VCS0 1 -#define BCS 2 -#define VECS 3 -#define VCS1 4 - uint32_t TargetEngineSelect; - uint32_t 
DwordLength; - uint32_t TargetContextID; -}; - -static inline void -GEN9_MI_SEMAPHORE_SIGNAL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_SEMAPHORE_SIGNAL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->PostSyncOperation, 21, 21) | - __gen_field(values->TargetEngineSelect, 15, 17) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->TargetContextID, 0, 31) | - 0; - -} - -#define GEN9_MI_SEMAPHORE_WAIT_length_bias 0x00000002 -#define GEN9_MI_SEMAPHORE_WAIT_header \ - .CommandType = 0, \ - .MICommandOpcode = 28, \ - .DwordLength = 2 - -#define GEN9_MI_SEMAPHORE_WAIT_length 0x00000004 - -struct GEN9_MI_SEMAPHORE_WAIT { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t MemoryType; -#define PollingMode 1 -#define SignalMode 0 - uint32_t WaitMode; -#define SAD_GREATER_THAN_SDD 0 -#define SAD_GREATER_THAN_OR_EQUAL_SDD 1 -#define SAD_LESS_THAN_SDD 2 -#define SAD_LESS_THAN_OR_EQUAL_SDD 3 -#define SAD_EQUAL_SDD 4 -#define SAD_NOT_EQUAL_SDD 5 - uint32_t CompareOperation; - uint32_t DwordLength; - uint32_t SemaphoreDataDword; - __gen_address_type SemaphoreAddress; -}; - -static inline void -GEN9_MI_SEMAPHORE_WAIT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_SEMAPHORE_WAIT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->MemoryType, 22, 22) | - __gen_field(values->WaitMode, 15, 15) | - __gen_field(values->CompareOperation, 12, 14) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SemaphoreDataDword, 0, 31) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], 
values->SemaphoreAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - -} - -#define GEN9_MI_STORE_DATA_IMM_length_bias 0x00000002 -#define GEN9_MI_STORE_DATA_IMM_header \ - .CommandType = 0, \ - .MICommandOpcode = 32, \ - .DwordLength = 2 - -#define GEN9_MI_STORE_DATA_IMM_length 0x00000004 - -struct GEN9_MI_STORE_DATA_IMM { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool UseGlobalGTT; - bool StoreQword; - uint32_t DwordLength; - __gen_address_type Address; - uint32_t CoreModeEnable; - uint32_t DataDWord0; - uint32_t DataDWord1; -}; - -static inline void -GEN9_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_STORE_DATA_IMM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->StoreQword, 21, 21) | - __gen_field(values->DwordLength, 0, 9) | - 0; - - uint32_t dw1 = - __gen_field(values->CoreModeEnable, 0, 0) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->Address, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->DataDWord0, 0, 31) | - 0; - - dw[4] = - __gen_field(values->DataDWord1, 0, 31) | - 0; - -} - -#define GEN9_MI_STORE_REGISTER_MEM_length_bias 0x00000002 -#define GEN9_MI_STORE_REGISTER_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 36, \ - .DwordLength = 2 - -#define GEN9_MI_STORE_REGISTER_MEM_length 0x00000004 - -struct GEN9_MI_STORE_REGISTER_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool UseGlobalGTT; - uint32_t PredicateEnable; - uint32_t DwordLength; - uint32_t RegisterAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN9_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_STORE_REGISTER_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - 
__gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->PredicateEnable, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->RegisterAddress, 2, 22) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - -} - -#define GEN9_PIPELINE_SELECT_length_bias 0x00000001 -#define GEN9_PIPELINE_SELECT_header \ - .CommandType = 3, \ - .CommandSubType = 1, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 4 - -#define GEN9_PIPELINE_SELECT_length 0x00000001 - -struct GEN9_PIPELINE_SELECT { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t MaskBits; - uint32_t ForceMediaAwake; - uint32_t MediaSamplerDOPClockGateEnable; -#define _3D 0 -#define Media 1 -#define GPGPU 2 - uint32_t PipelineSelection; -}; - -static inline void -GEN9_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_PIPELINE_SELECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->MaskBits, 8, 15) | - __gen_field(values->ForceMediaAwake, 5, 5) | - __gen_field(values->MediaSamplerDOPClockGateEnable, 4, 4) | - __gen_field(values->PipelineSelection, 0, 1) | - 0; - -} - -#define GEN9_STATE_BASE_ADDRESS_length_bias 0x00000002 -#define GEN9_STATE_BASE_ADDRESS_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 1, \ - .DwordLength = 17 - -#define GEN9_STATE_BASE_ADDRESS_length 0x00000013 - -#define GEN9_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 - -struct 
GEN9_MEMORY_OBJECT_CONTROL_STATE { - uint32_t IndextoMOCSTables; -}; - -static inline void -GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MEMORY_OBJECT_CONTROL_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->IndextoMOCSTables, 1, 6) | - 0; - -} - -struct GEN9_STATE_BASE_ADDRESS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type GeneralStateBaseAddress; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; - bool GeneralStateBaseAddressModifyEnable; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; - __gen_address_type SurfaceStateBaseAddress; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; - bool SurfaceStateBaseAddressModifyEnable; - __gen_address_type DynamicStateBaseAddress; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; - bool DynamicStateBaseAddressModifyEnable; - __gen_address_type IndirectObjectBaseAddress; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; - bool IndirectObjectBaseAddressModifyEnable; - __gen_address_type InstructionBaseAddress; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; - bool InstructionBaseAddressModifyEnable; - uint32_t GeneralStateBufferSize; - bool GeneralStateBufferSizeModifyEnable; - uint32_t DynamicStateBufferSize; - bool DynamicStateBufferSizeModifyEnable; - uint32_t IndirectObjectBufferSize; - bool IndirectObjectBufferSizeModifyEnable; - uint32_t InstructionBufferSize; - bool InstructionBuffersizeModifyEnable; - __gen_address_type BindlessSurfaceStateBaseAddress; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE BindlessSurfaceStateMemoryObjectControlState; - bool BindlessSurfaceStateBaseAddressModifyEnable; - uint32_t 
BindlessSurfaceStateSize; -}; - -static inline void -GEN9_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_STATE_BASE_ADDRESS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_GeneralStateMemoryObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); - uint32_t dw1 = - __gen_field(dw_GeneralStateMemoryObjectControlState, 4, 10) | - __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); - dw[3] = - __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 16, 22) | - 0; - - uint32_t dw_SurfaceStateMemoryObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); - uint32_t dw4 = - __gen_field(dw_SurfaceStateMemoryObjectControlState, 4, 10) | - __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | - 0; - - uint64_t qw4 = - __gen_combine_address(data, &dw[4], values->SurfaceStateBaseAddress, dw4); - - dw[4] = qw4; - dw[5] = qw4 >> 32; - - uint32_t dw_DynamicStateMemoryObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); - uint32_t dw6 = - 
__gen_field(dw_DynamicStateMemoryObjectControlState, 4, 10) | - __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | - 0; - - uint64_t qw6 = - __gen_combine_address(data, &dw[6], values->DynamicStateBaseAddress, dw6); - - dw[6] = qw6; - dw[7] = qw6 >> 32; - - uint32_t dw_IndirectObjectMemoryObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); - uint32_t dw8 = - __gen_field(dw_IndirectObjectMemoryObjectControlState, 4, 10) | - __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | - 0; - - uint64_t qw8 = - __gen_combine_address(data, &dw[8], values->IndirectObjectBaseAddress, dw8); - - dw[8] = qw8; - dw[9] = qw8 >> 32; - - uint32_t dw_InstructionMemoryObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); - uint32_t dw10 = - __gen_field(dw_InstructionMemoryObjectControlState, 4, 10) | - __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | - 0; - - uint64_t qw10 = - __gen_combine_address(data, &dw[10], values->InstructionBaseAddress, dw10); - - dw[10] = qw10; - dw[11] = qw10 >> 32; - - dw[12] = - __gen_field(values->GeneralStateBufferSize, 12, 31) | - __gen_field(values->GeneralStateBufferSizeModifyEnable, 0, 0) | - 0; - - dw[13] = - __gen_field(values->DynamicStateBufferSize, 12, 31) | - __gen_field(values->DynamicStateBufferSizeModifyEnable, 0, 0) | - 0; - - dw[14] = - __gen_field(values->IndirectObjectBufferSize, 12, 31) | - __gen_field(values->IndirectObjectBufferSizeModifyEnable, 0, 0) | - 0; - - dw[15] = - __gen_field(values->InstructionBufferSize, 12, 31) | - __gen_field(values->InstructionBuffersizeModifyEnable, 0, 0) | - 0; - - uint32_t dw_BindlessSurfaceStateMemoryObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_BindlessSurfaceStateMemoryObjectControlState, 
&values->BindlessSurfaceStateMemoryObjectControlState); - uint32_t dw16 = - __gen_field(dw_BindlessSurfaceStateMemoryObjectControlState, 4, 10) | - __gen_field(values->BindlessSurfaceStateBaseAddressModifyEnable, 0, 0) | - 0; - - uint64_t qw16 = - __gen_combine_address(data, &dw[16], values->BindlessSurfaceStateBaseAddress, dw16); - - dw[16] = qw16; - dw[17] = qw16 >> 32; - - dw[18] = - __gen_field(values->BindlessSurfaceStateSize, 12, 31) | - 0; - -} - -#define GEN9_STATE_PREFETCH_length_bias 0x00000002 -#define GEN9_STATE_PREFETCH_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 3, \ - .DwordLength = 0 - -#define GEN9_STATE_PREFETCH_length 0x00000002 - -struct GEN9_STATE_PREFETCH { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type PrefetchPointer; - uint32_t PrefetchCount; -}; - -static inline void -GEN9_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_STATE_PREFETCH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - __gen_field(values->PrefetchCount, 0, 2) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); - -} - -#define GEN9_STATE_SIP_length_bias 0x00000002 -#define GEN9_STATE_SIP_header \ - .CommandType = 3, \ - .CommandSubType = 0, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 2, \ - .DwordLength = 1 - -#define GEN9_STATE_SIP_length 0x00000003 - -struct GEN9_STATE_SIP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint64_t 
SystemInstructionPointer; -}; - -static inline void -GEN9_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_STATE_SIP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint64_t qw1 = - __gen_offset(values->SystemInstructionPointer, 4, 63) | - 0; - - dw[1] = qw1; - dw[2] = qw1 >> 32; - -} - -#define GEN9_3DPRIMITIVE_length_bias 0x00000002 -#define GEN9_3DPRIMITIVE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 3, \ - ._3DCommandSubOpcode = 0, \ - .DwordLength = 5 - -#define GEN9_3DPRIMITIVE_length 0x00000007 - -struct GEN9_3DPRIMITIVE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - bool IndirectParameterEnable; - uint32_t UAVCoherencyRequired; - bool PredicateEnable; - uint32_t DwordLength; - bool EndOffsetEnable; -#define SEQUENTIAL 0 -#define RANDOM 1 - uint32_t VertexAccessType; - uint32_t PrimitiveTopologyType; - uint32_t VertexCountPerInstance; - uint32_t StartVertexLocation; - uint32_t InstanceCount; - uint32_t StartInstanceLocation; - uint32_t BaseVertexLocation; -}; - -static inline void -GEN9_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DPRIMITIVE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->IndirectParameterEnable, 10, 10) | - __gen_field(values->UAVCoherencyRequired, 9, 9) | - __gen_field(values->PredicateEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - 
dw[1] = - __gen_field(values->EndOffsetEnable, 9, 9) | - __gen_field(values->VertexAccessType, 8, 8) | - __gen_field(values->PrimitiveTopologyType, 0, 5) | - 0; - - dw[2] = - __gen_field(values->VertexCountPerInstance, 0, 31) | - 0; - - dw[3] = - __gen_field(values->StartVertexLocation, 0, 31) | - 0; - - dw[4] = - __gen_field(values->InstanceCount, 0, 31) | - 0; - - dw[5] = - __gen_field(values->StartInstanceLocation, 0, 31) | - 0; - - dw[6] = - __gen_field(values->BaseVertexLocation, 0, 31) | - 0; - -} - -#define GEN9_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 -#define GEN9_3DSTATE_AA_LINE_PARAMETERS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 10, \ - .DwordLength = 1 - -#define GEN9_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 - -struct GEN9_3DSTATE_AA_LINE_PARAMETERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - float AAPointCoverageBias; - float AACoverageBias; - float AAPointCoverageSlope; - float AACoverageSlope; - float AAPointCoverageEndCapBias; - float AACoverageEndCapBias; - float AAPointCoverageEndCapSlope; - float AACoverageEndCapSlope; -}; - -static inline void -GEN9_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_AA_LINE_PARAMETERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->AAPointCoverageBias * (1 << 8), 24, 31) | - __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | - __gen_field(values->AAPointCoverageSlope * (1 << 8), 8, 15) | - __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | - 0; - - dw[2] = - 
__gen_field(values->AAPointCoverageEndCapBias * (1 << 8), 24, 31) | - __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | - __gen_field(values->AAPointCoverageEndCapSlope * (1 << 8), 8, 15) | - __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | - 0; - -} - -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_DS_length_bias 0x00000002 -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 70 - -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_DS_length 0x00000000 - -#define GEN9_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 - -struct GEN9_BINDING_TABLE_EDIT_ENTRY { - uint32_t BindingTableIndex; - uint32_t SurfaceStatePointer; -}; - -static inline void -GEN9_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_BINDING_TABLE_EDIT_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->BindingTableIndex, 16, 23) | - __gen_offset(values->SurfaceStatePointer, 0, 15) | - 0; - -} - -struct GEN9_3DSTATE_BINDING_TABLE_EDIT_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - 
__gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_GS_length_bias 0x00000002 -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 68 - -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_GS_length 0x00000000 - -struct GEN9_3DSTATE_BINDING_TABLE_EDIT_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_HS_length_bias 0x00000002 -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 69 - -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_HS_length 0x00000000 - -struct GEN9_3DSTATE_BINDING_TABLE_EDIT_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length 
fields follow */ -}; - -static inline void -GEN9_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_PS_length_bias 0x00000002 -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 71 - -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_PS_length 0x00000000 - -struct GEN9_3DSTATE_BINDING_TABLE_EDIT_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define 
GEN9_3DSTATE_BINDING_TABLE_EDIT_VS_length_bias 0x00000002 -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 67 - -#define GEN9_3DSTATE_BINDING_TABLE_EDIT_VS_length 0x00000000 - -struct GEN9_3DSTATE_BINDING_TABLE_EDIT_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BindingTableBlockClear; -#define AllCores 3 -#define Core1 2 -#define Core0 1 - uint32_t BindingTableEditTarget; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->BindingTableBlockClear, 16, 31) | - __gen_field(values->BindingTableEditTarget, 0, 1) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 40, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 - -struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoDSBindingTable; -}; - -static inline void -GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict 
values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoDSBindingTable, 5, 15) | - 0; - -} - -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 41, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 - -struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoGSBindingTable; -}; - -static inline void -GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoGSBindingTable, 5, 15) | - 0; - -} - -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 39, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 - -struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t 
DwordLength; - uint32_t PointertoHSBindingTable; -}; - -static inline void -GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoHSBindingTable, 5, 15) | - 0; - -} - -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 42, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 - -struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoPSBindingTable; -}; - -static inline void -GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoPSBindingTable, 5, 15) | - 0; - -} - -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 -#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 38, \ - .DwordLength = 0 - -#define 
GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 - -struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoVSBindingTable; -}; - -static inline void -GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoVSBindingTable, 5, 15) | - 0; - -} - -#define GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 -#define GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 25, \ - .DwordLength = 2 - -#define GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000004 - -struct GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type BindingTablePoolBaseAddress; - uint32_t BindingTablePoolEnable; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; -#define NoValidData 0 - uint32_t BindingTablePoolBufferSize; -}; - -static inline void -GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - 
__gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_SurfaceObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); - uint32_t dw1 = - __gen_field(values->BindingTablePoolEnable, 11, 11) | - __gen_field(dw_SurfaceObjectControlState, 0, 6) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->BindingTablePoolBufferSize, 12, 31) | - 0; - -} - -#define GEN9_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 -#define GEN9_3DSTATE_BLEND_STATE_POINTERS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 36, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 - -struct GEN9_3DSTATE_BLEND_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t BlendStatePointer; - bool BlendStatePointerValid; -}; - -static inline void -GEN9_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_BLEND_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->BlendStatePointer, 6, 31) | - __gen_field(values->BlendStatePointerValid, 0, 0) | - 0; - -} - -#define GEN9_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 -#define GEN9_3DSTATE_CC_STATE_POINTERS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 14, \ - .DwordLength = 
0 - -#define GEN9_3DSTATE_CC_STATE_POINTERS_length 0x00000002 - -struct GEN9_3DSTATE_CC_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ColorCalcStatePointer; - bool ColorCalcStatePointerValid; -}; - -static inline void -GEN9_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_CC_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->ColorCalcStatePointer, 6, 31) | - __gen_field(values->ColorCalcStatePointerValid, 0, 0) | - 0; - -} - -#define GEN9_3DSTATE_CHROMA_KEY_length_bias 0x00000002 -#define GEN9_3DSTATE_CHROMA_KEY_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 4, \ - .DwordLength = 2 - -#define GEN9_3DSTATE_CHROMA_KEY_length 0x00000004 - -struct GEN9_3DSTATE_CHROMA_KEY { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ChromaKeyTableIndex; - uint32_t ChromaKeyLowValue; - uint32_t ChromaKeyHighValue; -}; - -static inline void -GEN9_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_CHROMA_KEY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ChromaKeyTableIndex, 30, 31) | - 0; - - 
dw[2] = - __gen_field(values->ChromaKeyLowValue, 0, 31) | - 0; - - dw[3] = - __gen_field(values->ChromaKeyHighValue, 0, 31) | - 0; - -} - -#define GEN9_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 -#define GEN9_3DSTATE_CLEAR_PARAMS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 4, \ - .DwordLength = 1 - -#define GEN9_3DSTATE_CLEAR_PARAMS_length 0x00000003 - -struct GEN9_3DSTATE_CLEAR_PARAMS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - float DepthClearValue; - bool DepthClearValueValid; -}; - -static inline void -GEN9_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_CLEAR_PARAMS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_float(values->DepthClearValue) | - 0; - - dw[2] = - __gen_field(values->DepthClearValueValid, 0, 0) | - 0; - -} - -#define GEN9_3DSTATE_CLIP_length_bias 0x00000002 -#define GEN9_3DSTATE_CLIP_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 18, \ - .DwordLength = 2 - -#define GEN9_3DSTATE_CLIP_length 0x00000004 - -struct GEN9_3DSTATE_CLIP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define Normal 0 -#define Force 1 - bool ForceUserClipDistanceCullTestEnableBitmask; -#define _8Bit 0 -#define _4Bit 1 - uint32_t VertexSubPixelPrecisionSelect; - bool EarlyCullEnable; -#define Normal 0 -#define Force 1 - bool ForceUserClipDistanceClipTestEnableBitmask; -#define Normal 0 -#define Force 1 - bool ForceClipMode; - 
bool ClipperStatisticsEnable; - uint32_t UserClipDistanceCullTestEnableBitmask; - bool ClipEnable; -#define API_OGL 0 - uint32_t APIMode; - bool ViewportXYClipTestEnable; - bool GuardbandClipTestEnable; - uint32_t UserClipDistanceClipTestEnableBitmask; -#define NORMAL 0 -#define REJECT_ALL 3 -#define ACCEPT_ALL 4 - uint32_t ClipMode; - bool PerspectiveDivideDisable; - bool NonPerspectiveBarycentricEnable; - uint32_t TriangleStripListProvokingVertexSelect; - uint32_t LineStripListProvokingVertexSelect; - uint32_t TriangleFanProvokingVertexSelect; - float MinimumPointWidth; - float MaximumPointWidth; - bool ForceZeroRTAIndexEnable; - uint32_t MaximumVPIndex; -}; - -static inline void -GEN9_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_CLIP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ForceUserClipDistanceCullTestEnableBitmask, 20, 20) | - __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | - __gen_field(values->EarlyCullEnable, 18, 18) | - __gen_field(values->ForceUserClipDistanceClipTestEnableBitmask, 17, 17) | - __gen_field(values->ForceClipMode, 16, 16) | - __gen_field(values->ClipperStatisticsEnable, 10, 10) | - __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | - 0; - - dw[2] = - __gen_field(values->ClipEnable, 31, 31) | - __gen_field(values->APIMode, 30, 30) | - __gen_field(values->ViewportXYClipTestEnable, 28, 28) | - __gen_field(values->GuardbandClipTestEnable, 26, 26) | - __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | - __gen_field(values->ClipMode, 13, 15) | - __gen_field(values->PerspectiveDivideDisable, 9, 9) | - 
__gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | - __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | - __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | - __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | - 0; - - dw[3] = - __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | - __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | - __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | - __gen_field(values->MaximumVPIndex, 0, 3) | - 0; - -} - -#define GEN9_3DSTATE_CONSTANT_DS_length_bias 0x00000002 -#define GEN9_3DSTATE_CONSTANT_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 26, \ - .DwordLength = 9 - -#define GEN9_3DSTATE_CONSTANT_DS_length 0x0000000b - -#define GEN9_3DSTATE_CONSTANT_BODY_length 0x0000000a - -struct GEN9_3DSTATE_CONSTANT_BODY { - uint32_t ConstantBuffer1ReadLength; - uint32_t ConstantBuffer0ReadLength; - uint32_t ConstantBuffer3ReadLength; - uint32_t ConstantBuffer2ReadLength; - __gen_address_type PointerToConstantBuffer0; - __gen_address_type PointerToConstantBuffer1; - __gen_address_type PointerToConstantBuffer2; - __gen_address_type PointerToConstantBuffer3; -}; - -static inline void -GEN9_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_CONSTANT_BODY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | - __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - uint32_t dw4 = - 0; - - uint64_t qw4 = - __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer1, dw4); - - dw[4] = 
qw4; - dw[5] = qw4 >> 32; - - uint32_t dw6 = - 0; - - uint64_t qw6 = - __gen_combine_address(data, &dw[6], values->PointerToConstantBuffer2, dw6); - - dw[6] = qw6; - dw[7] = qw6 >> 32; - - uint32_t dw8 = - 0; - - uint64_t qw8 = - __gen_combine_address(data, &dw[8], values->PointerToConstantBuffer3, dw8); - - dw[8] = qw8; - dw[9] = qw8 >> 32; - -} - -struct GEN9_3DSTATE_CONSTANT_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; - uint32_t DwordLength; - struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN9_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_CONSTANT_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_ConstantBufferObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN9_3DSTATE_CONSTANT_GS_length_bias 0x00000002 -#define GEN9_3DSTATE_CONSTANT_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 22, \ - .DwordLength = 9 - -#define GEN9_3DSTATE_CONSTANT_GS_length 0x0000000b - -struct GEN9_3DSTATE_CONSTANT_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; - uint32_t DwordLength; - struct GEN9_3DSTATE_CONSTANT_BODY 
ConstantBody; -}; - -static inline void -GEN9_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_CONSTANT_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_ConstantBufferObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN9_3DSTATE_CONSTANT_HS_length_bias 0x00000002 -#define GEN9_3DSTATE_CONSTANT_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 25, \ - .DwordLength = 9 - -#define GEN9_3DSTATE_CONSTANT_HS_length 0x0000000b - -struct GEN9_3DSTATE_CONSTANT_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; - uint32_t DwordLength; - struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN9_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_CONSTANT_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_ConstantBufferObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - 
__gen_field(dw_ConstantBufferObjectControlState, 8, 14) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN9_3DSTATE_CONSTANT_PS_length_bias 0x00000002 -#define GEN9_3DSTATE_CONSTANT_PS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 23, \ - .DwordLength = 9 - -#define GEN9_3DSTATE_CONSTANT_PS_length 0x0000000b - -struct GEN9_3DSTATE_CONSTANT_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; - uint32_t DwordLength; - struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN9_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_CONSTANT_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_ConstantBufferObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN9_3DSTATE_CONSTANT_VS_length_bias 0x00000002 -#define GEN9_3DSTATE_CONSTANT_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 21, \ - .DwordLength = 9 - -#define GEN9_3DSTATE_CONSTANT_VS_length 0x0000000b - -struct GEN9_3DSTATE_CONSTANT_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - struct 
GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; - uint32_t DwordLength; - struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; -}; - -static inline void -GEN9_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_CONSTANT_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_ConstantBufferObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); -} - -#define GEN9_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 -#define GEN9_3DSTATE_DEPTH_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 5, \ - .DwordLength = 6 - -#define GEN9_3DSTATE_DEPTH_BUFFER_length 0x00000008 - -struct GEN9_3DSTATE_DEPTH_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define SURFTYPE_2D 1 -#define SURFTYPE_CUBE 3 -#define SURFTYPE_NULL 7 - uint32_t SurfaceType; - bool DepthWriteEnable; - bool StencilWriteEnable; - bool HierarchicalDepthBufferEnable; -#define D32_FLOAT 1 -#define D24_UNORM_X8_UINT 3 -#define D16_UNORM 5 - uint32_t SurfaceFormat; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; - uint32_t Height; - uint32_t Width; - uint32_t LOD; - uint32_t Depth; - uint32_t MinimumArrayElement; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; -#define NONE 0 -#define TILEYF 1 -#define TILEYS 2 - uint32_t TiledResourceMode; - uint32_t 
MipTailStartLOD; - uint32_t RenderTargetViewExtent; - uint32_t SurfaceQPitch; -}; - -static inline void -GEN9_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_DEPTH_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SurfaceType, 29, 31) | - __gen_field(values->DepthWriteEnable, 28, 28) | - __gen_field(values->StencilWriteEnable, 27, 27) | - __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | - __gen_field(values->SurfaceFormat, 18, 20) | - __gen_field(values->SurfacePitch, 0, 17) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - dw[4] = - __gen_field(values->Height, 18, 31) | - __gen_field(values->Width, 4, 17) | - __gen_field(values->LOD, 0, 3) | - 0; - - uint32_t dw_DepthBufferObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); - dw[5] = - __gen_field(values->Depth, 21, 31) | - __gen_field(values->MinimumArrayElement, 10, 20) | - __gen_field(dw_DepthBufferObjectControlState, 0, 6) | - 0; - - dw[6] = - __gen_field(values->TiledResourceMode, 30, 31) | - __gen_field(values->MipTailStartLOD, 26, 29) | - 0; - - dw[7] = - __gen_field(values->RenderTargetViewExtent, 21, 31) | - __gen_field(values->SurfaceQPitch, 0, 14) | - 0; - -} - -#define GEN9_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 -#define GEN9_3DSTATE_DRAWING_RECTANGLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 0, \ - .DwordLength = 2 - -#define 
GEN9_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 - -struct GEN9_3DSTATE_DRAWING_RECTANGLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; -#define Legacy 0 -#define Core0Enabled 1 -#define Core1Enabled 2 - uint32_t CoreModeSelect; - uint32_t DwordLength; - uint32_t ClippedDrawingRectangleYMin; - uint32_t ClippedDrawingRectangleXMin; - uint32_t ClippedDrawingRectangleYMax; - uint32_t ClippedDrawingRectangleXMax; - uint32_t DrawingRectangleOriginY; - uint32_t DrawingRectangleOriginX; -}; - -static inline void -GEN9_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_DRAWING_RECTANGLE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->CoreModeSelect, 14, 15) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | - __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | - 0; - - dw[2] = - __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | - __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | - 0; - - dw[3] = - __gen_field(values->DrawingRectangleOriginY, 16, 31) | - __gen_field(values->DrawingRectangleOriginX, 0, 15) | - 0; - -} - -#define GEN9_3DSTATE_DS_length_bias 0x00000002 -#define GEN9_3DSTATE_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 29, \ - .DwordLength = 9 - -#define GEN9_3DSTATE_DS_length 0x0000000b - -struct GEN9_3DSTATE_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint64_t KernelStartPointer; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; 
-#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadDispatchPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool AccessesUAV; - bool IllegalOpcodeExceptionEnable; - bool SoftwareExceptionEnable; - uint64_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t DispatchGRFStartRegisterForURBData; - uint32_t PatchURBEntryReadLength; - uint32_t PatchURBEntryReadOffset; - uint32_t MaximumNumberofThreads; - bool StatisticsEnable; -#define SIMD4X2 0 -#define SIMD8_SINGLE_PATCH 1 -#define SIMD8_SINGLE_OR_DUAL_PATCH 2 - uint32_t DispatchMode; - bool ComputeWCoordinateEnable; - bool CacheDisable; - bool FunctionEnable; - uint32_t VertexURBEntryOutputReadOffset; - uint32_t VertexURBEntryOutputLength; - uint32_t UserClipDistanceClipTestEnableBitmask; - uint32_t UserClipDistanceCullTestEnableBitmask; - uint64_t DUAL_PATCHKernelStartPointer; -}; - -static inline void -GEN9_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint64_t qw1 = - __gen_offset(values->KernelStartPointer, 6, 63) | - 0; - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->VectorMaskEnable, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadDispatchPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->AccessesUAV, 14, 14) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 
13) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - uint64_t qw4 = - __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = qw4; - dw[5] = qw4 >> 32; - - dw[6] = - __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | - __gen_field(values->PatchURBEntryReadLength, 11, 17) | - __gen_field(values->PatchURBEntryReadOffset, 4, 9) | - 0; - - dw[7] = - __gen_field(values->MaximumNumberofThreads, 21, 29) | - __gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->DispatchMode, 3, 4) | - __gen_field(values->ComputeWCoordinateEnable, 2, 2) | - __gen_field(values->CacheDisable, 1, 1) | - __gen_field(values->FunctionEnable, 0, 0) | - 0; - - dw[8] = - __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | - __gen_field(values->VertexURBEntryOutputLength, 16, 20) | - __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | - __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | - 0; - - uint64_t qw9 = - __gen_offset(values->DUAL_PATCHKernelStartPointer, 6, 63) | - 0; - - dw[9] = qw9; - dw[10] = qw9 >> 32; - -} - -#define GEN9_3DSTATE_GATHER_CONSTANT_DS_length_bias 0x00000002 -#define GEN9_3DSTATE_GATHER_CONSTANT_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 55 - -#define GEN9_3DSTATE_GATHER_CONSTANT_DS_length 0x00000000 - -#define GEN9_GATHER_CONSTANT_ENTRY_length 0x00000001 - -struct GEN9_GATHER_CONSTANT_ENTRY { - uint32_t ConstantBufferOffset; - uint32_t ChannelMask; - uint32_t BindingTableIndexOffset; -}; - -static inline void -GEN9_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_GATHER_CONSTANT_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->ConstantBufferOffset, 8, 15) | - __gen_field(values->ChannelMask, 4, 7) | - __gen_field(values->BindingTableIndexOffset, 0, 3) 
| - 0; - -} - -struct GEN9_3DSTATE_GATHER_CONSTANT_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; -#define CommitGather 0 -#define NonCommitGather 1 - uint32_t UpdateGatherTableOnly; - uint32_t GatherBufferOffset; - bool ConstantBufferDx9GenerateStall; -#define Load 0 -#define Read 1 - uint32_t OnDieTable; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_GATHER_CONSTANT_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - __gen_field(values->UpdateGatherTableOnly, 1, 1) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | - __gen_field(values->OnDieTable, 3, 3) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_GATHER_CONSTANT_GS_length_bias 0x00000002 -#define GEN9_3DSTATE_GATHER_CONSTANT_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 53 - -#define GEN9_3DSTATE_GATHER_CONSTANT_GS_length 0x00000000 - -struct GEN9_3DSTATE_GATHER_CONSTANT_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; -#define CommitGather 0 -#define NonCommitGather 1 - uint32_t 
UpdateGatherTableOnly; - uint32_t GatherBufferOffset; - bool ConstantBufferDx9GenerateStall; -#define Load 0 -#define Read 1 - uint32_t OnDieTable; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_GATHER_CONSTANT_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - __gen_field(values->UpdateGatherTableOnly, 1, 1) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | - __gen_field(values->OnDieTable, 3, 3) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_GATHER_CONSTANT_HS_length_bias 0x00000002 -#define GEN9_3DSTATE_GATHER_CONSTANT_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 54 - -#define GEN9_3DSTATE_GATHER_CONSTANT_HS_length 0x00000000 - -struct GEN9_3DSTATE_GATHER_CONSTANT_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; -#define CommitGather 0 -#define NonCommitGather 1 - uint32_t UpdateGatherTableOnly; - uint32_t GatherBufferOffset; - bool ConstantBufferDx9GenerateStall; -#define Load 0 -#define Read 1 - uint32_t OnDieTable; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN9_3DSTATE_GATHER_CONSTANT_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - __gen_field(values->UpdateGatherTableOnly, 1, 1) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | - __gen_field(values->OnDieTable, 3, 3) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_GATHER_CONSTANT_PS_length_bias 0x00000002 -#define GEN9_3DSTATE_GATHER_CONSTANT_PS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 56 - -#define GEN9_3DSTATE_GATHER_CONSTANT_PS_length 0x00000000 - -struct GEN9_3DSTATE_GATHER_CONSTANT_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; -#define CommitGather 0 -#define NonCommitGather 1 - uint32_t UpdateGatherTableOnly; - bool DX9OnDieRegisterReadEnable; - uint32_t GatherBufferOffset; - bool ConstantBufferDx9GenerateStall; - bool ConstantBufferDx9Enable; -#define Load 0 -#define Read 1 - uint32_t OnDieTable; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_GATHER_CONSTANT_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - 
__gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - __gen_field(values->UpdateGatherTableOnly, 1, 1) | - __gen_field(values->DX9OnDieRegisterReadEnable, 0, 0) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | - __gen_field(values->ConstantBufferDx9Enable, 4, 4) | - __gen_field(values->OnDieTable, 3, 3) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_GATHER_CONSTANT_VS_length_bias 0x00000002 -#define GEN9_3DSTATE_GATHER_CONSTANT_VS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 52 - -#define GEN9_3DSTATE_GATHER_CONSTANT_VS_length 0x00000000 - -struct GEN9_3DSTATE_GATHER_CONSTANT_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferValid; - uint32_t ConstantBufferBindingTableBlock; -#define CommitGather 0 -#define NonCommitGather 1 - uint32_t UpdateGatherTableOnly; - bool DX9OnDieRegisterReadEnable; - uint32_t GatherBufferOffset; - bool ConstantBufferDx9GenerateStall; - bool ConstantBufferDx9Enable; -#define Load 0 -#define Read 1 - uint32_t OnDieTable; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_GATHER_CONSTANT_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferValid, 
16, 31) | - __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | - __gen_field(values->UpdateGatherTableOnly, 1, 1) | - __gen_field(values->DX9OnDieRegisterReadEnable, 0, 0) | - 0; - - dw[2] = - __gen_offset(values->GatherBufferOffset, 6, 22) | - __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | - __gen_field(values->ConstantBufferDx9Enable, 4, 4) | - __gen_field(values->OnDieTable, 3, 3) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 -#define GEN9_3DSTATE_GATHER_POOL_ALLOC_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 26, \ - .DwordLength = 2 - -#define GEN9_3DSTATE_GATHER_POOL_ALLOC_length 0x00000004 - -struct GEN9_3DSTATE_GATHER_POOL_ALLOC { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - __gen_address_type GatherPoolBaseAddress; - bool GatherPoolEnable; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; - uint32_t GatherPoolBufferSize; -}; - -static inline void -GEN9_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_GATHER_POOL_ALLOC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_MemoryObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); - uint32_t dw1 = - __gen_field(values->GatherPoolEnable, 11, 11) | - __gen_field(dw_MemoryObjectControlState, 0, 6) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - 
dw[3] = - __gen_field(values->GatherPoolBufferSize, 12, 31) | - 0; - -} - -#define GEN9_3DSTATE_GS_length_bias 0x00000002 -#define GEN9_3DSTATE_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 17, \ - .DwordLength = 8 - -#define GEN9_3DSTATE_GS_length 0x0000000a - -struct GEN9_3DSTATE_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint64_t KernelStartPointer; - uint32_t SingleProgramFlow; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadDispatchPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool AccessesUAV; - bool MaskStackExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t ExpectedVertexCount; - uint64_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t DispatchGRFStartRegisterForURBData54; - uint32_t OutputVertexSize; - uint32_t OutputTopology; - uint32_t VertexURBEntryReadLength; - bool IncludeVertexHandles; - uint32_t VertexURBEntryReadOffset; - uint32_t DispatchGRFStartRegisterForURBData; - uint32_t ControlDataHeaderSize; - uint32_t InstanceControl; - uint32_t DefaultStreamId; -#define DispatchModeSingle 0 -#define DispatchModeDualInstance 1 -#define DispatchModeDualObject 2 -#define DispatchModeSIMD8 3 - uint32_t DispatchMode; - bool StatisticsEnable; - uint32_t InvocationsIncrementValue; - bool IncludePrimitiveID; - uint32_t Hint; -#define LEADING 0 -#define TRAILING 1 - uint32_t ReorderMode; - bool DiscardAdjacency; - bool Enable; -#define CUT 0 -#define SID 1 - uint32_t ControlDataFormat; - bool StaticOutput; - uint32_t StaticOutputVertexCount; - uint32_t 
MaximumNumberofThreads; - uint32_t VertexURBEntryOutputReadOffset; - uint32_t VertexURBEntryOutputLength; - uint32_t UserClipDistanceClipTestEnableBitmask; - uint32_t UserClipDistanceCullTestEnableBitmask; -}; - -static inline void -GEN9_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint64_t qw1 = - __gen_offset(values->KernelStartPointer, 6, 63) | - 0; - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->SingleProgramFlow, 31, 31) | - __gen_field(values->VectorMaskEnable, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadDispatchPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->AccessesUAV, 12, 12) | - __gen_field(values->MaskStackExceptionEnable, 11, 11) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - __gen_field(values->ExpectedVertexCount, 0, 5) | - 0; - - uint64_t qw4 = - __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = qw4; - dw[5] = qw4 >> 32; - - dw[6] = - __gen_field(values->DispatchGRFStartRegisterForURBData54, 29, 30) | - __gen_field(values->OutputVertexSize, 23, 28) | - __gen_field(values->OutputTopology, 17, 22) | - __gen_field(values->VertexURBEntryReadLength, 11, 16) | - __gen_field(values->IncludeVertexHandles, 10, 10) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - __gen_field(values->DispatchGRFStartRegisterForURBData, 0, 3) | - 0; - - dw[7] = - 
__gen_field(values->ControlDataHeaderSize, 20, 23) | - __gen_field(values->InstanceControl, 15, 19) | - __gen_field(values->DefaultStreamId, 13, 14) | - __gen_field(values->DispatchMode, 11, 12) | - __gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->InvocationsIncrementValue, 5, 9) | - __gen_field(values->IncludePrimitiveID, 4, 4) | - __gen_field(values->Hint, 3, 3) | - __gen_field(values->ReorderMode, 2, 2) | - __gen_field(values->DiscardAdjacency, 1, 1) | - __gen_field(values->Enable, 0, 0) | - 0; - - dw[8] = - __gen_field(values->ControlDataFormat, 31, 31) | - __gen_field(values->StaticOutput, 30, 30) | - __gen_field(values->StaticOutputVertexCount, 16, 26) | - __gen_field(values->MaximumNumberofThreads, 0, 8) | - 0; - - dw[9] = - __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | - __gen_field(values->VertexURBEntryOutputLength, 16, 20) | - __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | - __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | - 0; - -} - -#define GEN9_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 -#define GEN9_3DSTATE_HIER_DEPTH_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 7, \ - .DwordLength = 3 - -#define GEN9_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000005 - -struct GEN9_3DSTATE_HIER_DEPTH_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; - uint32_t SurfaceQPitch; -}; - -static inline void -GEN9_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_HIER_DEPTH_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - 
__gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_HierarchicalDepthBufferObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); - dw[1] = - __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 31) | - __gen_field(values->SurfacePitch, 0, 16) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - dw[4] = - __gen_field(values->SurfaceQPitch, 0, 14) | - 0; - -} - -#define GEN9_3DSTATE_HS_length_bias 0x00000002 -#define GEN9_3DSTATE_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 27, \ - .DwordLength = 7 - -#define GEN9_3DSTATE_HS_length 0x00000009 - -struct GEN9_3DSTATE_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadDispatchPriority; -#define IEEE754 0 -#define alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool SoftwareExceptionEnable; - bool Enable; - bool StatisticsEnable; - uint32_t MaximumNumberofThreads; - uint32_t InstanceCount; - uint64_t KernelStartPointer; - uint64_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t DispatchGRFStartRegisterForURBData5; - bool SingleProgramFlow; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; - bool AccessesUAV; - bool IncludeVertexHandles; - uint32_t DispatchGRFStartRegisterForURBData; -#define 
SINGLE_PATCH 0 -#define DUAL_PATCH 1 -#define _8_PATCH 2 - uint32_t DispatchMode; - uint32_t VertexURBEntryReadLength; - uint32_t VertexURBEntryReadOffset; - bool IncludePrimitiveID; -}; - -static inline void -GEN9_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadDispatchPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->SoftwareExceptionEnable, 12, 12) | - 0; - - dw[2] = - __gen_field(values->Enable, 31, 31) | - __gen_field(values->StatisticsEnable, 29, 29) | - __gen_field(values->MaximumNumberofThreads, 8, 16) | - __gen_field(values->InstanceCount, 0, 3) | - 0; - - uint64_t qw3 = - __gen_offset(values->KernelStartPointer, 6, 63) | - 0; - - dw[3] = qw3; - dw[4] = qw3 >> 32; - - uint64_t qw5 = - __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[5] = qw5; - dw[6] = qw5 >> 32; - - dw[7] = - __gen_field(values->DispatchGRFStartRegisterForURBData5, 28, 28) | - __gen_field(values->SingleProgramFlow, 27, 27) | - __gen_field(values->VectorMaskEnable, 26, 26) | - __gen_field(values->AccessesUAV, 25, 25) | - __gen_field(values->IncludeVertexHandles, 24, 24) | - __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | - __gen_field(values->DispatchMode, 17, 18) | - __gen_field(values->VertexURBEntryReadLength, 11, 16) | - __gen_field(values->VertexURBEntryReadOffset, 4, 9) | - 
__gen_field(values->IncludePrimitiveID, 0, 0) | - 0; - - dw[8] = - 0; - -} - -#define GEN9_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 -#define GEN9_3DSTATE_INDEX_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 10, \ - .DwordLength = 3 - -#define GEN9_3DSTATE_INDEX_BUFFER_length 0x00000005 - -struct GEN9_3DSTATE_INDEX_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define INDEX_BYTE 0 -#define INDEX_WORD 1 -#define INDEX_DWORD 2 - uint32_t IndexFormat; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; - __gen_address_type BufferStartingAddress; - uint32_t BufferSize; -}; - -static inline void -GEN9_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_INDEX_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_MemoryObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); - dw[1] = - __gen_field(values->IndexFormat, 8, 9) | - __gen_field(dw_MemoryObjectControlState, 0, 6) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->BufferStartingAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - dw[4] = - __gen_field(values->BufferSize, 0, 31) | - 0; - -} - -#define GEN9_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 -#define GEN9_3DSTATE_LINE_STIPPLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 8, \ - .DwordLength = 1 - -#define GEN9_3DSTATE_LINE_STIPPLE_length 0x00000003 - -struct 
GEN9_3DSTATE_LINE_STIPPLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; - uint32_t CurrentRepeatCounter; - uint32_t CurrentStippleIndex; - uint32_t LineStipplePattern; - float LineStippleInverseRepeatCount; - uint32_t LineStippleRepeatCount; -}; - -static inline void -GEN9_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_LINE_STIPPLE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | - __gen_field(values->CurrentRepeatCounter, 21, 29) | - __gen_field(values->CurrentStippleIndex, 16, 19) | - __gen_field(values->LineStipplePattern, 0, 15) | - 0; - - dw[2] = - __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | - __gen_field(values->LineStippleRepeatCount, 0, 8) | - 0; - -} - -#define GEN9_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 -#define GEN9_3DSTATE_MONOFILTER_SIZE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 17, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_MONOFILTER_SIZE_length 0x00000002 - -struct GEN9_3DSTATE_MONOFILTER_SIZE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t MonochromeFilterWidth; - uint32_t MonochromeFilterHeight; -}; - -static inline void -GEN9_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_MONOFILTER_SIZE * restrict values) -{ - uint32_t *dw = 
(uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->MonochromeFilterWidth, 3, 5) | - __gen_field(values->MonochromeFilterHeight, 0, 2) | - 0; - -} - -#define GEN9_3DSTATE_MULTISAMPLE_length_bias 0x00000002 -#define GEN9_3DSTATE_MULTISAMPLE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 13, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_MULTISAMPLE_length 0x00000002 - -struct GEN9_3DSTATE_MULTISAMPLE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PixelPositionOffsetEnable; -#define CENTER 0 -#define UL_CORNER 1 - uint32_t PixelLocation; - uint32_t NumberofMultisamples; -}; - -static inline void -GEN9_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_MULTISAMPLE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->PixelPositionOffsetEnable, 5, 5) | - __gen_field(values->PixelLocation, 4, 4) | - __gen_field(values->NumberofMultisamples, 1, 3) | - 0; - -} - -#define GEN9_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 -#define GEN9_3DSTATE_POLY_STIPPLE_OFFSET_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 6, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 - -struct GEN9_3DSTATE_POLY_STIPPLE_OFFSET { - uint32_t 
CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PolygonStippleXOffset; - uint32_t PolygonStippleYOffset; -}; - -static inline void -GEN9_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->PolygonStippleXOffset, 8, 12) | - __gen_field(values->PolygonStippleYOffset, 0, 4) | - 0; - -} - -#define GEN9_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 -#define GEN9_3DSTATE_POLY_STIPPLE_PATTERN_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 7, \ - .DwordLength = 31 - -#define GEN9_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 - -struct GEN9_3DSTATE_POLY_STIPPLE_PATTERN { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PatternRow[32]; -}; - -static inline void -GEN9_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { - dw[j] = - __gen_field(values->PatternRow[i + 0], 0, 31) | - 0; - } - -} - -#define GEN9_3DSTATE_PS_length_bias 0x00000002 -#define 
GEN9_3DSTATE_PS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 32, \ - .DwordLength = 10 - -#define GEN9_3DSTATE_PS_length 0x0000000c - -struct GEN9_3DSTATE_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint64_t KernelStartPointer0; -#define Multiple 0 -#define Single 1 - uint32_t SingleProgramFlow; -#define Dmask 0 -#define Vmask 1 - uint32_t VectorMaskEnable; -#define NoSamplers 0 -#define _14Samplers 1 -#define _58Samplers 2 -#define _912Samplers 3 -#define _1316Samplers 4 - uint32_t SamplerCount; -#define FlushedtoZero 0 -#define Retained 1 - uint32_t SinglePrecisionDenormalMode; - uint32_t BindingTableEntryCount; -#define Normal 0 -#define High 1 - uint32_t ThreadDispatchPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; -#define RTNE 0 -#define RU 1 -#define RD 2 -#define RTZ 3 - uint32_t RoundingMode; - bool IllegalOpcodeExceptionEnable; - bool MaskStackExceptionEnable; - bool SoftwareExceptionEnable; - uint64_t ScratchSpaceBasePointer; - uint32_t PerThreadScratchSpace; - uint32_t MaximumNumberofThreadsPerPSD; - bool PushConstantEnable; - bool RenderTargetFastClearEnable; -#define RESOLVE_DISABLED 0 -#define RESOLVE_PARTIAL 1 -#define RESOLVE_FULL 3 - uint32_t RenderTargetResolveType; -#define POSOFFSET_NONE 0 -#define POSOFFSET_CENTROID 2 -#define POSOFFSET_SAMPLE 3 - uint32_t PositionXYOffsetSelect; - bool _32PixelDispatchEnable; - bool _16PixelDispatchEnable; - bool _8PixelDispatchEnable; - uint32_t DispatchGRFStartRegisterForConstantSetupData0; - uint32_t DispatchGRFStartRegisterForConstantSetupData1; - uint32_t DispatchGRFStartRegisterForConstantSetupData2; - uint64_t KernelStartPointer1; - uint64_t KernelStartPointer2; -}; - -static inline void -GEN9_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_PS * restrict values) -{ - 
uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint64_t qw1 = - __gen_offset(values->KernelStartPointer0, 6, 63) | - 0; - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->SingleProgramFlow, 31, 31) | - __gen_field(values->VectorMaskEnable, 30, 30) | - __gen_field(values->SamplerCount, 27, 29) | - __gen_field(values->SinglePrecisionDenormalMode, 26, 26) | - __gen_field(values->BindingTableEntryCount, 18, 25) | - __gen_field(values->ThreadDispatchPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->RoundingMode, 14, 15) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->MaskStackExceptionEnable, 11, 11) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - uint64_t qw4 = - __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[4] = qw4; - dw[5] = qw4 >> 32; - - dw[6] = - __gen_field(values->MaximumNumberofThreadsPerPSD, 23, 31) | - __gen_field(values->PushConstantEnable, 11, 11) | - __gen_field(values->RenderTargetFastClearEnable, 8, 8) | - __gen_field(values->RenderTargetResolveType, 6, 7) | - __gen_field(values->PositionXYOffsetSelect, 3, 4) | - __gen_field(values->_32PixelDispatchEnable, 2, 2) | - __gen_field(values->_16PixelDispatchEnable, 1, 1) | - __gen_field(values->_8PixelDispatchEnable, 0, 0) | - 0; - - dw[7] = - __gen_field(values->DispatchGRFStartRegisterForConstantSetupData0, 16, 22) | - __gen_field(values->DispatchGRFStartRegisterForConstantSetupData1, 8, 14) | - __gen_field(values->DispatchGRFStartRegisterForConstantSetupData2, 0, 6) | - 0; - - uint64_t qw8 = - __gen_offset(values->KernelStartPointer1, 6, 63) | - 0; - - dw[8] = qw8; 
- dw[9] = qw8 >> 32; - - uint64_t qw10 = - __gen_offset(values->KernelStartPointer2, 6, 63) | - 0; - - dw[10] = qw10; - dw[11] = qw10 >> 32; - -} - -#define GEN9_3DSTATE_PS_BLEND_length_bias 0x00000002 -#define GEN9_3DSTATE_PS_BLEND_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 77, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_PS_BLEND_length 0x00000002 - -struct GEN9_3DSTATE_PS_BLEND { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool AlphaToCoverageEnable; - bool HasWriteableRT; - bool ColorBufferBlendEnable; - uint32_t SourceAlphaBlendFactor; - uint32_t DestinationAlphaBlendFactor; - uint32_t SourceBlendFactor; - uint32_t DestinationBlendFactor; - bool AlphaTestEnable; - bool IndependentAlphaBlendEnable; -}; - -static inline void -GEN9_3DSTATE_PS_BLEND_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_PS_BLEND * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->AlphaToCoverageEnable, 31, 31) | - __gen_field(values->HasWriteableRT, 30, 30) | - __gen_field(values->ColorBufferBlendEnable, 29, 29) | - __gen_field(values->SourceAlphaBlendFactor, 24, 28) | - __gen_field(values->DestinationAlphaBlendFactor, 19, 23) | - __gen_field(values->SourceBlendFactor, 14, 18) | - __gen_field(values->DestinationBlendFactor, 9, 13) | - __gen_field(values->AlphaTestEnable, 8, 8) | - __gen_field(values->IndependentAlphaBlendEnable, 7, 7) | - 0; - -} - -#define GEN9_3DSTATE_PS_EXTRA_length_bias 0x00000002 -#define GEN9_3DSTATE_PS_EXTRA_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - 
._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 79, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_PS_EXTRA_length 0x00000002 - -struct GEN9_3DSTATE_PS_EXTRA { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool PixelShaderValid; - bool PixelShaderDoesnotwritetoRT; - bool oMaskPresenttoRenderTarget; - bool PixelShaderKillsPixel; -#define PSCDEPTH_OFF 0 -#define PSCDEPTH_ON 1 -#define PSCDEPTH_ON_GE 2 -#define PSCDEPTH_ON_LE 3 - uint32_t PixelShaderComputedDepthMode; - bool ForceComputedDepth; - bool PixelShaderUsesSourceDepth; - bool PixelShaderUsesSourceW; - uint32_t Removed; - bool AttributeEnable; - bool PixelShaderDisablesAlphaToCoverage; - bool PixelShaderIsPerSample; - bool PixelShaderComputesStencil; - bool PixelShaderPullsBary; - bool PixelShaderHasUAV; -#define ICMS_NONE 0 -#define ICMS_NORMAL 1 -#define ICMS_INNER_CONSERVATIVE 2 -#define ICMS_DEPTH_COVERAGE 3 - uint32_t InputCoverageMaskState; -}; - -static inline void -GEN9_3DSTATE_PS_EXTRA_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_PS_EXTRA * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->PixelShaderValid, 31, 31) | - __gen_field(values->PixelShaderDoesnotwritetoRT, 30, 30) | - __gen_field(values->oMaskPresenttoRenderTarget, 29, 29) | - __gen_field(values->PixelShaderKillsPixel, 28, 28) | - __gen_field(values->PixelShaderComputedDepthMode, 26, 27) | - __gen_field(values->ForceComputedDepth, 25, 25) | - __gen_field(values->PixelShaderUsesSourceDepth, 24, 24) | - __gen_field(values->PixelShaderUsesSourceW, 23, 23) | - __gen_field(values->Removed, 17, 17) | - 
__gen_field(values->AttributeEnable, 8, 8) | - __gen_field(values->PixelShaderDisablesAlphaToCoverage, 7, 7) | - __gen_field(values->PixelShaderIsPerSample, 6, 6) | - __gen_field(values->PixelShaderComputesStencil, 5, 5) | - __gen_field(values->PixelShaderPullsBary, 3, 3) | - __gen_field(values->PixelShaderHasUAV, 2, 2) | - __gen_field(values->InputCoverageMaskState, 0, 1) | - 0; - -} - -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 20, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 - -struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 21, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 - -struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; 
- uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 19, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 - -struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 -#define 
GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 22, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 - -struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 18, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 - -struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ConstantBufferOffset; - uint32_t ConstantBufferSize; -}; - -static inline void -GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - 
__gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ConstantBufferOffset, 16, 20) | - __gen_field(values->ConstantBufferSize, 0, 5) | - 0; - -} - -#define GEN9_3DSTATE_RASTER_length_bias 0x00000002 -#define GEN9_3DSTATE_RASTER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 80, \ - .DwordLength = 3 - -#define GEN9_3DSTATE_RASTER_length 0x00000005 - -struct GEN9_3DSTATE_RASTER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool ViewportZFarClipTestEnable; - bool ConservativeRasterizationEnable; -#define DX9OGL 0 -#define DX100 1 -#define DX101 2 - uint32_t APIMode; -#define Clockwise 0 -#define CounterClockwise 1 - uint32_t FrontWinding; -#define FSC_NUMRASTSAMPLES_0 0 -#define FSC_NUMRASTSAMPLES_1 1 -#define FSC_NUMRASTSAMPLES_2 2 -#define FSC_NUMRASTSAMPLES_4 3 -#define FSC_NUMRASTSAMPLES_8 4 -#define FSC_NUMRASTSAMPLES_16 5 - uint32_t ForcedSampleCount; -#define CULLMODE_BOTH 0 -#define CULLMODE_NONE 1 -#define CULLMODE_FRONT 2 -#define CULLMODE_BACK 3 - uint32_t CullMode; -#define Normal 0 -#define Force 1 - uint32_t ForceMultisampling; - bool SmoothPointEnable; - bool DXMultisampleRasterizationEnable; -#define MSRASTMODE_OFF_PIXEL 0 -#define MSRASTMODE_OFF_PATTERN 1 -#define MSRASTMODE_ON_PIXEL 2 -#define MSRASTMODE_ON_PATTERN 3 - uint32_t DXMultisampleRasterizationMode; - bool GlobalDepthOffsetEnableSolid; - bool GlobalDepthOffsetEnableWireframe; - bool GlobalDepthOffsetEnablePoint; -#define RASTER_SOLID 0 -#define RASTER_WIREFRAME 1 -#define RASTER_POINT 2 - uint32_t FrontFaceFillMode; -#define RASTER_SOLID 0 -#define RASTER_WIREFRAME 1 -#define RASTER_POINT 2 - uint32_t BackFaceFillMode; - bool AntialiasingEnable; - bool ScissorRectangleEnable; - bool ViewportZNearClipTestEnable; 
- float GlobalDepthOffsetConstant; - float GlobalDepthOffsetScale; - float GlobalDepthOffsetClamp; -}; - -static inline void -GEN9_3DSTATE_RASTER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_RASTER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ViewportZFarClipTestEnable, 26, 26) | - __gen_field(values->ConservativeRasterizationEnable, 24, 24) | - __gen_field(values->APIMode, 22, 23) | - __gen_field(values->FrontWinding, 21, 21) | - __gen_field(values->ForcedSampleCount, 18, 20) | - __gen_field(values->CullMode, 16, 17) | - __gen_field(values->ForceMultisampling, 14, 14) | - __gen_field(values->SmoothPointEnable, 13, 13) | - __gen_field(values->DXMultisampleRasterizationEnable, 12, 12) | - __gen_field(values->DXMultisampleRasterizationMode, 10, 11) | - __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | - __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | - __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | - __gen_field(values->FrontFaceFillMode, 5, 6) | - __gen_field(values->BackFaceFillMode, 3, 4) | - __gen_field(values->AntialiasingEnable, 2, 2) | - __gen_field(values->ScissorRectangleEnable, 1, 1) | - __gen_field(values->ViewportZNearClipTestEnable, 0, 0) | - 0; - - dw[2] = - __gen_float(values->GlobalDepthOffsetConstant) | - 0; - - dw[3] = - __gen_float(values->GlobalDepthOffsetScale) | - 0; - - dw[4] = - __gen_float(values->GlobalDepthOffsetClamp) | - 0; - -} - -#define GEN9_3DSTATE_RS_CONSTANT_POINTER_length_bias 0x00000002 -#define GEN9_3DSTATE_RS_CONSTANT_POINTER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 84, \ - .DwordLength = 2 - 
-#define GEN9_3DSTATE_RS_CONSTANT_POINTER_length 0x00000004 - -struct GEN9_3DSTATE_RS_CONSTANT_POINTER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; -#define VS 0 -#define PS 4 - uint32_t ShaderSelect; -#define RS_STORE 0 -#define RS_LOAD 1 - uint32_t OperationLoadorStore; - __gen_address_type GlobalConstantBufferAddress; - __gen_address_type GlobalConstantBufferAddressHigh; -}; - -static inline void -GEN9_3DSTATE_RS_CONSTANT_POINTER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_RS_CONSTANT_POINTER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ShaderSelect, 28, 30) | - __gen_field(values->OperationLoadorStore, 12, 12) | - 0; - - uint32_t dw2 = - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->GlobalConstantBufferAddress, dw2); - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->GlobalConstantBufferAddressHigh, dw3); - -} - -#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 -#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 2 - -#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 - -#define GEN9_PALETTE_ENTRY_length 0x00000001 - -struct GEN9_PALETTE_ENTRY { - uint32_t Alpha; - uint32_t Red; - uint32_t Green; - uint32_t Blue; -}; - -static inline void -GEN9_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_PALETTE_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->Alpha, 24, 31) | - 
__gen_field(values->Red, 16, 23) | - __gen_field(values->Green, 8, 15) | - __gen_field(values->Blue, 0, 7) | - 0; - -} - -struct GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0 { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 -#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 12 - -#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 - -struct GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1 { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 -#define 
GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 45, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 - -struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoDSSamplerState; -}; - -static inline void -GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoDSSamplerState, 5, 31) | - 0; - -} - -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 46, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 - -struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoGSSamplerState; -}; - -static inline void -GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - 
__gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoGSSamplerState, 5, 31) | - 0; - -} - -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 44, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 - -struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoHSSamplerState; -}; - -static inline void -GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoHSSamplerState, 5, 31) | - 0; - -} - -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 47, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 - -struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoPSSamplerState; -}; - -static inline void -GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoPSSamplerState, 5, 31) | - 0; - -} - -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 43, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 - -struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PointertoVSSamplerState; -}; - -static inline void -GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->PointertoVSSamplerState, 5, 31) | - 0; - -} - -#define GEN9_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 -#define GEN9_3DSTATE_SAMPLE_MASK_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 24, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_SAMPLE_MASK_length 0x00000002 - -struct GEN9_3DSTATE_SAMPLE_MASK { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t 
DwordLength; - uint32_t SampleMask; -}; - -static inline void -GEN9_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SAMPLE_MASK * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SampleMask, 0, 15) | - 0; - -} - -#define GEN9_3DSTATE_SAMPLE_PATTERN_length_bias 0x00000002 -#define GEN9_3DSTATE_SAMPLE_PATTERN_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 28, \ - .DwordLength = 7 - -#define GEN9_3DSTATE_SAMPLE_PATTERN_length 0x00000009 - -struct GEN9_3DSTATE_SAMPLE_PATTERN { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - float _16xSample3XOffset; - float _16xSample3YOffset; - float _16xSample2XOffset; - float _16xSample2YOffset; - float _16xSample1XOffset; - float _16xSample1YOffset; - float _16xSample0XOffset; - float _16xSample0YOffset; - float _16xSample7XOffset; - float _16xSample7YOffset; - float _16xSample6XOffset; - float _16xSample6YOffset; - float _16xSample5XOffset; - float _16xSample5YOffset; - float _16xSample4XOffset; - float _16xSample4YOffset; - float _16xSample11XOffset; - float _16xSample11YOffset; - float _16xSample10XOffset; - float _16xSample10YOffset; - float _16xSample9XOffset; - float _16xSample9YOffset; - float _16xSample8XOffset; - float _16xSample8YOffset; - float _16xSample15XOffset; - float _16xSample15YOffset; - float _16xSample14XOffset; - float _16xSample14YOffset; - float _16xSample13XOffset; - float _16xSample13YOffset; - float _16xSample12XOffset; - float _16xSample12YOffset; - float _8xSample7XOffset; - float _8xSample7YOffset; - float 
_8xSample6XOffset; - float _8xSample6YOffset; - float _8xSample5XOffset; - float _8xSample5YOffset; - float _8xSample4XOffset; - float _8xSample4YOffset; - float _8xSample3XOffset; - float _8xSample3YOffset; - float _8xSample2XOffset; - float _8xSample2YOffset; - float _8xSample1XOffset; - float _8xSample1YOffset; - float _8xSample0XOffset; - float _8xSample0YOffset; - float _4xSample3XOffset; - float _4xSample3YOffset; - float _4xSample2XOffset; - float _4xSample2YOffset; - float _4xSample1XOffset; - float _4xSample1YOffset; - float _4xSample0XOffset; - float _4xSample0YOffset; - float _1xSample0XOffset; - float _1xSample0YOffset; - float _2xSample1XOffset; - float _2xSample1YOffset; - float _2xSample0XOffset; - float _2xSample0YOffset; -}; - -static inline void -GEN9_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SAMPLE_PATTERN * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->_16xSample3XOffset * (1 << 4), 28, 31) | - __gen_field(values->_16xSample3YOffset * (1 << 4), 24, 27) | - __gen_field(values->_16xSample2XOffset * (1 << 4), 20, 23) | - __gen_field(values->_16xSample2YOffset * (1 << 4), 16, 19) | - __gen_field(values->_16xSample1XOffset * (1 << 4), 12, 15) | - __gen_field(values->_16xSample1YOffset * (1 << 4), 8, 11) | - __gen_field(values->_16xSample0XOffset * (1 << 4), 4, 7) | - __gen_field(values->_16xSample0YOffset * (1 << 4), 0, 3) | - 0; - - dw[2] = - __gen_field(values->_16xSample7XOffset * (1 << 4), 28, 31) | - __gen_field(values->_16xSample7YOffset * (1 << 4), 24, 27) | - __gen_field(values->_16xSample6XOffset * (1 << 4), 20, 23) | - __gen_field(values->_16xSample6YOffset * (1 << 4), 16, 19) | - 
__gen_field(values->_16xSample5XOffset * (1 << 4), 12, 15) | - __gen_field(values->_16xSample5YOffset * (1 << 4), 8, 11) | - __gen_field(values->_16xSample4XOffset * (1 << 4), 4, 7) | - __gen_field(values->_16xSample4YOffset * (1 << 4), 0, 3) | - 0; - - dw[3] = - __gen_field(values->_16xSample11XOffset * (1 << 4), 28, 31) | - __gen_field(values->_16xSample11YOffset * (1 << 4), 24, 27) | - __gen_field(values->_16xSample10XOffset * (1 << 4), 20, 23) | - __gen_field(values->_16xSample10YOffset * (1 << 4), 16, 19) | - __gen_field(values->_16xSample9XOffset * (1 << 4), 12, 15) | - __gen_field(values->_16xSample9YOffset * (1 << 4), 8, 11) | - __gen_field(values->_16xSample8XOffset * (1 << 4), 4, 7) | - __gen_field(values->_16xSample8YOffset * (1 << 4), 0, 3) | - 0; - - dw[4] = - __gen_field(values->_16xSample15XOffset * (1 << 4), 28, 31) | - __gen_field(values->_16xSample15YOffset * (1 << 4), 24, 27) | - __gen_field(values->_16xSample14XOffset * (1 << 4), 20, 23) | - __gen_field(values->_16xSample14YOffset * (1 << 4), 16, 19) | - __gen_field(values->_16xSample13XOffset * (1 << 4), 12, 15) | - __gen_field(values->_16xSample13YOffset * (1 << 4), 8, 11) | - __gen_field(values->_16xSample12XOffset * (1 << 4), 4, 7) | - __gen_field(values->_16xSample12YOffset * (1 << 4), 0, 3) | - 0; - - dw[5] = - __gen_field(values->_8xSample7XOffset * (1 << 4), 28, 31) | - __gen_field(values->_8xSample7YOffset * (1 << 4), 24, 27) | - __gen_field(values->_8xSample6XOffset * (1 << 4), 20, 23) | - __gen_field(values->_8xSample6YOffset * (1 << 4), 16, 19) | - __gen_field(values->_8xSample5XOffset * (1 << 4), 12, 15) | - __gen_field(values->_8xSample5YOffset * (1 << 4), 8, 11) | - __gen_field(values->_8xSample4XOffset * (1 << 4), 4, 7) | - __gen_field(values->_8xSample4YOffset * (1 << 4), 0, 3) | - 0; - - dw[6] = - __gen_field(values->_8xSample3XOffset * (1 << 4), 28, 31) | - __gen_field(values->_8xSample3YOffset * (1 << 4), 24, 27) | - __gen_field(values->_8xSample2XOffset * (1 << 4), 20, 23) | 
- __gen_field(values->_8xSample2YOffset * (1 << 4), 16, 19) | - __gen_field(values->_8xSample1XOffset * (1 << 4), 12, 15) | - __gen_field(values->_8xSample1YOffset * (1 << 4), 8, 11) | - __gen_field(values->_8xSample0XOffset * (1 << 4), 4, 7) | - __gen_field(values->_8xSample0YOffset * (1 << 4), 0, 3) | - 0; - - dw[7] = - __gen_field(values->_4xSample3XOffset * (1 << 4), 28, 31) | - __gen_field(values->_4xSample3YOffset * (1 << 4), 24, 27) | - __gen_field(values->_4xSample2XOffset * (1 << 4), 20, 23) | - __gen_field(values->_4xSample2YOffset * (1 << 4), 16, 19) | - __gen_field(values->_4xSample1XOffset * (1 << 4), 12, 15) | - __gen_field(values->_4xSample1YOffset * (1 << 4), 8, 11) | - __gen_field(values->_4xSample0XOffset * (1 << 4), 4, 7) | - __gen_field(values->_4xSample0YOffset * (1 << 4), 0, 3) | - 0; - - dw[8] = - __gen_field(values->_1xSample0XOffset * (1 << 4), 20, 23) | - __gen_field(values->_1xSample0YOffset * (1 << 4), 16, 19) | - __gen_field(values->_2xSample1XOffset * (1 << 4), 12, 15) | - __gen_field(values->_2xSample1YOffset * (1 << 4), 8, 11) | - __gen_field(values->_2xSample0XOffset * (1 << 4), 4, 7) | - __gen_field(values->_2xSample0YOffset * (1 << 4), 0, 3) | - 0; - -} - -#define GEN9_3DSTATE_SBE_length_bias 0x00000002 -#define GEN9_3DSTATE_SBE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 31, \ - .DwordLength = 4 - -#define GEN9_3DSTATE_SBE_length 0x00000006 - -struct GEN9_3DSTATE_SBE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool ForceVertexURBEntryReadLength; - bool ForceVertexURBEntryReadOffset; - uint32_t NumberofSFOutputAttributes; - bool AttributeSwizzleEnable; -#define UPPERLEFT 0 -#define LOWERLEFT 1 - uint32_t PointSpriteTextureCoordinateOrigin; - bool PrimitiveIDOverrideComponentW; - bool PrimitiveIDOverrideComponentZ; - bool PrimitiveIDOverrideComponentY; - bool 
PrimitiveIDOverrideComponentX; - uint32_t VertexURBEntryReadLength; - uint32_t VertexURBEntryReadOffset; - uint32_t PrimitiveIDOverrideAttributeSelect; - uint32_t PointSpriteTextureCoordinateEnable; - uint32_t ConstantInterpolationEnable; - uint32_t Attribute15ActiveComponentFormat; - uint32_t Attribute14ActiveComponentFormat; - uint32_t Attribute13ActiveComponentFormat; - uint32_t Attribute12ActiveComponentFormat; - uint32_t Attribute11ActiveComponentFormat; - uint32_t Attribute10ActiveComponentFormat; - uint32_t Attribute9ActiveComponentFormat; - uint32_t Attribute8ActiveComponentFormat; - uint32_t Attribute7ActiveComponentFormat; - uint32_t Attribute6ActiveComponentFormat; - uint32_t Attribute5ActiveComponentFormat; - uint32_t Attribute4ActiveComponentFormat; - uint32_t Attribute3ActiveComponentFormat; - uint32_t Attribute2ActiveComponentFormat; - uint32_t Attribute1ActiveComponentFormat; - uint32_t Attribute0ActiveComponentFormat; - uint32_t Attribute31ActiveComponentFormat; - uint32_t Attribute30ActiveComponentFormat; - uint32_t Attribute29ActiveComponentFormat; - uint32_t Attribute28ActiveComponentFormat; - uint32_t Attribute27ActiveComponentFormat; - uint32_t Attribute26ActiveComponentFormat; - uint32_t Attribute25ActiveComponentFormat; - uint32_t Attribute24ActiveComponentFormat; - uint32_t Attribute23ActiveComponentFormat; - uint32_t Attribute22ActiveComponentFormat; - uint32_t Attribute21ActiveComponentFormat; - uint32_t Attribute20ActiveComponentFormat; - uint32_t Attribute19ActiveComponentFormat; - uint32_t Attribute18ActiveComponentFormat; - uint32_t Attribute17ActiveComponentFormat; - uint32_t Attribute16ActiveComponentFormat; -}; - -static inline void -GEN9_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SBE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - 
__gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ForceVertexURBEntryReadLength, 29, 29) | - __gen_field(values->ForceVertexURBEntryReadOffset, 28, 28) | - __gen_field(values->NumberofSFOutputAttributes, 22, 27) | - __gen_field(values->AttributeSwizzleEnable, 21, 21) | - __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | - __gen_field(values->PrimitiveIDOverrideComponentW, 19, 19) | - __gen_field(values->PrimitiveIDOverrideComponentZ, 18, 18) | - __gen_field(values->PrimitiveIDOverrideComponentY, 17, 17) | - __gen_field(values->PrimitiveIDOverrideComponentX, 16, 16) | - __gen_field(values->VertexURBEntryReadLength, 11, 15) | - __gen_field(values->VertexURBEntryReadOffset, 5, 10) | - __gen_field(values->PrimitiveIDOverrideAttributeSelect, 0, 4) | - 0; - - dw[2] = - __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | - 0; - - dw[3] = - __gen_field(values->ConstantInterpolationEnable, 0, 31) | - 0; - - dw[4] = - __gen_field(values->Attribute15ActiveComponentFormat, 30, 31) | - __gen_field(values->Attribute14ActiveComponentFormat, 28, 29) | - __gen_field(values->Attribute13ActiveComponentFormat, 26, 27) | - __gen_field(values->Attribute12ActiveComponentFormat, 24, 25) | - __gen_field(values->Attribute11ActiveComponentFormat, 22, 23) | - __gen_field(values->Attribute10ActiveComponentFormat, 20, 21) | - __gen_field(values->Attribute9ActiveComponentFormat, 18, 19) | - __gen_field(values->Attribute8ActiveComponentFormat, 16, 17) | - __gen_field(values->Attribute7ActiveComponentFormat, 14, 15) | - __gen_field(values->Attribute6ActiveComponentFormat, 12, 13) | - __gen_field(values->Attribute5ActiveComponentFormat, 10, 11) | - __gen_field(values->Attribute4ActiveComponentFormat, 8, 9) | - __gen_field(values->Attribute3ActiveComponentFormat, 6, 7) | - __gen_field(values->Attribute2ActiveComponentFormat, 4, 5) | - 
__gen_field(values->Attribute1ActiveComponentFormat, 2, 3) | - __gen_field(values->Attribute0ActiveComponentFormat, 0, 1) | - 0; - - dw[5] = - __gen_field(values->Attribute31ActiveComponentFormat, 30, 31) | - __gen_field(values->Attribute30ActiveComponentFormat, 28, 29) | - __gen_field(values->Attribute29ActiveComponentFormat, 26, 27) | - __gen_field(values->Attribute28ActiveComponentFormat, 24, 25) | - __gen_field(values->Attribute27ActiveComponentFormat, 22, 23) | - __gen_field(values->Attribute26ActiveComponentFormat, 20, 21) | - __gen_field(values->Attribute25ActiveComponentFormat, 18, 19) | - __gen_field(values->Attribute24ActiveComponentFormat, 16, 17) | - __gen_field(values->Attribute23ActiveComponentFormat, 14, 15) | - __gen_field(values->Attribute22ActiveComponentFormat, 12, 13) | - __gen_field(values->Attribute21ActiveComponentFormat, 10, 11) | - __gen_field(values->Attribute20ActiveComponentFormat, 8, 9) | - __gen_field(values->Attribute19ActiveComponentFormat, 6, 7) | - __gen_field(values->Attribute18ActiveComponentFormat, 4, 5) | - __gen_field(values->Attribute17ActiveComponentFormat, 2, 3) | - __gen_field(values->Attribute16ActiveComponentFormat, 0, 1) | - 0; - -} - -#define GEN9_3DSTATE_SBE_SWIZ_length_bias 0x00000002 -#define GEN9_3DSTATE_SBE_SWIZ_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 81, \ - .DwordLength = 9 - -#define GEN9_3DSTATE_SBE_SWIZ_length 0x0000000b - -#define GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL_length 0x00000001 - -struct GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL { - bool ComponentOverrideW; - bool ComponentOverrideZ; - bool ComponentOverrideY; - bool ComponentOverrideX; - uint32_t SwizzleControlMode; -#define CONST_0000 0 -#define CONST_0001_FLOAT 1 -#define CONST_1111_FLOAT 2 -#define PRIM_ID 3 - uint32_t ConstantSource; -#define INPUTATTR 0 -#define INPUTATTR_FACING 1 -#define INPUTATTR_W 2 -#define INPUTATTR_FACING_W 3 - uint32_t SwizzleSelect; - uint32_t SourceAttribute; -}; - 
-static inline void -GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ComponentOverrideW, 15, 15) | - __gen_field(values->ComponentOverrideZ, 14, 14) | - __gen_field(values->ComponentOverrideY, 13, 13) | - __gen_field(values->ComponentOverrideX, 12, 12) | - __gen_field(values->SwizzleControlMode, 11, 11) | - __gen_field(values->ConstantSource, 9, 10) | - __gen_field(values->SwizzleSelect, 6, 7) | - __gen_field(values->SourceAttribute, 0, 4) | - 0; - -} - -struct GEN9_3DSTATE_SBE_SWIZ { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - struct GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL Attribute[16]; - uint32_t AttributeWrapShortestEnables[16]; -}; - -static inline void -GEN9_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SBE_SWIZ * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - for (uint32_t i = 0, j = 1; i < 16; i += 2, j++) { - uint32_t dw_Attribute0; - GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute0, &values->Attribute[i + 0]); - uint32_t dw_Attribute1; - GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute1, &values->Attribute[i + 1]); - dw[j] = - __gen_field(dw_Attribute0, 0, 15) | - __gen_field(dw_Attribute1, 16, 31) | - 0; - } - - for (uint32_t i = 0, j = 9; i < 16; i += 8, j++) { - dw[j] = - __gen_field(values->AttributeWrapShortestEnables[i + 0], 0, 3) | - __gen_field(values->AttributeWrapShortestEnables[i + 1], 4, 7) | - __gen_field(values->AttributeWrapShortestEnables[i + 
2], 8, 11) | - __gen_field(values->AttributeWrapShortestEnables[i + 3], 12, 15) | - __gen_field(values->AttributeWrapShortestEnables[i + 4], 16, 19) | - __gen_field(values->AttributeWrapShortestEnables[i + 5], 20, 23) | - __gen_field(values->AttributeWrapShortestEnables[i + 6], 24, 27) | - __gen_field(values->AttributeWrapShortestEnables[i + 7], 28, 31) | - 0; - } - -} - -#define GEN9_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 -#define GEN9_3DSTATE_SCISSOR_STATE_POINTERS_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 15, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 - -struct GEN9_3DSTATE_SCISSOR_STATE_POINTERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t ScissorRectPointer; -}; - -static inline void -GEN9_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->ScissorRectPointer, 5, 31) | - 0; - -} - -#define GEN9_3DSTATE_SF_length_bias 0x00000002 -#define GEN9_3DSTATE_SF_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 19, \ - .DwordLength = 2 - -#define GEN9_3DSTATE_SF_length 0x00000004 - -struct GEN9_3DSTATE_SF { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - float LineWidth; - bool LegacyGlobalDepthBiasEnable; - bool StatisticsEnable; - bool ViewportTransformEnable; -#define 
_05pixels 0 -#define _10pixels 1 -#define _20pixels 2 -#define _40pixels 3 - uint32_t LineEndCapAntialiasingRegionWidth; - bool LastPixelEnable; - uint32_t TriangleStripListProvokingVertexSelect; - uint32_t LineStripListProvokingVertexSelect; - uint32_t TriangleFanProvokingVertexSelect; -#define AALINEDISTANCE_TRUE 1 - uint32_t AALineDistanceMode; - bool SmoothPointEnable; - uint32_t VertexSubPixelPrecisionSelect; -#define Vertex 0 -#define State 1 - uint32_t PointWidthSource; - float PointWidth; -}; - -static inline void -GEN9_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SF * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->LineWidth * (1 << 7), 12, 29) | - __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | - __gen_field(values->StatisticsEnable, 10, 10) | - __gen_field(values->ViewportTransformEnable, 1, 1) | - 0; - - dw[2] = - __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | - 0; - - dw[3] = - __gen_field(values->LastPixelEnable, 31, 31) | - __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | - __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | - __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | - __gen_field(values->AALineDistanceMode, 14, 14) | - __gen_field(values->SmoothPointEnable, 13, 13) | - __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | - __gen_field(values->PointWidthSource, 11, 11) | - __gen_field(values->PointWidth * (1 << 3), 0, 10) | - 0; - -} - -#define GEN9_3DSTATE_SO_BUFFER_length_bias 0x00000002 -#define GEN9_3DSTATE_SO_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - 
._3DCommandSubOpcode = 24, \ - .DwordLength = 6 - -#define GEN9_3DSTATE_SO_BUFFER_length 0x00000008 - -struct GEN9_3DSTATE_SO_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool SOBufferEnable; - uint32_t SOBufferIndex; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; - bool StreamOffsetWriteEnable; - bool StreamOutputBufferOffsetAddressEnable; - __gen_address_type SurfaceBaseAddress; - uint32_t SurfaceSize; - __gen_address_type StreamOutputBufferOffsetAddress; - uint32_t StreamOffset; -}; - -static inline void -GEN9_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SO_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_SOBufferObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); - dw[1] = - __gen_field(values->SOBufferEnable, 31, 31) | - __gen_field(values->SOBufferIndex, 29, 30) | - __gen_field(dw_SOBufferObjectControlState, 22, 28) | - __gen_field(values->StreamOffsetWriteEnable, 21, 21) | - __gen_field(values->StreamOutputBufferOffsetAddressEnable, 20, 20) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - dw[4] = - __gen_field(values->SurfaceSize, 0, 29) | - 0; - - uint32_t dw5 = - 0; - - uint64_t qw5 = - __gen_combine_address(data, &dw[5], values->StreamOutputBufferOffsetAddress, dw5); - - dw[5] = qw5; - dw[6] = qw5 >> 32; - - dw[7] = - __gen_field(values->StreamOffset, 0, 31) | - 0; - -} - -#define 
GEN9_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 -#define GEN9_3DSTATE_SO_DECL_LIST_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 23 - -#define GEN9_3DSTATE_SO_DECL_LIST_length 0x00000000 - -#define GEN9_SO_DECL_ENTRY_length 0x00000002 - -#define GEN9_SO_DECL_length 0x00000001 - -struct GEN9_SO_DECL { - uint32_t OutputBufferSlot; - uint32_t HoleFlag; - uint32_t RegisterIndex; - uint32_t ComponentMask; -}; - -static inline void -GEN9_SO_DECL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_SO_DECL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->OutputBufferSlot, 12, 13) | - __gen_field(values->HoleFlag, 11, 11) | - __gen_field(values->RegisterIndex, 4, 9) | - __gen_field(values->ComponentMask, 0, 3) | - 0; - -} - -struct GEN9_SO_DECL_ENTRY { - struct GEN9_SO_DECL Stream3Decl; - struct GEN9_SO_DECL Stream2Decl; - struct GEN9_SO_DECL Stream1Decl; - struct GEN9_SO_DECL Stream0Decl; -}; - -static inline void -GEN9_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_SO_DECL_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_Stream3Decl; - GEN9_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); - uint32_t dw_Stream2Decl; - GEN9_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); - uint32_t dw_Stream1Decl; - GEN9_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); - uint32_t dw_Stream0Decl; - GEN9_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); - uint64_t qw0 = - __gen_field(dw_Stream3Decl, 48, 63) | - __gen_field(dw_Stream2Decl, 32, 47) | - __gen_field(dw_Stream1Decl, 16, 31) | - __gen_field(dw_Stream0Decl, 0, 15) | - 0; - - dw[0] = qw0; - dw[1] = qw0 >> 32; - -} - -struct GEN9_3DSTATE_SO_DECL_LIST { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - 
uint32_t StreamtoBufferSelects3; - uint32_t StreamtoBufferSelects2; - uint32_t StreamtoBufferSelects1; - uint32_t StreamtoBufferSelects0; - uint32_t NumEntries3; - uint32_t NumEntries2; - uint32_t NumEntries1; - uint32_t NumEntries0; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_SO_DECL_LIST * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 8) | - 0; - - dw[1] = - __gen_field(values->StreamtoBufferSelects3, 12, 15) | - __gen_field(values->StreamtoBufferSelects2, 8, 11) | - __gen_field(values->StreamtoBufferSelects1, 4, 7) | - __gen_field(values->StreamtoBufferSelects0, 0, 3) | - 0; - - dw[2] = - __gen_field(values->NumEntries3, 24, 31) | - __gen_field(values->NumEntries2, 16, 23) | - __gen_field(values->NumEntries1, 8, 15) | - __gen_field(values->NumEntries0, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 -#define GEN9_3DSTATE_STENCIL_BUFFER_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 6, \ - .DwordLength = 3 - -#define GEN9_3DSTATE_STENCIL_BUFFER_length 0x00000005 - -struct GEN9_3DSTATE_STENCIL_BUFFER { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t StencilBufferEnable; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; - uint32_t SurfacePitch; - __gen_address_type SurfaceBaseAddress; - uint32_t SurfaceQPitch; -}; - -static inline void -GEN9_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN9_3DSTATE_STENCIL_BUFFER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw_StencilBufferObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); - dw[1] = - __gen_field(values->StencilBufferEnable, 31, 31) | - __gen_field(dw_StencilBufferObjectControlState, 22, 28) | - __gen_field(values->SurfacePitch, 0, 16) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - dw[4] = - __gen_field(values->SurfaceQPitch, 0, 14) | - 0; - -} - -#define GEN9_3DSTATE_STREAMOUT_length_bias 0x00000002 -#define GEN9_3DSTATE_STREAMOUT_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 30, \ - .DwordLength = 3 - -#define GEN9_3DSTATE_STREAMOUT_length 0x00000005 - -struct GEN9_3DSTATE_STREAMOUT { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SOFunctionEnable; - uint32_t APIRenderingDisable; - uint32_t RenderStreamSelect; -#define LEADING 0 -#define TRAILING 1 - uint32_t ReorderMode; - bool SOStatisticsEnable; -#define Normal 0 -#define Resreved 1 -#define Force_Off 2 -#define Force_on 3 - uint32_t ForceRendering; - uint32_t Stream3VertexReadOffset; - uint32_t Stream3VertexReadLength; - uint32_t Stream2VertexReadOffset; - uint32_t Stream2VertexReadLength; - uint32_t Stream1VertexReadOffset; - uint32_t Stream1VertexReadLength; - uint32_t Stream0VertexReadOffset; - uint32_t Stream0VertexReadLength; - uint32_t Buffer1SurfacePitch; - uint32_t 
Buffer0SurfacePitch; - uint32_t Buffer3SurfacePitch; - uint32_t Buffer2SurfacePitch; -}; - -static inline void -GEN9_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_STREAMOUT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->SOFunctionEnable, 31, 31) | - __gen_field(values->APIRenderingDisable, 30, 30) | - __gen_field(values->RenderStreamSelect, 27, 28) | - __gen_field(values->ReorderMode, 26, 26) | - __gen_field(values->SOStatisticsEnable, 25, 25) | - __gen_field(values->ForceRendering, 23, 24) | - 0; - - dw[2] = - __gen_field(values->Stream3VertexReadOffset, 29, 29) | - __gen_field(values->Stream3VertexReadLength, 24, 28) | - __gen_field(values->Stream2VertexReadOffset, 21, 21) | - __gen_field(values->Stream2VertexReadLength, 16, 20) | - __gen_field(values->Stream1VertexReadOffset, 13, 13) | - __gen_field(values->Stream1VertexReadLength, 8, 12) | - __gen_field(values->Stream0VertexReadOffset, 5, 5) | - __gen_field(values->Stream0VertexReadLength, 0, 4) | - 0; - - dw[3] = - __gen_field(values->Buffer1SurfacePitch, 16, 27) | - __gen_field(values->Buffer0SurfacePitch, 0, 11) | - 0; - - dw[4] = - __gen_field(values->Buffer3SurfacePitch, 16, 27) | - __gen_field(values->Buffer2SurfacePitch, 0, 11) | - 0; - -} - -#define GEN9_3DSTATE_TE_length_bias 0x00000002 -#define GEN9_3DSTATE_TE_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 28, \ - .DwordLength = 2 - -#define GEN9_3DSTATE_TE_length 0x00000004 - -struct GEN9_3DSTATE_TE { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; 
-#define INTEGER 0 -#define ODD_FRACTIONAL 1 -#define EVEN_FRACTIONAL 2 - uint32_t Partitioning; -#define POINT 0 -#define OUTPUT_LINE 1 -#define OUTPUT_TRI_CW 2 -#define OUTPUT_TRI_CCW 3 - uint32_t OutputTopology; -#define QUAD 0 -#define TRI 1 -#define ISOLINE 2 - uint32_t TEDomain; -#define HW_TESS 0 - uint32_t TEMode; - bool TEEnable; - float MaximumTessellationFactorOdd; - float MaximumTessellationFactorNotOdd; -}; - -static inline void -GEN9_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_TE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->Partitioning, 12, 13) | - __gen_field(values->OutputTopology, 8, 9) | - __gen_field(values->TEDomain, 4, 5) | - __gen_field(values->TEMode, 1, 2) | - __gen_field(values->TEEnable, 0, 0) | - 0; - - dw[2] = - __gen_float(values->MaximumTessellationFactorOdd) | - 0; - - dw[3] = - __gen_float(values->MaximumTessellationFactorNotOdd) | - 0; - -} - -#define GEN9_3DSTATE_URB_CLEAR_length_bias 0x00000002 -#define GEN9_3DSTATE_URB_CLEAR_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 1, \ - ._3DCommandSubOpcode = 29, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_URB_CLEAR_length 0x00000002 - -struct GEN9_3DSTATE_URB_CLEAR { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t URBClearLength; - uint32_t URBAddress; -}; - -static inline void -GEN9_3DSTATE_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_URB_CLEAR * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - 
__gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->URBClearLength, 16, 29) | - __gen_offset(values->URBAddress, 0, 14) | - 0; - -} - -#define GEN9_3DSTATE_URB_DS_length_bias 0x00000002 -#define GEN9_3DSTATE_URB_DS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 50, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_URB_DS_length 0x00000002 - -struct GEN9_3DSTATE_URB_DS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t DSURBStartingAddress; - uint32_t DSURBEntryAllocationSize; - uint32_t DSNumberofURBEntries; -}; - -static inline void -GEN9_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_URB_DS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->DSURBStartingAddress, 25, 31) | - __gen_field(values->DSURBEntryAllocationSize, 16, 24) | - __gen_field(values->DSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN9_3DSTATE_URB_GS_length_bias 0x00000002 -#define GEN9_3DSTATE_URB_GS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 51, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_URB_GS_length 0x00000002 - -struct GEN9_3DSTATE_URB_GS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t GSURBStartingAddress; - uint32_t GSURBEntryAllocationSize; - 
uint32_t GSNumberofURBEntries; -}; - -static inline void -GEN9_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_URB_GS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->GSURBStartingAddress, 25, 31) | - __gen_field(values->GSURBEntryAllocationSize, 16, 24) | - __gen_field(values->GSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN9_3DSTATE_URB_HS_length_bias 0x00000002 -#define GEN9_3DSTATE_URB_HS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 49, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_URB_HS_length 0x00000002 - -struct GEN9_3DSTATE_URB_HS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t HSURBStartingAddress; - uint32_t HSURBEntryAllocationSize; - uint32_t HSNumberofURBEntries; -}; - -static inline void -GEN9_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_URB_HS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->HSURBStartingAddress, 25, 31) | - __gen_field(values->HSURBEntryAllocationSize, 16, 24) | - __gen_field(values->HSNumberofURBEntries, 0, 15) | - 0; - -} - -#define GEN9_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 -#define GEN9_3DSTATE_VERTEX_BUFFERS_header \ - .CommandType = 3, \ - .CommandSubType = 3, 
\ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 8 - -#define GEN9_3DSTATE_VERTEX_BUFFERS_length 0x00000000 - -#define GEN9_VERTEX_BUFFER_STATE_length 0x00000004 - -struct GEN9_VERTEX_BUFFER_STATE { - uint32_t VertexBufferIndex; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; - uint32_t AddressModifyEnable; - bool NullVertexBuffer; - uint32_t BufferPitch; - __gen_address_type BufferStartingAddress; - uint32_t BufferSize; -}; - -static inline void -GEN9_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_VERTEX_BUFFER_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint32_t dw_MemoryObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - __gen_field(dw_MemoryObjectControlState, 16, 22) | - __gen_field(values->AddressModifyEnable, 14, 14) | - __gen_field(values->NullVertexBuffer, 13, 13) | - __gen_field(values->BufferPitch, 0, 11) | - 0; - - uint32_t dw1 = - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->BufferSize, 0, 31) | - 0; - -} - -struct GEN9_3DSTATE_VERTEX_BUFFERS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_VERTEX_BUFFERS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable 
length fields follow */ -} - -#define GEN9_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 -#define GEN9_3DSTATE_VERTEX_ELEMENTS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 9 - -#define GEN9_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 - -#define GEN9_VERTEX_ELEMENT_STATE_length 0x00000002 - -struct GEN9_VERTEX_ELEMENT_STATE { - uint32_t VertexBufferIndex; - bool Valid; - uint32_t SourceElementFormat; - bool EdgeFlagEnable; - uint32_t SourceElementOffset; - uint32_t Component0Control; - uint32_t Component1Control; - uint32_t Component2Control; - uint32_t Component3Control; -}; - -static inline void -GEN9_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_VERTEX_ELEMENT_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->VertexBufferIndex, 26, 31) | - __gen_field(values->Valid, 25, 25) | - __gen_field(values->SourceElementFormat, 16, 24) | - __gen_field(values->EdgeFlagEnable, 15, 15) | - __gen_field(values->SourceElementOffset, 0, 11) | - 0; - - dw[1] = - __gen_field(values->Component0Control, 28, 30) | - __gen_field(values->Component1Control, 24, 26) | - __gen_field(values->Component2Control, 20, 22) | - __gen_field(values->Component3Control, 16, 18) | - 0; - -} - -struct GEN9_3DSTATE_VERTEX_ELEMENTS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - /* variable length fields follow */ -}; - -static inline void -GEN9_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_VERTEX_ELEMENTS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - 
__gen_field(values->DwordLength, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_3DSTATE_VF_length_bias 0x00000002 -#define GEN9_3DSTATE_VF_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 12, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_VF_length 0x00000002 - -struct GEN9_3DSTATE_VF { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - bool SequentialDrawCutIndexEnable; - bool ComponentPackingEnable; - bool IndexedDrawCutIndexEnable; - uint32_t DwordLength; - uint32_t CutIndex; -}; - -static inline void -GEN9_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_VF * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->SequentialDrawCutIndexEnable, 10, 10) | - __gen_field(values->ComponentPackingEnable, 9, 9) | - __gen_field(values->IndexedDrawCutIndexEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->CutIndex, 0, 31) | - 0; - -} - -#define GEN9_3DSTATE_VF_COMPONENT_PACKING_length_bias 0x00000002 -#define GEN9_3DSTATE_VF_COMPONENT_PACKING_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 85, \ - .DwordLength = 3 - -#define GEN9_3DSTATE_VF_COMPONENT_PACKING_length 0x00000005 - -struct GEN9_3DSTATE_VF_COMPONENT_PACKING { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t VertexElement07Enables; - uint32_t VertexElement06Enables; - uint32_t VertexElement05Enables; - uint32_t VertexElement04Enables; - uint32_t VertexElement03Enables; - uint32_t 
VertexElement02Enables; - uint32_t VertexElement01Enables; - uint32_t VertexElement00Enables; - uint32_t VertexElement15Enables; - uint32_t VertexElement14Enables; - uint32_t VertexElement13Enables; - uint32_t VertexElement12Enables; - uint32_t VertexElement11Enables; - uint32_t VertexElement10Enables; - uint32_t VertexElement09Enables; - uint32_t VertexElement08Enables; - uint32_t VertexElement23Enables; - uint32_t VertexElement22Enables; - uint32_t VertexElement21Enables; - uint32_t VertexElement20Enables; - uint32_t VertexElement19Enables; - uint32_t VertexElement18Enables; - uint32_t VertexElement17Enables; - uint32_t VertexElement16Enables; - uint32_t VertexElement31Enables; - uint32_t VertexElement30Enables; - uint32_t VertexElement29Enables; - uint32_t VertexElement28Enables; - uint32_t VertexElement27Enables; - uint32_t VertexElement26Enables; - uint32_t VertexElement25Enables; - uint32_t VertexElement24Enables; -}; - -static inline void -GEN9_3DSTATE_VF_COMPONENT_PACKING_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_VF_COMPONENT_PACKING * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->VertexElement07Enables, 28, 31) | - __gen_field(values->VertexElement06Enables, 24, 27) | - __gen_field(values->VertexElement05Enables, 20, 23) | - __gen_field(values->VertexElement04Enables, 16, 19) | - __gen_field(values->VertexElement03Enables, 12, 15) | - __gen_field(values->VertexElement02Enables, 8, 11) | - __gen_field(values->VertexElement01Enables, 4, 7) | - __gen_field(values->VertexElement00Enables, 0, 3) | - 0; - - dw[2] = - __gen_field(values->VertexElement15Enables, 28, 31) | - __gen_field(values->VertexElement14Enables, 24, 27) | 
- __gen_field(values->VertexElement13Enables, 20, 23) | - __gen_field(values->VertexElement12Enables, 16, 19) | - __gen_field(values->VertexElement11Enables, 12, 15) | - __gen_field(values->VertexElement10Enables, 8, 11) | - __gen_field(values->VertexElement09Enables, 4, 7) | - __gen_field(values->VertexElement08Enables, 0, 3) | - 0; - - dw[3] = - __gen_field(values->VertexElement23Enables, 28, 31) | - __gen_field(values->VertexElement22Enables, 24, 27) | - __gen_field(values->VertexElement21Enables, 20, 23) | - __gen_field(values->VertexElement20Enables, 16, 19) | - __gen_field(values->VertexElement19Enables, 12, 15) | - __gen_field(values->VertexElement18Enables, 8, 11) | - __gen_field(values->VertexElement17Enables, 4, 7) | - __gen_field(values->VertexElement16Enables, 0, 3) | - 0; - - dw[4] = - __gen_field(values->VertexElement31Enables, 28, 31) | - __gen_field(values->VertexElement30Enables, 24, 27) | - __gen_field(values->VertexElement29Enables, 20, 23) | - __gen_field(values->VertexElement28Enables, 16, 19) | - __gen_field(values->VertexElement27Enables, 12, 15) | - __gen_field(values->VertexElement26Enables, 8, 11) | - __gen_field(values->VertexElement25Enables, 4, 7) | - __gen_field(values->VertexElement24Enables, 0, 3) | - 0; - -} - -#define GEN9_3DSTATE_VF_INSTANCING_length_bias 0x00000002 -#define GEN9_3DSTATE_VF_INSTANCING_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 73, \ - .DwordLength = 1 - -#define GEN9_3DSTATE_VF_INSTANCING_length 0x00000003 - -struct GEN9_3DSTATE_VF_INSTANCING { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool InstancingEnable; - uint32_t VertexElementIndex; - uint32_t InstanceDataStepRate; -}; - -static inline void -GEN9_3DSTATE_VF_INSTANCING_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_VF_INSTANCING * restrict values) -{ - uint32_t *dw = 
(uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->InstancingEnable, 8, 8) | - __gen_field(values->VertexElementIndex, 0, 5) | - 0; - - dw[2] = - __gen_field(values->InstanceDataStepRate, 0, 31) | - 0; - -} - -#define GEN9_3DSTATE_VF_SGVS_length_bias 0x00000002 -#define GEN9_3DSTATE_VF_SGVS_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 74, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_VF_SGVS_length 0x00000002 - -struct GEN9_3DSTATE_VF_SGVS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool InstanceIDEnable; -#define COMP_0 0 -#define COMP_1 1 -#define COMP_2 2 -#define COMP_3 3 - uint32_t InstanceIDComponentNumber; - uint32_t InstanceIDElementOffset; - bool VertexIDEnable; -#define COMP_0 0 -#define COMP_1 1 -#define COMP_2 2 -#define COMP_3 3 - uint32_t VertexIDComponentNumber; - uint32_t VertexIDElementOffset; -}; - -static inline void -GEN9_3DSTATE_VF_SGVS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_VF_SGVS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->InstanceIDEnable, 31, 31) | - __gen_field(values->InstanceIDComponentNumber, 29, 30) | - __gen_field(values->InstanceIDElementOffset, 16, 21) | - __gen_field(values->VertexIDEnable, 15, 15) | - __gen_field(values->VertexIDComponentNumber, 13, 14) | - 
__gen_field(values->VertexIDElementOffset, 0, 5) | - 0; - -} - -#define GEN9_3DSTATE_VF_STATISTICS_length_bias 0x00000001 -#define GEN9_3DSTATE_VF_STATISTICS_header \ - .CommandType = 3, \ - .CommandSubType = 1, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 11 - -#define GEN9_3DSTATE_VF_STATISTICS_length 0x00000001 - -struct GEN9_3DSTATE_VF_STATISTICS { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - bool StatisticsEnable; -}; - -static inline void -GEN9_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_VF_STATISTICS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->StatisticsEnable, 0, 0) | - 0; - -} - -#define GEN9_3DSTATE_VF_TOPOLOGY_length_bias 0x00000002 -#define GEN9_3DSTATE_VF_TOPOLOGY_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 75, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_VF_TOPOLOGY_length 0x00000002 - -struct GEN9_3DSTATE_VF_TOPOLOGY { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t PrimitiveTopologyType; -}; - -static inline void -GEN9_3DSTATE_VF_TOPOLOGY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_VF_TOPOLOGY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->PrimitiveTopologyType, 0, 5) | - 0; - 
-} - -#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 -#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 35, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 - -struct GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t CCViewportPointer; -}; - -static inline void -GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->CCViewportPointer, 5, 31) | - 0; - -} - -#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 -#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 33, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 - -struct GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t SFClipViewportPointer; -}; - -static inline void -GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - 
__gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->SFClipViewportPointer, 6, 31) | - 0; - -} - -#define GEN9_3DSTATE_WM_length_bias 0x00000002 -#define GEN9_3DSTATE_WM_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 20, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_WM_length 0x00000002 - -struct GEN9_3DSTATE_WM { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool StatisticsEnable; - bool LegacyDepthBufferClearEnable; - bool LegacyDepthBufferResolveEnable; - bool LegacyHierarchicalDepthBufferResolveEnable; - bool LegacyDiamondLineRasterization; -#define NORMAL 0 -#define PSEXEC 1 -#define PREPS 2 - uint32_t EarlyDepthStencilControl; -#define Normal 0 -#define ForceOff 1 -#define ForceON 2 - uint32_t ForceThreadDispatchEnable; -#define INTERP_PIXEL 0 -#define INTERP_CENTROID 2 -#define INTERP_SAMPLE 3 - uint32_t PositionZWInterpolationMode; - uint32_t BarycentricInterpolationMode; -#define _05pixels 0 -#define _10pixels 1 -#define _20pixels 2 -#define _40pixels 3 - uint32_t LineEndCapAntialiasingRegionWidth; -#define _05pixels 0 -#define _10pixels 1 -#define _20pixels 2 -#define _40pixels 3 - uint32_t LineAntialiasingRegionWidth; - bool PolygonStippleEnable; - bool LineStippleEnable; -#define RASTRULE_UPPER_LEFT 0 -#define RASTRULE_UPPER_RIGHT 1 - uint32_t PointRasterizationRule; -#define Normal 0 -#define ForceOff 1 -#define ForceON 2 - uint32_t ForceKillPixelEnable; -}; - -static inline void -GEN9_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_WM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - 
__gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->StatisticsEnable, 31, 31) | - __gen_field(values->LegacyDepthBufferClearEnable, 30, 30) | - __gen_field(values->LegacyDepthBufferResolveEnable, 28, 28) | - __gen_field(values->LegacyHierarchicalDepthBufferResolveEnable, 27, 27) | - __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | - __gen_field(values->EarlyDepthStencilControl, 21, 22) | - __gen_field(values->ForceThreadDispatchEnable, 19, 20) | - __gen_field(values->PositionZWInterpolationMode, 17, 18) | - __gen_field(values->BarycentricInterpolationMode, 11, 16) | - __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | - __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | - __gen_field(values->PolygonStippleEnable, 4, 4) | - __gen_field(values->LineStippleEnable, 3, 3) | - __gen_field(values->PointRasterizationRule, 2, 2) | - __gen_field(values->ForceKillPixelEnable, 0, 1) | - 0; - -} - -#define GEN9_3DSTATE_WM_CHROMAKEY_length_bias 0x00000002 -#define GEN9_3DSTATE_WM_CHROMAKEY_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 76, \ - .DwordLength = 0 - -#define GEN9_3DSTATE_WM_CHROMAKEY_length 0x00000002 - -struct GEN9_3DSTATE_WM_CHROMAKEY { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool ChromaKeyKillEnable; -}; - -static inline void -GEN9_3DSTATE_WM_CHROMAKEY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_WM_CHROMAKEY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 
16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ChromaKeyKillEnable, 31, 31) | - 0; - -} - -#define GEN9_3DSTATE_WM_DEPTH_STENCIL_length_bias 0x00000002 -#define GEN9_3DSTATE_WM_DEPTH_STENCIL_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 78, \ - .DwordLength = 2 - -#define GEN9_3DSTATE_WM_DEPTH_STENCIL_length 0x00000004 - -struct GEN9_3DSTATE_WM_DEPTH_STENCIL { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - uint32_t StencilFailOp; - uint32_t StencilPassDepthFailOp; - uint32_t StencilPassDepthPassOp; - uint32_t BackfaceStencilTestFunction; - uint32_t BackfaceStencilFailOp; - uint32_t BackfaceStencilPassDepthFailOp; - uint32_t BackfaceStencilPassDepthPassOp; - uint32_t StencilTestFunction; - uint32_t DepthTestFunction; - bool DoubleSidedStencilEnable; - bool StencilTestEnable; - bool StencilBufferWriteEnable; - bool DepthTestEnable; - bool DepthBufferWriteEnable; - uint32_t StencilTestMask; - uint32_t StencilWriteMask; - uint32_t BackfaceStencilTestMask; - uint32_t BackfaceStencilWriteMask; - uint32_t StencilReferenceValue; - uint32_t BackfaceStencilReferenceValue; -}; - -static inline void -GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_WM_DEPTH_STENCIL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->StencilFailOp, 29, 31) | - __gen_field(values->StencilPassDepthFailOp, 26, 28) | - __gen_field(values->StencilPassDepthPassOp, 23, 25) | - __gen_field(values->BackfaceStencilTestFunction, 20, 22) | - 
__gen_field(values->BackfaceStencilFailOp, 17, 19) | - __gen_field(values->BackfaceStencilPassDepthFailOp, 14, 16) | - __gen_field(values->BackfaceStencilPassDepthPassOp, 11, 13) | - __gen_field(values->StencilTestFunction, 8, 10) | - __gen_field(values->DepthTestFunction, 5, 7) | - __gen_field(values->DoubleSidedStencilEnable, 4, 4) | - __gen_field(values->StencilTestEnable, 3, 3) | - __gen_field(values->StencilBufferWriteEnable, 2, 2) | - __gen_field(values->DepthTestEnable, 1, 1) | - __gen_field(values->DepthBufferWriteEnable, 0, 0) | - 0; - - dw[2] = - __gen_field(values->StencilTestMask, 24, 31) | - __gen_field(values->StencilWriteMask, 16, 23) | - __gen_field(values->BackfaceStencilTestMask, 8, 15) | - __gen_field(values->BackfaceStencilWriteMask, 0, 7) | - 0; - - dw[3] = - __gen_field(values->StencilReferenceValue, 8, 15) | - __gen_field(values->BackfaceStencilReferenceValue, 0, 7) | - 0; - -} - -#define GEN9_3DSTATE_WM_HZ_OP_length_bias 0x00000002 -#define GEN9_3DSTATE_WM_HZ_OP_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 0, \ - ._3DCommandSubOpcode = 82, \ - .DwordLength = 3 - -#define GEN9_3DSTATE_WM_HZ_OP_length 0x00000005 - -struct GEN9_3DSTATE_WM_HZ_OP { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool StencilBufferClearEnable; - bool DepthBufferClearEnable; - bool ScissorRectangleEnable; - bool DepthBufferResolveEnable; - bool HierarchicalDepthBufferResolveEnable; - uint32_t PixelPositionOffsetEnable; - bool FullSurfaceDepthClear; - uint32_t StencilClearValue; - uint32_t NumberofMultisamples; - uint32_t ClearRectangleYMin; - uint32_t ClearRectangleXMin; - uint32_t ClearRectangleYMax; - uint32_t ClearRectangleXMax; - uint32_t SampleMask; -}; - -static inline void -GEN9_3DSTATE_WM_HZ_OP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_3DSTATE_WM_HZ_OP * restrict values) -{ - uint32_t *dw = (uint32_t * 
restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->StencilBufferClearEnable, 31, 31) | - __gen_field(values->DepthBufferClearEnable, 30, 30) | - __gen_field(values->ScissorRectangleEnable, 29, 29) | - __gen_field(values->DepthBufferResolveEnable, 28, 28) | - __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | - __gen_field(values->PixelPositionOffsetEnable, 26, 26) | - __gen_field(values->FullSurfaceDepthClear, 25, 25) | - __gen_field(values->StencilClearValue, 16, 23) | - __gen_field(values->NumberofMultisamples, 13, 15) | - 0; - - dw[2] = - __gen_field(values->ClearRectangleYMin, 16, 31) | - __gen_field(values->ClearRectangleXMin, 0, 15) | - 0; - - dw[3] = - __gen_field(values->ClearRectangleYMax, 16, 31) | - __gen_field(values->ClearRectangleXMax, 0, 15) | - 0; - - dw[4] = - __gen_field(values->SampleMask, 0, 15) | - 0; - -} - -#define GEN9_GPGPU_WALKER_length_bias 0x00000002 -#define GEN9_GPGPU_WALKER_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcode = 5, \ - .DwordLength = 13 - -#define GEN9_GPGPU_WALKER_length 0x0000000f - -struct GEN9_GPGPU_WALKER { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - bool IndirectParameterEnable; - bool PredicateEnable; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - uint32_t IndirectDataLength; - uint32_t IndirectDataStartAddress; -#define SIMD8 0 -#define SIMD16 1 -#define SIMD32 2 - uint32_t SIMDSize; - uint32_t ThreadDepthCounterMaximum; - uint32_t ThreadHeightCounterMaximum; - uint32_t ThreadWidthCounterMaximum; - uint32_t ThreadGroupIDStartingX; - uint32_t ThreadGroupIDXDimension; - uint32_t ThreadGroupIDStartingY; - uint32_t ThreadGroupIDYDimension; 
- uint32_t ThreadGroupIDStartingResumeZ; - uint32_t ThreadGroupIDZDimension; - uint32_t RightExecutionMask; - uint32_t BottomExecutionMask; -}; - -static inline void -GEN9_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_GPGPU_WALKER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->IndirectParameterEnable, 10, 10) | - __gen_field(values->PredicateEnable, 8, 8) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - dw[3] = - __gen_offset(values->IndirectDataStartAddress, 6, 31) | - 0; - - dw[4] = - __gen_field(values->SIMDSize, 30, 31) | - __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | - __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | - __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | - 0; - - dw[5] = - __gen_field(values->ThreadGroupIDStartingX, 0, 31) | - 0; - - dw[6] = - 0; - - dw[7] = - __gen_field(values->ThreadGroupIDXDimension, 0, 31) | - 0; - - dw[8] = - __gen_field(values->ThreadGroupIDStartingY, 0, 31) | - 0; - - dw[9] = - 0; - - dw[10] = - __gen_field(values->ThreadGroupIDYDimension, 0, 31) | - 0; - - dw[11] = - __gen_field(values->ThreadGroupIDStartingResumeZ, 0, 31) | - 0; - - dw[12] = - __gen_field(values->ThreadGroupIDZDimension, 0, 31) | - 0; - - dw[13] = - __gen_field(values->RightExecutionMask, 0, 31) | - 0; - - dw[14] = - __gen_field(values->BottomExecutionMask, 0, 31) | - 0; - -} - -#define GEN9_MEDIA_CURBE_LOAD_length_bias 0x00000002 -#define GEN9_MEDIA_CURBE_LOAD_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 1, \ - .DwordLength = 2 - -#define GEN9_MEDIA_CURBE_LOAD_length 
0x00000004 - -struct GEN9_MEDIA_CURBE_LOAD { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t CURBETotalDataLength; - uint32_t CURBEDataStartAddress; -}; - -static inline void -GEN9_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MEDIA_CURBE_LOAD * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - 0; - - dw[2] = - __gen_field(values->CURBETotalDataLength, 0, 16) | - 0; - - dw[3] = - __gen_field(values->CURBEDataStartAddress, 0, 31) | - 0; - -} - -#define GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 -#define GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 2, \ - .DwordLength = 2 - -#define GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 - -struct GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorTotalLength; - uint32_t InterfaceDescriptorDataStartAddress; -}; - -static inline void -GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - 0; - - dw[2] = - __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | - 0; - - dw[3] = - 
__gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | - 0; - -} - -#define GEN9_MEDIA_OBJECT_length_bias 0x00000002 -#define GEN9_MEDIA_OBJECT_header \ - .CommandType = 3, \ - .MediaCommandPipeline = 2, \ - .MediaCommandOpcode = 1, \ - .MediaCommandSubOpcode = 0 - -#define GEN9_MEDIA_OBJECT_length 0x00000000 - -struct GEN9_MEDIA_OBJECT { - uint32_t CommandType; - uint32_t MediaCommandPipeline; - uint32_t MediaCommandOpcode; - uint32_t MediaCommandSubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - bool ChildrenPresent; - uint32_t SliceDestinationSelectMSBs; -#define Nothreadsynchronization 0 -#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 - uint32_t ThreadSynchronization; - uint32_t ForceDestination; -#define Notusingscoreboard 0 -#define Usingscoreboard 1 - uint32_t UseScoreboard; -#define Slice0 0 -#define Slice1 1 -#define Slice2 2 - uint32_t SliceDestinationSelect; -#define Subslice3 3 -#define SubSlice2 2 -#define SubSlice1 1 -#define SubSlice0 0 - uint32_t SubSliceDestinationSelect; - uint32_t IndirectDataLength; - __gen_address_type IndirectDataStartAddress; - uint32_t ScoredboardY; - uint32_t ScoreboardX; - uint32_t ScoreboardColor; - bool ScoreboardMask; - /* variable length fields follow */ -}; - -static inline void -GEN9_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MEDIA_OBJECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MediaCommandPipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->MediaCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->ChildrenPresent, 31, 31) | - __gen_field(values->SliceDestinationSelectMSBs, 25, 26) | - __gen_field(values->ThreadSynchronization, 24, 24) | - 
__gen_field(values->ForceDestination, 22, 22) | - __gen_field(values->UseScoreboard, 21, 21) | - __gen_field(values->SliceDestinationSelect, 19, 20) | - __gen_field(values->SubSliceDestinationSelect, 17, 18) | - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); - - dw[4] = - __gen_field(values->ScoredboardY, 16, 24) | - __gen_field(values->ScoreboardX, 0, 8) | - 0; - - dw[5] = - __gen_field(values->ScoreboardColor, 16, 19) | - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_MEDIA_OBJECT_GRPID_length_bias 0x00000002 -#define GEN9_MEDIA_OBJECT_GRPID_header \ - .CommandType = 3, \ - .MediaCommandPipeline = 2, \ - .MediaCommandOpcode = 1, \ - .MediaCommandSubOpcode = 6 - -#define GEN9_MEDIA_OBJECT_GRPID_length 0x00000000 - -struct GEN9_MEDIA_OBJECT_GRPID { - uint32_t CommandType; - uint32_t MediaCommandPipeline; - uint32_t MediaCommandOpcode; - uint32_t MediaCommandSubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - uint32_t SliceDestinationSelectMSB; - uint32_t EndofThreadGroup; - uint32_t ForceDestination; -#define Notusingscoreboard 0 -#define Usingscoreboard 1 - uint32_t UseScoreboard; -#define Slice0 0 -#define Slice1 1 -#define Slice2 2 - uint32_t SliceDestinationSelect; -#define Subslice3 3 -#define SubSlice2 2 -#define SubSlice1 1 -#define SubSlice0 0 - uint32_t SubSliceDestinationSelect; - uint32_t IndirectDataLength; - __gen_address_type IndirectDataStartAddress; - uint32_t ScoreboardY; - uint32_t ScoreboardX; - uint32_t ScoreboardColor; - bool ScoreboardMask; - uint32_t GroupID; - /* variable length fields follow */ -}; - -static inline void -GEN9_MEDIA_OBJECT_GRPID_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MEDIA_OBJECT_GRPID * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - 
__gen_field(values->CommandType, 29, 31) | - __gen_field(values->MediaCommandPipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->MediaCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->SliceDestinationSelectMSB, 24, 24) | - __gen_field(values->EndofThreadGroup, 23, 23) | - __gen_field(values->ForceDestination, 22, 22) | - __gen_field(values->UseScoreboard, 21, 21) | - __gen_field(values->SliceDestinationSelect, 19, 20) | - __gen_field(values->SubSliceDestinationSelect, 17, 18) | - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); - - dw[4] = - __gen_field(values->ScoreboardY, 16, 24) | - __gen_field(values->ScoreboardX, 0, 8) | - 0; - - dw[5] = - __gen_field(values->ScoreboardColor, 16, 19) | - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - dw[6] = - __gen_field(values->GroupID, 0, 31) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_MEDIA_OBJECT_PRT_length_bias 0x00000002 -#define GEN9_MEDIA_OBJECT_PRT_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcode = 2, \ - .DwordLength = 14 - -#define GEN9_MEDIA_OBJECT_PRT_length 0x00000010 - -struct GEN9_MEDIA_OBJECT_PRT { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; - bool ChildrenPresent; - bool PRT_FenceNeeded; -#define Rootthreadqueue 0 -#define VFEstateflush 1 - uint32_t PRT_FenceType; - uint32_t InlineData[12]; -}; - -static inline void -GEN9_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MEDIA_OBJECT_PRT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 
29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->ChildrenPresent, 31, 31) | - __gen_field(values->PRT_FenceNeeded, 23, 23) | - __gen_field(values->PRT_FenceType, 22, 22) | - 0; - - dw[3] = - 0; - - for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { - dw[j] = - __gen_field(values->InlineData[i + 0], 0, 31) | - 0; - } - -} - -#define GEN9_MEDIA_OBJECT_WALKER_length_bias 0x00000002 -#define GEN9_MEDIA_OBJECT_WALKER_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 1, \ - .SubOpcode = 3 - -#define GEN9_MEDIA_OBJECT_WALKER_length 0x00000000 - -struct GEN9_MEDIA_OBJECT_WALKER { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t InterfaceDescriptorOffset; -#define Nothreadsynchronization 0 -#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 - uint32_t ThreadSynchronization; - uint32_t MaskedDispatch; -#define Notusingscoreboard 0 -#define Usingscoreboard 1 - uint32_t UseScoreboard; - uint32_t IndirectDataLength; - uint32_t IndirectDataStartAddress; - uint32_t GroupIDLoopSelect; - bool ScoreboardMask; - uint32_t ColorCountMinusOne; - uint32_t MiddleLoopExtraSteps; - uint32_t LocalMidLoopUnitY; - uint32_t MidLoopUnitX; - uint32_t GlobalLoopExecCount; - uint32_t LocalLoopExecCount; - uint32_t BlockResolutionY; - uint32_t BlockResolutionX; - uint32_t LocalStartY; - uint32_t LocalStartX; - uint32_t LocalOuterLoopStrideY; - uint32_t LocalOuterLoopStrideX; - uint32_t LocalInnerLoopUnitY; - uint32_t LocalInnerLoopUnitX; - uint32_t GlobalResolutionY; - uint32_t GlobalResolutionX; - uint32_t GlobalStartY; - uint32_t GlobalStartX; - uint32_t GlobalOuterLoopStrideY; - uint32_t GlobalOuterLoopStrideX; - uint32_t 
GlobalInnerLoopUnitY; - uint32_t GlobalInnerLoopUnitX; - /* variable length fields follow */ -}; - -static inline void -GEN9_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MEDIA_OBJECT_WALKER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - - dw[2] = - __gen_field(values->ThreadSynchronization, 24, 24) | - __gen_field(values->MaskedDispatch, 22, 23) | - __gen_field(values->UseScoreboard, 21, 21) | - __gen_field(values->IndirectDataLength, 0, 16) | - 0; - - dw[3] = - __gen_field(values->IndirectDataStartAddress, 0, 31) | - 0; - - dw[4] = - 0; - - dw[5] = - __gen_field(values->GroupIDLoopSelect, 8, 31) | - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - dw[6] = - __gen_field(values->ColorCountMinusOne, 24, 27) | - __gen_field(values->MiddleLoopExtraSteps, 16, 20) | - __gen_field(values->LocalMidLoopUnitY, 12, 13) | - __gen_field(values->MidLoopUnitX, 8, 9) | - 0; - - dw[7] = - __gen_field(values->GlobalLoopExecCount, 16, 27) | - __gen_field(values->LocalLoopExecCount, 0, 11) | - 0; - - dw[8] = - __gen_field(values->BlockResolutionY, 16, 26) | - __gen_field(values->BlockResolutionX, 0, 10) | - 0; - - dw[9] = - __gen_field(values->LocalStartY, 16, 26) | - __gen_field(values->LocalStartX, 0, 10) | - 0; - - dw[10] = - 0; - - dw[11] = - __gen_field(values->LocalOuterLoopStrideY, 16, 27) | - __gen_field(values->LocalOuterLoopStrideX, 0, 11) | - 0; - - dw[12] = - __gen_field(values->LocalInnerLoopUnitY, 16, 27) | - __gen_field(values->LocalInnerLoopUnitX, 0, 11) | - 0; - - dw[13] = - __gen_field(values->GlobalResolutionY, 16, 26) | - __gen_field(values->GlobalResolutionX, 0, 10) | - 0; - - dw[14] = 
- __gen_field(values->GlobalStartY, 16, 27) | - __gen_field(values->GlobalStartX, 0, 11) | - 0; - - dw[15] = - __gen_field(values->GlobalOuterLoopStrideY, 16, 27) | - __gen_field(values->GlobalOuterLoopStrideX, 0, 11) | - 0; - - dw[16] = - __gen_field(values->GlobalInnerLoopUnitY, 16, 27) | - __gen_field(values->GlobalInnerLoopUnitX, 0, 11) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_MEDIA_STATE_FLUSH_length_bias 0x00000002 -#define GEN9_MEDIA_STATE_FLUSH_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 4, \ - .DwordLength = 0 - -#define GEN9_MEDIA_STATE_FLUSH_length 0x00000002 - -struct GEN9_MEDIA_STATE_FLUSH { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - bool FlushtoGO; - uint32_t WatermarkRequired; - uint32_t InterfaceDescriptorOffset; -}; - -static inline void -GEN9_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MEDIA_STATE_FLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_field(values->FlushtoGO, 7, 7) | - __gen_field(values->WatermarkRequired, 6, 6) | - __gen_field(values->InterfaceDescriptorOffset, 0, 5) | - 0; - -} - -#define GEN9_MEDIA_VFE_STATE_length_bias 0x00000002 -#define GEN9_MEDIA_VFE_STATE_header \ - .CommandType = 3, \ - .Pipeline = 2, \ - .MediaCommandOpcode = 0, \ - .SubOpcode = 0, \ - .DwordLength = 7 - -#define GEN9_MEDIA_VFE_STATE_length 0x00000009 - -struct GEN9_MEDIA_VFE_STATE { - uint32_t CommandType; - uint32_t Pipeline; - uint32_t MediaCommandOpcode; - uint32_t SubOpcode; - uint32_t DwordLength; - uint32_t ScratchSpaceBasePointer; - uint32_t StackSize; - uint32_t 
PerThreadScratchSpace; - uint32_t ScratchSpaceBasePointerHigh; - uint32_t MaximumNumberofThreads; - uint32_t NumberofURBEntries; -#define Maintainingtheexistingtimestampstate 0 -#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 - uint32_t ResetGatewayTimer; - uint32_t SliceDisable; - uint32_t URBEntryAllocationSize; - uint32_t CURBEAllocationSize; -#define Scoreboarddisabled 0 -#define Scoreboardenabled 1 - uint32_t ScoreboardEnable; -#define StallingScoreboard 0 -#define NonStallingScoreboard 1 - uint32_t ScoreboardType; - uint32_t ScoreboardMask; - uint32_t Scoreboard3DeltaY; - uint32_t Scoreboard3DeltaX; - uint32_t Scoreboard2DeltaY; - uint32_t Scoreboard2DeltaX; - uint32_t Scoreboard1DeltaY; - uint32_t Scoreboard1DeltaX; - uint32_t Scoreboard0DeltaY; - uint32_t Scoreboard0DeltaX; - uint32_t Scoreboard7DeltaY; - uint32_t Scoreboard7DeltaX; - uint32_t Scoreboard6DeltaY; - uint32_t Scoreboard6DeltaX; - uint32_t Scoreboard5DeltaY; - uint32_t Scoreboard5DeltaX; - uint32_t Scoreboard4DeltaY; - uint32_t Scoreboard4DeltaX; -}; - -static inline void -GEN9_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MEDIA_VFE_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->Pipeline, 27, 28) | - __gen_field(values->MediaCommandOpcode, 24, 26) | - __gen_field(values->SubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 15) | - 0; - - dw[1] = - __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | - __gen_field(values->StackSize, 4, 7) | - __gen_field(values->PerThreadScratchSpace, 0, 3) | - 0; - - dw[2] = - __gen_offset(values->ScratchSpaceBasePointerHigh, 0, 15) | - 0; - - dw[3] = - __gen_field(values->MaximumNumberofThreads, 16, 31) | - __gen_field(values->NumberofURBEntries, 8, 15) | - __gen_field(values->ResetGatewayTimer, 7, 7) | - 0; - - dw[4] = - __gen_field(values->SliceDisable, 0, 1) | - 0; - - dw[5] = - 
__gen_field(values->URBEntryAllocationSize, 16, 31) | - __gen_field(values->CURBEAllocationSize, 0, 15) | - 0; - - dw[6] = - __gen_field(values->ScoreboardEnable, 31, 31) | - __gen_field(values->ScoreboardType, 30, 30) | - __gen_field(values->ScoreboardMask, 0, 7) | - 0; - - dw[7] = - __gen_field(values->Scoreboard3DeltaY, 28, 31) | - __gen_field(values->Scoreboard3DeltaX, 24, 27) | - __gen_field(values->Scoreboard2DeltaY, 20, 23) | - __gen_field(values->Scoreboard2DeltaX, 16, 19) | - __gen_field(values->Scoreboard1DeltaY, 12, 15) | - __gen_field(values->Scoreboard1DeltaX, 8, 11) | - __gen_field(values->Scoreboard0DeltaY, 4, 7) | - __gen_field(values->Scoreboard0DeltaX, 0, 3) | - 0; - - dw[8] = - __gen_field(values->Scoreboard7DeltaY, 28, 31) | - __gen_field(values->Scoreboard7DeltaX, 24, 27) | - __gen_field(values->Scoreboard6DeltaY, 20, 23) | - __gen_field(values->Scoreboard6DeltaX, 16, 19) | - __gen_field(values->Scoreboard5DeltaY, 12, 15) | - __gen_field(values->Scoreboard5DeltaX, 8, 11) | - __gen_field(values->Scoreboard4DeltaY, 4, 7) | - __gen_field(values->Scoreboard4DeltaX, 0, 3) | - 0; - -} - -#define GEN9_MI_ARB_CHECK_length_bias 0x00000001 -#define GEN9_MI_ARB_CHECK_header \ - .CommandType = 0, \ - .MICommandOpcode = 5 - -#define GEN9_MI_ARB_CHECK_length 0x00000001 - -struct GEN9_MI_ARB_CHECK { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN9_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_ARB_CHECK * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN9_MI_BATCH_BUFFER_END_length_bias 0x00000001 -#define GEN9_MI_BATCH_BUFFER_END_header \ - .CommandType = 0, \ - .MICommandOpcode = 10 - -#define GEN9_MI_BATCH_BUFFER_END_length 0x00000001 - -struct GEN9_MI_BATCH_BUFFER_END { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline 
void -GEN9_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_BATCH_BUFFER_END * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN9_MI_CLFLUSH_length_bias 0x00000002 -#define GEN9_MI_CLFLUSH_header \ - .CommandType = 0, \ - .MICommandOpcode = 39 - -#define GEN9_MI_CLFLUSH_length 0x00000000 - -struct GEN9_MI_CLFLUSH { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t UseGlobalGTT; - uint32_t DwordLength; - __gen_address_type PageBaseAddress; - uint32_t StartingCachelineOffset; - /* variable length fields follow */ -}; - -static inline void -GEN9_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_CLFLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->DwordLength, 0, 9) | - 0; - - uint32_t dw1 = - __gen_field(values->StartingCachelineOffset, 6, 11) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - /* variable length fields follow */ -} - -#define GEN9_MI_COPY_MEM_MEM_length_bias 0x00000002 -#define GEN9_MI_COPY_MEM_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 46, \ - .DwordLength = 3 - -#define GEN9_MI_COPY_MEM_MEM_length 0x00000005 - -struct GEN9_MI_COPY_MEM_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t UseGlobalGTTSource; -#define PerProcessGraphicsAddress 0 -#define GlobalGraphicsAddress 1 - uint32_t UseGlobalGTTDestination; - uint32_t DwordLength; - __gen_address_type 
DestinationMemoryAddress; - __gen_address_type SourceMemoryAddress; -}; - -static inline void -GEN9_MI_COPY_MEM_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_COPY_MEM_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTTSource, 22, 22) | - __gen_field(values->UseGlobalGTTDestination, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->DestinationMemoryAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - uint32_t dw3 = - 0; - - uint64_t qw3 = - __gen_combine_address(data, &dw[3], values->SourceMemoryAddress, dw3); - - dw[3] = qw3; - dw[4] = qw3 >> 32; - -} - -#define GEN9_MI_DISPLAY_FLIP_length_bias 0x00000002 -#define GEN9_MI_DISPLAY_FLIP_header \ - .CommandType = 0, \ - .MICommandOpcode = 20 - -#define GEN9_MI_DISPLAY_FLIP_length 0x00000003 - -struct GEN9_MI_DISPLAY_FLIP { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool AsyncFlipIndicator; -#define DisplayPlane1 0 -#define DisplayPlane2 1 -#define DisplayPlane3 2 -#define DisplayPlane4 4 -#define DisplayPlane5 5 -#define DisplayPlane6 6 -#define DisplayPlane7 7 -#define DisplayPlane8 8 -#define DisplayPlane9 9 -#define DisplayPlane10 10 -#define DisplayPlane11 11 -#define DisplayPlane12 12 - uint32_t DisplayPlaneSelect; - uint32_t DwordLength; - bool Stereoscopic3DMode; - uint32_t DisplayBufferPitch; -#define Linear 0 -#define TiledX 1 -#define TiledYLegacyYB 4 -#define TiledYF 5 - bool TileParameter; - __gen_address_type DisplayBufferBaseAddress; -#define SyncFlip 0 -#define AsyncFlip 1 -#define Stereo3DFlip 2 - uint32_t FlipType; - __gen_address_type LeftEyeDisplayBufferBaseAddress; -}; - -static inline void -GEN9_MI_DISPLAY_FLIP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_DISPLAY_FLIP * 
restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->AsyncFlipIndicator, 22, 22) | - __gen_field(values->DisplayPlaneSelect, 8, 12) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->Stereoscopic3DMode, 31, 31) | - __gen_field(values->DisplayBufferPitch, 6, 15) | - __gen_field(values->TileParameter, 0, 2) | - 0; - - uint32_t dw2 = - __gen_field(values->FlipType, 0, 1) | - 0; - - dw[2] = - __gen_combine_address(data, &dw[2], values->DisplayBufferBaseAddress, dw2); - - uint32_t dw3 = - 0; - - dw[3] = - __gen_combine_address(data, &dw[3], values->LeftEyeDisplayBufferBaseAddress, dw3); - -} - -#define GEN9_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 -#define GEN9_MI_LOAD_REGISTER_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 41, \ - .DwordLength = 2 - -#define GEN9_MI_LOAD_REGISTER_MEM_length 0x00000004 - -struct GEN9_MI_LOAD_REGISTER_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool UseGlobalGTT; - uint32_t AsyncModeEnable; - uint32_t DwordLength; - uint32_t RegisterAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN9_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_LOAD_REGISTER_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->UseGlobalGTT, 22, 22) | - __gen_field(values->AsyncModeEnable, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_offset(values->RegisterAddress, 2, 22) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - -} - -#define GEN9_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 -#define 
GEN9_MI_LOAD_SCAN_LINES_EXCL_header \ - .CommandType = 0, \ - .MICommandOpcode = 19, \ - .DwordLength = 0 - -#define GEN9_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 - -struct GEN9_MI_LOAD_SCAN_LINES_EXCL { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define DisplayPlaneA 0 -#define DisplayPlaneB 1 -#define DisplayPlaneC 4 - uint32_t DisplayPlaneSelect; - uint32_t DwordLength; - uint32_t StartScanLineNumber; - uint32_t EndScanLineNumber; -}; - -static inline void -GEN9_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_LOAD_SCAN_LINES_EXCL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DisplayPlaneSelect, 19, 21) | - __gen_field(values->DwordLength, 0, 5) | - 0; - - dw[1] = - __gen_field(values->StartScanLineNumber, 16, 28) | - __gen_field(values->EndScanLineNumber, 0, 12) | - 0; - -} - -#define GEN9_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 -#define GEN9_MI_LOAD_SCAN_LINES_INCL_header \ - .CommandType = 0, \ - .MICommandOpcode = 18, \ - .DwordLength = 0 - -#define GEN9_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 - -struct GEN9_MI_LOAD_SCAN_LINES_INCL { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define DisplayPlane1A 0 -#define DisplayPlane1B 1 -#define DisplayPlane1C 4 - uint32_t DisplayPlaneSelect; -#define NeverForward 0 -#define AlwaysForward 1 -#define ConditionallyForward 2 - bool ScanLineEventDoneForward; - uint32_t DwordLength; - uint32_t StartScanLineNumber; - uint32_t EndScanLineNumber; -}; - -static inline void -GEN9_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_LOAD_SCAN_LINES_INCL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DisplayPlaneSelect, 19, 
21) | - __gen_field(values->ScanLineEventDoneForward, 17, 18) | - __gen_field(values->DwordLength, 0, 5) | - 0; - - dw[1] = - __gen_field(values->StartScanLineNumber, 16, 28) | - __gen_field(values->EndScanLineNumber, 0, 12) | - 0; - -} - -#define GEN9_MI_LOAD_URB_MEM_length_bias 0x00000002 -#define GEN9_MI_LOAD_URB_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 44, \ - .DwordLength = 2 - -#define GEN9_MI_LOAD_URB_MEM_length 0x00000004 - -struct GEN9_MI_LOAD_URB_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t URBAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN9_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_LOAD_URB_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->URBAddress, 2, 14) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - -} - -#define GEN9_MI_MATH_length_bias 0x00000002 -#define GEN9_MI_MATH_header \ - .CommandType = 0, \ - .MICommandOpcode = 26 - -#define GEN9_MI_MATH_length 0x00000000 - -struct GEN9_MI_MATH { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t ALUINSTRUCTION1; - uint32_t ALUINSTRUCTION2; - /* variable length fields follow */ -}; - -static inline void -GEN9_MI_MATH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_MATH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->ALUINSTRUCTION1, 0, 31) | - 0; - - dw[2] = - __gen_field(values->ALUINSTRUCTION2, 0, 
31) | - 0; - - /* variable length fields follow */ -} - -#define GEN9_MI_NOOP_length_bias 0x00000001 -#define GEN9_MI_NOOP_header \ - .CommandType = 0, \ - .MICommandOpcode = 0 - -#define GEN9_MI_NOOP_length 0x00000001 - -struct GEN9_MI_NOOP { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool IdentificationNumberRegisterWriteEnable; - uint32_t IdentificationNumber; -}; - -static inline void -GEN9_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_NOOP * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | - __gen_field(values->IdentificationNumber, 0, 21) | - 0; - -} - -#define GEN9_MI_PREDICATE_length_bias 0x00000001 -#define GEN9_MI_PREDICATE_header \ - .CommandType = 0, \ - .MICommandOpcode = 12 - -#define GEN9_MI_PREDICATE_length 0x00000001 - -struct GEN9_MI_PREDICATE { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define LOAD_KEEP 0 -#define LOAD_LOAD 2 -#define LOAD_LOADINV 3 - uint32_t LoadOperation; -#define COMBINE_SET 0 -#define COMBINE_AND 1 -#define COMBINE_OR 2 -#define COMBINE_XOR 3 - uint32_t CombineOperation; -#define COMPARE_SRCS_EQUAL 2 -#define COMPARE_DELTAS_EQUAL 3 - uint32_t CompareOperation; -}; - -static inline void -GEN9_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_PREDICATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->LoadOperation, 6, 7) | - __gen_field(values->CombineOperation, 3, 4) | - __gen_field(values->CompareOperation, 0, 1) | - 0; - -} - -#define GEN9_MI_REPORT_HEAD_length_bias 0x00000001 -#define GEN9_MI_REPORT_HEAD_header \ - .CommandType = 0, \ - .MICommandOpcode = 7 - -#define GEN9_MI_REPORT_HEAD_length 
0x00000001 - -struct GEN9_MI_REPORT_HEAD { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN9_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_REPORT_HEAD * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN9_MI_RS_CONTEXT_length_bias 0x00000001 -#define GEN9_MI_RS_CONTEXT_header \ - .CommandType = 0, \ - .MICommandOpcode = 15 - -#define GEN9_MI_RS_CONTEXT_length 0x00000001 - -struct GEN9_MI_RS_CONTEXT { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define RS_RESTORE 0 -#define RS_SAVE 1 - uint32_t ResourceStreamerSave; -}; - -static inline void -GEN9_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_RS_CONTEXT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->ResourceStreamerSave, 0, 0) | - 0; - -} - -#define GEN9_MI_RS_CONTROL_length_bias 0x00000001 -#define GEN9_MI_RS_CONTROL_header \ - .CommandType = 0, \ - .MICommandOpcode = 6 - -#define GEN9_MI_RS_CONTROL_length 0x00000001 - -struct GEN9_MI_RS_CONTROL { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define RS_STOP 0 -#define RS_START 1 - uint32_t ResourceStreamerControl; -}; - -static inline void -GEN9_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_RS_CONTROL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->ResourceStreamerControl, 0, 0) | - 0; - -} - -#define GEN9_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 -#define GEN9_MI_RS_STORE_DATA_IMM_header \ - .CommandType = 0, \ - .MICommandOpcode = 43, \ - .DwordLength = 2 - 
-#define GEN9_MI_RS_STORE_DATA_IMM_length 0x00000004 - -struct GEN9_MI_RS_STORE_DATA_IMM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - __gen_address_type DestinationAddress; - uint32_t CoreModeEnable; - uint32_t DataDWord0; -}; - -static inline void -GEN9_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_RS_STORE_DATA_IMM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - __gen_field(values->CoreModeEnable, 0, 0) | - 0; - - uint64_t qw1 = - __gen_combine_address(data, &dw[1], values->DestinationAddress, dw1); - - dw[1] = qw1; - dw[2] = qw1 >> 32; - - dw[3] = - __gen_field(values->DataDWord0, 0, 31) | - 0; - -} - -#define GEN9_MI_SET_CONTEXT_length_bias 0x00000002 -#define GEN9_MI_SET_CONTEXT_header \ - .CommandType = 0, \ - .MICommandOpcode = 24, \ - .DwordLength = 0 - -#define GEN9_MI_SET_CONTEXT_length 0x00000002 - -struct GEN9_MI_SET_CONTEXT { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - __gen_address_type LogicalContextAddress; - uint32_t ReservedMustbe1; - bool CoreModeEnable; - bool ResourceStreamerStateSaveEnable; - bool ResourceStreamerStateRestoreEnable; - uint32_t ForceRestore; - uint32_t RestoreInhibit; -}; - -static inline void -GEN9_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_SET_CONTEXT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - uint32_t dw1 = - __gen_field(values->ReservedMustbe1, 8, 8) | - __gen_field(values->CoreModeEnable, 4, 4) | - __gen_field(values->ResourceStreamerStateSaveEnable, 3, 3) | - 
__gen_field(values->ResourceStreamerStateRestoreEnable, 2, 2) | - __gen_field(values->ForceRestore, 1, 1) | - __gen_field(values->RestoreInhibit, 0, 0) | - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); - -} - -#define GEN9_MI_SET_PREDICATE_length_bias 0x00000001 -#define GEN9_MI_SET_PREDICATE_header \ - .CommandType = 0, \ - .MICommandOpcode = 1 - -#define GEN9_MI_SET_PREDICATE_length 0x00000001 - -struct GEN9_MI_SET_PREDICATE { - uint32_t CommandType; - uint32_t MICommandOpcode; -#define NOOPNever 0 -#define NOOPonResult2clear 1 -#define NOOPonResult2set 2 -#define NOOPonResultclear 3 -#define NOOPonResultset 4 -#define Executewhenonesliceenabled 5 -#define Executewhentwoslicesareenabled 6 -#define Executewhenthreeslicesareenabled 7 -#define NOOPAlways 15 - uint32_t PREDICATEENABLE; -}; - -static inline void -GEN9_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_SET_PREDICATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->PREDICATEENABLE, 0, 3) | - 0; - -} - -#define GEN9_MI_STORE_DATA_INDEX_length_bias 0x00000002 -#define GEN9_MI_STORE_DATA_INDEX_header \ - .CommandType = 0, \ - .MICommandOpcode = 33, \ - .DwordLength = 1 - -#define GEN9_MI_STORE_DATA_INDEX_length 0x00000003 - -struct GEN9_MI_STORE_DATA_INDEX { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t UsePerProcessHardwareStatusPage; - uint32_t DwordLength; - uint32_t Offset; - uint32_t DataDWord0; - uint32_t DataDWord1; -}; - -static inline void -GEN9_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_STORE_DATA_INDEX * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 
__gen_field(values->UsePerProcessHardwareStatusPage, 21, 21) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->Offset, 2, 11) | - 0; - - dw[2] = - __gen_field(values->DataDWord0, 0, 31) | - 0; - - dw[3] = - __gen_field(values->DataDWord1, 0, 31) | - 0; - -} - -#define GEN9_MI_STORE_URB_MEM_length_bias 0x00000002 -#define GEN9_MI_STORE_URB_MEM_header \ - .CommandType = 0, \ - .MICommandOpcode = 45, \ - .DwordLength = 2 - -#define GEN9_MI_STORE_URB_MEM_length 0x00000004 - -struct GEN9_MI_STORE_URB_MEM { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - uint32_t URBAddress; - __gen_address_type MemoryAddress; -}; - -static inline void -GEN9_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_STORE_URB_MEM * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->URBAddress, 2, 14) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - -} - -#define GEN9_MI_SUSPEND_FLUSH_length_bias 0x00000001 -#define GEN9_MI_SUSPEND_FLUSH_header \ - .CommandType = 0, \ - .MICommandOpcode = 11 - -#define GEN9_MI_SUSPEND_FLUSH_length 0x00000001 - -struct GEN9_MI_SUSPEND_FLUSH { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool SuspendFlush; -}; - -static inline void -GEN9_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_SUSPEND_FLUSH * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->SuspendFlush, 0, 0) | - 0; - -} - -#define GEN9_MI_TOPOLOGY_FILTER_length_bias 0x00000001 -#define 
GEN9_MI_TOPOLOGY_FILTER_header \ - .CommandType = 0, \ - .MICommandOpcode = 13 - -#define GEN9_MI_TOPOLOGY_FILTER_length 0x00000001 - -struct GEN9_MI_TOPOLOGY_FILTER { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t TopologyFilterValue; -}; - -static inline void -GEN9_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_TOPOLOGY_FILTER * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->TopologyFilterValue, 0, 5) | - 0; - -} - -#define GEN9_MI_UPDATE_GTT_length_bias 0x00000002 -#define GEN9_MI_UPDATE_GTT_header \ - .CommandType = 0, \ - .MICommandOpcode = 35 - -#define GEN9_MI_UPDATE_GTT_length 0x00000000 - -struct GEN9_MI_UPDATE_GTT { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t DwordLength; - __gen_address_type EntryAddress; - /* variable length fields follow */ -}; - -static inline void -GEN9_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_UPDATE_GTT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DwordLength, 0, 9) | - 0; - - uint32_t dw1 = - 0; - - dw[1] = - __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); - - /* variable length fields follow */ -} - -#define GEN9_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 -#define GEN9_MI_URB_ATOMIC_ALLOC_header \ - .CommandType = 0, \ - .MICommandOpcode = 9 - -#define GEN9_MI_URB_ATOMIC_ALLOC_length 0x00000001 - -struct GEN9_MI_URB_ATOMIC_ALLOC { - uint32_t CommandType; - uint32_t MICommandOpcode; - uint32_t URBAtomicStorageOffset; - uint32_t URBAtomicStorageSize; -}; - -static inline void -GEN9_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_URB_ATOMIC_ALLOC * restrict values) 
-{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->URBAtomicStorageOffset, 12, 19) | - __gen_field(values->URBAtomicStorageSize, 0, 8) | - 0; - -} - -#define GEN9_MI_USER_INTERRUPT_length_bias 0x00000001 -#define GEN9_MI_USER_INTERRUPT_header \ - .CommandType = 0, \ - .MICommandOpcode = 2 - -#define GEN9_MI_USER_INTERRUPT_length 0x00000001 - -struct GEN9_MI_USER_INTERRUPT { - uint32_t CommandType; - uint32_t MICommandOpcode; -}; - -static inline void -GEN9_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_USER_INTERRUPT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - 0; - -} - -#define GEN9_MI_WAIT_FOR_EVENT_length_bias 0x00000001 -#define GEN9_MI_WAIT_FOR_EVENT_header \ - .CommandType = 0, \ - .MICommandOpcode = 3 - -#define GEN9_MI_WAIT_FOR_EVENT_length 0x00000001 - -struct GEN9_MI_WAIT_FOR_EVENT { - uint32_t CommandType; - uint32_t MICommandOpcode; - bool DisplayPlane1CVerticalBlankWaitEnable; - bool DisplayPlane6FlipPendingWaitEnable; - bool DisplayPlane12FlipPendingWaitEnable; - bool DisplayPlane11FlipPendingWaitEnable; - bool DisplayPlane10FlipPendingWaitEnable; - bool DisplayPlane9FlipPendingWaitEnable; - bool DisplayPlane3FlipPendingWaitEnable; - bool DisplayPlane1CScanLineWaitEnable; - bool DisplayPlane1BVerticalBlankWaitEnable; - bool DisplayPlane5FlipPendingWaitEnable; - bool DisplayPlane2FlipPendingWaitEnable; - bool DisplayPlane1BScanLineWaitEnable; - bool DisplayPlane8FlipPendingWaitEnable; - bool DisplayPlane7FlipPendingWaitEnable; - bool DisplayPlane1AVerticalBlankWaitEnable; - bool DisplayPlane4FlipPendingWaitEnable; - bool DisplayPlane1FlipPendingWaitEnable; - bool DisplayPlnae1AScanLineWaitEnable; -}; - -static inline void 
-GEN9_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_MI_WAIT_FOR_EVENT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->MICommandOpcode, 23, 28) | - __gen_field(values->DisplayPlane1CVerticalBlankWaitEnable, 21, 21) | - __gen_field(values->DisplayPlane6FlipPendingWaitEnable, 20, 20) | - __gen_field(values->DisplayPlane12FlipPendingWaitEnable, 19, 19) | - __gen_field(values->DisplayPlane11FlipPendingWaitEnable, 18, 18) | - __gen_field(values->DisplayPlane10FlipPendingWaitEnable, 17, 17) | - __gen_field(values->DisplayPlane9FlipPendingWaitEnable, 16, 16) | - __gen_field(values->DisplayPlane3FlipPendingWaitEnable, 15, 15) | - __gen_field(values->DisplayPlane1CScanLineWaitEnable, 14, 14) | - __gen_field(values->DisplayPlane1BVerticalBlankWaitEnable, 11, 11) | - __gen_field(values->DisplayPlane5FlipPendingWaitEnable, 10, 10) | - __gen_field(values->DisplayPlane2FlipPendingWaitEnable, 9, 9) | - __gen_field(values->DisplayPlane1BScanLineWaitEnable, 8, 8) | - __gen_field(values->DisplayPlane8FlipPendingWaitEnable, 7, 7) | - __gen_field(values->DisplayPlane7FlipPendingWaitEnable, 6, 6) | - __gen_field(values->DisplayPlane1AVerticalBlankWaitEnable, 3, 3) | - __gen_field(values->DisplayPlane4FlipPendingWaitEnable, 2, 2) | - __gen_field(values->DisplayPlane1FlipPendingWaitEnable, 1, 1) | - __gen_field(values->DisplayPlnae1AScanLineWaitEnable, 0, 0) | - 0; - -} - -#define GEN9_PIPE_CONTROL_length_bias 0x00000002 -#define GEN9_PIPE_CONTROL_header \ - .CommandType = 3, \ - .CommandSubType = 3, \ - ._3DCommandOpcode = 2, \ - ._3DCommandSubOpcode = 0, \ - .DwordLength = 4 - -#define GEN9_PIPE_CONTROL_length 0x00000006 - -struct GEN9_PIPE_CONTROL { - uint32_t CommandType; - uint32_t CommandSubType; - uint32_t _3DCommandOpcode; - uint32_t _3DCommandSubOpcode; - uint32_t DwordLength; - bool FlushLLC; -#define DAT_PPGTT 0 -#define DAT_GGTT 1 - uint32_t 
DestinationAddressType; -#define NoLRIOperation 0 -#define MMIOWriteImmediateData 1 - uint32_t LRIPostSyncOperation; - uint32_t StoreDataIndex; - uint32_t CommandStreamerStallEnable; -#define DontReset 0 -#define Reset 1 - uint32_t GlobalSnapshotCountReset; - uint32_t TLBInvalidate; - bool GenericMediaStateClear; -#define NoWrite 0 -#define WriteImmediateData 1 -#define WritePSDepthCount 2 -#define WriteTimestamp 3 - uint32_t PostSyncOperation; - bool DepthStallEnable; -#define DisableFlush 0 -#define EnableFlush 1 - bool RenderTargetCacheFlushEnable; - bool InstructionCacheInvalidateEnable; - bool TextureCacheInvalidationEnable; - bool IndirectStatePointersDisable; - bool NotifyEnable; - bool PipeControlFlushEnable; - bool DCFlushEnable; - bool VFCacheInvalidationEnable; - bool ConstantCacheInvalidationEnable; - bool StateCacheInvalidationEnable; - bool StallAtPixelScoreboard; -#define FlushDisabled 0 -#define FlushEnabled 1 - bool DepthCacheFlushEnable; - __gen_address_type Address; - uint64_t ImmediateData; -}; - -static inline void -GEN9_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_PIPE_CONTROL * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->CommandType, 29, 31) | - __gen_field(values->CommandSubType, 27, 28) | - __gen_field(values->_3DCommandOpcode, 24, 26) | - __gen_field(values->_3DCommandSubOpcode, 16, 23) | - __gen_field(values->DwordLength, 0, 7) | - 0; - - dw[1] = - __gen_field(values->FlushLLC, 26, 26) | - __gen_field(values->DestinationAddressType, 24, 24) | - __gen_field(values->LRIPostSyncOperation, 23, 23) | - __gen_field(values->StoreDataIndex, 21, 21) | - __gen_field(values->CommandStreamerStallEnable, 20, 20) | - __gen_field(values->GlobalSnapshotCountReset, 19, 19) | - __gen_field(values->TLBInvalidate, 18, 18) | - __gen_field(values->GenericMediaStateClear, 16, 16) | - __gen_field(values->PostSyncOperation, 14, 15) | - __gen_field(values->DepthStallEnable, 
13, 13) | - __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | - __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | - __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | - __gen_field(values->IndirectStatePointersDisable, 9, 9) | - __gen_field(values->NotifyEnable, 8, 8) | - __gen_field(values->PipeControlFlushEnable, 7, 7) | - __gen_field(values->DCFlushEnable, 5, 5) | - __gen_field(values->VFCacheInvalidationEnable, 4, 4) | - __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | - __gen_field(values->StateCacheInvalidationEnable, 2, 2) | - __gen_field(values->StallAtPixelScoreboard, 1, 1) | - __gen_field(values->DepthCacheFlushEnable, 0, 0) | - 0; - - uint32_t dw2 = - 0; - - uint64_t qw2 = - __gen_combine_address(data, &dw[2], values->Address, dw2); - - dw[2] = qw2; - dw[3] = qw2 >> 32; - - uint64_t qw4 = - __gen_field(values->ImmediateData, 0, 63) | - 0; - - dw[4] = qw4; - dw[5] = qw4 >> 32; - -} - -#define GEN9_SCISSOR_RECT_length 0x00000002 - -struct GEN9_SCISSOR_RECT { - uint32_t ScissorRectangleYMin; - uint32_t ScissorRectangleXMin; - uint32_t ScissorRectangleYMax; - uint32_t ScissorRectangleXMax; -}; - -static inline void -GEN9_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_SCISSOR_RECT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ScissorRectangleYMin, 16, 31) | - __gen_field(values->ScissorRectangleXMin, 0, 15) | - 0; - - dw[1] = - __gen_field(values->ScissorRectangleYMax, 16, 31) | - __gen_field(values->ScissorRectangleXMax, 0, 15) | - 0; - -} - -#define GEN9_SF_CLIP_VIEWPORT_length 0x00000010 - -struct GEN9_SF_CLIP_VIEWPORT { - float ViewportMatrixElementm00; - float ViewportMatrixElementm11; - float ViewportMatrixElementm22; - float ViewportMatrixElementm30; - float ViewportMatrixElementm31; - float ViewportMatrixElementm32; - float XMinClipGuardband; - float XMaxClipGuardband; - float YMinClipGuardband; - float 
YMaxClipGuardband; - float XMinViewPort; - float XMaxViewPort; - float YMinViewPort; - float YMaxViewPort; -}; - -static inline void -GEN9_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_SF_CLIP_VIEWPORT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_float(values->ViewportMatrixElementm00) | - 0; - - dw[1] = - __gen_float(values->ViewportMatrixElementm11) | - 0; - - dw[2] = - __gen_float(values->ViewportMatrixElementm22) | - 0; - - dw[3] = - __gen_float(values->ViewportMatrixElementm30) | - 0; - - dw[4] = - __gen_float(values->ViewportMatrixElementm31) | - 0; - - dw[5] = - __gen_float(values->ViewportMatrixElementm32) | - 0; - - dw[6] = - 0; - - dw[7] = - 0; - - dw[8] = - __gen_float(values->XMinClipGuardband) | - 0; - - dw[9] = - __gen_float(values->XMaxClipGuardband) | - 0; - - dw[10] = - __gen_float(values->YMinClipGuardband) | - 0; - - dw[11] = - __gen_float(values->YMaxClipGuardband) | - 0; - - dw[12] = - __gen_float(values->XMinViewPort) | - 0; - - dw[13] = - __gen_float(values->XMaxViewPort) | - 0; - - dw[14] = - __gen_float(values->YMinViewPort) | - 0; - - dw[15] = - __gen_float(values->YMaxViewPort) | - 0; - -} - -#define GEN9_BLEND_STATE_length 0x00000011 - -#define GEN9_BLEND_STATE_ENTRY_length 0x00000002 - -struct GEN9_BLEND_STATE_ENTRY { - bool LogicOpEnable; - uint32_t LogicOpFunction; - uint32_t PreBlendSourceOnlyClampEnable; -#define COLORCLAMP_UNORM 0 -#define COLORCLAMP_SNORM 1 -#define COLORCLAMP_RTFORMAT 2 - uint32_t ColorClampRange; - bool PreBlendColorClampEnable; - bool PostBlendColorClampEnable; - bool ColorBufferBlendEnable; - uint32_t SourceBlendFactor; - uint32_t DestinationBlendFactor; - uint32_t ColorBlendFunction; - uint32_t SourceAlphaBlendFactor; - uint32_t DestinationAlphaBlendFactor; - uint32_t AlphaBlendFunction; - bool WriteDisableAlpha; - bool WriteDisableRed; - bool WriteDisableGreen; - bool WriteDisableBlue; -}; - -static inline void 
-GEN9_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_BLEND_STATE_ENTRY * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - uint64_t qw0 = - __gen_field(values->LogicOpEnable, 63, 63) | - __gen_field(values->LogicOpFunction, 59, 62) | - __gen_field(values->PreBlendSourceOnlyClampEnable, 36, 36) | - __gen_field(values->ColorClampRange, 34, 35) | - __gen_field(values->PreBlendColorClampEnable, 33, 33) | - __gen_field(values->PostBlendColorClampEnable, 32, 32) | - __gen_field(values->ColorBufferBlendEnable, 31, 31) | - __gen_field(values->SourceBlendFactor, 26, 30) | - __gen_field(values->DestinationBlendFactor, 21, 25) | - __gen_field(values->ColorBlendFunction, 18, 20) | - __gen_field(values->SourceAlphaBlendFactor, 13, 17) | - __gen_field(values->DestinationAlphaBlendFactor, 8, 12) | - __gen_field(values->AlphaBlendFunction, 5, 7) | - __gen_field(values->WriteDisableAlpha, 3, 3) | - __gen_field(values->WriteDisableRed, 2, 2) | - __gen_field(values->WriteDisableGreen, 1, 1) | - __gen_field(values->WriteDisableBlue, 0, 0) | - 0; - - dw[0] = qw0; - dw[1] = qw0 >> 32; - -} - -struct GEN9_BLEND_STATE { - bool AlphaToCoverageEnable; - bool IndependentAlphaBlendEnable; - bool AlphaToOneEnable; - bool AlphaToCoverageDitherEnable; - bool AlphaTestEnable; - uint32_t AlphaTestFunction; - bool ColorDitherEnable; - uint32_t XDitherOffset; - uint32_t YDitherOffset; - struct GEN9_BLEND_STATE_ENTRY Entry[8]; -}; - -static inline void -GEN9_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_BLEND_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->AlphaToCoverageEnable, 31, 31) | - __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | - __gen_field(values->AlphaToOneEnable, 29, 29) | - __gen_field(values->AlphaToCoverageDitherEnable, 28, 28) | - __gen_field(values->AlphaTestEnable, 27, 27) | - __gen_field(values->AlphaTestFunction, 24, 
26) | - __gen_field(values->ColorDitherEnable, 23, 23) | - __gen_field(values->XDitherOffset, 21, 22) | - __gen_field(values->YDitherOffset, 19, 20) | - 0; - - for (uint32_t i = 0, j = 1; i < 8; i++, j += 2) - GEN9_BLEND_STATE_ENTRY_pack(data, &dw[j], &values->Entry[i]); -} - -#define GEN9_CC_VIEWPORT_length 0x00000002 - -struct GEN9_CC_VIEWPORT { - float MinimumDepth; - float MaximumDepth; -}; - -static inline void -GEN9_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_CC_VIEWPORT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_float(values->MinimumDepth) | - 0; - - dw[1] = - __gen_float(values->MaximumDepth) | - 0; - -} - -#define GEN9_COLOR_CALC_STATE_length 0x00000006 - -struct GEN9_COLOR_CALC_STATE { -#define Cancelled 0 -#define NotCancelled 1 - uint32_t RoundDisableFunctionDisable; -#define ALPHATEST_UNORM8 0 -#define ALPHATEST_FLOAT32 1 - uint32_t AlphaTestFormat; - uint32_t AlphaReferenceValueAsUNORM8; - float AlphaReferenceValueAsFLOAT32; - float BlendConstantColorRed; - float BlendConstantColorGreen; - float BlendConstantColorBlue; - float BlendConstantColorAlpha; -}; - -static inline void -GEN9_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_COLOR_CALC_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->RoundDisableFunctionDisable, 15, 15) | - __gen_field(values->AlphaTestFormat, 0, 0) | - 0; - - dw[1] = - __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | - __gen_float(values->AlphaReferenceValueAsFLOAT32) | - 0; - - dw[2] = - __gen_float(values->BlendConstantColorRed) | - 0; - - dw[3] = - __gen_float(values->BlendConstantColorGreen) | - 0; - - dw[4] = - __gen_float(values->BlendConstantColorBlue) | - 0; - - dw[5] = - __gen_float(values->BlendConstantColorAlpha) | - 0; - -} - -#define GEN9_EXECUTION_UNIT_EXTENDED_MESSAGE_DESCRIPTOR_length 0x00000001 - -struct 
GEN9_EXECUTION_UNIT_EXTENDED_MESSAGE_DESCRIPTOR { - uint32_t ExtendedMessageLength; -#define NoTermination 0 -#define EOT 1 - uint32_t EndOfThread; - uint32_t TargetFunctionID; -}; - -static inline void -GEN9_EXECUTION_UNIT_EXTENDED_MESSAGE_DESCRIPTOR_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_EXECUTION_UNIT_EXTENDED_MESSAGE_DESCRIPTOR * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->ExtendedMessageLength, 6, 9) | - __gen_field(values->EndOfThread, 5, 5) | - __gen_field(values->TargetFunctionID, 0, 3) | - 0; - -} - -#define GEN9_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 - -struct GEN9_INTERFACE_DESCRIPTOR_DATA { - uint32_t KernelStartPointer; - uint32_t KernelStartPointerHigh; -#define Ftz 0 -#define SetByKernel 1 - uint32_t DenormMode; -#define Multiple 0 -#define Single 1 - uint32_t SingleProgramFlow; -#define NormalPriority 0 -#define HighPriority 1 - uint32_t ThreadPriority; -#define IEEE754 0 -#define Alternate 1 - uint32_t FloatingPointMode; - bool IllegalOpcodeExceptionEnable; - bool MaskStackExceptionEnable; - bool SoftwareExceptionEnable; - uint32_t SamplerStatePointer; -#define Nosamplersused 0 -#define Between1and4samplersused 1 -#define Between5and8samplersused 2 -#define Between9and12samplersused 3 -#define Between13and16samplersused 4 - uint32_t SamplerCount; - uint32_t BindingTablePointer; - uint32_t BindingTableEntryCount; - uint32_t ConstantIndirectURBEntryReadLength; - uint32_t ConstantURBEntryReadOffset; -#define RTNE 0 -#define RU 1 -#define RD 2 -#define RTZ 3 - uint32_t RoundingMode; - bool BarrierEnable; -#define Encodes0K 0 -#define Encodes1K 1 -#define Encodes2K 2 -#define Encodes4K 3 -#define Encodes8K 4 -#define Encodes16K 5 -#define Encodes32K 6 -#define Encodes64K 7 - uint32_t SharedLocalMemorySize; - bool GlobalBarrierEnable; - uint32_t NumberofThreadsinGPGPUThreadGroup; - uint32_t CrossThreadConstantDataReadLength; -}; - -static inline void 
-GEN9_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_INTERFACE_DESCRIPTOR_DATA * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->KernelStartPointer, 6, 31) | - 0; - - dw[1] = - __gen_offset(values->KernelStartPointerHigh, 0, 15) | - 0; - - dw[2] = - __gen_field(values->DenormMode, 19, 19) | - __gen_field(values->SingleProgramFlow, 18, 18) | - __gen_field(values->ThreadPriority, 17, 17) | - __gen_field(values->FloatingPointMode, 16, 16) | - __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | - __gen_field(values->MaskStackExceptionEnable, 11, 11) | - __gen_field(values->SoftwareExceptionEnable, 7, 7) | - 0; - - dw[3] = - __gen_offset(values->SamplerStatePointer, 5, 31) | - __gen_field(values->SamplerCount, 2, 4) | - 0; - - dw[4] = - __gen_offset(values->BindingTablePointer, 5, 15) | - __gen_field(values->BindingTableEntryCount, 0, 4) | - 0; - - dw[5] = - __gen_field(values->ConstantIndirectURBEntryReadLength, 16, 31) | - __gen_field(values->ConstantURBEntryReadOffset, 0, 15) | - 0; - - dw[6] = - __gen_field(values->RoundingMode, 22, 23) | - __gen_field(values->BarrierEnable, 21, 21) | - __gen_field(values->SharedLocalMemorySize, 16, 20) | - __gen_field(values->GlobalBarrierEnable, 15, 15) | - __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 9) | - 0; - - dw[7] = - __gen_field(values->CrossThreadConstantDataReadLength, 0, 7) | - 0; - -} - -#define GEN9_ROUNDINGPRECISIONTABLE_3_BITS_length 0x00000001 - -struct GEN9_ROUNDINGPRECISIONTABLE_3_BITS { -#define _116 0 -#define _216 1 -#define _316 2 -#define _416 3 -#define _516 4 -#define _616 5 -#define _716 6 -#define _816 7 - uint32_t RoundingPrecision; -}; - -static inline void -GEN9_ROUNDINGPRECISIONTABLE_3_BITS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_ROUNDINGPRECISIONTABLE_3_BITS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - 
__gen_field(values->RoundingPrecision, 0, 2) | - 0; - -} - -#define GEN9_BINDING_TABLE_STATE_length 0x00000001 - -struct GEN9_BINDING_TABLE_STATE { - uint32_t SurfaceStatePointer; -}; - -static inline void -GEN9_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_BINDING_TABLE_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_offset(values->SurfaceStatePointer, 6, 31) | - 0; - -} - -#define GEN9_RENDER_SURFACE_STATE_length 0x00000010 - -struct GEN9_RENDER_SURFACE_STATE { -#define SURFTYPE_1D 0 -#define SURFTYPE_2D 1 -#define SURFTYPE_3D 2 -#define SURFTYPE_CUBE 3 -#define SURFTYPE_BUFFER 4 -#define SURFTYPE_STRBUF 5 -#define SURFTYPE_NULL 7 - uint32_t SurfaceType; - bool SurfaceArray; - bool ASTC_Enable; - uint32_t SurfaceFormat; -#define VALIGN4 1 -#define VALIGN8 2 -#define VALIGN16 3 - uint32_t SurfaceVerticalAlignment; -#define HALIGN4 1 -#define HALIGN8 2 -#define HALIGN16 3 - uint32_t SurfaceHorizontalAlignment; -#define LINEAR 0 -#define WMAJOR 1 -#define XMAJOR 2 -#define YMAJOR 3 - uint32_t TileMode; - uint32_t VerticalLineStride; - uint32_t VerticalLineStrideOffset; - bool SamplerL2BypassModeDisable; -#define WriteOnlyCache 0 -#define ReadWriteCache 1 - uint32_t RenderCacheReadWriteMode; -#define NORMAL_MODE 0 -#define PROGRESSIVE_FRAME 2 -#define INTERLACED_FRAME 3 - uint32_t MediaBoundaryPixelMode; - bool CubeFaceEnablePositiveZ; - bool CubeFaceEnableNegativeZ; - bool CubeFaceEnablePositiveY; - bool CubeFaceEnableNegativeY; - bool CubeFaceEnablePositiveX; - bool CubeFaceEnableNegativeX; - struct GEN9_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; - float BaseMipLevel; - uint32_t SurfaceQPitch; - uint32_t Height; - uint32_t Width; - uint32_t Depth; - uint32_t SurfacePitch; -#define _0DEG 0 -#define _90DEG 1 -#define _180DEG 2 -#define _270DEG 3 - uint32_t RenderTargetAndSampleUnormRotation; - uint32_t MinimumArrayElement; - uint32_t RenderTargetViewExtent; -#define MSS 
0 -#define DEPTH_STENCIL 1 - uint32_t MultisampledSurfaceStorageFormat; -#define MULTISAMPLECOUNT_1 0 -#define MULTISAMPLECOUNT_2 1 -#define MULTISAMPLECOUNT_4 2 -#define MULTISAMPLECOUNT_8 3 -#define MULTISAMPLECOUNT_16 4 - uint32_t NumberofMultisamples; - uint32_t MultisamplePositionPaletteIndex; - uint32_t XOffset; - uint32_t YOffset; - bool EWADisableForCube; -#define NONE 0 -#define _4KB 1 -#define _64KB 2 -#define TILEYF 1 -#define TILEYS 2 - uint32_t TiledResourceMode; -#define GPUcoherent 0 -#define IAcoherent 1 - uint32_t CoherencyType; - uint32_t MipTailStartLOD; - uint32_t SurfaceMinLOD; - uint32_t MIPCountLOD; - uint32_t AuxiliarySurfaceQPitch; - uint32_t AuxiliarySurfacePitch; -#define AUX_NONE 0 -#define AUX_CCS_D 1 -#define AUX_APPEND 2 -#define AUX_HIZ 3 -#define AUX_CCS_E 5 - uint32_t AuxiliarySurfaceMode; - bool SeparateUVPlaneEnable; - uint32_t XOffsetforUorUVPlane; - uint32_t YOffsetforUorUVPlane; -#define Horizontal 0 -#define Vertical 1 - uint32_t MemoryCompressionMode; - bool MemoryCompressionEnable; - uint32_t ShaderChannelSelectRed; - uint32_t ShaderChannelSelectGreen; - uint32_t ShaderChannelSelectBlue; - uint32_t ShaderChannelSelectAlpha; - float ResourceMinLOD; - __gen_address_type SurfaceBaseAddress; - uint32_t XOffsetforVPlane; - uint32_t YOffsetforVPlane; - uint32_t AuxiliaryTableIndexforMediaCompressedSurface; - __gen_address_type AuxiliarySurfaceBaseAddress; - uint32_t QuiltHeight; - uint32_t QuiltWidth; - float HierarchicalDepthClearValue; - uint32_t RedClearColor; - uint32_t GreenClearColor; - uint32_t BlueClearColor; - uint32_t AlphaClearColor; -}; - -static inline void -GEN9_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_RENDER_SURFACE_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->SurfaceType, 29, 31) | - __gen_field(values->SurfaceArray, 28, 28) | - __gen_field(values->ASTC_Enable, 27, 27) | - 
__gen_field(values->SurfaceFormat, 18, 26) | - __gen_field(values->SurfaceVerticalAlignment, 16, 17) | - __gen_field(values->SurfaceHorizontalAlignment, 14, 15) | - __gen_field(values->TileMode, 12, 13) | - __gen_field(values->VerticalLineStride, 11, 11) | - __gen_field(values->VerticalLineStrideOffset, 10, 10) | - __gen_field(values->SamplerL2BypassModeDisable, 9, 9) | - __gen_field(values->RenderCacheReadWriteMode, 8, 8) | - __gen_field(values->MediaBoundaryPixelMode, 6, 7) | - __gen_field(values->CubeFaceEnablePositiveZ, 0, 0) | - __gen_field(values->CubeFaceEnableNegativeZ, 1, 1) | - __gen_field(values->CubeFaceEnablePositiveY, 2, 2) | - __gen_field(values->CubeFaceEnableNegativeY, 3, 3) | - __gen_field(values->CubeFaceEnablePositiveX, 4, 4) | - __gen_field(values->CubeFaceEnableNegativeX, 5, 5) | - 0; - - uint32_t dw_MemoryObjectControlState; - GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); - dw[1] = - __gen_field(dw_MemoryObjectControlState, 24, 30) | - __gen_field(values->BaseMipLevel * (1 << 1), 19, 23) | - __gen_field(values->SurfaceQPitch, 0, 14) | - 0; - - dw[2] = - __gen_field(values->Height, 16, 29) | - __gen_field(values->Width, 0, 13) | - 0; - - dw[3] = - __gen_field(values->Depth, 21, 31) | - __gen_field(values->SurfacePitch, 0, 17) | - 0; - - dw[4] = - __gen_field(values->RenderTargetAndSampleUnormRotation, 29, 30) | - __gen_field(values->MinimumArrayElement, 18, 28) | - __gen_field(values->RenderTargetViewExtent, 7, 17) | - __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | - __gen_field(values->NumberofMultisamples, 3, 5) | - __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | - 0; - - dw[5] = - __gen_offset(values->XOffset, 25, 31) | - __gen_offset(values->YOffset, 21, 23) | - __gen_field(values->EWADisableForCube, 20, 20) | - __gen_field(values->TiledResourceMode, 18, 19) | - __gen_field(values->CoherencyType, 14, 14) | - __gen_field(values->MipTailStartLOD, 8, 
11) | - __gen_field(values->SurfaceMinLOD, 4, 7) | - __gen_field(values->MIPCountLOD, 0, 3) | - 0; - - dw[6] = - __gen_field(values->AuxiliarySurfaceQPitch, 16, 30) | - __gen_field(values->AuxiliarySurfacePitch, 3, 11) | - __gen_field(values->AuxiliarySurfaceMode, 0, 2) | - __gen_field(values->SeparateUVPlaneEnable, 31, 31) | - __gen_field(values->XOffsetforUorUVPlane, 16, 29) | - __gen_field(values->YOffsetforUorUVPlane, 0, 13) | - 0; - - dw[7] = - __gen_field(values->MemoryCompressionMode, 31, 31) | - __gen_field(values->MemoryCompressionEnable, 30, 30) | - __gen_field(values->ShaderChannelSelectRed, 25, 27) | - __gen_field(values->ShaderChannelSelectGreen, 22, 24) | - __gen_field(values->ShaderChannelSelectBlue, 19, 21) | - __gen_field(values->ShaderChannelSelectAlpha, 16, 18) | - __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | - 0; - - uint32_t dw8 = - 0; - - uint64_t qw8 = - __gen_combine_address(data, &dw[8], values->SurfaceBaseAddress, dw8); - - dw[8] = qw8; - dw[9] = qw8 >> 32; - - uint32_t dw10 = - __gen_field(values->XOffsetforVPlane, 48, 61) | - __gen_field(values->YOffsetforVPlane, 32, 45) | - __gen_field(values->AuxiliaryTableIndexforMediaCompressedSurface, 21, 31) | - __gen_field(values->QuiltHeight, 5, 9) | - __gen_field(values->QuiltWidth, 0, 4) | - 0; - - uint64_t qw10 = - __gen_combine_address(data, &dw[10], values->AuxiliarySurfaceBaseAddress, dw10); - - dw[10] = qw10; - dw[11] = qw10 >> 32; - - dw[12] = - __gen_float(values->HierarchicalDepthClearValue) | - __gen_field(values->RedClearColor, 0, 31) | - 0; - - dw[13] = - __gen_field(values->GreenClearColor, 0, 31) | - 0; - - dw[14] = - __gen_field(values->BlueClearColor, 0, 31) | - 0; - - dw[15] = - __gen_field(values->AlphaClearColor, 0, 31) | - 0; - -} - -#define GEN9_FILTER_COEFFICIENT_length 0x00000001 - -struct GEN9_FILTER_COEFFICIENT { - uint32_t FilterCoefficient; -}; - -static inline void -GEN9_FILTER_COEFFICIENT_pack(__gen_user_data *data, void * restrict dst, - const struct 
GEN9_FILTER_COEFFICIENT * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->FilterCoefficient, 0, 7) | - 0; - -} - -#define GEN9_SAMPLER_STATE_length 0x00000004 - -struct GEN9_SAMPLER_STATE { - bool SamplerDisable; -#define DX10OGL 0 -#define DX9 1 - uint32_t TextureBorderColorMode; -#define CLAMP_NONE 0 -#define CLAMP_OGL 2 - uint32_t LODPreClampMode; - uint32_t CoarseLODQualityMode; -#define MIPFILTER_NONE 0 -#define MIPFILTER_NEAREST 1 -#define MIPFILTER_LINEAR 3 - uint32_t MipModeFilter; -#define MAPFILTER_NEAREST 0 -#define MAPFILTER_LINEAR 1 -#define MAPFILTER_ANISOTROPIC 2 -#define MAPFILTER_MONO 6 - uint32_t MagModeFilter; -#define MAPFILTER_NEAREST 0 -#define MAPFILTER_LINEAR 1 -#define MAPFILTER_ANISOTROPIC 2 -#define MAPFILTER_MONO 6 - uint32_t MinModeFilter; - float TextureLODBias; -#define LEGACY 0 -#define EWAApproximation 1 - uint32_t AnisotropicAlgorithm; - float MinLOD; - float MaxLOD; - bool ChromaKeyEnable; - uint32_t ChromaKeyIndex; -#define KEYFILTER_KILL_ON_ANY_MATCH 0 -#define KEYFILTER_REPLACE_BLACK 1 - uint32_t ChromaKeyMode; -#define PREFILTEROPALWAYS 0 -#define PREFILTEROPNEVER 1 -#define PREFILTEROPLESS 2 -#define PREFILTEROPEQUAL 3 -#define PREFILTEROPLEQUAL 4 -#define PREFILTEROPGREATER 5 -#define PREFILTEROPNOTEQUAL 6 -#define PREFILTEROPGEQUAL 7 - uint32_t ShadowFunction; -#define PROGRAMMED 0 -#define OVERRIDE 1 - uint32_t CubeSurfaceControlMode; - uint32_t IndirectStatePointer; -#define MIPNONE 0 -#define MIPFILTER 1 - uint32_t LODClampMagnificationMode; -#define STD_FILTER 0 -#define COMPARISON 1 -#define MINIMUM 2 -#define MAXIMUM 3 - uint32_t ReductionType; -#define RATIO21 0 -#define RATIO41 1 -#define RATIO61 2 -#define RATIO81 3 -#define RATIO101 4 -#define RATIO121 5 -#define RATIO141 6 -#define RATIO161 7 - uint32_t MaximumAnisotropy; - bool RAddressMinFilterRoundingEnable; - bool RAddressMagFilterRoundingEnable; - bool VAddressMinFilterRoundingEnable; - bool 
VAddressMagFilterRoundingEnable; - bool UAddressMinFilterRoundingEnable; - bool UAddressMagFilterRoundingEnable; -#define FULL 0 -#define HIGH 1 -#define MED 2 -#define LOW 3 - uint32_t TrilinearFilterQuality; - bool NonnormalizedCoordinateEnable; - bool ReductionTypeEnable; - uint32_t TCXAddressControlMode; - uint32_t TCYAddressControlMode; - uint32_t TCZAddressControlMode; -}; - -static inline void -GEN9_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_SAMPLER_STATE * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->SamplerDisable, 31, 31) | - __gen_field(values->TextureBorderColorMode, 29, 29) | - __gen_field(values->LODPreClampMode, 27, 28) | - __gen_field(values->CoarseLODQualityMode, 22, 26) | - __gen_field(values->MipModeFilter, 20, 21) | - __gen_field(values->MagModeFilter, 17, 19) | - __gen_field(values->MinModeFilter, 14, 16) | - __gen_fixed(values->TextureLODBias, 1, 13, true, 8) | - __gen_field(values->AnisotropicAlgorithm, 0, 0) | - 0; - - dw[1] = - __gen_field(values->MinLOD * (1 << 8), 20, 31) | - __gen_field(values->MaxLOD * (1 << 8), 8, 19) | - __gen_field(values->ChromaKeyEnable, 7, 7) | - __gen_field(values->ChromaKeyIndex, 5, 6) | - __gen_field(values->ChromaKeyMode, 4, 4) | - __gen_field(values->ShadowFunction, 1, 3) | - __gen_field(values->CubeSurfaceControlMode, 0, 0) | - 0; - - dw[2] = - __gen_field(values->IndirectStatePointer, 6, 23) | - __gen_field(values->LODClampMagnificationMode, 0, 0) | - 0; - - dw[3] = - __gen_field(values->ReductionType, 22, 23) | - __gen_field(values->MaximumAnisotropy, 19, 21) | - __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | - __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | - __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | - __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | - __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | - 
__gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | - __gen_field(values->TrilinearFilterQuality, 11, 12) | - __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | - __gen_field(values->ReductionTypeEnable, 9, 9) | - __gen_field(values->TCXAddressControlMode, 6, 8) | - __gen_field(values->TCYAddressControlMode, 3, 5) | - __gen_field(values->TCZAddressControlMode, 0, 2) | - 0; - -} - -#define GEN9_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_length 0x00000008 - -struct GEN9_SAMPLER_STATE_8X8_AVS_COEFFICIENTS { - uint32_t Table0YFilterCoefficientn1; - uint32_t Table0XFilterCoefficientn1; - uint32_t Table0YFilterCoefficientn0; - uint32_t Table0XFilterCoefficientn0; - uint32_t Table0YFilterCoefficientn3; - uint32_t Table0XFilterCoefficientn3; - uint32_t Table0YFilterCoefficientn2; - uint32_t Table0XFilterCoefficientn2; - uint32_t Table0YFilterCoefficientn5; - uint32_t Table0XFilterCoefficientn5; - uint32_t Table0YFilterCoefficientn4; - uint32_t Table0XFilterCoefficientn4; - uint32_t Table0YFilterCoefficientn7; - uint32_t Table0XFilterCoefficientn7; - uint32_t Table0YFilterCoefficientn6; - uint32_t Table0XFilterCoefficientn6; - uint32_t Table1XFilterCoefficientn3; - uint32_t Table1XFilterCoefficientn2; - uint32_t Table1XFilterCoefficientn5; - uint32_t Table1XFilterCoefficientn4; - uint32_t Table1YFilterCoefficientn3; - uint32_t Table1YFilterCoefficientn2; - uint32_t Table1YFilterCoefficientn5; - uint32_t Table1YFilterCoefficientn4; -}; - -static inline void -GEN9_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_pack(__gen_user_data *data, void * restrict dst, - const struct GEN9_SAMPLER_STATE_8X8_AVS_COEFFICIENTS * restrict values) -{ - uint32_t *dw = (uint32_t * restrict) dst; - - dw[0] = - __gen_field(values->Table0YFilterCoefficientn1, 24, 31) | - __gen_field(values->Table0XFilterCoefficientn1, 16, 23) | - __gen_field(values->Table0YFilterCoefficientn0, 8, 15) | - __gen_field(values->Table0XFilterCoefficientn0, 0, 7) | - 0; - - dw[1] = - 
__gen_field(values->Table0YFilterCoefficientn3, 24, 31) | - __gen_field(values->Table0XFilterCoefficientn3, 16, 23) | - __gen_field(values->Table0YFilterCoefficientn2, 8, 15) | - __gen_field(values->Table0XFilterCoefficientn2, 0, 7) | - 0; - - dw[2] = - __gen_field(values->Table0YFilterCoefficientn5, 24, 31) | - __gen_field(values->Table0XFilterCoefficientn5, 16, 23) | - __gen_field(values->Table0YFilterCoefficientn4, 8, 15) | - __gen_field(values->Table0XFilterCoefficientn4, 0, 7) | - 0; - - dw[3] = - __gen_field(values->Table0YFilterCoefficientn7, 24, 31) | - __gen_field(values->Table0XFilterCoefficientn7, 16, 23) | - __gen_field(values->Table0YFilterCoefficientn6, 8, 15) | - __gen_field(values->Table0XFilterCoefficientn6, 0, 7) | - 0; - - dw[4] = - __gen_field(values->Table1XFilterCoefficientn3, 24, 31) | - __gen_field(values->Table1XFilterCoefficientn2, 16, 23) | - 0; - - dw[5] = - __gen_field(values->Table1XFilterCoefficientn5, 8, 15) | - __gen_field(values->Table1XFilterCoefficientn4, 0, 7) | - 0; - - dw[6] = - __gen_field(values->Table1YFilterCoefficientn3, 24, 31) | - __gen_field(values->Table1YFilterCoefficientn2, 16, 23) | - 0; - - dw[7] = - __gen_field(values->Table1YFilterCoefficientn5, 8, 15) | - __gen_field(values->Table1YFilterCoefficientn4, 0, 7) | - 0; - -} - -/* Enum 3D_Prim_Topo_Type */ -#define _3DPRIM_POINTLIST 1 -#define _3DPRIM_LINELIST 2 -#define _3DPRIM_LINESTRIP 3 -#define _3DPRIM_TRILIST 4 -#define _3DPRIM_TRISTRIP 5 -#define _3DPRIM_TRIFAN 6 -#define _3DPRIM_QUADLIST 7 -#define _3DPRIM_QUADSTRIP 8 -#define _3DPRIM_LINELIST_ADJ 9 -#define _3DPRIM_LINESTRIP_ADJ 10 -#define _3DPRIM_TRILIST_ADJ 11 -#define _3DPRIM_TRISTRIP_ADJ 12 -#define _3DPRIM_TRISTRIP_REVERSE 13 -#define _3DPRIM_POLYGON 14 -#define _3DPRIM_RECTLIST 15 -#define _3DPRIM_LINELOOP 16 -#define _3DPRIM_POINTLIST_BF 17 -#define _3DPRIM_LINESTRIP_CONT 18 -#define _3DPRIM_LINESTRIP_BF 19 -#define _3DPRIM_LINESTRIP_CONT_BF 20 -#define _3DPRIM_TRIFAN_NOSTIPPLE 22 -#define 
_3DPRIM_PATCHLIST_1 32 -#define _3DPRIM_PATCHLIST_2 33 -#define _3DPRIM_PATCHLIST_3 34 -#define _3DPRIM_PATCHLIST_4 35 -#define _3DPRIM_PATCHLIST_5 36 -#define _3DPRIM_PATCHLIST_6 37 -#define _3DPRIM_PATCHLIST_7 38 -#define _3DPRIM_PATCHLIST_8 39 -#define _3DPRIM_PATCHLIST_9 40 -#define _3DPRIM_PATCHLIST_10 41 -#define _3DPRIM_PATCHLIST_11 42 -#define _3DPRIM_PATCHLIST_12 43 -#define _3DPRIM_PATCHLIST_13 44 -#define _3DPRIM_PATCHLIST_14 45 -#define _3DPRIM_PATCHLIST_15 46 -#define _3DPRIM_PATCHLIST_16 47 -#define _3DPRIM_PATCHLIST_17 48 -#define _3DPRIM_PATCHLIST_18 49 -#define _3DPRIM_PATCHLIST_19 50 -#define _3DPRIM_PATCHLIST_20 51 -#define _3DPRIM_PATCHLIST_21 52 -#define _3DPRIM_PATCHLIST_22 53 -#define _3DPRIM_PATCHLIST_23 54 -#define _3DPRIM_PATCHLIST_24 55 -#define _3DPRIM_PATCHLIST_25 56 -#define _3DPRIM_PATCHLIST_26 57 -#define _3DPRIM_PATCHLIST_27 58 -#define _3DPRIM_PATCHLIST_28 59 -#define _3DPRIM_PATCHLIST_29 60 -#define _3DPRIM_PATCHLIST_30 61 -#define _3DPRIM_PATCHLIST_31 62 -#define _3DPRIM_PATCHLIST_32 63 - -/* Enum 3D_Vertex_Component_Control */ -#define VFCOMP_NOSTORE 0 -#define VFCOMP_STORE_SRC 1 -#define VFCOMP_STORE_0 2 -#define VFCOMP_STORE_1_FP 3 -#define VFCOMP_STORE_1_INT 4 -#define VFCOMP_STORE_PID 7 - -/* Enum COMPONENT_ENABLES */ -#define CE_NONE 0 -#define CE_X 1 -#define CE_Y 2 -#define CE_XY 3 -#define CE_Z 4 -#define CE_XZ 5 -#define CE_YZ 6 -#define CE_XYZ 7 -#define CE_W 8 -#define CE_XW 9 -#define CE_YW 10 -#define CE_XYW 11 -#define CE_ZW 12 -#define CE_XZW 13 -#define CE_YZW 14 -#define CE_XYZW 15 - -/* Enum Attribute_Component_Format */ -#define ACF_DISABLED 0 -#define ACF_XY 1 -#define ACF_XYZ 2 -#define ACF_XYZW 3 - -/* Enum WRAP_SHORTEST_ENABLE */ -#define WSE_X 1 -#define WSE_Y 2 -#define WSE_XY 3 -#define WSE_Z 4 -#define WSE_XZ 5 -#define WSE_YZ 6 -#define WSE_XYZ 7 -#define WSE_W 8 -#define WSE_XW 9 -#define WSE_YW 10 -#define WSE_XYW 11 -#define WSE_ZW 12 -#define WSE_XZW 13 -#define WSE_YZW 14 -#define WSE_XYZW 15 - 
-/* Enum 3D_Stencil_Operation */ -#define STENCILOP_KEEP 0 -#define STENCILOP_ZERO 1 -#define STENCILOP_REPLACE 2 -#define STENCILOP_INCRSAT 3 -#define STENCILOP_DECRSAT 4 -#define STENCILOP_INCR 5 -#define STENCILOP_DECR 6 -#define STENCILOP_INVERT 7 - -/* Enum 3D_Color_Buffer_Blend_Factor */ -#define BLENDFACTOR_ONE 1 -#define BLENDFACTOR_SRC_COLOR 2 -#define BLENDFACTOR_SRC_ALPHA 3 -#define BLENDFACTOR_DST_ALPHA 4 -#define BLENDFACTOR_DST_COLOR 5 -#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 -#define BLENDFACTOR_CONST_COLOR 7 -#define BLENDFACTOR_CONST_ALPHA 8 -#define BLENDFACTOR_SRC1_COLOR 9 -#define BLENDFACTOR_SRC1_ALPHA 10 -#define BLENDFACTOR_ZERO 17 -#define BLENDFACTOR_INV_SRC_COLOR 18 -#define BLENDFACTOR_INV_SRC_ALPHA 19 -#define BLENDFACTOR_INV_DST_ALPHA 20 -#define BLENDFACTOR_INV_DST_COLOR 21 -#define BLENDFACTOR_INV_CONST_COLOR 23 -#define BLENDFACTOR_INV_CONST_ALPHA 24 -#define BLENDFACTOR_INV_SRC1_COLOR 25 -#define BLENDFACTOR_INV_SRC1_ALPHA 26 - -/* Enum 3D_Color_Buffer_Blend_Function */ -#define BLENDFUNCTION_ADD 0 -#define BLENDFUNCTION_SUBTRACT 1 -#define BLENDFUNCTION_REVERSE_SUBTRACT 2 -#define BLENDFUNCTION_MIN 3 -#define BLENDFUNCTION_MAX 4 - -/* Enum 3D_Compare_Function */ -#define COMPAREFUNCTION_ALWAYS 0 -#define COMPAREFUNCTION_NEVER 1 -#define COMPAREFUNCTION_LESS 2 -#define COMPAREFUNCTION_EQUAL 3 -#define COMPAREFUNCTION_LEQUAL 4 -#define COMPAREFUNCTION_GREATER 5 -#define COMPAREFUNCTION_NOTEQUAL 6 -#define COMPAREFUNCTION_GEQUAL 7 - -/* Enum 3D_Logic_Op_Function */ -#define LOGICOP_CLEAR 0 -#define LOGICOP_NOR 1 -#define LOGICOP_AND_INVERTED 2 -#define LOGICOP_COPY_INVERTED 3 -#define LOGICOP_AND_REVERSE 4 -#define LOGICOP_INVERT 5 -#define LOGICOP_XOR 6 -#define LOGICOP_NAND 7 -#define LOGICOP_AND 8 -#define LOGICOP_EQUIV 9 -#define LOGICOP_NOOP 10 -#define LOGICOP_OR_INVERTED 11 -#define LOGICOP_COPY 12 -#define LOGICOP_OR_REVERSE 13 -#define LOGICOP_OR 14 -#define LOGICOP_SET 15 - -/* Enum SURFACE_FORMAT */ -#define 
R32G32B32A32_FLOAT 0 -#define R32G32B32A32_SINT 1 -#define R32G32B32A32_UINT 2 -#define R32G32B32A32_UNORM 3 -#define R32G32B32A32_SNORM 4 -#define R64G64_FLOAT 5 -#define R32G32B32X32_FLOAT 6 -#define R32G32B32A32_SSCALED 7 -#define R32G32B32A32_USCALED 8 -#define R32G32B32A32_SFIXED 32 -#define R64G64_PASSTHRU 33 -#define R32G32B32_FLOAT 64 -#define R32G32B32_SINT 65 -#define R32G32B32_UINT 66 -#define R32G32B32_UNORM 67 -#define R32G32B32_SNORM 68 -#define R32G32B32_SSCALED 69 -#define R32G32B32_USCALED 70 -#define R32G32B32_SFIXED 80 -#define R16G16B16A16_UNORM 128 -#define R16G16B16A16_SNORM 129 -#define R16G16B16A16_SINT 130 -#define R16G16B16A16_UINT 131 -#define R16G16B16A16_FLOAT 132 -#define R32G32_FLOAT 133 -#define R32G32_SINT 134 -#define R32G32_UINT 135 -#define R32_FLOAT_X8X24_TYPELESS 136 -#define X32_TYPELESS_G8X24_UINT 137 -#define L32A32_FLOAT 138 -#define R32G32_UNORM 139 -#define R32G32_SNORM 140 -#define R64_FLOAT 141 -#define R16G16B16X16_UNORM 142 -#define R16G16B16X16_FLOAT 143 -#define A32X32_FLOAT 144 -#define L32X32_FLOAT 145 -#define I32X32_FLOAT 146 -#define R16G16B16A16_SSCALED 147 -#define R16G16B16A16_USCALED 148 -#define R32G32_SSCALED 149 -#define R32G32_USCALED 150 -#define R32G32_SFIXED 160 -#define R64_PASSTHRU 161 -#define B8G8R8A8_UNORM 192 -#define B8G8R8A8_UNORM_SRGB 193 -#define R10G10B10A2_UNORM 194 -#define R10G10B10A2_UNORM_SRGB 195 -#define R10G10B10A2_UINT 196 -#define R10G10B10_SNORM_A2_UNORM 197 -#define R8G8B8A8_UNORM 199 -#define R8G8B8A8_UNORM_SRGB 200 -#define R8G8B8A8_SNORM 201 -#define R8G8B8A8_SINT 202 -#define R8G8B8A8_UINT 203 -#define R16G16_UNORM 204 -#define R16G16_SNORM 205 -#define R16G16_SINT 206 -#define R16G16_UINT 207 -#define R16G16_FLOAT 208 -#define B10G10R10A2_UNORM 209 -#define B10G10R10A2_UNORM_SRGB 210 -#define R11G11B10_FLOAT 211 -#define R32_SINT 214 -#define R32_UINT 215 -#define R32_FLOAT 216 -#define R24_UNORM_X8_TYPELESS 217 -#define X24_TYPELESS_G8_UINT 218 -#define L32_UNORM 221 
-#define A32_UNORM 222 -#define L16A16_UNORM 223 -#define I24X8_UNORM 224 -#define L24X8_UNORM 225 -#define A24X8_UNORM 226 -#define I32_FLOAT 227 -#define L32_FLOAT 228 -#define A32_FLOAT 229 -#define X8B8_UNORM_G8R8_SNORM 230 -#define A8X8_UNORM_G8R8_SNORM 231 -#define B8X8_UNORM_G8R8_SNORM 232 -#define B8G8R8X8_UNORM 233 -#define B8G8R8X8_UNORM_SRGB 234 -#define R8G8B8X8_UNORM 235 -#define R8G8B8X8_UNORM_SRGB 236 -#define R9G9B9E5_SHAREDEXP 237 -#define B10G10R10X2_UNORM 238 -#define L16A16_FLOAT 240 -#define R32_UNORM 241 -#define R32_SNORM 242 -#define R10G10B10X2_USCALED 243 -#define R8G8B8A8_SSCALED 244 -#define R8G8B8A8_USCALED 245 -#define R16G16_SSCALED 246 -#define R16G16_USCALED 247 -#define R32_SSCALED 248 -#define R32_USCALED 249 -#define B5G6R5_UNORM 256 -#define B5G6R5_UNORM_SRGB 257 -#define B5G5R5A1_UNORM 258 -#define B5G5R5A1_UNORM_SRGB 259 -#define B4G4R4A4_UNORM 260 -#define B4G4R4A4_UNORM_SRGB 261 -#define R8G8_UNORM 262 -#define R8G8_SNORM 263 -#define R8G8_SINT 264 -#define R8G8_UINT 265 -#define R16_UNORM 266 -#define R16_SNORM 267 -#define R16_SINT 268 -#define R16_UINT 269 -#define R16_FLOAT 270 -#define A8P8_UNORM_PALETTE0 271 -#define A8P8_UNORM_PALETTE1 272 -#define I16_UNORM 273 -#define L16_UNORM 274 -#define A16_UNORM 275 -#define L8A8_UNORM 276 -#define I16_FLOAT 277 -#define L16_FLOAT 278 -#define A16_FLOAT 279 -#define L8A8_UNORM_SRGB 280 -#define R5G5_SNORM_B6_UNORM 281 -#define B5G5R5X1_UNORM 282 -#define B5G5R5X1_UNORM_SRGB 283 -#define R8G8_SSCALED 284 -#define R8G8_USCALED 285 -#define R16_SSCALED 286 -#define R16_USCALED 287 -#define P8A8_UNORM_PALETTE0 290 -#define P8A8_UNORM_PALETTE1 291 -#define A1B5G5R5_UNORM 292 -#define A4B4G4R4_UNORM 293 -#define L8A8_UINT 294 -#define L8A8_SINT 295 -#define R8_UNORM 320 -#define R8_SNORM 321 -#define R8_SINT 322 -#define R8_UINT 323 -#define A8_UNORM 324 -#define I8_UNORM 325 -#define L8_UNORM 326 -#define P4A4_UNORM_PALETTE0 327 -#define A4P4_UNORM_PALETTE0 328 -#define R8_SSCALED 
329 -#define R8_USCALED 330 -#define P8_UNORM_PALETTE0 331 -#define L8_UNORM_SRGB 332 -#define P8_UNORM_PALETTE1 333 -#define P4A4_UNORM_PALETTE1 334 -#define A4P4_UNORM_PALETTE1 335 -#define Y8_UNORM 336 -#define L8_UINT 338 -#define L8_SINT 339 -#define I8_UINT 340 -#define I8_SINT 341 -#define DXT1_RGB_SRGB 384 -#define R1_UNORM 385 -#define YCRCB_NORMAL 386 -#define YCRCB_SWAPUVY 387 -#define P2_UNORM_PALETTE0 388 -#define P2_UNORM_PALETTE1 389 -#define BC1_UNORM 390 -#define BC2_UNORM 391 -#define BC3_UNORM 392 -#define BC4_UNORM 393 -#define BC5_UNORM 394 -#define BC1_UNORM_SRGB 395 -#define BC2_UNORM_SRGB 396 -#define BC3_UNORM_SRGB 397 -#define MONO8 398 -#define YCRCB_SWAPUV 399 -#define YCRCB_SWAPY 400 -#define DXT1_RGB 401 -#define FXT1 402 -#define R8G8B8_UNORM 403 -#define R8G8B8_SNORM 404 -#define R8G8B8_SSCALED 405 -#define R8G8B8_USCALED 406 -#define R64G64B64A64_FLOAT 407 -#define R64G64B64_FLOAT 408 -#define BC4_SNORM 409 -#define BC5_SNORM 410 -#define R16G16B16_FLOAT 411 -#define R16G16B16_UNORM 412 -#define R16G16B16_SNORM 413 -#define R16G16B16_SSCALED 414 -#define R16G16B16_USCALED 415 -#define BC6H_SF16 417 -#define BC7_UNORM 418 -#define BC7_UNORM_SRGB 419 -#define BC6H_UF16 420 -#define PLANAR_420_8 421 -#define R8G8B8_UNORM_SRGB 424 -#define ETC1_RGB8 425 -#define ETC2_RGB8 426 -#define EAC_R11 427 -#define EAC_RG11 428 -#define EAC_SIGNED_R11 429 -#define EAC_SIGNED_RG11 430 -#define ETC2_SRGB8 431 -#define R16G16B16_UINT 432 -#define R16G16B16_SINT 433 -#define R32_SFIXED 434 -#define R10G10B10A2_SNORM 435 -#define R10G10B10A2_USCALED 436 -#define R10G10B10A2_SSCALED 437 -#define R10G10B10A2_SINT 438 -#define B10G10R10A2_SNORM 439 -#define B10G10R10A2_USCALED 440 -#define B10G10R10A2_SSCALED 441 -#define B10G10R10A2_UINT 442 -#define B10G10R10A2_SINT 443 -#define R64G64B64A64_PASSTHRU 444 -#define R64G64B64_PASSTHRU 445 -#define ETC2_RGB8_PTA 448 -#define ETC2_SRGB8_PTA 449 -#define ETC2_EAC_RGBA8 450 -#define ETC2_EAC_SRGB8_A8 451 
-#define R8G8B8_UINT 456 -#define R8G8B8_SINT 457 -#define RAW 511 - -/* Enum Shader Channel Select */ -#define SCS_ZERO 0 -#define SCS_ONE 1 -#define SCS_RED 4 -#define SCS_GREEN 5 -#define SCS_BLUE 6 -#define SCS_ALPHA 7 - -/* Enum Texture Coordinate Mode */ -#define TCM_WRAP 0 -#define TCM_MIRROR 1 -#define TCM_CLAMP 2 -#define TCM_CUBE 3 -#define TCM_CLAMP_BORDER 4 -#define TCM_MIRROR_ONCE 5 -#define TCM_HALF_BORDER 6 - diff --git a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h index 9e0f82e7167..077281d2f56 100644 --- a/src/vulkan/genX_pipeline_util.h +++ b/src/vulkan/genX_pipeline_util.h @@ -244,9 +244,9 @@ static const uint32_t vk_to_gen_cullmode[] = { }; static const uint32_t vk_to_gen_fillmode[] = { - [VK_POLYGON_MODE_FILL] = RASTER_SOLID, - [VK_POLYGON_MODE_LINE] = RASTER_WIREFRAME, - [VK_POLYGON_MODE_POINT] = RASTER_POINT, + [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID, + [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME, + [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT, }; static const uint32_t vk_to_gen_front_face[] = { diff --git a/src/vulkan/gen_pack_header.py b/src/vulkan/gen_pack_header.py new file mode 100755 index 00000000000..58da0184faf --- /dev/null +++ b/src/vulkan/gen_pack_header.py @@ -0,0 +1,585 @@ +#!/usr/bin/env python3 + +import xml.parsers.expat +import re +import sys +import copy + +license = """/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or 
substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +""" + +pack_header = """%(license)s + +/* Instructions, enums and structures for %(platform)s. + * + * This file has been generated, do not hand edit. + */ + +#pragma once + +#include +#include + +#ifndef __gen_validate_value +#define __gen_validate_value(x) +#endif + +#ifndef __gen_field_functions +#define __gen_field_functions + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ull >> (64 - (end - start + 1))) << start; +} + +static inline uint64_t +__gen_uint(uint64_t v, uint32_t start, uint32_t end) +{ + const int width = end - start + 1; + + __gen_validate_value(v); + +#if DEBUG + if (width < 64) { + const uint64_t max = (1ull << width) - 1; + assert(v <= max); + } +#endif + + return v << start; +} + +static inline uint64_t +__gen_sint(int64_t v, uint32_t start, uint32_t end) +{ + const int width = end - start + 1; + + __gen_validate_value(v); + +#if DEBUG + if (width < 64) { + const int64_t max = (1ll << (width - 1)) - 1; + const int64_t min = -(1ll << (width - 1)); + assert(min <= v && v <= max); + } +#endif + + const uint64_t mask = ~0ull >> (64 - width); + + return (v & mask) << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline 
uint32_t +__gen_float(float v) +{ + __gen_validate_value(v); + return ((union __gen_value) { .f = (v) }).dw; +} + +static inline uint64_t +__gen_sfixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits) +{ + __gen_validate_value(v); + + const float factor = (1 << fract_bits); + +#if DEBUG + const float max = ((1 << (end - start)) - 1) / factor; + const float min = -(1 << (end - start)) / factor; + assert(min <= v && v <= max); +#endif + + const int32_t int_val = roundf(v * factor); + const uint64_t mask = ~0ull >> (64 - (end - start + 1)); + + return (int_val & mask) << start; +} + +static inline uint64_t +__gen_ufixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits) +{ + __gen_validate_value(v); + + const float factor = (1 << fract_bits); + +#if DEBUG + const float max = ((1 << (end - start + 1)) - 1) / factor; + const float min = 0.0f; + assert(min <= v && v <= max); +#endif + + const uint32_t uint_val = roundf(v * factor); + + return uint_val << start; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#endif + +""" + +def to_alphanum(name): + substitutions = { + ' ': '', + '/': '', + '[': '', + ']': '', + '(': '', + ')': '', + '-': '', + ':': '', + '.': '', + ',': '', + '=': '', + '>': '', + '#': '', + 'α': 'alpha', + '&': '', + '*': '', + '"': '', + '+': '', + '\'': '', + } + + for i, j in substitutions.items(): + name = name.replace(i, j) + + return name + +def safe_name(name): + name = to_alphanum(name) + if not str.isalpha(name[0]): + name = '_' + name + + return name + +class Field: + ufixed_pattern = re.compile("u(\d+)\.(\d+)") + sfixed_pattern = re.compile("s(\d+)\.(\d+)") + + def __init__(self, parser, attrs): + self.parser = parser + self.name = safe_name(attrs["name"]) + self.start = int(attrs["start"]) + self.end = int(attrs["end"]) + self.type = attrs["type"] + + if "prefix" in 
attrs: + self.prefix = attrs["prefix"] + else: + self.prefix = None + + if "default" in attrs: + self.default = int(attrs["default"]) + else: + self.default = None + + ufixed_match = Field.ufixed_pattern.match(self.type) + if ufixed_match: + self.type = 'ufixed' + self.fractional_size = int(ufixed_match.group(2)) + + sfixed_match = Field.sfixed_pattern.match(self.type) + if sfixed_match: + self.type = 'sfixed' + self.fractional_size = int(sfixed_match.group(2)) + + def emit_template_struct(self, dim): + if self.type == 'address': + type = '__gen_address_type' + elif self.type == 'bool': + type = 'bool' + elif self.type == 'float': + type = 'float' + elif self.type == 'ufixed': + type = 'float' + elif self.type == 'sfixed': + type = 'float' + elif self.type == 'uint' and self.end - self.start > 32: + type = 'uint64_t' + elif self.type == 'offset': + type = 'uint64_t' + elif self.type == 'int': + type = 'int32_t' + elif self.type == 'uint': + type = 'uint32_t' + elif self.type in self.parser.structs: + type = 'struct ' + self.parser.gen_prefix(safe_name(self.type)) + else: + print("#error unhandled type: %s" % self.type) + + print(" %-36s %s%s;" % (type, self.name, dim)) + + if len(self.values) > 0 and self.default == None: + if self.prefix: + prefix = self.prefix + "_" + else: + prefix = "" + + for value in self.values: + print("#define %-40s %d" % (prefix + value.name, value.value)) + +class Group: + def __init__(self, parser, parent, start, count, size): + self.parser = parser + self.parent = parent + self.start = start + self.count = count + self.size = size + self.fields = [] + + def emit_template_struct(self, dim): + if self.count == 0: + print(" /* variable length fields follow */") + else: + if self.count > 1: + dim = "%s[%d]" % (dim, self.count) + + for field in self.fields: + field.emit_template_struct(dim) + + class DWord: + def __init__(self): + self.size = 32 + self.fields = [] + self.address = None + + def collect_dwords(self, dwords, start, dim): + for 
field in self.fields: + if type(field) is Group: + if field.count == 1: + field.collect_dwords(dwords, start + field.start, dim) + else: + for i in range(field.count): + field.collect_dwords(dwords, + start + field.start + i * field.size, + "%s[%d]" % (dim, i)) + continue + + index = (start + field.start) // 32 + if not index in dwords: + dwords[index] = self.DWord() + + clone = copy.copy(field) + clone.start = clone.start + start + clone.end = clone.end + start + clone.dim = dim + dwords[index].fields.append(clone) + + if field.type == "address": + # assert dwords[index].address == None + dwords[index].address = field + + # Does this field extend into the next dword? + if index < field.end // 32 and dwords[index].size == 32: + if index + 1 in dwords: + assert dwords[index + 1].size == 32 + dwords[index].fields.extend(dwords[index + 1].fields) + dwords[index].size = 64 + dwords[index + 1] = dwords[index] + + def emit_pack_function(self, start): + dwords = {} + self.collect_dwords(dwords, 0, "") + + for index, dw in dwords.items(): + # For 64 bit dwords, we aliased the two dword entries in the dword + # dict it occupies. Now that we're emitting the pack function, + # skip the duplicate entries. + if index > 0 and index - 1 in dwords and dwords[index - 1] == dwords[index]: + continue + + # Special case: only one field and it's a struct at the beginning + # of the dword. In this case we pack directly into the + # destination. This is the only way we handle embedded structs + # larger than 32 bits. + if len(dw.fields) == 1: + field = dw.fields[0] + name = field.name + field.dim + if field.type in self.parser.structs and field.start % 32 == 0: + print("") + print(" %s_pack(data, &dw[%d], &values->%s);" % + (self.parser.gen_prefix(safe_name(field.type)), index, name)) + continue + + # Pack any fields of struct type first so we have integer values + # to the dword for those fields. 
+ field_index = 0 + for field in dw.fields: + if type(field) is Field and field.type in self.parser.structs: + name = field.name + field.dim + print("") + print(" uint32_t v%d_%d;" % (index, field_index)) + print(" %s_pack(data, &v%d_%d, &values->%s);" % + (self.parser.gen_prefix(safe_name(field.type)), index, field_index, name)) + field_index = field_index + 1 + + print("") + dword_start = index * 32 + if dw.address == None: + address_count = 0 + else: + address_count = 1 + + if dw.size == 32 and dw.address == None: + v = None + print(" dw[%d] =" % index) + elif len(dw.fields) > address_count: + v = "v%d" % index + print(" const uint%d_t %s =" % (dw.size, v)) + else: + v = "0" + + field_index = 0 + for field in dw.fields: + name = field.name + field.dim + if field.type == "mbo": + s = "__gen_mbo(%d, %d)" % \ + (field.start - dword_start, field.end - dword_start) + elif field.type == "address": + s = None + elif field.type == "uint": + s = "__gen_uint(values->%s, %d, %d)" % \ + (name, field.start - dword_start, field.end - dword_start) + elif field.type == "int": + s = "__gen_sint(values->%s, %d, %d)" % \ + (name, field.start - dword_start, field.end - dword_start) + elif field.type == "bool": + s = "__gen_uint(values->%s, %d, %d)" % \ + (name, field.start - dword_start, field.end - dword_start) + elif field.type == "float": + s = "__gen_float(values->%s)" % name + elif field.type == "offset": + s = "__gen_offset(values->%s, %d, %d)" % \ + (name, field.start - dword_start, field.end - dword_start) + elif field.type == 'ufixed': + s = "__gen_ufixed(values->%s, %d, %d, %d)" % \ + (name, field.start - dword_start, field.end - dword_start, field.fractional_size) + elif field.type == 'sfixed': + s = "__gen_sfixed(values->%s, %d, %d, %d)" % \ + (name, field.start - dword_start, field.end - dword_start, field.fractional_size) + elif field.type in self.parser.structs: + s = "__gen_uint(v%d_%d, %d, %d)" % \ + (index, field_index, field.start - dword_start, field.end - 
dword_start) + field_index = field_index + 1 + else: + print("/* unhandled field %s, type %s */\n" % (name, field.type)) + s = None + + if not s == None: + if field == dw.fields[-1]: + print(" %s;" % s) + else: + print(" %s |" % s) + + if dw.size == 32: + if dw.address: + print(" dw[%d] = __gen_combine_address(data, &dw[%d], values->%s, %s);" % (index, index, dw.address.name, v)) + continue + + if dw.address: + v_address = "v%d_address" % index + print(" const uint64_t %s =\n __gen_combine_address(data, &dw[%d], values->%s, %s);" % + (v_address, index, dw.address.name, v)) + v = v_address + + print(" dw[%d] = %s;" % (index, v)) + print(" dw[%d] = %s >> 32;" % (index + 1, v)) + +class Value: + def __init__(self, attrs): + self.name = safe_name(attrs["name"]) + self.value = int(attrs["value"]) + +class Parser: + def __init__(self): + self.parser = xml.parsers.expat.ParserCreate() + self.parser.StartElementHandler = self.start_element + self.parser.EndElementHandler = self.end_element + + self.instruction = None + self.structs = {} + + def start_element(self, name, attrs): + if name == "genxml": + self.platform = attrs["name"] + self.gen = attrs["gen"].replace('.', '') + print(pack_header % {'license': license, 'platform': self.platform}) + elif name == "instruction": + self.group = Group(self, None, 0, 1, 0) + self.instruction = safe_name(attrs["name"]) + self.length_bias = int(attrs["bias"]) + if "length" in attrs: + self.length = int(attrs["length"]) + else: + self.length = None + elif name == "struct": + self.group = Group(self, None, 0, 1, 0) + self.struct = safe_name(attrs["name"]) + self.structs[attrs["name"]] = 1 + if "length" in attrs: + self.length = int(attrs["length"]) + else: + self.length = None + elif name == "group": + group = Group(self, self.group, + int(attrs["start"]), int(attrs["count"]), int(attrs["size"])) + self.group.fields.append(group) + self.group = group + elif name == "field": + self.group.fields.append(Field(self, attrs)) + self.values = 
[] + elif name == "enum": + self.values = [] + self.enum = safe_name(attrs["name"]) + if "prefix" in attrs: + self.prefix = safe_name(attrs["prefix"]) + else: + self.prefix= None + elif name == "value": + self.values.append(Value(attrs)) + + def end_element(self, name): + if name == "instruction": + self.emit_instruction() + self.instruction = None + self.group = None + elif name == "struct": + self.emit_struct() + self.struct = None + self.group = None + elif name == "group": + self.group = self.group.parent + elif name == "field": + self.group.fields[-1].values = self.values + elif name == "enum": + self.emit_enum() + self.enum = None + + def gen_prefix(self, name): + if name[0] == "_": + return 'GEN%s%s' % (self.gen, name) + else: + return 'GEN%s_%s' % (self.gen, name) + + def emit_template_struct(self, name, group): + print("struct %s {" % self.gen_prefix(name)) + group.emit_template_struct("") + print("};\n") + + def emit_pack_function(self, name, group): + name = self.gen_prefix(name) + print("static inline void\n%s_pack(__gen_user_data *data, void * restrict dst,\n%sconst struct %s * restrict values)\n{" % + (name, ' ' * (len(name) + 6), name)) + + # Cast dst to make header C++ friendly + print(" uint32_t * restrict dw = (uint32_t * restrict) dst;") + + group.emit_pack_function(0) + + print("}\n") + + def emit_instruction(self): + name = self.instruction + if not self.length == None: + print('#define %-33s %4d' % + (self.gen_prefix(name + "_length"), self.length)) + print('#define %-33s %4d' % + (self.gen_prefix(name + "_length_bias"), self.length_bias)) + + default_fields = [] + for field in self.group.fields: + if not type(field) is Field: + continue + if field.default == None: + continue + default_fields.append(" .%-35s = %4d" % (field.name, field.default)) + + if default_fields: + print('#define %-40s\\' % (self.gen_prefix(name + '_header'))) + print(", \\\n".join(default_fields)) + print('') + + self.emit_template_struct(self.instruction, self.group) + 
+ self.emit_pack_function(self.instruction, self.group) + + def emit_struct(self): + name = self.struct + if not self.length == None: + print('#define %-33s %4d' % + (self.gen_prefix(name + "_length"), self.length)) + + self.emit_template_struct(self.struct, self.group) + self.emit_pack_function(self.struct, self.group) + + def emit_enum(self): + print('/* enum %s */' % self.gen_prefix(self.enum)) + for value in self.values: + if self.prefix: + name = self.prefix + "_" + value.name + else: + name = value.name + print('#define %-36s %4d' % (name.upper(), value.value)) + print('') + + def parse(self, filename): + file = open(filename, "rb") + self.parser.ParseFile(file) + file.close() + +if len(sys.argv) < 2: + print("No input xml file specified") + sys.exit(1) + +input_file = sys.argv[1] + +p = Parser() +p.parse(input_file) -- cgit v1.2.3 From 8179834030c85722824a2a4863e4d5c2d75f07eb Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Wed, 10 Feb 2016 13:41:27 -0800 Subject: nir/spirv: fix build_mat_subdet stack smasher The sub-determinate implementation pattern fixed by 6a7e2904e0a2a6f8efbf739a1b3cad7e1e4ab42d has a second instance in the same file. With the previous algorithm, when row and j are both 3, the index overruns the array. This only impacts the stack on 32 bit builds. 
Reviewed-by: Jason Ekstrand --- src/compiler/nir/spirv/vtn_glsl450.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/vtn_glsl450.c b/src/compiler/nir/spirv/vtn_glsl450.c index 4fceffa37a6..5fb73df0566 100644 --- a/src/compiler/nir/spirv/vtn_glsl450.c +++ b/src/compiler/nir/spirv/vtn_glsl450.c @@ -121,8 +121,11 @@ build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src, } else { /* Swizzle to get all but the specified row */ unsigned swiz[3]; - for (unsigned j = 0; j < 4; j++) - swiz[j - (j > row)] = j; + for (unsigned j = 0, k = 0; j < 3; j++, k++) { + if (k == row) + k++; /* skip column */ + swiz[j] = k; + } /* Grab all but the specified column */ nir_ssa_def *subcol[3]; -- cgit v1.2.3 From ac04c6de2c3abd2def23c43ede2458e4a61f7f6d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 10 Feb 2016 15:34:35 -0800 Subject: nir/spirv: Assert that struct member ids are in-bounds --- src/compiler/nir/spirv/spirv_to_nir.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index 979357d67c8..544251f5a1f 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -379,6 +379,7 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, } struct member_decoration_ctx { + unsigned num_fields; struct glsl_struct_field *fields; struct vtn_type *type; }; @@ -453,6 +454,8 @@ struct_member_decoration_cb(struct vtn_builder *b, if (member < 0) return; + assert(member < ctx->num_fields); + switch (dec->decoration) { case SpvDecorationRelaxedPrecision: break; /* FIXME: Do nothing with this for now. 
*/ @@ -671,6 +674,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, } struct member_decoration_ctx ctx = { + .num_fields = num_fields, .fields = fields, .type = val->type }; -- cgit v1.2.3 From 9be5a4bc29bb911728bc82718571d771c74602ab Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 10 Feb 2016 15:35:34 -0800 Subject: nir/spirv: Fix handling of OpGroupMemberDecorate We were pulling the member index from the wrong dword --- src/compiler/nir/spirv/spirv_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index 544251f5a1f..6848c0207c8 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -363,7 +363,7 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, if (opcode == SpvOpGroupDecorate) { dec->scope = VTN_DEC_DECORATION; } else { - dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); + dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w); } /* Link into the list */ -- cgit v1.2.3 From 9cc939d82f2de83133010545af0c2f519a13d064 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 10 Feb 2016 15:49:25 -0800 Subject: nir: Fix out-of-tree build for spirv2nir This needs to be able to find the generated nir_opcodes.h header. 
--- src/compiler/Makefile.am | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am index fe96cb3c879..8f374484696 100644 --- a/src/compiler/Makefile.am +++ b/src/compiler/Makefile.am @@ -179,6 +179,11 @@ glsl_glsl_test_LDADD = \ spirv2nir_SOURCES = \ nir/spirv2nir.c +spirv2nir_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_srcdir)/src/compiler/nir + spirv2nir_LDADD = \ nir/libnir.la \ $(top_builddir)/src/util/libmesautil.la \ -- cgit v1.2.3 From 09bb7ea4b735dc2f7a64a3bacdc79eb95728d78f Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 10 Feb 2016 15:52:29 -0800 Subject: anv: Fix out-of-tree build We need to be able to find the generated nir_opcodes.h header. --- src/vulkan/Makefile.am | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 36bea427c66..a3b05faeae8 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -64,6 +64,7 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/isl/ \ -I$(top_builddir)/src \ -I$(top_builddir)/src/compiler \ + -I$(top_builddir)/src/compiler/nir \ -I$(top_builddir)/src/vulkan libvulkan_la_CFLAGS = $(CFLAGS) -Wno-override-init -- cgit v1.2.3 From d2623a324713134b4da6e08775aed868300e96cb Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 10 Feb 2016 16:35:28 -0800 Subject: anv: Handle dwords that are all MBZ correctly A few packets have dwords in them that are all MBZ and we failed to write those. This change makes sure we iterate through all dwords and write them all. 
--- src/vulkan/gen_pack_header.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen_pack_header.py b/src/vulkan/gen_pack_header.py index 58da0184faf..8ed74581063 100755 --- a/src/vulkan/gen_pack_header.py +++ b/src/vulkan/gen_pack_header.py @@ -329,11 +329,27 @@ class Group: dwords = {} self.collect_dwords(dwords, 0, "") - for index, dw in dwords.items(): + # Determine number of dwords in this group. If we have a size, use + # that, since that'll account for MBZ dwords at the end of a group + # (like dword 8 on BDW+ 3DSTATE_HS). Otherwise, use the largest dword + # index we've seen plus one. + if self.size > 0: + length = self.size // 32 + else: + length = max(dwords.keys()) + 1 + + for index in range(length): + # Handle MBZ dwords + if not index in dwords: + print("") + print(" dw[%d] = 0;" % index) + continue + # For 64 bit dwords, we aliased the two dword entries in the dword # dict it occupies. Now that we're emitting the pack function, # skip the duplicate entries. 
- if index > 0 and index - 1 in dwords and dwords[index - 1] == dwords[index]: + dw = dwords[index] + if index > 0 and index - 1 in dwords and dw == dwords[index - 1]: continue # Special case: only one field and it's a struct at the beginning @@ -453,21 +469,26 @@ class Parser: self.gen = attrs["gen"].replace('.', '') print(pack_header % {'license': license, 'platform': self.platform}) elif name == "instruction": - self.group = Group(self, None, 0, 1, 0) self.instruction = safe_name(attrs["name"]) self.length_bias = int(attrs["bias"]) if "length" in attrs: self.length = int(attrs["length"]) + size = self.length * 32 else: self.length = None + size = 0 + self.group = Group(self, None, 0, 1, size) elif name == "struct": - self.group = Group(self, None, 0, 1, 0) self.struct = safe_name(attrs["name"]) self.structs[attrs["name"]] = 1 if "length" in attrs: self.length = int(attrs["length"]) + size = self.length * 32 else: self.length = None + size = 0 + self.group = Group(self, None, 0, 1, size) + elif name == "group": group = Group(self, self.group, int(attrs["start"]), int(attrs["count"]), int(attrs["size"])) -- cgit v1.2.3 From 2009e304f7c0bdf5bf01b8dd60dddc2f8bb25f18 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 10 Feb 2016 21:42:56 -0800 Subject: anv/pack: Handle case where a struct field covers multiple dwords We also didn't add start to field.end to get the absolute field end position. --- src/vulkan/gen_pack_header.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen_pack_header.py b/src/vulkan/gen_pack_header.py index 8ed74581063..fa2eed7a9ab 100755 --- a/src/vulkan/gen_pack_header.py +++ b/src/vulkan/gen_pack_header.py @@ -317,13 +317,16 @@ class Group: # assert dwords[index].address == None dwords[index].address = field - # Does this field extend into the next dword? 
- if index < field.end // 32 and dwords[index].size == 32: - if index + 1 in dwords: - assert dwords[index + 1].size == 32 + # Coalesce all the dwords covered by this field. The two cases we + # handle are where multiple fields are in a 64 bit word (typically + # and address and a few bits) or where a single struct field + # completely covers multiple dwords. + while index < (start + field.end) // 32: + if index + 1 in dwords and not dwords[index] == dwords[index + 1]: dwords[index].fields.extend(dwords[index + 1].fields) dwords[index].size = 64 dwords[index + 1] = dwords[index] + index = index + 1 def emit_pack_function(self, start): dwords = {} -- cgit v1.2.3 From da4a6bbbeada8e4c9165f27961c54a28c7d78915 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 11 Feb 2016 15:11:38 -0800 Subject: gen8/pipeline: Pull gs_vertex_count from prog_data --- src/vulkan/anv_pipeline.c | 1 - src/vulkan/anv_private.h | 1 - src/vulkan/gen8_pipeline.c | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 4e75557f48d..1b70b18fba1 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -635,7 +635,6 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, /* TODO: SIMD8 GS */ pipeline->gs_kernel = anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); - pipeline->gs_vertex_count = nir->info.gs.vertices_in; ralloc_free(mem_ctx); diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index ad34772b081..29d3c82b9b9 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1379,7 +1379,6 @@ struct anv_pipeline { uint32_t ps_grf_start0; uint32_t ps_grf_start2; uint32_t gs_kernel; - uint32_t gs_vertex_count; uint32_t cs_simd; uint32_t vb_used; diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index e31966f7d85..e796ab3d51d 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -322,7 +322,7 @@ 
genX(graphics_pipeline_create)( .VectorMaskEnable = false, .SamplerCount = 0, .BindingTableEntryCount = 0, - .ExpectedVertexCount = pipeline->gs_vertex_count, + .ExpectedVertexCount = gs_prog_data->vertices_in, .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), -- cgit v1.2.3 From 4016619931f2ffbb83ca65d83c3a60c6ced886f1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 11 Feb 2016 14:27:15 -0800 Subject: nir/spirv: Allow the clip distance capability. --- src/compiler/nir/spirv/spirv_to_nir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index e6255e8a68a..b5e7f536e2e 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -2153,6 +2153,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvCapabilityMatrix: case SpvCapabilityShader: case SpvCapabilityGeometry: + case SpvCapabilityClipDistance: break; default: assert(!"Unsupported capability"); -- cgit v1.2.3 From 3086c5a5e10e6faef0de3e62ba508fef82239df3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 10 Feb 2016 18:07:55 -0800 Subject: gen8/pipeline: Properly set bits in PS_EXTRA for W, depth, and samaple mask --- src/vulkan/gen7_pipeline.c | 3 +++ src/vulkan/gen8_pipeline.c | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index d4d6131794a..ec8db1d1956 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -387,6 +387,9 @@ genX(graphics_pipeline_create)( .EarlyDepthStencilControl = EDSC_NORMAL, .PointRasterizationRule = RASTRULE_UPPER_RIGHT, .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, + .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, + 
.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask, .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes); } diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index e796ab3d51d..c41d6ffa82e 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -547,9 +547,15 @@ genX(graphics_pipeline_create)( .AttributeEnable = wm_prog_data->num_varying_inputs > 0, .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, .PixelShaderIsPerSample = per_sample_ps, + .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, + .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, #if ANV_GEN >= 9 .PixelShaderPullsBary = wm_prog_data->pulls_bary, - .InputCoverageMaskState = ICMS_NONE + .InputCoverageMaskState = wm_prog_data->uses_sample_mask ? + ICMS_INNER_CONSERVATIVE : ICMS_NONE, +#else + .PixelShaderUsesInputCoverageMask = + wm_prog_data->uses_sample_mask, #endif ); } -- cgit v1.2.3 From 25b09d1b5d74a946700f0956799cf00db0edae1a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 11 Feb 2016 18:57:37 -0800 Subject: anv/event: Use a 64-bit value The immediate write from PIPE_CONTROL is 64-bits at least on BDW. This used to work on 64-bit archs because the compiler would align the following anv_state struct up for us. However, in 32-bit builds, they overlap and it causes problems. 
--- src/vulkan/anv_device.c | 2 +- src/vulkan/anv_private.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 6f874b2d1ab..21184c4a201 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1534,7 +1534,7 @@ VkResult anv_CreateEvent( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO); state = anv_state_pool_alloc(&device->dynamic_state_pool, - sizeof(*event), 4); + sizeof(*event), 8); event = state.map; event->state = state; event->semaphore = VK_EVENT_RESET; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 29d3c82b9b9..6da714ad8b2 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1306,7 +1306,7 @@ struct anv_fence { }; struct anv_event { - uint32_t semaphore; + uint64_t semaphore; struct anv_state state; }; -- cgit v1.2.3 From 3a2b23a447d612f6e339ccee6fc853108ff9c7bd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 11 Feb 2016 21:18:02 -0800 Subject: anv: Add a vk_icdGetInstanceProcAddr entrypoint Aparently there are some issues in symbol resolution if an application packages its own loader and you have a system-installed one. I don't really understand the details, but it's not onorous to add. --- src/vulkan/anv_device.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 21184c4a201..d313d570e29 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -611,6 +611,20 @@ PFN_vkVoidFunction anv_GetInstanceProcAddr( return anv_lookup_entrypoint(pName); } +/* The loader wants us to expose a second GetInstanceProcAddr function + * to work around certain LD_PRELOAD issues seen in apps. 
+ */ +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( + VkInstance instance, + const char* pName); + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( + VkInstance instance, + const char* pName) +{ + return anv_GetInstanceProcAddr(instance, pName); +} + PFN_vkVoidFunction anv_GetDeviceProcAddr( VkDevice device, const char* pName) -- cgit v1.2.3 From ea93041ccce6f4c460b71c48c1cb8589bf0bbe7c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 11 Feb 2016 18:41:04 -0800 Subject: anv/device: Use a normal BO in submit_simple_batch --- src/vulkan/anv_device.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index d313d570e29..0ff5c9f7aa9 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -691,10 +691,9 @@ VkResult anv_device_submit_simple_batch(struct anv_device *device, struct anv_batch *batch) { - struct anv_state state; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec2_objects[1]; - struct anv_bo *bo = NULL; + struct anv_bo bo; VkResult result = VK_SUCCESS; uint32_t size; int64_t timeout; @@ -702,22 +701,25 @@ anv_device_submit_simple_batch(struct anv_device *device, /* Kernel driver requires 8 byte aligned batch length */ size = align_u32(batch->next - batch->start, 8); - state = anv_state_pool_alloc(&device->dynamic_state_pool, MAX(size, 64), 32); - bo = &device->dynamic_state_pool.block_pool->bo; - memcpy(state.map, batch->start, size); + assert(size < device->batch_bo_pool.bo_size); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo); + if (result != VK_SUCCESS) + return result; + + memcpy(bo.map, batch->start, size); - exec2_objects[0].handle = bo->gem_handle; + exec2_objects[0].handle = bo.gem_handle; exec2_objects[0].relocation_count = 0; exec2_objects[0].relocs_ptr = 0; exec2_objects[0].alignment = 0; - exec2_objects[0].offset = bo->offset; + 
exec2_objects[0].offset = bo.offset; exec2_objects[0].flags = 0; exec2_objects[0].rsvd1 = 0; exec2_objects[0].rsvd2 = 0; execbuf.buffers_ptr = (uintptr_t) exec2_objects; execbuf.buffer_count = 1; - execbuf.batch_start_offset = state.offset; + execbuf.batch_start_offset = 0; execbuf.batch_len = size; execbuf.cliprects_ptr = 0; execbuf.num_cliprects = 0; @@ -737,7 +739,7 @@ anv_device_submit_simple_batch(struct anv_device *device, } timeout = INT64_MAX; - ret = anv_gem_wait(device, bo->gem_handle, &timeout); + ret = anv_gem_wait(device, bo.gem_handle, &timeout); if (ret != 0) { /* We don't know the real error. */ result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); @@ -745,7 +747,7 @@ anv_device_submit_simple_batch(struct anv_device *device, } fail: - anv_state_pool_free(&device->dynamic_state_pool, state); + anv_bo_pool_free(&device->batch_bo_pool, &bo); return result; } -- cgit v1.2.3 From cf7fd538509dfdab7696fc8cebc08a6f57548011 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 10 Feb 2016 13:36:56 -0800 Subject: anv/meta: Hardcode smooth texcoord interpolation in blit shaders Trivial cleanup. No change in behavior. Function argument 'attr_flat', in anv_meta.c:build_nir_vertex_shader(), was always false. --- src/vulkan/anv_meta.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 46227c3407e..9b48b7f5dbb 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -34,7 +34,7 @@ struct anv_render_pass anv_meta_dummy_renderpass = {0}; static nir_shader * -build_nir_vertex_shader(bool attr_flat) +build_nir_vertex_shader(void) { nir_builder b; @@ -61,8 +61,7 @@ build_nir_vertex_shader(bool attr_flat) nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out, attr_type, "v_attr"); attr_out->data.location = VARYING_SLOT_VAR0; - attr_out->data.interpolation = attr_flat ? 
INTERP_QUALIFIER_FLAT : - INTERP_QUALIFIER_SMOOTH; + attr_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; nir_copy_var(&b, attr_out, attr_in); return b.shader; @@ -239,7 +238,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) * does not dead-code our inputs. */ struct anv_shader_module vs = { - .nir = build_nir_vertex_shader(false), + .nir = build_nir_vertex_shader(), }; struct anv_shader_module fs_1d = { -- cgit v1.2.3 From 37f4dfb19dadf5a2643ab3c78e09908cfd440504 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 12 Feb 2016 09:55:32 -0800 Subject: anv/meta: Move blit code to anv_meta_blit.c The clear code lived in anv_meta_clear.c. The resolve code in anv_meta_resolve.c. Only the blit code lived in anv_meta.c, alongside the shareed meta code. This is just a copy-paste patch. No change in behavior. --- src/vulkan/Makefile.am | 1 + src/vulkan/anv_meta.c | 1429 +------------------------------------------ src/vulkan/anv_meta.h | 3 + src/vulkan/anv_meta_blit.c | 1448 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1453 insertions(+), 1428 deletions(-) create mode 100644 src/vulkan/anv_meta_blit.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index a3b05faeae8..55e40283e4f 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -82,6 +82,7 @@ VULKAN_SOURCES = \ anv_image.c \ anv_intel.c \ anv_meta.c \ + anv_meta_blit.c \ anv_meta_clear.c \ anv_meta_resolve.c \ anv_nir_apply_dynamic_offsets.c \ diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c index 9b48b7f5dbb..82944ea1a92 100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@ -21,104 +21,10 @@ * IN THE SOFTWARE. 
*/ -#include -#include -#include -#include -#include - #include "anv_meta.h" -#include "anv_private.h" -#include "nir/nir_builder.h" struct anv_render_pass anv_meta_dummy_renderpass = {0}; -static nir_shader * -build_nir_vertex_shader(void) -{ - nir_builder b; - - const struct glsl_type *vertex_type = glsl_vec4_type(); - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); - - nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vertex_type, "a_pos"); - pos_in->data.location = VERT_ATTRIB_GENERIC0; - nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, - vertex_type, "gl_Position"); - pos_out->data.location = VARYING_SLOT_POS; - nir_copy_var(&b, pos_out, pos_in); - - /* Add one more pass-through attribute. For clear shaders, this is used - * to store the color and for blit shaders it's the texture coordinate. - */ - const struct glsl_type *attr_type = glsl_vec4_type(); - nir_variable *attr_in = nir_variable_create(b.shader, nir_var_shader_in, - attr_type, "a_attr"); - attr_in->data.location = VERT_ATTRIB_GENERIC1; - nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out, - attr_type, "v_attr"); - attr_out->data.location = VARYING_SLOT_VAR0; - attr_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; - nir_copy_var(&b, attr_out, attr_in); - - return b.shader; -} - -static nir_shader * -build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) -{ - nir_builder b; - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); - - const struct glsl_type *color_type = glsl_vec4_type(); - - nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - glsl_vec4_type(), "v_attr"); - tex_pos_in->data.location = VARYING_SLOT_VAR0; - - /* Swizzle the array index which comes in as Z coordinate into the right - * position. 
- */ - unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; - nir_ssa_def *const tex_pos = - nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, - (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); - - const struct glsl_type *sampler_type = - glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, - glsl_get_base_type(color_type)); - nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, - sampler_type, "s_tex"); - sampler->data.descriptor_set = 0; - sampler->data.binding = 0; - - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); - tex->sampler_dim = tex_dim; - tex->op = nir_texop_tex; - tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(tex_pos); - tex->dest_type = nir_type_float; /* TODO */ - tex->is_array = glsl_sampler_type_is_array(sampler_type); - tex->coord_components = tex_pos->num_components; - tex->texture = nir_deref_var_create(tex, sampler); - tex->sampler = nir_deref_var_create(tex, sampler); - - nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); - nir_builder_instr_insert(&b, &tex->instr); - - nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, - color_type, "f_color"); - color_out->data.location = FRAG_RESULT_DATA0; - nir_store_var(&b, color_out, &tex->dest.ssa, 4); - - return b.shader; -} - void anv_meta_save(struct anv_meta_saved_state *state, const struct anv_cmd_buffer *cmd_buffer, @@ -194,1320 +100,6 @@ anv_meta_get_iview_layer(const struct anv_image *dest_image, } } -static VkResult -anv_device_init_meta_blit_state(struct anv_device *device) -{ - VkResult result; - - result = anv_CreateRenderPass(anv_device_to_handle(device), - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = 
VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = &(VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveAttachmentCount = 1, - .pPreserveAttachments = (uint32_t[]) { 0 }, - }, - .dependencyCount = 0, - }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); - if (result != VK_SUCCESS) - goto fail; - - /* We don't use a vertex shader for clearing, but instead build and pass - * the VUEs directly to the rasterization backend. However, we do need - * to provide GLSL source for the vertex shader so that the compiler - * does not dead-code our inputs. - */ - struct anv_shader_module vs = { - .nir = build_nir_vertex_shader(), - }; - - struct anv_shader_module fs_1d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), - }; - - struct anv_shader_module fs_2d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), - }; - - struct anv_shader_module fs_3d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), - }; - - VkPipelineVertexInputStateCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 2, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = 0, - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - { - .binding = 1, - .stride = 5 * sizeof(float), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 0, - 
.format = VK_FORMAT_R32G32B32A32_UINT, - .offset = 0 - }, - { - /* Position */ - .location = 1, - .binding = 1, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = 0 - }, - { - /* Texture Coordinate */ - .location = 2, - .binding = 1, - .format = VK_FORMAT_R32G32B32_SFLOAT, - .offset = 8 - } - } - }; - - VkDescriptorSetLayoutCreateInfo ds_layout_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - }, - } - }; - result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), - &ds_layout_info, - &device->meta_state.alloc, - &device->meta_state.blit.ds_layout); - if (result != VK_SUCCESS) - goto fail_render_pass; - - result = anv_CreatePipelineLayout(anv_device_to_handle(device), - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit.ds_layout, - }, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); - if (result != VK_SUCCESS) - goto fail_descriptor_set_layout; - - VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = anv_shader_module_to_handle(&vs), - .pName = "main", - .pSpecializationInfo = NULL - }, { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! 
*/ - .pName = "main", - .pSpecializationInfo = NULL - }, - }; - - const VkGraphicsPipelineCreateInfo vk_pipeline_info = { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = ARRAY_SIZE(pipeline_shader_stages), - .pStages = pipeline_shader_stages, - .pVertexInputState = &vi_create_info, - .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }, - .pViewportState = &(VkPipelineViewportStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, - }, - .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE - }, - .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterizationSamples = 1, - .sampleShadingEnable = false, - .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, - }, - .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { .colorWriteMask = - VK_COLOR_COMPONENT_A_BIT | - VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT }, - } - }, - .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 9, - .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_LINE_WIDTH, - VK_DYNAMIC_STATE_DEPTH_BIAS, - VK_DYNAMIC_STATE_BLEND_CONSTANTS, - 
VK_DYNAMIC_STATE_DEPTH_BOUNDS, - VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, - VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }, - }, - .flags = 0, - .layout = device->meta_state.blit.pipeline_layout, - .renderPass = device->meta_state.blit.render_pass, - .subpass = 0, - }; - - const struct anv_graphics_pipeline_create_info anv_pipeline_info = { - .color_attachment_count = -1, - .use_repclear = false, - .disable_viewport = true, - .disable_scissor = true, - .disable_vs = true, - .use_rectlist = true - }; - - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_layout; - - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_1d; - - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_2d; - - ralloc_free(vs.nir); - ralloc_free(fs_1d.nir); - ralloc_free(fs_2d.nir); - ralloc_free(fs_3d.nir); - - return VK_SUCCESS; - - fail_pipeline_2d: - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src, - &device->meta_state.alloc); - - fail_pipeline_1d: - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_1d_src, - &device->meta_state.alloc); - - fail_pipeline_layout: - 
anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout, - &device->meta_state.alloc); - fail_descriptor_set_layout: - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout, - &device->meta_state.alloc); - fail_render_pass: - anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass, - &device->meta_state.alloc); - - ralloc_free(vs.nir); - ralloc_free(fs_1d.nir); - ralloc_free(fs_2d.nir); - ralloc_free(fs_3d.nir); - fail: - return result; -} - -static void -meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_saved_state *saved_state) -{ - anv_meta_save(saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); -} - -struct blit_region { - VkOffset3D src_offset; - VkExtent3D src_extent; - VkOffset3D dest_offset; - VkExtent3D dest_extent; -}; - -/* Returns the user-provided VkBufferImageCopy::imageOffset in units of - * elements rather than texels. One element equals one texel or one block - * if Image is uncompressed or compressed, respectively. - */ -static struct VkOffset3D -meta_region_offset_el(const struct anv_image * image, - const struct VkOffset3D * offset) -{ - const struct isl_format_layout * isl_layout = image->format->isl_layout; - return (VkOffset3D) { - .x = offset->x / isl_layout->bw, - .y = offset->y / isl_layout->bh, - .z = offset->z / isl_layout->bd, - }; -} - -/* Returns the user-provided VkBufferImageCopy::imageExtent in units of - * elements rather than texels. One element equals one texel or one block - * if Image is uncompressed or compressed, respectively. 
- */ -static struct VkExtent3D -meta_region_extent_el(const VkFormat format, - const struct VkExtent3D * extent) -{ - const struct isl_format_layout * isl_layout = - anv_format_for_vk_format(format)->isl_layout; - return (VkExtent3D) { - .width = DIV_ROUND_UP(extent->width , isl_layout->bw), - .height = DIV_ROUND_UP(extent->height, isl_layout->bh), - .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), - }; -} - -static void -meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_image *src_image, - struct anv_image_view *src_iview, - VkOffset3D src_offset, - VkExtent3D src_extent, - struct anv_image *dest_image, - struct anv_image_view *dest_iview, - VkOffset3D dest_offset, - VkExtent3D dest_extent, - VkFilter blit_filter) -{ - struct anv_device *device = cmd_buffer->device; - VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; - - struct blit_vb_data { - float pos[2]; - float tex_coord[3]; - } *vb_data; - - assert(src_image->samples == dest_image->samples); - - unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); - - struct anv_state vb_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); - memset(vb_state.map, 0, sizeof(struct anv_vue_header)); - vb_data = vb_state.map + sizeof(struct anv_vue_header); - - vb_data[0] = (struct blit_vb_data) { - .pos = { - dest_offset.x + dest_extent.width, - dest_offset.y + dest_extent.height, - }, - .tex_coord = { - (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, - }, - }; - - vb_data[1] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y + dest_extent.height, - }, - .tex_coord = { - (float)src_offset.x / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, - }, - }; - - vb_data[2] = 
(struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y, - }, - .tex_coord = { - (float)src_offset.x / (float)src_iview->extent.width, - (float)src_offset.y / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, - }, - }; - - anv_state_clflush(vb_state); - - struct anv_buffer vertex_buffer = { - .device = device, - .size = vb_size, - .bo = &device->dynamic_state_block_pool.bo, - .offset = vb_state.offset, - }; - - anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, - (VkBuffer[]) { - anv_buffer_to_handle(&vertex_buffer), - anv_buffer_to_handle(&vertex_buffer) - }, - (VkDeviceSize[]) { - 0, - sizeof(struct anv_vue_header), - }); - - VkSampler sampler; - ANV_CALL(CreateSampler)(anv_device_to_handle(device), - &(VkSamplerCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = blit_filter, - .minFilter = blit_filter, - }, &cmd_buffer->pool->alloc, &sampler); - - VkDescriptorSet set; - anv_AllocateDescriptorSets(anv_device_to_handle(device), - &(VkDescriptorSetAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = dummy_desc_pool, - .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit.ds_layout - }, &set); - anv_UpdateDescriptorSets(anv_device_to_handle(device), - 1, /* writeCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = set, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = sampler, - .imageView = anv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - } - }, 0, NULL); - - VkFramebuffer fb; - anv_CreateFramebuffer(anv_device_to_handle(device), - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - 
anv_image_view_to_handle(dest_iview), - }, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .layers = 1 - }, &cmd_buffer->pool->alloc, &fb); - - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = device->meta_state.blit.render_pass, - .framebuffer = fb, - .renderArea = { - .offset = { dest_offset.x, dest_offset.y }, - .extent = { dest_extent.width, dest_extent.height }, - }, - .clearValueCount = 0, - .pClearValues = NULL, - }, VK_SUBPASS_CONTENTS_INLINE); - - VkPipeline pipeline; - - switch (src_image->type) { - case VK_IMAGE_TYPE_1D: - pipeline = device->meta_state.blit.pipeline_1d_src; - break; - case VK_IMAGE_TYPE_2D: - pipeline = device->meta_state.blit.pipeline_2d_src; - break; - case VK_IMAGE_TYPE_3D: - pipeline = device->meta_state.blit.pipeline_3d_src; - break; - default: - unreachable(!"bad VkImageType"); - } - - if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { - anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - } - - anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - - anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit.pipeline_layout, 0, 1, - &set, 0, NULL); - - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); - - ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); - - /* At the point where we emit the draw call, all data from the - * descriptor sets, etc. has been used. We are free to delete it. 
- */ - anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); - anv_DestroySampler(anv_device_to_handle(device), sampler, - &cmd_buffer->pool->alloc); - anv_DestroyFramebuffer(anv_device_to_handle(device), fb, - &cmd_buffer->pool->alloc); -} - -static void -meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, - const struct anv_meta_saved_state *saved_state) -{ - anv_meta_restore(saved_state, cmd_buffer); -} - -static VkFormat -vk_format_for_size(int bs) -{ - /* Note: We intentionally use the 4-channel formats whenever we can. - * This is so that, when we do a RGB <-> RGBX copy, the two formats will - * line up even though one of them is 3/4 the size of the other. - */ - switch (bs) { - case 1: return VK_FORMAT_R8_UINT; - case 2: return VK_FORMAT_R8G8_UINT; - case 3: return VK_FORMAT_R8G8B8_UINT; - case 4: return VK_FORMAT_R8G8B8A8_UINT; - case 6: return VK_FORMAT_R16G16B16_UINT; - case 8: return VK_FORMAT_R16G16B16A16_UINT; - case 12: return VK_FORMAT_R32G32B32_UINT; - case 16: return VK_FORMAT_R32G32B32A32_UINT; - default: - unreachable("Invalid format block size"); - } -} - -static void -do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *src, uint64_t src_offset, - struct anv_bo *dest, uint64_t dest_offset, - int width, int height, VkFormat copy_format) -{ - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - - VkImageCreateInfo image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = copy_format, - .extent = { - .width = width, - .height = height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = 0, - .flags = 0, - }; - - VkImage src_image; - image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; - anv_CreateImage(vk_device, &image_info, - &cmd_buffer->pool->alloc, &src_image); - - VkImage dest_image; - image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - anv_CreateImage(vk_device, &image_info, - 
&cmd_buffer->pool->alloc, &dest_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. - */ - anv_image_from_handle(src_image)->bo = src; - anv_image_from_handle(src_image)->offset = src_offset; - anv_image_from_handle(dest_image)->bo = dest; - anv_image_from_handle(dest_image)->offset = dest_offset; - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = src_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = dest_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, - cmd_buffer, 0); - - meta_emit_blit(cmd_buffer, - anv_image_from_handle(src_image), - &src_iview, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }, - anv_image_from_handle(dest_image), - &dest_iview, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }, - VK_FILTER_NEAREST); - - anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); - anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc); -} - -void anv_CmdCopyBuffer( - VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); - ANV_FROM_HANDLE(anv_buffer, 
dest_buffer, destBuffer); - - struct anv_meta_saved_state saved_state; - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; - uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset; - uint64_t copy_size = pRegions[r].size; - - /* First, we compute the biggest format that can be used with the - * given offsets and size. - */ - int bs = 16; - - int fs = ffs(src_offset) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(src_offset % bs == 0); - - fs = ffs(dest_offset) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(dest_offset % bs == 0); - - fs = ffs(pRegions[r].size) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(pRegions[r].size % bs == 0); - - VkFormat copy_format = vk_format_for_size(bs); - - /* This is maximum possible width/height our HW can handle */ - uint64_t max_surface_dim = 1 << 14; - - /* First, we make a bunch of max-sized copies */ - uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; - while (copy_size >= max_copy_size) { - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - max_surface_dim, max_surface_dim, copy_format); - copy_size -= max_copy_size; - src_offset += max_copy_size; - dest_offset += max_copy_size; - } - - uint64_t height = copy_size / (max_surface_dim * bs); - assert(height < max_surface_dim); - if (height != 0) { - uint64_t rect_copy_size = height * max_surface_dim * bs; - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - max_surface_dim, height, copy_format); - copy_size -= rect_copy_size; - src_offset += rect_copy_size; - dest_offset += rect_copy_size; - } - - if (copy_size != 0) { - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - copy_size / bs, 1, copy_format); - } - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdUpdateBuffer( - 
VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize dataSize, - const uint32_t* pData) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); - struct anv_meta_saved_state saved_state; - - meta_prepare_blit(cmd_buffer, &saved_state); - - /* We can't quite grab a full block because the state stream needs a - * little data at the top to build its linked list. - */ - const uint32_t max_update_size = - cmd_buffer->device->dynamic_state_block_pool.block_size - 64; - - assert(max_update_size < (1 << 14) * 4); - - while (dataSize) { - const uint32_t copy_size = MIN2(dataSize, max_update_size); - - struct anv_state tmp_data = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); - - memcpy(tmp_data.map, pData, copy_size); - - VkFormat format; - int bs; - if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { - format = VK_FORMAT_R32G32B32A32_UINT; - bs = 16; - } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { - format = VK_FORMAT_R32G32_UINT; - bs = 8; - } else { - assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); - format = VK_FORMAT_R32_UINT; - bs = 4; - } - - do_buffer_copy(cmd_buffer, - &cmd_buffer->device->dynamic_state_block_pool.bo, - tmp_data.offset, - dst_buffer->bo, dst_buffer->offset + dstOffset, - copy_size / bs, 1, format); - - dataSize -= copy_size; - dstOffset += copy_size; - pData = (void *)pData + copy_size; - } -} - -static VkFormat -choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) -{ - assert(__builtin_popcount(aspect) == 1); - - struct isl_surf *surf = - &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; - - /* vkCmdCopyImage behaves like memcpy. Therefore we choose identical UINT - * formats for the source and destination image views. - * - * From the Vulkan spec (2015-12-30): - * - * vkCmdCopyImage performs image copies in a similar manner to a host - * memcpy. 
It does not perform general-purpose conversions such as - * scaling, resizing, blending, color-space conversion, or format - * conversions. Rather, it simply copies raw image data. vkCmdCopyImage - * can copy between images with different formats, provided the formats - * are compatible as defined below. - * - * [The spec later defines compatibility as having the same number of - * bytes per block]. - */ - return vk_format_for_size(isl_format_layouts[surf->format].bs); -} - -static VkFormat -choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect) -{ - assert(__builtin_popcount(aspect) == 1); - - /* vkCmdCopy* commands behave like memcpy. Therefore we choose - * compatable UINT formats for the source and destination image views. - * - * For the buffer, we go back to the original image format and get a - * the format as if it were linear. This way, for RGB formats, we get - * an RGB format here even if the tiled image is RGBA. XXX: This doesn't - * work if the buffer is the destination. - */ - enum isl_format linear_format = anv_get_isl_format(format, aspect, - VK_IMAGE_TILING_LINEAR, - NULL); - - return vk_format_for_size(isl_format_layouts[linear_format].bs); -} - -void anv_CmdCopyImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - struct anv_meta_saved_state saved_state; - - /* From the Vulkan 1.0 spec: - * - * vkCmdCopyImage can be used to copy image data between multisample - * images, but both images must have the same number of samples. 
- */ - assert(src_image->samples == dest_image->samples); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - assert(pRegions[r].srcSubresource.aspectMask == - pRegions[r].dstSubresource.aspectMask); - - VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; - - VkFormat src_format = choose_iview_format(src_image, aspect); - VkFormat dst_format = choose_iview_format(dest_image, aspect); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = src_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, - .layerCount = pRegions[r].dstSubresource.layerCount, - }, - }, - cmd_buffer, 0); - - const VkOffset3D dest_offset = { - .x = pRegions[r].dstOffset.x, - .y = pRegions[r].dstOffset.y, - .z = 0, - }; - - unsigned num_slices; - if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(pRegions[r].srcSubresource.layerCount == 1 && - pRegions[r].dstSubresource.layerCount == 1); - num_slices = pRegions[r].extent.depth; - } else { - assert(pRegions[r].srcSubresource.layerCount == - pRegions[r].dstSubresource.layerCount); - assert(pRegions[r].extent.depth == 1); - num_slices = pRegions[r].dstSubresource.layerCount; - } - - const uint32_t dest_base_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, - &pRegions[r].dstOffset); - - for (unsigned slice = 0; slice < num_slices; slice++) { - VkOffset3D src_offset = pRegions[r].srcOffset; - src_offset.z += slice; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = 
destImage, - .viewType = anv_meta_get_view_type(dest_image), - .format = dst_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].dstSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + slice, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - meta_emit_blit(cmd_buffer, - src_image, &src_iview, - src_offset, - pRegions[r].extent, - dest_image, &dest_iview, - dest_offset, - pRegions[r].extent, - VK_FILTER_NEAREST); - } - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdBlitImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageBlit* pRegions, - VkFilter filter) - -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - struct anv_meta_saved_state saved_state; - - /* From the Vulkan 1.0 spec: - * - * vkCmdBlitImage must not be used for multisampled source or - * destination images. Use vkCmdResolveImage for this purpose. 
- */ - assert(src_image->samples == 1); - assert(dest_image->samples == 1); - - anv_finishme("respect VkFilter"); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = src_image->vk_format, - .subresourceRange = { - .aspectMask = pRegions[r].srcSubresource.aspectMask, - .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - const VkOffset3D dest_offset = { - .x = pRegions[r].dstOffsets[0].x, - .y = pRegions[r].dstOffsets[0].y, - .z = 0, - }; - - if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x || - pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y || - pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x || - pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y) - anv_finishme("FINISHME: Allow flipping in blits"); - - const VkExtent3D dest_extent = { - .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x, - .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y, - }; - - const VkExtent3D src_extent = { - .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x, - .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y, - }; - - const uint32_t dest_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, - &pRegions[r].dstOffsets[0]); - - if (pRegions[r].srcSubresource.layerCount > 1) - anv_finishme("FINISHME: copy multiple array layers"); - - if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z || - pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z) - anv_finishme("FINISHME: copy multiple depth layers"); - - struct 
anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = destImage, - .viewType = anv_meta_get_view_type(dest_image), - .format = dest_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].dstSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_array_slice, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - meta_emit_blit(cmd_buffer, - src_image, &src_iview, - pRegions[r].srcOffsets[0], src_extent, - dest_image, &dest_iview, - dest_offset, dest_extent, - filter); - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -static struct anv_image * -make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, - VkImageUsageFlags usage, - VkImageType image_type, - const VkAllocationCallbacks *alloc, - const VkBufferImageCopy *copy) -{ - ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); - - VkExtent3D extent = copy->imageExtent; - if (copy->bufferRowLength) - extent.width = copy->bufferRowLength; - if (copy->bufferImageHeight) - extent.height = copy->bufferImageHeight; - extent.depth = 1; - extent = meta_region_extent_el(format, &extent); - - VkImageAspectFlags aspect = copy->imageSubresource.aspectMask; - VkFormat buffer_format = choose_buffer_format(format, aspect); - - VkImage vk_image; - VkResult result = anv_CreateImage(vk_device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = buffer_format, - .extent = extent, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = usage, - .flags = 0, - }, alloc, &vk_image); - assert(result == VK_SUCCESS); - - ANV_FROM_HANDLE(anv_image, image, vk_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. 
- */ - image->bo = buffer->bo; - image->offset = buffer->offset + copy->bufferOffset; - - return image; -} - -void anv_CmdCopyBufferToImage( - VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkBufferImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_meta_saved_state saved_state; - - /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to - * VK_SAMPLE_COUNT_1_BIT." - */ - assert(dest_image->samples == 1); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - - VkFormat image_format = choose_iview_format(dest_image, aspect); - - struct anv_image *src_image = - make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format, - VK_IMAGE_USAGE_SAMPLED_BIT, - dest_image->type, &cmd_buffer->pool->alloc, - &pRegions[r]); - - const uint32_t dest_base_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource, - &pRegions[r].imageOffset); - - unsigned num_slices_3d = pRegions[r].imageExtent.depth; - unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; - unsigned slice_3d = 0; - unsigned slice_array = 0; - while (slice_3d < num_slices_3d && slice_array < num_slices_array) { - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(src_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, - cmd_buffer, 0); - - uint32_t img_x = 0; 
- uint32_t img_y = 0; - uint32_t img_o = 0; - if (isl_format_is_compressed(dest_image->format->isl_format)) - isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, - &dest_image->color_surface.isl, - pRegions[r].imageSubresource.mipLevel, - pRegions[r].imageSubresource.baseArrayLayer + slice_array, - pRegions[r].imageOffset.z + slice_3d, - &img_o, &img_x, &img_y); - - VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, & pRegions[r].imageOffset); - dest_offset_el.x += img_x; - dest_offset_el.y += img_y; - dest_offset_el.z = 0; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(dest_image), - .viewType = anv_meta_get_view_type(dest_image), - .format = image_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + - slice_array + slice_3d, - .layerCount = 1 - }, - }, - cmd_buffer, img_o); - - const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, - &pRegions[r].imageExtent); - - meta_emit_blit(cmd_buffer, - src_image, - &src_iview, - (VkOffset3D){0, 0, 0}, - img_extent_el, - dest_image, - &dest_iview, - dest_offset_el, - img_extent_el, - VK_FILTER_NEAREST); - - /* Once we've done the blit, all of the actual information about - * the image is embedded in the command buffer so we can just - * increment the offset directly in the image effectively - * re-binding it to different backing memory. 
- */ - src_image->offset += src_image->extent.width * - src_image->extent.height * - src_image->format->isl_layout->bs; - - if (dest_image->type == VK_IMAGE_TYPE_3D) - slice_3d++; - else - slice_array++; - } - - anv_DestroyImage(vk_device, anv_image_to_handle(src_image), - &cmd_buffer->pool->alloc); - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdCopyImageToBuffer( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_meta_saved_state saved_state; - - - /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to - * VK_SAMPLE_COUNT_1_BIT." - */ - assert(src_image->samples == 1); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - - VkFormat image_format = choose_iview_format(src_image, aspect); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = image_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, - .layerCount = pRegions[r].imageSubresource.layerCount, - }, - }, - cmd_buffer, 0); - - struct anv_image *dest_image = - make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - src_image->type, &cmd_buffer->pool->alloc, - &pRegions[r]); - - unsigned num_slices; - if (src_image->type == 
VK_IMAGE_TYPE_3D) { - assert(pRegions[r].imageSubresource.layerCount == 1); - num_slices = pRegions[r].imageExtent.depth; - } else { - assert(pRegions[r].imageExtent.depth == 1); - num_slices = pRegions[r].imageSubresource.layerCount; - } - - for (unsigned slice = 0; slice < num_slices; slice++) { - VkOffset3D src_offset = pRegions[r].imageOffset; - src_offset.z += slice; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(dest_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = dest_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - meta_emit_blit(cmd_buffer, - anv_image_from_handle(srcImage), - &src_iview, - src_offset, - pRegions[r].imageExtent, - dest_image, - &dest_iview, - (VkOffset3D) { 0, 0, 0 }, - pRegions[r].imageExtent, - VK_FILTER_NEAREST); - - /* Once we've done the blit, all of the actual information about - * the image is embedded in the command buffer so we can just - * increment the offset directly in the image effectively - * re-binding it to different backing memory. 
- */ - dest_image->offset += dest_image->extent.width * - dest_image->extent.height * - src_image->format->isl_layout->bs; - } - - anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), - &cmd_buffer->pool->alloc); - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - static void * meta_alloc(void* _device, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) @@ -1573,24 +165,5 @@ anv_device_finish_meta(struct anv_device *device) { anv_device_finish_meta_resolve_state(device); anv_device_finish_meta_clear_state(device); - - /* Blit */ - anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_1d_src, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_3d_src, - &device->meta_state.alloc); - anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout, - &device->meta_state.alloc); - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout, - &device->meta_state.alloc); + anv_device_finish_meta_blit_state(device); } diff --git a/src/vulkan/anv_meta.h b/src/vulkan/anv_meta.h index 91c3c7d21ca..d33e9e6d8ba 100644 --- a/src/vulkan/anv_meta.h +++ b/src/vulkan/anv_meta.h @@ -50,6 +50,9 @@ void anv_device_finish_meta_clear_state(struct anv_device *device); VkResult anv_device_init_meta_resolve_state(struct anv_device *device); void anv_device_finish_meta_resolve_state(struct anv_device *device); +VkResult anv_device_init_meta_blit_state(struct anv_device *device); +void anv_device_finish_meta_blit_state(struct anv_device *device); + void anv_meta_save(struct anv_meta_saved_state *state, const struct anv_cmd_buffer *cmd_buffer, diff 
--git a/src/vulkan/anv_meta_blit.c b/src/vulkan/anv_meta_blit.c new file mode 100644 index 00000000000..07d99792cf1 --- /dev/null +++ b/src/vulkan/anv_meta_blit.c @@ -0,0 +1,1448 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_meta.h" +#include "nir/nir_builder.h" + +struct blit_region { + VkOffset3D src_offset; + VkExtent3D src_extent; + VkOffset3D dest_offset; + VkExtent3D dest_extent; +}; + +static nir_shader * +build_nir_vertex_shader(void) +{ + nir_builder b; + + const struct glsl_type *vertex_type = glsl_vec4_type(); + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); + + nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vertex_type, "a_pos"); + pos_in->data.location = VERT_ATTRIB_GENERIC0; + nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vertex_type, "gl_Position"); + pos_out->data.location = VARYING_SLOT_POS; + nir_copy_var(&b, pos_out, pos_in); + + /* Add one more pass-through attribute. For clear shaders, this is used + * to store the color and for blit shaders it's the texture coordinate. + */ + const struct glsl_type *attr_type = glsl_vec4_type(); + nir_variable *attr_in = nir_variable_create(b.shader, nir_var_shader_in, + attr_type, "a_attr"); + attr_in->data.location = VERT_ATTRIB_GENERIC1; + nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out, + attr_type, "v_attr"); + attr_out->data.location = VARYING_SLOT_VAR0; + attr_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; + nir_copy_var(&b, attr_out, attr_in); + + return b.shader; +} + +static nir_shader * +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) +{ + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); + + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + glsl_vec4_type(), "v_attr"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; + + /* Swizzle the array index which comes in as Z coordinate into the right + * position. 
+ */ + unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; + nir_ssa_def *const tex_pos = + nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, + (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); + + const struct glsl_type *sampler_type = + glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, + glsl_get_base_type(color_type)); + nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, + sampler_type, "s_tex"); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + tex->sampler_dim = tex_dim; + tex->op = nir_texop_tex; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(tex_pos); + tex->dest_type = nir_type_float; /* TODO */ + tex->is_array = glsl_sampler_type_is_array(sampler_type); + tex->coord_components = tex_pos->num_components; + tex->texture = nir_deref_var_create(tex, sampler); + tex->sampler = nir_deref_var_create(tex, sampler); + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + color_type, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + nir_store_var(&b, color_out, &tex->dest.ssa, 4); + + return b.shader; +} + +static void +meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *saved_state) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); +} + +/* Returns the user-provided VkBufferImageCopy::imageOffset in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. 
+ */ +static struct VkOffset3D +meta_region_offset_el(const struct anv_image * image, + const struct VkOffset3D * offset) +{ + const struct isl_format_layout * isl_layout = image->format->isl_layout; + return (VkOffset3D) { + .x = offset->x / isl_layout->bw, + .y = offset->y / isl_layout->bh, + .z = offset->z / isl_layout->bd, + }; +} + +/* Returns the user-provided VkBufferImageCopy::imageExtent in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. + */ +static struct VkExtent3D +meta_region_extent_el(const VkFormat format, + const struct VkExtent3D * extent) +{ + const struct isl_format_layout * isl_layout = + anv_format_for_vk_format(format)->isl_layout; + return (VkExtent3D) { + .width = DIV_ROUND_UP(extent->width , isl_layout->bw), + .height = DIV_ROUND_UP(extent->height, isl_layout->bh), + .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), + }; +} + +static void +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *src_image, + struct anv_image_view *src_iview, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_image *dest_image, + struct anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D dest_extent, + VkFilter blit_filter) +{ + struct anv_device *device = cmd_buffer->device; + VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; + + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + assert(src_image->samples == dest_image->samples); + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + dest_offset.x + dest_extent.width, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)(src_offset.x + 
src_extent.width) / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)src_offset.y / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + }); + + VkSampler sampler; + ANV_CALL(CreateSampler)(anv_device_to_handle(device), + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = blit_filter, + .minFilter = blit_filter, + }, &cmd_buffer->pool->alloc, &sampler); + + VkDescriptorSet set; + anv_AllocateDescriptorSets(anv_device_to_handle(device), + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = dummy_desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout + }, &set); + anv_UpdateDescriptorSets(anv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding 
= 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(dest_iview), + }, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .layers = 1 + }, &cmd_buffer->pool->alloc, &fb); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit.render_pass, + .framebuffer = fb, + .renderArea = { + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + VkPipeline pipeline; + + switch (src_image->type) { + case VK_IMAGE_TYPE_1D: + pipeline = device->meta_state.blit.pipeline_1d_src; + break; + case VK_IMAGE_TYPE_2D: + pipeline = device->meta_state.blit.pipeline_2d_src; + break; + case VK_IMAGE_TYPE_3D: + pipeline = device->meta_state.blit.pipeline_3d_src; + break; + default: + unreachable(!"bad VkImageType"); + } + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + 
VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, + &set, 0, NULL); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. + */ + anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_DestroySampler(anv_device_to_handle(device), sampler, + &cmd_buffer->pool->alloc); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb, + &cmd_buffer->pool->alloc); +} + +static void +meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, + const struct anv_meta_saved_state *saved_state) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static VkFormat +vk_format_for_size(int bs) +{ + /* Note: We intentionally use the 4-channel formats whenever we can. + * This is so that, when we do a RGB <-> RGBX copy, the two formats will + * line up even though one of them is 3/4 the size of the other. 
+ */ + switch (bs) { + case 1: return VK_FORMAT_R8_UINT; + case 2: return VK_FORMAT_R8G8_UINT; + case 3: return VK_FORMAT_R8G8B8_UINT; + case 4: return VK_FORMAT_R8G8B8A8_UINT; + case 6: return VK_FORMAT_R16G16B16_UINT; + case 8: return VK_FORMAT_R16G16B16A16_UINT; + case 12: return VK_FORMAT_R32G32B32_UINT; + case 16: return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Invalid format block size"); + } +} + +static void +do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *src, uint64_t src_offset, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat copy_format) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = copy_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = 0, + .flags = 0, + }; + + VkImage src_image; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &src_image); + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + anv_image_from_handle(src_image)->bo = src; + anv_image_from_handle(src_image)->offset = src_offset; + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = dest_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(src_image), + &src_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + anv_image_from_handle(dest_image), + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + VK_FILTER_NEAREST); + + anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc); +} + +void anv_CmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + 
uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; + uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset; + uint64_t copy_size = pRegions[r].size; + + /* First, we compute the biggest format that can be used with the + * given offsets and size. + */ + int bs = 16; + + int fs = ffs(src_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(src_offset % bs == 0); + + fs = ffs(dest_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(dest_offset % bs == 0); + + fs = ffs(pRegions[r].size) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(pRegions[r].size % bs == 0); + + VkFormat copy_format = vk_format_for_size(bs); + + /* This is maximum possible width/height our HW can handle */ + uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; + while (copy_size >= max_copy_size) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, max_surface_dim, copy_format); + copy_size -= max_copy_size; + src_offset += max_copy_size; + dest_offset += max_copy_size; + } + + uint64_t height = copy_size / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + uint64_t rect_copy_size = height * max_surface_dim * bs; + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, height, copy_format); + copy_size -= rect_copy_size; + src_offset += rect_copy_size; + dest_offset += rect_copy_size; + } + + if (copy_size != 0) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + copy_size / bs, 1, copy_format); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdUpdateBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, 
commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + /* We can't quite grab a full block because the state stream needs a + * little data at the top to build its linked list. + */ + const uint32_t max_update_size = + cmd_buffer->device->dynamic_state_block_pool.block_size - 64; + + assert(max_update_size < (1 << 14) * 4); + + while (dataSize) { + const uint32_t copy_size = MIN2(dataSize, max_update_size); + + struct anv_state tmp_data = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); + + memcpy(tmp_data.map, pData, copy_size); + + VkFormat format; + int bs; + if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + do_buffer_copy(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, + tmp_data.offset, + dst_buffer->bo, dst_buffer->offset + dstOffset, + copy_size / bs, 1, format); + + dataSize -= copy_size; + dstOffset += copy_size; + pData = (void *)pData + copy_size; + } +} + +static VkFormat +choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + struct isl_surf *surf = + &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; + + /* vkCmdCopyImage behaves like memcpy. Therefore we choose identical UINT + * formats for the source and destination image views. + * + * From the Vulkan spec (2015-12-30): + * + * vkCmdCopyImage performs image copies in a similar manner to a host + * memcpy. It does not perform general-purpose conversions such as + * scaling, resizing, blending, color-space conversion, or format + * conversions. Rather, it simply copies raw image data. 
vkCmdCopyImage + * can copy between images with different formats, provided the formats + * are compatible as defined below. + * + * [The spec later defines compatibility as having the same number of + * bytes per block]. + */ + return vk_format_for_size(isl_format_layouts[surf->format].bs); +} + +static VkFormat +choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + /* vkCmdCopy* commands behave like memcpy. Therefore we choose + * compatible UINT formats for the source and destination image views. + * + * For the buffer, we go back to the original image format and get + * the format as if it were linear. This way, for RGB formats, we get + * an RGB format here even if the tiled image is RGBA. XXX: This doesn't + * work if the buffer is the destination. + */ + enum isl_format linear_format = anv_get_isl_format(format, aspect, + VK_IMAGE_TILING_LINEAR, + NULL); + + return vk_format_for_size(isl_format_layouts[linear_format].bs); +} + +void anv_CmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdCopyImage can be used to copy image data between multisample + * images, but both images must have the same number of samples. 
+ */ + assert(src_image->samples == dest_image->samples); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + assert(pRegions[r].srcSubresource.aspectMask == + pRegions[r].dstSubresource.aspectMask); + + VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; + + VkFormat src_format = choose_iview_format(src_image, aspect); + VkFormat dst_format = choose_iview_format(dest_image, aspect); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = src_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .layerCount = pRegions[r].dstSubresource.layerCount, + }, + }, + cmd_buffer, 0); + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffset.x, + .y = pRegions[r].dstOffset.y, + .z = 0, + }; + + unsigned num_slices; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].srcSubresource.layerCount == 1 && + pRegions[r].dstSubresource.layerCount == 1); + num_slices = pRegions[r].extent.depth; + } else { + assert(pRegions[r].srcSubresource.layerCount == + pRegions[r].dstSubresource.layerCount); + assert(pRegions[r].extent.depth == 1); + num_slices = pRegions[r].dstSubresource.layerCount; + } + + const uint32_t dest_base_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffset); + + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].srcOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = 
destImage, + .viewType = anv_meta_get_view_type(dest_image), + .format = dst_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_array_slice + slice, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + src_offset, + pRegions[r].extent, + dest_image, &dest_iview, + dest_offset, + pRegions[r].extent, + VK_FILTER_NEAREST); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdBlitImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit* pRegions, + VkFilter filter) + +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdBlitImage must not be used for multisampled source or + * destination images. Use vkCmdResolveImage for this purpose. 
+ */ + assert(src_image->samples == 1); + assert(dest_image->samples == 1); + + anv_finishme("respect VkFilter"); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = src_image->vk_format, + .subresourceRange = { + .aspectMask = pRegions[r].srcSubresource.aspectMask, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffsets[0].x, + .y = pRegions[r].dstOffsets[0].y, + .z = 0, + }; + + if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x || + pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y || + pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x || + pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y) + anv_finishme("FINISHME: Allow flipping in blits"); + + const VkExtent3D dest_extent = { + .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x, + .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y, + }; + + const VkExtent3D src_extent = { + .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x, + .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y, + }; + + const uint32_t dest_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffsets[0]); + + if (pRegions[r].srcSubresource.layerCount > 1) + anv_finishme("FINISHME: copy multiple array layers"); + + if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z || + pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z) + anv_finishme("FINISHME: copy multiple depth layers"); + + struct 
anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = anv_meta_get_view_type(dest_image), + .format = dest_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_array_slice, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + pRegions[r].srcOffsets[0], src_extent, + dest_image, &dest_iview, + dest_offset, dest_extent, + filter); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +static struct anv_image * +make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, + VkImageUsageFlags usage, + VkImageType image_type, + const VkAllocationCallbacks *alloc, + const VkBufferImageCopy *copy) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); + + VkExtent3D extent = copy->imageExtent; + if (copy->bufferRowLength) + extent.width = copy->bufferRowLength; + if (copy->bufferImageHeight) + extent.height = copy->bufferImageHeight; + extent.depth = 1; + extent = meta_region_extent_el(format, &extent); + + VkImageAspectFlags aspect = copy->imageSubresource.aspectMask; + VkFormat buffer_format = choose_buffer_format(format, aspect); + + VkImage vk_image; + VkResult result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = buffer_format, + .extent = extent, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = usage, + .flags = 0, + }, alloc, &vk_image); + assert(result == VK_SUCCESS); + + ANV_FROM_HANDLE(anv_image, image, vk_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + image->bo = buffer->bo; + image->offset = buffer->offset + copy->bufferOffset; + + return image; +} + +void anv_CmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_meta_saved_state saved_state; + + /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(dest_image->samples == 1); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(dest_image, aspect); + + struct anv_image *src_image = + make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format, + VK_IMAGE_USAGE_SAMPLED_BIT, + dest_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); + + const uint32_t dest_base_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource, + &pRegions[r].imageOffset); + + unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(src_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + uint32_t img_x = 0; 
+ uint32_t img_y = 0; + uint32_t img_o = 0; + if (isl_format_is_compressed(dest_image->format->isl_format)) + isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, + &dest_image->color_surface.isl, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer + slice_array, + pRegions[r].imageOffset.z + slice_3d, + &img_o, &img_x, &img_y); + + VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, & pRegions[r].imageOffset); + dest_offset_el.x += img_x; + dest_offset_el.y += img_y; + dest_offset_el.z = 0; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = anv_meta_get_view_type(dest_image), + .format = image_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_array_slice + + slice_array + slice_3d, + .layerCount = 1 + }, + }, + cmd_buffer, img_o); + + const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, + &pRegions[r].imageExtent); + + meta_emit_blit(cmd_buffer, + src_image, + &src_iview, + (VkOffset3D){0, 0, 0}, + img_extent_el, + dest_image, + &dest_iview, + dest_offset_el, + img_extent_el, + VK_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. 
+ */ + src_image->offset += src_image->extent.width * + src_image->extent.height * + src_image->format->isl_layout->bs; + + if (dest_image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; + } + + anv_DestroyImage(vk_device, anv_image_to_handle(src_image), + &cmd_buffer->pool->alloc); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_meta_saved_state saved_state; + + + /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(src_image->samples == 1); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(src_image, aspect); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = image_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, + .layerCount = pRegions[r].imageSubresource.layerCount, + }, + }, + cmd_buffer, 0); + + struct anv_image *dest_image = + make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + src_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); + + unsigned num_slices; + if (src_image->type == 
VK_IMAGE_TYPE_3D) { + assert(pRegions[r].imageSubresource.layerCount == 1); + num_slices = pRegions[r].imageExtent.depth; + } else { + assert(pRegions[r].imageExtent.depth == 1); + num_slices = pRegions[r].imageSubresource.layerCount; + } + + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].imageOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(srcImage), + &src_iview, + src_offset, + pRegions[r].imageExtent, + dest_image, + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent, + VK_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. 
+ */ + dest_image->offset += dest_image->extent.width * + dest_image->extent.height * + src_image->format->isl_layout->bs; + } + + anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), + &cmd_buffer->pool->alloc); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void +anv_device_finish_meta_blit_state(struct anv_device *device) +{ + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_3d_src, + &device->meta_state.alloc); + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); +} + +VkResult +anv_device_init_meta_blit_state(struct anv_device *device) +{ + VkResult result; + + result = anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + 
.pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); + if (result != VK_SUCCESS) + goto fail; + + /* We don't use a vertex shader for clearing, but instead build and pass + * the VUEs directly to the rasterization backend. However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. + */ + struct anv_shader_module vs = { + .nir = build_nir_vertex_shader(), + }; + + struct anv_shader_module fs_1d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), + }; + + struct anv_shader_module fs_2d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), + }; + + struct anv_shader_module fs_3d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), + }; + + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + { + .binding = 1, + .stride = 5 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = 8 + } + } + }; + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }; + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &ds_layout_info, + &device->meta_state.alloc, + &device->meta_state.blit.ds_layout); + if (result != VK_SUCCESS) + goto fail_render_pass; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); + if (result != VK_SUCCESS) + goto fail_descriptor_set_layout; + + VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs), + .pName = "main", + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! 
*/ + .pName = "main", + .pSpecializationInfo = NULL + }, + }; + + const VkGraphicsPipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(pipeline_shader_stages), + .pStages = pipeline_shader_stages, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .colorWriteMask = + VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT }, + } + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + 
VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + .renderPass = device->meta_state.blit.render_pass, + .subpass = 0, + }; + + const struct anv_graphics_pipeline_create_info anv_pipeline_info = { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_layout; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_1d; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_2d; + + ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + + return VK_SUCCESS; + + fail_pipeline_2d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + + fail_pipeline_1d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + + fail_pipeline_layout: + 
anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + fail_descriptor_set_layout: + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); + fail_render_pass: + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + + ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + fail: + return result; +} -- cgit v1.2.3 From 3c8dc1afd1101ff11d942a056a22ae43c0611bb7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 12 Feb 2016 10:40:24 -0800 Subject: nir/spirv/glsl: Clean up the row-skipping swizzle logic a bit --- src/compiler/nir/spirv/vtn_glsl450.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/vtn_glsl450.c b/src/compiler/nir/spirv/vtn_glsl450.c index 5fb73df0566..6b649fd085b 100644 --- a/src/compiler/nir/spirv/vtn_glsl450.c +++ b/src/compiler/nir/spirv/vtn_glsl450.c @@ -68,11 +68,8 @@ build_mat4_det(nir_builder *b, nir_ssa_def **col) nir_ssa_def *subdet[4]; for (unsigned i = 0; i < 4; i++) { unsigned swiz[3]; - for (unsigned j = 0, k = 0; j < 3; j++, k++) { - if (k == i) - k++; /* skip column */ - swiz[j] = k; - } + for (unsigned j = 0; j < 3; j++) + swiz[j] = j + (j >= i); nir_ssa_def *subcol[3]; subcol[0] = nir_swizzle(b, col[1], swiz, 3, true); @@ -121,11 +118,8 @@ build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src, } else { /* Swizzle to get all but the specified row */ unsigned swiz[3]; - for (unsigned j = 0, k = 0; j < 3; j++, k++) { - if (k == row) - k++; /* skip column */ - swiz[j] = k; - } + for (unsigned j = 0; j < 3; j++) + swiz[j] = j + (j >= row); /* Grab all but the specified column */ nir_ssa_def *subcol[3]; -- cgit v1.2.3 From 42155abdd7265efd32ea4678fb9698c2ac4ee18a Mon Sep 17 00:00:00 2001 From: 
Jason Ekstrand Date: Fri, 12 Feb 2016 11:00:08 -0800 Subject: anv: Add a clfush_range helper function --- src/vulkan/anv_private.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 6da714ad8b2..22df64c5825 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -418,14 +418,11 @@ struct anv_state_stream { #define CACHELINE_SIZE 64 #define CACHELINE_MASK 63 -static void inline -anv_state_clflush(struct anv_state state) +static inline void +anv_clflush_range(void *start, size_t size) { - /* state.map may not be cacheline aligned, so round down the start pointer - * to a cacheline boundary so we flush all pages that contain the state. - */ - void *end = state.map + state.alloc_size; - void *p = (void *) (((uintptr_t) state.map) & ~CACHELINE_MASK); + void *p = (void *) (((uintptr_t) start) & ~CACHELINE_MASK); + void *end = start + size; __builtin_ia32_mfence(); while (p < end) { @@ -434,6 +431,12 @@ anv_state_clflush(struct anv_state state) } } +static void inline +anv_state_clflush(struct anv_state state) +{ + anv_clflush_range(state.map, state.alloc_size); +} + void anv_block_pool_init(struct anv_block_pool *pool, struct anv_device *device, uint32_t block_size); void anv_block_pool_finish(struct anv_block_pool *pool); -- cgit v1.2.3 From 699f21216f0d026c883b1cd2b3676fdc6e3668d8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 12 Feb 2016 11:00:42 -0800 Subject: anv/device: clflush simple batches if !LLC --- src/vulkan/anv_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 0ff5c9f7aa9..05303fe1ca6 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -707,6 +707,8 @@ anv_device_submit_simple_batch(struct anv_device *device, return result; memcpy(bo.map, batch->start, size); + if (!device->info.has_llc) + anv_clflush_range(bo.map, 
size); exec2_objects[0].handle = bo.gem_handle; exec2_objects[0].relocation_count = 0; -- cgit v1.2.3 From 97b5a07378c45ef6f74a9c0f650b5704ea1c0db0 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 12 Feb 2016 10:27:58 -0800 Subject: anv/meta_blit: Coalesce glsl_vec4_type vars Just a refactor. No behavior change. Several expressions have the same value: they point to glsl_vec4_type(). Coalesce them into a single variable. --- src/vulkan/anv_meta_blit.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_blit.c b/src/vulkan/anv_meta_blit.c index 07d99792cf1..3a777705ef1 100644 --- a/src/vulkan/anv_meta_blit.c +++ b/src/vulkan/anv_meta_blit.c @@ -34,30 +34,28 @@ struct blit_region { static nir_shader * build_nir_vertex_shader(void) { + const struct glsl_type *vec4 = glsl_vec4_type(); nir_builder b; - const struct glsl_type *vertex_type = glsl_vec4_type(); - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vertex_type, "a_pos"); + vec4, "a_pos"); pos_in->data.location = VERT_ATTRIB_GENERIC0; nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, - vertex_type, "gl_Position"); + vec4, "gl_Position"); pos_out->data.location = VARYING_SLOT_POS; nir_copy_var(&b, pos_out, pos_in); /* Add one more pass-through attribute. For clear shaders, this is used * to store the color and for blit shaders it's the texture coordinate. 
*/ - const struct glsl_type *attr_type = glsl_vec4_type(); nir_variable *attr_in = nir_variable_create(b.shader, nir_var_shader_in, - attr_type, "a_attr"); + vec4, "a_attr"); attr_in->data.location = VERT_ATTRIB_GENERIC1; nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out, - attr_type, "v_attr"); + vec4, "v_attr"); attr_out->data.location = VARYING_SLOT_VAR0; attr_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; nir_copy_var(&b, attr_out, attr_in); @@ -68,15 +66,14 @@ build_nir_vertex_shader(void) static nir_shader * build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) { + const struct glsl_type *vec4 = glsl_vec4_type(); nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); - const struct glsl_type *color_type = glsl_vec4_type(); - nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - glsl_vec4_type(), "v_attr"); + vec4, "v_attr"); tex_pos_in->data.location = VARYING_SLOT_VAR0; /* Swizzle the array index which comes in as Z coordinate into the right @@ -89,7 +86,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) const struct glsl_type *sampler_type = glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, - glsl_get_base_type(color_type)); + glsl_get_base_type(vec4)); nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); sampler->data.descriptor_set = 0; @@ -110,7 +107,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) nir_builder_instr_insert(&b, &tex->instr); nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, - color_type, "f_color"); + vec4, "f_color"); color_out->data.location = FRAG_RESULT_DATA0; nir_store_var(&b, color_out, &tex->dest.ssa, 4); -- cgit v1.2.3 From 03bea8fda790592685d33743a5c9ce36b2fa91f0 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 12 Feb 2016 11:27:19 -0800 Subject: 
anv/meta_blit: Remove references to clearing Long ago, the blit code used to handle clearing and blitting. - Fix any comments that refer to clearing. - Rename shader var 'attr' to 'tex_pos'. The name 'attr' is an artifact of the time when the shader was used for blitting as well as clearing. --- src/vulkan/anv_meta_blit.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_blit.c b/src/vulkan/anv_meta_blit.c index 3a777705ef1..07ebcbc06b1 100644 --- a/src/vulkan/anv_meta_blit.c +++ b/src/vulkan/anv_meta_blit.c @@ -48,17 +48,14 @@ build_nir_vertex_shader(void) pos_out->data.location = VARYING_SLOT_POS; nir_copy_var(&b, pos_out, pos_in); - /* Add one more pass-through attribute. For clear shaders, this is used - * to store the color and for blit shaders it's the texture coordinate. - */ - nir_variable *attr_in = nir_variable_create(b.shader, nir_var_shader_in, - vec4, "a_attr"); - attr_in->data.location = VERT_ATTRIB_GENERIC1; - nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out, - vec4, "v_attr"); - attr_out->data.location = VARYING_SLOT_VAR0; - attr_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; - nir_copy_var(&b, attr_out, attr_in); + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_tex_pos"); + tex_pos_in->data.location = VERT_ATTRIB_GENERIC1; + nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "v_tex_pos"); + tex_pos_out->data.location = VARYING_SLOT_VAR0; + tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; + nir_copy_var(&b, tex_pos_out, tex_pos_in); return b.shader; } @@ -73,7 +70,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec4, "v_attr"); + vec4, "v_tex_pos"); tex_pos_in->data.location = VARYING_SLOT_VAR0; /* 
Swizzle the array index which comes in as Z coordinate into the right @@ -1205,7 +1202,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) if (result != VK_SUCCESS) goto fail; - /* We don't use a vertex shader for clearing, but instead build and pass + /* We don't use a vertex shader for blitting, but instead build and pass * the VUEs directly to the rasterization backend. However, we do need * to provide GLSL source for the vertex shader so that the compiler * does not dead-code our inputs. -- cgit v1.2.3 From 39a120aefe43f7b664713b5d0aff13f0a2a19168 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 10 Feb 2016 09:43:03 -0800 Subject: anv: Implement VkPipelineCache We hash the input SPIR-V, specialization constants, entrypoint and the shader key using SHA1 to determine a unique identifier for the combination. A VkPipelineCache is then a hash table mapping these identifiers to the corresponding prog_data and kernel data. --- src/vulkan/Makefile.am | 1 + src/vulkan/anv_device.c | 11 +- src/vulkan/anv_pipeline.c | 348 +++++++++++++++------------------- src/vulkan/anv_pipeline_cache.c | 405 ++++++++++++++++++++++++++++++++++++++++ src/vulkan/anv_private.h | 22 +++ 5 files changed, 589 insertions(+), 198 deletions(-) create mode 100644 src/vulkan/anv_pipeline_cache.c (limited to 'src') diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 55e40283e4f..081f6b67682 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -90,6 +90,7 @@ VULKAN_SOURCES = \ anv_nir_lower_push_constants.c \ anv_pass.c \ anv_pipeline.c \ + anv_pipeline_cache.c \ anv_private.h \ anv_query.c \ anv_util.c \ diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 05303fe1ca6..5e9c42b2fdc 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -388,6 +388,13 @@ void anv_GetPhysicalDeviceFeatures( }; } +void +anv_device_get_cache_uuid(void *uuid) +{ + memset(uuid, 0, VK_UUID_SIZE); + snprintf(uuid, VK_UUID_SIZE, 
"anv-%s", MESA_GIT_SHA1 + 4); +} + void anv_GetPhysicalDeviceProperties( VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties) @@ -526,8 +533,7 @@ void anv_GetPhysicalDeviceProperties( }; strcpy(pProperties->deviceName, pdevice->name); - snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_SIZE, - "anv-%s", MESA_GIT_SHA1 + 4); + anv_device_get_cache_uuid(pProperties->pipelineCacheUUID); } void anv_GetPhysicalDeviceQueueFamilyProperties( @@ -789,6 +795,7 @@ VkResult anv_CreateDevice( device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; device->instance = physical_device->instance; + device->chipset_id = physical_device->chipset_id; if (pAllocator) device->alloc = *pAllocator; diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 1b70b18fba1..21df3e081a3 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -27,6 +27,7 @@ #include #include +#include "util/mesa-sha1.h" #include "anv_private.h" #include "brw_nir.h" #include "anv_nir.h" @@ -59,6 +60,8 @@ VkResult anv_CreateShaderModule( module->size = pCreateInfo->codeSize; memcpy(module->data, pCreateInfo->pCode, module->size); + _mesa_sha1_compute(module->data, module->size, module->sha1); + *pShaderModule = anv_shader_module_to_handle(module); return VK_SUCCESS; @@ -182,102 +185,6 @@ anv_shader_compile_to_nir(struct anv_device *device, return nir; } -void -anv_pipeline_cache_init(struct anv_pipeline_cache *cache, - struct anv_device *device) -{ - cache->device = device; - anv_state_stream_init(&cache->program_stream, - &device->instruction_block_pool); - pthread_mutex_init(&cache->mutex, NULL); -} - -void -anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) -{ - anv_state_stream_finish(&cache->program_stream); - pthread_mutex_destroy(&cache->mutex); -} - -static uint32_t -anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, - const void *data, size_t size) -{ - pthread_mutex_lock(&cache->mutex); - - struct anv_state state = - 
anv_state_stream_alloc(&cache->program_stream, size, 64); - - pthread_mutex_unlock(&cache->mutex); - - assert(size < cache->program_stream.block_pool->block_size); - - memcpy(state.map, data, size); - - if (!cache->device->info.has_llc) - anv_state_clflush(state); - - return state.offset; -} - -VkResult anv_CreatePipelineCache( - VkDevice _device, - const VkPipelineCacheCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipelineCache* pPipelineCache) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline_cache *cache; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); - assert(pCreateInfo->flags == 0); - - cache = anv_alloc2(&device->alloc, pAllocator, - sizeof(*cache), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (cache == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_pipeline_cache_init(cache, device); - - *pPipelineCache = anv_pipeline_cache_to_handle(cache); - - return VK_SUCCESS; -} - -void anv_DestroyPipelineCache( - VkDevice _device, - VkPipelineCache _cache, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - - anv_pipeline_cache_finish(cache); - - anv_free2(&device->alloc, pAllocator, cache); -} - -VkResult anv_GetPipelineCacheData( - VkDevice device, - VkPipelineCache pipelineCache, - size_t* pDataSize, - void* pData) -{ - *pDataSize = 0; - - return VK_SUCCESS; -} - -VkResult anv_MergePipelineCaches( - VkDevice device, - VkPipelineCache destCache, - uint32_t srcCacheCount, - const VkPipelineCache* pSrcCaches) -{ - stub_return(VK_SUCCESS); -} - void anv_DestroyPipeline( VkDevice _device, VkPipeline _pipeline, @@ -531,54 +438,65 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; struct brw_vs_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; 
populate_vs_prog_key(&pipeline->device->info, &key); - /* TODO: Look up shader in cache */ + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } - memset(prog_data, 0, sizeof(*prog_data)); + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_VERTEX, spec_info, - &prog_data->base.base); - if (nir == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_VERTEX, spec_info, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - void *mem_ctx = ralloc_context(NULL); + void *mem_ctx = ralloc_context(NULL); - if (module->nir == NULL) - ralloc_steal(mem_ctx, nir); + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); - prog_data->inputs_read = nir->info.inputs_read; - if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) - pipeline->writes_point_size = true; + prog_data->inputs_read = nir->info.inputs_read; + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; - brw_compute_vue_map(&pipeline->device->info, - &prog_data->base.vue_map, - nir->info.outputs_written, - nir->info.separate_shader); + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); - unsigned code_size; - const unsigned *shader_code = - brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, - NULL, false, -1, &code_size, NULL); - if (shader_code == NULL) { + unsigned code_size; + const unsigned *shader_code = + brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, false, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return 
vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); ralloc_free(mem_ctx); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - const uint32_t offset = - anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { - pipeline->vs_simd8 = offset; + pipeline->vs_simd8 = kernel; pipeline->vs_vec4 = NO_KERNEL; } else { pipeline->vs_simd8 = NO_KERNEL; - pipeline->vs_vec4 = offset; + pipeline->vs_vec4 = kernel; } - ralloc_free(mem_ctx); - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, &prog_data->base.base); @@ -597,46 +515,59 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; struct brw_gs_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; populate_gs_prog_key(&pipeline->device->info, &key); - /* TODO: Look up shader in cache */ + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } - memset(prog_data, 0, sizeof(*prog_data)); + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_GEOMETRY, spec_info, - &prog_data->base.base); - if (nir == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_GEOMETRY, spec_info, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - void *mem_ctx = ralloc_context(NULL); + void *mem_ctx = ralloc_context(NULL); - if (module->nir == NULL) - ralloc_steal(mem_ctx, nir); + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); - 
if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) - pipeline->writes_point_size = true; + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; - brw_compute_vue_map(&pipeline->device->info, - &prog_data->base.vue_map, - nir->info.outputs_written, - nir->info.separate_shader); + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + /* TODO: SIMD8 GS */ + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); - unsigned code_size; - const unsigned *shader_code = - brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, - NULL, -1, &code_size, NULL); - if (shader_code == NULL) { ralloc_free(mem_ctx); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - /* TODO: SIMD8 GS */ - pipeline->gs_kernel = - anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); - - ralloc_free(mem_ctx); + pipeline->gs_kernel = kernel; anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, &prog_data->base.base); @@ -657,47 +588,61 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; struct brw_wm_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; populate_wm_prog_key(&pipeline->device->info, info, extra, &key); if (pipeline->use_repclear) key.nr_color_regions = 1; - /* TODO: Look up shader in cache */ + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + 
hash = NULL; + } - memset(prog_data, 0, sizeof(*prog_data)); + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); - prog_data->binding_table.render_target_start = 0; + prog_data->binding_table.render_target_start = 0; - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_FRAGMENT, spec_info, - &prog_data->base); - if (nir == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_FRAGMENT, spec_info, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - void *mem_ctx = ralloc_context(NULL); + void *mem_ctx = ralloc_context(NULL); - if (module->nir == NULL) - ralloc_steal(mem_ctx, nir); + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); - unsigned code_size; - const unsigned *shader_code = - brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, - NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); - if (shader_code == NULL) { ralloc_free(mem_ctx); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - uint32_t offset = - anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); if (prog_data->no_8) pipeline->ps_simd8 = NO_KERNEL; else - pipeline->ps_simd8 = offset; + pipeline->ps_simd8 = kernel; if (prog_data->no_8 || prog_data->prog_offset_16) { - pipeline->ps_simd16 = offset + prog_data->prog_offset_16; + pipeline->ps_simd16 = kernel + prog_data->prog_offset_16; } else { pipeline->ps_simd16 = NO_KERNEL; } @@ -716,8 +661,6 @@ anv_pipeline_compile_fs(struct 
anv_pipeline *pipeline, pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16; } - ralloc_free(mem_ctx); - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, &prog_data->base); @@ -736,40 +679,53 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; struct brw_cs_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; populate_cs_prog_key(&pipeline->device->info, &key); - /* TODO: Look up shader in cache */ + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); - memset(prog_data, 0, sizeof(*prog_data)); + prog_data->binding_table.work_groups_start = 0; - prog_data->binding_table.work_groups_start = 0; + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_COMPUTE, spec_info, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_COMPUTE, spec_info, - &prog_data->base); - if (nir == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + prog_data->base.total_shared = nir->num_shared; - prog_data->base.total_shared = nir->num_shared; + void *mem_ctx = ralloc_context(NULL); - void *mem_ctx = ralloc_context(NULL); + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); - if (module->nir == NULL) - ralloc_steal(mem_ctx, nir); + unsigned code_size; + const unsigned *shader_code = + brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, + -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } - unsigned code_size; - const unsigned *shader_code = - 
brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, - -1, &code_size, NULL); - if (shader_code == NULL) { + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); ralloc_free(mem_ctx); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - pipeline->cs_simd = - anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); - ralloc_free(mem_ctx); + pipeline->cs_simd = kernel; anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, &prog_data->base); diff --git a/src/vulkan/anv_pipeline_cache.c b/src/vulkan/anv_pipeline_cache.c new file mode 100644 index 00000000000..c89bb2a2ee1 --- /dev/null +++ b/src/vulkan/anv_pipeline_cache.c @@ -0,0 +1,405 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "util/mesa-sha1.h" +#include "util/debug.h" +#include "anv_private.h" + +/* Remaining work: + * + * - Compact binding table layout so it's tight and not dependent on + * descriptor set layout. + * + * - Review prog_data struct for size and cacheability: struct + * brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8 + * bit quantities etc; param, pull_param, and image_params are pointers, we + * just need the compation map. use bit fields for all bools, eg + * dual_src_blend. + */ + +void +anv_pipeline_cache_init(struct anv_pipeline_cache *cache, + struct anv_device *device) +{ + cache->device = device; + anv_state_stream_init(&cache->program_stream, + &device->instruction_block_pool); + pthread_mutex_init(&cache->mutex, NULL); + + cache->kernel_count = 0; + cache->total_size = 0; + cache->table_size = 1024; + const size_t byte_size = cache->table_size * sizeof(cache->table[0]); + cache->table = malloc(byte_size); + + /* We don't consider allocation failure fatal, we just start with a 0-sized + * cache. */ + if (cache->table == NULL) + cache->table_size = 0; + else + memset(cache->table, 0xff, byte_size); +} + +void +anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) +{ + anv_state_stream_finish(&cache->program_stream); + pthread_mutex_destroy(&cache->mutex); + free(cache->table); +} + +struct cache_entry { + unsigned char sha1[20]; + uint32_t prog_data_size; + uint32_t kernel_size; + char prog_data[0]; + + /* kernel follows prog_data at next 64 byte aligned address */ +}; + +void +anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + struct mesa_sha1 *ctx; + + ctx = _mesa_sha1_init(); + _mesa_sha1_update(ctx, &key, sizeof(key)); + _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); + _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint)); + /* hash in shader stage, pipeline layout? 
*/ + if (spec_info) { + _mesa_sha1_update(ctx, spec_info->pMapEntries, + spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]); + _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize); + } + _mesa_sha1_final(ctx, hash); +} + +uint32_t +anv_pipeline_cache_search(struct anv_pipeline_cache *cache, + const unsigned char *sha1, void *prog_data) +{ + const uint32_t mask = cache->table_size - 1; + const uint32_t start = (*(uint32_t *) sha1); + + for (uint32_t i = 0; i < cache->table_size; i++) { + const uint32_t index = (start + i) & mask; + const uint32_t offset = cache->table[index]; + + if (offset == ~0) + return NO_KERNEL; + + struct cache_entry *entry = + cache->program_stream.block_pool->map + offset; + if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { + if (prog_data) + memcpy(prog_data, entry->prog_data, entry->prog_data_size); + + const uint32_t preamble_size = + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + + return offset + preamble_size; + } + } + + return NO_KERNEL; +} + +static void +anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, + struct cache_entry *entry, uint32_t entry_offset) +{ + const uint32_t mask = cache->table_size - 1; + const uint32_t start = (*(uint32_t *) entry->sha1); + + /* We'll always be able to insert when we get here. */ + assert(cache->kernel_count < cache->table_size / 2); + + for (uint32_t i = 0; i < cache->table_size; i++) { + const uint32_t index = (start + i) & mask; + if (cache->table[index] == ~0) { + cache->table[index] = entry_offset; + break; + } + } + + /* We don't include the alignment padding bytes when we serialize, so + * don't include taht in the the total size. 
*/ + cache->total_size += + sizeof(*entry) + entry->prog_data_size + entry->kernel_size; + cache->kernel_count++; +} + +static VkResult +anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) +{ + const uint32_t table_size = cache->table_size * 2; + const uint32_t old_table_size = cache->table_size; + const size_t byte_size = table_size * sizeof(cache->table[0]); + uint32_t *table; + uint32_t *old_table = cache->table; + + table = malloc(byte_size); + if (table == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + cache->table = table; + cache->table_size = table_size; + cache->kernel_count = 0; + cache->total_size = 0; + + memset(cache->table, 0xff, byte_size); + for (uint32_t i = 0; i < old_table_size; i++) { + const uint32_t offset = old_table[i]; + if (offset == ~0) + continue; + + struct cache_entry *entry = + cache->program_stream.block_pool->map + offset; + anv_pipeline_cache_add_entry(cache, entry, offset); + } + + free(old_table); + + return VK_SUCCESS; +} + +uint32_t +anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const unsigned char *sha1, + const void *kernel, size_t kernel_size, + const void *prog_data, size_t prog_data_size) +{ + pthread_mutex_lock(&cache->mutex); + struct cache_entry *entry; + + /* Meta pipelines don't have SPIR-V, so we can't hash them. + * Consequentally, they just don't get cached. + */ + const uint32_t preamble_size = sha1 ? 
+ align_u32(sizeof(*entry) + prog_data_size, 64) : + 0; + + const uint32_t size = preamble_size + kernel_size; + + assert(size < cache->program_stream.block_pool->block_size); + const struct anv_state state = + anv_state_stream_alloc(&cache->program_stream, size, 64); + + if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { + assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL); + entry = state.map; + memcpy(entry->sha1, sha1, sizeof(entry->sha1)); + entry->prog_data_size = prog_data_size; + memcpy(entry->prog_data, prog_data, prog_data_size); + entry->kernel_size = kernel_size; + + if (cache->kernel_count == cache->table_size / 2) + anv_pipeline_cache_grow(cache); + + /* Failing to grow that hash table isn't fatal, but may mean we don't + * have enough space to add this new kernel. Only add it if there's room. + */ + if (cache->kernel_count < cache->table_size / 2) + anv_pipeline_cache_add_entry(cache, entry, state.offset); + } + + pthread_mutex_unlock(&cache->mutex); + + memcpy(state.map + preamble_size, kernel, kernel_size); + + if (!cache->device->info.has_llc) + anv_state_clflush(state); + + return state.offset + preamble_size; +} + +static void +anv_pipeline_cache_load(struct anv_pipeline_cache *cache, + const void *data, size_t size) +{ + struct anv_device *device = cache->device; + uint8_t uuid[VK_UUID_SIZE]; + struct { + uint32_t device_id; + uint8_t uuid[VK_UUID_SIZE]; + } header; + + if (size < sizeof(header)) + return; + memcpy(&header, data, sizeof(header)); + if (header.device_id != device->chipset_id) + return; + anv_device_get_cache_uuid(uuid); + if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0) + return; + + const void *end = data + size; + const void *p = data + sizeof(header); + + while (p < end) { + /* The kernels aren't 64 byte aligned in the serialized format so + * they're always right after the prog_data. 
+ */ + const struct cache_entry *entry = p; + const void *kernel = &entry->prog_data[entry->prog_data_size]; + + anv_pipeline_cache_upload_kernel(cache, entry->sha1, + kernel, entry->kernel_size, + entry->prog_data, entry->prog_data_size); + p = kernel + entry->kernel_size; + } +} + +VkResult anv_CreatePipelineCache( + VkDevice _device, + const VkPipelineCacheCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineCache* pPipelineCache) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline_cache *cache; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + cache = anv_alloc2(&device->alloc, pAllocator, + sizeof(*cache), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cache == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_pipeline_cache_init(cache, device); + + if (pCreateInfo->initialDataSize > 0) + anv_pipeline_cache_load(cache, + pCreateInfo->pInitialData, + pCreateInfo->initialDataSize); + + *pPipelineCache = anv_pipeline_cache_to_handle(cache); + + return VK_SUCCESS; +} + +void anv_DestroyPipelineCache( + VkDevice _device, + VkPipelineCache _cache, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + anv_pipeline_cache_finish(cache); + + anv_free2(&device->alloc, pAllocator, cache); +} + +VkResult anv_GetPipelineCacheData( + VkDevice _device, + VkPipelineCache _cache, + size_t* pDataSize, + void* pData) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + const size_t size = 4 + VK_UUID_SIZE + cache->total_size; + + if (pData == NULL) { + *pDataSize = size; + return VK_SUCCESS; + } + + if (*pDataSize < size) { + *pDataSize = 0; + return VK_INCOMPLETE; + } + + void *p = pData; + memcpy(p, &device->chipset_id, sizeof(device->chipset_id)); + p += sizeof(device->chipset_id); + + 
anv_device_get_cache_uuid(p); + p += VK_UUID_SIZE; + + struct cache_entry *entry; + for (uint32_t i = 0; i < cache->table_size; i++) { + if (cache->table[i] == ~0) + continue; + + entry = cache->program_stream.block_pool->map + cache->table[i]; + + memcpy(p, entry, sizeof(*entry) + entry->prog_data_size); + p += sizeof(*entry) + entry->prog_data_size; + + void *kernel = (void *) entry + + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + + memcpy(p, kernel, entry->kernel_size); + p += entry->kernel_size; + } + + return VK_SUCCESS; +} + +static void +anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, + struct anv_pipeline_cache *src) +{ + for (uint32_t i = 0; i < src->table_size; i++) { + if (src->table[i] == ~0) + continue; + + struct cache_entry *entry = + src->program_stream.block_pool->map + src->table[i]; + + if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) + continue; + + const void *kernel = (void *) entry + + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + anv_pipeline_cache_upload_kernel(dst, entry->sha1, + kernel, entry->kernel_size, + entry->prog_data, entry->prog_data_size); + } +} + +VkResult anv_MergePipelineCaches( + VkDevice _device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches) +{ + ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache); + + for (uint32_t i = 0; i < srcCacheCount; i++) { + ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]); + + anv_pipeline_cache_merge(dst, src); + } + + return VK_SUCCESS; +} diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 22df64c5825..a5c390674a3 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -623,11 +623,24 @@ struct anv_pipeline_cache { struct anv_device * device; struct anv_state_stream program_stream; pthread_mutex_t mutex; + + uint32_t total_size; + uint32_t table_size; + uint32_t kernel_count; + uint32_t *table; }; void anv_pipeline_cache_init(struct anv_pipeline_cache 
*cache, struct anv_device *device); void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); +uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, + const unsigned char *sha1, void *prog_data); +uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const unsigned char *sha1, + const void *kernel, + size_t kernel_size, + const void *prog_data, + size_t prog_data_size); struct anv_device { VK_LOADER_DATA _loader_data; @@ -670,6 +683,9 @@ VkResult gen75_init_device_state(struct anv_device *device); VkResult gen8_init_device_state(struct anv_device *device); VkResult gen9_init_device_state(struct anv_device *device); +void anv_device_get_cache_uuid(void *uuid); + + void* anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); void anv_gem_munmap(void *p, uint64_t size); @@ -1318,10 +1334,16 @@ struct nir_shader; struct anv_shader_module { struct nir_shader * nir; + unsigned char sha1[20]; uint32_t size; char data[0]; }; +void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info); + static inline gl_shader_stage vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) { -- cgit v1.2.3 From b0c30b77d4c57d653ed657d069d748b5be88d709 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 12 Feb 2016 15:08:09 -0800 Subject: anv: Submit fence bo only after all command buffers We were submitting the fence bo after each command buffer in a multi command buffer submit, causing us to occasionally complete the fence too early. 
--- src/vulkan/anv_device.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 5e9c42b2fdc..e2efaafa25d 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1012,20 +1012,20 @@ VkResult anv_QueueSubmit( "execbuf2 failed: %m"); } - if (fence) { - ret = anv_gem_execbuffer(device, &fence->execbuf); - if (ret != 0) { - /* We don't know the real error. */ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "execbuf2 failed: %m"); - } - } - for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++) cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset; } } + if (fence) { + ret = anv_gem_execbuffer(device, &fence->execbuf); + if (ret != 0) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "execbuf2 failed: %m"); + } + } + return VK_SUCCESS; } -- cgit v1.2.3 From c136672c593399e831db0da8b8cc6d8a5d73c1d3 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 12 Feb 2016 15:10:22 -0800 Subject: anv: Disable snooping for allocator pools again The race we were seeing on cherryview was caused by the multi-submit problem with fences. We can now turn snooping off again an rely on clflush and we intended. --- src/vulkan/anv_allocator.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index e935cd71df0..4be149ea695 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -441,7 +441,6 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) goto fail; cleanup->gem_handle = gem_handle; -#if 0 /* Regular objects are created I915_CACHING_CACHED on LLC platforms and * I915_CACHING_NONE on non-LLC platforms. 
However, userptr objects are * always created as I915_CACHING_CACHED, which on non-LLC means @@ -453,7 +452,6 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) anv_gem_set_domain(pool->device, gem_handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); } -#endif /* Now that we successfull allocated everything, we can write the new * values back into pool. */ -- cgit v1.2.3 From 88042b9f1087460e3c1a7cf81a29aa536572bc17 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 12 Feb 2016 21:34:59 -0800 Subject: nir: Get rid of the C++ NIR_SRC/DEST_INIT macros These were originally added to reduce compiler warnings but aren't really needed. Getting rid of them reduces the diff between the Vulkan branch and master, so we might as well. --- src/compiler/nir/nir.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index c2e9e6fb2fb..9c478870626 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -526,11 +526,7 @@ typedef struct nir_src { bool is_ssa; } nir_src; -#ifdef __cplusplus -# define NIR_SRC_INIT nir_src() -#else -# define NIR_SRC_INIT (nir_src) { { NULL } } -#endif +#define NIR_SRC_INIT (nir_src) { { NULL } } #define nir_foreach_use(reg_or_ssa_def, src) \ list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) @@ -553,11 +549,7 @@ typedef struct { bool is_ssa; } nir_dest; -#ifdef __cplusplus -# define NIR_DEST_INIT nir_dest() -#else -# define NIR_DEST_INIT (nir_dest) { { { NULL } } } -#endif +#define NIR_DEST_INIT (nir_dest) { { { NULL } } } #define nir_foreach_def(reg, dest) \ list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) -- cgit v1.2.3 From 08ecd8a8d1a2ffc4376615a6ded69ac89bad8d81 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 15 Feb 2016 12:44:39 -0800 Subject: anv/meta_resolve: Set origin_upper_left on gl_FragCoord It's required by the spec and any shaders that don't set it will be 
broken. I'm not really sure how multisampling was even working before... --- src/vulkan/anv_meta_resolve.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c index 5aa2fef76bd..ae53292bde8 100644 --- a/src/vulkan/anv_meta_resolve.c +++ b/src/vulkan/anv_meta_resolve.c @@ -134,6 +134,7 @@ build_nir_fs(uint32_t num_samples) v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_position"); v_position->data.location = VARYING_SLOT_POS; + v_position->data.origin_upper_left = true; v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_position"); -- cgit v1.2.3 From 5d72d7b12d623c86539c807ea07f2e0bdc32836d Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 14 Feb 2016 13:20:06 -0800 Subject: anv: Fix misc simple warnings --- src/vulkan/anv_allocator.c | 2 +- src/vulkan/anv_batch_chain.c | 2 +- src/vulkan/anv_device.c | 4 ++++ src/vulkan/anv_query.c | 5 ++--- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 4be149ea695..1f858bbdff4 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -712,7 +712,7 @@ anv_state_stream_init(struct anv_state_stream *stream, void anv_state_stream_finish(struct anv_state_stream *stream) { - const uint32_t block_size = stream->block_pool->block_size; + VG(const uint32_t block_size = stream->block_pool->block_size); struct anv_state_stream_block *next = stream->block; while (next != NULL) { diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 13a3faee233..79e55197e85 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -355,7 +355,7 @@ anv_batch_bo_list_clone(const struct list_head *list, struct anv_batch_bo *prev_bbo = NULL; list_for_each_entry(struct anv_batch_bo, bbo, list, link) { - struct anv_batch_bo *new_bbo; + struct anv_batch_bo 
*new_bbo = NULL; result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo); if (result != VK_SUCCESS) break; diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index e2efaafa25d..a1671d931d5 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -854,6 +854,10 @@ VkResult anv_CreateDevice( case 9: result = gen9_init_device_state(device); break; + default: + /* Shouldn't get here as we don't create physical devices for any other + * gens. */ + unreachable("unhandled gen"); } if (result != VK_SUCCESS) goto fail_fd; diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index 5b052341e0c..e45b519c0f3 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -133,14 +133,13 @@ VkResult anv_GetQueryPoolResults( break; } case VK_QUERY_TYPE_PIPELINE_STATISTICS: - /* Not yet implemented */ - break; + unreachable("pipeline stats not supported"); case VK_QUERY_TYPE_TIMESTAMP: { result = slot[firstQuery + i].begin; break; } default: - assert(!"Invalid query type"); + unreachable("invalid pool type"); } if (flags & VK_QUERY_RESULT_64_BIT) { -- cgit v1.2.3 From 53eaa0a6b8486ff82e7d2e68c3d491866ad5a12f Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 14 Feb 2016 14:17:08 -0800 Subject: anv: Fix warning 3DSTATE_VERTEX_ELEMENTS setup This is a little more subtle. If elem_count is 0, nothing else happens in this function, so we return early to avoid warning about uninitialized 'p'. 
--- src/vulkan/genX_pipeline_util.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h index 077281d2f56..696e2be7c3f 100644 --- a/src/vulkan/genX_pipeline_util.h +++ b/src/vulkan/genX_pipeline_util.h @@ -85,14 +85,15 @@ emit_vertex_input(struct anv_pipeline *pipeline, #endif uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem; + if (elem_count == 0) + return; uint32_t *p; - if (elem_count > 0) { - const uint32_t num_dwords = 1 + elem_count * 2; - p = anv_batch_emitn(&pipeline->batch, num_dwords, - GENX(3DSTATE_VERTEX_ELEMENTS)); - memset(p + 1, 0, (num_dwords - 1) * 4); - } + + const uint32_t num_dwords = 1 + elem_count * 2; + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GENX(3DSTATE_VERTEX_ELEMENTS)); + memset(p + 1, 0, (num_dwords - 1) * 4); for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { const VkVertexInputAttributeDescription *desc = -- cgit v1.2.3 From 832f73f512e9e6c21d495d7b07e229482371ef2f Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 14 Feb 2016 22:45:41 -0800 Subject: anv: Share flush_pipeline_select_3d() between gen7 and gen8 --- src/vulkan/anv_private.h | 5 +++++ src/vulkan/gen7_cmd_buffer.c | 6 +----- src/vulkan/gen8_cmd_buffer.c | 21 ++++----------------- src/vulkan/genX_cmd_buffer.c | 13 +++++++++++++ 4 files changed, 23 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index a5c390674a3..160d5fe84e8 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1293,6 +1293,11 @@ void gen9_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, void anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); +void gen7_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); +void gen75_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); +void 
gen8_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); +void gen9_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); + void gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); void gen75_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 4bec8a620c5..be713798709 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -364,11 +364,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - if (cmd_buffer->state.current_pipeline != _3D) { - anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, - .PipelineSelection = _3D); - cmd_buffer->state.current_pipeline = _3D; - } + genX(flush_pipeline_select_3d)(cmd_buffer); if (vb_emit) { const uint32_t num_buffers = __builtin_popcount(vb_emit); diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 82f7eb1f48f..1635a53b32a 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -200,21 +200,6 @@ config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) } } -static void -flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer) -{ - config_l3(cmd_buffer, false); - - if (cmd_buffer->state.current_pipeline != _3D) { - anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), -#if ANV_GEN >= 9 - .MaskBits = 3, -#endif - .PipelineSelection = _3D); - cmd_buffer->state.current_pipeline = _3D; - } -} - static void __emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer) { @@ -261,7 +246,9 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - flush_pipeline_select_3d(cmd_buffer); + config_l3(cmd_buffer, false); + + genX(flush_pipeline_select_3d)(cmd_buffer); if (vb_emit) { const uint32_t num_buffers = 
__builtin_popcount(vb_emit); @@ -735,7 +722,7 @@ void genX(CmdBeginRenderPass)( cmd_buffer->state.pass = pass; anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); - flush_pipeline_select_3d(cmd_buffer); + genX(flush_pipeline_select_3d)(cmd_buffer); const VkRect2D *render_area = &pRenderPassBegin->renderArea; diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index 923f2086717..7c6ad43a9b2 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -535,3 +535,16 @@ void genX(CmdDispatchIndirect)( anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); } + +void +genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; + } +} -- cgit v1.2.3 From 18dd59538b888127a3894bf764d863e5067f4a4a Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 14 Feb 2016 22:46:25 -0800 Subject: anv/gen7: Call flush_pipeline_select_3d() from CmdBeginRenderPass --- src/vulkan/gen7_cmd_buffer.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index be713798709..b5b9d1c5ce0 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -664,6 +664,8 @@ void genX(CmdBeginRenderPass)( cmd_buffer->state.pass = pass; anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); + genX(flush_pipeline_select_3d)(cmd_buffer); + const VkRect2D *render_area = &pRenderPassBegin->renderArea; anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DRAWING_RECTANGLE, -- cgit v1.2.3 From ea694637ac8a7f6aeb9682a64b197fab6aabf9cd Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 14 Feb 2016 22:52:37 -0800 Subject: anv/gen7: Set 3DSTATE_SF depth buffer format correctly We need to pull 
this from the render pass information at state flush time. --- src/vulkan/gen7_cmd_buffer.c | 9 +++++++++ src/vulkan/gen7_pipeline.c | 3 --- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index b5b9d1c5ce0..18b36487c5f 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -442,15 +442,24 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) gen7_cmd_buffer_emit_scissor(cmd_buffer); if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_RENDER_TARGETS | ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) { bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + const struct anv_image *image = iview ? iview->image : NULL; + const uint32_t depth_format = image ? + isl_surf_get_depth_format(&cmd_buffer->device->isl_dev, + &image->depth_surface.isl) : D16_UNORM; + uint32_t sf_dw[GEN7_3DSTATE_SF_length]; struct GEN7_3DSTATE_SF sf = { GEN7_3DSTATE_SF_header, + .DepthBufferSurfaceFormat = depth_format, .LineWidth = cmd_buffer->state.dynamic.line_width, .GlobalDepthOffsetEnableSolid = enable_bias, .GlobalDepthOffsetEnableWireframe = enable_bias, diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index ec8db1d1956..4ec9cf5f019 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -42,9 +42,6 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, struct GEN7_3DSTATE_SF sf = { GEN7_3DSTATE_SF_header, - /* FIXME: Get this from pass info */ - .DepthBufferSurfaceFormat = D24_UNORM_X8_UINT, - /* LegacyGlobalDepthBiasEnable */ .StatisticsEnable = true, -- cgit v1.2.3 From ac4fd0ed211b9e4e13491c6d0737b2f629ae85bb Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 14 Feb 2016 22:56:23 -0800 Subject: 
anv/gen7: Fix pipeline selection in init_device_state() We need the 3D pipeline for the initial setup, not GPGPU. --- src/vulkan/gen7_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index ba0e75f0c3e..0f66a5056c7 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -44,7 +44,7 @@ genX(init_device_state)(struct anv_device *device) batch.end = (void *) cmds + sizeof(cmds); anv_batch_emit(&batch, GEN7_PIPELINE_SELECT, - .PipelineSelection = GPGPU); + .PipelineSelection = _3D); anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), .StatisticsEnable = true); -- cgit v1.2.3 From 85f67cf16e9823c858215e8a7359d18762c2653c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 14 Feb 2016 23:01:42 -0800 Subject: anv: Deduplicate render pass code This lets us share the renderpass code and depth/stencil state code between gen 7 and gen 8. --- src/vulkan/gen7_cmd_buffer.c | 170 ---------------------------------------- src/vulkan/gen8_cmd_buffer.c | 173 ----------------------------------------- src/vulkan/genX_cmd_buffer.c | 179 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 179 insertions(+), 343 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 18b36487c5f..7c12394b088 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -556,176 +556,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty = 0; } -static void -cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_image_view *iview = - anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - const struct anv_image *image = iview ? iview->image : NULL; - const struct anv_format *anv_format = - iview ? 
anv_format_for_vk_format(iview->vk_format) : NULL; - const bool has_depth = iview && anv_format->has_depth; - const bool has_stencil = iview && anv_format->has_stencil; - - /* Emit 3DSTATE_DEPTH_BUFFER */ - if (has_depth) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), - .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = true, - .StencilWriteEnable = has_stencil, - .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev, - &image->depth_surface.isl), - .SurfacePitch = image->depth_surface.isl.row_pitch - 1, - .SurfaceBaseAddress = { - .bo = image->bo, - .offset = image->depth_surface.offset, - }, - .Height = fb->height - 1, - .Width = fb->width - 1, - .LOD = 0, - .Depth = 1 - 1, - .MinimumArrayElement = 0, - .DepthBufferObjectControlState = GENX(MOCS), - .RenderTargetViewExtent = 1 - 1); - } else { - /* Even when no depth buffer is present, the hardware requires that - * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says: - * - * If a null depth buffer is bound, the driver must instead bind depth as: - * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D - * 3DSTATE_DEPTH.Width = 1 - * 3DSTATE_DEPTH.Height = 1 - * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM - * 3DSTATE_DEPTH.SurfaceBaseAddress = 0 - * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0 - * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0 - * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 - * - * The PRM is wrong, though. The width and height must be programmed to - * actual framebuffer's width and height, even when neither depth buffer - * nor stencil buffer is present. 
- */ - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), - .SurfaceType = SURFTYPE_2D, - .SurfaceFormat = D16_UNORM, - .Width = fb->width - 1, - .Height = fb->height - 1, - .StencilWriteEnable = has_stencil); - } - - /* Emit 3DSTATE_STENCIL_BUFFER */ - if (has_stencil) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), -# if (ANV_IS_HASWELL) - .StencilBufferEnable = true, -# endif - .StencilBufferObjectControlState = GENX(MOCS), - - /* Stencil buffers have strange pitch. The PRM says: - * - * The pitch must be set to 2x the value computed based on width, - * as the stencil buffer is stored with two rows interleaved. - */ - .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, - - .SurfaceBaseAddress = { - .bo = image->bo, - .offset = image->offset + image->stencil_surface.offset, - }); - } else { - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER); - } - - /* Disable hierarchial depth buffers. */ - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER); - - /* Clear the clear params. 
*/ - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CLEAR_PARAMS); -} - -/** - * @see anv_cmd_buffer_set_subpass() - */ -GENX_FUNC(GEN7, GEN7) void -genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) -{ - cmd_buffer->state.subpass = subpass; - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS; - - cmd_buffer_emit_depth_stencil(cmd_buffer); -} - -void genX(CmdBeginRenderPass)( - VkCommandBuffer commandBuffer, - const VkRenderPassBeginInfo* pRenderPassBegin, - VkSubpassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); - ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - - cmd_buffer->state.framebuffer = framebuffer; - cmd_buffer->state.pass = pass; - anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); - - genX(flush_pipeline_select_3d)(cmd_buffer); - - const VkRect2D *render_area = &pRenderPassBegin->renderArea; - - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DRAWING_RECTANGLE, - .ClippedDrawingRectangleYMin = render_area->offset.y, - .ClippedDrawingRectangleXMin = render_area->offset.x, - .ClippedDrawingRectangleYMax = - render_area->offset.y + render_area->extent.height - 1, - .ClippedDrawingRectangleXMax = - render_area->offset.x + render_area->extent.width - 1, - .DrawingRectangleOriginY = 0, - .DrawingRectangleOriginX = 0); - - gen7_cmd_buffer_set_subpass(cmd_buffer, pass->subpasses); - anv_cmd_buffer_clear_subpass(cmd_buffer); -} - -void genX(CmdNextSubpass)( - VkCommandBuffer commandBuffer, - VkSubpassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - - anv_cmd_buffer_resolve_subpass(cmd_buffer); - gen7_cmd_buffer_set_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); - 
anv_cmd_buffer_clear_subpass(cmd_buffer); -} - -void genX(CmdEndRenderPass)( - VkCommandBuffer commandBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - anv_cmd_buffer_resolve_subpass(cmd_buffer); - - /* Emit a flushing pipe control at the end of a pass. This is kind of a - * hack but it ensures that render targets always actually get written. - * Eventually, we should do flushing based on image format transitions - * or something of that nature. - */ - anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, - .PostSyncOperation = NoWrite, - .RenderTargetCacheFlushEnable = true, - .InstructionCacheInvalidateEnable = true, - .DepthCacheFlushEnable = true, - .VFCacheInvalidationEnable = true, - .TextureCacheInvalidationEnable = true, - .CommandStreamerStallEnable = true); -} - void genX(CmdSetEvent)( VkCommandBuffer commandBuffer, VkEvent event, diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 1635a53b32a..ebd6123454d 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -602,179 +602,6 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.compute_dirty = 0; } -static void -cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_image_view *iview = - anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - const struct anv_image *image = iview ? iview->image : NULL; - const struct anv_format *anv_format = - iview ? 
anv_format_for_vk_format(iview->vk_format) : NULL; - const bool has_depth = iview && anv_format->has_depth; - const bool has_stencil = iview && anv_format->has_stencil; - - /* FIXME: Implement the PMA stall W/A */ - /* FIXME: Width and Height are wrong */ - - /* Emit 3DSTATE_DEPTH_BUFFER */ - if (has_depth) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), - .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = true, - .StencilWriteEnable = has_stencil, - .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev, - &image->depth_surface.isl), - .SurfacePitch = image->depth_surface.isl.row_pitch - 1, - .SurfaceBaseAddress = { - .bo = image->bo, - .offset = image->depth_surface.offset, - }, - .Height = fb->height - 1, - .Width = fb->width - 1, - .LOD = 0, - .Depth = 1 - 1, - .MinimumArrayElement = 0, - .DepthBufferObjectControlState = GENX(MOCS), - .RenderTargetViewExtent = 1 - 1, - .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2); - } else { - /* Even when no depth buffer is present, the hardware requires that - * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says: - * - * If a null depth buffer is bound, the driver must instead bind depth as: - * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D - * 3DSTATE_DEPTH.Width = 1 - * 3DSTATE_DEPTH.Height = 1 - * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM - * 3DSTATE_DEPTH.SurfaceBaseAddress = 0 - * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0 - * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0 - * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 - * - * The PRM is wrong, though. The width and height must be programmed to - * actual framebuffer's width and height, even when neither depth buffer - * nor stencil buffer is present. 
- */ - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), - .SurfaceType = SURFTYPE_2D, - .SurfaceFormat = D16_UNORM, - .Width = fb->width - 1, - .Height = fb->height - 1, - .StencilWriteEnable = has_stencil); - } - - /* Emit 3DSTATE_STENCIL_BUFFER */ - if (has_stencil) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), - .StencilBufferEnable = true, - .StencilBufferObjectControlState = GENX(MOCS), - - /* Stencil buffers have strange pitch. The PRM says: - * - * The pitch must be set to 2x the value computed based on width, - * as the stencil buffer is stored with two rows interleaved. - */ - .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, - - .SurfaceBaseAddress = { - .bo = image->bo, - .offset = image->offset + image->stencil_surface.offset, - }, - .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2); - } else { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER)); - } - - /* Disable hierarchial depth buffers. */ - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER)); - - /* Clear the clear params. 
*/ - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS)); -} - -/** - * @see anv_cmd_buffer_set_subpass() - */ -void -genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) -{ - cmd_buffer->state.subpass = subpass; - - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - - cmd_buffer_emit_depth_stencil(cmd_buffer); -} - -void genX(CmdBeginRenderPass)( - VkCommandBuffer commandBuffer, - const VkRenderPassBeginInfo* pRenderPassBegin, - VkSubpassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); - ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - - cmd_buffer->state.framebuffer = framebuffer; - cmd_buffer->state.pass = pass; - anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); - - genX(flush_pipeline_select_3d)(cmd_buffer); - - const VkRect2D *render_area = &pRenderPassBegin->renderArea; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), - .ClippedDrawingRectangleYMin = render_area->offset.y, - .ClippedDrawingRectangleXMin = render_area->offset.x, - .ClippedDrawingRectangleYMax = - render_area->offset.y + render_area->extent.height - 1, - .ClippedDrawingRectangleXMax = - render_area->offset.x + render_area->extent.width - 1, - .DrawingRectangleOriginY = 0, - .DrawingRectangleOriginX = 0); - - genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); - anv_cmd_buffer_clear_subpass(cmd_buffer); -} - -void genX(CmdNextSubpass)( - VkCommandBuffer commandBuffer, - VkSubpassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - - anv_cmd_buffer_resolve_subpass(cmd_buffer); - genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); - anv_cmd_buffer_clear_subpass(cmd_buffer); -} - -void genX(CmdEndRenderPass)( - VkCommandBuffer 
commandBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - anv_cmd_buffer_resolve_subpass(cmd_buffer); - - /* Emit a flushing pipe control at the end of a pass. This is kind of a - * hack but it ensures that render targets always actually get written. - * Eventually, we should do flushing based on image format transitions - * or something of that nature. - */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .PostSyncOperation = NoWrite, - .RenderTargetCacheFlushEnable = true, - .InstructionCacheInvalidateEnable = true, - .DepthCacheFlushEnable = true, - .VFCacheInvalidationEnable = true, - .TextureCacheInvalidationEnable = true, - .CommandStreamerStallEnable = true); -} - static void emit_ps_depth_count(struct anv_batch *batch, struct anv_bo *bo, uint32_t offset) diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index 7c6ad43a9b2..e8b215e6377 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -548,3 +548,182 @@ genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.current_pipeline = _3D; } } + +static void +cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + const struct anv_image *image = iview ? iview->image : NULL; + const struct anv_format *anv_format = + iview ? 
anv_format_for_vk_format(iview->vk_format) : NULL; + const bool has_depth = iview && anv_format->has_depth; + const bool has_stencil = iview && anv_format->has_stencil; + + /* FIXME: Implement the PMA stall W/A */ + /* FIXME: Width and Height are wrong */ + + /* Emit 3DSTATE_DEPTH_BUFFER */ + if (has_depth) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = true, + .StencilWriteEnable = has_stencil, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev, + &image->depth_surface.isl), + .SurfacePitch = image->depth_surface.isl.row_pitch - 1, + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->depth_surface.offset, + }, + .Height = fb->height - 1, + .Width = fb->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GENX(MOCS), +#if ANV_GEN >= 8 + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2, +#endif + .RenderTargetViewExtent = 1 - 1); + } else { + /* Even when no depth buffer is present, the hardware requires that + * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says: + * + * If a null depth buffer is bound, the driver must instead bind depth as: + * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D + * 3DSTATE_DEPTH.Width = 1 + * 3DSTATE_DEPTH.Height = 1 + * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM + * 3DSTATE_DEPTH.SurfaceBaseAddress = 0 + * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 + * + * The PRM is wrong, though. The width and height must be programmed to + * actual framebuffer's width and height, even when neither depth buffer + * nor stencil buffer is present. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), + .SurfaceType = SURFTYPE_2D, + .SurfaceFormat = D16_UNORM, + .Width = fb->width - 1, + .Height = fb->height - 1, + .StencilWriteEnable = has_stencil); + } + + /* Emit 3DSTATE_STENCIL_BUFFER */ + if (has_stencil) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), +#if ANV_GEN >= 8 || ANV_IS_HASWELL + .StencilBufferEnable = true, +#endif + .StencilBufferObjectControlState = GENX(MOCS), + + /* Stencil buffers have strange pitch. The PRM says: + * + * The pitch must be set to 2x the value computed based on width, + * as the stencil buffer is stored with two rows interleaved. + */ + .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, + +#if ANV_GEN >= 8 + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2, +#endif + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->offset + image->stencil_surface.offset, + }); + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER)); + } + + /* Disable hierarchial depth buffers. */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER)); + + /* Clear the clear params. 
*/ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS)); +} + +/** + * @see anv_cmd_buffer_set_subpass() + */ +void +genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + cmd_buffer->state.subpass = subpass; + + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + cmd_buffer_emit_depth_stencil(cmd_buffer); +} + +void genX(CmdBeginRenderPass)( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; + anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + const VkRect2D *render_area = &pRenderPassBegin->renderArea; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); + anv_cmd_buffer_clear_subpass(cmd_buffer); +} + +void genX(CmdNextSubpass)( + VkCommandBuffer commandBuffer, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); + genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); + anv_cmd_buffer_clear_subpass(cmd_buffer); +} + +void genX(CmdEndRenderPass)( + VkCommandBuffer 
commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); + + /* Emit a flushing pipe control at the end of a pass. This is kind of a + * hack but it ensures that render targets always actually get written. + * Eventually, we should do flushing based on image format transitions + * or something of that nature. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .PostSyncOperation = NoWrite, + .RenderTargetCacheFlushEnable = true, + .InstructionCacheInvalidateEnable = true, + .DepthCacheFlushEnable = true, + .VFCacheInvalidationEnable = true, + .TextureCacheInvalidationEnable = true, + .CommandStreamerStallEnable = true); +} -- cgit v1.2.3 From b53edea76c025b1596e057bb3c04c556c0a4536a Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 14 Feb 2016 23:03:13 -0800 Subject: anv/gen7: Make disabling the FS work We disable the fragment shader for depth/stencil-only pipelines. This commit makes that work for gen7. --- src/vulkan/gen7_pipeline.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index 4ec9cf5f019..e329c8f42ab 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -323,9 +323,23 @@ genX(graphics_pipeline_create)( } if (pipeline->ps_ksp0 == NO_KERNEL) { - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); - anv_finishme("gen7 alternative to " - "3DSTATE_PS_EXTRA.PixelShaderValid = false"); + anv_finishme("disabling ps"); + + /* FIXME: generated header doesn't emit attr swizzle fields */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE); + + /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). 
*/ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + .StatisticsEnable = true, + .ThreadDispatchEnable = false, + .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ + .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ + .EarlyDepthStencilControl = EDSC_NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT); + + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + } else { const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || -- cgit v1.2.3 From 80ec20351ce622c5112d1914a2a97b3183ce6be0 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Mon, 15 Feb 2016 16:20:01 -0500 Subject: anv: Bump to 1.0.3 Probably this should be picked up from directly, or we should just assume that any 1.0.x is legal. --- src/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index a1671d931d5..a6ce1764f6b 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -216,7 +216,7 @@ VkResult anv_CreateInstance( uint32_t client_version = pCreateInfo->pApplicationInfo->apiVersion; if (VK_MAKE_VERSION(1, 0, 0) > client_version || - client_version > VK_MAKE_VERSION(1, 0, 2)) { + client_version > VK_MAKE_VERSION(1, 0, 3)) { return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, "Client requested version %d.%d.%d", VK_VERSION_MAJOR(client_version), -- cgit v1.2.3 From c2b2ebf1ed85b07d2e1ceffca89a7f7833dcb8d1 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 15 Feb 2016 17:40:15 -0800 Subject: anv: Add missing gen75_cmd_buffer_set_subpass() prototype --- src/vulkan/anv_private.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 160d5fe84e8..e2ae0119200 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1286,6 +1286,8 @@ void anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, 
void gen7_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); +void gen75_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); void gen8_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); void gen9_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, -- cgit v1.2.3 From a3672a241b61ed33ebcb5f4114436c9311f748c3 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 15 Feb 2016 17:57:03 -0800 Subject: anv/genxml: Include MBO bits for gen7 and gen75 --- src/vulkan/gen7.xml | 3 +++ src/vulkan/gen75.xml | 4 ++++ src/vulkan/gen_pack_header.py | 17 +++++++++++------ 3 files changed, 18 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7.xml b/src/vulkan/gen7.xml index d717b144085..8faffcbf876 100644 --- a/src/vulkan/gen7.xml +++ b/src/vulkan/gen7.xml @@ -877,6 +877,7 @@ + @@ -886,6 +887,7 @@ + @@ -1049,6 +1051,7 @@ + diff --git a/src/vulkan/gen75.xml b/src/vulkan/gen75.xml index afdccd0f6a4..c1cbce9fdb6 100644 --- a/src/vulkan/gen75.xml +++ b/src/vulkan/gen75.xml @@ -982,6 +982,7 @@ + @@ -991,6 +992,7 @@ + @@ -1154,6 +1156,7 @@ + @@ -1299,6 +1302,7 @@ + diff --git a/src/vulkan/gen_pack_header.py b/src/vulkan/gen_pack_header.py index fa2eed7a9ab..3cabb5864aa 100755 --- a/src/vulkan/gen_pack_header.py +++ b/src/vulkan/gen_pack_header.py @@ -206,7 +206,8 @@ class Field: def __init__(self, parser, attrs): self.parser = parser - self.name = safe_name(attrs["name"]) + if "name" in attrs: + self.name = safe_name(attrs["name"]) self.start = int(attrs["start"]) self.end = int(attrs["end"]) self.type = attrs["type"] @@ -225,7 +226,7 @@ class Field: if ufixed_match: self.type = 'ufixed' self.fractional_size = int(ufixed_match.group(2)) - + sfixed_match = Field.sfixed_pattern.match(self.type) if sfixed_match: self.type = 'sfixed' @@ -252,9 +253,11 @@ class Field: type = 'uint32_t' elif self.type in self.parser.structs: type = 
'struct ' + self.parser.gen_prefix(safe_name(self.type)) + elif self.type == 'mbo': + return else: print("#error unhandled type: %s" % self.type) - + print(" %-36s %s%s;" % (type, self.name, dim)) if len(self.values) > 0 and self.default == None: @@ -398,7 +401,9 @@ class Group: field_index = 0 for field in dw.fields: - name = field.name + field.dim + if field.type != "mbo": + name = field.name + field.dim + if field.type == "mbo": s = "__gen_mbo(%d, %d)" % \ (field.start - dword_start, field.end - dword_start) @@ -537,7 +542,7 @@ class Parser: print("struct %s {" % self.gen_prefix(name)) group.emit_template_struct("") print("};\n") - + def emit_pack_function(self, name, group): name = self.gen_prefix(name) print("static inline void\n%s_pack(__gen_user_data *data, void * restrict dst,\n%sconst struct %s * restrict values)\n{" % @@ -593,7 +598,7 @@ class Parser: name = value.name print('#define %-36s %4d' % (name.upper(), value.value)) print('') - + def parse(self, filename): file = open(filename, "rb") self.parser.ParseFile(file) -- cgit v1.2.3 From 48087cfc4e90f39547cb2ebc06e044f56ccb2983 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 16 Feb 2016 08:02:17 -0800 Subject: anv/icd.json: Update the ABI version --- src/vulkan/anv_icd.json.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_icd.json.in b/src/vulkan/anv_icd.json.in index ad069b3e2ff..40de043668b 100644 --- a/src/vulkan/anv_icd.json.in +++ b/src/vulkan/anv_icd.json.in @@ -2,7 +2,7 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@abs_top_builddir@/@LIB_DIR@/libvulkan.so.0.0.0", - "abi_versions": "0.210.1" + "abi_versions": "1.0.3" } } -- cgit v1.2.3 From ecd1d94d1c74be6481ebc6adde01fe73c7d41331 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 16 Feb 2016 22:55:33 +0100 Subject: anv: pCreateInfo->pApplicationInfo parameter to vkCreateInstance may be NULL Fix a NULL pointer dereference in anv_CreateInstance in case the 
pApplicationInfo field of the supplied VkInstanceCreateInfo structure is NULL [1]. [1] https://www.khronos.org/registry/vulkan/specs/1.0/apispec.html#VkInstanceCreateInfo Signed-off-by: Philipp Zabel --- src/vulkan/anv_device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index a6ce1764f6b..68639068324 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -214,7 +214,9 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - uint32_t client_version = pCreateInfo->pApplicationInfo->apiVersion; + uint32_t client_version = pCreateInfo->pApplicationInfo ? + pCreateInfo->pApplicationInfo->apiVersion : + VK_MAKE_VERSION(1, 0, 0); if (VK_MAKE_VERSION(1, 0, 0) > client_version || client_version > VK_MAKE_VERSION(1, 0, 3)) { return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, @@ -249,7 +251,7 @@ VkResult anv_CreateInstance( else instance->alloc = default_alloc; - instance->apiVersion = pCreateInfo->pApplicationInfo->apiVersion; + instance->apiVersion = client_version; instance->physicalDeviceCount = -1; _mesa_locale_init(); -- cgit v1.2.3 From 4a2d17f60652f5a57a34d0b099dea95fcda2b362 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 15 Feb 2016 21:24:40 -0800 Subject: anv: Revise PhysicalDeviceFeatures and remove FINISHME --- src/vulkan/anv_device.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 68639068324..dfc29e45444 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -339,13 +339,13 @@ void anv_GetPhysicalDeviceFeatures( VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures) { - anv_finishme("Get correct values for PhysicalDeviceFeatures"); + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); *pFeatures = (VkPhysicalDeviceFeatures) { 
.robustBufferAccess = true, - .fullDrawIndexUint32 = false, + .fullDrawIndexUint32 = true, .imageCubeArray = false, - .independentBlend = false, + .independentBlend = pdevice->info->gen >= 8, .geometryShader = true, .tessellationShader = false, .sampleRateShading = false, @@ -365,18 +365,18 @@ void anv_GetPhysicalDeviceFeatures( .textureCompressionETC2 = true, .textureCompressionASTC_LDR = true, .textureCompressionBC = true, - .occlusionQueryPrecise = false, /* FINISHME */ + .occlusionQueryPrecise = true, .pipelineStatisticsQuery = true, - .vertexPipelineStoresAndAtomics = false, + .vertexPipelineStoresAndAtomics = pdevice->info->gen >= 8, .fragmentStoresAndAtomics = true, .shaderTessellationAndGeometryPointSize = true, .shaderImageGatherExtended = true, .shaderStorageImageExtendedFormats = false, .shaderStorageImageMultisample = false, .shaderUniformBufferArrayDynamicIndexing = true, - .shaderSampledImageArrayDynamicIndexing = false, - .shaderStorageBufferArrayDynamicIndexing = false, - .shaderStorageImageArrayDynamicIndexing = false, + .shaderSampledImageArrayDynamicIndexing = true, + .shaderStorageBufferArrayDynamicIndexing = true, + .shaderStorageImageArrayDynamicIndexing = true, .shaderStorageImageReadWithoutFormat = false, .shaderStorageImageWriteWithoutFormat = true, .shaderClipDistance = false, -- cgit v1.2.3 From ecc67f1aacffd848887a711511772fc3f175279b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 16 Feb 2016 23:23:17 -0800 Subject: anv: Make driver and icd file installable Change the name of the .so to libvulkan_intel.so and add an installable icd with the installed paths. Keep the icd file with build-tree paths, but rename to dev_icd.json to make it clear that it's for development purposes. 
--- configure.ac | 1 - src/vulkan/.gitignore | 3 ++- src/vulkan/Makefile.am | 38 +++++++++++++++++++++++++++++--------- src/vulkan/anv_icd.json.in | 8 -------- src/vulkan/dev_icd.json.in | 7 +++++++ src/vulkan/intel_icd.json.in | 7 +++++++ 6 files changed, 45 insertions(+), 19 deletions(-) delete mode 100644 src/vulkan/anv_icd.json.in create mode 100644 src/vulkan/dev_icd.json.in create mode 100644 src/vulkan/intel_icd.json.in (limited to 'src') diff --git a/configure.ac b/configure.ac index acfca57a400..71bec624121 100644 --- a/configure.ac +++ b/configure.ac @@ -2541,7 +2541,6 @@ AC_CONFIG_FILES([Makefile src/mesa/drivers/x11/Makefile src/mesa/main/tests/Makefile src/vulkan/Makefile - src/vulkan/anv_icd.json src/vulkan/tests/Makefile src/util/Makefile src/util/tests/hash_table/Makefile]) diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore index 2980dbfece3..40afc2e3989 100644 --- a/src/vulkan/.gitignore +++ b/src/vulkan/.gitignore @@ -4,5 +4,6 @@ /anv_entrypoints.h /wayland-drm-protocol.c /wayland-drm-client-protocol.h -/anv_icd.json +/dev_icd.json +/intel_icd.json /gen*_pack.h \ No newline at end of file diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 081f6b67682..220bdbf5cec 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -34,7 +34,7 @@ VULKAN_ENTRYPOINT_CPPFLAGS = \ -DVK_USE_PLATFORM_XCB_KHR \ -DVK_USE_PLATFORM_WAYLAND_KHR -lib_LTLIBRARIES = libvulkan.la +lib_LTLIBRARIES = libvulkan_intel.la check_LTLIBRARIES = libvulkan-test.la @@ -67,7 +67,7 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src/compiler/nir \ -I$(top_builddir)/src/vulkan -libvulkan_la_CFLAGS = $(CFLAGS) -Wno-override-init +libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init VULKAN_SOURCES = \ anv_allocator.c \ @@ -111,7 +111,7 @@ libanv_gen7_la_SOURCES = \ gen7_cmd_buffer.c \ gen7_pipeline.c \ gen7_state.c -libanv_gen7_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=70 +libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=70 
libanv_gen75_la_SOURCES = \ genX_cmd_buffer.c \ @@ -119,7 +119,7 @@ libanv_gen75_la_SOURCES = \ gen7_cmd_buffer.c \ gen7_pipeline.c \ gen7_state.c -libanv_gen75_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=75 +libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=75 libanv_gen8_la_SOURCES = \ genX_cmd_buffer.c \ @@ -127,7 +127,7 @@ libanv_gen8_la_SOURCES = \ gen8_cmd_buffer.c \ gen8_pipeline.c \ gen8_state.c -libanv_gen8_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=80 +libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=80 libanv_gen9_la_SOURCES = \ genX_cmd_buffer.c \ @@ -135,7 +135,7 @@ libanv_gen9_la_SOURCES = \ gen8_cmd_buffer.c \ gen8_pipeline.c \ gen8_state.c -libanv_gen9_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=90 +libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=90 if HAVE_EGL_PLATFORM_WAYLAND BUILT_SOURCES += \ @@ -152,10 +152,10 @@ AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm VULKAN_SOURCES += \ wayland-drm-protocol.c \ anv_wsi_wayland.c -libvulkan_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM +libvulkan_intel_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM endif -libvulkan_la_SOURCES = \ +libvulkan_intel_la_SOURCES = \ $(VULKAN_SOURCES) \ anv_gem.c @@ -170,7 +170,7 @@ anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) CLEANFILES = $(BUILT_SOURCES) -libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ +libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ $(top_builddir)/src/isl/libisl.la \ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ ../mesa/libmesa.la \ @@ -178,6 +178,26 @@ libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ -lpthread -ldl -lstdc++ \ $(PER_GEN_LIBS) +libvulkan_intel_la_LDFLAGS = \ + -module -avoid-version -shared -shrext .so + + +# Generate icd files. It would be nice to just be able to add these to +# AC_CONFIG_FILES, but @libdir@ typically expands to '${exec_prefix}/lib64', +# which we can't put in the icd file. 
When running sed from the Makefile we +# can use ${libdir}, which expands completely and we avoid putting Makefile +# variables in the icd file. + +icdconfdir=$(sysconfdir)/vulkan/icd.d +icdconf_DATA = intel_icd.json +noinst_DATA = dev_icd.json + +%.json : %.json.in + $(AM_V_GEN) $(SED) \ + -e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \ + -e "s#@install_libdir@#${libdir}#" < $< > $@ + + # Libvulkan with dummy gem. Used for unit tests. libvulkan_test_la_SOURCES = \ diff --git a/src/vulkan/anv_icd.json.in b/src/vulkan/anv_icd.json.in deleted file mode 100644 index 40de043668b..00000000000 --- a/src/vulkan/anv_icd.json.in +++ /dev/null @@ -1,8 +0,0 @@ -{ - "file_format_version": "1.0.0", - "ICD": { - "library_path": "@abs_top_builddir@/@LIB_DIR@/libvulkan.so.0.0.0", - "abi_versions": "1.0.3" - } -} - diff --git a/src/vulkan/dev_icd.json.in b/src/vulkan/dev_icd.json.in new file mode 100644 index 00000000000..84920365289 --- /dev/null +++ b/src/vulkan/dev_icd.json.in @@ -0,0 +1,7 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "@build_libdir@/libvulkan_intel.so", + "abi_versions": "1.0.3" + } +} diff --git a/src/vulkan/intel_icd.json.in b/src/vulkan/intel_icd.json.in new file mode 100644 index 00000000000..d9b363a9762 --- /dev/null +++ b/src/vulkan/intel_icd.json.in @@ -0,0 +1,7 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "@install_libdir@/libvulkan_intel.so", + "abi_versions": "1.0.3" + } +} -- cgit v1.2.3 From 5caa995c32a5383bc530395a160bc94ef6ad8784 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 16 Feb 2016 23:19:27 -0800 Subject: Revert "anv: Disable snooping for allocator pools again" This reverts commit c136672c593399e831db0da8b8cc6d8a5d73c1d3. 
We still have the intermittent missing flush for VkEvent in certain vulkancts cases: piglit.deqp-vk.api.command_buffers.execute_large_primary piglit.deqp-vk.api.command_buffers.submit_count_non_zero, Let's reenable the snooping until we figure out the root cause. --- src/vulkan/anv_allocator.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c index 1f858bbdff4..a7ae975656b 100644 --- a/src/vulkan/anv_allocator.c +++ b/src/vulkan/anv_allocator.c @@ -441,6 +441,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) goto fail; cleanup->gem_handle = gem_handle; +#if 0 /* Regular objects are created I915_CACHING_CACHED on LLC platforms and * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are * always created as I915_CACHING_CACHED, which on non-LLC means @@ -452,6 +453,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) anv_gem_set_domain(pool->device, gem_handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); } +#endif /* Now that we successfull allocated everything, we can write the new * values back into pool. */ -- cgit v1.2.3 From b63e28c0e1275761f7effe4b0a8202735ff72291 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 17 Feb 2016 11:19:56 -0800 Subject: anv: Set correct write domain on window system BOs We need to make sure GEM understands that we're writing to the BO, in case it needs to synchronize with other rings (blitter use in display server, for example). 
--- src/vulkan/anv_batch_chain.c | 9 ++++++--- src/vulkan/anv_device.c | 1 + src/vulkan/anv_private.h | 6 ++++++ src/vulkan/anv_wsi_wayland.c | 1 + src/vulkan/anv_wsi_x11.c | 1 + 5 files changed, 15 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index 79e55197e85..a050fa05c8e 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -147,6 +147,9 @@ anv_reloc_list_add(struct anv_reloc_list *list, struct drm_i915_gem_relocation_entry *entry; int index; + const uint32_t domain = + target_bo->is_winsys_bo ? I915_GEM_DOMAIN_RENDER : 0; + anv_reloc_list_grow(list, alloc, 1); /* TODO: Handle failure */ @@ -158,8 +161,8 @@ anv_reloc_list_add(struct anv_reloc_list *list, entry->delta = delta; entry->offset = offset; entry->presumed_offset = target_bo->offset; - entry->read_domains = 0; - entry->write_domain = 0; + entry->read_domains = domain; + entry->write_domain = domain; VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry))); return target_bo->offset + delta; @@ -854,7 +857,7 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, obj->relocs_ptr = 0; obj->alignment = 0; obj->offset = bo->offset; - obj->flags = 0; + obj->flags = bo->is_winsys_bo ? 
EXEC_OBJECT_WRITE : 0; obj->rsvd1 = 0; obj->rsvd2 = 0; } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index dfc29e45444..c3a370b0542 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1070,6 +1070,7 @@ anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) bo->index = 0; bo->offset = 0; bo->size = size; + bo->is_winsys_bo = false; return VK_SUCCESS; } diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index e2ae0119200..951a571944d 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -297,6 +297,12 @@ struct anv_bo { uint64_t size; void *map; + + /* We need to set the WRITE flag on winsys bos so GEM will know we're + * writing to them and synchronize uses on other rings (eg if the display + * server uses the blitter ring). + */ + bool is_winsys_bo; }; /* Represents a lock-free linked list of "free" things. This is used by diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c index 4ae594b10d4..6f25eaf43ea 100644 --- a/src/vulkan/anv_wsi_wayland.c +++ b/src/vulkan/anv_wsi_wayland.c @@ -656,6 +656,7 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, goto fail_image; image->memory = anv_device_memory_from_handle(vk_memory); + image->memory->bo.is_winsys_bo = true; result = anv_BindImageMemory(vk_device, vk_image, vk_memory, 0); diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c index 1e08bf31b4b..843a6b62504 100644 --- a/src/vulkan/anv_wsi_x11.c +++ b/src/vulkan/anv_wsi_x11.c @@ -624,6 +624,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, &memory_h); memory = anv_device_memory_from_handle(memory_h); + memory->bo.is_winsys_bo = true; anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), memory_h, 0); -- cgit v1.2.3 From 3b9b908054162055b203657d2971c28496aa6dfd Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 17 Feb 2016 12:21:46 -0800 Subject: anv: Ignore unused 
dimensions in vkCreateImage We would assert on unused dimensions (eg extent.depth for VK_IMAGE_TYPE_2D) not being 1, but the specification doesn't put any constraints on those. For example, for VK_IMAGE_TYPE_1D: "If imageType is VK_IMAGE_TYPE_1D, the value of extent.width must be less than or equal to the value of VkPhysicalDeviceLimits::maxImageDimension1D, or the value of VkImageFormatProperties::maxExtent.width (as returned by vkGetPhysicalDeviceImageFormatProperties with values of format, type, tiling, usage and flags equal to those in this structure) - whichever is higher" We'll fix up the arguments to isl to keep isl strict in what it expects. --- src/vulkan/anv_image.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 4ce997589c5..dfacced92ef 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -124,13 +124,28 @@ make_surface(const struct anv_device *dev, struct anv_surface *anv_surf = get_surface(image, aspect); + VkExtent3D extent; + switch (vk_info->imageType) { + case VK_IMAGE_TYPE_1D: + extent = (VkExtent3D) { vk_info->extent.width, 1, 1 }; + break; + case VK_IMAGE_TYPE_2D: + extent = (VkExtent3D) { vk_info->extent.width, vk_info->extent.height, 1 }; + break; + case VK_IMAGE_TYPE_3D: + extent = vk_info->extent; + break; + default: + unreachable("invalid image type"); + } + ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, .dim = vk_to_isl_surf_dim[vk_info->imageType], .format = anv_get_isl_format(vk_info->format, aspect, vk_info->tiling, NULL), - .width = vk_info->extent.width, - .height = vk_info->extent.height, - .depth = vk_info->extent.depth, + .width = extent.width, + .height = extent.height, + .depth = extent.depth, .levels = vk_info->mipLevels, .array_len = vk_info->arrayLayers, .samples = vk_info->samples, -- cgit v1.2.3 From 5e92e91c6177bfe31214b8a0ebd0d4c47969b61d Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg 
Kristensen Date: Wed, 17 Feb 2016 15:09:40 -0800 Subject: anv: Rework vkCmdPipelineBarrier() We don't need to look at the stage flags, as we don't really support any fine-grained, stage-level synchronization. We have to do two PIPE_CONTROLs in case we're both flushing and invalidating. Additionally, if we do end up doing two PIPE_CONTROLs, the first, flusing one also has to stall and wait for the flushing to finish, so we don't re-dirty the caches with in-flight rendering after the second PIPE_CONTROL invalidates. --- src/vulkan/genX_cmd_buffer.c | 122 ++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 60 deletions(-) (limited to 'src') diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index e8b215e6377..2552cd1befe 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -153,47 +153,6 @@ void genX(CmdPipelineBarrier)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); uint32_t b, *dw; - struct GENX(PIPE_CONTROL) cmd = { - GENX(PIPE_CONTROL_header), - .PostSyncOperation = NoWrite, - }; - - /* XXX: I think waitEvent is a no-op on our HW. We should verify that. 
*/ - - if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { - /* This is just what PIPE_CONTROL does */ - } - - if (anv_clear_mask(&srcStageMask, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { - cmd.StallAtPixelScoreboard = true; - } - - if (anv_clear_mask(&srcStageMask, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT)) { - cmd.CommandStreamerStallEnable = true; - } - - if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { - anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); - } - - /* On our hardware, all stages will wait for execution as needed. */ - (void)destStageMask; - - /* We checked all known VkPipeEventFlags. */ - anv_assert(srcStageMask == 0); - /* XXX: Right now, we're really dumb and just flush whatever categories * the app asks for. One of these days we may make this a bit better * but right now that's all the hardware allows for in most areas. 
@@ -216,62 +175,105 @@ void genX(CmdPipelineBarrier)( dst_flags |= pImageMemoryBarriers[i].dstAccessMask; } + /* Mask out the Source access flags we care about */ + const uint32_t src_mask = + VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT; + + src_flags = src_flags & src_mask; + + /* Mask out the destination access flags we care about */ + const uint32_t dst_mask = + VK_ACCESS_INDIRECT_COMMAND_READ_BIT | + VK_ACCESS_INDEX_READ_BIT | + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | + VK_ACCESS_UNIFORM_READ_BIT | + VK_ACCESS_SHADER_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_TRANSFER_READ_BIT; + + dst_flags = dst_flags & dst_mask; + /* The src flags represent how things were used previously. This is * what we use for doing flushes. */ + struct GENX(PIPE_CONTROL) flush_cmd = { + GENX(PIPE_CONTROL_header), + .PostSyncOperation = NoWrite, + }; + for_each_bit(b, src_flags) { switch ((VkAccessFlagBits)(1 << b)) { case VK_ACCESS_SHADER_WRITE_BIT: - cmd.DCFlushEnable = true; + flush_cmd.DCFlushEnable = true; break; case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: - cmd.RenderTargetCacheFlushEnable = true; + flush_cmd.RenderTargetCacheFlushEnable = true; break; case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: - cmd.DepthCacheFlushEnable = true; + flush_cmd.DepthCacheFlushEnable = true; break; case VK_ACCESS_TRANSFER_WRITE_BIT: - cmd.RenderTargetCacheFlushEnable = true; - cmd.DepthCacheFlushEnable = true; + flush_cmd.RenderTargetCacheFlushEnable = true; + flush_cmd.DepthCacheFlushEnable = true; break; default: - /* Doesn't require a flush */ - break; + unreachable("should've masked this out by now"); } } - /* The dst flags represent how things will be used in the fugure. 
This + /* If we end up doing two PIPE_CONTROLs, the first, flusing one also has to + * stall and wait for the flushing to finish, so we don't re-dirty the + * caches with in-flight rendering after the second PIPE_CONTROL + * invalidates. + */ + + if (dst_flags) + flush_cmd.CommandStreamerStallEnable = true; + + if (src_flags && dst_flags) { + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); + GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &flush_cmd); + } + + /* The dst flags represent how things will be used in the future. This * is what we use for doing cache invalidations. */ + struct GENX(PIPE_CONTROL) invalidate_cmd = { + GENX(PIPE_CONTROL_header), + .PostSyncOperation = NoWrite, + }; + for_each_bit(b, dst_flags) { switch ((VkAccessFlagBits)(1 << b)) { case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: case VK_ACCESS_INDEX_READ_BIT: case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: - cmd.VFCacheInvalidationEnable = true; + invalidate_cmd.VFCacheInvalidationEnable = true; break; case VK_ACCESS_UNIFORM_READ_BIT: - cmd.ConstantCacheInvalidationEnable = true; + invalidate_cmd.ConstantCacheInvalidationEnable = true; /* fallthrough */ case VK_ACCESS_SHADER_READ_BIT: - cmd.TextureCacheInvalidationEnable = true; + invalidate_cmd.TextureCacheInvalidationEnable = true; break; case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: - cmd.TextureCacheInvalidationEnable = true; + invalidate_cmd.TextureCacheInvalidationEnable = true; break; case VK_ACCESS_TRANSFER_READ_BIT: - cmd.TextureCacheInvalidationEnable = true; + invalidate_cmd.TextureCacheInvalidationEnable = true; break; - case VK_ACCESS_MEMORY_READ_BIT: - break; /* XXX: What is this? 
*/ default: - /* Doesn't require a flush */ - break; + unreachable("should've masked this out by now"); } } - dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); - GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &cmd); + if (dst_flags) { + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); + GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &invalidate_cmd); + } } static void -- cgit v1.2.3 From ae3e249d57e10a9bfe261cdff4a6f27052bc580e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 17 Feb 2016 15:19:07 -0800 Subject: anv: Remove hacky PIPE_CONTROL in vkCmdEndRenderPass() The vkCmdPipelineBarrier() command should work as intended now and we need to pull the plug on this old hack. --- src/vulkan/genX_cmd_buffer.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'src') diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index 2552cd1befe..e25a20b3d7a 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -714,18 +714,4 @@ void genX(CmdEndRenderPass)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); anv_cmd_buffer_resolve_subpass(cmd_buffer); - - /* Emit a flushing pipe control at the end of a pass. This is kind of a - * hack but it ensures that render targets always actually get written. - * Eventually, we should do flushing based on image format transitions - * or something of that nature. 
- */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .PostSyncOperation = NoWrite, - .RenderTargetCacheFlushEnable = true, - .InstructionCacheInvalidateEnable = true, - .DepthCacheFlushEnable = true, - .VFCacheInvalidationEnable = true, - .TextureCacheInvalidationEnable = true, - .CommandStreamerStallEnable = true); } -- cgit v1.2.3 From b8da261dc7e842f3a5f5256ea424c403543d40c9 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 17 Feb 2016 15:28:52 -0800 Subject: spirv: Fix SpvOpFwidth, SpvOpFwidthFine and SpvOpFwidthCoarse "Result is the same as computing the sum of the absolute values of OpDPdx and OpDPdy on P." We were doing sum of absolute values of OpDPdx of P and OpDPdx of NULL. --- src/compiler/nir/spirv/vtn_alu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/vtn_alu.c b/src/compiler/nir/spirv/vtn_alu.c index d866da7445e..450bc158be9 100644 --- a/src/compiler/nir/spirv/vtn_alu.c +++ b/src/compiler/nir/spirv/vtn_alu.c @@ -404,17 +404,17 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpFwidth: val->ssa->def = nir_fadd(&b->nb, nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); + nir_fabs(&b->nb, nir_fddy(&b->nb, src[0]))); return; case SpvOpFwidthFine: val->ssa->def = nir_fadd(&b->nb, nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); + nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0]))); return; case SpvOpFwidthCoarse: val->ssa->def = nir_fadd(&b->nb, nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); + nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0]))); return; case SpvOpVectorTimesScalar: -- cgit v1.2.3 From 9963af8bbdcf424c4f1ff16d2122d003f7c99154 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 17 Feb 2016 17:20:03 -0800 Subject: anv: Ignore unused dimensions in vkCreateImage's anv_image 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We ignore unused dimensions in the isl surface; do the same for the resulting anv_image. Reviewed-by: Kristian Høgsberg Kristensen --- src/vulkan/anv_image.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index dfacced92ef..0a412a3f8c6 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ -139,6 +139,8 @@ make_surface(const struct anv_device *dev, unreachable("invalid image type"); } + image->extent = extent; + ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, .dim = vk_to_isl_surf_dim[vk_info->imageType], .format = anv_get_isl_format(vk_info->format, aspect, -- cgit v1.2.3 From da9fd74d34478a24e1a607dd612a90e1ecb40a7d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 16 Feb 2016 12:21:55 -0800 Subject: anv: Pull StencilBufferWriteEnable from both sides --- src/vulkan/gen8_cmd_buffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index ebd6123454d..5ca6f6978c9 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -427,7 +427,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { GEN9_3DSTATE_WM_DEPTH_STENCIL_header, - .StencilBufferWriteEnable = d->stencil_write_mask.front != 0, + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || + d->stencil_write_mask.back != 0, .StencilTestMask = d->stencil_compare_mask.front & 0xff, .StencilWriteMask = d->stencil_write_mask.front & 0xff, -- cgit v1.2.3 From 9f9cd3de44feb83dc36f4947c81390358e448cc8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 17 Feb 2016 11:54:42 -0800 Subject: anv/gen8_pipeline: Default color attachments to WriteDisable = true --- src/vulkan/gen8_pipeline.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited 
to 'src') diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index c41d6ffa82e..eec3f7aafdd 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -158,6 +158,13 @@ emit_cb_state(struct anv_pipeline *pipeline, } } + for (uint32_t i = info->attachmentCount; i < 8; i++) { + blend_state.Entry[i].WriteDisableAlpha = true; + blend_state.Entry[i].WriteDisableRed = true; + blend_state.Entry[i].WriteDisableGreen = true; + blend_state.Entry[i].WriteDisableBlue = true; + } + GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); if (!device->info.has_llc) anv_state_clflush(pipeline->blend_state); -- cgit v1.2.3 From 7c26d8d47174b19d91651c5eff3be9e71a004b24 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 17 Feb 2016 12:05:03 -0800 Subject: anv/gen7_pipeline: Set WriteDisable = true if we have no color attachments --- src/vulkan/gen7_pipeline.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index e329c8f42ab..c4f7a369598 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -113,15 +113,15 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, { struct anv_device *device = pipeline->device; - if (info->pAttachments == NULL) { + if (info == NULL || info->attachmentCount == 0) { pipeline->blend_state = anv_state_pool_emit(&device->dynamic_state_pool, GEN7_BLEND_STATE, 64, .ColorBufferBlendEnable = false, - .WriteDisableAlpha = false, - .WriteDisableRed = false, - .WriteDisableGreen = false, - .WriteDisableBlue = false); + .WriteDisableAlpha = true, + .WriteDisableRed = true, + .WriteDisableGreen = true, + .WriteDisableBlue = true); } else { /* FIXME-GEN7: All render targets share blend state settings on gen7, we * can't implement this. 
-- cgit v1.2.3 From d67d84f5e568feaa988bdda6a23c84aa34b8bbaf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 17 Feb 2016 13:23:45 -0800 Subject: i965/nir: Do lower_io late for fragment shaders --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 ++ src/mesa/drivers/dri/i965/brw_nir.c | 3 ++- src/vulkan/anv_pipeline.c | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7e161e8bb48..bb22cfa5fab 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5606,6 +5606,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, true); + shader = brw_nir_lower_io(shader, compiler->devinfo, true, + false, NULL); shader = brw_postprocess_nir(shader, compiler->devinfo, true); /* key->alpha_test_func means simulating alpha testing via discards, diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 44446694b4b..e9351a5556a 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -637,7 +637,8 @@ brw_create_nir(struct brw_context *brw, if (nir->stage != MESA_SHADER_VERTEX && nir->stage != MESA_SHADER_TESS_CTRL && - nir->stage != MESA_SHADER_TESS_EVAL) { + nir->stage != MESA_SHADER_TESS_EVAL && + nir->stage != MESA_SHADER_FRAGMENT) { nir = brw_nir_lower_io(nir, devinfo, is_scalar, false, NULL); } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 21df3e081a3..e6cc8faf4fc 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -390,7 +390,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, /* Finish the optimization and compilation process */ if (nir->stage != MESA_SHADER_VERTEX && nir->stage != MESA_SHADER_TESS_CTRL && - nir->stage != MESA_SHADER_TESS_EVAL) { + 
nir->stage != MESA_SHADER_TESS_EVAL && + nir->stage != MESA_SHADER_FRAGMENT) { nir = brw_nir_lower_io(nir, &pipeline->device->info, compiler->scalar_stage[stage], false, NULL); } -- cgit v1.2.3 From 8c05b44bbb317b5547f1079d1957c05ac29a591e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 17 Feb 2016 13:45:24 -0800 Subject: nir: Add a nir_foreach_variable_safe helper --- src/compiler/nir/nir.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 9c478870626..932cab62e64 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -338,6 +338,9 @@ typedef struct nir_variable { #define nir_foreach_variable(var, var_list) \ foreach_list_typed(nir_variable, var, node, var_list) +#define nir_foreach_variable_safe(var, var_list) \ + foreach_list_typed_safe(nir_variable, var, node, var_list) + static inline bool nir_variable_is_global(const nir_variable *var) { -- cgit v1.2.3 From 979732fafc8d5c8db4c70461493b7c93ac870cfd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 17 Feb 2016 13:46:25 -0800 Subject: nir: Add a helper for getting the one function from a shader --- src/compiler/nir/nir.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 932cab62e64..0f8c78100bf 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1756,6 +1756,15 @@ typedef struct nir_shader { gl_shader_stage stage; } nir_shader; +static inline nir_function * +nir_shader_get_entrypoint(nir_shader *shader) +{ + assert(exec_list_length(&shader->functions) == 1); + struct exec_node *func_node = exec_list_get_head(&shader->functions); + nir_function *func = exec_node_data(nir_function, func_node, node); + return func; +} + #define nir_foreach_function(shader, func) \ foreach_list_typed(nir_function, func, node, &(shader)->functions) -- cgit v1.2.3 From fed8b7f8177ed3e770df1989922ff5cc63eba895 Mon Sep 17 00:00:00 2001 
From: Jason Ekstrand Date: Wed, 17 Feb 2016 13:46:44 -0800 Subject: anv/pipeline: Delete out-of-bounds fragment shader outputs --- src/vulkan/anv_pipeline.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src') diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index e6cc8faf4fc..4be2bfc625b 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -616,6 +616,19 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + nir_function_impl *impl = nir_shader_get_entrypoint(nir)->impl; + nir_foreach_variable_safe(var, &nir->outputs) { + if (var->data.location < FRAG_RESULT_DATA0) + continue; + + unsigned rt = var->data.location - FRAG_RESULT_DATA0; + if (rt >= key.nr_color_regions) { + var->data.mode = nir_var_local; + exec_node_remove(&var->node); + exec_list_push_tail(&impl->locals, &var->node); + } + } + void *mem_ctx = ralloc_context(NULL); if (module->nir == NULL) -- cgit v1.2.3 From 581e4468f96429066249cc3421a5ba0cc4c918de Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 17 Feb 2016 17:44:18 -0800 Subject: nir/spirv: Add some more capabilities --- src/compiler/nir/spirv/spirv_to_nir.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index b5e7f536e2e..a50a581cfb6 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -2148,17 +2148,24 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, /* Unhandled, but these are for debug so that's ok. 
*/ break; - case SpvOpCapability: - switch ((SpvCapability)w[1]) { + case SpvOpCapability: { + SpvCapability cap = w[1]; + switch (cap) { case SpvCapabilityMatrix: case SpvCapabilityShader: case SpvCapabilityGeometry: + case SpvCapabilityUniformBufferArrayDynamicIndexing: + case SpvCapabilitySampledImageArrayDynamicIndexing: + case SpvCapabilityStorageBufferArrayDynamicIndexing: + case SpvCapabilityStorageImageArrayDynamicIndexing: case SpvCapabilityClipDistance: + case SpvCapabilityImageQuery: break; default: assert(!"Unsupported capability"); } break; + } case SpvOpExtInstImport: vtn_handle_extension(b, opcode, w, count); -- cgit v1.2.3 From c2581a9375ea956abf4b9b75d27ec199fabb99d0 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 3 Feb 2016 12:14:28 -0800 Subject: anv: Build the real pipeline layout in the pipeline This gives us the chance to pack the binding table down to just what the shaders actually need. Some applications use very large descriptor sets and only ever use a handful of entries. Compacted binding tables should be much more efficient in this case. It comes at the down-side of having to re-emit binding tables every time we switch pipelines, but that's considered an acceptable cost. 
--- src/vulkan/anv_cmd_buffer.c | 47 +++--- src/vulkan/anv_nir.h | 6 +- src/vulkan/anv_nir_apply_dynamic_offsets.c | 2 +- src/vulkan/anv_nir_apply_pipeline_layout.c | 242 +++++++++++++++++++---------- src/vulkan/anv_pipeline.c | 11 +- src/vulkan/anv_private.h | 11 ++ 6 files changed, 201 insertions(+), 118 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index bc6b3925cd2..b060828cf61 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -433,6 +433,7 @@ void anv_CmdBindPipeline( cmd_buffer->state.compute_pipeline = pipeline; cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE; cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; break; case VK_PIPELINE_BIND_POINT_GRAPHICS: @@ -440,6 +441,7 @@ void anv_CmdBindPipeline( cmd_buffer->state.vb_dirty |= pipeline->vb_used; cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; + cmd_buffer->state.descriptors_dirty |= pipeline->active_stages; /* Apply the dynamic state from the pipeline */ cmd_buffer->state.dirty |= pipeline->dynamic_state_mask; @@ -702,39 +704,34 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, { struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_pipeline_layout *layout; + struct anv_pipeline_bind_map *map; uint32_t color_count, bias, state_offset; switch (stage) { case MESA_SHADER_FRAGMENT: - layout = cmd_buffer->state.pipeline->layout; + map = &cmd_buffer->state.pipeline->bindings[stage]; bias = MAX_RTS; color_count = subpass->color_count; break; case MESA_SHADER_COMPUTE: - layout = cmd_buffer->state.compute_pipeline->layout; + map = &cmd_buffer->state.compute_pipeline->bindings[stage]; bias = 1; color_count = 0; break; default: - layout = 
cmd_buffer->state.pipeline->layout; + map = &cmd_buffer->state.pipeline->bindings[stage]; bias = 0; color_count = 0; break; } - /* This is a little awkward: layout can be NULL but we still have to - * allocate and set a binding table for the PS stage for render - * targets. */ - uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0; - - if (color_count + surface_count == 0) { + if (color_count + map->surface_count == 0) { *bt_state = (struct anv_state) { 0, }; return VK_SUCCESS; } *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, - bias + surface_count, + bias + map->surface_count, &state_offset); uint32_t *bt_map = bt_state->map; @@ -769,10 +766,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); } - if (layout == NULL) + if (map->surface_count == 0) goto out; - if (layout->stage[stage].image_count > 0) { + if (map->image_count > 0) { VkResult result = anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images); if (result != VK_SUCCESS) @@ -782,9 +779,8 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, } uint32_t image = 0; - for (uint32_t s = 0; s < layout->stage[stage].surface_count; s++) { - struct anv_pipeline_binding *binding = - &layout->stage[stage].surface_to_descriptor[s]; + for (uint32_t s = 0; s < map->surface_count; s++) { + struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s]; struct anv_descriptor_set *set = cmd_buffer->state.descriptors[binding->set]; struct anv_descriptor *desc = &set->descriptors[binding->offset]; @@ -855,7 +851,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, bt_map[bias + s] = surface_state.offset + state_offset; add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); } - assert(image == layout->stage[stage].image_count); + assert(image == map->image_count); out: if (!cmd_buffer->device->info.has_llc) @@ -868,29 +864,26 @@ 
VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage, struct anv_state *state) { - struct anv_pipeline_layout *layout; - uint32_t sampler_count; + struct anv_pipeline_bind_map *map; if (stage == MESA_SHADER_COMPUTE) - layout = cmd_buffer->state.compute_pipeline->layout; + map = &cmd_buffer->state.compute_pipeline->bindings[stage]; else - layout = cmd_buffer->state.pipeline->layout; + map = &cmd_buffer->state.pipeline->bindings[stage]; - sampler_count = layout ? layout->stage[stage].sampler_count : 0; - if (sampler_count == 0) { + if (map->sampler_count == 0) { *state = (struct anv_state) { 0, }; return VK_SUCCESS; } - uint32_t size = sampler_count * 16; + uint32_t size = map->sampler_count * 16; *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); if (state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - for (uint32_t s = 0; s < layout->stage[stage].sampler_count; s++) { - struct anv_pipeline_binding *binding = - &layout->stage[stage].sampler_to_descriptor[s]; + for (uint32_t s = 0; s < map->sampler_count; s++) { + struct anv_pipeline_binding *binding = &map->sampler_to_descriptor[s]; struct anv_descriptor_set *set = cmd_buffer->state.descriptors[binding->set]; struct anv_descriptor *desc = &set->descriptors[binding->offset]; diff --git a/src/vulkan/anv_nir.h b/src/vulkan/anv_nir.h index c76314d9df6..a7ea3eb0e28 100644 --- a/src/vulkan/anv_nir.h +++ b/src/vulkan/anv_nir.h @@ -35,9 +35,9 @@ void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar); void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, nir_shader *shader, struct brw_stage_prog_data *prog_data); -bool anv_nir_apply_pipeline_layout(nir_shader *shader, - struct brw_stage_prog_data *prog_data, - const struct anv_pipeline_layout *layout); +void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data); #ifdef __cplusplus } diff --git 
a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c index a5e3238a36a..e71a8ffb1f4 100644 --- a/src/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c @@ -28,7 +28,7 @@ struct apply_dynamic_offsets_state { nir_shader *shader; nir_builder builder; - struct anv_pipeline_layout *layout; + const struct anv_pipeline_layout *layout; uint32_t indices_start; }; diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c index ee93e40e76c..c58a93878ee 100644 --- a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@ -29,78 +29,76 @@ struct apply_pipeline_layout_state { nir_shader *shader; nir_builder builder; - const struct anv_pipeline_layout *layout; - - bool progress; + struct { + BITSET_WORD *used; + uint8_t *surface_offsets; + uint8_t *sampler_offsets; + uint8_t *image_offsets; + } set[MAX_SETS]; }; -static uint32_t -get_surface_index(unsigned set, unsigned binding, - struct apply_pipeline_layout_state *state) +static void +add_binding(struct apply_pipeline_layout_state *state, + uint32_t set, uint32_t binding) { - assert(set < state->layout->num_sets); - struct anv_descriptor_set_layout *set_layout = - state->layout->set[set].layout; - - gl_shader_stage stage = state->shader->stage; - - assert(binding < set_layout->binding_count); - - assert(set_layout->binding[binding].stage[stage].surface_index >= 0); - - uint32_t surface_index = - state->layout->set[set].stage[stage].surface_start + - set_layout->binding[binding].stage[stage].surface_index; - - assert(surface_index < state->layout->stage[stage].surface_count); - - return surface_index; + BITSET_SET(state->set[set].used, binding); } -static uint32_t -get_sampler_index(unsigned set, unsigned binding, - struct apply_pipeline_layout_state *state) +static void +add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var) { - assert(set < 
state->layout->num_sets); - struct anv_descriptor_set_layout *set_layout = - state->layout->set[set].layout; - - gl_shader_stage stage = state->shader->stage; - - assert(binding < set_layout->binding_count); - - assert(set_layout->binding[binding].stage[stage].sampler_index >= 0); - - uint32_t sampler_index = - state->layout->set[set].stage[stage].sampler_start + - set_layout->binding[binding].stage[stage].sampler_index; - - assert(sampler_index < state->layout->stage[stage].sampler_count); - - return sampler_index; + add_binding(state, var->data.descriptor_set, var->data.binding); } -static uint32_t -get_image_index(unsigned set, unsigned binding, - struct apply_pipeline_layout_state *state) +static bool +get_used_bindings_block(nir_block *block, void *void_state) { - assert(set < state->layout->num_sets); - struct anv_descriptor_set_layout *set_layout = - state->layout->set[set].layout; - - assert(binding < set_layout->binding_count); - - gl_shader_stage stage = state->shader->stage; - - assert(set_layout->binding[binding].stage[stage].image_index >= 0); - - uint32_t image_index = - state->layout->set[set].stage[stage].image_start + - set_layout->binding[binding].stage[stage].image_index; + struct apply_pipeline_layout_state *state = void_state; - assert(image_index < state->layout->stage[stage].image_count); + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_vulkan_resource_index: + add_binding(state, nir_intrinsic_desc_set(intrin), + nir_intrinsic_binding(intrin)); + break; + + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case 
nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_size: + case nir_intrinsic_image_samples: + add_var_binding(state, intrin->variables[0]->var); + break; + + default: + break; + } + break; + } + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + assert(tex->texture); + add_var_binding(state, tex->texture->var); + if (tex->sampler) + add_var_binding(state, tex->sampler->var); + break; + } + default: + continue; + } + } - return image_index; + return true; } static void @@ -114,7 +112,7 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin, uint32_t set = nir_intrinsic_desc_set(intrin); uint32_t binding = nir_intrinsic_binding(intrin); - uint32_t surface_index = get_surface_index(set, binding, state); + uint32_t surface_index = state->set[set].surface_offsets[binding]; nir_const_value *const_block_idx = nir_src_as_const_value(intrin->src[0]); @@ -187,16 +185,16 @@ lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) /* No one should have come by and lowered it already */ assert(tex->texture); - tex->texture_index = - get_surface_index(tex->texture->var->data.descriptor_set, - tex->texture->var->data.binding, state); + unsigned set = tex->texture->var->data.descriptor_set; + unsigned binding = tex->texture->var->data.binding; + tex->texture_index = state->set[set].surface_offsets[binding]; lower_tex_deref(tex, tex->texture, &tex->texture_index, nir_tex_src_texture_offset, state); if (tex->sampler) { - tex->sampler_index = - get_sampler_index(tex->sampler->var->data.descriptor_set, - tex->sampler->var->data.binding, state); + unsigned set = tex->sampler->var->data.descriptor_set; + unsigned binding = tex->sampler->var->data.binding; + tex->sampler_index = state->set[set].surface_offsets[binding]; lower_tex_deref(tex, tex->sampler, &tex->sampler_index, nir_tex_src_sampler_offset, state); } @@ -224,14 +222,11 @@ apply_pipeline_layout_block(nir_block *block, void 
*void_state) nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) { lower_res_index_intrinsic(intrin, state); - state->progress = true; } break; } case nir_instr_type_tex: lower_tex(nir_instr_as_tex(instr), state); - /* All texture instructions need lowering */ - state->progress = true; break; default: continue; @@ -255,16 +250,97 @@ setup_vec4_uniform_value(const union gl_constant_value **params, params[i] = &zero; } -bool -anv_nir_apply_pipeline_layout(nir_shader *shader, - struct brw_stage_prog_data *prog_data, - const struct anv_pipeline_layout *layout) +void +anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data) { + struct anv_pipeline_layout *layout = pipeline->layout; + struct apply_pipeline_layout_state state = { .shader = shader, - .layout = layout, }; + void *mem_ctx = ralloc_context(NULL); + + for (unsigned s = 0; s < layout->num_sets; s++) { + const unsigned count = layout->set[s].layout->binding_count; + const unsigned words = BITSET_WORDS(count); + state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words); + state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count); + state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count); + state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count); + } + + nir_foreach_function(shader, function) { + if (function->impl) + nir_foreach_block(function->impl, get_used_bindings_block, &state); + } + + struct anv_pipeline_bind_map map = { + .surface_count = 0, + .sampler_count = 0, + }; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + + BITSET_WORD b, _tmp; + BITSET_FOREACH_SET(b, _tmp, state.set[set].used, + set_layout->binding_count) { + if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) + map.surface_count += 
set_layout->binding[b].array_size; + if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) + map.sampler_count += set_layout->binding[b].array_size; + if (set_layout->binding[b].stage[shader->stage].image_index >= 0) + map.image_count += set_layout->binding[b].array_size; + } + } + + map.surface_to_descriptor = + malloc(map.surface_count * sizeof(struct anv_pipeline_binding)); + map.sampler_to_descriptor = + malloc(map.sampler_count * sizeof(struct anv_pipeline_binding)); + + pipeline->bindings[shader->stage] = map; + + unsigned surface = 0; + unsigned sampler = 0; + unsigned image = 0; + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + + BITSET_WORD b, _tmp; + BITSET_FOREACH_SET(b, _tmp, state.set[set].used, + set_layout->binding_count) { + unsigned array_size = set_layout->binding[b].array_size; + unsigned set_offset = set_layout->binding[b].descriptor_index; + + if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) { + state.set[set].surface_offsets[b] = surface; + for (unsigned i = 0; i < array_size; i++) { + map.surface_to_descriptor[surface + i].set = set; + map.surface_to_descriptor[surface + i].offset = set_offset + i; + } + surface += array_size; + } + + if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) { + state.set[set].sampler_offsets[b] = sampler; + for (unsigned i = 0; i < array_size; i++) { + map.sampler_to_descriptor[sampler + i].set = set; + map.sampler_to_descriptor[sampler + i].offset = set_offset + i; + } + sampler += array_size; + } + + if (set_layout->binding[b].stage[shader->stage].image_index >= 0) { + state.set[set].image_offsets[b] = image; + image += array_size; + } + } + } + nir_foreach_function(shader, function) { if (function->impl) { nir_builder_init(&state.builder, function->impl); @@ -274,7 +350,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader, } } - if (layout->stage[shader->stage].image_count > 0) { 
+ if (map.image_count > 0) { nir_foreach_variable(var, &shader->uniforms) { if (glsl_type_is_image(var->type) || (glsl_type_is_array(var->type) && @@ -283,8 +359,9 @@ anv_nir_apply_pipeline_layout(nir_shader *shader, * information required for reading/writing to/from the image is * storred in the uniform. */ - unsigned image_index = get_image_index(var->data.descriptor_set, - var->data.binding, &state); + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + unsigned image_index = state.set[set].image_offsets[binding]; var->data.driver_location = shader->num_uniforms + image_index * BRW_IMAGE_PARAM_SIZE * 4; @@ -294,7 +371,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader, struct anv_push_constants *null_data = NULL; const gl_constant_value **param = prog_data->param + shader->num_uniforms; const struct brw_image_param *image_param = null_data->images; - for (uint32_t i = 0; i < layout->stage[shader->stage].image_count; i++) { + for (uint32_t i = 0; i < map.image_count; i++) { setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, (const union gl_constant_value *)&image_param->surface_idx, 1); setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, @@ -312,9 +389,6 @@ anv_nir_apply_pipeline_layout(nir_shader *shader, image_param ++; } - shader->num_uniforms += layout->stage[shader->stage].image_count * - BRW_IMAGE_PARAM_SIZE * 4; + shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4; } - - return state.progress; } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 4be2bfc625b..a7feefb540e 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -336,8 +336,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; - if (pipeline->layout && pipeline->layout->stage[stage].image_count > 0) - prog_data->nr_params += 
pipeline->layout->stage[stage].image_count * + if (pipeline->bindings[stage].image_count > 0) + prog_data->nr_params += pipeline->bindings[stage].image_count * BRW_IMAGE_PARAM_SIZE; if (prog_data->nr_params > 0) { @@ -362,9 +362,13 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, /* Set up dynamic offsets */ anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); + char surface_usage_mask[256], sampler_usage_mask[256]; + zero(surface_usage_mask); + zero(sampler_usage_mask); + /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ if (pipeline->layout) - anv_nir_apply_pipeline_layout(nir, prog_data, pipeline->layout); + anv_nir_apply_pipeline_layout(pipeline, nir, prog_data); /* All binding table offsets provided by apply_pipeline_layout() are * relative to the start of the bindint table (plus MAX_RTS for VS). @@ -1059,6 +1063,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, */ memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); pipeline->vs_simd8 = NO_KERNEL; pipeline->vs_vec4 = NO_KERNEL; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 951a571944d..29343397b6c 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1378,6 +1378,15 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) stage = __builtin_ffs(__tmp) - 1, __tmp; \ __tmp &= ~(1 << (stage))) +struct anv_pipeline_bind_map { + uint32_t surface_count; + uint32_t sampler_count; + uint32_t image_count; + + struct anv_pipeline_binding * surface_to_descriptor; + struct anv_pipeline_binding * sampler_to_descriptor; +}; + struct anv_pipeline { struct anv_device * device; struct anv_batch batch; @@ -1387,6 +1396,8 @@ struct anv_pipeline { struct anv_dynamic_state dynamic_state; struct anv_pipeline_layout * layout; + struct anv_pipeline_bind_map bindings[MESA_SHADER_STAGES]; + bool use_repclear; struct 
brw_vs_prog_data vs_prog_data; -- cgit v1.2.3 From 005b9ac75844473b7c86a6f94dcddc1c926bceec Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 17 Feb 2016 18:03:21 -0800 Subject: anv: Gut anv_pipeline_layout Almost none of the data in anv_pipeline_layout is used anymore thanks to doing real layout in the pipeline itself. --- src/vulkan/anv_descriptor_set.c | 98 ++++++----------------------------------- src/vulkan/anv_private.h | 12 ----- 2 files changed, 13 insertions(+), 97 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c index d5e6286f3ee..7a77336602a 100644 --- a/src/vulkan/anv_descriptor_set.c +++ b/src/vulkan/anv_descriptor_set.c @@ -197,108 +197,36 @@ VkResult anv_CreatePipelineLayout( VkPipelineLayout* pPipelineLayout) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline_layout l, *layout; + struct anv_pipeline_layout *layout; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); - l.num_sets = pCreateInfo->setLayoutCount; + layout = anv_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (layout == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + layout->num_sets = pCreateInfo->setLayoutCount; unsigned dynamic_offset_count = 0; - memset(l.stage, 0, sizeof(l.stage)); + memset(layout->stage, 0, sizeof(layout->stage)); for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[set]); - l.set[set].layout = set_layout; + layout->set[set].layout = set_layout; - l.set[set].dynamic_offset_start = dynamic_offset_count; + layout->set[set].dynamic_offset_start = dynamic_offset_count; for (uint32_t b = 0; b < set_layout->binding_count; b++) { if (set_layout->binding[b].dynamic_offset_index >= 0) dynamic_offset_count += set_layout->binding[b].array_size; - } - - for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { - 
l.set[set].stage[s].surface_start = l.stage[s].surface_count; - l.set[set].stage[s].sampler_start = l.stage[s].sampler_count; - l.set[set].stage[s].image_start = l.stage[s].image_count; - - for (uint32_t b = 0; b < set_layout->binding_count; b++) { - unsigned array_size = set_layout->binding[b].array_size; - - if (set_layout->binding[b].stage[s].surface_index >= 0) { - l.stage[s].surface_count += array_size; - - if (set_layout->binding[b].dynamic_offset_index >= 0) - l.stage[s].has_dynamic_offsets = true; - } - - if (set_layout->binding[b].stage[s].sampler_index >= 0) - l.stage[s].sampler_count += array_size; - - if (set_layout->binding[b].stage[s].image_index >= 0) - l.stage[s].image_count += array_size; + for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { + if (set_layout->binding[b].stage[s].surface_index >= 0) + layout->stage[s].has_dynamic_offsets = true; } } } - unsigned num_bindings = 0; - for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { - num_bindings += l.stage[s].surface_count + - l.stage[s].sampler_count + - l.stage[s].image_count; - } - - size_t size = sizeof(*layout) + num_bindings * sizeof(layout->entries[0]); - - layout = anv_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (layout == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* Now we can actually build our surface and sampler maps */ - struct anv_pipeline_binding *entry = layout->entries; - for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { - l.stage[s].surface_to_descriptor = entry; - entry += l.stage[s].surface_count; - l.stage[s].sampler_to_descriptor = entry; - entry += l.stage[s].sampler_count; - entry += l.stage[s].image_count; - - int surface = 0; - int sampler = 0; - for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { - struct anv_descriptor_set_layout *set_layout = l.set[set].layout; - - for (uint32_t b = 0; b < set_layout->binding_count; b++) { - unsigned array_size = 
set_layout->binding[b].array_size; - unsigned set_offset = set_layout->binding[b].descriptor_index; - - if (set_layout->binding[b].stage[s].surface_index >= 0) { - assert(surface == l.set[set].stage[s].surface_start + - set_layout->binding[b].stage[s].surface_index); - for (unsigned i = 0; i < array_size; i++) { - l.stage[s].surface_to_descriptor[surface + i].set = set; - l.stage[s].surface_to_descriptor[surface + i].offset = set_offset + i; - } - surface += array_size; - } - - if (set_layout->binding[b].stage[s].sampler_index >= 0) { - assert(sampler == l.set[set].stage[s].sampler_start + - set_layout->binding[b].stage[s].sampler_index); - for (unsigned i = 0; i < array_size; i++) { - l.stage[s].sampler_to_descriptor[sampler + i].set = set; - l.stage[s].sampler_to_descriptor[sampler + i].offset = set_offset + i; - } - sampler += array_size; - } - } - } - } - - /* Finally, we're done setting it up, copy into the allocated version */ - *layout = l; - *pPipelineLayout = anv_pipeline_layout_to_handle(layout); return VK_SUCCESS; diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 29343397b6c..06b961495c2 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -986,25 +986,13 @@ struct anv_pipeline_layout { struct { struct anv_descriptor_set_layout *layout; uint32_t dynamic_offset_start; - struct { - uint32_t surface_start; - uint32_t sampler_start; - uint32_t image_start; - } stage[MESA_SHADER_STAGES]; } set[MAX_SETS]; uint32_t num_sets; struct { bool has_dynamic_offsets; - uint32_t surface_count; - struct anv_pipeline_binding *surface_to_descriptor; - uint32_t sampler_count; - struct anv_pipeline_binding *sampler_to_descriptor; - uint32_t image_count; } stage[MESA_SHADER_STAGES]; - - struct anv_pipeline_binding entries[0]; }; struct anv_buffer { -- cgit v1.2.3 From 40c76d4efa7e67d4412ecbcf4cd4481387abbe6d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 17 Feb 2016 20:16:11 -0800 Subject: Delete nir_lower_samplers.cpp 
Somehow, in one of the merges with mesa master, the old file must have been kept when nir_lower_samplers.cpp was moved to nir_lower_samplers.c. --- src/glsl/nir/nir_lower_samplers.cpp | 248 ------------------------------------ 1 file changed, 248 deletions(-) delete mode 100644 src/glsl/nir/nir_lower_samplers.cpp (limited to 'src') diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp deleted file mode 100644 index 438caacfd4d..00000000000 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. - * Copyright (C) 2008 VMware, Inc. All Rights Reserved. - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "nir.h" -#include "../program.h" -#include "program/hash_table.h" -#include "ir_uniform.h" - -extern "C" { -#include "main/compiler.h" -#include "main/mtypes.h" -#include "program/prog_parameter.h" -#include "program/program.h" -} - -static void -add_indirect_to_tex(nir_tex_instr *instr, nir_src indirect) -{ - /* First, we have to resize the array of texture sources */ - nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, - instr->num_srcs + 1); - - for (unsigned i = 0; i < instr->num_srcs; i++) { - new_srcs[i].src_type = instr->src[i].src_type; - nir_instr_move_src(&instr->instr, &new_srcs[i].src, &instr->src[i].src); - } - - ralloc_free(instr->src); - instr->src = new_srcs; - - /* Now we can go ahead and move the source over to being a - * first-class texture source. - */ - instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; - instr->num_srcs++; - nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src, - indirect); -} - -static unsigned -get_sampler_index(const struct gl_shader_program *shader_program, - gl_shader_stage stage, const char *name) -{ - unsigned location; - if (!shader_program->UniformHash->get(location, name)) { - assert(!"failed to find sampler"); - return 0; - } - - if (!shader_program->UniformStorage[location].sampler[stage].active) { - assert(!"cannot return a sampler"); - return 0; - } - - return shader_program->UniformStorage[location].sampler[stage].index; -} - -static void -lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program, - gl_shader_stage stage, void *mem_ctx) -{ - if (instr->sampler == NULL) - return; - - /* Get the name and the offset */ - instr->sampler_index = 0; - char *name = ralloc_strdup(mem_ctx, instr->sampler->var->name); - - for (nir_deref *deref = &instr->sampler->deref; - deref->child; deref = deref->child) { - switch (deref->child->deref_type) { - case nir_deref_type_array: { - nir_deref_array *deref_array = 
nir_deref_as_array(deref->child); - - assert(deref_array->deref_array_type != nir_deref_array_type_wildcard); - - if (deref_array->deref.child) { - ralloc_asprintf_append(&name, "[%u]", - deref_array->deref_array_type == nir_deref_array_type_direct ? - deref_array->base_offset : 0); - } else { - assert(deref->child->type->base_type == GLSL_TYPE_SAMPLER); - instr->sampler_index = deref_array->base_offset; - } - - /* XXX: We're assuming here that the indirect is the last array - * thing we have. This should be ok for now as we don't support - * arrays_of_arrays yet. - */ - if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - /* First, we have to resize the array of texture sources */ - nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, - instr->num_srcs + 1); - - for (unsigned i = 0; i < instr->num_srcs; i++) { - new_srcs[i].src_type = instr->src[i].src_type; - nir_instr_move_src(&instr->instr, &new_srcs[i].src, - &instr->src[i].src); - } - - ralloc_free(instr->src); - instr->src = new_srcs; - - /* Now we can go ahead and move the source over to being a - * first-class texture source. 
- */ - instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; - instr->num_srcs++; - nir_instr_move_src(&instr->instr, - &instr->src[instr->num_srcs - 1].src, - &deref_array->indirect); - - instr->sampler_array_size = glsl_get_length(deref->type); - } - break; - } - - case nir_deref_type_struct: { - nir_deref_struct *deref_struct = nir_deref_as_struct(deref->child); - const char *field = glsl_get_struct_elem_name(deref->type, - deref_struct->index); - ralloc_asprintf_append(&name, ".%s", field); - break; - } - - default: - unreachable("Invalid deref type"); - break; - } - } - - instr->sampler_index += get_sampler_index(shader_program, stage, name); - - instr->sampler = NULL; -} - -typedef struct { - void *mem_ctx; - const struct gl_shader_program *shader_program; - gl_shader_stage stage; -} lower_state; - -static bool -lower_block_cb(nir_block *block, void *_state) -{ - lower_state *state = (lower_state *) _state; - - nir_foreach_instr(block, instr) { - if (instr->type == nir_instr_type_tex) { - nir_tex_instr *tex_instr = nir_instr_as_tex(instr); - lower_sampler(tex_instr, state->shader_program, state->stage, - state->mem_ctx); - } - } - - return true; -} - -static void -lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program, - gl_shader_stage stage) -{ - lower_state state; - - state.mem_ctx = ralloc_parent(impl); - state.shader_program = shader_program; - state.stage = stage; - - nir_foreach_block(impl, lower_block_cb, &state); -} - -extern "C" void -nir_lower_samplers(nir_shader *shader, - const struct gl_shader_program *shader_program) -{ - nir_foreach_overload(shader, overload) { - if (overload->impl) - lower_impl(overload->impl, shader_program, shader->stage); - } -} - -static bool -lower_samplers_for_vk_block(nir_block *block, void *data) -{ - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_tex) - continue; - - nir_tex_instr *tex = nir_instr_as_tex(instr); - - assert(tex->sampler); - - 
tex->sampler_set = tex->sampler->var->data.descriptor_set; - tex->sampler_index = tex->sampler->var->data.binding; - - if (tex->sampler->deref.child) { - assert(tex->sampler->deref.child->deref_type == nir_deref_type_array); - nir_deref_array *arr = nir_deref_as_array(tex->sampler->deref.child); - - /* Only one-level arrays are allowed in vulkan */ - assert(arr->deref.child == NULL); - - tex->sampler_index += arr->base_offset; - if (arr->deref_array_type == nir_deref_array_type_indirect) { - add_indirect_to_tex(tex, arr->indirect); - nir_instr_rewrite_src(instr, &arr->indirect, NIR_SRC_INIT); - - tex->sampler_array_size = glsl_get_length(tex->sampler->deref.type); - } - } - - tex->sampler = NULL; - } - - return true; -} - -extern "C" void -nir_lower_samplers_for_vk(nir_shader *shader) -{ - nir_foreach_overload(shader, overload) { - if (overload->impl) { - nir_foreach_block(overload->impl, lower_samplers_for_vk_block, NULL); - } - } -} -- cgit v1.2.3 From 05f75a30267249fe1781e87389b5dd6c2339fdfb Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 17 Feb 2016 17:27:25 -0800 Subject: anv/meta: Don't use the blit ds layout in resolve code --- src/vulkan/anv_meta_resolve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c index ae53292bde8..ea5020c5f24 100644 --- a/src/vulkan/anv_meta_resolve.c +++ b/src/vulkan/anv_meta_resolve.c @@ -567,7 +567,7 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, .descriptorPool = dummy_desc_pool_h, .descriptorSetCount = 1, .pSetLayouts = (VkDescriptorSetLayout[]) { - device->meta_state.blit.ds_layout, + device->meta_state.resolve.ds_layout, }, }, &desc_set_h); -- cgit v1.2.3 From 542c38df36613eb1baa4c1c23b971dc7743b8e11 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 18 Feb 2016 10:21:08 -0800 Subject: anv/meta: Initialize blend state for the right attachment We were always initializing only RT 0. 
We need to initialize the RT we're creating the clear pipeline for. --- src/vulkan/anv_meta_clear.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 15e24a32a75..739ae09582c 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -280,19 +280,20 @@ create_color_pipeline(struct anv_device *device, .stencilTestEnable = false, }; + VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = { 0 }; + blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState) { + .blendEnable = false, + .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT, + }; + const VkPipelineColorBlendStateCreateInfo cb_state = { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .logicOpEnable = false, - .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { - .blendEnable = false, - .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | - VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT, - }, - }, + .attachmentCount = MAX_RTS, + .pAttachments = blend_attachment_state }; /* Disable repclear because we do not want the compiler to replace the -- cgit v1.2.3 From f6d95876888c81559d4ba773e4e6c82b184e708e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 10:19:02 -0800 Subject: vulkan: Move XML and generator into src/intel/genxml --- configure.ac | 2 + src/Makefile.am | 1 + src/intel/Makefile.am | 22 + src/intel/genxml/.gitignore | 1 + src/intel/genxml/Makefile.am | 31 + src/intel/genxml/gen7.xml | 2522 +++++++++++++++++++++++++ src/intel/genxml/gen75.xml | 2916 +++++++++++++++++++++++++++++ src/intel/genxml/gen8.xml | 3165 ++++++++++++++++++++++++++++++++ src/intel/genxml/gen9.xml | 3469 +++++++++++++++++++++++++++++++++++ src/intel/genxml/gen_pack_header.py | 614 
+++++++ src/vulkan/Makefile.am | 10 +- src/vulkan/anv_batch_chain.c | 4 +- src/vulkan/anv_device.c | 2 +- src/vulkan/anv_formats.c | 2 - src/vulkan/gen7.xml | 2522 ------------------------- src/vulkan/gen75.xml | 2916 ----------------------------- src/vulkan/gen7_cmd_buffer.c | 4 +- src/vulkan/gen7_pipeline.c | 4 +- src/vulkan/gen7_state.c | 4 +- src/vulkan/gen8.xml | 3165 -------------------------------- src/vulkan/gen8_cmd_buffer.c | 4 +- src/vulkan/gen8_pipeline.c | 4 +- src/vulkan/gen8_state.c | 4 +- src/vulkan/gen9.xml | 3469 ----------------------------------- src/vulkan/genX_cmd_buffer.c | 8 +- src/vulkan/genX_pipeline.c | 8 +- src/vulkan/gen_pack_header.py | 614 ------- 27 files changed, 12768 insertions(+), 12719 deletions(-) create mode 100644 src/intel/Makefile.am create mode 100644 src/intel/genxml/.gitignore create mode 100644 src/intel/genxml/Makefile.am create mode 100644 src/intel/genxml/gen7.xml create mode 100644 src/intel/genxml/gen75.xml create mode 100644 src/intel/genxml/gen8.xml create mode 100644 src/intel/genxml/gen9.xml create mode 100755 src/intel/genxml/gen_pack_header.py delete mode 100644 src/vulkan/gen7.xml delete mode 100644 src/vulkan/gen75.xml delete mode 100644 src/vulkan/gen8.xml delete mode 100644 src/vulkan/gen9.xml delete mode 100755 src/vulkan/gen_pack_header.py (limited to 'src') diff --git a/configure.ac b/configure.ac index 71bec624121..d6692b73ff1 100644 --- a/configure.ac +++ b/configure.ac @@ -2518,6 +2518,8 @@ AC_CONFIG_FILES([Makefile src/glx/apple/Makefile src/glx/tests/Makefile src/gtest/Makefile + src/intel/Makefile + src/intel/genxml/Makefile src/isl/Makefile src/loader/Makefile src/mapi/Makefile diff --git a/src/Makefile.am b/src/Makefile.am index 272e68ce875..25b48c65009 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -57,6 +57,7 @@ AM_CFLAGS = $(VISIBILITY_CFLAGS) AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS) if HAVE_VULKAN +SUBDIRS += intel SUBDIRS += isl SUBDIRS += vulkan endif diff --git a/src/intel/Makefile.am 
b/src/intel/Makefile.am new file mode 100644 index 00000000000..0a6f411c7ba --- /dev/null +++ b/src/intel/Makefile.am @@ -0,0 +1,22 @@ +# Copyright © 2016 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +SUBDIRS = genxml diff --git a/src/intel/genxml/.gitignore b/src/intel/genxml/.gitignore new file mode 100644 index 00000000000..dd114953c31 --- /dev/null +++ b/src/intel/genxml/.gitignore @@ -0,0 +1 @@ +gen*_pack.h diff --git a/src/intel/genxml/Makefile.am b/src/intel/genxml/Makefile.am new file mode 100644 index 00000000000..36ba526c154 --- /dev/null +++ b/src/intel/genxml/Makefile.am @@ -0,0 +1,31 @@ +# Copyright © 2016 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +BUILT_SOURCES = \ + gen7_pack.h \ + gen75_pack.h \ + gen8_pack.h \ + gen9_pack.h + +%_pack.h : %.xml gen_pack_header.py + $(AM_V_GEN) $(srcdir)/gen_pack_header.py $< > $@ + +CLEANFILES = $(BUILT_SOURCES) diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml new file mode 100644 index 00000000000..8faffcbf876 --- /dev/null +++ b/src/intel/genxml/gen7.xml @@ -0,0 +1,2522 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml new file mode 100644 index 00000000000..c1cbce9fdb6 --- /dev/null +++ b/src/intel/genxml/gen75.xml @@ -0,0 +1,2916 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml new file mode 100644 index 00000000000..fff6ae2ed42 --- /dev/null +++ b/src/intel/genxml/gen8.xml @@ -0,0 +1,3165 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml new file mode 100644 index 00000000000..3f229b0d0fc --- /dev/null +++ b/src/intel/genxml/gen9.xml @@ -0,0 +1,3469 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/intel/genxml/gen_pack_header.py b/src/intel/genxml/gen_pack_header.py new file mode 100755 index 00000000000..3cabb5864aa --- /dev/null +++ b/src/intel/genxml/gen_pack_header.py @@ -0,0 +1,614 @@ +#!/usr/bin/env python3 + +import xml.parsers.expat +import re +import sys +import copy + +license = """/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice 
(including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +""" + +pack_header = """%(license)s + +/* Instructions, enums and structures for %(platform)s. + * + * This file has been generated, do not hand edit. + */ + +#pragma once + +#include +#include + +#ifndef __gen_validate_value +#define __gen_validate_value(x) +#endif + +#ifndef __gen_field_functions +#define __gen_field_functions + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ull >> (64 - (end - start + 1))) << start; +} + +static inline uint64_t +__gen_uint(uint64_t v, uint32_t start, uint32_t end) +{ + const int width = end - start + 1; + + __gen_validate_value(v); + +#if DEBUG + if (width < 64) { + const uint64_t max = (1ull << width) - 1; + assert(v <= max); + } +#endif + + return v << start; +} + +static inline uint64_t +__gen_sint(int64_t v, uint32_t start, uint32_t end) +{ + const int width = end - start + 1; + + __gen_validate_value(v); + +#if DEBUG + if (width < 64) { + const int64_t max = (1ll << (width - 1)) - 1; + const int64_t min = -(1ll << (width - 1)); + assert(min <= v && v <= max); + } +#endif + + const uint64_t mask = ~0ull >> (64 - width); + + return (v & mask) << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; + + 
assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline uint32_t +__gen_float(float v) +{ + __gen_validate_value(v); + return ((union __gen_value) { .f = (v) }).dw; +} + +static inline uint64_t +__gen_sfixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits) +{ + __gen_validate_value(v); + + const float factor = (1 << fract_bits); + +#if DEBUG + const float max = ((1 << (end - start)) - 1) / factor; + const float min = -(1 << (end - start)) / factor; + assert(min <= v && v <= max); +#endif + + const int32_t int_val = roundf(v * factor); + const uint64_t mask = ~0ull >> (64 - (end - start + 1)); + + return (int_val & mask) << start; +} + +static inline uint64_t +__gen_ufixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits) +{ + __gen_validate_value(v); + + const float factor = (1 << fract_bits); + +#if DEBUG + const float max = ((1 << (end - start + 1)) - 1) / factor; + const float min = 0.0f; + assert(min <= v && v <= max); +#endif + + const uint32_t uint_val = roundf(v * factor); + + return uint_val << start; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#endif + +""" + +def to_alphanum(name): + substitutions = { + ' ': '', + '/': '', + '[': '', + ']': '', + '(': '', + ')': '', + '-': '', + ':': '', + '.': '', + ',': '', + '=': '', + '>': '', + '#': '', + 'α': 'alpha', + '&': '', + '*': '', + '"': '', + '+': '', + '\'': '', + } + + for i, j in substitutions.items(): + name = name.replace(i, j) + + return name + +def safe_name(name): + name = to_alphanum(name) + if not str.isalpha(name[0]): + name = '_' + name + + return name + +class Field: + ufixed_pattern = re.compile("u(\d+)\.(\d+)") + sfixed_pattern = re.compile("s(\d+)\.(\d+)") + + def __init__(self, parser, attrs): + self.parser = parser + if "name" in attrs: + self.name = safe_name(attrs["name"]) + self.start = 
int(attrs["start"]) + self.end = int(attrs["end"]) + self.type = attrs["type"] + + if "prefix" in attrs: + self.prefix = attrs["prefix"] + else: + self.prefix = None + + if "default" in attrs: + self.default = int(attrs["default"]) + else: + self.default = None + + ufixed_match = Field.ufixed_pattern.match(self.type) + if ufixed_match: + self.type = 'ufixed' + self.fractional_size = int(ufixed_match.group(2)) + + sfixed_match = Field.sfixed_pattern.match(self.type) + if sfixed_match: + self.type = 'sfixed' + self.fractional_size = int(sfixed_match.group(2)) + + def emit_template_struct(self, dim): + if self.type == 'address': + type = '__gen_address_type' + elif self.type == 'bool': + type = 'bool' + elif self.type == 'float': + type = 'float' + elif self.type == 'ufixed': + type = 'float' + elif self.type == 'sfixed': + type = 'float' + elif self.type == 'uint' and self.end - self.start > 32: + type = 'uint64_t' + elif self.type == 'offset': + type = 'uint64_t' + elif self.type == 'int': + type = 'int32_t' + elif self.type == 'uint': + type = 'uint32_t' + elif self.type in self.parser.structs: + type = 'struct ' + self.parser.gen_prefix(safe_name(self.type)) + elif self.type == 'mbo': + return + else: + print("#error unhandled type: %s" % self.type) + + print(" %-36s %s%s;" % (type, self.name, dim)) + + if len(self.values) > 0 and self.default == None: + if self.prefix: + prefix = self.prefix + "_" + else: + prefix = "" + + for value in self.values: + print("#define %-40s %d" % (prefix + value.name, value.value)) + +class Group: + def __init__(self, parser, parent, start, count, size): + self.parser = parser + self.parent = parent + self.start = start + self.count = count + self.size = size + self.fields = [] + + def emit_template_struct(self, dim): + if self.count == 0: + print(" /* variable length fields follow */") + else: + if self.count > 1: + dim = "%s[%d]" % (dim, self.count) + + for field in self.fields: + field.emit_template_struct(dim) + + class DWord: + 
def __init__(self): + self.size = 32 + self.fields = [] + self.address = None + + def collect_dwords(self, dwords, start, dim): + for field in self.fields: + if type(field) is Group: + if field.count == 1: + field.collect_dwords(dwords, start + field.start, dim) + else: + for i in range(field.count): + field.collect_dwords(dwords, + start + field.start + i * field.size, + "%s[%d]" % (dim, i)) + continue + + index = (start + field.start) // 32 + if not index in dwords: + dwords[index] = self.DWord() + + clone = copy.copy(field) + clone.start = clone.start + start + clone.end = clone.end + start + clone.dim = dim + dwords[index].fields.append(clone) + + if field.type == "address": + # assert dwords[index].address == None + dwords[index].address = field + + # Coalesce all the dwords covered by this field. The two cases we + # handle are where multiple fields are in a 64 bit word (typically + # and address and a few bits) or where a single struct field + # completely covers multiple dwords. + while index < (start + field.end) // 32: + if index + 1 in dwords and not dwords[index] == dwords[index + 1]: + dwords[index].fields.extend(dwords[index + 1].fields) + dwords[index].size = 64 + dwords[index + 1] = dwords[index] + index = index + 1 + + def emit_pack_function(self, start): + dwords = {} + self.collect_dwords(dwords, 0, "") + + # Determine number of dwords in this group. If we have a size, use + # that, since that'll account for MBZ dwords at the end of a group + # (like dword 8 on BDW+ 3DSTATE_HS). Otherwise, use the largest dword + # index we've seen plus one. + if self.size > 0: + length = self.size // 32 + else: + length = max(dwords.keys()) + 1 + + for index in range(length): + # Handle MBZ dwords + if not index in dwords: + print("") + print(" dw[%d] = 0;" % index) + continue + + # For 64 bit dwords, we aliased the two dword entries in the dword + # dict it occupies. Now that we're emitting the pack function, + # skip the duplicate entries. 
+ dw = dwords[index] + if index > 0 and index - 1 in dwords and dw == dwords[index - 1]: + continue + + # Special case: only one field and it's a struct at the beginning + # of the dword. In this case we pack directly into the + # destination. This is the only way we handle embedded structs + # larger than 32 bits. + if len(dw.fields) == 1: + field = dw.fields[0] + name = field.name + field.dim + if field.type in self.parser.structs and field.start % 32 == 0: + print("") + print(" %s_pack(data, &dw[%d], &values->%s);" % + (self.parser.gen_prefix(safe_name(field.type)), index, name)) + continue + + # Pack any fields of struct type first so we have integer values + # to the dword for those fields. + field_index = 0 + for field in dw.fields: + if type(field) is Field and field.type in self.parser.structs: + name = field.name + field.dim + print("") + print(" uint32_t v%d_%d;" % (index, field_index)) + print(" %s_pack(data, &v%d_%d, &values->%s);" % + (self.parser.gen_prefix(safe_name(field.type)), index, field_index, name)) + field_index = field_index + 1 + + print("") + dword_start = index * 32 + if dw.address == None: + address_count = 0 + else: + address_count = 1 + + if dw.size == 32 and dw.address == None: + v = None + print(" dw[%d] =" % index) + elif len(dw.fields) > address_count: + v = "v%d" % index + print(" const uint%d_t %s =" % (dw.size, v)) + else: + v = "0" + + field_index = 0 + for field in dw.fields: + if field.type != "mbo": + name = field.name + field.dim + + if field.type == "mbo": + s = "__gen_mbo(%d, %d)" % \ + (field.start - dword_start, field.end - dword_start) + elif field.type == "address": + s = None + elif field.type == "uint": + s = "__gen_uint(values->%s, %d, %d)" % \ + (name, field.start - dword_start, field.end - dword_start) + elif field.type == "int": + s = "__gen_sint(values->%s, %d, %d)" % \ + (name, field.start - dword_start, field.end - dword_start) + elif field.type == "bool": + s = "__gen_uint(values->%s, %d, %d)" % \ + (name, 
field.start - dword_start, field.end - dword_start) + elif field.type == "float": + s = "__gen_float(values->%s)" % name + elif field.type == "offset": + s = "__gen_offset(values->%s, %d, %d)" % \ + (name, field.start - dword_start, field.end - dword_start) + elif field.type == 'ufixed': + s = "__gen_ufixed(values->%s, %d, %d, %d)" % \ + (name, field.start - dword_start, field.end - dword_start, field.fractional_size) + elif field.type == 'sfixed': + s = "__gen_sfixed(values->%s, %d, %d, %d)" % \ + (name, field.start - dword_start, field.end - dword_start, field.fractional_size) + elif field.type in self.parser.structs: + s = "__gen_uint(v%d_%d, %d, %d)" % \ + (index, field_index, field.start - dword_start, field.end - dword_start) + field_index = field_index + 1 + else: + print("/* unhandled field %s, type %s */\n" % (name, field.type)) + s = None + + if not s == None: + if field == dw.fields[-1]: + print(" %s;" % s) + else: + print(" %s |" % s) + + if dw.size == 32: + if dw.address: + print(" dw[%d] = __gen_combine_address(data, &dw[%d], values->%s, %s);" % (index, index, dw.address.name, v)) + continue + + if dw.address: + v_address = "v%d_address" % index + print(" const uint64_t %s =\n __gen_combine_address(data, &dw[%d], values->%s, %s);" % + (v_address, index, dw.address.name, v)) + v = v_address + + print(" dw[%d] = %s;" % (index, v)) + print(" dw[%d] = %s >> 32;" % (index + 1, v)) + +class Value: + def __init__(self, attrs): + self.name = safe_name(attrs["name"]) + self.value = int(attrs["value"]) + +class Parser: + def __init__(self): + self.parser = xml.parsers.expat.ParserCreate() + self.parser.StartElementHandler = self.start_element + self.parser.EndElementHandler = self.end_element + + self.instruction = None + self.structs = {} + + def start_element(self, name, attrs): + if name == "genxml": + self.platform = attrs["name"] + self.gen = attrs["gen"].replace('.', '') + print(pack_header % {'license': license, 'platform': self.platform}) + elif name == 
"instruction": + self.instruction = safe_name(attrs["name"]) + self.length_bias = int(attrs["bias"]) + if "length" in attrs: + self.length = int(attrs["length"]) + size = self.length * 32 + else: + self.length = None + size = 0 + self.group = Group(self, None, 0, 1, size) + elif name == "struct": + self.struct = safe_name(attrs["name"]) + self.structs[attrs["name"]] = 1 + if "length" in attrs: + self.length = int(attrs["length"]) + size = self.length * 32 + else: + self.length = None + size = 0 + self.group = Group(self, None, 0, 1, size) + + elif name == "group": + group = Group(self, self.group, + int(attrs["start"]), int(attrs["count"]), int(attrs["size"])) + self.group.fields.append(group) + self.group = group + elif name == "field": + self.group.fields.append(Field(self, attrs)) + self.values = [] + elif name == "enum": + self.values = [] + self.enum = safe_name(attrs["name"]) + if "prefix" in attrs: + self.prefix = safe_name(attrs["prefix"]) + else: + self.prefix= None + elif name == "value": + self.values.append(Value(attrs)) + + def end_element(self, name): + if name == "instruction": + self.emit_instruction() + self.instruction = None + self.group = None + elif name == "struct": + self.emit_struct() + self.struct = None + self.group = None + elif name == "group": + self.group = self.group.parent + elif name == "field": + self.group.fields[-1].values = self.values + elif name == "enum": + self.emit_enum() + self.enum = None + + def gen_prefix(self, name): + if name[0] == "_": + return 'GEN%s%s' % (self.gen, name) + else: + return 'GEN%s_%s' % (self.gen, name) + + def emit_template_struct(self, name, group): + print("struct %s {" % self.gen_prefix(name)) + group.emit_template_struct("") + print("};\n") + + def emit_pack_function(self, name, group): + name = self.gen_prefix(name) + print("static inline void\n%s_pack(__gen_user_data *data, void * restrict dst,\n%sconst struct %s * restrict values)\n{" % + (name, ' ' * (len(name) + 6), name)) + + # Cast dst to 
make header C++ friendly + print(" uint32_t * restrict dw = (uint32_t * restrict) dst;") + + group.emit_pack_function(0) + + print("}\n") + + def emit_instruction(self): + name = self.instruction + if not self.length == None: + print('#define %-33s %4d' % + (self.gen_prefix(name + "_length"), self.length)) + print('#define %-33s %4d' % + (self.gen_prefix(name + "_length_bias"), self.length_bias)) + + default_fields = [] + for field in self.group.fields: + if not type(field) is Field: + continue + if field.default == None: + continue + default_fields.append(" .%-35s = %4d" % (field.name, field.default)) + + if default_fields: + print('#define %-40s\\' % (self.gen_prefix(name + '_header'))) + print(", \\\n".join(default_fields)) + print('') + + self.emit_template_struct(self.instruction, self.group) + + self.emit_pack_function(self.instruction, self.group) + + def emit_struct(self): + name = self.struct + if not self.length == None: + print('#define %-33s %4d' % + (self.gen_prefix(name + "_length"), self.length)) + + self.emit_template_struct(self.struct, self.group) + self.emit_pack_function(self.struct, self.group) + + def emit_enum(self): + print('/* enum %s */' % self.gen_prefix(self.enum)) + for value in self.values: + if self.prefix: + name = self.prefix + "_" + value.name + else: + name = value.name + print('#define %-36s %4d' % (name.upper(), value.value)) + print('') + + def parse(self, filename): + file = open(filename, "rb") + self.parser.ParseFile(file) + file.close() + +if len(sys.argv) < 2: + print("No input xml file specified") + sys.exit(1) + +input_file = sys.argv[1] + +p = Parser() +p.parse(input_file) diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 220bdbf5cec..50d972e6c7e 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -61,6 +61,7 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/intel/ \ 
-I$(top_srcdir)/src/isl/ \ -I$(top_builddir)/src \ -I$(top_builddir)/src/compiler \ @@ -99,11 +100,7 @@ VULKAN_SOURCES = \ BUILT_SOURCES = \ anv_entrypoints.h \ - anv_entrypoints.c \ - gen7_pack.h \ - gen75_pack.h \ - gen8_pack.h \ - gen9_pack.h + anv_entrypoints.c libanv_gen7_la_SOURCES = \ genX_cmd_buffer.c \ @@ -165,9 +162,6 @@ anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@ -%_pack.h : %.xml gen_pack_header.py - $(AM_V_GEN) $(srcdir)/gen_pack_header.py $< > $@ - CLEANFILES = $(BUILT_SOURCES) libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c index a050fa05c8e..d24dd06d7eb 100644 --- a/src/vulkan/anv_batch_chain.c +++ b/src/vulkan/anv_batch_chain.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "gen7_pack.h" -#include "gen8_pack.h" +#include "genxml/gen7_pack.h" +#include "genxml/gen8_pack.h" /** \file anv_batch_chain.c * diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index c3a370b0542..a8835f74179 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -32,7 +32,7 @@ #include "util/strtod.h" #include "util/debug.h" -#include "gen7_pack.h" +#include "genxml/gen7_pack.h" struct anv_dispatch_table dtable; diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c index 09cd8b9ddf9..7798a7bbde3 100644 --- a/src/vulkan/anv_formats.c +++ b/src/vulkan/anv_formats.c @@ -24,8 +24,6 @@ #include "anv_private.h" #include "brw_surface_formats.h" -#include "gen7_pack.h" - #define RGBA ((struct anv_format_swizzle) { 0, 1, 2, 3 }) #define BGRA ((struct anv_format_swizzle) { 2, 1, 0, 3 }) diff --git a/src/vulkan/gen7.xml b/src/vulkan/gen7.xml deleted file mode 100644 index 8faffcbf876..00000000000 --- a/src/vulkan/gen7.xml +++ /dev/null @@ -1,2522 +0,0 @@ - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/vulkan/gen75.xml b/src/vulkan/gen75.xml deleted file mode 100644 index c1cbce9fdb6..00000000000 --- a/src/vulkan/gen75.xml +++ /dev/null @@ -1,2916 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 7c12394b088..23327ec0724 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "gen7_pack.h" -#include "gen75_pack.h" +#include "genxml/gen7_pack.h" +#include "genxml/gen75_pack.h" static uint32_t cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c index c4f7a369598..7c054fa56d5 100644 --- a/src/vulkan/gen7_pipeline.c +++ b/src/vulkan/gen7_pipeline.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "gen7_pack.h" -#include "gen75_pack.h" +#include "genxml/gen7_pack.h" +#include "genxml/gen75_pack.h" #include "genX_pipeline_util.h" diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c index 0f66a5056c7..77bdb75260c 100644 --- a/src/vulkan/gen7_state.c +++ b/src/vulkan/gen7_state.c @@ -29,8 
+29,8 @@ #include "anv_private.h" -#include "gen7_pack.h" -#include "gen75_pack.h" +#include "genxml/gen7_pack.h" +#include "genxml/gen75_pack.h" #include "genX_state_util.h" diff --git a/src/vulkan/gen8.xml b/src/vulkan/gen8.xml deleted file mode 100644 index fff6ae2ed42..00000000000 --- a/src/vulkan/gen8.xml +++ /dev/null @@ -1,3165 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index 5ca6f6978c9..b741612c891 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "gen8_pack.h" -#include "gen9_pack.h" +#include "genxml/gen8_pack.h" +#include "genxml/gen9_pack.h" static uint32_t cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index eec3f7aafdd..f0411562fba 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "gen8_pack.h" -#include "gen9_pack.h" +#include "genxml/gen8_pack.h" +#include "genxml/gen9_pack.h" #include "genX_pipeline_util.h" diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index 45b8080d0bf..04cfff5444d 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "gen8_pack.h" -#include "gen9_pack.h" +#include "genxml/gen8_pack.h" +#include "genxml/gen9_pack.h" #include "genX_state_util.h" diff --git a/src/vulkan/gen9.xml b/src/vulkan/gen9.xml deleted file mode 100644 index 3f229b0d0fc..00000000000 --- a/src/vulkan/gen9.xml +++ /dev/null @@ -1,3469 +0,0 @@ - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index e25a20b3d7a..5498d1d68c6 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -27,13 +27,13 @@ #include "anv_private.h" #if (ANV_GEN == 9) -# include "gen9_pack.h" +# include "genxml/gen9_pack.h" #elif (ANV_GEN == 8) -# include "gen8_pack.h" +# include "genxml/gen8_pack.h" #elif (ANV_IS_HASWELL) -# include "gen75_pack.h" +# include "genxml/gen75_pack.h" #elif (ANV_GEN == 7) -# include "gen7_pack.h" +# include "genxml/gen7_pack.h" #endif void diff --git a/src/vulkan/genX_pipeline.c b/src/vulkan/genX_pipeline.c index b62e401c4ee..4c2e0bc6e0d 100644 --- a/src/vulkan/genX_pipeline.c +++ b/src/vulkan/genX_pipeline.c @@ -24,13 +24,13 @@ #include "anv_private.h" #if (ANV_GEN == 9) -# include "gen9_pack.h" +# include "genxml/gen9_pack.h" #elif (ANV_GEN == 8) -# include "gen8_pack.h" +# include "genxml/gen8_pack.h" #elif (ANV_IS_HASWELL) -# include "gen75_pack.h" +# include "genxml/gen75_pack.h" #elif (ANV_GEN == 7) -# include "gen7_pack.h" +# include "genxml/gen7_pack.h" #endif 
VkResult diff --git a/src/vulkan/gen_pack_header.py b/src/vulkan/gen_pack_header.py deleted file mode 100755 index 3cabb5864aa..00000000000 --- a/src/vulkan/gen_pack_header.py +++ /dev/null @@ -1,614 +0,0 @@ -#!/usr/bin/env python3 - -import xml.parsers.expat -import re -import sys -import copy - -license = """/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ -""" - -pack_header = """%(license)s - -/* Instructions, enums and structures for %(platform)s. - * - * This file has been generated, do not hand edit. 
- */ - -#pragma once - -#include -#include - -#ifndef __gen_validate_value -#define __gen_validate_value(x) -#endif - -#ifndef __gen_field_functions -#define __gen_field_functions - -union __gen_value { - float f; - uint32_t dw; -}; - -static inline uint64_t -__gen_mbo(uint32_t start, uint32_t end) -{ - return (~0ull >> (64 - (end - start + 1))) << start; -} - -static inline uint64_t -__gen_uint(uint64_t v, uint32_t start, uint32_t end) -{ - const int width = end - start + 1; - - __gen_validate_value(v); - -#if DEBUG - if (width < 64) { - const uint64_t max = (1ull << width) - 1; - assert(v <= max); - } -#endif - - return v << start; -} - -static inline uint64_t -__gen_sint(int64_t v, uint32_t start, uint32_t end) -{ - const int width = end - start + 1; - - __gen_validate_value(v); - -#if DEBUG - if (width < 64) { - const int64_t max = (1ll << (width - 1)) - 1; - const int64_t min = -(1ll << (width - 1)); - assert(min <= v && v <= max); - } -#endif - - const uint64_t mask = ~0ull >> (64 - width); - - return (v & mask) << start; -} - -static inline uint64_t -__gen_offset(uint64_t v, uint32_t start, uint32_t end) -{ - __gen_validate_value(v); -#if DEBUG - uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; - - assert((v & ~mask) == 0); -#endif - - return v; -} - -static inline uint32_t -__gen_float(float v) -{ - __gen_validate_value(v); - return ((union __gen_value) { .f = (v) }).dw; -} - -static inline uint64_t -__gen_sfixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits) -{ - __gen_validate_value(v); - - const float factor = (1 << fract_bits); - -#if DEBUG - const float max = ((1 << (end - start)) - 1) / factor; - const float min = -(1 << (end - start)) / factor; - assert(min <= v && v <= max); -#endif - - const int32_t int_val = roundf(v * factor); - const uint64_t mask = ~0ull >> (64 - (end - start + 1)); - - return (int_val & mask) << start; -} - -static inline uint64_t -__gen_ufixed(float v, uint32_t start, uint32_t end, uint32_t 
fract_bits) -{ - __gen_validate_value(v); - - const float factor = (1 << fract_bits); - -#if DEBUG - const float max = ((1 << (end - start + 1)) - 1) / factor; - const float min = 0.0f; - assert(min <= v && v <= max); -#endif - - const uint32_t uint_val = roundf(v * factor); - - return uint_val << start; -} - -#ifndef __gen_address_type -#error #define __gen_address_type before including this file -#endif - -#ifndef __gen_user_data -#error #define __gen_combine_address before including this file -#endif - -#endif - -""" - -def to_alphanum(name): - substitutions = { - ' ': '', - '/': '', - '[': '', - ']': '', - '(': '', - ')': '', - '-': '', - ':': '', - '.': '', - ',': '', - '=': '', - '>': '', - '#': '', - 'α': 'alpha', - '&': '', - '*': '', - '"': '', - '+': '', - '\'': '', - } - - for i, j in substitutions.items(): - name = name.replace(i, j) - - return name - -def safe_name(name): - name = to_alphanum(name) - if not str.isalpha(name[0]): - name = '_' + name - - return name - -class Field: - ufixed_pattern = re.compile("u(\d+)\.(\d+)") - sfixed_pattern = re.compile("s(\d+)\.(\d+)") - - def __init__(self, parser, attrs): - self.parser = parser - if "name" in attrs: - self.name = safe_name(attrs["name"]) - self.start = int(attrs["start"]) - self.end = int(attrs["end"]) - self.type = attrs["type"] - - if "prefix" in attrs: - self.prefix = attrs["prefix"] - else: - self.prefix = None - - if "default" in attrs: - self.default = int(attrs["default"]) - else: - self.default = None - - ufixed_match = Field.ufixed_pattern.match(self.type) - if ufixed_match: - self.type = 'ufixed' - self.fractional_size = int(ufixed_match.group(2)) - - sfixed_match = Field.sfixed_pattern.match(self.type) - if sfixed_match: - self.type = 'sfixed' - self.fractional_size = int(sfixed_match.group(2)) - - def emit_template_struct(self, dim): - if self.type == 'address': - type = '__gen_address_type' - elif self.type == 'bool': - type = 'bool' - elif self.type == 'float': - type = 'float' - 
elif self.type == 'ufixed': - type = 'float' - elif self.type == 'sfixed': - type = 'float' - elif self.type == 'uint' and self.end - self.start > 32: - type = 'uint64_t' - elif self.type == 'offset': - type = 'uint64_t' - elif self.type == 'int': - type = 'int32_t' - elif self.type == 'uint': - type = 'uint32_t' - elif self.type in self.parser.structs: - type = 'struct ' + self.parser.gen_prefix(safe_name(self.type)) - elif self.type == 'mbo': - return - else: - print("#error unhandled type: %s" % self.type) - - print(" %-36s %s%s;" % (type, self.name, dim)) - - if len(self.values) > 0 and self.default == None: - if self.prefix: - prefix = self.prefix + "_" - else: - prefix = "" - - for value in self.values: - print("#define %-40s %d" % (prefix + value.name, value.value)) - -class Group: - def __init__(self, parser, parent, start, count, size): - self.parser = parser - self.parent = parent - self.start = start - self.count = count - self.size = size - self.fields = [] - - def emit_template_struct(self, dim): - if self.count == 0: - print(" /* variable length fields follow */") - else: - if self.count > 1: - dim = "%s[%d]" % (dim, self.count) - - for field in self.fields: - field.emit_template_struct(dim) - - class DWord: - def __init__(self): - self.size = 32 - self.fields = [] - self.address = None - - def collect_dwords(self, dwords, start, dim): - for field in self.fields: - if type(field) is Group: - if field.count == 1: - field.collect_dwords(dwords, start + field.start, dim) - else: - for i in range(field.count): - field.collect_dwords(dwords, - start + field.start + i * field.size, - "%s[%d]" % (dim, i)) - continue - - index = (start + field.start) // 32 - if not index in dwords: - dwords[index] = self.DWord() - - clone = copy.copy(field) - clone.start = clone.start + start - clone.end = clone.end + start - clone.dim = dim - dwords[index].fields.append(clone) - - if field.type == "address": - # assert dwords[index].address == None - dwords[index].address = 
field - - # Coalesce all the dwords covered by this field. The two cases we - # handle are where multiple fields are in a 64 bit word (typically - # and address and a few bits) or where a single struct field - # completely covers multiple dwords. - while index < (start + field.end) // 32: - if index + 1 in dwords and not dwords[index] == dwords[index + 1]: - dwords[index].fields.extend(dwords[index + 1].fields) - dwords[index].size = 64 - dwords[index + 1] = dwords[index] - index = index + 1 - - def emit_pack_function(self, start): - dwords = {} - self.collect_dwords(dwords, 0, "") - - # Determine number of dwords in this group. If we have a size, use - # that, since that'll account for MBZ dwords at the end of a group - # (like dword 8 on BDW+ 3DSTATE_HS). Otherwise, use the largest dword - # index we've seen plus one. - if self.size > 0: - length = self.size // 32 - else: - length = max(dwords.keys()) + 1 - - for index in range(length): - # Handle MBZ dwords - if not index in dwords: - print("") - print(" dw[%d] = 0;" % index) - continue - - # For 64 bit dwords, we aliased the two dword entries in the dword - # dict it occupies. Now that we're emitting the pack function, - # skip the duplicate entries. - dw = dwords[index] - if index > 0 and index - 1 in dwords and dw == dwords[index - 1]: - continue - - # Special case: only one field and it's a struct at the beginning - # of the dword. In this case we pack directly into the - # destination. This is the only way we handle embedded structs - # larger than 32 bits. - if len(dw.fields) == 1: - field = dw.fields[0] - name = field.name + field.dim - if field.type in self.parser.structs and field.start % 32 == 0: - print("") - print(" %s_pack(data, &dw[%d], &values->%s);" % - (self.parser.gen_prefix(safe_name(field.type)), index, name)) - continue - - # Pack any fields of struct type first so we have integer values - # to the dword for those fields. 
- field_index = 0 - for field in dw.fields: - if type(field) is Field and field.type in self.parser.structs: - name = field.name + field.dim - print("") - print(" uint32_t v%d_%d;" % (index, field_index)) - print(" %s_pack(data, &v%d_%d, &values->%s);" % - (self.parser.gen_prefix(safe_name(field.type)), index, field_index, name)) - field_index = field_index + 1 - - print("") - dword_start = index * 32 - if dw.address == None: - address_count = 0 - else: - address_count = 1 - - if dw.size == 32 and dw.address == None: - v = None - print(" dw[%d] =" % index) - elif len(dw.fields) > address_count: - v = "v%d" % index - print(" const uint%d_t %s =" % (dw.size, v)) - else: - v = "0" - - field_index = 0 - for field in dw.fields: - if field.type != "mbo": - name = field.name + field.dim - - if field.type == "mbo": - s = "__gen_mbo(%d, %d)" % \ - (field.start - dword_start, field.end - dword_start) - elif field.type == "address": - s = None - elif field.type == "uint": - s = "__gen_uint(values->%s, %d, %d)" % \ - (name, field.start - dword_start, field.end - dword_start) - elif field.type == "int": - s = "__gen_sint(values->%s, %d, %d)" % \ - (name, field.start - dword_start, field.end - dword_start) - elif field.type == "bool": - s = "__gen_uint(values->%s, %d, %d)" % \ - (name, field.start - dword_start, field.end - dword_start) - elif field.type == "float": - s = "__gen_float(values->%s)" % name - elif field.type == "offset": - s = "__gen_offset(values->%s, %d, %d)" % \ - (name, field.start - dword_start, field.end - dword_start) - elif field.type == 'ufixed': - s = "__gen_ufixed(values->%s, %d, %d, %d)" % \ - (name, field.start - dword_start, field.end - dword_start, field.fractional_size) - elif field.type == 'sfixed': - s = "__gen_sfixed(values->%s, %d, %d, %d)" % \ - (name, field.start - dword_start, field.end - dword_start, field.fractional_size) - elif field.type in self.parser.structs: - s = "__gen_uint(v%d_%d, %d, %d)" % \ - (index, field_index, field.start - 
dword_start, field.end - dword_start) - field_index = field_index + 1 - else: - print("/* unhandled field %s, type %s */\n" % (name, field.type)) - s = None - - if not s == None: - if field == dw.fields[-1]: - print(" %s;" % s) - else: - print(" %s |" % s) - - if dw.size == 32: - if dw.address: - print(" dw[%d] = __gen_combine_address(data, &dw[%d], values->%s, %s);" % (index, index, dw.address.name, v)) - continue - - if dw.address: - v_address = "v%d_address" % index - print(" const uint64_t %s =\n __gen_combine_address(data, &dw[%d], values->%s, %s);" % - (v_address, index, dw.address.name, v)) - v = v_address - - print(" dw[%d] = %s;" % (index, v)) - print(" dw[%d] = %s >> 32;" % (index + 1, v)) - -class Value: - def __init__(self, attrs): - self.name = safe_name(attrs["name"]) - self.value = int(attrs["value"]) - -class Parser: - def __init__(self): - self.parser = xml.parsers.expat.ParserCreate() - self.parser.StartElementHandler = self.start_element - self.parser.EndElementHandler = self.end_element - - self.instruction = None - self.structs = {} - - def start_element(self, name, attrs): - if name == "genxml": - self.platform = attrs["name"] - self.gen = attrs["gen"].replace('.', '') - print(pack_header % {'license': license, 'platform': self.platform}) - elif name == "instruction": - self.instruction = safe_name(attrs["name"]) - self.length_bias = int(attrs["bias"]) - if "length" in attrs: - self.length = int(attrs["length"]) - size = self.length * 32 - else: - self.length = None - size = 0 - self.group = Group(self, None, 0, 1, size) - elif name == "struct": - self.struct = safe_name(attrs["name"]) - self.structs[attrs["name"]] = 1 - if "length" in attrs: - self.length = int(attrs["length"]) - size = self.length * 32 - else: - self.length = None - size = 0 - self.group = Group(self, None, 0, 1, size) - - elif name == "group": - group = Group(self, self.group, - int(attrs["start"]), int(attrs["count"]), int(attrs["size"])) - self.group.fields.append(group) 
- self.group = group - elif name == "field": - self.group.fields.append(Field(self, attrs)) - self.values = [] - elif name == "enum": - self.values = [] - self.enum = safe_name(attrs["name"]) - if "prefix" in attrs: - self.prefix = safe_name(attrs["prefix"]) - else: - self.prefix= None - elif name == "value": - self.values.append(Value(attrs)) - - def end_element(self, name): - if name == "instruction": - self.emit_instruction() - self.instruction = None - self.group = None - elif name == "struct": - self.emit_struct() - self.struct = None - self.group = None - elif name == "group": - self.group = self.group.parent - elif name == "field": - self.group.fields[-1].values = self.values - elif name == "enum": - self.emit_enum() - self.enum = None - - def gen_prefix(self, name): - if name[0] == "_": - return 'GEN%s%s' % (self.gen, name) - else: - return 'GEN%s_%s' % (self.gen, name) - - def emit_template_struct(self, name, group): - print("struct %s {" % self.gen_prefix(name)) - group.emit_template_struct("") - print("};\n") - - def emit_pack_function(self, name, group): - name = self.gen_prefix(name) - print("static inline void\n%s_pack(__gen_user_data *data, void * restrict dst,\n%sconst struct %s * restrict values)\n{" % - (name, ' ' * (len(name) + 6), name)) - - # Cast dst to make header C++ friendly - print(" uint32_t * restrict dw = (uint32_t * restrict) dst;") - - group.emit_pack_function(0) - - print("}\n") - - def emit_instruction(self): - name = self.instruction - if not self.length == None: - print('#define %-33s %4d' % - (self.gen_prefix(name + "_length"), self.length)) - print('#define %-33s %4d' % - (self.gen_prefix(name + "_length_bias"), self.length_bias)) - - default_fields = [] - for field in self.group.fields: - if not type(field) is Field: - continue - if field.default == None: - continue - default_fields.append(" .%-35s = %4d" % (field.name, field.default)) - - if default_fields: - print('#define %-40s\\' % (self.gen_prefix(name + '_header'))) - 
print(", \\\n".join(default_fields)) - print('') - - self.emit_template_struct(self.instruction, self.group) - - self.emit_pack_function(self.instruction, self.group) - - def emit_struct(self): - name = self.struct - if not self.length == None: - print('#define %-33s %4d' % - (self.gen_prefix(name + "_length"), self.length)) - - self.emit_template_struct(self.struct, self.group) - self.emit_pack_function(self.struct, self.group) - - def emit_enum(self): - print('/* enum %s */' % self.gen_prefix(self.enum)) - for value in self.values: - if self.prefix: - name = self.prefix + "_" + value.name - else: - name = value.name - print('#define %-36s %4d' % (name.upper(), value.value)) - print('') - - def parse(self, filename): - file = open(filename, "rb") - self.parser.ParseFile(file) - file.close() - -if len(sys.argv) < 2: - print("No input xml file specified") - sys.exit(1) - -input_file = sys.argv[1] - -p = Parser() -p.parse(input_file) -- cgit v1.2.3 From 47b8b08612d44a43e43c3f6e95fe509ee3348723 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 10:34:47 -0800 Subject: Move isl to src/intel --- configure.ac | 2 +- src/Makefile.am | 1 - src/intel/Makefile.am | 2 +- src/intel/isl/.gitignore | 1 + src/intel/isl/Makefile.am | 90 ++ src/intel/isl/README | 113 ++ src/intel/isl/isl.c | 1428 ++++++++++++++++++++ src/intel/isl/isl.h | 1025 ++++++++++++++ src/intel/isl/isl_format.c | 95 ++ src/intel/isl/isl_format_layout.csv | 287 ++++ src/intel/isl/isl_format_layout_gen.bash | 128 ++ src/intel/isl/isl_gen4.c | 74 + src/intel/isl/isl_gen4.h | 47 + src/intel/isl/isl_gen6.c | 160 +++ src/intel/isl/isl_gen6.h | 47 + src/intel/isl/isl_gen7.c | 395 ++++++ src/intel/isl/isl_gen7.h | 52 + src/intel/isl/isl_gen8.c | 229 ++++ src/intel/isl/isl_gen8.h | 47 + src/intel/isl/isl_gen9.c | 185 +++ src/intel/isl/isl_gen9.h | 41 + src/intel/isl/isl_image.c | 188 +++ src/intel/isl/isl_priv.h | 141 ++ src/intel/isl/tests/.gitignore | 1 + 
.../isl/tests/isl_surf_get_image_offset_test.c | 353 +++++ src/isl/.gitignore | 1 - src/isl/Makefile.am | 90 -- src/isl/README | 113 -- src/isl/isl.c | 1428 -------------------- src/isl/isl.h | 1025 -------------- src/isl/isl_format.c | 95 -- src/isl/isl_format_layout.csv | 287 ---- src/isl/isl_format_layout_gen.bash | 128 -- src/isl/isl_gen4.c | 74 - src/isl/isl_gen4.h | 47 - src/isl/isl_gen6.c | 160 --- src/isl/isl_gen6.h | 47 - src/isl/isl_gen7.c | 395 ------ src/isl/isl_gen7.h | 52 - src/isl/isl_gen8.c | 229 ---- src/isl/isl_gen8.h | 47 - src/isl/isl_gen9.c | 185 --- src/isl/isl_gen9.h | 41 - src/isl/isl_image.c | 188 --- src/isl/isl_priv.h | 141 -- src/isl/tests/.gitignore | 1 - src/isl/tests/isl_surf_get_image_offset_test.c | 353 ----- src/vulkan/Makefile.am | 3 +- src/vulkan/anv_private.h | 2 +- 49 files changed, 5131 insertions(+), 5133 deletions(-) create mode 100644 src/intel/isl/.gitignore create mode 100644 src/intel/isl/Makefile.am create mode 100644 src/intel/isl/README create mode 100644 src/intel/isl/isl.c create mode 100644 src/intel/isl/isl.h create mode 100644 src/intel/isl/isl_format.c create mode 100644 src/intel/isl/isl_format_layout.csv create mode 100755 src/intel/isl/isl_format_layout_gen.bash create mode 100644 src/intel/isl/isl_gen4.c create mode 100644 src/intel/isl/isl_gen4.h create mode 100644 src/intel/isl/isl_gen6.c create mode 100644 src/intel/isl/isl_gen6.h create mode 100644 src/intel/isl/isl_gen7.c create mode 100644 src/intel/isl/isl_gen7.h create mode 100644 src/intel/isl/isl_gen8.c create mode 100644 src/intel/isl/isl_gen8.h create mode 100644 src/intel/isl/isl_gen9.c create mode 100644 src/intel/isl/isl_gen9.h create mode 100644 src/intel/isl/isl_image.c create mode 100644 src/intel/isl/isl_priv.h create mode 100644 src/intel/isl/tests/.gitignore create mode 100644 src/intel/isl/tests/isl_surf_get_image_offset_test.c delete mode 100644 src/isl/.gitignore delete mode 100644 src/isl/Makefile.am delete mode 100644 src/isl/README 
delete mode 100644 src/isl/isl.c delete mode 100644 src/isl/isl.h delete mode 100644 src/isl/isl_format.c delete mode 100644 src/isl/isl_format_layout.csv delete mode 100755 src/isl/isl_format_layout_gen.bash delete mode 100644 src/isl/isl_gen4.c delete mode 100644 src/isl/isl_gen4.h delete mode 100644 src/isl/isl_gen6.c delete mode 100644 src/isl/isl_gen6.h delete mode 100644 src/isl/isl_gen7.c delete mode 100644 src/isl/isl_gen7.h delete mode 100644 src/isl/isl_gen8.c delete mode 100644 src/isl/isl_gen8.h delete mode 100644 src/isl/isl_gen9.c delete mode 100644 src/isl/isl_gen9.h delete mode 100644 src/isl/isl_image.c delete mode 100644 src/isl/isl_priv.h delete mode 100644 src/isl/tests/.gitignore delete mode 100644 src/isl/tests/isl_surf_get_image_offset_test.c (limited to 'src') diff --git a/configure.ac b/configure.ac index d6692b73ff1..604ea3728b4 100644 --- a/configure.ac +++ b/configure.ac @@ -2520,7 +2520,7 @@ AC_CONFIG_FILES([Makefile src/gtest/Makefile src/intel/Makefile src/intel/genxml/Makefile - src/isl/Makefile + src/intel/isl/Makefile src/loader/Makefile src/mapi/Makefile src/mapi/es1api/glesv1_cm.pc diff --git a/src/Makefile.am b/src/Makefile.am index 25b48c65009..02b83717755 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -58,7 +58,6 @@ AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS) if HAVE_VULKAN SUBDIRS += intel -SUBDIRS += isl SUBDIRS += vulkan endif diff --git a/src/intel/Makefile.am b/src/intel/Makefile.am index 0a6f411c7ba..520602dd290 100644 --- a/src/intel/Makefile.am +++ b/src/intel/Makefile.am @@ -19,4 +19,4 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. 
-SUBDIRS = genxml +SUBDIRS = genxml isl diff --git a/src/intel/isl/.gitignore b/src/intel/isl/.gitignore new file mode 100644 index 00000000000..e9cfd67b94e --- /dev/null +++ b/src/intel/isl/.gitignore @@ -0,0 +1 @@ +/isl_format_layout.c diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am new file mode 100644 index 00000000000..72f5460554f --- /dev/null +++ b/src/intel/isl/Makefile.am @@ -0,0 +1,90 @@ +# Copyright 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +SUBDIRS = . 
+ +noinst_LTLIBRARIES = libisl.la + +EXTRA_DIST = tests + +# The gallium includes are for the util/u_math.h include from main/macros.h +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_builddir)/src + +libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init + +libisl_la_SOURCES = \ + isl.c \ + isl.h \ + isl_format.c \ + isl_format_layout.c \ + isl_gen4.c \ + isl_gen4.h \ + isl_gen6.c \ + isl_gen6.h \ + isl_gen7.c \ + isl_gen7.h \ + isl_gen8.c \ + isl_gen8.h \ + isl_gen9.c \ + isl_gen9.h \ + isl_image.c \ + $(NULL) + +BUILT_SOURCES = \ + isl_format_layout.c + +isl_format_layout.c: isl_format_layout_gen.bash \ + isl_format_layout.csv + $(AM_V_GEN)$(srcdir)/isl_format_layout_gen.bash \ + <$(srcdir)/isl_format_layout.csv >$@ + +# ---------------------------------------------------------------------------- +# Tests +# ---------------------------------------------------------------------------- + +TESTS = tests/isl_surf_get_image_offset_test + +check_PROGRAMS = $(TESTS) + +# Link tests to lib965_compiler.la for brw_get_device_info(). 
+tests_ldadd = \ + libisl.la \ + $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la + +tests_isl_surf_get_image_offset_test_SOURCES = \ + tests/isl_surf_get_image_offset_test.c +tests_isl_surf_get_image_offset_test_LDADD = $(tests_ldadd) + +# ---------------------------------------------------------------------------- + +include $(top_srcdir)/install-lib-links.mk diff --git a/src/intel/isl/README b/src/intel/isl/README new file mode 100644 index 00000000000..1ab4313fcc5 --- /dev/null +++ b/src/intel/isl/README @@ -0,0 +1,113 @@ +Intel Surface Layout + +Introduction +============ +isl is a small library that calculates the layout of Intel GPU surfaces, queries +those layouts, and queries the properties of surface formats. + + +Independence from User APIs +=========================== +isl's API is independent of any user-facing graphics API, such as OpenGL and +Vulkan. This independence allows isl to be used a shared component by multiple +Intel drivers. + +Rather than mimic the user-facing APIs, the isl API attempts to reflect Intel +hardware: the actual memory layout of Intel GPU surfaces and how one programs +the GPU to use those surfaces. For example: + + - The tokens of `enum isl_format` (such as `ISL_FORMAT_R8G8B8A8_UNORM`) + match those of the hardware enum `SURFACE_FORMAT` rather than the OpenGL + or Vulkan format tokens. And the values of `isl_format` and + `SURFACE_FORMAT` are identical. + + - The OpenGL and Vulkan APIs contain depth and stencil formats. However the + hardware enum `SURFACE_FORMAT` does not, and therefore neither does `enum + isl_format`. Rather than define new pixel formats that have no hardware + counterpart, isl records the intent to use a surface as a depth or stencil + buffer with the usage flags `ISL_SURF_USAGE_DEPTH_BIT` and + `ISL_SURF_USAGE_STENCIL_BIT`. 
+ + - `struct isl_surf` distinguishes between the surface's logical dimension + from the user API's perspective (`enum isl_surf_dim`, which may be 1D, 2D, + or 3D) and the layout of those dimensions in memory (`enum isl_dim_layout`). + + +Surface Units +============= + +Intro +----- +ISL takes care in its equations to correctly handle conversion among surface +units (such as pixels and compression blocks) and to carefully distinguish +between a surface's logical layout in the client API and its physical layout +in memory. + +Symbol names often explicitly declare their unit with a suffix: + + - px: logical pixels + - sa: physical surface samples + - el: physical surface elements + - sa_rows: rows of physical surface samples + - el_rows: rows of physical surface elements + +Logical units are independent of hardware generation and are closely related +to the user-facing API (OpenGL and Vulkan). Physical units are dependent on +hardware generation and reflect the surface's layout in memory. + +Definitions +----------- +- Logical Pixels (px): + + The surface's layout from the perspective of the client API (OpenGL and + Vulkan) is in units of logical pixels. Logical pixels are independent of the + surface's layout in memory. + + A surface's width and height, in units of logical pixels, is not affected by + the surface's sample count. For example, consider a VkImage created with + VkImageCreateInfo{width=w0, height=h0, samples=s0}. The surface's width and + height at level 0 is, in units of logical pixels, w0 and h0 regardless of + the value of s0. + + For example, the logical array length of a 3D surface is always 1, even on + Gen9 where the surface's memory layout is that of an array surface + (ISL_DIM_LAYOUT_GEN4_2D). + +- Physical Surface Samples (sa): + + For a multisampled surface, this unit has the obvious meaning. + A singlesampled surface, from ISL's perspective, is simply a multisampled + surface whose sample count is 1. 
+ + For example, consider a 2D single-level non-array surface with samples=4, + width_px=64, and height_px=64 (note that the suffix 'px' indicates logical + pixels). If the surface's multisample layout is ISL_MSAA_LAYOUT_INTERLEAVED, + then the extent of level 0 is, in units of physical surface samples, + width_sa=128, height_sa=128, depth_sa=1, array_length_sa=1. If + ISL_MSAA_LAYOUT_ARRAY, then width_sa=64, height_sa=64, depth_sa=1, + array_length_sa=4. + +- Physical Surface Elements (el): + + This unit allows ISL to treat compressed and uncompressed formats + identically in many calculations. + + If the surface's pixel format is compressed, such as ETC2, then a surface + element is equivalent to a compression block. If uncompressed, then + a surface element is equivalent to a surface sample. As a corollary, for + a given surface a surface element is at least as large as a surface sample. + +Errata +------ +ISL acquired the term 'surface element' from the Broadwell PRM [1], which +defines it as follows: + + An element is defined as a pixel in uncompresed surface formats, and as + a compression block in compressed surface formats. For MSFMT_DEPTH_STENCIL + type multisampled surfaces, an element is a sample. 
+ + +References +========== +[1]: Broadwell PRM >> Volume 2d: Command Reference: Structures >> + RENDER_SURFACE_STATE Surface Vertical Alignment (p325) diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c new file mode 100644 index 00000000000..27928fd0850 --- /dev/null +++ b/src/intel/isl/isl.c @@ -0,0 +1,1428 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "isl.h" +#include "isl_gen4.h" +#include "isl_gen6.h" +#include "isl_gen7.h" +#include "isl_gen8.h" +#include "isl_gen9.h" +#include "isl_priv.h" + +void PRINTFLIKE(3, 4) UNUSED +__isl_finishme(const char *file, int line, const char *fmt, ...) 
+{ + va_list ap; + char buf[512]; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf); +} + +void +isl_device_init(struct isl_device *dev, + const struct brw_device_info *info, + bool has_bit6_swizzling) +{ + dev->info = info; + dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6; + dev->has_bit6_swizzling = has_bit6_swizzling; + + /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some + * device properties at buildtime. Verify that the macros with the device + * properties chosen during runtime. + */ + assert(ISL_DEV_GEN(dev) == dev->info->gen); + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev) == dev->use_separate_stencil); + + /* Did we break hiz or stencil? */ + if (ISL_DEV_USE_SEPARATE_STENCIL(dev)) + assert(info->has_hiz_and_separate_stencil); + if (info->must_use_separate_stencil) + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); +} + +/** + * @brief Query the set of multisamples supported by the device. + * + * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always + * supported. 
+ */ +isl_sample_count_mask_t ATTRIBUTE_CONST +isl_device_get_sample_counts(struct isl_device *dev) +{ + if (ISL_DEV_GEN(dev) >= 9) { + return ISL_SAMPLE_COUNT_1_BIT | + ISL_SAMPLE_COUNT_2_BIT | + ISL_SAMPLE_COUNT_4_BIT | + ISL_SAMPLE_COUNT_8_BIT | + ISL_SAMPLE_COUNT_16_BIT; + } else if (ISL_DEV_GEN(dev) >= 8) { + return ISL_SAMPLE_COUNT_1_BIT | + ISL_SAMPLE_COUNT_2_BIT | + ISL_SAMPLE_COUNT_4_BIT | + ISL_SAMPLE_COUNT_8_BIT; + } else if (ISL_DEV_GEN(dev) >= 7) { + return ISL_SAMPLE_COUNT_1_BIT | + ISL_SAMPLE_COUNT_4_BIT | + ISL_SAMPLE_COUNT_8_BIT; + } else if (ISL_DEV_GEN(dev) >= 6) { + return ISL_SAMPLE_COUNT_1_BIT | + ISL_SAMPLE_COUNT_4_BIT; + } else { + return ISL_SAMPLE_COUNT_1_BIT; + } +} + +/** + * @param[out] info is written only on success + */ +bool +isl_tiling_get_info(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_tile_info *tile_info) +{ + const uint32_t bs = format_block_size; + uint32_t width, height; + + assert(bs > 0); + + switch (tiling) { + case ISL_TILING_LINEAR: + width = 1; + height = 1; + break; + + case ISL_TILING_X: + width = 1 << 9; + height = 1 << 3; + break; + + case ISL_TILING_Y0: + width = 1 << 7; + height = 1 << 5; + break; + + case ISL_TILING_W: + /* XXX: Should W tile be same as Y? 
*/ + width = 1 << 6; + height = 1 << 6; + break; + + case ISL_TILING_Yf: + case ISL_TILING_Ys: { + if (ISL_DEV_GEN(dev) < 9) + return false; + + if (!isl_is_pow2(bs)) + return false; + + bool is_Ys = tiling == ISL_TILING_Ys; + + width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys)); + height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys)); + break; + } + } /* end switch */ + + *tile_info = (struct isl_tile_info) { + .tiling = tiling, + .width = width, + .height = height, + .size = width * height, + }; + + return true; +} + +void +isl_tiling_get_extent(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_extent2d *e) +{ + struct isl_tile_info tile_info; + isl_tiling_get_info(dev, tiling, format_block_size, &tile_info); + *e = isl_extent2d(tile_info.width, tile_info.height); +} + +/** + * @param[out] tiling is set only on success + */ +bool +isl_surf_choose_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling *tiling) +{ + isl_tiling_flags_t tiling_flags = info->tiling_flags; + + if (ISL_DEV_GEN(dev) >= 7) { + gen7_filter_tiling(dev, info, &tiling_flags); + } else { + isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev)); + gen7_filter_tiling(dev, info, &tiling_flags); + } + + #define CHOOSE(__tiling) \ + do { \ + if (tiling_flags & (1u << (__tiling))) { \ + *tiling = (__tiling); \ + return true; \ + } \ + } while (0) + + /* Of the tiling modes remaining, choose the one that offers the best + * performance. + */ + + if (info->dim == ISL_SURF_DIM_1D) { + /* Prefer linear for 1D surfaces because they do not benefit from + * tiling. To the contrary, tiling leads to wasted memory and poor + * memory locality due to the swizzling and alignment restrictions + * required in tiled surfaces. 
+ */ + CHOOSE(ISL_TILING_LINEAR); + } + + CHOOSE(ISL_TILING_Ys); + CHOOSE(ISL_TILING_Yf); + CHOOSE(ISL_TILING_Y0); + CHOOSE(ISL_TILING_X); + CHOOSE(ISL_TILING_W); + CHOOSE(ISL_TILING_LINEAR); + + #undef CHOOSE + + /* No tiling mode accomodates the inputs. */ + return false; +} + +static bool +isl_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + if (ISL_DEV_GEN(dev) >= 8) { + return gen8_choose_msaa_layout(dev, info, tiling, msaa_layout); + } else if (ISL_DEV_GEN(dev) >= 7) { + return gen7_choose_msaa_layout(dev, info, tiling, msaa_layout); + } else if (ISL_DEV_GEN(dev) >= 6) { + return gen6_choose_msaa_layout(dev, info, tiling, msaa_layout); + } else { + return gen4_choose_msaa_layout(dev, info, tiling, msaa_layout); + } +} + +static void +isl_msaa_interleaved_scale_px_to_sa(uint32_t samples, + uint32_t *width, uint32_t *height) +{ + assert(isl_is_pow2(samples)); + + /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level + * Sizes (p133): + * + * If the surface is multisampled and it is a depth or stencil surface + * or Multisampled Surface StorageFormat in SURFACE_STATE is + * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before + * proceeding: [...] + */ + if (width) + *width = isl_align(*width, 2) << ((ffs(samples) - 0) / 2); + if (height) + *height = isl_align(*height, 2) << ((ffs(samples) - 1) / 2); +} + +static enum isl_array_pitch_span +isl_choose_array_pitch_span(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + const struct isl_extent4d *phys_level0_sa) +{ + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + case ISL_DIM_LAYOUT_GEN4_2D: + if (ISL_DEV_GEN(dev) >= 8) { + /* QPitch becomes programmable in Broadwell. So choose the + * most compact QPitch possible in order to conserve memory. 
+ * + * From the Broadwell PRM >> Volume 2d: Command Reference: Structures + * >> RENDER_SURFACE_STATE Surface QPitch (p325): + * + * - Software must ensure that this field is set to a value + * sufficiently large such that the array slices in the surface + * do not overlap. Refer to the Memory Data Formats section for + * information on how surfaces are stored in memory. + * + * - This field specifies the distance in rows between array + * slices. It is used only in the following cases: + * + * - Surface Array is enabled OR + * - Number of Mulitsamples is not NUMSAMPLES_1 and + * Multisampled Surface Storage Format set to MSFMT_MSS OR + * - Surface Type is SURFTYPE_CUBE + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } else if (ISL_DEV_GEN(dev) >= 7) { + /* Note that Ivybridge introduces + * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the + * driver more control over the QPitch. + */ + + if (phys_level0_sa->array_len == 1) { + /* The hardware will never use the QPitch. So choose the most + * compact QPitch possible in order to conserve memory. + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + if (isl_surf_usage_is_depth_or_stencil(info->usage)) { + /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >> + * Section 6.18.4.7: Surface Arrays (p112): + * + * If Surface Array Spacing is set to ARYSPC_FULL (note that + * the depth buffer and stencil buffer have an implied value of + * ARYSPC_FULL): + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + if (info->levels == 1) { + /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing + * to ARYSPC_LOD0. 
+ */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + return ISL_ARRAY_PITCH_SPAN_FULL; + } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && + ISL_DEV_USE_SEPARATE_STENCIL(dev) && + isl_surf_usage_is_stencil(info->usage)) { + /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * The separate stencil buffer does not support mip mapping, thus + * the storage for LODs other than LOD 0 is not needed. + */ + assert(info->levels == 1); + assert(phys_level0_sa->array_len == 1); + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } else { + if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && + ISL_DEV_USE_SEPARATE_STENCIL(dev) && + isl_surf_usage_is_stencil(info->usage)) { + /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * The separate stencil buffer does not support mip mapping, + * thus the storage for LODs other than LOD 0 is not needed. + */ + assert(info->levels == 1); + assert(phys_level0_sa->array_len == 1); + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + if (phys_level0_sa->array_len == 1) { + /* The hardware will never use the QPitch. So choose the most + * compact QPitch possible in order to conserve memory. + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + return ISL_ARRAY_PITCH_SPAN_FULL; + } + + case ISL_DIM_LAYOUT_GEN4_3D: + /* The hardware will never use the QPitch. So choose the most + * compact QPitch possible in order to conserve memory. 
+ */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + unreachable("bad isl_dim_layout"); + return ISL_ARRAY_PITCH_SPAN_FULL; +} + +static void +isl_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + if (ISL_DEV_GEN(dev) >= 9) { + gen9_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } else if (ISL_DEV_GEN(dev) >= 8) { + gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } else if (ISL_DEV_GEN(dev) >= 7) { + gen7_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } else if (ISL_DEV_GEN(dev) >= 6) { + gen6_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } else { + gen4_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } +} + +static enum isl_dim_layout +isl_surf_choose_dim_layout(const struct isl_device *dev, + enum isl_surf_dim logical_dim) +{ + if (ISL_DEV_GEN(dev) >= 9) { + switch (logical_dim) { + case ISL_SURF_DIM_1D: + return ISL_DIM_LAYOUT_GEN9_1D; + case ISL_SURF_DIM_2D: + case ISL_SURF_DIM_3D: + return ISL_DIM_LAYOUT_GEN4_2D; + } + } else { + switch (logical_dim) { + case ISL_SURF_DIM_1D: + case ISL_SURF_DIM_2D: + return ISL_DIM_LAYOUT_GEN4_2D; + case ISL_SURF_DIM_3D: + return ISL_DIM_LAYOUT_GEN4_3D; + } + } + + unreachable("bad isl_surf_dim"); + return ISL_DIM_LAYOUT_GEN4_2D; +} + +/** + * Calculate the physical extent of the surface's first level, in units of + * surface samples. The result is aligned to the format's compression block. 
+ */ +static void +isl_calc_phys_level0_extent_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent4d *phys_level0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + if (isl_format_is_yuv(info->format)) + isl_finishme("%s:%s: YUV format", __FILE__, __func__); + + switch (info->dim) { + case ISL_SURF_DIM_1D: + assert(info->height == 1); + assert(info->depth == 1); + assert(info->samples == 1); + assert(!isl_format_is_compressed(info->format)); + + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN4_3D: + unreachable("bad isl_dim_layout"); + + case ISL_DIM_LAYOUT_GEN9_1D: + case ISL_DIM_LAYOUT_GEN4_2D: + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = 1, + .d = 1, + .a = info->array_len, + }; + break; + } + break; + + case ISL_SURF_DIM_2D: + assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D); + + if (tiling == ISL_TILING_Ys && info->samples > 1) + isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__); + + switch (msaa_layout) { + case ISL_MSAA_LAYOUT_NONE: + assert(info->depth == 1); + assert(info->samples == 1); + + *phys_level0_sa = (struct isl_extent4d) { + .w = isl_align(info->width, fmtl->bw), + .h = isl_align(info->height, fmtl->bh), + .d = 1, + .a = info->array_len, + }; + break; + + case ISL_MSAA_LAYOUT_ARRAY: + assert(info->depth == 1); + assert(info->array_len == 1); + assert(!isl_format_is_compressed(info->format)); + + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = 1, + .a = info->samples, + }; + break; + + case ISL_MSAA_LAYOUT_INTERLEAVED: + assert(info->depth == 1); + assert(info->array_len == 1); + assert(!isl_format_is_compressed(info->format)); + + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = 1, + .a = 1, + }; + + 
isl_msaa_interleaved_scale_px_to_sa(info->samples, + &phys_level0_sa->w, + &phys_level0_sa->h); + break; + } + break; + + case ISL_SURF_DIM_3D: + assert(info->array_len == 1); + assert(info->samples == 1); + + if (fmtl->bd > 1) { + isl_finishme("%s:%s: compression block with depth > 1", + __FILE__, __func__); + } + + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + unreachable("bad isl_dim_layout"); + + case ISL_DIM_LAYOUT_GEN4_2D: + assert(ISL_DEV_GEN(dev) >= 9); + + *phys_level0_sa = (struct isl_extent4d) { + .w = isl_align(info->width, fmtl->bw), + .h = isl_align(info->height, fmtl->bh), + .d = 1, + .a = info->depth, + }; + break; + + case ISL_DIM_LAYOUT_GEN4_3D: + assert(ISL_DEV_GEN(dev) < 9); + *phys_level0_sa = (struct isl_extent4d) { + .w = isl_align(info->width, fmtl->bw), + .h = isl_align(info->height, fmtl->bh), + .d = info->depth, + .a = 1, + }; + break; + } + break; + } +} + +/** + * A variant of isl_calc_phys_slice0_extent_sa() specific to + * ISL_DIM_LAYOUT_GEN4_2D. + */ +static void +isl_calc_phys_slice0_extent_sa_gen4_2d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_msaa_layout msaa_layout, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(phys_level0_sa->depth == 1); + + if (info->levels == 1 && msaa_layout != ISL_MSAA_LAYOUT_INTERLEAVED) { + /* Do not pad the surface to the image alignment. Instead, pad it only + * to the pixel format's block alignment. + * + * For tiled surfaces, using a reduced alignment here avoids wasting CPU + * cycles on the below mipmap layout caluclations. Reducing the + * alignment here is safe because we later align the row pitch and array + * pitch to the tile boundary. 
It is safe even for + * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled + * to accomodate the interleaved samples. + * + * For linear surfaces, reducing the alignment here permits us to later + * choose an arbitrary, non-aligned row pitch. If the surface backs + * a VkBuffer, then an arbitrary pitch may be needed to accomodate + * VkBufferImageCopy::bufferRowLength. + */ + *phys_slice0_sa = (struct isl_extent2d) { + .w = isl_align_npot(phys_level0_sa->w, fmtl->bw), + .h = isl_align_npot(phys_level0_sa->h, fmtl->bh), + }; + return; + } + + uint32_t slice_top_w = 0; + uint32_t slice_bottom_w = 0; + uint32_t slice_left_h = 0; + uint32_t slice_right_h = 0; + + uint32_t W0 = phys_level0_sa->w; + uint32_t H0 = phys_level0_sa->h; + + for (uint32_t l = 0; l < info->levels; ++l) { + uint32_t W = isl_minify(W0, l); + uint32_t H = isl_minify(H0, l); + + if (msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { + /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level + * Sizes (p133): + * + * If the surface is multisampled and it is a depth or stencil + * surface or Multisampled Surface StorageFormat in + * SURFACE_STATE is MSFMT_DEPTH_STENCIL, W_L and H_L must be + * adjusted as follows before proceeding: [...] + */ + isl_msaa_interleaved_scale_px_to_sa(info->samples, &W, &H); + } + + uint32_t w = isl_align_npot(W, image_align_sa->w); + uint32_t h = isl_align_npot(H, image_align_sa->h); + + if (l == 0) { + slice_top_w = w; + slice_left_h = h; + slice_right_h = h; + } else if (l == 1) { + slice_bottom_w = w; + slice_left_h += h; + } else if (l == 2) { + slice_bottom_w += w; + slice_right_h += h; + } else { + slice_right_h += h; + } + } + + *phys_slice0_sa = (struct isl_extent2d) { + .w = MAX(slice_top_w, slice_bottom_w), + .h = MAX(slice_left_h, slice_right_h), + }; +} + +/** + * A variant of isl_calc_phys_slice0_extent_sa() specific to + * ISL_DIM_LAYOUT_GEN4_3D. 
+ */ +static void +isl_calc_phys_slice0_extent_sa_gen4_3d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + assert(info->samples == 1); + assert(phys_level0_sa->array_len == 1); + + uint32_t slice_w = 0; + uint32_t slice_h = 0; + + uint32_t W0 = phys_level0_sa->w; + uint32_t H0 = phys_level0_sa->h; + uint32_t D0 = phys_level0_sa->d; + + for (uint32_t l = 0; l < info->levels; ++l) { + uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w); + uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h); + uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa->d); + + uint32_t max_layers_horiz = MIN(level_d, 1u << l); + uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); + + slice_w = MAX(slice_w, level_w * max_layers_horiz); + slice_h += level_h * max_layers_vert; + } + + *phys_slice0_sa = (struct isl_extent2d) { + .w = slice_w, + .h = slice_h, + }; +} + +/** + * A variant of isl_calc_phys_slice0_extent_sa() specific to + * ISL_DIM_LAYOUT_GEN9_1D. 
+ */ +static void +isl_calc_phys_slice0_extent_sa_gen9_1d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(phys_level0_sa->height == 1); + assert(phys_level0_sa->depth == 1); + assert(info->samples == 1); + assert(image_align_sa->w >= fmtl->bw); + + uint32_t slice_w = 0; + const uint32_t W0 = phys_level0_sa->w; + + for (uint32_t l = 0; l < info->levels; ++l) { + uint32_t W = isl_minify(W0, l); + uint32_t w = isl_align_npot(W, image_align_sa->w); + + slice_w += w; + } + + *phys_slice0_sa = isl_extent2d(slice_w, 1); +} + +/** + * Calculate the physical extent of the surface's first array slice, in units + * of surface samples. If the surface is multi-leveled, then the result will + * be aligned to \a image_align_sa. + */ +static void +isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + enum isl_msaa_layout msaa_layout, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + isl_calc_phys_slice0_extent_sa_gen9_1d(dev, info, + image_align_sa, phys_level0_sa, + phys_slice0_sa); + return; + case ISL_DIM_LAYOUT_GEN4_2D: + isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, + image_align_sa, phys_level0_sa, + phys_slice0_sa); + return; + case ISL_DIM_LAYOUT_GEN4_3D: + isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, image_align_sa, + phys_level0_sa, phys_slice0_sa); + return; + } +} + +/** + * Calculate the pitch between physical array slices, in units of rows of + * surface elements. 
+ */ +static uint32_t +isl_calc_array_pitch_el_rows(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + enum isl_dim_layout dim_layout, + enum isl_array_pitch_span array_pitch_span, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + const struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + uint32_t pitch_sa_rows = 0; + + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + /* Each row is an array slice */ + pitch_sa_rows = 1; + break; + case ISL_DIM_LAYOUT_GEN4_2D: + switch (array_pitch_span) { + case ISL_ARRAY_PITCH_SPAN_COMPACT: + pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + break; + case ISL_ARRAY_PITCH_SPAN_FULL: { + /* The QPitch equation is found in the Broadwell PRM >> Volume 5: + * Memory Views >> Common Surface Formats >> Surface Layout >> 2D + * Surfaces >> Surface Arrays. + */ + uint32_t H0_sa = phys_level0_sa->h; + uint32_t H1_sa = isl_minify(H0_sa, 1); + + uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); + uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); + + uint32_t m; + if (ISL_DEV_GEN(dev) >= 7) { + /* The QPitch equation changed slightly in Ivybridge. */ + m = 12; + } else { + m = 11; + } + + pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); + + if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && + (info->height % 4 == 1)) { + /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than + * the value calculated in the equation above , for every + * other odd Surface Height starting from 1 i.e. 1,5,9,13. + * + * XXX(chadv): Is the errata natural corollary of the physical + * layout of interleaved samples? 
+ */ + pitch_sa_rows += 4; + } + + pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); + } /* end case */ + break; + } + break; + case ISL_DIM_LAYOUT_GEN4_3D: + assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); + pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + break; + default: + unreachable("bad isl_dim_layout"); + break; + } + + assert(pitch_sa_rows % fmtl->bh == 0); + uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh; + + if (ISL_DEV_GEN(dev) >= 9 && + info->dim == ISL_SURF_DIM_3D && + tile_info->tiling != ISL_TILING_LINEAR) { + /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch: + * + * Tile Mode != Linear: This field must be set to an integer multiple + * of the tile height + */ + pitch_el_rows = isl_align(pitch_el_rows, tile_info->height); + } + + return pitch_el_rows; +} + +/** + * Calculate the pitch of each surface row, in bytes. + */ +static uint32_t +isl_calc_row_pitch(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + const struct isl_extent3d *image_align_sa, + const struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + uint32_t row_pitch = info->min_pitch; + + /* First, align the surface to a cache line boundary, as the PRM explains + * below. + * + * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Render Target and Media + * Surfaces: + * + * The data port accesses data (pixels) outside of the surface if they + * are contained in the same cache request as pixels that are within the + * surface. These pixels will not be returned by the requesting message, + * however if these pixels lie outside of defined pages in the GTT, + * a GTT error will result when the cache request is processed. In order + * to avoid these GTT errors, “padding” at the bottom of the surface is + * sometimes necessary. 
+ * + * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: + * + * The sampling engine accesses texels outside of the surface if they + * are contained in the same cache line as texels that are within the + * surface. These texels will not participate in any calculation + * performed by the sampling engine and will not affect the result of + * any sampling engine operation, however if these texels lie outside of + * defined pages in the GTT, a GTT error will result when the cache line + * is accessed. In order to avoid these GTT errors, “padding” at the + * bottom and right side of a sampling engine surface is sometimes + * necessary. + * + * It is possible that a cache line will straddle a page boundary if the + * base address or pitch is not aligned. All pages included in the cache + * lines that are part of the surface must map to valid GTT entries to + * avoid errors. To determine the necessary padding on the bottom and + * right side of the surface, refer to the table in Alignment Unit Size + * section for the i and j parameters for the surface format in use. The + * surface must then be extended to the next multiple of the alignment + * unit size in each dimension, and all texels contained in this + * extended surface must have valid GTT entries. + * + * For example, suppose the surface size is 15 texels by 10 texels and + * the alignment parameters are i=4 and j=2. In this case, the extended + * surface would be 16 by 10. Note that these calculations are done in + * texels, and must be converted to bytes based on the surface format + * being used to determine whether additional pages need to be defined. 
+ */ + assert(phys_slice0_sa->w % fmtl->bw == 0); + row_pitch = MAX(row_pitch, fmtl->bs * (phys_slice0_sa->w / fmtl->bw)); + + switch (tile_info->tiling) { + case ISL_TILING_LINEAR: + /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> + * RENDER_SURFACE_STATE Surface Pitch (p349): + * + * - For linear render target surfaces and surfaces accessed with the + * typed data port messages, the pitch must be a multiple of the + * element size for non-YUV surface formats. Pitch must be + * a multiple of 2 * element size for YUV surface formats. + * + * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we + * ignore because isl doesn't do buffers.] + * + * - For other linear surfaces, the pitch can be any multiple of + * bytes. + */ + if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { + if (isl_format_is_yuv(info->format)) { + row_pitch = isl_align_npot(row_pitch, 2 * fmtl->bs); + } else { + row_pitch = isl_align_npot(row_pitch, fmtl->bs); + } + } + break; + default: + /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> + * RENDER_SURFACE_STATE Surface Pitch (p349): + * + * - For tiled surfaces, the pitch must be a multiple of the tile + * width. + */ + row_pitch = isl_align(row_pitch, tile_info->width); + break; + } + + return row_pitch; +} + +/** + * Calculate the surface's total height, including padding, in units of + * surface elements. 
+ */ +static uint32_t +isl_calc_total_height_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + uint32_t phys_array_len, + uint32_t row_pitch, + uint32_t array_pitch_el_rows) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + uint32_t total_h_el = phys_array_len * array_pitch_el_rows; + uint32_t pad_bytes = 0; + + /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Render Target and Media + * Surfaces: + * + * The data port accesses data (pixels) outside of the surface if they + * are contained in the same cache request as pixels that are within the + * surface. These pixels will not be returned by the requesting message, + * however if these pixels lie outside of defined pages in the GTT, + * a GTT error will result when the cache request is processed. In + * order to avoid these GTT errors, “padding” at the bottom of the + * surface is sometimes necessary. + * + * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: + * + * ... Lots of padding requirements, all listed separately below. + */ + + /* We can safely ignore the first padding requirement, quoted below, + * because isl doesn't do buffers. + * + * - [pre-BDW] For buffers, which have no inherent “height,” padding + * requirements are different. A buffer must be padded to the next + * multiple of 256 array elements, with an additional 16 bytes added + * beyond that to account for the L1 cache line. + */ + + /* + * - For compressed textures [...], padding at the bottom of the surface + * is to an even compressed row. + */ + if (isl_format_is_compressed(info->format)) + total_h_el = isl_align(total_h_el, 2); + + /* + * - For cube surfaces, an additional two rows of padding are required + * at the bottom of the surface. 
+ */ + if (info->usage & ISL_SURF_USAGE_CUBE_BIT) + total_h_el += 2; + + /* + * - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats, + * additional padding is required. These surfaces require an extra row + * plus 16 bytes of padding at the bottom in addition to the general + * padding requirements. + */ + if (isl_format_is_yuv(info->format) && + (fmtl->bs == 96 || fmtl->bs == 48|| fmtl->bs == 24)) { + total_h_el += 1; + pad_bytes += 16; + } + + /* + * - For linear surfaces, additional padding of 64 bytes is required at + * the bottom of the surface. This is in addition to the padding + * required above. + */ + if (tile_info->tiling == ISL_TILING_LINEAR) + pad_bytes += 64; + + /* The below text weakens, not strengthens, the padding requirements for + * linear surfaces. Therefore we can safely ignore it. + * + * - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array, + * non-MSAA, non-mip-mapped surfaces in linear memory, the only + * padding requirement is to the next aligned 64-byte boundary beyond + * the end of the surface. The rest of the padding requirements + * documented above do not apply to these surfaces. + */ + + /* + * - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and + * height % 4 != 0, the surface must be padded with + * 4-(height % 4)*Surface Pitch # of bytes. + */ + if (ISL_DEV_GEN(dev) >= 9 && + tile_info->tiling == ISL_TILING_LINEAR && + (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) { + total_h_el = isl_align(total_h_el, 4); + } + + /* + * - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded + * to 4 times the Surface Pitch # of bytes + */ + if (ISL_DEV_GEN(dev) >= 9 && + tile_info->tiling == ISL_TILING_LINEAR && + info->dim == ISL_SURF_DIM_1D) { + total_h_el += 4; + } + + /* Be sloppy. Align any leftover padding to a row boundary. 
*/ + total_h_el += isl_align_div_npot(pad_bytes, row_pitch); + + return total_h_el; +} + +bool +isl_surf_init_s(const struct isl_device *dev, + struct isl_surf *surf, + const struct isl_surf_init_info *restrict info) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + const struct isl_extent4d logical_level0_px = { + .w = info->width, + .h = info->height, + .d = info->depth, + .a = info->array_len, + }; + + enum isl_dim_layout dim_layout = + isl_surf_choose_dim_layout(dev, info->dim); + + enum isl_tiling tiling; + if (!isl_surf_choose_tiling(dev, info, &tiling)) + return false; + + struct isl_tile_info tile_info; + if (!isl_tiling_get_info(dev, tiling, fmtl->bs, &tile_info)) + return false; + + enum isl_msaa_layout msaa_layout; + if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout)) + return false; + + struct isl_extent3d image_align_el; + isl_choose_image_alignment_el(dev, info, tiling, msaa_layout, + &image_align_el); + + struct isl_extent3d image_align_sa = + isl_extent3d_el_to_sa(info->format, image_align_el); + + struct isl_extent4d phys_level0_sa; + isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, + &phys_level0_sa); + assert(phys_level0_sa.w % fmtl->bw == 0); + assert(phys_level0_sa.h % fmtl->bh == 0); + + enum isl_array_pitch_span array_pitch_span = + isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); + + struct isl_extent2d phys_slice0_sa; + isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout, + &image_align_sa, &phys_level0_sa, + &phys_slice0_sa); + assert(phys_slice0_sa.w % fmtl->bw == 0); + assert(phys_slice0_sa.h % fmtl->bh == 0); + + const uint32_t row_pitch = isl_calc_row_pitch(dev, info, &tile_info, + &image_align_sa, + &phys_slice0_sa); + + const uint32_t array_pitch_el_rows = + isl_calc_array_pitch_el_rows(dev, info, &tile_info, dim_layout, + array_pitch_span, &image_align_sa, + &phys_level0_sa, &phys_slice0_sa); + + const uint32_t total_h_el = + 
isl_calc_total_height_el(dev, info, &tile_info, + phys_level0_sa.array_len, row_pitch, + array_pitch_el_rows); + + const uint32_t total_h_sa = total_h_el * fmtl->bh; + const uint32_t size = row_pitch * isl_align(total_h_sa, tile_info.height); + + /* Alignment of surface base address, in bytes */ + uint32_t base_alignment = MAX(1, info->min_alignment); + assert(isl_is_pow2(base_alignment) && isl_is_pow2(tile_info.size)); + base_alignment = MAX(base_alignment, tile_info.size); + + *surf = (struct isl_surf) { + .dim = info->dim, + .dim_layout = dim_layout, + .msaa_layout = msaa_layout, + .tiling = tiling, + .format = info->format, + + .levels = info->levels, + .samples = info->samples, + + .image_alignment_el = image_align_el, + .logical_level0_px = logical_level0_px, + .phys_level0_sa = phys_level0_sa, + + .size = size, + .alignment = base_alignment, + .row_pitch = row_pitch, + .array_pitch_el_rows = array_pitch_el_rows, + .array_pitch_span = array_pitch_span, + + .usage = info->usage, + }; + + return true; +} + +void +isl_surf_get_tile_info(const struct isl_device *dev, + const struct isl_surf *surf, + struct isl_tile_info *tile_info) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + isl_tiling_get_info(dev, surf->tiling, fmtl->bs, tile_info); +} + +/** + * A variant of isl_surf_get_image_offset_sa() specific to + * ISL_DIM_LAYOUT_GEN4_2D. 
+ */ +static void +get_image_offset_sa_gen4_2d(const struct isl_surf *surf, + uint32_t level, uint32_t layer, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(layer < surf->phys_level0_sa.array_len); + assert(surf->phys_level0_sa.depth == 1); + + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + const uint32_t W0 = surf->phys_level0_sa.width; + const uint32_t H0 = surf->phys_level0_sa.height; + + uint32_t x = 0; + uint32_t y = layer * isl_surf_get_array_pitch_sa_rows(surf); + + for (uint32_t l = 0; l < level; ++l) { + if (l == 1) { + uint32_t W = isl_minify(W0, l); + + if (surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) + isl_msaa_interleaved_scale_px_to_sa(surf->samples, &W, NULL); + + x += isl_align_npot(W, image_align_sa.w); + } else { + uint32_t H = isl_minify(H0, l); + + if (surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) + isl_msaa_interleaved_scale_px_to_sa(surf->samples, NULL, &H); + + y += isl_align_npot(H, image_align_sa.h); + } + } + + *x_offset_sa = x; + *y_offset_sa = y; +} + +/** + * A variant of isl_surf_get_image_offset_sa() specific to + * ISL_DIM_LAYOUT_GEN4_3D. 
+ */ +static void +get_image_offset_sa_gen4_3d(const struct isl_surf *surf, + uint32_t level, uint32_t logical_z_offset_px, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level)); + assert(surf->phys_level0_sa.array_len == 1); + + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + const uint32_t W0 = surf->phys_level0_sa.width; + const uint32_t H0 = surf->phys_level0_sa.height; + const uint32_t D0 = surf->phys_level0_sa.depth; + + uint32_t x = 0; + uint32_t y = 0; + + for (uint32_t l = 0; l < level; ++l) { + const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h); + const uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa.d); + const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); + + y += level_h * max_layers_vert; + } + + const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w); + const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h); + const uint32_t level_d = isl_align_npot(isl_minify(D0, level), image_align_sa.d); + + const uint32_t max_layers_horiz = MIN(level_d, 1u << level); + + x += level_w * (logical_z_offset_px % max_layers_horiz); + y += level_h * (logical_z_offset_px / max_layers_horiz); + + *x_offset_sa = x; + *y_offset_sa = y; +} + +/** + * A variant of isl_surf_get_image_offset_sa() specific to + * ISL_DIM_LAYOUT_GEN9_1D. 
+ */ +static void +get_image_offset_sa_gen9_1d(const struct isl_surf *surf, + uint32_t level, uint32_t layer, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(layer < surf->phys_level0_sa.array_len); + assert(surf->phys_level0_sa.height == 1); + assert(surf->phys_level0_sa.depth == 1); + assert(surf->samples == 1); + + const uint32_t W0 = surf->phys_level0_sa.width; + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + uint32_t x = 0; + + for (uint32_t l = 0; l < level; ++l) { + uint32_t W = isl_minify(W0, l); + uint32_t w = isl_align_npot(W, image_align_sa.w); + + x += w; + } + + *x_offset_sa = x; + *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf); +} + +/** + * Calculate the offset, in units of surface samples, to a subimage in the + * surface. + * + * @invariant level < surface levels + * @invariant logical_array_layer < logical array length of surface + * @invariant logical_z_offset_px < logical depth of surface at level + */ +static void +get_image_offset_sa(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(logical_array_layer < surf->logical_level0_px.array_len); + assert(logical_z_offset_px + < isl_minify(surf->logical_level0_px.depth, level)); + + switch (surf->dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + get_image_offset_sa_gen9_1d(surf, level, logical_array_layer, + x_offset_sa, y_offset_sa); + break; + case ISL_DIM_LAYOUT_GEN4_2D: + get_image_offset_sa_gen4_2d(surf, level, logical_array_layer + + logical_z_offset_px, + x_offset_sa, y_offset_sa); + break; + case ISL_DIM_LAYOUT_GEN4_3D: + get_image_offset_sa_gen4_3d(surf, level, logical_z_offset_px, + x_offset_sa, y_offset_sa); + break; + } +} + +void +isl_surf_get_image_offset_el(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + 
uint32_t logical_z_offset_px, + uint32_t *x_offset_el, + uint32_t *y_offset_el) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + assert(level < surf->levels); + assert(logical_array_layer < surf->logical_level0_px.array_len); + assert(logical_z_offset_px + < isl_minify(surf->logical_level0_px.depth, level)); + + uint32_t x_offset_sa, y_offset_sa; + get_image_offset_sa(surf, level, + logical_array_layer, + logical_z_offset_px, + &x_offset_sa, + &y_offset_sa); + + *x_offset_el = x_offset_sa / fmtl->bw; + *y_offset_el = y_offset_sa / fmtl->bh; +} + +void +isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset, + uint32_t *base_address_offset, + uint32_t *x_offset_el, + uint32_t *y_offset_el) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + struct isl_tile_info tile_info; + isl_surf_get_tile_info(dev, surf, &tile_info); + + uint32_t total_x_offset_el; + uint32_t total_y_offset_el; + isl_surf_get_image_offset_el(surf, level, + logical_array_layer, + logical_z_offset, + &total_x_offset_el, + &total_y_offset_el); + + uint32_t small_y_offset_el = total_y_offset_el % tile_info.height; + uint32_t big_y_offset_el = total_y_offset_el - small_y_offset_el; + uint32_t big_y_offset_B = big_y_offset_el * surf->row_pitch; + + uint32_t total_x_offset_B = total_x_offset_el * fmtl->bs; + uint32_t small_x_offset_B = total_x_offset_B % tile_info.width; + uint32_t small_x_offset_el = small_x_offset_B / fmtl->bs; + uint32_t big_x_offset_B = total_x_offset_B - small_x_offset_B; + + *base_address_offset = big_y_offset_B + big_x_offset_B; + *x_offset_el = small_x_offset_el; + *y_offset_el = small_y_offset_el; +} + +uint32_t +isl_surf_get_depth_format(const struct isl_device *dev, + const struct isl_surf *surf) +{ + /* Support for separate stencil buffers began in gen5. 
Support for + * interleaved depthstencil buffers ceased in gen7. The intermediate gens, + * those that supported separate and interleaved stencil, were gen5 and + * gen6. + * + * For a list of all available formats, see the Sandybridge PRM >> Volume + * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface + * Format (p321). + */ + + bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT; + + assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT); + + if (has_stencil) + assert(ISL_DEV_GEN(dev) < 7); + + switch (surf->format) { + default: + unreachable("bad isl depth format"); + case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS: + assert(ISL_DEV_GEN(dev) < 7); + return 0; /* D32_FLOAT_S8X24_UINT */ + case ISL_FORMAT_R32_FLOAT: + assert(!has_stencil); + return 1; /* D32_FLOAT */ + case ISL_FORMAT_R24_UNORM_X8_TYPELESS: + if (has_stencil) { + assert(ISL_DEV_GEN(dev) < 7); + return 2; /* D24_UNORM_S8_UINT */ + } else { + assert(ISL_DEV_GEN(dev) >= 5); + return 3; /* D24_UNORM_X8_UINT */ + } + case ISL_FORMAT_R16_UNORM: + assert(!has_stencil); + return 5; /* D16_UNORM */ + } +} diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h new file mode 100644 index 00000000000..3e0ff935948 --- /dev/null +++ b/src/intel/isl/isl.h @@ -0,0 +1,1025 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file + * @brief Intel Surface Layout + * + * Header Layout + * ------------- + * The header is ordered as: + * - forward declarations + * - macros that may be overridden at compile-time for specific gens + * - enums and constants + * - structs and unions + * - functions + */ + +#pragma once + +#include +#include +#include + +#include "util/macros.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct brw_device_info; +struct brw_image_param; + +#ifndef ISL_DEV_GEN +/** + * @brief Get the hardware generation of isl_device. + * + * You can define this as a compile-time constant in the CFLAGS. For example, + * `gcc -DISL_DEV_GEN(dev)=9 ...`. + */ +#define ISL_DEV_GEN(__dev) ((__dev)->info->gen) +#endif + +#ifndef ISL_DEV_USE_SEPARATE_STENCIL +/** + * You can define this as a compile-time constant in the CFLAGS. For example, + * `gcc -DISL_DEV_USE_SEPARATE_STENCIL(dev)=1 ...`. + */ +#define ISL_DEV_USE_SEPARATE_STENCIL(__dev) ((__dev)->use_separate_stencil) +#endif + +/** + * Hardware enumeration SURFACE_FORMAT. + * + * For the official list, see Broadwell PRM: Volume 2b: Command Reference: + * Enumerations: SURFACE_FORMAT. 
+ */ +enum isl_format { + ISL_FORMAT_R32G32B32A32_FLOAT = 0, + ISL_FORMAT_R32G32B32A32_SINT = 1, + ISL_FORMAT_R32G32B32A32_UINT = 2, + ISL_FORMAT_R32G32B32A32_UNORM = 3, + ISL_FORMAT_R32G32B32A32_SNORM = 4, + ISL_FORMAT_R64G64_FLOAT = 5, + ISL_FORMAT_R32G32B32X32_FLOAT = 6, + ISL_FORMAT_R32G32B32A32_SSCALED = 7, + ISL_FORMAT_R32G32B32A32_USCALED = 8, + ISL_FORMAT_R32G32B32A32_SFIXED = 32, + ISL_FORMAT_R64G64_PASSTHRU = 33, + ISL_FORMAT_R32G32B32_FLOAT = 64, + ISL_FORMAT_R32G32B32_SINT = 65, + ISL_FORMAT_R32G32B32_UINT = 66, + ISL_FORMAT_R32G32B32_UNORM = 67, + ISL_FORMAT_R32G32B32_SNORM = 68, + ISL_FORMAT_R32G32B32_SSCALED = 69, + ISL_FORMAT_R32G32B32_USCALED = 70, + ISL_FORMAT_R32G32B32_SFIXED = 80, + ISL_FORMAT_R16G16B16A16_UNORM = 128, + ISL_FORMAT_R16G16B16A16_SNORM = 129, + ISL_FORMAT_R16G16B16A16_SINT = 130, + ISL_FORMAT_R16G16B16A16_UINT = 131, + ISL_FORMAT_R16G16B16A16_FLOAT = 132, + ISL_FORMAT_R32G32_FLOAT = 133, + ISL_FORMAT_R32G32_SINT = 134, + ISL_FORMAT_R32G32_UINT = 135, + ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS = 136, + ISL_FORMAT_X32_TYPELESS_G8X24_UINT = 137, + ISL_FORMAT_L32A32_FLOAT = 138, + ISL_FORMAT_R32G32_UNORM = 139, + ISL_FORMAT_R32G32_SNORM = 140, + ISL_FORMAT_R64_FLOAT = 141, + ISL_FORMAT_R16G16B16X16_UNORM = 142, + ISL_FORMAT_R16G16B16X16_FLOAT = 143, + ISL_FORMAT_A32X32_FLOAT = 144, + ISL_FORMAT_L32X32_FLOAT = 145, + ISL_FORMAT_I32X32_FLOAT = 146, + ISL_FORMAT_R16G16B16A16_SSCALED = 147, + ISL_FORMAT_R16G16B16A16_USCALED = 148, + ISL_FORMAT_R32G32_SSCALED = 149, + ISL_FORMAT_R32G32_USCALED = 150, + ISL_FORMAT_R32G32_SFIXED = 160, + ISL_FORMAT_R64_PASSTHRU = 161, + ISL_FORMAT_B8G8R8A8_UNORM = 192, + ISL_FORMAT_B8G8R8A8_UNORM_SRGB = 193, + ISL_FORMAT_R10G10B10A2_UNORM = 194, + ISL_FORMAT_R10G10B10A2_UNORM_SRGB = 195, + ISL_FORMAT_R10G10B10A2_UINT = 196, + ISL_FORMAT_R10G10B10_SNORM_A2_UNORM = 197, + ISL_FORMAT_R8G8B8A8_UNORM = 199, + ISL_FORMAT_R8G8B8A8_UNORM_SRGB = 200, + ISL_FORMAT_R8G8B8A8_SNORM = 201, + ISL_FORMAT_R8G8B8A8_SINT = 202, + 
ISL_FORMAT_R8G8B8A8_UINT = 203, + ISL_FORMAT_R16G16_UNORM = 204, + ISL_FORMAT_R16G16_SNORM = 205, + ISL_FORMAT_R16G16_SINT = 206, + ISL_FORMAT_R16G16_UINT = 207, + ISL_FORMAT_R16G16_FLOAT = 208, + ISL_FORMAT_B10G10R10A2_UNORM = 209, + ISL_FORMAT_B10G10R10A2_UNORM_SRGB = 210, + ISL_FORMAT_R11G11B10_FLOAT = 211, + ISL_FORMAT_R32_SINT = 214, + ISL_FORMAT_R32_UINT = 215, + ISL_FORMAT_R32_FLOAT = 216, + ISL_FORMAT_R24_UNORM_X8_TYPELESS = 217, + ISL_FORMAT_X24_TYPELESS_G8_UINT = 218, + ISL_FORMAT_L32_UNORM = 221, + ISL_FORMAT_A32_UNORM = 222, + ISL_FORMAT_L16A16_UNORM = 223, + ISL_FORMAT_I24X8_UNORM = 224, + ISL_FORMAT_L24X8_UNORM = 225, + ISL_FORMAT_A24X8_UNORM = 226, + ISL_FORMAT_I32_FLOAT = 227, + ISL_FORMAT_L32_FLOAT = 228, + ISL_FORMAT_A32_FLOAT = 229, + ISL_FORMAT_X8B8_UNORM_G8R8_SNORM = 230, + ISL_FORMAT_A8X8_UNORM_G8R8_SNORM = 231, + ISL_FORMAT_B8X8_UNORM_G8R8_SNORM = 232, + ISL_FORMAT_B8G8R8X8_UNORM = 233, + ISL_FORMAT_B8G8R8X8_UNORM_SRGB = 234, + ISL_FORMAT_R8G8B8X8_UNORM = 235, + ISL_FORMAT_R8G8B8X8_UNORM_SRGB = 236, + ISL_FORMAT_R9G9B9E5_SHAREDEXP = 237, + ISL_FORMAT_B10G10R10X2_UNORM = 238, + ISL_FORMAT_L16A16_FLOAT = 240, + ISL_FORMAT_R32_UNORM = 241, + ISL_FORMAT_R32_SNORM = 242, + ISL_FORMAT_R10G10B10X2_USCALED = 243, + ISL_FORMAT_R8G8B8A8_SSCALED = 244, + ISL_FORMAT_R8G8B8A8_USCALED = 245, + ISL_FORMAT_R16G16_SSCALED = 246, + ISL_FORMAT_R16G16_USCALED = 247, + ISL_FORMAT_R32_SSCALED = 248, + ISL_FORMAT_R32_USCALED = 249, + ISL_FORMAT_B5G6R5_UNORM = 256, + ISL_FORMAT_B5G6R5_UNORM_SRGB = 257, + ISL_FORMAT_B5G5R5A1_UNORM = 258, + ISL_FORMAT_B5G5R5A1_UNORM_SRGB = 259, + ISL_FORMAT_B4G4R4A4_UNORM = 260, + ISL_FORMAT_B4G4R4A4_UNORM_SRGB = 261, + ISL_FORMAT_R8G8_UNORM = 262, + ISL_FORMAT_R8G8_SNORM = 263, + ISL_FORMAT_R8G8_SINT = 264, + ISL_FORMAT_R8G8_UINT = 265, + ISL_FORMAT_R16_UNORM = 266, + ISL_FORMAT_R16_SNORM = 267, + ISL_FORMAT_R16_SINT = 268, + ISL_FORMAT_R16_UINT = 269, + ISL_FORMAT_R16_FLOAT = 270, + ISL_FORMAT_A8P8_UNORM_PALETTE0 = 271, + 
ISL_FORMAT_A8P8_UNORM_PALETTE1 = 272, + ISL_FORMAT_I16_UNORM = 273, + ISL_FORMAT_L16_UNORM = 274, + ISL_FORMAT_A16_UNORM = 275, + ISL_FORMAT_L8A8_UNORM = 276, + ISL_FORMAT_I16_FLOAT = 277, + ISL_FORMAT_L16_FLOAT = 278, + ISL_FORMAT_A16_FLOAT = 279, + ISL_FORMAT_L8A8_UNORM_SRGB = 280, + ISL_FORMAT_R5G5_SNORM_B6_UNORM = 281, + ISL_FORMAT_B5G5R5X1_UNORM = 282, + ISL_FORMAT_B5G5R5X1_UNORM_SRGB = 283, + ISL_FORMAT_R8G8_SSCALED = 284, + ISL_FORMAT_R8G8_USCALED = 285, + ISL_FORMAT_R16_SSCALED = 286, + ISL_FORMAT_R16_USCALED = 287, + ISL_FORMAT_P8A8_UNORM_PALETTE0 = 290, + ISL_FORMAT_P8A8_UNORM_PALETTE1 = 291, + ISL_FORMAT_A1B5G5R5_UNORM = 292, + ISL_FORMAT_A4B4G4R4_UNORM = 293, + ISL_FORMAT_L8A8_UINT = 294, + ISL_FORMAT_L8A8_SINT = 295, + ISL_FORMAT_R8_UNORM = 320, + ISL_FORMAT_R8_SNORM = 321, + ISL_FORMAT_R8_SINT = 322, + ISL_FORMAT_R8_UINT = 323, + ISL_FORMAT_A8_UNORM = 324, + ISL_FORMAT_I8_UNORM = 325, + ISL_FORMAT_L8_UNORM = 326, + ISL_FORMAT_P4A4_UNORM_PALETTE0 = 327, + ISL_FORMAT_A4P4_UNORM_PALETTE0 = 328, + ISL_FORMAT_R8_SSCALED = 329, + ISL_FORMAT_R8_USCALED = 330, + ISL_FORMAT_P8_UNORM_PALETTE0 = 331, + ISL_FORMAT_L8_UNORM_SRGB = 332, + ISL_FORMAT_P8_UNORM_PALETTE1 = 333, + ISL_FORMAT_P4A4_UNORM_PALETTE1 = 334, + ISL_FORMAT_A4P4_UNORM_PALETTE1 = 335, + ISL_FORMAT_Y8_UNORM = 336, + ISL_FORMAT_L8_UINT = 338, + ISL_FORMAT_L8_SINT = 339, + ISL_FORMAT_I8_UINT = 340, + ISL_FORMAT_I8_SINT = 341, + ISL_FORMAT_DXT1_RGB_SRGB = 384, + ISL_FORMAT_R1_UNORM = 385, + ISL_FORMAT_YCRCB_NORMAL = 386, + ISL_FORMAT_YCRCB_SWAPUVY = 387, + ISL_FORMAT_P2_UNORM_PALETTE0 = 388, + ISL_FORMAT_P2_UNORM_PALETTE1 = 389, + ISL_FORMAT_BC1_UNORM = 390, + ISL_FORMAT_BC2_UNORM = 391, + ISL_FORMAT_BC3_UNORM = 392, + ISL_FORMAT_BC4_UNORM = 393, + ISL_FORMAT_BC5_UNORM = 394, + ISL_FORMAT_BC1_UNORM_SRGB = 395, + ISL_FORMAT_BC2_UNORM_SRGB = 396, + ISL_FORMAT_BC3_UNORM_SRGB = 397, + ISL_FORMAT_MONO8 = 398, + ISL_FORMAT_YCRCB_SWAPUV = 399, + ISL_FORMAT_YCRCB_SWAPY = 400, + ISL_FORMAT_DXT1_RGB = 401, + 
ISL_FORMAT_FXT1 = 402, + ISL_FORMAT_R8G8B8_UNORM = 403, + ISL_FORMAT_R8G8B8_SNORM = 404, + ISL_FORMAT_R8G8B8_SSCALED = 405, + ISL_FORMAT_R8G8B8_USCALED = 406, + ISL_FORMAT_R64G64B64A64_FLOAT = 407, + ISL_FORMAT_R64G64B64_FLOAT = 408, + ISL_FORMAT_BC4_SNORM = 409, + ISL_FORMAT_BC5_SNORM = 410, + ISL_FORMAT_R16G16B16_FLOAT = 411, + ISL_FORMAT_R16G16B16_UNORM = 412, + ISL_FORMAT_R16G16B16_SNORM = 413, + ISL_FORMAT_R16G16B16_SSCALED = 414, + ISL_FORMAT_R16G16B16_USCALED = 415, + ISL_FORMAT_BC6H_SF16 = 417, + ISL_FORMAT_BC7_UNORM = 418, + ISL_FORMAT_BC7_UNORM_SRGB = 419, + ISL_FORMAT_BC6H_UF16 = 420, + ISL_FORMAT_PLANAR_420_8 = 421, + ISL_FORMAT_R8G8B8_UNORM_SRGB = 424, + ISL_FORMAT_ETC1_RGB8 = 425, + ISL_FORMAT_ETC2_RGB8 = 426, + ISL_FORMAT_EAC_R11 = 427, + ISL_FORMAT_EAC_RG11 = 428, + ISL_FORMAT_EAC_SIGNED_R11 = 429, + ISL_FORMAT_EAC_SIGNED_RG11 = 430, + ISL_FORMAT_ETC2_SRGB8 = 431, + ISL_FORMAT_R16G16B16_UINT = 432, + ISL_FORMAT_R16G16B16_SINT = 433, + ISL_FORMAT_R32_SFIXED = 434, + ISL_FORMAT_R10G10B10A2_SNORM = 435, + ISL_FORMAT_R10G10B10A2_USCALED = 436, + ISL_FORMAT_R10G10B10A2_SSCALED = 437, + ISL_FORMAT_R10G10B10A2_SINT = 438, + ISL_FORMAT_B10G10R10A2_SNORM = 439, + ISL_FORMAT_B10G10R10A2_USCALED = 440, + ISL_FORMAT_B10G10R10A2_SSCALED = 441, + ISL_FORMAT_B10G10R10A2_UINT = 442, + ISL_FORMAT_B10G10R10A2_SINT = 443, + ISL_FORMAT_R64G64B64A64_PASSTHRU = 444, + ISL_FORMAT_R64G64B64_PASSTHRU = 445, + ISL_FORMAT_ETC2_RGB8_PTA = 448, + ISL_FORMAT_ETC2_SRGB8_PTA = 449, + ISL_FORMAT_ETC2_EAC_RGBA8 = 450, + ISL_FORMAT_ETC2_EAC_SRGB8_A8 = 451, + ISL_FORMAT_R8G8B8_UINT = 456, + ISL_FORMAT_R8G8B8_SINT = 457, + ISL_FORMAT_RAW = 511, + + /* Hardware doesn't understand this out-of-band value */ + ISL_FORMAT_UNSUPPORTED = UINT16_MAX, +}; + +/** + * Numerical base type for channels of isl_format. 
+ */ +enum isl_base_type { + ISL_VOID, + ISL_RAW, + ISL_UNORM, + ISL_SNORM, + ISL_UFLOAT, + ISL_SFLOAT, + ISL_UFIXED, + ISL_SFIXED, + ISL_UINT, + ISL_SINT, + ISL_USCALED, + ISL_SSCALED, +}; + +/** + * Colorspace of isl_format. + */ +enum isl_colorspace { + ISL_COLORSPACE_NONE = 0, + ISL_COLORSPACE_LINEAR, + ISL_COLORSPACE_SRGB, + ISL_COLORSPACE_YUV, +}; + +/** + * Texture compression mode of isl_format. + */ +enum isl_txc { + ISL_TXC_NONE = 0, + ISL_TXC_DXT1, + ISL_TXC_DXT3, + ISL_TXC_DXT5, + ISL_TXC_FXT1, + ISL_TXC_RGTC1, + ISL_TXC_RGTC2, + ISL_TXC_BPTC, + ISL_TXC_ETC1, + ISL_TXC_ETC2, +}; + +/** + * @brief Hardware tile mode + * + * WARNING: These values differ from the hardware enum values, which are + * unstable across hardware generations. + * + * Note that legacy Y tiling is ISL_TILING_Y0 instead of ISL_TILING_Y, to + * clearly distinguish it from Yf and Ys. + */ +enum isl_tiling { + ISL_TILING_LINEAR = 0, + ISL_TILING_W, + ISL_TILING_X, + ISL_TILING_Y0, /**< Legacy Y tiling */ + ISL_TILING_Yf, /**< Standard 4K tiling. The 'f' means "four". */ + ISL_TILING_Ys, /**< Standard 64K tiling. The 's' means "sixty-four". */ +}; + +/** + * @defgroup Tiling Flags + * @{ + */ +typedef uint32_t isl_tiling_flags_t; +#define ISL_TILING_LINEAR_BIT (1u << ISL_TILING_LINEAR) +#define ISL_TILING_W_BIT (1u << ISL_TILING_W) +#define ISL_TILING_X_BIT (1u << ISL_TILING_X) +#define ISL_TILING_Y0_BIT (1u << ISL_TILING_Y0) +#define ISL_TILING_Yf_BIT (1u << ISL_TILING_Yf) +#define ISL_TILING_Ys_BIT (1u << ISL_TILING_Ys) +#define ISL_TILING_ANY_MASK (~0u) +#define ISL_TILING_NON_LINEAR_MASK (~ISL_TILING_LINEAR_BIT) + +/** Any Y tiling, including legacy Y tiling. */ +#define ISL_TILING_ANY_Y_MASK (ISL_TILING_Y0_BIT | \ + ISL_TILING_Yf_BIT | \ + ISL_TILING_Ys_BIT) + +/** The Skylake BSpec refers to Yf and Ys as "standard tiling formats". */ +#define ISL_TILING_STD_Y_MASK (ISL_TILING_Yf_BIT | \ + ISL_TILING_Ys_BIT) +/** @} */ + +/** + * @brief Logical dimension of surface. 
+ * + * Note: There is no dimension for cube map surfaces. ISL interprets cube maps + * as 2D array surfaces. + */ +enum isl_surf_dim { + ISL_SURF_DIM_1D, + ISL_SURF_DIM_2D, + ISL_SURF_DIM_3D, +}; + +/** + * @brief Physical layout of the surface's dimensions. + */ +enum isl_dim_layout { + /** + * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section + * 6.17.3: 2D Surfaces. + * + * On many gens, 1D surfaces share the same layout as 2D surfaces. From + * the G35 PRM >> Volume 1: Graphics Core >> Section 6.17.2: 1D Surfaces: + * + * One-dimensional surfaces are identical to 2D surfaces with height of + * one. + * + * @invariant isl_surf::phys_level0_sa::depth == 1 + */ + ISL_DIM_LAYOUT_GEN4_2D, + + /** + * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section + * 6.17.5: 3D Surfaces. + * + * @invariant isl_surf::phys_level0_sa::array_len == 1 + */ + ISL_DIM_LAYOUT_GEN4_3D, + + /** + * For details, see the Skylake BSpec >> Memory Views >> Common Surface + * Formats >> Surface Layout and Tiling >> » 1D Surfaces. + */ + ISL_DIM_LAYOUT_GEN9_1D, +}; + +/* TODO(chadv): Explain */ +enum isl_array_pitch_span { + ISL_ARRAY_PITCH_SPAN_FULL, + ISL_ARRAY_PITCH_SPAN_COMPACT, +}; + +/** + * @defgroup Surface Usage + * @{ + */ +typedef uint64_t isl_surf_usage_flags_t; +#define ISL_SURF_USAGE_RENDER_TARGET_BIT (1u << 0) +#define ISL_SURF_USAGE_DEPTH_BIT (1u << 1) +#define ISL_SURF_USAGE_STENCIL_BIT (1u << 2) +#define ISL_SURF_USAGE_TEXTURE_BIT (1u << 3) +#define ISL_SURF_USAGE_CUBE_BIT (1u << 4) +#define ISL_SURF_USAGE_DISABLE_AUX_BIT (1u << 5) +#define ISL_SURF_USAGE_DISPLAY_BIT (1u << 6) +#define ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT (1u << 7) +#define ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT (1u << 8) +#define ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT (1u << 9) +#define ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT (1u << 10) +#define ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT (1u << 11) +/** @} */ + +/** + * Identical to VkSampleCountFlagBits. 
+ */ +enum isl_sample_count { + ISL_SAMPLE_COUNT_1_BIT = 1u, + ISL_SAMPLE_COUNT_2_BIT = 2u, + ISL_SAMPLE_COUNT_4_BIT = 4u, + ISL_SAMPLE_COUNT_8_BIT = 8u, + ISL_SAMPLE_COUNT_16_BIT = 16u, +}; +typedef uint32_t isl_sample_count_mask_t; + +/** + * @brief Multisample Format + */ +enum isl_msaa_layout { + /** + * @brief Suface is single-sampled. + */ + ISL_MSAA_LAYOUT_NONE, + + /** + * @brief [SNB+] Interleaved Multisample Format + * + * In this format, multiple samples are interleaved into each cacheline. + * In other words, the sample index is swizzled into the low 6 bits of the + * surface's virtual address space. + * + * For example, suppose the surface is legacy Y tiled, is 4x multisampled, + * and its pixel format is 32bpp. Then the first cacheline is arranged + * thus: + * + * (0,0,0) (0,1,0) (0,0,1) (1,0,1) + * (1,0,0) (1,1,0) (0,1,1) (1,1,1) + * + * (0,0,2) (1,0,2) (0,0,3) (1,0,3) + * (0,1,2) (1,1,2) (0,1,3) (1,1,3) + * + * The hardware docs refer to this format with multiple terms. In + * Sandybridge, this is the only multisample format; so no term is used. + * The Ivybridge docs refer to surfaces in this format as IMS (Interleaved + * Multisample Surface). Later hardware docs additionally refer to this + * format as MSFMT_DEPTH_STENCIL (because the format is deprecated for + * color surfaces). + * + * See the Sandybridge PRM, Volume 4, Part 1, Section 2.7 "Multisampled + * Surface Behavior". + * + * See the Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.1 "Interleaved + * Multisampled Surfaces". + */ + ISL_MSAA_LAYOUT_INTERLEAVED, + + /** + * @brief [IVB+] Array Multisample Format + * + * In this format, the surface's physical layout resembles that of a + * 2D array surface. + * + * Suppose the multisample surface's logical extent is (w, h) and its + * sample count is N. Then surface's physical extent is the same as + * a singlesample 2D surface whose logical extent is (w, h) and array + * length is N. 
Array slice `i` contains the pixel values for sample + * index `i`. + * + * The Ivybridge docs refer to surfaces in this format as UMS + * (Uncompressed Multsample Layout) and CMS (Compressed Multisample + * Surface). The Broadwell docs additionally refer to this format as + * MSFMT_MSS (MSS=Multisample Surface Storage). + * + * See the Broadwell PRM, Volume 5 "Memory Views", Section "Uncompressed + * Multisample Surfaces". + * + * See the Broadwell PRM, Volume 5 "Memory Views", Section "Compressed + * Multisample Surfaces". + */ + ISL_MSAA_LAYOUT_ARRAY, +}; + + +struct isl_device { + const struct brw_device_info *info; + bool use_separate_stencil; + bool has_bit6_swizzling; +}; + +struct isl_extent2d { + union { uint32_t w, width; }; + union { uint32_t h, height; }; +}; + +struct isl_extent3d { + union { uint32_t w, width; }; + union { uint32_t h, height; }; + union { uint32_t d, depth; }; +}; + +struct isl_extent4d { + union { uint32_t w, width; }; + union { uint32_t h, height; }; + union { uint32_t d, depth; }; + union { uint32_t a, array_len; }; +}; + +struct isl_channel_layout { + enum isl_base_type type; + uint8_t bits; /**< Size in bits */ +}; + +/** + * Each format has 3D block extent (width, height, depth). The block extent of + * compressed formats is that of the format's compression block. For example, + * the block extent of ISL_FORMAT_ETC2_RGB8 is (w=4, h=4, d=1). The block + * extent of uncompressed pixel formats, such as ISL_FORMAT_R8G8B8A8_UNORM, is + * is (w=1, h=1, d=1). 
+ */ +struct isl_format_layout { + enum isl_format format; + + uint8_t bs; /**< Block size, in bytes, rounded towards 0 */ + uint8_t bw; /**< Block width, in pixels */ + uint8_t bh; /**< Block height, in pixels */ + uint8_t bd; /**< Block depth, in pixels */ + + struct { + struct isl_channel_layout r; /**< Red channel */ + struct isl_channel_layout g; /**< Green channel */ + struct isl_channel_layout b; /**< Blue channel */ + struct isl_channel_layout a; /**< Alpha channel */ + struct isl_channel_layout l; /**< Luminance channel */ + struct isl_channel_layout i; /**< Intensity channel */ + struct isl_channel_layout p; /**< Palette channel */ + } channels; + + enum isl_colorspace colorspace; + enum isl_txc txc; +}; + +struct isl_tile_info { + enum isl_tiling tiling; + uint32_t width; /**< in bytes */ + uint32_t height; /**< in rows of memory */ + uint32_t size; /**< in bytes */ +}; + +/** + * @brief Input to surface initialization + * + * @invariant width >= 1 + * @invariant height >= 1 + * @invariant depth >= 1 + * @invariant levels >= 1 + * @invariant samples >= 1 + * @invariant array_len >= 1 + * + * @invariant if 1D then height == 1 and depth == 1 and samples == 1 + * @invariant if 2D then depth == 1 + * @invariant if 3D then array_len == 1 and samples == 1 + */ +struct isl_surf_init_info { + enum isl_surf_dim dim; + enum isl_format format; + + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t levels; + uint32_t array_len; + uint32_t samples; + + /** Lower bound for isl_surf::alignment, in bytes. */ + uint32_t min_alignment; + + /** Lower bound for isl_surf::pitch, in bytes. */ + uint32_t min_pitch; + + isl_surf_usage_flags_t usage; + + /** Flags that alter how ISL selects isl_surf::tiling. 
*/ + isl_tiling_flags_t tiling_flags; +}; + +struct isl_surf { + enum isl_surf_dim dim; + enum isl_dim_layout dim_layout; + enum isl_msaa_layout msaa_layout; + enum isl_tiling tiling; + enum isl_format format; + + /** + * Alignment of the upper-left sample of each subimage, in units of surface + * elements. + */ + struct isl_extent3d image_alignment_el; + + /** + * Logical extent of the surface's base level, in units of pixels. This is + * identical to the extent defined in isl_surf_init_info. + */ + struct isl_extent4d logical_level0_px; + + /** + * Physical extent of the surface's base level, in units of physical + * surface samples and aligned to the format's compression block. + * + * Consider isl_dim_layout as an operator that transforms a logical surface + * layout to a physical surface layout. Then + * + * logical_layout := (isl_surf::dim, isl_surf::logical_level0_px) + * isl_surf::phys_level0_sa := isl_surf::dim_layout * logical_layout + */ + struct isl_extent4d phys_level0_sa; + + uint32_t levels; + uint32_t samples; + + /** Total size of the surface, in bytes. */ + uint32_t size; + + /** Required alignment for the surface's base address. */ + uint32_t alignment; + + /** + * Pitch between vertically adjacent surface elements, in bytes. + */ + uint32_t row_pitch; + + /** + * Pitch between physical array slices, in rows of surface elements. + */ + uint32_t array_pitch_el_rows; + + enum isl_array_pitch_span array_pitch_span; + + /** Copy of isl_surf_init_info::usage. 
*/ + isl_surf_usage_flags_t usage; +}; + +extern const struct isl_format_layout isl_format_layouts[]; + +void +isl_device_init(struct isl_device *dev, + const struct brw_device_info *info, + bool has_bit6_swizzling); + +isl_sample_count_mask_t ATTRIBUTE_CONST +isl_device_get_sample_counts(struct isl_device *dev); + +static inline const struct isl_format_layout * ATTRIBUTE_CONST +isl_format_get_layout(enum isl_format fmt) +{ + return &isl_format_layouts[fmt]; +} + +bool +isl_format_has_sint_channel(enum isl_format fmt) ATTRIBUTE_CONST; + +static inline bool +isl_format_is_compressed(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->txc != ISL_TXC_NONE; +} + +static inline bool +isl_format_has_bc_compression(enum isl_format fmt) +{ + switch (isl_format_get_layout(fmt)->txc) { + case ISL_TXC_DXT1: + case ISL_TXC_DXT3: + case ISL_TXC_DXT5: + return true; + case ISL_TXC_NONE: + case ISL_TXC_FXT1: + case ISL_TXC_RGTC1: + case ISL_TXC_RGTC2: + case ISL_TXC_BPTC: + case ISL_TXC_ETC1: + case ISL_TXC_ETC2: + return false; + } + + unreachable("bad texture compression mode"); + return false; +} + +static inline bool +isl_format_is_yuv(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->colorspace == ISL_COLORSPACE_YUV; +} + +static inline bool +isl_format_block_is_1x1x1(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->bw == 1 && fmtl->bh == 1 && fmtl->bd == 1; +} + +static inline bool +isl_format_is_rgb(enum isl_format fmt) +{ + return isl_format_layouts[fmt].channels.r.bits > 0 && + isl_format_layouts[fmt].channels.g.bits > 0 && + isl_format_layouts[fmt].channels.b.bits > 0 && + isl_format_layouts[fmt].channels.a.bits == 0; +} + +enum isl_format isl_format_rgb_to_rgba(enum isl_format rgb) ATTRIBUTE_CONST; +enum isl_format isl_format_rgb_to_rgbx(enum isl_format rgb) ATTRIBUTE_CONST; + +bool 
isl_is_storage_image_format(enum isl_format fmt); + +enum isl_format +isl_lower_storage_image_format(const struct isl_device *dev, + enum isl_format fmt); + +static inline bool +isl_tiling_is_any_y(enum isl_tiling tiling) +{ + return (1u << tiling) & ISL_TILING_ANY_MASK; +} + +static inline bool +isl_tiling_is_std_y(enum isl_tiling tiling) +{ + return (1u << tiling) & ISL_TILING_STD_Y_MASK; +} + +bool +isl_tiling_get_info(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_tile_info *info); + +void +isl_tiling_get_extent(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_extent2d *e); +bool +isl_surf_choose_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling *tiling); + +static inline bool +isl_surf_usage_is_display(isl_surf_usage_flags_t usage) +{ + return usage & ISL_SURF_USAGE_DISPLAY_BIT; +} + +static inline bool +isl_surf_usage_is_depth(isl_surf_usage_flags_t usage) +{ + return usage & ISL_SURF_USAGE_DEPTH_BIT; +} + +static inline bool +isl_surf_usage_is_stencil(isl_surf_usage_flags_t usage) +{ + return usage & ISL_SURF_USAGE_STENCIL_BIT; +} + +static inline bool +isl_surf_usage_is_depth_and_stencil(isl_surf_usage_flags_t usage) +{ + return (usage & ISL_SURF_USAGE_DEPTH_BIT) && + (usage & ISL_SURF_USAGE_STENCIL_BIT); +} + +static inline bool +isl_surf_usage_is_depth_or_stencil(isl_surf_usage_flags_t usage) +{ + return usage & (ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT); +} + +static inline bool +isl_surf_info_is_z16(const struct isl_surf_init_info *info) +{ + return (info->usage & ISL_SURF_USAGE_DEPTH_BIT) && + (info->format == ISL_FORMAT_R16_UNORM); +} + +static inline bool +isl_surf_info_is_z32_float(const struct isl_surf_init_info *info) +{ + return (info->usage & ISL_SURF_USAGE_DEPTH_BIT) && + (info->format == ISL_FORMAT_R32_FLOAT); +} + +static inline struct isl_extent2d +isl_extent2d(uint32_t 
width, uint32_t height) +{ + return (struct isl_extent2d) { .w = width, .h = height }; +} + +static inline struct isl_extent3d +isl_extent3d(uint32_t width, uint32_t height, uint32_t depth) +{ + return (struct isl_extent3d) { .w = width, .h = height, .d = depth }; +} + +static inline struct isl_extent4d +isl_extent4d(uint32_t width, uint32_t height, uint32_t depth, + uint32_t array_len) +{ + return (struct isl_extent4d) { + .w = width, + .h = height, + .d = depth, + .a = array_len, + }; +} + +#define isl_surf_init(dev, surf, ...) \ + isl_surf_init_s((dev), (surf), \ + &(struct isl_surf_init_info) { __VA_ARGS__ }); + +bool +isl_surf_init_s(const struct isl_device *dev, + struct isl_surf *surf, + const struct isl_surf_init_info *restrict info); + +void +isl_surf_get_tile_info(const struct isl_device *dev, + const struct isl_surf *surf, + struct isl_tile_info *tile_info); + +/** + * Alignment of the upper-left sample of each subimage, in units of surface + * elements. + */ +static inline struct isl_extent3d +isl_surf_get_image_alignment_el(const struct isl_surf *surf) +{ + return surf->image_alignment_el; +} + +/** + * Alignment of the upper-left sample of each subimage, in units of surface + * samples. + */ +static inline struct isl_extent3d +isl_surf_get_image_alignment_sa(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + return (struct isl_extent3d) { + .w = fmtl->bw * surf->image_alignment_el.w, + .h = fmtl->bh * surf->image_alignment_el.h, + .d = fmtl->bd * surf->image_alignment_el.d, + }; +} + +/** + * Pitch between vertically adjacent surface elements, in bytes. + */ +static inline uint32_t +isl_surf_get_row_pitch(const struct isl_surf *surf) +{ + return surf->row_pitch; +} + +/** + * Pitch between vertically adjacent surface elements, in units of surface elements. 
+ */ +static inline uint32_t +isl_surf_get_row_pitch_el(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + assert(surf->row_pitch % fmtl->bs == 0); + return surf->row_pitch / fmtl->bs; +} + +/** + * Pitch between physical array slices, in rows of surface elements. + */ +static inline uint32_t +isl_surf_get_array_pitch_el_rows(const struct isl_surf *surf) +{ + return surf->array_pitch_el_rows; +} + +/** + * Pitch between physical array slices, in units of surface elements. + */ +static inline uint32_t +isl_surf_get_array_pitch_el(const struct isl_surf *surf) +{ + return isl_surf_get_array_pitch_el_rows(surf) * + isl_surf_get_row_pitch_el(surf); +} + +/** + * Pitch between physical array slices, in rows of surface samples. + */ +static inline uint32_t +isl_surf_get_array_pitch_sa_rows(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + return fmtl->bh * isl_surf_get_array_pitch_el_rows(surf); +} + +/** + * Pitch between physical array slices, in bytes. + */ +static inline uint32_t +isl_surf_get_array_pitch(const struct isl_surf *surf) +{ + return isl_surf_get_array_pitch_sa_rows(surf) * surf->row_pitch; +} + +/** + * Calculate the offset, in units of surface elements, to a subimage in the + * surface. + * + * @invariant level < surface levels + * @invariant logical_array_layer < logical array length of surface + * @invariant logical_z_offset_px < logical depth of surface at level + */ +void +isl_surf_get_image_offset_el(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *x_offset_el, + uint32_t *y_offset_el); + +/** + * @brief Calculate the intratile offsets to a subimage in the surface. + * + * In @a base_address_offset return the offset from the base of the surface to + * the base address of the first tile of the subimage. 
In @a x_offset_el and + * @a y_offset_el, return the offset, in units of surface elements, from the + * tile's base to the subimage's first surface element. The x and y offsets + * are intratile offsets; that is, they do not exceed the boundary of the + * surface's tiling format. + */ +void +isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset, + uint32_t *base_address_offset, + uint32_t *x_offset_el, + uint32_t *y_offset_el); + +/** + * @brief Get value of 3DSTATE_DEPTH_BUFFER.SurfaceFormat + * + * @pre surf->usage has ISL_SURF_USAGE_DEPTH_BIT + * @pre surf->format must be a valid format for depth surfaces + */ +uint32_t +isl_surf_get_depth_format(const struct isl_device *dev, + const struct isl_surf *surf); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/isl/isl_format.c b/src/intel/isl/isl_format.c new file mode 100644 index 00000000000..0fe6e9b83ab --- /dev/null +++ b/src/intel/isl/isl_format.c @@ -0,0 +1,95 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "isl.h" + +bool +isl_format_has_sint_channel(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->channels.r.type == ISL_SINT || + fmtl->channels.g.type == ISL_SINT || + fmtl->channels.b.type == ISL_SINT || + fmtl->channels.a.type == ISL_SINT || + fmtl->channels.l.type == ISL_SINT || + fmtl->channels.i.type == ISL_SINT || + fmtl->channels.p.type == ISL_SINT; +} + +enum isl_format +isl_format_rgb_to_rgba(enum isl_format rgb) +{ + assert(isl_format_is_rgb(rgb)); + + switch (rgb) { + case ISL_FORMAT_R32G32B32_FLOAT: return ISL_FORMAT_R32G32B32A32_FLOAT; + case ISL_FORMAT_R32G32B32_SINT: return ISL_FORMAT_R32G32B32A32_SINT; + case ISL_FORMAT_R32G32B32_UINT: return ISL_FORMAT_R32G32B32A32_UINT; + case ISL_FORMAT_R32G32B32_UNORM: return ISL_FORMAT_R32G32B32A32_UNORM; + case ISL_FORMAT_R32G32B32_SNORM: return ISL_FORMAT_R32G32B32A32_SNORM; + case ISL_FORMAT_R32G32B32_SSCALED: return ISL_FORMAT_R32G32B32A32_SSCALED; + case ISL_FORMAT_R32G32B32_USCALED: return ISL_FORMAT_R32G32B32A32_USCALED; + case ISL_FORMAT_R32G32B32_SFIXED: return ISL_FORMAT_R32G32B32A32_SFIXED; + case ISL_FORMAT_R8G8B8_UNORM: return ISL_FORMAT_R8G8B8A8_UNORM; + case ISL_FORMAT_R8G8B8_SNORM: return ISL_FORMAT_R8G8B8A8_SNORM; + case ISL_FORMAT_R8G8B8_SSCALED: return ISL_FORMAT_R8G8B8A8_SSCALED; + case ISL_FORMAT_R8G8B8_USCALED: return ISL_FORMAT_R8G8B8A8_USCALED; + case ISL_FORMAT_R16G16B16_FLOAT: return ISL_FORMAT_R16G16B16A16_FLOAT; + case ISL_FORMAT_R16G16B16_UNORM: return ISL_FORMAT_R16G16B16A16_UNORM; + case ISL_FORMAT_R16G16B16_SNORM: return ISL_FORMAT_R16G16B16A16_SNORM; + case 
ISL_FORMAT_R16G16B16_SSCALED: return ISL_FORMAT_R16G16B16A16_SSCALED; + case ISL_FORMAT_R16G16B16_USCALED: return ISL_FORMAT_R16G16B16A16_USCALED; + case ISL_FORMAT_R8G8B8_UNORM_SRGB: return ISL_FORMAT_R8G8B8A8_UNORM_SRGB; + case ISL_FORMAT_R16G16B16_UINT: return ISL_FORMAT_R16G16B16A16_UINT; + case ISL_FORMAT_R16G16B16_SINT: return ISL_FORMAT_R16G16B16A16_SINT; + case ISL_FORMAT_R8G8B8_UINT: return ISL_FORMAT_R8G8B8A8_UINT; + case ISL_FORMAT_R8G8B8_SINT: return ISL_FORMAT_R8G8B8A8_SINT; + default: + return ISL_FORMAT_UNSUPPORTED; + } +} + +enum isl_format +isl_format_rgb_to_rgbx(enum isl_format rgb) +{ + assert(isl_format_is_rgb(rgb)); + + switch (rgb) { + case ISL_FORMAT_R32G32B32_FLOAT: + return ISL_FORMAT_R32G32B32X32_FLOAT; + case ISL_FORMAT_R16G16B16_UNORM: + return ISL_FORMAT_R16G16B16X16_UNORM; + case ISL_FORMAT_R16G16B16_FLOAT: + return ISL_FORMAT_R16G16B16X16_FLOAT; + case ISL_FORMAT_R8G8B8_UNORM: + return ISL_FORMAT_R8G8B8X8_UNORM; + case ISL_FORMAT_R8G8B8_UNORM_SRGB: + return ISL_FORMAT_R8G8B8X8_UNORM_SRGB; + default: + return ISL_FORMAT_UNSUPPORTED; + } +} diff --git a/src/intel/isl/isl_format_layout.csv b/src/intel/isl/isl_format_layout.csv new file mode 100644 index 00000000000..af2786ae630 --- /dev/null +++ b/src/intel/isl/isl_format_layout.csv @@ -0,0 +1,287 @@ +# Copyright 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +# +# @file +# @brief Layout of all hardware surface formats +# +# For the official list, see Broadwell PRM: Volume 2b: Command Reference: +# Enumerations: SURFACE_FORMAT. +# + + +# Columns: +# name: format name in PRM +# bpb: bits per block +# bw: block width, in pixels +# bh: block height, in pixels +# bd: block depth, in pixels +# r: red channel, data type and bitwidth +# g: green channel +# b: blue channel +# a: alpha channel +# l: luminance channel +# i: intensity channel +# p: palette channel +# space: colorspace +# txc: texture compression +# +# Data Types: +# x: void +# r: raw +# un: unorm +# sn: snorm +# uf: ufloat +# sf: sfloat +# ux: ufixed +# sx: sfixed +# ui: uint +# si: sint +# us: uscaled +# ss: sscaled + + +# Table is aligned with the Vim commands below, using the Align plugin: +# :AlignCtrl lr+ p8000000000000P1 +# /^# name/,$ Align, + +# name , bpb, bw, bh, bd, r, g, b, a, l, i, p, space, txc +R32G32B32A32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, sf32, , , , linear, +R32G32B32A32_SINT , 128, 1, 1, 1, si32, si32, si32, si32, , , , linear, +R32G32B32A32_UINT , 128, 1, 1, 1, ui32, ui32, ui32, ui32, , , , linear, +R32G32B32A32_UNORM , 128, 1, 1, 1, un32, un32, un32, un32, , , , linear, +R32G32B32A32_SNORM , 128, 1, 1, 1, sn32, sn32, sn32, sn32, , , , linear, +R64G64_FLOAT , 128, 1, 1, 1, sf64, sf64, , , , , , linear, +R32G32B32X32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, x32, , , , linear, +R32G32B32A32_SSCALED , 128, 1, 1, 1, ss32, ss32, ss32, ss32, , , , linear, 
+R32G32B32A32_USCALED , 128, 1, 1, 1, us32, us32, us32, us32, , , , linear, +R32G32B32A32_SFIXED , 128, 1, 1, 1, sx32, sx32, sx32, sx32, , , , linear, +R64G64_PASSTHRU , 128, 1, 1, 1, r64, r64, , , , , , , +R32G32B32_FLOAT , 96, 1, 1, 1, sf32, sf32, sf32, , , , , linear, +R32G32B32_SINT , 96, 1, 1, 1, si32, si32, si32, , , , , linear, +R32G32B32_UINT , 96, 1, 1, 1, ui32, ui32, ui32, , , , , linear, +R32G32B32_UNORM , 96, 1, 1, 1, un32, un32, un32, , , , , linear, +R32G32B32_SNORM , 96, 1, 1, 1, sn32, sn32, sn32, , , , , linear, +R32G32B32_SSCALED , 96, 1, 1, 1, ss32, ss32, ss32, , , , , linear, +R32G32B32_USCALED , 96, 1, 1, 1, us32, us32, us32, , , , , linear, +R32G32B32_SFIXED , 96, 1, 1, 1, sx32, sx32, sx32, , , , , linear, +R16G16B16A16_UNORM , 64, 1, 1, 1, un16, un16, un16, un16, , , , linear, +R16G16B16A16_SNORM , 64, 1, 1, 1, sn16, sn16, sn16, sn16, , , , linear, +R16G16B16A16_SINT , 64, 1, 1, 1, si16, si16, si16, si16, , , , linear, +R16G16B16A16_UINT , 64, 1, 1, 1, ui16, ui16, ui16, ui16, , , , linear, +R16G16B16A16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, sf16, , , , linear, +R32G32_FLOAT , 64, 1, 1, 1, sf32, sf32, , , , , , linear, +R32G32_SINT , 64, 1, 1, 1, si32, si32, , , , , , linear, +R32G32_UINT , 64, 1, 1, 1, ui32, ui32, , , , , , linear, +R32_FLOAT_X8X24_TYPELESS , 64, 1, 1, 1, sf32, x8, x24, , , , , linear, +X32_TYPELESS_G8X24_UINT , 64, 1, 1, 1, x32, ui8, x24, , , , , linear, +L32A32_FLOAT , 64, 1, 1, 1, , , , sf32, sf32, , , linear, +R32G32_UNORM , 64, 1, 1, 1, un32, un32, , , , , , linear, +R32G32_SNORM , 64, 1, 1, 1, sn32, sn32, , , , , , linear, +R64_FLOAT , 64, 1, 1, 1, sf64, , , , , , , linear, +R16G16B16X16_UNORM , 64, 1, 1, 1, un16, un16, un16, x16, , , , linear, +R16G16B16X16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, x16, , , , linear, +A32X32_FLOAT , 64, 1, 1, 1, , , , sf32, x32, , , alpha, +L32X32_FLOAT , 64, 1, 1, 1, , , , x32, sf32, , , linear, +I32X32_FLOAT , 64, 1, 1, 1, , , , x32, , sf32, , linear, +R16G16B16A16_SSCALED , 64, 1, 1, 
1, ss16, ss16, ss16, ss16, , , , linear, +R16G16B16A16_USCALED , 64, 1, 1, 1, us16, us16, us16, us16, , , , linear, +R32G32_SSCALED , 64, 1, 1, 1, ss32, ss32, , , , , , linear, +R32G32_USCALED , 64, 1, 1, 1, us32, us32, , , , , , linear, +R32G32_SFIXED , 64, 1, 1, 1, sx32, sx32, , , , , , linear, +R64_PASSTHRU , 64, 1, 1, 1, r64, , , , , , , , +B8G8R8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear, +B8G8R8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb, +R10G10B10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, +R10G10B10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, +R10G10B10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear, +R10G10B10_SNORM_A2_UNORM , 32, 1, 1, 1, sn10, sn10, sn10, un2, , , , linear, +R8G8B8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear, +R8G8B8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb, +R8G8B8A8_SNORM , 32, 1, 1, 1, sn8, sn8, sn8, sn8, , , , linear, +R8G8B8A8_SINT , 32, 1, 1, 1, si8, si8, si8, si8, , , , linear, +R8G8B8A8_UINT , 32, 1, 1, 1, ui8, ui8, ui8, ui8, , , , linear, +R16G16_UNORM , 32, 1, 1, 1, un16, un16, , , , , , linear, +R16G16_SNORM , 32, 1, 1, 1, sn16, sn16, , , , , , linear, +R16G16_SINT , 32, 1, 1, 1, si16, si16, , , , , , linear, +R16G16_UINT , 32, 1, 1, 1, ui16, ui16, , , , , , linear, +R16G16_FLOAT , 32, 1, 1, 1, sf16, sf16, , , , , , linear, +B10G10R10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, +B10G10R10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, +R11G11B10_FLOAT , 32, 1, 1, 1, uf11, uf11, uf10, , , , , linear, +R32_SINT , 32, 1, 1, 1, si32, , , , , , , linear, +R32_UINT , 32, 1, 1, 1, ui32, , , , , , , linear, +R32_FLOAT , 32, 1, 1, 1, sf32, , , , , , , linear, +R24_UNORM_X8_TYPELESS , 32, 1, 1, 1, un24, x8, , , , , , linear, +X24_TYPELESS_G8_UINT , 32, 1, 1, 1, x24, ui8, , , , , , linear, +L32_UNORM , 32, 1, 1, 1, , , , , un32, , , linear, +A32_UNORM , 32, 1, 1, 1, , , , un32, , , , alpha, 
+L16A16_UNORM , 32, 1, 1, 1, , , , un16, un16, , , linear, +I24X8_UNORM , 32, 1, 1, 1, , , , x8, , un24, , linear, +L24X8_UNORM , 32, 1, 1, 1, , , , x8, un24, , , linear, +A24X8_UNORM , 32, 1, 1, 1, , , , un24, x8, , , alpha, +I32_FLOAT , 32, 1, 1, 1, , , , , , sf32, , linear, +L32_FLOAT , 32, 1, 1, 1, , , , , sf32, , , linear, +A32_FLOAT , 32, 1, 1, 1, , , , sf32, , , , alpha, +X8B8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear, +A8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, x8, un8, , , , linear, +B8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear, +B8G8R8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear, +B8G8R8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb, +R8G8B8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear, +R8G8B8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb, +R9G9B9E5_SHAREDEXP , 32, 1, 1, 1, ui9, ui9, ui9, , , , , linear, +B10G10R10X2_UNORM , 32, 1, 1, 1, un10, un10, un10, x2, , , , linear, +L16A16_FLOAT , 32, 1, 1, 1, , , , sf16, sf16, , , linear, +R32_UNORM , 32, 1, 1, 1, un32, , , , , , , linear, +R32_SNORM , 32, 1, 1, 1, sn32, , , , , , , linear, +R10G10B10X2_USCALED , 32, 1, 1, 1, us10, us10, us10, x2, , , , linear, +R8G8B8A8_SSCALED , 32, 1, 1, 1, ss8, ss8, ss8, ss8, , , , linear, +R8G8B8A8_USCALED , 32, 1, 1, 1, us8, us8, us8, us8, , , , linear, +R16G16_SSCALED , 32, 1, 1, 1, ss16, ss16, , , , , , linear, +R16G16_USCALED , 32, 1, 1, 1, us16, us16, , , , , , linear, +R32_SSCALED , 32, 1, 1, 1, ss32, , , , , , , linear, +R32_USCALED , 32, 1, 1, 1, us32, , , , , , , linear, +B5G6R5_UNORM , 16, 1, 1, 1, un5, un6, un5, , , , , linear, +B5G6R5_UNORM_SRGB , 16, 1, 1, 1, un5, un6, un5, , , , , srgb, +B5G5R5A1_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear, +B5G5R5A1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, un1, , , , srgb, +B4G4R4A4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear, +B4G4R4A4_UNORM_SRGB , 16, 1, 1, 1, un4, un4, un4, un4, , , , srgb, +R8G8_UNORM , 16, 1, 
1, 1, un8, un8, , , , , , linear, +R8G8_SNORM , 16, 1, 1, 1, sn8, sn8, , , , , , linear, +R8G8_SINT , 16, 1, 1, 1, si8, si8, , , , , , linear, +R8G8_UINT , 16, 1, 1, 1, ui8, ui8, , , , , , linear, +R16_UNORM , 16, 1, 1, 1, un16, , , , , , , linear, +R16_SNORM , 16, 1, 1, 1, sn16, , , , , , , linear, +R16_SINT , 16, 1, 1, 1, si16, , , , , , , linear, +R16_UINT , 16, 1, 1, 1, ui16, , , , , , , linear, +R16_FLOAT , 16, 1, 1, 1, sf16, , , , , , , linear, +A8P8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear, +A8P8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear, +I16_UNORM , 16, 1, 1, 1, , , , , , un16, , linear, +L16_UNORM , 16, 1, 1, 1, , , , , un16, , , linear, +A16_UNORM , 16, 1, 1, 1, , , , un16, , , , alpha, +L8A8_UNORM , 16, 1, 1, 1, , , , un8, un8, , , linear, +I16_FLOAT , 16, 1, 1, 1, , , , , , sf16, , linear, +L16_FLOAT , 16, 1, 1, 1, , , , , sf16, , , linear, +A16_FLOAT , 16, 1, 1, 1, , , , sf16, , , , alpha, +L8A8_UNORM_SRGB , 16, 1, 1, 1, , , , un8, un8, , , srgb, +R5G5_SNORM_B6_UNORM , 16, 1, 1, 1, sn5, sn5, un6, , , , , linear, +B5G5R5X1_UNORM , 16, 1, 1, 1, un5, un5, un5, x1, , , , linear, +B5G5R5X1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, x1, , , , srgb, +R8G8_SSCALED , 16, 1, 1, 1, ss8, ss8, , , , , , linear, +R8G8_USCALED , 16, 1, 1, 1, us8, us8, , , , , , linear, +R16_SSCALED , 16, 1, 1, 1, ss16, , , , , , , linear, +R16_USCALED , 16, 1, 1, 1, us16, , , , , , , linear, +P8A8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear, +P8A8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear, +A1B5G5R5_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear, +A4B4G4R4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear, +L8A8_UINT , 16, 1, 1, 1, , , , ui8, ui8, , , linear, +L8A8_SINT , 16, 1, 1, 1, , , , si8, si8, , , linear, +R8_UNORM , 8, 1, 1, 1, un8, , , , , , , linear, +R8_SNORM , 8, 1, 1, 1, sn8, , , , , , , linear, +R8_SINT , 8, 1, 1, 1, si8, , , , , , , linear, +R8_UINT , 8, 1, 1, 1, ui8, , , , , , , linear, +A8_UNORM , 
8, 1, 1, 1, , , , un8, , , , alpha, +I8_UNORM , 8, 1, 1, 1, , , , , , un8, , linear, +L8_UNORM , 8, 1, 1, 1, , , , , un8, , , linear, +P4A4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear, +A4P4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear, +R8_SSCALED , 8, 1, 1, 1, ss8, , , , , , , linear, +R8_USCALED , 8, 1, 1, 1, us8, , , , , , , linear, +P8_UNORM_PALETTE0 , 8, 1, 1, 1, , , , , , , un8, linear, +L8_UNORM_SRGB , 8, 1, 1, 1, , , , , un8, , , linear, +P8_UNORM_PALETTE1 , 8, 1, 1, 1, , , , , , , un8, linear, +P4A4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear, +A4P4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear, +Y8_UNORM , 0, 0, 0, 0, , , , , , , , yuv, +L8_UINT , 8, 1, 1, 1, , , , , ui8, , , linear, +L8_SINT , 8, 1, 1, 1, , , , , si8, , , linear, +I8_UINT , 8, 1, 1, 1, , , , , , ui8, , linear, +I8_SINT , 8, 1, 1, 1, , , , , , si8, , linear, +DXT1_RGB_SRGB , 64, 4, 4, 1, un4, un4, un4, , , , , srgb, dxt1 +R1_UNORM , 1, 1, 1, 1, un1, , , , , , , linear, +YCRCB_NORMAL , 0, 0, 0, 0, , , , , , , , yuv, +YCRCB_SWAPUVY , 0, 0, 0, 0, , , , , , , , yuv, +P2_UNORM_PALETTE0 , 2, 1, 1, 1, , , , , , , un2, linear, +P2_UNORM_PALETTE1 , 2, 1, 1, 1, , , , , , , un2, linear, +BC1_UNORM , 64, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt1 +BC2_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt3 +BC3_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt5 +BC4_UNORM , 64, 4, 4, 1, un8, , , , , , , linear, rgtc1 +BC5_UNORM , 128, 4, 4, 1, un8, un8, , , , , , linear, rgtc2 +BC1_UNORM_SRGB , 64, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt1 +BC2_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt3 +BC3_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt5 +MONO8 , 1, 1, 1, 1, , , , , , , , , +YCRCB_SWAPUV , 0, 0, 0, 0, , , , , , , , yuv, +YCRCB_SWAPY , 0, 0, 0, 0, , , , , , , , yuv, +DXT1_RGB , 64, 4, 4, 1, un4, un4, un4, , , , , linear, dxt1 +FXT1 , 128, 8, 4, 1, un4, un4, un4, , , , , linear, fxt1 
+R8G8B8_UNORM , 24, 1, 1, 1, un8, un8, un8, , , , , linear, +R8G8B8_SNORM , 24, 1, 1, 1, sn8, sn8, sn8, , , , , linear, +R8G8B8_SSCALED , 24, 1, 1, 1, ss8, ss8, ss8, , , , , linear, +R8G8B8_USCALED , 24, 1, 1, 1, us8, us8, us8, , , , , linear, +R64G64B64A64_FLOAT , 256, 1, 1, 1, sf64, sf64, sf64, sf64, , , , linear, +R64G64B64_FLOAT , 192, 1, 1, 1, sf64, sf64, sf64, , , , , linear, +BC4_SNORM , 64, 4, 4, 1, sn8, , , , , , , linear, rgtc1 +BC5_SNORM , 128, 4, 4, 1, sn8, sn8, , , , , , linear, rgtc2 +R16G16B16_FLOAT , 48, 1, 1, 1, sf16, sf16, sf16, , , , , linear, +R16G16B16_UNORM , 48, 1, 1, 1, un16, un16, un16, , , , , linear, +R16G16B16_SNORM , 48, 1, 1, 1, sn16, sn16, sn16, , , , , linear, +R16G16B16_SSCALED , 48, 1, 1, 1, ss16, ss16, ss16, , , , , linear, +R16G16B16_USCALED , 48, 1, 1, 1, us16, us16, us16, , , , , linear, +BC6H_SF16 , 128, 4, 4, 1, sf16, sf16, sf16, , , , , linear, bptc +BC7_UNORM , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, bptc +BC7_UNORM_SRGB , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, bptc +BC6H_UF16 , 128, 4, 4, 1, uf16, uf16, uf16, , , , , linear, bptc +PLANAR_420_8 , 0, 0, 0, 0, , , , , , , , yuv, +R8G8B8_UNORM_SRGB , 24, 1, 1, 1, un8, un8, un8, , , , , srgb, +ETC1_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc1 +ETC2_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc2 +EAC_R11 , 64, 4, 4, 1, un11, , , , , , , linear, etc2 +EAC_RG11 , 128, 4, 4, 1, un11, un11, , , , , , linear, etc2 +EAC_SIGNED_R11 , 64, 4, 4, 1, sn11, , , , , , , linear, etc2 +EAC_SIGNED_RG11 , 128, 4, 4, 1, sn11, sn11, , , , , , linear, etc2 +ETC2_SRGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , srgb, etc2 +R16G16B16_UINT , 48, 1, 1, 1, ui16, ui16, ui16, , , , , linear, +R16G16B16_SINT , 48, 1, 1, 1, si16, si16, si16, , , , , linear, +R32_SFIXED , 32, 1, 1, 1, sx32, , , , , , , linear, +R10G10B10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear, +R10G10B10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear, +R10G10B10A2_SSCALED , 32, 
1, 1, 1, ss10, ss10, ss10, ss2, , , , linear, +R10G10B10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear, +B10G10R10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear, +B10G10R10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear, +B10G10R10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear, +B10G10R10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear, +B10G10R10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear, +R64G64B64A64_PASSTHRU , 256, 1, 1, 1, r64, r64, r64, r64, , , , , +R64G64B64_PASSTHRU , 192, 1, 1, 1, r64, r64, r64, , , , , , +ETC2_RGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , linear, etc2 +ETC2_SRGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , srgb, etc2 +ETC2_EAC_RGBA8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, etc2 +ETC2_EAC_SRGB8_A8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, etc2 +R8G8B8_UINT , 24, 1, 1, 1, ui8, ui8, ui8, , , , , linear, +R8G8B8_SINT , 24, 1, 1, 1, si8, si8, si8, , , , , linear, +RAW , 0, 0, 0, 0, , , , , , , , , diff --git a/src/intel/isl/isl_format_layout_gen.bash b/src/intel/isl/isl_format_layout_gen.bash new file mode 100755 index 00000000000..db883827376 --- /dev/null +++ b/src/intel/isl/isl_format_layout_gen.bash @@ -0,0 +1,128 @@ +#!/usr/bin/env bash +# +# Copyright 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +set -eu +set -o pipefail + +cat <<'EOF' +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl.h" + +const struct isl_format_layout +isl_format_layouts[] = { +EOF + +sed -r ' +# Delete comment lines and empty lines +/^[[:space:]]*#/d +/^[[:space:]]*$/d + +# Delete spaces +s/[[:space:]]//g + +# Translate formats +s/^([A-Za-z0-9_]+),*/ISL_FORMAT_\1,/ + +# Translate data type of channels +s/\/ISL_COLORSPACE_\1/ +s/\// + +# Translate texture compression +s/\<(dxt|fxt|rgtc|bptc|etc)([0-9]*)\>/ISL_TXC_\1\2/ +' | +tr 'a-z' 'A-Z' | # Convert to uppersace +while IFS=, read -r format bpb bw bh bd \ + red green blue alpha \ + luminance intensity palette \ + colorspace txc +do + : ${colorspace:=ISL_COLORSPACE_NONE} + : ${txc:=ISL_TXC_NONE} + + cat <samples >= 1); + + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return true; +} + +void +gen4_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + assert(info->samples == 1); + assert(msaa_layout == ISL_MSAA_LAYOUT_NONE); + assert(!isl_tiling_is_std_y(tiling)); + + /* Note that neither the surface's horizontal nor vertical image alignment + * is programmable on gen4 nor gen5. + * + * From the G35 PRM (2008-01), Volume 1 Graphics Core, Section 6.17.3.4 + * Alignment Unit Size: + * + * Note that the compressed formats are padded to a full compression + * cell. 
+ * + * +------------------------+--------+--------+ + * | format | halign | valign | + * +------------------------+--------+--------+ + * | YUV 4:2:2 formats | 4 | 2 | + * | uncompressed formats | 4 | 2 | + * +------------------------+--------+--------+ + */ + + if (isl_format_is_compressed(info->format)) { + *image_align_el = isl_extent3d(1, 1, 1); + return; + } + + *image_align_el = isl_extent3d(4, 2, 1); +} diff --git a/src/intel/isl/isl_gen4.h b/src/intel/isl/isl_gen4.h new file mode 100644 index 00000000000..06cd70b9206 --- /dev/null +++ b/src/intel/isl/isl_gen4.h @@ -0,0 +1,47 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +gen4_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen4_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/isl/isl_gen6.c b/src/intel/isl/isl_gen6.c new file mode 100644 index 00000000000..24c393925ed --- /dev/null +++ b/src/intel/isl/isl_gen6.c @@ -0,0 +1,160 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl_gen6.h" +#include "isl_priv.h" + +bool +gen6_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(ISL_DEV_GEN(dev) == 6); + assert(info->samples >= 1); + + if (info->samples == 1) { + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return false; + } + + /* From the Sandybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Surface + * Format: + * + * If Number of Multisamples is set to a value other than + * MULTISAMPLECOUNT_1, this field cannot be set to the following + * formats: + * + * - any format with greater than 64 bits per element + * - any compressed texture format (BC*) + * - any YCRCB* format + */ + if (fmtl->bs > 8) + return false; + if (isl_format_is_compressed(info->format)) + return false; + if (isl_format_is_yuv(info->format)) + return false; + + /* From the Sandybridge PRM, Volume 4 Part 1 p85, SURFACE_STATE, Number of + * Multisamples: + * + * If this field is any value other than MULTISAMPLECOUNT_1 the + * following restrictions apply: + * + * - the Surface Type must be SURFTYPE_2D + * - [...] + */ + if (info->dim != ISL_SURF_DIM_2D) + return false; + + /* More obvious restrictions */ + if (isl_surf_usage_is_display(info->usage)) + return false; + if (tiling == ISL_TILING_LINEAR) + return false; + if (info->levels > 1) + return false; + + *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; + return true; +} + +void +gen6_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + /* Note that the surface's horizontal image alignment is not programmable + * on Sandybridge. 
+ * + * From the Sandybridge PRM (2011-05), Volume 1, Part 1, Section 7.18.3.4 + * Alignment Unit Size: + * + * Note that the compressed formats are padded to a full compression cell. + * + * +------------------------+--------+--------+ + * | format | halign | valign | + * +------------------------+--------+--------+ + * | YUV 4:2:2 formats | 4 | * | + * | uncompressed formats | 4 | * | + * +------------------------+--------+--------+ + * + * * For these formats, the vertical alignment factor “j” is determined + * as follows: + * - j = 4 for any depth buffer + * - j = 2 for separate stencil buffer + * - j = 4 for any render target surface is multisampled (4x) + * - j = 2 for all other render target surface + * + * From the Sandrybridge PRM (2011-05), Volume 4, Part 1, Section 2.11.2 + * SURFACE_STATE, Surface Vertical Alignment: + * + * - This field must be set to VALIGN_2 if the Surface Format is 96 bits + * per element (BPE). + * + * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL + * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY + * (0x190) + */ + + if (isl_format_is_compressed(info->format)) { + *image_align_el = isl_extent3d(1, 1, 1); + return; + } + + if (isl_format_is_yuv(info->format)) { + *image_align_el = isl_extent3d(4, 2, 1); + return; + } + + if (info->samples > 1) { + *image_align_el = isl_extent3d(4, 4, 1); + return; + } + + if (isl_surf_usage_is_depth_or_stencil(info->usage) && + !ISL_DEV_USE_SEPARATE_STENCIL(dev)) { + /* interleaved depthstencil buffer */ + *image_align_el = isl_extent3d(4, 4, 1); + return; + } + + if (isl_surf_usage_is_depth(info->usage)) { + /* separate depth buffer */ + *image_align_el = isl_extent3d(4, 4, 1); + return; + } + + if (isl_surf_usage_is_stencil(info->usage)) { + /* separate stencil buffer */ + *image_align_el = isl_extent3d(4, 2, 1); + return; + } + + *image_align_el = isl_extent3d(4, 2, 1); +} diff --git a/src/intel/isl/isl_gen6.h b/src/intel/isl/isl_gen6.h new file mode 100644 
index 00000000000..0779c674940 --- /dev/null +++ b/src/intel/isl/isl_gen6.h @@ -0,0 +1,47 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +gen6_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen6_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/isl/isl_gen7.c b/src/intel/isl/isl_gen7.c new file mode 100644 index 00000000000..7064e852e65 --- /dev/null +++ b/src/intel/isl/isl_gen7.c @@ -0,0 +1,395 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl_gen7.h" +#include "isl_priv.h" + +bool +gen7_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + bool require_array = false; + bool require_interleaved = false; + + assert(ISL_DEV_GEN(dev) == 7); + assert(info->samples >= 1); + + if (info->samples == 1) { + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return true; + } + + /* From the Ivybridge PRM, Volume 4 Part 1 p63, SURFACE_STATE, Surface + * Format: + * + * If Number of Multisamples is set to a value other than + * MULTISAMPLECOUNT_1, this field cannot be set to the following + * formats: any format with greater than 64 bits per element, any + * compressed texture format (BC*), and any YCRCB* format. + */ + if (fmtl->bs > 8) + return false; + if (isl_format_is_compressed(info->format)) + return false; + if (isl_format_is_yuv(info->format)) + return false; + + /* From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of + * Multisamples: + * + * - If this field is any value other than MULTISAMPLECOUNT_1, the + * Surface Type must be SURFTYPE_2D. + * + * - If this field is any value other than MULTISAMPLECOUNT_1, Surface + * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero + */ + if (info->dim != ISL_SURF_DIM_2D) + return false; + if (info->levels > 1) + return false; + + /* The Ivyrbridge PRM insists twice that signed integer formats cannot be + * multisampled. + * + * From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of + * Multisamples: + * + * - This field must be set to MULTISAMPLECOUNT_1 for SINT MSRTs when + * all RT channels are not written. + * + * And errata from the Ivybridge PRM, Volume 4 Part 1 p77, + * RENDER_SURFACE_STATE, MCS Enable: + * + * This field must be set to 0 [MULTISAMPLECOUNT_1] for all SINT MSRTs + * when all RT channels are not written. 
+ * + * Note that the above SINT restrictions apply only to *MSRTs* (that is, + * *multisampled* render targets). The restrictions seem to permit an MCS + * if the render target is singlesampled. + */ + if (isl_format_has_sint_channel(info->format)) + return false; + + /* More obvious restrictions */ + if (isl_surf_usage_is_display(info->usage)) + return false; + if (tiling == ISL_TILING_LINEAR) + return false; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Suface Storage Format: + * + * +---------------------+----------------------------------------------------------------+ + * | MSFMT_MSS | Multsampled surface was/is rendered as a render target | + * | MSFMT_DEPTH_STENCIL | Multisampled surface was rendered as a depth or stencil buffer | + * +---------------------+----------------------------------------------------------------+ + * + * In the table above, MSFMT_MSS refers to ISL_MSAA_LAYOUT_ARRAY, and + * MSFMT_DEPTH_STENCIL refers to ISL_MSAA_LAYOUT_INTERLEAVED. + */ + if (isl_surf_usage_is_depth_or_stencil(info->usage)) + require_interleaved = true; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Suface Storage Format: + * + * If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, Width + * is >= 8192 (meaning the actual surface width is >= 8193 pixels), this + * field must be set to MSFMT_MSS. + */ + if (info->samples == 8 && info->width == 8192) + require_array = true; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Suface Storage Format: + * + * If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, + * ((Depth+1) * (Height+1)) is > 4,194,304, OR if the surface’s Number + * of Multisamples is MULTISAMPLECOUNT_4, ((Depth+1) * (Height+1)) is + * > 8,388,608, this field must be set to MSFMT_DEPTH_STENCIL. 
+ */ + if ((info->samples == 8 && info->height > 4194304u) || + (info->samples == 4 && info->height > 8388608u)) + require_interleaved = true; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Suface Storage Format: + * + * This field must be set to MSFMT_DEPTH_STENCIL if Surface Format is + * one of the following: I24X8_UNORM, L24X8_UNORM, A24X8_UNORM, or + * R24_UNORM_X8_TYPELESS. + */ + if (info->format == ISL_FORMAT_I24X8_UNORM || + info->format == ISL_FORMAT_L24X8_UNORM || + info->format == ISL_FORMAT_A24X8_UNORM || + info->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) + require_interleaved = true; + + if (require_array && require_interleaved) + return false; + + if (require_interleaved) { + *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; + return true; + } + + /* Default to the array layout because it permits multisample + * compression. + */ + *msaa_layout = ISL_MSAA_LAYOUT_ARRAY; + return true; +} + +static bool +gen7_format_needs_valign2(const struct isl_device *dev, + enum isl_format format) +{ + /* This workaround applies only to gen7 */ + if (ISL_DEV_GEN(dev) > 7) + return false; + + /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, + * RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL + * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY + * (0x190) + * + * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT. + */ + return isl_format_is_yuv(format) || + format == ISL_FORMAT_R32G32B32_FLOAT; +} + +/** + * @brief Filter out tiling flags that are incompatible with the surface. + * + * The resultant outgoing @a flags is a subset of the incoming @a flags. The + * outgoing flags may be empty (0x0) if the incoming flags were too + * restrictive. 
+ * + * For example, if the surface will be used for a display + * (ISL_SURF_USAGE_DISPLAY_BIT), then this function filters out all tiling + * flags except ISL_TILING_X_BIT and ISL_TILING_LINEAR_BIT. + */ +void +gen7_filter_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + isl_tiling_flags_t *flags) +{ + /* IVB+ requires separate stencil */ + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); + + /* Clear flags unsupported on this hardware */ + if (ISL_DEV_GEN(dev) < 9) { + *flags &= ~ISL_TILING_Yf_BIT; + *flags &= ~ISL_TILING_Ys_BIT; + } + + /* And... clear the Yf and Ys bits anyway because Anvil doesn't support + * them yet. + */ + *flags &= ~ISL_TILING_Yf_BIT; /* FINISHME[SKL]: Support Yf */ + *flags &= ~ISL_TILING_Ys_BIT; /* FINISHME[SKL]: Support Ys */ + + if (isl_surf_usage_is_depth(info->usage)) { + /* Depth requires Y. */ + *flags &= ISL_TILING_ANY_Y_MASK; + } + + /* Separate stencil requires W tiling, and W tiling requires separate + * stencil. + */ + if (isl_surf_usage_is_stencil(info->usage)) { + *flags &= ISL_TILING_W_BIT; + } else { + *flags &= ~ISL_TILING_W_BIT; + } + + if (info->usage & (ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT | + ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT | + ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT)) { + assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT); + isl_finishme("%s:%s: handle rotated display surfaces", + __FILE__, __func__); + } + + if (info->usage & (ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT | + ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT)) { + assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT); + isl_finishme("%s:%s: handle flipped display surfaces", + __FILE__, __func__); + } + + if (info->usage & ISL_SURF_USAGE_DISPLAY_BIT) { + /* Before Skylake, the display engine does not accept Y */ + /* FINISHME[SKL]: Y tiling for display surfaces */ + *flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT); + } + + if (info->samples > 1) { + /* From the Sandybridge PRM, Volume 4 Part 1, SURFACE_STATE Tiled + * Surface: + * + * For multisample 
render targets, this field must be 1 (true). MSRTs + * can only be tiled. + * + * Multisample surfaces never require X tiling, and Y tiling generally + * performs better than X. So choose Y. (Unless it's stencil, then it + * must be W). + */ + *flags &= (ISL_TILING_ANY_Y_MASK | ISL_TILING_W_BIT); + } + + /* workaround */ + if (ISL_DEV_GEN(dev) == 7 && + gen7_format_needs_valign2(dev, info->format) && + (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) && + info->samples == 1) { + /* Y tiling is illegal. From the Ivybridge PRM, Vol4 Part1 2.12.2.1, + * SURFACE_STATE Surface Vertical Alignment: + * + * This field must be set to VALIGN_4 for all tiled Y Render Target + * surfaces. + */ + *flags &= ~ISL_TILING_Y0_BIT; + } +} + +/** + * Choose horizontal subimage alignment, in units of surface elements. + */ +static uint32_t +gen7_choose_halign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info) +{ + if (isl_format_is_compressed(info->format)) + return 1; + + /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, + * RENDER_SURFACE_STATE Surface Hoizontal Alignment: + * + * - This field is intended to be set to HALIGN_8 only if the surface + * was rendered as a depth buffer with Z16 format or a stencil buffer, + * since these surfaces support only alignment of 8. Use of HALIGN_8 + * for other surfaces is supported, but uses more memory. + */ + if (isl_surf_info_is_z16(info) || + isl_surf_usage_is_stencil(info->usage)) + return 8; + + return 4; +} + +/** + * Choose vertical subimage alignment, in units of surface elements. 
+ */ +static uint32_t +gen7_choose_valign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling) +{ + bool require_valign2 = false; + bool require_valign4 = false; + + if (isl_format_is_compressed(info->format)) + return 1; + + if (gen7_format_needs_valign2(dev, info->format)) + require_valign2 = true; + + /* From the Ivybridge PRM, Volume 4, Part 1, Section 2.12.1: + * RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - This field is intended to be set to VALIGN_4 if the surface was + * rendered as a depth buffer, for a multisampled (4x) render target, + * or for a multisampled (8x) render target, since these surfaces + * support only alignment of 4. Use of VALIGN_4 for other surfaces is + * supported, but uses more memory. This field must be set to + * VALIGN_4 for all tiled Y Render Target surfaces. + * + */ + if (isl_surf_usage_is_depth(info->usage) || + info->samples > 1 || + tiling == ISL_TILING_Y0) { + require_valign4 = true; + } + + if (isl_surf_usage_is_stencil(info->usage)) { + /* The Ivybridge PRM states that the stencil buffer's vertical alignment + * is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment + * Unit Size]. However, valign=8 is outside the set of valid values of + * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2 + * (0x0) and VALIGN_4 (0x1). + * + * The PRM is generally confused about the width, height, and alignment + * of the stencil buffer; and this confusion appears elsewhere. 
For + * example, the following PRM text effectively converts the stencil + * buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM, + * Volume 1, Part 1, Section + * 6.18.4.2 Base Address and LOD Calculation]: + * + * For separate stencil buffer, the width must be mutiplied by 2 and + * height divided by 2 as follows: + * + * w_L = 2*i*ceil(W_L/i) + * h_L = 1/2*j*ceil(H_L/j) + * + * The root of the confusion is that, in W tiling, each pair of rows is + * interleaved into one. + * + * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API + * is more polished. + */ + require_valign4 = true; + } + + assert(!require_valign2 || !require_valign4); + + if (require_valign4) + return 4; + + /* Prefer VALIGN_2 because it conserves memory. */ + return 2; +} + +void +gen7_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + /* IVB+ does not support combined depthstencil. 
*/ + assert(!isl_surf_usage_is_depth_and_stencil(info->usage)); + + *image_align_el = (struct isl_extent3d) { + .w = gen7_choose_halign_el(dev, info), + .h = gen7_choose_valign_el(dev, info, tiling), + .d = 1, + }; +} diff --git a/src/intel/isl/isl_gen7.h b/src/intel/isl/isl_gen7.h new file mode 100644 index 00000000000..2a95b68a9bd --- /dev/null +++ b/src/intel/isl/isl_gen7.h @@ -0,0 +1,52 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void +gen7_filter_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + isl_tiling_flags_t *flags); + +bool +gen7_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen7_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/isl/isl_gen8.c b/src/intel/isl/isl_gen8.c new file mode 100644 index 00000000000..a46427aacc8 --- /dev/null +++ b/src/intel/isl/isl_gen8.c @@ -0,0 +1,229 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl_gen8.h" +#include "isl_priv.h" + +bool +gen8_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + bool require_array = false; + bool require_interleaved = false; + + assert(info->samples >= 1); + + if (info->samples == 1) { + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return true; + } + + /* From the Broadwell PRM >> Volume2d: Command Structures >> + * RENDER_SURFACE_STATE Tile Mode: + * + * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field + * must be YMAJOR. + * + * As usual, though, stencil is special. + */ + if (!isl_tiling_is_any_y(tiling) && !isl_surf_usage_is_stencil(info->usage)) + return false; + + /* From the Broadwell PRM >> Volume2d: Command Structures >> + * RENDER_SURFACE_STATE Multisampled Surface Storage Format: + * + * All multisampled render target surfaces must have this field set to + * MSFMT_MSS + */ + if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) + require_array = true; + + /* From the Broadwell PRM >> Volume2d: Command Structures >> + * RENDER_SURFACE_STATE Number of Multisamples: + * + * - If this field is any value other than MULTISAMPLECOUNT_1, the + * Surface Type must be SURFTYPE_2D This field must be set to + * MULTISAMPLECOUNT_1 unless the surface is a Sampling Engine surface + * or Render Target surface. + * + * - If this field is any value other than MULTISAMPLECOUNT_1, Surface + * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero. 
+ */ + if (info->dim != ISL_SURF_DIM_2D) + return false; + if (info->levels > 1) + return false; + + /* More obvious restrictions */ + if (isl_surf_usage_is_display(info->usage)) + return false; + if (isl_format_is_compressed(info->format)) + return false; + if (isl_format_is_yuv(info->format)) + return false; + + if (isl_surf_usage_is_depth_or_stencil(info->usage)) + require_interleaved = true; + + if (require_array && require_interleaved) + return false; + + if (require_interleaved) { + *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; + return true; + } + + *msaa_layout = ISL_MSAA_LAYOUT_ARRAY; + return true; +} + +/** + * Choose horizontal subimage alignment, in units of surface elements. + */ +static uint32_t +gen8_choose_halign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info) +{ + if (isl_format_is_compressed(info->format)) + return 1; + + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * - This field is intended to be set to HALIGN_8 only if the surface + * was rendered as a depth buffer with Z16 format or a stencil buffer. + * In this case it must be set to HALIGN_8 since these surfaces + * support only alignment of 8. [...] + */ + if (isl_surf_info_is_z16(info)) + return 8; + if (isl_surf_usage_is_stencil(info->usage)) + return 8; + + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * [...] For Z32 formats it must be set to HALIGN_4. + */ + if (isl_surf_usage_is_depth(info->usage)) + return 4; + + if (!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) { + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * - When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, + * HALIGN 16 must be used. 
+ * + * This case handles color surfaces that may own an auxiliary MCS, CCS_D, + * or CCS_E. Depth buffers, including those that own an auxiliary HiZ + * surface, are handled above and do not require HALIGN_16. + */ + assert(!isl_surf_usage_is_depth(info->usage)); + return 16; + } + + /* XXX(chadv): I believe the hardware requires each image to be + * cache-aligned. If that's true, then defaulting to halign=4 is wrong for + * many formats. Depending on the format's block size, we may need to + * increase halign to 8. + */ + return 4; +} + +/** + * Choose vertical subimage alignment, in units of surface elements. + */ +static uint32_t +gen8_choose_valign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info) +{ + /* From the Broadwell PRM > Volume 2d: Command Reference: Structures + * > RENDER_SURFACE_STATE Surface Vertical Alignment (p325): + * + * - For Sampling Engine and Render Target Surfaces: This field + * specifies the vertical alignment requirement in elements for the + * surface. [...] An element is defined as a pixel in uncompresed + * surface formats, and as a compression block in compressed surface + * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an + * element is a sample. + * + * - This field is intended to be set to VALIGN_4 if the surface was + * rendered as a depth buffer, for a multisampled (4x) render target, + * or for a multisampled (8x) render target, since these surfaces + * support only alignment of 4. Use of VALIGN_4 for other surfaces is + * supported, but increases memory usage. + * + * - This field is intended to be set to VALIGN_8 only if the surface + * was rendered as a stencil buffer, since stencil buffer surfaces + * support only alignment of 8. If set to VALIGN_8, Surface Format + * must be R8_UINT. 
+ */ + + if (isl_format_is_compressed(info->format)) + return 1; + + if (isl_surf_usage_is_stencil(info->usage)) + return 8; + + return 4; +} + +void +gen8_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + assert(!isl_tiling_is_std_y(tiling)); + + /* The below text from the Broadwell PRM provides some insight into the + * hardware's requirements for LOD alignment. From the Broadwell PRM >> + * Volume 5: Memory Views >> Surface Layout >> 2D Surfaces: + * + * These [2D surfaces] must adhere to the following memory organization + * rules: + * + * - For non-compressed texture formats, each mipmap must start on an + * even row within the monolithic rectangular area. For + * 1-texel-high mipmaps, this may require a row of padding below + * the previous mipmap. This restriction does not apply to any + * compressed texture formats; each subsequent (lower-res) + * compressed mipmap is positioned directly below the previous + * mipmap. + * + * - Vertical alignment restrictions vary with memory tiling type: + * 1 DWord for linear, 16-byte (DQWord) for tiled. (Note that tiled + * mipmaps are not required to start at the left edge of a tile + * row.) 
+ */ + + *image_align_el = (struct isl_extent3d) { + .w = gen8_choose_halign_el(dev, info), + .h = gen8_choose_valign_el(dev, info), + .d = 1, + }; +} diff --git a/src/intel/isl/isl_gen8.h b/src/intel/isl/isl_gen8.h new file mode 100644 index 00000000000..2017ea8ddc1 --- /dev/null +++ b/src/intel/isl/isl_gen8.h @@ -0,0 +1,47 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +gen8_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen8_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/isl/isl_gen9.c b/src/intel/isl/isl_gen9.c new file mode 100644 index 00000000000..aa290aa1c35 --- /dev/null +++ b/src/intel/isl/isl_gen9.c @@ -0,0 +1,185 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "isl_gen8.h" +#include "isl_gen9.h" +#include "isl_priv.h" + +/** + * Calculate the surface's subimage alignment, in units of surface samples, + * for the standard tiling formats Yf and Ys. + */ +static void +gen9_calc_std_image_alignment_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *align_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(isl_tiling_is_std_y(tiling)); + + const uint32_t bs = fmtl->bs; + const uint32_t is_Ys = tiling == ISL_TILING_Ys; + + switch (info->dim) { + case ISL_SURF_DIM_1D: + /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface + * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. + */ + *align_sa = (struct isl_extent3d) { + .w = 1 << (12 - (ffs(bs) - 1) + (4 * is_Ys)), + .h = 1, + .d = 1, + }; + return; + case ISL_SURF_DIM_2D: + /* See the Skylake BSpec > Memory Views > Common Surface Formats > + * Surface Layout and Tiling > 2D Surfaces > 2D/CUBE Alignment + * Requirements. + */ + *align_sa = (struct isl_extent3d) { + .w = 1 << (6 - ((ffs(bs) - 1) / 2) + (4 * is_Ys)), + .h = 1 << (6 - ((ffs(bs) - 0) / 2) + (4 * is_Ys)), + .d = 1, + }; + + if (is_Ys) { + /* FINISHME(chadv): I don't trust this code. Untested. */ + isl_finishme("%s:%s: [SKL+] multisample TileYs", __FILE__, __func__); + + switch (msaa_layout) { + case ISL_MSAA_LAYOUT_NONE: + case ISL_MSAA_LAYOUT_INTERLEAVED: + break; + case ISL_MSAA_LAYOUT_ARRAY: + align_sa->w >>= (ffs(info->samples) - 0) / 2; + align_sa->h >>= (ffs(info->samples) - 1) / 2; + break; + } + } + return; + + case ISL_SURF_DIM_3D: + /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface + * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. 
+ */ + *align_sa = (struct isl_extent3d) { + .w = 1 << (4 - ((ffs(bs) + 1) / 3) + (4 * is_Ys)), + .h = 1 << (4 - ((ffs(bs) - 1) / 3) + (2 * is_Ys)), + .d = 1 << (4 - ((ffs(bs) - 0) / 3) + (2 * is_Ys)), + }; + return; + } + + unreachable("bad isl_surface_type"); +} + +void +gen9_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + /* This BSpec text provides some insight into the hardware's alignment + * requirements [Skylake BSpec > Memory Views > Common Surface Formats > + * Surface Layout and Tiling > 2D Surfaces]: + * + * An LOD must be aligned to a cache-line except for some special cases + * related to Planar YUV surfaces. In general, the cache-alignment + * restriction implies there is a minimum height for an LOD of 4 texels. + * So, LODs which are smaller than 4 high are padded. + * + * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - For Sampling Engine and Render Target Surfaces: This field + * specifies the vertical alignment requirement in elements for the + * surface. [...] An element is defined as a pixel in uncompresed + * surface formats, and as a compression block in compressed surface + * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an + * element is a sample. + * + * - This field is used for 2D, CUBE, and 3D surface alignment when Tiled + * Resource Mode is TRMODE_NONE (Tiled Resource Mode is disabled). + * This field is ignored for 1D surfaces and also when Tiled Resource + * Mode is not TRMODE_NONE (e.g. Tiled Resource Mode is enabled). + * + * See the appropriate Alignment table in the "Surface Layout and + * Tiling" section under Common Surface Formats for the table of + * alignment values for Tiled Resrouces. + * + * - For uncompressed surfaces, the units of "j" are rows of pixels on + * the physical surface. 
For compressed texture formats, the units of + * "j" are in compression blocks, thus each increment in "j" is equal + * to h pixels, where h is the height of the compression block in + * pixels. + * + * - Valid Values: VALIGN_4, VALIGN_8, VALIGN_16 + * + * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Horizontal + * Alignment: + * + * - For uncompressed surfaces, the units of "i" are pixels on the + * physical surface. For compressed texture formats, the units of "i" + * are in compression blocks, thus each increment in "i" is equal to + * w pixels, where w is the width of the compression block in pixels. + * + * - Valid Values: HALIGN_4, HALIGN_8, HALIGN_16 + */ + + if (isl_tiling_is_std_y(tiling)) { + struct isl_extent3d image_align_sa; + gen9_calc_std_image_alignment_sa(dev, info, tiling, msaa_layout, + &image_align_sa); + + *image_align_el = isl_extent3d_sa_to_el(info->format, image_align_sa); + return; + } + + if (info->dim == ISL_SURF_DIM_1D) { + /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface + * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. + */ + *image_align_el = isl_extent3d(64, 1, 1); + return; + } + + if (isl_format_is_compressed(info->format)) { + /* On Gen9, the meaning of RENDER_SURFACE_STATE's + * SurfaceHorizontalAlignment and SurfaceVerticalAlignment changed for + * compressed formats. They now indicate a multiple of the compression + * block. For example, if the compression mode is ETC2 then HALIGN_4 + * indicates a horizontal alignment of 16 pixels. + * + * To avoid wasting memory, choose the smallest alignment possible: + * HALIGN_4 and VALIGN_4. 
+ */ + *image_align_el = isl_extent3d(4, 4, 1); + return; + } + + gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); +} diff --git a/src/intel/isl/isl_gen9.h b/src/intel/isl/isl_gen9.h new file mode 100644 index 00000000000..64ed0aa44ef --- /dev/null +++ b/src/intel/isl/isl_gen9.h @@ -0,0 +1,41 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void +gen9_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/isl/isl_image.c b/src/intel/isl/isl_image.c new file mode 100644 index 00000000000..773160432b9 --- /dev/null +++ b/src/intel/isl/isl_image.c @@ -0,0 +1,188 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl.h" +#include "brw_compiler.h" + +bool +isl_is_storage_image_format(enum isl_format format) +{ + /* XXX: Maybe we should put this in the CSV? 
*/ + + switch (format) { + case ISL_FORMAT_R32G32B32A32_UINT: + case ISL_FORMAT_R32G32B32A32_SINT: + case ISL_FORMAT_R32G32B32A32_FLOAT: + case ISL_FORMAT_R32_UINT: + case ISL_FORMAT_R32_SINT: + case ISL_FORMAT_R32_FLOAT: + case ISL_FORMAT_R16G16B16A16_UINT: + case ISL_FORMAT_R16G16B16A16_SINT: + case ISL_FORMAT_R16G16B16A16_FLOAT: + case ISL_FORMAT_R32G32_UINT: + case ISL_FORMAT_R32G32_SINT: + case ISL_FORMAT_R32G32_FLOAT: + case ISL_FORMAT_R8G8B8A8_UINT: + case ISL_FORMAT_R8G8B8A8_SINT: + case ISL_FORMAT_R16G16_UINT: + case ISL_FORMAT_R16G16_SINT: + case ISL_FORMAT_R16G16_FLOAT: + case ISL_FORMAT_R8G8_UINT: + case ISL_FORMAT_R8G8_SINT: + case ISL_FORMAT_R16_UINT: + case ISL_FORMAT_R16_FLOAT: + case ISL_FORMAT_R16_SINT: + case ISL_FORMAT_R8_UINT: + case ISL_FORMAT_R8_SINT: + case ISL_FORMAT_R10G10B10A2_UINT: + case ISL_FORMAT_R10G10B10A2_UNORM: + case ISL_FORMAT_R11G11B10_FLOAT: + case ISL_FORMAT_R16G16B16A16_UNORM: + case ISL_FORMAT_R16G16B16A16_SNORM: + case ISL_FORMAT_R8G8B8A8_UNORM: + case ISL_FORMAT_R8G8B8A8_SNORM: + case ISL_FORMAT_R16G16_UNORM: + case ISL_FORMAT_R16G16_SNORM: + case ISL_FORMAT_R8G8_UNORM: + case ISL_FORMAT_R8G8_SNORM: + case ISL_FORMAT_R16_UNORM: + case ISL_FORMAT_R16_SNORM: + case ISL_FORMAT_R8_UNORM: + case ISL_FORMAT_R8_SNORM: + return true; + default: + return false; + } +} + +enum isl_format +isl_lower_storage_image_format(const struct isl_device *dev, + enum isl_format format) +{ + switch (format) { + /* These are never lowered. Up to BDW we'll have to fall back to untyped + * surface access for 128bpp formats. + */ + case ISL_FORMAT_R32G32B32A32_UINT: + case ISL_FORMAT_R32G32B32A32_SINT: + case ISL_FORMAT_R32G32B32A32_FLOAT: + case ISL_FORMAT_R32_UINT: + case ISL_FORMAT_R32_SINT: + case ISL_FORMAT_R32_FLOAT: + return format; + + /* From HSW to BDW the only 64bpp format supported for typed access is + * RGBA_UINT16. IVB falls back to untyped. 
+ */ + case ISL_FORMAT_R16G16B16A16_UINT: + case ISL_FORMAT_R16G16B16A16_SINT: + case ISL_FORMAT_R16G16B16A16_FLOAT: + case ISL_FORMAT_R32G32_UINT: + case ISL_FORMAT_R32G32_SINT: + case ISL_FORMAT_R32G32_FLOAT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16B16A16_UINT : + ISL_FORMAT_R32G32_UINT); + + /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component + * are supported. IVB doesn't support formats with more than one component + * for typed access. For 8 and 16 bpp formats IVB relies on the + * undocumented behavior that typed reads from R_UINT8 and R_UINT16 + * surfaces actually do a 32-bit misaligned read. The alternative would be + * to use two surface state entries with different formats for each image, + * one for reading (using R_UINT32) and another one for writing (using + * R_UINT8 or R_UINT16), but that would complicate the shaders we generate + * even more. + */ + case ISL_FORMAT_R8G8B8A8_UINT: + case ISL_FORMAT_R8G8B8A8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R16G16_UINT: + case ISL_FORMAT_R16G16_SINT: + case ISL_FORMAT_R16G16_FLOAT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R8G8_UINT: + case ISL_FORMAT_R8G8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R16_UINT: + case ISL_FORMAT_R16_FLOAT: + case ISL_FORMAT_R16_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R8_UINT: + case ISL_FORMAT_R8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R8_UINT); + + /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported + * by the hardware. 
+ */ + case ISL_FORMAT_R10G10B10A2_UINT: + case ISL_FORMAT_R10G10B10A2_UNORM: + case ISL_FORMAT_R11G11B10_FLOAT: + return ISL_FORMAT_R32_UINT; + + /* No normalized fixed-point formats are supported by the hardware. */ + case ISL_FORMAT_R16G16B16A16_UNORM: + case ISL_FORMAT_R16G16B16A16_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16B16A16_UINT : + ISL_FORMAT_R32G32_UINT); + + case ISL_FORMAT_R8G8B8A8_UNORM: + case ISL_FORMAT_R8G8B8A8_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R16G16_UNORM: + case ISL_FORMAT_R16G16_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R8G8_UNORM: + case ISL_FORMAT_R8G8_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R16_UNORM: + case ISL_FORMAT_R16_SNORM: + return ISL_FORMAT_R16_UINT; + + case ISL_FORMAT_R8_UNORM: + case ISL_FORMAT_R8_SNORM: + return ISL_FORMAT_R8_UINT; + + default: + assert(!"Unknown image format"); + return ISL_FORMAT_UNSUPPORTED; + } +} diff --git a/src/intel/isl/isl_priv.h b/src/intel/isl/isl_priv.h new file mode 100644 index 00000000000..b399e0f8116 --- /dev/null +++ b/src/intel/isl/isl_priv.h @@ -0,0 +1,141 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in 
all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include + +#include "brw_device_info.h" +#include "mesa/main/imports.h" +#include "util/macros.h" + +#include "isl.h" + +#define isl_finishme(format, ...) \ + __isl_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__) + +void PRINTFLIKE(3, 4) UNUSED +__isl_finishme(const char *file, int line, const char *fmt, ...); + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static inline uint32_t +ffs(uint32_t n) { + return __builtin_ffs(n); +} + +static inline bool +isl_is_pow2(uintmax_t n) +{ + return !(n & (n - 1)); +} + +/** + * Alignment must be a power of 2. + */ +static inline bool +isl_is_aligned(uintmax_t n, uintmax_t a) +{ + assert(isl_is_pow2(a)); + return (n & (a - 1)) == 0; +} + +/** + * Alignment must be a power of 2. + */ +static inline uintmax_t +isl_align(uintmax_t n, uintmax_t a) +{ + assert(a != 0 && isl_is_pow2(a)); + return (n + a - 1) & ~(a - 1); +} + +static inline uintmax_t +isl_align_npot(uintmax_t n, uintmax_t a) +{ + assert(a > 0); + return ((n + a - 1) / a) * a; +} + +/** + * Alignment must be a power of 2. + */ +static inline uintmax_t +isl_align_div(uintmax_t n, uintmax_t a) +{ + return isl_align(n, a) / a; +} + +static inline uintmax_t +isl_align_div_npot(uintmax_t n, uintmax_t a) +{ + return isl_align_npot(n, a) / a; +} + +/** + * Log base 2, rounding towards zero. 
+ */ +static inline uint32_t +isl_log2u(uint32_t n) +{ + assert(n != 0); + return 31 - __builtin_clz(n); +} + +static inline uint32_t +isl_minify(uint32_t n, uint32_t levels) +{ + if (unlikely(n == 0)) + return 0; + else + return MAX(n >> levels, 1); +} + +static inline struct isl_extent3d +isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + assert(extent_sa.w % fmtl->bw == 0); + assert(extent_sa.h % fmtl->bh == 0); + assert(extent_sa.d % fmtl->bd == 0); + + return (struct isl_extent3d) { + .w = extent_sa.w / fmtl->bw, + .h = extent_sa.h / fmtl->bh, + .d = extent_sa.d / fmtl->bd, + }; +} + +static inline struct isl_extent3d +isl_extent3d_el_to_sa(enum isl_format fmt, struct isl_extent3d extent_el) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return (struct isl_extent3d) { + .w = extent_el.w * fmtl->bw, + .h = extent_el.h * fmtl->bh, + .d = extent_el.d * fmtl->bd, + }; +} diff --git a/src/intel/isl/tests/.gitignore b/src/intel/isl/tests/.gitignore new file mode 100644 index 00000000000..ba70ecfbee4 --- /dev/null +++ b/src/intel/isl/tests/.gitignore @@ -0,0 +1 @@ +/isl_surf_get_image_offset_test diff --git a/src/intel/isl/tests/isl_surf_get_image_offset_test.c b/src/intel/isl/tests/isl_surf_get_image_offset_test.c new file mode 100644 index 00000000000..cda8583daeb --- /dev/null +++ b/src/intel/isl/tests/isl_surf_get_image_offset_test.c @@ -0,0 +1,353 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include "brw_device_info.h" +#include "isl.h" +#include "isl_priv.h" + +#define BDW_GT2_DEVID 0x161a + +// An asssert that works regardless of NDEBUG. +#define t_assert(cond) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + abort(); \ + } \ + } while (0) + +static void +t_assert_extent4d(const struct isl_extent4d *e, uint32_t width, + uint32_t height, uint32_t depth, uint32_t array_len) +{ + t_assert(e->width == width); + t_assert(e->height == height); + t_assert(e->depth == depth); + t_assert(e->array_len == array_len); +} + +static void +t_assert_image_alignment_el(const struct isl_surf *surf, + uint32_t w, uint32_t h, uint32_t d) +{ + struct isl_extent3d align_el; + + align_el = isl_surf_get_image_alignment_el(surf); + t_assert(align_el.w == w); + t_assert(align_el.h == h); + t_assert(align_el.d == d); + +} + +static void +t_assert_image_alignment_sa(const struct isl_surf *surf, + uint32_t w, uint32_t h, uint32_t d) +{ + struct isl_extent3d align_sa; + + align_sa = isl_surf_get_image_alignment_sa(surf); + t_assert(align_sa.w == w); + t_assert(align_sa.h == h); + t_assert(align_sa.d == d); + +} + +static void +t_assert_offset_el(const struct isl_surf *surf, + uint32_t level, + uint32_t 
logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t expected_x_offset_el, + uint32_t expected_y_offset_el) +{ + uint32_t x, y; + isl_surf_get_image_offset_el(surf, level, logical_array_layer, + logical_z_offset_px, &x, &y); + + t_assert(x == expected_x_offset_el); + t_assert(y == expected_y_offset_el); +} + +static void +t_assert_intratile_offset_el(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t expected_base_address_offset, + uint32_t expected_x_offset_el, + uint32_t expected_y_offset_el) +{ + uint32_t base_address_offset; + uint32_t x_offset_el, y_offset_el; + isl_surf_get_image_intratile_offset_el(dev, surf, + level, + logical_array_layer, + logical_z_offset_px, + &base_address_offset, + &x_offset_el, + &y_offset_el); + + t_assert(base_address_offset == expected_base_address_offset); + t_assert(x_offset_el == expected_x_offset_el); + t_assert(y_offset_el == expected_y_offset_el); +} + +static void +t_assert_phys_level0_sa(const struct isl_surf *surf, uint32_t width, + uint32_t height, uint32_t depth, uint32_t array_len) +{ + t_assert_extent4d(&surf->phys_level0_sa, width, height, depth, array_len); +} + +static void +t_assert_gen4_3d_layer(const struct isl_surf *surf, + uint32_t level, + uint32_t aligned_width, + uint32_t aligned_height, + uint32_t depth, + uint32_t horiz_layers, + uint32_t vert_layers, + uint32_t *base_y) +{ + for (uint32_t z = 0; z < depth; ++z) { + t_assert_offset_el(surf, level, 0, z, + aligned_width * (z % horiz_layers), + *base_y + aligned_height * (z / horiz_layers)); + } + + *base_y += aligned_height * vert_layers; +} + +static void +test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(void) +{ + bool ok; + + struct isl_device dev; + isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID), + /*bit6_swizzle*/ false); + + struct isl_surf surf; + ok = isl_surf_init(&dev, &surf, + .dim = ISL_SURF_DIM_2D, + 
.format = ISL_FORMAT_R8G8B8A8_UNORM, + .width = 512, + .height = 512, + .depth = 1, + .levels = 10, + .array_len = 1, + .samples = 1, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_DISABLE_AUX_BIT, + .tiling_flags = ISL_TILING_Y0_BIT); + t_assert(ok); + + t_assert_image_alignment_el(&surf, 4, 4, 1); + t_assert_image_alignment_sa(&surf, 4, 4, 1); + t_assert_phys_level0_sa(&surf, 512, 512, 1, 1); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 772); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) == + isl_surf_get_array_pitch_sa_rows(&surf)); + + /* Row pitch should be minimal possible */ + t_assert(surf.row_pitch == 2048); + + t_assert_offset_el(&surf, 0, 0, 0, 0, 0); // +0, +0 + t_assert_offset_el(&surf, 1, 0, 0, 0, 512); // +0, +512 + t_assert_offset_el(&surf, 2, 0, 0, 256, 512); // +256, +0 + t_assert_offset_el(&surf, 3, 0, 0, 256, 640); // +0, +128 + t_assert_offset_el(&surf, 4, 0, 0, 256, 704); // +0, +64 + t_assert_offset_el(&surf, 5, 0, 0, 256, 736); // +0, +32 + t_assert_offset_el(&surf, 6, 0, 0, 256, 752); // +0, +16 + t_assert_offset_el(&surf, 7, 0, 0, 256, 760); // +0, +8 + t_assert_offset_el(&surf, 8, 0, 0, 256, 764); // +0, +4 + t_assert_offset_el(&surf, 9, 0, 0, 256, 768); // +0, +4 + + t_assert_intratile_offset_el(&dev, &surf, 0, 0, 0, 0x0, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 1, 0, 0, 0x100000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x100400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x140400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x160400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x170400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x170400, 0, 16); + t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x170400, 0, 24); + t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x170400, 0, 28); + t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x180400, 0, 0); +} + +static void 
+test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(void) +{ + bool ok; + + struct isl_device dev; + isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID), + /*bit6_swizzle*/ false); + + struct isl_surf surf; + ok = isl_surf_init(&dev, &surf, + .dim = ISL_SURF_DIM_2D, + .format = ISL_FORMAT_R8G8B8A8_UNORM, + .width = 1024, + .height = 1024, + .depth = 1, + .levels = 11, + .array_len = 6, + .samples = 1, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_DISABLE_AUX_BIT, + .tiling_flags = ISL_TILING_Y0_BIT); + t_assert(ok); + + t_assert_image_alignment_el(&surf, 4, 4, 1); + t_assert_image_alignment_sa(&surf, 4, 4, 1); + + t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 1540); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) == + isl_surf_get_array_pitch_sa_rows(&surf)); + + /* Row pitch should be minimal possible */ + t_assert(surf.row_pitch == 4096); + + for (uint32_t a = 0; a < 6; ++a) { + uint32_t b = a * isl_surf_get_array_pitch_sa_rows(&surf); + + t_assert_offset_el(&surf, 0, a, 0, 0, b + 0); // +0, +0 + t_assert_offset_el(&surf, 1, a, 0, 0, b + 1024); // +0, +1024 + t_assert_offset_el(&surf, 2, a, 0, 512, b + 1024); // +512, +0 + t_assert_offset_el(&surf, 3, a, 0, 512, b + 1280); // +0, +256 + t_assert_offset_el(&surf, 4, a, 0, 512, b + 1408); // +0, +128 + t_assert_offset_el(&surf, 5, a, 0, 512, b + 1472); // +0, +64 + t_assert_offset_el(&surf, 6, a, 0, 512, b + 1504); // +0, +32 + t_assert_offset_el(&surf, 7, a, 0, 512, b + 1520); // +0, +16 + t_assert_offset_el(&surf, 8, a, 0, 512, b + 1528); // +0, +8 + t_assert_offset_el(&surf, 9, a, 0, 512, b + 1532); // +0, +4 + t_assert_offset_el(&surf, 10, a, 0, 512, b + 1536); // +0, +4 + + } + + /* The layout below assumes a specific array pitch. It will need updating + * if isl's array pitch calculations ever change. 
+ */ + t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 1540); + + /* array layer 0 */ + t_assert_intratile_offset_el(&dev, &surf, 0, 0, 0, 0x0, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 1, 0, 0, 0x400000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x400800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x500800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x580800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x5c0800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x5e0800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x5e0800, 0, 16); + t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x5e0800, 0, 24); + t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x5e0800, 0, 28); + t_assert_intratile_offset_el(&dev, &surf, 10, 0, 0, 0x600800, 0, 0); + + /* array layer 1 */ + t_assert_intratile_offset_el(&dev, &surf, 0, 1, 0, 0x600000, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 1, 1, 0, 0xa00000, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 2, 1, 0, 0xa00800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 3, 1, 0, 0xb00800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 4, 1, 0, 0xb80800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 5, 1, 0, 0xbc0800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 6, 1, 0, 0xbe0800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 7, 1, 0, 0xbe0800, 0, 20); + t_assert_intratile_offset_el(&dev, &surf, 8, 1, 0, 0xbe0800, 0, 28); + t_assert_intratile_offset_el(&dev, &surf, 9, 1, 0, 0xc00800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 10, 1, 0, 0xc00800, 0, 4); + + /* array layer 2 */ + t_assert_intratile_offset_el(&dev, &surf, 0, 2, 0, 0xc00000, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 1, 2, 0, 0x1000000, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 2, 2, 0, 0x1000800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 3, 2, 0, 0x1100800, 0, 8); + t_assert_intratile_offset_el(&dev, 
&surf, 4, 2, 0, 0x1180800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 5, 2, 0, 0x11c0800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 6, 2, 0, 0x11e0800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 7, 2, 0, 0x11e0800, 0, 24); + t_assert_intratile_offset_el(&dev, &surf, 8, 2, 0, 0x1200800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 9, 2, 0, 0x1200800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 10, 2, 0, 0x1200800, 0, 8); + + /* skip the remaining array layers */ +} + +static void +test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0(void) +{ + bool ok; + + struct isl_device dev; + isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID), + /*bit6_swizzle*/ false); + + struct isl_surf surf; + ok = isl_surf_init(&dev, &surf, + .dim = ISL_SURF_DIM_3D, + .format = ISL_FORMAT_R8G8B8A8_UNORM, + .width = 256, + .height = 256, + .depth = 256, + .levels = 9, + .array_len = 1, + .samples = 1, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_DISABLE_AUX_BIT, + .tiling_flags = ISL_TILING_Y0_BIT); + t_assert(ok); + + t_assert_image_alignment_el(&surf, 4, 4, 1); + t_assert_image_alignment_sa(&surf, 4, 4, 1); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 74916); + t_assert(isl_surf_get_array_pitch_sa_rows(&surf) == + isl_surf_get_array_pitch_el_rows(&surf)); + + uint32_t base_y = 0; + + t_assert_gen4_3d_layer(&surf, 0, 256, 256, 256, 1, 256, &base_y); + t_assert_gen4_3d_layer(&surf, 1, 128, 128, 128, 2, 64, &base_y); + t_assert_gen4_3d_layer(&surf, 2, 64, 64, 64, 4, 16, &base_y); + t_assert_gen4_3d_layer(&surf, 3, 32, 32, 32, 8, 4, &base_y); + t_assert_gen4_3d_layer(&surf, 4, 16, 16, 16, 16, 1, &base_y); + t_assert_gen4_3d_layer(&surf, 5, 8, 8, 8, 32, 1, &base_y); + t_assert_gen4_3d_layer(&surf, 6, 4, 4, 4, 64, 1, &base_y); + t_assert_gen4_3d_layer(&surf, 7, 4, 4, 2, 128, 1, &base_y); + t_assert_gen4_3d_layer(&surf, 8, 4, 4, 1, 256, 1, &base_y); +} + +int main(void) +{ + /* FINISHME: Add tests for npot sizes */ + /* FINISHME: 
Add tests for 1D surfaces */ + + test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(); + test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(); + test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0(); +} diff --git a/src/isl/.gitignore b/src/isl/.gitignore deleted file mode 100644 index e9cfd67b94e..00000000000 --- a/src/isl/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/isl_format_layout.c diff --git a/src/isl/Makefile.am b/src/isl/Makefile.am deleted file mode 100644 index 72f5460554f..00000000000 --- a/src/isl/Makefile.am +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright 2015 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -SUBDIRS = . 
- -noinst_LTLIBRARIES = libisl.la - -EXTRA_DIST = tests - -# The gallium includes are for the util/u_math.h include from main/macros.h -AM_CPPFLAGS = \ - $(INTEL_CFLAGS) \ - $(VALGRIND_CFLAGS) \ - $(DEFINES) \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/mapi \ - -I$(top_srcdir)/src/mesa \ - -I$(top_srcdir)/src/mesa/drivers/dri/common \ - -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ - -I$(top_srcdir)/src/gallium/auxiliary \ - -I$(top_srcdir)/src/gallium/include \ - -I$(top_builddir)/src - -libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init - -libisl_la_SOURCES = \ - isl.c \ - isl.h \ - isl_format.c \ - isl_format_layout.c \ - isl_gen4.c \ - isl_gen4.h \ - isl_gen6.c \ - isl_gen6.h \ - isl_gen7.c \ - isl_gen7.h \ - isl_gen8.c \ - isl_gen8.h \ - isl_gen9.c \ - isl_gen9.h \ - isl_image.c \ - $(NULL) - -BUILT_SOURCES = \ - isl_format_layout.c - -isl_format_layout.c: isl_format_layout_gen.bash \ - isl_format_layout.csv - $(AM_V_GEN)$(srcdir)/isl_format_layout_gen.bash \ - <$(srcdir)/isl_format_layout.csv >$@ - -# ---------------------------------------------------------------------------- -# Tests -# ---------------------------------------------------------------------------- - -TESTS = tests/isl_surf_get_image_offset_test - -check_PROGRAMS = $(TESTS) - -# Link tests to lib965_compiler.la for brw_get_device_info(). 
-tests_ldadd = \ - libisl.la \ - $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la - -tests_isl_surf_get_image_offset_test_SOURCES = \ - tests/isl_surf_get_image_offset_test.c -tests_isl_surf_get_image_offset_test_LDADD = $(tests_ldadd) - -# ---------------------------------------------------------------------------- - -include $(top_srcdir)/install-lib-links.mk diff --git a/src/isl/README b/src/isl/README deleted file mode 100644 index 1ab4313fcc5..00000000000 --- a/src/isl/README +++ /dev/null @@ -1,113 +0,0 @@ -Intel Surface Layout - -Introduction -============ -isl is a small library that calculates the layout of Intel GPU surfaces, queries -those layouts, and queries the properties of surface formats. - - -Independence from User APIs -=========================== -isl's API is independent of any user-facing graphics API, such as OpenGL and -Vulkan. This independence allows isl to be used a shared component by multiple -Intel drivers. - -Rather than mimic the user-facing APIs, the isl API attempts to reflect Intel -hardware: the actual memory layout of Intel GPU surfaces and how one programs -the GPU to use those surfaces. For example: - - - The tokens of `enum isl_format` (such as `ISL_FORMAT_R8G8B8A8_UNORM`) - match those of the hardware enum `SURFACE_FORMAT` rather than the OpenGL - or Vulkan format tokens. And the values of `isl_format` and - `SURFACE_FORMAT` are identical. - - - The OpenGL and Vulkan APIs contain depth and stencil formats. However the - hardware enum `SURFACE_FORMAT` does not, and therefore neither does `enum - isl_format`. Rather than define new pixel formats that have no hardware - counterpart, isl records the intent to use a surface as a depth or stencil - buffer with the usage flags `ISL_SURF_USAGE_DEPTH_BIT` and - `ISL_SURF_USAGE_STENCIL_BIT`. 
- - - `struct isl_surf` distinguishes between the surface's logical dimension - from the user API's perspective (`enum isl_surf_dim`, which may be 1D, 2D, - or 3D) and the layout of those dimensions in memory (`enum isl_dim_layout`). - - -Surface Units -============= - -Intro ------ -ISL takes care in its equations to correctly handle conversion among surface -units (such as pixels and compression blocks) and to carefully distinguish -between a surface's logical layout in the client API and its physical layout -in memory. - -Symbol names often explicitly declare their unit with a suffix: - - - px: logical pixels - - sa: physical surface samples - - el: physical surface elements - - sa_rows: rows of physical surface samples - - el_rows: rows of physical surface elements - -Logical units are independent of hardware generation and are closely related -to the user-facing API (OpenGL and Vulkan). Physical units are dependent on -hardware generation and reflect the surface's layout in memory. - -Definitions ------------ -- Logical Pixels (px): - - The surface's layout from the perspective of the client API (OpenGL and - Vulkan) is in units of logical pixels. Logical pixels are independent of the - surface's layout in memory. - - A surface's width and height, in units of logical pixels, is not affected by - the surface's sample count. For example, consider a VkImage created with - VkImageCreateInfo{width=w0, height=h0, samples=s0}. The surface's width and - height at level 0 is, in units of logical pixels, w0 and h0 regardless of - the value of s0. - - For example, the logical array length of a 3D surface is always 1, even on - Gen9 where the surface's memory layout is that of an array surface - (ISL_DIM_LAYOUT_GEN4_2D). - -- Physical Surface Samples (sa): - - For a multisampled surface, this unit has the obvious meaning. - A singlesampled surface, from ISL's perspective, is simply a multisampled - surface whose sample count is 1. 
- - For example, consider a 2D single-level non-array surface with samples=4, - width_px=64, and height_px=64 (note that the suffix 'px' indicates logical - pixels). If the surface's multisample layout is ISL_MSAA_LAYOUT_INTERLEAVED, - then the extent of level 0 is, in units of physical surface samples, - width_sa=128, height_sa=128, depth_sa=1, array_length_sa=1. If - ISL_MSAA_LAYOUT_ARRAY, then width_sa=64, height_sa=64, depth_sa=1, - array_length_sa=4. - -- Physical Surface Elements (el): - - This unit allows ISL to treat compressed and uncompressed formats - identically in many calculations. - - If the surface's pixel format is compressed, such as ETC2, then a surface - element is equivalent to a compression block. If uncompressed, then - a surface element is equivalent to a surface sample. As a corollary, for - a given surface a surface element is at least as large as a surface sample. - -Errata ------- -ISL acquired the term 'surface element' from the Broadwell PRM [1], which -defines it as follows: - - An element is defined as a pixel in uncompresed surface formats, and as - a compression block in compressed surface formats. For MSFMT_DEPTH_STENCIL - type multisampled surfaces, an element is a sample. 
- - -References -========== -[1]: Broadwell PRM >> Volume 2d: Command Reference: Structures >> - RENDER_SURFACE_STATE Surface Vertical Alignment (p325) diff --git a/src/isl/isl.c b/src/isl/isl.c deleted file mode 100644 index 27928fd0850..00000000000 --- a/src/isl/isl.c +++ /dev/null @@ -1,1428 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "isl.h" -#include "isl_gen4.h" -#include "isl_gen6.h" -#include "isl_gen7.h" -#include "isl_gen8.h" -#include "isl_gen9.h" -#include "isl_priv.h" - -void PRINTFLIKE(3, 4) UNUSED -__isl_finishme(const char *file, int line, const char *fmt, ...) 
-{ - va_list ap; - char buf[512]; - - va_start(ap, fmt); - vsnprintf(buf, sizeof(buf), fmt, ap); - va_end(ap); - - fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf); -} - -void -isl_device_init(struct isl_device *dev, - const struct brw_device_info *info, - bool has_bit6_swizzling) -{ - dev->info = info; - dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6; - dev->has_bit6_swizzling = has_bit6_swizzling; - - /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some - * device properties at buildtime. Verify that the macros with the device - * properties chosen during runtime. - */ - assert(ISL_DEV_GEN(dev) == dev->info->gen); - assert(ISL_DEV_USE_SEPARATE_STENCIL(dev) == dev->use_separate_stencil); - - /* Did we break hiz or stencil? */ - if (ISL_DEV_USE_SEPARATE_STENCIL(dev)) - assert(info->has_hiz_and_separate_stencil); - if (info->must_use_separate_stencil) - assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); -} - -/** - * @brief Query the set of multisamples supported by the device. - * - * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always - * supported. 
- */ -isl_sample_count_mask_t ATTRIBUTE_CONST -isl_device_get_sample_counts(struct isl_device *dev) -{ - if (ISL_DEV_GEN(dev) >= 9) { - return ISL_SAMPLE_COUNT_1_BIT | - ISL_SAMPLE_COUNT_2_BIT | - ISL_SAMPLE_COUNT_4_BIT | - ISL_SAMPLE_COUNT_8_BIT | - ISL_SAMPLE_COUNT_16_BIT; - } else if (ISL_DEV_GEN(dev) >= 8) { - return ISL_SAMPLE_COUNT_1_BIT | - ISL_SAMPLE_COUNT_2_BIT | - ISL_SAMPLE_COUNT_4_BIT | - ISL_SAMPLE_COUNT_8_BIT; - } else if (ISL_DEV_GEN(dev) >= 7) { - return ISL_SAMPLE_COUNT_1_BIT | - ISL_SAMPLE_COUNT_4_BIT | - ISL_SAMPLE_COUNT_8_BIT; - } else if (ISL_DEV_GEN(dev) >= 6) { - return ISL_SAMPLE_COUNT_1_BIT | - ISL_SAMPLE_COUNT_4_BIT; - } else { - return ISL_SAMPLE_COUNT_1_BIT; - } -} - -/** - * @param[out] info is written only on success - */ -bool -isl_tiling_get_info(const struct isl_device *dev, - enum isl_tiling tiling, - uint32_t format_block_size, - struct isl_tile_info *tile_info) -{ - const uint32_t bs = format_block_size; - uint32_t width, height; - - assert(bs > 0); - - switch (tiling) { - case ISL_TILING_LINEAR: - width = 1; - height = 1; - break; - - case ISL_TILING_X: - width = 1 << 9; - height = 1 << 3; - break; - - case ISL_TILING_Y0: - width = 1 << 7; - height = 1 << 5; - break; - - case ISL_TILING_W: - /* XXX: Should W tile be same as Y? 
*/ - width = 1 << 6; - height = 1 << 6; - break; - - case ISL_TILING_Yf: - case ISL_TILING_Ys: { - if (ISL_DEV_GEN(dev) < 9) - return false; - - if (!isl_is_pow2(bs)) - return false; - - bool is_Ys = tiling == ISL_TILING_Ys; - - width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys)); - height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys)); - break; - } - } /* end switch */ - - *tile_info = (struct isl_tile_info) { - .tiling = tiling, - .width = width, - .height = height, - .size = width * height, - }; - - return true; -} - -void -isl_tiling_get_extent(const struct isl_device *dev, - enum isl_tiling tiling, - uint32_t format_block_size, - struct isl_extent2d *e) -{ - struct isl_tile_info tile_info; - isl_tiling_get_info(dev, tiling, format_block_size, &tile_info); - *e = isl_extent2d(tile_info.width, tile_info.height); -} - -/** - * @param[out] tiling is set only on success - */ -bool -isl_surf_choose_tiling(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling *tiling) -{ - isl_tiling_flags_t tiling_flags = info->tiling_flags; - - if (ISL_DEV_GEN(dev) >= 7) { - gen7_filter_tiling(dev, info, &tiling_flags); - } else { - isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev)); - gen7_filter_tiling(dev, info, &tiling_flags); - } - - #define CHOOSE(__tiling) \ - do { \ - if (tiling_flags & (1u << (__tiling))) { \ - *tiling = (__tiling); \ - return true; \ - } \ - } while (0) - - /* Of the tiling modes remaining, choose the one that offers the best - * performance. - */ - - if (info->dim == ISL_SURF_DIM_1D) { - /* Prefer linear for 1D surfaces because they do not benefit from - * tiling. To the contrary, tiling leads to wasted memory and poor - * memory locality due to the swizzling and alignment restrictions - * required in tiled surfaces. 
- */ - CHOOSE(ISL_TILING_LINEAR); - } - - CHOOSE(ISL_TILING_Ys); - CHOOSE(ISL_TILING_Yf); - CHOOSE(ISL_TILING_Y0); - CHOOSE(ISL_TILING_X); - CHOOSE(ISL_TILING_W); - CHOOSE(ISL_TILING_LINEAR); - - #undef CHOOSE - - /* No tiling mode accomodates the inputs. */ - return false; -} - -static bool -isl_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout) -{ - if (ISL_DEV_GEN(dev) >= 8) { - return gen8_choose_msaa_layout(dev, info, tiling, msaa_layout); - } else if (ISL_DEV_GEN(dev) >= 7) { - return gen7_choose_msaa_layout(dev, info, tiling, msaa_layout); - } else if (ISL_DEV_GEN(dev) >= 6) { - return gen6_choose_msaa_layout(dev, info, tiling, msaa_layout); - } else { - return gen4_choose_msaa_layout(dev, info, tiling, msaa_layout); - } -} - -static void -isl_msaa_interleaved_scale_px_to_sa(uint32_t samples, - uint32_t *width, uint32_t *height) -{ - assert(isl_is_pow2(samples)); - - /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level - * Sizes (p133): - * - * If the surface is multisampled and it is a depth or stencil surface - * or Multisampled Surface StorageFormat in SURFACE_STATE is - * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before - * proceeding: [...] - */ - if (width) - *width = isl_align(*width, 2) << ((ffs(samples) - 0) / 2); - if (height) - *height = isl_align(*height, 2) << ((ffs(samples) - 1) / 2); -} - -static enum isl_array_pitch_span -isl_choose_array_pitch_span(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_dim_layout dim_layout, - const struct isl_extent4d *phys_level0_sa) -{ - switch (dim_layout) { - case ISL_DIM_LAYOUT_GEN9_1D: - case ISL_DIM_LAYOUT_GEN4_2D: - if (ISL_DEV_GEN(dev) >= 8) { - /* QPitch becomes programmable in Broadwell. So choose the - * most compact QPitch possible in order to conserve memory. 
- * - * From the Broadwell PRM >> Volume 2d: Command Reference: Structures - * >> RENDER_SURFACE_STATE Surface QPitch (p325): - * - * - Software must ensure that this field is set to a value - * sufficiently large such that the array slices in the surface - * do not overlap. Refer to the Memory Data Formats section for - * information on how surfaces are stored in memory. - * - * - This field specifies the distance in rows between array - * slices. It is used only in the following cases: - * - * - Surface Array is enabled OR - * - Number of Mulitsamples is not NUMSAMPLES_1 and - * Multisampled Surface Storage Format set to MSFMT_MSS OR - * - Surface Type is SURFTYPE_CUBE - */ - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } else if (ISL_DEV_GEN(dev) >= 7) { - /* Note that Ivybridge introduces - * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the - * driver more control over the QPitch. - */ - - if (phys_level0_sa->array_len == 1) { - /* The hardware will never use the QPitch. So choose the most - * compact QPitch possible in order to conserve memory. - */ - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } - - if (isl_surf_usage_is_depth_or_stencil(info->usage)) { - /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >> - * Section 6.18.4.7: Surface Arrays (p112): - * - * If Surface Array Spacing is set to ARYSPC_FULL (note that - * the depth buffer and stencil buffer have an implied value of - * ARYSPC_FULL): - */ - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } - - if (info->levels == 1) { - /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing - * to ARYSPC_LOD0. 
- */ - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } - - return ISL_ARRAY_PITCH_SPAN_FULL; - } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && - ISL_DEV_USE_SEPARATE_STENCIL(dev) && - isl_surf_usage_is_stencil(info->usage)) { - /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: - * Graphics Core >> Section 7.18.3.7: Surface Arrays: - * - * The separate stencil buffer does not support mip mapping, thus - * the storage for LODs other than LOD 0 is not needed. - */ - assert(info->levels == 1); - assert(phys_level0_sa->array_len == 1); - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } else { - if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && - ISL_DEV_USE_SEPARATE_STENCIL(dev) && - isl_surf_usage_is_stencil(info->usage)) { - /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: - * Graphics Core >> Section 7.18.3.7: Surface Arrays: - * - * The separate stencil buffer does not support mip mapping, - * thus the storage for LODs other than LOD 0 is not needed. - */ - assert(info->levels == 1); - assert(phys_level0_sa->array_len == 1); - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } - - if (phys_level0_sa->array_len == 1) { - /* The hardware will never use the QPitch. So choose the most - * compact QPitch possible in order to conserve memory. - */ - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } - - return ISL_ARRAY_PITCH_SPAN_FULL; - } - - case ISL_DIM_LAYOUT_GEN4_3D: - /* The hardware will never use the QPitch. So choose the most - * compact QPitch possible in order to conserve memory. 
- */ - return ISL_ARRAY_PITCH_SPAN_COMPACT; - } - - unreachable("bad isl_dim_layout"); - return ISL_ARRAY_PITCH_SPAN_FULL; -} - -static void -isl_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - if (ISL_DEV_GEN(dev) >= 9) { - gen9_choose_image_alignment_el(dev, info, tiling, msaa_layout, - image_align_el); - } else if (ISL_DEV_GEN(dev) >= 8) { - gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout, - image_align_el); - } else if (ISL_DEV_GEN(dev) >= 7) { - gen7_choose_image_alignment_el(dev, info, tiling, msaa_layout, - image_align_el); - } else if (ISL_DEV_GEN(dev) >= 6) { - gen6_choose_image_alignment_el(dev, info, tiling, msaa_layout, - image_align_el); - } else { - gen4_choose_image_alignment_el(dev, info, tiling, msaa_layout, - image_align_el); - } -} - -static enum isl_dim_layout -isl_surf_choose_dim_layout(const struct isl_device *dev, - enum isl_surf_dim logical_dim) -{ - if (ISL_DEV_GEN(dev) >= 9) { - switch (logical_dim) { - case ISL_SURF_DIM_1D: - return ISL_DIM_LAYOUT_GEN9_1D; - case ISL_SURF_DIM_2D: - case ISL_SURF_DIM_3D: - return ISL_DIM_LAYOUT_GEN4_2D; - } - } else { - switch (logical_dim) { - case ISL_SURF_DIM_1D: - case ISL_SURF_DIM_2D: - return ISL_DIM_LAYOUT_GEN4_2D; - case ISL_SURF_DIM_3D: - return ISL_DIM_LAYOUT_GEN4_3D; - } - } - - unreachable("bad isl_surf_dim"); - return ISL_DIM_LAYOUT_GEN4_2D; -} - -/** - * Calculate the physical extent of the surface's first level, in units of - * surface samples. The result is aligned to the format's compression block. 
- */ -static void -isl_calc_phys_level0_extent_sa(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_dim_layout dim_layout, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent4d *phys_level0_sa) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - if (isl_format_is_yuv(info->format)) - isl_finishme("%s:%s: YUV format", __FILE__, __func__); - - switch (info->dim) { - case ISL_SURF_DIM_1D: - assert(info->height == 1); - assert(info->depth == 1); - assert(info->samples == 1); - assert(!isl_format_is_compressed(info->format)); - - switch (dim_layout) { - case ISL_DIM_LAYOUT_GEN4_3D: - unreachable("bad isl_dim_layout"); - - case ISL_DIM_LAYOUT_GEN9_1D: - case ISL_DIM_LAYOUT_GEN4_2D: - *phys_level0_sa = (struct isl_extent4d) { - .w = info->width, - .h = 1, - .d = 1, - .a = info->array_len, - }; - break; - } - break; - - case ISL_SURF_DIM_2D: - assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D); - - if (tiling == ISL_TILING_Ys && info->samples > 1) - isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__); - - switch (msaa_layout) { - case ISL_MSAA_LAYOUT_NONE: - assert(info->depth == 1); - assert(info->samples == 1); - - *phys_level0_sa = (struct isl_extent4d) { - .w = isl_align(info->width, fmtl->bw), - .h = isl_align(info->height, fmtl->bh), - .d = 1, - .a = info->array_len, - }; - break; - - case ISL_MSAA_LAYOUT_ARRAY: - assert(info->depth == 1); - assert(info->array_len == 1); - assert(!isl_format_is_compressed(info->format)); - - *phys_level0_sa = (struct isl_extent4d) { - .w = info->width, - .h = info->height, - .d = 1, - .a = info->samples, - }; - break; - - case ISL_MSAA_LAYOUT_INTERLEAVED: - assert(info->depth == 1); - assert(info->array_len == 1); - assert(!isl_format_is_compressed(info->format)); - - *phys_level0_sa = (struct isl_extent4d) { - .w = info->width, - .h = info->height, - .d = 1, - .a = 1, - }; - - 
isl_msaa_interleaved_scale_px_to_sa(info->samples, - &phys_level0_sa->w, - &phys_level0_sa->h); - break; - } - break; - - case ISL_SURF_DIM_3D: - assert(info->array_len == 1); - assert(info->samples == 1); - - if (fmtl->bd > 1) { - isl_finishme("%s:%s: compression block with depth > 1", - __FILE__, __func__); - } - - switch (dim_layout) { - case ISL_DIM_LAYOUT_GEN9_1D: - unreachable("bad isl_dim_layout"); - - case ISL_DIM_LAYOUT_GEN4_2D: - assert(ISL_DEV_GEN(dev) >= 9); - - *phys_level0_sa = (struct isl_extent4d) { - .w = isl_align(info->width, fmtl->bw), - .h = isl_align(info->height, fmtl->bh), - .d = 1, - .a = info->depth, - }; - break; - - case ISL_DIM_LAYOUT_GEN4_3D: - assert(ISL_DEV_GEN(dev) < 9); - *phys_level0_sa = (struct isl_extent4d) { - .w = isl_align(info->width, fmtl->bw), - .h = isl_align(info->height, fmtl->bh), - .d = info->depth, - .a = 1, - }; - break; - } - break; - } -} - -/** - * A variant of isl_calc_phys_slice0_extent_sa() specific to - * ISL_DIM_LAYOUT_GEN4_2D. - */ -static void -isl_calc_phys_slice0_extent_sa_gen4_2d( - const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_msaa_layout msaa_layout, - const struct isl_extent3d *image_align_sa, - const struct isl_extent4d *phys_level0_sa, - struct isl_extent2d *phys_slice0_sa) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - assert(phys_level0_sa->depth == 1); - - if (info->levels == 1 && msaa_layout != ISL_MSAA_LAYOUT_INTERLEAVED) { - /* Do not pad the surface to the image alignment. Instead, pad it only - * to the pixel format's block alignment. - * - * For tiled surfaces, using a reduced alignment here avoids wasting CPU - * cycles on the below mipmap layout caluclations. Reducing the - * alignment here is safe because we later align the row pitch and array - * pitch to the tile boundary. 
It is safe even for - * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled - * to accomodate the interleaved samples. - * - * For linear surfaces, reducing the alignment here permits us to later - * choose an arbitrary, non-aligned row pitch. If the surface backs - * a VkBuffer, then an arbitrary pitch may be needed to accomodate - * VkBufferImageCopy::bufferRowLength. - */ - *phys_slice0_sa = (struct isl_extent2d) { - .w = isl_align_npot(phys_level0_sa->w, fmtl->bw), - .h = isl_align_npot(phys_level0_sa->h, fmtl->bh), - }; - return; - } - - uint32_t slice_top_w = 0; - uint32_t slice_bottom_w = 0; - uint32_t slice_left_h = 0; - uint32_t slice_right_h = 0; - - uint32_t W0 = phys_level0_sa->w; - uint32_t H0 = phys_level0_sa->h; - - for (uint32_t l = 0; l < info->levels; ++l) { - uint32_t W = isl_minify(W0, l); - uint32_t H = isl_minify(H0, l); - - if (msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { - /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level - * Sizes (p133): - * - * If the surface is multisampled and it is a depth or stencil - * surface or Multisampled Surface StorageFormat in - * SURFACE_STATE is MSFMT_DEPTH_STENCIL, W_L and H_L must be - * adjusted as follows before proceeding: [...] - */ - isl_msaa_interleaved_scale_px_to_sa(info->samples, &W, &H); - } - - uint32_t w = isl_align_npot(W, image_align_sa->w); - uint32_t h = isl_align_npot(H, image_align_sa->h); - - if (l == 0) { - slice_top_w = w; - slice_left_h = h; - slice_right_h = h; - } else if (l == 1) { - slice_bottom_w = w; - slice_left_h += h; - } else if (l == 2) { - slice_bottom_w += w; - slice_right_h += h; - } else { - slice_right_h += h; - } - } - - *phys_slice0_sa = (struct isl_extent2d) { - .w = MAX(slice_top_w, slice_bottom_w), - .h = MAX(slice_left_h, slice_right_h), - }; -} - -/** - * A variant of isl_calc_phys_slice0_extent_sa() specific to - * ISL_DIM_LAYOUT_GEN4_3D. 
- */ -static void -isl_calc_phys_slice0_extent_sa_gen4_3d( - const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - const struct isl_extent3d *image_align_sa, - const struct isl_extent4d *phys_level0_sa, - struct isl_extent2d *phys_slice0_sa) -{ - assert(info->samples == 1); - assert(phys_level0_sa->array_len == 1); - - uint32_t slice_w = 0; - uint32_t slice_h = 0; - - uint32_t W0 = phys_level0_sa->w; - uint32_t H0 = phys_level0_sa->h; - uint32_t D0 = phys_level0_sa->d; - - for (uint32_t l = 0; l < info->levels; ++l) { - uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w); - uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h); - uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa->d); - - uint32_t max_layers_horiz = MIN(level_d, 1u << l); - uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); - - slice_w = MAX(slice_w, level_w * max_layers_horiz); - slice_h += level_h * max_layers_vert; - } - - *phys_slice0_sa = (struct isl_extent2d) { - .w = slice_w, - .h = slice_h, - }; -} - -/** - * A variant of isl_calc_phys_slice0_extent_sa() specific to - * ISL_DIM_LAYOUT_GEN9_1D. 
- */ -static void -isl_calc_phys_slice0_extent_sa_gen9_1d( - const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - const struct isl_extent3d *image_align_sa, - const struct isl_extent4d *phys_level0_sa, - struct isl_extent2d *phys_slice0_sa) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - assert(phys_level0_sa->height == 1); - assert(phys_level0_sa->depth == 1); - assert(info->samples == 1); - assert(image_align_sa->w >= fmtl->bw); - - uint32_t slice_w = 0; - const uint32_t W0 = phys_level0_sa->w; - - for (uint32_t l = 0; l < info->levels; ++l) { - uint32_t W = isl_minify(W0, l); - uint32_t w = isl_align_npot(W, image_align_sa->w); - - slice_w += w; - } - - *phys_slice0_sa = isl_extent2d(slice_w, 1); -} - -/** - * Calculate the physical extent of the surface's first array slice, in units - * of surface samples. If the surface is multi-leveled, then the result will - * be aligned to \a image_align_sa. - */ -static void -isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_dim_layout dim_layout, - enum isl_msaa_layout msaa_layout, - const struct isl_extent3d *image_align_sa, - const struct isl_extent4d *phys_level0_sa, - struct isl_extent2d *phys_slice0_sa) -{ - switch (dim_layout) { - case ISL_DIM_LAYOUT_GEN9_1D: - isl_calc_phys_slice0_extent_sa_gen9_1d(dev, info, - image_align_sa, phys_level0_sa, - phys_slice0_sa); - return; - case ISL_DIM_LAYOUT_GEN4_2D: - isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, - image_align_sa, phys_level0_sa, - phys_slice0_sa); - return; - case ISL_DIM_LAYOUT_GEN4_3D: - isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, image_align_sa, - phys_level0_sa, phys_slice0_sa); - return; - } -} - -/** - * Calculate the pitch between physical array slices, in units of rows of - * surface elements. 
- */ -static uint32_t -isl_calc_array_pitch_el_rows(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - const struct isl_tile_info *tile_info, - enum isl_dim_layout dim_layout, - enum isl_array_pitch_span array_pitch_span, - const struct isl_extent3d *image_align_sa, - const struct isl_extent4d *phys_level0_sa, - const struct isl_extent2d *phys_slice0_sa) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - uint32_t pitch_sa_rows = 0; - - switch (dim_layout) { - case ISL_DIM_LAYOUT_GEN9_1D: - /* Each row is an array slice */ - pitch_sa_rows = 1; - break; - case ISL_DIM_LAYOUT_GEN4_2D: - switch (array_pitch_span) { - case ISL_ARRAY_PITCH_SPAN_COMPACT: - pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); - break; - case ISL_ARRAY_PITCH_SPAN_FULL: { - /* The QPitch equation is found in the Broadwell PRM >> Volume 5: - * Memory Views >> Common Surface Formats >> Surface Layout >> 2D - * Surfaces >> Surface Arrays. - */ - uint32_t H0_sa = phys_level0_sa->h; - uint32_t H1_sa = isl_minify(H0_sa, 1); - - uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); - uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); - - uint32_t m; - if (ISL_DEV_GEN(dev) >= 7) { - /* The QPitch equation changed slightly in Ivybridge. */ - m = 12; - } else { - m = 11; - } - - pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); - - if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && - (info->height % 4 == 1)) { - /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: - * Graphics Core >> Section 7.18.3.7: Surface Arrays: - * - * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than - * the value calculated in the equation above , for every - * other odd Surface Height starting from 1 i.e. 1,5,9,13. - * - * XXX(chadv): Is the errata natural corollary of the physical - * layout of interleaved samples? 
- */ - pitch_sa_rows += 4; - } - - pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); - } /* end case */ - break; - } - break; - case ISL_DIM_LAYOUT_GEN4_3D: - assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); - pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); - break; - default: - unreachable("bad isl_dim_layout"); - break; - } - - assert(pitch_sa_rows % fmtl->bh == 0); - uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh; - - if (ISL_DEV_GEN(dev) >= 9 && - info->dim == ISL_SURF_DIM_3D && - tile_info->tiling != ISL_TILING_LINEAR) { - /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch: - * - * Tile Mode != Linear: This field must be set to an integer multiple - * of the tile height - */ - pitch_el_rows = isl_align(pitch_el_rows, tile_info->height); - } - - return pitch_el_rows; -} - -/** - * Calculate the pitch of each surface row, in bytes. - */ -static uint32_t -isl_calc_row_pitch(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - const struct isl_tile_info *tile_info, - const struct isl_extent3d *image_align_sa, - const struct isl_extent2d *phys_slice0_sa) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - uint32_t row_pitch = info->min_pitch; - - /* First, align the surface to a cache line boundary, as the PRM explains - * below. - * - * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface - * Formats >> Surface Padding Requirements >> Render Target and Media - * Surfaces: - * - * The data port accesses data (pixels) outside of the surface if they - * are contained in the same cache request as pixels that are within the - * surface. These pixels will not be returned by the requesting message, - * however if these pixels lie outside of defined pages in the GTT, - * a GTT error will result when the cache request is processed. In order - * to avoid these GTT errors, “padding” at the bottom of the surface is - * sometimes necessary. 
- * - * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface - * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: - * - * The sampling engine accesses texels outside of the surface if they - * are contained in the same cache line as texels that are within the - * surface. These texels will not participate in any calculation - * performed by the sampling engine and will not affect the result of - * any sampling engine operation, however if these texels lie outside of - * defined pages in the GTT, a GTT error will result when the cache line - * is accessed. In order to avoid these GTT errors, “padding” at the - * bottom and right side of a sampling engine surface is sometimes - * necessary. - * - * It is possible that a cache line will straddle a page boundary if the - * base address or pitch is not aligned. All pages included in the cache - * lines that are part of the surface must map to valid GTT entries to - * avoid errors. To determine the necessary padding on the bottom and - * right side of the surface, refer to the table in Alignment Unit Size - * section for the i and j parameters for the surface format in use. The - * surface must then be extended to the next multiple of the alignment - * unit size in each dimension, and all texels contained in this - * extended surface must have valid GTT entries. - * - * For example, suppose the surface size is 15 texels by 10 texels and - * the alignment parameters are i=4 and j=2. In this case, the extended - * surface would be 16 by 10. Note that these calculations are done in - * texels, and must be converted to bytes based on the surface format - * being used to determine whether additional pages need to be defined. 
- */ - assert(phys_slice0_sa->w % fmtl->bw == 0); - row_pitch = MAX(row_pitch, fmtl->bs * (phys_slice0_sa->w / fmtl->bw)); - - switch (tile_info->tiling) { - case ISL_TILING_LINEAR: - /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> - * RENDER_SURFACE_STATE Surface Pitch (p349): - * - * - For linear render target surfaces and surfaces accessed with the - * typed data port messages, the pitch must be a multiple of the - * element size for non-YUV surface formats. Pitch must be - * a multiple of 2 * element size for YUV surface formats. - * - * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we - * ignore because isl doesn't do buffers.] - * - * - For other linear surfaces, the pitch can be any multiple of - * bytes. - */ - if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { - if (isl_format_is_yuv(info->format)) { - row_pitch = isl_align_npot(row_pitch, 2 * fmtl->bs); - } else { - row_pitch = isl_align_npot(row_pitch, fmtl->bs); - } - } - break; - default: - /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> - * RENDER_SURFACE_STATE Surface Pitch (p349): - * - * - For tiled surfaces, the pitch must be a multiple of the tile - * width. - */ - row_pitch = isl_align(row_pitch, tile_info->width); - break; - } - - return row_pitch; -} - -/** - * Calculate the surface's total height, including padding, in units of - * surface elements. 
- */ -static uint32_t -isl_calc_total_height_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - const struct isl_tile_info *tile_info, - uint32_t phys_array_len, - uint32_t row_pitch, - uint32_t array_pitch_el_rows) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - uint32_t total_h_el = phys_array_len * array_pitch_el_rows; - uint32_t pad_bytes = 0; - - /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface - * Formats >> Surface Padding Requirements >> Render Target and Media - * Surfaces: - * - * The data port accesses data (pixels) outside of the surface if they - * are contained in the same cache request as pixels that are within the - * surface. These pixels will not be returned by the requesting message, - * however if these pixels lie outside of defined pages in the GTT, - * a GTT error will result when the cache request is processed. In - * order to avoid these GTT errors, “padding” at the bottom of the - * surface is sometimes necessary. - * - * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface - * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: - * - * ... Lots of padding requirements, all listed separately below. - */ - - /* We can safely ignore the first padding requirement, quoted below, - * because isl doesn't do buffers. - * - * - [pre-BDW] For buffers, which have no inherent “height,” padding - * requirements are different. A buffer must be padded to the next - * multiple of 256 array elements, with an additional 16 bytes added - * beyond that to account for the L1 cache line. - */ - - /* - * - For compressed textures [...], padding at the bottom of the surface - * is to an even compressed row. - */ - if (isl_format_is_compressed(info->format)) - total_h_el = isl_align(total_h_el, 2); - - /* - * - For cube surfaces, an additional two rows of padding are required - * at the bottom of the surface. 
- */ - if (info->usage & ISL_SURF_USAGE_CUBE_BIT) - total_h_el += 2; - - /* - * - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats, - * additional padding is required. These surfaces require an extra row - * plus 16 bytes of padding at the bottom in addition to the general - * padding requirements. - */ - if (isl_format_is_yuv(info->format) && - (fmtl->bs == 96 || fmtl->bs == 48|| fmtl->bs == 24)) { - total_h_el += 1; - pad_bytes += 16; - } - - /* - * - For linear surfaces, additional padding of 64 bytes is required at - * the bottom of the surface. This is in addition to the padding - * required above. - */ - if (tile_info->tiling == ISL_TILING_LINEAR) - pad_bytes += 64; - - /* The below text weakens, not strengthens, the padding requirements for - * linear surfaces. Therefore we can safely ignore it. - * - * - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array, - * non-MSAA, non-mip-mapped surfaces in linear memory, the only - * padding requirement is to the next aligned 64-byte boundary beyond - * the end of the surface. The rest of the padding requirements - * documented above do not apply to these surfaces. - */ - - /* - * - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and - * height % 4 != 0, the surface must be padded with - * 4-(height % 4)*Surface Pitch # of bytes. - */ - if (ISL_DEV_GEN(dev) >= 9 && - tile_info->tiling == ISL_TILING_LINEAR && - (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) { - total_h_el = isl_align(total_h_el, 4); - } - - /* - * - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded - * to 4 times the Surface Pitch # of bytes - */ - if (ISL_DEV_GEN(dev) >= 9 && - tile_info->tiling == ISL_TILING_LINEAR && - info->dim == ISL_SURF_DIM_1D) { - total_h_el += 4; - } - - /* Be sloppy. Align any leftover padding to a row boundary. 
*/ - total_h_el += isl_align_div_npot(pad_bytes, row_pitch); - - return total_h_el; -} - -bool -isl_surf_init_s(const struct isl_device *dev, - struct isl_surf *surf, - const struct isl_surf_init_info *restrict info) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - const struct isl_extent4d logical_level0_px = { - .w = info->width, - .h = info->height, - .d = info->depth, - .a = info->array_len, - }; - - enum isl_dim_layout dim_layout = - isl_surf_choose_dim_layout(dev, info->dim); - - enum isl_tiling tiling; - if (!isl_surf_choose_tiling(dev, info, &tiling)) - return false; - - struct isl_tile_info tile_info; - if (!isl_tiling_get_info(dev, tiling, fmtl->bs, &tile_info)) - return false; - - enum isl_msaa_layout msaa_layout; - if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout)) - return false; - - struct isl_extent3d image_align_el; - isl_choose_image_alignment_el(dev, info, tiling, msaa_layout, - &image_align_el); - - struct isl_extent3d image_align_sa = - isl_extent3d_el_to_sa(info->format, image_align_el); - - struct isl_extent4d phys_level0_sa; - isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, - &phys_level0_sa); - assert(phys_level0_sa.w % fmtl->bw == 0); - assert(phys_level0_sa.h % fmtl->bh == 0); - - enum isl_array_pitch_span array_pitch_span = - isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); - - struct isl_extent2d phys_slice0_sa; - isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout, - &image_align_sa, &phys_level0_sa, - &phys_slice0_sa); - assert(phys_slice0_sa.w % fmtl->bw == 0); - assert(phys_slice0_sa.h % fmtl->bh == 0); - - const uint32_t row_pitch = isl_calc_row_pitch(dev, info, &tile_info, - &image_align_sa, - &phys_slice0_sa); - - const uint32_t array_pitch_el_rows = - isl_calc_array_pitch_el_rows(dev, info, &tile_info, dim_layout, - array_pitch_span, &image_align_sa, - &phys_level0_sa, &phys_slice0_sa); - - const uint32_t total_h_el = - 
isl_calc_total_height_el(dev, info, &tile_info, - phys_level0_sa.array_len, row_pitch, - array_pitch_el_rows); - - const uint32_t total_h_sa = total_h_el * fmtl->bh; - const uint32_t size = row_pitch * isl_align(total_h_sa, tile_info.height); - - /* Alignment of surface base address, in bytes */ - uint32_t base_alignment = MAX(1, info->min_alignment); - assert(isl_is_pow2(base_alignment) && isl_is_pow2(tile_info.size)); - base_alignment = MAX(base_alignment, tile_info.size); - - *surf = (struct isl_surf) { - .dim = info->dim, - .dim_layout = dim_layout, - .msaa_layout = msaa_layout, - .tiling = tiling, - .format = info->format, - - .levels = info->levels, - .samples = info->samples, - - .image_alignment_el = image_align_el, - .logical_level0_px = logical_level0_px, - .phys_level0_sa = phys_level0_sa, - - .size = size, - .alignment = base_alignment, - .row_pitch = row_pitch, - .array_pitch_el_rows = array_pitch_el_rows, - .array_pitch_span = array_pitch_span, - - .usage = info->usage, - }; - - return true; -} - -void -isl_surf_get_tile_info(const struct isl_device *dev, - const struct isl_surf *surf, - struct isl_tile_info *tile_info) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - isl_tiling_get_info(dev, surf->tiling, fmtl->bs, tile_info); -} - -/** - * A variant of isl_surf_get_image_offset_sa() specific to - * ISL_DIM_LAYOUT_GEN4_2D. 
- */ -static void -get_image_offset_sa_gen4_2d(const struct isl_surf *surf, - uint32_t level, uint32_t layer, - uint32_t *x_offset_sa, - uint32_t *y_offset_sa) -{ - assert(level < surf->levels); - assert(layer < surf->phys_level0_sa.array_len); - assert(surf->phys_level0_sa.depth == 1); - - const struct isl_extent3d image_align_sa = - isl_surf_get_image_alignment_sa(surf); - - const uint32_t W0 = surf->phys_level0_sa.width; - const uint32_t H0 = surf->phys_level0_sa.height; - - uint32_t x = 0; - uint32_t y = layer * isl_surf_get_array_pitch_sa_rows(surf); - - for (uint32_t l = 0; l < level; ++l) { - if (l == 1) { - uint32_t W = isl_minify(W0, l); - - if (surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) - isl_msaa_interleaved_scale_px_to_sa(surf->samples, &W, NULL); - - x += isl_align_npot(W, image_align_sa.w); - } else { - uint32_t H = isl_minify(H0, l); - - if (surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) - isl_msaa_interleaved_scale_px_to_sa(surf->samples, NULL, &H); - - y += isl_align_npot(H, image_align_sa.h); - } - } - - *x_offset_sa = x; - *y_offset_sa = y; -} - -/** - * A variant of isl_surf_get_image_offset_sa() specific to - * ISL_DIM_LAYOUT_GEN4_3D. 
- */ -static void -get_image_offset_sa_gen4_3d(const struct isl_surf *surf, - uint32_t level, uint32_t logical_z_offset_px, - uint32_t *x_offset_sa, - uint32_t *y_offset_sa) -{ - assert(level < surf->levels); - assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level)); - assert(surf->phys_level0_sa.array_len == 1); - - const struct isl_extent3d image_align_sa = - isl_surf_get_image_alignment_sa(surf); - - const uint32_t W0 = surf->phys_level0_sa.width; - const uint32_t H0 = surf->phys_level0_sa.height; - const uint32_t D0 = surf->phys_level0_sa.depth; - - uint32_t x = 0; - uint32_t y = 0; - - for (uint32_t l = 0; l < level; ++l) { - const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h); - const uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa.d); - const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); - - y += level_h * max_layers_vert; - } - - const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w); - const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h); - const uint32_t level_d = isl_align_npot(isl_minify(D0, level), image_align_sa.d); - - const uint32_t max_layers_horiz = MIN(level_d, 1u << level); - - x += level_w * (logical_z_offset_px % max_layers_horiz); - y += level_h * (logical_z_offset_px / max_layers_horiz); - - *x_offset_sa = x; - *y_offset_sa = y; -} - -/** - * A variant of isl_surf_get_image_offset_sa() specific to - * ISL_DIM_LAYOUT_GEN9_1D. 
- */ -static void -get_image_offset_sa_gen9_1d(const struct isl_surf *surf, - uint32_t level, uint32_t layer, - uint32_t *x_offset_sa, - uint32_t *y_offset_sa) -{ - assert(level < surf->levels); - assert(layer < surf->phys_level0_sa.array_len); - assert(surf->phys_level0_sa.height == 1); - assert(surf->phys_level0_sa.depth == 1); - assert(surf->samples == 1); - - const uint32_t W0 = surf->phys_level0_sa.width; - const struct isl_extent3d image_align_sa = - isl_surf_get_image_alignment_sa(surf); - - uint32_t x = 0; - - for (uint32_t l = 0; l < level; ++l) { - uint32_t W = isl_minify(W0, l); - uint32_t w = isl_align_npot(W, image_align_sa.w); - - x += w; - } - - *x_offset_sa = x; - *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf); -} - -/** - * Calculate the offset, in units of surface samples, to a subimage in the - * surface. - * - * @invariant level < surface levels - * @invariant logical_array_layer < logical array length of surface - * @invariant logical_z_offset_px < logical depth of surface at level - */ -static void -get_image_offset_sa(const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - uint32_t logical_z_offset_px, - uint32_t *x_offset_sa, - uint32_t *y_offset_sa) -{ - assert(level < surf->levels); - assert(logical_array_layer < surf->logical_level0_px.array_len); - assert(logical_z_offset_px - < isl_minify(surf->logical_level0_px.depth, level)); - - switch (surf->dim_layout) { - case ISL_DIM_LAYOUT_GEN9_1D: - get_image_offset_sa_gen9_1d(surf, level, logical_array_layer, - x_offset_sa, y_offset_sa); - break; - case ISL_DIM_LAYOUT_GEN4_2D: - get_image_offset_sa_gen4_2d(surf, level, logical_array_layer - + logical_z_offset_px, - x_offset_sa, y_offset_sa); - break; - case ISL_DIM_LAYOUT_GEN4_3D: - get_image_offset_sa_gen4_3d(surf, level, logical_z_offset_px, - x_offset_sa, y_offset_sa); - break; - } -} - -void -isl_surf_get_image_offset_el(const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - 
uint32_t logical_z_offset_px, - uint32_t *x_offset_el, - uint32_t *y_offset_el) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - - assert(level < surf->levels); - assert(logical_array_layer < surf->logical_level0_px.array_len); - assert(logical_z_offset_px - < isl_minify(surf->logical_level0_px.depth, level)); - - uint32_t x_offset_sa, y_offset_sa; - get_image_offset_sa(surf, level, - logical_array_layer, - logical_z_offset_px, - &x_offset_sa, - &y_offset_sa); - - *x_offset_el = x_offset_sa / fmtl->bw; - *y_offset_el = y_offset_sa / fmtl->bh; -} - -void -isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, - const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - uint32_t logical_z_offset, - uint32_t *base_address_offset, - uint32_t *x_offset_el, - uint32_t *y_offset_el) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - - struct isl_tile_info tile_info; - isl_surf_get_tile_info(dev, surf, &tile_info); - - uint32_t total_x_offset_el; - uint32_t total_y_offset_el; - isl_surf_get_image_offset_el(surf, level, - logical_array_layer, - logical_z_offset, - &total_x_offset_el, - &total_y_offset_el); - - uint32_t small_y_offset_el = total_y_offset_el % tile_info.height; - uint32_t big_y_offset_el = total_y_offset_el - small_y_offset_el; - uint32_t big_y_offset_B = big_y_offset_el * surf->row_pitch; - - uint32_t total_x_offset_B = total_x_offset_el * fmtl->bs; - uint32_t small_x_offset_B = total_x_offset_B % tile_info.width; - uint32_t small_x_offset_el = small_x_offset_B / fmtl->bs; - uint32_t big_x_offset_B = total_x_offset_B - small_x_offset_B; - - *base_address_offset = big_y_offset_B + big_x_offset_B; - *x_offset_el = small_x_offset_el; - *y_offset_el = small_y_offset_el; -} - -uint32_t -isl_surf_get_depth_format(const struct isl_device *dev, - const struct isl_surf *surf) -{ - /* Support for separate stencil buffers began in gen5. 
Support for - * interleaved depthstencil buffers ceased in gen7. The intermediate gens, - * those that supported separate and interleaved stencil, were gen5 and - * gen6. - * - * For a list of all available formats, see the Sandybridge PRM >> Volume - * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface - * Format (p321). - */ - - bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT; - - assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT); - - if (has_stencil) - assert(ISL_DEV_GEN(dev) < 7); - - switch (surf->format) { - default: - unreachable("bad isl depth format"); - case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS: - assert(ISL_DEV_GEN(dev) < 7); - return 0; /* D32_FLOAT_S8X24_UINT */ - case ISL_FORMAT_R32_FLOAT: - assert(!has_stencil); - return 1; /* D32_FLOAT */ - case ISL_FORMAT_R24_UNORM_X8_TYPELESS: - if (has_stencil) { - assert(ISL_DEV_GEN(dev) < 7); - return 2; /* D24_UNORM_S8_UINT */ - } else { - assert(ISL_DEV_GEN(dev) >= 5); - return 3; /* D24_UNORM_X8_UINT */ - } - case ISL_FORMAT_R16_UNORM: - assert(!has_stencil); - return 5; /* D16_UNORM */ - } -} diff --git a/src/isl/isl.h b/src/isl/isl.h deleted file mode 100644 index 3e0ff935948..00000000000 --- a/src/isl/isl.h +++ /dev/null @@ -1,1025 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file - * @brief Intel Surface Layout - * - * Header Layout - * ------------- - * The header is ordered as: - * - forward declarations - * - macros that may be overridden at compile-time for specific gens - * - enums and constants - * - structs and unions - * - functions - */ - -#pragma once - -#include -#include -#include - -#include "util/macros.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct brw_device_info; -struct brw_image_param; - -#ifndef ISL_DEV_GEN -/** - * @brief Get the hardware generation of isl_device. - * - * You can define this as a compile-time constant in the CFLAGS. For example, - * `gcc -DISL_DEV_GEN(dev)=9 ...`. - */ -#define ISL_DEV_GEN(__dev) ((__dev)->info->gen) -#endif - -#ifndef ISL_DEV_USE_SEPARATE_STENCIL -/** - * You can define this as a compile-time constant in the CFLAGS. For example, - * `gcc -DISL_DEV_USE_SEPARATE_STENCIL(dev)=1 ...`. - */ -#define ISL_DEV_USE_SEPARATE_STENCIL(__dev) ((__dev)->use_separate_stencil) -#endif - -/** - * Hardware enumeration SURFACE_FORMAT. - * - * For the official list, see Broadwell PRM: Volume 2b: Command Reference: - * Enumerations: SURFACE_FORMAT. 
- */ -enum isl_format { - ISL_FORMAT_R32G32B32A32_FLOAT = 0, - ISL_FORMAT_R32G32B32A32_SINT = 1, - ISL_FORMAT_R32G32B32A32_UINT = 2, - ISL_FORMAT_R32G32B32A32_UNORM = 3, - ISL_FORMAT_R32G32B32A32_SNORM = 4, - ISL_FORMAT_R64G64_FLOAT = 5, - ISL_FORMAT_R32G32B32X32_FLOAT = 6, - ISL_FORMAT_R32G32B32A32_SSCALED = 7, - ISL_FORMAT_R32G32B32A32_USCALED = 8, - ISL_FORMAT_R32G32B32A32_SFIXED = 32, - ISL_FORMAT_R64G64_PASSTHRU = 33, - ISL_FORMAT_R32G32B32_FLOAT = 64, - ISL_FORMAT_R32G32B32_SINT = 65, - ISL_FORMAT_R32G32B32_UINT = 66, - ISL_FORMAT_R32G32B32_UNORM = 67, - ISL_FORMAT_R32G32B32_SNORM = 68, - ISL_FORMAT_R32G32B32_SSCALED = 69, - ISL_FORMAT_R32G32B32_USCALED = 70, - ISL_FORMAT_R32G32B32_SFIXED = 80, - ISL_FORMAT_R16G16B16A16_UNORM = 128, - ISL_FORMAT_R16G16B16A16_SNORM = 129, - ISL_FORMAT_R16G16B16A16_SINT = 130, - ISL_FORMAT_R16G16B16A16_UINT = 131, - ISL_FORMAT_R16G16B16A16_FLOAT = 132, - ISL_FORMAT_R32G32_FLOAT = 133, - ISL_FORMAT_R32G32_SINT = 134, - ISL_FORMAT_R32G32_UINT = 135, - ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS = 136, - ISL_FORMAT_X32_TYPELESS_G8X24_UINT = 137, - ISL_FORMAT_L32A32_FLOAT = 138, - ISL_FORMAT_R32G32_UNORM = 139, - ISL_FORMAT_R32G32_SNORM = 140, - ISL_FORMAT_R64_FLOAT = 141, - ISL_FORMAT_R16G16B16X16_UNORM = 142, - ISL_FORMAT_R16G16B16X16_FLOAT = 143, - ISL_FORMAT_A32X32_FLOAT = 144, - ISL_FORMAT_L32X32_FLOAT = 145, - ISL_FORMAT_I32X32_FLOAT = 146, - ISL_FORMAT_R16G16B16A16_SSCALED = 147, - ISL_FORMAT_R16G16B16A16_USCALED = 148, - ISL_FORMAT_R32G32_SSCALED = 149, - ISL_FORMAT_R32G32_USCALED = 150, - ISL_FORMAT_R32G32_SFIXED = 160, - ISL_FORMAT_R64_PASSTHRU = 161, - ISL_FORMAT_B8G8R8A8_UNORM = 192, - ISL_FORMAT_B8G8R8A8_UNORM_SRGB = 193, - ISL_FORMAT_R10G10B10A2_UNORM = 194, - ISL_FORMAT_R10G10B10A2_UNORM_SRGB = 195, - ISL_FORMAT_R10G10B10A2_UINT = 196, - ISL_FORMAT_R10G10B10_SNORM_A2_UNORM = 197, - ISL_FORMAT_R8G8B8A8_UNORM = 199, - ISL_FORMAT_R8G8B8A8_UNORM_SRGB = 200, - ISL_FORMAT_R8G8B8A8_SNORM = 201, - ISL_FORMAT_R8G8B8A8_SINT = 202, - 
ISL_FORMAT_R8G8B8A8_UINT = 203, - ISL_FORMAT_R16G16_UNORM = 204, - ISL_FORMAT_R16G16_SNORM = 205, - ISL_FORMAT_R16G16_SINT = 206, - ISL_FORMAT_R16G16_UINT = 207, - ISL_FORMAT_R16G16_FLOAT = 208, - ISL_FORMAT_B10G10R10A2_UNORM = 209, - ISL_FORMAT_B10G10R10A2_UNORM_SRGB = 210, - ISL_FORMAT_R11G11B10_FLOAT = 211, - ISL_FORMAT_R32_SINT = 214, - ISL_FORMAT_R32_UINT = 215, - ISL_FORMAT_R32_FLOAT = 216, - ISL_FORMAT_R24_UNORM_X8_TYPELESS = 217, - ISL_FORMAT_X24_TYPELESS_G8_UINT = 218, - ISL_FORMAT_L32_UNORM = 221, - ISL_FORMAT_A32_UNORM = 222, - ISL_FORMAT_L16A16_UNORM = 223, - ISL_FORMAT_I24X8_UNORM = 224, - ISL_FORMAT_L24X8_UNORM = 225, - ISL_FORMAT_A24X8_UNORM = 226, - ISL_FORMAT_I32_FLOAT = 227, - ISL_FORMAT_L32_FLOAT = 228, - ISL_FORMAT_A32_FLOAT = 229, - ISL_FORMAT_X8B8_UNORM_G8R8_SNORM = 230, - ISL_FORMAT_A8X8_UNORM_G8R8_SNORM = 231, - ISL_FORMAT_B8X8_UNORM_G8R8_SNORM = 232, - ISL_FORMAT_B8G8R8X8_UNORM = 233, - ISL_FORMAT_B8G8R8X8_UNORM_SRGB = 234, - ISL_FORMAT_R8G8B8X8_UNORM = 235, - ISL_FORMAT_R8G8B8X8_UNORM_SRGB = 236, - ISL_FORMAT_R9G9B9E5_SHAREDEXP = 237, - ISL_FORMAT_B10G10R10X2_UNORM = 238, - ISL_FORMAT_L16A16_FLOAT = 240, - ISL_FORMAT_R32_UNORM = 241, - ISL_FORMAT_R32_SNORM = 242, - ISL_FORMAT_R10G10B10X2_USCALED = 243, - ISL_FORMAT_R8G8B8A8_SSCALED = 244, - ISL_FORMAT_R8G8B8A8_USCALED = 245, - ISL_FORMAT_R16G16_SSCALED = 246, - ISL_FORMAT_R16G16_USCALED = 247, - ISL_FORMAT_R32_SSCALED = 248, - ISL_FORMAT_R32_USCALED = 249, - ISL_FORMAT_B5G6R5_UNORM = 256, - ISL_FORMAT_B5G6R5_UNORM_SRGB = 257, - ISL_FORMAT_B5G5R5A1_UNORM = 258, - ISL_FORMAT_B5G5R5A1_UNORM_SRGB = 259, - ISL_FORMAT_B4G4R4A4_UNORM = 260, - ISL_FORMAT_B4G4R4A4_UNORM_SRGB = 261, - ISL_FORMAT_R8G8_UNORM = 262, - ISL_FORMAT_R8G8_SNORM = 263, - ISL_FORMAT_R8G8_SINT = 264, - ISL_FORMAT_R8G8_UINT = 265, - ISL_FORMAT_R16_UNORM = 266, - ISL_FORMAT_R16_SNORM = 267, - ISL_FORMAT_R16_SINT = 268, - ISL_FORMAT_R16_UINT = 269, - ISL_FORMAT_R16_FLOAT = 270, - ISL_FORMAT_A8P8_UNORM_PALETTE0 = 271, - 
ISL_FORMAT_A8P8_UNORM_PALETTE1 = 272, - ISL_FORMAT_I16_UNORM = 273, - ISL_FORMAT_L16_UNORM = 274, - ISL_FORMAT_A16_UNORM = 275, - ISL_FORMAT_L8A8_UNORM = 276, - ISL_FORMAT_I16_FLOAT = 277, - ISL_FORMAT_L16_FLOAT = 278, - ISL_FORMAT_A16_FLOAT = 279, - ISL_FORMAT_L8A8_UNORM_SRGB = 280, - ISL_FORMAT_R5G5_SNORM_B6_UNORM = 281, - ISL_FORMAT_B5G5R5X1_UNORM = 282, - ISL_FORMAT_B5G5R5X1_UNORM_SRGB = 283, - ISL_FORMAT_R8G8_SSCALED = 284, - ISL_FORMAT_R8G8_USCALED = 285, - ISL_FORMAT_R16_SSCALED = 286, - ISL_FORMAT_R16_USCALED = 287, - ISL_FORMAT_P8A8_UNORM_PALETTE0 = 290, - ISL_FORMAT_P8A8_UNORM_PALETTE1 = 291, - ISL_FORMAT_A1B5G5R5_UNORM = 292, - ISL_FORMAT_A4B4G4R4_UNORM = 293, - ISL_FORMAT_L8A8_UINT = 294, - ISL_FORMAT_L8A8_SINT = 295, - ISL_FORMAT_R8_UNORM = 320, - ISL_FORMAT_R8_SNORM = 321, - ISL_FORMAT_R8_SINT = 322, - ISL_FORMAT_R8_UINT = 323, - ISL_FORMAT_A8_UNORM = 324, - ISL_FORMAT_I8_UNORM = 325, - ISL_FORMAT_L8_UNORM = 326, - ISL_FORMAT_P4A4_UNORM_PALETTE0 = 327, - ISL_FORMAT_A4P4_UNORM_PALETTE0 = 328, - ISL_FORMAT_R8_SSCALED = 329, - ISL_FORMAT_R8_USCALED = 330, - ISL_FORMAT_P8_UNORM_PALETTE0 = 331, - ISL_FORMAT_L8_UNORM_SRGB = 332, - ISL_FORMAT_P8_UNORM_PALETTE1 = 333, - ISL_FORMAT_P4A4_UNORM_PALETTE1 = 334, - ISL_FORMAT_A4P4_UNORM_PALETTE1 = 335, - ISL_FORMAT_Y8_UNORM = 336, - ISL_FORMAT_L8_UINT = 338, - ISL_FORMAT_L8_SINT = 339, - ISL_FORMAT_I8_UINT = 340, - ISL_FORMAT_I8_SINT = 341, - ISL_FORMAT_DXT1_RGB_SRGB = 384, - ISL_FORMAT_R1_UNORM = 385, - ISL_FORMAT_YCRCB_NORMAL = 386, - ISL_FORMAT_YCRCB_SWAPUVY = 387, - ISL_FORMAT_P2_UNORM_PALETTE0 = 388, - ISL_FORMAT_P2_UNORM_PALETTE1 = 389, - ISL_FORMAT_BC1_UNORM = 390, - ISL_FORMAT_BC2_UNORM = 391, - ISL_FORMAT_BC3_UNORM = 392, - ISL_FORMAT_BC4_UNORM = 393, - ISL_FORMAT_BC5_UNORM = 394, - ISL_FORMAT_BC1_UNORM_SRGB = 395, - ISL_FORMAT_BC2_UNORM_SRGB = 396, - ISL_FORMAT_BC3_UNORM_SRGB = 397, - ISL_FORMAT_MONO8 = 398, - ISL_FORMAT_YCRCB_SWAPUV = 399, - ISL_FORMAT_YCRCB_SWAPY = 400, - ISL_FORMAT_DXT1_RGB = 401, - 
ISL_FORMAT_FXT1 = 402, - ISL_FORMAT_R8G8B8_UNORM = 403, - ISL_FORMAT_R8G8B8_SNORM = 404, - ISL_FORMAT_R8G8B8_SSCALED = 405, - ISL_FORMAT_R8G8B8_USCALED = 406, - ISL_FORMAT_R64G64B64A64_FLOAT = 407, - ISL_FORMAT_R64G64B64_FLOAT = 408, - ISL_FORMAT_BC4_SNORM = 409, - ISL_FORMAT_BC5_SNORM = 410, - ISL_FORMAT_R16G16B16_FLOAT = 411, - ISL_FORMAT_R16G16B16_UNORM = 412, - ISL_FORMAT_R16G16B16_SNORM = 413, - ISL_FORMAT_R16G16B16_SSCALED = 414, - ISL_FORMAT_R16G16B16_USCALED = 415, - ISL_FORMAT_BC6H_SF16 = 417, - ISL_FORMAT_BC7_UNORM = 418, - ISL_FORMAT_BC7_UNORM_SRGB = 419, - ISL_FORMAT_BC6H_UF16 = 420, - ISL_FORMAT_PLANAR_420_8 = 421, - ISL_FORMAT_R8G8B8_UNORM_SRGB = 424, - ISL_FORMAT_ETC1_RGB8 = 425, - ISL_FORMAT_ETC2_RGB8 = 426, - ISL_FORMAT_EAC_R11 = 427, - ISL_FORMAT_EAC_RG11 = 428, - ISL_FORMAT_EAC_SIGNED_R11 = 429, - ISL_FORMAT_EAC_SIGNED_RG11 = 430, - ISL_FORMAT_ETC2_SRGB8 = 431, - ISL_FORMAT_R16G16B16_UINT = 432, - ISL_FORMAT_R16G16B16_SINT = 433, - ISL_FORMAT_R32_SFIXED = 434, - ISL_FORMAT_R10G10B10A2_SNORM = 435, - ISL_FORMAT_R10G10B10A2_USCALED = 436, - ISL_FORMAT_R10G10B10A2_SSCALED = 437, - ISL_FORMAT_R10G10B10A2_SINT = 438, - ISL_FORMAT_B10G10R10A2_SNORM = 439, - ISL_FORMAT_B10G10R10A2_USCALED = 440, - ISL_FORMAT_B10G10R10A2_SSCALED = 441, - ISL_FORMAT_B10G10R10A2_UINT = 442, - ISL_FORMAT_B10G10R10A2_SINT = 443, - ISL_FORMAT_R64G64B64A64_PASSTHRU = 444, - ISL_FORMAT_R64G64B64_PASSTHRU = 445, - ISL_FORMAT_ETC2_RGB8_PTA = 448, - ISL_FORMAT_ETC2_SRGB8_PTA = 449, - ISL_FORMAT_ETC2_EAC_RGBA8 = 450, - ISL_FORMAT_ETC2_EAC_SRGB8_A8 = 451, - ISL_FORMAT_R8G8B8_UINT = 456, - ISL_FORMAT_R8G8B8_SINT = 457, - ISL_FORMAT_RAW = 511, - - /* Hardware doesn't understand this out-of-band value */ - ISL_FORMAT_UNSUPPORTED = UINT16_MAX, -}; - -/** - * Numerical base type for channels of isl_format. 
- */ -enum isl_base_type { - ISL_VOID, - ISL_RAW, - ISL_UNORM, - ISL_SNORM, - ISL_UFLOAT, - ISL_SFLOAT, - ISL_UFIXED, - ISL_SFIXED, - ISL_UINT, - ISL_SINT, - ISL_USCALED, - ISL_SSCALED, -}; - -/** - * Colorspace of isl_format. - */ -enum isl_colorspace { - ISL_COLORSPACE_NONE = 0, - ISL_COLORSPACE_LINEAR, - ISL_COLORSPACE_SRGB, - ISL_COLORSPACE_YUV, -}; - -/** - * Texture compression mode of isl_format. - */ -enum isl_txc { - ISL_TXC_NONE = 0, - ISL_TXC_DXT1, - ISL_TXC_DXT3, - ISL_TXC_DXT5, - ISL_TXC_FXT1, - ISL_TXC_RGTC1, - ISL_TXC_RGTC2, - ISL_TXC_BPTC, - ISL_TXC_ETC1, - ISL_TXC_ETC2, -}; - -/** - * @brief Hardware tile mode - * - * WARNING: These values differ from the hardware enum values, which are - * unstable across hardware generations. - * - * Note that legacy Y tiling is ISL_TILING_Y0 instead of ISL_TILING_Y, to - * clearly distinguish it from Yf and Ys. - */ -enum isl_tiling { - ISL_TILING_LINEAR = 0, - ISL_TILING_W, - ISL_TILING_X, - ISL_TILING_Y0, /**< Legacy Y tiling */ - ISL_TILING_Yf, /**< Standard 4K tiling. The 'f' means "four". */ - ISL_TILING_Ys, /**< Standard 64K tiling. The 's' means "sixty-four". */ -}; - -/** - * @defgroup Tiling Flags - * @{ - */ -typedef uint32_t isl_tiling_flags_t; -#define ISL_TILING_LINEAR_BIT (1u << ISL_TILING_LINEAR) -#define ISL_TILING_W_BIT (1u << ISL_TILING_W) -#define ISL_TILING_X_BIT (1u << ISL_TILING_X) -#define ISL_TILING_Y0_BIT (1u << ISL_TILING_Y0) -#define ISL_TILING_Yf_BIT (1u << ISL_TILING_Yf) -#define ISL_TILING_Ys_BIT (1u << ISL_TILING_Ys) -#define ISL_TILING_ANY_MASK (~0u) -#define ISL_TILING_NON_LINEAR_MASK (~ISL_TILING_LINEAR_BIT) - -/** Any Y tiling, including legacy Y tiling. */ -#define ISL_TILING_ANY_Y_MASK (ISL_TILING_Y0_BIT | \ - ISL_TILING_Yf_BIT | \ - ISL_TILING_Ys_BIT) - -/** The Skylake BSpec refers to Yf and Ys as "standard tiling formats". */ -#define ISL_TILING_STD_Y_MASK (ISL_TILING_Yf_BIT | \ - ISL_TILING_Ys_BIT) -/** @} */ - -/** - * @brief Logical dimension of surface. 
- * - * Note: There is no dimension for cube map surfaces. ISL interprets cube maps - * as 2D array surfaces. - */ -enum isl_surf_dim { - ISL_SURF_DIM_1D, - ISL_SURF_DIM_2D, - ISL_SURF_DIM_3D, -}; - -/** - * @brief Physical layout of the surface's dimensions. - */ -enum isl_dim_layout { - /** - * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section - * 6.17.3: 2D Surfaces. - * - * On many gens, 1D surfaces share the same layout as 2D surfaces. From - * the G35 PRM >> Volume 1: Graphics Core >> Section 6.17.2: 1D Surfaces: - * - * One-dimensional surfaces are identical to 2D surfaces with height of - * one. - * - * @invariant isl_surf::phys_level0_sa::depth == 1 - */ - ISL_DIM_LAYOUT_GEN4_2D, - - /** - * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section - * 6.17.5: 3D Surfaces. - * - * @invariant isl_surf::phys_level0_sa::array_len == 1 - */ - ISL_DIM_LAYOUT_GEN4_3D, - - /** - * For details, see the Skylake BSpec >> Memory Views >> Common Surface - * Formats >> Surface Layout and Tiling >> » 1D Surfaces. - */ - ISL_DIM_LAYOUT_GEN9_1D, -}; - -/* TODO(chadv): Explain */ -enum isl_array_pitch_span { - ISL_ARRAY_PITCH_SPAN_FULL, - ISL_ARRAY_PITCH_SPAN_COMPACT, -}; - -/** - * @defgroup Surface Usage - * @{ - */ -typedef uint64_t isl_surf_usage_flags_t; -#define ISL_SURF_USAGE_RENDER_TARGET_BIT (1u << 0) -#define ISL_SURF_USAGE_DEPTH_BIT (1u << 1) -#define ISL_SURF_USAGE_STENCIL_BIT (1u << 2) -#define ISL_SURF_USAGE_TEXTURE_BIT (1u << 3) -#define ISL_SURF_USAGE_CUBE_BIT (1u << 4) -#define ISL_SURF_USAGE_DISABLE_AUX_BIT (1u << 5) -#define ISL_SURF_USAGE_DISPLAY_BIT (1u << 6) -#define ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT (1u << 7) -#define ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT (1u << 8) -#define ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT (1u << 9) -#define ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT (1u << 10) -#define ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT (1u << 11) -/** @} */ - -/** - * Identical to VkSampleCountFlagBits. 
- */ -enum isl_sample_count { - ISL_SAMPLE_COUNT_1_BIT = 1u, - ISL_SAMPLE_COUNT_2_BIT = 2u, - ISL_SAMPLE_COUNT_4_BIT = 4u, - ISL_SAMPLE_COUNT_8_BIT = 8u, - ISL_SAMPLE_COUNT_16_BIT = 16u, -}; -typedef uint32_t isl_sample_count_mask_t; - -/** - * @brief Multisample Format - */ -enum isl_msaa_layout { - /** - * @brief Suface is single-sampled. - */ - ISL_MSAA_LAYOUT_NONE, - - /** - * @brief [SNB+] Interleaved Multisample Format - * - * In this format, multiple samples are interleaved into each cacheline. - * In other words, the sample index is swizzled into the low 6 bits of the - * surface's virtual address space. - * - * For example, suppose the surface is legacy Y tiled, is 4x multisampled, - * and its pixel format is 32bpp. Then the first cacheline is arranged - * thus: - * - * (0,0,0) (0,1,0) (0,0,1) (1,0,1) - * (1,0,0) (1,1,0) (0,1,1) (1,1,1) - * - * (0,0,2) (1,0,2) (0,0,3) (1,0,3) - * (0,1,2) (1,1,2) (0,1,3) (1,1,3) - * - * The hardware docs refer to this format with multiple terms. In - * Sandybridge, this is the only multisample format; so no term is used. - * The Ivybridge docs refer to surfaces in this format as IMS (Interleaved - * Multisample Surface). Later hardware docs additionally refer to this - * format as MSFMT_DEPTH_STENCIL (because the format is deprecated for - * color surfaces). - * - * See the Sandybridge PRM, Volume 4, Part 1, Section 2.7 "Multisampled - * Surface Behavior". - * - * See the Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.1 "Interleaved - * Multisampled Surfaces". - */ - ISL_MSAA_LAYOUT_INTERLEAVED, - - /** - * @brief [IVB+] Array Multisample Format - * - * In this format, the surface's physical layout resembles that of a - * 2D array surface. - * - * Suppose the multisample surface's logical extent is (w, h) and its - * sample count is N. Then surface's physical extent is the same as - * a singlesample 2D surface whose logical extent is (w, h) and array - * length is N. 
Array slice `i` contains the pixel values for sample - * index `i`. - * - * The Ivybridge docs refer to surfaces in this format as UMS - * (Uncompressed Multsample Layout) and CMS (Compressed Multisample - * Surface). The Broadwell docs additionally refer to this format as - * MSFMT_MSS (MSS=Multisample Surface Storage). - * - * See the Broadwell PRM, Volume 5 "Memory Views", Section "Uncompressed - * Multisample Surfaces". - * - * See the Broadwell PRM, Volume 5 "Memory Views", Section "Compressed - * Multisample Surfaces". - */ - ISL_MSAA_LAYOUT_ARRAY, -}; - - -struct isl_device { - const struct brw_device_info *info; - bool use_separate_stencil; - bool has_bit6_swizzling; -}; - -struct isl_extent2d { - union { uint32_t w, width; }; - union { uint32_t h, height; }; -}; - -struct isl_extent3d { - union { uint32_t w, width; }; - union { uint32_t h, height; }; - union { uint32_t d, depth; }; -}; - -struct isl_extent4d { - union { uint32_t w, width; }; - union { uint32_t h, height; }; - union { uint32_t d, depth; }; - union { uint32_t a, array_len; }; -}; - -struct isl_channel_layout { - enum isl_base_type type; - uint8_t bits; /**< Size in bits */ -}; - -/** - * Each format has 3D block extent (width, height, depth). The block extent of - * compressed formats is that of the format's compression block. For example, - * the block extent of ISL_FORMAT_ETC2_RGB8 is (w=4, h=4, d=1). The block - * extent of uncompressed pixel formats, such as ISL_FORMAT_R8G8B8A8_UNORM, is - * is (w=1, h=1, d=1). 
- */ -struct isl_format_layout { - enum isl_format format; - - uint8_t bs; /**< Block size, in bytes, rounded towards 0 */ - uint8_t bw; /**< Block width, in pixels */ - uint8_t bh; /**< Block height, in pixels */ - uint8_t bd; /**< Block depth, in pixels */ - - struct { - struct isl_channel_layout r; /**< Red channel */ - struct isl_channel_layout g; /**< Green channel */ - struct isl_channel_layout b; /**< Blue channel */ - struct isl_channel_layout a; /**< Alpha channel */ - struct isl_channel_layout l; /**< Luminance channel */ - struct isl_channel_layout i; /**< Intensity channel */ - struct isl_channel_layout p; /**< Palette channel */ - } channels; - - enum isl_colorspace colorspace; - enum isl_txc txc; -}; - -struct isl_tile_info { - enum isl_tiling tiling; - uint32_t width; /**< in bytes */ - uint32_t height; /**< in rows of memory */ - uint32_t size; /**< in bytes */ -}; - -/** - * @brief Input to surface initialization - * - * @invariant width >= 1 - * @invariant height >= 1 - * @invariant depth >= 1 - * @invariant levels >= 1 - * @invariant samples >= 1 - * @invariant array_len >= 1 - * - * @invariant if 1D then height == 1 and depth == 1 and samples == 1 - * @invariant if 2D then depth == 1 - * @invariant if 3D then array_len == 1 and samples == 1 - */ -struct isl_surf_init_info { - enum isl_surf_dim dim; - enum isl_format format; - - uint32_t width; - uint32_t height; - uint32_t depth; - uint32_t levels; - uint32_t array_len; - uint32_t samples; - - /** Lower bound for isl_surf::alignment, in bytes. */ - uint32_t min_alignment; - - /** Lower bound for isl_surf::pitch, in bytes. */ - uint32_t min_pitch; - - isl_surf_usage_flags_t usage; - - /** Flags that alter how ISL selects isl_surf::tiling. 
*/ - isl_tiling_flags_t tiling_flags; -}; - -struct isl_surf { - enum isl_surf_dim dim; - enum isl_dim_layout dim_layout; - enum isl_msaa_layout msaa_layout; - enum isl_tiling tiling; - enum isl_format format; - - /** - * Alignment of the upper-left sample of each subimage, in units of surface - * elements. - */ - struct isl_extent3d image_alignment_el; - - /** - * Logical extent of the surface's base level, in units of pixels. This is - * identical to the extent defined in isl_surf_init_info. - */ - struct isl_extent4d logical_level0_px; - - /** - * Physical extent of the surface's base level, in units of physical - * surface samples and aligned to the format's compression block. - * - * Consider isl_dim_layout as an operator that transforms a logical surface - * layout to a physical surface layout. Then - * - * logical_layout := (isl_surf::dim, isl_surf::logical_level0_px) - * isl_surf::phys_level0_sa := isl_surf::dim_layout * logical_layout - */ - struct isl_extent4d phys_level0_sa; - - uint32_t levels; - uint32_t samples; - - /** Total size of the surface, in bytes. */ - uint32_t size; - - /** Required alignment for the surface's base address. */ - uint32_t alignment; - - /** - * Pitch between vertically adjacent surface elements, in bytes. - */ - uint32_t row_pitch; - - /** - * Pitch between physical array slices, in rows of surface elements. - */ - uint32_t array_pitch_el_rows; - - enum isl_array_pitch_span array_pitch_span; - - /** Copy of isl_surf_init_info::usage. 
*/ - isl_surf_usage_flags_t usage; -}; - -extern const struct isl_format_layout isl_format_layouts[]; - -void -isl_device_init(struct isl_device *dev, - const struct brw_device_info *info, - bool has_bit6_swizzling); - -isl_sample_count_mask_t ATTRIBUTE_CONST -isl_device_get_sample_counts(struct isl_device *dev); - -static inline const struct isl_format_layout * ATTRIBUTE_CONST -isl_format_get_layout(enum isl_format fmt) -{ - return &isl_format_layouts[fmt]; -} - -bool -isl_format_has_sint_channel(enum isl_format fmt) ATTRIBUTE_CONST; - -static inline bool -isl_format_is_compressed(enum isl_format fmt) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - return fmtl->txc != ISL_TXC_NONE; -} - -static inline bool -isl_format_has_bc_compression(enum isl_format fmt) -{ - switch (isl_format_get_layout(fmt)->txc) { - case ISL_TXC_DXT1: - case ISL_TXC_DXT3: - case ISL_TXC_DXT5: - return true; - case ISL_TXC_NONE: - case ISL_TXC_FXT1: - case ISL_TXC_RGTC1: - case ISL_TXC_RGTC2: - case ISL_TXC_BPTC: - case ISL_TXC_ETC1: - case ISL_TXC_ETC2: - return false; - } - - unreachable("bad texture compression mode"); - return false; -} - -static inline bool -isl_format_is_yuv(enum isl_format fmt) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - return fmtl->colorspace == ISL_COLORSPACE_YUV; -} - -static inline bool -isl_format_block_is_1x1x1(enum isl_format fmt) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - return fmtl->bw == 1 && fmtl->bh == 1 && fmtl->bd == 1; -} - -static inline bool -isl_format_is_rgb(enum isl_format fmt) -{ - return isl_format_layouts[fmt].channels.r.bits > 0 && - isl_format_layouts[fmt].channels.g.bits > 0 && - isl_format_layouts[fmt].channels.b.bits > 0 && - isl_format_layouts[fmt].channels.a.bits == 0; -} - -enum isl_format isl_format_rgb_to_rgba(enum isl_format rgb) ATTRIBUTE_CONST; -enum isl_format isl_format_rgb_to_rgbx(enum isl_format rgb) ATTRIBUTE_CONST; - -bool 
isl_is_storage_image_format(enum isl_format fmt); - -enum isl_format -isl_lower_storage_image_format(const struct isl_device *dev, - enum isl_format fmt); - -static inline bool -isl_tiling_is_any_y(enum isl_tiling tiling) -{ - return (1u << tiling) & ISL_TILING_ANY_MASK; -} - -static inline bool -isl_tiling_is_std_y(enum isl_tiling tiling) -{ - return (1u << tiling) & ISL_TILING_STD_Y_MASK; -} - -bool -isl_tiling_get_info(const struct isl_device *dev, - enum isl_tiling tiling, - uint32_t format_block_size, - struct isl_tile_info *info); - -void -isl_tiling_get_extent(const struct isl_device *dev, - enum isl_tiling tiling, - uint32_t format_block_size, - struct isl_extent2d *e); -bool -isl_surf_choose_tiling(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling *tiling); - -static inline bool -isl_surf_usage_is_display(isl_surf_usage_flags_t usage) -{ - return usage & ISL_SURF_USAGE_DISPLAY_BIT; -} - -static inline bool -isl_surf_usage_is_depth(isl_surf_usage_flags_t usage) -{ - return usage & ISL_SURF_USAGE_DEPTH_BIT; -} - -static inline bool -isl_surf_usage_is_stencil(isl_surf_usage_flags_t usage) -{ - return usage & ISL_SURF_USAGE_STENCIL_BIT; -} - -static inline bool -isl_surf_usage_is_depth_and_stencil(isl_surf_usage_flags_t usage) -{ - return (usage & ISL_SURF_USAGE_DEPTH_BIT) && - (usage & ISL_SURF_USAGE_STENCIL_BIT); -} - -static inline bool -isl_surf_usage_is_depth_or_stencil(isl_surf_usage_flags_t usage) -{ - return usage & (ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT); -} - -static inline bool -isl_surf_info_is_z16(const struct isl_surf_init_info *info) -{ - return (info->usage & ISL_SURF_USAGE_DEPTH_BIT) && - (info->format == ISL_FORMAT_R16_UNORM); -} - -static inline bool -isl_surf_info_is_z32_float(const struct isl_surf_init_info *info) -{ - return (info->usage & ISL_SURF_USAGE_DEPTH_BIT) && - (info->format == ISL_FORMAT_R32_FLOAT); -} - -static inline struct isl_extent2d -isl_extent2d(uint32_t 
width, uint32_t height) -{ - return (struct isl_extent2d) { .w = width, .h = height }; -} - -static inline struct isl_extent3d -isl_extent3d(uint32_t width, uint32_t height, uint32_t depth) -{ - return (struct isl_extent3d) { .w = width, .h = height, .d = depth }; -} - -static inline struct isl_extent4d -isl_extent4d(uint32_t width, uint32_t height, uint32_t depth, - uint32_t array_len) -{ - return (struct isl_extent4d) { - .w = width, - .h = height, - .d = depth, - .a = array_len, - }; -} - -#define isl_surf_init(dev, surf, ...) \ - isl_surf_init_s((dev), (surf), \ - &(struct isl_surf_init_info) { __VA_ARGS__ }); - -bool -isl_surf_init_s(const struct isl_device *dev, - struct isl_surf *surf, - const struct isl_surf_init_info *restrict info); - -void -isl_surf_get_tile_info(const struct isl_device *dev, - const struct isl_surf *surf, - struct isl_tile_info *tile_info); - -/** - * Alignment of the upper-left sample of each subimage, in units of surface - * elements. - */ -static inline struct isl_extent3d -isl_surf_get_image_alignment_el(const struct isl_surf *surf) -{ - return surf->image_alignment_el; -} - -/** - * Alignment of the upper-left sample of each subimage, in units of surface - * samples. - */ -static inline struct isl_extent3d -isl_surf_get_image_alignment_sa(const struct isl_surf *surf) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - - return (struct isl_extent3d) { - .w = fmtl->bw * surf->image_alignment_el.w, - .h = fmtl->bh * surf->image_alignment_el.h, - .d = fmtl->bd * surf->image_alignment_el.d, - }; -} - -/** - * Pitch between vertically adjacent surface elements, in bytes. - */ -static inline uint32_t -isl_surf_get_row_pitch(const struct isl_surf *surf) -{ - return surf->row_pitch; -} - -/** - * Pitch between vertically adjacent surface elements, in units of surface elements. 
- */ -static inline uint32_t -isl_surf_get_row_pitch_el(const struct isl_surf *surf) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - - assert(surf->row_pitch % fmtl->bs == 0); - return surf->row_pitch / fmtl->bs; -} - -/** - * Pitch between physical array slices, in rows of surface elements. - */ -static inline uint32_t -isl_surf_get_array_pitch_el_rows(const struct isl_surf *surf) -{ - return surf->array_pitch_el_rows; -} - -/** - * Pitch between physical array slices, in units of surface elements. - */ -static inline uint32_t -isl_surf_get_array_pitch_el(const struct isl_surf *surf) -{ - return isl_surf_get_array_pitch_el_rows(surf) * - isl_surf_get_row_pitch_el(surf); -} - -/** - * Pitch between physical array slices, in rows of surface samples. - */ -static inline uint32_t -isl_surf_get_array_pitch_sa_rows(const struct isl_surf *surf) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - return fmtl->bh * isl_surf_get_array_pitch_el_rows(surf); -} - -/** - * Pitch between physical array slices, in bytes. - */ -static inline uint32_t -isl_surf_get_array_pitch(const struct isl_surf *surf) -{ - return isl_surf_get_array_pitch_sa_rows(surf) * surf->row_pitch; -} - -/** - * Calculate the offset, in units of surface elements, to a subimage in the - * surface. - * - * @invariant level < surface levels - * @invariant logical_array_layer < logical array length of surface - * @invariant logical_z_offset_px < logical depth of surface at level - */ -void -isl_surf_get_image_offset_el(const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - uint32_t logical_z_offset_px, - uint32_t *x_offset_el, - uint32_t *y_offset_el); - -/** - * @brief Calculate the intratile offsets to a subimage in the surface. - * - * In @a base_address_offset return the offset from the base of the surface to - * the base address of the first tile of the subimage. 
In @a x_offset_el and - * @a y_offset_el, return the offset, in units of surface elements, from the - * tile's base to the subimage's first surface element. The x and y offsets - * are intratile offsets; that is, they do not exceed the boundary of the - * surface's tiling format. - */ -void -isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, - const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - uint32_t logical_z_offset, - uint32_t *base_address_offset, - uint32_t *x_offset_el, - uint32_t *y_offset_el); - -/** - * @brief Get value of 3DSTATE_DEPTH_BUFFER.SurfaceFormat - * - * @pre surf->usage has ISL_SURF_USAGE_DEPTH_BIT - * @pre surf->format must be a valid format for depth surfaces - */ -uint32_t -isl_surf_get_depth_format(const struct isl_device *dev, - const struct isl_surf *surf); - -#ifdef __cplusplus -} -#endif diff --git a/src/isl/isl_format.c b/src/isl/isl_format.c deleted file mode 100644 index 0fe6e9b83ab..00000000000 --- a/src/isl/isl_format.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "isl.h" - -bool -isl_format_has_sint_channel(enum isl_format fmt) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - return fmtl->channels.r.type == ISL_SINT || - fmtl->channels.g.type == ISL_SINT || - fmtl->channels.b.type == ISL_SINT || - fmtl->channels.a.type == ISL_SINT || - fmtl->channels.l.type == ISL_SINT || - fmtl->channels.i.type == ISL_SINT || - fmtl->channels.p.type == ISL_SINT || - fmtl->channels.g.type == ISL_SINT; -} - -enum isl_format -isl_format_rgb_to_rgba(enum isl_format rgb) -{ - assert(isl_format_is_rgb(rgb)); - - switch (rgb) { - case ISL_FORMAT_R32G32B32_FLOAT: return ISL_FORMAT_R32G32B32A32_FLOAT; - case ISL_FORMAT_R32G32B32_SINT: return ISL_FORMAT_R32G32B32A32_SINT; - case ISL_FORMAT_R32G32B32_UINT: return ISL_FORMAT_R32G32B32A32_UINT; - case ISL_FORMAT_R32G32B32_UNORM: return ISL_FORMAT_R32G32B32A32_UNORM; - case ISL_FORMAT_R32G32B32_SNORM: return ISL_FORMAT_R32G32B32A32_SNORM; - case ISL_FORMAT_R32G32B32_SSCALED: return ISL_FORMAT_R32G32B32A32_SSCALED; - case ISL_FORMAT_R32G32B32_USCALED: return ISL_FORMAT_R32G32B32A32_USCALED; - case ISL_FORMAT_R32G32B32_SFIXED: return ISL_FORMAT_R32G32B32A32_SFIXED; - case ISL_FORMAT_R8G8B8_UNORM: return ISL_FORMAT_R8G8B8A8_UNORM; - case ISL_FORMAT_R8G8B8_SNORM: return ISL_FORMAT_R8G8B8A8_SNORM; - case ISL_FORMAT_R8G8B8_SSCALED: return ISL_FORMAT_R8G8B8A8_SSCALED; - case ISL_FORMAT_R8G8B8_USCALED: return ISL_FORMAT_R8G8B8A8_USCALED; - case ISL_FORMAT_R16G16B16_FLOAT: return ISL_FORMAT_R16G16B16A16_FLOAT; - case ISL_FORMAT_R16G16B16_UNORM: return ISL_FORMAT_R16G16B16A16_UNORM; - case ISL_FORMAT_R16G16B16_SNORM: return ISL_FORMAT_R16G16B16A16_SNORM; - case 
ISL_FORMAT_R16G16B16_SSCALED: return ISL_FORMAT_R16G16B16A16_SSCALED; - case ISL_FORMAT_R16G16B16_USCALED: return ISL_FORMAT_R16G16B16A16_USCALED; - case ISL_FORMAT_R8G8B8_UNORM_SRGB: return ISL_FORMAT_R8G8B8A8_UNORM_SRGB; - case ISL_FORMAT_R16G16B16_UINT: return ISL_FORMAT_R16G16B16A16_UINT; - case ISL_FORMAT_R16G16B16_SINT: return ISL_FORMAT_R16G16B16A16_SINT; - case ISL_FORMAT_R8G8B8_UINT: return ISL_FORMAT_R8G8B8A8_UINT; - case ISL_FORMAT_R8G8B8_SINT: return ISL_FORMAT_R8G8B8A8_SINT; - default: - return ISL_FORMAT_UNSUPPORTED; - } -} - -enum isl_format -isl_format_rgb_to_rgbx(enum isl_format rgb) -{ - assert(isl_format_is_rgb(rgb)); - - switch (rgb) { - case ISL_FORMAT_R32G32B32_FLOAT: - return ISL_FORMAT_R32G32B32X32_FLOAT; - case ISL_FORMAT_R16G16B16_UNORM: - return ISL_FORMAT_R16G16B16X16_UNORM; - case ISL_FORMAT_R16G16B16_FLOAT: - return ISL_FORMAT_R16G16B16X16_FLOAT; - case ISL_FORMAT_R8G8B8_UNORM: - return ISL_FORMAT_R8G8B8X8_UNORM; - case ISL_FORMAT_R8G8B8_UNORM_SRGB: - return ISL_FORMAT_R8G8B8X8_UNORM_SRGB; - default: - return ISL_FORMAT_UNSUPPORTED; - } -} diff --git a/src/isl/isl_format_layout.csv b/src/isl/isl_format_layout.csv deleted file mode 100644 index af2786ae630..00000000000 --- a/src/isl/isl_format_layout.csv +++ /dev/null @@ -1,287 +0,0 @@ -# Copyright 2015 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -# -# @file -# @brief Layout of all hardware surface formats -# -# For the official list, see Broadwell PRM: Volume 2b: Command Reference: -# Enumerations: SURFACE_FORMAT. -# - - -# Columns: -# name: format name in PRM -# bpb: bits per block -# bw: block width, in pixels -# bh: block height, in pixels -# bd: block depth, in pixels -# r: red channel, data type and bitwidth -# g: green channel -# b: blue channel -# a: alpha channel -# l: luminance channel -# i: intensity channel -# p: palette channel -# space: colorspace -# txc: texture compression -# -# Data Types: -# x: void -# r: raw -# un: unorm -# sn: snorm -# uf: ufloat -# sf: sfloat -# ux: ufixed -# sx: sfixed -# ui: uint -# si: sint -# us: uscaled -# ss: sscaled - - -# Table is aligned with the Vim commands below, using the Align plugin: -# :AlignCtrl lr+ p8000000000000P1 -# /^# name/,$ Align, - -# name , bpb, bw, bh, bd, r, g, b, a, l, i, p, space, txc -R32G32B32A32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, sf32, , , , linear, -R32G32B32A32_SINT , 128, 1, 1, 1, si32, si32, si32, si32, , , , linear, -R32G32B32A32_UINT , 128, 1, 1, 1, ui32, ui32, ui32, ui32, , , , linear, -R32G32B32A32_UNORM , 128, 1, 1, 1, un32, un32, un32, un32, , , , linear, -R32G32B32A32_SNORM , 128, 1, 1, 1, sn32, sn32, sn32, sn32, , , , linear, -R64G64_FLOAT , 128, 1, 1, 1, sf64, sf64, , , , , , linear, -R32G32B32X32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, x32, , , , linear, -R32G32B32A32_SSCALED , 128, 1, 1, 1, ss32, ss32, ss32, ss32, , , , linear, 
-R32G32B32A32_USCALED , 128, 1, 1, 1, us32, us32, us32, us32, , , , linear, -R32G32B32A32_SFIXED , 128, 1, 1, 1, sx32, sx32, sx32, sx32, , , , linear, -R64G64_PASSTHRU , 128, 1, 1, 1, r64, r64, , , , , , , -R32G32B32_FLOAT , 96, 1, 1, 1, sf32, sf32, sf32, , , , , linear, -R32G32B32_SINT , 96, 1, 1, 1, si32, si32, si32, , , , , linear, -R32G32B32_UINT , 96, 1, 1, 1, ui32, ui32, ui32, , , , , linear, -R32G32B32_UNORM , 96, 1, 1, 1, un32, un32, un32, , , , , linear, -R32G32B32_SNORM , 96, 1, 1, 1, sn32, sn32, sn32, , , , , linear, -R32G32B32_SSCALED , 96, 1, 1, 1, ss32, ss32, ss32, , , , , linear, -R32G32B32_USCALED , 96, 1, 1, 1, us32, us32, us32, , , , , linear, -R32G32B32_SFIXED , 96, 1, 1, 1, sx32, sx32, sx32, , , , , linear, -R16G16B16A16_UNORM , 64, 1, 1, 1, un16, un16, un16, un16, , , , linear, -R16G16B16A16_SNORM , 64, 1, 1, 1, sn16, sn16, sn16, sn16, , , , linear, -R16G16B16A16_SINT , 64, 1, 1, 1, si16, si16, si16, si16, , , , linear, -R16G16B16A16_UINT , 64, 1, 1, 1, ui16, ui16, ui16, ui16, , , , linear, -R16G16B16A16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, sf16, , , , linear, -R32G32_FLOAT , 64, 1, 1, 1, sf32, sf32, , , , , , linear, -R32G32_SINT , 64, 1, 1, 1, si32, si32, , , , , , linear, -R32G32_UINT , 64, 1, 1, 1, ui32, ui32, , , , , , linear, -R32_FLOAT_X8X24_TYPELESS , 64, 1, 1, 1, sf32, x8, x24, , , , , linear, -X32_TYPELESS_G8X24_UINT , 64, 1, 1, 1, x32, ui8, x24, , , , , linear, -L32A32_FLOAT , 64, 1, 1, 1, , , , sf32, sf32, , , linear, -R32G32_UNORM , 64, 1, 1, 1, un32, un32, , , , , , linear, -R32G32_SNORM , 64, 1, 1, 1, sn32, sn32, , , , , , linear, -R64_FLOAT , 64, 1, 1, 1, sf64, , , , , , , linear, -R16G16B16X16_UNORM , 64, 1, 1, 1, un16, un16, un16, x16, , , , linear, -R16G16B16X16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, x16, , , , linear, -A32X32_FLOAT , 64, 1, 1, 1, , , , sf32, x32, , , alpha, -L32X32_FLOAT , 64, 1, 1, 1, , , , x32, sf32, , , linear, -I32X32_FLOAT , 64, 1, 1, 1, , , , x32, , sf32, , linear, -R16G16B16A16_SSCALED , 64, 1, 1, 
1, ss16, ss16, ss16, ss16, , , , linear, -R16G16B16A16_USCALED , 64, 1, 1, 1, us16, us16, us16, us16, , , , linear, -R32G32_SSCALED , 64, 1, 1, 1, ss32, ss32, , , , , , linear, -R32G32_USCALED , 64, 1, 1, 1, us32, us32, , , , , , linear, -R32G32_SFIXED , 64, 1, 1, 1, sx32, sx32, , , , , , linear, -R64_PASSTHRU , 64, 1, 1, 1, r64, , , , , , , , -B8G8R8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear, -B8G8R8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb, -R10G10B10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, -R10G10B10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, -R10G10B10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear, -R10G10B10_SNORM_A2_UNORM , 32, 1, 1, 1, sn10, sn10, sn10, un2, , , , linear, -R8G8B8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear, -R8G8B8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb, -R8G8B8A8_SNORM , 32, 1, 1, 1, sn8, sn8, sn8, sn8, , , , linear, -R8G8B8A8_SINT , 32, 1, 1, 1, si8, si8, si8, si8, , , , linear, -R8G8B8A8_UINT , 32, 1, 1, 1, ui8, ui8, ui8, ui8, , , , linear, -R16G16_UNORM , 32, 1, 1, 1, un16, un16, , , , , , linear, -R16G16_SNORM , 32, 1, 1, 1, sn16, sn16, , , , , , linear, -R16G16_SINT , 32, 1, 1, 1, si16, si16, , , , , , linear, -R16G16_UINT , 32, 1, 1, 1, ui16, ui16, , , , , , linear, -R16G16_FLOAT , 32, 1, 1, 1, sf16, sf16, , , , , , linear, -B10G10R10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, -B10G10R10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, -R11G11B10_FLOAT , 32, 1, 1, 1, uf11, uf11, uf10, , , , , linear, -R32_SINT , 32, 1, 1, 1, si32, , , , , , , linear, -R32_UINT , 32, 1, 1, 1, ui32, , , , , , , linear, -R32_FLOAT , 32, 1, 1, 1, sf32, , , , , , , linear, -R24_UNORM_X8_TYPELESS , 32, 1, 1, 1, un24, x8, , , , , , linear, -X24_TYPELESS_G8_UINT , 32, 1, 1, 1, x24, ui8, , , , , , linear, -L32_UNORM , 32, 1, 1, 1, , , , , un32, , , linear, -A32_UNORM , 32, 1, 1, 1, , , , un32, , , , alpha, 
-L16A16_UNORM , 32, 1, 1, 1, , , , un16, un16, , , linear, -I24X8_UNORM , 32, 1, 1, 1, , , , x8, , un24, , linear, -L24X8_UNORM , 32, 1, 1, 1, , , , x8, un24, , , linear, -A24X8_UNORM , 32, 1, 1, 1, , , , un24, x8, , , alpha, -I32_FLOAT , 32, 1, 1, 1, , , , , , sf32, , linear, -L32_FLOAT , 32, 1, 1, 1, , , , , sf32, , , linear, -A32_FLOAT , 32, 1, 1, 1, , , , sf32, , , , alpha, -X8B8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear, -A8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, x8, un8, , , , linear, -B8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear, -B8G8R8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear, -B8G8R8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb, -R8G8B8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear, -R8G8B8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb, -R9G9B9E5_SHAREDEXP , 32, 1, 1, 1, ui9, ui9, ui9, , , , , linear, -B10G10R10X2_UNORM , 32, 1, 1, 1, un10, un10, un10, x2, , , , linear, -L16A16_FLOAT , 32, 1, 1, 1, , , , sf16, sf16, , , linear, -R32_UNORM , 32, 1, 1, 1, un32, , , , , , , linear, -R32_SNORM , 32, 1, 1, 1, sn32, , , , , , , linear, -R10G10B10X2_USCALED , 32, 1, 1, 1, us10, us10, us10, x2, , , , linear, -R8G8B8A8_SSCALED , 32, 1, 1, 1, ss8, ss8, ss8, ss8, , , , linear, -R8G8B8A8_USCALED , 32, 1, 1, 1, us8, us8, us8, us8, , , , linear, -R16G16_SSCALED , 32, 1, 1, 1, ss16, ss6, , , , , , linear, -R16G16_USCALED , 32, 1, 1, 1, us16, us16, , , , , , linear, -R32_SSCALED , 32, 1, 1, 1, ss32, , , , , , , linear, -R32_USCALED , 32, 1, 1, 1, us32, , , , , , , linear, -B5G6R5_UNORM , 16, 1, 1, 1, un5, un6, un5, , , , , linear, -B5G6R5_UNORM_SRGB , 16, 1, 1, 1, un5, un6, un5, , , , , srgb, -B5G5R5A1_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear, -B5G5R5A1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, un1, , , , srgb, -B4G4R4A4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear, -B4G4R4A4_UNORM_SRGB , 16, 1, 1, 1, un4, un4, un4, un4, , , , srgb, -R8G8_UNORM , 16, 1, 
1, 1, un8, un8, , , , , , linear, -R8G8_SNORM , 16, 1, 1, 1, sn8, sn8, , , , , , linear, -R8G8_SINT , 16, 1, 1, 1, si8, si8, , , , , , linear, -R8G8_UINT , 16, 1, 1, 1, ui8, ui8, , , , , , linear, -R16_UNORM , 16, 1, 1, 1, un16, , , , , , , linear, -R16_SNORM , 16, 1, 1, 1, sn16, , , , , , , linear, -R16_SINT , 16, 1, 1, 1, si16, , , , , , , linear, -R16_UINT , 16, 1, 1, 1, ui16, , , , , , , linear, -R16_FLOAT , 16, 1, 1, 1, sf16, , , , , , , linear, -A8P8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear, -A8P8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear, -I16_UNORM , 16, 1, 1, 1, , , , , , un16, , linear, -L16_UNORM , 16, 1, 1, 1, , , , , un16, , , linear, -A16_UNORM , 16, 1, 1, 1, , , , un16, , , , alpha, -L8A8_UNORM , 16, 1, 1, 1, , , , un8, un8, , , linear, -I16_FLOAT , 16, 1, 1, 1, , , , , , sf16, , linear, -L16_FLOAT , 16, 1, 1, 1, , , , , sf16, , , linear, -A16_FLOAT , 16, 1, 1, 1, , , , sf16, , , , alpha, -L8A8_UNORM_SRGB , 16, 1, 1, 1, , , , un8, un8, , , srgb, -R5G5_SNORM_B6_UNORM , 16, 1, 1, 1, sn5, sn5, un6, , , , , linear, -B5G5R5X1_UNORM , 16, 1, 1, 1, un5, un5, un5, x1, , , , linear, -B5G5R5X1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, x1, , , , srgb, -R8G8_SSCALED , 16, 1, 1, 1, ss8, ss8, , , , , , linear, -R8G8_USCALED , 16, 1, 1, 1, us8, us8, , , , , , linear, -R16_SSCALED , 16, 1, 1, 1, ss16, , , , , , , linear, -R16_USCALED , 16, 1, 1, 1, us16, , , , , , , linear, -P8A8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear, -P8A8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear, -A1B5G5R5_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear, -A4B4G4R4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear, -L8A8_UINT , 16, 1, 1, 1, , , , ui8, ui8, , , linear, -L8A8_SINT , 16, 1, 1, 1, , , , si8, si8, , , linear, -R8_UNORM , 8, 1, 1, 1, un8, , , , , , , linear, -R8_SNORM , 8, 1, 1, 1, sn8, , , , , , , linear, -R8_SINT , 8, 1, 1, 1, si8, , , , , , , linear, -R8_UINT , 8, 1, 1, 1, ui8, , , , , , , linear, -A8_UNORM , 
8, 1, 1, 1, , , , un8, , , , alpha, -I8_UNORM , 8, 1, 1, 1, , , , , , un8, , linear, -L8_UNORM , 8, 1, 1, 1, , , , , un8, , , linear, -P4A4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear, -A4P4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear, -R8_SSCALED , 8, 1, 1, 1, ss8, , , , , , , linear, -R8_USCALED , 8, 1, 1, 1, us8, , , , , , , linear, -P8_UNORM_PALETTE0 , 8, 1, 1, 1, , , , , , , un8, linear, -L8_UNORM_SRGB , 8, 1, 1, 1, , , , , un8, , , linear, -P8_UNORM_PALETTE1 , 8, 1, 1, 1, , , , , , , un8, linear, -P4A4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear, -A4P4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear, -Y8_UNORM , 0, 0, 0, 0, , , , , , , , yuv, -L8_UINT , 8, 1, 1, 1, , , , , ui8, , , linear, -L8_SINT , 8, 1, 1, 1, , , , , si8, , , linear, -I8_UINT , 8, 1, 1, 1, , , , , , ui8, , linear, -I8_SINT , 8, 1, 1, 1, , , , , , si8, , linear, -DXT1_RGB_SRGB , 64, 4, 4, 1, un4, un4, un4, , , , , srgb, dxt1 -R1_UNORM , 1, 1, 1, 1, un1, , , , , , , linear, -YCRCB_NORMAL , 0, 0, 0, 0, , , , , , , , yuv, -YCRCB_SWAPUVY , 0, 0, 0, 0, , , , , , , , yuv, -P2_UNORM_PALETTE0 , 2, 1, 1, 1, , , , , , , un2, linear, -P2_UNORM_PALETTE1 , 2, 1, 1, 1, , , , , , , un2, linear, -BC1_UNORM , 64, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt1 -BC2_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt3 -BC3_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt5 -BC4_UNORM , 64, 4, 4, 1, un8, , , , , , , linear, rgtc1 -BC5_UNORM , 128, 4, 4, 1, un8, un8, , , , , , linear, rgtc2 -BC1_UNORM_SRGB , 64, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt1 -BC2_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt3 -BC3_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt5 -MONO8 , 1, 1, 1, 1, , , , , , , , , -YCRCB_SWAPUV , 0, 0, 0, 0, , , , , , , , yuv, -YCRCB_SWAPY , 0, 0, 0, 0, , , , , , , , yuv, -DXT1_RGB , 64, 4, 4, 1, un4, un4, un4, , , , , linear, dxt1 -FXT1 , 128, 8, 4, 1, un4, un4, un4, , , , , linear, fxt1 
-R8G8B8_UNORM , 24, 1, 1, 1, un8, un8, un8, , , , , linear, -R8G8B8_SNORM , 24, 1, 1, 1, sn8, sn8, sn8, , , , , linear, -R8G8B8_SSCALED , 24, 1, 1, 1, ss8, ss8, ss8, , , , , linear, -R8G8B8_USCALED , 24, 1, 1, 1, us8, us8, us8, , , , , linear, -R64G64B64A64_FLOAT , 256, 1, 1, 1, sf64, sf64, sf64, sf64, , , , linear, -R64G64B64_FLOAT , 196, 1, 1, 1, sf64, sf64, sf64, , , , , linear, -BC4_SNORM , 64, 4, 4, 1, sn8, , , , , , , linear, rgtc1 -BC5_SNORM , 128, 4, 4, 1, sn8, sn8, , , , , , linear, rgtc2 -R16G16B16_FLOAT , 48, 1, 1, 1, sf16, sf16, sf16, , , , , linear, -R16G16B16_UNORM , 48, 1, 1, 1, un16, un16, un16, , , , , linear, -R16G16B16_SNORM , 48, 1, 1, 1, sn16, sn16, sn16, , , , , linear, -R16G16B16_SSCALED , 48, 1, 1, 1, ss16, ss16, ss16, , , , , linear, -R16G16B16_USCALED , 48, 1, 1, 1, us16, us16, us16, , , , , linear, -BC6H_SF16 , 128, 4, 4, 1, sf16, sf16, sf16, , , , , linear, bptc -BC7_UNORM , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, bptc -BC7_UNORM_SRGB , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, bptc -BC6H_UF16 , 128, 4, 4, 1, uf16, uf16, uf16, , , , , linear, bptc -PLANAR_420_8 , 0, 0, 0, 0, , , , , , , , yuv, -R8G8B8_UNORM_SRGB , 24, 1, 1, 1, un8, un8, un8, , , , , srgb, -ETC1_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc1 -ETC2_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc2 -EAC_R11 , 64, 4, 4, 1, un11, , , , , , , linear, etc2 -EAC_RG11 , 128, 4, 4, 1, un11, un11, , , , , , linear, etc2 -EAC_SIGNED_R11 , 64, 4, 4, 1, sn11, , , , , , , linear, etc2 -EAC_SIGNED_RG11 , 128, 4, 4, 1, sn11, sn11, , , , , , linear, etc2 -ETC2_SRGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , srgb, etc2 -R16G16B16_UINT , 48, 1, 1, 1, ui16, ui16, ui16, , , , , linear, -R16G16B16_SINT , 48, 1, 1, 1, si16, si16, si16, , , , , linear, -R32_SFIXED , 32, 1, 1, 1, sx16, , , , , , , linear, -R10G10B10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear, -R10G10B10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear, -R10G10B10A2_SSCALED , 32, 
1, 1, 1, ss10, ss10, ss10, ss2, , , , linear, -R10G10B10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear, -B10G10R10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear, -B10G10R10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear, -B10G10R10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear, -B10G10R10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear, -B10G10R10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear, -R64G64B64A64_PASSTHRU , 256, 1, 1, 1, r64, r64, r64, r64, , , , , -R64G64B64_PASSTHRU , 192, 1, 1, 1, r64, r64, r64, , , , , , -ETC2_RGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , linear, etc2 -ETC2_SRGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , srgb, etc2 -ETC2_EAC_RGBA8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, etc2 -ETC2_EAC_SRGB8_A8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, etc2 -R8G8B8_UINT , 24, 1, 1, 1, ui8, ui8, ui8, , , , , linear, -R8G8B8_SINT , 24, 1, 1, 1, si8, si8, si8, , , , , linear, -RAW , 0, 0, 0, 0, , , , , , , , , diff --git a/src/isl/isl_format_layout_gen.bash b/src/isl/isl_format_layout_gen.bash deleted file mode 100755 index db883827376..00000000000 --- a/src/isl/isl_format_layout_gen.bash +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright 2015 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -set -eu -set -o pipefail - -cat <<'EOF' -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "isl.h" - -const struct isl_format_layout -isl_format_layouts[] = { -EOF - -sed -r ' -# Delete comment lines and empty lines -/^[[:space:]]*#/d -/^[[:space:]]*$/d - -# Delete spaces -s/[[:space:]]//g - -# Translate formats -s/^([A-Za-z0-9_]+),*/ISL_FORMAT_\1,/ - -# Translate data type of channels -s/\/ISL_COLORSPACE_\1/ -s/\// - -# Translate texture compression -s/\<(dxt|fxt|rgtc|bptc|etc)([0-9]*)\>/ISL_TXC_\1\2/ -' | -tr 'a-z' 'A-Z' | # Convert to uppersace -while IFS=, read -r format bpb bw bh bd \ - red green blue alpha \ - luminance intensity palette \ - colorspace txc -do - : ${colorspace:=ISL_COLORSPACE_NONE} - : ${txc:=ISL_TXC_NONE} - - cat <samples >= 1); - - *msaa_layout = ISL_MSAA_LAYOUT_NONE; - return true; -} - -void -gen4_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - assert(info->samples == 1); - assert(msaa_layout == ISL_MSAA_LAYOUT_NONE); - assert(!isl_tiling_is_std_y(tiling)); - - /* Note that neither the surface's horizontal nor vertical image alignment - * is programmable on gen4 nor gen5. - * - * From the G35 PRM (2008-01), Volume 1 Graphics Core, Section 6.17.3.4 - * Alignment Unit Size: - * - * Note that the compressed formats are padded to a full compression - * cell. 
- * - * +------------------------+--------+--------+ - * | format | halign | valign | - * +------------------------+--------+--------+ - * | YUV 4:2:2 formats | 4 | 2 | - * | uncompressed formats | 4 | 2 | - * +------------------------+--------+--------+ - */ - - if (isl_format_is_compressed(info->format)) { - *image_align_el = isl_extent3d(1, 1, 1); - return; - } - - *image_align_el = isl_extent3d(4, 2, 1); -} diff --git a/src/isl/isl_gen4.h b/src/isl/isl_gen4.h deleted file mode 100644 index 06cd70b9206..00000000000 --- a/src/isl/isl_gen4.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "isl_priv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -bool -gen4_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout); - -void -gen4_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el); - -#ifdef __cplusplus -} -#endif diff --git a/src/isl/isl_gen6.c b/src/isl/isl_gen6.c deleted file mode 100644 index 24c393925ed..00000000000 --- a/src/isl/isl_gen6.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "isl_gen6.h" -#include "isl_priv.h" - -bool -gen6_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - assert(ISL_DEV_GEN(dev) == 6); - assert(info->samples >= 1); - - if (info->samples == 1) { - *msaa_layout = ISL_MSAA_LAYOUT_NONE; - return false; - } - - /* From the Sandybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Surface - * Format: - * - * If Number of Multisamples is set to a value other than - * MULTISAMPLECOUNT_1, this field cannot be set to the following - * formats: - * - * - any format with greater than 64 bits per element - * - any compressed texture format (BC*) - * - any YCRCB* format - */ - if (fmtl->bs > 8) - return false; - if (isl_format_is_compressed(info->format)) - return false; - if (isl_format_is_yuv(info->format)) - return false; - - /* From the Sandybridge PRM, Volume 4 Part 1 p85, SURFACE_STATE, Number of - * Multisamples: - * - * If this field is any value other than MULTISAMPLECOUNT_1 the - * following restrictions apply: - * - * - the Surface Type must be SURFTYPE_2D - * - [...] - */ - if (info->dim != ISL_SURF_DIM_2D) - return false; - - /* More obvious restrictions */ - if (isl_surf_usage_is_display(info->usage)) - return false; - if (tiling == ISL_TILING_LINEAR) - return false; - if (info->levels > 1) - return false; - - *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; - return true; -} - -void -gen6_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - /* Note that the surface's horizontal image alignment is not programmable - * on Sandybridge. 
- * - * From the Sandybridge PRM (2011-05), Volume 1, Part 1, Section 7.18.3.4 - * Alignment Unit Size: - * - * Note that the compressed formats are padded to a full compression cell. - * - * +------------------------+--------+--------+ - * | format | halign | valign | - * +------------------------+--------+--------+ - * | YUV 4:2:2 formats | 4 | * | - * | uncompressed formats | 4 | * | - * +------------------------+--------+--------+ - * - * * For these formats, the vertical alignment factor “j” is determined - * as follows: - * - j = 4 for any depth buffer - * - j = 2 for separate stencil buffer - * - j = 4 for any render target surface is multisampled (4x) - * - j = 2 for all other render target surface - * - * From the Sandrybridge PRM (2011-05), Volume 4, Part 1, Section 2.11.2 - * SURFACE_STATE, Surface Vertical Alignment: - * - * - This field must be set to VALIGN_2 if the Surface Format is 96 bits - * per element (BPE). - * - * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL - * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY - * (0x190) - */ - - if (isl_format_is_compressed(info->format)) { - *image_align_el = isl_extent3d(1, 1, 1); - return; - } - - if (isl_format_is_yuv(info->format)) { - *image_align_el = isl_extent3d(4, 2, 1); - return; - } - - if (info->samples > 1) { - *image_align_el = isl_extent3d(4, 4, 1); - return; - } - - if (isl_surf_usage_is_depth_or_stencil(info->usage) && - !ISL_DEV_USE_SEPARATE_STENCIL(dev)) { - /* interleaved depthstencil buffer */ - *image_align_el = isl_extent3d(4, 4, 1); - return; - } - - if (isl_surf_usage_is_depth(info->usage)) { - /* separate depth buffer */ - *image_align_el = isl_extent3d(4, 4, 1); - return; - } - - if (isl_surf_usage_is_stencil(info->usage)) { - /* separate stencil buffer */ - *image_align_el = isl_extent3d(4, 2, 1); - return; - } - - *image_align_el = isl_extent3d(4, 2, 1); -} diff --git a/src/isl/isl_gen6.h b/src/isl/isl_gen6.h deleted file mode 100644 index 
0779c674940..00000000000 --- a/src/isl/isl_gen6.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "isl_priv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -bool -gen6_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout); - -void -gen6_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el); - -#ifdef __cplusplus -} -#endif diff --git a/src/isl/isl_gen7.c b/src/isl/isl_gen7.c deleted file mode 100644 index 7064e852e65..00000000000 --- a/src/isl/isl_gen7.c +++ /dev/null @@ -1,395 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "isl_gen7.h" -#include "isl_priv.h" - -bool -gen7_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - bool require_array = false; - bool require_interleaved = false; - - assert(ISL_DEV_GEN(dev) == 7); - assert(info->samples >= 1); - - if (info->samples == 1) { - *msaa_layout = ISL_MSAA_LAYOUT_NONE; - return true; - } - - /* From the Ivybridge PRM, Volume 4 Part 1 p63, SURFACE_STATE, Surface - * Format: - * - * If Number of Multisamples is set to a value other than - * MULTISAMPLECOUNT_1, this field cannot be set to the following - * formats: any format with greater than 64 bits per element, any - * compressed texture format (BC*), and any YCRCB* format. - */ - if (fmtl->bs > 8) - return false; - if (isl_format_is_compressed(info->format)) - return false; - if (isl_format_is_yuv(info->format)) - return false; - - /* From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of - * Multisamples: - * - * - If this field is any value other than MULTISAMPLECOUNT_1, the - * Surface Type must be SURFTYPE_2D. - * - * - If this field is any value other than MULTISAMPLECOUNT_1, Surface - * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero - */ - if (info->dim != ISL_SURF_DIM_2D) - return false; - if (info->levels > 1) - return false; - - /* The Ivyrbridge PRM insists twice that signed integer formats cannot be - * multisampled. - * - * From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of - * Multisamples: - * - * - This field must be set to MULTISAMPLECOUNT_1 for SINT MSRTs when - * all RT channels are not written. - * - * And errata from the Ivybridge PRM, Volume 4 Part 1 p77, - * RENDER_SURFACE_STATE, MCS Enable: - * - * This field must be set to 0 [MULTISAMPLECOUNT_1] for all SINT MSRTs - * when all RT channels are not written. 
- * - * Note that the above SINT restrictions apply only to *MSRTs* (that is, - * *multisampled* render targets). The restrictions seem to permit an MCS - * if the render target is singlesampled. - */ - if (isl_format_has_sint_channel(info->format)) - return false; - - /* More obvious restrictions */ - if (isl_surf_usage_is_display(info->usage)) - return false; - if (tiling == ISL_TILING_LINEAR) - return false; - - /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled - * Suface Storage Format: - * - * +---------------------+----------------------------------------------------------------+ - * | MSFMT_MSS | Multsampled surface was/is rendered as a render target | - * | MSFMT_DEPTH_STENCIL | Multisampled surface was rendered as a depth or stencil buffer | - * +---------------------+----------------------------------------------------------------+ - * - * In the table above, MSFMT_MSS refers to ISL_MSAA_LAYOUT_ARRAY, and - * MSFMT_DEPTH_STENCIL refers to ISL_MSAA_LAYOUT_INTERLEAVED. - */ - if (isl_surf_usage_is_depth_or_stencil(info->usage)) - require_interleaved = true; - - /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled - * Suface Storage Format: - * - * If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, Width - * is >= 8192 (meaning the actual surface width is >= 8193 pixels), this - * field must be set to MSFMT_MSS. - */ - if (info->samples == 8 && info->width == 8192) - require_array = true; - - /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled - * Suface Storage Format: - * - * If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, - * ((Depth+1) * (Height+1)) is > 4,194,304, OR if the surface’s Number - * of Multisamples is MULTISAMPLECOUNT_4, ((Depth+1) * (Height+1)) is - * > 8,388,608, this field must be set to MSFMT_DEPTH_STENCIL. 
- */ - if ((info->samples == 8 && info->height > 4194304u) || - (info->samples == 4 && info->height > 8388608u)) - require_interleaved = true; - - /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled - * Suface Storage Format: - * - * This field must be set to MSFMT_DEPTH_STENCIL if Surface Format is - * one of the following: I24X8_UNORM, L24X8_UNORM, A24X8_UNORM, or - * R24_UNORM_X8_TYPELESS. - */ - if (info->format == ISL_FORMAT_I24X8_UNORM || - info->format == ISL_FORMAT_L24X8_UNORM || - info->format == ISL_FORMAT_A24X8_UNORM || - info->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) - require_interleaved = true; - - if (require_array && require_interleaved) - return false; - - if (require_interleaved) { - *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; - return true; - } - - /* Default to the array layout because it permits multisample - * compression. - */ - *msaa_layout = ISL_MSAA_LAYOUT_ARRAY; - return true; -} - -static bool -gen7_format_needs_valign2(const struct isl_device *dev, - enum isl_format format) -{ - /* This workaround applies only to gen7 */ - if (ISL_DEV_GEN(dev) > 7) - return false; - - /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, - * RENDER_SURFACE_STATE Surface Vertical Alignment: - * - * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL - * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY - * (0x190) - * - * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT. - */ - return isl_format_is_yuv(format) || - format == ISL_FORMAT_R32G32B32_FLOAT; -} - -/** - * @brief Filter out tiling flags that are incompatible with the surface. - * - * The resultant outgoing @a flags is a subset of the incoming @a flags. The - * outgoing flags may be empty (0x0) if the incoming flags were too - * restrictive. 
- * - * For example, if the surface will be used for a display - * (ISL_SURF_USAGE_DISPLAY_BIT), then this function filters out all tiling - * flags except ISL_TILING_X_BIT and ISL_TILING_LINEAR_BIT. - */ -void -gen7_filter_tiling(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - isl_tiling_flags_t *flags) -{ - /* IVB+ requires separate stencil */ - assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); - - /* Clear flags unsupported on this hardware */ - if (ISL_DEV_GEN(dev) < 9) { - *flags &= ~ISL_TILING_Yf_BIT; - *flags &= ~ISL_TILING_Ys_BIT; - } - - /* And... clear the Yf and Ys bits anyway because Anvil doesn't support - * them yet. - */ - *flags &= ~ISL_TILING_Yf_BIT; /* FINISHME[SKL]: Support Yf */ - *flags &= ~ISL_TILING_Ys_BIT; /* FINISHME[SKL]: Support Ys */ - - if (isl_surf_usage_is_depth(info->usage)) { - /* Depth requires Y. */ - *flags &= ISL_TILING_ANY_Y_MASK; - } - - /* Separate stencil requires W tiling, and W tiling requires separate - * stencil. - */ - if (isl_surf_usage_is_stencil(info->usage)) { - *flags &= ISL_TILING_W_BIT; - } else { - *flags &= ~ISL_TILING_W_BIT; - } - - if (info->usage & (ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT | - ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT | - ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT)) { - assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT); - isl_finishme("%s:%s: handle rotated display surfaces", - __FILE__, __func__); - } - - if (info->usage & (ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT | - ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT)) { - assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT); - isl_finishme("%s:%s: handle flipped display surfaces", - __FILE__, __func__); - } - - if (info->usage & ISL_SURF_USAGE_DISPLAY_BIT) { - /* Before Skylake, the display engine does not accept Y */ - /* FINISHME[SKL]: Y tiling for display surfaces */ - *flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT); - } - - if (info->samples > 1) { - /* From the Sandybridge PRM, Volume 4 Part 1, SURFACE_STATE Tiled - * Surface: - * - * For multisample 
render targets, this field must be 1 (true). MSRTs - * can only be tiled. - * - * Multisample surfaces never require X tiling, and Y tiling generally - * performs better than X. So choose Y. (Unless it's stencil, then it - * must be W). - */ - *flags &= (ISL_TILING_ANY_Y_MASK | ISL_TILING_W_BIT); - } - - /* workaround */ - if (ISL_DEV_GEN(dev) == 7 && - gen7_format_needs_valign2(dev, info->format) && - (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) && - info->samples == 1) { - /* Y tiling is illegal. From the Ivybridge PRM, Vol4 Part1 2.12.2.1, - * SURFACE_STATE Surface Vertical Alignment: - * - * This field must be set to VALIGN_4 for all tiled Y Render Target - * surfaces. - */ - *flags &= ~ISL_TILING_Y0_BIT; - } -} - -/** - * Choose horizontal subimage alignment, in units of surface elements. - */ -static uint32_t -gen7_choose_halign_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info) -{ - if (isl_format_is_compressed(info->format)) - return 1; - - /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, - * RENDER_SURFACE_STATE Surface Hoizontal Alignment: - * - * - This field is intended to be set to HALIGN_8 only if the surface - * was rendered as a depth buffer with Z16 format or a stencil buffer, - * since these surfaces support only alignment of 8. Use of HALIGN_8 - * for other surfaces is supported, but uses more memory. - */ - if (isl_surf_info_is_z16(info) || - isl_surf_usage_is_stencil(info->usage)) - return 8; - - return 4; -} - -/** - * Choose vertical subimage alignment, in units of surface elements. 
- */ -static uint32_t -gen7_choose_valign_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling) -{ - bool require_valign2 = false; - bool require_valign4 = false; - - if (isl_format_is_compressed(info->format)) - return 1; - - if (gen7_format_needs_valign2(dev, info->format)) - require_valign2 = true; - - /* From the Ivybridge PRM, Volume 4, Part 1, Section 2.12.1: - * RENDER_SURFACE_STATE Surface Vertical Alignment: - * - * - This field is intended to be set to VALIGN_4 if the surface was - * rendered as a depth buffer, for a multisampled (4x) render target, - * or for a multisampled (8x) render target, since these surfaces - * support only alignment of 4. Use of VALIGN_4 for other surfaces is - * supported, but uses more memory. This field must be set to - * VALIGN_4 for all tiled Y Render Target surfaces. - * - */ - if (isl_surf_usage_is_depth(info->usage) || - info->samples > 1 || - tiling == ISL_TILING_Y0) { - require_valign4 = true; - } - - if (isl_surf_usage_is_stencil(info->usage)) { - /* The Ivybridge PRM states that the stencil buffer's vertical alignment - * is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment - * Unit Size]. However, valign=8 is outside the set of valid values of - * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2 - * (0x0) and VALIGN_4 (0x1). - * - * The PRM is generally confused about the width, height, and alignment - * of the stencil buffer; and this confusion appears elsewhere. 
For - * example, the following PRM text effectively converts the stencil - * buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM, - * Volume 1, Part 1, Section - * 6.18.4.2 Base Address and LOD Calculation]: - * - * For separate stencil buffer, the width must be mutiplied by 2 and - * height divided by 2 as follows: - * - * w_L = 2*i*ceil(W_L/i) - * h_L = 1/2*j*ceil(H_L/j) - * - * The root of the confusion is that, in W tiling, each pair of rows is - * interleaved into one. - * - * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API - * is more polished. - */ - require_valign4 = true; - } - - assert(!require_valign2 || !require_valign4); - - if (require_valign4) - return 4; - - /* Prefer VALIGN_2 because it conserves memory. */ - return 2; -} - -void -gen7_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - /* IVB+ does not support combined depthstencil. 
*/ - assert(!isl_surf_usage_is_depth_and_stencil(info->usage)); - - *image_align_el = (struct isl_extent3d) { - .w = gen7_choose_halign_el(dev, info), - .h = gen7_choose_valign_el(dev, info, tiling), - .d = 1, - }; -} diff --git a/src/isl/isl_gen7.h b/src/isl/isl_gen7.h deleted file mode 100644 index 2a95b68a9bd..00000000000 --- a/src/isl/isl_gen7.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "isl_priv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void -gen7_filter_tiling(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - isl_tiling_flags_t *flags); - -bool -gen7_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout); - -void -gen7_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el); - -#ifdef __cplusplus -} -#endif diff --git a/src/isl/isl_gen8.c b/src/isl/isl_gen8.c deleted file mode 100644 index a46427aacc8..00000000000 --- a/src/isl/isl_gen8.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "isl_gen8.h" -#include "isl_priv.h" - -bool -gen8_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout) -{ - bool require_array = false; - bool require_interleaved = false; - - assert(info->samples >= 1); - - if (info->samples == 1) { - *msaa_layout = ISL_MSAA_LAYOUT_NONE; - return true; - } - - /* From the Broadwell PRM >> Volume2d: Command Structures >> - * RENDER_SURFACE_STATE Tile Mode: - * - * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field - * must be YMAJOR. - * - * As usual, though, stencil is special. - */ - if (!isl_tiling_is_any_y(tiling) && !isl_surf_usage_is_stencil(info->usage)) - return false; - - /* From the Broadwell PRM >> Volume2d: Command Structures >> - * RENDER_SURFACE_STATE Multisampled Surface Storage Format: - * - * All multisampled render target surfaces must have this field set to - * MSFMT_MSS - */ - if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) - require_array = true; - - /* From the Broadwell PRM >> Volume2d: Command Structures >> - * RENDER_SURFACE_STATE Number of Multisamples: - * - * - If this field is any value other than MULTISAMPLECOUNT_1, the - * Surface Type must be SURFTYPE_2D This field must be set to - * MULTISAMPLECOUNT_1 unless the surface is a Sampling Engine surface - * or Render Target surface. - * - * - If this field is any value other than MULTISAMPLECOUNT_1, Surface - * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero. 
- */ - if (info->dim != ISL_SURF_DIM_2D) - return false; - if (info->levels > 1) - return false; - - /* More obvious restrictions */ - if (isl_surf_usage_is_display(info->usage)) - return false; - if (isl_format_is_compressed(info->format)) - return false; - if (isl_format_is_yuv(info->format)) - return false; - - if (isl_surf_usage_is_depth_or_stencil(info->usage)) - require_interleaved = true; - - if (require_array && require_interleaved) - return false; - - if (require_interleaved) { - *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; - return true; - } - - *msaa_layout = ISL_MSAA_LAYOUT_ARRAY; - return true; -} - -/** - * Choose horizontal subimage alignment, in units of surface elements. - */ -static uint32_t -gen8_choose_halign_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info) -{ - if (isl_format_is_compressed(info->format)) - return 1; - - /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", - * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: - * - * - This field is intended to be set to HALIGN_8 only if the surface - * was rendered as a depth buffer with Z16 format or a stencil buffer. - * In this case it must be set to HALIGN_8 since these surfaces - * support only alignment of 8. [...] - */ - if (isl_surf_info_is_z16(info)) - return 8; - if (isl_surf_usage_is_stencil(info->usage)) - return 8; - - /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", - * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: - * - * [...] For Z32 formats it must be set to HALIGN_4. - */ - if (isl_surf_usage_is_depth(info->usage)) - return 4; - - if (!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) { - /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", - * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: - * - * - When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, - * HALIGN 16 must be used. 
- * - * This case handles color surfaces that may own an auxiliary MCS, CCS_D, - * or CCS_E. Depth buffers, including those that own an auxiliary HiZ - * surface, are handled above and do not require HALIGN_16. - */ - assert(!isl_surf_usage_is_depth(info->usage)); - return 16; - } - - /* XXX(chadv): I believe the hardware requires each image to be - * cache-aligned. If that's true, then defaulting to halign=4 is wrong for - * many formats. Depending on the format's block size, we may need to - * increase halign to 8. - */ - return 4; -} - -/** - * Choose vertical subimage alignment, in units of surface elements. - */ -static uint32_t -gen8_choose_valign_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info) -{ - /* From the Broadwell PRM > Volume 2d: Command Reference: Structures - * > RENDER_SURFACE_STATE Surface Vertical Alignment (p325): - * - * - For Sampling Engine and Render Target Surfaces: This field - * specifies the vertical alignment requirement in elements for the - * surface. [...] An element is defined as a pixel in uncompresed - * surface formats, and as a compression block in compressed surface - * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an - * element is a sample. - * - * - This field is intended to be set to VALIGN_4 if the surface was - * rendered as a depth buffer, for a multisampled (4x) render target, - * or for a multisampled (8x) render target, since these surfaces - * support only alignment of 4. Use of VALIGN_4 for other surfaces is - * supported, but increases memory usage. - * - * - This field is intended to be set to VALIGN_8 only if the surface - * was rendered as a stencil buffer, since stencil buffer surfaces - * support only alignment of 8. If set to VALIGN_8, Surface Format - * must be R8_UINT. 
- */ - - if (isl_format_is_compressed(info->format)) - return 1; - - if (isl_surf_usage_is_stencil(info->usage)) - return 8; - - return 4; -} - -void -gen8_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - assert(!isl_tiling_is_std_y(tiling)); - - /* The below text from the Broadwell PRM provides some insight into the - * hardware's requirements for LOD alignment. From the Broadwell PRM >> - * Volume 5: Memory Views >> Surface Layout >> 2D Surfaces: - * - * These [2D surfaces] must adhere to the following memory organization - * rules: - * - * - For non-compressed texture formats, each mipmap must start on an - * even row within the monolithic rectangular area. For - * 1-texel-high mipmaps, this may require a row of padding below - * the previous mipmap. This restriction does not apply to any - * compressed texture formats; each subsequent (lower-res) - * compressed mipmap is positioned directly below the previous - * mipmap. - * - * - Vertical alignment restrictions vary with memory tiling type: - * 1 DWord for linear, 16-byte (DQWord) for tiled. (Note that tiled - * mipmaps are not required to start at the left edge of a tile - * row.) 
- */ - - *image_align_el = (struct isl_extent3d) { - .w = gen8_choose_halign_el(dev, info), - .h = gen8_choose_valign_el(dev, info), - .d = 1, - }; -} diff --git a/src/isl/isl_gen8.h b/src/isl/isl_gen8.h deleted file mode 100644 index 2017ea8ddc1..00000000000 --- a/src/isl/isl_gen8.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "isl_priv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -bool -gen8_choose_msaa_layout(const struct isl_device *dev, - const struct isl_surf_init_info *info, - enum isl_tiling tiling, - enum isl_msaa_layout *msaa_layout); - -void -gen8_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el); - -#ifdef __cplusplus -} -#endif diff --git a/src/isl/isl_gen9.c b/src/isl/isl_gen9.c deleted file mode 100644 index aa290aa1c35..00000000000 --- a/src/isl/isl_gen9.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "isl_gen8.h" -#include "isl_gen9.h" -#include "isl_priv.h" - -/** - * Calculate the surface's subimage alignment, in units of surface samples, - * for the standard tiling formats Yf and Ys. - */ -static void -gen9_calc_std_image_alignment_sa(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *align_sa) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - - assert(isl_tiling_is_std_y(tiling)); - - const uint32_t bs = fmtl->bs; - const uint32_t is_Ys = tiling == ISL_TILING_Ys; - - switch (info->dim) { - case ISL_SURF_DIM_1D: - /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface - * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. - */ - *align_sa = (struct isl_extent3d) { - .w = 1 << (12 - (ffs(bs) - 1) + (4 * is_Ys)), - .h = 1, - .d = 1, - }; - return; - case ISL_SURF_DIM_2D: - /* See the Skylake BSpec > Memory Views > Common Surface Formats > - * Surface Layout and Tiling > 2D Surfaces > 2D/CUBE Alignment - * Requirements. - */ - *align_sa = (struct isl_extent3d) { - .w = 1 << (6 - ((ffs(bs) - 1) / 2) + (4 * is_Ys)), - .h = 1 << (6 - ((ffs(bs) - 0) / 2) + (4 * is_Ys)), - .d = 1, - }; - - if (is_Ys) { - /* FINISHME(chadv): I don't trust this code. Untested. */ - isl_finishme("%s:%s: [SKL+] multisample TileYs", __FILE__, __func__); - - switch (msaa_layout) { - case ISL_MSAA_LAYOUT_NONE: - case ISL_MSAA_LAYOUT_INTERLEAVED: - break; - case ISL_MSAA_LAYOUT_ARRAY: - align_sa->w >>= (ffs(info->samples) - 0) / 2; - align_sa->h >>= (ffs(info->samples) - 1) / 2; - break; - } - } - return; - - case ISL_SURF_DIM_3D: - /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface - * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. 
- */ - *align_sa = (struct isl_extent3d) { - .w = 1 << (4 - ((ffs(bs) + 1) / 3) + (4 * is_Ys)), - .h = 1 << (4 - ((ffs(bs) - 1) / 3) + (2 * is_Ys)), - .d = 1 << (4 - ((ffs(bs) - 0) / 3) + (2 * is_Ys)), - }; - return; - } - - unreachable("bad isl_surface_type"); -} - -void -gen9_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el) -{ - /* This BSpec text provides some insight into the hardware's alignment - * requirements [Skylake BSpec > Memory Views > Common Surface Formats > - * Surface Layout and Tiling > 2D Surfaces]: - * - * An LOD must be aligned to a cache-line except for some special cases - * related to Planar YUV surfaces. In general, the cache-alignment - * restriction implies there is a minimum height for an LOD of 4 texels. - * So, LODs which are smaller than 4 high are padded. - * - * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Vertical Alignment: - * - * - For Sampling Engine and Render Target Surfaces: This field - * specifies the vertical alignment requirement in elements for the - * surface. [...] An element is defined as a pixel in uncompresed - * surface formats, and as a compression block in compressed surface - * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an - * element is a sample. - * - * - This field is used for 2D, CUBE, and 3D surface alignment when Tiled - * Resource Mode is TRMODE_NONE (Tiled Resource Mode is disabled). - * This field is ignored for 1D surfaces and also when Tiled Resource - * Mode is not TRMODE_NONE (e.g. Tiled Resource Mode is enabled). - * - * See the appropriate Alignment table in the "Surface Layout and - * Tiling" section under Common Surface Formats for the table of - * alignment values for Tiled Resrouces. - * - * - For uncompressed surfaces, the units of "j" are rows of pixels on - * the physical surface. 
For compressed texture formats, the units of - * "j" are in compression blocks, thus each increment in "j" is equal - * to h pixels, where h is the height of the compression block in - * pixels. - * - * - Valid Values: VALIGN_4, VALIGN_8, VALIGN_16 - * - * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Horizontal - * Alignment: - * - * - For uncompressed surfaces, the units of "i" are pixels on the - * physical surface. For compressed texture formats, the units of "i" - * are in compression blocks, thus each increment in "i" is equal to - * w pixels, where w is the width of the compression block in pixels. - * - * - Valid Values: HALIGN_4, HALIGN_8, HALIGN_16 - */ - - if (isl_tiling_is_std_y(tiling)) { - struct isl_extent3d image_align_sa; - gen9_calc_std_image_alignment_sa(dev, info, tiling, msaa_layout, - &image_align_sa); - - *image_align_el = isl_extent3d_sa_to_el(info->format, image_align_sa); - return; - } - - if (info->dim == ISL_SURF_DIM_1D) { - /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface - * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. - */ - *image_align_el = isl_extent3d(64, 1, 1); - return; - } - - if (isl_format_is_compressed(info->format)) { - /* On Gen9, the meaning of RENDER_SURFACE_STATE's - * SurfaceHorizontalAlignment and SurfaceVerticalAlignment changed for - * compressed formats. They now indicate a multiple of the compression - * block. For example, if the compression mode is ETC2 then HALIGN_4 - * indicates a horizontal alignment of 16 pixels. - * - * To avoid wasting memory, choose the smallest alignment possible: - * HALIGN_4 and VALIGN_4. 
- */ - *image_align_el = isl_extent3d(4, 4, 1); - return; - } - - gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout, - image_align_el); -} diff --git a/src/isl/isl_gen9.h b/src/isl/isl_gen9.h deleted file mode 100644 index 64ed0aa44ef..00000000000 --- a/src/isl/isl_gen9.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "isl_priv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void -gen9_choose_image_alignment_el(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling tiling, - enum isl_msaa_layout msaa_layout, - struct isl_extent3d *image_align_el); - -#ifdef __cplusplus -} -#endif diff --git a/src/isl/isl_image.c b/src/isl/isl_image.c deleted file mode 100644 index 773160432b9..00000000000 --- a/src/isl/isl_image.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "isl.h" -#include "brw_compiler.h" - -bool -isl_is_storage_image_format(enum isl_format format) -{ - /* XXX: Maybe we should put this in the CSV? 
*/ - - switch (format) { - case ISL_FORMAT_R32G32B32A32_UINT: - case ISL_FORMAT_R32G32B32A32_SINT: - case ISL_FORMAT_R32G32B32A32_FLOAT: - case ISL_FORMAT_R32_UINT: - case ISL_FORMAT_R32_SINT: - case ISL_FORMAT_R32_FLOAT: - case ISL_FORMAT_R16G16B16A16_UINT: - case ISL_FORMAT_R16G16B16A16_SINT: - case ISL_FORMAT_R16G16B16A16_FLOAT: - case ISL_FORMAT_R32G32_UINT: - case ISL_FORMAT_R32G32_SINT: - case ISL_FORMAT_R32G32_FLOAT: - case ISL_FORMAT_R8G8B8A8_UINT: - case ISL_FORMAT_R8G8B8A8_SINT: - case ISL_FORMAT_R16G16_UINT: - case ISL_FORMAT_R16G16_SINT: - case ISL_FORMAT_R16G16_FLOAT: - case ISL_FORMAT_R8G8_UINT: - case ISL_FORMAT_R8G8_SINT: - case ISL_FORMAT_R16_UINT: - case ISL_FORMAT_R16_FLOAT: - case ISL_FORMAT_R16_SINT: - case ISL_FORMAT_R8_UINT: - case ISL_FORMAT_R8_SINT: - case ISL_FORMAT_R10G10B10A2_UINT: - case ISL_FORMAT_R10G10B10A2_UNORM: - case ISL_FORMAT_R11G11B10_FLOAT: - case ISL_FORMAT_R16G16B16A16_UNORM: - case ISL_FORMAT_R16G16B16A16_SNORM: - case ISL_FORMAT_R8G8B8A8_UNORM: - case ISL_FORMAT_R8G8B8A8_SNORM: - case ISL_FORMAT_R16G16_UNORM: - case ISL_FORMAT_R16G16_SNORM: - case ISL_FORMAT_R8G8_UNORM: - case ISL_FORMAT_R8G8_SNORM: - case ISL_FORMAT_R16_UNORM: - case ISL_FORMAT_R16_SNORM: - case ISL_FORMAT_R8_UNORM: - case ISL_FORMAT_R8_SNORM: - return true; - default: - return false; - } -} - -enum isl_format -isl_lower_storage_image_format(const struct isl_device *dev, - enum isl_format format) -{ - switch (format) { - /* These are never lowered. Up to BDW we'll have to fall back to untyped - * surface access for 128bpp formats. - */ - case ISL_FORMAT_R32G32B32A32_UINT: - case ISL_FORMAT_R32G32B32A32_SINT: - case ISL_FORMAT_R32G32B32A32_FLOAT: - case ISL_FORMAT_R32_UINT: - case ISL_FORMAT_R32_SINT: - case ISL_FORMAT_R32_FLOAT: - return format; - - /* From HSW to BDW the only 64bpp format supported for typed access is - * RGBA_UINT16. IVB falls back to untyped. 
- */ - case ISL_FORMAT_R16G16B16A16_UINT: - case ISL_FORMAT_R16G16B16A16_SINT: - case ISL_FORMAT_R16G16B16A16_FLOAT: - case ISL_FORMAT_R32G32_UINT: - case ISL_FORMAT_R32G32_SINT: - case ISL_FORMAT_R32G32_FLOAT: - return (ISL_DEV_GEN(dev) >= 9 ? format : - ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R16G16B16A16_UINT : - ISL_FORMAT_R32G32_UINT); - - /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component - * are supported. IVB doesn't support formats with more than one component - * for typed access. For 8 and 16 bpp formats IVB relies on the - * undocumented behavior that typed reads from R_UINT8 and R_UINT16 - * surfaces actually do a 32-bit misaligned read. The alternative would be - * to use two surface state entries with different formats for each image, - * one for reading (using R_UINT32) and another one for writing (using - * R_UINT8 or R_UINT16), but that would complicate the shaders we generate - * even more. - */ - case ISL_FORMAT_R8G8B8A8_UINT: - case ISL_FORMAT_R8G8B8A8_SINT: - return (ISL_DEV_GEN(dev) >= 9 ? format : - ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); - - case ISL_FORMAT_R16G16_UINT: - case ISL_FORMAT_R16G16_SINT: - case ISL_FORMAT_R16G16_FLOAT: - return (ISL_DEV_GEN(dev) >= 9 ? format : - ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); - - case ISL_FORMAT_R8G8_UINT: - case ISL_FORMAT_R8G8_SINT: - return (ISL_DEV_GEN(dev) >= 9 ? format : - ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); - - case ISL_FORMAT_R16_UINT: - case ISL_FORMAT_R16_FLOAT: - case ISL_FORMAT_R16_SINT: - return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R16_UINT); - - case ISL_FORMAT_R8_UINT: - case ISL_FORMAT_R8_SINT: - return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R8_UINT); - - /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported - * by the hardware. 
- */ - case ISL_FORMAT_R10G10B10A2_UINT: - case ISL_FORMAT_R10G10B10A2_UNORM: - case ISL_FORMAT_R11G11B10_FLOAT: - return ISL_FORMAT_R32_UINT; - - /* No normalized fixed-point formats are supported by the hardware. */ - case ISL_FORMAT_R16G16B16A16_UNORM: - case ISL_FORMAT_R16G16B16A16_SNORM: - return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R16G16B16A16_UINT : - ISL_FORMAT_R32G32_UINT); - - case ISL_FORMAT_R8G8B8A8_UNORM: - case ISL_FORMAT_R8G8B8A8_SNORM: - return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); - - case ISL_FORMAT_R16G16_UNORM: - case ISL_FORMAT_R16G16_SNORM: - return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); - - case ISL_FORMAT_R8G8_UNORM: - case ISL_FORMAT_R8G8_SNORM: - return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); - - case ISL_FORMAT_R16_UNORM: - case ISL_FORMAT_R16_SNORM: - return ISL_FORMAT_R16_UINT; - - case ISL_FORMAT_R8_UNORM: - case ISL_FORMAT_R8_SNORM: - return ISL_FORMAT_R8_UINT; - - default: - assert(!"Unknown image format"); - return ISL_FORMAT_UNSUPPORTED; - } -} diff --git a/src/isl/isl_priv.h b/src/isl/isl_priv.h deleted file mode 100644 index b399e0f8116..00000000000 --- a/src/isl/isl_priv.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or 
substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -#include - -#include "brw_device_info.h" -#include "mesa/main/imports.h" -#include "util/macros.h" - -#include "isl.h" - -#define isl_finishme(format, ...) \ - __isl_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__) - -void PRINTFLIKE(3, 4) UNUSED -__isl_finishme(const char *file, int line, const char *fmt, ...); - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - -static inline uint32_t -ffs(uint32_t n) { - return __builtin_ffs(n); -} - -static inline bool -isl_is_pow2(uintmax_t n) -{ - return !(n & (n - 1)); -} - -/** - * Alignment must be a power of 2. - */ -static inline bool -isl_is_aligned(uintmax_t n, uintmax_t a) -{ - assert(isl_is_pow2(a)); - return (n & (a - 1)) == 0; -} - -/** - * Alignment must be a power of 2. - */ -static inline uintmax_t -isl_align(uintmax_t n, uintmax_t a) -{ - assert(a != 0 && isl_is_pow2(a)); - return (n + a - 1) & ~(a - 1); -} - -static inline uintmax_t -isl_align_npot(uintmax_t n, uintmax_t a) -{ - assert(a > 0); - return ((n + a - 1) / a) * a; -} - -/** - * Alignment must be a power of 2. - */ -static inline uintmax_t -isl_align_div(uintmax_t n, uintmax_t a) -{ - return isl_align(n, a) / a; -} - -static inline uintmax_t -isl_align_div_npot(uintmax_t n, uintmax_t a) -{ - return isl_align_npot(n, a) / a; -} - -/** - * Log base 2, rounding towards zero. 
- */ -static inline uint32_t -isl_log2u(uint32_t n) -{ - assert(n != 0); - return 31 - __builtin_clz(n); -} - -static inline uint32_t -isl_minify(uint32_t n, uint32_t levels) -{ - if (unlikely(n == 0)) - return 0; - else - return MAX(n >> levels, 1); -} - -static inline struct isl_extent3d -isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - assert(extent_sa.w % fmtl->bw == 0); - assert(extent_sa.h % fmtl->bh == 0); - assert(extent_sa.d % fmtl->bd == 0); - - return (struct isl_extent3d) { - .w = extent_sa.w / fmtl->bw, - .h = extent_sa.h / fmtl->bh, - .d = extent_sa.d / fmtl->bd, - }; -} - -static inline struct isl_extent3d -isl_extent3d_el_to_sa(enum isl_format fmt, struct isl_extent3d extent_el) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); - - return (struct isl_extent3d) { - .w = extent_el.w * fmtl->bw, - .h = extent_el.h * fmtl->bh, - .d = extent_el.d * fmtl->bd, - }; -} diff --git a/src/isl/tests/.gitignore b/src/isl/tests/.gitignore deleted file mode 100644 index ba70ecfbee4..00000000000 --- a/src/isl/tests/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/isl_surf_get_image_offset_test diff --git a/src/isl/tests/isl_surf_get_image_offset_test.c b/src/isl/tests/isl_surf_get_image_offset_test.c deleted file mode 100644 index cda8583daeb..00000000000 --- a/src/isl/tests/isl_surf_get_image_offset_test.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above 
copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include -#include - -#include "brw_device_info.h" -#include "isl.h" -#include "isl_priv.h" - -#define BDW_GT2_DEVID 0x161a - -// An asssert that works regardless of NDEBUG. -#define t_assert(cond) \ - do { \ - if (!(cond)) { \ - fprintf(stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ - abort(); \ - } \ - } while (0) - -static void -t_assert_extent4d(const struct isl_extent4d *e, uint32_t width, - uint32_t height, uint32_t depth, uint32_t array_len) -{ - t_assert(e->width == width); - t_assert(e->height == height); - t_assert(e->depth == depth); - t_assert(e->array_len == array_len); -} - -static void -t_assert_image_alignment_el(const struct isl_surf *surf, - uint32_t w, uint32_t h, uint32_t d) -{ - struct isl_extent3d align_el; - - align_el = isl_surf_get_image_alignment_el(surf); - t_assert(align_el.w == w); - t_assert(align_el.h == h); - t_assert(align_el.d == d); - -} - -static void -t_assert_image_alignment_sa(const struct isl_surf *surf, - uint32_t w, uint32_t h, uint32_t d) -{ - struct isl_extent3d align_sa; - - align_sa = isl_surf_get_image_alignment_sa(surf); - t_assert(align_sa.w == w); - t_assert(align_sa.h == h); - t_assert(align_sa.d == d); - -} - -static void -t_assert_offset_el(const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - uint32_t 
logical_z_offset_px, - uint32_t expected_x_offset_el, - uint32_t expected_y_offset_el) -{ - uint32_t x, y; - isl_surf_get_image_offset_el(surf, level, logical_array_layer, - logical_z_offset_px, &x, &y); - - t_assert(x == expected_x_offset_el); - t_assert(y == expected_y_offset_el); -} - -static void -t_assert_intratile_offset_el(const struct isl_device *dev, - const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - uint32_t logical_z_offset_px, - uint32_t expected_base_address_offset, - uint32_t expected_x_offset_el, - uint32_t expected_y_offset_el) -{ - uint32_t base_address_offset; - uint32_t x_offset_el, y_offset_el; - isl_surf_get_image_intratile_offset_el(dev, surf, - level, - logical_array_layer, - logical_z_offset_px, - &base_address_offset, - &x_offset_el, - &y_offset_el); - - t_assert(base_address_offset == expected_base_address_offset); - t_assert(x_offset_el == expected_x_offset_el); - t_assert(y_offset_el == expected_y_offset_el); -} - -static void -t_assert_phys_level0_sa(const struct isl_surf *surf, uint32_t width, - uint32_t height, uint32_t depth, uint32_t array_len) -{ - t_assert_extent4d(&surf->phys_level0_sa, width, height, depth, array_len); -} - -static void -t_assert_gen4_3d_layer(const struct isl_surf *surf, - uint32_t level, - uint32_t aligned_width, - uint32_t aligned_height, - uint32_t depth, - uint32_t horiz_layers, - uint32_t vert_layers, - uint32_t *base_y) -{ - for (uint32_t z = 0; z < depth; ++z) { - t_assert_offset_el(surf, level, 0, z, - aligned_width * (z % horiz_layers), - *base_y + aligned_height * (z / horiz_layers)); - } - - *base_y += aligned_height * vert_layers; -} - -static void -test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(void) -{ - bool ok; - - struct isl_device dev; - isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID), - /*bit6_swizzle*/ false); - - struct isl_surf surf; - ok = isl_surf_init(&dev, &surf, - .dim = ISL_SURF_DIM_2D, - .format = ISL_FORMAT_R8G8B8A8_UNORM, 
- .width = 512, - .height = 512, - .depth = 1, - .levels = 10, - .array_len = 1, - .samples = 1, - .usage = ISL_SURF_USAGE_TEXTURE_BIT | - ISL_SURF_USAGE_DISABLE_AUX_BIT, - .tiling_flags = ISL_TILING_Y0_BIT); - t_assert(ok); - - t_assert_image_alignment_el(&surf, 4, 4, 1); - t_assert_image_alignment_sa(&surf, 4, 4, 1); - t_assert_phys_level0_sa(&surf, 512, 512, 1, 1); - t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 772); - t_assert(isl_surf_get_array_pitch_el_rows(&surf) == - isl_surf_get_array_pitch_sa_rows(&surf)); - - /* Row pitch should be minimal possible */ - t_assert(surf.row_pitch == 2048); - - t_assert_offset_el(&surf, 0, 0, 0, 0, 0); // +0, +0 - t_assert_offset_el(&surf, 1, 0, 0, 0, 512); // +0, +512 - t_assert_offset_el(&surf, 2, 0, 0, 256, 512); // +256, +0 - t_assert_offset_el(&surf, 3, 0, 0, 256, 640); // +0, +128 - t_assert_offset_el(&surf, 4, 0, 0, 256, 704); // +0, +64 - t_assert_offset_el(&surf, 5, 0, 0, 256, 736); // +0, +32 - t_assert_offset_el(&surf, 6, 0, 0, 256, 752); // +0, +16 - t_assert_offset_el(&surf, 7, 0, 0, 256, 760); // +0, +8 - t_assert_offset_el(&surf, 8, 0, 0, 256, 764); // +0, +4 - t_assert_offset_el(&surf, 9, 0, 0, 256, 768); // +0, +4 - - t_assert_intratile_offset_el(&dev, &surf, 0, 0, 0, 0x0, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 1, 0, 0, 0x100000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x100400, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x140400, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x160400, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x170400, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x170400, 0, 16); - t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x170400, 0, 24); - t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x170400, 0, 28); - t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x180400, 0, 0); -} - -static void -test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(void) -{ - bool 
ok; - - struct isl_device dev; - isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID), - /*bit6_swizzle*/ false); - - struct isl_surf surf; - ok = isl_surf_init(&dev, &surf, - .dim = ISL_SURF_DIM_2D, - .format = ISL_FORMAT_R8G8B8A8_UNORM, - .width = 1024, - .height = 1024, - .depth = 1, - .levels = 11, - .array_len = 6, - .samples = 1, - .usage = ISL_SURF_USAGE_TEXTURE_BIT | - ISL_SURF_USAGE_DISABLE_AUX_BIT, - .tiling_flags = ISL_TILING_Y0_BIT); - t_assert(ok); - - t_assert_image_alignment_el(&surf, 4, 4, 1); - t_assert_image_alignment_sa(&surf, 4, 4, 1); - - t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 1540); - t_assert(isl_surf_get_array_pitch_el_rows(&surf) == - isl_surf_get_array_pitch_sa_rows(&surf)); - - /* Row pitch should be minimal possible */ - t_assert(surf.row_pitch == 4096); - - for (uint32_t a = 0; a < 6; ++a) { - uint32_t b = a * isl_surf_get_array_pitch_sa_rows(&surf); - - t_assert_offset_el(&surf, 0, a, 0, 0, b + 0); // +0, +0 - t_assert_offset_el(&surf, 1, a, 0, 0, b + 1024); // +0, +1024 - t_assert_offset_el(&surf, 2, a, 0, 512, b + 1024); // +512, +0 - t_assert_offset_el(&surf, 3, a, 0, 512, b + 1280); // +0, +256 - t_assert_offset_el(&surf, 4, a, 0, 512, b + 1408); // +0, +128 - t_assert_offset_el(&surf, 5, a, 0, 512, b + 1472); // +0, +64 - t_assert_offset_el(&surf, 6, a, 0, 512, b + 1504); // +0, +32 - t_assert_offset_el(&surf, 7, a, 0, 512, b + 1520); // +0, +16 - t_assert_offset_el(&surf, 8, a, 0, 512, b + 1528); // +0, +8 - t_assert_offset_el(&surf, 9, a, 0, 512, b + 1532); // +0, +4 - t_assert_offset_el(&surf, 10, a, 0, 512, b + 1536); // +0, +4 - - } - - /* The layout below assumes a specific array pitch. It will need updating - * if isl's array pitch calculations ever change. 
- */ - t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 1540); - - /* array layer 0 */ - t_assert_intratile_offset_el(&dev, &surf, 0, 0, 0, 0x0, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 1, 0, 0, 0x400000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x400800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x500800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x580800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x5c0800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x5e0800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x5e0800, 0, 16); - t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x5e0800, 0, 24); - t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x5e0800, 0, 28); - t_assert_intratile_offset_el(&dev, &surf, 10, 0, 0, 0x600800, 0, 0); - - /* array layer 1 */ - t_assert_intratile_offset_el(&dev, &surf, 0, 1, 0, 0x600000, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 1, 1, 0, 0xa00000, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 2, 1, 0, 0xa00800, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 3, 1, 0, 0xb00800, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 4, 1, 0, 0xb80800, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 5, 1, 0, 0xbc0800, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 6, 1, 0, 0xbe0800, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 7, 1, 0, 0xbe0800, 0, 20); - t_assert_intratile_offset_el(&dev, &surf, 8, 1, 0, 0xbe0800, 0, 28); - t_assert_intratile_offset_el(&dev, &surf, 9, 1, 0, 0xc00800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 10, 1, 0, 0xc00800, 0, 4); - - /* array layer 2 */ - t_assert_intratile_offset_el(&dev, &surf, 0, 2, 0, 0xc00000, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 1, 2, 0, 0x1000000, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 2, 2, 0, 0x1000800, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 3, 2, 0, 0x1100800, 0, 8); - t_assert_intratile_offset_el(&dev, 
&surf, 4, 2, 0, 0x1180800, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 5, 2, 0, 0x11c0800, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 6, 2, 0, 0x11e0800, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 7, 2, 0, 0x11e0800, 0, 24); - t_assert_intratile_offset_el(&dev, &surf, 8, 2, 0, 0x1200800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 9, 2, 0, 0x1200800, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 10, 2, 0, 0x1200800, 0, 8); - - /* skip the remaining array layers */ -} - -static void -test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0(void) -{ - bool ok; - - struct isl_device dev; - isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID), - /*bit6_swizzle*/ false); - - struct isl_surf surf; - ok = isl_surf_init(&dev, &surf, - .dim = ISL_SURF_DIM_3D, - .format = ISL_FORMAT_R8G8B8A8_UNORM, - .width = 256, - .height = 256, - .depth = 256, - .levels = 9, - .array_len = 1, - .samples = 1, - .usage = ISL_SURF_USAGE_TEXTURE_BIT | - ISL_SURF_USAGE_DISABLE_AUX_BIT, - .tiling_flags = ISL_TILING_Y0_BIT); - t_assert(ok); - - t_assert_image_alignment_el(&surf, 4, 4, 1); - t_assert_image_alignment_sa(&surf, 4, 4, 1); - t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 74916); - t_assert(isl_surf_get_array_pitch_sa_rows(&surf) == - isl_surf_get_array_pitch_el_rows(&surf)); - - uint32_t base_y = 0; - - t_assert_gen4_3d_layer(&surf, 0, 256, 256, 256, 1, 256, &base_y); - t_assert_gen4_3d_layer(&surf, 1, 128, 128, 128, 2, 64, &base_y); - t_assert_gen4_3d_layer(&surf, 2, 64, 64, 64, 4, 16, &base_y); - t_assert_gen4_3d_layer(&surf, 3, 32, 32, 32, 8, 4, &base_y); - t_assert_gen4_3d_layer(&surf, 4, 16, 16, 16, 16, 1, &base_y); - t_assert_gen4_3d_layer(&surf, 5, 8, 8, 8, 32, 1, &base_y); - t_assert_gen4_3d_layer(&surf, 6, 4, 4, 4, 64, 1, &base_y); - t_assert_gen4_3d_layer(&surf, 7, 4, 4, 2, 128, 1, &base_y); - t_assert_gen4_3d_layer(&surf, 8, 4, 4, 1, 256, 1, &base_y); -} - -int main(void) -{ - /* FINISHME: Add tests for npot sizes */ - /* FINISHME: 
Add tests for 1D surfaces */ - - test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(); - test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(); - test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0(); -} diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 50d972e6c7e..0605dc437d7 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -62,7 +62,6 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/gallium/include \ -I$(top_srcdir)/src/intel/ \ - -I$(top_srcdir)/src/isl/ \ -I$(top_builddir)/src \ -I$(top_builddir)/src/compiler \ -I$(top_builddir)/src/compiler/nir \ @@ -165,7 +164,7 @@ anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) CLEANFILES = $(BUILT_SOURCES) libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ - $(top_builddir)/src/isl/libisl.la \ + $(top_builddir)/src/intel/isl/libisl.la \ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ ../mesa/libmesa.la \ ../mesa/drivers/dri/common/libdri_test_stubs.la \ diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 06b961495c2..ba86333525e 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -62,7 +62,7 @@ typedef uint32_t xcb_window_t; #include "anv_entrypoints.h" #include "anv_gen_macros.h" #include "brw_context.h" -#include "isl.h" +#include "isl/isl.h" #ifdef __cplusplus extern "C" { -- cgit v1.2.3 From 9851c8285f7bf70a6cb4bede2ee94110c14acc19 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 10:37:59 -0800 Subject: Move the intel vulkan driver to src/intel/vulkan --- configure.ac | 4 +- src/Makefile.am | 1 - src/intel/Makefile.am | 2 +- src/intel/vulkan/.gitignore | 9 + src/intel/vulkan/Makefile.am | 203 +++ src/intel/vulkan/anv_allocator.c | 862 +++++++++ src/intel/vulkan/anv_batch_chain.c | 1077 +++++++++++ src/intel/vulkan/anv_cmd_buffer.c | 1191 +++++++++++++ src/intel/vulkan/anv_descriptor_set.c | 532 ++++++ 
src/intel/vulkan/anv_device.c | 1789 +++++++++++++++++++ src/intel/vulkan/anv_dump.c | 209 +++ src/intel/vulkan/anv_entrypoints_gen.py | 324 ++++ src/intel/vulkan/anv_formats.c | 603 +++++++ src/intel/vulkan/anv_gem.c | 358 ++++ src/intel/vulkan/anv_gem_stubs.c | 159 ++ src/intel/vulkan/anv_gen_macros.h | 146 ++ src/intel/vulkan/anv_image.c | 911 ++++++++++ src/intel/vulkan/anv_intel.c | 100 ++ src/intel/vulkan/anv_meta.c | 169 ++ src/intel/vulkan/anv_meta.h | 75 + src/intel/vulkan/anv_meta_blit.c | 1442 +++++++++++++++ src/intel/vulkan/anv_meta_clear.c | 1098 ++++++++++++ src/intel/vulkan/anv_meta_resolve.c | 867 +++++++++ src/intel/vulkan/anv_nir.h | 44 + src/intel/vulkan/anv_nir_apply_dynamic_offsets.c | 171 ++ src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 394 ++++ src/intel/vulkan/anv_nir_lower_push_constants.c | 77 + src/intel/vulkan/anv_pass.c | 160 ++ src/intel/vulkan/anv_pipeline.c | 1278 +++++++++++++ src/intel/vulkan/anv_pipeline_cache.c | 405 +++++ src/intel/vulkan/anv_private.h | 1876 ++++++++++++++++++++ src/intel/vulkan/anv_query.c | 187 ++ src/intel/vulkan/anv_util.c | 195 ++ src/intel/vulkan/anv_wsi.c | 196 ++ src/intel/vulkan/anv_wsi.h | 74 + src/intel/vulkan/anv_wsi_wayland.c | 871 +++++++++ src/intel/vulkan/anv_wsi_x11.c | 758 ++++++++ src/intel/vulkan/dev_icd.json.in | 7 + src/intel/vulkan/gen7_cmd_buffer.c | 589 ++++++ src/intel/vulkan/gen7_pipeline.c | 410 +++++ src/intel/vulkan/gen7_state.c | 264 +++ src/intel/vulkan/gen8_cmd_buffer.c | 914 ++++++++++ src/intel/vulkan/gen8_pipeline.c | 573 ++++++ src/intel/vulkan/gen8_state.c | 493 +++++ src/intel/vulkan/genX_cmd_buffer.c | 717 ++++++++ src/intel/vulkan/genX_pipeline.c | 126 ++ src/intel/vulkan/genX_pipeline_util.h | 327 ++++ src/intel/vulkan/genX_state_util.h | 112 ++ src/intel/vulkan/intel_icd.json.in | 7 + src/intel/vulkan/tests/.gitignore | 5 + src/intel/vulkan/tests/Makefile.am | 46 + src/intel/vulkan/tests/block_pool_no_free.c | 144 ++ src/intel/vulkan/tests/state_pool.c | 57 + 
src/intel/vulkan/tests/state_pool_free_list_only.c | 66 + src/intel/vulkan/tests/state_pool_no_free.c | 117 ++ src/intel/vulkan/tests/state_pool_test_helper.h | 71 + src/vulkan/.gitignore | 9 - src/vulkan/Makefile.am | 203 --- src/vulkan/anv_allocator.c | 862 --------- src/vulkan/anv_batch_chain.c | 1077 ----------- src/vulkan/anv_cmd_buffer.c | 1191 ------------- src/vulkan/anv_descriptor_set.c | 532 ------ src/vulkan/anv_device.c | 1789 ------------------- src/vulkan/anv_dump.c | 209 --- src/vulkan/anv_entrypoints_gen.py | 324 ---- src/vulkan/anv_formats.c | 603 ------- src/vulkan/anv_gem.c | 358 ---- src/vulkan/anv_gem_stubs.c | 159 -- src/vulkan/anv_gen_macros.h | 146 -- src/vulkan/anv_image.c | 911 ---------- src/vulkan/anv_intel.c | 100 -- src/vulkan/anv_meta.c | 169 -- src/vulkan/anv_meta.h | 75 - src/vulkan/anv_meta_blit.c | 1442 --------------- src/vulkan/anv_meta_clear.c | 1098 ------------ src/vulkan/anv_meta_resolve.c | 867 --------- src/vulkan/anv_nir.h | 44 - src/vulkan/anv_nir_apply_dynamic_offsets.c | 171 -- src/vulkan/anv_nir_apply_pipeline_layout.c | 394 ---- src/vulkan/anv_nir_lower_push_constants.c | 77 - src/vulkan/anv_pass.c | 160 -- src/vulkan/anv_pipeline.c | 1278 ------------- src/vulkan/anv_pipeline_cache.c | 405 ----- src/vulkan/anv_private.h | 1876 -------------------- src/vulkan/anv_query.c | 187 -- src/vulkan/anv_util.c | 195 -- src/vulkan/anv_wsi.c | 196 -- src/vulkan/anv_wsi.h | 74 - src/vulkan/anv_wsi_wayland.c | 871 --------- src/vulkan/anv_wsi_x11.c | 758 -------- src/vulkan/dev_icd.json.in | 7 - src/vulkan/gen7_cmd_buffer.c | 589 ------ src/vulkan/gen7_pipeline.c | 410 ----- src/vulkan/gen7_state.c | 264 --- src/vulkan/gen8_cmd_buffer.c | 914 ---------- src/vulkan/gen8_pipeline.c | 573 ------ src/vulkan/gen8_state.c | 493 ----- src/vulkan/genX_cmd_buffer.c | 717 -------- src/vulkan/genX_pipeline.c | 126 -- src/vulkan/genX_pipeline_util.h | 327 ---- src/vulkan/genX_state_util.h | 112 -- src/vulkan/intel_icd.json.in | 7 - 
src/vulkan/tests/.gitignore | 5 - src/vulkan/tests/Makefile.am | 46 - src/vulkan/tests/block_pool_no_free.c | 144 -- src/vulkan/tests/state_pool.c | 57 - src/vulkan/tests/state_pool_free_list_only.c | 66 - src/vulkan/tests/state_pool_no_free.c | 117 -- src/vulkan/tests/state_pool_test_helper.h | 71 - 109 files changed, 23858 insertions(+), 23859 deletions(-) create mode 100644 src/intel/vulkan/.gitignore create mode 100644 src/intel/vulkan/Makefile.am create mode 100644 src/intel/vulkan/anv_allocator.c create mode 100644 src/intel/vulkan/anv_batch_chain.c create mode 100644 src/intel/vulkan/anv_cmd_buffer.c create mode 100644 src/intel/vulkan/anv_descriptor_set.c create mode 100644 src/intel/vulkan/anv_device.c create mode 100644 src/intel/vulkan/anv_dump.c create mode 100644 src/intel/vulkan/anv_entrypoints_gen.py create mode 100644 src/intel/vulkan/anv_formats.c create mode 100644 src/intel/vulkan/anv_gem.c create mode 100644 src/intel/vulkan/anv_gem_stubs.c create mode 100644 src/intel/vulkan/anv_gen_macros.h create mode 100644 src/intel/vulkan/anv_image.c create mode 100644 src/intel/vulkan/anv_intel.c create mode 100644 src/intel/vulkan/anv_meta.c create mode 100644 src/intel/vulkan/anv_meta.h create mode 100644 src/intel/vulkan/anv_meta_blit.c create mode 100644 src/intel/vulkan/anv_meta_clear.c create mode 100644 src/intel/vulkan/anv_meta_resolve.c create mode 100644 src/intel/vulkan/anv_nir.h create mode 100644 src/intel/vulkan/anv_nir_apply_dynamic_offsets.c create mode 100644 src/intel/vulkan/anv_nir_apply_pipeline_layout.c create mode 100644 src/intel/vulkan/anv_nir_lower_push_constants.c create mode 100644 src/intel/vulkan/anv_pass.c create mode 100644 src/intel/vulkan/anv_pipeline.c create mode 100644 src/intel/vulkan/anv_pipeline_cache.c create mode 100644 src/intel/vulkan/anv_private.h create mode 100644 src/intel/vulkan/anv_query.c create mode 100644 src/intel/vulkan/anv_util.c create mode 100644 src/intel/vulkan/anv_wsi.c create mode 100644 
src/intel/vulkan/anv_wsi.h create mode 100644 src/intel/vulkan/anv_wsi_wayland.c create mode 100644 src/intel/vulkan/anv_wsi_x11.c create mode 100644 src/intel/vulkan/dev_icd.json.in create mode 100644 src/intel/vulkan/gen7_cmd_buffer.c create mode 100644 src/intel/vulkan/gen7_pipeline.c create mode 100644 src/intel/vulkan/gen7_state.c create mode 100644 src/intel/vulkan/gen8_cmd_buffer.c create mode 100644 src/intel/vulkan/gen8_pipeline.c create mode 100644 src/intel/vulkan/gen8_state.c create mode 100644 src/intel/vulkan/genX_cmd_buffer.c create mode 100644 src/intel/vulkan/genX_pipeline.c create mode 100644 src/intel/vulkan/genX_pipeline_util.h create mode 100644 src/intel/vulkan/genX_state_util.h create mode 100644 src/intel/vulkan/intel_icd.json.in create mode 100644 src/intel/vulkan/tests/.gitignore create mode 100644 src/intel/vulkan/tests/Makefile.am create mode 100644 src/intel/vulkan/tests/block_pool_no_free.c create mode 100644 src/intel/vulkan/tests/state_pool.c create mode 100644 src/intel/vulkan/tests/state_pool_free_list_only.c create mode 100644 src/intel/vulkan/tests/state_pool_no_free.c create mode 100644 src/intel/vulkan/tests/state_pool_test_helper.h delete mode 100644 src/vulkan/.gitignore delete mode 100644 src/vulkan/Makefile.am delete mode 100644 src/vulkan/anv_allocator.c delete mode 100644 src/vulkan/anv_batch_chain.c delete mode 100644 src/vulkan/anv_cmd_buffer.c delete mode 100644 src/vulkan/anv_descriptor_set.c delete mode 100644 src/vulkan/anv_device.c delete mode 100644 src/vulkan/anv_dump.c delete mode 100644 src/vulkan/anv_entrypoints_gen.py delete mode 100644 src/vulkan/anv_formats.c delete mode 100644 src/vulkan/anv_gem.c delete mode 100644 src/vulkan/anv_gem_stubs.c delete mode 100644 src/vulkan/anv_gen_macros.h delete mode 100644 src/vulkan/anv_image.c delete mode 100644 src/vulkan/anv_intel.c delete mode 100644 src/vulkan/anv_meta.c delete mode 100644 src/vulkan/anv_meta.h delete mode 100644 src/vulkan/anv_meta_blit.c delete 
mode 100644 src/vulkan/anv_meta_clear.c delete mode 100644 src/vulkan/anv_meta_resolve.c delete mode 100644 src/vulkan/anv_nir.h delete mode 100644 src/vulkan/anv_nir_apply_dynamic_offsets.c delete mode 100644 src/vulkan/anv_nir_apply_pipeline_layout.c delete mode 100644 src/vulkan/anv_nir_lower_push_constants.c delete mode 100644 src/vulkan/anv_pass.c delete mode 100644 src/vulkan/anv_pipeline.c delete mode 100644 src/vulkan/anv_pipeline_cache.c delete mode 100644 src/vulkan/anv_private.h delete mode 100644 src/vulkan/anv_query.c delete mode 100644 src/vulkan/anv_util.c delete mode 100644 src/vulkan/anv_wsi.c delete mode 100644 src/vulkan/anv_wsi.h delete mode 100644 src/vulkan/anv_wsi_wayland.c delete mode 100644 src/vulkan/anv_wsi_x11.c delete mode 100644 src/vulkan/dev_icd.json.in delete mode 100644 src/vulkan/gen7_cmd_buffer.c delete mode 100644 src/vulkan/gen7_pipeline.c delete mode 100644 src/vulkan/gen7_state.c delete mode 100644 src/vulkan/gen8_cmd_buffer.c delete mode 100644 src/vulkan/gen8_pipeline.c delete mode 100644 src/vulkan/gen8_state.c delete mode 100644 src/vulkan/genX_cmd_buffer.c delete mode 100644 src/vulkan/genX_pipeline.c delete mode 100644 src/vulkan/genX_pipeline_util.h delete mode 100644 src/vulkan/genX_state_util.h delete mode 100644 src/vulkan/intel_icd.json.in delete mode 100644 src/vulkan/tests/.gitignore delete mode 100644 src/vulkan/tests/Makefile.am delete mode 100644 src/vulkan/tests/block_pool_no_free.c delete mode 100644 src/vulkan/tests/state_pool.c delete mode 100644 src/vulkan/tests/state_pool_free_list_only.c delete mode 100644 src/vulkan/tests/state_pool_no_free.c delete mode 100644 src/vulkan/tests/state_pool_test_helper.h (limited to 'src') diff --git a/configure.ac b/configure.ac index 604ea3728b4..b4e2539e66f 100644 --- a/configure.ac +++ b/configure.ac @@ -2521,6 +2521,8 @@ AC_CONFIG_FILES([Makefile src/intel/Makefile src/intel/genxml/Makefile src/intel/isl/Makefile + src/intel/vulkan/Makefile + 
src/intel/vulkan/tests/Makefile src/loader/Makefile src/mapi/Makefile src/mapi/es1api/glesv1_cm.pc @@ -2542,8 +2544,6 @@ AC_CONFIG_FILES([Makefile src/mesa/drivers/osmesa/osmesa.pc src/mesa/drivers/x11/Makefile src/mesa/main/tests/Makefile - src/vulkan/Makefile - src/vulkan/tests/Makefile src/util/Makefile src/util/tests/hash_table/Makefile]) diff --git a/src/Makefile.am b/src/Makefile.am index 02b83717755..73686a93b3c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -58,7 +58,6 @@ AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS) if HAVE_VULKAN SUBDIRS += intel -SUBDIRS += vulkan endif AM_CPPFLAGS = \ diff --git a/src/intel/Makefile.am b/src/intel/Makefile.am index 520602dd290..d5bd0b3b5df 100644 --- a/src/intel/Makefile.am +++ b/src/intel/Makefile.am @@ -19,4 +19,4 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -SUBDIRS = genxml isl +SUBDIRS = genxml isl vulkan diff --git a/src/intel/vulkan/.gitignore b/src/intel/vulkan/.gitignore new file mode 100644 index 00000000000..40afc2e3989 --- /dev/null +++ b/src/intel/vulkan/.gitignore @@ -0,0 +1,9 @@ +# Generated source files +/*_spirv_autogen.h +/anv_entrypoints.c +/anv_entrypoints.h +/wayland-drm-protocol.c +/wayland-drm-client-protocol.h +/dev_icd.json +/intel_icd.json +/gen*_pack.h \ No newline at end of file diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am new file mode 100644 index 00000000000..2144e5a691a --- /dev/null +++ b/src/intel/vulkan/Makefile.am @@ -0,0 +1,203 @@ +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following 
conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +SUBDIRS = . tests + +vulkan_includedir = $(includedir)/vulkan + +vulkan_include_HEADERS = \ + $(top_srcdir)/include/vulkan/vk_platform.h \ + $(top_srcdir)/include/vulkan/vulkan.h \ + $(top_srcdir)/include/vulkan/vulkan_intel.h + +# Used when generating entrypoints to filter out unwanted extensions +VULKAN_ENTRYPOINT_CPPFLAGS = \ + -I$(top_srcdir)/include/vulkan \ + -DVK_USE_PLATFORM_XCB_KHR \ + -DVK_USE_PLATFORM_WAYLAND_KHR + +lib_LTLIBRARIES = libvulkan_intel.la + +check_LTLIBRARIES = libvulkan-test.la + +PER_GEN_LIBS = \ + libanv-gen7.la \ + libanv-gen75.la \ + libanv-gen8.la \ + libanv-gen9.la + +noinst_LTLIBRARIES = $(PER_GEN_LIBS) + +# The gallium includes are for the util/u_math.h include from main/macros.h + +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/compiler \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/intel/ \ + -I$(top_builddir)/src \ + -I$(top_builddir)/src/compiler \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_builddir)/src/vulkan + +libvulkan_intel_la_CFLAGS = $(CFLAGS) 
-Wno-override-init + +VULKAN_SOURCES = \ + anv_allocator.c \ + anv_cmd_buffer.c \ + anv_batch_chain.c \ + anv_descriptor_set.c \ + anv_device.c \ + anv_dump.c \ + anv_entrypoints.c \ + anv_entrypoints.h \ + anv_formats.c \ + anv_image.c \ + anv_intel.c \ + anv_meta.c \ + anv_meta_blit.c \ + anv_meta_clear.c \ + anv_meta_resolve.c \ + anv_nir_apply_dynamic_offsets.c \ + anv_nir_apply_pipeline_layout.c \ + anv_nir_lower_push_constants.c \ + anv_pass.c \ + anv_pipeline.c \ + anv_pipeline_cache.c \ + anv_private.h \ + anv_query.c \ + anv_util.c \ + anv_wsi.c \ + anv_wsi_x11.c + +BUILT_SOURCES = \ + anv_entrypoints.h \ + anv_entrypoints.c + +libanv_gen7_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c \ + gen7_state.c +libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=70 + +libanv_gen75_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c \ + gen7_state.c +libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=75 + +libanv_gen8_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen8_state.c +libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=80 + +libanv_gen9_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen8_state.c +libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=90 + +if HAVE_EGL_PLATFORM_WAYLAND +BUILT_SOURCES += \ + wayland-drm-protocol.c \ + wayland-drm-client-protocol.h + +%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@ + +%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@ + +AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm +VULKAN_SOURCES += \ + wayland-drm-protocol.c \ + anv_wsi_wayland.c +libvulkan_intel_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM +endif + 
+libvulkan_intel_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem.c + +anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< header > $@ + +anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@ + +CLEANFILES = $(BUILT_SOURCES) + +libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ + $(top_builddir)/src/intel/isl/libisl.la \ + $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ + $(top_builddir)/src/mesa/libmesa.la \ + $(top_builddir)/src/mesa/drivers/dri/common/libdri_test_stubs.la \ + -lpthread -ldl -lstdc++ \ + $(PER_GEN_LIBS) + +libvulkan_intel_la_LDFLAGS = \ + -module -avoid-version -shared -shrext .so + + +# Generate icd files. It would be nice to just be able to add these to +# AC_CONFIG_FILES, but @libdir@ typically expands to '${exec_prefix}/lib64', +# which we can't put in the icd file. When running sed from the Makefile we +# can use ${libdir}, which expands completely and we avoid putting Makefile +# variables in the icd file. + +icdconfdir=$(sysconfdir)/vulkan/icd.d +icdconf_DATA = intel_icd.json +noinst_DATA = dev_icd.json + +%.json : %.json.in + $(AM_V_GEN) $(SED) \ + -e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \ + -e "s#@install_libdir@#${libdir}#" < $< > $@ + + +# Libvulkan with dummy gem. Used for unit tests. 
+ +libvulkan_test_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem_stubs.c + +libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) +libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) + +include $(top_srcdir)/install-lib-links.mk diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c new file mode 100644 index 00000000000..a7ae975656b --- /dev/null +++ b/src/intel/vulkan/anv_allocator.c @@ -0,0 +1,862 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define _DEFAULT_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#ifdef HAVE_VALGRIND +#define VG_NOACCESS_READ(__ptr) ({ \ + VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \ + __typeof(*(__ptr)) __val = *(__ptr); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\ + __val; \ +}) +#define VG_NOACCESS_WRITE(__ptr, __val) ({ \ + VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \ + *(__ptr) = (__val); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \ +}) +#else +#define VG_NOACCESS_READ(__ptr) (*(__ptr)) +#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val)) +#endif + +/* Design goals: + * + * - Lock free (except when resizing underlying bos) + * + * - Constant time allocation with typically only one atomic + * + * - Multiple allocation sizes without fragmentation + * + * - Can grow while keeping addresses and offset of contents stable + * + * - All allocations within one bo so we can point one of the + * STATE_BASE_ADDRESS pointers at it. + * + * The overall design is a two-level allocator: top level is a fixed size, big + * block (8k) allocator, which operates out of a bo. Allocation is done by + * either pulling a block from the free list or growing the used range of the + * bo. Growing the range may run out of space in the bo which we then need to + * grow. Growing the bo is tricky in a multi-threaded, lockless environment: + * we need to keep all pointers and contents in the old map valid. GEM bos in + * general can't grow, but we use a trick: we create a memfd and use ftruncate + * to grow it as necessary. We mmap the new size and then create a gem bo for + * it using the new gem userptr ioctl. Without heavy-handed locking around + * our allocation fast-path, there isn't really a way to munmap the old mmap, + * so we just keep it around until garbage collection time. 
While the block + * allocator is lockless for normal operations, we block other threads trying + * to allocate while we're growing the map. It sholdn't happen often, and + * growing is fast anyway. + * + * At the next level we can use various sub-allocators. The state pool is a + * pool of smaller, fixed size objects, which operates much like the block + * pool. It uses a free list for freeing objects, but when it runs out of + * space it just allocates a new block from the block pool. This allocator is + * intended for longer lived state objects such as SURFACE_STATE and most + * other persistent state objects in the API. We may need to track more info + * with these object and a pointer back to the CPU object (eg VkImage). In + * those cases we just allocate a slightly bigger object and put the extra + * state after the GPU state object. + * + * The state stream allocator works similar to how the i965 DRI driver streams + * all its state. Even with Vulkan, we need to emit transient state (whether + * surface state base or dynamic state base), and for that we can just get a + * block and fill it up. These cases are local to a command buffer and the + * sub-allocator need not be thread safe. The streaming allocator gets a new + * block when it runs out of space and chains them together so they can be + * easily freed. + */ + +/* Allocations are always at least 64 byte aligned, so 1 is an invalid value. + * We use it to indicate the free list is empty. 
*/ +#define EMPTY 1 + +struct anv_mmap_cleanup { + void *map; + size_t size; + uint32_t gem_handle; +}; + +#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0}) + +static inline long +sys_futex(void *addr1, int op, int val1, + struct timespec *timeout, void *addr2, int val3) +{ + return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); +} + +static inline int +futex_wake(uint32_t *addr, int count) +{ + return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0); +} + +static inline int +futex_wait(uint32_t *addr, int32_t value) +{ + return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); +} + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +static inline uint32_t +ilog2_round_up(uint32_t value) +{ + assert(value != 0); + return 32 - __builtin_clz(value - 1); +} + +static inline uint32_t +round_to_power_of_two(uint32_t value) +{ + return 1 << ilog2_round_up(value); +} + +static bool +anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset) +{ + union anv_free_list current, new, old; + + current.u64 = list->u64; + while (current.offset != EMPTY) { + /* We have to add a memory barrier here so that the list head (and + * offset) gets read before we read the map pointer. This way we + * know that the map pointer is valid for the given offset at the + * point where we read it. 
+ */ + __sync_synchronize(); + + int32_t *next_ptr = *map + current.offset; + new.offset = VG_NOACCESS_READ(next_ptr); + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + if (old.u64 == current.u64) { + *offset = current.offset; + return true; + } + current = old; + } + + return false; +} + +static void +anv_free_list_push(union anv_free_list *list, void *map, int32_t offset) +{ + union anv_free_list current, old, new; + int32_t *next_ptr = map + offset; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, current.offset); + new.offset = offset; + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + } while (old.u64 != current.u64); +} + +/* All pointers in the ptr_free_list are assumed to be page-aligned. This + * means that the bottom 12 bits should all be zero. + */ +#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) +#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) +#define PFL_PACK(ptr, count) ({ \ + assert(((uintptr_t)(ptr) & 0xfff) == 0); \ + (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ +}) + +static bool +anv_ptr_free_list_pop(void **list, void **elem) +{ + void *current = *list; + while (PFL_PTR(current) != NULL) { + void **next_ptr = PFL_PTR(current); + void *new_ptr = VG_NOACCESS_READ(next_ptr); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(new_ptr, new_count); + void *old = __sync_val_compare_and_swap(list, current, new); + if (old == current) { + *elem = PFL_PTR(current); + return true; + } + current = old; + } + + return false; +} + +static void +anv_ptr_free_list_push(void **list, void *elem) +{ + void *old, *current; + void **next_ptr = elem; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current)); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(elem, new_count); + old = __sync_val_compare_and_swap(list, current, new); + } 
while (old != current); +} + +static uint32_t +anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state); + +void +anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size) +{ + assert(util_is_power_of_two(block_size)); + + pool->device = device; + pool->bo.gem_handle = 0; + pool->bo.offset = 0; + pool->bo.size = 0; + pool->block_size = block_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + pool->back_free_list = ANV_FREE_LIST_EMPTY; + + pool->fd = memfd_create("block pool", MFD_CLOEXEC); + if (pool->fd == -1) + return; + + /* Just make it 2GB up-front. The Linux kernel won't actually back it + * with pages until we either map and fault on one of them or we use + * userptr and send a chunk of it off to the GPU. + */ + if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) + return; + + anv_vector_init(&pool->mmap_cleanups, + round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); + + pool->state.next = 0; + pool->state.end = 0; + pool->back_state.next = 0; + pool->back_state.end = 0; + + /* Immediately grow the pool so we'll have a backing bo. */ + pool->state.end = anv_block_pool_grow(pool, &pool->state); +} + +void +anv_block_pool_finish(struct anv_block_pool *pool) +{ + struct anv_mmap_cleanup *cleanup; + + anv_vector_foreach(cleanup, &pool->mmap_cleanups) { + if (cleanup->map) + munmap(cleanup->map, cleanup->size); + if (cleanup->gem_handle) + anv_gem_close(pool->device, cleanup->gem_handle); + } + + anv_vector_finish(&pool->mmap_cleanups); + + close(pool->fd); +} + +#define PAGE_SIZE 4096 + +/** Grows and re-centers the block pool. + * + * We grow the block pool in one or both directions in such a way that the + * following conditions are met: + * + * 1) The size of the entire pool is always a power of two. + * + * 2) The pool only grows on both ends. Neither end can get + * shortened. 
+ * + * 3) At the end of the allocation, we have about twice as much space + * allocated for each end as we have used. This way the pool doesn't + * grow too far in one direction or the other. + * + * 4) If the _alloc_back() has never been called, then the back portion of + * the pool retains a size of zero. (This makes it easier for users of + * the block pool that only want a one-sided pool.) + * + * 5) We have enough space allocated for at least one more block in + * whichever side `state` points to. + * + * 6) The center of the pool is always aligned to both the block_size of + * the pool and a 4K CPU page. + */ +static uint32_t +anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) +{ + size_t size; + void *map; + uint32_t gem_handle; + struct anv_mmap_cleanup *cleanup; + + pthread_mutex_lock(&pool->device->mutex); + + assert(state == &pool->state || state == &pool->back_state); + + /* Gather a little usage information on the pool. Since we may have + * threads waiting in queue to get some storage while we resize, it's + * actually possible that total_used will be larger than old_size. In + * particular, block_pool_alloc() increments state->next prior to + * calling block_pool_grow, so this ensures that we get enough space for + * whichever side tries to grow the pool. + * + * We align to a page size because it makes it easier to do our + * calculations later in such a way that we stay page-aligned.
+ */ + uint32_t back_used = align_u32(pool->back_state.next, PAGE_SIZE); + uint32_t front_used = align_u32(pool->state.next, PAGE_SIZE); + uint32_t total_used = front_used + back_used; + + assert(state == &pool->state || back_used > 0); + + size_t old_size = pool->bo.size; + + if (old_size != 0 && + back_used * 2 <= pool->center_bo_offset && + front_used * 2 <= (old_size - pool->center_bo_offset)) { + /* If we're in this case then this isn't the first allocation and we + * already have enough space on both sides to hold double what we + * have allocated. There's nothing for us to do. + */ + goto done; + } + + if (old_size == 0) { + /* This is the first allocation */ + size = MAX2(32 * pool->block_size, PAGE_SIZE); + } else { + size = old_size * 2; + } + + /* We can't have a block pool bigger than 1GB because we use signed + * 32-bit offsets in the free list and we don't want overflow. We + * should never need a block pool bigger than 1GB anyway. + */ + assert(size <= (1u << 31)); + + /* We compute a new center_bo_offset such that, when we double the size + * of the pool, we maintain the ratio of how much is used by each side. + * This way things should remain more-or-less balanced. + */ + uint32_t center_bo_offset; + if (back_used == 0) { + /* If we're in this case then we have never called alloc_back(). In + * this case, we want to keep the offset at 0 to make things as simple + * as possible for users that don't care about back allocations. + */ + center_bo_offset = 0; + } else { + /* Try to "center" the allocation based on how much is currently in + * use on each side of the center line.
+ */ + center_bo_offset = ((uint64_t)size * back_used) / total_used; + + /* Align down to a multiple of both the block size and page size */ + uint32_t granularity = MAX2(pool->block_size, PAGE_SIZE); + assert(util_is_power_of_two(granularity)); + center_bo_offset &= ~(granularity - 1); + + assert(center_bo_offset >= back_used); + + /* Make sure we don't shrink the back end of the pool */ + if (center_bo_offset < pool->back_state.end) + center_bo_offset = pool->back_state.end; + + /* Make sure that we don't shrink the front end of the pool */ + if (size - center_bo_offset < pool->state.end) + center_bo_offset = size - pool->state.end; + } + + assert(center_bo_offset % pool->block_size == 0); + assert(center_bo_offset % PAGE_SIZE == 0); + + /* Assert that we only ever grow the pool */ + assert(center_bo_offset >= pool->back_state.end); + assert(size - center_bo_offset >= pool->state.end); + + cleanup = anv_vector_add(&pool->mmap_cleanups); + if (!cleanup) + goto fail; + *cleanup = ANV_MMAP_CLEANUP_INIT; + + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps add up to less than the size of the + * current map. MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. + */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, pool->fd, + BLOCK_POOL_MEMFD_CENTER - center_bo_offset); + cleanup->map = map; + cleanup->size = size; + + if (map == MAP_FAILED) + goto fail; + + gem_handle = anv_gem_userptr(pool->device, map, size); + if (gem_handle == 0) + goto fail; + cleanup->gem_handle = gem_handle; + +#if 0 + /* Regular objects are created I915_CACHING_CACHED on LLC platforms and + * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are + * always created as I915_CACHING_CACHED, which on non-LLC means + * snooped. That can be useful but comes with a bit of overhead.
Since + * we're explicitly clflushing and don't want the overhead, we need to turn + * it off. */ + if (!pool->device->info.has_llc) { + anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_NONE); + anv_gem_set_domain(pool->device, gem_handle, + I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); + } +#endif + + /* Now that we successfully allocated everything, we can write the new + * values back into pool. */ + pool->map = map + center_bo_offset; + pool->center_bo_offset = center_bo_offset; + pool->bo.gem_handle = gem_handle; + pool->bo.size = size; + pool->bo.map = map; + pool->bo.index = 0; + +done: + pthread_mutex_unlock(&pool->device->mutex); + + /* Return the appropriate new size. This function never actually + * updates state->next. Instead, we let the caller do that because it + * needs to do so in order to maintain its concurrency model. + */ + if (state == &pool->state) { + return pool->bo.size - pool->center_bo_offset; + } else { + assert(pool->center_bo_offset > 0); + return pool->center_bo_offset; + } + +fail: + pthread_mutex_unlock(&pool->device->mutex); + + return 0; +} + +static uint32_t +anv_block_pool_alloc_new(struct anv_block_pool *pool, + struct anv_block_state *pool_state) +{ + struct anv_block_state state, old, new; + + while (1) { + state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size); + if (state.next < state.end) { + assert(pool->map); + return state.next; + } else if (state.next == state.end) { + /* We allocated the first block outside the pool, we have to grow it. + * pool_state->next acts as a mutex: threads who try to allocate now will + * get block indexes above the current limit and hit futex_wait + below.
*/ + new.next = state.next + pool->block_size; + new.end = anv_block_pool_grow(pool, pool_state); + assert(new.end >= new.next && new.end % pool->block_size == 0); + old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64); + if (old.next != state.next) + futex_wake(&pool_state->end, INT_MAX); + return state.next; + } else { + futex_wait(&pool_state->end, state.end); + continue; + } + } +} + +int32_t +anv_block_pool_alloc(struct anv_block_pool *pool) +{ + int32_t offset; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { + assert(offset >= 0); + assert(pool->map); + return offset; + } + + return anv_block_pool_alloc_new(pool, &pool->state); +} + +/* Allocates a block out of the back of the block pool. + * + * This will allocate a block earlier than the "start" of the block pool. + * The offsets returned from this function will be negative but will still + * be correct relative to the block pool's map pointer. + * + * If you ever use anv_block_pool_alloc_back, then you will have to do + * gymnastics with the block pool's BO when doing relocations. + */ +int32_t +anv_block_pool_alloc_back(struct anv_block_pool *pool) +{ + int32_t offset; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->back_free_list, &pool->map, &offset)) { + assert(offset < 0); + assert(pool->map); + return offset; + } + + offset = anv_block_pool_alloc_new(pool, &pool->back_state); + + /* The offset we get out of anv_block_pool_alloc_new() is actually the + * number of bytes downwards from the middle to the end of the block. + * We need to turn it into a (negative) offset from the middle to the + * start of the block.
+ */ + assert(offset >= 0); + return -(offset + pool->block_size); +} + +void +anv_block_pool_free(struct anv_block_pool *pool, int32_t offset) +{ + if (offset < 0) { + anv_free_list_push(&pool->back_free_list, pool->map, offset); + } else { + anv_free_list_push(&pool->free_list, pool->map, offset); + } +} + +static void +anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool, + size_t state_size) +{ + /* At least a cache line and must divide the block size. */ + assert(state_size >= 64 && util_is_power_of_two(state_size)); + + pool->state_size = state_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + pool->block.next = 0; + pool->block.end = 0; +} + +static uint32_t +anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool) +{ + int32_t offset; + struct anv_block_state block, old, new; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) { + assert(offset >= 0); + return offset; + } + + /* If free list was empty (or somebody raced us and took the items) we + * allocate a new item from the end of the block */ + restart: + block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size); + + if (block.next < block.end) { + return block.next; + } else if (block.next == block.end) { + offset = anv_block_pool_alloc(block_pool); + new.next = offset + pool->state_size; + new.end = offset + block_pool->block_size; + old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64); + if (old.next != block.next) + futex_wake(&pool->block.end, INT_MAX); + return offset; + } else { + futex_wait(&pool->block.end, block.end); + goto restart; + } +} + +static void +anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool, + uint32_t offset) +{ + anv_free_list_push(&pool->free_list, block_pool->map, offset); +} + +void +anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool) +{ + 
pool->block_pool = block_pool; + for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) { + size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i); + anv_fixed_size_state_pool_init(&pool->buckets[i], size); + } + VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); +} + +void +anv_state_pool_finish(struct anv_state_pool *pool) +{ + VG(VALGRIND_DESTROY_MEMPOOL(pool)); +} + +struct anv_state +anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) +{ + unsigned size_log2 = ilog2_round_up(size < align ? align : size); + assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + if (size_log2 < ANV_MIN_STATE_SIZE_LOG2) + size_log2 = ANV_MIN_STATE_SIZE_LOG2; + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + struct anv_state state; + state.alloc_size = 1 << size_log2; + state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], + pool->block_pool); + state.map = pool->block_pool->map + state.offset; + VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, size)); + return state; +} + +void +anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) +{ + assert(util_is_power_of_two(state.alloc_size)); + unsigned size_log2 = ilog2_round_up(state.alloc_size); + assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 && + size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + VG(VALGRIND_MEMPOOL_FREE(pool, state.map)); + anv_fixed_size_state_pool_free(&pool->buckets[bucket], + pool->block_pool, state.offset); +} + +#define NULL_BLOCK 1 +struct anv_state_stream_block { + /* The next block */ + struct anv_state_stream_block *next; + + /* The offset into the block pool at which this block starts */ + uint32_t offset; + +#ifdef HAVE_VALGRIND + /* A pointer to the first user-allocated thing in this block. This is + * what valgrind sees as the start of the block. + */ + void *_vg_ptr; +#endif +}; + +/* The state stream allocator is a one-shot, single threaded allocator for + * variable sized blocks. 
We use it for allocating dynamic state. + */ +void +anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool) +{ + stream->block_pool = block_pool; + stream->block = NULL; + + /* Ensure that next + whatever > end. This way the first call to + * state_stream_alloc fetches a new block. + */ + stream->next = 1; + stream->end = 0; + + VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false)); +} + +void +anv_state_stream_finish(struct anv_state_stream *stream) +{ + VG(const uint32_t block_size = stream->block_pool->block_size); + + struct anv_state_stream_block *next = stream->block; + while (next != NULL) { + VG(VALGRIND_MAKE_MEM_DEFINED(next, sizeof(*next))); + struct anv_state_stream_block sb = VG_NOACCESS_READ(next); + VG(VALGRIND_MEMPOOL_FREE(stream, sb._vg_ptr)); + VG(VALGRIND_MAKE_MEM_UNDEFINED(next, block_size)); + anv_block_pool_free(stream->block_pool, sb.offset); + next = sb.next; + } + + VG(VALGRIND_DESTROY_MEMPOOL(stream)); +} + +struct anv_state +anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment) +{ + struct anv_state_stream_block *sb = stream->block; + + struct anv_state state; + + state.offset = align_u32(stream->next, alignment); + if (state.offset + size > stream->end) { + uint32_t block = anv_block_pool_alloc(stream->block_pool); + sb = stream->block_pool->map + block; + + VG(VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb))); + sb->next = stream->block; + sb->offset = block; + VG(sb->_vg_ptr = NULL); + VG(VALGRIND_MAKE_MEM_NOACCESS(sb, stream->block_pool->block_size)); + + stream->block = sb; + stream->start = block; + stream->next = block + sizeof(*sb); + stream->end = block + stream->block_pool->block_size; + + state.offset = align_u32(stream->next, alignment); + assert(state.offset + size <= stream->end); + } + + assert(state.offset > stream->start); + state.map = (void *)sb + (state.offset - stream->start); + state.alloc_size = size; + +#ifdef HAVE_VALGRIND + void *vg_ptr = 
VG_NOACCESS_READ(&sb->_vg_ptr); + if (vg_ptr == NULL) { + vg_ptr = state.map; + VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); + VALGRIND_MEMPOOL_ALLOC(stream, vg_ptr, size); + } else { + void *state_end = state.map + state.alloc_size; + /* This only updates the mempool. The newly allocated chunk is still + * marked as NOACCESS. */ + VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, state_end - vg_ptr); + /* Mark the newly allocated chunk as undefined */ + VALGRIND_MAKE_MEM_UNDEFINED(state.map, state.alloc_size); + } +#endif + + stream->next = state.offset + size; + + return state; +} + +struct bo_pool_bo_link { + struct bo_pool_bo_link *next; + struct anv_bo bo; +}; + +void +anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t bo_size) +{ + pool->device = device; + pool->bo_size = bo_size; + pool->free_list = NULL; + + VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); +} + +void +anv_bo_pool_finish(struct anv_bo_pool *pool) +{ + struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); + while (link != NULL) { + struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); + + anv_gem_munmap(link_copy.bo.map, pool->bo_size); + anv_gem_close(pool->device, link_copy.bo.gem_handle); + link = link_copy.next; + } + + VG(VALGRIND_DESTROY_MEMPOOL(pool)); +} + +VkResult +anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) +{ + VkResult result; + + void *next_free_void; + if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { + struct bo_pool_bo_link *next_free = next_free_void; + *bo = VG_NOACCESS_READ(&next_free->bo); + assert(bo->map == next_free); + assert(bo->size == pool->bo_size); + + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + + return VK_SUCCESS; + } + + struct anv_bo new_bo; + + result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size); + if (result != VK_SUCCESS) + return result; + + assert(new_bo.size == pool->bo_size); + + new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size, 0); + if 
(new_bo.map == NULL) { + anv_gem_close(pool->device, new_bo.gem_handle); + return vk_error(VK_ERROR_MEMORY_MAP_FAILED); + } + + *bo = new_bo; + + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + + return VK_SUCCESS; +} + +void +anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) +{ + struct bo_pool_bo_link *link = bo->map; + link->bo = *bo; + + VG(VALGRIND_MEMPOOL_FREE(pool, bo->map)); + anv_ptr_free_list_push(&pool->free_list, link); +} diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c new file mode 100644 index 00000000000..d24dd06d7eb --- /dev/null +++ b/src/intel/vulkan/anv_batch_chain.c @@ -0,0 +1,1077 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen7_pack.h" +#include "genxml/gen8_pack.h" + +/** \file anv_batch_chain.c + * + * This file contains functions related to anv_cmd_buffer as a data + * structure. This involves everything required to create and destroy + * the actual batch buffers as well as link them together and handle + * relocations and surface state. It specifically does *not* contain any + * handling of actual vkCmd calls beyond vkCmdExecuteCommands. + */ + +/*-----------------------------------------------------------------------* + * Functions related to anv_reloc_list + *-----------------------------------------------------------------------*/ + +static VkResult +anv_reloc_list_init_clone(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + const struct anv_reloc_list *other_list) +{ + if (other_list) { + list->num_relocs = other_list->num_relocs; + list->array_length = other_list->array_length; + } else { + list->num_relocs = 0; + list->array_length = 256; + } + + list->relocs = + anv_alloc(alloc, list->array_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (list->relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + list->reloc_bos = + anv_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (list->reloc_bos == NULL) { + anv_free(alloc, list->relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (other_list) { + memcpy(list->relocs, other_list->relocs, + list->array_length * sizeof(*list->relocs)); + memcpy(list->reloc_bos, other_list->reloc_bos, + list->array_length * sizeof(*list->reloc_bos)); + } + + return VK_SUCCESS; +} + +VkResult +anv_reloc_list_init(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc) +{ + return anv_reloc_list_init_clone(list, alloc, NULL); +} + +void +anv_reloc_list_finish(struct anv_reloc_list *list, + const 
VkAllocationCallbacks *alloc) +{ + anv_free(alloc, list->relocs); + anv_free(alloc, list->reloc_bos); +} + +static VkResult +anv_reloc_list_grow(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + size_t num_additional_relocs) +{ + if (list->num_relocs + num_additional_relocs <= list->array_length) + return VK_SUCCESS; + + size_t new_length = list->array_length * 2; + while (new_length < list->num_relocs + num_additional_relocs) + new_length *= 2; + + struct drm_i915_gem_relocation_entry *new_relocs = + anv_alloc(alloc, new_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_reloc_bos = + anv_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_reloc_bos == NULL) { + anv_free(alloc, new_relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); + memcpy(new_reloc_bos, list->reloc_bos, + list->num_relocs * sizeof(*list->reloc_bos)); + + anv_free(alloc, list->relocs); + anv_free(alloc, list->reloc_bos); + + list->array_length = new_length; + list->relocs = new_relocs; + list->reloc_bos = new_reloc_bos; + + return VK_SUCCESS; +} + +uint64_t +anv_reloc_list_add(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + uint32_t offset, struct anv_bo *target_bo, uint32_t delta) +{ + struct drm_i915_gem_relocation_entry *entry; + int index; + + const uint32_t domain = + target_bo->is_winsys_bo ? I915_GEM_DOMAIN_RENDER : 0; + + anv_reloc_list_grow(list, alloc, 1); + /* TODO: Handle failure */ + + /* XXX: Can we use I915_EXEC_HANDLE_LUT?
*/ + index = list->num_relocs++; + list->reloc_bos[index] = target_bo; + entry = &list->relocs[index]; + entry->target_handle = target_bo->gem_handle; + entry->delta = delta; + entry->offset = offset; + entry->presumed_offset = target_bo->offset; + entry->read_domains = domain; + entry->write_domain = domain; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry))); + + return target_bo->offset + delta; +} + +static void +anv_reloc_list_append(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + struct anv_reloc_list *other, uint32_t offset) +{ + anv_reloc_list_grow(list, alloc, other->num_relocs); + /* TODO: Handle failure */ + + memcpy(&list->relocs[list->num_relocs], &other->relocs[0], + other->num_relocs * sizeof(other->relocs[0])); + memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], + other->num_relocs * sizeof(other->reloc_bos[0])); + + for (uint32_t i = 0; i < other->num_relocs; i++) + list->relocs[i + list->num_relocs].offset += offset; + + list->num_relocs += other->num_relocs; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch + *-----------------------------------------------------------------------*/ + +void * +anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) +{ + if (batch->next + num_dwords * 4 > batch->end) + batch->extend_cb(batch, batch->user_data); + + void *p = batch->next; + + batch->next += num_dwords * 4; + assert(batch->next <= batch->end); + + return p; +} + +uint64_t +anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t delta) +{ + return anv_reloc_list_add(batch->relocs, batch->alloc, + location - batch->start, bo, delta); +} + +void +anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +{ + uint32_t size, offset; + + size = other->next - other->start; + assert(size % 4 == 0); + + if (batch->next + size > batch->end) + batch->extend_cb(batch, batch->user_data); + + 
assert(batch->next + size <= batch->end); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); + memcpy(batch->next, other->start, size); + + offset = batch->next - batch->start; + anv_reloc_list_append(batch->relocs, batch->alloc, + other->relocs, offset); + + batch->next += size; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static VkResult +anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_free(&cmd_buffer->pool->alloc, bbo); + + return result; +} + +static VkResult +anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer, + const struct anv_batch_bo *other_bbo, + struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc, + &other_bbo->relocs); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + bbo->length = other_bbo->length; + memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); + + 
bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset; + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_free(&cmd_buffer->pool->alloc, bbo); + + return result; +} + +static void +anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->next = batch->start = bbo->bo.map; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; + bbo->last_ss_pool_bo_offset = 0; + bbo->relocs.num_relocs = 0; +} + +static void +anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->start = bbo->bo.map; + batch->next = bbo->bo.map + bbo->length; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; +} + +static void +anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) +{ + assert(batch->start == bbo->bo.map); + bbo->length = batch->next - batch->start; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); +} + +static void +anv_batch_bo_destroy(struct anv_batch_bo *bbo, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc); + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + anv_free(&cmd_buffer->pool->alloc, bbo); +} + +static VkResult +anv_batch_bo_list_clone(const struct list_head *list, + struct anv_cmd_buffer *cmd_buffer, + struct list_head *new_list) +{ + VkResult result = VK_SUCCESS; + + list_inithead(new_list); + + struct anv_batch_bo *prev_bbo = NULL; + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo *new_bbo = NULL; + result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo); + if (result != VK_SUCCESS) + break; + list_addtail(&new_bbo->link, new_list); + + if (prev_bbo) { + /* As we clone this list of batch_bo's, they chain one to the + * other using MI_BATCH_BUFFER_START 
commands. We need to fix up + * those relocations as we go. Fortunately, this is pretty easy + * as it will always be the last relocation in the list. + */ + uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; + assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); + prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; + } + + prev_bbo = new_bbo; + } + + if (result != VK_SUCCESS) { + list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) + anv_batch_bo_destroy(bbo, cmd_buffer); + } + + return result; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static inline struct anv_batch_bo * +anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); +} + +struct anv_address +anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + return (struct anv_address) { + .bo = &cmd_buffer->device->surface_state_block_pool.bo, + .offset = *(int32_t *)anv_vector_head(&cmd_buffer->bt_blocks), + }; +} + +static void +emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) +{ + /* In gen8+ the address field grew to two dwords to accomodate 48 bit + * offsets. The high 16 bits are in the last dword, so we can use the gen8 + * version in either case, as long as we set the instruction length in the + * header accordingly. This means that we always emit three dwords here + * and all the padding and adjustment we do in this file works for all + * gens. 
+ */ + + const uint32_t gen7_length = + GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias; + const uint32_t gen8_length = + GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias; + + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, + .DWordLength = cmd_buffer->device->info.gen < 8 ? + gen7_length : gen8_length, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { bo, offset }); +} + +static void +cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo *bbo) +{ + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_batch_bo *current_bbo = + anv_cmd_buffer_current_batch_bo(cmd_buffer); + + /* We set the end of the batch a little short so we would be sure we + * have room for the chaining command. Since we're about to emit the + * chaining command, let's set it back where it should go. + */ + batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); + + emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0); + + anv_batch_bo_finish(current_bbo, batch); +} + +static VkResult +anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) +{ + struct anv_cmd_buffer *cmd_buffer = _data; + struct anv_batch_bo *new_bbo; + + VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo); + if (result != VK_SUCCESS) + return result; + + struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + + cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); + + list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); + + anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, + uint32_t 
entries, uint32_t *state_offset) +{ + struct anv_block_pool *block_pool = + &cmd_buffer->device->surface_state_block_pool; + int32_t *bt_block = anv_vector_head(&cmd_buffer->bt_blocks); + struct anv_state state; + + state.alloc_size = align_u32(entries * 4, 32); + + if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size) + return (struct anv_state) { 0 }; + + state.offset = cmd_buffer->bt_next; + state.map = block_pool->map + *bt_block + state.offset; + + cmd_buffer->bt_next += state.alloc_size; + + assert(*bt_block < 0); + *state_offset = -(*bt_block); + + return state; +} + +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer) +{ + return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); +} + +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment) +{ + return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + size, alignment); +} + +VkResult +anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_block_pool *block_pool = + &cmd_buffer->device->surface_state_block_pool; + + int32_t *offset = anv_vector_add(&cmd_buffer->bt_blocks); + if (offset == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *offset = anv_block_pool_alloc_back(block_pool); + cmd_buffer->bt_next = 0; + + return VK_SUCCESS; +} + +VkResult +anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo; + VkResult result; + + list_inithead(&cmd_buffer->batch_bos); + + result = anv_batch_bo_create(cmd_buffer, &batch_bo); + if (result != VK_SUCCESS) + return result; + + list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); + + cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc; + cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; + cmd_buffer->batch.user_data = cmd_buffer; + + anv_batch_bo_start(batch_bo, &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); 
+ + int success = anv_vector_init(&cmd_buffer->seen_bbos, + sizeof(struct anv_bo *), + 8 * sizeof(struct anv_bo *)); + if (!success) + goto fail_batch_bo; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; + + success = anv_vector_init(&cmd_buffer->bt_blocks, sizeof(int32_t), + 8 * sizeof(int32_t)); + if (!success) + goto fail_seen_bbos; + + result = anv_reloc_list_init(&cmd_buffer->surface_relocs, + &cmd_buffer->pool->alloc); + if (result != VK_SUCCESS) + goto fail_bt_blocks; + + anv_cmd_buffer_new_binding_table_block(cmd_buffer); + + cmd_buffer->execbuf2.objects = NULL; + cmd_buffer->execbuf2.bos = NULL; + cmd_buffer->execbuf2.array_length = 0; + + return VK_SUCCESS; + + fail_bt_blocks: + anv_vector_finish(&cmd_buffer->bt_blocks); + fail_seen_bbos: + anv_vector_finish(&cmd_buffer->seen_bbos); + fail_batch_bo: + anv_batch_bo_destroy(batch_bo, cmd_buffer); + + return result; +} + +void +anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + int32_t *bt_block; + anv_vector_foreach(bt_block, &cmd_buffer->bt_blocks) { + anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, + *bt_block); + } + anv_vector_finish(&cmd_buffer->bt_blocks); + + anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc); + + anv_vector_finish(&cmd_buffer->seen_bbos); + + /* Destroy all of the batch buffers */ + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->batch_bos, link) { + anv_batch_bo_destroy(bbo, cmd_buffer); + } + + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.objects); + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.bos); +} + +void +anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + /* Delete all but the first batch bo */ + assert(!list_empty(&cmd_buffer->batch_bos)); + while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + list_del(&bbo->link); + 
anv_batch_bo_destroy(bbo, cmd_buffer); + } + assert(!list_empty(&cmd_buffer->batch_bos)); + + anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), + &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + while (anv_vector_length(&cmd_buffer->bt_blocks) > 1) { + int32_t *bt_block = anv_vector_remove(&cmd_buffer->bt_blocks); + anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, + *bt_block); + } + assert(anv_vector_length(&cmd_buffer->bt_blocks) == 1); + cmd_buffer->bt_next = 0; + + cmd_buffer->surface_relocs.num_relocs = 0; + + /* Reset the list of seen buffers */ + cmd_buffer->seen_bbos.head = 0; + cmd_buffer->seen_bbos.tail = 0; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = + anv_cmd_buffer_current_batch_bo(cmd_buffer); +} + +void +anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + /* When we start a batch buffer, we subtract a certain amount of + * padding from the end to ensure that we always have room to emit a + * BATCH_BUFFER_START to chain to the next BO. We need to remove + * that padding before we end the batch; otherwise, we may end up + * with our BATCH_BUFFER_END in another BO. + */ + cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size); + + anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END); + + /* Round batch up to an even number of dwords. 
*/ + if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) + anv_batch_emit(&cmd_buffer->batch, GEN7_MI_NOOP); + + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; + } + + anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + /* If this is a secondary command buffer, we need to determine the + * mode in which it will be executed with vkExecuteCommands. We + * determine this statically here so that this stays in sync with the + * actual ExecuteCommands implementation. + */ + if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && + (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) { + /* If the secondary has exactly one batch buffer in its list *and* + * that batch buffer is less than half of the maximum size, we're + * probably better of simply copying it into our batch. + */ + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; + } else if (!(cmd_buffer->usage_flags & + VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; + + /* When we chain, we need to add an MI_BATCH_BUFFER_START command + * with its relocation. In order to handle this we'll increment here + * so we can unconditionally decrement right before adding the + * MI_BATCH_BUFFER_START command. 
+ */ + batch_bo->relocs.num_relocs++; + cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4; + } else { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; + } + } +} + +static inline VkResult +anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, + struct list_head *list) +{ + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos); + if (bbo_ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *bbo_ptr = bbo; + } + + return VK_SUCCESS; +} + +void +anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary) +{ + switch (secondary->exec_mode) { + case ANV_CMD_BUFFER_EXEC_MODE_EMIT: + anv_batch_emit_batch(&primary->batch, &secondary->batch); + anv_cmd_buffer_emit_state_base_address(primary); + break; + case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { + struct anv_batch_bo *first_bbo = + list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); + + emit_batch_buffer_start(primary, &first_bbo->bo, 0); + + struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); + assert(primary->batch.start == this_bbo->bo.map); + uint32_t offset = primary->batch.next - primary->batch.start; + const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4; + + /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we + * can emit a new command and relocation for the current splice. In + * order to handle the initial-use case, we incremented next and + * num_relocs in end_batch_buffer() so we can alyways just subtract + * here. 
+ */ + last_bbo->relocs.num_relocs--; + secondary->batch.next -= inst_size; + emit_batch_buffer_start(secondary, &this_bbo->bo, offset); + anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); + + /* After patching up the secondary buffer, we need to clflush the + * modified instruction in case we're on a !llc platform. We use a + * little loop to handle the case where the instruction crosses a cache + * line boundary. + */ + if (!primary->device->info.has_llc) { + void *inst = secondary->batch.next - inst_size; + void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK); + __builtin_ia32_mfence(); + while (p < secondary->batch.next) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } + } + + anv_cmd_buffer_emit_state_base_address(primary); + break; + } + case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { + struct list_head copy_list; + VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, + secondary, + ©_list); + if (result != VK_SUCCESS) + return; /* FIXME */ + + anv_cmd_buffer_add_seen_bbos(primary, ©_list); + + struct anv_batch_bo *first_bbo = + list_first_entry(©_list, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(©_list, struct anv_batch_bo, link); + + cmd_buffer_chain_to_batch_bo(primary, first_bbo); + + list_splicetail(©_list, &primary->batch_bos); + + anv_batch_bo_continue(last_bbo, &primary->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + anv_cmd_buffer_emit_state_base_address(primary); + break; + } + default: + assert(!"Invalid execution mode"); + } + + anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc, + &secondary->surface_relocs, 0); +} + +static VkResult +anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, + struct anv_reloc_list *relocs) +{ + struct drm_i915_gem_exec_object2 *obj = NULL; + + if (bo->index < cmd_buffer->execbuf2.bo_count && + cmd_buffer->execbuf2.bos[bo->index] == bo) + obj = &cmd_buffer->execbuf2.objects[bo->index]; + + if 
(obj == NULL) { + /* We've never seen this one before. Add it to the list and assign + * an id that we can use later. + */ + if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { + uint32_t new_len = cmd_buffer->execbuf2.objects ? + cmd_buffer->execbuf2.array_length * 2 : 64; + + struct drm_i915_gem_exec_object2 *new_objects = + anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_objects), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_objects == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_bos = + anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_bos), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_objects == NULL) { + anv_free(&cmd_buffer->pool->alloc, new_objects); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (cmd_buffer->execbuf2.objects) { + memcpy(new_objects, cmd_buffer->execbuf2.objects, + cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); + memcpy(new_bos, cmd_buffer->execbuf2.bos, + cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); + } + + cmd_buffer->execbuf2.objects = new_objects; + cmd_buffer->execbuf2.bos = new_bos; + cmd_buffer->execbuf2.array_length = new_len; + } + + assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); + + bo->index = cmd_buffer->execbuf2.bo_count++; + obj = &cmd_buffer->execbuf2.objects[bo->index]; + cmd_buffer->execbuf2.bos[bo->index] = bo; + + obj->handle = bo->gem_handle; + obj->relocation_count = 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = bo->is_winsys_bo ? EXEC_OBJECT_WRITE : 0; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + } + + if (relocs != NULL && obj->relocation_count == 0) { + /* This is the first time we've ever seen a list of relocations for + * this BO. Go ahead and set the relocations and then walk the list + * of relocations and add them all. 
+ */ + obj->relocation_count = relocs->num_relocs; + obj->relocs_ptr = (uintptr_t) relocs->relocs; + + for (size_t i = 0; i < relocs->num_relocs; i++) { + /* A quick sanity check on relocations */ + assert(relocs->relocs[i].offset < bo->size); + anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); + } + } + + return VK_SUCCESS; +} + +static void +anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, + struct anv_reloc_list *list) +{ + struct anv_bo *bo; + + /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in + * struct drm_i915_gem_exec_object2 against the bos current offset and if + * all bos haven't moved it will skip relocation processing alltogether. + * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming + * value of offset so we can set it either way. For that to work we need + * to make sure all relocs use the same presumed offset. + */ + + for (size_t i = 0; i < list->num_relocs; i++) { + bo = list->reloc_bos[i]; + if (bo->offset != list->relocs[i].presumed_offset) + cmd_buffer->execbuf2.need_reloc = true; + + list->relocs[i].target_handle = bo->index; + } +} + +static uint64_t +read_reloc(const struct anv_device *device, const void *p) +{ + if (device->info.gen >= 8) + return *(uint64_t *)p; + else + return *(uint32_t *)p; +} + +static void +write_reloc(const struct anv_device *device, void *p, uint64_t v) +{ + if (device->info.gen >= 8) + *(uint64_t *)p = v; + else + *(uint32_t *)p = v; +} + +static void +adjust_relocations_from_block_pool(struct anv_block_pool *pool, + struct anv_reloc_list *relocs) +{ + for (size_t i = 0; i < relocs->num_relocs; i++) { + /* In general, we don't know how stale the relocated value is. It + * may have been used last time or it may not. Since we don't want + * to stomp it while the GPU may be accessing it, we haven't updated + * it anywhere else in the code. 
Instead, we just set the presumed + * offset to what it is now based on the delta and the data in the + * block pool. Then the kernel will update it for us if needed. + */ + assert(relocs->relocs[i].offset < pool->state.end); + const void *p = pool->map + relocs->relocs[i].offset; + + /* We're reading back the relocated value from potentially incoherent + * memory here. However, any change to the value will be from the kernel + * writing out relocations, which will keep the CPU cache up to date. + */ + relocs->relocs[i].presumed_offset = + read_reloc(pool->device, p) - relocs->relocs[i].delta; + + /* All of the relocations from this block pool to other BO's should + * have been emitted relative to the surface block pool center. We + * need to add the center offset to make them relative to the + * beginning of the actual GEM bo. + */ + relocs->relocs[i].offset += pool->center_bo_offset; + } +} + +static void +adjust_relocations_to_block_pool(struct anv_block_pool *pool, + struct anv_bo *from_bo, + struct anv_reloc_list *relocs, + uint32_t *last_pool_center_bo_offset) +{ + assert(*last_pool_center_bo_offset <= pool->center_bo_offset); + uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset; + + /* When we initially emit relocations into a block pool, we don't + * actually know what the final center_bo_offset will be so we just emit + * it as if center_bo_offset == 0. Now that we know what the center + * offset is, we need to walk the list of relocations and adjust any + * relocations that point to the pool bo with the correct offset. + */ + for (size_t i = 0; i < relocs->num_relocs; i++) { + if (relocs->reloc_bos[i] == &pool->bo) { + /* Adjust the delta value in the relocation to correctly + * correspond to the new delta. Initially, this value may have + * been negative (if treated as unsigned), but we trust in + * uint32_t roll-over to fix that for us at this point. 
+ */ + relocs->relocs[i].delta += delta; + + /* Since the delta has changed, we need to update the actual + * relocated value with the new presumed value. This function + * should only be called on batch buffers, so we know it isn't in + * use by the GPU at the moment. + */ + assert(relocs->relocs[i].offset < from_bo->size); + write_reloc(pool->device, from_bo->map + relocs->relocs[i].offset, + relocs->relocs[i].presumed_offset + + relocs->relocs[i].delta); + } + } + + *last_pool_center_bo_offset = pool->center_bo_offset; +} + +void +anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_block_pool *ss_pool = + &cmd_buffer->device->surface_state_block_pool; + + cmd_buffer->execbuf2.bo_count = 0; + cmd_buffer->execbuf2.need_reloc = false; + + adjust_relocations_from_block_pool(ss_pool, &cmd_buffer->surface_relocs); + anv_cmd_buffer_add_bo(cmd_buffer, &ss_pool->bo, &cmd_buffer->surface_relocs); + + /* First, we walk over all of the bos we've seen and add them and their + * relocations to the validate list. + */ + struct anv_batch_bo **bbo; + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + adjust_relocations_to_block_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs, + &(*bbo)->last_ss_pool_bo_offset); + + anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); + } + + struct anv_batch_bo *first_batch_bo = + list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); + + /* The kernel requires that the last entry in the validation list be the + * batch buffer to execute. We can simply swap the element + * corresponding to the first batch_bo in the chain with the last + * element in the list. 
+ */ + if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { + uint32_t idx = first_batch_bo->bo.index; + uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1; + + struct drm_i915_gem_exec_object2 tmp_obj = + cmd_buffer->execbuf2.objects[idx]; + assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); + + cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx]; + cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx]; + cmd_buffer->execbuf2.bos[idx]->index = idx; + + cmd_buffer->execbuf2.objects[last_idx] = tmp_obj; + cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo; + first_batch_bo->bo.index = last_idx; + } + + /* Now we go through and fixup all of the relocation lists to point to + * the correct indices in the object array. We have to do this after we + * reorder the list above as some of the indices may have changed. + */ + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); + + anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); + + if (!cmd_buffer->device->info.has_llc) { + __builtin_ia32_mfence(); + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE) + __builtin_ia32_clflush((*bbo)->bo.map + i); + } + } + + cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, + .buffer_count = cmd_buffer->execbuf2.bo_count, + .batch_start_offset = 0, + .batch_len = batch->next - batch->start, + .cliprects_ptr = 0, + .num_cliprects = 0, + .DR1 = 0, + .DR4 = 0, + .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER | + I915_EXEC_CONSTANTS_REL_GENERAL, + .rsvd1 = cmd_buffer->device->context_id, + .rsvd2 = 0, + }; + + if (!cmd_buffer->execbuf2.need_reloc) + cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; +} diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c new file mode 
100644 index 00000000000..b060828cf61 --- /dev/null +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -0,0 +1,1191 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/** \file anv_cmd_buffer.c + * + * This file contains all of the stuff for emitting commands into a command + * buffer. This includes implementations of most of the vkCmd* + * entrypoints. This file is concerned entirely with state emission and + * not with the command buffer data structure itself. As far as this file + * is concerned, most of anv_cmd_buffer is magic. + */ + +/* TODO: These are taken from GLES. 
We should check the Vulkan spec */ +const struct anv_dynamic_state default_dynamic_state = { + .viewport = { + .count = 0, + }, + .scissor = { + .count = 0, + }, + .line_width = 1.0f, + .depth_bias = { + .bias = 0.0f, + .clamp = 0.0f, + .slope = 0.0f, + }, + .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, + .depth_bounds = { + .min = 0.0f, + .max = 1.0f, + }, + .stencil_compare_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_write_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_reference = { + .front = 0u, + .back = 0u, + }, +}; + +void +anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask) +{ + if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + dest->viewport.count = src->viewport.count; + typed_memcpy(dest->viewport.viewports, src->viewport.viewports, + src->viewport.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + dest->scissor.count = src->scissor.count; + typed_memcpy(dest->scissor.scissors, src->scissor.scissors, + src->scissor.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) + dest->line_width = src->line_width; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) + dest->depth_bias = src->depth_bias; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) + typed_memcpy(dest->blend_constants, src->blend_constants, 4); + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) + dest->depth_bounds = src->depth_bounds; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) + dest->stencil_compare_mask = src->stencil_compare_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) + dest->stencil_write_mask = src->stencil_write_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) + dest->stencil_reference = src->stencil_reference; +} + +static void +anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_state *state = &cmd_buffer->state; + + memset(&state->descriptors, 0, 
sizeof(state->descriptors)); + memset(&state->push_constants, 0, sizeof(state->push_constants)); + memset(state->binding_tables, 0, sizeof(state->binding_tables)); + memset(state->samplers, 0, sizeof(state->samplers)); + + /* 0 isn't a valid config. This ensures that we always configure L3$. */ + cmd_buffer->state.current_l3_config = 0; + + state->dirty = ~0; + state->vb_dirty = 0; + state->descriptors_dirty = 0; + state->push_constants_dirty = 0; + state->pipeline = NULL; + state->restart_index = UINT32_MAX; + state->dynamic = default_dynamic_state; + state->need_query_wa = true; + + if (state->attachments != NULL) { + anv_free(&cmd_buffer->pool->alloc, state->attachments); + state->attachments = NULL; + } + + state->gen7.index_buffer = NULL; +} + +/** + * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass. + */ +void +anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo *info) +{ + struct anv_cmd_state *state = &cmd_buffer->state; + ANV_FROM_HANDLE(anv_render_pass, pass, info->renderPass); + + anv_free(&cmd_buffer->pool->alloc, state->attachments); + + if (pass->attachment_count == 0) { + state->attachments = NULL; + return; + } + + state->attachments = anv_alloc(&cmd_buffer->pool->alloc, + pass->attachment_count * + sizeof(state->attachments[0]), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (state->attachments == NULL) { + /* FIXME: Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */ + abort(); + } + + for (uint32_t i = 0; i < pass->attachment_count; ++i) { + struct anv_render_pass_attachment *att = &pass->attachments[i]; + VkImageAspectFlags clear_aspects = 0; + + if (anv_format_is_color(att->format)) { + /* color attachment */ + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; + } + } else { + /* depthstencil attachment */ + if (att->format->has_depth && + att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; + } 
+ if (att->format->has_stencil && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + } + + state->attachments[i].pending_clear_aspects = clear_aspects; + if (clear_aspects) { + assert(info->clearValueCount > i); + state->attachments[i].clear_value = info->pClearValues[i]; + } + } +} + +static VkResult +anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, uint32_t size) +{ + struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; + + if (*ptr == NULL) { + *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (*ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else if ((*ptr)->size < size) { + *ptr = anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (*ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + (*ptr)->size = size; + + return VK_SUCCESS; +} + +#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ + anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ + (offsetof(struct anv_push_constants, field) + \ + sizeof(cmd_buffer->state.push_constants[0]->field))) + +static VkResult anv_create_cmd_buffer( + struct anv_device * device, + struct anv_cmd_pool * pool, + VkCommandBufferLevel level, + VkCommandBuffer* pCommandBuffer) +{ + struct anv_cmd_buffer *cmd_buffer; + VkResult result; + + cmd_buffer = anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cmd_buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + cmd_buffer->device = device; + cmd_buffer->pool = pool; + cmd_buffer->level = level; + cmd_buffer->state.attachments = NULL; + + result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); + if (result != VK_SUCCESS) + goto fail; + + 
anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &device->dynamic_state_block_pool); + + if (pool) { + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + } else { + /* Init the pool_link so we can safefly call list_del when we destroy + * the command buffer + */ + list_inithead(&cmd_buffer->pool_link); + } + + *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer); + + return VK_SUCCESS; + + fail: + anv_free(&cmd_buffer->pool->alloc, cmd_buffer); + + return result; +} + +VkResult anv_AllocateCommandBuffers( + VkDevice _device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, pAllocateInfo->commandPool); + + VkResult result = VK_SUCCESS; + uint32_t i; + + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { + result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level, + &pCommandBuffers[i]); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) + anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, + i, pCommandBuffers); + + return result; +} + +static void +anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) +{ + list_del(&cmd_buffer->pool_link); + + anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); + anv_free(&cmd_buffer->pool->alloc, cmd_buffer); +} + +void anv_FreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers) +{ + for (uint32_t i = 0; i < commandBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); + + anv_cmd_buffer_destroy(cmd_buffer); + } +} + +VkResult 
anv_ResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->usage_flags = 0; + cmd_buffer->state.current_pipeline = UINT32_MAX; + anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); + anv_cmd_state_reset(cmd_buffer); + + return VK_SUCCESS; +} + +void +anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + switch (cmd_buffer->device->info.gen) { + case 7: + if (cmd_buffer->device->info.is_haswell) + return gen75_cmd_buffer_emit_state_base_address(cmd_buffer); + else + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + case 8: + return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); + case 9: + return gen9_cmd_buffer_emit_state_base_address(cmd_buffer); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_BeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + /* If this is the first vkBeginCommandBuffer, we must *initialize* the + * command buffer's state. Otherwise, we must *reset* its state. In both + * cases we reset it. + * + * From the Vulkan 1.0 spec: + * + * If a command buffer is in the executable state and the command buffer + * was allocated from a command pool with the + * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then + * vkBeginCommandBuffer implicitly resets the command buffer, behaving + * as if vkResetCommandBuffer had been called with + * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts + * the command buffer in the recording state. 
+ */ + anv_ResetCommandBuffer(commandBuffer, /*flags*/ 0); + + cmd_buffer->usage_flags = pBeginInfo->flags; + + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY || + !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)); + + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + if (cmd_buffer->usage_flags & + VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { + cmd_buffer->state.framebuffer = + anv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); + cmd_buffer->state.pass = + anv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); + + struct anv_subpass *subpass = + &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; + + anv_cmd_buffer_set_subpass(cmd_buffer, subpass); + } + + return VK_SUCCESS; +} + +VkResult anv_EndCommandBuffer( + VkCommandBuffer commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_device *device = cmd_buffer->device; + + anv_cmd_buffer_end_batch_buffer(cmd_buffer); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + /* The algorithm used to compute the validate list is not threadsafe as + * it uses the bo->index field. We have to lock the device around it. + * Fortunately, the chances for contention here are probably very low. 
+ */ + pthread_mutex_lock(&device->mutex); + anv_cmd_buffer_prepare_execbuf(cmd_buffer); + pthread_mutex_unlock(&device->mutex); + } + + return VK_SUCCESS; +} + +void anv_CmdBindPipeline( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + cmd_buffer->state.compute_pipeline = pipeline; + cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; + break; + + case VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd_buffer->state.pipeline = pipeline; + cmd_buffer->state.vb_dirty |= pipeline->vb_used; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; + cmd_buffer->state.descriptors_dirty |= pipeline->active_stages; + + /* Apply the dynamic state from the pipeline */ + cmd_buffer->state.dirty |= pipeline->dynamic_state_mask; + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, + &pipeline->dynamic_state, + pipeline->dynamic_state_mask); + break; + + default: + assert(!"invalid bind point"); + break; + } +} + +void anv_CmdSetViewport( + VkCommandBuffer commandBuffer, + uint32_t firstViewport, + uint32_t viewportCount, + const VkViewport* pViewports) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + const uint32_t total_count = firstViewport + viewportCount; + if (cmd_buffer->state.dynamic.viewport.count < total_count) + cmd_buffer->state.dynamic.viewport.count = total_count; + + memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport, + pViewports, viewportCount * sizeof(*pViewports)); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; +} + +void anv_CmdSetScissor( + VkCommandBuffer 
commandBuffer, + uint32_t firstScissor, + uint32_t scissorCount, + const VkRect2D* pScissors) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + const uint32_t total_count = firstScissor + scissorCount; + if (cmd_buffer->state.dynamic.scissor.count < total_count) + cmd_buffer->state.dynamic.scissor.count = total_count; + + memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor, + pScissors, scissorCount * sizeof(*pScissors)); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; +} + +void anv_CmdSetLineWidth( + VkCommandBuffer commandBuffer, + float lineWidth) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.line_width = lineWidth; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; +} + +void anv_CmdSetDepthBias( + VkCommandBuffer commandBuffer, + float depthBiasConstantFactor, + float depthBiasClamp, + float depthBiasSlopeFactor) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor; + cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; + cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; +} + +void anv_CmdSetBlendConstants( + VkCommandBuffer commandBuffer, + const float blendConstants[4]) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + memcpy(cmd_buffer->state.dynamic.blend_constants, + blendConstants, sizeof(float) * 4); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; +} + +void anv_CmdSetDepthBounds( + VkCommandBuffer commandBuffer, + float minDepthBounds, + float maxDepthBounds) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; + cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; +} + +void 
anv_CmdSetStencilCompareMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t compareMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; +} + +void anv_CmdSetStencilWriteMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t writeMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; +} + +void anv_CmdSetStencilReference( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t reference) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_reference.front = reference; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_reference.back = reference; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; +} + +void anv_CmdBindDescriptorSets( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t firstSet, + uint32_t descriptorSetCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t* pDynamicOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + struct anv_descriptor_set_layout *set_layout; + + assert(firstSet + descriptorSetCount < MAX_SETS); + + uint32_t dynamic_slot 
= 0; + for (uint32_t i = 0; i < descriptorSetCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + set_layout = layout->set[firstSet + i].layout; + + if (cmd_buffer->state.descriptors[firstSet + i] != set) { + cmd_buffer->state.descriptors[firstSet + i] = set; + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; + } + + if (set_layout->dynamic_offset_count > 0) { + anv_foreach_stage(s, set_layout->shader_stages) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic); + + struct anv_push_constants *push = + cmd_buffer->state.push_constants[s]; + + unsigned d = layout->set[firstSet + i].dynamic_offset_start; + const uint32_t *offsets = pDynamicOffsets + dynamic_slot; + struct anv_descriptor *desc = set->descriptors; + + for (unsigned b = 0; b < set_layout->binding_count; b++) { + if (set_layout->binding[b].dynamic_offset_index < 0) + continue; + + unsigned array_size = set_layout->binding[b].array_size; + for (unsigned j = 0; j < array_size; j++) { + uint32_t range = 0; + if (desc->buffer_view) + range = desc->buffer_view->range; + push->dynamic[d].offset = *(offsets++); + push->dynamic[d].range = range; + desc++; + d++; + } + } + } + cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; + } + } +} + +void anv_CmdBindVertexBuffers( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. 
*/ + + assert(firstBinding + bindingCount < MAX_VBS); + for (uint32_t i = 0; i < bindingCount; i++) { + vb[firstBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); + vb[firstBinding + i].offset = pOffsets[i]; + cmd_buffer->state.vb_dirty |= 1 << (firstBinding + i); + } +} + +static void +add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, + struct anv_state state, struct anv_bo *bo, uint32_t offset) +{ + /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and + * 9 for gen8+. We only write the first dword for gen8+ here and rely on + * the initial state to set the high bits to 0. */ + + const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 1 : 8; + + anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc, + state.offset + dword * 4, bo, offset); +} + +const struct anv_format * +anv_format_for_descriptor_type(VkDescriptorType type) +{ + switch (type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + return anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT); + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + return anv_format_for_vk_format(VK_FORMAT_UNDEFINED); + + default: + unreachable("Invalid descriptor type"); + } +} + +VkResult +anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, + struct anv_state *bt_state) +{ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_pipeline_bind_map *map; + uint32_t color_count, bias, state_offset; + + switch (stage) { + case MESA_SHADER_FRAGMENT: + map = &cmd_buffer->state.pipeline->bindings[stage]; + bias = MAX_RTS; + color_count = subpass->color_count; + break; + case MESA_SHADER_COMPUTE: + map = &cmd_buffer->state.compute_pipeline->bindings[stage]; + bias = 1; + color_count = 0; + break; + default: + map = &cmd_buffer->state.pipeline->bindings[stage]; + 
bias = 0; + color_count = 0; + break; + } + + if (color_count + map->surface_count == 0) { + *bt_state = (struct anv_state) { 0, }; + return VK_SUCCESS; + } + + *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, + bias + map->surface_count, + &state_offset); + uint32_t *bt_map = bt_state->map; + + if (bt_state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t a = 0; a < color_count; a++) { + const struct anv_image_view *iview = + fb->attachments[subpass->color_attachments[a]]; + + assert(iview->color_rt_surface_state.alloc_size); + bt_map[a] = iview->color_rt_surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, + iview->bo, iview->offset); + } + + if (stage == MESA_SHADER_COMPUTE && + cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) { + struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; + uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset; + + struct anv_state surface_state; + surface_state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer); + + const struct anv_format *format = + anv_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + anv_fill_buffer_surface_state(cmd_buffer->device, surface_state, + format->isl_format, bo_offset, 12, 1); + + bt_map[0] = surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); + } + + if (map->surface_count == 0) + goto out; + + if (map->image_count > 0) { + VkResult result = + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images); + if (result != VK_SUCCESS) + return result; + + cmd_buffer->state.push_constants_dirty |= 1 << stage; + } + + uint32_t image = 0; + for (uint32_t s = 0; s < map->surface_count; s++) { + struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + + 
struct anv_state surface_state; + struct anv_bo *bo; + uint32_t bo_offset; + + switch (desc->type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + /* Nothing for us to do here */ + continue; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + surface_state = desc->image_view->sampler_surface_state; + assert(surface_state.alloc_size); + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { + surface_state = desc->image_view->storage_surface_state; + assert(surface_state.alloc_size); + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + + struct brw_image_param *image_param = + &cmd_buffer->state.push_constants[stage]->images[image++]; + + anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view, + image_param); + image_param->surface_idx = bias + s; + break; + } + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + surface_state = desc->buffer_view->surface_state; + assert(surface_state.alloc_size); + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + surface_state = desc->buffer_view->storage_surface_state; + assert(surface_state.alloc_size); + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; + + struct brw_image_param *image_param = + &cmd_buffer->state.push_constants[stage]->images[image++]; + + anv_buffer_view_fill_image_param(cmd_buffer->device, desc->buffer_view, + image_param); + image_param->surface_idx = bias + s; + break; + + default: + assert(!"Invalid descriptor type"); + continue; + } + + bt_map[bias + s] = surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, surface_state, bo, 
bo_offset); + } + assert(image == map->image_count); + + out: + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*bt_state); + + return VK_SUCCESS; +} + +VkResult +anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, struct anv_state *state) +{ + struct anv_pipeline_bind_map *map; + + if (stage == MESA_SHADER_COMPUTE) + map = &cmd_buffer->state.compute_pipeline->bindings[stage]; + else + map = &cmd_buffer->state.pipeline->bindings[stage]; + + if (map->sampler_count == 0) { + *state = (struct anv_state) { 0, }; + return VK_SUCCESS; + } + + uint32_t size = map->sampler_count * 16; + *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); + + if (state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t s = 0; s < map->sampler_count; s++) { + struct anv_pipeline_binding *binding = &map->sampler_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + + if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER && + desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + continue; + + struct anv_sampler *sampler = desc->sampler; + + /* This can happen if we have an unfilled slot since TYPE_SAMPLER + * happens to be zero. 
+ */ + if (sampler == NULL) + continue; + + memcpy(state->map + (s * 16), + sampler->state, sizeof(sampler->state)); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*state); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + const void *data, uint32_t size, uint32_t alignment) +{ + struct anv_state state; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); + memcpy(state.map, data, size); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); + + return state; +} + +struct anv_state +anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment) +{ + struct anv_state state; + uint32_t *p; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + p = state.map; + for (uint32_t i = 0; i < dwords; i++) + p[i] = a[i] | b[i]; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); + + return state; +} + +/** + * @brief Setup the command buffer for recording commands inside the given + * subpass. + * + * This does not record all commands needed for starting the subpass. + * Starting the subpass may require additional commands. + * + * Note that vkCmdBeginRenderPass, vkCmdNextSubpass, and vkBeginCommandBuffer + * with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT, all setup the + * command buffer for recording commands for some subpass. But only the first + * two, vkCmdBeginRenderPass and vkCmdNextSubpass, can start a subpass. 
+ */ +void +anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + switch (cmd_buffer->device->info.gen) { + case 7: + gen7_cmd_buffer_set_subpass(cmd_buffer, subpass); + break; + case 8: + gen8_cmd_buffer_set_subpass(cmd_buffer, subpass); + break; + case 9: + gen9_cmd_buffer_set_subpass(cmd_buffer, subpass); + break; + default: + unreachable("unsupported gen\n"); + } +} + +struct anv_state +anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage) +{ + struct anv_push_constants *data = + cmd_buffer->state.push_constants[stage]; + struct brw_stage_prog_data *prog_data = + cmd_buffer->state.pipeline->prog_data[stage]; + + /* If we don't actually have any push constants, bail. */ + if (data == NULL || prog_data->nr_params == 0) + return (struct anv_state) { .offset = 0 }; + + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + prog_data->nr_params * sizeof(float), + 32 /* bottom 5 bits MBZ */); + + /* Walk through the param array and fill the buffer with data */ + uint32_t *u32_map = state.map; + for (unsigned i = 0; i < prog_data->nr_params; i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +struct anv_state +anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_push_constants *data = + cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + const unsigned push_constant_data_size = + (local_id_dwords + prog_data->nr_params) * 4; + const unsigned reg_aligned_constant_size = 
ALIGN(push_constant_data_size, 32); + const unsigned param_aligned_count = + reg_aligned_constant_size / sizeof(uint32_t); + + /* If we don't actually have any push constants, bail. */ + if (reg_aligned_constant_size == 0) + return (struct anv_state) { .offset = 0 }; + + const unsigned threads = pipeline->cs_thread_width_max; + const unsigned total_push_constants_size = + reg_aligned_constant_size * threads; + const unsigned push_constant_alignment = + cmd_buffer->device->info.gen < 8 ? 32 : 64; + const unsigned aligned_total_push_constants_size = + ALIGN(total_push_constants_size, push_constant_alignment); + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + aligned_total_push_constants_size, + push_constant_alignment); + + /* Walk through the param array and fill the buffer with data */ + uint32_t *u32_map = state.map; + + brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads, + reg_aligned_constant_size); + + /* Setup uniform data for the first thread */ + for (unsigned i = 0; i < prog_data->nr_params; i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset); + } + + /* Copy uniform data from the first thread to every other thread */ + const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t); + for (unsigned t = 1; t < threads; t++) { + memcpy(&u32_map[t * param_aligned_count + local_id_dwords], + &u32_map[local_id_dwords], + uniform_data_size); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +void anv_CmdPushConstants( + VkCommandBuffer commandBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t offset, + uint32_t size, + const void* pValues) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_foreach_stage(stage, stageFlags) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); + + 
memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, + pValues, size); + } + + cmd_buffer->state.push_constants_dirty |= stageFlags; +} + +void anv_CmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCmdBuffers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); + + assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + for (uint32_t i = 0; i < commandBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); + + assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); + + anv_cmd_buffer_add_secondary(primary, secondary); + } +} + +VkResult anv_CreateCommandPool( + VkDevice _device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCmdPool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_cmd_pool *pool; + + pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + if (pAllocator) + pool->alloc = *pAllocator; + else + pool->alloc = device->alloc; + + list_inithead(&pool->cmd_buffers); + + *pCmdPool = anv_cmd_pool_to_handle(pool); + + return VK_SUCCESS; +} + +void anv_DestroyCommandPool( + VkDevice _device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + + anv_ResetCommandPool(_device, commandPool, 0); + + anv_free2(&device->alloc, pAllocator, pool); +} + +VkResult anv_ResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + + /* FIXME: vkResetCommandPool must not destroy its command buffers. 
The + * Vulkan 1.0 spec requires that it only reset them: + * + * Resetting a command pool recycles all of the resources from all of + * the command buffers allocated from the command pool back to the + * command pool. All command buffers that have been allocated from the + * command pool are put in the initial state. + */ + list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, + &pool->cmd_buffers, pool_link) { + anv_cmd_buffer_destroy(cmd_buffer); + } + + return VK_SUCCESS; +} + +/** + * Return NULL if the current subpass has no depthstencil attachment. + */ +const struct anv_image_view * +anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + + if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED) + return NULL; + + const struct anv_image_view *iview = + fb->attachments[subpass->depth_stencil_attachment]; + + assert(iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + + return iview; +} diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c new file mode 100644 index 00000000000..7a77336602a --- /dev/null +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -0,0 +1,532 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/* + * Descriptor set layouts. + */ + +VkResult anv_CreateDescriptorSetLayout( + VkDevice _device, + const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDescriptorSetLayout* pSetLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_descriptor_set_layout *set_layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + uint32_t max_binding = 0; + uint32_t immutable_sampler_count = 0; + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding); + if (pCreateInfo->pBindings[j].pImmutableSamplers) + immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; + } + + size_t size = sizeof(struct anv_descriptor_set_layout) + + (max_binding + 1) * sizeof(set_layout->binding[0]) + + immutable_sampler_count * sizeof(struct anv_sampler *); + + set_layout = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!set_layout) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* We just allocate all the samplers at the end of the struct */ + struct anv_sampler **samplers = + (struct anv_sampler **)&set_layout->binding[max_binding + 1]; + + set_layout->binding_count = max_binding + 1; + set_layout->shader_stages = 0; + set_layout->size = 0; + + for (uint32_t b = 0; b <= max_binding; 
b++) { + /* Initialize all binding_layout entries to -1 */ + memset(&set_layout->binding[b], -1, sizeof(set_layout->binding[b])); + + set_layout->binding[b].immutable_samplers = NULL; + } + + /* Initialize all samplers to 0 */ + memset(samplers, 0, immutable_sampler_count * sizeof(*samplers)); + + uint32_t sampler_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t surface_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t image_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t buffer_count = 0; + uint32_t dynamic_offset_count = 0; + + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j]; + uint32_t b = binding->binding; + + assert(binding->descriptorCount > 0); + set_layout->binding[b].array_size = binding->descriptorCount; + set_layout->binding[b].descriptor_index = set_layout->size; + set_layout->size += binding->descriptorCount; + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + anv_foreach_stage(s, binding->stageFlags) { + set_layout->binding[b].stage[s].sampler_index = sampler_count[s]; + sampler_count[s] += binding->descriptorCount; + } + break; + default: + break; + } + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + set_layout->binding[b].buffer_index = buffer_count; + buffer_count += binding->descriptorCount; + /* fall through */ + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + anv_foreach_stage(s, binding->stageFlags) { + set_layout->binding[b].stage[s].surface_index = surface_count[s]; + surface_count[s] += 
binding->descriptorCount; + } + break; + default: + break; + } + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + set_layout->binding[b].dynamic_offset_index = dynamic_offset_count; + dynamic_offset_count += binding->descriptorCount; + break; + default: + break; + } + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + anv_foreach_stage(s, binding->stageFlags) { + set_layout->binding[b].stage[s].image_index = image_count[s]; + image_count[s] += binding->descriptorCount; + } + break; + default: + break; + } + + if (binding->pImmutableSamplers) { + set_layout->binding[b].immutable_samplers = samplers; + samplers += binding->descriptorCount; + + for (uint32_t i = 0; i < binding->descriptorCount; i++) + set_layout->binding[b].immutable_samplers[i] = + anv_sampler_from_handle(binding->pImmutableSamplers[i]); + } else { + set_layout->binding[b].immutable_samplers = NULL; + } + + set_layout->shader_stages |= binding->stageFlags; + } + + set_layout->buffer_count = buffer_count; + set_layout->dynamic_offset_count = dynamic_offset_count; + + *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); + + return VK_SUCCESS; +} + +void anv_DestroyDescriptorSetLayout( + VkDevice _device, + VkDescriptorSetLayout _set_layout, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); + + anv_free2(&device->alloc, pAllocator, set_layout); +} + +/* + * Pipeline layouts. These have nothing to do with the pipeline. 
They are + * just multiple descriptor set layouts pasted together + */ + +VkResult anv_CreatePipelineLayout( + VkDevice _device, + const VkPipelineLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineLayout* pPipelineLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline_layout *layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); + + layout = anv_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (layout == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + layout->num_sets = pCreateInfo->setLayoutCount; + + unsigned dynamic_offset_count = 0; + + memset(layout->stage, 0, sizeof(layout->stage)); + for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, + pCreateInfo->pSetLayouts[set]); + layout->set[set].layout = set_layout; + + layout->set[set].dynamic_offset_start = dynamic_offset_count; + for (uint32_t b = 0; b < set_layout->binding_count; b++) { + if (set_layout->binding[b].dynamic_offset_index >= 0) + dynamic_offset_count += set_layout->binding[b].array_size; + for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { + if (set_layout->binding[b].stage[s].surface_index >= 0) + layout->stage[s].has_dynamic_offsets = true; + } + } + } + + *pPipelineLayout = anv_pipeline_layout_to_handle(layout); + + return VK_SUCCESS; +} + +void anv_DestroyPipelineLayout( + VkDevice _device, + VkPipelineLayout _pipelineLayout, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); + + anv_free2(&device->alloc, pAllocator, pipeline_layout); +} + +/* + * Descriptor pools. These are a no-op for now. 
+ */ + +VkResult anv_CreateDescriptorPool( + VkDevice device, + const VkDescriptorPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDescriptorPool* pDescriptorPool) +{ + anv_finishme("VkDescriptorPool is a stub"); + *pDescriptorPool = (VkDescriptorPool)1; + return VK_SUCCESS; +} + +void anv_DestroyDescriptorPool( + VkDevice _device, + VkDescriptorPool _pool, + const VkAllocationCallbacks* pAllocator) +{ + anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); +} + +VkResult anv_ResetDescriptorPool( + VkDevice device, + VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags) +{ + anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); + return VK_SUCCESS; +} + +VkResult +anv_descriptor_set_create(struct anv_device *device, + const struct anv_descriptor_set_layout *layout, + struct anv_descriptor_set **out_set) +{ + struct anv_descriptor_set *set; + size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]); + + set = anv_alloc(&device->alloc /* XXX: Use the pool */, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!set) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* A descriptor set may not be 100% filled. Clear the set so we can + * later detect holes in it. 
+ */ + memset(set, 0, size); + + set->layout = layout; + + /* Go through and fill out immutable samplers if we have any */ + struct anv_descriptor *desc = set->descriptors; + for (uint32_t b = 0; b < layout->binding_count; b++) { + if (layout->binding[b].immutable_samplers) { + for (uint32_t i = 0; i < layout->binding[b].array_size; i++) + desc[i].sampler = layout->binding[b].immutable_samplers[i]; + } + desc += layout->binding[b].array_size; + } + + /* XXX: Use the pool */ + set->buffer_views = + anv_alloc(&device->alloc, + sizeof(set->buffer_views[0]) * layout->buffer_count, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!set->buffer_views) { + anv_free(&device->alloc, set); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + for (uint32_t b = 0; b < layout->buffer_count; b++) { + set->buffer_views[b].surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + set->buffer_count = layout->buffer_count; + *out_set = set; + + return VK_SUCCESS; +} + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set) +{ + /* XXX: Use the pool */ + for (uint32_t b = 0; b < set->buffer_count; b++) + anv_state_pool_free(&device->surface_state_pool, + set->buffer_views[b].surface_state); + + anv_free(&device->alloc, set->buffer_views); + anv_free(&device->alloc, set); +} + +VkResult anv_AllocateDescriptorSets( + VkDevice _device, + const VkDescriptorSetAllocateInfo* pAllocateInfo, + VkDescriptorSet* pDescriptorSets) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + VkResult result = VK_SUCCESS; + struct anv_descriptor_set *set; + uint32_t i; + + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, + pAllocateInfo->pSetLayouts[i]); + + result = anv_descriptor_set_create(device, layout, &set); + if (result != VK_SUCCESS) + break; + + pDescriptorSets[i] = anv_descriptor_set_to_handle(set); + } + + if (result != VK_SUCCESS) + anv_FreeDescriptorSets(_device, 
pAllocateInfo->descriptorPool, + i, pDescriptorSets); + + return result; +} + +VkResult anv_FreeDescriptorSets( + VkDevice _device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet* pDescriptorSets) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + + anv_descriptor_set_destroy(device, set); + } + + return VK_SUCCESS; +} + +void anv_UpdateDescriptorSets( + VkDevice _device, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet* pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet* pDescriptorCopies) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + for (uint32_t i = 0; i < descriptorWriteCount; i++) { + const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; + ANV_FROM_HANDLE(anv_descriptor_set, set, write->dstSet); + const struct anv_descriptor_set_binding_layout *bind_layout = + &set->layout->binding[write->dstBinding]; + struct anv_descriptor *desc = + &set->descriptors[bind_layout->descriptor_index]; + desc += write->dstArrayElement; + + switch (write->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_sampler, sampler, + write->pImageInfo[j].sampler); + + desc[j] = (struct anv_descriptor) { + .type = VK_DESCRIPTOR_TYPE_SAMPLER, + .sampler = sampler, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pImageInfo[j].imageView); + ANV_FROM_HANDLE(anv_sampler, sampler, + write->pImageInfo[j].sampler); + + desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + desc[j].image_view = iview; + + /* If this descriptor has an immutable sampler, we don't want + * to stomp on it. 
+ */ + if (sampler) + desc[j].sampler = sampler; + } + break; + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pImageInfo[j].imageView); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .image_view = iview, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_buffer_view, bview, + write->pTexelBufferView[j]); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .buffer_view = bview, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + anv_finishme("input attachments not implemented"); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + assert(write->pBufferInfo[j].buffer); + ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer); + assert(buffer); + + struct anv_buffer_view *view = + &set->buffer_views[bind_layout->buffer_index]; + view += write->dstArrayElement + j; + + const struct anv_format *format = + anv_format_for_descriptor_type(write->descriptorType); + + view->format = format->isl_format; + view->bo = buffer->bo; + view->offset = buffer->offset + write->pBufferInfo[j].offset; + + /* For buffers with dynamic offsets, we use the full possible + * range in the surface state and do the actual range-checking + * in the shader. 
+ */ + if (bind_layout->dynamic_offset_index >= 0 || + write->pBufferInfo[j].range == VK_WHOLE_SIZE) + view->range = buffer->size - write->pBufferInfo[j].offset; + else + view->range = write->pBufferInfo[j].range; + + anv_fill_buffer_surface_state(device, view->surface_state, + view->format, + view->offset, view->range, 1); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .buffer_view = view, + }; + + } + + default: + break; + } + } + + for (uint32_t i = 0; i < descriptorCopyCount; i++) { + const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; + ANV_FROM_HANDLE(anv_descriptor_set, src, copy->srcSet); + ANV_FROM_HANDLE(anv_descriptor_set, dst, copy->dstSet); + + const struct anv_descriptor_set_binding_layout *src_layout = + &src->layout->binding[copy->srcBinding]; + struct anv_descriptor *src_desc = + &src->descriptors[src_layout->descriptor_index]; + src_desc += copy->srcArrayElement; + + const struct anv_descriptor_set_binding_layout *dst_layout = + &dst->layout->binding[copy->dstBinding]; + struct anv_descriptor *dst_desc = + &dst->descriptors[dst_layout->descriptor_index]; + dst_desc += copy->dstArrayElement; + + for (uint32_t j = 0; j < copy->descriptorCount; j++) + dst_desc[j] = src_desc[j]; + } +} diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c new file mode 100644 index 00000000000..a8835f74179 --- /dev/null +++ b/src/intel/vulkan/anv_device.c @@ -0,0 +1,1789 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" +#include "mesa/main/git_sha1.h" +#include "util/strtod.h" +#include "util/debug.h" + +#include "genxml/gen7_pack.h" + +struct anv_dispatch_table dtable; + +static void +compiler_debug_log(void *data, const char *fmt, ...) +{ } + +static void +compiler_perf_log(void *data, const char *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) + vfprintf(stderr, fmt, args); + + va_end(args); +} + +static VkResult +anv_physical_device_init(struct anv_physical_device *device, + struct anv_instance *instance, + const char *path) +{ + VkResult result; + int fd; + + fd = open(path, O_RDWR | O_CLOEXEC); + if (fd < 0) + return vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to open %s: %m", path); + + device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + device->instance = instance; + device->path = path; + + device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); + if (!device->chipset_id) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get chipset id: %m"); + goto fail; + } + + device->name = brw_get_device_name(device->chipset_id); + device->info = brw_get_device_info(device->chipset_id); + if (!device->info) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get device info"); + goto fail; + } + + if (device->info->is_haswell) { + fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n"); + } else if (device->info->gen == 7 && !device->info->is_baytrail) { + fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); + } else if (device->info->gen == 7 && device->info->is_baytrail) { + fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n"); + } else if (device->info->gen >= 8) { + /* Broadwell, Cherryview, Skylake, Broxton, Kabylake is as fully + * supported as anything */ + } else { + result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, + "Vulkan not yet supported on %s", device->name); + goto fail; + } + + if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get aperture size: %m"); + goto fail; + } + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing gem wait"); + goto fail; + } + + if 
(!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing execbuf2"); + goto fail; + } + + if (!device->info->has_llc && + anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing wc mmap"); + goto fail; + } + + bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X); + + close(fd); + + brw_process_intel_debug_variable(); + + device->compiler = brw_compiler_create(NULL, device->info); + if (device->compiler == NULL) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + device->compiler->shader_debug_log = compiler_debug_log; + device->compiler->shader_perf_log = compiler_perf_log; + + /* XXX: Actually detect bit6 swizzling */ + isl_device_init(&device->isl_dev, device->info, swizzled); + + return VK_SUCCESS; + +fail: + close(fd); + return result; +} + +static void +anv_physical_device_finish(struct anv_physical_device *device) +{ + ralloc_free(device->compiler); +} + +static const VkExtensionProperties global_extensions[] = { + { + .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, + .specVersion = 25, + }, + { + .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, + .specVersion = 5, + }, +#ifdef HAVE_WAYLAND_PLATFORM + { + .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, + .specVersion = 4, + }, +#endif +}; + +static const VkExtensionProperties device_extensions[] = { + { + .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME, + .specVersion = 67, + }, +}; + +static void * +default_alloc_func(void *pUserData, size_t size, size_t align, + VkSystemAllocationScope allocationScope) +{ + return malloc(size); +} + +static void * +default_realloc_func(void *pUserData, void *pOriginal, size_t size, + size_t align, VkSystemAllocationScope allocationScope) +{ + return realloc(pOriginal, size); +} + +static void +default_free_func(void *pUserData, void *pMemory) +{ + free(pMemory); +} + +static const VkAllocationCallbacks 
default_alloc = { + .pUserData = NULL, + .pfnAllocation = default_alloc_func, + .pfnReallocation = default_realloc_func, + .pfnFree = default_free_func, +}; + +VkResult anv_CreateInstance( + const VkInstanceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkInstance* pInstance) +{ + struct anv_instance *instance; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); + + uint32_t client_version = pCreateInfo->pApplicationInfo ? + pCreateInfo->pApplicationInfo->apiVersion : + VK_MAKE_VERSION(1, 0, 0); + if (VK_MAKE_VERSION(1, 0, 0) > client_version || + client_version > VK_MAKE_VERSION(1, 0, 3)) { + return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, + "Client requested version %d.%d.%d", + VK_VERSION_MAJOR(client_version), + VK_VERSION_MINOR(client_version), + VK_VERSION_PATCH(client_version)); + } + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + bool found = false; + for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + global_extensions[j].extensionName) == 0) { + found = true; + break; + } + } + if (!found) + return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); + } + + instance = anv_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!instance) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + + if (pAllocator) + instance->alloc = *pAllocator; + else + instance->alloc = default_alloc; + + instance->apiVersion = client_version; + instance->physicalDeviceCount = -1; + + _mesa_locale_init(); + + VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); + + anv_init_wsi(instance); + + *pInstance = anv_instance_to_handle(instance); + + return VK_SUCCESS; +} + +void anv_DestroyInstance( + VkInstance _instance, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + if (instance->physicalDeviceCount > 
0) { + /* We support at most one physical device. */ + assert(instance->physicalDeviceCount == 1); + anv_physical_device_finish(&instance->physicalDevice); + } + + anv_finish_wsi(instance); + + VG(VALGRIND_DESTROY_MEMPOOL(instance)); + + _mesa_locale_fini(); + + anv_free(&instance->alloc, instance); +} + +VkResult anv_EnumeratePhysicalDevices( + VkInstance _instance, + uint32_t* pPhysicalDeviceCount, + VkPhysicalDevice* pPhysicalDevices) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + VkResult result; + + if (instance->physicalDeviceCount < 0) { + result = anv_physical_device_init(&instance->physicalDevice, + instance, "/dev/dri/renderD128"); + if (result == VK_ERROR_INCOMPATIBLE_DRIVER) { + instance->physicalDeviceCount = 0; + } else if (result == VK_SUCCESS) { + instance->physicalDeviceCount = 1; + } else { + return result; + } + } + + /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; + * otherwise it's an inout parameter. + * + * The Vulkan spec (git aaed022) says: + * + * pPhysicalDeviceCount is a pointer to an unsigned integer variable + * that is initialized with the number of devices the application is + * prepared to receive handles to. pname:pPhysicalDevices is pointer to + * an array of at least this many VkPhysicalDevice handles [...]. + * + * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices + * overwrites the contents of the variable pointed to by + * pPhysicalDeviceCount with the number of physical devices in in the + * instance; otherwise, vkEnumeratePhysicalDevices overwrites + * pPhysicalDeviceCount with the number of physical handles written to + * pPhysicalDevices. 
+ */ + if (!pPhysicalDevices) { + *pPhysicalDeviceCount = instance->physicalDeviceCount; + } else if (*pPhysicalDeviceCount >= 1) { + pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice); + *pPhysicalDeviceCount = 1; + } else { + *pPhysicalDeviceCount = 0; + } + + return VK_SUCCESS; +} + +void anv_GetPhysicalDeviceFeatures( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures* pFeatures) +{ + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + + *pFeatures = (VkPhysicalDeviceFeatures) { + .robustBufferAccess = true, + .fullDrawIndexUint32 = true, + .imageCubeArray = false, + .independentBlend = pdevice->info->gen >= 8, + .geometryShader = true, + .tessellationShader = false, + .sampleRateShading = false, + .dualSrcBlend = true, + .logicOp = true, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = false, + .depthClamp = false, + .depthBiasClamp = false, + .fillModeNonSolid = true, + .depthBounds = false, + .wideLines = true, + .largePoints = true, + .alphaToOne = true, + .multiViewport = true, + .samplerAnisotropy = false, /* FINISHME */ + .textureCompressionETC2 = true, + .textureCompressionASTC_LDR = true, + .textureCompressionBC = true, + .occlusionQueryPrecise = true, + .pipelineStatisticsQuery = true, + .vertexPipelineStoresAndAtomics = pdevice->info->gen >= 8, + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = true, + .shaderImageGatherExtended = true, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = false, + .shaderUniformBufferArrayDynamicIndexing = true, + .shaderSampledImageArrayDynamicIndexing = true, + .shaderStorageBufferArrayDynamicIndexing = true, + .shaderStorageImageArrayDynamicIndexing = true, + .shaderStorageImageReadWithoutFormat = false, + .shaderStorageImageWriteWithoutFormat = true, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderInt16 = false, + 
.alphaToOne = true, + .variableMultisampleRate = false, + .inheritedQueries = false, + }; +} + +void +anv_device_get_cache_uuid(void *uuid) +{ + memset(uuid, 0, VK_UUID_SIZE); + snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4); +} + +void anv_GetPhysicalDeviceProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties* pProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + const struct brw_device_info *devinfo = pdevice->info; + + anv_finishme("Get correct values for VkPhysicalDeviceLimits"); + + const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0; + + VkSampleCountFlags sample_counts = + isl_device_get_sample_counts(&pdevice->isl_dev); + + VkPhysicalDeviceLimits limits = { + .maxImageDimension1D = (1 << 14), + .maxImageDimension2D = (1 << 14), + .maxImageDimension3D = (1 << 10), + .maxImageDimensionCube = (1 << 14), + .maxImageArrayLayers = (1 << 10), + .maxTexelBufferElements = 128 * 1024 * 1024, + .maxUniformBufferRange = UINT32_MAX, + .maxStorageBufferRange = UINT32_MAX, + .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, + .maxMemoryAllocationCount = UINT32_MAX, + .maxSamplerAllocationCount = 64 * 1024, + .bufferImageGranularity = 64, /* A cache line */ + .sparseAddressSpaceSize = 0, + .maxBoundDescriptorSets = MAX_SETS, + .maxPerStageDescriptorSamplers = 64, + .maxPerStageDescriptorUniformBuffers = 64, + .maxPerStageDescriptorStorageBuffers = 64, + .maxPerStageDescriptorSampledImages = 64, + .maxPerStageDescriptorStorageImages = 64, + .maxPerStageDescriptorInputAttachments = 64, + .maxPerStageResources = 128, + .maxDescriptorSetSamplers = 256, + .maxDescriptorSetUniformBuffers = 256, + .maxDescriptorSetUniformBuffersDynamic = 256, + .maxDescriptorSetStorageBuffers = 256, + .maxDescriptorSetStorageBuffersDynamic = 256, + .maxDescriptorSetSampledImages = 256, + .maxDescriptorSetStorageImages = 256, + .maxDescriptorSetInputAttachments = 256, + .maxVertexInputAttributes = 32, + 
.maxVertexInputBindings = 32, + .maxVertexInputAttributeOffset = 2047, + .maxVertexInputBindingStride = 2048, + .maxVertexOutputComponents = 128, + .maxTessellationGenerationLevel = 0, + .maxTessellationPatchSize = 0, + .maxTessellationControlPerVertexInputComponents = 0, + .maxTessellationControlPerVertexOutputComponents = 0, + .maxTessellationControlPerPatchOutputComponents = 0, + .maxTessellationControlTotalOutputComponents = 0, + .maxTessellationEvaluationInputComponents = 0, + .maxTessellationEvaluationOutputComponents = 0, + .maxGeometryShaderInvocations = 32, + .maxGeometryInputComponents = 64, + .maxGeometryOutputComponents = 128, + .maxGeometryOutputVertices = 256, + .maxGeometryTotalOutputComponents = 1024, + .maxFragmentInputComponents = 128, + .maxFragmentOutputAttachments = 8, + .maxFragmentDualSrcAttachments = 2, + .maxFragmentCombinedOutputResources = 8, + .maxComputeSharedMemorySize = 32768, + .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, + .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, + .maxComputeWorkGroupSize = { + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + }, + .subPixelPrecisionBits = 4 /* FIXME */, + .subTexelPrecisionBits = 4 /* FIXME */, + .mipmapPrecisionBits = 4 /* FIXME */, + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectCount = UINT32_MAX, + .maxSamplerLodBias = 16, + .maxSamplerAnisotropy = 16, + .maxViewports = MAX_VIEWPORTS, + .maxViewportDimensions = { (1 << 14), (1 << 14) }, + .viewportBoundsRange = { -16384.0, 16384.0 }, + .viewportSubPixelBits = 13, /* We take a float? 
*/ + .minMemoryMapAlignment = 4096, /* A page */ + .minTexelBufferOffsetAlignment = 1, + .minUniformBufferOffsetAlignment = 1, + .minStorageBufferOffsetAlignment = 1, + .minTexelOffset = -8, + .maxTexelOffset = 7, + .minTexelGatherOffset = -8, + .maxTexelGatherOffset = 7, + .minInterpolationOffset = 0, /* FIXME */ + .maxInterpolationOffset = 0, /* FIXME */ + .subPixelInterpolationOffsetBits = 0, /* FIXME */ + .maxFramebufferWidth = (1 << 14), + .maxFramebufferHeight = (1 << 14), + .maxFramebufferLayers = (1 << 10), + .framebufferColorSampleCounts = sample_counts, + .framebufferDepthSampleCounts = sample_counts, + .framebufferStencilSampleCounts = sample_counts, + .framebufferNoAttachmentsSampleCounts = sample_counts, + .maxColorAttachments = MAX_RTS, + .sampledImageColorSampleCounts = sample_counts, + .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .sampledImageDepthSampleCounts = sample_counts, + .sampledImageStencilSampleCounts = sample_counts, + .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .maxSampleMaskWords = 1, + .timestampComputeAndGraphics = false, + .timestampPeriod = time_stamp_base / (1000 * 1000 * 1000), + .maxClipDistances = 0 /* FIXME */, + .maxCullDistances = 0 /* FIXME */, + .maxCombinedClipAndCullDistances = 0 /* FIXME */, + .discreteQueuePriorities = 1, + .pointSizeRange = { 0.125, 255.875 }, + .lineWidthRange = { 0.0, 7.9921875 }, + .pointSizeGranularity = (1.0 / 8.0), + .lineWidthGranularity = (1.0 / 128.0), + .strictLines = false, /* FINISHME */ + .standardSampleLocations = true, + .optimalBufferCopyOffsetAlignment = 128, + .optimalBufferCopyRowPitchAlignment = 128, + .nonCoherentAtomSize = 64, + }; + + *pProperties = (VkPhysicalDeviceProperties) { + .apiVersion = VK_MAKE_VERSION(1, 0, 2), + .driverVersion = 1, + .vendorID = 0x8086, + .deviceID = pdevice->chipset_id, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + .limits = limits, + .sparseProperties = {0}, /* Broadwell doesn't do sparse. 
*/ + }; + + strcpy(pProperties->deviceName, pdevice->name); + anv_device_get_cache_uuid(pProperties->pipelineCacheUUID); +} + +void anv_GetPhysicalDeviceQueueFamilyProperties( + VkPhysicalDevice physicalDevice, + uint32_t* pCount, + VkQueueFamilyProperties* pQueueFamilyProperties) +{ + if (pQueueFamilyProperties == NULL) { + *pCount = 1; + return; + } + + assert(*pCount >= 1); + + *pQueueFamilyProperties = (VkQueueFamilyProperties) { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = 1, + .timestampValidBits = 36, /* XXX: Real value here */ + .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, + }; +} + +void anv_GetPhysicalDeviceMemoryProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties* pMemoryProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkDeviceSize heap_size; + + /* Reserve some wiggle room for the driver by exposing only 75% of the + * aperture to the heap. + */ + heap_size = 3 * physical_device->aperture_size / 4; + + if (physical_device->info->has_llc) { + /* Big core GPUs share LLC with the CPU and thus one memory type can be + * both cached and coherent at the same time. + */ + pMemoryProperties->memoryTypeCount = 1; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 0, + }; + } else { + /* The spec requires that we expose a host-visible, coherent memory + * type, but Atom GPUs don't share LLC. Thus we offer two memory types + * to give the application a choice between cached, but not coherent and + * coherent but uncached (WC though). 
+ */ + pMemoryProperties->memoryTypeCount = 2; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = 0, + }; + pMemoryProperties->memoryTypes[1] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 0, + }; + } + + pMemoryProperties->memoryHeapCount = 1; + pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { + .size = heap_size, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + }; +} + +PFN_vkVoidFunction anv_GetInstanceProcAddr( + VkInstance instance, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +/* The loader wants us to expose a second GetInstanceProcAddr function + * to work around certain LD_PRELOAD issues seen in apps. + */ +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( + VkInstance instance, + const char* pName); + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( + VkInstance instance, + const char* pName) +{ + return anv_GetInstanceProcAddr(instance, pName); +} + +PFN_vkVoidFunction anv_GetDeviceProcAddr( + VkDevice device, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +static VkResult +anv_queue_init(struct anv_device *device, struct anv_queue *queue) +{ + queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + queue->device = device; + queue->pool = &device->surface_state_pool; + + return VK_SUCCESS; +} + +static void +anv_queue_finish(struct anv_queue *queue) +{ +} + +static struct anv_state +anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p) +{ + struct anv_state state; + + state = anv_state_pool_alloc(pool, size, align); + memcpy(state.map, p, size); + + if (!pool->block_pool->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +struct 
gen8_border_color { + union { + float float32[4]; + uint32_t uint32[4]; + }; + /* Pad out to 64 bytes */ + uint32_t _pad[12]; +}; + +static void +anv_device_init_border_colors(struct anv_device *device) +{ + static const struct gen8_border_color border_colors[] = { + [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } }, + [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } }, + [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } }, + [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } }, + }; + + device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool, + sizeof(border_colors), 64, + border_colors); +} + +VkResult +anv_device_submit_simple_batch(struct anv_device *device, + struct anv_batch *batch) +{ + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + struct anv_bo bo; + VkResult result = VK_SUCCESS; + uint32_t size; + int64_t timeout; + int ret; + + /* Kernel driver requires 8 byte aligned batch length */ + size = align_u32(batch->next - batch->start, 8); + assert(size < device->batch_bo_pool.bo_size); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo); + if (result != VK_SUCCESS) + return result; + + memcpy(bo.map, batch->start, size); + if (!device->info.has_llc) + anv_clflush_range(bo.map, size); + + exec2_objects[0].handle = bo.gem_handle; + exec2_objects[0].relocation_count = 0; + exec2_objects[0].relocs_ptr = 0; + exec2_objects[0].alignment = 0; + exec2_objects[0].offset = bo.offset; + exec2_objects[0].flags = 0; + exec2_objects[0].rsvd1 = 0; + exec2_objects[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t) exec2_objects; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = 0; + execbuf.batch_len = size; + execbuf.cliprects_ptr = 0; + 
execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + + execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + execbuf.rsvd1 = device->context_id; + execbuf.rsvd2 = 0; + + ret = anv_gem_execbuffer(device, &execbuf); + if (ret != 0) { + /* We don't know the real error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); + goto fail; + } + + timeout = INT64_MAX; + ret = anv_gem_wait(device, bo.gem_handle, &timeout); + if (ret != 0) { + /* We don't know the real error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); + goto fail; + } + + fail: + anv_bo_pool_free(&device->batch_bo_pool, &bo); + + return result; +} + +VkResult anv_CreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDevice* pDevice) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkResult result; + struct anv_device *device; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + bool found = false; + for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + device_extensions[j].extensionName) == 0) { + found = true; + break; + } + } + if (!found) + return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); + } + + anv_set_dispatch_devinfo(physical_device->info); + + device = anv_alloc2(&physical_device->instance->alloc, pAllocator, + sizeof(*device), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!device) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + device->instance = physical_device->instance; + device->chipset_id = physical_device->chipset_id; + + if (pAllocator) + device->alloc = *pAllocator; + else + device->alloc = physical_device->instance->alloc; + + /* XXX(chadv): Can we dup() 
physicalDevice->fd here? */ + device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); + if (device->fd == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_device; + } + + device->context_id = anv_gem_create_context(device); + if (device->context_id == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_fd; + } + + device->info = *physical_device->info; + device->isl_dev = physical_device->isl_dev; + + pthread_mutex_init(&device->mutex, NULL); + + anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); + + anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384); + + anv_state_pool_init(&device->dynamic_state_pool, + &device->dynamic_state_block_pool); + + anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024); + anv_pipeline_cache_init(&device->default_pipeline_cache, device); + + anv_block_pool_init(&device->surface_state_block_pool, device, 4096); + + anv_state_pool_init(&device->surface_state_pool, + &device->surface_state_block_pool); + + anv_bo_init_new(&device->workaround_bo, device, 1024); + + anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); + + anv_queue_init(device, &device->queue); + + switch (device->info.gen) { + case 7: + if (!device->info.is_haswell) + result = gen7_init_device_state(device); + else + result = gen75_init_device_state(device); + break; + case 8: + result = gen8_init_device_state(device); + break; + case 9: + result = gen9_init_device_state(device); + break; + default: + /* Shouldn't get here as we don't create physical devices for any other + * gens. 
*/ + unreachable("unhandled gen"); + } + if (result != VK_SUCCESS) + goto fail_fd; + + result = anv_device_init_meta(device); + if (result != VK_SUCCESS) + goto fail_fd; + + anv_device_init_border_colors(device); + + *pDevice = anv_device_to_handle(device); + + return VK_SUCCESS; + + fail_fd: + close(device->fd); + fail_device: + anv_free(&device->alloc, device); + + return result; +} + +void anv_DestroyDevice( + VkDevice _device, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_queue_finish(&device->queue); + + anv_device_finish_meta(device); + +#ifdef HAVE_VALGRIND + /* We only need to free these to prevent valgrind errors. The backing + * BO will go away in a couple of lines so we don't actually leak. + */ + anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); +#endif + + anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size); + anv_gem_close(device, device->workaround_bo.gem_handle); + + anv_bo_pool_finish(&device->batch_bo_pool); + anv_state_pool_finish(&device->dynamic_state_pool); + anv_block_pool_finish(&device->dynamic_state_block_pool); + anv_block_pool_finish(&device->instruction_block_pool); + anv_state_pool_finish(&device->surface_state_pool); + anv_block_pool_finish(&device->surface_state_block_pool); + anv_block_pool_finish(&device->scratch_block_pool); + + close(device->fd); + + pthread_mutex_destroy(&device->mutex); + + anv_free(&device->alloc, device); +} + +VkResult anv_EnumerateInstanceExtensionProperties( + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = ARRAY_SIZE(global_extensions); + return VK_SUCCESS; + } + + assert(*pPropertyCount >= ARRAY_SIZE(global_extensions)); + + *pPropertyCount = ARRAY_SIZE(global_extensions); + memcpy(pProperties, global_extensions, sizeof(global_extensions)); + + return VK_SUCCESS; +} + +VkResult anv_EnumerateDeviceExtensionProperties( 
+ VkPhysicalDevice physicalDevice, + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = ARRAY_SIZE(device_extensions); + return VK_SUCCESS; + } + + assert(*pPropertyCount >= ARRAY_SIZE(device_extensions)); + + *pPropertyCount = ARRAY_SIZE(device_extensions); + memcpy(pProperties, device_extensions, sizeof(device_extensions)); + + return VK_SUCCESS; +} + +VkResult anv_EnumerateInstanceLayerProperties( + uint32_t* pPropertyCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_LAYER_NOT_PRESENT); +} + +VkResult anv_EnumerateDeviceLayerProperties( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_LAYER_NOT_PRESENT); +} + +void anv_GetDeviceQueue( + VkDevice _device, + uint32_t queueNodeIndex, + uint32_t queueIndex, + VkQueue* pQueue) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + assert(queueIndex == 0); + + *pQueue = anv_queue_to_handle(&device->queue); +} + +VkResult anv_QueueSubmit( + VkQueue _queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + struct anv_device *device = queue->device; + int ret; + + for (uint32_t i = 0; i < submitCount; i++) { + for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, + pSubmits[i].pCommandBuffers[j]); + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); + if (ret != 0) { + /* We don't know the real error. 
*/ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "execbuf2 failed: %m"); + } + + for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++) + cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset; + } + } + + if (fence) { + ret = anv_gem_execbuffer(device, &fence->execbuf); + if (ret != 0) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "execbuf2 failed: %m"); + } + } + + return VK_SUCCESS; +} + +VkResult anv_QueueWaitIdle( + VkQueue _queue) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + + return ANV_CALL(DeviceWaitIdle)(anv_device_to_handle(queue->device)); +} + +VkResult anv_DeviceWaitIdle( + VkDevice _device) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_batch batch; + + uint32_t cmds[8]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN7_MI_NOOP); + + return anv_device_submit_simple_batch(device, &batch); +} + +VkResult +anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) +{ + bo->gem_handle = anv_gem_create(device, size); + if (!bo->gem_handle) + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + + bo->map = NULL; + bo->index = 0; + bo->offset = 0; + bo->size = size; + bo->is_winsys_bo = false; + + return VK_SUCCESS; +} + +VkResult anv_AllocateMemory( + VkDevice _device, + const VkMemoryAllocateInfo* pAllocateInfo, + const VkAllocationCallbacks* pAllocator, + VkDeviceMemory* pMem) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_device_memory *mem; + VkResult result; + + assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); + + if (pAllocateInfo->allocationSize == 0) { + /* Apparently, this is allowed */ + *pMem = VK_NULL_HANDLE; + return VK_SUCCESS; + } + + /* We support exactly one memory heap. 
*/ + assert(pAllocateInfo->memoryTypeIndex == 0 || + (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2)); + + /* FINISHME: Fail if allocation request exceeds heap size. */ + + mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* The kernel is going to give us whole pages anyway */ + uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); + + result = anv_bo_init_new(&mem->bo, device, alloc_size); + if (result != VK_SUCCESS) + goto fail; + + mem->type_index = pAllocateInfo->memoryTypeIndex; + + *pMem = anv_device_memory_to_handle(mem); + + return VK_SUCCESS; + + fail: + anv_free2(&device->alloc, pAllocator, mem); + + return result; +} + +void anv_FreeMemory( + VkDevice _device, + VkDeviceMemory _mem, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + + if (mem == NULL) + return; + + if (mem->bo.map) + anv_gem_munmap(mem->bo.map, mem->bo.size); + + if (mem->bo.gem_handle != 0) + anv_gem_close(device, mem->bo.gem_handle); + + anv_free2(&device->alloc, pAllocator, mem); +} + +VkResult anv_MapMemory( + VkDevice _device, + VkDeviceMemory _memory, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void** ppData) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + + if (mem == NULL) { + *ppData = NULL; + return VK_SUCCESS; + } + + if (size == VK_WHOLE_SIZE) + size = mem->bo.size - offset; + + /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only + * takes a VkDeviceMemory pointer, it seems like only one map of the memory + * at a time is valid. We could just mmap up front and return an offset + * pointer here, but that may exhaust virtual memory on 32 bit + * userspace. 
*/ + + uint32_t gem_flags = 0; + if (!device->info.has_llc && mem->type_index == 0) + gem_flags |= I915_MMAP_WC; + + /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */ + uint64_t map_offset = offset & ~4095ull; + assert(offset >= map_offset); + uint64_t map_size = (offset + size) - map_offset; + + /* Let's map whole pages */ + map_size = align_u64(map_size, 4096); + + mem->map = anv_gem_mmap(device, mem->bo.gem_handle, + map_offset, map_size, gem_flags); + mem->map_size = map_size; + + *ppData = mem->map + (offset - map_offset); + + return VK_SUCCESS; +} + +void anv_UnmapMemory( + VkDevice _device, + VkDeviceMemory _memory) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + + if (mem == NULL) + return; + + anv_gem_munmap(mem->map, mem->map_size); +} + +static void +clflush_mapped_ranges(struct anv_device *device, + uint32_t count, + const VkMappedMemoryRange *ranges) +{ + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory); + void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK); + void *end; + + if (ranges[i].offset + ranges[i].size > mem->map_size) + end = mem->map + mem->map_size; + else + end = mem->map + ranges[i].offset + ranges[i].size; + + while (p < end) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } + } +} + +VkResult anv_FlushMappedMemoryRanges( + VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange* pMemoryRanges) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + if (device->info.has_llc) + return VK_SUCCESS; + + /* Make sure the writes we're flushing have landed. 
*/ + __builtin_ia32_mfence(); + + clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); + + return VK_SUCCESS; +} + +VkResult anv_InvalidateMappedMemoryRanges( + VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange* pMemoryRanges) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + if (device->info.has_llc) + return VK_SUCCESS; + + clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); + + /* Make sure no reads get moved up above the invalidate. */ + __builtin_ia32_mfence(); + + return VK_SUCCESS; +} + +void anv_GetBufferMemoryRequirements( + VkDevice device, + VkBuffer _buffer, + VkMemoryRequirements* pMemoryRequirements) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. The bit `1<memoryTypeBits = 1; + + pMemoryRequirements->size = buffer->size; + pMemoryRequirements->alignment = 16; +} + +void anv_GetImageMemoryRequirements( + VkDevice device, + VkImage _image, + VkMemoryRequirements* pMemoryRequirements) +{ + ANV_FROM_HANDLE(anv_image, image, _image); + + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. 
The bit `1<memoryTypeBits = 1; + + pMemoryRequirements->size = image->size; + pMemoryRequirements->alignment = image->alignment; +} + +void anv_GetImageSparseMemoryRequirements( + VkDevice device, + VkImage image, + uint32_t* pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements* pSparseMemoryRequirements) +{ + stub(); +} + +void anv_GetDeviceMemoryCommitment( + VkDevice device, + VkDeviceMemory memory, + VkDeviceSize* pCommittedMemoryInBytes) +{ + *pCommittedMemoryInBytes = 0; +} + +VkResult anv_BindBufferMemory( + VkDevice device, + VkBuffer _buffer, + VkDeviceMemory _memory, + VkDeviceSize memoryOffset) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + if (mem) { + buffer->bo = &mem->bo; + buffer->offset = memoryOffset; + } else { + buffer->bo = NULL; + buffer->offset = 0; + } + + return VK_SUCCESS; +} + +VkResult anv_BindImageMemory( + VkDevice device, + VkImage _image, + VkDeviceMemory _memory, + VkDeviceSize memoryOffset) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + ANV_FROM_HANDLE(anv_image, image, _image); + + if (mem) { + image->bo = &mem->bo; + image->offset = memoryOffset; + } else { + image->bo = NULL; + image->offset = 0; + } + + return VK_SUCCESS; +} + +VkResult anv_QueueBindSparse( + VkQueue queue, + uint32_t bindInfoCount, + const VkBindSparseInfo* pBindInfo, + VkFence fence) +{ + stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); +} + +VkResult anv_CreateFence( + VkDevice _device, + const VkFenceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkFence* pFence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_fence *fence; + struct anv_batch batch; + VkResult result; + + const uint32_t fence_size = 128; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); + + fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (fence == NULL) + return 
vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_init_new(&fence->bo, device, fence_size); + if (result != VK_SUCCESS) + goto fail; + + fence->bo.map = + anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size, 0); + batch.next = batch.start = fence->bo.map; + batch.end = fence->bo.map + fence->bo.size; + anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN7_MI_NOOP); + + if (!device->info.has_llc) { + assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0); + assert(batch.next - fence->bo.map <= CACHELINE_SIZE); + __builtin_ia32_mfence(); + __builtin_ia32_clflush(fence->bo.map); + } + + fence->exec2_objects[0].handle = fence->bo.gem_handle; + fence->exec2_objects[0].relocation_count = 0; + fence->exec2_objects[0].relocs_ptr = 0; + fence->exec2_objects[0].alignment = 0; + fence->exec2_objects[0].offset = fence->bo.offset; + fence->exec2_objects[0].flags = 0; + fence->exec2_objects[0].rsvd1 = 0; + fence->exec2_objects[0].rsvd2 = 0; + + fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; + fence->execbuf.buffer_count = 1; + fence->execbuf.batch_start_offset = 0; + fence->execbuf.batch_len = batch.next - fence->bo.map; + fence->execbuf.cliprects_ptr = 0; + fence->execbuf.num_cliprects = 0; + fence->execbuf.DR1 = 0; + fence->execbuf.DR4 = 0; + + fence->execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + fence->execbuf.rsvd1 = device->context_id; + fence->execbuf.rsvd2 = 0; + + fence->ready = false; + + *pFence = anv_fence_to_handle(fence); + + return VK_SUCCESS; + + fail: + anv_free2(&device->alloc, pAllocator, fence); + + return result; +} + +void anv_DestroyFence( + VkDevice _device, + VkFence _fence, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + + anv_gem_munmap(fence->bo.map, fence->bo.size); + anv_gem_close(device, fence->bo.gem_handle); + anv_free2(&device->alloc, pAllocator, fence); +} 
+ +VkResult anv_ResetFences( + VkDevice _device, + uint32_t fenceCount, + const VkFence* pFences) +{ + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + fence->ready = false; + } + + return VK_SUCCESS; +} + +VkResult anv_GetFenceStatus( + VkDevice _device, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + int64_t t = 0; + int ret; + + if (fence->ready) + return VK_SUCCESS; + + ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == 0) { + fence->ready = true; + return VK_SUCCESS; + } + + return VK_NOT_READY; +} + +VkResult anv_WaitForFences( + VkDevice _device, + uint32_t fenceCount, + const VkFence* pFences, + VkBool32 waitAll, + uint64_t timeout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed + * to block indefinitely timeouts <= 0. Unfortunately, this was broken + * for a couple of kernel releases. Since there's no way to know + * whether or not the kernel we're using is one of the broken ones, the + * best we can do is to clamp the timeout to INT64_MAX. This limits the + * maximum timeout from 584 years to 292 years - likely not a big deal. + */ + if (timeout > INT64_MAX) + timeout = INT64_MAX; + + int64_t t = timeout; + + /* FIXME: handle !waitAll */ + + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + int ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == -1 && errno == ETIME) { + return VK_TIMEOUT; + } else if (ret == -1) { + /* We don't know the real error. 
*/ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "gem wait failed: %m"); + } + } + + return VK_SUCCESS; +} + +// Queue semaphore functions + +VkResult anv_CreateSemaphore( + VkDevice device, + const VkSemaphoreCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSemaphore* pSemaphore) +{ + /* The DRM execbuffer ioctl always execute in-oder, even between different + * rings. As such, there's nothing to do for the user space semaphore. + */ + + *pSemaphore = (VkSemaphore)1; + + return VK_SUCCESS; +} + +void anv_DestroySemaphore( + VkDevice device, + VkSemaphore semaphore, + const VkAllocationCallbacks* pAllocator) +{ +} + +// Event functions + +VkResult anv_CreateEvent( + VkDevice _device, + const VkEventCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkEvent* pEvent) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_state state; + struct anv_event *event; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO); + + state = anv_state_pool_alloc(&device->dynamic_state_pool, + sizeof(*event), 8); + event = state.map; + event->state = state; + event->semaphore = VK_EVENT_RESET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_mfence(); + __builtin_ia32_clflush(event); + } + + *pEvent = anv_event_to_handle(event); + + return VK_SUCCESS; +} + +void anv_DestroyEvent( + VkDevice _device, + VkEvent _event, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_state_pool_free(&device->dynamic_state_pool, event->state); +} + +VkResult anv_GetEventStatus( + VkDevice _device, + VkEvent _event) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + if (!device->info.has_llc) { + /* Invalidate read cache before reading event written by GPU. 
*/ + __builtin_ia32_clflush(event); + __builtin_ia32_mfence(); + + } + + return event->semaphore; +} + +VkResult anv_SetEvent( + VkDevice _device, + VkEvent _event) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + event->semaphore = VK_EVENT_SET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_mfence(); + __builtin_ia32_clflush(event); + } + + return VK_SUCCESS; +} + +VkResult anv_ResetEvent( + VkDevice _device, + VkEvent _event) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + event->semaphore = VK_EVENT_RESET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_mfence(); + __builtin_ia32_clflush(event); + } + + return VK_SUCCESS; +} + +// Buffer functions + +VkResult anv_CreateBuffer( + VkDevice _device, + const VkBufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkBuffer* pBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_buffer *buffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); + + buffer = anv_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + buffer->size = pCreateInfo->size; + buffer->usage = pCreateInfo->usage; + buffer->bo = NULL; + buffer->offset = 0; + + *pBuffer = anv_buffer_to_handle(buffer); + + return VK_SUCCESS; +} + +void anv_DestroyBuffer( + VkDevice _device, + VkBuffer _buffer, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + anv_free2(&device->alloc, pAllocator, buffer); +} + +void +anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, + enum isl_format format, + uint32_t offset, uint32_t range, uint32_t stride) +{ + 
switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + gen75_fill_buffer_surface_state(state.map, format, offset, range, + stride); + else + gen7_fill_buffer_surface_state(state.map, format, offset, range, + stride); + break; + case 8: + gen8_fill_buffer_surface_state(state.map, format, offset, range, stride); + break; + case 9: + gen9_fill_buffer_surface_state(state.map, format, offset, range, stride); + break; + default: + unreachable("unsupported gen\n"); + } + + if (!device->info.has_llc) + anv_state_clflush(state); +} + +void anv_DestroySampler( + VkDevice _device, + VkSampler _sampler, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); + + anv_free2(&device->alloc, pAllocator, sampler); +} + +VkResult anv_CreateFramebuffer( + VkDevice _device, + const VkFramebufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkFramebuffer* pFramebuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_framebuffer *framebuffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); + + size_t size = sizeof(*framebuffer) + + sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount; + framebuffer = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (framebuffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + framebuffer->attachment_count = pCreateInfo->attachmentCount; + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + VkImageView _iview = pCreateInfo->pAttachments[i]; + framebuffer->attachments[i] = anv_image_view_from_handle(_iview); + } + + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + + *pFramebuffer = anv_framebuffer_to_handle(framebuffer); + + return VK_SUCCESS; +} + +void anv_DestroyFramebuffer( + VkDevice _device, + VkFramebuffer _fb, + const 
VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); + + anv_free2(&device->alloc, pAllocator, fb); +} + +void vkCmdDbgMarkerBegin( + VkCommandBuffer commandBuffer, + const char* pMarker) + __attribute__ ((visibility ("default"))); + +void vkCmdDbgMarkerEnd( + VkCommandBuffer commandBuffer) + __attribute__ ((visibility ("default"))); + +void vkCmdDbgMarkerBegin( + VkCommandBuffer commandBuffer, + const char* pMarker) +{ +} + +void vkCmdDbgMarkerEnd( + VkCommandBuffer commandBuffer) +{ +} diff --git a/src/intel/vulkan/anv_dump.c b/src/intel/vulkan/anv_dump.c new file mode 100644 index 00000000000..b7fa28be787 --- /dev/null +++ b/src/intel/vulkan/anv_dump.c @@ -0,0 +1,209 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_private.h" + +/* This file contains utility functions for help debugging. They can be + * called from GDB or similar to help inspect images and buffers. + */ + +void +anv_dump_image_to_ppm(struct anv_device *device, + struct anv_image *image, unsigned miplevel, + unsigned array_layer, const char *filename) +{ + VkDevice vk_device = anv_device_to_handle(device); + VkResult result; + + VkExtent2D extent = { image->extent.width, image->extent.height }; + for (unsigned i = 0; i < miplevel; i++) { + extent.width = MAX2(1, extent.width / 2); + extent.height = MAX2(1, extent.height / 2); + } + + VkImage copy_image; + result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .extent = (VkExtent3D) { extent.width, extent.height, 1 }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, + .flags = 0, + }, NULL, ©_image); + assert(result == VK_SUCCESS); + + VkMemoryRequirements reqs; + anv_GetImageMemoryRequirements(vk_device, copy_image, &reqs); + + VkDeviceMemory memory; + result = anv_AllocateMemory(vk_device, + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = reqs.size, + .memoryTypeIndex = 0, + }, NULL, &memory); + assert(result == VK_SUCCESS); + + result = anv_BindImageMemory(vk_device, copy_image, memory, 0); + assert(result == VK_SUCCESS); + + VkCommandPool commandPool; + result = anv_CreateCommandPool(vk_device, + &(VkCommandPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .queueFamilyIndex = 0, + .flags = 0, + }, NULL, &commandPool); + assert(result == VK_SUCCESS); + + VkCommandBuffer cmd; + result = anv_AllocateCommandBuffers(vk_device, + &(VkCommandBufferAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = commandPool, + .level = 
VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }, &cmd); + assert(result == VK_SUCCESS); + + result = anv_BeginCommandBuffer(cmd, + &(VkCommandBufferBeginInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }); + assert(result == VK_SUCCESS); + + anv_CmdBlitImage(cmd, + anv_image_to_handle(image), VK_IMAGE_LAYOUT_GENERAL, + copy_image, VK_IMAGE_LAYOUT_GENERAL, 1, + &(VkImageBlit) { + .srcSubresource = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = miplevel, + .baseArrayLayer = array_layer, + .layerCount = 1, + }, + .srcOffsets = { + { 0, 0, 0 }, + { extent.width, extent.height, 1 }, + }, + .dstSubresource = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .dstOffsets = { + { 0, 0, 0 }, + { extent.width, extent.height, 1 }, + }, + }, VK_FILTER_NEAREST); + + ANV_CALL(CmdPipelineBarrier)(cmd, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + true, 0, NULL, 0, NULL, 1, + &(VkImageMemoryBarrier) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_HOST_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = 0, + .dstQueueFamilyIndex = 0, + .image = copy_image, + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); + + result = anv_EndCommandBuffer(cmd); + assert(result == VK_SUCCESS); + + VkFence fence; + result = anv_CreateFence(vk_device, + &(VkFenceCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = 0, + }, NULL, &fence); + assert(result == VK_SUCCESS); + + result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), 1, + &(VkSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, 
+ .pCommandBuffers = &cmd, + }, fence); + assert(result == VK_SUCCESS); + + result = anv_WaitForFences(vk_device, 1, &fence, true, UINT64_MAX); + assert(result == VK_SUCCESS); + + anv_DestroyFence(vk_device, fence, NULL); + anv_DestroyCommandPool(vk_device, commandPool, NULL); + + uint8_t *map; + result = anv_MapMemory(vk_device, memory, 0, reqs.size, 0, (void **)&map); + assert(result == VK_SUCCESS); + + VkSubresourceLayout layout; + anv_GetImageSubresourceLayout(vk_device, copy_image, + &(VkImageSubresource) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .arrayLayer = 0, + }, &layout); + + map += layout.offset; + + /* Now we can finally write the PPM file */ + FILE *file = fopen(filename, "wb"); + assert(file); + + fprintf(file, "P6\n%d %d\n255\n", extent.width, extent.height); + for (unsigned y = 0; y < extent.height; y++) { + uint8_t row[extent.width * 3]; + for (unsigned x = 0; x < extent.width; x++) { + row[x * 3 + 0] = map[x * 4 + 0]; + row[x * 3 + 1] = map[x * 4 + 1]; + row[x * 3 + 2] = map[x * 4 + 2]; + } + fwrite(row, 3, extent.width, file); + + map += layout.rowPitch; + } + fclose(file); + + anv_UnmapMemory(vk_device, memory); + anv_DestroyImage(vk_device, copy_image, NULL); + anv_FreeMemory(vk_device, memory, NULL); +} diff --git a/src/intel/vulkan/anv_entrypoints_gen.py b/src/intel/vulkan/anv_entrypoints_gen.py new file mode 100644 index 00000000000..1e4cfcb1755 --- /dev/null +++ b/src/intel/vulkan/anv_entrypoints_gen.py @@ -0,0 +1,324 @@ +# coding=utf-8 +# +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following 
conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# + +import fileinput, re, sys + +# Each function typedef in the vulkan.h header is all on one line and matches +# this regepx. We hope that won't change. + +p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);') + +entrypoints = [] + +# We generate a static hash table for entry point lookup +# (vkGetProcAddress). We use a linear congruential generator for our hash +# function and a power-of-two size table. The prime numbers are determined +# experimentally. + +none = 0xffff +hash_size = 256 +u32_mask = 2**32 - 1 +hash_mask = hash_size - 1 + +prime_factor = 5024183 +prime_step = 19 + +def hash(name): + h = 0; + for c in name: + h = (h * prime_factor + ord(c)) & u32_mask + + return h + +opt_header = False +opt_code = False + +if (sys.argv[1] == "header"): + opt_header = True + sys.argv.pop() +elif (sys.argv[1] == "code"): + opt_code = True + sys.argv.pop() + +# Parse the entry points in the header + +i = 0 +for line in fileinput.input(): + m = p.match(line) + if (m): + if m.group(2) == 'VoidFunction': + continue + fullname = "vk" + m.group(2) + h = hash(fullname) + entrypoints.append((m.group(1), m.group(2), m.group(3), i, h)) + i = i + 1 + +# For outputting entrypoints.h we generate a anv_EntryPoint() prototype +# per entry point. 
+ +if opt_header: + print "/* This file generated from vk_gen.py, don't edit directly. */\n" + + print "struct anv_dispatch_table {" + print " union {" + print " void *entrypoints[%d];" % len(entrypoints) + print " struct {" + + for type, name, args, num, h in entrypoints: + print " %s (*%s)%s;" % (type, name, args) + print " };\n" + print " };\n" + print "};\n" + + print "void anv_set_dispatch_devinfo(const struct brw_device_info *info);\n" + + for type, name, args, num, h in entrypoints: + print "%s anv_%s%s;" % (type, name, args) + print "%s gen7_%s%s;" % (type, name, args) + print "%s gen75_%s%s;" % (type, name, args) + print "%s gen8_%s%s;" % (type, name, args) + print "%s gen9_%s%s;" % (type, name, args) + print "%s anv_validate_%s%s;" % (type, name, args) + exit() + + + +print """/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* DO NOT EDIT! 
This is a generated file. */ + +#include "anv_private.h" + +struct anv_entrypoint { + uint32_t name; + uint32_t hash; +}; + +/* We use a big string constant to avoid lots of reloctions from the entry + * point table to lots of little strings. The entries in the entry point table + * store the index into this big string. + */ + +static const char strings[] =""" + +offsets = [] +i = 0; +for type, name, args, num, h in entrypoints: + print " \"vk%s\\0\"" % name + offsets.append(i) + i += 2 + len(name) + 1 +print """ ; + +/* Weak aliases for all potential validate functions. These will resolve to + * NULL if they're not defined, which lets the resolve_entrypoint() function + * either pick a validate wrapper if available or just plug in the actual + * entry point. + */ +""" + +# Now generate the table of all entry points and their validation functions + +print "\nstatic const struct anv_entrypoint entrypoints[] = {" +for type, name, args, num, h in entrypoints: + print " { %5d, 0x%08x }," % (offsets[num], h) +print "};\n" + +for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]: + for type, name, args, num, h in entrypoints: + print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) + print "\nconst struct anv_dispatch_table %s_layer = {" % layer + for type, name, args, num, h in entrypoints: + print " .%s = %s_%s," % (name, layer, name) + print "};\n" + +print """ +#ifdef DEBUG +static bool enable_validate = true; +#else +static bool enable_validate = false; +#endif + +/* We can't use symbols that need resolving (like, oh, getenv) in the resolve + * function. This means that we have to determine whether or not to use the + * validation layer sometime before that. The constructor function attribute asks + * the dynamic linker to invoke determine_validate() at dlopen() time which + * works. 
+ */ +static void __attribute__ ((constructor)) +determine_validate(void) +{ + const char *s = getenv("ANV_VALIDATE"); + + if (s) + enable_validate = atoi(s); +} + +static const struct brw_device_info *dispatch_devinfo; + +void +anv_set_dispatch_devinfo(const struct brw_device_info *devinfo) +{ + dispatch_devinfo = devinfo; +} + +void * __attribute__ ((noinline)) +anv_resolve_entrypoint(uint32_t index) +{ + if (enable_validate && validate_layer.entrypoints[index]) + return validate_layer.entrypoints[index]; + + if (dispatch_devinfo == NULL) { + assert(anv_layer.entrypoints[index]); + return anv_layer.entrypoints[index]; + } + + switch (dispatch_devinfo->gen) { + case 9: + if (gen9_layer.entrypoints[index]) + return gen9_layer.entrypoints[index]; + /* fall through */ + case 8: + if (gen8_layer.entrypoints[index]) + return gen8_layer.entrypoints[index]; + /* fall through */ + case 7: + if (dispatch_devinfo->is_haswell && gen75_layer.entrypoints[index]) + return gen75_layer.entrypoints[index]; + + if (gen7_layer.entrypoints[index]) + return gen7_layer.entrypoints[index]; + /* fall through */ + case 0: + return anv_layer.entrypoints[index]; + default: + unreachable("unsupported gen\\n"); + } +} +""" + +# Now output ifuncs and their resolve helpers for all entry points. The +# resolve helper calls resolve_entrypoint() with the entry point index, which +# lets the resolver look it up in the table. + +for type, name, args, num, h in entrypoints: + print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num) + print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name) + + +# Now generate the hash table used for entry point look up. This is a +# uint16_t table of entry point indices. We use 0xffff to indicate an entry +# in the hash table is empty. 
+ +map = [none for f in xrange(hash_size)] +collisions = [0 for f in xrange(10)] +for type, name, args, num, h in entrypoints: + level = 0 + while map[h & hash_mask] != none: + h = h + prime_step + level = level + 1 + if level > 9: + collisions[9] += 1 + else: + collisions[level] += 1 + map[h & hash_mask] = num + +print "/* Hash table stats:" +print " * size %d entries" % hash_size +print " * collisions entries" +for i in xrange(10): + if (i == 9): + plus = "+" + else: + plus = " " + + print " * %2d%s %4d" % (i, plus, collisions[i]) +print " */\n" + +print "#define none 0x%04x\n" % none + +print "static const uint16_t map[] = {" +for i in xrange(0, hash_size, 8): + print " ", + for j in xrange(i, i + 8): + if map[j] & 0xffff == 0xffff: + print " none,", + else: + print "0x%04x," % (map[j] & 0xffff), + print + +print "};" + +# Finally we generate the hash table lookup function. The hash function and +# linear probing algorithm matches the hash table generated above. + +print """ +void * +anv_lookup_entrypoint(const char *name) +{ + static const uint32_t prime_factor = %d; + static const uint32_t prime_step = %d; + const struct anv_entrypoint *e; + uint32_t hash, h, i; + const char *p; + + hash = 0; + for (p = name; *p; p++) + hash = hash * prime_factor + *p; + + h = hash; + do { + i = map[h & %d]; + if (i == none) + return NULL; + e = &entrypoints[i]; + h += prime_step; + } while (e->hash != hash); + + if (strcmp(name, strings + e->name) != 0) + return NULL; + + return anv_resolve_entrypoint(i); +} +""" % (prime_factor, prime_step, hash_mask) diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c new file mode 100644 index 00000000000..7798a7bbde3 --- /dev/null +++ b/src/intel/vulkan/anv_formats.c @@ -0,0 +1,603 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software 
without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" +#include "brw_surface_formats.h" + +#define RGBA ((struct anv_format_swizzle) { 0, 1, 2, 3 }) +#define BGRA ((struct anv_format_swizzle) { 2, 1, 0, 3 }) + +#define swiz_fmt(__vk_fmt, __hw_fmt, __swizzle, ...) \ + [__vk_fmt] = { \ + .vk_format = __vk_fmt, \ + .name = #__vk_fmt, \ + .isl_format = __hw_fmt, \ + .isl_layout = &isl_format_layouts[__hw_fmt], \ + .swizzle = __swizzle, \ + __VA_ARGS__ \ + } + +#define fmt(__vk_fmt, __hw_fmt, ...) \ + swiz_fmt(__vk_fmt, __hw_fmt, RGBA, __VA_ARGS__) + +/* HINT: For array formats, the ISL name should match the VK name. For + * packed formats, they should have the channels in reverse order from each + * other. The reason for this is that, for packed formats, the ISL (and + * bspec) names are in LSB -> MSB order while VK formats are MSB -> LSB. 
+ */ +static const struct anv_format anv_formats[] = { + fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW), + fmt(VK_FORMAT_R4G4_UNORM_PACK8, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM), + swiz_fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM, BGRA), + fmt(VK_FORMAT_R5G6B5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM), + swiz_fmt(VK_FORMAT_B5G6R5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM, BGRA), + fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16, ISL_FORMAT_A1B5G5R5_UNORM), + fmt(VK_FORMAT_B5G5R5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16, ISL_FORMAT_B5G5R5A1_UNORM), + fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM), + fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM), + fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED), + fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED), + fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT), + fmt(VK_FORMAT_R8_SINT, ISL_FORMAT_R8_SINT), + fmt(VK_FORMAT_R8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R8G8_UNORM, ISL_FORMAT_R8G8_UNORM), + fmt(VK_FORMAT_R8G8_SNORM, ISL_FORMAT_R8G8_SNORM), + fmt(VK_FORMAT_R8G8_USCALED, ISL_FORMAT_R8G8_USCALED), + fmt(VK_FORMAT_R8G8_SSCALED, ISL_FORMAT_R8G8_SSCALED), + fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT), + fmt(VK_FORMAT_R8G8_SINT, ISL_FORMAT_R8G8_SINT), + fmt(VK_FORMAT_R8G8_SRGB, ISL_FORMAT_UNSUPPORTED), /* L8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8_UNORM), + fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM), + fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED), + fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED), + fmt(VK_FORMAT_R8G8B8_UINT, ISL_FORMAT_R8G8B8_UINT), + fmt(VK_FORMAT_R8G8B8_SINT, ISL_FORMAT_R8G8B8_SINT), + fmt(VK_FORMAT_R8G8B8_SRGB, ISL_FORMAT_UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8A8_UNORM, ISL_FORMAT_R8G8B8A8_UNORM), + fmt(VK_FORMAT_R8G8B8A8_SNORM, ISL_FORMAT_R8G8B8A8_SNORM), + fmt(VK_FORMAT_R8G8B8A8_USCALED, ISL_FORMAT_R8G8B8A8_USCALED), + 
fmt(VK_FORMAT_R8G8B8A8_SSCALED, ISL_FORMAT_R8G8B8A8_SSCALED), + fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT), + fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT), + fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), + fmt(VK_FORMAT_A8B8G8R8_UNORM_PACK32, ISL_FORMAT_R8G8B8A8_UNORM), + fmt(VK_FORMAT_A8B8G8R8_SNORM_PACK32, ISL_FORMAT_R8G8B8A8_SNORM), + fmt(VK_FORMAT_A8B8G8R8_USCALED_PACK32, ISL_FORMAT_R8G8B8A8_USCALED), + fmt(VK_FORMAT_A8B8G8R8_SSCALED_PACK32, ISL_FORMAT_R8G8B8A8_SSCALED), + fmt(VK_FORMAT_A8B8G8R8_UINT_PACK32, ISL_FORMAT_R8G8B8A8_UINT), + fmt(VK_FORMAT_A8B8G8R8_SINT_PACK32, ISL_FORMAT_R8G8B8A8_SINT), + fmt(VK_FORMAT_A8B8G8R8_SRGB_PACK32, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), + fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32, ISL_FORMAT_B10G10R10A2_UNORM), + fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32, ISL_FORMAT_B10G10R10A2_SNORM), + fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED), + fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED), + fmt(VK_FORMAT_A2R10G10B10_UINT_PACK32, ISL_FORMAT_B10G10R10A2_UINT), + fmt(VK_FORMAT_A2R10G10B10_SINT_PACK32, ISL_FORMAT_B10G10R10A2_SINT), + fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32, ISL_FORMAT_R10G10B10A2_UNORM), + fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32, ISL_FORMAT_R10G10B10A2_SNORM), + fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, ISL_FORMAT_R10G10B10A2_USCALED), + fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED), + fmt(VK_FORMAT_A2B10G10R10_UINT_PACK32, ISL_FORMAT_R10G10B10A2_UINT), + fmt(VK_FORMAT_A2B10G10R10_SINT_PACK32, ISL_FORMAT_R10G10B10A2_SINT), + fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM), + fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM), + fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED), + fmt(VK_FORMAT_R16_SSCALED, ISL_FORMAT_R16_SSCALED), + fmt(VK_FORMAT_R16_UINT, ISL_FORMAT_R16_UINT), + fmt(VK_FORMAT_R16_SINT, ISL_FORMAT_R16_SINT), + fmt(VK_FORMAT_R16_SFLOAT, ISL_FORMAT_R16_FLOAT), + fmt(VK_FORMAT_R16G16_UNORM, 
ISL_FORMAT_R16G16_UNORM), + fmt(VK_FORMAT_R16G16_SNORM, ISL_FORMAT_R16G16_SNORM), + fmt(VK_FORMAT_R16G16_USCALED, ISL_FORMAT_R16G16_USCALED), + fmt(VK_FORMAT_R16G16_SSCALED, ISL_FORMAT_R16G16_SSCALED), + fmt(VK_FORMAT_R16G16_UINT, ISL_FORMAT_R16G16_UINT), + fmt(VK_FORMAT_R16G16_SINT, ISL_FORMAT_R16G16_SINT), + fmt(VK_FORMAT_R16G16_SFLOAT, ISL_FORMAT_R16G16_FLOAT), + fmt(VK_FORMAT_R16G16B16_UNORM, ISL_FORMAT_R16G16B16_UNORM), + fmt(VK_FORMAT_R16G16B16_SNORM, ISL_FORMAT_R16G16B16_SNORM), + fmt(VK_FORMAT_R16G16B16_USCALED, ISL_FORMAT_R16G16B16_USCALED), + fmt(VK_FORMAT_R16G16B16_SSCALED, ISL_FORMAT_R16G16B16_SSCALED), + fmt(VK_FORMAT_R16G16B16_UINT, ISL_FORMAT_R16G16B16_UINT), + fmt(VK_FORMAT_R16G16B16_SINT, ISL_FORMAT_R16G16B16_SINT), + fmt(VK_FORMAT_R16G16B16_SFLOAT, ISL_FORMAT_R16G16B16_FLOAT), + fmt(VK_FORMAT_R16G16B16A16_UNORM, ISL_FORMAT_R16G16B16A16_UNORM), + fmt(VK_FORMAT_R16G16B16A16_SNORM, ISL_FORMAT_R16G16B16A16_SNORM), + fmt(VK_FORMAT_R16G16B16A16_USCALED, ISL_FORMAT_R16G16B16A16_USCALED), + fmt(VK_FORMAT_R16G16B16A16_SSCALED, ISL_FORMAT_R16G16B16A16_SSCALED), + fmt(VK_FORMAT_R16G16B16A16_UINT, ISL_FORMAT_R16G16B16A16_UINT), + fmt(VK_FORMAT_R16G16B16A16_SINT, ISL_FORMAT_R16G16B16A16_SINT), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, ISL_FORMAT_R16G16B16A16_FLOAT), + fmt(VK_FORMAT_R32_UINT, ISL_FORMAT_R32_UINT,), + fmt(VK_FORMAT_R32_SINT, ISL_FORMAT_R32_SINT,), + fmt(VK_FORMAT_R32_SFLOAT, ISL_FORMAT_R32_FLOAT,), + fmt(VK_FORMAT_R32G32_UINT, ISL_FORMAT_R32G32_UINT,), + fmt(VK_FORMAT_R32G32_SINT, ISL_FORMAT_R32G32_SINT,), + fmt(VK_FORMAT_R32G32_SFLOAT, ISL_FORMAT_R32G32_FLOAT,), + fmt(VK_FORMAT_R32G32B32_UINT, ISL_FORMAT_R32G32B32_UINT,), + fmt(VK_FORMAT_R32G32B32_SINT, ISL_FORMAT_R32G32B32_SINT,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, ISL_FORMAT_R32G32B32_FLOAT,), + fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT,), + fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT,), + 
fmt(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU), + fmt(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU), + fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT), + fmt(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU), + fmt(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU), + fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT), + fmt(VK_FORMAT_R64G64B64_UINT, ISL_FORMAT_R64G64B64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64_SINT, ISL_FORMAT_R64G64B64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT), + fmt(VK_FORMAT_R64G64B64A64_UINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64A64_SINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT), + fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT), + fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP), + + fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, .has_depth = true), + fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true), + fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .has_depth = true), + fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .has_depth = true, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .has_depth = true, .has_stencil = true), + + fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_DXT1_RGB), + fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_DXT1_RGB_SRGB), + fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK, ISL_FORMAT_BC1_UNORM), + fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, ISL_FORMAT_BC1_UNORM_SRGB), + fmt(VK_FORMAT_BC2_UNORM_BLOCK, ISL_FORMAT_BC2_UNORM), + fmt(VK_FORMAT_BC2_SRGB_BLOCK, ISL_FORMAT_BC2_UNORM_SRGB), + fmt(VK_FORMAT_BC3_UNORM_BLOCK, ISL_FORMAT_BC3_UNORM), + fmt(VK_FORMAT_BC3_SRGB_BLOCK, ISL_FORMAT_BC3_UNORM_SRGB), + fmt(VK_FORMAT_BC4_UNORM_BLOCK, 
ISL_FORMAT_BC4_UNORM), + fmt(VK_FORMAT_BC4_SNORM_BLOCK, ISL_FORMAT_BC4_SNORM), + fmt(VK_FORMAT_BC5_UNORM_BLOCK, ISL_FORMAT_BC5_UNORM), + fmt(VK_FORMAT_BC5_SNORM_BLOCK, ISL_FORMAT_BC5_SNORM), + fmt(VK_FORMAT_BC6H_UFLOAT_BLOCK, ISL_FORMAT_BC6H_UF16), + fmt(VK_FORMAT_BC6H_SFLOAT_BLOCK, ISL_FORMAT_BC6H_SF16), + fmt(VK_FORMAT_BC7_UNORM_BLOCK, ISL_FORMAT_BC7_UNORM), + fmt(VK_FORMAT_BC7_SRGB_BLOCK, ISL_FORMAT_BC7_UNORM_SRGB), + fmt(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8), + fmt(VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8), + fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8_PTA), + fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8_PTA), + fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, ISL_FORMAT_ETC2_EAC_RGBA8), + fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, ISL_FORMAT_ETC2_EAC_SRGB8_A8), + fmt(VK_FORMAT_EAC_R11_UNORM_BLOCK, ISL_FORMAT_EAC_R11), + fmt(VK_FORMAT_EAC_R11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_R11), + fmt(VK_FORMAT_EAC_R11G11_UNORM_BLOCK, ISL_FORMAT_EAC_RG11), + fmt(VK_FORMAT_EAC_R11G11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_RG11), + fmt(VK_FORMAT_ASTC_4x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + 
fmt(VK_FORMAT_ASTC_8x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SSCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM), + fmt(VK_FORMAT_B8G8R8A8_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SSCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB), +}; + +#undef fmt + +const struct anv_format * +anv_format_for_vk_format(VkFormat format) +{ + return &anv_formats[format]; +} + +/** + * Exactly one bit must be set in \a aspect. 
+ */ +enum isl_format +anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, + VkImageTiling tiling, struct anv_format_swizzle *swizzle) +{ + const struct anv_format *anv_fmt = &anv_formats[format]; + + if (swizzle) + *swizzle = anv_fmt->swizzle; + + switch (aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: + if (anv_fmt->isl_format == ISL_FORMAT_UNSUPPORTED) { + return ISL_FORMAT_UNSUPPORTED; + } else if (tiling == VK_IMAGE_TILING_OPTIMAL && + !util_is_power_of_two(anv_fmt->isl_layout->bs)) { + /* Tiled formats *must* be power-of-two because we need up upload + * them with the render pipeline. For 3-channel formats, we fix + * this by switching them over to RGBX or RGBA formats under the + * hood. + */ + enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->isl_format); + if (rgbx != ISL_FORMAT_UNSUPPORTED) + return rgbx; + else + return isl_format_rgb_to_rgba(anv_fmt->isl_format); + } else { + return anv_fmt->isl_format; + } + + case VK_IMAGE_ASPECT_DEPTH_BIT: + case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT): + assert(anv_fmt->has_depth); + return anv_fmt->isl_format; + + case VK_IMAGE_ASPECT_STENCIL_BIT: + assert(anv_fmt->has_stencil); + return ISL_FORMAT_R8_UINT; + + default: + unreachable("bad VkImageAspect"); + return ISL_FORMAT_UNSUPPORTED; + } +} + +// Format capabilities + +void anv_validate_GetPhysicalDeviceFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat _format, + VkFormatProperties* pFormatProperties) +{ + const struct anv_format *format = anv_format_for_vk_format(_format); + fprintf(stderr, "vkGetFormatProperties(%s)\n", format->name); + anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); +} + +static VkFormatFeatureFlags +get_image_format_properties(int gen, enum isl_format base, + enum isl_format actual, + struct anv_format_swizzle swizzle) +{ + const struct brw_surface_format_info *info = &surface_formats[actual]; + + if (actual == ISL_FORMAT_UNSUPPORTED || !info->exists) + return 0; 
+ + VkFormatFeatureFlags flags = 0; + if (info->sampling <= gen) { + flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_BLIT_SRC_BIT; + + if (info->filtering <= gen) + flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + } + + /* We can render to swizzled formats. However, if the alpha channel is + * moved, then blending won't work correctly. The PRM tells us + * straight-up not to render to such a surface. + */ + if (info->render_target <= gen && swizzle.a == 3) { + flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + + if (info->alpha_blend <= gen && swizzle.a == 3) + flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + + /* Load/store is determined based on base format. This prevents RGB + * formats from showing up as load/store capable. + */ + if (isl_is_storage_image_format(base)) + flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + + if (base == ISL_FORMAT_R32_SINT || base == ISL_FORMAT_R32_UINT) + flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; + + return flags; +} + +static VkFormatFeatureFlags +get_buffer_format_properties(int gen, enum isl_format format) +{ + const struct brw_surface_format_info *info = &surface_formats[format]; + + if (format == ISL_FORMAT_UNSUPPORTED || !info->exists) + return 0; + + VkFormatFeatureFlags flags = 0; + if (info->sampling <= gen && !isl_format_is_compressed(format)) + flags |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; + + if (info->input_vb <= gen) + flags |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + + if (isl_is_storage_image_format(format)) + flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; + + if (format == ISL_FORMAT_R32_SINT || format == ISL_FORMAT_R32_UINT) + flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; + + return flags; +} + +static void +anv_physical_device_get_format_properties(struct anv_physical_device *physical_device, + VkFormat format, + VkFormatProperties *out_properties) +{ + int gen = physical_device->info->gen * 10; + if 
(physical_device->info->is_haswell) + gen += 5; + + VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0; + if (anv_format_is_depth_or_stencil(&anv_formats[format])) { + tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + if (physical_device->info->gen >= 8) { + tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; + } + if (anv_formats[format].has_depth) { + tiled |= VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + } else { + enum isl_format linear_fmt, tiled_fmt; + struct anv_format_swizzle linear_swizzle, tiled_swizzle; + linear_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_LINEAR, &linear_swizzle); + tiled_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_OPTIMAL, &tiled_swizzle); + + linear = get_image_format_properties(gen, linear_fmt, linear_fmt, + linear_swizzle); + tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt, + tiled_swizzle); + buffer = get_buffer_format_properties(gen, linear_fmt); + + /* XXX: We handle 3-channel formats by switching them out for RGBX or + * RGBA formats behind-the-scenes. This works fine for textures + * because the upload process will fill in the extra channel. + * We could also support it for render targets, but it will take + * substantially more work and we have enough RGBX formats to handle + * what most clients will want. 
+ */ + if (linear_fmt != ISL_FORMAT_UNSUPPORTED && + !util_is_power_of_two(isl_format_layouts[linear_fmt].bs) && + isl_format_rgb_to_rgbx(linear_fmt) == ISL_FORMAT_UNSUPPORTED) { + tiled &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT & + ~VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + } + + out_properties->linearTilingFeatures = linear; + out_properties->optimalTilingFeatures = tiled; + out_properties->bufferFeatures = buffer; + + return; +} + + +void anv_GetPhysicalDeviceFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties* pFormatProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + + anv_physical_device_get_format_properties( + physical_device, + format, + pFormatProperties); +} + +VkResult anv_GetPhysicalDeviceImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageCreateFlags createFlags, + VkImageFormatProperties* pImageFormatProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkFormatProperties format_props; + VkFormatFeatureFlags format_feature_flags; + VkExtent3D maxExtent; + uint32_t maxMipLevels; + uint32_t maxArraySize; + VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; + + anv_physical_device_get_format_properties(physical_device, format, + &format_props); + + /* Extract the VkFormatFeatureFlags that are relevant for the queried + * tiling. 
+ */ + if (tiling == VK_IMAGE_TILING_LINEAR) { + format_feature_flags = format_props.linearTilingFeatures; + } else if (tiling == VK_IMAGE_TILING_OPTIMAL) { + format_feature_flags = format_props.optimalTilingFeatures; + } else { + unreachable("bad VkImageTiling"); + } + + switch (type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + maxExtent.width = 16384; + maxExtent.height = 1; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + sampleCounts = VK_SAMPLE_COUNT_1_BIT; + break; + case VK_IMAGE_TYPE_2D: + /* FINISHME: Does this really differ for cube maps? The documentation + * for RENDER_SURFACE_STATE suggests so. + */ + maxExtent.width = 16384; + maxExtent.height = 16384; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + break; + case VK_IMAGE_TYPE_3D: + maxExtent.width = 2048; + maxExtent.height = 2048; + maxExtent.depth = 2048; + maxMipLevels = 12; /* log2(maxWidth) + 1 */ + maxArraySize = 1; + break; + } + + if (tiling == VK_IMAGE_TILING_OPTIMAL && + type == VK_IMAGE_TYPE_2D && + (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && + !(createFlags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && + !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + sampleCounts = isl_device_get_sample_counts(&physical_device->isl_dev); + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta implements transfers by sampling from the source image. */ + if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + goto unsupported; + } + } + +#if 0 + if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + if (anv_format_for_vk_format(format)->has_stencil) { + /* Not yet implemented because copying to a W-tiled surface is crazy + * hard. 
+ */ + anv_finishme("support VK_IMAGE_USAGE_TRANSFER_DST_BIT for " + "stencil format"); + goto unsupported; + } + } +#endif + + if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_STORAGE_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) { + /* Nothing to check. */ + } + + if (usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) { + /* Ignore this flag because it was removed from the + * provisional_I_20150910 header. + */ + } + + *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = maxExtent, + .maxMipLevels = maxMipLevels, + .maxArrayLayers = maxArraySize, + .sampleCounts = sampleCounts, + + /* FINISHME: Accurately calculate + * VkImageFormatProperties::maxResourceSize. + */ + .maxResourceSize = UINT32_MAX, + }; + + return VK_SUCCESS; + +unsupported: + *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = { 0, 0, 0 }, + .maxMipLevels = 0, + .maxArrayLayers = 0, + .sampleCounts = 0, + .maxResourceSize = 0, + }; + + return VK_SUCCESS; +} + +void anv_GetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + uint32_t samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t* pNumProperties, + VkSparseImageFormatProperties* pProperties) +{ + /* Sparse images are not yet supported. 
*/ + *pNumProperties = 0; +} diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c new file mode 100644 index 00000000000..0a7be353327 --- /dev/null +++ b/src/intel/vulkan/anv_gem.c @@ -0,0 +1,358 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define _DEFAULT_SOURCE + +#include +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) + +static int +anv_ioctl(int fd, unsigned long request, void *arg) +{ + int ret; + + do { + ret = ioctl(fd, request, arg); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_CREATE. + * + * Return gem handle, or 0 on failure. Gem handles are never 0. 
+ */ +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + struct drm_i915_gem_create gem_create; + int ret; + + VG_CLEAR(gem_create); + gem_create.size = size; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); + if (ret != 0) { + /* FIXME: What do we do if this fails? */ + return 0; + } + + return gem_create.handle; +} + +void +anv_gem_close(struct anv_device *device, uint32_t gem_handle) +{ + struct drm_gem_close close; + + VG_CLEAR(close); + close.handle = gem_handle; + anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_MMAP. + */ +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size, uint32_t flags) +{ + struct drm_i915_gem_mmap gem_mmap; + int ret; + + gem_mmap.handle = gem_handle; + VG_CLEAR(gem_mmap.pad); + gem_mmap.offset = offset; + gem_mmap.size = size; + VG_CLEAR(gem_mmap.addr_ptr); + gem_mmap.flags = flags; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); + if (ret != 0) { + /* FIXME: Is NULL the right error return? Cf MAP_INVALID */ + return NULL; + } + + VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1)); + return (void *)(uintptr_t) gem_mmap.addr_ptr; +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). 
+ */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + VG(VALGRIND_FREELIKE_BLOCK(p, 0)); + munmap(p, size); +} + +uint32_t +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + struct drm_i915_gem_userptr userptr; + int ret; + + VG_CLEAR(userptr); + userptr.user_ptr = (__u64)((unsigned long) mem); + userptr.user_size = size; + userptr.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); + if (ret == -1) + return 0; + + return userptr.handle; +} + +int +anv_gem_set_caching(struct anv_device *device, + uint32_t gem_handle, uint32_t caching) +{ + struct drm_i915_gem_caching gem_caching; + + VG_CLEAR(gem_caching); + gem_caching.handle = gem_handle; + gem_caching.caching = caching; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching); +} + +int +anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t write_domain) +{ + struct drm_i915_gem_set_domain gem_set_domain; + + VG_CLEAR(gem_set_domain); + gem_set_domain.handle = gem_handle; + gem_set_domain.read_domains = read_domains; + gem_set_domain.write_domain = write_domain; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &gem_set_domain); +} + +/** + * On error, \a timeout_ns holds the remaining time. 
+ */ +int +anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) +{ + struct drm_i915_gem_wait wait; + int ret; + + VG_CLEAR(wait); + wait.bo_handle = gem_handle; + wait.timeout_ns = *timeout_ns; + wait.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + *timeout_ns = wait.timeout_ns; + + return ret; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); +} + +int +anv_gem_set_tiling(struct anv_device *device, + uint32_t gem_handle, uint32_t stride, uint32_t tiling) +{ + struct drm_i915_gem_set_tiling set_tiling; + int ret; + + /* set_tiling overwrites the input on the error path, so we have to open + * code anv_ioctl. + */ + + do { + VG_CLEAR(set_tiling); + set_tiling.handle = gem_handle; + set_tiling.tiling_mode = tiling; + set_tiling.stride = stride; + + ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + drm_i915_getparam_t gp; + int ret, tmp; + + VG_CLEAR(gp); + gp.param = param; + gp.value = &tmp; + ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0) + return tmp; + + return 0; +} + +bool +anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) +{ + struct drm_gem_close close; + int ret; + + struct drm_i915_gem_create gem_create; + VG_CLEAR(gem_create); + gem_create.size = 4096; + + if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) { + assert(!"Failed to create GEM BO"); + return false; + } + + bool swizzled = false; + + /* set_tiling overwrites the input on the error path, so we have to open + * code anv_ioctl. + */ + struct drm_i915_gem_set_tiling set_tiling; + do { + VG_CLEAR(set_tiling); + set_tiling.handle = gem_create.handle; + set_tiling.tiling_mode = tiling; + set_tiling.stride = tiling == I915_TILING_X ? 
512 : 128; + + ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + if (ret != 0) { + assert(!"Failed to set BO tiling"); + goto close_and_return; + } + + struct drm_i915_gem_get_tiling get_tiling; + VG_CLEAR(get_tiling); + get_tiling.handle = gem_create.handle; + + if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) { + assert(!"Failed to get BO tiling"); + goto close_and_return; + } + + swizzled = get_tiling.swizzle_mode != I915_BIT_6_SWIZZLE_NONE; + +close_and_return: + + VG_CLEAR(close); + close.handle = gem_create.handle; + anv_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); + + return swizzled; +} + +int +anv_gem_create_context(struct anv_device *device) +{ + struct drm_i915_gem_context_create create; + int ret; + + VG_CLEAR(create); + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret == -1) + return -1; + + return create.ctx_id; +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + struct drm_i915_gem_context_destroy destroy; + + VG_CLEAR(destroy); + destroy.ctx_id = context; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy); +} + +int +anv_gem_get_aperture(int fd, uint64_t *size) +{ + struct drm_i915_gem_get_aperture aperture; + int ret; + + VG_CLEAR(aperture); + ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (ret == -1) + return -1; + + *size = aperture.aper_available_size; + + return 0; +} + +int +anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.handle = gem_handle; + args.flags = DRM_CLOEXEC; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + if (ret == -1) + return -1; + + return args.fd; +} + +uint32_t +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.fd = fd; + + ret = 
anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); + if (ret == -1) + return 0; + + return args.handle; +} diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c new file mode 100644 index 00000000000..3204fefb28e --- /dev/null +++ b/src/intel/vulkan/anv_gem_stubs.c @@ -0,0 +1,159 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define _DEFAULT_SOURCE + +#include +#include +#include + +#include "anv_private.h" + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + int fd = memfd_create("fake bo", MFD_CLOEXEC); + if (fd == -1) + return 0; + + assert(fd != 0); + + if (ftruncate(fd, size) == -1) + return 0; + + return fd; +} + +void +anv_gem_close(struct anv_device *device, uint32_t gem_handle) +{ + close(gem_handle); +} + +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size, uint32_t flags) +{ + /* Ignore flags, as they're specific to I915_GEM_MMAP. */ + (void) flags; + + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + gem_handle, offset); +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). + */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + munmap(p, size); +} + +uint32_t +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + return -1; +} + +int +anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) +{ + return 0; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return 0; +} + +int +anv_gem_set_tiling(struct anv_device *device, + uint32_t gem_handle, uint32_t stride, uint32_t tiling) +{ + return 0; +} + +int +anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, + uint32_t caching) +{ + return 0; +} + +int +anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t write_domain) +{ + return 0; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + unreachable("Unused"); +} + +bool +anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) +{ + unreachable("Unused"); +} + +int +anv_gem_create_context(struct anv_device *device) +{ 
+ unreachable("Unused"); +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + unreachable("Unused"); +} + +int +anv_gem_get_aperture(int fd, uint64_t *size) +{ + unreachable("Unused"); +} + +int +anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) +{ + unreachable("Unused"); +} + +uint32_t +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + unreachable("Unused"); +} diff --git a/src/intel/vulkan/anv_gen_macros.h b/src/intel/vulkan/anv_gen_macros.h new file mode 100644 index 00000000000..ef2ecd55a9b --- /dev/null +++ b/src/intel/vulkan/anv_gen_macros.h @@ -0,0 +1,146 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +/* Macros for handling per-gen compilation. 
+ * + * The prefixing macros GENX() and genX() automatically prefix whatever you + * give them by GENX_ or genX_ where X is the gen number. + * + * You can declare a function to be used on some range of gens like this: + * + * GENX_FUNC(GEN7, GEN75) void + * genX(my_function_name)(args...) + * { + * // Do stuff + * } + * + * If the file is compiled for any set of gens containing gen7 and gen75, + * the function will effectively only get compiled twice as + * gen7_my_function_name and gen75_my_function_name. The function has to + * be compilable on all gens, but it will become a static inline that gets + * discarded by the compiler on all gens not in range. + * + * You can do pseudo-runtime checks in your function such as + * + * if (ANV_GEN > 8 || ANV_IS_HASWELL) { + * // Do something + * } + * + * The contents of the if statement must be valid regardless of gen, but + * the if will get compiled away on everything except haswell. + * + * For places where you really do have a compile-time conflict, you can + * use preprocessor logic: + * + * #if (ANV_GEN > 8 || ANV_IS_HASWELL) + * // Do something + * #endif + * + * However, it is strongly recommended that the former be used whenever + * possible. + */ + +/* Base macro defined on the command line. If we don't have this, we can't + * do anything. 
+ */ +#ifdef ANV_GENx10 + +/* Gen checking macros */ +#define ANV_GEN ((ANV_GENx10) / 10) +#define ANV_IS_HASWELL ((ANV_GENx10) == 75) + +/* Prefixing macros */ +#if (ANV_GENx10 == 70) +# define GENX(X) GEN7_##X +# define genX(x) gen7_##x +#elif (ANV_GENx10 == 75) +# define GENX(X) GEN75_##X +# define genX(x) gen75_##x +#elif (ANV_GENx10 == 80) +# define GENX(X) GEN8_##X +# define genX(x) gen8_##x +#elif (ANV_GENx10 == 90) +# define GENX(X) GEN9_##X +# define genX(x) gen9_##x +#else +# error "Need to add prefixing macros for your gen" +#endif + +/* Macros for comparing gens */ +#if (ANV_GENx10 >= 70) +#define __ANV_GEN_GE_GEN7(T, F) T +#else +#define __ANV_GEN_GE_GEN7(T, F) F +#endif + +#if (ANV_GENx10 <= 70) +#define __ANV_GEN_LE_GEN7(T, F) T +#else +#define __ANV_GEN_LE_GEN7(T, F) F +#endif + +#if (ANV_GENx10 >= 75) +#define __ANV_GEN_GE_GEN75(T, F) T +#else +#define __ANV_GEN_GE_GEN75(T, F) F +#endif + +#if (ANV_GENx10 <= 75) +#define __ANV_GEN_LE_GEN75(T, F) T +#else +#define __ANV_GEN_LE_GEN75(T, F) F +#endif + +#if (ANV_GENx10 >= 80) +#define __ANV_GEN_GE_GEN8(T, F) T +#else +#define __ANV_GEN_GE_GEN8(T, F) F +#endif + +#if (ANV_GENx10 <= 80) +#define __ANV_GEN_LE_GEN8(T, F) T +#else +#define __ANV_GEN_LE_GEN8(T, F) F +#endif + +#if (ANV_GENx10 >= 90) +#define __ANV_GEN_GE_GEN9(T, F) T +#else +#define __ANV_GEN_GE_GEN9(T, F) F +#endif + +#if (ANV_GENx10 <= 90) +#define __ANV_GEN_LE_GEN9(T, F) T +#else +#define __ANV_GEN_LE_GEN9(T, F) F +#endif + +#define __ANV_GEN_IN_RANGE(start, end, T, F) \ + __ANV_GEN_GE_##start(__ANV_GEN_LE_##end(T, F), F) + +/* Declares a function as static inline if it's not in range */ +#define GENX_FUNC(start, end) __ANV_GEN_IN_RANGE(start, end, , static inline) + +#endif /* ANV_GENx10 */ diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c new file mode 100644 index 00000000000..0a412a3f8c6 --- /dev/null +++ b/src/intel/vulkan/anv_image.c @@ -0,0 +1,911 @@ +/* + * Copyright © 2015 Intel Corporation + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/** + * Exactly one bit must be set in \a aspect. 
+ */ +static isl_surf_usage_flags_t +choose_isl_surf_usage(VkImageUsageFlags vk_usage, + VkImageAspectFlags aspect) +{ + isl_surf_usage_flags_t isl_usage = 0; + + /* FINISHME: Support aux surfaces */ + isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT; + + if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT) + isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; + + if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) + isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; + + if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; + + if (vk_usage & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) + isl_usage |= ISL_SURF_USAGE_CUBE_BIT; + + if (vk_usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + switch (aspect) { + default: + unreachable("bad VkImageAspect"); + case VK_IMAGE_ASPECT_DEPTH_BIT: + isl_usage |= ISL_SURF_USAGE_DEPTH_BIT; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + isl_usage |= ISL_SURF_USAGE_STENCIL_BIT; + break; + } + } + + if (vk_usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta implements transfers by sampling from the source image. */ + isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; + } + + if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + /* Meta implements transfers by rendering into the destination image. */ + isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; + } + + return isl_usage; +} + +/** + * Exactly one bit must be set in \a aspect. + */ +static struct anv_surface * +get_surface(struct anv_image *image, VkImageAspectFlags aspect) +{ + switch (aspect) { + default: + unreachable("bad VkImageAspect"); + case VK_IMAGE_ASPECT_COLOR_BIT: + return &image->color_surface; + case VK_IMAGE_ASPECT_DEPTH_BIT: + return &image->depth_surface; + case VK_IMAGE_ASPECT_STENCIL_BIT: + return &image->stencil_surface; + } +} + +/** + * Initialize the anv_image::*_surface selected by \a aspect. Then update the + * image's memory requirements (that is, the image's size and alignment). + * + * Exactly one bit must be set in \a aspect. 
+ */ +static VkResult +make_surface(const struct anv_device *dev, + struct anv_image *image, + const struct anv_image_create_info *anv_info, + VkImageAspectFlags aspect) +{ + const VkImageCreateInfo *vk_info = anv_info->vk_info; + bool ok UNUSED; + + static const enum isl_surf_dim vk_to_isl_surf_dim[] = { + [VK_IMAGE_TYPE_1D] = ISL_SURF_DIM_1D, + [VK_IMAGE_TYPE_2D] = ISL_SURF_DIM_2D, + [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D, + }; + + isl_tiling_flags_t tiling_flags = anv_info->isl_tiling_flags; + if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) + tiling_flags &= ISL_TILING_LINEAR_BIT; + + struct anv_surface *anv_surf = get_surface(image, aspect); + + VkExtent3D extent; + switch (vk_info->imageType) { + case VK_IMAGE_TYPE_1D: + extent = (VkExtent3D) { vk_info->extent.width, 1, 1 }; + break; + case VK_IMAGE_TYPE_2D: + extent = (VkExtent3D) { vk_info->extent.width, vk_info->extent.height, 1 }; + break; + case VK_IMAGE_TYPE_3D: + extent = vk_info->extent; + break; + default: + unreachable("invalid image type"); + } + + image->extent = extent; + + ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, + .dim = vk_to_isl_surf_dim[vk_info->imageType], + .format = anv_get_isl_format(vk_info->format, aspect, + vk_info->tiling, NULL), + .width = extent.width, + .height = extent.height, + .depth = extent.depth, + .levels = vk_info->mipLevels, + .array_len = vk_info->arrayLayers, + .samples = vk_info->samples, + .min_alignment = 0, + .min_pitch = 0, + .usage = choose_isl_surf_usage(image->usage, aspect), + .tiling_flags = tiling_flags); + + /* isl_surf_init() will fail only if provided invalid input. Invalid input + * is illegal in Vulkan. + */ + assert(ok); + + anv_surf->offset = align_u32(image->size, anv_surf->isl.alignment); + image->size = anv_surf->offset + anv_surf->isl.size; + image->alignment = MAX(image->alignment, anv_surf->isl.alignment); + + return VK_SUCCESS; +} + +/** + * Parameter @a format is required and overrides VkImageCreateInfo::format. 
+ */ +static VkImageUsageFlags +anv_image_get_full_usage(const VkImageCreateInfo *info, + const struct anv_format *format) +{ + VkImageUsageFlags usage = info->usage; + + if (info->samples > 1 && + (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) { + /* Meta will resolve the image by binding it as a texture. */ + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta will transfer from the image by binding it as a texture. */ + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + /* For non-clear transfer operations, meta will transfer to the image by + * binding it as a color attachment, even if the image format is not + * a color format. + */ + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + if (anv_format_is_depth_or_stencil(format)) { + /* vkCmdClearDepthStencilImage() only requires that + * VK_IMAGE_USAGE_TRANSFER_SRC_BIT be set. In particular, it does + * not require VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT. Meta + * clears the image, though, by binding it as a depthstencil + * attachment. 
+ */ + usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } + } + + return usage; +} + +VkResult +anv_image_create(VkDevice _device, + const struct anv_image_create_info *create_info, + const VkAllocationCallbacks* alloc, + VkImage *pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + const VkImageCreateInfo *pCreateInfo = create_info->vk_info; + struct anv_image *image = NULL; + const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); + VkResult r; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); + + anv_assert(pCreateInfo->mipLevels > 0); + anv_assert(pCreateInfo->arrayLayers > 0); + anv_assert(pCreateInfo->samples > 0); + anv_assert(pCreateInfo->extent.width > 0); + anv_assert(pCreateInfo->extent.height > 0); + anv_assert(pCreateInfo->extent.depth > 0); + + image = anv_alloc2(&device->alloc, alloc, sizeof(*image), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!image) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(image, 0, sizeof(*image)); + image->type = pCreateInfo->imageType; + image->extent = pCreateInfo->extent; + image->vk_format = pCreateInfo->format; + image->format = format; + image->levels = pCreateInfo->mipLevels; + image->array_size = pCreateInfo->arrayLayers; + image->samples = pCreateInfo->samples; + image->usage = anv_image_get_full_usage(pCreateInfo, format); + image->tiling = pCreateInfo->tiling; + + if (likely(anv_format_is_color(format))) { + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_COLOR_BIT); + if (r != VK_SUCCESS) + goto fail; + } else { + if (image->format->has_depth) { + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_DEPTH_BIT); + if (r != VK_SUCCESS) + goto fail; + } + + if (image->format->has_stencil) { + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_STENCIL_BIT); + if (r != VK_SUCCESS) + goto fail; + } + } + + *pImage = anv_image_to_handle(image); + + return VK_SUCCESS; + +fail: + if (image) + 
anv_free2(&device->alloc, alloc, image); + + return r; +} + +VkResult +anv_CreateImage(VkDevice device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImage *pImage) +{ + return anv_image_create(device, + &(struct anv_image_create_info) { + .vk_info = pCreateInfo, + .isl_tiling_flags = ISL_TILING_ANY_MASK, + }, + pAllocator, + pImage); +} + +void +anv_DestroyImage(VkDevice _device, VkImage _image, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_free2(&device->alloc, pAllocator, anv_image_from_handle(_image)); +} + +static void +anv_surface_get_subresource_layout(struct anv_image *image, + struct anv_surface *surface, + const VkImageSubresource *subresource, + VkSubresourceLayout *layout) +{ + /* If we are on a non-zero mip level or array slice, we need to + * calculate a real offset. + */ + anv_assert(subresource->mipLevel == 0); + anv_assert(subresource->arrayLayer == 0); + + layout->offset = surface->offset; + layout->rowPitch = surface->isl.row_pitch; + layout->depthPitch = isl_surf_get_array_pitch(&surface->isl); + layout->arrayPitch = isl_surf_get_array_pitch(&surface->isl); + layout->size = surface->isl.size; +} + +void anv_GetImageSubresourceLayout( + VkDevice device, + VkImage _image, + const VkImageSubresource* pSubresource, + VkSubresourceLayout* pLayout) +{ + ANV_FROM_HANDLE(anv_image, image, _image); + + assert(__builtin_popcount(pSubresource->aspectMask) == 1); + + switch (pSubresource->aspectMask) { + case VK_IMAGE_ASPECT_COLOR_BIT: + anv_surface_get_subresource_layout(image, &image->color_surface, + pSubresource, pLayout); + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + anv_surface_get_subresource_layout(image, &image->depth_surface, + pSubresource, pLayout); + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + anv_surface_get_subresource_layout(image, &image->stencil_surface, + pSubresource, pLayout); + break; + default: + assert(!"Invalid image aspect"); + } +} + 
+VkResult +anv_validate_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *subresource; + const struct anv_format *view_format_info; + + /* Validate structure type before dereferencing it. */ + assert(pCreateInfo); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); + subresource = &pCreateInfo->subresourceRange; + + /* Validate viewType is in range before using it. */ + assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); + assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); + + /* Validate format is in range before using it. */ + assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); + assert(pCreateInfo->format <= VK_FORMAT_END_RANGE); + view_format_info = anv_format_for_vk_format(pCreateInfo->format); + + /* Validate channel swizzles. */ + assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE); + + /* Validate subresource. 
*/ + assert(subresource->aspectMask != 0); + assert(subresource->levelCount > 0); + assert(subresource->layerCount > 0); + assert(subresource->baseMipLevel < image->levels); + assert(subresource->baseMipLevel + subresource->levelCount <= image->levels); + assert(subresource->baseArrayLayer < image->array_size); + assert(subresource->baseArrayLayer + subresource->layerCount <= image->array_size); + assert(pView); + + const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT + | VK_IMAGE_ASPECT_STENCIL_BIT; + + /* Validate format. */ + if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(!image->format->has_depth); + assert(!image->format->has_stencil); + assert(!view_format_info->has_depth); + assert(!view_format_info->has_stencil); + assert(view_format_info->isl_layout->bs == + image->format->isl_layout->bs); + } else if (subresource->aspectMask & ds_flags) { + assert((subresource->aspectMask & ~ds_flags) == 0); + + if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { + assert(image->format->has_depth); + assert(view_format_info->has_depth); + assert(view_format_info->isl_layout->bs == + image->format->isl_layout->bs); + } + + if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { + /* FINISHME: Is it legal to have an R8 view of S8? 
*/ + assert(image->format->has_stencil); + assert(view_format_info->has_stencil); + } + } else { + assert(!"bad VkImageSubresourceRange::aspectFlags"); + } + + return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView); +} + +void +anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + gen75_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + else + gen7_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + break; + case 8: + gen8_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + break; + case 9: + gen9_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + break; + default: + unreachable("unsupported gen\n"); + } + + if (!device->info.has_llc) + anv_state_clflush(state); +} + +static struct anv_state +alloc_surface_state(struct anv_device *device, + struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer) { + return anv_cmd_buffer_alloc_surface_state(cmd_buffer); + } else { + return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } +} + +static bool +has_matching_storage_typed_format(const struct anv_device *device, + enum isl_format format) +{ + return (isl_format_get_layout(format)->bs <= 4 || + (isl_format_get_layout(format)->bs <= 8 && + (device->info.gen >= 8 || device->info.is_haswell)) || + device->info.gen >= 9); +} + +static VkComponentSwizzle +remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component, + struct anv_format_swizzle format_swizzle) +{ + if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) + swizzle = component; + + switch (swizzle) { + case VK_COMPONENT_SWIZZLE_ZERO: + return VK_COMPONENT_SWIZZLE_ZERO; + case VK_COMPONENT_SWIZZLE_ONE: + return VK_COMPONENT_SWIZZLE_ONE; + case VK_COMPONENT_SWIZZLE_R: + return 
VK_COMPONENT_SWIZZLE_R + format_swizzle.r; + case VK_COMPONENT_SWIZZLE_G: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.g; + case VK_COMPONENT_SWIZZLE_B: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.b; + case VK_COMPONENT_SWIZZLE_A: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.a; + default: + unreachable("Invalid swizzle"); + } +} + +void +anv_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer, + uint32_t offset) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + VkImageViewCreateInfo mCreateInfo; + memcpy(&mCreateInfo, pCreateInfo, sizeof(VkImageViewCreateInfo)); + + assert(range->layerCount > 0); + assert(range->baseMipLevel < image->levels); + assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); + + switch (image->type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + assert(range->baseArrayLayer + range->layerCount - 1 <= image->array_size); + break; + case VK_IMAGE_TYPE_3D: + assert(range->baseArrayLayer + range->layerCount - 1 + <= anv_minify(image->extent.depth, range->baseMipLevel)); + break; + } + + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + iview->image = image; + iview->bo = image->bo; + iview->offset = image->offset + surface->offset + offset; + + iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; + iview->vk_format = pCreateInfo->format; + + struct anv_format_swizzle swizzle; + iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, + image->tiling, &swizzle); + iview->swizzle.r = remap_swizzle(pCreateInfo->components.r, + VK_COMPONENT_SWIZZLE_R, swizzle); + iview->swizzle.g = 
remap_swizzle(pCreateInfo->components.g, + VK_COMPONENT_SWIZZLE_G, swizzle); + iview->swizzle.b = remap_swizzle(pCreateInfo->components.b, + VK_COMPONENT_SWIZZLE_B, swizzle); + iview->swizzle.a = remap_swizzle(pCreateInfo->components.a, + VK_COMPONENT_SWIZZLE_A, swizzle); + + iview->base_layer = range->baseArrayLayer; + iview->base_mip = range->baseMipLevel; + + if (!isl_format_is_compressed(iview->format) && + isl_format_is_compressed(image->format->isl_format)) { + /* Scale the ImageView extent by the backing Image. This is used + * internally when an uncompressed ImageView is created on a + * compressed Image. The ImageView can therefore be used for copying + * data from a source Image to a destination Image. + */ + const struct isl_format_layout * isl_layout = image->format->isl_layout; + + iview->level_0_extent.depth = anv_minify(image->extent.depth, range->baseMipLevel); + iview->level_0_extent.depth = DIV_ROUND_UP(iview->level_0_extent.depth, isl_layout->bd); + + iview->level_0_extent.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; + iview->level_0_extent.width = isl_surf_get_row_pitch_el(&surface->isl); + mCreateInfo.subresourceRange.baseMipLevel = 0; + mCreateInfo.subresourceRange.baseArrayLayer = 0; + } else { + iview->level_0_extent.width = image->extent.width; + iview->level_0_extent.height = image->extent.height; + iview->level_0_extent.depth = image->extent.depth; + } + + iview->extent = (VkExtent3D) { + .width = anv_minify(iview->level_0_extent.width , range->baseMipLevel), + .height = anv_minify(iview->level_0_extent.height, range->baseMipLevel), + .depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel), + }; + + if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer); + + anv_fill_image_surface_state(device, iview->sampler_surface_state, + iview, &mCreateInfo, + VK_IMAGE_USAGE_SAMPLED_BIT); + } else { + 
iview->sampler_surface_state.alloc_size = 0; + } + + if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); + + anv_fill_image_surface_state(device, iview->color_rt_surface_state, + iview, &mCreateInfo, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + } else { + iview->color_rt_surface_state.alloc_size = 0; + } + + if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); + + if (has_matching_storage_typed_format(device, iview->format)) + anv_fill_image_surface_state(device, iview->storage_surface_state, + iview, &mCreateInfo, + VK_IMAGE_USAGE_STORAGE_BIT); + else + anv_fill_buffer_surface_state(device, iview->storage_surface_state, + ISL_FORMAT_RAW, + iview->offset, + iview->bo->size - iview->offset, 1); + + } else { + iview->storage_surface_state.alloc_size = 0; + } +} + +VkResult +anv_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_image_view *view; + + view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_image_view_init(view, device, pCreateInfo, NULL, 0); + + *pView = anv_image_view_to_handle(view); + + return VK_SUCCESS; +} + +void +anv_DestroyImageView(VkDevice _device, VkImageView _iview, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image_view, iview, _iview); + + if (iview->color_rt_surface_state.alloc_size > 0) { + anv_state_pool_free(&device->surface_state_pool, + iview->color_rt_surface_state); + } + + if (iview->sampler_surface_state.alloc_size > 0) { + anv_state_pool_free(&device->surface_state_pool, + iview->sampler_surface_state); + } + + if 
(iview->storage_surface_state.alloc_size > 0) { + anv_state_pool_free(&device->surface_state_pool, + iview->storage_surface_state); + } + + anv_free2(&device->alloc, pAllocator, iview); +} + +VkResult +anv_CreateBufferView(VkDevice _device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); + struct anv_buffer_view *view; + + view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!view) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + view->format = format->isl_format; + view->bo = buffer->bo; + view->offset = buffer->offset + pCreateInfo->offset; + view->range = pCreateInfo->range == VK_WHOLE_SIZE ? + buffer->size - view->offset : pCreateInfo->range; + + if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + anv_fill_buffer_surface_state(device, view->surface_state, + view->format, + view->offset, view->range, + format->isl_layout->bs); + } else { + view->surface_state = (struct anv_state){ 0 }; + } + + if (buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { + view->storage_surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + enum isl_format storage_format = + has_matching_storage_typed_format(device, view->format) ? + isl_lower_storage_image_format(&device->isl_dev, view->format) : + ISL_FORMAT_RAW; + + anv_fill_buffer_surface_state(device, view->storage_surface_state, + storage_format, + view->offset, view->range, + (storage_format == ISL_FORMAT_RAW ? 
1 : + format->isl_layout->bs)); + + } else { + view->storage_surface_state = (struct anv_state){ 0 }; + } + + *pView = anv_buffer_view_to_handle(view); + + return VK_SUCCESS; +} + +void +anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer_view, view, bufferView); + + if (view->surface_state.alloc_size > 0) + anv_state_pool_free(&device->surface_state_pool, + view->surface_state); + + if (view->storage_surface_state.alloc_size > 0) + anv_state_pool_free(&device->surface_state_pool, + view->storage_surface_state); + + anv_free2(&device->alloc, pAllocator, view); +} + +struct anv_surface * +anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlags aspect_mask) +{ + switch (aspect_mask) { + case VK_IMAGE_ASPECT_COLOR_BIT: + /* Dragons will eat you. + * + * Meta attaches all destination surfaces as color render targets. Guess + * what surface the Meta Dragons really want. + */ + if (image->format->has_depth && image->format->has_stencil) { + return &image->depth_surface; + } else if (image->format->has_depth) { + return &image->depth_surface; + } else if (image->format->has_stencil) { + return &image->stencil_surface; + } else { + return &image->color_surface; + } + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + assert(image->format->has_depth); + return &image->depth_surface; + case VK_IMAGE_ASPECT_STENCIL_BIT: + assert(image->format->has_stencil); + return &image->stencil_surface; + case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: + if (image->format->has_depth && image->format->has_stencil) { + /* FINISHME: The Vulkan spec (git a511ba2) requires support for + * combined depth stencil formats. Specifically, it states: + * + * At least one of ename:VK_FORMAT_D24_UNORM_S8_UINT or + * ename:VK_FORMAT_D32_SFLOAT_S8_UINT must be supported. 
+ * + * Image views with both depth and stencil aspects are only valid for + * render target attachments, in which case + * cmd_buffer_emit_depth_stencil() will pick out both the depth and + * stencil surfaces from the underlying surface. + */ + return &image->depth_surface; + } else if (image->format->has_depth) { + return &image->depth_surface; + } else if (image->format->has_stencil) { + return &image->stencil_surface; + } + /* fallthrough */ + default: + unreachable("image does not have aspect"); + return NULL; + } +} + +static void +image_param_defaults(struct brw_image_param *param) +{ + memset(param, 0, sizeof *param); + /* Set the swizzling shifts to all-ones to effectively disable swizzling -- + * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more + * detailed explanation of these parameters. + */ + param->swizzling[0] = 0xff; + param->swizzling[1] = 0xff; +} + +void +anv_image_view_fill_image_param(struct anv_device *device, + struct anv_image_view *view, + struct brw_image_param *param) +{ + image_param_defaults(param); + + const struct isl_surf *surf = &view->image->color_surface.isl; + const int cpp = isl_format_get_layout(surf->format)->bs; + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + param->size[0] = view->extent.width; + param->size[1] = view->extent.height; + if (surf->dim == ISL_SURF_DIM_3D) { + param->size[2] = view->extent.depth; + } else { + param->size[2] = surf->logical_level0_px.array_len - view->base_layer; + } + + isl_surf_get_image_offset_el(surf, view->base_mip, view->base_layer, 0, + ¶m->offset[0], ¶m->offset[1]); + + param->stride[0] = cpp; + param->stride[1] = surf->row_pitch / cpp; + + if (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D) { + param->stride[2] = util_align_npot(param->size[0], image_align_sa.w); + param->stride[3] = util_align_npot(param->size[1], image_align_sa.h); + } else { + param->stride[2] = 0; + param->stride[3] = 
isl_surf_get_array_pitch_el_rows(surf); + } + + switch (surf->tiling) { + case ISL_TILING_LINEAR: + /* image_param_defaults is good enough */ + break; + + case ISL_TILING_X: + /* An X tile is a rectangular block of 512x8 bytes. */ + param->tiling[0] = util_logbase2(512 / cpp); + param->tiling[1] = util_logbase2(8); + + if (device->isl_dev.has_bit6_swizzling) { + /* Right shifts required to swizzle bits 9 and 10 of the memory + * address with bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 4; + } + break; + + case ISL_TILING_Y0: + /* The layout of a Y-tiled surface in memory isn't really fundamentally + * different to the layout of an X-tiled surface, we simply pretend that + * the surface is broken up in a number of smaller 16Bx32 tiles, each + * one arranged in X-major order just like is the case for X-tiling. + */ + param->tiling[0] = util_logbase2(16 / cpp); + param->tiling[1] = util_logbase2(32); + + if (device->isl_dev.has_bit6_swizzling) { + /* Right shift required to swizzle bit 9 of the memory address with + * bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 0xff; + } + break; + + default: + assert(!"Unhandled storage image tiling"); + } + + /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The + * address calculation algorithm (emit_address_calculation() in + * brw_fs_surface_builder.cpp) handles this as a sort of tiling with + * modulus equal to the LOD. + */ + param->tiling[2] = (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D ? 
+ view->base_mip : 0); +} + +void +anv_buffer_view_fill_image_param(struct anv_device *device, + struct anv_buffer_view *view, + struct brw_image_param *param) +{ + image_param_defaults(param); + + param->stride[0] = isl_format_layouts[view->format].bs; + param->size[0] = view->range / param->stride[0]; +} diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c new file mode 100644 index 00000000000..d95d9afe8cf --- /dev/null +++ b/src/intel/vulkan/anv_intel.c @@ -0,0 +1,100 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +VkResult anv_CreateDmaBufImageINTEL( + VkDevice _device, + const VkDmaBufImageCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDeviceMemory* pMem, + VkImage* pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_device_memory *mem; + struct anv_image *image; + VkResult result; + VkImage image_h; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); + + mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); + if (!mem->bo.gem_handle) { + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } + + mem->bo.map = NULL; + mem->bo.index = 0; + mem->bo.offset = 0; + mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; + + anv_image_create(_device, + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = pCreateInfo->strideInBytes, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->format, + .extent = pCreateInfo->extent, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + pAllocator, &image_h); + + image = anv_image_from_handle(image_h); + image->bo = &mem->bo; + image->offset = 0; + + assert(image->extent.width > 0); + assert(image->extent.height > 0); + assert(image->extent.depth == 1); + + *pMem = anv_device_memory_to_handle(mem); + *pImage = anv_image_to_handle(image); + + return VK_SUCCESS; + + fail: + anv_free2(&device->alloc, pAllocator, mem); + + return result; +} diff --git a/src/intel/vulkan/anv_meta.c 
b/src/intel/vulkan/anv_meta.c new file mode 100644 index 00000000000..82944ea1a92 --- /dev/null +++ b/src/intel/vulkan/anv_meta.c @@ -0,0 +1,169 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_meta.h" + +struct anv_render_pass anv_meta_dummy_renderpass = {0}; + +void +anv_meta_save(struct anv_meta_saved_state *state, + const struct anv_cmd_buffer *cmd_buffer, + uint32_t dynamic_mask) +{ + state->old_pipeline = cmd_buffer->state.pipeline; + state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; + memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, + sizeof(state->old_vertex_bindings)); + + state->dynamic_mask = dynamic_mask; + anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic, + dynamic_mask); +} + +void +anv_meta_restore(const struct anv_meta_saved_state *state, + struct anv_cmd_buffer *cmd_buffer) +{ + cmd_buffer->state.pipeline = state->old_pipeline; + cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; + memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, + sizeof(state->old_vertex_bindings)); + + cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, + state->dynamic_mask); + cmd_buffer->state.dirty |= state->dynamic_mask; + + /* Since we've used the pipeline with the VS disabled, set + * need_query_wa. See CmdBeginQuery. + */ + cmd_buffer->state.need_query_wa = true; +} + +VkImageViewType +anv_meta_get_view_type(const struct anv_image *image) +{ + switch (image->type) { + case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D; + case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D; + case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D; + default: + unreachable("bad VkImageViewType"); + } +} + +/** + * When creating a destination VkImageView, this function provides the needed + * VkImageViewCreateInfo::subresourceRange::baseArrayLayer. 
+ */ +uint32_t +anv_meta_get_iview_layer(const struct anv_image *dest_image, + const VkImageSubresourceLayers *dest_subresource, + const VkOffset3D *dest_offset) +{ + switch (dest_image->type) { + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + return dest_subresource->baseArrayLayer; + case VK_IMAGE_TYPE_3D: + /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer, + * but meta does it anyway. When doing so, we translate the + * destination's z offset into an array offset. + */ + return dest_offset->z; + default: + assert(!"bad VkImageType"); + return 0; + } +} + +static void * +meta_alloc(void* _device, size_t size, size_t alignment, + VkSystemAllocationScope allocationScope) +{ + struct anv_device *device = _device; + return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); +} + +static void * +meta_realloc(void* _device, void *original, size_t size, size_t alignment, + VkSystemAllocationScope allocationScope) +{ + struct anv_device *device = _device; + return device->alloc.pfnReallocation(device->alloc.pUserData, original, + size, alignment, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); +} + +static void +meta_free(void* _device, void *data) +{ + struct anv_device *device = _device; + return device->alloc.pfnFree(device->alloc.pUserData, data); +} + +VkResult +anv_device_init_meta(struct anv_device *device) +{ + VkResult result; + + device->meta_state.alloc = (VkAllocationCallbacks) { + .pUserData = device, + .pfnAllocation = meta_alloc, + .pfnReallocation = meta_realloc, + .pfnFree = meta_free, + }; + + result = anv_device_init_meta_clear_state(device); + if (result != VK_SUCCESS) + goto fail_clear; + + result = anv_device_init_meta_resolve_state(device); + if (result != VK_SUCCESS) + goto fail_resolve; + + result = anv_device_init_meta_blit_state(device); + if (result != VK_SUCCESS) + goto fail_blit; + + return VK_SUCCESS; + +fail_blit: + anv_device_finish_meta_resolve_state(device); 
+fail_resolve: + anv_device_finish_meta_clear_state(device); +fail_clear: + return result; +} + +void +anv_device_finish_meta(struct anv_device *device) +{ + anv_device_finish_meta_resolve_state(device); + anv_device_finish_meta_clear_state(device); + anv_device_finish_meta_blit_state(device); +} diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h new file mode 100644 index 00000000000..d33e9e6d8ba --- /dev/null +++ b/src/intel/vulkan/anv_meta.h @@ -0,0 +1,75 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "anv_private.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ANV_META_VERTEX_BINDING_COUNT 2 + +struct anv_meta_saved_state { + struct anv_vertex_binding old_vertex_bindings[ANV_META_VERTEX_BINDING_COUNT]; + struct anv_descriptor_set *old_descriptor_set0; + struct anv_pipeline *old_pipeline; + + /** + * Bitmask of (1 << VK_DYNAMIC_STATE_*). Defines the set of saved dynamic + * state. + */ + uint32_t dynamic_mask; + struct anv_dynamic_state dynamic; +}; + +VkResult anv_device_init_meta_clear_state(struct anv_device *device); +void anv_device_finish_meta_clear_state(struct anv_device *device); + +VkResult anv_device_init_meta_resolve_state(struct anv_device *device); +void anv_device_finish_meta_resolve_state(struct anv_device *device); + +VkResult anv_device_init_meta_blit_state(struct anv_device *device); +void anv_device_finish_meta_blit_state(struct anv_device *device); + +void +anv_meta_save(struct anv_meta_saved_state *state, + const struct anv_cmd_buffer *cmd_buffer, + uint32_t dynamic_mask); + +void +anv_meta_restore(const struct anv_meta_saved_state *state, + struct anv_cmd_buffer *cmd_buffer); + +VkImageViewType +anv_meta_get_view_type(const struct anv_image *image); + +uint32_t +anv_meta_get_iview_layer(const struct anv_image *dest_image, + const VkImageSubresourceLayers *dest_subresource, + const VkOffset3D *dest_offset); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c new file mode 100644 index 00000000000..07ebcbc06b1 --- /dev/null +++ b/src/intel/vulkan/anv_meta_blit.c @@ -0,0 +1,1442 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * 
and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_meta.h" +#include "nir/nir_builder.h" + +struct blit_region { + VkOffset3D src_offset; + VkExtent3D src_extent; + VkOffset3D dest_offset; + VkExtent3D dest_extent; +}; + +static nir_shader * +build_nir_vertex_shader(void) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); + + nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_pos"); + pos_in->data.location = VERT_ATTRIB_GENERIC0; + nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "gl_Position"); + pos_out->data.location = VARYING_SLOT_POS; + nir_copy_var(&b, pos_out, pos_in); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_tex_pos"); + tex_pos_in->data.location = VERT_ATTRIB_GENERIC1; + nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "v_tex_pos"); + tex_pos_out->data.location = VARYING_SLOT_VAR0; + tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; + nir_copy_var(&b, tex_pos_out, tex_pos_in); + 
+ return b.shader; +} + +static nir_shader * +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "v_tex_pos"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; + + /* Swizzle the array index which comes in as Z coordinate into the right + * position. + */ + unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; + nir_ssa_def *const tex_pos = + nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, + (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); + + const struct glsl_type *sampler_type = + glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, + glsl_get_base_type(vec4)); + nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, + sampler_type, "s_tex"); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + tex->sampler_dim = tex_dim; + tex->op = nir_texop_tex; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(tex_pos); + tex->dest_type = nir_type_float; /* TODO */ + tex->is_array = glsl_sampler_type_is_array(sampler_type); + tex->coord_components = tex_pos->num_components; + tex->texture = nir_deref_var_create(tex, sampler); + tex->sampler = nir_deref_var_create(tex, sampler); + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + nir_store_var(&b, color_out, &tex->dest.ssa, 4); + + return b.shader; +} + +static void +meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *saved_state) +{ + 
anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); +} + +/* Returns the user-provided VkBufferImageCopy::imageOffset in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. + */ +static struct VkOffset3D +meta_region_offset_el(const struct anv_image * image, + const struct VkOffset3D * offset) +{ + const struct isl_format_layout * isl_layout = image->format->isl_layout; + return (VkOffset3D) { + .x = offset->x / isl_layout->bw, + .y = offset->y / isl_layout->bh, + .z = offset->z / isl_layout->bd, + }; +} + +/* Returns the user-provided VkBufferImageCopy::imageExtent in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. + */ +static struct VkExtent3D +meta_region_extent_el(const VkFormat format, + const struct VkExtent3D * extent) +{ + const struct isl_format_layout * isl_layout = + anv_format_for_vk_format(format)->isl_layout; + return (VkExtent3D) { + .width = DIV_ROUND_UP(extent->width , isl_layout->bw), + .height = DIV_ROUND_UP(extent->height, isl_layout->bh), + .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), + }; +} + +static void +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *src_image, + struct anv_image_view *src_iview, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_image *dest_image, + struct anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D dest_extent, + VkFilter blit_filter) +{ + struct anv_device *device = cmd_buffer->device; + VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; + + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + assert(src_image->samples == dest_image->samples); + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + 
memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + dest_offset.x + dest_extent.width, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)src_offset.y / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + }); + + VkSampler sampler; + ANV_CALL(CreateSampler)(anv_device_to_handle(device), + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = blit_filter, + .minFilter = blit_filter, + }, &cmd_buffer->pool->alloc, &sampler); + + VkDescriptorSet set; + anv_AllocateDescriptorSets(anv_device_to_handle(device), + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = 
dummy_desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout + }, &set); + anv_UpdateDescriptorSets(anv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(dest_iview), + }, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .layers = 1 + }, &cmd_buffer->pool->alloc, &fb); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit.render_pass, + .framebuffer = fb, + .renderArea = { + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + VkPipeline pipeline; + + switch (src_image->type) { + case VK_IMAGE_TYPE_1D: + pipeline = device->meta_state.blit.pipeline_1d_src; + break; + case VK_IMAGE_TYPE_2D: + pipeline = device->meta_state.blit.pipeline_2d_src; + break; + case VK_IMAGE_TYPE_3D: + pipeline = device->meta_state.blit.pipeline_3d_src; + break; + default: + unreachable(!"bad VkImageType"); + } + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + + 
anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, + &set, 0, NULL); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. + */ + anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_DestroySampler(anv_device_to_handle(device), sampler, + &cmd_buffer->pool->alloc); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb, + &cmd_buffer->pool->alloc); +} + +static void +meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, + const struct anv_meta_saved_state *saved_state) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static VkFormat +vk_format_for_size(int bs) +{ + /* Note: We intentionally use the 4-channel formats whenever we can. + * This is so that, when we do a RGB <-> RGBX copy, the two formats will + * line up even though one of them is 3/4 the size of the other. 
+ */ + switch (bs) { + case 1: return VK_FORMAT_R8_UINT; + case 2: return VK_FORMAT_R8G8_UINT; + case 3: return VK_FORMAT_R8G8B8_UINT; + case 4: return VK_FORMAT_R8G8B8A8_UINT; + case 6: return VK_FORMAT_R16G16B16_UINT; + case 8: return VK_FORMAT_R16G16B16A16_UINT; + case 12: return VK_FORMAT_R32G32B32_UINT; + case 16: return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Invalid format block size"); + } +} + +static void +do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *src, uint64_t src_offset, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat copy_format) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = copy_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = 0, + .flags = 0, + }; + + VkImage src_image; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &src_image); + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + anv_image_from_handle(src_image)->bo = src; + anv_image_from_handle(src_image)->offset = src_offset; + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = dest_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(src_image), + &src_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + anv_image_from_handle(dest_image), + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + VK_FILTER_NEAREST); + + anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc); +} + +void anv_CmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + 
uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; + uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset; + uint64_t copy_size = pRegions[r].size; + + /* First, we compute the biggest format that can be used with the + * given offsets and size. + */ + int bs = 16; + + int fs = ffs(src_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(src_offset % bs == 0); + + fs = ffs(dest_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(dest_offset % bs == 0); + + fs = ffs(pRegions[r].size) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(pRegions[r].size % bs == 0); + + VkFormat copy_format = vk_format_for_size(bs); + + /* This is maximum possible width/height our HW can handle */ + uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; + while (copy_size >= max_copy_size) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, max_surface_dim, copy_format); + copy_size -= max_copy_size; + src_offset += max_copy_size; + dest_offset += max_copy_size; + } + + uint64_t height = copy_size / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + uint64_t rect_copy_size = height * max_surface_dim * bs; + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, height, copy_format); + copy_size -= rect_copy_size; + src_offset += rect_copy_size; + dest_offset += rect_copy_size; + } + + if (copy_size != 0) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + copy_size / bs, 1, copy_format); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdUpdateBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, 
commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + /* We can't quite grab a full block because the state stream needs a + * little data at the top to build its linked list. + */ + const uint32_t max_update_size = + cmd_buffer->device->dynamic_state_block_pool.block_size - 64; + + assert(max_update_size < (1 << 14) * 4); + + while (dataSize) { + const uint32_t copy_size = MIN2(dataSize, max_update_size); + + struct anv_state tmp_data = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); + + memcpy(tmp_data.map, pData, copy_size); + + VkFormat format; + int bs; + if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + do_buffer_copy(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, + tmp_data.offset, + dst_buffer->bo, dst_buffer->offset + dstOffset, + copy_size / bs, 1, format); + + dataSize -= copy_size; + dstOffset += copy_size; + pData = (void *)pData + copy_size; + } +} + +static VkFormat +choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + struct isl_surf *surf = + &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; + + /* vkCmdCopyImage behaves like memcpy. Therefore we choose identical UINT + * formats for the source and destination image views. + * + * From the Vulkan spec (2015-12-30): + * + * vkCmdCopyImage performs image copies in a similar manner to a host + * memcpy. It does not perform general-purpose conversions such as + * scaling, resizing, blending, color-space conversion, or format + * conversions. Rather, it simply copies raw image data. 
vkCmdCopyImage + * can copy between images with different formats, provided the formats + * are compatible as defined below. + * + * [The spec later defines compatibility as having the same number of + * bytes per block]. + */ + return vk_format_for_size(isl_format_layouts[surf->format].bs); +} + +static VkFormat +choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + /* vkCmdCopy* commands behave like memcpy. Therefore we choose + * compatible UINT formats for the source and destination image views. + * + * For the buffer, we go back to the original image format and get + * the format as if it were linear. This way, for RGB formats, we get + * an RGB format here even if the tiled image is RGBA. XXX: This doesn't + * work if the buffer is the destination. + */ + enum isl_format linear_format = anv_get_isl_format(format, aspect, + VK_IMAGE_TILING_LINEAR, + NULL); + + return vk_format_for_size(isl_format_layouts[linear_format].bs); +} + +void anv_CmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdCopyImage can be used to copy image data between multisample + * images, but both images must have the same number of samples. 
+ */ + assert(src_image->samples == dest_image->samples); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + assert(pRegions[r].srcSubresource.aspectMask == + pRegions[r].dstSubresource.aspectMask); + + VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; + + VkFormat src_format = choose_iview_format(src_image, aspect); + VkFormat dst_format = choose_iview_format(dest_image, aspect); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = src_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .layerCount = pRegions[r].dstSubresource.layerCount, + }, + }, + cmd_buffer, 0); + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffset.x, + .y = pRegions[r].dstOffset.y, + .z = 0, + }; + + unsigned num_slices; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].srcSubresource.layerCount == 1 && + pRegions[r].dstSubresource.layerCount == 1); + num_slices = pRegions[r].extent.depth; + } else { + assert(pRegions[r].srcSubresource.layerCount == + pRegions[r].dstSubresource.layerCount); + assert(pRegions[r].extent.depth == 1); + num_slices = pRegions[r].dstSubresource.layerCount; + } + + const uint32_t dest_base_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffset); + + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].srcOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = 
destImage, + .viewType = anv_meta_get_view_type(dest_image), + .format = dst_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_array_slice + slice, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + src_offset, + pRegions[r].extent, + dest_image, &dest_iview, + dest_offset, + pRegions[r].extent, + VK_FILTER_NEAREST); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdBlitImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit* pRegions, + VkFilter filter) + +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdBlitImage must not be used for multisampled source or + * destination images. Use vkCmdResolveImage for this purpose. 
+ */ + assert(src_image->samples == 1); + assert(dest_image->samples == 1); + + anv_finishme("respect VkFilter"); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = src_image->vk_format, + .subresourceRange = { + .aspectMask = pRegions[r].srcSubresource.aspectMask, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffsets[0].x, + .y = pRegions[r].dstOffsets[0].y, + .z = 0, + }; + + if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x || + pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y || + pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x || + pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y) + anv_finishme("FINISHME: Allow flipping in blits"); + + const VkExtent3D dest_extent = { + .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x, + .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y, + }; + + const VkExtent3D src_extent = { + .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x, + .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y, + }; + + const uint32_t dest_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffsets[0]); + + if (pRegions[r].srcSubresource.layerCount > 1) + anv_finishme("FINISHME: copy multiple array layers"); + + if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z || + pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z) + anv_finishme("FINISHME: copy multiple depth layers"); + + struct 
anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = anv_meta_get_view_type(dest_image), + .format = dest_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_array_slice, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + pRegions[r].srcOffsets[0], src_extent, + dest_image, &dest_iview, + dest_offset, dest_extent, + filter); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +static struct anv_image * +make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, + VkImageUsageFlags usage, + VkImageType image_type, + const VkAllocationCallbacks *alloc, + const VkBufferImageCopy *copy) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); + + VkExtent3D extent = copy->imageExtent; + if (copy->bufferRowLength) + extent.width = copy->bufferRowLength; + if (copy->bufferImageHeight) + extent.height = copy->bufferImageHeight; + extent.depth = 1; + extent = meta_region_extent_el(format, &extent); + + VkImageAspectFlags aspect = copy->imageSubresource.aspectMask; + VkFormat buffer_format = choose_buffer_format(format, aspect); + + VkImage vk_image; + VkResult result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = buffer_format, + .extent = extent, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = usage, + .flags = 0, + }, alloc, &vk_image); + assert(result == VK_SUCCESS); + + ANV_FROM_HANDLE(anv_image, image, vk_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + image->bo = buffer->bo; + image->offset = buffer->offset + copy->bufferOffset; + + return image; +} + +void anv_CmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_meta_saved_state saved_state; + + /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(dest_image->samples == 1); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(dest_image, aspect); + + struct anv_image *src_image = + make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format, + VK_IMAGE_USAGE_SAMPLED_BIT, + dest_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); + + const uint32_t dest_base_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource, + &pRegions[r].imageOffset); + + unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(src_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + uint32_t img_x = 0; 
+ uint32_t img_y = 0; + uint32_t img_o = 0; + if (isl_format_is_compressed(dest_image->format->isl_format)) + isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, + &dest_image->color_surface.isl, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer + slice_array, + pRegions[r].imageOffset.z + slice_3d, + &img_o, &img_x, &img_y); + + VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, & pRegions[r].imageOffset); + dest_offset_el.x += img_x; + dest_offset_el.y += img_y; + dest_offset_el.z = 0; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = anv_meta_get_view_type(dest_image), + .format = image_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_array_slice + + slice_array + slice_3d, + .layerCount = 1 + }, + }, + cmd_buffer, img_o); + + const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, + &pRegions[r].imageExtent); + + meta_emit_blit(cmd_buffer, + src_image, + &src_iview, + (VkOffset3D){0, 0, 0}, + img_extent_el, + dest_image, + &dest_iview, + dest_offset_el, + img_extent_el, + VK_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. 
+ */ + src_image->offset += src_image->extent.width * + src_image->extent.height * + src_image->format->isl_layout->bs; + + if (dest_image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; + } + + anv_DestroyImage(vk_device, anv_image_to_handle(src_image), + &cmd_buffer->pool->alloc); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_meta_saved_state saved_state; + + + /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(src_image->samples == 1); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(src_image, aspect); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = image_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, + .layerCount = pRegions[r].imageSubresource.layerCount, + }, + }, + cmd_buffer, 0); + + struct anv_image *dest_image = + make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + src_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); + + unsigned num_slices; + if (src_image->type == 
VK_IMAGE_TYPE_3D) { + assert(pRegions[r].imageSubresource.layerCount == 1); + num_slices = pRegions[r].imageExtent.depth; + } else { + assert(pRegions[r].imageExtent.depth == 1); + num_slices = pRegions[r].imageSubresource.layerCount; + } + + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].imageOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(srcImage), + &src_iview, + src_offset, + pRegions[r].imageExtent, + dest_image, + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent, + VK_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. 
+ */ + dest_image->offset += dest_image->extent.width * + dest_image->extent.height * + src_image->format->isl_layout->bs; + } + + anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), + &cmd_buffer->pool->alloc); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void +anv_device_finish_meta_blit_state(struct anv_device *device) +{ + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_3d_src, + &device->meta_state.alloc); + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); +} + +VkResult +anv_device_init_meta_blit_state(struct anv_device *device) +{ + VkResult result; + + result = anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + 
.pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); + if (result != VK_SUCCESS) + goto fail; + + /* We don't use a vertex shader for blitting, but instead build and pass + * the VUEs directly to the rasterization backend. However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. + */ + struct anv_shader_module vs = { + .nir = build_nir_vertex_shader(), + }; + + struct anv_shader_module fs_1d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), + }; + + struct anv_shader_module fs_2d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), + }; + + struct anv_shader_module fs_3d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), + }; + + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + { + .binding = 1, + .stride = 5 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = 8 + } + } + }; + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }; + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &ds_layout_info, + &device->meta_state.alloc, + &device->meta_state.blit.ds_layout); + if (result != VK_SUCCESS) + goto fail_render_pass; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); + if (result != VK_SUCCESS) + goto fail_descriptor_set_layout; + + VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs), + .pName = "main", + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! 
*/ + .pName = "main", + .pSpecializationInfo = NULL + }, + }; + + const VkGraphicsPipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(pipeline_shader_stages), + .pStages = pipeline_shader_stages, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .colorWriteMask = + VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT }, + } + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + 
VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + .renderPass = device->meta_state.blit.render_pass, + .subpass = 0, + }; + + const struct anv_graphics_pipeline_create_info anv_pipeline_info = { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_layout; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_1d; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_2d; + + ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + + return VK_SUCCESS; + + fail_pipeline_2d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + + fail_pipeline_1d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + + fail_pipeline_layout: + 
anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + fail_descriptor_set_layout: + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); + fail_render_pass: + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + + ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + fail: + return result; +} diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c new file mode 100644 index 00000000000..739ae09582c --- /dev/null +++ b/src/intel/vulkan/anv_meta_clear.c @@ -0,0 +1,1098 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_meta.h" +#include "anv_private.h" +#include "nir/nir_builder.h" + +/** Vertex attributes for color clears. */ +struct color_clear_vattrs { + struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ + VkClearColorValue color; +}; + +/** Vertex attributes for depthstencil clears. */ +struct depthstencil_clear_vattrs { + struct anv_vue_header vue_header; + float position[2]; /*<< 3DPRIM_RECTLIST */ +}; + +static void +meta_clear_begin(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT) | + (1 << VK_DYNAMIC_STATE_SCISSOR) | + (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)); + + cmd_buffer->state.dynamic.viewport.count = 0; + cmd_buffer->state.dynamic.scissor.count = 0; +} + +static void +meta_clear_end(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static void +build_color_shaders(struct nir_shader **out_vs, + struct nir_shader **out_fs, + uint32_t frag_output) +{ + nir_builder vs_b; + nir_builder fs_b; + + nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); + nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); + + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs"); + fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs"); + + const struct glsl_type *position_type = glsl_vec4_type(); + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_variable *vs_in_pos = + nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, + "a_position"); + vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *vs_out_pos = + nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, + "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_variable *vs_in_color = + nir_variable_create(vs_b.shader, nir_var_shader_in, 
color_type, + "a_color"); + vs_in_color->data.location = VERT_ATTRIB_GENERIC1; + + nir_variable *vs_out_color = + nir_variable_create(vs_b.shader, nir_var_shader_out, color_type, + "v_color"); + vs_out_color->data.location = VARYING_SLOT_VAR0; + vs_out_color->data.interpolation = INTERP_QUALIFIER_FLAT; + + nir_variable *fs_in_color = + nir_variable_create(fs_b.shader, nir_var_shader_in, color_type, + "v_color"); + fs_in_color->data.location = vs_out_color->data.location; + fs_in_color->data.interpolation = vs_out_color->data.interpolation; + + nir_variable *fs_out_color = + nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, + "f_color"); + fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output; + + nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); + nir_copy_var(&vs_b, vs_out_color, vs_in_color); + nir_copy_var(&fs_b, fs_out_color, fs_in_color); + + *out_vs = vs_b.shader; + *out_fs = fs_b.shader; +} + +static VkResult +create_pipeline(struct anv_device *device, + uint32_t samples, + struct nir_shader *vs_nir, + struct nir_shader *fs_nir, + const VkPipelineVertexInputStateCreateInfo *vi_state, + const VkPipelineDepthStencilStateCreateInfo *ds_state, + const VkPipelineColorBlendStateCreateInfo *cb_state, + const VkAllocationCallbacks *alloc, + bool use_repclear, + struct anv_pipeline **pipeline) +{ + VkDevice device_h = anv_device_to_handle(device); + VkResult result; + + struct anv_shader_module vs_m = { .nir = vs_nir }; + struct anv_shader_module fs_m = { .nir = fs_nir }; + + VkPipeline pipeline_h = VK_NULL_HANDLE; + result = anv_graphics_pipeline_create(device_h, + VK_NULL_HANDLE, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = fs_nir ? 
2 : 1, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs_m), + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = anv_shader_module_to_handle(&fs_m), + .pName = "main", + }, + }, + .pVertexInputState = vi_state, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .pViewports = NULL, /* dynamic */ + .scissorCount = 1, + .pScissors = NULL, /* dynamic */ + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .depthBiasEnable = false, + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = samples, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { ~0 }, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pDepthStencilState = ds_state, + .pColorBlendState = cb_state, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + /* The meta clear pipeline declares all state as dynamic. + * As a consequence, vkCmdBindPipeline writes no dynamic state + * to the cmd buffer. Therefore, at the end of the meta clear, + * we need only restore dynamic state was vkCmdSet. 
+ */ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass), + .subpass = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .color_attachment_count = MAX_RTS, + .use_repclear = use_repclear, + .disable_viewport = true, + .disable_vs = true, + .use_rectlist = true + }, + alloc, + &pipeline_h); + + ralloc_free(vs_nir); + ralloc_free(fs_nir); + + *pipeline = anv_pipeline_from_handle(pipeline_h); + + return result; +} + +static VkResult +create_color_pipeline(struct anv_device *device, + uint32_t samples, + uint32_t frag_output, + struct anv_pipeline **pipeline) +{ + struct nir_shader *vs_nir; + struct nir_shader *fs_nir; + build_color_shaders(&vs_nir, &fs_nir, frag_output); + + const VkPipelineVertexInputStateCreateInfo vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct color_clear_vattrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct color_clear_vattrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct color_clear_vattrs, position), + }, + { + /* Color */ + .location = 2, + 
.binding = 0, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offset = offsetof(struct color_clear_vattrs, color), + }, + }, + }; + + const VkPipelineDepthStencilStateCreateInfo ds_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = false, + .depthWriteEnable = false, + .depthBoundsTestEnable = false, + .stencilTestEnable = false, + }; + + VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = { 0 }; + blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState) { + .blendEnable = false, + .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT, + }; + + const VkPipelineColorBlendStateCreateInfo cb_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = MAX_RTS, + .pAttachments = blend_attachment_state + }; + + /* Disable repclear because we do not want the compiler to replace the + * shader. We need the shader to write to the specified color attachment, + * but the repclear shader writes to all color attachments. 
+ */ + return + create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, + &cb_state, &device->meta_state.alloc, + /*use_repclear*/ false, pipeline); +} + +static void +destroy_pipeline(struct anv_device *device, struct anv_pipeline *pipeline) +{ + if (!pipeline) + return; + + ANV_CALL(DestroyPipeline)(anv_device_to_handle(device), + anv_pipeline_to_handle(pipeline), + &device->meta_state.alloc); +} + +void +anv_device_finish_meta_clear_state(struct anv_device *device) +{ + struct anv_meta_state *state = &device->meta_state; + + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { + for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { + destroy_pipeline(device, state->clear[i].color_pipelines[j]); + } + + destroy_pipeline(device, state->clear[i].depth_only_pipeline); + destroy_pipeline(device, state->clear[i].stencil_only_pipeline); + destroy_pipeline(device, state->clear[i].depthstencil_pipeline); + } +} + +static void +emit_color_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + struct anv_device *device = cmd_buffer->device; + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const uint32_t subpass_att = clear_att->colorAttachment; + const uint32_t pass_att = subpass->color_attachments[subpass_att]; + const struct anv_image_view *iview = fb->attachments[pass_att]; + const uint32_t samples = iview->image->samples; + const uint32_t samples_log2 = ffs(samples) - 1; + struct anv_pipeline *pipeline = + device->meta_state.clear[samples_log2].color_pipelines[subpass_att]; + VkClearColorValue clear_value = clear_att->clearValue.color; + + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + VkPipeline pipeline_h = anv_pipeline_to_handle(pipeline); + + assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear)); + assert(clear_att->aspectMask == 
VK_IMAGE_ASPECT_COLOR_BIT); + assert(clear_att->colorAttachment < subpass->color_count); + + const struct color_clear_vattrs vertex_data[3] = { + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, + }, + .color = clear_value, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, + }, + .color = clear_value, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, + }, + .color = clear_value, + }, + }; + + struct anv_state state = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset, + }; + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + .minDepth = 0.0, + .maxDepth = 1.0, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + } + }); + + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, + (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, + (VkDeviceSize[]) { 0 }); + + if (cmd_buffer->state.pipeline != pipeline) { + ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + } + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); +} + + +static void +build_depthstencil_shader(struct nir_shader **out_vs) +{ + nir_builder vs_b; + + nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); + + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs"); + + const struct glsl_type *position_type = glsl_vec4_type(); + + nir_variable *vs_in_pos = + nir_variable_create(vs_b.shader, nir_var_shader_in, 
position_type, + "a_position"); + vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *vs_out_pos = + nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, + "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); + + *out_vs = vs_b.shader; +} + +static VkResult +create_depthstencil_pipeline(struct anv_device *device, + VkImageAspectFlags aspects, + uint32_t samples, + struct anv_pipeline **pipeline) +{ + struct nir_shader *vs_nir; + + build_depthstencil_shader(&vs_nir); + + const VkPipelineVertexInputStateCreateInfo vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct depthstencil_clear_vattrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 2, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct depthstencil_clear_vattrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct depthstencil_clear_vattrs, position), + }, + }, + }; + + const VkPipelineDepthStencilStateCreateInfo ds_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), + .depthBoundsTestEnable = false, + .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT), + .front = { + .passOp = VK_STENCIL_OP_REPLACE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .writeMask = UINT32_MAX, + .reference = 0, /* dynamic */ + }, + .back = { 0 /* dont care */ }, + }; + + const 
VkPipelineColorBlendStateCreateInfo cb_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 0, + .pAttachments = NULL, + }; + + return create_pipeline(device, samples, vs_nir, NULL, &vi_state, &ds_state, + &cb_state, &device->meta_state.alloc, + /*use_repclear*/ true, pipeline); +} + +static void +emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_meta_state *meta_state = &device->meta_state; + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const uint32_t pass_att = subpass->depth_stencil_attachment; + const struct anv_image_view *iview = fb->attachments[pass_att]; + const uint32_t samples = iview->image->samples; + const uint32_t samples_log2 = ffs(samples) - 1; + VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; + VkImageAspectFlags aspects = clear_att->aspectMask; + + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + + assert(samples_log2 < ARRAY_SIZE(meta_state->clear)); + assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT || + aspects == VK_IMAGE_ASPECT_STENCIL_BIT || + aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + assert(pass_att != VK_ATTACHMENT_UNUSED); + + const struct depthstencil_clear_vattrs vertex_data[3] = { + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, + }, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, + }, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, + }, + }, + }; + + struct anv_state state = + 
anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset, + }; + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + + /* Ignored when clearing only stencil. */ + .minDepth = clear_value.depth, + .maxDepth = clear_value.depth, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + } + }); + + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + ANV_CALL(CmdSetStencilReference)(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, + clear_value.stencil); + } + + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, + (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, + (VkDeviceSize[]) { 0 }); + + struct anv_pipeline *pipeline; + switch (aspects) { + case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: + pipeline = meta_state->clear[samples_log2].depthstencil_pipeline; + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + pipeline = meta_state->clear[samples_log2].depth_only_pipeline; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + pipeline = meta_state->clear[samples_log2].stencil_only_pipeline; + break; + default: + unreachable("expected depth or stencil aspect"); + } + + if (cmd_buffer->state.pipeline != pipeline) { + ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + anv_pipeline_to_handle(pipeline)); + } + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); +} + +VkResult +anv_device_init_meta_clear_state(struct anv_device *device) +{ + VkResult res; + struct anv_meta_state *state = &device->meta_state; + + zero(device->meta_state.clear); + + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { + uint32_t samples = 1 << i; + + for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { + 
res = create_color_pipeline(device, samples, /* frag_output */ j, + &state->clear[i].color_pipelines[j]); + if (res != VK_SUCCESS) + goto fail; + } + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT, samples, + &state->clear[i].depth_only_pipeline); + if (res != VK_SUCCESS) + goto fail; + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_STENCIL_BIT, samples, + &state->clear[i].stencil_only_pipeline); + if (res != VK_SUCCESS) + goto fail; + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT, samples, + &state->clear[i].depthstencil_pipeline); + if (res != VK_SUCCESS) + goto fail; + } + + return VK_SUCCESS; + +fail: + anv_device_finish_meta_clear_state(device); + return res; +} + +/** + * The parameters mean that same as those in vkCmdClearAttachments. + */ +static void +emit_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + emit_color_clear(cmd_buffer, clear_att, clear_rect); + } else { + assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect); + } +} + +static bool +subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_cmd_state *cmd_state = &cmd_buffer->state; + uint32_t ds = cmd_state->subpass->depth_stencil_attachment; + + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { + uint32_t a = cmd_state->subpass->color_attachments[i]; + if (cmd_state->attachments[a].pending_clear_aspects) { + return true; + } + } + + if (ds != VK_ATTACHMENT_UNUSED && + cmd_state->attachments[ds].pending_clear_aspects) { + return true; + } + + return false; +} + +/** + * Emit any pending attachment clears for the current subpass. 
+ * + * @see anv_attachment_state::pending_clear_aspects + */ +void +anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_meta_saved_state saved_state; + + if (!subpass_needs_clear(cmd_buffer)) + return; + + meta_clear_begin(&saved_state, cmd_buffer); + + if (cmd_state->framebuffer->layers > 1) + anv_finishme("clearing multi-layer framebuffer"); + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + }, + .baseArrayLayer = 0, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { + uint32_t a = cmd_state->subpass->color_attachments[i]; + + if (!cmd_state->attachments[a].pending_clear_aspects) + continue; + + assert(cmd_state->attachments[a].pending_clear_aspects == + VK_IMAGE_ASPECT_COLOR_BIT); + + VkClearAttachment clear_att = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = i, /* Use attachment index relative to subpass */ + .clearValue = cmd_state->attachments[a].clear_value, + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + cmd_state->attachments[a].pending_clear_aspects = 0; + } + + uint32_t ds = cmd_state->subpass->depth_stencil_attachment; + + if (ds != VK_ATTACHMENT_UNUSED && + cmd_state->attachments[ds].pending_clear_aspects) { + + VkClearAttachment clear_att = { + .aspectMask = cmd_state->attachments[ds].pending_clear_aspects, + .clearValue = cmd_state->attachments[ds].clear_value, + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + cmd_state->attachments[ds].pending_clear_aspects = 0; + } + + meta_clear_end(&saved_state, cmd_buffer); +} + +static void +anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *image, + VkImageLayout image_layout, + const VkClearValue *clear_value, + uint32_t range_count, + const 
VkImageSubresourceRange *ranges) +{ + VkDevice device_h = anv_device_to_handle(cmd_buffer->device); + + for (uint32_t r = 0; r < range_count; r++) { + const VkImageSubresourceRange *range = &ranges[r]; + + for (uint32_t l = 0; l < range->levelCount; ++l) { + for (uint32_t s = 0; s < range->layerCount; ++s) { + struct anv_image_view iview; + anv_image_view_init(&iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(image), + .viewType = anv_meta_get_view_type(image), + .format = image->vk_format, + .subresourceRange = { + .aspectMask = range->aspectMask, + .baseMipLevel = range->baseMipLevel + l, + .levelCount = 1, + .baseArrayLayer = range->baseArrayLayer + s, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + VkFramebuffer fb; + anv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&iview), + }, + .width = iview.extent.width, + .height = iview.extent.height, + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb); + + VkAttachmentDescription att_desc = { + .format = iview.vk_format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = image_layout, + .finalLayout = image_layout, + }; + + VkSubpassDescription subpass_desc = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 0, + .pColorAttachments = NULL, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = NULL, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }; + + const VkAttachmentReference att_ref = { + .attachment = 0, + .layout = image_layout, + }; + + if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + subpass_desc.colorAttachmentCount = 1; + 
subpass_desc.pColorAttachments = &att_ref; + } else { + subpass_desc.pDepthStencilAttachment = &att_ref; + } + + VkRenderPass pass; + anv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &att_desc, + .subpassCount = 1, + .pSubpasses = &subpass_desc, + }, + &cmd_buffer->pool->alloc, + &pass); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderArea = { + .offset = { 0, 0, }, + .extent = { + .width = iview.extent.width, + .height = iview.extent.height, + }, + }, + .renderPass = pass, + .framebuffer = fb, + .clearValueCount = 0, + .pClearValues = NULL, + }, + VK_SUBPASS_CONTENTS_INLINE); + + VkClearAttachment clear_att = { + .aspectMask = range->aspectMask, + .colorAttachment = 0, + .clearValue = *clear_value, + }; + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { iview.extent.width, iview.extent.height }, + }, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + ANV_CALL(DestroyRenderPass)(device_h, pass, + &cmd_buffer->pool->alloc); + ANV_CALL(DestroyFramebuffer)(device_h, fb, + &cmd_buffer->pool->alloc); + } + } + } +} + +void anv_CmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + anv_cmd_clear_image(cmd_buffer, image, imageLayout, + (const VkClearValue *) pColor, + rangeCount, pRanges); + + 
meta_clear_end(&saved_state, cmd_buffer); +} + +void anv_CmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + anv_cmd_clear_image(cmd_buffer, image, imageLayout, + (const VkClearValue *) pDepthStencil, + rangeCount, pRanges); + + meta_clear_end(&saved_state, cmd_buffer); +} + +void anv_CmdClearAttachments( + VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkClearAttachment* pAttachments, + uint32_t rectCount, + const VkClearRect* pRects) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + /* FINISHME: We can do better than this dumb loop. It thrashes too much + * state. 
+ */ + for (uint32_t a = 0; a < attachmentCount; ++a) { + for (uint32_t r = 0; r < rectCount; ++r) { + emit_clear(cmd_buffer, &pAttachments[a], &pRects[r]); + } + } + + meta_clear_end(&saved_state, cmd_buffer); +} + +static void +do_buffer_fill(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat fill_format, uint32_t data) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = fill_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }; + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + const VkClearValue clear_value = { + .color = { + .uint32 = { data, data, data, data } + } + }; + + const VkImageSubresourceRange range = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }; + + anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + &clear_value, 1, &range); +} + +void anv_CmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize fillSize, + uint32_t data) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + VkFormat format; + int bs; + if ((fillSize & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((fillSize & 7) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((fillSize & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + /* This is maximum possible width/height our HW can handle */ + const uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + const uint64_t max_fill_size = max_surface_dim * max_surface_dim * bs; + while (fillSize > max_fill_size) { + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + max_surface_dim, max_surface_dim, format, data); + fillSize -= max_fill_size; + dstOffset += max_fill_size; + } + + uint64_t height = fillSize / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + const uint64_t rect_fill_size = height * max_surface_dim * bs; + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + max_surface_dim, height, 
format, data); + fillSize -= rect_fill_size; + dstOffset += rect_fill_size; + } + + if (fillSize != 0) { + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + fillSize / bs, 1, format, data); + } + + meta_clear_end(&saved_state, cmd_buffer); +} diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c new file mode 100644 index 00000000000..ea5020c5f24 --- /dev/null +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -0,0 +1,867 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include + +#include "anv_meta.h" +#include "anv_private.h" +#include "nir/nir_builder.h" + +/** + * Vertex attributes used by all pipelines. 
+ */ +struct vertex_attrs { + struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ + float tex_position[2]; +}; + +static void +meta_resolve_save(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT) | + (1 << VK_DYNAMIC_STATE_SCISSOR)); + + cmd_buffer->state.dynamic.viewport.count = 0; + cmd_buffer->state.dynamic.scissor.count = 0; +} + +static void +meta_resolve_restore(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static VkPipeline * +get_pipeline_h(struct anv_device *device, uint32_t samples) +{ + uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */ + + assert(samples >= 2); + assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines)); + + return &device->meta_state.resolve.pipelines[i]; +} + +static nir_shader * +build_nir_vs(void) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + + nir_builder b; + nir_variable *a_position; + nir_variable *v_position; + nir_variable *a_tex_position; + nir_variable *v_tex_position; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs"); + + a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_position"); + a_position->data.location = VERT_ATTRIB_GENERIC0; + + v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "gl_Position"); + v_position->data.location = VARYING_SLOT_POS; + + a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_tex_position"); + a_tex_position->data.location = VERT_ATTRIB_GENERIC1; + + v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + + nir_copy_var(&b, v_position, a_position); + nir_copy_var(&b, v_tex_position, a_tex_position); + + return 
b.shader; +} + +static nir_shader * +build_nir_fs(uint32_t num_samples) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + + const struct glsl_type *sampler2DMS = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, + /*is_shadow*/ false, + /*is_array*/ false, + GLSL_TYPE_FLOAT); + + nir_builder b; + nir_variable *u_tex; /* uniform sampler */ + nir_variable *v_position; /* vec4, varying fragment position */ + nir_variable *v_tex_position; /* vec4, varying texture coordinate */ + nir_variable *f_color; /* vec4, fragment output color */ + nir_ssa_def *accum; /* vec4, accumulation of sample values */ + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_asprintf(b.shader, + "meta_resolve_fs_samples%02d", + num_samples); + + u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS, + "u_tex"); + u_tex->data.descriptor_set = 0; + u_tex->data.binding = 0; + + v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_position"); + v_position->data.location = VARYING_SLOT_POS; + v_position->data.origin_upper_left = true; + + v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + + f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "f_color"); + f_color->data.location = FRAG_RESULT_DATA0; + + accum = nir_imm_vec4(&b, 0, 0, 0, 0); + + nir_ssa_def *tex_position_ivec = + nir_f2i(&b, nir_load_var(&b, v_tex_position)); + + for (uint32_t i = 0; i < num_samples; ++i) { + nir_tex_instr *tex; + + tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2); + tex->texture = nir_deref_var_create(tex, u_tex); + tex->sampler = nir_deref_var_create(tex, u_tex); + tex->sampler_dim = GLSL_SAMPLER_DIM_MS; + tex->op = nir_texop_txf_ms; + tex->src[0].src = nir_src_for_ssa(tex_position_ivec); + tex->src[0].src_type = nir_tex_src_coord; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); + tex->src[1].src_type = 
nir_tex_src_ms_index; + tex->dest_type = nir_type_float; + tex->is_array = false; + tex->coord_components = 3; + nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + accum = nir_fadd(&b, accum, &tex->dest.ssa); + } + + accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples)); + nir_store_var(&b, f_color, accum, /*writemask*/ 4); + + return b.shader; +} + +static VkResult +create_pass(struct anv_device *device) +{ + VkResult result; + VkDevice device_h = anv_device_to_handle(device); + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + result = anv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .samples = 1, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + }, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }, + .dependencyCount = 0, + }, + alloc, + &device->meta_state.resolve.pass); + + return result; +} + +static VkResult +create_pipeline(struct anv_device *device, + uint32_t num_samples, + VkShaderModule vs_module_h) +{ + VkResult result; + VkDevice device_h = anv_device_to_handle(device); + + struct anv_shader_module fs_module = { + .nir = build_nir_fs(num_samples), + }; + + if (!fs_module.nir) { + /* XXX: Need more accurate error */ 
+ result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto cleanup; + } + + result = anv_graphics_pipeline_create(device_h, + VK_NULL_HANDLE, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vs_module_h, + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = anv_shader_module_to_handle(&fs_module), + .pName = "main", + }, + }, + .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct vertex_attrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct vertex_attrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, position), + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, tex_position), + }, + }, + }, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, 
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { 0x1 }, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT, + }, + }, + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 2, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }, + }, + .layout = device->meta_state.resolve.pipeline_layout, + .renderPass = device->meta_state.resolve.pass, + .subpass = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }, + &device->meta_state.alloc, + get_pipeline_h(device, num_samples)); + if (result != VK_SUCCESS) + goto cleanup; + + goto cleanup; + +cleanup: + ralloc_free(fs_module.nir); + return result; +} + +void +anv_device_finish_meta_resolve_state(struct anv_device *device) +{ + struct anv_meta_state *state = &device->meta_state; + VkDevice device_h = anv_device_to_handle(device); + 
/* Continuation of anv_device_finish_meta_resolve_state(): destroy every
 * object init created; each handle is checked so this is safe to call on a
 * partially-initialized state (the init fail path relies on that).
 */
   VkRenderPass pass_h = device->meta_state.resolve.pass;
   VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout;
   VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout;
   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;

   if (pass_h)
      ANV_CALL(DestroyRenderPass)(device_h, pass_h,
                                  &device->meta_state.alloc);

   if (pipeline_layout_h)
      ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc);

   if (ds_layout_h)
      ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc);

   for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) {
      VkPipeline pipeline_h = state->resolve.pipelines[i];

      if (pipeline_h) {
         ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc);
      }
   }
}

/* Create the descriptor set layout, pipeline layout, render pass and one
 * pipeline per hardware-supported sample count (>= 2).  On failure, tears
 * down whatever was created.  The VS module is temporary: its NIR is freed
 * on exit in both the success and failure paths.
 */
VkResult
anv_device_init_meta_resolve_state(struct anv_device *device)
{
   VkResult res = VK_SUCCESS;
   VkDevice device_h = anv_device_to_handle(device);
   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;

   const isl_sample_count_mask_t sample_count_mask =
      isl_device_get_sample_counts(&device->isl_dev);

   zero(device->meta_state.resolve);

   struct anv_shader_module vs_module = { .nir = build_nir_vs() };
   if (!vs_module.nir) {
      /* XXX: Need more accurate error */
      res = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module);

   res = anv_CreateDescriptorSetLayout(device_h,
      &(VkDescriptorSetLayoutCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = (VkDescriptorSetLayoutBinding[]) {
            {
               .binding = 0,
               .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
               .descriptorCount = 1,
               .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
            },
         },
      },
      alloc,
      &device->meta_state.resolve.ds_layout);
   if (res != VK_SUCCESS)
      goto fail;

   res = anv_CreatePipelineLayout(device_h,
      &(VkPipelineLayoutCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
         .setLayoutCount = 1,
         .pSetLayouts = (VkDescriptorSetLayout[]) {
            device->meta_state.resolve.ds_layout,
         },
      },
      alloc,
      &device->meta_state.resolve.pipeline_layout);
   if (res != VK_SUCCESS)
      goto fail;

   res = create_pass(device);
   if (res != VK_SUCCESS)
      goto fail;

   for (uint32_t i = 0;
        i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) {

      /* Pipeline slot i serves sample count 2^(i+1); skip counts the HW
       * does not support.
       */
      uint32_t sample_count = 1 << (1 + i);
      if (!(sample_count_mask & sample_count))
         continue;

      res = create_pipeline(device, sample_count, vs_module_h);
      if (res != VK_SUCCESS)
         goto fail;
   }

   goto cleanup;

fail:
   anv_device_finish_meta_resolve_state(device);

cleanup:
   ralloc_free(vs_module.nir);

   return res;
}

/* Record one resolve rect: upload a 3-vertex RECTLIST, bind a throwaway
 * nearest sampler + descriptor set pointing at src_iview, set full-FB
 * viewport/scissor, bind the per-sample-count pipeline and draw.  The
 * transient sampler/descriptor set are destroyed after the draw is emitted.
 */
static void
emit_resolve(struct anv_cmd_buffer *cmd_buffer,
             struct anv_image_view *src_iview,
             const VkOffset2D *src_offset,
             struct anv_image_view *dest_iview,
             const VkOffset2D *dest_offset,
             const VkExtent2D *resolve_extent)
{
   struct anv_device *device = cmd_buffer->device;
   VkDevice device_h = anv_device_to_handle(device);
   VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer);
   const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct anv_image *src_image = src_iview->image;
   /* Fake non-null pool handle; presumably anv's set allocation ignores the
    * pool — TODO confirm.
    */
   VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1;

   const struct vertex_attrs vertex_data[3] = {
      {
         .vue_header = {0},
         .position = {
            dest_offset->x + resolve_extent->width,
            dest_offset->y + resolve_extent->height,
         },
         .tex_position = {
            src_offset->x + resolve_extent->width,
            src_offset->y + resolve_extent->height,
         },
      },
      {
         .vue_header = {0},
         .position = {
            dest_offset->x,
            dest_offset->y + resolve_extent->height,
         },
         .tex_position = {
            src_offset->x,
            src_offset->y + resolve_extent->height,
         },
      },
      {
         .vue_header = {0},
         .position = {
            dest_offset->x,
            dest_offset->y,
         },
         .tex_position = {
            src_offset->x,
            src_offset->y,
         },
      },
   };

   struct anv_state vertex_mem =
      anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data,
                                  sizeof(vertex_data), 16);

   /* Stack-local anv_buffer wrapping the dynamic-state upload; only used for
    * the bind below, which copies what it needs.
    */
   struct anv_buffer vertex_buffer = {
      .device = device,
      .size = sizeof(vertex_data),
      .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo,
      .offset = vertex_mem.offset,
   };

   VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer);

   anv_CmdBindVertexBuffers(cmd_buffer_h,
      /*firstBinding*/ 0,
      /*bindingCount*/ 1,
      (VkBuffer[]) { vertex_buffer_h },
      (VkDeviceSize[]) { 0 });

   VkSampler sampler_h;
   ANV_CALL(CreateSampler)(device_h,
      &(VkSamplerCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
         .magFilter = VK_FILTER_NEAREST,
         .minFilter = VK_FILTER_NEAREST,
         .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
         .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
         .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
         .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
         .mipLodBias = 0.0,
         .anisotropyEnable = false,
         .compareEnable = false,
         .minLod = 0.0,
         .maxLod = 0.0,
         .unnormalizedCoordinates = false,
      },
      &cmd_buffer->pool->alloc,
      &sampler_h);

   VkDescriptorSet desc_set_h;
   anv_AllocateDescriptorSets(device_h,
      &(VkDescriptorSetAllocateInfo) {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
         .descriptorPool = dummy_desc_pool_h,
         .descriptorSetCount = 1,
         .pSetLayouts = (VkDescriptorSetLayout[]) {
            device->meta_state.resolve.ds_layout,
         },
      },
      &desc_set_h);

   ANV_FROM_HANDLE(anv_descriptor_set, desc_set, desc_set_h);

   anv_UpdateDescriptorSets(device_h,
      /*writeCount*/ 1,
      (VkWriteDescriptorSet[]) {
         {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = desc_set_h,
            .dstBinding = 0,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .pImageInfo = (VkDescriptorImageInfo[]) {
               {
                  .sampler = sampler_h,
                  .imageView = anv_image_view_to_handle(src_iview),
                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
               },
            },
         },
      },
      /*copyCount*/ 0,
      /*copies */ NULL);

   ANV_CALL(CmdSetViewport)(cmd_buffer_h,
      /*firstViewport*/ 0,
      /*viewportCount*/ 1,
      (VkViewport[]) {
         {
            .x = 0,
            .y = 0,
            .width = fb->width,
            .height = fb->height,
            .minDepth = 0.0,
            .maxDepth = 1.0,
         },
      });

   ANV_CALL(CmdSetScissor)(cmd_buffer_h,
      /*firstScissor*/ 0,
      /*scissorCount*/ 1,
      (VkRect2D[]) {
         {
            .offset = { 0, 0 },
            .extent = (VkExtent2D) { fb->width, fb->height },
         },
      });

   VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h);

   /* Skip the bind when the right pipeline is already current. */
   if (cmd_buffer->state.pipeline != pipeline) {
      anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
                          pipeline_h);
   }

   anv_CmdBindDescriptorSets(cmd_buffer_h,
      VK_PIPELINE_BIND_POINT_GRAPHICS,
      device->meta_state.resolve.pipeline_layout,
      /*firstSet*/ 0,
      /* setCount */ 1,
      (VkDescriptorSet[]) {
         desc_set_h,
      },
      /*copyCount*/ 0,
      /*copies */ NULL);

   ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0);

   /* All objects below are consumed by the draw call. We may safely destroy
    * them.
    */
   anv_descriptor_set_destroy(device, desc_set);
   anv_DestroySampler(device_h, sampler_h,
                      &cmd_buffer->pool->alloc);
}

/* vkCmdResolveImage: resolve each region, layer by layer, by wrapping src
 * and dest layers in temporary image views and a temporary framebuffer and
 * running one render pass per layer.
 */
void anv_CmdResolveImage(
    VkCommandBuffer                             cmd_buffer_h,
    VkImage                                     src_image_h,
    VkImageLayout                               src_image_layout,
    VkImage                                     dest_image_h,
    VkImageLayout                               dest_image_layout,
    uint32_t                                    region_count,
    const VkImageResolve*                       regions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h);
   ANV_FROM_HANDLE(anv_image, src_image, src_image_h);
   ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h);
   struct anv_device *device = cmd_buffer->device;
   struct anv_meta_saved_state state;
   VkDevice device_h = anv_device_to_handle(device);

   meta_resolve_save(&state, cmd_buffer);

   assert(src_image->samples > 1);
   assert(dest_image->samples == 1);

   if (src_image->samples >= 16) {
      /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the
       * glBlitFramebuffer workaround for samples >= 16.
       */
      anv_finishme("vkCmdResolveImage: need interpolation workaround when "
                   "samples >= 16");
   }

   if (src_image->array_size > 1)
      anv_finishme("vkCmdResolveImage: multisample array images");

   for (uint32_t r = 0; r < region_count; ++r) {
      const VkImageResolve *region = &regions[r];

      /* From the Vulkan 1.0 spec:
       *
       *    - The aspectMask member of srcSubresource and dstSubresource must
       *      only contain VK_IMAGE_ASPECT_COLOR_BIT
       *
       *    - The layerCount member of srcSubresource and dstSubresource must
       *      match
       */
      assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
      assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
      assert(region->srcSubresource.layerCount ==
             region->dstSubresource.layerCount);

      const uint32_t src_base_layer =
         anv_meta_get_iview_layer(src_image, &region->srcSubresource,
                                  &region->srcOffset);

      const uint32_t dest_base_layer =
         anv_meta_get_iview_layer(dest_image, &region->dstSubresource,
                                  &region->dstOffset);

      for (uint32_t layer = 0; layer <
/* Continuation of anv_CmdResolveImage(): per-layer resolve loop. */
           region->srcSubresource.layerCount;
           ++layer) {

         struct anv_image_view src_iview;
         anv_image_view_init(&src_iview, cmd_buffer->device,
            &(VkImageViewCreateInfo) {
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
               .image = src_image_h,
               .viewType = anv_meta_get_view_type(src_image),
               .format = src_image->format->vk_format,
               .subresourceRange = {
                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                  .baseMipLevel = region->srcSubresource.mipLevel,
                  .levelCount = 1,
                  .baseArrayLayer = src_base_layer + layer,
                  .layerCount = 1,
               },
            },
            cmd_buffer, 0);

         struct anv_image_view dest_iview;
         anv_image_view_init(&dest_iview, cmd_buffer->device,
            &(VkImageViewCreateInfo) {
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
               .image = dest_image_h,
               .viewType = anv_meta_get_view_type(dest_image),
               .format = dest_image->format->vk_format,
               .subresourceRange = {
                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                  .baseMipLevel = region->dstSubresource.mipLevel,
                  .levelCount = 1,
                  .baseArrayLayer = dest_base_layer + layer,
                  .layerCount = 1,
               },
            },
            cmd_buffer, 0);

         /* One-shot framebuffer around the destination layer; destroyed
          * right after the render pass below.
          */
         VkFramebuffer fb_h;
         anv_CreateFramebuffer(device_h,
            &(VkFramebufferCreateInfo) {
               .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
               .attachmentCount = 1,
               .pAttachments = (VkImageView[]) {
                  anv_image_view_to_handle(&dest_iview),
               },
               .width = anv_minify(dest_image->extent.width,
                                   region->dstSubresource.mipLevel),
               .height = anv_minify(dest_image->extent.height,
                                    region->dstSubresource.mipLevel),
               .layers = 1
            },
            &cmd_buffer->pool->alloc,
            &fb_h);

         ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h,
            &(VkRenderPassBeginInfo) {
               .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
               .renderPass = device->meta_state.resolve.pass,
               .framebuffer = fb_h,
               .renderArea = {
                  .offset = {
                     region->dstOffset.x,
                     region->dstOffset.y,
                  },
                  .extent = {
                     region->extent.width,
                     region->extent.height,
                  }
               },
               .clearValueCount = 0,
               .pClearValues = NULL,
            },
            VK_SUBPASS_CONTENTS_INLINE);

         emit_resolve(cmd_buffer,
            &src_iview,
            &(VkOffset2D) {
               .x = region->srcOffset.x,
               .y = region->srcOffset.y,
            },
            &dest_iview,
            &(VkOffset2D) {
               .x = region->dstOffset.x,
               .y = region->dstOffset.y,
            },
            &(VkExtent2D) {
               .width = region->extent.width,
               .height = region->extent.height,
            });

         ANV_CALL(CmdEndRenderPass)(cmd_buffer_h);

         anv_DestroyFramebuffer(device_h, fb_h,
                                &cmd_buffer->pool->alloc);
      }
   }

   meta_resolve_restore(&state, cmd_buffer);
}

/**
 * Emit any needed resolves for the current subpass.
 */
void
anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_meta_saved_state saved_state;

   /* FINISHME(perf): Skip clears for resolve attachments.
    *
    * From the Vulkan 1.0 spec:
    *
    *    If the first use of an attachment in a render pass is as a resolve
    *    attachment, then the loadOp is effectively ignored as the resolve is
    *    guaranteed to overwrite all pixels in the render area.
    */

   if (!subpass->has_resolve)
      return;

   meta_resolve_save(&saved_state, cmd_buffer);

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      uint32_t src_att = subpass->color_attachments[i];
      uint32_t dest_att = subpass->resolve_attachments[i];

      if (dest_att == VK_ATTACHMENT_UNUSED)
         continue;

      struct anv_image_view *src_iview = fb->attachments[src_att];
      struct anv_image_view *dest_iview = fb->attachments[dest_att];

      /* Temporary single-color-attachment subpass targeting the resolve
       * attachment; the real subpass is put back after the loop.
       */
      struct anv_subpass resolve_subpass = {
         .color_count = 1,
         .color_attachments = (uint32_t[]) { dest_att },
         .depth_stencil_attachment = VK_ATTACHMENT_UNUSED,
      };

      anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);

      /* Subpass resolves must respect the render area. We can ignore the
       * render area here because vkCmdBeginRenderPass set the render area
       * with 3DSTATE_DRAWING_RECTANGLE.
       *
       * XXX(chadv): Does the hardware really respect
       * 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
       */
      emit_resolve(cmd_buffer,
         src_iview,
         &(VkOffset2D) { 0, 0 },
         dest_iview,
         &(VkOffset2D) { 0, 0 },
         &(VkExtent2D) { fb->width, fb->height });
   }

   cmd_buffer->state.subpass = subpass;
   meta_resolve_restore(&saved_state, cmd_buffer);
}

/* ===== src/intel/vulkan/anv_nir.h (new file in this patch) ===== */
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
+ */ + +#pragma once + +#include "nir/nir.h" +#include "anv_private.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar); + +void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data); +void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c new file mode 100644 index 00000000000..e71a8ffb1f4 --- /dev/null +++ b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c @@ -0,0 +1,171 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
 */

#include "anv_nir.h"
#include "nir/nir_builder.h"

/* Pass-wide context threaded through the per-block callback. */
struct apply_dynamic_offsets_state {
   nir_shader *shader;
   nir_builder builder;

   const struct anv_pipeline_layout *layout;

   /* First push-constant uniform index reserved for the dynamic offsets. */
   uint32_t indices_start;
};

/* Per-block callback: for every UBO/SSBO access whose binding has a dynamic
 * offset, load the (offset, range) pair from push constants, add the offset
 * to the access's offset source, and predicate the access on being in range
 * (loads that fail the range check yield zero via a phi).
 */
static bool
apply_dynamic_offsets_block(nir_block *block, void *void_state)
{
   struct apply_dynamic_offsets_state *state = void_state;
   struct anv_descriptor_set_layout *set_layout;

   nir_builder *b = &state->builder;

   nir_foreach_instr_safe(block, instr) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      /* Which source of the intrinsic holds the buffer index. */
      unsigned block_idx_src;
      switch (intrin->intrinsic) {
      case nir_intrinsic_load_ubo:
      case nir_intrinsic_load_ssbo:
         block_idx_src = 0;
         break;
      case nir_intrinsic_store_ssbo:
         block_idx_src = 1;
         break;
      default:
         continue; /* the loop */
      }

      /* The buffer index must come straight from vulkan_resource_index, so
       * the (set, binding) pair is available as constants.
       */
      nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr;
      assert(res_instr->type == nir_instr_type_intrinsic);
      nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr);
      assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

      unsigned set = res_intrin->const_index[0];
      unsigned binding = res_intrin->const_index[1];

      set_layout = state->layout->set[set].layout;
      if (set_layout->binding[binding].dynamic_offset_index < 0)
         continue;

      b->cursor = nir_before_instr(&intrin->instr);

      /* First, we need to generate the uniform load for the buffer offset */
      uint32_t index = state->layout->set[set].dynamic_offset_start +
                       set_layout->binding[binding].dynamic_offset_index;

      /* Two components: x = dynamic offset, y = range limit — presumably;
       * confirm against the push-constant packing on the driver side.
       */
      nir_intrinsic_instr *offset_load =
         nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform);
      offset_load->num_components = 2;
      offset_load->const_index[0] = state->indices_start + index * 8;
      offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa,
                                                     nir_imm_int(b, 8)));

      nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL);
      nir_builder_instr_insert(b, &offset_load->instr);

      nir_src *offset_src = nir_get_io_offset_src(intrin);
      nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa,
                                         &offset_load->dest.ssa);

      /* In order to avoid out-of-bounds access, we predicate */
      nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1),
                                  offset_src->ssa);
      nir_if *if_stmt = nir_if_create(b->shader);
      if_stmt->condition = nir_src_for_ssa(pred);
      nir_cf_node_insert(b->cursor, &if_stmt->cf_node);

      /* Move the access into the then-branch with the rebased offset. */
      nir_instr_remove(&intrin->instr);
      *offset_src = nir_src_for_ssa(new_offset);
      nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr);

      if (intrin->intrinsic != nir_intrinsic_store_ssbo) {
         /* It's a load, we need a phi node */
         nir_phi_instr *phi = nir_phi_instr_create(b->shader);
         nir_ssa_dest_init(&phi->instr, &phi->dest,
                           intrin->num_components, NULL);

         /* then-branch source: the real load result. */
         nir_phi_src *src1 = ralloc(phi, nir_phi_src);
         struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list);
         src1->pred = exec_node_data(nir_block, tnode, cf_node.node);
         src1->src = nir_src_for_ssa(&intrin->dest.ssa);
         exec_list_push_tail(&phi->srcs, &src1->node);

         /* else-branch source: zero for out-of-range accesses. */
         b->cursor = nir_after_cf_list(&if_stmt->else_list);
         nir_ssa_def *zero = nir_build_imm(b, intrin->num_components,
            (nir_const_value) { .u = { 0, 0, 0, 0 } });

         nir_phi_src *src2 = ralloc(phi, nir_phi_src);
         struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list);
         src2->pred = exec_node_data(nir_block, enode, cf_node.node);
         src2->src = nir_src_for_ssa(zero);
         exec_list_push_tail(&phi->srcs, &src2->node);

         assert(intrin->dest.is_ssa);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_src_for_ssa(&phi->dest.ssa));

         nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr);
      }
   }

   return true;
}

/* Entry point of the pass (body continues beyond this chunk). */
void
anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
                              nir_shader *shader,
                              struct brw_stage_prog_data *prog_data)
{
   struct apply_dynamic_offsets_state state = {
      .shader = shader,
+ .layout = pipeline->layout, + .indices_start = shader->num_uniforms, + }; + + if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets) + return; + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_builder_init(&state.builder, function->impl); + nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state); + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + struct anv_push_constants *null_data = NULL; + for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) { + prog_data->param[i * 2 + shader->num_uniforms] = + (const union gl_constant_value *)&null_data->dynamic[i].offset; + prog_data->param[i * 2 + 1 + shader->num_uniforms] = + (const union gl_constant_value *)&null_data->dynamic[i].range; + } + + shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8; +} diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c new file mode 100644 index 00000000000..c58a93878ee --- /dev/null +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -0,0 +1,394 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_nir.h" +#include "program/prog_parameter.h" +#include "nir/nir_builder.h" + +struct apply_pipeline_layout_state { + nir_shader *shader; + nir_builder builder; + + struct { + BITSET_WORD *used; + uint8_t *surface_offsets; + uint8_t *sampler_offsets; + uint8_t *image_offsets; + } set[MAX_SETS]; +}; + +static void +add_binding(struct apply_pipeline_layout_state *state, + uint32_t set, uint32_t binding) +{ + BITSET_SET(state->set[set].used, binding); +} + +static void +add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var) +{ + add_binding(state, var->data.descriptor_set, var->data.binding); +} + +static bool +get_used_bindings_block(nir_block *block, void *void_state) +{ + struct apply_pipeline_layout_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_vulkan_resource_index: + add_binding(state, nir_intrinsic_desc_set(intrin), + nir_intrinsic_binding(intrin)); + break; + + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case 
nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_size: + case nir_intrinsic_image_samples: + add_var_binding(state, intrin->variables[0]->var); + break; + + default: + break; + } + break; + } + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + assert(tex->texture); + add_var_binding(state, tex->texture->var); + if (tex->sampler) + add_var_binding(state, tex->sampler->var); + break; + } + default: + continue; + } + } + + return true; +} + +static void +lower_res_index_intrinsic(nir_intrinsic_instr *intrin, + struct apply_pipeline_layout_state *state) +{ + nir_builder *b = &state->builder; + + b->cursor = nir_before_instr(&intrin->instr); + + uint32_t set = nir_intrinsic_desc_set(intrin); + uint32_t binding = nir_intrinsic_binding(intrin); + + uint32_t surface_index = state->set[set].surface_offsets[binding]; + + nir_const_value *const_block_idx = + nir_src_as_const_value(intrin->src[0]); + + nir_ssa_def *block_index; + if (const_block_idx) { + block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]); + } else { + block_index = nir_iadd(b, nir_imm_int(b, surface_index), + nir_ssa_for_src(b, intrin->src[0], 1)); + } + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index)); + nir_instr_remove(&intrin->instr); +} + +static void +lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref, + unsigned *const_index, nir_tex_src_type src_type, + struct apply_pipeline_layout_state *state) +{ + if (deref->deref.child) { + assert(deref->deref.child->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + *const_index += deref_array->base_offset; + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, + tex->num_srcs + 1); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + new_srcs[i].src_type = tex->src[i].src_type; + 
nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src); + } + + ralloc_free(tex->src); + tex->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. + */ + tex->src[tex->num_srcs].src_type = src_type; + tex->num_srcs++; + assert(deref_array->indirect.is_ssa); + nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src, + deref_array->indirect); + } + } +} + +static void +cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref) +{ + if (deref->deref.child == NULL) + return; + + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + if (deref_array->deref_array_type != nir_deref_array_type_indirect) + return; + + nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT); +} + +static void +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +{ + /* No one should have come by and lowered it already */ + assert(tex->texture); + + unsigned set = tex->texture->var->data.descriptor_set; + unsigned binding = tex->texture->var->data.binding; + tex->texture_index = state->set[set].surface_offsets[binding]; + lower_tex_deref(tex, tex->texture, &tex->texture_index, + nir_tex_src_texture_offset, state); + + if (tex->sampler) { + unsigned set = tex->sampler->var->data.descriptor_set; + unsigned binding = tex->sampler->var->data.binding; + tex->sampler_index = state->set[set].surface_offsets[binding]; + lower_tex_deref(tex, tex->sampler, &tex->sampler_index, + nir_tex_src_sampler_offset, state); + } + + /* The backend only ever uses this to mark used surfaces. We don't care + * about that little optimization so it just needs to be non-zero. 
+ */ + tex->texture_array_size = 1; + + cleanup_tex_deref(tex, tex->texture); + if (tex->sampler) + cleanup_tex_deref(tex, tex->sampler); + tex->texture = NULL; + tex->sampler = NULL; +} + +static bool +apply_pipeline_layout_block(nir_block *block, void *void_state) +{ + struct apply_pipeline_layout_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) { + lower_res_index_intrinsic(intrin, state); + } + break; + } + case nir_instr_type_tex: + lower_tex(nir_instr_as_tex(instr), state); + break; + default: + continue; + } + } + + return true; +} + +static void +setup_vec4_uniform_value(const union gl_constant_value **params, + const union gl_constant_value *values, + unsigned n) +{ + static const gl_constant_value zero = { 0 }; + + for (unsigned i = 0; i < n; ++i) + params[i] = &values[i]; + + for (unsigned i = n; i < 4; ++i) + params[i] = &zero; +} + +void +anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + struct anv_pipeline_layout *layout = pipeline->layout; + + struct apply_pipeline_layout_state state = { + .shader = shader, + }; + + void *mem_ctx = ralloc_context(NULL); + + for (unsigned s = 0; s < layout->num_sets; s++) { + const unsigned count = layout->set[s].layout->binding_count; + const unsigned words = BITSET_WORDS(count); + state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words); + state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count); + state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count); + state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count); + } + + nir_foreach_function(shader, function) { + if (function->impl) + nir_foreach_block(function->impl, get_used_bindings_block, &state); + } + + struct anv_pipeline_bind_map map 
= { + .surface_count = 0, + .sampler_count = 0, + }; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + + BITSET_WORD b, _tmp; + BITSET_FOREACH_SET(b, _tmp, state.set[set].used, + set_layout->binding_count) { + if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) + map.surface_count += set_layout->binding[b].array_size; + if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) + map.sampler_count += set_layout->binding[b].array_size; + if (set_layout->binding[b].stage[shader->stage].image_index >= 0) + map.image_count += set_layout->binding[b].array_size; + } + } + + map.surface_to_descriptor = + malloc(map.surface_count * sizeof(struct anv_pipeline_binding)); + map.sampler_to_descriptor = + malloc(map.sampler_count * sizeof(struct anv_pipeline_binding)); + + pipeline->bindings[shader->stage] = map; + + unsigned surface = 0; + unsigned sampler = 0; + unsigned image = 0; + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + + BITSET_WORD b, _tmp; + BITSET_FOREACH_SET(b, _tmp, state.set[set].used, + set_layout->binding_count) { + unsigned array_size = set_layout->binding[b].array_size; + unsigned set_offset = set_layout->binding[b].descriptor_index; + + if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) { + state.set[set].surface_offsets[b] = surface; + for (unsigned i = 0; i < array_size; i++) { + map.surface_to_descriptor[surface + i].set = set; + map.surface_to_descriptor[surface + i].offset = set_offset + i; + } + surface += array_size; + } + + if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) { + state.set[set].sampler_offsets[b] = sampler; + for (unsigned i = 0; i < array_size; i++) { + map.sampler_to_descriptor[sampler + i].set = set; + map.sampler_to_descriptor[sampler + i].offset = set_offset + i; + } + sampler += array_size; + } + + 
if (set_layout->binding[b].stage[shader->stage].image_index >= 0) { + state.set[set].image_offsets[b] = image; + image += array_size; + } + } + } + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_builder_init(&state.builder, function->impl); + nir_foreach_block(function->impl, apply_pipeline_layout_block, &state); + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + if (map.image_count > 0) { + nir_foreach_variable(var, &shader->uniforms) { + if (glsl_type_is_image(var->type) || + (glsl_type_is_array(var->type) && + glsl_type_is_image(glsl_get_array_element(var->type)))) { + /* Images are represented as uniform push constants and the actual + * information required for reading/writing to/from the image is + * storred in the uniform. + */ + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + unsigned image_index = state.set[set].image_offsets[binding]; + + var->data.driver_location = shader->num_uniforms + + image_index * BRW_IMAGE_PARAM_SIZE * 4; + } + } + + struct anv_push_constants *null_data = NULL; + const gl_constant_value **param = prog_data->param + shader->num_uniforms; + const struct brw_image_param *image_param = null_data->images; + for (uint32_t i = 0; i < map.image_count; i++) { + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, + (const union gl_constant_value *)&image_param->surface_idx, 1); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, + (const union gl_constant_value *)image_param->offset, 2); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, + (const union gl_constant_value *)image_param->size, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, + (const union gl_constant_value *)image_param->stride, 4); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, + (const union gl_constant_value *)image_param->tiling, 3); + setup_vec4_uniform_value(param + 
BRW_IMAGE_PARAM_SWIZZLING_OFFSET, + (const union gl_constant_value *)image_param->swizzling, 2); + + param += BRW_IMAGE_PARAM_SIZE; + image_param ++; + } + + shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4; + } +} diff --git a/src/intel/vulkan/anv_nir_lower_push_constants.c b/src/intel/vulkan/anv_nir_lower_push_constants.c new file mode 100644 index 00000000000..53cd3d73793 --- /dev/null +++ b/src/intel/vulkan/anv_nir_lower_push_constants.c @@ -0,0 +1,77 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_nir.h" + +struct lower_push_constants_state { + nir_shader *shader; + bool is_scalar; +}; + +static bool +lower_push_constants_block(nir_block *block, void *void_state) +{ + struct lower_push_constants_state *state = void_state; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + /* TODO: Handle indirect push constants */ + if (intrin->intrinsic != nir_intrinsic_load_push_constant) + continue; + + /* This wont work for vec4 stages. */ + assert(state->is_scalar); + + assert(intrin->const_index[0] % 4 == 0); + assert(intrin->const_index[1] == 128); + + /* We just turn them into uniform loads with the appropreate offset */ + intrin->intrinsic = nir_intrinsic_load_uniform; + } + + return true; +} + +void +anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar) +{ + struct lower_push_constants_state state = { + .shader = shader, + .is_scalar = is_scalar, + }; + + nir_foreach_function(shader, function) { + if (function->impl) + nir_foreach_block(function->impl, lower_push_constants_block, &state); + } + + assert(shader->num_uniforms % 4 == 0); + if (is_scalar) + shader->num_uniforms /= 4; + else + shader->num_uniforms = DIV_ROUND_UP(shader->num_uniforms, 16); +} diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c new file mode 100644 index 00000000000..d07e9fec6cc --- /dev/null +++ b/src/intel/vulkan/anv_pass.c @@ -0,0 +1,160 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +VkResult anv_CreateRenderPass( + VkDevice _device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_render_pass *pass; + size_t size; + size_t attachments_offset; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + + size = sizeof(*pass); + size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); + attachments_offset = size; + size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); + + pass = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Clear the subpasses along with the parent pass. This required because + * each array member of anv_subpass must be a valid pointer if not NULL. 
+ */ + memset(pass, 0, size); + pass->attachment_count = pCreateInfo->attachmentCount; + pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = (void *) pass + attachments_offset; + + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + struct anv_render_pass_attachment *att = &pass->attachments[i]; + + att->format = anv_format_for_vk_format(pCreateInfo->pAttachments[i].format); + att->samples = pCreateInfo->pAttachments[i].samples; + att->load_op = pCreateInfo->pAttachments[i].loadOp; + att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + // att->store_op = pCreateInfo->pAttachments[i].storeOp; + // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + } + + uint32_t subpass_attachment_count = 0, *p; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + + subpass_attachment_count += + desc->inputAttachmentCount + + desc->colorAttachmentCount + + /* Count colorAttachmentCount again for resolve_attachments */ + desc->colorAttachmentCount; + } + + pass->subpass_attachments = + anv_alloc2(&device->alloc, pAllocator, + subpass_attachment_count * sizeof(uint32_t), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass->subpass_attachments == NULL) { + anv_free2(&device->alloc, pAllocator, pass); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + p = pass->subpass_attachments; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + struct anv_subpass *subpass = &pass->subpasses[i]; + + subpass->input_count = desc->inputAttachmentCount; + subpass->color_count = desc->colorAttachmentCount; + + if (desc->inputAttachmentCount > 0) { + subpass->input_attachments = p; + p += desc->inputAttachmentCount; + + for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { + subpass->input_attachments[j] + = desc->pInputAttachments[j].attachment; + } + } + + if 
(desc->colorAttachmentCount > 0) { + subpass->color_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + subpass->color_attachments[j] + = desc->pColorAttachments[j].attachment; + } + } + + subpass->has_resolve = false; + if (desc->pResolveAttachments) { + subpass->resolve_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + uint32_t a = desc->pResolveAttachments[j].attachment; + subpass->resolve_attachments[j] = a; + if (a != VK_ATTACHMENT_UNUSED) + subpass->has_resolve = true; + } + } + + if (desc->pDepthStencilAttachment) { + subpass->depth_stencil_attachment = + desc->pDepthStencilAttachment->attachment; + } else { + subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED; + } + } + + *pRenderPass = anv_render_pass_to_handle(pass); + + return VK_SUCCESS; +} + +void anv_DestroyRenderPass( + VkDevice _device, + VkRenderPass _pass, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_render_pass, pass, _pass); + + anv_free2(&device->alloc, pAllocator, pass->subpass_attachments); + anv_free2(&device->alloc, pAllocator, pass); +} + +void anv_GetRenderAreaGranularity( + VkDevice device, + VkRenderPass renderPass, + VkExtent2D* pGranularity) +{ + *pGranularity = (VkExtent2D) { 1, 1 }; +} diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c new file mode 100644 index 00000000000..a7feefb540e --- /dev/null +++ b/src/intel/vulkan/anv_pipeline.c @@ -0,0 +1,1278 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, 
and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "util/mesa-sha1.h" +#include "anv_private.h" +#include "brw_nir.h" +#include "anv_nir.h" +#include "nir/spirv/nir_spirv.h" + +/* Needed for SWIZZLE macros */ +#include "program/prog_instruction.h" + +// Shader functions + +VkResult anv_CreateShaderModule( + VkDevice _device, + const VkShaderModuleCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkShaderModule* pShaderModule) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + module = anv_alloc2(&device->alloc, pAllocator, + sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (module == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + module->nir = NULL; + module->size = pCreateInfo->codeSize; + memcpy(module->data, pCreateInfo->pCode, module->size); + + _mesa_sha1_compute(module->data, module->size, module->sha1); + + *pShaderModule = anv_shader_module_to_handle(module); + + return VK_SUCCESS; +} + +void anv_DestroyShaderModule( + VkDevice _device, + VkShaderModule _module, 
+ const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, _module); + + anv_free2(&device->alloc, pAllocator, module); +} + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +/* Eventually, this will become part of anv_CreateShader. Unfortunately, + * we can't do that yet because we don't have the ability to copy nir. + */ +static nir_shader * +anv_shader_compile_to_nir(struct anv_device *device, + struct anv_shader_module *module, + const char *entrypoint_name, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info) +{ + if (strcmp(entrypoint_name, "main") != 0) { + anv_finishme("Multiple shaders per module not really supported"); + } + + const struct brw_compiler *compiler = + device->instance->physicalDevice.compiler; + const nir_shader_compiler_options *nir_options = + compiler->glsl_compiler_options[stage].NirOptions; + + nir_shader *nir; + nir_function *entry_point; + if (module->nir) { + /* Some things such as our meta clear/blit code will give us a NIR + * shader directly. 
In that case, we just ignore the SPIR-V entirely + * and just use the NIR shader */ + nir = module->nir; + nir->options = nir_options; + nir_validate_shader(nir); + + assert(exec_list_length(&nir->functions) == 1); + struct exec_node *node = exec_list_get_head(&nir->functions); + entry_point = exec_node_data(nir_function, node, node); + } else { + uint32_t *spirv = (uint32_t *) module->data; + assert(spirv[0] == SPIR_V_MAGIC_NUMBER); + assert(module->size % 4 == 0); + + uint32_t num_spec_entries = 0; + struct nir_spirv_specialization *spec_entries = NULL; + if (spec_info && spec_info->mapEntryCount > 0) { + num_spec_entries = spec_info->mapEntryCount; + spec_entries = malloc(num_spec_entries * sizeof(*spec_entries)); + for (uint32_t i = 0; i < num_spec_entries; i++) { + const uint32_t *data = + spec_info->pData + spec_info->pMapEntries[i].offset; + assert((const void *)(data + 1) <= + spec_info->pData + spec_info->dataSize); + + spec_entries[i].id = spec_info->pMapEntries[i].constantID; + spec_entries[i].data = *data; + } + } + + entry_point = spirv_to_nir(spirv, module->size / 4, + spec_entries, num_spec_entries, + stage, entrypoint_name, nir_options); + nir = entry_point->shader; + assert(nir->stage == stage); + nir_validate_shader(nir); + + free(spec_entries); + + nir_lower_returns(nir); + nir_validate_shader(nir); + + nir_inline_functions(nir); + nir_validate_shader(nir); + + /* Pick off the single entrypoint that we want */ + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (func != entry_point) + exec_node_remove(&func->node); + } + assert(exec_list_length(&nir->functions) == 1); + entry_point->name = ralloc_strdup(entry_point, "main"); + + nir_remove_dead_variables(nir, nir_var_shader_in); + nir_remove_dead_variables(nir, nir_var_shader_out); + nir_remove_dead_variables(nir, nir_var_system_value); + nir_validate_shader(nir); + + nir_lower_outputs_to_temporaries(entry_point->shader, entry_point); + + nir_lower_system_values(nir); + 
nir_validate_shader(nir); + } + + /* Vulkan uses the separate-shader linking model */ + nir->info.separate_shader = true; + + nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]); + + nir_shader_gather_info(nir, entry_point->impl); + + uint32_t indirect_mask = 0; + if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput) + indirect_mask |= (1 << nir_var_shader_in); + if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) + indirect_mask |= 1 << nir_var_local; + + nir_lower_indirect_derefs(nir, indirect_mask); + + return nir; +} + +void anv_DestroyPipeline( + VkDevice _device, + VkPipeline _pipeline, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + anv_reloc_list_finish(&pipeline->batch_relocs, + pAllocator ? pAllocator : &device->alloc); + if (pipeline->blend_state.map) + anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + anv_free2(&device->alloc, pAllocator, pipeline); +} + +static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, +/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = _3DPRIM_PATCHLIST_1 */ +}; + +static void +populate_sampler_prog_key(const struct brw_device_info *devinfo, + struct brw_sampler_prog_key_data *key) +{ + /* XXX: Handle texture swizzle on HSW- */ + for (int 
i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. (Works for BDW+) */ + key->swizzles[i] = SWIZZLE_XYZW; + } +} + +static void +populate_vs_prog_key(const struct brw_device_info *devinfo, + struct brw_vs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* XXX: Handle vertex input work-arounds */ + + /* XXX: Handle sampler_prog_key */ +} + +static void +populate_gs_prog_key(const struct brw_device_info *devinfo, + struct brw_gs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static void +populate_wm_prog_key(const struct brw_device_info *devinfo, + const VkGraphicsPipelineCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra, + struct brw_wm_prog_key *key) +{ + ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass); + + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* TODO: Fill out key->input_slots_valid */ + + /* Vulkan doesn't specify a default */ + key->high_quality_derivatives = false; + + /* XXX Vulkan doesn't appear to specify */ + key->clamp_fragment_color = false; + + /* Vulkan always specifies upper-left coordinates */ + key->drawable_height = 0; + key->render_to_fbo = false; + + if (extra && extra->color_attachment_count >= 0) { + key->nr_color_regions = extra->color_attachment_count; + } else { + key->nr_color_regions = + render_pass->subpasses[info->subpass].color_count; + } + + key->replicate_alpha = key->nr_color_regions > 1 && + info->pMultisampleState && + info->pMultisampleState->alphaToCoverageEnable; + + if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) { + /* We should probably pull this out of the shader, but it's fairly + * harmless to compute it and then let dead-code take care of it. 
+ */ + key->persample_shading = info->pMultisampleState->sampleShadingEnable; + if (key->persample_shading) + key->persample_2x = info->pMultisampleState->rasterizationSamples == 2; + + key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable; + key->compute_sample_id = info->pMultisampleState->sampleShadingEnable; + } +} + +static void +populate_cs_prog_key(const struct brw_device_info *devinfo, + struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static nir_shader * +anv_pipeline_compile(struct anv_pipeline *pipeline, + struct anv_shader_module *module, + const char *entrypoint, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info, + struct brw_stage_prog_data *prog_data) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + + nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, + module, entrypoint, stage, + spec_info); + if (nir == NULL) + return NULL; + + anv_nir_lower_push_constants(nir, compiler->scalar_stage[stage]); + + /* Figure out the number of parameters */ + prog_data->nr_params = 0; + + if (nir->num_uniforms > 0) { + /* If the shader uses any push constants at all, we'll just give + * them the maximum possible number + */ + prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); + } + + if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) + prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; + + if (pipeline->bindings[stage].image_count > 0) + prog_data->nr_params += pipeline->bindings[stage].image_count * + BRW_IMAGE_PARAM_SIZE; + + if (prog_data->nr_params > 0) { + /* XXX: I think we're leaking this */ + prog_data->param = (const union gl_constant_value **) + malloc(prog_data->nr_params * sizeof(union gl_constant_value *)); + + /* We now set the param values to be offsets into a + * anv_push_constant_data structure. 
Since the compiler doesn't + * actually dereference any of the gl_constant_value pointers in the + * params array, it doesn't really matter what we put here. + */ + struct anv_push_constants *null_data = NULL; + if (nir->num_uniforms > 0) { + /* Fill out the push constants section of the param array */ + for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) + prog_data->param[i] = (const union gl_constant_value *) + &null_data->client_data[i * sizeof(float)]; + } + } + + /* Set up dynamic offsets */ + anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); + + char surface_usage_mask[256], sampler_usage_mask[256]; + zero(surface_usage_mask); + zero(sampler_usage_mask); + + /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ + if (pipeline->layout) + anv_nir_apply_pipeline_layout(pipeline, nir, prog_data); + + /* All binding table offsets provided by apply_pipeline_layout() are + * relative to the start of the bindint table (plus MAX_RTS for VS). + */ + unsigned bias; + switch (stage) { + case MESA_SHADER_FRAGMENT: + bias = MAX_RTS; + break; + case MESA_SHADER_COMPUTE: + bias = 1; + break; + default: + bias = 0; + break; + } + prog_data->binding_table.size_bytes = 0; + prog_data->binding_table.texture_start = bias; + prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.ssbo_start = bias; + prog_data->binding_table.image_start = bias; + + /* Finish the optimization and compilation process */ + if (nir->stage != MESA_SHADER_VERTEX && + nir->stage != MESA_SHADER_TESS_CTRL && + nir->stage != MESA_SHADER_TESS_EVAL && + nir->stage != MESA_SHADER_FRAGMENT) { + nir = brw_nir_lower_io(nir, &pipeline->device->info, + compiler->scalar_stage[stage], false, NULL); + } + + /* nir_lower_io will only handle the push constants; we need to set this + * to the full number of possible uniforms. 
+ */ + nir->num_uniforms = prog_data->nr_params * 4; + + return nir; +} + +static void +anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, + gl_shader_stage stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, + [MESA_SHADER_TESS_CTRL] = 0, + [MESA_SHADER_TESS_EVAL] = 0, + [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, + [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, + [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= mesa_to_vk_shader_stage(stage); + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + align_u32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + +static VkResult +anv_pipeline_compile_vs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + struct brw_vs_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; + + populate_vs_prog_key(&pipeline->device->info, &key); + + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_VERTEX, spec_info, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir 
== NULL) + ralloc_steal(mem_ctx, nir); + + prog_data->inputs_read = nir->info.inputs_read; + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, false, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); + ralloc_free(mem_ctx); + } + + if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + pipeline->vs_simd8 = kernel; + pipeline->vs_vec4 = NO_KERNEL; + } else { + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = kernel; + } + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_gs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; + struct brw_gs_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; + + populate_gs_prog_key(&pipeline->device->info, &key); + + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, 
entrypoint, + MESA_SHADER_GEOMETRY, spec_info, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + /* TODO: SIMD8 GS */ + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); + + ralloc_free(mem_ctx); + } + + pipeline->gs_kernel = kernel; + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_fs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct brw_wm_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; + + populate_wm_prog_key(&pipeline->device->info, info, extra, &key); + + if (pipeline->use_repclear) + key.nr_color_regions = 1; + + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == 
NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.render_target_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_FRAGMENT, spec_info, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + nir_function_impl *impl = nir_shader_get_entrypoint(nir)->impl; + nir_foreach_variable_safe(var, &nir->outputs) { + if (var->data.location < FRAG_RESULT_DATA0) + continue; + + unsigned rt = var->data.location - FRAG_RESULT_DATA0; + if (rt >= key.nr_color_regions) { + var->data.mode = nir_var_local; + exec_node_remove(&var->node); + exec_list_push_tail(&impl->locals, &var->node); + } + } + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); + + ralloc_free(mem_ctx); + } + + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = kernel; + + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = kernel + prog_data->prog_offset_16; + } else { + pipeline->ps_simd16 = NO_KERNEL; + } + + pipeline->ps_ksp2 = 0; + pipeline->ps_grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd8; + pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp2 = pipeline->ps_simd16; + pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd16; + pipeline->ps_grf_start0 = 
prog_data->dispatch_grf_start_reg_16; + } + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, + &prog_data->base); + + return VK_SUCCESS; +} + +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct brw_cs_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; + + populate_cs_prog_key(&pipeline->device->info, &key); + + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.work_groups_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_COMPUTE, spec_info, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + prog_data->base.total_shared = nir->num_shared; + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, + -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); + ralloc_free(mem_ctx); + } + + pipeline->cs_simd = kernel; + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, + &prog_data->base); + + return VK_SUCCESS; +} + +static const int gen8_push_size = 32 * 
1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. */ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). 
+ */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. 
+ */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. + */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static void +anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *pCreateInfo) +{ + anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; + ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); + struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; + + pipeline->dynamic_state = default_dynamic_state; + + if (pCreateInfo->pDynamicState) { + /* Remove all of the states that are marked as dynamic */ + uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; + for (uint32_t s = 0; s < count; s++) + states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]); + } + + struct anv_dynamic_state *dynamic = &pipeline->dynamic_state; + + dynamic->viewport.count = 
pCreateInfo->pViewportState->viewportCount; + if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + typed_memcpy(dynamic->viewport.viewports, + pCreateInfo->pViewportState->pViewports, + pCreateInfo->pViewportState->viewportCount); + } + + dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; + if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + typed_memcpy(dynamic->scissor.scissors, + pCreateInfo->pViewportState->pScissors, + pCreateInfo->pViewportState->scissorCount); + } + + if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { + assert(pCreateInfo->pRasterizationState); + dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth; + } + + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { + assert(pCreateInfo->pRasterizationState); + dynamic->depth_bias.bias = + pCreateInfo->pRasterizationState->depthBiasConstantFactor; + dynamic->depth_bias.clamp = + pCreateInfo->pRasterizationState->depthBiasClamp; + dynamic->depth_bias.slope = + pCreateInfo->pRasterizationState->depthBiasSlopeFactor; + } + + if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { + assert(pCreateInfo->pColorBlendState); + typed_memcpy(dynamic->blend_constants, + pCreateInfo->pColorBlendState->blendConstants, 4); + } + + /* If there is no depthstencil attachment, then don't read + * pDepthStencilState. The Vulkan spec states that pDepthStencilState may + * be NULL in this case. Even if pDepthStencilState is non-NULL, there is + * no need to override the depthstencil defaults in + * anv_pipeline::dynamic_state when there is no depthstencil attachment. + * + * From the Vulkan spec (20 Oct 2015, git-aa308cb): + * + * pDepthStencilState [...] may only be NULL if renderPass and subpass + * specify a subpass that has no depth/stencil attachment. 
+ */ + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->depth_bounds.min = + pCreateInfo->pDepthStencilState->minDepthBounds; + dynamic->depth_bounds.max = + pCreateInfo->pDepthStencilState->maxDepthBounds; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_compare_mask.front = + pCreateInfo->pDepthStencilState->front.compareMask; + dynamic->stencil_compare_mask.back = + pCreateInfo->pDepthStencilState->back.compareMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_write_mask.front = + pCreateInfo->pDepthStencilState->front.writeMask; + dynamic->stencil_write_mask.back = + pCreateInfo->pDepthStencilState->back.writeMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_reference.front = + pCreateInfo->pDepthStencilState->front.reference; + dynamic->stencil_reference.back = + pCreateInfo->pDepthStencilState->back.reference; + } + } + + pipeline->dynamic_state_mask = states; +} + +static void +anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) +{ + struct anv_render_pass *renderpass = NULL; + struct anv_subpass *subpass = NULL; + + /* Assert that all required members of VkGraphicsPipelineCreateInfo are + * present, as explained by the Vulkan (20 Oct 2015, git-aa308cb), Section + * 4.2 Graphics Pipeline. 
+ */ + assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + renderpass = anv_render_pass_from_handle(info->renderPass); + assert(renderpass); + + if (renderpass != &anv_meta_dummy_renderpass) { + assert(info->subpass < renderpass->subpass_count); + subpass = &renderpass->subpasses[info->subpass]; + } + + assert(info->stageCount >= 1); + assert(info->pVertexInputState); + assert(info->pInputAssemblyState); + assert(info->pViewportState); + assert(info->pRasterizationState); + + if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) + assert(info->pDepthStencilState); + + if (subpass && subpass->color_count > 0) + assert(info->pColorBlendState); + + for (uint32_t i = 0; i < info->stageCount; ++i) { + switch (info->pStages[i].stage) { + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: + assert(info->pTessellationState); + break; + default: + break; + } + } +} + +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, + struct anv_device *device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc) +{ + VkResult result; + + anv_validate { + anv_pipeline_validate_create_info(pCreateInfo); + } + + if (alloc == NULL) + alloc = &device->alloc; + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + result = anv_reloc_list_init(&pipeline->batch_relocs, alloc); + if (result != VK_SUCCESS) + return result; + + pipeline->batch.alloc = alloc; + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); + + if (pCreateInfo->pTessellationState) + 
anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + + pipeline->use_repclear = extra && extra->use_repclear; + pipeline->writes_point_size = false; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. + */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); + + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_kernel = NO_KERNEL; + pipeline->ps_ksp0 = NO_KERNEL; + + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + ANV_FROM_HANDLE(anv_shader_module, module, + pCreateInfo->pStages[i].module); + + switch (pCreateInfo->pStages[i].stage) { + case VK_SHADER_STAGE_VERTEX_BIT: + anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + case VK_SHADER_STAGE_GEOMETRY_BIT: + anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + case VK_SHADER_STAGE_FRAGMENT_BIT: + anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + default: + anv_finishme("Unsupported shader stage"); + } + } + + if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { + /* Vertex is only optional if disable_vs is set */ + assert(extra->disable_vs); + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); + } + + gen7_compute_urb_partition(pipeline); + + const VkPipelineVertexInputStateCreateInfo *vi_info = + pCreateInfo->pVertexInputState; + + uint64_t inputs_read; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume 
the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. + */ + inputs_read = ~0ull; + } else { + inputs_read = pipeline->vs_prog_data.inputs_read; + } + + pipeline->vb_used = 0; + for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &vi_info->pVertexAttributeDescriptions[i]; + + if (inputs_read & (1 << (VERT_ATTRIB_GENERIC0 + desc->location))) + pipeline->vb_used |= 1 << desc->binding; + } + + for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { + const VkVertexInputBindingDescription *desc = + &vi_info->pVertexBindingDescriptions[i]; + + pipeline->binding_stride[desc->binding] = desc->stride; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. */ + switch (desc->inputRate) { + default: + case VK_VERTEX_INPUT_RATE_VERTEX: + pipeline->instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_RATE_INSTANCE: + pipeline->instancing_enable[desc->binding] = true; + break; + } + } + + const VkPipelineInputAssemblyStateCreateInfo *ia_info = + pCreateInfo->pInputAssemblyState; + pipeline->primitive_restart = ia_info->primitiveRestartEnable; + pipeline->topology = vk_to_gen_primitive_type[ia_info->topology]; + + if (extra && extra->use_rectlist) + pipeline->topology = _3DPRIM_RECTLIST; + + while (anv_block_pool_size(&device->scratch_block_pool) < + pipeline->total_scratch) + anv_block_pool_alloc(&device->scratch_block_pool); + + return VK_SUCCESS; +} + +VkResult +anv_graphics_pipeline_create( + VkDevice _device, + VkPipelineCache _cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + 
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + if (cache == NULL) + cache = &device->default_pipeline_cache; + + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + return gen75_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + else + return gen7_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + case 8: + return gen8_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + case 9: + return gen9_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_CreateGraphicsPipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_graphics_pipeline_create(_device, + pipelineCache, + &pCreateInfos[i], + NULL, pAllocator, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j], pAllocator); + } + + return result; + } + } + + return VK_SUCCESS; +} + +static VkResult anv_compute_pipeline_create( + VkDevice _device, + VkPipelineCache _cache, + const VkComputePipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + if (cache == NULL) + cache = &device->default_pipeline_cache; + + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + return gen75_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + else + return gen7_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + case 8: + return gen8_compute_pipeline_create(_device, cache, 
pCreateInfo, pAllocator, pPipeline); + case 9: + return gen9_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_CreateComputePipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_compute_pipeline_create(_device, pipelineCache, + &pCreateInfos[i], + pAllocator, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j], pAllocator); + } + + return result; + } + } + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c new file mode 100644 index 00000000000..c89bb2a2ee1 --- /dev/null +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -0,0 +1,405 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/mesa-sha1.h" +#include "util/debug.h" +#include "anv_private.h" + +/* Remaining work: + * + * - Compact binding table layout so it's tight and not dependent on + * descriptor set layout. + * + * - Review prog_data struct for size and cacheability: struct + * brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8 + * bit quantities etc; param, pull_param, and image_params are pointers, we + * just need the compation map. use bit fields for all bools, eg + * dual_src_blend. + */ + +void +anv_pipeline_cache_init(struct anv_pipeline_cache *cache, + struct anv_device *device) +{ + cache->device = device; + anv_state_stream_init(&cache->program_stream, + &device->instruction_block_pool); + pthread_mutex_init(&cache->mutex, NULL); + + cache->kernel_count = 0; + cache->total_size = 0; + cache->table_size = 1024; + const size_t byte_size = cache->table_size * sizeof(cache->table[0]); + cache->table = malloc(byte_size); + + /* We don't consider allocation failure fatal, we just start with a 0-sized + * cache. 
*/ + if (cache->table == NULL) + cache->table_size = 0; + else + memset(cache->table, 0xff, byte_size); +} + +void +anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) +{ + anv_state_stream_finish(&cache->program_stream); + pthread_mutex_destroy(&cache->mutex); + free(cache->table); +} + +struct cache_entry { + unsigned char sha1[20]; + uint32_t prog_data_size; + uint32_t kernel_size; + char prog_data[0]; + + /* kernel follows prog_data at next 64 byte aligned address */ +}; + +void +anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + struct mesa_sha1 *ctx; + + ctx = _mesa_sha1_init(); + _mesa_sha1_update(ctx, &key, sizeof(key)); + _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); + _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint)); + /* hash in shader stage, pipeline layout? */ + if (spec_info) { + _mesa_sha1_update(ctx, spec_info->pMapEntries, + spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]); + _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize); + } + _mesa_sha1_final(ctx, hash); +} + +uint32_t +anv_pipeline_cache_search(struct anv_pipeline_cache *cache, + const unsigned char *sha1, void *prog_data) +{ + const uint32_t mask = cache->table_size - 1; + const uint32_t start = (*(uint32_t *) sha1); + + for (uint32_t i = 0; i < cache->table_size; i++) { + const uint32_t index = (start + i) & mask; + const uint32_t offset = cache->table[index]; + + if (offset == ~0) + return NO_KERNEL; + + struct cache_entry *entry = + cache->program_stream.block_pool->map + offset; + if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { + if (prog_data) + memcpy(prog_data, entry->prog_data, entry->prog_data_size); + + const uint32_t preamble_size = + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + + return offset + preamble_size; + } + } + + return NO_KERNEL; +} + +static void 
+anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, + struct cache_entry *entry, uint32_t entry_offset) +{ + const uint32_t mask = cache->table_size - 1; + const uint32_t start = (*(uint32_t *) entry->sha1); + + /* We'll always be able to insert when we get here. */ + assert(cache->kernel_count < cache->table_size / 2); + + for (uint32_t i = 0; i < cache->table_size; i++) { + const uint32_t index = (start + i) & mask; + if (cache->table[index] == ~0) { + cache->table[index] = entry_offset; + break; + } + } + + /* We don't include the alignment padding bytes when we serialize, so + * don't include taht in the the total size. */ + cache->total_size += + sizeof(*entry) + entry->prog_data_size + entry->kernel_size; + cache->kernel_count++; +} + +static VkResult +anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) +{ + const uint32_t table_size = cache->table_size * 2; + const uint32_t old_table_size = cache->table_size; + const size_t byte_size = table_size * sizeof(cache->table[0]); + uint32_t *table; + uint32_t *old_table = cache->table; + + table = malloc(byte_size); + if (table == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + cache->table = table; + cache->table_size = table_size; + cache->kernel_count = 0; + cache->total_size = 0; + + memset(cache->table, 0xff, byte_size); + for (uint32_t i = 0; i < old_table_size; i++) { + const uint32_t offset = old_table[i]; + if (offset == ~0) + continue; + + struct cache_entry *entry = + cache->program_stream.block_pool->map + offset; + anv_pipeline_cache_add_entry(cache, entry, offset); + } + + free(old_table); + + return VK_SUCCESS; +} + +uint32_t +anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const unsigned char *sha1, + const void *kernel, size_t kernel_size, + const void *prog_data, size_t prog_data_size) +{ + pthread_mutex_lock(&cache->mutex); + struct cache_entry *entry; + + /* Meta pipelines don't have SPIR-V, so we can't hash them. 
Consequently, they just don't get cached.
+ */ + const struct cache_entry *entry = p; + const void *kernel = &entry->prog_data[entry->prog_data_size]; + + anv_pipeline_cache_upload_kernel(cache, entry->sha1, + kernel, entry->kernel_size, + entry->prog_data, entry->prog_data_size); + p = kernel + entry->kernel_size; + } +} + +VkResult anv_CreatePipelineCache( + VkDevice _device, + const VkPipelineCacheCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineCache* pPipelineCache) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline_cache *cache; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + cache = anv_alloc2(&device->alloc, pAllocator, + sizeof(*cache), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cache == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_pipeline_cache_init(cache, device); + + if (pCreateInfo->initialDataSize > 0) + anv_pipeline_cache_load(cache, + pCreateInfo->pInitialData, + pCreateInfo->initialDataSize); + + *pPipelineCache = anv_pipeline_cache_to_handle(cache); + + return VK_SUCCESS; +} + +void anv_DestroyPipelineCache( + VkDevice _device, + VkPipelineCache _cache, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + anv_pipeline_cache_finish(cache); + + anv_free2(&device->alloc, pAllocator, cache); +} + +VkResult anv_GetPipelineCacheData( + VkDevice _device, + VkPipelineCache _cache, + size_t* pDataSize, + void* pData) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + const size_t size = 4 + VK_UUID_SIZE + cache->total_size; + + if (pData == NULL) { + *pDataSize = size; + return VK_SUCCESS; + } + + if (*pDataSize < size) { + *pDataSize = 0; + return VK_INCOMPLETE; + } + + void *p = pData; + memcpy(p, &device->chipset_id, sizeof(device->chipset_id)); + p += sizeof(device->chipset_id); + + 
anv_device_get_cache_uuid(p); + p += VK_UUID_SIZE; + + struct cache_entry *entry; + for (uint32_t i = 0; i < cache->table_size; i++) { + if (cache->table[i] == ~0) + continue; + + entry = cache->program_stream.block_pool->map + cache->table[i]; + + memcpy(p, entry, sizeof(*entry) + entry->prog_data_size); + p += sizeof(*entry) + entry->prog_data_size; + + void *kernel = (void *) entry + + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + + memcpy(p, kernel, entry->kernel_size); + p += entry->kernel_size; + } + + return VK_SUCCESS; +} + +static void +anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, + struct anv_pipeline_cache *src) +{ + for (uint32_t i = 0; i < src->table_size; i++) { + if (src->table[i] == ~0) + continue; + + struct cache_entry *entry = + src->program_stream.block_pool->map + src->table[i]; + + if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) + continue; + + const void *kernel = (void *) entry + + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + anv_pipeline_cache_upload_kernel(dst, entry->sha1, + kernel, entry->kernel_size, + entry->prog_data, entry->prog_data_size); + } +} + +VkResult anv_MergePipelineCaches( + VkDevice _device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches) +{ + ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache); + + for (uint32_t i = 0; i < srcCacheCount; i++) { + ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]); + + anv_pipeline_cache_merge(dst, src); + } + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h new file mode 100644 index 00000000000..ba86333525e --- /dev/null +++ b/src/intel/vulkan/anv_private.h @@ -0,0 +1,1876 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including 
without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_VALGRIND +#include +#include +#define VG(x) x +#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) +#else +#define VG(x) +#endif + +#include "brw_device_info.h" +#include "util/macros.h" +#include "util/list.h" + +/* Pre-declarations needed for WSI entrypoints */ +struct wl_surface; +struct wl_display; +typedef struct xcb_connection_t xcb_connection_t; +typedef uint32_t xcb_visualid_t; +typedef uint32_t xcb_window_t; + +#define VK_USE_PLATFORM_XCB_KHR +#define VK_USE_PLATFORM_WAYLAND_KHR + +#define VK_PROTOTYPES +#include +#include +#include + +#include "anv_entrypoints.h" +#include "anv_gen_macros.h" +#include "brw_context.h" +#include "isl/isl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_VBS 32 +#define MAX_SETS 8 +#define MAX_RTS 8 +#define MAX_VIEWPORTS 16 +#define MAX_SCISSORS 16 +#define MAX_PUSH_CONSTANTS_SIZE 128 +#define MAX_DYNAMIC_BUFFERS 16 +#define MAX_IMAGES 8 +#define MAX_SAMPLES_LOG2 4 /* SKL supports 16 
samples */ + +#define anv_noreturn __attribute__((__noreturn__)) +#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static inline uint32_t +align_u32(uint32_t v, uint32_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +static inline uint64_t +align_u64(uint64_t v, uint64_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +static inline int32_t +align_i32(int32_t v, int32_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +/** Alignment must be a power of 2. */ +static inline bool +anv_is_aligned(uintmax_t n, uintmax_t a) +{ + assert(a == (a & -a)); + return (n & (a - 1)) == 0; +} + +static inline uint32_t +anv_minify(uint32_t n, uint32_t levels) +{ + if (unlikely(n == 0)) + return 0; + else + return MAX(n >> levels, 1); +} + +static inline float +anv_clamp_f(float f, float min, float max) +{ + assert(min < max); + + if (f > max) + return max; + else if (f < min) + return min; + else + return f; +} + +static inline bool +anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) +{ + if (*inout_mask & clear_mask) { + *inout_mask &= ~clear_mask; + return true; + } else { + return false; + } +} + +#define for_each_bit(b, dword) \ + for (uint32_t __dword = (dword); \ + (b) = __builtin_ffs(__dword) - 1, __dword; \ + __dword &= ~(1 << (b))) + +#define typed_memcpy(dest, src, count) ({ \ + static_assert(sizeof(*src) == sizeof(*dest), ""); \ + memcpy((dest), (src), (count) * sizeof(*(src))); \ +}) + +#define zero(x) (memset(&(x), 0, sizeof(x))) + +/* Define no kernel as 1, since that's an illegal offset for a kernel */ +#define NO_KERNEL 1 + +struct anv_common { + VkStructureType sType; + const void* pNext; +}; + +/* Whenever we generate an error, pass it through this function. Useful for + * debugging, where we can break on it. 
Only call at error site, not when + * propagating errors. Might be useful to plug in a stack trace here. + */ + +VkResult __vk_errorf(VkResult error, const char *file, int line, const char *format, ...); + +#ifdef DEBUG +#define vk_error(error) __vk_errorf(error, __FILE__, __LINE__, NULL); +#define vk_errorf(error, format, ...) __vk_errorf(error, __FILE__, __LINE__, format, ## __VA_ARGS__); +#else +#define vk_error(error) error +#define vk_errorf(error, format, ...) error +#endif + +void __anv_finishme(const char *file, int line, const char *format, ...) + anv_printflike(3, 4); +void anv_loge(const char *format, ...) anv_printflike(1, 2); +void anv_loge_v(const char *format, va_list va); + +/** + * Print a FINISHME message, including its source location. + */ +#define anv_finishme(format, ...) \ + __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); + +/* A non-fatal assert. Useful for debugging. */ +#ifdef DEBUG +#define anv_assert(x) ({ \ + if (unlikely(!(x))) \ + fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ +}) +#else +#define anv_assert(x) +#endif + +/** + * If a block of code is annotated with anv_validate, then the block runs only + * in debug builds. + */ +#ifdef DEBUG +#define anv_validate if (1) +#else +#define anv_validate if (0) +#endif + +void anv_abortf(const char *format, ...) anv_noreturn anv_printflike(1, 2); +void anv_abortfv(const char *format, va_list va) anv_noreturn; + +#define stub_return(v) \ + do { \ + anv_finishme("stub %s", __func__); \ + return (v); \ + } while (0) + +#define stub() \ + do { \ + anv_finishme("stub %s", __func__); \ + return; \ + } while (0) + +/** + * A dynamically growable, circular buffer. Elements are added at head and + * removed from tail. head and tail are free-running uint32_t indices and we + * only compute the modulo with size when accessing the array. This way, + * number of bytes in the queue is always head - tail, even in case of + * wraparound. 
+ */ + +struct anv_vector { + uint32_t head; + uint32_t tail; + uint32_t element_size; + uint32_t size; + void *data; +}; + +int anv_vector_init(struct anv_vector *queue, uint32_t element_size, uint32_t size); +void *anv_vector_add(struct anv_vector *queue); +void *anv_vector_remove(struct anv_vector *queue); + +static inline int +anv_vector_length(struct anv_vector *queue) +{ + return (queue->head - queue->tail) / queue->element_size; +} + +static inline void * +anv_vector_head(struct anv_vector *vector) +{ + assert(vector->tail < vector->head); + return (void *)((char *)vector->data + + ((vector->head - vector->element_size) & + (vector->size - 1))); +} + +static inline void * +anv_vector_tail(struct anv_vector *vector) +{ + return (void *)((char *)vector->data + (vector->tail & (vector->size - 1))); +} + +static inline void +anv_vector_finish(struct anv_vector *queue) +{ + free(queue->data); +} + +#define anv_vector_foreach(elem, queue) \ + static_assert(__builtin_types_compatible_p(__typeof__(queue), struct anv_vector *), ""); \ + for (uint32_t __anv_vector_offset = (queue)->tail; \ + elem = (queue)->data + (__anv_vector_offset & ((queue)->size - 1)), __anv_vector_offset < (queue)->head; \ + __anv_vector_offset += (queue)->element_size) + +struct anv_bo { + uint32_t gem_handle; + + /* Index into the current validation list. This is used by the + * validation list building alrogithm to track which buffers are already + * in the validation list so that we can ensure uniqueness. + */ + uint32_t index; + + /* Last known offset. This value is provided by the kernel when we + * execbuf and is used as the presumed offset for the next bunch of + * relocations. + */ + uint64_t offset; + + uint64_t size; + void *map; + + /* We need to set the WRITE flag on winsys bos so GEM will know we're + * writing to them and synchronize uses on other rings (eg if the display + * server uses the blitter ring). 
+ */ + bool is_winsys_bo; +}; + +/* Represents a lock-free linked list of "free" things. This is used by + * both the block pool and the state pools. Unfortunately, in order to + * solve the ABA problem, we can't use a single uint32_t head. + */ +union anv_free_list { + struct { + int32_t offset; + + /* A simple count that is incremented every time the head changes. */ + uint32_t count; + }; + uint64_t u64; +}; + +#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) + +struct anv_block_state { + union { + struct { + uint32_t next; + uint32_t end; + }; + uint64_t u64; + }; +}; + +struct anv_block_pool { + struct anv_device *device; + + struct anv_bo bo; + + /* The offset from the start of the bo to the "center" of the block + * pool. Pointers to allocated blocks are given by + * bo.map + center_bo_offset + offsets. + */ + uint32_t center_bo_offset; + + /* Current memory map of the block pool. This pointer may or may not + * point to the actual beginning of the block pool memory. If + * anv_block_pool_alloc_back has ever been called, then this pointer + * will point to the "center" position of the buffer and all offsets + * (negative or positive) given out by the block pool alloc functions + * will be valid relative to this pointer. + * + * In particular, map == bo.map + center_offset + */ + void *map; + int fd; + + /** + * Array of mmaps and gem handles owned by the block pool, reclaimed when + * the block pool is destroyed. + */ + struct anv_vector mmap_cleanups; + + uint32_t block_size; + + union anv_free_list free_list; + struct anv_block_state state; + + union anv_free_list back_free_list; + struct anv_block_state back_state; +}; + +/* Block pools are backed by a fixed-size 2GB memfd */ +#define BLOCK_POOL_MEMFD_SIZE (1ull << 32) + +/* The center of the block pool is also the middle of the memfd. This may + * change in the future if we decide differently for some reason. 
+ */ +#define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2) + +static inline uint32_t +anv_block_pool_size(struct anv_block_pool *pool) +{ + return pool->state.end + pool->back_state.end; +} + +struct anv_state { + int32_t offset; + uint32_t alloc_size; + void *map; +}; + +struct anv_fixed_size_state_pool { + size_t state_size; + union anv_free_list free_list; + struct anv_block_state block; +}; + +#define ANV_MIN_STATE_SIZE_LOG2 6 +#define ANV_MAX_STATE_SIZE_LOG2 10 + +#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2) + +struct anv_state_pool { + struct anv_block_pool *block_pool; + struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; +}; + +struct anv_state_stream_block; + +struct anv_state_stream { + struct anv_block_pool *block_pool; + + /* The current working block */ + struct anv_state_stream_block *block; + + /* Offset at which the current block starts */ + uint32_t start; + /* Offset at which to allocate the next state */ + uint32_t next; + /* Offset at which the current block ends */ + uint32_t end; +}; + +#define CACHELINE_SIZE 64 +#define CACHELINE_MASK 63 + +static inline void +anv_clflush_range(void *start, size_t size) +{ + void *p = (void *) (((uintptr_t) start) & ~CACHELINE_MASK); + void *end = start + size; + + __builtin_ia32_mfence(); + while (p < end) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } +} + +static void inline +anv_state_clflush(struct anv_state state) +{ + anv_clflush_range(state.map, state.alloc_size); +} + +void anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_block_pool_finish(struct anv_block_pool *pool); +int32_t anv_block_pool_alloc(struct anv_block_pool *pool); +int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool); +void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset); +void anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool); +void 
anv_state_pool_finish(struct anv_state_pool *pool); +struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, + size_t state_size, size_t alignment); +void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state); +void anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool); +void anv_state_stream_finish(struct anv_state_stream *stream); +struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment); + +/** + * Implements a pool of re-usable BOs. The interface is identical to that + * of block_pool except that each block is its own BO. + */ +struct anv_bo_pool { + struct anv_device *device; + + uint32_t bo_size; + + void *free_list; +}; + +void anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_bo_pool_finish(struct anv_bo_pool *pool); +VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); +void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); + + +void *anv_resolve_entrypoint(uint32_t index); + +extern struct anv_dispatch_table dtable; + +#define ANV_CALL(func) ({ \ + if (dtable.func == NULL) { \ + size_t idx = offsetof(struct anv_dispatch_table, func) / sizeof(void *); \ + dtable.entrypoints[idx] = anv_resolve_entrypoint(idx); \ + } \ + dtable.func; \ +}) + +static inline void * +anv_alloc(const VkAllocationCallbacks *alloc, + size_t size, size_t align, + VkSystemAllocationScope scope) +{ + return alloc->pfnAllocation(alloc->pUserData, size, align, scope); +} + +static inline void * +anv_realloc(const VkAllocationCallbacks *alloc, + void *ptr, size_t size, size_t align, + VkSystemAllocationScope scope) +{ + return alloc->pfnReallocation(alloc->pUserData, ptr, size, align, scope); +} + +static inline void +anv_free(const VkAllocationCallbacks *alloc, void *data) +{ + alloc->pfnFree(alloc->pUserData, data); +} + +static inline void * +anv_alloc2(const 
VkAllocationCallbacks *parent_alloc, + const VkAllocationCallbacks *alloc, + size_t size, size_t align, + VkSystemAllocationScope scope) +{ + if (alloc) + return anv_alloc(alloc, size, align, scope); + else + return anv_alloc(parent_alloc, size, align, scope); +} + +static inline void +anv_free2(const VkAllocationCallbacks *parent_alloc, + const VkAllocationCallbacks *alloc, + void *data) +{ + if (alloc) + anv_free(alloc, data); + else + anv_free(parent_alloc, data); +} + +struct anv_physical_device { + VK_LOADER_DATA _loader_data; + + struct anv_instance * instance; + uint32_t chipset_id; + const char * path; + const char * name; + const struct brw_device_info * info; + uint64_t aperture_size; + struct brw_compiler * compiler; + struct isl_device isl_dev; +}; + +struct anv_wsi_interaface; + +#define VK_ICD_WSI_PLATFORM_MAX 5 + +struct anv_instance { + VK_LOADER_DATA _loader_data; + + VkAllocationCallbacks alloc; + + uint32_t apiVersion; + int physicalDeviceCount; + struct anv_physical_device physicalDevice; + + struct anv_wsi_interface * wsi[VK_ICD_WSI_PLATFORM_MAX]; +}; + +VkResult anv_init_wsi(struct anv_instance *instance); +void anv_finish_wsi(struct anv_instance *instance); + +struct anv_meta_state { + VkAllocationCallbacks alloc; + + /** + * Use array element `i` for images with `2^i` samples. + */ + struct { + /** + * Pipeline N is used to clear color attachment N of the current + * subpass. + * + * HACK: We use one pipeline per color attachment to work around the + * compiler's inability to dynamically set the render target index of + * the render target write message. + */ + struct anv_pipeline *color_pipelines[MAX_RTS]; + + struct anv_pipeline *depth_only_pipeline; + struct anv_pipeline *stencil_only_pipeline; + struct anv_pipeline *depthstencil_pipeline; + } clear[1 + MAX_SAMPLES_LOG2]; + + struct { + VkRenderPass render_pass; + + /** Pipeline that blits from a 1D image. */ + VkPipeline pipeline_1d_src; + + /** Pipeline that blits from a 2D image. 
*/ + VkPipeline pipeline_2d_src; + + /** Pipeline that blits from a 3D image. */ + VkPipeline pipeline_3d_src; + + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } blit; + + struct { + /** Pipeline [i] resolves an image with 2^(i+1) samples. */ + VkPipeline pipelines[MAX_SAMPLES_LOG2]; + + VkRenderPass pass; + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } resolve; +}; + +struct anv_queue { + VK_LOADER_DATA _loader_data; + + struct anv_device * device; + + struct anv_state_pool * pool; +}; + +struct anv_pipeline_cache { + struct anv_device * device; + struct anv_state_stream program_stream; + pthread_mutex_t mutex; + + uint32_t total_size; + uint32_t table_size; + uint32_t kernel_count; + uint32_t *table; +}; + +void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, + struct anv_device *device); +void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); +uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, + const unsigned char *sha1, void *prog_data); +uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const unsigned char *sha1, + const void *kernel, + size_t kernel_size, + const void *prog_data, + size_t prog_data_size); + +struct anv_device { + VK_LOADER_DATA _loader_data; + + VkAllocationCallbacks alloc; + + struct anv_instance * instance; + uint32_t chipset_id; + struct brw_device_info info; + struct isl_device isl_dev; + int context_id; + int fd; + + struct anv_bo_pool batch_bo_pool; + + struct anv_block_pool dynamic_state_block_pool; + struct anv_state_pool dynamic_state_pool; + + struct anv_block_pool instruction_block_pool; + struct anv_pipeline_cache default_pipeline_cache; + + struct anv_block_pool surface_state_block_pool; + struct anv_state_pool surface_state_pool; + + struct anv_bo workaround_bo; + + struct anv_meta_state meta_state; + + struct anv_state border_colors; + + struct anv_queue queue; + + struct anv_block_pool scratch_block_pool; + 
+ pthread_mutex_t mutex; +}; + +VkResult gen7_init_device_state(struct anv_device *device); +VkResult gen75_init_device_state(struct anv_device *device); +VkResult gen8_init_device_state(struct anv_device *device); +VkResult gen9_init_device_state(struct anv_device *device); + +void anv_device_get_cache_uuid(void *uuid); + + +void* anv_gem_mmap(struct anv_device *device, + uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); +void anv_gem_munmap(void *p, uint64_t size); +uint32_t anv_gem_create(struct anv_device *device, size_t size); +void anv_gem_close(struct anv_device *device, uint32_t gem_handle); +uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size); +int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns); +int anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf); +int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, + uint32_t stride, uint32_t tiling); +int anv_gem_create_context(struct anv_device *device); +int anv_gem_destroy_context(struct anv_device *device, int context); +int anv_gem_get_param(int fd, uint32_t param); +bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling); +int anv_gem_get_aperture(int fd, uint64_t *size); +int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); +uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); +int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching); +int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t write_domain); + +VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); + +struct anv_reloc_list { + size_t num_relocs; + size_t array_length; + struct drm_i915_gem_relocation_entry * relocs; + struct anv_bo ** reloc_bos; +}; + +VkResult anv_reloc_list_init(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc); +void 
anv_reloc_list_finish(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc); + +uint64_t anv_reloc_list_add(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + uint32_t offset, struct anv_bo *target_bo, + uint32_t delta); + +struct anv_batch_bo { + /* Link in the anv_cmd_buffer.owned_batch_bos list */ + struct list_head link; + + struct anv_bo bo; + + /* Bytes actually consumed in this batch BO */ + size_t length; + + /* Last seen surface state block pool bo offset */ + uint32_t last_ss_pool_bo_offset; + + struct anv_reloc_list relocs; +}; + +struct anv_batch { + const VkAllocationCallbacks * alloc; + + void * start; + void * end; + void * next; + + struct anv_reloc_list * relocs; + + /* This callback is called (with the associated user data) in the event + * that the batch runs out of space. + */ + VkResult (*extend_cb)(struct anv_batch *, void *); + void * user_data; +}; + +void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); +void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); +uint64_t anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t offset); +VkResult anv_device_submit_simple_batch(struct anv_device *device, + struct anv_batch *batch); + +struct anv_address { + struct anv_bo *bo; + uint32_t offset; +}; + +#define __gen_address_type struct anv_address +#define __gen_user_data struct anv_batch + +static inline uint64_t +__gen_combine_address(struct anv_batch *batch, void *location, + const struct anv_address address, uint32_t delta) +{ + if (address.bo == NULL) { + return address.offset + delta; + } else { + assert(batch->start <= location && location < batch->end); + + return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta); + } +} + +/* Wrapper macros needed to work around preprocessor argument issues. In + * particular, arguments don't get pre-evaluated if they are concatenated. 
+ * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the + * GENX macro won't get evaluated if the emit macro contains "cmd ## foo". + * We can work around this easily enough with these helpers. + */ +#define __anv_cmd_length(cmd) cmd ## _length +#define __anv_cmd_length_bias(cmd) cmd ## _length_bias +#define __anv_cmd_header(cmd) cmd ## _header +#define __anv_cmd_pack(cmd) cmd ## _pack + +#define anv_batch_emit(batch, cmd, ...) do { \ + void *__dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ + struct cmd __template = { \ + __anv_cmd_header(cmd), \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(cmd)(batch, __dst, &__template); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, __anv_cmd_length(cmd) * 4)); \ + } while (0) + +#define anv_batch_emitn(batch, n, cmd, ...) ({ \ + void *__dst = anv_batch_emit_dwords(batch, n); \ + struct cmd __template = { \ + __anv_cmd_header(cmd), \ + .DWordLength = n - __anv_cmd_length_bias(cmd), \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(cmd)(batch, __dst, &__template); \ + __dst; \ + }) + +#define anv_batch_emit_merge(batch, dwords0, dwords1) \ + do { \ + uint32_t *dw; \ + \ + static_assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1), "mismatch merge"); \ + dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \ + for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \ + dw[i] = (dwords0)[i] | (dwords1)[i]; \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ + } while (0) + +#define anv_state_pool_emit(pool, cmd, align, ...) 
({ \ + const uint32_t __size = __anv_cmd_length(cmd) * 4; \ + struct anv_state __state = \ + anv_state_pool_alloc((pool), __size, align); \ + struct cmd __template = { \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(cmd)(NULL, __state.map, &__template); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(__state.map, __anv_cmd_length(cmd) * 4)); \ + if (!(pool)->block_pool->device->info.has_llc) \ + anv_state_clflush(__state); \ + __state; \ + }) + +#define GEN7_MOCS (struct GEN7_MEMORY_OBJECT_CONTROL_STATE) { \ + .GraphicsDataTypeGFDT = 0, \ + .LLCCacheabilityControlLLCCC = 0, \ + .L3CacheabilityControlL3CC = 1, \ +} + +#define GEN75_MOCS (struct GEN75_MEMORY_OBJECT_CONTROL_STATE) { \ + .LLCeLLCCacheabilityControlLLCCC = 0, \ + .L3CacheabilityControlL3CC = 1, \ +} + +#define GEN8_MOCS { \ + .MemoryTypeLLCeLLCCacheabilityControl = WB, \ + .TargetCache = L3DefertoPATforLLCeLLCselection, \ + .AgeforQUADLRU = 0 \ + } + +/* Skylake: MOCS is now an index into an array of 62 different caching + * configurations programmed by the kernel. 
+ */ + +#define GEN9_MOCS { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 2 \ + } + +#define GEN9_MOCS_PTE { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 1 \ + } + +struct anv_device_memory { + struct anv_bo bo; + uint32_t type_index; + VkDeviceSize map_size; + void * map; +}; + +/** + * Header for Vertex URB Entry (VUE) + */ +struct anv_vue_header { + uint32_t Reserved; + uint32_t RTAIndex; /* RenderTargetArrayIndex */ + uint32_t ViewportIndex; + float PointWidth; +}; + +struct anv_descriptor_set_binding_layout { + /* Number of array elements in this binding */ + uint16_t array_size; + + /* Index into the flattend descriptor set */ + uint16_t descriptor_index; + + /* Index into the dynamic state array for a dynamic buffer */ + int16_t dynamic_offset_index; + + /* Index into the descriptor set buffer views */ + int16_t buffer_index; + + struct { + /* Index into the binding table for the associated surface */ + int16_t surface_index; + + /* Index into the sampler table for the associated sampler */ + int16_t sampler_index; + + /* Index into the image table for the associated image */ + int16_t image_index; + } stage[MESA_SHADER_STAGES]; + + /* Immutable samplers (or NULL if no immutable samplers) */ + struct anv_sampler **immutable_samplers; +}; + +struct anv_descriptor_set_layout { + /* Number of bindings in this descriptor set */ + uint16_t binding_count; + + /* Total size of the descriptor set with room for all array entries */ + uint16_t size; + + /* Shader stages affected by this descriptor set */ + uint16_t shader_stages; + + /* Number of buffers in this descriptor set */ + uint16_t buffer_count; + + /* Number of dynamic offsets used by this descriptor set */ + uint16_t dynamic_offset_count; + + /* Bindings in this descriptor set */ + struct anv_descriptor_set_binding_layout binding[0]; +}; + +struct anv_descriptor { + VkDescriptorType type; + + union { + struct { + struct anv_image_view *image_view; + 
struct anv_sampler *sampler; + }; + + struct anv_buffer_view *buffer_view; + }; +}; + +struct anv_descriptor_set { + const struct anv_descriptor_set_layout *layout; + uint32_t buffer_count; + struct anv_buffer_view *buffer_views; + struct anv_descriptor descriptors[0]; +}; + +VkResult +anv_descriptor_set_create(struct anv_device *device, + const struct anv_descriptor_set_layout *layout, + struct anv_descriptor_set **out_set); + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set); + +struct anv_pipeline_binding { + /* The descriptor set this surface corresponds to */ + uint16_t set; + + /* Offset into the descriptor set */ + uint16_t offset; +}; + +struct anv_pipeline_layout { + struct { + struct anv_descriptor_set_layout *layout; + uint32_t dynamic_offset_start; + } set[MAX_SETS]; + + uint32_t num_sets; + + struct { + bool has_dynamic_offsets; + } stage[MESA_SHADER_STAGES]; +}; + +struct anv_buffer { + struct anv_device * device; + VkDeviceSize size; + + VkBufferUsageFlags usage; + + /* Set when bound */ + struct anv_bo * bo; + VkDeviceSize offset; +}; + +enum anv_cmd_dirty_bits { + ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */ + ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */ + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */ + ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */ + ANV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1, + ANV_CMD_DIRTY_PIPELINE = 1 << 9, + 
ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10, + ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11, +}; +typedef uint32_t anv_cmd_dirty_mask_t; + +struct anv_vertex_binding { + struct anv_buffer * buffer; + VkDeviceSize offset; +}; + +struct anv_push_constants { + /* Current allocated size of this push constants data structure. + * Because a decent chunk of it may not be used (images on SKL, for + * instance), we won't actually allocate the entire structure up-front. + */ + uint32_t size; + + /* Push constant data provided by the client through vkPushConstants */ + uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; + + /* Our hardware only provides zero-based vertex and instance id so, in + * order to satisfy the vulkan requirements, we may have to push one or + * both of these into the shader. + */ + uint32_t base_vertex; + uint32_t base_instance; + + /* Offsets and ranges for dynamically bound buffers */ + struct { + uint32_t offset; + uint32_t range; + } dynamic[MAX_DYNAMIC_BUFFERS]; + + /* Image data for image_load_store on pre-SKL */ + struct brw_image_param images[MAX_IMAGES]; +}; + +struct anv_dynamic_state { + struct { + uint32_t count; + VkViewport viewports[MAX_VIEWPORTS]; + } viewport; + + struct { + uint32_t count; + VkRect2D scissors[MAX_SCISSORS]; + } scissor; + + float line_width; + + struct { + float bias; + float clamp; + float slope; + } depth_bias; + + float blend_constants[4]; + + struct { + float min; + float max; + } depth_bounds; + + struct { + uint32_t front; + uint32_t back; + } stencil_compare_mask; + + struct { + uint32_t front; + uint32_t back; + } stencil_write_mask; + + struct { + uint32_t front; + uint32_t back; + } stencil_reference; +}; + +extern const struct anv_dynamic_state default_dynamic_state; + +void anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask); + +/** + * Attachment state when recording a renderpass instance. + * + * The clear value is valid only if there exists a pending clear. 
+ */ +struct anv_attachment_state { + VkImageAspectFlags pending_clear_aspects; + VkClearValue clear_value; +}; + +/** State required while building cmd buffer */ +struct anv_cmd_state { + /* PIPELINE_SELECT.PipelineSelection */ + uint32_t current_pipeline; + uint32_t current_l3_config; + uint32_t vb_dirty; + anv_cmd_dirty_mask_t dirty; + anv_cmd_dirty_mask_t compute_dirty; + uint32_t num_workgroups_offset; + struct anv_bo *num_workgroups_bo; + VkShaderStageFlags descriptors_dirty; + VkShaderStageFlags push_constants_dirty; + uint32_t scratch_size; + struct anv_pipeline * pipeline; + struct anv_pipeline * compute_pipeline; + struct anv_framebuffer * framebuffer; + struct anv_render_pass * pass; + struct anv_subpass * subpass; + uint32_t restart_index; + struct anv_vertex_binding vertex_bindings[MAX_VBS]; + struct anv_descriptor_set * descriptors[MAX_SETS]; + struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; + struct anv_state binding_tables[MESA_SHADER_STAGES]; + struct anv_state samplers[MESA_SHADER_STAGES]; + struct anv_dynamic_state dynamic; + bool need_query_wa; + + /** + * Array length is anv_cmd_state::pass::attachment_count. Array content is + * valid only when recording a render pass instance. + */ + struct anv_attachment_state * attachments; + + struct { + struct anv_buffer * index_buffer; + uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */ + uint32_t index_offset; + } gen7; +}; + +struct anv_cmd_pool { + VkAllocationCallbacks alloc; + struct list_head cmd_buffers; +}; + +#define ANV_CMD_BUFFER_BATCH_SIZE 8192 + +enum anv_cmd_buffer_exec_mode { + ANV_CMD_BUFFER_EXEC_MODE_PRIMARY, + ANV_CMD_BUFFER_EXEC_MODE_EMIT, + ANV_CMD_BUFFER_EXEC_MODE_CHAIN, + ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, +}; + +struct anv_cmd_buffer { + VK_LOADER_DATA _loader_data; + + struct anv_device * device; + + struct anv_cmd_pool * pool; + struct list_head pool_link; + + struct anv_batch batch; + + /* Fields required for the actual chain of anv_batch_bo's. 
+ * + * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). + */ + struct list_head batch_bos; + enum anv_cmd_buffer_exec_mode exec_mode; + + /* A vector of anv_batch_bo pointers for every batch or surface buffer + * referenced by this command buffer + * + * initialized by anv_cmd_buffer_init_batch_bo_chain() + */ + struct anv_vector seen_bbos; + + /* A vector of int32_t's for every block of binding tables. + * + * initialized by anv_cmd_buffer_init_batch_bo_chain() + */ + struct anv_vector bt_blocks; + uint32_t bt_next; + struct anv_reloc_list surface_relocs; + + /* Information needed for execbuf + * + * These fields are generated by anv_cmd_buffer_prepare_execbuf(). + */ + struct { + struct drm_i915_gem_execbuffer2 execbuf; + + struct drm_i915_gem_exec_object2 * objects; + uint32_t bo_count; + struct anv_bo ** bos; + + /* Allocated length of the 'objects' and 'bos' arrays */ + uint32_t array_length; + + bool need_reloc; + } execbuf2; + + /* Serial for tracking buffer completion */ + uint32_t serial; + + /* Stream objects for storing temporary data */ + struct anv_state_stream surface_state_stream; + struct anv_state_stream dynamic_state_stream; + + VkCommandBufferUsageFlags usage_flags; + VkCommandBufferLevel level; + + struct anv_cmd_state state; +}; + +VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary); +void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); + +VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *bt_state); +VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned 
stage, struct anv_state *state); +uint32_t gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, + uint32_t stages); + +struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + const void *data, uint32_t size, uint32_t alignment); +struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment); + +struct anv_address +anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer); +struct anv_state +anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, + uint32_t entries, uint32_t *state_offset); +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer); +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment); + +VkResult +anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); + +void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); + +void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo *info); + +void gen7_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void gen75_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void gen8_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct 
anv_subpass *subpass); +void gen9_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + +void gen7_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); +void gen75_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); +void gen8_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); +void gen9_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); + +void gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); + +void gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); + +struct anv_state +anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage); +struct anv_state +anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer); + +const struct anv_image_view * +anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); + +struct anv_fence { + struct anv_bo bo; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + bool ready; +}; + +struct anv_event { + uint64_t semaphore; + struct anv_state state; +}; + +struct nir_shader; + +struct anv_shader_module { + struct nir_shader * nir; + + unsigned char sha1[20]; + uint32_t size; + char 
data[0]; +}; + +void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info); + +static inline gl_shader_stage +vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) +{ + assert(__builtin_popcount(vk_stage) == 1); + return ffs(vk_stage) - 1; +} + +static inline VkShaderStageFlagBits +mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) +{ + return (1 << mesa_stage); +} + +#define ANV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1) + +#define anv_foreach_stage(stage, stage_bits) \ + for (gl_shader_stage stage, \ + __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \ + stage = __builtin_ffs(__tmp) - 1, __tmp; \ + __tmp &= ~(1 << (stage))) + +struct anv_pipeline_bind_map { + uint32_t surface_count; + uint32_t sampler_count; + uint32_t image_count; + + struct anv_pipeline_binding * surface_to_descriptor; + struct anv_pipeline_binding * sampler_to_descriptor; +}; + +struct anv_pipeline { + struct anv_device * device; + struct anv_batch batch; + uint32_t batch_data[512]; + struct anv_reloc_list batch_relocs; + uint32_t dynamic_state_mask; + struct anv_dynamic_state dynamic_state; + + struct anv_pipeline_layout * layout; + struct anv_pipeline_bind_map bindings[MESA_SHADER_STAGES]; + + bool use_repclear; + + struct brw_vs_prog_data vs_prog_data; + struct brw_wm_prog_data wm_prog_data; + struct brw_gs_prog_data gs_prog_data; + struct brw_cs_prog_data cs_prog_data; + bool writes_point_size; + struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; + uint32_t scratch_start[MESA_SHADER_STAGES]; + uint32_t total_scratch; + struct { + uint32_t vs_start; + uint32_t vs_size; + uint32_t nr_vs_entries; + uint32_t gs_start; + uint32_t gs_size; + uint32_t nr_gs_entries; + } urb; + + VkShaderStageFlags active_stages; + struct anv_state blend_state; + uint32_t vs_simd8; + uint32_t vs_vec4; + uint32_t ps_simd8; + uint32_t ps_simd16; + uint32_t ps_ksp0; + uint32_t 
ps_ksp2; + uint32_t ps_grf_start0; + uint32_t ps_grf_start2; + uint32_t gs_kernel; + uint32_t cs_simd; + + uint32_t vb_used; + uint32_t binding_stride[MAX_VBS]; + bool instancing_enable[MAX_VBS]; + bool primitive_restart; + uint32_t topology; + + uint32_t cs_thread_width_max; + uint32_t cs_right_mask; + + struct { + uint32_t sf[7]; + uint32_t depth_stencil_state[3]; + } gen7; + + struct { + uint32_t sf[4]; + uint32_t raster[5]; + uint32_t wm_depth_stencil[3]; + } gen8; + + struct { + uint32_t wm_depth_stencil[4]; + } gen9; +}; + +struct anv_graphics_pipeline_create_info { + /** + * If non-negative, overrides the color attachment count of the pipeline's + * subpass. + */ + int8_t color_attachment_count; + + bool use_repclear; + bool disable_viewport; + bool disable_scissor; + bool disable_vs; + bool use_rectlist; +}; + +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc); + +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info); + +VkResult +anv_graphics_pipeline_create(VkDevice device, + VkPipelineCache cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen7_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen75_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const 
VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen8_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen9_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen7_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen75_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen8_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen9_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +struct anv_format_swizzle { + unsigned r:2; + unsigned g:2; + unsigned b:2; + unsigned a:2; +}; + +struct anv_format { + const VkFormat vk_format; + const char *name; + enum isl_format isl_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ + const struct isl_format_layout *isl_layout; + struct anv_format_swizzle swizzle; + bool has_depth; + bool has_stencil; +}; + +const struct anv_format * +anv_format_for_vk_format(VkFormat format); + +enum isl_format 
+anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, + VkImageTiling tiling, struct anv_format_swizzle *swizzle); + +static inline bool +anv_format_is_color(const struct anv_format *format) +{ + return !format->has_depth && !format->has_stencil; +} + +static inline bool +anv_format_is_depth_or_stencil(const struct anv_format *format) +{ + return format->has_depth || format->has_stencil; +} + +/** + * Subsurface of an anv_image. + */ +struct anv_surface { + struct isl_surf isl; + + /** + * Offset from VkImage's base address, as bound by vkBindImageMemory(). + */ + uint32_t offset; +}; + +struct anv_image { + VkImageType type; + /* The original VkFormat provided by the client. This may not match any + * of the actual surface formats. + */ + VkFormat vk_format; + const struct anv_format *format; + VkExtent3D extent; + uint32_t levels; + uint32_t array_size; + uint32_t samples; /**< VkImageCreateInfo::samples */ + VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ + VkImageTiling tiling; /** VkImageCreateInfo::tiling */ + + VkDeviceSize size; + uint32_t alignment; + + /* Set when bound */ + struct anv_bo *bo; + VkDeviceSize offset; + + /** + * Image subsurfaces + * + * For each foo, anv_image::foo_surface is valid if and only if + * anv_image::format has a foo aspect. + * + * The hardware requires that the depth buffer and stencil buffer be + * separate surfaces. From Vulkan's perspective, though, depth and stencil + * reside in the same VkImage. To satisfy both the hardware and Vulkan, we + * allocate the depth and stencil buffers as separate surfaces in the same + * bo. + */ + union { + struct anv_surface color_surface; + + struct { + struct anv_surface depth_surface; + struct anv_surface stencil_surface; + }; + }; +}; + +struct anv_image_view { + const struct anv_image *image; /**< VkImageViewCreateInfo::image */ + struct anv_bo *bo; + uint32_t offset; /**< Offset into bo. 
*/ + + VkImageAspectFlags aspect_mask; + VkFormat vk_format; + VkComponentMapping swizzle; + enum isl_format format; + uint32_t base_layer; + uint32_t base_mip; + VkExtent3D level_0_extent; /**< Extent of ::image's level 0 adjusted for ::vk_format. */ + VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ + + /** RENDER_SURFACE_STATE when using image as a color render target. */ + struct anv_state color_rt_surface_state; + + /** RENDER_SURFACE_STATE when using image as a sampler surface. */ + struct anv_state sampler_surface_state; + + /** RENDER_SURFACE_STATE when using image as a storage image. */ + struct anv_state storage_surface_state; +}; + +struct anv_image_create_info { + const VkImageCreateInfo *vk_info; + isl_tiling_flags_t isl_tiling_flags; + uint32_t stride; +}; + +VkResult anv_image_create(VkDevice _device, + const struct anv_image_create_info *info, + const VkAllocationCallbacks* alloc, + VkImage *pImage); + +struct anv_surface * +anv_image_get_surface_for_aspect_mask(struct anv_image *image, + VkImageAspectFlags aspect_mask); + +void anv_image_view_init(struct anv_image_view *view, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer, + uint32_t offset); + +void +anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen7_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen75_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen8_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + 
VkImageUsageFlagBits usage); +void +gen9_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); + +struct anv_buffer_view { + enum isl_format format; /**< VkBufferViewCreateInfo::format */ + struct anv_bo *bo; + uint32_t offset; /**< Offset into bo. */ + uint64_t range; /**< VkBufferViewCreateInfo::range */ + + struct anv_state surface_state; + struct anv_state storage_surface_state; +}; + +const struct anv_format * +anv_format_for_descriptor_type(VkDescriptorType type); + +void anv_fill_buffer_surface_state(struct anv_device *device, + struct anv_state state, + enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); + +void gen7_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); +void gen75_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); +void gen8_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); +void gen9_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); + +void anv_image_view_fill_image_param(struct anv_device *device, + struct anv_image_view *view, + struct brw_image_param *param); +void anv_buffer_view_fill_image_param(struct anv_device *device, + struct anv_buffer_view *view, + struct brw_image_param *param); + +struct anv_sampler { + uint32_t state[4]; +}; + +struct anv_framebuffer { + uint32_t width; + uint32_t height; + uint32_t layers; + + uint32_t attachment_count; + struct anv_image_view * attachments[0]; +}; + +struct anv_subpass { + uint32_t input_count; + uint32_t * input_attachments; + uint32_t color_count; + uint32_t * color_attachments; + uint32_t * resolve_attachments; + uint32_t depth_stencil_attachment; + + /** Subpass has at least 
one resolve attachment */ + bool has_resolve; +}; + +struct anv_render_pass_attachment { + const struct anv_format *format; + uint32_t samples; + VkAttachmentLoadOp load_op; + VkAttachmentLoadOp stencil_load_op; +}; + +struct anv_render_pass { + uint32_t attachment_count; + uint32_t subpass_count; + uint32_t * subpass_attachments; + struct anv_render_pass_attachment * attachments; + struct anv_subpass subpasses[0]; +}; + +extern struct anv_render_pass anv_meta_dummy_renderpass; + +struct anv_query_pool_slot { + uint64_t begin; + uint64_t end; + uint64_t available; +}; + +struct anv_query_pool { + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + +VkResult anv_device_init_meta(struct anv_device *device); +void anv_device_finish_meta(struct anv_device *device); + +void *anv_lookup_entrypoint(const char *name); + +void anv_dump_image_to_ppm(struct anv_device *device, + struct anv_image *image, unsigned miplevel, + unsigned array_layer, const char *filename); + +#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *) _handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType) _obj; \ + } + +#define ANV_DEFINE_NONDISP_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *)(uintptr_t) _handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType)(uintptr_t) _obj; \ + } + +#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ + struct __anv_type *__name = __anv_type ## _from_handle(__handle) + +ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCommandBuffer) +ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) +ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) +ANV_DEFINE_HANDLE_CASTS(anv_physical_device, 
VkPhysicalDevice) +ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) + +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCommandPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView); +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, VkPipelineCache) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) + +#define ANV_DEFINE_STRUCT_CASTS(__anv_type, __VkType) \ + \ + static inline const __VkType * \ + __anv_type ## _to_ ## __VkType(const struct __anv_type *__anv_obj) \ + { \ + return (const __VkType *) __anv_obj; \ + } + +#define ANV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name) \ + const __VkType *__vk_name = anv_common_to_ ## __VkType(__common_name) + +ANV_DEFINE_STRUCT_CASTS(anv_common, VkMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkBufferMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkImageMemoryBarrier) + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/vulkan/anv_query.c b/src/intel/vulkan/anv_query.c new file mode 100644 index 00000000000..e45b519c0f3 --- /dev/null +++ b/src/intel/vulkan/anv_query.c @@ -0,0 +1,187 @@ +/* + * Copyright © 2015 Intel 
Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +VkResult anv_CreateQueryPool( + VkDevice _device, + const VkQueryPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkQueryPool* pQueryPool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_query_pool *pool; + VkResult result; + uint32_t slot_size; + uint64_t size; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); + + switch (pCreateInfo->queryType) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + break; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + return VK_ERROR_INCOMPATIBLE_DRIVER; + default: + assert(!"Invalid query type"); + } + + slot_size = sizeof(struct anv_query_pool_slot); + pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pool->type = pCreateInfo->queryType; + pool->slots = pCreateInfo->queryCount; + + size = pCreateInfo->queryCount * slot_size; + result = anv_bo_init_new(&pool->bo, device, size); + if (result != VK_SUCCESS) + goto fail; + + pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0); + + *pQueryPool = anv_query_pool_to_handle(pool); + + return VK_SUCCESS; + + fail: + anv_free2(&device->alloc, pAllocator, pool); + + return result; +} + +void anv_DestroyQueryPool( + VkDevice _device, + VkQueryPool _pool, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_query_pool, pool, _pool); + + anv_gem_munmap(pool->bo.map, pool->bo.size); + anv_gem_close(device, pool->bo.gem_handle); + anv_free2(&device->alloc, pAllocator, pool); +} + +VkResult anv_GetQueryPoolResults( + VkDevice _device, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + size_t dataSize, + void* pData, + VkDeviceSize stride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_device, device, 
_device); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + int64_t timeout = INT64_MAX; + uint64_t result; + int ret; + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION || + pool->type == VK_QUERY_TYPE_TIMESTAMP); + + if (pData == NULL) + return VK_SUCCESS; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); + if (ret == -1) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "gem_wait failed %m"); + } + } + + void *data_end = pData + dataSize; + struct anv_query_pool_slot *slot = pool->bo.map; + + for (uint32_t i = 0; i < queryCount; i++) { + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: { + result = slot[firstQuery + i].end - slot[firstQuery + i].begin; + break; + } + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + unreachable("pipeline stats not supported"); + case VK_QUERY_TYPE_TIMESTAMP: { + result = slot[firstQuery + i].begin; + break; + } + default: + unreachable("invalid pool type"); + } + + if (flags & VK_QUERY_RESULT_64_BIT) { + uint64_t *dst = pData; + dst[0] = result; + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + dst[1] = slot[firstQuery + i].available; + } else { + uint32_t *dst = pData; + if (result > UINT32_MAX) + result = UINT32_MAX; + dst[0] = result; + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + dst[1] = slot[firstQuery + i].available; + } + + pData += stride; + if (pData >= data_end) + break; + } + + return VK_SUCCESS; +} + +void anv_CmdResetQueryPool( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount) +{ + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + for (uint32_t i = 0; i < queryCount; i++) { + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: { + struct anv_query_pool_slot *slot = pool->bo.map; + slot[firstQuery + i].available = 0; + break; + } + default: + assert(!"Invalid query type"); + } + } +} diff --git 
a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c new file mode 100644 index 00000000000..22fd01c9495 --- /dev/null +++ b/src/intel/vulkan/anv_util.c @@ -0,0 +1,195 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/** Log an error message. */ +void anv_printflike(1, 2) +anv_loge(const char *format, ...) +{ + va_list va; + + va_start(va, format); + anv_loge_v(format, va); + va_end(va); +} + +/** \see anv_loge() */ +void +anv_loge_v(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); +} + +void anv_printflike(3, 4) +__anv_finishme(const char *file, int line, const char *format, ...) 
+{ + va_list ap; + char buffer[256]; + + va_start(ap, format); + vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); +} + +void anv_noreturn anv_printflike(1, 2) +anv_abortf(const char *format, ...) +{ + va_list va; + + va_start(va, format); + anv_abortfv(format, va); + va_end(va); +} + +void anv_noreturn +anv_abortfv(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); + abort(); +} + +VkResult +__vk_errorf(VkResult error, const char *file, int line, const char *format, ...) +{ + va_list ap; + char buffer[256]; + +#define ERROR_CASE(error) case error: error_str = #error; break; + + const char *error_str; + switch ((int32_t)error) { + + /* Core errors */ + ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY) + ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY) + ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED) + ERROR_CASE(VK_ERROR_DEVICE_LOST) + ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED) + ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT) + ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT) + ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER) + + /* Extension errors */ + ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR) + + default: + assert(!"Unknown error"); + error_str = "unknown error"; + } + +#undef ERROR_CASE + + if (format) { + va_start(ap, format); + vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str); + } else { + fprintf(stderr, "%s:%d: %s\n", file, line, error_str); + } + + return error; +} + +int +anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) +{ + assert(util_is_power_of_two(size)); + assert(element_size < size && util_is_power_of_two(element_size)); + + vector->head = 0; + vector->tail = 0; + vector->element_size = element_size; + vector->size = size; + vector->data = malloc(size); + + return vector->data != NULL; +} + +void * +anv_vector_add(struct anv_vector 
*vector) +{ + uint32_t offset, size, split, tail; + void *data; + + if (vector->head - vector->tail == vector->size) { + size = vector->size * 2; + data = malloc(size); + if (data == NULL) + return NULL; + split = align_u32(vector->tail, vector->size); + tail = vector->tail & (vector->size - 1); + if (vector->head - split < vector->size) { + memcpy(data + tail, + vector->data + tail, + split - vector->tail); + memcpy(data + vector->size, + vector->data, vector->head - split); + } else { + memcpy(data + tail, + vector->data + tail, + vector->head - vector->tail); + } + free(vector->data); + vector->data = data; + vector->size = size; + } + + assert(vector->head - vector->tail < vector->size); + + offset = vector->head & (vector->size - 1); + vector->head += vector->element_size; + + return vector->data + offset; +} + +void * +anv_vector_remove(struct anv_vector *vector) +{ + uint32_t offset; + + if (vector->head == vector->tail) + return NULL; + + assert(vector->head - vector->tail <= vector->size); + + offset = vector->tail & (vector->size - 1); + vector->tail += vector->element_size; + + return vector->data + offset; +} diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c new file mode 100644 index 00000000000..c5911a3635b --- /dev/null +++ b/src/intel/vulkan/anv_wsi.c @@ -0,0 +1,196 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_wsi.h" + +VkResult +anv_init_wsi(struct anv_instance *instance) +{ + VkResult result; + + result = anv_x11_init_wsi(instance); + if (result != VK_SUCCESS) + return result; + +#ifdef HAVE_WAYLAND_PLATFORM + result = anv_wl_init_wsi(instance); + if (result != VK_SUCCESS) { + anv_x11_finish_wsi(instance); + return result; + } +#endif + + return VK_SUCCESS; +} + +void +anv_finish_wsi(struct anv_instance *instance) +{ +#ifdef HAVE_WAYLAND_PLATFORM + anv_wl_finish_wsi(instance); +#endif + anv_x11_finish_wsi(instance); +} + +void anv_DestroySurfaceKHR( + VkInstance _instance, + VkSurfaceKHR _surface, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + + anv_free2(&instance->alloc, pAllocator, surface); +} + +VkResult anv_GetPhysicalDeviceSurfaceSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR _surface, + VkBool32* pSupported) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_support(surface, device, queueFamilyIndex, pSupported); +} + +VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) +{ + ANV_FROM_HANDLE(anv_physical_device, 
device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_capabilities(surface, device, pSurfaceCapabilities); +} + +VkResult anv_GetPhysicalDeviceSurfaceFormatsKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_formats(surface, device, pSurfaceFormatCount, + pSurfaceFormats); +} + +VkResult anv_GetPhysicalDeviceSurfacePresentModesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_present_modes(surface, device, pPresentModeCount, + pPresentModes); +} + +VkResult anv_CreateSwapchainKHR( + VkDevice _device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSwapchainKHR* pSwapchain) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + struct anv_swapchain *swapchain; + + VkResult result = iface->create_swapchain(surface, device, pCreateInfo, + pAllocator, &swapchain); + if (result != VK_SUCCESS) + return result; + + *pSwapchain = anv_swapchain_to_handle(swapchain); + + return VK_SUCCESS; +} + +void anv_DestroySwapchainKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_swapchain, 
swapchain, _swapchain); + + swapchain->destroy(swapchain, pAllocator); +} + +VkResult anv_GetSwapchainImagesKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + uint32_t* pSwapchainImageCount, + VkImage* pSwapchainImages) +{ + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + + return swapchain->get_images(swapchain, pSwapchainImageCount, + pSwapchainImages); +} + +VkResult anv_AcquireNextImageKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + uint64_t timeout, + VkSemaphore semaphore, + VkFence fence, + uint32_t* pImageIndex) +{ + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + + return swapchain->acquire_next_image(swapchain, timeout, semaphore, + pImageIndex); +} + +VkResult anv_QueuePresentKHR( + VkQueue _queue, + const VkPresentInfoKHR* pPresentInfo) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + VkResult result; + + for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { + ANV_FROM_HANDLE(anv_swapchain, swapchain, pPresentInfo->pSwapchains[i]); + + assert(swapchain->device == queue->device); + + result = swapchain->queue_present(swapchain, queue, + pPresentInfo->pImageIndices[i]); + /* TODO: What if one of them returns OUT_OF_DATE? 
*/ + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/anv_wsi.h b/src/intel/vulkan/anv_wsi.h new file mode 100644 index 00000000000..6e9ff9b8447 --- /dev/null +++ b/src/intel/vulkan/anv_wsi.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "anv_private.h" + +struct anv_swapchain; + +struct anv_wsi_interface { + VkResult (*get_support)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported); + VkResult (*get_capabilities)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); + VkResult (*get_formats)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats); + VkResult (*get_present_modes)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes); + VkResult (*create_swapchain)(VkIcdSurfaceBase *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); +}; + +struct anv_swapchain { + struct anv_device *device; + + VkResult (*destroy)(struct anv_swapchain *swapchain, + const VkAllocationCallbacks *pAllocator); + VkResult (*get_images)(struct anv_swapchain *swapchain, + uint32_t *pCount, VkImage *pSwapchainImages); + VkResult (*acquire_next_image)(struct anv_swapchain *swap_chain, + uint64_t timeout, VkSemaphore semaphore, + uint32_t *image_index); + VkResult (*queue_present)(struct anv_swapchain *swap_chain, + struct anv_queue *queue, + uint32_t image_index); +}; + +ANV_DEFINE_NONDISP_HANDLE_CASTS(_VkIcdSurfaceBase, VkSurfaceKHR) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swapchain, VkSwapchainKHR) + +VkResult anv_x11_init_wsi(struct anv_instance *instance); +void anv_x11_finish_wsi(struct anv_instance *instance); +VkResult anv_wl_init_wsi(struct anv_instance *instance); +void anv_wl_finish_wsi(struct anv_instance *instance); diff --git a/src/intel/vulkan/anv_wsi_wayland.c b/src/intel/vulkan/anv_wsi_wayland.c new file mode 100644 index 00000000000..6f25eaf43ea --- /dev/null +++ 
b/src/intel/vulkan/anv_wsi_wayland.c @@ -0,0 +1,871 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include + +#include "anv_wsi.h" + +#include + +#define MIN_NUM_IMAGES 2 + +struct wsi_wl_display { + struct wl_display * display; + struct wl_drm * drm; + + /* Vector of VkFormats supported */ + struct anv_vector formats; + + uint32_t capabilities; +}; + +struct wsi_wayland { + struct anv_wsi_interface base; + + struct anv_instance * instance; + + pthread_mutex_t mutex; + /* Hash table of wl_display -> wsi_wl_display mappings */ + struct hash_table * displays; +}; + +static void +wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format) +{ + /* Don't add a format that's already in the list */ + VkFormat *f; + anv_vector_foreach(f, &display->formats) + if (*f == format) + return; + + /* Don't add formats which aren't supported by the driver */ + if (anv_format_for_vk_format(format)->isl_format == + ISL_FORMAT_UNSUPPORTED) { + return; + } + + f = anv_vector_add(&display->formats); + if (f) + *f = format; +} + +static void +drm_handle_device(void *data, struct wl_drm *drm, const char *name) +{ + fprintf(stderr, "wl_drm.device(%s)\n", name); +} + +static uint32_t +wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha) +{ + switch (vk_format) { + /* TODO: Figure out what all the formats mean and make this table + * correct. + */ +#if 0 + case VK_FORMAT_R4G4B4A4_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR4444 : WL_DRM_FORMAT_XBGR4444; + case VK_FORMAT_R5G6B5_UNORM: + return WL_DRM_FORMAT_BGR565; + case VK_FORMAT_R5G5B5A1_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR1555 : WL_DRM_FORMAT_XBGR1555; + case VK_FORMAT_R8G8B8_UNORM: + return WL_DRM_FORMAT_XBGR8888; + case VK_FORMAT_R8G8B8A8_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR8888 : WL_DRM_FORMAT_XBGR8888; + case VK_FORMAT_R10G10B10A2_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR2101010 : WL_DRM_FORMAT_XBGR2101010; + case VK_FORMAT_B4G4R4A4_UNORM: + return alpha ? 
WL_DRM_FORMAT_ARGB4444 : WL_DRM_FORMAT_XRGB4444; + case VK_FORMAT_B5G6R5_UNORM: + return WL_DRM_FORMAT_RGB565; + case VK_FORMAT_B5G5R5A1_UNORM: + return alpha ? WL_DRM_FORMAT_XRGB1555 : WL_DRM_FORMAT_XRGB1555; +#endif + case VK_FORMAT_B8G8R8_SRGB: + return WL_DRM_FORMAT_BGRX8888; + case VK_FORMAT_B8G8R8A8_SRGB: + return alpha ? WL_DRM_FORMAT_ARGB8888 : WL_DRM_FORMAT_XRGB8888; +#if 0 + case VK_FORMAT_B10G10R10A2_UNORM: + return alpha ? WL_DRM_FORMAT_ARGB2101010 : WL_DRM_FORMAT_XRGB2101010; +#endif + + default: + assert("!Unsupported Vulkan format"); + return 0; + } +} + +static void +drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format) +{ + struct wsi_wl_display *display = data; + + switch (wl_format) { +#if 0 + case WL_DRM_FORMAT_ABGR4444: + case WL_DRM_FORMAT_XBGR4444: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R4G4B4A4_UNORM); + break; + case WL_DRM_FORMAT_BGR565: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G6B5_UNORM); + break; + case WL_DRM_FORMAT_ABGR1555: + case WL_DRM_FORMAT_XBGR1555: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G5B5A1_UNORM); + break; + case WL_DRM_FORMAT_XBGR8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8_UNORM); + /* fallthrough */ + case WL_DRM_FORMAT_ABGR8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8A8_UNORM); + break; + case WL_DRM_FORMAT_ABGR2101010: + case WL_DRM_FORMAT_XBGR2101010: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R10G10B10A2_UNORM); + break; + case WL_DRM_FORMAT_ARGB4444: + case WL_DRM_FORMAT_XRGB4444: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B4G4R4A4_UNORM); + break; + case WL_DRM_FORMAT_RGB565: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G6R5_UNORM); + break; + case WL_DRM_FORMAT_ARGB1555: + case WL_DRM_FORMAT_XRGB1555: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G5R5A1_UNORM); + break; +#endif + case WL_DRM_FORMAT_XRGB8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_SRGB); + /* fallthrough */ + case 
WL_DRM_FORMAT_ARGB8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_SRGB); + break; +#if 0 + case WL_DRM_FORMAT_ARGB2101010: + case WL_DRM_FORMAT_XRGB2101010: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B10G10R10A2_UNORM); + break; +#endif + } +} + +static void +drm_handle_authenticated(void *data, struct wl_drm *drm) +{ +} + +static void +drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t capabilities) +{ + struct wsi_wl_display *display = data; + + display->capabilities = capabilities; +} + +static const struct wl_drm_listener drm_listener = { + drm_handle_device, + drm_handle_format, + drm_handle_authenticated, + drm_handle_capabilities, +}; + +static void +registry_handle_global(void *data, struct wl_registry *registry, + uint32_t name, const char *interface, uint32_t version) +{ + struct wsi_wl_display *display = data; + + if (strcmp(interface, "wl_drm") == 0) { + assert(display->drm == NULL); + + assert(version >= 2); + display->drm = wl_registry_bind(registry, name, &wl_drm_interface, 2); + + if (display->drm) + wl_drm_add_listener(display->drm, &drm_listener, display); + } +} + +static void +registry_handle_global_remove(void *data, struct wl_registry *registry, + uint32_t name) +{ /* No-op */ } + +static const struct wl_registry_listener registry_listener = { + registry_handle_global, + registry_handle_global_remove +}; + +static void +wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display) +{ + anv_vector_finish(&display->formats); + if (display->drm) + wl_drm_destroy(display->drm); + anv_free(&wsi->instance->alloc, display); +} + +static struct wsi_wl_display * +wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display) +{ + struct wsi_wl_display *display = + anv_alloc(&wsi->instance->alloc, sizeof(*display), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!display) + return NULL; + + memset(display, 0, sizeof(*display)); + + display->display = wl_display; + + if 
(!anv_vector_init(&display->formats, sizeof(VkFormat), 8)) + goto fail; + + struct wl_registry *registry = wl_display_get_registry(wl_display); + if (!registry) + return NULL; + + wl_registry_add_listener(registry, ®istry_listener, display); + + /* Round-rip to get the wl_drm global */ + wl_display_roundtrip(wl_display); + + if (!display->drm) + goto fail; + + /* Round-rip to get wl_drm formats and capabilities */ + wl_display_roundtrip(wl_display); + + /* We need prime support */ + if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME)) + goto fail; + + /* We don't need this anymore */ + wl_registry_destroy(registry); + + return display; + +fail: + if (registry) + wl_registry_destroy(registry); + + wsi_wl_display_destroy(wsi, display); + return NULL; +} + +static struct wsi_wl_display * +wsi_wl_get_display(struct anv_instance *instance, struct wl_display *wl_display) +{ + struct wsi_wayland *wsi = + (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; + + pthread_mutex_lock(&wsi->mutex); + + struct hash_entry *entry = _mesa_hash_table_search(wsi->displays, + wl_display); + if (!entry) { + /* We're about to make a bunch of blocking calls. Let's drop the + * mutex for now so we don't block up too badly. 
+ */ + pthread_mutex_unlock(&wsi->mutex); + + struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display); + + pthread_mutex_lock(&wsi->mutex); + + entry = _mesa_hash_table_search(wsi->displays, wl_display); + if (entry) { + /* Oops, someone raced us to it */ + wsi_wl_display_destroy(wsi, display); + } else { + entry = _mesa_hash_table_insert(wsi->displays, wl_display, display); + } + } + + pthread_mutex_unlock(&wsi->mutex); + + return entry->data; +} + +VkBool32 anv_GetPhysicalDeviceWaylandPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + struct wl_display* display) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + + return wsi_wl_get_display(physical_device->instance, display) != NULL; +} + +static VkResult +wsi_wl_surface_get_support(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported) +{ + *pSupported = true; + + return VK_SUCCESS; +} + +static const VkPresentModeKHR present_modes[] = { + VK_PRESENT_MODE_MAILBOX_KHR, + VK_PRESENT_MODE_FIFO_KHR, +}; + +static VkResult +wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR* caps) +{ + caps->minImageCount = MIN_NUM_IMAGES; + caps->maxImageCount = 4; + caps->currentExtent = (VkExtent2D) { -1, -1 }; + caps->minImageExtent = (VkExtent2D) { 1, 1 }; + caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->maxImageArrayLayers = 1; + + caps->supportedCompositeAlpha = + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR | + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; + + caps->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return VK_SUCCESS; +} + +static VkResult 
+wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface, + struct anv_physical_device *device, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats) +{ + VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; + struct wsi_wl_display *display = + wsi_wl_get_display(device->instance, surface->display); + + uint32_t count = anv_vector_length(&display->formats); + + if (pSurfaceFormats == NULL) { + *pSurfaceFormatCount = count; + return VK_SUCCESS; + } + + assert(*pSurfaceFormatCount >= count); + *pSurfaceFormatCount = count; + + VkFormat *f; + anv_vector_foreach(f, &display->formats) { + *(pSurfaceFormats++) = (VkSurfaceFormatKHR) { + .format = *f, + /* TODO: We should get this from the compositor somehow */ + .colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR, + }; + } + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_get_present_modes(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes) +{ + if (pPresentModes == NULL) { + *pPresentModeCount = ARRAY_SIZE(present_modes); + return VK_SUCCESS; + } + + assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); + typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); + *pPresentModeCount = ARRAY_SIZE(present_modes); + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); + +VkResult anv_CreateWaylandSurfaceKHR( + VkInstance _instance, + const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR); + + VkIcdSurfaceWayland *surface; + + surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, 
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (surface == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + surface->base.platform = VK_ICD_WSI_PLATFORM_WAYLAND; + surface->display = pCreateInfo->display; + surface->surface = pCreateInfo->surface; + + *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); + + return VK_SUCCESS; +} + +struct wsi_wl_image { + struct anv_image * image; + struct anv_device_memory * memory; + struct wl_buffer * buffer; + bool busy; +}; + +struct wsi_wl_swapchain { + struct anv_swapchain base; + + struct wsi_wl_display * display; + struct wl_event_queue * queue; + struct wl_surface * surface; + + VkExtent2D extent; + VkFormat vk_format; + uint32_t drm_format; + + VkPresentModeKHR present_mode; + bool fifo_ready; + + uint32_t image_count; + struct wsi_wl_image images[0]; +}; + +static VkResult +wsi_wl_swapchain_get_images(struct anv_swapchain *anv_chain, + uint32_t *pCount, VkImage *pSwapchainImages) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + if (pSwapchainImages == NULL) { + *pCount = chain->image_count; + return VK_SUCCESS; + } + + assert(chain->image_count <= *pCount); + for (uint32_t i = 0; i < chain->image_count; i++) + pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); + + *pCount = chain->image_count; + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_swapchain_acquire_next_image(struct anv_swapchain *anv_chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + int ret = wl_display_dispatch_queue_pending(chain->display->display, + chain->queue); + /* XXX: I'm not sure if out-of-date is the right error here. If + * wl_display_dispatch_queue_pending fails it most likely means we got + * kicked by the server so this seems more-or-less correct. 
+ */ + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + + while (1) { + for (uint32_t i = 0; i < chain->image_count; i++) { + if (!chain->images[i].busy) { + /* We found a non-busy image */ + *image_index = i; + return VK_SUCCESS; + } + } + + /* This time we do a blocking dispatch because we can't go + * anywhere until we get an event. + */ + int ret = wl_display_roundtrip_queue(chain->display->display, + chain->queue); + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } +} + +static void +frame_handle_done(void *data, struct wl_callback *callback, uint32_t serial) +{ + struct wsi_wl_swapchain *chain = data; + + chain->fifo_ready = true; + + wl_callback_destroy(callback); +} + +static const struct wl_callback_listener frame_listener = { + frame_handle_done, +}; + +static VkResult +wsi_wl_swapchain_queue_present(struct anv_swapchain *anv_chain, + struct anv_queue *queue, + uint32_t image_index) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { + while (!chain->fifo_ready) { + int ret = wl_display_dispatch_queue(chain->display->display, + chain->queue); + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } + } + + assert(image_index < chain->image_count); + wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0); + wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX); + + if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { + struct wl_callback *frame = wl_surface_frame(chain->surface); + wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue); + wl_callback_add_listener(frame, &frame_listener, chain); + chain->fifo_ready = false; + } + + chain->images[image_index].busy = true; + wl_surface_commit(chain->surface); + wl_display_flush(chain->display->display); + + return VK_SUCCESS; +} + +static void +wsi_wl_image_finish(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, + const VkAllocationCallbacks* 
pAllocator) +{ + VkDevice vk_device = anv_device_to_handle(chain->base.device); + anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory), + pAllocator); + anv_DestroyImage(vk_device, anv_image_to_handle(image->image), + pAllocator); +} + +static void +buffer_handle_release(void *data, struct wl_buffer *buffer) +{ + struct wsi_wl_image *image = data; + + assert(image->buffer == buffer); + + image->busy = false; +} + +static const struct wl_buffer_listener buffer_listener = { + buffer_handle_release, +}; + +static VkResult +wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, + const VkAllocationCallbacks* pAllocator) +{ + VkDevice vk_device = anv_device_to_handle(chain->base.device); + VkResult result; + + VkImage vk_image; + result = anv_image_create(vk_device, + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = 0, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = chain->vk_format, + .extent = { + .width = chain->extent.width, + .height = chain->extent.height, + .depth = 1 + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + pAllocator, + &vk_image); + + if (result != VK_SUCCESS) + return result; + + image->image = anv_image_from_handle(vk_image); + assert(anv_format_is_color(image->image->format)); + + struct anv_surface *surface = &image->image->color_surface; + + VkDeviceMemory vk_memory; + result = anv_AllocateMemory(vk_device, + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = image->image->size, + .memoryTypeIndex = 0, + }, + pAllocator, + &vk_memory); + + if (result != VK_SUCCESS) + goto fail_image; + + image->memory = anv_device_memory_from_handle(vk_memory); + 
image->memory->bo.is_winsys_bo = true; + + result = anv_BindImageMemory(vk_device, vk_image, vk_memory, 0); + + if (result != VK_SUCCESS) + goto fail_mem; + + int ret = anv_gem_set_tiling(chain->base.device, + image->memory->bo.gem_handle, + surface->isl.row_pitch, I915_TILING_X); + if (ret) { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail_mem; + } + + int fd = anv_gem_handle_to_fd(chain->base.device, + image->memory->bo.gem_handle); + if (fd == -1) { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail_mem; + } + + image->buffer = wl_drm_create_prime_buffer(chain->display->drm, + fd, /* name */ + chain->extent.width, + chain->extent.height, + chain->drm_format, + surface->offset, + surface->isl.row_pitch, + 0, 0, 0, 0 /* unused */); + wl_display_roundtrip(chain->display->display); + close(fd); + + wl_proxy_set_queue((struct wl_proxy *)image->buffer, chain->queue); + wl_buffer_add_listener(image->buffer, &buffer_listener, image); + + return VK_SUCCESS; + +fail_mem: + anv_FreeMemory(vk_device, vk_memory, pAllocator); +fail_image: + anv_DestroyImage(vk_device, vk_image, pAllocator); + + return result; +} + +static VkResult +wsi_wl_swapchain_destroy(struct anv_swapchain *anv_chain, + const VkAllocationCallbacks *pAllocator) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + for (uint32_t i = 0; i < chain->image_count; i++) { + if (chain->images[i].buffer) + wsi_wl_image_finish(chain, &chain->images[i], pAllocator); + } + + anv_free2(&chain->base.device->alloc, pAllocator, chain); + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain_out) +{ + VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; + struct 
wsi_wl_swapchain *chain; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); + + int num_images = pCreateInfo->minImageCount; + + assert(num_images >= MIN_NUM_IMAGES); + + /* For true mailbox mode, we need at least 4 images: + * 1) One to scan out from + * 2) One to have queued for scan-out + * 3) One to be currently held by the Wayland compositor + * 4) One to render to + */ + if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) + num_images = MAX2(num_images, 4); + + size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); + chain = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->base.device = device; + chain->base.destroy = wsi_wl_swapchain_destroy; + chain->base.get_images = wsi_wl_swapchain_get_images; + chain->base.acquire_next_image = wsi_wl_swapchain_acquire_next_image; + chain->base.queue_present = wsi_wl_swapchain_queue_present; + + chain->surface = surface->surface; + chain->extent = pCreateInfo->imageExtent; + chain->vk_format = pCreateInfo->imageFormat; + chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, false); + + chain->present_mode = pCreateInfo->presentMode; + chain->fifo_ready = true; + + chain->image_count = num_images; + + /* Mark a bunch of stuff as NULL. This way we can just call + * destroy_swapchain for cleanup. 
+ */ + for (uint32_t i = 0; i < chain->image_count; i++) + chain->images[i].buffer = NULL; + chain->queue = NULL; + + chain->display = wsi_wl_get_display(device->instance, surface->display); + if (!chain->display) + goto fail; + + chain->queue = wl_display_create_queue(chain->display->display); + if (!chain->queue) + goto fail; + + for (uint32_t i = 0; i < chain->image_count; i++) { + result = wsi_wl_image_init(chain, &chain->images[i], pAllocator); + if (result != VK_SUCCESS) + goto fail; + chain->images[i].busy = false; + } + + *swapchain_out = &chain->base; + + return VK_SUCCESS; + +fail: + wsi_wl_swapchain_destroy(&chain->base, pAllocator); + + return result; +} + +VkResult +anv_wl_init_wsi(struct anv_instance *instance) +{ + struct wsi_wayland *wsi; + VkResult result; + + wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + wsi->instance = instance; + + int ret = pthread_mutex_init(&wsi->mutex, NULL); + if (ret != 0) { + if (ret == ENOMEM) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else { + /* FINISHME: Choose a better error. 
*/ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + goto fail_alloc; + } + + wsi->displays = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + if (!wsi->displays) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_mutex; + } + + wsi->base.get_support = wsi_wl_surface_get_support; + wsi->base.get_capabilities = wsi_wl_surface_get_capabilities; + wsi->base.get_formats = wsi_wl_surface_get_formats; + wsi->base.get_present_modes = wsi_wl_surface_get_present_modes; + wsi->base.create_swapchain = wsi_wl_surface_create_swapchain; + + instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = &wsi->base; + + return VK_SUCCESS; + +fail_mutex: + pthread_mutex_destroy(&wsi->mutex); + +fail_alloc: + anv_free(&instance->alloc, wsi); +fail: + instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = NULL; + + return result; +} + +void +anv_wl_finish_wsi(struct anv_instance *instance) +{ + struct wsi_wayland *wsi = + (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; + + if (wsi) { + _mesa_hash_table_destroy(wsi->displays, NULL); + + pthread_mutex_destroy(&wsi->mutex); + + anv_free(&instance->alloc, wsi); + } +} diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c new file mode 100644 index 00000000000..843a6b62504 --- /dev/null +++ b/src/intel/vulkan/anv_wsi_x11.c @@ -0,0 +1,758 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or 
substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "anv_wsi.h" + +#include "util/hash_table.h" + +struct wsi_x11_connection { + bool has_dri3; + bool has_present; +}; + +struct wsi_x11 { + struct anv_wsi_interface base; + + pthread_mutex_t mutex; + /* Hash table of xcb_connection -> wsi_x11_connection mappings */ + struct hash_table *connections; +}; + +static struct wsi_x11_connection * +wsi_x11_connection_create(struct anv_instance *instance, xcb_connection_t *conn) +{ + xcb_query_extension_cookie_t dri3_cookie, pres_cookie; + xcb_query_extension_reply_t *dri3_reply, *pres_reply; + + struct wsi_x11_connection *wsi_conn = + anv_alloc(&instance->alloc, sizeof(*wsi_conn), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi_conn) + return NULL; + + dri3_cookie = xcb_query_extension(conn, 4, "DRI3"); + pres_cookie = xcb_query_extension(conn, 7, "PRESENT"); + + dri3_reply = xcb_query_extension_reply(conn, dri3_cookie, NULL); + pres_reply = xcb_query_extension_reply(conn, pres_cookie, NULL); + if (dri3_reply == NULL || pres_reply == NULL) { + free(dri3_reply); + free(pres_reply); + anv_free(&instance->alloc, wsi_conn); + return NULL; + } + + wsi_conn->has_dri3 = dri3_reply->present != 0; + wsi_conn->has_present = pres_reply->present != 0; + + free(dri3_reply); + free(pres_reply); + + return wsi_conn; +} + +static void +wsi_x11_connection_destroy(struct anv_instance *instance, + struct wsi_x11_connection *conn) +{ + anv_free(&instance->alloc, 
conn); +} + +static struct wsi_x11_connection * +wsi_x11_get_connection(struct anv_instance *instance, xcb_connection_t *conn) +{ + struct wsi_x11 *wsi = + (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; + + pthread_mutex_lock(&wsi->mutex); + + struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, conn); + if (!entry) { + /* We're about to make a bunch of blocking calls. Let's drop the + * mutex for now so we don't block up too badly. + */ + pthread_mutex_unlock(&wsi->mutex); + + struct wsi_x11_connection *wsi_conn = + wsi_x11_connection_create(instance, conn); + + pthread_mutex_lock(&wsi->mutex); + + entry = _mesa_hash_table_search(wsi->connections, conn); + if (entry) { + /* Oops, someone raced us to it */ + wsi_x11_connection_destroy(instance, wsi_conn); + } else { + entry = _mesa_hash_table_insert(wsi->connections, conn, wsi_conn); + } + } + + pthread_mutex_unlock(&wsi->mutex); + + return entry->data; +} + +static const VkSurfaceFormatKHR formats[] = { + { .format = VK_FORMAT_B8G8R8A8_SRGB, }, +}; + +static const VkPresentModeKHR present_modes[] = { + VK_PRESENT_MODE_MAILBOX_KHR, +}; + +static xcb_screen_t * +get_screen_for_root(xcb_connection_t *conn, xcb_window_t root) +{ + xcb_screen_iterator_t screen_iter = + xcb_setup_roots_iterator(xcb_get_setup(conn)); + + for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { + if (screen_iter.data->root == root) + return screen_iter.data; + } + + return NULL; +} + +static xcb_visualtype_t * +screen_get_visualtype(xcb_screen_t *screen, xcb_visualid_t visual_id, + unsigned *depth) +{ + xcb_depth_iterator_t depth_iter = + xcb_screen_allowed_depths_iterator(screen); + + for (; depth_iter.rem; xcb_depth_next (&depth_iter)) { + xcb_visualtype_iterator_t visual_iter = + xcb_depth_visuals_iterator (depth_iter.data); + + for (; visual_iter.rem; xcb_visualtype_next (&visual_iter)) { + if (visual_iter.data->visual_id == visual_id) { + if (depth) + *depth = depth_iter.data->depth; + return 
visual_iter.data; + } + } + } + + return NULL; +} + +static xcb_visualtype_t * +connection_get_visualtype(xcb_connection_t *conn, xcb_visualid_t visual_id, + unsigned *depth) +{ + xcb_screen_iterator_t screen_iter = + xcb_setup_roots_iterator(xcb_get_setup(conn)); + + /* For this we have to iterate over all of the screens which is rather + * annoying. Fortunately, there is probably only 1. + */ + for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { + xcb_visualtype_t *visual = screen_get_visualtype(screen_iter.data, + visual_id, depth); + if (visual) + return visual; + } + + return NULL; +} + +static xcb_visualtype_t * +get_visualtype_for_window(xcb_connection_t *conn, xcb_window_t window, + unsigned *depth) +{ + xcb_query_tree_cookie_t tree_cookie; + xcb_get_window_attributes_cookie_t attrib_cookie; + xcb_query_tree_reply_t *tree; + xcb_get_window_attributes_reply_t *attrib; + + tree_cookie = xcb_query_tree(conn, window); + attrib_cookie = xcb_get_window_attributes(conn, window); + + tree = xcb_query_tree_reply(conn, tree_cookie, NULL); + attrib = xcb_get_window_attributes_reply(conn, attrib_cookie, NULL); + if (attrib == NULL || tree == NULL) { + free(attrib); + free(tree); + return NULL; + } + + xcb_window_t root = tree->root; + xcb_visualid_t visual_id = attrib->visual; + free(attrib); + free(tree); + + xcb_screen_t *screen = get_screen_for_root(conn, root); + if (screen == NULL) + return NULL; + + return screen_get_visualtype(screen, visual_id, depth); +} + +static bool +visual_has_alpha(xcb_visualtype_t *visual, unsigned depth) +{ + uint32_t rgb_mask = visual->red_mask | + visual->green_mask | + visual->blue_mask; + + uint32_t all_mask = 0xffffffff >> (32 - depth); + + /* Do we have bits left over after RGB? 
*/ + return (all_mask & ~rgb_mask) != 0; +} + +VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + xcb_connection_t* connection, + xcb_visualid_t visual_id) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + + struct wsi_x11_connection *wsi_conn = + wsi_x11_get_connection(device->instance, connection); + + if (!wsi_conn->has_dri3) { + fprintf(stderr, "vulkan: No DRI3 support\n"); + return false; + } + + unsigned visual_depth; + if (!connection_get_visualtype(connection, visual_id, &visual_depth)) + return false; + + if (visual_depth != 24 && visual_depth != 32) + return false; + + return true; +} + +static VkResult +x11_surface_get_support(VkIcdSurfaceBase *icd_surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported) +{ + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + + struct wsi_x11_connection *wsi_conn = + wsi_x11_get_connection(device->instance, surface->connection); + if (!wsi_conn) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + if (!wsi_conn->has_dri3) { + fprintf(stderr, "vulkan: No DRI3 support\n"); + *pSupported = false; + return VK_SUCCESS; + } + + unsigned visual_depth; + if (!get_visualtype_for_window(surface->connection, surface->window, + &visual_depth)) { + *pSupported = false; + return VK_SUCCESS; + } + + if (visual_depth != 24 && visual_depth != 32) { + *pSupported = false; + return VK_SUCCESS; + } + + *pSupported = true; + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR *caps) +{ + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + xcb_get_geometry_cookie_t geom_cookie; + xcb_generic_error_t *err; + xcb_get_geometry_reply_t *geom; + unsigned visual_depth; + + geom_cookie = xcb_get_geometry(surface->connection, surface->window); + + /* This does a round-trip. 
This is why we do get_geometry first and + * wait to read the reply until after we have a visual. + */ + xcb_visualtype_t *visual = + get_visualtype_for_window(surface->connection, surface->window, + &visual_depth); + + geom = xcb_get_geometry_reply(surface->connection, geom_cookie, &err); + if (geom) { + VkExtent2D extent = { geom->width, geom->height }; + caps->currentExtent = extent; + caps->minImageExtent = extent; + caps->maxImageExtent = extent; + } else { + /* This can happen if the client didn't wait for the configure event + * to come back from the compositor. In that case, we don't know the + * size of the window so we just return valid "I don't know" stuff. + */ + caps->currentExtent = (VkExtent2D) { -1, -1 }; + caps->minImageExtent = (VkExtent2D) { 1, 1 }; + caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + } + free(err); + free(geom); + + if (visual_has_alpha(visual, visual_depth)) { + caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; + } else { + caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + } + + caps->minImageCount = 2; + caps->maxImageCount = 4; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->maxImageArrayLayers = 1; + caps->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_formats(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormatKHR *pSurfaceFormats) +{ + if (pSurfaceFormats == NULL) { + *pSurfaceFormatCount = ARRAY_SIZE(formats); + return VK_SUCCESS; + } + + assert(*pSurfaceFormatCount >= ARRAY_SIZE(formats)); + typed_memcpy(pSurfaceFormats, formats, *pSurfaceFormatCount); + 
*pSurfaceFormatCount = ARRAY_SIZE(formats); + + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_present_modes(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t *pPresentModeCount, + VkPresentModeKHR *pPresentModes) +{ + if (pPresentModes == NULL) { + *pPresentModeCount = ARRAY_SIZE(present_modes); + return VK_SUCCESS; + } + + assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); + typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); + *pPresentModeCount = ARRAY_SIZE(present_modes); + + return VK_SUCCESS; +} + +static VkResult +x11_surface_create_swapchain(VkIcdSurfaceBase *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); + +VkResult anv_CreateXcbSurfaceKHR( + VkInstance _instance, + const VkXcbSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR); + + VkIcdSurfaceXcb *surface; + + surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (surface == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + surface->base.platform = VK_ICD_WSI_PLATFORM_XCB; + surface->connection = pCreateInfo->connection; + surface->window = pCreateInfo->window; + + *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); + + return VK_SUCCESS; +} + +struct x11_image { + struct anv_image * image; + struct anv_device_memory * memory; + xcb_pixmap_t pixmap; + xcb_get_geometry_cookie_t geom_cookie; + bool busy; +}; + +struct x11_swapchain { + struct anv_swapchain base; + + xcb_connection_t * conn; + xcb_window_t window; + xcb_gc_t gc; + VkExtent2D extent; + uint32_t image_count; + uint32_t next_image; + struct x11_image images[0]; +}; + +static VkResult +x11_get_images(struct 
anv_swapchain *anv_chain, + uint32_t* pCount, VkImage *pSwapchainImages) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + + if (pSwapchainImages == NULL) { + *pCount = chain->image_count; + return VK_SUCCESS; + } + + assert(chain->image_count <= *pCount); + for (uint32_t i = 0; i < chain->image_count; i++) + pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); + + *pCount = chain->image_count; + + return VK_SUCCESS; +} + +static VkResult +x11_acquire_next_image(struct anv_swapchain *anv_chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + struct x11_image *image = &chain->images[chain->next_image]; + + if (image->busy) { + xcb_generic_error_t *err; + xcb_get_geometry_reply_t *geom = + xcb_get_geometry_reply(chain->conn, image->geom_cookie, &err); + if (!geom) { + free(err); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } + + if (geom->width != chain->extent.width || + geom->height != chain->extent.height) { + free(geom); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } + free(geom); + + image->busy = false; + } + + *image_index = chain->next_image; + chain->next_image = (chain->next_image + 1) % chain->image_count; + return VK_SUCCESS; +} + +static VkResult +x11_queue_present(struct anv_swapchain *anv_chain, + struct anv_queue *queue, + uint32_t image_index) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + struct x11_image *image = &chain->images[image_index]; + + assert(image_index < chain->image_count); + + xcb_void_cookie_t cookie; + + cookie = xcb_copy_area(chain->conn, + image->pixmap, + chain->window, + chain->gc, + 0, 0, + 0, 0, + chain->extent.width, + chain->extent.height); + xcb_discard_reply(chain->conn, cookie.sequence); + + image->geom_cookie = xcb_get_geometry(chain->conn, chain->window); + image->busy = true; + + xcb_flush(chain->conn); + + return VK_SUCCESS; +} + +static VkResult 
+x11_swapchain_destroy(struct anv_swapchain *anv_chain, + const VkAllocationCallbacks *pAllocator) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + xcb_void_cookie_t cookie; + + for (uint32_t i = 0; i < chain->image_count; i++) { + struct x11_image *image = &chain->images[i]; + + if (image->busy) + xcb_discard_reply(chain->conn, image->geom_cookie.sequence); + + cookie = xcb_free_pixmap(chain->conn, image->pixmap); + xcb_discard_reply(chain->conn, cookie.sequence); + + /* TODO: Delete images and free memory */ + } + + anv_free2(&chain->base.device->alloc, pAllocator, chain); + + return VK_SUCCESS; +} + +static VkResult +x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain_out) +{ + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + struct x11_swapchain *chain; + xcb_void_cookie_t cookie; + VkResult result; + + int num_images = pCreateInfo->minImageCount; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); + + size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); + chain = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->base.device = device; + chain->base.destroy = x11_swapchain_destroy; + chain->base.get_images = x11_get_images; + chain->base.acquire_next_image = x11_acquire_next_image; + chain->base.queue_present = x11_queue_present; + + chain->conn = surface->connection; + chain->window = surface->window; + chain->extent = pCreateInfo->imageExtent; + chain->image_count = num_images; + chain->next_image = 0; + + for (uint32_t i = 0; i < chain->image_count; i++) { + VkDeviceMemory memory_h; + VkImage image_h; + struct anv_image *image; + struct anv_surface *surface; + struct anv_device_memory *memory; + + 
anv_image_create(anv_device_to_handle(device), + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = 0, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->imageFormat, + .extent = { + .width = pCreateInfo->imageExtent.width, + .height = pCreateInfo->imageExtent.height, + .depth = 1 + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + NULL, + &image_h); + + image = anv_image_from_handle(image_h); + assert(anv_format_is_color(image->format)); + + surface = &image->color_surface; + + anv_AllocateMemory(anv_device_to_handle(device), + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = image->size, + .memoryTypeIndex = 0, + }, + NULL /* XXX: pAllocator */, + &memory_h); + + memory = anv_device_memory_from_handle(memory_h); + memory->bo.is_winsys_bo = true; + + anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), + memory_h, 0); + + int ret = anv_gem_set_tiling(device, memory->bo.gem_handle, + surface->isl.row_pitch, I915_TILING_X); + if (ret) { + /* FINISHME: Choose a better error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "set_tiling failed: %m"); + goto fail; + } + + int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); + if (fd == -1) { + /* FINISHME: Choose a better error. 
*/ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "handle_to_fd failed: %m"); + goto fail; + } + + uint32_t bpp = 32; + uint32_t depth = 24; + xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); + + cookie = + xcb_dri3_pixmap_from_buffer_checked(chain->conn, + pixmap, + chain->window, + image->size, + pCreateInfo->imageExtent.width, + pCreateInfo->imageExtent.height, + surface->isl.row_pitch, + depth, bpp, fd); + + chain->images[i].image = image; + chain->images[i].memory = memory; + chain->images[i].pixmap = pixmap; + chain->images[i].busy = false; + + xcb_discard_reply(chain->conn, cookie.sequence); + } + + chain->gc = xcb_generate_id(chain->conn); + if (!chain->gc) { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + cookie = xcb_create_gc(chain->conn, + chain->gc, + chain->window, + XCB_GC_GRAPHICS_EXPOSURES, + (uint32_t []) { 0 }); + xcb_discard_reply(chain->conn, cookie.sequence); + + *swapchain_out = &chain->base; + + return VK_SUCCESS; + + fail: + return result; +} + +VkResult +anv_x11_init_wsi(struct anv_instance *instance) +{ + struct wsi_x11 *wsi; + VkResult result; + + wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + int ret = pthread_mutex_init(&wsi->mutex, NULL); + if (ret != 0) { + if (ret == ENOMEM) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else { + /* FINISHME: Choose a better error. 
*/ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + goto fail_alloc; + } + + wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + if (!wsi->connections) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_mutex; + } + + wsi->base.get_support = x11_surface_get_support; + wsi->base.get_capabilities = x11_surface_get_capabilities; + wsi->base.get_formats = x11_surface_get_formats; + wsi->base.get_present_modes = x11_surface_get_present_modes; + wsi->base.create_swapchain = x11_surface_create_swapchain; + + instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = &wsi->base; + + return VK_SUCCESS; + +fail_mutex: + pthread_mutex_destroy(&wsi->mutex); +fail_alloc: + anv_free(&instance->alloc, wsi); +fail: + instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = NULL; + + return result; +} + +void +anv_x11_finish_wsi(struct anv_instance *instance) +{ + struct wsi_x11 *wsi = + (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; + + if (wsi) { + _mesa_hash_table_destroy(wsi->connections, NULL); + + pthread_mutex_destroy(&wsi->mutex); + + anv_free(&instance->alloc, wsi); + } +} diff --git a/src/intel/vulkan/dev_icd.json.in b/src/intel/vulkan/dev_icd.json.in new file mode 100644 index 00000000000..84920365289 --- /dev/null +++ b/src/intel/vulkan/dev_icd.json.in @@ -0,0 +1,7 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "@build_libdir@/libvulkan_intel.so", + "abi_versions": "1.0.3" + } +} diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c new file mode 100644 index 00000000000..23327ec0724 --- /dev/null +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -0,0 +1,589 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, 
publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen7_pack.h" +#include "genxml/gen75_pack.h" + +static uint32_t +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t push_constant_opcodes[] = { + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, + }; + + VkShaderStageFlags flushed = 0; + + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CONSTANT_VS, + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer0 = { .offset = state.offset }, + .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + + flushed |= mesa_to_vk_shader_stage(stage); + } + + cmd_buffer->state.push_constants_dirty &= ~flushed; + 
+ return flushed; +} + +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, + uint32_t stages) +{ + static const uint32_t sampler_state_opcodes[] = { + [MESA_SHADER_VERTEX] = 43, + [MESA_SHADER_TESS_CTRL] = 44, /* HS */ + [MESA_SHADER_TESS_EVAL] = 45, /* DS */ + [MESA_SHADER_GEOMETRY] = 46, + [MESA_SHADER_FRAGMENT] = 47, + [MESA_SHADER_COMPUTE] = 0, + }; + + static const uint32_t binding_table_opcodes[] = { + [MESA_SHADER_VERTEX] = 38, + [MESA_SHADER_TESS_CTRL] = 39, + [MESA_SHADER_TESS_EVAL] = 40, + [MESA_SHADER_GEOMETRY] = 41, + [MESA_SHADER_FRAGMENT] = 42, + [MESA_SHADER_COMPUTE] = 0, + }; + + anv_foreach_stage(s, stages) { + if (cmd_buffer->state.samplers[s].alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[s], + .PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset); + } + + /* Always emit binding table pointers if we're asked to, since on SKL + * this is what flushes push constants. 
*/ + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[s], + .PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset); + } +} + +GENX_FUNC(GEN7, GEN7) uint32_t +genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) +{ + VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & + cmd_buffer->state.pipeline->active_stages; + + VkResult result = VK_SUCCESS; + anv_foreach_stage(s, dirty) { + result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, + &cmd_buffer->state.samplers[s]); + if (result != VK_SUCCESS) + break; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, + &cmd_buffer->state.binding_tables[s]); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) { + assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); + + result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); + assert(result == VK_SUCCESS); + + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. 
+ */ + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + /* Re-emit all active binding tables */ + dirty |= cmd_buffer->state.pipeline->active_stages; + anv_foreach_stage(s, dirty) { + result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, + &cmd_buffer->state.samplers[s]); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, + &cmd_buffer->state.binding_tables[s]); + if (result != VK_SUCCESS) + return result; + } + } + + cmd_buffer->state.descriptors_dirty &= ~dirty; + + return dirty; +} + +static inline int64_t +clamp_int64(int64_t x, int64_t min, int64_t max) +{ + if (x < min) + return min; + else if (x < max) + return x; + else + return max; +} + +static void +emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkRect2D *scissors) +{ + struct anv_state scissor_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkRect2D *s = &scissors[i]; + + /* Since xmax and ymax are inclusive, we have to have xmax < xmin or + * ymax < ymin for empty clips. In case clip x, y, width height are all + * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't + * what we want. Just special case empty clips and produce a canonical + * empty clip. */ + static const struct GEN7_SCISSOR_RECT empty_scissor = { + .ScissorRectangleYMin = 1, + .ScissorRectangleXMin = 1, + .ScissorRectangleYMax = 0, + .ScissorRectangleXMax = 0 + }; + + const int max = 0xffff; + struct GEN7_SCISSOR_RECT scissor = { + /* Do this math using int64_t so overflow gets clamped correctly. 
*/ + .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), + .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), + .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), + .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) + }; + + if (s->extent.width <= 0 || s->extent.height <= 0) { + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, + &empty_scissor); + } else { + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, &scissor); + } + } + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = scissor_state.offset); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(scissor_state); +} + +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_emit_scissor)(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.scissor.count > 0) { + emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, + cmd_buffer->state.dynamic.scissor.scissors); + } else { + /* Emit a default scissor based on the currently bound framebuffer */ + emit_scissor_state(cmd_buffer, 1, + &(VkRect2D) { + .offset = { .x = 0, .y = 0, }, + .extent = { + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + }, + }); + } +} + +static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, +}; + +static const uint32_t restart_index_for_type[] = { + [VK_INDEX_TYPE_UINT16] = UINT16_MAX, + [VK_INDEX_TYPE_UINT32] = UINT32_MAX, +}; + +void genX(CmdBindIndexBuffer)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; + if (ANV_IS_HASWELL) + cmd_buffer->state.restart_index = restart_index_for_type[indexType]; + 
cmd_buffer->state.gen7.index_buffer = buffer; + cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType]; + cmd_buffer->state.gen7.index_offset = offset; +} + +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + MESA_SHADER_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + MESA_SHADER_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = + (prog_data->nr_params + local_id_dwords) * 4; + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + if (push_state.alloc_size) { + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), + .CURBETotalDataLength = push_state.alloc_size, + .CURBEDataStartAddress = push_state.offset); + } + + assert(prog_data->total_shared <= 64 * 1024); + uint32_t slm_size = 0; + if (prog_data->total_shared > 0) { + /* slm_size is in 4k increments, but must be a power of 2. 
*/ + slm_size = 4 * 1024; + while (slm_size < prog_data->total_shared) + slm_size <<= 1; + slm_size /= 4 * 1024; + } + + struct anv_state state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_INTERFACE_DESCRIPTOR_DATA, 64, + .KernelStartPointer = pipeline->cs_simd, + .BindingTablePointer = surfaces.offset, + .SamplerStatePointer = samplers.offset, + .ConstantURBEntryReadLength = + push_constant_regs, + .ConstantURBEntryReadOffset = 0, + .BarrierEnable = cs_prog_data->uses_barrier, + .SharedLocalMemorySize = slm_size, + .NumberofThreadsinGPGPUThreadGroup = + pipeline->cs_thread_width_max); + + const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +void +genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + if (cmd_buffer->state.current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } + + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { + /* FIXME: figure out descriptors for gen7 */ + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + cmd_buffer->state.compute_dirty = 0; +} + +void +genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t 
*p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN7_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GEN7_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA, + .VertexBufferMemoryObjectControlState = GEN7_MOCS, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1}, + .InstanceDataStepRate = 1 + }; + + GEN7_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || + cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) { + /* From the IVB PRM Vol. 
2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth + * stall needs to be sent just prior to any 3DSTATE_VS, + * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, + * 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one + * PIPE_CONTROL needs to be sent before any combination of VS + * associated 3DSTATE." + */ + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &cmd_buffer->device->workaround_bo, 0 }); + } + + uint32_t dirty = 0; + if (cmd_buffer->state.descriptors_dirty) { + dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); + } + + if (cmd_buffer->state.push_constants_dirty) + cmd_buffer_flush_push_constants(cmd_buffer); + + /* We use the gen8 state here because it only contains the additional + * min/max fields and, since they occur at the end of the packet and + * don't change the stride, they work on gen7 too. + */ + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + gen8_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) + gen7_cmd_buffer_emit_scissor(cmd_buffer); + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_RENDER_TARGETS | + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) { + + bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || + cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; + + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + const struct anv_image *image = iview ? iview->image : NULL; + const uint32_t depth_format = image ? 
+ isl_surf_get_depth_format(&cmd_buffer->device->isl_dev, + &image->depth_surface.isl) : D16_UNORM; + + uint32_t sf_dw[GEN7_3DSTATE_SF_length]; + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + .DepthBufferSurfaceFormat = depth_format, + .LineWidth = cmd_buffer->state.dynamic.line_width, + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, + .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp + }; + GEN7_3DSTATE_SF_pack(NULL, sf_dw, &sf); + + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN7_COLOR_CALC_STATE_length * 4, + 64); + struct GEN7_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + .StencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.front, + .BackFaceStencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.back, + }; + GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_RENDER_TARGETS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { + uint32_t 
depth_stencil_dw[GEN7_DEPTH_STENCIL_STATE_length]; + + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + + struct GEN7_DEPTH_STENCIL_STATE depth_stencil = { + .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), + + .StencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, + .StencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + }; + GEN7_DEPTH_STENCIL_STATE_pack(NULL, depth_stencil_dw, &depth_stencil); + + struct anv_state ds_state = + anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw, + pipeline->gen7.depth_stencil_state, + GEN7_DEPTH_STENCIL_STATE_length, 64); + + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, + .PointertoDEPTH_STENCIL_STATE = ds_state.offset); + } + + if (cmd_buffer->state.gen7.index_buffer && + cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_INDEX_BUFFER)) { + struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer; + uint32_t offset = cmd_buffer->state.gen7.index_offset; + + if (ANV_IS_HASWELL) { + anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF, + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index); + } + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_INDEX_BUFFER, + .CutIndexEnable = pipeline->primitive_restart, + .IndexFormat = cmd_buffer->state.gen7.index_type, + .MemoryObjectControlState = GEN7_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size }); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +void genX(CmdSetEvent)( + VkCommandBuffer commandBuffer, + VkEvent event, + 
VkPipelineStageFlags stageMask) +{ + stub(); +} + +void genX(CmdResetEvent)( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void genX(CmdWaitEvents)( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + stub(); +} diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c new file mode 100644 index 00000000000..7c054fa56d5 --- /dev/null +++ b/src/intel/vulkan/gen7_pipeline.c @@ -0,0 +1,410 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen7_pack.h" +#include "genxml/gen75_pack.h" + +#include "genX_pipeline_util.h" + +static void +gen7_emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterizationStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + + /* LegacyGlobalDepthBiasEnable */ + + .StatisticsEnable = true, + .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .ViewTransformEnable = !(extra && extra->disable_viewport), + .FrontWinding = vk_to_gen_front_face[info->frontFace], + /* bool AntiAliasingEnable; */ + + .CullMode = vk_to_gen_cullmode[info->cullMode], + + /* uint32_t LineEndCapAntialiasingRegionWidth; */ + .ScissorRectangleEnable = !(extra && extra->disable_scissor), + + /* uint32_t MultisampleRasterizationMode; */ + /* bool LastPixelEnable; */ + + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + + /* uint32_t AALineDistanceMode; */ + /* uint32_t VertexSubPixelPrecisionSelect; */ + .UsePointWidthState = !pipeline->writes_point_size, + .PointWidth = 1.0, + }; + + GEN7_3DSTATE_SF_pack(NULL, &pipeline->gen7.sf, &sf); +} + +static void +gen7_emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. 
+ */ + memset(pipeline->gen7.depth_stencil_state, 0, + sizeof(pipeline->gen7.depth_stencil_state)); + return; + } + + struct GEN7_DEPTH_STENCIL_STATE state = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], + + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], + .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.depthFailOp], + .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], + }; + + GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state); +} + +static void +gen7_emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info) +{ + struct anv_device *device = pipeline->device; + + if (info == NULL || info->attachmentCount == 0) { + pipeline->blend_state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_BLEND_STATE, 64, + .ColorBufferBlendEnable = false, + .WriteDisableAlpha = true, + .WriteDisableRed = true, + .WriteDisableGreen = true, + .WriteDisableBlue = true); + } else { + /* FIXME-GEN7: All render targets share blend state settings on gen7, we + * can't implement this. 
+ */ + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; + pipeline->blend_state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_BLEND_STATE, 64, + + .ColorBufferBlendEnable = a->blendEnable, + .IndependentAlphaBlendEnable = true, /* FIXME: yes? */ + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], + + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + +# if 0 + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; +# endif + + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + +# if 0 + bool AlphaTestEnable; + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; +# endif + ); + } + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS, + .BlendStatePointer = pipeline->blend_state.offset); +} + +GENX_FUNC(GEN7, GEN75) VkResult +genX(graphics_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult 
result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_pipeline_init(pipeline, device, cache, + pCreateInfo, extra, pAllocator); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + + assert(pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); + + assert(pCreateInfo->pRasterizationState); + gen7_emit_rs_state(pipeline, pCreateInfo->pRasterizationState, extra); + + gen7_emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + + gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); + + emit_urb_setup(pipeline); + + const VkPipelineRasterizationStateCreateInfo *rs_info = + pCreateInfo->pRasterizationState; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_CLIP, + .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], + .CullMode = vk_to_gen_cullmode[rs_info->cullMode], + .ClipEnable = true, + .APIMode = APIMODE_OGL, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .ClipMode = CLIPMODE_NORMAL, + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875, + .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); + + if (pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->rasterizationSamples > 1) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); + + uint32_t samples = 1; + uint32_t log2_samples = __builtin_ffs(samples) - 1; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_MULTISAMPLE, + .PixelLocation = PIXLOC_CENTER, + .NumberofMultisamples = log2_samples); + + 
anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SAMPLE_MASK, + .SampleMask = 0xff); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* The last geometry producing stage will set urb_offset and urb_length, + * which we use in 3DSTATE_SBE. Skip the VUE header and position slots. */ + uint32_t urb_offset = 1; + uint32_t urb_length = (vue_prog_data->vue_map.num_slots + 1) / 2 - urb_offset; + +#if 0 + /* From gen7_vs_state.c */ + + /** + * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > + * Geometry > Geometry Shader > State: + * + * "Note: Because of corruption in IVB:GT2, software needs to flush the + * whole fixed function pipeline when the GS enable changes value in + * the 3DSTATE_GS." + * + * The hardware architects have clarified that in this context "flush the + * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS + * Stall" bit set. + */ + if (!brw->is_haswell && !brw->is_baytrail) + gen7_emit_vs_workaround_flush(brw); +#endif + + if (pipeline->vs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .VSFunctionEnable = false); + else + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), + .KernelStartPointer = pipeline->vs_vec4, + .ScratchSpaceBaseOffset = pipeline->scratch_start[MESA_SHADER_VERTEX], + .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + + .DispatchGRFStartRegisterforURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = true, + .VSFunctionEnable = true); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + + if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); + } else { + urb_offset = 1; + urb_length = 
(gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset; + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), + .KernelStartPointer = pipeline->gs_kernel, + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], + .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, + .DispatchGRFStartRegisterforURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads - 1, + /* This in the next dword on HSW. */ + .ControlDataFormat = gs_prog_data->control_data_format, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + .InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1, + .DispatchMode = gs_prog_data->base.dispatch_mode, + .GSStatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, +# if (ANV_IS_HASWELL) + .ReorderMode = REORDER_TRAILING, +# else + .ReorderEnable = true, +# endif + .GSEnable = true); + } + + if (pipeline->ps_ksp0 == NO_KERNEL) { + anv_finishme("disabling ps"); + + /* FIXME: generated header doesn't emit attr swizzle fields */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE); + + /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). 
*/ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + .StatisticsEnable = true, + .ThreadDispatchEnable = false, + .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ + .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ + .EarlyDepthStencilControl = EDSC_NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT); + + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + + } else { + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || + wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) + anv_finishme("two-sided color needs sbe swizzling setup"); + if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) + anv_finishme("primitive_id needs sbe swizzling setup"); + + /* FIXME: generated header doesn't emit attr swizzle fields */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE, + .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, + .VertexURBEntryReadLength = urb_length, + .VertexURBEntryReadOffset = urb_offset, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .KernelStartPointer0 = pipeline->ps_ksp0, + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), + + .MaximumNumberofThreads = device->info.max_wm_threads - 1, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + + .RenderTargetFastClearEnable = false, + .DualSourceBlendEnable = false, + .RenderTargetResolveEnable = false, + + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
+ POSOFFSET_SAMPLE : POSOFFSET_NONE, + + ._32PixelDispatchEnable = false, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + + .DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterforConstantSetupData1 = 0, + .DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2, + +#if 0 + /* Haswell requires the sample mask to be set in this packet as well as + * in 3DSTATE_SAMPLE_MASK; the values should match. */ + /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ +#endif + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + .StatisticsEnable = true, + .ThreadDispatchEnable = true, + .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ + .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ + .EarlyDepthStencilControl = EDSC_NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, + .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, + .PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask, + .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes); + } + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c new file mode 100644 index 00000000000..77bdb75260c --- /dev/null +++ b/src/intel/vulkan/gen7_state.c @@ -0,0 +1,264 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, 
distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen7_pack.h" +#include "genxml/gen75_pack.h" + +#include "genX_state_util.h" + +VkResult +genX(init_device_state)(struct anv_device *device) +{ + struct anv_batch batch; + + uint32_t cmds[64]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = _3D); + + anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), + .StatisticsEnable = true); + anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); + anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_DS), .DSFunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); + anv_batch_emit(&batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); + + assert(batch.next <= batch.end); + + return anv_device_submit_simple_batch(device, &batch); +} + +GENX_FUNC(GEN7, GEN75) void +genX(fill_buffer_surface_state)(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + 
uint32_t stride) +{ + uint32_t num_elements = range / stride; + + struct GENX(RENDER_SURFACE_STATE) surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceFormat = format, + .SurfaceVerticalAlignment = VALIGN_4, + .SurfaceHorizontalAlignment = HALIGN_4, + .TiledSurface = false, + .RenderCacheReadWriteMode = false, + .SurfaceObjectControlState = GENX(MOCS), + .Height = ((num_elements - 1) >> 7) & 0x3fff, + .Width = (num_elements - 1) & 0x7f, + .Depth = ((num_elements - 1) >> 21) & 0x3f, + .SurfacePitch = stride - 1, +# if (ANV_IS_HASWELL) + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, +# endif + .SurfaceBaseAddress = { NULL, offset }, + }; + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); +} + +VkResult genX(CreateSampler)( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN7_SAMPLER_STATE sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .LODPreClampEnable = CLAMP_ENABLE_OGL, + .BaseMipLevel = 0.0, + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], + .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, + pCreateInfo->anisotropyEnable), + .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, + pCreateInfo->anisotropyEnable), + .TextureLODBias = pCreateInfo->mipLodBias * 256, + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = pCreateInfo->minLod, + .MaxLOD = pCreateInfo->maxLod, + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + 
.ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = OVERRIDE, + + .BorderColorPointer = + device->border_colors.offset + + pCreateInfo->borderColor * sizeof(float) * 4, + + .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], + }; + + GEN7_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} + +static const uint8_t anv_halign[] = { + [4] = HALIGN_4, + [8] = HALIGN_8, +}; + +static const uint8_t anv_valign[] = { + [2] = VALIGN_2, + [4] = VALIGN_4, +}; + +void +genX(fill_image_surface_state)(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) + anv_finishme("non-2D image views"); + + assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); + assert(util_is_power_of_two(usage)); + + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + uint32_t depth = 1; + if (range->layerCount > 1) { + depth = range->layerCount; + } else 
if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(&surface->isl); + + struct GENX(RENDER_SURFACE_STATE) template = { + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, + usage == VK_IMAGE_USAGE_STORAGE_BIT), + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), + .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], + .SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], + + /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if + * Tiled Surface is False." + */ + .TiledSurface = surface->isl.tiling != ISL_TILING_LINEAR, + .TileWalk = surface->isl.tiling == ISL_TILING_Y0 ? + TILEWALK_YMAJOR : TILEWALK_XMAJOR, + + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + + .RenderCacheReadWriteMode = 0, /* TEMPLATE */ + + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->isl.row_pitch - 1, + .MinimumArrayElement = range->baseArrayLayer, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + .SurfaceObjectControlState = GENX(MOCS), + + .MIPCountLOD = 0, /* TEMPLATE */ + .SurfaceMinLOD = 0, /* TEMPLATE */ + + .MCSEnable = false, +# if (ANV_IS_HASWELL) + .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], +# else /* XXX: Seriously? */ + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, +# endif + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, iview->offset }, + }; + + if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + /* For render target surfaces, the hardware interprets field + * MIPCount/LOD as LOD. 
The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + template.MIPCountLOD = range->baseMipLevel; + template.SurfaceMinLOD = 0; + } else { + /* For non render target surfaces, the hardware interprets field + * MIPCount/LOD as MIPCount. The range of levels accessible by the + * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. + */ + template.SurfaceMinLOD = range->baseMipLevel; + template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; + } + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); +} diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c new file mode 100644 index 00000000000..b741612c891 --- /dev/null +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -0,0 +1,914 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen8_pack.h" +#include "genxml/gen9_pack.h" + +static uint32_t +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t push_constant_opcodes[] = { + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, + }; + + VkShaderStageFlags flushed = 0; + + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, + .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + + flushed |= mesa_to_vk_shader_stage(stage); + } + + cmd_buffer->state.push_constants_dirty &= ~flushed; + + return flushed; +} + +#if ANV_GEN == 8 +static void +emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkViewport *viewports) +{ + struct anv_state sf_clip_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + + /* The gen7 state struct has just the matrix and guardband fields, the + * gen8 struct adds the min/max viewport fields. 
*/ + struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = 1.0, + .ViewportMatrixElementm30 = vp->x + vp->width / 2, + .ViewportMatrixElementm31 = vp->y + vp->height / 2, + .ViewportMatrixElementm32 = 0.0, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->x, + .XMaxViewPort = vp->x + vp->width - 1, + .YMinViewPort = vp->y, + .YMaxViewPort = vp->y + vp->height - 1, + }; + + struct GENX(CC_VIEWPORT) cc_viewport = { + .MinimumDepth = vp->minDepth, + .MaximumDepth = vp->maxDepth + }; + + GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, + &sf_clip_viewport); + GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); + } + + if (!cmd_buffer->device->info.has_llc) { + anv_state_clflush(sf_clip_state); + anv_state_clflush(cc_state); + } + + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), + .CCViewportPointer = cc_state.offset); + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), + .SFClipViewportPointer = sf_clip_state.offset); +} + +void +gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.viewport.count > 0) { + emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count, + cmd_buffer->state.dynamic.viewport.viewports); + } else { + /* If viewport count is 0, this is taken to mean "use the default" */ + emit_viewport_state(cmd_buffer, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + } +} +#endif + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + .RegisterOffset = reg, + .DataDWord = imm); +} + 
+#define GEN8_L3CNTLREG 0x7034 + +static void +config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) +{ + /* References for GL state: + * + * - commits e307cfa..228d5a3 + * - src/mesa/drivers/dri/i965/gen7_l3_state.c + */ + + uint32_t val = enable_slm ? + /* All = 48 ways; URB = 16 ways; DC and RO = 0, SLM = 1 */ + 0x60000021 : + /* All = 48 ways; URB = 48 ways; DC, RO and SLM = 0 */ + 0x60000060; + bool changed = cmd_buffer->state.current_l3_config != val; + + if (changed) { + /* According to the hardware docs, the L3 partitioning can only be changed + * while the pipeline is completely drained and the caches are flushed, + * which involves a first PIPE_CONTROL flush which stalls the pipeline and + * initiates invalidation of the relevant caches... + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + /* ...followed by a second stalling flush which guarantees that + * invalidation is complete when the L3 configuration registers are + * modified. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + emit_lri(&cmd_buffer->batch, GEN8_L3CNTLREG, val); + cmd_buffer->state.current_l3_config = val; + } +} + +static void +__emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), + .LineWidth = cmd_buffer->state.dynamic.line_width, + }; + GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); + /* FIXME: gen9.fs */ + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, + cmd_buffer->state.pipeline->gen8.sf); +} +static void +__emit_gen9_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GEN9_3DSTATE_SF sf = { + GEN9_3DSTATE_SF_header, + .LineWidth = cmd_buffer->state.dynamic.line_width, + }; + GEN9_3DSTATE_SF_pack(NULL, sf_dw, &sf); + /* FIXME: gen9.fs */ + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, + cmd_buffer->state.pipeline->gen8.sf); +} + +static void +__emit_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->device->info.is_cherryview) + __emit_gen9_sf_state(cmd_buffer); + else + __emit_genx_sf_state(cmd_buffer); +} + +void +genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + config_l3(cmd_buffer, false); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GENX(3DSTATE_VERTEX_BUFFERS)); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = 
cmd_buffer->state.vertex_bindings[vb].offset; + + struct GENX(VERTEX_BUFFER_STATE) state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = GENX(MOCS), + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + /* We emit the binding tables and sampler tables first, then emit push + * constants and then finally emit binding table and sampler table + * pointers. It has to happen in this order, since emitting the binding + * tables may change the push constants (in case of storage images). After + * emitting push constants, on SKL+ we have to emit the corresponding + * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. 
+ */ + uint32_t dirty = 0; + if (cmd_buffer->state.descriptors_dirty) + dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.push_constants_dirty) + dirty |= cmd_buffer_flush_push_constants(cmd_buffer); + + if (dirty) + gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + gen8_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) + gen7_cmd_buffer_emit_scissor(cmd_buffer); + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { + __emit_sf_state(cmd_buffer); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){ + bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || + cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; + + uint32_t raster_dw[GENX(3DSTATE_RASTER_length)]; + struct GENX(3DSTATE_RASTER) raster = { + GENX(3DSTATE_RASTER_header), + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, + .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp + }; + GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster); + anv_batch_emit_merge(&cmd_buffer->batch, raster_dw, + pipeline->gen8.raster); + } + + /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to + * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits gets split + * across different state packets for gen8 and gen9. We handle that by + * using a big old #if switch here. 
+ */ +#if ANV_GEN == 8 + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN8_COLOR_CALC_STATE_length * 4, + 64); + struct GEN8_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + .StencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.front, + .BackFaceStencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.back, + }; + GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { + uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + + /* Is this what we need to do? 
*/ + .StencilBufferWriteEnable = + cmd_buffer->state.dynamic.stencil_write_mask.front != 0, + + .StencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, + .StencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + }; + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw, + &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, + pipeline->gen8.wm_depth_stencil); + } +#else + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN9_COLOR_CALC_STATE_length * 4, + 64); + struct GEN9_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + }; + GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN9_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length]; + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN9_3DSTATE_WM_DEPTH_STENCIL_header, + + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || + d->stencil_write_mask.back != 0, + + 
.StencilTestMask = d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, + + .StencilReferenceValue = d->stencil_reference.front, + .BackfaceStencilReferenceValue = d->stencil_reference.back + }; + GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, dwords, + pipeline->gen9.wm_depth_stencil); + } +#endif + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_INDEX_BUFFER)) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index, + ); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +void genX(CmdBindIndexBuffer)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; + + static const uint32_t restart_index_for_type[] = { + [VK_INDEX_TYPE_UINT16] = UINT16_MAX, + [VK_INDEX_TYPE_UINT32] = UINT32_MAX, + }; + + cmd_buffer->state.restart_index = restart_index_for_type[indexType]; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; +} + +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = 
cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + MESA_SHADER_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + MESA_SHADER_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = + (prog_data->nr_params + local_id_dwords) * 4; + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + if (push_state.alloc_size) { + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), + .CURBETotalDataLength = push_state.alloc_size, + .CURBEDataStartAddress = push_state.offset); + } + + assert(prog_data->total_shared <= 64 * 1024); + uint32_t slm_size = 0; + if (prog_data->total_shared > 0) { + /* slm_size is in 4k increments, but must be a power of 2. 
*/ + slm_size = 4 * 1024; + while (slm_size < prog_data->total_shared) + slm_size <<= 1; + slm_size /= 4 * 1024; + } + + struct anv_state state = + anv_state_pool_emit(&device->dynamic_state_pool, + GENX(INTERFACE_DESCRIPTOR_DATA), 64, + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .ConstantIndirectURBEntryReadLength = push_constant_regs, + .ConstantURBEntryReadOffset = 0, + .BarrierEnable = cs_prog_data->uses_barrier, + .SharedLocalMemorySize = slm_size, + .NumberofThreadsinGPGPUThreadGroup = + pipeline->cs_thread_width_max); + + uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +void +genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + config_l3(cmd_buffer, needs_slm); + + if (cmd_buffer->state.current_pipeline != GPGPU) { +#if ANV_GEN < 10 + /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: + * + * Software must clear the COLOR_CALC_STATE Valid field in + * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT + * with Pipeline Select set to GPGPU. + * + * The internal hardware docs recommend the same workaround for Gen9 + * hardware too. 
+ */ + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_CC_STATE_POINTERS)); +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } + + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + cmd_buffer->state.compute_dirty = 0; +} + +static void +emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .DepthStallEnable = true, + .Address = { bo, offset }); +} + +static void +emit_query_availability(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { bo, offset }, + .ImmediateData = 1); +} + +void genX(CmdBeginQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + /* Workaround: When meta uses the pipeline with the VS disabled, it seems + * that the pipelining of the depth write breaks. What we see is that + * samples from the render pass clear leaks into the first query + * immediately after the clear. Doing a pipecontrol with a post-sync + * operation and DepthStallEnable seems to work around the issue. 
+ */ + if (cmd_buffer->state.need_query_wa) { + cmd_buffer->state.need_query_wa = false; + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DepthCacheFlushEnable = true, + .DepthStallEnable = true); + } + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void genX(CmdEndQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot) + 8); + + emit_query_availability(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot) + 16); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +#define TIMESTAMP 0x2358 + +void genX(CmdWriteTimestamp)( + VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t query) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + uint32_t offset = query * sizeof(struct anv_query_pool_slot); + + assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); + + switch (pipelineStage) { + case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { &pool->bo, offset }); + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { &pool->bo, offset + 4 }); + break; + + default: + /* Everything else is bottom-of-pipe */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = 
WriteTimestamp, + .Address = { &pool->bo, offset }); + break; + } + + emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16); +} + +#define alu_opcode(v) __gen_uint((v), 20, 31) +#define alu_operand1(v) __gen_uint((v), 10, 19) +#define alu_operand2(v) __gen_uint((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +static void +store_query_result(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags) +{ + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +void genX(CmdCopyQueryPoolResults)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, 
+ VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot); + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + break; + + case VK_QUERY_TYPE_TIMESTAMP: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(2), &pool->bo, slot_offset); + break; + + default: + unreachable("unhandled query type"); + } + + store_query_result(&cmd_buffer->batch, + CS_GPR(2), buffer->bo, dst_offset, flags); + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), + &pool->bo, slot_offset + 16); + if (flags & VK_QUERY_RESULT_64_BIT) + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 8, flags); + else + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 4, flags); + } + + dst_offset += destStride; + } +} + +void genX(CmdSetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, 
commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }, + .ImmediateData = VK_EVENT_SET); +} + +void genX(CmdResetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }, + .ImmediateData = VK_EVENT_RESET); +} + +void genX(CmdWaitEvents)( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + for (uint32_t i = 0; i < eventCount; i++) { + ANV_FROM_HANDLE(anv_event, event, pEvents[i]); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), + .WaitMode = PollingMode, + .CompareOperation = COMPARE_SAD_EQUAL_SDD, + .SemaphoreDataDword = VK_EVENT_SET, + .SemaphoreAddress = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }); + } + + genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, + false, /* byRegion */ + memoryBarrierCount, pMemoryBarriers, + bufferMemoryBarrierCount, pBufferMemoryBarriers, + imageMemoryBarrierCount, pImageMemoryBarriers); +} diff --git a/src/intel/vulkan/gen8_pipeline.c 
b/src/intel/vulkan/gen8_pipeline.c new file mode 100644 index 00000000000..f0411562fba --- /dev/null +++ b/src/intel/vulkan/gen8_pipeline.c @@ -0,0 +1,573 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen8_pack.h" +#include "genxml/gen9_pack.h" + +#include "genX_pipeline_util.h" + +static void +emit_ia_state(struct anv_pipeline *pipeline, + const VkPipelineInputAssemblyStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), + .PrimitiveTopologyType = pipeline->topology); +} + +static void +emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterizationStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info, + const struct anv_graphics_pipeline_create_info *extra) +{ + uint32_t samples = 1; + + if (ms_info) + samples = ms_info->rasterizationSamples; + + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), + .ViewportTransformEnable = !(extra && extra->disable_viewport), + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .PointWidthSource = pipeline->writes_point_size ? Vertex : State, + .PointWidth = 1.0, + }; + + /* FINISHME: VkBool32 rasterizerDiscardEnable; */ + + GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf); + + struct GENX(3DSTATE_RASTER) raster = { + GENX(3DSTATE_RASTER_header), + + /* For details on 3DSTATE_RASTER multisample state, see the BSpec table + * "Multisample Modes State". 
+ */ + .DXMultisampleRasterizationEnable = samples > 1, + .ForcedSampleCount = FSC_NUMRASTSAMPLES_0, + .ForceMultisampling = false, + + .FrontWinding = vk_to_gen_front_face[info->frontFace], + .CullMode = vk_to_gen_cullmode[info->cullMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .ScissorRectangleEnable = !(extra && extra->disable_scissor), +#if ANV_GEN == 8 + .ViewportZClipTestEnable = true, +#else + /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ + .ViewportZFarClipTestEnable = true, + .ViewportZNearClipTestEnable = true, +#endif + }; + + GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster); +} + +static void +emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info) +{ + struct anv_device *device = pipeline->device; + + uint32_t num_dwords = GENX(BLEND_STATE_length); + pipeline->blend_state = + anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); + + struct GENX(BLEND_STATE) blend_state = { + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, + }; + + for (uint32_t i = 0; i < info->attachmentCount; i++) { + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; + + if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || + a->dstColorBlendFactor != a->dstAlphaBlendFactor || + a->colorBlendOp != a->alphaBlendOp) { + blend_state.IndependentAlphaBlendEnable = true; + } + + blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) { + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .ColorBufferBlendEnable = a->blendEnable, + .PreBlendSourceOnlyClampEnable = false, + .ColorClampRange = COLORCLAMP_RTFORMAT, + .PreBlendColorClampEnable = true, + .PostBlendColorClampEnable = true, + .SourceBlendFactor = 
vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + }; + + /* Our hardware applies the blend factor prior to the blend function + * regardless of what function is used. Technically, this means the + * hardware can do MORE than GL or Vulkan specify. However, it also + * means that, for MIN and MAX, we have to stomp the blend factor to + * ONE to make it a no-op. + */ + if (a->colorBlendOp == VK_BLEND_OP_MIN || + a->colorBlendOp == VK_BLEND_OP_MAX) { + blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE; + blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE; + } + if (a->alphaBlendOp == VK_BLEND_OP_MIN || + a->alphaBlendOp == VK_BLEND_OP_MAX) { + blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE; + blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE; + } + } + + for (uint32_t i = info->attachmentCount; i < 8; i++) { + blend_state.Entry[i].WriteDisableAlpha = true; + blend_state.Entry[i].WriteDisableRed = true; + blend_state.Entry[i].WriteDisableGreen = true; + blend_state.Entry[i].WriteDisableBlue = true; + } + + GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); + if (!device->info.has_llc) + anv_state_clflush(pipeline->blend_state); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), + .BlendStatePointer = pipeline->blend_state.offset, + .BlendStatePointerValid = true); +} + +static 
void +emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + uint32_t *dw = ANV_GEN == 8 ? + pipeline->gen8.wm_depth_stencil : pipeline->gen9.wm_depth_stencil; + + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. + */ + memset(pipeline->gen8.wm_depth_stencil, 0, + sizeof(pipeline->gen8.wm_depth_stencil)); + memset(pipeline->gen9.wm_depth_stencil, 0, + sizeof(pipeline->gen9.wm_depth_stencil)); + return; + } + + /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */ + + struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], + .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.depthFailOp], + .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], + }; + + GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil); +} + +static void +emit_ms_state(struct anv_pipeline *pipeline, + const VkPipelineMultisampleStateCreateInfo *info) +{ + uint32_t samples = 1; + uint32_t log2_samples = 0; + + /* From the Vulkan 1.0 spec: + * If pSampleMask is NULL, it is treated as if the mask has all bits + * enabled, i.e. no coverage is removed from fragments. + * + * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits. 
+ */ + uint32_t sample_mask = 0xffff; + + if (info) { + samples = info->rasterizationSamples; + log2_samples = __builtin_ffs(samples) - 1; + } + + if (info && info->pSampleMask) + sample_mask &= info->pSampleMask[0]; + + if (info && info->sampleShadingEnable) + anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable"); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), + + /* The PRM says that this bit is valid only for DX9: + * + * SW can choose to set this bit only for DX9 API. DX10/OGL API's + * should not have any effect by setting or not setting this bit. + */ + .PixelPositionOffsetEnable = false, + + .PixelLocation = CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), + .SampleMask = sample_mask); +} + +VkResult +genX(graphics_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + uint32_t offset, length; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_pipeline_init(pipeline, device, cache, + pCreateInfo, extra, pAllocator); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + + assert(pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); + assert(pCreateInfo->pInputAssemblyState); + emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); + assert(pCreateInfo->pRasterizationState); + emit_rs_state(pipeline, 
pCreateInfo->pRasterizationState, + pCreateInfo->pMultisampleState, extra); + emit_ms_state(pipeline, pCreateInfo->pMultisampleState); + emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + emit_cb_state(pipeline, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); + + emit_urb_setup(pipeline); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), + .ClipEnable = true, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875, + .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), + .StatisticsEnable = true, + .LineEndCapAntialiasingRegionWidth = _05pixels, + .LineAntialiasingRegionWidth = _10pixels, + .EarlyDepthStencilControl = NORMAL, + .ForceThreadDispatchEnable = NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .BarycentricInterpolationMode = + pipeline->ps_ksp0 == NO_KERNEL ? + 0 : pipeline->wm_prog_data.barycentric_interp_modes); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->gs_kernel == NO_KERNEL) + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false); + else + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), + .SingleProgramFlow = false, + .KernelStartPointer = pipeline->gs_kernel, + .VectorMaskEnable = false, + .SamplerCount = 0, + .BindingTableEntryCount = 0, + .ExpectedVertexCount = gs_prog_data->vertices_in, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], + .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, + 
.DispatchGRFStartRegisterForURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + .DispatchMode = gs_prog_data->base.dispatch_mode, + .StatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, + .ReorderMode = TRAILING, + .Enable = true, + + .ControlDataFormat = gs_prog_data->control_data_format, + + .StaticOutput = gs_prog_data->static_vertex_count >= 0, + .StaticOutputVertexCount = + gs_prog_data->static_vertex_count >= 0 ? + gs_prog_data->static_vertex_count : 0, + + /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: + * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) + * UserClipDistanceCullTestEnableBitmask(v) + */ + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* Skip the VUE header and position slots */ + offset = 1; + length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + + uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 : + pipeline->vs_vec4; + + if (vs_start == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), + .FunctionEnable = false, + /* Even if VS is disabled, SBE still gets the amount of + * vertex data to read from this field. 
*/ + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + else + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), + .KernelStartPointer = vs_start, + .SingleVertexDispatch = false, + .VectorMaskEnable = false, + .SamplerCount = 0, + .BindingTableEntryCount = + vue_prog_data->base.binding_table.size_bytes / 4, + .ThreadDispatchPriority = false, + .FloatingPointMode = IEEE754, + .IllegalOpcodeExceptionEnable = false, + .AccessesUAV = false, + .SoftwareExceptionEnable = false, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX], + .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + + .DispatchGRFStartRegisterForURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = false, + .SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL, + .VertexCacheDisable = false, + .FunctionEnable = true, + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length, + .UserClipDistanceClipTestEnableBitmask = 0, + .UserClipDistanceCullTestEnableBitmask = 0); + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + + const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; + if (pipeline->ps_ksp0 == NO_KERNEL) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), + .PixelShaderValid = false); + } else { + /* TODO: We should clean this up. Among other things, this is mostly + * shared with other gens. 
+ */ + const struct brw_vue_map *fs_input_map; + if (pipeline->gs_kernel == NO_KERNEL) + fs_input_map = &vue_prog_data->vue_map; + else + fs_input_map = &gs_prog_data->base.vue_map; + + struct GENX(3DSTATE_SBE_SWIZ) swiz = { + GENX(3DSTATE_SBE_SWIZ_header), + }; + + int max_source_attr = 0; + for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { + int input_index = wm_prog_data->urb_setup[attr]; + + if (input_index < 0) + continue; + + int source_attr = fs_input_map->varying_to_slot[attr]; + max_source_attr = MAX2(max_source_attr, source_attr); + + if (input_index >= 16) + continue; + + if (source_attr == -1) { + /* This attribute does not exist in the VUE--that means that the + * vertex shader did not write to it. It could be that it's a + * regular varying read by the fragment shader but not written by + * the vertex shader or it's gl_PrimitiveID. In the first case the + * value is undefined, in the second it needs to be + * gl_PrimitiveID. + */ + swiz.Attribute[input_index].ConstantSource = PRIM_ID; + swiz.Attribute[input_index].ComponentOverrideX = true; + swiz.Attribute[input_index].ComponentOverrideY = true; + swiz.Attribute[input_index].ComponentOverrideZ = true; + swiz.Attribute[input_index].ComponentOverrideW = true; + } else { + /* We have to subtract two slots to accout for the URB entry output + * read offset in the VS and GS stages. 
+ */ + swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + } + } + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), + .AttributeSwizzleEnable = true, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .VertexURBEntryReadLength = + DIV_ROUND_UP(max_source_attr + 1, 2), + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = + wm_prog_data->num_varying_inputs, + +#if ANV_GEN >= 9 + .Attribute0ActiveComponentFormat = ACF_XYZW, + .Attribute1ActiveComponentFormat = ACF_XYZW, + .Attribute2ActiveComponentFormat = ACF_XYZW, + .Attribute3ActiveComponentFormat = ACF_XYZW, + .Attribute4ActiveComponentFormat = ACF_XYZW, + .Attribute5ActiveComponentFormat = ACF_XYZW, + .Attribute6ActiveComponentFormat = ACF_XYZW, + .Attribute7ActiveComponentFormat = ACF_XYZW, + .Attribute8ActiveComponentFormat = ACF_XYZW, + .Attribute9ActiveComponentFormat = ACF_XYZW, + .Attribute10ActiveComponentFormat = ACF_XYZW, + .Attribute11ActiveComponentFormat = ACF_XYZW, + .Attribute12ActiveComponentFormat = ACF_XYZW, + .Attribute13ActiveComponentFormat = ACF_XYZW, + .Attribute14ActiveComponentFormat = ACF_XYZW, + .Attribute15ActiveComponentFormat = ACF_XYZW, + /* wow, much field, very attribute */ + .Attribute16ActiveComponentFormat = ACF_XYZW, + .Attribute17ActiveComponentFormat = ACF_XYZW, + .Attribute18ActiveComponentFormat = ACF_XYZW, + .Attribute19ActiveComponentFormat = ACF_XYZW, + .Attribute20ActiveComponentFormat = ACF_XYZW, + .Attribute21ActiveComponentFormat = ACF_XYZW, + .Attribute22ActiveComponentFormat = ACF_XYZW, + .Attribute23ActiveComponentFormat = ACF_XYZW, + .Attribute24ActiveComponentFormat = ACF_XYZW, + .Attribute25ActiveComponentFormat = ACF_XYZW, + .Attribute26ActiveComponentFormat = ACF_XYZW, + .Attribute27ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + 
.Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute30ActiveComponentFormat = ACF_XYZW, +#endif + ); + + uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, + GENX(3DSTATE_SBE_SWIZ_length)); + GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .KernelStartPointer0 = pipeline->ps_ksp0, + + .SingleProgramFlow = false, + .VectorMaskEnable = true, + .SamplerCount = 1, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), + + .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE: POSOFFSET_NONE, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._32PixelDispatchEnable = false, + + .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterForConstantSetupData1 = 0, + .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2, + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + bool per_sample_ps = pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->sampleShadingEnable; + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), + .PixelShaderValid = true, + .PixelShaderKillsPixel = wm_prog_data->uses_kill, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .PixelShaderIsPerSample = per_sample_ps, + .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, + .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, +#if ANV_GEN >= 9 + .PixelShaderPullsBary = wm_prog_data->pulls_bary, + .InputCoverageMaskState = wm_prog_data->uses_sample_mask ? 
+ ICMS_INNER_CONSERVATIVE : ICMS_NONE, +#else + .PixelShaderUsesInputCoverageMask = + wm_prog_data->uses_sample_mask, +#endif + ); + } + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c new file mode 100644 index 00000000000..04cfff5444d --- /dev/null +++ b/src/intel/vulkan/gen8_state.c @@ -0,0 +1,493 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen8_pack.h" +#include "genxml/gen9_pack.h" + +#include "genX_state_util.h" + +VkResult +genX(init_device_state)(struct anv_device *device) +{ + struct anv_batch batch; + + uint32_t cmds[64]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = _3D); + + anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), + .StatisticsEnable = true); + anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); + anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_DS), .FunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), + .ChromaKeyKillEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); + + /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and + * VkPhysicalDeviceFeatures::standardSampleLocations. 
+ */ + anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN), + ._1xSample0XOffset = 0.5, + ._1xSample0YOffset = 0.5, + ._2xSample0XOffset = 0.25, + ._2xSample0YOffset = 0.25, + ._2xSample1XOffset = 0.75, + ._2xSample1YOffset = 0.75, + ._4xSample0XOffset = 0.375, + ._4xSample0YOffset = 0.125, + ._4xSample1XOffset = 0.875, + ._4xSample1YOffset = 0.375, + ._4xSample2XOffset = 0.125, + ._4xSample2YOffset = 0.625, + ._4xSample3XOffset = 0.625, + ._4xSample3YOffset = 0.875, + ._8xSample0XOffset = 0.5625, + ._8xSample0YOffset = 0.3125, + ._8xSample1XOffset = 0.4375, + ._8xSample1YOffset = 0.6875, + ._8xSample2XOffset = 0.8125, + ._8xSample2YOffset = 0.5625, + ._8xSample3XOffset = 0.3125, + ._8xSample3YOffset = 0.1875, + ._8xSample4XOffset = 0.1875, + ._8xSample4YOffset = 0.8125, + ._8xSample5XOffset = 0.0625, + ._8xSample5YOffset = 0.4375, + ._8xSample6XOffset = 0.6875, + ._8xSample6YOffset = 0.9375, + ._8xSample7XOffset = 0.9375, + ._8xSample7YOffset = 0.0625, +#if ANV_GEN >= 9 + ._16xSample0XOffset = 0.5625, + ._16xSample0YOffset = 0.5625, + ._16xSample1XOffset = 0.4375, + ._16xSample1YOffset = 0.3125, + ._16xSample2XOffset = 0.3125, + ._16xSample2YOffset = 0.6250, + ._16xSample3XOffset = 0.7500, + ._16xSample3YOffset = 0.4375, + ._16xSample4XOffset = 0.1875, + ._16xSample4YOffset = 0.3750, + ._16xSample5XOffset = 0.6250, + ._16xSample5YOffset = 0.8125, + ._16xSample6XOffset = 0.8125, + ._16xSample6YOffset = 0.6875, + ._16xSample7XOffset = 0.6875, + ._16xSample7YOffset = 0.1875, + ._16xSample8XOffset = 0.3750, + ._16xSample8YOffset = 0.8750, + ._16xSample9XOffset = 0.5000, + ._16xSample9YOffset = 0.0625, + ._16xSample10XOffset = 0.2500, + ._16xSample10YOffset = 0.1250, + ._16xSample11XOffset = 0.1250, + ._16xSample11YOffset = 0.7500, + ._16xSample12XOffset = 0.0000, + ._16xSample12YOffset = 0.5000, + ._16xSample13XOffset = 0.9375, + ._16xSample13YOffset = 0.2500, + ._16xSample14XOffset = 0.8750, + ._16xSample14YOffset = 0.9375, + ._16xSample15XOffset = 0.0625, + 
._16xSample15YOffset = 0.0000, +#endif + ); + + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); + + assert(batch.next <= batch.end); + + return anv_device_submit_simple_batch(device, &batch); +} + +static const uint32_t +isl_to_gen_multisample_layout[] = { + [ISL_MSAA_LAYOUT_NONE] = MSS, + [ISL_MSAA_LAYOUT_INTERLEAVED] = DEPTH_STENCIL, + [ISL_MSAA_LAYOUT_ARRAY] = MSS, +}; + +void +genX(fill_buffer_surface_state)(void *state, enum isl_format format, + uint32_t offset, uint32_t range, uint32_t stride) +{ + uint32_t num_elements = range / stride; + + struct GENX(RENDER_SURFACE_STATE) surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceArray = false, + .SurfaceFormat = format, + .SurfaceVerticalAlignment = VALIGN4, + .SurfaceHorizontalAlignment = HALIGN4, + .TileMode = LINEAR, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GENX(MOCS), + .Height = ((num_elements - 1) >> 7) & 0x3fff, + .Width = (num_elements - 1) & 0x7f, + .Depth = ((num_elements - 1) >> 21) & 0x3f, + .SurfacePitch = stride - 1, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + /* FIXME: We assume that the image must be bound at this time. */ + .SurfaceBaseAddress = { NULL, offset }, + }; + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); +} + +static const uint8_t anv_halign[] = { + [4] = HALIGN4, + [8] = HALIGN8, + [16] = HALIGN16, +}; + +static const uint8_t anv_valign[] = { + [4] = VALIGN4, + [8] = VALIGN8, + [16] = VALIGN16, +}; + +/** + * Get the values to pack into RENDER_SUFFACE_STATE.SurfaceHorizontalAlignment + * and SurfaceVerticalAlignment. 
+ */ +static void +get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valign) +{ + #if ANV_GENx10 >= 90 + if (isl_tiling_is_std_y(surf->tiling) || + surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { + /* The hardware ignores the alignment values. Anyway, the surface's + * true alignment is likely outside the enum range of HALIGN* and + * VALIGN*. + */ + *halign = 0; + *valign = 0; + } else { + /* In Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in units + * of surface elements (not pixels nor samples). For compressed formats, + * a "surface element" is defined as a compression block. For example, + * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2 + * format (ETC2 has a block height of 4), then the vertical alignment is + * 4 compression blocks or, equivalently, 16 pixels. + */ + struct isl_extent3d image_align_el + = isl_surf_get_image_alignment_el(surf); + + *halign = anv_halign[image_align_el.width]; + *valign = anv_valign[image_align_el.height]; + } + #else + /* Pre-Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in + * units of surface samples. For example, if SurfaceVerticalAlignment + * is VALIGN_4 and the surface is singlesampled, then for any surface + * format (compressed or not) the vertical alignment is + * 4 pixels. + */ + struct isl_extent3d image_align_sa + = isl_surf_get_image_alignment_sa(surf); + + *halign = anv_halign[image_align_sa.width]; + *valign = anv_valign[image_align_sa.height]; + #endif +} + +static uint32_t +get_qpitch(const struct isl_surf *surf) +{ + switch (surf->dim) { + default: + unreachable(!"bad isl_surf_dim"); + case ISL_SURF_DIM_1D: + #if ANV_GENx10 >= 90 + /* QPitch is usually expressed as rows of surface elements (where + * a surface element is an compression block or a single surface + * sample). Skylake 1D is an outlier. 
+ * + * From the Skylake BSpec >> Memory Views >> Common Surface + * Formats >> Surface Layout and Tiling >> 1D Surfaces: + * + * Surface QPitch specifies the distance in pixels between array + * slices. + */ + return isl_surf_get_array_pitch_el(surf); + #else + return isl_surf_get_array_pitch_el_rows(surf); + #endif + case ISL_SURF_DIM_2D: + case ISL_SURF_DIM_3D: + #if ANV_GEN >= 9 + return isl_surf_get_array_pitch_el_rows(surf); + #else + /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch + * + * "This field must be set to an integer multiple of the Surface + * Vertical Alignment. For compressed textures (BC*, FXT1, + * ETC*, and EAC* Surface Formats), this field is in units of + * rows in the uncompressed surface, and must be set to an + * integer multiple of the vertical alignment parameter "j" + * defined in the Common Surface Formats section." + */ + return isl_surf_get_array_pitch_sa_rows(surf); + #endif + } +} + +void +genX(fill_image_surface_state)(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); + assert(util_is_power_of_two(usage)); + + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + static const uint8_t isl_to_gen_tiling[] = { + [ISL_TILING_LINEAR] = LINEAR, + [ISL_TILING_X] = XMAJOR, + [ISL_TILING_Y0] = YMAJOR, + [ISL_TILING_Yf] = YMAJOR, + [ISL_TILING_Ys] = YMAJOR, + [ISL_TILING_W] = WMAJOR, + }; + + uint32_t halign, valign; + get_halign_valign(&surface->isl, &halign, &valign); + + struct GENX(RENDER_SURFACE_STATE) template = { + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, 
is_storage), + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), + .SurfaceVerticalAlignment = valign, + .SurfaceHorizontalAlignment = halign, + .TileMode = isl_to_gen_tiling[surface->isl.tiling], + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .CubeFaceEnablePositiveZ = 1, + .CubeFaceEnableNegativeZ = 1, + .CubeFaceEnablePositiveY = 1, + .CubeFaceEnableNegativeY = 1, + .CubeFaceEnablePositiveX = 1, + .CubeFaceEnableNegativeX = 1, + .MemoryObjectControlState = GENX(MOCS), + + /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in + * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have + * both Base Mip Level fields nonzero". + */ + .BaseMipLevel = 0.0, + + .SurfaceQPitch = get_qpitch(&surface->isl) >> 2, + .Height = iview->level_0_extent.height - 1, + .Width = iview->level_0_extent.width - 1, + .Depth = 0, /* TEMPLATE */ + .SurfacePitch = surface->isl.row_pitch - 1, + .RenderTargetViewExtent = 0, /* TEMPLATE */ + .MinimumArrayElement = 0, /* TEMPLATE */ + .MultisampledSurfaceStorageFormat = + isl_to_gen_multisample_layout[surface->isl.msaa_layout], + .NumberofMultisamples = ffs(surface->isl.samples) - 1, + .MultisamplePositionPaletteIndex = 0, /* UNUSED */ + .XOffset = 0, + .YOffset = 0, + + .MIPCountLOD = 0, /* TEMPLATE */ + .SurfaceMinLOD = 0, /* TEMPLATE */ + + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, iview->offset }, + }; + + switch (template.SurfaceType) { + case SURFTYPE_1D: + 
case SURFTYPE_2D: + template.MinimumArrayElement = range->baseArrayLayer; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced + * by one for each increase from zero of Minimum Array Element. For + * example, if Minimum Array Element is set to 1024 on a 2D surface, + * the range of this field is reduced to [0,1023]. + * + * In other words, 'Depth' is the number of array layers. + */ + template.Depth = range->layerCount - 1; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 1D and 2D Surfaces: + * This field must be set to the same value as the Depth field. + */ + template.RenderTargetViewExtent = template.Depth; + break; + case SURFTYPE_CUBE: + template.MinimumArrayElement = range->baseArrayLayer; + /* Same as SURFTYPE_2D, but divided by 6 */ + template.Depth = range->layerCount / 6 - 1; + template.RenderTargetViewExtent = template.Depth; + break; + case SURFTYPE_3D: + template.MinimumArrayElement = range->baseArrayLayer; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * If the volume texture is MIP-mapped, this field specifies the + * depth of the base MIP level. + */ + template.Depth = image->extent.depth - 1; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 3D Surfaces: This field + * indicates the extent of the accessible 'R' coordinates minus 1 on + * the LOD currently being rendered to. + */ + template.RenderTargetViewExtent = iview->extent.depth - 1; + break; + default: + unreachable(!"bad SurfaceType"); + } + + if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + /* For render target surfaces, the hardware interprets field + * MIPCount/LOD as LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. 
+ */ + template.MIPCountLOD = range->baseMipLevel; + template.SurfaceMinLOD = 0; + } else { + /* For non render target surfaces, the hardware interprets field + * MIPCount/LOD as MIPCount. The range of levels accessible by the + * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. + */ + template.SurfaceMinLOD = range->baseMipLevel; + template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; + } + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); +} + +VkResult genX(CreateSampler)( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + uint32_t border_color_offset = device->border_colors.offset + + pCreateInfo->borderColor * 64; + + struct GENX(SAMPLER_STATE) sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .LODPreClampMode = CLAMP_MODE_OGL, +#if ANV_GEN == 8 + .BaseMipLevel = 0.0, +#endif + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], + .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, pCreateInfo->anisotropyEnable), + .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, pCreateInfo->anisotropyEnable), + .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), + .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14), + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = OVERRIDE, + + .IndirectStatePointer = border_color_offset >> 6, + + 
.LODClampMagnificationMode = MIPNONE, + .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], + }; + + GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c new file mode 100644 index 00000000000..5498d1d68c6 --- /dev/null +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -0,0 +1,717 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include + +#include "anv_private.h" + +#if (ANV_GEN == 9) +# include "genxml/gen9_pack.h" +#elif (ANV_GEN == 8) +# include "genxml/gen8_pack.h" +#elif (ANV_IS_HASWELL) +# include "genxml/gen75_pack.h" +#elif (ANV_GEN == 7) +# include "genxml/gen7_pack.h" +#endif + +void +genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->state.scratch_size = + anv_block_pool_size(&device->scratch_block_pool); + if (cmd_buffer->state.scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + +/* XXX: Do we need this on more than just BDW? */ +#if (ANV_GEN >= 8) + /* Emit a render target cache flush. + * + * This isn't documented anywhere in the PRM. However, it seems to be + * necessary prior to changing the surface state base adress. Without + * this, we get GPU hangs when using multi-level command buffers which + * clear depth, reset state base address, and then go render stuff. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .RenderTargetCacheFlushEnable = true); +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), + .GeneralStateBaseAddress = { scratch_bo, 0 }, + .GeneralStateMemoryObjectControlState = GENX(MOCS), + .GeneralStateBaseAddressModifyEnable = true, + + .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer), + .SurfaceStateMemoryObjectControlState = GENX(MOCS), + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GENX(MOCS), + .DynamicStateBaseAddressModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GENX(MOCS), + .IndirectObjectBaseAddressModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GENX(MOCS), + .InstructionBaseAddressModifyEnable = true, + +# if (ANV_GEN >= 8) + /* Broadwell requires that we specify a buffer size for a bunch of + * these fields. However, since we will be growing the BO's live, we + * just set them all to the maximum. + */ + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + .IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true, +# endif + ); + + /* After re-setting the surface state base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. 
It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true); +} + +void genX(CmdPipelineBarrier)( + VkCommandBuffer commandBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + uint32_t b, *dw; + + /* XXX: Right now, we're really dumb and just flush whatever categories + * the app asks for. One of these days we may make this a bit better + * but right now that's all the hardware allows for in most areas. + */ + VkAccessFlags src_flags = 0; + VkAccessFlags dst_flags = 0; + + for (uint32_t i = 0; i < memoryBarrierCount; i++) { + src_flags |= pMemoryBarriers[i].srcAccessMask; + dst_flags |= pMemoryBarriers[i].dstAccessMask; + } + + for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { + src_flags |= pBufferMemoryBarriers[i].srcAccessMask; + dst_flags |= pBufferMemoryBarriers[i].dstAccessMask; + } + + for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { + src_flags |= pImageMemoryBarriers[i].srcAccessMask; + dst_flags |= pImageMemoryBarriers[i].dstAccessMask; + } + + /* Mask out the Source access flags we care about */ + const uint32_t src_mask = + VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT; + + src_flags = src_flags & src_mask; + + /* Mask out the destination access flags we care about */ + const uint32_t dst_mask = + VK_ACCESS_INDIRECT_COMMAND_READ_BIT | + VK_ACCESS_INDEX_READ_BIT | + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | + VK_ACCESS_UNIFORM_READ_BIT | + VK_ACCESS_SHADER_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + 
VK_ACCESS_TRANSFER_READ_BIT; + + dst_flags = dst_flags & dst_mask; + + /* The src flags represent how things were used previously. This is + * what we use for doing flushes. + */ + struct GENX(PIPE_CONTROL) flush_cmd = { + GENX(PIPE_CONTROL_header), + .PostSyncOperation = NoWrite, + }; + + for_each_bit(b, src_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_SHADER_WRITE_BIT: + flush_cmd.DCFlushEnable = true; + break; + case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: + flush_cmd.RenderTargetCacheFlushEnable = true; + break; + case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: + flush_cmd.DepthCacheFlushEnable = true; + break; + case VK_ACCESS_TRANSFER_WRITE_BIT: + flush_cmd.RenderTargetCacheFlushEnable = true; + flush_cmd.DepthCacheFlushEnable = true; + break; + default: + unreachable("should've masked this out by now"); + } + } + + /* If we end up doing two PIPE_CONTROLs, the first, flusing one also has to + * stall and wait for the flushing to finish, so we don't re-dirty the + * caches with in-flight rendering after the second PIPE_CONTROL + * invalidates. + */ + + if (dst_flags) + flush_cmd.CommandStreamerStallEnable = true; + + if (src_flags && dst_flags) { + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); + GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &flush_cmd); + } + + /* The dst flags represent how things will be used in the future. This + * is what we use for doing cache invalidations. 
+ */ + struct GENX(PIPE_CONTROL) invalidate_cmd = { + GENX(PIPE_CONTROL_header), + .PostSyncOperation = NoWrite, + }; + + for_each_bit(b, dst_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: + case VK_ACCESS_INDEX_READ_BIT: + case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: + invalidate_cmd.VFCacheInvalidationEnable = true; + break; + case VK_ACCESS_UNIFORM_READ_BIT: + invalidate_cmd.ConstantCacheInvalidationEnable = true; + /* fallthrough */ + case VK_ACCESS_SHADER_READ_BIT: + invalidate_cmd.TextureCacheInvalidationEnable = true; + break; + case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: + invalidate_cmd.TextureCacheInvalidationEnable = true; + break; + case VK_ACCESS_TRANSFER_READ_BIT: + invalidate_cmd.TextureCacheInvalidationEnable = true; + break; + default: + unreachable("should've masked this out by now"); + } + } + + if (dst_flags) { + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); + GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &invalidate_cmd); + } +} + +static void +emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) +{ + uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, + GENX(3DSTATE_VERTEX_BUFFERS)); + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, + &(struct GENX(VERTEX_BUFFER_STATE)) { + .VertexBufferIndex = 32, /* Reserved for this */ + .AddressModifyEnable = true, + .BufferPitch = 0, +#if (ANV_GEN >= 8) + .MemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { bo, offset }, + .BufferSize = 8 +#else + .VertexBufferMemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { bo, offset }, + .EndAddress = { bo, offset + 8 }, +#endif + }); +} + +static void +emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, + uint32_t base_vertex, uint32_t base_instance) +{ + struct anv_state id_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); + + ((uint32_t *)id_state.map)[0] = base_vertex; + 
((uint32_t *)id_state.map)[1] = base_instance; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(id_state); + + emit_base_vertex_instance_bo(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); +} + +void genX(CmdDraw)( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .VertexAccessType = SEQUENTIAL, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void genX(CmdDrawIndexed)( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} + +/* Auto-Draw / 
Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +static void +emit_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + .RegisterOffset = reg, + .DataDWord = imm); +} + +void genX(CmdDrawIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL, + .PrimitiveTopologyType = pipeline->topology); +} + +void genX(CmdDrawIndexedIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t 
drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + /* TODO: We need to stomp base vertex to 0 somehow */ + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology); +} + + +void genX(CmdDispatch)( + VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + if (prog_data->uses_num_work_groups) { + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); + uint32_t *sizes = state.map; + sizes[0] = x; + sizes[1] = y; + sizes[2] = z; + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + cmd_buffer->state.num_workgroups_offset = state.offset; + cmd_buffer->state.num_workgroups_bo = + &cmd_buffer->device->dynamic_state_block_pool.bo; + } + + genX(cmd_buffer_flush_compute_state)(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + 
.SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void genX(CmdDispatchIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + if (prog_data->uses_num_work_groups) { + cmd_buffer->state.num_workgroups_offset = bo_offset; + cmd_buffer->state.num_workgroups_bo = bo; + } + + genX(cmd_buffer_flush_compute_state)(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} + +void +genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), 
+#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; + } +} + +static void +cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + const struct anv_image *image = iview ? iview->image : NULL; + const struct anv_format *anv_format = + iview ? anv_format_for_vk_format(iview->vk_format) : NULL; + const bool has_depth = iview && anv_format->has_depth; + const bool has_stencil = iview && anv_format->has_stencil; + + /* FIXME: Implement the PMA stall W/A */ + /* FIXME: Width and Height are wrong */ + + /* Emit 3DSTATE_DEPTH_BUFFER */ + if (has_depth) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = true, + .StencilWriteEnable = has_stencil, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev, + &image->depth_surface.isl), + .SurfacePitch = image->depth_surface.isl.row_pitch - 1, + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->depth_surface.offset, + }, + .Height = fb->height - 1, + .Width = fb->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GENX(MOCS), +#if ANV_GEN >= 8 + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2, +#endif + .RenderTargetViewExtent = 1 - 1); + } else { + /* Even when no depth buffer is present, the hardware requires that + * 3DSTATE_DEPTH_BUFFER be programmed correctly. 
The Broadwell PRM says: + * + * If a null depth buffer is bound, the driver must instead bind depth as: + * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D + * 3DSTATE_DEPTH.Width = 1 + * 3DSTATE_DEPTH.Height = 1 + * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM + * 3DSTATE_DEPTH.SurfaceBaseAddress = 0 + * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 + * + * The PRM is wrong, though. The width and height must be programmed to + * actual framebuffer's width and height, even when neither depth buffer + * nor stencil buffer is present. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), + .SurfaceType = SURFTYPE_2D, + .SurfaceFormat = D16_UNORM, + .Width = fb->width - 1, + .Height = fb->height - 1, + .StencilWriteEnable = has_stencil); + } + + /* Emit 3DSTATE_STENCIL_BUFFER */ + if (has_stencil) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), +#if ANV_GEN >= 8 || ANV_IS_HASWELL + .StencilBufferEnable = true, +#endif + .StencilBufferObjectControlState = GENX(MOCS), + + /* Stencil buffers have strange pitch. The PRM says: + * + * The pitch must be set to 2x the value computed based on width, + * as the stencil buffer is stored with two rows interleaved. + */ + .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, + +#if ANV_GEN >= 8 + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2, +#endif + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->offset + image->stencil_surface.offset, + }); + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER)); + } + + /* Disable hierarchial depth buffers. */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER)); + + /* Clear the clear params. 
*/ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS)); +} + +/** + * @see anv_cmd_buffer_set_subpass() + */ +void +genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + cmd_buffer->state.subpass = subpass; + + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + cmd_buffer_emit_depth_stencil(cmd_buffer); +} + +void genX(CmdBeginRenderPass)( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; + anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + const VkRect2D *render_area = &pRenderPassBegin->renderArea; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); + anv_cmd_buffer_clear_subpass(cmd_buffer); +} + +void genX(CmdNextSubpass)( + VkCommandBuffer commandBuffer, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); + genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); + anv_cmd_buffer_clear_subpass(cmd_buffer); +} + +void genX(CmdEndRenderPass)( + VkCommandBuffer 
commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); +} diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c new file mode 100644 index 00000000000..4c2e0bc6e0d --- /dev/null +++ b/src/intel/vulkan/genX_pipeline.c @@ -0,0 +1,126 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_private.h" + +#if (ANV_GEN == 9) +# include "genxml/gen9_pack.h" +#elif (ANV_GEN == 8) +# include "genxml/gen8_pack.h" +#elif (ANV_IS_HASWELL) +# include "genxml/gen75_pack.h" +#elif (ANV_GEN == 7) +# include "genxml/gen7_pack.h" +#endif + +VkResult +genX(compute_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkComputePipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + pipeline->blend_state.map = NULL; + + result = anv_reloc_list_init(&pipeline->batch_relocs, + pAllocator ? pAllocator : &device->alloc); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. 
+ */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_kernel = NO_KERNEL; + + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); + ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); + anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, + pCreateInfo->stage.pName, + pCreateInfo->stage.pSpecializationInfo); + + pipeline->use_repclear = false; + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + + anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], + .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), +#if ANV_GEN > 7 + .ScratchSpaceBasePointerHigh = 0, + .StackSize = 0, +#else + .GPGPUMode = true, +#endif + .MaximumNumberofThreads = device->info.max_cs_threads - 1, + .NumberofURBEntries = ANV_GEN <= 7 ? 0 : 2, + .ResetGatewayTimer = true, +#if ANV_GEN <= 8 + .BypassGatewayControl = true, +#endif + .URBEntryAllocationSize = ANV_GEN <= 7 ? 
0 : 2, + .CURBEAllocationSize = 0); + + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + uint32_t group_size = prog_data->local_size[0] * + prog_data->local_size[1] * prog_data->local_size[2]; + pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); + uint32_t remainder = group_size & (prog_data->simd_size - 1); + + if (remainder > 0) + pipeline->cs_right_mask = ~0u >> (32 - remainder); + else + pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); + + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h new file mode 100644 index 00000000000..696e2be7c3f --- /dev/null +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -0,0 +1,327 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +static uint32_t +vertex_element_comp_control(enum isl_format format, unsigned comp) +{ + uint8_t bits; + switch (comp) { + case 0: bits = isl_format_layouts[format].channels.r.bits; break; + case 1: bits = isl_format_layouts[format].channels.g.bits; break; + case 2: bits = isl_format_layouts[format].channels.b.bits; break; + case 3: bits = isl_format_layouts[format].channels.a.bits; break; + default: unreachable("Invalid component"); + } + + if (bits) { + return VFCOMP_STORE_SRC; + } else if (comp < 3) { + return VFCOMP_STORE_0; + } else if (isl_format_layouts[format].channels.r.type == ISL_UINT || + isl_format_layouts[format].channels.r.type == ISL_SINT) { + assert(comp == 3); + return VFCOMP_STORE_1_INT; + } else { + assert(comp == 3); + return VFCOMP_STORE_1_FP; + } +} + +static void +emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + uint32_t elements; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. + */ + elements = 0; + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) + elements |= (1 << info->pVertexAttributeDescriptions[i].location); + } else { + /* Pull inputs_read out of the VS prog data */ + uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; + assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); + elements = inputs_read >> VERT_ATTRIB_GENERIC0; + } + +#if ANV_GEN >= 8 + /* On BDW+, we only need to allocate space for base ids. Setting up + * the actual vertex and instance id is a separate packet. 
+ */ + const bool needs_svgs_elem = pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance; +#else + /* On Haswell and prior, vertex and instance id are created by using the + * ComponentControl fields, so we need an element for any of them. + */ + const bool needs_svgs_elem = pipeline->vs_prog_data.uses_vertexid || + pipeline->vs_prog_data.uses_instanceid || + pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance; +#endif + + uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem; + if (elem_count == 0) + return; + + uint32_t *p; + + const uint32_t num_dwords = 1 + elem_count * 2; + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GENX(3DSTATE_VERTEX_ELEMENTS)); + memset(p + 1, 0, (num_dwords - 1) * 4); + + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + enum isl_format format = anv_get_isl_format(desc->format, + VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_LINEAR, + NULL); + + assert(desc->binding < 32); + + if ((elements & (1 << desc->location)) == 0) + continue; /* Binding unused */ + + uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1)); + + struct GENX(VERTEX_ELEMENT_STATE) element = { + .VertexBufferIndex = desc->binding, + .Valid = true, + .SourceElementFormat = format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offset, + .Component0Control = vertex_element_comp_control(format, 0), + .Component1Control = vertex_element_comp_control(format, 1), + .Component2Control = vertex_element_comp_control(format, 2), + .Component3Control = vertex_element_comp_control(format, 3), + }; + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); + +#if ANV_GEN >= 8 + /* On Broadwell and later, we have a separate VF_INSTANCING packet + * that controls instancing. 
On Haswell and prior, that's part of + * VERTEX_BUFFER_STATE which we emit later. + */ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), + .InstancingEnable = pipeline->instancing_enable[desc->binding], + .VertexElementIndex = slot, + /* Vulkan so far doesn't have an instance divisor, so + * this is always 1 (ignored if not instancing). */ + .InstanceDataStepRate = 1); +#endif + } + + const uint32_t id_slot = __builtin_popcount(elements); + if (needs_svgs_elem) { + /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum: + * "Within a VERTEX_ELEMENT_STATE structure, if a Component + * Control field is set to something other than VFCOMP_STORE_SRC, + * no higher-numbered Component Control fields may be set to + * VFCOMP_STORE_SRC" + * + * This means, that if we have BaseInstance, we need BaseVertex as + * well. Just do all or nothing. + */ + uint32_t base_ctrl = (pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance) ? + VFCOMP_STORE_SRC : VFCOMP_STORE_0; + + struct GENX(VERTEX_ELEMENT_STATE) element = { + .VertexBufferIndex = 32, /* Reserved for this */ + .Valid = true, + .SourceElementFormat = ISL_FORMAT_R32G32_UINT, + .Component0Control = base_ctrl, + .Component1Control = base_ctrl, +#if ANV_GEN >= 8 + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_0, +#else + .Component2Control = VFCOMP_STORE_VID, + .Component3Control = VFCOMP_STORE_IID, +#endif + }; + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element); + } + +#if ANV_GEN >= 8 + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), + .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDComponentNumber = 2, + .VertexIDElementOffset = id_slot, + .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDComponentNumber = 3, + .InstanceIDElementOffset = id_slot); +#endif +} + +static inline void +emit_urb_setup(struct anv_pipeline *pipeline) +{ +#if ANV_GEN == 7 + struct anv_device 
*device = pipeline->device; + + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall + * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, + * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL + * needs to be sent before any combination of VS associated 3DSTATE." + */ + anv_batch_emit(&pipeline->batch, GEN7_PIPE_CONTROL, + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &device->workaround_bo, 0 }); +#endif + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_GS), + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_HS), + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_DS), + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); +} + +static inline uint32_t +scratch_space(const struct brw_stage_prog_data *prog_data) +{ + return ffs(prog_data->total_scratch / 2048); +} + +static const uint32_t vk_to_gen_cullmode[] = { + 
[VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH +}; + +static const uint32_t vk_to_gen_fillmode[] = { + [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID, + [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME, + [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT, +}; + +static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, + [VK_FRONT_FACE_CLOCKWISE] = 0 +}; + +static const uint32_t vk_to_gen_logic_op[] = { + [VK_LOGIC_OP_COPY] = LOGICOP_COPY, + [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, + [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = LOGICOP_XOR, + [VK_LOGIC_OP_OR] = LOGICOP_OR, + [VK_LOGIC_OP_NOR] = LOGICOP_NOR, + [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = LOGICOP_NAND, + [VK_LOGIC_OP_SET] = LOGICOP_SET, +}; + +static const uint32_t vk_to_gen_blend[] = { + [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR, + 
[VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, +}; + +static const uint32_t vk_to_gen_blend_op[] = { + [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, + [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, + [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, + [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, + [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; + +static const uint32_t vk_to_gen_stencil_op[] = { + [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, + [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, + [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, + [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, + [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, + [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, + [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, + [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, +}; diff --git a/src/intel/vulkan/genX_state_util.h b/src/intel/vulkan/genX_state_util.h new file mode 100644 index 00000000000..67f798ab66e --- /dev/null +++ b/src/intel/vulkan/genX_state_util.h @@ -0,0 +1,112 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * 
copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +static const uint8_t +anv_surftype(const struct anv_image *image, VkImageViewType view_type, + bool storage) +{ + switch (view_type) { + default: + unreachable("bad VkImageViewType"); + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_1D); + return SURFTYPE_1D; + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + assert(image->type == VK_IMAGE_TYPE_2D); + return storage ? 
SURFTYPE_2D : SURFTYPE_CUBE; + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_2D); + return SURFTYPE_2D; + case VK_IMAGE_VIEW_TYPE_3D: + assert(image->type == VK_IMAGE_TYPE_3D); + return SURFTYPE_3D; + } +} + +static enum isl_format +anv_surface_format(const struct anv_device *device, enum isl_format format, + bool storage) +{ + if (storage) { + return isl_lower_storage_image_format(&device->isl_dev, format); + } else { + return format; + } +} + +#if ANV_GEN > 7 || ANV_IS_HASWELL +static const uint32_t vk_to_gen_swizzle[] = { + [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, + [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, + [VK_COMPONENT_SWIZZLE_R] = SCS_RED, + [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, + [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, + [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA +}; +#endif + +static inline uint32_t +vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) +{ + switch (filter) { + default: + assert(!"Invalid filter"); + case VK_FILTER_NEAREST: + return MAPFILTER_NEAREST; + case VK_FILTER_LINEAR: + return anisotropyEnable ? 
MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR; + } +} + +static inline uint32_t +vk_to_gen_max_anisotropy(float ratio) +{ + return (anv_clamp_f(ratio, 2, 16) - 2) / 2; +} + +static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR +}; + +static const uint32_t vk_to_gen_tex_address[] = { + [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, + [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, + [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; diff --git a/src/intel/vulkan/intel_icd.json.in b/src/intel/vulkan/intel_icd.json.in new file mode 100644 index 00000000000..d9b363a9762 --- /dev/null +++ b/src/intel/vulkan/intel_icd.json.in @@ -0,0 +1,7 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "@install_libdir@/libvulkan_intel.so", + "abi_versions": "1.0.3" + } +} diff --git a/src/intel/vulkan/tests/.gitignore b/src/intel/vulkan/tests/.gitignore new file mode 100644 index 00000000000..5d054055685 --- /dev/null +++ b/src/intel/vulkan/tests/.gitignore @@ -0,0 +1,5 @@ +block_pool +block_pool_no_free +state_pool +state_pool_free_list_only +state_pool_no_free diff --git a/src/intel/vulkan/tests/Makefile.am b/src/intel/vulkan/tests/Makefile.am new file mode 100644 index 00000000000..883013d86c6 --- /dev/null +++ b/src/intel/vulkan/tests/Makefile.am @@ -0,0 +1,46 @@ +# Copyright © 2009 Intel 
Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# on the rights to use, copy, modify, merge, publish, distribute, sub +# license, and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/isl/ \ + -I$(top_srcdir)/src/vulkan + +LDADD = \ + $(top_builddir)/src/vulkan/libvulkan-test.la \ + $(PTHREAD_LIBS) -lm -lstdc++ + +check_PROGRAMS = \ + block_pool_no_free \ + state_pool_no_free \ + state_pool_free_list_only \ + state_pool + +TESTS = $(check_PROGRAMS) diff --git a/src/intel/vulkan/tests/block_pool_no_free.c b/src/intel/vulkan/tests/block_pool_no_free.c new file mode 100644 index 00000000000..86d1a76151f --- /dev/null +++ b/src/intel/vulkan/tests/block_pool_no_free.c @@ -0,0 +1,144 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "anv_private.h" + +#define NUM_THREADS 16 +#define BLOCKS_PER_THREAD 1024 +#define NUM_RUNS 64 + +struct job { + pthread_t thread; + unsigned id; + struct anv_block_pool *pool; + uint32_t blocks[BLOCKS_PER_THREAD]; + uint32_t back_blocks[BLOCKS_PER_THREAD]; +} jobs[NUM_THREADS]; + + +static void *alloc_blocks(void *_job) +{ + struct job *job = _job; + int32_t block, *data; + + for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { + block = anv_block_pool_alloc(job->pool); + data = job->pool->map + block; + *data = block; + assert(block >= 0); + job->blocks[i] = block; + + block = anv_block_pool_alloc_back(job->pool); + data = job->pool->map + block; + *data = block; + assert(block < 0); + job->back_blocks[i] = -block; + } + + for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { + block = job->blocks[i]; + data = job->pool->map + block; + assert(*data == block); + + block = -job->back_blocks[i]; + data = job->pool->map + block; + assert(*data == block); + } + + return NULL; +} + +static void validate_monotonic(uint32_t **blocks) +{ + /* A list of indices, one per thread */ + unsigned next[NUM_THREADS]; + memset(next, 0, sizeof(next)); + + int highest = -1; + while (true) { + /* First, we find which thread has the highest next element */ + int thread_max = -1; + int max_thread_idx = -1; + for (unsigned i = 0; i < NUM_THREADS; i++) { + if (next[i] >= BLOCKS_PER_THREAD) + continue; + + if (thread_max < blocks[i][next[i]]) { + thread_max = blocks[i][next[i]]; + max_thread_idx = i; + } + } + + /* The only way this can happen is if all of the next[] values are at + * BLOCKS_PER_THREAD, in which case, we're done. 
+ */ + if (thread_max == -1) + break; + + /* That next element had better be higher than the previous highest */ + assert(blocks[max_thread_idx][next[max_thread_idx]] > highest); + + highest = blocks[max_thread_idx][next[max_thread_idx]]; + next[max_thread_idx]++; + } +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&pool, &device, 16); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + /* Validate that the block allocations were monotonic */ + uint32_t *block_ptrs[NUM_THREADS]; + for (unsigned i = 0; i < NUM_THREADS; i++) + block_ptrs[i] = jobs[i].blocks; + validate_monotonic(block_ptrs); + + /* Validate that the back block allocations were monotonic */ + for (unsigned i = 0; i < NUM_THREADS; i++) + block_ptrs[i] = jobs[i].back_blocks; + validate_monotonic(block_ptrs); + + anv_block_pool_finish(&pool); + pthread_mutex_destroy(&device.mutex); +} + +int main(int argc, char **argv) +{ + for (unsigned i = 0; i < NUM_RUNS; i++) + run_test(); +} diff --git a/src/intel/vulkan/tests/state_pool.c b/src/intel/vulkan/tests/state_pool.c new file mode 100644 index 00000000000..878ec19a595 --- /dev/null +++ b/src/intel/vulkan/tests/state_pool.c @@ -0,0 +1,57 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "anv_private.h" + +#define NUM_THREADS 8 +#define STATES_PER_THREAD_LOG2 10 +#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) +#define NUM_RUNS 64 + +#include "state_pool_test_helper.h" + +int main(int argc, char **argv) +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + pthread_mutex_init(&device.mutex, NULL); + + for (unsigned i = 0; i < NUM_RUNS; i++) { + anv_block_pool_init(&block_pool, &device, 256); + anv_state_pool_init(&state_pool, &block_pool); + + /* Grab one so a zero offset is impossible */ + anv_state_pool_alloc(&state_pool, 16, 16); + + run_state_pool_test(&state_pool); + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + } + + pthread_mutex_destroy(&device.mutex); +} diff --git a/src/intel/vulkan/tests/state_pool_free_list_only.c b/src/intel/vulkan/tests/state_pool_free_list_only.c new file mode 100644 index 00000000000..2f4eb47fe45 --- /dev/null +++ b/src/intel/vulkan/tests/state_pool_free_list_only.c @@ -0,0 +1,66 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * 
the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "anv_private.h" + +#define NUM_THREADS 8 +#define STATES_PER_THREAD_LOG2 12 +#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) + +#include "state_pool_test_helper.h" + +int main(int argc, char **argv) +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&block_pool, &device, 4096); + anv_state_pool_init(&state_pool, &block_pool); + + /* Grab one so a zero offset is impossible */ + anv_state_pool_alloc(&state_pool, 16, 16); + + /* Grab and return enough states that the state pool test below won't + * actually ever resize anything. 
+ */ + { + struct anv_state states[NUM_THREADS * STATES_PER_THREAD]; + for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) { + states[i] = anv_state_pool_alloc(&state_pool, 16, 16); + assert(states[i].offset != 0); + } + + for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) + anv_state_pool_free(&state_pool, states[i]); + } + + run_state_pool_test(&state_pool); + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + pthread_mutex_destroy(&device.mutex); +} diff --git a/src/intel/vulkan/tests/state_pool_no_free.c b/src/intel/vulkan/tests/state_pool_no_free.c new file mode 100644 index 00000000000..4b248c2ee66 --- /dev/null +++ b/src/intel/vulkan/tests/state_pool_no_free.c @@ -0,0 +1,117 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include + +#include "anv_private.h" + +#define NUM_THREADS 16 +#define STATES_PER_THREAD 1024 +#define NUM_RUNS 64 + +struct job { + pthread_t thread; + unsigned id; + struct anv_state_pool *pool; + uint32_t offsets[STATES_PER_THREAD]; +} jobs[NUM_THREADS]; + +pthread_barrier_t barrier; + +static void *alloc_states(void *_job) +{ + struct job *job = _job; + + pthread_barrier_wait(&barrier); + + for (unsigned i = 0; i < STATES_PER_THREAD; i++) { + struct anv_state state = anv_state_pool_alloc(job->pool, 16, 16); + job->offsets[i] = state.offset; + } + + return NULL; +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&block_pool, &device, 64); + anv_state_pool_init(&state_pool, &block_pool); + + pthread_barrier_init(&barrier, NULL, NUM_THREADS); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &state_pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + /* A list of indices, one per thread */ + unsigned next[NUM_THREADS]; + memset(next, 0, sizeof(next)); + + int highest = -1; + while (true) { + /* First, we find which thread has the highest next element */ + int thread_max = -1; + int max_thread_idx = -1; + for (unsigned i = 0; i < NUM_THREADS; i++) { + if (next[i] >= STATES_PER_THREAD) + continue; + + if (thread_max < jobs[i].offsets[next[i]]) { + thread_max = jobs[i].offsets[next[i]]; + max_thread_idx = i; + } + } + + /* The only way this can happen is if all of the next[] values are at + * BLOCKS_PER_THREAD, in which case, we're done. 
+ */ + if (thread_max == -1) + break; + + /* That next element had better be higher than the previous highest */ + assert(jobs[max_thread_idx].offsets[next[max_thread_idx]] > highest); + + highest = jobs[max_thread_idx].offsets[next[max_thread_idx]]; + next[max_thread_idx]++; + } + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + pthread_mutex_destroy(&device.mutex); +} + +int main(int argc, char **argv) +{ + for (unsigned i = 0; i < NUM_RUNS; i++) + run_test(); +} diff --git a/src/intel/vulkan/tests/state_pool_test_helper.h b/src/intel/vulkan/tests/state_pool_test_helper.h new file mode 100644 index 00000000000..0e56431303f --- /dev/null +++ b/src/intel/vulkan/tests/state_pool_test_helper.h @@ -0,0 +1,71 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include + +struct job { + struct anv_state_pool *pool; + unsigned id; + pthread_t thread; +} jobs[NUM_THREADS]; + +pthread_barrier_t barrier; + +static void *alloc_states(void *void_job) +{ + struct job *job = void_job; + + const unsigned chunk_size = 1 << (job->id % STATES_PER_THREAD_LOG2); + const unsigned num_chunks = STATES_PER_THREAD / chunk_size; + + struct anv_state states[chunk_size]; + + pthread_barrier_wait(&barrier); + + for (unsigned c = 0; c < num_chunks; c++) { + for (unsigned i = 0; i < chunk_size; i++) { + states[i] = anv_state_pool_alloc(job->pool, 16, 16); + memset(states[i].map, 139, 16); + assert(states[i].offset != 0); + } + + for (unsigned i = 0; i < chunk_size; i++) + anv_state_pool_free(job->pool, states[i]); + } + + return NULL; +} + +static void run_state_pool_test(struct anv_state_pool *state_pool) +{ + pthread_barrier_init(&barrier, NULL, NUM_THREADS); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = state_pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); +} diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore deleted file mode 100644 index 40afc2e3989..00000000000 --- a/src/vulkan/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -# Generated source files -/*_spirv_autogen.h -/anv_entrypoints.c -/anv_entrypoints.h -/wayland-drm-protocol.c -/wayland-drm-client-protocol.h -/dev_icd.json -/intel_icd.json -/gen*_pack.h \ No newline at end of file diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am deleted file mode 100644 index 0605dc437d7..00000000000 --- a/src/vulkan/Makefile.am +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright © 2015 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# 
the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -SUBDIRS = . tests - -vulkan_includedir = $(includedir)/vulkan - -vulkan_include_HEADERS = \ - $(top_srcdir)/include/vulkan/vk_platform.h \ - $(top_srcdir)/include/vulkan/vulkan.h \ - $(top_srcdir)/include/vulkan/vulkan_intel.h - -# Used when generating entrypoints to filter out unwanted extensions -VULKAN_ENTRYPOINT_CPPFLAGS = \ - -I$(top_srcdir)/include/vulkan \ - -DVK_USE_PLATFORM_XCB_KHR \ - -DVK_USE_PLATFORM_WAYLAND_KHR - -lib_LTLIBRARIES = libvulkan_intel.la - -check_LTLIBRARIES = libvulkan-test.la - -PER_GEN_LIBS = \ - libanv-gen7.la \ - libanv-gen75.la \ - libanv-gen8.la \ - libanv-gen9.la - -noinst_LTLIBRARIES = $(PER_GEN_LIBS) - -# The gallium includes are for the util/u_math.h include from main/macros.h - -AM_CPPFLAGS = \ - $(INTEL_CFLAGS) \ - $(VALGRIND_CFLAGS) \ - $(DEFINES) \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/compiler \ - -I$(top_srcdir)/src/mapi \ - -I$(top_srcdir)/src/mesa \ - -I$(top_srcdir)/src/mesa/drivers/dri/common \ - -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ - -I$(top_srcdir)/src/gallium/auxiliary \ - -I$(top_srcdir)/src/gallium/include \ - 
-I$(top_srcdir)/src/intel/ \ - -I$(top_builddir)/src \ - -I$(top_builddir)/src/compiler \ - -I$(top_builddir)/src/compiler/nir \ - -I$(top_builddir)/src/vulkan - -libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init - -VULKAN_SOURCES = \ - anv_allocator.c \ - anv_cmd_buffer.c \ - anv_batch_chain.c \ - anv_descriptor_set.c \ - anv_device.c \ - anv_dump.c \ - anv_entrypoints.c \ - anv_entrypoints.h \ - anv_formats.c \ - anv_image.c \ - anv_intel.c \ - anv_meta.c \ - anv_meta_blit.c \ - anv_meta_clear.c \ - anv_meta_resolve.c \ - anv_nir_apply_dynamic_offsets.c \ - anv_nir_apply_pipeline_layout.c \ - anv_nir_lower_push_constants.c \ - anv_pass.c \ - anv_pipeline.c \ - anv_pipeline_cache.c \ - anv_private.h \ - anv_query.c \ - anv_util.c \ - anv_wsi.c \ - anv_wsi_x11.c - -BUILT_SOURCES = \ - anv_entrypoints.h \ - anv_entrypoints.c - -libanv_gen7_la_SOURCES = \ - genX_cmd_buffer.c \ - genX_pipeline.c \ - gen7_cmd_buffer.c \ - gen7_pipeline.c \ - gen7_state.c -libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=70 - -libanv_gen75_la_SOURCES = \ - genX_cmd_buffer.c \ - genX_pipeline.c \ - gen7_cmd_buffer.c \ - gen7_pipeline.c \ - gen7_state.c -libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=75 - -libanv_gen8_la_SOURCES = \ - genX_cmd_buffer.c \ - genX_pipeline.c \ - gen8_cmd_buffer.c \ - gen8_pipeline.c \ - gen8_state.c -libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=80 - -libanv_gen9_la_SOURCES = \ - genX_cmd_buffer.c \ - genX_pipeline.c \ - gen8_cmd_buffer.c \ - gen8_pipeline.c \ - gen8_state.c -libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=90 - -if HAVE_EGL_PLATFORM_WAYLAND -BUILT_SOURCES += \ - wayland-drm-protocol.c \ - wayland-drm-client-protocol.h - -%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml - $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@ - -%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml - $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@ - 
-AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm -VULKAN_SOURCES += \ - wayland-drm-protocol.c \ - anv_wsi_wayland.c -libvulkan_intel_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM -endif - -libvulkan_intel_la_SOURCES = \ - $(VULKAN_SOURCES) \ - anv_gem.c - -anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) - $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< header > $@ - -anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) - $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@ - -CLEANFILES = $(BUILT_SOURCES) - -libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ - $(top_builddir)/src/intel/isl/libisl.la \ - $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ - ../mesa/libmesa.la \ - ../mesa/drivers/dri/common/libdri_test_stubs.la \ - -lpthread -ldl -lstdc++ \ - $(PER_GEN_LIBS) - -libvulkan_intel_la_LDFLAGS = \ - -module -avoid-version -shared -shrext .so - - -# Generate icd files. It would be nice to just be able to add these to -# AC_CONFIG_FILES, but @libdir@ typically expands to '${exec_prefix}/lib64', -# which we can't put in the icd file. When running sed from the Makefile we -# can use ${libdir}, which expands completely and we avoid putting Makefile -# variables in the icd file. - -icdconfdir=$(sysconfdir)/vulkan/icd.d -icdconf_DATA = intel_icd.json -noinst_DATA = dev_icd.json - -%.json : %.json.in - $(AM_V_GEN) $(SED) \ - -e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \ - -e "s#@install_libdir@#${libdir}#" < $< > $@ - - -# Libvulkan with dummy gem. Used for unit tests. 
- -libvulkan_test_la_SOURCES = \ - $(VULKAN_SOURCES) \ - anv_gem_stubs.c - -libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) -libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) - -include $(top_srcdir)/install-lib-links.mk diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c deleted file mode 100644 index a7ae975656b..00000000000 --- a/src/vulkan/anv_allocator.c +++ /dev/null @@ -1,862 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#define _DEFAULT_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#ifdef HAVE_VALGRIND -#define VG_NOACCESS_READ(__ptr) ({ \ - VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \ - __typeof(*(__ptr)) __val = *(__ptr); \ - VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\ - __val; \ -}) -#define VG_NOACCESS_WRITE(__ptr, __val) ({ \ - VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \ - *(__ptr) = (__val); \ - VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \ -}) -#else -#define VG_NOACCESS_READ(__ptr) (*(__ptr)) -#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val)) -#endif - -/* Design goals: - * - * - Lock free (except when resizing underlying bos) - * - * - Constant time allocation with typically only one atomic - * - * - Multiple allocation sizes without fragmentation - * - * - Can grow while keeping addresses and offset of contents stable - * - * - All allocations within one bo so we can point one of the - * STATE_BASE_ADDRESS pointers at it. - * - * The overall design is a two-level allocator: top level is a fixed size, big - * block (8k) allocator, which operates out of a bo. Allocation is done by - * either pulling a block from the free list or growing the used range of the - * bo. Growing the range may run out of space in the bo which we then need to - * grow. Growing the bo is tricky in a multi-threaded, lockless environment: - * we need to keep all pointers and contents in the old map valid. GEM bos in - * general can't grow, but we use a trick: we create a memfd and use ftruncate - * to grow it as necessary. We mmap the new size and then create a gem bo for - * it using the new gem userptr ioctl. Without heavy-handed locking around - * our allocation fast-path, there isn't really a way to munmap the old mmap, - * so we just keep it around until garbage collection time. 
While the block - * allocator is lockless for normal operations, we block other threads trying - * to allocate while we're growing the map. It sholdn't happen often, and - * growing is fast anyway. - * - * At the next level we can use various sub-allocators. The state pool is a - * pool of smaller, fixed size objects, which operates much like the block - * pool. It uses a free list for freeing objects, but when it runs out of - * space it just allocates a new block from the block pool. This allocator is - * intended for longer lived state objects such as SURFACE_STATE and most - * other persistent state objects in the API. We may need to track more info - * with these object and a pointer back to the CPU object (eg VkImage). In - * those cases we just allocate a slightly bigger object and put the extra - * state after the GPU state object. - * - * The state stream allocator works similar to how the i965 DRI driver streams - * all its state. Even with Vulkan, we need to emit transient state (whether - * surface state base or dynamic state base), and for that we can just get a - * block and fill it up. These cases are local to a command buffer and the - * sub-allocator need not be thread safe. The streaming allocator gets a new - * block when it runs out of space and chains them together so they can be - * easily freed. - */ - -/* Allocations are always at least 64 byte aligned, so 1 is an invalid value. - * We use it to indicate the free list is empty. 
*/ -#define EMPTY 1 - -struct anv_mmap_cleanup { - void *map; - size_t size; - uint32_t gem_handle; -}; - -#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0}) - -static inline long -sys_futex(void *addr1, int op, int val1, - struct timespec *timeout, void *addr2, int val3) -{ - return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); -} - -static inline int -futex_wake(uint32_t *addr, int count) -{ - return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0); -} - -static inline int -futex_wait(uint32_t *addr, int32_t value) -{ - return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); -} - -static inline int -memfd_create(const char *name, unsigned int flags) -{ - return syscall(SYS_memfd_create, name, flags); -} - -static inline uint32_t -ilog2_round_up(uint32_t value) -{ - assert(value != 0); - return 32 - __builtin_clz(value - 1); -} - -static inline uint32_t -round_to_power_of_two(uint32_t value) -{ - return 1 << ilog2_round_up(value); -} - -static bool -anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset) -{ - union anv_free_list current, new, old; - - current.u64 = list->u64; - while (current.offset != EMPTY) { - /* We have to add a memory barrier here so that the list head (and - * offset) gets read before we read the map pointer. This way we - * know that the map pointer is valid for the given offset at the - * point where we read it. 
- */ - __sync_synchronize(); - - int32_t *next_ptr = *map + current.offset; - new.offset = VG_NOACCESS_READ(next_ptr); - new.count = current.count + 1; - old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); - if (old.u64 == current.u64) { - *offset = current.offset; - return true; - } - current = old; - } - - return false; -} - -static void -anv_free_list_push(union anv_free_list *list, void *map, int32_t offset) -{ - union anv_free_list current, old, new; - int32_t *next_ptr = map + offset; - - old = *list; - do { - current = old; - VG_NOACCESS_WRITE(next_ptr, current.offset); - new.offset = offset; - new.count = current.count + 1; - old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); - } while (old.u64 != current.u64); -} - -/* All pointers in the ptr_free_list are assumed to be page-aligned. This - * means that the bottom 12 bits should all be zero. - */ -#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) -#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) -#define PFL_PACK(ptr, count) ({ \ - assert(((uintptr_t)(ptr) & 0xfff) == 0); \ - (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ -}) - -static bool -anv_ptr_free_list_pop(void **list, void **elem) -{ - void *current = *list; - while (PFL_PTR(current) != NULL) { - void **next_ptr = PFL_PTR(current); - void *new_ptr = VG_NOACCESS_READ(next_ptr); - unsigned new_count = PFL_COUNT(current) + 1; - void *new = PFL_PACK(new_ptr, new_count); - void *old = __sync_val_compare_and_swap(list, current, new); - if (old == current) { - *elem = PFL_PTR(current); - return true; - } - current = old; - } - - return false; -} - -static void -anv_ptr_free_list_push(void **list, void *elem) -{ - void *old, *current; - void **next_ptr = elem; - - old = *list; - do { - current = old; - VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current)); - unsigned new_count = PFL_COUNT(current) + 1; - void *new = PFL_PACK(elem, new_count); - old = __sync_val_compare_and_swap(list, current, new); - } 
while (old != current); -} - -static uint32_t -anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state); - -void -anv_block_pool_init(struct anv_block_pool *pool, - struct anv_device *device, uint32_t block_size) -{ - assert(util_is_power_of_two(block_size)); - - pool->device = device; - pool->bo.gem_handle = 0; - pool->bo.offset = 0; - pool->bo.size = 0; - pool->block_size = block_size; - pool->free_list = ANV_FREE_LIST_EMPTY; - pool->back_free_list = ANV_FREE_LIST_EMPTY; - - pool->fd = memfd_create("block pool", MFD_CLOEXEC); - if (pool->fd == -1) - return; - - /* Just make it 2GB up-front. The Linux kernel won't actually back it - * with pages until we either map and fault on one of them or we use - * userptr and send a chunk of it off to the GPU. - */ - if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) - return; - - anv_vector_init(&pool->mmap_cleanups, - round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); - - pool->state.next = 0; - pool->state.end = 0; - pool->back_state.next = 0; - pool->back_state.end = 0; - - /* Immediately grow the pool so we'll have a backing bo. */ - pool->state.end = anv_block_pool_grow(pool, &pool->state); -} - -void -anv_block_pool_finish(struct anv_block_pool *pool) -{ - struct anv_mmap_cleanup *cleanup; - - anv_vector_foreach(cleanup, &pool->mmap_cleanups) { - if (cleanup->map) - munmap(cleanup->map, cleanup->size); - if (cleanup->gem_handle) - anv_gem_close(pool->device, cleanup->gem_handle); - } - - anv_vector_finish(&pool->mmap_cleanups); - - close(pool->fd); -} - -#define PAGE_SIZE 4096 - -/** Grows and re-centers the block pool. - * - * We grow the block pool in one or both directions in such a way that the - * following conditions are met: - * - * 1) The size of the entire pool is always a power of two. - * - * 2) The pool only grows on both ends. Neither end can get - * shortened. 
- * - * 3) At the end of the allocation, we have about twice as much space - * allocated for each end as we have used. This way the pool doesn't - * grow too far in one direction or the other. - * - * 4) If the _alloc_back() has never been called, then the back portion of - * the pool retains a size of zero. (This makes it easier for users of - * the block pool that only want a one-sided pool.) - * - * 5) We have enough space allocated for at least one more block in - * whichever side `state` points to. - * - * 6) The center of the pool is always aligned to both the block_size of - * the pool and a 4K CPU page. - */ -static uint32_t -anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) -{ - size_t size; - void *map; - uint32_t gem_handle; - struct anv_mmap_cleanup *cleanup; - - pthread_mutex_lock(&pool->device->mutex); - - assert(state == &pool->state || state == &pool->back_state); - - /* Gather a little usage information on the pool. Since we may have - * threadsd waiting in queue to get some storage while we resize, it's - * actually possible that total_used will be larger than old_size. In - * particular, block_pool_alloc() increments state->next prior to - * calling block_pool_grow, so this ensures that we get enough space for - * which ever side tries to grow the pool. - * - * We align to a page size because it makes it easier to do our - * calculations later in such a way that we state page-aigned. 
- */ - uint32_t back_used = align_u32(pool->back_state.next, PAGE_SIZE); - uint32_t front_used = align_u32(pool->state.next, PAGE_SIZE); - uint32_t total_used = front_used + back_used; - - assert(state == &pool->state || back_used > 0); - - size_t old_size = pool->bo.size; - - if (old_size != 0 && - back_used * 2 <= pool->center_bo_offset && - front_used * 2 <= (old_size - pool->center_bo_offset)) { - /* If we're in this case then this isn't the firsta allocation and we - * already have enough space on both sides to hold double what we - * have allocated. There's nothing for us to do. - */ - goto done; - } - - if (old_size == 0) { - /* This is the first allocation */ - size = MAX2(32 * pool->block_size, PAGE_SIZE); - } else { - size = old_size * 2; - } - - /* We can't have a block pool bigger than 1GB because we use signed - * 32-bit offsets in the free list and we don't want overflow. We - * should never need a block pool bigger than 1GB anyway. - */ - assert(size <= (1u << 31)); - - /* We compute a new center_bo_offset such that, when we double the size - * of the pool, we maintain the ratio of how much is used by each side. - * This way things should remain more-or-less balanced. - */ - uint32_t center_bo_offset; - if (back_used == 0) { - /* If we're in this case then we have never called alloc_back(). In - * this case, we want keep the offset at 0 to make things as simple - * as possible for users that don't care about back allocations. - */ - center_bo_offset = 0; - } else { - /* Try to "center" the allocation based on how much is currently in - * use on each side of the center line. 
- */ - center_bo_offset = ((uint64_t)size * back_used) / total_used; - - /* Align down to a multiple of both the block size and page size */ - uint32_t granularity = MAX2(pool->block_size, PAGE_SIZE); - assert(util_is_power_of_two(granularity)); - center_bo_offset &= ~(granularity - 1); - - assert(center_bo_offset >= back_used); - - /* Make sure we don't shrink the back end of the pool */ - if (center_bo_offset < pool->back_state.end) - center_bo_offset = pool->back_state.end; - - /* Make sure that we don't shrink the front end of the pool */ - if (size - center_bo_offset < pool->state.end) - center_bo_offset = size - pool->state.end; - } - - assert(center_bo_offset % pool->block_size == 0); - assert(center_bo_offset % PAGE_SIZE == 0); - - /* Assert that we only ever grow the pool */ - assert(center_bo_offset >= pool->back_state.end); - assert(size - center_bo_offset >= pool->state.end); - - cleanup = anv_vector_add(&pool->mmap_cleanups); - if (!cleanup) - goto fail; - *cleanup = ANV_MMAP_CLEANUP_INIT; - - /* Just leak the old map until we destroy the pool. We can't munmap it - * without races or imposing locking on the block allocate fast path. On - * the whole the leaked maps adds up to less than the size of the - * current map. MAP_POPULATE seems like the right thing to do, but we - * should try to get some numbers. - */ - map = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, pool->fd, - BLOCK_POOL_MEMFD_CENTER - center_bo_offset); - cleanup->map = map; - cleanup->size = size; - - if (map == MAP_FAILED) - goto fail; - - gem_handle = anv_gem_userptr(pool->device, map, size); - if (gem_handle == 0) - goto fail; - cleanup->gem_handle = gem_handle; - -#if 0 - /* Regular objects are created I915_CACHING_CACHED on LLC platforms and - * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are - * always created as I915_CACHING_CACHED, which on non-LLC means - * snooped. That can be useful but comes with a bit of overheard. 
Since - * we're eplicitly clflushing and don't want the overhead we need to turn - * it off. */ - if (!pool->device->info.has_llc) { - anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_NONE); - anv_gem_set_domain(pool->device, gem_handle, - I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); - } -#endif - - /* Now that we successfull allocated everything, we can write the new - * values back into pool. */ - pool->map = map + center_bo_offset; - pool->center_bo_offset = center_bo_offset; - pool->bo.gem_handle = gem_handle; - pool->bo.size = size; - pool->bo.map = map; - pool->bo.index = 0; - -done: - pthread_mutex_unlock(&pool->device->mutex); - - /* Return the appropreate new size. This function never actually - * updates state->next. Instead, we let the caller do that because it - * needs to do so in order to maintain its concurrency model. - */ - if (state == &pool->state) { - return pool->bo.size - pool->center_bo_offset; - } else { - assert(pool->center_bo_offset > 0); - return pool->center_bo_offset; - } - -fail: - pthread_mutex_unlock(&pool->device->mutex); - - return 0; -} - -static uint32_t -anv_block_pool_alloc_new(struct anv_block_pool *pool, - struct anv_block_state *pool_state) -{ - struct anv_block_state state, old, new; - - while (1) { - state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size); - if (state.next < state.end) { - assert(pool->map); - return state.next; - } else if (state.next == state.end) { - /* We allocated the first block outside the pool, we have to grow it. - * pool_state->next acts a mutex: threads who try to allocate now will - * get block indexes above the current limit and hit futex_wait - * below. 
*/ - new.next = state.next + pool->block_size; - new.end = anv_block_pool_grow(pool, pool_state); - assert(new.end >= new.next && new.end % pool->block_size == 0); - old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64); - if (old.next != state.next) - futex_wake(&pool_state->end, INT_MAX); - return state.next; - } else { - futex_wait(&pool_state->end, state.end); - continue; - } - } -} - -int32_t -anv_block_pool_alloc(struct anv_block_pool *pool) -{ - int32_t offset; - - /* Try free list first. */ - if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { - assert(offset >= 0); - assert(pool->map); - return offset; - } - - return anv_block_pool_alloc_new(pool, &pool->state); -} - -/* Allocates a block out of the back of the block pool. - * - * This will allocated a block earlier than the "start" of the block pool. - * The offsets returned from this function will be negative but will still - * be correct relative to the block pool's map pointer. - * - * If you ever use anv_block_pool_alloc_back, then you will have to do - * gymnastics with the block pool's BO when doing relocations. - */ -int32_t -anv_block_pool_alloc_back(struct anv_block_pool *pool) -{ - int32_t offset; - - /* Try free list first. */ - if (anv_free_list_pop(&pool->back_free_list, &pool->map, &offset)) { - assert(offset < 0); - assert(pool->map); - return offset; - } - - offset = anv_block_pool_alloc_new(pool, &pool->back_state); - - /* The offset we get out of anv_block_pool_alloc_new() is actually the - * number of bytes downwards from the middle to the end of the block. - * We need to turn it into a (negative) offset from the middle to the - * start of the block. 
- */ - assert(offset >= 0); - return -(offset + pool->block_size); -} - -void -anv_block_pool_free(struct anv_block_pool *pool, int32_t offset) -{ - if (offset < 0) { - anv_free_list_push(&pool->back_free_list, pool->map, offset); - } else { - anv_free_list_push(&pool->free_list, pool->map, offset); - } -} - -static void -anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool, - size_t state_size) -{ - /* At least a cache line and must divide the block size. */ - assert(state_size >= 64 && util_is_power_of_two(state_size)); - - pool->state_size = state_size; - pool->free_list = ANV_FREE_LIST_EMPTY; - pool->block.next = 0; - pool->block.end = 0; -} - -static uint32_t -anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, - struct anv_block_pool *block_pool) -{ - int32_t offset; - struct anv_block_state block, old, new; - - /* Try free list first. */ - if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) { - assert(offset >= 0); - return offset; - } - - /* If free list was empty (or somebody raced us and took the items) we - * allocate a new item from the end of the block */ - restart: - block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size); - - if (block.next < block.end) { - return block.next; - } else if (block.next == block.end) { - offset = anv_block_pool_alloc(block_pool); - new.next = offset + pool->state_size; - new.end = offset + block_pool->block_size; - old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64); - if (old.next != block.next) - futex_wake(&pool->block.end, INT_MAX); - return offset; - } else { - futex_wait(&pool->block.end, block.end); - goto restart; - } -} - -static void -anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool, - struct anv_block_pool *block_pool, - uint32_t offset) -{ - anv_free_list_push(&pool->free_list, block_pool->map, offset); -} - -void -anv_state_pool_init(struct anv_state_pool *pool, - struct anv_block_pool *block_pool) -{ - 
pool->block_pool = block_pool; - for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) { - size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i); - anv_fixed_size_state_pool_init(&pool->buckets[i], size); - } - VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); -} - -void -anv_state_pool_finish(struct anv_state_pool *pool) -{ - VG(VALGRIND_DESTROY_MEMPOOL(pool)); -} - -struct anv_state -anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) -{ - unsigned size_log2 = ilog2_round_up(size < align ? align : size); - assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2); - if (size_log2 < ANV_MIN_STATE_SIZE_LOG2) - size_log2 = ANV_MIN_STATE_SIZE_LOG2; - unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; - - struct anv_state state; - state.alloc_size = 1 << size_log2; - state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], - pool->block_pool); - state.map = pool->block_pool->map + state.offset; - VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, size)); - return state; -} - -void -anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) -{ - assert(util_is_power_of_two(state.alloc_size)); - unsigned size_log2 = ilog2_round_up(state.alloc_size); - assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 && - size_log2 <= ANV_MAX_STATE_SIZE_LOG2); - unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; - - VG(VALGRIND_MEMPOOL_FREE(pool, state.map)); - anv_fixed_size_state_pool_free(&pool->buckets[bucket], - pool->block_pool, state.offset); -} - -#define NULL_BLOCK 1 -struct anv_state_stream_block { - /* The next block */ - struct anv_state_stream_block *next; - - /* The offset into the block pool at which this block starts */ - uint32_t offset; - -#ifdef HAVE_VALGRIND - /* A pointer to the first user-allocated thing in this block. This is - * what valgrind sees as the start of the block. - */ - void *_vg_ptr; -#endif -}; - -/* The state stream allocator is a one-shot, single threaded allocator for - * variable sized blocks. 
We use it for allocating dynamic state. - */ -void -anv_state_stream_init(struct anv_state_stream *stream, - struct anv_block_pool *block_pool) -{ - stream->block_pool = block_pool; - stream->block = NULL; - - /* Ensure that next + whatever > end. This way the first call to - * state_stream_alloc fetches a new block. - */ - stream->next = 1; - stream->end = 0; - - VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false)); -} - -void -anv_state_stream_finish(struct anv_state_stream *stream) -{ - VG(const uint32_t block_size = stream->block_pool->block_size); - - struct anv_state_stream_block *next = stream->block; - while (next != NULL) { - VG(VALGRIND_MAKE_MEM_DEFINED(next, sizeof(*next))); - struct anv_state_stream_block sb = VG_NOACCESS_READ(next); - VG(VALGRIND_MEMPOOL_FREE(stream, sb._vg_ptr)); - VG(VALGRIND_MAKE_MEM_UNDEFINED(next, block_size)); - anv_block_pool_free(stream->block_pool, sb.offset); - next = sb.next; - } - - VG(VALGRIND_DESTROY_MEMPOOL(stream)); -} - -struct anv_state -anv_state_stream_alloc(struct anv_state_stream *stream, - uint32_t size, uint32_t alignment) -{ - struct anv_state_stream_block *sb = stream->block; - - struct anv_state state; - - state.offset = align_u32(stream->next, alignment); - if (state.offset + size > stream->end) { - uint32_t block = anv_block_pool_alloc(stream->block_pool); - sb = stream->block_pool->map + block; - - VG(VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb))); - sb->next = stream->block; - sb->offset = block; - VG(sb->_vg_ptr = NULL); - VG(VALGRIND_MAKE_MEM_NOACCESS(sb, stream->block_pool->block_size)); - - stream->block = sb; - stream->start = block; - stream->next = block + sizeof(*sb); - stream->end = block + stream->block_pool->block_size; - - state.offset = align_u32(stream->next, alignment); - assert(state.offset + size <= stream->end); - } - - assert(state.offset > stream->start); - state.map = (void *)sb + (state.offset - stream->start); - state.alloc_size = size; - -#ifdef HAVE_VALGRIND - void *vg_ptr = 
VG_NOACCESS_READ(&sb->_vg_ptr); - if (vg_ptr == NULL) { - vg_ptr = state.map; - VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); - VALGRIND_MEMPOOL_ALLOC(stream, vg_ptr, size); - } else { - void *state_end = state.map + state.alloc_size; - /* This only updates the mempool. The newly allocated chunk is still - * marked as NOACCESS. */ - VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, state_end - vg_ptr); - /* Mark the newly allocated chunk as undefined */ - VALGRIND_MAKE_MEM_UNDEFINED(state.map, state.alloc_size); - } -#endif - - stream->next = state.offset + size; - - return state; -} - -struct bo_pool_bo_link { - struct bo_pool_bo_link *next; - struct anv_bo bo; -}; - -void -anv_bo_pool_init(struct anv_bo_pool *pool, - struct anv_device *device, uint32_t bo_size) -{ - pool->device = device; - pool->bo_size = bo_size; - pool->free_list = NULL; - - VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); -} - -void -anv_bo_pool_finish(struct anv_bo_pool *pool) -{ - struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); - while (link != NULL) { - struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); - - anv_gem_munmap(link_copy.bo.map, pool->bo_size); - anv_gem_close(pool->device, link_copy.bo.gem_handle); - link = link_copy.next; - } - - VG(VALGRIND_DESTROY_MEMPOOL(pool)); -} - -VkResult -anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) -{ - VkResult result; - - void *next_free_void; - if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { - struct bo_pool_bo_link *next_free = next_free_void; - *bo = VG_NOACCESS_READ(&next_free->bo); - assert(bo->map == next_free); - assert(bo->size == pool->bo_size); - - VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); - - return VK_SUCCESS; - } - - struct anv_bo new_bo; - - result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size); - if (result != VK_SUCCESS) - return result; - - assert(new_bo.size == pool->bo_size); - - new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size, 0); - if 
(new_bo.map == NULL) { - anv_gem_close(pool->device, new_bo.gem_handle); - return vk_error(VK_ERROR_MEMORY_MAP_FAILED); - } - - *bo = new_bo; - - VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); - - return VK_SUCCESS; -} - -void -anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) -{ - struct bo_pool_bo_link *link = bo->map; - link->bo = *bo; - - VG(VALGRIND_MEMPOOL_FREE(pool, bo->map)); - anv_ptr_free_list_push(&pool->free_list, link); -} diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c deleted file mode 100644 index d24dd06d7eb..00000000000 --- a/src/vulkan/anv_batch_chain.c +++ /dev/null @@ -1,1077 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen7_pack.h" -#include "genxml/gen8_pack.h" - -/** \file anv_batch_chain.c - * - * This file contains functions related to anv_cmd_buffer as a data - * structure. This involves everything required to create and destroy - * the actual batch buffers as well as link them together and handle - * relocations and surface state. It specifically does *not* contain any - * handling of actual vkCmd calls beyond vkCmdExecuteCommands. - */ - -/*-----------------------------------------------------------------------* - * Functions related to anv_reloc_list - *-----------------------------------------------------------------------*/ - -static VkResult -anv_reloc_list_init_clone(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc, - const struct anv_reloc_list *other_list) -{ - if (other_list) { - list->num_relocs = other_list->num_relocs; - list->array_length = other_list->array_length; - } else { - list->num_relocs = 0; - list->array_length = 256; - } - - list->relocs = - anv_alloc(alloc, list->array_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if (list->relocs == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - list->reloc_bos = - anv_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if (list->reloc_bos == NULL) { - anv_free(alloc, list->relocs); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - if (other_list) { - memcpy(list->relocs, other_list->relocs, - list->array_length * sizeof(*list->relocs)); - memcpy(list->reloc_bos, other_list->reloc_bos, - list->array_length * sizeof(*list->reloc_bos)); - } - - return VK_SUCCESS; -} - -VkResult -anv_reloc_list_init(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc) -{ - return anv_reloc_list_init_clone(list, alloc, NULL); -} - -void -anv_reloc_list_finish(struct anv_reloc_list *list, - const 
VkAllocationCallbacks *alloc) -{ - anv_free(alloc, list->relocs); - anv_free(alloc, list->reloc_bos); -} - -static VkResult -anv_reloc_list_grow(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc, - size_t num_additional_relocs) -{ - if (list->num_relocs + num_additional_relocs <= list->array_length) - return VK_SUCCESS; - - size_t new_length = list->array_length * 2; - while (new_length < list->num_relocs + num_additional_relocs) - new_length *= 2; - - struct drm_i915_gem_relocation_entry *new_relocs = - anv_alloc(alloc, new_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (new_relocs == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct anv_bo **new_reloc_bos = - anv_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (new_relocs == NULL) { - anv_free(alloc, new_relocs); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); - memcpy(new_reloc_bos, list->reloc_bos, - list->num_relocs * sizeof(*list->reloc_bos)); - - anv_free(alloc, list->relocs); - anv_free(alloc, list->reloc_bos); - - list->array_length = new_length; - list->relocs = new_relocs; - list->reloc_bos = new_reloc_bos; - - return VK_SUCCESS; -} - -uint64_t -anv_reloc_list_add(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc, - uint32_t offset, struct anv_bo *target_bo, uint32_t delta) -{ - struct drm_i915_gem_relocation_entry *entry; - int index; - - const uint32_t domain = - target_bo->is_winsys_bo ? I915_GEM_DOMAIN_RENDER : 0; - - anv_reloc_list_grow(list, alloc, 1); - /* TODO: Handle failure */ - - /* XXX: Can we use I915_EXEC_HANDLE_LUT? 
*/ - index = list->num_relocs++; - list->reloc_bos[index] = target_bo; - entry = &list->relocs[index]; - entry->target_handle = target_bo->gem_handle; - entry->delta = delta; - entry->offset = offset; - entry->presumed_offset = target_bo->offset; - entry->read_domains = domain; - entry->write_domain = domain; - VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry))); - - return target_bo->offset + delta; -} - -static void -anv_reloc_list_append(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc, - struct anv_reloc_list *other, uint32_t offset) -{ - anv_reloc_list_grow(list, alloc, other->num_relocs); - /* TODO: Handle failure */ - - memcpy(&list->relocs[list->num_relocs], &other->relocs[0], - other->num_relocs * sizeof(other->relocs[0])); - memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], - other->num_relocs * sizeof(other->reloc_bos[0])); - - for (uint32_t i = 0; i < other->num_relocs; i++) - list->relocs[i + list->num_relocs].offset += offset; - - list->num_relocs += other->num_relocs; -} - -/*-----------------------------------------------------------------------* - * Functions related to anv_batch - *-----------------------------------------------------------------------*/ - -void * -anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) -{ - if (batch->next + num_dwords * 4 > batch->end) - batch->extend_cb(batch, batch->user_data); - - void *p = batch->next; - - batch->next += num_dwords * 4; - assert(batch->next <= batch->end); - - return p; -} - -uint64_t -anv_batch_emit_reloc(struct anv_batch *batch, - void *location, struct anv_bo *bo, uint32_t delta) -{ - return anv_reloc_list_add(batch->relocs, batch->alloc, - location - batch->start, bo, delta); -} - -void -anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) -{ - uint32_t size, offset; - - size = other->next - other->start; - assert(size % 4 == 0); - - if (batch->next + size > batch->end) - batch->extend_cb(batch, batch->user_data); - - 
assert(batch->next + size <= batch->end); - - VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); - memcpy(batch->next, other->start, size); - - offset = batch->next - batch->start; - anv_reloc_list_append(batch->relocs, batch->alloc, - other->relocs, offset); - - batch->next += size; -} - -/*-----------------------------------------------------------------------* - * Functions related to anv_batch_bo - *-----------------------------------------------------------------------*/ - -static VkResult -anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, - struct anv_batch_bo **bbo_out) -{ - VkResult result; - - struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (bbo == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); - if (result != VK_SUCCESS) - goto fail_alloc; - - result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc); - if (result != VK_SUCCESS) - goto fail_bo_alloc; - - *bbo_out = bbo; - - return VK_SUCCESS; - - fail_bo_alloc: - anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); - fail_alloc: - anv_free(&cmd_buffer->pool->alloc, bbo); - - return result; -} - -static VkResult -anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer, - const struct anv_batch_bo *other_bbo, - struct anv_batch_bo **bbo_out) -{ - VkResult result; - - struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (bbo == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); - if (result != VK_SUCCESS) - goto fail_alloc; - - result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc, - &other_bbo->relocs); - if (result != VK_SUCCESS) - goto fail_bo_alloc; - - bbo->length = other_bbo->length; - memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); - - 
bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset; - - *bbo_out = bbo; - - return VK_SUCCESS; - - fail_bo_alloc: - anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); - fail_alloc: - anv_free(&cmd_buffer->pool->alloc, bbo); - - return result; -} - -static void -anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, - size_t batch_padding) -{ - batch->next = batch->start = bbo->bo.map; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - batch->relocs = &bbo->relocs; - bbo->last_ss_pool_bo_offset = 0; - bbo->relocs.num_relocs = 0; -} - -static void -anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, - size_t batch_padding) -{ - batch->start = bbo->bo.map; - batch->next = bbo->bo.map + bbo->length; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - batch->relocs = &bbo->relocs; -} - -static void -anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) -{ - assert(batch->start == bbo->bo.map); - bbo->length = batch->next - batch->start; - VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); -} - -static void -anv_batch_bo_destroy(struct anv_batch_bo *bbo, - struct anv_cmd_buffer *cmd_buffer) -{ - anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc); - anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); - anv_free(&cmd_buffer->pool->alloc, bbo); -} - -static VkResult -anv_batch_bo_list_clone(const struct list_head *list, - struct anv_cmd_buffer *cmd_buffer, - struct list_head *new_list) -{ - VkResult result = VK_SUCCESS; - - list_inithead(new_list); - - struct anv_batch_bo *prev_bbo = NULL; - list_for_each_entry(struct anv_batch_bo, bbo, list, link) { - struct anv_batch_bo *new_bbo = NULL; - result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo); - if (result != VK_SUCCESS) - break; - list_addtail(&new_bbo->link, new_list); - - if (prev_bbo) { - /* As we clone this list of batch_bo's, they chain one to the - * other using MI_BATCH_BUFFER_START 
commands. We need to fix up - * those relocations as we go. Fortunately, this is pretty easy - * as it will always be the last relocation in the list. - */ - uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; - assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); - prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; - } - - prev_bbo = new_bbo; - } - - if (result != VK_SUCCESS) { - list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) - anv_batch_bo_destroy(bbo, cmd_buffer); - } - - return result; -} - -/*-----------------------------------------------------------------------* - * Functions related to anv_batch_bo - *-----------------------------------------------------------------------*/ - -static inline struct anv_batch_bo * -anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) -{ - return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); -} - -struct anv_address -anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) -{ - return (struct anv_address) { - .bo = &cmd_buffer->device->surface_state_block_pool.bo, - .offset = *(int32_t *)anv_vector_head(&cmd_buffer->bt_blocks), - }; -} - -static void -emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, uint32_t offset) -{ - /* In gen8+ the address field grew to two dwords to accomodate 48 bit - * offsets. The high 16 bits are in the last dword, so we can use the gen8 - * version in either case, as long as we set the instruction length in the - * header accordingly. This means that we always emit three dwords here - * and all the padding and adjustment we do in this file works for all - * gens. 
- */ - - const uint32_t gen7_length = - GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias; - const uint32_t gen8_length = - GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias; - - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, - .DWordLength = cmd_buffer->device->info.gen < 8 ? - gen7_length : gen8_length, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { bo, offset }); -} - -static void -cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_batch_bo *bbo) -{ - struct anv_batch *batch = &cmd_buffer->batch; - struct anv_batch_bo *current_bbo = - anv_cmd_buffer_current_batch_bo(cmd_buffer); - - /* We set the end of the batch a little short so we would be sure we - * have room for the chaining command. Since we're about to emit the - * chaining command, let's set it back where it should go. - */ - batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; - assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); - - emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0); - - anv_batch_bo_finish(current_bbo, batch); -} - -static VkResult -anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) -{ - struct anv_cmd_buffer *cmd_buffer = _data; - struct anv_batch_bo *new_bbo; - - VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo); - if (result != VK_SUCCESS) - return result; - - struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); - if (seen_bbo == NULL) { - anv_batch_bo_destroy(new_bbo, cmd_buffer); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - *seen_bbo = new_bbo; - - cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); - - list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); - - anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); - - return VK_SUCCESS; -} - -struct anv_state -anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, - uint32_t 
entries, uint32_t *state_offset) -{ - struct anv_block_pool *block_pool = - &cmd_buffer->device->surface_state_block_pool; - int32_t *bt_block = anv_vector_head(&cmd_buffer->bt_blocks); - struct anv_state state; - - state.alloc_size = align_u32(entries * 4, 32); - - if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size) - return (struct anv_state) { 0 }; - - state.offset = cmd_buffer->bt_next; - state.map = block_pool->map + *bt_block + state.offset; - - cmd_buffer->bt_next += state.alloc_size; - - assert(*bt_block < 0); - *state_offset = -(*bt_block); - - return state; -} - -struct anv_state -anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer) -{ - return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); -} - -struct anv_state -anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment) -{ - return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, - size, alignment); -} - -VkResult -anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_block_pool *block_pool = - &cmd_buffer->device->surface_state_block_pool; - - int32_t *offset = anv_vector_add(&cmd_buffer->bt_blocks); - if (offset == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - *offset = anv_block_pool_alloc_back(block_pool); - cmd_buffer->bt_next = 0; - - return VK_SUCCESS; -} - -VkResult -anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch_bo *batch_bo; - VkResult result; - - list_inithead(&cmd_buffer->batch_bos); - - result = anv_batch_bo_create(cmd_buffer, &batch_bo); - if (result != VK_SUCCESS) - return result; - - list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); - - cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc; - cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; - cmd_buffer->batch.user_data = cmd_buffer; - - anv_batch_bo_start(batch_bo, &cmd_buffer->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); 
- - int success = anv_vector_init(&cmd_buffer->seen_bbos, - sizeof(struct anv_bo *), - 8 * sizeof(struct anv_bo *)); - if (!success) - goto fail_batch_bo; - - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; - - success = anv_vector_init(&cmd_buffer->bt_blocks, sizeof(int32_t), - 8 * sizeof(int32_t)); - if (!success) - goto fail_seen_bbos; - - result = anv_reloc_list_init(&cmd_buffer->surface_relocs, - &cmd_buffer->pool->alloc); - if (result != VK_SUCCESS) - goto fail_bt_blocks; - - anv_cmd_buffer_new_binding_table_block(cmd_buffer); - - cmd_buffer->execbuf2.objects = NULL; - cmd_buffer->execbuf2.bos = NULL; - cmd_buffer->execbuf2.array_length = 0; - - return VK_SUCCESS; - - fail_bt_blocks: - anv_vector_finish(&cmd_buffer->bt_blocks); - fail_seen_bbos: - anv_vector_finish(&cmd_buffer->seen_bbos); - fail_batch_bo: - anv_batch_bo_destroy(batch_bo, cmd_buffer); - - return result; -} - -void -anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) -{ - int32_t *bt_block; - anv_vector_foreach(bt_block, &cmd_buffer->bt_blocks) { - anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, - *bt_block); - } - anv_vector_finish(&cmd_buffer->bt_blocks); - - anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc); - - anv_vector_finish(&cmd_buffer->seen_bbos); - - /* Destroy all of the batch buffers */ - list_for_each_entry_safe(struct anv_batch_bo, bbo, - &cmd_buffer->batch_bos, link) { - anv_batch_bo_destroy(bbo, cmd_buffer); - } - - anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.objects); - anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.bos); -} - -void -anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) -{ - /* Delete all but the first batch bo */ - assert(!list_empty(&cmd_buffer->batch_bos)); - while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { - struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - list_del(&bbo->link); - 
anv_batch_bo_destroy(bbo, cmd_buffer); - } - assert(!list_empty(&cmd_buffer->batch_bos)); - - anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), - &cmd_buffer->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); - - while (anv_vector_length(&cmd_buffer->bt_blocks) > 1) { - int32_t *bt_block = anv_vector_remove(&cmd_buffer->bt_blocks); - anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, - *bt_block); - } - assert(anv_vector_length(&cmd_buffer->bt_blocks) == 1); - cmd_buffer->bt_next = 0; - - cmd_buffer->surface_relocs.num_relocs = 0; - - /* Reset the list of seen buffers */ - cmd_buffer->seen_bbos.head = 0; - cmd_buffer->seen_bbos.tail = 0; - - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = - anv_cmd_buffer_current_batch_bo(cmd_buffer); -} - -void -anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - - if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { - /* When we start a batch buffer, we subtract a certain amount of - * padding from the end to ensure that we always have room to emit a - * BATCH_BUFFER_START to chain to the next BO. We need to remove - * that padding before we end the batch; otherwise, we may end up - * with our BATCH_BUFFER_END in another BO. - */ - cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4; - assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size); - - anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END); - - /* Round batch up to an even number of dwords. 
*/ - if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) - anv_batch_emit(&cmd_buffer->batch, GEN7_MI_NOOP); - - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; - } - - anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); - - if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { - /* If this is a secondary command buffer, we need to determine the - * mode in which it will be executed with vkExecuteCommands. We - * determine this statically here so that this stays in sync with the - * actual ExecuteCommands implementation. - */ - if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && - (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) { - /* If the secondary has exactly one batch buffer in its list *and* - * that batch buffer is less than half of the maximum size, we're - * probably better of simply copying it into our batch. - */ - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; - } else if (!(cmd_buffer->usage_flags & - VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) { - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; - - /* When we chain, we need to add an MI_BATCH_BUFFER_START command - * with its relocation. In order to handle this we'll increment here - * so we can unconditionally decrement right before adding the - * MI_BATCH_BUFFER_START command. 
- */ - batch_bo->relocs.num_relocs++; - cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4; - } else { - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; - } - } -} - -static inline VkResult -anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, - struct list_head *list) -{ - list_for_each_entry(struct anv_batch_bo, bbo, list, link) { - struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos); - if (bbo_ptr == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - *bbo_ptr = bbo; - } - - return VK_SUCCESS; -} - -void -anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, - struct anv_cmd_buffer *secondary) -{ - switch (secondary->exec_mode) { - case ANV_CMD_BUFFER_EXEC_MODE_EMIT: - anv_batch_emit_batch(&primary->batch, &secondary->batch); - anv_cmd_buffer_emit_state_base_address(primary); - break; - case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { - struct anv_batch_bo *first_bbo = - list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); - struct anv_batch_bo *last_bbo = - list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); - - emit_batch_buffer_start(primary, &first_bbo->bo, 0); - - struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); - assert(primary->batch.start == this_bbo->bo.map); - uint32_t offset = primary->batch.next - primary->batch.start; - const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4; - - /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we - * can emit a new command and relocation for the current splice. In - * order to handle the initial-use case, we incremented next and - * num_relocs in end_batch_buffer() so we can alyways just subtract - * here. 
- */ - last_bbo->relocs.num_relocs--; - secondary->batch.next -= inst_size; - emit_batch_buffer_start(secondary, &this_bbo->bo, offset); - anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); - - /* After patching up the secondary buffer, we need to clflush the - * modified instruction in case we're on a !llc platform. We use a - * little loop to handle the case where the instruction crosses a cache - * line boundary. - */ - if (!primary->device->info.has_llc) { - void *inst = secondary->batch.next - inst_size; - void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK); - __builtin_ia32_mfence(); - while (p < secondary->batch.next) { - __builtin_ia32_clflush(p); - p += CACHELINE_SIZE; - } - } - - anv_cmd_buffer_emit_state_base_address(primary); - break; - } - case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { - struct list_head copy_list; - VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, - secondary, - ©_list); - if (result != VK_SUCCESS) - return; /* FIXME */ - - anv_cmd_buffer_add_seen_bbos(primary, ©_list); - - struct anv_batch_bo *first_bbo = - list_first_entry(©_list, struct anv_batch_bo, link); - struct anv_batch_bo *last_bbo = - list_last_entry(©_list, struct anv_batch_bo, link); - - cmd_buffer_chain_to_batch_bo(primary, first_bbo); - - list_splicetail(©_list, &primary->batch_bos); - - anv_batch_bo_continue(last_bbo, &primary->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); - - anv_cmd_buffer_emit_state_base_address(primary); - break; - } - default: - assert(!"Invalid execution mode"); - } - - anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc, - &secondary->surface_relocs, 0); -} - -static VkResult -anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, - struct anv_reloc_list *relocs) -{ - struct drm_i915_gem_exec_object2 *obj = NULL; - - if (bo->index < cmd_buffer->execbuf2.bo_count && - cmd_buffer->execbuf2.bos[bo->index] == bo) - obj = &cmd_buffer->execbuf2.objects[bo->index]; - - if 
(obj == NULL) { - /* We've never seen this one before. Add it to the list and assign - * an id that we can use later. - */ - if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { - uint32_t new_len = cmd_buffer->execbuf2.objects ? - cmd_buffer->execbuf2.array_length * 2 : 64; - - struct drm_i915_gem_exec_object2 *new_objects = - anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_objects), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (new_objects == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct anv_bo **new_bos = - anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_bos), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (new_objects == NULL) { - anv_free(&cmd_buffer->pool->alloc, new_objects); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - if (cmd_buffer->execbuf2.objects) { - memcpy(new_objects, cmd_buffer->execbuf2.objects, - cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); - memcpy(new_bos, cmd_buffer->execbuf2.bos, - cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); - } - - cmd_buffer->execbuf2.objects = new_objects; - cmd_buffer->execbuf2.bos = new_bos; - cmd_buffer->execbuf2.array_length = new_len; - } - - assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); - - bo->index = cmd_buffer->execbuf2.bo_count++; - obj = &cmd_buffer->execbuf2.objects[bo->index]; - cmd_buffer->execbuf2.bos[bo->index] = bo; - - obj->handle = bo->gem_handle; - obj->relocation_count = 0; - obj->relocs_ptr = 0; - obj->alignment = 0; - obj->offset = bo->offset; - obj->flags = bo->is_winsys_bo ? EXEC_OBJECT_WRITE : 0; - obj->rsvd1 = 0; - obj->rsvd2 = 0; - } - - if (relocs != NULL && obj->relocation_count == 0) { - /* This is the first time we've ever seen a list of relocations for - * this BO. Go ahead and set the relocations and then walk the list - * of relocations and add them all. 
- */ - obj->relocation_count = relocs->num_relocs; - obj->relocs_ptr = (uintptr_t) relocs->relocs; - - for (size_t i = 0; i < relocs->num_relocs; i++) { - /* A quick sanity check on relocations */ - assert(relocs->relocs[i].offset < bo->size); - anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); - } - } - - return VK_SUCCESS; -} - -static void -anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, - struct anv_reloc_list *list) -{ - struct anv_bo *bo; - - /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in - * struct drm_i915_gem_exec_object2 against the bos current offset and if - * all bos haven't moved it will skip relocation processing alltogether. - * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming - * value of offset so we can set it either way. For that to work we need - * to make sure all relocs use the same presumed offset. - */ - - for (size_t i = 0; i < list->num_relocs; i++) { - bo = list->reloc_bos[i]; - if (bo->offset != list->relocs[i].presumed_offset) - cmd_buffer->execbuf2.need_reloc = true; - - list->relocs[i].target_handle = bo->index; - } -} - -static uint64_t -read_reloc(const struct anv_device *device, const void *p) -{ - if (device->info.gen >= 8) - return *(uint64_t *)p; - else - return *(uint32_t *)p; -} - -static void -write_reloc(const struct anv_device *device, void *p, uint64_t v) -{ - if (device->info.gen >= 8) - *(uint64_t *)p = v; - else - *(uint32_t *)p = v; -} - -static void -adjust_relocations_from_block_pool(struct anv_block_pool *pool, - struct anv_reloc_list *relocs) -{ - for (size_t i = 0; i < relocs->num_relocs; i++) { - /* In general, we don't know how stale the relocated value is. It - * may have been used last time or it may not. Since we don't want - * to stomp it while the GPU may be accessing it, we haven't updated - * it anywhere else in the code. 
Instead, we just set the presumed - * offset to what it is now based on the delta and the data in the - * block pool. Then the kernel will update it for us if needed. - */ - assert(relocs->relocs[i].offset < pool->state.end); - const void *p = pool->map + relocs->relocs[i].offset; - - /* We're reading back the relocated value from potentially incoherent - * memory here. However, any change to the value will be from the kernel - * writing out relocations, which will keep the CPU cache up to date. - */ - relocs->relocs[i].presumed_offset = - read_reloc(pool->device, p) - relocs->relocs[i].delta; - - /* All of the relocations from this block pool to other BO's should - * have been emitted relative to the surface block pool center. We - * need to add the center offset to make them relative to the - * beginning of the actual GEM bo. - */ - relocs->relocs[i].offset += pool->center_bo_offset; - } -} - -static void -adjust_relocations_to_block_pool(struct anv_block_pool *pool, - struct anv_bo *from_bo, - struct anv_reloc_list *relocs, - uint32_t *last_pool_center_bo_offset) -{ - assert(*last_pool_center_bo_offset <= pool->center_bo_offset); - uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset; - - /* When we initially emit relocations into a block pool, we don't - * actually know what the final center_bo_offset will be so we just emit - * it as if center_bo_offset == 0. Now that we know what the center - * offset is, we need to walk the list of relocations and adjust any - * relocations that point to the pool bo with the correct offset. - */ - for (size_t i = 0; i < relocs->num_relocs; i++) { - if (relocs->reloc_bos[i] == &pool->bo) { - /* Adjust the delta value in the relocation to correctly - * correspond to the new delta. Initially, this value may have - * been negative (if treated as unsigned), but we trust in - * uint32_t roll-over to fix that for us at this point. 
- */ - relocs->relocs[i].delta += delta; - - /* Since the delta has changed, we need to update the actual - * relocated value with the new presumed value. This function - * should only be called on batch buffers, so we know it isn't in - * use by the GPU at the moment. - */ - assert(relocs->relocs[i].offset < from_bo->size); - write_reloc(pool->device, from_bo->map + relocs->relocs[i].offset, - relocs->relocs[i].presumed_offset + - relocs->relocs[i].delta); - } - } - - *last_pool_center_bo_offset = pool->center_bo_offset; -} - -void -anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch *batch = &cmd_buffer->batch; - struct anv_block_pool *ss_pool = - &cmd_buffer->device->surface_state_block_pool; - - cmd_buffer->execbuf2.bo_count = 0; - cmd_buffer->execbuf2.need_reloc = false; - - adjust_relocations_from_block_pool(ss_pool, &cmd_buffer->surface_relocs); - anv_cmd_buffer_add_bo(cmd_buffer, &ss_pool->bo, &cmd_buffer->surface_relocs); - - /* First, we walk over all of the bos we've seen and add them and their - * relocations to the validate list. - */ - struct anv_batch_bo **bbo; - anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { - adjust_relocations_to_block_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs, - &(*bbo)->last_ss_pool_bo_offset); - - anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); - } - - struct anv_batch_bo *first_batch_bo = - list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); - - /* The kernel requires that the last entry in the validation list be the - * batch buffer to execute. We can simply swap the element - * corresponding to the first batch_bo in the chain with the last - * element in the list. 
- */ - if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { - uint32_t idx = first_batch_bo->bo.index; - uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1; - - struct drm_i915_gem_exec_object2 tmp_obj = - cmd_buffer->execbuf2.objects[idx]; - assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); - - cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx]; - cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx]; - cmd_buffer->execbuf2.bos[idx]->index = idx; - - cmd_buffer->execbuf2.objects[last_idx] = tmp_obj; - cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo; - first_batch_bo->bo.index = last_idx; - } - - /* Now we go through and fixup all of the relocation lists to point to - * the correct indices in the object array. We have to do this after we - * reorder the list above as some of the indices may have changed. - */ - anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) - anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); - - anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); - - if (!cmd_buffer->device->info.has_llc) { - __builtin_ia32_mfence(); - anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { - for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE) - __builtin_ia32_clflush((*bbo)->bo.map + i); - } - } - - cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { - .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, - .buffer_count = cmd_buffer->execbuf2.bo_count, - .batch_start_offset = 0, - .batch_len = batch->next - batch->start, - .cliprects_ptr = 0, - .num_cliprects = 0, - .DR1 = 0, - .DR4 = 0, - .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER | - I915_EXEC_CONSTANTS_REL_GENERAL, - .rsvd1 = cmd_buffer->device->context_id, - .rsvd2 = 0, - }; - - if (!cmd_buffer->execbuf2.need_reloc) - cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; -} diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c deleted file mode 100644 
index b060828cf61..00000000000 --- a/src/vulkan/anv_cmd_buffer.c +++ /dev/null @@ -1,1191 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -/** \file anv_cmd_buffer.c - * - * This file contains all of the stuff for emitting commands into a command - * buffer. This includes implementations of most of the vkCmd* - * entrypoints. This file is concerned entirely with state emission and - * not with the command buffer data structure itself. As far as this file - * is concerned, most of anv_cmd_buffer is magic. - */ - -/* TODO: These are taken from GLES. 
We should check the Vulkan spec */ -const struct anv_dynamic_state default_dynamic_state = { - .viewport = { - .count = 0, - }, - .scissor = { - .count = 0, - }, - .line_width = 1.0f, - .depth_bias = { - .bias = 0.0f, - .clamp = 0.0f, - .slope = 0.0f, - }, - .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, - .depth_bounds = { - .min = 0.0f, - .max = 1.0f, - }, - .stencil_compare_mask = { - .front = ~0u, - .back = ~0u, - }, - .stencil_write_mask = { - .front = ~0u, - .back = ~0u, - }, - .stencil_reference = { - .front = 0u, - .back = 0u, - }, -}; - -void -anv_dynamic_state_copy(struct anv_dynamic_state *dest, - const struct anv_dynamic_state *src, - uint32_t copy_mask) -{ - if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { - dest->viewport.count = src->viewport.count; - typed_memcpy(dest->viewport.viewports, src->viewport.viewports, - src->viewport.count); - } - - if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) { - dest->scissor.count = src->scissor.count; - typed_memcpy(dest->scissor.scissors, src->scissor.scissors, - src->scissor.count); - } - - if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) - dest->line_width = src->line_width; - - if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) - dest->depth_bias = src->depth_bias; - - if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) - typed_memcpy(dest->blend_constants, src->blend_constants, 4); - - if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) - dest->depth_bounds = src->depth_bounds; - - if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) - dest->stencil_compare_mask = src->stencil_compare_mask; - - if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) - dest->stencil_write_mask = src->stencil_write_mask; - - if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) - dest->stencil_reference = src->stencil_reference; -} - -static void -anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_cmd_state *state = &cmd_buffer->state; - - memset(&state->descriptors, 0, 
sizeof(state->descriptors)); - memset(&state->push_constants, 0, sizeof(state->push_constants)); - memset(state->binding_tables, 0, sizeof(state->binding_tables)); - memset(state->samplers, 0, sizeof(state->samplers)); - - /* 0 isn't a valid config. This ensures that we always configure L3$. */ - cmd_buffer->state.current_l3_config = 0; - - state->dirty = ~0; - state->vb_dirty = 0; - state->descriptors_dirty = 0; - state->push_constants_dirty = 0; - state->pipeline = NULL; - state->restart_index = UINT32_MAX; - state->dynamic = default_dynamic_state; - state->need_query_wa = true; - - if (state->attachments != NULL) { - anv_free(&cmd_buffer->pool->alloc, state->attachments); - state->attachments = NULL; - } - - state->gen7.index_buffer = NULL; -} - -/** - * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass. - */ -void -anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, - const VkRenderPassBeginInfo *info) -{ - struct anv_cmd_state *state = &cmd_buffer->state; - ANV_FROM_HANDLE(anv_render_pass, pass, info->renderPass); - - anv_free(&cmd_buffer->pool->alloc, state->attachments); - - if (pass->attachment_count == 0) { - state->attachments = NULL; - return; - } - - state->attachments = anv_alloc(&cmd_buffer->pool->alloc, - pass->attachment_count * - sizeof(state->attachments[0]), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (state->attachments == NULL) { - /* FIXME: Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */ - abort(); - } - - for (uint32_t i = 0; i < pass->attachment_count; ++i) { - struct anv_render_pass_attachment *att = &pass->attachments[i]; - VkImageAspectFlags clear_aspects = 0; - - if (anv_format_is_color(att->format)) { - /* color attachment */ - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; - } - } else { - /* depthstencil attachment */ - if (att->format->has_depth && - att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; - } 
- if (att->format->has_stencil && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - } - } - - state->attachments[i].pending_clear_aspects = clear_aspects; - if (clear_aspects) { - assert(info->clearValueCount > i); - state->attachments[i].clear_value = info->pClearValues[i]; - } - } -} - -static VkResult -anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, uint32_t size) -{ - struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; - - if (*ptr == NULL) { - *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } else if ((*ptr)->size < size) { - *ptr = anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - (*ptr)->size = size; - - return VK_SUCCESS; -} - -#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ - anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ - (offsetof(struct anv_push_constants, field) + \ - sizeof(cmd_buffer->state.push_constants[0]->field))) - -static VkResult anv_create_cmd_buffer( - struct anv_device * device, - struct anv_cmd_pool * pool, - VkCommandBufferLevel level, - VkCommandBuffer* pCommandBuffer) -{ - struct anv_cmd_buffer *cmd_buffer; - VkResult result; - - cmd_buffer = anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (cmd_buffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - cmd_buffer->device = device; - cmd_buffer->pool = pool; - cmd_buffer->level = level; - cmd_buffer->state.attachments = NULL; - - result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); - if (result != VK_SUCCESS) - goto fail; - - 
anv_state_stream_init(&cmd_buffer->surface_state_stream, - &device->surface_state_block_pool); - anv_state_stream_init(&cmd_buffer->dynamic_state_stream, - &device->dynamic_state_block_pool); - - if (pool) { - list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); - } else { - /* Init the pool_link so we can safefly call list_del when we destroy - * the command buffer - */ - list_inithead(&cmd_buffer->pool_link); - } - - *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer); - - return VK_SUCCESS; - - fail: - anv_free(&cmd_buffer->pool->alloc, cmd_buffer); - - return result; -} - -VkResult anv_AllocateCommandBuffers( - VkDevice _device, - const VkCommandBufferAllocateInfo* pAllocateInfo, - VkCommandBuffer* pCommandBuffers) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_pool, pool, pAllocateInfo->commandPool); - - VkResult result = VK_SUCCESS; - uint32_t i; - - for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { - result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level, - &pCommandBuffers[i]); - if (result != VK_SUCCESS) - break; - } - - if (result != VK_SUCCESS) - anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, - i, pCommandBuffers); - - return result; -} - -static void -anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) -{ - list_del(&cmd_buffer->pool_link); - - anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); - - anv_state_stream_finish(&cmd_buffer->surface_state_stream); - anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - - anv_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); - anv_free(&cmd_buffer->pool->alloc, cmd_buffer); -} - -void anv_FreeCommandBuffers( - VkDevice device, - VkCommandPool commandPool, - uint32_t commandBufferCount, - const VkCommandBuffer* pCommandBuffers) -{ - for (uint32_t i = 0; i < commandBufferCount; i++) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); - - anv_cmd_buffer_destroy(cmd_buffer); - } -} - -VkResult 
anv_ResetCommandBuffer( - VkCommandBuffer commandBuffer, - VkCommandBufferResetFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer->usage_flags = 0; - cmd_buffer->state.current_pipeline = UINT32_MAX; - anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); - anv_cmd_state_reset(cmd_buffer); - - return VK_SUCCESS; -} - -void -anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) -{ - switch (cmd_buffer->device->info.gen) { - case 7: - if (cmd_buffer->device->info.is_haswell) - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); - else - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); - case 8: - return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); - case 9: - return gen9_cmd_buffer_emit_state_base_address(cmd_buffer); - default: - unreachable("unsupported gen\n"); - } -} - -VkResult anv_BeginCommandBuffer( - VkCommandBuffer commandBuffer, - const VkCommandBufferBeginInfo* pBeginInfo) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - /* If this is the first vkBeginCommandBuffer, we must *initialize* the - * command buffer's state. Otherwise, we must *reset* its state. In both - * cases we reset it. - * - * From the Vulkan 1.0 spec: - * - * If a command buffer is in the executable state and the command buffer - * was allocated from a command pool with the - * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then - * vkBeginCommandBuffer implicitly resets the command buffer, behaving - * as if vkResetCommandBuffer had been called with - * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts - * the command buffer in the recording state. 
- */ - anv_ResetCommandBuffer(commandBuffer, /*flags*/ 0); - - cmd_buffer->usage_flags = pBeginInfo->flags; - - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY || - !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)); - - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - if (cmd_buffer->usage_flags & - VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { - cmd_buffer->state.framebuffer = - anv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); - cmd_buffer->state.pass = - anv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); - - struct anv_subpass *subpass = - &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; - - anv_cmd_buffer_set_subpass(cmd_buffer, subpass); - } - - return VK_SUCCESS; -} - -VkResult anv_EndCommandBuffer( - VkCommandBuffer commandBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_device *device = cmd_buffer->device; - - anv_cmd_buffer_end_batch_buffer(cmd_buffer); - - if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { - /* The algorithm used to compute the validate list is not threadsafe as - * it uses the bo->index field. We have to lock the device around it. - * Fortunately, the chances for contention here are probably very low. 
- */ - pthread_mutex_lock(&device->mutex); - anv_cmd_buffer_prepare_execbuf(cmd_buffer); - pthread_mutex_unlock(&device->mutex); - } - - return VK_SUCCESS; -} - -void anv_CmdBindPipeline( - VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipeline _pipeline) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - - switch (pipelineBindPoint) { - case VK_PIPELINE_BIND_POINT_COMPUTE: - cmd_buffer->state.compute_pipeline = pipeline; - cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE; - cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; - break; - - case VK_PIPELINE_BIND_POINT_GRAPHICS: - cmd_buffer->state.pipeline = pipeline; - cmd_buffer->state.vb_dirty |= pipeline->vb_used; - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; - cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; - cmd_buffer->state.descriptors_dirty |= pipeline->active_stages; - - /* Apply the dynamic state from the pipeline */ - cmd_buffer->state.dirty |= pipeline->dynamic_state_mask; - anv_dynamic_state_copy(&cmd_buffer->state.dynamic, - &pipeline->dynamic_state, - pipeline->dynamic_state_mask); - break; - - default: - assert(!"invalid bind point"); - break; - } -} - -void anv_CmdSetViewport( - VkCommandBuffer commandBuffer, - uint32_t firstViewport, - uint32_t viewportCount, - const VkViewport* pViewports) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - const uint32_t total_count = firstViewport + viewportCount; - if (cmd_buffer->state.dynamic.viewport.count < total_count); - cmd_buffer->state.dynamic.viewport.count = total_count; - - memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport, - pViewports, viewportCount * sizeof(*pViewports)); - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; -} - -void anv_CmdSetScissor( - VkCommandBuffer 
commandBuffer, - uint32_t firstScissor, - uint32_t scissorCount, - const VkRect2D* pScissors) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - const uint32_t total_count = firstScissor + scissorCount; - if (cmd_buffer->state.dynamic.scissor.count < total_count); - cmd_buffer->state.dynamic.scissor.count = total_count; - - memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor, - pScissors, scissorCount * sizeof(*pScissors)); - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; -} - -void anv_CmdSetLineWidth( - VkCommandBuffer commandBuffer, - float lineWidth) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer->state.dynamic.line_width = lineWidth; - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; -} - -void anv_CmdSetDepthBias( - VkCommandBuffer commandBuffer, - float depthBiasConstantFactor, - float depthBiasClamp, - float depthBiasSlopeFactor) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor; - cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; - cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor; - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; -} - -void anv_CmdSetBlendConstants( - VkCommandBuffer commandBuffer, - const float blendConstants[4]) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - memcpy(cmd_buffer->state.dynamic.blend_constants, - blendConstants, sizeof(float) * 4); - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; -} - -void anv_CmdSetDepthBounds( - VkCommandBuffer commandBuffer, - float minDepthBounds, - float maxDepthBounds) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; - cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; -} - -void 
anv_CmdSetStencilCompareMask( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t compareMask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask; - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; -} - -void anv_CmdSetStencilWriteMask( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t writeMask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask; - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; -} - -void anv_CmdSetStencilReference( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t reference) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_reference.front = reference; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_reference.back = reference; - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; -} - -void anv_CmdBindDescriptorSets( - VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout _layout, - uint32_t firstSet, - uint32_t descriptorSetCount, - const VkDescriptorSet* pDescriptorSets, - uint32_t dynamicOffsetCount, - const uint32_t* pDynamicOffsets) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); - struct anv_descriptor_set_layout *set_layout; - - assert(firstSet + descriptorSetCount < MAX_SETS); - - uint32_t dynamic_slot 
= 0; - for (uint32_t i = 0; i < descriptorSetCount; i++) { - ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); - set_layout = layout->set[firstSet + i].layout; - - if (cmd_buffer->state.descriptors[firstSet + i] != set) { - cmd_buffer->state.descriptors[firstSet + i] = set; - cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; - } - - if (set_layout->dynamic_offset_count > 0) { - anv_foreach_stage(s, set_layout->shader_stages) { - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic); - - struct anv_push_constants *push = - cmd_buffer->state.push_constants[s]; - - unsigned d = layout->set[firstSet + i].dynamic_offset_start; - const uint32_t *offsets = pDynamicOffsets + dynamic_slot; - struct anv_descriptor *desc = set->descriptors; - - for (unsigned b = 0; b < set_layout->binding_count; b++) { - if (set_layout->binding[b].dynamic_offset_index < 0) - continue; - - unsigned array_size = set_layout->binding[b].array_size; - for (unsigned j = 0; j < array_size; j++) { - uint32_t range = 0; - if (desc->buffer_view) - range = desc->buffer_view->range; - push->dynamic[d].offset = *(offsets++); - push->dynamic[d].range = range; - desc++; - d++; - } - } - } - cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; - } - } -} - -void anv_CmdBindVertexBuffers( - VkCommandBuffer commandBuffer, - uint32_t firstBinding, - uint32_t bindingCount, - const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; - - /* We have to defer setting up vertex buffer since we need the buffer - * stride from the pipeline. 
*/ - - assert(firstBinding + bindingCount < MAX_VBS); - for (uint32_t i = 0; i < bindingCount; i++) { - vb[firstBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); - vb[firstBinding + i].offset = pOffsets[i]; - cmd_buffer->state.vb_dirty |= 1 << (firstBinding + i); - } -} - -static void -add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, - struct anv_state state, struct anv_bo *bo, uint32_t offset) -{ - /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and - * 9 for gen8+. We only write the first dword for gen8+ here and rely on - * the initial state to set the high bits to 0. */ - - const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 1 : 8; - - anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc, - state.offset + dword * 4, bo, offset); -} - -const struct anv_format * -anv_format_for_descriptor_type(VkDescriptorType type) -{ - switch (type) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - return anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT); - - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - return anv_format_for_vk_format(VK_FORMAT_UNDEFINED); - - default: - unreachable("Invalid descriptor type"); - } -} - -VkResult -anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, - struct anv_state *bt_state) -{ - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_pipeline_bind_map *map; - uint32_t color_count, bias, state_offset; - - switch (stage) { - case MESA_SHADER_FRAGMENT: - map = &cmd_buffer->state.pipeline->bindings[stage]; - bias = MAX_RTS; - color_count = subpass->color_count; - break; - case MESA_SHADER_COMPUTE: - map = &cmd_buffer->state.compute_pipeline->bindings[stage]; - bias = 1; - color_count = 0; - break; - default: - map = &cmd_buffer->state.pipeline->bindings[stage]; - 
bias = 0; - color_count = 0; - break; - } - - if (color_count + map->surface_count == 0) { - *bt_state = (struct anv_state) { 0, }; - return VK_SUCCESS; - } - - *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, - bias + map->surface_count, - &state_offset); - uint32_t *bt_map = bt_state->map; - - if (bt_state->map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - for (uint32_t a = 0; a < color_count; a++) { - const struct anv_image_view *iview = - fb->attachments[subpass->color_attachments[a]]; - - assert(iview->color_rt_surface_state.alloc_size); - bt_map[a] = iview->color_rt_surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, - iview->bo, iview->offset); - } - - if (stage == MESA_SHADER_COMPUTE && - cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) { - struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; - uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset; - - struct anv_state surface_state; - surface_state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer); - - const struct anv_format *format = - anv_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - anv_fill_buffer_surface_state(cmd_buffer->device, surface_state, - format->isl_format, bo_offset, 12, 1); - - bt_map[0] = surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); - } - - if (map->surface_count == 0) - goto out; - - if (map->image_count > 0) { - VkResult result = - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images); - if (result != VK_SUCCESS) - return result; - - cmd_buffer->state.push_constants_dirty |= 1 << stage; - } - - uint32_t image = 0; - for (uint32_t s = 0; s < map->surface_count; s++) { - struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s]; - struct anv_descriptor_set *set = - cmd_buffer->state.descriptors[binding->set]; - struct anv_descriptor *desc = &set->descriptors[binding->offset]; - - 
struct anv_state surface_state; - struct anv_bo *bo; - uint32_t bo_offset; - - switch (desc->type) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - /* Nothing for us to do here */ - continue; - - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - surface_state = desc->image_view->sampler_surface_state; - assert(surface_state.alloc_size); - bo = desc->image_view->bo; - bo_offset = desc->image_view->offset; - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { - surface_state = desc->image_view->storage_surface_state; - assert(surface_state.alloc_size); - bo = desc->image_view->bo; - bo_offset = desc->image_view->offset; - - struct brw_image_param *image_param = - &cmd_buffer->state.push_constants[stage]->images[image++]; - - anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view, - image_param); - image_param->surface_idx = bias + s; - break; - } - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - surface_state = desc->buffer_view->surface_state; - assert(surface_state.alloc_size); - bo = desc->buffer_view->bo; - bo_offset = desc->buffer_view->offset; - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - surface_state = desc->buffer_view->storage_surface_state; - assert(surface_state.alloc_size); - bo = desc->buffer_view->bo; - bo_offset = desc->buffer_view->offset; - - struct brw_image_param *image_param = - &cmd_buffer->state.push_constants[stage]->images[image++]; - - anv_buffer_view_fill_image_param(cmd_buffer->device, desc->buffer_view, - image_param); - image_param->surface_idx = bias + s; - break; - - default: - assert(!"Invalid descriptor type"); - continue; - } - - bt_map[bias + s] = surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, surface_state, bo, 
bo_offset); - } - assert(image == map->image_count); - - out: - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(*bt_state); - - return VK_SUCCESS; -} - -VkResult -anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, struct anv_state *state) -{ - struct anv_pipeline_bind_map *map; - - if (stage == MESA_SHADER_COMPUTE) - map = &cmd_buffer->state.compute_pipeline->bindings[stage]; - else - map = &cmd_buffer->state.pipeline->bindings[stage]; - - if (map->sampler_count == 0) { - *state = (struct anv_state) { 0, }; - return VK_SUCCESS; - } - - uint32_t size = map->sampler_count * 16; - *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); - - if (state->map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - for (uint32_t s = 0; s < map->sampler_count; s++) { - struct anv_pipeline_binding *binding = &map->sampler_to_descriptor[s]; - struct anv_descriptor_set *set = - cmd_buffer->state.descriptors[binding->set]; - struct anv_descriptor *desc = &set->descriptors[binding->offset]; - - if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER && - desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) - continue; - - struct anv_sampler *sampler = desc->sampler; - - /* This can happen if we have an unfilled slot since TYPE_SAMPLER - * happens to be zero. 
- */ - if (sampler == NULL) - continue; - - memcpy(state->map + (s * 16), - sampler->state, sizeof(sampler->state)); - } - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(*state); - - return VK_SUCCESS; -} - -struct anv_state -anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, - const void *data, uint32_t size, uint32_t alignment) -{ - struct anv_state state; - - state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); - memcpy(state.map, data, size); - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - - VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); - - return state; -} - -struct anv_state -anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, - uint32_t *a, uint32_t *b, - uint32_t dwords, uint32_t alignment) -{ - struct anv_state state; - uint32_t *p; - - state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - dwords * 4, alignment); - p = state.map; - for (uint32_t i = 0; i < dwords; i++) - p[i] = a[i] | b[i]; - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - - VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); - - return state; -} - -/** - * @brief Setup the command buffer for recording commands inside the given - * subpass. - * - * This does not record all commands needed for starting the subpass. - * Starting the subpass may require additional commands. - * - * Note that vkCmdBeginRenderPass, vkCmdNextSubpass, and vkBeginCommandBuffer - * with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT, all setup the - * command buffer for recording commands for some subpass. But only the first - * two, vkCmdBeginRenderPass and vkCmdNextSubpass, can start a subpass. 
- */ -void -anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) -{ - switch (cmd_buffer->device->info.gen) { - case 7: - gen7_cmd_buffer_set_subpass(cmd_buffer, subpass); - break; - case 8: - gen8_cmd_buffer_set_subpass(cmd_buffer, subpass); - break; - case 9: - gen9_cmd_buffer_set_subpass(cmd_buffer, subpass); - break; - default: - unreachable("unsupported gen\n"); - } -} - -struct anv_state -anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage) -{ - struct anv_push_constants *data = - cmd_buffer->state.push_constants[stage]; - struct brw_stage_prog_data *prog_data = - cmd_buffer->state.pipeline->prog_data[stage]; - - /* If we don't actually have any push constants, bail. */ - if (data == NULL || prog_data->nr_params == 0) - return (struct anv_state) { .offset = 0 }; - - struct anv_state state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - prog_data->nr_params * sizeof(float), - 32 /* bottom 5 bits MBZ */); - - /* Walk through the param array and fill the buffer with data */ - uint32_t *u32_map = state.map; - for (unsigned i = 0; i < prog_data->nr_params; i++) { - uint32_t offset = (uintptr_t)prog_data->param[i]; - u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); - } - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - - return state; -} - -struct anv_state -anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_push_constants *data = - cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - - const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; - const unsigned push_constant_data_size = - (local_id_dwords + prog_data->nr_params) * 4; - const unsigned reg_aligned_constant_size = 
ALIGN(push_constant_data_size, 32); - const unsigned param_aligned_count = - reg_aligned_constant_size / sizeof(uint32_t); - - /* If we don't actually have any push constants, bail. */ - if (reg_aligned_constant_size == 0) - return (struct anv_state) { .offset = 0 }; - - const unsigned threads = pipeline->cs_thread_width_max; - const unsigned total_push_constants_size = - reg_aligned_constant_size * threads; - const unsigned push_constant_alignment = - cmd_buffer->device->info.gen < 8 ? 32 : 64; - const unsigned aligned_total_push_constants_size = - ALIGN(total_push_constants_size, push_constant_alignment); - struct anv_state state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - aligned_total_push_constants_size, - push_constant_alignment); - - /* Walk through the param array and fill the buffer with data */ - uint32_t *u32_map = state.map; - - brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads, - reg_aligned_constant_size); - - /* Setup uniform data for the first thread */ - for (unsigned i = 0; i < prog_data->nr_params; i++) { - uint32_t offset = (uintptr_t)prog_data->param[i]; - u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset); - } - - /* Copy uniform data from the first thread to every other thread */ - const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t); - for (unsigned t = 1; t < threads; t++) { - memcpy(&u32_map[t * param_aligned_count + local_id_dwords], - &u32_map[local_id_dwords], - uniform_data_size); - } - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - - return state; -} - -void anv_CmdPushConstants( - VkCommandBuffer commandBuffer, - VkPipelineLayout layout, - VkShaderStageFlags stageFlags, - uint32_t offset, - uint32_t size, - const void* pValues) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - anv_foreach_stage(stage, stageFlags) { - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); - - 
memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, - pValues, size); - } - - cmd_buffer->state.push_constants_dirty |= stageFlags; -} - -void anv_CmdExecuteCommands( - VkCommandBuffer commandBuffer, - uint32_t commandBufferCount, - const VkCommandBuffer* pCmdBuffers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); - - assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - - for (uint32_t i = 0; i < commandBufferCount; i++) { - ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); - - assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); - - anv_cmd_buffer_add_secondary(primary, secondary); - } -} - -VkResult anv_CreateCommandPool( - VkDevice _device, - const VkCommandPoolCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkCommandPool* pCmdPool) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_cmd_pool *pool; - - pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pool == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - if (pAllocator) - pool->alloc = *pAllocator; - else - pool->alloc = device->alloc; - - list_inithead(&pool->cmd_buffers); - - *pCmdPool = anv_cmd_pool_to_handle(pool); - - return VK_SUCCESS; -} - -void anv_DestroyCommandPool( - VkDevice _device, - VkCommandPool commandPool, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); - - anv_ResetCommandPool(_device, commandPool, 0); - - anv_free2(&device->alloc, pAllocator, pool); -} - -VkResult anv_ResetCommandPool( - VkDevice device, - VkCommandPool commandPool, - VkCommandPoolResetFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); - - /* FIXME: vkResetCommandPool must not destroy its command buffers. 
The - * Vulkan 1.0 spec requires that it only reset them: - * - * Resetting a command pool recycles all of the resources from all of - * the command buffers allocated from the command pool back to the - * command pool. All command buffers that have been allocated from the - * command pool are put in the initial state. - */ - list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, - &pool->cmd_buffers, pool_link) { - anv_cmd_buffer_destroy(cmd_buffer); - } - - return VK_SUCCESS; -} - -/** - * Return NULL if the current subpass has no depthstencil attachment. - */ -const struct anv_image_view * -anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) -{ - const struct anv_subpass *subpass = cmd_buffer->state.subpass; - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - - if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED) - return NULL; - - const struct anv_image_view *iview = - fb->attachments[subpass->depth_stencil_attachment]; - - assert(iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)); - - return iview; -} diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c deleted file mode 100644 index 7a77336602a..00000000000 --- a/src/vulkan/anv_descriptor_set.c +++ /dev/null @@ -1,532 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -/* - * Descriptor set layouts. - */ - -VkResult anv_CreateDescriptorSetLayout( - VkDevice _device, - const VkDescriptorSetLayoutCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDescriptorSetLayout* pSetLayout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_descriptor_set_layout *set_layout; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); - - uint32_t max_binding = 0; - uint32_t immutable_sampler_count = 0; - for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { - max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding); - if (pCreateInfo->pBindings[j].pImmutableSamplers) - immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; - } - - size_t size = sizeof(struct anv_descriptor_set_layout) + - (max_binding + 1) * sizeof(set_layout->binding[0]) + - immutable_sampler_count * sizeof(struct anv_sampler *); - - set_layout = anv_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set_layout) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* We just allocate all the samplers at the end of the struct */ - struct anv_sampler **samplers = - (struct anv_sampler **)&set_layout->binding[max_binding + 1]; - - set_layout->binding_count = max_binding + 1; - set_layout->shader_stages = 0; - set_layout->size = 0; - - for (uint32_t b = 0; b <= max_binding; b++) { - 
/* Initialize all binding_layout entries to -1 */ - memset(&set_layout->binding[b], -1, sizeof(set_layout->binding[b])); - - set_layout->binding[b].immutable_samplers = NULL; - } - - /* Initialize all samplers to 0 */ - memset(samplers, 0, immutable_sampler_count * sizeof(*samplers)); - - uint32_t sampler_count[MESA_SHADER_STAGES] = { 0, }; - uint32_t surface_count[MESA_SHADER_STAGES] = { 0, }; - uint32_t image_count[MESA_SHADER_STAGES] = { 0, }; - uint32_t buffer_count = 0; - uint32_t dynamic_offset_count = 0; - - for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { - const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j]; - uint32_t b = binding->binding; - - assert(binding->descriptorCount > 0); - set_layout->binding[b].array_size = binding->descriptorCount; - set_layout->binding[b].descriptor_index = set_layout->size; - set_layout->size += binding->descriptorCount; - - switch (binding->descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - anv_foreach_stage(s, binding->stageFlags) { - set_layout->binding[b].stage[s].sampler_index = sampler_count[s]; - sampler_count[s] += binding->descriptorCount; - } - break; - default: - break; - } - - switch (binding->descriptorType) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - set_layout->binding[b].buffer_index = buffer_count; - buffer_count += binding->descriptorCount; - /* fall through */ - - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - anv_foreach_stage(s, binding->stageFlags) { - set_layout->binding[b].stage[s].surface_index = surface_count[s]; - surface_count[s] += 
binding->descriptorCount; - } - break; - default: - break; - } - - switch (binding->descriptorType) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - set_layout->binding[b].dynamic_offset_index = dynamic_offset_count; - dynamic_offset_count += binding->descriptorCount; - break; - default: - break; - } - - switch (binding->descriptorType) { - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - anv_foreach_stage(s, binding->stageFlags) { - set_layout->binding[b].stage[s].image_index = image_count[s]; - image_count[s] += binding->descriptorCount; - } - break; - default: - break; - } - - if (binding->pImmutableSamplers) { - set_layout->binding[b].immutable_samplers = samplers; - samplers += binding->descriptorCount; - - for (uint32_t i = 0; i < binding->descriptorCount; i++) - set_layout->binding[b].immutable_samplers[i] = - anv_sampler_from_handle(binding->pImmutableSamplers[i]); - } else { - set_layout->binding[b].immutable_samplers = NULL; - } - - set_layout->shader_stages |= binding->stageFlags; - } - - set_layout->buffer_count = buffer_count; - set_layout->dynamic_offset_count = dynamic_offset_count; - - *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); - - return VK_SUCCESS; -} - -void anv_DestroyDescriptorSetLayout( - VkDevice _device, - VkDescriptorSetLayout _set_layout, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); - - anv_free2(&device->alloc, pAllocator, set_layout); -} - -/* - * Pipeline layouts. These have nothing to do with the pipeline. 
They are - * just muttiple descriptor set layouts pasted together - */ - -VkResult anv_CreatePipelineLayout( - VkDevice _device, - const VkPipelineLayoutCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipelineLayout* pPipelineLayout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline_layout *layout; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); - - layout = anv_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (layout == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - layout->num_sets = pCreateInfo->setLayoutCount; - - unsigned dynamic_offset_count = 0; - - memset(layout->stage, 0, sizeof(layout->stage)); - for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { - ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, - pCreateInfo->pSetLayouts[set]); - layout->set[set].layout = set_layout; - - layout->set[set].dynamic_offset_start = dynamic_offset_count; - for (uint32_t b = 0; b < set_layout->binding_count; b++) { - if (set_layout->binding[b].dynamic_offset_index >= 0) - dynamic_offset_count += set_layout->binding[b].array_size; - for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { - if (set_layout->binding[b].stage[s].surface_index >= 0) - layout->stage[s].has_dynamic_offsets = true; - } - } - } - - *pPipelineLayout = anv_pipeline_layout_to_handle(layout); - - return VK_SUCCESS; -} - -void anv_DestroyPipelineLayout( - VkDevice _device, - VkPipelineLayout _pipelineLayout, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); - - anv_free2(&device->alloc, pAllocator, pipeline_layout); -} - -/* - * Descriptor pools. These are a no-op for now. 
- */ - -VkResult anv_CreateDescriptorPool( - VkDevice device, - const VkDescriptorPoolCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDescriptorPool* pDescriptorPool) -{ - anv_finishme("VkDescriptorPool is a stub"); - *pDescriptorPool = (VkDescriptorPool)1; - return VK_SUCCESS; -} - -void anv_DestroyDescriptorPool( - VkDevice _device, - VkDescriptorPool _pool, - const VkAllocationCallbacks* pAllocator) -{ - anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); -} - -VkResult anv_ResetDescriptorPool( - VkDevice device, - VkDescriptorPool descriptorPool, - VkDescriptorPoolResetFlags flags) -{ - anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); - return VK_SUCCESS; -} - -VkResult -anv_descriptor_set_create(struct anv_device *device, - const struct anv_descriptor_set_layout *layout, - struct anv_descriptor_set **out_set) -{ - struct anv_descriptor_set *set; - size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]); - - set = anv_alloc(&device->alloc /* XXX: Use the pool */, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* A descriptor set may not be 100% filled. Clear the set so we can can - * later detect holes in it. 
- */ - memset(set, 0, size); - - set->layout = layout; - - /* Go through and fill out immutable samplers if we have any */ - struct anv_descriptor *desc = set->descriptors; - for (uint32_t b = 0; b < layout->binding_count; b++) { - if (layout->binding[b].immutable_samplers) { - for (uint32_t i = 0; i < layout->binding[b].array_size; i++) - desc[i].sampler = layout->binding[b].immutable_samplers[i]; - } - desc += layout->binding[b].array_size; - } - - /* XXX: Use the pool */ - set->buffer_views = - anv_alloc(&device->alloc, - sizeof(set->buffer_views[0]) * layout->buffer_count, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set->buffer_views) { - anv_free(&device->alloc, set); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - for (uint32_t b = 0; b < layout->buffer_count; b++) { - set->buffer_views[b].surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } - set->buffer_count = layout->buffer_count; - *out_set = set; - - return VK_SUCCESS; -} - -void -anv_descriptor_set_destroy(struct anv_device *device, - struct anv_descriptor_set *set) -{ - /* XXX: Use the pool */ - for (uint32_t b = 0; b < set->buffer_count; b++) - anv_state_pool_free(&device->surface_state_pool, - set->buffer_views[b].surface_state); - - anv_free(&device->alloc, set->buffer_views); - anv_free(&device->alloc, set); -} - -VkResult anv_AllocateDescriptorSets( - VkDevice _device, - const VkDescriptorSetAllocateInfo* pAllocateInfo, - VkDescriptorSet* pDescriptorSets) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - VkResult result = VK_SUCCESS; - struct anv_descriptor_set *set; - uint32_t i; - - for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { - ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, - pAllocateInfo->pSetLayouts[i]); - - result = anv_descriptor_set_create(device, layout, &set); - if (result != VK_SUCCESS) - break; - - pDescriptorSets[i] = anv_descriptor_set_to_handle(set); - } - - if (result != VK_SUCCESS) - anv_FreeDescriptorSets(_device, 
pAllocateInfo->descriptorPool, - i, pDescriptorSets); - - return result; -} - -VkResult anv_FreeDescriptorSets( - VkDevice _device, - VkDescriptorPool descriptorPool, - uint32_t count, - const VkDescriptorSet* pDescriptorSets) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - for (uint32_t i = 0; i < count; i++) { - ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); - - anv_descriptor_set_destroy(device, set); - } - - return VK_SUCCESS; -} - -void anv_UpdateDescriptorSets( - VkDevice _device, - uint32_t descriptorWriteCount, - const VkWriteDescriptorSet* pDescriptorWrites, - uint32_t descriptorCopyCount, - const VkCopyDescriptorSet* pDescriptorCopies) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - for (uint32_t i = 0; i < descriptorWriteCount; i++) { - const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; - ANV_FROM_HANDLE(anv_descriptor_set, set, write->dstSet); - const struct anv_descriptor_set_binding_layout *bind_layout = - &set->layout->binding[write->dstBinding]; - struct anv_descriptor *desc = - &set->descriptors[bind_layout->descriptor_index]; - desc += write->dstArrayElement; - - switch (write->descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - ANV_FROM_HANDLE(anv_sampler, sampler, - write->pImageInfo[j].sampler); - - desc[j] = (struct anv_descriptor) { - .type = VK_DESCRIPTOR_TYPE_SAMPLER, - .sampler = sampler, - }; - } - break; - - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - ANV_FROM_HANDLE(anv_image_view, iview, - write->pImageInfo[j].imageView); - ANV_FROM_HANDLE(anv_sampler, sampler, - write->pImageInfo[j].sampler); - - desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - desc[j].image_view = iview; - - /* If this descriptor has an immutable sampler, we don't want - * to stomp on it. 
- */ - if (sampler) - desc[j].sampler = sampler; - } - break; - - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - ANV_FROM_HANDLE(anv_image_view, iview, - write->pImageInfo[j].imageView); - - desc[j] = (struct anv_descriptor) { - .type = write->descriptorType, - .image_view = iview, - }; - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - ANV_FROM_HANDLE(anv_buffer_view, bview, - write->pTexelBufferView[j]); - - desc[j] = (struct anv_descriptor) { - .type = write->descriptorType, - .buffer_view = bview, - }; - } - break; - - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - anv_finishme("input attachments not implemented"); - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - assert(write->pBufferInfo[j].buffer); - ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer); - assert(buffer); - - struct anv_buffer_view *view = - &set->buffer_views[bind_layout->buffer_index]; - view += write->dstArrayElement + j; - - const struct anv_format *format = - anv_format_for_descriptor_type(write->descriptorType); - - view->format = format->isl_format; - view->bo = buffer->bo; - view->offset = buffer->offset + write->pBufferInfo[j].offset; - - /* For buffers with dynamic offsets, we use the full possible - * range in the surface state and do the actual range-checking - * in the shader. 
- */ - if (bind_layout->dynamic_offset_index >= 0 || - write->pBufferInfo[j].range == VK_WHOLE_SIZE) - view->range = buffer->size - write->pBufferInfo[j].offset; - else - view->range = write->pBufferInfo[j].range; - - anv_fill_buffer_surface_state(device, view->surface_state, - view->format, - view->offset, view->range, 1); - - desc[j] = (struct anv_descriptor) { - .type = write->descriptorType, - .buffer_view = view, - }; - - } - - default: - break; - } - } - - for (uint32_t i = 0; i < descriptorCopyCount; i++) { - const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; - ANV_FROM_HANDLE(anv_descriptor_set, src, copy->dstSet); - ANV_FROM_HANDLE(anv_descriptor_set, dst, copy->dstSet); - - const struct anv_descriptor_set_binding_layout *src_layout = - &src->layout->binding[copy->srcBinding]; - struct anv_descriptor *src_desc = - &src->descriptors[src_layout->descriptor_index]; - src_desc += copy->srcArrayElement; - - const struct anv_descriptor_set_binding_layout *dst_layout = - &dst->layout->binding[copy->dstBinding]; - struct anv_descriptor *dst_desc = - &dst->descriptors[dst_layout->descriptor_index]; - dst_desc += copy->dstArrayElement; - - for (uint32_t j = 0; j < copy->descriptorCount; j++) - dst_desc[j] = src_desc[j]; - } -} diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c deleted file mode 100644 index a8835f74179..00000000000 --- a/src/vulkan/anv_device.c +++ /dev/null @@ -1,1789 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice 
(including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" -#include "mesa/main/git_sha1.h" -#include "util/strtod.h" -#include "util/debug.h" - -#include "genxml/gen7_pack.h" - -struct anv_dispatch_table dtable; - -static void -compiler_debug_log(void *data, const char *fmt, ...) -{ } - -static void -compiler_perf_log(void *data, const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) - vfprintf(stderr, fmt, args); - - va_end(args); -} - -static VkResult -anv_physical_device_init(struct anv_physical_device *device, - struct anv_instance *instance, - const char *path) -{ - VkResult result; - int fd; - - fd = open(path, O_RDWR | O_CLOEXEC); - if (fd < 0) - return vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "failed to open %s: %m", path); - - device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - device->instance = instance; - device->path = path; - - device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); - if (!device->chipset_id) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "failed to get chipset id: %m"); - goto fail; - } - - device->name = brw_get_device_name(device->chipset_id); - device->info = brw_get_device_info(device->chipset_id); - if (!device->info) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "failed to get device info"); - goto fail; - } - - if 
(device->info->is_haswell) { - fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n"); - } else if (device->info->gen == 7 && !device->info->is_baytrail) { - fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); - } else if (device->info->gen == 7 && device->info->is_baytrail) { - fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n"); - } else if (device->info->gen >= 8) { - /* Broadwell, Cherryview, Skylake, Broxton, Kabylake is as fully - * supported as anything */ - } else { - result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, - "Vulkan not yet supported on %s", device->name); - goto fail; - } - - if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "failed to get aperture size: %m"); - goto fail; - } - - if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "kernel missing gem wait"); - goto fail; - } - - if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "kernel missing execbuf2"); - goto fail; - } - - if (!device->info->has_llc && - anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "kernel missing wc mmap"); - goto fail; - } - - bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X); - - close(fd); - - brw_process_intel_debug_variable(); - - device->compiler = brw_compiler_create(NULL, device->info); - if (device->compiler == NULL) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail; - } - device->compiler->shader_debug_log = compiler_debug_log; - device->compiler->shader_perf_log = compiler_perf_log; - - /* XXX: Actually detect bit6 swizzling */ - isl_device_init(&device->isl_dev, device->info, swizzled); - - return VK_SUCCESS; - -fail: - close(fd); - return result; -} - -static void -anv_physical_device_finish(struct anv_physical_device *device) -{ - 
ralloc_free(device->compiler); -} - -static const VkExtensionProperties global_extensions[] = { - { - .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, - .specVersion = 25, - }, - { - .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, - .specVersion = 5, - }, -#ifdef HAVE_WAYLAND_PLATFORM - { - .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, - .specVersion = 4, - }, -#endif -}; - -static const VkExtensionProperties device_extensions[] = { - { - .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME, - .specVersion = 67, - }, -}; - -static void * -default_alloc_func(void *pUserData, size_t size, size_t align, - VkSystemAllocationScope allocationScope) -{ - return malloc(size); -} - -static void * -default_realloc_func(void *pUserData, void *pOriginal, size_t size, - size_t align, VkSystemAllocationScope allocationScope) -{ - return realloc(pOriginal, size); -} - -static void -default_free_func(void *pUserData, void *pMemory) -{ - free(pMemory); -} - -static const VkAllocationCallbacks default_alloc = { - .pUserData = NULL, - .pfnAllocation = default_alloc_func, - .pfnReallocation = default_realloc_func, - .pfnFree = default_free_func, -}; - -VkResult anv_CreateInstance( - const VkInstanceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkInstance* pInstance) -{ - struct anv_instance *instance; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - - uint32_t client_version = pCreateInfo->pApplicationInfo ? 
- pCreateInfo->pApplicationInfo->apiVersion : - VK_MAKE_VERSION(1, 0, 0); - if (VK_MAKE_VERSION(1, 0, 0) > client_version || - client_version > VK_MAKE_VERSION(1, 0, 3)) { - return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, - "Client requested version %d.%d.%d", - VK_VERSION_MAJOR(client_version), - VK_VERSION_MINOR(client_version), - VK_VERSION_PATCH(client_version)); - } - - for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { - bool found = false; - for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { - if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], - global_extensions[j].extensionName) == 0) { - found = true; - break; - } - } - if (!found) - return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); - } - - instance = anv_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!instance) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - - if (pAllocator) - instance->alloc = *pAllocator; - else - instance->alloc = default_alloc; - - instance->apiVersion = client_version; - instance->physicalDeviceCount = -1; - - _mesa_locale_init(); - - VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); - - anv_init_wsi(instance); - - *pInstance = anv_instance_to_handle(instance); - - return VK_SUCCESS; -} - -void anv_DestroyInstance( - VkInstance _instance, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); - - if (instance->physicalDeviceCount > 0) { - /* We support at most one physical device. 
*/ - assert(instance->physicalDeviceCount == 1); - anv_physical_device_finish(&instance->physicalDevice); - } - - anv_finish_wsi(instance); - - VG(VALGRIND_DESTROY_MEMPOOL(instance)); - - _mesa_locale_fini(); - - anv_free(&instance->alloc, instance); -} - -VkResult anv_EnumeratePhysicalDevices( - VkInstance _instance, - uint32_t* pPhysicalDeviceCount, - VkPhysicalDevice* pPhysicalDevices) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); - VkResult result; - - if (instance->physicalDeviceCount < 0) { - result = anv_physical_device_init(&instance->physicalDevice, - instance, "/dev/dri/renderD128"); - if (result == VK_ERROR_INCOMPATIBLE_DRIVER) { - instance->physicalDeviceCount = 0; - } else if (result == VK_SUCCESS) { - instance->physicalDeviceCount = 1; - } else { - return result; - } - } - - /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; - * otherwise it's an inout parameter. - * - * The Vulkan spec (git aaed022) says: - * - * pPhysicalDeviceCount is a pointer to an unsigned integer variable - * that is initialized with the number of devices the application is - * prepared to receive handles to. pname:pPhysicalDevices is pointer to - * an array of at least this many VkPhysicalDevice handles [...]. - * - * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices - * overwrites the contents of the variable pointed to by - * pPhysicalDeviceCount with the number of physical devices in in the - * instance; otherwise, vkEnumeratePhysicalDevices overwrites - * pPhysicalDeviceCount with the number of physical handles written to - * pPhysicalDevices. 
- */ - if (!pPhysicalDevices) { - *pPhysicalDeviceCount = instance->physicalDeviceCount; - } else if (*pPhysicalDeviceCount >= 1) { - pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice); - *pPhysicalDeviceCount = 1; - } else { - *pPhysicalDeviceCount = 0; - } - - return VK_SUCCESS; -} - -void anv_GetPhysicalDeviceFeatures( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures* pFeatures) -{ - ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - - *pFeatures = (VkPhysicalDeviceFeatures) { - .robustBufferAccess = true, - .fullDrawIndexUint32 = true, - .imageCubeArray = false, - .independentBlend = pdevice->info->gen >= 8, - .geometryShader = true, - .tessellationShader = false, - .sampleRateShading = false, - .dualSrcBlend = true, - .logicOp = true, - .multiDrawIndirect = false, - .drawIndirectFirstInstance = false, - .depthClamp = false, - .depthBiasClamp = false, - .fillModeNonSolid = true, - .depthBounds = false, - .wideLines = true, - .largePoints = true, - .alphaToOne = true, - .multiViewport = true, - .samplerAnisotropy = false, /* FINISHME */ - .textureCompressionETC2 = true, - .textureCompressionASTC_LDR = true, - .textureCompressionBC = true, - .occlusionQueryPrecise = true, - .pipelineStatisticsQuery = true, - .vertexPipelineStoresAndAtomics = pdevice->info->gen >= 8, - .fragmentStoresAndAtomics = true, - .shaderTessellationAndGeometryPointSize = true, - .shaderImageGatherExtended = true, - .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = false, - .shaderUniformBufferArrayDynamicIndexing = true, - .shaderSampledImageArrayDynamicIndexing = true, - .shaderStorageBufferArrayDynamicIndexing = true, - .shaderStorageImageArrayDynamicIndexing = true, - .shaderStorageImageReadWithoutFormat = false, - .shaderStorageImageWriteWithoutFormat = true, - .shaderClipDistance = false, - .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, - 
.alphaToOne = true, - .variableMultisampleRate = false, - .inheritedQueries = false, - }; -} - -void -anv_device_get_cache_uuid(void *uuid) -{ - memset(uuid, 0, VK_UUID_SIZE); - snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4); -} - -void anv_GetPhysicalDeviceProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties* pProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - const struct brw_device_info *devinfo = pdevice->info; - - anv_finishme("Get correct values for VkPhysicalDeviceLimits"); - - const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0; - - VkSampleCountFlags sample_counts = - isl_device_get_sample_counts(&pdevice->isl_dev); - - VkPhysicalDeviceLimits limits = { - .maxImageDimension1D = (1 << 14), - .maxImageDimension2D = (1 << 14), - .maxImageDimension3D = (1 << 10), - .maxImageDimensionCube = (1 << 14), - .maxImageArrayLayers = (1 << 10), - .maxTexelBufferElements = 128 * 1024 * 1024, - .maxUniformBufferRange = UINT32_MAX, - .maxStorageBufferRange = UINT32_MAX, - .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, - .maxMemoryAllocationCount = UINT32_MAX, - .maxSamplerAllocationCount = 64 * 1024, - .bufferImageGranularity = 64, /* A cache line */ - .sparseAddressSpaceSize = 0, - .maxBoundDescriptorSets = MAX_SETS, - .maxPerStageDescriptorSamplers = 64, - .maxPerStageDescriptorUniformBuffers = 64, - .maxPerStageDescriptorStorageBuffers = 64, - .maxPerStageDescriptorSampledImages = 64, - .maxPerStageDescriptorStorageImages = 64, - .maxPerStageDescriptorInputAttachments = 64, - .maxPerStageResources = 128, - .maxDescriptorSetSamplers = 256, - .maxDescriptorSetUniformBuffers = 256, - .maxDescriptorSetUniformBuffersDynamic = 256, - .maxDescriptorSetStorageBuffers = 256, - .maxDescriptorSetStorageBuffersDynamic = 256, - .maxDescriptorSetSampledImages = 256, - .maxDescriptorSetStorageImages = 256, - .maxDescriptorSetInputAttachments = 256, - .maxVertexInputAttributes = 32, - 
.maxVertexInputBindings = 32, - .maxVertexInputAttributeOffset = 2047, - .maxVertexInputBindingStride = 2048, - .maxVertexOutputComponents = 128, - .maxTessellationGenerationLevel = 0, - .maxTessellationPatchSize = 0, - .maxTessellationControlPerVertexInputComponents = 0, - .maxTessellationControlPerVertexOutputComponents = 0, - .maxTessellationControlPerPatchOutputComponents = 0, - .maxTessellationControlTotalOutputComponents = 0, - .maxTessellationEvaluationInputComponents = 0, - .maxTessellationEvaluationOutputComponents = 0, - .maxGeometryShaderInvocations = 32, - .maxGeometryInputComponents = 64, - .maxGeometryOutputComponents = 128, - .maxGeometryOutputVertices = 256, - .maxGeometryTotalOutputComponents = 1024, - .maxFragmentInputComponents = 128, - .maxFragmentOutputAttachments = 8, - .maxFragmentDualSrcAttachments = 2, - .maxFragmentCombinedOutputResources = 8, - .maxComputeSharedMemorySize = 32768, - .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, - .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, - .maxComputeWorkGroupSize = { - 16 * devinfo->max_cs_threads, - 16 * devinfo->max_cs_threads, - 16 * devinfo->max_cs_threads, - }, - .subPixelPrecisionBits = 4 /* FIXME */, - .subTexelPrecisionBits = 4 /* FIXME */, - .mipmapPrecisionBits = 4 /* FIXME */, - .maxDrawIndexedIndexValue = UINT32_MAX, - .maxDrawIndirectCount = UINT32_MAX, - .maxSamplerLodBias = 16, - .maxSamplerAnisotropy = 16, - .maxViewports = MAX_VIEWPORTS, - .maxViewportDimensions = { (1 << 14), (1 << 14) }, - .viewportBoundsRange = { -16384.0, 16384.0 }, - .viewportSubPixelBits = 13, /* We take a float? 
*/ - .minMemoryMapAlignment = 4096, /* A page */ - .minTexelBufferOffsetAlignment = 1, - .minUniformBufferOffsetAlignment = 1, - .minStorageBufferOffsetAlignment = 1, - .minTexelOffset = -8, - .maxTexelOffset = 7, - .minTexelGatherOffset = -8, - .maxTexelGatherOffset = 7, - .minInterpolationOffset = 0, /* FIXME */ - .maxInterpolationOffset = 0, /* FIXME */ - .subPixelInterpolationOffsetBits = 0, /* FIXME */ - .maxFramebufferWidth = (1 << 14), - .maxFramebufferHeight = (1 << 14), - .maxFramebufferLayers = (1 << 10), - .framebufferColorSampleCounts = sample_counts, - .framebufferDepthSampleCounts = sample_counts, - .framebufferStencilSampleCounts = sample_counts, - .framebufferNoAttachmentsSampleCounts = sample_counts, - .maxColorAttachments = MAX_RTS, - .sampledImageColorSampleCounts = sample_counts, - .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, - .sampledImageDepthSampleCounts = sample_counts, - .sampledImageStencilSampleCounts = sample_counts, - .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, - .maxSampleMaskWords = 1, - .timestampComputeAndGraphics = false, - .timestampPeriod = time_stamp_base / (1000 * 1000 * 1000), - .maxClipDistances = 0 /* FIXME */, - .maxCullDistances = 0 /* FIXME */, - .maxCombinedClipAndCullDistances = 0 /* FIXME */, - .discreteQueuePriorities = 1, - .pointSizeRange = { 0.125, 255.875 }, - .lineWidthRange = { 0.0, 7.9921875 }, - .pointSizeGranularity = (1.0 / 8.0), - .lineWidthGranularity = (1.0 / 128.0), - .strictLines = false, /* FINISHME */ - .standardSampleLocations = true, - .optimalBufferCopyOffsetAlignment = 128, - .optimalBufferCopyRowPitchAlignment = 128, - .nonCoherentAtomSize = 64, - }; - - *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = VK_MAKE_VERSION(1, 0, 2), - .driverVersion = 1, - .vendorID = 0x8086, - .deviceID = pdevice->chipset_id, - .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, - .limits = limits, - .sparseProperties = {0}, /* Broadwell doesn't do sparse. 
*/ - }; - - strcpy(pProperties->deviceName, pdevice->name); - anv_device_get_cache_uuid(pProperties->pipelineCacheUUID); -} - -void anv_GetPhysicalDeviceQueueFamilyProperties( - VkPhysicalDevice physicalDevice, - uint32_t* pCount, - VkQueueFamilyProperties* pQueueFamilyProperties) -{ - if (pQueueFamilyProperties == NULL) { - *pCount = 1; - return; - } - - assert(*pCount >= 1); - - *pQueueFamilyProperties = (VkQueueFamilyProperties) { - .queueFlags = VK_QUEUE_GRAPHICS_BIT | - VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_TRANSFER_BIT, - .queueCount = 1, - .timestampValidBits = 36, /* XXX: Real value here */ - .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, - }; -} - -void anv_GetPhysicalDeviceMemoryProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties* pMemoryProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - VkDeviceSize heap_size; - - /* Reserve some wiggle room for the driver by exposing only 75% of the - * aperture to the heap. - */ - heap_size = 3 * physical_device->aperture_size / 4; - - if (physical_device->info->has_llc) { - /* Big core GPUs share LLC with the CPU and thus one memory type can be - * both cached and coherent at the same time. - */ - pMemoryProperties->memoryTypeCount = 1; - pMemoryProperties->memoryTypes[0] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 0, - }; - } else { - /* The spec requires that we expose a host-visible, coherent memory - * type, but Atom GPUs don't share LLC. Thus we offer two memory types - * to give the application a choice between cached, but not coherent and - * coherent but uncached (WC though). 
- */ - pMemoryProperties->memoryTypeCount = 2; - pMemoryProperties->memoryTypes[0] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - .heapIndex = 0, - }; - pMemoryProperties->memoryTypes[1] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 0, - }; - } - - pMemoryProperties->memoryHeapCount = 1; - pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { - .size = heap_size, - .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, - }; -} - -PFN_vkVoidFunction anv_GetInstanceProcAddr( - VkInstance instance, - const char* pName) -{ - return anv_lookup_entrypoint(pName); -} - -/* The loader wants us to expose a second GetInstanceProcAddr function - * to work around certain LD_PRELOAD issues seen in apps. - */ -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( - VkInstance instance, - const char* pName); - -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( - VkInstance instance, - const char* pName) -{ - return anv_GetInstanceProcAddr(instance, pName); -} - -PFN_vkVoidFunction anv_GetDeviceProcAddr( - VkDevice device, - const char* pName) -{ - return anv_lookup_entrypoint(pName); -} - -static VkResult -anv_queue_init(struct anv_device *device, struct anv_queue *queue) -{ - queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - queue->device = device; - queue->pool = &device->surface_state_pool; - - return VK_SUCCESS; -} - -static void -anv_queue_finish(struct anv_queue *queue) -{ -} - -static struct anv_state -anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p) -{ - struct anv_state state; - - state = anv_state_pool_alloc(pool, size, align); - memcpy(state.map, p, size); - - if (!pool->block_pool->device->info.has_llc) - anv_state_clflush(state); - - return state; -} - -struct 
gen8_border_color { - union { - float float32[4]; - uint32_t uint32[4]; - }; - /* Pad out to 64 bytes */ - uint32_t _pad[12]; -}; - -static void -anv_device_init_border_colors(struct anv_device *device) -{ - static const struct gen8_border_color border_colors[] = { - [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } }, - [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } }, - [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } }, - [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } }, - [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } }, - [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } }, - }; - - device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool, - sizeof(border_colors), 64, - border_colors); -} - -VkResult -anv_device_submit_simple_batch(struct anv_device *device, - struct anv_batch *batch) -{ - struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 exec2_objects[1]; - struct anv_bo bo; - VkResult result = VK_SUCCESS; - uint32_t size; - int64_t timeout; - int ret; - - /* Kernel driver requires 8 byte aligned batch length */ - size = align_u32(batch->next - batch->start, 8); - assert(size < device->batch_bo_pool.bo_size); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo); - if (result != VK_SUCCESS) - return result; - - memcpy(bo.map, batch->start, size); - if (!device->info.has_llc) - anv_clflush_range(bo.map, size); - - exec2_objects[0].handle = bo.gem_handle; - exec2_objects[0].relocation_count = 0; - exec2_objects[0].relocs_ptr = 0; - exec2_objects[0].alignment = 0; - exec2_objects[0].offset = bo.offset; - exec2_objects[0].flags = 0; - exec2_objects[0].rsvd1 = 0; - exec2_objects[0].rsvd2 = 0; - - execbuf.buffers_ptr = (uintptr_t) exec2_objects; - execbuf.buffer_count = 1; - execbuf.batch_start_offset = 0; - execbuf.batch_len = size; - execbuf.cliprects_ptr = 0; - 
execbuf.num_cliprects = 0; - execbuf.DR1 = 0; - execbuf.DR4 = 0; - - execbuf.flags = - I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - execbuf.rsvd1 = device->context_id; - execbuf.rsvd2 = 0; - - ret = anv_gem_execbuffer(device, &execbuf); - if (ret != 0) { - /* We don't know the real error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); - goto fail; - } - - timeout = INT64_MAX; - ret = anv_gem_wait(device, bo.gem_handle, &timeout); - if (ret != 0) { - /* We don't know the real error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); - goto fail; - } - - fail: - anv_bo_pool_free(&device->batch_bo_pool, &bo); - - return result; -} - -VkResult anv_CreateDevice( - VkPhysicalDevice physicalDevice, - const VkDeviceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDevice* pDevice) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - VkResult result; - struct anv_device *device; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); - - for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { - bool found = false; - for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) { - if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], - device_extensions[j].extensionName) == 0) { - found = true; - break; - } - } - if (!found) - return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); - } - - anv_set_dispatch_devinfo(physical_device->info); - - device = anv_alloc2(&physical_device->instance->alloc, pAllocator, - sizeof(*device), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!device) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - device->instance = physical_device->instance; - device->chipset_id = physical_device->chipset_id; - - if (pAllocator) - device->alloc = *pAllocator; - else - device->alloc = physical_device->instance->alloc; - - /* XXX(chadv): Can we dup() 
physicalDevice->fd here? */ - device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); - if (device->fd == -1) { - result = vk_error(VK_ERROR_INITIALIZATION_FAILED); - goto fail_device; - } - - device->context_id = anv_gem_create_context(device); - if (device->context_id == -1) { - result = vk_error(VK_ERROR_INITIALIZATION_FAILED); - goto fail_fd; - } - - device->info = *physical_device->info; - device->isl_dev = physical_device->isl_dev; - - pthread_mutex_init(&device->mutex, NULL); - - anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); - - anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384); - - anv_state_pool_init(&device->dynamic_state_pool, - &device->dynamic_state_block_pool); - - anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024); - anv_pipeline_cache_init(&device->default_pipeline_cache, device); - - anv_block_pool_init(&device->surface_state_block_pool, device, 4096); - - anv_state_pool_init(&device->surface_state_pool, - &device->surface_state_block_pool); - - anv_bo_init_new(&device->workaround_bo, device, 1024); - - anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); - - anv_queue_init(device, &device->queue); - - switch (device->info.gen) { - case 7: - if (!device->info.is_haswell) - result = gen7_init_device_state(device); - else - result = gen75_init_device_state(device); - break; - case 8: - result = gen8_init_device_state(device); - break; - case 9: - result = gen9_init_device_state(device); - break; - default: - /* Shouldn't get here as we don't create physical devices for any other - * gens. 
*/ - unreachable("unhandled gen"); - } - if (result != VK_SUCCESS) - goto fail_fd; - - result = anv_device_init_meta(device); - if (result != VK_SUCCESS) - goto fail_fd; - - anv_device_init_border_colors(device); - - *pDevice = anv_device_to_handle(device); - - return VK_SUCCESS; - - fail_fd: - close(device->fd); - fail_device: - anv_free(&device->alloc, device); - - return result; -} - -void anv_DestroyDevice( - VkDevice _device, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - anv_queue_finish(&device->queue); - - anv_device_finish_meta(device); - -#ifdef HAVE_VALGRIND - /* We only need to free these to prevent valgrind errors. The backing - * BO will go away in a couple of lines so we don't actually leak. - */ - anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); -#endif - - anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size); - anv_gem_close(device, device->workaround_bo.gem_handle); - - anv_bo_pool_finish(&device->batch_bo_pool); - anv_state_pool_finish(&device->dynamic_state_pool); - anv_block_pool_finish(&device->dynamic_state_block_pool); - anv_block_pool_finish(&device->instruction_block_pool); - anv_state_pool_finish(&device->surface_state_pool); - anv_block_pool_finish(&device->surface_state_block_pool); - anv_block_pool_finish(&device->scratch_block_pool); - - close(device->fd); - - pthread_mutex_destroy(&device->mutex); - - anv_free(&device->alloc, device); -} - -VkResult anv_EnumerateInstanceExtensionProperties( - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties) -{ - if (pProperties == NULL) { - *pPropertyCount = ARRAY_SIZE(global_extensions); - return VK_SUCCESS; - } - - assert(*pPropertyCount >= ARRAY_SIZE(global_extensions)); - - *pPropertyCount = ARRAY_SIZE(global_extensions); - memcpy(pProperties, global_extensions, sizeof(global_extensions)); - - return VK_SUCCESS; -} - -VkResult anv_EnumerateDeviceExtensionProperties( 
- VkPhysicalDevice physicalDevice, - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties) -{ - if (pProperties == NULL) { - *pPropertyCount = ARRAY_SIZE(device_extensions); - return VK_SUCCESS; - } - - assert(*pPropertyCount >= ARRAY_SIZE(device_extensions)); - - *pPropertyCount = ARRAY_SIZE(device_extensions); - memcpy(pProperties, device_extensions, sizeof(device_extensions)); - - return VK_SUCCESS; -} - -VkResult anv_EnumerateInstanceLayerProperties( - uint32_t* pPropertyCount, - VkLayerProperties* pProperties) -{ - if (pProperties == NULL) { - *pPropertyCount = 0; - return VK_SUCCESS; - } - - /* None supported at this time */ - return vk_error(VK_ERROR_LAYER_NOT_PRESENT); -} - -VkResult anv_EnumerateDeviceLayerProperties( - VkPhysicalDevice physicalDevice, - uint32_t* pPropertyCount, - VkLayerProperties* pProperties) -{ - if (pProperties == NULL) { - *pPropertyCount = 0; - return VK_SUCCESS; - } - - /* None supported at this time */ - return vk_error(VK_ERROR_LAYER_NOT_PRESENT); -} - -void anv_GetDeviceQueue( - VkDevice _device, - uint32_t queueNodeIndex, - uint32_t queueIndex, - VkQueue* pQueue) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - assert(queueIndex == 0); - - *pQueue = anv_queue_to_handle(&device->queue); -} - -VkResult anv_QueueSubmit( - VkQueue _queue, - uint32_t submitCount, - const VkSubmitInfo* pSubmits, - VkFence _fence) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - struct anv_device *device = queue->device; - int ret; - - for (uint32_t i = 0; i < submitCount; i++) { - for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, - pSubmits[i].pCommandBuffers[j]); - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - - ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); - if (ret != 0) { - /* We don't know the real error. 
*/ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "execbuf2 failed: %m"); - } - - for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++) - cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset; - } - } - - if (fence) { - ret = anv_gem_execbuffer(device, &fence->execbuf); - if (ret != 0) { - /* We don't know the real error. */ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "execbuf2 failed: %m"); - } - } - - return VK_SUCCESS; -} - -VkResult anv_QueueWaitIdle( - VkQueue _queue) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - - return ANV_CALL(DeviceWaitIdle)(anv_device_to_handle(queue->device)); -} - -VkResult anv_DeviceWaitIdle( - VkDevice _device) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_batch batch; - - uint32_t cmds[8]; - batch.start = batch.next = cmds; - batch.end = (void *) cmds + sizeof(cmds); - - anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); - anv_batch_emit(&batch, GEN7_MI_NOOP); - - return anv_device_submit_simple_batch(device, &batch); -} - -VkResult -anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) -{ - bo->gem_handle = anv_gem_create(device, size); - if (!bo->gem_handle) - return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - - bo->map = NULL; - bo->index = 0; - bo->offset = 0; - bo->size = size; - bo->is_winsys_bo = false; - - return VK_SUCCESS; -} - -VkResult anv_AllocateMemory( - VkDevice _device, - const VkMemoryAllocateInfo* pAllocateInfo, - const VkAllocationCallbacks* pAllocator, - VkDeviceMemory* pMem) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_device_memory *mem; - VkResult result; - - assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); - - if (pAllocateInfo->allocationSize == 0) { - /* Apparently, this is allowed */ - *pMem = VK_NULL_HANDLE; - return VK_SUCCESS; - } - - /* We support exactly one memory heap. 
*/ - assert(pAllocateInfo->memoryTypeIndex == 0 || - (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2)); - - /* FINISHME: Fail if allocation request exceeds heap size. */ - - mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (mem == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* The kernel is going to give us whole pages anyway */ - uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); - - result = anv_bo_init_new(&mem->bo, device, alloc_size); - if (result != VK_SUCCESS) - goto fail; - - mem->type_index = pAllocateInfo->memoryTypeIndex; - - *pMem = anv_device_memory_to_handle(mem); - - return VK_SUCCESS; - - fail: - anv_free2(&device->alloc, pAllocator, mem); - - return result; -} - -void anv_FreeMemory( - VkDevice _device, - VkDeviceMemory _mem, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_device_memory, mem, _mem); - - if (mem == NULL) - return; - - if (mem->bo.map) - anv_gem_munmap(mem->bo.map, mem->bo.size); - - if (mem->bo.gem_handle != 0) - anv_gem_close(device, mem->bo.gem_handle); - - anv_free2(&device->alloc, pAllocator, mem); -} - -VkResult anv_MapMemory( - VkDevice _device, - VkDeviceMemory _memory, - VkDeviceSize offset, - VkDeviceSize size, - VkMemoryMapFlags flags, - void** ppData) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_device_memory, mem, _memory); - - if (mem == NULL) { - *ppData = NULL; - return VK_SUCCESS; - } - - if (size == VK_WHOLE_SIZE) - size = mem->bo.size - offset; - - /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only - * takes a VkDeviceMemory pointer, it seems like only one map of the memory - * at a time is valid. We could just mmap up front and return an offset - * pointer here, but that may exhaust virtual memory on 32 bit - * userspace. 
*/ - - uint32_t gem_flags = 0; - if (!device->info.has_llc && mem->type_index == 0) - gem_flags |= I915_MMAP_WC; - - /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */ - uint64_t map_offset = offset & ~4095ull; - assert(offset >= map_offset); - uint64_t map_size = (offset + size) - map_offset; - - /* Let's map whole pages */ - map_size = align_u64(map_size, 4096); - - mem->map = anv_gem_mmap(device, mem->bo.gem_handle, - map_offset, map_size, gem_flags); - mem->map_size = map_size; - - *ppData = mem->map + (offset - map_offset); - - return VK_SUCCESS; -} - -void anv_UnmapMemory( - VkDevice _device, - VkDeviceMemory _memory) -{ - ANV_FROM_HANDLE(anv_device_memory, mem, _memory); - - if (mem == NULL) - return; - - anv_gem_munmap(mem->map, mem->map_size); -} - -static void -clflush_mapped_ranges(struct anv_device *device, - uint32_t count, - const VkMappedMemoryRange *ranges) -{ - for (uint32_t i = 0; i < count; i++) { - ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory); - void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK); - void *end; - - if (ranges[i].offset + ranges[i].size > mem->map_size) - end = mem->map + mem->map_size; - else - end = mem->map + ranges[i].offset + ranges[i].size; - - while (p < end) { - __builtin_ia32_clflush(p); - p += CACHELINE_SIZE; - } - } -} - -VkResult anv_FlushMappedMemoryRanges( - VkDevice _device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange* pMemoryRanges) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - if (device->info.has_llc) - return VK_SUCCESS; - - /* Make sure the writes we're flushing have landed. 
*/ - __builtin_ia32_mfence(); - - clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); - - return VK_SUCCESS; -} - -VkResult anv_InvalidateMappedMemoryRanges( - VkDevice _device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange* pMemoryRanges) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - if (device->info.has_llc) - return VK_SUCCESS; - - clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); - - /* Make sure no reads get moved up above the invalidate. */ - __builtin_ia32_mfence(); - - return VK_SUCCESS; -} - -void anv_GetBufferMemoryRequirements( - VkDevice device, - VkBuffer _buffer, - VkMemoryRequirements* pMemoryRequirements) -{ - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - /* The Vulkan spec (git aaed022) says: - * - * memoryTypeBits is a bitfield and contains one bit set for every - * supported memory type for the resource. The bit `1<memoryTypeBits = 1; - - pMemoryRequirements->size = buffer->size; - pMemoryRequirements->alignment = 16; -} - -void anv_GetImageMemoryRequirements( - VkDevice device, - VkImage _image, - VkMemoryRequirements* pMemoryRequirements) -{ - ANV_FROM_HANDLE(anv_image, image, _image); - - /* The Vulkan spec (git aaed022) says: - * - * memoryTypeBits is a bitfield and contains one bit set for every - * supported memory type for the resource. 
The bit `1<memoryTypeBits = 1; - - pMemoryRequirements->size = image->size; - pMemoryRequirements->alignment = image->alignment; -} - -void anv_GetImageSparseMemoryRequirements( - VkDevice device, - VkImage image, - uint32_t* pSparseMemoryRequirementCount, - VkSparseImageMemoryRequirements* pSparseMemoryRequirements) -{ - stub(); -} - -void anv_GetDeviceMemoryCommitment( - VkDevice device, - VkDeviceMemory memory, - VkDeviceSize* pCommittedMemoryInBytes) -{ - *pCommittedMemoryInBytes = 0; -} - -VkResult anv_BindBufferMemory( - VkDevice device, - VkBuffer _buffer, - VkDeviceMemory _memory, - VkDeviceSize memoryOffset) -{ - ANV_FROM_HANDLE(anv_device_memory, mem, _memory); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - if (mem) { - buffer->bo = &mem->bo; - buffer->offset = memoryOffset; - } else { - buffer->bo = NULL; - buffer->offset = 0; - } - - return VK_SUCCESS; -} - -VkResult anv_BindImageMemory( - VkDevice device, - VkImage _image, - VkDeviceMemory _memory, - VkDeviceSize memoryOffset) -{ - ANV_FROM_HANDLE(anv_device_memory, mem, _memory); - ANV_FROM_HANDLE(anv_image, image, _image); - - if (mem) { - image->bo = &mem->bo; - image->offset = memoryOffset; - } else { - image->bo = NULL; - image->offset = 0; - } - - return VK_SUCCESS; -} - -VkResult anv_QueueBindSparse( - VkQueue queue, - uint32_t bindInfoCount, - const VkBindSparseInfo* pBindInfo, - VkFence fence) -{ - stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); -} - -VkResult anv_CreateFence( - VkDevice _device, - const VkFenceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkFence* pFence) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_fence *fence; - struct anv_batch batch; - VkResult result; - - const uint32_t fence_size = 128; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); - - fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (fence == NULL) - return 
vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_bo_init_new(&fence->bo, device, fence_size); - if (result != VK_SUCCESS) - goto fail; - - fence->bo.map = - anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size, 0); - batch.next = batch.start = fence->bo.map; - batch.end = fence->bo.map + fence->bo.size; - anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); - anv_batch_emit(&batch, GEN7_MI_NOOP); - - if (!device->info.has_llc) { - assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0); - assert(batch.next - fence->bo.map <= CACHELINE_SIZE); - __builtin_ia32_mfence(); - __builtin_ia32_clflush(fence->bo.map); - } - - fence->exec2_objects[0].handle = fence->bo.gem_handle; - fence->exec2_objects[0].relocation_count = 0; - fence->exec2_objects[0].relocs_ptr = 0; - fence->exec2_objects[0].alignment = 0; - fence->exec2_objects[0].offset = fence->bo.offset; - fence->exec2_objects[0].flags = 0; - fence->exec2_objects[0].rsvd1 = 0; - fence->exec2_objects[0].rsvd2 = 0; - - fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; - fence->execbuf.buffer_count = 1; - fence->execbuf.batch_start_offset = 0; - fence->execbuf.batch_len = batch.next - fence->bo.map; - fence->execbuf.cliprects_ptr = 0; - fence->execbuf.num_cliprects = 0; - fence->execbuf.DR1 = 0; - fence->execbuf.DR4 = 0; - - fence->execbuf.flags = - I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - fence->execbuf.rsvd1 = device->context_id; - fence->execbuf.rsvd2 = 0; - - fence->ready = false; - - *pFence = anv_fence_to_handle(fence); - - return VK_SUCCESS; - - fail: - anv_free2(&device->alloc, pAllocator, fence); - - return result; -} - -void anv_DestroyFence( - VkDevice _device, - VkFence _fence, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - - anv_gem_munmap(fence->bo.map, fence->bo.size); - anv_gem_close(device, fence->bo.gem_handle); - anv_free2(&device->alloc, pAllocator, fence); -} 
- -VkResult anv_ResetFences( - VkDevice _device, - uint32_t fenceCount, - const VkFence* pFences) -{ - for (uint32_t i = 0; i < fenceCount; i++) { - ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - fence->ready = false; - } - - return VK_SUCCESS; -} - -VkResult anv_GetFenceStatus( - VkDevice _device, - VkFence _fence) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - int64_t t = 0; - int ret; - - if (fence->ready) - return VK_SUCCESS; - - ret = anv_gem_wait(device, fence->bo.gem_handle, &t); - if (ret == 0) { - fence->ready = true; - return VK_SUCCESS; - } - - return VK_NOT_READY; -} - -VkResult anv_WaitForFences( - VkDevice _device, - uint32_t fenceCount, - const VkFence* pFences, - VkBool32 waitAll, - uint64_t timeout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed - * to block indefinitely timeouts <= 0. Unfortunately, this was broken - * for a couple of kernel releases. Since there's no way to know - * whether or not the kernel we're using is one of the broken ones, the - * best we can do is to clamp the timeout to INT64_MAX. This limits the - * maximum timeout from 584 years to 292 years - likely not a big deal. - */ - if (timeout > INT64_MAX) - timeout = INT64_MAX; - - int64_t t = timeout; - - /* FIXME: handle !waitAll */ - - for (uint32_t i = 0; i < fenceCount; i++) { - ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - int ret = anv_gem_wait(device, fence->bo.gem_handle, &t); - if (ret == -1 && errno == ETIME) { - return VK_TIMEOUT; - } else if (ret == -1) { - /* We don't know the real error. 
*/ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "gem wait failed: %m"); - } - } - - return VK_SUCCESS; -} - -// Queue semaphore functions - -VkResult anv_CreateSemaphore( - VkDevice device, - const VkSemaphoreCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSemaphore* pSemaphore) -{ - /* The DRM execbuffer ioctl always execute in-oder, even between different - * rings. As such, there's nothing to do for the user space semaphore. - */ - - *pSemaphore = (VkSemaphore)1; - - return VK_SUCCESS; -} - -void anv_DestroySemaphore( - VkDevice device, - VkSemaphore semaphore, - const VkAllocationCallbacks* pAllocator) -{ -} - -// Event functions - -VkResult anv_CreateEvent( - VkDevice _device, - const VkEventCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkEvent* pEvent) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_state state; - struct anv_event *event; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO); - - state = anv_state_pool_alloc(&device->dynamic_state_pool, - sizeof(*event), 8); - event = state.map; - event->state = state; - event->semaphore = VK_EVENT_RESET; - - if (!device->info.has_llc) { - /* Make sure the writes we're flushing have landed. */ - __builtin_ia32_mfence(); - __builtin_ia32_clflush(event); - } - - *pEvent = anv_event_to_handle(event); - - return VK_SUCCESS; -} - -void anv_DestroyEvent( - VkDevice _device, - VkEvent _event, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_event, event, _event); - - anv_state_pool_free(&device->dynamic_state_pool, event->state); -} - -VkResult anv_GetEventStatus( - VkDevice _device, - VkEvent _event) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_event, event, _event); - - if (!device->info.has_llc) { - /* Invalidate read cache before reading event written by GPU. 
*/ - __builtin_ia32_clflush(event); - __builtin_ia32_mfence(); - - } - - return event->semaphore; -} - -VkResult anv_SetEvent( - VkDevice _device, - VkEvent _event) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_event, event, _event); - - event->semaphore = VK_EVENT_SET; - - if (!device->info.has_llc) { - /* Make sure the writes we're flushing have landed. */ - __builtin_ia32_mfence(); - __builtin_ia32_clflush(event); - } - - return VK_SUCCESS; -} - -VkResult anv_ResetEvent( - VkDevice _device, - VkEvent _event) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_event, event, _event); - - event->semaphore = VK_EVENT_RESET; - - if (!device->info.has_llc) { - /* Make sure the writes we're flushing have landed. */ - __builtin_ia32_mfence(); - __builtin_ia32_clflush(event); - } - - return VK_SUCCESS; -} - -// Buffer functions - -VkResult anv_CreateBuffer( - VkDevice _device, - const VkBufferCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkBuffer* pBuffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_buffer *buffer; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); - - buffer = anv_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (buffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - buffer->size = pCreateInfo->size; - buffer->usage = pCreateInfo->usage; - buffer->bo = NULL; - buffer->offset = 0; - - *pBuffer = anv_buffer_to_handle(buffer); - - return VK_SUCCESS; -} - -void anv_DestroyBuffer( - VkDevice _device, - VkBuffer _buffer, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - anv_free2(&device->alloc, pAllocator, buffer); -} - -void -anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, - enum isl_format format, - uint32_t offset, uint32_t range, uint32_t stride) -{ - 
switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - gen75_fill_buffer_surface_state(state.map, format, offset, range, - stride); - else - gen7_fill_buffer_surface_state(state.map, format, offset, range, - stride); - break; - case 8: - gen8_fill_buffer_surface_state(state.map, format, offset, range, stride); - break; - case 9: - gen9_fill_buffer_surface_state(state.map, format, offset, range, stride); - break; - default: - unreachable("unsupported gen\n"); - } - - if (!device->info.has_llc) - anv_state_clflush(state); -} - -void anv_DestroySampler( - VkDevice _device, - VkSampler _sampler, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); - - anv_free2(&device->alloc, pAllocator, sampler); -} - -VkResult anv_CreateFramebuffer( - VkDevice _device, - const VkFramebufferCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkFramebuffer* pFramebuffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_framebuffer *framebuffer; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); - - size_t size = sizeof(*framebuffer) + - sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount; - framebuffer = anv_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (framebuffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - framebuffer->attachment_count = pCreateInfo->attachmentCount; - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - VkImageView _iview = pCreateInfo->pAttachments[i]; - framebuffer->attachments[i] = anv_image_view_from_handle(_iview); - } - - framebuffer->width = pCreateInfo->width; - framebuffer->height = pCreateInfo->height; - framebuffer->layers = pCreateInfo->layers; - - *pFramebuffer = anv_framebuffer_to_handle(framebuffer); - - return VK_SUCCESS; -} - -void anv_DestroyFramebuffer( - VkDevice _device, - VkFramebuffer _fb, - const 
VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); - - anv_free2(&device->alloc, pAllocator, fb); -} - -void vkCmdDbgMarkerBegin( - VkCommandBuffer commandBuffer, - const char* pMarker) - __attribute__ ((visibility ("default"))); - -void vkCmdDbgMarkerEnd( - VkCommandBuffer commandBuffer) - __attribute__ ((visibility ("default"))); - -void vkCmdDbgMarkerBegin( - VkCommandBuffer commandBuffer, - const char* pMarker) -{ -} - -void vkCmdDbgMarkerEnd( - VkCommandBuffer commandBuffer) -{ -} diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c deleted file mode 100644 index b7fa28be787..00000000000 --- a/src/vulkan/anv_dump.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "anv_private.h" - -/* This file contains utility functions for help debugging. They can be - * called from GDB or similar to help inspect images and buffers. - */ - -void -anv_dump_image_to_ppm(struct anv_device *device, - struct anv_image *image, unsigned miplevel, - unsigned array_layer, const char *filename) -{ - VkDevice vk_device = anv_device_to_handle(device); - VkResult result; - - VkExtent2D extent = { image->extent.width, image->extent.height }; - for (unsigned i = 0; i < miplevel; i++) { - extent.width = MAX2(1, extent.width / 2); - extent.height = MAX2(1, extent.height / 2); - } - - VkImage copy_image; - result = anv_CreateImage(vk_device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_R8G8B8A8_UNORM, - .extent = (VkExtent3D) { extent.width, extent.height, 1 }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, - .flags = 0, - }, NULL, ©_image); - assert(result == VK_SUCCESS); - - VkMemoryRequirements reqs; - anv_GetImageMemoryRequirements(vk_device, copy_image, &reqs); - - VkDeviceMemory memory; - result = anv_AllocateMemory(vk_device, - &(VkMemoryAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .allocationSize = reqs.size, - .memoryTypeIndex = 0, - }, NULL, &memory); - assert(result == VK_SUCCESS); - - result = anv_BindImageMemory(vk_device, copy_image, memory, 0); - assert(result == VK_SUCCESS); - - VkCommandPool commandPool; - result = anv_CreateCommandPool(vk_device, - &(VkCommandPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .queueFamilyIndex = 0, - .flags = 0, - }, NULL, &commandPool); - assert(result == VK_SUCCESS); - - VkCommandBuffer cmd; - result = anv_AllocateCommandBuffers(vk_device, - &(VkCommandBufferAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .commandPool = commandPool, - .level = 
VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = 1, - }, &cmd); - assert(result == VK_SUCCESS); - - result = anv_BeginCommandBuffer(cmd, - &(VkCommandBufferBeginInfo) { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - }); - assert(result == VK_SUCCESS); - - anv_CmdBlitImage(cmd, - anv_image_to_handle(image), VK_IMAGE_LAYOUT_GENERAL, - copy_image, VK_IMAGE_LAYOUT_GENERAL, 1, - &(VkImageBlit) { - .srcSubresource = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = miplevel, - .baseArrayLayer = array_layer, - .layerCount = 1, - }, - .srcOffsets = { - { 0, 0, 0 }, - { extent.width, extent.height, 1 }, - }, - .dstSubresource = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .dstOffsets = { - { 0, 0, 0 }, - { extent.width, extent.height, 1 }, - }, - }, VK_FILTER_NEAREST); - - ANV_CALL(CmdPipelineBarrier)(cmd, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - true, 0, NULL, 0, NULL, 1, - &(VkImageMemoryBarrier) { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_HOST_READ_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = 0, - .dstQueueFamilyIndex = 0, - .image = copy_image, - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }); - - result = anv_EndCommandBuffer(cmd); - assert(result == VK_SUCCESS); - - VkFence fence; - result = anv_CreateFence(vk_device, - &(VkFenceCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - .flags = 0, - }, NULL, &fence); - assert(result == VK_SUCCESS); - - result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), 1, - &(VkSubmitInfo) { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .commandBufferCount = 1, 
- .pCommandBuffers = &cmd, - }, fence); - assert(result == VK_SUCCESS); - - result = anv_WaitForFences(vk_device, 1, &fence, true, UINT64_MAX); - assert(result == VK_SUCCESS); - - anv_DestroyFence(vk_device, fence, NULL); - anv_DestroyCommandPool(vk_device, commandPool, NULL); - - uint8_t *map; - result = anv_MapMemory(vk_device, memory, 0, reqs.size, 0, (void **)&map); - assert(result == VK_SUCCESS); - - VkSubresourceLayout layout; - anv_GetImageSubresourceLayout(vk_device, copy_image, - &(VkImageSubresource) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .arrayLayer = 0, - }, &layout); - - map += layout.offset; - - /* Now we can finally write the PPM file */ - FILE *file = fopen(filename, "wb"); - assert(file); - - fprintf(file, "P6\n%d %d\n255\n", extent.width, extent.height); - for (unsigned y = 0; y < extent.height; y++) { - uint8_t row[extent.width * 3]; - for (unsigned x = 0; x < extent.width; x++) { - row[x * 3 + 0] = map[x * 4 + 0]; - row[x * 3 + 1] = map[x * 4 + 1]; - row[x * 3 + 2] = map[x * 4 + 2]; - } - fwrite(row, 3, extent.width, file); - - map += layout.rowPitch; - } - fclose(file); - - anv_UnmapMemory(vk_device, memory); - anv_DestroyImage(vk_device, copy_image, NULL); - anv_FreeMemory(vk_device, memory, NULL); -} diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py deleted file mode 100644 index 1e4cfcb1755..00000000000 --- a/src/vulkan/anv_entrypoints_gen.py +++ /dev/null @@ -1,324 +0,0 @@ -# coding=utf-8 -# -# Copyright © 2015 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The 
above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. -# - -import fileinput, re, sys - -# Each function typedef in the vulkan.h header is all on one line and matches -# this regepx. We hope that won't change. - -p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);') - -entrypoints = [] - -# We generate a static hash table for entry point lookup -# (vkGetProcAddress). We use a linear congruential generator for our hash -# function and a power-of-two size table. The prime numbers are determined -# experimentally. - -none = 0xffff -hash_size = 256 -u32_mask = 2**32 - 1 -hash_mask = hash_size - 1 - -prime_factor = 5024183 -prime_step = 19 - -def hash(name): - h = 0; - for c in name: - h = (h * prime_factor + ord(c)) & u32_mask - - return h - -opt_header = False -opt_code = False - -if (sys.argv[1] == "header"): - opt_header = True - sys.argv.pop() -elif (sys.argv[1] == "code"): - opt_code = True - sys.argv.pop() - -# Parse the entry points in the header - -i = 0 -for line in fileinput.input(): - m = p.match(line) - if (m): - if m.group(2) == 'VoidFunction': - continue - fullname = "vk" + m.group(2) - h = hash(fullname) - entrypoints.append((m.group(1), m.group(2), m.group(3), i, h)) - i = i + 1 - -# For outputting entrypoints.h we generate a anv_EntryPoint() prototype -# per entry point. 
- -if opt_header: - print "/* This file generated from vk_gen.py, don't edit directly. */\n" - - print "struct anv_dispatch_table {" - print " union {" - print " void *entrypoints[%d];" % len(entrypoints) - print " struct {" - - for type, name, args, num, h in entrypoints: - print " %s (*%s)%s;" % (type, name, args) - print " };\n" - print " };\n" - print "};\n" - - print "void anv_set_dispatch_devinfo(const struct brw_device_info *info);\n" - - for type, name, args, num, h in entrypoints: - print "%s anv_%s%s;" % (type, name, args) - print "%s gen7_%s%s;" % (type, name, args) - print "%s gen75_%s%s;" % (type, name, args) - print "%s gen8_%s%s;" % (type, name, args) - print "%s gen9_%s%s;" % (type, name, args) - print "%s anv_validate_%s%s;" % (type, name, args) - exit() - - - -print """/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* DO NOT EDIT! 
This is a generated file. */ - -#include "anv_private.h" - -struct anv_entrypoint { - uint32_t name; - uint32_t hash; -}; - -/* We use a big string constant to avoid lots of reloctions from the entry - * point table to lots of little strings. The entries in the entry point table - * store the index into this big string. - */ - -static const char strings[] =""" - -offsets = [] -i = 0; -for type, name, args, num, h in entrypoints: - print " \"vk%s\\0\"" % name - offsets.append(i) - i += 2 + len(name) + 1 -print """ ; - -/* Weak aliases for all potential validate functions. These will resolve to - * NULL if they're not defined, which lets the resolve_entrypoint() function - * either pick a validate wrapper if available or just plug in the actual - * entry point. - */ -""" - -# Now generate the table of all entry points and their validation functions - -print "\nstatic const struct anv_entrypoint entrypoints[] = {" -for type, name, args, num, h in entrypoints: - print " { %5d, 0x%08x }," % (offsets[num], h) -print "};\n" - -for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]: - for type, name, args, num, h in entrypoints: - print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) - print "\nconst struct anv_dispatch_table %s_layer = {" % layer - for type, name, args, num, h in entrypoints: - print " .%s = %s_%s," % (name, layer, name) - print "};\n" - -print """ -#ifdef DEBUG -static bool enable_validate = true; -#else -static bool enable_validate = false; -#endif - -/* We can't use symbols that need resolving (like, oh, getenv) in the resolve - * function. This means that we have to determine whether or not to use the - * validation layer sometime before that. The constructor function attribute asks - * the dynamic linker to invoke determine_validate() at dlopen() time which - * works. 
- */ -static void __attribute__ ((constructor)) -determine_validate(void) -{ - const char *s = getenv("ANV_VALIDATE"); - - if (s) - enable_validate = atoi(s); -} - -static const struct brw_device_info *dispatch_devinfo; - -void -anv_set_dispatch_devinfo(const struct brw_device_info *devinfo) -{ - dispatch_devinfo = devinfo; -} - -void * __attribute__ ((noinline)) -anv_resolve_entrypoint(uint32_t index) -{ - if (enable_validate && validate_layer.entrypoints[index]) - return validate_layer.entrypoints[index]; - - if (dispatch_devinfo == NULL) { - assert(anv_layer.entrypoints[index]); - return anv_layer.entrypoints[index]; - } - - switch (dispatch_devinfo->gen) { - case 9: - if (gen9_layer.entrypoints[index]) - return gen9_layer.entrypoints[index]; - /* fall through */ - case 8: - if (gen8_layer.entrypoints[index]) - return gen8_layer.entrypoints[index]; - /* fall through */ - case 7: - if (dispatch_devinfo->is_haswell && gen75_layer.entrypoints[index]) - return gen75_layer.entrypoints[index]; - - if (gen7_layer.entrypoints[index]) - return gen7_layer.entrypoints[index]; - /* fall through */ - case 0: - return anv_layer.entrypoints[index]; - default: - unreachable("unsupported gen\\n"); - } -} -""" - -# Now output ifuncs and their resolve helpers for all entry points. The -# resolve helper calls resolve_entrypoint() with the entry point index, which -# lets the resolver look it up in the table. - -for type, name, args, num, h in entrypoints: - print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num) - print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name) - - -# Now generate the hash table used for entry point look up. This is a -# uint16_t table of entry point indices. We use 0xffff to indicate an entry -# in the hash table is empty. 
- -map = [none for f in xrange(hash_size)] -collisions = [0 for f in xrange(10)] -for type, name, args, num, h in entrypoints: - level = 0 - while map[h & hash_mask] != none: - h = h + prime_step - level = level + 1 - if level > 9: - collisions[9] += 1 - else: - collisions[level] += 1 - map[h & hash_mask] = num - -print "/* Hash table stats:" -print " * size %d entries" % hash_size -print " * collisions entries" -for i in xrange(10): - if (i == 9): - plus = "+" - else: - plus = " " - - print " * %2d%s %4d" % (i, plus, collisions[i]) -print " */\n" - -print "#define none 0x%04x\n" % none - -print "static const uint16_t map[] = {" -for i in xrange(0, hash_size, 8): - print " ", - for j in xrange(i, i + 8): - if map[j] & 0xffff == 0xffff: - print " none,", - else: - print "0x%04x," % (map[j] & 0xffff), - print - -print "};" - -# Finally we generate the hash table lookup function. The hash function and -# linear probing algorithm matches the hash table generated above. - -print """ -void * -anv_lookup_entrypoint(const char *name) -{ - static const uint32_t prime_factor = %d; - static const uint32_t prime_step = %d; - const struct anv_entrypoint *e; - uint32_t hash, h, i; - const char *p; - - hash = 0; - for (p = name; *p; p++) - hash = hash * prime_factor + *p; - - h = hash; - do { - i = map[h & %d]; - if (i == none) - return NULL; - e = &entrypoints[i]; - h += prime_step; - } while (e->hash != hash); - - if (strcmp(name, strings + e->name) != 0) - return NULL; - - return anv_resolve_entrypoint(i); -} -""" % (prime_factor, prime_step, hash_mask) diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c deleted file mode 100644 index 7798a7bbde3..00000000000 --- a/src/vulkan/anv_formats.c +++ /dev/null @@ -1,603 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without 
restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_private.h" -#include "brw_surface_formats.h" - -#define RGBA ((struct anv_format_swizzle) { 0, 1, 2, 3 }) -#define BGRA ((struct anv_format_swizzle) { 2, 1, 0, 3 }) - -#define swiz_fmt(__vk_fmt, __hw_fmt, __swizzle, ...) \ - [__vk_fmt] = { \ - .vk_format = __vk_fmt, \ - .name = #__vk_fmt, \ - .isl_format = __hw_fmt, \ - .isl_layout = &isl_format_layouts[__hw_fmt], \ - .swizzle = __swizzle, \ - __VA_ARGS__ \ - } - -#define fmt(__vk_fmt, __hw_fmt, ...) \ - swiz_fmt(__vk_fmt, __hw_fmt, RGBA, __VA_ARGS__) - -/* HINT: For array formats, the ISL name should match the VK name. For - * packed formats, they should have the channels in reverse order from each - * other. The reason for this is that, for packed formats, the ISL (and - * bspec) names are in LSB -> MSB order while VK formats are MSB -> LSB. 
- */ -static const struct anv_format anv_formats[] = { - fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW), - fmt(VK_FORMAT_R4G4_UNORM_PACK8, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM), - swiz_fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM, BGRA), - fmt(VK_FORMAT_R5G6B5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM), - swiz_fmt(VK_FORMAT_B5G6R5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM, BGRA), - fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16, ISL_FORMAT_A1B5G5R5_UNORM), - fmt(VK_FORMAT_B5G5R5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16, ISL_FORMAT_B5G5R5A1_UNORM), - fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM), - fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM), - fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED), - fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED), - fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT), - fmt(VK_FORMAT_R8_SINT, ISL_FORMAT_R8_SINT), - fmt(VK_FORMAT_R8_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R8G8_UNORM, ISL_FORMAT_R8G8_UNORM), - fmt(VK_FORMAT_R8G8_SNORM, ISL_FORMAT_R8G8_SNORM), - fmt(VK_FORMAT_R8G8_USCALED, ISL_FORMAT_R8G8_USCALED), - fmt(VK_FORMAT_R8G8_SSCALED, ISL_FORMAT_R8G8_SSCALED), - fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT), - fmt(VK_FORMAT_R8G8_SINT, ISL_FORMAT_R8G8_SINT), - fmt(VK_FORMAT_R8G8_SRGB, ISL_FORMAT_UNSUPPORTED), /* L8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8_UNORM), - fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM), - fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED), - fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED), - fmt(VK_FORMAT_R8G8B8_UINT, ISL_FORMAT_R8G8B8_UINT), - fmt(VK_FORMAT_R8G8B8_SINT, ISL_FORMAT_R8G8B8_SINT), - fmt(VK_FORMAT_R8G8B8_SRGB, ISL_FORMAT_UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8A8_UNORM, ISL_FORMAT_R8G8B8A8_UNORM), - fmt(VK_FORMAT_R8G8B8A8_SNORM, ISL_FORMAT_R8G8B8A8_SNORM), - fmt(VK_FORMAT_R8G8B8A8_USCALED, ISL_FORMAT_R8G8B8A8_USCALED), - 
fmt(VK_FORMAT_R8G8B8A8_SSCALED, ISL_FORMAT_R8G8B8A8_SSCALED), - fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT), - fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT), - fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), - fmt(VK_FORMAT_A8B8G8R8_UNORM_PACK32, ISL_FORMAT_R8G8B8A8_UNORM), - fmt(VK_FORMAT_A8B8G8R8_SNORM_PACK32, ISL_FORMAT_R8G8B8A8_SNORM), - fmt(VK_FORMAT_A8B8G8R8_USCALED_PACK32, ISL_FORMAT_R8G8B8A8_USCALED), - fmt(VK_FORMAT_A8B8G8R8_SSCALED_PACK32, ISL_FORMAT_R8G8B8A8_SSCALED), - fmt(VK_FORMAT_A8B8G8R8_UINT_PACK32, ISL_FORMAT_R8G8B8A8_UINT), - fmt(VK_FORMAT_A8B8G8R8_SINT_PACK32, ISL_FORMAT_R8G8B8A8_SINT), - fmt(VK_FORMAT_A8B8G8R8_SRGB_PACK32, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), - fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32, ISL_FORMAT_B10G10R10A2_UNORM), - fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32, ISL_FORMAT_B10G10R10A2_SNORM), - fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED), - fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED), - fmt(VK_FORMAT_A2R10G10B10_UINT_PACK32, ISL_FORMAT_B10G10R10A2_UINT), - fmt(VK_FORMAT_A2R10G10B10_SINT_PACK32, ISL_FORMAT_B10G10R10A2_SINT), - fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32, ISL_FORMAT_R10G10B10A2_UNORM), - fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32, ISL_FORMAT_R10G10B10A2_SNORM), - fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, ISL_FORMAT_R10G10B10A2_USCALED), - fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED), - fmt(VK_FORMAT_A2B10G10R10_UINT_PACK32, ISL_FORMAT_R10G10B10A2_UINT), - fmt(VK_FORMAT_A2B10G10R10_SINT_PACK32, ISL_FORMAT_R10G10B10A2_SINT), - fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM), - fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM), - fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED), - fmt(VK_FORMAT_R16_SSCALED, ISL_FORMAT_R16_SSCALED), - fmt(VK_FORMAT_R16_UINT, ISL_FORMAT_R16_UINT), - fmt(VK_FORMAT_R16_SINT, ISL_FORMAT_R16_SINT), - fmt(VK_FORMAT_R16_SFLOAT, ISL_FORMAT_R16_FLOAT), - fmt(VK_FORMAT_R16G16_UNORM, 
ISL_FORMAT_R16G16_UNORM), - fmt(VK_FORMAT_R16G16_SNORM, ISL_FORMAT_R16G16_SNORM), - fmt(VK_FORMAT_R16G16_USCALED, ISL_FORMAT_R16G16_USCALED), - fmt(VK_FORMAT_R16G16_SSCALED, ISL_FORMAT_R16G16_SSCALED), - fmt(VK_FORMAT_R16G16_UINT, ISL_FORMAT_R16G16_UINT), - fmt(VK_FORMAT_R16G16_SINT, ISL_FORMAT_R16G16_SINT), - fmt(VK_FORMAT_R16G16_SFLOAT, ISL_FORMAT_R16G16_FLOAT), - fmt(VK_FORMAT_R16G16B16_UNORM, ISL_FORMAT_R16G16B16_UNORM), - fmt(VK_FORMAT_R16G16B16_SNORM, ISL_FORMAT_R16G16B16_SNORM), - fmt(VK_FORMAT_R16G16B16_USCALED, ISL_FORMAT_R16G16B16_USCALED), - fmt(VK_FORMAT_R16G16B16_SSCALED, ISL_FORMAT_R16G16B16_SSCALED), - fmt(VK_FORMAT_R16G16B16_UINT, ISL_FORMAT_R16G16B16_UINT), - fmt(VK_FORMAT_R16G16B16_SINT, ISL_FORMAT_R16G16B16_SINT), - fmt(VK_FORMAT_R16G16B16_SFLOAT, ISL_FORMAT_R16G16B16_FLOAT), - fmt(VK_FORMAT_R16G16B16A16_UNORM, ISL_FORMAT_R16G16B16A16_UNORM), - fmt(VK_FORMAT_R16G16B16A16_SNORM, ISL_FORMAT_R16G16B16A16_SNORM), - fmt(VK_FORMAT_R16G16B16A16_USCALED, ISL_FORMAT_R16G16B16A16_USCALED), - fmt(VK_FORMAT_R16G16B16A16_SSCALED, ISL_FORMAT_R16G16B16A16_SSCALED), - fmt(VK_FORMAT_R16G16B16A16_UINT, ISL_FORMAT_R16G16B16A16_UINT), - fmt(VK_FORMAT_R16G16B16A16_SINT, ISL_FORMAT_R16G16B16A16_SINT), - fmt(VK_FORMAT_R16G16B16A16_SFLOAT, ISL_FORMAT_R16G16B16A16_FLOAT), - fmt(VK_FORMAT_R32_UINT, ISL_FORMAT_R32_UINT,), - fmt(VK_FORMAT_R32_SINT, ISL_FORMAT_R32_SINT,), - fmt(VK_FORMAT_R32_SFLOAT, ISL_FORMAT_R32_FLOAT,), - fmt(VK_FORMAT_R32G32_UINT, ISL_FORMAT_R32G32_UINT,), - fmt(VK_FORMAT_R32G32_SINT, ISL_FORMAT_R32G32_SINT,), - fmt(VK_FORMAT_R32G32_SFLOAT, ISL_FORMAT_R32G32_FLOAT,), - fmt(VK_FORMAT_R32G32B32_UINT, ISL_FORMAT_R32G32B32_UINT,), - fmt(VK_FORMAT_R32G32B32_SINT, ISL_FORMAT_R32G32B32_SINT,), - fmt(VK_FORMAT_R32G32B32_SFLOAT, ISL_FORMAT_R32G32B32_FLOAT,), - fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT,), - fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT,), - fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT,), - 
fmt(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU), - fmt(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU), - fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT), - fmt(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU), - fmt(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU), - fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT), - fmt(VK_FORMAT_R64G64B64_UINT, ISL_FORMAT_R64G64B64_PASSTHRU), - fmt(VK_FORMAT_R64G64B64_SINT, ISL_FORMAT_R64G64B64_PASSTHRU), - fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT), - fmt(VK_FORMAT_R64G64B64A64_UINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), - fmt(VK_FORMAT_R64G64B64A64_SINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), - fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT), - fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT), - fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP), - - fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, .has_depth = true), - fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true), - fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .has_depth = true), - fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .has_stencil = true), - fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .has_depth = true, .has_stencil = true), - fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true, .has_stencil = true), - fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .has_depth = true, .has_stencil = true), - - fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_DXT1_RGB), - fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_DXT1_RGB_SRGB), - fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK, ISL_FORMAT_BC1_UNORM), - fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, ISL_FORMAT_BC1_UNORM_SRGB), - fmt(VK_FORMAT_BC2_UNORM_BLOCK, ISL_FORMAT_BC2_UNORM), - fmt(VK_FORMAT_BC2_SRGB_BLOCK, ISL_FORMAT_BC2_UNORM_SRGB), - fmt(VK_FORMAT_BC3_UNORM_BLOCK, ISL_FORMAT_BC3_UNORM), - fmt(VK_FORMAT_BC3_SRGB_BLOCK, ISL_FORMAT_BC3_UNORM_SRGB), - fmt(VK_FORMAT_BC4_UNORM_BLOCK, 
ISL_FORMAT_BC4_UNORM), - fmt(VK_FORMAT_BC4_SNORM_BLOCK, ISL_FORMAT_BC4_SNORM), - fmt(VK_FORMAT_BC5_UNORM_BLOCK, ISL_FORMAT_BC5_UNORM), - fmt(VK_FORMAT_BC5_SNORM_BLOCK, ISL_FORMAT_BC5_SNORM), - fmt(VK_FORMAT_BC6H_UFLOAT_BLOCK, ISL_FORMAT_BC6H_UF16), - fmt(VK_FORMAT_BC6H_SFLOAT_BLOCK, ISL_FORMAT_BC6H_SF16), - fmt(VK_FORMAT_BC7_UNORM_BLOCK, ISL_FORMAT_BC7_UNORM), - fmt(VK_FORMAT_BC7_SRGB_BLOCK, ISL_FORMAT_BC7_UNORM_SRGB), - fmt(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8), - fmt(VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8), - fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8_PTA), - fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8_PTA), - fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, ISL_FORMAT_ETC2_EAC_RGBA8), - fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, ISL_FORMAT_ETC2_EAC_SRGB8_A8), - fmt(VK_FORMAT_EAC_R11_UNORM_BLOCK, ISL_FORMAT_EAC_R11), - fmt(VK_FORMAT_EAC_R11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_R11), - fmt(VK_FORMAT_EAC_R11G11_UNORM_BLOCK, ISL_FORMAT_EAC_RG11), - fmt(VK_FORMAT_EAC_R11G11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_RG11), - fmt(VK_FORMAT_ASTC_4x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_4x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - 
fmt(VK_FORMAT_ASTC_8x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x12_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x12_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_USCALED, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SSCALED, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_UINT, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SINT, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM), - fmt(VK_FORMAT_B8G8R8A8_SNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_USCALED, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SSCALED, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UINT, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB), -}; - -#undef fmt - -const struct anv_format * -anv_format_for_vk_format(VkFormat format) -{ - return &anv_formats[format]; -} - -/** - * Exactly one bit must be set in \a aspect. 
- */ -enum isl_format -anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, - VkImageTiling tiling, struct anv_format_swizzle *swizzle) -{ - const struct anv_format *anv_fmt = &anv_formats[format]; - - if (swizzle) - *swizzle = anv_fmt->swizzle; - - switch (aspect) { - case VK_IMAGE_ASPECT_COLOR_BIT: - if (anv_fmt->isl_format == ISL_FORMAT_UNSUPPORTED) { - return ISL_FORMAT_UNSUPPORTED; - } else if (tiling == VK_IMAGE_TILING_OPTIMAL && - !util_is_power_of_two(anv_fmt->isl_layout->bs)) { - /* Tiled formats *must* be power-of-two because we need up upload - * them with the render pipeline. For 3-channel formats, we fix - * this by switching them over to RGBX or RGBA formats under the - * hood. - */ - enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->isl_format); - if (rgbx != ISL_FORMAT_UNSUPPORTED) - return rgbx; - else - return isl_format_rgb_to_rgba(anv_fmt->isl_format); - } else { - return anv_fmt->isl_format; - } - - case VK_IMAGE_ASPECT_DEPTH_BIT: - case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT): - assert(anv_fmt->has_depth); - return anv_fmt->isl_format; - - case VK_IMAGE_ASPECT_STENCIL_BIT: - assert(anv_fmt->has_stencil); - return ISL_FORMAT_R8_UINT; - - default: - unreachable("bad VkImageAspect"); - return ISL_FORMAT_UNSUPPORTED; - } -} - -// Format capabilities - -void anv_validate_GetPhysicalDeviceFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat _format, - VkFormatProperties* pFormatProperties) -{ - const struct anv_format *format = anv_format_for_vk_format(_format); - fprintf(stderr, "vkGetFormatProperties(%s)\n", format->name); - anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); -} - -static VkFormatFeatureFlags -get_image_format_properties(int gen, enum isl_format base, - enum isl_format actual, - struct anv_format_swizzle swizzle) -{ - const struct brw_surface_format_info *info = &surface_formats[actual]; - - if (actual == ISL_FORMAT_UNSUPPORTED || !info->exists) - return 0; 
- - VkFormatFeatureFlags flags = 0; - if (info->sampling <= gen) { - flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | - VK_FORMAT_FEATURE_BLIT_SRC_BIT; - - if (info->filtering <= gen) - flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; - } - - /* We can render to swizzled formats. However, if the alpha channel is - * moved, then blending won't work correctly. The PRM tells us - * straight-up not to render to such a surface. - */ - if (info->render_target <= gen && swizzle.a == 3) { - flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | - VK_FORMAT_FEATURE_BLIT_DST_BIT; - } - - if (info->alpha_blend <= gen && swizzle.a == 3) - flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; - - /* Load/store is determined based on base format. This prevents RGB - * formats from showing up as load/store capable. - */ - if (isl_is_storage_image_format(base)) - flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; - - if (base == ISL_FORMAT_R32_SINT || base == ISL_FORMAT_R32_UINT) - flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; - - return flags; -} - -static VkFormatFeatureFlags -get_buffer_format_properties(int gen, enum isl_format format) -{ - const struct brw_surface_format_info *info = &surface_formats[format]; - - if (format == ISL_FORMAT_UNSUPPORTED || !info->exists) - return 0; - - VkFormatFeatureFlags flags = 0; - if (info->sampling <= gen && !isl_format_is_compressed(format)) - flags |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; - - if (info->input_vb <= gen) - flags |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; - - if (isl_is_storage_image_format(format)) - flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; - - if (format == ISL_FORMAT_R32_SINT || format == ISL_FORMAT_R32_UINT) - flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; - - return flags; -} - -static void -anv_physical_device_get_format_properties(struct anv_physical_device *physical_device, - VkFormat format, - VkFormatProperties *out_properties) -{ - int gen = physical_device->info->gen * 10; - if 
(physical_device->info->is_haswell) - gen += 5; - - VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0; - if (anv_format_is_depth_or_stencil(&anv_formats[format])) { - tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; - if (physical_device->info->gen >= 8) { - tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; - } - if (anv_formats[format].has_depth) { - tiled |= VK_FORMAT_FEATURE_BLIT_DST_BIT; - } - } else { - enum isl_format linear_fmt, tiled_fmt; - struct anv_format_swizzle linear_swizzle, tiled_swizzle; - linear_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_TILING_LINEAR, &linear_swizzle); - tiled_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_TILING_OPTIMAL, &tiled_swizzle); - - linear = get_image_format_properties(gen, linear_fmt, linear_fmt, - linear_swizzle); - tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt, - tiled_swizzle); - buffer = get_buffer_format_properties(gen, linear_fmt); - - /* XXX: We handle 3-channel formats by switching them out for RGBX or - * RGBA formats behind-the-scenes. This works fine for textures - * because the upload process will fill in the extra channel. - * We could also support it for render targets, but it will take - * substantially more work and we have enough RGBX formats to handle - * what most clients will want. 
- */ - if (linear_fmt != ISL_FORMAT_UNSUPPORTED && - !util_is_power_of_two(isl_format_layouts[linear_fmt].bs) && - isl_format_rgb_to_rgbx(linear_fmt) == ISL_FORMAT_UNSUPPORTED) { - tiled &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT & - ~VK_FORMAT_FEATURE_BLIT_DST_BIT; - } - } - - out_properties->linearTilingFeatures = linear; - out_properties->optimalTilingFeatures = tiled; - out_properties->bufferFeatures = buffer; - - return; -} - - -void anv_GetPhysicalDeviceFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkFormatProperties* pFormatProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - - anv_physical_device_get_format_properties( - physical_device, - format, - pFormatProperties); -} - -VkResult anv_GetPhysicalDeviceImageFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - VkImageTiling tiling, - VkImageUsageFlags usage, - VkImageCreateFlags createFlags, - VkImageFormatProperties* pImageFormatProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - VkFormatProperties format_props; - VkFormatFeatureFlags format_feature_flags; - VkExtent3D maxExtent; - uint32_t maxMipLevels; - uint32_t maxArraySize; - VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; - - anv_physical_device_get_format_properties(physical_device, format, - &format_props); - - /* Extract the VkFormatFeatureFlags that are relevant for the queried - * tiling. 
- */ - if (tiling == VK_IMAGE_TILING_LINEAR) { - format_feature_flags = format_props.linearTilingFeatures; - } else if (tiling == VK_IMAGE_TILING_OPTIMAL) { - format_feature_flags = format_props.optimalTilingFeatures; - } else { - unreachable("bad VkImageTiling"); - } - - switch (type) { - default: - unreachable("bad VkImageType"); - case VK_IMAGE_TYPE_1D: - maxExtent.width = 16384; - maxExtent.height = 1; - maxExtent.depth = 1; - maxMipLevels = 15; /* log2(maxWidth) + 1 */ - maxArraySize = 2048; - sampleCounts = VK_SAMPLE_COUNT_1_BIT; - break; - case VK_IMAGE_TYPE_2D: - /* FINISHME: Does this really differ for cube maps? The documentation - * for RENDER_SURFACE_STATE suggests so. - */ - maxExtent.width = 16384; - maxExtent.height = 16384; - maxExtent.depth = 1; - maxMipLevels = 15; /* log2(maxWidth) + 1 */ - maxArraySize = 2048; - break; - case VK_IMAGE_TYPE_3D: - maxExtent.width = 2048; - maxExtent.height = 2048; - maxExtent.depth = 2048; - maxMipLevels = 12; /* log2(maxWidth) + 1 */ - maxArraySize = 1; - break; - } - - if (tiling == VK_IMAGE_TILING_OPTIMAL && - type == VK_IMAGE_TYPE_2D && - (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && - !(createFlags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && - !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) { - sampleCounts = isl_device_get_sample_counts(&physical_device->isl_dev); - } - - if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { - /* Meta implements transfers by sampling from the source image. */ - if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { - goto unsupported; - } - } - -#if 0 - if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { - if (anv_format_for_vk_format(format)->has_stencil) { - /* Not yet implemented because copying to a W-tiled surface is crazy - * hard. 
- */ - anv_finishme("support VK_IMAGE_USAGE_TRANSFER_DST_BIT for " - "stencil format"); - goto unsupported; - } - } -#endif - - if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) { - if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { - goto unsupported; - } - } - - if (usage & VK_IMAGE_USAGE_STORAGE_BIT) { - if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { - goto unsupported; - } - } - - if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { - goto unsupported; - } - } - - if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { - goto unsupported; - } - } - - if (usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) { - /* Nothing to check. */ - } - - if (usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) { - /* Ignore this flag because it was removed from the - * provisional_I_20150910 header. - */ - } - - *pImageFormatProperties = (VkImageFormatProperties) { - .maxExtent = maxExtent, - .maxMipLevels = maxMipLevels, - .maxArrayLayers = maxArraySize, - .sampleCounts = sampleCounts, - - /* FINISHME: Accurately calculate - * VkImageFormatProperties::maxResourceSize. - */ - .maxResourceSize = UINT32_MAX, - }; - - return VK_SUCCESS; - -unsupported: - *pImageFormatProperties = (VkImageFormatProperties) { - .maxExtent = { 0, 0, 0 }, - .maxMipLevels = 0, - .maxArrayLayers = 0, - .sampleCounts = 0, - .maxResourceSize = 0, - }; - - return VK_SUCCESS; -} - -void anv_GetPhysicalDeviceSparseImageFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - uint32_t samples, - VkImageUsageFlags usage, - VkImageTiling tiling, - uint32_t* pNumProperties, - VkSparseImageFormatProperties* pProperties) -{ - /* Sparse images are not yet supported. 
*/ - *pNumProperties = 0; -} diff --git a/src/vulkan/anv_gem.c b/src/vulkan/anv_gem.c deleted file mode 100644 index 0a7be353327..00000000000 --- a/src/vulkan/anv_gem.c +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#define _DEFAULT_SOURCE - -#include -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) - -static int -anv_ioctl(int fd, unsigned long request, void *arg) -{ - int ret; - - do { - ret = ioctl(fd, request, arg); - } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); - - return ret; -} - -/** - * Wrapper around DRM_IOCTL_I915_GEM_CREATE. - * - * Return gem handle, or 0 on failure. Gem handles are never 0. 
- */ -uint32_t -anv_gem_create(struct anv_device *device, size_t size) -{ - struct drm_i915_gem_create gem_create; - int ret; - - VG_CLEAR(gem_create); - gem_create.size = size; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); - if (ret != 0) { - /* FIXME: What do we do if this fails? */ - return 0; - } - - return gem_create.handle; -} - -void -anv_gem_close(struct anv_device *device, uint32_t gem_handle) -{ - struct drm_gem_close close; - - VG_CLEAR(close); - close.handle = gem_handle; - anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); -} - -/** - * Wrapper around DRM_IOCTL_I915_GEM_MMAP. - */ -void* -anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, - uint64_t offset, uint64_t size, uint32_t flags) -{ - struct drm_i915_gem_mmap gem_mmap; - int ret; - - gem_mmap.handle = gem_handle; - VG_CLEAR(gem_mmap.pad); - gem_mmap.offset = offset; - gem_mmap.size = size; - VG_CLEAR(gem_mmap.addr_ptr); - gem_mmap.flags = flags; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); - if (ret != 0) { - /* FIXME: Is NULL the right error return? Cf MAP_INVALID */ - return NULL; - } - - VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1)); - return (void *)(uintptr_t) gem_mmap.addr_ptr; -} - -/* This is just a wrapper around munmap, but it also notifies valgrind that - * this map is no longer valid. Pair this with anv_gem_mmap(). 
- */ -void -anv_gem_munmap(void *p, uint64_t size) -{ - VG(VALGRIND_FREELIKE_BLOCK(p, 0)); - munmap(p, size); -} - -uint32_t -anv_gem_userptr(struct anv_device *device, void *mem, size_t size) -{ - struct drm_i915_gem_userptr userptr; - int ret; - - VG_CLEAR(userptr); - userptr.user_ptr = (__u64)((unsigned long) mem); - userptr.user_size = size; - userptr.flags = 0; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); - if (ret == -1) - return 0; - - return userptr.handle; -} - -int -anv_gem_set_caching(struct anv_device *device, - uint32_t gem_handle, uint32_t caching) -{ - struct drm_i915_gem_caching gem_caching; - - VG_CLEAR(gem_caching); - gem_caching.handle = gem_handle; - gem_caching.caching = caching; - - return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching); -} - -int -anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, - uint32_t read_domains, uint32_t write_domain) -{ - struct drm_i915_gem_set_domain gem_set_domain; - - VG_CLEAR(gem_set_domain); - gem_set_domain.handle = gem_handle; - gem_set_domain.read_domains = read_domains; - gem_set_domain.write_domain = write_domain; - - return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &gem_set_domain); -} - -/** - * On error, \a timeout_ns holds the remaining time. 
- */ -int -anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) -{ - struct drm_i915_gem_wait wait; - int ret; - - VG_CLEAR(wait); - wait.bo_handle = gem_handle; - wait.timeout_ns = *timeout_ns; - wait.flags = 0; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); - *timeout_ns = wait.timeout_ns; - - return ret; -} - -int -anv_gem_execbuffer(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf) -{ - return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); -} - -int -anv_gem_set_tiling(struct anv_device *device, - uint32_t gem_handle, uint32_t stride, uint32_t tiling) -{ - struct drm_i915_gem_set_tiling set_tiling; - int ret; - - /* set_tiling overwrites the input on the error path, so we have to open - * code anv_ioctl. - */ - - do { - VG_CLEAR(set_tiling); - set_tiling.handle = gem_handle; - set_tiling.tiling_mode = tiling; - set_tiling.stride = stride; - - ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); - } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); - - return ret; -} - -int -anv_gem_get_param(int fd, uint32_t param) -{ - drm_i915_getparam_t gp; - int ret, tmp; - - VG_CLEAR(gp); - gp.param = param; - gp.value = &tmp; - ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); - if (ret == 0) - return tmp; - - return 0; -} - -bool -anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) -{ - struct drm_gem_close close; - int ret; - - struct drm_i915_gem_create gem_create; - VG_CLEAR(gem_create); - gem_create.size = 4096; - - if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) { - assert(!"Failed to create GEM BO"); - return false; - } - - bool swizzled = false; - - /* set_tiling overwrites the input on the error path, so we have to open - * code anv_ioctl. - */ - struct drm_i915_gem_set_tiling set_tiling; - do { - VG_CLEAR(set_tiling); - set_tiling.handle = gem_create.handle; - set_tiling.tiling_mode = tiling; - set_tiling.stride = tiling == I915_TILING_X ? 
512 : 128; - - ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); - } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); - - if (ret != 0) { - assert(!"Failed to set BO tiling"); - goto close_and_return; - } - - struct drm_i915_gem_get_tiling get_tiling; - VG_CLEAR(get_tiling); - get_tiling.handle = gem_create.handle; - - if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) { - assert(!"Failed to get BO tiling"); - goto close_and_return; - } - - swizzled = get_tiling.swizzle_mode != I915_BIT_6_SWIZZLE_NONE; - -close_and_return: - - VG_CLEAR(close); - close.handle = gem_create.handle; - anv_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); - - return swizzled; -} - -int -anv_gem_create_context(struct anv_device *device) -{ - struct drm_i915_gem_context_create create; - int ret; - - VG_CLEAR(create); - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); - if (ret == -1) - return -1; - - return create.ctx_id; -} - -int -anv_gem_destroy_context(struct anv_device *device, int context) -{ - struct drm_i915_gem_context_destroy destroy; - - VG_CLEAR(destroy); - destroy.ctx_id = context; - - return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy); -} - -int -anv_gem_get_aperture(int fd, uint64_t *size) -{ - struct drm_i915_gem_get_aperture aperture; - int ret; - - VG_CLEAR(aperture); - ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); - if (ret == -1) - return -1; - - *size = aperture.aper_available_size; - - return 0; -} - -int -anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) -{ - struct drm_prime_handle args; - int ret; - - VG_CLEAR(args); - args.handle = gem_handle; - args.flags = DRM_CLOEXEC; - - ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); - if (ret == -1) - return -1; - - return args.fd; -} - -uint32_t -anv_gem_fd_to_handle(struct anv_device *device, int fd) -{ - struct drm_prime_handle args; - int ret; - - VG_CLEAR(args); - args.fd = fd; - - ret = 
anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); - if (ret == -1) - return 0; - - return args.handle; -} diff --git a/src/vulkan/anv_gem_stubs.c b/src/vulkan/anv_gem_stubs.c deleted file mode 100644 index 3204fefb28e..00000000000 --- a/src/vulkan/anv_gem_stubs.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#define _DEFAULT_SOURCE - -#include -#include -#include - -#include "anv_private.h" - -static inline int -memfd_create(const char *name, unsigned int flags) -{ - return syscall(SYS_memfd_create, name, flags); -} - -uint32_t -anv_gem_create(struct anv_device *device, size_t size) -{ - int fd = memfd_create("fake bo", MFD_CLOEXEC); - if (fd == -1) - return 0; - - assert(fd != 0); - - if (ftruncate(fd, size) == -1) - return 0; - - return fd; -} - -void -anv_gem_close(struct anv_device *device, uint32_t gem_handle) -{ - close(gem_handle); -} - -void* -anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, - uint64_t offset, uint64_t size, uint32_t flags) -{ - /* Ignore flags, as they're specific to I915_GEM_MMAP. */ - (void) flags; - - return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, - gem_handle, offset); -} - -/* This is just a wrapper around munmap, but it also notifies valgrind that - * this map is no longer valid. Pair this with anv_gem_mmap(). - */ -void -anv_gem_munmap(void *p, uint64_t size) -{ - munmap(p, size); -} - -uint32_t -anv_gem_userptr(struct anv_device *device, void *mem, size_t size) -{ - return -1; -} - -int -anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) -{ - return 0; -} - -int -anv_gem_execbuffer(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf) -{ - return 0; -} - -int -anv_gem_set_tiling(struct anv_device *device, - uint32_t gem_handle, uint32_t stride, uint32_t tiling) -{ - return 0; -} - -int -anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, - uint32_t caching) -{ - return 0; -} - -int -anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, - uint32_t read_domains, uint32_t write_domain) -{ - return 0; -} - -int -anv_gem_get_param(int fd, uint32_t param) -{ - unreachable("Unused"); -} - -bool -anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) -{ - unreachable("Unused"); -} - -int -anv_gem_create_context(struct anv_device *device) -{ 
- unreachable("Unused"); -} - -int -anv_gem_destroy_context(struct anv_device *device, int context) -{ - unreachable("Unused"); -} - -int -anv_gem_get_aperture(int fd, uint64_t *size) -{ - unreachable("Unused"); -} - -int -anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) -{ - unreachable("Unused"); -} - -uint32_t -anv_gem_fd_to_handle(struct anv_device *device, int fd) -{ - unreachable("Unused"); -} diff --git a/src/vulkan/anv_gen_macros.h b/src/vulkan/anv_gen_macros.h deleted file mode 100644 index ef2ecd55a9b..00000000000 --- a/src/vulkan/anv_gen_macros.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -/* Macros for handling per-gen compilation. - * - * The prefixing macros GENX() and genX() automatically prefix whatever you - * give them by GENX_ or genX_ where X is the gen number. 
- * - * You can declare a function to be used on some range of gens like this: - * - * GENX_FUNC(GEN7, GEN75) void - * genX(my_function_name)(args...) - * { - * // Do stuff - * } - * - * If the file is compiled for any set of gens containing gen7 and gen75, - * the function will effectively only get compiled twice as - * gen7_my_function_nmae and gen75_my_function_name. The function has to - * be compilable on all gens, but it will become a static inline that gets - * discarded by the compiler on all gens not in range. - * - * You can do pseudo-runtime checks in your function such as - * - * if (ANV_GEN > 8 || ANV_IS_HASWELL) { - * // Do something - * } - * - * The contents of the if statement must be valid regardless of gen, but - * the if will get compiled away on everything except haswell. - * - * For places where you really do have a compile-time conflict, you can - * use preprocessor logic: - * - * #if (ANV_GEN > 8 || ANV_IS_HASWELL) - * // Do something - * #endif - * - * However, it is strongly recommended that the former be used whenever - * possible. - */ - -/* Base macro defined on the command line. If we don't have this, we can't - * do anything. 
- */ -#ifdef ANV_GENx10 - -/* Gen checking macros */ -#define ANV_GEN ((ANV_GENx10) / 10) -#define ANV_IS_HASWELL ((ANV_GENx10) == 75) - -/* Prefixing macros */ -#if (ANV_GENx10 == 70) -# define GENX(X) GEN7_##X -# define genX(x) gen7_##x -#elif (ANV_GENx10 == 75) -# define GENX(X) GEN75_##X -# define genX(x) gen75_##x -#elif (ANV_GENx10 == 80) -# define GENX(X) GEN8_##X -# define genX(x) gen8_##x -#elif (ANV_GENx10 == 90) -# define GENX(X) GEN9_##X -# define genX(x) gen9_##x -#else -# error "Need to add prefixing macros for your gen" -#endif - -/* Macros for comparing gens */ -#if (ANV_GENx10 >= 70) -#define __ANV_GEN_GE_GEN7(T, F) T -#else -#define __ANV_GEN_GE_GEN7(T, F) F -#endif - -#if (ANV_GENx10 <= 70) -#define __ANV_GEN_LE_GEN7(T, F) T -#else -#define __ANV_GEN_LE_GEN7(T, F) F -#endif - -#if (ANV_GENx10 >= 75) -#define __ANV_GEN_GE_GEN75(T, F) T -#else -#define __ANV_GEN_GE_GEN75(T, F) F -#endif - -#if (ANV_GENx10 <= 75) -#define __ANV_GEN_LE_GEN75(T, F) T -#else -#define __ANV_GEN_LE_GEN75(T, F) F -#endif - -#if (ANV_GENx10 >= 80) -#define __ANV_GEN_GE_GEN8(T, F) T -#else -#define __ANV_GEN_GE_GEN8(T, F) F -#endif - -#if (ANV_GENx10 <= 80) -#define __ANV_GEN_LE_GEN8(T, F) T -#else -#define __ANV_GEN_LE_GEN8(T, F) F -#endif - -#if (ANV_GENx10 >= 90) -#define __ANV_GEN_GE_GEN9(T, F) T -#else -#define __ANV_GEN_GE_GEN9(T, F) F -#endif - -#if (ANV_GENx10 <= 90) -#define __ANV_GEN_LE_GEN9(T, F) T -#else -#define __ANV_GEN_LE_GEN9(T, F) F -#endif - -#define __ANV_GEN_IN_RANGE(start, end, T, F) \ - __ANV_GEN_GE_##start(__ANV_GEN_LE_##end(T, F), F) - -/* Declares a function as static inlind if it's not in range */ -#define GENX_FUNC(start, end) __ANV_GEN_IN_RANGE(start, end, , static inline) - -#endif /* ANV_GENx10 */ diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c deleted file mode 100644 index 0a412a3f8c6..00000000000 --- a/src/vulkan/anv_image.c +++ /dev/null @@ -1,911 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby 
granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -/** - * Exactly one bit must be set in \a aspect. 
- */ -static isl_surf_usage_flags_t -choose_isl_surf_usage(VkImageUsageFlags vk_usage, - VkImageAspectFlags aspect) -{ - isl_surf_usage_flags_t isl_usage = 0; - - /* FINISHME: Support aux surfaces */ - isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT; - - if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT) - isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; - - if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) - isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; - - if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) - isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; - - if (vk_usage & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) - isl_usage |= ISL_SURF_USAGE_CUBE_BIT; - - if (vk_usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - switch (aspect) { - default: - unreachable("bad VkImageAspect"); - case VK_IMAGE_ASPECT_DEPTH_BIT: - isl_usage |= ISL_SURF_USAGE_DEPTH_BIT; - break; - case VK_IMAGE_ASPECT_STENCIL_BIT: - isl_usage |= ISL_SURF_USAGE_STENCIL_BIT; - break; - } - } - - if (vk_usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { - /* Meta implements transfers by sampling from the source image. */ - isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; - } - - if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { - /* Meta implements transfers by rendering into the destination image. */ - isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; - } - - return isl_usage; -} - -/** - * Exactly one bit must be set in \a aspect. - */ -static struct anv_surface * -get_surface(struct anv_image *image, VkImageAspectFlags aspect) -{ - switch (aspect) { - default: - unreachable("bad VkImageAspect"); - case VK_IMAGE_ASPECT_COLOR_BIT: - return &image->color_surface; - case VK_IMAGE_ASPECT_DEPTH_BIT: - return &image->depth_surface; - case VK_IMAGE_ASPECT_STENCIL_BIT: - return &image->stencil_surface; - } -} - -/** - * Initialize the anv_image::*_surface selected by \a aspect. Then update the - * image's memory requirements (that is, the image's size and alignment). - * - * Exactly one bit must be set in \a aspect. 
- */ -static VkResult -make_surface(const struct anv_device *dev, - struct anv_image *image, - const struct anv_image_create_info *anv_info, - VkImageAspectFlags aspect) -{ - const VkImageCreateInfo *vk_info = anv_info->vk_info; - bool ok UNUSED; - - static const enum isl_surf_dim vk_to_isl_surf_dim[] = { - [VK_IMAGE_TYPE_1D] = ISL_SURF_DIM_1D, - [VK_IMAGE_TYPE_2D] = ISL_SURF_DIM_2D, - [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D, - }; - - isl_tiling_flags_t tiling_flags = anv_info->isl_tiling_flags; - if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) - tiling_flags &= ISL_TILING_LINEAR_BIT; - - struct anv_surface *anv_surf = get_surface(image, aspect); - - VkExtent3D extent; - switch (vk_info->imageType) { - case VK_IMAGE_TYPE_1D: - extent = (VkExtent3D) { vk_info->extent.width, 1, 1 }; - break; - case VK_IMAGE_TYPE_2D: - extent = (VkExtent3D) { vk_info->extent.width, vk_info->extent.height, 1 }; - break; - case VK_IMAGE_TYPE_3D: - extent = vk_info->extent; - break; - default: - unreachable("invalid image type"); - } - - image->extent = extent; - - ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, - .dim = vk_to_isl_surf_dim[vk_info->imageType], - .format = anv_get_isl_format(vk_info->format, aspect, - vk_info->tiling, NULL), - .width = extent.width, - .height = extent.height, - .depth = extent.depth, - .levels = vk_info->mipLevels, - .array_len = vk_info->arrayLayers, - .samples = vk_info->samples, - .min_alignment = 0, - .min_pitch = 0, - .usage = choose_isl_surf_usage(image->usage, aspect), - .tiling_flags = tiling_flags); - - /* isl_surf_init() will fail only if provided invalid input. Invalid input - * is illegal in Vulkan. - */ - assert(ok); - - anv_surf->offset = align_u32(image->size, anv_surf->isl.alignment); - image->size = anv_surf->offset + anv_surf->isl.size; - image->alignment = MAX(image->alignment, anv_surf->isl.alignment); - - return VK_SUCCESS; -} - -/** - * Parameter @a format is required and overrides VkImageCreateInfo::format. 
- */ -static VkImageUsageFlags -anv_image_get_full_usage(const VkImageCreateInfo *info, - const struct anv_format *format) -{ - VkImageUsageFlags usage = info->usage; - - if (info->samples > 1 && - (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) { - /* Meta will resolve the image by binding it as a texture. */ - usage |= VK_IMAGE_USAGE_SAMPLED_BIT; - } - - if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { - /* Meta will transfer from the image by binding it as a texture. */ - usage |= VK_IMAGE_USAGE_SAMPLED_BIT; - } - - if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { - /* For non-clear transfer operations, meta will transfer to the image by - * binding it as a color attachment, even if the image format is not - * a color format. - */ - usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - - if (anv_format_is_depth_or_stencil(format)) { - /* vkCmdClearDepthStencilImage() only requires that - * VK_IMAGE_USAGE_TRANSFER_SRC_BIT be set. In particular, it does - * not require VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT. Meta - * clears the image, though, by binding it as a depthstencil - * attachment. 
- */ - usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - } - } - - return usage; -} - -VkResult -anv_image_create(VkDevice _device, - const struct anv_image_create_info *create_info, - const VkAllocationCallbacks* alloc, - VkImage *pImage) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - const VkImageCreateInfo *pCreateInfo = create_info->vk_info; - struct anv_image *image = NULL; - const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); - VkResult r; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); - - anv_assert(pCreateInfo->mipLevels > 0); - anv_assert(pCreateInfo->arrayLayers > 0); - anv_assert(pCreateInfo->samples > 0); - anv_assert(pCreateInfo->extent.width > 0); - anv_assert(pCreateInfo->extent.height > 0); - anv_assert(pCreateInfo->extent.depth > 0); - - image = anv_alloc2(&device->alloc, alloc, sizeof(*image), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!image) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - memset(image, 0, sizeof(*image)); - image->type = pCreateInfo->imageType; - image->extent = pCreateInfo->extent; - image->vk_format = pCreateInfo->format; - image->format = format; - image->levels = pCreateInfo->mipLevels; - image->array_size = pCreateInfo->arrayLayers; - image->samples = pCreateInfo->samples; - image->usage = anv_image_get_full_usage(pCreateInfo, format); - image->tiling = pCreateInfo->tiling; - - if (likely(anv_format_is_color(format))) { - r = make_surface(device, image, create_info, - VK_IMAGE_ASPECT_COLOR_BIT); - if (r != VK_SUCCESS) - goto fail; - } else { - if (image->format->has_depth) { - r = make_surface(device, image, create_info, - VK_IMAGE_ASPECT_DEPTH_BIT); - if (r != VK_SUCCESS) - goto fail; - } - - if (image->format->has_stencil) { - r = make_surface(device, image, create_info, - VK_IMAGE_ASPECT_STENCIL_BIT); - if (r != VK_SUCCESS) - goto fail; - } - } - - *pImage = anv_image_to_handle(image); - - return VK_SUCCESS; - -fail: - if (image) - 
anv_free2(&device->alloc, alloc, image); - - return r; -} - -VkResult -anv_CreateImage(VkDevice device, - const VkImageCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImage *pImage) -{ - return anv_image_create(device, - &(struct anv_image_create_info) { - .vk_info = pCreateInfo, - .isl_tiling_flags = ISL_TILING_ANY_MASK, - }, - pAllocator, - pImage); -} - -void -anv_DestroyImage(VkDevice _device, VkImage _image, - const VkAllocationCallbacks *pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - anv_free2(&device->alloc, pAllocator, anv_image_from_handle(_image)); -} - -static void -anv_surface_get_subresource_layout(struct anv_image *image, - struct anv_surface *surface, - const VkImageSubresource *subresource, - VkSubresourceLayout *layout) -{ - /* If we are on a non-zero mip level or array slice, we need to - * calculate a real offset. - */ - anv_assert(subresource->mipLevel == 0); - anv_assert(subresource->arrayLayer == 0); - - layout->offset = surface->offset; - layout->rowPitch = surface->isl.row_pitch; - layout->depthPitch = isl_surf_get_array_pitch(&surface->isl); - layout->arrayPitch = isl_surf_get_array_pitch(&surface->isl); - layout->size = surface->isl.size; -} - -void anv_GetImageSubresourceLayout( - VkDevice device, - VkImage _image, - const VkImageSubresource* pSubresource, - VkSubresourceLayout* pLayout) -{ - ANV_FROM_HANDLE(anv_image, image, _image); - - assert(__builtin_popcount(pSubresource->aspectMask) == 1); - - switch (pSubresource->aspectMask) { - case VK_IMAGE_ASPECT_COLOR_BIT: - anv_surface_get_subresource_layout(image, &image->color_surface, - pSubresource, pLayout); - break; - case VK_IMAGE_ASPECT_DEPTH_BIT: - anv_surface_get_subresource_layout(image, &image->depth_surface, - pSubresource, pLayout); - break; - case VK_IMAGE_ASPECT_STENCIL_BIT: - anv_surface_get_subresource_layout(image, &image->stencil_surface, - pSubresource, pLayout); - break; - default: - assert(!"Invalid image aspect"); - } -} - 
-VkResult -anv_validate_CreateImageView(VkDevice _device, - const VkImageViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImageView *pView) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *subresource; - const struct anv_format *view_format_info; - - /* Validate structure type before dereferencing it. */ - assert(pCreateInfo); - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); - subresource = &pCreateInfo->subresourceRange; - - /* Validate viewType is in range before using it. */ - assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); - assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); - - /* Validate format is in range before using it. */ - assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); - assert(pCreateInfo->format <= VK_FORMAT_END_RANGE); - view_format_info = anv_format_for_vk_format(pCreateInfo->format); - - /* Validate channel swizzles. */ - assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE); - assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE); - assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE); - assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE); - - /* Validate subresource. 
*/ - assert(subresource->aspectMask != 0); - assert(subresource->levelCount > 0); - assert(subresource->layerCount > 0); - assert(subresource->baseMipLevel < image->levels); - assert(subresource->baseMipLevel + subresource->levelCount <= image->levels); - assert(subresource->baseArrayLayer < image->array_size); - assert(subresource->baseArrayLayer + subresource->layerCount <= image->array_size); - assert(pView); - - const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT - | VK_IMAGE_ASPECT_STENCIL_BIT; - - /* Validate format. */ - if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); - assert(!image->format->has_depth); - assert(!image->format->has_stencil); - assert(!view_format_info->has_depth); - assert(!view_format_info->has_stencil); - assert(view_format_info->isl_layout->bs == - image->format->isl_layout->bs); - } else if (subresource->aspectMask & ds_flags) { - assert((subresource->aspectMask & ~ds_flags) == 0); - - if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { - assert(image->format->has_depth); - assert(view_format_info->has_depth); - assert(view_format_info->isl_layout->bs == - image->format->isl_layout->bs); - } - - if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { - /* FINISHME: Is it legal to have an R8 view of S8? 
*/ - assert(image->format->has_stencil); - assert(view_format_info->has_stencil); - } - } else { - assert(!"bad VkImageSubresourceRange::aspectFlags"); - } - - return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView); -} - -void -anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) -{ - switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - gen75_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - else - gen7_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - break; - case 8: - gen8_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - break; - case 9: - gen9_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - break; - default: - unreachable("unsupported gen\n"); - } - - if (!device->info.has_llc) - anv_state_clflush(state); -} - -static struct anv_state -alloc_surface_state(struct anv_device *device, - struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer) { - return anv_cmd_buffer_alloc_surface_state(cmd_buffer); - } else { - return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } -} - -static bool -has_matching_storage_typed_format(const struct anv_device *device, - enum isl_format format) -{ - return (isl_format_get_layout(format)->bs <= 4 || - (isl_format_get_layout(format)->bs <= 8 && - (device->info.gen >= 8 || device->info.is_haswell)) || - device->info.gen >= 9); -} - -static VkComponentSwizzle -remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component, - struct anv_format_swizzle format_swizzle) -{ - if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) - swizzle = component; - - switch (swizzle) { - case VK_COMPONENT_SWIZZLE_ZERO: - return VK_COMPONENT_SWIZZLE_ZERO; - case VK_COMPONENT_SWIZZLE_ONE: - return VK_COMPONENT_SWIZZLE_ONE; - case VK_COMPONENT_SWIZZLE_R: - return 
VK_COMPONENT_SWIZZLE_R + format_swizzle.r; - case VK_COMPONENT_SWIZZLE_G: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.g; - case VK_COMPONENT_SWIZZLE_B: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.b; - case VK_COMPONENT_SWIZZLE_A: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.a; - default: - unreachable("Invalid swizzle"); - } -} - -void -anv_image_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer, - uint32_t offset) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - VkImageViewCreateInfo mCreateInfo; - memcpy(&mCreateInfo, pCreateInfo, sizeof(VkImageViewCreateInfo)); - - assert(range->layerCount > 0); - assert(range->baseMipLevel < image->levels); - assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); - - switch (image->type) { - default: - unreachable("bad VkImageType"); - case VK_IMAGE_TYPE_1D: - case VK_IMAGE_TYPE_2D: - assert(range->baseArrayLayer + range->layerCount - 1 <= image->array_size); - break; - case VK_IMAGE_TYPE_3D: - assert(range->baseArrayLayer + range->layerCount - 1 - <= anv_minify(image->extent.depth, range->baseMipLevel)); - break; - } - - struct anv_surface *surface = - anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - - iview->image = image; - iview->bo = image->bo; - iview->offset = image->offset + surface->offset + offset; - - iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; - iview->vk_format = pCreateInfo->format; - - struct anv_format_swizzle swizzle; - iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, - image->tiling, &swizzle); - iview->swizzle.r = remap_swizzle(pCreateInfo->components.r, - VK_COMPONENT_SWIZZLE_R, swizzle); - iview->swizzle.g = 
remap_swizzle(pCreateInfo->components.g, - VK_COMPONENT_SWIZZLE_G, swizzle); - iview->swizzle.b = remap_swizzle(pCreateInfo->components.b, - VK_COMPONENT_SWIZZLE_B, swizzle); - iview->swizzle.a = remap_swizzle(pCreateInfo->components.a, - VK_COMPONENT_SWIZZLE_A, swizzle); - - iview->base_layer = range->baseArrayLayer; - iview->base_mip = range->baseMipLevel; - - if (!isl_format_is_compressed(iview->format) && - isl_format_is_compressed(image->format->isl_format)) { - /* Scale the ImageView extent by the backing Image. This is used - * internally when an uncompressed ImageView is created on a - * compressed Image. The ImageView can therefore be used for copying - * data from a source Image to a destination Image. - */ - const struct isl_format_layout * isl_layout = image->format->isl_layout; - - iview->level_0_extent.depth = anv_minify(image->extent.depth, range->baseMipLevel); - iview->level_0_extent.depth = DIV_ROUND_UP(iview->level_0_extent.depth, isl_layout->bd); - - iview->level_0_extent.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; - iview->level_0_extent.width = isl_surf_get_row_pitch_el(&surface->isl); - mCreateInfo.subresourceRange.baseMipLevel = 0; - mCreateInfo.subresourceRange.baseArrayLayer = 0; - } else { - iview->level_0_extent.width = image->extent.width; - iview->level_0_extent.height = image->extent.height; - iview->level_0_extent.depth = image->extent.depth; - } - - iview->extent = (VkExtent3D) { - .width = anv_minify(iview->level_0_extent.width , range->baseMipLevel), - .height = anv_minify(iview->level_0_extent.height, range->baseMipLevel), - .depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel), - }; - - if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { - iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer); - - anv_fill_image_surface_state(device, iview->sampler_surface_state, - iview, &mCreateInfo, - VK_IMAGE_USAGE_SAMPLED_BIT); - } else { - 
iview->sampler_surface_state.alloc_size = 0; - } - - if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); - - anv_fill_image_surface_state(device, iview->color_rt_surface_state, - iview, &mCreateInfo, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - } else { - iview->color_rt_surface_state.alloc_size = 0; - } - - if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { - iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); - - if (has_matching_storage_typed_format(device, iview->format)) - anv_fill_image_surface_state(device, iview->storage_surface_state, - iview, &mCreateInfo, - VK_IMAGE_USAGE_STORAGE_BIT); - else - anv_fill_buffer_surface_state(device, iview->storage_surface_state, - ISL_FORMAT_RAW, - iview->offset, - iview->bo->size - iview->offset, 1); - - } else { - iview->storage_surface_state.alloc_size = 0; - } -} - -VkResult -anv_CreateImageView(VkDevice _device, - const VkImageViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImageView *pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_image_view *view; - - view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (view == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_image_view_init(view, device, pCreateInfo, NULL, 0); - - *pView = anv_image_view_to_handle(view); - - return VK_SUCCESS; -} - -void -anv_DestroyImageView(VkDevice _device, VkImageView _iview, - const VkAllocationCallbacks *pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_image_view, iview, _iview); - - if (iview->color_rt_surface_state.alloc_size > 0) { - anv_state_pool_free(&device->surface_state_pool, - iview->color_rt_surface_state); - } - - if (iview->sampler_surface_state.alloc_size > 0) { - anv_state_pool_free(&device->surface_state_pool, - iview->sampler_surface_state); - } - - if 
(iview->storage_surface_state.alloc_size > 0) { - anv_state_pool_free(&device->surface_state_pool, - iview->storage_surface_state); - } - - anv_free2(&device->alloc, pAllocator, iview); -} - -VkResult -anv_CreateBufferView(VkDevice _device, - const VkBufferViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkBufferView *pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); - struct anv_buffer_view *view; - - view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!view) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - const struct anv_format *format = - anv_format_for_vk_format(pCreateInfo->format); - - view->format = format->isl_format; - view->bo = buffer->bo; - view->offset = buffer->offset + pCreateInfo->offset; - view->range = pCreateInfo->range == VK_WHOLE_SIZE ? - buffer->size - view->offset : pCreateInfo->range; - - if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) { - view->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - - anv_fill_buffer_surface_state(device, view->surface_state, - view->format, - view->offset, view->range, - format->isl_layout->bs); - } else { - view->surface_state = (struct anv_state){ 0 }; - } - - if (buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { - view->storage_surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - - enum isl_format storage_format = - has_matching_storage_typed_format(device, view->format) ? - isl_lower_storage_image_format(&device->isl_dev, view->format) : - ISL_FORMAT_RAW; - - anv_fill_buffer_surface_state(device, view->storage_surface_state, - storage_format, - view->offset, view->range, - (storage_format == ISL_FORMAT_RAW ? 
1 : - format->isl_layout->bs)); - - } else { - view->storage_surface_state = (struct anv_state){ 0 }; - } - - *pView = anv_buffer_view_to_handle(view); - - return VK_SUCCESS; -} - -void -anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, - const VkAllocationCallbacks *pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_buffer_view, view, bufferView); - - if (view->surface_state.alloc_size > 0) - anv_state_pool_free(&device->surface_state_pool, - view->surface_state); - - if (view->storage_surface_state.alloc_size > 0) - anv_state_pool_free(&device->surface_state_pool, - view->storage_surface_state); - - anv_free2(&device->alloc, pAllocator, view); -} - -struct anv_surface * -anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlags aspect_mask) -{ - switch (aspect_mask) { - case VK_IMAGE_ASPECT_COLOR_BIT: - /* Dragons will eat you. - * - * Meta attaches all destination surfaces as color render targets. Guess - * what surface the Meta Dragons really want. - */ - if (image->format->has_depth && image->format->has_stencil) { - return &image->depth_surface; - } else if (image->format->has_depth) { - return &image->depth_surface; - } else if (image->format->has_stencil) { - return &image->stencil_surface; - } else { - return &image->color_surface; - } - break; - case VK_IMAGE_ASPECT_DEPTH_BIT: - assert(image->format->has_depth); - return &image->depth_surface; - case VK_IMAGE_ASPECT_STENCIL_BIT: - assert(image->format->has_stencil); - return &image->stencil_surface; - case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: - if (image->format->has_depth && image->format->has_stencil) { - /* FINISHME: The Vulkan spec (git a511ba2) requires support for - * combined depth stencil formats. Specifically, it states: - * - * At least one of ename:VK_FORMAT_D24_UNORM_S8_UINT or - * ename:VK_FORMAT_D32_SFLOAT_S8_UINT must be supported. 
- * - * Image views with both depth and stencil aspects are only valid for - * render target attachments, in which case - * cmd_buffer_emit_depth_stencil() will pick out both the depth and - * stencil surfaces from the underlying surface. - */ - return &image->depth_surface; - } else if (image->format->has_depth) { - return &image->depth_surface; - } else if (image->format->has_stencil) { - return &image->stencil_surface; - } - /* fallthrough */ - default: - unreachable("image does not have aspect"); - return NULL; - } -} - -static void -image_param_defaults(struct brw_image_param *param) -{ - memset(param, 0, sizeof *param); - /* Set the swizzling shifts to all-ones to effectively disable swizzling -- - * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more - * detailed explanation of these parameters. - */ - param->swizzling[0] = 0xff; - param->swizzling[1] = 0xff; -} - -void -anv_image_view_fill_image_param(struct anv_device *device, - struct anv_image_view *view, - struct brw_image_param *param) -{ - image_param_defaults(param); - - const struct isl_surf *surf = &view->image->color_surface.isl; - const int cpp = isl_format_get_layout(surf->format)->bs; - const struct isl_extent3d image_align_sa = - isl_surf_get_image_alignment_sa(surf); - - param->size[0] = view->extent.width; - param->size[1] = view->extent.height; - if (surf->dim == ISL_SURF_DIM_3D) { - param->size[2] = view->extent.depth; - } else { - param->size[2] = surf->logical_level0_px.array_len - view->base_layer; - } - - isl_surf_get_image_offset_el(surf, view->base_mip, view->base_layer, 0, - ¶m->offset[0], ¶m->offset[1]); - - param->stride[0] = cpp; - param->stride[1] = surf->row_pitch / cpp; - - if (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D) { - param->stride[2] = util_align_npot(param->size[0], image_align_sa.w); - param->stride[3] = util_align_npot(param->size[1], image_align_sa.h); - } else { - param->stride[2] = 0; - param->stride[3] = 
isl_surf_get_array_pitch_el_rows(surf); - } - - switch (surf->tiling) { - case ISL_TILING_LINEAR: - /* image_param_defaults is good enough */ - break; - - case ISL_TILING_X: - /* An X tile is a rectangular block of 512x8 bytes. */ - param->tiling[0] = util_logbase2(512 / cpp); - param->tiling[1] = util_logbase2(8); - - if (device->isl_dev.has_bit6_swizzling) { - /* Right shifts required to swizzle bits 9 and 10 of the memory - * address with bit 6. - */ - param->swizzling[0] = 3; - param->swizzling[1] = 4; - } - break; - - case ISL_TILING_Y0: - /* The layout of a Y-tiled surface in memory isn't really fundamentally - * different to the layout of an X-tiled surface, we simply pretend that - * the surface is broken up in a number of smaller 16Bx32 tiles, each - * one arranged in X-major order just like is the case for X-tiling. - */ - param->tiling[0] = util_logbase2(16 / cpp); - param->tiling[1] = util_logbase2(32); - - if (device->isl_dev.has_bit6_swizzling) { - /* Right shift required to swizzle bit 9 of the memory address with - * bit 6. - */ - param->swizzling[0] = 3; - param->swizzling[1] = 0xff; - } - break; - - default: - assert(!"Unhandled storage image tiling"); - } - - /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The - * address calculation algorithm (emit_address_calculation() in - * brw_fs_surface_builder.cpp) handles this as a sort of tiling with - * modulus equal to the LOD. - */ - param->tiling[2] = (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D ? 
- view->base_mip : 0); -} - -void -anv_buffer_view_fill_image_param(struct anv_device *device, - struct anv_buffer_view *view, - struct brw_image_param *param) -{ - image_param_defaults(param); - - param->stride[0] = isl_format_layouts[view->format].bs; - param->size[0] = view->range / param->stride[0]; -} diff --git a/src/vulkan/anv_intel.c b/src/vulkan/anv_intel.c deleted file mode 100644 index d95d9afe8cf..00000000000 --- a/src/vulkan/anv_intel.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -VkResult anv_CreateDmaBufImageINTEL( - VkDevice _device, - const VkDmaBufImageCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDeviceMemory* pMem, - VkImage* pImage) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_device_memory *mem; - struct anv_image *image; - VkResult result; - VkImage image_h; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); - - mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (mem == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); - if (!mem->bo.gem_handle) { - result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - goto fail; - } - - mem->bo.map = NULL; - mem->bo.index = 0; - mem->bo.offset = 0; - mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; - - anv_image_create(_device, - &(struct anv_image_create_info) { - .isl_tiling_flags = ISL_TILING_X_BIT, - .stride = pCreateInfo->strideInBytes, - .vk_info = - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = pCreateInfo->format, - .extent = pCreateInfo->extent, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - /* FIXME: Need a way to use X tiling to allow scanout */ - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }}, - pAllocator, &image_h); - - image = anv_image_from_handle(image_h); - image->bo = &mem->bo; - image->offset = 0; - - assert(image->extent.width > 0); - assert(image->extent.height > 0); - assert(image->extent.depth == 1); - - *pMem = anv_device_memory_to_handle(mem); - *pImage = anv_image_to_handle(image); - - return VK_SUCCESS; - - fail: - anv_free2(&device->alloc, pAllocator, mem); - - return result; -} diff --git a/src/vulkan/anv_meta.c 
b/src/vulkan/anv_meta.c deleted file mode 100644 index 82944ea1a92..00000000000 --- a/src/vulkan/anv_meta.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "anv_meta.h" - -struct anv_render_pass anv_meta_dummy_renderpass = {0}; - -void -anv_meta_save(struct anv_meta_saved_state *state, - const struct anv_cmd_buffer *cmd_buffer, - uint32_t dynamic_mask) -{ - state->old_pipeline = cmd_buffer->state.pipeline; - state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; - memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, - sizeof(state->old_vertex_bindings)); - - state->dynamic_mask = dynamic_mask; - anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic, - dynamic_mask); -} - -void -anv_meta_restore(const struct anv_meta_saved_state *state, - struct anv_cmd_buffer *cmd_buffer) -{ - cmd_buffer->state.pipeline = state->old_pipeline; - cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; - memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, - sizeof(state->old_vertex_bindings)); - - cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1; - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - - anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, - state->dynamic_mask); - cmd_buffer->state.dirty |= state->dynamic_mask; - - /* Since we've used the pipeline with the VS disabled, set - * need_query_wa. See CmdBeginQuery. - */ - cmd_buffer->state.need_query_wa = true; -} - -VkImageViewType -anv_meta_get_view_type(const struct anv_image *image) -{ - switch (image->type) { - case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D; - case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D; - case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D; - default: - unreachable("bad VkImageViewType"); - } -} - -/** - * When creating a destination VkImageView, this function provides the needed - * VkImageViewCreateInfo::subresourceRange::baseArrayLayer. 
- */ -uint32_t -anv_meta_get_iview_layer(const struct anv_image *dest_image, - const VkImageSubresourceLayers *dest_subresource, - const VkOffset3D *dest_offset) -{ - switch (dest_image->type) { - case VK_IMAGE_TYPE_1D: - case VK_IMAGE_TYPE_2D: - return dest_subresource->baseArrayLayer; - case VK_IMAGE_TYPE_3D: - /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer, - * but meta does it anyway. When doing so, we translate the - * destination's z offset into an array offset. - */ - return dest_offset->z; - default: - assert(!"bad VkImageType"); - return 0; - } -} - -static void * -meta_alloc(void* _device, size_t size, size_t alignment, - VkSystemAllocationScope allocationScope) -{ - struct anv_device *device = _device; - return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); -} - -static void * -meta_realloc(void* _device, void *original, size_t size, size_t alignment, - VkSystemAllocationScope allocationScope) -{ - struct anv_device *device = _device; - return device->alloc.pfnReallocation(device->alloc.pUserData, original, - size, alignment, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); -} - -static void -meta_free(void* _device, void *data) -{ - struct anv_device *device = _device; - return device->alloc.pfnFree(device->alloc.pUserData, data); -} - -VkResult -anv_device_init_meta(struct anv_device *device) -{ - VkResult result; - - device->meta_state.alloc = (VkAllocationCallbacks) { - .pUserData = device, - .pfnAllocation = meta_alloc, - .pfnReallocation = meta_realloc, - .pfnFree = meta_free, - }; - - result = anv_device_init_meta_clear_state(device); - if (result != VK_SUCCESS) - goto fail_clear; - - result = anv_device_init_meta_resolve_state(device); - if (result != VK_SUCCESS) - goto fail_resolve; - - result = anv_device_init_meta_blit_state(device); - if (result != VK_SUCCESS) - goto fail_blit; - - return VK_SUCCESS; - -fail_blit: - anv_device_finish_meta_resolve_state(device); 
-fail_resolve: - anv_device_finish_meta_clear_state(device); -fail_clear: - return result; -} - -void -anv_device_finish_meta(struct anv_device *device) -{ - anv_device_finish_meta_resolve_state(device); - anv_device_finish_meta_clear_state(device); - anv_device_finish_meta_blit_state(device); -} diff --git a/src/vulkan/anv_meta.h b/src/vulkan/anv_meta.h deleted file mode 100644 index d33e9e6d8ba..00000000000 --- a/src/vulkan/anv_meta.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "anv_private.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define ANV_META_VERTEX_BINDING_COUNT 2 - -struct anv_meta_saved_state { - struct anv_vertex_binding old_vertex_bindings[ANV_META_VERTEX_BINDING_COUNT]; - struct anv_descriptor_set *old_descriptor_set0; - struct anv_pipeline *old_pipeline; - - /** - * Bitmask of (1 << VK_DYNAMIC_STATE_*). Defines the set of saved dynamic - * state. - */ - uint32_t dynamic_mask; - struct anv_dynamic_state dynamic; -}; - -VkResult anv_device_init_meta_clear_state(struct anv_device *device); -void anv_device_finish_meta_clear_state(struct anv_device *device); - -VkResult anv_device_init_meta_resolve_state(struct anv_device *device); -void anv_device_finish_meta_resolve_state(struct anv_device *device); - -VkResult anv_device_init_meta_blit_state(struct anv_device *device); -void anv_device_finish_meta_blit_state(struct anv_device *device); - -void -anv_meta_save(struct anv_meta_saved_state *state, - const struct anv_cmd_buffer *cmd_buffer, - uint32_t dynamic_mask); - -void -anv_meta_restore(const struct anv_meta_saved_state *state, - struct anv_cmd_buffer *cmd_buffer); - -VkImageViewType -anv_meta_get_view_type(const struct anv_image *image); - -uint32_t -anv_meta_get_iview_layer(const struct anv_image *dest_image, - const VkImageSubresourceLayers *dest_subresource, - const VkOffset3D *dest_offset); - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/anv_meta_blit.c b/src/vulkan/anv_meta_blit.c deleted file mode 100644 index 07ebcbc06b1..00000000000 --- a/src/vulkan/anv_meta_blit.c +++ /dev/null @@ -1,1442 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell 
copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_meta.h" -#include "nir/nir_builder.h" - -struct blit_region { - VkOffset3D src_offset; - VkExtent3D src_extent; - VkOffset3D dest_offset; - VkExtent3D dest_extent; -}; - -static nir_shader * -build_nir_vertex_shader(void) -{ - const struct glsl_type *vec4 = glsl_vec4_type(); - nir_builder b; - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); - - nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec4, "a_pos"); - pos_in->data.location = VERT_ATTRIB_GENERIC0; - nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, - vec4, "gl_Position"); - pos_out->data.location = VARYING_SLOT_POS; - nir_copy_var(&b, pos_out, pos_in); - - nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec4, "a_tex_pos"); - tex_pos_in->data.location = VERT_ATTRIB_GENERIC1; - nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, - vec4, "v_tex_pos"); - tex_pos_out->data.location = VARYING_SLOT_VAR0; - tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; - nir_copy_var(&b, tex_pos_out, tex_pos_in); - - return 
b.shader; -} - -static nir_shader * -build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) -{ - const struct glsl_type *vec4 = glsl_vec4_type(); - nir_builder b; - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); - - nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec4, "v_tex_pos"); - tex_pos_in->data.location = VARYING_SLOT_VAR0; - - /* Swizzle the array index which comes in as Z coordinate into the right - * position. - */ - unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; - nir_ssa_def *const tex_pos = - nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, - (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); - - const struct glsl_type *sampler_type = - glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, - glsl_get_base_type(vec4)); - nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, - sampler_type, "s_tex"); - sampler->data.descriptor_set = 0; - sampler->data.binding = 0; - - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); - tex->sampler_dim = tex_dim; - tex->op = nir_texop_tex; - tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(tex_pos); - tex->dest_type = nir_type_float; /* TODO */ - tex->is_array = glsl_sampler_type_is_array(sampler_type); - tex->coord_components = tex_pos->num_components; - tex->texture = nir_deref_var_create(tex, sampler); - tex->sampler = nir_deref_var_create(tex, sampler); - - nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); - nir_builder_instr_insert(&b, &tex->instr); - - nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, - vec4, "f_color"); - color_out->data.location = FRAG_RESULT_DATA0; - nir_store_var(&b, color_out, &tex->dest.ssa, 4); - - return b.shader; -} - -static void -meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_saved_state *saved_state) -{ - 
anv_meta_save(saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); -} - -/* Returns the user-provided VkBufferImageCopy::imageOffset in units of - * elements rather than texels. One element equals one texel or one block - * if Image is uncompressed or compressed, respectively. - */ -static struct VkOffset3D -meta_region_offset_el(const struct anv_image * image, - const struct VkOffset3D * offset) -{ - const struct isl_format_layout * isl_layout = image->format->isl_layout; - return (VkOffset3D) { - .x = offset->x / isl_layout->bw, - .y = offset->y / isl_layout->bh, - .z = offset->z / isl_layout->bd, - }; -} - -/* Returns the user-provided VkBufferImageCopy::imageExtent in units of - * elements rather than texels. One element equals one texel or one block - * if Image is uncompressed or compressed, respectively. - */ -static struct VkExtent3D -meta_region_extent_el(const VkFormat format, - const struct VkExtent3D * extent) -{ - const struct isl_format_layout * isl_layout = - anv_format_for_vk_format(format)->isl_layout; - return (VkExtent3D) { - .width = DIV_ROUND_UP(extent->width , isl_layout->bw), - .height = DIV_ROUND_UP(extent->height, isl_layout->bh), - .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), - }; -} - -static void -meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_image *src_image, - struct anv_image_view *src_iview, - VkOffset3D src_offset, - VkExtent3D src_extent, - struct anv_image *dest_image, - struct anv_image_view *dest_iview, - VkOffset3D dest_offset, - VkExtent3D dest_extent, - VkFilter blit_filter) -{ - struct anv_device *device = cmd_buffer->device; - VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; - - struct blit_vb_data { - float pos[2]; - float tex_coord[3]; - } *vb_data; - - assert(src_image->samples == dest_image->samples); - - unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); - - struct anv_state vb_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); - 
memset(vb_state.map, 0, sizeof(struct anv_vue_header)); - vb_data = vb_state.map + sizeof(struct anv_vue_header); - - vb_data[0] = (struct blit_vb_data) { - .pos = { - dest_offset.x + dest_extent.width, - dest_offset.y + dest_extent.height, - }, - .tex_coord = { - (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, - }, - }; - - vb_data[1] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y + dest_extent.height, - }, - .tex_coord = { - (float)src_offset.x / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, - }, - }; - - vb_data[2] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y, - }, - .tex_coord = { - (float)src_offset.x / (float)src_iview->extent.width, - (float)src_offset.y / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, - }, - }; - - anv_state_clflush(vb_state); - - struct anv_buffer vertex_buffer = { - .device = device, - .size = vb_size, - .bo = &device->dynamic_state_block_pool.bo, - .offset = vb_state.offset, - }; - - anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, - (VkBuffer[]) { - anv_buffer_to_handle(&vertex_buffer), - anv_buffer_to_handle(&vertex_buffer) - }, - (VkDeviceSize[]) { - 0, - sizeof(struct anv_vue_header), - }); - - VkSampler sampler; - ANV_CALL(CreateSampler)(anv_device_to_handle(device), - &(VkSamplerCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = blit_filter, - .minFilter = blit_filter, - }, &cmd_buffer->pool->alloc, &sampler); - - VkDescriptorSet set; - anv_AllocateDescriptorSets(anv_device_to_handle(device), - &(VkDescriptorSetAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = 
dummy_desc_pool, - .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit.ds_layout - }, &set); - anv_UpdateDescriptorSets(anv_device_to_handle(device), - 1, /* writeCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = set, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = sampler, - .imageView = anv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - } - }, 0, NULL); - - VkFramebuffer fb; - anv_CreateFramebuffer(anv_device_to_handle(device), - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(dest_iview), - }, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .layers = 1 - }, &cmd_buffer->pool->alloc, &fb); - - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = device->meta_state.blit.render_pass, - .framebuffer = fb, - .renderArea = { - .offset = { dest_offset.x, dest_offset.y }, - .extent = { dest_extent.width, dest_extent.height }, - }, - .clearValueCount = 0, - .pClearValues = NULL, - }, VK_SUBPASS_CONTENTS_INLINE); - - VkPipeline pipeline; - - switch (src_image->type) { - case VK_IMAGE_TYPE_1D: - pipeline = device->meta_state.blit.pipeline_1d_src; - break; - case VK_IMAGE_TYPE_2D: - pipeline = device->meta_state.blit.pipeline_2d_src; - break; - case VK_IMAGE_TYPE_3D: - pipeline = device->meta_state.blit.pipeline_3d_src; - break; - default: - unreachable(!"bad VkImageType"); - } - - if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { - anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - } - - 
anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - - anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit.pipeline_layout, 0, 1, - &set, 0, NULL); - - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); - - ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); - - /* At the point where we emit the draw call, all data from the - * descriptor sets, etc. has been used. We are free to delete it. - */ - anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); - anv_DestroySampler(anv_device_to_handle(device), sampler, - &cmd_buffer->pool->alloc); - anv_DestroyFramebuffer(anv_device_to_handle(device), fb, - &cmd_buffer->pool->alloc); -} - -static void -meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, - const struct anv_meta_saved_state *saved_state) -{ - anv_meta_restore(saved_state, cmd_buffer); -} - -static VkFormat -vk_format_for_size(int bs) -{ - /* Note: We intentionally use the 4-channel formats whenever we can. - * This is so that, when we do a RGB <-> RGBX copy, the two formats will - * line up even though one of them is 3/4 the size of the other. 
- */ - switch (bs) { - case 1: return VK_FORMAT_R8_UINT; - case 2: return VK_FORMAT_R8G8_UINT; - case 3: return VK_FORMAT_R8G8B8_UINT; - case 4: return VK_FORMAT_R8G8B8A8_UINT; - case 6: return VK_FORMAT_R16G16B16_UINT; - case 8: return VK_FORMAT_R16G16B16A16_UINT; - case 12: return VK_FORMAT_R32G32B32_UINT; - case 16: return VK_FORMAT_R32G32B32A32_UINT; - default: - unreachable("Invalid format block size"); - } -} - -static void -do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *src, uint64_t src_offset, - struct anv_bo *dest, uint64_t dest_offset, - int width, int height, VkFormat copy_format) -{ - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - - VkImageCreateInfo image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = copy_format, - .extent = { - .width = width, - .height = height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = 0, - .flags = 0, - }; - - VkImage src_image; - image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; - anv_CreateImage(vk_device, &image_info, - &cmd_buffer->pool->alloc, &src_image); - - VkImage dest_image; - image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - anv_CreateImage(vk_device, &image_info, - &cmd_buffer->pool->alloc, &dest_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. 
- */ - anv_image_from_handle(src_image)->bo = src; - anv_image_from_handle(src_image)->offset = src_offset; - anv_image_from_handle(dest_image)->bo = dest; - anv_image_from_handle(dest_image)->offset = dest_offset; - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = src_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = dest_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, - cmd_buffer, 0); - - meta_emit_blit(cmd_buffer, - anv_image_from_handle(src_image), - &src_iview, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }, - anv_image_from_handle(dest_image), - &dest_iview, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }, - VK_FILTER_NEAREST); - - anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); - anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc); -} - -void anv_CmdCopyBuffer( - VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); - ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); - - struct anv_meta_saved_state saved_state; - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - 
uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; - uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset; - uint64_t copy_size = pRegions[r].size; - - /* First, we compute the biggest format that can be used with the - * given offsets and size. - */ - int bs = 16; - - int fs = ffs(src_offset) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(src_offset % bs == 0); - - fs = ffs(dest_offset) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(dest_offset % bs == 0); - - fs = ffs(pRegions[r].size) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(pRegions[r].size % bs == 0); - - VkFormat copy_format = vk_format_for_size(bs); - - /* This is maximum possible width/height our HW can handle */ - uint64_t max_surface_dim = 1 << 14; - - /* First, we make a bunch of max-sized copies */ - uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; - while (copy_size >= max_copy_size) { - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - max_surface_dim, max_surface_dim, copy_format); - copy_size -= max_copy_size; - src_offset += max_copy_size; - dest_offset += max_copy_size; - } - - uint64_t height = copy_size / (max_surface_dim * bs); - assert(height < max_surface_dim); - if (height != 0) { - uint64_t rect_copy_size = height * max_surface_dim * bs; - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - max_surface_dim, height, copy_format); - copy_size -= rect_copy_size; - src_offset += rect_copy_size; - dest_offset += rect_copy_size; - } - - if (copy_size != 0) { - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - copy_size / bs, 1, copy_format); - } - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdUpdateBuffer( - VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize dataSize, - const uint32_t* pData) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, 
commandBuffer); - ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); - struct anv_meta_saved_state saved_state; - - meta_prepare_blit(cmd_buffer, &saved_state); - - /* We can't quite grab a full block because the state stream needs a - * little data at the top to build its linked list. - */ - const uint32_t max_update_size = - cmd_buffer->device->dynamic_state_block_pool.block_size - 64; - - assert(max_update_size < (1 << 14) * 4); - - while (dataSize) { - const uint32_t copy_size = MIN2(dataSize, max_update_size); - - struct anv_state tmp_data = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); - - memcpy(tmp_data.map, pData, copy_size); - - VkFormat format; - int bs; - if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { - format = VK_FORMAT_R32G32B32A32_UINT; - bs = 16; - } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { - format = VK_FORMAT_R32G32_UINT; - bs = 8; - } else { - assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); - format = VK_FORMAT_R32_UINT; - bs = 4; - } - - do_buffer_copy(cmd_buffer, - &cmd_buffer->device->dynamic_state_block_pool.bo, - tmp_data.offset, - dst_buffer->bo, dst_buffer->offset + dstOffset, - copy_size / bs, 1, format); - - dataSize -= copy_size; - dstOffset += copy_size; - pData = (void *)pData + copy_size; - } -} - -static VkFormat -choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) -{ - assert(__builtin_popcount(aspect) == 1); - - struct isl_surf *surf = - &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; - - /* vkCmdCopyImage behaves like memcpy. Therefore we choose identical UINT - * formats for the source and destination image views. - * - * From the Vulkan spec (2015-12-30): - * - * vkCmdCopyImage performs image copies in a similar manner to a host - * memcpy. It does not perform general-purpose conversions such as - * scaling, resizing, blending, color-space conversion, or format - * conversions. Rather, it simply copies raw image data. 
vkCmdCopyImage - * can copy between images with different formats, provided the formats - * are compatible as defined below. - * - * [The spec later defines compatibility as having the same number of - * bytes per block]. - */ - return vk_format_for_size(isl_format_layouts[surf->format].bs); -} - -static VkFormat -choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect) -{ - assert(__builtin_popcount(aspect) == 1); - - /* vkCmdCopy* commands behave like memcpy. Therefore we choose - * compatable UINT formats for the source and destination image views. - * - * For the buffer, we go back to the original image format and get a - * the format as if it were linear. This way, for RGB formats, we get - * an RGB format here even if the tiled image is RGBA. XXX: This doesn't - * work if the buffer is the destination. - */ - enum isl_format linear_format = anv_get_isl_format(format, aspect, - VK_IMAGE_TILING_LINEAR, - NULL); - - return vk_format_for_size(isl_format_layouts[linear_format].bs); -} - -void anv_CmdCopyImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - struct anv_meta_saved_state saved_state; - - /* From the Vulkan 1.0 spec: - * - * vkCmdCopyImage can be used to copy image data between multisample - * images, but both images must have the same number of samples. 
- */ - assert(src_image->samples == dest_image->samples); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - assert(pRegions[r].srcSubresource.aspectMask == - pRegions[r].dstSubresource.aspectMask); - - VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; - - VkFormat src_format = choose_iview_format(src_image, aspect); - VkFormat dst_format = choose_iview_format(dest_image, aspect); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = src_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, - .layerCount = pRegions[r].dstSubresource.layerCount, - }, - }, - cmd_buffer, 0); - - const VkOffset3D dest_offset = { - .x = pRegions[r].dstOffset.x, - .y = pRegions[r].dstOffset.y, - .z = 0, - }; - - unsigned num_slices; - if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(pRegions[r].srcSubresource.layerCount == 1 && - pRegions[r].dstSubresource.layerCount == 1); - num_slices = pRegions[r].extent.depth; - } else { - assert(pRegions[r].srcSubresource.layerCount == - pRegions[r].dstSubresource.layerCount); - assert(pRegions[r].extent.depth == 1); - num_slices = pRegions[r].dstSubresource.layerCount; - } - - const uint32_t dest_base_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, - &pRegions[r].dstOffset); - - for (unsigned slice = 0; slice < num_slices; slice++) { - VkOffset3D src_offset = pRegions[r].srcOffset; - src_offset.z += slice; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = 
destImage, - .viewType = anv_meta_get_view_type(dest_image), - .format = dst_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].dstSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + slice, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - meta_emit_blit(cmd_buffer, - src_image, &src_iview, - src_offset, - pRegions[r].extent, - dest_image, &dest_iview, - dest_offset, - pRegions[r].extent, - VK_FILTER_NEAREST); - } - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdBlitImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageBlit* pRegions, - VkFilter filter) - -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - struct anv_meta_saved_state saved_state; - - /* From the Vulkan 1.0 spec: - * - * vkCmdBlitImage must not be used for multisampled source or - * destination images. Use vkCmdResolveImage for this purpose. 
- */ - assert(src_image->samples == 1); - assert(dest_image->samples == 1); - - anv_finishme("respect VkFilter"); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = src_image->vk_format, - .subresourceRange = { - .aspectMask = pRegions[r].srcSubresource.aspectMask, - .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - const VkOffset3D dest_offset = { - .x = pRegions[r].dstOffsets[0].x, - .y = pRegions[r].dstOffsets[0].y, - .z = 0, - }; - - if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x || - pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y || - pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x || - pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y) - anv_finishme("FINISHME: Allow flipping in blits"); - - const VkExtent3D dest_extent = { - .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x, - .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y, - }; - - const VkExtent3D src_extent = { - .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x, - .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y, - }; - - const uint32_t dest_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, - &pRegions[r].dstOffsets[0]); - - if (pRegions[r].srcSubresource.layerCount > 1) - anv_finishme("FINISHME: copy multiple array layers"); - - if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z || - pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z) - anv_finishme("FINISHME: copy multiple depth layers"); - - struct 
anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = destImage, - .viewType = anv_meta_get_view_type(dest_image), - .format = dest_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].dstSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_array_slice, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - meta_emit_blit(cmd_buffer, - src_image, &src_iview, - pRegions[r].srcOffsets[0], src_extent, - dest_image, &dest_iview, - dest_offset, dest_extent, - filter); - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -static struct anv_image * -make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, - VkImageUsageFlags usage, - VkImageType image_type, - const VkAllocationCallbacks *alloc, - const VkBufferImageCopy *copy) -{ - ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); - - VkExtent3D extent = copy->imageExtent; - if (copy->bufferRowLength) - extent.width = copy->bufferRowLength; - if (copy->bufferImageHeight) - extent.height = copy->bufferImageHeight; - extent.depth = 1; - extent = meta_region_extent_el(format, &extent); - - VkImageAspectFlags aspect = copy->imageSubresource.aspectMask; - VkFormat buffer_format = choose_buffer_format(format, aspect); - - VkImage vk_image; - VkResult result = anv_CreateImage(vk_device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = buffer_format, - .extent = extent, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = usage, - .flags = 0, - }, alloc, &vk_image); - assert(result == VK_SUCCESS); - - ANV_FROM_HANDLE(anv_image, image, vk_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. 
- */ - image->bo = buffer->bo; - image->offset = buffer->offset + copy->bufferOffset; - - return image; -} - -void anv_CmdCopyBufferToImage( - VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkBufferImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_meta_saved_state saved_state; - - /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to - * VK_SAMPLE_COUNT_1_BIT." - */ - assert(dest_image->samples == 1); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - - VkFormat image_format = choose_iview_format(dest_image, aspect); - - struct anv_image *src_image = - make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format, - VK_IMAGE_USAGE_SAMPLED_BIT, - dest_image->type, &cmd_buffer->pool->alloc, - &pRegions[r]); - - const uint32_t dest_base_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource, - &pRegions[r].imageOffset); - - unsigned num_slices_3d = pRegions[r].imageExtent.depth; - unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; - unsigned slice_3d = 0; - unsigned slice_array = 0; - while (slice_3d < num_slices_3d && slice_array < num_slices_array) { - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(src_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, - cmd_buffer, 0); - - uint32_t img_x = 0; 
- uint32_t img_y = 0; - uint32_t img_o = 0; - if (isl_format_is_compressed(dest_image->format->isl_format)) - isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, - &dest_image->color_surface.isl, - pRegions[r].imageSubresource.mipLevel, - pRegions[r].imageSubresource.baseArrayLayer + slice_array, - pRegions[r].imageOffset.z + slice_3d, - &img_o, &img_x, &img_y); - - VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, & pRegions[r].imageOffset); - dest_offset_el.x += img_x; - dest_offset_el.y += img_y; - dest_offset_el.z = 0; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(dest_image), - .viewType = anv_meta_get_view_type(dest_image), - .format = image_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + - slice_array + slice_3d, - .layerCount = 1 - }, - }, - cmd_buffer, img_o); - - const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, - &pRegions[r].imageExtent); - - meta_emit_blit(cmd_buffer, - src_image, - &src_iview, - (VkOffset3D){0, 0, 0}, - img_extent_el, - dest_image, - &dest_iview, - dest_offset_el, - img_extent_el, - VK_FILTER_NEAREST); - - /* Once we've done the blit, all of the actual information about - * the image is embedded in the command buffer so we can just - * increment the offset directly in the image effectively - * re-binding it to different backing memory. 
- */ - src_image->offset += src_image->extent.width * - src_image->extent.height * - src_image->format->isl_layout->bs; - - if (dest_image->type == VK_IMAGE_TYPE_3D) - slice_3d++; - else - slice_array++; - } - - anv_DestroyImage(vk_device, anv_image_to_handle(src_image), - &cmd_buffer->pool->alloc); - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdCopyImageToBuffer( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_meta_saved_state saved_state; - - - /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to - * VK_SAMPLE_COUNT_1_BIT." - */ - assert(src_image->samples == 1); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - - VkFormat image_format = choose_iview_format(src_image, aspect); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = image_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, - .layerCount = pRegions[r].imageSubresource.layerCount, - }, - }, - cmd_buffer, 0); - - struct anv_image *dest_image = - make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - src_image->type, &cmd_buffer->pool->alloc, - &pRegions[r]); - - unsigned num_slices; - if (src_image->type == 
VK_IMAGE_TYPE_3D) { - assert(pRegions[r].imageSubresource.layerCount == 1); - num_slices = pRegions[r].imageExtent.depth; - } else { - assert(pRegions[r].imageExtent.depth == 1); - num_slices = pRegions[r].imageSubresource.layerCount; - } - - for (unsigned slice = 0; slice < num_slices; slice++) { - VkOffset3D src_offset = pRegions[r].imageOffset; - src_offset.z += slice; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(dest_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = dest_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - meta_emit_blit(cmd_buffer, - anv_image_from_handle(srcImage), - &src_iview, - src_offset, - pRegions[r].imageExtent, - dest_image, - &dest_iview, - (VkOffset3D) { 0, 0, 0 }, - pRegions[r].imageExtent, - VK_FILTER_NEAREST); - - /* Once we've done the blit, all of the actual information about - * the image is embedded in the command buffer so we can just - * increment the offset directly in the image effectively - * re-binding it to different backing memory. 
- */ - dest_image->offset += dest_image->extent.width * - dest_image->extent.height * - src_image->format->isl_layout->bs; - } - - anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), - &cmd_buffer->pool->alloc); - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void -anv_device_finish_meta_blit_state(struct anv_device *device) -{ - anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_1d_src, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_3d_src, - &device->meta_state.alloc); - anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout, - &device->meta_state.alloc); - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout, - &device->meta_state.alloc); -} - -VkResult -anv_device_init_meta_blit_state(struct anv_device *device) -{ - VkResult result; - - result = anv_CreateRenderPass(anv_device_to_handle(device), - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .pResolveAttachments = NULL, - 
.pDepthStencilAttachment = &(VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveAttachmentCount = 1, - .pPreserveAttachments = (uint32_t[]) { 0 }, - }, - .dependencyCount = 0, - }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); - if (result != VK_SUCCESS) - goto fail; - - /* We don't use a vertex shader for blitting, but instead build and pass - * the VUEs directly to the rasterization backend. However, we do need - * to provide GLSL source for the vertex shader so that the compiler - * does not dead-code our inputs. - */ - struct anv_shader_module vs = { - .nir = build_nir_vertex_shader(), - }; - - struct anv_shader_module fs_1d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), - }; - - struct anv_shader_module fs_2d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), - }; - - struct anv_shader_module fs_3d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), - }; - - VkPipelineVertexInputStateCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 2, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = 0, - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - { - .binding = 1, - .stride = 5 * sizeof(float), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offset = 0 - }, - { - /* Position */ - .location = 1, - .binding = 1, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = 0 - }, - { - /* Texture Coordinate */ - .location = 2, - .binding = 1, - .format = VK_FORMAT_R32G32B32_SFLOAT, - .offset = 8 - } - } - }; - - VkDescriptorSetLayoutCreateInfo ds_layout_info = { - .sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - }, - } - }; - result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), - &ds_layout_info, - &device->meta_state.alloc, - &device->meta_state.blit.ds_layout); - if (result != VK_SUCCESS) - goto fail_render_pass; - - result = anv_CreatePipelineLayout(anv_device_to_handle(device), - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit.ds_layout, - }, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); - if (result != VK_SUCCESS) - goto fail_descriptor_set_layout; - - VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = anv_shader_module_to_handle(&vs), - .pName = "main", - .pSpecializationInfo = NULL - }, { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! 
*/ - .pName = "main", - .pSpecializationInfo = NULL - }, - }; - - const VkGraphicsPipelineCreateInfo vk_pipeline_info = { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = ARRAY_SIZE(pipeline_shader_stages), - .pStages = pipeline_shader_stages, - .pVertexInputState = &vi_create_info, - .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }, - .pViewportState = &(VkPipelineViewportStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, - }, - .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE - }, - .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterizationSamples = 1, - .sampleShadingEnable = false, - .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, - }, - .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { .colorWriteMask = - VK_COLOR_COMPONENT_A_BIT | - VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT }, - } - }, - .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 9, - .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_LINE_WIDTH, - VK_DYNAMIC_STATE_DEPTH_BIAS, - VK_DYNAMIC_STATE_BLEND_CONSTANTS, - 
VK_DYNAMIC_STATE_DEPTH_BOUNDS, - VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, - VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }, - }, - .flags = 0, - .layout = device->meta_state.blit.pipeline_layout, - .renderPass = device->meta_state.blit.render_pass, - .subpass = 0, - }; - - const struct anv_graphics_pipeline_create_info anv_pipeline_info = { - .color_attachment_count = -1, - .use_repclear = false, - .disable_viewport = true, - .disable_scissor = true, - .disable_vs = true, - .use_rectlist = true - }; - - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_layout; - - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_1d; - - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_2d; - - ralloc_free(vs.nir); - ralloc_free(fs_1d.nir); - ralloc_free(fs_2d.nir); - ralloc_free(fs_3d.nir); - - return VK_SUCCESS; - - fail_pipeline_2d: - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src, - &device->meta_state.alloc); - - fail_pipeline_1d: - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_1d_src, - &device->meta_state.alloc); - - fail_pipeline_layout: - 
anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout, - &device->meta_state.alloc); - fail_descriptor_set_layout: - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout, - &device->meta_state.alloc); - fail_render_pass: - anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass, - &device->meta_state.alloc); - - ralloc_free(vs.nir); - ralloc_free(fs_1d.nir); - ralloc_free(fs_2d.nir); - ralloc_free(fs_3d.nir); - fail: - return result; -} diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c deleted file mode 100644 index 739ae09582c..00000000000 --- a/src/vulkan/anv_meta_clear.c +++ /dev/null @@ -1,1098 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "anv_meta.h" -#include "anv_private.h" -#include "nir/nir_builder.h" - -/** Vertex attributes for color clears. */ -struct color_clear_vattrs { - struct anv_vue_header vue_header; - float position[2]; /**< 3DPRIM_RECTLIST */ - VkClearColorValue color; -}; - -/** Vertex attributes for depthstencil clears. */ -struct depthstencil_clear_vattrs { - struct anv_vue_header vue_header; - float position[2]; /*<< 3DPRIM_RECTLIST */ -}; - -static void -meta_clear_begin(struct anv_meta_saved_state *saved_state, - struct anv_cmd_buffer *cmd_buffer) -{ - anv_meta_save(saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT) | - (1 << VK_DYNAMIC_STATE_SCISSOR) | - (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)); - - cmd_buffer->state.dynamic.viewport.count = 0; - cmd_buffer->state.dynamic.scissor.count = 0; -} - -static void -meta_clear_end(struct anv_meta_saved_state *saved_state, - struct anv_cmd_buffer *cmd_buffer) -{ - anv_meta_restore(saved_state, cmd_buffer); -} - -static void -build_color_shaders(struct nir_shader **out_vs, - struct nir_shader **out_fs, - uint32_t frag_output) -{ - nir_builder vs_b; - nir_builder fs_b; - - nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); - nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); - - vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs"); - fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs"); - - const struct glsl_type *position_type = glsl_vec4_type(); - const struct glsl_type *color_type = glsl_vec4_type(); - - nir_variable *vs_in_pos = - nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, - "a_position"); - vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; - - nir_variable *vs_out_pos = - nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, - "gl_Position"); - vs_out_pos->data.location = VARYING_SLOT_POS; - - nir_variable *vs_in_color = - nir_variable_create(vs_b.shader, nir_var_shader_in, 
color_type, - "a_color"); - vs_in_color->data.location = VERT_ATTRIB_GENERIC1; - - nir_variable *vs_out_color = - nir_variable_create(vs_b.shader, nir_var_shader_out, color_type, - "v_color"); - vs_out_color->data.location = VARYING_SLOT_VAR0; - vs_out_color->data.interpolation = INTERP_QUALIFIER_FLAT; - - nir_variable *fs_in_color = - nir_variable_create(fs_b.shader, nir_var_shader_in, color_type, - "v_color"); - fs_in_color->data.location = vs_out_color->data.location; - fs_in_color->data.interpolation = vs_out_color->data.interpolation; - - nir_variable *fs_out_color = - nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, - "f_color"); - fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output; - - nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); - nir_copy_var(&vs_b, vs_out_color, vs_in_color); - nir_copy_var(&fs_b, fs_out_color, fs_in_color); - - *out_vs = vs_b.shader; - *out_fs = fs_b.shader; -} - -static VkResult -create_pipeline(struct anv_device *device, - uint32_t samples, - struct nir_shader *vs_nir, - struct nir_shader *fs_nir, - const VkPipelineVertexInputStateCreateInfo *vi_state, - const VkPipelineDepthStencilStateCreateInfo *ds_state, - const VkPipelineColorBlendStateCreateInfo *cb_state, - const VkAllocationCallbacks *alloc, - bool use_repclear, - struct anv_pipeline **pipeline) -{ - VkDevice device_h = anv_device_to_handle(device); - VkResult result; - - struct anv_shader_module vs_m = { .nir = vs_nir }; - struct anv_shader_module fs_m = { .nir = fs_nir }; - - VkPipeline pipeline_h = VK_NULL_HANDLE; - result = anv_graphics_pipeline_create(device_h, - VK_NULL_HANDLE, - &(VkGraphicsPipelineCreateInfo) { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = fs_nir ? 
2 : 1, - .pStages = (VkPipelineShaderStageCreateInfo[]) { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = anv_shader_module_to_handle(&vs_m), - .pName = "main", - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = anv_shader_module_to_handle(&fs_m), - .pName = "main", - }, - }, - .pVertexInputState = vi_state, - .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }, - .pViewportState = &(VkPipelineViewportStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .pViewports = NULL, /* dynamic */ - .scissorCount = 1, - .pScissors = NULL, /* dynamic */ - }, - .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, - .depthBiasEnable = false, - }, - .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterizationSamples = samples, - .sampleShadingEnable = false, - .pSampleMask = (VkSampleMask[]) { ~0 }, - .alphaToCoverageEnable = false, - .alphaToOneEnable = false, - }, - .pDepthStencilState = ds_state, - .pColorBlendState = cb_state, - .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { - /* The meta clear pipeline declares all state as dynamic. - * As a consequence, vkCmdBindPipeline writes no dynamic state - * to the cmd buffer. Therefore, at the end of the meta clear, - * we need only restore dynamic state was vkCmdSet. 
- */ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 9, - .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_LINE_WIDTH, - VK_DYNAMIC_STATE_DEPTH_BIAS, - VK_DYNAMIC_STATE_BLEND_CONSTANTS, - VK_DYNAMIC_STATE_DEPTH_BOUNDS, - VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, - VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }, - }, - .flags = 0, - .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass), - .subpass = 0, - }, - &(struct anv_graphics_pipeline_create_info) { - .color_attachment_count = MAX_RTS, - .use_repclear = use_repclear, - .disable_viewport = true, - .disable_vs = true, - .use_rectlist = true - }, - alloc, - &pipeline_h); - - ralloc_free(vs_nir); - ralloc_free(fs_nir); - - *pipeline = anv_pipeline_from_handle(pipeline_h); - - return result; -} - -static VkResult -create_color_pipeline(struct anv_device *device, - uint32_t samples, - uint32_t frag_output, - struct anv_pipeline **pipeline) -{ - struct nir_shader *vs_nir; - struct nir_shader *fs_nir; - build_color_shaders(&vs_nir, &fs_nir, frag_output); - - const VkPipelineVertexInputStateCreateInfo vi_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = sizeof(struct color_clear_vattrs), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offset = offsetof(struct color_clear_vattrs, vue_header), - }, - { - /* Position */ - .location = 1, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(struct color_clear_vattrs, position), - }, - { - /* Color */ - .location = 2, - 
.binding = 0, - .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offset = offsetof(struct color_clear_vattrs, color), - }, - }, - }; - - const VkPipelineDepthStencilStateCreateInfo ds_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .depthTestEnable = false, - .depthWriteEnable = false, - .depthBoundsTestEnable = false, - .stencilTestEnable = false, - }; - - VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = { 0 }; - blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState) { - .blendEnable = false, - .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | - VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT, - }; - - const VkPipelineColorBlendStateCreateInfo cb_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .logicOpEnable = false, - .attachmentCount = MAX_RTS, - .pAttachments = blend_attachment_state - }; - - /* Disable repclear because we do not want the compiler to replace the - * shader. We need the shader to write to the specified color attachment, - * but the repclear shader writes to all color attachments. 
- */ - return - create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, - &cb_state, &device->meta_state.alloc, - /*use_repclear*/ false, pipeline); -} - -static void -destroy_pipeline(struct anv_device *device, struct anv_pipeline *pipeline) -{ - if (!pipeline) - return; - - ANV_CALL(DestroyPipeline)(anv_device_to_handle(device), - anv_pipeline_to_handle(pipeline), - &device->meta_state.alloc); -} - -void -anv_device_finish_meta_clear_state(struct anv_device *device) -{ - struct anv_meta_state *state = &device->meta_state; - - for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { - for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { - destroy_pipeline(device, state->clear[i].color_pipelines[j]); - } - - destroy_pipeline(device, state->clear[i].depth_only_pipeline); - destroy_pipeline(device, state->clear[i].stencil_only_pipeline); - destroy_pipeline(device, state->clear[i].depthstencil_pipeline); - } -} - -static void -emit_color_clear(struct anv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att, - const VkClearRect *clear_rect) -{ - struct anv_device *device = cmd_buffer->device; - const struct anv_subpass *subpass = cmd_buffer->state.subpass; - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const uint32_t subpass_att = clear_att->colorAttachment; - const uint32_t pass_att = subpass->color_attachments[subpass_att]; - const struct anv_image_view *iview = fb->attachments[pass_att]; - const uint32_t samples = iview->image->samples; - const uint32_t samples_log2 = ffs(samples) - 1; - struct anv_pipeline *pipeline = - device->meta_state.clear[samples_log2].color_pipelines[subpass_att]; - VkClearColorValue clear_value = clear_att->clearValue.color; - - VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); - VkPipeline pipeline_h = anv_pipeline_to_handle(pipeline); - - assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear)); - assert(clear_att->aspectMask == 
VK_IMAGE_ASPECT_COLOR_BIT); - assert(clear_att->colorAttachment < subpass->color_count); - - const struct color_clear_vattrs vertex_data[3] = { - { - .vue_header = { 0 }, - .position = { - clear_rect->rect.offset.x, - clear_rect->rect.offset.y, - }, - .color = clear_value, - }, - { - .vue_header = { 0 }, - .position = { - clear_rect->rect.offset.x + clear_rect->rect.extent.width, - clear_rect->rect.offset.y, - }, - .color = clear_value, - }, - { - .vue_header = { 0 }, - .position = { - clear_rect->rect.offset.x + clear_rect->rect.extent.width, - clear_rect->rect.offset.y + clear_rect->rect.extent.height, - }, - .color = clear_value, - }, - }; - - struct anv_state state = - anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); - - struct anv_buffer vertex_buffer = { - .device = device, - .size = sizeof(vertex_data), - .bo = &device->dynamic_state_block_pool.bo, - .offset = state.offset, - }; - - ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, - (VkViewport[]) { - { - .x = 0, - .y = 0, - .width = fb->width, - .height = fb->height, - .minDepth = 0.0, - .maxDepth = 1.0, - }, - }); - - ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, - (VkRect2D[]) { - { - .offset = { 0, 0 }, - .extent = { fb->width, fb->height }, - } - }); - - ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, - (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, - (VkDeviceSize[]) { 0 }); - - if (cmd_buffer->state.pipeline != pipeline) { - ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_h); - } - - ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); -} - - -static void -build_depthstencil_shader(struct nir_shader **out_vs) -{ - nir_builder vs_b; - - nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); - - vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs"); - - const struct glsl_type *position_type = glsl_vec4_type(); - - nir_variable *vs_in_pos = - nir_variable_create(vs_b.shader, nir_var_shader_in, 
position_type, - "a_position"); - vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; - - nir_variable *vs_out_pos = - nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, - "gl_Position"); - vs_out_pos->data.location = VARYING_SLOT_POS; - - nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); - - *out_vs = vs_b.shader; -} - -static VkResult -create_depthstencil_pipeline(struct anv_device *device, - VkImageAspectFlags aspects, - uint32_t samples, - struct anv_pipeline **pipeline) -{ - struct nir_shader *vs_nir; - - build_depthstencil_shader(&vs_nir); - - const VkPipelineVertexInputStateCreateInfo vi_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = sizeof(struct depthstencil_clear_vattrs), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 2, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offset = offsetof(struct depthstencil_clear_vattrs, vue_header), - }, - { - /* Position */ - .location = 1, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(struct depthstencil_clear_vattrs, position), - }, - }, - }; - - const VkPipelineDepthStencilStateCreateInfo ds_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), - .depthCompareOp = VK_COMPARE_OP_ALWAYS, - .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), - .depthBoundsTestEnable = false, - .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT), - .front = { - .passOp = VK_STENCIL_OP_REPLACE, - .compareOp = VK_COMPARE_OP_ALWAYS, - .writeMask = UINT32_MAX, - .reference = 0, /* dynamic */ - }, - .back = { 0 /* dont care */ }, - }; - - const 
VkPipelineColorBlendStateCreateInfo cb_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .logicOpEnable = false, - .attachmentCount = 0, - .pAttachments = NULL, - }; - - return create_pipeline(device, samples, vs_nir, NULL, &vi_state, &ds_state, - &cb_state, &device->meta_state.alloc, - /*use_repclear*/ true, pipeline); -} - -static void -emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att, - const VkClearRect *clear_rect) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_meta_state *meta_state = &device->meta_state; - const struct anv_subpass *subpass = cmd_buffer->state.subpass; - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const uint32_t pass_att = subpass->depth_stencil_attachment; - const struct anv_image_view *iview = fb->attachments[pass_att]; - const uint32_t samples = iview->image->samples; - const uint32_t samples_log2 = ffs(samples) - 1; - VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; - VkImageAspectFlags aspects = clear_att->aspectMask; - - VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); - - assert(samples_log2 < ARRAY_SIZE(meta_state->clear)); - assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT || - aspects == VK_IMAGE_ASPECT_STENCIL_BIT || - aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)); - assert(pass_att != VK_ATTACHMENT_UNUSED); - - const struct depthstencil_clear_vattrs vertex_data[3] = { - { - .vue_header = { 0 }, - .position = { - clear_rect->rect.offset.x, - clear_rect->rect.offset.y, - }, - }, - { - .vue_header = { 0 }, - .position = { - clear_rect->rect.offset.x + clear_rect->rect.extent.width, - clear_rect->rect.offset.y, - }, - }, - { - .vue_header = { 0 }, - .position = { - clear_rect->rect.offset.x + clear_rect->rect.extent.width, - clear_rect->rect.offset.y + clear_rect->rect.extent.height, - }, - }, - }; - - struct anv_state state = - 
anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); - - struct anv_buffer vertex_buffer = { - .device = device, - .size = sizeof(vertex_data), - .bo = &device->dynamic_state_block_pool.bo, - .offset = state.offset, - }; - - ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, - (VkViewport[]) { - { - .x = 0, - .y = 0, - .width = fb->width, - .height = fb->height, - - /* Ignored when clearing only stencil. */ - .minDepth = clear_value.depth, - .maxDepth = clear_value.depth, - }, - }); - - ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, - (VkRect2D[]) { - { - .offset = { 0, 0 }, - .extent = { fb->width, fb->height }, - } - }); - - if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { - ANV_CALL(CmdSetStencilReference)(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, - clear_value.stencil); - } - - ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, - (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, - (VkDeviceSize[]) { 0 }); - - struct anv_pipeline *pipeline; - switch (aspects) { - case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: - pipeline = meta_state->clear[samples_log2].depthstencil_pipeline; - break; - case VK_IMAGE_ASPECT_DEPTH_BIT: - pipeline = meta_state->clear[samples_log2].depth_only_pipeline; - break; - case VK_IMAGE_ASPECT_STENCIL_BIT: - pipeline = meta_state->clear[samples_log2].stencil_only_pipeline; - break; - default: - unreachable("expected depth or stencil aspect"); - } - - if (cmd_buffer->state.pipeline != pipeline) { - ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - anv_pipeline_to_handle(pipeline)); - } - - ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); -} - -VkResult -anv_device_init_meta_clear_state(struct anv_device *device) -{ - VkResult res; - struct anv_meta_state *state = &device->meta_state; - - zero(device->meta_state.clear); - - for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { - uint32_t samples = 1 << i; - - for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { - 
res = create_color_pipeline(device, samples, /* frag_output */ j, - &state->clear[i].color_pipelines[j]); - if (res != VK_SUCCESS) - goto fail; - } - - res = create_depthstencil_pipeline(device, - VK_IMAGE_ASPECT_DEPTH_BIT, samples, - &state->clear[i].depth_only_pipeline); - if (res != VK_SUCCESS) - goto fail; - - res = create_depthstencil_pipeline(device, - VK_IMAGE_ASPECT_STENCIL_BIT, samples, - &state->clear[i].stencil_only_pipeline); - if (res != VK_SUCCESS) - goto fail; - - res = create_depthstencil_pipeline(device, - VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT, samples, - &state->clear[i].depthstencil_pipeline); - if (res != VK_SUCCESS) - goto fail; - } - - return VK_SUCCESS; - -fail: - anv_device_finish_meta_clear_state(device); - return res; -} - -/** - * The parameters mean that same as those in vkCmdClearAttachments. - */ -static void -emit_clear(struct anv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att, - const VkClearRect *clear_rect) -{ - if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - emit_color_clear(cmd_buffer, clear_att, clear_rect); - } else { - assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)); - emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect); - } -} - -static bool -subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) -{ - const struct anv_cmd_state *cmd_state = &cmd_buffer->state; - uint32_t ds = cmd_state->subpass->depth_stencil_attachment; - - for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { - uint32_t a = cmd_state->subpass->color_attachments[i]; - if (cmd_state->attachments[a].pending_clear_aspects) { - return true; - } - } - - if (ds != VK_ATTACHMENT_UNUSED && - cmd_state->attachments[ds].pending_clear_aspects) { - return true; - } - - return false; -} - -/** - * Emit any pending attachment clears for the current subpass. 
- * - * @see anv_attachment_state::pending_clear_aspects - */ -void -anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_cmd_state *cmd_state = &cmd_buffer->state; - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_meta_saved_state saved_state; - - if (!subpass_needs_clear(cmd_buffer)) - return; - - meta_clear_begin(&saved_state, cmd_buffer); - - if (cmd_state->framebuffer->layers > 1) - anv_finishme("clearing multi-layer framebuffer"); - - VkClearRect clear_rect = { - .rect = { - .offset = { 0, 0 }, - .extent = { fb->width, fb->height }, - }, - .baseArrayLayer = 0, - .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ - }; - - for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { - uint32_t a = cmd_state->subpass->color_attachments[i]; - - if (!cmd_state->attachments[a].pending_clear_aspects) - continue; - - assert(cmd_state->attachments[a].pending_clear_aspects == - VK_IMAGE_ASPECT_COLOR_BIT); - - VkClearAttachment clear_att = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .colorAttachment = i, /* Use attachment index relative to subpass */ - .clearValue = cmd_state->attachments[a].clear_value, - }; - - emit_clear(cmd_buffer, &clear_att, &clear_rect); - cmd_state->attachments[a].pending_clear_aspects = 0; - } - - uint32_t ds = cmd_state->subpass->depth_stencil_attachment; - - if (ds != VK_ATTACHMENT_UNUSED && - cmd_state->attachments[ds].pending_clear_aspects) { - - VkClearAttachment clear_att = { - .aspectMask = cmd_state->attachments[ds].pending_clear_aspects, - .clearValue = cmd_state->attachments[ds].clear_value, - }; - - emit_clear(cmd_buffer, &clear_att, &clear_rect); - cmd_state->attachments[ds].pending_clear_aspects = 0; - } - - meta_clear_end(&saved_state, cmd_buffer); -} - -static void -anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, - struct anv_image *image, - VkImageLayout image_layout, - const VkClearValue *clear_value, - uint32_t range_count, - const 
VkImageSubresourceRange *ranges) -{ - VkDevice device_h = anv_device_to_handle(cmd_buffer->device); - - for (uint32_t r = 0; r < range_count; r++) { - const VkImageSubresourceRange *range = &ranges[r]; - - for (uint32_t l = 0; l < range->levelCount; ++l) { - for (uint32_t s = 0; s < range->layerCount; ++s) { - struct anv_image_view iview; - anv_image_view_init(&iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(image), - .viewType = anv_meta_get_view_type(image), - .format = image->vk_format, - .subresourceRange = { - .aspectMask = range->aspectMask, - .baseMipLevel = range->baseMipLevel + l, - .levelCount = 1, - .baseArrayLayer = range->baseArrayLayer + s, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - VkFramebuffer fb; - anv_CreateFramebuffer(device_h, - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(&iview), - }, - .width = iview.extent.width, - .height = iview.extent.height, - .layers = 1 - }, - &cmd_buffer->pool->alloc, - &fb); - - VkAttachmentDescription att_desc = { - .format = iview.vk_format, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = image_layout, - .finalLayout = image_layout, - }; - - VkSubpassDescription subpass_desc = { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 0, - .pColorAttachments = NULL, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = NULL, - .preserveAttachmentCount = 0, - .pPreserveAttachments = NULL, - }; - - const VkAttachmentReference att_ref = { - .attachment = 0, - .layout = image_layout, - }; - - if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - subpass_desc.colorAttachmentCount = 1; - 
subpass_desc.pColorAttachments = &att_ref; - } else { - subpass_desc.pDepthStencilAttachment = &att_ref; - } - - VkRenderPass pass; - anv_CreateRenderPass(device_h, - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &att_desc, - .subpassCount = 1, - .pSubpasses = &subpass_desc, - }, - &cmd_buffer->pool->alloc, - &pass); - - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderArea = { - .offset = { 0, 0, }, - .extent = { - .width = iview.extent.width, - .height = iview.extent.height, - }, - }, - .renderPass = pass, - .framebuffer = fb, - .clearValueCount = 0, - .pClearValues = NULL, - }, - VK_SUBPASS_CONTENTS_INLINE); - - VkClearAttachment clear_att = { - .aspectMask = range->aspectMask, - .colorAttachment = 0, - .clearValue = *clear_value, - }; - - VkClearRect clear_rect = { - .rect = { - .offset = { 0, 0 }, - .extent = { iview.extent.width, iview.extent.height }, - }, - .baseArrayLayer = range->baseArrayLayer, - .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ - }; - - emit_clear(cmd_buffer, &clear_att, &clear_rect); - - ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); - ANV_CALL(DestroyRenderPass)(device_h, pass, - &cmd_buffer->pool->alloc); - ANV_CALL(DestroyFramebuffer)(device_h, fb, - &cmd_buffer->pool->alloc); - } - } - } -} - -void anv_CmdClearColorImage( - VkCommandBuffer commandBuffer, - VkImage image_h, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, image, image_h); - struct anv_meta_saved_state saved_state; - - meta_clear_begin(&saved_state, cmd_buffer); - - anv_cmd_clear_image(cmd_buffer, image, imageLayout, - (const VkClearValue *) pColor, - rangeCount, pRanges); - - 
meta_clear_end(&saved_state, cmd_buffer); -} - -void anv_CmdClearDepthStencilImage( - VkCommandBuffer commandBuffer, - VkImage image_h, - VkImageLayout imageLayout, - const VkClearDepthStencilValue* pDepthStencil, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, image, image_h); - struct anv_meta_saved_state saved_state; - - meta_clear_begin(&saved_state, cmd_buffer); - - anv_cmd_clear_image(cmd_buffer, image, imageLayout, - (const VkClearValue *) pDepthStencil, - rangeCount, pRanges); - - meta_clear_end(&saved_state, cmd_buffer); -} - -void anv_CmdClearAttachments( - VkCommandBuffer commandBuffer, - uint32_t attachmentCount, - const VkClearAttachment* pAttachments, - uint32_t rectCount, - const VkClearRect* pRects) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_meta_saved_state saved_state; - - meta_clear_begin(&saved_state, cmd_buffer); - - /* FINISHME: We can do better than this dumb loop. It thrashes too much - * state. 
- */ - for (uint32_t a = 0; a < attachmentCount; ++a) { - for (uint32_t r = 0; r < rectCount; ++r) { - emit_clear(cmd_buffer, &pAttachments[a], &pRects[r]); - } - } - - meta_clear_end(&saved_state, cmd_buffer); -} - -static void -do_buffer_fill(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *dest, uint64_t dest_offset, - int width, int height, VkFormat fill_format, uint32_t data) -{ - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - - VkImageCreateInfo image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = fill_format, - .extent = { - .width = width, - .height = height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }; - - VkImage dest_image; - image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - anv_CreateImage(vk_device, &image_info, - &cmd_buffer->pool->alloc, &dest_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. 
- */ - anv_image_from_handle(dest_image)->bo = dest; - anv_image_from_handle(dest_image)->offset = dest_offset; - - const VkClearValue clear_value = { - .color = { - .uint32 = { data, data, data, data } - } - }; - - const VkImageSubresourceRange range = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }; - - anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - &clear_value, 1, &range); -} - -void anv_CmdFillBuffer( - VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize fillSize, - uint32_t data) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); - struct anv_meta_saved_state saved_state; - - meta_clear_begin(&saved_state, cmd_buffer); - - VkFormat format; - int bs; - if ((fillSize & 15) == 0 && (dstOffset & 15) == 0) { - format = VK_FORMAT_R32G32B32A32_UINT; - bs = 16; - } else if ((fillSize & 7) == 0 && (dstOffset & 15) == 0) { - format = VK_FORMAT_R32G32_UINT; - bs = 8; - } else { - assert((fillSize & 3) == 0 && (dstOffset & 3) == 0); - format = VK_FORMAT_R32_UINT; - bs = 4; - } - - /* This is maximum possible width/height our HW can handle */ - const uint64_t max_surface_dim = 1 << 14; - - /* First, we make a bunch of max-sized copies */ - const uint64_t max_fill_size = max_surface_dim * max_surface_dim * bs; - while (fillSize > max_fill_size) { - do_buffer_fill(cmd_buffer, dst_buffer->bo, - dst_buffer->offset + dstOffset, - max_surface_dim, max_surface_dim, format, data); - fillSize -= max_fill_size; - dstOffset += max_fill_size; - } - - uint64_t height = fillSize / (max_surface_dim * bs); - assert(height < max_surface_dim); - if (height != 0) { - const uint64_t rect_fill_size = height * max_surface_dim * bs; - do_buffer_fill(cmd_buffer, dst_buffer->bo, - dst_buffer->offset + dstOffset, - max_surface_dim, height, 
format, data); - fillSize -= rect_fill_size; - dstOffset += rect_fill_size; - } - - if (fillSize != 0) { - do_buffer_fill(cmd_buffer, dst_buffer->bo, - dst_buffer->offset + dstOffset, - fillSize / bs, 1, format, data); - } - - meta_clear_end(&saved_state, cmd_buffer); -} diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c deleted file mode 100644 index ea5020c5f24..00000000000 --- a/src/vulkan/anv_meta_resolve.c +++ /dev/null @@ -1,867 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include - -#include "anv_meta.h" -#include "anv_private.h" -#include "nir/nir_builder.h" - -/** - * Vertex attributes used by all pipelines. 
- */ -struct vertex_attrs { - struct anv_vue_header vue_header; - float position[2]; /**< 3DPRIM_RECTLIST */ - float tex_position[2]; -}; - -static void -meta_resolve_save(struct anv_meta_saved_state *saved_state, - struct anv_cmd_buffer *cmd_buffer) -{ - anv_meta_save(saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT) | - (1 << VK_DYNAMIC_STATE_SCISSOR)); - - cmd_buffer->state.dynamic.viewport.count = 0; - cmd_buffer->state.dynamic.scissor.count = 0; -} - -static void -meta_resolve_restore(struct anv_meta_saved_state *saved_state, - struct anv_cmd_buffer *cmd_buffer) -{ - anv_meta_restore(saved_state, cmd_buffer); -} - -static VkPipeline * -get_pipeline_h(struct anv_device *device, uint32_t samples) -{ - uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */ - - assert(samples >= 2); - assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines)); - - return &device->meta_state.resolve.pipelines[i]; -} - -static nir_shader * -build_nir_vs(void) -{ - const struct glsl_type *vec4 = glsl_vec4_type(); - - nir_builder b; - nir_variable *a_position; - nir_variable *v_position; - nir_variable *a_tex_position; - nir_variable *v_tex_position; - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs"); - - a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, - "a_position"); - a_position->data.location = VERT_ATTRIB_GENERIC0; - - v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, - "gl_Position"); - v_position->data.location = VARYING_SLOT_POS; - - a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, - "a_tex_position"); - a_tex_position->data.location = VERT_ATTRIB_GENERIC1; - - v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, - "v_tex_position"); - v_tex_position->data.location = VARYING_SLOT_VAR0; - - nir_copy_var(&b, v_position, a_position); - nir_copy_var(&b, v_tex_position, a_tex_position); - - return 
b.shader; -} - -static nir_shader * -build_nir_fs(uint32_t num_samples) -{ - const struct glsl_type *vec4 = glsl_vec4_type(); - - const struct glsl_type *sampler2DMS = - glsl_sampler_type(GLSL_SAMPLER_DIM_MS, - /*is_shadow*/ false, - /*is_array*/ false, - GLSL_TYPE_FLOAT); - - nir_builder b; - nir_variable *u_tex; /* uniform sampler */ - nir_variable *v_position; /* vec4, varying fragment position */ - nir_variable *v_tex_position; /* vec4, varying texture coordinate */ - nir_variable *f_color; /* vec4, fragment output color */ - nir_ssa_def *accum; /* vec4, accumulation of sample values */ - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info.name = ralloc_asprintf(b.shader, - "meta_resolve_fs_samples%02d", - num_samples); - - u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS, - "u_tex"); - u_tex->data.descriptor_set = 0; - u_tex->data.binding = 0; - - v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, - "v_position"); - v_position->data.location = VARYING_SLOT_POS; - v_position->data.origin_upper_left = true; - - v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, - "v_tex_position"); - v_tex_position->data.location = VARYING_SLOT_VAR0; - - f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, - "f_color"); - f_color->data.location = FRAG_RESULT_DATA0; - - accum = nir_imm_vec4(&b, 0, 0, 0, 0); - - nir_ssa_def *tex_position_ivec = - nir_f2i(&b, nir_load_var(&b, v_tex_position)); - - for (uint32_t i = 0; i < num_samples; ++i) { - nir_tex_instr *tex; - - tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2); - tex->texture = nir_deref_var_create(tex, u_tex); - tex->sampler = nir_deref_var_create(tex, u_tex); - tex->sampler_dim = GLSL_SAMPLER_DIM_MS; - tex->op = nir_texop_txf_ms; - tex->src[0].src = nir_src_for_ssa(tex_position_ivec); - tex->src[0].src_type = nir_tex_src_coord; - tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); - tex->src[1].src_type = 
nir_tex_src_ms_index; - tex->dest_type = nir_type_float; - tex->is_array = false; - tex->coord_components = 3; - nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex"); - nir_builder_instr_insert(&b, &tex->instr); - - accum = nir_fadd(&b, accum, &tex->dest.ssa); - } - - accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples)); - nir_store_var(&b, f_color, accum, /*writemask*/ 4); - - return b.shader; -} - -static VkResult -create_pass(struct anv_device *device) -{ - VkResult result; - VkDevice device_h = anv_device_to_handle(device); - const VkAllocationCallbacks *alloc = &device->meta_state.alloc; - - result = anv_CreateRenderPass(device_h, - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ - .samples = 1, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = &(VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - }, - .preserveAttachmentCount = 0, - .pPreserveAttachments = NULL, - }, - .dependencyCount = 0, - }, - alloc, - &device->meta_state.resolve.pass); - - return result; -} - -static VkResult -create_pipeline(struct anv_device *device, - uint32_t num_samples, - VkShaderModule vs_module_h) -{ - VkResult result; - VkDevice device_h = anv_device_to_handle(device); - - struct anv_shader_module fs_module = { - .nir = build_nir_fs(num_samples), - }; - - if (!fs_module.nir) { - /* XXX: Need more accurate error */ 
- result = VK_ERROR_OUT_OF_HOST_MEMORY; - goto cleanup; - } - - result = anv_graphics_pipeline_create(device_h, - VK_NULL_HANDLE, - &(VkGraphicsPipelineCreateInfo) { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = 2, - .pStages = (VkPipelineShaderStageCreateInfo[]) { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = vs_module_h, - .pName = "main", - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = anv_shader_module_to_handle(&fs_module), - .pName = "main", - }, - }, - .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = sizeof(struct vertex_attrs), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offset = offsetof(struct vertex_attrs, vue_header), - }, - { - /* Position */ - .location = 1, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(struct vertex_attrs, position), - }, - { - /* Texture Coordinate */ - .location = 2, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(struct vertex_attrs, tex_position), - }, - }, - }, - .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }, - .pViewportState = &(VkPipelineViewportStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, - }, 
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .depthClampEnable = false, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, - }, - .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterizationSamples = 1, - .sampleShadingEnable = false, - .pSampleMask = (VkSampleMask[]) { 0x1 }, - .alphaToCoverageEnable = false, - .alphaToOneEnable = false, - }, - .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .logicOpEnable = false, - .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | - VK_COLOR_COMPONENT_A_BIT, - }, - }, - }, - .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 2, - .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }, - }, - .layout = device->meta_state.resolve.pipeline_layout, - .renderPass = device->meta_state.resolve.pass, - .subpass = 0, - }, - &(struct anv_graphics_pipeline_create_info) { - .color_attachment_count = -1, - .use_repclear = false, - .disable_viewport = true, - .disable_scissor = true, - .disable_vs = true, - .use_rectlist = true - }, - &device->meta_state.alloc, - get_pipeline_h(device, num_samples)); - if (result != VK_SUCCESS) - goto cleanup; - - goto cleanup; - -cleanup: - ralloc_free(fs_module.nir); - return result; -} - -void -anv_device_finish_meta_resolve_state(struct anv_device *device) -{ - struct anv_meta_state *state = &device->meta_state; - VkDevice device_h = anv_device_to_handle(device); - 
VkRenderPass pass_h = device->meta_state.resolve.pass; - VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout; - VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout; - const VkAllocationCallbacks *alloc = &device->meta_state.alloc; - - if (pass_h) - ANV_CALL(DestroyRenderPass)(device_h, pass_h, - &device->meta_state.alloc); - - if (pipeline_layout_h) - ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc); - - if (ds_layout_h) - ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc); - - for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) { - VkPipeline pipeline_h = state->resolve.pipelines[i]; - - if (pipeline_h) { - ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); - } - } -} - -VkResult -anv_device_init_meta_resolve_state(struct anv_device *device) -{ - VkResult res = VK_SUCCESS; - VkDevice device_h = anv_device_to_handle(device); - const VkAllocationCallbacks *alloc = &device->meta_state.alloc; - - const isl_sample_count_mask_t sample_count_mask = - isl_device_get_sample_counts(&device->isl_dev); - - zero(device->meta_state.resolve); - - struct anv_shader_module vs_module = { .nir = build_nir_vs() }; - if (!vs_module.nir) { - /* XXX: Need more accurate error */ - res = VK_ERROR_OUT_OF_HOST_MEMORY; - goto fail; - } - - VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module); - - res = anv_CreateDescriptorSetLayout(device_h, - &(VkDescriptorSetLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - }, - }, - }, - alloc, - &device->meta_state.resolve.ds_layout); - if (res != VK_SUCCESS) - goto fail; - - res = anv_CreatePipelineLayout(device_h, - &(VkPipelineLayoutCreateInfo) { - .sType = 
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = (VkDescriptorSetLayout[]) { - device->meta_state.resolve.ds_layout, - }, - }, - alloc, - &device->meta_state.resolve.pipeline_layout); - if (res != VK_SUCCESS) - goto fail; - - res = create_pass(device); - if (res != VK_SUCCESS) - goto fail; - - for (uint32_t i = 0; - i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) { - - uint32_t sample_count = 1 << (1 + i); - if (!(sample_count_mask & sample_count)) - continue; - - res = create_pipeline(device, sample_count, vs_module_h); - if (res != VK_SUCCESS) - goto fail; - } - - goto cleanup; - -fail: - anv_device_finish_meta_resolve_state(device); - -cleanup: - ralloc_free(vs_module.nir); - - return res; -} - -static void -emit_resolve(struct anv_cmd_buffer *cmd_buffer, - struct anv_image_view *src_iview, - const VkOffset2D *src_offset, - struct anv_image_view *dest_iview, - const VkOffset2D *dest_offset, - const VkExtent2D *resolve_extent) -{ - struct anv_device *device = cmd_buffer->device; - VkDevice device_h = anv_device_to_handle(device); - VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_image *src_image = src_iview->image; - VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1; - - const struct vertex_attrs vertex_data[3] = { - { - .vue_header = {0}, - .position = { - dest_offset->x + resolve_extent->width, - dest_offset->y + resolve_extent->height, - }, - .tex_position = { - src_offset->x + resolve_extent->width, - src_offset->y + resolve_extent->height, - }, - }, - { - .vue_header = {0}, - .position = { - dest_offset->x, - dest_offset->y + resolve_extent->height, - }, - .tex_position = { - src_offset->x, - src_offset->y + resolve_extent->height, - }, - }, - { - .vue_header = {0}, - .position = { - dest_offset->x, - dest_offset->y, - }, - .tex_position = { - src_offset->x, - src_offset->y, - }, - }, - }; - - 
struct anv_state vertex_mem = - anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, - sizeof(vertex_data), 16); - - struct anv_buffer vertex_buffer = { - .device = device, - .size = sizeof(vertex_data), - .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo, - .offset = vertex_mem.offset, - }; - - VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer); - - anv_CmdBindVertexBuffers(cmd_buffer_h, - /*firstBinding*/ 0, - /*bindingCount*/ 1, - (VkBuffer[]) { vertex_buffer_h }, - (VkDeviceSize[]) { 0 }); - - VkSampler sampler_h; - ANV_CALL(CreateSampler)(device_h, - &(VkSamplerCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = VK_FILTER_NEAREST, - .minFilter = VK_FILTER_NEAREST, - .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, - .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .mipLodBias = 0.0, - .anisotropyEnable = false, - .compareEnable = false, - .minLod = 0.0, - .maxLod = 0.0, - .unnormalizedCoordinates = false, - }, - &cmd_buffer->pool->alloc, - &sampler_h); - - VkDescriptorSet desc_set_h; - anv_AllocateDescriptorSets(device_h, - &(VkDescriptorSetAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = dummy_desc_pool_h, - .descriptorSetCount = 1, - .pSetLayouts = (VkDescriptorSetLayout[]) { - device->meta_state.resolve.ds_layout, - }, - }, - &desc_set_h); - - ANV_FROM_HANDLE(anv_descriptor_set, desc_set, desc_set_h); - - anv_UpdateDescriptorSets(device_h, - /*writeCount*/ 1, - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = desc_set_h, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = sampler_h, - .imageView = anv_image_view_to_handle(src_iview), - .imageLayout = 
VK_IMAGE_LAYOUT_GENERAL, - }, - }, - }, - }, - /*copyCount*/ 0, - /*copies */ NULL); - - ANV_CALL(CmdSetViewport)(cmd_buffer_h, - /*firstViewport*/ 0, - /*viewportCount*/ 1, - (VkViewport[]) { - { - .x = 0, - .y = 0, - .width = fb->width, - .height = fb->height, - .minDepth = 0.0, - .maxDepth = 1.0, - }, - }); - - ANV_CALL(CmdSetScissor)(cmd_buffer_h, - /*firstScissor*/ 0, - /*scissorCount*/ 1, - (VkRect2D[]) { - { - .offset = { 0, 0 }, - .extent = (VkExtent2D) { fb->width, fb->height }, - }, - }); - - VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples); - ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h); - - if (cmd_buffer->state.pipeline != pipeline) { - anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_h); - } - - anv_CmdBindDescriptorSets(cmd_buffer_h, - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.resolve.pipeline_layout, - /*firstSet*/ 0, - /* setCount */ 1, - (VkDescriptorSet[]) { - desc_set_h, - }, - /*copyCount*/ 0, - /*copies */ NULL); - - ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); - - /* All objects below are consumed by the draw call. We may safely destroy - * them. 
- */ - anv_descriptor_set_destroy(device, desc_set); - anv_DestroySampler(device_h, sampler_h, - &cmd_buffer->pool->alloc); -} - -void anv_CmdResolveImage( - VkCommandBuffer cmd_buffer_h, - VkImage src_image_h, - VkImageLayout src_image_layout, - VkImage dest_image_h, - VkImageLayout dest_image_layout, - uint32_t region_count, - const VkImageResolve* regions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h); - ANV_FROM_HANDLE(anv_image, src_image, src_image_h); - ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h); - struct anv_device *device = cmd_buffer->device; - struct anv_meta_saved_state state; - VkDevice device_h = anv_device_to_handle(device); - - meta_resolve_save(&state, cmd_buffer); - - assert(src_image->samples > 1); - assert(dest_image->samples == 1); - - if (src_image->samples >= 16) { - /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the - * glBlitFramebuffer workaround for samples >= 16. - */ - anv_finishme("vkCmdResolveImage: need interpolation workaround when " - "samples >= 16"); - } - - if (src_image->array_size > 1) - anv_finishme("vkCmdResolveImage: multisample array images"); - - for (uint32_t r = 0; r < region_count; ++r) { - const VkImageResolve *region = ®ions[r]; - - /* From the Vulkan 1.0 spec: - * - * - The aspectMask member of srcSubresource and dstSubresource must - * only contain VK_IMAGE_ASPECT_COLOR_BIT - * - * - The layerCount member of srcSubresource and dstSubresource must - * match - */ - assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); - assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); - assert(region->srcSubresource.layerCount == - region->dstSubresource.layerCount); - - const uint32_t src_base_layer = - anv_meta_get_iview_layer(src_image, ®ion->srcSubresource, - ®ion->srcOffset); - - const uint32_t dest_base_layer = - anv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, - ®ion->dstOffset); - - for (uint32_t layer = 0; layer < 
region->srcSubresource.layerCount; - ++layer) { - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = src_image_h, - .viewType = anv_meta_get_view_type(src_image), - .format = src_image->format->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = region->srcSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = src_base_layer + layer, - .layerCount = 1, - }, - }, - cmd_buffer, 0); - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = dest_image_h, - .viewType = anv_meta_get_view_type(dest_image), - .format = dest_image->format->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = region->dstSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_base_layer + layer, - .layerCount = 1, - }, - }, - cmd_buffer, 0); - - VkFramebuffer fb_h; - anv_CreateFramebuffer(device_h, - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(&dest_iview), - }, - .width = anv_minify(dest_image->extent.width, - region->dstSubresource.mipLevel), - .height = anv_minify(dest_image->extent.height, - region->dstSubresource.mipLevel), - .layers = 1 - }, - &cmd_buffer->pool->alloc, - &fb_h); - - ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h, - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = device->meta_state.resolve.pass, - .framebuffer = fb_h, - .renderArea = { - .offset = { - region->dstOffset.x, - region->dstOffset.y, - }, - .extent = { - region->extent.width, - region->extent.height, - } - }, - .clearValueCount = 0, - .pClearValues = NULL, - }, - 
VK_SUBPASS_CONTENTS_INLINE); - - emit_resolve(cmd_buffer, - &src_iview, - &(VkOffset2D) { - .x = region->srcOffset.x, - .y = region->srcOffset.y, - }, - &dest_iview, - &(VkOffset2D) { - .x = region->dstOffset.x, - .y = region->dstOffset.y, - }, - &(VkExtent2D) { - .width = region->extent.width, - .height = region->extent.height, - }); - - ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); - - anv_DestroyFramebuffer(device_h, fb_h, - &cmd_buffer->pool->alloc); - } - } - - meta_resolve_restore(&state, cmd_buffer); -} - -/** - * Emit any needed resolves for the current subpass. - */ -void -anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_meta_saved_state saved_state; - - /* FINISHME(perf): Skip clears for resolve attachments. - * - * From the Vulkan 1.0 spec: - * - * If the first use of an attachment in a render pass is as a resolve - * attachment, then the loadOp is effectively ignored as the resolve is - * guaranteed to overwrite all pixels in the render area. - */ - - if (!subpass->has_resolve) - return; - - meta_resolve_save(&saved_state, cmd_buffer); - - for (uint32_t i = 0; i < subpass->color_count; ++i) { - uint32_t src_att = subpass->color_attachments[i]; - uint32_t dest_att = subpass->resolve_attachments[i]; - - if (dest_att == VK_ATTACHMENT_UNUSED) - continue; - - struct anv_image_view *src_iview = fb->attachments[src_att]; - struct anv_image_view *dest_iview = fb->attachments[dest_att]; - - struct anv_subpass resolve_subpass = { - .color_count = 1, - .color_attachments = (uint32_t[]) { dest_att }, - .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, - }; - - anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass); - - /* Subpass resolves must respect the render area. We can ignore the - * render area here because vkCmdBeginRenderPass set the render area - * with 3DSTATE_DRAWING_RECTANGLE. 
- * - * XXX(chadv): Does the hardware really respect - * 3DSTATE_DRAWING_RECTANGLE when draing a 3DPRIM_RECTLIST? - */ - emit_resolve(cmd_buffer, - src_iview, - &(VkOffset2D) { 0, 0 }, - dest_iview, - &(VkOffset2D) { 0, 0 }, - &(VkExtent2D) { fb->width, fb->height }); - } - - cmd_buffer->state.subpass = subpass; - meta_resolve_restore(&saved_state, cmd_buffer); -} diff --git a/src/vulkan/anv_nir.h b/src/vulkan/anv_nir.h deleted file mode 100644 index a7ea3eb0e28..00000000000 --- a/src/vulkan/anv_nir.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "nir/nir.h" -#include "anv_private.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar); - -void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, - nir_shader *shader, - struct brw_stage_prog_data *prog_data); -void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, - nir_shader *shader, - struct brw_stage_prog_data *prog_data); - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c deleted file mode 100644 index e71a8ffb1f4..00000000000 --- a/src/vulkan/anv_nir_apply_dynamic_offsets.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "anv_nir.h" -#include "nir/nir_builder.h" - -struct apply_dynamic_offsets_state { - nir_shader *shader; - nir_builder builder; - - const struct anv_pipeline_layout *layout; - - uint32_t indices_start; -}; - -static bool -apply_dynamic_offsets_block(nir_block *block, void *void_state) -{ - struct apply_dynamic_offsets_state *state = void_state; - struct anv_descriptor_set_layout *set_layout; - - nir_builder *b = &state->builder; - - nir_foreach_instr_safe(block, instr) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - unsigned block_idx_src; - switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ssbo: - block_idx_src = 0; - break; - case nir_intrinsic_store_ssbo: - block_idx_src = 1; - break; - default: - continue; /* the loop */ - } - - nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr; - assert(res_instr->type == nir_instr_type_intrinsic); - nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr); - assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); - - unsigned set = res_intrin->const_index[0]; - unsigned binding = res_intrin->const_index[1]; - - set_layout = state->layout->set[set].layout; - if (set_layout->binding[binding].dynamic_offset_index < 0) - continue; - - b->cursor = nir_before_instr(&intrin->instr); - - /* First, we need to generate the uniform load for the buffer offset */ - uint32_t index = state->layout->set[set].dynamic_offset_start + - set_layout->binding[binding].dynamic_offset_index; - - nir_intrinsic_instr *offset_load = - nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform); - offset_load->num_components = 2; - offset_load->const_index[0] = state->indices_start + index * 8; - offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa, - nir_imm_int(b, 8))); - - nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL); - 
nir_builder_instr_insert(b, &offset_load->instr); - - nir_src *offset_src = nir_get_io_offset_src(intrin); - nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa, - &offset_load->dest.ssa); - - /* In order to avoid out-of-bounds access, we predicate */ - nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1), - offset_src->ssa); - nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = nir_src_for_ssa(pred); - nir_cf_node_insert(b->cursor, &if_stmt->cf_node); - - nir_instr_remove(&intrin->instr); - *offset_src = nir_src_for_ssa(new_offset); - nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr); - - if (intrin->intrinsic != nir_intrinsic_store_ssbo) { - /* It's a load, we need a phi node */ - nir_phi_instr *phi = nir_phi_instr_create(b->shader); - nir_ssa_dest_init(&phi->instr, &phi->dest, - intrin->num_components, NULL); - - nir_phi_src *src1 = ralloc(phi, nir_phi_src); - struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list); - src1->pred = exec_node_data(nir_block, tnode, cf_node.node); - src1->src = nir_src_for_ssa(&intrin->dest.ssa); - exec_list_push_tail(&phi->srcs, &src1->node); - - b->cursor = nir_after_cf_list(&if_stmt->else_list); - nir_ssa_def *zero = nir_build_imm(b, intrin->num_components, - (nir_const_value) { .u = { 0, 0, 0, 0 } }); - - nir_phi_src *src2 = ralloc(phi, nir_phi_src); - struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list); - src2->pred = exec_node_data(nir_block, enode, cf_node.node); - src2->src = nir_src_for_ssa(zero); - exec_list_push_tail(&phi->srcs, &src2->node); - - assert(intrin->dest.is_ssa); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&phi->dest.ssa)); - - nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr); - } - } - - return true; -} - -void -anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, - nir_shader *shader, - struct brw_stage_prog_data *prog_data) -{ - struct apply_dynamic_offsets_state state = { - .shader = shader, 
- .layout = pipeline->layout, - .indices_start = shader->num_uniforms, - }; - - if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets) - return; - - nir_foreach_function(shader, function) { - if (function->impl) { - nir_builder_init(&state.builder, function->impl); - nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state); - nir_metadata_preserve(function->impl, nir_metadata_block_index | - nir_metadata_dominance); - } - } - - struct anv_push_constants *null_data = NULL; - for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) { - prog_data->param[i * 2 + shader->num_uniforms] = - (const union gl_constant_value *)&null_data->dynamic[i].offset; - prog_data->param[i * 2 + 1 + shader->num_uniforms] = - (const union gl_constant_value *)&null_data->dynamic[i].range; - } - - shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8; -} diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c deleted file mode 100644 index c58a93878ee..00000000000 --- a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_nir.h" -#include "program/prog_parameter.h" -#include "nir/nir_builder.h" - -struct apply_pipeline_layout_state { - nir_shader *shader; - nir_builder builder; - - struct { - BITSET_WORD *used; - uint8_t *surface_offsets; - uint8_t *sampler_offsets; - uint8_t *image_offsets; - } set[MAX_SETS]; -}; - -static void -add_binding(struct apply_pipeline_layout_state *state, - uint32_t set, uint32_t binding) -{ - BITSET_SET(state->set[set].used, binding); -} - -static void -add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var) -{ - add_binding(state, var->data.descriptor_set, var->data.binding); -} - -static bool -get_used_bindings_block(nir_block *block, void *void_state) -{ - struct apply_pipeline_layout_state *state = void_state; - - nir_foreach_instr_safe(block, instr) { - switch (instr->type) { - case nir_instr_type_intrinsic: { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - switch (intrin->intrinsic) { - case nir_intrinsic_vulkan_resource_index: - add_binding(state, nir_intrinsic_desc_set(intrin), - nir_intrinsic_binding(intrin)); - break; - - case nir_intrinsic_image_load: - case nir_intrinsic_image_store: - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_min: - case nir_intrinsic_image_atomic_max: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: - case nir_intrinsic_image_size: - case nir_intrinsic_image_samples: - add_var_binding(state, intrin->variables[0]->var); - break; - - default: - break; - } - break; - } - case nir_instr_type_tex: { - nir_tex_instr 
*tex = nir_instr_as_tex(instr); - assert(tex->texture); - add_var_binding(state, tex->texture->var); - if (tex->sampler) - add_var_binding(state, tex->sampler->var); - break; - } - default: - continue; - } - } - - return true; -} - -static void -lower_res_index_intrinsic(nir_intrinsic_instr *intrin, - struct apply_pipeline_layout_state *state) -{ - nir_builder *b = &state->builder; - - b->cursor = nir_before_instr(&intrin->instr); - - uint32_t set = nir_intrinsic_desc_set(intrin); - uint32_t binding = nir_intrinsic_binding(intrin); - - uint32_t surface_index = state->set[set].surface_offsets[binding]; - - nir_const_value *const_block_idx = - nir_src_as_const_value(intrin->src[0]); - - nir_ssa_def *block_index; - if (const_block_idx) { - block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]); - } else { - block_index = nir_iadd(b, nir_imm_int(b, surface_index), - nir_ssa_for_src(b, intrin->src[0], 1)); - } - - assert(intrin->dest.is_ssa); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index)); - nir_instr_remove(&intrin->instr); -} - -static void -lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref, - unsigned *const_index, nir_tex_src_type src_type, - struct apply_pipeline_layout_state *state) -{ - if (deref->deref.child) { - assert(deref->deref.child->deref_type == nir_deref_type_array); - nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); - - *const_index += deref_array->base_offset; - - if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, - tex->num_srcs + 1); - - for (unsigned i = 0; i < tex->num_srcs; i++) { - new_srcs[i].src_type = tex->src[i].src_type; - nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src); - } - - ralloc_free(tex->src); - tex->src = new_srcs; - - /* Now we can go ahead and move the source over to being a - * first-class texture source. 
- */ - tex->src[tex->num_srcs].src_type = src_type; - tex->num_srcs++; - assert(deref_array->indirect.is_ssa); - nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src, - deref_array->indirect); - } - } -} - -static void -cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref) -{ - if (deref->deref.child == NULL) - return; - - nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); - - if (deref_array->deref_array_type != nir_deref_array_type_indirect) - return; - - nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT); -} - -static void -lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) -{ - /* No one should have come by and lowered it already */ - assert(tex->texture); - - unsigned set = tex->texture->var->data.descriptor_set; - unsigned binding = tex->texture->var->data.binding; - tex->texture_index = state->set[set].surface_offsets[binding]; - lower_tex_deref(tex, tex->texture, &tex->texture_index, - nir_tex_src_texture_offset, state); - - if (tex->sampler) { - unsigned set = tex->sampler->var->data.descriptor_set; - unsigned binding = tex->sampler->var->data.binding; - tex->sampler_index = state->set[set].surface_offsets[binding]; - lower_tex_deref(tex, tex->sampler, &tex->sampler_index, - nir_tex_src_sampler_offset, state); - } - - /* The backend only ever uses this to mark used surfaces. We don't care - * about that little optimization so it just needs to be non-zero. 
- */ - tex->texture_array_size = 1; - - cleanup_tex_deref(tex, tex->texture); - if (tex->sampler) - cleanup_tex_deref(tex, tex->sampler); - tex->texture = NULL; - tex->sampler = NULL; -} - -static bool -apply_pipeline_layout_block(nir_block *block, void *void_state) -{ - struct apply_pipeline_layout_state *state = void_state; - - nir_foreach_instr_safe(block, instr) { - switch (instr->type) { - case nir_instr_type_intrinsic: { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) { - lower_res_index_intrinsic(intrin, state); - } - break; - } - case nir_instr_type_tex: - lower_tex(nir_instr_as_tex(instr), state); - break; - default: - continue; - } - } - - return true; -} - -static void -setup_vec4_uniform_value(const union gl_constant_value **params, - const union gl_constant_value *values, - unsigned n) -{ - static const gl_constant_value zero = { 0 }; - - for (unsigned i = 0; i < n; ++i) - params[i] = &values[i]; - - for (unsigned i = n; i < 4; ++i) - params[i] = &zero; -} - -void -anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, - nir_shader *shader, - struct brw_stage_prog_data *prog_data) -{ - struct anv_pipeline_layout *layout = pipeline->layout; - - struct apply_pipeline_layout_state state = { - .shader = shader, - }; - - void *mem_ctx = ralloc_context(NULL); - - for (unsigned s = 0; s < layout->num_sets; s++) { - const unsigned count = layout->set[s].layout->binding_count; - const unsigned words = BITSET_WORDS(count); - state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words); - state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count); - state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count); - state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count); - } - - nir_foreach_function(shader, function) { - if (function->impl) - nir_foreach_block(function->impl, get_used_bindings_block, &state); - } - - struct anv_pipeline_bind_map map 
= { - .surface_count = 0, - .sampler_count = 0, - }; - - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - - BITSET_WORD b, _tmp; - BITSET_FOREACH_SET(b, _tmp, state.set[set].used, - set_layout->binding_count) { - if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) - map.surface_count += set_layout->binding[b].array_size; - if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) - map.sampler_count += set_layout->binding[b].array_size; - if (set_layout->binding[b].stage[shader->stage].image_index >= 0) - map.image_count += set_layout->binding[b].array_size; - } - } - - map.surface_to_descriptor = - malloc(map.surface_count * sizeof(struct anv_pipeline_binding)); - map.sampler_to_descriptor = - malloc(map.sampler_count * sizeof(struct anv_pipeline_binding)); - - pipeline->bindings[shader->stage] = map; - - unsigned surface = 0; - unsigned sampler = 0; - unsigned image = 0; - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - - BITSET_WORD b, _tmp; - BITSET_FOREACH_SET(b, _tmp, state.set[set].used, - set_layout->binding_count) { - unsigned array_size = set_layout->binding[b].array_size; - unsigned set_offset = set_layout->binding[b].descriptor_index; - - if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) { - state.set[set].surface_offsets[b] = surface; - for (unsigned i = 0; i < array_size; i++) { - map.surface_to_descriptor[surface + i].set = set; - map.surface_to_descriptor[surface + i].offset = set_offset + i; - } - surface += array_size; - } - - if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) { - state.set[set].sampler_offsets[b] = sampler; - for (unsigned i = 0; i < array_size; i++) { - map.sampler_to_descriptor[sampler + i].set = set; - map.sampler_to_descriptor[sampler + i].offset = set_offset + i; - } - sampler += array_size; - } - - 
if (set_layout->binding[b].stage[shader->stage].image_index >= 0) { - state.set[set].image_offsets[b] = image; - image += array_size; - } - } - } - - nir_foreach_function(shader, function) { - if (function->impl) { - nir_builder_init(&state.builder, function->impl); - nir_foreach_block(function->impl, apply_pipeline_layout_block, &state); - nir_metadata_preserve(function->impl, nir_metadata_block_index | - nir_metadata_dominance); - } - } - - if (map.image_count > 0) { - nir_foreach_variable(var, &shader->uniforms) { - if (glsl_type_is_image(var->type) || - (glsl_type_is_array(var->type) && - glsl_type_is_image(glsl_get_array_element(var->type)))) { - /* Images are represented as uniform push constants and the actual - * information required for reading/writing to/from the image is - * storred in the uniform. - */ - unsigned set = var->data.descriptor_set; - unsigned binding = var->data.binding; - unsigned image_index = state.set[set].image_offsets[binding]; - - var->data.driver_location = shader->num_uniforms + - image_index * BRW_IMAGE_PARAM_SIZE * 4; - } - } - - struct anv_push_constants *null_data = NULL; - const gl_constant_value **param = prog_data->param + shader->num_uniforms; - const struct brw_image_param *image_param = null_data->images; - for (uint32_t i = 0; i < map.image_count; i++) { - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, - (const union gl_constant_value *)&image_param->surface_idx, 1); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, - (const union gl_constant_value *)image_param->offset, 2); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, - (const union gl_constant_value *)image_param->size, 3); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, - (const union gl_constant_value *)image_param->stride, 4); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, - (const union gl_constant_value *)image_param->tiling, 3); - setup_vec4_uniform_value(param + 
BRW_IMAGE_PARAM_SWIZZLING_OFFSET, - (const union gl_constant_value *)image_param->swizzling, 2); - - param += BRW_IMAGE_PARAM_SIZE; - image_param ++; - } - - shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4; - } -} diff --git a/src/vulkan/anv_nir_lower_push_constants.c b/src/vulkan/anv_nir_lower_push_constants.c deleted file mode 100644 index 53cd3d73793..00000000000 --- a/src/vulkan/anv_nir_lower_push_constants.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "anv_nir.h" - -struct lower_push_constants_state { - nir_shader *shader; - bool is_scalar; -}; - -static bool -lower_push_constants_block(nir_block *block, void *void_state) -{ - struct lower_push_constants_state *state = void_state; - - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - /* TODO: Handle indirect push constants */ - if (intrin->intrinsic != nir_intrinsic_load_push_constant) - continue; - - /* This wont work for vec4 stages. */ - assert(state->is_scalar); - - assert(intrin->const_index[0] % 4 == 0); - assert(intrin->const_index[1] == 128); - - /* We just turn them into uniform loads with the appropreate offset */ - intrin->intrinsic = nir_intrinsic_load_uniform; - } - - return true; -} - -void -anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar) -{ - struct lower_push_constants_state state = { - .shader = shader, - .is_scalar = is_scalar, - }; - - nir_foreach_function(shader, function) { - if (function->impl) - nir_foreach_block(function->impl, lower_push_constants_block, &state); - } - - assert(shader->num_uniforms % 4 == 0); - if (is_scalar) - shader->num_uniforms /= 4; - else - shader->num_uniforms = DIV_ROUND_UP(shader->num_uniforms, 16); -} diff --git a/src/vulkan/anv_pass.c b/src/vulkan/anv_pass.c deleted file mode 100644 index d07e9fec6cc..00000000000 --- a/src/vulkan/anv_pass.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: 
- * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_private.h" - -VkResult anv_CreateRenderPass( - VkDevice _device, - const VkRenderPassCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkRenderPass* pRenderPass) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_render_pass *pass; - size_t size; - size_t attachments_offset; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); - - size = sizeof(*pass); - size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); - attachments_offset = size; - size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); - - pass = anv_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pass == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* Clear the subpasses along with the parent pass. This required because - * each array member of anv_subpass must be a valid pointer if not NULL. 
- */ - memset(pass, 0, size); - pass->attachment_count = pCreateInfo->attachmentCount; - pass->subpass_count = pCreateInfo->subpassCount; - pass->attachments = (void *) pass + attachments_offset; - - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - struct anv_render_pass_attachment *att = &pass->attachments[i]; - - att->format = anv_format_for_vk_format(pCreateInfo->pAttachments[i].format); - att->samples = pCreateInfo->pAttachments[i].samples; - att->load_op = pCreateInfo->pAttachments[i].loadOp; - att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; - // att->store_op = pCreateInfo->pAttachments[i].storeOp; - // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; - } - - uint32_t subpass_attachment_count = 0, *p; - for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { - const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; - - subpass_attachment_count += - desc->inputAttachmentCount + - desc->colorAttachmentCount + - /* Count colorAttachmentCount again for resolve_attachments */ - desc->colorAttachmentCount; - } - - pass->subpass_attachments = - anv_alloc2(&device->alloc, pAllocator, - subpass_attachment_count * sizeof(uint32_t), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pass->subpass_attachments == NULL) { - anv_free2(&device->alloc, pAllocator, pass); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - p = pass->subpass_attachments; - for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { - const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; - struct anv_subpass *subpass = &pass->subpasses[i]; - - subpass->input_count = desc->inputAttachmentCount; - subpass->color_count = desc->colorAttachmentCount; - - if (desc->inputAttachmentCount > 0) { - subpass->input_attachments = p; - p += desc->inputAttachmentCount; - - for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { - subpass->input_attachments[j] - = desc->pInputAttachments[j].attachment; - } - } - - if 
(desc->colorAttachmentCount > 0) { - subpass->color_attachments = p; - p += desc->colorAttachmentCount; - - for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - subpass->color_attachments[j] - = desc->pColorAttachments[j].attachment; - } - } - - subpass->has_resolve = false; - if (desc->pResolveAttachments) { - subpass->resolve_attachments = p; - p += desc->colorAttachmentCount; - - for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - uint32_t a = desc->pResolveAttachments[j].attachment; - subpass->resolve_attachments[j] = a; - if (a != VK_ATTACHMENT_UNUSED) - subpass->has_resolve = true; - } - } - - if (desc->pDepthStencilAttachment) { - subpass->depth_stencil_attachment = - desc->pDepthStencilAttachment->attachment; - } else { - subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED; - } - } - - *pRenderPass = anv_render_pass_to_handle(pass); - - return VK_SUCCESS; -} - -void anv_DestroyRenderPass( - VkDevice _device, - VkRenderPass _pass, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_render_pass, pass, _pass); - - anv_free2(&device->alloc, pAllocator, pass->subpass_attachments); - anv_free2(&device->alloc, pAllocator, pass); -} - -void anv_GetRenderAreaGranularity( - VkDevice device, - VkRenderPass renderPass, - VkExtent2D* pGranularity) -{ - *pGranularity = (VkExtent2D) { 1, 1 }; -} diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c deleted file mode 100644 index a7feefb540e..00000000000 --- a/src/vulkan/anv_pipeline.c +++ /dev/null @@ -1,1278 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit 
persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "util/mesa-sha1.h" -#include "anv_private.h" -#include "brw_nir.h" -#include "anv_nir.h" -#include "nir/spirv/nir_spirv.h" - -/* Needed for SWIZZLE macros */ -#include "program/prog_instruction.h" - -// Shader functions - -VkResult anv_CreateShaderModule( - VkDevice _device, - const VkShaderModuleCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkShaderModule* pShaderModule) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_shader_module *module; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); - assert(pCreateInfo->flags == 0); - - module = anv_alloc2(&device->alloc, pAllocator, - sizeof(*module) + pCreateInfo->codeSize, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (module == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - module->nir = NULL; - module->size = pCreateInfo->codeSize; - memcpy(module->data, pCreateInfo->pCode, module->size); - - _mesa_sha1_compute(module->data, module->size, module->sha1); - - *pShaderModule = anv_shader_module_to_handle(module); - - return VK_SUCCESS; -} - -void anv_DestroyShaderModule( - VkDevice _device, - VkShaderModule _module, - const 
VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_shader_module, module, _module); - - anv_free2(&device->alloc, pAllocator, module); -} - -#define SPIR_V_MAGIC_NUMBER 0x07230203 - -/* Eventually, this will become part of anv_CreateShader. Unfortunately, - * we can't do that yet because we don't have the ability to copy nir. - */ -static nir_shader * -anv_shader_compile_to_nir(struct anv_device *device, - struct anv_shader_module *module, - const char *entrypoint_name, - gl_shader_stage stage, - const VkSpecializationInfo *spec_info) -{ - if (strcmp(entrypoint_name, "main") != 0) { - anv_finishme("Multiple shaders per module not really supported"); - } - - const struct brw_compiler *compiler = - device->instance->physicalDevice.compiler; - const nir_shader_compiler_options *nir_options = - compiler->glsl_compiler_options[stage].NirOptions; - - nir_shader *nir; - nir_function *entry_point; - if (module->nir) { - /* Some things such as our meta clear/blit code will give us a NIR - * shader directly. 
In that case, we just ignore the SPIR-V entirely - * and just use the NIR shader */ - nir = module->nir; - nir->options = nir_options; - nir_validate_shader(nir); - - assert(exec_list_length(&nir->functions) == 1); - struct exec_node *node = exec_list_get_head(&nir->functions); - entry_point = exec_node_data(nir_function, node, node); - } else { - uint32_t *spirv = (uint32_t *) module->data; - assert(spirv[0] == SPIR_V_MAGIC_NUMBER); - assert(module->size % 4 == 0); - - uint32_t num_spec_entries = 0; - struct nir_spirv_specialization *spec_entries = NULL; - if (spec_info && spec_info->mapEntryCount > 0) { - num_spec_entries = spec_info->mapEntryCount; - spec_entries = malloc(num_spec_entries * sizeof(*spec_entries)); - for (uint32_t i = 0; i < num_spec_entries; i++) { - const uint32_t *data = - spec_info->pData + spec_info->pMapEntries[i].offset; - assert((const void *)(data + 1) <= - spec_info->pData + spec_info->dataSize); - - spec_entries[i].id = spec_info->pMapEntries[i].constantID; - spec_entries[i].data = *data; - } - } - - entry_point = spirv_to_nir(spirv, module->size / 4, - spec_entries, num_spec_entries, - stage, entrypoint_name, nir_options); - nir = entry_point->shader; - assert(nir->stage == stage); - nir_validate_shader(nir); - - free(spec_entries); - - nir_lower_returns(nir); - nir_validate_shader(nir); - - nir_inline_functions(nir); - nir_validate_shader(nir); - - /* Pick off the single entrypoint that we want */ - foreach_list_typed_safe(nir_function, func, node, &nir->functions) { - if (func != entry_point) - exec_node_remove(&func->node); - } - assert(exec_list_length(&nir->functions) == 1); - entry_point->name = ralloc_strdup(entry_point, "main"); - - nir_remove_dead_variables(nir, nir_var_shader_in); - nir_remove_dead_variables(nir, nir_var_shader_out); - nir_remove_dead_variables(nir, nir_var_system_value); - nir_validate_shader(nir); - - nir_lower_outputs_to_temporaries(entry_point->shader, entry_point); - - nir_lower_system_values(nir); - 
nir_validate_shader(nir); - } - - /* Vulkan uses the separate-shader linking model */ - nir->info.separate_shader = true; - - nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]); - - nir_shader_gather_info(nir, entry_point->impl); - - uint32_t indirect_mask = 0; - if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput) - indirect_mask |= (1 << nir_var_shader_in); - if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) - indirect_mask |= 1 << nir_var_local; - - nir_lower_indirect_derefs(nir, indirect_mask); - - return nir; -} - -void anv_DestroyPipeline( - VkDevice _device, - VkPipeline _pipeline, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - - anv_reloc_list_finish(&pipeline->batch_relocs, - pAllocator ? pAllocator : &device->alloc); - if (pipeline->blend_state.map) - anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); - anv_free2(&device->alloc, pAllocator, pipeline); -} - -static const uint32_t vk_to_gen_primitive_type[] = { - [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, -/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = _3DPRIM_PATCHLIST_1 */ -}; - -static void -populate_sampler_prog_key(const struct brw_device_info *devinfo, - struct brw_sampler_prog_key_data *key) -{ - /* XXX: Handle texture swizzle on HSW- */ - for (int 
i = 0; i < MAX_SAMPLERS; i++) { - /* Assume color sampler, no swizzling. (Works for BDW+) */ - key->swizzles[i] = SWIZZLE_XYZW; - } -} - -static void -populate_vs_prog_key(const struct brw_device_info *devinfo, - struct brw_vs_prog_key *key) -{ - memset(key, 0, sizeof(*key)); - - populate_sampler_prog_key(devinfo, &key->tex); - - /* XXX: Handle vertex input work-arounds */ - - /* XXX: Handle sampler_prog_key */ -} - -static void -populate_gs_prog_key(const struct brw_device_info *devinfo, - struct brw_gs_prog_key *key) -{ - memset(key, 0, sizeof(*key)); - - populate_sampler_prog_key(devinfo, &key->tex); -} - -static void -populate_wm_prog_key(const struct brw_device_info *devinfo, - const VkGraphicsPipelineCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra, - struct brw_wm_prog_key *key) -{ - ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass); - - memset(key, 0, sizeof(*key)); - - populate_sampler_prog_key(devinfo, &key->tex); - - /* TODO: Fill out key->input_slots_valid */ - - /* Vulkan doesn't specify a default */ - key->high_quality_derivatives = false; - - /* XXX Vulkan doesn't appear to specify */ - key->clamp_fragment_color = false; - - /* Vulkan always specifies upper-left coordinates */ - key->drawable_height = 0; - key->render_to_fbo = false; - - if (extra && extra->color_attachment_count >= 0) { - key->nr_color_regions = extra->color_attachment_count; - } else { - key->nr_color_regions = - render_pass->subpasses[info->subpass].color_count; - } - - key->replicate_alpha = key->nr_color_regions > 1 && - info->pMultisampleState && - info->pMultisampleState->alphaToCoverageEnable; - - if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) { - /* We should probably pull this out of the shader, but it's fairly - * harmless to compute it and then let dead-code take care of it. 
- */ - key->persample_shading = info->pMultisampleState->sampleShadingEnable; - if (key->persample_shading) - key->persample_2x = info->pMultisampleState->rasterizationSamples == 2; - - key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable; - key->compute_sample_id = info->pMultisampleState->sampleShadingEnable; - } -} - -static void -populate_cs_prog_key(const struct brw_device_info *devinfo, - struct brw_cs_prog_key *key) -{ - memset(key, 0, sizeof(*key)); - - populate_sampler_prog_key(devinfo, &key->tex); -} - -static nir_shader * -anv_pipeline_compile(struct anv_pipeline *pipeline, - struct anv_shader_module *module, - const char *entrypoint, - gl_shader_stage stage, - const VkSpecializationInfo *spec_info, - struct brw_stage_prog_data *prog_data) -{ - const struct brw_compiler *compiler = - pipeline->device->instance->physicalDevice.compiler; - - nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, - module, entrypoint, stage, - spec_info); - if (nir == NULL) - return NULL; - - anv_nir_lower_push_constants(nir, compiler->scalar_stage[stage]); - - /* Figure out the number of parameters */ - prog_data->nr_params = 0; - - if (nir->num_uniforms > 0) { - /* If the shader uses any push constants at all, we'll just give - * them the maximum possible number - */ - prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); - } - - if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) - prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; - - if (pipeline->bindings[stage].image_count > 0) - prog_data->nr_params += pipeline->bindings[stage].image_count * - BRW_IMAGE_PARAM_SIZE; - - if (prog_data->nr_params > 0) { - /* XXX: I think we're leaking this */ - prog_data->param = (const union gl_constant_value **) - malloc(prog_data->nr_params * sizeof(union gl_constant_value *)); - - /* We now set the param values to be offsets into a - * anv_push_constant_data structure. 
Since the compiler doesn't - * actually dereference any of the gl_constant_value pointers in the - * params array, it doesn't really matter what we put here. - */ - struct anv_push_constants *null_data = NULL; - if (nir->num_uniforms > 0) { - /* Fill out the push constants section of the param array */ - for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) - prog_data->param[i] = (const union gl_constant_value *) - &null_data->client_data[i * sizeof(float)]; - } - } - - /* Set up dynamic offsets */ - anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); - - char surface_usage_mask[256], sampler_usage_mask[256]; - zero(surface_usage_mask); - zero(sampler_usage_mask); - - /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ - if (pipeline->layout) - anv_nir_apply_pipeline_layout(pipeline, nir, prog_data); - - /* All binding table offsets provided by apply_pipeline_layout() are - * relative to the start of the bindint table (plus MAX_RTS for VS). - */ - unsigned bias; - switch (stage) { - case MESA_SHADER_FRAGMENT: - bias = MAX_RTS; - break; - case MESA_SHADER_COMPUTE: - bias = 1; - break; - default: - bias = 0; - break; - } - prog_data->binding_table.size_bytes = 0; - prog_data->binding_table.texture_start = bias; - prog_data->binding_table.ubo_start = bias; - prog_data->binding_table.ssbo_start = bias; - prog_data->binding_table.image_start = bias; - - /* Finish the optimization and compilation process */ - if (nir->stage != MESA_SHADER_VERTEX && - nir->stage != MESA_SHADER_TESS_CTRL && - nir->stage != MESA_SHADER_TESS_EVAL && - nir->stage != MESA_SHADER_FRAGMENT) { - nir = brw_nir_lower_io(nir, &pipeline->device->info, - compiler->scalar_stage[stage], false, NULL); - } - - /* nir_lower_io will only handle the push constants; we need to set this - * to the full number of possible uniforms. 
- */ - nir->num_uniforms = prog_data->nr_params * 4; - - return nir; -} - -static void -anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, - gl_shader_stage stage, - struct brw_stage_prog_data *prog_data) -{ - struct brw_device_info *devinfo = &pipeline->device->info; - uint32_t max_threads[] = { - [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, - [MESA_SHADER_TESS_CTRL] = 0, - [MESA_SHADER_TESS_EVAL] = 0, - [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, - [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, - [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, - }; - - pipeline->prog_data[stage] = prog_data; - pipeline->active_stages |= mesa_to_vk_shader_stage(stage); - pipeline->scratch_start[stage] = pipeline->total_scratch; - pipeline->total_scratch = - align_u32(pipeline->total_scratch, 1024) + - prog_data->total_scratch * max_threads[stage]; -} - -static VkResult -anv_pipeline_compile_vs(struct anv_pipeline *pipeline, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *info, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info) -{ - const struct brw_compiler *compiler = - pipeline->device->instance->physicalDevice.compiler; - struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; - struct brw_vs_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; - - populate_vs_prog_key(&pipeline->device->info, &key); - - if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); - } else { - hash = NULL; - } - - if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); - - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_VERTEX, spec_info, - &prog_data->base.base); - if (nir == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - void *mem_ctx = ralloc_context(NULL); - - if (module->nir 
== NULL) - ralloc_steal(mem_ctx, nir); - - prog_data->inputs_read = nir->info.inputs_read; - if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) - pipeline->writes_point_size = true; - - brw_compute_vue_map(&pipeline->device->info, - &prog_data->base.vue_map, - nir->info.outputs_written, - nir->info.separate_shader); - - unsigned code_size; - const unsigned *shader_code = - brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, - NULL, false, -1, &code_size, NULL); - if (shader_code == NULL) { - ralloc_free(mem_ctx); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - kernel = anv_pipeline_cache_upload_kernel(cache, hash, - shader_code, code_size, - prog_data, sizeof(*prog_data)); - ralloc_free(mem_ctx); - } - - if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { - pipeline->vs_simd8 = kernel; - pipeline->vs_vec4 = NO_KERNEL; - } else { - pipeline->vs_simd8 = NO_KERNEL; - pipeline->vs_vec4 = kernel; - } - - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, - &prog_data->base.base); - - return VK_SUCCESS; -} - -static VkResult -anv_pipeline_compile_gs(struct anv_pipeline *pipeline, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *info, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info) -{ - const struct brw_compiler *compiler = - pipeline->device->instance->physicalDevice.compiler; - struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; - struct brw_gs_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; - - populate_gs_prog_key(&pipeline->device->info, &key); - - if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); - } else { - hash = NULL; - } - - if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); - - nir_shader *nir = anv_pipeline_compile(pipeline, module, 
entrypoint, - MESA_SHADER_GEOMETRY, spec_info, - &prog_data->base.base); - if (nir == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - void *mem_ctx = ralloc_context(NULL); - - if (module->nir == NULL) - ralloc_steal(mem_ctx, nir); - - if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) - pipeline->writes_point_size = true; - - brw_compute_vue_map(&pipeline->device->info, - &prog_data->base.vue_map, - nir->info.outputs_written, - nir->info.separate_shader); - - unsigned code_size; - const unsigned *shader_code = - brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, - NULL, -1, &code_size, NULL); - if (shader_code == NULL) { - ralloc_free(mem_ctx); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - /* TODO: SIMD8 GS */ - kernel = anv_pipeline_cache_upload_kernel(cache, hash, - shader_code, code_size, - prog_data, sizeof(*prog_data)); - - ralloc_free(mem_ctx); - } - - pipeline->gs_kernel = kernel; - - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, - &prog_data->base.base); - - return VK_SUCCESS; -} - -static VkResult -anv_pipeline_compile_fs(struct anv_pipeline *pipeline, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info) -{ - const struct brw_compiler *compiler = - pipeline->device->instance->physicalDevice.compiler; - struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; - struct brw_wm_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; - - populate_wm_prog_key(&pipeline->device->info, info, extra, &key); - - if (pipeline->use_repclear) - key.nr_color_regions = 1; - - if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); - } else { - hash = NULL; - } - - if (module->size == 0 || kernel == 
NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); - - prog_data->binding_table.render_target_start = 0; - - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_FRAGMENT, spec_info, - &prog_data->base); - if (nir == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - nir_function_impl *impl = nir_shader_get_entrypoint(nir)->impl; - nir_foreach_variable_safe(var, &nir->outputs) { - if (var->data.location < FRAG_RESULT_DATA0) - continue; - - unsigned rt = var->data.location - FRAG_RESULT_DATA0; - if (rt >= key.nr_color_regions) { - var->data.mode = nir_var_local; - exec_node_remove(&var->node); - exec_list_push_tail(&impl->locals, &var->node); - } - } - - void *mem_ctx = ralloc_context(NULL); - - if (module->nir == NULL) - ralloc_steal(mem_ctx, nir); - - unsigned code_size; - const unsigned *shader_code = - brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, - NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); - if (shader_code == NULL) { - ralloc_free(mem_ctx); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - kernel = anv_pipeline_cache_upload_kernel(cache, hash, - shader_code, code_size, - prog_data, sizeof(*prog_data)); - - ralloc_free(mem_ctx); - } - - if (prog_data->no_8) - pipeline->ps_simd8 = NO_KERNEL; - else - pipeline->ps_simd8 = kernel; - - if (prog_data->no_8 || prog_data->prog_offset_16) { - pipeline->ps_simd16 = kernel + prog_data->prog_offset_16; - } else { - pipeline->ps_simd16 = NO_KERNEL; - } - - pipeline->ps_ksp2 = 0; - pipeline->ps_grf_start2 = 0; - if (pipeline->ps_simd8 != NO_KERNEL) { - pipeline->ps_ksp0 = pipeline->ps_simd8; - pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; - if (pipeline->ps_simd16 != NO_KERNEL) { - pipeline->ps_ksp2 = pipeline->ps_simd16; - pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; - } - } else if (pipeline->ps_simd16 != NO_KERNEL) { - pipeline->ps_ksp0 = pipeline->ps_simd16; - pipeline->ps_grf_start0 = 
prog_data->dispatch_grf_start_reg_16; - } - - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, - &prog_data->base); - - return VK_SUCCESS; -} - -VkResult -anv_pipeline_compile_cs(struct anv_pipeline *pipeline, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *info, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info) -{ - const struct brw_compiler *compiler = - pipeline->device->instance->physicalDevice.compiler; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - struct brw_cs_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; - - populate_cs_prog_key(&pipeline->device->info, &key); - - if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); - } else { - hash = NULL; - } - - if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); - - prog_data->binding_table.work_groups_start = 0; - - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_COMPUTE, spec_info, - &prog_data->base); - if (nir == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - prog_data->base.total_shared = nir->num_shared; - - void *mem_ctx = ralloc_context(NULL); - - if (module->nir == NULL) - ralloc_steal(mem_ctx, nir); - - unsigned code_size; - const unsigned *shader_code = - brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, - -1, &code_size, NULL); - if (shader_code == NULL) { - ralloc_free(mem_ctx); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - kernel = anv_pipeline_cache_upload_kernel(cache, hash, - shader_code, code_size, - prog_data, sizeof(*prog_data)); - ralloc_free(mem_ctx); - } - - pipeline->cs_simd = kernel; - - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, - &prog_data->base); - - return VK_SUCCESS; -} - -static const int gen8_push_size = 32 * 
1024; - -static void -gen7_compute_urb_partition(struct anv_pipeline *pipeline) -{ - const struct brw_device_info *devinfo = &pipeline->device->info; - bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; - unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; - unsigned vs_entry_size_bytes = vs_size * 64; - bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; - unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; - unsigned gs_entry_size_bytes = gs_size * 64; - - /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): - * - * VS Number of URB Entries must be divisible by 8 if the VS URB Entry - * Allocation Size is less than 9 512-bit URB entries. - * - * Similar text exists for GS. - */ - unsigned vs_granularity = (vs_size < 9) ? 8 : 1; - unsigned gs_granularity = (gs_size < 9) ? 8 : 1; - - /* URB allocations must be done in 8k chunks. */ - unsigned chunk_size_bytes = 8192; - - /* Determine the size of the URB in chunks. */ - unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; - - /* Reserve space for push constants */ - unsigned push_constant_bytes = gen8_push_size; - unsigned push_constant_chunks = - push_constant_bytes / chunk_size_bytes; - - /* Initially, assign each stage the minimum amount of URB space it needs, - * and make a note of how much additional space it "wants" (the amount of - * additional space it could actually make use of). 
- */ - - /* VS has a lower limit on the number of URB entries */ - unsigned vs_chunks = - ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes; - unsigned vs_wants = - ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes - vs_chunks; - - unsigned gs_chunks = 0; - unsigned gs_wants = 0; - if (gs_present) { - /* There are two constraints on the minimum amount of URB space we can - * allocate: - * - * (1) We need room for at least 2 URB entries, since we always operate - * the GS in DUAL_OBJECT mode. - * - * (2) We can't allocate less than nr_gs_entries_granularity. - */ - gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes; - gs_wants = - ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes - gs_chunks; - } - - /* There should always be enough URB space to satisfy the minimum - * requirements of each stage. - */ - unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; - assert(total_needs <= urb_chunks); - - /* Mete out remaining space (if any) in proportion to "wants". */ - unsigned total_wants = vs_wants + gs_wants; - unsigned remaining_space = urb_chunks - total_needs; - if (remaining_space > total_wants) - remaining_space = total_wants; - if (remaining_space > 0) { - unsigned vs_additional = (unsigned) - round(vs_wants * (((double) remaining_space) / total_wants)); - vs_chunks += vs_additional; - remaining_space -= vs_additional; - gs_chunks += remaining_space; - } - - /* Sanity check that we haven't over-allocated. */ - assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); - - /* Finally, compute the number of entries that can fit in the space - * allocated to each stage. 
- */ - unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; - unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; - - /* Since we rounded up when computing *_wants, this may be slightly more - * than the maximum allowed amount, so correct for that. - */ - nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); - nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); - - /* Ensure that we program a multiple of the granularity. */ - nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); - nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); - - /* Finally, sanity check to make sure we have at least the minimum number - * of entries needed for each stage. - */ - assert(nr_vs_entries >= devinfo->urb.min_vs_entries); - if (gs_present) - assert(nr_gs_entries >= 2); - - /* Lay out the URB in the following order: - * - push constants - * - VS - * - GS - */ - pipeline->urb.vs_start = push_constant_chunks; - pipeline->urb.vs_size = vs_size; - pipeline->urb.nr_vs_entries = nr_vs_entries; - - pipeline->urb.gs_start = push_constant_chunks + vs_chunks; - pipeline->urb.gs_size = gs_size; - pipeline->urb.nr_gs_entries = nr_gs_entries; -} - -static void -anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, - const VkGraphicsPipelineCreateInfo *pCreateInfo) -{ - anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; - ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); - struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; - - pipeline->dynamic_state = default_dynamic_state; - - if (pCreateInfo->pDynamicState) { - /* Remove all of the states that are marked as dynamic */ - uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; - for (uint32_t s = 0; s < count; s++) - states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]); - } - - struct anv_dynamic_state *dynamic = &pipeline->dynamic_state; - - dynamic->viewport.count = 
pCreateInfo->pViewportState->viewportCount; - if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { - typed_memcpy(dynamic->viewport.viewports, - pCreateInfo->pViewportState->pViewports, - pCreateInfo->pViewportState->viewportCount); - } - - dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; - if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { - typed_memcpy(dynamic->scissor.scissors, - pCreateInfo->pViewportState->pScissors, - pCreateInfo->pViewportState->scissorCount); - } - - if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { - assert(pCreateInfo->pRasterizationState); - dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth; - } - - if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { - assert(pCreateInfo->pRasterizationState); - dynamic->depth_bias.bias = - pCreateInfo->pRasterizationState->depthBiasConstantFactor; - dynamic->depth_bias.clamp = - pCreateInfo->pRasterizationState->depthBiasClamp; - dynamic->depth_bias.slope = - pCreateInfo->pRasterizationState->depthBiasSlopeFactor; - } - - if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { - assert(pCreateInfo->pColorBlendState); - typed_memcpy(dynamic->blend_constants, - pCreateInfo->pColorBlendState->blendConstants, 4); - } - - /* If there is no depthstencil attachment, then don't read - * pDepthStencilState. The Vulkan spec states that pDepthStencilState may - * be NULL in this case. Even if pDepthStencilState is non-NULL, there is - * no need to override the depthstencil defaults in - * anv_pipeline::dynamic_state when there is no depthstencil attachment. - * - * From the Vulkan spec (20 Oct 2015, git-aa308cb): - * - * pDepthStencilState [...] may only be NULL if renderPass and subpass - * specify a subpass that has no depth/stencil attachment. 
- */ - if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { - if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { - assert(pCreateInfo->pDepthStencilState); - dynamic->depth_bounds.min = - pCreateInfo->pDepthStencilState->minDepthBounds; - dynamic->depth_bounds.max = - pCreateInfo->pDepthStencilState->maxDepthBounds; - } - - if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { - assert(pCreateInfo->pDepthStencilState); - dynamic->stencil_compare_mask.front = - pCreateInfo->pDepthStencilState->front.compareMask; - dynamic->stencil_compare_mask.back = - pCreateInfo->pDepthStencilState->back.compareMask; - } - - if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { - assert(pCreateInfo->pDepthStencilState); - dynamic->stencil_write_mask.front = - pCreateInfo->pDepthStencilState->front.writeMask; - dynamic->stencil_write_mask.back = - pCreateInfo->pDepthStencilState->back.writeMask; - } - - if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { - assert(pCreateInfo->pDepthStencilState); - dynamic->stencil_reference.front = - pCreateInfo->pDepthStencilState->front.reference; - dynamic->stencil_reference.back = - pCreateInfo->pDepthStencilState->back.reference; - } - } - - pipeline->dynamic_state_mask = states; -} - -static void -anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) -{ - struct anv_render_pass *renderpass = NULL; - struct anv_subpass *subpass = NULL; - - /* Assert that all required members of VkGraphicsPipelineCreateInfo are - * present, as explained by the Vulkan (20 Oct 2015, git-aa308cb), Section - * 4.2 Graphics Pipeline. 
- */ - assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); - - renderpass = anv_render_pass_from_handle(info->renderPass); - assert(renderpass); - - if (renderpass != &anv_meta_dummy_renderpass) { - assert(info->subpass < renderpass->subpass_count); - subpass = &renderpass->subpasses[info->subpass]; - } - - assert(info->stageCount >= 1); - assert(info->pVertexInputState); - assert(info->pInputAssemblyState); - assert(info->pViewportState); - assert(info->pRasterizationState); - - if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) - assert(info->pDepthStencilState); - - if (subpass && subpass->color_count > 0) - assert(info->pColorBlendState); - - for (uint32_t i = 0; i < info->stageCount; ++i) { - switch (info->pStages[i].stage) { - case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: - case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: - assert(info->pTessellationState); - break; - default: - break; - } - } -} - -VkResult -anv_pipeline_init(struct anv_pipeline *pipeline, - struct anv_device *device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc) -{ - VkResult result; - - anv_validate { - anv_pipeline_validate_create_info(pCreateInfo); - } - - if (alloc == NULL) - alloc = &device->alloc; - - pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - - result = anv_reloc_list_init(&pipeline->batch_relocs, alloc); - if (result != VK_SUCCESS) - return result; - - pipeline->batch.alloc = alloc; - pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; - pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); - pipeline->batch.relocs = &pipeline->batch_relocs; - - anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); - - if (pCreateInfo->pTessellationState) - 
anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); - - pipeline->use_repclear = extra && extra->use_repclear; - pipeline->writes_point_size = false; - - /* When we free the pipeline, we detect stages based on the NULL status - * of various prog_data pointers. Make them NULL by default. - */ - memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); - memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); - - pipeline->vs_simd8 = NO_KERNEL; - pipeline->vs_vec4 = NO_KERNEL; - pipeline->gs_kernel = NO_KERNEL; - pipeline->ps_ksp0 = NO_KERNEL; - - pipeline->active_stages = 0; - pipeline->total_scratch = 0; - - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - ANV_FROM_HANDLE(anv_shader_module, module, - pCreateInfo->pStages[i].module); - - switch (pCreateInfo->pStages[i].stage) { - case VK_SHADER_STAGE_VERTEX_BIT: - anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module, - pCreateInfo->pStages[i].pName, - pCreateInfo->pStages[i].pSpecializationInfo); - break; - case VK_SHADER_STAGE_GEOMETRY_BIT: - anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module, - pCreateInfo->pStages[i].pName, - pCreateInfo->pStages[i].pSpecializationInfo); - break; - case VK_SHADER_STAGE_FRAGMENT_BIT: - anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, module, - pCreateInfo->pStages[i].pName, - pCreateInfo->pStages[i].pSpecializationInfo); - break; - default: - anv_finishme("Unsupported shader stage"); - } - } - - if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { - /* Vertex is only optional if disable_vs is set */ - assert(extra->disable_vs); - memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); - } - - gen7_compute_urb_partition(pipeline); - - const VkPipelineVertexInputStateCreateInfo *vi_info = - pCreateInfo->pVertexInputState; - - uint64_t inputs_read; - if (extra && extra->disable_vs) { - /* If the VS is disabled, just assume 
the user knows what they're - * doing and apply the layout blindly. This can only come from - * meta, so this *should* be safe. - */ - inputs_read = ~0ull; - } else { - inputs_read = pipeline->vs_prog_data.inputs_read; - } - - pipeline->vb_used = 0; - for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { - const VkVertexInputAttributeDescription *desc = - &vi_info->pVertexAttributeDescriptions[i]; - - if (inputs_read & (1 << (VERT_ATTRIB_GENERIC0 + desc->location))) - pipeline->vb_used |= 1 << desc->binding; - } - - for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { - const VkVertexInputBindingDescription *desc = - &vi_info->pVertexBindingDescriptions[i]; - - pipeline->binding_stride[desc->binding] = desc->stride; - - /* Step rate is programmed per vertex element (attribute), not - * binding. Set up a map of which bindings step per instance, for - * reference by vertex element setup. */ - switch (desc->inputRate) { - default: - case VK_VERTEX_INPUT_RATE_VERTEX: - pipeline->instancing_enable[desc->binding] = false; - break; - case VK_VERTEX_INPUT_RATE_INSTANCE: - pipeline->instancing_enable[desc->binding] = true; - break; - } - } - - const VkPipelineInputAssemblyStateCreateInfo *ia_info = - pCreateInfo->pInputAssemblyState; - pipeline->primitive_restart = ia_info->primitiveRestartEnable; - pipeline->topology = vk_to_gen_primitive_type[ia_info->topology]; - - if (extra && extra->use_rectlist) - pipeline->topology = _3DPRIM_RECTLIST; - - while (anv_block_pool_size(&device->scratch_block_pool) < - pipeline->total_scratch) - anv_block_pool_alloc(&device->scratch_block_pool); - - return VK_SUCCESS; -} - -VkResult -anv_graphics_pipeline_create( - VkDevice _device, - VkPipelineCache _cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *pAllocator, - VkPipeline *pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - 
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - - if (cache == NULL) - cache = &device->default_pipeline_cache; - - switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - return gen75_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); - else - return gen7_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); - case 8: - return gen8_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); - case 9: - return gen9_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); - default: - unreachable("unsupported gen\n"); - } -} - -VkResult anv_CreateGraphicsPipelines( - VkDevice _device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkGraphicsPipelineCreateInfo* pCreateInfos, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipelines) -{ - VkResult result = VK_SUCCESS; - - unsigned i = 0; - for (; i < count; i++) { - result = anv_graphics_pipeline_create(_device, - pipelineCache, - &pCreateInfos[i], - NULL, pAllocator, &pPipelines[i]); - if (result != VK_SUCCESS) { - for (unsigned j = 0; j < i; j++) { - anv_DestroyPipeline(_device, pPipelines[j], pAllocator); - } - - return result; - } - } - - return VK_SUCCESS; -} - -static VkResult anv_compute_pipeline_create( - VkDevice _device, - VkPipelineCache _cache, - const VkComputePipelineCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - - if (cache == NULL) - cache = &device->default_pipeline_cache; - - switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - return gen75_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); - else - return gen7_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); - case 8: - return gen8_compute_pipeline_create(_device, cache, 
pCreateInfo, pAllocator, pPipeline); - case 9: - return gen9_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); - default: - unreachable("unsupported gen\n"); - } -} - -VkResult anv_CreateComputePipelines( - VkDevice _device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkComputePipelineCreateInfo* pCreateInfos, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipelines) -{ - VkResult result = VK_SUCCESS; - - unsigned i = 0; - for (; i < count; i++) { - result = anv_compute_pipeline_create(_device, pipelineCache, - &pCreateInfos[i], - pAllocator, &pPipelines[i]); - if (result != VK_SUCCESS) { - for (unsigned j = 0; j < i; j++) { - anv_DestroyPipeline(_device, pPipelines[j], pAllocator); - } - - return result; - } - } - - return VK_SUCCESS; -} diff --git a/src/vulkan/anv_pipeline_cache.c b/src/vulkan/anv_pipeline_cache.c deleted file mode 100644 index c89bb2a2ee1..00000000000 --- a/src/vulkan/anv_pipeline_cache.c +++ /dev/null @@ -1,405 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "util/mesa-sha1.h" -#include "util/debug.h" -#include "anv_private.h" - -/* Remaining work: - * - * - Compact binding table layout so it's tight and not dependent on - * descriptor set layout. - * - * - Review prog_data struct for size and cacheability: struct - * brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8 - * bit quantities etc; param, pull_param, and image_params are pointers, we - * just need the compation map. use bit fields for all bools, eg - * dual_src_blend. - */ - -void -anv_pipeline_cache_init(struct anv_pipeline_cache *cache, - struct anv_device *device) -{ - cache->device = device; - anv_state_stream_init(&cache->program_stream, - &device->instruction_block_pool); - pthread_mutex_init(&cache->mutex, NULL); - - cache->kernel_count = 0; - cache->total_size = 0; - cache->table_size = 1024; - const size_t byte_size = cache->table_size * sizeof(cache->table[0]); - cache->table = malloc(byte_size); - - /* We don't consider allocation failure fatal, we just start with a 0-sized - * cache. 
*/ - if (cache->table == NULL) - cache->table_size = 0; - else - memset(cache->table, 0xff, byte_size); -} - -void -anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) -{ - anv_state_stream_finish(&cache->program_stream); - pthread_mutex_destroy(&cache->mutex); - free(cache->table); -} - -struct cache_entry { - unsigned char sha1[20]; - uint32_t prog_data_size; - uint32_t kernel_size; - char prog_data[0]; - - /* kernel follows prog_data at next 64 byte aligned address */ -}; - -void -anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info) -{ - struct mesa_sha1 *ctx; - - ctx = _mesa_sha1_init(); - _mesa_sha1_update(ctx, &key, sizeof(key)); - _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); - _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint)); - /* hash in shader stage, pipeline layout? */ - if (spec_info) { - _mesa_sha1_update(ctx, spec_info->pMapEntries, - spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]); - _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize); - } - _mesa_sha1_final(ctx, hash); -} - -uint32_t -anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, void *prog_data) -{ - const uint32_t mask = cache->table_size - 1; - const uint32_t start = (*(uint32_t *) sha1); - - for (uint32_t i = 0; i < cache->table_size; i++) { - const uint32_t index = (start + i) & mask; - const uint32_t offset = cache->table[index]; - - if (offset == ~0) - return NO_KERNEL; - - struct cache_entry *entry = - cache->program_stream.block_pool->map + offset; - if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { - if (prog_data) - memcpy(prog_data, entry->prog_data, entry->prog_data_size); - - const uint32_t preamble_size = - align_u32(sizeof(*entry) + entry->prog_data_size, 64); - - return offset + preamble_size; - } - } - - return NO_KERNEL; -} - -static void 
-anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, - struct cache_entry *entry, uint32_t entry_offset) -{ - const uint32_t mask = cache->table_size - 1; - const uint32_t start = (*(uint32_t *) entry->sha1); - - /* We'll always be able to insert when we get here. */ - assert(cache->kernel_count < cache->table_size / 2); - - for (uint32_t i = 0; i < cache->table_size; i++) { - const uint32_t index = (start + i) & mask; - if (cache->table[index] == ~0) { - cache->table[index] = entry_offset; - break; - } - } - - /* We don't include the alignment padding bytes when we serialize, so - * don't include taht in the the total size. */ - cache->total_size += - sizeof(*entry) + entry->prog_data_size + entry->kernel_size; - cache->kernel_count++; -} - -static VkResult -anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) -{ - const uint32_t table_size = cache->table_size * 2; - const uint32_t old_table_size = cache->table_size; - const size_t byte_size = table_size * sizeof(cache->table[0]); - uint32_t *table; - uint32_t *old_table = cache->table; - - table = malloc(byte_size); - if (table == NULL) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - cache->table = table; - cache->table_size = table_size; - cache->kernel_count = 0; - cache->total_size = 0; - - memset(cache->table, 0xff, byte_size); - for (uint32_t i = 0; i < old_table_size; i++) { - const uint32_t offset = old_table[i]; - if (offset == ~0) - continue; - - struct cache_entry *entry = - cache->program_stream.block_pool->map + offset; - anv_pipeline_cache_add_entry(cache, entry, offset); - } - - free(old_table); - - return VK_SUCCESS; -} - -uint32_t -anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const void *kernel, size_t kernel_size, - const void *prog_data, size_t prog_data_size) -{ - pthread_mutex_lock(&cache->mutex); - struct cache_entry *entry; - - /* Meta pipelines don't have SPIR-V, so we can't hash them. 
- * Consequentally, they just don't get cached. - */ - const uint32_t preamble_size = sha1 ? - align_u32(sizeof(*entry) + prog_data_size, 64) : - 0; - - const uint32_t size = preamble_size + kernel_size; - - assert(size < cache->program_stream.block_pool->block_size); - const struct anv_state state = - anv_state_stream_alloc(&cache->program_stream, size, 64); - - if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { - assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL); - entry = state.map; - memcpy(entry->sha1, sha1, sizeof(entry->sha1)); - entry->prog_data_size = prog_data_size; - memcpy(entry->prog_data, prog_data, prog_data_size); - entry->kernel_size = kernel_size; - - if (cache->kernel_count == cache->table_size / 2) - anv_pipeline_cache_grow(cache); - - /* Failing to grow that hash table isn't fatal, but may mean we don't - * have enough space to add this new kernel. Only add it if there's room. - */ - if (cache->kernel_count < cache->table_size / 2) - anv_pipeline_cache_add_entry(cache, entry, state.offset); - } - - pthread_mutex_unlock(&cache->mutex); - - memcpy(state.map + preamble_size, kernel, kernel_size); - - if (!cache->device->info.has_llc) - anv_state_clflush(state); - - return state.offset + preamble_size; -} - -static void -anv_pipeline_cache_load(struct anv_pipeline_cache *cache, - const void *data, size_t size) -{ - struct anv_device *device = cache->device; - uint8_t uuid[VK_UUID_SIZE]; - struct { - uint32_t device_id; - uint8_t uuid[VK_UUID_SIZE]; - } header; - - if (size < sizeof(header)) - return; - memcpy(&header, data, sizeof(header)); - if (header.device_id != device->chipset_id) - return; - anv_device_get_cache_uuid(uuid); - if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0) - return; - - const void *end = data + size; - const void *p = data + sizeof(header); - - while (p < end) { - /* The kernels aren't 64 byte aligned in the serialized format so - * they're always right after the prog_data. 
- */ - const struct cache_entry *entry = p; - const void *kernel = &entry->prog_data[entry->prog_data_size]; - - anv_pipeline_cache_upload_kernel(cache, entry->sha1, - kernel, entry->kernel_size, - entry->prog_data, entry->prog_data_size); - p = kernel + entry->kernel_size; - } -} - -VkResult anv_CreatePipelineCache( - VkDevice _device, - const VkPipelineCacheCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipelineCache* pPipelineCache) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline_cache *cache; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); - assert(pCreateInfo->flags == 0); - - cache = anv_alloc2(&device->alloc, pAllocator, - sizeof(*cache), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (cache == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_pipeline_cache_init(cache, device); - - if (pCreateInfo->initialDataSize > 0) - anv_pipeline_cache_load(cache, - pCreateInfo->pInitialData, - pCreateInfo->initialDataSize); - - *pPipelineCache = anv_pipeline_cache_to_handle(cache); - - return VK_SUCCESS; -} - -void anv_DestroyPipelineCache( - VkDevice _device, - VkPipelineCache _cache, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - - anv_pipeline_cache_finish(cache); - - anv_free2(&device->alloc, pAllocator, cache); -} - -VkResult anv_GetPipelineCacheData( - VkDevice _device, - VkPipelineCache _cache, - size_t* pDataSize, - void* pData) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - - const size_t size = 4 + VK_UUID_SIZE + cache->total_size; - - if (pData == NULL) { - *pDataSize = size; - return VK_SUCCESS; - } - - if (*pDataSize < size) { - *pDataSize = 0; - return VK_INCOMPLETE; - } - - void *p = pData; - memcpy(p, &device->chipset_id, sizeof(device->chipset_id)); - p += sizeof(device->chipset_id); - - 
anv_device_get_cache_uuid(p); - p += VK_UUID_SIZE; - - struct cache_entry *entry; - for (uint32_t i = 0; i < cache->table_size; i++) { - if (cache->table[i] == ~0) - continue; - - entry = cache->program_stream.block_pool->map + cache->table[i]; - - memcpy(p, entry, sizeof(*entry) + entry->prog_data_size); - p += sizeof(*entry) + entry->prog_data_size; - - void *kernel = (void *) entry + - align_u32(sizeof(*entry) + entry->prog_data_size, 64); - - memcpy(p, kernel, entry->kernel_size); - p += entry->kernel_size; - } - - return VK_SUCCESS; -} - -static void -anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, - struct anv_pipeline_cache *src) -{ - for (uint32_t i = 0; i < src->table_size; i++) { - if (src->table[i] == ~0) - continue; - - struct cache_entry *entry = - src->program_stream.block_pool->map + src->table[i]; - - if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) - continue; - - const void *kernel = (void *) entry + - align_u32(sizeof(*entry) + entry->prog_data_size, 64); - anv_pipeline_cache_upload_kernel(dst, entry->sha1, - kernel, entry->kernel_size, - entry->prog_data, entry->prog_data_size); - } -} - -VkResult anv_MergePipelineCaches( - VkDevice _device, - VkPipelineCache destCache, - uint32_t srcCacheCount, - const VkPipelineCache* pSrcCaches) -{ - ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache); - - for (uint32_t i = 0; i < srcCacheCount; i++) { - ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]); - - anv_pipeline_cache_merge(dst, src); - } - - return VK_SUCCESS; -} diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h deleted file mode 100644 index ba86333525e..00000000000 --- a/src/vulkan/anv_private.h +++ /dev/null @@ -1,1876 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without 
limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_VALGRIND -#include -#include -#define VG(x) x -#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) -#else -#define VG(x) -#endif - -#include "brw_device_info.h" -#include "util/macros.h" -#include "util/list.h" - -/* Pre-declarations needed for WSI entrypoints */ -struct wl_surface; -struct wl_display; -typedef struct xcb_connection_t xcb_connection_t; -typedef uint32_t xcb_visualid_t; -typedef uint32_t xcb_window_t; - -#define VK_USE_PLATFORM_XCB_KHR -#define VK_USE_PLATFORM_WAYLAND_KHR - -#define VK_PROTOTYPES -#include -#include -#include - -#include "anv_entrypoints.h" -#include "anv_gen_macros.h" -#include "brw_context.h" -#include "isl/isl.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_VBS 32 -#define MAX_SETS 8 -#define MAX_RTS 8 -#define MAX_VIEWPORTS 16 -#define MAX_SCISSORS 16 -#define MAX_PUSH_CONSTANTS_SIZE 128 -#define MAX_DYNAMIC_BUFFERS 16 -#define MAX_IMAGES 8 -#define MAX_SAMPLES_LOG2 4 /* SKL supports 16 samples */ 
- -#define anv_noreturn __attribute__((__noreturn__)) -#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - -static inline uint32_t -align_u32(uint32_t v, uint32_t a) -{ - assert(a != 0 && a == (a & -a)); - return (v + a - 1) & ~(a - 1); -} - -static inline uint64_t -align_u64(uint64_t v, uint64_t a) -{ - assert(a != 0 && a == (a & -a)); - return (v + a - 1) & ~(a - 1); -} - -static inline int32_t -align_i32(int32_t v, int32_t a) -{ - assert(a != 0 && a == (a & -a)); - return (v + a - 1) & ~(a - 1); -} - -/** Alignment must be a power of 2. */ -static inline bool -anv_is_aligned(uintmax_t n, uintmax_t a) -{ - assert(a == (a & -a)); - return (n & (a - 1)) == 0; -} - -static inline uint32_t -anv_minify(uint32_t n, uint32_t levels) -{ - if (unlikely(n == 0)) - return 0; - else - return MAX(n >> levels, 1); -} - -static inline float -anv_clamp_f(float f, float min, float max) -{ - assert(min < max); - - if (f > max) - return max; - else if (f < min) - return min; - else - return f; -} - -static inline bool -anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) -{ - if (*inout_mask & clear_mask) { - *inout_mask &= ~clear_mask; - return true; - } else { - return false; - } -} - -#define for_each_bit(b, dword) \ - for (uint32_t __dword = (dword); \ - (b) = __builtin_ffs(__dword) - 1, __dword; \ - __dword &= ~(1 << (b))) - -#define typed_memcpy(dest, src, count) ({ \ - static_assert(sizeof(*src) == sizeof(*dest), ""); \ - memcpy((dest), (src), (count) * sizeof(*(src))); \ -}) - -#define zero(x) (memset(&(x), 0, sizeof(x))) - -/* Define no kernel as 1, since that's an illegal offset for a kernel */ -#define NO_KERNEL 1 - -struct anv_common { - VkStructureType sType; - const void* pNext; -}; - -/* Whenever we generate an error, pass it through this function. Useful for - * debugging, where we can break on it. 
Only call at error site, not when - * propagating errors. Might be useful to plug in a stack trace here. - */ - -VkResult __vk_errorf(VkResult error, const char *file, int line, const char *format, ...); - -#ifdef DEBUG -#define vk_error(error) __vk_errorf(error, __FILE__, __LINE__, NULL); -#define vk_errorf(error, format, ...) __vk_errorf(error, __FILE__, __LINE__, format, ## __VA_ARGS__); -#else -#define vk_error(error) error -#define vk_errorf(error, format, ...) error -#endif - -void __anv_finishme(const char *file, int line, const char *format, ...) - anv_printflike(3, 4); -void anv_loge(const char *format, ...) anv_printflike(1, 2); -void anv_loge_v(const char *format, va_list va); - -/** - * Print a FINISHME message, including its source location. - */ -#define anv_finishme(format, ...) \ - __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); - -/* A non-fatal assert. Useful for debugging. */ -#ifdef DEBUG -#define anv_assert(x) ({ \ - if (unlikely(!(x))) \ - fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ -}) -#else -#define anv_assert(x) -#endif - -/** - * If a block of code is annotated with anv_validate, then the block runs only - * in debug builds. - */ -#ifdef DEBUG -#define anv_validate if (1) -#else -#define anv_validate if (0) -#endif - -void anv_abortf(const char *format, ...) anv_noreturn anv_printflike(1, 2); -void anv_abortfv(const char *format, va_list va) anv_noreturn; - -#define stub_return(v) \ - do { \ - anv_finishme("stub %s", __func__); \ - return (v); \ - } while (0) - -#define stub() \ - do { \ - anv_finishme("stub %s", __func__); \ - return; \ - } while (0) - -/** - * A dynamically growable, circular buffer. Elements are added at head and - * removed from tail. head and tail are free-running uint32_t indices and we - * only compute the modulo with size when accessing the array. This way, - * number of bytes in the queue is always head - tail, even in case of - * wraparound. 
- */ - -struct anv_vector { - uint32_t head; - uint32_t tail; - uint32_t element_size; - uint32_t size; - void *data; -}; - -int anv_vector_init(struct anv_vector *queue, uint32_t element_size, uint32_t size); -void *anv_vector_add(struct anv_vector *queue); -void *anv_vector_remove(struct anv_vector *queue); - -static inline int -anv_vector_length(struct anv_vector *queue) -{ - return (queue->head - queue->tail) / queue->element_size; -} - -static inline void * -anv_vector_head(struct anv_vector *vector) -{ - assert(vector->tail < vector->head); - return (void *)((char *)vector->data + - ((vector->head - vector->element_size) & - (vector->size - 1))); -} - -static inline void * -anv_vector_tail(struct anv_vector *vector) -{ - return (void *)((char *)vector->data + (vector->tail & (vector->size - 1))); -} - -static inline void -anv_vector_finish(struct anv_vector *queue) -{ - free(queue->data); -} - -#define anv_vector_foreach(elem, queue) \ - static_assert(__builtin_types_compatible_p(__typeof__(queue), struct anv_vector *), ""); \ - for (uint32_t __anv_vector_offset = (queue)->tail; \ - elem = (queue)->data + (__anv_vector_offset & ((queue)->size - 1)), __anv_vector_offset < (queue)->head; \ - __anv_vector_offset += (queue)->element_size) - -struct anv_bo { - uint32_t gem_handle; - - /* Index into the current validation list. This is used by the - * validation list building alrogithm to track which buffers are already - * in the validation list so that we can ensure uniqueness. - */ - uint32_t index; - - /* Last known offset. This value is provided by the kernel when we - * execbuf and is used as the presumed offset for the next bunch of - * relocations. - */ - uint64_t offset; - - uint64_t size; - void *map; - - /* We need to set the WRITE flag on winsys bos so GEM will know we're - * writing to them and synchronize uses on other rings (eg if the display - * server uses the blitter ring). 
- */ - bool is_winsys_bo; -}; - -/* Represents a lock-free linked list of "free" things. This is used by - * both the block pool and the state pools. Unfortunately, in order to - * solve the ABA problem, we can't use a single uint32_t head. - */ -union anv_free_list { - struct { - int32_t offset; - - /* A simple count that is incremented every time the head changes. */ - uint32_t count; - }; - uint64_t u64; -}; - -#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) - -struct anv_block_state { - union { - struct { - uint32_t next; - uint32_t end; - }; - uint64_t u64; - }; -}; - -struct anv_block_pool { - struct anv_device *device; - - struct anv_bo bo; - - /* The offset from the start of the bo to the "center" of the block - * pool. Pointers to allocated blocks are given by - * bo.map + center_bo_offset + offsets. - */ - uint32_t center_bo_offset; - - /* Current memory map of the block pool. This pointer may or may not - * point to the actual beginning of the block pool memory. If - * anv_block_pool_alloc_back has ever been called, then this pointer - * will point to the "center" position of the buffer and all offsets - * (negative or positive) given out by the block pool alloc functions - * will be valid relative to this pointer. - * - * In particular, map == bo.map + center_offset - */ - void *map; - int fd; - - /** - * Array of mmaps and gem handles owned by the block pool, reclaimed when - * the block pool is destroyed. - */ - struct anv_vector mmap_cleanups; - - uint32_t block_size; - - union anv_free_list free_list; - struct anv_block_state state; - - union anv_free_list back_free_list; - struct anv_block_state back_state; -}; - -/* Block pools are backed by a fixed-size 2GB memfd */ -#define BLOCK_POOL_MEMFD_SIZE (1ull << 32) - -/* The center of the block pool is also the middle of the memfd. This may - * change in the future if we decide differently for some reason. 
- */ -#define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2) - -static inline uint32_t -anv_block_pool_size(struct anv_block_pool *pool) -{ - return pool->state.end + pool->back_state.end; -} - -struct anv_state { - int32_t offset; - uint32_t alloc_size; - void *map; -}; - -struct anv_fixed_size_state_pool { - size_t state_size; - union anv_free_list free_list; - struct anv_block_state block; -}; - -#define ANV_MIN_STATE_SIZE_LOG2 6 -#define ANV_MAX_STATE_SIZE_LOG2 10 - -#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2) - -struct anv_state_pool { - struct anv_block_pool *block_pool; - struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; -}; - -struct anv_state_stream_block; - -struct anv_state_stream { - struct anv_block_pool *block_pool; - - /* The current working block */ - struct anv_state_stream_block *block; - - /* Offset at which the current block starts */ - uint32_t start; - /* Offset at which to allocate the next state */ - uint32_t next; - /* Offset at which the current block ends */ - uint32_t end; -}; - -#define CACHELINE_SIZE 64 -#define CACHELINE_MASK 63 - -static inline void -anv_clflush_range(void *start, size_t size) -{ - void *p = (void *) (((uintptr_t) start) & ~CACHELINE_MASK); - void *end = start + size; - - __builtin_ia32_mfence(); - while (p < end) { - __builtin_ia32_clflush(p); - p += CACHELINE_SIZE; - } -} - -static void inline -anv_state_clflush(struct anv_state state) -{ - anv_clflush_range(state.map, state.alloc_size); -} - -void anv_block_pool_init(struct anv_block_pool *pool, - struct anv_device *device, uint32_t block_size); -void anv_block_pool_finish(struct anv_block_pool *pool); -int32_t anv_block_pool_alloc(struct anv_block_pool *pool); -int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool); -void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset); -void anv_state_pool_init(struct anv_state_pool *pool, - struct anv_block_pool *block_pool); -void 
anv_state_pool_finish(struct anv_state_pool *pool); -struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, - size_t state_size, size_t alignment); -void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state); -void anv_state_stream_init(struct anv_state_stream *stream, - struct anv_block_pool *block_pool); -void anv_state_stream_finish(struct anv_state_stream *stream); -struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, - uint32_t size, uint32_t alignment); - -/** - * Implements a pool of re-usable BOs. The interface is identical to that - * of block_pool except that each block is its own BO. - */ -struct anv_bo_pool { - struct anv_device *device; - - uint32_t bo_size; - - void *free_list; -}; - -void anv_bo_pool_init(struct anv_bo_pool *pool, - struct anv_device *device, uint32_t block_size); -void anv_bo_pool_finish(struct anv_bo_pool *pool); -VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); -void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); - - -void *anv_resolve_entrypoint(uint32_t index); - -extern struct anv_dispatch_table dtable; - -#define ANV_CALL(func) ({ \ - if (dtable.func == NULL) { \ - size_t idx = offsetof(struct anv_dispatch_table, func) / sizeof(void *); \ - dtable.entrypoints[idx] = anv_resolve_entrypoint(idx); \ - } \ - dtable.func; \ -}) - -static inline void * -anv_alloc(const VkAllocationCallbacks *alloc, - size_t size, size_t align, - VkSystemAllocationScope scope) -{ - return alloc->pfnAllocation(alloc->pUserData, size, align, scope); -} - -static inline void * -anv_realloc(const VkAllocationCallbacks *alloc, - void *ptr, size_t size, size_t align, - VkSystemAllocationScope scope) -{ - return alloc->pfnReallocation(alloc->pUserData, ptr, size, align, scope); -} - -static inline void -anv_free(const VkAllocationCallbacks *alloc, void *data) -{ - alloc->pfnFree(alloc->pUserData, data); -} - -static inline void * -anv_alloc2(const 
VkAllocationCallbacks *parent_alloc, - const VkAllocationCallbacks *alloc, - size_t size, size_t align, - VkSystemAllocationScope scope) -{ - if (alloc) - return anv_alloc(alloc, size, align, scope); - else - return anv_alloc(parent_alloc, size, align, scope); -} - -static inline void -anv_free2(const VkAllocationCallbacks *parent_alloc, - const VkAllocationCallbacks *alloc, - void *data) -{ - if (alloc) - anv_free(alloc, data); - else - anv_free(parent_alloc, data); -} - -struct anv_physical_device { - VK_LOADER_DATA _loader_data; - - struct anv_instance * instance; - uint32_t chipset_id; - const char * path; - const char * name; - const struct brw_device_info * info; - uint64_t aperture_size; - struct brw_compiler * compiler; - struct isl_device isl_dev; -}; - -struct anv_wsi_interaface; - -#define VK_ICD_WSI_PLATFORM_MAX 5 - -struct anv_instance { - VK_LOADER_DATA _loader_data; - - VkAllocationCallbacks alloc; - - uint32_t apiVersion; - int physicalDeviceCount; - struct anv_physical_device physicalDevice; - - struct anv_wsi_interface * wsi[VK_ICD_WSI_PLATFORM_MAX]; -}; - -VkResult anv_init_wsi(struct anv_instance *instance); -void anv_finish_wsi(struct anv_instance *instance); - -struct anv_meta_state { - VkAllocationCallbacks alloc; - - /** - * Use array element `i` for images with `2^i` samples. - */ - struct { - /** - * Pipeline N is used to clear color attachment N of the current - * subpass. - * - * HACK: We use one pipeline per color attachment to work around the - * compiler's inability to dynamically set the render target index of - * the render target write message. - */ - struct anv_pipeline *color_pipelines[MAX_RTS]; - - struct anv_pipeline *depth_only_pipeline; - struct anv_pipeline *stencil_only_pipeline; - struct anv_pipeline *depthstencil_pipeline; - } clear[1 + MAX_SAMPLES_LOG2]; - - struct { - VkRenderPass render_pass; - - /** Pipeline that blits from a 1D image. */ - VkPipeline pipeline_1d_src; - - /** Pipeline that blits from a 2D image. 
*/ - VkPipeline pipeline_2d_src; - - /** Pipeline that blits from a 3D image. */ - VkPipeline pipeline_3d_src; - - VkPipelineLayout pipeline_layout; - VkDescriptorSetLayout ds_layout; - } blit; - - struct { - /** Pipeline [i] resolves an image with 2^(i+1) samples. */ - VkPipeline pipelines[MAX_SAMPLES_LOG2]; - - VkRenderPass pass; - VkPipelineLayout pipeline_layout; - VkDescriptorSetLayout ds_layout; - } resolve; -}; - -struct anv_queue { - VK_LOADER_DATA _loader_data; - - struct anv_device * device; - - struct anv_state_pool * pool; -}; - -struct anv_pipeline_cache { - struct anv_device * device; - struct anv_state_stream program_stream; - pthread_mutex_t mutex; - - uint32_t total_size; - uint32_t table_size; - uint32_t kernel_count; - uint32_t *table; -}; - -void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, - struct anv_device *device); -void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); -uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, void *prog_data); -uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const void *kernel, - size_t kernel_size, - const void *prog_data, - size_t prog_data_size); - -struct anv_device { - VK_LOADER_DATA _loader_data; - - VkAllocationCallbacks alloc; - - struct anv_instance * instance; - uint32_t chipset_id; - struct brw_device_info info; - struct isl_device isl_dev; - int context_id; - int fd; - - struct anv_bo_pool batch_bo_pool; - - struct anv_block_pool dynamic_state_block_pool; - struct anv_state_pool dynamic_state_pool; - - struct anv_block_pool instruction_block_pool; - struct anv_pipeline_cache default_pipeline_cache; - - struct anv_block_pool surface_state_block_pool; - struct anv_state_pool surface_state_pool; - - struct anv_bo workaround_bo; - - struct anv_meta_state meta_state; - - struct anv_state border_colors; - - struct anv_queue queue; - - struct anv_block_pool scratch_block_pool; - 
- pthread_mutex_t mutex; -}; - -VkResult gen7_init_device_state(struct anv_device *device); -VkResult gen75_init_device_state(struct anv_device *device); -VkResult gen8_init_device_state(struct anv_device *device); -VkResult gen9_init_device_state(struct anv_device *device); - -void anv_device_get_cache_uuid(void *uuid); - - -void* anv_gem_mmap(struct anv_device *device, - uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); -void anv_gem_munmap(void *p, uint64_t size); -uint32_t anv_gem_create(struct anv_device *device, size_t size); -void anv_gem_close(struct anv_device *device, uint32_t gem_handle); -uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size); -int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns); -int anv_gem_execbuffer(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf); -int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, - uint32_t stride, uint32_t tiling); -int anv_gem_create_context(struct anv_device *device); -int anv_gem_destroy_context(struct anv_device *device, int context); -int anv_gem_get_param(int fd, uint32_t param); -bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling); -int anv_gem_get_aperture(int fd, uint64_t *size); -int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); -uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); -int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching); -int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, - uint32_t read_domains, uint32_t write_domain); - -VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); - -struct anv_reloc_list { - size_t num_relocs; - size_t array_length; - struct drm_i915_gem_relocation_entry * relocs; - struct anv_bo ** reloc_bos; -}; - -VkResult anv_reloc_list_init(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc); -void 
anv_reloc_list_finish(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc); - -uint64_t anv_reloc_list_add(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc, - uint32_t offset, struct anv_bo *target_bo, - uint32_t delta); - -struct anv_batch_bo { - /* Link in the anv_cmd_buffer.owned_batch_bos list */ - struct list_head link; - - struct anv_bo bo; - - /* Bytes actually consumed in this batch BO */ - size_t length; - - /* Last seen surface state block pool bo offset */ - uint32_t last_ss_pool_bo_offset; - - struct anv_reloc_list relocs; -}; - -struct anv_batch { - const VkAllocationCallbacks * alloc; - - void * start; - void * end; - void * next; - - struct anv_reloc_list * relocs; - - /* This callback is called (with the associated user data) in the event - * that the batch runs out of space. - */ - VkResult (*extend_cb)(struct anv_batch *, void *); - void * user_data; -}; - -void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); -void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); -uint64_t anv_batch_emit_reloc(struct anv_batch *batch, - void *location, struct anv_bo *bo, uint32_t offset); -VkResult anv_device_submit_simple_batch(struct anv_device *device, - struct anv_batch *batch); - -struct anv_address { - struct anv_bo *bo; - uint32_t offset; -}; - -#define __gen_address_type struct anv_address -#define __gen_user_data struct anv_batch - -static inline uint64_t -__gen_combine_address(struct anv_batch *batch, void *location, - const struct anv_address address, uint32_t delta) -{ - if (address.bo == NULL) { - return address.offset + delta; - } else { - assert(batch->start <= location && location < batch->end); - - return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta); - } -} - -/* Wrapper macros needed to work around preprocessor argument issues. In - * particular, arguments don't get pre-evaluated if they are concatenated. 
- * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the - * GENX macro won't get evaluated if the emit macro contains "cmd ## foo". - * We can work around this easily enough with these helpers. - */ -#define __anv_cmd_length(cmd) cmd ## _length -#define __anv_cmd_length_bias(cmd) cmd ## _length_bias -#define __anv_cmd_header(cmd) cmd ## _header -#define __anv_cmd_pack(cmd) cmd ## _pack - -#define anv_batch_emit(batch, cmd, ...) do { \ - void *__dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ - struct cmd __template = { \ - __anv_cmd_header(cmd), \ - __VA_ARGS__ \ - }; \ - __anv_cmd_pack(cmd)(batch, __dst, &__template); \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, __anv_cmd_length(cmd) * 4)); \ - } while (0) - -#define anv_batch_emitn(batch, n, cmd, ...) ({ \ - void *__dst = anv_batch_emit_dwords(batch, n); \ - struct cmd __template = { \ - __anv_cmd_header(cmd), \ - .DWordLength = n - __anv_cmd_length_bias(cmd), \ - __VA_ARGS__ \ - }; \ - __anv_cmd_pack(cmd)(batch, __dst, &__template); \ - __dst; \ - }) - -#define anv_batch_emit_merge(batch, dwords0, dwords1) \ - do { \ - uint32_t *dw; \ - \ - static_assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1), "mismatch merge"); \ - dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \ - for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \ - dw[i] = (dwords0)[i] | (dwords1)[i]; \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ - } while (0) - -#define anv_state_pool_emit(pool, cmd, align, ...) 
({ \ - const uint32_t __size = __anv_cmd_length(cmd) * 4; \ - struct anv_state __state = \ - anv_state_pool_alloc((pool), __size, align); \ - struct cmd __template = { \ - __VA_ARGS__ \ - }; \ - __anv_cmd_pack(cmd)(NULL, __state.map, &__template); \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(__state.map, __anv_cmd_length(cmd) * 4)); \ - if (!(pool)->block_pool->device->info.has_llc) \ - anv_state_clflush(__state); \ - __state; \ - }) - -#define GEN7_MOCS (struct GEN7_MEMORY_OBJECT_CONTROL_STATE) { \ - .GraphicsDataTypeGFDT = 0, \ - .LLCCacheabilityControlLLCCC = 0, \ - .L3CacheabilityControlL3CC = 1, \ -} - -#define GEN75_MOCS (struct GEN75_MEMORY_OBJECT_CONTROL_STATE) { \ - .LLCeLLCCacheabilityControlLLCCC = 0, \ - .L3CacheabilityControlL3CC = 1, \ -} - -#define GEN8_MOCS { \ - .MemoryTypeLLCeLLCCacheabilityControl = WB, \ - .TargetCache = L3DefertoPATforLLCeLLCselection, \ - .AgeforQUADLRU = 0 \ - } - -/* Skylake: MOCS is now an index into an array of 62 different caching - * configurations programmed by the kernel. 
- */ - -#define GEN9_MOCS { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 2 \ - } - -#define GEN9_MOCS_PTE { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 1 \ - } - -struct anv_device_memory { - struct anv_bo bo; - uint32_t type_index; - VkDeviceSize map_size; - void * map; -}; - -/** - * Header for Vertex URB Entry (VUE) - */ -struct anv_vue_header { - uint32_t Reserved; - uint32_t RTAIndex; /* RenderTargetArrayIndex */ - uint32_t ViewportIndex; - float PointWidth; -}; - -struct anv_descriptor_set_binding_layout { - /* Number of array elements in this binding */ - uint16_t array_size; - - /* Index into the flattend descriptor set */ - uint16_t descriptor_index; - - /* Index into the dynamic state array for a dynamic buffer */ - int16_t dynamic_offset_index; - - /* Index into the descriptor set buffer views */ - int16_t buffer_index; - - struct { - /* Index into the binding table for the associated surface */ - int16_t surface_index; - - /* Index into the sampler table for the associated sampler */ - int16_t sampler_index; - - /* Index into the image table for the associated image */ - int16_t image_index; - } stage[MESA_SHADER_STAGES]; - - /* Immutable samplers (or NULL if no immutable samplers) */ - struct anv_sampler **immutable_samplers; -}; - -struct anv_descriptor_set_layout { - /* Number of bindings in this descriptor set */ - uint16_t binding_count; - - /* Total size of the descriptor set with room for all array entries */ - uint16_t size; - - /* Shader stages affected by this descriptor set */ - uint16_t shader_stages; - - /* Number of buffers in this descriptor set */ - uint16_t buffer_count; - - /* Number of dynamic offsets used by this descriptor set */ - uint16_t dynamic_offset_count; - - /* Bindings in this descriptor set */ - struct anv_descriptor_set_binding_layout binding[0]; -}; - -struct anv_descriptor { - VkDescriptorType type; - - union { - struct { - struct anv_image_view *image_view; - 
struct anv_sampler *sampler; - }; - - struct anv_buffer_view *buffer_view; - }; -}; - -struct anv_descriptor_set { - const struct anv_descriptor_set_layout *layout; - uint32_t buffer_count; - struct anv_buffer_view *buffer_views; - struct anv_descriptor descriptors[0]; -}; - -VkResult -anv_descriptor_set_create(struct anv_device *device, - const struct anv_descriptor_set_layout *layout, - struct anv_descriptor_set **out_set); - -void -anv_descriptor_set_destroy(struct anv_device *device, - struct anv_descriptor_set *set); - -struct anv_pipeline_binding { - /* The descriptor set this surface corresponds to */ - uint16_t set; - - /* Offset into the descriptor set */ - uint16_t offset; -}; - -struct anv_pipeline_layout { - struct { - struct anv_descriptor_set_layout *layout; - uint32_t dynamic_offset_start; - } set[MAX_SETS]; - - uint32_t num_sets; - - struct { - bool has_dynamic_offsets; - } stage[MESA_SHADER_STAGES]; -}; - -struct anv_buffer { - struct anv_device * device; - VkDeviceSize size; - - VkBufferUsageFlags usage; - - /* Set when bound */ - struct anv_bo * bo; - VkDeviceSize offset; -}; - -enum anv_cmd_dirty_bits { - ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */ - ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */ - ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */ - ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */ - ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */ - ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */ - ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */ - ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */ - ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */ - ANV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1, - ANV_CMD_DIRTY_PIPELINE = 1 << 9, - 
ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10, - ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11, -}; -typedef uint32_t anv_cmd_dirty_mask_t; - -struct anv_vertex_binding { - struct anv_buffer * buffer; - VkDeviceSize offset; -}; - -struct anv_push_constants { - /* Current allocated size of this push constants data structure. - * Because a decent chunk of it may not be used (images on SKL, for - * instance), we won't actually allocate the entire structure up-front. - */ - uint32_t size; - - /* Push constant data provided by the client through vkPushConstants */ - uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; - - /* Our hardware only provides zero-based vertex and instance id so, in - * order to satisfy the vulkan requirements, we may have to push one or - * both of these into the shader. - */ - uint32_t base_vertex; - uint32_t base_instance; - - /* Offsets and ranges for dynamically bound buffers */ - struct { - uint32_t offset; - uint32_t range; - } dynamic[MAX_DYNAMIC_BUFFERS]; - - /* Image data for image_load_store on pre-SKL */ - struct brw_image_param images[MAX_IMAGES]; -}; - -struct anv_dynamic_state { - struct { - uint32_t count; - VkViewport viewports[MAX_VIEWPORTS]; - } viewport; - - struct { - uint32_t count; - VkRect2D scissors[MAX_SCISSORS]; - } scissor; - - float line_width; - - struct { - float bias; - float clamp; - float slope; - } depth_bias; - - float blend_constants[4]; - - struct { - float min; - float max; - } depth_bounds; - - struct { - uint32_t front; - uint32_t back; - } stencil_compare_mask; - - struct { - uint32_t front; - uint32_t back; - } stencil_write_mask; - - struct { - uint32_t front; - uint32_t back; - } stencil_reference; -}; - -extern const struct anv_dynamic_state default_dynamic_state; - -void anv_dynamic_state_copy(struct anv_dynamic_state *dest, - const struct anv_dynamic_state *src, - uint32_t copy_mask); - -/** - * Attachment state when recording a renderpass instance. - * - * The clear value is valid only if there exists a pending clear. 
- */ -struct anv_attachment_state { - VkImageAspectFlags pending_clear_aspects; - VkClearValue clear_value; -}; - -/** State required while building cmd buffer */ -struct anv_cmd_state { - /* PIPELINE_SELECT.PipelineSelection */ - uint32_t current_pipeline; - uint32_t current_l3_config; - uint32_t vb_dirty; - anv_cmd_dirty_mask_t dirty; - anv_cmd_dirty_mask_t compute_dirty; - uint32_t num_workgroups_offset; - struct anv_bo *num_workgroups_bo; - VkShaderStageFlags descriptors_dirty; - VkShaderStageFlags push_constants_dirty; - uint32_t scratch_size; - struct anv_pipeline * pipeline; - struct anv_pipeline * compute_pipeline; - struct anv_framebuffer * framebuffer; - struct anv_render_pass * pass; - struct anv_subpass * subpass; - uint32_t restart_index; - struct anv_vertex_binding vertex_bindings[MAX_VBS]; - struct anv_descriptor_set * descriptors[MAX_SETS]; - struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; - struct anv_state binding_tables[MESA_SHADER_STAGES]; - struct anv_state samplers[MESA_SHADER_STAGES]; - struct anv_dynamic_state dynamic; - bool need_query_wa; - - /** - * Array length is anv_cmd_state::pass::attachment_count. Array content is - * valid only when recording a render pass instance. - */ - struct anv_attachment_state * attachments; - - struct { - struct anv_buffer * index_buffer; - uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */ - uint32_t index_offset; - } gen7; -}; - -struct anv_cmd_pool { - VkAllocationCallbacks alloc; - struct list_head cmd_buffers; -}; - -#define ANV_CMD_BUFFER_BATCH_SIZE 8192 - -enum anv_cmd_buffer_exec_mode { - ANV_CMD_BUFFER_EXEC_MODE_PRIMARY, - ANV_CMD_BUFFER_EXEC_MODE_EMIT, - ANV_CMD_BUFFER_EXEC_MODE_CHAIN, - ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, -}; - -struct anv_cmd_buffer { - VK_LOADER_DATA _loader_data; - - struct anv_device * device; - - struct anv_cmd_pool * pool; - struct list_head pool_link; - - struct anv_batch batch; - - /* Fields required for the actual chain of anv_batch_bo's. 
- * - * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). - */ - struct list_head batch_bos; - enum anv_cmd_buffer_exec_mode exec_mode; - - /* A vector of anv_batch_bo pointers for every batch or surface buffer - * referenced by this command buffer - * - * initialized by anv_cmd_buffer_init_batch_bo_chain() - */ - struct anv_vector seen_bbos; - - /* A vector of int32_t's for every block of binding tables. - * - * initialized by anv_cmd_buffer_init_batch_bo_chain() - */ - struct anv_vector bt_blocks; - uint32_t bt_next; - struct anv_reloc_list surface_relocs; - - /* Information needed for execbuf - * - * These fields are generated by anv_cmd_buffer_prepare_execbuf(). - */ - struct { - struct drm_i915_gem_execbuffer2 execbuf; - - struct drm_i915_gem_exec_object2 * objects; - uint32_t bo_count; - struct anv_bo ** bos; - - /* Allocated length of the 'objects' and 'bos' arrays */ - uint32_t array_length; - - bool need_reloc; - } execbuf2; - - /* Serial for tracking buffer completion */ - uint32_t serial; - - /* Stream objects for storing temporary data */ - struct anv_state_stream surface_state_stream; - struct anv_state_stream dynamic_state_stream; - - VkCommandBufferUsageFlags usage_flags; - VkCommandBufferLevel level; - - struct anv_cmd_state state; -}; - -VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, - struct anv_cmd_buffer *secondary); -void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); - -VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *bt_state); -VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, - unsigned 
stage, struct anv_state *state); -uint32_t gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer); -void gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, - uint32_t stages); - -struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, - const void *data, uint32_t size, uint32_t alignment); -struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, - uint32_t *a, uint32_t *b, - uint32_t dwords, uint32_t alignment); - -struct anv_address -anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer); -struct anv_state -anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, - uint32_t entries, uint32_t *state_offset); -struct anv_state -anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer); -struct anv_state -anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment); - -VkResult -anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); - -void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); -void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); - -void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen75_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen9_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); - -void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); - -void anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, - const VkRenderPassBeginInfo *info); - -void gen7_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void gen75_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void gen8_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct 
anv_subpass *subpass); -void gen9_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); - -void gen7_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); -void gen75_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); -void gen8_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); -void gen9_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); - -void gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); -void gen75_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); -void gen9_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); - -void gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); -void gen75_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); -void gen9_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); - -struct anv_state -anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage); -struct anv_state -anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); - -void anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer); - -const struct anv_image_view * -anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); - -void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); - -struct anv_fence { - struct anv_bo bo; - struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 exec2_objects[1]; - bool ready; -}; - -struct anv_event { - uint64_t semaphore; - struct anv_state state; -}; - -struct nir_shader; - -struct anv_shader_module { - struct nir_shader * nir; - - unsigned char sha1[20]; - uint32_t size; - char 
data[0]; -}; - -void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info); - -static inline gl_shader_stage -vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) -{ - assert(__builtin_popcount(vk_stage) == 1); - return ffs(vk_stage) - 1; -} - -static inline VkShaderStageFlagBits -mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) -{ - return (1 << mesa_stage); -} - -#define ANV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1) - -#define anv_foreach_stage(stage, stage_bits) \ - for (gl_shader_stage stage, \ - __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \ - stage = __builtin_ffs(__tmp) - 1, __tmp; \ - __tmp &= ~(1 << (stage))) - -struct anv_pipeline_bind_map { - uint32_t surface_count; - uint32_t sampler_count; - uint32_t image_count; - - struct anv_pipeline_binding * surface_to_descriptor; - struct anv_pipeline_binding * sampler_to_descriptor; -}; - -struct anv_pipeline { - struct anv_device * device; - struct anv_batch batch; - uint32_t batch_data[512]; - struct anv_reloc_list batch_relocs; - uint32_t dynamic_state_mask; - struct anv_dynamic_state dynamic_state; - - struct anv_pipeline_layout * layout; - struct anv_pipeline_bind_map bindings[MESA_SHADER_STAGES]; - - bool use_repclear; - - struct brw_vs_prog_data vs_prog_data; - struct brw_wm_prog_data wm_prog_data; - struct brw_gs_prog_data gs_prog_data; - struct brw_cs_prog_data cs_prog_data; - bool writes_point_size; - struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; - uint32_t scratch_start[MESA_SHADER_STAGES]; - uint32_t total_scratch; - struct { - uint32_t vs_start; - uint32_t vs_size; - uint32_t nr_vs_entries; - uint32_t gs_start; - uint32_t gs_size; - uint32_t nr_gs_entries; - } urb; - - VkShaderStageFlags active_stages; - struct anv_state blend_state; - uint32_t vs_simd8; - uint32_t vs_vec4; - uint32_t ps_simd8; - uint32_t ps_simd16; - uint32_t ps_ksp0; - uint32_t 
ps_ksp2; - uint32_t ps_grf_start0; - uint32_t ps_grf_start2; - uint32_t gs_kernel; - uint32_t cs_simd; - - uint32_t vb_used; - uint32_t binding_stride[MAX_VBS]; - bool instancing_enable[MAX_VBS]; - bool primitive_restart; - uint32_t topology; - - uint32_t cs_thread_width_max; - uint32_t cs_right_mask; - - struct { - uint32_t sf[7]; - uint32_t depth_stencil_state[3]; - } gen7; - - struct { - uint32_t sf[4]; - uint32_t raster[5]; - uint32_t wm_depth_stencil[3]; - } gen8; - - struct { - uint32_t wm_depth_stencil[4]; - } gen9; -}; - -struct anv_graphics_pipeline_create_info { - /** - * If non-negative, overrides the color attachment count of the pipeline's - * subpass. - */ - int8_t color_attachment_count; - - bool use_repclear; - bool disable_viewport; - bool disable_scissor; - bool disable_vs; - bool use_rectlist; -}; - -VkResult -anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc); - -VkResult -anv_pipeline_compile_cs(struct anv_pipeline *pipeline, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *info, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info); - -VkResult -anv_graphics_pipeline_create(VkDevice device, - VkPipelineCache cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen7_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen75_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const 
VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen8_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen9_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen7_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen75_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen8_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen9_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -struct anv_format_swizzle { - unsigned r:2; - unsigned g:2; - unsigned b:2; - unsigned a:2; -}; - -struct anv_format { - const VkFormat vk_format; - const char *name; - enum isl_format isl_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ - const struct isl_format_layout *isl_layout; - struct anv_format_swizzle swizzle; - bool has_depth; - bool has_stencil; -}; - -const struct anv_format * -anv_format_for_vk_format(VkFormat format); - -enum isl_format 
-anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, - VkImageTiling tiling, struct anv_format_swizzle *swizzle); - -static inline bool -anv_format_is_color(const struct anv_format *format) -{ - return !format->has_depth && !format->has_stencil; -} - -static inline bool -anv_format_is_depth_or_stencil(const struct anv_format *format) -{ - return format->has_depth || format->has_stencil; -} - -/** - * Subsurface of an anv_image. - */ -struct anv_surface { - struct isl_surf isl; - - /** - * Offset from VkImage's base address, as bound by vkBindImageMemory(). - */ - uint32_t offset; -}; - -struct anv_image { - VkImageType type; - /* The original VkFormat provided by the client. This may not match any - * of the actual surface formats. - */ - VkFormat vk_format; - const struct anv_format *format; - VkExtent3D extent; - uint32_t levels; - uint32_t array_size; - uint32_t samples; /**< VkImageCreateInfo::samples */ - VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ - VkImageTiling tiling; /** VkImageCreateInfo::tiling */ - - VkDeviceSize size; - uint32_t alignment; - - /* Set when bound */ - struct anv_bo *bo; - VkDeviceSize offset; - - /** - * Image subsurfaces - * - * For each foo, anv_image::foo_surface is valid if and only if - * anv_image::format has a foo aspect. - * - * The hardware requires that the depth buffer and stencil buffer be - * separate surfaces. From Vulkan's perspective, though, depth and stencil - * reside in the same VkImage. To satisfy both the hardware and Vulkan, we - * allocate the depth and stencil buffers as separate surfaces in the same - * bo. - */ - union { - struct anv_surface color_surface; - - struct { - struct anv_surface depth_surface; - struct anv_surface stencil_surface; - }; - }; -}; - -struct anv_image_view { - const struct anv_image *image; /**< VkImageViewCreateInfo::image */ - struct anv_bo *bo; - uint32_t offset; /**< Offset into bo. 
*/ - - VkImageAspectFlags aspect_mask; - VkFormat vk_format; - VkComponentMapping swizzle; - enum isl_format format; - uint32_t base_layer; - uint32_t base_mip; - VkExtent3D level_0_extent; /**< Extent of ::image's level 0 adjusted for ::vk_format. */ - VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ - - /** RENDER_SURFACE_STATE when using image as a color render target. */ - struct anv_state color_rt_surface_state; - - /** RENDER_SURFACE_STATE when using image as a sampler surface. */ - struct anv_state sampler_surface_state; - - /** RENDER_SURFACE_STATE when using image as a storage image. */ - struct anv_state storage_surface_state; -}; - -struct anv_image_create_info { - const VkImageCreateInfo *vk_info; - isl_tiling_flags_t isl_tiling_flags; - uint32_t stride; -}; - -VkResult anv_image_create(VkDevice _device, - const struct anv_image_create_info *info, - const VkAllocationCallbacks* alloc, - VkImage *pImage); - -struct anv_surface * -anv_image_get_surface_for_aspect_mask(struct anv_image *image, - VkImageAspectFlags aspect_mask); - -void anv_image_view_init(struct anv_image_view *view, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer, - uint32_t offset); - -void -anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen7_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen75_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen8_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - 
VkImageUsageFlagBits usage); -void -gen9_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); - -struct anv_buffer_view { - enum isl_format format; /**< VkBufferViewCreateInfo::format */ - struct anv_bo *bo; - uint32_t offset; /**< Offset into bo. */ - uint64_t range; /**< VkBufferViewCreateInfo::range */ - - struct anv_state surface_state; - struct anv_state storage_surface_state; -}; - -const struct anv_format * -anv_format_for_descriptor_type(VkDescriptorType type); - -void anv_fill_buffer_surface_state(struct anv_device *device, - struct anv_state state, - enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); - -void gen7_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); -void gen75_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); -void gen8_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); -void gen9_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); - -void anv_image_view_fill_image_param(struct anv_device *device, - struct anv_image_view *view, - struct brw_image_param *param); -void anv_buffer_view_fill_image_param(struct anv_device *device, - struct anv_buffer_view *view, - struct brw_image_param *param); - -struct anv_sampler { - uint32_t state[4]; -}; - -struct anv_framebuffer { - uint32_t width; - uint32_t height; - uint32_t layers; - - uint32_t attachment_count; - struct anv_image_view * attachments[0]; -}; - -struct anv_subpass { - uint32_t input_count; - uint32_t * input_attachments; - uint32_t color_count; - uint32_t * color_attachments; - uint32_t * resolve_attachments; - uint32_t depth_stencil_attachment; - - /** Subpass has at least 
one resolve attachment */ - bool has_resolve; -}; - -struct anv_render_pass_attachment { - const struct anv_format *format; - uint32_t samples; - VkAttachmentLoadOp load_op; - VkAttachmentLoadOp stencil_load_op; -}; - -struct anv_render_pass { - uint32_t attachment_count; - uint32_t subpass_count; - uint32_t * subpass_attachments; - struct anv_render_pass_attachment * attachments; - struct anv_subpass subpasses[0]; -}; - -extern struct anv_render_pass anv_meta_dummy_renderpass; - -struct anv_query_pool_slot { - uint64_t begin; - uint64_t end; - uint64_t available; -}; - -struct anv_query_pool { - VkQueryType type; - uint32_t slots; - struct anv_bo bo; -}; - -VkResult anv_device_init_meta(struct anv_device *device); -void anv_device_finish_meta(struct anv_device *device); - -void *anv_lookup_entrypoint(const char *name); - -void anv_dump_image_to_ppm(struct anv_device *device, - struct anv_image *image, unsigned miplevel, - unsigned array_layer, const char *filename); - -#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \ - \ - static inline struct __anv_type * \ - __anv_type ## _from_handle(__VkType _handle) \ - { \ - return (struct __anv_type *) _handle; \ - } \ - \ - static inline __VkType \ - __anv_type ## _to_handle(struct __anv_type *_obj) \ - { \ - return (__VkType) _obj; \ - } - -#define ANV_DEFINE_NONDISP_HANDLE_CASTS(__anv_type, __VkType) \ - \ - static inline struct __anv_type * \ - __anv_type ## _from_handle(__VkType _handle) \ - { \ - return (struct __anv_type *)(uintptr_t) _handle; \ - } \ - \ - static inline __VkType \ - __anv_type ## _to_handle(struct __anv_type *_obj) \ - { \ - return (__VkType)(uintptr_t) _obj; \ - } - -#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ - struct __anv_type *__name = __anv_type ## _from_handle(__handle) - -ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCommandBuffer) -ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) -ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) -ANV_DEFINE_HANDLE_CASTS(anv_physical_device, 
VkPhysicalDevice) -ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) - -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCommandPool) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView); -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, VkPipelineCache) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) - -#define ANV_DEFINE_STRUCT_CASTS(__anv_type, __VkType) \ - \ - static inline const __VkType * \ - __anv_type ## _to_ ## __VkType(const struct __anv_type *__anv_obj) \ - { \ - return (const __VkType *) __anv_obj; \ - } - -#define ANV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name) \ - const __VkType *__vk_name = anv_common_to_ ## __VkType(__common_name) - -ANV_DEFINE_STRUCT_CASTS(anv_common, VkMemoryBarrier) -ANV_DEFINE_STRUCT_CASTS(anv_common, VkBufferMemoryBarrier) -ANV_DEFINE_STRUCT_CASTS(anv_common, VkImageMemoryBarrier) - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c deleted file mode 100644 index e45b519c0f3..00000000000 --- a/src/vulkan/anv_query.c +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - 
* - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -VkResult anv_CreateQueryPool( - VkDevice _device, - const VkQueryPoolCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkQueryPool* pQueryPool) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_query_pool *pool; - VkResult result; - uint32_t slot_size; - uint64_t size; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); - - switch (pCreateInfo->queryType) { - case VK_QUERY_TYPE_OCCLUSION: - case VK_QUERY_TYPE_TIMESTAMP: - break; - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - return VK_ERROR_INCOMPATIBLE_DRIVER; - default: - assert(!"Invalid query type"); - } - - slot_size = sizeof(struct anv_query_pool_slot); - pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pool == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - pool->type = pCreateInfo->queryType; - pool->slots = pCreateInfo->queryCount; - - size = pCreateInfo->queryCount * slot_size; - result = anv_bo_init_new(&pool->bo, device, size); - if (result != VK_SUCCESS) - goto fail; - - pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0); - - *pQueryPool = anv_query_pool_to_handle(pool); - - return VK_SUCCESS; - - fail: - anv_free2(&device->alloc, pAllocator, pool); - - return result; -} - -void anv_DestroyQueryPool( - VkDevice _device, - VkQueryPool _pool, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_query_pool, pool, _pool); - - anv_gem_munmap(pool->bo.map, pool->bo.size); - anv_gem_close(device, pool->bo.gem_handle); - anv_free2(&device->alloc, pAllocator, pool); -} - -VkResult anv_GetQueryPoolResults( - VkDevice _device, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - size_t dataSize, - void* pData, - VkDeviceSize stride, - VkQueryResultFlags flags) -{ - ANV_FROM_HANDLE(anv_device, device, 
_device); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - int64_t timeout = INT64_MAX; - uint64_t result; - int ret; - - assert(pool->type == VK_QUERY_TYPE_OCCLUSION || - pool->type == VK_QUERY_TYPE_TIMESTAMP); - - if (pData == NULL) - return VK_SUCCESS; - - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); - if (ret == -1) { - /* We don't know the real error. */ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "gem_wait failed %m"); - } - } - - void *data_end = pData + dataSize; - struct anv_query_pool_slot *slot = pool->bo.map; - - for (uint32_t i = 0; i < queryCount; i++) { - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: { - result = slot[firstQuery + i].end - slot[firstQuery + i].begin; - break; - } - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - unreachable("pipeline stats not supported"); - case VK_QUERY_TYPE_TIMESTAMP: { - result = slot[firstQuery + i].begin; - break; - } - default: - unreachable("invalid pool type"); - } - - if (flags & VK_QUERY_RESULT_64_BIT) { - uint64_t *dst = pData; - dst[0] = result; - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) - dst[1] = slot[firstQuery + i].available; - } else { - uint32_t *dst = pData; - if (result > UINT32_MAX) - result = UINT32_MAX; - dst[0] = result; - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) - dst[1] = slot[firstQuery + i].available; - } - - pData += stride; - if (pData >= data_end) - break; - } - - return VK_SUCCESS; -} - -void anv_CmdResetQueryPool( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount) -{ - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - for (uint32_t i = 0; i < queryCount; i++) { - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - case VK_QUERY_TYPE_TIMESTAMP: { - struct anv_query_pool_slot *slot = pool->bo.map; - slot[firstQuery + i].available = 0; - break; - } - default: - assert(!"Invalid query type"); - } - } -} diff --git a/src/vulkan/anv_util.c 
b/src/vulkan/anv_util.c deleted file mode 100644 index 22fd01c9495..00000000000 --- a/src/vulkan/anv_util.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -#include "anv_private.h" - -/** Log an error message. */ -void anv_printflike(1, 2) -anv_loge(const char *format, ...) -{ - va_list va; - - va_start(va, format); - anv_loge_v(format, va); - va_end(va); -} - -/** \see anv_loge() */ -void -anv_loge_v(const char *format, va_list va) -{ - fprintf(stderr, "vk: error: "); - vfprintf(stderr, format, va); - fprintf(stderr, "\n"); -} - -void anv_printflike(3, 4) -__anv_finishme(const char *file, int line, const char *format, ...) 
-{ - va_list ap; - char buffer[256]; - - va_start(ap, format); - vsnprintf(buffer, sizeof(buffer), format, ap); - va_end(ap); - - fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); -} - -void anv_noreturn anv_printflike(1, 2) -anv_abortf(const char *format, ...) -{ - va_list va; - - va_start(va, format); - anv_abortfv(format, va); - va_end(va); -} - -void anv_noreturn -anv_abortfv(const char *format, va_list va) -{ - fprintf(stderr, "vk: error: "); - vfprintf(stderr, format, va); - fprintf(stderr, "\n"); - abort(); -} - -VkResult -__vk_errorf(VkResult error, const char *file, int line, const char *format, ...) -{ - va_list ap; - char buffer[256]; - -#define ERROR_CASE(error) case error: error_str = #error; break; - - const char *error_str; - switch ((int32_t)error) { - - /* Core errors */ - ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY) - ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY) - ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED) - ERROR_CASE(VK_ERROR_DEVICE_LOST) - ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED) - ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT) - ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT) - ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER) - - /* Extension errors */ - ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR) - - default: - assert(!"Unknown error"); - error_str = "unknown error"; - } - -#undef ERROR_CASE - - if (format) { - va_start(ap, format); - vsnprintf(buffer, sizeof(buffer), format, ap); - va_end(ap); - - fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str); - } else { - fprintf(stderr, "%s:%d: %s\n", file, line, error_str); - } - - return error; -} - -int -anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) -{ - assert(util_is_power_of_two(size)); - assert(element_size < size && util_is_power_of_two(element_size)); - - vector->head = 0; - vector->tail = 0; - vector->element_size = element_size; - vector->size = size; - vector->data = malloc(size); - - return vector->data != NULL; -} - -void * -anv_vector_add(struct anv_vector 
*vector) -{ - uint32_t offset, size, split, tail; - void *data; - - if (vector->head - vector->tail == vector->size) { - size = vector->size * 2; - data = malloc(size); - if (data == NULL) - return NULL; - split = align_u32(vector->tail, vector->size); - tail = vector->tail & (vector->size - 1); - if (vector->head - split < vector->size) { - memcpy(data + tail, - vector->data + tail, - split - vector->tail); - memcpy(data + vector->size, - vector->data, vector->head - split); - } else { - memcpy(data + tail, - vector->data + tail, - vector->head - vector->tail); - } - free(vector->data); - vector->data = data; - vector->size = size; - } - - assert(vector->head - vector->tail < vector->size); - - offset = vector->head & (vector->size - 1); - vector->head += vector->element_size; - - return vector->data + offset; -} - -void * -anv_vector_remove(struct anv_vector *vector) -{ - uint32_t offset; - - if (vector->head == vector->tail) - return NULL; - - assert(vector->head - vector->tail <= vector->size); - - offset = vector->tail & (vector->size - 1); - vector->tail += vector->element_size; - - return vector->data + offset; -} diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c deleted file mode 100644 index c5911a3635b..00000000000 --- a/src/vulkan/anv_wsi.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_wsi.h" - -VkResult -anv_init_wsi(struct anv_instance *instance) -{ - VkResult result; - - result = anv_x11_init_wsi(instance); - if (result != VK_SUCCESS) - return result; - -#ifdef HAVE_WAYLAND_PLATFORM - result = anv_wl_init_wsi(instance); - if (result != VK_SUCCESS) { - anv_x11_finish_wsi(instance); - return result; - } -#endif - - return VK_SUCCESS; -} - -void -anv_finish_wsi(struct anv_instance *instance) -{ -#ifdef HAVE_WAYLAND_PLATFORM - anv_wl_finish_wsi(instance); -#endif - anv_x11_finish_wsi(instance); -} - -void anv_DestroySurfaceKHR( - VkInstance _instance, - VkSurfaceKHR _surface, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); - ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); - - anv_free2(&instance->alloc, pAllocator, surface); -} - -VkResult anv_GetPhysicalDeviceSurfaceSupportKHR( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - VkSurfaceKHR _surface, - VkBool32* pSupported) -{ - ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); - struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - - return iface->get_support(surface, device, queueFamilyIndex, pSupported); -} - -VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR( - VkPhysicalDevice physicalDevice, - VkSurfaceKHR _surface, - VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) -{ - ANV_FROM_HANDLE(anv_physical_device, device, 
physicalDevice); - ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); - struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - - return iface->get_capabilities(surface, device, pSurfaceCapabilities); -} - -VkResult anv_GetPhysicalDeviceSurfaceFormatsKHR( - VkPhysicalDevice physicalDevice, - VkSurfaceKHR _surface, - uint32_t* pSurfaceFormatCount, - VkSurfaceFormatKHR* pSurfaceFormats) -{ - ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); - struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - - return iface->get_formats(surface, device, pSurfaceFormatCount, - pSurfaceFormats); -} - -VkResult anv_GetPhysicalDeviceSurfacePresentModesKHR( - VkPhysicalDevice physicalDevice, - VkSurfaceKHR _surface, - uint32_t* pPresentModeCount, - VkPresentModeKHR* pPresentModes) -{ - ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); - struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - - return iface->get_present_modes(surface, device, pPresentModeCount, - pPresentModes); -} - -VkResult anv_CreateSwapchainKHR( - VkDevice _device, - const VkSwapchainCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSwapchainKHR* pSwapchain) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface); - struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - struct anv_swapchain *swapchain; - - VkResult result = iface->create_swapchain(surface, device, pCreateInfo, - pAllocator, &swapchain); - if (result != VK_SUCCESS) - return result; - - *pSwapchain = anv_swapchain_to_handle(swapchain); - - return VK_SUCCESS; -} - -void anv_DestroySwapchainKHR( - VkDevice device, - VkSwapchainKHR _swapchain, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_swapchain, swapchain, 
_swapchain); - - swapchain->destroy(swapchain, pAllocator); -} - -VkResult anv_GetSwapchainImagesKHR( - VkDevice device, - VkSwapchainKHR _swapchain, - uint32_t* pSwapchainImageCount, - VkImage* pSwapchainImages) -{ - ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); - - return swapchain->get_images(swapchain, pSwapchainImageCount, - pSwapchainImages); -} - -VkResult anv_AcquireNextImageKHR( - VkDevice device, - VkSwapchainKHR _swapchain, - uint64_t timeout, - VkSemaphore semaphore, - VkFence fence, - uint32_t* pImageIndex) -{ - ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); - - return swapchain->acquire_next_image(swapchain, timeout, semaphore, - pImageIndex); -} - -VkResult anv_QueuePresentKHR( - VkQueue _queue, - const VkPresentInfoKHR* pPresentInfo) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - VkResult result; - - for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { - ANV_FROM_HANDLE(anv_swapchain, swapchain, pPresentInfo->pSwapchains[i]); - - assert(swapchain->device == queue->device); - - result = swapchain->queue_present(swapchain, queue, - pPresentInfo->pImageIndices[i]); - /* TODO: What if one of them returns OUT_OF_DATE? 
*/ - if (result != VK_SUCCESS) - return result; - } - - return VK_SUCCESS; -} diff --git a/src/vulkan/anv_wsi.h b/src/vulkan/anv_wsi.h deleted file mode 100644 index 6e9ff9b8447..00000000000 --- a/src/vulkan/anv_wsi.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include "anv_private.h" - -struct anv_swapchain; - -struct anv_wsi_interface { - VkResult (*get_support)(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t queueFamilyIndex, - VkBool32* pSupported); - VkResult (*get_capabilities)(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); - VkResult (*get_formats)(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t* pSurfaceFormatCount, - VkSurfaceFormatKHR* pSurfaceFormats); - VkResult (*get_present_modes)(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t* pPresentModeCount, - VkPresentModeKHR* pPresentModes); - VkResult (*create_swapchain)(VkIcdSurfaceBase *surface, - struct anv_device *device, - const VkSwapchainCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - struct anv_swapchain **swapchain); -}; - -struct anv_swapchain { - struct anv_device *device; - - VkResult (*destroy)(struct anv_swapchain *swapchain, - const VkAllocationCallbacks *pAllocator); - VkResult (*get_images)(struct anv_swapchain *swapchain, - uint32_t *pCount, VkImage *pSwapchainImages); - VkResult (*acquire_next_image)(struct anv_swapchain *swap_chain, - uint64_t timeout, VkSemaphore semaphore, - uint32_t *image_index); - VkResult (*queue_present)(struct anv_swapchain *swap_chain, - struct anv_queue *queue, - uint32_t image_index); -}; - -ANV_DEFINE_NONDISP_HANDLE_CASTS(_VkIcdSurfaceBase, VkSurfaceKHR) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swapchain, VkSwapchainKHR) - -VkResult anv_x11_init_wsi(struct anv_instance *instance); -void anv_x11_finish_wsi(struct anv_instance *instance); -VkResult anv_wl_init_wsi(struct anv_instance *instance); -void anv_wl_finish_wsi(struct anv_instance *instance); diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c deleted file mode 100644 index 6f25eaf43ea..00000000000 --- a/src/vulkan/anv_wsi_wayland.c +++ /dev/null 
@@ -1,871 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include - -#include "anv_wsi.h" - -#include - -#define MIN_NUM_IMAGES 2 - -struct wsi_wl_display { - struct wl_display * display; - struct wl_drm * drm; - - /* Vector of VkFormats supported */ - struct anv_vector formats; - - uint32_t capabilities; -}; - -struct wsi_wayland { - struct anv_wsi_interface base; - - struct anv_instance * instance; - - pthread_mutex_t mutex; - /* Hash table of wl_display -> wsi_wl_display mappings */ - struct hash_table * displays; -}; - -static void -wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format) -{ - /* Don't add a format that's already in the list */ - VkFormat *f; - anv_vector_foreach(f, &display->formats) - if (*f == format) - return; - - /* Don't add formats which aren't supported by the driver */ - if (anv_format_for_vk_format(format)->isl_format == - ISL_FORMAT_UNSUPPORTED) { - return; - } - - f = anv_vector_add(&display->formats); - if (f) - *f = format; -} - -static void -drm_handle_device(void *data, struct wl_drm *drm, const char *name) -{ - fprintf(stderr, "wl_drm.device(%s)\n", name); -} - -static uint32_t -wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha) -{ - switch (vk_format) { - /* TODO: Figure out what all the formats mean and make this table - * correct. - */ -#if 0 - case VK_FORMAT_R4G4B4A4_UNORM: - return alpha ? WL_DRM_FORMAT_ABGR4444 : WL_DRM_FORMAT_XBGR4444; - case VK_FORMAT_R5G6B5_UNORM: - return WL_DRM_FORMAT_BGR565; - case VK_FORMAT_R5G5B5A1_UNORM: - return alpha ? WL_DRM_FORMAT_ABGR1555 : WL_DRM_FORMAT_XBGR1555; - case VK_FORMAT_R8G8B8_UNORM: - return WL_DRM_FORMAT_XBGR8888; - case VK_FORMAT_R8G8B8A8_UNORM: - return alpha ? WL_DRM_FORMAT_ABGR8888 : WL_DRM_FORMAT_XBGR8888; - case VK_FORMAT_R10G10B10A2_UNORM: - return alpha ? WL_DRM_FORMAT_ABGR2101010 : WL_DRM_FORMAT_XBGR2101010; - case VK_FORMAT_B4G4R4A4_UNORM: - return alpha ? 
WL_DRM_FORMAT_ARGB4444 : WL_DRM_FORMAT_XRGB4444; - case VK_FORMAT_B5G6R5_UNORM: - return WL_DRM_FORMAT_RGB565; - case VK_FORMAT_B5G5R5A1_UNORM: - return alpha ? WL_DRM_FORMAT_XRGB1555 : WL_DRM_FORMAT_XRGB1555; -#endif - case VK_FORMAT_B8G8R8_SRGB: - return WL_DRM_FORMAT_BGRX8888; - case VK_FORMAT_B8G8R8A8_SRGB: - return alpha ? WL_DRM_FORMAT_ARGB8888 : WL_DRM_FORMAT_XRGB8888; -#if 0 - case VK_FORMAT_B10G10R10A2_UNORM: - return alpha ? WL_DRM_FORMAT_ARGB2101010 : WL_DRM_FORMAT_XRGB2101010; -#endif - - default: - assert("!Unsupported Vulkan format"); - return 0; - } -} - -static void -drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format) -{ - struct wsi_wl_display *display = data; - - switch (wl_format) { -#if 0 - case WL_DRM_FORMAT_ABGR4444: - case WL_DRM_FORMAT_XBGR4444: - wsi_wl_display_add_vk_format(display, VK_FORMAT_R4G4B4A4_UNORM); - break; - case WL_DRM_FORMAT_BGR565: - wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G6B5_UNORM); - break; - case WL_DRM_FORMAT_ABGR1555: - case WL_DRM_FORMAT_XBGR1555: - wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G5B5A1_UNORM); - break; - case WL_DRM_FORMAT_XBGR8888: - wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8_UNORM); - /* fallthrough */ - case WL_DRM_FORMAT_ABGR8888: - wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8A8_UNORM); - break; - case WL_DRM_FORMAT_ABGR2101010: - case WL_DRM_FORMAT_XBGR2101010: - wsi_wl_display_add_vk_format(display, VK_FORMAT_R10G10B10A2_UNORM); - break; - case WL_DRM_FORMAT_ARGB4444: - case WL_DRM_FORMAT_XRGB4444: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B4G4R4A4_UNORM); - break; - case WL_DRM_FORMAT_RGB565: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G6R5_UNORM); - break; - case WL_DRM_FORMAT_ARGB1555: - case WL_DRM_FORMAT_XRGB1555: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G5R5A1_UNORM); - break; -#endif - case WL_DRM_FORMAT_XRGB8888: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_SRGB); - /* fallthrough */ - case 
WL_DRM_FORMAT_ARGB8888: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_SRGB); - break; -#if 0 - case WL_DRM_FORMAT_ARGB2101010: - case WL_DRM_FORMAT_XRGB2101010: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B10G10R10A2_UNORM); - break; -#endif - } -} - -static void -drm_handle_authenticated(void *data, struct wl_drm *drm) -{ -} - -static void -drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t capabilities) -{ - struct wsi_wl_display *display = data; - - display->capabilities = capabilities; -} - -static const struct wl_drm_listener drm_listener = { - drm_handle_device, - drm_handle_format, - drm_handle_authenticated, - drm_handle_capabilities, -}; - -static void -registry_handle_global(void *data, struct wl_registry *registry, - uint32_t name, const char *interface, uint32_t version) -{ - struct wsi_wl_display *display = data; - - if (strcmp(interface, "wl_drm") == 0) { - assert(display->drm == NULL); - - assert(version >= 2); - display->drm = wl_registry_bind(registry, name, &wl_drm_interface, 2); - - if (display->drm) - wl_drm_add_listener(display->drm, &drm_listener, display); - } -} - -static void -registry_handle_global_remove(void *data, struct wl_registry *registry, - uint32_t name) -{ /* No-op */ } - -static const struct wl_registry_listener registry_listener = { - registry_handle_global, - registry_handle_global_remove -}; - -static void -wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display) -{ - anv_vector_finish(&display->formats); - if (display->drm) - wl_drm_destroy(display->drm); - anv_free(&wsi->instance->alloc, display); -} - -static struct wsi_wl_display * -wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display) -{ - struct wsi_wl_display *display = - anv_alloc(&wsi->instance->alloc, sizeof(*display), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!display) - return NULL; - - memset(display, 0, sizeof(*display)); - - display->display = wl_display; - - if 
(!anv_vector_init(&display->formats, sizeof(VkFormat), 8)) - goto fail; - - struct wl_registry *registry = wl_display_get_registry(wl_display); - if (!registry) - return NULL; - - wl_registry_add_listener(registry, ®istry_listener, display); - - /* Round-rip to get the wl_drm global */ - wl_display_roundtrip(wl_display); - - if (!display->drm) - goto fail; - - /* Round-rip to get wl_drm formats and capabilities */ - wl_display_roundtrip(wl_display); - - /* We need prime support */ - if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME)) - goto fail; - - /* We don't need this anymore */ - wl_registry_destroy(registry); - - return display; - -fail: - if (registry) - wl_registry_destroy(registry); - - wsi_wl_display_destroy(wsi, display); - return NULL; -} - -static struct wsi_wl_display * -wsi_wl_get_display(struct anv_instance *instance, struct wl_display *wl_display) -{ - struct wsi_wayland *wsi = - (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; - - pthread_mutex_lock(&wsi->mutex); - - struct hash_entry *entry = _mesa_hash_table_search(wsi->displays, - wl_display); - if (!entry) { - /* We're about to make a bunch of blocking calls. Let's drop the - * mutex for now so we don't block up too badly. 
- */ - pthread_mutex_unlock(&wsi->mutex); - - struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display); - - pthread_mutex_lock(&wsi->mutex); - - entry = _mesa_hash_table_search(wsi->displays, wl_display); - if (entry) { - /* Oops, someone raced us to it */ - wsi_wl_display_destroy(wsi, display); - } else { - entry = _mesa_hash_table_insert(wsi->displays, wl_display, display); - } - } - - pthread_mutex_unlock(&wsi->mutex); - - return entry->data; -} - -VkBool32 anv_GetPhysicalDeviceWaylandPresentationSupportKHR( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - struct wl_display* display) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - - return wsi_wl_get_display(physical_device->instance, display) != NULL; -} - -static VkResult -wsi_wl_surface_get_support(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t queueFamilyIndex, - VkBool32* pSupported) -{ - *pSupported = true; - - return VK_SUCCESS; -} - -static const VkPresentModeKHR present_modes[] = { - VK_PRESENT_MODE_MAILBOX_KHR, - VK_PRESENT_MODE_FIFO_KHR, -}; - -static VkResult -wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - VkSurfaceCapabilitiesKHR* caps) -{ - caps->minImageCount = MIN_NUM_IMAGES; - caps->maxImageCount = 4; - caps->currentExtent = (VkExtent2D) { -1, -1 }; - caps->minImageExtent = (VkExtent2D) { 1, 1 }; - caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; - caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; - caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; - caps->maxImageArrayLayers = 1; - - caps->supportedCompositeAlpha = - VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR | - VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; - - caps->supportedUsageFlags = - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - - return VK_SUCCESS; -} - -static VkResult 
-wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface, - struct anv_physical_device *device, - uint32_t* pSurfaceFormatCount, - VkSurfaceFormatKHR* pSurfaceFormats) -{ - VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; - struct wsi_wl_display *display = - wsi_wl_get_display(device->instance, surface->display); - - uint32_t count = anv_vector_length(&display->formats); - - if (pSurfaceFormats == NULL) { - *pSurfaceFormatCount = count; - return VK_SUCCESS; - } - - assert(*pSurfaceFormatCount >= count); - *pSurfaceFormatCount = count; - - VkFormat *f; - anv_vector_foreach(f, &display->formats) { - *(pSurfaceFormats++) = (VkSurfaceFormatKHR) { - .format = *f, - /* TODO: We should get this from the compositor somehow */ - .colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR, - }; - } - - return VK_SUCCESS; -} - -static VkResult -wsi_wl_surface_get_present_modes(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t* pPresentModeCount, - VkPresentModeKHR* pPresentModes) -{ - if (pPresentModes == NULL) { - *pPresentModeCount = ARRAY_SIZE(present_modes); - return VK_SUCCESS; - } - - assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); - typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); - *pPresentModeCount = ARRAY_SIZE(present_modes); - - return VK_SUCCESS; -} - -static VkResult -wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *surface, - struct anv_device *device, - const VkSwapchainCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - struct anv_swapchain **swapchain); - -VkResult anv_CreateWaylandSurfaceKHR( - VkInstance _instance, - const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSurfaceKHR* pSurface) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR); - - VkIcdSurfaceWayland *surface; - - surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, 
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (surface == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - surface->base.platform = VK_ICD_WSI_PLATFORM_WAYLAND; - surface->display = pCreateInfo->display; - surface->surface = pCreateInfo->surface; - - *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); - - return VK_SUCCESS; -} - -struct wsi_wl_image { - struct anv_image * image; - struct anv_device_memory * memory; - struct wl_buffer * buffer; - bool busy; -}; - -struct wsi_wl_swapchain { - struct anv_swapchain base; - - struct wsi_wl_display * display; - struct wl_event_queue * queue; - struct wl_surface * surface; - - VkExtent2D extent; - VkFormat vk_format; - uint32_t drm_format; - - VkPresentModeKHR present_mode; - bool fifo_ready; - - uint32_t image_count; - struct wsi_wl_image images[0]; -}; - -static VkResult -wsi_wl_swapchain_get_images(struct anv_swapchain *anv_chain, - uint32_t *pCount, VkImage *pSwapchainImages) -{ - struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; - - if (pSwapchainImages == NULL) { - *pCount = chain->image_count; - return VK_SUCCESS; - } - - assert(chain->image_count <= *pCount); - for (uint32_t i = 0; i < chain->image_count; i++) - pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); - - *pCount = chain->image_count; - - return VK_SUCCESS; -} - -static VkResult -wsi_wl_swapchain_acquire_next_image(struct anv_swapchain *anv_chain, - uint64_t timeout, - VkSemaphore semaphore, - uint32_t *image_index) -{ - struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; - - int ret = wl_display_dispatch_queue_pending(chain->display->display, - chain->queue); - /* XXX: I'm not sure if out-of-date is the right error here. If - * wl_display_dispatch_queue_pending fails it most likely means we got - * kicked by the server so this seems more-or-less correct. 
- */ - if (ret < 0) - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); - - while (1) { - for (uint32_t i = 0; i < chain->image_count; i++) { - if (!chain->images[i].busy) { - /* We found a non-busy image */ - *image_index = i; - return VK_SUCCESS; - } - } - - /* This time we do a blocking dispatch because we can't go - * anywhere until we get an event. - */ - int ret = wl_display_roundtrip_queue(chain->display->display, - chain->queue); - if (ret < 0) - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); - } -} - -static void -frame_handle_done(void *data, struct wl_callback *callback, uint32_t serial) -{ - struct wsi_wl_swapchain *chain = data; - - chain->fifo_ready = true; - - wl_callback_destroy(callback); -} - -static const struct wl_callback_listener frame_listener = { - frame_handle_done, -}; - -static VkResult -wsi_wl_swapchain_queue_present(struct anv_swapchain *anv_chain, - struct anv_queue *queue, - uint32_t image_index) -{ - struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; - - if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { - while (!chain->fifo_ready) { - int ret = wl_display_dispatch_queue(chain->display->display, - chain->queue); - if (ret < 0) - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); - } - } - - assert(image_index < chain->image_count); - wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0); - wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX); - - if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { - struct wl_callback *frame = wl_surface_frame(chain->surface); - wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue); - wl_callback_add_listener(frame, &frame_listener, chain); - chain->fifo_ready = false; - } - - chain->images[image_index].busy = true; - wl_surface_commit(chain->surface); - wl_display_flush(chain->display->display); - - return VK_SUCCESS; -} - -static void -wsi_wl_image_finish(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, - const VkAllocationCallbacks* 
pAllocator) -{ - VkDevice vk_device = anv_device_to_handle(chain->base.device); - anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory), - pAllocator); - anv_DestroyImage(vk_device, anv_image_to_handle(image->image), - pAllocator); -} - -static void -buffer_handle_release(void *data, struct wl_buffer *buffer) -{ - struct wsi_wl_image *image = data; - - assert(image->buffer == buffer); - - image->busy = false; -} - -static const struct wl_buffer_listener buffer_listener = { - buffer_handle_release, -}; - -static VkResult -wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, - const VkAllocationCallbacks* pAllocator) -{ - VkDevice vk_device = anv_device_to_handle(chain->base.device); - VkResult result; - - VkImage vk_image; - result = anv_image_create(vk_device, - &(struct anv_image_create_info) { - .isl_tiling_flags = ISL_TILING_X_BIT, - .stride = 0, - .vk_info = - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = chain->vk_format, - .extent = { - .width = chain->extent.width, - .height = chain->extent.height, - .depth = 1 - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - /* FIXME: Need a way to use X tiling to allow scanout */ - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }}, - pAllocator, - &vk_image); - - if (result != VK_SUCCESS) - return result; - - image->image = anv_image_from_handle(vk_image); - assert(anv_format_is_color(image->image->format)); - - struct anv_surface *surface = &image->image->color_surface; - - VkDeviceMemory vk_memory; - result = anv_AllocateMemory(vk_device, - &(VkMemoryAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .allocationSize = image->image->size, - .memoryTypeIndex = 0, - }, - pAllocator, - &vk_memory); - - if (result != VK_SUCCESS) - goto fail_image; - - image->memory = anv_device_memory_from_handle(vk_memory); - 
image->memory->bo.is_winsys_bo = true; - - result = anv_BindImageMemory(vk_device, vk_image, vk_memory, 0); - - if (result != VK_SUCCESS) - goto fail_mem; - - int ret = anv_gem_set_tiling(chain->base.device, - image->memory->bo.gem_handle, - surface->isl.row_pitch, I915_TILING_X); - if (ret) { - /* FINISHME: Choose a better error. */ - result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - goto fail_mem; - } - - int fd = anv_gem_handle_to_fd(chain->base.device, - image->memory->bo.gem_handle); - if (fd == -1) { - /* FINISHME: Choose a better error. */ - result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - goto fail_mem; - } - - image->buffer = wl_drm_create_prime_buffer(chain->display->drm, - fd, /* name */ - chain->extent.width, - chain->extent.height, - chain->drm_format, - surface->offset, - surface->isl.row_pitch, - 0, 0, 0, 0 /* unused */); - wl_display_roundtrip(chain->display->display); - close(fd); - - wl_proxy_set_queue((struct wl_proxy *)image->buffer, chain->queue); - wl_buffer_add_listener(image->buffer, &buffer_listener, image); - - return VK_SUCCESS; - -fail_mem: - anv_FreeMemory(vk_device, vk_memory, pAllocator); -fail_image: - anv_DestroyImage(vk_device, vk_image, pAllocator); - - return result; -} - -static VkResult -wsi_wl_swapchain_destroy(struct anv_swapchain *anv_chain, - const VkAllocationCallbacks *pAllocator) -{ - struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; - - for (uint32_t i = 0; i < chain->image_count; i++) { - if (chain->images[i].buffer) - wsi_wl_image_finish(chain, &chain->images[i], pAllocator); - } - - anv_free2(&chain->base.device->alloc, pAllocator, chain); - - return VK_SUCCESS; -} - -static VkResult -wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, - struct anv_device *device, - const VkSwapchainCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - struct anv_swapchain **swapchain_out) -{ - VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; - struct 
wsi_wl_swapchain *chain; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); - - int num_images = pCreateInfo->minImageCount; - - assert(num_images >= MIN_NUM_IMAGES); - - /* For true mailbox mode, we need at least 4 images: - * 1) One to scan out from - * 2) One to have queued for scan-out - * 3) One to be currently held by the Wayland compositor - * 4) One to render to - */ - if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) - num_images = MAX2(num_images, 4); - - size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); - chain = anv_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (chain == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - chain->base.device = device; - chain->base.destroy = wsi_wl_swapchain_destroy; - chain->base.get_images = wsi_wl_swapchain_get_images; - chain->base.acquire_next_image = wsi_wl_swapchain_acquire_next_image; - chain->base.queue_present = wsi_wl_swapchain_queue_present; - - chain->surface = surface->surface; - chain->extent = pCreateInfo->imageExtent; - chain->vk_format = pCreateInfo->imageFormat; - chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, false); - - chain->present_mode = pCreateInfo->presentMode; - chain->fifo_ready = true; - - chain->image_count = num_images; - - /* Mark a bunch of stuff as NULL. This way we can just call - * destroy_swapchain for cleanup. 
- */ - for (uint32_t i = 0; i < chain->image_count; i++) - chain->images[i].buffer = NULL; - chain->queue = NULL; - - chain->display = wsi_wl_get_display(device->instance, surface->display); - if (!chain->display) - goto fail; - - chain->queue = wl_display_create_queue(chain->display->display); - if (!chain->queue) - goto fail; - - for (uint32_t i = 0; i < chain->image_count; i++) { - result = wsi_wl_image_init(chain, &chain->images[i], pAllocator); - if (result != VK_SUCCESS) - goto fail; - chain->images[i].busy = false; - } - - *swapchain_out = &chain->base; - - return VK_SUCCESS; - -fail: - wsi_wl_swapchain_destroy(&chain->base, pAllocator); - - return result; -} - -VkResult -anv_wl_init_wsi(struct anv_instance *instance) -{ - struct wsi_wayland *wsi; - VkResult result; - - wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!wsi) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail; - } - - wsi->instance = instance; - - int ret = pthread_mutex_init(&wsi->mutex, NULL); - if (ret != 0) { - if (ret == ENOMEM) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } else { - /* FINISHME: Choose a better error. 
*/ - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - goto fail_alloc; - } - - wsi->displays = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - if (!wsi->displays) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_mutex; - } - - wsi->base.get_support = wsi_wl_surface_get_support; - wsi->base.get_capabilities = wsi_wl_surface_get_capabilities; - wsi->base.get_formats = wsi_wl_surface_get_formats; - wsi->base.get_present_modes = wsi_wl_surface_get_present_modes; - wsi->base.create_swapchain = wsi_wl_surface_create_swapchain; - - instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = &wsi->base; - - return VK_SUCCESS; - -fail_mutex: - pthread_mutex_destroy(&wsi->mutex); - -fail_alloc: - anv_free(&instance->alloc, wsi); -fail: - instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = NULL; - - return result; -} - -void -anv_wl_finish_wsi(struct anv_instance *instance) -{ - struct wsi_wayland *wsi = - (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; - - if (wsi) { - _mesa_hash_table_destroy(wsi->displays, NULL); - - pthread_mutex_destroy(&wsi->mutex); - - anv_free(&instance->alloc, wsi); - } -} diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c deleted file mode 100644 index 843a6b62504..00000000000 --- a/src/vulkan/anv_wsi_x11.c +++ /dev/null @@ -1,758 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of 
the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include - -#include "anv_wsi.h" - -#include "util/hash_table.h" - -struct wsi_x11_connection { - bool has_dri3; - bool has_present; -}; - -struct wsi_x11 { - struct anv_wsi_interface base; - - pthread_mutex_t mutex; - /* Hash table of xcb_connection -> wsi_x11_connection mappings */ - struct hash_table *connections; -}; - -static struct wsi_x11_connection * -wsi_x11_connection_create(struct anv_instance *instance, xcb_connection_t *conn) -{ - xcb_query_extension_cookie_t dri3_cookie, pres_cookie; - xcb_query_extension_reply_t *dri3_reply, *pres_reply; - - struct wsi_x11_connection *wsi_conn = - anv_alloc(&instance->alloc, sizeof(*wsi_conn), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!wsi_conn) - return NULL; - - dri3_cookie = xcb_query_extension(conn, 4, "DRI3"); - pres_cookie = xcb_query_extension(conn, 7, "PRESENT"); - - dri3_reply = xcb_query_extension_reply(conn, dri3_cookie, NULL); - pres_reply = xcb_query_extension_reply(conn, pres_cookie, NULL); - if (dri3_reply == NULL || pres_reply == NULL) { - free(dri3_reply); - free(pres_reply); - anv_free(&instance->alloc, wsi_conn); - return NULL; - } - - wsi_conn->has_dri3 = dri3_reply->present != 0; - wsi_conn->has_present = pres_reply->present != 0; - - free(dri3_reply); - free(pres_reply); - - return wsi_conn; -} - -static void -wsi_x11_connection_destroy(struct anv_instance *instance, - struct wsi_x11_connection *conn) -{ - anv_free(&instance->alloc, conn); -} - -static struct 
wsi_x11_connection * -wsi_x11_get_connection(struct anv_instance *instance, xcb_connection_t *conn) -{ - struct wsi_x11 *wsi = - (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; - - pthread_mutex_lock(&wsi->mutex); - - struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, conn); - if (!entry) { - /* We're about to make a bunch of blocking calls. Let's drop the - * mutex for now so we don't block up too badly. - */ - pthread_mutex_unlock(&wsi->mutex); - - struct wsi_x11_connection *wsi_conn = - wsi_x11_connection_create(instance, conn); - - pthread_mutex_lock(&wsi->mutex); - - entry = _mesa_hash_table_search(wsi->connections, conn); - if (entry) { - /* Oops, someone raced us to it */ - wsi_x11_connection_destroy(instance, wsi_conn); - } else { - entry = _mesa_hash_table_insert(wsi->connections, conn, wsi_conn); - } - } - - pthread_mutex_unlock(&wsi->mutex); - - return entry->data; -} - -static const VkSurfaceFormatKHR formats[] = { - { .format = VK_FORMAT_B8G8R8A8_SRGB, }, -}; - -static const VkPresentModeKHR present_modes[] = { - VK_PRESENT_MODE_MAILBOX_KHR, -}; - -static xcb_screen_t * -get_screen_for_root(xcb_connection_t *conn, xcb_window_t root) -{ - xcb_screen_iterator_t screen_iter = - xcb_setup_roots_iterator(xcb_get_setup(conn)); - - for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { - if (screen_iter.data->root == root) - return screen_iter.data; - } - - return NULL; -} - -static xcb_visualtype_t * -screen_get_visualtype(xcb_screen_t *screen, xcb_visualid_t visual_id, - unsigned *depth) -{ - xcb_depth_iterator_t depth_iter = - xcb_screen_allowed_depths_iterator(screen); - - for (; depth_iter.rem; xcb_depth_next (&depth_iter)) { - xcb_visualtype_iterator_t visual_iter = - xcb_depth_visuals_iterator (depth_iter.data); - - for (; visual_iter.rem; xcb_visualtype_next (&visual_iter)) { - if (visual_iter.data->visual_id == visual_id) { - if (depth) - *depth = depth_iter.data->depth; - return visual_iter.data; - } - } - } - - return 
NULL; -} - -static xcb_visualtype_t * -connection_get_visualtype(xcb_connection_t *conn, xcb_visualid_t visual_id, - unsigned *depth) -{ - xcb_screen_iterator_t screen_iter = - xcb_setup_roots_iterator(xcb_get_setup(conn)); - - /* For this we have to iterate over all of the screens which is rather - * annoying. Fortunately, there is probably only 1. - */ - for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { - xcb_visualtype_t *visual = screen_get_visualtype(screen_iter.data, - visual_id, depth); - if (visual) - return visual; - } - - return NULL; -} - -static xcb_visualtype_t * -get_visualtype_for_window(xcb_connection_t *conn, xcb_window_t window, - unsigned *depth) -{ - xcb_query_tree_cookie_t tree_cookie; - xcb_get_window_attributes_cookie_t attrib_cookie; - xcb_query_tree_reply_t *tree; - xcb_get_window_attributes_reply_t *attrib; - - tree_cookie = xcb_query_tree(conn, window); - attrib_cookie = xcb_get_window_attributes(conn, window); - - tree = xcb_query_tree_reply(conn, tree_cookie, NULL); - attrib = xcb_get_window_attributes_reply(conn, attrib_cookie, NULL); - if (attrib == NULL || tree == NULL) { - free(attrib); - free(tree); - return NULL; - } - - xcb_window_t root = tree->root; - xcb_visualid_t visual_id = attrib->visual; - free(attrib); - free(tree); - - xcb_screen_t *screen = get_screen_for_root(conn, root); - if (screen == NULL) - return NULL; - - return screen_get_visualtype(screen, visual_id, depth); -} - -static bool -visual_has_alpha(xcb_visualtype_t *visual, unsigned depth) -{ - uint32_t rgb_mask = visual->red_mask | - visual->green_mask | - visual->blue_mask; - - uint32_t all_mask = 0xffffffff >> (32 - depth); - - /* Do we have bits left over after RGB? 
*/ - return (all_mask & ~rgb_mask) != 0; -} - -VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - xcb_connection_t* connection, - xcb_visualid_t visual_id) -{ - ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - - struct wsi_x11_connection *wsi_conn = - wsi_x11_get_connection(device->instance, connection); - - if (!wsi_conn->has_dri3) { - fprintf(stderr, "vulkan: No DRI3 support\n"); - return false; - } - - unsigned visual_depth; - if (!connection_get_visualtype(connection, visual_id, &visual_depth)) - return false; - - if (visual_depth != 24 && visual_depth != 32) - return false; - - return true; -} - -static VkResult -x11_surface_get_support(VkIcdSurfaceBase *icd_surface, - struct anv_physical_device *device, - uint32_t queueFamilyIndex, - VkBool32* pSupported) -{ - VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; - - struct wsi_x11_connection *wsi_conn = - wsi_x11_get_connection(device->instance, surface->connection); - if (!wsi_conn) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - if (!wsi_conn->has_dri3) { - fprintf(stderr, "vulkan: No DRI3 support\n"); - *pSupported = false; - return VK_SUCCESS; - } - - unsigned visual_depth; - if (!get_visualtype_for_window(surface->connection, surface->window, - &visual_depth)) { - *pSupported = false; - return VK_SUCCESS; - } - - if (visual_depth != 24 && visual_depth != 32) { - *pSupported = false; - return VK_SUCCESS; - } - - *pSupported = true; - return VK_SUCCESS; -} - -static VkResult -x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, - struct anv_physical_device *device, - VkSurfaceCapabilitiesKHR *caps) -{ - VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; - xcb_get_geometry_cookie_t geom_cookie; - xcb_generic_error_t *err; - xcb_get_geometry_reply_t *geom; - unsigned visual_depth; - - geom_cookie = xcb_get_geometry(surface->connection, surface->window); - - /* This does a round-trip. 
This is why we do get_geometry first and - * wait to read the reply until after we have a visual. - */ - xcb_visualtype_t *visual = - get_visualtype_for_window(surface->connection, surface->window, - &visual_depth); - - geom = xcb_get_geometry_reply(surface->connection, geom_cookie, &err); - if (geom) { - VkExtent2D extent = { geom->width, geom->height }; - caps->currentExtent = extent; - caps->minImageExtent = extent; - caps->maxImageExtent = extent; - } else { - /* This can happen if the client didn't wait for the configure event - * to come back from the compositor. In that case, we don't know the - * size of the window so we just return valid "I don't know" stuff. - */ - caps->currentExtent = (VkExtent2D) { -1, -1 }; - caps->minImageExtent = (VkExtent2D) { 1, 1 }; - caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; - } - free(err); - free(geom); - - if (visual_has_alpha(visual, visual_depth)) { - caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | - VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; - } else { - caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | - VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - } - - caps->minImageCount = 2; - caps->maxImageCount = 4; - caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; - caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; - caps->maxImageArrayLayers = 1; - caps->supportedUsageFlags = - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - - return VK_SUCCESS; -} - -static VkResult -x11_surface_get_formats(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t *pSurfaceFormatCount, - VkSurfaceFormatKHR *pSurfaceFormats) -{ - if (pSurfaceFormats == NULL) { - *pSurfaceFormatCount = ARRAY_SIZE(formats); - return VK_SUCCESS; - } - - assert(*pSurfaceFormatCount >= ARRAY_SIZE(formats)); - typed_memcpy(pSurfaceFormats, formats, *pSurfaceFormatCount); - 
*pSurfaceFormatCount = ARRAY_SIZE(formats); - - return VK_SUCCESS; -} - -static VkResult -x11_surface_get_present_modes(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t *pPresentModeCount, - VkPresentModeKHR *pPresentModes) -{ - if (pPresentModes == NULL) { - *pPresentModeCount = ARRAY_SIZE(present_modes); - return VK_SUCCESS; - } - - assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); - typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); - *pPresentModeCount = ARRAY_SIZE(present_modes); - - return VK_SUCCESS; -} - -static VkResult -x11_surface_create_swapchain(VkIcdSurfaceBase *surface, - struct anv_device *device, - const VkSwapchainCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - struct anv_swapchain **swapchain); - -VkResult anv_CreateXcbSurfaceKHR( - VkInstance _instance, - const VkXcbSurfaceCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSurfaceKHR* pSurface) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR); - - VkIcdSurfaceXcb *surface; - - surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (surface == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - surface->base.platform = VK_ICD_WSI_PLATFORM_XCB; - surface->connection = pCreateInfo->connection; - surface->window = pCreateInfo->window; - - *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); - - return VK_SUCCESS; -} - -struct x11_image { - struct anv_image * image; - struct anv_device_memory * memory; - xcb_pixmap_t pixmap; - xcb_get_geometry_cookie_t geom_cookie; - bool busy; -}; - -struct x11_swapchain { - struct anv_swapchain base; - - xcb_connection_t * conn; - xcb_window_t window; - xcb_gc_t gc; - VkExtent2D extent; - uint32_t image_count; - uint32_t next_image; - struct x11_image images[0]; -}; - -static VkResult -x11_get_images(struct 
anv_swapchain *anv_chain, - uint32_t* pCount, VkImage *pSwapchainImages) -{ - struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - - if (pSwapchainImages == NULL) { - *pCount = chain->image_count; - return VK_SUCCESS; - } - - assert(chain->image_count <= *pCount); - for (uint32_t i = 0; i < chain->image_count; i++) - pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); - - *pCount = chain->image_count; - - return VK_SUCCESS; -} - -static VkResult -x11_acquire_next_image(struct anv_swapchain *anv_chain, - uint64_t timeout, - VkSemaphore semaphore, - uint32_t *image_index) -{ - struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - struct x11_image *image = &chain->images[chain->next_image]; - - if (image->busy) { - xcb_generic_error_t *err; - xcb_get_geometry_reply_t *geom = - xcb_get_geometry_reply(chain->conn, image->geom_cookie, &err); - if (!geom) { - free(err); - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); - } - - if (geom->width != chain->extent.width || - geom->height != chain->extent.height) { - free(geom); - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); - } - free(geom); - - image->busy = false; - } - - *image_index = chain->next_image; - chain->next_image = (chain->next_image + 1) % chain->image_count; - return VK_SUCCESS; -} - -static VkResult -x11_queue_present(struct anv_swapchain *anv_chain, - struct anv_queue *queue, - uint32_t image_index) -{ - struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - struct x11_image *image = &chain->images[image_index]; - - assert(image_index < chain->image_count); - - xcb_void_cookie_t cookie; - - cookie = xcb_copy_area(chain->conn, - image->pixmap, - chain->window, - chain->gc, - 0, 0, - 0, 0, - chain->extent.width, - chain->extent.height); - xcb_discard_reply(chain->conn, cookie.sequence); - - image->geom_cookie = xcb_get_geometry(chain->conn, chain->window); - image->busy = true; - - xcb_flush(chain->conn); - - return VK_SUCCESS; -} - -static VkResult 
-x11_swapchain_destroy(struct anv_swapchain *anv_chain, - const VkAllocationCallbacks *pAllocator) -{ - struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - xcb_void_cookie_t cookie; - - for (uint32_t i = 0; i < chain->image_count; i++) { - struct x11_image *image = &chain->images[i]; - - if (image->busy) - xcb_discard_reply(chain->conn, image->geom_cookie.sequence); - - cookie = xcb_free_pixmap(chain->conn, image->pixmap); - xcb_discard_reply(chain->conn, cookie.sequence); - - /* TODO: Delete images and free memory */ - } - - anv_free2(&chain->base.device->alloc, pAllocator, chain); - - return VK_SUCCESS; -} - -static VkResult -x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, - struct anv_device *device, - const VkSwapchainCreateInfoKHR *pCreateInfo, - const VkAllocationCallbacks* pAllocator, - struct anv_swapchain **swapchain_out) -{ - VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; - struct x11_swapchain *chain; - xcb_void_cookie_t cookie; - VkResult result; - - int num_images = pCreateInfo->minImageCount; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); - - size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); - chain = anv_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (chain == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - chain->base.device = device; - chain->base.destroy = x11_swapchain_destroy; - chain->base.get_images = x11_get_images; - chain->base.acquire_next_image = x11_acquire_next_image; - chain->base.queue_present = x11_queue_present; - - chain->conn = surface->connection; - chain->window = surface->window; - chain->extent = pCreateInfo->imageExtent; - chain->image_count = num_images; - chain->next_image = 0; - - for (uint32_t i = 0; i < chain->image_count; i++) { - VkDeviceMemory memory_h; - VkImage image_h; - struct anv_image *image; - struct anv_surface *surface; - struct anv_device_memory *memory; - - 
anv_image_create(anv_device_to_handle(device), - &(struct anv_image_create_info) { - .isl_tiling_flags = ISL_TILING_X_BIT, - .stride = 0, - .vk_info = - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = pCreateInfo->imageFormat, - .extent = { - .width = pCreateInfo->imageExtent.width, - .height = pCreateInfo->imageExtent.height, - .depth = 1 - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - /* FIXME: Need a way to use X tiling to allow scanout */ - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }}, - NULL, - &image_h); - - image = anv_image_from_handle(image_h); - assert(anv_format_is_color(image->format)); - - surface = &image->color_surface; - - anv_AllocateMemory(anv_device_to_handle(device), - &(VkMemoryAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .allocationSize = image->size, - .memoryTypeIndex = 0, - }, - NULL /* XXX: pAllocator */, - &memory_h); - - memory = anv_device_memory_from_handle(memory_h); - memory->bo.is_winsys_bo = true; - - anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), - memory_h, 0); - - int ret = anv_gem_set_tiling(device, memory->bo.gem_handle, - surface->isl.row_pitch, I915_TILING_X); - if (ret) { - /* FINISHME: Choose a better error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "set_tiling failed: %m"); - goto fail; - } - - int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); - if (fd == -1) { - /* FINISHME: Choose a better error. 
*/ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "handle_to_fd failed: %m"); - goto fail; - } - - uint32_t bpp = 32; - uint32_t depth = 24; - xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); - - cookie = - xcb_dri3_pixmap_from_buffer_checked(chain->conn, - pixmap, - chain->window, - image->size, - pCreateInfo->imageExtent.width, - pCreateInfo->imageExtent.height, - surface->isl.row_pitch, - depth, bpp, fd); - - chain->images[i].image = image; - chain->images[i].memory = memory; - chain->images[i].pixmap = pixmap; - chain->images[i].busy = false; - - xcb_discard_reply(chain->conn, cookie.sequence); - } - - chain->gc = xcb_generate_id(chain->conn); - if (!chain->gc) { - /* FINISHME: Choose a better error. */ - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail; - } - - cookie = xcb_create_gc(chain->conn, - chain->gc, - chain->window, - XCB_GC_GRAPHICS_EXPOSURES, - (uint32_t []) { 0 }); - xcb_discard_reply(chain->conn, cookie.sequence); - - *swapchain_out = &chain->base; - - return VK_SUCCESS; - - fail: - return result; -} - -VkResult -anv_x11_init_wsi(struct anv_instance *instance) -{ - struct wsi_x11 *wsi; - VkResult result; - - wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!wsi) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail; - } - - int ret = pthread_mutex_init(&wsi->mutex, NULL); - if (ret != 0) { - if (ret == ENOMEM) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } else { - /* FINISHME: Choose a better error. 
*/ - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - goto fail_alloc; - } - - wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - if (!wsi->connections) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_mutex; - } - - wsi->base.get_support = x11_surface_get_support; - wsi->base.get_capabilities = x11_surface_get_capabilities; - wsi->base.get_formats = x11_surface_get_formats; - wsi->base.get_present_modes = x11_surface_get_present_modes; - wsi->base.create_swapchain = x11_surface_create_swapchain; - - instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = &wsi->base; - - return VK_SUCCESS; - -fail_mutex: - pthread_mutex_destroy(&wsi->mutex); -fail_alloc: - anv_free(&instance->alloc, wsi); -fail: - instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = NULL; - - return result; -} - -void -anv_x11_finish_wsi(struct anv_instance *instance) -{ - struct wsi_x11 *wsi = - (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; - - if (wsi) { - _mesa_hash_table_destroy(wsi->connections, NULL); - - pthread_mutex_destroy(&wsi->mutex); - - anv_free(&instance->alloc, wsi); - } -} diff --git a/src/vulkan/dev_icd.json.in b/src/vulkan/dev_icd.json.in deleted file mode 100644 index 84920365289..00000000000 --- a/src/vulkan/dev_icd.json.in +++ /dev/null @@ -1,7 +0,0 @@ -{ - "file_format_version": "1.0.0", - "ICD": { - "library_path": "@build_libdir@/libvulkan_intel.so", - "abi_versions": "1.0.3" - } -} diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c deleted file mode 100644 index 23327ec0724..00000000000 --- a/src/vulkan/gen7_cmd_buffer.c +++ /dev/null @@ -1,589 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen7_pack.h" -#include "genxml/gen75_pack.h" - -static uint32_t -cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) -{ - static const uint32_t push_constant_opcodes[] = { - [MESA_SHADER_VERTEX] = 21, - [MESA_SHADER_TESS_CTRL] = 25, /* HS */ - [MESA_SHADER_TESS_EVAL] = 26, /* DS */ - [MESA_SHADER_GEOMETRY] = 22, - [MESA_SHADER_FRAGMENT] = 23, - [MESA_SHADER_COMPUTE] = 0, - }; - - VkShaderStageFlags flushed = 0; - - anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { - if (stage == MESA_SHADER_COMPUTE) - continue; - - struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - - if (state.offset == 0) - continue; - - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CONSTANT_VS, - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer0 = { .offset = state.offset }, - .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - - flushed |= mesa_to_vk_shader_stage(stage); - } - - cmd_buffer->state.push_constants_dirty &= ~flushed; - - return flushed; -} 
- -GENX_FUNC(GEN7, GEN7) void -genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, - uint32_t stages) -{ - static const uint32_t sampler_state_opcodes[] = { - [MESA_SHADER_VERTEX] = 43, - [MESA_SHADER_TESS_CTRL] = 44, /* HS */ - [MESA_SHADER_TESS_EVAL] = 45, /* DS */ - [MESA_SHADER_GEOMETRY] = 46, - [MESA_SHADER_FRAGMENT] = 47, - [MESA_SHADER_COMPUTE] = 0, - }; - - static const uint32_t binding_table_opcodes[] = { - [MESA_SHADER_VERTEX] = 38, - [MESA_SHADER_TESS_CTRL] = 39, - [MESA_SHADER_TESS_EVAL] = 40, - [MESA_SHADER_GEOMETRY] = 41, - [MESA_SHADER_FRAGMENT] = 42, - [MESA_SHADER_COMPUTE] = 0, - }; - - anv_foreach_stage(s, stages) { - if (cmd_buffer->state.samplers[s].alloc_size > 0) { - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, - ._3DCommandSubOpcode = sampler_state_opcodes[s], - .PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset); - } - - /* Always emit binding table pointers if we're asked to, since on SKL - * this is what flushes push constants. 
*/ - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, - ._3DCommandSubOpcode = binding_table_opcodes[s], - .PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset); - } -} - -GENX_FUNC(GEN7, GEN7) uint32_t -genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) -{ - VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & - cmd_buffer->state.pipeline->active_stages; - - VkResult result = VK_SUCCESS; - anv_foreach_stage(s, dirty) { - result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, - &cmd_buffer->state.samplers[s]); - if (result != VK_SUCCESS) - break; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, - &cmd_buffer->state.binding_tables[s]); - if (result != VK_SUCCESS) - break; - } - - if (result != VK_SUCCESS) { - assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); - - result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); - assert(result == VK_SUCCESS); - - /* Re-emit state base addresses so we get the new surface state base - * address before we start emitting binding tables etc. 
- */ - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - /* Re-emit all active binding tables */ - dirty |= cmd_buffer->state.pipeline->active_stages; - anv_foreach_stage(s, dirty) { - result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, - &cmd_buffer->state.samplers[s]); - if (result != VK_SUCCESS) - return result; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, - &cmd_buffer->state.binding_tables[s]); - if (result != VK_SUCCESS) - return result; - } - } - - cmd_buffer->state.descriptors_dirty &= ~dirty; - - return dirty; -} - -static inline int64_t -clamp_int64(int64_t x, int64_t min, int64_t max) -{ - if (x < min) - return min; - else if (x < max) - return x; - else - return max; -} - -static void -emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t count, const VkRect2D *scissors) -{ - struct anv_state scissor_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); - - for (uint32_t i = 0; i < count; i++) { - const VkRect2D *s = &scissors[i]; - - /* Since xmax and ymax are inclusive, we have to have xmax < xmin or - * ymax < ymin for empty clips. In case clip x, y, width height are all - * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't - * what we want. Just special case empty clips and produce a canonical - * empty clip. */ - static const struct GEN7_SCISSOR_RECT empty_scissor = { - .ScissorRectangleYMin = 1, - .ScissorRectangleXMin = 1, - .ScissorRectangleYMax = 0, - .ScissorRectangleXMax = 0 - }; - - const int max = 0xffff; - struct GEN7_SCISSOR_RECT scissor = { - /* Do this math using int64_t so overflow gets clamped correctly. 
*/ - .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), - .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), - .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), - .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) - }; - - if (s->extent.width <= 0 || s->extent.height <= 0) { - GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, - &empty_scissor); - } else { - GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, &scissor); - } - } - - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS, - .ScissorRectPointer = scissor_state.offset); - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(scissor_state); -} - -GENX_FUNC(GEN7, GEN7) void -genX(cmd_buffer_emit_scissor)(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.dynamic.scissor.count > 0) { - emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, - cmd_buffer->state.dynamic.scissor.scissors); - } else { - /* Emit a default scissor based on the currently bound framebuffer */ - emit_scissor_state(cmd_buffer, 1, - &(VkRect2D) { - .offset = { .x = 0, .y = 0, }, - .extent = { - .width = cmd_buffer->state.framebuffer->width, - .height = cmd_buffer->state.framebuffer->height, - }, - }); - } -} - -static const uint32_t vk_to_gen_index_type[] = { - [VK_INDEX_TYPE_UINT16] = INDEX_WORD, - [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, -}; - -static const uint32_t restart_index_for_type[] = { - [VK_INDEX_TYPE_UINT16] = UINT16_MAX, - [VK_INDEX_TYPE_UINT32] = UINT32_MAX, -}; - -void genX(CmdBindIndexBuffer)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - VkIndexType indexType) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; - if (ANV_IS_HASWELL) - cmd_buffer->state.restart_index = restart_index_for_type[indexType]; - 
cmd_buffer->state.gen7.index_buffer = buffer; - cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType]; - cmd_buffer->state.gen7.index_offset = offset; -} - -static VkResult -flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = anv_cmd_buffer_emit_samplers(cmd_buffer, - MESA_SHADER_COMPUTE, &samplers); - if (result != VK_SUCCESS) - return result; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, - MESA_SHADER_COMPUTE, &surfaces); - if (result != VK_SUCCESS) - return result; - - struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - - unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; - unsigned push_constant_data_size = - (prog_data->nr_params + local_id_dwords) * 4; - unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); - unsigned push_constant_regs = reg_aligned_constant_size / 32; - - if (push_state.alloc_size) { - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), - .CURBETotalDataLength = push_state.alloc_size, - .CURBEDataStartAddress = push_state.offset); - } - - assert(prog_data->total_shared <= 64 * 1024); - uint32_t slm_size = 0; - if (prog_data->total_shared > 0) { - /* slm_size is in 4k increments, but must be a power of 2. 
*/ - slm_size = 4 * 1024; - while (slm_size < prog_data->total_shared) - slm_size <<= 1; - slm_size /= 4 * 1024; - } - - struct anv_state state = - anv_state_pool_emit(&device->dynamic_state_pool, - GEN7_INTERFACE_DESCRIPTOR_DATA, 64, - .KernelStartPointer = pipeline->cs_simd, - .BindingTablePointer = surfaces.offset, - .SamplerStatePointer = samplers.offset, - .ConstantURBEntryReadLength = - push_constant_regs, - .ConstantURBEntryReadOffset = 0, - .BarrierEnable = cs_prog_data->uses_barrier, - .SharedLocalMemorySize = slm_size, - .NumberofThreadsinGPGPUThreadGroup = - pipeline->cs_thread_width_max); - - const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); - anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, - .InterfaceDescriptorTotalLength = size, - .InterfaceDescriptorDataStartAddress = state.offset); - - return VK_SUCCESS; -} - -void -genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - VkResult result; - - assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - - if (cmd_buffer->state.current_pipeline != GPGPU) { - anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, - .PipelineSelection = GPGPU); - cmd_buffer->state.current_pipeline = GPGPU; - } - - if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { - /* FIXME: figure out descriptors for gen7 */ - result = flush_compute_descriptor_set(cmd_buffer); - assert(result == VK_SUCCESS); - cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; - } - - cmd_buffer->state.compute_dirty = 0; -} - -void -genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - uint32_t 
*p; - - uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - - genX(flush_pipeline_select_3d)(cmd_buffer); - - if (vb_emit) { - const uint32_t num_buffers = __builtin_popcount(vb_emit); - const uint32_t num_dwords = 1 + num_buffers * 4; - - p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GEN7_3DSTATE_VERTEX_BUFFERS); - uint32_t vb, i = 0; - for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; - uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; - - struct GEN7_VERTEX_BUFFER_STATE state = { - .VertexBufferIndex = vb, - .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA, - .VertexBufferMemoryObjectControlState = GEN7_MOCS, - .AddressModifyEnable = true, - .BufferPitch = pipeline->binding_stride[vb], - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1}, - .InstanceDataStepRate = 1 - }; - - GEN7_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); - i++; - } - } - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { - /* If somebody compiled a pipeline after starting a command buffer the - * scratch bo may have grown since we started this cmd buffer (and - * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, - * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ - if (cmd_buffer->state.scratch_size < pipeline->total_scratch) - gen7_cmd_buffer_emit_state_base_address(cmd_buffer); - - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - } - - if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || - cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) { - /* From the IVB PRM Vol. 
2, Part 1, Section 3.2.1: - * - * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth - * stall needs to be sent just prior to any 3DSTATE_VS, - * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, - * 3DSTATE_BINDING_TABLE_POINTER_VS, - * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one - * PIPE_CONTROL needs to be sent before any combination of VS - * associated 3DSTATE." - */ - anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, - .DepthStallEnable = true, - .PostSyncOperation = WriteImmediateData, - .Address = { &cmd_buffer->device->workaround_bo, 0 }); - } - - uint32_t dirty = 0; - if (cmd_buffer->state.descriptors_dirty) { - dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); - gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); - } - - if (cmd_buffer->state.push_constants_dirty) - cmd_buffer_flush_push_constants(cmd_buffer); - - /* We use the gen8 state here because it only contains the additional - * min/max fields and, since they occur at the end of the packet and - * don't change the stride, they work on gen7 too. - */ - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) - gen8_cmd_buffer_emit_viewport(cmd_buffer); - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) - gen7_cmd_buffer_emit_scissor(cmd_buffer); - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_RENDER_TARGETS | - ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | - ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) { - - bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || - cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; - - const struct anv_image_view *iview = - anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - const struct anv_image *image = iview ? iview->image : NULL; - const uint32_t depth_format = image ? 
- isl_surf_get_depth_format(&cmd_buffer->device->isl_dev, - &image->depth_surface.isl) : D16_UNORM; - - uint32_t sf_dw[GEN7_3DSTATE_SF_length]; - struct GEN7_3DSTATE_SF sf = { - GEN7_3DSTATE_SF_header, - .DepthBufferSurfaceFormat = depth_format, - .LineWidth = cmd_buffer->state.dynamic.line_width, - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, - .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, - .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, - .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp - }; - GEN7_3DSTATE_SF_pack(NULL, sf_dw, &sf); - - anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN7_COLOR_CALC_STATE_length * 4, - 64); - struct GEN7_COLOR_CALC_STATE cc = { - .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], - .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], - .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], - .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.front, - .BackFaceStencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.back, - }; - GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(cc_state); - - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_CC_STATE_POINTERS, - .ColorCalcStatePointer = cc_state.offset); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_RENDER_TARGETS | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { - uint32_t 
depth_stencil_dw[GEN7_DEPTH_STENCIL_STATE_length]; - - const struct anv_image_view *iview = - anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - - struct GEN7_DEPTH_STENCIL_STATE depth_stencil = { - .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), - - .StencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, - .StencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, - - .BackfaceStencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, - .BackfaceStencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, - }; - GEN7_DEPTH_STENCIL_STATE_pack(NULL, depth_stencil_dw, &depth_stencil); - - struct anv_state ds_state = - anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw, - pipeline->gen7.depth_stencil_state, - GEN7_DEPTH_STENCIL_STATE_length, 64); - - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, - .PointertoDEPTH_STENCIL_STATE = ds_state.offset); - } - - if (cmd_buffer->state.gen7.index_buffer && - cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_INDEX_BUFFER)) { - struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer; - uint32_t offset = cmd_buffer->state.gen7.index_offset; - - if (ANV_IS_HASWELL) { - anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF, - .IndexedDrawCutIndexEnable = pipeline->primitive_restart, - .CutIndex = cmd_buffer->state.restart_index); - } - - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_INDEX_BUFFER, - .CutIndexEnable = pipeline->primitive_restart, - .IndexFormat = cmd_buffer->state.gen7.index_type, - .MemoryObjectControlState = GEN7_MOCS, - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size }); - } - - cmd_buffer->state.vb_dirty &= ~vb_emit; - cmd_buffer->state.dirty = 0; -} - -void genX(CmdSetEvent)( - VkCommandBuffer commandBuffer, - VkEvent event, - 
VkPipelineStageFlags stageMask) -{ - stub(); -} - -void genX(CmdResetEvent)( - VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - stub(); -} - -void genX(CmdWaitEvents)( - VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) -{ - stub(); -} diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c deleted file mode 100644 index 7c054fa56d5..00000000000 --- a/src/vulkan/gen7_pipeline.c +++ /dev/null @@ -1,410 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen7_pack.h" -#include "genxml/gen75_pack.h" - -#include "genX_pipeline_util.h" - -static void -gen7_emit_rs_state(struct anv_pipeline *pipeline, - const VkPipelineRasterizationStateCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra) -{ - struct GEN7_3DSTATE_SF sf = { - GEN7_3DSTATE_SF_header, - - /* LegacyGlobalDepthBiasEnable */ - - .StatisticsEnable = true, - .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], - .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], - .ViewTransformEnable = !(extra && extra->disable_viewport), - .FrontWinding = vk_to_gen_front_face[info->frontFace], - /* bool AntiAliasingEnable; */ - - .CullMode = vk_to_gen_cullmode[info->cullMode], - - /* uint32_t LineEndCapAntialiasingRegionWidth; */ - .ScissorRectangleEnable = !(extra && extra->disable_scissor), - - /* uint32_t MultisampleRasterizationMode; */ - /* bool LastPixelEnable; */ - - .TriangleStripListProvokingVertexSelect = 0, - .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, - - /* uint32_t AALineDistanceMode; */ - /* uint32_t VertexSubPixelPrecisionSelect; */ - .UsePointWidthState = !pipeline->writes_point_size, - .PointWidth = 1.0, - }; - - GEN7_3DSTATE_SF_pack(NULL, &pipeline->gen7.sf, &sf); -} - -static void -gen7_emit_ds_state(struct anv_pipeline *pipeline, - const VkPipelineDepthStencilStateCreateInfo *info) -{ - if (info == NULL) { - /* We're going to OR this together with the dynamic state. We need - * to make sure it's initialized to something useful. 
- */ - memset(pipeline->gen7.depth_stencil_state, 0, - sizeof(pipeline->gen7.depth_stencil_state)); - return; - } - - struct GEN7_DEPTH_STENCIL_STATE state = { - .DepthTestEnable = info->depthTestEnable, - .DepthBufferWriteEnable = info->depthWriteEnable, - .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], - .DoubleSidedStencilEnable = true, - - .StencilTestEnable = info->stencilTestEnable, - .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], - .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], - .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], - .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], - - .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], - .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], - .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.depthFailOp], - .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], - }; - - GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state); -} - -static void -gen7_emit_cb_state(struct anv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *info, - const VkPipelineMultisampleStateCreateInfo *ms_info) -{ - struct anv_device *device = pipeline->device; - - if (info == NULL || info->attachmentCount == 0) { - pipeline->blend_state = - anv_state_pool_emit(&device->dynamic_state_pool, - GEN7_BLEND_STATE, 64, - .ColorBufferBlendEnable = false, - .WriteDisableAlpha = true, - .WriteDisableRed = true, - .WriteDisableGreen = true, - .WriteDisableBlue = true); - } else { - /* FIXME-GEN7: All render targets share blend state settings on gen7, we - * can't implement this. 
- */ - const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; - pipeline->blend_state = - anv_state_pool_emit(&device->dynamic_state_pool, - GEN7_BLEND_STATE, 64, - - .ColorBufferBlendEnable = a->blendEnable, - .IndependentAlphaBlendEnable = true, /* FIXME: yes? */ - .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], - - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], - - .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], - .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], - .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], - .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, - -# if 0 - bool AlphaToOneEnable; - bool AlphaToCoverageDitherEnable; -# endif - - .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), - .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), - .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), - .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), - - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], - -# if 0 - bool AlphaTestEnable; - uint32_t AlphaTestFunction; - bool ColorDitherEnable; - uint32_t XDitherOffset; - uint32_t YDitherOffset; - uint32_t ColorClampRange; - bool PreBlendColorClampEnable; - bool PostBlendColorClampEnable; -# endif - ); - } - - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS, - .BlendStatePointer = pipeline->blend_state.offset); -} - -GENX_FUNC(GEN7, GEN75) VkResult -genX(graphics_pipeline_create)( - VkDevice _device, - struct anv_pipeline_cache * cache, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline *pipeline; - VkResult 
result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); - - pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_pipeline_init(pipeline, device, cache, - pCreateInfo, extra, pAllocator); - if (result != VK_SUCCESS) { - anv_free2(&device->alloc, pAllocator, pipeline); - return result; - } - - assert(pCreateInfo->pVertexInputState); - emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); - - assert(pCreateInfo->pRasterizationState); - gen7_emit_rs_state(pipeline, pCreateInfo->pRasterizationState, extra); - - gen7_emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); - - gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState, - pCreateInfo->pMultisampleState); - - emit_urb_setup(pipeline); - - const VkPipelineRasterizationStateCreateInfo *rs_info = - pCreateInfo->pRasterizationState; - - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_CLIP, - .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], - .CullMode = vk_to_gen_cullmode[rs_info->cullMode], - .ClipEnable = true, - .APIMode = APIMODE_OGL, - .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), - .ClipMode = CLIPMODE_NORMAL, - .TriangleStripListProvokingVertexSelect = 0, - .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, - .MinimumPointWidth = 0.125, - .MaximumPointWidth = 255.875, - .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); - - if (pCreateInfo->pMultisampleState && - pCreateInfo->pMultisampleState->rasterizationSamples > 1) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); - - uint32_t samples = 1; - uint32_t log2_samples = __builtin_ffs(samples) - 1; - - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_MULTISAMPLE, - .PixelLocation = PIXLOC_CENTER, - .NumberofMultisamples = log2_samples); - - 
anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SAMPLE_MASK, - .SampleMask = 0xff); - - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; - /* The last geometry producing stage will set urb_offset and urb_length, - * which we use in 3DSTATE_SBE. Skip the VUE header and position slots. */ - uint32_t urb_offset = 1; - uint32_t urb_length = (vue_prog_data->vue_map.num_slots + 1) / 2 - urb_offset; - -#if 0 - /* From gen7_vs_state.c */ - - /** - * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > - * Geometry > Geometry Shader > State: - * - * "Note: Because of corruption in IVB:GT2, software needs to flush the - * whole fixed function pipeline when the GS enable changes value in - * the 3DSTATE_GS." - * - * The hardware architects have clarified that in this context "flush the - * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS - * Stall" bit set. - */ - if (!brw->is_haswell && !brw->is_baytrail) - gen7_emit_vs_workaround_flush(brw); -#endif - - if (pipeline->vs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .VSFunctionEnable = false); - else - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), - .KernelStartPointer = pipeline->vs_vec4, - .ScratchSpaceBaseOffset = pipeline->scratch_start[MESA_SHADER_VERTEX], - .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), - - .DispatchGRFStartRegisterforURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, - .VertexURBEntryReadOffset = 0, - - .MaximumNumberofThreads = device->info.max_vs_threads - 1, - .StatisticsEnable = true, - .VSFunctionEnable = true); - - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; - - if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) { - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); - } else { - urb_offset = 1; - urb_length = 
(gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset; - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), - .KernelStartPointer = pipeline->gs_kernel, - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], - .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), - - .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, - .OutputTopology = gs_prog_data->output_topology, - .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, - .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, - .DispatchGRFStartRegisterforURBData = - gs_prog_data->base.base.dispatch_grf_start_reg, - - .MaximumNumberofThreads = device->info.max_gs_threads - 1, - /* This in the next dword on HSW. */ - .ControlDataFormat = gs_prog_data->control_data_format, - .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, - .InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1, - .DispatchMode = gs_prog_data->base.dispatch_mode, - .GSStatisticsEnable = true, - .IncludePrimitiveID = gs_prog_data->include_primitive_id, -# if (ANV_IS_HASWELL) - .ReorderMode = REORDER_TRAILING, -# else - .ReorderEnable = true, -# endif - .GSEnable = true); - } - - if (pipeline->ps_ksp0 == NO_KERNEL) { - anv_finishme("disabling ps"); - - /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE); - - /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). 
*/ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, - .StatisticsEnable = true, - .ThreadDispatchEnable = false, - .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ - .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ - .EarlyDepthStencilControl = EDSC_NORMAL, - .PointRasterizationRule = RASTRULE_UPPER_RIGHT); - - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); - - } else { - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || - wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) - anv_finishme("two-sided color needs sbe swizzling setup"); - if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) - anv_finishme("primitive_id needs sbe swizzling setup"); - - /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE, - .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, - .VertexURBEntryReadLength = urb_length, - .VertexURBEntryReadOffset = urb_offset, - .PointSpriteTextureCoordinateOrigin = UPPERLEFT); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), - .KernelStartPointer0 = pipeline->ps_ksp0, - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], - .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), - - .MaximumNumberofThreads = device->info.max_wm_threads - 1, - .PushConstantEnable = wm_prog_data->base.nr_params > 0, - .AttributeEnable = wm_prog_data->num_varying_inputs > 0, - .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, - - .RenderTargetFastClearEnable = false, - .DualSourceBlendEnable = false, - .RenderTargetResolveEnable = false, - - .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
- POSOFFSET_SAMPLE : POSOFFSET_NONE, - - ._32PixelDispatchEnable = false, - ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, - ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, - - .DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0, - .DispatchGRFStartRegisterforConstantSetupData1 = 0, - .DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2, - -#if 0 - /* Haswell requires the sample mask to be set in this packet as well as - * in 3DSTATE_SAMPLE_MASK; the values should match. */ - /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ -#endif - - .KernelStartPointer1 = 0, - .KernelStartPointer2 = pipeline->ps_ksp2); - - /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, - .StatisticsEnable = true, - .ThreadDispatchEnable = true, - .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ - .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ - .EarlyDepthStencilControl = EDSC_NORMAL, - .PointRasterizationRule = RASTRULE_UPPER_RIGHT, - .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, - .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, - .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, - .PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask, - .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes); - } - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; -} diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c deleted file mode 100644 index 77bdb75260c..00000000000 --- a/src/vulkan/gen7_state.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen7_pack.h" -#include "genxml/gen75_pack.h" - -#include "genX_state_util.h" - -VkResult -genX(init_device_state)(struct anv_device *device) -{ - struct anv_batch batch; - - uint32_t cmds[64]; - batch.start = batch.next = cmds; - batch.end = (void *) cmds + sizeof(cmds); - - anv_batch_emit(&batch, GEN7_PIPELINE_SELECT, - .PipelineSelection = _3D); - - anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), - .StatisticsEnable = true); - anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); - anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_DS), .DSFunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); - anv_batch_emit(&batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); - anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); - - assert(batch.next <= batch.end); - - return anv_device_submit_simple_batch(device, &batch); -} - -GENX_FUNC(GEN7, GEN75) void -genX(fill_buffer_surface_state)(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t 
stride) -{ - uint32_t num_elements = range / stride; - - struct GENX(RENDER_SURFACE_STATE) surface_state = { - .SurfaceType = SURFTYPE_BUFFER, - .SurfaceFormat = format, - .SurfaceVerticalAlignment = VALIGN_4, - .SurfaceHorizontalAlignment = HALIGN_4, - .TiledSurface = false, - .RenderCacheReadWriteMode = false, - .SurfaceObjectControlState = GENX(MOCS), - .Height = ((num_elements - 1) >> 7) & 0x3fff, - .Width = (num_elements - 1) & 0x7f, - .Depth = ((num_elements - 1) >> 21) & 0x3f, - .SurfacePitch = stride - 1, -# if (ANV_IS_HASWELL) - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, -# endif - .SurfaceBaseAddress = { NULL, offset }, - }; - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); -} - -VkResult genX(CreateSampler)( - VkDevice _device, - const VkSamplerCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSampler* pSampler) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_sampler *sampler; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!sampler) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GEN7_SAMPLER_STATE sampler_state = { - .SamplerDisable = false, - .TextureBorderColorMode = DX10OGL, - .LODPreClampEnable = CLAMP_ENABLE_OGL, - .BaseMipLevel = 0.0, - .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], - .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, - pCreateInfo->anisotropyEnable), - .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, - pCreateInfo->anisotropyEnable), - .TextureLODBias = pCreateInfo->mipLodBias * 256, - .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = pCreateInfo->minLod, - .MaxLOD = pCreateInfo->maxLod, - .ChromaKeyEnable = 0, - .ChromaKeyIndex = 0, - .ChromaKeyMode = 0, - 
.ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = OVERRIDE, - - .BorderColorPointer = - device->border_colors.offset + - pCreateInfo->borderColor * sizeof(float) * 4, - - .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), - .RAddressMinFilterRoundingEnable = 0, - .RAddressMagFilterRoundingEnable = 0, - .VAddressMinFilterRoundingEnable = 0, - .VAddressMagFilterRoundingEnable = 0, - .UAddressMinFilterRoundingEnable = 0, - .UAddressMagFilterRoundingEnable = 0, - .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], - .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], - }; - - GEN7_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); - - *pSampler = anv_sampler_to_handle(sampler); - - return VK_SUCCESS; -} - -static const uint8_t anv_halign[] = { - [4] = HALIGN_4, - [8] = HALIGN_8, -}; - -static const uint8_t anv_valign[] = { - [2] = VALIGN_2, - [4] = VALIGN_4, -}; - -void -genX(fill_image_surface_state)(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) -{ - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) - anv_finishme("non-2D image views"); - - assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); - assert(util_is_power_of_two(usage)); - - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); - struct anv_surface *surface = - anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - - uint32_t depth = 1; - if (range->layerCount > 1) { - depth = range->layerCount; - } else 
if (image->extent.depth > 1) { - depth = image->extent.depth; - } - - const struct isl_extent3d image_align_sa = - isl_surf_get_image_alignment_sa(&surface->isl); - - struct GENX(RENDER_SURFACE_STATE) template = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType, - usage == VK_IMAGE_USAGE_STORAGE_BIT), - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), - .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], - .SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], - - /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if - * Tiled Surface is False." - */ - .TiledSurface = surface->isl.tiling != ISL_TILING_LINEAR, - .TileWalk = surface->isl.tiling == ISL_TILING_Y0 ? - TILEWALK_YMAJOR : TILEWALK_XMAJOR, - - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - - .RenderCacheReadWriteMode = 0, /* TEMPLATE */ - - .Height = image->extent.height - 1, - .Width = image->extent.width - 1, - .Depth = depth - 1, - .SurfacePitch = surface->isl.row_pitch - 1, - .MinimumArrayElement = range->baseArrayLayer, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, - - .SurfaceObjectControlState = GENX(MOCS), - - .MIPCountLOD = 0, /* TEMPLATE */ - .SurfaceMinLOD = 0, /* TEMPLATE */ - - .MCSEnable = false, -# if (ANV_IS_HASWELL) - .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], - .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], - .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], - .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], -# else /* XXX: Seriously? */ - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, -# endif - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, iview->offset }, - }; - - if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - /* For render target surfaces, the hardware interprets field - * MIPCount/LOD as LOD. 
The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. - */ - template.MIPCountLOD = range->baseMipLevel; - template.SurfaceMinLOD = 0; - } else { - /* For non render target surfaces, the hardware interprets field - * MIPCount/LOD as MIPCount. The range of levels accessible by the - * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. - */ - template.SurfaceMinLOD = range->baseMipLevel; - template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; - } - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); -} diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c deleted file mode 100644 index b741612c891..00000000000 --- a/src/vulkan/gen8_cmd_buffer.c +++ /dev/null @@ -1,914 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen8_pack.h" -#include "genxml/gen9_pack.h" - -static uint32_t -cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) -{ - static const uint32_t push_constant_opcodes[] = { - [MESA_SHADER_VERTEX] = 21, - [MESA_SHADER_TESS_CTRL] = 25, /* HS */ - [MESA_SHADER_TESS_EVAL] = 26, /* DS */ - [MESA_SHADER_GEOMETRY] = 22, - [MESA_SHADER_FRAGMENT] = 23, - [MESA_SHADER_COMPUTE] = 0, - }; - - VkShaderStageFlags flushed = 0; - - anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { - if (stage == MESA_SHADER_COMPUTE) - continue; - - struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - - if (state.offset == 0) - continue; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, - .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - - flushed |= mesa_to_vk_shader_stage(stage); - } - - cmd_buffer->state.push_constants_dirty &= ~flushed; - - return flushed; -} - -#if ANV_GEN == 8 -static void -emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t count, const VkViewport *viewports) -{ - struct anv_state sf_clip_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); - - for (uint32_t i = 0; i < count; i++) { - const VkViewport *vp = &viewports[i]; - - /* The gen7 state struct has just the matrix and guardband fields, the - * gen8 struct adds the min/max viewport fields. 
*/ - struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = { - .ViewportMatrixElementm00 = vp->width / 2, - .ViewportMatrixElementm11 = vp->height / 2, - .ViewportMatrixElementm22 = 1.0, - .ViewportMatrixElementm30 = vp->x + vp->width / 2, - .ViewportMatrixElementm31 = vp->y + vp->height / 2, - .ViewportMatrixElementm32 = 0.0, - .XMinClipGuardband = -1.0f, - .XMaxClipGuardband = 1.0f, - .YMinClipGuardband = -1.0f, - .YMaxClipGuardband = 1.0f, - .XMinViewPort = vp->x, - .XMaxViewPort = vp->x + vp->width - 1, - .YMinViewPort = vp->y, - .YMaxViewPort = vp->y + vp->height - 1, - }; - - struct GENX(CC_VIEWPORT) cc_viewport = { - .MinimumDepth = vp->minDepth, - .MaximumDepth = vp->maxDepth - }; - - GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, - &sf_clip_viewport); - GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); - } - - if (!cmd_buffer->device->info.has_llc) { - anv_state_clflush(sf_clip_state); - anv_state_clflush(cc_state); - } - - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), - .CCViewportPointer = cc_state.offset); - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), - .SFClipViewportPointer = sf_clip_state.offset); -} - -void -gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.dynamic.viewport.count > 0) { - emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count, - cmd_buffer->state.dynamic.viewport.viewports); - } else { - /* If viewport count is 0, this is taken to mean "use the default" */ - emit_viewport_state(cmd_buffer, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = cmd_buffer->state.framebuffer->width, - .height = cmd_buffer->state.framebuffer->height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - } -} -#endif - -static void -emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), - .RegisterOffset = reg, - .DataDWord = imm); -} - 
-#define GEN8_L3CNTLREG 0x7034 - -static void -config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) -{ - /* References for GL state: - * - * - commits e307cfa..228d5a3 - * - src/mesa/drivers/dri/i965/gen7_l3_state.c - */ - - uint32_t val = enable_slm ? - /* All = 48 ways; URB = 16 ways; DC and RO = 0, SLM = 1 */ - 0x60000021 : - /* All = 48 ways; URB = 48 ways; DC, RO and SLM = 0 */ - 0x60000060; - bool changed = cmd_buffer->state.current_l3_config != val; - - if (changed) { - /* According to the hardware docs, the L3 partitioning can only be changed - * while the pipeline is completely drained and the caches are flushed, - * which involves a first PIPE_CONTROL flush which stalls the pipeline and - * initiates invalidation of the relevant caches... - */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .TextureCacheInvalidationEnable = true, - .ConstantCacheInvalidationEnable = true, - .InstructionCacheInvalidateEnable = true, - .DCFlushEnable = true, - .PostSyncOperation = NoWrite, - .CommandStreamerStallEnable = true); - - /* ...followed by a second stalling flush which guarantees that - * invalidation is complete when the L3 configuration registers are - * modified. 
- */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DCFlushEnable = true, - .PostSyncOperation = NoWrite, - .CommandStreamerStallEnable = true); - - emit_lri(&cmd_buffer->batch, GEN8_L3CNTLREG, val); - cmd_buffer->state.current_l3_config = val; - } -} - -static void -__emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer) -{ - uint32_t sf_dw[GENX(3DSTATE_SF_length)]; - struct GENX(3DSTATE_SF) sf = { - GENX(3DSTATE_SF_header), - .LineWidth = cmd_buffer->state.dynamic.line_width, - }; - GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); - /* FIXME: gen9.fs */ - anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, - cmd_buffer->state.pipeline->gen8.sf); -} -static void -__emit_gen9_sf_state(struct anv_cmd_buffer *cmd_buffer) -{ - uint32_t sf_dw[GENX(3DSTATE_SF_length)]; - struct GEN9_3DSTATE_SF sf = { - GEN9_3DSTATE_SF_header, - .LineWidth = cmd_buffer->state.dynamic.line_width, - }; - GEN9_3DSTATE_SF_pack(NULL, sf_dw, &sf); - /* FIXME: gen9.fs */ - anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, - cmd_buffer->state.pipeline->gen8.sf); -} - -static void -__emit_sf_state(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->device->info.is_cherryview) - __emit_gen9_sf_state(cmd_buffer); - else - __emit_genx_sf_state(cmd_buffer); -} - -void -genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - uint32_t *p; - - uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - - config_l3(cmd_buffer, false); - - genX(flush_pipeline_select_3d)(cmd_buffer); - - if (vb_emit) { - const uint32_t num_buffers = __builtin_popcount(vb_emit); - const uint32_t num_dwords = 1 + num_buffers * 4; - - p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GENX(3DSTATE_VERTEX_BUFFERS)); - uint32_t vb, i = 0; - for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; - uint32_t offset = 
cmd_buffer->state.vertex_bindings[vb].offset; - - struct GENX(VERTEX_BUFFER_STATE) state = { - .VertexBufferIndex = vb, - .MemoryObjectControlState = GENX(MOCS), - .AddressModifyEnable = true, - .BufferPitch = pipeline->binding_stride[vb], - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset - }; - - GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); - i++; - } - } - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { - /* If somebody compiled a pipeline after starting a command buffer the - * scratch bo may have grown since we started this cmd buffer (and - * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, - * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ - if (cmd_buffer->state.scratch_size < pipeline->total_scratch) - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - } - - /* We emit the binding tables and sampler tables first, then emit push - * constants and then finally emit binding table and sampler table - * pointers. It has to happen in this order, since emitting the binding - * tables may change the push constants (in case of storage images). After - * emitting push constants, on SKL+ we have to emit the corresponding - * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. 
- */ - uint32_t dirty = 0; - if (cmd_buffer->state.descriptors_dirty) - dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); - - if (cmd_buffer->state.push_constants_dirty) - dirty |= cmd_buffer_flush_push_constants(cmd_buffer); - - if (dirty) - gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) - gen8_cmd_buffer_emit_viewport(cmd_buffer); - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) - gen7_cmd_buffer_emit_scissor(cmd_buffer); - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { - __emit_sf_state(cmd_buffer); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){ - bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || - cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; - - uint32_t raster_dw[GENX(3DSTATE_RASTER_length)]; - struct GENX(3DSTATE_RASTER) raster = { - GENX(3DSTATE_RASTER_header), - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, - .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, - .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, - .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp - }; - GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster); - anv_batch_emit_merge(&cmd_buffer->batch, raster_dw, - pipeline->gen8.raster); - } - - /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to - * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits gets split - * across different state packets for gen8 and gen9. We handle that by - * using a big old #if switch here. 
- */ -#if ANV_GEN == 8 - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN8_COLOR_CALC_STATE_length * 4, - 64); - struct GEN8_COLOR_CALC_STATE cc = { - .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], - .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], - .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], - .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.front, - .BackFaceStencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.back, - }; - GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(cc_state); - - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_CC_STATE_POINTERS, - .ColorCalcStatePointer = cc_state.offset, - .ColorCalcStatePointerValid = true); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { - uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; - - struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - GEN8_3DSTATE_WM_DEPTH_STENCIL_header, - - /* Is this what we need to do? 
*/ - .StencilBufferWriteEnable = - cmd_buffer->state.dynamic.stencil_write_mask.front != 0, - - .StencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, - .StencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, - - .BackfaceStencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, - .BackfaceStencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, - }; - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw, - &wm_depth_stencil); - - anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, - pipeline->gen8.wm_depth_stencil); - } -#else - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) { - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN9_COLOR_CALC_STATE_length * 4, - 64); - struct GEN9_COLOR_CALC_STATE cc = { - .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], - .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], - .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], - .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - }; - GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(cc_state); - - anv_batch_emit(&cmd_buffer->batch, - GEN9_3DSTATE_CC_STATE_POINTERS, - .ColorCalcStatePointer = cc_state.offset, - .ColorCalcStatePointerValid = true); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { - uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length]; - struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; - struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - GEN9_3DSTATE_WM_DEPTH_STENCIL_header, - - .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || - d->stencil_write_mask.back != 0, - - 
.StencilTestMask = d->stencil_compare_mask.front & 0xff, - .StencilWriteMask = d->stencil_write_mask.front & 0xff, - - .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, - .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, - - .StencilReferenceValue = d->stencil_reference.front, - .BackfaceStencilReferenceValue = d->stencil_reference.back - }; - GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil); - - anv_batch_emit_merge(&cmd_buffer->batch, dwords, - pipeline->gen9.wm_depth_stencil); - } -#endif - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_INDEX_BUFFER)) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), - .IndexedDrawCutIndexEnable = pipeline->primitive_restart, - .CutIndex = cmd_buffer->state.restart_index, - ); - } - - cmd_buffer->state.vb_dirty &= ~vb_emit; - cmd_buffer->state.dirty = 0; -} - -void genX(CmdBindIndexBuffer)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - VkIndexType indexType) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - static const uint32_t vk_to_gen_index_type[] = { - [VK_INDEX_TYPE_UINT16] = INDEX_WORD, - [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, - }; - - static const uint32_t restart_index_for_type[] = { - [VK_INDEX_TYPE_UINT16] = UINT16_MAX, - [VK_INDEX_TYPE_UINT32] = UINT32_MAX, - }; - - cmd_buffer->state.restart_index = restart_index_for_type[indexType]; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), - .IndexFormat = vk_to_gen_index_type[indexType], - .MemoryObjectControlState = GENX(MOCS), - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset); - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; -} - -static VkResult -flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_pipeline *pipeline = 
cmd_buffer->state.compute_pipeline; - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = anv_cmd_buffer_emit_samplers(cmd_buffer, - MESA_SHADER_COMPUTE, &samplers); - if (result != VK_SUCCESS) - return result; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, - MESA_SHADER_COMPUTE, &surfaces); - if (result != VK_SUCCESS) - return result; - - struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - - unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; - unsigned push_constant_data_size = - (prog_data->nr_params + local_id_dwords) * 4; - unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); - unsigned push_constant_regs = reg_aligned_constant_size / 32; - - if (push_state.alloc_size) { - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), - .CURBETotalDataLength = push_state.alloc_size, - .CURBEDataStartAddress = push_state.offset); - } - - assert(prog_data->total_shared <= 64 * 1024); - uint32_t slm_size = 0; - if (prog_data->total_shared > 0) { - /* slm_size is in 4k increments, but must be a power of 2. 
*/ - slm_size = 4 * 1024; - while (slm_size < prog_data->total_shared) - slm_size <<= 1; - slm_size /= 4 * 1024; - } - - struct anv_state state = - anv_state_pool_emit(&device->dynamic_state_pool, - GENX(INTERFACE_DESCRIPTOR_DATA), 64, - .KernelStartPointer = pipeline->cs_simd, - .KernelStartPointerHigh = 0, - .BindingTablePointer = surfaces.offset, - .BindingTableEntryCount = 0, - .SamplerStatePointer = samplers.offset, - .SamplerCount = 0, - .ConstantIndirectURBEntryReadLength = push_constant_regs, - .ConstantURBEntryReadOffset = 0, - .BarrierEnable = cs_prog_data->uses_barrier, - .SharedLocalMemorySize = slm_size, - .NumberofThreadsinGPGPUThreadGroup = - pipeline->cs_thread_width_max); - - uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), - .InterfaceDescriptorTotalLength = size, - .InterfaceDescriptorDataStartAddress = state.offset); - - return VK_SUCCESS; -} - -void -genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - VkResult result; - - assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - - bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; - config_l3(cmd_buffer, needs_slm); - - if (cmd_buffer->state.current_pipeline != GPGPU) { -#if ANV_GEN < 10 - /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: - * - * Software must clear the COLOR_CALC_STATE Valid field in - * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT - * with Pipeline Select set to GPGPU. - * - * The internal hardware docs recommend the same workaround for Gen9 - * hardware too. 
- */ - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_CC_STATE_POINTERS)); -#endif - - anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), -#if ANV_GEN >= 9 - .MaskBits = 3, -#endif - .PipelineSelection = GPGPU); - cmd_buffer->state.current_pipeline = GPGPU; - } - - if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { - result = flush_compute_descriptor_set(cmd_buffer); - assert(result == VK_SUCCESS); - cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; - } - - cmd_buffer->state.compute_dirty = 0; -} - -static void -emit_ps_depth_count(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WritePSDepthCount, - .DepthStallEnable = true, - .Address = { bo, offset }); -} - -static void -emit_query_availability(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteImmediateData, - .Address = { bo, offset }, - .ImmediateData = 1); -} - -void genX(CmdBeginQuery)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query, - VkQueryControlFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - /* Workaround: When meta uses the pipeline with the VS disabled, it seems - * that the pipelining of the depth write breaks. What we see is that - * samples from the render pass clear leaks into the first query - * immediately after the clear. Doing a pipecontrol with a post-sync - * operation and DepthStallEnable seems to work around the issue. 
- */ - if (cmd_buffer->state.need_query_wa) { - cmd_buffer->state.need_query_wa = false; - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DepthCacheFlushEnable = true, - .DepthStallEnable = true); - } - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - query * sizeof(struct anv_query_pool_slot)); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -void genX(CmdEndQuery)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - query * sizeof(struct anv_query_pool_slot) + 8); - - emit_query_availability(&cmd_buffer->batch, &pool->bo, - query * sizeof(struct anv_query_pool_slot) + 16); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -#define TIMESTAMP 0x2358 - -void genX(CmdWriteTimestamp)( - VkCommandBuffer commandBuffer, - VkPipelineStageFlagBits pipelineStage, - VkQueryPool queryPool, - uint32_t query) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - uint32_t offset = query * sizeof(struct anv_query_pool_slot); - - assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); - - switch (pipelineStage) { - case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = TIMESTAMP, - .MemoryAddress = { &pool->bo, offset }); - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = TIMESTAMP + 4, - .MemoryAddress = { &pool->bo, offset + 4 }); - break; - - default: - /* Everything else is bottom-of-pipe */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = 
WriteTimestamp, - .Address = { &pool->bo, offset }); - break; - } - - emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16); -} - -#define alu_opcode(v) __gen_uint((v), 20, 31) -#define alu_operand1(v) __gen_uint((v), 10, 19) -#define alu_operand2(v) __gen_uint((v), 0, 9) -#define alu(opcode, operand1, operand2) \ - alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) - -#define OPCODE_NOOP 0x000 -#define OPCODE_LOAD 0x080 -#define OPCODE_LOADINV 0x480 -#define OPCODE_LOAD0 0x081 -#define OPCODE_LOAD1 0x481 -#define OPCODE_ADD 0x100 -#define OPCODE_SUB 0x101 -#define OPCODE_AND 0x102 -#define OPCODE_OR 0x103 -#define OPCODE_XOR 0x104 -#define OPCODE_STORE 0x180 -#define OPCODE_STOREINV 0x580 - -#define OPERAND_R0 0x00 -#define OPERAND_R1 0x01 -#define OPERAND_R2 0x02 -#define OPERAND_R3 0x03 -#define OPERAND_R4 0x04 -#define OPERAND_SRCA 0x20 -#define OPERAND_SRCB 0x21 -#define OPERAND_ACCU 0x31 -#define OPERAND_ZF 0x32 -#define OPERAND_CF 0x33 - -#define CS_GPR(n) (0x2600 + (n) * 8) - -static void -emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg + 4, - .MemoryAddress = { bo, offset + 4 }); -} - -static void -store_query_result(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags) -{ - anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - - if (flags & VK_QUERY_RESULT_64_BIT) - anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = reg + 4, - .MemoryAddress = { bo, offset + 4 }); -} - -void genX(CmdCopyQueryPoolResults)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, 
- VkDeviceSize destStride, - VkQueryResultFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - uint32_t slot_offset, dst_offset; - - if (flags & VK_QUERY_RESULT_WAIT_BIT) - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .CommandStreamerStallEnable = true, - .StallAtPixelScoreboard = true); - - dst_offset = buffer->offset + destOffset; - for (uint32_t i = 0; i < queryCount; i++) { - - slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot); - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(0), &pool->bo, slot_offset); - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(1), &pool->bo, slot_offset + 8); - - /* FIXME: We need to clamp the result for 32 bit. */ - - uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); - dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); - dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); - dw[3] = alu(OPCODE_SUB, 0, 0); - dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); - break; - - case VK_QUERY_TYPE_TIMESTAMP: - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(2), &pool->bo, slot_offset); - break; - - default: - unreachable("unhandled query type"); - } - - store_query_result(&cmd_buffer->batch, - CS_GPR(2), buffer->bo, dst_offset, flags); - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), - &pool->bo, slot_offset + 16); - if (flags & VK_QUERY_RESULT_64_BIT) - store_query_result(&cmd_buffer->batch, - CS_GPR(0), buffer->bo, dst_offset + 8, flags); - else - store_query_result(&cmd_buffer->batch, - CS_GPR(0), buffer->bo, dst_offset + 4, flags); - } - - dst_offset += destStride; - } -} - -void genX(CmdSetEvent)( - VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, 
commandBuffer); - ANV_FROM_HANDLE(anv_event, event, _event); - - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteImmediateData, - .Address = { - &cmd_buffer->device->dynamic_state_block_pool.bo, - event->state.offset - }, - .ImmediateData = VK_EVENT_SET); -} - -void genX(CmdResetEvent)( - VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_event, event, _event); - - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteImmediateData, - .Address = { - &cmd_buffer->device->dynamic_state_block_pool.bo, - event->state.offset - }, - .ImmediateData = VK_EVENT_RESET); -} - -void genX(CmdWaitEvents)( - VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - for (uint32_t i = 0; i < eventCount; i++) { - ANV_FROM_HANDLE(anv_event, event, pEvents[i]); - - anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), - .WaitMode = PollingMode, - .CompareOperation = COMPARE_SAD_EQUAL_SDD, - .SemaphoreDataDword = VK_EVENT_SET, - .SemaphoreAddress = { - &cmd_buffer->device->dynamic_state_block_pool.bo, - event->state.offset - }); - } - - genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, - false, /* byRegion */ - memoryBarrierCount, pMemoryBarriers, - bufferMemoryBarrierCount, pBufferMemoryBarriers, - imageMemoryBarrierCount, pImageMemoryBarriers); -} diff --git a/src/vulkan/gen8_pipeline.c 
b/src/vulkan/gen8_pipeline.c deleted file mode 100644 index f0411562fba..00000000000 --- a/src/vulkan/gen8_pipeline.c +++ /dev/null @@ -1,573 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen8_pack.h" -#include "genxml/gen9_pack.h" - -#include "genX_pipeline_util.h" - -static void -emit_ia_state(struct anv_pipeline *pipeline, - const VkPipelineInputAssemblyStateCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra) -{ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), - .PrimitiveTopologyType = pipeline->topology); -} - -static void -emit_rs_state(struct anv_pipeline *pipeline, - const VkPipelineRasterizationStateCreateInfo *info, - const VkPipelineMultisampleStateCreateInfo *ms_info, - const struct anv_graphics_pipeline_create_info *extra) -{ - uint32_t samples = 1; - - if (ms_info) - samples = ms_info->rasterizationSamples; - - struct GENX(3DSTATE_SF) sf = { - GENX(3DSTATE_SF_header), - .ViewportTransformEnable = !(extra && extra->disable_viewport), - .TriangleStripListProvokingVertexSelect = 0, - .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, - .PointWidthSource = pipeline->writes_point_size ? Vertex : State, - .PointWidth = 1.0, - }; - - /* FINISHME: VkBool32 rasterizerDiscardEnable; */ - - GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf); - - struct GENX(3DSTATE_RASTER) raster = { - GENX(3DSTATE_RASTER_header), - - /* For details on 3DSTATE_RASTER multisample state, see the BSpec table - * "Multisample Modes State". 
- */ - .DXMultisampleRasterizationEnable = samples > 1, - .ForcedSampleCount = FSC_NUMRASTSAMPLES_0, - .ForceMultisampling = false, - - .FrontWinding = vk_to_gen_front_face[info->frontFace], - .CullMode = vk_to_gen_cullmode[info->cullMode], - .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], - .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], - .ScissorRectangleEnable = !(extra && extra->disable_scissor), -#if ANV_GEN == 8 - .ViewportZClipTestEnable = true, -#else - /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ - .ViewportZFarClipTestEnable = true, - .ViewportZNearClipTestEnable = true, -#endif - }; - - GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster); -} - -static void -emit_cb_state(struct anv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *info, - const VkPipelineMultisampleStateCreateInfo *ms_info) -{ - struct anv_device *device = pipeline->device; - - uint32_t num_dwords = GENX(BLEND_STATE_length); - pipeline->blend_state = - anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); - - struct GENX(BLEND_STATE) blend_state = { - .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, - .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, - }; - - for (uint32_t i = 0; i < info->attachmentCount; i++) { - const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; - - if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || - a->dstColorBlendFactor != a->dstAlphaBlendFactor || - a->colorBlendOp != a->alphaBlendOp) { - blend_state.IndependentAlphaBlendEnable = true; - } - - blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) { - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], - .ColorBufferBlendEnable = a->blendEnable, - .PreBlendSourceOnlyClampEnable = false, - .ColorClampRange = COLORCLAMP_RTFORMAT, - .PreBlendColorClampEnable = true, - .PostBlendColorClampEnable = true, - .SourceBlendFactor = 
vk_to_gen_blend[a->srcColorBlendFactor], - .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], - .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], - .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], - .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), - .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), - .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), - .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), - }; - - /* Our hardware applies the blend factor prior to the blend function - * regardless of what function is used. Technically, this means the - * hardware can do MORE than GL or Vulkan specify. However, it also - * means that, for MIN and MAX, we have to stomp the blend factor to - * ONE to make it a no-op. - */ - if (a->colorBlendOp == VK_BLEND_OP_MIN || - a->colorBlendOp == VK_BLEND_OP_MAX) { - blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE; - blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE; - } - if (a->alphaBlendOp == VK_BLEND_OP_MIN || - a->alphaBlendOp == VK_BLEND_OP_MAX) { - blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE; - blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE; - } - } - - for (uint32_t i = info->attachmentCount; i < 8; i++) { - blend_state.Entry[i].WriteDisableAlpha = true; - blend_state.Entry[i].WriteDisableRed = true; - blend_state.Entry[i].WriteDisableGreen = true; - blend_state.Entry[i].WriteDisableBlue = true; - } - - GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); - if (!device->info.has_llc) - anv_state_clflush(pipeline->blend_state); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), - .BlendStatePointer = pipeline->blend_state.offset, - .BlendStatePointerValid = true); -} - -static 
void -emit_ds_state(struct anv_pipeline *pipeline, - const VkPipelineDepthStencilStateCreateInfo *info) -{ - uint32_t *dw = ANV_GEN == 8 ? - pipeline->gen8.wm_depth_stencil : pipeline->gen9.wm_depth_stencil; - - if (info == NULL) { - /* We're going to OR this together with the dynamic state. We need - * to make sure it's initialized to something useful. - */ - memset(pipeline->gen8.wm_depth_stencil, 0, - sizeof(pipeline->gen8.wm_depth_stencil)); - memset(pipeline->gen9.wm_depth_stencil, 0, - sizeof(pipeline->gen9.wm_depth_stencil)); - return; - } - - /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */ - - struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = { - .DepthTestEnable = info->depthTestEnable, - .DepthBufferWriteEnable = info->depthWriteEnable, - .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], - .DoubleSidedStencilEnable = true, - - .StencilTestEnable = info->stencilTestEnable, - .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], - .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], - .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], - .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], - .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], - .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], - .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.depthFailOp], - .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], - }; - - GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil); -} - -static void -emit_ms_state(struct anv_pipeline *pipeline, - const VkPipelineMultisampleStateCreateInfo *info) -{ - uint32_t samples = 1; - uint32_t log2_samples = 0; - - /* From the Vulkan 1.0 spec: - * If pSampleMask is NULL, it is treated as if the mask has all bits - * enabled, i.e. no coverage is removed from fragments. - * - * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits. 
- */ - uint32_t sample_mask = 0xffff; - - if (info) { - samples = info->rasterizationSamples; - log2_samples = __builtin_ffs(samples) - 1; - } - - if (info && info->pSampleMask) - sample_mask &= info->pSampleMask[0]; - - if (info && info->sampleShadingEnable) - anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable"); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), - - /* The PRM says that this bit is valid only for DX9: - * - * SW can choose to set this bit only for DX9 API. DX10/OGL API's - * should not have any effect by setting or not setting this bit. - */ - .PixelPositionOffsetEnable = false, - - .PixelLocation = CENTER, - .NumberofMultisamples = log2_samples); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), - .SampleMask = sample_mask); -} - -VkResult -genX(graphics_pipeline_create)( - VkDevice _device, - struct anv_pipeline_cache * cache, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline *pipeline; - VkResult result; - uint32_t offset, length; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); - - pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_pipeline_init(pipeline, device, cache, - pCreateInfo, extra, pAllocator); - if (result != VK_SUCCESS) { - anv_free2(&device->alloc, pAllocator, pipeline); - return result; - } - - assert(pCreateInfo->pVertexInputState); - emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); - assert(pCreateInfo->pInputAssemblyState); - emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); - assert(pCreateInfo->pRasterizationState); - emit_rs_state(pipeline, 
pCreateInfo->pRasterizationState, - pCreateInfo->pMultisampleState, extra); - emit_ms_state(pipeline, pCreateInfo->pMultisampleState); - emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); - emit_cb_state(pipeline, pCreateInfo->pColorBlendState, - pCreateInfo->pMultisampleState); - - emit_urb_setup(pipeline); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), - .ClipEnable = true, - .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), - .MinimumPointWidth = 0.125, - .MaximumPointWidth = 255.875, - .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), - .StatisticsEnable = true, - .LineEndCapAntialiasingRegionWidth = _05pixels, - .LineAntialiasingRegionWidth = _10pixels, - .EarlyDepthStencilControl = NORMAL, - .ForceThreadDispatchEnable = NORMAL, - .PointRasterizationRule = RASTRULE_UPPER_RIGHT, - .BarycentricInterpolationMode = - pipeline->ps_ksp0 == NO_KERNEL ? - 0 : pipeline->wm_prog_data.barycentric_interp_modes); - - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; - offset = 1; - length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; - - if (pipeline->gs_kernel == NO_KERNEL) - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false); - else - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), - .SingleProgramFlow = false, - .KernelStartPointer = pipeline->gs_kernel, - .VectorMaskEnable = false, - .SamplerCount = 0, - .BindingTableEntryCount = 0, - .ExpectedVertexCount = gs_prog_data->vertices_in, - - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], - .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), - - .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, - .OutputTopology = gs_prog_data->output_topology, - .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, - .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, - 
.DispatchGRFStartRegisterForURBData = - gs_prog_data->base.base.dispatch_grf_start_reg, - - .MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1, - .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, - .DispatchMode = gs_prog_data->base.dispatch_mode, - .StatisticsEnable = true, - .IncludePrimitiveID = gs_prog_data->include_primitive_id, - .ReorderMode = TRAILING, - .Enable = true, - - .ControlDataFormat = gs_prog_data->control_data_format, - - .StaticOutput = gs_prog_data->static_vertex_count >= 0, - .StaticOutputVertexCount = - gs_prog_data->static_vertex_count >= 0 ? - gs_prog_data->static_vertex_count : 0, - - /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: - * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) - * UserClipDistanceCullTestEnableBitmask(v) - */ - - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length); - - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; - /* Skip the VUE header and position slots */ - offset = 1; - length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; - - uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 : - pipeline->vs_vec4; - - if (vs_start == NO_KERNEL || (extra && extra->disable_vs)) - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), - .FunctionEnable = false, - /* Even if VS is disabled, SBE still gets the amount of - * vertex data to read from this field. 
*/ - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length); - else - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), - .KernelStartPointer = vs_start, - .SingleVertexDispatch = false, - .VectorMaskEnable = false, - .SamplerCount = 0, - .BindingTableEntryCount = - vue_prog_data->base.binding_table.size_bytes / 4, - .ThreadDispatchPriority = false, - .FloatingPointMode = IEEE754, - .IllegalOpcodeExceptionEnable = false, - .AccessesUAV = false, - .SoftwareExceptionEnable = false, - - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX], - .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), - - .DispatchGRFStartRegisterForURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, - .VertexURBEntryReadOffset = 0, - - .MaximumNumberofThreads = device->info.max_vs_threads - 1, - .StatisticsEnable = false, - .SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL, - .VertexCacheDisable = false, - .FunctionEnable = true, - - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length, - .UserClipDistanceClipTestEnableBitmask = 0, - .UserClipDistanceCullTestEnableBitmask = 0); - - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - - const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; - if (pipeline->ps_ksp0 == NO_KERNEL) { - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), - .PixelShaderValid = false); - } else { - /* TODO: We should clean this up. Among other things, this is mostly - * shared with other gens. 
- */ - const struct brw_vue_map *fs_input_map; - if (pipeline->gs_kernel == NO_KERNEL) - fs_input_map = &vue_prog_data->vue_map; - else - fs_input_map = &gs_prog_data->base.vue_map; - - struct GENX(3DSTATE_SBE_SWIZ) swiz = { - GENX(3DSTATE_SBE_SWIZ_header), - }; - - int max_source_attr = 0; - for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { - int input_index = wm_prog_data->urb_setup[attr]; - - if (input_index < 0) - continue; - - int source_attr = fs_input_map->varying_to_slot[attr]; - max_source_attr = MAX2(max_source_attr, source_attr); - - if (input_index >= 16) - continue; - - if (source_attr == -1) { - /* This attribute does not exist in the VUE--that means that the - * vertex shader did not write to it. It could be that it's a - * regular varying read by the fragment shader but not written by - * the vertex shader or it's gl_PrimitiveID. In the first case the - * value is undefined, in the second it needs to be - * gl_PrimitiveID. - */ - swiz.Attribute[input_index].ConstantSource = PRIM_ID; - swiz.Attribute[input_index].ComponentOverrideX = true; - swiz.Attribute[input_index].ComponentOverrideY = true; - swiz.Attribute[input_index].ComponentOverrideZ = true; - swiz.Attribute[input_index].ComponentOverrideW = true; - } else { - /* We have to subtract two slots to accout for the URB entry output - * read offset in the VS and GS stages. 
- */ - swiz.Attribute[input_index].SourceAttribute = source_attr - 2; - } - } - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), - .AttributeSwizzleEnable = true, - .ForceVertexURBEntryReadLength = false, - .ForceVertexURBEntryReadOffset = false, - .VertexURBEntryReadLength = - DIV_ROUND_UP(max_source_attr + 1, 2), - .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = - wm_prog_data->num_varying_inputs, - -#if ANV_GEN >= 9 - .Attribute0ActiveComponentFormat = ACF_XYZW, - .Attribute1ActiveComponentFormat = ACF_XYZW, - .Attribute2ActiveComponentFormat = ACF_XYZW, - .Attribute3ActiveComponentFormat = ACF_XYZW, - .Attribute4ActiveComponentFormat = ACF_XYZW, - .Attribute5ActiveComponentFormat = ACF_XYZW, - .Attribute6ActiveComponentFormat = ACF_XYZW, - .Attribute7ActiveComponentFormat = ACF_XYZW, - .Attribute8ActiveComponentFormat = ACF_XYZW, - .Attribute9ActiveComponentFormat = ACF_XYZW, - .Attribute10ActiveComponentFormat = ACF_XYZW, - .Attribute11ActiveComponentFormat = ACF_XYZW, - .Attribute12ActiveComponentFormat = ACF_XYZW, - .Attribute13ActiveComponentFormat = ACF_XYZW, - .Attribute14ActiveComponentFormat = ACF_XYZW, - .Attribute15ActiveComponentFormat = ACF_XYZW, - /* wow, much field, very attribute */ - .Attribute16ActiveComponentFormat = ACF_XYZW, - .Attribute17ActiveComponentFormat = ACF_XYZW, - .Attribute18ActiveComponentFormat = ACF_XYZW, - .Attribute19ActiveComponentFormat = ACF_XYZW, - .Attribute20ActiveComponentFormat = ACF_XYZW, - .Attribute21ActiveComponentFormat = ACF_XYZW, - .Attribute22ActiveComponentFormat = ACF_XYZW, - .Attribute23ActiveComponentFormat = ACF_XYZW, - .Attribute24ActiveComponentFormat = ACF_XYZW, - .Attribute25ActiveComponentFormat = ACF_XYZW, - .Attribute26ActiveComponentFormat = ACF_XYZW, - .Attribute27ActiveComponentFormat = ACF_XYZW, - .Attribute28ActiveComponentFormat = ACF_XYZW, - .Attribute29ActiveComponentFormat = ACF_XYZW, - .Attribute28ActiveComponentFormat = ACF_XYZW, - 
.Attribute29ActiveComponentFormat = ACF_XYZW, - .Attribute30ActiveComponentFormat = ACF_XYZW, -#endif - ); - - uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, - GENX(3DSTATE_SBE_SWIZ_length)); - GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), - .KernelStartPointer0 = pipeline->ps_ksp0, - - .SingleProgramFlow = false, - .VectorMaskEnable = true, - .SamplerCount = 1, - - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], - .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), - - .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, - .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? - POSOFFSET_SAMPLE: POSOFFSET_NONE, - .PushConstantEnable = wm_prog_data->base.nr_params > 0, - ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, - ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, - ._32PixelDispatchEnable = false, - - .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0, - .DispatchGRFStartRegisterForConstantSetupData1 = 0, - .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2, - - .KernelStartPointer1 = 0, - .KernelStartPointer2 = pipeline->ps_ksp2); - - bool per_sample_ps = pCreateInfo->pMultisampleState && - pCreateInfo->pMultisampleState->sampleShadingEnable; - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), - .PixelShaderValid = true, - .PixelShaderKillsPixel = wm_prog_data->uses_kill, - .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, - .AttributeEnable = wm_prog_data->num_varying_inputs > 0, - .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, - .PixelShaderIsPerSample = per_sample_ps, - .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, - .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, -#if ANV_GEN >= 9 - .PixelShaderPullsBary = wm_prog_data->pulls_bary, - .InputCoverageMaskState = wm_prog_data->uses_sample_mask ? 
- ICMS_INNER_CONSERVATIVE : ICMS_NONE, -#else - .PixelShaderUsesInputCoverageMask = - wm_prog_data->uses_sample_mask, -#endif - ); - } - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; -} diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c deleted file mode 100644 index 04cfff5444d..00000000000 --- a/src/vulkan/gen8_state.c +++ /dev/null @@ -1,493 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen8_pack.h" -#include "genxml/gen9_pack.h" - -#include "genX_state_util.h" - -VkResult -genX(init_device_state)(struct anv_device *device) -{ - struct anv_batch batch; - - uint32_t cmds[64]; - batch.start = batch.next = cmds; - batch.end = (void *) cmds + sizeof(cmds); - - anv_batch_emit(&batch, GENX(PIPELINE_SELECT), -#if ANV_GEN >= 9 - .MaskBits = 3, -#endif - .PipelineSelection = _3D); - - anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), - .StatisticsEnable = true); - anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); - anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_DS), .FunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), - .ChromaKeyKillEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); - - /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and - * VkPhysicalDeviceFeatures::standardSampleLocations. 
- */ - anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN), - ._1xSample0XOffset = 0.5, - ._1xSample0YOffset = 0.5, - ._2xSample0XOffset = 0.25, - ._2xSample0YOffset = 0.25, - ._2xSample1XOffset = 0.75, - ._2xSample1YOffset = 0.75, - ._4xSample0XOffset = 0.375, - ._4xSample0YOffset = 0.125, - ._4xSample1XOffset = 0.875, - ._4xSample1YOffset = 0.375, - ._4xSample2XOffset = 0.125, - ._4xSample2YOffset = 0.625, - ._4xSample3XOffset = 0.625, - ._4xSample3YOffset = 0.875, - ._8xSample0XOffset = 0.5625, - ._8xSample0YOffset = 0.3125, - ._8xSample1XOffset = 0.4375, - ._8xSample1YOffset = 0.6875, - ._8xSample2XOffset = 0.8125, - ._8xSample2YOffset = 0.5625, - ._8xSample3XOffset = 0.3125, - ._8xSample3YOffset = 0.1875, - ._8xSample4XOffset = 0.1875, - ._8xSample4YOffset = 0.8125, - ._8xSample5XOffset = 0.0625, - ._8xSample5YOffset = 0.4375, - ._8xSample6XOffset = 0.6875, - ._8xSample6YOffset = 0.9375, - ._8xSample7XOffset = 0.9375, - ._8xSample7YOffset = 0.0625, -#if ANV_GEN >= 9 - ._16xSample0XOffset = 0.5625, - ._16xSample0YOffset = 0.5625, - ._16xSample1XOffset = 0.4375, - ._16xSample1YOffset = 0.3125, - ._16xSample2XOffset = 0.3125, - ._16xSample2YOffset = 0.6250, - ._16xSample3XOffset = 0.7500, - ._16xSample3YOffset = 0.4375, - ._16xSample4XOffset = 0.1875, - ._16xSample4YOffset = 0.3750, - ._16xSample5XOffset = 0.6250, - ._16xSample5YOffset = 0.8125, - ._16xSample6XOffset = 0.8125, - ._16xSample6YOffset = 0.6875, - ._16xSample7XOffset = 0.6875, - ._16xSample7YOffset = 0.1875, - ._16xSample8XOffset = 0.3750, - ._16xSample8YOffset = 0.8750, - ._16xSample9XOffset = 0.5000, - ._16xSample9YOffset = 0.0625, - ._16xSample10XOffset = 0.2500, - ._16xSample10YOffset = 0.1250, - ._16xSample11XOffset = 0.1250, - ._16xSample11YOffset = 0.7500, - ._16xSample12XOffset = 0.0000, - ._16xSample12YOffset = 0.5000, - ._16xSample13XOffset = 0.9375, - ._16xSample13YOffset = 0.2500, - ._16xSample14XOffset = 0.8750, - ._16xSample14YOffset = 0.9375, - ._16xSample15XOffset = 0.0625, - 
._16xSample15YOffset = 0.0000, -#endif - ); - - anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); - - assert(batch.next <= batch.end); - - return anv_device_submit_simple_batch(device, &batch); -} - -static const uint32_t -isl_to_gen_multisample_layout[] = { - [ISL_MSAA_LAYOUT_NONE] = MSS, - [ISL_MSAA_LAYOUT_INTERLEAVED] = DEPTH_STENCIL, - [ISL_MSAA_LAYOUT_ARRAY] = MSS, -}; - -void -genX(fill_buffer_surface_state)(void *state, enum isl_format format, - uint32_t offset, uint32_t range, uint32_t stride) -{ - uint32_t num_elements = range / stride; - - struct GENX(RENDER_SURFACE_STATE) surface_state = { - .SurfaceType = SURFTYPE_BUFFER, - .SurfaceArray = false, - .SurfaceFormat = format, - .SurfaceVerticalAlignment = VALIGN4, - .SurfaceHorizontalAlignment = HALIGN4, - .TileMode = LINEAR, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GENX(MOCS), - .Height = ((num_elements - 1) >> 7) & 0x3fff, - .Width = (num_elements - 1) & 0x7f, - .Depth = ((num_elements - 1) >> 21) & 0x3f, - .SurfacePitch = stride - 1, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, - /* FIXME: We assume that the image must be bound at this time. */ - .SurfaceBaseAddress = { NULL, offset }, - }; - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); -} - -static const uint8_t anv_halign[] = { - [4] = HALIGN4, - [8] = HALIGN8, - [16] = HALIGN16, -}; - -static const uint8_t anv_valign[] = { - [4] = VALIGN4, - [8] = VALIGN8, - [16] = VALIGN16, -}; - -/** - * Get the values to pack into RENDER_SUFFACE_STATE.SurfaceHorizontalAlignment - * and SurfaceVerticalAlignment. 
- */ -static void -get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valign) -{ - #if ANV_GENx10 >= 90 - if (isl_tiling_is_std_y(surf->tiling) || - surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { - /* The hardware ignores the alignment values. Anyway, the surface's - * true alignment is likely outside the enum range of HALIGN* and - * VALIGN*. - */ - *halign = 0; - *valign = 0; - } else { - /* In Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in units - * of surface elements (not pixels nor samples). For compressed formats, - * a "surface element" is defined as a compression block. For example, - * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2 - * format (ETC2 has a block height of 4), then the vertical alignment is - * 4 compression blocks or, equivalently, 16 pixels. - */ - struct isl_extent3d image_align_el - = isl_surf_get_image_alignment_el(surf); - - *halign = anv_halign[image_align_el.width]; - *valign = anv_valign[image_align_el.height]; - } - #else - /* Pre-Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in - * units of surface samples. For example, if SurfaceVerticalAlignment - * is VALIGN_4 and the surface is singlesampled, then for any surface - * format (compressed or not) the vertical alignment is - * 4 pixels. - */ - struct isl_extent3d image_align_sa - = isl_surf_get_image_alignment_sa(surf); - - *halign = anv_halign[image_align_sa.width]; - *valign = anv_valign[image_align_sa.height]; - #endif -} - -static uint32_t -get_qpitch(const struct isl_surf *surf) -{ - switch (surf->dim) { - default: - unreachable(!"bad isl_surf_dim"); - case ISL_SURF_DIM_1D: - #if ANV_GENx10 >= 90 - /* QPitch is usually expressed as rows of surface elements (where - * a surface element is an compression block or a single surface - * sample). Skylake 1D is an outlier. 
- * - * From the Skylake BSpec >> Memory Views >> Common Surface - * Formats >> Surface Layout and Tiling >> 1D Surfaces: - * - * Surface QPitch specifies the distance in pixels between array - * slices. - */ - return isl_surf_get_array_pitch_el(surf); - #else - return isl_surf_get_array_pitch_el_rows(surf); - #endif - case ISL_SURF_DIM_2D: - case ISL_SURF_DIM_3D: - #if ANV_GEN >= 9 - return isl_surf_get_array_pitch_el_rows(surf); - #else - /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch - * - * "This field must be set to an integer multiple of the Surface - * Vertical Alignment. For compressed textures (BC*, FXT1, - * ETC*, and EAC* Surface Formats), this field is in units of - * rows in the uncompressed surface, and must be set to an - * integer multiple of the vertical alignment parameter "j" - * defined in the Common Surface Formats section." - */ - return isl_surf_get_array_pitch_sa_rows(surf); - #endif - } -} - -void -genX(fill_image_surface_state)(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) -{ - assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); - assert(util_is_power_of_two(usage)); - - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); - struct anv_surface *surface = - anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - - static const uint8_t isl_to_gen_tiling[] = { - [ISL_TILING_LINEAR] = LINEAR, - [ISL_TILING_X] = XMAJOR, - [ISL_TILING_Y0] = YMAJOR, - [ISL_TILING_Yf] = YMAJOR, - [ISL_TILING_Ys] = YMAJOR, - [ISL_TILING_W] = WMAJOR, - }; - - uint32_t halign, valign; - get_halign_valign(&surface->isl, &halign, &valign); - - struct GENX(RENDER_SURFACE_STATE) template = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType, 
is_storage), - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), - .SurfaceVerticalAlignment = valign, - .SurfaceHorizontalAlignment = halign, - .TileMode = isl_to_gen_tiling[surface->isl.tiling], - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .CubeFaceEnablePositiveZ = 1, - .CubeFaceEnableNegativeZ = 1, - .CubeFaceEnablePositiveY = 1, - .CubeFaceEnableNegativeY = 1, - .CubeFaceEnablePositiveX = 1, - .CubeFaceEnableNegativeX = 1, - .MemoryObjectControlState = GENX(MOCS), - - /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in - * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have - * both Base Mip Level fields nonzero". - */ - .BaseMipLevel = 0.0, - - .SurfaceQPitch = get_qpitch(&surface->isl) >> 2, - .Height = iview->level_0_extent.height - 1, - .Width = iview->level_0_extent.width - 1, - .Depth = 0, /* TEMPLATE */ - .SurfacePitch = surface->isl.row_pitch - 1, - .RenderTargetViewExtent = 0, /* TEMPLATE */ - .MinimumArrayElement = 0, /* TEMPLATE */ - .MultisampledSurfaceStorageFormat = - isl_to_gen_multisample_layout[surface->isl.msaa_layout], - .NumberofMultisamples = ffs(surface->isl.samples) - 1, - .MultisamplePositionPaletteIndex = 0, /* UNUSED */ - .XOffset = 0, - .YOffset = 0, - - .MIPCountLOD = 0, /* TEMPLATE */ - .SurfaceMinLOD = 0, /* TEMPLATE */ - - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], - .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], - .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], - .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, iview->offset }, - }; - - switch (template.SurfaceType) { - case SURFTYPE_1D: - 
case SURFTYPE_2D: - template.MinimumArrayElement = range->baseArrayLayer; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: - * - * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced - * by one for each increase from zero of Minimum Array Element. For - * example, if Minimum Array Element is set to 1024 on a 2D surface, - * the range of this field is reduced to [0,1023]. - * - * In other words, 'Depth' is the number of array layers. - */ - template.Depth = range->layerCount - 1; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: - * - * For Render Target and Typed Dataport 1D and 2D Surfaces: - * This field must be set to the same value as the Depth field. - */ - template.RenderTargetViewExtent = template.Depth; - break; - case SURFTYPE_CUBE: - template.MinimumArrayElement = range->baseArrayLayer; - /* Same as SURFTYPE_2D, but divided by 6 */ - template.Depth = range->layerCount / 6 - 1; - template.RenderTargetViewExtent = template.Depth; - break; - case SURFTYPE_3D: - template.MinimumArrayElement = range->baseArrayLayer; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: - * - * If the volume texture is MIP-mapped, this field specifies the - * depth of the base MIP level. - */ - template.Depth = image->extent.depth - 1; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: - * - * For Render Target and Typed Dataport 3D Surfaces: This field - * indicates the extent of the accessible 'R' coordinates minus 1 on - * the LOD currently being rendered to. - */ - template.RenderTargetViewExtent = iview->extent.depth - 1; - break; - default: - unreachable(!"bad SurfaceType"); - } - - if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - /* For render target surfaces, the hardware interprets field - * MIPCount/LOD as LOD. The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. 
- */ - template.MIPCountLOD = range->baseMipLevel; - template.SurfaceMinLOD = 0; - } else { - /* For non render target surfaces, the hardware interprets field - * MIPCount/LOD as MIPCount. The range of levels accessible by the - * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. - */ - template.SurfaceMinLOD = range->baseMipLevel; - template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; - } - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); -} - -VkResult genX(CreateSampler)( - VkDevice _device, - const VkSamplerCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSampler* pSampler) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_sampler *sampler; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!sampler) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - uint32_t border_color_offset = device->border_colors.offset + - pCreateInfo->borderColor * 64; - - struct GENX(SAMPLER_STATE) sampler_state = { - .SamplerDisable = false, - .TextureBorderColorMode = DX10OGL, - .LODPreClampMode = CLAMP_MODE_OGL, -#if ANV_GEN == 8 - .BaseMipLevel = 0.0, -#endif - .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], - .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, pCreateInfo->anisotropyEnable), - .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, pCreateInfo->anisotropyEnable), - .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), - .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), - .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14), - .ChromaKeyEnable = 0, - .ChromaKeyIndex = 0, - .ChromaKeyMode = 0, - .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = OVERRIDE, - - .IndirectStatePointer = border_color_offset >> 6, - - 
.LODClampMagnificationMode = MIPNONE, - .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), - .RAddressMinFilterRoundingEnable = 0, - .RAddressMagFilterRoundingEnable = 0, - .VAddressMinFilterRoundingEnable = 0, - .VAddressMagFilterRoundingEnable = 0, - .UAddressMinFilterRoundingEnable = 0, - .UAddressMagFilterRoundingEnable = 0, - .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], - .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], - }; - - GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); - - *pSampler = anv_sampler_to_handle(sampler); - - return VK_SUCCESS; -} diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c deleted file mode 100644 index 5498d1d68c6..00000000000 --- a/src/vulkan/genX_cmd_buffer.c +++ /dev/null @@ -1,717 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include - -#include "anv_private.h" - -#if (ANV_GEN == 9) -# include "genxml/gen9_pack.h" -#elif (ANV_GEN == 8) -# include "genxml/gen8_pack.h" -#elif (ANV_IS_HASWELL) -# include "genxml/gen75_pack.h" -#elif (ANV_GEN == 7) -# include "genxml/gen7_pack.h" -#endif - -void -genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_bo *scratch_bo = NULL; - - cmd_buffer->state.scratch_size = - anv_block_pool_size(&device->scratch_block_pool); - if (cmd_buffer->state.scratch_size > 0) - scratch_bo = &device->scratch_block_pool.bo; - -/* XXX: Do we need this on more than just BDW? */ -#if (ANV_GEN >= 8) - /* Emit a render target cache flush. - * - * This isn't documented anywhere in the PRM. However, it seems to be - * necessary prior to changing the surface state base adress. Without - * this, we get GPU hangs when using multi-level command buffers which - * clear depth, reset state base address, and then go render stuff. 
- */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .RenderTargetCacheFlushEnable = true); -#endif - - anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), - .GeneralStateBaseAddress = { scratch_bo, 0 }, - .GeneralStateMemoryObjectControlState = GENX(MOCS), - .GeneralStateBaseAddressModifyEnable = true, - - .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer), - .SurfaceStateMemoryObjectControlState = GENX(MOCS), - .SurfaceStateBaseAddressModifyEnable = true, - - .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, - .DynamicStateMemoryObjectControlState = GENX(MOCS), - .DynamicStateBaseAddressModifyEnable = true, - - .IndirectObjectBaseAddress = { NULL, 0 }, - .IndirectObjectMemoryObjectControlState = GENX(MOCS), - .IndirectObjectBaseAddressModifyEnable = true, - - .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, - .InstructionMemoryObjectControlState = GENX(MOCS), - .InstructionBaseAddressModifyEnable = true, - -# if (ANV_GEN >= 8) - /* Broadwell requires that we specify a buffer size for a bunch of - * these fields. However, since we will be growing the BO's live, we - * just set them all to the maximum. - */ - .GeneralStateBufferSize = 0xfffff, - .GeneralStateBufferSizeModifyEnable = true, - .DynamicStateBufferSize = 0xfffff, - .DynamicStateBufferSizeModifyEnable = true, - .IndirectObjectBufferSize = 0xfffff, - .IndirectObjectBufferSizeModifyEnable = true, - .InstructionBufferSize = 0xfffff, - .InstructionBuffersizeModifyEnable = true, -# endif - ); - - /* After re-setting the surface state base address, we have to do some - * cache flusing so that the sampler engine will pick up the new - * SURFACE_STATE objects and binding tables. From the Broadwell PRM, - * Shared Function > 3D Sampler > State > State Caching (page 96): - * - * Coherency with system memory in the state cache, like the texture - * cache is handled partially by software. 
It is expected that the - * command stream or shader will issue Cache Flush operation or - * Cache_Flush sampler message to ensure that the L1 cache remains - * coherent with system memory. - * - * [...] - * - * Whenever the value of the Dynamic_State_Base_Addr, - * Surface_State_Base_Addr are altered, the L1 state cache must be - * invalidated to ensure the new surface or sampler state is fetched - * from system memory. - * - * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit - * which, according the PIPE_CONTROL instruction documentation in the - * Broadwell PRM: - * - * Setting this bit is independent of any other bit in this packet. - * This bit controls the invalidation of the L1 and L2 state caches - * at the top of the pipe i.e. at the parsing time. - * - * Unfortunately, experimentation seems to indicate that state cache - * invalidation through a PIPE_CONTROL does nothing whatsoever in - * regards to surface state and binding tables. In stead, it seems that - * invalidating the texture cache is what is actually needed. - * - * XXX: As far as we have been able to determine through - * experimentation, shows that flush the texture cache appears to be - * sufficient. The theory here is that all of the sampling/rendering - * units cache the binding table in the texture cache. However, we have - * yet to be able to actually confirm this. 
- */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .TextureCacheInvalidationEnable = true); -} - -void genX(CmdPipelineBarrier)( - VkCommandBuffer commandBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - VkBool32 byRegion, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - uint32_t b, *dw; - - /* XXX: Right now, we're really dumb and just flush whatever categories - * the app asks for. One of these days we may make this a bit better - * but right now that's all the hardware allows for in most areas. - */ - VkAccessFlags src_flags = 0; - VkAccessFlags dst_flags = 0; - - for (uint32_t i = 0; i < memoryBarrierCount; i++) { - src_flags |= pMemoryBarriers[i].srcAccessMask; - dst_flags |= pMemoryBarriers[i].dstAccessMask; - } - - for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { - src_flags |= pBufferMemoryBarriers[i].srcAccessMask; - dst_flags |= pBufferMemoryBarriers[i].dstAccessMask; - } - - for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { - src_flags |= pImageMemoryBarriers[i].srcAccessMask; - dst_flags |= pImageMemoryBarriers[i].dstAccessMask; - } - - /* Mask out the Source access flags we care about */ - const uint32_t src_mask = - VK_ACCESS_SHADER_WRITE_BIT | - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - VK_ACCESS_TRANSFER_WRITE_BIT; - - src_flags = src_flags & src_mask; - - /* Mask out the destination access flags we care about */ - const uint32_t dst_mask = - VK_ACCESS_INDIRECT_COMMAND_READ_BIT | - VK_ACCESS_INDEX_READ_BIT | - VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | - VK_ACCESS_UNIFORM_READ_BIT | - VK_ACCESS_SHADER_READ_BIT | - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | - 
VK_ACCESS_TRANSFER_READ_BIT; - - dst_flags = dst_flags & dst_mask; - - /* The src flags represent how things were used previously. This is - * what we use for doing flushes. - */ - struct GENX(PIPE_CONTROL) flush_cmd = { - GENX(PIPE_CONTROL_header), - .PostSyncOperation = NoWrite, - }; - - for_each_bit(b, src_flags) { - switch ((VkAccessFlagBits)(1 << b)) { - case VK_ACCESS_SHADER_WRITE_BIT: - flush_cmd.DCFlushEnable = true; - break; - case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: - flush_cmd.RenderTargetCacheFlushEnable = true; - break; - case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: - flush_cmd.DepthCacheFlushEnable = true; - break; - case VK_ACCESS_TRANSFER_WRITE_BIT: - flush_cmd.RenderTargetCacheFlushEnable = true; - flush_cmd.DepthCacheFlushEnable = true; - break; - default: - unreachable("should've masked this out by now"); - } - } - - /* If we end up doing two PIPE_CONTROLs, the first, flusing one also has to - * stall and wait for the flushing to finish, so we don't re-dirty the - * caches with in-flight rendering after the second PIPE_CONTROL - * invalidates. - */ - - if (dst_flags) - flush_cmd.CommandStreamerStallEnable = true; - - if (src_flags && dst_flags) { - dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); - GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &flush_cmd); - } - - /* The dst flags represent how things will be used in the future. This - * is what we use for doing cache invalidations. 
- */ - struct GENX(PIPE_CONTROL) invalidate_cmd = { - GENX(PIPE_CONTROL_header), - .PostSyncOperation = NoWrite, - }; - - for_each_bit(b, dst_flags) { - switch ((VkAccessFlagBits)(1 << b)) { - case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: - case VK_ACCESS_INDEX_READ_BIT: - case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: - invalidate_cmd.VFCacheInvalidationEnable = true; - break; - case VK_ACCESS_UNIFORM_READ_BIT: - invalidate_cmd.ConstantCacheInvalidationEnable = true; - /* fallthrough */ - case VK_ACCESS_SHADER_READ_BIT: - invalidate_cmd.TextureCacheInvalidationEnable = true; - break; - case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: - invalidate_cmd.TextureCacheInvalidationEnable = true; - break; - case VK_ACCESS_TRANSFER_READ_BIT: - invalidate_cmd.TextureCacheInvalidationEnable = true; - break; - default: - unreachable("should've masked this out by now"); - } - } - - if (dst_flags) { - dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); - GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &invalidate_cmd); - } -} - -static void -emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, uint32_t offset) -{ - uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, - GENX(3DSTATE_VERTEX_BUFFERS)); - - GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, - &(struct GENX(VERTEX_BUFFER_STATE)) { - .VertexBufferIndex = 32, /* Reserved for this */ - .AddressModifyEnable = true, - .BufferPitch = 0, -#if (ANV_GEN >= 8) - .MemoryObjectControlState = GENX(MOCS), - .BufferStartingAddress = { bo, offset }, - .BufferSize = 8 -#else - .VertexBufferMemoryObjectControlState = GENX(MOCS), - .BufferStartingAddress = { bo, offset }, - .EndAddress = { bo, offset + 8 }, -#endif - }); -} - -static void -emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, - uint32_t base_vertex, uint32_t base_instance) -{ - struct anv_state id_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); - - ((uint32_t *)id_state.map)[0] = base_vertex; - 
((uint32_t *)id_state.map)[1] = base_instance; - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(id_state); - - emit_base_vertex_instance_bo(cmd_buffer, - &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); -} - -void genX(CmdDraw)( - VkCommandBuffer commandBuffer, - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t firstVertex, - uint32_t firstInstance) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - - genX(cmd_buffer_flush_state)(cmd_buffer); - - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), - .VertexAccessType = SEQUENTIAL, - .PrimitiveTopologyType = pipeline->topology, - .VertexCountPerInstance = vertexCount, - .StartVertexLocation = firstVertex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = 0); -} - -void genX(CmdDrawIndexed)( - VkCommandBuffer commandBuffer, - uint32_t indexCount, - uint32_t instanceCount, - uint32_t firstIndex, - int32_t vertexOffset, - uint32_t firstInstance) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - - genX(cmd_buffer_flush_state)(cmd_buffer); - - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), - .VertexAccessType = RANDOM, - .PrimitiveTopologyType = pipeline->topology, - .VertexCountPerInstance = indexCount, - .StartVertexLocation = firstIndex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = vertexOffset); -} - -/* Auto-Draw / 
Indirect Registers */ -#define GEN7_3DPRIM_END_OFFSET 0x2420 -#define GEN7_3DPRIM_START_VERTEX 0x2430 -#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 -#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 -#define GEN7_3DPRIM_START_INSTANCE 0x243C -#define GEN7_3DPRIM_BASE_VERTEX 0x2440 - -static void -emit_lrm(struct anv_batch *batch, - uint32_t reg, struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); -} - -static void -emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), - .RegisterOffset = reg, - .DataDWord = imm); -} - -void genX(CmdDrawIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - genX(cmd_buffer_flush_state)(cmd_buffer); - - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); - - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); - emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), - .IndirectParameterEnable = true, - .VertexAccessType = SEQUENTIAL, - .PrimitiveTopologyType = pipeline->topology); -} - -void genX(CmdDrawIndexedIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t 
drawCount, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - genX(cmd_buffer_flush_state)(cmd_buffer); - - /* TODO: We need to stomp base vertex to 0 somehow */ - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); - - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), - .IndirectParameterEnable = true, - .VertexAccessType = RANDOM, - .PrimitiveTopologyType = pipeline->topology); -} - - -void genX(CmdDispatch)( - VkCommandBuffer commandBuffer, - uint32_t x, - uint32_t y, - uint32_t z) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - - if (prog_data->uses_num_work_groups) { - struct anv_state state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); - uint32_t *sizes = state.map; - sizes[0] = x; - sizes[1] = y; - sizes[2] = z; - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - cmd_buffer->state.num_workgroups_offset = state.offset; - cmd_buffer->state.num_workgroups_bo = - &cmd_buffer->device->dynamic_state_block_pool.bo; - } - - genX(cmd_buffer_flush_compute_state)(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), - 
.SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, - .ThreadGroupIDXDimension = x, - .ThreadGroupIDYDimension = y, - .ThreadGroupIDZDimension = z, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); -} - -#define GPGPU_DISPATCHDIMX 0x2500 -#define GPGPU_DISPATCHDIMY 0x2504 -#define GPGPU_DISPATCHDIMZ 0x2508 - -void genX(CmdDispatchIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - if (prog_data->uses_num_work_groups) { - cmd_buffer->state.num_workgroups_offset = bo_offset; - cmd_buffer->state.num_workgroups_bo = bo; - } - - genX(cmd_buffer_flush_compute_state)(cmd_buffer); - - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - - anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), - .IndirectParameterEnable = true, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); -} - -void -genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.current_pipeline != _3D) { - anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), 
-#if ANV_GEN >= 9 - .MaskBits = 3, -#endif - .PipelineSelection = _3D); - cmd_buffer->state.current_pipeline = _3D; - } -} - -static void -cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_image_view *iview = - anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - const struct anv_image *image = iview ? iview->image : NULL; - const struct anv_format *anv_format = - iview ? anv_format_for_vk_format(iview->vk_format) : NULL; - const bool has_depth = iview && anv_format->has_depth; - const bool has_stencil = iview && anv_format->has_stencil; - - /* FIXME: Implement the PMA stall W/A */ - /* FIXME: Width and Height are wrong */ - - /* Emit 3DSTATE_DEPTH_BUFFER */ - if (has_depth) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), - .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = true, - .StencilWriteEnable = has_stencil, - .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev, - &image->depth_surface.isl), - .SurfacePitch = image->depth_surface.isl.row_pitch - 1, - .SurfaceBaseAddress = { - .bo = image->bo, - .offset = image->depth_surface.offset, - }, - .Height = fb->height - 1, - .Width = fb->width - 1, - .LOD = 0, - .Depth = 1 - 1, - .MinimumArrayElement = 0, - .DepthBufferObjectControlState = GENX(MOCS), -#if ANV_GEN >= 8 - .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2, -#endif - .RenderTargetViewExtent = 1 - 1); - } else { - /* Even when no depth buffer is present, the hardware requires that - * 3DSTATE_DEPTH_BUFFER be programmed correctly. 
The Broadwell PRM says: - * - * If a null depth buffer is bound, the driver must instead bind depth as: - * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D - * 3DSTATE_DEPTH.Width = 1 - * 3DSTATE_DEPTH.Height = 1 - * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM - * 3DSTATE_DEPTH.SurfaceBaseAddress = 0 - * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0 - * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0 - * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 - * - * The PRM is wrong, though. The width and height must be programmed to - * actual framebuffer's width and height, even when neither depth buffer - * nor stencil buffer is present. - */ - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), - .SurfaceType = SURFTYPE_2D, - .SurfaceFormat = D16_UNORM, - .Width = fb->width - 1, - .Height = fb->height - 1, - .StencilWriteEnable = has_stencil); - } - - /* Emit 3DSTATE_STENCIL_BUFFER */ - if (has_stencil) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), -#if ANV_GEN >= 8 || ANV_IS_HASWELL - .StencilBufferEnable = true, -#endif - .StencilBufferObjectControlState = GENX(MOCS), - - /* Stencil buffers have strange pitch. The PRM says: - * - * The pitch must be set to 2x the value computed based on width, - * as the stencil buffer is stored with two rows interleaved. - */ - .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, - -#if ANV_GEN >= 8 - .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2, -#endif - .SurfaceBaseAddress = { - .bo = image->bo, - .offset = image->offset + image->stencil_surface.offset, - }); - } else { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER)); - } - - /* Disable hierarchial depth buffers. */ - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER)); - - /* Clear the clear params. 
*/ - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS)); -} - -/** - * @see anv_cmd_buffer_set_subpass() - */ -void -genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) -{ - cmd_buffer->state.subpass = subpass; - - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - - cmd_buffer_emit_depth_stencil(cmd_buffer); -} - -void genX(CmdBeginRenderPass)( - VkCommandBuffer commandBuffer, - const VkRenderPassBeginInfo* pRenderPassBegin, - VkSubpassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); - ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - - cmd_buffer->state.framebuffer = framebuffer; - cmd_buffer->state.pass = pass; - anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); - - genX(flush_pipeline_select_3d)(cmd_buffer); - - const VkRect2D *render_area = &pRenderPassBegin->renderArea; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), - .ClippedDrawingRectangleYMin = render_area->offset.y, - .ClippedDrawingRectangleXMin = render_area->offset.x, - .ClippedDrawingRectangleYMax = - render_area->offset.y + render_area->extent.height - 1, - .ClippedDrawingRectangleXMax = - render_area->offset.x + render_area->extent.width - 1, - .DrawingRectangleOriginY = 0, - .DrawingRectangleOriginX = 0); - - genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); - anv_cmd_buffer_clear_subpass(cmd_buffer); -} - -void genX(CmdNextSubpass)( - VkCommandBuffer commandBuffer, - VkSubpassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - - anv_cmd_buffer_resolve_subpass(cmd_buffer); - genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); - anv_cmd_buffer_clear_subpass(cmd_buffer); -} - -void genX(CmdEndRenderPass)( - VkCommandBuffer 
commandBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - anv_cmd_buffer_resolve_subpass(cmd_buffer); -} diff --git a/src/vulkan/genX_pipeline.c b/src/vulkan/genX_pipeline.c deleted file mode 100644 index 4c2e0bc6e0d..00000000000 --- a/src/vulkan/genX_pipeline.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "anv_private.h" - -#if (ANV_GEN == 9) -# include "genxml/gen9_pack.h" -#elif (ANV_GEN == 8) -# include "genxml/gen8_pack.h" -#elif (ANV_IS_HASWELL) -# include "genxml/gen75_pack.h" -#elif (ANV_GEN == 7) -# include "genxml/gen7_pack.h" -#endif - -VkResult -genX(compute_pipeline_create)( - VkDevice _device, - struct anv_pipeline_cache * cache, - const VkComputePipelineCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline *pipeline; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); - - pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - - pipeline->blend_state.map = NULL; - - result = anv_reloc_list_init(&pipeline->batch_relocs, - pAllocator ? pAllocator : &device->alloc); - if (result != VK_SUCCESS) { - anv_free2(&device->alloc, pAllocator, pipeline); - return result; - } - pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; - pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); - pipeline->batch.relocs = &pipeline->batch_relocs; - - /* When we free the pipeline, we detect stages based on the NULL status - * of various prog_data pointers. Make them NULL by default. 
- */ - memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); - - pipeline->vs_simd8 = NO_KERNEL; - pipeline->vs_vec4 = NO_KERNEL; - pipeline->gs_kernel = NO_KERNEL; - - pipeline->active_stages = 0; - pipeline->total_scratch = 0; - - assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); - ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); - anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, - pCreateInfo->stage.pName, - pCreateInfo->stage.pSpecializationInfo); - - pipeline->use_repclear = false; - - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - - anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], - .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), -#if ANV_GEN > 7 - .ScratchSpaceBasePointerHigh = 0, - .StackSize = 0, -#else - .GPGPUMode = true, -#endif - .MaximumNumberofThreads = device->info.max_cs_threads - 1, - .NumberofURBEntries = ANV_GEN <= 7 ? 0 : 2, - .ResetGatewayTimer = true, -#if ANV_GEN <= 8 - .BypassGatewayControl = true, -#endif - .URBEntryAllocationSize = ANV_GEN <= 7 ? 
0 : 2, - .CURBEAllocationSize = 0); - - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - uint32_t group_size = prog_data->local_size[0] * - prog_data->local_size[1] * prog_data->local_size[2]; - pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); - uint32_t remainder = group_size & (prog_data->simd_size - 1); - - if (remainder > 0) - pipeline->cs_right_mask = ~0u >> (32 - remainder); - else - pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); - - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; -} diff --git a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h deleted file mode 100644 index 696e2be7c3f..00000000000 --- a/src/vulkan/genX_pipeline_util.h +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -static uint32_t -vertex_element_comp_control(enum isl_format format, unsigned comp) -{ - uint8_t bits; - switch (comp) { - case 0: bits = isl_format_layouts[format].channels.r.bits; break; - case 1: bits = isl_format_layouts[format].channels.g.bits; break; - case 2: bits = isl_format_layouts[format].channels.b.bits; break; - case 3: bits = isl_format_layouts[format].channels.a.bits; break; - default: unreachable("Invalid component"); - } - - if (bits) { - return VFCOMP_STORE_SRC; - } else if (comp < 3) { - return VFCOMP_STORE_0; - } else if (isl_format_layouts[format].channels.r.type == ISL_UINT || - isl_format_layouts[format].channels.r.type == ISL_SINT) { - assert(comp == 3); - return VFCOMP_STORE_1_INT; - } else { - assert(comp == 3); - return VFCOMP_STORE_1_FP; - } -} - -static void -emit_vertex_input(struct anv_pipeline *pipeline, - const VkPipelineVertexInputStateCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra) -{ - uint32_t elements; - if (extra && extra->disable_vs) { - /* If the VS is disabled, just assume the user knows what they're - * doing and apply the layout blindly. This can only come from - * meta, so this *should* be safe. - */ - elements = 0; - for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) - elements |= (1 << info->pVertexAttributeDescriptions[i].location); - } else { - /* Pull inputs_read out of the VS prog data */ - uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; - assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); - elements = inputs_read >> VERT_ATTRIB_GENERIC0; - } - -#if ANV_GEN >= 8 - /* On BDW+, we only need to allocate space for base ids. Setting up - * the actual vertex and instance id is a separate packet. 
- */ - const bool needs_svgs_elem = pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance; -#else - /* On Haswell and prior, vertex and instance id are created by using the - * ComponentControl fields, so we need an element for any of them. - */ - const bool needs_svgs_elem = pipeline->vs_prog_data.uses_vertexid || - pipeline->vs_prog_data.uses_instanceid || - pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance; -#endif - - uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem; - if (elem_count == 0) - return; - - uint32_t *p; - - const uint32_t num_dwords = 1 + elem_count * 2; - p = anv_batch_emitn(&pipeline->batch, num_dwords, - GENX(3DSTATE_VERTEX_ELEMENTS)); - memset(p + 1, 0, (num_dwords - 1) * 4); - - for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { - const VkVertexInputAttributeDescription *desc = - &info->pVertexAttributeDescriptions[i]; - enum isl_format format = anv_get_isl_format(desc->format, - VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_TILING_LINEAR, - NULL); - - assert(desc->binding < 32); - - if ((elements & (1 << desc->location)) == 0) - continue; /* Binding unused */ - - uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1)); - - struct GENX(VERTEX_ELEMENT_STATE) element = { - .VertexBufferIndex = desc->binding, - .Valid = true, - .SourceElementFormat = format, - .EdgeFlagEnable = false, - .SourceElementOffset = desc->offset, - .Component0Control = vertex_element_comp_control(format, 0), - .Component1Control = vertex_element_comp_control(format, 1), - .Component2Control = vertex_element_comp_control(format, 2), - .Component3Control = vertex_element_comp_control(format, 3), - }; - GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); - -#if ANV_GEN >= 8 - /* On Broadwell and later, we have a separate VF_INSTANCING packet - * that controls instancing. 
On Haswell and prior, that's part of - * VERTEX_BUFFER_STATE which we emit later. - */ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), - .InstancingEnable = pipeline->instancing_enable[desc->binding], - .VertexElementIndex = slot, - /* Vulkan so far doesn't have an instance divisor, so - * this is always 1 (ignored if not instancing). */ - .InstanceDataStepRate = 1); -#endif - } - - const uint32_t id_slot = __builtin_popcount(elements); - if (needs_svgs_elem) { - /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum: - * "Within a VERTEX_ELEMENT_STATE structure, if a Component - * Control field is set to something other than VFCOMP_STORE_SRC, - * no higher-numbered Component Control fields may be set to - * VFCOMP_STORE_SRC" - * - * This means, that if we have BaseInstance, we need BaseVertex as - * well. Just do all or nothing. - */ - uint32_t base_ctrl = (pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance) ? - VFCOMP_STORE_SRC : VFCOMP_STORE_0; - - struct GENX(VERTEX_ELEMENT_STATE) element = { - .VertexBufferIndex = 32, /* Reserved for this */ - .Valid = true, - .SourceElementFormat = ISL_FORMAT_R32G32_UINT, - .Component0Control = base_ctrl, - .Component1Control = base_ctrl, -#if ANV_GEN >= 8 - .Component2Control = VFCOMP_STORE_0, - .Component3Control = VFCOMP_STORE_0, -#else - .Component2Control = VFCOMP_STORE_VID, - .Component3Control = VFCOMP_STORE_IID, -#endif - }; - GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element); - } - -#if ANV_GEN >= 8 - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), - .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, - .VertexIDComponentNumber = 2, - .VertexIDElementOffset = id_slot, - .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, - .InstanceIDComponentNumber = 3, - .InstanceIDElementOffset = id_slot); -#endif -} - -static inline void -emit_urb_setup(struct anv_pipeline *pipeline) -{ -#if ANV_GEN == 7 - struct anv_device 
*device = pipeline->device; - - /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: - * - * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall - * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, - * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, - * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL - * needs to be sent before any combination of VS associated 3DSTATE." - */ - anv_batch_emit(&pipeline->batch, GEN7_PIPE_CONTROL, - .DepthStallEnable = true, - .PostSyncOperation = WriteImmediateData, - .Address = { &device->workaround_bo, 0 }); -#endif - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), - .ConstantBufferOffset = 0, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), - .ConstantBufferOffset = 4, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), - .ConstantBufferOffset = 8, - .ConstantBufferSize = 4); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), - .VSURBStartingAddress = pipeline->urb.vs_start, - .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, - .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_GS), - .GSURBStartingAddress = pipeline->urb.gs_start, - .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, - .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_HS), - .HSURBStartingAddress = pipeline->urb.vs_start, - .HSURBEntryAllocationSize = 0, - .HSNumberofURBEntries = 0); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_DS), - .DSURBStartingAddress = pipeline->urb.vs_start, - .DSURBEntryAllocationSize = 0, - .DSNumberofURBEntries = 0); -} - -static inline uint32_t -scratch_space(const struct brw_stage_prog_data *prog_data) -{ - return ffs(prog_data->total_scratch / 2048); -} - -static const uint32_t vk_to_gen_cullmode[] = { - 
[VK_CULL_MODE_NONE] = CULLMODE_NONE, - [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, - [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, - [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH -}; - -static const uint32_t vk_to_gen_fillmode[] = { - [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID, - [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME, - [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT, -}; - -static const uint32_t vk_to_gen_front_face[] = { - [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, - [VK_FRONT_FACE_CLOCKWISE] = 0 -}; - -static const uint32_t vk_to_gen_logic_op[] = { - [VK_LOGIC_OP_COPY] = LOGICOP_COPY, - [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, - [VK_LOGIC_OP_AND] = LOGICOP_AND, - [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, - [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, - [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, - [VK_LOGIC_OP_XOR] = LOGICOP_XOR, - [VK_LOGIC_OP_OR] = LOGICOP_OR, - [VK_LOGIC_OP_NOR] = LOGICOP_NOR, - [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, - [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, - [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, - [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, - [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, - [VK_LOGIC_OP_NAND] = LOGICOP_NAND, - [VK_LOGIC_OP_SET] = LOGICOP_SET, -}; - -static const uint32_t vk_to_gen_blend[] = { - [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, - [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, - [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, - [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, - [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, - [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, - [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR, - 
[VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA, - [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, - [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, - [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, -}; - -static const uint32_t vk_to_gen_blend_op[] = { - [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, - [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, - [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, - [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, - [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, -}; - -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, -}; - -static const uint32_t vk_to_gen_stencil_op[] = { - [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, - [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, - [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, - [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, - [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, - [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, - [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, - [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, -}; diff --git a/src/vulkan/genX_state_util.h b/src/vulkan/genX_state_util.h deleted file mode 100644 index 67f798ab66e..00000000000 --- a/src/vulkan/genX_state_util.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this 
software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -static const uint8_t -anv_surftype(const struct anv_image *image, VkImageViewType view_type, - bool storage) -{ - switch (view_type) { - default: - unreachable("bad VkImageViewType"); - case VK_IMAGE_VIEW_TYPE_1D: - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - assert(image->type == VK_IMAGE_TYPE_1D); - return SURFTYPE_1D; - case VK_IMAGE_VIEW_TYPE_CUBE: - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - assert(image->type == VK_IMAGE_TYPE_2D); - return storage ? 
SURFTYPE_2D : SURFTYPE_CUBE; - case VK_IMAGE_VIEW_TYPE_2D: - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - assert(image->type == VK_IMAGE_TYPE_2D); - return SURFTYPE_2D; - case VK_IMAGE_VIEW_TYPE_3D: - assert(image->type == VK_IMAGE_TYPE_3D); - return SURFTYPE_3D; - } -} - -static enum isl_format -anv_surface_format(const struct anv_device *device, enum isl_format format, - bool storage) -{ - if (storage) { - return isl_lower_storage_image_format(&device->isl_dev, format); - } else { - return format; - } -} - -#if ANV_GEN > 7 || ANV_IS_HASWELL -static const uint32_t vk_to_gen_swizzle[] = { - [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, - [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, - [VK_COMPONENT_SWIZZLE_R] = SCS_RED, - [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, - [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, - [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA -}; -#endif - -static inline uint32_t -vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) -{ - switch (filter) { - default: - assert(!"Invalid filter"); - case VK_FILTER_NEAREST: - return MAPFILTER_NEAREST; - case VK_FILTER_LINEAR: - return anisotropyEnable ? 
MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR; - } -} - -static inline uint32_t -vk_to_gen_max_anisotropy(float ratio) -{ - return (anv_clamp_f(ratio, 2, 16) - 2) / 2; -} - -static const uint32_t vk_to_gen_mipmap_mode[] = { - [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, - [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR -}; - -static const uint32_t vk_to_gen_tex_address[] = { - [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, - [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, - [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, -}; - -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, -}; diff --git a/src/vulkan/intel_icd.json.in b/src/vulkan/intel_icd.json.in deleted file mode 100644 index d9b363a9762..00000000000 --- a/src/vulkan/intel_icd.json.in +++ /dev/null @@ -1,7 +0,0 @@ -{ - "file_format_version": "1.0.0", - "ICD": { - "library_path": "@install_libdir@/libvulkan_intel.so", - "abi_versions": "1.0.3" - } -} diff --git a/src/vulkan/tests/.gitignore b/src/vulkan/tests/.gitignore deleted file mode 100644 index 5d054055685..00000000000 --- a/src/vulkan/tests/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -block_pool -block_pool_no_free -state_pool -state_pool_free_list_only -state_pool_no_free diff --git a/src/vulkan/tests/Makefile.am b/src/vulkan/tests/Makefile.am deleted file mode 100644 index 883013d86c6..00000000000 --- a/src/vulkan/tests/Makefile.am +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright © 2009 Intel Corporation -# -# Permission is hereby 
granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# on the rights to use, copy, modify, merge, publish, distribute, sub -# license, and/or sell copies of the Software, and to permit persons to whom -# the Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- -AM_CPPFLAGS = \ - $(INTEL_CFLAGS) \ - $(VALGRIND_CFLAGS) \ - $(DEFINES) \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/mapi \ - -I$(top_srcdir)/src/mesa \ - -I$(top_srcdir)/src/mesa/drivers/dri/common \ - -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ - -I$(top_srcdir)/src/gallium/auxiliary \ - -I$(top_srcdir)/src/gallium/include \ - -I$(top_srcdir)/src/isl/ \ - -I$(top_srcdir)/src/vulkan - -LDADD = \ - $(top_builddir)/src/vulkan/libvulkan-test.la \ - $(PTHREAD_LIBS) -lm -lstdc++ - -check_PROGRAMS = \ - block_pool_no_free \ - state_pool_no_free \ - state_pool_free_list_only \ - state_pool - -TESTS = $(check_PROGRAMS) diff --git a/src/vulkan/tests/block_pool_no_free.c b/src/vulkan/tests/block_pool_no_free.c deleted file mode 100644 index 86d1a76151f..00000000000 --- a/src/vulkan/tests/block_pool_no_free.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "anv_private.h" - -#define NUM_THREADS 16 -#define BLOCKS_PER_THREAD 1024 -#define NUM_RUNS 64 - -struct job { - pthread_t thread; - unsigned id; - struct anv_block_pool *pool; - uint32_t blocks[BLOCKS_PER_THREAD]; - uint32_t back_blocks[BLOCKS_PER_THREAD]; -} jobs[NUM_THREADS]; - - -static void *alloc_blocks(void *_job) -{ - struct job *job = _job; - int32_t block, *data; - - for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { - block = anv_block_pool_alloc(job->pool); - data = job->pool->map + block; - *data = block; - assert(block >= 0); - job->blocks[i] = block; - - block = anv_block_pool_alloc_back(job->pool); - data = job->pool->map + block; - *data = block; - assert(block < 0); - job->back_blocks[i] = -block; - } - - for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { - block = job->blocks[i]; - data = job->pool->map + block; - assert(*data == block); - - block = -job->back_blocks[i]; - data = job->pool->map + block; - assert(*data == block); - } - - return NULL; -} - -static void validate_monotonic(uint32_t **blocks) -{ - /* A list of indices, one per thread */ - unsigned next[NUM_THREADS]; - memset(next, 0, sizeof(next)); - - int highest = -1; - while (true) { - /* First, we find which thread has the highest next element */ - int thread_max = -1; - int max_thread_idx = -1; - for (unsigned i = 0; i < NUM_THREADS; i++) { - if (next[i] >= BLOCKS_PER_THREAD) - continue; - - if (thread_max < blocks[i][next[i]]) { - thread_max = blocks[i][next[i]]; - max_thread_idx = i; - } - } - - /* The only way this can happen is if all of the next[] values are at - * BLOCKS_PER_THREAD, in which case, we're done. 
- */ - if (thread_max == -1) - break; - - /* That next element had better be higher than the previous highest */ - assert(blocks[max_thread_idx][next[max_thread_idx]] > highest); - - highest = blocks[max_thread_idx][next[max_thread_idx]]; - next[max_thread_idx]++; - } -} - -static void run_test() -{ - struct anv_device device; - struct anv_block_pool pool; - - pthread_mutex_init(&device.mutex, NULL); - anv_block_pool_init(&pool, &device, 16); - - for (unsigned i = 0; i < NUM_THREADS; i++) { - jobs[i].pool = &pool; - jobs[i].id = i; - pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]); - } - - for (unsigned i = 0; i < NUM_THREADS; i++) - pthread_join(jobs[i].thread, NULL); - - /* Validate that the block allocations were monotonic */ - uint32_t *block_ptrs[NUM_THREADS]; - for (unsigned i = 0; i < NUM_THREADS; i++) - block_ptrs[i] = jobs[i].blocks; - validate_monotonic(block_ptrs); - - /* Validate that the back block allocations were monotonic */ - for (unsigned i = 0; i < NUM_THREADS; i++) - block_ptrs[i] = jobs[i].back_blocks; - validate_monotonic(block_ptrs); - - anv_block_pool_finish(&pool); - pthread_mutex_destroy(&device.mutex); -} - -int main(int argc, char **argv) -{ - for (unsigned i = 0; i < NUM_RUNS; i++) - run_test(); -} diff --git a/src/vulkan/tests/state_pool.c b/src/vulkan/tests/state_pool.c deleted file mode 100644 index 878ec19a595..00000000000 --- a/src/vulkan/tests/state_pool.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice 
and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "anv_private.h" - -#define NUM_THREADS 8 -#define STATES_PER_THREAD_LOG2 10 -#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) -#define NUM_RUNS 64 - -#include "state_pool_test_helper.h" - -int main(int argc, char **argv) -{ - struct anv_device device; - struct anv_block_pool block_pool; - struct anv_state_pool state_pool; - - pthread_mutex_init(&device.mutex, NULL); - - for (unsigned i = 0; i < NUM_RUNS; i++) { - anv_block_pool_init(&block_pool, &device, 256); - anv_state_pool_init(&state_pool, &block_pool); - - /* Grab one so a zero offset is impossible */ - anv_state_pool_alloc(&state_pool, 16, 16); - - run_state_pool_test(&state_pool); - - anv_state_pool_finish(&state_pool); - anv_block_pool_finish(&block_pool); - } - - pthread_mutex_destroy(&device.mutex); -} diff --git a/src/vulkan/tests/state_pool_free_list_only.c b/src/vulkan/tests/state_pool_free_list_only.c deleted file mode 100644 index 2f4eb47fe45..00000000000 --- a/src/vulkan/tests/state_pool_free_list_only.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, 
modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "anv_private.h" - -#define NUM_THREADS 8 -#define STATES_PER_THREAD_LOG2 12 -#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) - -#include "state_pool_test_helper.h" - -int main(int argc, char **argv) -{ - struct anv_device device; - struct anv_block_pool block_pool; - struct anv_state_pool state_pool; - - pthread_mutex_init(&device.mutex, NULL); - anv_block_pool_init(&block_pool, &device, 4096); - anv_state_pool_init(&state_pool, &block_pool); - - /* Grab one so a zero offset is impossible */ - anv_state_pool_alloc(&state_pool, 16, 16); - - /* Grab and return enough states that the state pool test below won't - * actually ever resize anything. 
- */ - { - struct anv_state states[NUM_THREADS * STATES_PER_THREAD]; - for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) { - states[i] = anv_state_pool_alloc(&state_pool, 16, 16); - assert(states[i].offset != 0); - } - - for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) - anv_state_pool_free(&state_pool, states[i]); - } - - run_state_pool_test(&state_pool); - - anv_state_pool_finish(&state_pool); - anv_block_pool_finish(&block_pool); - pthread_mutex_destroy(&device.mutex); -} diff --git a/src/vulkan/tests/state_pool_no_free.c b/src/vulkan/tests/state_pool_no_free.c deleted file mode 100644 index 4b248c2ee66..00000000000 --- a/src/vulkan/tests/state_pool_no_free.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include - -#include "anv_private.h" - -#define NUM_THREADS 16 -#define STATES_PER_THREAD 1024 -#define NUM_RUNS 64 - -struct job { - pthread_t thread; - unsigned id; - struct anv_state_pool *pool; - uint32_t offsets[STATES_PER_THREAD]; -} jobs[NUM_THREADS]; - -pthread_barrier_t barrier; - -static void *alloc_states(void *_job) -{ - struct job *job = _job; - - pthread_barrier_wait(&barrier); - - for (unsigned i = 0; i < STATES_PER_THREAD; i++) { - struct anv_state state = anv_state_pool_alloc(job->pool, 16, 16); - job->offsets[i] = state.offset; - } - - return NULL; -} - -static void run_test() -{ - struct anv_device device; - struct anv_block_pool block_pool; - struct anv_state_pool state_pool; - - pthread_mutex_init(&device.mutex, NULL); - anv_block_pool_init(&block_pool, &device, 64); - anv_state_pool_init(&state_pool, &block_pool); - - pthread_barrier_init(&barrier, NULL, NUM_THREADS); - - for (unsigned i = 0; i < NUM_THREADS; i++) { - jobs[i].pool = &state_pool; - jobs[i].id = i; - pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); - } - - for (unsigned i = 0; i < NUM_THREADS; i++) - pthread_join(jobs[i].thread, NULL); - - /* A list of indices, one per thread */ - unsigned next[NUM_THREADS]; - memset(next, 0, sizeof(next)); - - int highest = -1; - while (true) { - /* First, we find which thread has the highest next element */ - int thread_max = -1; - int max_thread_idx = -1; - for (unsigned i = 0; i < NUM_THREADS; i++) { - if (next[i] >= STATES_PER_THREAD) - continue; - - if (thread_max < jobs[i].offsets[next[i]]) { - thread_max = jobs[i].offsets[next[i]]; - max_thread_idx = i; - } - } - - /* The only way this can happen is if all of the next[] values are at - * BLOCKS_PER_THREAD, in which case, we're done. 
- */ - if (thread_max == -1) - break; - - /* That next element had better be higher than the previous highest */ - assert(jobs[max_thread_idx].offsets[next[max_thread_idx]] > highest); - - highest = jobs[max_thread_idx].offsets[next[max_thread_idx]]; - next[max_thread_idx]++; - } - - anv_state_pool_finish(&state_pool); - anv_block_pool_finish(&block_pool); - pthread_mutex_destroy(&device.mutex); -} - -int main(int argc, char **argv) -{ - for (unsigned i = 0; i < NUM_RUNS; i++) - run_test(); -} diff --git a/src/vulkan/tests/state_pool_test_helper.h b/src/vulkan/tests/state_pool_test_helper.h deleted file mode 100644 index 0e56431303f..00000000000 --- a/src/vulkan/tests/state_pool_test_helper.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include - -struct job { - struct anv_state_pool *pool; - unsigned id; - pthread_t thread; -} jobs[NUM_THREADS]; - -pthread_barrier_t barrier; - -static void *alloc_states(void *void_job) -{ - struct job *job = void_job; - - const unsigned chunk_size = 1 << (job->id % STATES_PER_THREAD_LOG2); - const unsigned num_chunks = STATES_PER_THREAD / chunk_size; - - struct anv_state states[chunk_size]; - - pthread_barrier_wait(&barrier); - - for (unsigned c = 0; c < num_chunks; c++) { - for (unsigned i = 0; i < chunk_size; i++) { - states[i] = anv_state_pool_alloc(job->pool, 16, 16); - memset(states[i].map, 139, 16); - assert(states[i].offset != 0); - } - - for (unsigned i = 0; i < chunk_size; i++) - anv_state_pool_free(job->pool, states[i]); - } - - return NULL; -} - -static void run_state_pool_test(struct anv_state_pool *state_pool) -{ - pthread_barrier_init(&barrier, NULL, NUM_THREADS); - - for (unsigned i = 0; i < NUM_THREADS; i++) { - jobs[i].pool = state_pool; - jobs[i].id = i; - pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); - } - - for (unsigned i = 0; i < NUM_THREADS; i++) - pthread_join(jobs[i].thread, NULL); -} -- cgit v1.2.3 From 8c23392c26916711b7b02337fd342ee9765b6fd4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 10:44:06 -0800 Subject: anv/formats: Don't use a compound literal to initialize a const array Doing so makes older versions of GCC rather grumpy. Newere GCC fixes this, but using a compound literal isn't really gaining us anything anyway. 
--- src/intel/vulkan/anv_formats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 7798a7bbde3..b4b52aa6053 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -24,8 +24,8 @@ #include "anv_private.h" #include "brw_surface_formats.h" -#define RGBA ((struct anv_format_swizzle) { 0, 1, 2, 3 }) -#define BGRA ((struct anv_format_swizzle) { 2, 1, 0, 3 }) +#define RGBA { 0, 1, 2, 3 } +#define BGRA { 2, 1, 0, 3 } #define swiz_fmt(__vk_fmt, __hw_fmt, __swizzle, ...) \ [__vk_fmt] = { \ -- cgit v1.2.3 From e881c73975cb12ce58d4ebc362c6ad18a8e4b3ca Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 11:04:53 -0800 Subject: anv/pipeline: Don't leak the binding map --- src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 2 ++ src/intel/vulkan/anv_pipeline.c | 5 +++++ src/intel/vulkan/genX_pipeline.c | 1 + 3 files changed, 8 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index c58a93878ee..4600872d1f6 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -391,4 +391,6 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4; } + + ralloc_free(mem_ctx); } diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index a7feefb540e..2f1ce3956a9 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -193,6 +193,11 @@ void anv_DestroyPipeline( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { + free(pipeline->bindings[s].surface_to_descriptor); + free(pipeline->bindings[s].sampler_to_descriptor); + } + 
anv_reloc_list_finish(&pipeline->batch_relocs, pAllocator ? pAllocator : &device->alloc); if (pipeline->blend_state.map) diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 4c2e0bc6e0d..54ec8307d02 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -72,6 +72,7 @@ genX(compute_pipeline_create)( */ memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); pipeline->vs_simd8 = NO_KERNEL; pipeline->vs_vec4 = NO_KERNEL; -- cgit v1.2.3 From 79c0781f44af2a93473c68cf317bb6844f31cfc8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 11:42:36 -0800 Subject: nir/gather_info: Count textures and images --- src/compiler/nir/nir_gather_info.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'src') diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index b84915c2d2b..8f0abd33ce6 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -105,5 +105,22 @@ nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) foreach_list_typed(nir_variable, var, node, &shader->system_values) shader->info.system_values_read |= nir_variable_get_io_mask(var, shader->stage); + shader->info.num_textures = 0; + shader->info.num_images = 0; + nir_foreach_variable(var, &shader->uniforms) { + const struct glsl_type *type = var->type; + unsigned count = 1; + if (glsl_type_is_array(type)) { + count = glsl_get_length(type); + type = glsl_get_array_element(type); + } + + if (glsl_type_is_image(type)) { + shader->info.num_images += count; + } else if (glsl_type_is_sampler(type)) { + shader->info.num_textures += count; + } + } + nir_foreach_block(entrypoint, gather_info_block, shader); } -- cgit v1.2.3 From e0565f40ea7f1653318a3e33cfeb46dcdbfd28ae Mon Sep 17 00:00:00 2001 From: Jason 
Ekstrand Date: Thu, 18 Feb 2016 11:44:26 -0800 Subject: anv/pipeline: Use nir's num_images for allocating image_params --- src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 5 ++++- src/intel/vulkan/anv_pipeline.c | 5 ++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 4600872d1f6..4be630bcbe8 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -280,6 +280,7 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, struct anv_pipeline_bind_map map = { .surface_count = 0, .sampler_count = 0, + .image_count = 0, }; for (uint32_t set = 0; set < layout->num_sets; set++) { @@ -351,6 +352,7 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, } if (map.image_count > 0) { + assert(map.image_count <= MAX_IMAGES); nir_foreach_variable(var, &shader->uniforms) { if (glsl_type_is_image(var->type) || (glsl_type_is_array(var->type) && @@ -369,7 +371,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, } struct anv_push_constants *null_data = NULL; - const gl_constant_value **param = prog_data->param + shader->num_uniforms; + const gl_constant_value **param = + prog_data->param + (shader->num_uniforms / 4); const struct brw_image_param *image_param = null_data->images; for (uint32_t i = 0; i < map.image_count; i++) { setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 2f1ce3956a9..27872d2769a 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -341,9 +341,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; - if (pipeline->bindings[stage].image_count > 0) - prog_data->nr_params += 
pipeline->bindings[stage].image_count * - BRW_IMAGE_PARAM_SIZE; + if (nir->info.num_images > 0) + prog_data->nr_params += nir->info.num_images * BRW_IMAGE_PARAM_SIZE; if (prog_data->nr_params > 0) { /* XXX: I think we're leaking this */ -- cgit v1.2.3 From 1b37276467e47919256c0a171b92004d3cfaaab4 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Thu, 18 Feb 2016 12:30:27 -0800 Subject: vulkan: fix out-of-tree build We need to be able to find the generated gen*pack.h headers. Acked-by: Jason Ekstrand --- src/intel/vulkan/Makefile.am | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 2144e5a691a..ccd98856b4b 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -65,6 +65,7 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src \ -I$(top_builddir)/src/compiler \ -I$(top_builddir)/src/compiler/nir \ + -I$(top_builddir)/src/intel \ -I$(top_builddir)/src/vulkan libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init -- cgit v1.2.3 From d5bb23156d698675fff74b1e8207ce0217c148db Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 13:37:01 -0800 Subject: anv/allocator: Set is_winsys_bo to false for block pool BOs --- src/intel/vulkan/anv_allocator.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index a7ae975656b..3b62bda3e93 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -253,6 +253,7 @@ anv_block_pool_init(struct anv_block_pool *pool, pool->bo.gem_handle = 0; pool->bo.offset = 0; pool->bo.size = 0; + pool->bo.is_winsys_bo = false; pool->block_size = block_size; pool->free_list = ANV_FREE_LIST_EMPTY; pool->back_free_list = ANV_FREE_LIST_EMPTY; -- cgit v1.2.3 From 698ea542830ba0d56e514492fbdf73e3898d4c17 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 13:54:15 -0800 Subject: anv/pipeline: Fix a typo in the pipeline 
layout code --- src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 4be630bcbe8..e745bf661ee 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -194,7 +194,7 @@ lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) if (tex->sampler) { unsigned set = tex->sampler->var->data.descriptor_set; unsigned binding = tex->sampler->var->data.binding; - tex->sampler_index = state->set[set].surface_offsets[binding]; + tex->sampler_index = state->set[set].sampler_offsets[binding]; lower_tex_deref(tex, tex->sampler, &tex->sampler_index, nir_tex_src_sampler_offset, state); } -- cgit v1.2.3 From 1375cb3c273453920d9a15fb6ca67714f534704d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 17 Feb 2016 12:23:18 +1000 Subject: anv: fix warning about unused width variable. We don't use width outside the debug clause here. 
--- src/intel/genxml/gen_pack_header.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/genxml/gen_pack_header.py b/src/intel/genxml/gen_pack_header.py index 3cabb5864aa..75c4f269cdd 100755 --- a/src/intel/genxml/gen_pack_header.py +++ b/src/intel/genxml/gen_pack_header.py @@ -62,11 +62,10 @@ __gen_mbo(uint32_t start, uint32_t end) static inline uint64_t __gen_uint(uint64_t v, uint32_t start, uint32_t end) { - const int width = end - start + 1; - __gen_validate_value(v); #if DEBUG + const int width = end - start + 1; if (width < 64) { const uint64_t max = (1ull << width) - 1; assert(v <= max); -- cgit v1.2.3 From 2b858074580ddf0b8e50d86203bc07230317a6b1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 19 Feb 2016 08:04:32 -0800 Subject: genxml: Stop using unicode in the pack generator This causes python problems and problems when people don't have a locale set properly in their shell. --- src/intel/genxml/gen_pack_header.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/genxml/gen_pack_header.py b/src/intel/genxml/gen_pack_header.py index 75c4f269cdd..9dae2d57e42 100755 --- a/src/intel/genxml/gen_pack_header.py +++ b/src/intel/genxml/gen_pack_header.py @@ -6,7 +6,7 @@ import sys import copy license = """/* - * Copyright © 2016 Intel Corporation + * Copyright (C) 2016 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), -- cgit v1.2.3 From 0d76aa94855462cbd6094897d3de831d15d23297 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 08:35:36 -0800 Subject: intel/genxml: Add a couple of helper headers --- src/intel/genxml/genX_pack.h | 40 ++++++++++++ src/intel/genxml/gen_macros.h | 148 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 188 insertions(+) create mode 100644 src/intel/genxml/genX_pack.h create mode 100644 
src/intel/genxml/gen_macros.h (limited to 'src') diff --git a/src/intel/genxml/genX_pack.h b/src/intel/genxml/genX_pack.h new file mode 100644 index 00000000000..69fc340762b --- /dev/null +++ b/src/intel/genxml/genX_pack.h @@ -0,0 +1,40 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#ifndef GEN_VERSIONx10 +# error "The GEN_VERSIONx10 macro must be defined" +#endif + +#if (GEN_VERSIONx10 == 70) +# include "gen7_pack.h" +#elif (GEN_VERSIONx10 == 75) +# include "gen75_pack.h" +#elif (GEN_VERSIONx10 == 80) +# include "gen8_pack.h" +#elif (GEN_VERSIONx10 == 90) +# include "gen9_pack.h" +#else +# error "Need to add a pack header include for this gen" +#endif diff --git a/src/intel/genxml/gen_macros.h b/src/intel/genxml/gen_macros.h new file mode 100644 index 00000000000..2c47979f35a --- /dev/null +++ b/src/intel/genxml/gen_macros.h @@ -0,0 +1,148 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +/* Macros for handling per-gen compilation. + * + * The prefixing macros GENX() and genX() automatically prefix whatever you + * give them by GENX_ or genX_ where X is the gen number. 
+ * + * You can declare a function to be used on some range of gens like this: + * + * GENX_FUNC(GEN7, GEN75) void + * genX(my_function_name)(args...) + * { + * // Do stuff + * } + * + * If the file is compiled for any set of gens containing gen7 and gen75, + * the function will effectively only get compiled twice as + * gen7_my_function_nmae and gen75_my_function_name. The function has to + * be compilable on all gens, but it will become a static inline that gets + * discarded by the compiler on all gens not in range. + * + * You can do pseudo-runtime checks in your function such as + * + * if (ANV_GEN > 8 || ANV_IS_HASWELL) { + * // Do something + * } + * + * The contents of the if statement must be valid regardless of gen, but + * the if will get compiled away on everything except haswell. + * + * For places where you really do have a compile-time conflict, you can + * use preprocessor logic: + * + * #if (ANV_GEN > 8 || ANV_IS_HASWELL) + * // Do something + * #endif + * + * However, it is strongly recommended that the former be used whenever + * possible. + */ + +/* Base macro defined on the command line. If we don't have this, we can't + * do anything. + */ +#ifndef GEN_VERSIONx10 +# error "The GEN_VERSIONx10 macro must be defined" +#endif + +#define GEN_GEN ((GEN_VERSIONx10) / 10) +#define GEN_IS_HASWELL ((GEN_VERSIONx10) == 75) + +/* Prefixing macros */ +#if (GEN_VERSIONx10 == 70) +# define GENX(X) GEN7_##X +# define genX(x) gen7_##x +#elif (GEN_VERSIONx10 == 75) +# define GENX(X) GEN75_##X +# define genX(x) gen75_##x +#elif (GEN_VERSIONx10 == 80) +# define GENX(X) GEN8_##X +# define genX(x) gen8_##x +#elif (GEN_VERSIONx10 == 90) +# define GENX(X) GEN9_##X +# define genX(x) gen9_##x +#else +# error "Need to add prefixing macros for this gen" +#endif + +/* Macros for comparing gens + * + * TODO: This wasn't the best idea. We really need to deprecate it. 
+ */ +#if (GEN_VERSIONx10 >= 70) +#define __ANV_GEN_GE_GEN7(T, F) T +#else +#define __ANV_GEN_GE_GEN7(T, F) F +#endif + +#if (GEN_VERSIONx10 <= 70) +#define __ANV_GEN_LE_GEN7(T, F) T +#else +#define __ANV_GEN_LE_GEN7(T, F) F +#endif + +#if (GEN_VERSIONx10 >= 75) +#define __ANV_GEN_GE_GEN75(T, F) T +#else +#define __ANV_GEN_GE_GEN75(T, F) F +#endif + +#if (GEN_VERSIONx10 <= 75) +#define __ANV_GEN_LE_GEN75(T, F) T +#else +#define __ANV_GEN_LE_GEN75(T, F) F +#endif + +#if (GEN_VERSIONx10 >= 80) +#define __ANV_GEN_GE_GEN8(T, F) T +#else +#define __ANV_GEN_GE_GEN8(T, F) F +#endif + +#if (GEN_VERSIONx10 <= 80) +#define __ANV_GEN_LE_GEN8(T, F) T +#else +#define __ANV_GEN_LE_GEN8(T, F) F +#endif + +#if (GEN_VERSIONx10 >= 90) +#define __ANV_GEN_GE_GEN9(T, F) T +#else +#define __ANV_GEN_GE_GEN9(T, F) F +#endif + +#if (GEN_VERSIONx10 <= 90) +#define __ANV_GEN_LE_GEN9(T, F) T +#else +#define __ANV_GEN_LE_GEN9(T, F) F +#endif + +#define __ANV_GEN_IN_RANGE(start, end, T, F) \ + __ANV_GEN_GE_##start(__ANV_GEN_LE_##end(T, F), F) + +/* Declares a function as static inlind if it's not in range */ +#define GENX_FUNC(start, end) __ANV_GEN_IN_RANGE(start, end, , static inline) -- cgit v1.2.3 From 371b4a5b33a13f35fa7783510d2d90685a9a2e8a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 09:08:27 -0800 Subject: anv: Switch over to the macros in genxml --- src/intel/vulkan/Makefile.am | 8 +- src/intel/vulkan/anv_gen_macros.h | 146 ---------------------------------- src/intel/vulkan/anv_private.h | 1 - src/intel/vulkan/gen7_cmd_buffer.c | 80 ++++++++++--------- src/intel/vulkan/gen7_pipeline.c | 38 ++++----- src/intel/vulkan/gen7_state.c | 18 ++--- src/intel/vulkan/gen8_cmd_buffer.c | 32 ++++---- src/intel/vulkan/gen8_pipeline.c | 14 ++-- src/intel/vulkan/gen8_state.c | 4 +- src/intel/vulkan/genX_cmd_buffer.c | 25 +++--- src/intel/vulkan/genX_pipeline.c | 19 ++--- src/intel/vulkan/genX_pipeline_util.h | 10 +-- src/intel/vulkan/genX_state_util.h | 2 +- 13 files changed, 
122 insertions(+), 275 deletions(-) delete mode 100644 src/intel/vulkan/anv_gen_macros.h (limited to 'src') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index ccd98856b4b..6be4f9fb427 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -108,7 +108,7 @@ libanv_gen7_la_SOURCES = \ gen7_cmd_buffer.c \ gen7_pipeline.c \ gen7_state.c -libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=70 +libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=70 libanv_gen75_la_SOURCES = \ genX_cmd_buffer.c \ @@ -116,7 +116,7 @@ libanv_gen75_la_SOURCES = \ gen7_cmd_buffer.c \ gen7_pipeline.c \ gen7_state.c -libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=75 +libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=75 libanv_gen8_la_SOURCES = \ genX_cmd_buffer.c \ @@ -124,7 +124,7 @@ libanv_gen8_la_SOURCES = \ gen8_cmd_buffer.c \ gen8_pipeline.c \ gen8_state.c -libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=80 +libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=80 libanv_gen9_la_SOURCES = \ genX_cmd_buffer.c \ @@ -132,7 +132,7 @@ libanv_gen9_la_SOURCES = \ gen8_cmd_buffer.c \ gen8_pipeline.c \ gen8_state.c -libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=90 +libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=90 if HAVE_EGL_PLATFORM_WAYLAND BUILT_SOURCES += \ diff --git a/src/intel/vulkan/anv_gen_macros.h b/src/intel/vulkan/anv_gen_macros.h deleted file mode 100644 index ef2ecd55a9b..00000000000 --- a/src/intel/vulkan/anv_gen_macros.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, 
distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -/* Macros for handling per-gen compilation. - * - * The prefixing macros GENX() and genX() automatically prefix whatever you - * give them by GENX_ or genX_ where X is the gen number. - * - * You can declare a function to be used on some range of gens like this: - * - * GENX_FUNC(GEN7, GEN75) void - * genX(my_function_name)(args...) - * { - * // Do stuff - * } - * - * If the file is compiled for any set of gens containing gen7 and gen75, - * the function will effectively only get compiled twice as - * gen7_my_function_nmae and gen75_my_function_name. The function has to - * be compilable on all gens, but it will become a static inline that gets - * discarded by the compiler on all gens not in range. - * - * You can do pseudo-runtime checks in your function such as - * - * if (ANV_GEN > 8 || ANV_IS_HASWELL) { - * // Do something - * } - * - * The contents of the if statement must be valid regardless of gen, but - * the if will get compiled away on everything except haswell. 
- * - * For places where you really do have a compile-time conflict, you can - * use preprocessor logic: - * - * #if (ANV_GEN > 8 || ANV_IS_HASWELL) - * // Do something - * #endif - * - * However, it is strongly recommended that the former be used whenever - * possible. - */ - -/* Base macro defined on the command line. If we don't have this, we can't - * do anything. - */ -#ifdef ANV_GENx10 - -/* Gen checking macros */ -#define ANV_GEN ((ANV_GENx10) / 10) -#define ANV_IS_HASWELL ((ANV_GENx10) == 75) - -/* Prefixing macros */ -#if (ANV_GENx10 == 70) -# define GENX(X) GEN7_##X -# define genX(x) gen7_##x -#elif (ANV_GENx10 == 75) -# define GENX(X) GEN75_##X -# define genX(x) gen75_##x -#elif (ANV_GENx10 == 80) -# define GENX(X) GEN8_##X -# define genX(x) gen8_##x -#elif (ANV_GENx10 == 90) -# define GENX(X) GEN9_##X -# define genX(x) gen9_##x -#else -# error "Need to add prefixing macros for your gen" -#endif - -/* Macros for comparing gens */ -#if (ANV_GENx10 >= 70) -#define __ANV_GEN_GE_GEN7(T, F) T -#else -#define __ANV_GEN_GE_GEN7(T, F) F -#endif - -#if (ANV_GENx10 <= 70) -#define __ANV_GEN_LE_GEN7(T, F) T -#else -#define __ANV_GEN_LE_GEN7(T, F) F -#endif - -#if (ANV_GENx10 >= 75) -#define __ANV_GEN_GE_GEN75(T, F) T -#else -#define __ANV_GEN_GE_GEN75(T, F) F -#endif - -#if (ANV_GENx10 <= 75) -#define __ANV_GEN_LE_GEN75(T, F) T -#else -#define __ANV_GEN_LE_GEN75(T, F) F -#endif - -#if (ANV_GENx10 >= 80) -#define __ANV_GEN_GE_GEN8(T, F) T -#else -#define __ANV_GEN_GE_GEN8(T, F) F -#endif - -#if (ANV_GENx10 <= 80) -#define __ANV_GEN_LE_GEN8(T, F) T -#else -#define __ANV_GEN_LE_GEN8(T, F) F -#endif - -#if (ANV_GENx10 >= 90) -#define __ANV_GEN_GE_GEN9(T, F) T -#else -#define __ANV_GEN_GE_GEN9(T, F) F -#endif - -#if (ANV_GENx10 <= 90) -#define __ANV_GEN_LE_GEN9(T, F) T -#else -#define __ANV_GEN_LE_GEN9(T, F) F -#endif - -#define __ANV_GEN_IN_RANGE(start, end, T, F) \ - __ANV_GEN_GE_##start(__ANV_GEN_LE_##end(T, F), F) - -/* Declares a function as static inlind if it's 
not in range */ -#define GENX_FUNC(start, end) __ANV_GEN_IN_RANGE(start, end, , static inline) - -#endif /* ANV_GENx10 */ diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index ba86333525e..479f3826135 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -60,7 +60,6 @@ typedef uint32_t xcb_window_t; #include #include "anv_entrypoints.h" -#include "anv_gen_macros.h" #include "brw_context.h" #include "isl/isl.h" diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 23327ec0724..e96400d5b6c 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "genxml/gen7_pack.h" -#include "genxml/gen75_pack.h" +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" static uint32_t cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) @@ -55,7 +55,7 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) if (state.offset == 0) continue; - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CONSTANT_VS, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), ._3DCommandSubOpcode = push_constant_opcodes[stage], .ConstantBody = { .PointerToConstantBuffer0 = { .offset = state.offset }, @@ -95,7 +95,7 @@ genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, anv_foreach_stage(s, stages) { if (cmd_buffer->state.samplers[s].alloc_size > 0) { anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, + GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ._3DCommandSubOpcode = sampler_state_opcodes[s], .PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset); } @@ -103,7 +103,7 @@ genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, /* Always emit binding table pointers if we're asked to, since on SKL * this is what flushes push constants. 
*/ anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, + GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ._3DCommandSubOpcode = binding_table_opcodes[s], .PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset); } @@ -168,6 +168,7 @@ clamp_int64(int64_t x, int64_t min, int64_t max) return max; } +#if GEN_GEN == 7 && !GEN_IS_HASWELL static void emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, uint32_t count, const VkRect2D *scissors) @@ -214,8 +215,8 @@ emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, anv_state_clflush(scissor_state); } -GENX_FUNC(GEN7, GEN7) void -genX(cmd_buffer_emit_scissor)(struct anv_cmd_buffer *cmd_buffer) +void +gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer->state.dynamic.scissor.count > 0) { emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, @@ -232,6 +233,7 @@ genX(cmd_buffer_emit_scissor)(struct anv_cmd_buffer *cmd_buffer) }); } } +#endif static const uint32_t vk_to_gen_index_type[] = { [VK_INDEX_TYPE_UINT16] = INDEX_WORD, @@ -253,7 +255,7 @@ void genX(CmdBindIndexBuffer)( ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; - if (ANV_IS_HASWELL) + if (GEN_IS_HASWELL) cmd_buffer->state.restart_index = restart_index_for_type[indexType]; cmd_buffer->state.gen7.index_buffer = buffer; cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType]; @@ -306,20 +308,22 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) struct anv_state state = anv_state_pool_emit(&device->dynamic_state_pool, - GEN7_INTERFACE_DESCRIPTOR_DATA, 64, + GENX(INTERFACE_DESCRIPTOR_DATA), 64, .KernelStartPointer = pipeline->cs_simd, .BindingTablePointer = surfaces.offset, .SamplerStatePointer = samplers.offset, .ConstantURBEntryReadLength = push_constant_regs, +#if !GEN_IS_HASWELL .ConstantURBEntryReadOffset = 0, +#endif .BarrierEnable = cs_prog_data->uses_barrier, .SharedLocalMemorySize = slm_size, 
.NumberofThreadsinGPGPUThreadGroup = pipeline->cs_thread_width_max); - const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); - anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + const uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), .InterfaceDescriptorTotalLength = size, .InterfaceDescriptorDataStartAddress = state.offset); @@ -335,7 +339,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); if (cmd_buffer->state.current_pipeline != GPGPU) { - anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), .PipelineSelection = GPGPU); cmd_buffer->state.current_pipeline = GPGPU; } @@ -371,16 +375,16 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) const uint32_t num_dwords = 1 + num_buffers * 4; p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GEN7_3DSTATE_VERTEX_BUFFERS); + GENX(3DSTATE_VERTEX_BUFFERS)); uint32_t vb, i = 0; for_each_bit(vb, vb_emit) { struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; - struct GEN7_VERTEX_BUFFER_STATE state = { + struct GENX(VERTEX_BUFFER_STATE) state = { .VertexBufferIndex = vb, .BufferAccessType = pipeline->instancing_enable[vb] ? 
INSTANCEDATA : VERTEXDATA, - .VertexBufferMemoryObjectControlState = GEN7_MOCS, + .VertexBufferMemoryObjectControlState = GENX(MOCS), .AddressModifyEnable = true, .BufferPitch = pipeline->binding_stride[vb], .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, @@ -388,7 +392,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .InstanceDataStepRate = 1 }; - GEN7_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); i++; } } @@ -416,7 +420,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) * PIPE_CONTROL needs to be sent before any combination of VS * associated 3DSTATE." */ - anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .DepthStallEnable = true, .PostSyncOperation = WriteImmediateData, .Address = { &cmd_buffer->device->workaround_bo, 0 }); @@ -456,9 +460,9 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) isl_surf_get_depth_format(&cmd_buffer->device->isl_dev, &image->depth_surface.isl) : D16_UNORM; - uint32_t sf_dw[GEN7_3DSTATE_SF_length]; - struct GEN7_3DSTATE_SF sf = { - GEN7_3DSTATE_SF_header, + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), .DepthBufferSurfaceFormat = depth_format, .LineWidth = cmd_buffer->state.dynamic.line_width, .GlobalDepthOffsetEnableSolid = enable_bias, @@ -468,7 +472,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp }; - GEN7_3DSTATE_SF_pack(NULL, sf_dw, &sf); + GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf); } @@ -477,9 +481,9 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { struct 
anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN7_COLOR_CALC_STATE_length * 4, + GENX(COLOR_CALC_STATE_length) * 4, 64); - struct GEN7_COLOR_CALC_STATE cc = { + struct GENX(COLOR_CALC_STATE) cc = { .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], @@ -489,12 +493,12 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BackFaceStencilReferenceValue = cmd_buffer->state.dynamic.stencil_reference.back, }; - GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); if (!cmd_buffer->device->info.has_llc) anv_state_clflush(cc_state); anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_CC_STATE_POINTERS, + GENX(3DSTATE_CC_STATE_POINTERS), .ColorCalcStatePointer = cc_state.offset); } @@ -502,12 +506,12 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_RENDER_TARGETS | ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { - uint32_t depth_stencil_dw[GEN7_DEPTH_STENCIL_STATE_length]; + uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)]; const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - struct GEN7_DEPTH_STENCIL_STATE depth_stencil = { + struct GENX(DEPTH_STENCIL_STATE) depth_stencil = { .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), .StencilTestMask = @@ -520,15 +524,15 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BackfaceStencilWriteMask = cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, }; - GEN7_DEPTH_STENCIL_STATE_pack(NULL, depth_stencil_dw, &depth_stencil); + GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil); struct anv_state ds_state = anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw, 
pipeline->gen7.depth_stencil_state, - GEN7_DEPTH_STENCIL_STATE_length, 64); + GENX(DEPTH_STENCIL_STATE_length), 64); anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, + GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), .PointertoDEPTH_STENCIL_STATE = ds_state.offset); } @@ -538,16 +542,18 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer; uint32_t offset = cmd_buffer->state.gen7.index_offset; - if (ANV_IS_HASWELL) { - anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF, - .IndexedDrawCutIndexEnable = pipeline->primitive_restart, - .CutIndex = cmd_buffer->state.restart_index); - } +#if GEN_IS_HASWELL + anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF, + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index); +#endif - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_INDEX_BUFFER, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), +#if !GEN_IS_HASWELL .CutIndexEnable = pipeline->primitive_restart, +#endif .IndexFormat = cmd_buffer->state.gen7.index_type, - .MemoryObjectControlState = GEN7_MOCS, + .MemoryObjectControlState = GENX(MOCS), .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size }); } diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 7c054fa56d5..009a79ac815 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "genxml/gen7_pack.h" -#include "genxml/gen75_pack.h" +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" #include "genX_pipeline_util.h" @@ -39,8 +39,8 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, const VkPipelineRasterizationStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { - struct GEN7_3DSTATE_SF sf = { - GEN7_3DSTATE_SF_header, + 
struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), /* LegacyGlobalDepthBiasEnable */ @@ -69,7 +69,7 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, .PointWidth = 1.0, }; - GEN7_3DSTATE_SF_pack(NULL, &pipeline->gen7.sf, &sf); + GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf); } static void @@ -85,7 +85,7 @@ gen7_emit_ds_state(struct anv_pipeline *pipeline, return; } - struct GEN7_DEPTH_STENCIL_STATE state = { + struct GENX(DEPTH_STENCIL_STATE) state = { .DepthTestEnable = info->depthTestEnable, .DepthBufferWriteEnable = info->depthWriteEnable, .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], @@ -103,7 +103,7 @@ gen7_emit_ds_state(struct anv_pipeline *pipeline, .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], }; - GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state); + GENX(DEPTH_STENCIL_STATE_pack)(NULL, &pipeline->gen7.depth_stencil_state, &state); } static void @@ -116,7 +116,7 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, if (info == NULL || info->attachmentCount == 0) { pipeline->blend_state = anv_state_pool_emit(&device->dynamic_state_pool, - GEN7_BLEND_STATE, 64, + GENX(BLEND_STATE), 64, .ColorBufferBlendEnable = false, .WriteDisableAlpha = true, .WriteDisableRed = true, @@ -129,7 +129,7 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; pipeline->blend_state = anv_state_pool_emit(&device->dynamic_state_pool, - GEN7_BLEND_STATE, 64, + GENX(BLEND_STATE), 64, .ColorBufferBlendEnable = a->blendEnable, .IndependentAlphaBlendEnable = true, /* FIXME: yes? 
*/ @@ -169,11 +169,11 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, ); } - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), .BlendStatePointer = pipeline->blend_state.offset); } -GENX_FUNC(GEN7, GEN75) VkResult +VkResult genX(graphics_pipeline_create)( VkDevice _device, struct anv_pipeline_cache * cache, @@ -216,7 +216,7 @@ genX(graphics_pipeline_create)( const VkPipelineRasterizationStateCreateInfo *rs_info = pCreateInfo->pRasterizationState; - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_CLIP, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], .CullMode = vk_to_gen_cullmode[rs_info->cullMode], .ClipEnable = true, @@ -237,11 +237,11 @@ genX(graphics_pipeline_create)( uint32_t samples = 1; uint32_t log2_samples = __builtin_ffs(samples) - 1; - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_MULTISAMPLE, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), .PixelLocation = PIXLOC_CENTER, .NumberofMultisamples = log2_samples); - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SAMPLE_MASK, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), .SampleMask = 0xff); const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; @@ -314,7 +314,7 @@ genX(graphics_pipeline_create)( .DispatchMode = gs_prog_data->base.dispatch_mode, .GSStatisticsEnable = true, .IncludePrimitiveID = gs_prog_data->include_primitive_id, -# if (ANV_IS_HASWELL) +# if (GEN_IS_HASWELL) .ReorderMode = REORDER_TRAILING, # else .ReorderEnable = true, @@ -326,10 +326,10 @@ genX(graphics_pipeline_create)( anv_finishme("disabling ps"); /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE)); /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). 
*/ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), .StatisticsEnable = true, .ThreadDispatchEnable = false, .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ @@ -349,7 +349,7 @@ genX(graphics_pipeline_create)( anv_finishme("primitive_id needs sbe swizzling setup"); /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, .VertexURBEntryReadLength = urb_length, .VertexURBEntryReadOffset = urb_offset, @@ -390,7 +390,7 @@ genX(graphics_pipeline_create)( .KernelStartPointer2 = pipeline->ps_ksp2); /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), .StatisticsEnable = true, .ThreadDispatchEnable = true, .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c index 77bdb75260c..5323c378d02 100644 --- a/src/intel/vulkan/gen7_state.c +++ b/src/intel/vulkan/gen7_state.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "genxml/gen7_pack.h" -#include "genxml/gen75_pack.h" +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" #include "genX_state_util.h" @@ -43,7 +43,7 @@ genX(init_device_state)(struct anv_device *device) batch.start = batch.next = cmds; batch.end = (void *) cmds + sizeof(cmds); - anv_batch_emit(&batch, GEN7_PIPELINE_SELECT, + anv_batch_emit(&batch, GENX(PIPELINE_SELECT), .PipelineSelection = _3D); anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), @@ -52,7 +52,7 @@ genX(init_device_state)(struct anv_device *device) anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); anv_batch_emit(&batch, GENX(3DSTATE_DS), .DSFunctionEnable = false); anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), 
.SOFunctionEnable = false); - anv_batch_emit(&batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); + anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); assert(batch.next <= batch.end); @@ -60,7 +60,7 @@ genX(init_device_state)(struct anv_device *device) return anv_device_submit_simple_batch(device, &batch); } -GENX_FUNC(GEN7, GEN75) void +void genX(fill_buffer_surface_state)(void *state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride) @@ -79,7 +79,7 @@ genX(fill_buffer_surface_state)(void *state, enum isl_format format, .Width = (num_elements - 1) & 0x7f, .Depth = ((num_elements - 1) >> 21) & 0x3f, .SurfacePitch = stride - 1, -# if (ANV_IS_HASWELL) +# if (GEN_IS_HASWELL) .ShaderChannelSelectRed = SCS_RED, .ShaderChannelSelectGreen = SCS_GREEN, .ShaderChannelSelectBlue = SCS_BLUE, @@ -107,7 +107,7 @@ VkResult genX(CreateSampler)( if (!sampler) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - struct GEN7_SAMPLER_STATE sampler_state = { + struct GENX(SAMPLER_STATE) sampler_state = { .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, .LODPreClampEnable = CLAMP_ENABLE_OGL, @@ -145,7 +145,7 @@ VkResult genX(CreateSampler)( .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], }; - GEN7_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); *pSampler = anv_sampler_to_handle(sampler); @@ -227,7 +227,7 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, .SurfaceMinLOD = 0, /* TEMPLATE */ .MCSEnable = false, -# if (ANV_IS_HASWELL) +# if (GEN_IS_HASWELL) .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index b741612c891..3221f5e2dc4 100644 --- 
a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "genxml/gen8_pack.h" -#include "genxml/gen9_pack.h" +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" static uint32_t cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) @@ -70,7 +70,7 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) return flushed; } -#if ANV_GEN == 8 +#if GEN_GEN == 8 static void emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, uint32_t count, const VkViewport *viewports) @@ -213,6 +213,8 @@ __emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, cmd_buffer->state.pipeline->gen8.sf); } + +#include "genxml/gen9_pack.h" static void __emit_gen9_sf_state(struct anv_cmd_buffer *cmd_buffer) { @@ -339,14 +341,14 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) * across different state packets for gen8 and gen9. We handle that by * using a big old #if switch here. 
*/ -#if ANV_GEN == 8 +#if GEN_GEN == 8 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN8_COLOR_CALC_STATE_length * 4, + GENX(COLOR_CALC_STATE_length) * 4, 64); - struct GEN8_COLOR_CALC_STATE cc = { + struct GENX(COLOR_CALC_STATE) cc = { .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], @@ -356,13 +358,13 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BackFaceStencilReferenceValue = cmd_buffer->state.dynamic.stencil_reference.back, }; - GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); if (!cmd_buffer->device->info.has_llc) anv_state_clflush(cc_state); anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_CC_STATE_POINTERS, + GENX(3DSTATE_CC_STATE_POINTERS), .ColorCalcStatePointer = cc_state.offset, .ColorCalcStatePointerValid = true); } @@ -370,10 +372,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { - uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; - struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + struct GENX(3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil) = { + GENX(3DSTATE_WM_DEPTH_STENCIL_header), /* Is this what we need to do? 
*/ .StencilBufferWriteEnable = @@ -389,8 +391,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BackfaceStencilWriteMask = cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, }; - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw, - &wm_depth_stencil); + GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw, + &wm_depth_stencil); anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, pipeline->gen8.wm_depth_stencil); @@ -568,7 +570,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) config_l3(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { -#if ANV_GEN < 10 +#if GEN_GEN < 10 /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: * * Software must clear the COLOR_CALC_STATE Valid field in @@ -583,7 +585,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) #endif anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), -#if ANV_GEN >= 9 +#if GEN_GEN >= 9 .MaskBits = 3, #endif .PipelineSelection = GPGPU); diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index f0411562fba..dc15e2066c5 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "genxml/gen8_pack.h" -#include "genxml/gen9_pack.h" +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" #include "genX_pipeline_util.h" @@ -83,7 +83,7 @@ emit_rs_state(struct anv_pipeline *pipeline, .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], .ScissorRectangleEnable = !(extra && extra->disable_scissor), -#if ANV_GEN == 8 +#if GEN_GEN == 8 .ViewportZClipTestEnable = true, #else /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ @@ -178,7 +178,7 @@ static void emit_ds_state(struct anv_pipeline *pipeline, const VkPipelineDepthStencilStateCreateInfo *info) { - uint32_t *dw = 
ANV_GEN == 8 ? + uint32_t *dw = GEN_GEN == 8 ? pipeline->gen8.wm_depth_stencil : pipeline->gen9.wm_depth_stencil; if (info == NULL) { @@ -414,7 +414,7 @@ genX(graphics_pipeline_create)( const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; + const int num_thread_bias = GEN_GEN == 8 ? 2 : 1; if (pipeline->ps_ksp0 == NO_KERNEL) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), @@ -477,7 +477,7 @@ genX(graphics_pipeline_create)( .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs, -#if ANV_GEN >= 9 +#if GEN_GEN >= 9 .Attribute0ActiveComponentFormat = ACF_XYZW, .Attribute1ActiveComponentFormat = ACF_XYZW, .Attribute2ActiveComponentFormat = ACF_XYZW, @@ -556,7 +556,7 @@ genX(graphics_pipeline_create)( .PixelShaderIsPerSample = per_sample_ps, .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, -#if ANV_GEN >= 9 +#if GEN_GEN >= 9 .PixelShaderPullsBary = wm_prog_data->pulls_bary, .InputCoverageMaskState = wm_prog_data->uses_sample_mask ? 
ICMS_INNER_CONSERVATIVE : ICMS_NONE, diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c index 04cfff5444d..fdde705f0d6 100644 --- a/src/intel/vulkan/gen8_state.c +++ b/src/intel/vulkan/gen8_state.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "genxml/gen8_pack.h" -#include "genxml/gen9_pack.h" +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" #include "genX_state_util.h" diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 5498d1d68c6..9be87a3ff05 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -26,15 +26,8 @@ #include "anv_private.h" -#if (ANV_GEN == 9) -# include "genxml/gen9_pack.h" -#elif (ANV_GEN == 8) -# include "genxml/gen8_pack.h" -#elif (ANV_IS_HASWELL) -# include "genxml/gen75_pack.h" -#elif (ANV_GEN == 7) -# include "genxml/gen7_pack.h" -#endif +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) @@ -48,7 +41,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) scratch_bo = &device->scratch_block_pool.bo; /* XXX: Do we need this on more than just BDW? */ -#if (ANV_GEN >= 8) +#if (GEN_GEN >= 8) /* Emit a render target cache flush. * * This isn't documented anywhere in the PRM. However, it seems to be @@ -81,7 +74,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) .InstructionMemoryObjectControlState = GENX(MOCS), .InstructionBaseAddressModifyEnable = true, -# if (ANV_GEN >= 8) +# if (GEN_GEN >= 8) /* Broadwell requires that we specify a buffer size for a bunch of * these fields. However, since we will be growing the BO's live, we * just set them all to the maximum. 
@@ -288,7 +281,7 @@ emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, .VertexBufferIndex = 32, /* Reserved for this */ .AddressModifyEnable = true, .BufferPitch = 0, -#if (ANV_GEN >= 8) +#if (GEN_GEN >= 8) .MemoryObjectControlState = GENX(MOCS), .BufferStartingAddress = { bo, offset }, .BufferSize = 8 @@ -543,7 +536,7 @@ genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer->state.current_pipeline != _3D) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), -#if ANV_GEN >= 9 +#if GEN_GEN >= 9 .MaskBits = 3, #endif .PipelineSelection = _3D); @@ -587,7 +580,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .Depth = 1 - 1, .MinimumArrayElement = 0, .DepthBufferObjectControlState = GENX(MOCS), -#if ANV_GEN >= 8 +#if GEN_GEN >= 8 .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2, #endif .RenderTargetViewExtent = 1 - 1); @@ -620,7 +613,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) /* Emit 3DSTATE_STENCIL_BUFFER */ if (has_stencil) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), -#if ANV_GEN >= 8 || ANV_IS_HASWELL +#if GEN_GEN >= 8 || GEN_IS_HASWELL .StencilBufferEnable = true, #endif .StencilBufferObjectControlState = GENX(MOCS), @@ -632,7 +625,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) */ .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, -#if ANV_GEN >= 8 +#if GEN_GEN >= 8 .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2, #endif .SurfaceBaseAddress = { diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 54ec8307d02..41a5d0f889c 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -23,15 +23,8 @@ #include "anv_private.h" -#if (ANV_GEN == 9) -# include "genxml/gen9_pack.h" -#elif (ANV_GEN == 8) -# include "genxml/gen8_pack.h" -#elif (ANV_IS_HASWELL) -# include "genxml/gen75_pack.h" -#elif 
(ANV_GEN == 7) -# include "genxml/gen7_pack.h" -#endif +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" VkResult genX(compute_pipeline_create)( @@ -94,19 +87,19 @@ genX(compute_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), -#if ANV_GEN > 7 +#if GEN_GEN > 7 .ScratchSpaceBasePointerHigh = 0, .StackSize = 0, #else .GPGPUMode = true, #endif .MaximumNumberofThreads = device->info.max_cs_threads - 1, - .NumberofURBEntries = ANV_GEN <= 7 ? 0 : 2, + .NumberofURBEntries = GEN_GEN <= 7 ? 0 : 2, .ResetGatewayTimer = true, -#if ANV_GEN <= 8 +#if GEN_GEN <= 8 .BypassGatewayControl = true, #endif - .URBEntryAllocationSize = ANV_GEN <= 7 ? 0 : 2, + .URBEntryAllocationSize = GEN_GEN <= 7 ? 0 : 2, .CURBEAllocationSize = 0); struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 696e2be7c3f..51fbd8bf273 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -68,7 +68,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, elements = inputs_read >> VERT_ATTRIB_GENERIC0; } -#if ANV_GEN >= 8 +#if GEN_GEN >= 8 /* On BDW+, we only need to allocate space for base ids. Setting up * the actual vertex and instance id is a separate packet. */ @@ -123,7 +123,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, }; GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); -#if ANV_GEN >= 8 +#if GEN_GEN >= 8 /* On Broadwell and later, we have a separate VF_INSTANCING packet * that controls instancing. On Haswell and prior, that's part of * VERTEX_BUFFER_STATE which we emit later. 
@@ -158,7 +158,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, .SourceElementFormat = ISL_FORMAT_R32G32_UINT, .Component0Control = base_ctrl, .Component1Control = base_ctrl, -#if ANV_GEN >= 8 +#if GEN_GEN >= 8 .Component2Control = VFCOMP_STORE_0, .Component3Control = VFCOMP_STORE_0, #else @@ -169,7 +169,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element); } -#if ANV_GEN >= 8 +#if GEN_GEN >= 8 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, .VertexIDComponentNumber = 2, @@ -183,7 +183,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, static inline void emit_urb_setup(struct anv_pipeline *pipeline) { -#if ANV_GEN == 7 +#if GEN_GEN == 7 && !GEN_IS_HASWELL struct anv_device *device = pipeline->device; /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: diff --git a/src/intel/vulkan/genX_state_util.h b/src/intel/vulkan/genX_state_util.h index 67f798ab66e..10b3a9f42c5 100644 --- a/src/intel/vulkan/genX_state_util.h +++ b/src/intel/vulkan/genX_state_util.h @@ -57,7 +57,7 @@ anv_surface_format(const struct anv_device *device, enum isl_format format, } } -#if ANV_GEN > 7 || ANV_IS_HASWELL +#if GEN_GEN > 7 || GEN_IS_HASWELL static const uint32_t vk_to_gen_swizzle[] = { [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, -- cgit v1.2.3 From 1f1cf6fcb0ea7c27b573aab0396942875fa3dba6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 09:12:36 -0800 Subject: anv: Get rid of GENX_FUNC It was a bad idea. 
--- src/intel/genxml/gen_macros.h | 58 -------------------------------------- src/intel/vulkan/gen7_cmd_buffer.c | 12 ++++---- 2 files changed, 7 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/src/intel/genxml/gen_macros.h b/src/intel/genxml/gen_macros.h index 2c47979f35a..2658d032928 100644 --- a/src/intel/genxml/gen_macros.h +++ b/src/intel/genxml/gen_macros.h @@ -88,61 +88,3 @@ #else # error "Need to add prefixing macros for this gen" #endif - -/* Macros for comparing gens - * - * TODO: This wasn't the best idea. We really need to deprecate it. - */ -#if (GEN_VERSIONx10 >= 70) -#define __ANV_GEN_GE_GEN7(T, F) T -#else -#define __ANV_GEN_GE_GEN7(T, F) F -#endif - -#if (GEN_VERSIONx10 <= 70) -#define __ANV_GEN_LE_GEN7(T, F) T -#else -#define __ANV_GEN_LE_GEN7(T, F) F -#endif - -#if (GEN_VERSIONx10 >= 75) -#define __ANV_GEN_GE_GEN75(T, F) T -#else -#define __ANV_GEN_GE_GEN75(T, F) F -#endif - -#if (GEN_VERSIONx10 <= 75) -#define __ANV_GEN_LE_GEN75(T, F) T -#else -#define __ANV_GEN_LE_GEN75(T, F) F -#endif - -#if (GEN_VERSIONx10 >= 80) -#define __ANV_GEN_GE_GEN8(T, F) T -#else -#define __ANV_GEN_GE_GEN8(T, F) F -#endif - -#if (GEN_VERSIONx10 <= 80) -#define __ANV_GEN_LE_GEN8(T, F) T -#else -#define __ANV_GEN_LE_GEN8(T, F) F -#endif - -#if (GEN_VERSIONx10 >= 90) -#define __ANV_GEN_GE_GEN9(T, F) T -#else -#define __ANV_GEN_GE_GEN9(T, F) F -#endif - -#if (GEN_VERSIONx10 <= 90) -#define __ANV_GEN_LE_GEN9(T, F) T -#else -#define __ANV_GEN_LE_GEN9(T, F) F -#endif - -#define __ANV_GEN_IN_RANGE(start, end, T, F) \ - __ANV_GEN_GE_##start(__ANV_GEN_LE_##end(T, F), F) - -/* Declares a function as static inlind if it's not in range */ -#define GENX_FUNC(start, end) __ANV_GEN_IN_RANGE(start, end, , static inline) diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index e96400d5b6c..7377487cf7e 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -70,9 +70,10 @@ 
cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) return flushed; } -GENX_FUNC(GEN7, GEN7) void -genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, - uint32_t stages) +#if GEN_GEN == 7 && !GEN_IS_HASWELL +void +gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, + uint32_t stages) { static const uint32_t sampler_state_opcodes[] = { [MESA_SHADER_VERTEX] = 43, @@ -109,8 +110,8 @@ genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, } } -GENX_FUNC(GEN7, GEN7) uint32_t -genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) +uint32_t +gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & cmd_buffer->state.pipeline->active_stages; @@ -156,6 +157,7 @@ genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) return dirty; } +#endif /* GEN_GEN == 7 && !GEN_IS_HASWELL */ static inline int64_t clamp_int64(int64_t x, int64_t min, int64_t max) -- cgit v1.2.3 From 853fc3e43179a4cbc8023daa28fc0d20c08b6c24 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 09:31:31 -0800 Subject: genxml: Add mote includes in the generated headers --- src/intel/genxml/gen_pack_header.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/intel/genxml/gen_pack_header.py b/src/intel/genxml/gen_pack_header.py index 9dae2d57e42..5bc18c70c60 100755 --- a/src/intel/genxml/gen_pack_header.py +++ b/src/intel/genxml/gen_pack_header.py @@ -39,7 +39,10 @@ pack_header = """%(license)s #pragma once #include +#include +#include #include +#include #ifndef __gen_validate_value #define __gen_validate_value(x) -- cgit v1.2.3 From bc696f1db69bf70cf1d1c4b6b837db25378b820c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 10:25:49 -0800 Subject: isl: Stop including mesa/main/imports.h It pulls in all sorts of stuff we don't want. 
--- src/intel/isl/isl.c | 2 ++ src/intel/isl/isl_priv.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 27928fd0850..c1d47d5a579 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -22,6 +22,8 @@ */ #include +#include +#include #include "isl.h" #include "isl_gen4.h" diff --git a/src/intel/isl/isl_priv.h b/src/intel/isl/isl_priv.h index b399e0f8116..bca8503b4fd 100644 --- a/src/intel/isl/isl_priv.h +++ b/src/intel/isl/isl_priv.h @@ -26,7 +26,6 @@ #include #include "brw_device_info.h" -#include "mesa/main/imports.h" #include "util/macros.h" #include "isl.h" -- cgit v1.2.3 From b5868d2343dab94be6a8a8e56632fbd0c42a1a2c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 19:29:05 -0800 Subject: anv: Zero out the WSI array when initializing the instance --- src/intel/vulkan/anv_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index a8835f74179..7a5cb234ac5 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -254,6 +254,8 @@ VkResult anv_CreateInstance( instance->apiVersion = client_version; instance->physicalDeviceCount = -1; + memset(instance->wsi, 0, sizeof(instance->wsi)); + _mesa_locale_init(); VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); -- cgit v1.2.3 From f1dddeadc235cff20ceb7b8f7d3b70dc92cbe76b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 20:02:37 -0800 Subject: anv: Fix a typo in apply_dynamic_offsets shader->num_uniforms is in terms of bytes in i965. 
--- src/intel/vulkan/anv_nir_apply_dynamic_offsets.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c index e71a8ffb1f4..46bc5d23a4e 100644 --- a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c @@ -161,9 +161,9 @@ anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, struct anv_push_constants *null_data = NULL; for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) { - prog_data->param[i * 2 + shader->num_uniforms] = + prog_data->param[i * 2 + shader->num_uniforms / 4] = (const union gl_constant_value *)&null_data->dynamic[i].offset; - prog_data->param[i * 2 + 1 + shader->num_uniforms] = + prog_data->param[i * 2 + 1 + shader->num_uniforms / 4] = (const union gl_constant_value *)&null_data->dynamic[i].range; } -- cgit v1.2.3 From f49ba0f7d85d390e133799bf05f52b5537d38117 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sun, 21 Feb 2016 21:42:33 -0800 Subject: nir/spirv: Add support for multisampled textures --- src/compiler/nir/spirv/spirv_to_nir.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index a50a581cfb6..725781f5de4 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -739,7 +739,10 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, else val->type->access_qualifier = SpvAccessQualifierReadWrite; - assert(!multisampled && "FIXME: Handl multi-sampled textures"); + if (multisampled) { + assert(dim == GLSL_SAMPLER_DIM_2D); + dim = GLSL_SAMPLER_DIM_MS; + } val->type->image_format = translate_image_format(format); @@ -1199,6 +1202,13 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, sampled.sampler = sampled_val->access_chain; } + const struct glsl_type 
*image_type; + if (sampled.image) { + image_type = sampled.image->var->var->interface_type; + } else { + image_type = sampled.sampler->var->var->interface_type; + } + nir_tex_src srcs[8]; /* 8 should be enough */ nir_tex_src *p = srcs; @@ -1265,7 +1275,11 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, break; case SpvOpImageFetch: - texop = nir_texop_txf; + if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) { + texop = nir_texop_txf_ms; + } else { + texop = nir_texop_txf; + } break; case SpvOpImageGather: @@ -1303,7 +1317,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, if (operands & SpvImageOperandsLodMask) { assert(texop == nir_texop_txl || texop == nir_texop_txf || - texop == nir_texop_txs); + texop == nir_texop_txf_ms || texop == nir_texop_txs); (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); } @@ -1322,7 +1336,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, assert(!"Constant offsets to texture gather not yet implemented"); if (operands & SpvImageOperandsSampleMask) { - assert(texop == nir_texop_txf); + assert(texop == nir_texop_txf_ms); texop = nir_texop_txf_ms; (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); } @@ -1335,13 +1349,6 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); - const struct glsl_type *image_type; - if (sampled.image) { - image_type = sampled.image->var->var->interface_type; - } else { - image_type = sampled.sampler->var->var->interface_type; - } - instr->sampler_dim = glsl_get_sampler_dim(image_type); instr->is_array = glsl_sampler_type_is_array(image_type); instr->is_shadow = glsl_sampler_type_is_shadow(image_type); @@ -1355,11 +1362,11 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, break; case GLSL_SAMPLER_DIM_2D: case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_MS: instr->coord_components = 2; break; case GLSL_SAMPLER_DIM_3D: case GLSL_SAMPLER_DIM_CUBE: - case GLSL_SAMPLER_DIM_MS: 
instr->coord_components = 3; break; default: -- cgit v1.2.3 From 7b2c63a53ca0ec685085cbf6b2e1f0da00752d91 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 18 Feb 2016 14:05:31 -0800 Subject: anv/meta_blit: Handle compressed textures in anv_CmdCopyImage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As with anv_CmdCopyBufferToImage, compressed textures require special handling during copies. Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_meta_blit.c | 62 ++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 07ebcbc06b1..06f13ecc8db 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -696,31 +696,34 @@ void anv_CmdCopyImage( }, cmd_buffer, 0); - const VkOffset3D dest_offset = { - .x = pRegions[r].dstOffset.x, - .y = pRegions[r].dstOffset.y, - .z = 0, - }; - - unsigned num_slices; - if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(pRegions[r].srcSubresource.layerCount == 1 && - pRegions[r].dstSubresource.layerCount == 1); - num_slices = pRegions[r].extent.depth; - } else { - assert(pRegions[r].srcSubresource.layerCount == - pRegions[r].dstSubresource.layerCount); - assert(pRegions[r].extent.depth == 1); - num_slices = pRegions[r].dstSubresource.layerCount; - } - const uint32_t dest_base_array_slice = anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, &pRegions[r].dstOffset); - for (unsigned slice = 0; slice < num_slices; slice++) { + + unsigned num_slices_3d = pRegions[r].extent.depth; + unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { VkOffset3D src_offset = pRegions[r].srcOffset; - src_offset.z += slice; + src_offset.z += slice_3d + slice_array; + + uint32_t img_x = 
0; + uint32_t img_y = 0; + uint32_t img_o = 0; + if (isl_format_is_compressed(dest_image->format->isl_format)) + isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, + &dest_image->color_surface.isl, + pRegions[r].dstSubresource.mipLevel, + pRegions[r].dstSubresource.baseArrayLayer + slice_array, + pRegions[r].dstOffset.z + slice_3d, + &img_o, &img_x, &img_y); + + VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, &pRegions[r].dstOffset); + dest_offset_el.x += img_x; + dest_offset_el.y += img_y; + dest_offset_el.z = 0; struct anv_image_view dest_iview; anv_image_view_init(&dest_iview, cmd_buffer->device, @@ -733,20 +736,29 @@ void anv_CmdCopyImage( .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].dstSubresource.mipLevel, .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + slice, + .baseArrayLayer = dest_base_array_slice + + slice_array + slice_3d, .layerCount = 1 }, }, - cmd_buffer, 0); + cmd_buffer, img_o); + + const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, + &pRegions[r].extent); meta_emit_blit(cmd_buffer, src_image, &src_iview, src_offset, - pRegions[r].extent, + img_extent_el, dest_image, &dest_iview, - dest_offset, - pRegions[r].extent, + dest_offset_el, + img_extent_el, VK_FILTER_NEAREST); + + if (dest_image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; } } -- cgit v1.2.3 From f843aabdd42051abc8b22437d5c9167fc867ac46 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 22 Feb 2016 09:14:25 -0800 Subject: intel/genxml: Add README I've had people ask about the design of the pack functions, for example, why aren't we using bitfields. I wrote up a bit of background on why and how we ended up with the current design and we might as well keep that with the code. 
--- src/intel/genxml/README | 60 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 src/intel/genxml/README (limited to 'src') diff --git a/src/intel/genxml/README b/src/intel/genxml/README new file mode 100644 index 00000000000..bc518c60bad --- /dev/null +++ b/src/intel/genxml/README @@ -0,0 +1,60 @@ +This provides some background the design of the generated headers. We +started out trying to generate bit fields but it evolved into the pack +functions because of a few limitations: + + 1) Bit fields still generate terrible code today. Even with modern + optimizing compilers you get multiple load+mask+store operations + to the same dword in memory as you set individual bits. The + compiler also has to generate code to mask out overflowing values + (for example, if you assign 200 to a 2 bit field). Our driver + never writes overflowing values so that's not needed. On the + other hand, most compiler recognize that the template struct we + use is a temporary variable and copy propagate the individual + fields and do amazing constant folding. You should take a look + at the code that gets generated when you compile in release mode + with optimizations. + + 2) For some types we need to have overlapping bit fields. For + example, some values are 64 byte aligned 32 bit offsets. The + lower 5 bits of the offset are always zero, so the hw packs in a + few misc bits in the lower 5 bits there. Other times a field can + be either a u32 or a float. I tried to do this with overlapping + anonymous unions and it became a big mess. Also, when using + initializers, you can only initialize one union member so this + just doesn't work with out approach. + + The pack functions on the other hand allows us a great deal of + flexibility in how we combine things. 
In the case of overlapping + fields (the u32 and float case), if we only set one of them in + the pack function, the compiler will recognize that the other is + initialized to 0 and optimize out the code to or it it. + + 3) Bit fields (and certainly overlapping anonymous unions of bit + fields) aren't generally stable across compilers in how they're + laid out and aligned. Our pack functions let us control exactly + how things get packed, using only simple and unambiguous bitwise + shifting and or'ing that works on any compiler. + +Once we have the pack function it allows us to hook in various +transformations and validation as we go from template struct to dwords +in memory: + + 1) Validation: As I said above, our driver isn't supposed to write + overflowing values to the fields, but we've of course had lots of + cases where we make mistakes and write overflowing values. With + the pack function, we can actually assert on that and catch it at + runtime. bitfields would just silently truncate. + + 2) Type conversions: some times it's just a matter of writing a + float to a u32, but we also convert from bool to bits, from + floats to fixed point integers. + + 3) Relocations: whenever we have a pointer from one buffer to + another (for example a pointer from the meta data for a texture + to the raw texture data), we have to tell the kernel about it so + it can adjust the pointer to point to the final location. That + means extra work we have to do extra work to record and annotate + the dword location that holds the pointer. With bit fields, we'd + have to call a function to do this, but with the pack function we + generate code in the pack function to do this for us. That's a + lot less error prone and less work. 
-- cgit v1.2.3 From 08b408311cb8fdbeae6d7ff5474107b0868c6ec9 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Mon, 22 Feb 2016 11:31:15 -0800 Subject: vulkan: fix out-of-tree builds --- src/intel/vulkan/Makefile.am | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 6be4f9fb427..53cfa20a263 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -66,6 +66,7 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src/compiler \ -I$(top_builddir)/src/compiler/nir \ -I$(top_builddir)/src/intel \ + -I$(top_builddir)/src/intel/genxml \ -I$(top_builddir)/src/vulkan libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init -- cgit v1.2.3 From 353d5bf286e1509af9ec2f1b8152d1f64790b52c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 22 Feb 2016 10:19:43 -0800 Subject: anv/x11: Free swapchain images and memory on destroy --- src/intel/vulkan/anv_wsi_x11.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c index 843a6b62504..a63cb6e7c5b 100644 --- a/src/intel/vulkan/anv_wsi_x11.c +++ b/src/intel/vulkan/anv_wsi_x11.c @@ -535,7 +535,11 @@ x11_swapchain_destroy(struct anv_swapchain *anv_chain, cookie = xcb_free_pixmap(chain->conn, image->pixmap); xcb_discard_reply(chain->conn, cookie.sequence); - /* TODO: Delete images and free memory */ + anv_DestroyImage(anv_device_to_handle(chain->base.device), + anv_image_to_handle(image->image), pAllocator); + + anv_FreeMemory(anv_device_to_handle(chain->base.device), + anv_device_memory_to_handle(image->memory), pAllocator); } anv_free2(&chain->base.device->alloc, pAllocator, chain); -- cgit v1.2.3 From 2570a58bcdf30d699b89323fef60692093dee7ea Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Feb 2016 22:46:28 -0800 Subject: anv: Implement descriptor pools Descriptor pools are an optimization that lets 
applications allocate descriptor sets through an externally synchronized object (that is, unlocked). In our case it's also plugging a memory leak, since we didn't track all allocated sets and failed to free them in vkResetDescriptorPool() and vkDestroyDescriptorPool(). --- src/intel/vulkan/anv_descriptor_set.c | 189 +++++++++++++++++++++++++++------- src/intel/vulkan/anv_meta.c | 25 +++++ src/intel/vulkan/anv_meta_blit.c | 6 +- src/intel/vulkan/anv_meta_resolve.c | 8 +- src/intel/vulkan/anv_private.h | 17 +++ 5 files changed, 200 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 7a77336602a..718bc216f73 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -244,17 +244,67 @@ void anv_DestroyPipelineLayout( } /* - * Descriptor pools. These are a no-op for now. + * Descriptor pools. + * + * These are implemented using a big pool of memory and a free-list for the + * host memory allocations and a state_stream and a free list for the buffer + * view surface state. The spec allows us to fail to allocate due to + * fragmentation in all cases but two: 1) after pool reset, allocating up + * until the pool size with no freeing must succeed and 2) allocating and + * freeing only descriptor sets with the same layout. Case 1) is easy enogh, + * and the free lists lets us recycle blocks for case 2). 
*/ +#define EMPTY 1 + VkResult anv_CreateDescriptorPool( - VkDevice device, + VkDevice _device, const VkDescriptorPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorPool* pDescriptorPool) { - anv_finishme("VkDescriptorPool is a stub"); - *pDescriptorPool = (VkDescriptorPool)1; + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_descriptor_pool *pool; + + uint32_t descriptor_count = 0; + uint32_t buffer_count = 0; + for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) { + switch (pCreateInfo->pPoolSizes[i].type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + buffer_count += pCreateInfo->pPoolSizes[i].descriptorCount; + default: + descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount; + break; + } + } + + const size_t set_size = + sizeof(struct anv_descriptor_set) + + descriptor_count * sizeof(struct anv_descriptor) + + buffer_count * sizeof(struct anv_buffer_view); + + const size_t size = + sizeof(*pool) + + pCreateInfo->maxSets * set_size; + + pool = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!pool) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pool->size = size; + pool->next = 0; + pool->free_list = EMPTY; + + anv_state_stream_init(&pool->surface_state_stream, + &device->surface_state_block_pool); + pool->surface_state_free_list = NULL; + + *pDescriptorPool = anv_descriptor_pool_to_handle(pool); + return VK_SUCCESS; } @@ -263,37 +313,85 @@ void anv_DestroyDescriptorPool( VkDescriptorPool _pool, const VkAllocationCallbacks* pAllocator) { - anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_pool, pool, _pool); + + anv_state_stream_finish(&pool->surface_state_stream); + anv_free2(&device->alloc, pAllocator, pool); } 
VkResult anv_ResetDescriptorPool( - VkDevice device, + VkDevice _device, VkDescriptorPool descriptorPool, VkDescriptorPoolResetFlags flags) { - anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool); + + pool->next = 0; + pool->free_list = EMPTY; + anv_state_stream_finish(&pool->surface_state_stream); + anv_state_stream_init(&pool->surface_state_stream, + &device->surface_state_block_pool); + pool->surface_state_free_list = NULL; + return VK_SUCCESS; } +struct pool_free_list_entry { + uint32_t next; + uint32_t size; +}; + +static size_t +layout_size(const struct anv_descriptor_set_layout *layout) +{ + return + sizeof(struct anv_descriptor_set) + + layout->size * sizeof(struct anv_descriptor) + + layout->buffer_count * sizeof(struct anv_buffer_view); +} + +struct surface_state_free_list_entry { + void *next; + uint32_t offset; +}; + VkResult anv_descriptor_set_create(struct anv_device *device, + struct anv_descriptor_pool *pool, const struct anv_descriptor_set_layout *layout, struct anv_descriptor_set **out_set) { struct anv_descriptor_set *set; - size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]); + const size_t size = layout_size(layout); + + set = NULL; + if (size <= pool->size - pool->next) { + set = (struct anv_descriptor_set *) (pool->data + pool->next); + pool->next += size; + } else { + struct pool_free_list_entry *entry; + uint32_t *link = &pool->free_list; + for (uint32_t f = pool->free_list; f != EMPTY; f = entry->next) { + entry = (struct pool_free_list_entry *) (pool->data + f); + if (size <= entry->size) { + *link = entry->next; + set = (struct anv_descriptor_set *) entry; + break; + } + link = &entry->next; + } + } - set = anv_alloc(&device->alloc /* XXX: Use the pool */, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set) + if (set == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - /* A 
descriptor set may not be 100% filled. Clear the set so we can can - * later detect holes in it. - */ - memset(set, 0, size); - + set->size = size; set->layout = layout; + set->buffer_views = + (struct anv_buffer_view *) &set->descriptors[layout->size]; + set->buffer_count = layout->buffer_count; /* Go through and fill out immutable samplers if we have any */ struct anv_descriptor *desc = set->descriptors; @@ -305,21 +403,24 @@ anv_descriptor_set_create(struct anv_device *device, desc += layout->binding[b].array_size; } - /* XXX: Use the pool */ - set->buffer_views = - anv_alloc(&device->alloc, - sizeof(set->buffer_views[0]) * layout->buffer_count, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set->buffer_views) { - anv_free(&device->alloc, set); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - + /* Allocate surface state for the buffer views. */ for (uint32_t b = 0; b < layout->buffer_count; b++) { - set->buffer_views[b].surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + struct surface_state_free_list_entry *entry = + pool->surface_state_free_list; + struct anv_state state; + + if (entry) { + state.map = entry; + state.offset = entry->offset; + state.alloc_size = 64; + pool->surface_state_free_list = entry->next; + } else { + state = anv_state_stream_alloc(&pool->surface_state_stream, 64, 64); + } + + set->buffer_views[b].surface_state = state; } - set->buffer_count = layout->buffer_count; + *out_set = set; return VK_SUCCESS; @@ -327,15 +428,27 @@ anv_descriptor_set_create(struct anv_device *device, void anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_pool *pool, struct anv_descriptor_set *set) { - /* XXX: Use the pool */ - for (uint32_t b = 0; b < set->buffer_count; b++) - anv_state_pool_free(&device->surface_state_pool, - set->buffer_views[b].surface_state); + /* Put the buffer view surface state back on the free list. 
*/ + for (uint32_t b = 0; b < set->buffer_count; b++) { + struct surface_state_free_list_entry *entry = + set->buffer_views[b].surface_state.map; + entry->next = pool->surface_state_free_list; + pool->surface_state_free_list = entry; + } - anv_free(&device->alloc, set->buffer_views); - anv_free(&device->alloc, set); + /* Put the descriptor set allocation back on the free list. */ + const uint32_t index = (char *) set - pool->data; + if (index + set->size == pool->next) { + pool->next = index; + } else { + struct pool_free_list_entry *entry = (struct pool_free_list_entry *) set; + entry->next = pool->free_list; + entry->size = set->size; + pool->free_list = (char *) entry - pool->data; + } } VkResult anv_AllocateDescriptorSets( @@ -344,6 +457,7 @@ VkResult anv_AllocateDescriptorSets( VkDescriptorSet* pDescriptorSets) { ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_pool, pool, pAllocateInfo->descriptorPool); VkResult result = VK_SUCCESS; struct anv_descriptor_set *set; @@ -353,7 +467,7 @@ VkResult anv_AllocateDescriptorSets( ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pAllocateInfo->pSetLayouts[i]); - result = anv_descriptor_set_create(device, layout, &set); + result = anv_descriptor_set_create(device, pool, layout, &set); if (result != VK_SUCCESS) break; @@ -374,11 +488,12 @@ VkResult anv_FreeDescriptorSets( const VkDescriptorSet* pDescriptorSets) { ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool); for (uint32_t i = 0; i < count; i++) { ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); - anv_descriptor_set_destroy(device, set); + anv_descriptor_set_destroy(device, pool, set); } return VK_SUCCESS; diff --git a/src/intel/vulkan/anv_meta.c b/src/intel/vulkan/anv_meta.c index 82944ea1a92..683a1623cc3 100644 --- a/src/intel/vulkan/anv_meta.c +++ b/src/intel/vulkan/anv_meta.c @@ -138,6 +138,27 @@ anv_device_init_meta(struct anv_device *device) .pfnFree = 
meta_free, }; + const VkDescriptorPoolCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1 + }, + } + }; + + result = anv_CreateDescriptorPool(anv_device_to_handle(device), + &create_info, + &device->meta_state.alloc, + &device->meta_state.desc_pool); + if (result != VK_SUCCESS) + goto fail_desc_pool; + result = anv_device_init_meta_clear_state(device); if (result != VK_SUCCESS) goto fail_clear; @@ -157,6 +178,10 @@ fail_blit: fail_resolve: anv_device_finish_meta_clear_state(device); fail_clear: + anv_DestroyDescriptorPool(anv_device_to_handle(device), + device->meta_state.desc_pool, + &device->meta_state.alloc); +fail_desc_pool: return result; } diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 06f13ecc8db..9c6cd8c510e 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -165,7 +165,6 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VkFilter blit_filter) { struct anv_device *device = cmd_buffer->device; - VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; struct blit_vb_data { float pos[2]; @@ -248,7 +247,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, anv_AllocateDescriptorSets(anv_device_to_handle(device), &(VkDescriptorSetAllocateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = dummy_desc_pool, + .descriptorPool = device->meta_state.desc_pool, .descriptorSetCount = 1, .pSetLayouts = &device->meta_state.blit.ds_layout }, &set); @@ -341,7 +340,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. 
*/ - anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_ResetDescriptorPool(anv_device_to_handle(device), + device->meta_state.desc_pool, 0); anv_DestroySampler(anv_device_to_handle(device), sampler, &cmd_buffer->pool->alloc); anv_DestroyFramebuffer(anv_device_to_handle(device), fb, diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index ea5020c5f24..9a77d21452f 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -483,7 +483,6 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_image *src_image = src_iview->image; - VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1; const struct vertex_attrs vertex_data[3] = { { @@ -564,7 +563,7 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, anv_AllocateDescriptorSets(device_h, &(VkDescriptorSetAllocateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = dummy_desc_pool_h, + .descriptorPool = device->meta_state.desc_pool, .descriptorSetCount = 1, .pSetLayouts = (VkDescriptorSetLayout[]) { device->meta_state.resolve.ds_layout, @@ -572,8 +571,6 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, }, &desc_set_h); - ANV_FROM_HANDLE(anv_descriptor_set, desc_set, desc_set_h); - anv_UpdateDescriptorSets(device_h, /*writeCount*/ 1, (VkWriteDescriptorSet[]) { @@ -644,7 +641,8 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, /* All objects below are consumed by the draw call. We may safely destroy * them. 
*/ - anv_descriptor_set_destroy(device, desc_set); + anv_ResetDescriptorPool(anv_device_to_handle(device), + device->meta_state.desc_pool, 0); anv_DestroySampler(device_h, sampler_h, &cmd_buffer->pool->alloc); } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 479f3826135..6ce3f02d1f7 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -571,6 +571,8 @@ void anv_finish_wsi(struct anv_instance *instance); struct anv_meta_state { VkAllocationCallbacks alloc; + VkDescriptorPool desc_pool; + /** * Use array element `i` for images with `2^i` samples. */ @@ -959,18 +961,32 @@ struct anv_descriptor { struct anv_descriptor_set { const struct anv_descriptor_set_layout *layout; + uint32_t size; uint32_t buffer_count; struct anv_buffer_view *buffer_views; struct anv_descriptor descriptors[0]; }; +struct anv_descriptor_pool { + uint32_t size; + uint32_t next; + uint32_t free_list; + + struct anv_state_stream surface_state_stream; + void *surface_state_free_list; + + char data[0]; +}; + VkResult anv_descriptor_set_create(struct anv_device *device, + struct anv_descriptor_pool *pool, const struct anv_descriptor_set_layout *layout, struct anv_descriptor_set **out_set); void anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_pool *pool, struct anv_descriptor_set *set); struct anv_pipeline_binding { @@ -1839,6 +1855,7 @@ ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCommandPool) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, VkDescriptorPool) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) -- cgit v1.2.3 From 442dff8cf4c99d67e7258e376d38ec32b92a2fbf Mon Sep 
17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Feb 2016 17:13:46 -0800 Subject: anv/descriptor_set: Stop marking everything as having dynamic offsets --- src/intel/vulkan/anv_descriptor_set.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 718bc216f73..b439f2a0c36 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -218,8 +218,10 @@ VkResult anv_CreatePipelineLayout( layout->set[set].dynamic_offset_start = dynamic_offset_count; for (uint32_t b = 0; b < set_layout->binding_count; b++) { - if (set_layout->binding[b].dynamic_offset_index >= 0) - dynamic_offset_count += set_layout->binding[b].array_size; + if (set_layout->binding[b].dynamic_offset_index < 0) + continue; + + dynamic_offset_count += set_layout->binding[b].array_size; for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { if (set_layout->binding[b].stage[s].surface_index >= 0) layout->stage[s].has_dynamic_offsets = true; -- cgit v1.2.3 From ae619a035573a2d13fb49537ef8769c97688e77f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Feb 2016 19:19:00 -0800 Subject: anv/state: Replace a bunch of ANV_GEN with GEN_GEN --- src/intel/vulkan/gen8_state.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c index fdde705f0d6..2686bfa8f3c 100644 --- a/src/intel/vulkan/gen8_state.c +++ b/src/intel/vulkan/gen8_state.c @@ -44,7 +44,7 @@ genX(init_device_state)(struct anv_device *device) batch.end = (void *) cmds + sizeof(cmds); anv_batch_emit(&batch, GENX(PIPELINE_SELECT), -#if ANV_GEN >= 9 +#if GEN_GEN >= 9 .MaskBits = 3, #endif .PipelineSelection = _3D); @@ -93,7 +93,7 @@ genX(init_device_state)(struct anv_device *device) ._8xSample6YOffset = 0.9375, ._8xSample7XOffset = 0.9375, ._8xSample7YOffset = 0.0625, -#if ANV_GEN >= 
9 +#if GEN_GEN >= 9 ._16xSample0XOffset = 0.5625, ._16xSample0YOffset = 0.5625, ._16xSample1XOffset = 0.4375, @@ -194,7 +194,7 @@ static const uint8_t anv_valign[] = { static void get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valign) { - #if ANV_GENx10 >= 90 + #if GEN_GEN >= 9 if (isl_tiling_is_std_y(surf->tiling) || surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { /* The hardware ignores the alignment values. Anyway, the surface's @@ -239,7 +239,7 @@ get_qpitch(const struct isl_surf *surf) default: unreachable(!"bad isl_surf_dim"); case ISL_SURF_DIM_1D: - #if ANV_GENx10 >= 90 + #if GEN_GEN >= 9 /* QPitch is usually expressed as rows of surface elements (where * a surface element is an compression block or a single surface * sample). Skylake 1D is an outlier. @@ -256,7 +256,7 @@ get_qpitch(const struct isl_surf *surf) #endif case ISL_SURF_DIM_2D: case ISL_SURF_DIM_3D: - #if ANV_GEN >= 9 + #if GEN_GEN >= 9 return isl_surf_get_array_pitch_el_rows(surf); #else /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch @@ -452,7 +452,7 @@ VkResult genX(CreateSampler)( .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, .LODPreClampMode = CLAMP_MODE_OGL, -#if ANV_GEN == 8 +#if GEN_GEN == 8 .BaseMipLevel = 0.0, #endif .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], -- cgit v1.2.3 From 64e1c8405920a160c0fdac47ebe4f1e1c0468d7a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Feb 2016 21:20:04 -0800 Subject: intel/genxml: Update macro documentation --- src/intel/genxml/gen_macros.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/genxml/gen_macros.h b/src/intel/genxml/gen_macros.h index 2658d032928..052c57f8a77 100644 --- a/src/intel/genxml/gen_macros.h +++ b/src/intel/genxml/gen_macros.h @@ -44,7 +44,7 @@ * * You can do pseudo-runtime checks in your function such as * - * if (ANV_GEN > 8 || ANV_IS_HASWELL) { + * if (GEN_GEN > 8 || GEN_IS_HASWELL) { * // Do 
something * } * @@ -54,7 +54,7 @@ * For places where you really do have a compile-time conflict, you can * use preprocessor logic: * - * #if (ANV_GEN > 8 || ANV_IS_HASWELL) + * #if (GEN_GEN > 8 || GEN_IS_HASWELL) * // Do something * #endif * -- cgit v1.2.3 From bfbb238dea91b1c2bde4f2f3eb20d39c95da3850 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Feb 2016 21:39:14 -0800 Subject: anv/descriptor_set: Set descriptor type for immuatable samplers --- src/intel/vulkan/anv_descriptor_set.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index b439f2a0c36..fe105b23f42 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -399,8 +399,17 @@ anv_descriptor_set_create(struct anv_device *device, struct anv_descriptor *desc = set->descriptors; for (uint32_t b = 0; b < layout->binding_count; b++) { if (layout->binding[b].immutable_samplers) { - for (uint32_t i = 0; i < layout->binding[b].array_size; i++) - desc[i].sampler = layout->binding[b].immutable_samplers[i]; + for (uint32_t i = 0; i < layout->binding[b].array_size; i++) { + /* The type will get changed to COMBINED_IMAGE_SAMPLER in + * UpdateDescriptorSets if needed. However, if the descriptor + * set has an immutable sampler, UpdateDescriptorSets may never + * touch it, so we need to make sure it's 100% valid now. + */ + desc[i] = (struct anv_descriptor) { + .type = VK_DESCRIPTOR_TYPE_SAMPLER, + .sampler = layout->binding[b].immutable_samplers[i], + }; + } } desc += layout->binding[b].array_size; } -- cgit v1.2.3 From bd3db3d6655beeb3da817a96d524f537092e386b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 23 Feb 2016 17:04:19 -0800 Subject: anv/meta: Allocate descriptor pools on-the-fly We can't use a global descriptor pool like we were because it's not thread-safe. 
For now, we'll allocate them on-the-fly and that should work fine. At some point in the future, we could do something where we stack-allocate them or allocate them out of one of the state streams. --- src/intel/vulkan/anv_meta.c | 25 ------------------------- src/intel/vulkan/anv_meta_blit.c | 23 ++++++++++++++++++++--- src/intel/vulkan/anv_meta_resolve.c | 22 +++++++++++++++++++--- src/intel/vulkan/anv_private.h | 2 -- 4 files changed, 39 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta.c b/src/intel/vulkan/anv_meta.c index 683a1623cc3..82944ea1a92 100644 --- a/src/intel/vulkan/anv_meta.c +++ b/src/intel/vulkan/anv_meta.c @@ -138,27 +138,6 @@ anv_device_init_meta(struct anv_device *device) .pfnFree = meta_free, }; - const VkDescriptorPoolCreateInfo create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .maxSets = 1, - .poolSizeCount = 1, - .pPoolSizes = (VkDescriptorPoolSize[]) { - { - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1 - }, - } - }; - - result = anv_CreateDescriptorPool(anv_device_to_handle(device), - &create_info, - &device->meta_state.alloc, - &device->meta_state.desc_pool); - if (result != VK_SUCCESS) - goto fail_desc_pool; - result = anv_device_init_meta_clear_state(device); if (result != VK_SUCCESS) goto fail_clear; @@ -178,10 +157,6 @@ fail_blit: fail_resolve: anv_device_finish_meta_clear_state(device); fail_clear: - anv_DestroyDescriptorPool(anv_device_to_handle(device), - device->meta_state.desc_pool, - &device->meta_state.alloc); -fail_desc_pool: return result; } diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 9c6cd8c510e..8ef943aa512 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -243,14 +243,31 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .minFilter = blit_filter, }, &cmd_buffer->pool->alloc, &sampler); + VkDescriptorPool desc_pool; + 
anv_CreateDescriptorPool(anv_device_to_handle(device), + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &desc_pool); + VkDescriptorSet set; anv_AllocateDescriptorSets(anv_device_to_handle(device), &(VkDescriptorSetAllocateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = device->meta_state.desc_pool, + .descriptorPool = desc_pool, .descriptorSetCount = 1, .pSetLayouts = &device->meta_state.blit.ds_layout }, &set); + anv_UpdateDescriptorSets(anv_device_to_handle(device), 1, /* writeCount */ (VkWriteDescriptorSet[]) { @@ -340,8 +357,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. 
*/ - anv_ResetDescriptorPool(anv_device_to_handle(device), - device->meta_state.desc_pool, 0); + anv_DestroyDescriptorPool(anv_device_to_handle(device), + desc_pool, &cmd_buffer->pool->alloc); anv_DestroySampler(anv_device_to_handle(device), sampler, &cmd_buffer->pool->alloc); anv_DestroyFramebuffer(anv_device_to_handle(device), fb, diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 9a77d21452f..8eb2548b5ae 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -559,11 +559,27 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, &cmd_buffer->pool->alloc, &sampler_h); + VkDescriptorPool desc_pool; + anv_CreateDescriptorPool(anv_device_to_handle(device), + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &desc_pool); + VkDescriptorSet desc_set_h; anv_AllocateDescriptorSets(device_h, &(VkDescriptorSetAllocateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = device->meta_state.desc_pool, + .descriptorPool = desc_pool, .descriptorSetCount = 1, .pSetLayouts = (VkDescriptorSetLayout[]) { device->meta_state.resolve.ds_layout, @@ -641,8 +657,8 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, /* All objects below are consumed by the draw call. We may safely destroy * them. 
*/ - anv_ResetDescriptorPool(anv_device_to_handle(device), - device->meta_state.desc_pool, 0); + anv_DestroyDescriptorPool(anv_device_to_handle(device), + desc_pool, &cmd_buffer->pool->alloc); anv_DestroySampler(device_h, sampler_h, &cmd_buffer->pool->alloc); } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 6ce3f02d1f7..b1b4d265b89 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -571,8 +571,6 @@ void anv_finish_wsi(struct anv_instance *instance); struct anv_meta_state { VkAllocationCallbacks alloc; - VkDescriptorPool desc_pool; - /** * Use array element `i` for images with `2^i` samples. */ -- cgit v1.2.3 From 040355b688a3d034bef37631fa35ff012132deaf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 23 Feb 2016 21:01:00 -0800 Subject: nir/spirv: Add more capabilities --- src/compiler/nir/spirv/spirv_to_nir.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index 725781f5de4..91f5c4b5186 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -2161,11 +2161,20 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvCapabilityMatrix: case SpvCapabilityShader: case SpvCapabilityGeometry: + case SpvCapabilityTessellationPointSize: + case SpvCapabilityGeometryPointSize: case SpvCapabilityUniformBufferArrayDynamicIndexing: case SpvCapabilitySampledImageArrayDynamicIndexing: case SpvCapabilityStorageBufferArrayDynamicIndexing: case SpvCapabilityStorageImageArrayDynamicIndexing: case SpvCapabilityClipDistance: + case SpvCapabilityImageRect: + case SpvCapabilitySampledRect: + case SpvCapabilitySampled1D: + case SpvCapabilityImage1D: + case SpvCapabilitySampledCubeArray: + case SpvCapabilitySampledBuffer: + case SpvCapabilityImageBuffer: case SpvCapabilityImageQuery: break; default: -- cgit v1.2.3 From 
f0f7cc22f3f061416c81cf80ccbe4a6a390082a7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 23 Feb 2016 21:23:06 -0800 Subject: anv/descriptor_set: Use the correct size for the descriptor pool The descriptor sizes array gives the total number of each type of descriptor that will ever be allocated from the pool, not the total amount that may be in any particular set. In our case, this simply means that we have to sum a bunch of things up and there we go. --- src/intel/vulkan/anv_descriptor_set.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index fe105b23f42..dd645c3effc 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -283,14 +283,11 @@ VkResult anv_CreateDescriptorPool( } } - const size_t set_size = - sizeof(struct anv_descriptor_set) + - descriptor_count * sizeof(struct anv_descriptor) + - buffer_count * sizeof(struct anv_buffer_view); - const size_t size = sizeof(*pool) + - pCreateInfo->maxSets * set_size; + pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) + + descriptor_count * sizeof(struct anv_descriptor) + + buffer_count * sizeof(struct anv_buffer_view); pool = anv_alloc2(&device->alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); -- cgit v1.2.3 From c9564fd598573f609480f70a141b95900047ba58 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 23 Feb 2016 22:02:41 -0800 Subject: nir/spirv: Allow but warn for a few capabilities Unfortunately, glslang gives us cull/clip distance and GS streams even if the shader doesn't use it whenever a shader is declared as version 450. This is a glslang bug, but we can easily enough ignore it for now. 
--- src/compiler/nir/spirv/spirv_to_nir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index 91f5c4b5186..cb069b93ae8 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -2167,7 +2167,6 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvCapabilitySampledImageArrayDynamicIndexing: case SpvCapabilityStorageBufferArrayDynamicIndexing: case SpvCapabilityStorageImageArrayDynamicIndexing: - case SpvCapabilityClipDistance: case SpvCapabilityImageRect: case SpvCapabilitySampledRect: case SpvCapabilitySampled1D: @@ -2177,6 +2176,11 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvCapabilityImageBuffer: case SpvCapabilityImageQuery: break; + case SpvCapabilityClipDistance: + case SpvCapabilityCullDistance: + case SpvCapabilityGeometryStreams: + fprintf(stderr, "WARNING: Unsupported SPIR-V Capability\n"); + break; default: assert(!"Unsupported capability"); } -- cgit v1.2.3 From 1024a66fc4ff34a03ecfdf3ec053cb874fb206fe Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 22 Feb 2016 15:23:06 -0800 Subject: anv: Emit 3DSTATE_URB_* via a loop. Rather than keeping separate {vs,hs,ds,gs}_start fields, we now store an array indexed by the shader stage (MESA_SHADER_*). The 3DSTATE_URB_* commands are also sequentially numbered. This makes it easy to just emit them in a loop. This simplifies the code a little, and also will make it easier to add more credible HS and DS code later. 
--- src/intel/vulkan/anv_pipeline.c | 20 ++++++++++++++------ src/intel/vulkan/anv_private.h | 9 +++------ src/intel/vulkan/genX_pipeline_util.h | 26 +++++++------------------- 3 files changed, 24 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 27872d2769a..df78362ee70 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -870,13 +870,21 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) * - VS * - GS */ - pipeline->urb.vs_start = push_constant_chunks; - pipeline->urb.vs_size = vs_size; - pipeline->urb.nr_vs_entries = nr_vs_entries; + pipeline->urb.start[MESA_SHADER_VERTEX] = push_constant_chunks; + pipeline->urb.size[MESA_SHADER_VERTEX] = vs_size; + pipeline->urb.entries[MESA_SHADER_VERTEX] = nr_vs_entries; - pipeline->urb.gs_start = push_constant_chunks + vs_chunks; - pipeline->urb.gs_size = gs_size; - pipeline->urb.nr_gs_entries = nr_gs_entries; + pipeline->urb.start[MESA_SHADER_GEOMETRY] = push_constant_chunks + vs_chunks; + pipeline->urb.size[MESA_SHADER_GEOMETRY] = gs_size; + pipeline->urb.entries[MESA_SHADER_GEOMETRY] = nr_gs_entries; + + pipeline->urb.start[MESA_SHADER_TESS_CTRL] = push_constant_chunks; + pipeline->urb.size[MESA_SHADER_TESS_CTRL] = 1; + pipeline->urb.entries[MESA_SHADER_TESS_CTRL] = 0; + + pipeline->urb.start[MESA_SHADER_TESS_EVAL] = push_constant_chunks; + pipeline->urb.size[MESA_SHADER_TESS_EVAL] = 1; + pipeline->urb.entries[MESA_SHADER_TESS_EVAL] = 0; } static void diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index b1b4d265b89..3e3cbf09a68 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1410,12 +1410,9 @@ struct anv_pipeline { uint32_t scratch_start[MESA_SHADER_STAGES]; uint32_t total_scratch; struct { - uint32_t vs_start; - uint32_t vs_size; - uint32_t nr_vs_entries; - uint32_t gs_start; - uint32_t gs_size; - uint32_t nr_gs_entries; + 
uint32_t start[MESA_SHADER_GEOMETRY + 1]; + uint32_t size[MESA_SHADER_GEOMETRY + 1]; + uint32_t entries[MESA_SHADER_GEOMETRY + 1]; } urb; VkShaderStageFlags active_stages; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 51fbd8bf273..dea96a934b8 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -210,25 +210,13 @@ emit_urb_setup(struct anv_pipeline *pipeline) .ConstantBufferOffset = 8, .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), - .VSURBStartingAddress = pipeline->urb.vs_start, - .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, - .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_GS), - .GSURBStartingAddress = pipeline->urb.gs_start, - .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, - .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_HS), - .HSURBStartingAddress = pipeline->urb.vs_start, - .HSURBEntryAllocationSize = 0, - .HSNumberofURBEntries = 0); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_DS), - .DSURBStartingAddress = pipeline->urb.vs_start, - .DSURBEntryAllocationSize = 0, - .DSNumberofURBEntries = 0); + for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), + ._3DCommandSubOpcode = 48 + i, + .VSURBStartingAddress = pipeline->urb.start[i], + .VSURBEntryAllocationSize = pipeline->urb.size[i] - 1, + .VSNumberofURBEntries = pipeline->urb.entries[i]); + } } static inline uint32_t -- cgit v1.2.3 From 7f9b03cc8b44759895d5c4c42cfef8fa78269e7c Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 22 Feb 2016 15:46:23 -0800 Subject: anv: Emit 3DSTATE_PUSH_CONSTANT_ALLOC_* via a loop. Now we're emitting HS and DS packets as well. 
--- src/intel/vulkan/anv_pipeline.c | 6 ++++++ src/intel/vulkan/anv_private.h | 1 + src/intel/vulkan/genX_pipeline_util.h | 17 ++++++++--------- 3 files changed, 15 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index df78362ee70..672640ac24c 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -885,6 +885,12 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) pipeline->urb.start[MESA_SHADER_TESS_EVAL] = push_constant_chunks; pipeline->urb.size[MESA_SHADER_TESS_EVAL] = 1; pipeline->urb.entries[MESA_SHADER_TESS_EVAL] = 0; + + pipeline->urb.push_size[MESA_SHADER_VERTEX] = 4; + pipeline->urb.push_size[MESA_SHADER_TESS_CTRL] = 0; + pipeline->urb.push_size[MESA_SHADER_TESS_EVAL] = 0; + pipeline->urb.push_size[MESA_SHADER_GEOMETRY] = 4; + pipeline->urb.push_size[MESA_SHADER_FRAGMENT] = 4; } static void diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 3e3cbf09a68..041ad87f75d 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1410,6 +1410,7 @@ struct anv_pipeline { uint32_t scratch_start[MESA_SHADER_STAGES]; uint32_t total_scratch; struct { + uint8_t push_size[MESA_SHADER_FRAGMENT + 1]; uint32_t start[MESA_SHADER_GEOMETRY + 1]; uint32_t size[MESA_SHADER_GEOMETRY + 1]; uint32_t entries[MESA_SHADER_GEOMETRY + 1]; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index dea96a934b8..cf4e0358741 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -200,15 +200,14 @@ emit_urb_setup(struct anv_pipeline *pipeline) .Address = { &device->workaround_bo, 0 }); #endif - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), - .ConstantBufferOffset = 0, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), - .ConstantBufferOffset = 4, - 
.ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), - .ConstantBufferOffset = 8, - .ConstantBufferSize = 4); + unsigned push_start = 0; + for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), + ._3DCommandSubOpcode = 18 + i, + .ConstantBufferOffset = push_start, + .ConstantBufferSize = pipeline->urb.push_size[i]); + push_start += pipeline->urb.push_size[i]; + } for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), -- cgit v1.2.3 From 3f115177302d1a969181649fde8c2332563aac73 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 22 Feb 2016 17:26:15 -0800 Subject: anv: Properly size the push constant L3 area. We were assuming it was 32kB everywhere, reducing the available URB space. It's actually 16kB on Ivybridge, Baytrail, and Haswell GT1-2. --- src/intel/vulkan/anv_pipeline.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 672640ac24c..6c8d4add6e8 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -755,8 +755,6 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, return VK_SUCCESS; } -static const int gen8_push_size = 32 * 1024; - static void gen7_compute_urb_partition(struct anv_pipeline *pipeline) { @@ -785,7 +783,14 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; /* Reserve space for push constants */ - unsigned push_constant_bytes = gen8_push_size; +#if GEN_GEN >= 8 + unsigned push_constant_kb = 32; +#elif GEN_IS_HASWELL + unsigned push_constant_kb = pipeline->device->info.gt == 3 ? 
32 : 16; +#else + unsigned push_constant_kb = 16; +#endif + unsigned push_constant_bytes = push_constant_kb * 1024; unsigned push_constant_chunks = push_constant_bytes / chunk_size_bytes; -- cgit v1.2.3 From 3ecd357d816dc71b2c6ebd6ace38c76ebb25674e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 22 Feb 2016 17:28:22 -0800 Subject: anv: Allocate more push constant space. Previously we allocated 4kB of push constant space for VS, GS, and PS (for a total of 12kB) no matter what. This works, but doesn't fully utilize the space - we have 16kB or 32kB of space. This makes anv use the same method as brw - divide up the space evenly among all active shader stages. This means HS and DS would get space, if those shader stages existed. In the future, we can probably do better by inspecting how many push constants each shader stage uses, and weight things accordingly. But this is strictly better than the old code, and ideally we'd justify a fancier solution with actual performance data. --- src/intel/vulkan/anv_pipeline.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 6c8d4add6e8..92c5c35699c 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -891,11 +891,17 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) pipeline->urb.size[MESA_SHADER_TESS_EVAL] = 1; pipeline->urb.entries[MESA_SHADER_TESS_EVAL] = 0; - pipeline->urb.push_size[MESA_SHADER_VERTEX] = 4; - pipeline->urb.push_size[MESA_SHADER_TESS_CTRL] = 0; - pipeline->urb.push_size[MESA_SHADER_TESS_EVAL] = 0; - pipeline->urb.push_size[MESA_SHADER_GEOMETRY] = 4; - pipeline->urb.push_size[MESA_SHADER_FRAGMENT] = 4; + const unsigned stages = + _mesa_bitcount(pipeline->active_stages & VK_SHADER_STAGE_ALL_GRAPHICS); + const unsigned size_per_stage = push_constant_kb / stages; + + for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { + 
pipeline->urb.push_size[i] = + (pipeline->active_stages & (1 << i)) ? size_per_stage : 1; + } + + pipeline->urb.push_size[MESA_SHADER_FRAGMENT] = + push_constant_kb - size_per_stage * (stages - 1); } static void -- cgit v1.2.3 From 25c2470b24ce8411f6747eb887137b2511b6d529 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Feb 2016 21:11:48 -0800 Subject: anv: Set max_hs_threads/max_ds_threads --- src/intel/vulkan/anv_pipeline.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 92c5c35699c..1173b4f0cba 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -420,8 +420,8 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, struct brw_device_info *devinfo = &pipeline->device->info; uint32_t max_threads[] = { [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, - [MESA_SHADER_TESS_CTRL] = 0, - [MESA_SHADER_TESS_EVAL] = 0, + [MESA_SHADER_TESS_CTRL] = devinfo->max_hs_threads, + [MESA_SHADER_TESS_EVAL] = devinfo->max_ds_threads, [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, -- cgit v1.2.3 From c32273d246e8bf46924d8852d1b3fd1d34194df2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 25 Feb 2016 08:52:35 -0800 Subject: anv/device: Properly handle apiVersion == 0 From the Vulkan 1.0 spec section 3.2: "If apiVersion is 0 the implementation must ignore it" --- src/intel/vulkan/anv_device.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 7a5cb234ac5..59930552f59 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -214,9 +214,14 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - uint32_t client_version = 
pCreateInfo->pApplicationInfo ? - pCreateInfo->pApplicationInfo->apiVersion : - VK_MAKE_VERSION(1, 0, 0); + uint32_t client_version; + if (pCreateInfo->pApplicationInfo && + pCreateInfo->pApplicationInfo->apiVersion != 0) { + client_version = pCreateInfo->pApplicationInfo->apiVersion; + } else { + client_version = VK_MAKE_VERSION(1, 0, 0); + } + if (VK_MAKE_VERSION(1, 0, 0) > client_version || client_version > VK_MAKE_VERSION(1, 0, 3)) { return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, -- cgit v1.2.3 From b7bc52b5b18ac3f2d1e4ce399a701f4d272e7439 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 26 Feb 2016 12:37:04 -0800 Subject: anv/gen8: Emit the 3DSTATE_PS_BLEND packet --- src/intel/vulkan/gen8_pipeline.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index dc15e2066c5..c9545c898f3 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -111,6 +111,7 @@ emit_cb_state(struct anv_pipeline *pipeline, .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, }; + bool has_writeable_rt = false; for (uint32_t i = 0; i < info->attachmentCount; i++) { const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; @@ -140,6 +141,9 @@ emit_cb_state(struct anv_pipeline *pipeline, .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), }; + if (a->colorWriteMask != 0) + has_writeable_rt = true; + /* Our hardware applies the blend factor prior to the blend function * regardless of what function is used. Technically, this means the * hardware can do MORE than GL or Vulkan specify. 
However, it also @@ -165,6 +169,25 @@ emit_cb_state(struct anv_pipeline *pipeline, blend_state.Entry[i].WriteDisableBlue = true; } + if (info->attachmentCount > 0) { + struct GENX(BLEND_STATE_ENTRY) *bs = &blend_state.Entry[0]; + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), + .AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable, + .HasWriteableRT = has_writeable_rt, + .ColorBufferBlendEnable = bs->ColorBufferBlendEnable, + .SourceAlphaBlendFactor = bs->SourceAlphaBlendFactor, + .DestinationAlphaBlendFactor = + bs->DestinationAlphaBlendFactor, + .SourceBlendFactor = bs->SourceBlendFactor, + .DestinationBlendFactor = bs->DestinationBlendFactor, + .AlphaTestEnable = false, + .IndependentAlphaBlendEnable = + blend_state.IndependentAlphaBlendEnable); + } else { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND)); + } + GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); if (!device->info.has_llc) anv_state_clflush(pipeline->blend_state); -- cgit v1.2.3 From 6bb6b5c341a5a75763d565565f164b63cff3388a Mon Sep 17 00:00:00 2001 From: Thomas Hindoe Paaboel Andersen Date: Thu, 25 Feb 2016 22:00:09 +0100 Subject: anv: remove stray ; after if Both logic and indentation suggests that the ; were not intended here. 
Reviewed-by: Matt Turner Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_cmd_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index b060828cf61..827c3ed4142 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -465,7 +465,7 @@ void anv_CmdSetViewport( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); const uint32_t total_count = firstViewport + viewportCount; - if (cmd_buffer->state.dynamic.viewport.count < total_count); + if (cmd_buffer->state.dynamic.viewport.count < total_count) cmd_buffer->state.dynamic.viewport.count = total_count; memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport, @@ -483,7 +483,7 @@ void anv_CmdSetScissor( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); const uint32_t total_count = firstScissor + scissorCount; - if (cmd_buffer->state.dynamic.scissor.count < total_count); + if (cmd_buffer->state.dynamic.scissor.count < total_count) cmd_buffer->state.dynamic.scissor.count = total_count; memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor, -- cgit v1.2.3 From 265d4c415ca6b41bdd9a4a3e9f311d8f6e69cef1 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 24 Feb 2016 12:10:44 -0800 Subject: isl: Fix isl_surf_get_image_intratile_offset_el() Consecutive tiles are separated by the size of the tile, not by the logical tile width. 
v2: Remove extra subtraction (Ville) Add parenthesis (Jason) v3: Update the unit tests for the function Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/isl/isl.c | 2 +- .../isl/tests/isl_surf_get_image_offset_test.c | 70 +++++++++++----------- 2 files changed, 36 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index c1d47d5a579..3b6bee10081 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -1378,7 +1378,7 @@ isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, uint32_t total_x_offset_B = total_x_offset_el * fmtl->bs; uint32_t small_x_offset_B = total_x_offset_B % tile_info.width; uint32_t small_x_offset_el = small_x_offset_B / fmtl->bs; - uint32_t big_x_offset_B = total_x_offset_B - small_x_offset_B; + uint32_t big_x_offset_B = (total_x_offset_B / tile_info.width) * tile_info.size; *base_address_offset = big_y_offset_B + big_x_offset_B; *x_offset_el = small_x_offset_el; diff --git a/src/intel/isl/tests/isl_surf_get_image_offset_test.c b/src/intel/isl/tests/isl_surf_get_image_offset_test.c index cda8583daeb..34b336e8d81 100644 --- a/src/intel/isl/tests/isl_surf_get_image_offset_test.c +++ b/src/intel/isl/tests/isl_surf_get_image_offset_test.c @@ -191,14 +191,14 @@ test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(void) t_assert_intratile_offset_el(&dev, &surf, 0, 0, 0, 0x0, 0, 0); t_assert_intratile_offset_el(&dev, &surf, 1, 0, 0, 0x100000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x100400, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x140400, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x160400, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x170400, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x170400, 0, 16); - t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x170400, 0, 24); - t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x170400, 0, 28); - 
t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x180400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x108000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x148000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x168000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x178000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x178000, 0, 16); + t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x178000, 0, 24); + t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x178000, 0, 28); + t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x188000, 0, 0); } static void @@ -260,41 +260,41 @@ test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(void) /* array layer 0 */ t_assert_intratile_offset_el(&dev, &surf, 0, 0, 0, 0x0, 0, 0); t_assert_intratile_offset_el(&dev, &surf, 1, 0, 0, 0x400000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x400800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x500800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x580800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x5c0800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x5e0800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x5e0800, 0, 16); - t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x5e0800, 0, 24); - t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x5e0800, 0, 28); - t_assert_intratile_offset_el(&dev, &surf, 10, 0, 0, 0x600800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x410000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x510000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x590000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x5d0000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x5f0000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x5f0000, 0, 16); + t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 
0x5f0000, 0, 24); + t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x5f0000, 0, 28); + t_assert_intratile_offset_el(&dev, &surf, 10, 0, 0, 0x610000, 0, 0); /* array layer 1 */ t_assert_intratile_offset_el(&dev, &surf, 0, 1, 0, 0x600000, 0, 4); t_assert_intratile_offset_el(&dev, &surf, 1, 1, 0, 0xa00000, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 2, 1, 0, 0xa00800, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 3, 1, 0, 0xb00800, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 4, 1, 0, 0xb80800, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 5, 1, 0, 0xbc0800, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 6, 1, 0, 0xbe0800, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 7, 1, 0, 0xbe0800, 0, 20); - t_assert_intratile_offset_el(&dev, &surf, 8, 1, 0, 0xbe0800, 0, 28); - t_assert_intratile_offset_el(&dev, &surf, 9, 1, 0, 0xc00800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 10, 1, 0, 0xc00800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 2, 1, 0, 0xa10000, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 3, 1, 0, 0xb10000, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 4, 1, 0, 0xb90000, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 5, 1, 0, 0xbd0000, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 6, 1, 0, 0xbf0000, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 7, 1, 0, 0xbf0000, 0, 20); + t_assert_intratile_offset_el(&dev, &surf, 8, 1, 0, 0xbf0000, 0, 28); + t_assert_intratile_offset_el(&dev, &surf, 9, 1, 0, 0xc10000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 10, 1, 0, 0xc10000, 0, 4); /* array layer 2 */ t_assert_intratile_offset_el(&dev, &surf, 0, 2, 0, 0xc00000, 0, 8); t_assert_intratile_offset_el(&dev, &surf, 1, 2, 0, 0x1000000, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 2, 2, 0, 0x1000800, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 3, 2, 0, 0x1100800, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 4, 2, 0, 0x1180800, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 5, 
2, 0, 0x11c0800, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 6, 2, 0, 0x11e0800, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 7, 2, 0, 0x11e0800, 0, 24); - t_assert_intratile_offset_el(&dev, &surf, 8, 2, 0, 0x1200800, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 9, 2, 0, 0x1200800, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 10, 2, 0, 0x1200800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 2, 2, 0, 0x1010000, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 3, 2, 0, 0x1110000, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 4, 2, 0, 0x1190000, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 5, 2, 0, 0x11d0000, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 6, 2, 0, 0x11f0000, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 7, 2, 0, 0x11f0000, 0, 24); + t_assert_intratile_offset_el(&dev, &surf, 8, 2, 0, 0x1210000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 9, 2, 0, 0x1210000, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 10, 2, 0, 0x1210000, 0, 8); /* skip the remaining array layers */ } -- cgit v1.2.3 From 452782f68b3e8d25538fbe65b942c0af7c3bb147 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Feb 2016 15:51:36 -0800 Subject: gen/genX_pack: Add genxml to the pack header path If you have an out-of-tree build, gen8_pack.h and friends will not be in the same folder as genX_pack.h so this will be a problem. We fixed out-of-tree earlier by adding the genxml folder to the includes for the vulkan driver. However, this is not a good long-term solution because we want to use it in ISL as well. 
--- src/intel/genxml/genX_pack.h | 8 ++++---- src/intel/vulkan/Makefile.am | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/intel/genxml/genX_pack.h b/src/intel/genxml/genX_pack.h index 69fc340762b..7967c292645 100644 --- a/src/intel/genxml/genX_pack.h +++ b/src/intel/genxml/genX_pack.h @@ -28,13 +28,13 @@ #endif #if (GEN_VERSIONx10 == 70) -# include "gen7_pack.h" +# include "genxml/gen7_pack.h" #elif (GEN_VERSIONx10 == 75) -# include "gen75_pack.h" +# include "genxml/gen75_pack.h" #elif (GEN_VERSIONx10 == 80) -# include "gen8_pack.h" +# include "genxml/gen8_pack.h" #elif (GEN_VERSIONx10 == 90) -# include "gen9_pack.h" +# include "genxml/gen9_pack.h" #else # error "Need to add a pack header include for this gen" #endif diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 53cfa20a263..6be4f9fb427 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -66,7 +66,6 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src/compiler \ -I$(top_builddir)/src/compiler/nir \ -I$(top_builddir)/src/intel \ - -I$(top_builddir)/src/intel/genxml \ -I$(top_builddir)/src/vulkan libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init -- cgit v1.2.3 From 82d2db80bb0b7222063775fec7191b8da0392e9d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 26 Feb 2016 16:11:33 -0800 Subject: genxml: Add MOCS fields to RENDER_SURFACE_STATE This allows us to set MOCS as a single uint32_t on all platforms. 
--- src/intel/genxml/gen7.xml | 1 + src/intel/genxml/gen75.xml | 1 + src/intel/genxml/gen8.xml | 1 + src/intel/genxml/gen9.xml | 1 + 4 files changed, 4 insertions(+) (limited to 'src') diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml index 8faffcbf876..7e0fce0f04f 100644 --- a/src/intel/genxml/gen7.xml +++ b/src/intel/genxml/gen7.xml @@ -370,6 +370,7 @@ + diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml index c1cbce9fdb6..f42c026d2f7 100644 --- a/src/intel/genxml/gen75.xml +++ b/src/intel/genxml/gen75.xml @@ -381,6 +381,7 @@ + diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml index fff6ae2ed42..9a52fbaa330 100644 --- a/src/intel/genxml/gen8.xml +++ b/src/intel/genxml/gen8.xml @@ -292,6 +292,7 @@ + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index 3f229b0d0fc..2b73f5f2a39 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -298,6 +298,7 @@ + -- cgit v1.2.3 From 6b06072ba846f4adcdf9472dc2ffa1f91aca2c9d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 09:22:07 -0800 Subject: isl: Create per-gen helper libraries for gens 7, 8, and 9 --- src/intel/isl/Makefile.am | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am index 72f5460554f..74e8ab04be5 100644 --- a/src/intel/isl/Makefile.am +++ b/src/intel/isl/Makefile.am @@ -21,7 +21,14 @@ SUBDIRS = . 
-noinst_LTLIBRARIES = libisl.la + +ISL_GEN_LIBS = \ + libisl-gen7.la \ + libisl-gen8.la \ + libisl-gen9.la \ + $(NULL) + +noinst_LTLIBRARIES = $(ISL_GEN_LIBS) libisl.la EXTRA_DIST = tests @@ -42,6 +49,8 @@ AM_CPPFLAGS = \ libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init +libisl_la_LIBADD = $(ISL_GEN_LIBS) + libisl_la_SOURCES = \ isl.c \ isl.h \ @@ -51,14 +60,26 @@ libisl_la_SOURCES = \ isl_gen4.h \ isl_gen6.c \ isl_gen6.h \ + isl_image.c \ + $(NULL) + +libisl_gen7_la_SOURCES = \ isl_gen7.c \ isl_gen7.h \ + $(NULL) +libisl_gen7_la_CFLAGS = $(libisl_la_CFLAGS) + +libisl_gen8_la_SOURCES = \ isl_gen8.c \ isl_gen8.h \ + $(NULL) +libisl_gen8_la_CFLAGS = $(libisl_la_CFLAGS) + +libisl_gen9_la_SOURCES = \ isl_gen9.c \ isl_gen9.h \ - isl_image.c \ $(NULL) +libisl_gen9_la_CFLAGS = $(libisl_la_CFLAGS) BUILT_SOURCES = \ isl_format_layout.c -- cgit v1.2.3 From d798762cdbe16b094efba7ced798752daf2e6efd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 09:32:43 -0800 Subject: isl: Add a function for filling out a surface state --- src/intel/isl/Makefile.am | 19 +- src/intel/isl/isl.c | 42 ++++ src/intel/isl/isl.h | 102 ++++++++++ src/intel/isl/isl_priv.h | 14 ++ src/intel/isl/isl_surface_state.c | 394 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 567 insertions(+), 4 deletions(-) create mode 100644 src/intel/isl/isl_surface_state.c (limited to 'src') diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am index 74e8ab04be5..b4af3c86f3d 100644 --- a/src/intel/isl/Makefile.am +++ b/src/intel/isl/Makefile.am @@ -24,6 +24,7 @@ SUBDIRS = . 
ISL_GEN_LIBS = \ libisl-gen7.la \ + libisl-gen75.la \ libisl-gen8.la \ libisl-gen9.la \ $(NULL) @@ -39,13 +40,15 @@ AM_CPPFLAGS = \ $(DEFINES) \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/intel \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/gallium/include \ - -I$(top_builddir)/src + -I$(top_builddir)/src \ + -I$(top_builddir)/src/intel libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init @@ -66,20 +69,28 @@ libisl_la_SOURCES = \ libisl_gen7_la_SOURCES = \ isl_gen7.c \ isl_gen7.h \ + isl_surface_state.c \ $(NULL) -libisl_gen7_la_CFLAGS = $(libisl_la_CFLAGS) +libisl_gen7_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=70 + +libisl_gen75_la_SOURCES = \ + isl_surface_state.c \ + $(NULL) +libisl_gen75_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=75 libisl_gen8_la_SOURCES = \ isl_gen8.c \ isl_gen8.h \ + isl_surface_state.c \ $(NULL) -libisl_gen8_la_CFLAGS = $(libisl_la_CFLAGS) +libisl_gen8_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=80 libisl_gen9_la_SOURCES = \ isl_gen9.c \ isl_gen9.h \ + isl_surface_state.c \ $(NULL) -libisl_gen9_la_CFLAGS = $(libisl_la_CFLAGS) +libisl_gen9_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=90 BUILT_SOURCES = \ isl_format_layout.c diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 3b6bee10081..f7f276f16df 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -1161,6 +1161,48 @@ isl_surf_get_tile_info(const struct isl_device *dev, isl_tiling_get_info(dev, surf->tiling, fmtl->bs, tile_info); } +void +isl_surf_fill_state_s(const struct isl_device *dev, void *state, + const struct isl_surf_fill_state_info *restrict info) +{ +#ifndef NDEBUG + isl_surf_usage_flags_t _base_usage = + info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | + ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_STORAGE_BIT); + /* They may only specify one of the 
above bits at a time */ + assert(__builtin_popcount(_base_usage) == 1); + /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */ + assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage); +#endif + + if (info->surf->dim == ISL_SURF_DIM_3D) { + assert(info->view->base_array_layer + info->view->array_len <= + info->surf->logical_level0_px.depth); + } else { + assert(info->view->base_array_layer + info->view->array_len <= + info->surf->logical_level0_px.array_len); + } + + switch (ISL_DEV_GEN(dev)) { + case 7: + if (ISL_DEV_IS_HASWELL(dev)) { + isl_gen75_surf_fill_state_s(dev, state, info); + } else { + isl_gen7_surf_fill_state_s(dev, state, info); + } + break; + case 8: + isl_gen8_surf_fill_state_s(dev, state, info); + break; + case 9: + isl_gen9_surf_fill_state_s(dev, state, info); + break; + default: + assert(!"Cannot fill surface state for this gen"); + } +} + /** * A variant of isl_surf_get_image_offset_sa() specific to * ISL_DIM_LAYOUT_GEN4_2D. diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 3e0ff935948..dbd480c51ce 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -60,6 +60,16 @@ struct brw_image_param; #define ISL_DEV_GEN(__dev) ((__dev)->info->gen) #endif +#ifndef ISL_DEV_IS_HASWELL +/** + * @brief Get the hardware generation of isl_device. + * + * You can define this as a compile-time constant in the CFLAGS. For example, + * `gcc -DISL_DEV_GEN(dev)=9 ...`. + */ +#define ISL_DEV_IS_HASWELL(__dev) ((__dev)->info->is_haswell) +#endif + #ifndef ISL_DEV_USE_SEPARATE_STENCIL /** * You can define this as a compile-time constant in the CFLAGS. 
For example, @@ -455,8 +465,21 @@ typedef uint64_t isl_surf_usage_flags_t; #define ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT (1u << 9) #define ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT (1u << 10) #define ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT (1u << 11) +#define ISL_SURF_USAGE_STORAGE_BIT (1u << 12) /** @} */ +/** + * @brief A channel select (also known as texture swizzle) value + */ +enum isl_channel_select { + ISL_CHANNEL_SELECT_ZERO = 0, + ISL_CHANNEL_SELECT_ONE = 1, + ISL_CHANNEL_SELECT_RED = 4, + ISL_CHANNEL_SELECT_GREEN = 5, + ISL_CHANNEL_SELECT_BLUE = 6, + ISL_CHANNEL_SELECT_ALPHA = 7, +}; + /** * Identical to VkSampleCountFlagBits. */ @@ -695,6 +718,77 @@ struct isl_surf { isl_surf_usage_flags_t usage; }; +struct isl_view { + /** + * Indicates the usage of the particular view + * + * Normally, this is one bit. However, for a cube map texture, it + * should be ISL_SURF_USAGE_TEXTURE_BIT | ISL_SURF_USAGE_CUBE_BIT. + */ + isl_surf_usage_flags_t usage; + + /** + * The format to use in the view + * + * This may differ from the format of the actual isl_surf but must have + * the same block size. + */ + enum isl_format format; + + uint32_t base_level; + uint32_t levels; + + /** + * Base array layer + * + * For cube maps, both base_array_layer and array_len should be + * specified in terms of 2-D layers and must be a multiple of 6. + */ + uint32_t base_array_layer; + uint32_t array_len; + + enum isl_channel_select channel_select[4]; +}; + +union isl_color_value { + float f32[4]; + uint32_t u32[4]; + int32_t i32[4]; +}; + +struct isl_surf_fill_state_info { + const struct isl_surf *surf; + const struct isl_view *view; + + /** + * The address of the surface in GPU memory. + */ + uint64_t address; + + /** + * The Memory Object Control state for the filled surface state. + * + * The exact format of this value depends on hardware generation. + */ + uint32_t mocs; + + /** + * This allows the caller to over-ride the dimensions of the surface. 
+ * This is used at the moment for compressed surfaces to let us hack + * around the fact that we can't actually render to them. + * + * FIXME: We really need to get rid of this. It's a lie. + */ + struct isl_extent4d level0_extent_px; + + /** + * The clear color for this surface + * + * Valid values depend on hardware generation. + */ + union isl_color_value clear_color; +}; + extern const struct isl_format_layout isl_format_layouts[]; void @@ -889,6 +983,14 @@ isl_surf_get_tile_info(const struct isl_device *dev, const struct isl_surf *surf, struct isl_tile_info *tile_info); +#define isl_surf_fill_state(dev, state, ...) \ + isl_surf_fill_state_s((dev), (state), \ + &(struct isl_surf_fill_state_info) { __VA_ARGS__ }); + +void +isl_surf_fill_state_s(const struct isl_device *dev, void *state, + const struct isl_surf_fill_state_info *restrict info); + /** * Alignment of the upper-left sample of each subimage, in units of surface * elements. diff --git a/src/intel/isl/isl_priv.h b/src/intel/isl/isl_priv.h index bca8503b4fd..d3f08094df9 100644 --- a/src/intel/isl/isl_priv.h +++ b/src/intel/isl/isl_priv.h @@ -138,3 +138,17 @@ isl_extent3d_el_to_sa(enum isl_format fmt, struct isl_extent3d extent_el) .d = extent_el.d * fmtl->bd, }; } + +void +isl_gen7_surf_fill_state_s(const struct isl_device *dev, void *state, + const struct isl_surf_fill_state_info *restrict info); + +void +isl_gen75_surf_fill_state_s(const struct isl_device *dev, void *state, + const struct isl_surf_fill_state_info *restrict info); +void +isl_gen8_surf_fill_state_s(const struct isl_device *dev, void *state, + const struct isl_surf_fill_state_info *restrict info); +void +isl_gen9_surf_fill_state_s(const struct isl_device *dev, void *state, + const struct isl_surf_fill_state_info *restrict info); diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c new file mode 100644 index 00000000000..fe3c083c574 --- /dev/null +++ b/src/intel/isl/isl_surface_state.c @@ -0,0 +1,394 @@ +/* + 
* Copyright 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include + +#define __gen_address_type uint64_t +#define __gen_user_data void + +static inline uint64_t +__gen_combine_address(void *data, void *loc, uint64_t addr, uint32_t delta) +{ + return addr + delta; +} + +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" + +#include "isl_priv.h" + +#define __PASTE2(x, y) x ## y +#define __PASTE(x, y) __PASTE2(x, y) +#define isl_genX(x) __PASTE(isl_, genX(x)) + +#if GEN_GEN >= 8 +static const uint8_t isl_to_gen_halign[] = { + [4] = HALIGN4, + [8] = HALIGN8, + [16] = HALIGN16, +}; + +static const uint8_t isl_to_gen_valign[] = { + [4] = VALIGN4, + [8] = VALIGN8, + [16] = VALIGN16, +}; +#else +static const uint8_t isl_to_gen_halign[] = { + [4] = HALIGN_4, + [8] = HALIGN_8, +}; + +static const uint8_t isl_to_gen_valign[] = { + [2] = VALIGN_2, + [4] = VALIGN_4, +}; +#endif + +#if GEN_GEN >= 8 +static const uint8_t isl_to_gen_tiling[] = { + [ISL_TILING_LINEAR] = LINEAR, + [ISL_TILING_X] = XMAJOR, + [ISL_TILING_Y0] = YMAJOR, + [ISL_TILING_Yf] = YMAJOR, + [ISL_TILING_Ys] = YMAJOR, + [ISL_TILING_W] = WMAJOR, +}; +#endif + +#if GEN_GEN >= 8 +static const uint32_t isl_to_gen_multisample_layout[] = { + [ISL_MSAA_LAYOUT_NONE] = MSS, + [ISL_MSAA_LAYOUT_INTERLEAVED] = DEPTH_STENCIL, + [ISL_MSAA_LAYOUT_ARRAY] = MSS, +}; +#else +static const uint32_t isl_to_gen_multisample_layout[] = { + [ISL_MSAA_LAYOUT_NONE] = MSFMT_MSS, + [ISL_MSAA_LAYOUT_INTERLEAVED] = MSFMT_DEPTH_STENCIL, + [ISL_MSAA_LAYOUT_ARRAY] = MSFMT_MSS, +}; +#endif + +static const uint8_t +get_surftype(enum isl_surf_dim dim, isl_surf_usage_flags_t usage) +{ + switch (dim) { + default: + unreachable("bad isl_surf_dim"); + case ISL_SURF_DIM_1D: + assert(!(usage & ISL_SURF_USAGE_CUBE_BIT)); + return SURFTYPE_1D; + case ISL_SURF_DIM_2D: + if (usage & ISL_SURF_USAGE_STORAGE_BIT) { + /* Storage images are always plain 2-D, not cube */ + return SURFTYPE_2D; + } else if (usage & ISL_SURF_USAGE_CUBE_BIT) { + return SURFTYPE_CUBE; + } else { + return SURFTYPE_2D; + } + 
case ISL_SURF_DIM_3D: + assert(!(usage & ISL_SURF_USAGE_CUBE_BIT)); + return SURFTYPE_3D; + } +} + +/** + * Get the values to pack into RENDER_SUFFACE_STATE.SurfaceHorizontalAlignment + * and SurfaceVerticalAlignment. + */ +static void +get_halign_valign(const struct isl_surf *surf, + uint32_t *halign, uint32_t *valign) +{ + if (GEN_GEN >= 9) { + if (isl_tiling_is_std_y(surf->tiling) || + surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { + /* The hardware ignores the alignment values. Anyway, the surface's + * true alignment is likely outside the enum range of HALIGN* and + * VALIGN*. + */ + *halign = 0; + *valign = 0; + } else { + /* In Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in units + * of surface elements (not pixels nor samples). For compressed formats, + * a "surface element" is defined as a compression block. For example, + * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2 + * format (ETC2 has a block height of 4), then the vertical alignment is + * 4 compression blocks or, equivalently, 16 pixels. + */ + struct isl_extent3d image_align_el + = isl_surf_get_image_alignment_el(surf); + + *halign = isl_to_gen_halign[image_align_el.width]; + *valign = isl_to_gen_valign[image_align_el.height]; + } + } else { + /* Pre-Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in + * units of surface samples. For example, if SurfaceVerticalAlignment + * is VALIGN_4 and the surface is singlesampled, then for any surface + * format (compressed or not) the vertical alignment is + * 4 pixels. 
+ */ + struct isl_extent3d image_align_sa + = isl_surf_get_image_alignment_sa(surf); + + *halign = isl_to_gen_halign[image_align_sa.width]; + *valign = isl_to_gen_valign[image_align_sa.height]; + } +} + +#if GEN_GEN >= 8 +static uint32_t +get_qpitch(const struct isl_surf *surf) +{ + switch (surf->dim) { + default: + assert(!"Bad isl_surf_dim"); + case ISL_SURF_DIM_1D: + if (GEN_GEN >= 9) { + /* QPitch is usually expressed as rows of surface elements (where + * a surface element is an compression block or a single surface + * sample). Skylake 1D is an outlier. + * + * From the Skylake BSpec >> Memory Views >> Common Surface + * Formats >> Surface Layout and Tiling >> 1D Surfaces: + * + * Surface QPitch specifies the distance in pixels between array + * slices. + */ + return isl_surf_get_array_pitch_el(surf); + } else { + return isl_surf_get_array_pitch_el_rows(surf); + } + case ISL_SURF_DIM_2D: + case ISL_SURF_DIM_3D: + if (GEN_GEN >= 9) { + return isl_surf_get_array_pitch_el_rows(surf); + } else { + /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch + * + * "This field must be set to an integer multiple of the Surface + * Vertical Alignment. For compressed textures (BC*, FXT1, + * ETC*, and EAC* Surface Formats), this field is in units of + * rows in the uncompressed surface, and must be set to an + * integer multiple of the vertical alignment parameter "j" + * defined in the Common Surface Formats section." 
+ */ + return isl_surf_get_array_pitch_sa_rows(surf); + } + } +} +#endif /* GEN_GEN >= 8 */ + +void +isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, + const struct isl_surf_fill_state_info *restrict info) +{ + uint32_t halign, valign; + get_halign_valign(info->surf, &halign, &valign); + + struct GENX(RENDER_SURFACE_STATE) s = { + .SurfaceType = get_surftype(info->surf->dim, info->view->usage), + .SurfaceArray = info->surf->phys_level0_sa.array_len > 1, + .SurfaceVerticalAlignment = valign, + .SurfaceHorizontalAlignment = halign, + +#if GEN_GEN >= 8 + .TileMode = isl_to_gen_tiling[info->surf->tiling], +#else + .TiledSurface = info->surf->tiling != ISL_TILING_LINEAR, + .TileWalk = info->surf->tiling == ISL_TILING_X ? TILEWALK_XMAJOR : + TILEWALK_YMAJOR, +#endif + + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + +#if GEN_GEN >= 8 + .SamplerL2BypassModeDisable = true, +#endif + +#if GEN_GEN >= 8 + .RenderCacheReadWriteMode = WriteOnlyCache, +#else + .RenderCacheReadWriteMode = 0, +#endif + +#if GEN_GEN >= 8 + .CubeFaceEnablePositiveZ = 1, + .CubeFaceEnableNegativeZ = 1, + .CubeFaceEnablePositiveY = 1, + .CubeFaceEnableNegativeY = 1, + .CubeFaceEnablePositiveX = 1, + .CubeFaceEnableNegativeX = 1, +#else + .CubeFaceEnables = 0x3f, +#endif + +#if GEN_GEN >= 8 + .SurfaceQPitch = get_qpitch(info->surf) >> 2, +#endif + + .Width = info->level0_extent_px.width - 1, + .Height = info->level0_extent_px.height - 1, + .Depth = 0, /* TEMPLATE */ + + .SurfacePitch = info->surf->row_pitch - 1, + .RenderTargetViewExtent = 0, /* TEMPLATE */ + .MinimumArrayElement = 0, /* TEMPLATE */ + + .MultisampledSurfaceStorageFormat = + isl_to_gen_multisample_layout[info->surf->msaa_layout], + .NumberofMultisamples = ffs(info->surf->samples) - 1, + .MultisamplePositionPaletteIndex = 0, /* UNUSED */ + + .XOffset = 0, + .YOffset = 0, + + .ResourceMinLOD = 0.0, + + .MIPCountLOD = 0, /* TEMPLATE */ + .SurfaceMinLOD = 0, /* TEMPLATE */ + +#if (GEN_GEN >= 8 || 
GEN_IS_HASWELL) + .ShaderChannelSelectRed = info->view->channel_select[0], + .ShaderChannelSelectGreen = info->view->channel_select[1], + .ShaderChannelSelectBlue = info->view->channel_select[2], + .ShaderChannelSelectAlpha = info->view->channel_select[3], +#endif + + .SurfaceBaseAddress = info->address, + .MOCS = info->mocs, + +#if GEN_GEN >= 8 + .AuxiliarySurfaceMode = AUX_NONE, +#else + .MCSEnable = false, +#endif + }; + + if (info->view->usage & ISL_SURF_USAGE_STORAGE_BIT) { + s.SurfaceFormat = isl_lower_storage_image_format(dev, info->view->format); + } else { + s.SurfaceFormat = info->view->format; + } + + switch (s.SurfaceType) { + case SURFTYPE_1D: + case SURFTYPE_2D: + s.MinimumArrayElement = info->view->base_array_layer; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced + * by one for each increase from zero of Minimum Array Element. For + * example, if Minimum Array Element is set to 1024 on a 2D surface, + * the range of this field is reduced to [0,1023]. + * + * In other words, 'Depth' is the number of array layers. + */ + s.Depth = info->view->array_len - 1; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 1D and 2D Surfaces: + * This field must be set to the same value as the Depth field. + */ + s.RenderTargetViewExtent = s.Depth; + break; + case SURFTYPE_CUBE: + s.MinimumArrayElement = info->view->base_array_layer; + /* Same as SURFTYPE_2D, but divided by 6 */ + s.Depth = info->view->array_len / 6 - 1; + s.RenderTargetViewExtent = s.Depth; + break; + case SURFTYPE_3D: + s.MinimumArrayElement = info->view->base_array_layer; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * If the volume texture is MIP-mapped, this field specifies the + * depth of the base MIP level. 
+ */ + s.Depth = info->level0_extent_px.depth - 1; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 3D Surfaces: This field + * indicates the extent of the accessible 'R' coordinates minus 1 on + * the LOD currently being rendered to. + */ + s.RenderTargetViewExtent = info->level0_extent_px.depth - 1; + break; + default: + unreachable(!"bad SurfaceType"); + } + + if (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { + /* For render target surfaces, the hardware interprets field + * MIPCount/LOD as LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + s.MIPCountLOD = info->view->base_level; + s.SurfaceMinLOD = 0; + } else { + /* For non render target surfaces, the hardware interprets field + * MIPCount/LOD as MIPCount. The range of levels accessible by the + * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. + */ + s.SurfaceMinLOD = info->view->base_level; + s.MIPCountLOD = MAX(info->view->levels, 1) - 1; + } + +#if 0 + if (GEN_GEN == 8) { + if (isl_format_is_integer(info->view->format)) { + for (unsigned i = 0; i < 4; i++) { + assert(info->clear_color.u32[i] == 0 || + info->clear_color.u32[i] == 1); + } + } else { + for (unsigned i = 0; i < 4; i++) { + assert(info->clear_color.f32[i] == 0.0f || + info->clear_color.f32[i] == 1.0f); + } + } + s.RedClearColor = info->clear_color.u32[0] != 0, + s.GreenClearColor = info->clear_color.u32[1] != 0, + s.BlueClearColor = info->clear_color.u32[2] != 0, + s.AlphaClearColor = info->clear_color.u32[3] != 0, + } else { + .RedClearColor = info->clear_color.u32[0], + .GreenClearColor = info->clear_color.u32[1], + .BlueClearColor = info->clear_color.u32[2], + .AlphaClearColor = info->clear_color.u32[3], + } +#endif + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &s); +} -- cgit v1.2.3 From 4a9b805ce55b495576627465b7cca034b468653a Mon Sep 17 00:00:00 2001 From: 
Jason Ekstrand Date: Fri, 26 Feb 2016 16:22:47 -0800 Subject: anv/device: Store the default MOCS in the device --- src/intel/vulkan/anv_private.h | 16 +++++++++------- src/intel/vulkan/gen7_state.c | 3 +++ src/intel/vulkan/gen8_state.c | 3 +++ 3 files changed, 15 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 041ad87f75d..2f3a6597ac2 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -680,6 +680,8 @@ struct anv_device { struct anv_block_pool scratch_block_pool; + uint32_t default_mocs; + pthread_mutex_t mutex; }; @@ -859,19 +861,19 @@ __gen_combine_address(struct anv_batch *batch, void *location, .L3CacheabilityControlL3CC = 1, \ } -#define GEN8_MOCS { \ - .MemoryTypeLLCeLLCCacheabilityControl = WB, \ - .TargetCache = L3DefertoPATforLLCeLLCselection, \ - .AgeforQUADLRU = 0 \ +#define GEN8_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) { \ + .MemoryTypeLLCeLLCCacheabilityControl = WB, \ + .TargetCache = L3DefertoPATforLLCeLLCselection, \ + .AgeforQUADLRU = 0 \ } /* Skylake: MOCS is now an index into an array of 62 different caching * configurations programmed by the kernel. 
*/ -#define GEN9_MOCS { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 2 \ +#define GEN9_MOCS (struct GEN9_MEMORY_OBJECT_CONTROL_STATE) { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 2 \ } #define GEN9_MOCS_PTE { \ diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c index 5323c378d02..48c41faf57f 100644 --- a/src/intel/vulkan/gen7_state.c +++ b/src/intel/vulkan/gen7_state.c @@ -37,6 +37,9 @@ VkResult genX(init_device_state)(struct anv_device *device) { + GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, + &GENX(MOCS)); + struct anv_batch batch; uint32_t cmds[64]; diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c index 2686bfa8f3c..6226eba43ec 100644 --- a/src/intel/vulkan/gen8_state.c +++ b/src/intel/vulkan/gen8_state.c @@ -37,6 +37,9 @@ VkResult genX(init_device_state)(struct anv_device *device) { + GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, + &GENX(MOCS)); + struct anv_batch batch; uint32_t cmds[64]; -- cgit v1.2.3 From ded57c3ccaf79d139ec64ce6711f4de0fea681c7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 11:45:50 -0800 Subject: anv: Use ISL to fill out surface states --- src/intel/vulkan/anv_image.c | 150 +++++++++++++++++++++++-------------------- 1 file changed, 79 insertions(+), 71 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 0a412a3f8c6..11ceea38829 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -427,37 +427,6 @@ anv_validate_CreateImageView(VkDevice _device, return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView); } -void -anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) -{ - switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - 
gen75_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - else - gen7_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - break; - case 8: - gen8_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - break; - case 9: - gen9_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - break; - default: - unreachable("unsupported gen\n"); - } - - if (!device->info.has_llc) - anv_state_clflush(state); -} - static struct anv_state alloc_surface_state(struct anv_device *device, struct anv_cmd_buffer *cmd_buffer) @@ -479,7 +448,7 @@ has_matching_storage_typed_format(const struct anv_device *device, device->info.gen >= 9); } -static VkComponentSwizzle +static enum isl_channel_select remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component, struct anv_format_swizzle format_swizzle) { @@ -488,17 +457,17 @@ remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component, switch (swizzle) { case VK_COMPONENT_SWIZZLE_ZERO: - return VK_COMPONENT_SWIZZLE_ZERO; + return ISL_CHANNEL_SELECT_ZERO; case VK_COMPONENT_SWIZZLE_ONE: - return VK_COMPONENT_SWIZZLE_ONE; + return ISL_CHANNEL_SELECT_ONE; case VK_COMPONENT_SWIZZLE_R: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.r; + return ISL_CHANNEL_SELECT_RED + format_swizzle.r; case VK_COMPONENT_SWIZZLE_G: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.g; + return ISL_CHANNEL_SELECT_RED + format_swizzle.g; case VK_COMPONENT_SWIZZLE_B: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.b; + return ISL_CHANNEL_SELECT_RED + format_swizzle.b; case VK_COMPONENT_SWIZZLE_A: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.a; + return ISL_CHANNEL_SELECT_RED + format_swizzle.a; default: unreachable("Invalid swizzle"); } @@ -513,8 +482,6 @@ anv_image_view_init(struct anv_image_view *iview, { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - VkImageViewCreateInfo 
mCreateInfo; - memcpy(&mCreateInfo, pCreateInfo, sizeof(VkImageViewCreateInfo)); assert(range->layerCount > 0); assert(range->baseMipLevel < image->levels); @@ -549,18 +516,30 @@ anv_image_view_init(struct anv_image_view *iview, struct anv_format_swizzle swizzle; iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, image->tiling, &swizzle); - iview->swizzle.r = remap_swizzle(pCreateInfo->components.r, - VK_COMPONENT_SWIZZLE_R, swizzle); - iview->swizzle.g = remap_swizzle(pCreateInfo->components.g, - VK_COMPONENT_SWIZZLE_G, swizzle); - iview->swizzle.b = remap_swizzle(pCreateInfo->components.b, - VK_COMPONENT_SWIZZLE_B, swizzle); - iview->swizzle.a = remap_swizzle(pCreateInfo->components.a, - VK_COMPONENT_SWIZZLE_A, swizzle); iview->base_layer = range->baseArrayLayer; iview->base_mip = range->baseMipLevel; + struct isl_view isl_view = { + .format = iview->format, + .base_level = range->baseMipLevel, + .levels = range->levelCount, + .base_array_layer = range->baseArrayLayer, + .array_len = range->layerCount, + .channel_select = { + remap_swizzle(pCreateInfo->components.r, + VK_COMPONENT_SWIZZLE_R, swizzle), + remap_swizzle(pCreateInfo->components.g, + VK_COMPONENT_SWIZZLE_G, swizzle), + remap_swizzle(pCreateInfo->components.b, + VK_COMPONENT_SWIZZLE_B, swizzle), + remap_swizzle(pCreateInfo->components.a, + VK_COMPONENT_SWIZZLE_A, swizzle), + }, + }; + + struct isl_extent4d level0_extent_px; + if (!isl_format_is_compressed(iview->format) && isl_format_is_compressed(image->format->isl_format)) { /* Scale the ImageView extent by the backing Image. 
This is used @@ -570,31 +549,46 @@ anv_image_view_init(struct anv_image_view *iview, */ const struct isl_format_layout * isl_layout = image->format->isl_layout; - iview->level_0_extent.depth = anv_minify(image->extent.depth, range->baseMipLevel); - iview->level_0_extent.depth = DIV_ROUND_UP(iview->level_0_extent.depth, isl_layout->bd); + level0_extent_px.depth = anv_minify(image->extent.depth, range->baseMipLevel); + level0_extent_px.depth = DIV_ROUND_UP(level0_extent_px.depth, isl_layout->bd); - iview->level_0_extent.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; - iview->level_0_extent.width = isl_surf_get_row_pitch_el(&surface->isl); - mCreateInfo.subresourceRange.baseMipLevel = 0; - mCreateInfo.subresourceRange.baseArrayLayer = 0; + level0_extent_px.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; + level0_extent_px.width = isl_surf_get_row_pitch_el(&surface->isl); + isl_view.base_level = 0; + isl_view.base_array_layer = 0; } else { - iview->level_0_extent.width = image->extent.width; - iview->level_0_extent.height = image->extent.height; - iview->level_0_extent.depth = image->extent.depth; + level0_extent_px.width = image->extent.width; + level0_extent_px.height = image->extent.height; + level0_extent_px.depth = image->extent.depth; } iview->extent = (VkExtent3D) { - .width = anv_minify(iview->level_0_extent.width , range->baseMipLevel), - .height = anv_minify(iview->level_0_extent.height, range->baseMipLevel), - .depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel), + .width = anv_minify(image->extent.width , range->baseMipLevel), + .height = anv_minify(image->extent.height, range->baseMipLevel), + .depth = anv_minify(image->extent.depth , range->baseMipLevel), }; + isl_surf_usage_flags_t cube_usage; + if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE || + pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) { + cube_usage = ISL_SURF_USAGE_CUBE_BIT; + } else { + cube_usage = 0; 
+ } + if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer); - anv_fill_image_surface_state(device, iview->sampler_surface_state, - iview, &mCreateInfo, - VK_IMAGE_USAGE_SAMPLED_BIT); + isl_view.usage = cube_usage | ISL_SURF_USAGE_TEXTURE_BIT; + isl_surf_fill_state(&device->isl_dev, + iview->sampler_surface_state.map, + .surf = &surface->isl, + .view = &isl_view, + .mocs = device->default_mocs, + .level0_extent_px = level0_extent_px); + + if (!device->info.has_llc) + anv_state_clflush(iview->sampler_surface_state); } else { iview->sampler_surface_state.alloc_size = 0; } @@ -602,9 +596,16 @@ anv_image_view_init(struct anv_image_view *iview, if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); - anv_fill_image_surface_state(device, iview->color_rt_surface_state, - iview, &mCreateInfo, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + isl_view.usage = cube_usage | ISL_SURF_USAGE_RENDER_TARGET_BIT; + isl_surf_fill_state(&device->isl_dev, + iview->color_rt_surface_state.map, + .surf = &surface->isl, + .view = &isl_view, + .mocs = device->default_mocs, + .level0_extent_px = level0_extent_px); + + if (!device->info.has_llc) + anv_state_clflush(iview->color_rt_surface_state); } else { iview->color_rt_surface_state.alloc_size = 0; } @@ -612,16 +613,23 @@ anv_image_view_init(struct anv_image_view *iview, if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); - if (has_matching_storage_typed_format(device, iview->format)) - anv_fill_image_surface_state(device, iview->storage_surface_state, - iview, &mCreateInfo, - VK_IMAGE_USAGE_STORAGE_BIT); - else + if (has_matching_storage_typed_format(device, iview->format)) { + isl_view.usage = cube_usage | ISL_SURF_USAGE_STORAGE_BIT; + isl_surf_fill_state(&device->isl_dev, + iview->storage_surface_state.map, + .surf = &surface->isl, + 
.view = &isl_view, + .mocs = device->default_mocs, + .level0_extent_px = level0_extent_px); + } else { anv_fill_buffer_surface_state(device, iview->storage_surface_state, ISL_FORMAT_RAW, iview->offset, iview->bo->size - iview->offset, 1); + } + if (!device->info.has_llc) + anv_state_clflush(iview->storage_surface_state); } else { iview->storage_surface_state.alloc_size = 0; } -- cgit v1.2.3 From b70a8d40fa1bdb21376b96534c846ba8c1c82878 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 21:39:46 -0800 Subject: anv/state: Remove unused fill_surface_state functions --- src/intel/vulkan/gen7_state.c | 111 ---------------- src/intel/vulkan/gen8_state.c | 261 ------------------------------------- src/intel/vulkan/genX_state_util.h | 47 ------- 3 files changed, 419 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c index 48c41faf57f..ac5b5ed21cd 100644 --- a/src/intel/vulkan/gen7_state.c +++ b/src/intel/vulkan/gen7_state.c @@ -154,114 +154,3 @@ VkResult genX(CreateSampler)( return VK_SUCCESS; } - -static const uint8_t anv_halign[] = { - [4] = HALIGN_4, - [8] = HALIGN_8, -}; - -static const uint8_t anv_valign[] = { - [2] = VALIGN_2, - [4] = VALIGN_4, -}; - -void -genX(fill_image_surface_state)(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) -{ - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) - anv_finishme("non-2D image views"); - - assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); - assert(util_is_power_of_two(usage)); - - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); - struct anv_surface *surface = - anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - - uint32_t depth = 1; - 
if (range->layerCount > 1) { - depth = range->layerCount; - } else if (image->extent.depth > 1) { - depth = image->extent.depth; - } - - const struct isl_extent3d image_align_sa = - isl_surf_get_image_alignment_sa(&surface->isl); - - struct GENX(RENDER_SURFACE_STATE) template = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType, - usage == VK_IMAGE_USAGE_STORAGE_BIT), - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), - .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], - .SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], - - /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if - * Tiled Surface is False." - */ - .TiledSurface = surface->isl.tiling != ISL_TILING_LINEAR, - .TileWalk = surface->isl.tiling == ISL_TILING_Y0 ? - TILEWALK_YMAJOR : TILEWALK_XMAJOR, - - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - - .RenderCacheReadWriteMode = 0, /* TEMPLATE */ - - .Height = image->extent.height - 1, - .Width = image->extent.width - 1, - .Depth = depth - 1, - .SurfacePitch = surface->isl.row_pitch - 1, - .MinimumArrayElement = range->baseArrayLayer, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, - - .SurfaceObjectControlState = GENX(MOCS), - - .MIPCountLOD = 0, /* TEMPLATE */ - .SurfaceMinLOD = 0, /* TEMPLATE */ - - .MCSEnable = false, -# if (GEN_IS_HASWELL) - .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], - .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], - .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], - .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], -# else /* XXX: Seriously? 
*/ - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, -# endif - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, iview->offset }, - }; - - if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - /* For render target surfaces, the hardware interprets field - * MIPCount/LOD as LOD. The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. - */ - template.MIPCountLOD = range->baseMipLevel; - template.SurfaceMinLOD = 0; - } else { - /* For non render target surfaces, the hardware interprets field - * MIPCount/LOD as MIPCount. The range of levels accessible by the - * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. - */ - template.SurfaceMinLOD = range->baseMipLevel; - template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; - } - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); -} diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c index 6226eba43ec..1ecd34058d9 100644 --- a/src/intel/vulkan/gen8_state.c +++ b/src/intel/vulkan/gen8_state.c @@ -139,13 +139,6 @@ genX(init_device_state)(struct anv_device *device) return anv_device_submit_simple_batch(device, &batch); } -static const uint32_t -isl_to_gen_multisample_layout[] = { - [ISL_MSAA_LAYOUT_NONE] = MSS, - [ISL_MSAA_LAYOUT_INTERLEAVED] = DEPTH_STENCIL, - [ISL_MSAA_LAYOUT_ARRAY] = MSS, -}; - void genX(fill_buffer_surface_state)(void *state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride) @@ -178,260 +171,6 @@ genX(fill_buffer_surface_state)(void *state, enum isl_format format, GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); } -static const uint8_t anv_halign[] = { - [4] = HALIGN4, - [8] = HALIGN8, - [16] = HALIGN16, -}; - -static const uint8_t anv_valign[] = { - [4] = VALIGN4, - [8] = VALIGN8, - [16] = VALIGN16, -}; - -/** - * Get the values to pack into RENDER_SUFFACE_STATE.SurfaceHorizontalAlignment - * and 
SurfaceVerticalAlignment. - */ -static void -get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valign) -{ - #if GEN_GEN >= 9 - if (isl_tiling_is_std_y(surf->tiling) || - surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { - /* The hardware ignores the alignment values. Anyway, the surface's - * true alignment is likely outside the enum range of HALIGN* and - * VALIGN*. - */ - *halign = 0; - *valign = 0; - } else { - /* In Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in units - * of surface elements (not pixels nor samples). For compressed formats, - * a "surface element" is defined as a compression block. For example, - * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2 - * format (ETC2 has a block height of 4), then the vertical alignment is - * 4 compression blocks or, equivalently, 16 pixels. - */ - struct isl_extent3d image_align_el - = isl_surf_get_image_alignment_el(surf); - - *halign = anv_halign[image_align_el.width]; - *valign = anv_valign[image_align_el.height]; - } - #else - /* Pre-Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in - * units of surface samples. For example, if SurfaceVerticalAlignment - * is VALIGN_4 and the surface is singlesampled, then for any surface - * format (compressed or not) the vertical alignment is - * 4 pixels. - */ - struct isl_extent3d image_align_sa - = isl_surf_get_image_alignment_sa(surf); - - *halign = anv_halign[image_align_sa.width]; - *valign = anv_valign[image_align_sa.height]; - #endif -} - -static uint32_t -get_qpitch(const struct isl_surf *surf) -{ - switch (surf->dim) { - default: - unreachable(!"bad isl_surf_dim"); - case ISL_SURF_DIM_1D: - #if GEN_GEN >= 9 - /* QPitch is usually expressed as rows of surface elements (where - * a surface element is an compression block or a single surface - * sample). Skylake 1D is an outlier. 
- * - * From the Skylake BSpec >> Memory Views >> Common Surface - * Formats >> Surface Layout and Tiling >> 1D Surfaces: - * - * Surface QPitch specifies the distance in pixels between array - * slices. - */ - return isl_surf_get_array_pitch_el(surf); - #else - return isl_surf_get_array_pitch_el_rows(surf); - #endif - case ISL_SURF_DIM_2D: - case ISL_SURF_DIM_3D: - #if GEN_GEN >= 9 - return isl_surf_get_array_pitch_el_rows(surf); - #else - /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch - * - * "This field must be set to an integer multiple of the Surface - * Vertical Alignment. For compressed textures (BC*, FXT1, - * ETC*, and EAC* Surface Formats), this field is in units of - * rows in the uncompressed surface, and must be set to an - * integer multiple of the vertical alignment parameter "j" - * defined in the Common Surface Formats section." - */ - return isl_surf_get_array_pitch_sa_rows(surf); - #endif - } -} - -void -genX(fill_image_surface_state)(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) -{ - assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); - assert(util_is_power_of_two(usage)); - - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); - struct anv_surface *surface = - anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - - static const uint8_t isl_to_gen_tiling[] = { - [ISL_TILING_LINEAR] = LINEAR, - [ISL_TILING_X] = XMAJOR, - [ISL_TILING_Y0] = YMAJOR, - [ISL_TILING_Yf] = YMAJOR, - [ISL_TILING_Ys] = YMAJOR, - [ISL_TILING_W] = WMAJOR, - }; - - uint32_t halign, valign; - get_halign_valign(&surface->isl, &halign, &valign); - - struct GENX(RENDER_SURFACE_STATE) template = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType, 
is_storage), - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), - .SurfaceVerticalAlignment = valign, - .SurfaceHorizontalAlignment = halign, - .TileMode = isl_to_gen_tiling[surface->isl.tiling], - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .CubeFaceEnablePositiveZ = 1, - .CubeFaceEnableNegativeZ = 1, - .CubeFaceEnablePositiveY = 1, - .CubeFaceEnableNegativeY = 1, - .CubeFaceEnablePositiveX = 1, - .CubeFaceEnableNegativeX = 1, - .MemoryObjectControlState = GENX(MOCS), - - /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in - * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have - * both Base Mip Level fields nonzero". - */ - .BaseMipLevel = 0.0, - - .SurfaceQPitch = get_qpitch(&surface->isl) >> 2, - .Height = iview->level_0_extent.height - 1, - .Width = iview->level_0_extent.width - 1, - .Depth = 0, /* TEMPLATE */ - .SurfacePitch = surface->isl.row_pitch - 1, - .RenderTargetViewExtent = 0, /* TEMPLATE */ - .MinimumArrayElement = 0, /* TEMPLATE */ - .MultisampledSurfaceStorageFormat = - isl_to_gen_multisample_layout[surface->isl.msaa_layout], - .NumberofMultisamples = ffs(surface->isl.samples) - 1, - .MultisamplePositionPaletteIndex = 0, /* UNUSED */ - .XOffset = 0, - .YOffset = 0, - - .MIPCountLOD = 0, /* TEMPLATE */ - .SurfaceMinLOD = 0, /* TEMPLATE */ - - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], - .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], - .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], - .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, iview->offset }, - }; - - switch (template.SurfaceType) { - case SURFTYPE_1D: - 
case SURFTYPE_2D: - template.MinimumArrayElement = range->baseArrayLayer; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: - * - * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced - * by one for each increase from zero of Minimum Array Element. For - * example, if Minimum Array Element is set to 1024 on a 2D surface, - * the range of this field is reduced to [0,1023]. - * - * In other words, 'Depth' is the number of array layers. - */ - template.Depth = range->layerCount - 1; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: - * - * For Render Target and Typed Dataport 1D and 2D Surfaces: - * This field must be set to the same value as the Depth field. - */ - template.RenderTargetViewExtent = template.Depth; - break; - case SURFTYPE_CUBE: - template.MinimumArrayElement = range->baseArrayLayer; - /* Same as SURFTYPE_2D, but divided by 6 */ - template.Depth = range->layerCount / 6 - 1; - template.RenderTargetViewExtent = template.Depth; - break; - case SURFTYPE_3D: - template.MinimumArrayElement = range->baseArrayLayer; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: - * - * If the volume texture is MIP-mapped, this field specifies the - * depth of the base MIP level. - */ - template.Depth = image->extent.depth - 1; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: - * - * For Render Target and Typed Dataport 3D Surfaces: This field - * indicates the extent of the accessible 'R' coordinates minus 1 on - * the LOD currently being rendered to. - */ - template.RenderTargetViewExtent = iview->extent.depth - 1; - break; - default: - unreachable(!"bad SurfaceType"); - } - - if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - /* For render target surfaces, the hardware interprets field - * MIPCount/LOD as LOD. The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. 
- */ - template.MIPCountLOD = range->baseMipLevel; - template.SurfaceMinLOD = 0; - } else { - /* For non render target surfaces, the hardware interprets field - * MIPCount/LOD as MIPCount. The range of levels accessible by the - * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. - */ - template.SurfaceMinLOD = range->baseMipLevel; - template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; - } - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); -} - VkResult genX(CreateSampler)( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, diff --git a/src/intel/vulkan/genX_state_util.h b/src/intel/vulkan/genX_state_util.h index 10b3a9f42c5..aabcea9c183 100644 --- a/src/intel/vulkan/genX_state_util.h +++ b/src/intel/vulkan/genX_state_util.h @@ -21,53 +21,6 @@ * IN THE SOFTWARE. */ -static const uint8_t -anv_surftype(const struct anv_image *image, VkImageViewType view_type, - bool storage) -{ - switch (view_type) { - default: - unreachable("bad VkImageViewType"); - case VK_IMAGE_VIEW_TYPE_1D: - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - assert(image->type == VK_IMAGE_TYPE_1D); - return SURFTYPE_1D; - case VK_IMAGE_VIEW_TYPE_CUBE: - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - assert(image->type == VK_IMAGE_TYPE_2D); - return storage ? 
SURFTYPE_2D : SURFTYPE_CUBE; - case VK_IMAGE_VIEW_TYPE_2D: - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - assert(image->type == VK_IMAGE_TYPE_2D); - return SURFTYPE_2D; - case VK_IMAGE_VIEW_TYPE_3D: - assert(image->type == VK_IMAGE_TYPE_3D); - return SURFTYPE_3D; - } -} - -static enum isl_format -anv_surface_format(const struct anv_device *device, enum isl_format format, - bool storage) -{ - if (storage) { - return isl_lower_storage_image_format(&device->isl_dev, format); - } else { - return format; - } -} - -#if GEN_GEN > 7 || GEN_IS_HASWELL -static const uint32_t vk_to_gen_swizzle[] = { - [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, - [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, - [VK_COMPONENT_SWIZZLE_R] = SCS_RED, - [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, - [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, - [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA -}; -#endif - static inline uint32_t vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) { -- cgit v1.2.3 From 9d5b8f7709d7cce1493cc0b38c750ad1173f7327 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 21:40:25 -0800 Subject: anv: Remove unneeded fiels from anv_image_view --- src/intel/vulkan/anv_image.c | 11 ++++++----- src/intel/vulkan/anv_private.h | 3 --- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 11ceea38829..145db6de039 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -514,14 +514,15 @@ anv_image_view_init(struct anv_image_view *iview, iview->vk_format = pCreateInfo->format; struct anv_format_swizzle swizzle; - iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, - image->tiling, &swizzle); + enum isl_format format = anv_get_isl_format(pCreateInfo->format, + range->aspectMask, + image->tiling, &swizzle); iview->base_layer = range->baseArrayLayer; iview->base_mip = range->baseMipLevel; struct isl_view isl_view = { - .format = iview->format, + .format = format, 
.base_level = range->baseMipLevel, .levels = range->levelCount, .base_array_layer = range->baseArrayLayer, @@ -540,7 +541,7 @@ anv_image_view_init(struct anv_image_view *iview, struct isl_extent4d level0_extent_px; - if (!isl_format_is_compressed(iview->format) && + if (!isl_format_is_compressed(format) && isl_format_is_compressed(image->format->isl_format)) { /* Scale the ImageView extent by the backing Image. This is used * internally when an uncompressed ImageView is created on a @@ -613,7 +614,7 @@ anv_image_view_init(struct anv_image_view *iview, if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); - if (has_matching_storage_typed_format(device, iview->format)) { + if (has_matching_storage_typed_format(device, format)) { isl_view.usage = cube_usage | ISL_SURF_USAGE_STORAGE_BIT; isl_surf_fill_state(&device->isl_dev, iview->storage_surface_state.map, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 2f3a6597ac2..e50a6dbdbfe 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1647,11 +1647,8 @@ struct anv_image_view { VkImageAspectFlags aspect_mask; VkFormat vk_format; - VkComponentMapping swizzle; - enum isl_format format; uint32_t base_layer; uint32_t base_mip; - VkExtent3D level_0_extent; /**< Extent of ::image's level 0 adjusted for ::vk_format. */ VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ /** RENDER_SURFACE_STATE when using image as a color render target. 
*/ -- cgit v1.2.3 From a0cd20eb7fc9396f5ba2ad201018989074091b8f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Feb 2016 16:48:19 -0800 Subject: isl: Add a helper for filling a buffer surface state --- src/intel/isl/isl.c | 23 ++++++++++++++++++++ src/intel/isl/isl.h | 37 ++++++++++++++++++++++++++++++++ src/intel/isl/isl_priv.h | 16 ++++++++++++++ src/intel/isl/isl_surface_state.c | 45 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+) (limited to 'src') diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index f7f276f16df..f7b4c701841 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -1203,6 +1203,29 @@ isl_surf_fill_state_s(const struct isl_device *dev, void *state, } } +void +isl_buffer_fill_state_s(const struct isl_device *dev, void *state, + const struct isl_buffer_fill_state_info *restrict info) +{ + switch (ISL_DEV_GEN(dev)) { + case 7: + if (ISL_DEV_IS_HASWELL(dev)) { + isl_gen75_buffer_fill_state_s(state, info); + } else { + isl_gen7_buffer_fill_state_s(state, info); + } + break; + case 8: + isl_gen8_buffer_fill_state_s(state, info); + break; + case 9: + isl_gen9_buffer_fill_state_s(state, info); + break; + default: + assert(!"Cannot fill surface state for this gen"); + } +} + /** * A variant of isl_surf_get_image_offset_sa() specific to * ISL_DIM_LAYOUT_GEN4_2D. diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index dbd480c51ce..151f1ca27cb 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -789,6 +789,35 @@ struct isl_surf_fill_state_info { union isl_color_value clear_color; }; +struct isl_buffer_fill_state_info { + /** + * The address of the surface in GPU memory. + */ + uint64_t address; + + /** + * The size of the buffer + */ + uint64_t size; + + /** + * The Memory Object Control state for the filled surface state. + * + * The exact format of this value depends on hardware generation. 
+ */ + uint32_t mocs; + + /** + * The format to use in the surface state + * + * This may differ from the format of the actual isl_surf but have the + * same block size. + */ + enum isl_format format; + + uint32_t stride; +}; + extern const struct isl_format_layout isl_format_layouts[]; void @@ -991,6 +1020,14 @@ void isl_surf_fill_state_s(const struct isl_device *dev, void *state, const struct isl_surf_fill_state_info *restrict info); +#define isl_buffer_fill_state(dev, state, ...) \ + isl_buffer_fill_state_s((dev), (state), \ + &(struct isl_buffer_fill_state_info) { __VA_ARGS__ }); + +void +isl_buffer_fill_state_s(const struct isl_device *dev, void *state, + const struct isl_buffer_fill_state_info *restrict info); + /** * Alignment of the upper-left sample of each subimage, in units of surface * elements. diff --git a/src/intel/isl/isl_priv.h b/src/intel/isl/isl_priv.h index d3f08094df9..7b222594fd4 100644 --- a/src/intel/isl/isl_priv.h +++ b/src/intel/isl/isl_priv.h @@ -152,3 +152,19 @@ isl_gen8_surf_fill_state_s(const struct isl_device *dev, void *state, void isl_gen9_surf_fill_state_s(const struct isl_device *dev, void *state, const struct isl_surf_fill_state_info *restrict info); + +void +isl_gen7_buffer_fill_state_s(void *state, + const struct isl_buffer_fill_state_info *restrict info); + +void +isl_gen75_buffer_fill_state_s(void *state, + const struct isl_buffer_fill_state_info *restrict info); + +void +isl_gen8_buffer_fill_state_s(void *state, + const struct isl_buffer_fill_state_info *restrict info); + +void +isl_gen9_buffer_fill_state_s(void *state, + const struct isl_buffer_fill_state_info *restrict info); diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index fe3c083c574..12f4fb6bd98 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -392,3 +392,48 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &s); } + +void 
+isl_genX(buffer_fill_state_s)(void *state, + const struct isl_buffer_fill_state_info *restrict info) +{ + uint32_t num_elements = info->size / info->stride; + + struct GENX(RENDER_SURFACE_STATE) surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceArray = false, + .SurfaceFormat = info->format, + .SurfaceVerticalAlignment = isl_to_gen_valign[4], + .SurfaceHorizontalAlignment = isl_to_gen_halign[4], + .Height = ((num_elements - 1) >> 7) & 0x3fff, + .Width = (num_elements - 1) & 0x7f, + .Depth = ((num_elements - 1) >> 21) & 0x3f, + .SurfacePitch = info->stride - 1, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + +#if (GEN_GEN >= 8) + .TileMode = LINEAR, +#else + .TiledSurface = false, +#endif + +#if (GEN_GEN >= 8) + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, +#else + .RenderCacheReadWriteMode = 0, +#endif + + .MOCS = info->mocs, + +#if (GEN_GEN >= 8 || GEN_IS_HASWELL) + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, +#endif + .SurfaceBaseAddress = info->address, + }; + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); +} -- cgit v1.2.3 From eb19d640eb46249ba20734aa478c84fadfc35d80 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Feb 2016 16:54:25 -0800 Subject: anv: Use isl to fill buffer surface states --- src/intel/vulkan/anv_device.c | 24 ++++++------------------ src/intel/vulkan/gen7_state.c | 31 ------------------------------- src/intel/vulkan/gen8_state.c | 32 -------------------------------- 3 files changed, 6 insertions(+), 81 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 59930552f59..857c4b1d1f4 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1697,24 +1697,12 @@ anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, enum isl_format format, uint32_t 
offset, uint32_t range, uint32_t stride) { - switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - gen75_fill_buffer_surface_state(state.map, format, offset, range, - stride); - else - gen7_fill_buffer_surface_state(state.map, format, offset, range, - stride); - break; - case 8: - gen8_fill_buffer_surface_state(state.map, format, offset, range, stride); - break; - case 9: - gen9_fill_buffer_surface_state(state.map, format, offset, range, stride); - break; - default: - unreachable("unsupported gen\n"); - } + isl_buffer_fill_state(&device->isl_dev, state.map, + .address = offset, + .mocs = device->default_mocs, + .size = range, + .format = format, + .stride = stride); if (!device->info.has_llc) anv_state_clflush(state); diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c index ac5b5ed21cd..1360697f0de 100644 --- a/src/intel/vulkan/gen7_state.c +++ b/src/intel/vulkan/gen7_state.c @@ -63,37 +63,6 @@ genX(init_device_state)(struct anv_device *device) return anv_device_submit_simple_batch(device, &batch); } -void -genX(fill_buffer_surface_state)(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride) -{ - uint32_t num_elements = range / stride; - - struct GENX(RENDER_SURFACE_STATE) surface_state = { - .SurfaceType = SURFTYPE_BUFFER, - .SurfaceFormat = format, - .SurfaceVerticalAlignment = VALIGN_4, - .SurfaceHorizontalAlignment = HALIGN_4, - .TiledSurface = false, - .RenderCacheReadWriteMode = false, - .SurfaceObjectControlState = GENX(MOCS), - .Height = ((num_elements - 1) >> 7) & 0x3fff, - .Width = (num_elements - 1) & 0x7f, - .Depth = ((num_elements - 1) >> 21) & 0x3f, - .SurfacePitch = stride - 1, -# if (GEN_IS_HASWELL) - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, -# endif - .SurfaceBaseAddress = { NULL, offset }, - }; - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state, 
&surface_state); -} - VkResult genX(CreateSampler)( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c index 1ecd34058d9..784269b98cb 100644 --- a/src/intel/vulkan/gen8_state.c +++ b/src/intel/vulkan/gen8_state.c @@ -139,38 +139,6 @@ genX(init_device_state)(struct anv_device *device) return anv_device_submit_simple_batch(device, &batch); } -void -genX(fill_buffer_surface_state)(void *state, enum isl_format format, - uint32_t offset, uint32_t range, uint32_t stride) -{ - uint32_t num_elements = range / stride; - - struct GENX(RENDER_SURFACE_STATE) surface_state = { - .SurfaceType = SURFTYPE_BUFFER, - .SurfaceArray = false, - .SurfaceFormat = format, - .SurfaceVerticalAlignment = VALIGN4, - .SurfaceHorizontalAlignment = HALIGN4, - .TileMode = LINEAR, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GENX(MOCS), - .Height = ((num_elements - 1) >> 7) & 0x3fff, - .Width = (num_elements - 1) & 0x7f, - .Depth = ((num_elements - 1) >> 21) & 0x3f, - .SurfacePitch = stride - 1, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, - /* FIXME: We assume that the image must be bound at this time. 
*/ - .SurfaceBaseAddress = { NULL, offset }, - }; - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); -} - VkResult genX(CreateSampler)( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, -- cgit v1.2.3 From b4c16fd01a4ea7f2fd579ee30b8a6a8d00bdc3b3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Feb 2016 11:14:15 -0800 Subject: isl: Move isl_image.c to isl_storage_image.c --- src/intel/isl/Makefile.am | 2 +- src/intel/isl/isl_image.c | 188 -------------------------------------- src/intel/isl/isl_storage_image.c | 188 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 189 insertions(+), 189 deletions(-) delete mode 100644 src/intel/isl/isl_image.c create mode 100644 src/intel/isl/isl_storage_image.c (limited to 'src') diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am index b4af3c86f3d..ffc2669f6bf 100644 --- a/src/intel/isl/Makefile.am +++ b/src/intel/isl/Makefile.am @@ -63,7 +63,7 @@ libisl_la_SOURCES = \ isl_gen4.h \ isl_gen6.c \ isl_gen6.h \ - isl_image.c \ + isl_storage_image.c \ $(NULL) libisl_gen7_la_SOURCES = \ diff --git a/src/intel/isl/isl_image.c b/src/intel/isl/isl_image.c deleted file mode 100644 index 773160432b9..00000000000 --- a/src/intel/isl/isl_image.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "isl.h" -#include "brw_compiler.h" - -bool -isl_is_storage_image_format(enum isl_format format) -{ - /* XXX: Maybe we should put this in the CSV? */ - - switch (format) { - case ISL_FORMAT_R32G32B32A32_UINT: - case ISL_FORMAT_R32G32B32A32_SINT: - case ISL_FORMAT_R32G32B32A32_FLOAT: - case ISL_FORMAT_R32_UINT: - case ISL_FORMAT_R32_SINT: - case ISL_FORMAT_R32_FLOAT: - case ISL_FORMAT_R16G16B16A16_UINT: - case ISL_FORMAT_R16G16B16A16_SINT: - case ISL_FORMAT_R16G16B16A16_FLOAT: - case ISL_FORMAT_R32G32_UINT: - case ISL_FORMAT_R32G32_SINT: - case ISL_FORMAT_R32G32_FLOAT: - case ISL_FORMAT_R8G8B8A8_UINT: - case ISL_FORMAT_R8G8B8A8_SINT: - case ISL_FORMAT_R16G16_UINT: - case ISL_FORMAT_R16G16_SINT: - case ISL_FORMAT_R16G16_FLOAT: - case ISL_FORMAT_R8G8_UINT: - case ISL_FORMAT_R8G8_SINT: - case ISL_FORMAT_R16_UINT: - case ISL_FORMAT_R16_FLOAT: - case ISL_FORMAT_R16_SINT: - case ISL_FORMAT_R8_UINT: - case ISL_FORMAT_R8_SINT: - case ISL_FORMAT_R10G10B10A2_UINT: - case ISL_FORMAT_R10G10B10A2_UNORM: - case ISL_FORMAT_R11G11B10_FLOAT: - case ISL_FORMAT_R16G16B16A16_UNORM: - case ISL_FORMAT_R16G16B16A16_SNORM: - case ISL_FORMAT_R8G8B8A8_UNORM: - case ISL_FORMAT_R8G8B8A8_SNORM: - case ISL_FORMAT_R16G16_UNORM: - case ISL_FORMAT_R16G16_SNORM: - case ISL_FORMAT_R8G8_UNORM: - case ISL_FORMAT_R8G8_SNORM: - case ISL_FORMAT_R16_UNORM: - case ISL_FORMAT_R16_SNORM: - case ISL_FORMAT_R8_UNORM: - case ISL_FORMAT_R8_SNORM: - return true; - default: - return false; - } -} - -enum isl_format 
-isl_lower_storage_image_format(const struct isl_device *dev, - enum isl_format format) -{ - switch (format) { - /* These are never lowered. Up to BDW we'll have to fall back to untyped - * surface access for 128bpp formats. - */ - case ISL_FORMAT_R32G32B32A32_UINT: - case ISL_FORMAT_R32G32B32A32_SINT: - case ISL_FORMAT_R32G32B32A32_FLOAT: - case ISL_FORMAT_R32_UINT: - case ISL_FORMAT_R32_SINT: - case ISL_FORMAT_R32_FLOAT: - return format; - - /* From HSW to BDW the only 64bpp format supported for typed access is - * RGBA_UINT16. IVB falls back to untyped. - */ - case ISL_FORMAT_R16G16B16A16_UINT: - case ISL_FORMAT_R16G16B16A16_SINT: - case ISL_FORMAT_R16G16B16A16_FLOAT: - case ISL_FORMAT_R32G32_UINT: - case ISL_FORMAT_R32G32_SINT: - case ISL_FORMAT_R32G32_FLOAT: - return (ISL_DEV_GEN(dev) >= 9 ? format : - ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R16G16B16A16_UINT : - ISL_FORMAT_R32G32_UINT); - - /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component - * are supported. IVB doesn't support formats with more than one component - * for typed access. For 8 and 16 bpp formats IVB relies on the - * undocumented behavior that typed reads from R_UINT8 and R_UINT16 - * surfaces actually do a 32-bit misaligned read. The alternative would be - * to use two surface state entries with different formats for each image, - * one for reading (using R_UINT32) and another one for writing (using - * R_UINT8 or R_UINT16), but that would complicate the shaders we generate - * even more. - */ - case ISL_FORMAT_R8G8B8A8_UINT: - case ISL_FORMAT_R8G8B8A8_SINT: - return (ISL_DEV_GEN(dev) >= 9 ? format : - ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); - - case ISL_FORMAT_R16G16_UINT: - case ISL_FORMAT_R16G16_SINT: - case ISL_FORMAT_R16G16_FLOAT: - return (ISL_DEV_GEN(dev) >= 9 ? format : - ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? 
- ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); - - case ISL_FORMAT_R8G8_UINT: - case ISL_FORMAT_R8G8_SINT: - return (ISL_DEV_GEN(dev) >= 9 ? format : - ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); - - case ISL_FORMAT_R16_UINT: - case ISL_FORMAT_R16_FLOAT: - case ISL_FORMAT_R16_SINT: - return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R16_UINT); - - case ISL_FORMAT_R8_UINT: - case ISL_FORMAT_R8_SINT: - return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R8_UINT); - - /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported - * by the hardware. - */ - case ISL_FORMAT_R10G10B10A2_UINT: - case ISL_FORMAT_R10G10B10A2_UNORM: - case ISL_FORMAT_R11G11B10_FLOAT: - return ISL_FORMAT_R32_UINT; - - /* No normalized fixed-point formats are supported by the hardware. */ - case ISL_FORMAT_R16G16B16A16_UNORM: - case ISL_FORMAT_R16G16B16A16_SNORM: - return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R16G16B16A16_UINT : - ISL_FORMAT_R32G32_UINT); - - case ISL_FORMAT_R8G8B8A8_UNORM: - case ISL_FORMAT_R8G8B8A8_SNORM: - return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); - - case ISL_FORMAT_R16G16_UNORM: - case ISL_FORMAT_R16G16_SNORM: - return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? - ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); - - case ISL_FORMAT_R8G8_UNORM: - case ISL_FORMAT_R8G8_SNORM: - return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? 
- ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); - - case ISL_FORMAT_R16_UNORM: - case ISL_FORMAT_R16_SNORM: - return ISL_FORMAT_R16_UINT; - - case ISL_FORMAT_R8_UNORM: - case ISL_FORMAT_R8_SNORM: - return ISL_FORMAT_R8_UINT; - - default: - assert(!"Unknown image format"); - return ISL_FORMAT_UNSUPPORTED; - } -} diff --git a/src/intel/isl/isl_storage_image.c b/src/intel/isl/isl_storage_image.c new file mode 100644 index 00000000000..773160432b9 --- /dev/null +++ b/src/intel/isl/isl_storage_image.c @@ -0,0 +1,188 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl.h" +#include "brw_compiler.h" + +bool +isl_is_storage_image_format(enum isl_format format) +{ + /* XXX: Maybe we should put this in the CSV? 
*/ + + switch (format) { + case ISL_FORMAT_R32G32B32A32_UINT: + case ISL_FORMAT_R32G32B32A32_SINT: + case ISL_FORMAT_R32G32B32A32_FLOAT: + case ISL_FORMAT_R32_UINT: + case ISL_FORMAT_R32_SINT: + case ISL_FORMAT_R32_FLOAT: + case ISL_FORMAT_R16G16B16A16_UINT: + case ISL_FORMAT_R16G16B16A16_SINT: + case ISL_FORMAT_R16G16B16A16_FLOAT: + case ISL_FORMAT_R32G32_UINT: + case ISL_FORMAT_R32G32_SINT: + case ISL_FORMAT_R32G32_FLOAT: + case ISL_FORMAT_R8G8B8A8_UINT: + case ISL_FORMAT_R8G8B8A8_SINT: + case ISL_FORMAT_R16G16_UINT: + case ISL_FORMAT_R16G16_SINT: + case ISL_FORMAT_R16G16_FLOAT: + case ISL_FORMAT_R8G8_UINT: + case ISL_FORMAT_R8G8_SINT: + case ISL_FORMAT_R16_UINT: + case ISL_FORMAT_R16_FLOAT: + case ISL_FORMAT_R16_SINT: + case ISL_FORMAT_R8_UINT: + case ISL_FORMAT_R8_SINT: + case ISL_FORMAT_R10G10B10A2_UINT: + case ISL_FORMAT_R10G10B10A2_UNORM: + case ISL_FORMAT_R11G11B10_FLOAT: + case ISL_FORMAT_R16G16B16A16_UNORM: + case ISL_FORMAT_R16G16B16A16_SNORM: + case ISL_FORMAT_R8G8B8A8_UNORM: + case ISL_FORMAT_R8G8B8A8_SNORM: + case ISL_FORMAT_R16G16_UNORM: + case ISL_FORMAT_R16G16_SNORM: + case ISL_FORMAT_R8G8_UNORM: + case ISL_FORMAT_R8G8_SNORM: + case ISL_FORMAT_R16_UNORM: + case ISL_FORMAT_R16_SNORM: + case ISL_FORMAT_R8_UNORM: + case ISL_FORMAT_R8_SNORM: + return true; + default: + return false; + } +} + +enum isl_format +isl_lower_storage_image_format(const struct isl_device *dev, + enum isl_format format) +{ + switch (format) { + /* These are never lowered. Up to BDW we'll have to fall back to untyped + * surface access for 128bpp formats. + */ + case ISL_FORMAT_R32G32B32A32_UINT: + case ISL_FORMAT_R32G32B32A32_SINT: + case ISL_FORMAT_R32G32B32A32_FLOAT: + case ISL_FORMAT_R32_UINT: + case ISL_FORMAT_R32_SINT: + case ISL_FORMAT_R32_FLOAT: + return format; + + /* From HSW to BDW the only 64bpp format supported for typed access is + * RGBA_UINT16. IVB falls back to untyped. 
+ */ + case ISL_FORMAT_R16G16B16A16_UINT: + case ISL_FORMAT_R16G16B16A16_SINT: + case ISL_FORMAT_R16G16B16A16_FLOAT: + case ISL_FORMAT_R32G32_UINT: + case ISL_FORMAT_R32G32_SINT: + case ISL_FORMAT_R32G32_FLOAT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16B16A16_UINT : + ISL_FORMAT_R32G32_UINT); + + /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component + * are supported. IVB doesn't support formats with more than one component + * for typed access. For 8 and 16 bpp formats IVB relies on the + * undocumented behavior that typed reads from R_UINT8 and R_UINT16 + * surfaces actually do a 32-bit misaligned read. The alternative would be + * to use two surface state entries with different formats for each image, + * one for reading (using R_UINT32) and another one for writing (using + * R_UINT8 or R_UINT16), but that would complicate the shaders we generate + * even more. + */ + case ISL_FORMAT_R8G8B8A8_UINT: + case ISL_FORMAT_R8G8B8A8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R16G16_UINT: + case ISL_FORMAT_R16G16_SINT: + case ISL_FORMAT_R16G16_FLOAT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R8G8_UINT: + case ISL_FORMAT_R8G8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R16_UINT: + case ISL_FORMAT_R16_FLOAT: + case ISL_FORMAT_R16_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R8_UINT: + case ISL_FORMAT_R8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R8_UINT); + + /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported + * by the hardware. 
+ */ + case ISL_FORMAT_R10G10B10A2_UINT: + case ISL_FORMAT_R10G10B10A2_UNORM: + case ISL_FORMAT_R11G11B10_FLOAT: + return ISL_FORMAT_R32_UINT; + + /* No normalized fixed-point formats are supported by the hardware. */ + case ISL_FORMAT_R16G16B16A16_UNORM: + case ISL_FORMAT_R16G16B16A16_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16B16A16_UINT : + ISL_FORMAT_R32G32_UINT); + + case ISL_FORMAT_R8G8B8A8_UNORM: + case ISL_FORMAT_R8G8B8A8_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R16G16_UNORM: + case ISL_FORMAT_R16G16_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R8G8_UNORM: + case ISL_FORMAT_R8G8_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R16_UNORM: + case ISL_FORMAT_R16_SNORM: + return ISL_FORMAT_R16_UINT; + + case ISL_FORMAT_R8_UNORM: + case ISL_FORMAT_R8_SNORM: + return ISL_FORMAT_R8_UINT; + + default: + assert(!"Unknown image format"); + return ISL_FORMAT_UNSUPPORTED; + } +} -- cgit v1.2.3 From e9d126f23b66751ae644c3125668ecf5d1e0f86b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Feb 2016 19:49:12 -0800 Subject: anv/image: Add a ussage_mask field to image_view_init This allows us to avoid doing some unneeded work on the meta paths where we know that the image view will be used for exactly one thing. The meta paths also sometimes do things that aren't quite valid like setting the array slice on a 3-D texture and we want to limit the number of paths that need to be able to sensibly handle the lies. 
--- src/intel/vulkan/anv_image.c | 11 ++++++----- src/intel/vulkan/anv_meta_blit.c | 20 ++++++++++---------- src/intel/vulkan/anv_meta_clear.c | 2 +- src/intel/vulkan/anv_meta_resolve.c | 4 ++-- src/intel/vulkan/anv_private.h | 3 ++- 5 files changed, 21 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 145db6de039..4caab5804ae 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -478,7 +478,8 @@ anv_image_view_init(struct anv_image_view *iview, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer, - uint32_t offset) + uint32_t offset, + VkImageUsageFlags usage_mask) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; @@ -577,7 +578,7 @@ anv_image_view_init(struct anv_image_view *iview, cube_usage = 0; } - if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + if (image->usage & usage_mask & VK_IMAGE_USAGE_SAMPLED_BIT) { iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer); isl_view.usage = cube_usage | ISL_SURF_USAGE_TEXTURE_BIT; @@ -594,7 +595,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->sampler_surface_state.alloc_size = 0; } - if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + if (image->usage & usage_mask & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); isl_view.usage = cube_usage | ISL_SURF_USAGE_RENDER_TARGET_BIT; @@ -611,7 +612,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->color_rt_surface_state.alloc_size = 0; } - if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + if (image->usage & usage_mask & VK_IMAGE_USAGE_STORAGE_BIT) { iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); if (has_matching_storage_typed_format(device, format)) { @@ -650,7 +651,7 @@ anv_CreateImageView(VkDevice _device, if 
(view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - anv_image_view_init(view, device, pCreateInfo, NULL, 0); + anv_image_view_init(view, device, pCreateInfo, NULL, 0, ~0); *pView = anv_image_view_to_handle(view); diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 8ef943aa512..2ec428b5f4a 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -451,7 +451,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .layerCount = 1 }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); struct anv_image_view dest_iview; anv_image_view_init(&dest_iview, cmd_buffer->device, @@ -468,7 +468,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .layerCount = 1, }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); meta_emit_blit(cmd_buffer, anv_image_from_handle(src_image), @@ -711,7 +711,7 @@ void anv_CmdCopyImage( .layerCount = pRegions[r].dstSubresource.layerCount, }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); const uint32_t dest_base_array_slice = anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, @@ -758,7 +758,7 @@ void anv_CmdCopyImage( .layerCount = 1 }, }, - cmd_buffer, img_o); + cmd_buffer, img_o, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, &pRegions[r].extent); @@ -826,7 +826,7 @@ void anv_CmdBlitImage( .layerCount = 1 }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); const VkOffset3D dest_offset = { .x = pRegions[r].dstOffsets[0].x, @@ -876,7 +876,7 @@ void anv_CmdBlitImage( .layerCount = 1 }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); meta_emit_blit(cmd_buffer, src_image, &src_iview, @@ -991,7 +991,7 @@ void anv_CmdCopyBufferToImage( .layerCount = 1, }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); uint32_t img_x = 0; uint32_t img_y = 0; @@ -1025,7 +1025,7 @@ void 
anv_CmdCopyBufferToImage( .layerCount = 1 }, }, - cmd_buffer, img_o); + cmd_buffer, img_o, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, &pRegions[r].imageExtent); @@ -1104,7 +1104,7 @@ void anv_CmdCopyImageToBuffer( .layerCount = pRegions[r].imageSubresource.layerCount, }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); struct anv_image *dest_image = make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, @@ -1140,7 +1140,7 @@ void anv_CmdCopyImageToBuffer( .layerCount = 1 }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); meta_emit_blit(cmd_buffer, anv_image_from_handle(srcImage), diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 739ae09582c..227f8f35115 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -811,7 +811,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, .layerCount = 1 }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); VkFramebuffer fb; anv_CreateFramebuffer(device_h, diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 8eb2548b5ae..19fb3ad3003 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -737,7 +737,7 @@ void anv_CmdResolveImage( .layerCount = 1, }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); struct anv_image_view dest_iview; anv_image_view_init(&dest_iview, cmd_buffer->device, @@ -754,7 +754,7 @@ void anv_CmdResolveImage( .layerCount = 1, }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); VkFramebuffer fb_h; anv_CreateFramebuffer(device_h, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index e50a6dbdbfe..05cb77c5509 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1680,7 +1680,8 @@ void 
anv_image_view_init(struct anv_image_view *view, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer, - uint32_t offset); + uint32_t offset, + VkImageUsageFlags usage_mask); void anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, -- cgit v1.2.3 From 7363024cbda0c513ad76d22580ce363f30ae1eda Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Feb 2016 11:38:14 -0800 Subject: anv: Fill out image_param structs at view creation time --- src/intel/vulkan/anv_cmd_buffer.c | 6 ++---- src/intel/vulkan/anv_image.c | 5 +++++ src/intel/vulkan/anv_private.h | 5 +++++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 827c3ed4142..418a143b7bc 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -812,8 +812,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct brw_image_param *image_param = &cmd_buffer->state.push_constants[stage]->images[image++]; - anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view, - image_param); + *image_param = desc->image_view->storage_image_param; image_param->surface_idx = bias + s; break; } @@ -838,8 +837,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct brw_image_param *image_param = &cmd_buffer->state.push_constants[stage]->images[image++]; - anv_buffer_view_fill_image_param(cmd_buffer->device, desc->buffer_view, - image_param); + *image_param = desc->buffer_view->storage_image_param; image_param->surface_idx = bias + s; break; diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 4caab5804ae..72f29f1dba8 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -630,6 +630,9 @@ anv_image_view_init(struct anv_image_view *iview, iview->bo->size - iview->offset, 1); } + 
anv_image_view_fill_image_param(device, iview, + &iview->storage_image_param); + if (!device->info.has_llc) anv_state_clflush(iview->storage_surface_state); } else { @@ -734,6 +737,8 @@ anv_CreateBufferView(VkDevice _device, (storage_format == ISL_FORMAT_RAW ? 1 : format->isl_layout->bs)); + anv_buffer_view_fill_image_param(device, view, + &view->storage_image_param); } else { view->storage_surface_state = (struct anv_state){ 0 }; } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 05cb77c5509..cb4f9736fdf 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -41,6 +41,7 @@ #endif #include "brw_device_info.h" +#include "brw_compiler.h" #include "util/macros.h" #include "util/list.h" @@ -1659,6 +1660,8 @@ struct anv_image_view { /** RENDER_SURFACE_STATE when using image as a storage image. */ struct anv_state storage_surface_state; + + struct brw_image_param storage_image_param; }; struct anv_image_create_info { @@ -1717,6 +1720,8 @@ struct anv_buffer_view { struct anv_state surface_state; struct anv_state storage_surface_state; + + struct brw_image_param storage_image_param; }; const struct anv_format * -- cgit v1.2.3 From bd6470fa6cdc3d969c170d4d9a8aeee12747ecaf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Feb 2016 12:45:42 -0800 Subject: isl: Add helpers for filling out brw_image_param --- src/intel/isl/isl.h | 12 +++++ src/intel/isl/isl_storage_image.c | 107 +++++++++++++++++++++++++++++++++++++- 2 files changed, 118 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 151f1ca27cb..7456975014b 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -1028,6 +1028,18 @@ void isl_buffer_fill_state_s(const struct isl_device *dev, void *state, const struct isl_buffer_fill_state_info *restrict info); +void +isl_surf_fill_image_param(const struct isl_device *dev, + struct brw_image_param *param, + const struct isl_surf *surf, + 
const struct isl_view *view); + +void +isl_buffer_fill_image_param(const struct isl_device *dev, + struct brw_image_param *param, + enum isl_format format, + uint64_t size); + /** * Alignment of the upper-left sample of each subimage, in units of surface * elements. diff --git a/src/intel/isl/isl_storage_image.c b/src/intel/isl/isl_storage_image.c index 773160432b9..2b5b5cdbaed 100644 --- a/src/intel/isl/isl_storage_image.c +++ b/src/intel/isl/isl_storage_image.c @@ -21,7 +21,7 @@ * IN THE SOFTWARE. */ -#include "isl.h" +#include "isl_priv.h" #include "brw_compiler.h" bool @@ -186,3 +186,108 @@ isl_lower_storage_image_format(const struct isl_device *dev, return ISL_FORMAT_UNSUPPORTED; } } + +static const struct brw_image_param image_param_defaults = { + /* Set the swizzling shifts to all-ones to effectively disable + * swizzling -- See emit_address_calculation() in + * brw_fs_surface_builder.cpp for a more detailed explanation of + * these parameters. + */ + .swizzling = { 0xff, 0xff }, +}; + +void +isl_surf_fill_image_param(const struct isl_device *dev, + struct brw_image_param *param, + const struct isl_surf *surf, + const struct isl_view *view) +{ + *param = image_param_defaults; + + param->size[0] = isl_minify(surf->logical_level0_px.w, view->base_level); + param->size[1] = isl_minify(surf->logical_level0_px.h, view->base_level); + if (surf->dim == ISL_SURF_DIM_3D) { + param->size[2] = isl_minify(surf->logical_level0_px.d, view->base_level); + } else { + param->size[2] = surf->logical_level0_px.array_len - + view->base_array_layer; + } + + isl_surf_get_image_offset_el(surf, view->base_level, view->base_array_layer, + 0, ¶m->offset[0], ¶m->offset[1]); + + const int cpp = isl_format_get_layout(surf->format)->bs; + param->stride[0] = cpp; + param->stride[1] = surf->row_pitch / cpp; + + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + if (ISL_DEV_GEN(dev) < 9 && surf->dim == ISL_SURF_DIM_3D) { + param->stride[2] = 
isl_align_npot(param->size[0], image_align_sa.w); + param->stride[3] = isl_align_npot(param->size[1], image_align_sa.h); + } else { + param->stride[2] = 0; + param->stride[3] = isl_surf_get_array_pitch_el_rows(surf); + } + + switch (surf->tiling) { + case ISL_TILING_LINEAR: + /* image_param_defaults is good enough */ + break; + + case ISL_TILING_X: + /* An X tile is a rectangular block of 512x8 bytes. */ + param->tiling[0] = isl_log2u(512 / cpp); + param->tiling[1] = isl_log2u(8); + + if (dev->has_bit6_swizzling) { + /* Right shifts required to swizzle bits 9 and 10 of the memory + * address with bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 4; + } + break; + + case ISL_TILING_Y0: + /* The layout of a Y-tiled surface in memory isn't really fundamentally + * different to the layout of an X-tiled surface, we simply pretend that + * the surface is broken up in a number of smaller 16Bx32 tiles, each + * one arranged in X-major order just like is the case for X-tiling. + */ + param->tiling[0] = isl_log2u(16 / cpp); + param->tiling[1] = isl_log2u(32); + + if (dev->has_bit6_swizzling) { + /* Right shift required to swizzle bit 9 of the memory address with + * bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 0xff; + } + break; + + default: + assert(!"Unhandled storage image tiling"); + } + + /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The + * address calculation algorithm (emit_address_calculation() in + * brw_fs_surface_builder.cpp) handles this as a sort of tiling with + * modulus equal to the LOD. + */ + param->tiling[2] = (ISL_DEV_GEN(dev) < 9 && surf->dim == ISL_SURF_DIM_3D ? 
+ view->base_level : 0); +} + +void +isl_buffer_fill_image_param(const struct isl_device *dev, + struct brw_image_param *param, + enum isl_format format, + uint64_t size) +{ + *param = image_param_defaults; + + param->stride[0] = isl_format_layouts[format].bs; + param->size[0] = size / param->stride[0]; +} -- cgit v1.2.3 From 4b34f2ccb8b97aaf46b2dadb8098463969064753 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Feb 2016 12:50:31 -0800 Subject: anv/image: Use isl for filling brw_image_param --- src/intel/vulkan/anv_image.c | 117 +++---------------------------------------- 1 file changed, 6 insertions(+), 111 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 72f29f1dba8..46cf2413468 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -630,8 +630,9 @@ anv_image_view_init(struct anv_image_view *iview, iview->bo->size - iview->offset, 1); } - anv_image_view_fill_image_param(device, iview, - &iview->storage_image_param); + isl_surf_fill_image_param(&device->isl_dev, + &iview->storage_image_param, + &surface->isl, &isl_view); if (!device->info.has_llc) anv_state_clflush(iview->storage_surface_state); @@ -737,8 +738,9 @@ anv_CreateBufferView(VkDevice _device, (storage_format == ISL_FORMAT_RAW ? 
1 : format->isl_layout->bs)); - anv_buffer_view_fill_image_param(device, view, - &view->storage_image_param); + isl_buffer_fill_image_param(&device->isl_dev, + &view->storage_image_param, + view->format, view->range); } else { view->storage_surface_state = (struct anv_state){ 0 }; } @@ -817,110 +819,3 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag return NULL; } } - -static void -image_param_defaults(struct brw_image_param *param) -{ - memset(param, 0, sizeof *param); - /* Set the swizzling shifts to all-ones to effectively disable swizzling -- - * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more - * detailed explanation of these parameters. - */ - param->swizzling[0] = 0xff; - param->swizzling[1] = 0xff; -} - -void -anv_image_view_fill_image_param(struct anv_device *device, - struct anv_image_view *view, - struct brw_image_param *param) -{ - image_param_defaults(param); - - const struct isl_surf *surf = &view->image->color_surface.isl; - const int cpp = isl_format_get_layout(surf->format)->bs; - const struct isl_extent3d image_align_sa = - isl_surf_get_image_alignment_sa(surf); - - param->size[0] = view->extent.width; - param->size[1] = view->extent.height; - if (surf->dim == ISL_SURF_DIM_3D) { - param->size[2] = view->extent.depth; - } else { - param->size[2] = surf->logical_level0_px.array_len - view->base_layer; - } - - isl_surf_get_image_offset_el(surf, view->base_mip, view->base_layer, 0, - ¶m->offset[0], ¶m->offset[1]); - - param->stride[0] = cpp; - param->stride[1] = surf->row_pitch / cpp; - - if (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D) { - param->stride[2] = util_align_npot(param->size[0], image_align_sa.w); - param->stride[3] = util_align_npot(param->size[1], image_align_sa.h); - } else { - param->stride[2] = 0; - param->stride[3] = isl_surf_get_array_pitch_el_rows(surf); - } - - switch (surf->tiling) { - case ISL_TILING_LINEAR: - /* image_param_defaults is good enough */ - break; - 
- case ISL_TILING_X: - /* An X tile is a rectangular block of 512x8 bytes. */ - param->tiling[0] = util_logbase2(512 / cpp); - param->tiling[1] = util_logbase2(8); - - if (device->isl_dev.has_bit6_swizzling) { - /* Right shifts required to swizzle bits 9 and 10 of the memory - * address with bit 6. - */ - param->swizzling[0] = 3; - param->swizzling[1] = 4; - } - break; - - case ISL_TILING_Y0: - /* The layout of a Y-tiled surface in memory isn't really fundamentally - * different to the layout of an X-tiled surface, we simply pretend that - * the surface is broken up in a number of smaller 16Bx32 tiles, each - * one arranged in X-major order just like is the case for X-tiling. - */ - param->tiling[0] = util_logbase2(16 / cpp); - param->tiling[1] = util_logbase2(32); - - if (device->isl_dev.has_bit6_swizzling) { - /* Right shift required to swizzle bit 9 of the memory address with - * bit 6. - */ - param->swizzling[0] = 3; - param->swizzling[1] = 0xff; - } - break; - - default: - assert(!"Unhandled storage image tiling"); - } - - /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The - * address calculation algorithm (emit_address_calculation() in - * brw_fs_surface_builder.cpp) handles this as a sort of tiling with - * modulus equal to the LOD. - */ - param->tiling[2] = (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D ? 
- view->base_mip : 0); -} - -void -anv_buffer_view_fill_image_param(struct anv_device *device, - struct anv_buffer_view *view, - struct brw_image_param *param) -{ - image_param_defaults(param); - - param->stride[0] = isl_format_layouts[view->format].bs; - param->size[0] = view->range / param->stride[0]; -} -- cgit v1.2.3 From ad50896c8769adcf141619774f8c156a2bcf920a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 27 Feb 2016 09:26:04 -0800 Subject: anv/gen7: Only try to get the depth format the surface has depth --- src/intel/vulkan/gen7_cmd_buffer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 7377487cf7e..9681f22dc3d 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -458,7 +458,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); const struct anv_image *image = iview ? iview->image : NULL; - const uint32_t depth_format = image ? + const struct anv_format *anv_format = + iview ? anv_format_for_vk_format(iview->vk_format) : NULL; + const bool has_depth = iview && anv_format->has_depth; + const uint32_t depth_format = has_depth ? 
isl_surf_get_depth_format(&cmd_buffer->device->isl_dev, &image->depth_surface.isl) : D16_UNORM; -- cgit v1.2.3 From e18a2f037a074788ee3cf6cb00697b5b0152fe29 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 27 Feb 2016 09:43:14 -0800 Subject: anv/gen7: Set MaximumNumberofThreads in the dummy PS packet --- src/intel/vulkan/gen7_pipeline.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 009a79ac815..2167f296b2f 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -338,7 +338,11 @@ genX(graphics_pipeline_create)( .PointRasterizationRule = RASTRULE_UPPER_RIGHT); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + /* Even if no fragments are ever dispatched, the hardware hangs if we + * don't at least set the maximum number of threads. + */ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .MaximumNumberofThreads = device->info.max_wm_threads - 1); } else { const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; -- cgit v1.2.3 From 46b7c242da7c7c9ea7877a2c4b1fecdf5c1c0452 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 27 Feb 2016 09:46:40 -0800 Subject: anv/gen7: Clean up the dummy PS case Fix whitespace and remove dead comments --- src/intel/vulkan/gen7_pipeline.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 2167f296b2f..7151e36f17d 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -323,27 +323,21 @@ genX(graphics_pipeline_create)( } if (pipeline->ps_ksp0 == NO_KERNEL) { - anv_finishme("disabling ps"); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE)); - /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE)); - - /* FIXME-GEN7: This 
needs a lot more work, cf gen7 upload_wm_state(). */ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), - .StatisticsEnable = true, - .ThreadDispatchEnable = false, - .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ - .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ - .EarlyDepthStencilControl = EDSC_NORMAL, - .PointRasterizationRule = RASTRULE_UPPER_RIGHT); - - - /* Even if no fragments are ever dispatched, the hardware hangs if we - * don't at least set the maximum number of threads. - */ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), - .MaximumNumberofThreads = device->info.max_wm_threads - 1); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), + .StatisticsEnable = true, + .ThreadDispatchEnable = false, + .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ + .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ + .EarlyDepthStencilControl = EDSC_NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT); + /* Even if no fragments are ever dispatched, the hardware hangs if we + * don't at least set the maximum number of threads. 
+ */ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .MaximumNumberofThreads = device->info.max_wm_threads - 1); } else { const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || -- cgit v1.2.3 From 45d8ce07a5838977bd875fdeb008ccecc6eb976e Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sun, 28 Feb 2016 10:44:08 -0800 Subject: anv/pipeline: Set stage URB size to zero if it is unused Signed-off-by: Jordan Justen --- src/intel/vulkan/anv_pipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 1173b4f0cba..f6e3aedda40 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -897,7 +897,7 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { pipeline->urb.push_size[i] = - (pipeline->active_stages & (1 << i)) ? size_per_stage : 1; + (pipeline->active_stages & (1 << i)) ? 
size_per_stage : 0; } pipeline->urb.push_size[MESA_SHADER_FRAGMENT] = -- cgit v1.2.3 From ef06ddb08a066a72b9a98cd2fbef8a74c99b8b32 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sun, 28 Feb 2016 10:47:35 -0800 Subject: anv/pipeline: Set FS URB space to zero if the FS is unused Signed-off-by: Jordan Justen --- src/intel/vulkan/anv_pipeline.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index f6e3aedda40..81d0d9c9bd9 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -894,14 +894,17 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) const unsigned stages = _mesa_bitcount(pipeline->active_stages & VK_SHADER_STAGE_ALL_GRAPHICS); const unsigned size_per_stage = push_constant_kb / stages; + unsigned used_kb = 0; for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { pipeline->urb.push_size[i] = (pipeline->active_stages & (1 << i)) ? size_per_stage : 0; + used_kb += pipeline->urb.push_size[i]; + assert(used_kb <= push_constant_kb); } pipeline->urb.push_size[MESA_SHADER_FRAGMENT] = - push_constant_kb - size_per_stage * (stages - 1); + push_constant_kb - used_kb; } static void -- cgit v1.2.3 From 72efb68d48082a3da819ca47adc12733a3e8d105 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sun, 28 Feb 2016 10:39:17 -0800 Subject: anv/pipeline: Set URB offset to zero if size is zero After 3ecd357d816dc71b2c6ebd6ace38c76ebb25674e, it may be possible for the VS to get assigned all of the URB space. On Ivy Bridge, this will cause the offset for the other stages to be 16, which cannot be packed into the ConstantBufferOffset field of 3DSTATE_PUSH_CONSTANT_ALLOC_*. Instead we can set the offset to zero if the stage size is zero. 
Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_pipeline_util.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index cf4e0358741..d940aba67b5 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -202,10 +202,11 @@ emit_urb_setup(struct anv_pipeline *pipeline) unsigned push_start = 0; for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) { + unsigned push_size = pipeline->urb.push_size[i]; anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), ._3DCommandSubOpcode = 18 + i, - .ConstantBufferOffset = push_start, - .ConstantBufferSize = pipeline->urb.push_size[i]); + .ConstantBufferOffset = (push_size > 0) ? push_start : 0, + .ConstantBufferSize = push_size); push_start += pipeline->urb.push_size[i]; } -- cgit v1.2.3 From b00b42d99b0fe1e7009858d20bf6e6fc880b8fa7 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 24 Feb 2016 16:10:08 -0800 Subject: nir/spirv: Use the new bare sampler type --- src/compiler/nir/spirv/spirv_to_nir.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index cb069b93ae8..5a7184acac6 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -769,12 +769,8 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, * thrown away the moment you combine it with an image. What really * matters is that it's a sampler type as opposed to an integer type * so the backend knows what to do. - * - * TODO: Eventually we should consider adding a "bare sampler" type - * to glsl_types. 
*/ - val->type->type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, - GLSL_TYPE_FLOAT); + val->type->type = glsl_bare_sampler_type(); break; case SpvOpTypeOpaque: -- cgit v1.2.3 From 1af5dacd76afe410374d442f4e0cd50820103fe8 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 29 Jan 2016 15:31:30 -0800 Subject: anv/gen7: Enable SLM in L3 cache control register Port 1983003 to gen7. Signed-off-by: Jordan Justen --- src/intel/vulkan/gen7_cmd_buffer.c | 62 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 9681f22dc3d..26339bbf0d9 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -332,6 +332,65 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; } +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + .RegisterOffset = reg, + .DataDWord = imm); +} + +#define GEN7_L3SQCREG1 0xb010 +#define GEN7_L3CNTLREG2 0xb020 +#define GEN7_L3CNTLREG3 0xb024 + +static void +config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) +{ + /* References for GL state: + * + * - commits e307cfa..228d5a3 + * - src/mesa/drivers/dri/i965/gen7_l3_state.c + */ + + uint32_t l3c2_val = enable_slm ? + /* All = 0 ways; URB = 16 ways; DC and RO = 16; SLM = 1 */ + /*0x02040021*/0x010000a1 : + /* All = 0 ways; URB = 32 ways; DC = 0; RO = 32; SLM = 0 */ + /*0x04080040*/0x02000030; + bool changed = cmd_buffer->state.current_l3_config != l3c2_val; + + if (changed) { + /* According to the hardware docs, the L3 partitioning can only be changed + * while the pipeline is completely drained and the caches are flushed, + * which involves a first PIPE_CONTROL flush which stalls the pipeline and + * initiates invalidation of the relevant caches... 
+ */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + /* ...followed by a second stalling flush which guarantees that + * invalidation is complete when the L3 configuration registers are + * modified. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + anv_finishme("write GEN7_L3SQCREG1"); + emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG2, l3c2_val); + emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG3, + enable_slm ? 0x00040810 : 0x00040410); + cmd_buffer->state.current_l3_config = l3c2_val; + } +} + void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { @@ -340,6 +399,9 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + config_l3(cmd_buffer, needs_slm); + if (cmd_buffer->state.current_pipeline != GPGPU) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), .PipelineSelection = GPGPU); -- cgit v1.2.3 From 635c0e92b777aefc9f82ffebfe982f57ac4503a8 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 30 Jan 2016 00:25:16 -0800 Subject: anv: Set CURBEAllocationSize in MEDIA_VFE_STATE Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_pipeline.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 41a5d0f889c..1605661f971 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -83,6 +83,27 @@ genX(compute_pipeline_create)( pipeline->use_repclear = false; const struct 
brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = + (prog_data->nr_params + local_id_dwords) * 4; + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + uint32_t group_size = cs_prog_data->local_size[0] * + cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; + pipeline->cs_thread_width_max = + DIV_ROUND_UP(group_size, cs_prog_data->simd_size); + uint32_t remainder = group_size & (cs_prog_data->simd_size - 1); + + if (remainder > 0) + pipeline->cs_right_mask = ~0u >> (32 - remainder); + else + pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size); + + const uint32_t vfe_curbe_allocation = + push_constant_regs * pipeline->cs_thread_width_max; anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], @@ -100,19 +121,7 @@ genX(compute_pipeline_create)( .BypassGatewayControl = true, #endif .URBEntryAllocationSize = GEN_GEN <= 7 ? 
0 : 2, - .CURBEAllocationSize = 0); - - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - uint32_t group_size = prog_data->local_size[0] * - prog_data->local_size[1] * prog_data->local_size[2]; - pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); - uint32_t remainder = group_size & (prog_data->simd_size - 1); - - if (remainder > 0) - pipeline->cs_right_mask = ~0u >> (32 - remainder); - else - pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); - + .CURBEAllocationSize = vfe_curbe_allocation); *pPipeline = anv_pipeline_to_handle(pipeline); -- cgit v1.2.3 From 9d8bae613779e0cc7382c9252ccd7f5e7cd5cada Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 29 Feb 2016 10:55:39 -0800 Subject: anv: Don't advertise pipelineStatisticsQuery We don't support that just yet. Reported-by: Jacek Konieczny --- src/intel/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 857c4b1d1f4..c68280fe8d7 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -373,7 +373,7 @@ void anv_GetPhysicalDeviceFeatures( .textureCompressionASTC_LDR = true, .textureCompressionBC = true, .occlusionQueryPrecise = true, - .pipelineStatisticsQuery = true, + .pipelineStatisticsQuery = false, .vertexPipelineStoresAndAtomics = pdevice->info->gen >= 8, .fragmentStoresAndAtomics = true, .shaderTessellationAndGeometryPointSize = true, -- cgit v1.2.3 From 74b7b59db5d3ce986f92599b14feaaade63f7b12 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 29 Feb 2016 11:24:34 -0800 Subject: isl/surface_state: Fix array spacing on Gen7 v2: Don't cast the enum to a boolean (Jason) Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/isl/isl_surface_state.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/intel/isl/isl_surface_state.c 
b/src/intel/isl/isl_surface_state.c index 12f4fb6bd98..0f45100fba3 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -227,6 +227,11 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, .VerticalLineStride = 0, .VerticalLineStrideOffset = 0, +#if (GEN_GEN == 7) + .SurfaceArraySpacing = info->surf->array_pitch_span == + ISL_ARRAY_PITCH_SPAN_COMPACT, +#endif + #if GEN_GEN >= 8 .SamplerL2BypassModeDisable = true, #endif -- cgit v1.2.3 From 51b618285d846295ef90fa49364d39eea4843801 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 26 Feb 2016 22:47:59 -0800 Subject: anv/pipeline: Use dynamic checks for max push constants The GEN_GEN macros aren't available in anv_pipeline since it only gets compiled once for the whold driver. --- src/intel/vulkan/anv_pipeline.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 81d0d9c9bd9..df265842ccc 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -783,13 +783,14 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; /* Reserve space for push constants */ -#if GEN_GEN >= 8 - unsigned push_constant_kb = 32; -#elif GEN_IS_HASWELL - unsigned push_constant_kb = pipeline->device->info.gt == 3 ? 32 : 16; -#else - unsigned push_constant_kb = 16; -#endif + unsigned push_constant_kb; + if (pipeline->device->info.gen >= 8) + push_constant_kb = 32; + else if (pipeline->device->info.is_haswell) + push_constant_kb = pipeline->device->info.gt == 3 ? 
32 : 16; + else + push_constant_kb = 16; + unsigned push_constant_bytes = push_constant_kb * 1024; unsigned push_constant_chunks = push_constant_bytes / chunk_size_bytes; -- cgit v1.2.3 From 6986ae35adbd83ff4f3c84946e998db488416b72 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 29 Feb 2016 14:13:07 -0800 Subject: anv/pipeline: Avoid a division by zero --- src/intel/vulkan/anv_pipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index df265842ccc..cbd3a21abd7 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -894,7 +894,7 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) const unsigned stages = _mesa_bitcount(pipeline->active_stages & VK_SHADER_STAGE_ALL_GRAPHICS); - const unsigned size_per_stage = push_constant_kb / stages; + const unsigned size_per_stage = stages ? (push_constant_kb / stages) : 0; unsigned used_kb = 0; for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { -- cgit v1.2.3 From 9715724015b49278fa3d110221ab39e1ed00c8c2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 29 Feb 2016 14:13:56 -0800 Subject: anv/pipeline: Follow push constant alignment restrictions on BDW+ and HSW gt3 --- src/intel/vulkan/anv_pipeline.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index cbd3a21abd7..3dab205e5cc 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -894,9 +894,16 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) const unsigned stages = _mesa_bitcount(pipeline->active_stages & VK_SHADER_STAGE_ALL_GRAPHICS); - const unsigned size_per_stage = stages ? (push_constant_kb / stages) : 0; + unsigned size_per_stage = stages ? 
(push_constant_kb / stages) : 0; unsigned used_kb = 0; + /* Broadwell+ and Haswell gt3 require that the push constant sizes be in + * units of 2KB. Incidentally, these are the same platforms that have + * 32KB worth of push constant space. + */ + if (push_constant_kb == 32) + size_per_stage &= ~1u; + for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { pipeline->urb.push_size[i] = (pipeline->active_stages & (1 << i)) ? size_per_stage : 0; -- cgit v1.2.3 From d29fd1c7cba7775298fb5a5e23d2c5026b3997af Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 29 Feb 2016 14:27:10 -0800 Subject: anv/cmd_buffer: Re-emit push constants packets for all stages --- src/intel/vulkan/gen7_cmd_buffer.c | 24 ++++++++++++------------ src/intel/vulkan/gen8_cmd_buffer.c | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 26339bbf0d9..b0456ae4c67 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -52,20 +52,20 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - if (state.offset == 0) - continue; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer0 = { .offset = state.offset }, - .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - - flushed |= mesa_to_vk_shader_stage(stage); + if (state.offset == 0) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage]); + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer0 = { .offset = state.offset }, + .ConstantBuffer0ReadLength = 
DIV_ROUND_UP(state.alloc_size, 32), + }); + } } - cmd_buffer->state.push_constants_dirty &= ~flushed; + cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS; return flushed; } diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 3221f5e2dc4..2e979d92760 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -52,20 +52,20 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - if (state.offset == 0) - continue; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, - .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - - flushed |= mesa_to_vk_shader_stage(stage); + if (state.offset == 0) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage]); + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, + .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + } } - cmd_buffer->state.push_constants_dirty &= ~flushed; + cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS; return flushed; } -- cgit v1.2.3 From 097564bb8e30e3c13674a2aa113c373657628eb1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 29 Feb 2016 14:27:34 -0800 Subject: anv/cmd_buffer: Dirty push constants when changing pipelines. 
--- src/intel/vulkan/gen7_cmd_buffer.c | 11 +++++++++++ src/intel/vulkan/gen8_cmd_buffer.c | 11 +++++++++++ 2 files changed, 22 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index b0456ae4c67..d2c4297cbca 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -470,6 +470,17 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) gen7_cmd_buffer_emit_state_base_address(cmd_buffer); anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: + * + * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to + * the next 3DPRIMITIVE command after programming the + * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" + * + * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of + * pipeline setup, we need to dirty push constants. + */ + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; } if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 2e979d92760..9dc2abd1f29 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -286,6 +286,17 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_emit_state_base_address(cmd_buffer); anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: + * + * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to + * the next 3DPRIMITIVE command after programming the + * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" + * + * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of + * pipeline setup, we need to dirty push constants. 
+ */ + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; } /* We emit the binding tables and sampler tables first, then emit push -- cgit v1.2.3 From 3f8df795c145113de32a0c5a30607a67f1d94839 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 29 Feb 2016 16:47:39 -0800 Subject: genxml: Break output detail of 3DSTATE_SBE on gen7 into a struct This makes it work like 3DSTATE_SBE_SWIZ on gen8+ which is much more convenient. --- src/intel/genxml/gen7.xml | 58 ++++++++++++++++++---------------------------- src/intel/genxml/gen75.xml | 58 ++++++++++++++++++---------------------------- 2 files changed, 46 insertions(+), 70 deletions(-) (limited to 'src') diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml index 7e0fce0f04f..268ca3d97d7 100644 --- a/src/intel/genxml/gen7.xml +++ b/src/intel/genxml/gen7.xml @@ -59,6 +59,27 @@ + + + + + + + + + + + + + + + + + + + + + @@ -1524,41 +1545,8 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml index f42c026d2f7..adbeeb6c788 100644 --- a/src/intel/genxml/gen75.xml +++ b/src/intel/genxml/gen75.xml @@ -69,6 +69,27 @@ + + + + + + + + + + + + + + + + + + + + + @@ -1765,41 +1786,8 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + -- cgit v1.2.3 From 38f4c11c2f5c00a1d2addddcd0508ad89a7cead4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 29 Feb 2016 17:27:11 -0800 Subject: anv/pipeline: Pull 3DSTATE_SBE into a shared helper --- src/intel/vulkan/gen7_pipeline.c | 14 +---- src/intel/vulkan/gen8_pipeline.c | 99 +----------------------------- src/intel/vulkan/genX_pipeline_util.h | 109 ++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 111 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 7151e36f17d..c356fed7d68 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ 
b/src/intel/vulkan/gen7_pipeline.c @@ -245,10 +245,6 @@ genX(graphics_pipeline_create)( .SampleMask = 0xff); const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; - /* The last geometry producing stage will set urb_offset and urb_length, - * which we use in 3DSTATE_SBE. Skip the VUE header and position slots. */ - uint32_t urb_offset = 1; - uint32_t urb_length = (vue_prog_data->vue_map.num_slots + 1) / 2 - urb_offset; #if 0 /* From gen7_vs_state.c */ @@ -291,9 +287,6 @@ genX(graphics_pipeline_create)( if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); } else { - urb_offset = 1; - urb_length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset; - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .KernelStartPointer = pipeline->gs_kernel, .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], @@ -346,12 +339,7 @@ genX(graphics_pipeline_create)( if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) anv_finishme("primitive_id needs sbe swizzling setup"); - /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), - .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, - .VertexURBEntryReadLength = urb_length, - .VertexURBEntryReadOffset = urb_offset, - .PointSpriteTextureCoordinateOrigin = UPPERLEFT); + emit_3dstate_sbe(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), .KernelStartPointer0 = pipeline->ps_ksp0, diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index c9545c898f3..494a64949b6 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -443,104 +443,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), .PixelShaderValid = false); } else { - /* TODO: We should clean this up. 
Among other things, this is mostly - * shared with other gens. - */ - const struct brw_vue_map *fs_input_map; - if (pipeline->gs_kernel == NO_KERNEL) - fs_input_map = &vue_prog_data->vue_map; - else - fs_input_map = &gs_prog_data->base.vue_map; - - struct GENX(3DSTATE_SBE_SWIZ) swiz = { - GENX(3DSTATE_SBE_SWIZ_header), - }; - - int max_source_attr = 0; - for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { - int input_index = wm_prog_data->urb_setup[attr]; - - if (input_index < 0) - continue; - - int source_attr = fs_input_map->varying_to_slot[attr]; - max_source_attr = MAX2(max_source_attr, source_attr); - - if (input_index >= 16) - continue; - - if (source_attr == -1) { - /* This attribute does not exist in the VUE--that means that the - * vertex shader did not write to it. It could be that it's a - * regular varying read by the fragment shader but not written by - * the vertex shader or it's gl_PrimitiveID. In the first case the - * value is undefined, in the second it needs to be - * gl_PrimitiveID. - */ - swiz.Attribute[input_index].ConstantSource = PRIM_ID; - swiz.Attribute[input_index].ComponentOverrideX = true; - swiz.Attribute[input_index].ComponentOverrideY = true; - swiz.Attribute[input_index].ComponentOverrideZ = true; - swiz.Attribute[input_index].ComponentOverrideW = true; - } else { - /* We have to subtract two slots to accout for the URB entry output - * read offset in the VS and GS stages. 
- */ - swiz.Attribute[input_index].SourceAttribute = source_attr - 2; - } - } - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), - .AttributeSwizzleEnable = true, - .ForceVertexURBEntryReadLength = false, - .ForceVertexURBEntryReadOffset = false, - .VertexURBEntryReadLength = - DIV_ROUND_UP(max_source_attr + 1, 2), - .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = - wm_prog_data->num_varying_inputs, - -#if GEN_GEN >= 9 - .Attribute0ActiveComponentFormat = ACF_XYZW, - .Attribute1ActiveComponentFormat = ACF_XYZW, - .Attribute2ActiveComponentFormat = ACF_XYZW, - .Attribute3ActiveComponentFormat = ACF_XYZW, - .Attribute4ActiveComponentFormat = ACF_XYZW, - .Attribute5ActiveComponentFormat = ACF_XYZW, - .Attribute6ActiveComponentFormat = ACF_XYZW, - .Attribute7ActiveComponentFormat = ACF_XYZW, - .Attribute8ActiveComponentFormat = ACF_XYZW, - .Attribute9ActiveComponentFormat = ACF_XYZW, - .Attribute10ActiveComponentFormat = ACF_XYZW, - .Attribute11ActiveComponentFormat = ACF_XYZW, - .Attribute12ActiveComponentFormat = ACF_XYZW, - .Attribute13ActiveComponentFormat = ACF_XYZW, - .Attribute14ActiveComponentFormat = ACF_XYZW, - .Attribute15ActiveComponentFormat = ACF_XYZW, - /* wow, much field, very attribute */ - .Attribute16ActiveComponentFormat = ACF_XYZW, - .Attribute17ActiveComponentFormat = ACF_XYZW, - .Attribute18ActiveComponentFormat = ACF_XYZW, - .Attribute19ActiveComponentFormat = ACF_XYZW, - .Attribute20ActiveComponentFormat = ACF_XYZW, - .Attribute21ActiveComponentFormat = ACF_XYZW, - .Attribute22ActiveComponentFormat = ACF_XYZW, - .Attribute23ActiveComponentFormat = ACF_XYZW, - .Attribute24ActiveComponentFormat = ACF_XYZW, - .Attribute25ActiveComponentFormat = ACF_XYZW, - .Attribute26ActiveComponentFormat = ACF_XYZW, - .Attribute27ActiveComponentFormat = ACF_XYZW, - .Attribute28ActiveComponentFormat = ACF_XYZW, - .Attribute29ActiveComponentFormat = ACF_XYZW, - .Attribute28ActiveComponentFormat = ACF_XYZW, - 
.Attribute29ActiveComponentFormat = ACF_XYZW, - .Attribute30ActiveComponentFormat = ACF_XYZW, -#endif - ); - - uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, - GENX(3DSTATE_SBE_SWIZ_length)); - GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); + emit_3dstate_sbe(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), .KernelStartPointer0 = pipeline->ps_ksp0, diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index d940aba67b5..66250e5d4d6 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -219,6 +219,115 @@ emit_urb_setup(struct anv_pipeline *pipeline) } } +static void +emit_3dstate_sbe(struct anv_pipeline *pipeline) +{ + const struct brw_vue_map *fs_input_map; + if (pipeline->gs_kernel == NO_KERNEL) + fs_input_map = &pipeline->vs_prog_data.base.vue_map; + else + fs_input_map = &pipeline->gs_prog_data.base.vue_map; + + struct GENX(3DSTATE_SBE) sbe = { + GENX(3DSTATE_SBE_header), + .AttributeSwizzleEnable = true, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, + +#if GEN_GEN >= 9 + .Attribute0ActiveComponentFormat = ACF_XYZW, + .Attribute1ActiveComponentFormat = ACF_XYZW, + .Attribute2ActiveComponentFormat = ACF_XYZW, + .Attribute3ActiveComponentFormat = ACF_XYZW, + .Attribute4ActiveComponentFormat = ACF_XYZW, + .Attribute5ActiveComponentFormat = ACF_XYZW, + .Attribute6ActiveComponentFormat = ACF_XYZW, + .Attribute7ActiveComponentFormat = ACF_XYZW, + .Attribute8ActiveComponentFormat = ACF_XYZW, + .Attribute9ActiveComponentFormat = ACF_XYZW, + .Attribute10ActiveComponentFormat = ACF_XYZW, + .Attribute11ActiveComponentFormat = ACF_XYZW, + .Attribute12ActiveComponentFormat = ACF_XYZW, + .Attribute13ActiveComponentFormat = ACF_XYZW, + .Attribute14ActiveComponentFormat = ACF_XYZW, + .Attribute15ActiveComponentFormat = ACF_XYZW, + /* wow, much field, very attribute */ + 
.Attribute16ActiveComponentFormat = ACF_XYZW, + .Attribute17ActiveComponentFormat = ACF_XYZW, + .Attribute18ActiveComponentFormat = ACF_XYZW, + .Attribute19ActiveComponentFormat = ACF_XYZW, + .Attribute20ActiveComponentFormat = ACF_XYZW, + .Attribute21ActiveComponentFormat = ACF_XYZW, + .Attribute22ActiveComponentFormat = ACF_XYZW, + .Attribute23ActiveComponentFormat = ACF_XYZW, + .Attribute24ActiveComponentFormat = ACF_XYZW, + .Attribute25ActiveComponentFormat = ACF_XYZW, + .Attribute26ActiveComponentFormat = ACF_XYZW, + .Attribute27ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute30ActiveComponentFormat = ACF_XYZW, +#endif + }; + +#if GEN_GEN >= 8 + /* On Broadwell, they broke 3DSTATE_SBE into two packets */ + struct GENX(3DSTATE_SBE_SWIZ) swiz = { + GENX(3DSTATE_SBE_SWIZ_header), + }; +#else +# define swiz sbe +#endif + + int max_source_attr = 0; + for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { + int input_index = pipeline->wm_prog_data.urb_setup[attr]; + + if (input_index < 0) + continue; + + int source_attr = fs_input_map->varying_to_slot[attr]; + max_source_attr = MAX2(max_source_attr, source_attr); + + if (input_index >= 16) + continue; + + if (source_attr == -1) { + /* This attribute does not exist in the VUE--that means that the + * vertex shader did not write to it. It could be that it's a + * regular varying read by the fragment shader but not written by + * the vertex shader or it's gl_PrimitiveID. In the first case the + * value is undefined, in the second it needs to be + * gl_PrimitiveID. 
+ */ + swiz.Attribute[input_index].ConstantSource = PRIM_ID; + swiz.Attribute[input_index].ComponentOverrideX = true; + swiz.Attribute[input_index].ComponentOverrideY = true; + swiz.Attribute[input_index].ComponentOverrideZ = true; + swiz.Attribute[input_index].ComponentOverrideW = true; + } else { + /* We have to subtract two slots to accout for the URB entry output + * read offset in the VS and GS stages. + */ + swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + } + } + + sbe.VertexURBEntryReadOffset = 1; /* Skip the VUE header and position slots */ + sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2); + + uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, + GENX(3DSTATE_SBE_length)); + GENX(3DSTATE_SBE_pack)(&pipeline->batch, dw, &sbe); + +#if GEN_GEN >= 8 + dw = anv_batch_emit_dwords(&pipeline->batch, GENX(3DSTATE_SBE_SWIZ_length)); + GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); +#endif +} + static inline uint32_t scratch_space(const struct brw_stage_prog_data *prog_data) { -- cgit v1.2.3 From 22d8666d74f6fa6de53366f76a56277976eced21 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 1 Mar 2016 09:17:16 -0800 Subject: anv: Add in image->offset when setting up depth buffer Fix from Neil Roberts. 
https://bugs.freedesktop.org/show_bug.cgi?id=94348 --- src/intel/vulkan/genX_cmd_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 9be87a3ff05..c00c6d0decc 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -572,7 +572,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .SurfacePitch = image->depth_surface.isl.row_pitch - 1, .SurfaceBaseAddress = { .bo = image->bo, - .offset = image->depth_surface.offset, + .offset = image->offset + image->depth_surface.offset, }, .Height = fb->height - 1, .Width = fb->width - 1, -- cgit v1.2.3 From bb08d86efe32e5d59e6dde1a062539e626727d0a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 10:56:46 -0800 Subject: anv/cmd_buffer: Clean up stencil state setup on gen8 --- src/intel/vulkan/gen8_cmd_buffer.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 9dc2abd1f29..8a0fe60db33 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -355,6 +355,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) #if GEN_GEN == 8 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, GENX(COLOR_CALC_STATE_length) * 4, @@ -364,10 +365,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = - 
cmd_buffer->state.dynamic.stencil_reference.front, - .BackFaceStencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.back, + .StencilReferenceValue = d->stencil_reference.front, + .BackFaceStencilReferenceValue = d->stencil_reference.back, }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); @@ -384,23 +383,19 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; struct GENX(3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil) = { GENX(3DSTATE_WM_DEPTH_STENCIL_header), /* Is this what we need to do? */ - .StencilBufferWriteEnable = - cmd_buffer->state.dynamic.stencil_write_mask.front != 0, - - .StencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, - .StencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, - - .BackfaceStencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, - .BackfaceStencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0, + + .StencilTestMask = d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, }; GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw, &wm_depth_stencil); -- cgit v1.2.3 From 4cfdd1650083f3e425112ff697538e9818bc8946 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 11:02:12 -0800 Subject: anv/cmd_buffer: Clean up stencil state setup on gen7 --- src/intel/vulkan/gen7_cmd_buffer.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c 
b/src/intel/vulkan/gen7_cmd_buffer.c index d2c4297cbca..09025ef2323 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -557,6 +557,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, GENX(COLOR_CALC_STATE_length) * 4, @@ -566,10 +567,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.front, - .BackFaceStencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.back, + .StencilReferenceValue = d->stencil_reference.front, + .BackFaceStencilReferenceValue = d->stencil_reference.back, }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); if (!cmd_buffer->device->info.has_llc) @@ -585,6 +584,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)]; + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); @@ -592,15 +592,11 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct GENX(DEPTH_STENCIL_STATE) depth_stencil = { .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), - .StencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, - .StencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + 
.StencilTestMask = d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, - .BackfaceStencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, - .BackfaceStencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, }; GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil); -- cgit v1.2.3 From 6e20c1e058d7449c800506d05cd1c6431fa77a4b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 10:59:21 -0800 Subject: anv/cmd_buffer: Look at both sides for stencil enable Now it's all consistent with gen9 --- src/intel/vulkan/gen7_cmd_buffer.c | 6 ++---- src/intel/vulkan/gen8_cmd_buffer.c | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 09025ef2323..3ed93137f6a 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -586,11 +586,9 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)]; struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; - const struct anv_image_view *iview = - anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - struct GENX(DEPTH_STENCIL_STATE) depth_stencil = { - .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || + d->stencil_write_mask.back != 0, .StencilTestMask = d->stencil_compare_mask.front & 0xff, .StencilWriteMask = d->stencil_write_mask.front & 0xff, diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 8a0fe60db33..884152da207 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -388,8 +388,8 @@ 
genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct GENX(3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil) = { GENX(3DSTATE_WM_DEPTH_STENCIL_header), - /* Is this what we need to do? */ - .StencilBufferWriteEnable = d->stencil_write_mask.front != 0, + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || + d->stencil_write_mask.back != 0, .StencilTestMask = d->stencil_compare_mask.front & 0xff, .StencilWriteMask = d->stencil_write_mask.front & 0xff, -- cgit v1.2.3 From 8b091deb5e229dd67c7b9c72d511d3eaa7c9b7d9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 11:38:11 -0800 Subject: anv: Unify gen7 and gen8 state Now that we've pulled surface state setup into ISL, there's not much to do here. --- src/intel/vulkan/Makefile.am | 8 +- src/intel/vulkan/gen7_state.c | 125 ------------------ src/intel/vulkan/gen8_state.c | 203 ---------------------------- src/intel/vulkan/genX_state.c | 263 +++++++++++++++++++++++++++++++++++++ src/intel/vulkan/genX_state_util.h | 65 --------- 5 files changed, 267 insertions(+), 397 deletions(-) delete mode 100644 src/intel/vulkan/gen7_state.c delete mode 100644 src/intel/vulkan/gen8_state.c create mode 100644 src/intel/vulkan/genX_state.c delete mode 100644 src/intel/vulkan/genX_state_util.h (limited to 'src') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 6be4f9fb427..7d078cff91c 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -107,7 +107,7 @@ libanv_gen7_la_SOURCES = \ genX_pipeline.c \ gen7_cmd_buffer.c \ gen7_pipeline.c \ - gen7_state.c + genX_state.c libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=70 libanv_gen75_la_SOURCES = \ @@ -115,7 +115,7 @@ libanv_gen75_la_SOURCES = \ genX_pipeline.c \ gen7_cmd_buffer.c \ gen7_pipeline.c \ - gen7_state.c + genX_state.c libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=75 libanv_gen8_la_SOURCES = \ @@ -123,7 +123,7 @@ libanv_gen8_la_SOURCES = \ 
genX_pipeline.c \ gen8_cmd_buffer.c \ gen8_pipeline.c \ - gen8_state.c + genX_state.c libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=80 libanv_gen9_la_SOURCES = \ @@ -131,7 +131,7 @@ libanv_gen9_la_SOURCES = \ genX_pipeline.c \ gen8_cmd_buffer.c \ gen8_pipeline.c \ - gen8_state.c + genX_state.c libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=90 if HAVE_EGL_PLATFORM_WAYLAND diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c deleted file mode 100644 index 1360697f0de..00000000000 --- a/src/intel/vulkan/gen7_state.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen_macros.h" -#include "genxml/genX_pack.h" - -#include "genX_state_util.h" - -VkResult -genX(init_device_state)(struct anv_device *device) -{ - GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, - &GENX(MOCS)); - - struct anv_batch batch; - - uint32_t cmds[64]; - batch.start = batch.next = cmds; - batch.end = (void *) cmds + sizeof(cmds); - - anv_batch_emit(&batch, GENX(PIPELINE_SELECT), - .PipelineSelection = _3D); - - anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), - .StatisticsEnable = true); - anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); - anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_DS), .DSFunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); - anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); - - assert(batch.next <= batch.end); - - return anv_device_submit_simple_batch(device, &batch); -} - -VkResult genX(CreateSampler)( - VkDevice _device, - const VkSamplerCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSampler* pSampler) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_sampler *sampler; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!sampler) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GENX(SAMPLER_STATE) sampler_state = { - .SamplerDisable = false, - .TextureBorderColorMode = DX10OGL, - .LODPreClampEnable = CLAMP_ENABLE_OGL, - .BaseMipLevel = 0.0, - .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], - .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, - pCreateInfo->anisotropyEnable), - .MinModeFilter = 
vk_to_gen_tex_filter(pCreateInfo->minFilter, - pCreateInfo->anisotropyEnable), - .TextureLODBias = pCreateInfo->mipLodBias * 256, - .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = pCreateInfo->minLod, - .MaxLOD = pCreateInfo->maxLod, - .ChromaKeyEnable = 0, - .ChromaKeyIndex = 0, - .ChromaKeyMode = 0, - .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = OVERRIDE, - - .BorderColorPointer = - device->border_colors.offset + - pCreateInfo->borderColor * sizeof(float) * 4, - - .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), - .RAddressMinFilterRoundingEnable = 0, - .RAddressMagFilterRoundingEnable = 0, - .VAddressMinFilterRoundingEnable = 0, - .VAddressMagFilterRoundingEnable = 0, - .UAddressMinFilterRoundingEnable = 0, - .UAddressMagFilterRoundingEnable = 0, - .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], - .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], - }; - - GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); - - *pSampler = anv_sampler_to_handle(sampler); - - return VK_SUCCESS; -} diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c deleted file mode 100644 index 784269b98cb..00000000000 --- a/src/intel/vulkan/gen8_state.c +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to 
the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen_macros.h" -#include "genxml/genX_pack.h" - -#include "genX_state_util.h" - -VkResult -genX(init_device_state)(struct anv_device *device) -{ - GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, - &GENX(MOCS)); - - struct anv_batch batch; - - uint32_t cmds[64]; - batch.start = batch.next = cmds; - batch.end = (void *) cmds + sizeof(cmds); - - anv_batch_emit(&batch, GENX(PIPELINE_SELECT), -#if GEN_GEN >= 9 - .MaskBits = 3, -#endif - .PipelineSelection = _3D); - - anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), - .StatisticsEnable = true); - anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); - anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_DS), .FunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), - .ChromaKeyKillEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); - - /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and - * VkPhysicalDeviceFeatures::standardSampleLocations. 
- */ - anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN), - ._1xSample0XOffset = 0.5, - ._1xSample0YOffset = 0.5, - ._2xSample0XOffset = 0.25, - ._2xSample0YOffset = 0.25, - ._2xSample1XOffset = 0.75, - ._2xSample1YOffset = 0.75, - ._4xSample0XOffset = 0.375, - ._4xSample0YOffset = 0.125, - ._4xSample1XOffset = 0.875, - ._4xSample1YOffset = 0.375, - ._4xSample2XOffset = 0.125, - ._4xSample2YOffset = 0.625, - ._4xSample3XOffset = 0.625, - ._4xSample3YOffset = 0.875, - ._8xSample0XOffset = 0.5625, - ._8xSample0YOffset = 0.3125, - ._8xSample1XOffset = 0.4375, - ._8xSample1YOffset = 0.6875, - ._8xSample2XOffset = 0.8125, - ._8xSample2YOffset = 0.5625, - ._8xSample3XOffset = 0.3125, - ._8xSample3YOffset = 0.1875, - ._8xSample4XOffset = 0.1875, - ._8xSample4YOffset = 0.8125, - ._8xSample5XOffset = 0.0625, - ._8xSample5YOffset = 0.4375, - ._8xSample6XOffset = 0.6875, - ._8xSample6YOffset = 0.9375, - ._8xSample7XOffset = 0.9375, - ._8xSample7YOffset = 0.0625, -#if GEN_GEN >= 9 - ._16xSample0XOffset = 0.5625, - ._16xSample0YOffset = 0.5625, - ._16xSample1XOffset = 0.4375, - ._16xSample1YOffset = 0.3125, - ._16xSample2XOffset = 0.3125, - ._16xSample2YOffset = 0.6250, - ._16xSample3XOffset = 0.7500, - ._16xSample3YOffset = 0.4375, - ._16xSample4XOffset = 0.1875, - ._16xSample4YOffset = 0.3750, - ._16xSample5XOffset = 0.6250, - ._16xSample5YOffset = 0.8125, - ._16xSample6XOffset = 0.8125, - ._16xSample6YOffset = 0.6875, - ._16xSample7XOffset = 0.6875, - ._16xSample7YOffset = 0.1875, - ._16xSample8XOffset = 0.3750, - ._16xSample8YOffset = 0.8750, - ._16xSample9XOffset = 0.5000, - ._16xSample9YOffset = 0.0625, - ._16xSample10XOffset = 0.2500, - ._16xSample10YOffset = 0.1250, - ._16xSample11XOffset = 0.1250, - ._16xSample11YOffset = 0.7500, - ._16xSample12XOffset = 0.0000, - ._16xSample12YOffset = 0.5000, - ._16xSample13XOffset = 0.9375, - ._16xSample13YOffset = 0.2500, - ._16xSample14XOffset = 0.8750, - ._16xSample14YOffset = 0.9375, - ._16xSample15XOffset = 0.0625, - 
._16xSample15YOffset = 0.0000, -#endif - ); - - anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); - - assert(batch.next <= batch.end); - - return anv_device_submit_simple_batch(device, &batch); -} - -VkResult genX(CreateSampler)( - VkDevice _device, - const VkSamplerCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSampler* pSampler) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_sampler *sampler; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!sampler) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - uint32_t border_color_offset = device->border_colors.offset + - pCreateInfo->borderColor * 64; - - struct GENX(SAMPLER_STATE) sampler_state = { - .SamplerDisable = false, - .TextureBorderColorMode = DX10OGL, - .LODPreClampMode = CLAMP_MODE_OGL, -#if GEN_GEN == 8 - .BaseMipLevel = 0.0, -#endif - .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], - .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, pCreateInfo->anisotropyEnable), - .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, pCreateInfo->anisotropyEnable), - .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), - .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), - .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14), - .ChromaKeyEnable = 0, - .ChromaKeyIndex = 0, - .ChromaKeyMode = 0, - .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = OVERRIDE, - - .IndirectStatePointer = border_color_offset >> 6, - - .LODClampMagnificationMode = MIPNONE, - .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), - .RAddressMinFilterRoundingEnable = 0, - .RAddressMagFilterRoundingEnable = 0, - .VAddressMinFilterRoundingEnable = 0, - .VAddressMagFilterRoundingEnable = 0, - 
.UAddressMinFilterRoundingEnable = 0, - .UAddressMagFilterRoundingEnable = 0, - .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], - .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], - }; - - GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); - - *pSampler = anv_sampler_to_handle(sampler); - - return VK_SUCCESS; -} diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c new file mode 100644 index 00000000000..866f1bfd337 --- /dev/null +++ b/src/intel/vulkan/genX_state.c @@ -0,0 +1,263 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" + +VkResult +genX(init_device_state)(struct anv_device *device) +{ + GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, + &GENX(MOCS)); + + struct anv_batch batch; + + uint32_t cmds[64]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GENX(PIPELINE_SELECT), +#if GEN_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = _3D); + + anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), + .StatisticsEnable = true); + anv_batch_emit(&batch, GENX(3DSTATE_HS)); + anv_batch_emit(&batch, GENX(3DSTATE_TE)); + anv_batch_emit(&batch, GENX(3DSTATE_DS)); + + anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); + +#if GEN_GEN >= 8 + anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), + .ChromaKeyKillEnable = false); + + /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and + * VkPhysicalDeviceFeatures::standardSampleLocations. 
+ */ + anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN), + ._1xSample0XOffset = 0.5, + ._1xSample0YOffset = 0.5, + ._2xSample0XOffset = 0.25, + ._2xSample0YOffset = 0.25, + ._2xSample1XOffset = 0.75, + ._2xSample1YOffset = 0.75, + ._4xSample0XOffset = 0.375, + ._4xSample0YOffset = 0.125, + ._4xSample1XOffset = 0.875, + ._4xSample1YOffset = 0.375, + ._4xSample2XOffset = 0.125, + ._4xSample2YOffset = 0.625, + ._4xSample3XOffset = 0.625, + ._4xSample3YOffset = 0.875, + ._8xSample0XOffset = 0.5625, + ._8xSample0YOffset = 0.3125, + ._8xSample1XOffset = 0.4375, + ._8xSample1YOffset = 0.6875, + ._8xSample2XOffset = 0.8125, + ._8xSample2YOffset = 0.5625, + ._8xSample3XOffset = 0.3125, + ._8xSample3YOffset = 0.1875, + ._8xSample4XOffset = 0.1875, + ._8xSample4YOffset = 0.8125, + ._8xSample5XOffset = 0.0625, + ._8xSample5YOffset = 0.4375, + ._8xSample6XOffset = 0.6875, + ._8xSample6YOffset = 0.9375, + ._8xSample7XOffset = 0.9375, + ._8xSample7YOffset = 0.0625, +#if GEN_GEN >= 9 + ._16xSample0XOffset = 0.5625, + ._16xSample0YOffset = 0.5625, + ._16xSample1XOffset = 0.4375, + ._16xSample1YOffset = 0.3125, + ._16xSample2XOffset = 0.3125, + ._16xSample2YOffset = 0.6250, + ._16xSample3XOffset = 0.7500, + ._16xSample3YOffset = 0.4375, + ._16xSample4XOffset = 0.1875, + ._16xSample4YOffset = 0.3750, + ._16xSample5XOffset = 0.6250, + ._16xSample5YOffset = 0.8125, + ._16xSample6XOffset = 0.8125, + ._16xSample6YOffset = 0.6875, + ._16xSample7XOffset = 0.6875, + ._16xSample7YOffset = 0.1875, + ._16xSample8XOffset = 0.3750, + ._16xSample8YOffset = 0.8750, + ._16xSample9XOffset = 0.5000, + ._16xSample9YOffset = 0.0625, + ._16xSample10XOffset = 0.2500, + ._16xSample10YOffset = 0.1250, + ._16xSample11XOffset = 0.1250, + ._16xSample11YOffset = 0.7500, + ._16xSample12XOffset = 0.0000, + ._16xSample12YOffset = 0.5000, + ._16xSample13XOffset = 0.9375, + ._16xSample13YOffset = 0.2500, + ._16xSample14XOffset = 0.8750, + ._16xSample14YOffset = 0.9375, + ._16xSample15XOffset = 0.0625, + 
._16xSample15YOffset = 0.0000, +#endif + ); +#endif + + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); + + assert(batch.next <= batch.end); + + return anv_device_submit_simple_batch(device, &batch); +} + +static inline uint32_t +vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) +{ + switch (filter) { + default: + assert(!"Invalid filter"); + case VK_FILTER_NEAREST: + return MAPFILTER_NEAREST; + case VK_FILTER_LINEAR: + return anisotropyEnable ? MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR; + } +} + +static inline uint32_t +vk_to_gen_max_anisotropy(float ratio) +{ + return (anv_clamp_f(ratio, 2, 16) - 2) / 2; +} + +static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR +}; + +static const uint32_t vk_to_gen_tex_address[] = { + [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, + [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, + [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; + +VkResult genX(CreateSampler)( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + 
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + uint32_t border_color_offset = device->border_colors.offset + + pCreateInfo->borderColor * 64; + + struct GENX(SAMPLER_STATE) sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + +#if GEN_GEN >= 8 + .LODPreClampMode = CLAMP_MODE_OGL, +#else + .LODPreClampEnable = CLAMP_ENABLE_OGL, +#endif + +#if GEN_GEN == 8 + .BaseMipLevel = 0.0, +#endif + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], + .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, + pCreateInfo->anisotropyEnable), + .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, + pCreateInfo->anisotropyEnable), + .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), + .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14), + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = OVERRIDE, + +#if GEN_GEN >= 8 + .IndirectStatePointer = border_color_offset >> 6, +#else + .BorderColorPointer = border_color_offset >> 5, +#endif + +#if GEN_GEN >= 8 + .LODClampMagnificationMode = MIPNONE, +#endif + + .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], + }; + 
+ GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/genX_state_util.h b/src/intel/vulkan/genX_state_util.h deleted file mode 100644 index aabcea9c183..00000000000 --- a/src/intel/vulkan/genX_state_util.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -static inline uint32_t -vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) -{ - switch (filter) { - default: - assert(!"Invalid filter"); - case VK_FILTER_NEAREST: - return MAPFILTER_NEAREST; - case VK_FILTER_LINEAR: - return anisotropyEnable ? 
MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR; - } -} - -static inline uint32_t -vk_to_gen_max_anisotropy(float ratio) -{ - return (anv_clamp_f(ratio, 2, 16) - 2) / 2; -} - -static const uint32_t vk_to_gen_mipmap_mode[] = { - [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, - [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR -}; - -static const uint32_t vk_to_gen_tex_address[] = { - [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, - [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, - [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, -}; - -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, -}; -- cgit v1.2.3 From eecd1f80011701de6174f22106014910c9c79484 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 13:51:50 -0800 Subject: gen7/pipeline: Add competent blending This is mostly a copy-and-paste from gen8. Blending still isn't 100% but it fixes about 1100 CTS blend tests on HSW. 
--- src/intel/vulkan/gen7_pipeline.c | 88 ++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index c356fed7d68..7d283f18f40 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -123,50 +123,52 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, .WriteDisableGreen = true, .WriteDisableBlue = true); } else { - /* FIXME-GEN7: All render targets share blend state settings on gen7, we - * can't implement this. - */ const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; - pipeline->blend_state = - anv_state_pool_emit(&device->dynamic_state_pool, - GENX(BLEND_STATE), 64, - - .ColorBufferBlendEnable = a->blendEnable, - .IndependentAlphaBlendEnable = true, /* FIXME: yes? */ - .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], - - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], - - .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], - .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], - .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], - .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, - -# if 0 - bool AlphaToOneEnable; - bool AlphaToCoverageDitherEnable; -# endif - - .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), - .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), - .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), - .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), - - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], - -# if 0 - bool AlphaTestEnable; - uint32_t AlphaTestFunction; - bool ColorDitherEnable; - uint32_t XDitherOffset; - uint32_t YDitherOffset; - uint32_t ColorClampRange; - bool 
PreBlendColorClampEnable; - bool PostBlendColorClampEnable; -# endif - ); + struct GENX(BLEND_STATE) blend = { + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, + + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .ColorBufferBlendEnable = a->blendEnable, + .ColorClampRange = COLORCLAMP_RTFORMAT, + .PreBlendColorClampEnable = true, + .PostBlendColorClampEnable = true, + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + }; + + /* Our hardware applies the blend factor prior to the blend function + * regardless of what function is used. Technically, this means the + * hardware can do MORE than GL or Vulkan specify. However, it also + * means that, for MIN and MAX, we have to stomp the blend factor to + * ONE to make it a no-op. 
+ */ + if (a->colorBlendOp == VK_BLEND_OP_MIN || + a->colorBlendOp == VK_BLEND_OP_MAX) { + blend.SourceBlendFactor = BLENDFACTOR_ONE; + blend.DestinationBlendFactor = BLENDFACTOR_ONE; + } + if (a->alphaBlendOp == VK_BLEND_OP_MIN || + a->alphaBlendOp == VK_BLEND_OP_MAX) { + blend.SourceAlphaBlendFactor = BLENDFACTOR_ONE; + blend.DestinationAlphaBlendFactor = BLENDFACTOR_ONE; + } + + pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool, + GENX(BLEND_STATE_length) * 4, + 64); + GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend); + if (pipeline->device->info.has_llc) + anv_state_clflush(pipeline->blend_state); } anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), -- cgit v1.2.3 From e941fd84707d4ed04a683f8862d184956a60f9ad Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 14:43:05 -0800 Subject: genxml: Make the border color pointer consistent across gens --- src/intel/genxml/gen8.xml | 2 +- src/intel/genxml/gen9.xml | 2 +- src/intel/vulkan/genX_state.c | 6 +----- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml index 9a52fbaa330..96eda703453 100644 --- a/src/intel/genxml/gen8.xml +++ b/src/intel/genxml/gen8.xml @@ -412,7 +412,7 @@ - + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index 2b73f5f2a39..79d3006d24b 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -437,7 +437,7 @@ - + diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 866f1bfd337..63ea26937e5 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -231,11 +231,7 @@ VkResult genX(CreateSampler)( .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], .CubeSurfaceControlMode = OVERRIDE, -#if GEN_GEN >= 8 - .IndirectStatePointer = border_color_offset >> 6, -#else - .BorderColorPointer = border_color_offset >> 5, -#endif + .BorderColorPointer = 
border_color_offset, #if GEN_GEN >= 8 .LODClampMagnificationMode = MIPNONE, -- cgit v1.2.3 From 5b70aa11ee136baf5aa1b2ba21f10fc42af53c88 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 17:19:43 -0800 Subject: anv/meta_blit: Use unorm formats for 8 and 16-bit RGB and RGBA values While Broadwell is very good about UINT formats, HSW is more restrictive. Neither R8G8B8_UINT nor R16G16B16_UINT really exist on HSW. It should be safe to just use the unorm formats. --- src/intel/vulkan/anv_meta_blit.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 2ec428b5f4a..96a3b7669ac 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -375,17 +375,26 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, static VkFormat vk_format_for_size(int bs) { - /* Note: We intentionally use the 4-channel formats whenever we can. - * This is so that, when we do a RGB <-> RGBX copy, the two formats will - * line up even though one of them is 3/4 the size of the other. + /* The choice of UNORM and UINT formats is very intentional here. Most of + * the time, we want to use a UINT format to avoid any rounding error in + * the blit. For stencil blits, R8_UINT is required by the hardware. + * (It's the only format allowed in conjunction with W-tiling.) Also we + * intentionally use the 4-channel formats whenever we can. This is so + * that, when we do a RGB <-> RGBX copy, the two formats will line up even + * though one of them is 3/4 the size of the other. The choice of UNORM + * vs. UINT is also very intentional because Haswell doesn't handle 8 or + * 16-bit RGB UINT formats at all so we have to use UNORM there. + * Fortunately, the only time we should ever use two different formats in + * the table below is for RGB -> RGBA blits and so we will never have any + * UNORM/UINT mismatch. 
*/ switch (bs) { case 1: return VK_FORMAT_R8_UINT; case 2: return VK_FORMAT_R8G8_UINT; - case 3: return VK_FORMAT_R8G8B8_UINT; - case 4: return VK_FORMAT_R8G8B8A8_UINT; - case 6: return VK_FORMAT_R16G16B16_UINT; - case 8: return VK_FORMAT_R16G16B16A16_UINT; + case 3: return VK_FORMAT_R8G8B8_UNORM; + case 4: return VK_FORMAT_R8G8B8A8_UNORM; + case 6: return VK_FORMAT_R16G16B16_UNORM; + case 8: return VK_FORMAT_R16G16B16A16_UNORM; case 12: return VK_FORMAT_R32G32B32_UINT; case 16: return VK_FORMAT_R32G32B32A32_UINT; default: -- cgit v1.2.3 From 8f5a64e44f4daf2c44c35f209b2452b4b6c6e4e8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Mar 2016 10:46:13 -0800 Subject: gen8/cmd_buffer: Properly return flushed push constant stages This is required on SKL so that we can properly re-emit binding table pointers commands. --- src/intel/vulkan/gen8_cmd_buffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 884152da207..9d4926f86cd 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -63,9 +63,11 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), }); } + + flushed |= mesa_to_vk_shader_stage(stage); } - cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS; + cmd_buffer->state.push_constants_dirty &= ~flushed; return flushed; } -- cgit v1.2.3 From 2168082a48e1f81c9c781dac9e05c61f052b86a1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Mar 2016 11:31:22 -0800 Subject: isl: Fix make check --- src/intel/isl/Makefile.am | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am index ffc2669f6bf..806934eae5b 100644 --- a/src/intel/isl/Makefile.am +++ b/src/intel/isl/Makefile.am @@ -110,6 +110,7 @@ check_PROGRAMS = $(TESTS) # Link tests to 
lib965_compiler.la for brw_get_device_info(). tests_ldadd = \ + -lm \ libisl.la \ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la -- cgit v1.2.3 From b0867ca4b26aa6b3e30af8d6050b94d283636cbc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Mar 2016 11:31:32 -0800 Subject: anv: Fix make check --- src/intel/vulkan/Makefile.am | 10 ++++++---- src/intel/vulkan/tests/Makefile.am | 7 ++++--- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 7d078cff91c..272db40d10b 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -65,8 +65,7 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src \ -I$(top_builddir)/src/compiler \ -I$(top_builddir)/src/compiler/nir \ - -I$(top_builddir)/src/intel \ - -I$(top_builddir)/src/vulkan + -I$(top_builddir)/src/intel libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init @@ -198,7 +197,10 @@ libvulkan_test_la_SOURCES = \ $(VULKAN_SOURCES) \ anv_gem_stubs.c -libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) -libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) +libvulkan_test_la_CFLAGS = \ + -I$(top_srcdir)/src/intel/vulkan \ + $(libvulkan_intel_la_CFLAGS) + +libvulkan_test_la_LIBADD = $(libvulkan_intel_la_LIBADD) include $(top_srcdir)/install-lib-links.mk diff --git a/src/intel/vulkan/tests/Makefile.am b/src/intel/vulkan/tests/Makefile.am index 883013d86c6..ddff73c1707 100644 --- a/src/intel/vulkan/tests/Makefile.am +++ b/src/intel/vulkan/tests/Makefile.am @@ -30,11 +30,12 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/gallium/include \ - -I$(top_srcdir)/src/isl/ \ - -I$(top_srcdir)/src/vulkan + -I$(top_srcdir)/src/intel \ + -I$(top_srcdir)/src/intel/vulkan \ + -I$(top_builddir)/src/intel/vulkan LDADD = \ - $(top_builddir)/src/vulkan/libvulkan-test.la \ + $(top_builddir)/src/intel/vulkan/libvulkan-test.la \ $(PTHREAD_LIBS) -lm -lstdc++ 
check_PROGRAMS = \ -- cgit v1.2.3 From da4745104cc02fc0052a2e05e37c69a4dce76eef Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 2 Mar 2016 01:09:16 -0800 Subject: anv: Save batch to local variable for indirect compute Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_cmd_buffer.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index c00c6d0decc..dbb72b44ee2 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -507,6 +507,7 @@ void genX(CmdDispatchIndirect)( struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; + struct anv_batch *batch = &cmd_buffer->batch; if (prog_data->uses_num_work_groups) { cmd_buffer->state.num_workgroups_offset = bo_offset; @@ -515,11 +516,11 @@ void genX(CmdDispatchIndirect)( genX(cmd_buffer_flush_compute_state)(cmd_buffer); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + emit_lrm(batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + emit_lrm(batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + emit_lrm(batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + anv_batch_emit(batch, GENX(GPGPU_WALKER), .IndirectParameterEnable = true, .SIMDSize = prog_data->simd_size / 16, .ThreadDepthCounterMaximum = 0, @@ -528,7 +529,7 @@ void genX(CmdDispatchIndirect)( .RightExecutionMask = pipeline->cs_right_mask, .BottomExecutionMask = 0xffffffff); - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); + anv_batch_emit(batch, GENX(MEDIA_STATE_FLUSH)); } void -- cgit v1.2.3 From 98cdce1ce4737cf09c5d9613a85bb118f0f1757b Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 2 Mar 2016 01:11:29 
-0800 Subject: anv/gen7: Use predicated rendering for indirect compute For OpenGL, see commit 9a939ebb47a0d37a6b29e3dbb1b20bdc9538a721. Fixes: * dEQP-VK.compute.indirect_dispatch.upload_buffer.empty_command * dEQP-VK.compute.indirect_dispatch.gen_in_compute.empty_command Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_cmd_buffer.c | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index dbb72b44ee2..a888c360673 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -496,6 +496,9 @@ void genX(CmdDispatch)( #define GPGPU_DISPATCHDIMY 0x2504 #define GPGPU_DISPATCHDIMZ 0x2508 +#define MI_PREDICATE_SRC0 0x2400 +#define MI_PREDICATE_SRC1 0x2408 + void genX(CmdDispatchIndirect)( VkCommandBuffer commandBuffer, VkBuffer _buffer, @@ -520,8 +523,50 @@ void genX(CmdDispatchIndirect)( emit_lrm(batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); emit_lrm(batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); +#if GEN_GEN <= 7 + /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */ + emit_lri(batch, MI_PREDICATE_SRC0 + 4, 0); + emit_lri(batch, MI_PREDICATE_SRC1 + 0, 0); + emit_lri(batch, MI_PREDICATE_SRC1 + 4, 0); + + /* Load compute_dispatch_indirect_x_size into SRC0 */ + emit_lrm(batch, MI_PREDICATE_SRC0, bo, bo_offset + 0); + + /* predicate = (compute_dispatch_indirect_x_size == 0); */ + anv_batch_emit(batch, GENX(MI_PREDICATE), + .LoadOperation = LOAD_LOAD, + .CombineOperation = COMBINE_SET, + .CompareOperation = COMPARE_SRCS_EQUAL); + + /* Load compute_dispatch_indirect_y_size into SRC0 */ + emit_lrm(batch, MI_PREDICATE_SRC0, bo, bo_offset + 4); + + /* predicate |= (compute_dispatch_indirect_y_size == 0); */ + anv_batch_emit(batch, GENX(MI_PREDICATE), + .LoadOperation = LOAD_LOAD, + .CombineOperation = COMBINE_OR, + .CompareOperation = COMPARE_SRCS_EQUAL); + + /* Load compute_dispatch_indirect_z_size into SRC0 */ 
+ emit_lrm(batch, MI_PREDICATE_SRC0, bo, bo_offset + 8); + + /* predicate |= (compute_dispatch_indirect_z_size == 0); */ + anv_batch_emit(batch, GENX(MI_PREDICATE), + .LoadOperation = LOAD_LOAD, + .CombineOperation = COMBINE_OR, + .CompareOperation = COMPARE_SRCS_EQUAL); + + /* predicate = !predicate; */ +#define COMPARE_FALSE 1 + anv_batch_emit(batch, GENX(MI_PREDICATE), + .LoadOperation = LOAD_LOADINV, + .CombineOperation = COMBINE_OR, + .CompareOperation = COMPARE_FALSE); +#endif + anv_batch_emit(batch, GENX(GPGPU_WALKER), .IndirectParameterEnable = true, + .PredicateEnable = GEN_GEN <= 7, .SIMDSize = prog_data->simd_size / 16, .ThreadDepthCounterMaximum = 0, .ThreadHeightCounterMaximum = 0, -- cgit v1.2.3 From 206414f92edb4a2149b504f9c296f687a9572ffe Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 3 Mar 2016 08:17:36 -0800 Subject: anv/util: Fix vector resizing It wasn't properly handling the fact that wrap-around in the source may not translate to wrap-around in the destination. This really needs unit tests. 
--- src/intel/vulkan/anv_util.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c index 22fd01c9495..62f47051ec7 100644 --- a/src/intel/vulkan/anv_util.c +++ b/src/intel/vulkan/anv_util.c @@ -144,7 +144,7 @@ anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) void * anv_vector_add(struct anv_vector *vector) { - uint32_t offset, size, split, tail; + uint32_t offset, size, split, src_tail, dst_tail; void *data; if (vector->head - vector->tail == vector->size) { @@ -152,18 +152,25 @@ anv_vector_add(struct anv_vector *vector) data = malloc(size); if (data == NULL) return NULL; - split = align_u32(vector->tail, vector->size); - tail = vector->tail & (vector->size - 1); - if (vector->head - split < vector->size) { - memcpy(data + tail, - vector->data + tail, - split - vector->tail); - memcpy(data + vector->size, - vector->data, vector->head - split); + src_tail = vector->tail & (vector->size - 1); + dst_tail = vector->tail & (size - 1); + if (src_tail == 0) { + /* Since we know that the vector is full, this means that it's + * linear from start to end so we can do one copy. + */ + memcpy(data + dst_tail, vector->data, vector->size); } else { - memcpy(data + tail, - vector->data + tail, - vector->head - vector->tail); + /* In this case, the vector is split into two pieces and we have + * to do two copies. We have to be careful to make sure each + * piece goes to the right locations. Thanks to the change in + * size, it may or may not still wrap around. 
+ */ + split = align_u32(vector->tail, vector->size); + assert(vector->tail <= split && split < vector->head); + memcpy(data + dst_tail, vector->data + src_tail, + split - vector->tail); + memcpy(data + (split & (size - 1)), vector->data, + vector->head - split); } free(vector->data); vector->data = data; -- cgit v1.2.3 From 456f5b0314747c5e47435a6d9d708ef7895c7a49 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 26 Feb 2016 14:49:51 -0800 Subject: isl: Add function to get intratile offsets from x/y offsets Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/isl/isl.c | 43 +++++++++++++++++++++++++++++++------------ src/intel/isl/isl.h | 12 ++++++++++++ 2 files changed, 43 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index f7b4c701841..7fd9eeac515 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -1414,11 +1414,10 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf, } void -isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, +isl_surf_get_image_intratile_offset_el_xy(const struct isl_device *dev, const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - uint32_t logical_z_offset, + uint32_t total_x_offset_el, + uint32_t total_y_offset_el, uint32_t *base_address_offset, uint32_t *x_offset_el, uint32_t *y_offset_el) @@ -1428,14 +1427,6 @@ isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, struct isl_tile_info tile_info; isl_surf_get_tile_info(dev, surf, &tile_info); - uint32_t total_x_offset_el; - uint32_t total_y_offset_el; - isl_surf_get_image_offset_el(surf, level, - logical_array_layer, - logical_z_offset, - &total_x_offset_el, - &total_y_offset_el); - uint32_t small_y_offset_el = total_y_offset_el % tile_info.height; uint32_t big_y_offset_el = total_y_offset_el - small_y_offset_el; uint32_t big_y_offset_B = big_y_offset_el * surf->row_pitch; @@ -1448,6 +1439,34 @@ 
isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, *base_address_offset = big_y_offset_B + big_x_offset_B; *x_offset_el = small_x_offset_el; *y_offset_el = small_y_offset_el; + + +} + +void +isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset, + uint32_t *base_address_offset, + uint32_t *x_offset_el, + uint32_t *y_offset_el) +{ + uint32_t total_x_offset_el; + uint32_t total_y_offset_el; + isl_surf_get_image_offset_el(surf, level, + logical_array_layer, + logical_z_offset, + &total_x_offset_el, + &total_y_offset_el); + + isl_surf_get_image_intratile_offset_el_xy(dev, surf, + total_x_offset_el, + total_y_offset_el, + base_address_offset, + x_offset_el, + y_offset_el); } uint32_t diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 7456975014b..5a48bce6c64 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -1161,6 +1161,18 @@ isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, uint32_t *x_offset_el, uint32_t *y_offset_el); +/** + * See above. + */ +void +isl_surf_get_image_intratile_offset_el_xy(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t total_x_offset_el, + uint32_t total_y_offset_el, + uint32_t *base_address_offset, + uint32_t *x_offset_el, + uint32_t *y_offset_el); + /** * @brief Get value of 3DSTATE_DEPTH_BUFFER.SurfaceFormat * -- cgit v1.2.3 From 091f1da902c71ac8d3d27b325a118e2f683f1ae5 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 1 Mar 2016 17:32:14 -0800 Subject: isl: Don't filter tiling flags if a specific tiling bit is set If a specific bit is set, the intention to create a surface with a specific tiling format should be respected. 
Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/isl/isl.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 7fd9eeac515..a36638071d5 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -184,11 +184,14 @@ isl_surf_choose_tiling(const struct isl_device *dev, { isl_tiling_flags_t tiling_flags = info->tiling_flags; - if (ISL_DEV_GEN(dev) >= 7) { - gen7_filter_tiling(dev, info, &tiling_flags); - } else { - isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev)); - gen7_filter_tiling(dev, info, &tiling_flags); + /* Filter if multiple tiling options are given */ + if (!isl_is_pow2(tiling_flags)) { + if (ISL_DEV_GEN(dev) >= 7) { + gen7_filter_tiling(dev, info, &tiling_flags); + } else { + isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev)); + gen7_filter_tiling(dev, info, &tiling_flags); + } } #define CHOOSE(__tiling) \ -- cgit v1.2.3 From 1d9d90d9a6323c37e80b7870946597b470d8dec0 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 2 Mar 2016 09:44:48 -0800 Subject: anv/image: Create a linear image when requested If a linear image is requested, the only possible result should be a linearly-tiled surface. 
Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 46cf2413468..dc1ea9c80cc 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -120,7 +120,7 @@ make_surface(const struct anv_device *dev, isl_tiling_flags_t tiling_flags = anv_info->isl_tiling_flags; if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) - tiling_flags &= ISL_TILING_LINEAR_BIT; + tiling_flags = ISL_TILING_LINEAR_BIT; struct anv_surface *anv_surf = get_surface(image, aspect); -- cgit v1.2.3 From d50ff250ec25e4903ef9e82c47981aaed962e464 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 2 Mar 2016 14:27:17 -0800 Subject: anv/meta: Add missing command to exit meta in anv_CmdUpdateBuffer() Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 96a3b7669ac..216a0bfa39d 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -625,6 +625,8 @@ void anv_CmdUpdateBuffer( dstOffset += copy_size; pData = (void *)pData + copy_size; } + + meta_finish_blit(cmd_buffer, &saved_state); } static VkFormat -- cgit v1.2.3 From cfe70367503ffb49a850a17e03f4c7e4138af4f1 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sat, 27 Feb 2016 14:25:00 -0800 Subject: anv/meta: Replace copy_format w/ block size in do_buffer_copy() This is a preparatory commit that will simplify the future usage of this function. 
Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 216a0bfa39d..9d41add6079 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -406,9 +406,10 @@ static void do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *src, uint64_t src_offset, struct anv_bo *dest, uint64_t dest_offset, - int width, int height, VkFormat copy_format) + int width, int height, int bs) { VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + VkFormat copy_format = vk_format_for_size(bs); VkImageCreateInfo image_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, @@ -534,8 +535,6 @@ void anv_CmdCopyBuffer( bs = MIN2(bs, 1 << fs); assert(pRegions[r].size % bs == 0); - VkFormat copy_format = vk_format_for_size(bs); - /* This is maximum possible width/height our HW can handle */ uint64_t max_surface_dim = 1 << 14; @@ -544,7 +543,7 @@ void anv_CmdCopyBuffer( while (copy_size >= max_copy_size) { do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, dest_buffer->bo, dest_offset, - max_surface_dim, max_surface_dim, copy_format); + max_surface_dim, max_surface_dim, bs); copy_size -= max_copy_size; src_offset += max_copy_size; dest_offset += max_copy_size; @@ -556,7 +555,7 @@ void anv_CmdCopyBuffer( uint64_t rect_copy_size = height * max_surface_dim * bs; do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, dest_buffer->bo, dest_offset, - max_surface_dim, height, copy_format); + max_surface_dim, height, bs); copy_size -= rect_copy_size; src_offset += rect_copy_size; dest_offset += rect_copy_size; @@ -565,7 +564,7 @@ void anv_CmdCopyBuffer( if (copy_size != 0) { do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, dest_buffer->bo, dest_offset, - copy_size / bs, 1, copy_format); + copy_size / bs, 1, bs); } } @@ -601,17 
+600,13 @@ void anv_CmdUpdateBuffer( memcpy(tmp_data.map, pData, copy_size); - VkFormat format; int bs; if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { - format = VK_FORMAT_R32G32B32A32_UINT; bs = 16; } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { - format = VK_FORMAT_R32G32_UINT; bs = 8; } else { assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); - format = VK_FORMAT_R32_UINT; bs = 4; } @@ -619,7 +614,7 @@ void anv_CmdUpdateBuffer( &cmd_buffer->device->dynamic_state_block_pool.bo, tmp_data.offset, dst_buffer->bo, dst_buffer->offset + dstOffset, - copy_size / bs, 1, format); + copy_size / bs, 1, bs); dataSize -= copy_size; dstOffset += copy_size; -- cgit v1.2.3 From d1e48b994565c1d6c1cfa546ba7ab09145c12601 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sat, 27 Feb 2016 13:29:04 -0800 Subject: anv/meta: Remove redundancies in do_buffer_copy() Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 46 ++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 9d41add6079..478b1997172 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -446,39 +446,29 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, anv_image_from_handle(dest_image)->bo = dest; anv_image_from_handle(dest_image)->offset = dest_offset; + VkImageViewCreateInfo iview_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = 0, /* TEMPLATE */ + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }; + struct anv_image_view src_iview; + iview_info.image = src_image; anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 
- .image = src_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }, - cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); + &iview_info, cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); struct anv_image_view dest_iview; + iview_info.image = dest_image; anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = dest_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, - cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + &iview_info, cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); meta_emit_blit(cmd_buffer, anv_image_from_handle(src_image), -- cgit v1.2.3 From 654f79a04512502df96d9e6ce99ac0f95516d193 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Dec 2015 15:37:54 -0800 Subject: anv/meta: Add the beginnings of a blitter API This API is designed to be an abstraction that sits between the VkCmdCopy commands and the hardware. The idea is that it is simple enough that it *should* be implementable using the blitter but with enough extra data that we can implement it with the 3-D pipeline efficiently. One design objective is to allow the user to supply enough information that we can handle most blit operations with a single draw call even if they require copying multiple rectangles. 
--- src/intel/vulkan/anv_meta.h | 48 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index d33e9e6d8ba..f5dac12a04a 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -70,6 +70,54 @@ anv_meta_get_iview_layer(const struct anv_image *dest_image, const VkImageSubresourceLayers *dest_subresource, const VkOffset3D *dest_offset); +struct anv_meta_blit2d_surf { + struct anv_bo *bo; + enum isl_tiling tiling; + + /** Base offset to the start of the image */ + uint64_t base_offset; + + uint32_t offset_x; + uint32_t offset_y; + + /** The size of a unit in bytes. (Usually texel size) */ + uint8_t units; + + /** Stride between rows in bytes. */ + uint32_t stride; + + /** Possible vertical stride in rows. + * + * This is a hint to the blit engine that tells it that it can, if it + * wants, split the surface into v_stride tall chunks. The user makes + * the guarantee that no rectangles it passes in will every cross a + * v_stride boundary. A v_stride value of 0 indicates that the user + * cannot make such a guarantee. + */ + uint32_t v_stride; +}; + +struct anv_meta_blit2d_rect { + uint32_t src_x, src_y; + uint32_t dst_x, dst_y; + uint32_t width, height; +}; + +static void +anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save); + +static void +anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects); + +static void +anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save); + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 032bf172b48211af8fc892747dc4600fb6595f99 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 25 Feb 2016 15:21:12 -0800 Subject: anv/meta: Modify blitter API fields Some fields are unnecessary. 
The variables "pitch" and "bs" are used for consistency with ISL. v2: Keep pitch in units of bytes (Jason) Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta.h | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index f5dac12a04a..952176453e3 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -77,24 +77,11 @@ struct anv_meta_blit2d_surf { /** Base offset to the start of the image */ uint64_t base_offset; - uint32_t offset_x; - uint32_t offset_y; - - /** The size of a unit in bytes. (Usually texel size) */ - uint8_t units; - - /** Stride between rows in bytes. */ - uint32_t stride; - - /** Possible vertical stride in rows. - * - * This is a hint to the blit engine that tells it that it can, if it - * wants, split the surface into v_stride tall chunks. The user makes - * the guarantee that no rectangles it passes in will every cross a - * v_stride boundary. A v_stride value of 0 indicates that the user - * cannot make such a guarantee. - */ - uint32_t v_stride; + /** The size of an element in bytes. */ + uint8_t bs; + + /** Pitch between rows in bytes. */ + uint32_t pitch; }; struct anv_meta_blit2d_rect { -- cgit v1.2.3 From 2e9b08b9b89c0cf10cc7ca73fd39380766943283 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sat, 27 Feb 2016 12:26:21 -0800 Subject: anv/meta: Implement the blitter API functions Most of the code in anv_meta_blit2d() is borrowed from do_buffer_copy(). Create an image and image view for each rectangle. Note: For tiled RGB images, ISL will align the image's row_pitch up to the nearest tile width. 
v2 (Jason): Keep pitch in units of bytes Make src_format and dst_format variables s/dest/dst/ in every usage v3: Fix dst_image width Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta.h | 6 +- src/intel/vulkan/anv_meta_blit.c | 142 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index 952176453e3..587c044fa5f 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -90,18 +90,18 @@ struct anv_meta_blit2d_rect { uint32_t width, height; }; -static void +void anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save); -static void +void anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *src, struct anv_meta_blit2d_surf *dst, unsigned num_rects, struct anv_meta_blit2d_rect *rects); -static void +void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save); diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 478b1997172..bef66751a7f 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -119,6 +119,14 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT)); } +void +anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save) +{ + meta_prepare_blit(cmd_buffer, save); +} + + /* Returns the user-provided VkBufferImageCopy::imageOffset in units of * elements rather than texels. One element equals one texel or one block * if Image is uncompressed or compressed, respectively. 
@@ -372,6 +380,13 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, anv_meta_restore(saved_state, cmd_buffer); } +void +anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save) +{ + meta_finish_blit(cmd_buffer, save); +} + static VkFormat vk_format_for_size(int bs) { @@ -402,6 +417,133 @@ vk_format_for_size(int bs) } } +void +anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + VkFormat src_format = vk_format_for_size(src->bs); + VkFormat dst_format = vk_format_for_size(dst->bs); + + for (unsigned r = 0; r < num_rects; ++r) { + + /* Create VkImages */ + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = 0, /* TEMPLATE */ + .extent = { + .width = 0, /* TEMPLATE */ + /* Pad to highest tile height to compensate for a vertical intratile offset */ + .height = MIN(rects[r].height + 64, 1 << 14), + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = 0, /* TEMPLATE */ + .usage = 0, /* TEMPLATE */ + }; + struct anv_image_create_info anv_image_info = { + .vk_info = &image_info, + .isl_tiling_flags = 0, /* TEMPLATE */ + }; + + anv_image_info.isl_tiling_flags = 1 << src->tiling; + image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + image_info.format = src_format, + image_info.extent.width = src->pitch / src->bs; + VkImage src_image; + anv_image_create(vk_device, &anv_image_info, + &cmd_buffer->pool->alloc, &src_image); + + anv_image_info.isl_tiling_flags = 1 << dst->tiling; + image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? 
+ VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + image_info.format = dst_format, + image_info.extent.width = dst->pitch / dst->bs; + VkImage dst_image; + anv_image_create(vk_device, &anv_image_info, + &cmd_buffer->pool->alloc, &dst_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + anv_image_from_handle(src_image)->bo = src->bo; + anv_image_from_handle(src_image)->offset = src->base_offset; + anv_image_from_handle(dst_image)->bo = dst->bo; + anv_image_from_handle(dst_image)->offset = dst->base_offset; + + /* Create VkImageViews */ + VkImageViewCreateInfo iview_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = 0, /* TEMPLATE */ + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = 0, /* TEMPLATE */ + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }; + uint32_t img_o = 0; + + iview_info.image = src_image; + iview_info.format = src_format; + VkOffset3D src_offset_el = {0}; + isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, + &anv_image_from_handle(src_image)-> + color_surface.isl, + rects[r].src_x, + rects[r].src_y, + &img_o, + (uint32_t*)&src_offset_el.x, + (uint32_t*)&src_offset_el.y); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &iview_info, cmd_buffer, img_o, VK_IMAGE_USAGE_SAMPLED_BIT); + + iview_info.image = dst_image; + iview_info.format = dst_format; + VkOffset3D dst_offset_el = {0}; + isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, + &anv_image_from_handle(dst_image)-> + color_surface.isl, + rects[r].dst_x, + rects[r].dst_y, + &img_o, + (uint32_t*)&dst_offset_el.x, + (uint32_t*)&dst_offset_el.y); + struct anv_image_view dst_iview; + anv_image_view_init(&dst_iview, cmd_buffer->device, 
+ &iview_info, cmd_buffer, img_o, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + + /* Perform blit */ + meta_emit_blit(cmd_buffer, + anv_image_from_handle(src_image), + &src_iview, + src_offset_el, + (VkExtent3D){rects[r].width, rects[r].height, 1}, + anv_image_from_handle(dst_image), + &dst_iview, + dst_offset_el, + (VkExtent3D){rects[r].width, rects[r].height, 1}, + VK_FILTER_NEAREST); + + anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc); + } +} + static void do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *src, uint64_t src_offset, -- cgit v1.2.3 From 61ad78d0d1ffafc89cdc9da9d5ae710be36e3089 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 1 Mar 2016 23:15:35 -0800 Subject: anv/meta: Add function to create anv_meta_blit2d_surf from anv_image v2: Keep pitch in units of bytes (Jason) Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index bef66751a7f..044998d0f56 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -417,6 +417,19 @@ vk_format_for_size(int bs) } } +static struct anv_meta_blit2d_surf +blit_surf_for_image(const struct anv_image* image, + const struct isl_surf *img_isl_surf) +{ + return (struct anv_meta_blit2d_surf) { + .bo = image->bo, + .tiling = img_isl_surf->tiling, + .base_offset = image->offset, + .bs = isl_format_get_layout(img_isl_surf->format)->bs, + .pitch = isl_surf_get_row_pitch(img_isl_surf), + }; +} + void anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *src, -- cgit v1.2.3 From 91640c34c6b474903fa5634f86f87c774d16db88 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 29 Feb 2016 14:28:25 -0800 Subject: anv/meta: Add function which copies between Buffers and Images v2: Keep pitch 
in units of bytes (Jason) Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 92 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 044998d0f56..8cda3d587fa 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -1040,6 +1040,98 @@ void anv_CmdBlitImage( meta_finish_blit(cmd_buffer, &saved_state); } +static void +meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, + struct anv_buffer* buffer, + struct anv_image* image, + uint32_t regionCount, + const VkBufferImageCopy* pRegions, + bool forward) +{ + struct anv_meta_saved_state saved_state; + + /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(image->samples == 1); + + anv_meta_begin_blit2d(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + + /* Start creating blit rect */ + const VkOffset3D img_offset_el = meta_region_offset_el(image, &pRegions[r].imageOffset); + const VkExtent3D bufferExtent = { + .width = pRegions[r].bufferRowLength, + .height = pRegions[r].bufferImageHeight, + }; + const VkExtent3D buf_extent_el = meta_region_extent_el(image->vk_format, &bufferExtent); + const VkExtent3D img_extent_el = meta_region_extent_el(image->vk_format, + &pRegions[r].imageExtent); + struct anv_meta_blit2d_rect rect = { + .width = MAX2(buf_extent_el.width, img_extent_el.width), + .height = MAX2(buf_extent_el.height, img_extent_el.height), + }; + + /* Create blit surfaces */ + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + const struct isl_surf *img_isl_surf = + &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; + struct anv_meta_blit2d_surf img_bsurf = blit_surf_for_image(image, img_isl_surf); + struct anv_meta_blit2d_surf buf_bsurf = { + .bo = buffer->bo, + .tiling = ISL_TILING_LINEAR, + 
.base_offset = buffer->offset + pRegions[r].bufferOffset, + .bs = forward ? image->format->isl_layout->bs : img_bsurf.bs, + .pitch = rect.width * buf_bsurf.bs, + }; + + /* Set direction-dependent variables */ + struct anv_meta_blit2d_surf *dst_bsurf = forward ? &img_bsurf : &buf_bsurf; + struct anv_meta_blit2d_surf *src_bsurf = forward ? &buf_bsurf : &img_bsurf; + uint32_t *x_offset = forward ? &rect.dst_x : &rect.src_x; + uint32_t *y_offset = forward ? &rect.dst_y : &rect.src_y; + + /* Loop through each 3D or array slice */ + unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { + + /* Finish creating blit rect */ + isl_surf_get_image_offset_el(img_isl_surf, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer + slice_array, + pRegions[r].imageOffset.z + slice_3d, + x_offset, + y_offset); + *x_offset += img_offset_el.x; + *y_offset += img_offset_el.y; + + /* Perform Blit */ + anv_meta_blit2d(cmd_buffer, + src_bsurf, + dst_bsurf, + 1, + &rect); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. 
+ */ + buf_bsurf.base_offset += rect.width * rect.height * buf_bsurf.bs; + + if (image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; + } + } + anv_meta_end_blit2d(cmd_buffer, &saved_state); +} + static struct anv_image * make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, VkImageUsageFlags usage, -- cgit v1.2.3 From 9b6c95d46ee19224e8013a0fbc991f8b2135017d Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sat, 27 Feb 2016 17:11:55 -0800 Subject: anv/meta: Use blitter API for copies between Images and Buffers Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 214 ++------------------------------------- 1 file changed, 6 insertions(+), 208 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 8cda3d587fa..8ca1871f9d8 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -1189,121 +1189,10 @@ void anv_CmdCopyBufferToImage( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, dest_image, destImage); - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_meta_saved_state saved_state; - - /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to - * VK_SAMPLE_COUNT_1_BIT." 
- */ - assert(dest_image->samples == 1); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - - VkFormat image_format = choose_iview_format(dest_image, aspect); - - struct anv_image *src_image = - make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format, - VK_IMAGE_USAGE_SAMPLED_BIT, - dest_image->type, &cmd_buffer->pool->alloc, - &pRegions[r]); - - const uint32_t dest_base_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource, - &pRegions[r].imageOffset); - - unsigned num_slices_3d = pRegions[r].imageExtent.depth; - unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; - unsigned slice_3d = 0; - unsigned slice_array = 0; - while (slice_3d < num_slices_3d && slice_array < num_slices_array) { - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(src_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, - cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); - - uint32_t img_x = 0; - uint32_t img_y = 0; - uint32_t img_o = 0; - if (isl_format_is_compressed(dest_image->format->isl_format)) - isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, - &dest_image->color_surface.isl, - pRegions[r].imageSubresource.mipLevel, - pRegions[r].imageSubresource.baseArrayLayer + slice_array, - pRegions[r].imageOffset.z + slice_3d, - &img_o, &img_x, &img_y); - - VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, & pRegions[r].imageOffset); - dest_offset_el.x += img_x; - dest_offset_el.y += img_y; - dest_offset_el.z = 0; - - struct anv_image_view dest_iview; - 
anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(dest_image), - .viewType = anv_meta_get_view_type(dest_image), - .format = image_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + - slice_array + slice_3d, - .layerCount = 1 - }, - }, - cmd_buffer, img_o, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - - const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, - &pRegions[r].imageExtent); - - meta_emit_blit(cmd_buffer, - src_image, - &src_iview, - (VkOffset3D){0, 0, 0}, - img_extent_el, - dest_image, - &dest_iview, - dest_offset_el, - img_extent_el, - VK_FILTER_NEAREST); - - /* Once we've done the blit, all of the actual information about - * the image is embedded in the command buffer so we can just - * increment the offset directly in the image effectively - * re-binding it to different backing memory. 
- */ - src_image->offset += src_image->extent.width * - src_image->extent.height * - src_image->format->isl_layout->bs; - - if (dest_image->type == VK_IMAGE_TYPE_3D) - slice_3d++; - else - slice_array++; - } - - anv_DestroyImage(vk_device, anv_image_to_handle(src_image), - &cmd_buffer->pool->alloc); - } + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); - meta_finish_blit(cmd_buffer, &saved_state); + meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image, + regionCount, pRegions, true); } void anv_CmdCopyImageToBuffer( @@ -1316,101 +1205,10 @@ void anv_CmdCopyImageToBuffer( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, src_image, srcImage); - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_meta_saved_state saved_state; - - - /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to - * VK_SAMPLE_COUNT_1_BIT." - */ - assert(src_image->samples == 1); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - - VkFormat image_format = choose_iview_format(src_image, aspect); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = image_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, - .layerCount = pRegions[r].imageSubresource.layerCount, - }, - }, - cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); - - struct anv_image *dest_image = - make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - src_image->type, &cmd_buffer->pool->alloc, - &pRegions[r]); - - unsigned 
num_slices; - if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(pRegions[r].imageSubresource.layerCount == 1); - num_slices = pRegions[r].imageExtent.depth; - } else { - assert(pRegions[r].imageExtent.depth == 1); - num_slices = pRegions[r].imageSubresource.layerCount; - } - - for (unsigned slice = 0; slice < num_slices; slice++) { - VkOffset3D src_offset = pRegions[r].imageOffset; - src_offset.z += slice; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(dest_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = dest_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }, - cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, destBuffer); - meta_emit_blit(cmd_buffer, - anv_image_from_handle(srcImage), - &src_iview, - src_offset, - pRegions[r].imageExtent, - dest_image, - &dest_iview, - (VkOffset3D) { 0, 0, 0 }, - pRegions[r].imageExtent, - VK_FILTER_NEAREST); - - /* Once we've done the blit, all of the actual information about - * the image is embedded in the command buffer so we can just - * increment the offset directly in the image effectively - * re-binding it to different backing memory. 
- */ - dest_image->offset += dest_image->extent.width * - dest_image->extent.height * - src_image->format->isl_layout->bs; - } - - anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), - &cmd_buffer->pool->alloc); - } - - meta_finish_blit(cmd_buffer, &saved_state); + meta_copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, + regionCount, pRegions, false); } void -- cgit v1.2.3 From 96ff4d0679e9243d4df070cc0eaef41e1b9d742b Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 29 Feb 2016 12:19:51 -0800 Subject: anv/meta: Use blitter API in anv_CmdCopyImage() Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 113 +++++++++++++++------------------------ 1 file changed, 42 insertions(+), 71 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 8ca1871f9d8..2e529eed0f8 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -836,7 +836,7 @@ void anv_CmdCopyImage( */ assert(src_image->samples == dest_image->samples); - meta_prepare_blit(cmd_buffer, &saved_state); + anv_meta_begin_blit2d(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { assert(pRegions[r].srcSubresource.aspectMask == @@ -844,84 +844,55 @@ void anv_CmdCopyImage( VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; - VkFormat src_format = choose_iview_format(src_image, aspect); - VkFormat dst_format = choose_iview_format(dest_image, aspect); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = src_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, - .layerCount = 
pRegions[r].dstSubresource.layerCount, - }, - }, - cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); - - const uint32_t dest_base_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, - &pRegions[r].dstOffset); + /* Create blit surfaces */ + struct isl_surf *src_isl_surf = + &anv_image_get_surface_for_aspect_mask(src_image, aspect)->isl; + struct isl_surf *dst_isl_surf = + &anv_image_get_surface_for_aspect_mask(dest_image, aspect)->isl; + struct anv_meta_blit2d_surf b_src = blit_surf_for_image(src_image, src_isl_surf); + struct anv_meta_blit2d_surf b_dst = blit_surf_for_image(dest_image, dst_isl_surf); + /* Start creating blit rect */ + const VkOffset3D dst_offset_el = meta_region_offset_el(dest_image, &pRegions[r].dstOffset); + const VkOffset3D src_offset_el = meta_region_offset_el(src_image, &pRegions[r].srcOffset); + const VkExtent3D img_extent_el = meta_region_extent_el(src_image->vk_format, + &pRegions[r].extent); + struct anv_meta_blit2d_rect rect = { + .width = img_extent_el.width, + .height = img_extent_el.height, + }; + /* Loop through each 3D or array slice */ unsigned num_slices_3d = pRegions[r].extent.depth; unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; unsigned slice_3d = 0; unsigned slice_array = 0; while (slice_3d < num_slices_3d && slice_array < num_slices_array) { - VkOffset3D src_offset = pRegions[r].srcOffset; - src_offset.z += slice_3d + slice_array; - - uint32_t img_x = 0; - uint32_t img_y = 0; - uint32_t img_o = 0; - if (isl_format_is_compressed(dest_image->format->isl_format)) - isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, - &dest_image->color_surface.isl, - pRegions[r].dstSubresource.mipLevel, - pRegions[r].dstSubresource.baseArrayLayer + slice_array, - pRegions[r].dstOffset.z + slice_3d, - &img_o, &img_x, &img_y); - - VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, &pRegions[r].dstOffset); - dest_offset_el.x += img_x; - dest_offset_el.y += img_y; - 
dest_offset_el.z = 0; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = destImage, - .viewType = anv_meta_get_view_type(dest_image), - .format = dst_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].dstSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + - slice_array + slice_3d, - .layerCount = 1 - }, - }, - cmd_buffer, img_o, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, - &pRegions[r].extent); + /* Finish creating blit rect */ + isl_surf_get_image_offset_el(dst_isl_surf, + pRegions[r].dstSubresource.mipLevel, + pRegions[r].dstSubresource.baseArrayLayer + slice_array, + pRegions[r].dstOffset.z + slice_3d, + &rect.dst_x, + &rect.dst_y); + isl_surf_get_image_offset_el(src_isl_surf, + pRegions[r].srcSubresource.mipLevel, + pRegions[r].srcSubresource.baseArrayLayer + slice_array, + pRegions[r].srcOffset.z + slice_3d, + &rect.src_x, + &rect.src_y); + rect.dst_x += dst_offset_el.x; + rect.dst_y += dst_offset_el.y; + rect.src_x += src_offset_el.x; + rect.src_y += src_offset_el.y; - meta_emit_blit(cmd_buffer, - src_image, &src_iview, - src_offset, - img_extent_el, - dest_image, &dest_iview, - dest_offset_el, - img_extent_el, - VK_FILTER_NEAREST); + /* Perform Blit */ + anv_meta_blit2d(cmd_buffer, + &b_src, + &b_dst, + 1, + &rect); if (dest_image->type == VK_IMAGE_TYPE_3D) slice_3d++; @@ -930,7 +901,7 @@ void anv_CmdCopyImage( } } - meta_finish_blit(cmd_buffer, &saved_state); + anv_meta_end_blit2d(cmd_buffer, &saved_state); } void anv_CmdBlitImage( -- cgit v1.2.3 From 318b67d1578213b802443f06f767a1c9d6d00310 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sat, 27 Feb 2016 17:11:55 -0800 Subject: anv/meta: Use blitter API in do_buffer_copy() v2: Keep pitch in units of bytes (Jason) 
Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 94 +++++++++------------------------------- 1 file changed, 21 insertions(+), 73 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 2e529eed0f8..bd1c9798823 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -563,81 +563,29 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *dest, uint64_t dest_offset, int width, int height, int bs) { - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - VkFormat copy_format = vk_format_for_size(bs); - - VkImageCreateInfo image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = copy_format, - .extent = { - .width = width, - .height = height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = 0, - .flags = 0, + struct anv_meta_blit2d_surf b_src = { + .bo = src, + .tiling = ISL_TILING_LINEAR, + .base_offset = src_offset, + .bs = bs, + .pitch = width * bs, }; - - VkImage src_image; - image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; - anv_CreateImage(vk_device, &image_info, - &cmd_buffer->pool->alloc, &src_image); - - VkImage dest_image; - image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - anv_CreateImage(vk_device, &image_info, - &cmd_buffer->pool->alloc, &dest_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. 
- */ - anv_image_from_handle(src_image)->bo = src; - anv_image_from_handle(src_image)->offset = src_offset; - anv_image_from_handle(dest_image)->bo = dest; - anv_image_from_handle(dest_image)->offset = dest_offset; - - VkImageViewCreateInfo iview_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = 0, /* TEMPLATE */ - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, + struct anv_meta_blit2d_surf b_dst = { + .bo = dest, + .tiling = ISL_TILING_LINEAR, + .base_offset = dest_offset, + .bs = bs, + .pitch = width * bs, }; - - struct anv_image_view src_iview; - iview_info.image = src_image; - anv_image_view_init(&src_iview, cmd_buffer->device, - &iview_info, cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); - - struct anv_image_view dest_iview; - iview_info.image = dest_image; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &iview_info, cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - - meta_emit_blit(cmd_buffer, - anv_image_from_handle(src_image), - &src_iview, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }, - anv_image_from_handle(dest_image), - &dest_iview, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }, - VK_FILTER_NEAREST); - - anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); - anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc); + struct anv_meta_blit2d_rect rect = { + .width = width, + .height = height, + }; + anv_meta_blit2d(cmd_buffer, + &b_src, + &b_dst, + 1, + &rect); } void anv_CmdCopyBuffer( -- cgit v1.2.3 From d20f6abc85c1dc4f4a2aadd352b64502930a0541 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 2 Mar 2016 14:33:51 -0800 Subject: anv/meta: Use blitter API for state-handling in Buffer Update/Copy Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index bd1c9798823..3fc46318150 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -601,7 +601,7 @@ void anv_CmdCopyBuffer( struct anv_meta_saved_state saved_state; - meta_prepare_blit(cmd_buffer, &saved_state); + anv_meta_begin_blit2d(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; @@ -661,7 +661,7 @@ void anv_CmdCopyBuffer( } } - meta_finish_blit(cmd_buffer, &saved_state); + anv_meta_end_blit2d(cmd_buffer, &saved_state); } void anv_CmdUpdateBuffer( @@ -675,7 +675,7 @@ void anv_CmdUpdateBuffer( ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); struct anv_meta_saved_state saved_state; - meta_prepare_blit(cmd_buffer, &saved_state); + anv_meta_begin_blit2d(cmd_buffer, &saved_state); /* We can't quite grab a full block because the state stream needs a * little data at the top to build its linked list. 
@@ -714,7 +714,7 @@ void anv_CmdUpdateBuffer( pData = (void *)pData + copy_size; } - meta_finish_blit(cmd_buffer, &saved_state); + anv_meta_end_blit2d(cmd_buffer, &saved_state); } static VkFormat -- cgit v1.2.3 From 8dddc3fb1e55a7cc82c0afe2c880c1ef485d21c1 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 29 Feb 2016 14:37:48 -0800 Subject: anv/meta: Delete unused functions Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 93 ---------------------------------------- 1 file changed, 93 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 3fc46318150..b8a42f99eec 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -717,52 +717,6 @@ void anv_CmdUpdateBuffer( anv_meta_end_blit2d(cmd_buffer, &saved_state); } -static VkFormat -choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) -{ - assert(__builtin_popcount(aspect) == 1); - - struct isl_surf *surf = - &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; - - /* vkCmdCopyImage behaves like memcpy. Therefore we choose identical UINT - * formats for the source and destination image views. - * - * From the Vulkan spec (2015-12-30): - * - * vkCmdCopyImage performs image copies in a similar manner to a host - * memcpy. It does not perform general-purpose conversions such as - * scaling, resizing, blending, color-space conversion, or format - * conversions. Rather, it simply copies raw image data. vkCmdCopyImage - * can copy between images with different formats, provided the formats - * are compatible as defined below. - * - * [The spec later defines compatibility as having the same number of - * bytes per block]. 
- */ - return vk_format_for_size(isl_format_layouts[surf->format].bs); -} - -static VkFormat -choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect) -{ - assert(__builtin_popcount(aspect) == 1); - - /* vkCmdCopy* commands behave like memcpy. Therefore we choose - * compatable UINT formats for the source and destination image views. - * - * For the buffer, we go back to the original image format and get a - * the format as if it were linear. This way, for RGB formats, we get - * an RGB format here even if the tiled image is RGBA. XXX: This doesn't - * work if the buffer is the destination. - */ - enum isl_format linear_format = anv_get_isl_format(format, aspect, - VK_IMAGE_TILING_LINEAR, - NULL); - - return vk_format_for_size(isl_format_layouts[linear_format].bs); -} - void anv_CmdCopyImage( VkCommandBuffer commandBuffer, VkImage srcImage, @@ -1051,53 +1005,6 @@ meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, anv_meta_end_blit2d(cmd_buffer, &saved_state); } -static struct anv_image * -make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, - VkImageUsageFlags usage, - VkImageType image_type, - const VkAllocationCallbacks *alloc, - const VkBufferImageCopy *copy) -{ - ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); - - VkExtent3D extent = copy->imageExtent; - if (copy->bufferRowLength) - extent.width = copy->bufferRowLength; - if (copy->bufferImageHeight) - extent.height = copy->bufferImageHeight; - extent.depth = 1; - extent = meta_region_extent_el(format, &extent); - - VkImageAspectFlags aspect = copy->imageSubresource.aspectMask; - VkFormat buffer_format = choose_buffer_format(format, aspect); - - VkImage vk_image; - VkResult result = anv_CreateImage(vk_device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = buffer_format, - .extent = extent, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = usage, 
- .flags = 0, - }, alloc, &vk_image); - assert(result == VK_SUCCESS); - - ANV_FROM_HANDLE(anv_image, image, vk_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. - */ - image->bo = buffer->bo; - image->offset = buffer->offset + copy->bufferOffset; - - return image; -} - void anv_CmdCopyBufferToImage( VkCommandBuffer commandBuffer, VkBuffer srcBuffer, -- cgit v1.2.3 From 623ce595a97cc3ec47be042867e24047162cd371 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 24 Feb 2016 15:41:24 -0800 Subject: anv: Compile shader stages in pipeline order. Instead of the arbitrary order modules might be specified in. Acked-by: Jason Ekstrand --- src/intel/vulkan/anv_pipeline.c | 48 ++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 647f2eb96b0..fd6f8c92cfa 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -1108,29 +1108,33 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pipeline->active_stages = 0; pipeline->total_scratch = 0; + const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, }; + struct anv_shader_module *modules[MESA_SHADER_STAGES] = { 0, }; for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - ANV_FROM_HANDLE(anv_shader_module, module, - pCreateInfo->pStages[i].module); - - switch (pCreateInfo->pStages[i].stage) { - case VK_SHADER_STAGE_VERTEX_BIT: - anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module, - pCreateInfo->pStages[i].pName, - pCreateInfo->pStages[i].pSpecializationInfo); - break; - case VK_SHADER_STAGE_GEOMETRY_BIT: - anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module, - pCreateInfo->pStages[i].pName, - pCreateInfo->pStages[i].pSpecializationInfo); - break; - case VK_SHADER_STAGE_FRAGMENT_BIT: - anv_pipeline_compile_fs(pipeline, cache, 
pCreateInfo, extra, module, - pCreateInfo->pStages[i].pName, - pCreateInfo->pStages[i].pSpecializationInfo); - break; - default: - anv_finishme("Unsupported shader stage"); - } + gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1; + pStages[stage] = &pCreateInfo->pStages[i]; + modules[stage] = anv_shader_module_from_handle(pStages[stage]->module); + } + + if (modules[MESA_SHADER_VERTEX]) { + anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, + modules[MESA_SHADER_VERTEX], + pStages[MESA_SHADER_VERTEX]->pName, + pStages[MESA_SHADER_VERTEX]->pSpecializationInfo); + } + + if (modules[MESA_SHADER_GEOMETRY]) { + anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, + modules[MESA_SHADER_GEOMETRY], + pStages[MESA_SHADER_GEOMETRY]->pName, + pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo); + } + + if (modules[MESA_SHADER_FRAGMENT]) { + anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, + modules[MESA_SHADER_FRAGMENT], + pStages[MESA_SHADER_FRAGMENT]->pName, + pStages[MESA_SHADER_FRAGMENT]->pSpecializationInfo); } if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { -- cgit v1.2.3 From 56ba13c994fc3b1c4058bb99656dad6fa2aa819d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 3 Mar 2016 16:15:54 -0800 Subject: isl/surface_state: Set L2 bypass disable for certain BC* formats --- src/intel/isl/isl_surface_state.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src') diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index 0f45100fba3..1607aa6233d 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -370,6 +370,28 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, s.MIPCountLOD = MAX(info->view->levels, 1) - 1; } +#if GEN_GEN >= 8 + /* From the CHV PRM, Volume 2d, page 321 (RENDER_SURFACE_STATE dword 0 + * bit 9 "Sampler L2 Bypass Mode Disable" Programming Notes): + * + * This bit must be set for the following 
surface types: BC2_UNORM + * BC3_UNORM BC5_UNORM BC5_SNORM BC7_UNORM + */ + if (GEN_GEN >= 9 || dev->info->is_cherryview) { + switch (info->view->format) { + case ISL_FORMAT_BC2_UNORM: + case ISL_FORMAT_BC3_UNORM: + case ISL_FORMAT_BC5_UNORM: + case ISL_FORMAT_BC5_SNORM: + case ISL_FORMAT_BC7_UNORM: + s.SamplerL2BypassModeDisable = true; + break; + default: + break; + } + } +#endif + #if 0 if (GEN_GEN == 8) { if (isl_format_is_integer(info->view->format)) { -- cgit v1.2.3 From 3ed260f54cc353398965037f12f39aafa086bcc5 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 3 Mar 2016 16:21:09 -0800 Subject: hack to make dota 2 menus work --- src/intel/vulkan/genX_cmd_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index a888c360673..a31ecc398e7 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -721,8 +721,8 @@ void genX(CmdBeginRenderPass)( const VkRect2D *render_area = &pRenderPassBegin->renderArea; anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), - .ClippedDrawingRectangleYMin = render_area->offset.y, - .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMin = MAX2(render_area->offset.y, 0), + .ClippedDrawingRectangleXMin = MAX2(render_area->offset.x, 0), .ClippedDrawingRectangleYMax = render_area->offset.y + render_area->extent.height - 1, .ClippedDrawingRectangleXMax = -- cgit v1.2.3 From 9d7faadd8a319d07616d7288451562bf8b9a4be8 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 3 Mar 2016 16:43:49 -0800 Subject: anv: Fix backwards shadow comparisons sample_c is backwards from what GL and Vulkan expect. See intel_state.c in i965. v2: Drop unused vk_to_gen_compare_op. 
Reviewed-by: Jason Ekstrand --- src/intel/vulkan/genX_state.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 63ea26937e5..900f6dc8eec 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -173,15 +173,26 @@ static const uint32_t vk_to_gen_tex_address[] = { [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, }; -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +/* Vulkan specifies the result of shadow comparisons as: + * 1 if ref texel, + * 0 otherwise. + * + * The hardware does: + * 0 if texel ref, + * 1 otherwise. + * + * So, these look a bit strange because there's both a negation + * and swapping of the arguments involved. 
+ */ +static const uint32_t vk_to_gen_shadow_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPALWAYS, + [VK_COMPARE_OP_LESS] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLESS, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGREATER, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPNEVER, }; VkResult genX(CreateSampler)( @@ -228,7 +239,7 @@ VkResult genX(CreateSampler)( .ChromaKeyEnable = 0, .ChromaKeyIndex = 0, .ChromaKeyMode = 0, - .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .ShadowFunction = vk_to_gen_shadow_compare_op[pCreateInfo->compareOp], .CubeSurfaceControlMode = OVERRIDE, .BorderColorPointer = border_color_offset, -- cgit v1.2.3 From fa8539dd6b1fd9bb1fc7f2f79889aacf14231d13 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 00:01:25 -0800 Subject: anv/pipeline: Respect pRasterizationState->depthBiasEnable --- src/intel/vulkan/gen7_cmd_buffer.c | 6 ------ src/intel/vulkan/gen7_pipeline.c | 3 +++ src/intel/vulkan/gen8_cmd_buffer.c | 6 ------ src/intel/vulkan/gen8_pipeline.c | 3 +++ 4 files changed, 6 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 3ed93137f6a..1713cc17836 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -525,9 +525,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) { - bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || - cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; - const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); const struct anv_image *image = iview ? 
iview->image : NULL; @@ -543,9 +540,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) GENX(3DSTATE_SF_header), .DepthBufferSurfaceFormat = depth_format, .LineWidth = cmd_buffer->state.dynamic.line_width, - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 7d283f18f40..c114bfef3ac 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -67,6 +67,9 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, /* uint32_t VertexSubPixelPrecisionSelect; */ .UsePointWidthState = !pipeline->writes_point_size, .PointWidth = 1.0, + .GlobalDepthOffsetConstant = info->depthBiasEnable, + .GlobalDepthOffsetScale = info->depthBiasEnable, + .GlobalDepthOffsetClamp = info->depthBiasEnable, }; GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf); diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 9d4926f86cd..d506cf48b0d 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -331,15 +331,9 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){ - bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || - cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; - uint32_t raster_dw[GENX(3DSTATE_RASTER_length)]; struct GENX(3DSTATE_RASTER) raster = { GENX(3DSTATE_RASTER_header), - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, .GlobalDepthOffsetConstant = 
cmd_buffer->state.dynamic.depth_bias.bias, .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 494a64949b6..a464006566b 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -90,6 +90,9 @@ emit_rs_state(struct anv_pipeline *pipeline, .ViewportZFarClipTestEnable = true, .ViewportZNearClipTestEnable = true, #endif + .GlobalDepthOffsetEnableSolid = info->depthBiasEnable, + .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable, + .GlobalDepthOffsetEnablePoint = info->depthBiasEnable, }; GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster); -- cgit v1.2.3 From a8afd296537a0e61fd12c817c3003309346a5e75 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 00:02:18 -0800 Subject: anv/pipeline: Use the right provoking vertex for triangle fans --- src/intel/vulkan/gen7_pipeline.c | 4 ++-- src/intel/vulkan/gen8_pipeline.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index c114bfef3ac..22a892bba3a 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -61,7 +61,7 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 1, /* uint32_t AALineDistanceMode; */ /* uint32_t VertexSubPixelPrecisionSelect; */ @@ -230,7 +230,7 @@ genX(graphics_pipeline_create)( .ClipMode = CLIPMODE_NORMAL, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 1, .MinimumPointWidth = 0.125, .MaximumPointWidth = 255.875, .MaximumVPIndex = 
pCreateInfo->pViewportState->viewportCount - 1); diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index a464006566b..52629a73342 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -59,7 +59,7 @@ emit_rs_state(struct anv_pipeline *pipeline, .ViewportTransformEnable = !(extra && extra->disable_viewport), .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 1, .PointWidthSource = pipeline->writes_point_size ? Vertex : State, .PointWidth = 1.0, }; -- cgit v1.2.3 From fcd8e571851c18a259fdc4ccb34f6ba23f3d29ea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 13:39:04 -0800 Subject: anv/pipeline: More competent gen8 clipping --- src/intel/vulkan/gen8_pipeline.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 52629a73342..ecb8f6d7b09 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -326,7 +326,21 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), .ClipEnable = true, + .EarlyCullEnable = true, + .APIMode = 1, /* D3D */ .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + + .ClipMode = + pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? 
+ REJECT_ALL : NORMAL, + + .NonPerspectiveBarycentricEnable = + (pipeline->wm_prog_data.barycentric_interp_modes & 0x38) != 0, + + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 1, + .MinimumPointWidth = 0.125, .MaximumPointWidth = 255.875, .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); -- cgit v1.2.3 From ec18fef88d8a7a1a3541b0d40708a6637412f50e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 10:45:24 -0800 Subject: anv/pipeline: Set StencilBufferWriteEnable from the pipeline The hardware docs say that StencilBufferWriteEnable should only be set if StencilTestEnable is set. It seems reasonable to set them together. --- src/intel/vulkan/gen7_cmd_buffer.c | 3 --- src/intel/vulkan/gen7_pipeline.c | 1 + src/intel/vulkan/gen8_cmd_buffer.c | 3 --- src/intel/vulkan/gen8_pipeline.c | 1 + 4 files changed, 2 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 1713cc17836..71010583129 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -581,9 +581,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; struct GENX(DEPTH_STENCIL_STATE) depth_stencil = { - .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || - d->stencil_write_mask.back != 0, - .StencilTestMask = d->stencil_compare_mask.front & 0xff, .StencilWriteMask = d->stencil_write_mask.front & 0xff, diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 22a892bba3a..d563a8c26cd 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -95,6 +95,7 @@ gen7_emit_ds_state(struct anv_pipeline *pipeline, .DoubleSidedStencilEnable = true, .StencilTestEnable = info->stencilTestEnable, + .StencilBufferWriteEnable = info->stencilTestEnable, 
.StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index d506cf48b0d..8e7a078d84b 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -384,9 +384,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct GENX(3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil) = { GENX(3DSTATE_WM_DEPTH_STENCIL_header), - .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || - d->stencil_write_mask.back != 0, - .StencilTestMask = d->stencil_compare_mask.front & 0xff, .StencilWriteMask = d->stencil_write_mask.front & 0xff, diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index ecb8f6d7b09..e8a067851cc 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -227,6 +227,7 @@ emit_ds_state(struct anv_pipeline *pipeline, .DoubleSidedStencilEnable = true, .StencilTestEnable = info->stencilTestEnable, + .StencilBufferWriteEnable = info->stencilTestEnable, .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], -- cgit v1.2.3 From d61dcec64dc66fea7f15f296212c68f18fe5aaa0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 11:07:27 -0800 Subject: anv/clear: Pull the stencil write mask from the pipeline The stencil write mask wasn't getting set at all so we were using whatever write mask happend to be left over by the application. 
--- src/intel/vulkan/anv_meta_clear.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 227f8f35115..c052b8b11f6 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -45,7 +45,8 @@ meta_clear_begin(struct anv_meta_saved_state *saved_state, anv_meta_save(saved_state, cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT) | (1 << VK_DYNAMIC_STATE_SCISSOR) | - (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)); + (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE) | + (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)); cmd_buffer->state.dynamic.viewport.count = 0; cmd_buffer->state.dynamic.scissor.count = 0; @@ -193,6 +194,7 @@ create_pipeline(struct anv_device *device, .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .dynamicStateCount = 9, .pDynamicStates = (VkDynamicState[]) { + /* Everything except stencil write mask */ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_LINE_WIDTH, @@ -200,7 +202,6 @@ create_pipeline(struct anv_device *device, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, }, }, -- cgit v1.2.3 From f374765ce685d30b4eabe8085dc3daa95a75f8f8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 12:22:32 -0800 Subject: anv/cmd_buffer: Mask stencil reference values --- src/intel/vulkan/gen7_cmd_buffer.c | 4 ++-- src/intel/vulkan/gen8_cmd_buffer.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 71010583129..985907872fa 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -561,8 +561,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BlendConstantColorGreen = 
cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = d->stencil_reference.front, - .BackFaceStencilReferenceValue = d->stencil_reference.back, + .StencilReferenceValue = d->stencil_reference.front & 0xff, + .BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff, }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); if (!cmd_buffer->device->info.has_llc) diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 8e7a078d84b..dc0d5570904 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -361,8 +361,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = d->stencil_reference.front, - .BackFaceStencilReferenceValue = d->stencil_reference.back, + .StencilReferenceValue = d->stencil_reference.front & 0xff, + .BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff, }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); @@ -437,8 +437,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, - .StencilReferenceValue = d->stencil_reference.front, - .BackfaceStencilReferenceValue = d->stencil_reference.back + .StencilReferenceValue = d->stencil_reference.front & 0xff, + .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff, }; GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil); -- cgit v1.2.3 From d154a5ebd68b3d1f465d6bb77e34b3ff04bc8a9f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 
Mar 2016 12:23:01 -0800 Subject: anv/cmd_buffer: Let the pipeline set StencilBufferWriteEnable on gen9 --- src/intel/vulkan/gen8_cmd_buffer.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index dc0d5570904..8972a8db6fc 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -428,9 +428,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { GEN9_3DSTATE_WM_DEPTH_STENCIL_header, - .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || - d->stencil_write_mask.back != 0, - .StencilTestMask = d->stencil_compare_mask.front & 0xff, .StencilWriteMask = d->stencil_write_mask.front & 0xff, -- cgit v1.2.3 From b80c8ebc4587a15c823b5223419eadb0ca51cdd1 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 3 Mar 2016 15:40:13 -0800 Subject: isl: Get rid of isl_surf_fill_state_info::level0_extent_px This field is no longer needed. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/isl/isl.h | 9 --------- src/intel/isl/isl_surface_state.c | 8 ++++---- src/intel/vulkan/anv_image.c | 33 +++------------------------------ 3 files changed, 7 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 5a48bce6c64..248a94d64eb 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -772,15 +772,6 @@ struct isl_surf_fill_state_info { */ uint32_t mocs; - /** - * This allows the caller to over-ride the dimensions of the surface. - * This is used at the moment for compressed surfaces to let us hack - * around the fact that we can't actually render to them. - * - * FIXME: We really need to get rid of this. It's a lie. 
- */ - struct isl_extent4d level0_extent_px; - /** * The clear color for this surface * diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index 1607aa6233d..fe8f07cfe30 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -257,8 +257,8 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, .SurfaceQPitch = get_qpitch(info->surf) >> 2, #endif - .Width = info->level0_extent_px.width - 1, - .Height = info->level0_extent_px.height - 1, + .Width = info->surf->logical_level0_px.width - 1, + .Height = info->surf->logical_level0_px.height - 1, .Depth = 0, /* TEMPLATE */ .SurfacePitch = info->surf->row_pitch - 1, @@ -338,7 +338,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, * If the volume texture is MIP-mapped, this field specifies the * depth of the base MIP level. */ - s.Depth = info->level0_extent_px.depth - 1; + s.Depth = info->surf->logical_level0_px.depth - 1; /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: * @@ -346,7 +346,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, * indicates the extent of the accessible 'R' coordinates minus 1 on * the LOD currently being rendered to. */ - s.RenderTargetViewExtent = info->level0_extent_px.depth - 1; + s.RenderTargetViewExtent = info->surf->logical_level0_px.depth - 1; break; default: unreachable(!"bad SurfaceType"); diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index dc1ea9c80cc..c76a5f6dba9 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -540,30 +540,6 @@ anv_image_view_init(struct anv_image_view *iview, }, }; - struct isl_extent4d level0_extent_px; - - if (!isl_format_is_compressed(format) && - isl_format_is_compressed(image->format->isl_format)) { - /* Scale the ImageView extent by the backing Image. This is used - * internally when an uncompressed ImageView is created on a - * compressed Image. 
The ImageView can therefore be used for copying - * data from a source Image to a destination Image. - */ - const struct isl_format_layout * isl_layout = image->format->isl_layout; - - level0_extent_px.depth = anv_minify(image->extent.depth, range->baseMipLevel); - level0_extent_px.depth = DIV_ROUND_UP(level0_extent_px.depth, isl_layout->bd); - - level0_extent_px.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; - level0_extent_px.width = isl_surf_get_row_pitch_el(&surface->isl); - isl_view.base_level = 0; - isl_view.base_array_layer = 0; - } else { - level0_extent_px.width = image->extent.width; - level0_extent_px.height = image->extent.height; - level0_extent_px.depth = image->extent.depth; - } - iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width , range->baseMipLevel), .height = anv_minify(image->extent.height, range->baseMipLevel), @@ -586,8 +562,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->sampler_surface_state.map, .surf = &surface->isl, .view = &isl_view, - .mocs = device->default_mocs, - .level0_extent_px = level0_extent_px); + .mocs = device->default_mocs); if (!device->info.has_llc) anv_state_clflush(iview->sampler_surface_state); @@ -603,8 +578,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->color_rt_surface_state.map, .surf = &surface->isl, .view = &isl_view, - .mocs = device->default_mocs, - .level0_extent_px = level0_extent_px); + .mocs = device->default_mocs); if (!device->info.has_llc) anv_state_clflush(iview->color_rt_surface_state); @@ -621,8 +595,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->storage_surface_state.map, .surf = &surface->isl, .view = &isl_view, - .mocs = device->default_mocs, - .level0_extent_px = level0_extent_px); + .mocs = device->default_mocs); } else { anv_fill_buffer_surface_state(device, iview->storage_surface_state, ISL_FORMAT_RAW, -- cgit v1.2.3 From a6fb62a86441ce7296f9d06cf7a71d2f51f3a3f5 Mon Sep 17 00:00:00 2001 From: Nanley 
Chery Date: Thu, 3 Mar 2016 15:49:13 -0800 Subject: isl: Fix RenderTargetViewExtent for mipmapped 3D surfaces Match the comment stated above the assignment. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/isl/isl_surface_state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index fe8f07cfe30..f3390a6c553 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -346,7 +346,8 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, * indicates the extent of the accessible 'R' coordinates minus 1 on * the LOD currently being rendered to. */ - s.RenderTargetViewExtent = info->surf->logical_level0_px.depth - 1; + s.RenderTargetViewExtent = isl_minify(info->surf->logical_level0_px.depth, + info->view->base_level) - 1; break; default: unreachable(!"bad SurfaceType"); -- cgit v1.2.3 From f700d16a892cbedc58a8c7850f1e75d1c127ce26 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 15:38:11 -0800 Subject: anv/cmd_buffer: Include Haswell in set_subpass --- src/intel/vulkan/anv_cmd_buffer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 418a143b7bc..aa54bdecb0a 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -965,7 +965,11 @@ anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, { switch (cmd_buffer->device->info.gen) { case 7: - gen7_cmd_buffer_set_subpass(cmd_buffer, subpass); + if (cmd_buffer->device->info.is_haswell) { + gen75_cmd_buffer_set_subpass(cmd_buffer, subpass); + } else { + gen7_cmd_buffer_set_subpass(cmd_buffer, subpass); + } break; case 8: gen8_cmd_buffer_set_subpass(cmd_buffer, subpass); -- cgit v1.2.3 From 653261285e1758f6fde0fb49b3fe30d6d2631077 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: 
Fri, 4 Mar 2016 12:42:03 -0800 Subject: anv/cmd_buffer: Reset the state streams when resetting the command buffer --- src/intel/vulkan/anv_cmd_buffer.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index aa54bdecb0a..6ff5f35bc6a 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -333,6 +333,14 @@ VkResult anv_ResetCommandBuffer( anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); anv_cmd_state_reset(cmd_buffer); + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &cmd_buffer->device->surface_state_block_pool); + + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &cmd_buffer->device->dynamic_state_block_pool); + return VK_SUCCESS; } -- cgit v1.2.3 From cc57efc67abb5b81ebc2648775d8829ab27b7df8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 17:56:12 -0800 Subject: anv/pipeline: Fix depthBiasEnable on gen7 The first time I tried to fix this, I set the wrong fields. 
--- src/intel/vulkan/gen7_pipeline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index d563a8c26cd..5235d399ce5 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -67,9 +67,9 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, /* uint32_t VertexSubPixelPrecisionSelect; */ .UsePointWidthState = !pipeline->writes_point_size, .PointWidth = 1.0, - .GlobalDepthOffsetConstant = info->depthBiasEnable, - .GlobalDepthOffsetScale = info->depthBiasEnable, - .GlobalDepthOffsetClamp = info->depthBiasEnable, + .GlobalDepthOffsetEnableSolid = info->depthBiasEnable, + .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable, + .GlobalDepthOffsetEnablePoint = info->depthBiasEnable, }; GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf); -- cgit v1.2.3 From c1436e80efee072f1fc3e3b4af0d5e7ad9dd3fb7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 19:14:48 -0800 Subject: anv/meta_clear: Set the right number of dynamic states --- src/intel/vulkan/anv_meta_clear.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index c052b8b11f6..8f92a9d755c 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -192,7 +192,7 @@ create_pipeline(struct anv_device *device, * we need only restore dynamic state was vkCmdSet. 
*/ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 9, + .dynamicStateCount = 8, .pDynamicStates = (VkDynamicState[]) { /* Everything except stencil write mask */ VK_DYNAMIC_STATE_VIEWPORT, -- cgit v1.2.3 From 4e75f9b219f674ea79e6d521dd8a6b1ccd8b3c10 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 4 Mar 2016 20:41:05 -0800 Subject: anv: Implement VK_REMAINING_{MIP_LEVELS,ARRAY_LAYERS} v2: Subtract the baseMipLevel and baseArrayLayer (Jason) Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_image.c | 12 ++++++------ src/intel/vulkan/anv_meta_clear.c | 4 ++-- src/intel/vulkan/anv_private.h | 17 +++++++++++++++++ 3 files changed, 25 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index c76a5f6dba9..143a08413f7 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -388,9 +388,9 @@ anv_validate_CreateImageView(VkDevice _device, assert(subresource->levelCount > 0); assert(subresource->layerCount > 0); assert(subresource->baseMipLevel < image->levels); - assert(subresource->baseMipLevel + subresource->levelCount <= image->levels); + assert(subresource->baseMipLevel + anv_get_levelCount(image, subresource) <= image->levels); assert(subresource->baseArrayLayer < image->array_size); - assert(subresource->baseArrayLayer + subresource->layerCount <= image->array_size); + assert(subresource->baseArrayLayer + anv_get_layerCount(image, subresource) <= image->array_size); assert(pView); const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT @@ -496,10 +496,10 @@ anv_image_view_init(struct anv_image_view *iview, unreachable("bad VkImageType"); case VK_IMAGE_TYPE_1D: case VK_IMAGE_TYPE_2D: - assert(range->baseArrayLayer + range->layerCount - 1 <= image->array_size); + assert(range->baseArrayLayer + anv_get_layerCount(image, range) - 1 <= image->array_size); break; case VK_IMAGE_TYPE_3D: - 
assert(range->baseArrayLayer + range->layerCount - 1 + assert(range->baseArrayLayer + anv_get_layerCount(image, range) - 1 <= anv_minify(image->extent.depth, range->baseMipLevel)); break; } @@ -525,9 +525,9 @@ anv_image_view_init(struct anv_image_view *iview, struct isl_view isl_view = { .format = format, .base_level = range->baseMipLevel, - .levels = range->levelCount, + .levels = anv_get_levelCount(image, range), .base_array_layer = range->baseArrayLayer, - .array_len = range->layerCount, + .array_len = anv_get_layerCount(image, range), .channel_select = { remap_swizzle(pCreateInfo->components.r, VK_COMPONENT_SWIZZLE_R, swizzle), diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 8f92a9d755c..bce94460844 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -795,8 +795,8 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, for (uint32_t r = 0; r < range_count; r++) { const VkImageSubresourceRange *range = &ranges[r]; - for (uint32_t l = 0; l < range->levelCount; ++l) { - for (uint32_t s = 0; s < range->layerCount; ++s) { + for (uint32_t l = 0; l < anv_get_levelCount(image, range); ++l) { + for (uint32_t s = 0; s < anv_get_layerCount(image, range); ++s) { struct anv_image_view iview; anv_image_view_init(&iview, cmd_buffer->device, &(VkImageViewCreateInfo) { diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index cb4f9736fdf..f87270466ae 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1641,6 +1641,23 @@ struct anv_image { }; }; +static inline uint32_t +anv_get_layerCount(const struct anv_image *image, + const VkImageSubresourceRange *range) +{ + return range->layerCount == VK_REMAINING_ARRAY_LAYERS ? 
+ image->array_size - range->baseArrayLayer : range->layerCount; +} + +static inline uint32_t +anv_get_levelCount(const struct anv_image *image, + const VkImageSubresourceRange *range) +{ + return range->levelCount == VK_REMAINING_MIP_LEVELS ? + image->levels - range->baseMipLevel : range->levelCount; +} + + struct anv_image_view { const struct anv_image *image; /**< VkImageViewCreateInfo::image */ struct anv_bo *bo; -- cgit v1.2.3 From 81f30e2f509b4fcd79376ff02363aba831918ac6 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 5 Mar 2016 00:54:54 -0800 Subject: anv/hsw: Move query code to genX file for Haswell This fixes many CTS cases, but will require an update to the kernel command parser register whitelist. (The CS GPRs and TIMESTAMP registers need to be whitelisted.) Signed-off-by: Jordan Justen --- src/intel/vulkan/gen8_cmd_buffer.c | 240 ------------------------------------ src/intel/vulkan/genX_cmd_buffer.c | 244 +++++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+), 240 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 8972a8db6fc..8d8775fb01d 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -601,246 +601,6 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.compute_dirty = 0; } -static void -emit_ps_depth_count(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WritePSDepthCount, - .DepthStallEnable = true, - .Address = { bo, offset }); -} - -static void -emit_query_availability(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteImmediateData, - .Address = { bo, offset }, - .ImmediateData = 1); -} - -void genX(CmdBeginQuery)( - 
VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query, - VkQueryControlFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - /* Workaround: When meta uses the pipeline with the VS disabled, it seems - * that the pipelining of the depth write breaks. What we see is that - * samples from the render pass clear leaks into the first query - * immediately after the clear. Doing a pipecontrol with a post-sync - * operation and DepthStallEnable seems to work around the issue. - */ - if (cmd_buffer->state.need_query_wa) { - cmd_buffer->state.need_query_wa = false; - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DepthCacheFlushEnable = true, - .DepthStallEnable = true); - } - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - query * sizeof(struct anv_query_pool_slot)); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -void genX(CmdEndQuery)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - query * sizeof(struct anv_query_pool_slot) + 8); - - emit_query_availability(&cmd_buffer->batch, &pool->bo, - query * sizeof(struct anv_query_pool_slot) + 16); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -#define TIMESTAMP 0x2358 - -void genX(CmdWriteTimestamp)( - VkCommandBuffer commandBuffer, - VkPipelineStageFlagBits pipelineStage, - VkQueryPool queryPool, - uint32_t query) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - uint32_t offset = query * sizeof(struct anv_query_pool_slot); - - assert(pool->type == 
VK_QUERY_TYPE_TIMESTAMP); - - switch (pipelineStage) { - case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = TIMESTAMP, - .MemoryAddress = { &pool->bo, offset }); - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = TIMESTAMP + 4, - .MemoryAddress = { &pool->bo, offset + 4 }); - break; - - default: - /* Everything else is bottom-of-pipe */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteTimestamp, - .Address = { &pool->bo, offset }); - break; - } - - emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16); -} - -#define alu_opcode(v) __gen_uint((v), 20, 31) -#define alu_operand1(v) __gen_uint((v), 10, 19) -#define alu_operand2(v) __gen_uint((v), 0, 9) -#define alu(opcode, operand1, operand2) \ - alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) - -#define OPCODE_NOOP 0x000 -#define OPCODE_LOAD 0x080 -#define OPCODE_LOADINV 0x480 -#define OPCODE_LOAD0 0x081 -#define OPCODE_LOAD1 0x481 -#define OPCODE_ADD 0x100 -#define OPCODE_SUB 0x101 -#define OPCODE_AND 0x102 -#define OPCODE_OR 0x103 -#define OPCODE_XOR 0x104 -#define OPCODE_STORE 0x180 -#define OPCODE_STOREINV 0x580 - -#define OPERAND_R0 0x00 -#define OPERAND_R1 0x01 -#define OPERAND_R2 0x02 -#define OPERAND_R3 0x03 -#define OPERAND_R4 0x04 -#define OPERAND_SRCA 0x20 -#define OPERAND_SRCB 0x21 -#define OPERAND_ACCU 0x31 -#define OPERAND_ZF 0x32 -#define OPERAND_CF 0x33 - -#define CS_GPR(n) (0x2600 + (n) * 8) - -static void -emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg + 4, - .MemoryAddress = { bo, offset + 4 }); -} - -static void -store_query_result(struct 
anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags) -{ - anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - - if (flags & VK_QUERY_RESULT_64_BIT) - anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = reg + 4, - .MemoryAddress = { bo, offset + 4 }); -} - -void genX(CmdCopyQueryPoolResults)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize destStride, - VkQueryResultFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - uint32_t slot_offset, dst_offset; - - if (flags & VK_QUERY_RESULT_WAIT_BIT) - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .CommandStreamerStallEnable = true, - .StallAtPixelScoreboard = true); - - dst_offset = buffer->offset + destOffset; - for (uint32_t i = 0; i < queryCount; i++) { - - slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot); - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(0), &pool->bo, slot_offset); - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(1), &pool->bo, slot_offset + 8); - - /* FIXME: We need to clamp the result for 32 bit. 
*/ - - uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); - dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); - dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); - dw[3] = alu(OPCODE_SUB, 0, 0); - dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); - break; - - case VK_QUERY_TYPE_TIMESTAMP: - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(2), &pool->bo, slot_offset); - break; - - default: - unreachable("unhandled query type"); - } - - store_query_result(&cmd_buffer->batch, - CS_GPR(2), buffer->bo, dst_offset, flags); - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), - &pool->bo, slot_offset + 16); - if (flags & VK_QUERY_RESULT_64_BIT) - store_query_result(&cmd_buffer->batch, - CS_GPR(0), buffer->bo, dst_offset + 8, flags); - else - store_query_result(&cmd_buffer->batch, - CS_GPR(0), buffer->bo, dst_offset + 4, flags); - } - - dst_offset += destStride; - } -} - void genX(CmdSetEvent)( VkCommandBuffer commandBuffer, VkEvent _event, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index a31ecc398e7..82959f3abf6 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -754,3 +754,247 @@ void genX(CmdEndRenderPass)( anv_cmd_buffer_resolve_subpass(cmd_buffer); } + +static void +emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .DepthStallEnable = true, + .Address = { bo, offset }); +} + +static void +emit_query_availability(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { bo, offset }, + .ImmediateData = 1); +} + +void genX(CmdBeginQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + 
uint32_t query, + VkQueryControlFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + /* Workaround: When meta uses the pipeline with the VS disabled, it seems + * that the pipelining of the depth write breaks. What we see is that + * samples from the render pass clear leaks into the first query + * immediately after the clear. Doing a pipecontrol with a post-sync + * operation and DepthStallEnable seems to work around the issue. + */ + if (cmd_buffer->state.need_query_wa) { + cmd_buffer->state.need_query_wa = false; + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DepthCacheFlushEnable = true, + .DepthStallEnable = true); + } + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void genX(CmdEndQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot) + 8); + + emit_query_availability(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot) + 16); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +#define TIMESTAMP 0x2358 + +void genX(CmdWriteTimestamp)( + VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t query) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + uint32_t offset = query * sizeof(struct anv_query_pool_slot); + + assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); + + switch (pipelineStage) { + 
case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { &pool->bo, offset }); + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { &pool->bo, offset + 4 }); + break; + + default: + /* Everything else is bottom-of-pipe */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = { &pool->bo, offset }); + break; + } + + emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16); +} + +#if GEN_GEN > 7 || GEN_IS_HASWELL + +#define alu_opcode(v) __gen_uint((v), 20, 31) +#define alu_operand1(v) __gen_uint((v), 10, 19) +#define alu_operand2(v) __gen_uint((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +static void +store_query_result(struct anv_batch *batch, 
uint32_t reg, + struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags) +{ + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +void genX(CmdCopyQueryPoolResults)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot); + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. 
*/ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + break; + + case VK_QUERY_TYPE_TIMESTAMP: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(2), &pool->bo, slot_offset); + break; + + default: + unreachable("unhandled query type"); + } + + store_query_result(&cmd_buffer->batch, + CS_GPR(2), buffer->bo, dst_offset, flags); + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), + &pool->bo, slot_offset + 16); + if (flags & VK_QUERY_RESULT_64_BIT) + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 8, flags); + else + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 4, flags); + } + + dst_offset += destStride; + } +} + +#endif -- cgit v1.2.3 From 9a90176d488c00700cbd832a6a2d53a78114a21e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 5 Mar 2016 08:45:01 -0800 Subject: anv/pipeline: Calculate the correct max_source_attr for 3DSTATE_SBE --- src/intel/vulkan/genX_pipeline_util.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 66250e5d4d6..28b2a032c46 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -288,13 +288,12 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline) if (input_index < 0) continue; - int source_attr = fs_input_map->varying_to_slot[attr]; - max_source_attr = MAX2(max_source_attr, source_attr); + const int slot = fs_input_map->varying_to_slot[attr]; if (input_index >= 16) continue; - if (source_attr == -1) { + if (slot == -1) { /* This attribute does not exist in the VUE--that means that the * vertex shader did not write to it. 
It could be that it's a * regular varying read by the fragment shader but not written by @@ -308,10 +307,13 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline) swiz.Attribute[input_index].ComponentOverrideZ = true; swiz.Attribute[input_index].ComponentOverrideW = true; } else { + assert(slot >= 2); + const int source_attr = slot - 2; + max_source_attr = MAX2(max_source_attr, source_attr); /* We have to subtract two slots to accout for the URB entry output * read offset in the VS and GS stages. */ - swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + swiz.Attribute[input_index].SourceAttribute = source_attr; } } -- cgit v1.2.3 From 7c1660aa14094e40fba9f39ce194cb6238311b65 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 5 Mar 2016 09:13:44 -0800 Subject: anv: Don't allow D16_UNORM to be combined with stencil Among other things, this can cause the depth or stencil test to spurriously fail when the fragment shader uses discard. --- src/intel/vulkan/anv_formats.c | 2 +- src/intel/vulkan/genX_cmd_buffer.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index b4b52aa6053..4d279a8fb72 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -161,7 +161,7 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true), fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .has_depth = true), fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .has_stencil = true), - fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .has_depth = true, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true, .has_stencil = true), fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .has_depth = true, .has_stencil = true), diff --git 
a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 82959f3abf6..88cc13b580a 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -646,11 +646,12 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) * * The PRM is wrong, though. The width and height must be programmed to * actual framebuffer's width and height, even when neither depth buffer - * nor stencil buffer is present. + * nor stencil buffer is present. Also, D16_UNORM is not allowed to + * be combined with a stencil buffer so we use D32_FLOAT instead. */ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), .SurfaceType = SURFTYPE_2D, - .SurfaceFormat = D16_UNORM, + .SurfaceFormat = D32_FLOAT, .Width = fb->width - 1, .Height = fb->height - 1, .StencilWriteEnable = has_stencil); -- cgit v1.2.3 From 1afdfc3e6e022a4e5e9701b365c4f92554be999a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 5 Mar 2016 09:19:01 -0800 Subject: anv/pipeline: Implement the depth compare EQUAL workaround on gen8+ --- src/intel/vulkan/gen8_pipeline.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index e8a067851cc..4a5e8674189 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -238,6 +238,14 @@ emit_ds_state(struct anv_pipeline *pipeline, .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], }; + /* From the Broadwell PRM: + * + * "If Depth_Test_Enable = 1 AND Depth_Test_func = EQUAL, the + * Depth_Write_Enable must be set to 0." 
+ */ + if (info->depthTestEnable && info->depthCompareOp == VK_COMPARE_OP_EQUAL) + wm_depth_stencil.DepthBufferWriteEnable = false; + GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil); } -- cgit v1.2.3 From ab36eae5e78a2edf4f699fc43fc9c89e90aabd07 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Mar 2016 16:10:29 -0800 Subject: anv: Remove left-over bits of sparse-descriptor code --- src/intel/vulkan/anv_pipeline.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index fd6f8c92cfa..c93b1a07246 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -366,10 +366,6 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, /* Set up dynamic offsets */ anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); - char surface_usage_mask[256], sampler_usage_mask[256]; - zero(surface_usage_mask); - zero(sampler_usage_mask); - /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ if (pipeline->layout) anv_nir_apply_pipeline_layout(pipeline, nir, prog_data); -- cgit v1.2.3 From 3baf8af947ab0d4e016f79ed76d0bab78c0294ef Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Mar 2016 16:18:30 -0800 Subject: anv: Remove excess whitespace --- src/intel/vulkan/anv_pipeline_cache.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index c89bb2a2ee1..c8ff7e52996 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -165,7 +165,7 @@ anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) table = malloc(byte_size); if (table == NULL) return VK_ERROR_OUT_OF_HOST_MEMORY; - + cache->table = table; cache->table_size = table_size; cache->kernel_count = 0; @@ -176,7 +176,7 @@ anv_pipeline_cache_grow(struct anv_pipeline_cache 
*cache) const uint32_t offset = old_table[i]; if (offset == ~0) continue; - + struct cache_entry *entry = cache->program_stream.block_pool->map + offset; anv_pipeline_cache_add_entry(cache, entry, offset); @@ -228,7 +228,7 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, } pthread_mutex_unlock(&cache->mutex); - + memcpy(state.map + preamble_size, kernel, kernel_size); if (!cache->device->info.has_llc) @@ -240,14 +240,14 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, static void anv_pipeline_cache_load(struct anv_pipeline_cache *cache, const void *data, size_t size) -{ +{ struct anv_device *device = cache->device; uint8_t uuid[VK_UUID_SIZE]; struct { uint32_t device_id; uint8_t uuid[VK_UUID_SIZE]; } header; - + if (size < sizeof(header)) return; memcpy(&header, data, sizeof(header)); @@ -259,7 +259,7 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, const void *end = data + size; const void *p = data + sizeof(header); - + while (p < end) { /* The kernels aren't 64 byte aligned in the serialized format so * they're always right after the prog_data. 
@@ -327,7 +327,7 @@ VkResult anv_GetPipelineCacheData( ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); const size_t size = 4 + VK_UUID_SIZE + cache->total_size; - + if (pData == NULL) { *pDataSize = size; return VK_SUCCESS; @@ -341,10 +341,10 @@ VkResult anv_GetPipelineCacheData( void *p = pData; memcpy(p, &device->chipset_id, sizeof(device->chipset_id)); p += sizeof(device->chipset_id); - + anv_device_get_cache_uuid(p); p += VK_UUID_SIZE; - + struct cache_entry *entry; for (uint32_t i = 0; i < cache->table_size; i++) { if (cache->table[i] == ~0) @@ -357,7 +357,7 @@ VkResult anv_GetPipelineCacheData( void *kernel = (void *) entry + align_u32(sizeof(*entry) + entry->prog_data_size, 64); - + memcpy(p, kernel, entry->kernel_size); p += entry->kernel_size; } @@ -375,7 +375,7 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, struct cache_entry *entry = src->program_stream.block_pool->map + src->table[i]; - + if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) continue; @@ -400,6 +400,6 @@ VkResult anv_MergePipelineCaches( anv_pipeline_cache_merge(dst, src); } - + return VK_SUCCESS; } -- cgit v1.2.3 From 26ed943eb961e3c9cb939097dbbdb5bd547e4302 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Mar 2016 16:21:17 -0800 Subject: anv: Fix shader key hashing This was copied from inline code to a helper and wasn't updated to hash a pointer instead. 
--- src/intel/vulkan/anv_pipeline_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index c8ff7e52996..7e20ff74db0 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -86,7 +86,7 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, struct mesa_sha1 *ctx; ctx = _mesa_sha1_init(); - _mesa_sha1_update(ctx, &key, sizeof(key)); + _mesa_sha1_update(ctx, key, key_size); _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint)); /* hash in shader stage, pipeline layout? */ -- cgit v1.2.3 From cd812f086e4eda30ae4859bdfef21f06700918a9 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Mar 2016 16:39:59 -0800 Subject: anv: Use 1.0 pipeline cache header The final version of the pipeline cache header adds a few more fields. --- src/intel/vulkan/anv_pipeline_cache.c | 36 ++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 7e20ff74db0..fa41637d2c0 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -237,20 +237,31 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, return state.offset + preamble_size; } +struct cache_header { + uint32_t header_size; + uint32_t header_version; + uint32_t vendor_id; + uint32_t device_id; + uint8_t uuid[VK_UUID_SIZE]; +}; + static void anv_pipeline_cache_load(struct anv_pipeline_cache *cache, const void *data, size_t size) { struct anv_device *device = cache->device; + struct cache_header header; uint8_t uuid[VK_UUID_SIZE]; - struct { - uint32_t device_id; - uint8_t uuid[VK_UUID_SIZE]; - } header; if (size < sizeof(header)) return; memcpy(&header, data, sizeof(header)); + if 
(header.header_size < sizeof(header)) + return; + if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) + return; + if (header.vendor_id != 0x8086) + return; if (header.device_id != device->chipset_id) return; anv_device_get_cache_uuid(uuid); @@ -258,7 +269,7 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, return; const void *end = data + size; - const void *p = data + sizeof(header); + const void *p = data + header.header_size; while (p < end) { /* The kernels aren't 64 byte aligned in the serialized format so @@ -325,8 +336,9 @@ VkResult anv_GetPipelineCacheData( { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + struct cache_header *header; - const size_t size = 4 + VK_UUID_SIZE + cache->total_size; + const size_t size = sizeof(*header) + cache->total_size; if (pData == NULL) { *pDataSize = size; @@ -339,11 +351,13 @@ VkResult anv_GetPipelineCacheData( } void *p = pData; - memcpy(p, &device->chipset_id, sizeof(device->chipset_id)); - p += sizeof(device->chipset_id); - - anv_device_get_cache_uuid(p); - p += VK_UUID_SIZE; + header = p; + header->header_size = sizeof(*header); + header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; + header->vendor_id = 0x8086; + header->device_id = device->chipset_id; + anv_device_get_cache_uuid(header->uuid); + p += header->header_size; struct cache_entry *entry; for (uint32_t i = 0; i < cache->table_size; i++) { -- cgit v1.2.3 From c028ffea7085297ea21d565dbc3913162ab70635 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Mar 2016 16:48:31 -0800 Subject: anv: Serialize as much pipeline cache as we can We can serialize as much as the application asks for and just stop once we run out of memory. This lets applications use a fixed amount of space for caching and still get some benefit. 
--- src/intel/vulkan/anv_pipeline_cache.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index fa41637d2c0..932baddb83a 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -77,6 +77,16 @@ struct cache_entry { /* kernel follows prog_data at next 64 byte aligned address */ }; +static uint32_t +entry_size(struct cache_entry *entry) +{ + /* This returns the number of bytes needed to serialize an entry, which + * doesn't include the alignment padding bytes. + */ + + return sizeof(*entry) + entry->prog_data_size + entry->kernel_size; +} + void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, struct anv_shader_module *module, @@ -146,10 +156,7 @@ anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, } } - /* We don't include the alignment padding bytes when we serialize, so - * don't include taht in the the total size. 
*/ - cache->total_size += - sizeof(*entry) + entry->prog_data_size + entry->kernel_size; + cache->total_size += entry_size(entry); cache->kernel_count++; } @@ -345,12 +352,12 @@ VkResult anv_GetPipelineCacheData( return VK_SUCCESS; } - if (*pDataSize < size) { + if (*pDataSize < sizeof(*header)) { *pDataSize = 0; return VK_INCOMPLETE; } - void *p = pData; + void *p = pData, *end = pData + *pDataSize; header = p; header->header_size = sizeof(*header); header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; @@ -365,6 +372,8 @@ VkResult anv_GetPipelineCacheData( continue; entry = cache->program_stream.block_pool->map + cache->table[i]; + if (end < p + entry_size(entry)) + break; memcpy(p, entry, sizeof(*entry) + entry->prog_data_size); p += sizeof(*entry) + entry->prog_data_size; @@ -376,6 +385,8 @@ VkResult anv_GetPipelineCacheData( p += entry->kernel_size; } + *pDataSize = p - pData; + return VK_SUCCESS; } -- cgit v1.2.3 From 37c5e7025333fed2943630fa94e59ef2d413030b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Mar 2016 16:52:20 -0800 Subject: anv: Rename 'table' to 'hash_table' in anv_pipeline_cache A little less ambiguous. 
--- src/intel/vulkan/anv_pipeline_cache.c | 32 ++++++++++++++++---------------- src/intel/vulkan/anv_private.h | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 932baddb83a..48f36706fef 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -49,15 +49,15 @@ anv_pipeline_cache_init(struct anv_pipeline_cache *cache, cache->kernel_count = 0; cache->total_size = 0; cache->table_size = 1024; - const size_t byte_size = cache->table_size * sizeof(cache->table[0]); - cache->table = malloc(byte_size); + const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]); + cache->hash_table = malloc(byte_size); /* We don't consider allocation failure fatal, we just start with a 0-sized * cache. */ - if (cache->table == NULL) + if (cache->hash_table == NULL) cache->table_size = 0; else - memset(cache->table, 0xff, byte_size); + memset(cache->hash_table, 0xff, byte_size); } void @@ -65,7 +65,7 @@ anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) { anv_state_stream_finish(&cache->program_stream); pthread_mutex_destroy(&cache->mutex); - free(cache->table); + free(cache->hash_table); } struct cache_entry { @@ -117,7 +117,7 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, for (uint32_t i = 0; i < cache->table_size; i++) { const uint32_t index = (start + i) & mask; - const uint32_t offset = cache->table[index]; + const uint32_t offset = cache->hash_table[index]; if (offset == ~0) return NO_KERNEL; @@ -150,8 +150,8 @@ anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, for (uint32_t i = 0; i < cache->table_size; i++) { const uint32_t index = (start + i) & mask; - if (cache->table[index] == ~0) { - cache->table[index] = entry_offset; + if (cache->hash_table[index] == ~0) { + cache->hash_table[index] = entry_offset; break; } } @@ -165,20 +165,20 @@ anv_pipeline_cache_grow(struct 
anv_pipeline_cache *cache) { const uint32_t table_size = cache->table_size * 2; const uint32_t old_table_size = cache->table_size; - const size_t byte_size = table_size * sizeof(cache->table[0]); + const size_t byte_size = table_size * sizeof(cache->hash_table[0]); uint32_t *table; - uint32_t *old_table = cache->table; + uint32_t *old_table = cache->hash_table; table = malloc(byte_size); if (table == NULL) return VK_ERROR_OUT_OF_HOST_MEMORY; - cache->table = table; + cache->hash_table = table; cache->table_size = table_size; cache->kernel_count = 0; cache->total_size = 0; - memset(cache->table, 0xff, byte_size); + memset(cache->hash_table, 0xff, byte_size); for (uint32_t i = 0; i < old_table_size; i++) { const uint32_t offset = old_table[i]; if (offset == ~0) @@ -368,10 +368,10 @@ VkResult anv_GetPipelineCacheData( struct cache_entry *entry; for (uint32_t i = 0; i < cache->table_size; i++) { - if (cache->table[i] == ~0) + if (cache->hash_table[i] == ~0) continue; - entry = cache->program_stream.block_pool->map + cache->table[i]; + entry = cache->program_stream.block_pool->map + cache->hash_table[i]; if (end < p + entry_size(entry)) break; @@ -395,11 +395,11 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, struct anv_pipeline_cache *src) { for (uint32_t i = 0; i < src->table_size; i++) { - if (src->table[i] == ~0) + if (src->hash_table[i] == ~0) continue; struct cache_entry *entry = - src->program_stream.block_pool->map + src->table[i]; + src->program_stream.block_pool->map + src->hash_table[i]; if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) continue; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index f87270466ae..32c8b13c952 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -633,7 +633,7 @@ struct anv_pipeline_cache { uint32_t total_size; uint32_t table_size; uint32_t kernel_count; - uint32_t *table; + uint32_t * hash_table; }; void anv_pipeline_cache_init(struct 
anv_pipeline_cache *cache, -- cgit v1.2.3 From 2b29342fae14d8626ca58f8a7ec358b70886ced3 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 08:15:16 -0800 Subject: anv: Store prog data in pipeline cache stream We have to keep it there for the cache to work, so let's not have an extra copy in struct anv_pipeline too. --- src/intel/vulkan/anv_cmd_buffer.c | 6 +- src/intel/vulkan/anv_pipeline.c | 104 +++++++++++++++++++--------------- src/intel/vulkan/anv_pipeline_cache.c | 39 +++++++------ src/intel/vulkan/anv_private.h | 35 +++++++++--- src/intel/vulkan/gen7_cmd_buffer.c | 5 +- src/intel/vulkan/gen7_pipeline.c | 12 ++-- src/intel/vulkan/gen8_cmd_buffer.c | 5 +- src/intel/vulkan/gen8_pipeline.c | 30 +++++----- src/intel/vulkan/genX_cmd_buffer.c | 20 +++---- src/intel/vulkan/genX_pipeline.c | 2 +- src/intel/vulkan/genX_pipeline_util.h | 36 +++++++----- 11 files changed, 171 insertions(+), 123 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 6ff5f35bc6a..5ec242fbf2a 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -757,7 +757,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, } if (stage == MESA_SHADER_COMPUTE && - cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) { + get_cs_prog_data(cmd_buffer->state.compute_pipeline)->uses_num_work_groups) { struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset; @@ -996,7 +996,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, { struct anv_push_constants *data = cmd_buffer->state.push_constants[stage]; - struct brw_stage_prog_data *prog_data = + const struct brw_stage_prog_data *prog_data = cmd_buffer->state.pipeline->prog_data[stage]; /* If we don't actually have any push constants, bail. 
*/ @@ -1027,7 +1027,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) struct anv_push_constants *data = cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index c93b1a07246..868215cd22d 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -406,7 +406,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, gl_shader_stage stage, - struct brw_stage_prog_data *prog_data) + const struct brw_stage_prog_data *prog_data) { struct brw_device_info *devinfo = &pipeline->device->info; uint32_t max_threads[] = { @@ -436,7 +436,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + const struct brw_stage_prog_data *stage_prog_data; struct brw_vs_prog_key key; uint32_t kernel; unsigned char sha1[20], *hash; @@ -446,17 +446,17 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, if (module->size > 0) { hash = sha1; anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); + kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); } else { hash = NULL; } if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); + struct brw_vs_prog_data prog_data = { 0, }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, 
MESA_SHADER_VERTEX, spec_info, - &prog_data->base.base); + &prog_data.base.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -465,31 +465,36 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, if (module->nir == NULL) ralloc_steal(mem_ctx, nir); - prog_data->inputs_read = nir->info.inputs_read; + prog_data.inputs_read = nir->info.inputs_read; if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) pipeline->writes_point_size = true; brw_compute_vue_map(&pipeline->device->info, - &prog_data->base.vue_map, + &prog_data.base.vue_map, nir->info.outputs_written, nir->info.separate_shader); unsigned code_size; const unsigned *shader_code = - brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, + brw_compile_vs(compiler, NULL, mem_ctx, &key, &prog_data, nir, NULL, false, -1, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + stage_prog_data = &prog_data.base.base; kernel = anv_pipeline_cache_upload_kernel(cache, hash, shader_code, code_size, - prog_data, sizeof(*prog_data)); + &stage_prog_data, + sizeof(prog_data)); ralloc_free(mem_ctx); } - if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + const struct brw_vs_prog_data *vs_prog_data = + (const struct brw_vs_prog_data *) stage_prog_data; + + if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { pipeline->vs_simd8 = kernel; pipeline->vs_vec4 = NO_KERNEL; } else { @@ -498,7 +503,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, } anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, - &prog_data->base.base); + stage_prog_data); return VK_SUCCESS; } @@ -513,7 +518,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; + const struct brw_stage_prog_data *stage_prog_data; struct brw_gs_prog_key key; uint32_t kernel; 
unsigned char sha1[20], *hash; @@ -523,17 +528,17 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, if (module->size > 0) { hash = sha1; anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); + kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); } else { hash = NULL; } if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); + struct brw_gs_prog_data prog_data = { 0, }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_GEOMETRY, spec_info, - &prog_data->base.base); + &prog_data.base.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -546,13 +551,13 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, pipeline->writes_point_size = true; brw_compute_vue_map(&pipeline->device->info, - &prog_data->base.vue_map, + &prog_data.base.vue_map, nir->info.outputs_written, nir->info.separate_shader); unsigned code_size; const unsigned *shader_code = - brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, + brw_compile_gs(compiler, NULL, mem_ctx, &key, &prog_data, nir, NULL, -1, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); @@ -560,9 +565,10 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, } /* TODO: SIMD8 GS */ + stage_prog_data = &prog_data.base.base; kernel = anv_pipeline_cache_upload_kernel(cache, hash, shader_code, code_size, - prog_data, sizeof(*prog_data)); + &stage_prog_data, sizeof(prog_data)); ralloc_free(mem_ctx); } @@ -570,7 +576,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, pipeline->gs_kernel = kernel; anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, - &prog_data->base.base); + stage_prog_data); return VK_SUCCESS; } @@ -586,7 +592,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct brw_wm_prog_data 
*prog_data = &pipeline->wm_prog_data; + const struct brw_stage_prog_data *stage_prog_data; struct brw_wm_prog_key key; uint32_t kernel; unsigned char sha1[20], *hash; @@ -599,19 +605,19 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, if (module->size > 0) { hash = sha1; anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); + kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); } else { hash = NULL; } if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); + struct brw_wm_prog_data prog_data = { 0, }; - prog_data->binding_table.render_target_start = 0; + prog_data.binding_table.render_target_start = 0; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_FRAGMENT, spec_info, - &prog_data->base); + &prog_data.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -635,27 +641,31 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, unsigned code_size; const unsigned *shader_code = - brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, + brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, nir, NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + stage_prog_data = &prog_data.base; kernel = anv_pipeline_cache_upload_kernel(cache, hash, shader_code, code_size, - prog_data, sizeof(*prog_data)); + &stage_prog_data, sizeof(prog_data)); ralloc_free(mem_ctx); } - if (prog_data->no_8) + const struct brw_wm_prog_data *wm_prog_data = + (const struct brw_wm_prog_data *) stage_prog_data; + + if (wm_prog_data->no_8) pipeline->ps_simd8 = NO_KERNEL; else pipeline->ps_simd8 = kernel; - if (prog_data->no_8 || prog_data->prog_offset_16) { - pipeline->ps_simd16 = kernel + prog_data->prog_offset_16; + if (wm_prog_data->no_8 || wm_prog_data->prog_offset_16) { + pipeline->ps_simd16 = 
kernel + wm_prog_data->prog_offset_16; } else { pipeline->ps_simd16 = NO_KERNEL; } @@ -664,18 +674,18 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, pipeline->ps_grf_start2 = 0; if (pipeline->ps_simd8 != NO_KERNEL) { pipeline->ps_ksp0 = pipeline->ps_simd8; - pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; + pipeline->ps_grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; if (pipeline->ps_simd16 != NO_KERNEL) { pipeline->ps_ksp2 = pipeline->ps_simd16; - pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; + pipeline->ps_grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; } } else if (pipeline->ps_simd16 != NO_KERNEL) { pipeline->ps_ksp0 = pipeline->ps_simd16; - pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16; + pipeline->ps_grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; } anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, - &prog_data->base); + stage_prog_data); return VK_SUCCESS; } @@ -690,7 +700,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *stage_prog_data; struct brw_cs_prog_key key; uint32_t kernel; unsigned char sha1[20], *hash; @@ -700,23 +710,23 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, if (module->size > 0) { hash = sha1; anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); + kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); } else { hash = NULL; } if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); + struct brw_cs_prog_data prog_data = { 0, }; - prog_data->binding_table.work_groups_start = 0; + prog_data.binding_table.work_groups_start = 0; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, 
MESA_SHADER_COMPUTE, spec_info, - &prog_data->base); + &prog_data.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - prog_data->base.total_shared = nir->num_shared; + prog_data.base.total_shared = nir->num_shared; void *mem_ctx = ralloc_context(NULL); @@ -725,23 +735,24 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, unsigned code_size; const unsigned *shader_code = - brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, + brw_compile_cs(compiler, NULL, mem_ctx, &key, &prog_data, nir, -1, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + stage_prog_data = &prog_data.base; kernel = anv_pipeline_cache_upload_kernel(cache, hash, shader_code, code_size, - prog_data, sizeof(*prog_data)); + &stage_prog_data, sizeof(prog_data)); ralloc_free(mem_ctx); } pipeline->cs_simd = kernel; anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, - &prog_data->base); + stage_prog_data); return VK_SUCCESS; } @@ -751,10 +762,12 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) { const struct brw_device_info *devinfo = &pipeline->device->info; bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; - unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_size = vs_present ? + get_vs_prog_data(pipeline)->base.urb_entry_size : 1; unsigned vs_entry_size_bytes = vs_size * 64; bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; - unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_size = gs_present ? 
+ get_gs_prog_data(pipeline)->base.urb_entry_size : 1; unsigned gs_entry_size_bytes = gs_size * 64; /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): @@ -1136,7 +1149,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { /* Vertex is only optional if disable_vs is set */ assert(extra->disable_vs); - memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); } gen7_compute_urb_partition(pipeline); @@ -1152,7 +1164,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, */ inputs_read = ~0ull; } else { - inputs_read = pipeline->vs_prog_data.inputs_read; + inputs_read = get_vs_prog_data(pipeline)->inputs_read; } pipeline->vb_used = 0; diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 48f36706fef..024fdf7d5a9 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -110,7 +110,8 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, void *prog_data) + const unsigned char *sha1, + const struct brw_stage_prog_data **prog_data) { const uint32_t mask = cache->table_size - 1; const uint32_t start = (*(uint32_t *) sha1); @@ -126,7 +127,7 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, cache->program_stream.block_pool->map + offset; if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { if (prog_data) - memcpy(prog_data, entry->prog_data, entry->prog_data_size); + *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; const uint32_t preamble_size = align_u32(sizeof(*entry) + entry->prog_data_size, 64); @@ -198,17 +199,14 @@ uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const unsigned char *sha1, const void *kernel, size_t kernel_size, - const void *prog_data, size_t prog_data_size) + const struct brw_stage_prog_data **prog_data, 
+ size_t prog_data_size) { pthread_mutex_lock(&cache->mutex); struct cache_entry *entry; - /* Meta pipelines don't have SPIR-V, so we can't hash them. - * Consequentally, they just don't get cached. - */ - const uint32_t preamble_size = sha1 ? - align_u32(sizeof(*entry) + prog_data_size, 64) : - 0; + const uint32_t preamble_size = + align_u32(sizeof(*entry) + prog_data_size, 64); const uint32_t size = preamble_size + kernel_size; @@ -216,14 +214,16 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const struct anv_state state = anv_state_stream_alloc(&cache->program_stream, size, 64); + entry = state.map; + entry->prog_data_size = prog_data_size; + memcpy(entry->prog_data, *prog_data, prog_data_size); + *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; + entry->kernel_size = kernel_size; + if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL); - entry = state.map; - memcpy(entry->sha1, sha1, sizeof(entry->sha1)); - entry->prog_data_size = prog_data_size; - memcpy(entry->prog_data, prog_data, prog_data_size); - entry->kernel_size = kernel_size; + memcpy(entry->sha1, sha1, sizeof(entry->sha1)); if (cache->kernel_count == cache->table_size / 2) anv_pipeline_cache_grow(cache); @@ -285,9 +285,13 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, const struct cache_entry *entry = p; const void *kernel = &entry->prog_data[entry->prog_data_size]; + const struct brw_stage_prog_data *prog_data = + (const struct brw_stage_prog_data *) entry->prog_data; + anv_pipeline_cache_upload_kernel(cache, entry->sha1, kernel, entry->kernel_size, - entry->prog_data, entry->prog_data_size); + &prog_data, + entry->prog_data_size); p = kernel + entry->kernel_size; } } @@ -406,9 +410,12 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, const void *kernel = (void *) entry + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + const struct 
brw_stage_prog_data *prog_data = + (const struct brw_stage_prog_data *) entry->prog_data; + anv_pipeline_cache_upload_kernel(dst, entry->sha1, kernel, entry->kernel_size, - entry->prog_data, entry->prog_data_size); + &prog_data, entry->prog_data_size); } } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 32c8b13c952..70b6dd995a1 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -640,12 +640,13 @@ void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, struct anv_device *device); void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, void *prog_data); + const unsigned char *sha1, + const struct brw_stage_prog_data **prog_data); uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const unsigned char *sha1, const void *kernel, size_t kernel_size, - const void *prog_data, + const struct brw_stage_prog_data **prog_data, size_t prog_data_size); struct anv_device { @@ -1404,12 +1405,8 @@ struct anv_pipeline { bool use_repclear; - struct brw_vs_prog_data vs_prog_data; - struct brw_wm_prog_data wm_prog_data; - struct brw_gs_prog_data gs_prog_data; - struct brw_cs_prog_data cs_prog_data; bool writes_point_size; - struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; + const struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; uint32_t scratch_start[MESA_SHADER_STAGES]; uint32_t total_scratch; struct { @@ -1457,6 +1454,30 @@ struct anv_pipeline { } gen9; }; +static inline const struct brw_vs_prog_data * +get_vs_prog_data(struct anv_pipeline *pipeline) +{ + return (const struct brw_vs_prog_data *) pipeline->prog_data[MESA_SHADER_VERTEX]; +} + +static inline const struct brw_gs_prog_data * +get_gs_prog_data(struct anv_pipeline *pipeline) +{ + return (const struct brw_gs_prog_data *) pipeline->prog_data[MESA_SHADER_GEOMETRY]; +} + +static inline const struct 
brw_wm_prog_data * +get_wm_prog_data(struct anv_pipeline *pipeline) +{ + return (const struct brw_wm_prog_data *) pipeline->prog_data[MESA_SHADER_FRAGMENT]; +} + +static inline const struct brw_cs_prog_data * +get_cs_prog_data(struct anv_pipeline *pipeline) +{ + return (const struct brw_cs_prog_data *) pipeline->prog_data[MESA_SHADER_COMPUTE]; +} + struct anv_graphics_pipeline_create_info { /** * If non-negative, overrides the color attachment count of the pipeline's diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 985907872fa..8dce586eec7 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -283,7 +283,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; @@ -395,11 +395,12 @@ void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); VkResult result; assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + bool needs_slm = cs_prog_data->base.total_shared > 0; config_l3(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 5235d399ce5..5f480edf809 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -250,7 +250,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), .SampleMask = 0xff); - const 
struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); #if 0 /* From gen7_vs_state.c */ @@ -277,18 +277,18 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .KernelStartPointer = pipeline->vs_vec4, .ScratchSpaceBaseOffset = pipeline->scratch_start[MESA_SHADER_VERTEX], - .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + .PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base), .DispatchGRFStartRegisterforURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + vs_prog_data->base.base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vs_prog_data->base.urb_read_length, .VertexURBEntryReadOffset = 0, .MaximumNumberofThreads = device->info.max_vs_threads - 1, .StatisticsEnable = true, .VSFunctionEnable = true); - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); @@ -338,7 +338,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), .MaximumNumberofThreads = device->info.max_wm_threads - 1); } else { - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) anv_finishme("two-sided color needs sbe swizzling setup"); diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 8d8775fb01d..0d27c27f5b7 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -505,7 +505,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer 
*cmd_buffer) struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; @@ -558,11 +558,12 @@ void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); VkResult result; assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + bool needs_slm = cs_prog_data->base.total_shared > 0; config_l3(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 4a5e8674189..5ce1307f090 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -354,6 +354,7 @@ genX(graphics_pipeline_create)( .MaximumPointWidth = 255.875, .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), .StatisticsEnable = true, .LineEndCapAntialiasingRegionWidth = _05pixels, @@ -363,15 +364,15 @@ genX(graphics_pipeline_create)( .PointRasterizationRule = RASTRULE_UPPER_RIGHT, .BarycentricInterpolationMode = pipeline->ps_ksp0 == NO_KERNEL ? 
- 0 : pipeline->wm_prog_data.barycentric_interp_modes); + 0 : wm_prog_data->barycentric_interp_modes); - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; - offset = 1; - length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; - - if (pipeline->gs_kernel == NO_KERNEL) + if (pipeline->gs_kernel == NO_KERNEL) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false); - else + } else { + const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .SingleProgramFlow = false, .KernelStartPointer = pipeline->gs_kernel, @@ -412,11 +413,12 @@ genX(graphics_pipeline_create)( .VertexURBEntryOutputReadOffset = offset, .VertexURBEntryOutputLength = length); + } - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); /* Skip the VUE header and position slots */ offset = 1; - length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + length = (vs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? 
pipeline->vs_simd8 : pipeline->vs_vec4; @@ -435,7 +437,7 @@ genX(graphics_pipeline_create)( .VectorMaskEnable = false, .SamplerCount = 0, .BindingTableEntryCount = - vue_prog_data->base.binding_table.size_bytes / 4, + vs_prog_data->base.base.binding_table.size_bytes / 4, .ThreadDispatchPriority = false, .FloatingPointMode = IEEE754, .IllegalOpcodeExceptionEnable = false, @@ -443,11 +445,11 @@ genX(graphics_pipeline_create)( .SoftwareExceptionEnable = false, .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX], - .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + .PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base), .DispatchGRFStartRegisterForURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + vs_prog_data->base.base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vs_prog_data->base.urb_read_length, .VertexURBEntryReadOffset = 0, .MaximumNumberofThreads = device->info.max_vs_threads - 1, @@ -461,8 +463,6 @@ genX(graphics_pipeline_create)( .UserClipDistanceClipTestEnableBitmask = 0, .UserClipDistanceCullTestEnableBitmask = 0); - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - const int num_thread_bias = GEN_GEN == 8 ? 
2 : 1; if (pipeline->ps_ksp0 == NO_KERNEL) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 88cc13b580a..2606a66f2a7 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -319,11 +319,11 @@ void genX(CmdDraw)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); genX(cmd_buffer_flush_state)(cmd_buffer); - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), @@ -346,11 +346,11 @@ void genX(CmdDrawIndexed)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); genX(cmd_buffer_flush_state)(cmd_buffer); - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), @@ -398,13 +398,13 @@ void genX(CmdDrawIndirect)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; genX(cmd_buffer_flush_state)(cmd_buffer); - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - 
cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); @@ -429,14 +429,14 @@ void genX(CmdDrawIndexedIndirect)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; genX(cmd_buffer_flush_state)(cmd_buffer); /* TODO: We need to stomp base vertex to 0 somehow */ - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); @@ -460,7 +460,7 @@ void genX(CmdDispatch)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline); if (prog_data->uses_num_work_groups) { struct anv_state state = @@ -507,7 +507,7 @@ void genX(CmdDispatchIndirect)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; struct anv_batch *batch = &cmd_buffer->batch; diff --git a/src/intel/vulkan/genX_pipeline.c 
b/src/intel/vulkan/genX_pipeline.c index 1605661f971..cc8841ea8a0 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -82,7 +82,7 @@ genX(compute_pipeline_create)( pipeline->use_repclear = false; - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 28b2a032c46..cd138dfae61 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -52,6 +52,8 @@ emit_vertex_input(struct anv_pipeline *pipeline, const VkPipelineVertexInputStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + uint32_t elements; if (extra && extra->disable_vs) { /* If the VS is disabled, just assume the user knows what they're @@ -63,7 +65,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, elements |= (1 << info->pVertexAttributeDescriptions[i].location); } else { /* Pull inputs_read out of the VS prog data */ - uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; + uint64_t inputs_read = vs_prog_data->inputs_read; assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); elements = inputs_read >> VERT_ATTRIB_GENERIC0; } @@ -72,16 +74,16 @@ emit_vertex_input(struct anv_pipeline *pipeline, /* On BDW+, we only need to allocate space for base ids. Setting up * the actual vertex and instance id is a separate packet. 
*/ - const bool needs_svgs_elem = pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance; + const bool needs_svgs_elem = vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance; #else /* On Haswell and prior, vertex and instance id are created by using the * ComponentControl fields, so we need an element for any of them. */ - const bool needs_svgs_elem = pipeline->vs_prog_data.uses_vertexid || - pipeline->vs_prog_data.uses_instanceid || - pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance; + const bool needs_svgs_elem = vs_prog_data->uses_vertexid || + vs_prog_data->uses_instanceid || + vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance; #endif uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem; @@ -148,8 +150,8 @@ emit_vertex_input(struct anv_pipeline *pipeline, * This means, that if we have BaseInstance, we need BaseVertex as * well. Just do all or nothing. */ - uint32_t base_ctrl = (pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance) ? + uint32_t base_ctrl = (vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance) ? 
VFCOMP_STORE_SRC : VFCOMP_STORE_0; struct GENX(VERTEX_ELEMENT_STATE) element = { @@ -171,10 +173,10 @@ emit_vertex_input(struct anv_pipeline *pipeline, #if GEN_GEN >= 8 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), - .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDEnable = vs_prog_data->uses_vertexid, .VertexIDComponentNumber = 2, .VertexIDElementOffset = id_slot, - .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDEnable = vs_prog_data->uses_instanceid, .InstanceIDComponentNumber = 3, .InstanceIDElementOffset = id_slot); #endif @@ -222,17 +224,21 @@ emit_urb_setup(struct anv_pipeline *pipeline) static void emit_3dstate_sbe(struct anv_pipeline *pipeline) { + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); const struct brw_vue_map *fs_input_map; + if (pipeline->gs_kernel == NO_KERNEL) - fs_input_map = &pipeline->vs_prog_data.base.vue_map; + fs_input_map = &vs_prog_data->base.vue_map; else - fs_input_map = &pipeline->gs_prog_data.base.vue_map; + fs_input_map = &gs_prog_data->base.vue_map; struct GENX(3DSTATE_SBE) sbe = { GENX(3DSTATE_SBE_header), .AttributeSwizzleEnable = true, .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, + .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs, #if GEN_GEN >= 9 .Attribute0ActiveComponentFormat = ACF_XYZW, @@ -283,7 +289,7 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline) int max_source_attr = 0; for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { - int input_index = pipeline->wm_prog_data.urb_setup[attr]; + int input_index = wm_prog_data->urb_setup[attr]; if (input_index < 0) continue; -- cgit v1.2.3 From 87967a2c854c200ba8a7cabe1fe3f7e19291f187 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 
Mar 2016 10:59:21 -0800 Subject: anv: Simplify pipeline cache control flow a bit No functional change, but the control flow around searching the cache and falling back to compiling is a bit simpler. --- src/intel/vulkan/anv_pipeline.c | 62 ++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 35 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 868215cd22d..6a0b48b620c 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -438,20 +438,17 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; struct brw_vs_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; + uint32_t kernel = NO_KERNEL; + unsigned char sha1[20]; populate_vs_prog_key(&pipeline->device->info, &key); if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); - } else { - hash = NULL; + anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); } - if (module->size == 0 || kernel == NO_KERNEL) { + if (kernel == NO_KERNEL) { struct brw_vs_prog_data prog_data = { 0, }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, @@ -484,7 +481,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, } stage_prog_data = &prog_data.base.base; - kernel = anv_pipeline_cache_upload_kernel(cache, hash, + kernel = anv_pipeline_cache_upload_kernel(cache, + module->size > 0 ? 
sha1 : NULL, shader_code, code_size, &stage_prog_data, sizeof(prog_data)); @@ -520,20 +518,17 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; struct brw_gs_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; + uint32_t kernel = NO_KERNEL; + unsigned char sha1[20]; populate_gs_prog_key(&pipeline->device->info, &key); if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); - } else { - hash = NULL; + anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); } - if (module->size == 0 || kernel == NO_KERNEL) { + if (kernel == NO_KERNEL) { struct brw_gs_prog_data prog_data = { 0, }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, @@ -566,7 +561,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, /* TODO: SIMD8 GS */ stage_prog_data = &prog_data.base.base; - kernel = anv_pipeline_cache_upload_kernel(cache, hash, + kernel = anv_pipeline_cache_upload_kernel(cache, + module->size > 0 ? 
sha1 : NULL, shader_code, code_size, &stage_prog_data, sizeof(prog_data)); @@ -594,8 +590,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; struct brw_wm_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; + uint32_t kernel = NO_KERNEL; + unsigned char sha1[20]; populate_wm_prog_key(&pipeline->device->info, info, extra, &key); @@ -603,14 +599,11 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, key.nr_color_regions = 1; if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); - } else { - hash = NULL; + anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); } - if (module->size == 0 || kernel == NO_KERNEL) { + if (kernel == NO_KERNEL) { struct brw_wm_prog_data prog_data = { 0, }; prog_data.binding_table.render_target_start = 0; @@ -649,7 +642,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, } stage_prog_data = &prog_data.base; - kernel = anv_pipeline_cache_upload_kernel(cache, hash, + kernel = anv_pipeline_cache_upload_kernel(cache, + module->size > 0 ? 
sha1 : NULL, shader_code, code_size, &stage_prog_data, sizeof(prog_data)); @@ -702,17 +696,14 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; struct brw_cs_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; + uint32_t kernel = NO_KERNEL; + unsigned char sha1[20]; populate_cs_prog_key(&pipeline->device->info, &key); if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); - } else { - hash = NULL; + anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); } if (module->size == 0 || kernel == NO_KERNEL) { @@ -743,7 +734,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, } stage_prog_data = &prog_data.base; - kernel = anv_pipeline_cache_upload_kernel(cache, hash, + kernel = anv_pipeline_cache_upload_kernel(cache, + module->size > 0 ? sha1 : NULL, shader_code, code_size, &stage_prog_data, sizeof(prog_data)); ralloc_free(mem_ctx); -- cgit v1.2.3 From 07441c344c845bd663398529dbf484759d09cd54 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 12:21:43 -0800 Subject: anv: Rename anv_pipeline_cache_add_entry() to 'set' This function is a helper that unconditionally sets a hash table entry and expects the cache to have enough room. Calling it 'add_entry' suggests it will grow the cache as needed. 
--- src/intel/vulkan/anv_pipeline_cache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 024fdf7d5a9..0b260528f81 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -140,7 +140,7 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, } static void -anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, +anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache, struct cache_entry *entry, uint32_t entry_offset) { const uint32_t mask = cache->table_size - 1; @@ -187,7 +187,7 @@ anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) struct cache_entry *entry = cache->program_stream.block_pool->map + offset; - anv_pipeline_cache_add_entry(cache, entry, offset); + anv_pipeline_cache_set_entry(cache, entry, offset); } free(old_table); @@ -231,7 +231,7 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, * have enough space to add this new kernel. Only add it if there's room. */ if (cache->kernel_count < cache->table_size / 2) - anv_pipeline_cache_add_entry(cache, entry, state.offset); + anv_pipeline_cache_set_entry(cache, entry, state.offset); } pthread_mutex_unlock(&cache->mutex); -- cgit v1.2.3 From 626559ed3717a205c1315040caa4308e77c70eb5 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 12:25:23 -0800 Subject: anv: Add anv_pipeline_cache_add_entry() This function will grow the cache to make room and then add the entry. 
--- src/intel/vulkan/anv_pipeline_cache.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 0b260528f81..c85916fd1f8 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -195,6 +195,20 @@ anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) return VK_SUCCESS; } +static void +anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, + struct cache_entry *entry, uint32_t entry_offset) +{ + if (cache->kernel_count == cache->table_size / 2) + anv_pipeline_cache_grow(cache); + + /* Failing to grow that hash table isn't fatal, but may mean we don't + * have enough space to add this new kernel. Only add it if there's room. + */ + if (cache->kernel_count < cache->table_size / 2) + anv_pipeline_cache_set_entry(cache, entry, entry_offset); +} + uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const unsigned char *sha1, @@ -224,14 +238,7 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL); memcpy(entry->sha1, sha1, sizeof(entry->sha1)); - if (cache->kernel_count == cache->table_size / 2) - anv_pipeline_cache_grow(cache); - - /* Failing to grow that hash table isn't fatal, but may mean we don't - * have enough space to add this new kernel. Only add it if there's room. 
- */ - if (cache->kernel_count < cache->table_size / 2) - anv_pipeline_cache_set_entry(cache, entry, state.offset); + anv_pipeline_cache_add_entry(cache, entry, state.offset); } pthread_mutex_unlock(&cache->mutex); -- cgit v1.2.3 From 584f39c65ed24d6c331d8ccf05d93678f3fafe16 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 12:27:31 -0800 Subject: anv: Don't re-upload shaders when merging Using anv_pipeline_cache_upload_kernel() will re-upload the kernel and prog_data when we merge caches. Since the kernel and prog_data is already in the program_stream, use anv_pipeline_cache_add_entry() instead to only add the entry to the hash table. --- src/intel/vulkan/anv_pipeline_cache.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index c85916fd1f8..376cd2a7716 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -406,23 +406,17 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, struct anv_pipeline_cache *src) { for (uint32_t i = 0; i < src->table_size; i++) { - if (src->hash_table[i] == ~0) + const uint32_t offset = src->hash_table[i]; + if (offset == ~0) continue; struct cache_entry *entry = - src->program_stream.block_pool->map + src->hash_table[i]; + src->program_stream.block_pool->map + offset; if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) continue; - const void *kernel = (void *) entry + - align_u32(sizeof(*entry) + entry->prog_data_size, 64); - const struct brw_stage_prog_data *prog_data = - (const struct brw_stage_prog_data *) entry->prog_data; - - anv_pipeline_cache_upload_kernel(dst, entry->sha1, - kernel, entry->kernel_size, - &prog_data, entry->prog_data_size); + anv_pipeline_cache_add_entry(dst, entry, offset); } } -- cgit v1.2.3 From 6139fe9a7790e0946e465f275d3f530552edbcdc Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg 
Kristensen Date: Fri, 4 Mar 2016 12:56:14 -0800 Subject: anv: Also cache the struct anv_pipeline_binding maps This is state the we generate when compiling the shaders and we need it for mapping resources from descriptor sets to binding table indices. --- src/intel/vulkan/anv_nir.h | 3 +- src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 38 +++----- src/intel/vulkan/anv_pipeline.c | 84 +++++++++++------ src/intel/vulkan/anv_pipeline_cache.c | 112 ++++++++++++++++------- src/intel/vulkan/anv_private.h | 8 +- 5 files changed, 158 insertions(+), 87 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h index a7ea3eb0e28..606fd1c0565 100644 --- a/src/intel/vulkan/anv_nir.h +++ b/src/intel/vulkan/anv_nir.h @@ -37,7 +37,8 @@ void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, struct brw_stage_prog_data *prog_data); void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, nir_shader *shader, - struct brw_stage_prog_data *prog_data); + struct brw_stage_prog_data *prog_data, + struct anv_pipeline_bind_map *map); #ifdef __cplusplus } diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index e745bf661ee..eeb9b97f554 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -253,7 +253,8 @@ setup_vec4_uniform_value(const union gl_constant_value **params, void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, nir_shader *shader, - struct brw_stage_prog_data *prog_data) + struct brw_stage_prog_data *prog_data, + struct anv_pipeline_bind_map *map) { struct anv_pipeline_layout *layout = pipeline->layout; @@ -277,12 +278,6 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, nir_foreach_block(function->impl, get_used_bindings_block, &state); } - struct anv_pipeline_bind_map map = { - .surface_count = 0, - .sampler_count = 0, - .image_count = 0, - }; - for (uint32_t set = 
0; set < layout->num_sets; set++) { struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; @@ -290,21 +285,14 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, BITSET_FOREACH_SET(b, _tmp, state.set[set].used, set_layout->binding_count) { if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) - map.surface_count += set_layout->binding[b].array_size; + map->surface_count += set_layout->binding[b].array_size; if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) - map.sampler_count += set_layout->binding[b].array_size; + map->sampler_count += set_layout->binding[b].array_size; if (set_layout->binding[b].stage[shader->stage].image_index >= 0) - map.image_count += set_layout->binding[b].array_size; + map->image_count += set_layout->binding[b].array_size; } } - map.surface_to_descriptor = - malloc(map.surface_count * sizeof(struct anv_pipeline_binding)); - map.sampler_to_descriptor = - malloc(map.sampler_count * sizeof(struct anv_pipeline_binding)); - - pipeline->bindings[shader->stage] = map; - unsigned surface = 0; unsigned sampler = 0; unsigned image = 0; @@ -320,8 +308,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) { state.set[set].surface_offsets[b] = surface; for (unsigned i = 0; i < array_size; i++) { - map.surface_to_descriptor[surface + i].set = set; - map.surface_to_descriptor[surface + i].offset = set_offset + i; + map->surface_to_descriptor[surface + i].set = set; + map->surface_to_descriptor[surface + i].offset = set_offset + i; } surface += array_size; } @@ -329,8 +317,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) { state.set[set].sampler_offsets[b] = sampler; for (unsigned i = 0; i < array_size; i++) { - map.sampler_to_descriptor[sampler + i].set = set; - map.sampler_to_descriptor[sampler + i].offset = set_offset + i; + 
map->sampler_to_descriptor[sampler + i].set = set; + map->sampler_to_descriptor[sampler + i].offset = set_offset + i; } sampler += array_size; } @@ -351,8 +339,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, } } - if (map.image_count > 0) { - assert(map.image_count <= MAX_IMAGES); + if (map->image_count > 0) { + assert(map->image_count <= MAX_IMAGES); nir_foreach_variable(var, &shader->uniforms) { if (glsl_type_is_image(var->type) || (glsl_type_is_array(var->type) && @@ -374,7 +362,7 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, const gl_constant_value **param = prog_data->param + (shader->num_uniforms / 4); const struct brw_image_param *image_param = null_data->images; - for (uint32_t i = 0; i < map.image_count; i++) { + for (uint32_t i = 0; i < map->image_count; i++) { setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, (const union gl_constant_value *)&image_param->surface_idx, 1); setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, @@ -392,7 +380,7 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, image_param ++; } - shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4; + shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4; } ralloc_free(mem_ctx); diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 6a0b48b620c..f3f5ecdf660 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -193,11 +193,6 @@ void anv_DestroyPipeline( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { - free(pipeline->bindings[s].surface_to_descriptor); - free(pipeline->bindings[s].sampler_to_descriptor); - } - anv_reloc_list_finish(&pipeline->batch_relocs, pAllocator ? 
pAllocator : &device->alloc); if (pipeline->blend_state.map) @@ -315,7 +310,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, const char *entrypoint, gl_shader_stage stage, const VkSpecializationInfo *spec_info, - struct brw_stage_prog_data *prog_data) + struct brw_stage_prog_data *prog_data, + struct anv_pipeline_bind_map *map) { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; @@ -368,7 +364,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ if (pipeline->layout) - anv_nir_apply_pipeline_layout(pipeline, nir, prog_data); + anv_nir_apply_pipeline_layout(pipeline, nir, prog_data, map); /* All binding table offsets provided by apply_pipeline_layout() are * relative to the start of the bindint table (plus MAX_RTS for VS). @@ -406,7 +402,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, gl_shader_stage stage, - const struct brw_stage_prog_data *prog_data) + const struct brw_stage_prog_data *prog_data, + struct anv_pipeline_bind_map *map) { struct brw_device_info *devinfo = &pipeline->device->info; uint32_t max_threads[] = { @@ -424,6 +421,7 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, pipeline->total_scratch = align_u32(pipeline->total_scratch, 1024) + prog_data->total_scratch * max_threads[stage]; + pipeline->bindings[stage] = *map; } static VkResult @@ -437,6 +435,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; + struct anv_pipeline_bind_map map; struct brw_vs_prog_key key; uint32_t kernel = NO_KERNEL; unsigned char sha1[20]; @@ -445,15 +444,22 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, if (module->size > 0) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, 
spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); } if (kernel == NO_KERNEL) { struct brw_vs_prog_data prog_data = { 0, }; + struct anv_pipeline_binding surface_to_descriptor[256]; + struct anv_pipeline_binding sampler_to_descriptor[256]; + + map = (struct anv_pipeline_bind_map) { + .surface_to_descriptor = surface_to_descriptor, + .sampler_to_descriptor = sampler_to_descriptor + }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_VERTEX, spec_info, - &prog_data.base.base); + &prog_data.base.base, &map); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -484,8 +490,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, kernel = anv_pipeline_cache_upload_kernel(cache, module->size > 0 ? sha1 : NULL, shader_code, code_size, - &stage_prog_data, - sizeof(prog_data)); + &stage_prog_data, sizeof(prog_data), + &map); ralloc_free(mem_ctx); } @@ -501,7 +507,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, } anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, - stage_prog_data); + stage_prog_data, &map); return VK_SUCCESS; } @@ -517,6 +523,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; + struct anv_pipeline_bind_map map; struct brw_gs_prog_key key; uint32_t kernel = NO_KERNEL; unsigned char sha1[20]; @@ -525,15 +532,22 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, if (module->size > 0) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); } if (kernel == NO_KERNEL) { struct brw_gs_prog_data prog_data = { 0, }; + struct anv_pipeline_binding surface_to_descriptor[256]; + 
struct anv_pipeline_binding sampler_to_descriptor[256]; + + map = (struct anv_pipeline_bind_map) { + .surface_to_descriptor = surface_to_descriptor, + .sampler_to_descriptor = sampler_to_descriptor + }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_GEOMETRY, spec_info, - &prog_data.base.base); + &prog_data.base.base, &map); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -564,7 +578,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, kernel = anv_pipeline_cache_upload_kernel(cache, module->size > 0 ? sha1 : NULL, shader_code, code_size, - &stage_prog_data, sizeof(prog_data)); + &stage_prog_data, sizeof(prog_data), + &map); ralloc_free(mem_ctx); } @@ -572,7 +587,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, pipeline->gs_kernel = kernel; anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, - stage_prog_data); + stage_prog_data, &map); return VK_SUCCESS; } @@ -589,6 +604,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; + struct anv_pipeline_bind_map map; struct brw_wm_prog_key key; uint32_t kernel = NO_KERNEL; unsigned char sha1[20]; @@ -600,17 +616,22 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, if (module->size > 0) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); } if (kernel == NO_KERNEL) { struct brw_wm_prog_data prog_data = { 0, }; + struct anv_pipeline_binding surface_to_descriptor[256]; + struct anv_pipeline_binding sampler_to_descriptor[256]; - prog_data.binding_table.render_target_start = 0; + map = (struct anv_pipeline_bind_map) { + .surface_to_descriptor = surface_to_descriptor, + .sampler_to_descriptor = sampler_to_descriptor + }; nir_shader 
*nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_FRAGMENT, spec_info, - &prog_data.base); + &prog_data.base, &map); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -645,7 +666,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, kernel = anv_pipeline_cache_upload_kernel(cache, module->size > 0 ? sha1 : NULL, shader_code, code_size, - &stage_prog_data, sizeof(prog_data)); + &stage_prog_data, sizeof(prog_data), + &map); ralloc_free(mem_ctx); } @@ -679,7 +701,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, } anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, - stage_prog_data); + stage_prog_data, &map); return VK_SUCCESS; } @@ -695,6 +717,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; + struct anv_pipeline_bind_map map; struct brw_cs_prog_key key; uint32_t kernel = NO_KERNEL; unsigned char sha1[20]; @@ -703,17 +726,22 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, if (module->size > 0) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); } if (module->size == 0 || kernel == NO_KERNEL) { struct brw_cs_prog_data prog_data = { 0, }; + struct anv_pipeline_binding surface_to_descriptor[256]; + struct anv_pipeline_binding sampler_to_descriptor[256]; - prog_data.binding_table.work_groups_start = 0; + map = (struct anv_pipeline_bind_map) { + .surface_to_descriptor = surface_to_descriptor, + .sampler_to_descriptor = sampler_to_descriptor + }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_COMPUTE, spec_info, - &prog_data.base); + &prog_data.base, &map); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -737,14 +765,16 @@ 
anv_pipeline_compile_cs(struct anv_pipeline *pipeline, kernel = anv_pipeline_cache_upload_kernel(cache, module->size > 0 ? sha1 : NULL, shader_code, code_size, - &stage_prog_data, sizeof(prog_data)); + &stage_prog_data, sizeof(prog_data), + &map); + ralloc_free(mem_ctx); } pipeline->cs_simd = kernel; anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, - stage_prog_data); + stage_prog_data, &map); return VK_SUCCESS; } diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 376cd2a7716..3d2429a4e2a 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -72,6 +72,10 @@ struct cache_entry { unsigned char sha1[20]; uint32_t prog_data_size; uint32_t kernel_size; + uint32_t surface_count; + uint32_t sampler_count; + uint32_t image_count; + char prog_data[0]; /* kernel follows prog_data at next 64 byte aligned address */ @@ -84,7 +88,11 @@ entry_size(struct cache_entry *entry) * doesn't include the alignment padding bytes. 
*/ - return sizeof(*entry) + entry->prog_data_size + entry->kernel_size; + const uint32_t map_size = + entry->surface_count * sizeof(struct anv_pipeline_binding) + + entry->sampler_count * sizeof(struct anv_pipeline_binding); + + return sizeof(*entry) + entry->prog_data_size + map_size; } void @@ -111,7 +119,8 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data) + const struct brw_stage_prog_data **prog_data, + struct anv_pipeline_bind_map *map) { const uint32_t mask = cache->table_size - 1; const uint32_t start = (*(uint32_t *) sha1); @@ -126,13 +135,20 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, struct cache_entry *entry = cache->program_stream.block_pool->map + offset; if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { - if (prog_data) - *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; - - const uint32_t preamble_size = - align_u32(sizeof(*entry) + entry->prog_data_size, 64); - - return offset + preamble_size; + if (prog_data) { + assert(map); + void *p = entry->prog_data; + *prog_data = p; + p += entry->prog_data_size; + map->surface_count = entry->surface_count; + map->sampler_count = entry->sampler_count; + map->image_count = entry->image_count; + map->surface_to_descriptor = p; + p += map->surface_count * sizeof(struct anv_pipeline_binding); + map->sampler_to_descriptor = p; + } + + return offset + align_u32(entry_size(entry), 64); } } @@ -157,7 +173,7 @@ anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache, } } - cache->total_size += entry_size(entry); + cache->total_size += entry_size(entry) + entry->kernel_size; cache->kernel_count++; } @@ -214,13 +230,18 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const unsigned char *sha1, const void *kernel, size_t kernel_size, const struct brw_stage_prog_data **prog_data, - 
size_t prog_data_size) + size_t prog_data_size, + struct anv_pipeline_bind_map *map) { pthread_mutex_lock(&cache->mutex); struct cache_entry *entry; + const uint32_t map_size = + map->surface_count * sizeof(struct anv_pipeline_binding) + + map->sampler_count * sizeof(struct anv_pipeline_binding); + const uint32_t preamble_size = - align_u32(sizeof(*entry) + prog_data_size, 64); + align_u32(sizeof(*entry) + prog_data_size + map_size, 64); const uint32_t size = preamble_size + kernel_size; @@ -230,12 +251,26 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, entry = state.map; entry->prog_data_size = prog_data_size; - memcpy(entry->prog_data, *prog_data, prog_data_size); - *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; + entry->surface_count = map->surface_count; + entry->sampler_count = map->sampler_count; + entry->image_count = map->image_count; entry->kernel_size = kernel_size; + void *p = entry->prog_data; + memcpy(p, *prog_data, prog_data_size); + p += prog_data_size; + + memcpy(p, map->surface_to_descriptor, + map->surface_count * sizeof(struct anv_pipeline_binding)); + map->surface_to_descriptor = p; + p += map->surface_count * sizeof(struct anv_pipeline_binding); + + memcpy(p, map->sampler_to_descriptor, + map->sampler_count * sizeof(struct anv_pipeline_binding)); + map->sampler_to_descriptor = p; + if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { - assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL); + assert(anv_pipeline_cache_search(cache, sha1, NULL, NULL) == NO_KERNEL); memcpy(entry->sha1, sha1, sizeof(entry->sha1)); anv_pipeline_cache_add_entry(cache, entry, state.offset); @@ -248,6 +283,8 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, if (!cache->device->info.has_llc) anv_state_clflush(state); + *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; + return state.offset + preamble_size; } @@ -282,23 +319,34 @@ anv_pipeline_cache_load(struct 
anv_pipeline_cache *cache, if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0) return; - const void *end = data + size; - const void *p = data + header.header_size; + void *end = (void *) data + size; + void *p = (void *) data + header.header_size; while (p < end) { - /* The kernels aren't 64 byte aligned in the serialized format so - * they're always right after the prog_data. - */ - const struct cache_entry *entry = p; - const void *kernel = &entry->prog_data[entry->prog_data_size]; - - const struct brw_stage_prog_data *prog_data = - (const struct brw_stage_prog_data *) entry->prog_data; + struct cache_entry *entry = p; + + void *data = entry->prog_data; + const struct brw_stage_prog_data *prog_data = data; + data += entry->prog_data_size; + + struct anv_pipeline_binding *surface_to_descriptor = data; + data += entry->surface_count * sizeof(struct anv_pipeline_binding); + struct anv_pipeline_binding *sampler_to_descriptor = data; + data += entry->sampler_count * sizeof(struct anv_pipeline_binding); + void *kernel = data; + + struct anv_pipeline_bind_map map = { + .surface_count = entry->surface_count, + .sampler_count = entry->sampler_count, + .image_count = entry->image_count, + .surface_to_descriptor = surface_to_descriptor, + .sampler_to_descriptor = sampler_to_descriptor + }; anv_pipeline_cache_upload_kernel(cache, entry->sha1, kernel, entry->kernel_size, &prog_data, - entry->prog_data_size); + entry->prog_data_size, &map); p = kernel + entry->kernel_size; } } @@ -383,14 +431,14 @@ VkResult anv_GetPipelineCacheData( continue; entry = cache->program_stream.block_pool->map + cache->hash_table[i]; - if (end < p + entry_size(entry)) + const uint32_t size = entry_size(entry); + if (end < p + size + entry->kernel_size) break; - memcpy(p, entry, sizeof(*entry) + entry->prog_data_size); - p += sizeof(*entry) + entry->prog_data_size; + memcpy(p, entry, size); + p += size; - void *kernel = (void *) entry + - align_u32(sizeof(*entry) + entry->prog_data_size, 64); + void 
*kernel = (void *) entry + align_u32(size, 64); memcpy(p, kernel, entry->kernel_size); p += entry->kernel_size; @@ -413,7 +461,7 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, struct cache_entry *entry = src->program_stream.block_pool->map + offset; - if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) + if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL) continue; anv_pipeline_cache_add_entry(dst, entry, offset); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 70b6dd995a1..b112b457b99 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -636,18 +636,22 @@ struct anv_pipeline_cache { uint32_t * hash_table; }; +struct anv_pipeline_bind_map; + void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, struct anv_device *device); void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data); + const struct brw_stage_prog_data **prog_data, + struct anv_pipeline_bind_map *map); uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const unsigned char *sha1, const void *kernel, size_t kernel_size, const struct brw_stage_prog_data **prog_data, - size_t prog_data_size); + size_t prog_data_size, + struct anv_pipeline_bind_map *map); struct anv_device { VK_LOADER_DATA _loader_data; -- cgit v1.2.3 From 30bbe28b7efc7e6b6fef78ac3233bb7485679d1e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 22:07:02 -0800 Subject: anv: Always use point size from the shader There is no API for setting the point size and the shader is always required to set it. Section 24.4: "If the value written to PointSize is less than or equal to zero, or if no value was written to PointSize, results are undefined." As such, we can just always program PointWidthSource to Vertex. 
This simplifies anv_pipeline a bit and avoids trouble when we enable the pipeline cache and don't have writes_point_size in the prog_data. --- src/intel/vulkan/anv_pipeline.c | 6 ------ src/intel/vulkan/anv_private.h | 1 - src/intel/vulkan/gen7_pipeline.c | 2 +- src/intel/vulkan/gen8_pipeline.c | 2 +- 4 files changed, 2 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index f3f5ecdf660..183589611a1 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -469,8 +469,6 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, ralloc_steal(mem_ctx, nir); prog_data.inputs_read = nir->info.inputs_read; - if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) - pipeline->writes_point_size = true; brw_compute_vue_map(&pipeline->device->info, &prog_data.base.vue_map, @@ -556,9 +554,6 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, if (module->nir == NULL) ralloc_steal(mem_ctx, nir); - if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) - pipeline->writes_point_size = true; - brw_compute_vue_map(&pipeline->device->info, &prog_data.base.vue_map, nir->info.outputs_written, @@ -1122,7 +1117,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); pipeline->use_repclear = extra && extra->use_repclear; - pipeline->writes_point_size = false; /* When we free the pipeline, we detect stages based on the NULL status * of various prog_data pointers. Make them NULL by default. 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index b112b457b99..8c3318816c6 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1409,7 +1409,6 @@ struct anv_pipeline { bool use_repclear; - bool writes_point_size; const struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; uint32_t scratch_start[MESA_SHADER_STAGES]; uint32_t total_scratch; diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 5f480edf809..37e4639b287 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -65,7 +65,7 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, /* uint32_t AALineDistanceMode; */ /* uint32_t VertexSubPixelPrecisionSelect; */ - .UsePointWidthState = !pipeline->writes_point_size, + .UsePointWidthState = false, .PointWidth = 1.0, .GlobalDepthOffsetEnableSolid = info->depthBiasEnable, .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable, diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 5ce1307f090..8edc1574ac3 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -60,7 +60,7 @@ emit_rs_state(struct anv_pipeline *pipeline, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, .TriangleFanProvokingVertexSelect = 1, - .PointWidthSource = pipeline->writes_point_size ? Vertex : State, + .PointWidthSource = Vertex, .PointWidth = 1.0, }; -- cgit v1.2.3 From f2b37132cb6a804b958d2e1dff17e7d77e430b96 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sat, 5 Mar 2016 12:20:16 -0800 Subject: anv: Check if shader if present before uploading to cache Between the initial check the returns NO_KERNEL and compiling the shader, other threads may have added the shader to the cache. Before uploading the kernel, check again (under the mutex) that the compiled shader still isn't present. 
--- src/intel/vulkan/anv_pipeline_cache.c | 45 +++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 3d2429a4e2a..f7a1e1c679a 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -116,11 +116,11 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, _mesa_sha1_final(ctx, hash); } -uint32_t -anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data, - struct anv_pipeline_bind_map *map) +static uint32_t +anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache, + const unsigned char *sha1, + const struct brw_stage_prog_data **prog_data, + struct anv_pipeline_bind_map *map) { const uint32_t mask = cache->table_size - 1; const uint32_t start = (*(uint32_t *) sha1); @@ -152,7 +152,24 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, } } - return NO_KERNEL; + unreachable("hash table should never be full"); +} + +uint32_t +anv_pipeline_cache_search(struct anv_pipeline_cache *cache, + const unsigned char *sha1, + const struct brw_stage_prog_data **prog_data, + struct anv_pipeline_bind_map *map) +{ + uint32_t kernel; + + pthread_mutex_lock(&cache->mutex); + + kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map); + + pthread_mutex_unlock(&cache->mutex); + + return kernel; } static void @@ -234,6 +251,19 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, struct anv_pipeline_bind_map *map) { pthread_mutex_lock(&cache->mutex); + + /* Before uploading, check again that another thread didn't upload this + * shader while we were compiling it. 
+ */ + if (sha1) { + uint32_t cached_kernel = + anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map); + if (cached_kernel != NO_KERNEL) { + pthread_mutex_unlock(&cache->mutex); + return cached_kernel; + } + } + struct cache_entry *entry; const uint32_t map_size = @@ -270,7 +300,8 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, map->sampler_to_descriptor = p; if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { - assert(anv_pipeline_cache_search(cache, sha1, NULL, NULL) == NO_KERNEL); + assert(anv_pipeline_cache_search_unlocked(cache, sha1, + NULL, NULL) == NO_KERNEL); memcpy(entry->sha1, sha1, sizeof(entry->sha1)); anv_pipeline_cache_add_entry(cache, entry, state.offset); -- cgit v1.2.3 From 34326f46dfe1511529363b4ab46477f04d3e1574 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 15:03:23 -0800 Subject: anv: Turn pipeline cache on by default Move the environment variable check to cache creation time so we block both lookups and uploads if it's turned off. --- src/intel/vulkan/anv_pipeline_cache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index f7a1e1c679a..62dbe3eda74 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -54,7 +54,8 @@ anv_pipeline_cache_init(struct anv_pipeline_cache *cache, /* We don't consider allocation failure fatal, we just start with a 0-sized * cache. 
*/ - if (cache->hash_table == NULL) + if (cache->hash_table == NULL || + !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true)) cache->table_size = 0; else memset(cache->hash_table, 0xff, byte_size); @@ -299,7 +300,7 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, map->sampler_count * sizeof(struct anv_pipeline_binding)); map->sampler_to_descriptor = p; - if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { + if (sha1) { assert(anv_pipeline_cache_search_unlocked(cache, sha1, NULL, NULL) == NO_KERNEL); -- cgit v1.2.3 From 7b348ab8a0d38b504f659a0b4b6c8aca1a52ea6b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sat, 5 Mar 2016 14:33:50 -0800 Subject: anv: Fix rebase error --- src/intel/vulkan/gen8_pipeline.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 8edc1574ac3..8471fc733ba 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -333,6 +333,7 @@ genX(graphics_pipeline_create)( emit_urb_setup(pipeline); + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), .ClipEnable = true, .EarlyCullEnable = true, @@ -344,7 +345,7 @@ genX(graphics_pipeline_create)( REJECT_ALL : NORMAL, .NonPerspectiveBarycentricEnable = - (pipeline->wm_prog_data.barycentric_interp_modes & 0x38) != 0, + (wm_prog_data->barycentric_interp_modes & 0x38) != 0, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, @@ -354,7 +355,6 @@ genX(graphics_pipeline_create)( .MaximumPointWidth = 255.875, .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); - const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), .StatisticsEnable = true, .LineEndCapAntialiasingRegionWidth = _05pixels, -- cgit v1.2.3 From 
8502794c1232ea0654c879ce565fef72e3ab522d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 5 Mar 2016 14:42:16 -0800 Subject: anv/pipeline: Handle null wm_prog_data in 3DSTATE_CLIP --- src/intel/vulkan/gen8_pipeline.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 8471fc733ba..10dd6457fbc 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -344,8 +344,8 @@ genX(graphics_pipeline_create)( pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? REJECT_ALL : NORMAL, - .NonPerspectiveBarycentricEnable = - (wm_prog_data->barycentric_interp_modes & 0x38) != 0, + .NonPerspectiveBarycentricEnable = wm_prog_data ? + (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, -- cgit v1.2.3 From 21ee5fd3263e034a54d7a37d9e5b6e6f9ef49f54 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 26 Feb 2016 11:31:04 -0800 Subject: anv: Emit null render targets v2 (Francisco Jerez): Add the state_offset to the surface state offset --- src/intel/vulkan/anv_cmd_buffer.c | 52 ++++++++++++++++++++++++++++---------- src/intel/vulkan/anv_private.h | 13 ++++++++++ src/intel/vulkan/genX_cmd_buffer.c | 30 ++++++++++++++++++++++ 3 files changed, 82 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 5ec242fbf2a..9dca21d527a 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -705,6 +705,26 @@ anv_format_for_descriptor_type(VkDescriptorType type) } } +static struct anv_state +anv_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb) +{ + switch (cmd_buffer->device->info.gen) { + case 7: + if (cmd_buffer->device->info.is_haswell) { + return 
gen75_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb); + } else { + return gen7_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb); + } + case 8: + return gen8_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb); + case 9: + return gen9_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb); + default: + unreachable("Invalid hardware generation"); + } +} + VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage, @@ -713,27 +733,24 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; struct anv_pipeline_bind_map *map; - uint32_t color_count, bias, state_offset; + uint32_t bias, state_offset; switch (stage) { case MESA_SHADER_FRAGMENT: map = &cmd_buffer->state.pipeline->bindings[stage]; bias = MAX_RTS; - color_count = subpass->color_count; break; case MESA_SHADER_COMPUTE: map = &cmd_buffer->state.compute_pipeline->bindings[stage]; bias = 1; - color_count = 0; break; default: map = &cmd_buffer->state.pipeline->bindings[stage]; bias = 0; - color_count = 0; break; } - if (color_count + map->surface_count == 0) { + if (bias + map->surface_count == 0) { *bt_state = (struct anv_state) { 0, }; return VK_SUCCESS; } @@ -746,14 +763,23 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (bt_state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - for (uint32_t a = 0; a < color_count; a++) { - const struct anv_image_view *iview = - fb->attachments[subpass->color_attachments[a]]; - - assert(iview->color_rt_surface_state.alloc_size); - bt_map[a] = iview->color_rt_surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, - iview->bo, iview->offset); + if (stage == MESA_SHADER_FRAGMENT) { + if (subpass->color_count == 0) { + struct anv_state null_surface = + anv_cmd_buffer_alloc_null_surface_state(cmd_buffer, + 
cmd_buffer->state.framebuffer); + bt_map[0] = null_surface.offset + state_offset; + } else { + for (uint32_t a = 0; a < subpass->color_count; a++) { + const struct anv_image_view *iview = + fb->attachments[subpass->color_attachments[a]]; + + assert(iview->color_rt_surface_state.alloc_size); + bt_map[a] = iview->color_rt_surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, + iview->bo, iview->offset); + } + } } if (stage == MESA_SHADER_COMPUTE && diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 8c3318816c6..7791bbc1649 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1299,6 +1299,19 @@ void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, const VkRenderPassBeginInfo *info); +struct anv_state +gen7_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb); +struct anv_state +gen75_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb); +struct anv_state +gen8_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb); +struct anv_state +gen9_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb); + void gen7_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); void gen75_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 2606a66f2a7..b969fab35bc 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -590,6 +590,36 @@ genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) } } +struct anv_state +genX(cmd_buffer_alloc_null_surface_state)(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb) 
+{ + struct anv_state state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + + struct GENX(RENDER_SURFACE_STATE) null_ss = { + .SurfaceType = SURFTYPE_NULL, + .SurfaceArray = fb->layers > 0, + .SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM, +#if GEN_GEN >= 8 + .TileMode = YMAJOR, +#else + .TiledSurface = true, +#endif + .Width = fb->width - 1, + .Height = fb->height - 1, + .Depth = fb->layers - 1, + .RenderTargetViewExtent = fb->layers - 1, + }; + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state.map, &null_ss); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + static void cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { -- cgit v1.2.3 From 23de78768b69d5600233df022431b8f26a0907fc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sun, 6 Mar 2016 14:16:51 -0800 Subject: anv: Create fences from the batch BO pool Applications may create a *lot* of fences, perhaps as much as one per vkQueueSubmit. Really, they're supposed to use ResetFence, but it's easy enough for us to make them crazy-cheap so we might as well. 
--- src/intel/vulkan/anv_device.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index c68280fe8d7..8aa1e61acad 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1387,8 +1387,6 @@ VkResult anv_CreateFence( struct anv_batch batch; VkResult result; - const uint32_t fence_size = 128; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8, @@ -1396,12 +1394,10 @@ VkResult anv_CreateFence( if (fence == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_bo_init_new(&fence->bo, device, fence_size); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence->bo); if (result != VK_SUCCESS) goto fail; - fence->bo.map = - anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size, 0); batch.next = batch.start = fence->bo.map; batch.end = fence->bo.map + fence->bo.size; anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); @@ -1457,9 +1453,7 @@ void anv_DestroyFence( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_fence, fence, _fence); - anv_gem_munmap(fence->bo.map, fence->bo.size); - anv_gem_close(device, fence->bo.gem_handle); - anv_free2(&device->alloc, pAllocator, fence); + anv_bo_pool_free(&device->batch_bo_pool, &fence->bo); } VkResult anv_ResetFences( -- cgit v1.2.3 From 32aa01663ff649a399480886106e203cc347c212 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 6 Mar 2016 22:06:24 -0800 Subject: anv: Quiet pTessellationState warning Some application pass a dummy for pTessellationState which results in a lot of noise. Only warn if we're actually given tessellation shadear stages. 
--- src/intel/vulkan/anv_pipeline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 183589611a1..86831eae30e 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -1113,9 +1113,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); - if (pCreateInfo->pTessellationState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); - pipeline->use_repclear = extra && extra->use_repclear; /* When we free the pipeline, we detect stages based on the NULL status @@ -1148,6 +1145,9 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pStages[MESA_SHADER_VERTEX]->pSpecializationInfo); } + if (modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL]) + anv_finishme("no tessellation support"); + if (modules[MESA_SHADER_GEOMETRY]) { anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, modules[MESA_SHADER_GEOMETRY], -- cgit v1.2.3 From 428ffc9c13c24c30c317c2e985b9097956c583b0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 14:48:35 -0800 Subject: anv/device: Actually free the CPU-side fence struct again In 23de78768, when we switched from allocating individual BOs to using the pool for fences, we accidentally deleted the free. 
--- src/intel/vulkan/anv_device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 8aa1e61acad..816f780c6ff 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1454,6 +1454,7 @@ void anv_DestroyFence( ANV_FROM_HANDLE(anv_fence, fence, _fence); anv_bo_pool_free(&device->batch_bo_pool, &fence->bo); + anv_free2(&device->alloc, pAllocator, fence); } VkResult anv_ResetFences( -- cgit v1.2.3 From 181b142fbd176f24a73cabf209000a9187f275e8 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sat, 5 Mar 2016 15:17:00 -0800 Subject: anv/device: Up device limits for 3D and array texture dimensions The limit for these textures is 2048 not 1024. Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/anv_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 816f780c6ff..44eb0ed2d6c 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -421,9 +421,9 @@ void anv_GetPhysicalDeviceProperties( VkPhysicalDeviceLimits limits = { .maxImageDimension1D = (1 << 14), .maxImageDimension2D = (1 << 14), - .maxImageDimension3D = (1 << 10), + .maxImageDimension3D = (1 << 11), .maxImageDimensionCube = (1 << 14), - .maxImageArrayLayers = (1 << 10), + .maxImageArrayLayers = (1 << 11), .maxTexelBufferElements = 128 * 1024 * 1024, .maxUniformBufferRange = UINT32_MAX, .maxStorageBufferRange = UINT32_MAX, -- cgit v1.2.3 From 8c2b9d152941f49d956bb2775a48158d1d10253b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 14:56:58 -0800 Subject: anv/bo_pool: Allow freeing BOs where the anv_bo is in the BO itself --- src/intel/vulkan/anv_allocator.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 
3b62bda3e93..d7c09103344 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -853,11 +853,13 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) } void -anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) +anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo_in) { - struct bo_pool_bo_link *link = bo->map; - link->bo = *bo; + /* Make a copy in case the anv_bo happens to be storred in the BO */ + struct anv_bo bo = *bo_in; + struct bo_pool_bo_link *link = bo.map; + link->bo = bo; - VG(VALGRIND_MEMPOOL_FREE(pool, bo->map)); + VG(VALGRIND_MEMPOOL_FREE(pool, bo.map)); anv_ptr_free_list_push(&pool->free_list, link); } -- cgit v1.2.3 From 3d4f2b0927acaac05e87ed07ae492e39b4c82ff7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 21:22:46 -0800 Subject: anv/allocator: Move the alignment assert for the pointer free list Previously we asserted every time you tried to pack a pointer and a counter together. However, this wasn't really correct. In the case where you try to grab the last element of the list, the "next elemnet" value you get may be bogus if someonoe else got there first. This was leading to assertion failures even though the allocator would safely fall through to the failure case below. 
--- src/intel/vulkan/anv_allocator.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index d7c09103344..385c63f9945 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -200,7 +200,6 @@ anv_free_list_push(union anv_free_list *list, void *map, int32_t offset) #define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) #define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) #define PFL_PACK(ptr, count) ({ \ - assert(((uintptr_t)(ptr) & 0xfff) == 0); \ (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ }) @@ -230,6 +229,12 @@ anv_ptr_free_list_push(void **list, void *elem) void *old, *current; void **next_ptr = elem; + /* The pointer-based free list requires that the pointer be + * page-aligned. This is because we use the bottom 12 bits of the + * pointer to store a counter to solve the ABA concurrency problem. + */ + assert(((uintptr_t)elem & 0xfff) == 0); + old = *list; do { current = old; -- cgit v1.2.3 From f61d40adc2a09221453b7a87880e134a5424773e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 21:27:55 -0800 Subject: anv/allocator: Better casting in PFL macros We cast he constant 0xfff values to a uintptr_t before applying a bitwise negate to ensure that they are actually 64-bit when needed. Also, the count variable doesn't need to be explicitly cast, it will get upcast as needed by the "|" operation. --- src/intel/vulkan/anv_allocator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 385c63f9945..4fc83386a71 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -198,9 +198,9 @@ anv_free_list_push(union anv_free_list *list, void *map, int32_t offset) * means that the bottom 12 bits should all be zero. 
*/ #define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) -#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) +#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~(uintptr_t)0xfff)) #define PFL_PACK(ptr, count) ({ \ - (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ + (void *)(((uintptr_t)(ptr) & ~(uintptr_t)0xfff) | ((count) & 0xfff)); \ }) static bool -- cgit v1.2.3 From 2308891edea4d8508d3e95f29c58b4089e96b5e7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 13:45:25 -0800 Subject: anv: Store CPU-side fence information in the BO This reduces the number of allocations a bit and cuts back on memory usage. Kind-of a micro-optimization but it also makes the error handling a bit simpler so it seems like a win. --- src/intel/vulkan/anv_device.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 44eb0ed2d6c..768e2eb3be1 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1383,29 +1383,31 @@ VkResult anv_CreateFence( VkFence* pFence) { ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_bo fence_bo; struct anv_fence *fence; struct anv_batch batch; VkResult result; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); - fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (fence == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence->bo); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo); if (result != VK_SUCCESS) - goto fail; + return result; + + /* Fences are small. Just store the CPU data structure in the BO. */ + fence = fence_bo.map; + fence->bo = fence_bo; - batch.next = batch.start = fence->bo.map; + /* Place the batch after the CPU data but on its own cache line. 
*/ + const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE); + batch.next = batch.start = fence->bo.map + batch_offset; batch.end = fence->bo.map + fence->bo.size; anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); anv_batch_emit(&batch, GEN7_MI_NOOP); if (!device->info.has_llc) { - assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0); - assert(batch.next - fence->bo.map <= CACHELINE_SIZE); + assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0); + assert(batch.next - batch.start <= CACHELINE_SIZE); __builtin_ia32_mfence(); __builtin_ia32_clflush(fence->bo.map); } @@ -1421,8 +1423,8 @@ VkResult anv_CreateFence( fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; fence->execbuf.buffer_count = 1; - fence->execbuf.batch_start_offset = 0; - fence->execbuf.batch_len = batch.next - fence->bo.map; + fence->execbuf.batch_start_offset = batch.start - fence->bo.map; + fence->execbuf.batch_len = batch.next - batch.start; fence->execbuf.cliprects_ptr = 0; fence->execbuf.num_cliprects = 0; fence->execbuf.DR1 = 0; @@ -1438,11 +1440,6 @@ VkResult anv_CreateFence( *pFence = anv_fence_to_handle(fence); return VK_SUCCESS; - - fail: - anv_free2(&device->alloc, pAllocator, fence); - - return result; } void anv_DestroyFence( @@ -1453,8 +1450,8 @@ void anv_DestroyFence( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_fence, fence, _fence); + assert(fence->bo.map == fence); anv_bo_pool_free(&device->batch_bo_pool, &fence->bo); - anv_free2(&device->alloc, pAllocator, fence); } VkResult anv_ResetFences( -- cgit v1.2.3 From 75af420cb1145f5fc34af6728047a2404b5f1add Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 18:07:48 -0800 Subject: anv/pipeline: Move binding table setup to its own helper --- src/intel/vulkan/anv_pipeline.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c 
index 86831eae30e..22af44d6020 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -366,27 +366,6 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, if (pipeline->layout) anv_nir_apply_pipeline_layout(pipeline, nir, prog_data, map); - /* All binding table offsets provided by apply_pipeline_layout() are - * relative to the start of the bindint table (plus MAX_RTS for VS). - */ - unsigned bias; - switch (stage) { - case MESA_SHADER_FRAGMENT: - bias = MAX_RTS; - break; - case MESA_SHADER_COMPUTE: - bias = 1; - break; - default: - bias = 0; - break; - } - prog_data->binding_table.size_bytes = 0; - prog_data->binding_table.texture_start = bias; - prog_data->binding_table.ubo_start = bias; - prog_data->binding_table.ssbo_start = bias; - prog_data->binding_table.image_start = bias; - /* Finish the optimization and compilation process */ if (nir->stage == MESA_SHADER_COMPUTE) brw_nir_lower_shared(nir); @@ -399,6 +378,16 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, return nir; } +static void +anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias) +{ + prog_data->binding_table.size_bytes = 0; + prog_data->binding_table.texture_start = bias; + prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.ssbo_start = bias; + prog_data->binding_table.image_start = bias; +} + static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, gl_shader_stage stage, @@ -463,6 +452,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + anv_fill_binding_table(&prog_data.base.base, 0); + void *mem_ctx = ralloc_context(NULL); if (module->nir == NULL) @@ -549,6 +540,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + anv_fill_binding_table(&prog_data.base.base, 0); + void *mem_ctx = ralloc_context(NULL); if (module->nir == NULL) @@ -643,6 +636,8 @@ 
anv_pipeline_compile_fs(struct anv_pipeline *pipeline, } } + anv_fill_binding_table(&prog_data.base, MAX_RTS); + void *mem_ctx = ralloc_context(NULL); if (module->nir == NULL) @@ -740,6 +735,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + anv_fill_binding_table(&prog_data.base, 1); + prog_data.base.total_shared = nir->num_shared; void *mem_ctx = ralloc_context(NULL); -- cgit v1.2.3 From cce65471b8667e1752754c53361031cded5b39d1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 17:28:00 -0800 Subject: anv: Compact render targets Previously, we would always emit all of the render targets in the subpass. This commit changes it so that we compact render targets just like we do with other resources. Render targets are represented in the surface map by using a descriptor set index of UINT16_MAX. --- src/intel/vulkan/anv_cmd_buffer.c | 52 +++++++++++++++++++-------------------- src/intel/vulkan/anv_pipeline.c | 48 ++++++++++++++++++++++++++++++++---- src/intel/vulkan/anv_private.h | 11 +++++++-- src/intel/vulkan/gen8_pipeline.c | 35 +++++++++++++++++++------- 4 files changed, 104 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 9dca21d527a..ac8bf5fc619 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -736,10 +736,6 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, uint32_t bias, state_offset; switch (stage) { - case MESA_SHADER_FRAGMENT: - map = &cmd_buffer->state.pipeline->bindings[stage]; - bias = MAX_RTS; - break; case MESA_SHADER_COMPUTE: map = &cmd_buffer->state.compute_pipeline->bindings[stage]; bias = 1; @@ -763,25 +759,6 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (bt_state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - if (stage == MESA_SHADER_FRAGMENT) { - if (subpass->color_count 
== 0) { - struct anv_state null_surface = - anv_cmd_buffer_alloc_null_surface_state(cmd_buffer, - cmd_buffer->state.framebuffer); - bt_map[0] = null_surface.offset + state_offset; - } else { - for (uint32_t a = 0; a < subpass->color_count; a++) { - const struct anv_image_view *iview = - fb->attachments[subpass->color_attachments[a]]; - - assert(iview->color_rt_surface_state.alloc_size); - bt_map[a] = iview->color_rt_surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, - iview->bo, iview->offset); - } - } - } - if (stage == MESA_SHADER_COMPUTE && get_cs_prog_data(cmd_buffer->state.compute_pipeline)->uses_num_work_groups) { struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; @@ -815,14 +792,37 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, uint32_t image = 0; for (uint32_t s = 0; s < map->surface_count; s++) { struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s]; - struct anv_descriptor_set *set = - cmd_buffer->state.descriptors[binding->set]; - struct anv_descriptor *desc = &set->descriptors[binding->offset]; struct anv_state surface_state; struct anv_bo *bo; uint32_t bo_offset; + if (binding->set == ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) { + /* Color attachment binding */ + assert(stage == MESA_SHADER_FRAGMENT); + if (binding->offset < subpass->color_count) { + const struct anv_image_view *iview = + fb->attachments[subpass->color_attachments[binding->offset]]; + + assert(iview->color_rt_surface_state.alloc_size); + surface_state = iview->color_rt_surface_state; + add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, + iview->bo, iview->offset); + } else { + /* Null render target */ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + surface_state = + anv_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb); + } + + bt_map[bias + s] = surface_state.offset + state_offset; + continue; + } + + struct anv_descriptor_set *set = + 
cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + switch (desc->type) { case VK_DESCRIPTOR_TYPE_SAMPLER: /* Nothing for us to do here */ diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 22af44d6020..abe93a50af8 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -599,9 +599,6 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, populate_wm_prog_key(&pipeline->device->info, info, extra, &key); - if (pipeline->use_repclear) - key.nr_color_regions = 1; - if (module->size > 0) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); @@ -613,7 +610,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, struct anv_pipeline_binding sampler_to_descriptor[256]; map = (struct anv_pipeline_bind_map) { - .surface_to_descriptor = surface_to_descriptor, + .surface_to_descriptor = surface_to_descriptor + 8, .sampler_to_descriptor = sampler_to_descriptor }; @@ -623,6 +620,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + unsigned num_rts = 0; + struct anv_pipeline_binding rt_bindings[8]; nir_function_impl *impl = nir_shader_get_entrypoint(nir)->impl; nir_foreach_variable_safe(var, &nir->outputs) { if (var->data.location < FRAG_RESULT_DATA0) @@ -630,13 +629,52 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, unsigned rt = var->data.location - FRAG_RESULT_DATA0; if (rt >= key.nr_color_regions) { + /* Out-of-bounds, throw it away */ var->data.mode = nir_var_local; exec_node_remove(&var->node); exec_list_push_tail(&impl->locals, &var->node); + continue; + } + + /* Give it a new, compacted, location */ + var->data.location = FRAG_RESULT_DATA0 + num_rts; + + unsigned array_len = + glsl_type_is_array(var->type) ? 
glsl_get_length(var->type) : 1; + assert(num_rts + array_len <= 8); + + for (unsigned i = 0; i < array_len; i++) { + rt_bindings[num_rts] = (struct anv_pipeline_binding) { + .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, + .offset = rt + i, + }; } + + num_rts += array_len; + } + + if (pipeline->use_repclear) { + assert(num_rts == 1); + key.nr_color_regions = 1; } - anv_fill_binding_table(&prog_data.base, MAX_RTS); + if (num_rts == 0) { + /* If we have no render targets, we need a null render target */ + rt_bindings[0] = (struct anv_pipeline_binding) { + .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, + .offset = UINT16_MAX, + }; + num_rts = 1; + } + + assert(num_rts <= 8); + map.surface_to_descriptor -= num_rts; + map.surface_count += num_rts; + assert(map.surface_count <= 256); + memcpy(map.surface_to_descriptor, rt_bindings, + num_rts * sizeof(*rt_bindings)); + + anv_fill_binding_table(&prog_data.base, num_rts); void *mem_ctx = ralloc_context(NULL); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 7791bbc1649..f24ea20115b 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -995,11 +995,16 @@ anv_descriptor_set_destroy(struct anv_device *device, struct anv_descriptor_pool *pool, struct anv_descriptor_set *set); +#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT16_MAX + struct anv_pipeline_binding { - /* The descriptor set this surface corresponds to */ + /* The descriptor set this surface corresponds to. The special value of + * ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS indicates that the offset refers + * to a color attachment and not a regular descriptor. + */ uint16_t set; - /* Offset into the descriptor set */ + /* Offset into the descriptor set or attachment list. 
*/ uint16_t offset; }; @@ -1404,9 +1409,11 @@ struct anv_pipeline_bind_map { uint32_t surface_count; uint32_t sampler_count; uint32_t image_count; + uint32_t attachment_count; struct anv_pipeline_binding * surface_to_descriptor; struct anv_pipeline_binding * sampler_to_descriptor; + uint32_t * surface_to_attachment; }; struct anv_pipeline { diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 10dd6457fbc..71705d23200 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -114,9 +114,33 @@ emit_cb_state(struct anv_pipeline *pipeline, .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, }; + /* Default everything to disabled */ + for (uint32_t i = 0; i < 8; i++) { + blend_state.Entry[i].WriteDisableAlpha = true; + blend_state.Entry[i].WriteDisableRed = true; + blend_state.Entry[i].WriteDisableGreen = true; + blend_state.Entry[i].WriteDisableBlue = true; + } + + struct anv_pipeline_bind_map *map = + &pipeline->bindings[MESA_SHADER_FRAGMENT]; + bool has_writeable_rt = false; - for (uint32_t i = 0; i < info->attachmentCount; i++) { - const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; + for (unsigned i = 0; i < map->surface_count; i++) { + struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i]; + + /* All color attachments are at the beginning of the binding table */ + if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) + break; + + /* We can have at most 8 attachments */ + assert(i < 8); + + if (binding->offset >= info->attachmentCount) + continue; + + const VkPipelineColorBlendAttachmentState *a = + &info->pAttachments[binding->offset]; if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || a->dstColorBlendFactor != a->dstAlphaBlendFactor || @@ -165,13 +189,6 @@ emit_cb_state(struct anv_pipeline *pipeline, } } - for (uint32_t i = info->attachmentCount; i < 8; i++) { - blend_state.Entry[i].WriteDisableAlpha = true; - blend_state.Entry[i].WriteDisableRed = 
true; - blend_state.Entry[i].WriteDisableGreen = true; - blend_state.Entry[i].WriteDisableBlue = true; - } - if (info->attachmentCount > 0) { struct GENX(BLEND_STATE_ENTRY) *bs = &blend_state.Entry[0]; -- cgit v1.2.3 From dc504a51fb47d1b4a12011cb1986c3897fad007f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Mar 2016 10:25:00 -0800 Subject: anv/pipeline: Unconditionally emit PS_BLEND on gen8+ Special-casing the PS_BLEND packet wasn't really gaining us anything. It's defined to be more-or-less the contents of blend state entry 0 only without the indirection. We can just copy-and-paste the contents. If there are no valid color targets, then blend state 0 will be 0-initialized anyway so it's basically the same as the special case we had before. --- src/intel/vulkan/gen8_pipeline.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 71705d23200..b8b29d46b8a 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -189,24 +189,20 @@ emit_cb_state(struct anv_pipeline *pipeline, } } - if (info->attachmentCount > 0) { - struct GENX(BLEND_STATE_ENTRY) *bs = &blend_state.Entry[0]; - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), - .AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable, - .HasWriteableRT = has_writeable_rt, - .ColorBufferBlendEnable = bs->ColorBufferBlendEnable, - .SourceAlphaBlendFactor = bs->SourceAlphaBlendFactor, - .DestinationAlphaBlendFactor = - bs->DestinationAlphaBlendFactor, - .SourceBlendFactor = bs->SourceBlendFactor, - .DestinationBlendFactor = bs->DestinationBlendFactor, - .AlphaTestEnable = false, - .IndependentAlphaBlendEnable = - blend_state.IndependentAlphaBlendEnable); - } else { - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND)); - } + struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0]; + + anv_batch_emit(&pipeline->batch, 
GENX(3DSTATE_PS_BLEND), + .AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable, + .HasWriteableRT = has_writeable_rt, + .ColorBufferBlendEnable = bs0->ColorBufferBlendEnable, + .SourceAlphaBlendFactor = bs0->SourceAlphaBlendFactor, + .DestinationAlphaBlendFactor = + bs0->DestinationAlphaBlendFactor, + .SourceBlendFactor = bs0->SourceBlendFactor, + .DestinationBlendFactor = bs0->DestinationBlendFactor, + .AlphaTestEnable = false, + .IndependentAlphaBlendEnable = + blend_state.IndependentAlphaBlendEnable); GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); if (!device->info.has_llc) -- cgit v1.2.3 From bbbdd32c192a350dd63f21cf0b01a30ee6a085ff Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Mar 2016 11:19:02 -0800 Subject: anv/meta_clear: Use repclear again --- src/intel/vulkan/anv_meta_clear.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index bce94460844..a24e59950be 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -297,14 +297,15 @@ create_color_pipeline(struct anv_device *device, .pAttachments = blend_attachment_state }; - /* Disable repclear because we do not want the compiler to replace the - * shader. We need the shader to write to the specified color attachment, - * but the repclear shader writes to all color attachments. + /* Use the repclear shader. Since the NIR shader we are providing has + * exactly one output, that output will get compacted down to binding + * table entry 0. The hard-coded repclear shader is then exactly what + * we want regardless of what attachment we are actually clearing. 
*/ return create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &device->meta_state.alloc, - /*use_repclear*/ false, pipeline); + /*use_repclear*/ true, pipeline); } static void -- cgit v1.2.3 From 42b4c0fa6e0909e9622b03d56393ddec173ebe5d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Mar 2016 16:49:06 -0800 Subject: anv: Pull all of the genX_foo functions into anv_genX.h This way we only have to declare them each once and we get it for all gens at a single go. --- src/intel/vulkan/anv_genX.h | 61 ++++++++++++++++ src/intel/vulkan/anv_private.h | 159 +++++------------------------------------ 2 files changed, 79 insertions(+), 141 deletions(-) create mode 100644 src/intel/vulkan/anv_genX.h (limited to 'src') diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h new file mode 100644 index 00000000000..a8b96e48be5 --- /dev/null +++ b/src/intel/vulkan/anv_genX.h @@ -0,0 +1,61 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * Gen-specific function declarations. This header must *not* be included + * directly. Instead, it is included multiple times by gen8_private.h. + * + * In this header file, the usual genx() macro is available. + */ + +VkResult genX(init_device_state)(struct anv_device *device); + +void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer); + +struct anv_state +genX(cmd_buffer_alloc_null_surface_state)(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb); + +void genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + +void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); + +void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer); + +void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer); + +VkResult +genX(graphics_pipeline_create)(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +genX(compute_pipeline_create)(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index f24ea20115b..0ef840da10e 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -691,11 +691,6 @@ struct anv_device { pthread_mutex_t mutex; }; -VkResult gen7_init_device_state(struct anv_device *device); -VkResult gen75_init_device_state(struct anv_device *device); -VkResult 
gen8_init_device_state(struct anv_device *device); -VkResult gen9_init_device_state(struct anv_device *device); - void anv_device_get_cache_uuid(void *uuid); @@ -1294,55 +1289,14 @@ anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); -void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen75_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen9_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); - void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, const VkRenderPassBeginInfo *info); -struct anv_state -gen7_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, - struct anv_framebuffer *fb); -struct anv_state -gen75_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, - struct anv_framebuffer *fb); -struct anv_state -gen8_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, - struct anv_framebuffer *fb); -struct anv_state -gen9_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, - struct anv_framebuffer *fb); - -void gen7_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void gen75_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void gen8_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void gen9_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); void anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); -void gen7_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); 
-void gen75_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); -void gen8_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); -void gen9_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); - -void gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); -void gen75_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); -void gen9_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); - -void gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); -void gen75_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); -void gen9_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); - struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage); @@ -1538,62 +1492,6 @@ anv_graphics_pipeline_create(VkDevice device, const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); -VkResult -gen7_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen75_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen8_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen9_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo 
*pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen7_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen75_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen8_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen9_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - struct anv_format_swizzle { unsigned r:2; unsigned g:2; @@ -1747,32 +1645,6 @@ void anv_image_view_init(struct anv_image_view *view, uint32_t offset, VkImageUsageFlags usage_mask); -void -anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen7_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen75_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen8_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen9_fill_image_surface_state(struct anv_device *device, void *state_map, - 
struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); - struct anv_buffer_view { enum isl_format format; /**< VkBufferViewCreateInfo::format */ struct anv_bo *bo; @@ -1794,19 +1666,6 @@ void anv_fill_buffer_surface_state(struct anv_device *device, uint32_t offset, uint32_t range, uint32_t stride); -void gen7_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); -void gen75_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); -void gen8_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); -void gen9_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); - void anv_image_view_fill_image_param(struct anv_device *device, struct anv_image_view *view, struct brw_image_param *param); @@ -1949,6 +1808,24 @@ ANV_DEFINE_STRUCT_CASTS(anv_common, VkMemoryBarrier) ANV_DEFINE_STRUCT_CASTS(anv_common, VkBufferMemoryBarrier) ANV_DEFINE_STRUCT_CASTS(anv_common, VkImageMemoryBarrier) +/* Gen-specific function declarations */ +#ifdef genX +# include "anv_genX.h" +#else +# define genX(x) gen7_##x +# include "anv_genX.h" +# undef genX +# define genX(x) gen75_##x +# include "anv_genX.h" +# undef genX +# define genX(x) gen8_##x +# include "anv_genX.h" +# undef genX +# define genX(x) gen9_##x +# include "anv_genX.h" +# undef genX +#endif + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 28cbc45b3c83d645bb2b805a0ed6008e2f9dad61 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Mar 2016 16:54:07 -0800 Subject: anv/cmd_buffer: Split flush_state into two functions --- src/intel/vulkan/anv_genX.h | 1 + src/intel/vulkan/gen7_cmd_buffer.c | 11 ++++++++++- src/intel/vulkan/gen8_cmd_buffer.c | 11 ++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'src') diff --git 
a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index a8b96e48be5..f98127ba238 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -42,6 +42,7 @@ void genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer); +void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 8dce586eec7..d552f1b4b8a 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -462,6 +462,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) } } + cmd_buffer->state.vb_dirty &= ~vb_emit; + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { /* If somebody compiled a pipeline after starting a command buffer the * scratch bo may have grown since we started this cmd buffer (and @@ -521,6 +523,14 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) gen7_cmd_buffer_emit_scissor(cmd_buffer); + genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); +} + +void +genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_RENDER_TARGETS | ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | @@ -622,7 +632,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size }); } - cmd_buffer->state.vb_dirty &= ~vb_emit; cmd_buffer->state.dirty = 0; } diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 0d27c27f5b7..f1c82235d3d 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ 
b/src/intel/vulkan/gen8_cmd_buffer.c @@ -279,6 +279,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) } } + cmd_buffer->state.vb_dirty &= ~vb_emit; + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { /* If somebody compiled a pipeline after starting a command buffer the * scratch bo may have grown since we started this cmd buffer (and @@ -324,6 +326,14 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) gen7_cmd_buffer_emit_scissor(cmd_buffer); + genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); +} + +void +genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { __emit_sf_state(cmd_buffer); @@ -452,7 +462,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ); } - cmd_buffer->state.vb_dirty &= ~vb_emit; cmd_buffer->state.dirty = 0; } -- cgit v1.2.3 From 248ab61740c4082517424f5aa94b2f4e7b210d76 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Mar 2016 17:10:05 -0800 Subject: anv/cmd_buffer: Pull the core of flush_state into genX_cmd_buffer --- src/intel/vulkan/anv_genX.h | 3 + src/intel/vulkan/gen7_cmd_buffer.c | 147 +------------------------------ src/intel/vulkan/gen8_cmd_buffer.c | 135 +--------------------------- src/intel/vulkan/genX_cmd_buffer.c | 176 +++++++++++++++++++++++++++++++++++++ 4 files changed, 185 insertions(+), 276 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index f98127ba238..77d387ae748 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -41,6 +41,9 @@ void genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); +void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, + bool 
enable_slm); + void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index d552f1b4b8a..56f03268133 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -32,44 +32,6 @@ #include "genxml/gen_macros.h" #include "genxml/genX_pack.h" -static uint32_t -cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) -{ - static const uint32_t push_constant_opcodes[] = { - [MESA_SHADER_VERTEX] = 21, - [MESA_SHADER_TESS_CTRL] = 25, /* HS */ - [MESA_SHADER_TESS_EVAL] = 26, /* DS */ - [MESA_SHADER_GEOMETRY] = 22, - [MESA_SHADER_FRAGMENT] = 23, - [MESA_SHADER_COMPUTE] = 0, - }; - - VkShaderStageFlags flushed = 0; - - anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { - if (stage == MESA_SHADER_COMPUTE) - continue; - - struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - - if (state.offset == 0) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage]); - } else { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer0 = { .offset = state.offset }, - .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - } - } - - cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS; - - return flushed; -} - #if GEN_GEN == 7 && !GEN_IS_HASWELL void gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, @@ -344,8 +306,8 @@ emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) #define GEN7_L3CNTLREG2 0xb020 #define GEN7_L3CNTLREG3 0xb024 -static void -config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) +void +genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) { /* 
References for GL state: * @@ -401,7 +363,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); bool needs_slm = cs_prog_data->base.total_shared > 0; - config_l3(cmd_buffer, needs_slm); + genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), @@ -423,109 +385,6 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.compute_dirty = 0; } -void -genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - uint32_t *p; - - uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - - genX(flush_pipeline_select_3d)(cmd_buffer); - - if (vb_emit) { - const uint32_t num_buffers = __builtin_popcount(vb_emit); - const uint32_t num_dwords = 1 + num_buffers * 4; - - p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GENX(3DSTATE_VERTEX_BUFFERS)); - uint32_t vb, i = 0; - for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; - uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; - - struct GENX(VERTEX_BUFFER_STATE) state = { - .VertexBufferIndex = vb, - .BufferAccessType = pipeline->instancing_enable[vb] ? 
INSTANCEDATA : VERTEXDATA, - .VertexBufferMemoryObjectControlState = GENX(MOCS), - .AddressModifyEnable = true, - .BufferPitch = pipeline->binding_stride[vb], - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1}, - .InstanceDataStepRate = 1 - }; - - GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); - i++; - } - } - - cmd_buffer->state.vb_dirty &= ~vb_emit; - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { - /* If somebody compiled a pipeline after starting a command buffer the - * scratch bo may have grown since we started this cmd buffer (and - * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, - * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ - if (cmd_buffer->state.scratch_size < pipeline->total_scratch) - gen7_cmd_buffer_emit_state_base_address(cmd_buffer); - - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: - * - * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to - * the next 3DPRIMITIVE command after programming the - * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" - * - * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of - * pipeline setup, we need to dirty push constants. - */ - cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; - } - - if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || - cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) { - /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: - * - * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth - * stall needs to be sent just prior to any 3DSTATE_VS, - * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, - * 3DSTATE_BINDING_TABLE_POINTER_VS, - * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one - * PIPE_CONTROL needs to be sent before any combination of VS - * associated 3DSTATE." 
- */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DepthStallEnable = true, - .PostSyncOperation = WriteImmediateData, - .Address = { &cmd_buffer->device->workaround_bo, 0 }); - } - - uint32_t dirty = 0; - if (cmd_buffer->state.descriptors_dirty) { - dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); - gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); - } - - if (cmd_buffer->state.push_constants_dirty) - cmd_buffer_flush_push_constants(cmd_buffer); - - /* We use the gen8 state here because it only contains the additional - * min/max fields and, since they occur at the end of the packet and - * don't change the stride, they work on gen7 too. - */ - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) - gen8_cmd_buffer_emit_viewport(cmd_buffer); - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) - gen7_cmd_buffer_emit_scissor(cmd_buffer); - - genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); -} - void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) { diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index f1c82235d3d..4a926255a5d 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -32,46 +32,6 @@ #include "genxml/gen_macros.h" #include "genxml/genX_pack.h" -static uint32_t -cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) -{ - static const uint32_t push_constant_opcodes[] = { - [MESA_SHADER_VERTEX] = 21, - [MESA_SHADER_TESS_CTRL] = 25, /* HS */ - [MESA_SHADER_TESS_EVAL] = 26, /* DS */ - [MESA_SHADER_GEOMETRY] = 22, - [MESA_SHADER_FRAGMENT] = 23, - [MESA_SHADER_COMPUTE] = 0, - }; - - VkShaderStageFlags flushed = 0; - - anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { - if (stage == MESA_SHADER_COMPUTE) - continue; - - struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - - if (state.offset == 0) { - anv_batch_emit(&cmd_buffer->batch, 
GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage]); - } else { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, - .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - } - - flushed |= mesa_to_vk_shader_stage(stage); - } - - cmd_buffer->state.push_constants_dirty &= ~flushed; - - return flushed; -} - #if GEN_GEN == 8 static void emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, @@ -158,8 +118,8 @@ emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) #define GEN8_L3CNTLREG 0x7034 -static void -config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) +void +genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) { /* References for GL state: * @@ -240,95 +200,6 @@ __emit_sf_state(struct anv_cmd_buffer *cmd_buffer) __emit_genx_sf_state(cmd_buffer); } -void -genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - uint32_t *p; - - uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - - config_l3(cmd_buffer, false); - - genX(flush_pipeline_select_3d)(cmd_buffer); - - if (vb_emit) { - const uint32_t num_buffers = __builtin_popcount(vb_emit); - const uint32_t num_dwords = 1 + num_buffers * 4; - - p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GENX(3DSTATE_VERTEX_BUFFERS)); - uint32_t vb, i = 0; - for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; - uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; - - struct GENX(VERTEX_BUFFER_STATE) state = { - .VertexBufferIndex = vb, - .MemoryObjectControlState = GENX(MOCS), - .AddressModifyEnable = true, - .BufferPitch = 
pipeline->binding_stride[vb], - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset - }; - - GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); - i++; - } - } - - cmd_buffer->state.vb_dirty &= ~vb_emit; - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { - /* If somebody compiled a pipeline after starting a command buffer the - * scratch bo may have grown since we started this cmd buffer (and - * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, - * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ - if (cmd_buffer->state.scratch_size < pipeline->total_scratch) - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: - * - * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to - * the next 3DPRIMITIVE command after programming the - * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" - * - * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of - * pipeline setup, we need to dirty push constants. - */ - cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; - } - - /* We emit the binding tables and sampler tables first, then emit push - * constants and then finally emit binding table and sampler table - * pointers. It has to happen in this order, since emitting the binding - * tables may change the push constants (in case of storage images). After - * emitting push constants, on SKL+ we have to emit the corresponding - * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. 
- */ - uint32_t dirty = 0; - if (cmd_buffer->state.descriptors_dirty) - dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); - - if (cmd_buffer->state.push_constants_dirty) - dirty |= cmd_buffer_flush_push_constants(cmd_buffer); - - if (dirty) - gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) - gen8_cmd_buffer_emit_viewport(cmd_buffer); - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) - gen7_cmd_buffer_emit_scissor(cmd_buffer); - - genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); -} - void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) { @@ -573,7 +444,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); bool needs_slm = cs_prog_data->base.total_shared > 0; - config_l3(cmd_buffer, needs_slm); + genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { #if GEN_GEN < 10 diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index b969fab35bc..c3d2043dcdf 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -269,6 +269,182 @@ void genX(CmdPipelineBarrier)( } } +static uint32_t +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t push_constant_opcodes[] = { + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, + }; + + VkShaderStageFlags flushed = 0; + + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = 
push_constant_opcodes[stage]); + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { +#if GEN_GEN >= 9 + .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, + .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), +#else + .PointerToConstantBuffer0 = { .offset = state.offset }, + .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), +#endif + }); + } + + flushed |= mesa_to_vk_shader_stage(stage); + } + + cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS; + + return flushed; +} + +void +genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + genX(cmd_buffer_config_l3)(cmd_buffer, false); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GENX(3DSTATE_VERTEX_BUFFERS)); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GENX(VERTEX_BUFFER_STATE) state = { + .VertexBufferIndex = vb, + +#if GEN_GEN >= 8 + .MemoryObjectControlState = GENX(MOCS), +#else + .BufferAccessType = pipeline->instancing_enable[vb] ? 
INSTANCEDATA : VERTEXDATA, + .InstanceDataStepRate = 1, + .VertexBufferMemoryObjectControlState = GENX(MOCS), +#endif + + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + +#if GEN_GEN >= 8 + .BufferSize = buffer->size - offset +#else + .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1}, +#endif + }; + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: + * + * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to + * the next 3DPRIMITIVE command after programming the + * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" + * + * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of + * pipeline setup, we need to dirty push constants. + */ + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; + } + +#if GEN_GEN <= 7 + if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || + cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) { + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth + * stall needs to be sent just prior to any 3DSTATE_VS, + * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, + * 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. 
Only one + * PIPE_CONTROL needs to be sent before any combination of VS + * associated 3DSTATE." + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &cmd_buffer->device->workaround_bo, 0 }); + } +#endif + + /* We emit the binding tables and sampler tables first, then emit push + * constants and then finally emit binding table and sampler table + * pointers. It has to happen in this order, since emitting the binding + * tables may change the push constants (in case of storage images). After + * emitting push constants, on SKL+ we have to emit the corresponding + * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. + */ + uint32_t dirty = 0; + if (cmd_buffer->state.descriptors_dirty) + dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.push_constants_dirty) { +#if GEN_GEN >= 9 + /* On Sky Lake and later, the binding table pointers commands are + * what actually flush the changes to push constant state so we need + * to dirty them so they get re-emitted below. 
+ */ + dirty |= cmd_buffer_flush_push_constants(cmd_buffer); +#else + cmd_buffer_flush_push_constants(cmd_buffer); +#endif + } + + if (dirty) + gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + gen8_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) + gen7_cmd_buffer_emit_scissor(cmd_buffer); + + genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); +} + static void emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *bo, uint32_t offset) -- cgit v1.2.3 From 7ebbc3946ae9cffb3c3db522dcbe2f1041633164 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 4 Mar 2016 11:43:19 -0800 Subject: anv/meta: Minimize height of images used for copies In addition to demystifying the value being added to the height, this future-proofs the code for new tiling modes and keeps the image height as small as possible. v2: Actually use the smallest height possible. Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/anv_meta_blit.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index b8a42f99eec..ecd4d2d3536 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -450,8 +450,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, .format = 0, /* TEMPLATE */ .extent = { .width = 0, /* TEMPLATE */ - /* Pad to highest tile height to compensate for a vertical intratile offset */ - .height = MIN(rects[r].height + 64, 1 << 14), + .height = 0, /* TEMPLATE */ .depth = 1, }, .mipLevels = 1, @@ -465,11 +464,19 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, .isl_tiling_flags = 0, /* TEMPLATE */ }; + /* The image height is the rect height + src/dst y-offset from the + * tile-aligned base address. 
+ */ + struct isl_tile_info tile_info; + anv_image_info.isl_tiling_flags = 1 << src->tiling; image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; image_info.format = src_format, + isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs, &tile_info); + image_info.extent.height = rects[r].height + + rects[r].src_y % tile_info.height; image_info.extent.width = src->pitch / src->bs; VkImage src_image; anv_image_create(vk_device, &anv_image_info, @@ -480,6 +487,9 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; image_info.format = dst_format, + isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs, &tile_info); + image_info.extent.height = rects[r].height + + rects[r].dst_y % tile_info.height; image_info.extent.width = dst->pitch / dst->bs; VkImage dst_image; anv_image_create(vk_device, &anv_image_info, -- cgit v1.2.3 From ddbc6458464b86fa3f4f87f0f2db2f117fa04cdc Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 7 Mar 2016 14:18:27 -0800 Subject: anv/meta: Store src and dst usage flags in a variable Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/anv_meta_blit.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index ecd4d2d3536..82b79b88f2c 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -440,6 +440,8 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); VkFormat src_format = vk_format_for_size(src->bs); VkFormat dst_format = vk_format_for_size(dst->bs); + VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; + VkImageUsageFlags dst_usage = 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; for (unsigned r = 0; r < num_rects; ++r) { @@ -472,7 +474,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, anv_image_info.isl_tiling_flags = 1 << src->tiling; image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + image_info.usage = src_usage; image_info.format = src_format, isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs, &tile_info); image_info.extent.height = rects[r].height + @@ -485,7 +487,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, anv_image_info.isl_tiling_flags = 1 << dst->tiling; image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + image_info.usage = dst_usage; image_info.format = dst_format, isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs, &tile_info); image_info.extent.height = rects[r].height + @@ -533,7 +535,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_image_view src_iview; anv_image_view_init(&src_iview, cmd_buffer->device, - &iview_info, cmd_buffer, img_o, VK_IMAGE_USAGE_SAMPLED_BIT); + &iview_info, cmd_buffer, img_o, src_usage); iview_info.image = dst_image; iview_info.format = dst_format; @@ -548,7 +550,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, (uint32_t*)&dst_offset_el.y); struct anv_image_view dst_iview; anv_image_view_init(&dst_iview, cmd_buffer->device, - &iview_info, cmd_buffer, img_o, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + &iview_info, cmd_buffer, img_o, dst_usage); /* Perform blit */ meta_emit_blit(cmd_buffer, -- cgit v1.2.3 From f39168392243d6dacefbc8708b764c5978ff24df Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 7 Mar 2016 22:38:05 -0800 Subject: anv/meta: Make meta_emit_blit() public This can be reverted if the only other consumer, 
anv_meta_blit2d(), uses a different method. Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/anv_meta.h | 11 +++++++++++ src/intel/vulkan/anv_meta_blit.c | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index 587c044fa5f..fb562dbd564 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -105,6 +105,17 @@ void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save); +void +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *src_image, + struct anv_image_view *src_iview, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_image *dest_image, + struct anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D dest_extent, + VkFilter blit_filter); #ifdef __cplusplus } #endif diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 82b79b88f2c..57833bf66ac 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -160,7 +160,7 @@ meta_region_extent_el(const VkFormat format, }; } -static void +void meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, struct anv_image_view *src_iview, -- cgit v1.2.3 From 627728cce55b8b67bb30bdd206affb6f0885315b Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 7 Mar 2016 15:15:33 -0800 Subject: anv/meta: Split anv_meta_blit.c into three files The new organization is as follows: * anv_meta_blit.c: Blit and state setup/teardown commands * anv_meta_copy.c: Copy and update commands * anv_meta_blit2d.c: 2D Blitter API commands Also, change the formatting to contain most lines within 80 columns. 
Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/Makefile.am | 2 + src/intel/vulkan/anv_meta_blit.c | 612 +------------------------------------ src/intel/vulkan/anv_meta_blit2d.c | 213 +++++++++++++ src/intel/vulkan/anv_meta_copy.c | 441 ++++++++++++++++++++++++++ 4 files changed, 662 insertions(+), 606 deletions(-) create mode 100644 src/intel/vulkan/anv_meta_blit2d.c create mode 100644 src/intel/vulkan/anv_meta_copy.c (limited to 'src') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 272db40d10b..f20cd41fbba 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -83,7 +83,9 @@ VULKAN_SOURCES = \ anv_intel.c \ anv_meta.c \ anv_meta_blit.c \ + anv_meta_blit2d.c \ anv_meta_clear.c \ + anv_meta_copy.c \ anv_meta_resolve.c \ anv_nir_apply_dynamic_offsets.c \ anv_nir_apply_pipeline_layout.c \ diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 57833bf66ac..7bddc6b2d42 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -119,47 +119,6 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT)); } -void -anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_saved_state *save) -{ - meta_prepare_blit(cmd_buffer, save); -} - - -/* Returns the user-provided VkBufferImageCopy::imageOffset in units of - * elements rather than texels. One element equals one texel or one block - * if Image is uncompressed or compressed, respectively. - */ -static struct VkOffset3D -meta_region_offset_el(const struct anv_image * image, - const struct VkOffset3D * offset) -{ - const struct isl_format_layout * isl_layout = image->format->isl_layout; - return (VkOffset3D) { - .x = offset->x / isl_layout->bw, - .y = offset->y / isl_layout->bh, - .z = offset->z / isl_layout->bd, - }; -} - -/* Returns the user-provided VkBufferImageCopy::imageExtent in units of - * elements rather than texels. 
One element equals one texel or one block - * if Image is uncompressed or compressed, respectively. - */ -static struct VkExtent3D -meta_region_extent_el(const VkFormat format, - const struct VkExtent3D * extent) -{ - const struct isl_format_layout * isl_layout = - anv_format_for_vk_format(format)->isl_layout; - return (VkExtent3D) { - .width = DIV_ROUND_UP(extent->width , isl_layout->bw), - .height = DIV_ROUND_UP(extent->height, isl_layout->bh), - .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), - }; -} - void meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, @@ -194,8 +153,10 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)(src_offset.x + src_extent.width) + / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) + / (float)src_iview->extent.height, (float)src_offset.z / (float)src_iview->extent.depth, }, }; @@ -207,7 +168,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, }, .tex_coord = { (float)src_offset.x / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)(src_offset.y + src_extent.height) / + (float)src_iview->extent.height, (float)src_offset.z / (float)src_iview->extent.depth, }, }; @@ -380,444 +342,6 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, anv_meta_restore(saved_state, cmd_buffer); } -void -anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_saved_state *save) -{ - meta_finish_blit(cmd_buffer, save); -} - -static VkFormat -vk_format_for_size(int bs) -{ - /* The choice of UNORM and UINT formats is very intentional here. Most of - * the time, we want to use a UINT format to avoid any rounding error in - * the blit. For stencil blits, R8_UINT is required by the hardware. 
- * (It's the only format allowed in conjunction with W-tiling.) Also we - * intentionally use the 4-channel formats whenever we can. This is so - * that, when we do a RGB <-> RGBX copy, the two formats will line up even - * though one of them is 3/4 the size of the other. The choice of UNORM - * vs. UINT is also very intentional because Haswell doesn't handle 8 or - * 16-bit RGB UINT formats at all so we have to use UNORM there. - * Fortunately, the only time we should ever use two different formats in - * the table below is for RGB -> RGBA blits and so we will never have any - * UNORM/UINT mismatch. - */ - switch (bs) { - case 1: return VK_FORMAT_R8_UINT; - case 2: return VK_FORMAT_R8G8_UINT; - case 3: return VK_FORMAT_R8G8B8_UNORM; - case 4: return VK_FORMAT_R8G8B8A8_UNORM; - case 6: return VK_FORMAT_R16G16B16_UNORM; - case 8: return VK_FORMAT_R16G16B16A16_UNORM; - case 12: return VK_FORMAT_R32G32B32_UINT; - case 16: return VK_FORMAT_R32G32B32A32_UINT; - default: - unreachable("Invalid format block size"); - } -} - -static struct anv_meta_blit2d_surf -blit_surf_for_image(const struct anv_image* image, - const struct isl_surf *img_isl_surf) -{ - return (struct anv_meta_blit2d_surf) { - .bo = image->bo, - .tiling = img_isl_surf->tiling, - .base_offset = image->offset, - .bs = isl_format_get_layout(img_isl_surf->format)->bs, - .pitch = isl_surf_get_row_pitch(img_isl_surf), - }; -} - -void -anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_blit2d_surf *src, - struct anv_meta_blit2d_surf *dst, - unsigned num_rects, - struct anv_meta_blit2d_rect *rects) -{ - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - VkFormat src_format = vk_format_for_size(src->bs); - VkFormat dst_format = vk_format_for_size(dst->bs); - VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; - VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - - for (unsigned r = 0; r < num_rects; ++r) { - - /* Create VkImages */ - VkImageCreateInfo 
image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = 0, /* TEMPLATE */ - .extent = { - .width = 0, /* TEMPLATE */ - .height = 0, /* TEMPLATE */ - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = 0, /* TEMPLATE */ - .usage = 0, /* TEMPLATE */ - }; - struct anv_image_create_info anv_image_info = { - .vk_info = &image_info, - .isl_tiling_flags = 0, /* TEMPLATE */ - }; - - /* The image height is the rect height + src/dst y-offset from the - * tile-aligned base address. - */ - struct isl_tile_info tile_info; - - anv_image_info.isl_tiling_flags = 1 << src->tiling; - image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? - VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - image_info.usage = src_usage; - image_info.format = src_format, - isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs, &tile_info); - image_info.extent.height = rects[r].height + - rects[r].src_y % tile_info.height; - image_info.extent.width = src->pitch / src->bs; - VkImage src_image; - anv_image_create(vk_device, &anv_image_info, - &cmd_buffer->pool->alloc, &src_image); - - anv_image_info.isl_tiling_flags = 1 << dst->tiling; - image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? - VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - image_info.usage = dst_usage; - image_info.format = dst_format, - isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs, &tile_info); - image_info.extent.height = rects[r].height + - rects[r].dst_y % tile_info.height; - image_info.extent.width = dst->pitch / dst->bs; - VkImage dst_image; - anv_image_create(vk_device, &anv_image_info, - &cmd_buffer->pool->alloc, &dst_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. 
- */ - anv_image_from_handle(src_image)->bo = src->bo; - anv_image_from_handle(src_image)->offset = src->base_offset; - anv_image_from_handle(dst_image)->bo = dst->bo; - anv_image_from_handle(dst_image)->offset = dst->base_offset; - - /* Create VkImageViews */ - VkImageViewCreateInfo iview_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = 0, /* TEMPLATE */ - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = 0, /* TEMPLATE */ - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }; - uint32_t img_o = 0; - - iview_info.image = src_image; - iview_info.format = src_format; - VkOffset3D src_offset_el = {0}; - isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, - &anv_image_from_handle(src_image)-> - color_surface.isl, - rects[r].src_x, - rects[r].src_y, - &img_o, - (uint32_t*)&src_offset_el.x, - (uint32_t*)&src_offset_el.y); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &iview_info, cmd_buffer, img_o, src_usage); - - iview_info.image = dst_image; - iview_info.format = dst_format; - VkOffset3D dst_offset_el = {0}; - isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, - &anv_image_from_handle(dst_image)-> - color_surface.isl, - rects[r].dst_x, - rects[r].dst_y, - &img_o, - (uint32_t*)&dst_offset_el.x, - (uint32_t*)&dst_offset_el.y); - struct anv_image_view dst_iview; - anv_image_view_init(&dst_iview, cmd_buffer->device, - &iview_info, cmd_buffer, img_o, dst_usage); - - /* Perform blit */ - meta_emit_blit(cmd_buffer, - anv_image_from_handle(src_image), - &src_iview, - src_offset_el, - (VkExtent3D){rects[r].width, rects[r].height, 1}, - anv_image_from_handle(dst_image), - &dst_iview, - dst_offset_el, - (VkExtent3D){rects[r].width, rects[r].height, 1}, - VK_FILTER_NEAREST); - - anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); - 
anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc); - } -} - -static void -do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *src, uint64_t src_offset, - struct anv_bo *dest, uint64_t dest_offset, - int width, int height, int bs) -{ - struct anv_meta_blit2d_surf b_src = { - .bo = src, - .tiling = ISL_TILING_LINEAR, - .base_offset = src_offset, - .bs = bs, - .pitch = width * bs, - }; - struct anv_meta_blit2d_surf b_dst = { - .bo = dest, - .tiling = ISL_TILING_LINEAR, - .base_offset = dest_offset, - .bs = bs, - .pitch = width * bs, - }; - struct anv_meta_blit2d_rect rect = { - .width = width, - .height = height, - }; - anv_meta_blit2d(cmd_buffer, - &b_src, - &b_dst, - 1, - &rect); -} - -void anv_CmdCopyBuffer( - VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); - ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); - - struct anv_meta_saved_state saved_state; - - anv_meta_begin_blit2d(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; - uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset; - uint64_t copy_size = pRegions[r].size; - - /* First, we compute the biggest format that can be used with the - * given offsets and size. 
- */ - int bs = 16; - - int fs = ffs(src_offset) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(src_offset % bs == 0); - - fs = ffs(dest_offset) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(dest_offset % bs == 0); - - fs = ffs(pRegions[r].size) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(pRegions[r].size % bs == 0); - - /* This is maximum possible width/height our HW can handle */ - uint64_t max_surface_dim = 1 << 14; - - /* First, we make a bunch of max-sized copies */ - uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; - while (copy_size >= max_copy_size) { - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - max_surface_dim, max_surface_dim, bs); - copy_size -= max_copy_size; - src_offset += max_copy_size; - dest_offset += max_copy_size; - } - - uint64_t height = copy_size / (max_surface_dim * bs); - assert(height < max_surface_dim); - if (height != 0) { - uint64_t rect_copy_size = height * max_surface_dim * bs; - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - max_surface_dim, height, bs); - copy_size -= rect_copy_size; - src_offset += rect_copy_size; - dest_offset += rect_copy_size; - } - - if (copy_size != 0) { - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - copy_size / bs, 1, bs); - } - } - - anv_meta_end_blit2d(cmd_buffer, &saved_state); -} - -void anv_CmdUpdateBuffer( - VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize dataSize, - const uint32_t* pData) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); - struct anv_meta_saved_state saved_state; - - anv_meta_begin_blit2d(cmd_buffer, &saved_state); - - /* We can't quite grab a full block because the state stream needs a - * little data at the top to build its linked list. 
- */ - const uint32_t max_update_size = - cmd_buffer->device->dynamic_state_block_pool.block_size - 64; - - assert(max_update_size < (1 << 14) * 4); - - while (dataSize) { - const uint32_t copy_size = MIN2(dataSize, max_update_size); - - struct anv_state tmp_data = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); - - memcpy(tmp_data.map, pData, copy_size); - - int bs; - if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { - bs = 16; - } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { - bs = 8; - } else { - assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); - bs = 4; - } - - do_buffer_copy(cmd_buffer, - &cmd_buffer->device->dynamic_state_block_pool.bo, - tmp_data.offset, - dst_buffer->bo, dst_buffer->offset + dstOffset, - copy_size / bs, 1, bs); - - dataSize -= copy_size; - dstOffset += copy_size; - pData = (void *)pData + copy_size; - } - - anv_meta_end_blit2d(cmd_buffer, &saved_state); -} - -void anv_CmdCopyImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - struct anv_meta_saved_state saved_state; - - /* From the Vulkan 1.0 spec: - * - * vkCmdCopyImage can be used to copy image data between multisample - * images, but both images must have the same number of samples. 
- */ - assert(src_image->samples == dest_image->samples); - - anv_meta_begin_blit2d(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - assert(pRegions[r].srcSubresource.aspectMask == - pRegions[r].dstSubresource.aspectMask); - - VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; - - /* Create blit surfaces */ - struct isl_surf *src_isl_surf = - &anv_image_get_surface_for_aspect_mask(src_image, aspect)->isl; - struct isl_surf *dst_isl_surf = - &anv_image_get_surface_for_aspect_mask(dest_image, aspect)->isl; - struct anv_meta_blit2d_surf b_src = blit_surf_for_image(src_image, src_isl_surf); - struct anv_meta_blit2d_surf b_dst = blit_surf_for_image(dest_image, dst_isl_surf); - - /* Start creating blit rect */ - const VkOffset3D dst_offset_el = meta_region_offset_el(dest_image, &pRegions[r].dstOffset); - const VkOffset3D src_offset_el = meta_region_offset_el(src_image, &pRegions[r].srcOffset); - const VkExtent3D img_extent_el = meta_region_extent_el(src_image->vk_format, - &pRegions[r].extent); - struct anv_meta_blit2d_rect rect = { - .width = img_extent_el.width, - .height = img_extent_el.height, - }; - - /* Loop through each 3D or array slice */ - unsigned num_slices_3d = pRegions[r].extent.depth; - unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; - unsigned slice_3d = 0; - unsigned slice_array = 0; - while (slice_3d < num_slices_3d && slice_array < num_slices_array) { - - /* Finish creating blit rect */ - isl_surf_get_image_offset_el(dst_isl_surf, - pRegions[r].dstSubresource.mipLevel, - pRegions[r].dstSubresource.baseArrayLayer + slice_array, - pRegions[r].dstOffset.z + slice_3d, - &rect.dst_x, - &rect.dst_y); - isl_surf_get_image_offset_el(src_isl_surf, - pRegions[r].srcSubresource.mipLevel, - pRegions[r].srcSubresource.baseArrayLayer + slice_array, - pRegions[r].srcOffset.z + slice_3d, - &rect.src_x, - &rect.src_y); - rect.dst_x += dst_offset_el.x; - rect.dst_y += dst_offset_el.y; - rect.src_x += 
src_offset_el.x; - rect.src_y += src_offset_el.y; - - /* Perform Blit */ - anv_meta_blit2d(cmd_buffer, - &b_src, - &b_dst, - 1, - &rect); - - if (dest_image->type == VK_IMAGE_TYPE_3D) - slice_3d++; - else - slice_array++; - } - } - - anv_meta_end_blit2d(cmd_buffer, &saved_state); -} - void anv_CmdBlitImage( VkCommandBuffer commandBuffer, VkImage srcImage, @@ -925,130 +449,6 @@ void anv_CmdBlitImage( meta_finish_blit(cmd_buffer, &saved_state); } -static void -meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, - struct anv_buffer* buffer, - struct anv_image* image, - uint32_t regionCount, - const VkBufferImageCopy* pRegions, - bool forward) -{ - struct anv_meta_saved_state saved_state; - - /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to - * VK_SAMPLE_COUNT_1_BIT." - */ - assert(image->samples == 1); - - anv_meta_begin_blit2d(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - - /* Start creating blit rect */ - const VkOffset3D img_offset_el = meta_region_offset_el(image, &pRegions[r].imageOffset); - const VkExtent3D bufferExtent = { - .width = pRegions[r].bufferRowLength, - .height = pRegions[r].bufferImageHeight, - }; - const VkExtent3D buf_extent_el = meta_region_extent_el(image->vk_format, &bufferExtent); - const VkExtent3D img_extent_el = meta_region_extent_el(image->vk_format, - &pRegions[r].imageExtent); - struct anv_meta_blit2d_rect rect = { - .width = MAX2(buf_extent_el.width, img_extent_el.width), - .height = MAX2(buf_extent_el.height, img_extent_el.height), - }; - - /* Create blit surfaces */ - VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - const struct isl_surf *img_isl_surf = - &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; - struct anv_meta_blit2d_surf img_bsurf = blit_surf_for_image(image, img_isl_surf); - struct anv_meta_blit2d_surf buf_bsurf = { - .bo = buffer->bo, - .tiling = ISL_TILING_LINEAR, - .base_offset = buffer->offset + pRegions[r].bufferOffset, - 
.bs = forward ? image->format->isl_layout->bs : img_bsurf.bs, - .pitch = rect.width * buf_bsurf.bs, - }; - - /* Set direction-dependent variables */ - struct anv_meta_blit2d_surf *dst_bsurf = forward ? &img_bsurf : &buf_bsurf; - struct anv_meta_blit2d_surf *src_bsurf = forward ? &buf_bsurf : &img_bsurf; - uint32_t *x_offset = forward ? &rect.dst_x : &rect.src_x; - uint32_t *y_offset = forward ? &rect.dst_y : &rect.src_y; - - /* Loop through each 3D or array slice */ - unsigned num_slices_3d = pRegions[r].imageExtent.depth; - unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; - unsigned slice_3d = 0; - unsigned slice_array = 0; - while (slice_3d < num_slices_3d && slice_array < num_slices_array) { - - /* Finish creating blit rect */ - isl_surf_get_image_offset_el(img_isl_surf, - pRegions[r].imageSubresource.mipLevel, - pRegions[r].imageSubresource.baseArrayLayer + slice_array, - pRegions[r].imageOffset.z + slice_3d, - x_offset, - y_offset); - *x_offset += img_offset_el.x; - *y_offset += img_offset_el.y; - - /* Perform Blit */ - anv_meta_blit2d(cmd_buffer, - src_bsurf, - dst_bsurf, - 1, - &rect); - - /* Once we've done the blit, all of the actual information about - * the image is embedded in the command buffer so we can just - * increment the offset directly in the image effectively - * re-binding it to different backing memory. 
- */ - buf_bsurf.base_offset += rect.width * rect.height * buf_bsurf.bs; - - if (image->type == VK_IMAGE_TYPE_3D) - slice_3d++; - else - slice_array++; - } - } - anv_meta_end_blit2d(cmd_buffer, &saved_state); -} - -void anv_CmdCopyBufferToImage( - VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkBufferImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); - - meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image, - regionCount, pRegions, true); -} - -void anv_CmdCopyImageToBuffer( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_buffer, dst_buffer, destBuffer); - - meta_copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, - regionCount, pRegions, false); -} - void anv_device_finish_meta_blit_state(struct anv_device *device) { diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c new file mode 100644 index 00000000000..b165abd9b6c --- /dev/null +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -0,0 +1,213 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_meta.h" + +static VkFormat +vk_format_for_size(int bs) +{ + /* The choice of UNORM and UINT formats is very intentional here. Most of + * the time, we want to use a UINT format to avoid any rounding error in + * the blit. For stencil blits, R8_UINT is required by the hardware. + * (It's the only format allowed in conjunction with W-tiling.) Also we + * intentionally use the 4-channel formats whenever we can. This is so + * that, when we do a RGB <-> RGBX copy, the two formats will line up even + * though one of them is 3/4 the size of the other. The choice of UNORM + * vs. UINT is also very intentional because Haswell doesn't handle 8 or + * 16-bit RGB UINT formats at all so we have to use UNORM there. + * Fortunately, the only time we should ever use two different formats in + * the table below is for RGB -> RGBA blits and so we will never have any + * UNORM/UINT mismatch. 
+ */ + switch (bs) { + case 1: return VK_FORMAT_R8_UINT; + case 2: return VK_FORMAT_R8G8_UINT; + case 3: return VK_FORMAT_R8G8B8_UNORM; + case 4: return VK_FORMAT_R8G8B8A8_UNORM; + case 6: return VK_FORMAT_R16G16B16_UNORM; + case 8: return VK_FORMAT_R16G16B16A16_UNORM; + case 12: return VK_FORMAT_R32G32B32_UINT; + case 16: return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Invalid format block size"); + } +} + +void +anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save) +{ + anv_meta_restore(save, cmd_buffer); +} + +void +anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save) +{ + anv_meta_save(save, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); +} + +void +anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + VkFormat src_format = vk_format_for_size(src->bs); + VkFormat dst_format = vk_format_for_size(dst->bs); + VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; + VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + for (unsigned r = 0; r < num_rects; ++r) { + + /* Create VkImages */ + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = 0, /* TEMPLATE */ + .extent = { + .width = 0, /* TEMPLATE */ + .height = 0, /* TEMPLATE */ + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = 0, /* TEMPLATE */ + .usage = 0, /* TEMPLATE */ + }; + struct anv_image_create_info anv_image_info = { + .vk_info = &image_info, + .isl_tiling_flags = 0, /* TEMPLATE */ + }; + + /* The image height is the rect height + src/dst y-offset from the + * tile-aligned base address. 
+ */ + struct isl_tile_info tile_info; + + anv_image_info.isl_tiling_flags = 1 << src->tiling; + image_info.tiling = anv_image_info.isl_tiling_flags == + ISL_TILING_LINEAR_BIT ? + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + image_info.usage = src_usage; + image_info.format = src_format, + isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs, + &tile_info); + image_info.extent.height = rects[r].height + + rects[r].src_y % tile_info.height; + image_info.extent.width = src->pitch / src->bs; + VkImage src_image; + anv_image_create(vk_device, &anv_image_info, + &cmd_buffer->pool->alloc, &src_image); + + anv_image_info.isl_tiling_flags = 1 << dst->tiling; + image_info.tiling = anv_image_info.isl_tiling_flags == + ISL_TILING_LINEAR_BIT ? + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + image_info.usage = dst_usage; + image_info.format = dst_format, + isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs, + &tile_info); + image_info.extent.height = rects[r].height + + rects[r].dst_y % tile_info.height; + image_info.extent.width = dst->pitch / dst->bs; + VkImage dst_image; + anv_image_create(vk_device, &anv_image_info, + &cmd_buffer->pool->alloc, &dst_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + anv_image_from_handle(src_image)->bo = src->bo; + anv_image_from_handle(src_image)->offset = src->base_offset; + anv_image_from_handle(dst_image)->bo = dst->bo; + anv_image_from_handle(dst_image)->offset = dst->base_offset; + + /* Create VkImageViews */ + VkImageViewCreateInfo iview_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = 0, /* TEMPLATE */ + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = 0, /* TEMPLATE */ + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }; + uint32_t img_o = 0; + + iview_info.image = src_image; + iview_info.format = src_format; + VkOffset3D src_offset_el = {0}; + isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, + &anv_image_from_handle(src_image)-> + color_surface.isl, + rects[r].src_x, + rects[r].src_y, + &img_o, + (uint32_t*)&src_offset_el.x, + (uint32_t*)&src_offset_el.y); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &iview_info, cmd_buffer, img_o, src_usage); + + iview_info.image = dst_image; + iview_info.format = dst_format; + VkOffset3D dst_offset_el = {0}; + isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, + &anv_image_from_handle(dst_image)-> + color_surface.isl, + rects[r].dst_x, + rects[r].dst_y, + &img_o, + (uint32_t*)&dst_offset_el.x, + (uint32_t*)&dst_offset_el.y); + struct anv_image_view dst_iview; + anv_image_view_init(&dst_iview, cmd_buffer->device, + &iview_info, cmd_buffer, img_o, dst_usage); + + /* Perform blit */ + meta_emit_blit(cmd_buffer, + anv_image_from_handle(src_image), + &src_iview, + src_offset_el, + (VkExtent3D){rects[r].width, rects[r].height, 1}, + anv_image_from_handle(dst_image), + &dst_iview, + dst_offset_el, + (VkExtent3D){rects[r].width, rects[r].height, 1}, + VK_FILTER_NEAREST); + + anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); + 
anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc); + } +} + diff --git a/src/intel/vulkan/anv_meta_copy.c b/src/intel/vulkan/anv_meta_copy.c new file mode 100644 index 00000000000..1a2bfd6cf01 --- /dev/null +++ b/src/intel/vulkan/anv_meta_copy.c @@ -0,0 +1,441 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_meta.h" + +/* Returns the user-provided VkBufferImageCopy::imageExtent in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. 
+ */ +static struct VkExtent3D +meta_region_extent_el(const VkFormat format, + const struct VkExtent3D *extent) +{ + const struct isl_format_layout *isl_layout = + anv_format_for_vk_format(format)->isl_layout; + return (VkExtent3D) { + .width = DIV_ROUND_UP(extent->width , isl_layout->bw), + .height = DIV_ROUND_UP(extent->height, isl_layout->bh), + .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), + }; +} + +/* Returns the user-provided VkBufferImageCopy::imageOffset in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. + */ +static struct VkOffset3D +meta_region_offset_el(const struct anv_image *image, + const struct VkOffset3D *offset) +{ + const struct isl_format_layout *isl_layout = image->format->isl_layout; + return (VkOffset3D) { + .x = offset->x / isl_layout->bw, + .y = offset->y / isl_layout->bh, + .z = offset->z / isl_layout->bd, + }; +} + +static struct anv_meta_blit2d_surf +blit_surf_for_image(const struct anv_image* image, + const struct isl_surf *img_isl_surf) +{ + return (struct anv_meta_blit2d_surf) { + .bo = image->bo, + .tiling = img_isl_surf->tiling, + .base_offset = image->offset, + .bs = isl_format_get_layout(img_isl_surf->format)->bs, + .pitch = isl_surf_get_row_pitch(img_isl_surf), + }; +} + +static void +do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *src, uint64_t src_offset, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, int bs) +{ + struct anv_meta_blit2d_surf b_src = { + .bo = src, + .tiling = ISL_TILING_LINEAR, + .base_offset = src_offset, + .bs = bs, + .pitch = width * bs, + }; + struct anv_meta_blit2d_surf b_dst = { + .bo = dest, + .tiling = ISL_TILING_LINEAR, + .base_offset = dest_offset, + .bs = bs, + .pitch = width * bs, + }; + struct anv_meta_blit2d_rect rect = { + .width = width, + .height = height, + }; + anv_meta_blit2d(cmd_buffer, &b_src, &b_dst, 1, &rect); +} + +static void 
+meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, + struct anv_buffer* buffer, + struct anv_image* image, + uint32_t regionCount, + const VkBufferImageCopy* pRegions, + bool forward) +{ + struct anv_meta_saved_state saved_state; + + /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(image->samples == 1); + + anv_meta_begin_blit2d(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + + /* Start creating blit rect */ + const VkOffset3D img_offset_el = + meta_region_offset_el(image, &pRegions[r].imageOffset); + const VkExtent3D bufferExtent = { + .width = pRegions[r].bufferRowLength, + .height = pRegions[r].bufferImageHeight, + }; + const VkExtent3D buf_extent_el = + meta_region_extent_el(image->vk_format, &bufferExtent); + const VkExtent3D img_extent_el = + meta_region_extent_el(image->vk_format, &pRegions[r].imageExtent); + struct anv_meta_blit2d_rect rect = { + .width = MAX2(buf_extent_el.width, img_extent_el.width), + .height = MAX2(buf_extent_el.height, img_extent_el.height), + }; + + /* Create blit surfaces */ + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + const struct isl_surf *img_isl_surf = + &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; + struct anv_meta_blit2d_surf img_bsurf = + blit_surf_for_image(image, img_isl_surf); + struct anv_meta_blit2d_surf buf_bsurf = { + .bo = buffer->bo, + .tiling = ISL_TILING_LINEAR, + .base_offset = buffer->offset + pRegions[r].bufferOffset, + .bs = forward ? image->format->isl_layout->bs : img_bsurf.bs, + .pitch = rect.width * buf_bsurf.bs, + }; + + /* Set direction-dependent variables */ + struct anv_meta_blit2d_surf *dst_bsurf = forward ? &img_bsurf : &buf_bsurf; + struct anv_meta_blit2d_surf *src_bsurf = forward ? &buf_bsurf : &img_bsurf; + uint32_t *x_offset = forward ? &rect.dst_x : &rect.src_x; + uint32_t *y_offset = forward ? 
&rect.dst_y : &rect.src_y; + + /* Loop through each 3D or array slice */ + unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { + + /* Finish creating blit rect */ + isl_surf_get_image_offset_el(img_isl_surf, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer + + slice_array, + pRegions[r].imageOffset.z + slice_3d, + x_offset, + y_offset); + *x_offset += img_offset_el.x; + *y_offset += img_offset_el.y; + + /* Perform Blit */ + anv_meta_blit2d(cmd_buffer, src_bsurf, dst_bsurf, 1, &rect); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. + */ + buf_bsurf.base_offset += rect.width * rect.height * buf_bsurf.bs; + + if (image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; + } + } + anv_meta_end_blit2d(cmd_buffer, &saved_state); +} + +void anv_CmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + + meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image, + regionCount, pRegions, true); +} + +void anv_CmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + 
ANV_FROM_HANDLE(anv_buffer, dst_buffer, destBuffer); + + meta_copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, + regionCount, pRegions, false); +} + +void anv_CmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdCopyImage can be used to copy image data between multisample + * images, but both images must have the same number of samples. + */ + assert(src_image->samples == dest_image->samples); + + anv_meta_begin_blit2d(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + assert(pRegions[r].srcSubresource.aspectMask == + pRegions[r].dstSubresource.aspectMask); + + VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; + + /* Create blit surfaces */ + struct isl_surf *src_isl_surf = + &anv_image_get_surface_for_aspect_mask(src_image, aspect)->isl; + struct isl_surf *dst_isl_surf = + &anv_image_get_surface_for_aspect_mask(dest_image, aspect)->isl; + struct anv_meta_blit2d_surf b_src = + blit_surf_for_image(src_image, src_isl_surf); + struct anv_meta_blit2d_surf b_dst = + blit_surf_for_image(dest_image, dst_isl_surf); + + /* Start creating blit rect */ + const VkOffset3D dst_offset_el = + meta_region_offset_el(dest_image, &pRegions[r].dstOffset); + const VkOffset3D src_offset_el = + meta_region_offset_el(src_image, &pRegions[r].srcOffset); + const VkExtent3D img_extent_el = + meta_region_extent_el(src_image->vk_format, &pRegions[r].extent); + struct anv_meta_blit2d_rect rect = { + .width = img_extent_el.width, + .height = img_extent_el.height, + }; + + /* Loop through each 3D or array slice */ + unsigned num_slices_3d = 
pRegions[r].extent.depth; + unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { + + /* Finish creating blit rect */ + isl_surf_get_image_offset_el(dst_isl_surf, + pRegions[r].dstSubresource.mipLevel, + pRegions[r].dstSubresource.baseArrayLayer + + slice_array, + pRegions[r].dstOffset.z + slice_3d, + &rect.dst_x, + &rect.dst_y); + isl_surf_get_image_offset_el(src_isl_surf, + pRegions[r].srcSubresource.mipLevel, + pRegions[r].srcSubresource.baseArrayLayer + + slice_array, + pRegions[r].srcOffset.z + slice_3d, + &rect.src_x, + &rect.src_y); + rect.dst_x += dst_offset_el.x; + rect.dst_y += dst_offset_el.y; + rect.src_x += src_offset_el.x; + rect.src_y += src_offset_el.y; + + /* Perform Blit */ + anv_meta_blit2d(cmd_buffer, &b_src, &b_dst, 1, &rect); + + if (dest_image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; + } + } + + anv_meta_end_blit2d(cmd_buffer, &saved_state); +} + +void anv_CmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + + struct anv_meta_saved_state saved_state; + + anv_meta_begin_blit2d(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; + uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset; + uint64_t copy_size = pRegions[r].size; + + /* First, we compute the biggest format that can be used with the + * given offsets and size. 
+ */ + int bs = 16; + + int fs = ffs(src_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(src_offset % bs == 0); + + fs = ffs(dest_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(dest_offset % bs == 0); + + fs = ffs(pRegions[r].size) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(pRegions[r].size % bs == 0); + + /* This is maximum possible width/height our HW can handle */ + uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; + while (copy_size >= max_copy_size) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, max_surface_dim, bs); + copy_size -= max_copy_size; + src_offset += max_copy_size; + dest_offset += max_copy_size; + } + + uint64_t height = copy_size / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + uint64_t rect_copy_size = height * max_surface_dim * bs; + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, height, bs); + copy_size -= rect_copy_size; + src_offset += rect_copy_size; + dest_offset += rect_copy_size; + } + + if (copy_size != 0) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + copy_size / bs, 1, bs); + } + } + + anv_meta_end_blit2d(cmd_buffer, &saved_state); +} + +void anv_CmdUpdateBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + anv_meta_begin_blit2d(cmd_buffer, &saved_state); + + /* We can't quite grab a full block because the state stream needs a + * little data at the top to build its linked list. 
+ */ + const uint32_t max_update_size = + cmd_buffer->device->dynamic_state_block_pool.block_size - 64; + + assert(max_update_size < (1 << 14) * 4); + + while (dataSize) { + const uint32_t copy_size = MIN2(dataSize, max_update_size); + + struct anv_state tmp_data = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); + + memcpy(tmp_data.map, pData, copy_size); + + int bs; + if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { + bs = 16; + } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { + bs = 8; + } else { + assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); + bs = 4; + } + + do_buffer_copy(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, + tmp_data.offset, + dst_buffer->bo, dst_buffer->offset + dstOffset, + copy_size / bs, 1, bs); + + dataSize -= copy_size; + dstOffset += copy_size; + pData = (void *)pData + copy_size; + } + + anv_meta_end_blit2d(cmd_buffer, &saved_state); +} -- cgit v1.2.3 From 514c0557178b0325c59a28d68b0f250f0eeaddf5 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 8 Mar 2016 12:45:55 -0800 Subject: anv/meta: Prefix anv_ to meta_emit_blit() Follow the convention for non-static functions. 
Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/anv_meta.h | 2 +- src/intel/vulkan/anv_meta_blit.c | 4 ++-- src/intel/vulkan/anv_meta_blit2d.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index fb562dbd564..e2e00433c49 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -106,7 +106,7 @@ anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save); void -meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, +anv_meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, struct anv_image_view *src_iview, VkOffset3D src_offset, diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 7bddc6b2d42..2c3c91733a7 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -120,7 +120,7 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, } void -meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, +anv_meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, struct anv_image_view *src_iview, VkOffset3D src_offset, @@ -438,7 +438,7 @@ void anv_CmdBlitImage( }, cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - meta_emit_blit(cmd_buffer, + anv_meta_emit_blit(cmd_buffer, src_image, &src_iview, pRegions[r].srcOffsets[0], src_extent, dest_image, &dest_iview, diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index b165abd9b6c..d49b4708f8e 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -195,7 +195,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, &iview_info, cmd_buffer, img_o, dst_usage); /* Perform blit */ - meta_emit_blit(cmd_buffer, + anv_meta_emit_blit(cmd_buffer, anv_image_from_handle(src_image), &src_iview, src_offset_el, -- cgit v1.2.3 From 7fbbad01706f08645e832e6dd2f5eeaf1e3c6894 Mon Sep 17 00:00:00 2001 From: 
Nanley Chery Date: Tue, 8 Mar 2016 09:37:43 -0800 Subject: anv/blit2d: Use the tiling enum for simplicity Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/anv_meta_blit2d.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index d49b4708f8e..6f0734254bf 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -110,8 +110,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct isl_tile_info tile_info; anv_image_info.isl_tiling_flags = 1 << src->tiling; - image_info.tiling = anv_image_info.isl_tiling_flags == - ISL_TILING_LINEAR_BIT ? + image_info.tiling = src->tiling == ISL_TILING_LINEAR ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; image_info.usage = src_usage; image_info.format = src_format, @@ -125,8 +124,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, &cmd_buffer->pool->alloc, &src_image); anv_image_info.isl_tiling_flags = 1 << dst->tiling; - image_info.tiling = anv_image_info.isl_tiling_flags == - ISL_TILING_LINEAR_BIT ? + image_info.tiling = dst->tiling == ISL_TILING_LINEAR ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; image_info.usage = dst_usage; image_info.format = dst_format, -- cgit v1.2.3 From b0e423cc4f0b19a266f3ecaf379c0a25bf332e04 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 9 Mar 2016 12:23:22 -0800 Subject: isl: Remove redundant check The green channel was checked twice. 
--- src/intel/isl/isl_format.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/isl/isl_format.c b/src/intel/isl/isl_format.c index 0fe6e9b83ab..7842197d861 100644 --- a/src/intel/isl/isl_format.c +++ b/src/intel/isl/isl_format.c @@ -36,8 +36,7 @@ isl_format_has_sint_channel(enum isl_format fmt) fmtl->channels.a.type == ISL_SINT || fmtl->channels.l.type == ISL_SINT || fmtl->channels.i.type == ISL_SINT || - fmtl->channels.p.type == ISL_SINT || - fmtl->channels.g.type == ISL_SINT; + fmtl->channels.p.type == ISL_SINT; } enum isl_format -- cgit v1.2.3 From 6f47ed28b4b2a007adc4183c818b608ec2f0be83 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 9 Mar 2016 12:25:47 -0800 Subject: isl: Add more helpers for determining if a format is an integer format --- src/intel/isl/isl.h | 10 ++++++++++ src/intel/isl/isl_format.c | 14 ++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'src') diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 248a94d64eb..90193ca08c1 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -825,9 +825,19 @@ isl_format_get_layout(enum isl_format fmt) return &isl_format_layouts[fmt]; } +bool +isl_format_has_uint_channel(enum isl_format fmt) ATTRIBUTE_CONST; + bool isl_format_has_sint_channel(enum isl_format fmt) ATTRIBUTE_CONST; +static inline bool +isl_format_has_int_channel(enum isl_format fmt) +{ + return isl_format_has_uint_channel(fmt) || + isl_format_has_sint_channel(fmt); +} + static inline bool isl_format_is_compressed(enum isl_format fmt) { diff --git a/src/intel/isl/isl_format.c b/src/intel/isl/isl_format.c index 7842197d861..32bd70186c6 100644 --- a/src/intel/isl/isl_format.c +++ b/src/intel/isl/isl_format.c @@ -25,6 +25,20 @@ #include "isl.h" +bool +isl_format_has_uint_channel(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->channels.r.type == ISL_UINT || + fmtl->channels.g.type == ISL_UINT || + 
fmtl->channels.b.type == ISL_UINT || + fmtl->channels.a.type == ISL_UINT || + fmtl->channels.l.type == ISL_UINT || + fmtl->channels.i.type == ISL_UINT || + fmtl->channels.p.type == ISL_UINT; +} + bool isl_format_has_sint_channel(enum isl_format fmt) { -- cgit v1.2.3 From 8c819b8c2b4ebe63b431e34946b4ea3a43b6c118 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 9 Mar 2016 12:36:07 -0800 Subject: genxml/gen75: Add the clear color bits to RENDER_SURFACE_STATE --- src/intel/genxml/gen75.xml | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml index adbeeb6c788..94bb64e595e 100644 --- a/src/intel/genxml/gen75.xml +++ b/src/intel/genxml/gen75.xml @@ -413,6 +413,10 @@ + + + + -- cgit v1.2.3 From 1f3d582cba4921fa638c1aa0d8e7861f91059547 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 9 Mar 2016 12:38:58 -0800 Subject: isl/surface_state: Set the clear color --- src/intel/isl/isl_surface_state.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index f3390a6c553..6afe45d650e 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -393,9 +393,11 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, } #endif -#if 0 - if (GEN_GEN == 8) { - if (isl_format_is_integer(info->view->format)) { + if (GEN_GEN <= 8) { + /* Prior to Sky Lake, we only have one bit for the clear color which + * gives us 0 or 1 in whatever the surface's format happens to be. 
+ */ + if (isl_format_has_int_channel(info->view->format)) { for (unsigned i = 0; i < 4; i++) { assert(info->clear_color.u32[i] == 0 || info->clear_color.u32[i] == 1); @@ -406,17 +408,16 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, info->clear_color.f32[i] == 1.0f); } } - s.RedClearColor = info->clear_color.u32[0] != 0, - s.GreenClearColor = info->clear_color.u32[1] != 0, - s.BlueClearColor = info->clear_color.u32[2] != 0, - s.AlphaClearColor = info->clear_color.u32[3] != 0, + s.RedClearColor = info->clear_color.u32[0] != 0; + s.GreenClearColor = info->clear_color.u32[1] != 0; + s.BlueClearColor = info->clear_color.u32[2] != 0; + s.AlphaClearColor = info->clear_color.u32[3] != 0; } else { - .RedClearColor = info->clear_color.u32[0], - .GreenClearColor = info->clear_color.u32[1], - .BlueClearColor = info->clear_color.u32[2], - .AlphaClearColor = info->clear_color.u32[3], + s.RedClearColor = info->clear_color.u32[0]; + s.GreenClearColor = info->clear_color.u32[1]; + s.BlueClearColor = info->clear_color.u32[2]; + s.AlphaClearColor = info->clear_color.u32[3]; } -#endif GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &s); } -- cgit v1.2.3 From 132f079a8cbaeab442a7ea8b0f02b9f07dfdd310 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Mar 2016 11:24:46 -0800 Subject: anv/gem: Use C99-style struct initializers for DRM structs This is more consistent with the way the rest of the driver works and ensures that all structs we pass into the kernel are zero'd out except for the fields we actually want to fill. We were previously doing then when building with valgrind to keep valgrind from complaining. However, we need to start doing this unconditionally as recent kernels have been getting touchier about this. In particular, as of kernel commit b31e51360e88 from Chris Wilson, context creation and destroy fail if the padding bits are not set to 0. 
--- src/intel/vulkan/anv_gem.c | 175 ++++++++++++++++++++------------------------- 1 file changed, 76 insertions(+), 99 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c index 0a7be353327..a886f7c2cb0 100644 --- a/src/intel/vulkan/anv_gem.c +++ b/src/intel/vulkan/anv_gem.c @@ -32,8 +32,6 @@ #include "anv_private.h" -#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) - static int anv_ioctl(int fd, unsigned long request, void *arg) { @@ -54,13 +52,11 @@ anv_ioctl(int fd, unsigned long request, void *arg) uint32_t anv_gem_create(struct anv_device *device, size_t size) { - struct drm_i915_gem_create gem_create; - int ret; - - VG_CLEAR(gem_create); - gem_create.size = size; + struct drm_i915_gem_create gem_create = { + .size = size, + }; - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); + int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); if (ret != 0) { /* FIXME: What do we do if this fails? */ return 0; @@ -72,10 +68,10 @@ anv_gem_create(struct anv_device *device, size_t size) void anv_gem_close(struct anv_device *device, uint32_t gem_handle) { - struct drm_gem_close close; + struct drm_gem_close close = { + .handle = gem_handle, + }; - VG_CLEAR(close); - close.handle = gem_handle; anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); } @@ -86,17 +82,14 @@ void* anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags) { - struct drm_i915_gem_mmap gem_mmap; - int ret; - - gem_mmap.handle = gem_handle; - VG_CLEAR(gem_mmap.pad); - gem_mmap.offset = offset; - gem_mmap.size = size; - VG_CLEAR(gem_mmap.addr_ptr); - gem_mmap.flags = flags; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); + struct drm_i915_gem_mmap gem_mmap = { + .handle = gem_handle, + .offset = offset, + .size = size, + .flags = flags, + }; + + int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); if (ret != 0) { /* FIXME: Is NULL 
the right error return? Cf MAP_INVALID */ return NULL; @@ -119,15 +112,13 @@ anv_gem_munmap(void *p, uint64_t size) uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size) { - struct drm_i915_gem_userptr userptr; - int ret; - - VG_CLEAR(userptr); - userptr.user_ptr = (__u64)((unsigned long) mem); - userptr.user_size = size; - userptr.flags = 0; + struct drm_i915_gem_userptr userptr = { + .user_ptr = (__u64)((unsigned long) mem), + .user_size = size, + .flags = 0, + }; - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); + int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); if (ret == -1) return 0; @@ -138,11 +129,10 @@ int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching) { - struct drm_i915_gem_caching gem_caching; - - VG_CLEAR(gem_caching); - gem_caching.handle = gem_handle; - gem_caching.caching = caching; + struct drm_i915_gem_caching gem_caching = { + .handle = gem_handle, + .caching = caching, + }; return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching); } @@ -151,12 +141,11 @@ int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, uint32_t read_domains, uint32_t write_domain) { - struct drm_i915_gem_set_domain gem_set_domain; - - VG_CLEAR(gem_set_domain); - gem_set_domain.handle = gem_handle; - gem_set_domain.read_domains = read_domains; - gem_set_domain.write_domain = write_domain; + struct drm_i915_gem_set_domain gem_set_domain = { + .handle = gem_handle, + .read_domains = read_domains, + .write_domain = write_domain, + }; return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &gem_set_domain); } @@ -167,15 +156,13 @@ anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) { - struct drm_i915_gem_wait wait; - int ret; + struct drm_i915_gem_wait wait = { + .bo_handle = gem_handle, + .timeout_ns = *timeout_ns, + .flags = 0, + }; - 
VG_CLEAR(wait); - wait.bo_handle = gem_handle; - wait.timeout_ns = *timeout_ns; - wait.flags = 0; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); *timeout_ns = wait.timeout_ns; return ret; @@ -192,18 +179,17 @@ int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, uint32_t stride, uint32_t tiling) { - struct drm_i915_gem_set_tiling set_tiling; int ret; /* set_tiling overwrites the input on the error path, so we have to open * code anv_ioctl. */ - do { - VG_CLEAR(set_tiling); - set_tiling.handle = gem_handle; - set_tiling.tiling_mode = tiling; - set_tiling.stride = stride; + struct drm_i915_gem_set_tiling set_tiling = { + .handle = gem_handle, + .tiling_mode = tiling, + .stride = stride, + }; ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); @@ -214,13 +200,14 @@ anv_gem_set_tiling(struct anv_device *device, int anv_gem_get_param(int fd, uint32_t param) { - drm_i915_getparam_t gp; - int ret, tmp; + int tmp; + + drm_i915_getparam_t gp = { + .param = param, + .value = &tmp, + }; - VG_CLEAR(gp); - gp.param = param; - gp.value = &tmp; - ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + int ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); if (ret == 0) return tmp; @@ -233,9 +220,9 @@ anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) struct drm_gem_close close; int ret; - struct drm_i915_gem_create gem_create; - VG_CLEAR(gem_create); - gem_create.size = 4096; + struct drm_i915_gem_create gem_create = { + .size = 4096, + }; if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) { assert(!"Failed to create GEM BO"); @@ -247,12 +234,12 @@ anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) /* set_tiling overwrites the input on the error path, so we have to open * code anv_ioctl. 
*/ - struct drm_i915_gem_set_tiling set_tiling; do { - VG_CLEAR(set_tiling); - set_tiling.handle = gem_create.handle; - set_tiling.tiling_mode = tiling; - set_tiling.stride = tiling == I915_TILING_X ? 512 : 128; + struct drm_i915_gem_set_tiling set_tiling = { + .handle = gem_create.handle, + .tiling_mode = tiling, + .stride = tiling == I915_TILING_X ? 512 : 128, + }; ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); @@ -262,9 +249,9 @@ anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) goto close_and_return; } - struct drm_i915_gem_get_tiling get_tiling; - VG_CLEAR(get_tiling); - get_tiling.handle = gem_create.handle; + struct drm_i915_gem_get_tiling get_tiling = { + .handle = gem_create.handle, + }; if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) { assert(!"Failed to get BO tiling"); @@ -275,7 +262,7 @@ anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) close_and_return: - VG_CLEAR(close); + memset(&close, 0, sizeof(close)); close.handle = gem_create.handle; anv_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); @@ -285,12 +272,9 @@ close_and_return: int anv_gem_create_context(struct anv_device *device) { - struct drm_i915_gem_context_create create; - int ret; - - VG_CLEAR(create); + struct drm_i915_gem_context_create create = { 0 }; - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); if (ret == -1) return -1; @@ -300,10 +284,9 @@ anv_gem_create_context(struct anv_device *device) int anv_gem_destroy_context(struct anv_device *device, int context) { - struct drm_i915_gem_context_destroy destroy; - - VG_CLEAR(destroy); - destroy.ctx_id = context; + struct drm_i915_gem_context_destroy destroy = { + .ctx_id = context, + }; return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy); } @@ -311,11 +294,9 @@ anv_gem_destroy_context(struct anv_device *device, int context) int 
anv_gem_get_aperture(int fd, uint64_t *size) { - struct drm_i915_gem_get_aperture aperture; - int ret; + struct drm_i915_gem_get_aperture aperture = { 0 }; - VG_CLEAR(aperture); - ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + int ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); if (ret == -1) return -1; @@ -327,14 +308,12 @@ anv_gem_get_aperture(int fd, uint64_t *size) int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) { - struct drm_prime_handle args; - int ret; - - VG_CLEAR(args); - args.handle = gem_handle; - args.flags = DRM_CLOEXEC; + struct drm_prime_handle args = { + .handle = gem_handle, + .flags = DRM_CLOEXEC, + }; - ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + int ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); if (ret == -1) return -1; @@ -344,13 +323,11 @@ anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd) { - struct drm_prime_handle args; - int ret; - - VG_CLEAR(args); - args.fd = fd; + struct drm_prime_handle args = { + .fd = fd, + }; - ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); + int ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); if (ret == -1) return 0; -- cgit v1.2.3 From 41a147904a95ba699358e8209e91d535f853ba61 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Mar 2016 18:35:00 -0800 Subject: anv/wsi: Throttle rendering to no more than 2 frames ahead Right now, Vulkan apps can pretty easily DOS the GPU by simply submitting a lot of batches. This commit makes us wait until the rendering for earlier frames is comlete before continuing. By waiting 2 frames out, we can still keep the pipe reasonably full but without taking the entire system down. This is similar to what the GL driver does today. 
--- src/intel/vulkan/anv_wsi.c | 38 ++++++++++++++++++++++++++++++++++++++ src/intel/vulkan/anv_wsi.h | 4 ++++ 2 files changed, 42 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c index c5911a3635b..c2938f3836f 100644 --- a/src/intel/vulkan/anv_wsi.c +++ b/src/intel/vulkan/anv_wsi.c @@ -132,6 +132,14 @@ VkResult anv_CreateSwapchainKHR( if (result != VK_SUCCESS) return result; + if (pAllocator) + swapchain->alloc = *pAllocator; + else + swapchain->alloc = device->alloc; + + for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++) + swapchain->fences[i] = VK_NULL_HANDLE; + *pSwapchain = anv_swapchain_to_handle(swapchain); return VK_SUCCESS; @@ -144,6 +152,11 @@ void anv_DestroySwapchainKHR( { ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++) { + if (swapchain->fences[i] != VK_NULL_HANDLE) + anv_DestroyFence(device, swapchain->fences[i], pAllocator); + } + swapchain->destroy(swapchain, pAllocator); } @@ -185,11 +198,36 @@ VkResult anv_QueuePresentKHR( assert(swapchain->device == queue->device); + if (swapchain->fences[0] == VK_NULL_HANDLE) { + result = anv_CreateFence(anv_device_to_handle(queue->device), + &(VkFenceCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = 0, + }, &swapchain->alloc, &swapchain->fences[0]); + if (result != VK_SUCCESS) + return result; + } else { + anv_ResetFences(anv_device_to_handle(queue->device), + 1, &swapchain->fences[0]); + } + + anv_QueueSubmit(_queue, 0, NULL, swapchain->fences[0]); + result = swapchain->queue_present(swapchain, queue, pPresentInfo->pImageIndices[i]); /* TODO: What if one of them returns OUT_OF_DATE? 
*/ if (result != VK_SUCCESS) return result; + + VkFence last = swapchain->fences[2]; + swapchain->fences[2] = swapchain->fences[1]; + swapchain->fences[1] = swapchain->fences[0]; + swapchain->fences[0] = last; + + if (last != VK_NULL_HANDLE) { + anv_WaitForFences(anv_device_to_handle(queue->device), + 1, &last, true, 1); + } } return VK_SUCCESS; diff --git a/src/intel/vulkan/anv_wsi.h b/src/intel/vulkan/anv_wsi.h index 6e9ff9b8447..bf17f033173 100644 --- a/src/intel/vulkan/anv_wsi.h +++ b/src/intel/vulkan/anv_wsi.h @@ -53,6 +53,10 @@ struct anv_wsi_interface { struct anv_swapchain { struct anv_device *device; + VkAllocationCallbacks alloc; + + VkFence fences[3]; + VkResult (*destroy)(struct anv_swapchain *swapchain, const VkAllocationCallbacks *pAllocator); VkResult (*get_images)(struct anv_swapchain *swapchain, -- cgit v1.2.3 From e920b184e9a0cd3864b1db95921ef5d8b2227c6a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Mar 2016 12:28:32 -0800 Subject: anv/x11: Split image creation into a helper function This lets us clean up error handling and make it correct. 
--- src/intel/vulkan/anv_wsi_x11.c | 250 +++++++++++++++++++++++------------------ 1 file changed, 142 insertions(+), 108 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c index a63cb6e7c5b..aa7e6ebe3e7 100644 --- a/src/intel/vulkan/anv_wsi_x11.c +++ b/src/intel/vulkan/anv_wsi_x11.c @@ -520,27 +520,138 @@ x11_queue_present(struct anv_swapchain *anv_chain, } static VkResult -x11_swapchain_destroy(struct anv_swapchain *anv_chain, - const VkAllocationCallbacks *pAllocator) +x11_image_init(struct anv_device *device, struct x11_swapchain *chain, + const VkSwapchainCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct x11_image *image) { - struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - xcb_void_cookie_t cookie; + VkResult result; + + VkImage image_h; + result = anv_image_create(anv_device_to_handle(device), + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = 0, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->imageFormat, + .extent = { + .width = pCreateInfo->imageExtent.width, + .height = pCreateInfo->imageExtent.height, + .depth = 1 + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + NULL, + &image_h); + if (result != VK_SUCCESS) + return result; + + image->image = anv_image_from_handle(image_h); + assert(anv_format_is_color(image->image->format)); + + VkDeviceMemory memory_h; + result = anv_AllocateMemory(anv_device_to_handle(device), + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = image->image->size, + .memoryTypeIndex = 0, + }, + NULL /* XXX: pAllocator */, + &memory_h); + if (result != VK_SUCCESS) + goto 
fail_create_image; + + image->memory = anv_device_memory_from_handle(memory_h); + image->memory->bo.is_winsys_bo = true; + + anv_BindImageMemory(VK_NULL_HANDLE, image_h, memory_h, 0); + + struct anv_surface *surface = &image->image->color_surface; + assert(surface->isl.tiling == ISL_TILING_X); + + int ret = anv_gem_set_tiling(device, image->memory->bo.gem_handle, + surface->isl.row_pitch, I915_TILING_X); + if (ret) { + /* FINISHME: Choose a better error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "set_tiling failed: %m"); + goto fail_alloc_memory; + } - for (uint32_t i = 0; i < chain->image_count; i++) { - struct x11_image *image = &chain->images[i]; + int fd = anv_gem_handle_to_fd(device, image->memory->bo.gem_handle); + if (fd == -1) { + /* FINISHME: Choose a better error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "handle_to_fd failed: %m"); + goto fail_alloc_memory; + } - if (image->busy) - xcb_discard_reply(chain->conn, image->geom_cookie.sequence); + uint32_t bpp = 32; + uint32_t depth = 24; + image->pixmap = xcb_generate_id(chain->conn); - cookie = xcb_free_pixmap(chain->conn, image->pixmap); - xcb_discard_reply(chain->conn, cookie.sequence); + xcb_void_cookie_t cookie = + xcb_dri3_pixmap_from_buffer_checked(chain->conn, + image->pixmap, + chain->window, + image->image->size, + pCreateInfo->imageExtent.width, + pCreateInfo->imageExtent.height, + surface->isl.row_pitch, + depth, bpp, fd); - anv_DestroyImage(anv_device_to_handle(chain->base.device), - anv_image_to_handle(image->image), pAllocator); + image->busy = false; - anv_FreeMemory(anv_device_to_handle(chain->base.device), - anv_device_memory_to_handle(image->memory), pAllocator); - } + xcb_discard_reply(chain->conn, cookie.sequence); + + return VK_SUCCESS; + +fail_alloc_memory: + anv_FreeMemory(anv_device_to_handle(chain->base.device), + anv_device_memory_to_handle(image->memory), pAllocator); + +fail_create_image: + anv_DestroyImage(anv_device_to_handle(chain->base.device), 
+ anv_image_to_handle(image->image), pAllocator); + + return result; +} + +static void +x11_image_finish(struct x11_swapchain *chain, + const VkAllocationCallbacks* pAllocator, + struct x11_image *image) +{ + if (image->busy) + xcb_discard_reply(chain->conn, image->geom_cookie.sequence); + + xcb_void_cookie_t cookie = xcb_free_pixmap(chain->conn, image->pixmap); + xcb_discard_reply(chain->conn, cookie.sequence); + + anv_DestroyImage(anv_device_to_handle(chain->base.device), + anv_image_to_handle(image->image), pAllocator); + + anv_FreeMemory(anv_device_to_handle(chain->base.device), + anv_device_memory_to_handle(image->memory), pAllocator); +} + +static VkResult +x11_swapchain_destroy(struct anv_swapchain *anv_chain, + const VkAllocationCallbacks *pAllocator) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + + for (uint32_t i = 0; i < chain->image_count; i++) + x11_image_finish(chain, pAllocator, &chain->images[i]); anv_free2(&chain->base.device->alloc, pAllocator, chain); @@ -581,102 +692,11 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, chain->image_count = num_images; chain->next_image = 0; - for (uint32_t i = 0; i < chain->image_count; i++) { - VkDeviceMemory memory_h; - VkImage image_h; - struct anv_image *image; - struct anv_surface *surface; - struct anv_device_memory *memory; - - anv_image_create(anv_device_to_handle(device), - &(struct anv_image_create_info) { - .isl_tiling_flags = ISL_TILING_X_BIT, - .stride = 0, - .vk_info = - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = pCreateInfo->imageFormat, - .extent = { - .width = pCreateInfo->imageExtent.width, - .height = pCreateInfo->imageExtent.height, - .depth = 1 - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - /* FIXME: Need a way to use X tiling to allow scanout */ - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }}, - NULL, - 
&image_h); - - image = anv_image_from_handle(image_h); - assert(anv_format_is_color(image->format)); - - surface = &image->color_surface; - - anv_AllocateMemory(anv_device_to_handle(device), - &(VkMemoryAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .allocationSize = image->size, - .memoryTypeIndex = 0, - }, - NULL /* XXX: pAllocator */, - &memory_h); - - memory = anv_device_memory_from_handle(memory_h); - memory->bo.is_winsys_bo = true; - - anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), - memory_h, 0); - - int ret = anv_gem_set_tiling(device, memory->bo.gem_handle, - surface->isl.row_pitch, I915_TILING_X); - if (ret) { - /* FINISHME: Choose a better error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "set_tiling failed: %m"); - goto fail; - } - - int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); - if (fd == -1) { - /* FINISHME: Choose a better error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "handle_to_fd failed: %m"); - goto fail; - } - - uint32_t bpp = 32; - uint32_t depth = 24; - xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); - - cookie = - xcb_dri3_pixmap_from_buffer_checked(chain->conn, - pixmap, - chain->window, - image->size, - pCreateInfo->imageExtent.width, - pCreateInfo->imageExtent.height, - surface->isl.row_pitch, - depth, bpp, fd); - - chain->images[i].image = image; - chain->images[i].memory = memory; - chain->images[i].pixmap = pixmap; - chain->images[i].busy = false; - - xcb_discard_reply(chain->conn, cookie.sequence); - } - chain->gc = xcb_generate_id(chain->conn); if (!chain->gc) { /* FINISHME: Choose a better error. 
*/ result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail; + goto fail_alloc; } cookie = xcb_create_gc(chain->conn, @@ -686,11 +706,25 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, (uint32_t []) { 0 }); xcb_discard_reply(chain->conn, cookie.sequence); + uint32_t image = 0; + for (; image < chain->image_count; image++) { + result = x11_image_init(device, chain, pCreateInfo, pAllocator, + &chain->images[image]); + if (result != VK_SUCCESS) + goto fail_init_images; + } + *swapchain_out = &chain->base; return VK_SUCCESS; - fail: +fail_init_images: + for (uint32_t j = 0; j < image; j++) + x11_image_finish(chain, pAllocator, &chain->images[j]); + +fail_alloc: + anv_free2(&device->alloc, pAllocator, chain); + return result; } -- cgit v1.2.3 From 9bff5266beb9bacf86b199ca5ecaafaac8fae948 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 10 Mar 2016 21:47:56 -0800 Subject: anv/x11: Add present support The old DRI3 implementation just used CopyArea instead of present. We still don't support all the MST fancyness, but it should at least avoid some copies and allow for. 
v2 (Jason Ekstrand): - Better object cleanup and destruction - Handle the CONFIGURE_NOTIFY event and return OUT_OF_DATE when needed - Track dirtyness via IDLE_NOTIFY rather than interating through the images sequentially --- src/intel/vulkan/Makefile.am | 3 +- src/intel/vulkan/anv_wsi_x11.c | 188 ++++++++++++++++++++++++++++++++--------- 2 files changed, 148 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index f20cd41fbba..acf84e55871 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -165,7 +165,8 @@ anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) CLEANFILES = $(BUILT_SOURCES) -libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ +libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) \ + -lxcb -lxcb-dri3 -lxcb-present -lxcb-sync -lxshmfence \ $(top_builddir)/src/intel/isl/libisl.la \ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ $(top_builddir)/src/mesa/libmesa.la \ diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c index aa7e6ebe3e7..f9f72be154b 100644 --- a/src/intel/vulkan/anv_wsi_x11.c +++ b/src/intel/vulkan/anv_wsi_x11.c @@ -21,6 +21,7 @@ * IN THE SOFTWARE. 
*/ +#include #include #include #include @@ -420,8 +421,9 @@ struct x11_image { struct anv_image * image; struct anv_device_memory * memory; xcb_pixmap_t pixmap; - xcb_get_geometry_cookie_t geom_cookie; bool busy; + struct xshmfence * shm_fence; + uint32_t sync_fence; }; struct x11_swapchain { @@ -432,7 +434,12 @@ struct x11_swapchain { xcb_gc_t gc; VkExtent2D extent; uint32_t image_count; - uint32_t next_image; + + xcb_present_event_t event_id; + xcb_special_event_t * special_event; + uint64_t send_sbc; + uint32_t stamp; + struct x11_image images[0]; }; @@ -456,6 +463,42 @@ x11_get_images(struct anv_swapchain *anv_chain, return VK_SUCCESS; } +static VkResult +x11_handle_dri3_present_event(struct x11_swapchain *chain, + xcb_present_generic_event_t *event) +{ + switch (event->evtype) { + case XCB_PRESENT_CONFIGURE_NOTIFY: { + xcb_present_configure_notify_event_t *config = (void *) event; + + if (config->width != chain->extent.width || + config->height != chain->extent.height) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + + break; + } + + case XCB_PRESENT_EVENT_IDLE_NOTIFY: { + xcb_present_idle_notify_event_t *idle = (void *) event; + + for (unsigned i = 0; i < chain->image_count; i++) { + if (chain->images[i].pixmap == idle->pixmap) { + chain->images[i].busy = false; + break; + } + } + + break; + } + + case XCB_PRESENT_COMPLETE_NOTIFY: + default: + break; + } + + return VK_SUCCESS; +} + static VkResult x11_acquire_next_image(struct anv_swapchain *anv_chain, uint64_t timeout, @@ -463,30 +506,28 @@ x11_acquire_next_image(struct anv_swapchain *anv_chain, uint32_t *image_index) { struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - struct x11_image *image = &chain->images[chain->next_image]; - - if (image->busy) { - xcb_generic_error_t *err; - xcb_get_geometry_reply_t *geom = - xcb_get_geometry_reply(chain->conn, image->geom_cookie, &err); - if (!geom) { - free(err); - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + + while (1) { + for (uint32_t i = 0; i < 
chain->image_count; i++) { + if (!chain->images[i].busy) { + /* We found a non-busy image */ + xshmfence_await(chain->images[i].shm_fence); + *image_index = i; + return VK_SUCCESS; + } } - if (geom->width != chain->extent.width || - geom->height != chain->extent.height) { - free(geom); + xcb_flush(chain->conn); + xcb_generic_event_t *event = + xcb_wait_for_special_event(chain->conn, chain->special_event); + if (!event) return vk_error(VK_ERROR_OUT_OF_DATE_KHR); - } - free(geom); - image->busy = false; + VkResult result = x11_handle_dri3_present_event(chain, (void *)event); + free(event); + if (result != VK_SUCCESS) + return result; } - - *image_index = chain->next_image; - chain->next_image = (chain->next_image + 1) % chain->image_count; - return VK_SUCCESS; } static VkResult @@ -499,19 +540,31 @@ x11_queue_present(struct anv_swapchain *anv_chain, assert(image_index < chain->image_count); - xcb_void_cookie_t cookie; + uint32_t options = XCB_PRESENT_OPTION_NONE; - cookie = xcb_copy_area(chain->conn, - image->pixmap, - chain->window, - chain->gc, - 0, 0, - 0, 0, - chain->extent.width, - chain->extent.height); - xcb_discard_reply(chain->conn, cookie.sequence); + int64_t target_msc = 0; + int64_t divisor = 0; + int64_t remainder = 0; + + options |= XCB_PRESENT_OPTION_ASYNC; - image->geom_cookie = xcb_get_geometry(chain->conn, chain->window); + xcb_void_cookie_t cookie = + xcb_present_pixmap(chain->conn, + chain->window, + image->pixmap, + (uint32_t) chain->send_sbc, + 0, /* valid */ + 0, /* update */ + 0, /* x_off */ + 0, /* y_off */ + XCB_NONE, /* target_crtc */ + XCB_NONE, + image->sync_fence, + options, + target_msc, + divisor, + remainder, 0, NULL); + xcb_discard_reply(chain->conn, cookie.sequence); image->busy = true; xcb_flush(chain->conn); @@ -525,6 +578,7 @@ x11_image_init(struct anv_device *device, struct x11_swapchain *chain, const VkAllocationCallbacks* pAllocator, struct x11_image *image) { + xcb_void_cookie_t cookie; VkResult result; VkImage image_h; @@ 
-599,7 +653,7 @@ x11_image_init(struct anv_device *device, struct x11_swapchain *chain, uint32_t depth = 24; image->pixmap = xcb_generate_id(chain->conn); - xcb_void_cookie_t cookie = + cookie = xcb_dri3_pixmap_from_buffer_checked(chain->conn, image->pixmap, chain->window, @@ -608,13 +662,35 @@ x11_image_init(struct anv_device *device, struct x11_swapchain *chain, pCreateInfo->imageExtent.height, surface->isl.row_pitch, depth, bpp, fd); + xcb_discard_reply(chain->conn, cookie.sequence); - image->busy = false; + int fence_fd = xshmfence_alloc_shm(); + if (fence_fd < 0) + goto fail_pixmap; - xcb_discard_reply(chain->conn, cookie.sequence); + image->shm_fence = xshmfence_map_shm(fence_fd); + if (image->shm_fence == NULL) + goto fail_shmfence_alloc; + + image->sync_fence = xcb_generate_id(chain->conn); + xcb_dri3_fence_from_fd(chain->conn, + image->pixmap, + image->sync_fence, + false, + fence_fd); + + image->busy = false; + xshmfence_trigger(image->shm_fence); return VK_SUCCESS; +fail_shmfence_alloc: + close(fence_fd); + +fail_pixmap: + cookie = xcb_free_pixmap(chain->conn, image->pixmap); + xcb_discard_reply(chain->conn, cookie.sequence); + fail_alloc_memory: anv_FreeMemory(anv_device_to_handle(chain->base.device), anv_device_memory_to_handle(image->memory), pAllocator); @@ -631,10 +707,13 @@ x11_image_finish(struct x11_swapchain *chain, const VkAllocationCallbacks* pAllocator, struct x11_image *image) { - if (image->busy) - xcb_discard_reply(chain->conn, image->geom_cookie.sequence); + xcb_void_cookie_t cookie; + + cookie = xcb_sync_destroy_fence(chain->conn, image->sync_fence); + xcb_discard_reply(chain->conn, cookie.sequence); + xshmfence_unmap_shm(image->shm_fence); - xcb_void_cookie_t cookie = xcb_free_pixmap(chain->conn, image->pixmap); + cookie = xcb_free_pixmap(chain->conn, image->pixmap); xcb_discard_reply(chain->conn, cookie.sequence); anv_DestroyImage(anv_device_to_handle(chain->base.device), @@ -653,6 +732,8 @@ x11_swapchain_destroy(struct anv_swapchain 
*anv_chain, for (uint32_t i = 0; i < chain->image_count; i++) x11_image_finish(chain, pAllocator, &chain->images[i]); + xcb_unregister_for_special_event(chain->conn, chain->special_event); + anv_free2(&chain->base.device->alloc, pAllocator, chain); return VK_SUCCESS; @@ -670,9 +751,18 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, xcb_void_cookie_t cookie; VkResult result; + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); + int num_images = pCreateInfo->minImageCount; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); + /* For true mailbox mode, we need at least 4 images: + * 1) One to scan out from + * 2) One to have queued for scan-out + * 3) One to be currently held by the Wayland compositor + * 4) One to render to + */ + if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) + num_images = MAX2(num_images, 4); size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); chain = anv_alloc2(&device->alloc, pAllocator, size, 8, @@ -690,13 +780,25 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, chain->window = surface->window; chain->extent = pCreateInfo->imageExtent; chain->image_count = num_images; - chain->next_image = 0; + + chain->event_id = xcb_generate_id(chain->conn); + xcb_present_select_input(chain->conn, chain->event_id, chain->window, + XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY | + XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY | + XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY); + + /* Create an XCB event queue to hold present events outside of the usual + * application event queue + */ + chain->special_event = + xcb_register_for_special_xge(chain->conn, &xcb_present_id, + chain->event_id, NULL); chain->gc = xcb_generate_id(chain->conn); if (!chain->gc) { /* FINISHME: Choose a better error. 
*/ result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_alloc; + goto fail_register; } cookie = xcb_create_gc(chain->conn, @@ -722,7 +824,9 @@ fail_init_images: for (uint32_t j = 0; j < image; j++) x11_image_finish(chain, pAllocator, &chain->images[j]); -fail_alloc: +fail_register: + xcb_unregister_for_special_event(chain->conn, chain->special_event); + anv_free2(&device->alloc, pAllocator, chain); return result; -- cgit v1.2.3 From 753ebe4457444e13124eba1e2e2e07edc9ab3c09 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Mar 2016 17:22:44 -0800 Subject: anv/x11: Reset the SHM fence before presenting the pixmap This seems to fix the flicker issue that I was seeing with dota2 --- src/intel/vulkan/anv_wsi_x11.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c index f9f72be154b..9ef02961a93 100644 --- a/src/intel/vulkan/anv_wsi_x11.c +++ b/src/intel/vulkan/anv_wsi_x11.c @@ -548,6 +548,8 @@ x11_queue_present(struct anv_swapchain *anv_chain, options |= XCB_PRESENT_OPTION_ASYNC; + xshmfence_reset(image->shm_fence); + xcb_void_cookie_t cookie = xcb_present_pixmap(chain->conn, chain->window, -- cgit v1.2.3 From 41af9b2e517dd0c17e519490ca915b96f6898390 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 12 Mar 2016 08:54:41 -0800 Subject: HACK: Don't re-configure L3$ in render stages pre-BDW This fixes a "regression" on Haswell and prior caused by merging the gen7 and gen8 flush_state functions. Haswell should still work just fine if you're on a 4.4 kernel, but we really should make it detect the command parser version and do something intelligent. 
--- src/intel/vulkan/genX_cmd_buffer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index c3d2043dcdf..723f6d81a39 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -324,7 +324,18 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); +#if GEN_GEN >= 8 + /* FIXME (jason): Currently, the config_l3 function causes problems on + * Haswell and prior if you have a kernel older than 4.4. In order to + * work, it requires a couple of registers be white-listed in the + * command parser and they weren't added until 4.4. What we should do + * is check the command parser version and make it a no-op if your + * command parser is either off or too old. Compute won't work 100%, + * but at least 3-D will. In the mean time, I'm going to make this + * gen8+ only so that we can get Haswell working again. 
+ */ genX(cmd_buffer_config_l3)(cmd_buffer, false); +#endif genX(flush_pipeline_select_3d)(cmd_buffer); -- cgit v1.2.3 From 1b126305ded36f6b416ada08e29ff84faeafef99 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 10 Mar 2016 17:16:58 -0800 Subject: anv/genX: Add flush_pipeline_select_gpgpu Signed-off-by: Jordan Justen --- src/intel/vulkan/anv_genX.h | 1 + src/intel/vulkan/gen7_cmd_buffer.c | 6 +----- src/intel/vulkan/gen8_cmd_buffer.c | 23 +---------------------- src/intel/vulkan/genX_cmd_buffer.c | 27 +++++++++++++++++++++++++++ 4 files changed, 30 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 77d387ae748..908a9e0efa9 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -40,6 +40,7 @@ void genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); +void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm); diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 56f03268133..dbf05d06d0f 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -365,11 +365,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) bool needs_slm = cs_prog_data->base.total_shared > 0; genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm); - if (cmd_buffer->state.current_pipeline != GPGPU) { - anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), - .PipelineSelection = GPGPU); - cmd_buffer->state.current_pipeline = GPGPU; - } + genX(flush_pipeline_select_gpgpu)(cmd_buffer); if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 
4a926255a5d..87b5e340772 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -446,28 +446,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) bool needs_slm = cs_prog_data->base.total_shared > 0; genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm); - if (cmd_buffer->state.current_pipeline != GPGPU) { -#if GEN_GEN < 10 - /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: - * - * Software must clear the COLOR_CALC_STATE Valid field in - * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT - * with Pipeline Select set to GPGPU. - * - * The internal hardware docs recommend the same workaround for Gen9 - * hardware too. - */ - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_CC_STATE_POINTERS)); -#endif - - anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), -#if GEN_GEN >= 9 - .MaskBits = 3, -#endif - .PipelineSelection = GPGPU); - cmd_buffer->state.current_pipeline = GPGPU; - } + genX(flush_pipeline_select_gpgpu)(cmd_buffer); if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 723f6d81a39..d0a80f53e63 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -777,6 +777,33 @@ genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) } } +void +genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.current_pipeline != GPGPU) { +#if GEN_GEN >= 8 && GEN_GEN < 10 + /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: + * + * Software must clear the COLOR_CALC_STATE Valid field in + * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT + * with Pipeline Select set to GPGPU. + * + * The internal hardware docs recommend the same workaround for Gen9 + * hardware too. 
+ */ + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_CC_STATE_POINTERS)); +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if GEN_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } +} + struct anv_state genX(cmd_buffer_alloc_null_surface_state)(struct anv_cmd_buffer *cmd_buffer, struct anv_framebuffer *fb) -- cgit v1.2.3 From c8ec65a1f5a85dbef3210dc49684fcfed49b7ea2 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 10 Mar 2016 17:25:45 -0800 Subject: anv: Add flush_pipeline_before_pipeline_select flush_pipeline_before_pipeline_select adds workarounds required before switching the pipeline. Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_cmd_buffer.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index d0a80f53e63..1ce53a81f1b 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -764,10 +764,31 @@ void genX(CmdDispatchIndirect)( anv_batch_emit(batch, GENX(MEDIA_STATE_FLUSH)); } +static void +flush_pipeline_before_pipeline_select(struct anv_cmd_buffer *cmd_buffer, + uint32_t pipeline) +{ +#if GEN_GEN >= 8 && GEN_GEN < 10 + /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: + * + * Software must clear the COLOR_CALC_STATE Valid field in + * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT + * with Pipeline Select set to GPGPU. + * + * The internal hardware docs recommend the same workaround for Gen9 + * hardware too. 
+ */ + if (pipeline == GPGPU) + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS)); +#endif +} + void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer->state.current_pipeline != _3D) { + flush_pipeline_before_pipeline_select(cmd_buffer, _3D); + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), #if GEN_GEN >= 9 .MaskBits = 3, @@ -781,19 +802,7 @@ void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer->state.current_pipeline != GPGPU) { -#if GEN_GEN >= 8 && GEN_GEN < 10 - /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: - * - * Software must clear the COLOR_CALC_STATE Valid field in - * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT - * with Pipeline Select set to GPGPU. - * - * The internal hardware docs recommend the same workaround for Gen9 - * hardware too. - */ - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_CC_STATE_POINTERS)); -#endif + flush_pipeline_before_pipeline_select(cmd_buffer, GPGPU); anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), #if GEN_GEN >= 9 -- cgit v1.2.3 From b83785d86d2c7f07323920615c72a9f09695a9a7 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 10 Mar 2016 17:19:13 -0800 Subject: anv/gen7: Add stall and flushes before switching pipelines This is a port of 18c76551ee425b981efefc61f663a7781df17882 from OpenGL to Vulkan. 
Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_cmd_buffer.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 1ce53a81f1b..1b53f85419b 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -780,6 +780,30 @@ flush_pipeline_before_pipeline_select(struct anv_cmd_buffer *cmd_buffer, */ if (pipeline == GPGPU) anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS)); +#elif GEN_GEN <= 7 + /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] + * PIPELINE_SELECT [DevBWR+]": + * + * Project: DEVSNB+ + * + * Software must ensure all the write caches are flushed through a + * stalling PIPE_CONTROL command followed by another PIPE_CONTROL + * command to invalidate read only caches prior to programming + * MI_PIPELINE_SELECT command to change the Pipeline Select Mode. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .RenderTargetCacheFlushEnable = true, + .DepthCacheFlushEnable = true, + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .StateCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .PostSyncOperation = NoWrite); #endif } -- cgit v1.2.3 From abaa3bed22ebb580724a5741bb8bee69e476a85f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Mar 2016 15:24:24 -0700 Subject: anv/device: Flush the fence batch rather than the start of the BO --- src/intel/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 768e2eb3be1..bcd7a9e3c0d 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1409,7 +1409,7 @@ VkResult 
anv_CreateFence( assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0); assert(batch.next - batch.start <= CACHELINE_SIZE); __builtin_ia32_mfence(); - __builtin_ia32_clflush(fence->bo.map); + __builtin_ia32_clflush(batch.start); } fence->exec2_objects[0].handle = fence->bo.gem_handle; -- cgit v1.2.3 From 2d8c6321177a92f6f1383adc1e75dde1610cfc64 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 8 Mar 2016 14:12:55 -0800 Subject: anv/blit2d: Copy anv_meta_blit.c functions These will be customized for blit2d operations. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta.h | 3 + src/intel/vulkan/anv_meta_blit2d.c | 597 +++++++++++++++++++++++++++++++++++++ 2 files changed, 600 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index e2e00433c49..98888aea87f 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -53,6 +53,9 @@ void anv_device_finish_meta_resolve_state(struct anv_device *device); VkResult anv_device_init_meta_blit_state(struct anv_device *device); void anv_device_finish_meta_blit_state(struct anv_device *device); +VkResult anv_device_init_meta_blit2d_state(struct anv_device *device); +void anv_device_finish_meta_blit2d_state(struct anv_device *device); + void anv_meta_save(struct anv_meta_saved_state *state, const struct anv_cmd_buffer *cmd_buffer, diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 6f0734254bf..2d447ebe3a8 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -22,6 +22,7 @@ */ #include "anv_meta.h" +#include "nir/nir_builder.h" static VkFormat vk_format_for_size(int bs) @@ -53,6 +54,222 @@ vk_format_for_size(int bs) } } +static void +meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *src_image, + struct anv_image_view *src_iview, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_image *dest_image, + struct 
anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D dest_extent, + VkFilter blit_filter) +{ + struct anv_device *device = cmd_buffer->device; + + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + assert(src_image->samples == dest_image->samples); + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + dest_offset.x + dest_extent.width, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)(src_offset.x + src_extent.width) + / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) + / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / + (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)src_offset.y / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + 
}); + + VkSampler sampler; + ANV_CALL(CreateSampler)(anv_device_to_handle(device), + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = blit_filter, + .minFilter = blit_filter, + }, &cmd_buffer->pool->alloc, &sampler); + + VkDescriptorPool desc_pool; + anv_CreateDescriptorPool(anv_device_to_handle(device), + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &desc_pool); + + VkDescriptorSet set; + anv_AllocateDescriptorSets(anv_device_to_handle(device), + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout + }, &set); + + anv_UpdateDescriptorSets(anv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(dest_iview), + }, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .layers = 1 + }, &cmd_buffer->pool->alloc, &fb); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = 
VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit.render_pass, + .framebuffer = fb, + .renderArea = { + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + VkPipeline pipeline; + + switch (src_image->type) { + case VK_IMAGE_TYPE_1D: + pipeline = device->meta_state.blit.pipeline_1d_src; + break; + case VK_IMAGE_TYPE_2D: + pipeline = device->meta_state.blit.pipeline_2d_src; + break; + case VK_IMAGE_TYPE_3D: + pipeline = device->meta_state.blit.pipeline_3d_src; + break; + default: + unreachable(!"bad VkImageType"); + } + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, + &set, 0, NULL); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. 
+ */ + anv_DestroyDescriptorPool(anv_device_to_handle(device), + desc_pool, &cmd_buffer->pool->alloc); + anv_DestroySampler(anv_device_to_handle(device), sampler, + &cmd_buffer->pool->alloc); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb, + &cmd_buffer->pool->alloc); +} + void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save) @@ -209,3 +426,383 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, } } + +static nir_shader * +build_nir_vertex_shader(void) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); + + nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_pos"); + pos_in->data.location = VERT_ATTRIB_GENERIC0; + nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "gl_Position"); + pos_out->data.location = VARYING_SLOT_POS; + nir_copy_var(&b, pos_out, pos_in); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_tex_pos"); + tex_pos_in->data.location = VERT_ATTRIB_GENERIC1; + nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "v_tex_pos"); + tex_pos_out->data.location = VARYING_SLOT_VAR0; + tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; + nir_copy_var(&b, tex_pos_out, tex_pos_in); + + return b.shader; +} + +static nir_shader * +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "v_tex_pos"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; + + /* Swizzle the array index which comes in as Z coordinate into the 
right + * position. + */ + unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; + nir_ssa_def *const tex_pos = + nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, + (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); + + const struct glsl_type *sampler_type = + glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, + glsl_get_base_type(vec4)); + nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, + sampler_type, "s_tex"); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + tex->sampler_dim = tex_dim; + tex->op = nir_texop_tex; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(tex_pos); + tex->dest_type = nir_type_float; /* TODO */ + tex->is_array = glsl_sampler_type_is_array(sampler_type); + tex->coord_components = tex_pos->num_components; + tex->texture = nir_deref_var_create(tex, sampler); + tex->sampler = nir_deref_var_create(tex, sampler); + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + nir_store_var(&b, color_out, &tex->dest.ssa, 4); + + return b.shader; +} + +void +anv_device_finish_meta_blit2d_state(struct anv_device *device) +{ + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_3d_src, + &device->meta_state.alloc); + anv_DestroyPipelineLayout(anv_device_to_handle(device), + 
device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); +} + +VkResult +anv_device_init_meta_blit2d_state(struct anv_device *device) +{ + VkResult result; + + result = anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); + if (result != VK_SUCCESS) + goto fail; + + /* We don't use a vertex shader for blitting, but instead build and pass + * the VUEs directly to the rasterization backend. However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. 
+ */ + struct anv_shader_module vs = { + .nir = build_nir_vertex_shader(), + }; + + struct anv_shader_module fs_1d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), + }; + + struct anv_shader_module fs_2d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), + }; + + struct anv_shader_module fs_3d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), + }; + + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + { + .binding = 1, + .stride = 5 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = 8 + } + } + }; + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }; + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &ds_layout_info, + &device->meta_state.alloc, + &device->meta_state.blit.ds_layout); + if (result != VK_SUCCESS) + goto fail_render_pass; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + 
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); + if (result != VK_SUCCESS) + goto fail_descriptor_set_layout; + + VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs), + .pName = "main", + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */ + .pName = "main", + .pSpecializationInfo = NULL + }, + }; + + const VkGraphicsPipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(pipeline_shader_stages), + .pStages = pipeline_shader_stages, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + }, + .pColorBlendState = 
&(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .colorWriteMask = + VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT }, + } + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + .renderPass = device->meta_state.blit.render_pass, + .subpass = 0, + }; + + const struct anv_graphics_pipeline_create_info anv_pipeline_info = { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_layout; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_1d; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); + result = 
anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_2d; + + ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + + return VK_SUCCESS; + + fail_pipeline_2d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + + fail_pipeline_1d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + + fail_pipeline_layout: + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + fail_descriptor_set_layout: + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); + fail_render_pass: + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + + ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + fail: + return result; +} -- cgit v1.2.3 From 997a873f0c67108acf5733538c4475285e3f9b2e Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 9 Mar 2016 11:31:49 -0800 Subject: anv/blit2d: Customize meta blit structs and functions for blit2d API * Add fields in meta struct * Add support in meta init/teardown * Switch to custom meta_emit_blit2d() Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta.c | 7 +++ src/intel/vulkan/anv_meta_blit2d.c | 114 ++++++++----------------------------- src/intel/vulkan/anv_private.h | 10 ++++ 3 files changed, 41 insertions(+), 90 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta.c b/src/intel/vulkan/anv_meta.c index 82944ea1a92..1ac0306e031 100644 --- 
a/src/intel/vulkan/anv_meta.c +++ b/src/intel/vulkan/anv_meta.c @@ -150,8 +150,14 @@ anv_device_init_meta(struct anv_device *device) if (result != VK_SUCCESS) goto fail_blit; + result = anv_device_init_meta_blit2d_state(device); + if (result != VK_SUCCESS) + goto fail_blit2d; + return VK_SUCCESS; +fail_blit2d: + anv_device_finish_meta_blit_state(device); fail_blit: anv_device_finish_meta_resolve_state(device); fail_resolve: @@ -166,4 +172,5 @@ anv_device_finish_meta(struct anv_device *device) anv_device_finish_meta_resolve_state(device); anv_device_finish_meta_clear_state(device); anv_device_finish_meta_blit_state(device); + anv_device_finish_meta_blit2d_state(device); } diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 2d447ebe3a8..78d4b04d5d8 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -56,15 +56,12 @@ vk_format_for_size(int bs) static void meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_image *src_image, struct anv_image_view *src_iview, VkOffset3D src_offset, VkExtent3D src_extent, - struct anv_image *dest_image, struct anv_image_view *dest_iview, VkOffset3D dest_offset, - VkExtent3D dest_extent, - VkFilter blit_filter) + VkExtent3D dest_extent) { struct anv_device *device = cmd_buffer->device; @@ -73,8 +70,6 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, float tex_coord[3]; } *vb_data; - assert(src_image->samples == dest_image->samples); - unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); struct anv_state vb_state = @@ -144,8 +139,8 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, ANV_CALL(CreateSampler)(anv_device_to_handle(device), &(VkSamplerCreateInfo) { .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = blit_filter, - .minFilter = blit_filter, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, }, &cmd_buffer->pool->alloc, &sampler); VkDescriptorPool desc_pool; @@ -170,7 +165,7 @@ 
meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = desc_pool, .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit.ds_layout + .pSetLayouts = &device->meta_state.blit2d.ds_layout }, &set); anv_UpdateDescriptorSets(anv_device_to_handle(device), @@ -209,7 +204,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = device->meta_state.blit.render_pass, + .renderPass = device->meta_state.blit2d.render_pass, .framebuffer = fb, .renderArea = { .offset = { dest_offset.x, dest_offset.y }, @@ -219,21 +214,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .pClearValues = NULL, }, VK_SUBPASS_CONTENTS_INLINE); - VkPipeline pipeline; - - switch (src_image->type) { - case VK_IMAGE_TYPE_1D: - pipeline = device->meta_state.blit.pipeline_1d_src; - break; - case VK_IMAGE_TYPE_2D: - pipeline = device->meta_state.blit.pipeline_2d_src; - break; - case VK_IMAGE_TYPE_3D: - pipeline = device->meta_state.blit.pipeline_3d_src; - break; - default: - unreachable(!"bad VkImageType"); - } + VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src; if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), @@ -252,7 +233,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit.pipeline_layout, 0, 1, + device->meta_state.blit2d.pipeline_layout, 0, 1, &set, 0, NULL); ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); @@ -410,16 +391,13 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, &iview_info, cmd_buffer, img_o, dst_usage); /* Perform blit */ - anv_meta_emit_blit(cmd_buffer, - anv_image_from_handle(src_image), + 
meta_emit_blit2d(cmd_buffer, &src_iview, src_offset_el, (VkExtent3D){rects[r].width, rects[r].height, 1}, - anv_image_from_handle(dst_image), &dst_iview, dst_offset_el, - (VkExtent3D){rects[r].width, rects[r].height, 1}, - VK_FILTER_NEAREST); + (VkExtent3D){rects[r].width, rects[r].height, 1}); anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc); @@ -511,22 +489,16 @@ void anv_device_finish_meta_blit2d_state(struct anv_device *device) { anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass, + device->meta_state.blit2d.render_pass, &device->meta_state.alloc); anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_1d_src, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_3d_src, + device->meta_state.blit2d.pipeline_2d_src, &device->meta_state.alloc); anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout, + device->meta_state.blit2d.pipeline_layout, &device->meta_state.alloc); anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout, + device->meta_state.blit2d.ds_layout, &device->meta_state.alloc); } @@ -564,7 +536,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .pPreserveAttachments = (uint32_t[]) { 0 }, }, .dependencyCount = 0, - }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); + }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass); if (result != VK_SUCCESS) goto fail; @@ -577,18 +549,10 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .nir = build_nir_vertex_shader(), }; - struct anv_shader_module fs_1d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), - }; - struct 
anv_shader_module fs_2d = { .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), }; - struct anv_shader_module fs_3d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), - }; - VkPipelineVertexInputStateCreateInfo vi_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .vertexBindingDescriptionCount = 2, @@ -646,7 +610,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), &ds_layout_info, &device->meta_state.alloc, - &device->meta_state.blit.ds_layout); + &device->meta_state.blit2d.ds_layout); if (result != VK_SUCCESS) goto fail_render_pass; @@ -654,9 +618,9 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) &(VkPipelineLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit.ds_layout, + .pSetLayouts = &device->meta_state.blit2d.ds_layout, }, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); + &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_layout); if (result != VK_SUCCESS) goto fail_descriptor_set_layout; @@ -731,8 +695,8 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }, }, .flags = 0, - .layout = device->meta_state.blit.pipeline_layout, - .renderPass = device->meta_state.blit.render_pass, + .layout = device->meta_state.blit2d.pipeline_layout, + .renderPass = device->meta_state.blit2d.render_pass, .subpass = 0, }; @@ -745,64 +709,34 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .use_rectlist = true }; - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_layout; - pipeline_shader_stages[1].module = 
anv_shader_module_to_handle(&fs_2d); result = anv_graphics_pipeline_create(anv_device_to_handle(device), VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_1d; - - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); + &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_2d_src); if (result != VK_SUCCESS) - goto fail_pipeline_2d; + goto fail_pipeline_layout; ralloc_free(vs.nir); - ralloc_free(fs_1d.nir); ralloc_free(fs_2d.nir); - ralloc_free(fs_3d.nir); return VK_SUCCESS; - fail_pipeline_2d: - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src, - &device->meta_state.alloc); - - fail_pipeline_1d: - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_1d_src, - &device->meta_state.alloc); - fail_pipeline_layout: anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout, + device->meta_state.blit2d.pipeline_layout, &device->meta_state.alloc); fail_descriptor_set_layout: anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout, + device->meta_state.blit2d.ds_layout, &device->meta_state.alloc); fail_render_pass: anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass, + device->meta_state.blit2d.render_pass, &device->meta_state.alloc); ralloc_free(vs.nir); - ralloc_free(fs_1d.nir); ralloc_free(fs_2d.nir); - ralloc_free(fs_3d.nir); fail: return result; } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 0ef840da10e..939cd087377 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h 
@@ -607,6 +607,16 @@ struct anv_meta_state { VkDescriptorSetLayout ds_layout; } blit; + struct { + VkRenderPass render_pass; + + /** Pipeline that copies from a 2D image. */ + VkPipeline pipeline_2d_src; + + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } blit2d; + struct { /** Pipeline [i] resolves an image with 2^(i+1) samples. */ VkPipeline pipelines[MAX_SAMPLES_LOG2]; -- cgit v1.2.3 From 1a0c63b8804812081d660642539bb411dc560992 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 11 Mar 2016 16:25:02 -0800 Subject: Revert "anv/meta: Prefix anv_ to meta_emit_blit()" This reverts commit 514c0557178b0325c59a28d68b0f250f0eeaddf5. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta.h | 2 +- src/intel/vulkan/anv_meta_blit.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index 98888aea87f..5616252b5a4 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -109,7 +109,7 @@ anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save); void -anv_meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, struct anv_image_view *src_iview, VkOffset3D src_offset, diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 2c3c91733a7..7bddc6b2d42 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -120,7 +120,7 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, } void -anv_meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, struct anv_image_view *src_iview, VkOffset3D src_offset, @@ -438,7 +438,7 @@ void anv_CmdBlitImage( }, cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - anv_meta_emit_blit(cmd_buffer, + meta_emit_blit(cmd_buffer, src_image, 
&src_iview, pRegions[r].srcOffsets[0], src_extent, dest_image, &dest_iview, -- cgit v1.2.3 From b487acc489afc277a7611f14e7319bc7340e7777 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 11 Mar 2016 16:26:28 -0800 Subject: Revert "anv/meta: Make meta_emit_blit() public" This reverts commit f39168392243d6dacefbc8708b764c5978ff24df. Some conflicts had to be resolved in order for this revert to be successful. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta.h | 11 ----------- src/intel/vulkan/anv_meta_blit.c | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index 5616252b5a4..6bd5c1dfba2 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -108,17 +108,6 @@ void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save); -void -meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_image *src_image, - struct anv_image_view *src_iview, - VkOffset3D src_offset, - VkExtent3D src_extent, - struct anv_image *dest_image, - struct anv_image_view *dest_iview, - VkOffset3D dest_offset, - VkExtent3D dest_extent, - VkFilter blit_filter); #ifdef __cplusplus } #endif diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 7bddc6b2d42..09e2dbb2918 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -119,7 +119,7 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT)); } -void +static void meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, struct anv_image_view *src_iview, -- cgit v1.2.3 From f8f98869157b678320ab8b8fcd50ab7285dac0be Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 10 Mar 2016 16:06:14 -0800 Subject: anv/blit2d: Use texel fetch in frag shader The texelFetch operation requires that the sampled texture coordinates be unnormalized 
integers. This will simplify the copy shader for w-tiled images (stencil buffers). v2 (Jason): Use f2i for texel coords Fix num_components indirectly Use float inputs for interpolation Nest tex_pos functions Suggested-by: Jason Ekstrand Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit2d.c | 43 ++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 78d4b04d5d8..839ab02c904 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -83,11 +83,9 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)(src_offset.x + src_extent.width) - / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) - / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, + src_offset.x + src_extent.width, + src_offset.y + src_extent.height, + src_offset.z, }, }; @@ -97,10 +95,9 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)src_offset.x / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) / - (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, + src_offset.x, + src_offset.y + src_extent.height, + src_offset.z, }, }; @@ -110,9 +107,9 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, dest_offset.y, }, .tex_coord = { - (float)src_offset.x / (float)src_iview->extent.width, - (float)src_offset.y / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, + src_offset.x, + src_offset.y, + src_offset.z, }, }; @@ -438,22 +435,16 @@ static nir_shader * build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) { const struct glsl_type *vec4 = glsl_vec4_type(); + const struct glsl_type *vec3 = 
glsl_vector_type(GLSL_TYPE_FLOAT, 3); nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec4, "v_tex_pos"); + vec3, "v_tex_pos"); tex_pos_in->data.location = VARYING_SLOT_VAR0; - - /* Swizzle the array index which comes in as Z coordinate into the right - * position. - */ - unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; - nir_ssa_def *const tex_pos = - nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, - (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); + nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); const struct glsl_type *sampler_type = glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, @@ -463,16 +454,18 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) sampler->data.descriptor_set = 0; sampler->data.binding = 0; - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); tex->sampler_dim = tex_dim; - tex->op = nir_texop_tex; + tex->op = nir_texop_txf; tex->src[0].src_type = nir_tex_src_coord; tex->src[0].src = nir_src_for_ssa(tex_pos); + tex->src[1].src_type = nir_tex_src_lod; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); tex->dest_type = nir_type_float; /* TODO */ tex->is_array = glsl_sampler_type_is_array(sampler_type); tex->coord_components = tex_pos->num_components; tex->texture = nir_deref_var_create(tex, sampler); - tex->sampler = nir_deref_var_create(tex, sampler); + tex->sampler = NULL; nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); nir_builder_instr_insert(&b, &tex->instr); -- cgit v1.2.3 From 92fb65f1176334159c9b2327bbb693332b0b6bb6 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 10 Mar 2016 18:25:10 -0800 Subject: anv/blit2d: Remove sampler from pipeline 
Since we're using texelFetch with a sampled image, a sampler is no longer needed. This agrees with the Vulkan Spec section 13.2.4 Descriptor Set Updates: sampler is a sampler handle, and is used in descriptor updates for types VK_DESCRIPTOR_TYPE_SAMPLER and VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER if the binding being updated does not use immutable samplers. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit2d.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 839ab02c904..4fab5d1b3dd 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -132,14 +132,6 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, sizeof(struct anv_vue_header), }); - VkSampler sampler; - ANV_CALL(CreateSampler)(anv_device_to_handle(device), - &(VkSamplerCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = VK_FILTER_NEAREST, - .minFilter = VK_FILTER_NEAREST, - }, &cmd_buffer->pool->alloc, &sampler); - VkDescriptorPool desc_pool; anv_CreateDescriptorPool(anv_device_to_handle(device), &(const VkDescriptorPoolCreateInfo) { @@ -150,7 +142,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .poolSizeCount = 1, .pPoolSizes = (VkDescriptorPoolSize[]) { { - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, .descriptorCount = 1 }, } @@ -174,10 +166,10 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .dstBinding = 0, .dstArrayElement = 0, .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, .pImageInfo = (VkDescriptorImageInfo[]) { { - .sampler = sampler, + .sampler = NULL, .imageView = anv_image_view_to_handle(src_iview), .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }, @@ -242,8 +234,6 @@ meta_emit_blit2d(struct anv_cmd_buffer 
*cmd_buffer, */ anv_DestroyDescriptorPool(anv_device_to_handle(device), desc_pool, &cmd_buffer->pool->alloc); - anv_DestroySampler(anv_device_to_handle(device), sampler, - &cmd_buffer->pool->alloc); anv_DestroyFramebuffer(anv_device_to_handle(device), fb, &cmd_buffer->pool->alloc); } @@ -593,7 +583,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .pBindings = (VkDescriptorSetLayoutBinding[]) { { .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, .pImmutableSamplers = NULL -- cgit v1.2.3 From 5647de8ba56d8b4f2247eecc2cfcb3ec596dafe1 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 14 Mar 2016 08:15:16 -0700 Subject: anv/blit2d: Only use one extent in meta_emit_blit2d Since scaling isn't involved, we don't need multiple extents. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit2d.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 4fab5d1b3dd..cb7e64b01df 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -58,10 +58,9 @@ static void meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_image_view *src_iview, VkOffset3D src_offset, - VkExtent3D src_extent, struct anv_image_view *dest_iview, VkOffset3D dest_offset, - VkExtent3D dest_extent) + VkExtent3D extent) { struct anv_device *device = cmd_buffer->device; @@ -79,12 +78,12 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, vb_data[0] = (struct blit_vb_data) { .pos = { - dest_offset.x + dest_extent.width, - dest_offset.y + dest_extent.height, + dest_offset.x + extent.width, + dest_offset.y + extent.height, }, .tex_coord = { - src_offset.x + src_extent.width, - src_offset.y + src_extent.height, + src_offset.x + extent.width, + 
src_offset.y + extent.height, src_offset.z, }, }; @@ -92,11 +91,11 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, vb_data[1] = (struct blit_vb_data) { .pos = { dest_offset.x, - dest_offset.y + dest_extent.height, + dest_offset.y + extent.height, }, .tex_coord = { src_offset.x, - src_offset.y + src_extent.height, + src_offset.y + extent.height, src_offset.z, }, }; @@ -197,7 +196,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .framebuffer = fb, .renderArea = { .offset = { dest_offset.x, dest_offset.y }, - .extent = { dest_extent.width, dest_extent.height }, + .extent = { extent.width, extent.height }, }, .clearValueCount = 0, .pClearValues = NULL, @@ -381,7 +380,6 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, meta_emit_blit2d(cmd_buffer, &src_iview, src_offset_el, - (VkExtent3D){rects[r].width, rects[r].height, 1}, &dst_iview, dst_offset_el, (VkExtent3D){rects[r].width, rects[r].height, 1}); -- cgit v1.2.3 From f33866ae0a4279af03997fa3fa7a47e2eb7ec8fe Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 14 Mar 2016 11:11:50 -0700 Subject: anv/blit: Remove completed finishme for VkFilter This task was finished as of: d9079648d0f1c380929dea0f3a447ddfdf5dcd27. 
Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 09e2dbb2918..b83fa684ffd 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -366,8 +366,6 @@ void anv_CmdBlitImage( assert(src_image->samples == 1); assert(dest_image->samples == 1); - anv_finishme("respect VkFilter"); - meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { -- cgit v1.2.3 From 5464f0c046ac68630355932639f18a02dc4b2d92 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 10 Mar 2016 11:06:25 -0800 Subject: anv/blit: Reduce number of VUE headers being read Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 2 +- src/intel/vulkan/anv_meta_blit2d.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index b83fa684ffd..e23b6978819 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -536,7 +536,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) { .binding = 0, .stride = 0, - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE }, { .binding = 1, diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index cb7e64b01df..4a0bed1a335 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -541,7 +541,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) { .binding = 0, .stride = 0, - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE }, { .binding = 1, -- cgit v1.2.3 From b1c5d45872c4c156b1366071c8532a0057a70745 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Mar 2016 11:50:53 -0700 Subject: anv/allocator: 
Add a size field to bo_pool_alloc --- src/intel/vulkan/anv_allocator.c | 4 +++- src/intel/vulkan/anv_batch_chain.c | 6 ++++-- src/intel/vulkan/anv_device.c | 4 ++-- src/intel/vulkan/anv_private.h | 3 ++- 4 files changed, 11 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 4fc83386a71..1928b9bb157 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -820,10 +820,12 @@ anv_bo_pool_finish(struct anv_bo_pool *pool) } VkResult -anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) +anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size) { VkResult result; + assert(pool->bo_size <= size); + void *next_free_void; if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { struct bo_pool_bo_link *next_free = next_free_void; diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index d24dd06d7eb..eab050f066b 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -251,7 +251,8 @@ anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, if (bbo == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo, + ANV_CMD_BUFFER_BATCH_SIZE); if (result != VK_SUCCESS) goto fail_alloc; @@ -283,7 +284,8 @@ anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer, if (bbo == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo, + other_bbo->bo.size); if (result != VK_SUCCESS) goto fail_alloc; diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index bcd7a9e3c0d..77fd72c42e1 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -717,7 
+717,7 @@ anv_device_submit_simple_batch(struct anv_device *device, /* Kernel driver requires 8 byte aligned batch length */ size = align_u32(batch->next - batch->start, 8); assert(size < device->batch_bo_pool.bo_size); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, 4096); if (result != VK_SUCCESS) return result; @@ -1390,7 +1390,7 @@ VkResult anv_CreateFence( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096); if (result != VK_SUCCESS) return result; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 939cd087377..46e377c0490 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -476,7 +476,8 @@ struct anv_bo_pool { void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device, uint32_t block_size); void anv_bo_pool_finish(struct anv_bo_pool *pool); -VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); +VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, + uint32_t size); void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); -- cgit v1.2.3 From ecfb07427632b99a9f424ad06f0967fa8a7fe2a2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Mar 2016 13:06:08 -0700 Subject: anv/allocator: Make the bo_pool dynamically sized --- src/intel/vulkan/anv_allocator.c | 48 +++++++++++++++++++++++----------------- src/intel/vulkan/anv_device.c | 5 ++--- src/intel/vulkan/anv_private.h | 7 ++---- 3 files changed, 32 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 1928b9bb157..e3b07ffa8bb 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -794,12 +794,10 @@ struct bo_pool_bo_link { }; void 
-anv_bo_pool_init(struct anv_bo_pool *pool, - struct anv_device *device, uint32_t bo_size) +anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device) { pool->device = device; - pool->bo_size = bo_size; - pool->free_list = NULL; + memset(pool->free_list, 0, sizeof(pool->free_list)); VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); } @@ -807,13 +805,15 @@ anv_bo_pool_init(struct anv_bo_pool *pool, void anv_bo_pool_finish(struct anv_bo_pool *pool) { - struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); - while (link != NULL) { - struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); - - anv_gem_munmap(link_copy.bo.map, pool->bo_size); - anv_gem_close(pool->device, link_copy.bo.gem_handle); - link = link_copy.next; + for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) { + struct bo_pool_bo_link *link = PFL_PTR(pool->free_list[i]); + while (link != NULL) { + struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); + + anv_gem_munmap(link_copy.bo.map, link_copy.bo.size); + anv_gem_close(pool->device, link_copy.bo.gem_handle); + link = link_copy.next; + } } VG(VALGRIND_DESTROY_MEMPOOL(pool)); @@ -824,29 +824,32 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size) { VkResult result; - assert(pool->bo_size <= size); + const unsigned size_log2 = size < 4096 ? 
12 : ilog2_round_up(size); + const unsigned pow2_size = 1 << size_log2; + const unsigned bucket = size_log2 - 12; + assert(bucket < ARRAY_SIZE(pool->free_list)); void *next_free_void; - if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { + if (anv_ptr_free_list_pop(&pool->free_list[bucket], &next_free_void)) { struct bo_pool_bo_link *next_free = next_free_void; *bo = VG_NOACCESS_READ(&next_free->bo); assert(bo->map == next_free); - assert(bo->size == pool->bo_size); + assert(size <= bo->size); - VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size)); return VK_SUCCESS; } struct anv_bo new_bo; - result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size); + result = anv_bo_init_new(&new_bo, pool->device, pow2_size); if (result != VK_SUCCESS) return result; - assert(new_bo.size == pool->bo_size); + assert(new_bo.size == pow2_size); - new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size, 0); + new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pow2_size, 0); if (new_bo.map == NULL) { anv_gem_close(pool->device, new_bo.gem_handle); return vk_error(VK_ERROR_MEMORY_MAP_FAILED); @@ -854,7 +857,7 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size) *bo = new_bo; - VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size)); return VK_SUCCESS; } @@ -867,6 +870,11 @@ anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo_in) struct bo_pool_bo_link *link = bo.map; link->bo = bo; + assert(util_is_power_of_two(bo.size)); + const unsigned size_log2 = ilog2_round_up(bo.size); + const unsigned bucket = size_log2 - 12; + assert(bucket < ARRAY_SIZE(pool->free_list)); + VG(VALGRIND_MEMPOOL_FREE(pool, bo.map)); - anv_ptr_free_list_push(&pool->free_list, link); + anv_ptr_free_list_push(&pool->free_list[bucket], link); } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 
77fd72c42e1..068626d1c1f 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -716,8 +716,7 @@ anv_device_submit_simple_batch(struct anv_device *device, /* Kernel driver requires 8 byte aligned batch length */ size = align_u32(batch->next - batch->start, 8); - assert(size < device->batch_bo_pool.bo_size); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, 4096); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size); if (result != VK_SUCCESS) return result; @@ -829,7 +828,7 @@ VkResult anv_CreateDevice( pthread_mutex_init(&device->mutex, NULL); - anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); + anv_bo_pool_init(&device->batch_bo_pool, device); anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 46e377c0490..6d98e0267a4 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -468,13 +468,10 @@ struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, struct anv_bo_pool { struct anv_device *device; - uint32_t bo_size; - - void *free_list; + void *free_list[16]; }; -void anv_bo_pool_init(struct anv_bo_pool *pool, - struct anv_device *device, uint32_t block_size); +void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device); void anv_bo_pool_finish(struct anv_bo_pool *pool); VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size); -- cgit v1.2.3 From 869e393eb36e6912ca16fcfd060892c0de07bb49 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Mar 2016 16:32:46 -0700 Subject: anv/batch_chain: Fall back to growing batches when chaining isn't available --- src/intel/vulkan/anv_batch_chain.c | 63 ++++++++++++++++++++++++++++++++++++-- src/intel/vulkan/anv_device.c | 7 +++++ src/intel/vulkan/anv_private.h | 2 ++ 3 files changed, 70 insertions(+), 2 deletions(-) (limited to 'src') diff --git 
a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index eab050f066b..034f3fda24a 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -340,6 +340,37 @@ anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); } +static VkResult +anv_batch_bo_grow(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo, + struct anv_batch *batch, size_t aditional, + size_t batch_padding) +{ + assert(batch->start == bbo->bo.map); + bbo->length = batch->next - batch->start; + + size_t new_size = bbo->bo.size; + while (new_size <= bbo->length + aditional + batch_padding) + new_size *= 2; + + if (new_size == bbo->bo.size) + return VK_SUCCESS; + + struct anv_bo new_bo; + VkResult result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, + &new_bo, new_size); + if (result != VK_SUCCESS) + return result; + + memcpy(new_bo.map, bbo->bo.map, bbo->length); + + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + + bbo->bo = new_bo; + anv_batch_bo_continue(bbo, batch, batch_padding); + + return VK_SUCCESS; +} + static void anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_cmd_buffer *cmd_buffer) @@ -478,6 +509,18 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) return VK_SUCCESS; } +static VkResult +anv_cmd_buffer_grow_batch(struct anv_batch *batch, void *_data) +{ + struct anv_cmd_buffer *cmd_buffer = _data; + struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + + anv_batch_bo_grow(cmd_buffer, bbo, &cmd_buffer->batch, 4096, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + return VK_SUCCESS; +} + struct anv_state anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, uint32_t entries, uint32_t *state_offset) @@ -548,9 +591,14 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); cmd_buffer->batch.alloc = 
&cmd_buffer->pool->alloc; - cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; cmd_buffer->batch.user_data = cmd_buffer; + if (cmd_buffer->device->can_chain_batches) { + cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; + } else { + cmd_buffer->batch.extend_cb = anv_cmd_buffer_grow_batch; + } + anv_batch_bo_start(batch_bo, &cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START_length * 4); @@ -680,7 +728,9 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) * determine this statically here so that this stays in sync with the * actual ExecuteCommands implementation. */ - if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && + if (!cmd_buffer->device->can_chain_batches) { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT; + } else if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) { /* If the secondary has exactly one batch buffer in its list *and* * that batch buffer is less than half of the maximum size, we're @@ -728,6 +778,15 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, anv_batch_emit_batch(&primary->batch, &secondary->batch); anv_cmd_buffer_emit_state_base_address(primary); break; + case ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT: { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(primary); + unsigned length = secondary->batch.end - secondary->batch.start; + anv_batch_bo_grow(primary, bbo, &primary->batch, length, + GEN8_MI_BATCH_BUFFER_START_length * 4); + anv_batch_emit_batch(&primary->batch, &secondary->batch); + anv_cmd_buffer_emit_state_base_address(primary); + break; + } case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { struct anv_batch_bo *first_bbo = list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 068626d1c1f..ce2045ecf7b 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -826,6 +826,13 @@ 
VkResult anv_CreateDevice( device->info = *physical_device->info; device->isl_dev = physical_device->isl_dev; + /* On Broadwell and later, we can use batch chaining to more efficiently + * implement growing command buffers. Prior to Haswell, the kernel + * command parser gets in the way and we have to fall back to growing + * the batch. + */ + device->can_chain_batches = device->info.gen >= 8; + pthread_mutex_init(&device->mutex, NULL); anv_bo_pool_init(&device->batch_bo_pool, device); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 6d98e0267a4..03e87670165 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -672,6 +672,7 @@ struct anv_device { struct isl_device isl_dev; int context_id; int fd; + bool can_chain_batches; struct anv_bo_pool batch_bo_pool; @@ -1192,6 +1193,7 @@ struct anv_cmd_pool { enum anv_cmd_buffer_exec_mode { ANV_CMD_BUFFER_EXEC_MODE_PRIMARY, ANV_CMD_BUFFER_EXEC_MODE_EMIT, + ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT, ANV_CMD_BUFFER_EXEC_MODE_CHAIN, ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, }; -- cgit v1.2.3 From 4844723405d901afee3ab6a4a6c642ae8ef8bcb4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 22 Mar 2016 16:11:53 -0700 Subject: anv: Don't assert-fail if someone asks for a non-existent entrypoint --- src/intel/vulkan/anv_entrypoints_gen.py | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_entrypoints_gen.py b/src/intel/vulkan/anv_entrypoints_gen.py index 1e4cfcb1755..cedecfeac70 100644 --- a/src/intel/vulkan/anv_entrypoints_gen.py +++ b/src/intel/vulkan/anv_entrypoints_gen.py @@ -210,7 +210,6 @@ anv_resolve_entrypoint(uint32_t index) return validate_layer.entrypoints[index]; if (dispatch_devinfo == NULL) { - assert(anv_layer.entrypoints[index]); return anv_layer.entrypoints[index]; } -- cgit v1.2.3 From 204d937ac2623b230260f60d2d7c5d7233d697fb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 22 Mar 2016 16:17:09 -0700 
Subject: anv/device: Ignore the patch portion of the requested API version Fixes dEQP-VK.api.device_init.create_instance_name_version Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94661 --- src/intel/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index ce2045ecf7b..622e6422c5a 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -223,7 +223,7 @@ VkResult anv_CreateInstance( } if (VK_MAKE_VERSION(1, 0, 0) > client_version || - client_version > VK_MAKE_VERSION(1, 0, 3)) { + client_version > VK_MAKE_VERSION(1, 0, 0xfff)) { return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, "Client requested version %d.%d.%d", VK_VERSION_MAJOR(client_version), -- cgit v1.2.3 From 20417b2cb05ff0f710eb6b6fbd9299ba915f8fc1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 22 Mar 2016 16:21:21 -0700 Subject: anv/device: Advertise version 1.0.5 Nothing substantial has changed since 1.0.2 --- src/intel/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 622e6422c5a..54a1f1274ab 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -532,7 +532,7 @@ void anv_GetPhysicalDeviceProperties( }; *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = VK_MAKE_VERSION(1, 0, 2), + .apiVersion = VK_MAKE_VERSION(1, 0, 5), .driverVersion = 1, .vendorID = 0x8086, .deviceID = pdevice->chipset_id, -- cgit v1.2.3 From a5dc3c0f02aa523d1d3d123b62b9a187821079fe Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 22 Mar 2016 10:53:37 -0700 Subject: anv: Sanitize Image extents and offsets Prepare Image extents and offsets for internal consumption by assigning the default values implicitly defned by the spec. 
Fixes textures on several Vulkan demos in which the VkImageCopy depth is set to zero when copying a 2D image. v2 (Jason Ekstrand): Replace "prep" with "sanitize" Make function static inline Pass structs instead of pointers Reviewed-by: Jason Ekstrand Signed-off-by: Nanley Chery --- src/intel/vulkan/anv_image.c | 24 ++++------------- src/intel/vulkan/anv_meta_copy.c | 53 ++++++++++++++++++++++++++----------- src/intel/vulkan/anv_meta_resolve.c | 41 +++++++++++++++++++++------- src/intel/vulkan/anv_private.h | 33 +++++++++++++++++++++++ 4 files changed, 106 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 143a08413f7..b47425bd0e1 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -124,30 +124,16 @@ make_surface(const struct anv_device *dev, struct anv_surface *anv_surf = get_surface(image, aspect); - VkExtent3D extent; - switch (vk_info->imageType) { - case VK_IMAGE_TYPE_1D: - extent = (VkExtent3D) { vk_info->extent.width, 1, 1 }; - break; - case VK_IMAGE_TYPE_2D: - extent = (VkExtent3D) { vk_info->extent.width, vk_info->extent.height, 1 }; - break; - case VK_IMAGE_TYPE_3D: - extent = vk_info->extent; - break; - default: - unreachable("invalid image type"); - } - - image->extent = extent; + image->extent = anv_sanitize_image_extent(vk_info->imageType, + vk_info->extent); ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, .dim = vk_to_isl_surf_dim[vk_info->imageType], .format = anv_get_isl_format(vk_info->format, aspect, vk_info->tiling, NULL), - .width = extent.width, - .height = extent.height, - .depth = extent.depth, + .width = image->extent.width, + .height = image->extent.height, + .depth = image->extent.depth, .levels = vk_info->mipLevels, .array_len = vk_info->arrayLayers, .samples = vk_info->samples, diff --git a/src/intel/vulkan/anv_meta_copy.c b/src/intel/vulkan/anv_meta_copy.c index 1a2bfd6cf01..982fa7e10c1 100644 --- 
a/src/intel/vulkan/anv_meta_copy.c +++ b/src/intel/vulkan/anv_meta_copy.c @@ -28,16 +28,16 @@ * if Image is uncompressed or compressed, respectively. */ static struct VkExtent3D -meta_region_extent_el(const VkFormat format, +meta_region_extent_el(const struct anv_image *image, const struct VkExtent3D *extent) { const struct isl_format_layout *isl_layout = - anv_format_for_vk_format(format)->isl_layout; - return (VkExtent3D) { + anv_format_for_vk_format(image->vk_format)->isl_layout; + return anv_sanitize_image_extent(image->type, (VkExtent3D) { .width = DIV_ROUND_UP(extent->width , isl_layout->bw), .height = DIV_ROUND_UP(extent->height, isl_layout->bh), .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), - }; + }); } /* Returns the user-provided VkBufferImageCopy::imageOffset in units of @@ -49,11 +49,11 @@ meta_region_offset_el(const struct anv_image *image, const struct VkOffset3D *offset) { const struct isl_format_layout *isl_layout = image->format->isl_layout; - return (VkOffset3D) { + return anv_sanitize_image_offset(image->type, (VkOffset3D) { .x = offset->x / isl_layout->bw, .y = offset->y / isl_layout->bh, .z = offset->z / isl_layout->bd, - }; + }); } static struct anv_meta_blit2d_surf @@ -115,17 +115,28 @@ meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, for (unsigned r = 0; r < regionCount; r++) { - /* Start creating blit rect */ + /** + * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images + * extent is the size in texels of the source image to copy in width, + * height and depth. 1D images use only x and width. 2D images use x, y, + * width and height. 3D images use x, y, z, width, height and depth. + * + * + * Also, convert the offsets and extent from units of texels to units of + * blocks - which is the highest resolution accessible in this command. 
+ */ const VkOffset3D img_offset_el = meta_region_offset_el(image, &pRegions[r].imageOffset); const VkExtent3D bufferExtent = { .width = pRegions[r].bufferRowLength, .height = pRegions[r].bufferImageHeight, }; + + /* Start creating blit rect */ const VkExtent3D buf_extent_el = - meta_region_extent_el(image->vk_format, &bufferExtent); + meta_region_extent_el(image, &bufferExtent); const VkExtent3D img_extent_el = - meta_region_extent_el(image->vk_format, &pRegions[r].imageExtent); + meta_region_extent_el(image, &pRegions[r].imageExtent); struct anv_meta_blit2d_rect rect = { .width = MAX2(buf_extent_el.width, img_extent_el.width), .height = MAX2(buf_extent_el.height, img_extent_el.height), @@ -152,7 +163,7 @@ meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, uint32_t *y_offset = forward ? &rect.dst_y : &rect.src_y; /* Loop through each 3D or array slice */ - unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_3d = img_extent_el.depth; unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; unsigned slice_3d = 0; unsigned slice_array = 0; @@ -163,7 +174,7 @@ meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, pRegions[r].imageSubresource.mipLevel, pRegions[r].imageSubresource.baseArrayLayer + slice_array, - pRegions[r].imageOffset.z + slice_3d, + img_offset_el.z + slice_3d, x_offset, y_offset); *x_offset += img_offset_el.x; @@ -259,20 +270,30 @@ void anv_CmdCopyImage( struct anv_meta_blit2d_surf b_dst = blit_surf_for_image(dest_image, dst_isl_surf); - /* Start creating blit rect */ + /** + * From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images + * imageExtent is the size in texels of the image to copy in width, height + * and depth. 1D images use only x and width. 2D images use x, y, width + * and height. 3D images use x, y, z, width, height and depth. 
+ * + * Also, convert the offsets and extent from units of texels to units of + * blocks - which is the highest resolution accessible in this command. + */ const VkOffset3D dst_offset_el = meta_region_offset_el(dest_image, &pRegions[r].dstOffset); const VkOffset3D src_offset_el = meta_region_offset_el(src_image, &pRegions[r].srcOffset); const VkExtent3D img_extent_el = - meta_region_extent_el(src_image->vk_format, &pRegions[r].extent); + meta_region_extent_el(src_image, &pRegions[r].extent); + + /* Start creating blit rect */ struct anv_meta_blit2d_rect rect = { .width = img_extent_el.width, .height = img_extent_el.height, }; /* Loop through each 3D or array slice */ - unsigned num_slices_3d = pRegions[r].extent.depth; + unsigned num_slices_3d = img_extent_el.depth; unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; unsigned slice_3d = 0; unsigned slice_array = 0; @@ -283,14 +304,14 @@ void anv_CmdCopyImage( pRegions[r].dstSubresource.mipLevel, pRegions[r].dstSubresource.baseArrayLayer + slice_array, - pRegions[r].dstOffset.z + slice_3d, + dst_offset_el.z + slice_3d, &rect.dst_x, &rect.dst_y); isl_surf_get_image_offset_el(src_isl_surf, pRegions[r].srcSubresource.mipLevel, pRegions[r].srcSubresource.baseArrayLayer + slice_array, - pRegions[r].srcOffset.z + slice_3d, + src_offset_el.z + slice_3d, &rect.src_x, &rect.src_y); rect.dst_x += dst_offset_el.x; diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 19fb3ad3003..f50af52ece5 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -719,6 +719,27 @@ void anv_CmdResolveImage( anv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, ®ion->dstOffset); + /** + * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images + * + * extent is the size in texels of the source image to resolve in width, + * height and depth. 1D images use only x and width. 2D images use x, y, + * width and height. 
3D images use x, y, z, width, height and depth. + * + * srcOffset and dstOffset select the initial x, y, and z offsets in + * texels of the sub-regions of the source and destination image data. + * extent is the size in texels of the source image to resolve in width, + * height and depth. 1D images use only x and width. 2D images use x, y, + * width and height. 3D images use x, y, z, width, height and depth. + */ + const struct VkExtent3D extent = + anv_sanitize_image_extent(src_image->type, region->extent); + const struct VkOffset3D srcOffset = + anv_sanitize_image_offset(src_image->type, region->srcOffset); + const struct VkOffset3D dstOffset = + anv_sanitize_image_offset(dest_image->type, region->dstOffset); + + for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) { @@ -780,12 +801,12 @@ void anv_CmdResolveImage( .framebuffer = fb_h, .renderArea = { .offset = { - region->dstOffset.x, - region->dstOffset.y, + dstOffset.x, + dstOffset.y, }, .extent = { - region->extent.width, - region->extent.height, + extent.width, + extent.height, } }, .clearValueCount = 0, @@ -796,17 +817,17 @@ void anv_CmdResolveImage( emit_resolve(cmd_buffer, &src_iview, &(VkOffset2D) { - .x = region->srcOffset.x, - .y = region->srcOffset.y, + .x = srcOffset.x, + .y = srcOffset.y, }, &dest_iview, &(VkOffset2D) { - .x = region->dstOffset.x, - .y = region->dstOffset.y, + .x = dstOffset.x, + .y = dstOffset.y, }, &(VkExtent2D) { - .width = region->extent.width, - .height = region->extent.height, + .width = extent.width, + .height = extent.height, }); ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 03e87670165..94a13d7d331 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1670,6 +1670,39 @@ struct anv_buffer_view { const struct anv_format * anv_format_for_descriptor_type(VkDescriptorType type); +static inline struct VkExtent3D +anv_sanitize_image_extent(const 
VkImageType imageType, + const struct VkExtent3D imageExtent) +{ + switch (imageType) { + case VK_IMAGE_TYPE_1D: + return (VkExtent3D) { imageExtent.width, 1, 1 }; + case VK_IMAGE_TYPE_2D: + return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 }; + case VK_IMAGE_TYPE_3D: + return imageExtent; + default: + unreachable("invalid image type"); + } +} + +static inline struct VkOffset3D +anv_sanitize_image_offset(const VkImageType imageType, + const struct VkOffset3D imageOffset) +{ + switch (imageType) { + case VK_IMAGE_TYPE_1D: + return (VkOffset3D) { imageOffset.x, 0, 0 }; + case VK_IMAGE_TYPE_2D: + return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 }; + case VK_IMAGE_TYPE_3D: + return imageOffset; + default: + unreachable("invalid image type"); + } +} + + void anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, enum isl_format format, -- cgit v1.2.3 From b332013a56186d04d7b05b232a8ba021e95c44ba Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 23 Mar 2016 23:24:25 -0700 Subject: genxml: Add register support Signed-off-by: Jordan Justen --- src/intel/genxml/gen_pack_header.py | 62 +++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/intel/genxml/gen_pack_header.py b/src/intel/genxml/gen_pack_header.py index 5bc18c70c60..9ef7122ae9e 100755 --- a/src/intel/genxml/gen_pack_header.py +++ b/src/intel/genxml/gen_pack_header.py @@ -202,6 +202,13 @@ def safe_name(name): return name +def num_from_str(num_str): + if num_str.lower().startswith('0x'): + return int(num_str, base=16) + else: + assert(not num_str.startswith('0') and 'octals numbers not allowed') + return int(num_str) + class Field: ufixed_pattern = re.compile("u(\d+)\.(\d+)") sfixed_pattern = re.compile("s(\d+)\.(\d+)") @@ -472,25 +479,24 @@ class Parser: self.instruction = None self.structs = {} + self.registers = {} def start_element(self, name, attrs): if name == "genxml": self.platform = 
attrs["name"] self.gen = attrs["gen"].replace('.', '') print(pack_header % {'license': license, 'platform': self.platform}) - elif name == "instruction": - self.instruction = safe_name(attrs["name"]) - self.length_bias = int(attrs["bias"]) - if "length" in attrs: - self.length = int(attrs["length"]) - size = self.length * 32 - else: - self.length = None - size = 0 - self.group = Group(self, None, 0, 1, size) - elif name == "struct": - self.struct = safe_name(attrs["name"]) - self.structs[attrs["name"]] = 1 + elif name in ("instruction", "struct", "register"): + if name == "instruction": + self.instruction = safe_name(attrs["name"]) + self.length_bias = int(attrs["bias"]) + elif name == "struct": + self.struct = safe_name(attrs["name"]) + self.structs[attrs["name"]] = 1 + elif name == "register": + self.register = safe_name(attrs["name"]) + self.reg_num = num_from_str(attrs["num"]) + self.registers[attrs["name"]] = 1 if "length" in attrs: self.length = int(attrs["length"]) size = self.length * 32 @@ -522,10 +528,15 @@ class Parser: self.emit_instruction() self.instruction = None self.group = None - elif name == "struct": + elif name == "struct": self.emit_struct() self.struct = None self.group = None + elif name == "register": + self.emit_register() + self.register = None + self.reg_num = None + self.group = None elif name == "group": self.group = self.group.parent elif name == "field": @@ -560,9 +571,9 @@ class Parser: def emit_instruction(self): name = self.instruction if not self.length == None: - print('#define %-33s %4d' % + print('#define %-33s %6d' % (self.gen_prefix(name + "_length"), self.length)) - print('#define %-33s %4d' % + print('#define %-33s %6d' % (self.gen_prefix(name + "_length_bias"), self.length_bias)) default_fields = [] @@ -571,7 +582,7 @@ class Parser: continue if field.default == None: continue - default_fields.append(" .%-35s = %4d" % (field.name, field.default)) + default_fields.append(" .%-35s = %6d" % (field.name, field.default)) if 
default_fields: print('#define %-40s\\' % (self.gen_prefix(name + '_header'))) @@ -582,10 +593,23 @@ class Parser: self.emit_pack_function(self.instruction, self.group) + def emit_register(self): + name = self.register + if not self.reg_num == None: + print('#define %-33s 0x%04x' % + (self.gen_prefix(name + "_num"), self.reg_num)) + + if not self.length == None: + print('#define %-33s %6d' % + (self.gen_prefix(name + "_length"), self.length)) + + self.emit_template_struct(self.register, self.group) + self.emit_pack_function(self.register, self.group) + def emit_struct(self): name = self.struct if not self.length == None: - print('#define %-33s %4d' % + print('#define %-33s %6d' % (self.gen_prefix(name + "_length"), self.length)) self.emit_template_struct(self.struct, self.group) @@ -598,7 +622,7 @@ class Parser: name = self.prefix + "_" + value.name else: name = value.name - print('#define %-36s %4d' % (name.upper(), value.value)) + print('#define %-36s %6d' % (name.upper(), value.value)) print('') def parse(self, filename): -- cgit v1.2.3 From d353ba8f5fee23e9d9c8165b6cbfaba33e19ace6 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 23 Mar 2016 23:24:25 -0700 Subject: anv: Add genxml register support Signed-off-by: Jordan Justen --- src/intel/vulkan/anv_private.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 94a13d7d331..77f453afb36 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -813,6 +813,15 @@ __gen_combine_address(struct anv_batch *batch, void *location, #define __anv_cmd_length_bias(cmd) cmd ## _length_bias #define __anv_cmd_header(cmd) cmd ## _header #define __anv_cmd_pack(cmd) cmd ## _pack +#define __anv_reg_num(reg) reg ## _num + +#define anv_pack_struct(dst, struc, ...) 
do { \ + struct struc __template = { \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(struc)(NULL, dst, &__template); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \ + } while (0) #define anv_batch_emit(batch, cmd, ...) do { \ void *__dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ -- cgit v1.2.3 From 7a03fb9ccb3f8a94ec697bc6ebed8c5f859c8b8e Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 24 Mar 2016 00:29:50 -0700 Subject: genxml: Add L3 Cache Control register definitions Based on intel_reg.h (5912da45a69923afa1b7f2eb5bb371d848813c41) Signed-off-by: Jordan Justen --- src/intel/genxml/gen7.xml | 27 +++++++++++++++++++++++++++ src/intel/genxml/gen75.xml | 26 ++++++++++++++++++++++++++ src/intel/genxml/gen8.xml | 8 ++++++++ src/intel/genxml/gen9.xml | 8 ++++++++ 4 files changed, 69 insertions(+) (limited to 'src') diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml index 268ca3d97d7..960df5eaf9f 100644 --- a/src/intel/genxml/gen7.xml +++ b/src/intel/genxml/gen7.xml @@ -2508,4 +2508,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml index 94bb64e595e..26c1f9ecdf6 100644 --- a/src/intel/genxml/gen75.xml +++ b/src/intel/genxml/gen75.xml @@ -2906,4 +2906,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml index 96eda703453..694e691e5ea 100644 --- a/src/intel/genxml/gen8.xml +++ b/src/intel/genxml/gen8.xml @@ -3163,4 +3163,12 @@ + + + + + + + + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index 79d3006d24b..bc2639a7878 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -3467,4 +3467,12 @@ + + + + + + + + -- cgit v1.2.3 From 8f3c23667433aacf5ad65a699c7ce082f3d6e416 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 24 Mar 2016 13:05:04 -0700 Subject: anv: Use genxml register support for L3 Cache config The programming of 
the L3 Cache registers should match the previous manually packed LRI values. Signed-off-by: Jordan Justen --- src/intel/vulkan/gen7_cmd_buffer.c | 53 +++++++++++++++++++++++--------------- src/intel/vulkan/gen8_cmd_buffer.c | 33 ++++++++++++------------ 2 files changed, 48 insertions(+), 38 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index dbf05d06d0f..04c1d3b3477 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -294,17 +294,10 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; } -static void -emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), - .RegisterOffset = reg, - .DataDWord = imm); -} - -#define GEN7_L3SQCREG1 0xb010 -#define GEN7_L3CNTLREG2 0xb020 -#define GEN7_L3CNTLREG3 0xb024 +#define emit_lri(batch, reg, imm) \ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), \ + .RegisterOffset = __anv_reg_num(reg), \ + .DataDWord = imm) void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) @@ -315,12 +308,19 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) * - src/mesa/drivers/dri/i965/gen7_l3_state.c */ - uint32_t l3c2_val = enable_slm ? - /* All = 0 ways; URB = 16 ways; DC and RO = 16; SLM = 1 */ - /*0x02040021*/0x010000a1 : - /* All = 0 ways; URB = 32 ways; DC = 0; RO = 32; SLM = 0 */ - /*0x04080040*/0x02000030; - bool changed = cmd_buffer->state.current_l3_config != l3c2_val; + uint32_t l3cr2_slm, l3cr2_noslm; + anv_pack_struct(&l3cr2_noslm, GENX(L3CNTLREG2), + .URBAllocation = 24, + .ROAllocation = 0, + .DCAllocation = 16); + anv_pack_struct(&l3cr2_slm, GENX(L3CNTLREG2), + .SLMEnable = 1, + .URBAllocation = 16, + .URBLowBandwidth = 1, + .ROAllocation = 0, + .DCAllocation = 8); + const uint32_t l3cr2_val = enable_slm ? 
l3cr2_slm : l3cr2_noslm; + bool changed = cmd_buffer->state.current_l3_config != l3cr2_val; if (changed) { /* According to the hardware docs, the L3 partitioning can only be changed @@ -346,10 +346,21 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) .CommandStreamerStallEnable = true); anv_finishme("write GEN7_L3SQCREG1"); - emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG2, l3c2_val); - emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG3, - enable_slm ? 0x00040810 : 0x00040410); - cmd_buffer->state.current_l3_config = l3c2_val; + emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG2), l3cr2_val); + + uint32_t l3cr3_slm, l3cr3_noslm; + anv_pack_struct(&l3cr3_noslm, GENX(L3CNTLREG3), + .ISAllocation = 8, + .CAllocation = 4, + .TAllocation = 8); + anv_pack_struct(&l3cr3_slm, GENX(L3CNTLREG3), + .ISAllocation = 8, + .CAllocation = 8, + .TAllocation = 8); + const uint32_t l3cr3_val = enable_slm ? l3cr3_slm : l3cr3_noslm; + emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG3), l3cr3_val); + + cmd_buffer->state.current_l3_config = l3cr2_val; } } diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 87b5e340772..3fb5c276107 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -108,15 +108,10 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) } #endif -static void -emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), - .RegisterOffset = reg, - .DataDWord = imm); -} - -#define GEN8_L3CNTLREG 0x7034 +#define emit_lri(batch, reg, imm) \ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), \ + .RegisterOffset = __anv_reg_num(reg), \ + .DataDWord = imm) void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) @@ -127,12 +122,16 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) * - src/mesa/drivers/dri/i965/gen7_l3_state.c */ - uint32_t val = enable_slm ? 
- /* All = 48 ways; URB = 16 ways; DC and RO = 0, SLM = 1 */ - 0x60000021 : - /* All = 48 ways; URB = 48 ways; DC, RO and SLM = 0 */ - 0x60000060; - bool changed = cmd_buffer->state.current_l3_config != val; + uint32_t l3cr_slm, l3cr_noslm; + anv_pack_struct(&l3cr_noslm, GENX(L3CNTLREG), + .URBAllocation = 48, + .AllAllocation = 48); + anv_pack_struct(&l3cr_slm, GENX(L3CNTLREG), + .SLMEnable = 1, + .URBAllocation = 16, + .AllAllocation = 48); + const uint32_t l3cr_val = enable_slm ? l3cr_slm : l3cr_noslm; + bool changed = cmd_buffer->state.current_l3_config != l3cr_val; if (changed) { /* According to the hardware docs, the L3 partitioning can only be changed @@ -157,8 +156,8 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) .PostSyncOperation = NoWrite, .CommandStreamerStallEnable = true); - emit_lri(&cmd_buffer->batch, GEN8_L3CNTLREG, val); - cmd_buffer->state.current_l3_config = val; + emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG), l3cr_val); + cmd_buffer->state.current_l3_config = l3cr_val; } } -- cgit v1.2.3 From add8c837b5ec7e8ef6a0eefe4bd673f944210fdb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 10:05:36 -0700 Subject: nir/glsl: Stop carying a pointer to the nir_shader in the visitor --- src/compiler/nir/glsl_to_nir.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp index 7b8b4668848..2a469ec7355 100644 --- a/src/compiler/nir/glsl_to_nir.cpp +++ b/src/compiler/nir/glsl_to_nir.cpp @@ -46,7 +46,7 @@ namespace { class nir_visitor : public ir_visitor { public: - nir_visitor(nir_shader *shader, gl_shader *sh); + nir_visitor(nir_shader *shader); ~nir_visitor(); virtual void visit(ir_variable *); @@ -85,8 +85,6 @@ private: bool supports_ints; - struct gl_shader *sh; - nir_shader *shader; nir_function_impl *impl; nir_builder b; @@ -140,7 +138,7 @@ glsl_to_nir(const struct gl_shader_program 
*shader_prog, nir_shader *shader = nir_shader_create(NULL, stage, options); - nir_visitor v1(shader, sh); + nir_visitor v1(shader); nir_function_visitor v2(&v1); v2.run(sh->ir); visit_exec_list(sh->ir, &v1); @@ -215,11 +213,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, return shader; } -nir_visitor::nir_visitor(nir_shader *shader, gl_shader *sh) +nir_visitor::nir_visitor(nir_shader *shader) { this->supports_ints = shader->options->native_integers; this->shader = shader; - this->sh = sh; this->is_global = true; this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); -- cgit v1.2.3 From b50f7f0011e6f497277c575bc36b5dab80d45bea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 10:12:52 -0700 Subject: nir: Add a better comment for INTRINSIC_RANGE --- src/compiler/nir/nir.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 2fd75ec8be5..de6b93c955c 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -986,8 +986,8 @@ typedef enum { NIR_INTRINSIC_UCP_ID = 4, /** - * The range of a load operation. This specifies the maximum amount of - * data starting at the base offset (if any) that can be accessed. + * The ammount of data, starting from BASE, that this instruction may + * access. This is used to provide bounds if the offset is not constant. 
*/ NIR_INTRINSIC_RANGE = 5, -- cgit v1.2.3 From d2eee52a6554217b21b93ab9d8ab39abee331dd8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 10:40:24 -0700 Subject: nir/builder: Add a bit size field to nir_ssa_undef --- src/compiler/nir/nir_builder.h | 3 ++- src/compiler/nir/spirv/spirv_to_nir.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 94f183c1552..22646f7afb4 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -75,10 +75,11 @@ nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf) } static inline nir_ssa_def * -nir_ssa_undef(nir_builder *build, unsigned num_components) +nir_ssa_undef(nir_builder *build, unsigned num_components, unsigned bit_size) { nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(build->shader, num_components); + undef->def.bit_size = bit_size; if (!undef) return NULL; diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index 42a1f953848..cbc87d1f740 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -1496,7 +1496,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, assert(w[5] == SpvImageOperandsSampleMask); image.sample = vtn_ssa_value(b, w[6])->def; } else { - image.sample = nir_ssa_undef(&b->nb, 1); + image.sample = nir_ssa_undef(&b->nb, 1, 32); } break; @@ -1511,7 +1511,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, assert(w[4] == SpvImageOperandsSampleMask); image.sample = vtn_ssa_value(b, w[5])->def; } else { - image.sample = nir_ssa_undef(&b->nb, 1); + image.sample = nir_ssa_undef(&b->nb, 1, 32); } break; -- cgit v1.2.3 From ab31951bef17a46d4fb38c42be46dd5955463801 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 10:40:45 -0700 Subject: nir/spirv: Use the nir_ssa_undef helper from nir_builder --- src/compiler/nir/spirv/spirv_to_nir.c | 13 
++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index cbc87d1f740..663f41fa876 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -38,11 +38,8 @@ vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type) if (glsl_type_is_vector_or_scalar(type)) { unsigned num_components = glsl_get_vector_elements(val->type); - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(b->shader, num_components); - - nir_instr_insert_before_cf_list(&b->impl->body, &undef->instr); - val->def = &undef->def; + unsigned bit_size = glsl_get_bit_size(glsl_get_base_type(val->type)); + val->def = nir_ssa_undef(&b->nb, num_components, bit_size); } else { unsigned elems = glsl_get_length(val->type); val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); @@ -1863,13 +1860,11 @@ vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, { nir_alu_instr *vec = create_vec(b->shader, num_components, src0->bit_size); - nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1); - nir_builder_instr_insert(&b->nb, &undef->instr); - for (unsigned i = 0; i < num_components; i++) { uint32_t index = indices[i]; if (index == 0xffffffff) { - vec->src[i].src = nir_src_for_ssa(&undef->def); + vec->src[i].src = + nir_src_for_ssa(nir_ssa_undef(&b->nb, 1, src0->bit_size)); } else if (index < src0->num_components) { vec->src[i].src = nir_src_for_ssa(src0); vec->src[i].swizzle[0] = index; -- cgit v1.2.3 From b75d770963a6fde474ed84cad73ee754922bccbf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 10:43:17 -0700 Subject: nir/builder: Simplify nir_ssa_undef a bit --- src/compiler/nir/nir_builder.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 22646f7afb4..3dc7c25ec28 100644 --- 
a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -83,8 +83,7 @@ nir_ssa_undef(nir_builder *build, unsigned num_components, unsigned bit_size) if (!undef) return NULL; - nir_instr_insert(nir_before_block(nir_start_block(build->impl)), - &undef->instr); + nir_instr_insert(nir_before_cf_list(&build->impl->body), &undef->instr); return &undef->def; } -- cgit v1.2.3 From 08fe89864b94f970ce73368636a87eace3c81663 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 12:09:33 -0700 Subject: nir/algebraic: Add better lowering of ldexp --- src/compiler/nir/nir_opt_algebraic.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index ed21c5d5c8c..60cd73132f7 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -276,8 +276,6 @@ optimizations = [ (('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'), (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'), (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'), - (('ldexp', 'x', 'exp'), - ('fmul', 'x', ('ishl', ('imin', ('imax', ('iadd', 'exp', 0x7f), 0), 0xff), 23))), (('bitfield_insert', 'base', 'insert', 'offset', 'bits'), ('bcsel', ('ilt', 31, 'bits'), 'insert', @@ -359,6 +357,33 @@ optimizations = [ 'options->lower_unpack_snorm_4x8'), ] +def fexp2i(exp): + # We assume that exp is already in range. + return ('ishl', ('iadd', exp, 127), 23) + +def ldexp32(f, exp): + # First, we clamp exp to a reasonable range. The maximum range that we + # need is the largest range for an exponent, ([-127, 128] if you include + # inf and 0) plus the number of mantissa bits in either direction to + # account for denormals. This means that we need at least a range of + # [-150, 151]. 
For our implementation, however, what we really care + # about is that neither exp/2 nor exp-exp/2 go out of the regular range + # for floating-point exponents. + exp = ('imin', ('imax', exp, -252), 254) + + # Now we compute two powers of 2, one for exp/2 and one for exp-exp/2. + # While the spec technically defines ldexp as f * 2.0^exp, simply + # multiplying once doesn't work when denormals are involved because + # 2.0^exp may not be representable even though ldexp(f, exp) is (see + # comments above about range). Instead, we create two powers of two and + # multiply by them each in turn. That way the effective range of our + # exponent is doubled. + pow2_1 = fexp2i(('ishr', exp, 1)) + pow2_2 = fexp2i(('isub', exp, ('ishr', exp, 1))) + return ('fmul', ('fmul', f, pow2_1), pow2_2) + +optimizations += [(('ldexp', 'x', 'exp'), ldexp32('x', 'exp'))] + # Unreal Engine 4 demo applications open-codes bitfieldReverse() def bitfield_reverse(u): step1 = ('ior', ('ishl', u, 16), ('ushr', u, 16)) -- cgit v1.2.3 From 13bad493b49fe24bd16cbec14592f22c94a826f8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 12:12:12 -0700 Subject: nir/algebraic: Get rid of a redundant copy of fdiv lowering --- src/compiler/nir/nir_opt_algebraic.py | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 60cd73132f7..2e9cd5fcc94 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -82,7 +82,6 @@ optimizations = [ (('imul', a, 1), a), (('fmul', a, -1.0), ('fneg', a)), (('imul', a, -1), ('ineg', a)), - (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'), (('~ffma', 0.0, a, b), b), (('~ffma', a, 0.0, b), b), (('~ffma', a, b, 0.0), ('fmul', a, b)), -- cgit v1.2.3 From 38250a9ca32ea79ada10918952c2917b535a4536 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 13:55:37 -0700 Subject: i965/vec4: Get rid of a stray 
predicate inverse in opquantizef16 This fixes 30 opquantize CTS tests on HSW --- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 7c06f92e784..a5db2f94d73 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1248,7 +1248,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) /* Select that or zero based on normal status */ inst = emit(BRW_OPCODE_SEL, dst, zero, tmp32); inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = true; inst->saturate = instr->dest.saturate; break; } -- cgit v1.2.3 From 4eab37d6cda54a4ac600347f764ef223c3a7459f Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 21 Mar 2016 10:41:06 -0700 Subject: anv/image: Enable specifying a surface's minimum pitch This is required to create multiple, horizontally adjacent, max-width images from one blit2d surface. This is also required for more accurate width specification of surfaces within a larger surface (which is seen as the smaller surface's enclosing region). Note that anv_image_create_info::stride has been unused since commit, b36938964063a4072abfd779f5607743dbc3b6f1 . 
Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index b47425bd0e1..266fbe73ddc 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -138,7 +138,7 @@ make_surface(const struct anv_device *dev, .array_len = vk_info->arrayLayers, .samples = vk_info->samples, .min_alignment = 0, - .min_pitch = 0, + .min_pitch = anv_info->stride, .usage = choose_isl_surf_usage(image->usage, aspect), .tiling_flags = tiling_flags); -- cgit v1.2.3 From 0e82896a116ea0212dfcb13fb1456c93732d8564 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 18 Mar 2016 15:12:32 -0700 Subject: anv/blit2d: Add a function to create an ImageView This function differs from the open-coded implementation in that the ImageView's width is determined by the caller and is not unconditionally set to match the number of texels within the surface's pitch. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit2d.c | 196 ++++++++++++++++--------------------- 1 file changed, 83 insertions(+), 113 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 87c3358f045..734ba8ec1d6 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -54,6 +54,81 @@ vk_format_for_size(int bs) } } +static void +create_iview(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *surf, + struct anv_meta_blit2d_rect *rect, + VkImageUsageFlags usage, + VkImage *img, + struct anv_image_view *iview) +{ + struct isl_tile_info tile_info; + isl_tiling_get_info(&cmd_buffer->device->isl_dev, + surf->tiling, surf->bs, &tile_info); + const unsigned tile_width_px = tile_info.width > surf->bs ? + tile_info.width / surf->bs : 1; + uint32_t *rect_y = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ? 
+ &rect->src_y : &rect->dst_y; + uint32_t *rect_x = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ? + &rect->src_x : &rect->dst_x; + + /* Define the shared state among all created image views */ + const VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = vk_format_for_size(surf->bs), + .extent = { + .width = rect->width + (*rect_x) % tile_width_px, + .height = rect->height + (*rect_y) % tile_info.height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = surf->tiling == ISL_TILING_LINEAR ? + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL, + .usage = usage, + }; + + /* Create the VkImage that is bound to the surface's memory. */ + anv_image_create(anv_device_to_handle(cmd_buffer->device), + &(struct anv_image_create_info) { + .vk_info = &image_info, + .isl_tiling_flags = 1 << surf->tiling, + .stride = surf->pitch, + }, &cmd_buffer->pool->alloc, img); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + anv_image_from_handle(*img)->bo = surf->bo; + anv_image_from_handle(*img)->offset = surf->base_offset; + + /* Create a VkImageView that starts at the tile aligned offset closest + * to the provided x/y offset into the surface. 
+ */ + uint32_t img_o = 0; + isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, + &anv_image_from_handle(*img)-> + color_surface.isl, + *rect_x, *rect_y, + &img_o, rect_x, rect_y); + anv_image_view_init(iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = *img, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_info.format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, cmd_buffer, img_o, usage); +} + static void meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_image_view *src_iview, @@ -260,132 +335,27 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_rect *rects) { VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - VkFormat src_format = vk_format_for_size(src->bs); - VkFormat dst_format = vk_format_for_size(dst->bs); VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; for (unsigned r = 0; r < num_rects; ++r) { - - /* Create VkImages */ - VkImageCreateInfo image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = 0, /* TEMPLATE */ - .extent = { - .width = 0, /* TEMPLATE */ - .height = 0, /* TEMPLATE */ - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = 0, /* TEMPLATE */ - .usage = 0, /* TEMPLATE */ - }; - struct anv_image_create_info anv_image_info = { - .vk_info = &image_info, - .isl_tiling_flags = 0, /* TEMPLATE */ - }; - - /* The image height is the rect height + src/dst y-offset from the - * tile-aligned base address. - */ - struct isl_tile_info tile_info; - - anv_image_info.isl_tiling_flags = 1 << src->tiling; - image_info.tiling = src->tiling == ISL_TILING_LINEAR ? 
- VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - image_info.usage = src_usage; - image_info.format = src_format, - isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs, - &tile_info); - image_info.extent.height = rects[r].height + - rects[r].src_y % tile_info.height; - image_info.extent.width = src->pitch / src->bs; - VkImage src_image; - anv_image_create(vk_device, &anv_image_info, - &cmd_buffer->pool->alloc, &src_image); - - anv_image_info.isl_tiling_flags = 1 << dst->tiling; - image_info.tiling = dst->tiling == ISL_TILING_LINEAR ? - VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - image_info.usage = dst_usage; - image_info.format = dst_format, - isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs, - &tile_info); - image_info.extent.height = rects[r].height + - rects[r].dst_y % tile_info.height; - image_info.extent.width = dst->pitch / dst->bs; - VkImage dst_image; - anv_image_create(vk_device, &anv_image_info, - &cmd_buffer->pool->alloc, &dst_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. 
- */ - anv_image_from_handle(src_image)->bo = src->bo; - anv_image_from_handle(src_image)->offset = src->base_offset; - anv_image_from_handle(dst_image)->bo = dst->bo; - anv_image_from_handle(dst_image)->offset = dst->base_offset; - - /* Create VkImageViews */ - VkImageViewCreateInfo iview_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = 0, /* TEMPLATE */ - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = 0, /* TEMPLATE */ - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }; - uint32_t img_o = 0; - - iview_info.image = src_image; - iview_info.format = src_format; - VkOffset3D src_offset_el = {0}; - isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, - &anv_image_from_handle(src_image)-> - color_surface.isl, - rects[r].src_x, - rects[r].src_y, - &img_o, - (uint32_t*)&src_offset_el.x, - (uint32_t*)&src_offset_el.y); - + VkImage src_img; + VkImage dst_img; struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &iview_info, cmd_buffer, img_o, src_usage); - - iview_info.image = dst_image; - iview_info.format = dst_format; - VkOffset3D dst_offset_el = {0}; - isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, - &anv_image_from_handle(dst_image)-> - color_surface.isl, - rects[r].dst_x, - rects[r].dst_y, - &img_o, - (uint32_t*)&dst_offset_el.x, - (uint32_t*)&dst_offset_el.y); struct anv_image_view dst_iview; - anv_image_view_init(&dst_iview, cmd_buffer->device, - &iview_info, cmd_buffer, img_o, dst_usage); + create_iview(cmd_buffer, src, &rects[r], src_usage, &src_img, &src_iview); + create_iview(cmd_buffer, dst, &rects[r], dst_usage, &dst_img, &dst_iview); /* Perform blit */ meta_emit_blit2d(cmd_buffer, &src_iview, - src_offset_el, + (VkOffset3D){rects[r].src_x, rects[r].src_y, 0}, &dst_iview, - dst_offset_el, + (VkOffset3D){rects[r].dst_x, rects[r].dst_y, 0}, 
(VkExtent3D){rects[r].width, rects[r].height, 1}); - anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); - anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, src_img, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); } } -- cgit v1.2.3 From 98522c1853eac22b3501e8739020b362786d1811 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 14:40:57 -0700 Subject: nir/spirv: Get rid of the spirv2nir helper binary This was useful once upon a time but now that we have a real Vulkan driver to run our SPIR-V binaries through, there's really no point. --- src/compiler/Makefile.am | 16 +------------ src/compiler/nir/spirv2nir.c | 55 -------------------------------------------- 2 files changed, 1 insertion(+), 70 deletions(-) delete mode 100644 src/compiler/nir/spirv2nir.c (limited to 'src') diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am index 8f374484696..5032890e73d 100644 --- a/src/compiler/Makefile.am +++ b/src/compiler/Makefile.am @@ -84,7 +84,7 @@ check_PROGRAMS += \ glsl/tests/sampler-types-test \ glsl/tests/uniform-initializer-test -noinst_PROGRAMS = glsl_compiler spirv2nir +noinst_PROGRAMS = glsl_compiler glsl_tests_blob_test_SOURCES = \ glsl/tests/blob_test.c @@ -176,20 +176,6 @@ glsl_glsl_test_LDADD = \ $(top_builddir)/src/libglsl_util.la \ $(PTHREAD_LIBS) -spirv2nir_SOURCES = \ - nir/spirv2nir.c - -spirv2nir_CPPFLAGS = \ - $(AM_CPPFLAGS) \ - -I$(top_builddir)/src/compiler/nir \ - -I$(top_srcdir)/src/compiler/nir - -spirv2nir_LDADD = \ - nir/libnir.la \ - $(top_builddir)/src/util/libmesautil.la \ - -lm -lstdc++ \ - $(PTHREAD_LIBS) - # We write our own rules for yacc and lex below. 
We'd rather use automake, # but automake makes it especially difficult for a number of reasons: # diff --git a/src/compiler/nir/spirv2nir.c b/src/compiler/nir/spirv2nir.c deleted file mode 100644 index c837186bdfc..00000000000 --- a/src/compiler/nir/spirv2nir.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -/* - * A simple executable that opens a SPIR-V shader, converts it to NIR, and - * dumps out the result. This should be useful for testing the - * spirv_to_nir code. 
- */ - -#include "spirv/nir_spirv.h" - -#include -#include -#include -#include - -int main(int argc, char **argv) -{ - int fd = open(argv[1], O_RDONLY); - off_t len = lseek(fd, 0, SEEK_END); - - assert(len % 4 == 0); - size_t word_count = len / 4; - - const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); - assert(map != NULL); - - nir_function *func = spirv_to_nir(map, word_count, NULL, 0, - MESA_SHADER_FRAGMENT, "main", NULL); - nir_print_shader(func->shader, stderr); -} -- cgit v1.2.3 From 00fa795cd3bfd89b925698367173167656d4ae6c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 15:15:45 -0700 Subject: spirv/glsl: Add a helper for converting glsl opcodes into nir opcodes This is similar to the way that regular ALU operations are handled. --- src/compiler/nir/spirv/vtn_glsl450.c | 117 +++++++++++++++++------------------ 1 file changed, 56 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/vtn_glsl450.c b/src/compiler/nir/spirv/vtn_glsl450.c index 3360fda067a..e05d28ffede 100644 --- a/src/compiler/nir/spirv/vtn_glsl450.c +++ b/src/compiler/nir/spirv/vtn_glsl450.c @@ -359,6 +359,57 @@ build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent) nir_bcsel(b, is_not_zero, exponent_value, zero)); } +static nir_op +vtn_nir_alu_op_for_spirv_glsl_opcode(enum GLSLstd450 opcode) +{ + switch (opcode) { + case GLSLstd450Round: return nir_op_fround_even; + case GLSLstd450RoundEven: return nir_op_fround_even; + case GLSLstd450Trunc: return nir_op_ftrunc; + case GLSLstd450FAbs: return nir_op_fabs; + case GLSLstd450SAbs: return nir_op_iabs; + case GLSLstd450FSign: return nir_op_fsign; + case GLSLstd450SSign: return nir_op_isign; + case GLSLstd450Floor: return nir_op_ffloor; + case GLSLstd450Ceil: return nir_op_fceil; + case GLSLstd450Fract: return nir_op_ffract; + case GLSLstd450Sin: return nir_op_fsin; + case GLSLstd450Cos: return nir_op_fcos; + case GLSLstd450Pow: return nir_op_fpow; + case GLSLstd450Exp2: 
return nir_op_fexp2; + case GLSLstd450Log2: return nir_op_flog2; + case GLSLstd450Sqrt: return nir_op_fsqrt; + case GLSLstd450InverseSqrt: return nir_op_frsq; + case GLSLstd450FMin: return nir_op_fmin; + case GLSLstd450UMin: return nir_op_umin; + case GLSLstd450SMin: return nir_op_imin; + case GLSLstd450FMax: return nir_op_fmax; + case GLSLstd450UMax: return nir_op_umax; + case GLSLstd450SMax: return nir_op_imax; + case GLSLstd450FMix: return nir_op_flrp; + case GLSLstd450Fma: return nir_op_ffma; + case GLSLstd450Ldexp: return nir_op_ldexp; + case GLSLstd450FindILsb: return nir_op_find_lsb; + case GLSLstd450FindSMsb: return nir_op_ifind_msb; + case GLSLstd450FindUMsb: return nir_op_ufind_msb; + + /* Packing/Unpacking functions */ + case GLSLstd450PackSnorm4x8: return nir_op_pack_snorm_4x8; + case GLSLstd450PackUnorm4x8: return nir_op_pack_unorm_4x8; + case GLSLstd450PackSnorm2x16: return nir_op_pack_snorm_2x16; + case GLSLstd450PackUnorm2x16: return nir_op_pack_unorm_2x16; + case GLSLstd450PackHalf2x16: return nir_op_pack_half_2x16; + case GLSLstd450UnpackSnorm4x8: return nir_op_unpack_snorm_4x8; + case GLSLstd450UnpackUnorm4x8: return nir_op_unpack_unorm_4x8; + case GLSLstd450UnpackSnorm2x16: return nir_op_unpack_snorm_2x16; + case GLSLstd450UnpackUnorm2x16: return nir_op_unpack_unorm_2x16; + case GLSLstd450UnpackHalf2x16: return nir_op_unpack_half_2x16; + + default: + unreachable("No NIR equivalent"); + } +} + static void handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, const uint32_t *w, unsigned count) @@ -372,39 +423,21 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, /* Collect the various SSA sources */ unsigned num_inputs = count - 5; - nir_ssa_def *src[3]; + nir_ssa_def *src[3] = { NULL, }; for (unsigned i = 0; i < num_inputs; i++) src[i] = vtn_ssa_value(b, w[i + 5])->def; - nir_op op; switch (entrypoint) { - case GLSLstd450Round: op = nir_op_fround_even; break; /* TODO */ - case GLSLstd450RoundEven: op = 
nir_op_fround_even; break; - case GLSLstd450Trunc: op = nir_op_ftrunc; break; - case GLSLstd450FAbs: op = nir_op_fabs; break; - case GLSLstd450SAbs: op = nir_op_iabs; break; - case GLSLstd450FSign: op = nir_op_fsign; break; - case GLSLstd450SSign: op = nir_op_isign; break; - case GLSLstd450Floor: op = nir_op_ffloor; break; - case GLSLstd450Ceil: op = nir_op_fceil; break; - case GLSLstd450Fract: op = nir_op_ffract; break; case GLSLstd450Radians: val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251)); return; case GLSLstd450Degrees: val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131)); return; - case GLSLstd450Sin: op = nir_op_fsin; break; - case GLSLstd450Cos: op = nir_op_fcos; break; case GLSLstd450Tan: val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]), nir_fcos(nb, src[0])); return; - case GLSLstd450Pow: op = nir_op_fpow; break; - case GLSLstd450Exp2: op = nir_op_fexp2; break; - case GLSLstd450Log2: op = nir_op_flog2; break; - case GLSLstd450Sqrt: op = nir_op_fsqrt; break; - case GLSLstd450InverseSqrt: op = nir_op_frsq; break; case GLSLstd450Modf: { nir_ssa_def *sign = nir_fsign(nb, src[0]); @@ -424,32 +457,10 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; } - case GLSLstd450FMin: op = nir_op_fmin; break; - case GLSLstd450UMin: op = nir_op_umin; break; - case GLSLstd450SMin: op = nir_op_imin; break; - case GLSLstd450FMax: op = nir_op_fmax; break; - case GLSLstd450UMax: op = nir_op_umax; break; - case GLSLstd450SMax: op = nir_op_imax; break; - case GLSLstd450FMix: op = nir_op_flrp; break; case GLSLstd450Step: val->ssa->def = nir_sge(nb, src[1], src[0]); return; - case GLSLstd450Fma: op = nir_op_ffma; break; - case GLSLstd450Ldexp: op = nir_op_ldexp; break; - - /* Packing/Unpacking functions */ - case GLSLstd450PackSnorm4x8: op = nir_op_pack_snorm_4x8; break; - case GLSLstd450PackUnorm4x8: op = nir_op_pack_unorm_4x8; break; - case GLSLstd450PackSnorm2x16: op = nir_op_pack_snorm_2x16; break; - case 
GLSLstd450PackUnorm2x16: op = nir_op_pack_unorm_2x16; break; - case GLSLstd450PackHalf2x16: op = nir_op_pack_half_2x16; break; - case GLSLstd450UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break; - case GLSLstd450UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break; - case GLSLstd450UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break; - case GLSLstd450UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break; - case GLSLstd450UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; - case GLSLstd450Length: val->ssa->def = build_length(nb, src[0]); return; @@ -584,10 +595,6 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; } - case GLSLstd450FindILsb: op = nir_op_find_lsb; break; - case GLSLstd450FindSMsb: op = nir_op_ifind_msb; break; - case GLSLstd450FindUMsb: op = nir_op_ufind_msb; break; - case GLSLstd450Asin: val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955); return; @@ -619,24 +626,12 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, return; } - case GLSLstd450PackDouble2x32: - case GLSLstd450UnpackDouble2x32: default: - unreachable("Unhandled opcode"); + val->ssa->def = + nir_build_alu(&b->nb, vtn_nir_alu_op_for_spirv_glsl_opcode(entrypoint), + src[0], src[1], src[2], NULL); + return; } - - nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); - nir_ssa_dest_init(&instr->instr, &instr->dest.dest, - glsl_get_vector_elements(val->ssa->type), - glsl_get_bit_size(glsl_get_base_type(val->ssa->type)), - val->name); - instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1; - val->ssa->def = &instr->dest.dest.ssa; - - for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) - instr->src[i].src = nir_src_for_ssa(src[i]); - - nir_builder_instr_insert(nb, &instr->instr); } bool -- cgit v1.2.3 From fbb9e1f008f8059c373ae9f130be139e0d4e1ae9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 15:30:46 -0700 Subject: spirv/alu: Add support for the NoContraction decoration 
--- src/compiler/nir/spirv/vtn_alu.c | 69 ++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/vtn_alu.c b/src/compiler/nir/spirv/vtn_alu.c index 450bc158be9..c9526f1d818 100644 --- a/src/compiler/nir/spirv/vtn_alu.c +++ b/src/compiler/nir/spirv/vtn_alu.c @@ -305,6 +305,17 @@ vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap) } } +static void +handle_no_contraction(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *_void) +{ + assert(dec->scope == VTN_DEC_DECORATION); + if (dec->decoration != SpvDecorationNoContraction) + return; + + b->nb.exact = true; +} + void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -313,15 +324,39 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, const struct glsl_type *type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + vtn_foreach_decoration(b, val, handle_no_contraction, NULL); + /* Collect the various SSA sources */ const unsigned num_inputs = count - 3; struct vtn_ssa_value *vtn_src[4] = { NULL, }; - for (unsigned i = 0; i < num_inputs; i++) + for (unsigned i = 0; i < num_inputs; i++) { vtn_src[i] = vtn_ssa_value(b, w[i + 3]); + /* The way SPIR-V defines the NoContraction decoration is rediculous. + * It expressly says in the SPIR-V spec: + * + * "For example, if applied to an OpFMul, that multiply can’t be + * combined with an addition to yield a fused multiply-add + * operation." + * + * Technically, this means we would have to either rewrite NIR with + * another silly "don't fuse me" flag or we would have to propagate + * the NoContraction decoration to all consumers of a value which + * would make it far more infectious than anyone intended. + * + * Instead, we take a short-cut by simply looking at the sources and + * see if any of them have it. That should be good enough. 
+ * + * See also issue #17 on the SPIR-V gitlab + */ + vtn_foreach_decoration(b, vtn_untyped_value(b, w[i + 3]), + handle_no_contraction, NULL); + } + if (glsl_type_is_matrix(vtn_src[0]->type) || (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) { vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]); + b->nb.exact = false; return; } @@ -347,7 +382,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_imm_int(&b->nb, NIR_FALSE), NULL, NULL); } - return; + break; case SpvOpAll: if (src[0]->num_components == 1) { @@ -363,73 +398,73 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_imm_int(&b->nb, NIR_TRUE), NULL, NULL); } - return; + break; case SpvOpOuterProduct: { for (unsigned i = 0; i < src[1]->num_components; i++) { val->ssa->elems[i]->def = nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i)); } - return; + break; } case SpvOpDot: val->ssa->def = nir_fdot(&b->nb, src[0], src[1]); - return; + break; case SpvOpIAddCarry: assert(glsl_type_is_struct(val->ssa->type)); val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]); - return; + break; case SpvOpISubBorrow: assert(glsl_type_is_struct(val->ssa->type)); val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]); - return; + break; case SpvOpUMulExtended: assert(glsl_type_is_struct(val->ssa->type)); val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]); - return; + break; case SpvOpSMulExtended: assert(glsl_type_is_struct(val->ssa->type)); val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]); - return; + break; case SpvOpFwidth: val->ssa->def = nir_fadd(&b->nb, nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), nir_fabs(&b->nb, nir_fddy(&b->nb, src[0]))); - return; + break; case SpvOpFwidthFine: val->ssa->def 
= nir_fadd(&b->nb, nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0]))); - return; + break; case SpvOpFwidthCoarse: val->ssa->def = nir_fadd(&b->nb, nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0]))); - return; + break; case SpvOpVectorTimesScalar: /* The builder will take care of splatting for us. */ val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); - return; + break; case SpvOpIsNan: val->ssa->def = nir_fne(&b->nb, src[0], src[0]); - return; + break; case SpvOpIsInf: val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), nir_imm_float(&b->nb, INFINITY)); - return; + break; default: { bool swap; @@ -442,7 +477,9 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, } val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); - return; + break; } /* default */ } + + b->nb.exact = false; } -- cgit v1.2.3 From 6d658e9bd546581a0841e7acb8915dc05d44c628 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 15:53:40 -0700 Subject: i965: Allow mul+add fusing again --- src/mesa/drivers/dri/i965/brw_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index ab6000b0573..83921891d1c 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -495,7 +495,7 @@ brw_postprocess_nir(nir_shader *nir, if (devinfo->gen >= 6) { /* Try and fuse multiply-adds */ -// OPT(brw_nir_opt_peephole_ffma); + OPT(brw_nir_opt_peephole_ffma); } OPT(nir_opt_algebraic_late); -- cgit v1.2.3 From 035f66025b4d82ce40573da86bde6afda4a05ec7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 11:12:33 -0700 Subject: nir/search: Don't match inexact expressions with exact subexpressions In the first pass of implementing exact handling, I made a mistake with search-and-replace. 
In particular, we only reallly handled exact/inexact on the root of the tree. Instead, we need to check every node in the tree for an exact/inexact match. As an example of this, consider the following GLSL code precise float a = b + c; if (a < 0) { do_stuff(); } In that case, only the add will be declared "exact" and an expression that looks for "b + c < 0" will still match and replace it with "b < -c" which may yield different results. The solution is to simply bail if any of the values are exact when matching an inexact expression. --- src/compiler/nir/nir_search.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c index 6e630631453..b915101ce32 100644 --- a/src/compiler/nir/nir_search.c +++ b/src/compiler/nir/nir_search.c @@ -28,6 +28,8 @@ #include "nir_search.h" struct match_state { + bool inexact_match; + bool has_exact_alu; unsigned variables_seen; nir_alu_src variables[NIR_SEARCH_MAX_VARIABLES]; }; @@ -239,7 +241,10 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr, return false; assert(instr->dest.dest.is_ssa); - if (expr->inexact && instr->exact) + + state->inexact_match = expr->inexact || state->inexact_match; + state->has_exact_alu = instr->exact || state->has_exact_alu; + if (state->inexact_match && state->has_exact_alu) return false; assert(!instr->dest.saturate); @@ -410,7 +415,7 @@ bitsize_tree_filter_down(bitsize_tree *tree, unsigned size) static nir_alu_src construct_value(const nir_search_value *value, - unsigned num_components, bitsize_tree *bitsize, bool exact, + unsigned num_components, bitsize_tree *bitsize, struct match_state *state, nir_instr *instr, void *mem_ctx) { @@ -424,10 +429,16 @@ construct_value(const nir_search_value *value, nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode); nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, bitsize->dest_size, NULL); - 
alu->exact = exact; alu->dest.write_mask = (1 << num_components) - 1; alu->dest.saturate = false; + /* We have no way of knowing what values in a given search expression + * map to a particular replacement value. Therefore, if the + * expression we are replacing has any exact values, the entire + * replacement should be exact. + */ + alu->exact = state->has_exact_alu; + for (unsigned i = 0; i < nir_op_infos[expr->opcode].num_inputs; i++) { /* If the source is an explicitly sized source, then we need to reset * the number of components to match. @@ -436,7 +447,7 @@ construct_value(const nir_search_value *value, num_components = nir_op_infos[alu->op].input_sizes[i]; alu->src[i] = construct_value(expr->srcs[i], - num_components, bitsize->srcs[i], exact, + num_components, bitsize->srcs[i], state, instr, mem_ctx); } @@ -546,6 +557,8 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search, assert(instr->dest.dest.is_ssa); struct match_state state; + state.inexact_match = false; + state.has_exact_alu = false; state.variables_seen = 0; if (!match_expression(search, instr, instr->dest.dest.ssa.num_components, @@ -569,7 +582,7 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search, mov->src[0] = construct_value(replace, instr->dest.dest.ssa.num_components, tree, - instr->exact, &state, &instr->instr, mem_ctx); + &state, &instr->instr, mem_ctx); nir_instr_insert_before(&instr->instr, &mov->instr); nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, -- cgit v1.2.3 From 5d9afb65a60f022af0daddbb21bc57bb2c830606 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 11:47:27 -0700 Subject: i965/peephole_ffma: Only match a mul+add if none of the ops are exact --- src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c index 
6e8b1f99505..22ff2e3c9f8 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c +++ b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c @@ -84,6 +84,17 @@ get_mul_for_src(nir_alu_src *src, int num_components, return NULL; nir_alu_instr *alu = nir_instr_as_alu(instr); + + /* We want to bail if any of the other ALU operations involved is labled + * exact. One reason for this is that, while the value that is changing is + * actually the result of the add and not the multiply, the intention of + * the user when they specify an exact multiply is that they want *that* + * value and what they don't care about is the add. Another reason is that + * SPIR-V explicitly requires this behaviour. + */ + if (alu->exact) + return NULL; + switch (alu->op) { case nir_op_imov: case nir_op_fmov: -- cgit v1.2.3 From 433cf90650f12039e16c2c245fd2e1cbf1ed3ac2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 12:27:40 -0700 Subject: nir/spirv: Remove the NoContraction hack NIR now just handles this for us by not fusing if the multiply is marked as exact. --- src/compiler/nir/spirv/vtn_alu.c | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/spirv/vtn_alu.c b/src/compiler/nir/spirv/vtn_alu.c index c9526f1d818..8b9a63ce760 100644 --- a/src/compiler/nir/spirv/vtn_alu.c +++ b/src/compiler/nir/spirv/vtn_alu.c @@ -329,30 +329,9 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, /* Collect the various SSA sources */ const unsigned num_inputs = count - 3; struct vtn_ssa_value *vtn_src[4] = { NULL, }; - for (unsigned i = 0; i < num_inputs; i++) { + for (unsigned i = 0; i < num_inputs; i++) vtn_src[i] = vtn_ssa_value(b, w[i + 3]); - /* The way SPIR-V defines the NoContraction decoration is rediculous. - * It expressly says in the SPIR-V spec: - * - * "For example, if applied to an OpFMul, that multiply can’t be - * combined with an addition to yield a fused multiply-add - * operation." 
- * - * Technically, this means we would have to either rewrite NIR with - * another silly "don't fuse me" flag or we would have to propagate - * the NoContraction decoration to all consumers of a value which - * would make it far more infectious than anyone intended. - * - * Instead, we take a short-cut by simply looking at the sources and - * see if any of them have it. That should be good enough. - * - * See also issue #17 on the SPIR-V gitlab - */ - vtn_foreach_decoration(b, vtn_untyped_value(b, w[i + 3]), - handle_no_contraction, NULL); - } - if (glsl_type_is_matrix(vtn_src[0]->type) || (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) { vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]); -- cgit v1.2.3 From 5879cb0251e7b4593eb4fd01684bd68f0945e3d1 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 28 Mar 2016 12:08:49 -0700 Subject: anv: Fix cache pollution race during L3 partitioning set-up. Port 0aa4f99f562a05880a779707cbcd46be459863bf to anv. Signed-off-by: Jordan Justen --- src/intel/vulkan/gen7_cmd_buffer.c | 36 ++++++++++++++++++++++++++---------- src/intel/vulkan/gen8_cmd_buffer.c | 36 ++++++++++++++++++++++++++---------- 2 files changed, 52 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 04c1d3b3477..06b3a75cbef 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -323,22 +323,38 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) bool changed = cmd_buffer->state.current_l3_config != l3cr2_val; if (changed) { - /* According to the hardware docs, the L3 partitioning can only be changed - * while the pipeline is completely drained and the caches are flushed, - * which involves a first PIPE_CONTROL flush which stalls the pipeline and - * initiates invalidation of the relevant caches... 
+ /* According to the hardware docs, the L3 partitioning can only be + * changed while the pipeline is completely drained and the caches are + * flushed, which involves a first PIPE_CONTROL flush which stalls the + * pipeline... */ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .TextureCacheInvalidationEnable = true, - .ConstantCacheInvalidationEnable = true, - .InstructionCacheInvalidateEnable = true, .DCFlushEnable = true, .PostSyncOperation = NoWrite, .CommandStreamerStallEnable = true); - /* ...followed by a second stalling flush which guarantees that - * invalidation is complete when the L3 configuration registers are - * modified. + /* ...followed by a second pipelined PIPE_CONTROL that initiates + * invalidation of the relevant caches. Note that because RO + * invalidation happens at the top of the pipeline (i.e. right away as + * the PIPE_CONTROL command is processed by the CS) we cannot combine it + * with the previous stalling flush as the hardware documentation + * suggests, because that would cause the CS to stall on previous + * rendering *after* RO invalidation and wouldn't prevent the RO caches + * from being polluted by concurrent rendering before the stall + * completes. This intentionally doesn't implement the SKL+ hardware + * workaround suggesting to enable CS stall on PIPE_CONTROLs with the + * texture cache invalidation bit set for GPGPU workloads because the + * previous and subsequent PIPE_CONTROLs already guarantee that there is + * no concurrent GPGPU kernel execution (see SKL HSD 2132585). + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .PostSyncOperation = NoWrite); + + /* Now send a third stalling flush to make sure that invalidation is + * complete when the L3 configuration registers are modified. 
*/ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .DCFlushEnable = true, diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 3fb5c276107..dab1d7411e7 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -134,22 +134,38 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) bool changed = cmd_buffer->state.current_l3_config != l3cr_val; if (changed) { - /* According to the hardware docs, the L3 partitioning can only be changed - * while the pipeline is completely drained and the caches are flushed, - * which involves a first PIPE_CONTROL flush which stalls the pipeline and - * initiates invalidation of the relevant caches... + /* According to the hardware docs, the L3 partitioning can only be + * changed while the pipeline is completely drained and the caches are + * flushed, which involves a first PIPE_CONTROL flush which stalls the + * pipeline... */ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .TextureCacheInvalidationEnable = true, - .ConstantCacheInvalidationEnable = true, - .InstructionCacheInvalidateEnable = true, .DCFlushEnable = true, .PostSyncOperation = NoWrite, .CommandStreamerStallEnable = true); - /* ...followed by a second stalling flush which guarantees that - * invalidation is complete when the L3 configuration registers are - * modified. + /* ...followed by a second pipelined PIPE_CONTROL that initiates + * invalidation of the relevant caches. Note that because RO + * invalidation happens at the top of the pipeline (i.e. right away as + * the PIPE_CONTROL command is processed by the CS) we cannot combine it + * with the previous stalling flush as the hardware documentation + * suggests, because that would cause the CS to stall on previous + * rendering *after* RO invalidation and wouldn't prevent the RO caches + * from being polluted by concurrent rendering before the stall + * completes. 
This intentionally doesn't implement the SKL+ hardware + * workaround suggesting to enable CS stall on PIPE_CONTROLs with the + * texture cache invalidation bit set for GPGPU workloads because the + * previous and subsequent PIPE_CONTROLs already guarantee that there is + * no concurrent GPGPU kernel execution (see SKL HSD 2132585). + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .PostSyncOperation = NoWrite); + + /* Now send a third stalling flush to make sure that invalidation is + * complete when the L3 configuration registers are modified. */ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .DCFlushEnable = true, -- cgit v1.2.3 From f60683b32a45081979df089894d36d05f2d0ec0c Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 28 Mar 2016 12:08:31 -0700 Subject: anv: Invalidate state cache before L3 partitioning set-up. Port 10d84ba9f084174a1e8e7639dfb05dd855ba86e8 to anv. 
Signed-off-by: Jordan Justen --- src/intel/vulkan/gen7_cmd_buffer.c | 1 + src/intel/vulkan/gen8_cmd_buffer.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 06b3a75cbef..b5d21efb203 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -351,6 +351,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) .TextureCacheInvalidationEnable = true, .ConstantCacheInvalidationEnable = true, .InstructionCacheInvalidateEnable = true, + .StateCacheInvalidationEnable = true, .PostSyncOperation = NoWrite); /* Now send a third stalling flush to make sure that invalidation is diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index dab1d7411e7..5b6afb3d70d 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -162,6 +162,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) .TextureCacheInvalidationEnable = true, .ConstantCacheInvalidationEnable = true, .InstructionCacheInvalidateEnable = true, + .StateCacheInvalidationEnable = true, .PostSyncOperation = NoWrite); /* Now send a third stalling flush to make sure that invalidation is -- cgit v1.2.3 From 1a3adae84aa16247cba0e3619d54e6d8d543fcf1 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 28 Mar 2016 14:45:24 -0700 Subject: anv/gen7: Save kernel command parser version Signed-off-by: Jordan Justen --- src/intel/vulkan/anv_device.c | 11 +++++++++++ src/intel/vulkan/anv_private.h | 1 + 2 files changed, 12 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 54a1f1274ab..01d3c996ad2 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -99,6 +99,17 @@ anv_physical_device_init(struct anv_physical_device *device, goto fail; } + device->cmd_parser_version = -1; + if 
(device->info->gen == 7) { + device->cmd_parser_version = + anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION); + if (device->cmd_parser_version == -1) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get command parser version"); + goto fail; + } + } + if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) { result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, "failed to get aperture size: %m"); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 77f453afb36..48ebff456e3 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -546,6 +546,7 @@ struct anv_physical_device { uint64_t aperture_size; struct brw_compiler * compiler; struct isl_device isl_dev; + int cmd_parser_version; }; struct anv_wsi_interaface; -- cgit v1.2.3 From 8dbfa265a439904628c2d875885e80bc45a90a05 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 28 Mar 2016 14:49:56 -0700 Subject: anv/gen7: DispatchIndirect requires cmd parser 5 Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_cmd_buffer.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 1b53f85419b..35c46008a84 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -638,6 +638,24 @@ void genX(CmdDrawIndexedIndirect)( .PrimitiveTopologyType = pipeline->topology); } +#if GEN_GEN == 7 + +static bool +verify_cmd_parser(const struct anv_device *device, + int required_version, + const char *function) +{ + if (device->instance->physicalDevice.cmd_parser_version < required_version) { + vk_errorf(VK_ERROR_FEATURE_NOT_PRESENT, + "cmd parser version %d is required for %s", + required_version, function); + return false; + } else { + return true; + } +} + +#endif void genX(CmdDispatch)( VkCommandBuffer commandBuffer, @@ -699,6 +717,14 @@ void genX(CmdDispatchIndirect)( uint32_t bo_offset = 
buffer->offset + offset; struct anv_batch *batch = &cmd_buffer->batch; +#if GEN_GEN == 7 + /* Linux 4.4 added command parser version 5 which allows the GPGPU + * indirect dispatch registers to be written. + */ + if (verify_cmd_parser(cmd_buffer->device, 5, "vkCmdDispatchIndirect")) + return; +#endif + if (prog_data->uses_num_work_groups) { cmd_buffer->state.num_workgroups_offset = bo_offset; cmd_buffer->state.num_workgroups_bo = bo; -- cgit v1.2.3 From f56f538ce4753a6fdd969b610f35433fd657e4ee Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 28 Mar 2016 22:26:47 -0700 Subject: anv/gen7: Fix command parser version test with indirect dispatch Caught-by: Ilia Mirkin Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_cmd_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 35c46008a84..d642832dd57 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -721,7 +721,7 @@ void genX(CmdDispatchIndirect)( /* Linux 4.4 added command parser version 5 which allows the GPGPU * indirect dispatch registers to be written. */ - if (verify_cmd_parser(cmd_buffer->device, 5, "vkCmdDispatchIndirect")) + if (!verify_cmd_parser(cmd_buffer->device, 5, "vkCmdDispatchIndirect")) return; #endif -- cgit v1.2.3 From cf2257069cbde19fd177a02c079206914aac5d14 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Mar 2016 20:29:07 -0700 Subject: nir/spirv: Set a default number of invocations for geometry shaders The SPIR-V spec says geometry shaders are supposed to have one invocation by default. The execution mode is only required if there are multiple invocations. 
--- src/compiler/nir/spirv/spirv_to_nir.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index 663f41fa876..948454494fa 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -2676,6 +2676,9 @@ spirv_to_nir(const uint32_t *words, size_t word_count, b->shader = nir_shader_create(NULL, stage, options); + /* Set shader info defaults */ + b->shader->info.gs.invocations = 1; + /* Parse execution modes */ vtn_foreach_execution_mode(b, b->entry_point, vtn_handle_execution_mode, NULL); -- cgit v1.2.3 From ebb0bcc11d24835cd0c30a824fa86bd6577e0684 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 Apr 2016 15:39:46 -0700 Subject: nir: Move variable_get_io_mask back into gather_info It used to be in nir_gather_info.c until I moved it out to nir.h so it could be re-used with some linking code that never got merged. We'll move it back out if and when we have real code to share it with. --- src/compiler/nir/nir.h | 28 ---------------------------- src/compiler/nir/nir_gather_info.c | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index d9e0d679c66..8e45cba5a16 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -348,34 +348,6 @@ nir_variable_is_global(const nir_variable *var) return var->data.mode != nir_var_local && var->data.mode != nir_var_param; } -/** - * Returns the bits in the inputs_read, outputs_written, or - * system_values_read bitfield corresponding to this variable. 
- */ -static inline uint64_t -nir_variable_get_io_mask(nir_variable *var, gl_shader_stage stage) -{ - assert(var->data.mode == nir_var_shader_in || - var->data.mode == nir_var_shader_out || - var->data.mode == nir_var_system_value); - assert(var->data.location >= 0); - - const struct glsl_type *var_type = var->type; - if (stage == MESA_SHADER_GEOMETRY && var->data.mode == nir_var_shader_in) { - /* Most geometry shader inputs are per-vertex arrays */ - if (var->data.location >= VARYING_SLOT_VAR0) - assert(glsl_type_is_array(var_type)); - - if (glsl_type_is_array(var_type)) - var_type = glsl_get_array_element(var_type); - } - - bool is_vertex_input = (var->data.mode == nir_var_shader_in && - stage == MESA_SHADER_VERTEX); - unsigned slots = glsl_count_attribute_slots(var_type, is_vertex_input); - return ((1ull << slots) - 1) << var->data.location; -} - typedef struct nir_register { struct exec_node node; diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 8f0abd33ce6..1b519d7139c 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -89,21 +89,49 @@ gather_info_block(nir_block *block, void *shader) return true; } +/** + * Returns the bits in the inputs_read, outputs_written, or + * system_values_read bitfield corresponding to this variable. 
+ */ +static inline uint64_t +get_io_mask(nir_variable *var, gl_shader_stage stage) +{ + assert(var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out || + var->data.mode == nir_var_system_value); + assert(var->data.location >= 0); + + const struct glsl_type *var_type = var->type; + if (stage == MESA_SHADER_GEOMETRY && var->data.mode == nir_var_shader_in) { + /* Most geometry shader inputs are per-vertex arrays */ + if (var->data.location >= VARYING_SLOT_VAR0) + assert(glsl_type_is_array(var_type)); + + if (glsl_type_is_array(var_type)) + var_type = glsl_get_array_element(var_type); + } + + bool is_vertex_input = (var->data.mode == nir_var_shader_in && + stage == MESA_SHADER_VERTEX); + unsigned slots = glsl_count_attribute_slots(var_type, is_vertex_input); + return ((1ull << slots) - 1) << var->data.location; +} + void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) { shader->info.inputs_read = 0; foreach_list_typed(nir_variable, var, node, &shader->inputs) - shader->info.inputs_read |= nir_variable_get_io_mask(var, shader->stage); + shader->info.inputs_read |= get_io_mask(var, shader->stage); /* TODO: Some day we may need to add stream support to NIR */ shader->info.outputs_written = 0; foreach_list_typed(nir_variable, var, node, &shader->outputs) - shader->info.outputs_written |= nir_variable_get_io_mask(var, shader->stage); + shader->info.outputs_written |= get_io_mask(var, shader->stage); shader->info.system_values_read = 0; foreach_list_typed(nir_variable, var, node, &shader->system_values) - shader->info.system_values_read |= nir_variable_get_io_mask(var, shader->stage); + shader->info.system_values_read |= get_io_mask(var, shader->stage); shader->info.num_textures = 0; shader->info.num_images = 0; -- cgit v1.2.3 From cc1320220f165269776dbd848b89f8561727d453 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 Apr 2016 15:44:43 -0700 Subject: nir/gather_info: Add an assert for supported stages --- 
src/compiler/nir/nir_gather_info.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 1b519d7139c..046836fc534 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -120,6 +120,12 @@ get_io_mask(nir_variable *var, gl_shader_stage stage) void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) { + /* This pass does not yet support tessellation shaders */ + assert(shader->stage == MESA_SHADER_VERTEX || + shader->stage == MESA_SHADER_GEOMETRY || + shader->stage == MESA_SHADER_FRAGMENT || + shader->stage == MESA_SHADER_COMPUTE); + shader->info.inputs_read = 0; foreach_list_typed(nir_variable, var, node, &shader->inputs) shader->info.inputs_read |= get_io_mask(var, shader->stage); -- cgit v1.2.3 From 8c8157bf6f529de881a38f603b873f1a867bf9b5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Apr 2016 11:24:48 -0700 Subject: Remove more spirv2nir remnants --- src/compiler/glsl/.gitignore | 1 - src/compiler/glsl/Makefile.am | 13 ++----------- src/compiler/glsl/Makefile.sources | 16 ---------------- 3 files changed, 2 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/compiler/glsl/.gitignore b/src/compiler/glsl/.gitignore index 6db4e738f6e..09951bac445 100644 --- a/src/compiler/glsl/.gitignore +++ b/src/compiler/glsl/.gitignore @@ -3,7 +3,6 @@ glsl_parser.cpp glsl_parser.h glsl_parser.output glsl_test -spirv2nir subtest-cr/ subtest-lf/ subtest-cr-lf/ diff --git a/src/compiler/glsl/Makefile.am b/src/compiler/glsl/Makefile.am index d6b1f9ed695..9954b812403 100644 --- a/src/compiler/glsl/Makefile.am +++ b/src/compiler/glsl/Makefile.am @@ -66,7 +66,7 @@ check_PROGRAMS = \ tests/sampler-types-test \ tests/uniform-initializer-test -noinst_PROGRAMS = glsl_compiler spirv2nir +noinst_PROGRAMS = glsl_compiler tests_blob_test_SOURCES = \ tests/blob_test.c @@ -135,6 +135,7 @@ libglsl_la_SOURCES = \ 
glsl_parser.h \ $(LIBGLSL_FILES) + glsl_compiler_SOURCES = \ $(GLSL_COMPILER_CXX_FILES) @@ -144,16 +145,6 @@ glsl_compiler_LDADD = \ $(top_builddir)/src/util/libmesautil.la \ $(PTHREAD_LIBS) -spirv2nir_SOURCES = \ - standalone_scaffolding.cpp \ - standalone_scaffolding.h \ - nir/spirv2nir.c - -spirv2nir_LDADD = \ - libglsl.la \ - $(top_builddir)/src/libglsl_util.la \ - $(PTHREAD_LIBS) - glsl_test_SOURCES = \ standalone_scaffolding.cpp \ test.cpp \ diff --git a/src/compiler/glsl/Makefile.sources b/src/compiler/glsl/Makefile.sources index 970fab03bbf..538196a79a9 100644 --- a/src/compiler/glsl/Makefile.sources +++ b/src/compiler/glsl/Makefile.sources @@ -29,9 +29,7 @@ NIR_FILES = \ nir/nir_control_flow_private.h \ nir/nir_dominance.c \ nir/nir_from_ssa.c \ - nir/nir_gather_info.c \ nir/nir_gs_count_vertices.c \ - nir/nir_inline_functions.c \ nir/nir_intrinsics.c \ nir/nir_intrinsics.h \ nir/nir_instr_set.c \ @@ -40,10 +38,8 @@ NIR_FILES = \ nir/nir_lower_alu_to_scalar.c \ nir/nir_lower_atomics.c \ nir/nir_lower_clip.c \ - nir/nir_lower_returns.c \ nir/nir_lower_global_vars_to_local.c \ nir/nir_lower_gs_intrinsics.c \ - nir/nir_lower_indirect_derefs.c \ nir/nir_lower_load_const_to_scalar.c \ nir/nir_lower_locals_to_regs.c \ nir/nir_lower_idiv.c \ @@ -71,11 +67,8 @@ NIR_FILES = \ nir/nir_opt_peephole_select.c \ nir/nir_opt_remove_phis.c \ nir/nir_opt_undef.c \ - nir/nir_phi_builder.c \ - nir/nir_phi_builder.h \ nir/nir_print.c \ nir/nir_remove_dead_variables.c \ - nir/nir_repair_ssa.c \ nir/nir_search.c \ nir/nir_search.h \ nir/nir_split_var_copies.c \ @@ -86,15 +79,6 @@ NIR_FILES = \ nir/nir_worklist.c \ nir/nir_worklist.h -SPIRV_FILES = \ - nir/spirv/nir_spirv.h \ - nir/spirv/spirv_to_nir.c \ - nir/spirv/vtn_alu.c \ - nir/spirv/vtn_cfg.c \ - nir/spirv/vtn_glsl450.c \ - nir/spirv/vtn_private.h \ - nir/spirv/vtn_variables.c - # libglsl LIBGLSL_FILES = \ -- cgit v1.2.3 From 4d040a4ad35f0ab25e0f9ba031a71885a4cec1dd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 
4 Apr 2016 12:05:07 -0700 Subject: glsl/standalone: Get rid of the unneeded _mesa_error_no_memory stub This hasn't been needed since we stopped using the GLSL compiler in the Vulkan driver and it was tripping up scons. Removing it fixes the scons build. --- src/compiler/glsl/standalone_scaffolding.cpp | 6 ------ 1 file changed, 6 deletions(-) (limited to 'src') diff --git a/src/compiler/glsl/standalone_scaffolding.cpp b/src/compiler/glsl/standalone_scaffolding.cpp index 5ce804eed20..e350f702099 100644 --- a/src/compiler/glsl/standalone_scaffolding.cpp +++ b/src/compiler/glsl/standalone_scaffolding.cpp @@ -35,12 +35,6 @@ #include "util/ralloc.h" #include "util/strtod.h" -extern "C" void -_mesa_error_no_memory(const char *caller) -{ - fprintf(stderr, "Mesa error: out of memory in %s", caller); -} - void _mesa_warning(struct gl_context *ctx, const char *fmt, ...) { -- cgit v1.2.3 From cb317b8d074853f93b7d6965f8652bd200085d5b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Apr 2016 12:09:10 -0700 Subject: glsl: Stop force-enabling compute shaders This isn't needed since we no longer use the GLSL compiler in Vulkan. 
--- src/compiler/glsl/glsl_parser_extras.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 2941277b525..3dc68741902 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -86,8 +86,6 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->extensions = &ctx->Extensions; - this->ARB_compute_shader_enable = true; - this->Const.MaxLights = ctx->Const.MaxLights; this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes; this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits; -- cgit v1.2.3 From fe247bbe922ab862914bc76c10557157a06700ac Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Apr 2016 12:10:20 -0700 Subject: nir: Stop double-printing function arguments --- src/compiler/nir/nir_print.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 60b74d13dc7..17ae3681e21 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -964,16 +964,6 @@ print_function_impl(nir_function_impl *impl, print_state *state) fprintf(fp, "{\n"); - for (unsigned i = 0; i < impl->num_params; i++) { - fprintf(fp, "\t"); - print_var_decl(impl->params[i], state); - } - - if (impl->return_var) { - fprintf(fp, "\t"); - print_var_decl(impl->return_var, state); - } - nir_foreach_variable(var, &impl->locals) { fprintf(fp, "\t"); print_var_decl(var, state); -- cgit v1.2.3 From db35a851ad03d14ae4ba81095c09cb2afd511e4f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Apr 2016 14:25:36 -0700 Subject: i965/defines: Unconditionally define primitives --- src/mesa/drivers/dri/i965/brw_defines.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 8ef5afea149..60b696cfb98 100644 --- 
a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -63,7 +63,6 @@ # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) -#ifndef _3DPRIM_POINTLIST /* FIXME: Avoid clashing with defines from bdw_pack.h */ #define _3DPRIM_POINTLIST 0x01 #define _3DPRIM_LINELIST 0x02 #define _3DPRIM_LINESTRIP 0x03 @@ -87,7 +86,6 @@ #define _3DPRIM_TRIFAN_NOSTIPPLE 0x16 #define _3DPRIM_PATCHLIST(n) ({ assert(n > 0 && n <= 32); 0x20 + (n - 1); }) -#endif /* bdw_pack.h */ /* We use this offset to be able to pass native primitive types in struct * _mesa_prim::mode. Native primitive types are BRW_PRIM_OFFSET + -- cgit v1.2.3 From e5c833db5a7ad35a0dc4b0f170fafd090c2b8805 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 4 Apr 2016 14:51:35 -0700 Subject: i965/compiler: Remove a redundant declaration of brw_compiler_create --- src/mesa/drivers/dri/i965/brw_compiler.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index cf0b088613b..231e0001d54 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -100,9 +100,6 @@ struct brw_compiler { bool precise_trig; }; -struct brw_compiler * -brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo); - /** * Program key structures. -- cgit v1.2.3 From 068935844cff985c1b68a6d8aaa9b014f2584e5b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 6 Apr 2016 17:07:08 -0700 Subject: genxml: Add GEN6 genxml Not used yet, but let's put it here for now. 
--- src/intel/genxml/Makefile.am | 1 + src/intel/genxml/gen6.xml | 1939 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1940 insertions(+) create mode 100644 src/intel/genxml/gen6.xml (limited to 'src') diff --git a/src/intel/genxml/Makefile.am b/src/intel/genxml/Makefile.am index 36ba526c154..77b2f193d9f 100644 --- a/src/intel/genxml/Makefile.am +++ b/src/intel/genxml/Makefile.am @@ -20,6 +20,7 @@ # IN THE SOFTWARE. BUILT_SOURCES = \ + gen6_pack.h \ gen7_pack.h \ gen75_pack.h \ gen8_pack.h \ diff --git a/src/intel/genxml/gen6.xml b/src/intel/genxml/gen6.xml new file mode 100644 index 00000000000..027ab34b288 --- /dev/null +++ b/src/intel/genxml/gen6.xml @@ -0,0 +1,1939 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- cgit v1.2.3 From 15895bf777bd5f68a197506fdeaced28aa440622 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 5 Apr 2016 10:37:54 -0700 Subject: i965/fs: Use the scale helper in surface_builder As requested by Curro --- src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp index 4adffdd75fb..96731ffac7f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp @@ -725,7 +725,7 @@ namespace { */ if (is_signed && widths[c] < 32) bld.AND(offset(dst, bld, c), offset(dst, bld, c), - brw_imm_d((1 << widths[c]) - 1)); + brw_imm_d(scale(widths[c]))); } } @@ -804,7 +804,7 @@ namespace { */ if (is_signed && widths[c] < 32) bld.AND(offset(dst, bld, c), offset(dst, bld, c), - brw_imm_d((1 << widths[c]) - 1)); + brw_imm_d(scale(widths[c]))); } } -- cgit v1.2.3 From dd6f7200466b06e13f7c9cd70710bc2c57433706 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 15:20:26 -0700 Subject: anv/blit2d: Remove the tex_dim parameter from copy_fragment_shader Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c 
b/src/intel/vulkan/anv_meta_blit2d.c index 734ba8ec1d6..f40dc2f7828 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -390,7 +390,7 @@ build_nir_vertex_shader(void) } static nir_shader * -build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) +build_nir_copy_fragment_shader() { const struct glsl_type *vec4 = glsl_vec4_type(); const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3); @@ -405,7 +405,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); const struct glsl_type *sampler_type = - glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, + glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, glsl_get_base_type(vec4)); nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); @@ -413,7 +413,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) sampler->data.binding = 0; nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); - tex->sampler_dim = tex_dim; + tex->sampler_dim = GLSL_SAMPLER_DIM_2D; tex->op = nir_texop_txf; tex->src[0].src_type = nir_tex_src_coord; tex->src[0].src = nir_src_for_ssa(tex_pos); @@ -501,7 +501,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }; struct anv_shader_module fs_2d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), + .nir = build_nir_copy_fragment_shader(), }; VkPipelineVertexInputStateCreateInfo vi_create_info = { -- cgit v1.2.3 From b38a0d64ba2274c3d48b731e86a7bbb04fca81c8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 15:25:47 -0700 Subject: anv/meta2d: Don't declare an array sampler in the fragment shader With the new blit framework we aren't using array textures and, from talking with Nanley, we don't think it's going to be useful in the future either. Just get rid of it for now. 
Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index f40dc2f7828..10e9ba3befd 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -393,19 +393,19 @@ static nir_shader * build_nir_copy_fragment_shader() { const struct glsl_type *vec4 = glsl_vec4_type(); - const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3); + const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2); nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec3, "v_tex_pos"); + vec2, "v_tex_pos"); tex_pos_in->data.location = VARYING_SLOT_VAR0; nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); const struct glsl_type *sampler_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, + glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, glsl_get_base_type(vec4)); nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); @@ -420,7 +420,7 @@ build_nir_copy_fragment_shader() tex->src[1].src_type = nir_tex_src_lod; tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); tex->dest_type = nir_type_float; /* TODO */ - tex->is_array = glsl_sampler_type_is_array(sampler_type); + tex->is_array = false; tex->coord_components = tex_pos->num_components; tex->texture = nir_deref_var_create(tex, sampler); tex->sampler = NULL; -- cgit v1.2.3 From 9553fd2c97bda18b997845610be365d6adf0fd4c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 15:39:17 -0700 Subject: anv/blit2d: Fix a NIR writemask Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') 
diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 10e9ba3befd..144a62481b8 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -431,7 +431,7 @@ build_nir_copy_fragment_shader() nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color"); color_out->data.location = FRAG_RESULT_DATA0; - nir_store_var(&b, color_out, &tex->dest.ssa, 4); + nir_store_var(&b, color_out, &tex->dest.ssa, 0xf); return b.shader; } -- cgit v1.2.3 From afada45590789191e96860851df4696170e4231c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Mar 2016 14:06:32 -0700 Subject: anv/blit2d: Fix whitespace Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 144a62481b8..6a7845fd30f 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -131,11 +131,11 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, static void meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_image_view *src_iview, - VkOffset3D src_offset, - struct anv_image_view *dest_iview, - VkOffset3D dest_offset, - VkExtent3D extent) + struct anv_image_view *src_iview, + VkOffset3D src_offset, + struct anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D extent) { struct anv_device *device = cmd_buffer->device; @@ -348,11 +348,11 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, /* Perform blit */ meta_emit_blit2d(cmd_buffer, - &src_iview, - (VkOffset3D){rects[r].src_x, rects[r].src_y, 0}, - &dst_iview, - (VkOffset3D){rects[r].dst_x, rects[r].dst_y, 0}, - (VkExtent3D){rects[r].width, rects[r].height, 1}); + &src_iview, + (VkOffset3D){rects[r].src_x, rects[r].src_y, 0}, + &dst_iview, + (VkOffset3D){rects[r].dst_x, rects[r].dst_y, 0}, + 
(VkExtent3D){rects[r].width, rects[r].height, 1}); anv_DestroyImage(vk_device, src_img, &cmd_buffer->pool->alloc); anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); -- cgit v1.2.3 From e466164c87b95e5a5ef6263ad3f62c37d37b9488 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 15:37:47 -0700 Subject: anv/blit2d: Break the texelfetch portion of shader building into a helper Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 59 +++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 6a7845fd30f..d3b0adce641 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -389,49 +389,62 @@ build_nir_vertex_shader(void) return b.shader; } -static nir_shader * -build_nir_copy_fragment_shader() -{ - const struct glsl_type *vec4 = glsl_vec4_type(); - const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2); - nir_builder b; - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); - - nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec2, "v_tex_pos"); - tex_pos_in->data.location = VARYING_SLOT_VAR0; - nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); +typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *, + struct anv_device *, + nir_ssa_def *, nir_ssa_def *); +static nir_ssa_def * +build_nir_texel_fetch(struct nir_builder *b, struct anv_device *device, + nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch) +{ const struct glsl_type *sampler_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, - glsl_get_base_type(vec4)); - nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, + glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT); + nir_variable *sampler = 
nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex"); sampler->data.descriptor_set = 0; sampler->data.binding = 0; - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); + nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2); tex->sampler_dim = GLSL_SAMPLER_DIM_2D; tex->op = nir_texop_txf; tex->src[0].src_type = nir_tex_src_coord; tex->src[0].src = nir_src_for_ssa(tex_pos); tex->src[1].src_type = nir_tex_src_lod; - tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); + tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0)); tex->dest_type = nir_type_float; /* TODO */ tex->is_array = false; - tex->coord_components = tex_pos->num_components; + tex->coord_components = 2; tex->texture = nir_deref_var_create(tex, sampler); tex->sampler = NULL; nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); - nir_builder_instr_insert(&b, &tex->instr); + nir_builder_instr_insert(b, &tex->instr); + + return &tex->dest.ssa; +} + +static nir_shader * +build_nir_copy_fragment_shader(struct anv_device *device, + texel_fetch_build_func txf_func) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec2, "v_tex_pos"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color"); color_out->data.location = FRAG_RESULT_DATA0; - nir_store_var(&b, color_out, &tex->dest.ssa, 0xf); + + nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); + nir_ssa_def *color = txf_func(&b, device, tex_pos, NULL); + nir_store_var(&b, color_out, color, 0xf); return b.shader; } @@ -501,7 +514,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }; struct 
anv_shader_module fs_2d = { - .nir = build_nir_copy_fragment_shader(), + .nir = build_nir_copy_fragment_shader(device, build_nir_texel_fetch), }; VkPipelineVertexInputStateCreateInfo vi_create_info = { -- cgit v1.2.3 From b0a6cfb9b46dcd6b1c60d85c0c4b4ca119d53e5c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Mar 2016 14:24:31 -0700 Subject: anv/blit2d: Pass the source pitch into the shader Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index d3b0adce641..a78536f4e52 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -132,6 +132,7 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, static void meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_image_view *src_iview, + uint32_t src_pitch, VkOffset3D src_offset, struct anv_image_view *dest_iview, VkOffset3D dest_offset, @@ -159,7 +160,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .tex_coord = { src_offset.x + extent.width, src_offset.y + extent.height, - src_offset.z, + src_pitch, }, }; @@ -171,7 +172,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .tex_coord = { src_offset.x, src_offset.y + extent.height, - src_offset.z, + src_pitch, }, }; @@ -183,7 +184,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .tex_coord = { src_offset.x, src_offset.y, - src_offset.z, + src_pitch, }, }; @@ -348,7 +349,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, /* Perform blit */ meta_emit_blit2d(cmd_buffer, - &src_iview, + &src_iview, src->pitch, (VkOffset3D){rects[r].src_x, rects[r].src_y, 0}, &dst_iview, (VkOffset3D){rects[r].dst_x, rects[r].dst_y, 0}, @@ -428,22 +429,26 @@ build_nir_copy_fragment_shader(struct anv_device *device, texel_fetch_build_func txf_func) { const struct glsl_type *vec4 = glsl_vec4_type(); - const struct glsl_type 
*vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2); + const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3); nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec2, "v_tex_pos"); + vec3, "v_tex_pos"); tex_pos_in->data.location = VARYING_SLOT_VAR0; nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color"); color_out->data.location = FRAG_RESULT_DATA0; - nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); - nir_ssa_def *color = txf_func(&b, device, tex_pos, NULL); + nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); + unsigned swiz[4] = { 0, 1 }; + nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false); + nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2); + + nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch); nir_store_var(&b, color_out, color, 0xf); return b.shader; -- cgit v1.2.3 From 5187ab05b8f0719b0ecb922e36c5d5c3d118ea31 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 11:51:50 -0700 Subject: anv/blit2d: Inline meta_emit_blit2d Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 361 +++++++++++++++++-------------------- 1 file changed, 170 insertions(+), 191 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index a78536f4e52..fc72f7808f8 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -129,190 +129,6 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, }, cmd_buffer, img_o, usage); } -static void -meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_image_view *src_iview, - uint32_t src_pitch, - VkOffset3D src_offset, - struct anv_image_view *dest_iview, - VkOffset3D dest_offset, - VkExtent3D extent) -{ - struct anv_device *device = 
cmd_buffer->device; - - struct blit_vb_data { - float pos[2]; - float tex_coord[3]; - } *vb_data; - - unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); - - struct anv_state vb_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); - memset(vb_state.map, 0, sizeof(struct anv_vue_header)); - vb_data = vb_state.map + sizeof(struct anv_vue_header); - - vb_data[0] = (struct blit_vb_data) { - .pos = { - dest_offset.x + extent.width, - dest_offset.y + extent.height, - }, - .tex_coord = { - src_offset.x + extent.width, - src_offset.y + extent.height, - src_pitch, - }, - }; - - vb_data[1] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y + extent.height, - }, - .tex_coord = { - src_offset.x, - src_offset.y + extent.height, - src_pitch, - }, - }; - - vb_data[2] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y, - }, - .tex_coord = { - src_offset.x, - src_offset.y, - src_pitch, - }, - }; - - anv_state_clflush(vb_state); - - struct anv_buffer vertex_buffer = { - .device = device, - .size = vb_size, - .bo = &device->dynamic_state_block_pool.bo, - .offset = vb_state.offset, - }; - - anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, - (VkBuffer[]) { - anv_buffer_to_handle(&vertex_buffer), - anv_buffer_to_handle(&vertex_buffer) - }, - (VkDeviceSize[]) { - 0, - sizeof(struct anv_vue_header), - }); - - VkDescriptorPool desc_pool; - anv_CreateDescriptorPool(anv_device_to_handle(device), - &(const VkDescriptorPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .maxSets = 1, - .poolSizeCount = 1, - .pPoolSizes = (VkDescriptorPoolSize[]) { - { - .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1 - }, - } - }, &cmd_buffer->pool->alloc, &desc_pool); - - VkDescriptorSet set; - anv_AllocateDescriptorSets(anv_device_to_handle(device), - &(VkDescriptorSetAllocateInfo) { - .sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = desc_pool, - .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit2d.ds_layout - }, &set); - - anv_UpdateDescriptorSets(anv_device_to_handle(device), - 1, /* writeCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = set, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = NULL, - .imageView = anv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - } - }, 0, NULL); - - VkFramebuffer fb; - anv_CreateFramebuffer(anv_device_to_handle(device), - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(dest_iview), - }, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .layers = 1 - }, &cmd_buffer->pool->alloc, &fb); - - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = device->meta_state.blit2d.render_pass, - .framebuffer = fb, - .renderArea = { - .offset = { dest_offset.x, dest_offset.y }, - .extent = { extent.width, extent.height }, - }, - .clearValueCount = 0, - .pClearValues = NULL, - }, VK_SUBPASS_CONTENTS_INLINE); - - VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src; - - if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { - anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - } - - anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - - 
anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.pipeline_layout, 0, 1, - &set, 0, NULL); - - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); - - ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); - - /* At the point where we emit the draw call, all data from the - * descriptor sets, etc. has been used. We are free to delete it. - */ - anv_DestroyDescriptorPool(anv_device_to_handle(device), - desc_pool, &cmd_buffer->pool->alloc); - anv_DestroyFramebuffer(anv_device_to_handle(device), fb, - &cmd_buffer->pool->alloc); -} - void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save) @@ -335,6 +151,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, unsigned num_rects, struct anv_meta_blit2d_rect *rects) { + struct anv_device *device = cmd_buffer->device; VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; @@ -347,13 +164,175 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, create_iview(cmd_buffer, src, &rects[r], src_usage, &src_img, &src_iview); create_iview(cmd_buffer, dst, &rects[r], dst_usage, &dst_img, &dst_iview); - /* Perform blit */ - meta_emit_blit2d(cmd_buffer, - &src_iview, src->pitch, - (VkOffset3D){rects[r].src_x, rects[r].src_y, 0}, - &dst_iview, - (VkOffset3D){rects[r].dst_x, rects[r].dst_y, 0}, - (VkExtent3D){rects[r].width, rects[r].height, 1}); + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + 
rects[r].dst_x + rects[r].width, + rects[r].dst_y + rects[r].height, + }, + .tex_coord = { + rects[r].src_x + rects[r].width, + rects[r].src_y + rects[r].height, + src->pitch, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + rects[r].dst_x, + rects[r].dst_y + rects[r].height, + }, + .tex_coord = { + rects[r].src_x, + rects[r].src_y + rects[r].height, + src->pitch, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + rects[r].dst_x, + rects[r].dst_y, + }, + .tex_coord = { + rects[r].src_x, + rects[r].src_y, + src->pitch, + }, + }; + + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + }); + + VkDescriptorPool desc_pool; + anv_CreateDescriptorPool(vk_device, + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &desc_pool); + + VkDescriptorSet set; + anv_AllocateDescriptorSets(vk_device, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit2d.ds_layout + }, &set); + + anv_UpdateDescriptorSets(vk_device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = 
(VkDescriptorImageInfo[]) { + { + .sampler = NULL, + .imageView = anv_image_view_to_handle(&src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(vk_device, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&dst_iview), + }, + .width = dst_iview.extent.width, + .height = dst_iview.extent.height, + .layers = 1 + }, &cmd_buffer->pool->alloc, &fb); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit2d.render_pass, + .framebuffer = fb, + .renderArea = { + .offset = { rects[r].dst_x, rects[r].dst_y, }, + .extent = { rects[r].width, rects[r].height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src; + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = dst_iview.extent.width, + .height = dst_iview.extent.height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit2d.pipeline_layout, 0, 1, + &set, 0, NULL); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. 
+ */ + anv_DestroyDescriptorPool(vk_device, desc_pool, &cmd_buffer->pool->alloc); + anv_DestroyFramebuffer(vk_device, fb, &cmd_buffer->pool->alloc); anv_DestroyImage(vk_device, src_img, &cmd_buffer->pool->alloc); anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); -- cgit v1.2.3 From 43fbdd7156197518c2f2f8a1c0befa7f4ffd16e1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 13:32:08 -0700 Subject: anv/blit2d: Factor binding the source image into a helper Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 139 ++++++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 57 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index fc72f7808f8..7424a00b7f2 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -129,6 +129,84 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, }, cmd_buffer, img_o, usage); } +struct blit2d_src_temps { + VkImage image; + struct anv_image_view iview; + VkDescriptorPool desc_pool; + VkDescriptorSet set; +}; + +static void +blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + struct anv_meta_blit2d_rect *rect, + struct blit2d_src_temps *tmp) +{ + struct anv_device *device = cmd_buffer->device; + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + create_iview(cmd_buffer, src, rect, VK_IMAGE_USAGE_SAMPLED_BIT, + &tmp->image, &tmp->iview); + + anv_CreateDescriptorPool(vk_device, + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &tmp->desc_pool); + + anv_AllocateDescriptorSets(vk_device, + &(VkDescriptorSetAllocateInfo) { + .sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = tmp->desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit2d.ds_layout + }, &tmp->set); + + anv_UpdateDescriptorSets(vk_device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = NULL, + .imageView = anv_image_view_to_handle(&tmp->iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit2d.pipeline_layout, 0, 1, + &tmp->set, 0, NULL); +} + +static void +blit2d_unbind_src(struct anv_cmd_buffer *cmd_buffer, + struct blit2d_src_temps *tmp) +{ + anv_DestroyDescriptorPool(anv_device_to_handle(cmd_buffer->device), + tmp->desc_pool, &cmd_buffer->pool->alloc); + anv_DestroyImage(anv_device_to_handle(cmd_buffer->device), + tmp->image, &cmd_buffer->pool->alloc); +} + void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save) @@ -153,15 +231,14 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, { struct anv_device *device = cmd_buffer->device; VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; for (unsigned r = 0; r < num_rects; ++r) { - VkImage src_img; + struct blit2d_src_temps src_temps; + blit2d_bind_src(cmd_buffer, src, &rects[r], &src_temps); + VkImage dst_img; - struct anv_image_view src_iview; struct anv_image_view dst_iview; - create_iview(cmd_buffer, src, &rects[r], src_usage, &src_img, &src_iview); create_iview(cmd_buffer, dst, &rects[r], dst_usage, &dst_img, &dst_iview); struct blit_vb_data { @@ 
-231,51 +308,6 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, sizeof(struct anv_vue_header), }); - VkDescriptorPool desc_pool; - anv_CreateDescriptorPool(vk_device, - &(const VkDescriptorPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .maxSets = 1, - .poolSizeCount = 1, - .pPoolSizes = (VkDescriptorPoolSize[]) { - { - .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1 - }, - } - }, &cmd_buffer->pool->alloc, &desc_pool); - - VkDescriptorSet set; - anv_AllocateDescriptorSets(vk_device, - &(VkDescriptorSetAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = desc_pool, - .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit2d.ds_layout - }, &set); - - anv_UpdateDescriptorSets(vk_device, - 1, /* writeCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = set, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = NULL, - .imageView = anv_image_view_to_handle(&src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - } - }, 0, NULL); - VkFramebuffer fb; anv_CreateFramebuffer(vk_device, &(VkFramebufferCreateInfo) { @@ -319,11 +351,6 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, .maxDepth = 1.0f, }); - anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.pipeline_layout, 0, 1, - &set, 0, NULL); - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); @@ -331,10 +358,8 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. 
*/ - anv_DestroyDescriptorPool(vk_device, desc_pool, &cmd_buffer->pool->alloc); + blit2d_unbind_src(cmd_buffer, &src_temps); anv_DestroyFramebuffer(vk_device, fb, &cmd_buffer->pool->alloc); - - anv_DestroyImage(vk_device, src_img, &cmd_buffer->pool->alloc); anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); } } -- cgit v1.2.3 From 00e70868ee81f964bbdb33b40938eec7e4bef1f3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 13:47:41 -0700 Subject: anv/blit2d: Enhance teardown and clean up init error paths Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 117 ++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 60 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 7424a00b7f2..3dc0c66d1cc 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -461,18 +461,29 @@ build_nir_copy_fragment_shader(struct anv_device *device, void anv_device_finish_meta_blit2d_state(struct anv_device *device) { - anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit2d.render_pass, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_2d_src, - &device->meta_state.alloc); - anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_layout, - &device->meta_state.alloc); - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit2d.ds_layout, - &device->meta_state.alloc); + if (device->meta_state.blit2d.render_pass) { + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit2d.render_pass, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.pipeline_2d_src) { + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit2d.pipeline_2d_src, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.pipeline_layout) 
{ + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit2d.pipeline_layout, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.ds_layout) { + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit2d.ds_layout, + &device->meta_state.alloc); + } } VkResult @@ -480,6 +491,8 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) { VkResult result; + zero(device->meta_state.blit2d); + result = anv_CreateRenderPass(anv_device_to_handle(device), &(VkRenderPassCreateInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, @@ -513,6 +526,33 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) if (result != VK_SUCCESS) goto fail; + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }, &device->meta_state.alloc, &device->meta_state.blit2d.ds_layout); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit2d.ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_layout); + if (result != VK_SUCCESS) + goto fail; + /* We don't use a vertex shader for blitting, but instead build and pass * the VUEs directly to the rasterization backend. 
However, we do need * to provide GLSL source for the vertex shader so that the compiler @@ -567,36 +607,6 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) } }; - VkDescriptorSetLayoutCreateInfo ds_layout_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - }, - } - }; - result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), - &ds_layout_info, - &device->meta_state.alloc, - &device->meta_state.blit2d.ds_layout); - if (result != VK_SUCCESS) - goto fail_render_pass; - - result = anv_CreatePipelineLayout(anv_device_to_handle(device), - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit2d.ds_layout, - }, - &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_layout); - if (result != VK_SUCCESS) - goto fail_descriptor_set_layout; - VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -687,29 +697,16 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_2d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_layout; ralloc_free(vs.nir); ralloc_free(fs_2d.nir); - return VK_SUCCESS; + if (result != VK_SUCCESS) + goto fail; - fail_pipeline_layout: - anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_layout, - &device->meta_state.alloc); - fail_descriptor_set_layout: - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit2d.ds_layout, - &device->meta_state.alloc); - fail_render_pass: - 
anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit2d.render_pass, - &device->meta_state.alloc); + return VK_SUCCESS; - ralloc_free(vs.nir); - ralloc_free(fs_2d.nir); - fail: +fail: + anv_device_finish_meta_blit2d_state(device); return result; } -- cgit v1.2.3 From 28eb02e345c5642d49037759b5b0eee8d71e7feb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 13:51:21 -0700 Subject: anv/blit2d: Rename the descriptor set and pipeline layouts Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 20 ++++++++++---------- src/intel/vulkan/anv_private.h | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 3dc0c66d1cc..878ae3f096e 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -168,7 +168,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = tmp->desc_pool, .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit2d.ds_layout + .pSetLayouts = &device->meta_state.blit2d.image_ds_layout }, &tmp->set); anv_UpdateDescriptorSets(vk_device, @@ -193,7 +193,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.pipeline_layout, 0, 1, + device->meta_state.blit2d.image_p_layout, 0, 1, &tmp->set, 0, NULL); } @@ -473,15 +473,15 @@ anv_device_finish_meta_blit2d_state(struct anv_device *device) &device->meta_state.alloc); } - if (device->meta_state.blit2d.pipeline_layout) { + if (device->meta_state.blit2d.image_p_layout) { anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_layout, + device->meta_state.blit2d.image_p_layout, &device->meta_state.alloc); } - if (device->meta_state.blit2d.ds_layout) { + if 
(device->meta_state.blit2d.image_ds_layout) { anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit2d.ds_layout, + device->meta_state.blit2d.image_ds_layout, &device->meta_state.alloc); } } @@ -539,7 +539,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .pImmutableSamplers = NULL }, } - }, &device->meta_state.alloc, &device->meta_state.blit2d.ds_layout); + }, &device->meta_state.alloc, &device->meta_state.blit2d.image_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -547,9 +547,9 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) &(VkPipelineLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit2d.ds_layout, + .pSetLayouts = &device->meta_state.blit2d.image_ds_layout, }, - &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_layout); + &device->meta_state.alloc, &device->meta_state.blit2d.image_p_layout); if (result != VK_SUCCESS) goto fail; @@ -678,7 +678,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }, }, .flags = 0, - .layout = device->meta_state.blit2d.pipeline_layout, + .layout = device->meta_state.blit2d.image_p_layout, .renderPass = device->meta_state.blit2d.render_pass, .subpass = 0, }; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 48ebff456e3..9e3978a4691 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -612,8 +612,8 @@ struct anv_meta_state { /** Pipeline that copies from a 2D image. 
*/ VkPipeline pipeline_2d_src; - VkPipelineLayout pipeline_layout; - VkDescriptorSetLayout ds_layout; + VkPipelineLayout image_p_layout; + VkDescriptorSetLayout image_ds_layout; } blit2d; struct { -- cgit v1.2.3 From 85b9a007acb9bf53e509974f4112accb8e9a29f4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 13:54:56 -0700 Subject: anv/blit2d: Add layouts for using a texel buffer source Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 59 +++++++++++++++++++++++++++++++------- src/intel/vulkan/anv_private.h | 6 ++-- 2 files changed, 53 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 878ae3f096e..22b763aee2d 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -168,7 +168,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = tmp->desc_pool, .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit2d.image_ds_layout + .pSetLayouts = &device->meta_state.blit2d.img_ds_layout }, &tmp->set); anv_UpdateDescriptorSets(vk_device, @@ -193,7 +193,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.image_p_layout, 0, 1, + device->meta_state.blit2d.img_p_layout, 0, 1, &tmp->set, 0, NULL); } @@ -473,15 +473,27 @@ anv_device_finish_meta_blit2d_state(struct anv_device *device) &device->meta_state.alloc); } - if (device->meta_state.blit2d.image_p_layout) { + if (device->meta_state.blit2d.img_p_layout) { anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit2d.image_p_layout, + device->meta_state.blit2d.img_p_layout, &device->meta_state.alloc); } - if (device->meta_state.blit2d.image_ds_layout) { + if (device->meta_state.blit2d.img_ds_layout) { 
anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit2d.image_ds_layout, + device->meta_state.blit2d.img_ds_layout, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.buf_p_layout) { + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit2d.buf_p_layout, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.buf_ds_layout) { + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit2d.buf_ds_layout, &device->meta_state.alloc); } } @@ -539,7 +551,34 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .pImmutableSamplers = NULL }, } - }, &device->meta_state.alloc, &device->meta_state.blit2d.image_ds_layout); + }, &device->meta_state.alloc, &device->meta_state.blit2d.img_ds_layout); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit2d.img_ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit2d.img_p_layout); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }, &device->meta_state.alloc, &device->meta_state.blit2d.buf_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -547,9 +586,9 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) &(VkPipelineLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, - .pSetLayouts = 
&device->meta_state.blit2d.image_ds_layout, + .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout, }, - &device->meta_state.alloc, &device->meta_state.blit2d.image_p_layout); + &device->meta_state.alloc, &device->meta_state.blit2d.buf_p_layout); if (result != VK_SUCCESS) goto fail; @@ -678,7 +717,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }, }, .flags = 0, - .layout = device->meta_state.blit2d.image_p_layout, + .layout = device->meta_state.blit2d.img_p_layout, .renderPass = device->meta_state.blit2d.render_pass, .subpass = 0, }; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 9e3978a4691..7c140a33cb7 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -612,8 +612,10 @@ struct anv_meta_state { /** Pipeline that copies from a 2D image. */ VkPipeline pipeline_2d_src; - VkPipelineLayout image_p_layout; - VkDescriptorSetLayout image_ds_layout; + VkPipelineLayout img_p_layout; + VkDescriptorSetLayout img_ds_layout; + VkPipelineLayout buf_p_layout; + VkDescriptorSetLayout buf_ds_layout; } blit2d; struct { -- cgit v1.2.3 From 4ee80e8816091869943d98cbe261c49406bb8039 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 15:03:34 -0700 Subject: anv/blit2d: Refactor in preparation for different src/dst types Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 349 ++++++++++++++++++++++++------------- src/intel/vulkan/anv_private.h | 9 +- 2 files changed, 238 insertions(+), 120 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 22b763aee2d..8e63eee462d 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -24,6 +24,44 @@ #include "anv_meta.h" #include "nir/nir_builder.h" +enum blit2d_src_type { + /* We can make a "normal" image view of this source and just texture + * from it like you would in any other shader. 
+ */ + BLIT2D_SRC_TYPE_NORMAL, + + /* The source is W-tiled and we need to detile manually in the shader. + * This will work on any platform but is needed for all W-tiled sources + * prior to Broadwell. + */ + BLIT2D_SRC_TYPE_W_DETILE, + + BLIT2D_NUM_SRC_TYPES, +}; + +enum blit2d_dst_type { + /* We can bind this destination as a "normal" render target and render + * to it just like you would anywhere else. + */ + BLIT2D_DST_TYPE_NORMAL, + + /* The destination is W-tiled and we need to do the tiling manually in + * the shader. This is required for all W-tiled destinations. + * + * Sky Lake adds a feature for providing explicit stencil values in the + * shader but mesa doesn't support that yet so neither do we. + */ + BLIT2D_DST_TYPE_W_TILE, + + /* The destination has a 3-channel RGB format. Since we can't render to + * non-power-of-two textures, we have to bind it as a red texture and + * select the correct component for the given red pixel in the shader. + */ + BLIT2D_DST_TYPE_RGB, + + BLIT2D_NUM_DST_TYPES, +}; + static VkFormat vk_format_for_size(int bs) { @@ -139,6 +177,7 @@ struct blit2d_src_temps { static void blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *src, + enum blit2d_src_type src_type, struct anv_meta_blit2d_rect *rect, struct blit2d_src_temps *tmp) { @@ -199,6 +238,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, static void blit2d_unbind_src(struct anv_cmd_buffer *cmd_buffer, + enum blit2d_src_type src_type, struct blit2d_src_temps *tmp) { anv_DestroyDescriptorPool(anv_device_to_handle(cmd_buffer->device), @@ -222,12 +262,27 @@ anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT)); } -void -anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_blit2d_surf *src, - struct anv_meta_blit2d_surf *dst, - unsigned num_rects, - struct anv_meta_blit2d_rect *rects) +static void +bind_pipeline(struct anv_cmd_buffer *cmd_buffer, + enum blit2d_src_type src_type, + enum 
blit2d_dst_type dst_type) +{ + VkPipeline pipeline = + cmd_buffer->device->meta_state.blit2d.pipelines[src_type][dst_type]; + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } +} + +static void +anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + enum blit2d_src_type src_type, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) { struct anv_device *device = cmd_buffer->device; VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); @@ -235,7 +290,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, for (unsigned r = 0; r < num_rects; ++r) { struct blit2d_src_temps src_temps; - blit2d_bind_src(cmd_buffer, src, &rects[r], &src_temps); + blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps); VkImage dst_img; struct anv_image_view dst_iview; @@ -334,12 +389,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, .pClearValues = NULL, }, VK_SUBPASS_CONTENTS_INLINE); - VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src; - - if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { - anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - } + bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_NORMAL); anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { @@ -358,12 +408,39 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. 
*/ - blit2d_unbind_src(cmd_buffer, &src_temps); + blit2d_unbind_src(cmd_buffer, src_type, &src_temps); anv_DestroyFramebuffer(vk_device, fb, &cmd_buffer->pool->alloc); anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); } } +void +anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) +{ + enum blit2d_src_type src_type; + if (src->tiling == ISL_TILING_W && cmd_buffer->device->info.gen < 8) { + src_type = BLIT2D_SRC_TYPE_W_DETILE; + } else { + src_type = BLIT2D_SRC_TYPE_NORMAL; + } + + if (dst->tiling == ISL_TILING_W) { + assert(dst->bs == 1); + anv_finishme("Blitting to w-tiled destinations not yet supported"); + return; + } else if (dst->bs % 3 == 0) { + anv_finishme("Blitting to RGB destinations not yet supported"); + return; + } else { + assert(util_is_power_of_two(dst->bs)); + anv_meta_blit2d_normal_dst(cmd_buffer, src, src_type, dst, + num_rects, rects); + } +} static nir_shader * build_nir_vertex_shader(void) @@ -467,12 +544,6 @@ anv_device_finish_meta_blit2d_state(struct anv_device *device) &device->meta_state.alloc); } - if (device->meta_state.blit2d.pipeline_2d_src) { - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_2d_src, - &device->meta_state.alloc); - } - if (device->meta_state.blit2d.img_p_layout) { anv_DestroyPipelineLayout(anv_device_to_handle(device), device->meta_state.blit2d.img_p_layout, @@ -496,101 +567,47 @@ anv_device_finish_meta_blit2d_state(struct anv_device *device) device->meta_state.blit2d.buf_ds_layout, &device->meta_state.alloc); } + + for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) { + for (unsigned dst = 0; dst < BLIT2D_NUM_DST_TYPES; dst++) { + if (device->meta_state.blit2d.pipelines[src][dst]) { + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit2d.pipelines[src][dst], + &device->meta_state.alloc); + } + } + } } 
-VkResult -anv_device_init_meta_blit2d_state(struct anv_device *device) +static VkResult +blit2d_init_pipeline(struct anv_device *device, + enum blit2d_src_type src_type, + enum blit2d_dst_type dst_type) { VkResult result; - zero(device->meta_state.blit2d); - - result = anv_CreateRenderPass(anv_device_to_handle(device), - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = &(VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveAttachmentCount = 1, - .pPreserveAttachments = (uint32_t[]) { 0 }, - }, - .dependencyCount = 0, - }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass); - if (result != VK_SUCCESS) - goto fail; - - result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), - &(VkDescriptorSetLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - }, - } - }, &device->meta_state.alloc, &device->meta_state.blit2d.img_ds_layout); - if (result != VK_SUCCESS) - goto fail; - - result = 
anv_CreatePipelineLayout(anv_device_to_handle(device), - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit2d.img_ds_layout, - }, - &device->meta_state.alloc, &device->meta_state.blit2d.img_p_layout); - if (result != VK_SUCCESS) - goto fail; - - result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), - &(VkDescriptorSetLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - }, - } - }, &device->meta_state.alloc, &device->meta_state.blit2d.buf_ds_layout); - if (result != VK_SUCCESS) - goto fail; + texel_fetch_build_func src_func; + switch (src_type) { + case BLIT2D_SRC_TYPE_NORMAL: + src_func = build_nir_texel_fetch; + break; + case BLIT2D_SRC_TYPE_W_DETILE: + /* Not yet supported */ + default: + return VK_SUCCESS; + } - result = anv_CreatePipelineLayout(anv_device_to_handle(device), - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout, - }, - &device->meta_state.alloc, &device->meta_state.blit2d.buf_p_layout); - if (result != VK_SUCCESS) - goto fail; + struct anv_shader_module fs = { .nir = NULL }; + switch (dst_type) { + case BLIT2D_DST_TYPE_NORMAL: + fs.nir = build_nir_copy_fragment_shader(device, src_func); + break; + case BLIT2D_DST_TYPE_W_TILE: + case BLIT2D_DST_TYPE_RGB: + /* Not yet supported */ + default: + return VK_SUCCESS; + } /* We don't use a vertex shader for blitting, but instead build and pass * the VUEs directly to the rasterization backend. 
However, we do need @@ -601,10 +618,6 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .nir = build_nir_vertex_shader(), }; - struct anv_shader_module fs_2d = { - .nir = build_nir_copy_fragment_shader(device, build_nir_texel_fetch), - }; - VkPipelineVertexInputStateCreateInfo vi_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .vertexBindingDescriptionCount = 2, @@ -656,7 +669,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }, { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */ + .module = anv_shader_module_to_handle(&fs), .pName = "main", .pSpecializationInfo = NULL }, @@ -731,18 +744,120 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .use_rectlist = true }; - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); result = anv_graphics_pipeline_create(anv_device_to_handle(device), VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_2d_src); + &device->meta_state.alloc, + &device->meta_state.blit2d.pipelines[src_type][dst_type]); ralloc_free(vs.nir); - ralloc_free(fs_2d.nir); + ralloc_free(fs.nir); + + return result; +} + +VkResult +anv_device_init_meta_blit2d_state(struct anv_device *device) +{ + VkResult result; + zero(device->meta_state.blit2d); + + result = anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = 
VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }, &device->meta_state.alloc, &device->meta_state.blit2d.img_ds_layout); if (result != VK_SUCCESS) goto fail; + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit2d.img_ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit2d.img_p_layout); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }, &device->meta_state.alloc, &device->meta_state.blit2d.buf_ds_layout); 
+ if (result != VK_SUCCESS) + goto fail; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit2d.buf_p_layout); + if (result != VK_SUCCESS) + goto fail; + + for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) { + for (unsigned dst = 0; dst < BLIT2D_NUM_DST_TYPES; dst++) { + result = blit2d_init_pipeline(device, src, dst); + if (result != VK_SUCCESS) + goto fail; + } + } + return VK_SUCCESS; fail: diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 7c140a33cb7..1a18dd15e65 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -609,13 +609,16 @@ struct anv_meta_state { struct { VkRenderPass render_pass; - /** Pipeline that copies from a 2D image. */ - VkPipeline pipeline_2d_src; - VkPipelineLayout img_p_layout; VkDescriptorSetLayout img_ds_layout; VkPipelineLayout buf_p_layout; VkDescriptorSetLayout buf_ds_layout; + + /* Pipelines indexed by source and destination type. See the + * blit2d_src_type and blit2d_dst_type enums in anv_meta_blit2d.c to + * see what these mean. 
+ */ + VkPipeline pipelines[2][3]; } blit2d; struct { -- cgit v1.2.3 From 4caba940869602b750e21a444523b068b1bea339 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 15:20:11 -0700 Subject: anv/image: Expose the guts of CreateBufferView for meta Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_image.c | 40 ++++++++++++++++++++++++---------------- src/intel/vulkan/anv_private.h | 5 +++++ 2 files changed, 29 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 266fbe73ddc..759c8612005 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -646,20 +646,13 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview, anv_free2(&device->alloc, pAllocator, iview); } -VkResult -anv_CreateBufferView(VkDevice _device, - const VkBufferViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkBufferView *pView) + +void anv_buffer_view_init(struct anv_buffer_view *view, + struct anv_device *device, + const VkBufferViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) { - ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); - struct anv_buffer_view *view; - - view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!view) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); @@ -671,8 +664,7 @@ anv_CreateBufferView(VkDevice _device, buffer->size - view->offset : pCreateInfo->range; if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) { - view->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + view->surface_state = alloc_surface_state(device, cmd_buffer); anv_fill_buffer_surface_state(device, view->surface_state, view->format, @@ -683,8 +675,7 @@ anv_CreateBufferView(VkDevice _device, } if (buffer->usage & 
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { - view->storage_surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + view->storage_surface_state = alloc_surface_state(device, cmd_buffer); enum isl_format storage_format = has_matching_storage_typed_format(device, view->format) ? @@ -703,6 +694,23 @@ anv_CreateBufferView(VkDevice _device, } else { view->storage_surface_state = (struct anv_state){ 0 }; } +} + +VkResult +anv_CreateBufferView(VkDevice _device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_buffer_view *view; + + view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!view) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_buffer_view_init(view, device, pCreateInfo, NULL); *pView = anv_buffer_view_to_handle(view); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 1a18dd15e65..a394fe8a683 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1682,6 +1682,11 @@ struct anv_buffer_view { struct brw_image_param storage_image_param; }; +void anv_buffer_view_init(struct anv_buffer_view *view, + struct anv_device *device, + const VkBufferViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + const struct anv_format * anv_format_for_descriptor_type(VkDescriptorType type); -- cgit v1.2.3 From b37502b9832f02626b0caca22500b46ebbbe8007 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 16:15:54 -0700 Subject: isl: Rework the get_intratile_offset function The old function tried to work in elements which isn't, strictly speaking, a valid thing to do. In the case of a non-power-of-two format, there is no guarantee that the x offset into the tile is a multiple of the format block size. 
This commit refactors it to work entirely in terms of a tiling (not a surface) and bytes/rows. Reviewed-by: Nanley Chery --- src/intel/isl/isl.c | 52 +++++++++++++++++++++++--------------- src/intel/isl/isl.h | 33 +++++++++++++++--------- src/intel/vulkan/anv_meta_blit2d.c | 13 ++++++---- 3 files changed, 60 insertions(+), 38 deletions(-) (limited to 'src') diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index a36638071d5..37d8bcba078 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -1417,33 +1417,39 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf, } void -isl_surf_get_image_intratile_offset_el_xy(const struct isl_device *dev, - const struct isl_surf *surf, - uint32_t total_x_offset_el, - uint32_t total_y_offset_el, - uint32_t *base_address_offset, - uint32_t *x_offset_el, - uint32_t *y_offset_el) +isl_tiling_get_intratile_offset_el(const struct isl_device *dev, + enum isl_tiling tiling, + uint8_t bs, + uint32_t row_pitch, + uint32_t total_x_offset_el, + uint32_t total_y_offset_el, + uint32_t *base_address_offset, + uint32_t *x_offset_el, + uint32_t *y_offset_el) { - const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - struct isl_tile_info tile_info; - isl_surf_get_tile_info(dev, surf, &tile_info); + isl_tiling_get_info(dev, tiling, bs, &tile_info); + + /* This function only really works for power-of-two surfaces. In + * theory, we could make it work for non-power-of-two surfaces by going + * to the left until we find a block that is bs-aligned. The Vulkan + * driver doesn't use non-power-of-two tiled surfaces so we'll leave + * this unimplemented for now. 
+ */ + assert(tiling == ISL_TILING_LINEAR || isl_is_pow2(bs)); uint32_t small_y_offset_el = total_y_offset_el % tile_info.height; uint32_t big_y_offset_el = total_y_offset_el - small_y_offset_el; - uint32_t big_y_offset_B = big_y_offset_el * surf->row_pitch; + uint32_t big_y_offset_B = big_y_offset_el * row_pitch; - uint32_t total_x_offset_B = total_x_offset_el * fmtl->bs; + uint32_t total_x_offset_B = total_x_offset_el * bs; uint32_t small_x_offset_B = total_x_offset_B % tile_info.width; - uint32_t small_x_offset_el = small_x_offset_B / fmtl->bs; + uint32_t small_x_offset_el = small_x_offset_B / bs; uint32_t big_x_offset_B = (total_x_offset_B / tile_info.width) * tile_info.size; *base_address_offset = big_y_offset_B + big_x_offset_B; *x_offset_el = small_x_offset_el; *y_offset_el = small_y_offset_el; - - } void @@ -1456,6 +1462,8 @@ isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, uint32_t *x_offset_el, uint32_t *y_offset_el) { + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + uint32_t total_x_offset_el; uint32_t total_y_offset_el; isl_surf_get_image_offset_el(surf, level, @@ -1464,12 +1472,14 @@ isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, &total_x_offset_el, &total_y_offset_el); - isl_surf_get_image_intratile_offset_el_xy(dev, surf, - total_x_offset_el, - total_y_offset_el, - base_address_offset, - x_offset_el, - y_offset_el); + + isl_tiling_get_intratile_offset_el(dev, surf->tiling, fmtl->bs, + surf->row_pitch, + total_x_offset_el, + total_y_offset_el, + base_address_offset, + x_offset_el, + y_offset_el); } uint32_t diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 90193ca08c1..4f796f6c6a8 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -1142,6 +1142,27 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf, uint32_t *x_offset_el, uint32_t *y_offset_el); +/** + * @brief Calculate the intratile offsets to a surface. 
+ * + * In @a base_address_offset return the offset from the base of the surface to + * the base address of the first tile of the subimage. In @a x_offset_B and + * @a y_offset_rows, return the offset, in units of bytes and rows, from the + * tile's base to the subimage's first surface element. The x and y offsets + * are intratile offsets; that is, they do not exceed the boundary of the + * surface's tiling format. + */ +void +isl_tiling_get_intratile_offset_el(const struct isl_device *dev, + enum isl_tiling tiling, + uint8_t bs, + uint32_t row_pitch, + uint32_t total_x_offset_B, + uint32_t total_y_offset_rows, + uint32_t *base_address_offset, + uint32_t *x_offset_B, + uint32_t *y_offset_rows); + /** * @brief Calculate the intratile offsets to a subimage in the surface. * @@ -1162,18 +1183,6 @@ isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, uint32_t *x_offset_el, uint32_t *y_offset_el); -/** - * See above. - */ -void -isl_surf_get_image_intratile_offset_el_xy(const struct isl_device *dev, - const struct isl_surf *surf, - uint32_t total_x_offset_el, - uint32_t total_y_offset_el, - uint32_t *base_address_offset, - uint32_t *x_offset_el, - uint32_t *y_offset_el); - /** * @brief Get value of 3DSTATE_DEPTH_BUFFER.SurfaceFormat * diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 8e63eee462d..cf2dc66597d 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -145,12 +145,15 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, /* Create a VkImageView that starts at the tile aligned offset closest * to the provided x/y offset into the surface. 
*/ + struct isl_surf *isl_surf = &anv_image_from_handle(*img)->color_surface.isl; + uint32_t img_o = 0; - isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, - &anv_image_from_handle(*img)-> - color_surface.isl, - *rect_x, *rect_y, - &img_o, rect_x, rect_y); + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + isl_surf->tiling, surf->bs, + isl_surf->row_pitch, + *rect_x * surf->bs, *rect_y, + &img_o, rect_x, rect_y); + anv_image_view_init(iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, -- cgit v1.2.3 From b0a5ca5cfc5b65262e834cdfa3cb7c6a1cfa8259 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 16:23:02 -0700 Subject: isl: Remove surf_get_intratile_offset_el The intratile offset may not be a multiple of the element size so this calculation is invalid. Reviewed-by: Nanley Chery --- src/intel/isl/isl.c | 30 --------- src/intel/isl/isl.h | 20 ------ .../isl/tests/isl_surf_get_image_offset_test.c | 75 ---------------------- 3 files changed, 125 deletions(-) (limited to 'src') diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 37d8bcba078..0eaa8087c71 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -1452,36 +1452,6 @@ isl_tiling_get_intratile_offset_el(const struct isl_device *dev, *y_offset_el = small_y_offset_el; } -void -isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, - const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - uint32_t logical_z_offset, - uint32_t *base_address_offset, - uint32_t *x_offset_el, - uint32_t *y_offset_el) -{ - const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - - uint32_t total_x_offset_el; - uint32_t total_y_offset_el; - isl_surf_get_image_offset_el(surf, level, - logical_array_layer, - logical_z_offset, - &total_x_offset_el, - &total_y_offset_el); - - - isl_tiling_get_intratile_offset_el(dev, surf->tiling, fmtl->bs, - 
surf->row_pitch, - total_x_offset_el, - total_y_offset_el, - base_address_offset, - x_offset_el, - y_offset_el); -} - uint32_t isl_surf_get_depth_format(const struct isl_device *dev, const struct isl_surf *surf) diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 4f796f6c6a8..8f796b034f8 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -1163,26 +1163,6 @@ isl_tiling_get_intratile_offset_el(const struct isl_device *dev, uint32_t *x_offset_B, uint32_t *y_offset_rows); -/** - * @brief Calculate the intratile offsets to a subimage in the surface. - * - * In @a base_address_offset return the offset from the base of the surface to - * the base address of the first tile of the subimage. In @a x_offset_el and - * @a y_offset_el, return the offset, in units of surface elements, from the - * tile's base to the subimage's first surface element. The x and y offsets - * are intratile offsets; that is, they do not exceed the boundary of the - * surface's tiling format. - */ -void -isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, - const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - uint32_t logical_z_offset, - uint32_t *base_address_offset, - uint32_t *x_offset_el, - uint32_t *y_offset_el); - /** * @brief Get value of 3DSTATE_DEPTH_BUFFER.SurfaceFormat * diff --git a/src/intel/isl/tests/isl_surf_get_image_offset_test.c b/src/intel/isl/tests/isl_surf_get_image_offset_test.c index 34b336e8d81..9d6a8348800 100644 --- a/src/intel/isl/tests/isl_surf_get_image_offset_test.c +++ b/src/intel/isl/tests/isl_surf_get_image_offset_test.c @@ -93,31 +93,6 @@ t_assert_offset_el(const struct isl_surf *surf, t_assert(y == expected_y_offset_el); } -static void -t_assert_intratile_offset_el(const struct isl_device *dev, - const struct isl_surf *surf, - uint32_t level, - uint32_t logical_array_layer, - uint32_t logical_z_offset_px, - uint32_t expected_base_address_offset, - uint32_t expected_x_offset_el, - uint32_t 
expected_y_offset_el) -{ - uint32_t base_address_offset; - uint32_t x_offset_el, y_offset_el; - isl_surf_get_image_intratile_offset_el(dev, surf, - level, - logical_array_layer, - logical_z_offset_px, - &base_address_offset, - &x_offset_el, - &y_offset_el); - - t_assert(base_address_offset == expected_base_address_offset); - t_assert(x_offset_el == expected_x_offset_el); - t_assert(y_offset_el == expected_y_offset_el); -} - static void t_assert_phys_level0_sa(const struct isl_surf *surf, uint32_t width, uint32_t height, uint32_t depth, uint32_t array_len) @@ -188,17 +163,6 @@ test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(void) t_assert_offset_el(&surf, 7, 0, 0, 256, 760); // +0, +8 t_assert_offset_el(&surf, 8, 0, 0, 256, 764); // +0, +4 t_assert_offset_el(&surf, 9, 0, 0, 256, 768); // +0, +4 - - t_assert_intratile_offset_el(&dev, &surf, 0, 0, 0, 0x0, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 1, 0, 0, 0x100000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x108000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x148000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x168000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x178000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x178000, 0, 16); - t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x178000, 0, 24); - t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x178000, 0, 28); - t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x188000, 0, 0); } static void @@ -257,45 +221,6 @@ test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(void) */ t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 1540); - /* array layer 0 */ - t_assert_intratile_offset_el(&dev, &surf, 0, 0, 0, 0x0, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 1, 0, 0, 0x400000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x410000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x510000, 0, 0); - 
t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x590000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x5d0000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x5f0000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x5f0000, 0, 16); - t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x5f0000, 0, 24); - t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x5f0000, 0, 28); - t_assert_intratile_offset_el(&dev, &surf, 10, 0, 0, 0x610000, 0, 0); - - /* array layer 1 */ - t_assert_intratile_offset_el(&dev, &surf, 0, 1, 0, 0x600000, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 1, 1, 0, 0xa00000, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 2, 1, 0, 0xa10000, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 3, 1, 0, 0xb10000, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 4, 1, 0, 0xb90000, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 5, 1, 0, 0xbd0000, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 6, 1, 0, 0xbf0000, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 7, 1, 0, 0xbf0000, 0, 20); - t_assert_intratile_offset_el(&dev, &surf, 8, 1, 0, 0xbf0000, 0, 28); - t_assert_intratile_offset_el(&dev, &surf, 9, 1, 0, 0xc10000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 10, 1, 0, 0xc10000, 0, 4); - - /* array layer 2 */ - t_assert_intratile_offset_el(&dev, &surf, 0, 2, 0, 0xc00000, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 1, 2, 0, 0x1000000, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 2, 2, 0, 0x1010000, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 3, 2, 0, 0x1110000, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 4, 2, 0, 0x1190000, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 5, 2, 0, 0x11d0000, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 6, 2, 0, 0x11f0000, 0, 8); - t_assert_intratile_offset_el(&dev, &surf, 7, 2, 0, 0x11f0000, 0, 24); - t_assert_intratile_offset_el(&dev, &surf, 8, 2, 0, 0x1210000, 0, 0); - t_assert_intratile_offset_el(&dev, &surf, 9, 
2, 0, 0x1210000, 0, 4); - t_assert_intratile_offset_el(&dev, &surf, 10, 2, 0, 0x1210000, 0, 8); - /* skip the remaining array layers */ } -- cgit v1.2.3 From 819d0e1a7c06e98cfe82f687e30dcbe6b974a1e8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 16:52:56 -0700 Subject: anv/meta2d: Add support for blitting from W-tiled sources on gen7 Reviewed-by: Nanley Chery Reviewed-by: Chad Versace --- src/intel/vulkan/anv_meta_blit2d.c | 253 ++++++++++++++++++++++++++++++------- 1 file changed, 204 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index cf2dc66597d..c3bf4152902 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -173,6 +173,10 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, struct blit2d_src_temps { VkImage image; struct anv_image_view iview; + + struct anv_buffer buffer; + struct anv_buffer_view bview; + VkDescriptorPool desc_pool; VkDescriptorSet set; }; @@ -187,56 +191,130 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, struct anv_device *device = cmd_buffer->device; VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - create_iview(cmd_buffer, src, rect, VK_IMAGE_USAGE_SAMPLED_BIT, - &tmp->image, &tmp->iview); - - anv_CreateDescriptorPool(vk_device, - &(const VkDescriptorPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .maxSets = 1, - .poolSizeCount = 1, - .pPoolSizes = (VkDescriptorPoolSize[]) { + if (src_type == BLIT2D_SRC_TYPE_NORMAL) { + create_iview(cmd_buffer, src, rect, VK_IMAGE_USAGE_SAMPLED_BIT, + &tmp->image, &tmp->iview); + + anv_CreateDescriptorPool(vk_device, + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + 
.descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &tmp->desc_pool); + + anv_AllocateDescriptorSets(vk_device, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = tmp->desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit2d.img_ds_layout + }, &tmp->set); + + anv_UpdateDescriptorSets(vk_device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { { - .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1 - }, - } - }, &cmd_buffer->pool->alloc, &tmp->desc_pool); - - anv_AllocateDescriptorSets(vk_device, - &(VkDescriptorSetAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = tmp->desc_pool, - .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit2d.img_ds_layout - }, &tmp->set); - - anv_UpdateDescriptorSets(vk_device, - 1, /* writeCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = tmp->set, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = NULL, + .imageView = anv_image_view_to_handle(&tmp->iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit2d.img_p_layout, 0, 1, + &tmp->set, 0, NULL); + } else { + assert(src_type == BLIT2D_SRC_TYPE_W_DETILE); + assert(src->tiling == ISL_TILING_W); + assert(src->bs == 1); + + uint32_t tile_offset = 0; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + ISL_TILING_W, 1, src->pitch, + 
rect->src_x, rect->src_y, + &tile_offset, + &rect->src_x, &rect->src_y); + + tmp->buffer = (struct anv_buffer) { + .device = device, + .size = align_u32(rect->src_y + rect->height, 64) * src->pitch, + .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, + .bo = src->bo, + .offset = src->base_offset + tile_offset, + }; + + anv_buffer_view_init(&tmp->bview, device, + &(VkBufferViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .buffer = anv_buffer_to_handle(&tmp->buffer), + .format = VK_FORMAT_R8_UINT, + .offset = 0, + .range = VK_WHOLE_SIZE, + }, cmd_buffer); + + anv_CreateDescriptorPool(vk_device, + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { { - .sampler = NULL, - .imageView = anv_image_view_to_handle(&tmp->iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1 }, } - } - }, 0, NULL); + }, &cmd_buffer->pool->alloc, &tmp->desc_pool); + + anv_AllocateDescriptorSets(vk_device, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = tmp->desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout + }, &tmp->set); + + anv_UpdateDescriptorSets(vk_device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]) { + anv_buffer_view_to_handle(&tmp->bview), + }, + } + }, 0, NULL); - anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.img_p_layout, 0, 1, - &tmp->set, 0, NULL); + 
anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit2d.buf_p_layout, 0, 1, + &tmp->set, 0, NULL); + } } static void @@ -246,8 +324,10 @@ blit2d_unbind_src(struct anv_cmd_buffer *cmd_buffer, { anv_DestroyDescriptorPool(anv_device_to_handle(cmd_buffer->device), tmp->desc_pool, &cmd_buffer->pool->alloc); - anv_DestroyImage(anv_device_to_handle(cmd_buffer->device), - tmp->image, &cmd_buffer->pool->alloc); + if (src_type == BLIT2D_SRC_TYPE_NORMAL) { + anv_DestroyImage(anv_device_to_handle(cmd_buffer->device), + tmp->image, &cmd_buffer->pool->alloc); + } } void @@ -478,6 +558,80 @@ typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *, struct anv_device *, nir_ssa_def *, nir_ssa_def *); +static nir_ssa_def * +nir_copy_bits(struct nir_builder *b, nir_ssa_def *dst, unsigned dst_offset, + nir_ssa_def *src, unsigned src_offset, unsigned num_bits) +{ + unsigned src_mask = (~1u >> (32 - num_bits)) << src_offset; + nir_ssa_def *masked = nir_iand(b, src, nir_imm_int(b, src_mask)); + + nir_ssa_def *shifted; + if (dst_offset > src_offset) { + shifted = nir_ishl(b, masked, nir_imm_int(b, dst_offset - src_offset)); + } else if (dst_offset < src_offset) { + shifted = nir_ushr(b, masked, nir_imm_int(b, src_offset - dst_offset)); + } else { + assert(dst_offset == src_offset); + shifted = masked; + } + + return nir_ior(b, dst, shifted); +} + +static nir_ssa_def * +build_nir_w_tiled_fetch(struct nir_builder *b, struct anv_device *device, + nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch) +{ + nir_ssa_def *x = nir_channel(b, tex_pos, 0); + nir_ssa_def *y = nir_channel(b, tex_pos, 1); + + /* First, compute the block-aligned offset */ + nir_ssa_def *x_major = nir_ushr(b, x, nir_imm_int(b, 6)); + nir_ssa_def *y_major = nir_ushr(b, y, nir_imm_int(b, 6)); + nir_ssa_def *offset = + nir_iadd(b, nir_imul(b, y_major, + nir_imul(b, tex_pitch, nir_imm_int(b, 64))), + nir_imul(b, x_major, nir_imm_int(b, 4096))); + + /* 
Compute the bottom 12 bits of the offset */ + offset = nir_copy_bits(b, offset, 0, x, 0, 1); + offset = nir_copy_bits(b, offset, 1, y, 0, 1); + offset = nir_copy_bits(b, offset, 2, x, 1, 1); + offset = nir_copy_bits(b, offset, 3, y, 1, 1); + offset = nir_copy_bits(b, offset, 4, x, 2, 1); + offset = nir_copy_bits(b, offset, 5, y, 2, 4); + offset = nir_copy_bits(b, offset, 9, x, 3, 3); + + if (device->isl_dev.has_bit6_swizzling) { + offset = nir_ixor(b, offset, + nir_ushr(b, nir_iand(b, offset, nir_imm_int(b, 0x0200)), + nir_imm_int(b, 3))); + } + + const struct glsl_type *sampler_type = + glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); + nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, + sampler_type, "s_tex"); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1); + tex->sampler_dim = GLSL_SAMPLER_DIM_BUF; + tex->op = nir_texop_txf; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(offset); + tex->dest_type = nir_type_float; /* TODO */ + tex->is_array = false; + tex->coord_components = 1; + tex->texture = nir_deref_var_create(tex, sampler); + tex->sampler = NULL; + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); + nir_builder_instr_insert(b, &tex->instr); + + return &tex->dest.ssa; +} + static nir_ssa_def * build_nir_texel_fetch(struct nir_builder *b, struct anv_device *device, nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch) @@ -595,9 +749,10 @@ blit2d_init_pipeline(struct anv_device *device, src_func = build_nir_texel_fetch; break; case BLIT2D_SRC_TYPE_W_DETILE: - /* Not yet supported */ + src_func = build_nir_w_tiled_fetch; + break; default: - return VK_SUCCESS; + unreachable("Invalid blit2d source type"); } struct anv_shader_module fs = { .nir = NULL }; -- cgit v1.2.3 From b8f3909b73a9b893b6cfed752e65516dff384b6c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 20:37:18 -0700 Subject: 
nir/gather_info: Handle discard_if Reviewed-by: Nanley Chery --- src/compiler/nir/nir_gather_info.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 046836fc534..bff235bb377 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -28,6 +28,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) { switch (instr->intrinsic) { case nir_intrinsic_discard: + case nir_intrinsic_discard_if: assert(shader->stage == MESA_SHADER_FRAGMENT); shader->info.fs.uses_discard = true; break; -- cgit v1.2.3 From 15a9468d85a01042743cab93593a57aec8ed3f0a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Mar 2016 09:14:57 -0700 Subject: anv/blit2d: Simplify create_iview Now it just creates the image and view. The caller is responsible for handling the offset calculations. Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 55 +++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index c3bf4152902..eb0c048b533 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -95,29 +95,20 @@ vk_format_for_size(int bs) static void create_iview(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *surf, - struct anv_meta_blit2d_rect *rect, + uint64_t offset, VkImageUsageFlags usage, + uint32_t width, + uint32_t height, VkImage *img, struct anv_image_view *iview) { - struct isl_tile_info tile_info; - isl_tiling_get_info(&cmd_buffer->device->isl_dev, - surf->tiling, surf->bs, &tile_info); - const unsigned tile_width_px = tile_info.width > surf->bs ? - tile_info.width / surf->bs : 1; - uint32_t *rect_y = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ? - &rect->src_y : &rect->dst_y; - uint32_t *rect_x = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ? 
- &rect->src_x : &rect->dst_x; - - /* Define the shared state among all created image views */ const VkImageCreateInfo image_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, .format = vk_format_for_size(surf->bs), .extent = { - .width = rect->width + (*rect_x) % tile_width_px, - .height = rect->height + (*rect_y) % tile_info.height, + .width = width, + .height = height, .depth = 1, }, .mipLevels = 1, @@ -142,18 +133,6 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, anv_image_from_handle(*img)->bo = surf->bo; anv_image_from_handle(*img)->offset = surf->base_offset; - /* Create a VkImageView that starts at the tile aligned offset closest - * to the provided x/y offset into the surface. - */ - struct isl_surf *isl_surf = &anv_image_from_handle(*img)->color_surface.isl; - - uint32_t img_o = 0; - isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, - isl_surf->tiling, surf->bs, - isl_surf->row_pitch, - *rect_x * surf->bs, *rect_y, - &img_o, rect_x, rect_y); - anv_image_view_init(iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -167,7 +146,7 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, .baseArrayLayer = 0, .layerCount = 1 }, - }, cmd_buffer, img_o, usage); + }, cmd_buffer, offset, usage); } struct blit2d_src_temps { @@ -192,7 +171,14 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); if (src_type == BLIT2D_SRC_TYPE_NORMAL) { - create_iview(cmd_buffer, src, rect, VK_IMAGE_USAGE_SAMPLED_BIT, + uint32_t offset = 0; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + src->tiling, src->bs, src->pitch, + rect->src_x, rect->src_y, + &offset, &rect->src_x, &rect->src_y); + + create_iview(cmd_buffer, src, offset, VK_IMAGE_USAGE_SAMPLED_BIT, + rect->src_x + rect->width, rect->src_y + rect->height, &tmp->image, &tmp->iview); anv_CreateDescriptorPool(vk_device, @@ -369,15 +355,24 @@ 
anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, { struct anv_device *device = cmd_buffer->device; VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; for (unsigned r = 0; r < num_rects; ++r) { struct blit2d_src_temps src_temps; blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps); + uint32_t offset = 0; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + dst->tiling, dst->bs, dst->pitch, + rects[r].dst_x, rects[r].dst_y, + &offset, + &rects[r].dst_x, &rects[r].dst_y); + VkImage dst_img; struct anv_image_view dst_iview; - create_iview(cmd_buffer, dst, &rects[r], dst_usage, &dst_img, &dst_iview); + create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + rects[r].dst_x + rects[r].width, + rects[r].dst_y + rects[r].height, + &dst_img, &dst_iview); struct blit_vb_data { float pos[2]; -- cgit v1.2.3 From f9a2570a06949810b10395d81c19f6295d76c530 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Mar 2016 09:27:42 -0700 Subject: anv/blit2d: Add a bind_dst helper function Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 88 ++++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index eb0c048b533..49646dd0374 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -316,6 +316,56 @@ blit2d_unbind_src(struct anv_cmd_buffer *cmd_buffer, } } +struct blit2d_dst_temps { + VkImage image; + struct anv_image_view iview; + VkFramebuffer fb; +}; + +static void +blit2d_bind_dst(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *dst, + uint64_t offset, + uint32_t width, + uint32_t height, + struct blit2d_dst_temps *tmp) +{ + create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + width, height, &tmp->image, 
&tmp->iview); + + anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&tmp->iview), + }, + .width = width, + .height = height, + .layers = 1 + }, &cmd_buffer->pool->alloc, &tmp->fb); + + + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = width, + .height = height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); +} + +static void +blit2d_unbind_dst(struct anv_cmd_buffer *cmd_buffer, + struct blit2d_dst_temps *tmp) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + anv_DestroyFramebuffer(vk_device, tmp->fb, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, tmp->image, &cmd_buffer->pool->alloc); +} + void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save) @@ -354,7 +404,6 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_rect *rects) { struct anv_device *device = cmd_buffer->device; - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); for (unsigned r = 0; r < num_rects; ++r) { struct blit2d_src_temps src_temps; @@ -367,12 +416,9 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, &offset, &rects[r].dst_x, &rects[r].dst_y); - VkImage dst_img; - struct anv_image_view dst_iview; - create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - rects[r].dst_x + rects[r].width, - rects[r].dst_y + rects[r].height, - &dst_img, &dst_iview); + struct blit2d_dst_temps dst_temps; + blit2d_bind_dst(cmd_buffer, dst, offset, rects[r].dst_x + rects[r].width, + rects[r].dst_y + rects[r].height, &dst_temps); struct blit_vb_data { float pos[2]; @@ -441,24 +487,11 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, sizeof(struct anv_vue_header), }); - VkFramebuffer fb; - 
anv_CreateFramebuffer(vk_device, - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(&dst_iview), - }, - .width = dst_iview.extent.width, - .height = dst_iview.extent.height, - .layers = 1 - }, &cmd_buffer->pool->alloc, &fb); - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderPass = device->meta_state.blit2d.render_pass, - .framebuffer = fb, + .framebuffer = dst_temps.fb, .renderArea = { .offset = { rects[r].dst_x, rects[r].dst_y, }, .extent = { rects[r].width, rects[r].height }, @@ -469,16 +502,6 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_NORMAL); - anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = dst_iview.extent.width, - .height = dst_iview.extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); @@ -487,8 +510,7 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, * descriptor sets, etc. has been used. We are free to delete it. */ blit2d_unbind_src(cmd_buffer, src_type, &src_temps); - anv_DestroyFramebuffer(vk_device, fb, &cmd_buffer->pool->alloc); - anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); + blit2d_unbind_dst(cmd_buffer, &dst_temps); } } -- cgit v1.2.3 From b377c1d08ed16ca0abeabc86e8d651036eea3145 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Mar 2016 09:32:05 -0700 Subject: anv/image: Remove the offset parameter from image_view_init The only place we were using this was in meta_blit2d which always creates a new image anyway so we can just use the image offset. 
Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_image.c | 5 ++--- src/intel/vulkan/anv_meta_blit.c | 4 ++-- src/intel/vulkan/anv_meta_blit2d.c | 4 ++-- src/intel/vulkan/anv_meta_clear.c | 2 +- src/intel/vulkan/anv_meta_resolve.c | 4 ++-- src/intel/vulkan/anv_private.h | 1 - 6 files changed, 9 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 759c8612005..db109625316 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -464,7 +464,6 @@ anv_image_view_init(struct anv_image_view *iview, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer, - uint32_t offset, VkImageUsageFlags usage_mask) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); @@ -495,7 +494,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->image = image; iview->bo = image->bo; - iview->offset = image->offset + surface->offset + offset; + iview->offset = image->offset + surface->offset; iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; iview->vk_format = pCreateInfo->format; @@ -614,7 +613,7 @@ anv_CreateImageView(VkDevice _device, if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - anv_image_view_init(view, device, pCreateInfo, NULL, 0, ~0); + anv_image_view_init(view, device, pCreateInfo, NULL, ~0); *pView = anv_image_view_to_handle(view); diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 218499a8787..b726b94c3cd 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -384,7 +384,7 @@ void anv_CmdBlitImage( .layerCount = 1 }, }, - cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); + cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT); const VkOffset3D dest_offset = { .x = pRegions[r].dstOffsets[0].x, @@ -434,7 +434,7 @@ void anv_CmdBlitImage( .layerCount = 1 }, }, - cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + cmd_buffer, 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); meta_emit_blit(cmd_buffer, src_image, &src_iview, diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 49646dd0374..68ade55d8ea 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -131,7 +131,7 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, * creating a dummy memory object etc. so there's really no point. */ anv_image_from_handle(*img)->bo = surf->bo; - anv_image_from_handle(*img)->offset = surf->base_offset; + anv_image_from_handle(*img)->offset = surf->base_offset + offset; anv_image_view_init(iview, cmd_buffer->device, &(VkImageViewCreateInfo) { @@ -146,7 +146,7 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, .baseArrayLayer = 0, .layerCount = 1 }, - }, cmd_buffer, offset, usage); + }, cmd_buffer, usage); } struct blit2d_src_temps { diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index a24e59950be..a03701c684b 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -813,7 +813,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, .layerCount = 1 }, }, - cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); VkFramebuffer fb; anv_CreateFramebuffer(device_h, diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 3e7c7d39ba5..8c1bdc06f84 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -758,7 +758,7 @@ void anv_CmdResolveImage( .layerCount = 1, }, }, - cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); + cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT); struct anv_image_view dest_iview; anv_image_view_init(&dest_iview, cmd_buffer->device, @@ -775,7 +775,7 @@ void anv_CmdResolveImage( .layerCount = 1, }, }, - cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); VkFramebuffer fb_h; 
anv_CreateFramebuffer(device_h, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index a394fe8a683..99d3934ddda 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1667,7 +1667,6 @@ void anv_image_view_init(struct anv_image_view *view, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer, - uint32_t offset, VkImageUsageFlags usage_mask); struct anv_buffer_view { -- cgit v1.2.3 From 2e827816fa10f6b5c9c13c5833e3af5db2621efa Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Mar 2016 15:27:30 -0700 Subject: anv/blit2d: Add another passthrough varying to the VS We need the VS to provide some setup data for other stages. Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 68ade55d8ea..b6e33c84fdd 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -568,6 +568,15 @@ build_nir_vertex_shader(void) tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; nir_copy_var(&b, tex_pos_out, tex_pos_in); + nir_variable *other_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_other"); + other_in->data.location = VERT_ATTRIB_GENERIC2; + nir_variable *other_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "v_other"); + other_out->data.location = VARYING_SLOT_VAR1; + other_out->data.interpolation = INTERP_QUALIFIER_FLAT; + nir_copy_var(&b, other_out, other_in); + return b.shader; } -- cgit v1.2.3 From 0a6842c1bd18e1b2b7ef5e969b9f5df891604815 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Mar 2016 18:38:21 -0700 Subject: isl/surface_state: Set the correct pitch for W-tiled surfaces Reviewed-by: Nanley Chery --- src/intel/isl/isl_surface_state.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited 
to 'src') diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index 6afe45d650e..cac59fab78e 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -261,7 +261,6 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, .Height = info->surf->logical_level0_px.height - 1, .Depth = 0, /* TEMPLATE */ - .SurfacePitch = info->surf->row_pitch - 1, .RenderTargetViewExtent = 0, /* TEMPLATE */ .MinimumArrayElement = 0, /* TEMPLATE */ @@ -295,6 +294,19 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, #endif }; + if (info->surf->tiling == ISL_TILING_W) { + /* From the Broadwell PRM documentation for this field: + * + * "If the surface is a stencil buffer (and thus has Tile Mode set + * to TILEMODE_WMAJOR), the pitch must be set to 2x the value + * computed based on width, as the stencil buffer is stored with + * two rows interleaved." + */ + s.SurfacePitch = info->surf->row_pitch * 2 - 1; + } else { + s.SurfacePitch = info->surf->row_pitch - 1; + } + if (info->view->usage & ISL_SURF_USAGE_STORAGE_BIT) { s.SurfaceFormat = isl_lower_storage_image_format(dev, info->view->format); } else { -- cgit v1.2.3 From e3312644cbc8a74c262e35672547d5cce83fd1bc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 20:31:25 -0700 Subject: anv/blit2d: Add support for W-tiled destinations Reviewed-by: Nanley Chery Reviewed-by: Chad Versace --- src/intel/vulkan/anv_meta_blit2d.c | 366 ++++++++++++++++++++++++++++++++----- 1 file changed, 322 insertions(+), 44 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index b6e33c84fdd..6d6127a8693 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -514,6 +514,149 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, } } +static void +anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer, + struct 
anv_meta_blit2d_surf *src, + enum blit2d_src_type src_type, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) +{ + struct anv_device *device = cmd_buffer->device; + + for (unsigned r = 0; r < num_rects; ++r) { + struct blit2d_src_temps src_temps; + blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps); + + assert(dst->bs == 1); + uint32_t offset; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + ISL_TILING_W, 1, dst->pitch, + rects[r].dst_x, rects[r].dst_y, + &offset, + &rects[r].dst_x, &rects[r].dst_y); + + /* The original coordinates were in terms of an actual W-tiled offset + * but we are binding this image as Y-tiled. We need to adjust our + * rectangle accordingly. + */ + uint32_t xmin_Y, xmax_Y, ymin_Y, ymax_Y; + xmin_Y = (rects[r].dst_x / 8) * 16; + xmax_Y = DIV_ROUND_UP(rects[r].dst_x + rects[r].width, 8) * 16; + ymin_Y = (rects[r].dst_y / 4) * 2; + ymax_Y = DIV_ROUND_UP(rects[r].dst_y + rects[r].height, 4) * 2; + + struct anv_meta_blit2d_surf dst_Y = { + .bo = dst->bo, + .tiling = ISL_TILING_Y0, + .base_offset = dst->base_offset, + .bs = 1, + .pitch = dst->pitch * 2, + }; + + struct blit2d_dst_temps dst_temps; + blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y, &dst_temps); + + struct blit_vb_header { + struct anv_vue_header vue; + int32_t tex_offset[2]; + uint32_t tex_pitch; + uint32_t bounds[4]; + } *vb_header; + + struct blit_vb_data { + float pos[2]; + } *vb_data; + + unsigned vb_size = sizeof(*vb_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + vb_header = vb_state.map; + + *vb_header = (struct blit_vb_header) { + .tex_offset = { + rects[r].src_x - rects[r].dst_x, + rects[r].src_y - rects[r].dst_y, + }, + .tex_pitch = src->pitch, + .bounds = { + rects[r].dst_x, + rects[r].dst_y, + rects[r].dst_x + rects[r].width, + rects[r].dst_y + rects[r].height, + }, + }; + + vb_data = (void 
*)(vb_header + 1); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + xmax_Y, + ymax_Y, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + xmin_Y, + ymax_Y, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + xmin_Y, + ymin_Y, + }, + }; + + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + (void *)vb_data - vb_state.map, + }); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit2d.render_pass, + .framebuffer = dst_temps.fb, + .renderArea = { + .offset = { xmin_Y, ymin_Y, }, + .extent = { xmax_Y - xmin_Y, ymax_Y - ymin_Y }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_W_TILE); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. 
+ */ + blit2d_unbind_src(cmd_buffer, src_type, &src_temps); + blit2d_unbind_dst(cmd_buffer, &dst_temps); + } +} + void anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *src, @@ -529,8 +672,8 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, } if (dst->tiling == ISL_TILING_W) { - assert(dst->bs == 1); - anv_finishme("Blitting to w-tiled destinations not yet supported"); + anv_meta_blit2d_w_tiled_dst(cmd_buffer, src, src_type, dst, + num_rects, rects); return; } else if (dst->bs % 3 == 0) { anv_finishme("Blitting to RGB destinations not yet supported"); @@ -688,6 +831,47 @@ build_nir_texel_fetch(struct nir_builder *b, struct anv_device *device, return &tex->dest.ssa; } +static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE + }, + { + .binding = 1, + .stride = 5 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = 8 + }, + }, +}; + static nir_shader * build_nir_copy_fragment_shader(struct anv_device *device, texel_fetch_build_func txf_func) @@ -718,6 +902,136 @@ build_nir_copy_fragment_shader(struct anv_device *device, return b.shader; } +static const VkPipelineVertexInputStateCreateInfo w_tiled_vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + 
.vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE + }, + { + .binding = 1, + .stride = 2 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 4, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Offset */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32B32_UINT, + .offset = 16 + }, + { + /* Destination bounds */ + .location = 3, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 28 + }, + }, +}; + +static nir_shader * +build_nir_w_tiled_fragment_shader(struct anv_device *device, + texel_fetch_build_func txf_func) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + const struct glsl_type *ivec3 = glsl_vector_type(GLSL_TYPE_INT, 3); + const struct glsl_type *uvec4 = glsl_vector_type(GLSL_TYPE_UINT, 4); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); + + /* We need gl_FragCoord so we know our Y-tiled position */ + nir_variable *frag_coord_in = nir_variable_create(b.shader, + nir_var_shader_in, + vec4, "gl_FragCoord"); + frag_coord_in->data.location = VARYING_SLOT_POS; + frag_coord_in->data.origin_upper_left = true; + + /* In location 0 we have an ivec3 that has the offset from dest to + * source in the first two components and the stride in the third. 
+ */ + nir_variable *tex_off_in = nir_variable_create(b.shader, nir_var_shader_in, + ivec3, "v_tex_off"); + tex_off_in->data.location = VARYING_SLOT_VAR0; + tex_off_in->data.interpolation = INTERP_QUALIFIER_FLAT; + + /* In location 1 we have a uvec4 that gives us the bounds of the + * destination. We need to discard if we get outside this boundary. + */ + nir_variable *bounds_in = nir_variable_create(b.shader, nir_var_shader_in, + uvec4, "v_bounds"); + bounds_in->data.location = VARYING_SLOT_VAR1; + bounds_in->data.interpolation = INTERP_QUALIFIER_FLAT; + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + + nir_ssa_def *frag_coord_int = nir_f2i(&b, nir_load_var(&b, frag_coord_in)); + nir_ssa_def *x_Y = nir_channel(&b, frag_coord_int, 0); + nir_ssa_def *y_Y = nir_channel(&b, frag_coord_int, 1); + + /* Compute the W-tiled position from the Y-tiled position */ + nir_ssa_def *x_W = nir_iand(&b, x_Y, nir_imm_int(&b, 0xffffff80)); + x_W = nir_ushr(&b, x_W, nir_imm_int(&b, 1)); + x_W = nir_copy_bits(&b, x_W, 0, x_Y, 0, 1); + x_W = nir_copy_bits(&b, x_W, 1, x_Y, 2, 1); + x_W = nir_copy_bits(&b, x_W, 2, y_Y, 0, 1); + x_W = nir_copy_bits(&b, x_W, 3, x_Y, 4, 3); + + nir_ssa_def *y_W = nir_iand(&b, y_Y, nir_imm_int(&b, 0xffffffe0)); + y_W = nir_ishl(&b, y_W, nir_imm_int(&b, 1)); + y_W = nir_copy_bits(&b, y_W, 0, x_Y, 1, 1); + y_W = nir_copy_bits(&b, y_W, 1, x_Y, 3, 1); + y_W = nir_copy_bits(&b, y_W, 2, y_Y, 1, 4); + + /* Figure out if we are out-of-bounds and discard */ + nir_ssa_def *bounds = nir_load_var(&b, bounds_in); + nir_ssa_def *oob = + nir_ior(&b, nir_ult(&b, x_W, nir_channel(&b, bounds, 0)), + nir_ior(&b, nir_ult(&b, y_W, nir_channel(&b, bounds, 1)), + nir_ior(&b, nir_uge(&b, x_W, nir_channel(&b, bounds, 2)), + nir_uge(&b, y_W, nir_channel(&b, bounds, 3))))); + + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if); + 
discard->src[0] = nir_src_for_ssa(oob); + nir_builder_instr_insert(&b, &discard->instr); + + unsigned swiz[4] = { 0, 1, 0, 0 }; + nir_ssa_def *tex_off = + nir_swizzle(&b, nir_load_var(&b, tex_off_in), swiz, 2, false); + nir_ssa_def *tex_pos = nir_iadd(&b, nir_vec2(&b, x_W, y_W), tex_off); + nir_ssa_def *tex_pitch = nir_channel(&b, nir_load_var(&b, tex_off_in), 2); + + nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch); + nir_store_var(&b, color_out, color, 0xf); + + return b.shader; +} + void anv_device_finish_meta_blit2d_state(struct anv_device *device) { @@ -781,12 +1095,17 @@ blit2d_init_pipeline(struct anv_device *device, unreachable("Invalid blit2d source type"); } + const VkPipelineVertexInputStateCreateInfo *vi_create_info; struct anv_shader_module fs = { .nir = NULL }; switch (dst_type) { case BLIT2D_DST_TYPE_NORMAL: fs.nir = build_nir_copy_fragment_shader(device, src_func); + vi_create_info = &normal_vi_create_info; break; case BLIT2D_DST_TYPE_W_TILE: + fs.nir = build_nir_w_tiled_fragment_shader(device, src_func); + vi_create_info = &w_tiled_vi_create_info; + break; case BLIT2D_DST_TYPE_RGB: /* Not yet supported */ default: @@ -802,47 +1121,6 @@ blit2d_init_pipeline(struct anv_device *device, .nir = build_nir_vertex_shader(), }; - VkPipelineVertexInputStateCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 2, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = 0, - .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE - }, - { - .binding = 1, - .stride = 5 * sizeof(float), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offset = 0 - }, - { - /* Position */ - .location = 1, - .binding = 1, - .format = 
VK_FORMAT_R32G32_SFLOAT, - .offset = 0 - }, - { - /* Texture Coordinate */ - .location = 2, - .binding = 1, - .format = VK_FORMAT_R32G32B32_SFLOAT, - .offset = 8 - } - } - }; - VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -863,7 +1141,7 @@ blit2d_init_pipeline(struct anv_device *device, .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .stageCount = ARRAY_SIZE(pipeline_shader_stages), .pStages = pipeline_shader_stages, - .pVertexInputState = &vi_create_info, + .pVertexInputState = vi_create_info, .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, -- cgit v1.2.3 From c226e72a399199a71579f22e5b088f50b1a10ac1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 17:43:44 -0700 Subject: anv/formats: Advertise blit support for stencil Thanks to advances in the blit code, we can do this now. 
Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_formats.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 4d279a8fb72..750af793bdf 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -381,13 +381,11 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0; if (anv_format_is_depth_or_stencil(&anv_formats[format])) { tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; - if (physical_device->info->gen >= 8) { + if (physical_device->info->gen >= 8) tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; - } - if (anv_formats[format].has_depth) { - tiled |= VK_FORMAT_FEATURE_BLIT_DST_BIT; - } + + tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT; } else { enum isl_format linear_fmt, tiled_fmt; struct anv_format_swizzle linear_swizzle, tiled_swizzle; -- cgit v1.2.3 From d4a28ae52abddd37c2adc6bb1f4e4b2de76a16fa Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 8 Apr 2016 17:07:49 -0700 Subject: anv/meta: Make clflushes conditional on !devinfo->has_llc --- src/intel/vulkan/anv_meta_blit.c | 3 ++- src/intel/vulkan/anv_meta_blit2d.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index b726b94c3cd..24e47142a5c 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -186,7 +186,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, }, }; - anv_state_clflush(vb_state); + if (!device->info.has_llc) + anv_state_clflush(vb_state); struct anv_buffer vertex_buffer = { .device = device, diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 6d6127a8693..a7405e00810 100644 --- 
a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -468,7 +468,8 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, }, }; - anv_state_clflush(vb_state); + if (!device->info.has_llc) + anv_state_clflush(vb_state); struct anv_buffer vertex_buffer = { .device = device, @@ -611,7 +612,8 @@ anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer, }, }; - anv_state_clflush(vb_state); + if (!device->info.has_llc) + anv_state_clflush(vb_state); struct anv_buffer vertex_buffer = { .device = device, -- cgit v1.2.3 From ddae34261875b682545b523c0ba5bd033af53654 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 9 Apr 2016 16:59:04 -0700 Subject: genxml: Fix up MOCS in RENDER_SURFACE_STATE on gen6 to match gen7 --- src/intel/genxml/gen6.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/genxml/gen6.xml b/src/intel/genxml/gen6.xml index 027ab34b288..ccc4387c6ac 100644 --- a/src/intel/genxml/gen6.xml +++ b/src/intel/genxml/gen6.xml @@ -340,7 +340,8 @@ - + + -- cgit v1.2.3 From aa6f9a4e1ea1beaa5de8582d42deffd45d00b0e5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 9 Apr 2016 17:00:19 -0700 Subject: genxml: Break output detail of 3DSTATE_SF on gen7 into a struct This makes it work like 3DSTATE_SBE[_SWIZ] on gen7+ --- src/intel/genxml/gen6.xml | 65 +++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/src/intel/genxml/gen6.xml b/src/intel/genxml/gen6.xml index ccc4387c6ac..0d4ca5a2a58 100644 --- a/src/intel/genxml/gen6.xml +++ b/src/intel/genxml/gen6.xml @@ -31,6 +31,27 @@ + + + + + + + + + + + + + + + + + + + + + @@ -1212,47 +1233,9 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + -- cgit v1.2.3 From 1275c7c74496ac26286eeb9f2e2b6b735f82cfcb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 9 Apr 2016 17:02:21 -0700 Subject: genxml: Fix the name 
of a 3DSTATE_SF/SBE field on gen6-7.5 --- src/intel/genxml/gen6.xml | 2 +- src/intel/genxml/gen7.xml | 2 +- src/intel/genxml/gen75.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/intel/genxml/gen6.xml b/src/intel/genxml/gen6.xml index 0d4ca5a2a58..4103f879541 100644 --- a/src/intel/genxml/gen6.xml +++ b/src/intel/genxml/gen6.xml @@ -1237,7 +1237,7 @@ - + diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml index 960df5eaf9f..23d1738cc17 100644 --- a/src/intel/genxml/gen7.xml +++ b/src/intel/genxml/gen7.xml @@ -1549,7 +1549,7 @@ - + diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml index 26c1f9ecdf6..698d93f12ae 100644 --- a/src/intel/genxml/gen75.xml +++ b/src/intel/genxml/gen75.xml @@ -1794,7 +1794,7 @@ - + -- cgit v1.2.3 From bff7a8c4f343a67149e6a6854e0597696b3d4b03 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 9 Apr 2016 17:06:59 -0700 Subject: anv/pipeline: Set up flat enables correctly --- src/intel/vulkan/genX_pipeline_util.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index cd138dfae61..654d2e0d43f 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -239,6 +239,7 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline) .AttributeSwizzleEnable = true, .PointSpriteTextureCoordinateOrigin = UPPERLEFT, .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs, + .ConstantInterpolationEnable = wm_prog_data->flat_inputs, #if GEN_GEN >= 9 .Attribute0ActiveComponentFormat = ACF_XYZW, -- cgit v1.2.3 From 3aa1a5ee8841fa93da5617630601e8110d428e8a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sun, 10 Apr 2016 23:43:34 -0700 Subject: nir/lower_system_values: Simplify the computation of LocalInvocationIndex --- src/compiler/nir/nir_lower_system_values.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) 
(limited to 'src') diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index c1cd1398aa1..2d3ccd7d0f9 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -83,7 +83,7 @@ convert_block(nir_block *block, void *void_state) case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: { /* From the GLSL man page for gl_LocalInvocationIndex: * - * ?The value of gl_LocalInvocationIndex is equal to + * "The value of gl_LocalInvocationIndex is equal to * gl_LocalInvocationID.z * gl_WorkGroupSize.x * * gl_WorkGroupSize.y + gl_LocalInvocationID.y * * gl_WorkGroupSize.x + gl_LocalInvocationID.x" @@ -91,15 +91,14 @@ convert_block(nir_block *block, void *void_state) nir_ssa_def *local_id = nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0); - unsigned stride_y = b->shader->info.cs.local_size[0]; - unsigned stride_z = b->shader->info.cs.local_size[0] * - b->shader->info.cs.local_size[1]; + nir_ssa_def *size_x = nir_imm_int(b, b->shader->info.cs.local_size[0]); + nir_ssa_def *size_y = nir_imm_int(b, b->shader->info.cs.local_size[1]); - sysval = nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 2), - nir_imm_int(b, stride_z)), - nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 1), - nir_imm_int(b, stride_y)), - nir_channel(b, local_id, 0))); + sysval = nir_imul(b, nir_channel(b, local_id, 2), + nir_imul(b, size_x, size_y)); + sysval = nir_iadd(b, sysval, + nir_imul(b, nir_channel(b, local_id, 1), size_x)); + sysval = nir_iadd(b, sysval, nir_channel(b, local_id, 0)); break; } -- cgit v1.2.3 From 9e351e077befbdc179f7926edd8b5dde02f20494 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Tue, 12 Apr 2016 11:52:53 -0700 Subject: util: Fix race condition on libgcrypt initialization Fixes intermittent Vulkan CTS failures within the test groups: dEQP-VK.api.object_management.multithreaded_per_thread_device dEQP-VK.api.object_management.multithreaded_per_thread_resources 
dEQP-VK.api.object_management.multithreaded_shared_resources Signed-off-by: Mark Janes Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94904 Reviewed-by: Edward O'Callaghan Reviewed-by: Jason Ekstrand --- src/util/mesa-sha1.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/util/mesa-sha1.c b/src/util/mesa-sha1.c index faa1c871b5d..ca6b89bb2c7 100644 --- a/src/util/mesa-sha1.c +++ b/src/util/mesa-sha1.c @@ -175,21 +175,24 @@ _mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20]) #elif defined(HAVE_SHA1_IN_LIBGCRYPT) /* Use libgcrypt for SHA1 */ #include +#include "c11/threads.h" + +static void _mesa_libgcrypt_init(void) +{ + if (!gcry_check_version(NULL)) + return NULL; + gcry_control(GCRYCTL_DISABLE_SECMEM, 0); + gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0); +} struct mesa_sha1 * _mesa_sha1_init(void) { - static int init; + static once_flag flag = ONCE_FLAG_INIT; gcry_md_hd_t h; gcry_error_t err; - if (!init) { - if (!gcry_check_version(NULL)) - return NULL; - gcry_control(GCRYCTL_DISABLE_SECMEM, 0); - gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0); - init = 1; - } + call_once(&flag, _mesa_libgcrypt_init); err = gcry_md_open(&h, GCRY_MD_SHA1, 0); if (err) -- cgit v1.2.3 From 76b0ba087c50a271867f98eaf2acf0364d5b706e Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 6 Apr 2016 15:57:32 -0700 Subject: anv/clear: Disable the scissor operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the scissor rectangle always matches that of the framebuffer, this operation isn't needed. 
Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_meta_clear.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index a03701c684b..50085894b9c 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -213,6 +213,7 @@ create_pipeline(struct anv_device *device, .color_attachment_count = MAX_RTS, .use_repclear = use_repclear, .disable_viewport = true, + .disable_scissor = true, .disable_vs = true, .use_rectlist = true }, -- cgit v1.2.3 From 9fae6ee02659463259e9d7d90a2edf5261887503 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 29 Mar 2016 13:31:30 -0700 Subject: anv/meta: Don't set the dynamic state for disabled operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CmdSet* functions dirty the CommandBuffer's dynamic state. This causes the new state to be emitted when CmdDraw is called. Since we don't need the state that would be emitted, don't call the CmdSet* functions. 
Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_meta_blit.c | 13 +------------ src/intel/vulkan/anv_meta_blit2d.c | 14 +------------- src/intel/vulkan/anv_meta_clear.c | 29 ----------------------------- src/intel/vulkan/anv_meta_resolve.c | 29 +---------------------------- 4 files changed, 3 insertions(+), 82 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 24e47142a5c..72eb0d1b15a 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -115,8 +115,7 @@ static void meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *saved_state) { - anv_meta_save(saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); + anv_meta_save(saved_state, cmd_buffer, 0); } static void @@ -306,16 +305,6 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } - anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit.pipeline_layout, 0, 1, diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index a7405e00810..1bde38785db 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -344,17 +344,6 @@ blit2d_bind_dst(struct anv_cmd_buffer *cmd_buffer, .height = height, .layers = 1 }, &cmd_buffer->pool->alloc, &tmp->fb); - - - anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = width, - .height = height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); } static void @@ -377,8 +366,7 @@ void anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, struct 
anv_meta_saved_state *save) { - anv_meta_save(save, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); + anv_meta_save(save, cmd_buffer, 0); } static void diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 50085894b9c..7512afeb584 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -44,7 +44,6 @@ meta_clear_begin(struct anv_meta_saved_state *saved_state, { anv_meta_save(saved_state, cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT) | - (1 << VK_DYNAMIC_STATE_SCISSOR) | (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE) | (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)); @@ -397,26 +396,6 @@ emit_color_clear(struct anv_cmd_buffer *cmd_buffer, .offset = state.offset, }; - ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, - (VkViewport[]) { - { - .x = 0, - .y = 0, - .width = fb->width, - .height = fb->height, - .minDepth = 0.0, - .maxDepth = 1.0, - }, - }); - - ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, - (VkRect2D[]) { - { - .offset = { 0, 0 }, - .extent = { fb->width, fb->height }, - } - }); - ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, (VkDeviceSize[]) { 0 }); @@ -596,14 +575,6 @@ emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, }, }); - ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, - (VkRect2D[]) { - { - .offset = { 0, 0 }, - .extent = { fb->width, fb->height }, - } - }); - if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { ANV_CALL(CmdSetStencilReference)(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil); diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 8c1bdc06f84..87ebcaad215 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -41,9 +41,7 @@ static void meta_resolve_save(struct anv_meta_saved_state *saved_state, struct anv_cmd_buffer *cmd_buffer) { - anv_meta_save(saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT) | - (1 << 
VK_DYNAMIC_STATE_SCISSOR)); + anv_meta_save(saved_state, cmd_buffer, 0); cmd_buffer->state.dynamic.viewport.count = 0; cmd_buffer->state.dynamic.scissor.count = 0; @@ -481,7 +479,6 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, struct anv_device *device = cmd_buffer->device; VkDevice device_h = anv_device_to_handle(device); VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_image *src_image = src_iview->image; const struct vertex_attrs vertex_data[3] = { @@ -609,30 +606,6 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, /*copyCount*/ 0, /*copies */ NULL); - ANV_CALL(CmdSetViewport)(cmd_buffer_h, - /*firstViewport*/ 0, - /*viewportCount*/ 1, - (VkViewport[]) { - { - .x = 0, - .y = 0, - .width = fb->width, - .height = fb->height, - .minDepth = 0.0, - .maxDepth = 1.0, - }, - }); - - ANV_CALL(CmdSetScissor)(cmd_buffer_h, - /*firstScissor*/ 0, - /*scissorCount*/ 1, - (VkRect2D[]) { - { - .offset = { 0, 0 }, - .extent = (VkExtent2D) { fb->width, fb->height }, - }, - }); - VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples); ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h); -- cgit v1.2.3 From 88d1c19c9dfd6be1a374917f707e3c77089d7013 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 7 Apr 2016 02:47:28 -0700 Subject: anv_cmd_buffer: Don't make the initial state dirty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid excessive state emission. Relevant state for an action command will get set by the user: From Chapter 5. Command Buffers, When a command buffer begins recording, all state in that command buffer is undefined. [...] Whenever the state of a command buffer is undefined, the application must set all relevant state on the command buffer before any state dependent commands such as draws and dispatches are recorded, otherwise the behavior of executing that command buffer is undefined. 
Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_cmd_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index ac8bf5fc619..5693fab7678 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -123,7 +123,7 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) /* 0 isn't a valid config. This ensures that we always configure L3$. */ cmd_buffer->state.current_l3_config = 0; - state->dirty = ~0; + state->dirty = 0; state->vb_dirty = 0; state->descriptors_dirty = 0; state->push_constants_dirty = 0; -- cgit v1.2.3 From 992bbed98d0eb226c2ad45eafb3cb2ad68f3fed7 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 31 Mar 2016 10:04:46 -0700 Subject: gen{7,8}_pipeline: Apply 3DPRIM_RECTLIST restrictions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to 3D Primitives Overview in the Bspec, when the RECTLIST primitive is in use, the CLIP stage should be disabled or set to have a different Clip Mode, and Viewport Mapping must be disabled: Clipping: Must not require clipping or rely on the CLIP unit’s ClipTest logic to determine if clipping is required. Either the CLIP unit should be DISABLED, or the CLIP unit’s Clip Mode should be set to a value other than CLIPMODE_NORMAL. Viewport Mapping must be DISABLED (as is typical with the use of screen-space coordinates). We swap out ::disable_viewport for ::use_rectlist, because we currently always use the RECTLIST primitive when we disable viewport mapping, and we'll likely continue to use this primitive. 
Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/gen7_pipeline.c | 4 ++-- src/intel/vulkan/gen8_pipeline.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 37e4639b287..10397343e5f 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -47,7 +47,7 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, .StatisticsEnable = true, .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], - .ViewTransformEnable = !(extra && extra->disable_viewport), + .ViewTransformEnable = !(extra && extra->use_rectlist), .FrontWinding = vk_to_gen_front_face[info->frontFace], /* bool AntiAliasingEnable; */ @@ -225,7 +225,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], .CullMode = vk_to_gen_cullmode[rs_info->cullMode], - .ClipEnable = true, + .ClipEnable = !(extra && extra->use_rectlist), .APIMode = APIMODE_OGL, .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), .ClipMode = CLIPMODE_NORMAL, diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index b8b29d46b8a..0d71e0719da 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -56,7 +56,7 @@ emit_rs_state(struct anv_pipeline *pipeline, struct GENX(3DSTATE_SF) sf = { GENX(3DSTATE_SF_header), - .ViewportTransformEnable = !(extra && extra->disable_viewport), + .ViewportTransformEnable = !(extra && extra->use_rectlist), .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, .TriangleFanProvokingVertexSelect = 1, @@ -348,7 +348,7 @@ genX(graphics_pipeline_create)( const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), - 
.ClipEnable = true, + .ClipEnable = !(extra && extra->use_rectlist), .EarlyCullEnable = true, .APIMode = 1, /* D3D */ .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), -- cgit v1.2.3 From cff0f6b027f139cc33c9ecbfd22f9662d75c0cb7 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 31 Mar 2016 23:16:12 -0700 Subject: gen{7,8}_pipeline: Always set ViewportXYClipTestEnable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the following reasons, there is no behavioural change with this commit: the ViewportXYClipTest function of the CLIP stage will continue to be enabled outside of Meta (where disable_viewport is always false), and the CLIP stage is turned off within Meta, so this function will continue to be disabled in that case. Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/gen7_pipeline.c | 2 +- src/intel/vulkan/gen8_pipeline.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 10397343e5f..5c04fb749cc 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -227,7 +227,7 @@ genX(graphics_pipeline_create)( .CullMode = vk_to_gen_cullmode[rs_info->cullMode], .ClipEnable = !(extra && extra->use_rectlist), .APIMode = APIMODE_OGL, - .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .ViewportXYClipTestEnable = true, .ClipMode = CLIPMODE_NORMAL, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 0d71e0719da..7f26ef5e197 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -351,7 +351,7 @@ genX(graphics_pipeline_create)( .ClipEnable = !(extra && extra->use_rectlist), .EarlyCullEnable = true, .APIMode = 1, /* D3D */ - .ViewportXYClipTestEnable = !(extra && 
extra->disable_viewport), + .ViewportXYClipTestEnable = true, .ClipMode = pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? -- cgit v1.2.3 From 9f72466e9f03e72cc805775e8f6104c212150ba7 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 11 Apr 2016 09:57:21 -0700 Subject: anv: Delete anv_graphics_pipeline_create_info::disable_viewport MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no users of this field. Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_meta_blit.c | 1 - src/intel/vulkan/anv_meta_blit2d.c | 1 - src/intel/vulkan/anv_meta_clear.c | 1 - src/intel/vulkan/anv_meta_resolve.c | 1 - src/intel/vulkan/anv_private.h | 1 - 5 files changed, 5 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 72eb0d1b15a..6c3668bdbf7 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -669,7 +669,6 @@ anv_device_init_meta_blit_state(struct anv_device *device) const struct anv_graphics_pipeline_create_info anv_pipeline_info = { .color_attachment_count = -1, .use_repclear = false, - .disable_viewport = true, .disable_scissor = true, .disable_vs = true, .use_rectlist = true diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 1bde38785db..5c1e30c12ac 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -1190,7 +1190,6 @@ blit2d_init_pipeline(struct anv_device *device, const struct anv_graphics_pipeline_create_info anv_pipeline_info = { .color_attachment_count = -1, .use_repclear = false, - .disable_viewport = true, .disable_scissor = true, .disable_vs = true, .use_rectlist = true diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 7512afeb584..6dd3e0be759 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -211,7 +211,6 
@@ create_pipeline(struct anv_device *device, &(struct anv_graphics_pipeline_create_info) { .color_attachment_count = MAX_RTS, .use_repclear = use_repclear, - .disable_viewport = true, .disable_scissor = true, .disable_vs = true, .use_rectlist = true diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 87ebcaad215..9efe6f7d986 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -343,7 +343,6 @@ create_pipeline(struct anv_device *device, &(struct anv_graphics_pipeline_create_info) { .color_attachment_count = -1, .use_repclear = false, - .disable_viewport = true, .disable_scissor = true, .disable_vs = true, .use_rectlist = true diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 99d3934ddda..d62e5baeaa9 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1488,7 +1488,6 @@ struct anv_graphics_pipeline_create_info { int8_t color_attachment_count; bool use_repclear; - bool disable_viewport; bool disable_scissor; bool disable_vs; bool use_rectlist; -- cgit v1.2.3 From 1949e502bc74f0d65127ceef024b2c8af848f94c Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 13 Apr 2016 11:59:54 -0700 Subject: anv: Replace ::disable_scissor with ::use_rectlists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Meta currently uses screenspace RECTLIST primitives that lie within the framebuffer rectangle. Since this behavior shouldn't change in the future, disable the scissor operation whenever rectlists are used. 
Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_meta_blit.c | 1 - src/intel/vulkan/anv_meta_blit2d.c | 1 - src/intel/vulkan/anv_meta_clear.c | 1 - src/intel/vulkan/anv_meta_resolve.c | 1 - src/intel/vulkan/anv_private.h | 1 - src/intel/vulkan/gen7_pipeline.c | 2 +- src/intel/vulkan/gen8_pipeline.c | 2 +- 7 files changed, 2 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 6c3668bdbf7..3c54ef4bafb 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -669,7 +669,6 @@ anv_device_init_meta_blit_state(struct anv_device *device) const struct anv_graphics_pipeline_create_info anv_pipeline_info = { .color_attachment_count = -1, .use_repclear = false, - .disable_scissor = true, .disable_vs = true, .use_rectlist = true }; diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 5c1e30c12ac..577eeaea104 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -1190,7 +1190,6 @@ blit2d_init_pipeline(struct anv_device *device, const struct anv_graphics_pipeline_create_info anv_pipeline_info = { .color_attachment_count = -1, .use_repclear = false, - .disable_scissor = true, .disable_vs = true, .use_rectlist = true }; diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 6dd3e0be759..c5c7d563e91 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -211,7 +211,6 @@ create_pipeline(struct anv_device *device, &(struct anv_graphics_pipeline_create_info) { .color_attachment_count = MAX_RTS, .use_repclear = use_repclear, - .disable_scissor = true, .disable_vs = true, .use_rectlist = true }, diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 9efe6f7d986..f83bb6bbfd3 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ 
b/src/intel/vulkan/anv_meta_resolve.c @@ -343,7 +343,6 @@ create_pipeline(struct anv_device *device, &(struct anv_graphics_pipeline_create_info) { .color_attachment_count = -1, .use_repclear = false, - .disable_scissor = true, .disable_vs = true, .use_rectlist = true }, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index d62e5baeaa9..ae2e08d2dfb 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1488,7 +1488,6 @@ struct anv_graphics_pipeline_create_info { int8_t color_attachment_count; bool use_repclear; - bool disable_scissor; bool disable_vs; bool use_rectlist; }; diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 5c04fb749cc..d6d5ce6778f 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -54,7 +54,7 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, .CullMode = vk_to_gen_cullmode[info->cullMode], /* uint32_t LineEndCapAntialiasingRegionWidth; */ - .ScissorRectangleEnable = !(extra && extra->disable_scissor), + .ScissorRectangleEnable = !(extra && extra->use_rectlist), /* uint32_t MultisampleRasterizationMode; */ /* bool LastPixelEnable; */ diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 7f26ef5e197..6f6868ea5ea 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -82,7 +82,7 @@ emit_rs_state(struct anv_pipeline *pipeline, .CullMode = vk_to_gen_cullmode[info->cullMode], .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], - .ScissorRectangleEnable = !(extra && extra->disable_scissor), + .ScissorRectangleEnable = !(extra && extra->use_rectlist), #if GEN_GEN == 8 .ViewportZClipTestEnable = true, #else -- cgit v1.2.3 From 79fbec30fc16399ede9385ef52cb62cefbb388f4 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 30 Mar 2016 17:13:01 -0700 Subject: anv: Remove default scissor and viewport 
concepts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users should never provide a scissor or viewport count of 0 because they are required to set such state in a graphics pipeline. This behavior was previously only used in Meta, which actually just disables those hardware operations at pipeline creation time. Kristian noticed that the current assignment of viewport count reduces the number of viewport uploads, so it is not removed. Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_meta_clear.c | 2 +- src/intel/vulkan/anv_meta_resolve.c | 3 --- src/intel/vulkan/gen7_cmd_buffer.c | 26 ++++---------------------- src/intel/vulkan/gen8_cmd_buffer.c | 27 ++++----------------------- 4 files changed, 9 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index c5c7d563e91..eb4e56984c3 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -47,8 +47,8 @@ meta_clear_begin(struct anv_meta_saved_state *saved_state, (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE) | (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)); + /* Avoid uploading more viewport states than necessary */ cmd_buffer->state.dynamic.viewport.count = 0; - cmd_buffer->state.dynamic.scissor.count = 0; } static void diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index f83bb6bbfd3..7d2a75bb752 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -42,9 +42,6 @@ meta_resolve_save(struct anv_meta_saved_state *saved_state, struct anv_cmd_buffer *cmd_buffer) { anv_meta_save(saved_state, cmd_buffer, 0); - - cmd_buffer->state.dynamic.viewport.count = 0; - cmd_buffer->state.dynamic.scissor.count = 0; } static void diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index b5d21efb203..5130a40d277 100644 --- 
a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -133,10 +133,11 @@ clamp_int64(int64_t x, int64_t min, int64_t max) } #if GEN_GEN == 7 && !GEN_IS_HASWELL -static void -emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t count, const VkRect2D *scissors) +void +gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) { + uint32_t count = cmd_buffer->state.dynamic.scissor.count; + const VkRect2D *scissors = cmd_buffer->state.dynamic.scissor.scissors; struct anv_state scissor_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); @@ -178,25 +179,6 @@ emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, if (!cmd_buffer->device->info.has_llc) anv_state_clflush(scissor_state); } - -void -gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.dynamic.scissor.count > 0) { - emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, - cmd_buffer->state.dynamic.scissor.scissors); - } else { - /* Emit a default scissor based on the currently bound framebuffer */ - emit_scissor_state(cmd_buffer, 1, - &(VkRect2D) { - .offset = { .x = 0, .y = 0, }, - .extent = { - .width = cmd_buffer->state.framebuffer->width, - .height = cmd_buffer->state.framebuffer->height, - }, - }); - } -} #endif static const uint32_t vk_to_gen_index_type[] = { diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 5b6afb3d70d..3956a58d201 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -33,10 +33,11 @@ #include "genxml/genX_pack.h" #if GEN_GEN == 8 -static void -emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t count, const VkViewport *viewports) +void +gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) { + uint32_t count = cmd_buffer->state.dynamic.viewport.count; + const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports; struct anv_state sf_clip_state = 
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); struct anv_state cc_state = @@ -86,26 +87,6 @@ emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), .SFClipViewportPointer = sf_clip_state.offset); } - -void -gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.dynamic.viewport.count > 0) { - emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count, - cmd_buffer->state.dynamic.viewport.viewports); - } else { - /* If viewport count is 0, this is taken to mean "use the default" */ - emit_viewport_state(cmd_buffer, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = cmd_buffer->state.framebuffer->width, - .height = cmd_buffer->state.framebuffer->height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - } -} #endif #define emit_lri(batch, reg, imm) \ -- cgit v1.2.3 From bfa3a38280d27fe373cb78d666e926265ef80854 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Apr 2016 10:24:33 -0700 Subject: nir: Remove some pointless delta between vulkan and master --- src/compiler/nir/glsl_to_nir.cpp | 1 - src/compiler/nir/nir.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'src') diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp index d4c58a9ba2e..fafa8bbe013 100644 --- a/src/compiler/nir/glsl_to_nir.cpp +++ b/src/compiler/nir/glsl_to_nir.cpp @@ -391,7 +391,6 @@ nir_visitor::visit(ir_variable *ir) } var->data.index = ir->data.index; - var->data.descriptor_set = 0; var->data.binding = ir->data.binding; var->data.offset = ir->data.offset; var->data.image.read_only = ir->data.image_read_only; diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index c3a33431239..fede1954cf0 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2217,8 +2217,6 @@ void nir_lower_outputs_to_temporaries(nir_shader *shader, nir_function *entrypoint); void nir_shader_gather_info(nir_shader *shader, nir_function_impl 
*entrypoint); -void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); - void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, int (*type_size)(const struct glsl_type *)); -- cgit v1.2.3 From c34be07230ef98d5021f0bdc88c3b0bc804ee2dd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Apr 2016 10:28:45 -0700 Subject: spirv: Move to compiler/ While it does rely on NIR, it's not really part of the NIR core. At the moment, it still builds as part of libnir but that can be changed later if desired. --- src/compiler/Makefile.sources | 14 +- src/compiler/nir/spirv/GLSL.std.450.h | 127 -- src/compiler/nir/spirv/nir_spirv.h | 54 - src/compiler/nir/spirv/spirv.h | 870 ---------- src/compiler/nir/spirv/spirv_to_nir.c | 2710 -------------------------------- src/compiler/nir/spirv/vtn_alu.c | 464 ------ src/compiler/nir/spirv/vtn_cfg.c | 778 --------- src/compiler/nir/spirv/vtn_glsl450.c | 666 -------- src/compiler/nir/spirv/vtn_private.h | 484 ------ src/compiler/nir/spirv/vtn_variables.c | 1415 ----------------- src/compiler/spirv/GLSL.std.450.h | 127 ++ src/compiler/spirv/nir_spirv.h | 54 + src/compiler/spirv/spirv.h | 870 ++++++++++ src/compiler/spirv/spirv_to_nir.c | 2710 ++++++++++++++++++++++++++++++++ src/compiler/spirv/vtn_alu.c | 464 ++++++ src/compiler/spirv/vtn_cfg.c | 778 +++++++++ src/compiler/spirv/vtn_glsl450.c | 666 ++++++++ src/compiler/spirv/vtn_private.h | 484 ++++++ src/compiler/spirv/vtn_variables.c | 1415 +++++++++++++++++ src/intel/vulkan/anv_pipeline.c | 2 +- 20 files changed, 7576 insertions(+), 7576 deletions(-) delete mode 100644 src/compiler/nir/spirv/GLSL.std.450.h delete mode 100644 src/compiler/nir/spirv/nir_spirv.h delete mode 100644 src/compiler/nir/spirv/spirv.h delete mode 100644 src/compiler/nir/spirv/spirv_to_nir.c delete mode 100644 src/compiler/nir/spirv/vtn_alu.c delete mode 100644 src/compiler/nir/spirv/vtn_cfg.c delete mode 100644 src/compiler/nir/spirv/vtn_glsl450.c delete mode 
100644 src/compiler/nir/spirv/vtn_private.h delete mode 100644 src/compiler/nir/spirv/vtn_variables.c create mode 100644 src/compiler/spirv/GLSL.std.450.h create mode 100644 src/compiler/spirv/nir_spirv.h create mode 100644 src/compiler/spirv/spirv.h create mode 100644 src/compiler/spirv/spirv_to_nir.c create mode 100644 src/compiler/spirv/vtn_alu.c create mode 100644 src/compiler/spirv/vtn_cfg.c create mode 100644 src/compiler/spirv/vtn_glsl450.c create mode 100644 src/compiler/spirv/vtn_private.h create mode 100644 src/compiler/spirv/vtn_variables.c (limited to 'src') diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index adc7a428469..19735339bca 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -235,10 +235,10 @@ NIR_FILES = \ nir/nir_worklist.h SPIRV_FILES = \ - nir/spirv/nir_spirv.h \ - nir/spirv/spirv_to_nir.c \ - nir/spirv/vtn_alu.c \ - nir/spirv/vtn_cfg.c \ - nir/spirv/vtn_glsl450.c \ - nir/spirv/vtn_private.h \ - nir/spirv/vtn_variables.c + spirv/nir_spirv.h \ + spirv/spirv_to_nir.c \ + spirv/vtn_alu.c \ + spirv/vtn_cfg.c \ + spirv/vtn_glsl450.c \ + spirv/vtn_private.h \ + spirv/vtn_variables.c diff --git a/src/compiler/nir/spirv/GLSL.std.450.h b/src/compiler/nir/spirv/GLSL.std.450.h deleted file mode 100644 index d1c9b5c1d44..00000000000 --- a/src/compiler/nir/spirv/GLSL.std.450.h +++ /dev/null @@ -1,127 +0,0 @@ -/* -** Copyright (c) 2014-2015 The Khronos Group Inc. 
-** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. 
-*/ - -#ifndef GLSLstd450_H -#define GLSLstd450_H - -const int GLSLstd450Version = 99; -const int GLSLstd450Revision = 3; - -enum GLSLstd450 { - GLSLstd450Bad = 0, // Don't use - - GLSLstd450Round = 1, - GLSLstd450RoundEven = 2, - GLSLstd450Trunc = 3, - GLSLstd450FAbs = 4, - GLSLstd450SAbs = 5, - GLSLstd450FSign = 6, - GLSLstd450SSign = 7, - GLSLstd450Floor = 8, - GLSLstd450Ceil = 9, - GLSLstd450Fract = 10, - - GLSLstd450Radians = 11, - GLSLstd450Degrees = 12, - GLSLstd450Sin = 13, - GLSLstd450Cos = 14, - GLSLstd450Tan = 15, - GLSLstd450Asin = 16, - GLSLstd450Acos = 17, - GLSLstd450Atan = 18, - GLSLstd450Sinh = 19, - GLSLstd450Cosh = 20, - GLSLstd450Tanh = 21, - GLSLstd450Asinh = 22, - GLSLstd450Acosh = 23, - GLSLstd450Atanh = 24, - GLSLstd450Atan2 = 25, - - GLSLstd450Pow = 26, - GLSLstd450Exp = 27, - GLSLstd450Log = 28, - GLSLstd450Exp2 = 29, - GLSLstd450Log2 = 30, - GLSLstd450Sqrt = 31, - GLSLstd450InverseSqrt = 32, - - GLSLstd450Determinant = 33, - GLSLstd450MatrixInverse = 34, - - GLSLstd450Modf = 35, // second operand needs an OpVariable to write to - GLSLstd450ModfStruct = 36, // no OpVariable operand - GLSLstd450FMin = 37, - GLSLstd450UMin = 38, - GLSLstd450SMin = 39, - GLSLstd450FMax = 40, - GLSLstd450UMax = 41, - GLSLstd450SMax = 42, - GLSLstd450FClamp = 43, - GLSLstd450UClamp = 44, - GLSLstd450SClamp = 45, - GLSLstd450FMix = 46, - GLSLstd450IMix = 47, - GLSLstd450Step = 48, - GLSLstd450SmoothStep = 49, - - GLSLstd450Fma = 50, - GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to - GLSLstd450FrexpStruct = 52, // no OpVariable operand - GLSLstd450Ldexp = 53, - - GLSLstd450PackSnorm4x8 = 54, - GLSLstd450PackUnorm4x8 = 55, - GLSLstd450PackSnorm2x16 = 56, - GLSLstd450PackUnorm2x16 = 57, - GLSLstd450PackHalf2x16 = 58, - GLSLstd450PackDouble2x32 = 59, - GLSLstd450UnpackSnorm2x16 = 60, - GLSLstd450UnpackUnorm2x16 = 61, - GLSLstd450UnpackHalf2x16 = 62, - GLSLstd450UnpackSnorm4x8 = 63, - GLSLstd450UnpackUnorm4x8 = 64, - 
GLSLstd450UnpackDouble2x32 = 65, - - GLSLstd450Length = 66, - GLSLstd450Distance = 67, - GLSLstd450Cross = 68, - GLSLstd450Normalize = 69, - GLSLstd450FaceForward = 70, - GLSLstd450Reflect = 71, - GLSLstd450Refract = 72, - - GLSLstd450FindILsb = 73, - GLSLstd450FindSMsb = 74, - GLSLstd450FindUMsb = 75, - - GLSLstd450InterpolateAtCentroid = 76, - GLSLstd450InterpolateAtSample = 77, - GLSLstd450InterpolateAtOffset = 78, - - GLSLstd450Count -}; - -#endif // #ifndef GLSLstd450_H diff --git a/src/compiler/nir/spirv/nir_spirv.h b/src/compiler/nir/spirv/nir_spirv.h deleted file mode 100644 index 500f2cb94df..00000000000 --- a/src/compiler/nir/spirv/nir_spirv.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#pragma once - -#ifndef _NIR_SPIRV_H_ -#define _NIR_SPIRV_H_ - -#include "nir/nir.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct nir_spirv_specialization { - uint32_t id; - uint32_t data; -}; - -nir_function *spirv_to_nir(const uint32_t *words, size_t word_count, - struct nir_spirv_specialization *specializations, - unsigned num_specializations, - gl_shader_stage stage, const char *entry_point_name, - const nir_shader_compiler_options *options); - -#ifdef __cplusplus -} -#endif - -#endif /* _NIR_SPIRV_H_ */ diff --git a/src/compiler/nir/spirv/spirv.h b/src/compiler/nir/spirv/spirv.h deleted file mode 100644 index 63bcb2f88dd..00000000000 --- a/src/compiler/nir/spirv/spirv.h +++ /dev/null @@ -1,870 +0,0 @@ -/* -** Copyright (c) 2014-2015 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -/* -** This header is automatically generated by the same tool that creates -** the Binary Section of the SPIR-V specification. -*/ - -/* -** Enumeration tokens for SPIR-V, in various styles: -** C, C++, C++11, JSON, Lua, Python -** -** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL -** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL -** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL -** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL -** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] -** -** Some tokens act like mask values, which can be OR'd together, -** while others are mutually exclusive. The mask-like ones have -** "Mask" in their name, and a parallel enum that has the shift -** amount (1 << x) for each corresponding enumerant. 
-*/ - -#ifndef spirv_H -#define spirv_H - -typedef unsigned int SpvId; - -#define SPV_VERSION 0x10000 -#define SPV_REVISION 2 - -static const unsigned int SpvMagicNumber = 0x07230203; -static const unsigned int SpvVersion = 0x00010000; -static const unsigned int SpvRevision = 2; -static const unsigned int SpvOpCodeMask = 0xffff; -static const unsigned int SpvWordCountShift = 16; - -typedef enum SpvSourceLanguage_ { - SpvSourceLanguageUnknown = 0, - SpvSourceLanguageESSL = 1, - SpvSourceLanguageGLSL = 2, - SpvSourceLanguageOpenCL_C = 3, - SpvSourceLanguageOpenCL_CPP = 4, -} SpvSourceLanguage; - -typedef enum SpvExecutionModel_ { - SpvExecutionModelVertex = 0, - SpvExecutionModelTessellationControl = 1, - SpvExecutionModelTessellationEvaluation = 2, - SpvExecutionModelGeometry = 3, - SpvExecutionModelFragment = 4, - SpvExecutionModelGLCompute = 5, - SpvExecutionModelKernel = 6, -} SpvExecutionModel; - -typedef enum SpvAddressingModel_ { - SpvAddressingModelLogical = 0, - SpvAddressingModelPhysical32 = 1, - SpvAddressingModelPhysical64 = 2, -} SpvAddressingModel; - -typedef enum SpvMemoryModel_ { - SpvMemoryModelSimple = 0, - SpvMemoryModelGLSL450 = 1, - SpvMemoryModelOpenCL = 2, -} SpvMemoryModel; - -typedef enum SpvExecutionMode_ { - SpvExecutionModeInvocations = 0, - SpvExecutionModeSpacingEqual = 1, - SpvExecutionModeSpacingFractionalEven = 2, - SpvExecutionModeSpacingFractionalOdd = 3, - SpvExecutionModeVertexOrderCw = 4, - SpvExecutionModeVertexOrderCcw = 5, - SpvExecutionModePixelCenterInteger = 6, - SpvExecutionModeOriginUpperLeft = 7, - SpvExecutionModeOriginLowerLeft = 8, - SpvExecutionModeEarlyFragmentTests = 9, - SpvExecutionModePointMode = 10, - SpvExecutionModeXfb = 11, - SpvExecutionModeDepthReplacing = 12, - SpvExecutionModeDepthGreater = 14, - SpvExecutionModeDepthLess = 15, - SpvExecutionModeDepthUnchanged = 16, - SpvExecutionModeLocalSize = 17, - SpvExecutionModeLocalSizeHint = 18, - SpvExecutionModeInputPoints = 19, - SpvExecutionModeInputLines = 
20, - SpvExecutionModeInputLinesAdjacency = 21, - SpvExecutionModeTriangles = 22, - SpvExecutionModeInputTrianglesAdjacency = 23, - SpvExecutionModeQuads = 24, - SpvExecutionModeIsolines = 25, - SpvExecutionModeOutputVertices = 26, - SpvExecutionModeOutputPoints = 27, - SpvExecutionModeOutputLineStrip = 28, - SpvExecutionModeOutputTriangleStrip = 29, - SpvExecutionModeVecTypeHint = 30, - SpvExecutionModeContractionOff = 31, -} SpvExecutionMode; - -typedef enum SpvStorageClass_ { - SpvStorageClassUniformConstant = 0, - SpvStorageClassInput = 1, - SpvStorageClassUniform = 2, - SpvStorageClassOutput = 3, - SpvStorageClassWorkgroup = 4, - SpvStorageClassCrossWorkgroup = 5, - SpvStorageClassPrivate = 6, - SpvStorageClassFunction = 7, - SpvStorageClassGeneric = 8, - SpvStorageClassPushConstant = 9, - SpvStorageClassAtomicCounter = 10, - SpvStorageClassImage = 11, -} SpvStorageClass; - -typedef enum SpvDim_ { - SpvDim1D = 0, - SpvDim2D = 1, - SpvDim3D = 2, - SpvDimCube = 3, - SpvDimRect = 4, - SpvDimBuffer = 5, - SpvDimSubpassData = 6, -} SpvDim; - -typedef enum SpvSamplerAddressingMode_ { - SpvSamplerAddressingModeNone = 0, - SpvSamplerAddressingModeClampToEdge = 1, - SpvSamplerAddressingModeClamp = 2, - SpvSamplerAddressingModeRepeat = 3, - SpvSamplerAddressingModeRepeatMirrored = 4, -} SpvSamplerAddressingMode; - -typedef enum SpvSamplerFilterMode_ { - SpvSamplerFilterModeNearest = 0, - SpvSamplerFilterModeLinear = 1, -} SpvSamplerFilterMode; - -typedef enum SpvImageFormat_ { - SpvImageFormatUnknown = 0, - SpvImageFormatRgba32f = 1, - SpvImageFormatRgba16f = 2, - SpvImageFormatR32f = 3, - SpvImageFormatRgba8 = 4, - SpvImageFormatRgba8Snorm = 5, - SpvImageFormatRg32f = 6, - SpvImageFormatRg16f = 7, - SpvImageFormatR11fG11fB10f = 8, - SpvImageFormatR16f = 9, - SpvImageFormatRgba16 = 10, - SpvImageFormatRgb10A2 = 11, - SpvImageFormatRg16 = 12, - SpvImageFormatRg8 = 13, - SpvImageFormatR16 = 14, - SpvImageFormatR8 = 15, - SpvImageFormatRgba16Snorm = 16, - 
SpvImageFormatRg16Snorm = 17, - SpvImageFormatRg8Snorm = 18, - SpvImageFormatR16Snorm = 19, - SpvImageFormatR8Snorm = 20, - SpvImageFormatRgba32i = 21, - SpvImageFormatRgba16i = 22, - SpvImageFormatRgba8i = 23, - SpvImageFormatR32i = 24, - SpvImageFormatRg32i = 25, - SpvImageFormatRg16i = 26, - SpvImageFormatRg8i = 27, - SpvImageFormatR16i = 28, - SpvImageFormatR8i = 29, - SpvImageFormatRgba32ui = 30, - SpvImageFormatRgba16ui = 31, - SpvImageFormatRgba8ui = 32, - SpvImageFormatR32ui = 33, - SpvImageFormatRgb10a2ui = 34, - SpvImageFormatRg32ui = 35, - SpvImageFormatRg16ui = 36, - SpvImageFormatRg8ui = 37, - SpvImageFormatR16ui = 38, - SpvImageFormatR8ui = 39, -} SpvImageFormat; - -typedef enum SpvImageChannelOrder_ { - SpvImageChannelOrderR = 0, - SpvImageChannelOrderA = 1, - SpvImageChannelOrderRG = 2, - SpvImageChannelOrderRA = 3, - SpvImageChannelOrderRGB = 4, - SpvImageChannelOrderRGBA = 5, - SpvImageChannelOrderBGRA = 6, - SpvImageChannelOrderARGB = 7, - SpvImageChannelOrderIntensity = 8, - SpvImageChannelOrderLuminance = 9, - SpvImageChannelOrderRx = 10, - SpvImageChannelOrderRGx = 11, - SpvImageChannelOrderRGBx = 12, - SpvImageChannelOrderDepth = 13, - SpvImageChannelOrderDepthStencil = 14, - SpvImageChannelOrdersRGB = 15, - SpvImageChannelOrdersRGBx = 16, - SpvImageChannelOrdersRGBA = 17, - SpvImageChannelOrdersBGRA = 18, -} SpvImageChannelOrder; - -typedef enum SpvImageChannelDataType_ { - SpvImageChannelDataTypeSnormInt8 = 0, - SpvImageChannelDataTypeSnormInt16 = 1, - SpvImageChannelDataTypeUnormInt8 = 2, - SpvImageChannelDataTypeUnormInt16 = 3, - SpvImageChannelDataTypeUnormShort565 = 4, - SpvImageChannelDataTypeUnormShort555 = 5, - SpvImageChannelDataTypeUnormInt101010 = 6, - SpvImageChannelDataTypeSignedInt8 = 7, - SpvImageChannelDataTypeSignedInt16 = 8, - SpvImageChannelDataTypeSignedInt32 = 9, - SpvImageChannelDataTypeUnsignedInt8 = 10, - SpvImageChannelDataTypeUnsignedInt16 = 11, - SpvImageChannelDataTypeUnsignedInt32 = 12, - 
SpvImageChannelDataTypeHalfFloat = 13, - SpvImageChannelDataTypeFloat = 14, - SpvImageChannelDataTypeUnormInt24 = 15, - SpvImageChannelDataTypeUnormInt101010_2 = 16, -} SpvImageChannelDataType; - -typedef enum SpvImageOperandsShift_ { - SpvImageOperandsBiasShift = 0, - SpvImageOperandsLodShift = 1, - SpvImageOperandsGradShift = 2, - SpvImageOperandsConstOffsetShift = 3, - SpvImageOperandsOffsetShift = 4, - SpvImageOperandsConstOffsetsShift = 5, - SpvImageOperandsSampleShift = 6, - SpvImageOperandsMinLodShift = 7, -} SpvImageOperandsShift; - -typedef enum SpvImageOperandsMask_ { - SpvImageOperandsMaskNone = 0, - SpvImageOperandsBiasMask = 0x00000001, - SpvImageOperandsLodMask = 0x00000002, - SpvImageOperandsGradMask = 0x00000004, - SpvImageOperandsConstOffsetMask = 0x00000008, - SpvImageOperandsOffsetMask = 0x00000010, - SpvImageOperandsConstOffsetsMask = 0x00000020, - SpvImageOperandsSampleMask = 0x00000040, - SpvImageOperandsMinLodMask = 0x00000080, -} SpvImageOperandsMask; - -typedef enum SpvFPFastMathModeShift_ { - SpvFPFastMathModeNotNaNShift = 0, - SpvFPFastMathModeNotInfShift = 1, - SpvFPFastMathModeNSZShift = 2, - SpvFPFastMathModeAllowRecipShift = 3, - SpvFPFastMathModeFastShift = 4, -} SpvFPFastMathModeShift; - -typedef enum SpvFPFastMathModeMask_ { - SpvFPFastMathModeMaskNone = 0, - SpvFPFastMathModeNotNaNMask = 0x00000001, - SpvFPFastMathModeNotInfMask = 0x00000002, - SpvFPFastMathModeNSZMask = 0x00000004, - SpvFPFastMathModeAllowRecipMask = 0x00000008, - SpvFPFastMathModeFastMask = 0x00000010, -} SpvFPFastMathModeMask; - -typedef enum SpvFPRoundingMode_ { - SpvFPRoundingModeRTE = 0, - SpvFPRoundingModeRTZ = 1, - SpvFPRoundingModeRTP = 2, - SpvFPRoundingModeRTN = 3, -} SpvFPRoundingMode; - -typedef enum SpvLinkageType_ { - SpvLinkageTypeExport = 0, - SpvLinkageTypeImport = 1, -} SpvLinkageType; - -typedef enum SpvAccessQualifier_ { - SpvAccessQualifierReadOnly = 0, - SpvAccessQualifierWriteOnly = 1, - SpvAccessQualifierReadWrite = 2, -} 
SpvAccessQualifier; - -typedef enum SpvFunctionParameterAttribute_ { - SpvFunctionParameterAttributeZext = 0, - SpvFunctionParameterAttributeSext = 1, - SpvFunctionParameterAttributeByVal = 2, - SpvFunctionParameterAttributeSret = 3, - SpvFunctionParameterAttributeNoAlias = 4, - SpvFunctionParameterAttributeNoCapture = 5, - SpvFunctionParameterAttributeNoWrite = 6, - SpvFunctionParameterAttributeNoReadWrite = 7, -} SpvFunctionParameterAttribute; - -typedef enum SpvDecoration_ { - SpvDecorationRelaxedPrecision = 0, - SpvDecorationSpecId = 1, - SpvDecorationBlock = 2, - SpvDecorationBufferBlock = 3, - SpvDecorationRowMajor = 4, - SpvDecorationColMajor = 5, - SpvDecorationArrayStride = 6, - SpvDecorationMatrixStride = 7, - SpvDecorationGLSLShared = 8, - SpvDecorationGLSLPacked = 9, - SpvDecorationCPacked = 10, - SpvDecorationBuiltIn = 11, - SpvDecorationNoPerspective = 13, - SpvDecorationFlat = 14, - SpvDecorationPatch = 15, - SpvDecorationCentroid = 16, - SpvDecorationSample = 17, - SpvDecorationInvariant = 18, - SpvDecorationRestrict = 19, - SpvDecorationAliased = 20, - SpvDecorationVolatile = 21, - SpvDecorationConstant = 22, - SpvDecorationCoherent = 23, - SpvDecorationNonWritable = 24, - SpvDecorationNonReadable = 25, - SpvDecorationUniform = 26, - SpvDecorationSaturatedConversion = 28, - SpvDecorationStream = 29, - SpvDecorationLocation = 30, - SpvDecorationComponent = 31, - SpvDecorationIndex = 32, - SpvDecorationBinding = 33, - SpvDecorationDescriptorSet = 34, - SpvDecorationOffset = 35, - SpvDecorationXfbBuffer = 36, - SpvDecorationXfbStride = 37, - SpvDecorationFuncParamAttr = 38, - SpvDecorationFPRoundingMode = 39, - SpvDecorationFPFastMathMode = 40, - SpvDecorationLinkageAttributes = 41, - SpvDecorationNoContraction = 42, - SpvDecorationInputAttachmentIndex = 43, - SpvDecorationAlignment = 44, -} SpvDecoration; - -typedef enum SpvBuiltIn_ { - SpvBuiltInPosition = 0, - SpvBuiltInPointSize = 1, - SpvBuiltInClipDistance = 3, - SpvBuiltInCullDistance = 4, - 
SpvBuiltInVertexId = 5, - SpvBuiltInInstanceId = 6, - SpvBuiltInPrimitiveId = 7, - SpvBuiltInInvocationId = 8, - SpvBuiltInLayer = 9, - SpvBuiltInViewportIndex = 10, - SpvBuiltInTessLevelOuter = 11, - SpvBuiltInTessLevelInner = 12, - SpvBuiltInTessCoord = 13, - SpvBuiltInPatchVertices = 14, - SpvBuiltInFragCoord = 15, - SpvBuiltInPointCoord = 16, - SpvBuiltInFrontFacing = 17, - SpvBuiltInSampleId = 18, - SpvBuiltInSamplePosition = 19, - SpvBuiltInSampleMask = 20, - SpvBuiltInFragDepth = 22, - SpvBuiltInHelperInvocation = 23, - SpvBuiltInNumWorkgroups = 24, - SpvBuiltInWorkgroupSize = 25, - SpvBuiltInWorkgroupId = 26, - SpvBuiltInLocalInvocationId = 27, - SpvBuiltInGlobalInvocationId = 28, - SpvBuiltInLocalInvocationIndex = 29, - SpvBuiltInWorkDim = 30, - SpvBuiltInGlobalSize = 31, - SpvBuiltInEnqueuedWorkgroupSize = 32, - SpvBuiltInGlobalOffset = 33, - SpvBuiltInGlobalLinearId = 34, - SpvBuiltInSubgroupSize = 36, - SpvBuiltInSubgroupMaxSize = 37, - SpvBuiltInNumSubgroups = 38, - SpvBuiltInNumEnqueuedSubgroups = 39, - SpvBuiltInSubgroupId = 40, - SpvBuiltInSubgroupLocalInvocationId = 41, - SpvBuiltInVertexIndex = 42, - SpvBuiltInInstanceIndex = 43, -} SpvBuiltIn; - -typedef enum SpvSelectionControlShift_ { - SpvSelectionControlFlattenShift = 0, - SpvSelectionControlDontFlattenShift = 1, -} SpvSelectionControlShift; - -typedef enum SpvSelectionControlMask_ { - SpvSelectionControlMaskNone = 0, - SpvSelectionControlFlattenMask = 0x00000001, - SpvSelectionControlDontFlattenMask = 0x00000002, -} SpvSelectionControlMask; - -typedef enum SpvLoopControlShift_ { - SpvLoopControlUnrollShift = 0, - SpvLoopControlDontUnrollShift = 1, -} SpvLoopControlShift; - -typedef enum SpvLoopControlMask_ { - SpvLoopControlMaskNone = 0, - SpvLoopControlUnrollMask = 0x00000001, - SpvLoopControlDontUnrollMask = 0x00000002, -} SpvLoopControlMask; - -typedef enum SpvFunctionControlShift_ { - SpvFunctionControlInlineShift = 0, - SpvFunctionControlDontInlineShift = 1, - 
SpvFunctionControlPureShift = 2, - SpvFunctionControlConstShift = 3, -} SpvFunctionControlShift; - -typedef enum SpvFunctionControlMask_ { - SpvFunctionControlMaskNone = 0, - SpvFunctionControlInlineMask = 0x00000001, - SpvFunctionControlDontInlineMask = 0x00000002, - SpvFunctionControlPureMask = 0x00000004, - SpvFunctionControlConstMask = 0x00000008, -} SpvFunctionControlMask; - -typedef enum SpvMemorySemanticsShift_ { - SpvMemorySemanticsAcquireShift = 1, - SpvMemorySemanticsReleaseShift = 2, - SpvMemorySemanticsAcquireReleaseShift = 3, - SpvMemorySemanticsSequentiallyConsistentShift = 4, - SpvMemorySemanticsUniformMemoryShift = 6, - SpvMemorySemanticsSubgroupMemoryShift = 7, - SpvMemorySemanticsWorkgroupMemoryShift = 8, - SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, - SpvMemorySemanticsAtomicCounterMemoryShift = 10, - SpvMemorySemanticsImageMemoryShift = 11, -} SpvMemorySemanticsShift; - -typedef enum SpvMemorySemanticsMask_ { - SpvMemorySemanticsMaskNone = 0, - SpvMemorySemanticsAcquireMask = 0x00000002, - SpvMemorySemanticsReleaseMask = 0x00000004, - SpvMemorySemanticsAcquireReleaseMask = 0x00000008, - SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, - SpvMemorySemanticsUniformMemoryMask = 0x00000040, - SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, - SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, - SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, - SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, - SpvMemorySemanticsImageMemoryMask = 0x00000800, -} SpvMemorySemanticsMask; - -typedef enum SpvMemoryAccessShift_ { - SpvMemoryAccessVolatileShift = 0, - SpvMemoryAccessAlignedShift = 1, - SpvMemoryAccessNontemporalShift = 2, -} SpvMemoryAccessShift; - -typedef enum SpvMemoryAccessMask_ { - SpvMemoryAccessMaskNone = 0, - SpvMemoryAccessVolatileMask = 0x00000001, - SpvMemoryAccessAlignedMask = 0x00000002, - SpvMemoryAccessNontemporalMask = 0x00000004, -} SpvMemoryAccessMask; - -typedef enum SpvScope_ { - SpvScopeCrossDevice = 0, 
- SpvScopeDevice = 1, - SpvScopeWorkgroup = 2, - SpvScopeSubgroup = 3, - SpvScopeInvocation = 4, -} SpvScope; - -typedef enum SpvGroupOperation_ { - SpvGroupOperationReduce = 0, - SpvGroupOperationInclusiveScan = 1, - SpvGroupOperationExclusiveScan = 2, -} SpvGroupOperation; - -typedef enum SpvKernelEnqueueFlags_ { - SpvKernelEnqueueFlagsNoWait = 0, - SpvKernelEnqueueFlagsWaitKernel = 1, - SpvKernelEnqueueFlagsWaitWorkGroup = 2, -} SpvKernelEnqueueFlags; - -typedef enum SpvKernelProfilingInfoShift_ { - SpvKernelProfilingInfoCmdExecTimeShift = 0, -} SpvKernelProfilingInfoShift; - -typedef enum SpvKernelProfilingInfoMask_ { - SpvKernelProfilingInfoMaskNone = 0, - SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, -} SpvKernelProfilingInfoMask; - -typedef enum SpvCapability_ { - SpvCapabilityMatrix = 0, - SpvCapabilityShader = 1, - SpvCapabilityGeometry = 2, - SpvCapabilityTessellation = 3, - SpvCapabilityAddresses = 4, - SpvCapabilityLinkage = 5, - SpvCapabilityKernel = 6, - SpvCapabilityVector16 = 7, - SpvCapabilityFloat16Buffer = 8, - SpvCapabilityFloat16 = 9, - SpvCapabilityFloat64 = 10, - SpvCapabilityInt64 = 11, - SpvCapabilityInt64Atomics = 12, - SpvCapabilityImageBasic = 13, - SpvCapabilityImageReadWrite = 14, - SpvCapabilityImageMipmap = 15, - SpvCapabilityPipes = 17, - SpvCapabilityGroups = 18, - SpvCapabilityDeviceEnqueue = 19, - SpvCapabilityLiteralSampler = 20, - SpvCapabilityAtomicStorage = 21, - SpvCapabilityInt16 = 22, - SpvCapabilityTessellationPointSize = 23, - SpvCapabilityGeometryPointSize = 24, - SpvCapabilityImageGatherExtended = 25, - SpvCapabilityStorageImageMultisample = 27, - SpvCapabilityUniformBufferArrayDynamicIndexing = 28, - SpvCapabilitySampledImageArrayDynamicIndexing = 29, - SpvCapabilityStorageBufferArrayDynamicIndexing = 30, - SpvCapabilityStorageImageArrayDynamicIndexing = 31, - SpvCapabilityClipDistance = 32, - SpvCapabilityCullDistance = 33, - SpvCapabilityImageCubeArray = 34, - SpvCapabilitySampleRateShading = 35, - 
SpvCapabilityImageRect = 36, - SpvCapabilitySampledRect = 37, - SpvCapabilityGenericPointer = 38, - SpvCapabilityInt8 = 39, - SpvCapabilityInputAttachment = 40, - SpvCapabilitySparseResidency = 41, - SpvCapabilityMinLod = 42, - SpvCapabilitySampled1D = 43, - SpvCapabilityImage1D = 44, - SpvCapabilitySampledCubeArray = 45, - SpvCapabilitySampledBuffer = 46, - SpvCapabilityImageBuffer = 47, - SpvCapabilityImageMSArray = 48, - SpvCapabilityStorageImageExtendedFormats = 49, - SpvCapabilityImageQuery = 50, - SpvCapabilityDerivativeControl = 51, - SpvCapabilityInterpolationFunction = 52, - SpvCapabilityTransformFeedback = 53, - SpvCapabilityGeometryStreams = 54, - SpvCapabilityStorageImageReadWithoutFormat = 55, - SpvCapabilityStorageImageWriteWithoutFormat = 56, - SpvCapabilityMultiViewport = 57, -} SpvCapability; - -typedef enum SpvOp_ { - SpvOpNop = 0, - SpvOpUndef = 1, - SpvOpSourceContinued = 2, - SpvOpSource = 3, - SpvOpSourceExtension = 4, - SpvOpName = 5, - SpvOpMemberName = 6, - SpvOpString = 7, - SpvOpLine = 8, - SpvOpExtension = 10, - SpvOpExtInstImport = 11, - SpvOpExtInst = 12, - SpvOpMemoryModel = 14, - SpvOpEntryPoint = 15, - SpvOpExecutionMode = 16, - SpvOpCapability = 17, - SpvOpTypeVoid = 19, - SpvOpTypeBool = 20, - SpvOpTypeInt = 21, - SpvOpTypeFloat = 22, - SpvOpTypeVector = 23, - SpvOpTypeMatrix = 24, - SpvOpTypeImage = 25, - SpvOpTypeSampler = 26, - SpvOpTypeSampledImage = 27, - SpvOpTypeArray = 28, - SpvOpTypeRuntimeArray = 29, - SpvOpTypeStruct = 30, - SpvOpTypeOpaque = 31, - SpvOpTypePointer = 32, - SpvOpTypeFunction = 33, - SpvOpTypeEvent = 34, - SpvOpTypeDeviceEvent = 35, - SpvOpTypeReserveId = 36, - SpvOpTypeQueue = 37, - SpvOpTypePipe = 38, - SpvOpTypeForwardPointer = 39, - SpvOpConstantTrue = 41, - SpvOpConstantFalse = 42, - SpvOpConstant = 43, - SpvOpConstantComposite = 44, - SpvOpConstantSampler = 45, - SpvOpConstantNull = 46, - SpvOpSpecConstantTrue = 48, - SpvOpSpecConstantFalse = 49, - SpvOpSpecConstant = 50, - 
SpvOpSpecConstantComposite = 51, - SpvOpSpecConstantOp = 52, - SpvOpFunction = 54, - SpvOpFunctionParameter = 55, - SpvOpFunctionEnd = 56, - SpvOpFunctionCall = 57, - SpvOpVariable = 59, - SpvOpImageTexelPointer = 60, - SpvOpLoad = 61, - SpvOpStore = 62, - SpvOpCopyMemory = 63, - SpvOpCopyMemorySized = 64, - SpvOpAccessChain = 65, - SpvOpInBoundsAccessChain = 66, - SpvOpPtrAccessChain = 67, - SpvOpArrayLength = 68, - SpvOpGenericPtrMemSemantics = 69, - SpvOpInBoundsPtrAccessChain = 70, - SpvOpDecorate = 71, - SpvOpMemberDecorate = 72, - SpvOpDecorationGroup = 73, - SpvOpGroupDecorate = 74, - SpvOpGroupMemberDecorate = 75, - SpvOpVectorExtractDynamic = 77, - SpvOpVectorInsertDynamic = 78, - SpvOpVectorShuffle = 79, - SpvOpCompositeConstruct = 80, - SpvOpCompositeExtract = 81, - SpvOpCompositeInsert = 82, - SpvOpCopyObject = 83, - SpvOpTranspose = 84, - SpvOpSampledImage = 86, - SpvOpImageSampleImplicitLod = 87, - SpvOpImageSampleExplicitLod = 88, - SpvOpImageSampleDrefImplicitLod = 89, - SpvOpImageSampleDrefExplicitLod = 90, - SpvOpImageSampleProjImplicitLod = 91, - SpvOpImageSampleProjExplicitLod = 92, - SpvOpImageSampleProjDrefImplicitLod = 93, - SpvOpImageSampleProjDrefExplicitLod = 94, - SpvOpImageFetch = 95, - SpvOpImageGather = 96, - SpvOpImageDrefGather = 97, - SpvOpImageRead = 98, - SpvOpImageWrite = 99, - SpvOpImage = 100, - SpvOpImageQueryFormat = 101, - SpvOpImageQueryOrder = 102, - SpvOpImageQuerySizeLod = 103, - SpvOpImageQuerySize = 104, - SpvOpImageQueryLod = 105, - SpvOpImageQueryLevels = 106, - SpvOpImageQuerySamples = 107, - SpvOpConvertFToU = 109, - SpvOpConvertFToS = 110, - SpvOpConvertSToF = 111, - SpvOpConvertUToF = 112, - SpvOpUConvert = 113, - SpvOpSConvert = 114, - SpvOpFConvert = 115, - SpvOpQuantizeToF16 = 116, - SpvOpConvertPtrToU = 117, - SpvOpSatConvertSToU = 118, - SpvOpSatConvertUToS = 119, - SpvOpConvertUToPtr = 120, - SpvOpPtrCastToGeneric = 121, - SpvOpGenericCastToPtr = 122, - SpvOpGenericCastToPtrExplicit = 123, - SpvOpBitcast = 
124, - SpvOpSNegate = 126, - SpvOpFNegate = 127, - SpvOpIAdd = 128, - SpvOpFAdd = 129, - SpvOpISub = 130, - SpvOpFSub = 131, - SpvOpIMul = 132, - SpvOpFMul = 133, - SpvOpUDiv = 134, - SpvOpSDiv = 135, - SpvOpFDiv = 136, - SpvOpUMod = 137, - SpvOpSRem = 138, - SpvOpSMod = 139, - SpvOpFRem = 140, - SpvOpFMod = 141, - SpvOpVectorTimesScalar = 142, - SpvOpMatrixTimesScalar = 143, - SpvOpVectorTimesMatrix = 144, - SpvOpMatrixTimesVector = 145, - SpvOpMatrixTimesMatrix = 146, - SpvOpOuterProduct = 147, - SpvOpDot = 148, - SpvOpIAddCarry = 149, - SpvOpISubBorrow = 150, - SpvOpUMulExtended = 151, - SpvOpSMulExtended = 152, - SpvOpAny = 154, - SpvOpAll = 155, - SpvOpIsNan = 156, - SpvOpIsInf = 157, - SpvOpIsFinite = 158, - SpvOpIsNormal = 159, - SpvOpSignBitSet = 160, - SpvOpLessOrGreater = 161, - SpvOpOrdered = 162, - SpvOpUnordered = 163, - SpvOpLogicalEqual = 164, - SpvOpLogicalNotEqual = 165, - SpvOpLogicalOr = 166, - SpvOpLogicalAnd = 167, - SpvOpLogicalNot = 168, - SpvOpSelect = 169, - SpvOpIEqual = 170, - SpvOpINotEqual = 171, - SpvOpUGreaterThan = 172, - SpvOpSGreaterThan = 173, - SpvOpUGreaterThanEqual = 174, - SpvOpSGreaterThanEqual = 175, - SpvOpULessThan = 176, - SpvOpSLessThan = 177, - SpvOpULessThanEqual = 178, - SpvOpSLessThanEqual = 179, - SpvOpFOrdEqual = 180, - SpvOpFUnordEqual = 181, - SpvOpFOrdNotEqual = 182, - SpvOpFUnordNotEqual = 183, - SpvOpFOrdLessThan = 184, - SpvOpFUnordLessThan = 185, - SpvOpFOrdGreaterThan = 186, - SpvOpFUnordGreaterThan = 187, - SpvOpFOrdLessThanEqual = 188, - SpvOpFUnordLessThanEqual = 189, - SpvOpFOrdGreaterThanEqual = 190, - SpvOpFUnordGreaterThanEqual = 191, - SpvOpShiftRightLogical = 194, - SpvOpShiftRightArithmetic = 195, - SpvOpShiftLeftLogical = 196, - SpvOpBitwiseOr = 197, - SpvOpBitwiseXor = 198, - SpvOpBitwiseAnd = 199, - SpvOpNot = 200, - SpvOpBitFieldInsert = 201, - SpvOpBitFieldSExtract = 202, - SpvOpBitFieldUExtract = 203, - SpvOpBitReverse = 204, - SpvOpBitCount = 205, - SpvOpDPdx = 207, - SpvOpDPdy = 208, - 
SpvOpFwidth = 209, - SpvOpDPdxFine = 210, - SpvOpDPdyFine = 211, - SpvOpFwidthFine = 212, - SpvOpDPdxCoarse = 213, - SpvOpDPdyCoarse = 214, - SpvOpFwidthCoarse = 215, - SpvOpEmitVertex = 218, - SpvOpEndPrimitive = 219, - SpvOpEmitStreamVertex = 220, - SpvOpEndStreamPrimitive = 221, - SpvOpControlBarrier = 224, - SpvOpMemoryBarrier = 225, - SpvOpAtomicLoad = 227, - SpvOpAtomicStore = 228, - SpvOpAtomicExchange = 229, - SpvOpAtomicCompareExchange = 230, - SpvOpAtomicCompareExchangeWeak = 231, - SpvOpAtomicIIncrement = 232, - SpvOpAtomicIDecrement = 233, - SpvOpAtomicIAdd = 234, - SpvOpAtomicISub = 235, - SpvOpAtomicSMin = 236, - SpvOpAtomicUMin = 237, - SpvOpAtomicSMax = 238, - SpvOpAtomicUMax = 239, - SpvOpAtomicAnd = 240, - SpvOpAtomicOr = 241, - SpvOpAtomicXor = 242, - SpvOpPhi = 245, - SpvOpLoopMerge = 246, - SpvOpSelectionMerge = 247, - SpvOpLabel = 248, - SpvOpBranch = 249, - SpvOpBranchConditional = 250, - SpvOpSwitch = 251, - SpvOpKill = 252, - SpvOpReturn = 253, - SpvOpReturnValue = 254, - SpvOpUnreachable = 255, - SpvOpLifetimeStart = 256, - SpvOpLifetimeStop = 257, - SpvOpGroupAsyncCopy = 259, - SpvOpGroupWaitEvents = 260, - SpvOpGroupAll = 261, - SpvOpGroupAny = 262, - SpvOpGroupBroadcast = 263, - SpvOpGroupIAdd = 264, - SpvOpGroupFAdd = 265, - SpvOpGroupFMin = 266, - SpvOpGroupUMin = 267, - SpvOpGroupSMin = 268, - SpvOpGroupFMax = 269, - SpvOpGroupUMax = 270, - SpvOpGroupSMax = 271, - SpvOpReadPipe = 274, - SpvOpWritePipe = 275, - SpvOpReservedReadPipe = 276, - SpvOpReservedWritePipe = 277, - SpvOpReserveReadPipePackets = 278, - SpvOpReserveWritePipePackets = 279, - SpvOpCommitReadPipe = 280, - SpvOpCommitWritePipe = 281, - SpvOpIsValidReserveId = 282, - SpvOpGetNumPipePackets = 283, - SpvOpGetMaxPipePackets = 284, - SpvOpGroupReserveReadPipePackets = 285, - SpvOpGroupReserveWritePipePackets = 286, - SpvOpGroupCommitReadPipe = 287, - SpvOpGroupCommitWritePipe = 288, - SpvOpEnqueueMarker = 291, - SpvOpEnqueueKernel = 292, - 
SpvOpGetKernelNDrangeSubGroupCount = 293, - SpvOpGetKernelNDrangeMaxSubGroupSize = 294, - SpvOpGetKernelWorkGroupSize = 295, - SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, - SpvOpRetainEvent = 297, - SpvOpReleaseEvent = 298, - SpvOpCreateUserEvent = 299, - SpvOpIsValidEvent = 300, - SpvOpSetUserEventStatus = 301, - SpvOpCaptureEventProfilingInfo = 302, - SpvOpGetDefaultQueue = 303, - SpvOpBuildNDRange = 304, - SpvOpImageSparseSampleImplicitLod = 305, - SpvOpImageSparseSampleExplicitLod = 306, - SpvOpImageSparseSampleDrefImplicitLod = 307, - SpvOpImageSparseSampleDrefExplicitLod = 308, - SpvOpImageSparseSampleProjImplicitLod = 309, - SpvOpImageSparseSampleProjExplicitLod = 310, - SpvOpImageSparseSampleProjDrefImplicitLod = 311, - SpvOpImageSparseSampleProjDrefExplicitLod = 312, - SpvOpImageSparseFetch = 313, - SpvOpImageSparseGather = 314, - SpvOpImageSparseDrefGather = 315, - SpvOpImageSparseTexelsResident = 316, - SpvOpNoLine = 317, - SpvOpAtomicFlagTestAndSet = 318, - SpvOpAtomicFlagClear = 319, -} SpvOp; - -#endif // #ifndef spirv_H - diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c deleted file mode 100644 index 99514b49650..00000000000 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ /dev/null @@ -1,2710 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "vtn_private.h" -#include "nir/nir_vla.h" -#include "nir/nir_control_flow.h" -#include "nir/nir_constant_expressions.h" - -static struct vtn_ssa_value * -vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type) -{ - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = type; - - if (glsl_type_is_vector_or_scalar(type)) { - unsigned num_components = glsl_get_vector_elements(val->type); - unsigned bit_size = glsl_get_bit_size(glsl_get_base_type(val->type)); - val->def = nir_ssa_undef(&b->nb, num_components, bit_size); - } else { - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - if (glsl_type_is_matrix(type)) { - const struct glsl_type *elem_type = - glsl_vector_type(glsl_get_base_type(type), - glsl_get_vector_elements(type)); - - for (unsigned i = 0; i < elems; i++) - val->elems[i] = vtn_undef_ssa_value(b, elem_type); - } else if (glsl_type_is_array(type)) { - const struct glsl_type *elem_type = glsl_get_array_element(type); - for (unsigned i = 0; i < elems; i++) - val->elems[i] = vtn_undef_ssa_value(b, elem_type); - } else { - for (unsigned i = 0; i < elems; i++) { - const struct glsl_type *elem_type = glsl_get_struct_field(type, i); - val->elems[i] = vtn_undef_ssa_value(b, elem_type); - } - } - } - - return val; -} - -static struct vtn_ssa_value * -vtn_const_ssa_value(struct vtn_builder *b, nir_constant 
*constant, - const struct glsl_type *type) -{ - struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); - - if (entry) - return entry->data; - - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = type; - - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - if (glsl_type_is_vector_or_scalar(type)) { - unsigned num_components = glsl_get_vector_elements(val->type); - nir_load_const_instr *load = - nir_load_const_instr_create(b->shader, num_components, 32); - - for (unsigned i = 0; i < num_components; i++) - load->value.u32[i] = constant->value.u[i]; - - nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); - val->def = &load->def; - } else { - assert(glsl_type_is_matrix(type)); - unsigned rows = glsl_get_vector_elements(val->type); - unsigned columns = glsl_get_matrix_columns(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); - - for (unsigned i = 0; i < columns; i++) { - struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); - col_val->type = glsl_get_column_type(val->type); - nir_load_const_instr *load = - nir_load_const_instr_create(b->shader, rows, 32); - - for (unsigned j = 0; j < rows; j++) - load->value.u32[j] = constant->value.u[rows * i + j]; - - nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); - col_val->def = &load->def; - - val->elems[i] = col_val; - } - } - break; - - case GLSL_TYPE_ARRAY: { - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - const struct glsl_type *elem_type = glsl_get_array_element(val->type); - for (unsigned i = 0; i < elems; i++) - val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], - elem_type); - break; - } - - case GLSL_TYPE_STRUCT: { - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - for 
(unsigned i = 0; i < elems; i++) { - const struct glsl_type *elem_type = - glsl_get_struct_field(val->type, i); - val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], - elem_type); - } - break; - } - - default: - unreachable("bad constant type"); - } - - return val; -} - -struct vtn_ssa_value * -vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) -{ - struct vtn_value *val = vtn_untyped_value(b, value_id); - switch (val->value_type) { - case vtn_value_type_undef: - return vtn_undef_ssa_value(b, val->type->type); - - case vtn_value_type_constant: - return vtn_const_ssa_value(b, val->constant, val->const_type); - - case vtn_value_type_ssa: - return val->ssa; - - case vtn_value_type_access_chain: - /* This is needed for function parameters */ - return vtn_variable_load(b, val->access_chain); - - default: - unreachable("Invalid type for an SSA value"); - } -} - -static char * -vtn_string_literal(struct vtn_builder *b, const uint32_t *words, - unsigned word_count, unsigned *words_used) -{ - char *dup = ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); - if (words_used) { - /* Ammount of space taken by the string (including the null) */ - unsigned len = strlen(dup) + 1; - *words_used = DIV_ROUND_UP(len, sizeof(*words)); - } - return dup; -} - -const uint32_t * -vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, - const uint32_t *end, vtn_instruction_handler handler) -{ - b->file = NULL; - b->line = -1; - b->col = -1; - - const uint32_t *w = start; - while (w < end) { - SpvOp opcode = w[0] & SpvOpCodeMask; - unsigned count = w[0] >> SpvWordCountShift; - assert(count >= 1 && w + count <= end); - - switch (opcode) { - case SpvOpNop: - break; /* Do nothing */ - - case SpvOpLine: - b->file = vtn_value(b, w[1], vtn_value_type_string)->str; - b->line = w[2]; - b->col = w[3]; - break; - - case SpvOpNoLine: - b->file = NULL; - b->line = -1; - b->col = -1; - break; - - default: - if (!handler(b, opcode, w, count)) - return w; - break; - 
} - - w += count; - } - assert(w == end); - return w; -} - -static void -vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpExtInstImport: { - struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); - if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { - val->ext_handler = vtn_handle_glsl450_instruction; - } else { - assert(!"Unsupported extension"); - } - break; - } - - case SpvOpExtInst: { - struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); - bool handled = val->ext_handler(b, w[4], w, count); - (void)handled; - assert(handled); - break; - } - - default: - unreachable("Unhandled opcode"); - } -} - -static void -_foreach_decoration_helper(struct vtn_builder *b, - struct vtn_value *base_value, - int parent_member, - struct vtn_value *value, - vtn_decoration_foreach_cb cb, void *data) -{ - for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { - int member; - if (dec->scope == VTN_DEC_DECORATION) { - member = parent_member; - } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) { - assert(parent_member == -1); - member = dec->scope - VTN_DEC_STRUCT_MEMBER0; - } else { - /* Not a decoration */ - continue; - } - - if (dec->group) { - assert(dec->group->value_type == vtn_value_type_decoration_group); - _foreach_decoration_helper(b, base_value, member, dec->group, - cb, data); - } else { - cb(b, base_value, member, dec, data); - } - } -} - -/** Iterates (recursively if needed) over all of the decorations on a value - * - * This function iterates over all of the decorations applied to a given - * value. If it encounters a decoration group, it recurses into the group - * and iterates over all of those decorations as well. 
- */ -void -vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, - vtn_decoration_foreach_cb cb, void *data) -{ - _foreach_decoration_helper(b, value, -1, value, cb, data); -} - -void -vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, - vtn_execution_mode_foreach_cb cb, void *data) -{ - for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { - if (dec->scope != VTN_DEC_EXECUTION_MODE) - continue; - - assert(dec->group == NULL); - cb(b, value, dec, data); - } -} - -static void -vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - const uint32_t *w_end = w + count; - const uint32_t target = w[1]; - w += 2; - - switch (opcode) { - case SpvOpDecorationGroup: - vtn_push_value(b, target, vtn_value_type_decoration_group); - break; - - case SpvOpDecorate: - case SpvOpMemberDecorate: - case SpvOpExecutionMode: { - struct vtn_value *val = &b->values[target]; - - struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); - switch (opcode) { - case SpvOpDecorate: - dec->scope = VTN_DEC_DECORATION; - break; - case SpvOpMemberDecorate: - dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); - break; - case SpvOpExecutionMode: - dec->scope = VTN_DEC_EXECUTION_MODE; - break; - default: - unreachable("Invalid decoration opcode"); - } - dec->decoration = *(w++); - dec->literals = w; - - /* Link into the list */ - dec->next = val->decoration; - val->decoration = dec; - break; - } - - case SpvOpGroupMemberDecorate: - case SpvOpGroupDecorate: { - struct vtn_value *group = - vtn_value(b, target, vtn_value_type_decoration_group); - - for (; w < w_end; w++) { - struct vtn_value *val = vtn_untyped_value(b, *w); - struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); - - dec->group = group; - if (opcode == SpvOpGroupDecorate) { - dec->scope = VTN_DEC_DECORATION; - } else { - dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w); - } - - /* Link into the list */ - dec->next = 
val->decoration; - val->decoration = dec; - } - break; - } - - default: - unreachable("Unhandled opcode"); - } -} - -struct member_decoration_ctx { - unsigned num_fields; - struct glsl_struct_field *fields; - struct vtn_type *type; -}; - -/* does a shallow copy of a vtn_type */ - -static struct vtn_type * -vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) -{ - struct vtn_type *dest = ralloc(b, struct vtn_type); - dest->type = src->type; - dest->is_builtin = src->is_builtin; - if (src->is_builtin) - dest->builtin = src->builtin; - - if (!glsl_type_is_scalar(src->type)) { - switch (glsl_get_base_type(src->type)) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_ARRAY: - dest->row_major = src->row_major; - dest->stride = src->stride; - dest->array_element = src->array_element; - break; - - case GLSL_TYPE_STRUCT: { - unsigned elems = glsl_get_length(src->type); - - dest->members = ralloc_array(b, struct vtn_type *, elems); - memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); - - dest->offsets = ralloc_array(b, unsigned, elems); - memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); - break; - } - - default: - unreachable("unhandled type"); - } - } - - return dest; -} - -static struct vtn_type * -mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member) -{ - type->members[member] = vtn_type_copy(b, type->members[member]); - type = type->members[member]; - - /* We may have an array of matrices.... Oh, joy! 
*/ - while (glsl_type_is_array(type->type)) { - type->array_element = vtn_type_copy(b, type->array_element); - type = type->array_element; - } - - assert(glsl_type_is_matrix(type->type)); - - return type; -} - -static void -struct_member_decoration_cb(struct vtn_builder *b, - struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *void_ctx) -{ - struct member_decoration_ctx *ctx = void_ctx; - - if (member < 0) - return; - - assert(member < ctx->num_fields); - - switch (dec->decoration) { - case SpvDecorationRelaxedPrecision: - break; /* FIXME: Do nothing with this for now. */ - case SpvDecorationNoPerspective: - ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; - break; - case SpvDecorationFlat: - ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; - break; - case SpvDecorationCentroid: - ctx->fields[member].centroid = true; - break; - case SpvDecorationSample: - ctx->fields[member].sample = true; - break; - case SpvDecorationLocation: - ctx->fields[member].location = dec->literals[0]; - break; - case SpvDecorationBuiltIn: - ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]); - ctx->type->members[member]->is_builtin = true; - ctx->type->members[member]->builtin = dec->literals[0]; - ctx->type->builtin_block = true; - break; - case SpvDecorationOffset: - ctx->type->offsets[member] = dec->literals[0]; - break; - case SpvDecorationMatrixStride: - mutable_matrix_member(b, ctx->type, member)->stride = dec->literals[0]; - break; - case SpvDecorationColMajor: - break; /* Nothing to do here. Column-major is the default. 
*/ - case SpvDecorationRowMajor: - mutable_matrix_member(b, ctx->type, member)->row_major = true; - break; - default: - unreachable("Unhandled member decoration"); - } -} - -static void -type_decoration_cb(struct vtn_builder *b, - struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *ctx) -{ - struct vtn_type *type = val->type; - - if (member != -1) - return; - - switch (dec->decoration) { - case SpvDecorationArrayStride: - type->stride = dec->literals[0]; - break; - case SpvDecorationBlock: - type->block = true; - break; - case SpvDecorationBufferBlock: - type->buffer_block = true; - break; - case SpvDecorationGLSLShared: - case SpvDecorationGLSLPacked: - /* Ignore these, since we get explicit offsets anyways */ - break; - - case SpvDecorationStream: - assert(dec->literals[0] == 0); - break; - - default: - unreachable("Unhandled type decoration"); - } -} - -static unsigned -translate_image_format(SpvImageFormat format) -{ - switch (format) { - case SpvImageFormatUnknown: return 0; /* GL_NONE */ - case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */ - case SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */ - case SpvImageFormatR32f: return 0x822E; /* GL_R32F */ - case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */ - case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */ - case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */ - case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */ - case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */ - case SpvImageFormatR16f: return 0x822D; /* GL_R16F */ - case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */ - case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */ - case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */ - case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */ - case SpvImageFormatR16: return 0x822A; /* GL_R16 */ - case SpvImageFormatR8: return 0x8229; /* GL_R8 */ - case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */ - 
case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */ - case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */ - case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */ - case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */ - case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */ - case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */ - case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */ - case SpvImageFormatR32i: return 0x8235; /* GL_R32I */ - case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */ - case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */ - case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */ - case SpvImageFormatR16i: return 0x8233; /* GL_R16I */ - case SpvImageFormatR8i: return 0x8231; /* GL_R8I */ - case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */ - case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */ - case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */ - case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */ - case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */ - case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */ - case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */ - case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */ - case SpvImageFormatR16ui: return 0x823A; /* GL_RG16UI */ - case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */ - default: - assert(!"Invalid image format"); - return 0; - } -} - -static void -vtn_handle_type(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); - - val->type = rzalloc(b, struct vtn_type); - val->type->is_builtin = false; - val->type->val = val; - - switch (opcode) { - case SpvOpTypeVoid: - val->type->type = glsl_void_type(); - break; - case SpvOpTypeBool: - val->type->type = glsl_bool_type(); - break; - case SpvOpTypeInt: { - const bool signedness = w[3]; - val->type->type = (signedness ? 
glsl_int_type() : glsl_uint_type()); - break; - } - case SpvOpTypeFloat: - val->type->type = glsl_float_type(); - break; - - case SpvOpTypeVector: { - struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; - unsigned elems = w[3]; - - assert(glsl_type_is_scalar(base->type)); - val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems); - - /* Vectors implicitly have sizeof(base_type) stride. For now, this - * is always 4 bytes. This will have to change if we want to start - * supporting doubles or half-floats. - */ - val->type->stride = 4; - val->type->array_element = base; - break; - } - - case SpvOpTypeMatrix: { - struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; - unsigned columns = w[3]; - - assert(glsl_type_is_vector(base->type)); - val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), - glsl_get_vector_elements(base->type), - columns); - assert(!glsl_type_is_error(val->type->type)); - val->type->array_element = base; - val->type->row_major = false; - val->type->stride = 0; - break; - } - - case SpvOpTypeRuntimeArray: - case SpvOpTypeArray: { - struct vtn_type *array_element = - vtn_value(b, w[2], vtn_value_type_type)->type; - - unsigned length; - if (opcode == SpvOpTypeRuntimeArray) { - /* A length of 0 is used to denote unsized arrays */ - length = 0; - } else { - length = - vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0]; - } - - val->type->type = glsl_array_type(array_element->type, length); - val->type->array_element = array_element; - val->type->stride = 0; - break; - } - - case SpvOpTypeStruct: { - unsigned num_fields = count - 2; - val->type->members = ralloc_array(b, struct vtn_type *, num_fields); - val->type->offsets = ralloc_array(b, unsigned, num_fields); - - NIR_VLA(struct glsl_struct_field, fields, count); - for (unsigned i = 0; i < num_fields; i++) { - val->type->members[i] = - vtn_value(b, w[i + 2], vtn_value_type_type)->type; - fields[i] = (struct 
glsl_struct_field) { - .type = val->type->members[i]->type, - .name = ralloc_asprintf(b, "field%d", i), - .location = -1, - }; - } - - struct member_decoration_ctx ctx = { - .num_fields = num_fields, - .fields = fields, - .type = val->type - }; - - vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); - - const char *name = val->name ? val->name : "struct"; - - val->type->type = glsl_struct_type(fields, num_fields, name); - break; - } - - case SpvOpTypeFunction: { - const struct glsl_type *return_type = - vtn_value(b, w[2], vtn_value_type_type)->type->type; - NIR_VLA(struct glsl_function_param, params, count - 3); - for (unsigned i = 0; i < count - 3; i++) { - params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; - - /* FIXME: */ - params[i].in = true; - params[i].out = true; - } - val->type->type = glsl_function_type(return_type, params, count - 3); - break; - } - - case SpvOpTypePointer: - /* FIXME: For now, we'll just do the really lame thing and return - * the same type. 
The validator should ensure that the proper number - * of dereferences happen - */ - val->type = vtn_value(b, w[3], vtn_value_type_type)->type; - break; - - case SpvOpTypeImage: { - const struct glsl_type *sampled_type = - vtn_value(b, w[2], vtn_value_type_type)->type->type; - - assert(glsl_type_is_vector_or_scalar(sampled_type)); - - enum glsl_sampler_dim dim; - switch ((SpvDim)w[3]) { - case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; - case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; - case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; - case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; - case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; - case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; - default: - unreachable("Invalid SPIR-V Sampler dimension"); - } - - bool is_shadow = w[4]; - bool is_array = w[5]; - bool multisampled = w[6]; - unsigned sampled = w[7]; - SpvImageFormat format = w[8]; - - if (count > 9) - val->type->access_qualifier = w[9]; - else - val->type->access_qualifier = SpvAccessQualifierReadWrite; - - if (multisampled) { - assert(dim == GLSL_SAMPLER_DIM_2D); - dim = GLSL_SAMPLER_DIM_MS; - } - - val->type->image_format = translate_image_format(format); - - if (sampled == 1) { - val->type->type = glsl_sampler_type(dim, is_shadow, is_array, - glsl_get_base_type(sampled_type)); - } else if (sampled == 2) { - assert(format); - assert(!is_shadow); - val->type->type = glsl_image_type(dim, is_array, - glsl_get_base_type(sampled_type)); - } else { - assert(!"We need to know if the image will be sampled"); - } - break; - } - - case SpvOpTypeSampledImage: - val->type = vtn_value(b, w[2], vtn_value_type_type)->type; - break; - - case SpvOpTypeSampler: - /* The actual sampler type here doesn't really matter. It gets - * thrown away the moment you combine it with an image. What really - * matters is that it's a sampler type as opposed to an integer type - * so the backend knows what to do. 
- */ - val->type->type = glsl_bare_sampler_type(); - break; - - case SpvOpTypeOpaque: - case SpvOpTypeEvent: - case SpvOpTypeDeviceEvent: - case SpvOpTypeReserveId: - case SpvOpTypeQueue: - case SpvOpTypePipe: - default: - unreachable("Unhandled opcode"); - } - - vtn_foreach_decoration(b, val, type_decoration_cb, NULL); -} - -static nir_constant * -vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type) -{ - nir_constant *c = rzalloc(b, nir_constant); - - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - /* Nothing to do here. It's already initialized to zero */ - break; - - case GLSL_TYPE_ARRAY: - assert(glsl_get_length(type) > 0); - c->num_elements = glsl_get_length(type); - c->elements = ralloc_array(b, nir_constant *, c->num_elements); - - c->elements[0] = vtn_null_constant(b, glsl_get_array_element(type)); - for (unsigned i = 1; i < c->num_elements; i++) - c->elements[i] = c->elements[0]; - break; - - case GLSL_TYPE_STRUCT: - c->num_elements = glsl_get_length(type); - c->elements = ralloc_array(b, nir_constant *, c->num_elements); - - for (unsigned i = 0; i < c->num_elements; i++) { - c->elements[i] = vtn_null_constant(b, glsl_get_struct_field(type, i)); - } - break; - - default: - unreachable("Invalid type for null constant"); - } - - return c; -} - -static void -spec_constant_deocoration_cb(struct vtn_builder *b, struct vtn_value *v, - int member, const struct vtn_decoration *dec, - void *data) -{ - assert(member == -1); - if (dec->decoration != SpvDecorationSpecId) - return; - - uint32_t *const_value = data; - - for (unsigned i = 0; i < b->num_specializations; i++) { - if (b->specializations[i].id == dec->literals[0]) { - *const_value = b->specializations[i].data; - return; - } - } -} - -static uint32_t -get_specialization(struct vtn_builder *b, struct vtn_value *val, - uint32_t const_value) -{ - vtn_foreach_decoration(b, val, 
spec_constant_deocoration_cb, &const_value); - return const_value; -} - -static void -vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); - val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->constant = rzalloc(b, nir_constant); - switch (opcode) { - case SpvOpConstantTrue: - assert(val->const_type == glsl_bool_type()); - val->constant->value.u[0] = NIR_TRUE; - break; - case SpvOpConstantFalse: - assert(val->const_type == glsl_bool_type()); - val->constant->value.u[0] = NIR_FALSE; - break; - - case SpvOpSpecConstantTrue: - case SpvOpSpecConstantFalse: { - assert(val->const_type == glsl_bool_type()); - uint32_t int_val = - get_specialization(b, val, (opcode == SpvOpSpecConstantTrue)); - val->constant->value.u[0] = int_val ? NIR_TRUE : NIR_FALSE; - break; - } - - case SpvOpConstant: - assert(glsl_type_is_scalar(val->const_type)); - val->constant->value.u[0] = w[3]; - break; - case SpvOpSpecConstant: - assert(glsl_type_is_scalar(val->const_type)); - val->constant->value.u[0] = get_specialization(b, val, w[3]); - break; - case SpvOpSpecConstantComposite: - case SpvOpConstantComposite: { - unsigned elem_count = count - 3; - nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); - for (unsigned i = 0; i < elem_count; i++) - elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; - - switch (glsl_get_base_type(val->const_type)) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(val->const_type)) { - unsigned rows = glsl_get_vector_elements(val->const_type); - assert(glsl_get_matrix_columns(val->const_type) == elem_count); - for (unsigned i = 0; i < elem_count; i++) - for (unsigned j = 0; j < rows; j++) - val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; - } else { - assert(glsl_type_is_vector(val->const_type)); - 
assert(glsl_get_vector_elements(val->const_type) == elem_count); - for (unsigned i = 0; i < elem_count; i++) - val->constant->value.u[i] = elems[i]->value.u[0]; - } - ralloc_free(elems); - break; - - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_ARRAY: - ralloc_steal(val->constant, elems); - val->constant->num_elements = elem_count; - val->constant->elements = elems; - break; - - default: - unreachable("Unsupported type for constants"); - } - break; - } - - case SpvOpSpecConstantOp: { - SpvOp opcode = get_specialization(b, val, w[3]); - switch (opcode) { - case SpvOpVectorShuffle: { - struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant); - struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant); - unsigned len0 = glsl_get_vector_elements(v0->const_type); - unsigned len1 = glsl_get_vector_elements(v1->const_type); - - uint32_t u[8]; - for (unsigned i = 0; i < len0; i++) - u[i] = v0->constant->value.u[i]; - for (unsigned i = 0; i < len1; i++) - u[len0 + i] = v1->constant->value.u[i]; - - for (unsigned i = 0; i < count - 6; i++) { - uint32_t comp = w[i + 6]; - if (comp == (uint32_t)-1) { - val->constant->value.u[i] = 0xdeadbeef; - } else { - val->constant->value.u[i] = u[comp]; - } - } - return; - } - - case SpvOpCompositeExtract: - case SpvOpCompositeInsert: { - struct vtn_value *comp; - unsigned deref_start; - struct nir_constant **c; - if (opcode == SpvOpCompositeExtract) { - comp = vtn_value(b, w[4], vtn_value_type_constant); - deref_start = 5; - c = &comp->constant; - } else { - comp = vtn_value(b, w[5], vtn_value_type_constant); - deref_start = 6; - val->constant = nir_constant_clone(comp->constant, - (nir_variable *)b); - c = &val->constant; - } - - int elem = -1; - const struct glsl_type *type = comp->const_type; - for (unsigned i = deref_start; i < count; i++) { - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - /* If we hit this granularity, we're picking off 
an element */ - if (elem < 0) - elem = 0; - - if (glsl_type_is_matrix(type)) { - elem += w[i] * glsl_get_vector_elements(type); - type = glsl_get_column_type(type); - } else { - assert(glsl_type_is_vector(type)); - elem += w[i]; - type = glsl_scalar_type(glsl_get_base_type(type)); - } - continue; - - case GLSL_TYPE_ARRAY: - c = &(*c)->elements[w[i]]; - type = glsl_get_array_element(type); - continue; - - case GLSL_TYPE_STRUCT: - c = &(*c)->elements[w[i]]; - type = glsl_get_struct_field(type, w[i]); - continue; - - default: - unreachable("Invalid constant type"); - } - } - - if (opcode == SpvOpCompositeExtract) { - if (elem == -1) { - val->constant = *c; - } else { - unsigned num_components = glsl_get_vector_elements(type); - for (unsigned i = 0; i < num_components; i++) - val->constant->value.u[i] = (*c)->value.u[elem + i]; - } - } else { - struct vtn_value *insert = - vtn_value(b, w[4], vtn_value_type_constant); - assert(insert->const_type == type); - if (elem == -1) { - *c = insert->constant; - } else { - unsigned num_components = glsl_get_vector_elements(type); - for (unsigned i = 0; i < num_components; i++) - (*c)->value.u[elem + i] = insert->constant->value.u[i]; - } - } - return; - } - - default: { - bool swap; - nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); - - unsigned num_components = glsl_get_vector_elements(val->const_type); - unsigned bit_size = - glsl_get_bit_size(glsl_get_base_type(val->const_type)); - - nir_const_value src[3]; - assert(count <= 7); - for (unsigned i = 0; i < count - 4; i++) { - nir_constant *c = - vtn_value(b, w[4 + i], vtn_value_type_constant)->constant; - - unsigned j = swap ? 
1 - i : i; - assert(bit_size == 32); - for (unsigned k = 0; k < num_components; k++) - src[j].u32[k] = c->value.u[k]; - } - - nir_const_value res = nir_eval_const_opcode(op, num_components, - bit_size, src); - - for (unsigned k = 0; k < num_components; k++) - val->constant->value.u[k] = res.u32[k]; - - return; - } /* default */ - } - } - - case SpvOpConstantNull: - val->constant = vtn_null_constant(b, val->const_type); - break; - - case SpvOpConstantSampler: - assert(!"OpConstantSampler requires Kernel Capability"); - break; - - default: - unreachable("Unhandled opcode"); - } -} - -static void -vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct nir_function *callee = - vtn_value(b, w[3], vtn_value_type_function)->func->impl->function; - - nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee); - for (unsigned i = 0; i < call->num_params; i++) { - unsigned arg_id = w[4 + i]; - struct vtn_value *arg = vtn_untyped_value(b, arg_id); - if (arg->value_type == vtn_value_type_access_chain) { - nir_deref_var *d = vtn_access_chain_to_deref(b, arg->access_chain); - call->params[i] = nir_deref_as_var(nir_copy_deref(call, &d->deref)); - } else { - struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id); - - /* Make a temporary to store the argument in */ - nir_variable *tmp = - nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp"); - call->params[i] = nir_deref_var_create(call, tmp); - - vtn_local_store(b, arg_ssa, call->params[i]); - } - } - - nir_variable *out_tmp = NULL; - if (!glsl_type_is_void(callee->return_type)) { - out_tmp = nir_local_variable_create(b->impl, callee->return_type, - "out_tmp"); - call->return_deref = nir_deref_var_create(call, out_tmp); - } - - nir_builder_instr_insert(&b->nb, &call->instr); - - if (glsl_type_is_void(callee->return_type)) { - vtn_push_value(b, w[2], vtn_value_type_undef); - } else { - struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa); - 
retval->ssa = vtn_local_load(b, call->return_deref); - } -} - -struct vtn_ssa_value * -vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) -{ - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = type; - - if (!glsl_type_is_vector_or_scalar(type)) { - unsigned elems = glsl_get_length(type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) { - const struct glsl_type *child_type; - - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - child_type = glsl_get_column_type(type); - break; - case GLSL_TYPE_ARRAY: - child_type = glsl_get_array_element(type); - break; - case GLSL_TYPE_STRUCT: - child_type = glsl_get_struct_field(type, i); - break; - default: - unreachable("unkown base type"); - } - - val->elems[i] = vtn_create_ssa_value(b, child_type); - } - } - - return val; -} - -static nir_tex_src -vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) -{ - nir_tex_src src; - src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def); - src.src_type = type; - return src; -} - -static void -vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - if (opcode == SpvOpSampledImage) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_sampled_image); - val->sampled_image = ralloc(b, struct vtn_sampled_image); - val->sampled_image->image = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - val->sampled_image->sampler = - vtn_value(b, w[4], vtn_value_type_access_chain)->access_chain; - return; - } else if (opcode == SpvOpImage) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_access_chain); - struct vtn_value *src_val = vtn_untyped_value(b, w[3]); - if (src_val->value_type == vtn_value_type_sampled_image) { - val->access_chain = src_val->sampled_image->image; - } else { - 
assert(src_val->value_type == vtn_value_type_access_chain); - val->access_chain = src_val->access_chain; - } - return; - } - - struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - - struct vtn_sampled_image sampled; - struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); - if (sampled_val->value_type == vtn_value_type_sampled_image) { - sampled = *sampled_val->sampled_image; - } else { - assert(sampled_val->value_type == vtn_value_type_access_chain); - sampled.image = NULL; - sampled.sampler = sampled_val->access_chain; - } - - const struct glsl_type *image_type; - if (sampled.image) { - image_type = sampled.image->var->var->interface_type; - } else { - image_type = sampled.sampler->var->var->interface_type; - } - - nir_tex_src srcs[8]; /* 8 should be enough */ - nir_tex_src *p = srcs; - - unsigned idx = 4; - - bool has_coord = false; - switch (opcode) { - case SpvOpImageSampleImplicitLod: - case SpvOpImageSampleExplicitLod: - case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleDrefExplicitLod: - case SpvOpImageSampleProjImplicitLod: - case SpvOpImageSampleProjExplicitLod: - case SpvOpImageSampleProjDrefImplicitLod: - case SpvOpImageSampleProjDrefExplicitLod: - case SpvOpImageFetch: - case SpvOpImageGather: - case SpvOpImageDrefGather: - case SpvOpImageQueryLod: { - /* All these types have the coordinate as their first real argument */ - struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); - has_coord = true; - p->src = nir_src_for_ssa(coord->def); - p->src_type = nir_tex_src_coord; - p++; - break; - } - - default: - break; - } - - /* These all have an explicit depth value as their next source */ - switch (opcode) { - case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleDrefExplicitLod: - case SpvOpImageSampleProjDrefImplicitLod: - case SpvOpImageSampleProjDrefExplicitLod: - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor); - break; - 
default: - break; - } - - /* For OpImageQuerySizeLod, we always have an LOD */ - if (opcode == SpvOpImageQuerySizeLod) - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); - - /* Figure out the base texture operation */ - nir_texop texop; - switch (opcode) { - case SpvOpImageSampleImplicitLod: - case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleProjImplicitLod: - case SpvOpImageSampleProjDrefImplicitLod: - texop = nir_texop_tex; - break; - - case SpvOpImageSampleExplicitLod: - case SpvOpImageSampleDrefExplicitLod: - case SpvOpImageSampleProjExplicitLod: - case SpvOpImageSampleProjDrefExplicitLod: - texop = nir_texop_txl; - break; - - case SpvOpImageFetch: - if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) { - texop = nir_texop_txf_ms; - } else { - texop = nir_texop_txf; - } - break; - - case SpvOpImageGather: - case SpvOpImageDrefGather: - texop = nir_texop_tg4; - break; - - case SpvOpImageQuerySizeLod: - case SpvOpImageQuerySize: - texop = nir_texop_txs; - break; - - case SpvOpImageQueryLod: - texop = nir_texop_lod; - break; - - case SpvOpImageQueryLevels: - texop = nir_texop_query_levels; - break; - - case SpvOpImageQuerySamples: - default: - unreachable("Unhandled opcode"); - } - - /* Now we need to handle some number of optional arguments */ - if (idx < count) { - uint32_t operands = w[idx++]; - - if (operands & SpvImageOperandsBiasMask) { - assert(texop == nir_texop_tex); - texop = nir_texop_txb; - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias); - } - - if (operands & SpvImageOperandsLodMask) { - assert(texop == nir_texop_txl || texop == nir_texop_txf || - texop == nir_texop_txf_ms || texop == nir_texop_txs); - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); - } - - if (operands & SpvImageOperandsGradMask) { - assert(texop == nir_texop_tex); - texop = nir_texop_txd; - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx); - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy); - } - - if (operands & SpvImageOperandsOffsetMask || - 
operands & SpvImageOperandsConstOffsetMask) - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset); - - if (operands & SpvImageOperandsConstOffsetsMask) - assert(!"Constant offsets to texture gather not yet implemented"); - - if (operands & SpvImageOperandsSampleMask) { - assert(texop == nir_texop_txf_ms); - texop = nir_texop_txf_ms; - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); - } - } - /* We should have now consumed exactly all of the arguments */ - assert(idx == count); - - nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); - instr->op = texop; - - memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); - - instr->sampler_dim = glsl_get_sampler_dim(image_type); - instr->is_array = glsl_sampler_type_is_array(image_type); - instr->is_shadow = glsl_sampler_type_is_shadow(image_type); - instr->is_new_style_shadow = instr->is_shadow; - - if (has_coord) { - switch (instr->sampler_dim) { - case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: - instr->coord_components = 1; - break; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_RECT: - case GLSL_SAMPLER_DIM_MS: - instr->coord_components = 2; - break; - case GLSL_SAMPLER_DIM_3D: - case GLSL_SAMPLER_DIM_CUBE: - instr->coord_components = 3; - break; - default: - assert("Invalid sampler type"); - } - - if (instr->is_array) - instr->coord_components++; - } else { - instr->coord_components = 0; - } - - switch (glsl_get_sampler_result_type(image_type)) { - case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; - case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; - case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; - case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; - default: - unreachable("Invalid base type for sampler result"); - } - - nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler); - if (sampled.image) { - nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image); - instr->texture = 
nir_deref_as_var(nir_copy_deref(instr, &image->deref)); - } else { - instr->texture = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); - } - - switch (instr->op) { - case nir_texop_tex: - case nir_texop_txb: - case nir_texop_txl: - case nir_texop_txd: - /* These operations require a sampler */ - instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); - break; - case nir_texop_txf: - case nir_texop_txf_ms: - case nir_texop_txs: - case nir_texop_lod: - case nir_texop_tg4: - case nir_texop_query_levels: - case nir_texop_texture_samples: - case nir_texop_samples_identical: - /* These don't */ - instr->sampler = NULL; - break; - } - - nir_ssa_dest_init(&instr->instr, &instr->dest, - nir_tex_instr_dest_size(instr), 32, NULL); - - assert(glsl_get_vector_elements(ret_type->type) == - nir_tex_instr_dest_size(instr)); - - val->ssa = vtn_create_ssa_value(b, ret_type->type); - val->ssa->def = &instr->dest.ssa; - - nir_builder_instr_insert(&b->nb, &instr->instr); -} - -static nir_ssa_def * -get_image_coord(struct vtn_builder *b, uint32_t value) -{ - struct vtn_ssa_value *coord = vtn_ssa_value(b, value); - - /* The image_load_store intrinsics assume a 4-dim coordinate */ - unsigned dim = glsl_get_vector_elements(coord->type); - unsigned swizzle[4]; - for (unsigned i = 0; i < 4; i++) - swizzle[i] = MIN2(i, dim - 1); - - return nir_swizzle(&b->nb, coord->def, swizzle, 4, false); -} - -static void -vtn_handle_image(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - /* Just get this one out of the way */ - if (opcode == SpvOpImageTexelPointer) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_image_pointer); - val->image = ralloc(b, struct vtn_image_pointer); - - val->image->image = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - val->image->coord = get_image_coord(b, w[4]); - val->image->sample = vtn_ssa_value(b, w[5])->def; - return; - } - - struct vtn_image_pointer image; - - switch 
(opcode) { - case SpvOpAtomicExchange: - case SpvOpAtomicCompareExchange: - case SpvOpAtomicCompareExchangeWeak: - case SpvOpAtomicIIncrement: - case SpvOpAtomicIDecrement: - case SpvOpAtomicIAdd: - case SpvOpAtomicISub: - case SpvOpAtomicSMin: - case SpvOpAtomicUMin: - case SpvOpAtomicSMax: - case SpvOpAtomicUMax: - case SpvOpAtomicAnd: - case SpvOpAtomicOr: - case SpvOpAtomicXor: - image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; - break; - - case SpvOpImageQuerySize: - image.image = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - image.coord = NULL; - image.sample = NULL; - break; - - case SpvOpImageRead: - image.image = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - image.coord = get_image_coord(b, w[4]); - - if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) { - assert(w[5] == SpvImageOperandsSampleMask); - image.sample = vtn_ssa_value(b, w[6])->def; - } else { - image.sample = nir_ssa_undef(&b->nb, 1, 32); - } - break; - - case SpvOpImageWrite: - image.image = - vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; - image.coord = get_image_coord(b, w[2]); - - /* texel = w[3] */ - - if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) { - assert(w[4] == SpvImageOperandsSampleMask); - image.sample = vtn_ssa_value(b, w[5])->def; - } else { - image.sample = nir_ssa_undef(&b->nb, 1, 32); - } - break; - - default: - unreachable("Invalid image opcode"); - } - - nir_intrinsic_op op; - switch (opcode) { -#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break; - OP(ImageQuerySize, size) - OP(ImageRead, load) - OP(ImageWrite, store) - OP(AtomicExchange, atomic_exchange) - OP(AtomicCompareExchange, atomic_comp_swap) - OP(AtomicIIncrement, atomic_add) - OP(AtomicIDecrement, atomic_add) - OP(AtomicIAdd, atomic_add) - OP(AtomicISub, atomic_add) - OP(AtomicSMin, atomic_min) - OP(AtomicUMin, atomic_min) - OP(AtomicSMax, atomic_max) - OP(AtomicUMax, atomic_max) - OP(AtomicAnd, atomic_and) - 
OP(AtomicOr, atomic_or) - OP(AtomicXor, atomic_xor) -#undef OP - default: - unreachable("Invalid image opcode"); - } - - nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); - - nir_deref_var *image_deref = vtn_access_chain_to_deref(b, image.image); - intrin->variables[0] = - nir_deref_as_var(nir_copy_deref(&intrin->instr, &image_deref->deref)); - - /* ImageQuerySize doesn't take any extra parameters */ - if (opcode != SpvOpImageQuerySize) { - /* The image coordinate is always 4 components but we may not have that - * many. Swizzle to compensate. - */ - unsigned swiz[4]; - for (unsigned i = 0; i < 4; i++) - swiz[i] = i < image.coord->num_components ? i : 0; - intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord, - swiz, 4, false)); - intrin->src[1] = nir_src_for_ssa(image.sample); - } - - switch (opcode) { - case SpvOpImageQuerySize: - case SpvOpImageRead: - break; - case SpvOpImageWrite: - intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); - break; - case SpvOpAtomicIIncrement: - intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); - break; - case SpvOpAtomicIDecrement: - intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); - break; - - case SpvOpAtomicExchange: - case SpvOpAtomicIAdd: - case SpvOpAtomicSMin: - case SpvOpAtomicUMin: - case SpvOpAtomicSMax: - case SpvOpAtomicUMax: - case SpvOpAtomicAnd: - case SpvOpAtomicOr: - case SpvOpAtomicXor: - intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); - break; - - case SpvOpAtomicCompareExchange: - intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); - intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); - break; - - case SpvOpAtomicISub: - intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); - break; - - default: - unreachable("Invalid image opcode"); - } - - if (opcode != SpvOpImageWrite) { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - struct vtn_type *type = vtn_value(b, 
w[1], vtn_value_type_type)->type; - nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, 32, NULL); - - nir_builder_instr_insert(&b->nb, &intrin->instr); - - /* The image intrinsics always return 4 channels but we may not want - * that many. Emit a mov to trim it down. - */ - unsigned swiz[4] = {0, 1, 2, 3}; - val->ssa = vtn_create_ssa_value(b, type->type); - val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz, - glsl_get_vector_elements(type->type), false); - } else { - nir_builder_instr_insert(&b->nb, &intrin->instr); - } -} - -static nir_intrinsic_op -get_ssbo_nir_atomic_op(SpvOp opcode) -{ - switch (opcode) { -#define OP(S, N) case SpvOp##S: return nir_intrinsic_ssbo_##N; - OP(AtomicExchange, atomic_exchange) - OP(AtomicCompareExchange, atomic_comp_swap) - OP(AtomicIIncrement, atomic_add) - OP(AtomicIDecrement, atomic_add) - OP(AtomicIAdd, atomic_add) - OP(AtomicISub, atomic_add) - OP(AtomicSMin, atomic_imin) - OP(AtomicUMin, atomic_umin) - OP(AtomicSMax, atomic_imax) - OP(AtomicUMax, atomic_umax) - OP(AtomicAnd, atomic_and) - OP(AtomicOr, atomic_or) - OP(AtomicXor, atomic_xor) -#undef OP - default: - unreachable("Invalid SSBO atomic"); - } -} - -static nir_intrinsic_op -get_shared_nir_atomic_op(SpvOp opcode) -{ - switch (opcode) { -#define OP(S, N) case SpvOp##S: return nir_intrinsic_var_##N; - OP(AtomicExchange, atomic_exchange) - OP(AtomicCompareExchange, atomic_comp_swap) - OP(AtomicIIncrement, atomic_add) - OP(AtomicIDecrement, atomic_add) - OP(AtomicIAdd, atomic_add) - OP(AtomicISub, atomic_add) - OP(AtomicSMin, atomic_imin) - OP(AtomicUMin, atomic_umin) - OP(AtomicSMax, atomic_imax) - OP(AtomicUMax, atomic_umax) - OP(AtomicAnd, atomic_and) - OP(AtomicOr, atomic_or) - OP(AtomicXor, atomic_xor) -#undef OP - default: - unreachable("Invalid shared atomic"); - } -} - -static void -fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, nir_src *src) -{ - switch (opcode) { - case SpvOpAtomicIIncrement: - src[0] = 
nir_src_for_ssa(nir_imm_int(&b->nb, 1)); - break; - - case SpvOpAtomicIDecrement: - src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); - break; - - case SpvOpAtomicISub: - src[0] = - nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); - break; - - case SpvOpAtomicCompareExchange: - src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); - src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def); - break; - /* Fall through */ - - case SpvOpAtomicExchange: - case SpvOpAtomicIAdd: - case SpvOpAtomicSMin: - case SpvOpAtomicUMin: - case SpvOpAtomicSMax: - case SpvOpAtomicUMax: - case SpvOpAtomicAnd: - case SpvOpAtomicOr: - case SpvOpAtomicXor: - src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); - break; - - default: - unreachable("Invalid SPIR-V atomic"); - } -} - -static void -vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_access_chain *chain = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - nir_intrinsic_instr *atomic; - - /* - SpvScope scope = w[4]; - SpvMemorySemanticsMask semantics = w[5]; - */ - - if (chain->var->mode == vtn_variable_mode_workgroup) { - nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref; - nir_intrinsic_op op = get_shared_nir_atomic_op(opcode); - atomic = nir_intrinsic_instr_create(b->nb.shader, op); - atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref)); - fill_common_atomic_sources(b, opcode, w, &atomic->src[0]); - } else { - assert(chain->var->mode == vtn_variable_mode_ssbo); - struct vtn_type *type; - nir_ssa_def *offset, *index; - offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false); - - nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode); - - atomic = nir_intrinsic_instr_create(b->nb.shader, op); - atomic->src[0] = nir_src_for_ssa(index); - atomic->src[1] = nir_src_for_ssa(offset); - fill_common_atomic_sources(b, opcode, w, &atomic->src[2]); - } - - nir_ssa_dest_init(&atomic->instr, 
&atomic->dest, 1, 32, NULL); - - struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = rzalloc(b, struct vtn_ssa_value); - val->ssa->def = &atomic->dest.ssa; - val->ssa->type = type->type; - - nir_builder_instr_insert(&b->nb, &atomic->instr); -} - -static nir_alu_instr * -create_vec(nir_shader *shader, unsigned num_components, unsigned bit_size) -{ - nir_op op; - switch (num_components) { - case 1: op = nir_op_fmov; break; - case 2: op = nir_op_vec2; break; - case 3: op = nir_op_vec3; break; - case 4: op = nir_op_vec4; break; - default: unreachable("bad vector size"); - } - - nir_alu_instr *vec = nir_alu_instr_create(shader, op); - nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, - bit_size, NULL); - vec->dest.write_mask = (1 << num_components) - 1; - - return vec; -} - -struct vtn_ssa_value * -vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) -{ - if (src->transposed) - return src->transposed; - - struct vtn_ssa_value *dest = - vtn_create_ssa_value(b, glsl_transposed_type(src->type)); - - for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { - nir_alu_instr *vec = create_vec(b->shader, - glsl_get_matrix_columns(src->type), - glsl_get_bit_size(glsl_get_base_type(src->type))); - if (glsl_type_is_vector_or_scalar(src->type)) { - vec->src[0].src = nir_src_for_ssa(src->def); - vec->src[0].swizzle[0] = i; - } else { - for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { - vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); - vec->src[j].swizzle[0] = i; - } - } - nir_builder_instr_insert(&b->nb, &vec->instr); - dest->elems[i]->def = &vec->dest.dest.ssa; - } - - dest->transposed = src; - - return dest; -} - -nir_ssa_def * -vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) -{ - unsigned swiz[4] = { index }; - return nir_swizzle(&b->nb, src, swiz, 1, true); -} - -nir_ssa_def * 
-vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, - unsigned index) -{ - nir_alu_instr *vec = create_vec(b->shader, src->num_components, - src->bit_size); - - for (unsigned i = 0; i < src->num_components; i++) { - if (i == index) { - vec->src[i].src = nir_src_for_ssa(insert); - } else { - vec->src[i].src = nir_src_for_ssa(src); - vec->src[i].swizzle[0] = i; - } - } - - nir_builder_instr_insert(&b->nb, &vec->instr); - - return &vec->dest.dest.ssa; -} - -nir_ssa_def * -vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, - nir_ssa_def *index) -{ - nir_ssa_def *dest = vtn_vector_extract(b, src, 0); - for (unsigned i = 1; i < src->num_components; i++) - dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), - vtn_vector_extract(b, src, i), dest); - - return dest; -} - -nir_ssa_def * -vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, - nir_ssa_def *insert, nir_ssa_def *index) -{ - nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); - for (unsigned i = 1; i < src->num_components; i++) - dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), - vtn_vector_insert(b, src, insert, i), dest); - - return dest; -} - -static nir_ssa_def * -vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, - nir_ssa_def *src0, nir_ssa_def *src1, - const uint32_t *indices) -{ - nir_alu_instr *vec = create_vec(b->shader, num_components, src0->bit_size); - - for (unsigned i = 0; i < num_components; i++) { - uint32_t index = indices[i]; - if (index == 0xffffffff) { - vec->src[i].src = - nir_src_for_ssa(nir_ssa_undef(&b->nb, 1, src0->bit_size)); - } else if (index < src0->num_components) { - vec->src[i].src = nir_src_for_ssa(src0); - vec->src[i].swizzle[0] = index; - } else { - vec->src[i].src = nir_src_for_ssa(src1); - vec->src[i].swizzle[0] = index - src0->num_components; - } - } - - nir_builder_instr_insert(&b->nb, &vec->instr); - - return &vec->dest.dest.ssa; -} - -/* - * 
Concatentates a number of vectors/scalars together to produce a vector - */ -static nir_ssa_def * -vtn_vector_construct(struct vtn_builder *b, unsigned num_components, - unsigned num_srcs, nir_ssa_def **srcs) -{ - nir_alu_instr *vec = create_vec(b->shader, num_components, - srcs[0]->bit_size); - - unsigned dest_idx = 0; - for (unsigned i = 0; i < num_srcs; i++) { - nir_ssa_def *src = srcs[i]; - for (unsigned j = 0; j < src->num_components; j++) { - vec->src[dest_idx].src = nir_src_for_ssa(src); - vec->src[dest_idx].swizzle[0] = j; - dest_idx++; - } - } - - nir_builder_instr_insert(&b->nb, &vec->instr); - - return &vec->dest.dest.ssa; -} - -static struct vtn_ssa_value * -vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) -{ - struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); - dest->type = src->type; - - if (glsl_type_is_vector_or_scalar(src->type)) { - dest->def = src->def; - } else { - unsigned elems = glsl_get_length(src->type); - - dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) - dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); - } - - return dest; -} - -static struct vtn_ssa_value * -vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, - struct vtn_ssa_value *insert, const uint32_t *indices, - unsigned num_indices) -{ - struct vtn_ssa_value *dest = vtn_composite_copy(b, src); - - struct vtn_ssa_value *cur = dest; - unsigned i; - for (i = 0; i < num_indices - 1; i++) { - cur = cur->elems[indices[i]]; - } - - if (glsl_type_is_vector_or_scalar(cur->type)) { - /* According to the SPIR-V spec, OpCompositeInsert may work down to - * the component granularity. In that case, the last index will be - * the index to insert the scalar into the vector. 
- */ - - cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); - } else { - cur->elems[indices[i]] = insert; - } - - return dest; -} - -static struct vtn_ssa_value * -vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, - const uint32_t *indices, unsigned num_indices) -{ - struct vtn_ssa_value *cur = src; - for (unsigned i = 0; i < num_indices; i++) { - if (glsl_type_is_vector_or_scalar(cur->type)) { - assert(i == num_indices - 1); - /* According to the SPIR-V spec, OpCompositeExtract may work down to - * the component granularity. The last index will be the index of the - * vector to extract. - */ - - struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); - ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); - ret->def = vtn_vector_extract(b, cur->def, indices[i]); - return ret; - } else { - cur = cur->elems[indices[i]]; - } - } - - return cur; -} - -static void -vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->ssa = vtn_create_ssa_value(b, type); - - switch (opcode) { - case SpvOpVectorExtractDynamic: - val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def); - break; - - case SpvOpVectorInsertDynamic: - val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def, - vtn_ssa_value(b, w[5])->def); - break; - - case SpvOpVectorShuffle: - val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), - vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def, - w + 5); - break; - - case SpvOpCompositeConstruct: { - unsigned elems = count - 3; - if (glsl_type_is_vector_or_scalar(type)) { - nir_ssa_def *srcs[4]; - for (unsigned i = 0; i < elems; i++) - srcs[i] = vtn_ssa_value(b, w[3 + i])->def; - 
val->ssa->def = - vtn_vector_construct(b, glsl_get_vector_elements(type), - elems, srcs); - } else { - val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) - val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); - } - break; - } - case SpvOpCompositeExtract: - val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), - w + 4, count - 4); - break; - - case SpvOpCompositeInsert: - val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), - vtn_ssa_value(b, w[3]), - w + 5, count - 5); - break; - - case SpvOpCopyObject: - val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); - break; - - default: - unreachable("unknown composite operation"); - } -} - -static void -vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - nir_intrinsic_op intrinsic_op; - switch (opcode) { - case SpvOpEmitVertex: - case SpvOpEmitStreamVertex: - intrinsic_op = nir_intrinsic_emit_vertex; - break; - case SpvOpEndPrimitive: - case SpvOpEndStreamPrimitive: - intrinsic_op = nir_intrinsic_end_primitive; - break; - case SpvOpMemoryBarrier: - intrinsic_op = nir_intrinsic_memory_barrier; - break; - case SpvOpControlBarrier: - intrinsic_op = nir_intrinsic_barrier; - break; - default: - unreachable("unknown barrier instruction"); - } - - nir_intrinsic_instr *intrin = - nir_intrinsic_instr_create(b->shader, intrinsic_op); - - if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) - nir_intrinsic_set_stream_id(intrin, w[1]); - - nir_builder_instr_insert(&b->nb, &intrin->instr); -} - -static unsigned -gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) -{ - switch (mode) { - case SpvExecutionModeInputPoints: - case SpvExecutionModeOutputPoints: - return 0; /* GL_POINTS */ - case SpvExecutionModeInputLines: - return 1; /* GL_LINES */ - case SpvExecutionModeInputLinesAdjacency: - return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */ - case SpvExecutionModeTriangles: - return 4; /* 
GL_TRIANGLES */ - case SpvExecutionModeInputTrianglesAdjacency: - return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */ - case SpvExecutionModeQuads: - return 7; /* GL_QUADS */ - case SpvExecutionModeIsolines: - return 0x8E7A; /* GL_ISOLINES */ - case SpvExecutionModeOutputLineStrip: - return 3; /* GL_LINE_STRIP */ - case SpvExecutionModeOutputTriangleStrip: - return 5; /* GL_TRIANGLE_STRIP */ - default: - assert(!"Invalid primitive type"); - return 4; - } -} - -static unsigned -vertices_in_from_spv_execution_mode(SpvExecutionMode mode) -{ - switch (mode) { - case SpvExecutionModeInputPoints: - return 1; - case SpvExecutionModeInputLines: - return 2; - case SpvExecutionModeInputLinesAdjacency: - return 4; - case SpvExecutionModeTriangles: - return 3; - case SpvExecutionModeInputTrianglesAdjacency: - return 6; - default: - assert(!"Invalid GS input mode"); - return 0; - } -} - -static gl_shader_stage -stage_for_execution_model(SpvExecutionModel model) -{ - switch (model) { - case SpvExecutionModelVertex: - return MESA_SHADER_VERTEX; - case SpvExecutionModelTessellationControl: - return MESA_SHADER_TESS_CTRL; - case SpvExecutionModelTessellationEvaluation: - return MESA_SHADER_TESS_EVAL; - case SpvExecutionModelGeometry: - return MESA_SHADER_GEOMETRY; - case SpvExecutionModelFragment: - return MESA_SHADER_FRAGMENT; - case SpvExecutionModelGLCompute: - return MESA_SHADER_COMPUTE; - default: - unreachable("Unsupported execution model"); - } -} - -static bool -vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpSource: - case SpvOpSourceExtension: - case SpvOpSourceContinued: - case SpvOpExtension: - /* Unhandled, but these are for debug so that's ok. 
*/ - break; - - case SpvOpCapability: { - SpvCapability cap = w[1]; - switch (cap) { - case SpvCapabilityMatrix: - case SpvCapabilityShader: - case SpvCapabilityGeometry: - case SpvCapabilityTessellationPointSize: - case SpvCapabilityGeometryPointSize: - case SpvCapabilityUniformBufferArrayDynamicIndexing: - case SpvCapabilitySampledImageArrayDynamicIndexing: - case SpvCapabilityStorageBufferArrayDynamicIndexing: - case SpvCapabilityStorageImageArrayDynamicIndexing: - case SpvCapabilityImageRect: - case SpvCapabilitySampledRect: - case SpvCapabilitySampled1D: - case SpvCapabilityImage1D: - case SpvCapabilitySampledCubeArray: - case SpvCapabilitySampledBuffer: - case SpvCapabilityImageBuffer: - case SpvCapabilityImageQuery: - break; - case SpvCapabilityClipDistance: - case SpvCapabilityCullDistance: - case SpvCapabilityGeometryStreams: - fprintf(stderr, "WARNING: Unsupported SPIR-V Capability\n"); - break; - default: - assert(!"Unsupported capability"); - } - break; - } - - case SpvOpExtInstImport: - vtn_handle_extension(b, opcode, w, count); - break; - - case SpvOpMemoryModel: - assert(w[1] == SpvAddressingModelLogical); - assert(w[2] == SpvMemoryModelGLSL450); - break; - - case SpvOpEntryPoint: { - struct vtn_value *entry_point = &b->values[w[2]]; - /* Let this be a name label regardless */ - unsigned name_words; - entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words); - - if (strcmp(entry_point->name, b->entry_point_name) != 0 || - stage_for_execution_model(w[1]) != b->entry_point_stage) - break; - - assert(b->entry_point == NULL); - b->entry_point = entry_point; - break; - } - - case SpvOpString: - vtn_push_value(b, w[1], vtn_value_type_string)->str = - vtn_string_literal(b, &w[2], count - 2, NULL); - break; - - case SpvOpName: - b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL); - break; - - case SpvOpMemberName: - /* TODO */ - break; - - case SpvOpExecutionMode: - case SpvOpDecorationGroup: - case SpvOpDecorate: - case 
SpvOpMemberDecorate: - case SpvOpGroupDecorate: - case SpvOpGroupMemberDecorate: - vtn_handle_decoration(b, opcode, w, count); - break; - - default: - return false; /* End of preamble */ - } - - return true; -} - -static void -vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point, - const struct vtn_decoration *mode, void *data) -{ - assert(b->entry_point == entry_point); - - switch(mode->exec_mode) { - case SpvExecutionModeOriginUpperLeft: - case SpvExecutionModeOriginLowerLeft: - b->origin_upper_left = - (mode->exec_mode == SpvExecutionModeOriginUpperLeft); - break; - - case SpvExecutionModeEarlyFragmentTests: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.early_fragment_tests = true; - break; - - case SpvExecutionModeInvocations: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader->info.gs.invocations = MAX2(1, mode->literals[0]); - break; - - case SpvExecutionModeDepthReplacing: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; - break; - case SpvExecutionModeDepthGreater: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; - break; - case SpvExecutionModeDepthLess: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; - break; - case SpvExecutionModeDepthUnchanged: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; - break; - - case SpvExecutionModeLocalSize: - assert(b->shader->stage == MESA_SHADER_COMPUTE); - b->shader->info.cs.local_size[0] = mode->literals[0]; - b->shader->info.cs.local_size[1] = mode->literals[1]; - b->shader->info.cs.local_size[2] = mode->literals[2]; - break; - case SpvExecutionModeLocalSizeHint: - break; /* Nothing do do with this */ - - case SpvExecutionModeOutputVertices: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - 
b->shader->info.gs.vertices_out = mode->literals[0]; - break; - - case SpvExecutionModeInputPoints: - case SpvExecutionModeInputLines: - case SpvExecutionModeInputLinesAdjacency: - case SpvExecutionModeTriangles: - case SpvExecutionModeInputTrianglesAdjacency: - case SpvExecutionModeQuads: - case SpvExecutionModeIsolines: - if (b->shader->stage == MESA_SHADER_GEOMETRY) { - b->shader->info.gs.vertices_in = - vertices_in_from_spv_execution_mode(mode->exec_mode); - } else { - assert(!"Tesselation shaders not yet supported"); - } - break; - - case SpvExecutionModeOutputPoints: - case SpvExecutionModeOutputLineStrip: - case SpvExecutionModeOutputTriangleStrip: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader->info.gs.output_primitive = - gl_primitive_from_spv_execution_mode(mode->exec_mode); - break; - - case SpvExecutionModeSpacingEqual: - case SpvExecutionModeSpacingFractionalEven: - case SpvExecutionModeSpacingFractionalOdd: - case SpvExecutionModeVertexOrderCw: - case SpvExecutionModeVertexOrderCcw: - case SpvExecutionModePointMode: - assert(!"TODO: Add tessellation metadata"); - break; - - case SpvExecutionModePixelCenterInteger: - case SpvExecutionModeXfb: - assert(!"Unhandled execution mode"); - break; - - case SpvExecutionModeVecTypeHint: - case SpvExecutionModeContractionOff: - break; /* OpenCL */ - } -} - -static bool -vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpSource: - case SpvOpSourceContinued: - case SpvOpSourceExtension: - case SpvOpExtension: - case SpvOpCapability: - case SpvOpExtInstImport: - case SpvOpMemoryModel: - case SpvOpEntryPoint: - case SpvOpExecutionMode: - case SpvOpString: - case SpvOpName: - case SpvOpMemberName: - case SpvOpDecorationGroup: - case SpvOpDecorate: - case SpvOpMemberDecorate: - case SpvOpGroupDecorate: - case SpvOpGroupMemberDecorate: - assert(!"Invalid opcode types and variables section"); - break; - - 
case SpvOpTypeVoid: - case SpvOpTypeBool: - case SpvOpTypeInt: - case SpvOpTypeFloat: - case SpvOpTypeVector: - case SpvOpTypeMatrix: - case SpvOpTypeImage: - case SpvOpTypeSampler: - case SpvOpTypeSampledImage: - case SpvOpTypeArray: - case SpvOpTypeRuntimeArray: - case SpvOpTypeStruct: - case SpvOpTypeOpaque: - case SpvOpTypePointer: - case SpvOpTypeFunction: - case SpvOpTypeEvent: - case SpvOpTypeDeviceEvent: - case SpvOpTypeReserveId: - case SpvOpTypeQueue: - case SpvOpTypePipe: - vtn_handle_type(b, opcode, w, count); - break; - - case SpvOpConstantTrue: - case SpvOpConstantFalse: - case SpvOpConstant: - case SpvOpConstantComposite: - case SpvOpConstantSampler: - case SpvOpConstantNull: - case SpvOpSpecConstantTrue: - case SpvOpSpecConstantFalse: - case SpvOpSpecConstant: - case SpvOpSpecConstantComposite: - case SpvOpSpecConstantOp: - vtn_handle_constant(b, opcode, w, count); - break; - - case SpvOpVariable: - vtn_handle_variables(b, opcode, w, count); - break; - - default: - return false; /* End of preamble */ - } - - return true; -} - -static bool -vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpLabel: - break; - - case SpvOpLoopMerge: - case SpvOpSelectionMerge: - /* This is handled by cfg pre-pass and walk_blocks */ - break; - - case SpvOpUndef: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef); - val->type = vtn_value(b, w[1], vtn_value_type_type)->type; - break; - } - - case SpvOpExtInst: - vtn_handle_extension(b, opcode, w, count); - break; - - case SpvOpVariable: - case SpvOpLoad: - case SpvOpStore: - case SpvOpCopyMemory: - case SpvOpCopyMemorySized: - case SpvOpAccessChain: - case SpvOpInBoundsAccessChain: - case SpvOpArrayLength: - vtn_handle_variables(b, opcode, w, count); - break; - - case SpvOpFunctionCall: - vtn_handle_function_call(b, opcode, w, count); - break; - - case SpvOpSampledImage: - case SpvOpImage: - case 
SpvOpImageSampleImplicitLod: - case SpvOpImageSampleExplicitLod: - case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleDrefExplicitLod: - case SpvOpImageSampleProjImplicitLod: - case SpvOpImageSampleProjExplicitLod: - case SpvOpImageSampleProjDrefImplicitLod: - case SpvOpImageSampleProjDrefExplicitLod: - case SpvOpImageFetch: - case SpvOpImageGather: - case SpvOpImageDrefGather: - case SpvOpImageQuerySizeLod: - case SpvOpImageQueryLod: - case SpvOpImageQueryLevels: - case SpvOpImageQuerySamples: - vtn_handle_texture(b, opcode, w, count); - break; - - case SpvOpImageRead: - case SpvOpImageWrite: - case SpvOpImageTexelPointer: - vtn_handle_image(b, opcode, w, count); - break; - - case SpvOpImageQuerySize: { - struct vtn_access_chain *image = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - if (glsl_type_is_image(image->var->var->interface_type)) { - vtn_handle_image(b, opcode, w, count); - } else { - vtn_handle_texture(b, opcode, w, count); - } - break; - } - - case SpvOpAtomicExchange: - case SpvOpAtomicCompareExchange: - case SpvOpAtomicCompareExchangeWeak: - case SpvOpAtomicIIncrement: - case SpvOpAtomicIDecrement: - case SpvOpAtomicIAdd: - case SpvOpAtomicISub: - case SpvOpAtomicSMin: - case SpvOpAtomicUMin: - case SpvOpAtomicSMax: - case SpvOpAtomicUMax: - case SpvOpAtomicAnd: - case SpvOpAtomicOr: - case SpvOpAtomicXor: { - struct vtn_value *pointer = vtn_untyped_value(b, w[3]); - if (pointer->value_type == vtn_value_type_image_pointer) { - vtn_handle_image(b, opcode, w, count); - } else { - assert(pointer->value_type == vtn_value_type_access_chain); - vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count); - } - break; - } - - case SpvOpSNegate: - case SpvOpFNegate: - case SpvOpNot: - case SpvOpAny: - case SpvOpAll: - case SpvOpConvertFToU: - case SpvOpConvertFToS: - case SpvOpConvertSToF: - case SpvOpConvertUToF: - case SpvOpUConvert: - case SpvOpSConvert: - case SpvOpFConvert: - case SpvOpQuantizeToF16: - case SpvOpConvertPtrToU: - 
case SpvOpConvertUToPtr: - case SpvOpPtrCastToGeneric: - case SpvOpGenericCastToPtr: - case SpvOpBitcast: - case SpvOpIsNan: - case SpvOpIsInf: - case SpvOpIsFinite: - case SpvOpIsNormal: - case SpvOpSignBitSet: - case SpvOpLessOrGreater: - case SpvOpOrdered: - case SpvOpUnordered: - case SpvOpIAdd: - case SpvOpFAdd: - case SpvOpISub: - case SpvOpFSub: - case SpvOpIMul: - case SpvOpFMul: - case SpvOpUDiv: - case SpvOpSDiv: - case SpvOpFDiv: - case SpvOpUMod: - case SpvOpSRem: - case SpvOpSMod: - case SpvOpFRem: - case SpvOpFMod: - case SpvOpVectorTimesScalar: - case SpvOpDot: - case SpvOpIAddCarry: - case SpvOpISubBorrow: - case SpvOpUMulExtended: - case SpvOpSMulExtended: - case SpvOpShiftRightLogical: - case SpvOpShiftRightArithmetic: - case SpvOpShiftLeftLogical: - case SpvOpLogicalEqual: - case SpvOpLogicalNotEqual: - case SpvOpLogicalOr: - case SpvOpLogicalAnd: - case SpvOpLogicalNot: - case SpvOpBitwiseOr: - case SpvOpBitwiseXor: - case SpvOpBitwiseAnd: - case SpvOpSelect: - case SpvOpIEqual: - case SpvOpFOrdEqual: - case SpvOpFUnordEqual: - case SpvOpINotEqual: - case SpvOpFOrdNotEqual: - case SpvOpFUnordNotEqual: - case SpvOpULessThan: - case SpvOpSLessThan: - case SpvOpFOrdLessThan: - case SpvOpFUnordLessThan: - case SpvOpUGreaterThan: - case SpvOpSGreaterThan: - case SpvOpFOrdGreaterThan: - case SpvOpFUnordGreaterThan: - case SpvOpULessThanEqual: - case SpvOpSLessThanEqual: - case SpvOpFOrdLessThanEqual: - case SpvOpFUnordLessThanEqual: - case SpvOpUGreaterThanEqual: - case SpvOpSGreaterThanEqual: - case SpvOpFOrdGreaterThanEqual: - case SpvOpFUnordGreaterThanEqual: - case SpvOpDPdx: - case SpvOpDPdy: - case SpvOpFwidth: - case SpvOpDPdxFine: - case SpvOpDPdyFine: - case SpvOpFwidthFine: - case SpvOpDPdxCoarse: - case SpvOpDPdyCoarse: - case SpvOpFwidthCoarse: - case SpvOpBitFieldInsert: - case SpvOpBitFieldSExtract: - case SpvOpBitFieldUExtract: - case SpvOpBitReverse: - case SpvOpBitCount: - case SpvOpTranspose: - case SpvOpOuterProduct: - case 
SpvOpMatrixTimesScalar: - case SpvOpVectorTimesMatrix: - case SpvOpMatrixTimesVector: - case SpvOpMatrixTimesMatrix: - vtn_handle_alu(b, opcode, w, count); - break; - - case SpvOpVectorExtractDynamic: - case SpvOpVectorInsertDynamic: - case SpvOpVectorShuffle: - case SpvOpCompositeConstruct: - case SpvOpCompositeExtract: - case SpvOpCompositeInsert: - case SpvOpCopyObject: - vtn_handle_composite(b, opcode, w, count); - break; - - case SpvOpEmitVertex: - case SpvOpEndPrimitive: - case SpvOpEmitStreamVertex: - case SpvOpEndStreamPrimitive: - case SpvOpControlBarrier: - case SpvOpMemoryBarrier: - vtn_handle_barrier(b, opcode, w, count); - break; - - default: - unreachable("Unhandled opcode"); - } - - return true; -} - -nir_function * -spirv_to_nir(const uint32_t *words, size_t word_count, - struct nir_spirv_specialization *spec, unsigned num_spec, - gl_shader_stage stage, const char *entry_point_name, - const nir_shader_compiler_options *options) -{ - const uint32_t *word_end = words + word_count; - - /* Handle the SPIR-V header (first 4 dwords) */ - assert(word_count > 5); - - assert(words[0] == SpvMagicNumber); - assert(words[1] >= 0x10000); - /* words[2] == generator magic */ - unsigned value_id_bound = words[3]; - assert(words[4] == 0); - - words+= 5; - - /* Initialize the stn_builder object */ - struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); - b->value_id_bound = value_id_bound; - b->values = rzalloc_array(b, struct vtn_value, value_id_bound); - exec_list_make_empty(&b->functions); - b->entry_point_stage = stage; - b->entry_point_name = entry_point_name; - - /* Handle all the preamble instructions */ - words = vtn_foreach_instruction(b, words, word_end, - vtn_handle_preamble_instruction); - - if (b->entry_point == NULL) { - assert(!"Entry point not found"); - ralloc_free(b); - return NULL; - } - - b->shader = nir_shader_create(NULL, stage, options); - - /* Set shader info defaults */ - b->shader->info.gs.invocations = 1; - - /* Parse execution modes 
*/ - vtn_foreach_execution_mode(b, b->entry_point, - vtn_handle_execution_mode, NULL); - - b->specializations = spec; - b->num_specializations = num_spec; - - /* Handle all variable, type, and constant instructions */ - words = vtn_foreach_instruction(b, words, word_end, - vtn_handle_variable_or_type_instruction); - - vtn_build_cfg(b, words, word_end); - - foreach_list_typed(struct vtn_function, func, node, &b->functions) { - b->impl = func->impl; - b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - vtn_function_emit(b, func, vtn_handle_body_instruction); - } - - assert(b->entry_point->value_type == vtn_value_type_function); - nir_function *entry_point = b->entry_point->func->impl->function; - assert(entry_point); - - ralloc_free(b); - - return entry_point; -} diff --git a/src/compiler/nir/spirv/vtn_alu.c b/src/compiler/nir/spirv/vtn_alu.c deleted file mode 100644 index 8b9a63ce760..00000000000 --- a/src/compiler/nir/spirv/vtn_alu.c +++ /dev/null @@ -1,464 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "vtn_private.h" - -/* - * Normally, column vectors in SPIR-V correspond to a single NIR SSA - * definition. But for matrix multiplies, we want to do one routine for - * multiplying a matrix by a matrix and then pretend that vectors are matrices - * with one column. So we "wrap" these things, and unwrap the result before we - * send it off. - */ - -static struct vtn_ssa_value * -wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) -{ - if (val == NULL) - return NULL; - - if (glsl_type_is_matrix(val->type)) - return val; - - struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); - dest->type = val->type; - dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); - dest->elems[0] = val; - - return dest; -} - -static struct vtn_ssa_value * -unwrap_matrix(struct vtn_ssa_value *val) -{ - if (glsl_type_is_matrix(val->type)) - return val; - - return val->elems[0]; -} - -static struct vtn_ssa_value * -matrix_multiply(struct vtn_builder *b, - struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) -{ - - struct vtn_ssa_value *src0 = wrap_matrix(b, _src0); - struct vtn_ssa_value *src1 = wrap_matrix(b, _src1); - struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed); - struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed); - - unsigned src0_rows = glsl_get_vector_elements(src0->type); - unsigned src0_columns = glsl_get_matrix_columns(src0->type); - unsigned src1_columns = glsl_get_matrix_columns(src1->type); - - const struct glsl_type *dest_type; - if (src1_columns > 1) { - dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), - src0_rows, src1_columns); - } else { - dest_type = glsl_vector_type(glsl_get_base_type(src0->type), 
src0_rows); - } - struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); - - dest = wrap_matrix(b, dest); - - bool transpose_result = false; - if (src0_transpose && src1_transpose) { - /* transpose(A) * transpose(B) = transpose(B * A) */ - src1 = src0_transpose; - src0 = src1_transpose; - src0_transpose = NULL; - src1_transpose = NULL; - transpose_result = true; - } - - if (src0_transpose && !src1_transpose && - glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { - /* We already have the rows of src0 and the columns of src1 available, - * so we can just take the dot product of each row with each column to - * get the result. - */ - - for (unsigned i = 0; i < src1_columns; i++) { - nir_ssa_def *vec_src[4]; - for (unsigned j = 0; j < src0_rows; j++) { - vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def, - src1->elems[i]->def); - } - dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows); - } - } else { - /* We don't handle the case where src1 is transposed but not src0, since - * the general case only uses individual components of src1 so the - * optimizer should chew through the transpose we emitted for src1. 
- */ - - for (unsigned i = 0; i < src1_columns; i++) { - /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ - dest->elems[i]->def = - nir_fmul(&b->nb, src0->elems[0]->def, - nir_channel(&b->nb, src1->elems[i]->def, 0)); - for (unsigned j = 1; j < src0_columns; j++) { - dest->elems[i]->def = - nir_fadd(&b->nb, dest->elems[i]->def, - nir_fmul(&b->nb, src0->elems[j]->def, - nir_channel(&b->nb, src1->elems[i]->def, j))); - } - } - } - - dest = unwrap_matrix(dest); - - if (transpose_result) - dest = vtn_ssa_transpose(b, dest); - - return dest; -} - -static struct vtn_ssa_value * -mat_times_scalar(struct vtn_builder *b, - struct vtn_ssa_value *mat, - nir_ssa_def *scalar) -{ - struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); - for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { - if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) - dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); - else - dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); - } - - return dest; -} - -static void -vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, - struct vtn_value *dest, - struct vtn_ssa_value *src0, struct vtn_ssa_value *src1) -{ - switch (opcode) { - case SpvOpFNegate: { - dest->ssa = vtn_create_ssa_value(b, src0->type); - unsigned cols = glsl_get_matrix_columns(src0->type); - for (unsigned i = 0; i < cols; i++) - dest->ssa->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def); - break; - } - - case SpvOpFAdd: { - dest->ssa = vtn_create_ssa_value(b, src0->type); - unsigned cols = glsl_get_matrix_columns(src0->type); - for (unsigned i = 0; i < cols; i++) - dest->ssa->elems[i]->def = - nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def); - break; - } - - case SpvOpFSub: { - dest->ssa = vtn_create_ssa_value(b, src0->type); - unsigned cols = glsl_get_matrix_columns(src0->type); - for (unsigned i = 0; i < cols; i++) - dest->ssa->elems[i]->def = - nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def); 
- break; - } - - case SpvOpTranspose: - dest->ssa = vtn_ssa_transpose(b, src0); - break; - - case SpvOpMatrixTimesScalar: - if (src0->transposed) { - dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed, - src1->def)); - } else { - dest->ssa = mat_times_scalar(b, src0, src1->def); - } - break; - - case SpvOpVectorTimesMatrix: - case SpvOpMatrixTimesVector: - case SpvOpMatrixTimesMatrix: - if (opcode == SpvOpVectorTimesMatrix) { - dest->ssa = matrix_multiply(b, vtn_ssa_transpose(b, src1), src0); - } else { - dest->ssa = matrix_multiply(b, src0, src1); - } - break; - - default: unreachable("unknown matrix opcode"); - } -} - -nir_op -vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap) -{ - /* Indicates that the first two arguments should be swapped. This is - * used for implementing greater-than and less-than-or-equal. - */ - *swap = false; - - switch (opcode) { - case SpvOpSNegate: return nir_op_ineg; - case SpvOpFNegate: return nir_op_fneg; - case SpvOpNot: return nir_op_inot; - case SpvOpIAdd: return nir_op_iadd; - case SpvOpFAdd: return nir_op_fadd; - case SpvOpISub: return nir_op_isub; - case SpvOpFSub: return nir_op_fsub; - case SpvOpIMul: return nir_op_imul; - case SpvOpFMul: return nir_op_fmul; - case SpvOpUDiv: return nir_op_udiv; - case SpvOpSDiv: return nir_op_idiv; - case SpvOpFDiv: return nir_op_fdiv; - case SpvOpUMod: return nir_op_umod; - case SpvOpSMod: return nir_op_imod; - case SpvOpFMod: return nir_op_fmod; - case SpvOpSRem: return nir_op_irem; - case SpvOpFRem: return nir_op_frem; - - case SpvOpShiftRightLogical: return nir_op_ushr; - case SpvOpShiftRightArithmetic: return nir_op_ishr; - case SpvOpShiftLeftLogical: return nir_op_ishl; - case SpvOpLogicalOr: return nir_op_ior; - case SpvOpLogicalEqual: return nir_op_ieq; - case SpvOpLogicalNotEqual: return nir_op_ine; - case SpvOpLogicalAnd: return nir_op_iand; - case SpvOpLogicalNot: return nir_op_inot; - case SpvOpBitwiseOr: return nir_op_ior; - case SpvOpBitwiseXor: return 
nir_op_ixor; - case SpvOpBitwiseAnd: return nir_op_iand; - case SpvOpSelect: return nir_op_bcsel; - case SpvOpIEqual: return nir_op_ieq; - - case SpvOpBitFieldInsert: return nir_op_bitfield_insert; - case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract; - case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract; - case SpvOpBitReverse: return nir_op_bitfield_reverse; - case SpvOpBitCount: return nir_op_bit_count; - - /* Comparisons: (TODO: How do we want to handled ordered/unordered?) */ - case SpvOpFOrdEqual: return nir_op_feq; - case SpvOpFUnordEqual: return nir_op_feq; - case SpvOpINotEqual: return nir_op_ine; - case SpvOpFOrdNotEqual: return nir_op_fne; - case SpvOpFUnordNotEqual: return nir_op_fne; - case SpvOpULessThan: return nir_op_ult; - case SpvOpSLessThan: return nir_op_ilt; - case SpvOpFOrdLessThan: return nir_op_flt; - case SpvOpFUnordLessThan: return nir_op_flt; - case SpvOpUGreaterThan: *swap = true; return nir_op_ult; - case SpvOpSGreaterThan: *swap = true; return nir_op_ilt; - case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt; - case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt; - case SpvOpULessThanEqual: *swap = true; return nir_op_uge; - case SpvOpSLessThanEqual: *swap = true; return nir_op_ige; - case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge; - case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge; - case SpvOpUGreaterThanEqual: return nir_op_uge; - case SpvOpSGreaterThanEqual: return nir_op_ige; - case SpvOpFOrdGreaterThanEqual: return nir_op_fge; - case SpvOpFUnordGreaterThanEqual: return nir_op_fge; - - /* Conversions: */ - case SpvOpConvertFToU: return nir_op_f2u; - case SpvOpConvertFToS: return nir_op_f2i; - case SpvOpConvertSToF: return nir_op_i2f; - case SpvOpConvertUToF: return nir_op_u2f; - case SpvOpBitcast: return nir_op_imov; - case SpvOpUConvert: - case SpvOpQuantizeToF16: return nir_op_fquantize2f16; - /* TODO: NIR is 32-bit only; these are no-ops. 
*/ - case SpvOpSConvert: return nir_op_imov; - case SpvOpFConvert: return nir_op_fmov; - - /* Derivatives: */ - case SpvOpDPdx: return nir_op_fddx; - case SpvOpDPdy: return nir_op_fddy; - case SpvOpDPdxFine: return nir_op_fddx_fine; - case SpvOpDPdyFine: return nir_op_fddy_fine; - case SpvOpDPdxCoarse: return nir_op_fddx_coarse; - case SpvOpDPdyCoarse: return nir_op_fddy_coarse; - - default: - unreachable("No NIR equivalent"); - } -} - -static void -handle_no_contraction(struct vtn_builder *b, struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *_void) -{ - assert(dec->scope == VTN_DEC_DECORATION); - if (dec->decoration != SpvDecorationNoContraction) - return; - - b->nb.exact = true; -} - -void -vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - - vtn_foreach_decoration(b, val, handle_no_contraction, NULL); - - /* Collect the various SSA sources */ - const unsigned num_inputs = count - 3; - struct vtn_ssa_value *vtn_src[4] = { NULL, }; - for (unsigned i = 0; i < num_inputs; i++) - vtn_src[i] = vtn_ssa_value(b, w[i + 3]); - - if (glsl_type_is_matrix(vtn_src[0]->type) || - (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) { - vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]); - b->nb.exact = false; - return; - } - - val->ssa = vtn_create_ssa_value(b, type); - nir_ssa_def *src[4] = { NULL, }; - for (unsigned i = 0; i < num_inputs; i++) { - assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type)); - src[i] = vtn_src[i]->def; - } - - switch (opcode) { - case SpvOpAny: - if (src[0]->num_components == 1) { - val->ssa->def = nir_imov(&b->nb, src[0]); - } else { - nir_op op; - switch (src[0]->num_components) { - case 2: op = nir_op_bany_inequal2; break; - case 3: op = nir_op_bany_inequal3; break; - case 4: op = 
nir_op_bany_inequal4; break; - } - val->ssa->def = nir_build_alu(&b->nb, op, src[0], - nir_imm_int(&b->nb, NIR_FALSE), - NULL, NULL); - } - break; - - case SpvOpAll: - if (src[0]->num_components == 1) { - val->ssa->def = nir_imov(&b->nb, src[0]); - } else { - nir_op op; - switch (src[0]->num_components) { - case 2: op = nir_op_ball_iequal2; break; - case 3: op = nir_op_ball_iequal3; break; - case 4: op = nir_op_ball_iequal4; break; - } - val->ssa->def = nir_build_alu(&b->nb, op, src[0], - nir_imm_int(&b->nb, NIR_TRUE), - NULL, NULL); - } - break; - - case SpvOpOuterProduct: { - for (unsigned i = 0; i < src[1]->num_components; i++) { - val->ssa->elems[i]->def = - nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i)); - } - break; - } - - case SpvOpDot: - val->ssa->def = nir_fdot(&b->nb, src[0], src[1]); - break; - - case SpvOpIAddCarry: - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]); - break; - - case SpvOpISubBorrow: - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]); - break; - - case SpvOpUMulExtended: - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]); - break; - - case SpvOpSMulExtended: - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]); - break; - - case SpvOpFwidth: - val->ssa->def = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddy(&b->nb, src[0]))); - break; - case SpvOpFwidthFine: - val->ssa->def = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0]))); - 
break; - case SpvOpFwidthCoarse: - val->ssa->def = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0]))); - break; - - case SpvOpVectorTimesScalar: - /* The builder will take care of splatting for us. */ - val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); - break; - - case SpvOpIsNan: - val->ssa->def = nir_fne(&b->nb, src[0], src[0]); - break; - - case SpvOpIsInf: - val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), - nir_imm_float(&b->nb, INFINITY)); - break; - - default: { - bool swap; - nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); - - if (swap) { - nir_ssa_def *tmp = src[0]; - src[0] = src[1]; - src[1] = tmp; - } - - val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); - break; - } /* default */ - } - - b->nb.exact = false; -} diff --git a/src/compiler/nir/spirv/vtn_cfg.c b/src/compiler/nir/spirv/vtn_cfg.c deleted file mode 100644 index 6a43ef8b2dd..00000000000 --- a/src/compiler/nir/spirv/vtn_cfg.c +++ /dev/null @@ -1,778 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "vtn_private.h" -#include "nir/nir_vla.h" - -static bool -vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpFunction: { - assert(b->func == NULL); - b->func = rzalloc(b, struct vtn_function); - - list_inithead(&b->func->body); - b->func->control = w[3]; - - const struct glsl_type *result_type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); - val->func = b->func; - - const struct glsl_type *func_type = - vtn_value(b, w[4], vtn_value_type_type)->type->type; - - assert(glsl_get_function_return_type(func_type) == result_type); - - nir_function *func = - nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); - - func->num_params = glsl_get_length(func_type); - func->params = ralloc_array(b->shader, nir_parameter, func->num_params); - for (unsigned i = 0; i < func->num_params; i++) { - const struct glsl_function_param *param = - glsl_get_function_param(func_type, i); - func->params[i].type = param->type; - if (param->in) { - if (param->out) { - func->params[i].param_type = nir_parameter_inout; - } else { - func->params[i].param_type = nir_parameter_in; - } - } else { - if (param->out) { - func->params[i].param_type = nir_parameter_out; - } else { - assert(!"Parameter is neither in nor out"); - } - } - } - - func->return_type = glsl_get_function_return_type(func_type); - - b->func->impl = nir_function_impl_create(func); - - b->func_param_idx = 0; - break; - } - - case SpvOpFunctionEnd: - b->func->end = w; - b->func = NULL; - break; - - case SpvOpFunctionParameter: { - struct vtn_value *val = - 
vtn_push_value(b, w[2], vtn_value_type_access_chain); - - struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - - assert(b->func_param_idx < b->func->impl->num_params); - nir_variable *param = b->func->impl->params[b->func_param_idx++]; - - assert(param->type == type->type); - - /* Name the parameter so it shows up nicely in NIR */ - param->name = ralloc_strdup(param, val->name); - - struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable); - vtn_var->type = type; - vtn_var->var = param; - vtn_var->chain.var = vtn_var; - vtn_var->chain.length = 0; - - struct vtn_type *without_array = type; - while(glsl_type_is_array(without_array->type)) - without_array = without_array->array_element; - - if (glsl_type_is_image(without_array->type)) { - vtn_var->mode = vtn_variable_mode_image; - param->interface_type = without_array->type; - } else if (glsl_type_is_sampler(without_array->type)) { - vtn_var->mode = vtn_variable_mode_sampler; - param->interface_type = without_array->type; - } else { - vtn_var->mode = vtn_variable_mode_param; - } - - val->access_chain = &vtn_var->chain; - break; - } - - case SpvOpLabel: { - assert(b->block == NULL); - b->block = rzalloc(b, struct vtn_block); - b->block->node.type = vtn_cf_node_type_block; - b->block->label = w; - vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; - - if (b->func->start_block == NULL) { - /* This is the first block encountered for this function. In this - * case, we set the start block and add it to the list of - * implemented functions that we'll walk later. 
- */ - b->func->start_block = b->block; - exec_list_push_tail(&b->functions, &b->func->node); - } - break; - } - - case SpvOpSelectionMerge: - case SpvOpLoopMerge: - assert(b->block && b->block->merge == NULL); - b->block->merge = w; - break; - - case SpvOpBranch: - case SpvOpBranchConditional: - case SpvOpSwitch: - case SpvOpKill: - case SpvOpReturn: - case SpvOpReturnValue: - case SpvOpUnreachable: - assert(b->block && b->block->branch == NULL); - b->block->branch = w; - b->block = NULL; - break; - - default: - /* Continue on as per normal */ - return true; - } - - return true; -} - -static void -vtn_add_case(struct vtn_builder *b, struct vtn_switch *swtch, - struct vtn_block *break_block, - uint32_t block_id, uint32_t val, bool is_default) -{ - struct vtn_block *case_block = - vtn_value(b, block_id, vtn_value_type_block)->block; - - /* Don't create dummy cases that just break */ - if (case_block == break_block) - return; - - if (case_block->switch_case == NULL) { - struct vtn_case *c = ralloc(b, struct vtn_case); - - list_inithead(&c->body); - c->start_block = case_block; - c->fallthrough = NULL; - nir_array_init(&c->values, b); - c->is_default = false; - c->visited = false; - - list_addtail(&c->link, &swtch->cases); - - case_block->switch_case = c; - } - - if (is_default) { - case_block->switch_case->is_default = true; - } else { - nir_array_add(&case_block->switch_case->values, uint32_t, val); - } -} - -/* This function performs a depth-first search of the cases and puts them - * in fall-through order. - */ -static void -vtn_order_case(struct vtn_switch *swtch, struct vtn_case *cse) -{ - if (cse->visited) - return; - - cse->visited = true; - - list_del(&cse->link); - - if (cse->fallthrough) { - vtn_order_case(swtch, cse->fallthrough); - - /* If we have a fall-through, place this case right before the case it - * falls through to. This ensures that fallthroughs come one after - * the other. 
These two can never get separated because that would - * imply something else falling through to the same case. Also, this - * can't break ordering because the DFS ensures that this case is - * visited before anything that falls through to it. - */ - list_addtail(&cse->link, &cse->fallthrough->link); - } else { - list_add(&cse->link, &swtch->cases); - } -} - -static enum vtn_branch_type -vtn_get_branch_type(struct vtn_block *block, - struct vtn_case *swcase, struct vtn_block *switch_break, - struct vtn_block *loop_break, struct vtn_block *loop_cont) -{ - if (block->switch_case) { - /* This branch is actually a fallthrough */ - assert(swcase->fallthrough == NULL || - swcase->fallthrough == block->switch_case); - swcase->fallthrough = block->switch_case; - return vtn_branch_type_switch_fallthrough; - } else if (block == switch_break) { - return vtn_branch_type_switch_break; - } else if (block == loop_break) { - return vtn_branch_type_loop_break; - } else if (block == loop_cont) { - return vtn_branch_type_loop_continue; - } else { - return vtn_branch_type_none; - } -} - -static void -vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, - struct vtn_block *start, struct vtn_case *switch_case, - struct vtn_block *switch_break, - struct vtn_block *loop_break, struct vtn_block *loop_cont, - struct vtn_block *end) -{ - struct vtn_block *block = start; - while (block != end) { - if (block->merge && (*block->merge & SpvOpCodeMask) == SpvOpLoopMerge && - !block->loop) { - struct vtn_loop *loop = ralloc(b, struct vtn_loop); - - loop->node.type = vtn_cf_node_type_loop; - list_inithead(&loop->body); - list_inithead(&loop->cont_body); - loop->control = block->merge[3]; - - list_addtail(&loop->node.link, cf_list); - block->loop = loop; - - struct vtn_block *new_loop_break = - vtn_value(b, block->merge[1], vtn_value_type_block)->block; - struct vtn_block *new_loop_cont = - vtn_value(b, block->merge[2], vtn_value_type_block)->block; - - /* Note: This recursive call 
will start with the current block as - * its start block. If we weren't careful, we would get here - * again and end up in infinite recursion. This is why we set - * block->loop above and check for it before creating one. This - * way, we only create the loop once and the second call that - * tries to handle this loop goes to the cases below and gets - * handled as a regular block. - * - * Note: When we make the recursive walk calls, we pass NULL for - * the switch break since you have to break out of the loop first. - * We do, however, still pass the current switch case because it's - * possible that the merge block for the loop is the start of - * another case. - */ - vtn_cfg_walk_blocks(b, &loop->body, block, switch_case, NULL, - new_loop_break, new_loop_cont, NULL ); - vtn_cfg_walk_blocks(b, &loop->cont_body, new_loop_cont, NULL, NULL, - new_loop_break, NULL, block); - - block = new_loop_break; - continue; - } - - assert(block->node.link.next == NULL); - list_addtail(&block->node.link, cf_list); - - switch (*block->branch & SpvOpCodeMask) { - case SpvOpBranch: { - struct vtn_block *branch_block = - vtn_value(b, block->branch[1], vtn_value_type_block)->block; - - block->branch_type = vtn_get_branch_type(branch_block, - switch_case, switch_break, - loop_break, loop_cont); - - if (block->branch_type != vtn_branch_type_none) - return; - - block = branch_block; - continue; - } - - case SpvOpReturn: - case SpvOpReturnValue: - block->branch_type = vtn_branch_type_return; - return; - - case SpvOpKill: - block->branch_type = vtn_branch_type_discard; - return; - - case SpvOpBranchConditional: { - struct vtn_block *then_block = - vtn_value(b, block->branch[2], vtn_value_type_block)->block; - struct vtn_block *else_block = - vtn_value(b, block->branch[3], vtn_value_type_block)->block; - - struct vtn_if *if_stmt = ralloc(b, struct vtn_if); - - if_stmt->node.type = vtn_cf_node_type_if; - if_stmt->condition = block->branch[1]; - list_inithead(&if_stmt->then_body); - 
list_inithead(&if_stmt->else_body); - - list_addtail(&if_stmt->node.link, cf_list); - - if (block->merge && - (*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) { - if_stmt->control = block->merge[2]; - } - - if_stmt->then_type = vtn_get_branch_type(then_block, - switch_case, switch_break, - loop_break, loop_cont); - if_stmt->else_type = vtn_get_branch_type(else_block, - switch_case, switch_break, - loop_break, loop_cont); - - if (if_stmt->then_type == vtn_branch_type_none && - if_stmt->else_type == vtn_branch_type_none) { - /* Neither side of the if is something we can short-circuit. */ - assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge); - struct vtn_block *merge_block = - vtn_value(b, block->merge[1], vtn_value_type_block)->block; - - vtn_cfg_walk_blocks(b, &if_stmt->then_body, then_block, - switch_case, switch_break, - loop_break, loop_cont, merge_block); - vtn_cfg_walk_blocks(b, &if_stmt->else_body, else_block, - switch_case, switch_break, - loop_break, loop_cont, merge_block); - - enum vtn_branch_type merge_type = - vtn_get_branch_type(merge_block, switch_case, switch_break, - loop_break, loop_cont); - if (merge_type == vtn_branch_type_none) { - block = merge_block; - continue; - } else { - return; - } - } else if (if_stmt->then_type != vtn_branch_type_none && - if_stmt->else_type != vtn_branch_type_none) { - /* Both sides were short-circuited. We're done here. */ - return; - } else { - /* Exeactly one side of the branch could be short-circuited. - * We set the branch up as a predicated break/continue and we - * continue on with the other side as if it were what comes - * after the if. 
- */ - if (if_stmt->then_type == vtn_branch_type_none) { - block = then_block; - } else { - block = else_block; - } - continue; - } - unreachable("Should have returned or continued"); - } - - case SpvOpSwitch: { - assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge); - struct vtn_block *break_block = - vtn_value(b, block->merge[1], vtn_value_type_block)->block; - - struct vtn_switch *swtch = ralloc(b, struct vtn_switch); - - swtch->node.type = vtn_cf_node_type_switch; - swtch->selector = block->branch[1]; - list_inithead(&swtch->cases); - - list_addtail(&swtch->node.link, cf_list); - - /* First, we go through and record all of the cases. */ - const uint32_t *branch_end = - block->branch + (block->branch[0] >> SpvWordCountShift); - - vtn_add_case(b, swtch, break_block, block->branch[2], 0, true); - for (const uint32_t *w = block->branch + 3; w < branch_end; w += 2) - vtn_add_case(b, swtch, break_block, w[1], w[0], false); - - /* Now, we go through and walk the blocks. While we walk through - * the blocks, we also gather the much-needed fall-through - * information. - */ - list_for_each_entry(struct vtn_case, cse, &swtch->cases, link) { - assert(cse->start_block != break_block); - vtn_cfg_walk_blocks(b, &cse->body, cse->start_block, cse, - break_block, NULL, loop_cont, NULL); - } - - /* Finally, we walk over all of the cases one more time and put - * them in fall-through order. 
- */ - for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) { - struct vtn_block *case_block = - vtn_value(b, *w, vtn_value_type_block)->block; - - if (case_block == break_block) - continue; - - assert(case_block->switch_case); - - vtn_order_case(swtch, case_block->switch_case); - } - - block = break_block; - continue; - } - - case SpvOpUnreachable: - return; - - default: - unreachable("Unhandled opcode"); - } - } -} - -void -vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end) -{ - vtn_foreach_instruction(b, words, end, - vtn_cfg_handle_prepass_instruction); - - foreach_list_typed(struct vtn_function, func, node, &b->functions) { - vtn_cfg_walk_blocks(b, &func->body, func->start_block, - NULL, NULL, NULL, NULL, NULL); - } -} - -static bool -vtn_handle_phis_first_pass(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - if (opcode == SpvOpLabel) - return true; /* Nothing to do */ - - /* If this isn't a phi node, stop. */ - if (opcode != SpvOpPhi) - return false; - - /* For handling phi nodes, we do a poor-man's out-of-ssa on the spot. - * For each phi, we create a variable with the appropreate type and - * do a load from that variable. Then, in a second pass, we add - * stores to that variable to each of the predecessor blocks. - * - * We could do something more intelligent here. However, in order to - * handle loops and things properly, we really need dominance - * information. It would end up basically being the into-SSA - * algorithm all over again. It's easier if we just let - * lower_vars_to_ssa do that for us instead of repeating it here. 
- */ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - - struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - nir_variable *phi_var = - nir_local_variable_create(b->nb.impl, type->type, "phi"); - _mesa_hash_table_insert(b->phi_table, w, phi_var); - - val->ssa = vtn_local_load(b, nir_deref_var_create(b, phi_var)); - - return true; -} - -static bool -vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - if (opcode != SpvOpPhi) - return true; - - struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w); - assert(phi_entry); - nir_variable *phi_var = phi_entry->data; - - for (unsigned i = 3; i < count; i += 2) { - struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]); - struct vtn_block *pred = - vtn_value(b, w[i + 1], vtn_value_type_block)->block; - - b->nb.cursor = nir_after_block_before_jump(pred->end_block); - - vtn_local_store(b, src, nir_deref_var_create(b, phi_var)); - } - - return true; -} - -static void -vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type, - nir_variable *switch_fall_var, bool *has_switch_break) -{ - switch (branch_type) { - case vtn_branch_type_switch_break: - nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); - *has_switch_break = true; - break; - case vtn_branch_type_switch_fallthrough: - break; /* Nothing to do */ - case vtn_branch_type_loop_break: - nir_jump(&b->nb, nir_jump_break); - break; - case vtn_branch_type_loop_continue: - nir_jump(&b->nb, nir_jump_continue); - break; - case vtn_branch_type_return: - nir_jump(&b->nb, nir_jump_return); - break; - case vtn_branch_type_discard: { - nir_intrinsic_instr *discard = - nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_discard); - nir_builder_instr_insert(&b->nb, &discard->instr); - break; - } - default: - unreachable("Invalid branch type"); - } -} - -static void -vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, - 
nir_variable *switch_fall_var, bool *has_switch_break, - vtn_instruction_handler handler) -{ - list_for_each_entry(struct vtn_cf_node, node, cf_list, link) { - switch (node->type) { - case vtn_cf_node_type_block: { - struct vtn_block *block = (struct vtn_block *)node; - - const uint32_t *block_start = block->label; - const uint32_t *block_end = block->merge ? block->merge : - block->branch; - - block_start = vtn_foreach_instruction(b, block_start, block_end, - vtn_handle_phis_first_pass); - - vtn_foreach_instruction(b, block_start, block_end, handler); - - block->end_block = nir_cursor_current_block(b->nb.cursor); - - if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) { - struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]); - vtn_local_store(b, src, - nir_deref_var_create(b, b->impl->return_var)); - } - - if (block->branch_type != vtn_branch_type_none) { - vtn_emit_branch(b, block->branch_type, - switch_fall_var, has_switch_break); - } - - break; - } - - case vtn_cf_node_type_if: { - struct vtn_if *vtn_if = (struct vtn_if *)node; - - nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = - nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def); - nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); - - bool sw_break = false; - - b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); - if (vtn_if->then_type == vtn_branch_type_none) { - vtn_emit_cf_list(b, &vtn_if->then_body, - switch_fall_var, &sw_break, handler); - } else { - vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break); - } - - b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); - if (vtn_if->else_type == vtn_branch_type_none) { - vtn_emit_cf_list(b, &vtn_if->else_body, - switch_fall_var, &sw_break, handler); - } else { - vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break); - } - - b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); - - /* If we encountered a switch break somewhere inside of the if, - * then it would have been handled 
correctly by calling - * emit_cf_list or emit_branch for the interrior. However, we - * need to predicate everything following on wether or not we're - * still going. - */ - if (sw_break) { - *has_switch_break = true; - - nir_if *switch_if = nir_if_create(b->shader); - switch_if->condition = - nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var)); - nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node); - - b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); - } - break; - } - - case vtn_cf_node_type_loop: { - struct vtn_loop *vtn_loop = (struct vtn_loop *)node; - - nir_loop *loop = nir_loop_create(b->shader); - nir_cf_node_insert(b->nb.cursor, &loop->cf_node); - - b->nb.cursor = nir_after_cf_list(&loop->body); - vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler); - - if (!list_empty(&vtn_loop->cont_body)) { - /* If we have a non-trivial continue body then we need to put - * it at the beginning of the loop with a flag to ensure that - * it doesn't get executed in the first iteration. 
- */ - nir_variable *do_cont = - nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont"); - - b->nb.cursor = nir_before_cf_node(&loop->cf_node); - nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1); - - b->nb.cursor = nir_before_cf_list(&loop->body); - nir_if *cont_if = nir_if_create(b->shader); - cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont)); - nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node); - - b->nb.cursor = nir_after_cf_list(&cont_if->then_list); - vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL, handler); - - b->nb.cursor = nir_after_cf_node(&cont_if->cf_node); - nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1); - - b->has_loop_continue = true; - } - - b->nb.cursor = nir_after_cf_node(&loop->cf_node); - break; - } - - case vtn_cf_node_type_switch: { - struct vtn_switch *vtn_switch = (struct vtn_switch *)node; - - /* First, we create a variable to keep track of whether or not the - * switch is still going at any given point. Any switch breaks - * will set this variable to false. - */ - nir_variable *fall_var = - nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall"); - nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); - - /* Next, we gather up all of the conditions. We have to do this - * up-front because we also need to build an "any" condition so - * that we can use !any for default. - */ - const int num_cases = list_length(&vtn_switch->cases); - NIR_VLA(nir_ssa_def *, conditions, num_cases); - - nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def; - /* An accumulation of all conditions. 
Used for the default */ - nir_ssa_def *any = NULL; - - int i = 0; - list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { - if (cse->is_default) { - conditions[i++] = NULL; - continue; - } - - nir_ssa_def *cond = NULL; - nir_array_foreach(&cse->values, uint32_t, val) { - nir_ssa_def *is_val = - nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val)); - - cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val; - } - - any = any ? nir_ior(&b->nb, any, cond) : cond; - conditions[i++] = cond; - } - assert(i == num_cases); - - /* Now we can walk the list of cases and actually emit code */ - i = 0; - list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { - /* Figure out the condition */ - nir_ssa_def *cond = conditions[i++]; - if (cse->is_default) { - assert(cond == NULL); - cond = nir_inot(&b->nb, any); - } - /* Take fallthrough into account */ - cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var)); - - nir_if *case_if = nir_if_create(b->nb.shader); - case_if->condition = nir_src_for_ssa(cond); - nir_cf_node_insert(b->nb.cursor, &case_if->cf_node); - - bool has_break = false; - b->nb.cursor = nir_after_cf_list(&case_if->then_list); - nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1); - vtn_emit_cf_list(b, &cse->body, fall_var, &has_break, handler); - (void)has_break; /* We don't care */ - - b->nb.cursor = nir_after_cf_node(&case_if->cf_node); - } - assert(i == num_cases); - - break; - } - - default: - unreachable("Invalid CF node type"); - } - } -} - -void -vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, - vtn_instruction_handler instruction_handler) -{ - nir_builder_init(&b->nb, func->impl); - b->nb.cursor = nir_after_cf_list(&func->impl->body); - b->has_loop_continue = false; - b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler); - - vtn_foreach_instruction(b, func->start_block->label, func->end, - 
vtn_handle_phi_second_pass); - - /* Continue blocks for loops get inserted before the body of the loop - * but instructions in the continue may use SSA defs in the loop body. - * Therefore, we need to repair SSA to insert the needed phi nodes. - */ - if (b->has_loop_continue) - nir_repair_ssa_impl(func->impl); -} diff --git a/src/compiler/nir/spirv/vtn_glsl450.c b/src/compiler/nir/spirv/vtn_glsl450.c deleted file mode 100644 index e05d28ffede..00000000000 --- a/src/compiler/nir/spirv/vtn_glsl450.c +++ /dev/null @@ -1,666 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "vtn_private.h" -#include "GLSL.std.450.h" - -#define M_PIf ((float) M_PI) -#define M_PI_2f ((float) M_PI_2) -#define M_PI_4f ((float) M_PI_4) - -static nir_ssa_def * -build_mat2_det(nir_builder *b, nir_ssa_def *col[2]) -{ - unsigned swiz[4] = {1, 0, 0, 0}; - nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true)); - return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1)); -} - -static nir_ssa_def * -build_mat3_det(nir_builder *b, nir_ssa_def *col[3]) -{ - unsigned yzx[4] = {1, 2, 0, 0}; - unsigned zxy[4] = {2, 0, 1, 0}; - - nir_ssa_def *prod0 = - nir_fmul(b, col[0], - nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true), - nir_swizzle(b, col[2], zxy, 3, true))); - nir_ssa_def *prod1 = - nir_fmul(b, col[0], - nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true), - nir_swizzle(b, col[2], yzx, 3, true))); - - nir_ssa_def *diff = nir_fsub(b, prod0, prod1); - - return nir_fadd(b, nir_channel(b, diff, 0), - nir_fadd(b, nir_channel(b, diff, 1), - nir_channel(b, diff, 2))); -} - -static nir_ssa_def * -build_mat4_det(nir_builder *b, nir_ssa_def **col) -{ - nir_ssa_def *subdet[4]; - for (unsigned i = 0; i < 4; i++) { - unsigned swiz[3]; - for (unsigned j = 0; j < 3; j++) - swiz[j] = j + (j >= i); - - nir_ssa_def *subcol[3]; - subcol[0] = nir_swizzle(b, col[1], swiz, 3, true); - subcol[1] = nir_swizzle(b, col[2], swiz, 3, true); - subcol[2] = nir_swizzle(b, col[3], swiz, 3, true); - - subdet[i] = build_mat3_det(b, subcol); - } - - nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4)); - - return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0), - nir_channel(b, prod, 1)), - nir_fsub(b, nir_channel(b, prod, 2), - nir_channel(b, prod, 3))); -} - -static nir_ssa_def * -build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src) -{ - unsigned size = glsl_get_vector_elements(src->type); - - nir_ssa_def *cols[4]; - for (unsigned i = 0; i < size; i++) - cols[i] = 
src->elems[i]->def; - - switch(size) { - case 2: return build_mat2_det(&b->nb, cols); - case 3: return build_mat3_det(&b->nb, cols); - case 4: return build_mat4_det(&b->nb, cols); - default: - unreachable("Invalid matrix size"); - } -} - -/* Computes the determinate of the submatrix given by taking src and - * removing the specified row and column. - */ -static nir_ssa_def * -build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src, - unsigned size, unsigned row, unsigned col) -{ - assert(row < size && col < size); - if (size == 2) { - return nir_channel(b, src->elems[1 - col]->def, 1 - row); - } else { - /* Swizzle to get all but the specified row */ - unsigned swiz[3]; - for (unsigned j = 0; j < 3; j++) - swiz[j] = j + (j >= row); - - /* Grab all but the specified column */ - nir_ssa_def *subcol[3]; - for (unsigned j = 0; j < size; j++) { - if (j != col) { - subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def, - swiz, size - 1, true); - } - } - - if (size == 3) { - return build_mat2_det(b, subcol); - } else { - assert(size == 4); - return build_mat3_det(b, subcol); - } - } -} - -static struct vtn_ssa_value * -matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src) -{ - nir_ssa_def *adj_col[4]; - unsigned size = glsl_get_vector_elements(src->type); - - /* Build up an adjugate matrix */ - for (unsigned c = 0; c < size; c++) { - nir_ssa_def *elem[4]; - for (unsigned r = 0; r < size; r++) { - elem[r] = build_mat_subdet(&b->nb, src, size, c, r); - - if ((r + c) % 2) - elem[r] = nir_fneg(&b->nb, elem[r]); - } - - adj_col[c] = nir_vec(&b->nb, elem, size); - } - - nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src)); - - struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type); - for (unsigned i = 0; i < size; i++) - val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv); - - return val; -} - -static nir_ssa_def* -build_length(nir_builder *b, nir_ssa_def *vec) -{ - switch (vec->num_components) { - case 1: return nir_fsqrt(b, 
nir_fmul(b, vec, vec)); - case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec)); - case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec)); - case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec)); - default: - unreachable("Invalid number of components"); - } -} - -static inline nir_ssa_def * -build_fclamp(nir_builder *b, - nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val) -{ - return nir_fmin(b, nir_fmax(b, x, min_val), max_val); -} - -/** - * Return e^x. - */ -static nir_ssa_def * -build_exp(nir_builder *b, nir_ssa_def *x) -{ - return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E))); -} - -/** - * Return ln(x) - the natural logarithm of x. - */ -static nir_ssa_def * -build_log(nir_builder *b, nir_ssa_def *x) -{ - return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E)); -} - -/** - * Approximate asin(x) by the formula: - * asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1)))) - * - * which is correct to first order at x=0 and x=±1 regardless of the p - * coefficients but can be made second-order correct at both ends by selecting - * the fit coefficients appropriately. Different p coefficients can be used - * in the asin and acos implementation to minimize some relative error metric - * in each case. - */ -static nir_ssa_def * -build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1) -{ - nir_ssa_def *abs_x = nir_fabs(b, x); - return nir_fmul(b, nir_fsign(b, x), - nir_fsub(b, nir_imm_float(b, M_PI_2f), - nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), - nir_fadd(b, nir_imm_float(b, M_PI_2f), - nir_fmul(b, abs_x, - nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), - nir_fmul(b, abs_x, - nir_fadd(b, nir_imm_float(b, p0), - nir_fmul(b, abs_x, - nir_imm_float(b, p1)))))))))); -} - -/** - * Compute xs[0] + xs[1] + xs[2] + ... using fadd. 
- */ -static nir_ssa_def * -build_fsum(nir_builder *b, nir_ssa_def **xs, int terms) -{ - nir_ssa_def *accum = xs[0]; - - for (int i = 1; i < terms; i++) - accum = nir_fadd(b, accum, xs[i]); - - return accum; -} - -static nir_ssa_def * -build_atan(nir_builder *b, nir_ssa_def *y_over_x) -{ - nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x); - nir_ssa_def *one = nir_imm_float(b, 1.0f); - - /* - * range-reduction, first step: - * - * / y_over_x if |y_over_x| <= 1.0; - * x = < - * \ 1.0 / y_over_x otherwise - */ - nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one), - nir_fmax(b, abs_y_over_x, one)); - - /* - * approximate atan by evaluating polynomial: - * - * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + - * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + - * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 - */ - nir_ssa_def *x_2 = nir_fmul(b, x, x); - nir_ssa_def *x_3 = nir_fmul(b, x_2, x); - nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2); - nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2); - nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2); - nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2); - - nir_ssa_def *polynomial_terms[] = { - nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)), - nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)), - nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)), - nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)), - nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)), - nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)), - }; - - nir_ssa_def *tmp = - build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms)); - - /* range-reduction fixup */ - tmp = nir_fadd(b, tmp, - nir_fmul(b, - nir_b2f(b, nir_flt(b, one, abs_y_over_x)), - nir_fadd(b, nir_fmul(b, tmp, - nir_imm_float(b, -2.0f)), - nir_imm_float(b, M_PI_2f)))); - - /* sign fixup */ - return nir_fmul(b, tmp, nir_fsign(b, y_over_x)); -} - -static nir_ssa_def * -build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) -{ - nir_ssa_def *zero = 
nir_imm_float(b, 0.0f); - - /* If |x| >= 1.0e-8 * |y|: */ - nir_ssa_def *condition = - nir_fge(b, nir_fabs(b, x), - nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y))); - - /* Then...call atan(y/x) and fix it up: */ - nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x)); - nir_ssa_def *r_then = - nir_bcsel(b, nir_flt(b, x, zero), - nir_fadd(b, atan1, - nir_bcsel(b, nir_fge(b, y, zero), - nir_imm_float(b, M_PIf), - nir_imm_float(b, -M_PIf))), - atan1); - - /* Else... */ - nir_ssa_def *r_else = - nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f)); - - return nir_bcsel(b, condition, r_then, r_else); -} - -static nir_ssa_def * -build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent) -{ - nir_ssa_def *abs_x = nir_fabs(b, x); - nir_ssa_def *zero = nir_imm_float(b, 0.0f); - - /* Single-precision floating-point values are stored as - * 1 sign bit; - * 8 exponent bits; - * 23 mantissa bits. - * - * An exponent shift of 23 will shift the mantissa out, leaving only the - * exponent and sign bit (which itself may be zero, if the absolute value - * was taken before the bitcast and shift. - */ - nir_ssa_def *exponent_shift = nir_imm_int(b, 23); - nir_ssa_def *exponent_bias = nir_imm_int(b, -126); - - nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu); - - /* Exponent of floating-point values in the range [0.5, 1.0). 
*/ - nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u); - - nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero); - - *exponent = - nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), - nir_bcsel(b, is_not_zero, exponent_bias, zero)); - - return nir_ior(b, nir_iand(b, x, sign_mantissa_mask), - nir_bcsel(b, is_not_zero, exponent_value, zero)); -} - -static nir_op -vtn_nir_alu_op_for_spirv_glsl_opcode(enum GLSLstd450 opcode) -{ - switch (opcode) { - case GLSLstd450Round: return nir_op_fround_even; - case GLSLstd450RoundEven: return nir_op_fround_even; - case GLSLstd450Trunc: return nir_op_ftrunc; - case GLSLstd450FAbs: return nir_op_fabs; - case GLSLstd450SAbs: return nir_op_iabs; - case GLSLstd450FSign: return nir_op_fsign; - case GLSLstd450SSign: return nir_op_isign; - case GLSLstd450Floor: return nir_op_ffloor; - case GLSLstd450Ceil: return nir_op_fceil; - case GLSLstd450Fract: return nir_op_ffract; - case GLSLstd450Sin: return nir_op_fsin; - case GLSLstd450Cos: return nir_op_fcos; - case GLSLstd450Pow: return nir_op_fpow; - case GLSLstd450Exp2: return nir_op_fexp2; - case GLSLstd450Log2: return nir_op_flog2; - case GLSLstd450Sqrt: return nir_op_fsqrt; - case GLSLstd450InverseSqrt: return nir_op_frsq; - case GLSLstd450FMin: return nir_op_fmin; - case GLSLstd450UMin: return nir_op_umin; - case GLSLstd450SMin: return nir_op_imin; - case GLSLstd450FMax: return nir_op_fmax; - case GLSLstd450UMax: return nir_op_umax; - case GLSLstd450SMax: return nir_op_imax; - case GLSLstd450FMix: return nir_op_flrp; - case GLSLstd450Fma: return nir_op_ffma; - case GLSLstd450Ldexp: return nir_op_ldexp; - case GLSLstd450FindILsb: return nir_op_find_lsb; - case GLSLstd450FindSMsb: return nir_op_ifind_msb; - case GLSLstd450FindUMsb: return nir_op_ufind_msb; - - /* Packing/Unpacking functions */ - case GLSLstd450PackSnorm4x8: return nir_op_pack_snorm_4x8; - case GLSLstd450PackUnorm4x8: return nir_op_pack_unorm_4x8; - case GLSLstd450PackSnorm2x16: return nir_op_pack_snorm_2x16; - case 
GLSLstd450PackUnorm2x16: return nir_op_pack_unorm_2x16; - case GLSLstd450PackHalf2x16: return nir_op_pack_half_2x16; - case GLSLstd450UnpackSnorm4x8: return nir_op_unpack_snorm_4x8; - case GLSLstd450UnpackUnorm4x8: return nir_op_unpack_unorm_4x8; - case GLSLstd450UnpackSnorm2x16: return nir_op_unpack_snorm_2x16; - case GLSLstd450UnpackUnorm2x16: return nir_op_unpack_unorm_2x16; - case GLSLstd450UnpackHalf2x16: return nir_op_unpack_half_2x16; - - default: - unreachable("No NIR equivalent"); - } -} - -static void -handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, - const uint32_t *w, unsigned count) -{ - struct nir_builder *nb = &b->nb; - const struct glsl_type *dest_type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = vtn_create_ssa_value(b, dest_type); - - /* Collect the various SSA sources */ - unsigned num_inputs = count - 5; - nir_ssa_def *src[3] = { NULL, }; - for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 5])->def; - - switch (entrypoint) { - case GLSLstd450Radians: - val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251)); - return; - case GLSLstd450Degrees: - val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131)); - return; - case GLSLstd450Tan: - val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]), - nir_fcos(nb, src[0])); - return; - - case GLSLstd450Modf: { - nir_ssa_def *sign = nir_fsign(nb, src[0]); - nir_ssa_def *abs = nir_fabs(nb, src[0]); - val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); - nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), - nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf); - return; - } - - case GLSLstd450ModfStruct: { - nir_ssa_def *sign = nir_fsign(nb, src[0]); - nir_ssa_def *abs = nir_fabs(nb, src[0]); - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); - val->ssa->elems[1]->def = nir_fmul(nb, 
sign, nir_ffloor(nb, abs)); - return; - } - - case GLSLstd450Step: - val->ssa->def = nir_sge(nb, src[1], src[0]); - return; - - case GLSLstd450Length: - val->ssa->def = build_length(nb, src[0]); - return; - case GLSLstd450Distance: - val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1])); - return; - case GLSLstd450Normalize: - val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0])); - return; - - case GLSLstd450Exp: - val->ssa->def = build_exp(nb, src[0]); - return; - - case GLSLstd450Log: - val->ssa->def = build_log(nb, src[0]); - return; - - case GLSLstd450FClamp: - val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]); - return; - case GLSLstd450UClamp: - val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]); - return; - case GLSLstd450SClamp: - val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]); - return; - - case GLSLstd450Cross: { - unsigned yzx[4] = { 1, 2, 0, 0 }; - unsigned zxy[4] = { 2, 0, 1, 0 }; - val->ssa->def = - nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true), - nir_swizzle(nb, src[1], zxy, 3, true)), - nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true), - nir_swizzle(nb, src[1], yzx, 3, true))); - return; - } - - case GLSLstd450SmoothStep: { - /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */ - nir_ssa_def *t = - build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]), - nir_fsub(nb, src[1], src[0])), - nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0)); - /* result = t * t * (3 - 2 * t) */ - val->ssa->def = - nir_fmul(nb, t, nir_fmul(nb, t, - nir_fsub(nb, nir_imm_float(nb, 3.0), - nir_fmul(nb, nir_imm_float(nb, 2.0), t)))); - return; - } - - case GLSLstd450FaceForward: - val->ssa->def = - nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]), - nir_imm_float(nb, 0.0)), - src[0], nir_fneg(nb, src[0])); - return; - - case GLSLstd450Reflect: - /* I - 2 * dot(N, I) * N */ - val->ssa->def = - nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0), - nir_fmul(nb, nir_fdot(nb, src[0], 
src[1]), - src[1]))); - return; - - case GLSLstd450Refract: { - nir_ssa_def *I = src[0]; - nir_ssa_def *N = src[1]; - nir_ssa_def *eta = src[2]; - nir_ssa_def *n_dot_i = nir_fdot(nb, N, I); - nir_ssa_def *one = nir_imm_float(nb, 1.0); - nir_ssa_def *zero = nir_imm_float(nb, 0.0); - /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */ - nir_ssa_def *k = - nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta, - nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i))))); - nir_ssa_def *result = - nir_fsub(nb, nir_fmul(nb, eta, I), - nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i), - nir_fsqrt(nb, k)), N)); - /* XXX: bcsel, or if statement? */ - val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result); - return; - } - - case GLSLstd450Sinh: - /* 0.5 * (e^x - e^(-x)) */ - val->ssa->def = - nir_fmul(nb, nir_imm_float(nb, 0.5f), - nir_fsub(nb, build_exp(nb, src[0]), - build_exp(nb, nir_fneg(nb, src[0])))); - return; - - case GLSLstd450Cosh: - /* 0.5 * (e^x + e^(-x)) */ - val->ssa->def = - nir_fmul(nb, nir_imm_float(nb, 0.5f), - nir_fadd(nb, build_exp(nb, src[0]), - build_exp(nb, nir_fneg(nb, src[0])))); - return; - - case GLSLstd450Tanh: - /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */ - val->ssa->def = - nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f), - nir_fsub(nb, build_exp(nb, src[0]), - build_exp(nb, nir_fneg(nb, src[0])))), - nir_fmul(nb, nir_imm_float(nb, 0.5f), - nir_fadd(nb, build_exp(nb, src[0]), - build_exp(nb, nir_fneg(nb, src[0]))))); - return; - - case GLSLstd450Asinh: - val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]), - build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]), - nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]), - nir_imm_float(nb, 1.0f)))))); - return; - case GLSLstd450Acosh: - val->ssa->def = build_log(nb, nir_fadd(nb, src[0], - nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]), - nir_imm_float(nb, 1.0f))))); - return; - case GLSLstd450Atanh: { - nir_ssa_def *one = nir_imm_float(nb, 1.0); - val->ssa->def = 
nir_fmul(nb, nir_imm_float(nb, 0.5f), - build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]), - nir_fsub(nb, one, src[0])))); - return; - } - - case GLSLstd450Asin: - val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955); - return; - - case GLSLstd450Acos: - val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f), - build_asin(nb, src[0], 0.08132463, -0.02363318)); - return; - - case GLSLstd450Atan: - val->ssa->def = build_atan(nb, src[0]); - return; - - case GLSLstd450Atan2: - val->ssa->def = build_atan2(nb, src[0], src[1]); - return; - - case GLSLstd450Frexp: { - nir_ssa_def *exponent; - val->ssa->def = build_frexp(nb, src[0], &exponent); - nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf); - return; - } - - case GLSLstd450FrexpStruct: { - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = build_frexp(nb, src[0], - &val->ssa->elems[1]->def); - return; - } - - default: - val->ssa->def = - nir_build_alu(&b->nb, vtn_nir_alu_op_for_spirv_glsl_opcode(entrypoint), - src[0], src[1], src[2], NULL); - return; - } -} - -bool -vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, - const uint32_t *w, unsigned count) -{ - switch ((enum GLSLstd450)ext_opcode) { - case GLSLstd450Determinant: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = rzalloc(b, struct vtn_ssa_value); - val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5])); - break; - } - - case GLSLstd450MatrixInverse: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5])); - break; - } - - case GLSLstd450InterpolateAtCentroid: - case GLSLstd450InterpolateAtSample: - case GLSLstd450InterpolateAtOffset: - unreachable("Unhandled opcode"); - - default: - handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count); - } - - return true; -} diff --git 
a/src/compiler/nir/spirv/vtn_private.h b/src/compiler/nir/spirv/vtn_private.h deleted file mode 100644 index 3840d8c4b65..00000000000 --- a/src/compiler/nir/spirv/vtn_private.h +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "nir/nir.h" -#include "nir/nir_builder.h" -#include "nir/nir_array.h" -#include "nir_spirv.h" -#include "spirv.h" - -struct vtn_builder; -struct vtn_decoration; - -enum vtn_value_type { - vtn_value_type_invalid = 0, - vtn_value_type_undef, - vtn_value_type_string, - vtn_value_type_decoration_group, - vtn_value_type_type, - vtn_value_type_constant, - vtn_value_type_access_chain, - vtn_value_type_function, - vtn_value_type_block, - vtn_value_type_ssa, - vtn_value_type_extension, - vtn_value_type_image_pointer, - vtn_value_type_sampled_image, -}; - -enum vtn_branch_type { - vtn_branch_type_none, - vtn_branch_type_switch_break, - vtn_branch_type_switch_fallthrough, - vtn_branch_type_loop_break, - vtn_branch_type_loop_continue, - vtn_branch_type_discard, - vtn_branch_type_return, -}; - -enum vtn_cf_node_type { - vtn_cf_node_type_block, - vtn_cf_node_type_if, - vtn_cf_node_type_loop, - vtn_cf_node_type_switch, -}; - -struct vtn_cf_node { - struct list_head link; - enum vtn_cf_node_type type; -}; - -struct vtn_loop { - struct vtn_cf_node node; - - /* The main body of the loop */ - struct list_head body; - - /* The "continue" part of the loop. This gets executed after the body - * and is where you go when you hit a continue. 
- */ - struct list_head cont_body; - - SpvLoopControlMask control; -}; - -struct vtn_if { - struct vtn_cf_node node; - - uint32_t condition; - - enum vtn_branch_type then_type; - struct list_head then_body; - - enum vtn_branch_type else_type; - struct list_head else_body; - - SpvSelectionControlMask control; -}; - -struct vtn_case { - struct list_head link; - - struct list_head body; - - /* The block that starts this case */ - struct vtn_block *start_block; - - /* The fallthrough case, if any */ - struct vtn_case *fallthrough; - - /* The uint32_t values that map to this case */ - nir_array values; - - /* True if this is the default case */ - bool is_default; - - /* Initialized to false; used when sorting the list of cases */ - bool visited; -}; - -struct vtn_switch { - struct vtn_cf_node node; - - uint32_t selector; - - struct list_head cases; -}; - -struct vtn_block { - struct vtn_cf_node node; - - /** A pointer to the label instruction */ - const uint32_t *label; - - /** A pointer to the merge instruction (or NULL if non exists) */ - const uint32_t *merge; - - /** A pointer to the branch instruction that ends this block */ - const uint32_t *branch; - - enum vtn_branch_type branch_type; - - /** Points to the loop that this block starts (if it starts a loop) */ - struct vtn_loop *loop; - - /** Points to the switch case started by this block (if any) */ - struct vtn_case *switch_case; - - /** The last block in this SPIR-V block. 
*/ - nir_block *end_block; -}; - -struct vtn_function { - struct exec_node node; - - nir_function_impl *impl; - struct vtn_block *start_block; - - struct list_head body; - - const uint32_t *end; - - SpvFunctionControlMask control; -}; - -typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, - const uint32_t *, unsigned); - -void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, - const uint32_t *end); -void vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, - vtn_instruction_handler instruction_handler); - -const uint32_t * -vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, - const uint32_t *end, vtn_instruction_handler handler); - -struct vtn_ssa_value { - union { - nir_ssa_def *def; - struct vtn_ssa_value **elems; - }; - - /* For matrices, if this is non-NULL, then this value is actually the - * transpose of some other value. The value that `transposed` points to - * always dominates this value. - */ - struct vtn_ssa_value *transposed; - - const struct glsl_type *type; -}; - -struct vtn_type { - const struct glsl_type *type; - - /* The value that declares this type. Used for finding decorations */ - struct vtn_value *val; - - /* for matrices, whether the matrix is stored row-major */ - bool row_major; - - /* for structs, the offset of each member */ - unsigned *offsets; - - /* for structs, whether it was decorated as a "non-SSBO-like" block */ - bool block; - - /* for structs, whether it was decorated as an "SSBO-like" block */ - bool buffer_block; - - /* for structs with block == true, whether this is a builtin block (i.e. a - * block that contains only builtins). 
- */ - bool builtin_block; - - /* Image format for image_load_store type images */ - unsigned image_format; - - /* Access qualifier for storage images */ - SpvAccessQualifier access_qualifier; - - /* for arrays and matrices, the array stride */ - unsigned stride; - - /* for arrays, the vtn_type for the elements of the array */ - struct vtn_type *array_element; - - /* for structures, the vtn_type for each member */ - struct vtn_type **members; - - /* Whether this type, or a parent type, has been decorated as a builtin */ - bool is_builtin; - - SpvBuiltIn builtin; -}; - -struct vtn_variable; - -enum vtn_access_mode { - vtn_access_mode_id, - vtn_access_mode_literal, -}; - -struct vtn_access_link { - enum vtn_access_mode mode; - uint32_t id; -}; - -struct vtn_access_chain { - struct vtn_variable *var; - - uint32_t length; - - /* Struct elements and array offsets */ - struct vtn_access_link link[0]; -}; - -enum vtn_variable_mode { - vtn_variable_mode_local, - vtn_variable_mode_global, - vtn_variable_mode_param, - vtn_variable_mode_ubo, - vtn_variable_mode_ssbo, - vtn_variable_mode_push_constant, - vtn_variable_mode_image, - vtn_variable_mode_sampler, - vtn_variable_mode_workgroup, - vtn_variable_mode_input, - vtn_variable_mode_output, -}; - -struct vtn_variable { - enum vtn_variable_mode mode; - - struct vtn_type *type; - - unsigned descriptor_set; - unsigned binding; - - nir_variable *var; - nir_variable **members; - - struct vtn_access_chain chain; -}; - -struct vtn_image_pointer { - struct vtn_access_chain *image; - nir_ssa_def *coord; - nir_ssa_def *sample; -}; - -struct vtn_sampled_image { - struct vtn_access_chain *image; /* Image or array of images */ - struct vtn_access_chain *sampler; /* Sampler */ -}; - -struct vtn_value { - enum vtn_value_type value_type; - const char *name; - struct vtn_decoration *decoration; - union { - void *ptr; - char *str; - struct vtn_type *type; - struct { - nir_constant *constant; - const struct glsl_type *const_type; - }; - struct 
vtn_access_chain *access_chain; - struct vtn_image_pointer *image; - struct vtn_sampled_image *sampled_image; - struct vtn_function *func; - struct vtn_block *block; - struct vtn_ssa_value *ssa; - vtn_instruction_handler ext_handler; - }; -}; - -#define VTN_DEC_DECORATION -1 -#define VTN_DEC_EXECUTION_MODE -2 -#define VTN_DEC_STRUCT_MEMBER0 0 - -struct vtn_decoration { - struct vtn_decoration *next; - - /* Specifies how to apply this decoration. Negative values represent a - * decoration or execution mode. (See the VTN_DEC_ #defines above.) - * Non-negative values specify that it applies to a structure member. - */ - int scope; - - const uint32_t *literals; - struct vtn_value *group; - - union { - SpvDecoration decoration; - SpvExecutionMode exec_mode; - }; -}; - -struct vtn_builder { - nir_builder nb; - - nir_shader *shader; - nir_function_impl *impl; - struct vtn_block *block; - - /* Current file, line, and column. Useful for debugging. Set - * automatically by vtn_foreach_instruction. - */ - char *file; - int line, col; - - /* - * In SPIR-V, constants are global, whereas in NIR, the load_const - * instruction we use is per-function. So while we parse each function, we - * keep a hash table of constants we've resolved to nir_ssa_value's so - * far, and we lazily resolve them when we see them used in a function. - */ - struct hash_table *const_table; - - /* - * Map from phi instructions (pointer to the start of the instruction) - * to the variable corresponding to it. 
- */ - struct hash_table *phi_table; - - unsigned num_specializations; - struct nir_spirv_specialization *specializations; - - unsigned value_id_bound; - struct vtn_value *values; - - gl_shader_stage entry_point_stage; - const char *entry_point_name; - struct vtn_value *entry_point; - bool origin_upper_left; - - struct vtn_function *func; - struct exec_list functions; - - /* Current function parameter index */ - unsigned func_param_idx; - - bool has_loop_continue; -}; - -static inline struct vtn_value * -vtn_push_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) -{ - assert(value_id < b->value_id_bound); - assert(b->values[value_id].value_type == vtn_value_type_invalid); - - b->values[value_id].value_type = value_type; - - return &b->values[value_id]; -} - -static inline struct vtn_value * -vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) -{ - assert(value_id < b->value_id_bound); - return &b->values[value_id]; -} - -static inline struct vtn_value * -vtn_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) -{ - struct vtn_value *val = vtn_untyped_value(b, value_id); - assert(val->value_type == value_type); - return val; -} - -struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); - -struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b, - const struct glsl_type *type); - -struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b, - struct vtn_ssa_value *src); - -nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, - unsigned index); -nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, - nir_ssa_def *index); -nir_ssa_def *vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, - nir_ssa_def *insert, unsigned index); -nir_ssa_def *vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, - nir_ssa_def *insert, nir_ssa_def *index); - -nir_deref_var *vtn_nir_deref(struct vtn_builder *b, uint32_t id); 
- -nir_deref_var *vtn_access_chain_to_deref(struct vtn_builder *b, - struct vtn_access_chain *chain); -nir_ssa_def * -vtn_access_chain_to_offset(struct vtn_builder *b, - struct vtn_access_chain *chain, - nir_ssa_def **index_out, struct vtn_type **type_out, - unsigned *end_idx_out, bool stop_at_matrix); - -struct vtn_ssa_value *vtn_local_load(struct vtn_builder *b, nir_deref_var *src); - -void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, - nir_deref_var *dest); - -struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src); - -void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, - struct vtn_access_chain *dest); - -void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count); - - -typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, - struct vtn_value *, - int member, - const struct vtn_decoration *, - void *); - -void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, - vtn_decoration_foreach_cb cb, void *data); - -typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *, - struct vtn_value *, - const struct vtn_decoration *, - void *); - -void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, - vtn_execution_mode_foreach_cb cb, void *data); - -nir_op vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap); - -void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count); - -bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, - const uint32_t *words, unsigned count); diff --git a/src/compiler/nir/spirv/vtn_variables.c b/src/compiler/nir/spirv/vtn_variables.c deleted file mode 100644 index 3cbac1e5da8..00000000000 --- a/src/compiler/nir/spirv/vtn_variables.c +++ /dev/null @@ -1,1415 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of 
this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "vtn_private.h" - -static struct vtn_access_chain * -vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old, - unsigned new_ids) -{ - struct vtn_access_chain *chain; - - unsigned new_len = old->length + new_ids; - chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->link[0])); - - chain->var = old->var; - chain->length = new_len; - - for (unsigned i = 0; i < old->length; i++) - chain->link[i] = old->link[i]; - - return chain; -} - -static nir_ssa_def * -vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link, - unsigned stride) -{ - assert(stride > 0); - if (link.mode == vtn_access_mode_literal) { - return nir_imm_int(&b->nb, link.id * stride); - } else if (stride == 1) { - return vtn_ssa_value(b, link.id)->def; - } else { - return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def, - nir_imm_int(&b->nb, stride)); - } -} - -static struct vtn_type * -vtn_access_chain_tail_type(struct vtn_builder *b, - struct vtn_access_chain *chain) -{ - struct vtn_type *type = chain->var->type; - for (unsigned i = 0; i < chain->length; i++) { - if (glsl_type_is_struct(type->type)) { - assert(chain->link[i].mode == vtn_access_mode_literal); - type = type->members[chain->link[i].id]; - } else { - type = type->array_element; - } - } - return type; -} - -/* Crawls a chain of array derefs and rewrites the types so that the - * lengths stay the same but the terminal type is the one given by - * tail_type. This is useful for split structures. 
- */ -static void -rewrite_deref_types(nir_deref *deref, const struct glsl_type *type) -{ - deref->type = type; - if (deref->child) { - assert(deref->child->deref_type == nir_deref_type_array); - assert(glsl_type_is_array(deref->type)); - rewrite_deref_types(deref->child, glsl_get_array_element(type)); - } -} - -nir_deref_var * -vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) -{ - nir_deref_var *deref_var; - if (chain->var->var) { - deref_var = nir_deref_var_create(b, chain->var->var); - } else { - assert(chain->var->members); - /* Create the deref_var manually. It will get filled out later. */ - deref_var = rzalloc(b, nir_deref_var); - deref_var->deref.deref_type = nir_deref_type_var; - } - - struct vtn_type *deref_type = chain->var->type; - nir_deref *tail = &deref_var->deref; - nir_variable **members = chain->var->members; - - for (unsigned i = 0; i < chain->length; i++) { - enum glsl_base_type base_type = glsl_get_base_type(deref_type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_ARRAY: { - deref_type = deref_type->array_element; - - nir_deref_array *deref_arr = nir_deref_array_create(b); - deref_arr->deref.type = deref_type->type; - - if (chain->link[i].mode == vtn_access_mode_literal) { - deref_arr->deref_array_type = nir_deref_array_type_direct; - deref_arr->base_offset = chain->link[i].id; - } else { - assert(chain->link[i].mode == vtn_access_mode_id); - deref_arr->deref_array_type = nir_deref_array_type_indirect; - deref_arr->base_offset = 0; - deref_arr->indirect = - nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def); - } - tail->child = &deref_arr->deref; - tail = tail->child; - break; - } - - case GLSL_TYPE_STRUCT: { - assert(chain->link[i].mode == vtn_access_mode_literal); - unsigned idx = chain->link[i].id; - deref_type = deref_type->members[idx]; - if (members) { - /* This is a pre-split 
structure. */ - deref_var->var = members[idx]; - rewrite_deref_types(&deref_var->deref, members[idx]->type); - assert(tail->type == deref_type->type); - members = NULL; - } else { - nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); - deref_struct->deref.type = deref_type->type; - tail->child = &deref_struct->deref; - tail = tail->child; - } - break; - } - default: - unreachable("Invalid type for deref"); - } - } - - assert(members == NULL); - return deref_var; -} - -static void -_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref, - nir_deref *tail, struct vtn_ssa_value *inout) -{ - /* The deref tail may contain a deref to select a component of a vector (in - * other words, it might not be an actual tail) so we have to save it away - * here since we overwrite it later. - */ - nir_deref *old_child = tail->child; - - if (glsl_type_is_vector_or_scalar(tail->type)) { - /* Terminate the deref chain in case there is one more link to pick - * off a component of the vector. - */ - tail->child = NULL; - - nir_intrinsic_op op = load ? 
nir_intrinsic_load_var : - nir_intrinsic_store_var; - - nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); - intrin->variables[0] = - nir_deref_as_var(nir_copy_deref(intrin, &deref->deref)); - intrin->num_components = glsl_get_vector_elements(tail->type); - - if (load) { - nir_ssa_dest_init(&intrin->instr, &intrin->dest, - intrin->num_components, - glsl_get_bit_size(glsl_get_base_type(tail->type)), - NULL); - inout->def = &intrin->dest.ssa; - } else { - nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1); - intrin->src[0] = nir_src_for_ssa(inout->def); - } - - nir_builder_instr_insert(&b->nb, &intrin->instr); - } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(tail->type)) { - unsigned elems = glsl_get_length(tail->type); - nir_deref_array *deref_arr = nir_deref_array_create(b); - deref_arr->deref_array_type = nir_deref_array_type_direct; - deref_arr->deref.type = glsl_get_array_element(tail->type); - tail->child = &deref_arr->deref; - for (unsigned i = 0; i < elems; i++) { - deref_arr->base_offset = i; - _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); - } - } else { - assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT); - unsigned elems = glsl_get_length(tail->type); - nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0); - tail->child = &deref_struct->deref; - for (unsigned i = 0; i < elems; i++) { - deref_struct->index = i; - deref_struct->deref.type = glsl_get_struct_field(tail->type, i); - _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); - } - } - - tail->child = old_child; -} - -nir_deref_var * -vtn_nir_deref(struct vtn_builder *b, uint32_t id) -{ - struct vtn_access_chain *chain = - vtn_value(b, id, vtn_value_type_access_chain)->access_chain; - - return vtn_access_chain_to_deref(b, chain); -} - -/* - * Gets the NIR-level deref tail, which may have as a child an array deref - * selecting which component due to OpAccessChain 
supporting per-component - * indexing in SPIR-V. - */ -static nir_deref * -get_deref_tail(nir_deref_var *deref) -{ - nir_deref *cur = &deref->deref; - while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) - cur = cur->child; - - return cur; -} - -struct vtn_ssa_value * -vtn_local_load(struct vtn_builder *b, nir_deref_var *src) -{ - nir_deref *src_tail = get_deref_tail(src); - struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type); - _vtn_local_load_store(b, true, src, src_tail, val); - - if (src_tail->child) { - nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); - assert(vec_deref->deref.child == NULL); - val->type = vec_deref->deref.type; - if (vec_deref->deref_array_type == nir_deref_array_type_direct) - val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); - else - val->def = vtn_vector_extract_dynamic(b, val->def, - vec_deref->indirect.ssa); - } - - return val; -} - -void -vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, - nir_deref_var *dest) -{ - nir_deref *dest_tail = get_deref_tail(dest); - - if (dest_tail->child) { - struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type); - _vtn_local_load_store(b, true, dest, dest_tail, val); - nir_deref_array *deref = nir_deref_as_array(dest_tail->child); - assert(deref->deref.child == NULL); - if (deref->deref_array_type == nir_deref_array_type_direct) - val->def = vtn_vector_insert(b, val->def, src->def, - deref->base_offset); - else - val->def = vtn_vector_insert_dynamic(b, val->def, src->def, - deref->indirect.ssa); - _vtn_local_load_store(b, false, dest, dest_tail, val); - } else { - _vtn_local_load_store(b, false, dest, dest_tail, src); - } -} - -static nir_ssa_def * -get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, - struct vtn_type **type, unsigned *chain_idx) -{ - /* Push constants have no explicit binding */ - if (chain->var->mode == vtn_variable_mode_push_constant) { - *chain_idx = 0; - *type = 
chain->var->type; - return NULL; - } - - nir_ssa_def *array_index; - if (glsl_type_is_array(chain->var->type->type)) { - assert(chain->length > 0); - array_index = vtn_access_link_as_ssa(b, chain->link[0], 1); - *chain_idx = 1; - *type = chain->var->type->array_element; - } else { - array_index = nir_imm_int(&b->nb, 0); - *chain_idx = 0; - *type = chain->var->type; - } - - nir_intrinsic_instr *instr = - nir_intrinsic_instr_create(b->nb.shader, - nir_intrinsic_vulkan_resource_index); - instr->src[0] = nir_src_for_ssa(array_index); - nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set); - nir_intrinsic_set_binding(instr, chain->var->binding); - - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); - nir_builder_instr_insert(&b->nb, &instr->instr); - - return &instr->dest.ssa; -} - -nir_ssa_def * -vtn_access_chain_to_offset(struct vtn_builder *b, - struct vtn_access_chain *chain, - nir_ssa_def **index_out, struct vtn_type **type_out, - unsigned *end_idx_out, bool stop_at_matrix) -{ - unsigned idx = 0; - struct vtn_type *type; - *index_out = get_vulkan_resource_index(b, chain, &type, &idx); - - nir_ssa_def *offset = nir_imm_int(&b->nb, 0); - for (; idx < chain->length; idx++) { - enum glsl_base_type base_type = glsl_get_base_type(type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_BOOL: - /* Some users may not want matrix or vector derefs */ - if (stop_at_matrix) - goto end; - /* Fall through */ - - case GLSL_TYPE_ARRAY: - offset = nir_iadd(&b->nb, offset, - vtn_access_link_as_ssa(b, chain->link[idx], - type->stride)); - - type = type->array_element; - break; - - case GLSL_TYPE_STRUCT: { - assert(chain->link[idx].mode == vtn_access_mode_literal); - unsigned member = chain->link[idx].id; - offset = nir_iadd(&b->nb, offset, - nir_imm_int(&b->nb, type->offsets[member])); - type = type->members[member]; - break; - } - - default: - unreachable("Invalid type for 
deref"); - } - } - -end: - *type_out = type; - if (end_idx_out) - *end_idx_out = idx; - - return offset; -} - -static void -_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, - nir_ssa_def *index, nir_ssa_def *offset, - struct vtn_ssa_value **inout, const struct glsl_type *type) -{ - nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op); - instr->num_components = glsl_get_vector_elements(type); - - int src = 0; - if (!load) { - nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1); - instr->src[src++] = nir_src_for_ssa((*inout)->def); - } - - /* We set the base and size for push constant load to the entire push - * constant block for now. - */ - if (op == nir_intrinsic_load_push_constant) { - nir_intrinsic_set_base(instr, 0); - nir_intrinsic_set_range(instr, 128); - } - - if (index) - instr->src[src++] = nir_src_for_ssa(index); - - instr->src[src++] = nir_src_for_ssa(offset); - - if (load) { - nir_ssa_dest_init(&instr->instr, &instr->dest, - instr->num_components, - glsl_get_bit_size(glsl_get_base_type(type)), NULL); - (*inout)->def = &instr->dest.ssa; - } - - nir_builder_instr_insert(&b->nb, &instr->instr); - - if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL) - (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0)); -} - -static void -_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, - nir_ssa_def *index, nir_ssa_def *offset, - struct vtn_access_chain *chain, unsigned chain_idx, - struct vtn_type *type, struct vtn_ssa_value **inout) -{ - if (chain && chain_idx >= chain->length) - chain = NULL; - - if (load && chain == NULL && *inout == NULL) - *inout = vtn_create_ssa_value(b, type->type); - - enum glsl_base_type base_type = glsl_get_base_type(type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - /* This is where things get interesting. 
At this point, we've hit - * a vector, a scalar, or a matrix. - */ - if (glsl_type_is_matrix(type->type)) { - if (chain == NULL) { - /* Loading the whole matrix */ - struct vtn_ssa_value *transpose; - unsigned num_ops, vec_width; - if (type->row_major) { - num_ops = glsl_get_vector_elements(type->type); - vec_width = glsl_get_matrix_columns(type->type); - if (load) { - const struct glsl_type *transpose_type = - glsl_matrix_type(base_type, vec_width, num_ops); - *inout = vtn_create_ssa_value(b, transpose_type); - } else { - transpose = vtn_ssa_transpose(b, *inout); - inout = &transpose; - } - } else { - num_ops = glsl_get_matrix_columns(type->type); - vec_width = glsl_get_vector_elements(type->type); - } - - for (unsigned i = 0; i < num_ops; i++) { - nir_ssa_def *elem_offset = - nir_iadd(&b->nb, offset, - nir_imm_int(&b->nb, i * type->stride)); - _vtn_load_store_tail(b, op, load, index, elem_offset, - &(*inout)->elems[i], - glsl_vector_type(base_type, vec_width)); - } - - if (load && type->row_major) - *inout = vtn_ssa_transpose(b, *inout); - } else if (type->row_major) { - /* Row-major but with an access chiain. 
*/ - nir_ssa_def *col_offset = - vtn_access_link_as_ssa(b, chain->link[chain_idx], - type->array_element->stride); - offset = nir_iadd(&b->nb, offset, col_offset); - - if (chain_idx + 1 < chain->length) { - /* Picking off a single element */ - nir_ssa_def *row_offset = - vtn_access_link_as_ssa(b, chain->link[chain_idx + 1], - type->stride); - offset = nir_iadd(&b->nb, offset, row_offset); - if (load) - *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type)); - _vtn_load_store_tail(b, op, load, index, offset, inout, - glsl_scalar_type(base_type)); - } else { - /* Grabbing a column; picking one element off each row */ - unsigned num_comps = glsl_get_vector_elements(type->type); - const struct glsl_type *column_type = - glsl_get_column_type(type->type); - - nir_ssa_def *comps[4]; - for (unsigned i = 0; i < num_comps; i++) { - nir_ssa_def *elem_offset = - nir_iadd(&b->nb, offset, - nir_imm_int(&b->nb, i * type->stride)); - - struct vtn_ssa_value *comp, temp_val; - if (!load) { - temp_val.def = nir_channel(&b->nb, (*inout)->def, i); - temp_val.type = glsl_scalar_type(base_type); - } - comp = &temp_val; - _vtn_load_store_tail(b, op, load, index, elem_offset, - &comp, glsl_scalar_type(base_type)); - comps[i] = comp->def; - } - - if (load) { - if (*inout == NULL) - *inout = vtn_create_ssa_value(b, column_type); - - (*inout)->def = nir_vec(&b->nb, comps, num_comps); - } - } - } else { - /* Column-major with a deref. Fall through to array case. */ - nir_ssa_def *col_offset = - vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); - offset = nir_iadd(&b->nb, offset, col_offset); - - _vtn_block_load_store(b, op, load, index, offset, - chain, chain_idx + 1, - type->array_element, inout); - } - } else if (chain == NULL) { - /* Single whole vector */ - assert(glsl_type_is_vector_or_scalar(type->type)); - _vtn_load_store_tail(b, op, load, index, offset, inout, type->type); - } else { - /* Single component of a vector. Fall through to array case. 
*/ - nir_ssa_def *elem_offset = - vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); - offset = nir_iadd(&b->nb, offset, elem_offset); - - _vtn_block_load_store(b, op, load, index, offset, NULL, 0, - type->array_element, inout); - } - return; - - case GLSL_TYPE_ARRAY: { - unsigned elems = glsl_get_length(type->type); - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *elem_off = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); - _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, - type->array_element, &(*inout)->elems[i]); - } - return; - } - - case GLSL_TYPE_STRUCT: { - unsigned elems = glsl_get_length(type->type); - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *elem_off = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); - _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, - type->members[i], &(*inout)->elems[i]); - } - return; - } - - default: - unreachable("Invalid block member type"); - } -} - -static struct vtn_ssa_value * -vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src) -{ - nir_intrinsic_op op; - switch (src->var->mode) { - case vtn_variable_mode_ubo: - op = nir_intrinsic_load_ubo; - break; - case vtn_variable_mode_ssbo: - op = nir_intrinsic_load_ssbo; - break; - case vtn_variable_mode_push_constant: - op = nir_intrinsic_load_push_constant; - break; - default: - assert(!"Invalid block variable mode"); - } - - nir_ssa_def *offset, *index = NULL; - struct vtn_type *type; - unsigned chain_idx; - offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true); - - struct vtn_ssa_value *value = NULL; - _vtn_block_load_store(b, op, true, index, offset, - src, chain_idx, type, &value); - return value; -} - -static void -vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, - struct vtn_access_chain *dst) -{ - nir_ssa_def *offset, *index = NULL; - struct vtn_type *type; - unsigned chain_idx; - offset = vtn_access_chain_to_offset(b, dst, 
&index, &type, &chain_idx, true); - - _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset, - dst, chain_idx, type, &src); -} - -static bool -vtn_variable_is_external_block(struct vtn_variable *var) -{ - return var->mode == vtn_variable_mode_ssbo || - var->mode == vtn_variable_mode_ubo || - var->mode == vtn_variable_mode_push_constant; -} - -static void -_vtn_variable_load_store(struct vtn_builder *b, bool load, - struct vtn_access_chain *chain, - struct vtn_type *tail_type, - struct vtn_ssa_value **inout) -{ - enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - /* At this point, we have a scalar, vector, or matrix so we know that - * there cannot be any structure splitting still in the way. By - * stopping at the matrix level rather than the vector level, we - * ensure that matrices get loaded in the optimal way even if they - * are storred row-major in a UBO. - */ - if (load) { - *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain)); - } else { - vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain)); - } - return; - - case GLSL_TYPE_ARRAY: - case GLSL_TYPE_STRUCT: { - struct vtn_access_chain *new_chain = - vtn_access_chain_extend(b, chain, 1); - new_chain->link[chain->length].mode = vtn_access_mode_literal; - unsigned elems = glsl_get_length(tail_type->type); - if (load) { - assert(*inout == NULL); - *inout = rzalloc(b, struct vtn_ssa_value); - (*inout)->type = tail_type->type; - (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems); - } - for (unsigned i = 0; i < elems; i++) { - new_chain->link[chain->length].id = i; - struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ? 
- tail_type->array_element : tail_type->members[i]; - _vtn_variable_load_store(b, load, new_chain, elem_type, - &(*inout)->elems[i]); - } - return; - } - - default: - unreachable("Invalid access chain type"); - } -} - -struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src) -{ - if (vtn_variable_is_external_block(src->var)) { - return vtn_block_load(b, src); - } else { - struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); - struct vtn_ssa_value *val = NULL; - _vtn_variable_load_store(b, true, src, tail_type, &val); - return val; - } -} - -void -vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, - struct vtn_access_chain *dest) -{ - if (vtn_variable_is_external_block(dest->var)) { - assert(dest->var->mode == vtn_variable_mode_ssbo); - vtn_block_store(b, src, dest); - } else { - struct vtn_type *tail_type = vtn_access_chain_tail_type(b, dest); - _vtn_variable_load_store(b, false, dest, tail_type, &src); - } -} - -static void -_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, - struct vtn_access_chain *src, struct vtn_type *tail_type) -{ - enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - /* At this point, we have a scalar, vector, or matrix so we know that - * there cannot be any structure splitting still in the way. By - * stopping at the matrix level rather than the vector level, we - * ensure that matrices get loaded in the optimal way even if they - * are storred row-major in a UBO. 
- */ - vtn_variable_store(b, vtn_variable_load(b, src), dest); - return; - - case GLSL_TYPE_ARRAY: - case GLSL_TYPE_STRUCT: { - struct vtn_access_chain *new_src, *new_dest; - new_src = vtn_access_chain_extend(b, src, 1); - new_dest = vtn_access_chain_extend(b, dest, 1); - new_src->link[src->length].mode = vtn_access_mode_literal; - new_dest->link[dest->length].mode = vtn_access_mode_literal; - unsigned elems = glsl_get_length(tail_type->type); - for (unsigned i = 0; i < elems; i++) { - new_src->link[src->length].id = i; - new_dest->link[dest->length].id = i; - struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ? - tail_type->array_element : tail_type->members[i]; - _vtn_variable_copy(b, new_dest, new_src, elem_type); - } - return; - } - - default: - unreachable("Invalid access chain type"); - } -} - -static void -vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, - struct vtn_access_chain *src) -{ - struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); - assert(vtn_access_chain_tail_type(b, dest)->type == tail_type->type); - - /* TODO: At some point, we should add a special-case for when we can - * just emit a copy_var intrinsic. - */ - _vtn_variable_copy(b, dest, src, tail_type); -} - -static void -set_mode_system_value(nir_variable_mode *mode) -{ - assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); - *mode = nir_var_system_value; -} - -static void -vtn_get_builtin_location(struct vtn_builder *b, - SpvBuiltIn builtin, int *location, - nir_variable_mode *mode) -{ - switch (builtin) { - case SpvBuiltInPosition: - *location = VARYING_SLOT_POS; - break; - case SpvBuiltInPointSize: - *location = VARYING_SLOT_PSIZ; - break; - case SpvBuiltInClipDistance: - *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? 
*/ - break; - case SpvBuiltInCullDistance: - /* XXX figure this out */ - break; - case SpvBuiltInVertexIndex: - *location = SYSTEM_VALUE_VERTEX_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInVertexId: - /* Vulkan defines VertexID to be zero-based and reserves the new - * builtin keyword VertexIndex to indicate the non-zero-based value. - */ - *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; - set_mode_system_value(mode); - break; - case SpvBuiltInInstanceIndex: - *location = SYSTEM_VALUE_INSTANCE_INDEX; - set_mode_system_value(mode); - break; - case SpvBuiltInInstanceId: - *location = SYSTEM_VALUE_INSTANCE_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInPrimitiveId: - *location = VARYING_SLOT_PRIMITIVE_ID; - *mode = nir_var_shader_out; - break; - case SpvBuiltInInvocationId: - *location = SYSTEM_VALUE_INVOCATION_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInLayer: - *location = VARYING_SLOT_LAYER; - *mode = nir_var_shader_out; - break; - case SpvBuiltInViewportIndex: - *location = VARYING_SLOT_VIEWPORT; - if (b->shader->stage == MESA_SHADER_GEOMETRY) - *mode = nir_var_shader_out; - else if (b->shader->stage == MESA_SHADER_FRAGMENT) - *mode = nir_var_shader_in; - else - unreachable("invalid stage for SpvBuiltInViewportIndex"); - break; - case SpvBuiltInTessLevelOuter: - case SpvBuiltInTessLevelInner: - case SpvBuiltInTessCoord: - case SpvBuiltInPatchVertices: - unreachable("no tessellation support"); - case SpvBuiltInFragCoord: - *location = VARYING_SLOT_POS; - assert(*mode == nir_var_shader_in); - break; - case SpvBuiltInPointCoord: - *location = VARYING_SLOT_PNTC; - assert(*mode == nir_var_shader_in); - break; - case SpvBuiltInFrontFacing: - *location = VARYING_SLOT_FACE; - assert(*mode == nir_var_shader_in); - break; - case SpvBuiltInSampleId: - *location = SYSTEM_VALUE_SAMPLE_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInSamplePosition: - *location = SYSTEM_VALUE_SAMPLE_POS; - set_mode_system_value(mode); - 
break; - case SpvBuiltInSampleMask: - *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */ - set_mode_system_value(mode); - break; - case SpvBuiltInFragDepth: - *location = FRAG_RESULT_DEPTH; - assert(*mode == nir_var_shader_out); - break; - case SpvBuiltInNumWorkgroups: - *location = SYSTEM_VALUE_NUM_WORK_GROUPS; - set_mode_system_value(mode); - break; - case SpvBuiltInWorkgroupSize: - /* This should already be handled */ - unreachable("unsupported builtin"); - break; - case SpvBuiltInWorkgroupId: - *location = SYSTEM_VALUE_WORK_GROUP_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInLocalInvocationId: - *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInLocalInvocationIndex: - *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; - set_mode_system_value(mode); - break; - case SpvBuiltInGlobalInvocationId: - *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInHelperInvocation: - default: - unreachable("unsupported builtin"); - } -} - -static void -var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *void_var) -{ - struct vtn_variable *vtn_var = void_var; - - /* Handle decorations that apply to a vtn_variable as a whole */ - switch (dec->decoration) { - case SpvDecorationBinding: - vtn_var->binding = dec->literals[0]; - return; - case SpvDecorationDescriptorSet: - vtn_var->descriptor_set = dec->literals[0]; - return; - - case SpvDecorationLocation: { - unsigned location = dec->literals[0]; - bool is_vertex_input; - if (b->shader->stage == MESA_SHADER_FRAGMENT && - vtn_var->mode == vtn_variable_mode_output) { - is_vertex_input = false; - location += FRAG_RESULT_DATA0; - } else if (b->shader->stage == MESA_SHADER_VERTEX && - vtn_var->mode == vtn_variable_mode_input) { - is_vertex_input = true; - location += VERT_ATTRIB_GENERIC0; - } else if (vtn_var->mode == vtn_variable_mode_input || - 
vtn_var->mode == vtn_variable_mode_output) { - is_vertex_input = false; - location += VARYING_SLOT_VAR0; - } else { - assert(!"Location must be on input or output variable"); - } - - if (vtn_var->var) { - vtn_var->var->data.location = location; - vtn_var->var->data.explicit_location = true; - } else { - assert(vtn_var->members); - unsigned length = glsl_get_length(vtn_var->type->type); - for (unsigned i = 0; i < length; i++) { - vtn_var->members[i]->data.location = location; - vtn_var->members[i]->data.explicit_location = true; - location += - glsl_count_attribute_slots(vtn_var->members[i]->interface_type, - is_vertex_input); - } - } - return; - } - - default: - break; - } - - /* Now we handle decorations that apply to a particular nir_variable */ - nir_variable *nir_var = vtn_var->var; - if (val->value_type == vtn_value_type_access_chain) { - assert(val->access_chain->length == 0); - assert(val->access_chain->var == void_var); - assert(member == -1); - } else { - assert(val->value_type == vtn_value_type_type); - if (member != -1) - nir_var = vtn_var->members[member]; - } - - if (nir_var == NULL) - return; - - switch (dec->decoration) { - case SpvDecorationRelaxedPrecision: - break; /* FIXME: Do nothing with this for now. 
*/ - case SpvDecorationNoPerspective: - nir_var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; - break; - case SpvDecorationFlat: - nir_var->data.interpolation = INTERP_QUALIFIER_FLAT; - break; - case SpvDecorationCentroid: - nir_var->data.centroid = true; - break; - case SpvDecorationSample: - nir_var->data.sample = true; - break; - case SpvDecorationInvariant: - nir_var->data.invariant = true; - break; - case SpvDecorationConstant: - assert(nir_var->constant_initializer != NULL); - nir_var->data.read_only = true; - break; - case SpvDecorationNonWritable: - nir_var->data.read_only = true; - break; - case SpvDecorationComponent: - nir_var->data.location_frac = dec->literals[0]; - break; - case SpvDecorationIndex: - nir_var->data.explicit_index = true; - nir_var->data.index = dec->literals[0]; - break; - case SpvDecorationBuiltIn: { - SpvBuiltIn builtin = dec->literals[0]; - - if (builtin == SpvBuiltInWorkgroupSize) { - /* This shouldn't be a builtin. It's actually a constant. */ - nir_var->data.mode = nir_var_global; - nir_var->data.read_only = true; - - nir_constant *c = rzalloc(nir_var, nir_constant); - c->value.u[0] = b->shader->info.cs.local_size[0]; - c->value.u[1] = b->shader->info.cs.local_size[1]; - c->value.u[2] = b->shader->info.cs.local_size[2]; - nir_var->constant_initializer = c; - break; - } - - nir_variable_mode mode = nir_var->data.mode; - vtn_get_builtin_location(b, builtin, &nir_var->data.location, &mode); - nir_var->data.explicit_location = true; - nir_var->data.mode = mode; - - if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) - nir_var->data.origin_upper_left = b->origin_upper_left; - break; - } - case SpvDecorationRowMajor: - case SpvDecorationColMajor: - case SpvDecorationGLSLShared: - case SpvDecorationPatch: - case SpvDecorationRestrict: - case SpvDecorationAliased: - case SpvDecorationVolatile: - case SpvDecorationCoherent: - case SpvDecorationNonReadable: - case SpvDecorationUniform: - /* This is really 
nice but we have no use for it right now. */ - case SpvDecorationCPacked: - case SpvDecorationSaturatedConversion: - case SpvDecorationStream: - case SpvDecorationOffset: - case SpvDecorationXfbBuffer: - case SpvDecorationFuncParamAttr: - case SpvDecorationFPRoundingMode: - case SpvDecorationFPFastMathMode: - case SpvDecorationLinkageAttributes: - case SpvDecorationSpecId: - break; - default: - unreachable("Unhandled variable decoration"); - } -} - -/* Tries to compute the size of an interface block based on the strides and - * offsets that are provided to us in the SPIR-V source. - */ -static unsigned -vtn_type_block_size(struct vtn_type *type) -{ - enum glsl_base_type base_type = glsl_get_base_type(type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_DOUBLE: { - unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) : - glsl_get_matrix_columns(type->type); - if (cols > 1) { - assert(type->stride > 0); - return type->stride * cols; - } else if (base_type == GLSL_TYPE_DOUBLE) { - return glsl_get_vector_elements(type->type) * 8; - } else { - return glsl_get_vector_elements(type->type) * 4; - } - } - - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_INTERFACE: { - unsigned size = 0; - unsigned num_fields = glsl_get_length(type->type); - for (unsigned f = 0; f < num_fields; f++) { - unsigned field_end = type->offsets[f] + - vtn_type_block_size(type->members[f]); - size = MAX2(size, field_end); - } - return size; - } - - case GLSL_TYPE_ARRAY: - assert(type->stride > 0); - assert(glsl_get_length(type->type) > 0); - return type->stride * glsl_get_length(type->type); - - default: - assert(!"Invalid block type"); - return 0; - } -} - -void -vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpVariable: { - struct vtn_variable *var = rzalloc(b, struct vtn_variable); - var->type = vtn_value(b, w[1], 
vtn_value_type_type)->type; - - var->chain.var = var; - var->chain.length = 0; - - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_access_chain); - val->access_chain = &var->chain; - - struct vtn_type *without_array = var->type; - while(glsl_type_is_array(without_array->type)) - without_array = without_array->array_element; - - nir_variable_mode nir_mode; - switch ((SpvStorageClass)w[3]) { - case SpvStorageClassUniform: - case SpvStorageClassUniformConstant: - if (without_array->block) { - var->mode = vtn_variable_mode_ubo; - b->shader->info.num_ubos++; - } else if (without_array->buffer_block) { - var->mode = vtn_variable_mode_ssbo; - b->shader->info.num_ssbos++; - } else if (glsl_type_is_image(without_array->type)) { - var->mode = vtn_variable_mode_image; - nir_mode = nir_var_uniform; - b->shader->info.num_images++; - } else if (glsl_type_is_sampler(without_array->type)) { - var->mode = vtn_variable_mode_sampler; - nir_mode = nir_var_uniform; - b->shader->info.num_textures++; - } else { - assert(!"Invalid uniform variable type"); - } - break; - case SpvStorageClassPushConstant: - var->mode = vtn_variable_mode_push_constant; - assert(b->shader->num_uniforms == 0); - b->shader->num_uniforms = vtn_type_block_size(var->type) * 4; - break; - case SpvStorageClassInput: - var->mode = vtn_variable_mode_input; - nir_mode = nir_var_shader_in; - break; - case SpvStorageClassOutput: - var->mode = vtn_variable_mode_output; - nir_mode = nir_var_shader_out; - break; - case SpvStorageClassPrivate: - var->mode = vtn_variable_mode_global; - nir_mode = nir_var_global; - break; - case SpvStorageClassFunction: - var->mode = vtn_variable_mode_local; - nir_mode = nir_var_local; - break; - case SpvStorageClassWorkgroup: - var->mode = vtn_variable_mode_workgroup; - nir_mode = nir_var_shared; - break; - case SpvStorageClassCrossWorkgroup: - case SpvStorageClassGeneric: - case SpvStorageClassAtomicCounter: - default: - unreachable("Unhandled variable storage class"); - } - 
- switch (var->mode) { - case vtn_variable_mode_local: - case vtn_variable_mode_global: - case vtn_variable_mode_image: - case vtn_variable_mode_sampler: - case vtn_variable_mode_workgroup: - /* For these, we create the variable normally */ - var->var = rzalloc(b->shader, nir_variable); - var->var->name = ralloc_strdup(var->var, val->name); - var->var->type = var->type->type; - var->var->data.mode = nir_mode; - - switch (var->mode) { - case vtn_variable_mode_image: - case vtn_variable_mode_sampler: - var->var->interface_type = without_array->type; - break; - default: - var->var->interface_type = NULL; - break; - } - break; - - case vtn_variable_mode_input: - case vtn_variable_mode_output: { - /* For inputs and outputs, we immediately split structures. This - * is for a couple of reasons. For one, builtins may all come in - * a struct and we really want those split out into separate - * variables. For another, interpolation qualifiers can be - * applied to members of the top-level struct ane we need to be - * able to preserve that information. - */ - - int array_length = -1; - struct vtn_type *interface_type = var->type; - if (b->shader->stage == MESA_SHADER_GEOMETRY && - glsl_type_is_array(var->type->type)) { - /* In Geometry shaders (and some tessellation), inputs come - * in per-vertex arrays. However, some builtins come in - * non-per-vertex, hence the need for the is_array check. In - * any case, there are no non-builtin arrays allowed so this - * check should be sufficient. - */ - interface_type = var->type->array_element; - array_length = glsl_get_length(var->type->type); - } - - if (glsl_type_is_struct(interface_type->type)) { - /* It's a struct. Split it. 
*/ - unsigned num_members = glsl_get_length(interface_type->type); - var->members = ralloc_array(b, nir_variable *, num_members); - - for (unsigned i = 0; i < num_members; i++) { - const struct glsl_type *mtype = interface_type->members[i]->type; - if (array_length >= 0) - mtype = glsl_array_type(mtype, array_length); - - var->members[i] = rzalloc(b->shader, nir_variable); - var->members[i]->name = - ralloc_asprintf(var->members[i], "%s.%d", val->name, i); - var->members[i]->type = mtype; - var->members[i]->interface_type = - interface_type->members[i]->type; - var->members[i]->data.mode = nir_mode; - } - } else { - var->var = rzalloc(b->shader, nir_variable); - var->var->name = ralloc_strdup(var->var, val->name); - var->var->type = var->type->type; - var->var->interface_type = interface_type->type; - var->var->data.mode = nir_mode; - } - - /* For inputs and outputs, we need to grab locations and builtin - * information from the interface type. - */ - vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var); - break; - - case vtn_variable_mode_param: - unreachable("Not created through OpVariable"); - } - - case vtn_variable_mode_ubo: - case vtn_variable_mode_ssbo: - case vtn_variable_mode_push_constant: - /* These don't need actual variables. */ - break; - } - - if (count > 4) { - assert(count == 5); - nir_constant *constant = - vtn_value(b, w[4], vtn_value_type_constant)->constant; - var->var->constant_initializer = - nir_constant_clone(constant, var->var); - } - - vtn_foreach_decoration(b, val, var_decoration_cb, var); - - if (var->mode == vtn_variable_mode_image || - var->mode == vtn_variable_mode_sampler) { - /* XXX: We still need the binding information in the nir_variable - * for these. We should fix that. 
- */ - var->var->data.binding = var->binding; - var->var->data.descriptor_set = var->descriptor_set; - - if (var->mode == vtn_variable_mode_image) - var->var->data.image.format = without_array->image_format; - } - - if (var->mode == vtn_variable_mode_local) { - assert(var->members == NULL && var->var != NULL); - nir_function_impl_add_variable(b->impl, var->var); - } else if (var->var) { - nir_shader_add_variable(b->shader, var->var); - } else if (var->members) { - unsigned count = glsl_get_length(without_array->type); - for (unsigned i = 0; i < count; i++) { - assert(var->members[i]->data.mode != nir_var_local); - nir_shader_add_variable(b->shader, var->members[i]); - } - } else { - assert(var->mode == vtn_variable_mode_ubo || - var->mode == vtn_variable_mode_ssbo || - var->mode == vtn_variable_mode_push_constant); - } - break; - } - - case SpvOpAccessChain: - case SpvOpInBoundsAccessChain: { - struct vtn_access_chain *base, *chain; - struct vtn_value *base_val = vtn_untyped_value(b, w[3]); - if (base_val->value_type == vtn_value_type_sampled_image) { - /* This is rather insane. SPIR-V allows you to use OpSampledImage - * to combine an array of images with a single sampler to get an - * array of sampled images that all share the same sampler. - * Fortunately, this means that we can more-or-less ignore the - * sampler when crawling the access chain, but it does leave us - * with this rather awkward little special-case. 
- */ - base = base_val->sampled_image->image; - } else { - assert(base_val->value_type == vtn_value_type_access_chain); - base = base_val->access_chain; - } - - chain = vtn_access_chain_extend(b, base, count - 4); - - unsigned idx = base->length; - for (int i = 4; i < count; i++) { - struct vtn_value *link_val = vtn_untyped_value(b, w[i]); - if (link_val->value_type == vtn_value_type_constant) { - chain->link[idx].mode = vtn_access_mode_literal; - chain->link[idx].id = link_val->constant->value.u[0]; - } else { - chain->link[idx].mode = vtn_access_mode_id; - chain->link[idx].id = w[i]; - } - idx++; - } - - if (base_val->value_type == vtn_value_type_sampled_image) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_sampled_image); - val->sampled_image = ralloc(b, struct vtn_sampled_image); - val->sampled_image->image = chain; - val->sampled_image->sampler = base_val->sampled_image->sampler; - } else { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_access_chain); - val->access_chain = chain; - } - break; - } - - case SpvOpCopyMemory: { - struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain); - struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain); - - vtn_variable_copy(b, dest->access_chain, src->access_chain); - break; - } - - case SpvOpLoad: { - struct vtn_access_chain *src = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - - if (src->var->mode == vtn_variable_mode_image || - src->var->mode == vtn_variable_mode_sampler) { - vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src; - return; - } - - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = vtn_variable_load(b, src); - break; - } - - case SpvOpStore: { - struct vtn_access_chain *dest = - vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; - struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); - vtn_variable_store(b, src, dest); - break; - } - - case 
SpvOpArrayLength: { - struct vtn_access_chain *chain = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - - const uint32_t offset = chain->var->type->offsets[w[4]]; - const uint32_t stride = chain->var->type->members[w[4]]->stride; - - unsigned chain_idx; - struct vtn_type *type; - nir_ssa_def *index = - get_vulkan_resource_index(b, chain, &type, &chain_idx); - - nir_intrinsic_instr *instr = - nir_intrinsic_instr_create(b->nb.shader, - nir_intrinsic_get_buffer_size); - instr->src[0] = nir_src_for_ssa(index); - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); - nir_builder_instr_insert(&b->nb, &instr->instr); - nir_ssa_def *buf_size = &instr->dest.ssa; - - /* array_length = max(buffer_size - offset, 0) / stride */ - nir_ssa_def *array_length = - nir_idiv(&b->nb, - nir_imax(&b->nb, - nir_isub(&b->nb, - buf_size, - nir_imm_int(&b->nb, offset)), - nir_imm_int(&b->nb, 0u)), - nir_imm_int(&b->nb, stride)); - - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = vtn_create_ssa_value(b, glsl_uint_type()); - val->ssa->def = array_length; - break; - } - - case SpvOpCopyMemorySized: - default: - unreachable("Unhandled opcode"); - } -} diff --git a/src/compiler/spirv/GLSL.std.450.h b/src/compiler/spirv/GLSL.std.450.h new file mode 100644 index 00000000000..d1c9b5c1d44 --- /dev/null +++ b/src/compiler/spirv/GLSL.std.450.h @@ -0,0 +1,127 @@ +/* +** Copyright (c) 2014-2015 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +const int GLSLstd450Version = 99; +const int GLSLstd450Revision = 3; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + 
GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h new file mode 100644 index 00000000000..500f2cb94df --- /dev/null +++ b/src/compiler/spirv/nir_spirv.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#pragma once + +#ifndef _NIR_SPIRV_H_ +#define _NIR_SPIRV_H_ + +#include "nir/nir.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct nir_spirv_specialization { + uint32_t id; + uint32_t data; +}; + +nir_function *spirv_to_nir(const uint32_t *words, size_t word_count, + struct nir_spirv_specialization *specializations, + unsigned num_specializations, + gl_shader_stage stage, const char *entry_point_name, + const nir_shader_compiler_options *options); + +#ifdef __cplusplus +} +#endif + +#endif /* _NIR_SPIRV_H_ */ diff --git a/src/compiler/spirv/spirv.h b/src/compiler/spirv/spirv.h new file mode 100644 index 00000000000..63bcb2f88dd --- /dev/null +++ b/src/compiler/spirv/spirv.h @@ -0,0 +1,870 @@ +/* +** Copyright (c) 2014-2015 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. +*/ + +/* +** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. 
+*/ + +#ifndef spirv_H +#define spirv_H + +typedef unsigned int SpvId; + +#define SPV_VERSION 0x10000 +#define SPV_REVISION 2 + +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 0x00010000; +static const unsigned int SpvRevision = 2; +static const unsigned int SpvOpCodeMask = 0xffff; +static const unsigned int SpvWordCountShift = 16; + +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL_C = 3, + SpvSourceLanguageOpenCL_CPP = 4, +} SpvSourceLanguage; + +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, +} SpvExecutionModel; + +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, +} SpvAddressingModel; + +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + SpvMemoryModelOpenCL = 2, +} SpvMemoryModel; + +typedef enum SpvExecutionMode_ { + SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthGreater = 14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 
20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + SpvExecutionModeQuads = 24, + SpvExecutionModeIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, +} SpvExecutionMode; + +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroup = 4, + SpvStorageClassCrossWorkgroup = 5, + SpvStorageClassPrivate = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPushConstant = 9, + SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, +} SpvStorageClass; + +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, + SpvDimSubpassData = 6, +} SpvDim; + +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, + SpvSamplerAddressingModeRepeatMirrored = 4, +} SpvSamplerAddressingMode; + +typedef enum SpvSamplerFilterMode_ { + SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, +} SpvSamplerFilterMode; + +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + 
SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, +} SpvImageFormat; + +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, +} SpvImageChannelOrder; + +typedef enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + 
SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 14, + SpvImageChannelDataTypeUnormInt24 = 15, + SpvImageChannelDataTypeUnormInt101010_2 = 16, +} SpvImageChannelDataType; + +typedef enum SpvImageOperandsShift_ { + SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsConstOffsetShift = 3, + SpvImageOperandsOffsetShift = 4, + SpvImageOperandsConstOffsetsShift = 5, + SpvImageOperandsSampleShift = 6, + SpvImageOperandsMinLodShift = 7, +} SpvImageOperandsShift; + +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsConstOffsetMask = 0x00000008, + SpvImageOperandsOffsetMask = 0x00000010, + SpvImageOperandsConstOffsetsMask = 0x00000020, + SpvImageOperandsSampleMask = 0x00000040, + SpvImageOperandsMinLodMask = 0x00000080, +} SpvImageOperandsMask; + +typedef enum SpvFPFastMathModeShift_ { + SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, +} SpvFPFastMathModeShift; + +typedef enum SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, +} SpvFPFastMathModeMask; + +typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, +} SpvFPRoundingMode; + +typedef enum SpvLinkageType_ { + SpvLinkageTypeExport = 0, + SpvLinkageTypeImport = 1, +} SpvLinkageType; + +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, +} 
SpvAccessQualifier; + +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeNoWrite = 6, + SpvFunctionParameterAttributeNoReadWrite = 7, +} SpvFunctionParameterAttribute; + +typedef enum SpvDecoration_ { + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + SpvDecorationBufferBlock = 3, + SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + SpvDecorationBuiltIn = 11, + SpvDecorationNoPerspective = 13, + SpvDecorationFlat = 14, + SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonWritable = 24, + SpvDecorationNonReadable = 25, + SpvDecorationUniform = 26, + SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, + SpvDecorationNoContraction = 42, + SpvDecorationInputAttachmentIndex = 43, + SpvDecorationAlignment = 44, +} SpvDecoration; + +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + 
SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, + SpvBuiltInVertexIndex = 42, + SpvBuiltInInstanceIndex = 43, +} SpvBuiltIn; + +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + SpvSelectionControlDontFlattenShift = 1, +} SpvSelectionControlShift; + +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; + +typedef enum SpvLoopControlShift_ { + SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, +} SpvLoopControlShift; + +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, +} SpvLoopControlMask; + +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + 
SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, +} SpvFunctionControlShift; + +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, +} SpvFunctionControlMask; + +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsAcquireShift = 1, + SpvMemorySemanticsReleaseShift = 2, + SpvMemorySemanticsAcquireReleaseShift = 3, + SpvMemorySemanticsSequentiallyConsistentShift = 4, + SpvMemorySemanticsUniformMemoryShift = 6, + SpvMemorySemanticsSubgroupMemoryShift = 7, + SpvMemorySemanticsWorkgroupMemoryShift = 8, + SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, + SpvMemorySemanticsAtomicCounterMemoryShift = 10, + SpvMemorySemanticsImageMemoryShift = 11, +} SpvMemorySemanticsShift; + +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsAcquireMask = 0x00000002, + SpvMemorySemanticsReleaseMask = 0x00000004, + SpvMemorySemanticsAcquireReleaseMask = 0x00000008, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, + SpvMemorySemanticsUniformMemoryMask = 0x00000040, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, + SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, + SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, + SpvMemorySemanticsImageMemoryMask = 0x00000800, +} SpvMemorySemanticsMask; + +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift = 0, + SpvMemoryAccessAlignedShift = 1, + SpvMemoryAccessNontemporalShift = 2, +} SpvMemoryAccessShift; + +typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, + SpvMemoryAccessNontemporalMask = 0x00000004, +} SpvMemoryAccessMask; + +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, 
+ SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, +} SpvScope; + +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, +} SpvGroupOperation; + +typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, +} SpvKernelEnqueueFlags; + +typedef enum SpvKernelProfilingInfoShift_ { + SpvKernelProfilingInfoCmdExecTimeShift = 0, +} SpvKernelProfilingInfoShift; + +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; + +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation = 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityPipes = 17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, + SpvCapabilityTessellationPointSize = 23, + SpvCapabilityGeometryPointSize = 24, + SpvCapabilityImageGatherExtended = 25, + SpvCapabilityStorageImageMultisample = 27, + SpvCapabilityUniformBufferArrayDynamicIndexing = 28, + SpvCapabilitySampledImageArrayDynamicIndexing = 29, + SpvCapabilityStorageBufferArrayDynamicIndexing = 30, + SpvCapabilityStorageImageArrayDynamicIndexing = 31, + SpvCapabilityClipDistance = 32, + SpvCapabilityCullDistance = 33, + SpvCapabilityImageCubeArray = 34, + SpvCapabilitySampleRateShading = 35, + 
SpvCapabilityImageRect = 36, + SpvCapabilitySampledRect = 37, + SpvCapabilityGenericPointer = 38, + SpvCapabilityInt8 = 39, + SpvCapabilityInputAttachment = 40, + SpvCapabilitySparseResidency = 41, + SpvCapabilityMinLod = 42, + SpvCapabilitySampled1D = 43, + SpvCapabilityImage1D = 44, + SpvCapabilitySampledCubeArray = 45, + SpvCapabilitySampledBuffer = 46, + SpvCapabilityImageBuffer = 47, + SpvCapabilityImageMSArray = 48, + SpvCapabilityStorageImageExtendedFormats = 49, + SpvCapabilityImageQuery = 50, + SpvCapabilityDerivativeControl = 51, + SpvCapabilityInterpolationFunction = 52, + SpvCapabilityTransformFeedback = 53, + SpvCapabilityGeometryStreams = 54, + SpvCapabilityStorageImageReadWithoutFormat = 55, + SpvCapabilityStorageImageWriteWithoutFormat = 56, + SpvCapabilityMultiViewport = 57, +} SpvCapability; + +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSourceContinued = 2, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + 
SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImage = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 
124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + 
SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpGroupAsyncCopy = 259, + SpvOpGroupWaitEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + 
SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, + SpvOpAtomicFlagClear = 319, +} SpvOp; + +#endif // #ifndef spirv_H + diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c new file mode 100644 index 00000000000..99514b49650 --- /dev/null +++ b/src/compiler/spirv/spirv_to_nir.c @@ -0,0 +1,2710 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "vtn_private.h" +#include "nir/nir_vla.h" +#include "nir/nir_control_flow.h" +#include "nir/nir_constant_expressions.h" + +static struct vtn_ssa_value * +vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + unsigned bit_size = glsl_get_bit_size(glsl_get_base_type(val->type)); + val->def = nir_ssa_undef(&b->nb, num_components, bit_size); + } else { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + if (glsl_type_is_matrix(type)) { + const struct glsl_type *elem_type = + glsl_vector_type(glsl_get_base_type(type), + glsl_get_vector_elements(type)); + + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } else if (glsl_type_is_array(type)) { + const struct glsl_type *elem_type = glsl_get_array_element(type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } else { + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = glsl_get_struct_field(type, i); + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } + } + } + + return val; +} + +static struct vtn_ssa_value * +vtn_const_ssa_value(struct vtn_builder *b, nir_constant 
*constant, + const struct glsl_type *type) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); + + if (entry) + return entry->data; + + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, num_components, 32); + + for (unsigned i = 0; i < num_components; i++) + load->value.u32[i] = constant->value.u[i]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + val->def = &load->def; + } else { + assert(glsl_type_is_matrix(type)); + unsigned rows = glsl_get_vector_elements(val->type); + unsigned columns = glsl_get_matrix_columns(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); + + for (unsigned i = 0; i < columns; i++) { + struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); + col_val->type = glsl_get_column_type(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, rows, 32); + + for (unsigned j = 0; j < rows; j++) + load->value.u32[j] = constant->value.u[rows * i + j]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + col_val->def = &load->def; + + val->elems[i] = col_val; + } + } + break; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + const struct glsl_type *elem_type = glsl_get_array_element(val->type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + break; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for 
(unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = + glsl_get_struct_field(val->type, i); + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + } + break; + } + + default: + unreachable("bad constant type"); + } + + return val; +} + +struct vtn_ssa_value * +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + switch (val->value_type) { + case vtn_value_type_undef: + return vtn_undef_ssa_value(b, val->type->type); + + case vtn_value_type_constant: + return vtn_const_ssa_value(b, val->constant, val->const_type); + + case vtn_value_type_ssa: + return val->ssa; + + case vtn_value_type_access_chain: + /* This is needed for function parameters */ + return vtn_variable_load(b, val->access_chain); + + default: + unreachable("Invalid type for an SSA value"); + } +} + +static char * +vtn_string_literal(struct vtn_builder *b, const uint32_t *words, + unsigned word_count, unsigned *words_used) +{ + char *dup = ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); + if (words_used) { + /* Ammount of space taken by the string (including the null) */ + unsigned len = strlen(dup) + 1; + *words_used = DIV_ROUND_UP(len, sizeof(*words)); + } + return dup; +} + +const uint32_t * +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, + const uint32_t *end, vtn_instruction_handler handler) +{ + b->file = NULL; + b->line = -1; + b->col = -1; + + const uint32_t *w = start; + while (w < end) { + SpvOp opcode = w[0] & SpvOpCodeMask; + unsigned count = w[0] >> SpvWordCountShift; + assert(count >= 1 && w + count <= end); + + switch (opcode) { + case SpvOpNop: + break; /* Do nothing */ + + case SpvOpLine: + b->file = vtn_value(b, w[1], vtn_value_type_string)->str; + b->line = w[2]; + b->col = w[3]; + break; + + case SpvOpNoLine: + b->file = NULL; + b->line = -1; + b->col = -1; + break; + + default: + if (!handler(b, opcode, w, count)) + return w; + break; + 
} + + w += count; + } + assert(w == end); + return w; +} + +static void +vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpExtInstImport: { + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); + if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { + val->ext_handler = vtn_handle_glsl450_instruction; + } else { + assert(!"Unsupported extension"); + } + break; + } + + case SpvOpExtInst: { + struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); + bool handled = val->ext_handler(b, w[4], w, count); + (void)handled; + assert(handled); + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +static void +_foreach_decoration_helper(struct vtn_builder *b, + struct vtn_value *base_value, + int parent_member, + struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + int member; + if (dec->scope == VTN_DEC_DECORATION) { + member = parent_member; + } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) { + assert(parent_member == -1); + member = dec->scope - VTN_DEC_STRUCT_MEMBER0; + } else { + /* Not a decoration */ + continue; + } + + if (dec->group) { + assert(dec->group->value_type == vtn_value_type_decoration_group); + _foreach_decoration_helper(b, base_value, member, dec->group, + cb, data); + } else { + cb(b, base_value, member, dec, data); + } + } +} + +/** Iterates (recursively if needed) over all of the decorations on a value + * + * This function iterates over all of the decorations applied to a given + * value. If it encounters a decoration group, it recurses into the group + * and iterates over all of those decorations as well. 
+ */ +void +vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + _foreach_decoration_helper(b, value, -1, value, cb, data); +} + +void +vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, + vtn_execution_mode_foreach_cb cb, void *data) +{ + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + if (dec->scope != VTN_DEC_EXECUTION_MODE) + continue; + + assert(dec->group == NULL); + cb(b, value, dec, data); + } +} + +static void +vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + const uint32_t *w_end = w + count; + const uint32_t target = w[1]; + w += 2; + + switch (opcode) { + case SpvOpDecorationGroup: + vtn_push_value(b, target, vtn_value_type_decoration_group); + break; + + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpExecutionMode: { + struct vtn_value *val = &b->values[target]; + + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + switch (opcode) { + case SpvOpDecorate: + dec->scope = VTN_DEC_DECORATION; + break; + case SpvOpMemberDecorate: + dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); + break; + case SpvOpExecutionMode: + dec->scope = VTN_DEC_EXECUTION_MODE; + break; + default: + unreachable("Invalid decoration opcode"); + } + dec->decoration = *(w++); + dec->literals = w; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + break; + } + + case SpvOpGroupMemberDecorate: + case SpvOpGroupDecorate: { + struct vtn_value *group = + vtn_value(b, target, vtn_value_type_decoration_group); + + for (; w < w_end; w++) { + struct vtn_value *val = vtn_untyped_value(b, *w); + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + + dec->group = group; + if (opcode == SpvOpGroupDecorate) { + dec->scope = VTN_DEC_DECORATION; + } else { + dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w); + } + + /* Link into the list */ + dec->next = 
val->decoration; + val->decoration = dec; + } + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +struct member_decoration_ctx { + unsigned num_fields; + struct glsl_struct_field *fields; + struct vtn_type *type; +}; + +/* does a shallow copy of a vtn_type */ + +static struct vtn_type * +vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) +{ + struct vtn_type *dest = ralloc(b, struct vtn_type); + dest->type = src->type; + dest->is_builtin = src->is_builtin; + if (src->is_builtin) + dest->builtin = src->builtin; + + if (!glsl_type_is_scalar(src->type)) { + switch (glsl_get_base_type(src->type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_ARRAY: + dest->row_major = src->row_major; + dest->stride = src->stride; + dest->array_element = src->array_element; + break; + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(src->type); + + dest->members = ralloc_array(b, struct vtn_type *, elems); + memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); + + dest->offsets = ralloc_array(b, unsigned, elems); + memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); + break; + } + + default: + unreachable("unhandled type"); + } + } + + return dest; +} + +static struct vtn_type * +mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member) +{ + type->members[member] = vtn_type_copy(b, type->members[member]); + type = type->members[member]; + + /* We may have an array of matrices.... Oh, joy! 
*/ + while (glsl_type_is_array(type->type)) { + type->array_element = vtn_type_copy(b, type->array_element); + type = type->array_element; + } + + assert(glsl_type_is_matrix(type->type)); + + return type; +} + +static void +struct_member_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_ctx) +{ + struct member_decoration_ctx *ctx = void_ctx; + + if (member < 0) + return; + + assert(member < ctx->num_fields); + + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. */ + case SpvDecorationNoPerspective: + ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + ctx->fields[member].centroid = true; + break; + case SpvDecorationSample: + ctx->fields[member].sample = true; + break; + case SpvDecorationLocation: + ctx->fields[member].location = dec->literals[0]; + break; + case SpvDecorationBuiltIn: + ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]); + ctx->type->members[member]->is_builtin = true; + ctx->type->members[member]->builtin = dec->literals[0]; + ctx->type->builtin_block = true; + break; + case SpvDecorationOffset: + ctx->type->offsets[member] = dec->literals[0]; + break; + case SpvDecorationMatrixStride: + mutable_matrix_member(b, ctx->type, member)->stride = dec->literals[0]; + break; + case SpvDecorationColMajor: + break; /* Nothing to do here. Column-major is the default. 
*/ + case SpvDecorationRowMajor: + mutable_matrix_member(b, ctx->type, member)->row_major = true; + break; + default: + unreachable("Unhandled member decoration"); + } +} + +static void +type_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *ctx) +{ + struct vtn_type *type = val->type; + + if (member != -1) + return; + + switch (dec->decoration) { + case SpvDecorationArrayStride: + type->stride = dec->literals[0]; + break; + case SpvDecorationBlock: + type->block = true; + break; + case SpvDecorationBufferBlock: + type->buffer_block = true; + break; + case SpvDecorationGLSLShared: + case SpvDecorationGLSLPacked: + /* Ignore these, since we get explicit offsets anyways */ + break; + + case SpvDecorationStream: + assert(dec->literals[0] == 0); + break; + + default: + unreachable("Unhandled type decoration"); + } +} + +static unsigned +translate_image_format(SpvImageFormat format) +{ + switch (format) { + case SpvImageFormatUnknown: return 0; /* GL_NONE */ + case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */ + case SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */ + case SpvImageFormatR32f: return 0x822E; /* GL_R32F */ + case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */ + case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */ + case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */ + case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */ + case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */ + case SpvImageFormatR16f: return 0x822D; /* GL_R16F */ + case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */ + case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */ + case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */ + case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */ + case SpvImageFormatR16: return 0x822A; /* GL_R16 */ + case SpvImageFormatR8: return 0x8229; /* GL_R8 */ + case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */ + 
case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */ + case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */ + case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */ + case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */ + case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */ + case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */ + case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */ + case SpvImageFormatR32i: return 0x8235; /* GL_R32I */ + case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */ + case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */ + case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */ + case SpvImageFormatR16i: return 0x8233; /* GL_R16I */ + case SpvImageFormatR8i: return 0x8231; /* GL_R8I */ + case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */ + case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */ + case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */ + case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */ + case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */ + case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */ + case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */ + case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */ + case SpvImageFormatR16ui: return 0x8234; /* GL_R16UI (fix: was 0x823A GL_RG16UI, the two-channel format) */ + case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */ + default: + assert(!"Invalid image format"); + return 0; + } +} + +static void +vtn_handle_type(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); + + val->type = rzalloc(b, struct vtn_type); + val->type->is_builtin = false; + val->type->val = val; + + switch (opcode) { + case SpvOpTypeVoid: + val->type->type = glsl_void_type(); + break; + case SpvOpTypeBool: + val->type->type = glsl_bool_type(); + break; + case SpvOpTypeInt: { + const bool signedness = w[3]; + val->type->type = (signedness ? 
glsl_int_type() : glsl_uint_type()); + break; + } + case SpvOpTypeFloat: + val->type->type = glsl_float_type(); + break; + + case SpvOpTypeVector: { + struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned elems = w[3]; + + assert(glsl_type_is_scalar(base->type)); + val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems); + + /* Vectors implicitly have sizeof(base_type) stride. For now, this + * is always 4 bytes. This will have to change if we want to start + * supporting doubles or half-floats. + */ + val->type->stride = 4; + val->type->array_element = base; + break; + } + + case SpvOpTypeMatrix: { + struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned columns = w[3]; + + assert(glsl_type_is_vector(base->type)); + val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), + glsl_get_vector_elements(base->type), + columns); + assert(!glsl_type_is_error(val->type->type)); + val->type->array_element = base; + val->type->row_major = false; + val->type->stride = 0; + break; + } + + case SpvOpTypeRuntimeArray: + case SpvOpTypeArray: { + struct vtn_type *array_element = + vtn_value(b, w[2], vtn_value_type_type)->type; + + unsigned length; + if (opcode == SpvOpTypeRuntimeArray) { + /* A length of 0 is used to denote unsized arrays */ + length = 0; + } else { + length = + vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0]; + } + + val->type->type = glsl_array_type(array_element->type, length); + val->type->array_element = array_element; + val->type->stride = 0; + break; + } + + case SpvOpTypeStruct: { + unsigned num_fields = count - 2; + val->type->members = ralloc_array(b, struct vtn_type *, num_fields); + val->type->offsets = ralloc_array(b, unsigned, num_fields); + + NIR_VLA(struct glsl_struct_field, fields, count); + for (unsigned i = 0; i < num_fields; i++) { + val->type->members[i] = + vtn_value(b, w[i + 2], vtn_value_type_type)->type; + fields[i] = (struct 
glsl_struct_field) { + .type = val->type->members[i]->type, + .name = ralloc_asprintf(b, "field%d", i), + .location = -1, + }; + } + + struct member_decoration_ctx ctx = { + .num_fields = num_fields, + .fields = fields, + .type = val->type + }; + + vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); + + const char *name = val->name ? val->name : "struct"; + + val->type->type = glsl_struct_type(fields, num_fields, name); + break; + } + + case SpvOpTypeFunction: { + const struct glsl_type *return_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + NIR_VLA(struct glsl_function_param, params, count - 3); + for (unsigned i = 0; i < count - 3; i++) { + params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; + + /* FIXME: */ + params[i].in = true; + params[i].out = true; + } + val->type->type = glsl_function_type(return_type, params, count - 3); + break; + } + + case SpvOpTypePointer: + /* FIXME: For now, we'll just do the really lame thing and return + * the same type. 
The validator should ensure that the proper number + * of dereferences happen + */ + val->type = vtn_value(b, w[3], vtn_value_type_type)->type; + break; + + case SpvOpTypeImage: { + const struct glsl_type *sampled_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + + assert(glsl_type_is_vector_or_scalar(sampled_type)); + + enum glsl_sampler_dim dim; + switch ((SpvDim)w[3]) { + case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; + case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; + case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; + case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; + case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; + case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; + default: + unreachable("Invalid SPIR-V Sampler dimension"); + } + + bool is_shadow = w[4]; + bool is_array = w[5]; + bool multisampled = w[6]; + unsigned sampled = w[7]; + SpvImageFormat format = w[8]; + + if (count > 9) + val->type->access_qualifier = w[9]; + else + val->type->access_qualifier = SpvAccessQualifierReadWrite; + + if (multisampled) { + assert(dim == GLSL_SAMPLER_DIM_2D); + dim = GLSL_SAMPLER_DIM_MS; + } + + val->type->image_format = translate_image_format(format); + + if (sampled == 1) { + val->type->type = glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); + } else if (sampled == 2) { + assert(format); + assert(!is_shadow); + val->type->type = glsl_image_type(dim, is_array, + glsl_get_base_type(sampled_type)); + } else { + assert(!"We need to know if the image will be sampled"); + } + break; + } + + case SpvOpTypeSampledImage: + val->type = vtn_value(b, w[2], vtn_value_type_type)->type; + break; + + case SpvOpTypeSampler: + /* The actual sampler type here doesn't really matter. It gets + * thrown away the moment you combine it with an image. What really + * matters is that it's a sampler type as opposed to an integer type + * so the backend knows what to do. 
+ */ + val->type->type = glsl_bare_sampler_type(); + break; + + case SpvOpTypeOpaque: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + default: + unreachable("Unhandled opcode"); + } + + vtn_foreach_decoration(b, val, type_decoration_cb, NULL); +} + +static nir_constant * +vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type) +{ + nir_constant *c = rzalloc(b, nir_constant); + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + /* Nothing to do here. It's already initialized to zero */ + break; + + case GLSL_TYPE_ARRAY: + assert(glsl_get_length(type) > 0); + c->num_elements = glsl_get_length(type); + c->elements = ralloc_array(b, nir_constant *, c->num_elements); + + c->elements[0] = vtn_null_constant(b, glsl_get_array_element(type)); + for (unsigned i = 1; i < c->num_elements; i++) + c->elements[i] = c->elements[0]; + break; + + case GLSL_TYPE_STRUCT: + c->num_elements = glsl_get_length(type); + c->elements = ralloc_array(b, nir_constant *, c->num_elements); + + for (unsigned i = 0; i < c->num_elements; i++) { + c->elements[i] = vtn_null_constant(b, glsl_get_struct_field(type, i)); + } + break; + + default: + unreachable("Invalid type for null constant"); + } + + return c; +} + +static void +spec_constant_deocoration_cb(struct vtn_builder *b, struct vtn_value *v, + int member, const struct vtn_decoration *dec, + void *data) +{ + assert(member == -1); + if (dec->decoration != SpvDecorationSpecId) + return; + + uint32_t *const_value = data; + + for (unsigned i = 0; i < b->num_specializations; i++) { + if (b->specializations[i].id == dec->literals[0]) { + *const_value = b->specializations[i].data; + return; + } + } +} + +static uint32_t +get_specialization(struct vtn_builder *b, struct vtn_value *val, + uint32_t const_value) +{ + vtn_foreach_decoration(b, val, 
spec_constant_deocoration_cb, &const_value); + return const_value; +} + +static void +vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); + val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->constant = rzalloc(b, nir_constant); + switch (opcode) { + case SpvOpConstantTrue: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_TRUE; + break; + case SpvOpConstantFalse: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_FALSE; + break; + + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: { + assert(val->const_type == glsl_bool_type()); + uint32_t int_val = + get_specialization(b, val, (opcode == SpvOpSpecConstantTrue)); + val->constant->value.u[0] = int_val ? NIR_TRUE : NIR_FALSE; + break; + } + + case SpvOpConstant: + assert(glsl_type_is_scalar(val->const_type)); + val->constant->value.u[0] = w[3]; + break; + case SpvOpSpecConstant: + assert(glsl_type_is_scalar(val->const_type)); + val->constant->value.u[0] = get_specialization(b, val, w[3]); + break; + case SpvOpSpecConstantComposite: + case SpvOpConstantComposite: { + unsigned elem_count = count - 3; + nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); + for (unsigned i = 0; i < elem_count; i++) + elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; + + switch (glsl_get_base_type(val->const_type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(val->const_type)) { + unsigned rows = glsl_get_vector_elements(val->const_type); + assert(glsl_get_matrix_columns(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + for (unsigned j = 0; j < rows; j++) + val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; + } else { + assert(glsl_type_is_vector(val->const_type)); + 
assert(glsl_get_vector_elements(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + val->constant->value.u[i] = elems[i]->value.u[0]; + } + ralloc_free(elems); + break; + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + ralloc_steal(val->constant, elems); + val->constant->num_elements = elem_count; + val->constant->elements = elems; + break; + + default: + unreachable("Unsupported type for constants"); + } + break; + } + + case SpvOpSpecConstantOp: { + SpvOp opcode = get_specialization(b, val, w[3]); + switch (opcode) { + case SpvOpVectorShuffle: { + struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant); + struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant); + unsigned len0 = glsl_get_vector_elements(v0->const_type); + unsigned len1 = glsl_get_vector_elements(v1->const_type); + + uint32_t u[8]; + for (unsigned i = 0; i < len0; i++) + u[i] = v0->constant->value.u[i]; + for (unsigned i = 0; i < len1; i++) + u[len0 + i] = v1->constant->value.u[i]; + + for (unsigned i = 0; i < count - 6; i++) { + uint32_t comp = w[i + 6]; + if (comp == (uint32_t)-1) { + val->constant->value.u[i] = 0xdeadbeef; + } else { + val->constant->value.u[i] = u[comp]; + } + } + return; + } + + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: { + struct vtn_value *comp; + unsigned deref_start; + struct nir_constant **c; + if (opcode == SpvOpCompositeExtract) { + comp = vtn_value(b, w[4], vtn_value_type_constant); + deref_start = 5; + c = &comp->constant; + } else { + comp = vtn_value(b, w[5], vtn_value_type_constant); + deref_start = 6; + val->constant = nir_constant_clone(comp->constant, + (nir_variable *)b); + c = &val->constant; + } + + int elem = -1; + const struct glsl_type *type = comp->const_type; + for (unsigned i = deref_start; i < count; i++) { + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* If we hit this granularity, we're picking off 
an element */ + if (elem < 0) + elem = 0; + + if (glsl_type_is_matrix(type)) { + elem += w[i] * glsl_get_vector_elements(type); + type = glsl_get_column_type(type); + } else { + assert(glsl_type_is_vector(type)); + elem += w[i]; + type = glsl_scalar_type(glsl_get_base_type(type)); + } + continue; + + case GLSL_TYPE_ARRAY: + c = &(*c)->elements[w[i]]; + type = glsl_get_array_element(type); + continue; + + case GLSL_TYPE_STRUCT: + c = &(*c)->elements[w[i]]; + type = glsl_get_struct_field(type, w[i]); + continue; + + default: + unreachable("Invalid constant type"); + } + } + + if (opcode == SpvOpCompositeExtract) { + if (elem == -1) { + val->constant = *c; + } else { + unsigned num_components = glsl_get_vector_elements(type); + for (unsigned i = 0; i < num_components; i++) + val->constant->value.u[i] = (*c)->value.u[elem + i]; + } + } else { + struct vtn_value *insert = + vtn_value(b, w[4], vtn_value_type_constant); + assert(insert->const_type == type); + if (elem == -1) { + *c = insert->constant; + } else { + unsigned num_components = glsl_get_vector_elements(type); + for (unsigned i = 0; i < num_components; i++) + (*c)->value.u[elem + i] = insert->constant->value.u[i]; + } + } + return; + } + + default: { + bool swap; + nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); + + unsigned num_components = glsl_get_vector_elements(val->const_type); + unsigned bit_size = + glsl_get_bit_size(glsl_get_base_type(val->const_type)); + + nir_const_value src[3]; + assert(count <= 7); + for (unsigned i = 0; i < count - 4; i++) { + nir_constant *c = + vtn_value(b, w[4 + i], vtn_value_type_constant)->constant; + + unsigned j = swap ? 
1 - i : i; + assert(bit_size == 32); + for (unsigned k = 0; k < num_components; k++) + src[j].u32[k] = c->value.u[k]; + } + + nir_const_value res = nir_eval_const_opcode(op, num_components, + bit_size, src); + + for (unsigned k = 0; k < num_components; k++) + val->constant->value.u[k] = res.u32[k]; + + return; + } /* default */ + } + } + + case SpvOpConstantNull: + val->constant = vtn_null_constant(b, val->const_type); + break; + + case SpvOpConstantSampler: + assert(!"OpConstantSampler requires Kernel Capability"); + break; + + default: + unreachable("Unhandled opcode"); + } +} + +static void +vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct nir_function *callee = + vtn_value(b, w[3], vtn_value_type_function)->func->impl->function; + + nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee); + for (unsigned i = 0; i < call->num_params; i++) { + unsigned arg_id = w[4 + i]; + struct vtn_value *arg = vtn_untyped_value(b, arg_id); + if (arg->value_type == vtn_value_type_access_chain) { + nir_deref_var *d = vtn_access_chain_to_deref(b, arg->access_chain); + call->params[i] = nir_deref_as_var(nir_copy_deref(call, &d->deref)); + } else { + struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id); + + /* Make a temporary to store the argument in */ + nir_variable *tmp = + nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp"); + call->params[i] = nir_deref_var_create(call, tmp); + + vtn_local_store(b, arg_ssa, call->params[i]); + } + } + + nir_variable *out_tmp = NULL; + if (!glsl_type_is_void(callee->return_type)) { + out_tmp = nir_local_variable_create(b->impl, callee->return_type, + "out_tmp"); + call->return_deref = nir_deref_var_create(call, out_tmp); + } + + nir_builder_instr_insert(&b->nb, &call->instr); + + if (glsl_type_is_void(callee->return_type)) { + vtn_push_value(b, w[2], vtn_value_type_undef); + } else { + struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa); + 
retval->ssa = vtn_local_load(b, call->return_deref); + } +} + +struct vtn_ssa_value * +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (!glsl_type_is_vector_or_scalar(type)) { + unsigned elems = glsl_get_length(type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *child_type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + child_type = glsl_get_column_type(type); + break; + case GLSL_TYPE_ARRAY: + child_type = glsl_get_array_element(type); + break; + case GLSL_TYPE_STRUCT: + child_type = glsl_get_struct_field(type, i); + break; + default: + unreachable("unkown base type"); + } + + val->elems[i] = vtn_create_ssa_value(b, child_type); + } + } + + return val; +} + +static nir_tex_src +vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) +{ + nir_tex_src src; + src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def); + src.src_type = type; + return src; +} + +static void +vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpSampledImage) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + val->sampled_image->sampler = + vtn_value(b, w[4], vtn_value_type_access_chain)->access_chain; + return; + } else if (opcode == SpvOpImage) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + struct vtn_value *src_val = vtn_untyped_value(b, w[3]); + if (src_val->value_type == vtn_value_type_sampled_image) { + val->access_chain = src_val->sampled_image->image; + } else { + 
assert(src_val->value_type == vtn_value_type_access_chain); + val->access_chain = src_val->access_chain; + } + return; + } + + struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + struct vtn_sampled_image sampled; + struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); + if (sampled_val->value_type == vtn_value_type_sampled_image) { + sampled = *sampled_val->sampled_image; + } else { + assert(sampled_val->value_type == vtn_value_type_access_chain); + sampled.image = NULL; + sampled.sampler = sampled_val->access_chain; + } + + const struct glsl_type *image_type; + if (sampled.image) { + image_type = sampled.image->var->var->interface_type; + } else { + image_type = sampled.sampler->var->var->interface_type; + } + + nir_tex_src srcs[8]; /* 8 should be enough */ + nir_tex_src *p = srcs; + + unsigned idx = 4; + + bool has_coord = false; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQueryLod: { + /* All these types have the coordinate as their first real argument */ + struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); + has_coord = true; + p->src = nir_src_for_ssa(coord->def); + p->src_type = nir_tex_src_coord; + p++; + break; + } + + default: + break; + } + + /* These all have an explicit depth value as their next source */ + switch (opcode) { + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor); + break; + 
default: + break; + } + + /* For OpImageQuerySizeLod, we always have an LOD */ + if (opcode == SpvOpImageQuerySizeLod) + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); + + /* Figure out the base texture operation */ + nir_texop texop; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + texop = nir_texop_tex; + break; + + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + texop = nir_texop_txl; + break; + + case SpvOpImageFetch: + if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) { + texop = nir_texop_txf_ms; + } else { + texop = nir_texop_txf; + } + break; + + case SpvOpImageGather: + case SpvOpImageDrefGather: + texop = nir_texop_tg4; + break; + + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + texop = nir_texop_txs; + break; + + case SpvOpImageQueryLod: + texop = nir_texop_lod; + break; + + case SpvOpImageQueryLevels: + texop = nir_texop_query_levels; + break; + + case SpvOpImageQuerySamples: + default: + unreachable("Unhandled opcode"); + } + + /* Now we need to handle some number of optional arguments */ + if (idx < count) { + uint32_t operands = w[idx++]; + + if (operands & SpvImageOperandsBiasMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txb; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias); + } + + if (operands & SpvImageOperandsLodMask) { + assert(texop == nir_texop_txl || texop == nir_texop_txf || + texop == nir_texop_txf_ms || texop == nir_texop_txs); + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); + } + + if (operands & SpvImageOperandsGradMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txd; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx); + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy); + } + + if (operands & SpvImageOperandsOffsetMask || + 
operands & SpvImageOperandsConstOffsetMask) + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset); + + if (operands & SpvImageOperandsConstOffsetsMask) + assert(!"Constant offsets to texture gather not yet implemented"); + + if (operands & SpvImageOperandsSampleMask) { + assert(texop == nir_texop_txf_ms); + texop = nir_texop_txf_ms; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); + } + } + /* We should have now consumed exactly all of the arguments */ + assert(idx == count); + + nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); + instr->op = texop; + + memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); + + instr->sampler_dim = glsl_get_sampler_dim(image_type); + instr->is_array = glsl_sampler_type_is_array(image_type); + instr->is_shadow = glsl_sampler_type_is_shadow(image_type); + instr->is_new_style_shadow = instr->is_shadow; + + if (has_coord) { + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + instr->coord_components = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_MS: + instr->coord_components = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + instr->coord_components = 3; + break; + default: + assert("Invalid sampler type"); + } + + if (instr->is_array) + instr->coord_components++; + } else { + instr->coord_components = 0; + } + + switch (glsl_get_sampler_result_type(image_type)) { + case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; + case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; + case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; + case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; + default: + unreachable("Invalid base type for sampler result"); + } + + nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler); + if (sampled.image) { + nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image); + instr->texture = 
nir_deref_as_var(nir_copy_deref(instr, &image->deref)); + } else { + instr->texture = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); + } + + switch (instr->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txl: + case nir_texop_txd: + /* These operations require a sampler */ + instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); + break; + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_tg4: + case nir_texop_query_levels: + case nir_texop_texture_samples: + case nir_texop_samples_identical: + /* These don't */ + instr->sampler = NULL; + break; + } + + nir_ssa_dest_init(&instr->instr, &instr->dest, + nir_tex_instr_dest_size(instr), 32, NULL); + + assert(glsl_get_vector_elements(ret_type->type) == + nir_tex_instr_dest_size(instr)); + + val->ssa = vtn_create_ssa_value(b, ret_type->type); + val->ssa->def = &instr->dest.ssa; + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +static nir_ssa_def * +get_image_coord(struct vtn_builder *b, uint32_t value) +{ + struct vtn_ssa_value *coord = vtn_ssa_value(b, value); + + /* The image_load_store intrinsics assume a 4-dim coordinate */ + unsigned dim = glsl_get_vector_elements(coord->type); + unsigned swizzle[4]; + for (unsigned i = 0; i < 4; i++) + swizzle[i] = MIN2(i, dim - 1); + + return nir_swizzle(&b->nb, coord->def, swizzle, 4, false); +} + +static void +vtn_handle_image(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + /* Just get this one out of the way */ + if (opcode == SpvOpImageTexelPointer) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_image_pointer); + val->image = ralloc(b, struct vtn_image_pointer); + + val->image->image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + val->image->coord = get_image_coord(b, w[4]); + val->image->sample = vtn_ssa_value(b, w[5])->def; + return; + } + + struct vtn_image_pointer image; + + switch 
(opcode) { + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; + break; + + case SpvOpImageQuerySize: + image.image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + image.coord = NULL; + image.sample = NULL; + break; + + case SpvOpImageRead: + image.image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + image.coord = get_image_coord(b, w[4]); + + if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) { + assert(w[5] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[6])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1, 32); + } + break; + + case SpvOpImageWrite: + image.image = + vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; + image.coord = get_image_coord(b, w[2]); + + /* texel = w[3] */ + + if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) { + assert(w[4] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[5])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1, 32); + } + break; + + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_op op; + switch (opcode) { +#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break; + OP(ImageQuerySize, size) + OP(ImageRead, load) + OP(ImageWrite, store) + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_min) + OP(AtomicUMin, atomic_min) + OP(AtomicSMax, atomic_max) + OP(AtomicUMax, atomic_max) + OP(AtomicAnd, atomic_and) + 
OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); + + nir_deref_var *image_deref = vtn_access_chain_to_deref(b, image.image); + intrin->variables[0] = + nir_deref_as_var(nir_copy_deref(&intrin->instr, &image_deref->deref)); + + /* ImageQuerySize doesn't take any extra parameters */ + if (opcode != SpvOpImageQuerySize) { + /* The image coordinate is always 4 components but we may not have that + * many. Swizzle to compensate. + */ + unsigned swiz[4]; + for (unsigned i = 0; i < 4; i++) + swiz[i] = i < image.coord->num_components ? i : 0; + intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord, + swiz, 4, false)); + intrin->src[1] = nir_src_for_ssa(image.sample); + } + + switch (opcode) { + case SpvOpImageQuerySize: + case SpvOpImageRead: + break; + case SpvOpImageWrite: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); + break; + case SpvOpAtomicIIncrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); + break; + case SpvOpAtomicIDecrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); + break; + + case SpvOpAtomicExchange: + case SpvOpAtomicIAdd: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicCompareExchange: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); + intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicISub: + intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); + break; + + default: + unreachable("Invalid image opcode"); + } + + if (opcode != SpvOpImageWrite) { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + struct vtn_type *type = vtn_value(b, 
w[1], vtn_value_type_type)->type; + nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, 32, NULL); + + nir_builder_instr_insert(&b->nb, &intrin->instr); + + /* The image intrinsics always return 4 channels but we may not want + * that many. Emit a mov to trim it down. + */ + unsigned swiz[4] = {0, 1, 2, 3}; + val->ssa = vtn_create_ssa_value(b, type->type); + val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz, + glsl_get_vector_elements(type->type), false); + } else { + nir_builder_instr_insert(&b->nb, &intrin->instr); + } +} + +static nir_intrinsic_op +get_ssbo_nir_atomic_op(SpvOp opcode) +{ + switch (opcode) { +#define OP(S, N) case SpvOp##S: return nir_intrinsic_ssbo_##N; + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_imin) + OP(AtomicUMin, atomic_umin) + OP(AtomicSMax, atomic_imax) + OP(AtomicUMax, atomic_umax) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid SSBO atomic"); + } +} + +static nir_intrinsic_op +get_shared_nir_atomic_op(SpvOp opcode) +{ + switch (opcode) { +#define OP(S, N) case SpvOp##S: return nir_intrinsic_var_##N; + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_imin) + OP(AtomicUMin, atomic_umin) + OP(AtomicSMax, atomic_imax) + OP(AtomicUMax, atomic_umax) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid shared atomic"); + } +} + +static void +fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, nir_src *src) +{ + switch (opcode) { + case SpvOpAtomicIIncrement: + src[0] = 
nir_src_for_ssa(nir_imm_int(&b->nb, 1)); + break; + + case SpvOpAtomicIDecrement: + src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); + break; + + case SpvOpAtomicISub: + src[0] = + nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); + break; + + case SpvOpAtomicCompareExchange: + src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); + src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def); + break; + /* Fall through */ + + case SpvOpAtomicExchange: + case SpvOpAtomicIAdd: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + default: + unreachable("Invalid SPIR-V atomic"); + } +} + +static void +vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_access_chain *chain = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + nir_intrinsic_instr *atomic; + + /* + SpvScope scope = w[4]; + SpvMemorySemanticsMask semantics = w[5]; + */ + + if (chain->var->mode == vtn_variable_mode_workgroup) { + nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref; + nir_intrinsic_op op = get_shared_nir_atomic_op(opcode); + atomic = nir_intrinsic_instr_create(b->nb.shader, op); + atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref)); + fill_common_atomic_sources(b, opcode, w, &atomic->src[0]); + } else { + assert(chain->var->mode == vtn_variable_mode_ssbo); + struct vtn_type *type; + nir_ssa_def *offset, *index; + offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false); + + nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode); + + atomic = nir_intrinsic_instr_create(b->nb.shader, op); + atomic->src[0] = nir_src_for_ssa(index); + atomic->src[1] = nir_src_for_ssa(offset); + fill_common_atomic_sources(b, opcode, w, &atomic->src[2]); + } + + nir_ssa_dest_init(&atomic->instr, 
&atomic->dest, 1, 32, NULL); + + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->def = &atomic->dest.ssa; + val->ssa->type = type->type; + + nir_builder_instr_insert(&b->nb, &atomic->instr); +} + +static nir_alu_instr * +create_vec(nir_shader *shader, unsigned num_components, unsigned bit_size) +{ + nir_op op; + switch (num_components) { + case 1: op = nir_op_fmov; break; + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("bad vector size"); + } + + nir_alu_instr *vec = nir_alu_instr_create(shader, op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, + bit_size, NULL); + vec->dest.write_mask = (1 << num_components) - 1; + + return vec; +} + +struct vtn_ssa_value * +vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + if (src->transposed) + return src->transposed; + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_transposed_type(src->type)); + + for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { + nir_alu_instr *vec = create_vec(b->shader, + glsl_get_matrix_columns(src->type), + glsl_get_bit_size(glsl_get_base_type(src->type))); + if (glsl_type_is_vector_or_scalar(src->type)) { + vec->src[0].src = nir_src_for_ssa(src->def); + vec->src[0].swizzle[0] = i; + } else { + for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { + vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); + vec->src[j].swizzle[0] = i; + } + } + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + + dest->transposed = src; + + return dest; +} + +nir_ssa_def * +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) +{ + unsigned swiz[4] = { index }; + return nir_swizzle(&b->nb, src, swiz, 1, true); +} + +nir_ssa_def * 
+vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, + unsigned index) +{ + nir_alu_instr *vec = create_vec(b->shader, src->num_components, + src->bit_size); + + for (unsigned i = 0; i < src->num_components; i++) { + if (i == index) { + vec->src[i].src = nir_src_for_ssa(insert); + } else { + vec->src[i].src = nir_src_for_ssa(src); + vec->src[i].swizzle[0] = i; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +nir_ssa_def * +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_extract(b, src, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_extract(b, src, i), dest); + + return dest; +} + +nir_ssa_def * +vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_insert(b, src, insert, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, + nir_ssa_def *src0, nir_ssa_def *src1, + const uint32_t *indices) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components, src0->bit_size); + + for (unsigned i = 0; i < num_components; i++) { + uint32_t index = indices[i]; + if (index == 0xffffffff) { + vec->src[i].src = + nir_src_for_ssa(nir_ssa_undef(&b->nb, 1, src0->bit_size)); + } else if (index < src0->num_components) { + vec->src[i].src = nir_src_for_ssa(src0); + vec->src[i].swizzle[0] = index; + } else { + vec->src[i].src = nir_src_for_ssa(src1); + vec->src[i].swizzle[0] = index - src0->num_components; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +/* + * 
Concatentates a number of vectors/scalars together to produce a vector + */ +static nir_ssa_def * +vtn_vector_construct(struct vtn_builder *b, unsigned num_components, + unsigned num_srcs, nir_ssa_def **srcs) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components, + srcs[0]->bit_size); + + unsigned dest_idx = 0; + for (unsigned i = 0; i < num_srcs; i++) { + nir_ssa_def *src = srcs[i]; + for (unsigned j = 0; j < src->num_components; j++) { + vec->src[dest_idx].src = nir_src_for_ssa(src); + vec->src[dest_idx].swizzle[0] = j; + dest_idx++; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static struct vtn_ssa_value * +vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) +{ + struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); + dest->type = src->type; + + if (glsl_type_is_vector_or_scalar(src->type)) { + dest->def = src->def; + } else { + unsigned elems = glsl_get_length(src->type); + + dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_ssa_value *insert, const uint32_t *indices, + unsigned num_indices) +{ + struct vtn_ssa_value *dest = vtn_composite_copy(b, src); + + struct vtn_ssa_value *cur = dest; + unsigned i; + for (i = 0; i < num_indices - 1; i++) { + cur = cur->elems[indices[i]]; + } + + if (glsl_type_is_vector_or_scalar(cur->type)) { + /* According to the SPIR-V spec, OpCompositeInsert may work down to + * the component granularity. In that case, the last index will be + * the index to insert the scalar into the vector. 
+ */ + + cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); + } else { + cur->elems[indices[i]] = insert; + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, + const uint32_t *indices, unsigned num_indices) +{ + struct vtn_ssa_value *cur = src; + for (unsigned i = 0; i < num_indices; i++) { + if (glsl_type_is_vector_or_scalar(cur->type)) { + assert(i == num_indices - 1); + /* According to the SPIR-V spec, OpCompositeExtract may work down to + * the component granularity. The last index will be the index of the + * vector to extract. + */ + + struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); + ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); + ret->def = vtn_vector_extract(b, cur->def, indices[i]); + return ret; + } else { + cur = cur->elems[indices[i]]; + } + } + + return cur; +} + +static void +vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); + + switch (opcode) { + case SpvOpVectorExtractDynamic: + val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def); + break; + + case SpvOpVectorInsertDynamic: + val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + vtn_ssa_value(b, w[5])->def); + break; + + case SpvOpVectorShuffle: + val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), + vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + w + 5); + break; + + case SpvOpCompositeConstruct: { + unsigned elems = count - 3; + if (glsl_type_is_vector_or_scalar(type)) { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < elems; i++) + srcs[i] = vtn_ssa_value(b, w[3 + i])->def; + 
val->ssa->def = + vtn_vector_construct(b, glsl_get_vector_elements(type), + elems, srcs); + } else { + val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); + } + break; + } + case SpvOpCompositeExtract: + val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), + w + 4, count - 4); + break; + + case SpvOpCompositeInsert: + val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), + vtn_ssa_value(b, w[3]), + w + 5, count - 5); + break; + + case SpvOpCopyObject: + val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + break; + + default: + unreachable("unknown composite operation"); + } +} + +static void +vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + nir_intrinsic_op intrinsic_op; + switch (opcode) { + case SpvOpEmitVertex: + case SpvOpEmitStreamVertex: + intrinsic_op = nir_intrinsic_emit_vertex; + break; + case SpvOpEndPrimitive: + case SpvOpEndStreamPrimitive: + intrinsic_op = nir_intrinsic_end_primitive; + break; + case SpvOpMemoryBarrier: + intrinsic_op = nir_intrinsic_memory_barrier; + break; + case SpvOpControlBarrier: + intrinsic_op = nir_intrinsic_barrier; + break; + default: + unreachable("unknown barrier instruction"); + } + + nir_intrinsic_instr *intrin = + nir_intrinsic_instr_create(b->shader, intrinsic_op); + + if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) + nir_intrinsic_set_stream_id(intrin, w[1]); + + nir_builder_instr_insert(&b->nb, &intrin->instr); +} + +static unsigned +gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + case SpvExecutionModeOutputPoints: + return 0; /* GL_POINTS */ + case SpvExecutionModeInputLines: + return 1; /* GL_LINES */ + case SpvExecutionModeInputLinesAdjacency: + return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */ + case SpvExecutionModeTriangles: + return 4; /* 
GL_TRIANGLES */ + case SpvExecutionModeInputTrianglesAdjacency: + return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */ + case SpvExecutionModeQuads: + return 7; /* GL_QUADS */ + case SpvExecutionModeIsolines: + return 0x8E7A; /* GL_ISOLINES */ + case SpvExecutionModeOutputLineStrip: + return 3; /* GL_LINE_STRIP */ + case SpvExecutionModeOutputTriangleStrip: + return 5; /* GL_TRIANGLE_STRIP */ + default: + assert(!"Invalid primitive type"); + return 4; + } +} + +static unsigned +vertices_in_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + return 1; + case SpvExecutionModeInputLines: + return 2; + case SpvExecutionModeInputLinesAdjacency: + return 4; + case SpvExecutionModeTriangles: + return 3; + case SpvExecutionModeInputTrianglesAdjacency: + return 6; + default: + assert(!"Invalid GS input mode"); + return 0; + } +} + +static gl_shader_stage +stage_for_execution_model(SpvExecutionModel model) +{ + switch (model) { + case SpvExecutionModelVertex: + return MESA_SHADER_VERTEX; + case SpvExecutionModelTessellationControl: + return MESA_SHADER_TESS_CTRL; + case SpvExecutionModelTessellationEvaluation: + return MESA_SHADER_TESS_EVAL; + case SpvExecutionModelGeometry: + return MESA_SHADER_GEOMETRY; + case SpvExecutionModelFragment: + return MESA_SHADER_FRAGMENT; + case SpvExecutionModelGLCompute: + return MESA_SHADER_COMPUTE; + default: + unreachable("Unsupported execution model"); + } +} + +static bool +vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceExtension: + case SpvOpSourceContinued: + case SpvOpExtension: + /* Unhandled, but these are for debug so that's ok. 
*/ + break; + + case SpvOpCapability: { + SpvCapability cap = w[1]; + switch (cap) { + case SpvCapabilityMatrix: + case SpvCapabilityShader: + case SpvCapabilityGeometry: + case SpvCapabilityTessellationPointSize: + case SpvCapabilityGeometryPointSize: + case SpvCapabilityUniformBufferArrayDynamicIndexing: + case SpvCapabilitySampledImageArrayDynamicIndexing: + case SpvCapabilityStorageBufferArrayDynamicIndexing: + case SpvCapabilityStorageImageArrayDynamicIndexing: + case SpvCapabilityImageRect: + case SpvCapabilitySampledRect: + case SpvCapabilitySampled1D: + case SpvCapabilityImage1D: + case SpvCapabilitySampledCubeArray: + case SpvCapabilitySampledBuffer: + case SpvCapabilityImageBuffer: + case SpvCapabilityImageQuery: + break; + case SpvCapabilityClipDistance: + case SpvCapabilityCullDistance: + case SpvCapabilityGeometryStreams: + fprintf(stderr, "WARNING: Unsupported SPIR-V Capability\n"); + break; + default: + assert(!"Unsupported capability"); + } + break; + } + + case SpvOpExtInstImport: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpMemoryModel: + assert(w[1] == SpvAddressingModelLogical); + assert(w[2] == SpvMemoryModelGLSL450); + break; + + case SpvOpEntryPoint: { + struct vtn_value *entry_point = &b->values[w[2]]; + /* Let this be a name label regardless */ + unsigned name_words; + entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words); + + if (strcmp(entry_point->name, b->entry_point_name) != 0 || + stage_for_execution_model(w[1]) != b->entry_point_stage) + break; + + assert(b->entry_point == NULL); + b->entry_point = entry_point; + break; + } + + case SpvOpString: + vtn_push_value(b, w[1], vtn_value_type_string)->str = + vtn_string_literal(b, &w[2], count - 2, NULL); + break; + + case SpvOpName: + b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL); + break; + + case SpvOpMemberName: + /* TODO */ + break; + + case SpvOpExecutionMode: + case SpvOpDecorationGroup: + case SpvOpDecorate: + case 
SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + vtn_handle_decoration(b, opcode, w, count); + break; + + default: + return false; /* End of preamble */ + } + + return true; +} + +static void +vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point, + const struct vtn_decoration *mode, void *data) +{ + assert(b->entry_point == entry_point); + + switch(mode->exec_mode) { + case SpvExecutionModeOriginUpperLeft: + case SpvExecutionModeOriginLowerLeft: + b->origin_upper_left = + (mode->exec_mode == SpvExecutionModeOriginUpperLeft); + break; + + case SpvExecutionModeEarlyFragmentTests: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.early_fragment_tests = true; + break; + + case SpvExecutionModeInvocations: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.invocations = MAX2(1, mode->literals[0]); + break; + + case SpvExecutionModeDepthReplacing: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; + break; + case SpvExecutionModeDepthGreater: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; + break; + case SpvExecutionModeDepthLess: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; + break; + case SpvExecutionModeDepthUnchanged: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; + break; + + case SpvExecutionModeLocalSize: + assert(b->shader->stage == MESA_SHADER_COMPUTE); + b->shader->info.cs.local_size[0] = mode->literals[0]; + b->shader->info.cs.local_size[1] = mode->literals[1]; + b->shader->info.cs.local_size[2] = mode->literals[2]; + break; + case SpvExecutionModeLocalSizeHint: + break; /* Nothing do do with this */ + + case SpvExecutionModeOutputVertices: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + 
b->shader->info.gs.vertices_out = mode->literals[0]; + break; + + case SpvExecutionModeInputPoints: + case SpvExecutionModeInputLines: + case SpvExecutionModeInputLinesAdjacency: + case SpvExecutionModeTriangles: + case SpvExecutionModeInputTrianglesAdjacency: + case SpvExecutionModeQuads: + case SpvExecutionModeIsolines: + if (b->shader->stage == MESA_SHADER_GEOMETRY) { + b->shader->info.gs.vertices_in = + vertices_in_from_spv_execution_mode(mode->exec_mode); + } else { + assert(!"Tesselation shaders not yet supported"); + } + break; + + case SpvExecutionModeOutputPoints: + case SpvExecutionModeOutputLineStrip: + case SpvExecutionModeOutputTriangleStrip: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.output_primitive = + gl_primitive_from_spv_execution_mode(mode->exec_mode); + break; + + case SpvExecutionModeSpacingEqual: + case SpvExecutionModeSpacingFractionalEven: + case SpvExecutionModeSpacingFractionalOdd: + case SpvExecutionModeVertexOrderCw: + case SpvExecutionModeVertexOrderCcw: + case SpvExecutionModePointMode: + assert(!"TODO: Add tessellation metadata"); + break; + + case SpvExecutionModePixelCenterInteger: + case SpvExecutionModeXfb: + assert(!"Unhandled execution mode"); + break; + + case SpvExecutionModeVecTypeHint: + case SpvExecutionModeContractionOff: + break; /* OpenCL */ + } +} + +static bool +vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceContinued: + case SpvOpSourceExtension: + case SpvOpExtension: + case SpvOpCapability: + case SpvOpExtInstImport: + case SpvOpMemoryModel: + case SpvOpEntryPoint: + case SpvOpExecutionMode: + case SpvOpString: + case SpvOpName: + case SpvOpMemberName: + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + assert(!"Invalid opcode types and variables section"); + break; + + 
case SpvOpTypeVoid: + case SpvOpTypeBool: + case SpvOpTypeInt: + case SpvOpTypeFloat: + case SpvOpTypeVector: + case SpvOpTypeMatrix: + case SpvOpTypeImage: + case SpvOpTypeSampler: + case SpvOpTypeSampledImage: + case SpvOpTypeArray: + case SpvOpTypeRuntimeArray: + case SpvOpTypeStruct: + case SpvOpTypeOpaque: + case SpvOpTypePointer: + case SpvOpTypeFunction: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + vtn_handle_type(b, opcode, w, count); + break; + + case SpvOpConstantTrue: + case SpvOpConstantFalse: + case SpvOpConstant: + case SpvOpConstantComposite: + case SpvOpConstantSampler: + case SpvOpConstantNull: + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: + case SpvOpSpecConstant: + case SpvOpSpecConstantComposite: + case SpvOpSpecConstantOp: + vtn_handle_constant(b, opcode, w, count); + break; + + case SpvOpVariable: + vtn_handle_variables(b, opcode, w, count); + break; + + default: + return false; /* End of preamble */ + } + + return true; +} + +static bool +vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpLabel: + break; + + case SpvOpLoopMerge: + case SpvOpSelectionMerge: + /* This is handled by cfg pre-pass and walk_blocks */ + break; + + case SpvOpUndef: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + break; + } + + case SpvOpExtInst: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpVariable: + case SpvOpLoad: + case SpvOpStore: + case SpvOpCopyMemory: + case SpvOpCopyMemorySized: + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: + case SpvOpArrayLength: + vtn_handle_variables(b, opcode, w, count); + break; + + case SpvOpFunctionCall: + vtn_handle_function_call(b, opcode, w, count); + break; + + case SpvOpSampledImage: + case SpvOpImage: + case 
SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQuerySizeLod: + case SpvOpImageQueryLod: + case SpvOpImageQueryLevels: + case SpvOpImageQuerySamples: + vtn_handle_texture(b, opcode, w, count); + break; + + case SpvOpImageRead: + case SpvOpImageWrite: + case SpvOpImageTexelPointer: + vtn_handle_image(b, opcode, w, count); + break; + + case SpvOpImageQuerySize: { + struct vtn_access_chain *image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + if (glsl_type_is_image(image->var->var->interface_type)) { + vtn_handle_image(b, opcode, w, count); + } else { + vtn_handle_texture(b, opcode, w, count); + } + break; + } + + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: { + struct vtn_value *pointer = vtn_untyped_value(b, w[3]); + if (pointer->value_type == vtn_value_type_image_pointer) { + vtn_handle_image(b, opcode, w, count); + } else { + assert(pointer->value_type == vtn_value_type_access_chain); + vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count); + } + break; + } + + case SpvOpSNegate: + case SpvOpFNegate: + case SpvOpNot: + case SpvOpAny: + case SpvOpAll: + case SpvOpConvertFToU: + case SpvOpConvertFToS: + case SpvOpConvertSToF: + case SpvOpConvertUToF: + case SpvOpUConvert: + case SpvOpSConvert: + case SpvOpFConvert: + case SpvOpQuantizeToF16: + case SpvOpConvertPtrToU: + 
case SpvOpConvertUToPtr: + case SpvOpPtrCastToGeneric: + case SpvOpGenericCastToPtr: + case SpvOpBitcast: + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + case SpvOpIAdd: + case SpvOpFAdd: + case SpvOpISub: + case SpvOpFSub: + case SpvOpIMul: + case SpvOpFMul: + case SpvOpUDiv: + case SpvOpSDiv: + case SpvOpFDiv: + case SpvOpUMod: + case SpvOpSRem: + case SpvOpSMod: + case SpvOpFRem: + case SpvOpFMod: + case SpvOpVectorTimesScalar: + case SpvOpDot: + case SpvOpIAddCarry: + case SpvOpISubBorrow: + case SpvOpUMulExtended: + case SpvOpSMulExtended: + case SpvOpShiftRightLogical: + case SpvOpShiftRightArithmetic: + case SpvOpShiftLeftLogical: + case SpvOpLogicalEqual: + case SpvOpLogicalNotEqual: + case SpvOpLogicalOr: + case SpvOpLogicalAnd: + case SpvOpLogicalNot: + case SpvOpBitwiseOr: + case SpvOpBitwiseXor: + case SpvOpBitwiseAnd: + case SpvOpSelect: + case SpvOpIEqual: + case SpvOpFOrdEqual: + case SpvOpFUnordEqual: + case SpvOpINotEqual: + case SpvOpFOrdNotEqual: + case SpvOpFUnordNotEqual: + case SpvOpULessThan: + case SpvOpSLessThan: + case SpvOpFOrdLessThan: + case SpvOpFUnordLessThan: + case SpvOpUGreaterThan: + case SpvOpSGreaterThan: + case SpvOpFOrdGreaterThan: + case SpvOpFUnordGreaterThan: + case SpvOpULessThanEqual: + case SpvOpSLessThanEqual: + case SpvOpFOrdLessThanEqual: + case SpvOpFUnordLessThanEqual: + case SpvOpUGreaterThanEqual: + case SpvOpSGreaterThanEqual: + case SpvOpFOrdGreaterThanEqual: + case SpvOpFUnordGreaterThanEqual: + case SpvOpDPdx: + case SpvOpDPdy: + case SpvOpFwidth: + case SpvOpDPdxFine: + case SpvOpDPdyFine: + case SpvOpFwidthFine: + case SpvOpDPdxCoarse: + case SpvOpDPdyCoarse: + case SpvOpFwidthCoarse: + case SpvOpBitFieldInsert: + case SpvOpBitFieldSExtract: + case SpvOpBitFieldUExtract: + case SpvOpBitReverse: + case SpvOpBitCount: + case SpvOpTranspose: + case SpvOpOuterProduct: + case 
SpvOpMatrixTimesScalar: + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + vtn_handle_alu(b, opcode, w, count); + break; + + case SpvOpVectorExtractDynamic: + case SpvOpVectorInsertDynamic: + case SpvOpVectorShuffle: + case SpvOpCompositeConstruct: + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: + case SpvOpCopyObject: + vtn_handle_composite(b, opcode, w, count); + break; + + case SpvOpEmitVertex: + case SpvOpEndPrimitive: + case SpvOpEmitStreamVertex: + case SpvOpEndStreamPrimitive: + case SpvOpControlBarrier: + case SpvOpMemoryBarrier: + vtn_handle_barrier(b, opcode, w, count); + break; + + default: + unreachable("Unhandled opcode"); + } + + return true; +} + +nir_function * +spirv_to_nir(const uint32_t *words, size_t word_count, + struct nir_spirv_specialization *spec, unsigned num_spec, + gl_shader_stage stage, const char *entry_point_name, + const nir_shader_compiler_options *options) +{ + const uint32_t *word_end = words + word_count; + + /* Handle the SPIR-V header (first 4 dwords) */ + assert(word_count > 5); + + assert(words[0] == SpvMagicNumber); + assert(words[1] >= 0x10000); + /* words[2] == generator magic */ + unsigned value_id_bound = words[3]; + assert(words[4] == 0); + + words+= 5; + + /* Initialize the stn_builder object */ + struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); + b->value_id_bound = value_id_bound; + b->values = rzalloc_array(b, struct vtn_value, value_id_bound); + exec_list_make_empty(&b->functions); + b->entry_point_stage = stage; + b->entry_point_name = entry_point_name; + + /* Handle all the preamble instructions */ + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_preamble_instruction); + + if (b->entry_point == NULL) { + assert(!"Entry point not found"); + ralloc_free(b); + return NULL; + } + + b->shader = nir_shader_create(NULL, stage, options); + + /* Set shader info defaults */ + b->shader->info.gs.invocations = 1; + + /* Parse execution modes 
*/ + vtn_foreach_execution_mode(b, b->entry_point, + vtn_handle_execution_mode, NULL); + + b->specializations = spec; + b->num_specializations = num_spec; + + /* Handle all variable, type, and constant instructions */ + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_variable_or_type_instruction); + + vtn_build_cfg(b, words, word_end); + + foreach_list_typed(struct vtn_function, func, node, &b->functions) { + b->impl = func->impl; + b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + vtn_function_emit(b, func, vtn_handle_body_instruction); + } + + assert(b->entry_point->value_type == vtn_value_type_function); + nir_function *entry_point = b->entry_point->func->impl->function; + assert(entry_point); + + ralloc_free(b); + + return entry_point; +} diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c new file mode 100644 index 00000000000..8b9a63ce760 --- /dev/null +++ b/src/compiler/spirv/vtn_alu.c @@ -0,0 +1,464 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vtn_private.h" + +/* + * Normally, column vectors in SPIR-V correspond to a single NIR SSA + * definition. But for matrix multiplies, we want to do one routine for + * multiplying a matrix by a matrix and then pretend that vectors are matrices + * with one column. So we "wrap" these things, and unwrap the result before we + * send it off. + */ + +static struct vtn_ssa_value * +wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (val == NULL) + return NULL; + + if (glsl_type_is_matrix(val->type)) + return val; + + struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); + dest->type = val->type; + dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); + dest->elems[0] = val; + + return dest; +} + +static struct vtn_ssa_value * +unwrap_matrix(struct vtn_ssa_value *val) +{ + if (glsl_type_is_matrix(val->type)) + return val; + + return val->elems[0]; +} + +static struct vtn_ssa_value * +matrix_multiply(struct vtn_builder *b, + struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) +{ + + struct vtn_ssa_value *src0 = wrap_matrix(b, _src0); + struct vtn_ssa_value *src1 = wrap_matrix(b, _src1); + struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed); + struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed); + + unsigned src0_rows = glsl_get_vector_elements(src0->type); + unsigned src0_columns = glsl_get_matrix_columns(src0->type); + unsigned src1_columns = glsl_get_matrix_columns(src1->type); + + const struct glsl_type *dest_type; + if (src1_columns > 1) { + dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns); + } else { + dest_type = glsl_vector_type(glsl_get_base_type(src0->type), 
src0_rows); + } + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); + + dest = wrap_matrix(b, dest); + + bool transpose_result = false; + if (src0_transpose && src1_transpose) { + /* transpose(A) * transpose(B) = transpose(B * A) */ + src1 = src0_transpose; + src0 = src1_transpose; + src0_transpose = NULL; + src1_transpose = NULL; + transpose_result = true; + } + + if (src0_transpose && !src1_transpose && + glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { + /* We already have the rows of src0 and the columns of src1 available, + * so we can just take the dot product of each row with each column to + * get the result. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + nir_ssa_def *vec_src[4]; + for (unsigned j = 0; j < src0_rows; j++) { + vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def, + src1->elems[i]->def); + } + dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows); + } + } else { + /* We don't handle the case where src1 is transposed but not src0, since + * the general case only uses individual components of src1 so the + * optimizer should chew through the transpose we emitted for src1. 
+ */ + + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[0]->def, + nir_channel(&b->nb, src1->elems[i]->def, 0)); + for (unsigned j = 1; j < src0_columns; j++) { + dest->elems[i]->def = + nir_fadd(&b->nb, dest->elems[i]->def, + nir_fmul(&b->nb, src0->elems[j]->def, + nir_channel(&b->nb, src1->elems[i]->def, j))); + } + } + } + + dest = unwrap_matrix(dest); + + if (transpose_result) + dest = vtn_ssa_transpose(b, dest); + + return dest; +} + +static struct vtn_ssa_value * +mat_times_scalar(struct vtn_builder *b, + struct vtn_ssa_value *mat, + nir_ssa_def *scalar) +{ + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); + for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { + if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) + dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); + else + dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); + } + + return dest; +} + +static void +vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, + struct vtn_value *dest, + struct vtn_ssa_value *src0, struct vtn_ssa_value *src1) +{ + switch (opcode) { + case SpvOpFNegate: { + dest->ssa = vtn_create_ssa_value(b, src0->type); + unsigned cols = glsl_get_matrix_columns(src0->type); + for (unsigned i = 0; i < cols; i++) + dest->ssa->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def); + break; + } + + case SpvOpFAdd: { + dest->ssa = vtn_create_ssa_value(b, src0->type); + unsigned cols = glsl_get_matrix_columns(src0->type); + for (unsigned i = 0; i < cols; i++) + dest->ssa->elems[i]->def = + nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def); + break; + } + + case SpvOpFSub: { + dest->ssa = vtn_create_ssa_value(b, src0->type); + unsigned cols = glsl_get_matrix_columns(src0->type); + for (unsigned i = 0; i < cols; i++) + dest->ssa->elems[i]->def = + nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def); 
+ break; + } + + case SpvOpTranspose: + dest->ssa = vtn_ssa_transpose(b, src0); + break; + + case SpvOpMatrixTimesScalar: + if (src0->transposed) { + dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed, + src1->def)); + } else { + dest->ssa = mat_times_scalar(b, src0, src1->def); + } + break; + + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + if (opcode == SpvOpVectorTimesMatrix) { + dest->ssa = matrix_multiply(b, vtn_ssa_transpose(b, src1), src0); + } else { + dest->ssa = matrix_multiply(b, src0, src1); + } + break; + + default: unreachable("unknown matrix opcode"); + } +} + +nir_op +vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap) +{ + /* Indicates that the first two arguments should be swapped. This is + * used for implementing greater-than and less-than-or-equal. + */ + *swap = false; + + switch (opcode) { + case SpvOpSNegate: return nir_op_ineg; + case SpvOpFNegate: return nir_op_fneg; + case SpvOpNot: return nir_op_inot; + case SpvOpIAdd: return nir_op_iadd; + case SpvOpFAdd: return nir_op_fadd; + case SpvOpISub: return nir_op_isub; + case SpvOpFSub: return nir_op_fsub; + case SpvOpIMul: return nir_op_imul; + case SpvOpFMul: return nir_op_fmul; + case SpvOpUDiv: return nir_op_udiv; + case SpvOpSDiv: return nir_op_idiv; + case SpvOpFDiv: return nir_op_fdiv; + case SpvOpUMod: return nir_op_umod; + case SpvOpSMod: return nir_op_imod; + case SpvOpFMod: return nir_op_fmod; + case SpvOpSRem: return nir_op_irem; + case SpvOpFRem: return nir_op_frem; + + case SpvOpShiftRightLogical: return nir_op_ushr; + case SpvOpShiftRightArithmetic: return nir_op_ishr; + case SpvOpShiftLeftLogical: return nir_op_ishl; + case SpvOpLogicalOr: return nir_op_ior; + case SpvOpLogicalEqual: return nir_op_ieq; + case SpvOpLogicalNotEqual: return nir_op_ine; + case SpvOpLogicalAnd: return nir_op_iand; + case SpvOpLogicalNot: return nir_op_inot; + case SpvOpBitwiseOr: return nir_op_ior; + case SpvOpBitwiseXor: return 
nir_op_ixor; + case SpvOpBitwiseAnd: return nir_op_iand; + case SpvOpSelect: return nir_op_bcsel; + case SpvOpIEqual: return nir_op_ieq; + + case SpvOpBitFieldInsert: return nir_op_bitfield_insert; + case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract; + case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract; + case SpvOpBitReverse: return nir_op_bitfield_reverse; + case SpvOpBitCount: return nir_op_bit_count; + + /* Comparisons: (TODO: How do we want to handled ordered/unordered?) */ + case SpvOpFOrdEqual: return nir_op_feq; + case SpvOpFUnordEqual: return nir_op_feq; + case SpvOpINotEqual: return nir_op_ine; + case SpvOpFOrdNotEqual: return nir_op_fne; + case SpvOpFUnordNotEqual: return nir_op_fne; + case SpvOpULessThan: return nir_op_ult; + case SpvOpSLessThan: return nir_op_ilt; + case SpvOpFOrdLessThan: return nir_op_flt; + case SpvOpFUnordLessThan: return nir_op_flt; + case SpvOpUGreaterThan: *swap = true; return nir_op_ult; + case SpvOpSGreaterThan: *swap = true; return nir_op_ilt; + case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt; + case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt; + case SpvOpULessThanEqual: *swap = true; return nir_op_uge; + case SpvOpSLessThanEqual: *swap = true; return nir_op_ige; + case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge; + case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge; + case SpvOpUGreaterThanEqual: return nir_op_uge; + case SpvOpSGreaterThanEqual: return nir_op_ige; + case SpvOpFOrdGreaterThanEqual: return nir_op_fge; + case SpvOpFUnordGreaterThanEqual: return nir_op_fge; + + /* Conversions: */ + case SpvOpConvertFToU: return nir_op_f2u; + case SpvOpConvertFToS: return nir_op_f2i; + case SpvOpConvertSToF: return nir_op_i2f; + case SpvOpConvertUToF: return nir_op_u2f; + case SpvOpBitcast: return nir_op_imov; + case SpvOpUConvert: + case SpvOpQuantizeToF16: return nir_op_fquantize2f16; + /* TODO: NIR is 32-bit only; these are no-ops. 
*/ + case SpvOpSConvert: return nir_op_imov; + case SpvOpFConvert: return nir_op_fmov; + + /* Derivatives: */ + case SpvOpDPdx: return nir_op_fddx; + case SpvOpDPdy: return nir_op_fddy; + case SpvOpDPdxFine: return nir_op_fddx_fine; + case SpvOpDPdyFine: return nir_op_fddy_fine; + case SpvOpDPdxCoarse: return nir_op_fddx_coarse; + case SpvOpDPdyCoarse: return nir_op_fddy_coarse; + + default: + unreachable("No NIR equivalent"); + } +} + +static void +handle_no_contraction(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *_void) +{ + assert(dec->scope == VTN_DEC_DECORATION); + if (dec->decoration != SpvDecorationNoContraction) + return; + + b->nb.exact = true; +} + +void +vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + + vtn_foreach_decoration(b, val, handle_no_contraction, NULL); + + /* Collect the various SSA sources */ + const unsigned num_inputs = count - 3; + struct vtn_ssa_value *vtn_src[4] = { NULL, }; + for (unsigned i = 0; i < num_inputs; i++) + vtn_src[i] = vtn_ssa_value(b, w[i + 3]); + + if (glsl_type_is_matrix(vtn_src[0]->type) || + (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) { + vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]); + b->nb.exact = false; + return; + } + + val->ssa = vtn_create_ssa_value(b, type); + nir_ssa_def *src[4] = { NULL, }; + for (unsigned i = 0; i < num_inputs; i++) { + assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type)); + src[i] = vtn_src[i]->def; + } + + switch (opcode) { + case SpvOpAny: + if (src[0]->num_components == 1) { + val->ssa->def = nir_imov(&b->nb, src[0]); + } else { + nir_op op; + switch (src[0]->num_components) { + case 2: op = nir_op_bany_inequal2; break; + case 3: op = nir_op_bany_inequal3; break; + case 4: op = 
nir_op_bany_inequal4; break; + } + val->ssa->def = nir_build_alu(&b->nb, op, src[0], + nir_imm_int(&b->nb, NIR_FALSE), + NULL, NULL); + } + break; + + case SpvOpAll: + if (src[0]->num_components == 1) { + val->ssa->def = nir_imov(&b->nb, src[0]); + } else { + nir_op op; + switch (src[0]->num_components) { + case 2: op = nir_op_ball_iequal2; break; + case 3: op = nir_op_ball_iequal3; break; + case 4: op = nir_op_ball_iequal4; break; + } + val->ssa->def = nir_build_alu(&b->nb, op, src[0], + nir_imm_int(&b->nb, NIR_TRUE), + NULL, NULL); + } + break; + + case SpvOpOuterProduct: { + for (unsigned i = 0; i < src[1]->num_components; i++) { + val->ssa->elems[i]->def = + nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i)); + } + break; + } + + case SpvOpDot: + val->ssa->def = nir_fdot(&b->nb, src[0], src[1]); + break; + + case SpvOpIAddCarry: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]); + break; + + case SpvOpISubBorrow: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]); + break; + + case SpvOpUMulExtended: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]); + break; + + case SpvOpSMulExtended: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]); + break; + + case SpvOpFwidth: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddy(&b->nb, src[0]))); + break; + case SpvOpFwidthFine: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0]))); + 
break; + case SpvOpFwidthCoarse: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0]))); + break; + + case SpvOpVectorTimesScalar: + /* The builder will take care of splatting for us. */ + val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); + break; + + case SpvOpIsNan: + val->ssa->def = nir_fne(&b->nb, src[0], src[0]); + break; + + case SpvOpIsInf: + val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), + nir_imm_float(&b->nb, INFINITY)); + break; + + default: { + bool swap; + nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); + + if (swap) { + nir_ssa_def *tmp = src[0]; + src[0] = src[1]; + src[1] = tmp; + } + + val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); + break; + } /* default */ + } + + b->nb.exact = false; +} diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c new file mode 100644 index 00000000000..6a43ef8b2dd --- /dev/null +++ b/src/compiler/spirv/vtn_cfg.c @@ -0,0 +1,778 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vtn_private.h" +#include "nir/nir_vla.h" + +static bool +vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpFunction: { + assert(b->func == NULL); + b->func = rzalloc(b, struct vtn_function); + + list_inithead(&b->func->body); + b->func->control = w[3]; + + const struct glsl_type *result_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + val->func = b->func; + + const struct glsl_type *func_type = + vtn_value(b, w[4], vtn_value_type_type)->type->type; + + assert(glsl_get_function_return_type(func_type) == result_type); + + nir_function *func = + nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); + + func->num_params = glsl_get_length(func_type); + func->params = ralloc_array(b->shader, nir_parameter, func->num_params); + for (unsigned i = 0; i < func->num_params; i++) { + const struct glsl_function_param *param = + glsl_get_function_param(func_type, i); + func->params[i].type = param->type; + if (param->in) { + if (param->out) { + func->params[i].param_type = nir_parameter_inout; + } else { + func->params[i].param_type = nir_parameter_in; + } + } else { + if (param->out) { + func->params[i].param_type = nir_parameter_out; + } else { + assert(!"Parameter is neither in nor out"); + } + } + } + + func->return_type = glsl_get_function_return_type(func_type); + + b->func->impl = nir_function_impl_create(func); + + b->func_param_idx = 0; + break; + } + + case SpvOpFunctionEnd: + b->func->end = w; + b->func = NULL; + break; + + case SpvOpFunctionParameter: { + struct vtn_value *val = + 
vtn_push_value(b, w[2], vtn_value_type_access_chain); + + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + + assert(b->func_param_idx < b->func->impl->num_params); + nir_variable *param = b->func->impl->params[b->func_param_idx++]; + + assert(param->type == type->type); + + /* Name the parameter so it shows up nicely in NIR */ + param->name = ralloc_strdup(param, val->name); + + struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable); + vtn_var->type = type; + vtn_var->var = param; + vtn_var->chain.var = vtn_var; + vtn_var->chain.length = 0; + + struct vtn_type *without_array = type; + while(glsl_type_is_array(without_array->type)) + without_array = without_array->array_element; + + if (glsl_type_is_image(without_array->type)) { + vtn_var->mode = vtn_variable_mode_image; + param->interface_type = without_array->type; + } else if (glsl_type_is_sampler(without_array->type)) { + vtn_var->mode = vtn_variable_mode_sampler; + param->interface_type = without_array->type; + } else { + vtn_var->mode = vtn_variable_mode_param; + } + + val->access_chain = &vtn_var->chain; + break; + } + + case SpvOpLabel: { + assert(b->block == NULL); + b->block = rzalloc(b, struct vtn_block); + b->block->node.type = vtn_cf_node_type_block; + b->block->label = w; + vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; + + if (b->func->start_block == NULL) { + /* This is the first block encountered for this function. In this + * case, we set the start block and add it to the list of + * implemented functions that we'll walk later. 
+ */ + b->func->start_block = b->block; + exec_list_push_tail(&b->functions, &b->func->node); + } + break; + } + + case SpvOpSelectionMerge: + case SpvOpLoopMerge: + assert(b->block && b->block->merge == NULL); + b->block->merge = w; + break; + + case SpvOpBranch: + case SpvOpBranchConditional: + case SpvOpSwitch: + case SpvOpKill: + case SpvOpReturn: + case SpvOpReturnValue: + case SpvOpUnreachable: + assert(b->block && b->block->branch == NULL); + b->block->branch = w; + b->block = NULL; + break; + + default: + /* Continue on as per normal */ + return true; + } + + return true; +} + +static void +vtn_add_case(struct vtn_builder *b, struct vtn_switch *swtch, + struct vtn_block *break_block, + uint32_t block_id, uint32_t val, bool is_default) +{ + struct vtn_block *case_block = + vtn_value(b, block_id, vtn_value_type_block)->block; + + /* Don't create dummy cases that just break */ + if (case_block == break_block) + return; + + if (case_block->switch_case == NULL) { + struct vtn_case *c = ralloc(b, struct vtn_case); + + list_inithead(&c->body); + c->start_block = case_block; + c->fallthrough = NULL; + nir_array_init(&c->values, b); + c->is_default = false; + c->visited = false; + + list_addtail(&c->link, &swtch->cases); + + case_block->switch_case = c; + } + + if (is_default) { + case_block->switch_case->is_default = true; + } else { + nir_array_add(&case_block->switch_case->values, uint32_t, val); + } +} + +/* This function performs a depth-first search of the cases and puts them + * in fall-through order. + */ +static void +vtn_order_case(struct vtn_switch *swtch, struct vtn_case *cse) +{ + if (cse->visited) + return; + + cse->visited = true; + + list_del(&cse->link); + + if (cse->fallthrough) { + vtn_order_case(swtch, cse->fallthrough); + + /* If we have a fall-through, place this case right before the case it + * falls through to. This ensures that fallthroughs come one after + * the other. 
These two can never get separated because that would + * imply something else falling through to the same case. Also, this + * can't break ordering because the DFS ensures that this case is + * visited before anything that falls through to it. + */ + list_addtail(&cse->link, &cse->fallthrough->link); + } else { + list_add(&cse->link, &swtch->cases); + } +} + +static enum vtn_branch_type +vtn_get_branch_type(struct vtn_block *block, + struct vtn_case *swcase, struct vtn_block *switch_break, + struct vtn_block *loop_break, struct vtn_block *loop_cont) +{ + if (block->switch_case) { + /* This branch is actually a fallthrough */ + assert(swcase->fallthrough == NULL || + swcase->fallthrough == block->switch_case); + swcase->fallthrough = block->switch_case; + return vtn_branch_type_switch_fallthrough; + } else if (block == switch_break) { + return vtn_branch_type_switch_break; + } else if (block == loop_break) { + return vtn_branch_type_loop_break; + } else if (block == loop_cont) { + return vtn_branch_type_loop_continue; + } else { + return vtn_branch_type_none; + } +} + +static void +vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, + struct vtn_block *start, struct vtn_case *switch_case, + struct vtn_block *switch_break, + struct vtn_block *loop_break, struct vtn_block *loop_cont, + struct vtn_block *end) +{ + struct vtn_block *block = start; + while (block != end) { + if (block->merge && (*block->merge & SpvOpCodeMask) == SpvOpLoopMerge && + !block->loop) { + struct vtn_loop *loop = ralloc(b, struct vtn_loop); + + loop->node.type = vtn_cf_node_type_loop; + list_inithead(&loop->body); + list_inithead(&loop->cont_body); + loop->control = block->merge[3]; + + list_addtail(&loop->node.link, cf_list); + block->loop = loop; + + struct vtn_block *new_loop_break = + vtn_value(b, block->merge[1], vtn_value_type_block)->block; + struct vtn_block *new_loop_cont = + vtn_value(b, block->merge[2], vtn_value_type_block)->block; + + /* Note: This recursive call 
will start with the current block as + * its start block. If we weren't careful, we would get here + * again and end up in infinite recursion. This is why we set + * block->loop above and check for it before creating one. This + * way, we only create the loop once and the second call that + * tries to handle this loop goes to the cases below and gets + * handled as a regular block. + * + * Note: When we make the recursive walk calls, we pass NULL for + * the switch break since you have to break out of the loop first. + * We do, however, still pass the current switch case because it's + * possible that the merge block for the loop is the start of + * another case. + */ + vtn_cfg_walk_blocks(b, &loop->body, block, switch_case, NULL, + new_loop_break, new_loop_cont, NULL ); + vtn_cfg_walk_blocks(b, &loop->cont_body, new_loop_cont, NULL, NULL, + new_loop_break, NULL, block); + + block = new_loop_break; + continue; + } + + assert(block->node.link.next == NULL); + list_addtail(&block->node.link, cf_list); + + switch (*block->branch & SpvOpCodeMask) { + case SpvOpBranch: { + struct vtn_block *branch_block = + vtn_value(b, block->branch[1], vtn_value_type_block)->block; + + block->branch_type = vtn_get_branch_type(branch_block, + switch_case, switch_break, + loop_break, loop_cont); + + if (block->branch_type != vtn_branch_type_none) + return; + + block = branch_block; + continue; + } + + case SpvOpReturn: + case SpvOpReturnValue: + block->branch_type = vtn_branch_type_return; + return; + + case SpvOpKill: + block->branch_type = vtn_branch_type_discard; + return; + + case SpvOpBranchConditional: { + struct vtn_block *then_block = + vtn_value(b, block->branch[2], vtn_value_type_block)->block; + struct vtn_block *else_block = + vtn_value(b, block->branch[3], vtn_value_type_block)->block; + + struct vtn_if *if_stmt = ralloc(b, struct vtn_if); + + if_stmt->node.type = vtn_cf_node_type_if; + if_stmt->condition = block->branch[1]; + list_inithead(&if_stmt->then_body); + 
list_inithead(&if_stmt->else_body); + + list_addtail(&if_stmt->node.link, cf_list); + + if (block->merge && + (*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) { + if_stmt->control = block->merge[2]; + } + + if_stmt->then_type = vtn_get_branch_type(then_block, + switch_case, switch_break, + loop_break, loop_cont); + if_stmt->else_type = vtn_get_branch_type(else_block, + switch_case, switch_break, + loop_break, loop_cont); + + if (if_stmt->then_type == vtn_branch_type_none && + if_stmt->else_type == vtn_branch_type_none) { + /* Neither side of the if is something we can short-circuit. */ + assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge); + struct vtn_block *merge_block = + vtn_value(b, block->merge[1], vtn_value_type_block)->block; + + vtn_cfg_walk_blocks(b, &if_stmt->then_body, then_block, + switch_case, switch_break, + loop_break, loop_cont, merge_block); + vtn_cfg_walk_blocks(b, &if_stmt->else_body, else_block, + switch_case, switch_break, + loop_break, loop_cont, merge_block); + + enum vtn_branch_type merge_type = + vtn_get_branch_type(merge_block, switch_case, switch_break, + loop_break, loop_cont); + if (merge_type == vtn_branch_type_none) { + block = merge_block; + continue; + } else { + return; + } + } else if (if_stmt->then_type != vtn_branch_type_none && + if_stmt->else_type != vtn_branch_type_none) { + /* Both sides were short-circuited. We're done here. */ + return; + } else { + /* Exeactly one side of the branch could be short-circuited. + * We set the branch up as a predicated break/continue and we + * continue on with the other side as if it were what comes + * after the if. 
+ */ + if (if_stmt->then_type == vtn_branch_type_none) { + block = then_block; + } else { + block = else_block; + } + continue; + } + unreachable("Should have returned or continued"); + } + + case SpvOpSwitch: { + assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge); + struct vtn_block *break_block = + vtn_value(b, block->merge[1], vtn_value_type_block)->block; + + struct vtn_switch *swtch = ralloc(b, struct vtn_switch); + + swtch->node.type = vtn_cf_node_type_switch; + swtch->selector = block->branch[1]; + list_inithead(&swtch->cases); + + list_addtail(&swtch->node.link, cf_list); + + /* First, we go through and record all of the cases. */ + const uint32_t *branch_end = + block->branch + (block->branch[0] >> SpvWordCountShift); + + vtn_add_case(b, swtch, break_block, block->branch[2], 0, true); + for (const uint32_t *w = block->branch + 3; w < branch_end; w += 2) + vtn_add_case(b, swtch, break_block, w[1], w[0], false); + + /* Now, we go through and walk the blocks. While we walk through + * the blocks, we also gather the much-needed fall-through + * information. + */ + list_for_each_entry(struct vtn_case, cse, &swtch->cases, link) { + assert(cse->start_block != break_block); + vtn_cfg_walk_blocks(b, &cse->body, cse->start_block, cse, + break_block, NULL, loop_cont, NULL); + } + + /* Finally, we walk over all of the cases one more time and put + * them in fall-through order. 
+ */ + for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) { + struct vtn_block *case_block = + vtn_value(b, *w, vtn_value_type_block)->block; + + if (case_block == break_block) + continue; + + assert(case_block->switch_case); + + vtn_order_case(swtch, case_block->switch_case); + } + + block = break_block; + continue; + } + + case SpvOpUnreachable: + return; + + default: + unreachable("Unhandled opcode"); + } + } +} + +void +vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end) +{ + vtn_foreach_instruction(b, words, end, + vtn_cfg_handle_prepass_instruction); + + foreach_list_typed(struct vtn_function, func, node, &b->functions) { + vtn_cfg_walk_blocks(b, &func->body, func->start_block, + NULL, NULL, NULL, NULL, NULL); + } +} + +static bool +vtn_handle_phis_first_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpLabel) + return true; /* Nothing to do */ + + /* If this isn't a phi node, stop. */ + if (opcode != SpvOpPhi) + return false; + + /* For handling phi nodes, we do a poor-man's out-of-ssa on the spot. + * For each phi, we create a variable with the appropreate type and + * do a load from that variable. Then, in a second pass, we add + * stores to that variable to each of the predecessor blocks. + * + * We could do something more intelligent here. However, in order to + * handle loops and things properly, we really need dominance + * information. It would end up basically being the into-SSA + * algorithm all over again. It's easier if we just let + * lower_vars_to_ssa do that for us instead of repeating it here. 
+ */ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_variable *phi_var = + nir_local_variable_create(b->nb.impl, type->type, "phi"); + _mesa_hash_table_insert(b->phi_table, w, phi_var); + + val->ssa = vtn_local_load(b, nir_deref_var_create(b, phi_var)); + + return true; +} + +static bool +vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode != SpvOpPhi) + return true; + + struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w); + assert(phi_entry); + nir_variable *phi_var = phi_entry->data; + + for (unsigned i = 3; i < count; i += 2) { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]); + struct vtn_block *pred = + vtn_value(b, w[i + 1], vtn_value_type_block)->block; + + b->nb.cursor = nir_after_block_before_jump(pred->end_block); + + vtn_local_store(b, src, nir_deref_var_create(b, phi_var)); + } + + return true; +} + +static void +vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type, + nir_variable *switch_fall_var, bool *has_switch_break) +{ + switch (branch_type) { + case vtn_branch_type_switch_break: + nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); + *has_switch_break = true; + break; + case vtn_branch_type_switch_fallthrough: + break; /* Nothing to do */ + case vtn_branch_type_loop_break: + nir_jump(&b->nb, nir_jump_break); + break; + case vtn_branch_type_loop_continue: + nir_jump(&b->nb, nir_jump_continue); + break; + case vtn_branch_type_return: + nir_jump(&b->nb, nir_jump_return); + break; + case vtn_branch_type_discard: { + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_discard); + nir_builder_instr_insert(&b->nb, &discard->instr); + break; + } + default: + unreachable("Invalid branch type"); + } +} + +static void +vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, + 
nir_variable *switch_fall_var, bool *has_switch_break, + vtn_instruction_handler handler) +{ + list_for_each_entry(struct vtn_cf_node, node, cf_list, link) { + switch (node->type) { + case vtn_cf_node_type_block: { + struct vtn_block *block = (struct vtn_block *)node; + + const uint32_t *block_start = block->label; + const uint32_t *block_end = block->merge ? block->merge : + block->branch; + + block_start = vtn_foreach_instruction(b, block_start, block_end, + vtn_handle_phis_first_pass); + + vtn_foreach_instruction(b, block_start, block_end, handler); + + block->end_block = nir_cursor_current_block(b->nb.cursor); + + if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) { + struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]); + vtn_local_store(b, src, + nir_deref_var_create(b, b->impl->return_var)); + } + + if (block->branch_type != vtn_branch_type_none) { + vtn_emit_branch(b, block->branch_type, + switch_fall_var, has_switch_break); + } + + break; + } + + case vtn_cf_node_type_if: { + struct vtn_if *vtn_if = (struct vtn_if *)node; + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = + nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def); + nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); + + bool sw_break = false; + + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); + if (vtn_if->then_type == vtn_branch_type_none) { + vtn_emit_cf_list(b, &vtn_if->then_body, + switch_fall_var, &sw_break, handler); + } else { + vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break); + } + + b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); + if (vtn_if->else_type == vtn_branch_type_none) { + vtn_emit_cf_list(b, &vtn_if->else_body, + switch_fall_var, &sw_break, handler); + } else { + vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break); + } + + b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); + + /* If we encountered a switch break somewhere inside of the if, + * then it would have been handled 
correctly by calling + * emit_cf_list or emit_branch for the interrior. However, we + * need to predicate everything following on wether or not we're + * still going. + */ + if (sw_break) { + *has_switch_break = true; + + nir_if *switch_if = nir_if_create(b->shader); + switch_if->condition = + nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var)); + nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node); + + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); + } + break; + } + + case vtn_cf_node_type_loop: { + struct vtn_loop *vtn_loop = (struct vtn_loop *)node; + + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert(b->nb.cursor, &loop->cf_node); + + b->nb.cursor = nir_after_cf_list(&loop->body); + vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler); + + if (!list_empty(&vtn_loop->cont_body)) { + /* If we have a non-trivial continue body then we need to put + * it at the beginning of the loop with a flag to ensure that + * it doesn't get executed in the first iteration. 
+ */ + nir_variable *do_cont = + nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont"); + + b->nb.cursor = nir_before_cf_node(&loop->cf_node); + nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1); + + b->nb.cursor = nir_before_cf_list(&loop->body); + nir_if *cont_if = nir_if_create(b->shader); + cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont)); + nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node); + + b->nb.cursor = nir_after_cf_list(&cont_if->then_list); + vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL, handler); + + b->nb.cursor = nir_after_cf_node(&cont_if->cf_node); + nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1); + + b->has_loop_continue = true; + } + + b->nb.cursor = nir_after_cf_node(&loop->cf_node); + break; + } + + case vtn_cf_node_type_switch: { + struct vtn_switch *vtn_switch = (struct vtn_switch *)node; + + /* First, we create a variable to keep track of whether or not the + * switch is still going at any given point. Any switch breaks + * will set this variable to false. + */ + nir_variable *fall_var = + nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall"); + nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); + + /* Next, we gather up all of the conditions. We have to do this + * up-front because we also need to build an "any" condition so + * that we can use !any for default. + */ + const int num_cases = list_length(&vtn_switch->cases); + NIR_VLA(nir_ssa_def *, conditions, num_cases); + + nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def; + /* An accumulation of all conditions. 
Used for the default */ + nir_ssa_def *any = NULL; + + int i = 0; + list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { + if (cse->is_default) { + conditions[i++] = NULL; + continue; + } + + nir_ssa_def *cond = NULL; + nir_array_foreach(&cse->values, uint32_t, val) { + nir_ssa_def *is_val = + nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val)); + + cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val; + } + + any = any ? nir_ior(&b->nb, any, cond) : cond; + conditions[i++] = cond; + } + assert(i == num_cases); + + /* Now we can walk the list of cases and actually emit code */ + i = 0; + list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { + /* Figure out the condition */ + nir_ssa_def *cond = conditions[i++]; + if (cse->is_default) { + assert(cond == NULL); + cond = nir_inot(&b->nb, any); + } + /* Take fallthrough into account */ + cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var)); + + nir_if *case_if = nir_if_create(b->nb.shader); + case_if->condition = nir_src_for_ssa(cond); + nir_cf_node_insert(b->nb.cursor, &case_if->cf_node); + + bool has_break = false; + b->nb.cursor = nir_after_cf_list(&case_if->then_list); + nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1); + vtn_emit_cf_list(b, &cse->body, fall_var, &has_break, handler); + (void)has_break; /* We don't care */ + + b->nb.cursor = nir_after_cf_node(&case_if->cf_node); + } + assert(i == num_cases); + + break; + } + + default: + unreachable("Invalid CF node type"); + } + } +} + +void +vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, + vtn_instruction_handler instruction_handler) +{ + nir_builder_init(&b->nb, func->impl); + b->nb.cursor = nir_after_cf_list(&func->impl->body); + b->has_loop_continue = false; + b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler); + + vtn_foreach_instruction(b, func->start_block->label, func->end, + 
vtn_handle_phi_second_pass); + + /* Continue blocks for loops get inserted before the body of the loop + * but instructions in the continue may use SSA defs in the loop body. + * Therefore, we need to repair SSA to insert the needed phi nodes. + */ + if (b->has_loop_continue) + nir_repair_ssa_impl(func->impl); +} diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c new file mode 100644 index 00000000000..e05d28ffede --- /dev/null +++ b/src/compiler/spirv/vtn_glsl450.c @@ -0,0 +1,666 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "vtn_private.h" +#include "GLSL.std.450.h" + +#define M_PIf ((float) M_PI) +#define M_PI_2f ((float) M_PI_2) +#define M_PI_4f ((float) M_PI_4) + +static nir_ssa_def * +build_mat2_det(nir_builder *b, nir_ssa_def *col[2]) +{ + unsigned swiz[4] = {1, 0, 0, 0}; + nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true)); + return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1)); +} + +static nir_ssa_def * +build_mat3_det(nir_builder *b, nir_ssa_def *col[3]) +{ + unsigned yzx[4] = {1, 2, 0, 0}; + unsigned zxy[4] = {2, 0, 1, 0}; + + nir_ssa_def *prod0 = + nir_fmul(b, col[0], + nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true), + nir_swizzle(b, col[2], zxy, 3, true))); + nir_ssa_def *prod1 = + nir_fmul(b, col[0], + nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true), + nir_swizzle(b, col[2], yzx, 3, true))); + + nir_ssa_def *diff = nir_fsub(b, prod0, prod1); + + return nir_fadd(b, nir_channel(b, diff, 0), + nir_fadd(b, nir_channel(b, diff, 1), + nir_channel(b, diff, 2))); +} + +static nir_ssa_def * +build_mat4_det(nir_builder *b, nir_ssa_def **col) +{ + nir_ssa_def *subdet[4]; + for (unsigned i = 0; i < 4; i++) { + unsigned swiz[3]; + for (unsigned j = 0; j < 3; j++) + swiz[j] = j + (j >= i); + + nir_ssa_def *subcol[3]; + subcol[0] = nir_swizzle(b, col[1], swiz, 3, true); + subcol[1] = nir_swizzle(b, col[2], swiz, 3, true); + subcol[2] = nir_swizzle(b, col[3], swiz, 3, true); + + subdet[i] = build_mat3_det(b, subcol); + } + + nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4)); + + return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0), + nir_channel(b, prod, 1)), + nir_fsub(b, nir_channel(b, prod, 2), + nir_channel(b, prod, 3))); +} + +static nir_ssa_def * +build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + unsigned size = glsl_get_vector_elements(src->type); + + nir_ssa_def *cols[4]; + for (unsigned i = 0; i < size; i++) + cols[i] = 
src->elems[i]->def; + + switch(size) { + case 2: return build_mat2_det(&b->nb, cols); + case 3: return build_mat3_det(&b->nb, cols); + case 4: return build_mat4_det(&b->nb, cols); + default: + unreachable("Invalid matrix size"); + } +} + +/* Computes the determinate of the submatrix given by taking src and + * removing the specified row and column. + */ +static nir_ssa_def * +build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src, + unsigned size, unsigned row, unsigned col) +{ + assert(row < size && col < size); + if (size == 2) { + return nir_channel(b, src->elems[1 - col]->def, 1 - row); + } else { + /* Swizzle to get all but the specified row */ + unsigned swiz[3]; + for (unsigned j = 0; j < 3; j++) + swiz[j] = j + (j >= row); + + /* Grab all but the specified column */ + nir_ssa_def *subcol[3]; + for (unsigned j = 0; j < size; j++) { + if (j != col) { + subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def, + swiz, size - 1, true); + } + } + + if (size == 3) { + return build_mat2_det(b, subcol); + } else { + assert(size == 4); + return build_mat3_det(b, subcol); + } + } +} + +static struct vtn_ssa_value * +matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + nir_ssa_def *adj_col[4]; + unsigned size = glsl_get_vector_elements(src->type); + + /* Build up an adjugate matrix */ + for (unsigned c = 0; c < size; c++) { + nir_ssa_def *elem[4]; + for (unsigned r = 0; r < size; r++) { + elem[r] = build_mat_subdet(&b->nb, src, size, c, r); + + if ((r + c) % 2) + elem[r] = nir_fneg(&b->nb, elem[r]); + } + + adj_col[c] = nir_vec(&b->nb, elem, size); + } + + nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src)); + + struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type); + for (unsigned i = 0; i < size; i++) + val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv); + + return val; +} + +static nir_ssa_def* +build_length(nir_builder *b, nir_ssa_def *vec) +{ + switch (vec->num_components) { + case 1: return nir_fsqrt(b, 
nir_fmul(b, vec, vec)); + case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec)); + case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec)); + case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec)); + default: + unreachable("Invalid number of components"); + } +} + +static inline nir_ssa_def * +build_fclamp(nir_builder *b, + nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val) +{ + return nir_fmin(b, nir_fmax(b, x, min_val), max_val); +} + +/** + * Return e^x. + */ +static nir_ssa_def * +build_exp(nir_builder *b, nir_ssa_def *x) +{ + return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E))); +} + +/** + * Return ln(x) - the natural logarithm of x. + */ +static nir_ssa_def * +build_log(nir_builder *b, nir_ssa_def *x) +{ + return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E)); +} + +/** + * Approximate asin(x) by the formula: + * asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1)))) + * + * which is correct to first order at x=0 and x=±1 regardless of the p + * coefficients but can be made second-order correct at both ends by selecting + * the fit coefficients appropriately. Different p coefficients can be used + * in the asin and acos implementation to minimize some relative error metric + * in each case. + */ +static nir_ssa_def * +build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1) +{ + nir_ssa_def *abs_x = nir_fabs(b, x); + return nir_fmul(b, nir_fsign(b, x), + nir_fsub(b, nir_imm_float(b, M_PI_2f), + nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), + nir_fadd(b, nir_imm_float(b, M_PI_2f), + nir_fmul(b, abs_x, + nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), + nir_fmul(b, abs_x, + nir_fadd(b, nir_imm_float(b, p0), + nir_fmul(b, abs_x, + nir_imm_float(b, p1)))))))))); +} + +/** + * Compute xs[0] + xs[1] + xs[2] + ... using fadd. 
+ */ +static nir_ssa_def * +build_fsum(nir_builder *b, nir_ssa_def **xs, int terms) +{ + nir_ssa_def *accum = xs[0]; + + for (int i = 1; i < terms; i++) + accum = nir_fadd(b, accum, xs[i]); + + return accum; +} + +static nir_ssa_def * +build_atan(nir_builder *b, nir_ssa_def *y_over_x) +{ + nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x); + nir_ssa_def *one = nir_imm_float(b, 1.0f); + + /* + * range-reduction, first step: + * + * / y_over_x if |y_over_x| <= 1.0; + * x = < + * \ 1.0 / y_over_x otherwise + */ + nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one), + nir_fmax(b, abs_y_over_x, one)); + + /* + * approximate atan by evaluating polynomial: + * + * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + + * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + + * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 + */ + nir_ssa_def *x_2 = nir_fmul(b, x, x); + nir_ssa_def *x_3 = nir_fmul(b, x_2, x); + nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2); + nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2); + nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2); + nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2); + + nir_ssa_def *polynomial_terms[] = { + nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)), + nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)), + nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)), + nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)), + nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)), + nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)), + }; + + nir_ssa_def *tmp = + build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms)); + + /* range-reduction fixup */ + tmp = nir_fadd(b, tmp, + nir_fmul(b, + nir_b2f(b, nir_flt(b, one, abs_y_over_x)), + nir_fadd(b, nir_fmul(b, tmp, + nir_imm_float(b, -2.0f)), + nir_imm_float(b, M_PI_2f)))); + + /* sign fixup */ + return nir_fmul(b, tmp, nir_fsign(b, y_over_x)); +} + +static nir_ssa_def * +build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) +{ + nir_ssa_def *zero = 
nir_imm_float(b, 0.0f); + + /* If |x| >= 1.0e-8 * |y|: */ + nir_ssa_def *condition = + nir_fge(b, nir_fabs(b, x), + nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y))); + + /* Then...call atan(y/x) and fix it up: */ + nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x)); + nir_ssa_def *r_then = + nir_bcsel(b, nir_flt(b, x, zero), + nir_fadd(b, atan1, + nir_bcsel(b, nir_fge(b, y, zero), + nir_imm_float(b, M_PIf), + nir_imm_float(b, -M_PIf))), + atan1); + + /* Else... */ + nir_ssa_def *r_else = + nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f)); + + return nir_bcsel(b, condition, r_then, r_else); +} + +static nir_ssa_def * +build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent) +{ + nir_ssa_def *abs_x = nir_fabs(b, x); + nir_ssa_def *zero = nir_imm_float(b, 0.0f); + + /* Single-precision floating-point values are stored as + * 1 sign bit; + * 8 exponent bits; + * 23 mantissa bits. + * + * An exponent shift of 23 will shift the mantissa out, leaving only the + * exponent and sign bit (which itself may be zero, if the absolute value + * was taken before the bitcast and shift. + */ + nir_ssa_def *exponent_shift = nir_imm_int(b, 23); + nir_ssa_def *exponent_bias = nir_imm_int(b, -126); + + nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu); + + /* Exponent of floating-point values in the range [0.5, 1.0). 
*/ + nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u); + + nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero); + + *exponent = + nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), + nir_bcsel(b, is_not_zero, exponent_bias, zero)); + + return nir_ior(b, nir_iand(b, x, sign_mantissa_mask), + nir_bcsel(b, is_not_zero, exponent_value, zero)); +} + +static nir_op +vtn_nir_alu_op_for_spirv_glsl_opcode(enum GLSLstd450 opcode) +{ + switch (opcode) { + case GLSLstd450Round: return nir_op_fround_even; + case GLSLstd450RoundEven: return nir_op_fround_even; + case GLSLstd450Trunc: return nir_op_ftrunc; + case GLSLstd450FAbs: return nir_op_fabs; + case GLSLstd450SAbs: return nir_op_iabs; + case GLSLstd450FSign: return nir_op_fsign; + case GLSLstd450SSign: return nir_op_isign; + case GLSLstd450Floor: return nir_op_ffloor; + case GLSLstd450Ceil: return nir_op_fceil; + case GLSLstd450Fract: return nir_op_ffract; + case GLSLstd450Sin: return nir_op_fsin; + case GLSLstd450Cos: return nir_op_fcos; + case GLSLstd450Pow: return nir_op_fpow; + case GLSLstd450Exp2: return nir_op_fexp2; + case GLSLstd450Log2: return nir_op_flog2; + case GLSLstd450Sqrt: return nir_op_fsqrt; + case GLSLstd450InverseSqrt: return nir_op_frsq; + case GLSLstd450FMin: return nir_op_fmin; + case GLSLstd450UMin: return nir_op_umin; + case GLSLstd450SMin: return nir_op_imin; + case GLSLstd450FMax: return nir_op_fmax; + case GLSLstd450UMax: return nir_op_umax; + case GLSLstd450SMax: return nir_op_imax; + case GLSLstd450FMix: return nir_op_flrp; + case GLSLstd450Fma: return nir_op_ffma; + case GLSLstd450Ldexp: return nir_op_ldexp; + case GLSLstd450FindILsb: return nir_op_find_lsb; + case GLSLstd450FindSMsb: return nir_op_ifind_msb; + case GLSLstd450FindUMsb: return nir_op_ufind_msb; + + /* Packing/Unpacking functions */ + case GLSLstd450PackSnorm4x8: return nir_op_pack_snorm_4x8; + case GLSLstd450PackUnorm4x8: return nir_op_pack_unorm_4x8; + case GLSLstd450PackSnorm2x16: return nir_op_pack_snorm_2x16; + case 
GLSLstd450PackUnorm2x16: return nir_op_pack_unorm_2x16; + case GLSLstd450PackHalf2x16: return nir_op_pack_half_2x16; + case GLSLstd450UnpackSnorm4x8: return nir_op_unpack_snorm_4x8; + case GLSLstd450UnpackUnorm4x8: return nir_op_unpack_unorm_4x8; + case GLSLstd450UnpackSnorm2x16: return nir_op_unpack_snorm_2x16; + case GLSLstd450UnpackUnorm2x16: return nir_op_unpack_unorm_2x16; + case GLSLstd450UnpackHalf2x16: return nir_op_unpack_half_2x16; + + default: + unreachable("No NIR equivalent"); + } +} + +static void +handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, + const uint32_t *w, unsigned count) +{ + struct nir_builder *nb = &b->nb; + const struct glsl_type *dest_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_create_ssa_value(b, dest_type); + + /* Collect the various SSA sources */ + unsigned num_inputs = count - 5; + nir_ssa_def *src[3] = { NULL, }; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 5])->def; + + switch (entrypoint) { + case GLSLstd450Radians: + val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251)); + return; + case GLSLstd450Degrees: + val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131)); + return; + case GLSLstd450Tan: + val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]), + nir_fcos(nb, src[0])); + return; + + case GLSLstd450Modf: { + nir_ssa_def *sign = nir_fsign(nb, src[0]); + nir_ssa_def *abs = nir_fabs(nb, src[0]); + val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); + nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), + nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf); + return; + } + + case GLSLstd450ModfStruct: { + nir_ssa_def *sign = nir_fsign(nb, src[0]); + nir_ssa_def *abs = nir_fabs(nb, src[0]); + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); + val->ssa->elems[1]->def = nir_fmul(nb, 
sign, nir_ffloor(nb, abs)); + return; + } + + case GLSLstd450Step: + val->ssa->def = nir_sge(nb, src[1], src[0]); + return; + + case GLSLstd450Length: + val->ssa->def = build_length(nb, src[0]); + return; + case GLSLstd450Distance: + val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1])); + return; + case GLSLstd450Normalize: + val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0])); + return; + + case GLSLstd450Exp: + val->ssa->def = build_exp(nb, src[0]); + return; + + case GLSLstd450Log: + val->ssa->def = build_log(nb, src[0]); + return; + + case GLSLstd450FClamp: + val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]); + return; + case GLSLstd450UClamp: + val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]); + return; + case GLSLstd450SClamp: + val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]); + return; + + case GLSLstd450Cross: { + unsigned yzx[4] = { 1, 2, 0, 0 }; + unsigned zxy[4] = { 2, 0, 1, 0 }; + val->ssa->def = + nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true), + nir_swizzle(nb, src[1], zxy, 3, true)), + nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true), + nir_swizzle(nb, src[1], yzx, 3, true))); + return; + } + + case GLSLstd450SmoothStep: { + /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */ + nir_ssa_def *t = + build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]), + nir_fsub(nb, src[1], src[0])), + nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0)); + /* result = t * t * (3 - 2 * t) */ + val->ssa->def = + nir_fmul(nb, t, nir_fmul(nb, t, + nir_fsub(nb, nir_imm_float(nb, 3.0), + nir_fmul(nb, nir_imm_float(nb, 2.0), t)))); + return; + } + + case GLSLstd450FaceForward: + val->ssa->def = + nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]), + nir_imm_float(nb, 0.0)), + src[0], nir_fneg(nb, src[0])); + return; + + case GLSLstd450Reflect: + /* I - 2 * dot(N, I) * N */ + val->ssa->def = + nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0), + nir_fmul(nb, nir_fdot(nb, src[0], 
src[1]), + src[1]))); + return; + + case GLSLstd450Refract: { + nir_ssa_def *I = src[0]; + nir_ssa_def *N = src[1]; + nir_ssa_def *eta = src[2]; + nir_ssa_def *n_dot_i = nir_fdot(nb, N, I); + nir_ssa_def *one = nir_imm_float(nb, 1.0); + nir_ssa_def *zero = nir_imm_float(nb, 0.0); + /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */ + nir_ssa_def *k = + nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta, + nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i))))); + nir_ssa_def *result = + nir_fsub(nb, nir_fmul(nb, eta, I), + nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i), + nir_fsqrt(nb, k)), N)); + /* XXX: bcsel, or if statement? */ + val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result); + return; + } + + case GLSLstd450Sinh: + /* 0.5 * (e^x - e^(-x)) */ + val->ssa->def = + nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fsub(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))); + return; + + case GLSLstd450Cosh: + /* 0.5 * (e^x + e^(-x)) */ + val->ssa->def = + nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fadd(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))); + return; + + case GLSLstd450Tanh: + /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */ + val->ssa->def = + nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fsub(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))), + nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fadd(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0]))))); + return; + + case GLSLstd450Asinh: + val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]), + build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]), + nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]), + nir_imm_float(nb, 1.0f)))))); + return; + case GLSLstd450Acosh: + val->ssa->def = build_log(nb, nir_fadd(nb, src[0], + nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]), + nir_imm_float(nb, 1.0f))))); + return; + case GLSLstd450Atanh: { + nir_ssa_def *one = nir_imm_float(nb, 1.0); + val->ssa->def = 
nir_fmul(nb, nir_imm_float(nb, 0.5f), + build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]), + nir_fsub(nb, one, src[0])))); + return; + } + + case GLSLstd450Asin: + val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955); + return; + + case GLSLstd450Acos: + val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f), + build_asin(nb, src[0], 0.08132463, -0.02363318)); + return; + + case GLSLstd450Atan: + val->ssa->def = build_atan(nb, src[0]); + return; + + case GLSLstd450Atan2: + val->ssa->def = build_atan2(nb, src[0], src[1]); + return; + + case GLSLstd450Frexp: { + nir_ssa_def *exponent; + val->ssa->def = build_frexp(nb, src[0], &exponent); + nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf); + return; + } + + case GLSLstd450FrexpStruct: { + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = build_frexp(nb, src[0], + &val->ssa->elems[1]->def); + return; + } + + default: + val->ssa->def = + nir_build_alu(&b->nb, vtn_nir_alu_op_for_spirv_glsl_opcode(entrypoint), + src[0], src[1], src[2], NULL); + return; + } +} + +bool +vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *w, unsigned count) +{ + switch ((enum GLSLstd450)ext_opcode) { + case GLSLstd450Determinant: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5])); + break; + } + + case GLSLstd450MatrixInverse: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5])); + break; + } + + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + unreachable("Unhandled opcode"); + + default: + handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count); + } + + return true; +} diff --git 
a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h new file mode 100644 index 00000000000..3840d8c4b65 --- /dev/null +++ b/src/compiler/spirv/vtn_private.h @@ -0,0 +1,484 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir/nir.h" +#include "nir/nir_builder.h" +#include "nir/nir_array.h" +#include "nir_spirv.h" +#include "spirv.h" + +struct vtn_builder; +struct vtn_decoration; + +enum vtn_value_type { + vtn_value_type_invalid = 0, + vtn_value_type_undef, + vtn_value_type_string, + vtn_value_type_decoration_group, + vtn_value_type_type, + vtn_value_type_constant, + vtn_value_type_access_chain, + vtn_value_type_function, + vtn_value_type_block, + vtn_value_type_ssa, + vtn_value_type_extension, + vtn_value_type_image_pointer, + vtn_value_type_sampled_image, +}; + +enum vtn_branch_type { + vtn_branch_type_none, + vtn_branch_type_switch_break, + vtn_branch_type_switch_fallthrough, + vtn_branch_type_loop_break, + vtn_branch_type_loop_continue, + vtn_branch_type_discard, + vtn_branch_type_return, +}; + +enum vtn_cf_node_type { + vtn_cf_node_type_block, + vtn_cf_node_type_if, + vtn_cf_node_type_loop, + vtn_cf_node_type_switch, +}; + +struct vtn_cf_node { + struct list_head link; + enum vtn_cf_node_type type; +}; + +struct vtn_loop { + struct vtn_cf_node node; + + /* The main body of the loop */ + struct list_head body; + + /* The "continue" part of the loop. This gets executed after the body + * and is where you go when you hit a continue. 
+ */ + struct list_head cont_body; + + SpvLoopControlMask control; +}; + +struct vtn_if { + struct vtn_cf_node node; + + uint32_t condition; + + enum vtn_branch_type then_type; + struct list_head then_body; + + enum vtn_branch_type else_type; + struct list_head else_body; + + SpvSelectionControlMask control; +}; + +struct vtn_case { + struct list_head link; + + struct list_head body; + + /* The block that starts this case */ + struct vtn_block *start_block; + + /* The fallthrough case, if any */ + struct vtn_case *fallthrough; + + /* The uint32_t values that map to this case */ + nir_array values; + + /* True if this is the default case */ + bool is_default; + + /* Initialized to false; used when sorting the list of cases */ + bool visited; +}; + +struct vtn_switch { + struct vtn_cf_node node; + + uint32_t selector; + + struct list_head cases; +}; + +struct vtn_block { + struct vtn_cf_node node; + + /** A pointer to the label instruction */ + const uint32_t *label; + + /** A pointer to the merge instruction (or NULL if non exists) */ + const uint32_t *merge; + + /** A pointer to the branch instruction that ends this block */ + const uint32_t *branch; + + enum vtn_branch_type branch_type; + + /** Points to the loop that this block starts (if it starts a loop) */ + struct vtn_loop *loop; + + /** Points to the switch case started by this block (if any) */ + struct vtn_case *switch_case; + + /** The last block in this SPIR-V block. 
*/ + nir_block *end_block; +}; + +struct vtn_function { + struct exec_node node; + + nir_function_impl *impl; + struct vtn_block *start_block; + + struct list_head body; + + const uint32_t *end; + + SpvFunctionControlMask control; +}; + +typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, + const uint32_t *, unsigned); + +void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, + const uint32_t *end); +void vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, + vtn_instruction_handler instruction_handler); + +const uint32_t * +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, + const uint32_t *end, vtn_instruction_handler handler); + +struct vtn_ssa_value { + union { + nir_ssa_def *def; + struct vtn_ssa_value **elems; + }; + + /* For matrices, if this is non-NULL, then this value is actually the + * transpose of some other value. The value that `transposed` points to + * always dominates this value. + */ + struct vtn_ssa_value *transposed; + + const struct glsl_type *type; +}; + +struct vtn_type { + const struct glsl_type *type; + + /* The value that declares this type. Used for finding decorations */ + struct vtn_value *val; + + /* for matrices, whether the matrix is stored row-major */ + bool row_major; + + /* for structs, the offset of each member */ + unsigned *offsets; + + /* for structs, whether it was decorated as a "non-SSBO-like" block */ + bool block; + + /* for structs, whether it was decorated as an "SSBO-like" block */ + bool buffer_block; + + /* for structs with block == true, whether this is a builtin block (i.e. a + * block that contains only builtins). 
+ */ + bool builtin_block; + + /* Image format for image_load_store type images */ + unsigned image_format; + + /* Access qualifier for storage images */ + SpvAccessQualifier access_qualifier; + + /* for arrays and matrices, the array stride */ + unsigned stride; + + /* for arrays, the vtn_type for the elements of the array */ + struct vtn_type *array_element; + + /* for structures, the vtn_type for each member */ + struct vtn_type **members; + + /* Whether this type, or a parent type, has been decorated as a builtin */ + bool is_builtin; + + SpvBuiltIn builtin; +}; + +struct vtn_variable; + +enum vtn_access_mode { + vtn_access_mode_id, + vtn_access_mode_literal, +}; + +struct vtn_access_link { + enum vtn_access_mode mode; + uint32_t id; +}; + +struct vtn_access_chain { + struct vtn_variable *var; + + uint32_t length; + + /* Struct elements and array offsets */ + struct vtn_access_link link[0]; +}; + +enum vtn_variable_mode { + vtn_variable_mode_local, + vtn_variable_mode_global, + vtn_variable_mode_param, + vtn_variable_mode_ubo, + vtn_variable_mode_ssbo, + vtn_variable_mode_push_constant, + vtn_variable_mode_image, + vtn_variable_mode_sampler, + vtn_variable_mode_workgroup, + vtn_variable_mode_input, + vtn_variable_mode_output, +}; + +struct vtn_variable { + enum vtn_variable_mode mode; + + struct vtn_type *type; + + unsigned descriptor_set; + unsigned binding; + + nir_variable *var; + nir_variable **members; + + struct vtn_access_chain chain; +}; + +struct vtn_image_pointer { + struct vtn_access_chain *image; + nir_ssa_def *coord; + nir_ssa_def *sample; +}; + +struct vtn_sampled_image { + struct vtn_access_chain *image; /* Image or array of images */ + struct vtn_access_chain *sampler; /* Sampler */ +}; + +struct vtn_value { + enum vtn_value_type value_type; + const char *name; + struct vtn_decoration *decoration; + union { + void *ptr; + char *str; + struct vtn_type *type; + struct { + nir_constant *constant; + const struct glsl_type *const_type; + }; + struct 
vtn_access_chain *access_chain; + struct vtn_image_pointer *image; + struct vtn_sampled_image *sampled_image; + struct vtn_function *func; + struct vtn_block *block; + struct vtn_ssa_value *ssa; + vtn_instruction_handler ext_handler; + }; +}; + +#define VTN_DEC_DECORATION -1 +#define VTN_DEC_EXECUTION_MODE -2 +#define VTN_DEC_STRUCT_MEMBER0 0 + +struct vtn_decoration { + struct vtn_decoration *next; + + /* Specifies how to apply this decoration. Negative values represent a + * decoration or execution mode. (See the VTN_DEC_ #defines above.) + * Non-negative values specify that it applies to a structure member. + */ + int scope; + + const uint32_t *literals; + struct vtn_value *group; + + union { + SpvDecoration decoration; + SpvExecutionMode exec_mode; + }; +}; + +struct vtn_builder { + nir_builder nb; + + nir_shader *shader; + nir_function_impl *impl; + struct vtn_block *block; + + /* Current file, line, and column. Useful for debugging. Set + * automatically by vtn_foreach_instruction. + */ + char *file; + int line, col; + + /* + * In SPIR-V, constants are global, whereas in NIR, the load_const + * instruction we use is per-function. So while we parse each function, we + * keep a hash table of constants we've resolved to nir_ssa_value's so + * far, and we lazily resolve them when we see them used in a function. + */ + struct hash_table *const_table; + + /* + * Map from phi instructions (pointer to the start of the instruction) + * to the variable corresponding to it. 
+ */ + struct hash_table *phi_table; + + unsigned num_specializations; + struct nir_spirv_specialization *specializations; + + unsigned value_id_bound; + struct vtn_value *values; + + gl_shader_stage entry_point_stage; + const char *entry_point_name; + struct vtn_value *entry_point; + bool origin_upper_left; + + struct vtn_function *func; + struct exec_list functions; + + /* Current function parameter index */ + unsigned func_param_idx; + + bool has_loop_continue; +}; + +static inline struct vtn_value * +vtn_push_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + assert(value_id < b->value_id_bound); + assert(b->values[value_id].value_type == vtn_value_type_invalid); + + b->values[value_id].value_type = value_type; + + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) +{ + assert(value_id < b->value_id_bound); + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + assert(val->value_type == value_type); + return val; +} + +struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); + +struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b, + const struct glsl_type *type); + +struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b, + struct vtn_ssa_value *src); + +nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, + unsigned index); +nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index); +nir_ssa_def *vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, unsigned index); +nir_ssa_def *vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index); + +nir_deref_var *vtn_nir_deref(struct vtn_builder *b, uint32_t id); 
+ +nir_deref_var *vtn_access_chain_to_deref(struct vtn_builder *b, + struct vtn_access_chain *chain); +nir_ssa_def * +vtn_access_chain_to_offset(struct vtn_builder *b, + struct vtn_access_chain *chain, + nir_ssa_def **index_out, struct vtn_type **type_out, + unsigned *end_idx_out, bool stop_at_matrix); + +struct vtn_ssa_value *vtn_local_load(struct vtn_builder *b, nir_deref_var *src); + +void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest); + +struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src); + +void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_access_chain *dest); + +void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count); + + +typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + int member, + const struct vtn_decoration *, + void *); + +void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data); + +typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + const struct vtn_decoration *, + void *); + +void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, + vtn_execution_mode_foreach_cb cb, void *data); + +nir_op vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap); + +void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count); + +bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *words, unsigned count); diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c new file mode 100644 index 00000000000..3cbac1e5da8 --- /dev/null +++ b/src/compiler/spirv/vtn_variables.c @@ -0,0 +1,1415 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "vtn_private.h" + +static struct vtn_access_chain * +vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old, + unsigned new_ids) +{ + struct vtn_access_chain *chain; + + unsigned new_len = old->length + new_ids; + chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->link[0])); + + chain->var = old->var; + chain->length = new_len; + + for (unsigned i = 0; i < old->length; i++) + chain->link[i] = old->link[i]; + + return chain; +} + +static nir_ssa_def * +vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link, + unsigned stride) +{ + assert(stride > 0); + if (link.mode == vtn_access_mode_literal) { + return nir_imm_int(&b->nb, link.id * stride); + } else if (stride == 1) { + return vtn_ssa_value(b, link.id)->def; + } else { + return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def, + nir_imm_int(&b->nb, stride)); + } +} + +static struct vtn_type * +vtn_access_chain_tail_type(struct vtn_builder *b, + struct vtn_access_chain *chain) +{ + struct vtn_type *type = chain->var->type; + for (unsigned i = 0; i < chain->length; i++) { + if (glsl_type_is_struct(type->type)) { + assert(chain->link[i].mode == vtn_access_mode_literal); + type = type->members[chain->link[i].id]; + } else { + type = type->array_element; + } + } + return type; +} + +/* Crawls a chain of array derefs and rewrites the types so that the + * lengths stay the same but the terminal type is the one given by + * tail_type. This is useful for split structures. 
+ */ +static void +rewrite_deref_types(nir_deref *deref, const struct glsl_type *type) +{ + deref->type = type; + if (deref->child) { + assert(deref->child->deref_type == nir_deref_type_array); + assert(glsl_type_is_array(deref->type)); + rewrite_deref_types(deref->child, glsl_get_array_element(type)); + } +} + +nir_deref_var * +vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) +{ + nir_deref_var *deref_var; + if (chain->var->var) { + deref_var = nir_deref_var_create(b, chain->var->var); + } else { + assert(chain->var->members); + /* Create the deref_var manually. It will get filled out later. */ + deref_var = rzalloc(b, nir_deref_var); + deref_var->deref.deref_type = nir_deref_type_var; + } + + struct vtn_type *deref_type = chain->var->type; + nir_deref *tail = &deref_var->deref; + nir_variable **members = chain->var->members; + + for (unsigned i = 0; i < chain->length; i++) { + enum glsl_base_type base_type = glsl_get_base_type(deref_type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_ARRAY: { + deref_type = deref_type->array_element; + + nir_deref_array *deref_arr = nir_deref_array_create(b); + deref_arr->deref.type = deref_type->type; + + if (chain->link[i].mode == vtn_access_mode_literal) { + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = chain->link[i].id; + } else { + assert(chain->link[i].mode == vtn_access_mode_id); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + deref_arr->base_offset = 0; + deref_arr->indirect = + nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def); + } + tail->child = &deref_arr->deref; + tail = tail->child; + break; + } + + case GLSL_TYPE_STRUCT: { + assert(chain->link[i].mode == vtn_access_mode_literal); + unsigned idx = chain->link[i].id; + deref_type = deref_type->members[idx]; + if (members) { + /* This is a pre-split 
structure. */ + deref_var->var = members[idx]; + rewrite_deref_types(&deref_var->deref, members[idx]->type); + assert(tail->type == deref_type->type); + members = NULL; + } else { + nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); + deref_struct->deref.type = deref_type->type; + tail->child = &deref_struct->deref; + tail = tail->child; + } + break; + } + default: + unreachable("Invalid type for deref"); + } + } + + assert(members == NULL); + return deref_var; +} + +static void +_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref, + nir_deref *tail, struct vtn_ssa_value *inout) +{ + /* The deref tail may contain a deref to select a component of a vector (in + * other words, it might not be an actual tail) so we have to save it away + * here since we overwrite it later. + */ + nir_deref *old_child = tail->child; + + if (glsl_type_is_vector_or_scalar(tail->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. + */ + tail->child = NULL; + + nir_intrinsic_op op = load ? 
nir_intrinsic_load_var : + nir_intrinsic_store_var; + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); + intrin->variables[0] = + nir_deref_as_var(nir_copy_deref(intrin, &deref->deref)); + intrin->num_components = glsl_get_vector_elements(tail->type); + + if (load) { + nir_ssa_dest_init(&intrin->instr, &intrin->dest, + intrin->num_components, + glsl_get_bit_size(glsl_get_base_type(tail->type)), + NULL); + inout->def = &intrin->dest.ssa; + } else { + nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1); + intrin->src[0] = nir_src_for_ssa(inout->def); + } + + nir_builder_instr_insert(&b->nb, &intrin->instr); + } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(tail->type)) { + unsigned elems = glsl_get_length(tail->type); + nir_deref_array *deref_arr = nir_deref_array_create(b); + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->deref.type = glsl_get_array_element(tail->type); + tail->child = &deref_arr->deref; + for (unsigned i = 0; i < elems; i++) { + deref_arr->base_offset = i; + _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); + } + } else { + assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(tail->type); + nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0); + tail->child = &deref_struct->deref; + for (unsigned i = 0; i < elems; i++) { + deref_struct->index = i; + deref_struct->deref.type = glsl_get_struct_field(tail->type, i); + _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); + } + } + + tail->child = old_child; +} + +nir_deref_var * +vtn_nir_deref(struct vtn_builder *b, uint32_t id) +{ + struct vtn_access_chain *chain = + vtn_value(b, id, vtn_value_type_access_chain)->access_chain; + + return vtn_access_chain_to_deref(b, chain); +} + +/* + * Gets the NIR-level deref tail, which may have as a child an array deref + * selecting which component due to OpAccessChain 
supporting per-component + * indexing in SPIR-V. + */ +static nir_deref * +get_deref_tail(nir_deref_var *deref) +{ + nir_deref *cur = &deref->deref; + while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) + cur = cur->child; + + return cur; +} + +struct vtn_ssa_value * +vtn_local_load(struct vtn_builder *b, nir_deref_var *src) +{ + nir_deref *src_tail = get_deref_tail(src); + struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type); + _vtn_local_load_store(b, true, src, src_tail, val); + + if (src_tail->child) { + nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); + assert(vec_deref->deref.child == NULL); + val->type = vec_deref->deref.type; + if (vec_deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); + else + val->def = vtn_vector_extract_dynamic(b, val->def, + vec_deref->indirect.ssa); + } + + return val; +} + +void +vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest) +{ + nir_deref *dest_tail = get_deref_tail(dest); + + if (dest_tail->child) { + struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type); + _vtn_local_load_store(b, true, dest, dest_tail, val); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_local_load_store(b, false, dest, dest_tail, val); + } else { + _vtn_local_load_store(b, false, dest, dest_tail, src); + } +} + +static nir_ssa_def * +get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, + struct vtn_type **type, unsigned *chain_idx) +{ + /* Push constants have no explicit binding */ + if (chain->var->mode == vtn_variable_mode_push_constant) { + *chain_idx = 0; + *type = 
chain->var->type; + return NULL; + } + + nir_ssa_def *array_index; + if (glsl_type_is_array(chain->var->type->type)) { + assert(chain->length > 0); + array_index = vtn_access_link_as_ssa(b, chain->link[0], 1); + *chain_idx = 1; + *type = chain->var->type->array_element; + } else { + array_index = nir_imm_int(&b->nb, 0); + *chain_idx = 0; + *type = chain->var->type; + } + + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->nb.shader, + nir_intrinsic_vulkan_resource_index); + instr->src[0] = nir_src_for_ssa(array_index); + nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set); + nir_intrinsic_set_binding(instr, chain->var->binding); + + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); + nir_builder_instr_insert(&b->nb, &instr->instr); + + return &instr->dest.ssa; +} + +nir_ssa_def * +vtn_access_chain_to_offset(struct vtn_builder *b, + struct vtn_access_chain *chain, + nir_ssa_def **index_out, struct vtn_type **type_out, + unsigned *end_idx_out, bool stop_at_matrix) +{ + unsigned idx = 0; + struct vtn_type *type; + *index_out = get_vulkan_resource_index(b, chain, &type, &idx); + + nir_ssa_def *offset = nir_imm_int(&b->nb, 0); + for (; idx < chain->length; idx++) { + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + /* Some users may not want matrix or vector derefs */ + if (stop_at_matrix) + goto end; + /* Fall through */ + + case GLSL_TYPE_ARRAY: + offset = nir_iadd(&b->nb, offset, + vtn_access_link_as_ssa(b, chain->link[idx], + type->stride)); + + type = type->array_element; + break; + + case GLSL_TYPE_STRUCT: { + assert(chain->link[idx].mode == vtn_access_mode_literal); + unsigned member = chain->link[idx].id; + offset = nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, type->offsets[member])); + type = type->members[member]; + break; + } + + default: + unreachable("Invalid type for 
deref"); + } + } + +end: + *type_out = type; + if (end_idx_out) + *end_idx_out = idx; + + return offset; +} + +static void +_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, + nir_ssa_def *index, nir_ssa_def *offset, + struct vtn_ssa_value **inout, const struct glsl_type *type) +{ + nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op); + instr->num_components = glsl_get_vector_elements(type); + + int src = 0; + if (!load) { + nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1); + instr->src[src++] = nir_src_for_ssa((*inout)->def); + } + + /* We set the base and size for push constant load to the entire push + * constant block for now. + */ + if (op == nir_intrinsic_load_push_constant) { + nir_intrinsic_set_base(instr, 0); + nir_intrinsic_set_range(instr, 128); + } + + if (index) + instr->src[src++] = nir_src_for_ssa(index); + + instr->src[src++] = nir_src_for_ssa(offset); + + if (load) { + nir_ssa_dest_init(&instr->instr, &instr->dest, + instr->num_components, + glsl_get_bit_size(glsl_get_base_type(type)), NULL); + (*inout)->def = &instr->dest.ssa; + } + + nir_builder_instr_insert(&b->nb, &instr->instr); + + if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL) + (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0)); +} + +static void +_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, + nir_ssa_def *index, nir_ssa_def *offset, + struct vtn_access_chain *chain, unsigned chain_idx, + struct vtn_type *type, struct vtn_ssa_value **inout) +{ + if (chain && chain_idx >= chain->length) + chain = NULL; + + if (load && chain == NULL && *inout == NULL) + *inout = vtn_create_ssa_value(b, type->type); + + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* This is where things get interesting. 
At this point, we've hit + * a vector, a scalar, or a matrix. + */ + if (glsl_type_is_matrix(type->type)) { + if (chain == NULL) { + /* Loading the whole matrix */ + struct vtn_ssa_value *transpose; + unsigned num_ops, vec_width; + if (type->row_major) { + num_ops = glsl_get_vector_elements(type->type); + vec_width = glsl_get_matrix_columns(type->type); + if (load) { + const struct glsl_type *transpose_type = + glsl_matrix_type(base_type, vec_width, num_ops); + *inout = vtn_create_ssa_value(b, transpose_type); + } else { + transpose = vtn_ssa_transpose(b, *inout); + inout = &transpose; + } + } else { + num_ops = glsl_get_matrix_columns(type->type); + vec_width = glsl_get_vector_elements(type->type); + } + + for (unsigned i = 0; i < num_ops; i++) { + nir_ssa_def *elem_offset = + nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, i * type->stride)); + _vtn_load_store_tail(b, op, load, index, elem_offset, + &(*inout)->elems[i], + glsl_vector_type(base_type, vec_width)); + } + + if (load && type->row_major) + *inout = vtn_ssa_transpose(b, *inout); + } else if (type->row_major) { + /* Row-major but with an access chiain. 
*/ + nir_ssa_def *col_offset = + vtn_access_link_as_ssa(b, chain->link[chain_idx], + type->array_element->stride); + offset = nir_iadd(&b->nb, offset, col_offset); + + if (chain_idx + 1 < chain->length) { + /* Picking off a single element */ + nir_ssa_def *row_offset = + vtn_access_link_as_ssa(b, chain->link[chain_idx + 1], + type->stride); + offset = nir_iadd(&b->nb, offset, row_offset); + if (load) + *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type)); + _vtn_load_store_tail(b, op, load, index, offset, inout, + glsl_scalar_type(base_type)); + } else { + /* Grabbing a column; picking one element off each row */ + unsigned num_comps = glsl_get_vector_elements(type->type); + const struct glsl_type *column_type = + glsl_get_column_type(type->type); + + nir_ssa_def *comps[4]; + for (unsigned i = 0; i < num_comps; i++) { + nir_ssa_def *elem_offset = + nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, i * type->stride)); + + struct vtn_ssa_value *comp, temp_val; + if (!load) { + temp_val.def = nir_channel(&b->nb, (*inout)->def, i); + temp_val.type = glsl_scalar_type(base_type); + } + comp = &temp_val; + _vtn_load_store_tail(b, op, load, index, elem_offset, + &comp, glsl_scalar_type(base_type)); + comps[i] = comp->def; + } + + if (load) { + if (*inout == NULL) + *inout = vtn_create_ssa_value(b, column_type); + + (*inout)->def = nir_vec(&b->nb, comps, num_comps); + } + } + } else { + /* Column-major with a deref. Fall through to array case. */ + nir_ssa_def *col_offset = + vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); + offset = nir_iadd(&b->nb, offset, col_offset); + + _vtn_block_load_store(b, op, load, index, offset, + chain, chain_idx + 1, + type->array_element, inout); + } + } else if (chain == NULL) { + /* Single whole vector */ + assert(glsl_type_is_vector_or_scalar(type->type)); + _vtn_load_store_tail(b, op, load, index, offset, inout, type->type); + } else { + /* Single component of a vector. Fall through to array case. 
*/ + nir_ssa_def *elem_offset = + vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); + offset = nir_iadd(&b->nb, offset, elem_offset); + + _vtn_block_load_store(b, op, load, index, offset, NULL, 0, + type->array_element, inout); + } + return; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(type->type); + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *elem_off = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); + _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, + type->array_element, &(*inout)->elems[i]); + } + return; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(type->type); + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *elem_off = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); + _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, + type->members[i], &(*inout)->elems[i]); + } + return; + } + + default: + unreachable("Invalid block member type"); + } +} + +static struct vtn_ssa_value * +vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src) +{ + nir_intrinsic_op op; + switch (src->var->mode) { + case vtn_variable_mode_ubo: + op = nir_intrinsic_load_ubo; + break; + case vtn_variable_mode_ssbo: + op = nir_intrinsic_load_ssbo; + break; + case vtn_variable_mode_push_constant: + op = nir_intrinsic_load_push_constant; + break; + default: + assert(!"Invalid block variable mode"); + } + + nir_ssa_def *offset, *index = NULL; + struct vtn_type *type; + unsigned chain_idx; + offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true); + + struct vtn_ssa_value *value = NULL; + _vtn_block_load_store(b, op, true, index, offset, + src, chain_idx, type, &value); + return value; +} + +static void +vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_access_chain *dst) +{ + nir_ssa_def *offset, *index = NULL; + struct vtn_type *type; + unsigned chain_idx; + offset = vtn_access_chain_to_offset(b, dst, 
&index, &type, &chain_idx, true); + + _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset, + dst, chain_idx, type, &src); +} + +static bool +vtn_variable_is_external_block(struct vtn_variable *var) +{ + return var->mode == vtn_variable_mode_ssbo || + var->mode == vtn_variable_mode_ubo || + var->mode == vtn_variable_mode_push_constant; +} + +static void +_vtn_variable_load_store(struct vtn_builder *b, bool load, + struct vtn_access_chain *chain, + struct vtn_type *tail_type, + struct vtn_ssa_value **inout) +{ + enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* At this point, we have a scalar, vector, or matrix so we know that + * there cannot be any structure splitting still in the way. By + * stopping at the matrix level rather than the vector level, we + * ensure that matrices get loaded in the optimal way even if they + * are storred row-major in a UBO. + */ + if (load) { + *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain)); + } else { + vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain)); + } + return; + + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: { + struct vtn_access_chain *new_chain = + vtn_access_chain_extend(b, chain, 1); + new_chain->link[chain->length].mode = vtn_access_mode_literal; + unsigned elems = glsl_get_length(tail_type->type); + if (load) { + assert(*inout == NULL); + *inout = rzalloc(b, struct vtn_ssa_value); + (*inout)->type = tail_type->type; + (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems); + } + for (unsigned i = 0; i < elems; i++) { + new_chain->link[chain->length].id = i; + struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ? 
+ tail_type->array_element : tail_type->members[i]; + _vtn_variable_load_store(b, load, new_chain, elem_type, + &(*inout)->elems[i]); + } + return; + } + + default: + unreachable("Invalid access chain type"); + } +} + +struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src) +{ + if (vtn_variable_is_external_block(src->var)) { + return vtn_block_load(b, src); + } else { + struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); + struct vtn_ssa_value *val = NULL; + _vtn_variable_load_store(b, true, src, tail_type, &val); + return val; + } +} + +void +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_access_chain *dest) +{ + if (vtn_variable_is_external_block(dest->var)) { + assert(dest->var->mode == vtn_variable_mode_ssbo); + vtn_block_store(b, src, dest); + } else { + struct vtn_type *tail_type = vtn_access_chain_tail_type(b, dest); + _vtn_variable_load_store(b, false, dest, tail_type, &src); + } +} + +static void +_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, + struct vtn_access_chain *src, struct vtn_type *tail_type) +{ + enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* At this point, we have a scalar, vector, or matrix so we know that + * there cannot be any structure splitting still in the way. By + * stopping at the matrix level rather than the vector level, we + * ensure that matrices get loaded in the optimal way even if they + * are storred row-major in a UBO. 
+ */ + vtn_variable_store(b, vtn_variable_load(b, src), dest); + return; + + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: { + struct vtn_access_chain *new_src, *new_dest; + new_src = vtn_access_chain_extend(b, src, 1); + new_dest = vtn_access_chain_extend(b, dest, 1); + new_src->link[src->length].mode = vtn_access_mode_literal; + new_dest->link[dest->length].mode = vtn_access_mode_literal; + unsigned elems = glsl_get_length(tail_type->type); + for (unsigned i = 0; i < elems; i++) { + new_src->link[src->length].id = i; + new_dest->link[dest->length].id = i; + struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ? + tail_type->array_element : tail_type->members[i]; + _vtn_variable_copy(b, new_dest, new_src, elem_type); + } + return; + } + + default: + unreachable("Invalid access chain type"); + } +} + +static void +vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, + struct vtn_access_chain *src) +{ + struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); + assert(vtn_access_chain_tail_type(b, dest)->type == tail_type->type); + + /* TODO: At some point, we should add a special-case for when we can + * just emit a copy_var intrinsic. + */ + _vtn_variable_copy(b, dest, src, tail_type); +} + +static void +set_mode_system_value(nir_variable_mode *mode) +{ + assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); + *mode = nir_var_system_value; +} + +static void +vtn_get_builtin_location(struct vtn_builder *b, + SpvBuiltIn builtin, int *location, + nir_variable_mode *mode) +{ + switch (builtin) { + case SpvBuiltInPosition: + *location = VARYING_SLOT_POS; + break; + case SpvBuiltInPointSize: + *location = VARYING_SLOT_PSIZ; + break; + case SpvBuiltInClipDistance: + *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? 
*/ + break; + case SpvBuiltInCullDistance: + /* XXX figure this out */ + break; + case SpvBuiltInVertexIndex: + *location = SYSTEM_VALUE_VERTEX_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInVertexId: + /* Vulkan defines VertexID to be zero-based and reserves the new + * builtin keyword VertexIndex to indicate the non-zero-based value. + */ + *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + set_mode_system_value(mode); + break; + case SpvBuiltInInstanceIndex: + *location = SYSTEM_VALUE_INSTANCE_INDEX; + set_mode_system_value(mode); + break; + case SpvBuiltInInstanceId: + *location = SYSTEM_VALUE_INSTANCE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInPrimitiveId: + *location = VARYING_SLOT_PRIMITIVE_ID; + *mode = nir_var_shader_out; + break; + case SpvBuiltInInvocationId: + *location = SYSTEM_VALUE_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLayer: + *location = VARYING_SLOT_LAYER; + *mode = nir_var_shader_out; + break; + case SpvBuiltInViewportIndex: + *location = VARYING_SLOT_VIEWPORT; + if (b->shader->stage == MESA_SHADER_GEOMETRY) + *mode = nir_var_shader_out; + else if (b->shader->stage == MESA_SHADER_FRAGMENT) + *mode = nir_var_shader_in; + else + unreachable("invalid stage for SpvBuiltInViewportIndex"); + break; + case SpvBuiltInTessLevelOuter: + case SpvBuiltInTessLevelInner: + case SpvBuiltInTessCoord: + case SpvBuiltInPatchVertices: + unreachable("no tessellation support"); + case SpvBuiltInFragCoord: + *location = VARYING_SLOT_POS; + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInPointCoord: + *location = VARYING_SLOT_PNTC; + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInFrontFacing: + *location = VARYING_SLOT_FACE; + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInSampleId: + *location = SYSTEM_VALUE_SAMPLE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInSamplePosition: + *location = SYSTEM_VALUE_SAMPLE_POS; + set_mode_system_value(mode); + 
break; + case SpvBuiltInSampleMask: + *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */ + set_mode_system_value(mode); + break; + case SpvBuiltInFragDepth: + *location = FRAG_RESULT_DEPTH; + assert(*mode == nir_var_shader_out); + break; + case SpvBuiltInNumWorkgroups: + *location = SYSTEM_VALUE_NUM_WORK_GROUPS; + set_mode_system_value(mode); + break; + case SpvBuiltInWorkgroupSize: + /* This should already be handled */ + unreachable("unsupported builtin"); + break; + case SpvBuiltInWorkgroupId: + *location = SYSTEM_VALUE_WORK_GROUP_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLocalInvocationId: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLocalInvocationIndex: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; + set_mode_system_value(mode); + break; + case SpvBuiltInGlobalInvocationId: + *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInHelperInvocation: + default: + unreachable("unsupported builtin"); + } +} + +static void +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_var) +{ + struct vtn_variable *vtn_var = void_var; + + /* Handle decorations that apply to a vtn_variable as a whole */ + switch (dec->decoration) { + case SpvDecorationBinding: + vtn_var->binding = dec->literals[0]; + return; + case SpvDecorationDescriptorSet: + vtn_var->descriptor_set = dec->literals[0]; + return; + + case SpvDecorationLocation: { + unsigned location = dec->literals[0]; + bool is_vertex_input; + if (b->shader->stage == MESA_SHADER_FRAGMENT && + vtn_var->mode == vtn_variable_mode_output) { + is_vertex_input = false; + location += FRAG_RESULT_DATA0; + } else if (b->shader->stage == MESA_SHADER_VERTEX && + vtn_var->mode == vtn_variable_mode_input) { + is_vertex_input = true; + location += VERT_ATTRIB_GENERIC0; + } else if (vtn_var->mode == vtn_variable_mode_input || + 
vtn_var->mode == vtn_variable_mode_output) { + is_vertex_input = false; + location += VARYING_SLOT_VAR0; + } else { + assert(!"Location must be on input or output variable"); + } + + if (vtn_var->var) { + vtn_var->var->data.location = location; + vtn_var->var->data.explicit_location = true; + } else { + assert(vtn_var->members); + unsigned length = glsl_get_length(vtn_var->type->type); + for (unsigned i = 0; i < length; i++) { + vtn_var->members[i]->data.location = location; + vtn_var->members[i]->data.explicit_location = true; + location += + glsl_count_attribute_slots(vtn_var->members[i]->interface_type, + is_vertex_input); + } + } + return; + } + + default: + break; + } + + /* Now we handle decorations that apply to a particular nir_variable */ + nir_variable *nir_var = vtn_var->var; + if (val->value_type == vtn_value_type_access_chain) { + assert(val->access_chain->length == 0); + assert(val->access_chain->var == void_var); + assert(member == -1); + } else { + assert(val->value_type == vtn_value_type_type); + if (member != -1) + nir_var = vtn_var->members[member]; + } + + if (nir_var == NULL) + return; + + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ + case SpvDecorationNoPerspective: + nir_var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + nir_var->data.interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + nir_var->data.centroid = true; + break; + case SpvDecorationSample: + nir_var->data.sample = true; + break; + case SpvDecorationInvariant: + nir_var->data.invariant = true; + break; + case SpvDecorationConstant: + assert(nir_var->constant_initializer != NULL); + nir_var->data.read_only = true; + break; + case SpvDecorationNonWritable: + nir_var->data.read_only = true; + break; + case SpvDecorationComponent: + nir_var->data.location_frac = dec->literals[0]; + break; + case SpvDecorationIndex: + nir_var->data.explicit_index = true; + nir_var->data.index = dec->literals[0]; + break; + case SpvDecorationBuiltIn: { + SpvBuiltIn builtin = dec->literals[0]; + + if (builtin == SpvBuiltInWorkgroupSize) { + /* This shouldn't be a builtin. It's actually a constant. */ + nir_var->data.mode = nir_var_global; + nir_var->data.read_only = true; + + nir_constant *c = rzalloc(nir_var, nir_constant); + c->value.u[0] = b->shader->info.cs.local_size[0]; + c->value.u[1] = b->shader->info.cs.local_size[1]; + c->value.u[2] = b->shader->info.cs.local_size[2]; + nir_var->constant_initializer = c; + break; + } + + nir_variable_mode mode = nir_var->data.mode; + vtn_get_builtin_location(b, builtin, &nir_var->data.location, &mode); + nir_var->data.explicit_location = true; + nir_var->data.mode = mode; + + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) + nir_var->data.origin_upper_left = b->origin_upper_left; + break; + } + case SpvDecorationRowMajor: + case SpvDecorationColMajor: + case SpvDecorationGLSLShared: + case SpvDecorationPatch: + case SpvDecorationRestrict: + case SpvDecorationAliased: + case SpvDecorationVolatile: + case SpvDecorationCoherent: + case SpvDecorationNonReadable: + case SpvDecorationUniform: + /* This is really 
nice but we have no use for it right now. */ + case SpvDecorationCPacked: + case SpvDecorationSaturatedConversion: + case SpvDecorationStream: + case SpvDecorationOffset: + case SpvDecorationXfbBuffer: + case SpvDecorationFuncParamAttr: + case SpvDecorationFPRoundingMode: + case SpvDecorationFPFastMathMode: + case SpvDecorationLinkageAttributes: + case SpvDecorationSpecId: + break; + default: + unreachable("Unhandled variable decoration"); + } +} + +/* Tries to compute the size of an interface block based on the strides and + * offsets that are provided to us in the SPIR-V source. + */ +static unsigned +vtn_type_block_size(struct vtn_type *type) +{ + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: { + unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) : + glsl_get_matrix_columns(type->type); + if (cols > 1) { + assert(type->stride > 0); + return type->stride * cols; + } else if (base_type == GLSL_TYPE_DOUBLE) { + return glsl_get_vector_elements(type->type) * 8; + } else { + return glsl_get_vector_elements(type->type) * 4; + } + } + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + unsigned num_fields = glsl_get_length(type->type); + for (unsigned f = 0; f < num_fields; f++) { + unsigned field_end = type->offsets[f] + + vtn_type_block_size(type->members[f]); + size = MAX2(size, field_end); + } + return size; + } + + case GLSL_TYPE_ARRAY: + assert(type->stride > 0); + assert(glsl_get_length(type->type) > 0); + return type->stride * glsl_get_length(type->type); + + default: + assert(!"Invalid block type"); + return 0; + } +} + +void +vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpVariable: { + struct vtn_variable *var = rzalloc(b, struct vtn_variable); + var->type = vtn_value(b, w[1], 
vtn_value_type_type)->type; + + var->chain.var = var; + var->chain.length = 0; + + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + val->access_chain = &var->chain; + + struct vtn_type *without_array = var->type; + while(glsl_type_is_array(without_array->type)) + without_array = without_array->array_element; + + nir_variable_mode nir_mode; + switch ((SpvStorageClass)w[3]) { + case SpvStorageClassUniform: + case SpvStorageClassUniformConstant: + if (without_array->block) { + var->mode = vtn_variable_mode_ubo; + b->shader->info.num_ubos++; + } else if (without_array->buffer_block) { + var->mode = vtn_variable_mode_ssbo; + b->shader->info.num_ssbos++; + } else if (glsl_type_is_image(without_array->type)) { + var->mode = vtn_variable_mode_image; + nir_mode = nir_var_uniform; + b->shader->info.num_images++; + } else if (glsl_type_is_sampler(without_array->type)) { + var->mode = vtn_variable_mode_sampler; + nir_mode = nir_var_uniform; + b->shader->info.num_textures++; + } else { + assert(!"Invalid uniform variable type"); + } + break; + case SpvStorageClassPushConstant: + var->mode = vtn_variable_mode_push_constant; + assert(b->shader->num_uniforms == 0); + b->shader->num_uniforms = vtn_type_block_size(var->type) * 4; + break; + case SpvStorageClassInput: + var->mode = vtn_variable_mode_input; + nir_mode = nir_var_shader_in; + break; + case SpvStorageClassOutput: + var->mode = vtn_variable_mode_output; + nir_mode = nir_var_shader_out; + break; + case SpvStorageClassPrivate: + var->mode = vtn_variable_mode_global; + nir_mode = nir_var_global; + break; + case SpvStorageClassFunction: + var->mode = vtn_variable_mode_local; + nir_mode = nir_var_local; + break; + case SpvStorageClassWorkgroup: + var->mode = vtn_variable_mode_workgroup; + nir_mode = nir_var_shared; + break; + case SpvStorageClassCrossWorkgroup: + case SpvStorageClassGeneric: + case SpvStorageClassAtomicCounter: + default: + unreachable("Unhandled variable storage class"); + } + 
+ switch (var->mode) { + case vtn_variable_mode_local: + case vtn_variable_mode_global: + case vtn_variable_mode_image: + case vtn_variable_mode_sampler: + case vtn_variable_mode_workgroup: + /* For these, we create the variable normally */ + var->var = rzalloc(b->shader, nir_variable); + var->var->name = ralloc_strdup(var->var, val->name); + var->var->type = var->type->type; + var->var->data.mode = nir_mode; + + switch (var->mode) { + case vtn_variable_mode_image: + case vtn_variable_mode_sampler: + var->var->interface_type = without_array->type; + break; + default: + var->var->interface_type = NULL; + break; + } + break; + + case vtn_variable_mode_input: + case vtn_variable_mode_output: { + /* For inputs and outputs, we immediately split structures. This + * is for a couple of reasons. For one, builtins may all come in + * a struct and we really want those split out into separate + * variables. For another, interpolation qualifiers can be + * applied to members of the top-level struct ane we need to be + * able to preserve that information. + */ + + int array_length = -1; + struct vtn_type *interface_type = var->type; + if (b->shader->stage == MESA_SHADER_GEOMETRY && + glsl_type_is_array(var->type->type)) { + /* In Geometry shaders (and some tessellation), inputs come + * in per-vertex arrays. However, some builtins come in + * non-per-vertex, hence the need for the is_array check. In + * any case, there are no non-builtin arrays allowed so this + * check should be sufficient. + */ + interface_type = var->type->array_element; + array_length = glsl_get_length(var->type->type); + } + + if (glsl_type_is_struct(interface_type->type)) { + /* It's a struct. Split it. 
*/ + unsigned num_members = glsl_get_length(interface_type->type); + var->members = ralloc_array(b, nir_variable *, num_members); + + for (unsigned i = 0; i < num_members; i++) { + const struct glsl_type *mtype = interface_type->members[i]->type; + if (array_length >= 0) + mtype = glsl_array_type(mtype, array_length); + + var->members[i] = rzalloc(b->shader, nir_variable); + var->members[i]->name = + ralloc_asprintf(var->members[i], "%s.%d", val->name, i); + var->members[i]->type = mtype; + var->members[i]->interface_type = + interface_type->members[i]->type; + var->members[i]->data.mode = nir_mode; + } + } else { + var->var = rzalloc(b->shader, nir_variable); + var->var->name = ralloc_strdup(var->var, val->name); + var->var->type = var->type->type; + var->var->interface_type = interface_type->type; + var->var->data.mode = nir_mode; + } + + /* For inputs and outputs, we need to grab locations and builtin + * information from the interface type. + */ + vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var); + break; + + case vtn_variable_mode_param: + unreachable("Not created through OpVariable"); + } + + case vtn_variable_mode_ubo: + case vtn_variable_mode_ssbo: + case vtn_variable_mode_push_constant: + /* These don't need actual variables. */ + break; + } + + if (count > 4) { + assert(count == 5); + nir_constant *constant = + vtn_value(b, w[4], vtn_value_type_constant)->constant; + var->var->constant_initializer = + nir_constant_clone(constant, var->var); + } + + vtn_foreach_decoration(b, val, var_decoration_cb, var); + + if (var->mode == vtn_variable_mode_image || + var->mode == vtn_variable_mode_sampler) { + /* XXX: We still need the binding information in the nir_variable + * for these. We should fix that. 
+ */ + var->var->data.binding = var->binding; + var->var->data.descriptor_set = var->descriptor_set; + + if (var->mode == vtn_variable_mode_image) + var->var->data.image.format = without_array->image_format; + } + + if (var->mode == vtn_variable_mode_local) { + assert(var->members == NULL && var->var != NULL); + nir_function_impl_add_variable(b->impl, var->var); + } else if (var->var) { + nir_shader_add_variable(b->shader, var->var); + } else if (var->members) { + unsigned count = glsl_get_length(without_array->type); + for (unsigned i = 0; i < count; i++) { + assert(var->members[i]->data.mode != nir_var_local); + nir_shader_add_variable(b->shader, var->members[i]); + } + } else { + assert(var->mode == vtn_variable_mode_ubo || + var->mode == vtn_variable_mode_ssbo || + var->mode == vtn_variable_mode_push_constant); + } + break; + } + + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: { + struct vtn_access_chain *base, *chain; + struct vtn_value *base_val = vtn_untyped_value(b, w[3]); + if (base_val->value_type == vtn_value_type_sampled_image) { + /* This is rather insane. SPIR-V allows you to use OpSampledImage + * to combine an array of images with a single sampler to get an + * array of sampled images that all share the same sampler. + * Fortunately, this means that we can more-or-less ignore the + * sampler when crawling the access chain, but it does leave us + * with this rather awkward little special-case. 
+ */ + base = base_val->sampled_image->image; + } else { + assert(base_val->value_type == vtn_value_type_access_chain); + base = base_val->access_chain; + } + + chain = vtn_access_chain_extend(b, base, count - 4); + + unsigned idx = base->length; + for (int i = 4; i < count; i++) { + struct vtn_value *link_val = vtn_untyped_value(b, w[i]); + if (link_val->value_type == vtn_value_type_constant) { + chain->link[idx].mode = vtn_access_mode_literal; + chain->link[idx].id = link_val->constant->value.u[0]; + } else { + chain->link[idx].mode = vtn_access_mode_id; + chain->link[idx].id = w[i]; + } + idx++; + } + + if (base_val->value_type == vtn_value_type_sampled_image) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = chain; + val->sampled_image->sampler = base_val->sampled_image->sampler; + } else { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + val->access_chain = chain; + } + break; + } + + case SpvOpCopyMemory: { + struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain); + struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain); + + vtn_variable_copy(b, dest->access_chain, src->access_chain); + break; + } + + case SpvOpLoad: { + struct vtn_access_chain *src = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + + if (src->var->mode == vtn_variable_mode_image || + src->var->mode == vtn_variable_mode_sampler) { + vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src; + return; + } + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_variable_load(b, src); + break; + } + + case SpvOpStore: { + struct vtn_access_chain *dest = + vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; + struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); + vtn_variable_store(b, src, dest); + break; + } + + case 
SpvOpArrayLength: { + struct vtn_access_chain *chain = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + + const uint32_t offset = chain->var->type->offsets[w[4]]; + const uint32_t stride = chain->var->type->members[w[4]]->stride; + + unsigned chain_idx; + struct vtn_type *type; + nir_ssa_def *index = + get_vulkan_resource_index(b, chain, &type, &chain_idx); + + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->nb.shader, + nir_intrinsic_get_buffer_size); + instr->src[0] = nir_src_for_ssa(index); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); + nir_builder_instr_insert(&b->nb, &instr->instr); + nir_ssa_def *buf_size = &instr->dest.ssa; + + /* array_length = max(buffer_size - offset, 0) / stride */ + nir_ssa_def *array_length = + nir_idiv(&b->nb, + nir_imax(&b->nb, + nir_isub(&b->nb, + buf_size, + nir_imm_int(&b->nb, offset)), + nir_imm_int(&b->nb, 0u)), + nir_imm_int(&b->nb, stride)); + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_create_ssa_value(b, glsl_uint_type()); + val->ssa->def = array_length; + break; + } + + case SpvOpCopyMemorySized: + default: + unreachable("Unhandled opcode"); + } +} diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 52748a0619a..90732dba961 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -31,7 +31,7 @@ #include "anv_private.h" #include "brw_nir.h" #include "anv_nir.h" -#include "nir/spirv/nir_spirv.h" +#include "spirv/nir_spirv.h" /* Needed for SWIZZLE macros */ #include "program/prog_instruction.h" -- cgit v1.2.3 From 769b5614f85992c333fa87f4faf23656da439d48 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Apr 2016 10:35:37 -0700 Subject: nir/opt_algebraic: Remove the encoding line This is an unneeded diff between the vulkan and master branches --- src/compiler/nir/nir_opt_algebraic.py | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git 
a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index dd41931b345..8f08e6b1341 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1,5 +1,4 @@ #! /usr/bin/env python -# -*- encoding: utf-8 -*- # # Copyright (C) 2014 Intel Corporation # -- cgit v1.2.3 From 34b5db17d965b94f018189950be241e24d006421 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Apr 2016 10:39:54 -0700 Subject: i965: remove pointless diff with the master branch --- src/mesa/drivers/dri/i965/brw_program.c | 2 +- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index b093a87bb82..3112c0c4014 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -279,7 +279,7 @@ brw_get_scratch_bo(struct brw_context *brw, void brwInitFragProgFuncs( struct dd_function_table *functions ) { - /* assert(functions->ProgramStringNotify == _tnl_program_string); */ + assert(functions->ProgramStringNotify == _tnl_program_string); functions->NewProgram = brwNewProgram; functions->DeleteProgram = brwDeleteProgram; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 032fdaa4d23..b67d104c340 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1647,6 +1647,7 @@ vec4_visitor::nir_emit_jump(nir_jump_instr *instr) break; case nir_jump_return: + /* fall through */ default: unreachable("unknown jump"); } -- cgit v1.2.3 From e61c812f76eda0cf70317ee8c4070e80e3312e67 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Apr 2016 15:12:41 -0700 Subject: anv/pipeline: Use the right mask for lower_indirect_derefs --- src/intel/vulkan/anv_pipeline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff 
--git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 90732dba961..a215a377a96 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -174,11 +174,11 @@ anv_shader_compile_to_nir(struct anv_device *device, nir_shader_gather_info(nir, entry_point->impl); - uint32_t indirect_mask = 0; + nir_variable_mode indirect_mask = 0; if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput) - indirect_mask |= (1 << nir_var_shader_in); + indirect_mask |= nir_var_shader_in; if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) - indirect_mask |= 1 << nir_var_local; + indirect_mask |= nir_var_local; nir_lower_indirect_derefs(nir, indirect_mask); -- cgit v1.2.3 From ca16373a2bedfaf5d5f1b668f080cda91d96171c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Mar 2016 12:00:24 -0700 Subject: configure: Add initial support for enabling Vulkan drivers --- configure.ac | 68 +++++++++++++++++++++++++++++++++++++++++++++++++-- src/Makefile.am | 2 +- src/intel/Makefile.am | 6 ++++- 3 files changed, 72 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/configure.ac b/configure.ac index 24c069f23a3..0d3f0ff3c9d 100644 --- a/configure.ac +++ b/configure.ac @@ -1545,6 +1545,59 @@ if test -n "$with_dri_drivers"; then DRI_DIRS=`echo $DRI_DIRS|tr " " "\n"|sort -u|tr "\n" " "` fi + +# +# Vulkan driver configuration +# + +# Keep this in sync with the --with-vulkan-drivers help string default value +VULKAN_DRIVERS_DEFAULT="intel" + +AC_ARG_WITH([vulkan-drivers], + [AS_HELP_STRING([--with-vulkan-drivers@<:@=DIRS...@:>@], + [comma delimited Vulkan drivers list, e.g. + "intel" + @<:@default=intel@:>@])], + [with_vulkan_drivers="$withval"], + [with_vulkan_drivers="$VULKAN_DRIVERS_DEFAULT"]) + +# Doing '--without-vulkan-drivers' will set this variable to 'no'. Clear it +# here so that the script doesn't choke on an unknown driver name later. 
+case "$with_vulkan_drivers" in + yes) with_vulkan_drivers="$VULKAN_DRIVERS_DEFAULT" ;; + no) with_vulkan_drivers='' ;; +esac + +AC_ARG_WITH([vulkan-icddir], + [AS_HELP_STRING([--with-vulkan-icddir=DIR], + [directory for the Vulkan driver icd files @<:@${libdir}/dri@:>@])], + [VULKAN_ICD_INSTALL_DIR="$withval"], + [VULKAN_ICD_INSTALL_DIR='${sysconfdir}/vulkan/icd.d']) +AC_SUBST([VULKAN_ICD_INSTALL_DIR]) + +if test -n "$with_vulkan_drivers"; then + VULKAN_DRIVERS=`IFS=', '; echo $with_vulkan_drivers` + for driver in $VULKAN_DRIVERS; do + case "x$driver" in + xintel) + if test "x$HAVE_I965_DRI" != xyes; then + AC_MSG_ERROR([Intel Vulkan driver requires the i965 dri driver]) + fi + if test "x$with_sha1" == "x"; then + AC_MSG_ERROR([Intel Vulkan driver requires SHA1]) + fi + HAVE_INTEL_VULKAN=yes; + + ;; + *) + AC_MSG_ERROR([Vulkan driver '$driver' does not exist]) + ;; + esac + done + VULKAN_DRIVERS=`echo $VULKAN_DRIVERS|tr " " "\n"|sort -u|tr "\n" " "` +fi + + AM_CONDITIONAL(NEED_MEGADRIVER, test -n "$DRI_DIRS") AM_CONDITIONAL(NEED_LIBMESA, test "x$enable_xlib_glx" = xyes -o \ "x$enable_osmesa" = xyes -o \ @@ -1632,8 +1685,6 @@ GBM_PC_LIB_PRIV="$DLOPEN_LIBS" AC_SUBST([GBM_PC_REQ_PRIV]) AC_SUBST([GBM_PC_LIB_PRIV]) -AM_CONDITIONAL(HAVE_VULKAN, true) - dnl dnl EGL configuration dnl @@ -2401,6 +2452,10 @@ AM_CONDITIONAL(HAVE_R200_DRI, test x$HAVE_R200_DRI = xyes) AM_CONDITIONAL(HAVE_RADEON_DRI, test x$HAVE_RADEON_DRI = xyes) AM_CONDITIONAL(HAVE_SWRAST_DRI, test x$HAVE_SWRAST_DRI = xyes) +AM_CONDITIONAL(HAVE_INTEL_VULKAN, test "x$HAVE_INTEL_VULKAN" = xyes) + +AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes) + AM_CONDITIONAL(NEED_RADEON_DRM_WINSYS, test "x$HAVE_GALLIUM_R300" = xyes -o \ "x$HAVE_GALLIUM_R600" = xyes -o \ "x$HAVE_GALLIUM_RADEONSI" = xyes) @@ -2647,6 +2702,15 @@ if test "$enable_egl" = yes; then echo " EGL drivers: $egl_drivers" fi +# Vulkan +echo "" +if test "x$VULKAN_DRIVERS" != x; then + echo " Vulkan drivers: $VULKAN_DRIVERS" + echo " 
Vulkan ICD dir: $VULKAN_ICD_INSTALL_DIR" +else + echo " Vulkan drivers: no" +fi + echo "" if test "x$MESA_LLVM" = x1; then echo " llvm: yes" diff --git a/src/Makefile.am b/src/Makefile.am index 73686a93b3c..a0572b7a563 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -56,7 +56,7 @@ EXTRA_DIST = \ AM_CFLAGS = $(VISIBILITY_CFLAGS) AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS) -if HAVE_VULKAN +if HAVE_INTEL_DRIVERS SUBDIRS += intel endif diff --git a/src/intel/Makefile.am b/src/intel/Makefile.am index d5bd0b3b5df..c3673c6e7c8 100644 --- a/src/intel/Makefile.am +++ b/src/intel/Makefile.am @@ -19,4 +19,8 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -SUBDIRS = genxml isl vulkan +SUBDIRS = genxml isl + +if HAVE_INTEL_VULKAN +SUBDIRS += vulkan +endif -- cgit v1.2.3 From e40b867145160dfb258a6f03c7e6b02f3f839aa4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 10:30:42 -0700 Subject: anv/intel_icd: Don't provide an absolute path The driver will be installed to $(libdir)/libvulkan_intel.so and just providing a driver name is enough for the loader. This also ensures that multi-arch systems work ok. 
--- src/intel/vulkan/.gitignore | 3 +-- src/intel/vulkan/intel_icd.json | 7 +++++++ src/intel/vulkan/intel_icd.json.in | 7 ------- 3 files changed, 8 insertions(+), 9 deletions(-) create mode 100644 src/intel/vulkan/intel_icd.json delete mode 100644 src/intel/vulkan/intel_icd.json.in (limited to 'src') diff --git a/src/intel/vulkan/.gitignore b/src/intel/vulkan/.gitignore index 40afc2e3989..4a683b45487 100644 --- a/src/intel/vulkan/.gitignore +++ b/src/intel/vulkan/.gitignore @@ -5,5 +5,4 @@ /wayland-drm-protocol.c /wayland-drm-client-protocol.h /dev_icd.json -/intel_icd.json -/gen*_pack.h \ No newline at end of file +/gen*_pack.h diff --git a/src/intel/vulkan/intel_icd.json b/src/intel/vulkan/intel_icd.json new file mode 100644 index 00000000000..277c14ec444 --- /dev/null +++ b/src/intel/vulkan/intel_icd.json @@ -0,0 +1,7 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "libvulkan_intel.so", + "abi_versions": "1.0.3" + } +} diff --git a/src/intel/vulkan/intel_icd.json.in b/src/intel/vulkan/intel_icd.json.in deleted file mode 100644 index d9b363a9762..00000000000 --- a/src/intel/vulkan/intel_icd.json.in +++ /dev/null @@ -1,7 +0,0 @@ -{ - "file_format_version": "1.0.0", - "ICD": { - "library_path": "@install_libdir@/libvulkan_intel.so", - "abi_versions": "1.0.3" - } -} -- cgit v1.2.3 From 48cc8c284aa28405eaa2335bb8f96324c8153ca7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 10:34:58 -0700 Subject: anv: Install the installable ICD --- src/intel/vulkan/Makefile.am | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index acf84e55871..cba66713948 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -207,3 +207,6 @@ libvulkan_test_la_CFLAGS = \ libvulkan_test_la_LIBADD = $(libvulkan_intel_la_LIBADD) include $(top_srcdir)/install-lib-links.mk + +install-data-local: + $(INSTALL_DATA) -D $(srcdir)/intel_icd.json 
$(VULKAN_ICD_INSTALL_DIR)/intel_icd.json -- cgit v1.2.3 From f0bbb34e4962f328df3d1d1f7cd08ceb237ab7a8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Apr 2016 18:22:08 -0700 Subject: Revert "i965/vec4: Add support for SHADER_OPCODE_MOV_INDIRECT" This reverts commit 4115648a6be2e846660a35a0e260ae53b809b7e0. This commit was half-baked and probably never should have been committed. We'll add this back in properly later when we need it. --- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 45 ------------------------ 1 file changed, 45 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 4b3b08903c9..8409e820f09 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1398,48 +1398,6 @@ generate_set_simd4x2_header_gen9(struct brw_codegen *p, brw_pop_insn_state(p); } -static void -generate_mov_indirect(struct brw_codegen *p, - vec4_instruction *inst, - struct brw_reg dst, struct brw_reg reg, - struct brw_reg indirect, struct brw_reg length) -{ - assert(indirect.type == BRW_REGISTER_TYPE_UD); - - unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr * (REG_SIZE / 2); - - /* This instruction acts in align1 mode */ - assert(inst->force_writemask_all || reg.writemask == 0xf); - - brw_push_insn_state(p); - brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - - struct brw_reg addr = vec2(brw_address_reg(0)); - - /* We need to move the indirect value into the address register. In order - * to make things make some sense, we want to respect at least the X - * component of the swizzle. In order to do that, we need to convert the - * subnr (probably 0) to an align1 subnr and add in the swizzle. We then - * use a region of <8,4,0>:uw to pick off the first 2 bytes of the indirect - * and splat it out to all four channels of the given half of a0. 
- */ - assert(brw_is_single_value_swizzle(indirect.swizzle)); - indirect.subnr = (indirect.subnr * 4 + BRW_GET_SWZ(indirect.swizzle, 0)) * 2; - indirect = stride(retype(indirect, BRW_REGISTER_TYPE_UW), 8, 4, 0); - - brw_ADD(p, addr, indirect, brw_imm_uw(imm_byte_offset)); - - /* Use a <4,1> region Vx1 region*/ - struct brw_reg src = brw_VxH_indirect(0, 0); - src.width = BRW_WIDTH_4; - src.hstride = BRW_HORIZONTAL_STRIDE_1; - - brw_MOV(p, dst, retype(src, reg.type)); - - brw_pop_insn_state(p); -} - static void generate_code(struct brw_codegen *p, const struct brw_compiler *compiler, @@ -1988,9 +1946,6 @@ generate_code(struct brw_codegen *p, brw_WAIT(p); break; - case SHADER_OPCODE_MOV_INDIRECT: - generate_mov_indirect(p, inst, dst, src[0], src[1], src[2]); - default: unreachable("Unsupported opcode"); } -- cgit v1.2.3 From 547032c56ab032f51781b8fb5808e911a18cc87e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Apr 2016 18:23:59 -0700 Subject: main/mtypes: Remove the "set" parameter from gl_uniform_block This is a left-over from the early days of the Vulkan driver --- src/mesa/main/mtypes.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src') diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index eec057e0137..e4a3036437e 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2494,11 +2494,6 @@ struct gl_uniform_block */ GLuint Binding; - /** - * Vulkan descriptor set qualifier for this block. - */ - GLuint Set; - /** * Minimum size (in bytes) of a buffer object to back this uniform buffer * (GL_UNIFORM_BLOCK_DATA_SIZE). -- cgit v1.2.3 From c3362453f9235251253daefbb46c40e1cb07a790 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 15 Apr 2016 09:53:42 -0700 Subject: Revert "i965/fs: Feel free to spill partial reads/writes" This reverts commit 2434ceabf41e66f2a3627ea8591e5ca427a78cce. 
--- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 8396854fcb1..791da0e038e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -808,13 +808,30 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) */ foreach_block_and_inst(block, fs_inst, inst, cfg) { for (unsigned int i = 0; i < inst->sources; i++) { - if (inst->src[i].file == VGRF) + if (inst->src[i].file == VGRF) { spill_costs[inst->src[i].nr] += loop_scale; + + /* Register spilling logic assumes full-width registers; smeared + * registers have a width of 1 so if we try to spill them we'll + * generate invalid assembly. This shouldn't be a problem because + * smeared registers are only used as short-term temporaries when + * loading pull constants, so spilling them is unlikely to reduce + * register pressure anyhow. + */ + if (!inst->src[i].is_contiguous()) { + no_spill[inst->src[i].nr] = true; + } + } } - if (inst->dst.file == VGRF) + if (inst->dst.file == VGRF) { spill_costs[inst->dst.nr] += inst->regs_written * loop_scale; + if (!inst->dst.is_contiguous()) { + no_spill[inst->dst.nr] = true; + } + } + switch (inst->opcode) { case BRW_OPCODE_DO: -- cgit v1.2.3 From d7189bdeeeae3179a62cee0d31a3d0a9d4f96f6f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 15 Apr 2016 09:45:55 -0700 Subject: Revert "i965/fs: Properly write-mask spills" This reverts commit 9c0109a1f6e0c9c8dd81fc215f8c0c5976e05790. 
--- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 14 ++++---------- 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 2b00129b4ba..6afb9b6c8a6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -225,7 +225,7 @@ public: void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg, uint32_t spill_offset, int count); void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg, - uint32_t spill_offset, int count, bool we_all); + uint32_t spill_offset, int count); void emit_nir_code(); void nir_setup_inputs(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 791da0e038e..2347cd5d33f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -751,7 +751,6 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst, dst); unspill_inst->offset = spill_offset; unspill_inst->regs_written = reg_size; - unspill_inst->force_writemask_all = true; if (!gen7_read) { unspill_inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1; @@ -765,11 +764,11 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst, void fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src, - uint32_t spill_offset, int count, bool we_all) + uint32_t spill_offset, int count) { int reg_size = 1; int spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1; - if (inst->exec_size == 16 && count % 2 == 0) { + if (dispatch_width == 16 && count % 2 == 0) { spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen); reg_size = 2; } @@ -785,8 +784,6 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src, spill_inst->offset = spill_offset + i * reg_size * REG_SIZE; spill_inst->mlen = 1 + reg_size; /* header, value */ spill_inst->base_mrf = spill_base_mrf; - 
spill_inst->force_writemask_all = we_all; - spill_inst->force_sechalf = inst->force_sechalf; } } @@ -941,15 +938,12 @@ fs_visitor::spill_reg(int spill_reg) * inst->regs_written(), then we need to unspill the destination * since we write back out all of the regs_written(). */ - bool need_unspill = inst->is_partial_write() || - type_sz(inst->dst.type) != 4; - if (need_unspill) + if (inst->is_partial_write()) emit_unspill(block, inst, spill_src, subset_spill_offset, inst->regs_written); emit_spill(block, inst, spill_src, subset_spill_offset, - inst->regs_written, - need_unspill || inst->force_writemask_all); + inst->regs_written); } } -- cgit v1.2.3 From 17d9a2b0112821d612d4ed1a696de2d3c78e9b61 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 15 Apr 2016 09:48:56 -0700 Subject: i965/surface_formats: Mark A4B4G4R4_UNORM as SKL+ only This is what is indicated by the bspec. --- src/mesa/drivers/dri/i965/brw_surface_formats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 61e14080264..7ff15ccc0c6 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -204,7 +204,7 @@ const struct brw_surface_format_info surface_formats[] = { SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) SF( x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) - SF( Y, Y, x, Y, 90, x, x, x, x, x, A4B4G4R4_UNORM) + SF(90, 90, x, x, 90, x, x, x, x, x, A4B4G4R4_UNORM) SF( x, x, x, x, x, x, x, x, x, x, L8A8_UINT) SF( x, x, x, x, x, x, x, x, x, x, L8A8_SINT) SF( Y, Y, x, 45, Y, Y, Y, x, x, x, R8_UNORM) -- cgit v1.2.3 From 8403e6de9f416807362c3f155dbd93ef7d3eb700 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 15 Apr 2016 09:54:42 -0700 Subject: i965: Default to scalar GS --- src/mesa/drivers/dri/i965/brw_compiler.c | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index 46d9a40c2cf..4496699e397 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -156,7 +156,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true); compiler->scalar_stage[MESA_SHADER_GEOMETRY] = - devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", true); + devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false); compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; compiler->scalar_stage[MESA_SHADER_COMPUTE] = true; -- cgit v1.2.3